diff --git a/README.md b/README.md index fc04832e0021333bc1f90f97a6f83a57141470fe..7fdbd792078cb0ee106acb4fed99b40fe98a4c92 100644 --- a/README.md +++ b/README.md @@ -49,15 +49,6 @@ More details can be found in our paper. If you use LibMoE, please cite it using this BibTeX: ``` -@misc{nguyen2024libmoelibrarycomprehensivebenchmarking, - title={LIBMoE: A Library for comprehensive benchmarking Mixture of Experts in Large Language Models}, - author={Nam V. Nguyen and Thong T. Doan and Luong Tran and Van Nguyen and Quang Pham}, - year={2024}, - eprint={2411.00918}, - archivePrefix={arXiv}, - primaryClass={cs.CL}, - url={https://arxiv.org/abs/2411.00918}, -} ``` diff --git a/pft/added_tokens.json b/pft/added_tokens.json deleted file mode 100644 index c9d3d3a1b74d87e381e471f7b33784015d2dc0ea..0000000000000000000000000000000000000000 --- a/pft/added_tokens.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "<|assistant|>": 32001, - "<|endoftext|>": 32000, - "<|end|>": 32007, - "<|placeholder1|>": 32002, - "<|placeholder2|>": 32003, - "<|placeholder3|>": 32004, - "<|placeholder4|>": 32005, - "<|placeholder5|>": 32008, - "<|placeholder6|>": 32009, - "<|system|>": 32006, - "<|user|>": 32010 -} diff --git a/pft/clip.bin b/pft/clip.bin deleted file mode 100644 index 0aa7d519218cb04e7fe9f60a61778ca020c6dc36..0000000000000000000000000000000000000000 --- a/pft/clip.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5b1f13272c4ab8444bc5f83478f1d15155b8e702c8a8aa900cd9d5d775aa3985 -size 824993767 diff --git a/pft/config.json b/pft/config.json deleted file mode 100644 index 6d0acd5f19ce77e0093b0ef9df5ea1496388c203..0000000000000000000000000000000000000000 --- a/pft/config.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "_name_or_path": "/cm/archive/thongdt4/toolkitmoe/checkpoints/transformers_checkpoints/phi-3-mini-4k-instruct", - "architectures": [ - "LlavaPhiForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" - }, - "balance_loss_coef": 0.0, - "bos_token_id": 1, - "clip_smoe": false, - "dropout": false, - "embd_pdrop": 0.0, - "eos_token_id": 32000, - "freeze_mm_mlp_adapter": false, - "hidden_act": "silu", - "hidden_size": 3072, - "image_aspect_ratio": "pad", - "initializer_range": 0.02, - "intermediate_size": 8192, - "local_rank": 0, - "max_position_embeddings": 4096, - "mlp_smoe": false, - "mm_hidden_size": 1152, - "mm_patch_merge_type": "flat", - "mm_projector_lr": null, - "mm_projector_type": "mlp2x_gelu", - "mm_use_im_patch_token": false, - "mm_use_im_start_end": false, - "mm_vision_select_feature": "patch", - "mm_vision_select_layer": -2, - "mm_vision_tower": "google/siglip-so400m-patch14-224", - "model_type": "llava_phi", - "moe_name": "smoe", - "num_attention_heads": 32, - "num_experts": 1, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "num_layers": 3, - "num_selected": 1, - "original_max_position_embeddings": 4096, - "pad_token_id": 32000, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "router_z_loss_coef": 0.0, - "scales": [ - 1, - 3 - ], - "sliding_window": 2047, - "tie_word_embeddings": false, - "tokenizer_model_max_length": 4096, - "tokenizer_padding_side": "right", - "torch_dtype": "bfloat16", - "training": true, - "transformers_version": "4.43.2", - "tune_mm_mlp_adapter": false, - "use_cache": true, - "use_mm_proj": true, - "vocab_size": 32064 -} diff --git a/pft/generation_config.json b/pft/generation_config.json deleted file mode 100644 index 3a20824ea777f1ebd11da590160a7209fe3b62c6..0000000000000000000000000000000000000000 --- a/pft/generation_config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "_from_model_config": true, - "bos_token_id": 1, - "do_sample": true, - "eos_token_id": [ - 32000, - 32001, - 32007 - ], - "pad_token_id": 32000, - "transformers_version": "4.43.2" -} diff --git a/pft/mm_projector.bin b/pft/mm_projector.bin deleted file mode 100644 index 1086de661f104f8cc359e17b72068a4319a97626..0000000000000000000000000000000000000000 --- a/pft/mm_projector.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0e72e505ed3caaa80a5ca6abf7c96dda3c412c513581b6a779f770ec93532a5c -size 33044536 diff --git a/pft/model-00001-of-00002.safetensors b/pft/model-00001-of-00002.safetensors deleted file mode 100644 index c9124209a9fa90f5e05a3e8ab7e2cb7dfa2bfc81..0000000000000000000000000000000000000000 --- a/pft/model-00001-of-00002.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a71c80ca9162a80052aeae0c4118b94c172ff2689c661cc40ce1d5eec6d4abd3 -size 4972489328 diff --git a/pft/model-00002-of-00002.safetensors b/pft/model-00002-of-00002.safetensors deleted file mode 100644 index ceb041a8983b27d14cb5b89d2c0a38720ca4d69c..0000000000000000000000000000000000000000 --- a/pft/model-00002-of-00002.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:640ff76f658c238dec128bfeb1715c7fcdb3686a7f0218a445de898c096370ff -size 3527678240 diff --git a/pft/model.safetensors.index.json b/pft/model.safetensors.index.json deleted file mode 100644 index 5838aaf700aed9f622d4cbd8edb680eca1ef8c8d..0000000000000000000000000000000000000000 --- a/pft/model.safetensors.index.json +++ /dev/null @@ -1,641 +0,0 @@ -{ - "metadata": { - "total_size": 8500081632 - }, - "weight_map": { - "lm_head.weight": "model-00002-of-00002.safetensors", - "model.embed_tokens.weight": "model-00001-of-00002.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.0.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.0.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.1.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.1.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.10.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.10.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.11.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.11.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.12.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.12.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.13.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.13.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.14.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.14.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.15.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.15.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.16.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.16.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.17.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.17.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.18.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.18.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.19.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.19.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.2.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.2.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.20.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.20.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.21.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.21.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.22.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.22.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.23.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.23.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.24.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.24.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.25.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.25.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.26.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.26.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.27.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.27.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.28.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.28.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.29.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.29.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.3.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.3.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.30.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.30.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.31.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.31.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.4.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.4.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.5.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.5.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.6.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.6.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.7.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.7.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.8.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.8.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.9.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.9.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.mm_projector.0.bias": "model-00002-of-00002.safetensors", - "model.mm_projector.0.weight": "model-00002-of-00002.safetensors", - "model.mm_projector.2.bias": "model-00002-of-00002.safetensors", - "model.mm_projector.2.weight": "model-00002-of-00002.safetensors", - "model.norm.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00002-of-00002.safetensors" - } -} diff --git a/pft/special_tokens_map.json b/pft/special_tokens_map.json deleted file mode 100644 index 3e4d5a5bc1cb51753cc9ae0305ece0da60052b10..0000000000000000000000000000000000000000 --- a/pft/special_tokens_map.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "bos_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "", - "unk_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - } -} diff --git a/pft/tokenizer.model b/pft/tokenizer.model deleted file mode 100644 index 6c00c742ce03c627d6cd5b795984876fa49fa899..0000000000000000000000000000000000000000 --- a/pft/tokenizer.model +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 -size 499723 diff --git a/pft/tokenizer_config.json b/pft/tokenizer_config.json deleted file mode 100644 index aab14c4783ec8f57c9ea220a10119d3d1e258ffd..0000000000000000000000000000000000000000 --- a/pft/tokenizer_config.json +++ /dev/null @@ -1,132 +0,0 @@ -{ - "add_bos_token": true, - "add_eos_token": false, - "add_prefix_space": true, - "added_tokens_decoder": { - "0": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "1": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "2": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": false - }, - "32000": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "32001": { - "content": "<|assistant|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32002": { - "content": "<|placeholder1|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32003": { - "content": "<|placeholder2|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32004": { - "content": "<|placeholder3|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32005": { - "content": "<|placeholder4|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32006": { - "content": "<|system|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32007": { - "content": "<|end|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32008": { - "content": "<|placeholder5|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32009": { - "content": "<|placeholder6|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32010": { - "content": "<|user|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - } - }, - "bos_token": "", - "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", - "clean_up_tokenization_spaces": false, - "eos_token": "<|endoftext|>", - "legacy": false, - "model_max_length": 4096, - "pad_token": "", - "padding_side": "right", - "sp_model_kwargs": {}, - "spaces_between_special_tokens": false, - "tokenizer_class": "LlamaTokenizer", - "unk_token": "", - "use_default_system_prompt": false -} diff --git a/pft/trainer_state.json b/pft/trainer_state.json deleted file mode 100644 index 75fae89b333b7aa73077d8f0ff4ca8e58ca5e1be..0000000000000000000000000000000000000000 --- a/pft/trainer_state.json +++ /dev/null @@ -1,51674 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.9999322171761675, - "eval_steps": 500, - "global_step": 7376, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.00013556564766488172, - "grad_norm": 18.125877412826984, - "learning_rate": 9.009009009009009e-09, - "loss": 2.2256, - "step": 1 - }, - { - "epoch": 0.00027113129532976344, - "grad_norm": 18.575106069665193, - "learning_rate": 1.8018018018018017e-08, - "loss": 2.2097, - "step": 2 - }, - { - "epoch": 0.00040669694299464516, - "grad_norm": 17.987579225304927, - "learning_rate": 2.7027027027027028e-08, - "loss": 2.1808, - "step": 3 - }, - { - "epoch": 0.0005422625906595269, - "grad_norm": 18.14957323842981, - "learning_rate": 3.6036036036036035e-08, - "loss": 2.2013, - "step": 4 - }, - { - "epoch": 0.0006778282383244086, - "grad_norm": 18.891833829939664, - "learning_rate": 4.504504504504504e-08, - "loss": 2.2111, - "step": 5 - }, - { - "epoch": 0.0008133938859892903, - "grad_norm": 18.03446624709322, - "learning_rate": 5.4054054054054056e-08, - "loss": 2.2461, - "step": 6 - }, - { - "epoch": 0.000948959533654172, - "grad_norm": 18.24050155332701, - "learning_rate": 6.306306306306305e-08, - "loss": 2.211, - "step": 7 - }, - { - "epoch": 0.0010845251813190538, - "grad_norm": 19.018440407380332, - "learning_rate": 7.207207207207207e-08, - "loss": 2.218, - "step": 8 - }, - { - "epoch": 0.0012200908289839354, - "grad_norm": 18.329567473829734, - "learning_rate": 8.108108108108108e-08, - "loss": 2.2347, - "step": 9 - }, - { - "epoch": 0.0013556564766488172, - "grad_norm": 17.932607346687153, - "learning_rate": 9.009009009009008e-08, - "loss": 2.1847, - "step": 10 - }, - { - "epoch": 0.0014912221243136988, - "grad_norm": 17.529153704317615, - "learning_rate": 9.909909909909909e-08, - "loss": 2.1931, - "step": 11 - }, - { - "epoch": 0.0016267877719785807, - "grad_norm": 18.69285777846952, - "learning_rate": 1.0810810810810811e-07, - "loss": 2.2519, - "step": 12 - }, - { - "epoch": 0.0017623534196434623, - "grad_norm": 19.077419217385636, - "learning_rate": 1.171171171171171e-07, - "loss": 2.1795, - "step": 13 - }, - { - "epoch": 0.001897919067308344, - "grad_norm": 18.06789605667497, - "learning_rate": 1.261261261261261e-07, - "loss": 2.2142, - "step": 14 - }, - { - "epoch": 0.002033484714973226, - "grad_norm": 18.75831609191383, - "learning_rate": 1.3513513513513515e-07, - "loss": 2.2478, - "step": 15 - }, - { - "epoch": 0.0021690503626381075, - "grad_norm": 18.39612340288509, - "learning_rate": 1.4414414414414414e-07, - "loss": 2.1925, - "step": 16 - }, - { - "epoch": 0.002304616010302989, - "grad_norm": 18.554913488079723, - "learning_rate": 1.5315315315315313e-07, - "loss": 2.2179, - "step": 17 - }, - { - "epoch": 0.0024401816579678708, - "grad_norm": 38.07597474747701, - "learning_rate": 1.6216216216216215e-07, - "loss": 2.2219, - "step": 18 - }, - { - "epoch": 0.002575747305632753, - "grad_norm": 17.43170832915659, - "learning_rate": 1.7117117117117117e-07, - "loss": 2.1919, - "step": 19 - }, - { - "epoch": 0.0027113129532976344, - "grad_norm": 17.86869497780154, - "learning_rate": 1.8018018018018017e-07, - "loss": 2.1832, - "step": 20 - }, - { - "epoch": 0.002846878600962516, - "grad_norm": 17.400942579077945, - "learning_rate": 1.891891891891892e-07, - "loss": 2.1731, - "step": 21 - }, - { - "epoch": 0.0029824442486273976, - "grad_norm": 17.794419976167035, - "learning_rate": 1.9819819819819818e-07, - "loss": 2.157, - "step": 22 - }, - { - "epoch": 0.0031180098962922797, - "grad_norm": 17.60989825541936, - "learning_rate": 2.072072072072072e-07, - "loss": 2.1693, - "step": 23 - }, - { - "epoch": 0.0032535755439571613, - "grad_norm": 17.877157930048536, - "learning_rate": 2.1621621621621622e-07, - "loss": 2.1597, - "step": 24 - }, - { - "epoch": 0.003389141191622043, - "grad_norm": 17.214382261373643, - "learning_rate": 2.2522522522522522e-07, - "loss": 2.1885, - "step": 25 - }, - { - "epoch": 0.0035247068392869245, - "grad_norm": 17.82215354363436, - "learning_rate": 2.342342342342342e-07, - "loss": 2.213, - "step": 26 - }, - { - "epoch": 0.0036602724869518066, - "grad_norm": 17.48605450775996, - "learning_rate": 2.4324324324324326e-07, - "loss": 2.1852, - "step": 27 - }, - { - "epoch": 0.003795838134616688, - "grad_norm": 17.194467765872567, - "learning_rate": 2.522522522522522e-07, - "loss": 2.1676, - "step": 28 - }, - { - "epoch": 0.00393140378228157, - "grad_norm": 17.461563514138195, - "learning_rate": 2.6126126126126124e-07, - "loss": 2.2146, - "step": 29 - }, - { - "epoch": 0.004066969429946452, - "grad_norm": 17.553802619475682, - "learning_rate": 2.702702702702703e-07, - "loss": 2.1804, - "step": 30 - }, - { - "epoch": 0.0042025350776113335, - "grad_norm": 15.727757919973058, - "learning_rate": 2.7927927927927923e-07, - "loss": 2.1141, - "step": 31 - }, - { - "epoch": 0.004338100725276215, - "grad_norm": 15.870045263004812, - "learning_rate": 2.882882882882883e-07, - "loss": 2.1322, - "step": 32 - }, - { - "epoch": 0.004473666372941097, - "grad_norm": 15.653808206129256, - "learning_rate": 2.972972972972973e-07, - "loss": 2.074, - "step": 33 - }, - { - "epoch": 0.004609232020605978, - "grad_norm": 16.202900988650217, - "learning_rate": 3.0630630630630627e-07, - "loss": 2.1224, - "step": 34 - }, - { - "epoch": 0.00474479766827086, - "grad_norm": 15.112372739808663, - "learning_rate": 3.153153153153153e-07, - "loss": 2.0799, - "step": 35 - }, - { - "epoch": 0.0048803633159357415, - "grad_norm": 16.273271545011845, - "learning_rate": 3.243243243243243e-07, - "loss": 2.0979, - "step": 36 - }, - { - "epoch": 0.005015928963600624, - "grad_norm": 15.492324542235627, - "learning_rate": 3.333333333333333e-07, - "loss": 2.1027, - "step": 37 - }, - { - "epoch": 0.005151494611265506, - "grad_norm": 15.658889991500574, - "learning_rate": 3.4234234234234235e-07, - "loss": 2.103, - "step": 38 - }, - { - "epoch": 0.005287060258930387, - "grad_norm": 15.686295524607946, - "learning_rate": 3.5135135135135134e-07, - "loss": 2.1413, - "step": 39 - }, - { - "epoch": 0.005422625906595269, - "grad_norm": 14.892668679393763, - "learning_rate": 3.6036036036036033e-07, - "loss": 2.0617, - "step": 40 - }, - { - "epoch": 0.0055581915542601504, - "grad_norm": 15.104561965162503, - "learning_rate": 3.6936936936936933e-07, - "loss": 2.1291, - "step": 41 - }, - { - "epoch": 0.005693757201925032, - "grad_norm": 50.84289315840259, - "learning_rate": 3.783783783783784e-07, - "loss": 2.0986, - "step": 42 - }, - { - "epoch": 0.005829322849589914, - "grad_norm": 14.365304452673064, - "learning_rate": 3.8738738738738737e-07, - "loss": 2.0043, - "step": 43 - }, - { - "epoch": 0.005964888497254795, - "grad_norm": 10.862225563349813, - "learning_rate": 3.9639639639639636e-07, - "loss": 1.9274, - "step": 44 - }, - { - "epoch": 0.006100454144919678, - "grad_norm": 10.281201109516811, - "learning_rate": 4.054054054054054e-07, - "loss": 1.914, - "step": 45 - }, - { - "epoch": 0.006236019792584559, - "grad_norm": 10.563454835939782, - "learning_rate": 4.144144144144144e-07, - "loss": 1.8657, - "step": 46 - }, - { - "epoch": 0.006371585440249441, - "grad_norm": 10.413475523735038, - "learning_rate": 4.234234234234234e-07, - "loss": 1.8987, - "step": 47 - }, - { - "epoch": 0.006507151087914323, - "grad_norm": 9.759551915050734, - "learning_rate": 4.3243243243243244e-07, - "loss": 1.8896, - "step": 48 - }, - { - "epoch": 0.006642716735579204, - "grad_norm": 10.897712729575717, - "learning_rate": 4.414414414414414e-07, - "loss": 1.8855, - "step": 49 - }, - { - "epoch": 0.006778282383244086, - "grad_norm": 9.505649038101781, - "learning_rate": 4.5045045045045043e-07, - "loss": 1.8994, - "step": 50 - }, - { - "epoch": 0.0069138480309089674, - "grad_norm": 9.578178265042242, - "learning_rate": 4.594594594594595e-07, - "loss": 1.8742, - "step": 51 - }, - { - "epoch": 0.007049413678573849, - "grad_norm": 9.307823018071025, - "learning_rate": 4.684684684684684e-07, - "loss": 1.8742, - "step": 52 - }, - { - "epoch": 0.0071849793262387315, - "grad_norm": 14.49170364918165, - "learning_rate": 4.774774774774775e-07, - "loss": 1.8863, - "step": 53 - }, - { - "epoch": 0.007320544973903613, - "grad_norm": 8.535634607737279, - "learning_rate": 4.864864864864865e-07, - "loss": 1.8812, - "step": 54 - }, - { - "epoch": 0.007456110621568495, - "grad_norm": 8.670029705651041, - "learning_rate": 4.954954954954955e-07, - "loss": 1.8653, - "step": 55 - }, - { - "epoch": 0.007591676269233376, - "grad_norm": 8.694807721816511, - "learning_rate": 5.045045045045044e-07, - "loss": 1.8576, - "step": 56 - }, - { - "epoch": 0.007727241916898258, - "grad_norm": 7.9799064372643675, - "learning_rate": 5.135135135135134e-07, - "loss": 1.8281, - "step": 57 - }, - { - "epoch": 0.00786280756456314, - "grad_norm": 8.090538394129242, - "learning_rate": 5.225225225225225e-07, - "loss": 1.8089, - "step": 58 - }, - { - "epoch": 0.007998373212228021, - "grad_norm": 7.293605455837586, - "learning_rate": 5.315315315315315e-07, - "loss": 1.822, - "step": 59 - }, - { - "epoch": 0.008133938859892904, - "grad_norm": 6.7733444296458165, - "learning_rate": 5.405405405405406e-07, - "loss": 1.8025, - "step": 60 - }, - { - "epoch": 0.008269504507557784, - "grad_norm": 6.526193824925845, - "learning_rate": 5.495495495495495e-07, - "loss": 1.7774, - "step": 61 - }, - { - "epoch": 0.008405070155222667, - "grad_norm": 6.169869450080902, - "learning_rate": 5.585585585585585e-07, - "loss": 1.759, - "step": 62 - }, - { - "epoch": 0.008540635802887548, - "grad_norm": 6.244580753381197, - "learning_rate": 5.675675675675675e-07, - "loss": 1.7203, - "step": 63 - }, - { - "epoch": 0.00867620145055243, - "grad_norm": 6.413451234785374, - "learning_rate": 5.765765765765766e-07, - "loss": 1.7197, - "step": 64 - }, - { - "epoch": 0.008811767098217311, - "grad_norm": 5.7985441197410825, - "learning_rate": 5.855855855855856e-07, - "loss": 1.7207, - "step": 65 - }, - { - "epoch": 0.008947332745882193, - "grad_norm": 5.172037609166836, - "learning_rate": 5.945945945945947e-07, - "loss": 1.6594, - "step": 66 - }, - { - "epoch": 0.009082898393547076, - "grad_norm": 4.789146941489647, - "learning_rate": 6.036036036036036e-07, - "loss": 1.6631, - "step": 67 - }, - { - "epoch": 0.009218464041211957, - "grad_norm": 4.888130228558099, - "learning_rate": 6.126126126126125e-07, - "loss": 1.6505, - "step": 68 - }, - { - "epoch": 0.009354029688876839, - "grad_norm": 4.72145895286354, - "learning_rate": 6.216216216216216e-07, - "loss": 1.6738, - "step": 69 - }, - { - "epoch": 0.00948959533654172, - "grad_norm": 4.688849568932902, - "learning_rate": 6.306306306306306e-07, - "loss": 1.5781, - "step": 70 - }, - { - "epoch": 0.009625160984206602, - "grad_norm": 4.472983807698425, - "learning_rate": 6.396396396396397e-07, - "loss": 1.6653, - "step": 71 - }, - { - "epoch": 0.009760726631871483, - "grad_norm": 6.282537547307923, - "learning_rate": 6.486486486486486e-07, - "loss": 1.6129, - "step": 72 - }, - { - "epoch": 0.009896292279536366, - "grad_norm": 4.4389597615669425, - "learning_rate": 6.576576576576577e-07, - "loss": 1.6464, - "step": 73 - }, - { - "epoch": 0.010031857927201248, - "grad_norm": 4.744669092593942, - "learning_rate": 6.666666666666666e-07, - "loss": 1.6015, - "step": 74 - }, - { - "epoch": 0.010167423574866129, - "grad_norm": 4.043033516989053, - "learning_rate": 6.756756756756756e-07, - "loss": 1.5977, - "step": 75 - }, - { - "epoch": 0.010302989222531011, - "grad_norm": 4.0901548745402065, - "learning_rate": 6.846846846846847e-07, - "loss": 1.6015, - "step": 76 - }, - { - "epoch": 0.010438554870195892, - "grad_norm": 3.5345655746592524, - "learning_rate": 6.936936936936936e-07, - "loss": 1.5632, - "step": 77 - }, - { - "epoch": 0.010574120517860774, - "grad_norm": 3.42165843123686, - "learning_rate": 7.027027027027027e-07, - "loss": 1.5857, - "step": 78 - }, - { - "epoch": 0.010709686165525655, - "grad_norm": 4.0097105004671185, - "learning_rate": 7.117117117117116e-07, - "loss": 1.5817, - "step": 79 - }, - { - "epoch": 0.010845251813190538, - "grad_norm": 3.62178830640266, - "learning_rate": 7.207207207207207e-07, - "loss": 1.5962, - "step": 80 - }, - { - "epoch": 0.010980817460855418, - "grad_norm": 2.9689517832347243, - "learning_rate": 7.297297297297297e-07, - "loss": 1.5797, - "step": 81 - }, - { - "epoch": 0.011116383108520301, - "grad_norm": 3.932773383636214, - "learning_rate": 7.387387387387387e-07, - "loss": 1.5595, - "step": 82 - }, - { - "epoch": 0.011251948756185183, - "grad_norm": 6.215147773809456, - "learning_rate": 7.477477477477477e-07, - "loss": 1.558, - "step": 83 - }, - { - "epoch": 0.011387514403850064, - "grad_norm": 2.5732430251458602, - "learning_rate": 7.567567567567568e-07, - "loss": 1.5785, - "step": 84 - }, - { - "epoch": 0.011523080051514947, - "grad_norm": 2.3541213515066692, - "learning_rate": 7.657657657657657e-07, - "loss": 1.5525, - "step": 85 - }, - { - "epoch": 0.011658645699179827, - "grad_norm": 2.511064144446062, - "learning_rate": 7.747747747747747e-07, - "loss": 1.5145, - "step": 86 - }, - { - "epoch": 0.01179421134684471, - "grad_norm": 2.5389146513400247, - "learning_rate": 7.837837837837838e-07, - "loss": 1.5299, - "step": 87 - }, - { - "epoch": 0.01192977699450959, - "grad_norm": 2.309950607482175, - "learning_rate": 7.927927927927927e-07, - "loss": 1.5318, - "step": 88 - }, - { - "epoch": 0.012065342642174473, - "grad_norm": 2.2782333246844497, - "learning_rate": 8.018018018018018e-07, - "loss": 1.4753, - "step": 89 - }, - { - "epoch": 0.012200908289839356, - "grad_norm": 2.2522351609780276, - "learning_rate": 8.108108108108108e-07, - "loss": 1.5165, - "step": 90 - }, - { - "epoch": 0.012336473937504236, - "grad_norm": 2.1489656127024834, - "learning_rate": 8.198198198198198e-07, - "loss": 1.5621, - "step": 91 - }, - { - "epoch": 0.012472039585169119, - "grad_norm": 2.0640732346322954, - "learning_rate": 8.288288288288288e-07, - "loss": 1.5032, - "step": 92 - }, - { - "epoch": 0.012607605232834, - "grad_norm": 2.1850771808883245, - "learning_rate": 8.378378378378377e-07, - "loss": 1.5248, - "step": 93 - }, - { - "epoch": 0.012743170880498882, - "grad_norm": 5.397629167169215, - "learning_rate": 8.468468468468468e-07, - "loss": 1.5472, - "step": 94 - }, - { - "epoch": 0.012878736528163763, - "grad_norm": 3.0343865733000936, - "learning_rate": 8.558558558558558e-07, - "loss": 1.5012, - "step": 95 - }, - { - "epoch": 0.013014302175828645, - "grad_norm": 2.0247553082514234, - "learning_rate": 8.648648648648649e-07, - "loss": 1.5016, - "step": 96 - }, - { - "epoch": 0.013149867823493526, - "grad_norm": 2.028684912356176, - "learning_rate": 8.738738738738738e-07, - "loss": 1.5198, - "step": 97 - }, - { - "epoch": 0.013285433471158408, - "grad_norm": 1.9936719747295053, - "learning_rate": 8.828828828828828e-07, - "loss": 1.4768, - "step": 98 - }, - { - "epoch": 0.013420999118823291, - "grad_norm": 2.394638054988374, - "learning_rate": 8.918918918918918e-07, - "loss": 1.4677, - "step": 99 - }, - { - "epoch": 0.013556564766488172, - "grad_norm": 1.8856183755568405, - "learning_rate": 9.009009009009009e-07, - "loss": 1.4968, - "step": 100 - }, - { - "epoch": 0.013692130414153054, - "grad_norm": 1.9825808581112305, - "learning_rate": 9.099099099099099e-07, - "loss": 1.5007, - "step": 101 - }, - { - "epoch": 0.013827696061817935, - "grad_norm": 1.7659364717549417, - "learning_rate": 9.18918918918919e-07, - "loss": 1.4824, - "step": 102 - }, - { - "epoch": 0.013963261709482817, - "grad_norm": 1.7762659690983733, - "learning_rate": 9.279279279279278e-07, - "loss": 1.5033, - "step": 103 - }, - { - "epoch": 0.014098827357147698, - "grad_norm": 2.1391833734896735, - "learning_rate": 9.369369369369368e-07, - "loss": 1.5005, - "step": 104 - }, - { - "epoch": 0.01423439300481258, - "grad_norm": 1.9011595037236182, - "learning_rate": 9.459459459459459e-07, - "loss": 1.5045, - "step": 105 - }, - { - "epoch": 0.014369958652477463, - "grad_norm": 2.2002247031894195, - "learning_rate": 9.54954954954955e-07, - "loss": 1.4792, - "step": 106 - }, - { - "epoch": 0.014505524300142344, - "grad_norm": 1.7448928450465162, - "learning_rate": 9.63963963963964e-07, - "loss": 1.4462, - "step": 107 - }, - { - "epoch": 0.014641089947807226, - "grad_norm": 1.7211374264964578, - "learning_rate": 9.72972972972973e-07, - "loss": 1.5107, - "step": 108 - }, - { - "epoch": 0.014776655595472107, - "grad_norm": 1.600752809444846, - "learning_rate": 9.819819819819819e-07, - "loss": 1.4589, - "step": 109 - }, - { - "epoch": 0.01491222124313699, - "grad_norm": 1.7932620551261558, - "learning_rate": 9.90990990990991e-07, - "loss": 1.456, - "step": 110 - }, - { - "epoch": 0.01504778689080187, - "grad_norm": 2.5563079053550974, - "learning_rate": 1e-06, - "loss": 1.5029, - "step": 111 - }, - { - "epoch": 0.015183352538466753, - "grad_norm": 1.651804197346688, - "learning_rate": 1.0090090090090088e-06, - "loss": 1.4614, - "step": 112 - }, - { - "epoch": 0.015318918186131633, - "grad_norm": 1.809665649726903, - "learning_rate": 1.018018018018018e-06, - "loss": 1.4983, - "step": 113 - }, - { - "epoch": 0.015454483833796516, - "grad_norm": 1.9704955423231216, - "learning_rate": 1.0270270270270269e-06, - "loss": 1.4995, - "step": 114 - }, - { - "epoch": 0.015590049481461398, - "grad_norm": 1.8564147008584495, - "learning_rate": 1.0360360360360361e-06, - "loss": 1.4982, - "step": 115 - }, - { - "epoch": 0.01572561512912628, - "grad_norm": 2.7752294926333874, - "learning_rate": 1.045045045045045e-06, - "loss": 1.5092, - "step": 116 - }, - { - "epoch": 0.01586118077679116, - "grad_norm": 1.9978296660606034, - "learning_rate": 1.0540540540540538e-06, - "loss": 1.5197, - "step": 117 - }, - { - "epoch": 0.015996746424456042, - "grad_norm": 2.292743255856348, - "learning_rate": 1.063063063063063e-06, - "loss": 1.4486, - "step": 118 - }, - { - "epoch": 0.016132312072120923, - "grad_norm": 1.6221312610569312, - "learning_rate": 1.072072072072072e-06, - "loss": 1.521, - "step": 119 - }, - { - "epoch": 0.016267877719785807, - "grad_norm": 1.5226308477955839, - "learning_rate": 1.0810810810810812e-06, - "loss": 1.4713, - "step": 120 - }, - { - "epoch": 0.016403443367450688, - "grad_norm": 1.4943840457168778, - "learning_rate": 1.09009009009009e-06, - "loss": 1.4652, - "step": 121 - }, - { - "epoch": 0.01653900901511557, - "grad_norm": 1.8456896776980867, - "learning_rate": 1.099099099099099e-06, - "loss": 1.4847, - "step": 122 - }, - { - "epoch": 0.016674574662780453, - "grad_norm": 2.2109612951023463, - "learning_rate": 1.108108108108108e-06, - "loss": 1.4555, - "step": 123 - }, - { - "epoch": 0.016810140310445334, - "grad_norm": 1.9060824289056564, - "learning_rate": 1.117117117117117e-06, - "loss": 1.4835, - "step": 124 - }, - { - "epoch": 0.016945705958110215, - "grad_norm": 2.17978891463711, - "learning_rate": 1.1261261261261262e-06, - "loss": 1.4566, - "step": 125 - }, - { - "epoch": 0.017081271605775095, - "grad_norm": 2.130487148499443, - "learning_rate": 1.135135135135135e-06, - "loss": 1.4639, - "step": 126 - }, - { - "epoch": 0.01721683725343998, - "grad_norm": 1.4411412214757613, - "learning_rate": 1.1441441441441443e-06, - "loss": 1.4661, - "step": 127 - }, - { - "epoch": 0.01735240290110486, - "grad_norm": 3.34110909004883, - "learning_rate": 1.1531531531531531e-06, - "loss": 1.4544, - "step": 128 - }, - { - "epoch": 0.01748796854876974, - "grad_norm": 1.6208206011256618, - "learning_rate": 1.162162162162162e-06, - "loss": 1.4568, - "step": 129 - }, - { - "epoch": 0.017623534196434622, - "grad_norm": 1.7308675609587647, - "learning_rate": 1.1711711711711712e-06, - "loss": 1.4409, - "step": 130 - }, - { - "epoch": 0.017759099844099506, - "grad_norm": 1.6152176317721023, - "learning_rate": 1.18018018018018e-06, - "loss": 1.4611, - "step": 131 - }, - { - "epoch": 0.017894665491764387, - "grad_norm": 2.293184739829164, - "learning_rate": 1.1891891891891893e-06, - "loss": 1.5031, - "step": 132 - }, - { - "epoch": 0.018030231139429267, - "grad_norm": 1.5892167186194808, - "learning_rate": 1.1981981981981981e-06, - "loss": 1.4471, - "step": 133 - }, - { - "epoch": 0.01816579678709415, - "grad_norm": 1.7844967448919111, - "learning_rate": 1.2072072072072072e-06, - "loss": 1.4468, - "step": 134 - }, - { - "epoch": 0.018301362434759032, - "grad_norm": 2.0389564578521457, - "learning_rate": 1.2162162162162162e-06, - "loss": 1.4848, - "step": 135 - }, - { - "epoch": 0.018436928082423913, - "grad_norm": 1.4783055906275804, - "learning_rate": 1.225225225225225e-06, - "loss": 1.4674, - "step": 136 - }, - { - "epoch": 0.018572493730088794, - "grad_norm": 2.4562318823134457, - "learning_rate": 1.2342342342342343e-06, - "loss": 1.4601, - "step": 137 - }, - { - "epoch": 0.018708059377753678, - "grad_norm": 1.6909573117382803, - "learning_rate": 1.2432432432432432e-06, - "loss": 1.4712, - "step": 138 - }, - { - "epoch": 0.01884362502541856, - "grad_norm": 2.939979936304758, - "learning_rate": 1.2522522522522522e-06, - "loss": 1.4358, - "step": 139 - }, - { - "epoch": 0.01897919067308344, - "grad_norm": 1.4102524226155284, - "learning_rate": 1.2612612612612613e-06, - "loss": 1.4583, - "step": 140 - }, - { - "epoch": 0.019114756320748324, - "grad_norm": 6.426404923995833, - "learning_rate": 1.27027027027027e-06, - "loss": 1.4173, - "step": 141 - }, - { - "epoch": 0.019250321968413205, - "grad_norm": 2.255748860786786, - "learning_rate": 1.2792792792792793e-06, - "loss": 1.4446, - "step": 142 - }, - { - "epoch": 0.019385887616078085, - "grad_norm": 1.699843312858549, - "learning_rate": 1.2882882882882882e-06, - "loss": 1.4728, - "step": 143 - }, - { - "epoch": 0.019521453263742966, - "grad_norm": 1.78156546308971, - "learning_rate": 1.2972972972972972e-06, - "loss": 1.4793, - "step": 144 - }, - { - "epoch": 0.01965701891140785, - "grad_norm": 1.5366396140321357, - "learning_rate": 1.3063063063063063e-06, - "loss": 1.4538, - "step": 145 - }, - { - "epoch": 0.01979258455907273, - "grad_norm": 1.547307226903954, - "learning_rate": 1.3153153153153153e-06, - "loss": 1.4461, - "step": 146 - }, - { - "epoch": 0.019928150206737612, - "grad_norm": 2.0633466637585194, - "learning_rate": 1.3243243243243244e-06, - "loss": 1.4722, - "step": 147 - }, - { - "epoch": 0.020063715854402496, - "grad_norm": 1.4619401177013418, - "learning_rate": 1.3333333333333332e-06, - "loss": 1.4309, - "step": 148 - }, - { - "epoch": 0.020199281502067377, - "grad_norm": 1.45083484339537, - "learning_rate": 1.3423423423423422e-06, - "loss": 1.4565, - "step": 149 - }, - { - "epoch": 0.020334847149732257, - "grad_norm": 2.5877409100427395, - "learning_rate": 1.3513513513513513e-06, - "loss": 1.4302, - "step": 150 - }, - { - "epoch": 0.020470412797397138, - "grad_norm": 1.508521639038679, - "learning_rate": 1.3603603603603603e-06, - "loss": 1.4859, - "step": 151 - }, - { - "epoch": 0.020605978445062022, - "grad_norm": 1.436472506897216, - "learning_rate": 1.3693693693693694e-06, - "loss": 1.4413, - "step": 152 - }, - { - "epoch": 0.020741544092726903, - "grad_norm": 2.4578930596905773, - "learning_rate": 1.3783783783783782e-06, - "loss": 1.4195, - "step": 153 - }, - { - "epoch": 0.020877109740391784, - "grad_norm": 2.0500962423136997, - "learning_rate": 1.3873873873873873e-06, - "loss": 1.4243, - "step": 154 - }, - { - "epoch": 0.021012675388056668, - "grad_norm": 1.5882460823207682, - "learning_rate": 1.3963963963963963e-06, - "loss": 1.4329, - "step": 155 - }, - { - "epoch": 0.02114824103572155, - "grad_norm": 3.1752762150764218, - "learning_rate": 1.4054054054054054e-06, - "loss": 1.4409, - "step": 156 - }, - { - "epoch": 0.02128380668338643, - "grad_norm": 1.6982173765105, - "learning_rate": 1.4144144144144144e-06, - "loss": 1.4204, - "step": 157 - }, - { - "epoch": 0.02141937233105131, - "grad_norm": 1.581035863249022, - "learning_rate": 1.4234234234234232e-06, - "loss": 1.4585, - "step": 158 - }, - { - "epoch": 0.021554937978716195, - "grad_norm": 1.93521312225197, - "learning_rate": 1.4324324324324323e-06, - "loss": 1.4704, - "step": 159 - }, - { - "epoch": 0.021690503626381075, - "grad_norm": 1.6249367150225102, - "learning_rate": 1.4414414414414413e-06, - "loss": 1.4134, - "step": 160 - }, - { - "epoch": 0.021826069274045956, - "grad_norm": 1.4811609234146867, - "learning_rate": 1.4504504504504504e-06, - "loss": 1.4293, - "step": 161 - }, - { - "epoch": 0.021961634921710837, - "grad_norm": 1.3878222282088852, - "learning_rate": 1.4594594594594594e-06, - "loss": 1.442, - "step": 162 - }, - { - "epoch": 0.02209720056937572, - "grad_norm": 1.5391308775646662, - "learning_rate": 1.4684684684684685e-06, - "loss": 1.4161, - "step": 163 - }, - { - "epoch": 0.022232766217040602, - "grad_norm": 1.9920836524815897, - "learning_rate": 1.4774774774774773e-06, - "loss": 1.4121, - "step": 164 - }, - { - "epoch": 0.022368331864705483, - "grad_norm": 1.4346200638714859, - "learning_rate": 1.4864864864864864e-06, - "loss": 1.4251, - "step": 165 - }, - { - "epoch": 0.022503897512370367, - "grad_norm": 2.3132809185165972, - "learning_rate": 1.4954954954954954e-06, - "loss": 1.4283, - "step": 166 - }, - { - "epoch": 0.022639463160035248, - "grad_norm": 1.41423836433183, - "learning_rate": 1.5045045045045045e-06, - "loss": 1.4433, - "step": 167 - }, - { - "epoch": 0.022775028807700128, - "grad_norm": 1.7796186206985323, - "learning_rate": 1.5135135135135135e-06, - "loss": 1.4494, - "step": 168 - }, - { - "epoch": 0.02291059445536501, - "grad_norm": 6.889500514542377, - "learning_rate": 1.5225225225225225e-06, - "loss": 1.4313, - "step": 169 - }, - { - "epoch": 0.023046160103029893, - "grad_norm": 4.9336646627198775, - "learning_rate": 1.5315315315315314e-06, - "loss": 1.468, - "step": 170 - }, - { - "epoch": 0.023181725750694774, - "grad_norm": 1.4030117249228922, - "learning_rate": 1.5405405405405404e-06, - "loss": 1.4206, - "step": 171 - }, - { - "epoch": 0.023317291398359655, - "grad_norm": 1.7007506196957312, - "learning_rate": 1.5495495495495495e-06, - "loss": 1.44, - "step": 172 - }, - { - "epoch": 0.02345285704602454, - "grad_norm": 1.8971210833189927, - "learning_rate": 1.5585585585585585e-06, - "loss": 1.4063, - "step": 173 - }, - { - "epoch": 0.02358842269368942, - "grad_norm": 1.302982687460494, - "learning_rate": 1.5675675675675676e-06, - "loss": 1.4093, - "step": 174 - }, - { - "epoch": 0.0237239883413543, - "grad_norm": 2.2037029089976627, - "learning_rate": 1.5765765765765766e-06, - "loss": 1.4092, - "step": 175 - }, - { - "epoch": 0.02385955398901918, - "grad_norm": 1.546495077213584, - "learning_rate": 1.5855855855855855e-06, - "loss": 1.4189, - "step": 176 - }, - { - "epoch": 0.023995119636684065, - "grad_norm": 1.3966886247323087, - "learning_rate": 1.5945945945945945e-06, - "loss": 1.4001, - "step": 177 - }, - { - "epoch": 0.024130685284348946, - "grad_norm": 1.8904525443350804, - "learning_rate": 1.6036036036036035e-06, - "loss": 1.4298, - "step": 178 - }, - { - "epoch": 0.024266250932013827, - "grad_norm": 1.9875049087964187, - "learning_rate": 1.6126126126126126e-06, - "loss": 1.4165, - "step": 179 - }, - { - "epoch": 0.02440181657967871, - "grad_norm": 1.8295107220300855, - "learning_rate": 1.6216216216216216e-06, - "loss": 1.413, - "step": 180 - }, - { - "epoch": 0.024537382227343592, - "grad_norm": 1.8721875972432258, - "learning_rate": 1.6306306306306305e-06, - "loss": 1.445, - "step": 181 - }, - { - "epoch": 0.024672947875008473, - "grad_norm": 1.6163221657653166, - "learning_rate": 1.6396396396396395e-06, - "loss": 1.4308, - "step": 182 - }, - { - "epoch": 0.024808513522673353, - "grad_norm": 1.682771990716447, - "learning_rate": 1.6486486486486486e-06, - "loss": 1.4708, - "step": 183 - }, - { - "epoch": 0.024944079170338238, - "grad_norm": 1.3278602918850941, - "learning_rate": 1.6576576576576576e-06, - "loss": 1.4075, - "step": 184 - }, - { - "epoch": 0.025079644818003118, - "grad_norm": 1.5370038639960675, - "learning_rate": 1.6666666666666667e-06, - "loss": 1.4227, - "step": 185 - }, - { - "epoch": 0.025215210465668, - "grad_norm": 1.3609780817540533, - "learning_rate": 1.6756756756756755e-06, - "loss": 1.396, - "step": 186 - }, - { - "epoch": 0.02535077611333288, - "grad_norm": 1.9616205364055233, - "learning_rate": 1.6846846846846845e-06, - "loss": 1.4416, - "step": 187 - }, - { - "epoch": 0.025486341760997764, - "grad_norm": 1.502754110534782, - "learning_rate": 1.6936936936936936e-06, - "loss": 1.4101, - "step": 188 - }, - { - "epoch": 0.025621907408662645, - "grad_norm": 1.4521395335603045, - "learning_rate": 1.7027027027027026e-06, - "loss": 1.4428, - "step": 189 - }, - { - "epoch": 0.025757473056327525, - "grad_norm": 3.7487179759648113, - "learning_rate": 1.7117117117117117e-06, - "loss": 1.3872, - "step": 190 - }, - { - "epoch": 0.02589303870399241, - "grad_norm": 1.4140542815630024, - "learning_rate": 1.7207207207207205e-06, - "loss": 1.3796, - "step": 191 - }, - { - "epoch": 0.02602860435165729, - "grad_norm": 2.8669528622986946, - "learning_rate": 1.7297297297297298e-06, - "loss": 1.4689, - "step": 192 - }, - { - "epoch": 0.02616416999932217, - "grad_norm": 1.4424153910055988, - "learning_rate": 1.7387387387387386e-06, - "loss": 1.4577, - "step": 193 - }, - { - "epoch": 0.026299735646987052, - "grad_norm": 1.9261284405368198, - "learning_rate": 1.7477477477477477e-06, - "loss": 1.438, - "step": 194 - }, - { - "epoch": 0.026435301294651936, - "grad_norm": 1.550197529171603, - "learning_rate": 1.7567567567567567e-06, - "loss": 1.4081, - "step": 195 - }, - { - "epoch": 0.026570866942316817, - "grad_norm": 1.5827461344328615, - "learning_rate": 1.7657657657657655e-06, - "loss": 1.3965, - "step": 196 - }, - { - "epoch": 0.026706432589981698, - "grad_norm": 1.6333341826873844, - "learning_rate": 1.7747747747747748e-06, - "loss": 1.3822, - "step": 197 - }, - { - "epoch": 0.026841998237646582, - "grad_norm": 1.47768039344046, - "learning_rate": 1.7837837837837836e-06, - "loss": 1.3954, - "step": 198 - }, - { - "epoch": 0.026977563885311463, - "grad_norm": 1.5898366360488652, - "learning_rate": 1.7927927927927927e-06, - "loss": 1.3794, - "step": 199 - }, - { - "epoch": 0.027113129532976343, - "grad_norm": 1.2938267773765495, - "learning_rate": 1.8018018018018017e-06, - "loss": 1.3826, - "step": 200 - }, - { - "epoch": 0.027248695180641224, - "grad_norm": 2.227679548552012, - "learning_rate": 1.8108108108108106e-06, - "loss": 1.4095, - "step": 201 - }, - { - "epoch": 0.02738426082830611, - "grad_norm": 2.4805075352385337, - "learning_rate": 1.8198198198198198e-06, - "loss": 1.4351, - "step": 202 - }, - { - "epoch": 0.02751982647597099, - "grad_norm": 1.7940482497185923, - "learning_rate": 1.8288288288288287e-06, - "loss": 1.4303, - "step": 203 - }, - { - "epoch": 0.02765539212363587, - "grad_norm": 1.457976892866785, - "learning_rate": 1.837837837837838e-06, - "loss": 1.4472, - "step": 204 - }, - { - "epoch": 0.027790957771300754, - "grad_norm": 2.53622589650347, - "learning_rate": 1.8468468468468467e-06, - "loss": 1.3951, - "step": 205 - }, - { - "epoch": 0.027926523418965635, - "grad_norm": 1.6195927538794186, - "learning_rate": 1.8558558558558556e-06, - "loss": 1.415, - "step": 206 - }, - { - "epoch": 0.028062089066630515, - "grad_norm": 2.1837454661422706, - "learning_rate": 1.8648648648648648e-06, - "loss": 1.4127, - "step": 207 - }, - { - "epoch": 0.028197654714295396, - "grad_norm": 1.4198349174104905, - "learning_rate": 1.8738738738738737e-06, - "loss": 1.3787, - "step": 208 - }, - { - "epoch": 0.02833322036196028, - "grad_norm": 1.4656777320207848, - "learning_rate": 1.882882882882883e-06, - "loss": 1.4205, - "step": 209 - }, - { - "epoch": 0.02846878600962516, - "grad_norm": 1.6785057466910236, - "learning_rate": 1.8918918918918918e-06, - "loss": 1.4213, - "step": 210 - }, - { - "epoch": 0.028604351657290042, - "grad_norm": 1.9029803580014049, - "learning_rate": 1.9009009009009008e-06, - "loss": 1.3933, - "step": 211 - }, - { - "epoch": 0.028739917304954926, - "grad_norm": 1.4514965193801417, - "learning_rate": 1.90990990990991e-06, - "loss": 1.398, - "step": 212 - }, - { - "epoch": 0.028875482952619807, - "grad_norm": 1.5073996378558847, - "learning_rate": 1.9189189189189187e-06, - "loss": 1.3896, - "step": 213 - }, - { - "epoch": 0.029011048600284688, - "grad_norm": 1.7748347060874479, - "learning_rate": 1.927927927927928e-06, - "loss": 1.4348, - "step": 214 - }, - { - "epoch": 0.02914661424794957, - "grad_norm": 1.8375574018886902, - "learning_rate": 1.936936936936937e-06, - "loss": 1.4113, - "step": 215 - }, - { - "epoch": 0.029282179895614453, - "grad_norm": 2.0217922617020148, - "learning_rate": 1.945945945945946e-06, - "loss": 1.4095, - "step": 216 - }, - { - "epoch": 0.029417745543279333, - "grad_norm": 2.5576946487773156, - "learning_rate": 1.954954954954955e-06, - "loss": 1.4181, - "step": 217 - }, - { - "epoch": 0.029553311190944214, - "grad_norm": 1.4934484346103987, - "learning_rate": 1.9639639639639637e-06, - "loss": 1.4385, - "step": 218 - }, - { - "epoch": 0.029688876838609095, - "grad_norm": 1.6591344813889835, - "learning_rate": 1.972972972972973e-06, - "loss": 1.3929, - "step": 219 - }, - { - "epoch": 0.02982444248627398, - "grad_norm": 2.267077270370139, - "learning_rate": 1.981981981981982e-06, - "loss": 1.3873, - "step": 220 - }, - { - "epoch": 0.02996000813393886, - "grad_norm": 1.6180749945734663, - "learning_rate": 1.990990990990991e-06, - "loss": 1.3392, - "step": 221 - }, - { - "epoch": 0.03009557378160374, - "grad_norm": 1.9129981942757963, - "learning_rate": 2e-06, - "loss": 1.401, - "step": 222 - }, - { - "epoch": 0.030231139429268625, - "grad_norm": 1.8423101243120148, - "learning_rate": 1.9999999035789467e-06, - "loss": 1.4093, - "step": 223 - }, - { - "epoch": 0.030366705076933505, - "grad_norm": 1.7923225098739226, - "learning_rate": 1.9999996143158056e-06, - "loss": 1.3785, - "step": 224 - }, - { - "epoch": 0.030502270724598386, - "grad_norm": 1.4152977108066773, - "learning_rate": 1.9999991322106323e-06, - "loss": 1.4059, - "step": 225 - }, - { - "epoch": 0.030637836372263267, - "grad_norm": 1.3968389316481733, - "learning_rate": 1.99999845726352e-06, - "loss": 1.3961, - "step": 226 - }, - { - "epoch": 0.03077340201992815, - "grad_norm": 3.053507055159351, - "learning_rate": 1.9999975894745984e-06, - "loss": 1.4086, - "step": 227 - }, - { - "epoch": 0.030908967667593032, - "grad_norm": 2.9751924850683524, - "learning_rate": 1.9999965288440357e-06, - "loss": 1.3839, - "step": 228 - }, - { - "epoch": 0.031044533315257913, - "grad_norm": 1.646364576594217, - "learning_rate": 1.9999952753720353e-06, - "loss": 1.3706, - "step": 229 - }, - { - "epoch": 0.031180098962922797, - "grad_norm": 3.230983478795083, - "learning_rate": 1.99999382905884e-06, - "loss": 1.3953, - "step": 230 - }, - { - "epoch": 0.03131566461058768, - "grad_norm": 2.26181431775139, - "learning_rate": 1.9999921899047284e-06, - "loss": 1.4046, - "step": 231 - }, - { - "epoch": 0.03145123025825256, - "grad_norm": 1.588638845962109, - "learning_rate": 1.999990357910016e-06, - "loss": 1.3482, - "step": 232 - }, - { - "epoch": 0.03158679590591744, - "grad_norm": 1.6597400418101047, - "learning_rate": 1.9999883330750567e-06, - "loss": 1.3582, - "step": 233 - }, - { - "epoch": 0.03172236155358232, - "grad_norm": 2.1747177060830114, - "learning_rate": 1.9999861154002405e-06, - "loss": 1.4298, - "step": 234 - }, - { - "epoch": 0.0318579272012472, - "grad_norm": 1.6836443961752021, - "learning_rate": 1.9999837048859957e-06, - "loss": 1.3691, - "step": 235 - }, - { - "epoch": 0.031993492848912085, - "grad_norm": 2.669920245046704, - "learning_rate": 1.999981101532787e-06, - "loss": 1.3863, - "step": 236 - }, - { - "epoch": 0.03212905849657697, - "grad_norm": 1.7847142852608644, - "learning_rate": 1.9999783053411157e-06, - "loss": 1.3718, - "step": 237 - }, - { - "epoch": 0.032264624144241846, - "grad_norm": 1.5100144572864929, - "learning_rate": 1.999975316311522e-06, - "loss": 1.3664, - "step": 238 - }, - { - "epoch": 0.03240018979190673, - "grad_norm": 2.045560083966139, - "learning_rate": 1.9999721344445816e-06, - "loss": 1.4312, - "step": 239 - }, - { - "epoch": 0.032535755439571615, - "grad_norm": 1.7578228212823335, - "learning_rate": 1.9999687597409084e-06, - "loss": 1.4197, - "step": 240 - }, - { - "epoch": 0.03267132108723649, - "grad_norm": 1.5150763427478837, - "learning_rate": 1.9999651922011532e-06, - "loss": 1.4094, - "step": 241 - }, - { - "epoch": 0.032806886734901376, - "grad_norm": 1.4228113562579867, - "learning_rate": 1.999961431826004e-06, - "loss": 1.3764, - "step": 242 - }, - { - "epoch": 0.03294245238256626, - "grad_norm": 1.9042803018030525, - "learning_rate": 1.999957478616186e-06, - "loss": 1.3931, - "step": 243 - }, - { - "epoch": 0.03307801803023114, - "grad_norm": 3.7439796611228138, - "learning_rate": 1.9999533325724613e-06, - "loss": 1.4239, - "step": 244 - }, - { - "epoch": 0.03321358367789602, - "grad_norm": 1.618571786703449, - "learning_rate": 1.9999489936956295e-06, - "loss": 1.3549, - "step": 245 - }, - { - "epoch": 0.033349149325560906, - "grad_norm": 2.1638352228442366, - "learning_rate": 1.9999444619865273e-06, - "loss": 1.382, - "step": 246 - }, - { - "epoch": 0.03348471497322578, - "grad_norm": 1.9762693184900288, - "learning_rate": 1.999939737446029e-06, - "loss": 1.3722, - "step": 247 - }, - { - "epoch": 0.03362028062089067, - "grad_norm": 1.5884978959141258, - "learning_rate": 1.999934820075045e-06, - "loss": 1.3963, - "step": 248 - }, - { - "epoch": 0.033755846268555545, - "grad_norm": 3.3597247998139865, - "learning_rate": 1.9999297098745245e-06, - "loss": 1.4029, - "step": 249 - }, - { - "epoch": 0.03389141191622043, - "grad_norm": 2.533795469663496, - "learning_rate": 1.999924406845452e-06, - "loss": 1.4026, - "step": 250 - }, - { - "epoch": 0.03402697756388531, - "grad_norm": 1.7138149535131035, - "learning_rate": 1.9999189109888503e-06, - "loss": 1.371, - "step": 251 - }, - { - "epoch": 0.03416254321155019, - "grad_norm": 3.5813752229584384, - "learning_rate": 1.9999132223057797e-06, - "loss": 1.4068, - "step": 252 - }, - { - "epoch": 0.034298108859215075, - "grad_norm": 3.025953083706524, - "learning_rate": 1.999907340797337e-06, - "loss": 1.3787, - "step": 253 - }, - { - "epoch": 0.03443367450687996, - "grad_norm": 1.786445380141776, - "learning_rate": 1.9999012664646567e-06, - "loss": 1.3976, - "step": 254 - }, - { - "epoch": 0.034569240154544836, - "grad_norm": 1.5656536352639006, - "learning_rate": 1.99989499930891e-06, - "loss": 1.3897, - "step": 255 - }, - { - "epoch": 0.03470480580220972, - "grad_norm": 1.3648540026649725, - "learning_rate": 1.999888539331305e-06, - "loss": 1.3393, - "step": 256 - }, - { - "epoch": 0.034840371449874605, - "grad_norm": 1.5203044333366489, - "learning_rate": 1.999881886533088e-06, - "loss": 1.4017, - "step": 257 - }, - { - "epoch": 0.03497593709753948, - "grad_norm": 1.6437939139098705, - "learning_rate": 1.9998750409155416e-06, - "loss": 1.3432, - "step": 258 - }, - { - "epoch": 0.035111502745204366, - "grad_norm": 1.3632232085799463, - "learning_rate": 1.999868002479986e-06, - "loss": 1.4045, - "step": 259 - }, - { - "epoch": 0.035247068392869244, - "grad_norm": 1.8013308999434519, - "learning_rate": 1.9998607712277792e-06, - "loss": 1.383, - "step": 260 - }, - { - "epoch": 0.03538263404053413, - "grad_norm": 2.1705263078904675, - "learning_rate": 1.9998533471603145e-06, - "loss": 1.3833, - "step": 261 - }, - { - "epoch": 0.03551819968819901, - "grad_norm": 3.5599028899477614, - "learning_rate": 1.9998457302790245e-06, - "loss": 1.3548, - "step": 262 - }, - { - "epoch": 0.03565376533586389, - "grad_norm": 1.476502682456324, - "learning_rate": 1.9998379205853775e-06, - "loss": 1.3657, - "step": 263 - }, - { - "epoch": 0.03578933098352877, - "grad_norm": 1.3611372130590276, - "learning_rate": 1.9998299180808796e-06, - "loss": 1.3753, - "step": 264 - }, - { - "epoch": 0.03592489663119366, - "grad_norm": 1.5840286110035533, - "learning_rate": 1.999821722767075e-06, - "loss": 1.3539, - "step": 265 - }, - { - "epoch": 0.036060462278858535, - "grad_norm": 4.3459998401920075, - "learning_rate": 1.9998133346455422e-06, - "loss": 1.3669, - "step": 266 - }, - { - "epoch": 0.03619602792652342, - "grad_norm": 2.765006940144401, - "learning_rate": 1.9998047537179007e-06, - "loss": 1.3655, - "step": 267 - }, - { - "epoch": 0.0363315935741883, - "grad_norm": 2.431275820592414, - "learning_rate": 1.999795979985804e-06, - "loss": 1.3602, - "step": 268 - }, - { - "epoch": 0.03646715922185318, - "grad_norm": 3.5342801795274776, - "learning_rate": 1.9997870134509444e-06, - "loss": 1.3687, - "step": 269 - }, - { - "epoch": 0.036602724869518065, - "grad_norm": 3.808465354088291, - "learning_rate": 1.9997778541150515e-06, - "loss": 1.3466, - "step": 270 - }, - { - "epoch": 0.03673829051718295, - "grad_norm": 1.4780015542853115, - "learning_rate": 1.9997685019798908e-06, - "loss": 1.349, - "step": 271 - }, - { - "epoch": 0.036873856164847826, - "grad_norm": 1.714870548940344, - "learning_rate": 1.999758957047266e-06, - "loss": 1.4021, - "step": 272 - }, - { - "epoch": 0.03700942181251271, - "grad_norm": 1.3230004777146438, - "learning_rate": 1.9997492193190185e-06, - "loss": 1.3568, - "step": 273 - }, - { - "epoch": 0.03714498746017759, - "grad_norm": 1.8110858636213139, - "learning_rate": 1.9997392887970253e-06, - "loss": 1.3444, - "step": 274 - }, - { - "epoch": 0.03728055310784247, - "grad_norm": 1.7348389529794637, - "learning_rate": 1.999729165483202e-06, - "loss": 1.3589, - "step": 275 - }, - { - "epoch": 0.037416118755507356, - "grad_norm": 4.58905979756494, - "learning_rate": 1.9997188493795e-06, - "loss": 1.3841, - "step": 276 - }, - { - "epoch": 0.037551684403172234, - "grad_norm": 1.6616078857912495, - "learning_rate": 1.99970834048791e-06, - "loss": 1.3707, - "step": 277 - }, - { - "epoch": 0.03768725005083712, - "grad_norm": 4.100661358329321, - "learning_rate": 1.999697638810457e-06, - "loss": 1.3559, - "step": 278 - }, - { - "epoch": 0.037822815698502, - "grad_norm": 1.6221211967660296, - "learning_rate": 1.9996867443492057e-06, - "loss": 1.4025, - "step": 279 - }, - { - "epoch": 0.03795838134616688, - "grad_norm": 1.8909396951712054, - "learning_rate": 1.999675657106257e-06, - "loss": 1.3776, - "step": 280 - }, - { - "epoch": 0.038093946993831763, - "grad_norm": 1.4818904244208466, - "learning_rate": 1.9996643770837486e-06, - "loss": 1.3619, - "step": 281 - }, - { - "epoch": 0.03822951264149665, - "grad_norm": 1.5812633425411216, - "learning_rate": 1.999652904283856e-06, - "loss": 1.382, - "step": 282 - }, - { - "epoch": 0.038365078289161525, - "grad_norm": 2.9052633029195594, - "learning_rate": 1.9996412387087914e-06, - "loss": 1.3617, - "step": 283 - }, - { - "epoch": 0.03850064393682641, - "grad_norm": 2.415691397097965, - "learning_rate": 1.9996293803608053e-06, - "loss": 1.3109, - "step": 284 - }, - { - "epoch": 0.038636209584491286, - "grad_norm": 4.4138928962698385, - "learning_rate": 1.9996173292421828e-06, - "loss": 1.362, - "step": 285 - }, - { - "epoch": 0.03877177523215617, - "grad_norm": 2.3766560639895897, - "learning_rate": 1.9996050853552494e-06, - "loss": 1.3952, - "step": 286 - }, - { - "epoch": 0.038907340879821055, - "grad_norm": 1.4418157325319962, - "learning_rate": 1.999592648702366e-06, - "loss": 1.3757, - "step": 287 - }, - { - "epoch": 0.03904290652748593, - "grad_norm": 4.055586109581618, - "learning_rate": 1.99958001928593e-06, - "loss": 1.4183, - "step": 288 - }, - { - "epoch": 0.039178472175150816, - "grad_norm": 1.5242713187387358, - "learning_rate": 1.9995671971083777e-06, - "loss": 1.361, - "step": 289 - }, - { - "epoch": 0.0393140378228157, - "grad_norm": 2.2334324103608356, - "learning_rate": 1.9995541821721814e-06, - "loss": 1.3576, - "step": 290 - }, - { - "epoch": 0.03944960347048058, - "grad_norm": 1.5343574253424124, - "learning_rate": 1.9995409744798512e-06, - "loss": 1.382, - "step": 291 - }, - { - "epoch": 0.03958516911814546, - "grad_norm": 1.583373180152813, - "learning_rate": 1.999527574033934e-06, - "loss": 1.3135, - "step": 292 - }, - { - "epoch": 0.039720734765810346, - "grad_norm": 2.352486658860294, - "learning_rate": 1.9995139808370142e-06, - "loss": 1.3639, - "step": 293 - }, - { - "epoch": 0.039856300413475224, - "grad_norm": 1.6484056620695784, - "learning_rate": 1.9995001948917124e-06, - "loss": 1.3332, - "step": 294 - }, - { - "epoch": 0.03999186606114011, - "grad_norm": 1.4791136008289367, - "learning_rate": 1.999486216200688e-06, - "loss": 1.4007, - "step": 295 - }, - { - "epoch": 0.04012743170880499, - "grad_norm": 2.9236723957249935, - "learning_rate": 1.999472044766636e-06, - "loss": 1.3845, - "step": 296 - }, - { - "epoch": 0.04026299735646987, - "grad_norm": 2.249130489607479, - "learning_rate": 1.9994576805922898e-06, - "loss": 1.3467, - "step": 297 - }, - { - "epoch": 0.040398563004134753, - "grad_norm": 2.1734040392364733, - "learning_rate": 1.9994431236804187e-06, - "loss": 1.3609, - "step": 298 - }, - { - "epoch": 0.04053412865179963, - "grad_norm": 1.6823636153893295, - "learning_rate": 1.9994283740338306e-06, - "loss": 1.3823, - "step": 299 - }, - { - "epoch": 0.040669694299464515, - "grad_norm": 1.5942223645940168, - "learning_rate": 1.9994134316553693e-06, - "loss": 1.3737, - "step": 300 - }, - { - "epoch": 0.0408052599471294, - "grad_norm": 3.934694898981306, - "learning_rate": 1.999398296547917e-06, - "loss": 1.343, - "step": 301 - }, - { - "epoch": 0.040940825594794276, - "grad_norm": 2.1365601314987357, - "learning_rate": 1.9993829687143913e-06, - "loss": 1.3778, - "step": 302 - }, - { - "epoch": 0.04107639124245916, - "grad_norm": 1.6688410529771212, - "learning_rate": 1.9993674481577497e-06, - "loss": 1.3612, - "step": 303 - }, - { - "epoch": 0.041211956890124045, - "grad_norm": 1.445156093247341, - "learning_rate": 1.9993517348809836e-06, - "loss": 1.3442, - "step": 304 - }, - { - "epoch": 0.04134752253778892, - "grad_norm": 1.6797246076382437, - "learning_rate": 1.999335828887124e-06, - "loss": 1.3551, - "step": 305 - }, - { - "epoch": 0.041483088185453806, - "grad_norm": 1.4807200739511652, - "learning_rate": 1.999319730179238e-06, - "loss": 1.3689, - "step": 306 - }, - { - "epoch": 0.04161865383311869, - "grad_norm": 1.5732891502193933, - "learning_rate": 1.9993034387604302e-06, - "loss": 1.3534, - "step": 307 - }, - { - "epoch": 0.04175421948078357, - "grad_norm": 1.7907093428905916, - "learning_rate": 1.9992869546338428e-06, - "loss": 1.3544, - "step": 308 - }, - { - "epoch": 0.04188978512844845, - "grad_norm": 1.4432705106405477, - "learning_rate": 1.9992702778026532e-06, - "loss": 1.3242, - "step": 309 - }, - { - "epoch": 0.042025350776113336, - "grad_norm": 1.507957929311929, - "learning_rate": 1.999253408270079e-06, - "loss": 1.3268, - "step": 310 - }, - { - "epoch": 0.042160916423778214, - "grad_norm": 2.021480862810751, - "learning_rate": 1.9992363460393724e-06, - "loss": 1.3381, - "step": 311 - }, - { - "epoch": 0.0422964820714431, - "grad_norm": 1.4655177625555775, - "learning_rate": 1.9992190911138236e-06, - "loss": 1.3423, - "step": 312 - }, - { - "epoch": 0.042432047719107975, - "grad_norm": 1.557338696652907, - "learning_rate": 1.999201643496761e-06, - "loss": 1.3508, - "step": 313 - }, - { - "epoch": 0.04256761336677286, - "grad_norm": 1.7631541953217642, - "learning_rate": 1.9991840031915484e-06, - "loss": 1.3467, - "step": 314 - }, - { - "epoch": 0.042703179014437743, - "grad_norm": 1.5028353366919411, - "learning_rate": 1.9991661702015877e-06, - "loss": 1.3539, - "step": 315 - }, - { - "epoch": 0.04283874466210262, - "grad_norm": 2.295092356146177, - "learning_rate": 1.9991481445303182e-06, - "loss": 1.3458, - "step": 316 - }, - { - "epoch": 0.042974310309767505, - "grad_norm": 2.790374286694081, - "learning_rate": 1.999129926181216e-06, - "loss": 1.335, - "step": 317 - }, - { - "epoch": 0.04310987595743239, - "grad_norm": 1.4576901644431888, - "learning_rate": 1.9991115151577938e-06, - "loss": 1.3345, - "step": 318 - }, - { - "epoch": 0.043245441605097266, - "grad_norm": 1.7461486806201925, - "learning_rate": 1.999092911463603e-06, - "loss": 1.3929, - "step": 319 - }, - { - "epoch": 0.04338100725276215, - "grad_norm": 2.069875655321941, - "learning_rate": 1.99907411510223e-06, - "loss": 1.3811, - "step": 320 - }, - { - "epoch": 0.043516572900427035, - "grad_norm": 1.527983376757812, - "learning_rate": 1.9990551260773003e-06, - "loss": 1.3192, - "step": 321 - }, - { - "epoch": 0.04365213854809191, - "grad_norm": 2.25858086886602, - "learning_rate": 1.9990359443924755e-06, - "loss": 1.3629, - "step": 322 - }, - { - "epoch": 0.043787704195756796, - "grad_norm": 1.4590870633625341, - "learning_rate": 1.999016570051455e-06, - "loss": 1.3548, - "step": 323 - }, - { - "epoch": 0.043923269843421674, - "grad_norm": 1.489840231507941, - "learning_rate": 1.9989970030579744e-06, - "loss": 1.342, - "step": 324 - }, - { - "epoch": 0.04405883549108656, - "grad_norm": 1.3602016998108024, - "learning_rate": 1.9989772434158076e-06, - "loss": 1.3622, - "step": 325 - }, - { - "epoch": 0.04419440113875144, - "grad_norm": 1.4431552442543385, - "learning_rate": 1.9989572911287647e-06, - "loss": 1.3421, - "step": 326 - }, - { - "epoch": 0.04432996678641632, - "grad_norm": 1.5287812055082524, - "learning_rate": 1.9989371462006938e-06, - "loss": 1.332, - "step": 327 - }, - { - "epoch": 0.044465532434081204, - "grad_norm": 1.548941937251561, - "learning_rate": 1.998916808635479e-06, - "loss": 1.3543, - "step": 328 - }, - { - "epoch": 0.04460109808174609, - "grad_norm": 1.4005227344051445, - "learning_rate": 1.998896278437043e-06, - "loss": 1.3504, - "step": 329 - }, - { - "epoch": 0.044736663729410965, - "grad_norm": 2.283482308203264, - "learning_rate": 1.998875555609344e-06, - "loss": 1.357, - "step": 330 - }, - { - "epoch": 0.04487222937707585, - "grad_norm": 1.4186019016749811, - "learning_rate": 1.998854640156379e-06, - "loss": 1.3469, - "step": 331 - }, - { - "epoch": 0.045007795024740734, - "grad_norm": 1.3390962680005782, - "learning_rate": 1.998833532082181e-06, - "loss": 1.2842, - "step": 332 - }, - { - "epoch": 0.04514336067240561, - "grad_norm": 5.860717030717646, - "learning_rate": 1.9988122313908212e-06, - "loss": 1.3753, - "step": 333 - }, - { - "epoch": 0.045278926320070495, - "grad_norm": 1.618106217979852, - "learning_rate": 1.998790738086406e-06, - "loss": 1.3889, - "step": 334 - }, - { - "epoch": 0.04541449196773538, - "grad_norm": 1.4616368136814442, - "learning_rate": 1.9987690521730817e-06, - "loss": 1.3626, - "step": 335 - }, - { - "epoch": 0.045550057615400256, - "grad_norm": 1.5439152338544893, - "learning_rate": 1.9987471736550287e-06, - "loss": 1.3815, - "step": 336 - }, - { - "epoch": 0.04568562326306514, - "grad_norm": 1.4904743508531704, - "learning_rate": 1.9987251025364677e-06, - "loss": 1.3926, - "step": 337 - }, - { - "epoch": 0.04582118891073002, - "grad_norm": 1.7104431384970558, - "learning_rate": 1.9987028388216532e-06, - "loss": 1.3498, - "step": 338 - }, - { - "epoch": 0.0459567545583949, - "grad_norm": 1.9871322000163192, - "learning_rate": 1.99868038251488e-06, - "loss": 1.3535, - "step": 339 - }, - { - "epoch": 0.046092320206059786, - "grad_norm": 2.2622135153707226, - "learning_rate": 1.9986577336204782e-06, - "loss": 1.3214, - "step": 340 - }, - { - "epoch": 0.046227885853724664, - "grad_norm": 1.8411111935251365, - "learning_rate": 1.9986348921428154e-06, - "loss": 1.3622, - "step": 341 - }, - { - "epoch": 0.04636345150138955, - "grad_norm": 5.494406066239442, - "learning_rate": 1.9986118580862964e-06, - "loss": 1.4114, - "step": 342 - }, - { - "epoch": 0.04649901714905443, - "grad_norm": 1.3489395394272685, - "learning_rate": 1.998588631455363e-06, - "loss": 1.3313, - "step": 343 - }, - { - "epoch": 0.04663458279671931, - "grad_norm": 1.9813144604074164, - "learning_rate": 1.9985652122544947e-06, - "loss": 1.309, - "step": 344 - }, - { - "epoch": 0.046770148444384194, - "grad_norm": 1.6903097159004108, - "learning_rate": 1.998541600488207e-06, - "loss": 1.311, - "step": 345 - }, - { - "epoch": 0.04690571409204908, - "grad_norm": 2.2276062021794227, - "learning_rate": 1.998517796161054e-06, - "loss": 1.3767, - "step": 346 - }, - { - "epoch": 0.047041279739713955, - "grad_norm": 1.471609708172449, - "learning_rate": 1.9984937992776257e-06, - "loss": 1.371, - "step": 347 - }, - { - "epoch": 0.04717684538737884, - "grad_norm": 1.5370347383457588, - "learning_rate": 1.99846960984255e-06, - "loss": 1.376, - "step": 348 - }, - { - "epoch": 0.04731241103504372, - "grad_norm": 1.5523030914392526, - "learning_rate": 1.9984452278604907e-06, - "loss": 1.3692, - "step": 349 - }, - { - "epoch": 0.0474479766827086, - "grad_norm": 1.4353164965009617, - "learning_rate": 1.998420653336151e-06, - "loss": 1.3321, - "step": 350 - }, - { - "epoch": 0.047583542330373485, - "grad_norm": 1.565217788762805, - "learning_rate": 1.99839588627427e-06, - "loss": 1.3287, - "step": 351 - }, - { - "epoch": 0.04771910797803836, - "grad_norm": 1.8615156984198393, - "learning_rate": 1.9983709266796224e-06, - "loss": 1.3005, - "step": 352 - }, - { - "epoch": 0.047854673625703247, - "grad_norm": 2.4369499137376955, - "learning_rate": 1.9983457745570222e-06, - "loss": 1.3404, - "step": 353 - }, - { - "epoch": 0.04799023927336813, - "grad_norm": 1.607346094921835, - "learning_rate": 1.99832042991132e-06, - "loss": 1.3405, - "step": 354 - }, - { - "epoch": 0.04812580492103301, - "grad_norm": 1.6184839158540065, - "learning_rate": 1.9982948927474033e-06, - "loss": 1.3596, - "step": 355 - }, - { - "epoch": 0.04826137056869789, - "grad_norm": 2.8590844541543716, - "learning_rate": 1.9982691630701966e-06, - "loss": 1.346, - "step": 356 - }, - { - "epoch": 0.048396936216362776, - "grad_norm": 1.776482412113547, - "learning_rate": 1.9982432408846615e-06, - "loss": 1.3614, - "step": 357 - }, - { - "epoch": 0.048532501864027654, - "grad_norm": 3.770129586164139, - "learning_rate": 1.998217126195797e-06, - "loss": 1.3537, - "step": 358 - }, - { - "epoch": 0.04866806751169254, - "grad_norm": 2.04428253071013, - "learning_rate": 1.9981908190086398e-06, - "loss": 1.3468, - "step": 359 - }, - { - "epoch": 0.04880363315935742, - "grad_norm": 1.5159915733939324, - "learning_rate": 1.9981643193282617e-06, - "loss": 1.3178, - "step": 360 - }, - { - "epoch": 0.0489391988070223, - "grad_norm": 1.7170315618627663, - "learning_rate": 1.9981376271597735e-06, - "loss": 1.3931, - "step": 361 - }, - { - "epoch": 0.049074764454687184, - "grad_norm": 1.6230027904156388, - "learning_rate": 1.9981107425083233e-06, - "loss": 1.3351, - "step": 362 - }, - { - "epoch": 0.04921033010235206, - "grad_norm": 11.869326817553834, - "learning_rate": 1.9980836653790946e-06, - "loss": 1.3617, - "step": 363 - }, - { - "epoch": 0.049345895750016945, - "grad_norm": 2.1147216152060295, - "learning_rate": 1.9980563957773097e-06, - "loss": 1.3339, - "step": 364 - }, - { - "epoch": 0.04948146139768183, - "grad_norm": 1.598455104492896, - "learning_rate": 1.998028933708227e-06, - "loss": 1.3199, - "step": 365 - }, - { - "epoch": 0.04961702704534671, - "grad_norm": 1.7633906596270812, - "learning_rate": 1.9980012791771424e-06, - "loss": 1.3732, - "step": 366 - }, - { - "epoch": 0.04975259269301159, - "grad_norm": 1.8207984269053112, - "learning_rate": 1.9979734321893885e-06, - "loss": 1.3153, - "step": 367 - }, - { - "epoch": 0.049888158340676475, - "grad_norm": 2.1495108479927683, - "learning_rate": 1.9979453927503364e-06, - "loss": 1.3655, - "step": 368 - }, - { - "epoch": 0.05002372398834135, - "grad_norm": 1.596046919186072, - "learning_rate": 1.9979171608653923e-06, - "loss": 1.3301, - "step": 369 - }, - { - "epoch": 0.050159289636006237, - "grad_norm": 1.795715776451096, - "learning_rate": 1.9978887365400006e-06, - "loss": 1.3384, - "step": 370 - }, - { - "epoch": 0.05029485528367112, - "grad_norm": 3.5816793761628496, - "learning_rate": 1.997860119779643e-06, - "loss": 1.331, - "step": 371 - }, - { - "epoch": 0.050430420931336, - "grad_norm": 1.734198087439023, - "learning_rate": 1.9978313105898378e-06, - "loss": 1.3396, - "step": 372 - }, - { - "epoch": 0.05056598657900088, - "grad_norm": 1.5274546220495695, - "learning_rate": 1.997802308976141e-06, - "loss": 1.3723, - "step": 373 - }, - { - "epoch": 0.05070155222666576, - "grad_norm": 2.188705127964835, - "learning_rate": 1.997773114944145e-06, - "loss": 1.3462, - "step": 374 - }, - { - "epoch": 0.050837117874330644, - "grad_norm": 2.0257058017226215, - "learning_rate": 1.99774372849948e-06, - "loss": 1.2909, - "step": 375 - }, - { - "epoch": 0.05097268352199553, - "grad_norm": 1.9760634049413812, - "learning_rate": 1.9977141496478124e-06, - "loss": 1.3347, - "step": 376 - }, - { - "epoch": 0.051108249169660405, - "grad_norm": 4.1368254722518945, - "learning_rate": 1.9976843783948463e-06, - "loss": 1.372, - "step": 377 - }, - { - "epoch": 0.05124381481732529, - "grad_norm": 1.8292719934168464, - "learning_rate": 1.9976544147463237e-06, - "loss": 1.327, - "step": 378 - }, - { - "epoch": 0.051379380464990174, - "grad_norm": 4.208575732877597, - "learning_rate": 1.9976242587080216e-06, - "loss": 1.366, - "step": 379 - }, - { - "epoch": 0.05151494611265505, - "grad_norm": 3.742350703435567, - "learning_rate": 1.997593910285756e-06, - "loss": 1.3197, - "step": 380 - }, - { - "epoch": 0.051650511760319935, - "grad_norm": 1.4592105287386117, - "learning_rate": 1.9975633694853797e-06, - "loss": 1.3536, - "step": 381 - }, - { - "epoch": 0.05178607740798482, - "grad_norm": 1.6300626078368101, - "learning_rate": 1.9975326363127815e-06, - "loss": 1.334, - "step": 382 - }, - { - "epoch": 0.0519216430556497, - "grad_norm": 1.9820916323856055, - "learning_rate": 1.9975017107738887e-06, - "loss": 1.3494, - "step": 383 - }, - { - "epoch": 0.05205720870331458, - "grad_norm": 4.4990301636312955, - "learning_rate": 1.997470592874665e-06, - "loss": 1.3449, - "step": 384 - }, - { - "epoch": 0.052192774350979465, - "grad_norm": 1.5630700204309569, - "learning_rate": 1.9974392826211107e-06, - "loss": 1.377, - "step": 385 - }, - { - "epoch": 0.05232833999864434, - "grad_norm": 1.6289114310746307, - "learning_rate": 1.997407780019264e-06, - "loss": 1.3061, - "step": 386 - }, - { - "epoch": 0.05246390564630923, - "grad_norm": 2.086155189139043, - "learning_rate": 1.9973760850752e-06, - "loss": 1.3142, - "step": 387 - }, - { - "epoch": 0.052599471293974104, - "grad_norm": 1.6497620612641346, - "learning_rate": 1.997344197795031e-06, - "loss": 1.3621, - "step": 388 - }, - { - "epoch": 0.05273503694163899, - "grad_norm": 4.02957297002193, - "learning_rate": 1.9973121181849056e-06, - "loss": 1.3509, - "step": 389 - }, - { - "epoch": 0.05287060258930387, - "grad_norm": 1.6178439486427196, - "learning_rate": 1.997279846251011e-06, - "loss": 1.3368, - "step": 390 - }, - { - "epoch": 0.05300616823696875, - "grad_norm": 1.4741117061716669, - "learning_rate": 1.99724738199957e-06, - "loss": 1.3484, - "step": 391 - }, - { - "epoch": 0.053141733884633634, - "grad_norm": 3.249748446659568, - "learning_rate": 1.997214725436843e-06, - "loss": 1.362, - "step": 392 - }, - { - "epoch": 0.05327729953229852, - "grad_norm": 3.666770151554077, - "learning_rate": 1.997181876569128e-06, - "loss": 1.3191, - "step": 393 - }, - { - "epoch": 0.053412865179963395, - "grad_norm": 2.0381016666494327, - "learning_rate": 1.9971488354027592e-06, - "loss": 1.3278, - "step": 394 - }, - { - "epoch": 0.05354843082762828, - "grad_norm": 4.632742415684971, - "learning_rate": 1.997115601944108e-06, - "loss": 1.2984, - "step": 395 - }, - { - "epoch": 0.053683996475293164, - "grad_norm": 1.6658717188127294, - "learning_rate": 1.9970821761995843e-06, - "loss": 1.3036, - "step": 396 - }, - { - "epoch": 0.05381956212295804, - "grad_norm": 3.0114119128877137, - "learning_rate": 1.9970485581756334e-06, - "loss": 1.3339, - "step": 397 - }, - { - "epoch": 0.053955127770622925, - "grad_norm": 1.3842415618330917, - "learning_rate": 1.997014747878738e-06, - "loss": 1.3188, - "step": 398 - }, - { - "epoch": 0.0540906934182878, - "grad_norm": 1.4315856550353896, - "learning_rate": 1.996980745315419e-06, - "loss": 1.3469, - "step": 399 - }, - { - "epoch": 0.05422625906595269, - "grad_norm": 1.6406857491847975, - "learning_rate": 1.9969465504922324e-06, - "loss": 1.3675, - "step": 400 - }, - { - "epoch": 0.05436182471361757, - "grad_norm": 2.016661666282183, - "learning_rate": 1.9969121634157734e-06, - "loss": 1.3483, - "step": 401 - }, - { - "epoch": 0.05449739036128245, - "grad_norm": 1.6753470392988217, - "learning_rate": 1.9968775840926725e-06, - "loss": 1.3324, - "step": 402 - }, - { - "epoch": 0.05463295600894733, - "grad_norm": 1.5111473604936305, - "learning_rate": 1.996842812529598e-06, - "loss": 1.2842, - "step": 403 - }, - { - "epoch": 0.05476852165661222, - "grad_norm": 1.6828378650794957, - "learning_rate": 1.9968078487332563e-06, - "loss": 1.3304, - "step": 404 - }, - { - "epoch": 0.054904087304277094, - "grad_norm": 2.168934223045547, - "learning_rate": 1.9967726927103893e-06, - "loss": 1.3165, - "step": 405 - }, - { - "epoch": 0.05503965295194198, - "grad_norm": 1.8997720956075808, - "learning_rate": 1.9967373444677763e-06, - "loss": 1.2941, - "step": 406 - }, - { - "epoch": 0.05517521859960686, - "grad_norm": 2.7517396334209123, - "learning_rate": 1.996701804012234e-06, - "loss": 1.3277, - "step": 407 - }, - { - "epoch": 0.05531078424727174, - "grad_norm": 2.2067764042230524, - "learning_rate": 1.9966660713506167e-06, - "loss": 1.3626, - "step": 408 - }, - { - "epoch": 0.055446349894936624, - "grad_norm": 1.7041715825335415, - "learning_rate": 1.996630146489815e-06, - "loss": 1.3254, - "step": 409 - }, - { - "epoch": 0.05558191554260151, - "grad_norm": 1.5771474992965888, - "learning_rate": 1.996594029436756e-06, - "loss": 1.3367, - "step": 410 - }, - { - "epoch": 0.055717481190266385, - "grad_norm": 5.784844170210368, - "learning_rate": 1.9965577201984048e-06, - "loss": 1.2865, - "step": 411 - }, - { - "epoch": 0.05585304683793127, - "grad_norm": 1.9801906393826596, - "learning_rate": 1.9965212187817644e-06, - "loss": 1.3325, - "step": 412 - }, - { - "epoch": 0.05598861248559615, - "grad_norm": 1.9624822261665378, - "learning_rate": 1.9964845251938722e-06, - "loss": 1.3566, - "step": 413 - }, - { - "epoch": 0.05612417813326103, - "grad_norm": 2.2097174616707704, - "learning_rate": 1.9964476394418054e-06, - "loss": 1.3153, - "step": 414 - }, - { - "epoch": 0.056259743780925915, - "grad_norm": 1.4470693231991125, - "learning_rate": 1.996410561532677e-06, - "loss": 1.3222, - "step": 415 - }, - { - "epoch": 0.05639530942859079, - "grad_norm": 1.5564458343382892, - "learning_rate": 1.996373291473637e-06, - "loss": 1.3184, - "step": 416 - }, - { - "epoch": 0.05653087507625568, - "grad_norm": 1.6882197797594178, - "learning_rate": 1.9963358292718723e-06, - "loss": 1.3763, - "step": 417 - }, - { - "epoch": 0.05666644072392056, - "grad_norm": 1.565012291020919, - "learning_rate": 1.996298174934608e-06, - "loss": 1.3176, - "step": 418 - }, - { - "epoch": 0.05680200637158544, - "grad_norm": 1.6013360211328695, - "learning_rate": 1.996260328469104e-06, - "loss": 1.3158, - "step": 419 - }, - { - "epoch": 0.05693757201925032, - "grad_norm": 2.513638370491214, - "learning_rate": 1.9962222898826608e-06, - "loss": 1.3361, - "step": 420 - }, - { - "epoch": 0.05707313766691521, - "grad_norm": 2.424746708148747, - "learning_rate": 1.996184059182612e-06, - "loss": 1.3288, - "step": 421 - }, - { - "epoch": 0.057208703314580084, - "grad_norm": 1.8247804104149357, - "learning_rate": 1.996145636376331e-06, - "loss": 1.349, - "step": 422 - }, - { - "epoch": 0.05734426896224497, - "grad_norm": 1.9809696762546272, - "learning_rate": 1.996107021471227e-06, - "loss": 1.3488, - "step": 423 - }, - { - "epoch": 0.05747983460990985, - "grad_norm": 1.5862141344091834, - "learning_rate": 1.996068214474747e-06, - "loss": 1.3386, - "step": 424 - }, - { - "epoch": 0.05761540025757473, - "grad_norm": 1.6883426978358795, - "learning_rate": 1.996029215394374e-06, - "loss": 1.3168, - "step": 425 - }, - { - "epoch": 0.057750965905239614, - "grad_norm": 1.5589826421852886, - "learning_rate": 1.9959900242376294e-06, - "loss": 1.2977, - "step": 426 - }, - { - "epoch": 0.05788653155290449, - "grad_norm": 1.711752196616678, - "learning_rate": 1.9959506410120702e-06, - "loss": 1.3076, - "step": 427 - }, - { - "epoch": 0.058022097200569375, - "grad_norm": 1.4669977289469502, - "learning_rate": 1.9959110657252915e-06, - "loss": 1.327, - "step": 428 - }, - { - "epoch": 0.05815766284823426, - "grad_norm": 1.44419022891671, - "learning_rate": 1.995871298384925e-06, - "loss": 1.3304, - "step": 429 - }, - { - "epoch": 0.05829322849589914, - "grad_norm": 4.917571214962596, - "learning_rate": 1.9958313389986395e-06, - "loss": 1.3045, - "step": 430 - }, - { - "epoch": 0.05842879414356402, - "grad_norm": 1.6250185548028586, - "learning_rate": 1.995791187574141e-06, - "loss": 1.3262, - "step": 431 - }, - { - "epoch": 0.058564359791228905, - "grad_norm": 1.433707422981661, - "learning_rate": 1.995750844119172e-06, - "loss": 1.3117, - "step": 432 - }, - { - "epoch": 0.05869992543889378, - "grad_norm": 1.5697212323629302, - "learning_rate": 1.995710308641513e-06, - "loss": 1.3278, - "step": 433 - }, - { - "epoch": 0.05883549108655867, - "grad_norm": 2.2646359788149573, - "learning_rate": 1.9956695811489803e-06, - "loss": 1.3089, - "step": 434 - }, - { - "epoch": 0.05897105673422355, - "grad_norm": 5.023230639955923, - "learning_rate": 1.9956286616494287e-06, - "loss": 1.2927, - "step": 435 - }, - { - "epoch": 0.05910662238188843, - "grad_norm": 1.5985371999441, - "learning_rate": 1.9955875501507485e-06, - "loss": 1.3176, - "step": 436 - }, - { - "epoch": 0.05924218802955331, - "grad_norm": 1.4716544494839214, - "learning_rate": 1.995546246660868e-06, - "loss": 1.288, - "step": 437 - }, - { - "epoch": 0.05937775367721819, - "grad_norm": 2.164608683204455, - "learning_rate": 1.995504751187752e-06, - "loss": 1.3516, - "step": 438 - }, - { - "epoch": 0.059513319324883074, - "grad_norm": 1.64331861990943, - "learning_rate": 1.9954630637394027e-06, - "loss": 1.2823, - "step": 439 - }, - { - "epoch": 0.05964888497254796, - "grad_norm": 1.4723217058941405, - "learning_rate": 1.9954211843238594e-06, - "loss": 1.3084, - "step": 440 - }, - { - "epoch": 0.059784450620212835, - "grad_norm": 1.6709222848785577, - "learning_rate": 1.9953791129491983e-06, - "loss": 1.3354, - "step": 441 - }, - { - "epoch": 0.05992001626787772, - "grad_norm": 1.556185035062882, - "learning_rate": 1.995336849623532e-06, - "loss": 1.347, - "step": 442 - }, - { - "epoch": 0.060055581915542604, - "grad_norm": 2.203247919926396, - "learning_rate": 1.995294394355011e-06, - "loss": 1.2845, - "step": 443 - }, - { - "epoch": 0.06019114756320748, - "grad_norm": 1.524745899137362, - "learning_rate": 1.9952517471518228e-06, - "loss": 1.3146, - "step": 444 - }, - { - "epoch": 0.060326713210872365, - "grad_norm": 1.8508110768572208, - "learning_rate": 1.9952089080221907e-06, - "loss": 1.2908, - "step": 445 - }, - { - "epoch": 0.06046227885853725, - "grad_norm": 1.7391566072419, - "learning_rate": 1.9951658769743766e-06, - "loss": 1.3391, - "step": 446 - }, - { - "epoch": 0.06059784450620213, - "grad_norm": 1.8279658169804338, - "learning_rate": 1.9951226540166785e-06, - "loss": 1.314, - "step": 447 - }, - { - "epoch": 0.06073341015386701, - "grad_norm": 1.4676813551434693, - "learning_rate": 1.9950792391574316e-06, - "loss": 1.3272, - "step": 448 - }, - { - "epoch": 0.060868975801531895, - "grad_norm": 1.6382691829959917, - "learning_rate": 1.995035632405008e-06, - "loss": 1.3357, - "step": 449 - }, - { - "epoch": 0.06100454144919677, - "grad_norm": 1.8445399743875985, - "learning_rate": 1.994991833767817e-06, - "loss": 1.3177, - "step": 450 - }, - { - "epoch": 0.06114010709686166, - "grad_norm": 1.9515047605021112, - "learning_rate": 1.994947843254305e-06, - "loss": 1.3173, - "step": 451 - }, - { - "epoch": 0.061275672744526534, - "grad_norm": 2.2603209811392904, - "learning_rate": 1.994903660872955e-06, - "loss": 1.3073, - "step": 452 - }, - { - "epoch": 0.06141123839219142, - "grad_norm": 1.419849389488627, - "learning_rate": 1.9948592866322873e-06, - "loss": 1.2722, - "step": 453 - }, - { - "epoch": 0.0615468040398563, - "grad_norm": 1.9003273639738958, - "learning_rate": 1.9948147205408593e-06, - "loss": 1.3486, - "step": 454 - }, - { - "epoch": 0.06168236968752118, - "grad_norm": 1.8132409892624903, - "learning_rate": 1.9947699626072646e-06, - "loss": 1.3251, - "step": 455 - }, - { - "epoch": 0.061817935335186064, - "grad_norm": 1.4649313077167263, - "learning_rate": 1.9947250128401354e-06, - "loss": 1.2827, - "step": 456 - }, - { - "epoch": 0.06195350098285095, - "grad_norm": 2.001177415425561, - "learning_rate": 1.994679871248139e-06, - "loss": 1.3168, - "step": 457 - }, - { - "epoch": 0.062089066630515825, - "grad_norm": 1.8607492276313142, - "learning_rate": 1.9946345378399807e-06, - "loss": 1.3489, - "step": 458 - }, - { - "epoch": 0.06222463227818071, - "grad_norm": 1.7635360590414757, - "learning_rate": 1.9945890126244038e-06, - "loss": 1.3577, - "step": 459 - }, - { - "epoch": 0.062360197925845594, - "grad_norm": 1.8454874815312912, - "learning_rate": 1.9945432956101858e-06, - "loss": 1.3115, - "step": 460 - }, - { - "epoch": 0.06249576357351047, - "grad_norm": 1.6026541863132866, - "learning_rate": 1.994497386806144e-06, - "loss": 1.3157, - "step": 461 - }, - { - "epoch": 0.06263132922117536, - "grad_norm": 2.6847942475959767, - "learning_rate": 1.9944512862211313e-06, - "loss": 1.2664, - "step": 462 - }, - { - "epoch": 0.06276689486884024, - "grad_norm": 1.9449916937171503, - "learning_rate": 1.9944049938640377e-06, - "loss": 1.2971, - "step": 463 - }, - { - "epoch": 0.06290246051650512, - "grad_norm": 1.4022842639250457, - "learning_rate": 1.9943585097437903e-06, - "loss": 1.3126, - "step": 464 - }, - { - "epoch": 0.06303802616417, - "grad_norm": 1.926694244568102, - "learning_rate": 1.9943118338693533e-06, - "loss": 1.2791, - "step": 465 - }, - { - "epoch": 0.06317359181183488, - "grad_norm": 1.9104559946057846, - "learning_rate": 1.994264966249728e-06, - "loss": 1.3286, - "step": 466 - }, - { - "epoch": 0.06330915745949976, - "grad_norm": 2.762839817865476, - "learning_rate": 1.9942179068939516e-06, - "loss": 1.3058, - "step": 467 - }, - { - "epoch": 0.06344472310716465, - "grad_norm": 1.4725354866187492, - "learning_rate": 1.9941706558111004e-06, - "loss": 1.3204, - "step": 468 - }, - { - "epoch": 0.06358028875482953, - "grad_norm": 1.7162197224725495, - "learning_rate": 1.9941232130102854e-06, - "loss": 1.3061, - "step": 469 - }, - { - "epoch": 0.0637158544024944, - "grad_norm": 1.6356961100078748, - "learning_rate": 1.9940755785006564e-06, - "loss": 1.3048, - "step": 470 - }, - { - "epoch": 0.06385142005015929, - "grad_norm": 2.883742681067345, - "learning_rate": 1.994027752291398e-06, - "loss": 1.3028, - "step": 471 - }, - { - "epoch": 0.06398698569782417, - "grad_norm": 2.8245688472067143, - "learning_rate": 1.9939797343917344e-06, - "loss": 1.3033, - "step": 472 - }, - { - "epoch": 0.06412255134548905, - "grad_norm": 1.530754263008746, - "learning_rate": 1.9939315248109253e-06, - "loss": 1.3265, - "step": 473 - }, - { - "epoch": 0.06425811699315394, - "grad_norm": 1.6941335404899922, - "learning_rate": 1.993883123558267e-06, - "loss": 1.3044, - "step": 474 - }, - { - "epoch": 0.06439368264081882, - "grad_norm": 1.9094991559219776, - "learning_rate": 1.9938345306430936e-06, - "loss": 1.2954, - "step": 475 - }, - { - "epoch": 0.06452924828848369, - "grad_norm": 2.070619884497355, - "learning_rate": 1.9937857460747757e-06, - "loss": 1.3231, - "step": 476 - }, - { - "epoch": 0.06466481393614858, - "grad_norm": 1.6996400118328405, - "learning_rate": 1.9937367698627208e-06, - "loss": 1.3233, - "step": 477 - }, - { - "epoch": 0.06480037958381346, - "grad_norm": 1.5968529957784185, - "learning_rate": 1.9936876020163746e-06, - "loss": 1.3158, - "step": 478 - }, - { - "epoch": 0.06493594523147835, - "grad_norm": 1.5585931057617113, - "learning_rate": 1.9936382425452176e-06, - "loss": 1.3498, - "step": 479 - }, - { - "epoch": 0.06507151087914323, - "grad_norm": 1.6816609897079249, - "learning_rate": 1.993588691458769e-06, - "loss": 1.3026, - "step": 480 - }, - { - "epoch": 0.06520707652680811, - "grad_norm": 1.6979704883039763, - "learning_rate": 1.993538948766584e-06, - "loss": 1.323, - "step": 481 - }, - { - "epoch": 0.06534264217447298, - "grad_norm": 1.4992519989584911, - "learning_rate": 1.9934890144782558e-06, - "loss": 1.3057, - "step": 482 - }, - { - "epoch": 0.06547820782213787, - "grad_norm": 1.8104416207849634, - "learning_rate": 1.9934388886034126e-06, - "loss": 1.2636, - "step": 483 - }, - { - "epoch": 0.06561377346980275, - "grad_norm": 1.7547422058059843, - "learning_rate": 1.993388571151722e-06, - "loss": 1.3164, - "step": 484 - }, - { - "epoch": 0.06574933911746764, - "grad_norm": 2.0678389452371, - "learning_rate": 1.993338062132886e-06, - "loss": 1.3669, - "step": 485 - }, - { - "epoch": 0.06588490476513252, - "grad_norm": 1.9701122273144858, - "learning_rate": 1.993287361556646e-06, - "loss": 1.3276, - "step": 486 - }, - { - "epoch": 0.06602047041279739, - "grad_norm": 1.9412510091233752, - "learning_rate": 1.9932364694327795e-06, - "loss": 1.297, - "step": 487 - }, - { - "epoch": 0.06615603606046228, - "grad_norm": 3.310422642067009, - "learning_rate": 1.9931853857710995e-06, - "loss": 1.3203, - "step": 488 - }, - { - "epoch": 0.06629160170812716, - "grad_norm": 1.7928286895455505, - "learning_rate": 1.9931341105814575e-06, - "loss": 1.3354, - "step": 489 - }, - { - "epoch": 0.06642716735579204, - "grad_norm": 1.519300892551403, - "learning_rate": 1.993082643873742e-06, - "loss": 1.349, - "step": 490 - }, - { - "epoch": 0.06656273300345693, - "grad_norm": 5.584329426558805, - "learning_rate": 1.9930309856578772e-06, - "loss": 1.2887, - "step": 491 - }, - { - "epoch": 0.06669829865112181, - "grad_norm": 1.62419084173422, - "learning_rate": 1.992979135943825e-06, - "loss": 1.3043, - "step": 492 - }, - { - "epoch": 0.06683386429878668, - "grad_norm": 1.6850969082886642, - "learning_rate": 1.9929270947415852e-06, - "loss": 1.296, - "step": 493 - }, - { - "epoch": 0.06696942994645157, - "grad_norm": 5.404630141479217, - "learning_rate": 1.9928748620611927e-06, - "loss": 1.3213, - "step": 494 - }, - { - "epoch": 0.06710499559411645, - "grad_norm": 1.7852175932919099, - "learning_rate": 1.99282243791272e-06, - "loss": 1.3772, - "step": 495 - }, - { - "epoch": 0.06724056124178134, - "grad_norm": 2.4986628623650278, - "learning_rate": 1.992769822306277e-06, - "loss": 1.3403, - "step": 496 - }, - { - "epoch": 0.06737612688944622, - "grad_norm": 1.4346852549025286, - "learning_rate": 1.992717015252011e-06, - "loss": 1.3058, - "step": 497 - }, - { - "epoch": 0.06751169253711109, - "grad_norm": 2.488055620019409, - "learning_rate": 1.992664016760104e-06, - "loss": 1.3185, - "step": 498 - }, - { - "epoch": 0.06764725818477597, - "grad_norm": 2.8565567213505023, - "learning_rate": 1.992610826840777e-06, - "loss": 1.2932, - "step": 499 - }, - { - "epoch": 0.06778282383244086, - "grad_norm": 1.6128376424556348, - "learning_rate": 1.9925574455042873e-06, - "loss": 1.3464, - "step": 500 - }, - { - "epoch": 0.06791838948010574, - "grad_norm": 1.5802302125869778, - "learning_rate": 1.9925038727609287e-06, - "loss": 1.2843, - "step": 501 - }, - { - "epoch": 0.06805395512777063, - "grad_norm": 1.933618973642034, - "learning_rate": 1.9924501086210334e-06, - "loss": 1.341, - "step": 502 - }, - { - "epoch": 0.06818952077543551, - "grad_norm": 1.4995581323401785, - "learning_rate": 1.9923961530949677e-06, - "loss": 1.2896, - "step": 503 - }, - { - "epoch": 0.06832508642310038, - "grad_norm": 1.7532118700751427, - "learning_rate": 1.9923420061931376e-06, - "loss": 1.2881, - "step": 504 - }, - { - "epoch": 0.06846065207076527, - "grad_norm": 1.9874518329772233, - "learning_rate": 1.992287667925985e-06, - "loss": 1.2627, - "step": 505 - }, - { - "epoch": 0.06859621771843015, - "grad_norm": 1.416892297483906, - "learning_rate": 1.992233138303988e-06, - "loss": 1.2797, - "step": 506 - }, - { - "epoch": 0.06873178336609503, - "grad_norm": 1.6236043899055141, - "learning_rate": 1.9921784173376626e-06, - "loss": 1.2901, - "step": 507 - }, - { - "epoch": 0.06886734901375992, - "grad_norm": 1.4614626414428689, - "learning_rate": 1.9921235050375612e-06, - "loss": 1.2923, - "step": 508 - }, - { - "epoch": 0.06900291466142479, - "grad_norm": 1.4960417070658094, - "learning_rate": 1.9920684014142736e-06, - "loss": 1.3025, - "step": 509 - }, - { - "epoch": 0.06913848030908967, - "grad_norm": 1.8175874161478727, - "learning_rate": 1.992013106478425e-06, - "loss": 1.3134, - "step": 510 - }, - { - "epoch": 0.06927404595675456, - "grad_norm": 4.675520892339957, - "learning_rate": 1.9919576202406795e-06, - "loss": 1.265, - "step": 511 - }, - { - "epoch": 0.06940961160441944, - "grad_norm": 3.2588933028334264, - "learning_rate": 1.9919019427117372e-06, - "loss": 1.3017, - "step": 512 - }, - { - "epoch": 0.06954517725208433, - "grad_norm": 1.7187836335470021, - "learning_rate": 1.9918460739023348e-06, - "loss": 1.3391, - "step": 513 - }, - { - "epoch": 0.06968074289974921, - "grad_norm": 1.6891302130948875, - "learning_rate": 1.991790013823246e-06, - "loss": 1.3281, - "step": 514 - }, - { - "epoch": 0.06981630854741408, - "grad_norm": 1.5992363755937586, - "learning_rate": 1.991733762485282e-06, - "loss": 1.3046, - "step": 515 - }, - { - "epoch": 0.06995187419507896, - "grad_norm": 1.6975915094643845, - "learning_rate": 1.9916773198992897e-06, - "loss": 1.3027, - "step": 516 - }, - { - "epoch": 0.07008743984274385, - "grad_norm": 1.7819207707701166, - "learning_rate": 1.9916206860761546e-06, - "loss": 1.2921, - "step": 517 - }, - { - "epoch": 0.07022300549040873, - "grad_norm": 1.9452075033276783, - "learning_rate": 1.9915638610267974e-06, - "loss": 1.3292, - "step": 518 - }, - { - "epoch": 0.07035857113807362, - "grad_norm": 1.5916496332558832, - "learning_rate": 1.9915068447621765e-06, - "loss": 1.3243, - "step": 519 - }, - { - "epoch": 0.07049413678573849, - "grad_norm": 1.4423492102187532, - "learning_rate": 1.9914496372932873e-06, - "loss": 1.3075, - "step": 520 - }, - { - "epoch": 0.07062970243340337, - "grad_norm": 2.4555355242774586, - "learning_rate": 1.9913922386311612e-06, - "loss": 1.3174, - "step": 521 - }, - { - "epoch": 0.07076526808106826, - "grad_norm": 1.3462200257572452, - "learning_rate": 1.9913346487868676e-06, - "loss": 1.3203, - "step": 522 - }, - { - "epoch": 0.07090083372873314, - "grad_norm": 1.9351246112699338, - "learning_rate": 1.9912768677715123e-06, - "loss": 1.3217, - "step": 523 - }, - { - "epoch": 0.07103639937639802, - "grad_norm": 1.9896765270656418, - "learning_rate": 1.9912188955962376e-06, - "loss": 1.3278, - "step": 524 - }, - { - "epoch": 0.07117196502406291, - "grad_norm": 1.6145284674745477, - "learning_rate": 1.991160732272223e-06, - "loss": 1.3164, - "step": 525 - }, - { - "epoch": 0.07130753067172778, - "grad_norm": 1.5942759380528135, - "learning_rate": 1.9911023778106846e-06, - "loss": 1.3077, - "step": 526 - }, - { - "epoch": 0.07144309631939266, - "grad_norm": 2.047575289827827, - "learning_rate": 1.9910438322228762e-06, - "loss": 1.3126, - "step": 527 - }, - { - "epoch": 0.07157866196705755, - "grad_norm": 1.7041947373741728, - "learning_rate": 1.990985095520088e-06, - "loss": 1.2908, - "step": 528 - }, - { - "epoch": 0.07171422761472243, - "grad_norm": 1.4762904263915089, - "learning_rate": 1.990926167713646e-06, - "loss": 1.2483, - "step": 529 - }, - { - "epoch": 0.07184979326238732, - "grad_norm": 1.489943388069022, - "learning_rate": 1.9908670488149145e-06, - "loss": 1.288, - "step": 530 - }, - { - "epoch": 0.0719853589100522, - "grad_norm": 1.8990393053973695, - "learning_rate": 1.9908077388352943e-06, - "loss": 1.2713, - "step": 531 - }, - { - "epoch": 0.07212092455771707, - "grad_norm": 2.0358807226289755, - "learning_rate": 1.9907482377862226e-06, - "loss": 1.3144, - "step": 532 - }, - { - "epoch": 0.07225649020538195, - "grad_norm": 1.5567197672750046, - "learning_rate": 1.990688545679173e-06, - "loss": 1.2997, - "step": 533 - }, - { - "epoch": 0.07239205585304684, - "grad_norm": 2.126356687117648, - "learning_rate": 1.990628662525658e-06, - "loss": 1.292, - "step": 534 - }, - { - "epoch": 0.07252762150071172, - "grad_norm": 1.4903445990419877, - "learning_rate": 1.9905685883372254e-06, - "loss": 1.295, - "step": 535 - }, - { - "epoch": 0.0726631871483766, - "grad_norm": 1.481390527472389, - "learning_rate": 1.990508323125459e-06, - "loss": 1.2703, - "step": 536 - }, - { - "epoch": 0.07279875279604148, - "grad_norm": 1.5704060471992556, - "learning_rate": 1.9904478669019815e-06, - "loss": 1.2386, - "step": 537 - }, - { - "epoch": 0.07293431844370636, - "grad_norm": 1.5689773296724414, - "learning_rate": 1.990387219678451e-06, - "loss": 1.3121, - "step": 538 - }, - { - "epoch": 0.07306988409137125, - "grad_norm": 1.4870583683999612, - "learning_rate": 1.9903263814665624e-06, - "loss": 1.2591, - "step": 539 - }, - { - "epoch": 0.07320544973903613, - "grad_norm": 1.6471986131768939, - "learning_rate": 1.9902653522780482e-06, - "loss": 1.2907, - "step": 540 - }, - { - "epoch": 0.07334101538670101, - "grad_norm": 1.7367319575823017, - "learning_rate": 1.990204132124678e-06, - "loss": 1.2741, - "step": 541 - }, - { - "epoch": 0.0734765810343659, - "grad_norm": 1.4761462763749382, - "learning_rate": 1.990142721018257e-06, - "loss": 1.2685, - "step": 542 - }, - { - "epoch": 0.07361214668203077, - "grad_norm": 1.5581884993448196, - "learning_rate": 1.990081118970628e-06, - "loss": 1.2779, - "step": 543 - }, - { - "epoch": 0.07374771232969565, - "grad_norm": 1.4879160991291551, - "learning_rate": 1.99001932599367e-06, - "loss": 1.2951, - "step": 544 - }, - { - "epoch": 0.07388327797736054, - "grad_norm": 4.185222222717664, - "learning_rate": 1.9899573420993003e-06, - "loss": 1.3256, - "step": 545 - }, - { - "epoch": 0.07401884362502542, - "grad_norm": 1.55144865719735, - "learning_rate": 1.9898951672994708e-06, - "loss": 1.303, - "step": 546 - }, - { - "epoch": 0.0741544092726903, - "grad_norm": 1.4335482947892946, - "learning_rate": 1.9898328016061726e-06, - "loss": 1.2841, - "step": 547 - }, - { - "epoch": 0.07428997492035518, - "grad_norm": 1.5598381220287991, - "learning_rate": 1.9897702450314316e-06, - "loss": 1.2985, - "step": 548 - }, - { - "epoch": 0.07442554056802006, - "grad_norm": 1.890981458795422, - "learning_rate": 1.9897074975873116e-06, - "loss": 1.3111, - "step": 549 - }, - { - "epoch": 0.07456110621568494, - "grad_norm": 1.6154079356462079, - "learning_rate": 1.9896445592859134e-06, - "loss": 1.2919, - "step": 550 - }, - { - "epoch": 0.07469667186334983, - "grad_norm": 1.6989202802312897, - "learning_rate": 1.989581430139373e-06, - "loss": 1.309, - "step": 551 - }, - { - "epoch": 0.07483223751101471, - "grad_norm": 1.5538909561532737, - "learning_rate": 1.9895181101598656e-06, - "loss": 1.2899, - "step": 552 - }, - { - "epoch": 0.0749678031586796, - "grad_norm": 2.5168039316526745, - "learning_rate": 1.9894545993596014e-06, - "loss": 1.2638, - "step": 553 - }, - { - "epoch": 0.07510336880634447, - "grad_norm": 1.5630990434876166, - "learning_rate": 1.9893908977508277e-06, - "loss": 1.2884, - "step": 554 - }, - { - "epoch": 0.07523893445400935, - "grad_norm": 1.836554456480694, - "learning_rate": 1.9893270053458293e-06, - "loss": 1.3104, - "step": 555 - }, - { - "epoch": 0.07537450010167424, - "grad_norm": 1.4659186365549168, - "learning_rate": 1.9892629221569274e-06, - "loss": 1.3231, - "step": 556 - }, - { - "epoch": 0.07551006574933912, - "grad_norm": 1.5972956343711222, - "learning_rate": 1.989198648196479e-06, - "loss": 1.3139, - "step": 557 - }, - { - "epoch": 0.075645631397004, - "grad_norm": 1.7672485452533742, - "learning_rate": 1.9891341834768806e-06, - "loss": 1.3099, - "step": 558 - }, - { - "epoch": 0.07578119704466887, - "grad_norm": 1.5838838767637349, - "learning_rate": 1.9890695280105622e-06, - "loss": 1.2889, - "step": 559 - }, - { - "epoch": 0.07591676269233376, - "grad_norm": 1.5364930705973818, - "learning_rate": 1.9890046818099925e-06, - "loss": 1.2781, - "step": 560 - }, - { - "epoch": 0.07605232833999864, - "grad_norm": 1.5693138789188021, - "learning_rate": 1.9889396448876765e-06, - "loss": 1.3543, - "step": 561 - }, - { - "epoch": 0.07618789398766353, - "grad_norm": 1.438154199064127, - "learning_rate": 1.9888744172561563e-06, - "loss": 1.3266, - "step": 562 - }, - { - "epoch": 0.07632345963532841, - "grad_norm": 1.494128019686765, - "learning_rate": 1.9888089989280107e-06, - "loss": 1.295, - "step": 563 - }, - { - "epoch": 0.0764590252829933, - "grad_norm": 1.4710901347307717, - "learning_rate": 1.9887433899158547e-06, - "loss": 1.3267, - "step": 564 - }, - { - "epoch": 0.07659459093065817, - "grad_norm": 1.7565577152342584, - "learning_rate": 1.9886775902323402e-06, - "loss": 1.3213, - "step": 565 - }, - { - "epoch": 0.07673015657832305, - "grad_norm": 1.5562145381014498, - "learning_rate": 1.9886115998901572e-06, - "loss": 1.3249, - "step": 566 - }, - { - "epoch": 0.07686572222598793, - "grad_norm": 1.6294774303994506, - "learning_rate": 1.9885454189020303e-06, - "loss": 1.3127, - "step": 567 - }, - { - "epoch": 0.07700128787365282, - "grad_norm": 1.6613780821092223, - "learning_rate": 1.988479047280723e-06, - "loss": 1.3302, - "step": 568 - }, - { - "epoch": 0.0771368535213177, - "grad_norm": 1.8873318123532934, - "learning_rate": 1.9884124850390336e-06, - "loss": 1.2922, - "step": 569 - }, - { - "epoch": 0.07727241916898257, - "grad_norm": 1.8687203650642756, - "learning_rate": 1.9883457321897984e-06, - "loss": 1.3248, - "step": 570 - }, - { - "epoch": 0.07740798481664746, - "grad_norm": 3.9050825380692187, - "learning_rate": 1.9882787887458907e-06, - "loss": 1.2933, - "step": 571 - }, - { - "epoch": 0.07754355046431234, - "grad_norm": 1.589537659341866, - "learning_rate": 1.988211654720219e-06, - "loss": 1.3532, - "step": 572 - }, - { - "epoch": 0.07767911611197723, - "grad_norm": 1.542611601841325, - "learning_rate": 1.9881443301257308e-06, - "loss": 1.2784, - "step": 573 - }, - { - "epoch": 0.07781468175964211, - "grad_norm": 2.208028798484998, - "learning_rate": 1.988076814975408e-06, - "loss": 1.2977, - "step": 574 - }, - { - "epoch": 0.077950247407307, - "grad_norm": 1.5380495056127808, - "learning_rate": 1.988009109282271e-06, - "loss": 1.3173, - "step": 575 - }, - { - "epoch": 0.07808581305497186, - "grad_norm": 1.9697441115530157, - "learning_rate": 1.9879412130593765e-06, - "loss": 1.3302, - "step": 576 - }, - { - "epoch": 0.07822137870263675, - "grad_norm": 1.6340040526633612, - "learning_rate": 1.9878731263198165e-06, - "loss": 1.2785, - "step": 577 - }, - { - "epoch": 0.07835694435030163, - "grad_norm": 1.703497370768127, - "learning_rate": 1.987804849076723e-06, - "loss": 1.2677, - "step": 578 - }, - { - "epoch": 0.07849250999796652, - "grad_norm": 1.4224659947371023, - "learning_rate": 1.9877363813432607e-06, - "loss": 1.3357, - "step": 579 - }, - { - "epoch": 0.0786280756456314, - "grad_norm": 1.7189135770354766, - "learning_rate": 1.9876677231326347e-06, - "loss": 1.293, - "step": 580 - }, - { - "epoch": 0.07876364129329629, - "grad_norm": 1.5125706713928668, - "learning_rate": 1.9875988744580837e-06, - "loss": 1.2872, - "step": 581 - }, - { - "epoch": 0.07889920694096116, - "grad_norm": 1.368428556534405, - "learning_rate": 1.987529835332886e-06, - "loss": 1.2611, - "step": 582 - }, - { - "epoch": 0.07903477258862604, - "grad_norm": 1.7246034795356069, - "learning_rate": 1.9874606057703546e-06, - "loss": 1.3097, - "step": 583 - }, - { - "epoch": 0.07917033823629092, - "grad_norm": 1.871146294229989, - "learning_rate": 1.9873911857838395e-06, - "loss": 1.2831, - "step": 584 - }, - { - "epoch": 0.07930590388395581, - "grad_norm": 1.496208218264772, - "learning_rate": 1.9873215753867286e-06, - "loss": 1.3226, - "step": 585 - }, - { - "epoch": 0.07944146953162069, - "grad_norm": 1.5727741277520515, - "learning_rate": 1.987251774592445e-06, - "loss": 1.313, - "step": 586 - }, - { - "epoch": 0.07957703517928556, - "grad_norm": 1.7678799112843224, - "learning_rate": 1.98718178341445e-06, - "loss": 1.3125, - "step": 587 - }, - { - "epoch": 0.07971260082695045, - "grad_norm": 3.5595752817692676, - "learning_rate": 1.9871116018662403e-06, - "loss": 1.2978, - "step": 588 - }, - { - "epoch": 0.07984816647461533, - "grad_norm": 7.279514237780942, - "learning_rate": 1.98704122996135e-06, - "loss": 1.2978, - "step": 589 - }, - { - "epoch": 0.07998373212228022, - "grad_norm": 1.6383011755600667, - "learning_rate": 1.9869706677133493e-06, - "loss": 1.2944, - "step": 590 - }, - { - "epoch": 0.0801192977699451, - "grad_norm": 1.5608035960675017, - "learning_rate": 1.9868999151358465e-06, - "loss": 1.329, - "step": 591 - }, - { - "epoch": 0.08025486341760998, - "grad_norm": 1.8924487716303573, - "learning_rate": 1.9868289722424846e-06, - "loss": 1.3027, - "step": 592 - }, - { - "epoch": 0.08039042906527485, - "grad_norm": 1.565913059053905, - "learning_rate": 1.9867578390469454e-06, - "loss": 1.3055, - "step": 593 - }, - { - "epoch": 0.08052599471293974, - "grad_norm": 1.6289662979337494, - "learning_rate": 1.986686515562946e-06, - "loss": 1.2839, - "step": 594 - }, - { - "epoch": 0.08066156036060462, - "grad_norm": 1.4494767063201306, - "learning_rate": 1.9866150018042403e-06, - "loss": 1.3068, - "step": 595 - }, - { - "epoch": 0.08079712600826951, - "grad_norm": 1.6060449184920051, - "learning_rate": 1.986543297784619e-06, - "loss": 1.3181, - "step": 596 - }, - { - "epoch": 0.08093269165593439, - "grad_norm": 3.3757983347850717, - "learning_rate": 1.9864714035179108e-06, - "loss": 1.2989, - "step": 597 - }, - { - "epoch": 0.08106825730359926, - "grad_norm": 1.5558330191708303, - "learning_rate": 1.986399319017979e-06, - "loss": 1.2712, - "step": 598 - }, - { - "epoch": 0.08120382295126415, - "grad_norm": 1.5500131424810792, - "learning_rate": 1.986327044298724e-06, - "loss": 1.3021, - "step": 599 - }, - { - "epoch": 0.08133938859892903, - "grad_norm": 2.4455017466103537, - "learning_rate": 1.986254579374085e-06, - "loss": 1.2548, - "step": 600 - }, - { - "epoch": 0.08147495424659391, - "grad_norm": 1.5970706452197627, - "learning_rate": 1.9861819242580353e-06, - "loss": 1.2783, - "step": 601 - }, - { - "epoch": 0.0816105198942588, - "grad_norm": 1.860376714419881, - "learning_rate": 1.9861090789645855e-06, - "loss": 1.2716, - "step": 602 - }, - { - "epoch": 0.08174608554192368, - "grad_norm": 2.4738764071870185, - "learning_rate": 1.9860360435077837e-06, - "loss": 1.2924, - "step": 603 - }, - { - "epoch": 0.08188165118958855, - "grad_norm": 1.6775167006194183, - "learning_rate": 1.9859628179017142e-06, - "loss": 1.273, - "step": 604 - }, - { - "epoch": 0.08201721683725344, - "grad_norm": 1.3743539604551787, - "learning_rate": 1.985889402160498e-06, - "loss": 1.2924, - "step": 605 - }, - { - "epoch": 0.08215278248491832, - "grad_norm": 11.46999079888356, - "learning_rate": 1.985815796298293e-06, - "loss": 1.2951, - "step": 606 - }, - { - "epoch": 0.0822883481325832, - "grad_norm": 1.6177478902833045, - "learning_rate": 1.985742000329293e-06, - "loss": 1.3026, - "step": 607 - }, - { - "epoch": 0.08242391378024809, - "grad_norm": 1.935217073295268, - "learning_rate": 1.9856680142677294e-06, - "loss": 1.2596, - "step": 608 - }, - { - "epoch": 0.08255947942791296, - "grad_norm": 3.4876172471554123, - "learning_rate": 1.9855938381278698e-06, - "loss": 1.2901, - "step": 609 - }, - { - "epoch": 0.08269504507557784, - "grad_norm": 1.4612246110604123, - "learning_rate": 1.985519471924018e-06, - "loss": 1.2744, - "step": 610 - }, - { - "epoch": 0.08283061072324273, - "grad_norm": 2.9041846662895208, - "learning_rate": 1.985444915670515e-06, - "loss": 1.2911, - "step": 611 - }, - { - "epoch": 0.08296617637090761, - "grad_norm": 1.8387015285821628, - "learning_rate": 1.9853701693817393e-06, - "loss": 1.2485, - "step": 612 - }, - { - "epoch": 0.0831017420185725, - "grad_norm": 2.0760928548157147, - "learning_rate": 1.985295233072104e-06, - "loss": 1.2823, - "step": 613 - }, - { - "epoch": 0.08323730766623738, - "grad_norm": 3.0119348906280803, - "learning_rate": 1.9852201067560607e-06, - "loss": 1.311, - "step": 614 - }, - { - "epoch": 0.08337287331390225, - "grad_norm": 1.6481286499751628, - "learning_rate": 1.9851447904480964e-06, - "loss": 1.2941, - "step": 615 - }, - { - "epoch": 0.08350843896156714, - "grad_norm": 1.4797109489588276, - "learning_rate": 1.9850692841627356e-06, - "loss": 1.3031, - "step": 616 - }, - { - "epoch": 0.08364400460923202, - "grad_norm": 1.4279429291671268, - "learning_rate": 1.984993587914539e-06, - "loss": 1.2538, - "step": 617 - }, - { - "epoch": 0.0837795702568969, - "grad_norm": 1.9789331247025645, - "learning_rate": 1.9849177017181044e-06, - "loss": 1.268, - "step": 618 - }, - { - "epoch": 0.08391513590456179, - "grad_norm": 1.9271980328183445, - "learning_rate": 1.984841625588065e-06, - "loss": 1.2746, - "step": 619 - }, - { - "epoch": 0.08405070155222667, - "grad_norm": 1.806767483167396, - "learning_rate": 1.9847653595390923e-06, - "loss": 1.2829, - "step": 620 - }, - { - "epoch": 0.08418626719989154, - "grad_norm": 2.0638256433097677, - "learning_rate": 1.984688903585893e-06, - "loss": 1.2844, - "step": 621 - }, - { - "epoch": 0.08432183284755643, - "grad_norm": 1.5518598035443159, - "learning_rate": 1.9846122577432116e-06, - "loss": 1.2663, - "step": 622 - }, - { - "epoch": 0.08445739849522131, - "grad_norm": 1.5909301133353464, - "learning_rate": 1.9845354220258283e-06, - "loss": 1.2503, - "step": 623 - }, - { - "epoch": 0.0845929641428862, - "grad_norm": 1.5921965847653332, - "learning_rate": 1.9844583964485604e-06, - "loss": 1.2843, - "step": 624 - }, - { - "epoch": 0.08472852979055108, - "grad_norm": 1.5415413150445911, - "learning_rate": 1.9843811810262612e-06, - "loss": 1.2924, - "step": 625 - }, - { - "epoch": 0.08486409543821595, - "grad_norm": 1.744085814825703, - "learning_rate": 1.984303775773822e-06, - "loss": 1.2931, - "step": 626 - }, - { - "epoch": 0.08499966108588083, - "grad_norm": 1.6386385339250844, - "learning_rate": 1.9842261807061685e-06, - "loss": 1.2939, - "step": 627 - }, - { - "epoch": 0.08513522673354572, - "grad_norm": 1.9602338823924021, - "learning_rate": 1.984148395838266e-06, - "loss": 1.2914, - "step": 628 - }, - { - "epoch": 0.0852707923812106, - "grad_norm": 3.034970277790808, - "learning_rate": 1.984070421185113e-06, - "loss": 1.2855, - "step": 629 - }, - { - "epoch": 0.08540635802887549, - "grad_norm": 2.7067929006597238, - "learning_rate": 1.983992256761747e-06, - "loss": 1.2665, - "step": 630 - }, - { - "epoch": 0.08554192367654037, - "grad_norm": 1.5579224610277251, - "learning_rate": 1.983913902583242e-06, - "loss": 1.2647, - "step": 631 - }, - { - "epoch": 0.08567748932420524, - "grad_norm": 1.8017031984047098, - "learning_rate": 1.983835358664707e-06, - "loss": 1.3258, - "step": 632 - }, - { - "epoch": 0.08581305497187013, - "grad_norm": 1.7528031181788906, - "learning_rate": 1.9837566250212894e-06, - "loss": 1.298, - "step": 633 - }, - { - "epoch": 0.08594862061953501, - "grad_norm": 1.832451846287892, - "learning_rate": 1.9836777016681723e-06, - "loss": 1.2302, - "step": 634 - }, - { - "epoch": 0.0860841862671999, - "grad_norm": 1.5162800859613486, - "learning_rate": 1.9835985886205744e-06, - "loss": 1.2836, - "step": 635 - }, - { - "epoch": 0.08621975191486478, - "grad_norm": 1.605871001008823, - "learning_rate": 1.983519285893753e-06, - "loss": 1.3177, - "step": 636 - }, - { - "epoch": 0.08635531756252965, - "grad_norm": 1.7684459789954774, - "learning_rate": 1.983439793503e-06, - "loss": 1.272, - "step": 637 - }, - { - "epoch": 0.08649088321019453, - "grad_norm": 1.945871398967508, - "learning_rate": 1.9833601114636465e-06, - "loss": 1.2668, - "step": 638 - }, - { - "epoch": 0.08662644885785942, - "grad_norm": 1.5121979103328829, - "learning_rate": 1.9832802397910578e-06, - "loss": 1.286, - "step": 639 - }, - { - "epoch": 0.0867620145055243, - "grad_norm": 1.615637411914517, - "learning_rate": 1.983200178500636e-06, - "loss": 1.3077, - "step": 640 - }, - { - "epoch": 0.08689758015318919, - "grad_norm": 3.9163056751226297, - "learning_rate": 1.9831199276078208e-06, - "loss": 1.3055, - "step": 641 - }, - { - "epoch": 0.08703314580085407, - "grad_norm": 1.6355687740136384, - "learning_rate": 1.9830394871280876e-06, - "loss": 1.2425, - "step": 642 - }, - { - "epoch": 0.08716871144851894, - "grad_norm": 2.1948653079827203, - "learning_rate": 1.982958857076949e-06, - "loss": 1.2974, - "step": 643 - }, - { - "epoch": 0.08730427709618382, - "grad_norm": 1.473366323435812, - "learning_rate": 1.982878037469954e-06, - "loss": 1.2924, - "step": 644 - }, - { - "epoch": 0.08743984274384871, - "grad_norm": 1.4961064009714087, - "learning_rate": 1.9827970283226883e-06, - "loss": 1.3147, - "step": 645 - }, - { - "epoch": 0.08757540839151359, - "grad_norm": 2.0229297439654714, - "learning_rate": 1.9827158296507727e-06, - "loss": 1.2883, - "step": 646 - }, - { - "epoch": 0.08771097403917848, - "grad_norm": 1.566352372823629, - "learning_rate": 1.9826344414698667e-06, - "loss": 1.2684, - "step": 647 - }, - { - "epoch": 0.08784653968684335, - "grad_norm": 1.9824141973275196, - "learning_rate": 1.982552863795665e-06, - "loss": 1.2628, - "step": 648 - }, - { - "epoch": 0.08798210533450823, - "grad_norm": 2.191669969229068, - "learning_rate": 1.9824710966438995e-06, - "loss": 1.2856, - "step": 649 - }, - { - "epoch": 0.08811767098217312, - "grad_norm": 1.4306657579367612, - "learning_rate": 1.982389140030338e-06, - "loss": 1.2354, - "step": 650 - }, - { - "epoch": 0.088253236629838, - "grad_norm": 3.97632168514313, - "learning_rate": 1.9823069939707856e-06, - "loss": 1.28, - "step": 651 - }, - { - "epoch": 0.08838880227750288, - "grad_norm": 1.6510888374742998, - "learning_rate": 1.982224658481083e-06, - "loss": 1.2711, - "step": 652 - }, - { - "epoch": 0.08852436792516777, - "grad_norm": 1.4052834146854785, - "learning_rate": 1.9821421335771084e-06, - "loss": 1.2867, - "step": 653 - }, - { - "epoch": 0.08865993357283264, - "grad_norm": 1.9967319857182486, - "learning_rate": 1.9820594192747757e-06, - "loss": 1.3004, - "step": 654 - }, - { - "epoch": 0.08879549922049752, - "grad_norm": 1.7202008962386708, - "learning_rate": 1.981976515590036e-06, - "loss": 1.3077, - "step": 655 - }, - { - "epoch": 0.08893106486816241, - "grad_norm": 1.9391541858495354, - "learning_rate": 1.9818934225388765e-06, - "loss": 1.3162, - "step": 656 - }, - { - "epoch": 0.08906663051582729, - "grad_norm": 2.128898698074163, - "learning_rate": 1.981810140137321e-06, - "loss": 1.2822, - "step": 657 - }, - { - "epoch": 0.08920219616349218, - "grad_norm": 1.604711215743902, - "learning_rate": 1.9817266684014303e-06, - "loss": 1.2546, - "step": 658 - }, - { - "epoch": 0.08933776181115705, - "grad_norm": 1.5999653178498394, - "learning_rate": 1.9816430073473005e-06, - "loss": 1.2746, - "step": 659 - }, - { - "epoch": 0.08947332745882193, - "grad_norm": 31.988137306423816, - "learning_rate": 1.9815591569910653e-06, - "loss": 1.2958, - "step": 660 - }, - { - "epoch": 0.08960889310648681, - "grad_norm": 21.29864133661415, - "learning_rate": 1.9814751173488944e-06, - "loss": 1.3031, - "step": 661 - }, - { - "epoch": 0.0897444587541517, - "grad_norm": 1.5469991411494257, - "learning_rate": 1.981390888436995e-06, - "loss": 1.3298, - "step": 662 - }, - { - "epoch": 0.08988002440181658, - "grad_norm": 1.3577465240185425, - "learning_rate": 1.981306470271609e-06, - "loss": 1.2852, - "step": 663 - }, - { - "epoch": 0.09001559004948147, - "grad_norm": 2.1317730038060314, - "learning_rate": 1.9812218628690165e-06, - "loss": 1.2787, - "step": 664 - }, - { - "epoch": 0.09015115569714634, - "grad_norm": 1.7924445701889, - "learning_rate": 1.981137066245533e-06, - "loss": 1.2581, - "step": 665 - }, - { - "epoch": 0.09028672134481122, - "grad_norm": 1.410246700531771, - "learning_rate": 1.981052080417511e-06, - "loss": 1.2896, - "step": 666 - }, - { - "epoch": 0.0904222869924761, - "grad_norm": 1.711366035994104, - "learning_rate": 1.980966905401339e-06, - "loss": 1.2506, - "step": 667 - }, - { - "epoch": 0.09055785264014099, - "grad_norm": 2.1698919251586957, - "learning_rate": 1.9808815412134424e-06, - "loss": 1.2688, - "step": 668 - }, - { - "epoch": 0.09069341828780587, - "grad_norm": 2.116424341389474, - "learning_rate": 1.9807959878702833e-06, - "loss": 1.2509, - "step": 669 - }, - { - "epoch": 0.09082898393547076, - "grad_norm": 2.3480725387597463, - "learning_rate": 1.98071024538836e-06, - "loss": 1.3034, - "step": 670 - }, - { - "epoch": 0.09096454958313563, - "grad_norm": 1.6315038060072091, - "learning_rate": 1.980624313784207e-06, - "loss": 1.2753, - "step": 671 - }, - { - "epoch": 0.09110011523080051, - "grad_norm": 2.1210837512882947, - "learning_rate": 1.980538193074396e-06, - "loss": 1.2584, - "step": 672 - }, - { - "epoch": 0.0912356808784654, - "grad_norm": 1.4806465269317812, - "learning_rate": 1.980451883275534e-06, - "loss": 1.2661, - "step": 673 - }, - { - "epoch": 0.09137124652613028, - "grad_norm": 1.6625372093772222, - "learning_rate": 1.9803653844042655e-06, - "loss": 1.2812, - "step": 674 - }, - { - "epoch": 0.09150681217379517, - "grad_norm": 1.7453684209775335, - "learning_rate": 1.9802786964772714e-06, - "loss": 1.2793, - "step": 675 - }, - { - "epoch": 0.09164237782146004, - "grad_norm": 2.372444755215296, - "learning_rate": 1.9801918195112684e-06, - "loss": 1.2797, - "step": 676 - }, - { - "epoch": 0.09177794346912492, - "grad_norm": 1.4763829577649146, - "learning_rate": 1.9801047535230103e-06, - "loss": 1.2957, - "step": 677 - }, - { - "epoch": 0.0919135091167898, - "grad_norm": 1.7086929623242846, - "learning_rate": 1.9800174985292866e-06, - "loss": 1.2947, - "step": 678 - }, - { - "epoch": 0.09204907476445469, - "grad_norm": 1.8121606895128257, - "learning_rate": 1.9799300545469248e-06, - "loss": 1.2879, - "step": 679 - }, - { - "epoch": 0.09218464041211957, - "grad_norm": 1.663852066692697, - "learning_rate": 1.9798424215927864e-06, - "loss": 1.2335, - "step": 680 - }, - { - "epoch": 0.09232020605978446, - "grad_norm": 1.7059759383233075, - "learning_rate": 1.979754599683772e-06, - "loss": 1.3039, - "step": 681 - }, - { - "epoch": 0.09245577170744933, - "grad_norm": 2.0413233065089136, - "learning_rate": 1.979666588836816e-06, - "loss": 1.2673, - "step": 682 - }, - { - "epoch": 0.09259133735511421, - "grad_norm": 1.5974223087749404, - "learning_rate": 1.9795783890688917e-06, - "loss": 1.2979, - "step": 683 - }, - { - "epoch": 0.0927269030027791, - "grad_norm": 1.7254315449324926, - "learning_rate": 1.9794900003970073e-06, - "loss": 1.2782, - "step": 684 - }, - { - "epoch": 0.09286246865044398, - "grad_norm": 1.9582526062781564, - "learning_rate": 1.9794014228382085e-06, - "loss": 1.26, - "step": 685 - }, - { - "epoch": 0.09299803429810886, - "grad_norm": 1.6855648064107889, - "learning_rate": 1.9793126564095756e-06, - "loss": 1.2911, - "step": 686 - }, - { - "epoch": 0.09313359994577373, - "grad_norm": 1.4815049983234934, - "learning_rate": 1.979223701128227e-06, - "loss": 1.3178, - "step": 687 - }, - { - "epoch": 0.09326916559343862, - "grad_norm": 1.628627802900006, - "learning_rate": 1.979134557011318e-06, - "loss": 1.2783, - "step": 688 - }, - { - "epoch": 0.0934047312411035, - "grad_norm": 2.427822361817264, - "learning_rate": 1.979045224076038e-06, - "loss": 1.3106, - "step": 689 - }, - { - "epoch": 0.09354029688876839, - "grad_norm": 2.169833160909322, - "learning_rate": 1.9789557023396145e-06, - "loss": 1.2688, - "step": 690 - }, - { - "epoch": 0.09367586253643327, - "grad_norm": 1.8474082868980088, - "learning_rate": 1.9788659918193115e-06, - "loss": 1.3, - "step": 691 - }, - { - "epoch": 0.09381142818409816, - "grad_norm": 1.6028672641968185, - "learning_rate": 1.9787760925324285e-06, - "loss": 1.2876, - "step": 692 - }, - { - "epoch": 0.09394699383176303, - "grad_norm": 2.113549766034792, - "learning_rate": 1.9786860044963023e-06, - "loss": 1.2763, - "step": 693 - }, - { - "epoch": 0.09408255947942791, - "grad_norm": 2.119107258607182, - "learning_rate": 1.978595727728305e-06, - "loss": 1.2953, - "step": 694 - }, - { - "epoch": 0.0942181251270928, - "grad_norm": 1.3580683384851495, - "learning_rate": 1.9785052622458467e-06, - "loss": 1.2754, - "step": 695 - }, - { - "epoch": 0.09435369077475768, - "grad_norm": 2.0347326317362673, - "learning_rate": 1.978414608066372e-06, - "loss": 1.2978, - "step": 696 - }, - { - "epoch": 0.09448925642242256, - "grad_norm": 1.9530036036028577, - "learning_rate": 1.9783237652073633e-06, - "loss": 1.2492, - "step": 697 - }, - { - "epoch": 0.09462482207008743, - "grad_norm": 1.6265189119043595, - "learning_rate": 1.978232733686339e-06, - "loss": 1.2465, - "step": 698 - }, - { - "epoch": 0.09476038771775232, - "grad_norm": 1.5374981086648056, - "learning_rate": 1.9781415135208536e-06, - "loss": 1.2769, - "step": 699 - }, - { - "epoch": 0.0948959533654172, - "grad_norm": 1.581981420974454, - "learning_rate": 1.9780501047284983e-06, - "loss": 1.2713, - "step": 700 - }, - { - "epoch": 0.09503151901308209, - "grad_norm": 1.5877442380354205, - "learning_rate": 1.977958507326901e-06, - "loss": 1.2608, - "step": 701 - }, - { - "epoch": 0.09516708466074697, - "grad_norm": 1.6784522141134899, - "learning_rate": 1.9778667213337242e-06, - "loss": 1.2807, - "step": 702 - }, - { - "epoch": 0.09530265030841185, - "grad_norm": 2.6132137248801355, - "learning_rate": 1.97777474676667e-06, - "loss": 1.2672, - "step": 703 - }, - { - "epoch": 0.09543821595607672, - "grad_norm": 1.9561132148325933, - "learning_rate": 1.9776825836434733e-06, - "loss": 1.2653, - "step": 704 - }, - { - "epoch": 0.09557378160374161, - "grad_norm": 1.4291774300514024, - "learning_rate": 1.977590231981908e-06, - "loss": 1.2746, - "step": 705 - }, - { - "epoch": 0.09570934725140649, - "grad_norm": 1.451404608189407, - "learning_rate": 1.977497691799783e-06, - "loss": 1.267, - "step": 706 - }, - { - "epoch": 0.09584491289907138, - "grad_norm": 1.8276056261059224, - "learning_rate": 1.9774049631149443e-06, - "loss": 1.2785, - "step": 707 - }, - { - "epoch": 0.09598047854673626, - "grad_norm": 1.6032764817152547, - "learning_rate": 1.977312045945273e-06, - "loss": 1.2802, - "step": 708 - }, - { - "epoch": 0.09611604419440115, - "grad_norm": 1.7309554508393425, - "learning_rate": 1.9772189403086884e-06, - "loss": 1.2625, - "step": 709 - }, - { - "epoch": 0.09625160984206602, - "grad_norm": 1.5614635745975578, - "learning_rate": 1.977125646223145e-06, - "loss": 1.268, - "step": 710 - }, - { - "epoch": 0.0963871754897309, - "grad_norm": 1.7466695641535293, - "learning_rate": 1.977032163706633e-06, - "loss": 1.3235, - "step": 711 - }, - { - "epoch": 0.09652274113739578, - "grad_norm": 2.0245549024682687, - "learning_rate": 1.976938492777182e-06, - "loss": 1.2496, - "step": 712 - }, - { - "epoch": 0.09665830678506067, - "grad_norm": 1.4833217778779997, - "learning_rate": 1.976844633452853e-06, - "loss": 1.2721, - "step": 713 - }, - { - "epoch": 0.09679387243272555, - "grad_norm": 2.485928686175528, - "learning_rate": 1.976750585751747e-06, - "loss": 1.2542, - "step": 714 - }, - { - "epoch": 0.09692943808039042, - "grad_norm": 1.5198694354628275, - "learning_rate": 1.9766563496920014e-06, - "loss": 1.2904, - "step": 715 - }, - { - "epoch": 0.09706500372805531, - "grad_norm": 1.633720577847526, - "learning_rate": 1.9765619252917873e-06, - "loss": 1.2773, - "step": 716 - }, - { - "epoch": 0.09720056937572019, - "grad_norm": 2.510407800678708, - "learning_rate": 1.9764673125693146e-06, - "loss": 1.2943, - "step": 717 - }, - { - "epoch": 0.09733613502338508, - "grad_norm": 3.021141519839254, - "learning_rate": 1.9763725115428284e-06, - "loss": 1.2542, - "step": 718 - }, - { - "epoch": 0.09747170067104996, - "grad_norm": 1.7126479713661096, - "learning_rate": 1.9762775222306107e-06, - "loss": 1.2736, - "step": 719 - }, - { - "epoch": 0.09760726631871484, - "grad_norm": 1.6262182782458037, - "learning_rate": 1.976182344650979e-06, - "loss": 1.2648, - "step": 720 - }, - { - "epoch": 0.09774283196637971, - "grad_norm": 1.7336122607713624, - "learning_rate": 1.9760869788222873e-06, - "loss": 1.2591, - "step": 721 - }, - { - "epoch": 0.0978783976140446, - "grad_norm": 2.665015743333432, - "learning_rate": 1.9759914247629264e-06, - "loss": 1.2929, - "step": 722 - }, - { - "epoch": 0.09801396326170948, - "grad_norm": 1.5452797836536165, - "learning_rate": 1.975895682491324e-06, - "loss": 1.2462, - "step": 723 - }, - { - "epoch": 0.09814952890937437, - "grad_norm": 1.7389593036870483, - "learning_rate": 1.975799752025942e-06, - "loss": 1.3001, - "step": 724 - }, - { - "epoch": 0.09828509455703925, - "grad_norm": 1.5629965610880847, - "learning_rate": 1.97570363338528e-06, - "loss": 1.2664, - "step": 725 - }, - { - "epoch": 0.09842066020470412, - "grad_norm": 2.2302046147953467, - "learning_rate": 1.9756073265878746e-06, - "loss": 1.2757, - "step": 726 - }, - { - "epoch": 0.098556225852369, - "grad_norm": 1.7159532034122267, - "learning_rate": 1.9755108316522967e-06, - "loss": 1.2799, - "step": 727 - }, - { - "epoch": 0.09869179150003389, - "grad_norm": 1.447176459956692, - "learning_rate": 1.9754141485971555e-06, - "loss": 1.2756, - "step": 728 - }, - { - "epoch": 0.09882735714769877, - "grad_norm": 1.5665690954691323, - "learning_rate": 1.9753172774410952e-06, - "loss": 1.2874, - "step": 729 - }, - { - "epoch": 0.09896292279536366, - "grad_norm": 1.574369159208473, - "learning_rate": 1.9752202182027967e-06, - "loss": 1.2722, - "step": 730 - }, - { - "epoch": 0.09909848844302854, - "grad_norm": 15.412214893481114, - "learning_rate": 1.9751229709009767e-06, - "loss": 1.2504, - "step": 731 - }, - { - "epoch": 0.09923405409069341, - "grad_norm": 1.4367962899242634, - "learning_rate": 1.975025535554389e-06, - "loss": 1.3092, - "step": 732 - }, - { - "epoch": 0.0993696197383583, - "grad_norm": 1.599804043516488, - "learning_rate": 1.9749279121818236e-06, - "loss": 1.2439, - "step": 733 - }, - { - "epoch": 0.09950518538602318, - "grad_norm": 1.5663906812806803, - "learning_rate": 1.9748301008021055e-06, - "loss": 1.2838, - "step": 734 - }, - { - "epoch": 0.09964075103368807, - "grad_norm": 1.6403049877053975, - "learning_rate": 1.9747321014340974e-06, - "loss": 1.2334, - "step": 735 - }, - { - "epoch": 0.09977631668135295, - "grad_norm": 1.5204378637994362, - "learning_rate": 1.974633914096698e-06, - "loss": 1.2487, - "step": 736 - }, - { - "epoch": 0.09991188232901782, - "grad_norm": 1.6438341392102949, - "learning_rate": 1.974535538808841e-06, - "loss": 1.2689, - "step": 737 - }, - { - "epoch": 0.1000474479766827, - "grad_norm": 1.40066131309602, - "learning_rate": 1.9744369755894977e-06, - "loss": 1.3051, - "step": 738 - }, - { - "epoch": 0.10018301362434759, - "grad_norm": 1.5697385949561993, - "learning_rate": 1.974338224457676e-06, - "loss": 1.2584, - "step": 739 - }, - { - "epoch": 0.10031857927201247, - "grad_norm": 1.4839171349542781, - "learning_rate": 1.9742392854324186e-06, - "loss": 1.2915, - "step": 740 - }, - { - "epoch": 0.10045414491967736, - "grad_norm": 2.314523955568001, - "learning_rate": 1.974140158532805e-06, - "loss": 1.2947, - "step": 741 - }, - { - "epoch": 0.10058971056734224, - "grad_norm": 1.9879510536035845, - "learning_rate": 1.974040843777951e-06, - "loss": 1.2274, - "step": 742 - }, - { - "epoch": 0.10072527621500711, - "grad_norm": 8.461030352584999, - "learning_rate": 1.973941341187009e-06, - "loss": 1.2831, - "step": 743 - }, - { - "epoch": 0.100860841862672, - "grad_norm": 1.510644350033131, - "learning_rate": 1.9738416507791676e-06, - "loss": 1.2887, - "step": 744 - }, - { - "epoch": 0.10099640751033688, - "grad_norm": 1.758178071911335, - "learning_rate": 1.9737417725736507e-06, - "loss": 1.2325, - "step": 745 - }, - { - "epoch": 0.10113197315800176, - "grad_norm": 1.436185039703425, - "learning_rate": 1.9736417065897187e-06, - "loss": 1.3195, - "step": 746 - }, - { - "epoch": 0.10126753880566665, - "grad_norm": 2.0841992510118224, - "learning_rate": 1.9735414528466694e-06, - "loss": 1.292, - "step": 747 - }, - { - "epoch": 0.10140310445333152, - "grad_norm": 1.504809454982794, - "learning_rate": 1.9734410113638356e-06, - "loss": 1.2721, - "step": 748 - }, - { - "epoch": 0.1015386701009964, - "grad_norm": 1.7374768813281256, - "learning_rate": 1.973340382160587e-06, - "loss": 1.2808, - "step": 749 - }, - { - "epoch": 0.10167423574866129, - "grad_norm": 1.6362596739393156, - "learning_rate": 1.973239565256328e-06, - "loss": 1.3203, - "step": 750 - }, - { - "epoch": 0.10180980139632617, - "grad_norm": 1.6898965219227324, - "learning_rate": 1.973138560670502e-06, - "loss": 1.3024, - "step": 751 - }, - { - "epoch": 0.10194536704399106, - "grad_norm": 1.66743253980346, - "learning_rate": 1.973037368422585e-06, - "loss": 1.2398, - "step": 752 - }, - { - "epoch": 0.10208093269165594, - "grad_norm": 1.935099864389448, - "learning_rate": 1.9729359885320933e-06, - "loss": 1.2805, - "step": 753 - }, - { - "epoch": 0.10221649833932081, - "grad_norm": 1.5528957201794682, - "learning_rate": 1.9728344210185757e-06, - "loss": 1.2511, - "step": 754 - }, - { - "epoch": 0.1023520639869857, - "grad_norm": 1.6256867277587643, - "learning_rate": 1.9727326659016187e-06, - "loss": 1.2845, - "step": 755 - }, - { - "epoch": 0.10248762963465058, - "grad_norm": 1.4314543653225704, - "learning_rate": 1.972630723200846e-06, - "loss": 1.2543, - "step": 756 - }, - { - "epoch": 0.10262319528231546, - "grad_norm": 1.4374501481988509, - "learning_rate": 1.9725285929359156e-06, - "loss": 1.2625, - "step": 757 - }, - { - "epoch": 0.10275876092998035, - "grad_norm": 2.147596532864122, - "learning_rate": 1.9724262751265222e-06, - "loss": 1.2891, - "step": 758 - }, - { - "epoch": 0.10289432657764523, - "grad_norm": 2.0275417697695466, - "learning_rate": 1.972323769792398e-06, - "loss": 1.2992, - "step": 759 - }, - { - "epoch": 0.1030298922253101, - "grad_norm": 1.7541285057707512, - "learning_rate": 1.97222107695331e-06, - "loss": 1.2954, - "step": 760 - }, - { - "epoch": 0.10316545787297499, - "grad_norm": 1.9552489776499686, - "learning_rate": 1.9721181966290614e-06, - "loss": 1.2581, - "step": 761 - }, - { - "epoch": 0.10330102352063987, - "grad_norm": 1.6272177470116274, - "learning_rate": 1.9720151288394916e-06, - "loss": 1.2797, - "step": 762 - }, - { - "epoch": 0.10343658916830475, - "grad_norm": 2.0469354878465325, - "learning_rate": 1.9719118736044773e-06, - "loss": 1.2483, - "step": 763 - }, - { - "epoch": 0.10357215481596964, - "grad_norm": 7.172800527407506, - "learning_rate": 1.97180843094393e-06, - "loss": 1.2517, - "step": 764 - }, - { - "epoch": 0.10370772046363451, - "grad_norm": 1.6862036695136389, - "learning_rate": 1.9717048008777978e-06, - "loss": 1.2885, - "step": 765 - }, - { - "epoch": 0.1038432861112994, - "grad_norm": 1.9719088387570782, - "learning_rate": 1.9716009834260645e-06, - "loss": 1.2922, - "step": 766 - }, - { - "epoch": 0.10397885175896428, - "grad_norm": 1.4861523448379703, - "learning_rate": 1.971496978608751e-06, - "loss": 1.2729, - "step": 767 - }, - { - "epoch": 0.10411441740662916, - "grad_norm": 1.6197310960897238, - "learning_rate": 1.971392786445914e-06, - "loss": 1.2735, - "step": 768 - }, - { - "epoch": 0.10424998305429405, - "grad_norm": 1.589928799974308, - "learning_rate": 1.9712884069576455e-06, - "loss": 1.2458, - "step": 769 - }, - { - "epoch": 0.10438554870195893, - "grad_norm": 2.3989492689158567, - "learning_rate": 1.971183840164075e-06, - "loss": 1.2482, - "step": 770 - }, - { - "epoch": 0.1045211143496238, - "grad_norm": 1.4517086319681582, - "learning_rate": 1.9710790860853667e-06, - "loss": 1.2516, - "step": 771 - }, - { - "epoch": 0.10465667999728868, - "grad_norm": 1.7076974939104437, - "learning_rate": 1.9709741447417223e-06, - "loss": 1.2577, - "step": 772 - }, - { - "epoch": 0.10479224564495357, - "grad_norm": 1.922689967618789, - "learning_rate": 1.970869016153378e-06, - "loss": 1.2943, - "step": 773 - }, - { - "epoch": 0.10492781129261845, - "grad_norm": 1.5125418793431575, - "learning_rate": 1.9707637003406075e-06, - "loss": 1.2622, - "step": 774 - }, - { - "epoch": 0.10506337694028334, - "grad_norm": 1.4636607439305682, - "learning_rate": 1.9706581973237202e-06, - "loss": 1.2527, - "step": 775 - }, - { - "epoch": 0.10519894258794821, - "grad_norm": 1.3812441509516777, - "learning_rate": 1.9705525071230616e-06, - "loss": 1.27, - "step": 776 - }, - { - "epoch": 0.10533450823561309, - "grad_norm": 2.865237880813151, - "learning_rate": 1.9704466297590134e-06, - "loss": 1.2911, - "step": 777 - }, - { - "epoch": 0.10547007388327798, - "grad_norm": 1.6553431100127027, - "learning_rate": 1.9703405652519924e-06, - "loss": 1.3086, - "step": 778 - }, - { - "epoch": 0.10560563953094286, - "grad_norm": 2.1164501762478816, - "learning_rate": 1.970234313622453e-06, - "loss": 1.285, - "step": 779 - }, - { - "epoch": 0.10574120517860774, - "grad_norm": 1.609086384716465, - "learning_rate": 1.9701278748908844e-06, - "loss": 1.2739, - "step": 780 - }, - { - "epoch": 0.10587677082627263, - "grad_norm": 1.7841574504373858, - "learning_rate": 1.9700212490778136e-06, - "loss": 1.2728, - "step": 781 - }, - { - "epoch": 0.1060123364739375, - "grad_norm": 1.523955430834669, - "learning_rate": 1.969914436203801e-06, - "loss": 1.2828, - "step": 782 - }, - { - "epoch": 0.10614790212160238, - "grad_norm": 2.806397425455087, - "learning_rate": 1.9698074362894456e-06, - "loss": 1.3043, - "step": 783 - }, - { - "epoch": 0.10628346776926727, - "grad_norm": 1.9566490640604284, - "learning_rate": 1.9697002493553815e-06, - "loss": 1.2696, - "step": 784 - }, - { - "epoch": 0.10641903341693215, - "grad_norm": 1.6209967146272017, - "learning_rate": 1.969592875422279e-06, - "loss": 1.292, - "step": 785 - }, - { - "epoch": 0.10655459906459704, - "grad_norm": 3.36843914374822, - "learning_rate": 1.9694853145108433e-06, - "loss": 1.2622, - "step": 786 - }, - { - "epoch": 0.1066901647122619, - "grad_norm": 2.13696258033473, - "learning_rate": 1.969377566641818e-06, - "loss": 1.2687, - "step": 787 - }, - { - "epoch": 0.10682573035992679, - "grad_norm": 1.8649322043139447, - "learning_rate": 1.96926963183598e-06, - "loss": 1.2863, - "step": 788 - }, - { - "epoch": 0.10696129600759167, - "grad_norm": 1.7278471887021751, - "learning_rate": 1.9691615101141454e-06, - "loss": 1.2946, - "step": 789 - }, - { - "epoch": 0.10709686165525656, - "grad_norm": 4.8500837955772065, - "learning_rate": 1.969053201497163e-06, - "loss": 1.2829, - "step": 790 - }, - { - "epoch": 0.10723242730292144, - "grad_norm": 1.5680663245981747, - "learning_rate": 1.96894470600592e-06, - "loss": 1.2842, - "step": 791 - }, - { - "epoch": 0.10736799295058633, - "grad_norm": 1.741450741261888, - "learning_rate": 1.9688360236613388e-06, - "loss": 1.2313, - "step": 792 - }, - { - "epoch": 0.1075035585982512, - "grad_norm": 1.8561874330629216, - "learning_rate": 1.968727154484378e-06, - "loss": 1.3176, - "step": 793 - }, - { - "epoch": 0.10763912424591608, - "grad_norm": 1.8914746300131384, - "learning_rate": 1.968618098496032e-06, - "loss": 1.2903, - "step": 794 - }, - { - "epoch": 0.10777468989358097, - "grad_norm": 1.7908453917409826, - "learning_rate": 1.9685088557173318e-06, - "loss": 1.2777, - "step": 795 - }, - { - "epoch": 0.10791025554124585, - "grad_norm": 1.5520296848145565, - "learning_rate": 1.968399426169344e-06, - "loss": 1.267, - "step": 796 - }, - { - "epoch": 0.10804582118891073, - "grad_norm": 1.4858809256622454, - "learning_rate": 1.9682898098731707e-06, - "loss": 1.2412, - "step": 797 - }, - { - "epoch": 0.1081813868365756, - "grad_norm": 2.643632999590669, - "learning_rate": 1.9681800068499507e-06, - "loss": 1.2746, - "step": 798 - }, - { - "epoch": 0.10831695248424049, - "grad_norm": 1.7226708374056627, - "learning_rate": 1.9680700171208583e-06, - "loss": 1.331, - "step": 799 - }, - { - "epoch": 0.10845251813190537, - "grad_norm": 1.6143992489412122, - "learning_rate": 1.9679598407071053e-06, - "loss": 1.2848, - "step": 800 - }, - { - "epoch": 0.10858808377957026, - "grad_norm": 1.7507716186101254, - "learning_rate": 1.967849477629937e-06, - "loss": 1.2781, - "step": 801 - }, - { - "epoch": 0.10872364942723514, - "grad_norm": 1.521036152500972, - "learning_rate": 1.9677389279106367e-06, - "loss": 1.2729, - "step": 802 - }, - { - "epoch": 0.10885921507490003, - "grad_norm": 1.5719555105983984, - "learning_rate": 1.9676281915705236e-06, - "loss": 1.2481, - "step": 803 - }, - { - "epoch": 0.1089947807225649, - "grad_norm": 2.051349756770951, - "learning_rate": 1.9675172686309516e-06, - "loss": 1.2776, - "step": 804 - }, - { - "epoch": 0.10913034637022978, - "grad_norm": 2.3211077189387646, - "learning_rate": 1.9674061591133114e-06, - "loss": 1.2728, - "step": 805 - }, - { - "epoch": 0.10926591201789466, - "grad_norm": 3.674067968202453, - "learning_rate": 1.9672948630390295e-06, - "loss": 1.2412, - "step": 806 - }, - { - "epoch": 0.10940147766555955, - "grad_norm": 4.688287815384254, - "learning_rate": 1.9671833804295684e-06, - "loss": 1.2928, - "step": 807 - }, - { - "epoch": 0.10953704331322443, - "grad_norm": 1.7692705197883027, - "learning_rate": 1.967071711306427e-06, - "loss": 1.2818, - "step": 808 - }, - { - "epoch": 0.10967260896088932, - "grad_norm": 1.6640975327189038, - "learning_rate": 1.96695985569114e-06, - "loss": 1.2749, - "step": 809 - }, - { - "epoch": 0.10980817460855419, - "grad_norm": 2.1424330672471092, - "learning_rate": 1.966847813605277e-06, - "loss": 1.268, - "step": 810 - }, - { - "epoch": 0.10994374025621907, - "grad_norm": 1.6713294698217311, - "learning_rate": 1.9667355850704456e-06, - "loss": 1.2908, - "step": 811 - }, - { - "epoch": 0.11007930590388396, - "grad_norm": 1.505286279798439, - "learning_rate": 1.9666231701082876e-06, - "loss": 1.2187, - "step": 812 - }, - { - "epoch": 0.11021487155154884, - "grad_norm": 3.1227536262024036, - "learning_rate": 1.966510568740481e-06, - "loss": 1.2636, - "step": 813 - }, - { - "epoch": 0.11035043719921372, - "grad_norm": 1.943919832350122, - "learning_rate": 1.9663977809887406e-06, - "loss": 1.2398, - "step": 814 - }, - { - "epoch": 0.1104860028468786, - "grad_norm": 1.955432040392769, - "learning_rate": 1.966284806874816e-06, - "loss": 1.2322, - "step": 815 - }, - { - "epoch": 0.11062156849454348, - "grad_norm": 1.5113860136999075, - "learning_rate": 1.966171646420494e-06, - "loss": 1.2533, - "step": 816 - }, - { - "epoch": 0.11075713414220836, - "grad_norm": 3.1272395709036194, - "learning_rate": 1.9660582996475962e-06, - "loss": 1.3028, - "step": 817 - }, - { - "epoch": 0.11089269978987325, - "grad_norm": 1.6515597811575264, - "learning_rate": 1.9659447665779815e-06, - "loss": 1.2249, - "step": 818 - }, - { - "epoch": 0.11102826543753813, - "grad_norm": 2.052907730950357, - "learning_rate": 1.965831047233543e-06, - "loss": 1.2509, - "step": 819 - }, - { - "epoch": 0.11116383108520302, - "grad_norm": 1.7395146985490642, - "learning_rate": 1.965717141636211e-06, - "loss": 1.2844, - "step": 820 - }, - { - "epoch": 0.11129939673286789, - "grad_norm": 1.52026905932968, - "learning_rate": 1.9656030498079507e-06, - "loss": 1.2737, - "step": 821 - }, - { - "epoch": 0.11143496238053277, - "grad_norm": 1.464574043811576, - "learning_rate": 1.9654887717707645e-06, - "loss": 1.2561, - "step": 822 - }, - { - "epoch": 0.11157052802819765, - "grad_norm": 1.453437200165557, - "learning_rate": 1.96537430754669e-06, - "loss": 1.3, - "step": 823 - }, - { - "epoch": 0.11170609367586254, - "grad_norm": 1.8553514592430425, - "learning_rate": 1.9652596571578003e-06, - "loss": 1.2916, - "step": 824 - }, - { - "epoch": 0.11184165932352742, - "grad_norm": 1.8809571857512666, - "learning_rate": 1.9651448206262047e-06, - "loss": 1.2487, - "step": 825 - }, - { - "epoch": 0.1119772249711923, - "grad_norm": 28.029758845478085, - "learning_rate": 1.965029797974049e-06, - "loss": 1.2655, - "step": 826 - }, - { - "epoch": 0.11211279061885718, - "grad_norm": 1.8027089268293792, - "learning_rate": 1.9649145892235145e-06, - "loss": 1.2479, - "step": 827 - }, - { - "epoch": 0.11224835626652206, - "grad_norm": 1.647419514738696, - "learning_rate": 1.964799194396818e-06, - "loss": 1.2611, - "step": 828 - }, - { - "epoch": 0.11238392191418695, - "grad_norm": 1.8593231742725629, - "learning_rate": 1.9646836135162125e-06, - "loss": 1.2625, - "step": 829 - }, - { - "epoch": 0.11251948756185183, - "grad_norm": 6.613446707953263, - "learning_rate": 1.9645678466039864e-06, - "loss": 1.2351, - "step": 830 - }, - { - "epoch": 0.11265505320951671, - "grad_norm": 1.5591967723197564, - "learning_rate": 1.9644518936824658e-06, - "loss": 1.2822, - "step": 831 - }, - { - "epoch": 0.11279061885718158, - "grad_norm": 2.0745683815045965, - "learning_rate": 1.9643357547740097e-06, - "loss": 1.2418, - "step": 832 - }, - { - "epoch": 0.11292618450484647, - "grad_norm": 3.8872711137150486, - "learning_rate": 1.9642194299010155e-06, - "loss": 1.2102, - "step": 833 - }, - { - "epoch": 0.11306175015251135, - "grad_norm": 1.8209809749464891, - "learning_rate": 1.9641029190859155e-06, - "loss": 1.2567, - "step": 834 - }, - { - "epoch": 0.11319731580017624, - "grad_norm": 1.5381605282961741, - "learning_rate": 1.9639862223511777e-06, - "loss": 1.2434, - "step": 835 - }, - { - "epoch": 0.11333288144784112, - "grad_norm": 1.5928669147722587, - "learning_rate": 1.9638693397193057e-06, - "loss": 1.2346, - "step": 836 - }, - { - "epoch": 0.11346844709550599, - "grad_norm": 2.092950986212143, - "learning_rate": 1.9637522712128407e-06, - "loss": 1.243, - "step": 837 - }, - { - "epoch": 0.11360401274317088, - "grad_norm": 1.898738301824002, - "learning_rate": 1.963635016854357e-06, - "loss": 1.2708, - "step": 838 - }, - { - "epoch": 0.11373957839083576, - "grad_norm": 1.3826524158905262, - "learning_rate": 1.963517576666467e-06, - "loss": 1.2562, - "step": 839 - }, - { - "epoch": 0.11387514403850064, - "grad_norm": 1.5628965029900301, - "learning_rate": 1.9633999506718176e-06, - "loss": 1.2732, - "step": 840 - }, - { - "epoch": 0.11401070968616553, - "grad_norm": 1.6022526906669057, - "learning_rate": 1.9632821388930926e-06, - "loss": 1.2423, - "step": 841 - }, - { - "epoch": 0.11414627533383041, - "grad_norm": 2.192606508762241, - "learning_rate": 1.9631641413530102e-06, - "loss": 1.2242, - "step": 842 - }, - { - "epoch": 0.11428184098149528, - "grad_norm": 1.5667686858178926, - "learning_rate": 1.9630459580743264e-06, - "loss": 1.2926, - "step": 843 - }, - { - "epoch": 0.11441740662916017, - "grad_norm": 1.4633853208155367, - "learning_rate": 1.9629275890798315e-06, - "loss": 1.2607, - "step": 844 - }, - { - "epoch": 0.11455297227682505, - "grad_norm": 1.8028736484332653, - "learning_rate": 1.962809034392352e-06, - "loss": 1.2674, - "step": 845 - }, - { - "epoch": 0.11468853792448994, - "grad_norm": 3.9443603648925145, - "learning_rate": 1.96269029403475e-06, - "loss": 1.2771, - "step": 846 - }, - { - "epoch": 0.11482410357215482, - "grad_norm": 1.4212543363678605, - "learning_rate": 1.962571368029924e-06, - "loss": 1.27, - "step": 847 - }, - { - "epoch": 0.1149596692198197, - "grad_norm": 1.6921129641813615, - "learning_rate": 1.9624522564008074e-06, - "loss": 1.2582, - "step": 848 - }, - { - "epoch": 0.11509523486748457, - "grad_norm": 1.577105224860581, - "learning_rate": 1.9623329591703706e-06, - "loss": 1.2588, - "step": 849 - }, - { - "epoch": 0.11523080051514946, - "grad_norm": 1.7494780956972387, - "learning_rate": 1.962213476361619e-06, - "loss": 1.2758, - "step": 850 - }, - { - "epoch": 0.11536636616281434, - "grad_norm": 1.574599791434754, - "learning_rate": 1.962093807997593e-06, - "loss": 1.2577, - "step": 851 - }, - { - "epoch": 0.11550193181047923, - "grad_norm": 1.7132079566088423, - "learning_rate": 1.961973954101371e-06, - "loss": 1.2835, - "step": 852 - }, - { - "epoch": 0.11563749745814411, - "grad_norm": 1.8375047510298568, - "learning_rate": 1.961853914696065e-06, - "loss": 1.2781, - "step": 853 - }, - { - "epoch": 0.11577306310580898, - "grad_norm": 1.470682876200792, - "learning_rate": 1.961733689804824e-06, - "loss": 1.2357, - "step": 854 - }, - { - "epoch": 0.11590862875347387, - "grad_norm": 2.8470346219756038, - "learning_rate": 1.961613279450833e-06, - "loss": 1.284, - "step": 855 - }, - { - "epoch": 0.11604419440113875, - "grad_norm": 1.6955449224934596, - "learning_rate": 1.9614926836573107e-06, - "loss": 1.2863, - "step": 856 - }, - { - "epoch": 0.11617976004880363, - "grad_norm": 1.3978470557756484, - "learning_rate": 1.9613719024475145e-06, - "loss": 1.2617, - "step": 857 - }, - { - "epoch": 0.11631532569646852, - "grad_norm": 1.5304688625654441, - "learning_rate": 1.961250935844735e-06, - "loss": 1.3009, - "step": 858 - }, - { - "epoch": 0.1164508913441334, - "grad_norm": 1.5143135757191295, - "learning_rate": 1.9611297838723007e-06, - "loss": 1.2718, - "step": 859 - }, - { - "epoch": 0.11658645699179827, - "grad_norm": 2.2230684604389186, - "learning_rate": 1.961008446553574e-06, - "loss": 1.3124, - "step": 860 - }, - { - "epoch": 0.11672202263946316, - "grad_norm": 1.7129151105577303, - "learning_rate": 1.9608869239119545e-06, - "loss": 1.2847, - "step": 861 - }, - { - "epoch": 0.11685758828712804, - "grad_norm": 1.59888880782774, - "learning_rate": 1.960765215970876e-06, - "loss": 1.2335, - "step": 862 - }, - { - "epoch": 0.11699315393479293, - "grad_norm": 2.3816511970848517, - "learning_rate": 1.9606433227538095e-06, - "loss": 1.2871, - "step": 863 - }, - { - "epoch": 0.11712871958245781, - "grad_norm": 1.7804844509307958, - "learning_rate": 1.960521244284261e-06, - "loss": 1.2938, - "step": 864 - }, - { - "epoch": 0.11726428523012268, - "grad_norm": 1.4907439497119248, - "learning_rate": 1.960398980585773e-06, - "loss": 1.2542, - "step": 865 - }, - { - "epoch": 0.11739985087778756, - "grad_norm": 1.6721954678806472, - "learning_rate": 1.960276531681922e-06, - "loss": 1.2449, - "step": 866 - }, - { - "epoch": 0.11753541652545245, - "grad_norm": 1.5468347731108112, - "learning_rate": 1.960153897596322e-06, - "loss": 1.2477, - "step": 867 - }, - { - "epoch": 0.11767098217311733, - "grad_norm": 1.770939032990603, - "learning_rate": 1.960031078352622e-06, - "loss": 1.2556, - "step": 868 - }, - { - "epoch": 0.11780654782078222, - "grad_norm": 2.2391009352404954, - "learning_rate": 1.9599080739745064e-06, - "loss": 1.2497, - "step": 869 - }, - { - "epoch": 0.1179421134684471, - "grad_norm": 1.8081896855269712, - "learning_rate": 1.9597848844856955e-06, - "loss": 1.285, - "step": 870 - }, - { - "epoch": 0.11807767911611197, - "grad_norm": 2.483366602345101, - "learning_rate": 1.959661509909946e-06, - "loss": 1.2504, - "step": 871 - }, - { - "epoch": 0.11821324476377686, - "grad_norm": 1.6509566775363147, - "learning_rate": 1.9595379502710495e-06, - "loss": 1.2656, - "step": 872 - }, - { - "epoch": 0.11834881041144174, - "grad_norm": 1.6096692846908967, - "learning_rate": 1.9594142055928333e-06, - "loss": 1.2341, - "step": 873 - }, - { - "epoch": 0.11848437605910662, - "grad_norm": 1.5067061419783216, - "learning_rate": 1.9592902758991606e-06, - "loss": 1.266, - "step": 874 - }, - { - "epoch": 0.11861994170677151, - "grad_norm": 2.6258702287766993, - "learning_rate": 1.9591661612139306e-06, - "loss": 1.2768, - "step": 875 - }, - { - "epoch": 0.11875550735443638, - "grad_norm": 1.6810064756713663, - "learning_rate": 1.9590418615610775e-06, - "loss": 1.2808, - "step": 876 - }, - { - "epoch": 0.11889107300210126, - "grad_norm": 1.579070452797444, - "learning_rate": 1.9589173769645714e-06, - "loss": 1.2732, - "step": 877 - }, - { - "epoch": 0.11902663864976615, - "grad_norm": 1.5661392494707786, - "learning_rate": 1.958792707448419e-06, - "loss": 1.2651, - "step": 878 - }, - { - "epoch": 0.11916220429743103, - "grad_norm": 1.6304416687657992, - "learning_rate": 1.9586678530366606e-06, - "loss": 1.2424, - "step": 879 - }, - { - "epoch": 0.11929776994509592, - "grad_norm": 2.4025959783358357, - "learning_rate": 1.958542813753374e-06, - "loss": 1.3082, - "step": 880 - }, - { - "epoch": 0.1194333355927608, - "grad_norm": 1.564548294064639, - "learning_rate": 1.9584175896226725e-06, - "loss": 1.2769, - "step": 881 - }, - { - "epoch": 0.11956890124042567, - "grad_norm": 1.5298088942977284, - "learning_rate": 1.9582921806687037e-06, - "loss": 1.2767, - "step": 882 - }, - { - "epoch": 0.11970446688809055, - "grad_norm": 1.6865875894779256, - "learning_rate": 1.9581665869156526e-06, - "loss": 1.2384, - "step": 883 - }, - { - "epoch": 0.11984003253575544, - "grad_norm": 1.4491629330031184, - "learning_rate": 1.958040808387738e-06, - "loss": 1.2436, - "step": 884 - }, - { - "epoch": 0.11997559818342032, - "grad_norm": 1.8647869300767543, - "learning_rate": 1.9579148451092163e-06, - "loss": 1.2513, - "step": 885 - }, - { - "epoch": 0.12011116383108521, - "grad_norm": 2.4608023949001243, - "learning_rate": 1.957788697104378e-06, - "loss": 1.3089, - "step": 886 - }, - { - "epoch": 0.12024672947875008, - "grad_norm": 1.8284479007455114, - "learning_rate": 1.9576623643975496e-06, - "loss": 1.2626, - "step": 887 - }, - { - "epoch": 0.12038229512641496, - "grad_norm": 2.124596345281153, - "learning_rate": 1.9575358470130934e-06, - "loss": 1.2415, - "step": 888 - }, - { - "epoch": 0.12051786077407985, - "grad_norm": 2.041288693029506, - "learning_rate": 1.9574091449754074e-06, - "loss": 1.2621, - "step": 889 - }, - { - "epoch": 0.12065342642174473, - "grad_norm": 1.5473955140496722, - "learning_rate": 1.9572822583089253e-06, - "loss": 1.2783, - "step": 890 - }, - { - "epoch": 0.12078899206940961, - "grad_norm": 2.3053571096674985, - "learning_rate": 1.9571551870381163e-06, - "loss": 1.2513, - "step": 891 - }, - { - "epoch": 0.1209245577170745, - "grad_norm": 1.6755594865886592, - "learning_rate": 1.9570279311874842e-06, - "loss": 1.2547, - "step": 892 - }, - { - "epoch": 0.12106012336473937, - "grad_norm": 5.483868877736675, - "learning_rate": 1.9569004907815706e-06, - "loss": 1.2611, - "step": 893 - }, - { - "epoch": 0.12119568901240425, - "grad_norm": 1.750377807540651, - "learning_rate": 1.9567728658449503e-06, - "loss": 1.2454, - "step": 894 - }, - { - "epoch": 0.12133125466006914, - "grad_norm": 1.573484089789557, - "learning_rate": 1.956645056402235e-06, - "loss": 1.2313, - "step": 895 - }, - { - "epoch": 0.12146682030773402, - "grad_norm": 1.5941705475565895, - "learning_rate": 1.956517062478072e-06, - "loss": 1.2603, - "step": 896 - }, - { - "epoch": 0.1216023859553989, - "grad_norm": 1.8638804973797685, - "learning_rate": 1.956388884097144e-06, - "loss": 1.2771, - "step": 897 - }, - { - "epoch": 0.12173795160306379, - "grad_norm": 1.7147061697928825, - "learning_rate": 1.9562605212841686e-06, - "loss": 1.2595, - "step": 898 - }, - { - "epoch": 0.12187351725072866, - "grad_norm": 1.6116627891911504, - "learning_rate": 1.9561319740639e-06, - "loss": 1.2728, - "step": 899 - }, - { - "epoch": 0.12200908289839354, - "grad_norm": 1.7251546211093454, - "learning_rate": 1.9560032424611274e-06, - "loss": 1.2491, - "step": 900 - }, - { - "epoch": 0.12214464854605843, - "grad_norm": 1.9040553631325392, - "learning_rate": 1.955874326500676e-06, - "loss": 1.2709, - "step": 901 - }, - { - "epoch": 0.12228021419372331, - "grad_norm": 1.9650259584293959, - "learning_rate": 1.955745226207406e-06, - "loss": 1.2401, - "step": 902 - }, - { - "epoch": 0.1224157798413882, - "grad_norm": 2.259370800687737, - "learning_rate": 1.9556159416062127e-06, - "loss": 1.2279, - "step": 903 - }, - { - "epoch": 0.12255134548905307, - "grad_norm": 1.711526510230783, - "learning_rate": 1.955486472722029e-06, - "loss": 1.2946, - "step": 904 - }, - { - "epoch": 0.12268691113671795, - "grad_norm": 1.713021412522385, - "learning_rate": 1.955356819579821e-06, - "loss": 1.2416, - "step": 905 - }, - { - "epoch": 0.12282247678438284, - "grad_norm": 2.7568415075320063, - "learning_rate": 1.955226982204591e-06, - "loss": 1.2621, - "step": 906 - }, - { - "epoch": 0.12295804243204772, - "grad_norm": 1.8293832950823714, - "learning_rate": 1.955096960621378e-06, - "loss": 1.2687, - "step": 907 - }, - { - "epoch": 0.1230936080797126, - "grad_norm": 1.5054633707831675, - "learning_rate": 1.9549667548552553e-06, - "loss": 1.2433, - "step": 908 - }, - { - "epoch": 0.12322917372737749, - "grad_norm": 1.7093653082722424, - "learning_rate": 1.9548363649313315e-06, - "loss": 1.2549, - "step": 909 - }, - { - "epoch": 0.12336473937504236, - "grad_norm": 2.0778024326846856, - "learning_rate": 1.9547057908747522e-06, - "loss": 1.2695, - "step": 910 - }, - { - "epoch": 0.12350030502270724, - "grad_norm": 1.9262297794942371, - "learning_rate": 1.954575032710697e-06, - "loss": 1.2514, - "step": 911 - }, - { - "epoch": 0.12363587067037213, - "grad_norm": 1.8291105906396925, - "learning_rate": 1.954444090464382e-06, - "loss": 1.2685, - "step": 912 - }, - { - "epoch": 0.12377143631803701, - "grad_norm": 1.5773506851358237, - "learning_rate": 1.9543129641610575e-06, - "loss": 1.2502, - "step": 913 - }, - { - "epoch": 0.1239070019657019, - "grad_norm": 1.5919369569765018, - "learning_rate": 1.9541816538260105e-06, - "loss": 1.2751, - "step": 914 - }, - { - "epoch": 0.12404256761336677, - "grad_norm": 1.399763568474772, - "learning_rate": 1.954050159484564e-06, - "loss": 1.2669, - "step": 915 - }, - { - "epoch": 0.12417813326103165, - "grad_norm": 2.16546190402148, - "learning_rate": 1.953918481162075e-06, - "loss": 1.2362, - "step": 916 - }, - { - "epoch": 0.12431369890869653, - "grad_norm": 1.73251220484067, - "learning_rate": 1.953786618883937e-06, - "loss": 1.2402, - "step": 917 - }, - { - "epoch": 0.12444926455636142, - "grad_norm": 1.7625479248174385, - "learning_rate": 1.953654572675578e-06, - "loss": 1.3062, - "step": 918 - }, - { - "epoch": 0.1245848302040263, - "grad_norm": 1.8203387771668529, - "learning_rate": 1.953522342562462e-06, - "loss": 1.2366, - "step": 919 - }, - { - "epoch": 0.12472039585169119, - "grad_norm": 4.1837069463971766, - "learning_rate": 1.9533899285700893e-06, - "loss": 1.2094, - "step": 920 - }, - { - "epoch": 0.12485596149935606, - "grad_norm": 2.1386376931682487, - "learning_rate": 1.9532573307239942e-06, - "loss": 1.2134, - "step": 921 - }, - { - "epoch": 0.12499152714702094, - "grad_norm": 1.633000635039251, - "learning_rate": 1.9531245490497475e-06, - "loss": 1.269, - "step": 922 - }, - { - "epoch": 0.12512709279468584, - "grad_norm": 1.6443585593265144, - "learning_rate": 1.952991583572955e-06, - "loss": 1.261, - "step": 923 - }, - { - "epoch": 0.1252626584423507, - "grad_norm": 1.9309861202426057, - "learning_rate": 1.9528584343192583e-06, - "loss": 1.2779, - "step": 924 - }, - { - "epoch": 0.12539822409001558, - "grad_norm": 1.5251835346521132, - "learning_rate": 1.9527251013143338e-06, - "loss": 1.2408, - "step": 925 - }, - { - "epoch": 0.12553378973768048, - "grad_norm": 2.3326594121496087, - "learning_rate": 1.9525915845838942e-06, - "loss": 1.2583, - "step": 926 - }, - { - "epoch": 0.12566935538534535, - "grad_norm": 1.6055791984579688, - "learning_rate": 1.952457884153686e-06, - "loss": 1.2362, - "step": 927 - }, - { - "epoch": 0.12580492103301025, - "grad_norm": 1.5491879525430206, - "learning_rate": 1.952324000049494e-06, - "loss": 1.2691, - "step": 928 - }, - { - "epoch": 0.12594048668067512, - "grad_norm": 1.5590892503345732, - "learning_rate": 1.952189932297135e-06, - "loss": 1.2489, - "step": 929 - }, - { - "epoch": 0.12607605232834, - "grad_norm": 1.8469697510756622, - "learning_rate": 1.9520556809224643e-06, - "loss": 1.2739, - "step": 930 - }, - { - "epoch": 0.1262116179760049, - "grad_norm": 1.5485436947356637, - "learning_rate": 1.9519212459513702e-06, - "loss": 1.3113, - "step": 931 - }, - { - "epoch": 0.12634718362366976, - "grad_norm": 1.6380746384276927, - "learning_rate": 1.951786627409778e-06, - "loss": 1.2209, - "step": 932 - }, - { - "epoch": 0.12648274927133465, - "grad_norm": 1.8540978125985466, - "learning_rate": 1.9516518253236474e-06, - "loss": 1.2641, - "step": 933 - }, - { - "epoch": 0.12661831491899952, - "grad_norm": 1.9975992261991338, - "learning_rate": 1.9515168397189743e-06, - "loss": 1.2238, - "step": 934 - }, - { - "epoch": 0.1267538805666644, - "grad_norm": 1.5048399844714937, - "learning_rate": 1.95138167062179e-06, - "loss": 1.2511, - "step": 935 - }, - { - "epoch": 0.1268894462143293, - "grad_norm": 1.5726204659702656, - "learning_rate": 1.9512463180581595e-06, - "loss": 1.2512, - "step": 936 - }, - { - "epoch": 0.12702501186199416, - "grad_norm": 1.4102582231240288, - "learning_rate": 1.9511107820541857e-06, - "loss": 1.2321, - "step": 937 - }, - { - "epoch": 0.12716057750965906, - "grad_norm": 1.6845560577582603, - "learning_rate": 1.9509750626360053e-06, - "loss": 1.2357, - "step": 938 - }, - { - "epoch": 0.12729614315732393, - "grad_norm": 2.4101320418003174, - "learning_rate": 1.95083915982979e-06, - "loss": 1.2503, - "step": 939 - }, - { - "epoch": 0.1274317088049888, - "grad_norm": 1.7748304908683372, - "learning_rate": 1.950703073661749e-06, - "loss": 1.2797, - "step": 940 - }, - { - "epoch": 0.1275672744526537, - "grad_norm": 1.6825985708168958, - "learning_rate": 1.950566804158124e-06, - "loss": 1.2588, - "step": 941 - }, - { - "epoch": 0.12770284010031857, - "grad_norm": 2.318233717026581, - "learning_rate": 1.9504303513451944e-06, - "loss": 1.2325, - "step": 942 - }, - { - "epoch": 0.12783840574798347, - "grad_norm": 1.9955951570388242, - "learning_rate": 1.9502937152492737e-06, - "loss": 1.241, - "step": 943 - }, - { - "epoch": 0.12797397139564834, - "grad_norm": 3.811293611060677, - "learning_rate": 1.950156895896711e-06, - "loss": 1.2621, - "step": 944 - }, - { - "epoch": 0.12810953704331324, - "grad_norm": 4.146487348999009, - "learning_rate": 1.9500198933138914e-06, - "loss": 1.2391, - "step": 945 - }, - { - "epoch": 0.1282451026909781, - "grad_norm": 2.062762742642024, - "learning_rate": 1.949882707527234e-06, - "loss": 1.2337, - "step": 946 - }, - { - "epoch": 0.12838066833864298, - "grad_norm": 2.340283441362446, - "learning_rate": 1.949745338563195e-06, - "loss": 1.2417, - "step": 947 - }, - { - "epoch": 0.12851623398630788, - "grad_norm": 1.5092933902873382, - "learning_rate": 1.949607786448264e-06, - "loss": 1.2407, - "step": 948 - }, - { - "epoch": 0.12865179963397275, - "grad_norm": 1.7006953803196607, - "learning_rate": 1.9494700512089664e-06, - "loss": 1.2827, - "step": 949 - }, - { - "epoch": 0.12878736528163764, - "grad_norm": 1.6632783516972538, - "learning_rate": 1.949332132871865e-06, - "loss": 1.2231, - "step": 950 - }, - { - "epoch": 0.12892293092930251, - "grad_norm": 1.7847625417318536, - "learning_rate": 1.9491940314635553e-06, - "loss": 1.2286, - "step": 951 - }, - { - "epoch": 0.12905849657696739, - "grad_norm": 1.9323350468769132, - "learning_rate": 1.9490557470106686e-06, - "loss": 1.2835, - "step": 952 - }, - { - "epoch": 0.12919406222463228, - "grad_norm": 2.7392971333017626, - "learning_rate": 1.9489172795398727e-06, - "loss": 1.247, - "step": 953 - }, - { - "epoch": 0.12932962787229715, - "grad_norm": 2.133722951619094, - "learning_rate": 1.9487786290778696e-06, - "loss": 1.2522, - "step": 954 - }, - { - "epoch": 0.12946519351996205, - "grad_norm": 1.5538398639379472, - "learning_rate": 1.9486397956513975e-06, - "loss": 1.2428, - "step": 955 - }, - { - "epoch": 0.12960075916762692, - "grad_norm": 1.4564176615658888, - "learning_rate": 1.9485007792872285e-06, - "loss": 1.2831, - "step": 956 - }, - { - "epoch": 0.1297363248152918, - "grad_norm": 1.7431866935350133, - "learning_rate": 1.9483615800121713e-06, - "loss": 1.2384, - "step": 957 - }, - { - "epoch": 0.1298718904629567, - "grad_norm": 4.199055910669216, - "learning_rate": 1.9482221978530695e-06, - "loss": 1.2368, - "step": 958 - }, - { - "epoch": 0.13000745611062156, - "grad_norm": 1.9180469132147882, - "learning_rate": 1.9480826328368018e-06, - "loss": 1.2497, - "step": 959 - }, - { - "epoch": 0.13014302175828646, - "grad_norm": 2.1644490275887778, - "learning_rate": 1.9479428849902816e-06, - "loss": 1.2328, - "step": 960 - }, - { - "epoch": 0.13027858740595133, - "grad_norm": 3.6484802505987433, - "learning_rate": 1.9478029543404587e-06, - "loss": 1.2695, - "step": 961 - }, - { - "epoch": 0.13041415305361623, - "grad_norm": 1.704523904307557, - "learning_rate": 1.9476628409143177e-06, - "loss": 1.2703, - "step": 962 - }, - { - "epoch": 0.1305497187012811, - "grad_norm": 1.558489971669944, - "learning_rate": 1.9475225447388787e-06, - "loss": 1.2696, - "step": 963 - }, - { - "epoch": 0.13068528434894597, - "grad_norm": 1.6901874918740454, - "learning_rate": 1.9473820658411954e-06, - "loss": 1.2486, - "step": 964 - }, - { - "epoch": 0.13082084999661087, - "grad_norm": 1.9222331896112053, - "learning_rate": 1.9472414042483594e-06, - "loss": 1.238, - "step": 965 - }, - { - "epoch": 0.13095641564427574, - "grad_norm": 1.7530321550174606, - "learning_rate": 1.9471005599874955e-06, - "loss": 1.2607, - "step": 966 - }, - { - "epoch": 0.13109198129194063, - "grad_norm": 1.4503963193403409, - "learning_rate": 1.9469595330857644e-06, - "loss": 1.2408, - "step": 967 - }, - { - "epoch": 0.1312275469396055, - "grad_norm": 1.5980469437026408, - "learning_rate": 1.946818323570362e-06, - "loss": 1.2289, - "step": 968 - }, - { - "epoch": 0.13136311258727038, - "grad_norm": 1.4017840449695484, - "learning_rate": 1.9466769314685204e-06, - "loss": 1.2325, - "step": 969 - }, - { - "epoch": 0.13149867823493527, - "grad_norm": 1.9304585352014205, - "learning_rate": 1.9465353568075047e-06, - "loss": 1.2476, - "step": 970 - }, - { - "epoch": 0.13163424388260014, - "grad_norm": 2.01696243886089, - "learning_rate": 1.946393599614617e-06, - "loss": 1.269, - "step": 971 - }, - { - "epoch": 0.13176980953026504, - "grad_norm": 1.733295074817846, - "learning_rate": 1.9462516599171944e-06, - "loss": 1.2892, - "step": 972 - }, - { - "epoch": 0.1319053751779299, - "grad_norm": 1.4628268309194339, - "learning_rate": 1.946109537742608e-06, - "loss": 1.2021, - "step": 973 - }, - { - "epoch": 0.13204094082559478, - "grad_norm": 2.7535239576876442, - "learning_rate": 1.945967233118265e-06, - "loss": 1.2291, - "step": 974 - }, - { - "epoch": 0.13217650647325968, - "grad_norm": 2.2078161543026664, - "learning_rate": 1.945824746071609e-06, - "loss": 1.2288, - "step": 975 - }, - { - "epoch": 0.13231207212092455, - "grad_norm": 1.8435092818112777, - "learning_rate": 1.945682076630116e-06, - "loss": 1.2595, - "step": 976 - }, - { - "epoch": 0.13244763776858945, - "grad_norm": 3.6071763864409605, - "learning_rate": 1.9455392248212995e-06, - "loss": 1.2195, - "step": 977 - }, - { - "epoch": 0.13258320341625432, - "grad_norm": 2.0235196455044924, - "learning_rate": 1.945396190672707e-06, - "loss": 1.2358, - "step": 978 - }, - { - "epoch": 0.1327187690639192, - "grad_norm": 1.4486029875879691, - "learning_rate": 1.9452529742119214e-06, - "loss": 1.2534, - "step": 979 - }, - { - "epoch": 0.1328543347115841, - "grad_norm": 2.5352602551756123, - "learning_rate": 1.9451095754665613e-06, - "loss": 1.2354, - "step": 980 - }, - { - "epoch": 0.13298990035924896, - "grad_norm": 1.934007336805061, - "learning_rate": 1.94496599446428e-06, - "loss": 1.2105, - "step": 981 - }, - { - "epoch": 0.13312546600691386, - "grad_norm": 3.189438140346245, - "learning_rate": 1.9448222312327654e-06, - "loss": 1.2421, - "step": 982 - }, - { - "epoch": 0.13326103165457873, - "grad_norm": 3.5107164287035686, - "learning_rate": 1.944678285799742e-06, - "loss": 1.2391, - "step": 983 - }, - { - "epoch": 0.13339659730224362, - "grad_norm": 1.5396452199778563, - "learning_rate": 1.944534158192968e-06, - "loss": 1.217, - "step": 984 - }, - { - "epoch": 0.1335321629499085, - "grad_norm": 1.5770756667325352, - "learning_rate": 1.944389848440237e-06, - "loss": 1.2503, - "step": 985 - }, - { - "epoch": 0.13366772859757337, - "grad_norm": 1.5516526555796026, - "learning_rate": 1.9442453565693782e-06, - "loss": 1.2193, - "step": 986 - }, - { - "epoch": 0.13380329424523826, - "grad_norm": 2.21353888232779, - "learning_rate": 1.944100682608256e-06, - "loss": 1.2163, - "step": 987 - }, - { - "epoch": 0.13393885989290313, - "grad_norm": 1.5906559194041874, - "learning_rate": 1.943955826584769e-06, - "loss": 1.2093, - "step": 988 - }, - { - "epoch": 0.13407442554056803, - "grad_norm": 1.8270371307828708, - "learning_rate": 1.9438107885268525e-06, - "loss": 1.2416, - "step": 989 - }, - { - "epoch": 0.1342099911882329, - "grad_norm": 1.8214332420952695, - "learning_rate": 1.9436655684624755e-06, - "loss": 1.288, - "step": 990 - }, - { - "epoch": 0.13434555683589777, - "grad_norm": 1.7807802522939966, - "learning_rate": 1.9435201664196424e-06, - "loss": 1.2267, - "step": 991 - }, - { - "epoch": 0.13448112248356267, - "grad_norm": 1.4421310512404386, - "learning_rate": 1.9433745824263924e-06, - "loss": 1.2049, - "step": 992 - }, - { - "epoch": 0.13461668813122754, - "grad_norm": 3.0535123223330585, - "learning_rate": 1.943228816510801e-06, - "loss": 1.2697, - "step": 993 - }, - { - "epoch": 0.13475225377889244, - "grad_norm": 1.618002938654719, - "learning_rate": 1.943082868700978e-06, - "loss": 1.2454, - "step": 994 - }, - { - "epoch": 0.1348878194265573, - "grad_norm": 2.063114468061899, - "learning_rate": 1.9429367390250676e-06, - "loss": 1.2384, - "step": 995 - }, - { - "epoch": 0.13502338507422218, - "grad_norm": 1.6476688126295096, - "learning_rate": 1.942790427511251e-06, - "loss": 1.2167, - "step": 996 - }, - { - "epoch": 0.13515895072188708, - "grad_norm": 1.6522621324689257, - "learning_rate": 1.9426439341877412e-06, - "loss": 1.245, - "step": 997 - }, - { - "epoch": 0.13529451636955195, - "grad_norm": 4.629193745275285, - "learning_rate": 1.94249725908279e-06, - "loss": 1.2277, - "step": 998 - }, - { - "epoch": 0.13543008201721685, - "grad_norm": 1.5550512782376582, - "learning_rate": 1.942350402224682e-06, - "loss": 1.2688, - "step": 999 - }, - { - "epoch": 0.13556564766488172, - "grad_norm": 2.5833196831304255, - "learning_rate": 1.942203363641738e-06, - "loss": 1.2176, - "step": 1000 - }, - { - "epoch": 0.1357012133125466, - "grad_norm": 1.5065549043132132, - "learning_rate": 1.942056143362312e-06, - "loss": 1.2276, - "step": 1001 - }, - { - "epoch": 0.13583677896021149, - "grad_norm": 1.7019198883921853, - "learning_rate": 1.941908741414795e-06, - "loss": 1.2407, - "step": 1002 - }, - { - "epoch": 0.13597234460787636, - "grad_norm": 1.574869340432279, - "learning_rate": 1.941761157827612e-06, - "loss": 1.2219, - "step": 1003 - }, - { - "epoch": 0.13610791025554125, - "grad_norm": 1.410944734758737, - "learning_rate": 1.9416133926292236e-06, - "loss": 1.2221, - "step": 1004 - }, - { - "epoch": 0.13624347590320612, - "grad_norm": 2.788638790929034, - "learning_rate": 1.941465445848125e-06, - "loss": 1.2613, - "step": 1005 - }, - { - "epoch": 0.13637904155087102, - "grad_norm": 1.7568412809975187, - "learning_rate": 1.941317317512847e-06, - "loss": 1.2221, - "step": 1006 - }, - { - "epoch": 0.1365146071985359, - "grad_norm": 2.299969308708164, - "learning_rate": 1.9411690076519545e-06, - "loss": 1.1949, - "step": 1007 - }, - { - "epoch": 0.13665017284620076, - "grad_norm": 1.6568367021554524, - "learning_rate": 1.941020516294048e-06, - "loss": 1.2502, - "step": 1008 - }, - { - "epoch": 0.13678573849386566, - "grad_norm": 3.3068268915109935, - "learning_rate": 1.9408718434677625e-06, - "loss": 1.2696, - "step": 1009 - }, - { - "epoch": 0.13692130414153053, - "grad_norm": 3.684918638552213, - "learning_rate": 1.9407229892017694e-06, - "loss": 1.2494, - "step": 1010 - }, - { - "epoch": 0.13705686978919543, - "grad_norm": 2.1588618398920563, - "learning_rate": 1.940573953524773e-06, - "loss": 1.2418, - "step": 1011 - }, - { - "epoch": 0.1371924354368603, - "grad_norm": 1.721442174371149, - "learning_rate": 1.9404247364655145e-06, - "loss": 1.2246, - "step": 1012 - }, - { - "epoch": 0.13732800108452517, - "grad_norm": 2.222148292280711, - "learning_rate": 1.9402753380527684e-06, - "loss": 1.228, - "step": 1013 - }, - { - "epoch": 0.13746356673219007, - "grad_norm": 2.2430435221199696, - "learning_rate": 1.9401257583153456e-06, - "loss": 1.2168, - "step": 1014 - }, - { - "epoch": 0.13759913237985494, - "grad_norm": 2.3466447232708174, - "learning_rate": 1.9399759972820913e-06, - "loss": 1.2454, - "step": 1015 - }, - { - "epoch": 0.13773469802751984, - "grad_norm": 2.1238090837329273, - "learning_rate": 1.9398260549818856e-06, - "loss": 1.2413, - "step": 1016 - }, - { - "epoch": 0.1378702636751847, - "grad_norm": 2.1028257799796544, - "learning_rate": 1.9396759314436435e-06, - "loss": 1.2315, - "step": 1017 - }, - { - "epoch": 0.13800582932284958, - "grad_norm": 2.080874171713308, - "learning_rate": 1.939525626696316e-06, - "loss": 1.2405, - "step": 1018 - }, - { - "epoch": 0.13814139497051448, - "grad_norm": 1.5411331889190538, - "learning_rate": 1.9393751407688866e-06, - "loss": 1.2149, - "step": 1019 - }, - { - "epoch": 0.13827696061817935, - "grad_norm": 2.3341300867884187, - "learning_rate": 1.9392244736903773e-06, - "loss": 1.2877, - "step": 1020 - }, - { - "epoch": 0.13841252626584424, - "grad_norm": 1.5131619221438795, - "learning_rate": 1.9390736254898414e-06, - "loss": 1.2436, - "step": 1021 - }, - { - "epoch": 0.1385480919135091, - "grad_norm": 1.9827983977686559, - "learning_rate": 1.9389225961963698e-06, - "loss": 1.2891, - "step": 1022 - }, - { - "epoch": 0.138683657561174, - "grad_norm": 1.7180846359585797, - "learning_rate": 1.9387713858390863e-06, - "loss": 1.2498, - "step": 1023 - }, - { - "epoch": 0.13881922320883888, - "grad_norm": 1.796373587435938, - "learning_rate": 1.938619994447152e-06, - "loss": 1.2135, - "step": 1024 - }, - { - "epoch": 0.13895478885650375, - "grad_norm": 2.8750565975614086, - "learning_rate": 1.9384684220497604e-06, - "loss": 1.248, - "step": 1025 - }, - { - "epoch": 0.13909035450416865, - "grad_norm": 1.8841064704704917, - "learning_rate": 1.9383166686761416e-06, - "loss": 1.2845, - "step": 1026 - }, - { - "epoch": 0.13922592015183352, - "grad_norm": 1.8522660131010587, - "learning_rate": 1.9381647343555596e-06, - "loss": 1.2784, - "step": 1027 - }, - { - "epoch": 0.13936148579949842, - "grad_norm": 2.2344481844928277, - "learning_rate": 1.938012619117314e-06, - "loss": 1.237, - "step": 1028 - }, - { - "epoch": 0.1394970514471633, - "grad_norm": 1.6358962620147233, - "learning_rate": 1.9378603229907393e-06, - "loss": 1.2152, - "step": 1029 - }, - { - "epoch": 0.13963261709482816, - "grad_norm": 2.1623826403018582, - "learning_rate": 1.937707846005204e-06, - "loss": 1.2126, - "step": 1030 - }, - { - "epoch": 0.13976818274249306, - "grad_norm": 1.5030566097659113, - "learning_rate": 1.9375551881901127e-06, - "loss": 1.2304, - "step": 1031 - }, - { - "epoch": 0.13990374839015793, - "grad_norm": 1.442164329931959, - "learning_rate": 1.937402349574904e-06, - "loss": 1.2061, - "step": 1032 - }, - { - "epoch": 0.14003931403782283, - "grad_norm": 2.1453054942473244, - "learning_rate": 1.9372493301890517e-06, - "loss": 1.2333, - "step": 1033 - }, - { - "epoch": 0.1401748796854877, - "grad_norm": 1.4595766186205736, - "learning_rate": 1.9370961300620636e-06, - "loss": 1.2615, - "step": 1034 - }, - { - "epoch": 0.14031044533315257, - "grad_norm": 1.7408383274526922, - "learning_rate": 1.9369427492234846e-06, - "loss": 1.1919, - "step": 1035 - }, - { - "epoch": 0.14044601098081747, - "grad_norm": 1.6629685247415986, - "learning_rate": 1.9367891877028917e-06, - "loss": 1.2413, - "step": 1036 - }, - { - "epoch": 0.14058157662848234, - "grad_norm": 1.9195332910945415, - "learning_rate": 1.9366354455298987e-06, - "loss": 1.2698, - "step": 1037 - }, - { - "epoch": 0.14071714227614723, - "grad_norm": 1.9719464111009932, - "learning_rate": 1.936481522734153e-06, - "loss": 1.2528, - "step": 1038 - }, - { - "epoch": 0.1408527079238121, - "grad_norm": 1.5905040641842048, - "learning_rate": 1.9363274193453383e-06, - "loss": 1.2351, - "step": 1039 - }, - { - "epoch": 0.14098827357147697, - "grad_norm": 1.9179926180865963, - "learning_rate": 1.9361731353931714e-06, - "loss": 1.2337, - "step": 1040 - }, - { - "epoch": 0.14112383921914187, - "grad_norm": 1.4822847307782494, - "learning_rate": 1.936018670907405e-06, - "loss": 1.245, - "step": 1041 - }, - { - "epoch": 0.14125940486680674, - "grad_norm": 1.4730599140408605, - "learning_rate": 1.935864025917827e-06, - "loss": 1.2531, - "step": 1042 - }, - { - "epoch": 0.14139497051447164, - "grad_norm": 1.4692166809858849, - "learning_rate": 1.935709200454258e-06, - "loss": 1.222, - "step": 1043 - }, - { - "epoch": 0.1415305361621365, - "grad_norm": 1.7454797775346012, - "learning_rate": 1.9355541945465563e-06, - "loss": 1.298, - "step": 1044 - }, - { - "epoch": 0.1416661018098014, - "grad_norm": 5.8620166782359275, - "learning_rate": 1.9353990082246127e-06, - "loss": 1.3151, - "step": 1045 - }, - { - "epoch": 0.14180166745746628, - "grad_norm": 1.7086252052887503, - "learning_rate": 1.935243641518354e-06, - "loss": 1.2816, - "step": 1046 - }, - { - "epoch": 0.14193723310513115, - "grad_norm": 1.566179966940149, - "learning_rate": 1.935088094457742e-06, - "loss": 1.2654, - "step": 1047 - }, - { - "epoch": 0.14207279875279605, - "grad_norm": 1.7835897301884485, - "learning_rate": 1.9349323670727717e-06, - "loss": 1.2154, - "step": 1048 - }, - { - "epoch": 0.14220836440046092, - "grad_norm": 1.9926627062192526, - "learning_rate": 1.9347764593934743e-06, - "loss": 1.2928, - "step": 1049 - }, - { - "epoch": 0.14234393004812582, - "grad_norm": 1.5588812230665556, - "learning_rate": 1.934620371449915e-06, - "loss": 1.274, - "step": 1050 - }, - { - "epoch": 0.1424794956957907, - "grad_norm": 1.9580567973424576, - "learning_rate": 1.934464103272195e-06, - "loss": 1.2608, - "step": 1051 - }, - { - "epoch": 0.14261506134345556, - "grad_norm": 1.717027785698081, - "learning_rate": 1.9343076548904483e-06, - "loss": 1.2553, - "step": 1052 - }, - { - "epoch": 0.14275062699112046, - "grad_norm": 1.7913816875644528, - "learning_rate": 1.9341510263348457e-06, - "loss": 1.2208, - "step": 1053 - }, - { - "epoch": 0.14288619263878533, - "grad_norm": 1.5720918716416261, - "learning_rate": 1.9339942176355916e-06, - "loss": 1.282, - "step": 1054 - }, - { - "epoch": 0.14302175828645022, - "grad_norm": 1.5339229858413248, - "learning_rate": 1.933837228822925e-06, - "loss": 1.2646, - "step": 1055 - }, - { - "epoch": 0.1431573239341151, - "grad_norm": 1.761629118117543, - "learning_rate": 1.9336800599271203e-06, - "loss": 1.2709, - "step": 1056 - }, - { - "epoch": 0.14329288958177996, - "grad_norm": 1.5855380432133905, - "learning_rate": 1.933522710978486e-06, - "loss": 1.245, - "step": 1057 - }, - { - "epoch": 0.14342845522944486, - "grad_norm": 1.8979733295549495, - "learning_rate": 1.9333651820073655e-06, - "loss": 1.2583, - "step": 1058 - }, - { - "epoch": 0.14356402087710973, - "grad_norm": 2.479709621772689, - "learning_rate": 1.933207473044137e-06, - "loss": 1.2134, - "step": 1059 - }, - { - "epoch": 0.14369958652477463, - "grad_norm": 1.576859906610863, - "learning_rate": 1.9330495841192138e-06, - "loss": 1.2718, - "step": 1060 - }, - { - "epoch": 0.1438351521724395, - "grad_norm": 3.540699839668719, - "learning_rate": 1.9328915152630435e-06, - "loss": 1.2267, - "step": 1061 - }, - { - "epoch": 0.1439707178201044, - "grad_norm": 1.5705515201510214, - "learning_rate": 1.932733266506108e-06, - "loss": 1.2354, - "step": 1062 - }, - { - "epoch": 0.14410628346776927, - "grad_norm": 2.16139489254235, - "learning_rate": 1.9325748378789246e-06, - "loss": 1.2447, - "step": 1063 - }, - { - "epoch": 0.14424184911543414, - "grad_norm": 1.465821364618215, - "learning_rate": 1.9324162294120453e-06, - "loss": 1.2747, - "step": 1064 - }, - { - "epoch": 0.14437741476309904, - "grad_norm": 2.504830942337657, - "learning_rate": 1.9322574411360557e-06, - "loss": 1.2474, - "step": 1065 - }, - { - "epoch": 0.1445129804107639, - "grad_norm": 2.1908325490122413, - "learning_rate": 1.932098473081578e-06, - "loss": 1.2621, - "step": 1066 - }, - { - "epoch": 0.1446485460584288, - "grad_norm": 1.6381375697961302, - "learning_rate": 1.931939325279267e-06, - "loss": 1.2613, - "step": 1067 - }, - { - "epoch": 0.14478411170609368, - "grad_norm": 1.5216608961750206, - "learning_rate": 1.9317799977598136e-06, - "loss": 1.2224, - "step": 1068 - }, - { - "epoch": 0.14491967735375855, - "grad_norm": 1.5274998652345184, - "learning_rate": 1.9316204905539425e-06, - "loss": 1.2635, - "step": 1069 - }, - { - "epoch": 0.14505524300142345, - "grad_norm": 1.6531542168770128, - "learning_rate": 1.9314608036924133e-06, - "loss": 1.2686, - "step": 1070 - }, - { - "epoch": 0.14519080864908832, - "grad_norm": 1.6320942262711018, - "learning_rate": 1.931300937206021e-06, - "loss": 1.2688, - "step": 1071 - }, - { - "epoch": 0.1453263742967532, - "grad_norm": 1.7134817181697977, - "learning_rate": 1.931140891125594e-06, - "loss": 1.2592, - "step": 1072 - }, - { - "epoch": 0.14546193994441808, - "grad_norm": 1.8990782502506465, - "learning_rate": 1.9309806654819963e-06, - "loss": 1.3013, - "step": 1073 - }, - { - "epoch": 0.14559750559208295, - "grad_norm": 2.8992076134083056, - "learning_rate": 1.9308202603061258e-06, - "loss": 1.2322, - "step": 1074 - }, - { - "epoch": 0.14573307123974785, - "grad_norm": 1.7924359275620443, - "learning_rate": 1.9306596756289155e-06, - "loss": 1.2646, - "step": 1075 - }, - { - "epoch": 0.14586863688741272, - "grad_norm": 2.589357184122086, - "learning_rate": 1.930498911481333e-06, - "loss": 1.2202, - "step": 1076 - }, - { - "epoch": 0.14600420253507762, - "grad_norm": 1.514507062572406, - "learning_rate": 1.9303379678943805e-06, - "loss": 1.2618, - "step": 1077 - }, - { - "epoch": 0.1461397681827425, - "grad_norm": 2.21857224424984, - "learning_rate": 1.9301768448990946e-06, - "loss": 1.2187, - "step": 1078 - }, - { - "epoch": 0.14627533383040736, - "grad_norm": 1.684930269417547, - "learning_rate": 1.930015542526546e-06, - "loss": 1.2324, - "step": 1079 - }, - { - "epoch": 0.14641089947807226, - "grad_norm": 1.6980941124964881, - "learning_rate": 1.9298540608078417e-06, - "loss": 1.2578, - "step": 1080 - }, - { - "epoch": 0.14654646512573713, - "grad_norm": 1.9283474314568836, - "learning_rate": 1.9296923997741216e-06, - "loss": 1.2642, - "step": 1081 - }, - { - "epoch": 0.14668203077340203, - "grad_norm": 1.9871472888123223, - "learning_rate": 1.9295305594565604e-06, - "loss": 1.2381, - "step": 1082 - }, - { - "epoch": 0.1468175964210669, - "grad_norm": 1.6381357187314456, - "learning_rate": 1.9293685398863683e-06, - "loss": 1.2286, - "step": 1083 - }, - { - "epoch": 0.1469531620687318, - "grad_norm": 2.0381799474763467, - "learning_rate": 1.929206341094789e-06, - "loss": 1.2484, - "step": 1084 - }, - { - "epoch": 0.14708872771639667, - "grad_norm": 1.5780553269938595, - "learning_rate": 1.9290439631131018e-06, - "loss": 1.2626, - "step": 1085 - }, - { - "epoch": 0.14722429336406154, - "grad_norm": 1.5213523064816792, - "learning_rate": 1.9288814059726196e-06, - "loss": 1.2394, - "step": 1086 - }, - { - "epoch": 0.14735985901172644, - "grad_norm": 1.4910641375543334, - "learning_rate": 1.92871866970469e-06, - "loss": 1.2201, - "step": 1087 - }, - { - "epoch": 0.1474954246593913, - "grad_norm": 1.7718289793966837, - "learning_rate": 1.9285557543406964e-06, - "loss": 1.2921, - "step": 1088 - }, - { - "epoch": 0.1476309903070562, - "grad_norm": 2.234223464086942, - "learning_rate": 1.928392659912055e-06, - "loss": 1.2693, - "step": 1089 - }, - { - "epoch": 0.14776655595472107, - "grad_norm": 1.5484761311109212, - "learning_rate": 1.9282293864502176e-06, - "loss": 1.2324, - "step": 1090 - }, - { - "epoch": 0.14790212160238594, - "grad_norm": 1.623377859674405, - "learning_rate": 1.92806593398667e-06, - "loss": 1.2116, - "step": 1091 - }, - { - "epoch": 0.14803768725005084, - "grad_norm": 1.7609292215800592, - "learning_rate": 1.9279023025529324e-06, - "loss": 1.2509, - "step": 1092 - }, - { - "epoch": 0.1481732528977157, - "grad_norm": 1.5743545669382824, - "learning_rate": 1.9277384921805604e-06, - "loss": 1.2395, - "step": 1093 - }, - { - "epoch": 0.1483088185453806, - "grad_norm": 1.4889518523415273, - "learning_rate": 1.927574502901143e-06, - "loss": 1.2201, - "step": 1094 - }, - { - "epoch": 0.14844438419304548, - "grad_norm": 1.547272045781177, - "learning_rate": 1.927410334746305e-06, - "loss": 1.2674, - "step": 1095 - }, - { - "epoch": 0.14857994984071035, - "grad_norm": 1.6071091633340264, - "learning_rate": 1.927245987747704e-06, - "loss": 1.254, - "step": 1096 - }, - { - "epoch": 0.14871551548837525, - "grad_norm": 1.4398518326977447, - "learning_rate": 1.9270814619370337e-06, - "loss": 1.2594, - "step": 1097 - }, - { - "epoch": 0.14885108113604012, - "grad_norm": 1.3711175378709786, - "learning_rate": 1.9269167573460217e-06, - "loss": 1.2635, - "step": 1098 - }, - { - "epoch": 0.14898664678370502, - "grad_norm": 1.6961472382439038, - "learning_rate": 1.9267518740064294e-06, - "loss": 1.2466, - "step": 1099 - }, - { - "epoch": 0.1491222124313699, - "grad_norm": 1.4329585743434698, - "learning_rate": 1.9265868119500538e-06, - "loss": 1.2132, - "step": 1100 - }, - { - "epoch": 0.1492577780790348, - "grad_norm": 1.4695568548420856, - "learning_rate": 1.926421571208725e-06, - "loss": 1.209, - "step": 1101 - }, - { - "epoch": 0.14939334372669966, - "grad_norm": 1.7605132260240421, - "learning_rate": 1.9262561518143095e-06, - "loss": 1.2825, - "step": 1102 - }, - { - "epoch": 0.14952890937436453, - "grad_norm": 1.3301928667894327, - "learning_rate": 1.9260905537987063e-06, - "loss": 1.2508, - "step": 1103 - }, - { - "epoch": 0.14966447502202943, - "grad_norm": 6.929964487614914, - "learning_rate": 1.92592477719385e-06, - "loss": 1.201, - "step": 1104 - }, - { - "epoch": 0.1498000406696943, - "grad_norm": 2.5866661583793262, - "learning_rate": 1.925758822031709e-06, - "loss": 1.2395, - "step": 1105 - }, - { - "epoch": 0.1499356063173592, - "grad_norm": 19.370744269261632, - "learning_rate": 1.9255926883442867e-06, - "loss": 1.2643, - "step": 1106 - }, - { - "epoch": 0.15007117196502406, - "grad_norm": 2.0639397159644135, - "learning_rate": 1.9254263761636207e-06, - "loss": 1.2371, - "step": 1107 - }, - { - "epoch": 0.15020673761268893, - "grad_norm": 1.5614205405619115, - "learning_rate": 1.925259885521783e-06, - "loss": 1.2334, - "step": 1108 - }, - { - "epoch": 0.15034230326035383, - "grad_norm": 1.8479651083743123, - "learning_rate": 1.92509321645088e-06, - "loss": 1.2765, - "step": 1109 - }, - { - "epoch": 0.1504778689080187, - "grad_norm": 1.616766122076782, - "learning_rate": 1.924926368983052e-06, - "loss": 1.2343, - "step": 1110 - }, - { - "epoch": 0.1506134345556836, - "grad_norm": 1.382961203809606, - "learning_rate": 1.9247593431504756e-06, - "loss": 1.2137, - "step": 1111 - }, - { - "epoch": 0.15074900020334847, - "grad_norm": 1.76848505449108, - "learning_rate": 1.9245921389853588e-06, - "loss": 1.2257, - "step": 1112 - }, - { - "epoch": 0.15088456585101334, - "grad_norm": 1.6339262343198608, - "learning_rate": 1.9244247565199463e-06, - "loss": 1.2185, - "step": 1113 - }, - { - "epoch": 0.15102013149867824, - "grad_norm": 1.4410188654277871, - "learning_rate": 1.9242571957865165e-06, - "loss": 1.2851, - "step": 1114 - }, - { - "epoch": 0.1511556971463431, - "grad_norm": 3.6892103596362236, - "learning_rate": 1.924089456817382e-06, - "loss": 1.23, - "step": 1115 - }, - { - "epoch": 0.151291262794008, - "grad_norm": 1.717427087138512, - "learning_rate": 1.92392153964489e-06, - "loss": 1.2438, - "step": 1116 - }, - { - "epoch": 0.15142682844167288, - "grad_norm": 1.5144565294721724, - "learning_rate": 1.923753444301423e-06, - "loss": 1.2566, - "step": 1117 - }, - { - "epoch": 0.15156239408933775, - "grad_norm": 2.1200251198755597, - "learning_rate": 1.923585170819395e-06, - "loss": 1.2408, - "step": 1118 - }, - { - "epoch": 0.15169795973700265, - "grad_norm": 1.5959484013162997, - "learning_rate": 1.923416719231257e-06, - "loss": 1.2386, - "step": 1119 - }, - { - "epoch": 0.15183352538466752, - "grad_norm": 1.6121421127344069, - "learning_rate": 1.9232480895694945e-06, - "loss": 1.2093, - "step": 1120 - }, - { - "epoch": 0.15196909103233242, - "grad_norm": 2.2639654161351976, - "learning_rate": 1.9230792818666252e-06, - "loss": 1.2231, - "step": 1121 - }, - { - "epoch": 0.15210465667999729, - "grad_norm": 1.8090956013049044, - "learning_rate": 1.9229102961552026e-06, - "loss": 1.2751, - "step": 1122 - }, - { - "epoch": 0.15224022232766218, - "grad_norm": 1.7833830803799655, - "learning_rate": 1.9227411324678146e-06, - "loss": 1.2246, - "step": 1123 - }, - { - "epoch": 0.15237578797532705, - "grad_norm": 1.990772546781389, - "learning_rate": 1.922571790837083e-06, - "loss": 1.2535, - "step": 1124 - }, - { - "epoch": 0.15251135362299192, - "grad_norm": 1.6577418607565044, - "learning_rate": 1.9224022712956635e-06, - "loss": 1.2145, - "step": 1125 - }, - { - "epoch": 0.15264691927065682, - "grad_norm": 1.694170600213874, - "learning_rate": 1.922232573876247e-06, - "loss": 1.2227, - "step": 1126 - }, - { - "epoch": 0.1527824849183217, - "grad_norm": 1.781975288379602, - "learning_rate": 1.922062698611559e-06, - "loss": 1.249, - "step": 1127 - }, - { - "epoch": 0.1529180505659866, - "grad_norm": 1.9529326466284553, - "learning_rate": 1.921892645534357e-06, - "loss": 1.2876, - "step": 1128 - }, - { - "epoch": 0.15305361621365146, - "grad_norm": 3.1682936404836433, - "learning_rate": 1.9217224146774357e-06, - "loss": 1.2357, - "step": 1129 - }, - { - "epoch": 0.15318918186131633, - "grad_norm": 1.87652476818638, - "learning_rate": 1.921552006073622e-06, - "loss": 1.2294, - "step": 1130 - }, - { - "epoch": 0.15332474750898123, - "grad_norm": 1.5612227816075344, - "learning_rate": 1.9213814197557787e-06, - "loss": 1.2478, - "step": 1131 - }, - { - "epoch": 0.1534603131566461, - "grad_norm": 1.798022402090002, - "learning_rate": 1.9212106557568016e-06, - "loss": 1.2221, - "step": 1132 - }, - { - "epoch": 0.153595878804311, - "grad_norm": 1.5537937290506243, - "learning_rate": 1.9210397141096206e-06, - "loss": 1.2212, - "step": 1133 - }, - { - "epoch": 0.15373144445197587, - "grad_norm": 2.6531505752624893, - "learning_rate": 1.9208685948472014e-06, - "loss": 1.2528, - "step": 1134 - }, - { - "epoch": 0.15386701009964074, - "grad_norm": 2.0741859638859834, - "learning_rate": 1.9206972980025426e-06, - "loss": 1.2135, - "step": 1135 - }, - { - "epoch": 0.15400257574730564, - "grad_norm": 1.9547774873773638, - "learning_rate": 1.9205258236086773e-06, - "loss": 1.2487, - "step": 1136 - }, - { - "epoch": 0.1541381413949705, - "grad_norm": 2.490340225160739, - "learning_rate": 1.920354171698673e-06, - "loss": 1.2241, - "step": 1137 - }, - { - "epoch": 0.1542737070426354, - "grad_norm": 1.6602771040225255, - "learning_rate": 1.9201823423056315e-06, - "loss": 1.2469, - "step": 1138 - }, - { - "epoch": 0.15440927269030028, - "grad_norm": 1.5894970894257991, - "learning_rate": 1.920010335462689e-06, - "loss": 1.2474, - "step": 1139 - }, - { - "epoch": 0.15454483833796515, - "grad_norm": 1.4475018218797087, - "learning_rate": 1.9198381512030154e-06, - "loss": 1.2674, - "step": 1140 - }, - { - "epoch": 0.15468040398563004, - "grad_norm": 2.2686122524967702, - "learning_rate": 1.919665789559815e-06, - "loss": 1.2041, - "step": 1141 - }, - { - "epoch": 0.15481596963329491, - "grad_norm": 4.755632879202707, - "learning_rate": 1.9194932505663265e-06, - "loss": 1.2261, - "step": 1142 - }, - { - "epoch": 0.1549515352809598, - "grad_norm": 1.5154836531519884, - "learning_rate": 1.9193205342558227e-06, - "loss": 1.2387, - "step": 1143 - }, - { - "epoch": 0.15508710092862468, - "grad_norm": 2.1036614308386383, - "learning_rate": 1.9191476406616107e-06, - "loss": 1.2435, - "step": 1144 - }, - { - "epoch": 0.15522266657628958, - "grad_norm": 1.5064555355427527, - "learning_rate": 1.918974569817031e-06, - "loss": 1.2028, - "step": 1145 - }, - { - "epoch": 0.15535823222395445, - "grad_norm": 1.5956690421817357, - "learning_rate": 1.9188013217554596e-06, - "loss": 1.2146, - "step": 1146 - }, - { - "epoch": 0.15549379787161932, - "grad_norm": 1.509386506319828, - "learning_rate": 1.918627896510306e-06, - "loss": 1.259, - "step": 1147 - }, - { - "epoch": 0.15562936351928422, - "grad_norm": 1.672563204754197, - "learning_rate": 1.9184542941150143e-06, - "loss": 1.2486, - "step": 1148 - }, - { - "epoch": 0.1557649291669491, - "grad_norm": 1.7490006335791861, - "learning_rate": 1.9182805146030614e-06, - "loss": 1.2295, - "step": 1149 - }, - { - "epoch": 0.155900494814614, - "grad_norm": 3.763672108816059, - "learning_rate": 1.9181065580079593e-06, - "loss": 1.2474, - "step": 1150 - }, - { - "epoch": 0.15603606046227886, - "grad_norm": 1.5024007021267705, - "learning_rate": 1.917932424363255e-06, - "loss": 1.2269, - "step": 1151 - }, - { - "epoch": 0.15617162610994373, - "grad_norm": 2.3766770980247456, - "learning_rate": 1.9177581137025284e-06, - "loss": 1.2431, - "step": 1152 - }, - { - "epoch": 0.15630719175760863, - "grad_norm": 1.5447336328246781, - "learning_rate": 1.9175836260593937e-06, - "loss": 1.2087, - "step": 1153 - }, - { - "epoch": 0.1564427574052735, - "grad_norm": 2.7889290329424754, - "learning_rate": 1.9174089614674998e-06, - "loss": 1.2441, - "step": 1154 - }, - { - "epoch": 0.1565783230529384, - "grad_norm": 1.7774401376333266, - "learning_rate": 1.9172341199605293e-06, - "loss": 1.2176, - "step": 1155 - }, - { - "epoch": 0.15671388870060327, - "grad_norm": 1.4469050520054572, - "learning_rate": 1.9170591015721987e-06, - "loss": 1.2107, - "step": 1156 - }, - { - "epoch": 0.15684945434826814, - "grad_norm": 1.6442936470501763, - "learning_rate": 1.9168839063362595e-06, - "loss": 1.226, - "step": 1157 - }, - { - "epoch": 0.15698501999593303, - "grad_norm": 1.7311151737691326, - "learning_rate": 1.9167085342864962e-06, - "loss": 1.1854, - "step": 1158 - }, - { - "epoch": 0.1571205856435979, - "grad_norm": 1.7632834396197599, - "learning_rate": 1.9165329854567285e-06, - "loss": 1.2083, - "step": 1159 - }, - { - "epoch": 0.1572561512912628, - "grad_norm": 1.5629431299353793, - "learning_rate": 1.916357259880809e-06, - "loss": 1.2286, - "step": 1160 - }, - { - "epoch": 0.15739171693892767, - "grad_norm": 1.780901606881394, - "learning_rate": 1.916181357592625e-06, - "loss": 1.2614, - "step": 1161 - }, - { - "epoch": 0.15752728258659257, - "grad_norm": 1.6487292480399713, - "learning_rate": 1.916005278626098e-06, - "loss": 1.2467, - "step": 1162 - }, - { - "epoch": 0.15766284823425744, - "grad_norm": 4.940013726523356, - "learning_rate": 1.915829023015184e-06, - "loss": 1.2655, - "step": 1163 - }, - { - "epoch": 0.1577984138819223, - "grad_norm": 1.7259005643464387, - "learning_rate": 1.915652590793872e-06, - "loss": 1.2395, - "step": 1164 - }, - { - "epoch": 0.1579339795295872, - "grad_norm": 1.525014882541036, - "learning_rate": 1.9154759819961854e-06, - "loss": 1.2373, - "step": 1165 - }, - { - "epoch": 0.15806954517725208, - "grad_norm": 1.6222758131718455, - "learning_rate": 1.915299196656182e-06, - "loss": 1.2503, - "step": 1166 - }, - { - "epoch": 0.15820511082491698, - "grad_norm": 1.6840964910118656, - "learning_rate": 1.9151222348079535e-06, - "loss": 1.2671, - "step": 1167 - }, - { - "epoch": 0.15834067647258185, - "grad_norm": 1.5257174692329623, - "learning_rate": 1.9149450964856254e-06, - "loss": 1.2846, - "step": 1168 - }, - { - "epoch": 0.15847624212024672, - "grad_norm": 1.706683071505236, - "learning_rate": 1.914767781723358e-06, - "loss": 1.2292, - "step": 1169 - }, - { - "epoch": 0.15861180776791162, - "grad_norm": 1.6023260113111397, - "learning_rate": 1.914590290555344e-06, - "loss": 1.2554, - "step": 1170 - }, - { - "epoch": 0.1587473734155765, - "grad_norm": 1.5406375554359437, - "learning_rate": 1.9144126230158124e-06, - "loss": 1.2337, - "step": 1171 - }, - { - "epoch": 0.15888293906324139, - "grad_norm": 1.5614017910805822, - "learning_rate": 1.9142347791390242e-06, - "loss": 1.3002, - "step": 1172 - }, - { - "epoch": 0.15901850471090626, - "grad_norm": 1.4215292028143196, - "learning_rate": 1.9140567589592755e-06, - "loss": 1.1813, - "step": 1173 - }, - { - "epoch": 0.15915407035857113, - "grad_norm": 1.810761761200612, - "learning_rate": 1.9138785625108955e-06, - "loss": 1.2798, - "step": 1174 - }, - { - "epoch": 0.15928963600623602, - "grad_norm": 1.4550924451753835, - "learning_rate": 1.9137001898282484e-06, - "loss": 1.211, - "step": 1175 - }, - { - "epoch": 0.1594252016539009, - "grad_norm": 1.5473973021249587, - "learning_rate": 1.9135216409457327e-06, - "loss": 1.2634, - "step": 1176 - }, - { - "epoch": 0.1595607673015658, - "grad_norm": 1.6378708174674714, - "learning_rate": 1.913342915897779e-06, - "loss": 1.239, - "step": 1177 - }, - { - "epoch": 0.15969633294923066, - "grad_norm": 1.7654457986608207, - "learning_rate": 1.9131640147188534e-06, - "loss": 1.2225, - "step": 1178 - }, - { - "epoch": 0.15983189859689553, - "grad_norm": 1.6029160562752705, - "learning_rate": 1.912984937443456e-06, - "loss": 1.2444, - "step": 1179 - }, - { - "epoch": 0.15996746424456043, - "grad_norm": 1.4333996312722654, - "learning_rate": 1.9128056841061197e-06, - "loss": 1.2737, - "step": 1180 - }, - { - "epoch": 0.1601030298922253, - "grad_norm": 2.069719950627627, - "learning_rate": 1.912626254741413e-06, - "loss": 1.2314, - "step": 1181 - }, - { - "epoch": 0.1602385955398902, - "grad_norm": 1.8788936096644884, - "learning_rate": 1.912446649383936e-06, - "loss": 1.236, - "step": 1182 - }, - { - "epoch": 0.16037416118755507, - "grad_norm": 1.5950967914701581, - "learning_rate": 1.9122668680683255e-06, - "loss": 1.2442, - "step": 1183 - }, - { - "epoch": 0.16050972683521997, - "grad_norm": 2.6976221081388667, - "learning_rate": 1.9120869108292504e-06, - "loss": 1.2089, - "step": 1184 - }, - { - "epoch": 0.16064529248288484, - "grad_norm": 1.6255358878455806, - "learning_rate": 1.9119067777014146e-06, - "loss": 1.2258, - "step": 1185 - }, - { - "epoch": 0.1607808581305497, - "grad_norm": 12.487634551788583, - "learning_rate": 1.9117264687195546e-06, - "loss": 1.2753, - "step": 1186 - }, - { - "epoch": 0.1609164237782146, - "grad_norm": 15.564762236857428, - "learning_rate": 1.911545983918442e-06, - "loss": 1.2464, - "step": 1187 - }, - { - "epoch": 0.16105198942587948, - "grad_norm": 1.6242780976490165, - "learning_rate": 1.911365323332881e-06, - "loss": 1.2278, - "step": 1188 - }, - { - "epoch": 0.16118755507354438, - "grad_norm": 1.8438710182797384, - "learning_rate": 1.9111844869977123e-06, - "loss": 1.2856, - "step": 1189 - }, - { - "epoch": 0.16132312072120925, - "grad_norm": 2.0417358295332217, - "learning_rate": 1.911003474947807e-06, - "loss": 1.2283, - "step": 1190 - }, - { - "epoch": 0.16145868636887412, - "grad_norm": 2.473317266962249, - "learning_rate": 1.910822287218073e-06, - "loss": 1.1957, - "step": 1191 - }, - { - "epoch": 0.16159425201653901, - "grad_norm": 2.4852478328023624, - "learning_rate": 1.9106409238434503e-06, - "loss": 1.2027, - "step": 1192 - }, - { - "epoch": 0.16172981766420388, - "grad_norm": 1.5432661128407745, - "learning_rate": 1.9104593848589137e-06, - "loss": 1.2377, - "step": 1193 - }, - { - "epoch": 0.16186538331186878, - "grad_norm": 2.0349679231482205, - "learning_rate": 1.9102776702994713e-06, - "loss": 1.257, - "step": 1194 - }, - { - "epoch": 0.16200094895953365, - "grad_norm": 1.684171345537619, - "learning_rate": 1.9100957802001654e-06, - "loss": 1.2242, - "step": 1195 - }, - { - "epoch": 0.16213651460719852, - "grad_norm": 1.70763519729722, - "learning_rate": 1.9099137145960724e-06, - "loss": 1.2475, - "step": 1196 - }, - { - "epoch": 0.16227208025486342, - "grad_norm": 4.0869004632376615, - "learning_rate": 1.909731473522302e-06, - "loss": 1.2311, - "step": 1197 - }, - { - "epoch": 0.1624076459025283, - "grad_norm": 2.5110663095637156, - "learning_rate": 1.9095490570139977e-06, - "loss": 1.2088, - "step": 1198 - }, - { - "epoch": 0.1625432115501932, - "grad_norm": 1.7364312627774643, - "learning_rate": 1.9093664651063375e-06, - "loss": 1.238, - "step": 1199 - }, - { - "epoch": 0.16267877719785806, - "grad_norm": 2.420896023366959, - "learning_rate": 1.9091836978345323e-06, - "loss": 1.2116, - "step": 1200 - }, - { - "epoch": 0.16281434284552296, - "grad_norm": 1.5908414163359035, - "learning_rate": 1.909000755233828e-06, - "loss": 1.2444, - "step": 1201 - }, - { - "epoch": 0.16294990849318783, - "grad_norm": 1.6358950548825064, - "learning_rate": 1.908817637339503e-06, - "loss": 1.2023, - "step": 1202 - }, - { - "epoch": 0.1630854741408527, - "grad_norm": 1.9549365335637314, - "learning_rate": 1.9086343441868706e-06, - "loss": 1.2205, - "step": 1203 - }, - { - "epoch": 0.1632210397885176, - "grad_norm": 1.881001839422245, - "learning_rate": 1.908450875811277e-06, - "loss": 1.2732, - "step": 1204 - }, - { - "epoch": 0.16335660543618247, - "grad_norm": 1.3809549840052702, - "learning_rate": 1.908267232248103e-06, - "loss": 1.2525, - "step": 1205 - }, - { - "epoch": 0.16349217108384737, - "grad_norm": 1.8758952208296058, - "learning_rate": 1.9080834135327624e-06, - "loss": 1.2481, - "step": 1206 - }, - { - "epoch": 0.16362773673151224, - "grad_norm": 1.7296358964948957, - "learning_rate": 1.907899419700704e-06, - "loss": 1.2614, - "step": 1207 - }, - { - "epoch": 0.1637633023791771, - "grad_norm": 2.110213983996092, - "learning_rate": 1.9077152507874086e-06, - "loss": 1.1906, - "step": 1208 - }, - { - "epoch": 0.163898868026842, - "grad_norm": 1.5370691582123748, - "learning_rate": 1.9075309068283928e-06, - "loss": 1.2345, - "step": 1209 - }, - { - "epoch": 0.16403443367450687, - "grad_norm": 1.4505410365828166, - "learning_rate": 1.9073463878592046e-06, - "loss": 1.2003, - "step": 1210 - }, - { - "epoch": 0.16416999932217177, - "grad_norm": 2.692676497575644, - "learning_rate": 1.9071616939154279e-06, - "loss": 1.2444, - "step": 1211 - }, - { - "epoch": 0.16430556496983664, - "grad_norm": 1.6517613509834048, - "learning_rate": 1.9069768250326792e-06, - "loss": 1.2737, - "step": 1212 - }, - { - "epoch": 0.1644411306175015, - "grad_norm": 1.9260493509077707, - "learning_rate": 1.9067917812466088e-06, - "loss": 1.2173, - "step": 1213 - }, - { - "epoch": 0.1645766962651664, - "grad_norm": 1.94710200650408, - "learning_rate": 1.9066065625929014e-06, - "loss": 1.2303, - "step": 1214 - }, - { - "epoch": 0.16471226191283128, - "grad_norm": 2.2388468924762304, - "learning_rate": 1.9064211691072747e-06, - "loss": 1.2253, - "step": 1215 - }, - { - "epoch": 0.16484782756049618, - "grad_norm": 1.7769142661547939, - "learning_rate": 1.9062356008254804e-06, - "loss": 1.246, - "step": 1216 - }, - { - "epoch": 0.16498339320816105, - "grad_norm": 1.6544897023854173, - "learning_rate": 1.906049857783304e-06, - "loss": 1.2254, - "step": 1217 - }, - { - "epoch": 0.16511895885582592, - "grad_norm": 1.6194001785332748, - "learning_rate": 1.905863940016564e-06, - "loss": 1.2371, - "step": 1218 - }, - { - "epoch": 0.16525452450349082, - "grad_norm": 2.781212935737495, - "learning_rate": 1.9056778475611143e-06, - "loss": 1.2224, - "step": 1219 - }, - { - "epoch": 0.1653900901511557, - "grad_norm": 1.6325972068946706, - "learning_rate": 1.9054915804528403e-06, - "loss": 1.2327, - "step": 1220 - }, - { - "epoch": 0.1655256557988206, - "grad_norm": 1.6477275619708773, - "learning_rate": 1.9053051387276625e-06, - "loss": 1.2619, - "step": 1221 - }, - { - "epoch": 0.16566122144648546, - "grad_norm": 1.9504701627505017, - "learning_rate": 1.9051185224215347e-06, - "loss": 1.218, - "step": 1222 - }, - { - "epoch": 0.16579678709415036, - "grad_norm": 1.549018003834068, - "learning_rate": 1.9049317315704445e-06, - "loss": 1.2065, - "step": 1223 - }, - { - "epoch": 0.16593235274181523, - "grad_norm": 1.6276232020915615, - "learning_rate": 1.904744766210413e-06, - "loss": 1.2225, - "step": 1224 - }, - { - "epoch": 0.1660679183894801, - "grad_norm": 1.4139028864868886, - "learning_rate": 1.904557626377495e-06, - "loss": 1.2441, - "step": 1225 - }, - { - "epoch": 0.166203484037145, - "grad_norm": 2.899999838819042, - "learning_rate": 1.9043703121077788e-06, - "loss": 1.2195, - "step": 1226 - }, - { - "epoch": 0.16633904968480986, - "grad_norm": 1.6808682270855506, - "learning_rate": 1.9041828234373866e-06, - "loss": 1.2488, - "step": 1227 - }, - { - "epoch": 0.16647461533247476, - "grad_norm": 2.1294958919748472, - "learning_rate": 1.903995160402474e-06, - "loss": 1.1919, - "step": 1228 - }, - { - "epoch": 0.16661018098013963, - "grad_norm": 2.2390371296199727, - "learning_rate": 1.9038073230392306e-06, - "loss": 1.2346, - "step": 1229 - }, - { - "epoch": 0.1667457466278045, - "grad_norm": 1.4030177778805542, - "learning_rate": 1.903619311383879e-06, - "loss": 1.2556, - "step": 1230 - }, - { - "epoch": 0.1668813122754694, - "grad_norm": 2.8284758571552615, - "learning_rate": 1.903431125472676e-06, - "loss": 1.2438, - "step": 1231 - }, - { - "epoch": 0.16701687792313427, - "grad_norm": 2.8926412654743943, - "learning_rate": 1.903242765341912e-06, - "loss": 1.2607, - "step": 1232 - }, - { - "epoch": 0.16715244357079917, - "grad_norm": 2.9119395512227704, - "learning_rate": 1.90305423102791e-06, - "loss": 1.2115, - "step": 1233 - }, - { - "epoch": 0.16728800921846404, - "grad_norm": 1.9369059504769695, - "learning_rate": 1.902865522567028e-06, - "loss": 1.223, - "step": 1234 - }, - { - "epoch": 0.1674235748661289, - "grad_norm": 1.6368639667097638, - "learning_rate": 1.9026766399956568e-06, - "loss": 1.2371, - "step": 1235 - }, - { - "epoch": 0.1675591405137938, - "grad_norm": 1.5421275831292478, - "learning_rate": 1.9024875833502208e-06, - "loss": 1.2217, - "step": 1236 - }, - { - "epoch": 0.16769470616145868, - "grad_norm": 1.995335951544947, - "learning_rate": 1.9022983526671784e-06, - "loss": 1.1748, - "step": 1237 - }, - { - "epoch": 0.16783027180912358, - "grad_norm": 1.4814132833880838, - "learning_rate": 1.9021089479830206e-06, - "loss": 1.2395, - "step": 1238 - }, - { - "epoch": 0.16796583745678845, - "grad_norm": 1.5876523981299182, - "learning_rate": 1.9019193693342733e-06, - "loss": 1.2299, - "step": 1239 - }, - { - "epoch": 0.16810140310445335, - "grad_norm": 1.4715063588536506, - "learning_rate": 1.9017296167574948e-06, - "loss": 1.2034, - "step": 1240 - }, - { - "epoch": 0.16823696875211822, - "grad_norm": 1.7527293572058136, - "learning_rate": 1.9015396902892775e-06, - "loss": 1.2346, - "step": 1241 - }, - { - "epoch": 0.16837253439978309, - "grad_norm": 2.177961238330356, - "learning_rate": 1.9013495899662474e-06, - "loss": 1.2821, - "step": 1242 - }, - { - "epoch": 0.16850810004744798, - "grad_norm": 2.1873694149236678, - "learning_rate": 1.9011593158250637e-06, - "loss": 1.2377, - "step": 1243 - }, - { - "epoch": 0.16864366569511285, - "grad_norm": 1.4210379898299248, - "learning_rate": 1.9009688679024189e-06, - "loss": 1.198, - "step": 1244 - }, - { - "epoch": 0.16877923134277775, - "grad_norm": 3.5550944373368436, - "learning_rate": 1.9007782462350401e-06, - "loss": 1.2429, - "step": 1245 - }, - { - "epoch": 0.16891479699044262, - "grad_norm": 1.64143143668381, - "learning_rate": 1.9005874508596868e-06, - "loss": 1.2313, - "step": 1246 - }, - { - "epoch": 0.1690503626381075, - "grad_norm": 1.481486942956639, - "learning_rate": 1.9003964818131524e-06, - "loss": 1.2676, - "step": 1247 - }, - { - "epoch": 0.1691859282857724, - "grad_norm": 1.5562905245091379, - "learning_rate": 1.9002053391322636e-06, - "loss": 1.207, - "step": 1248 - }, - { - "epoch": 0.16932149393343726, - "grad_norm": 1.7416632070939642, - "learning_rate": 1.900014022853881e-06, - "loss": 1.2367, - "step": 1249 - }, - { - "epoch": 0.16945705958110216, - "grad_norm": 1.594186395973025, - "learning_rate": 1.8998225330148988e-06, - "loss": 1.209, - "step": 1250 - }, - { - "epoch": 0.16959262522876703, - "grad_norm": 1.6449336822475855, - "learning_rate": 1.8996308696522432e-06, - "loss": 1.2315, - "step": 1251 - }, - { - "epoch": 0.1697281908764319, - "grad_norm": 2.5663776721328806, - "learning_rate": 1.899439032802876e-06, - "loss": 1.2385, - "step": 1252 - }, - { - "epoch": 0.1698637565240968, - "grad_norm": 2.1140748259454702, - "learning_rate": 1.8992470225037911e-06, - "loss": 1.2893, - "step": 1253 - }, - { - "epoch": 0.16999932217176167, - "grad_norm": 6.085412541415054, - "learning_rate": 1.899054838792016e-06, - "loss": 1.2212, - "step": 1254 - }, - { - "epoch": 0.17013488781942657, - "grad_norm": 1.4520217755509597, - "learning_rate": 1.8988624817046119e-06, - "loss": 1.1964, - "step": 1255 - }, - { - "epoch": 0.17027045346709144, - "grad_norm": 1.546899649762348, - "learning_rate": 1.8986699512786735e-06, - "loss": 1.2381, - "step": 1256 - }, - { - "epoch": 0.1704060191147563, - "grad_norm": 2.1238574846122815, - "learning_rate": 1.898477247551329e-06, - "loss": 1.2077, - "step": 1257 - }, - { - "epoch": 0.1705415847624212, - "grad_norm": 1.6135432756906116, - "learning_rate": 1.8982843705597388e-06, - "loss": 1.2369, - "step": 1258 - }, - { - "epoch": 0.17067715041008608, - "grad_norm": 2.737044276758795, - "learning_rate": 1.8980913203410988e-06, - "loss": 1.2425, - "step": 1259 - }, - { - "epoch": 0.17081271605775097, - "grad_norm": 1.9297603166611985, - "learning_rate": 1.8978980969326366e-06, - "loss": 1.246, - "step": 1260 - }, - { - "epoch": 0.17094828170541584, - "grad_norm": 1.8124515151795635, - "learning_rate": 1.897704700371614e-06, - "loss": 1.2198, - "step": 1261 - }, - { - "epoch": 0.17108384735308074, - "grad_norm": 1.6942674342538102, - "learning_rate": 1.8975111306953261e-06, - "loss": 1.2775, - "step": 1262 - }, - { - "epoch": 0.1712194130007456, - "grad_norm": 1.6128875945812085, - "learning_rate": 1.8973173879411011e-06, - "loss": 1.2579, - "step": 1263 - }, - { - "epoch": 0.17135497864841048, - "grad_norm": 1.4569367179601904, - "learning_rate": 1.8971234721463008e-06, - "loss": 1.2598, - "step": 1264 - }, - { - "epoch": 0.17149054429607538, - "grad_norm": 1.954573564663893, - "learning_rate": 1.8969293833483202e-06, - "loss": 1.201, - "step": 1265 - }, - { - "epoch": 0.17162610994374025, - "grad_norm": 1.381549668336407, - "learning_rate": 1.896735121584588e-06, - "loss": 1.1705, - "step": 1266 - }, - { - "epoch": 0.17176167559140515, - "grad_norm": 1.4541714981855904, - "learning_rate": 1.8965406868925664e-06, - "loss": 1.2044, - "step": 1267 - }, - { - "epoch": 0.17189724123907002, - "grad_norm": 1.5748815834073446, - "learning_rate": 1.89634607930975e-06, - "loss": 1.2377, - "step": 1268 - }, - { - "epoch": 0.1720328068867349, - "grad_norm": 1.6459742780296809, - "learning_rate": 1.8961512988736671e-06, - "loss": 1.2378, - "step": 1269 - }, - { - "epoch": 0.1721683725343998, - "grad_norm": 1.5444869272892492, - "learning_rate": 1.8959563456218807e-06, - "loss": 1.2289, - "step": 1270 - }, - { - "epoch": 0.17230393818206466, - "grad_norm": 1.6880338697947572, - "learning_rate": 1.8957612195919847e-06, - "loss": 1.2314, - "step": 1271 - }, - { - "epoch": 0.17243950382972956, - "grad_norm": 2.6579808758625747, - "learning_rate": 1.8955659208216086e-06, - "loss": 1.2367, - "step": 1272 - }, - { - "epoch": 0.17257506947739443, - "grad_norm": 1.6248204963880861, - "learning_rate": 1.8953704493484138e-06, - "loss": 1.2281, - "step": 1273 - }, - { - "epoch": 0.1727106351250593, - "grad_norm": 2.857034709332075, - "learning_rate": 1.8951748052100954e-06, - "loss": 1.2959, - "step": 1274 - }, - { - "epoch": 0.1728462007727242, - "grad_norm": 1.5590003975053879, - "learning_rate": 1.894978988444382e-06, - "loss": 1.2029, - "step": 1275 - }, - { - "epoch": 0.17298176642038907, - "grad_norm": 2.8395255824637937, - "learning_rate": 1.8947829990890347e-06, - "loss": 1.2658, - "step": 1276 - }, - { - "epoch": 0.17311733206805396, - "grad_norm": 1.6717236496037429, - "learning_rate": 1.8945868371818493e-06, - "loss": 1.2303, - "step": 1277 - }, - { - "epoch": 0.17325289771571883, - "grad_norm": 1.847374491782956, - "learning_rate": 1.8943905027606539e-06, - "loss": 1.2273, - "step": 1278 - }, - { - "epoch": 0.17338846336338373, - "grad_norm": 1.4158919288090577, - "learning_rate": 1.8941939958633099e-06, - "loss": 1.257, - "step": 1279 - }, - { - "epoch": 0.1735240290110486, - "grad_norm": 1.4785555608788699, - "learning_rate": 1.8939973165277123e-06, - "loss": 1.237, - "step": 1280 - }, - { - "epoch": 0.17365959465871347, - "grad_norm": 1.9495125029137732, - "learning_rate": 1.8938004647917886e-06, - "loss": 1.2172, - "step": 1281 - }, - { - "epoch": 0.17379516030637837, - "grad_norm": 1.5991833864144316, - "learning_rate": 1.8936034406935008e-06, - "loss": 1.1927, - "step": 1282 - }, - { - "epoch": 0.17393072595404324, - "grad_norm": 1.7374298732450029, - "learning_rate": 1.8934062442708432e-06, - "loss": 1.1908, - "step": 1283 - }, - { - "epoch": 0.17406629160170814, - "grad_norm": 1.703671329384703, - "learning_rate": 1.8932088755618434e-06, - "loss": 1.2465, - "step": 1284 - }, - { - "epoch": 0.174201857249373, - "grad_norm": 1.861851451173301, - "learning_rate": 1.8930113346045627e-06, - "loss": 1.2178, - "step": 1285 - }, - { - "epoch": 0.17433742289703788, - "grad_norm": 1.675580066393857, - "learning_rate": 1.892813621437095e-06, - "loss": 1.2194, - "step": 1286 - }, - { - "epoch": 0.17447298854470278, - "grad_norm": 1.8281451948733107, - "learning_rate": 1.8926157360975674e-06, - "loss": 1.2209, - "step": 1287 - }, - { - "epoch": 0.17460855419236765, - "grad_norm": 1.8423592869332466, - "learning_rate": 1.8924176786241416e-06, - "loss": 1.2207, - "step": 1288 - }, - { - "epoch": 0.17474411984003255, - "grad_norm": 14.63064942656738, - "learning_rate": 1.8922194490550103e-06, - "loss": 1.2224, - "step": 1289 - }, - { - "epoch": 0.17487968548769742, - "grad_norm": 1.6884032259046406, - "learning_rate": 1.8920210474284014e-06, - "loss": 1.2614, - "step": 1290 - }, - { - "epoch": 0.1750152511353623, - "grad_norm": 1.5821086911107092, - "learning_rate": 1.8918224737825743e-06, - "loss": 1.2051, - "step": 1291 - }, - { - "epoch": 0.17515081678302719, - "grad_norm": 1.8467802568178933, - "learning_rate": 1.891623728155823e-06, - "loss": 1.2144, - "step": 1292 - }, - { - "epoch": 0.17528638243069206, - "grad_norm": 1.6312057343935724, - "learning_rate": 1.8914248105864738e-06, - "loss": 1.2218, - "step": 1293 - }, - { - "epoch": 0.17542194807835695, - "grad_norm": 1.9575475176522887, - "learning_rate": 1.8912257211128864e-06, - "loss": 1.2457, - "step": 1294 - }, - { - "epoch": 0.17555751372602182, - "grad_norm": 1.478477660404088, - "learning_rate": 1.8910264597734535e-06, - "loss": 1.2041, - "step": 1295 - }, - { - "epoch": 0.1756930793736867, - "grad_norm": 2.1459025528346225, - "learning_rate": 1.8908270266066011e-06, - "loss": 1.2449, - "step": 1296 - }, - { - "epoch": 0.1758286450213516, - "grad_norm": 1.6990274792728293, - "learning_rate": 1.8906274216507885e-06, - "loss": 1.2408, - "step": 1297 - }, - { - "epoch": 0.17596421066901646, - "grad_norm": 1.4856002633854293, - "learning_rate": 1.8904276449445079e-06, - "loss": 1.2152, - "step": 1298 - }, - { - "epoch": 0.17609977631668136, - "grad_norm": 2.247785644635343, - "learning_rate": 1.8902276965262845e-06, - "loss": 1.2075, - "step": 1299 - }, - { - "epoch": 0.17623534196434623, - "grad_norm": 1.4032737641863449, - "learning_rate": 1.8900275764346768e-06, - "loss": 1.2134, - "step": 1300 - }, - { - "epoch": 0.17637090761201113, - "grad_norm": 2.165152703994322, - "learning_rate": 1.8898272847082764e-06, - "loss": 1.2292, - "step": 1301 - }, - { - "epoch": 0.176506473259676, - "grad_norm": 1.4857191988094804, - "learning_rate": 1.8896268213857078e-06, - "loss": 1.2579, - "step": 1302 - }, - { - "epoch": 0.17664203890734087, - "grad_norm": 1.4626631045837586, - "learning_rate": 1.8894261865056293e-06, - "loss": 1.2156, - "step": 1303 - }, - { - "epoch": 0.17677760455500577, - "grad_norm": 2.04013157789782, - "learning_rate": 1.8892253801067315e-06, - "loss": 1.2303, - "step": 1304 - }, - { - "epoch": 0.17691317020267064, - "grad_norm": 1.7668663080322815, - "learning_rate": 1.889024402227738e-06, - "loss": 1.2018, - "step": 1305 - }, - { - "epoch": 0.17704873585033554, - "grad_norm": 2.266898818391618, - "learning_rate": 1.8888232529074062e-06, - "loss": 1.2023, - "step": 1306 - }, - { - "epoch": 0.1771843014980004, - "grad_norm": 1.9946332144073193, - "learning_rate": 1.888621932184526e-06, - "loss": 1.2844, - "step": 1307 - }, - { - "epoch": 0.17731986714566528, - "grad_norm": 1.5185393924210941, - "learning_rate": 1.8884204400979206e-06, - "loss": 1.2484, - "step": 1308 - }, - { - "epoch": 0.17745543279333018, - "grad_norm": 2.3782533565887047, - "learning_rate": 1.888218776686446e-06, - "loss": 1.2054, - "step": 1309 - }, - { - "epoch": 0.17759099844099505, - "grad_norm": 1.7387908136071994, - "learning_rate": 1.8880169419889915e-06, - "loss": 1.2072, - "step": 1310 - }, - { - "epoch": 0.17772656408865994, - "grad_norm": 1.7420104866217532, - "learning_rate": 1.8878149360444793e-06, - "loss": 1.1975, - "step": 1311 - }, - { - "epoch": 0.17786212973632481, - "grad_norm": 2.120785017835378, - "learning_rate": 1.8876127588918648e-06, - "loss": 1.2445, - "step": 1312 - }, - { - "epoch": 0.17799769538398968, - "grad_norm": 2.52340646920416, - "learning_rate": 1.887410410570136e-06, - "loss": 1.2234, - "step": 1313 - }, - { - "epoch": 0.17813326103165458, - "grad_norm": 1.4588727672582837, - "learning_rate": 1.8872078911183145e-06, - "loss": 1.2447, - "step": 1314 - }, - { - "epoch": 0.17826882667931945, - "grad_norm": 3.025330976926478, - "learning_rate": 1.8870052005754542e-06, - "loss": 1.1925, - "step": 1315 - }, - { - "epoch": 0.17840439232698435, - "grad_norm": 1.5295150877744053, - "learning_rate": 1.8868023389806428e-06, - "loss": 1.2012, - "step": 1316 - }, - { - "epoch": 0.17853995797464922, - "grad_norm": 1.7736095923822441, - "learning_rate": 1.8865993063730002e-06, - "loss": 1.2358, - "step": 1317 - }, - { - "epoch": 0.1786755236223141, - "grad_norm": 1.5129914960932915, - "learning_rate": 1.8863961027916794e-06, - "loss": 1.2537, - "step": 1318 - }, - { - "epoch": 0.178811089269979, - "grad_norm": 1.7327605754442004, - "learning_rate": 1.8861927282758673e-06, - "loss": 1.2428, - "step": 1319 - }, - { - "epoch": 0.17894665491764386, - "grad_norm": 1.6054407287986312, - "learning_rate": 1.8859891828647827e-06, - "loss": 1.1973, - "step": 1320 - }, - { - "epoch": 0.17908222056530876, - "grad_norm": 1.8767285860216136, - "learning_rate": 1.8857854665976777e-06, - "loss": 1.2334, - "step": 1321 - }, - { - "epoch": 0.17921778621297363, - "grad_norm": 1.5813608476492758, - "learning_rate": 1.8855815795138375e-06, - "loss": 1.2471, - "step": 1322 - }, - { - "epoch": 0.17935335186063853, - "grad_norm": 1.5743123148880938, - "learning_rate": 1.8853775216525803e-06, - "loss": 1.2552, - "step": 1323 - }, - { - "epoch": 0.1794889175083034, - "grad_norm": 1.6145579983179494, - "learning_rate": 1.8851732930532563e-06, - "loss": 1.2085, - "step": 1324 - }, - { - "epoch": 0.17962448315596827, - "grad_norm": 2.428013942504317, - "learning_rate": 1.8849688937552502e-06, - "loss": 1.2252, - "step": 1325 - }, - { - "epoch": 0.17976004880363317, - "grad_norm": 1.6134097929973992, - "learning_rate": 1.8847643237979783e-06, - "loss": 1.2156, - "step": 1326 - }, - { - "epoch": 0.17989561445129804, - "grad_norm": 1.951922039760306, - "learning_rate": 1.8845595832208905e-06, - "loss": 1.1987, - "step": 1327 - }, - { - "epoch": 0.18003118009896293, - "grad_norm": 1.73916113485436, - "learning_rate": 1.8843546720634693e-06, - "loss": 1.2558, - "step": 1328 - }, - { - "epoch": 0.1801667457466278, - "grad_norm": 2.0435564943498634, - "learning_rate": 1.8841495903652302e-06, - "loss": 1.2035, - "step": 1329 - }, - { - "epoch": 0.18030231139429267, - "grad_norm": 1.5093139401357645, - "learning_rate": 1.883944338165722e-06, - "loss": 1.2434, - "step": 1330 - }, - { - "epoch": 0.18043787704195757, - "grad_norm": 1.972707663184111, - "learning_rate": 1.8837389155045253e-06, - "loss": 1.2112, - "step": 1331 - }, - { - "epoch": 0.18057344268962244, - "grad_norm": 1.4462918077330376, - "learning_rate": 1.883533322421255e-06, - "loss": 1.2217, - "step": 1332 - }, - { - "epoch": 0.18070900833728734, - "grad_norm": 1.6914485955939509, - "learning_rate": 1.883327558955557e-06, - "loss": 1.2428, - "step": 1333 - }, - { - "epoch": 0.1808445739849522, - "grad_norm": 3.0198531318910944, - "learning_rate": 1.8831216251471123e-06, - "loss": 1.2474, - "step": 1334 - }, - { - "epoch": 0.18098013963261708, - "grad_norm": 1.8020316494539865, - "learning_rate": 1.8829155210356329e-06, - "loss": 1.2021, - "step": 1335 - }, - { - "epoch": 0.18111570528028198, - "grad_norm": 1.9348041114957417, - "learning_rate": 1.8827092466608647e-06, - "loss": 1.2059, - "step": 1336 - }, - { - "epoch": 0.18125127092794685, - "grad_norm": 1.6731421412991756, - "learning_rate": 1.8825028020625858e-06, - "loss": 1.1826, - "step": 1337 - }, - { - "epoch": 0.18138683657561175, - "grad_norm": 2.147033880395358, - "learning_rate": 1.8822961872806076e-06, - "loss": 1.2231, - "step": 1338 - }, - { - "epoch": 0.18152240222327662, - "grad_norm": 1.4569115526616976, - "learning_rate": 1.8820894023547745e-06, - "loss": 1.2028, - "step": 1339 - }, - { - "epoch": 0.18165796787094152, - "grad_norm": 1.262445466261922, - "learning_rate": 1.8818824473249624e-06, - "loss": 1.2079, - "step": 1340 - }, - { - "epoch": 0.1817935335186064, - "grad_norm": 1.542281370237099, - "learning_rate": 1.8816753222310818e-06, - "loss": 1.2439, - "step": 1341 - }, - { - "epoch": 0.18192909916627126, - "grad_norm": 1.7217766648322952, - "learning_rate": 1.8814680271130747e-06, - "loss": 1.2101, - "step": 1342 - }, - { - "epoch": 0.18206466481393616, - "grad_norm": 2.086004561649804, - "learning_rate": 1.8812605620109165e-06, - "loss": 1.1935, - "step": 1343 - }, - { - "epoch": 0.18220023046160103, - "grad_norm": 1.7744362365568493, - "learning_rate": 1.881052926964615e-06, - "loss": 1.2461, - "step": 1344 - }, - { - "epoch": 0.18233579610926592, - "grad_norm": 2.2582454011363913, - "learning_rate": 1.8808451220142114e-06, - "loss": 1.2422, - "step": 1345 - }, - { - "epoch": 0.1824713617569308, - "grad_norm": 2.3300203013256375, - "learning_rate": 1.880637147199779e-06, - "loss": 1.2448, - "step": 1346 - }, - { - "epoch": 0.18260692740459566, - "grad_norm": 1.799948915139363, - "learning_rate": 1.8804290025614242e-06, - "loss": 1.2555, - "step": 1347 - }, - { - "epoch": 0.18274249305226056, - "grad_norm": 1.7521780434742782, - "learning_rate": 1.8802206881392858e-06, - "loss": 1.2257, - "step": 1348 - }, - { - "epoch": 0.18287805869992543, - "grad_norm": 1.5365784827504991, - "learning_rate": 1.8800122039735355e-06, - "loss": 1.2496, - "step": 1349 - }, - { - "epoch": 0.18301362434759033, - "grad_norm": 1.6062616176570053, - "learning_rate": 1.8798035501043783e-06, - "loss": 1.1928, - "step": 1350 - }, - { - "epoch": 0.1831491899952552, - "grad_norm": 1.4771253931149013, - "learning_rate": 1.879594726572051e-06, - "loss": 1.201, - "step": 1351 - }, - { - "epoch": 0.18328475564292007, - "grad_norm": 1.5046859599316353, - "learning_rate": 1.8793857334168243e-06, - "loss": 1.2611, - "step": 1352 - }, - { - "epoch": 0.18342032129058497, - "grad_norm": 1.958647834984457, - "learning_rate": 1.8791765706789997e-06, - "loss": 1.1959, - "step": 1353 - }, - { - "epoch": 0.18355588693824984, - "grad_norm": 1.3977943083897169, - "learning_rate": 1.8789672383989134e-06, - "loss": 1.206, - "step": 1354 - }, - { - "epoch": 0.18369145258591474, - "grad_norm": 1.4698720079510912, - "learning_rate": 1.8787577366169336e-06, - "loss": 1.2473, - "step": 1355 - }, - { - "epoch": 0.1838270182335796, - "grad_norm": 5.155243754010471, - "learning_rate": 1.8785480653734607e-06, - "loss": 1.2369, - "step": 1356 - }, - { - "epoch": 0.18396258388124448, - "grad_norm": 2.566332179175006, - "learning_rate": 1.878338224708928e-06, - "loss": 1.2336, - "step": 1357 - }, - { - "epoch": 0.18409814952890938, - "grad_norm": 1.5462264028884325, - "learning_rate": 1.878128214663802e-06, - "loss": 1.2067, - "step": 1358 - }, - { - "epoch": 0.18423371517657425, - "grad_norm": 1.9323371172192303, - "learning_rate": 1.8779180352785814e-06, - "loss": 1.2251, - "step": 1359 - }, - { - "epoch": 0.18436928082423915, - "grad_norm": 1.5844227812979286, - "learning_rate": 1.8777076865937976e-06, - "loss": 1.2228, - "step": 1360 - }, - { - "epoch": 0.18450484647190402, - "grad_norm": 1.9436030831516313, - "learning_rate": 1.8774971686500143e-06, - "loss": 1.215, - "step": 1361 - }, - { - "epoch": 0.18464041211956891, - "grad_norm": 1.4151339029696255, - "learning_rate": 1.877286481487829e-06, - "loss": 1.1975, - "step": 1362 - }, - { - "epoch": 0.18477597776723378, - "grad_norm": 1.7516739680541114, - "learning_rate": 1.8770756251478703e-06, - "loss": 1.2466, - "step": 1363 - }, - { - "epoch": 0.18491154341489865, - "grad_norm": 1.535957023227142, - "learning_rate": 1.8768645996708007e-06, - "loss": 1.2252, - "step": 1364 - }, - { - "epoch": 0.18504710906256355, - "grad_norm": 1.7369372446456999, - "learning_rate": 1.8766534050973144e-06, - "loss": 1.1846, - "step": 1365 - }, - { - "epoch": 0.18518267471022842, - "grad_norm": 1.886149450244158, - "learning_rate": 1.876442041468139e-06, - "loss": 1.2119, - "step": 1366 - }, - { - "epoch": 0.18531824035789332, - "grad_norm": 1.722451823386913, - "learning_rate": 1.876230508824034e-06, - "loss": 1.2215, - "step": 1367 - }, - { - "epoch": 0.1854538060055582, - "grad_norm": 2.5464862810796145, - "learning_rate": 1.876018807205792e-06, - "loss": 1.1975, - "step": 1368 - }, - { - "epoch": 0.18558937165322306, - "grad_norm": 1.4767566694489909, - "learning_rate": 1.875806936654238e-06, - "loss": 1.2029, - "step": 1369 - }, - { - "epoch": 0.18572493730088796, - "grad_norm": 2.5762626565571707, - "learning_rate": 1.8755948972102292e-06, - "loss": 1.2244, - "step": 1370 - }, - { - "epoch": 0.18586050294855283, - "grad_norm": 1.5158147214514315, - "learning_rate": 1.8753826889146562e-06, - "loss": 1.2325, - "step": 1371 - }, - { - "epoch": 0.18599606859621773, - "grad_norm": 1.8491504947796384, - "learning_rate": 1.8751703118084413e-06, - "loss": 1.2132, - "step": 1372 - }, - { - "epoch": 0.1861316342438826, - "grad_norm": 1.418267841191721, - "learning_rate": 1.8749577659325401e-06, - "loss": 1.193, - "step": 1373 - }, - { - "epoch": 0.18626719989154747, - "grad_norm": 1.5579546203705545, - "learning_rate": 1.8747450513279403e-06, - "loss": 1.1999, - "step": 1374 - }, - { - "epoch": 0.18640276553921237, - "grad_norm": 1.5655437170466329, - "learning_rate": 1.874532168035662e-06, - "loss": 1.2669, - "step": 1375 - }, - { - "epoch": 0.18653833118687724, - "grad_norm": 1.364652416288234, - "learning_rate": 1.8743191160967584e-06, - "loss": 1.2369, - "step": 1376 - }, - { - "epoch": 0.18667389683454214, - "grad_norm": 1.4618003112972837, - "learning_rate": 1.8741058955523145e-06, - "loss": 1.2488, - "step": 1377 - }, - { - "epoch": 0.186809462482207, - "grad_norm": 1.6635047030516643, - "learning_rate": 1.8738925064434485e-06, - "loss": 1.2275, - "step": 1378 - }, - { - "epoch": 0.1869450281298719, - "grad_norm": 1.982085294965747, - "learning_rate": 1.8736789488113108e-06, - "loss": 1.2161, - "step": 1379 - }, - { - "epoch": 0.18708059377753677, - "grad_norm": 1.8064094172369536, - "learning_rate": 1.8734652226970844e-06, - "loss": 1.2104, - "step": 1380 - }, - { - "epoch": 0.18721615942520164, - "grad_norm": 1.7220920245287827, - "learning_rate": 1.8732513281419843e-06, - "loss": 1.2282, - "step": 1381 - }, - { - "epoch": 0.18735172507286654, - "grad_norm": 1.7478242397035015, - "learning_rate": 1.8730372651872585e-06, - "loss": 1.2209, - "step": 1382 - }, - { - "epoch": 0.1874872907205314, - "grad_norm": 1.6033971846602753, - "learning_rate": 1.8728230338741877e-06, - "loss": 1.1873, - "step": 1383 - }, - { - "epoch": 0.1876228563681963, - "grad_norm": 1.5356201008053036, - "learning_rate": 1.8726086342440842e-06, - "loss": 1.2096, - "step": 1384 - }, - { - "epoch": 0.18775842201586118, - "grad_norm": 1.4041536948625855, - "learning_rate": 1.8723940663382939e-06, - "loss": 1.2091, - "step": 1385 - }, - { - "epoch": 0.18789398766352605, - "grad_norm": 1.4362847145742166, - "learning_rate": 1.8721793301981937e-06, - "loss": 1.1929, - "step": 1386 - }, - { - "epoch": 0.18802955331119095, - "grad_norm": 2.984363726860373, - "learning_rate": 1.8719644258651942e-06, - "loss": 1.2356, - "step": 1387 - }, - { - "epoch": 0.18816511895885582, - "grad_norm": 1.5221416449088556, - "learning_rate": 1.8717493533807386e-06, - "loss": 1.2016, - "step": 1388 - }, - { - "epoch": 0.18830068460652072, - "grad_norm": 1.610399382730175, - "learning_rate": 1.871534112786301e-06, - "loss": 1.2047, - "step": 1389 - }, - { - "epoch": 0.1884362502541856, - "grad_norm": 2.0042908418669247, - "learning_rate": 1.8713187041233893e-06, - "loss": 1.2012, - "step": 1390 - }, - { - "epoch": 0.18857181590185046, - "grad_norm": 1.5663836862419407, - "learning_rate": 1.8711031274335434e-06, - "loss": 1.1585, - "step": 1391 - }, - { - "epoch": 0.18870738154951536, - "grad_norm": 1.8402522509483414, - "learning_rate": 1.8708873827583352e-06, - "loss": 1.2463, - "step": 1392 - }, - { - "epoch": 0.18884294719718023, - "grad_norm": 2.0786600819659435, - "learning_rate": 1.8706714701393697e-06, - "loss": 1.2435, - "step": 1393 - }, - { - "epoch": 0.18897851284484513, - "grad_norm": 1.854288466513282, - "learning_rate": 1.8704553896182838e-06, - "loss": 1.2101, - "step": 1394 - }, - { - "epoch": 0.18911407849251, - "grad_norm": 1.4671808543724592, - "learning_rate": 1.870239141236747e-06, - "loss": 1.2555, - "step": 1395 - }, - { - "epoch": 0.18924964414017487, - "grad_norm": 1.5266128351661312, - "learning_rate": 1.870022725036461e-06, - "loss": 1.1862, - "step": 1396 - }, - { - "epoch": 0.18938520978783976, - "grad_norm": 1.8555742750651882, - "learning_rate": 1.8698061410591604e-06, - "loss": 1.2171, - "step": 1397 - }, - { - "epoch": 0.18952077543550463, - "grad_norm": 2.2848609680464906, - "learning_rate": 1.8695893893466108e-06, - "loss": 1.1999, - "step": 1398 - }, - { - "epoch": 0.18965634108316953, - "grad_norm": 1.810559669922742, - "learning_rate": 1.869372469940612e-06, - "loss": 1.2411, - "step": 1399 - }, - { - "epoch": 0.1897919067308344, - "grad_norm": 1.632557466654525, - "learning_rate": 1.8691553828829948e-06, - "loss": 1.2286, - "step": 1400 - }, - { - "epoch": 0.1899274723784993, - "grad_norm": 1.5970728773050953, - "learning_rate": 1.8689381282156222e-06, - "loss": 1.1898, - "step": 1401 - }, - { - "epoch": 0.19006303802616417, - "grad_norm": 2.0485310915296755, - "learning_rate": 1.868720705980391e-06, - "loss": 1.2338, - "step": 1402 - }, - { - "epoch": 0.19019860367382904, - "grad_norm": 2.2894986115728404, - "learning_rate": 1.8685031162192287e-06, - "loss": 1.2296, - "step": 1403 - }, - { - "epoch": 0.19033416932149394, - "grad_norm": 1.733041333670982, - "learning_rate": 1.8682853589740962e-06, - "loss": 1.2067, - "step": 1404 - }, - { - "epoch": 0.1904697349691588, - "grad_norm": 1.6022640755790163, - "learning_rate": 1.8680674342869858e-06, - "loss": 1.2103, - "step": 1405 - }, - { - "epoch": 0.1906053006168237, - "grad_norm": 2.2647397947293184, - "learning_rate": 1.867849342199923e-06, - "loss": 1.1987, - "step": 1406 - }, - { - "epoch": 0.19074086626448858, - "grad_norm": 1.7544470852859406, - "learning_rate": 1.867631082754965e-06, - "loss": 1.2209, - "step": 1407 - }, - { - "epoch": 0.19087643191215345, - "grad_norm": 1.7207728872358996, - "learning_rate": 1.8674126559942009e-06, - "loss": 1.2153, - "step": 1408 - }, - { - "epoch": 0.19101199755981835, - "grad_norm": 3.9648662622809288, - "learning_rate": 1.8671940619597532e-06, - "loss": 1.1899, - "step": 1409 - }, - { - "epoch": 0.19114756320748322, - "grad_norm": 1.7689130305060925, - "learning_rate": 1.8669753006937762e-06, - "loss": 1.2441, - "step": 1410 - }, - { - "epoch": 0.19128312885514812, - "grad_norm": 1.7660883348477603, - "learning_rate": 1.8667563722384559e-06, - "loss": 1.1786, - "step": 1411 - }, - { - "epoch": 0.19141869450281299, - "grad_norm": 3.6368626541420968, - "learning_rate": 1.8665372766360107e-06, - "loss": 1.2526, - "step": 1412 - }, - { - "epoch": 0.19155426015047786, - "grad_norm": 1.633891041403092, - "learning_rate": 1.866318013928692e-06, - "loss": 1.1852, - "step": 1413 - }, - { - "epoch": 0.19168982579814275, - "grad_norm": 1.6601210619601163, - "learning_rate": 1.8660985841587824e-06, - "loss": 1.1753, - "step": 1414 - }, - { - "epoch": 0.19182539144580762, - "grad_norm": 1.9037365646764177, - "learning_rate": 1.8658789873685973e-06, - "loss": 1.2393, - "step": 1415 - }, - { - "epoch": 0.19196095709347252, - "grad_norm": 1.5533688635612095, - "learning_rate": 1.8656592236004847e-06, - "loss": 1.2156, - "step": 1416 - }, - { - "epoch": 0.1920965227411374, - "grad_norm": 1.8846462744658266, - "learning_rate": 1.8654392928968239e-06, - "loss": 1.2129, - "step": 1417 - }, - { - "epoch": 0.1922320883888023, - "grad_norm": 2.815923467876235, - "learning_rate": 1.8652191953000265e-06, - "loss": 1.1925, - "step": 1418 - }, - { - "epoch": 0.19236765403646716, - "grad_norm": 2.374362237648059, - "learning_rate": 1.864998930852537e-06, - "loss": 1.1997, - "step": 1419 - }, - { - "epoch": 0.19250321968413203, - "grad_norm": 2.0791296963082786, - "learning_rate": 1.8647784995968317e-06, - "loss": 1.1727, - "step": 1420 - }, - { - "epoch": 0.19263878533179693, - "grad_norm": 1.6634745806632492, - "learning_rate": 1.8645579015754189e-06, - "loss": 1.1864, - "step": 1421 - }, - { - "epoch": 0.1927743509794618, - "grad_norm": 1.8055219301629448, - "learning_rate": 1.8643371368308389e-06, - "loss": 1.2485, - "step": 1422 - }, - { - "epoch": 0.1929099166271267, - "grad_norm": 1.4879911570768993, - "learning_rate": 1.8641162054056651e-06, - "loss": 1.2316, - "step": 1423 - }, - { - "epoch": 0.19304548227479157, - "grad_norm": 2.194260441396623, - "learning_rate": 1.8638951073425018e-06, - "loss": 1.1989, - "step": 1424 - }, - { - "epoch": 0.19318104792245644, - "grad_norm": 1.6035308419587084, - "learning_rate": 1.8636738426839863e-06, - "loss": 1.205, - "step": 1425 - }, - { - "epoch": 0.19331661357012134, - "grad_norm": 2.5114083014614135, - "learning_rate": 1.8634524114727878e-06, - "loss": 1.2029, - "step": 1426 - }, - { - "epoch": 0.1934521792177862, - "grad_norm": 1.3771221547958599, - "learning_rate": 1.8632308137516071e-06, - "loss": 1.223, - "step": 1427 - }, - { - "epoch": 0.1935877448654511, - "grad_norm": 2.0230105341588462, - "learning_rate": 1.8630090495631783e-06, - "loss": 1.2507, - "step": 1428 - }, - { - "epoch": 0.19372331051311598, - "grad_norm": 1.8266623147508114, - "learning_rate": 1.8627871189502662e-06, - "loss": 1.2105, - "step": 1429 - }, - { - "epoch": 0.19385887616078085, - "grad_norm": 2.470269112695365, - "learning_rate": 1.8625650219556688e-06, - "loss": 1.2227, - "step": 1430 - }, - { - "epoch": 0.19399444180844574, - "grad_norm": 1.4199452970446456, - "learning_rate": 1.8623427586222154e-06, - "loss": 1.2152, - "step": 1431 - }, - { - "epoch": 0.19413000745611061, - "grad_norm": 1.748907051897559, - "learning_rate": 1.8621203289927681e-06, - "loss": 1.2686, - "step": 1432 - }, - { - "epoch": 0.1942655731037755, - "grad_norm": 2.8378595694679043, - "learning_rate": 1.8618977331102204e-06, - "loss": 1.2067, - "step": 1433 - }, - { - "epoch": 0.19440113875144038, - "grad_norm": 1.4890481890509066, - "learning_rate": 1.861674971017498e-06, - "loss": 1.206, - "step": 1434 - }, - { - "epoch": 0.19453670439910525, - "grad_norm": 1.7184265477984764, - "learning_rate": 1.8614520427575596e-06, - "loss": 1.2033, - "step": 1435 - }, - { - "epoch": 0.19467227004677015, - "grad_norm": 1.5231135016006527, - "learning_rate": 1.8612289483733942e-06, - "loss": 1.2011, - "step": 1436 - }, - { - "epoch": 0.19480783569443502, - "grad_norm": 1.6302778308509214, - "learning_rate": 1.8610056879080247e-06, - "loss": 1.1826, - "step": 1437 - }, - { - "epoch": 0.19494340134209992, - "grad_norm": 1.6373423635766284, - "learning_rate": 1.8607822614045041e-06, - "loss": 1.2717, - "step": 1438 - }, - { - "epoch": 0.1950789669897648, - "grad_norm": 1.6108192145484106, - "learning_rate": 1.8605586689059195e-06, - "loss": 1.1767, - "step": 1439 - }, - { - "epoch": 0.1952145326374297, - "grad_norm": 1.9309400916976882, - "learning_rate": 1.8603349104553882e-06, - "loss": 1.1586, - "step": 1440 - }, - { - "epoch": 0.19535009828509456, - "grad_norm": 1.8883982587020072, - "learning_rate": 1.8601109860960603e-06, - "loss": 1.162, - "step": 1441 - }, - { - "epoch": 0.19548566393275943, - "grad_norm": 1.6488161141166962, - "learning_rate": 1.8598868958711185e-06, - "loss": 1.195, - "step": 1442 - }, - { - "epoch": 0.19562122958042433, - "grad_norm": 1.9957640301394148, - "learning_rate": 1.8596626398237762e-06, - "loss": 1.2069, - "step": 1443 - }, - { - "epoch": 0.1957567952280892, - "grad_norm": 2.7526610923526262, - "learning_rate": 1.8594382179972794e-06, - "loss": 1.2074, - "step": 1444 - }, - { - "epoch": 0.1958923608757541, - "grad_norm": 1.8344438524131959, - "learning_rate": 1.8592136304349063e-06, - "loss": 1.2417, - "step": 1445 - }, - { - "epoch": 0.19602792652341897, - "grad_norm": 1.6221374170844824, - "learning_rate": 1.8589888771799669e-06, - "loss": 1.2091, - "step": 1446 - }, - { - "epoch": 0.19616349217108384, - "grad_norm": 1.8134246908259164, - "learning_rate": 1.858763958275803e-06, - "loss": 1.2024, - "step": 1447 - }, - { - "epoch": 0.19629905781874873, - "grad_norm": 1.897391310741579, - "learning_rate": 1.8585388737657883e-06, - "loss": 1.2772, - "step": 1448 - }, - { - "epoch": 0.1964346234664136, - "grad_norm": 1.5838776862653434, - "learning_rate": 1.8583136236933287e-06, - "loss": 1.2478, - "step": 1449 - }, - { - "epoch": 0.1965701891140785, - "grad_norm": 2.6233584354252844, - "learning_rate": 1.858088208101862e-06, - "loss": 1.2074, - "step": 1450 - }, - { - "epoch": 0.19670575476174337, - "grad_norm": 2.0325473656311215, - "learning_rate": 1.8578626270348576e-06, - "loss": 1.1699, - "step": 1451 - }, - { - "epoch": 0.19684132040940824, - "grad_norm": 1.5249489793256887, - "learning_rate": 1.8576368805358171e-06, - "loss": 1.2583, - "step": 1452 - }, - { - "epoch": 0.19697688605707314, - "grad_norm": 2.5336619214188287, - "learning_rate": 1.857410968648274e-06, - "loss": 1.2088, - "step": 1453 - }, - { - "epoch": 0.197112451704738, - "grad_norm": 5.526362990903123, - "learning_rate": 1.8571848914157938e-06, - "loss": 1.2041, - "step": 1454 - }, - { - "epoch": 0.1972480173524029, - "grad_norm": 1.4463317403521831, - "learning_rate": 1.8569586488819732e-06, - "loss": 1.2125, - "step": 1455 - }, - { - "epoch": 0.19738358300006778, - "grad_norm": 1.5884230624069613, - "learning_rate": 1.8567322410904416e-06, - "loss": 1.2026, - "step": 1456 - }, - { - "epoch": 0.19751914864773265, - "grad_norm": 1.5394975552416765, - "learning_rate": 1.8565056680848602e-06, - "loss": 1.2395, - "step": 1457 - }, - { - "epoch": 0.19765471429539755, - "grad_norm": 1.643902045820333, - "learning_rate": 1.8562789299089212e-06, - "loss": 1.1994, - "step": 1458 - }, - { - "epoch": 0.19779027994306242, - "grad_norm": 2.1249366081654313, - "learning_rate": 1.8560520266063497e-06, - "loss": 1.2033, - "step": 1459 - }, - { - "epoch": 0.19792584559072732, - "grad_norm": 1.8275232453307295, - "learning_rate": 1.8558249582209022e-06, - "loss": 1.1973, - "step": 1460 - }, - { - "epoch": 0.1980614112383922, - "grad_norm": 5.720250820967446, - "learning_rate": 1.8555977247963673e-06, - "loss": 1.1959, - "step": 1461 - }, - { - "epoch": 0.19819697688605709, - "grad_norm": 1.609614926661516, - "learning_rate": 1.8553703263765646e-06, - "loss": 1.2067, - "step": 1462 - }, - { - "epoch": 0.19833254253372196, - "grad_norm": 1.7616481678400506, - "learning_rate": 1.8551427630053463e-06, - "loss": 1.1987, - "step": 1463 - }, - { - "epoch": 0.19846810818138683, - "grad_norm": 1.5572661113564807, - "learning_rate": 1.854915034726596e-06, - "loss": 1.1826, - "step": 1464 - }, - { - "epoch": 0.19860367382905172, - "grad_norm": 2.0013263125281875, - "learning_rate": 1.8546871415842298e-06, - "loss": 1.2597, - "step": 1465 - }, - { - "epoch": 0.1987392394767166, - "grad_norm": 1.5865046479419145, - "learning_rate": 1.8544590836221947e-06, - "loss": 1.1598, - "step": 1466 - }, - { - "epoch": 0.1988748051243815, - "grad_norm": 1.595637595452128, - "learning_rate": 1.8542308608844704e-06, - "loss": 1.2171, - "step": 1467 - }, - { - "epoch": 0.19901037077204636, - "grad_norm": 1.5870980760306073, - "learning_rate": 1.854002473415067e-06, - "loss": 1.225, - "step": 1468 - }, - { - "epoch": 0.19914593641971123, - "grad_norm": 1.69386209377305, - "learning_rate": 1.853773921258028e-06, - "loss": 1.2268, - "step": 1469 - }, - { - "epoch": 0.19928150206737613, - "grad_norm": 1.6524942510555918, - "learning_rate": 1.8535452044574274e-06, - "loss": 1.2332, - "step": 1470 - }, - { - "epoch": 0.199417067715041, - "grad_norm": 1.4409531546934857, - "learning_rate": 1.8533163230573716e-06, - "loss": 1.2337, - "step": 1471 - }, - { - "epoch": 0.1995526333627059, - "grad_norm": 1.7663722281156902, - "learning_rate": 1.8530872771019984e-06, - "loss": 1.2206, - "step": 1472 - }, - { - "epoch": 0.19968819901037077, - "grad_norm": 1.6505727713358247, - "learning_rate": 1.8528580666354782e-06, - "loss": 1.2214, - "step": 1473 - }, - { - "epoch": 0.19982376465803564, - "grad_norm": 1.607780640518207, - "learning_rate": 1.8526286917020114e-06, - "loss": 1.2247, - "step": 1474 - }, - { - "epoch": 0.19995933030570054, - "grad_norm": 1.8686278620028887, - "learning_rate": 1.852399152345832e-06, - "loss": 1.1859, - "step": 1475 - }, - { - "epoch": 0.2000948959533654, - "grad_norm": 1.6971478087710758, - "learning_rate": 1.8521694486112045e-06, - "loss": 1.2025, - "step": 1476 - }, - { - "epoch": 0.2002304616010303, - "grad_norm": 8.345244654216257, - "learning_rate": 1.851939580542425e-06, - "loss": 1.1966, - "step": 1477 - }, - { - "epoch": 0.20036602724869518, - "grad_norm": 1.6647515177232668, - "learning_rate": 1.8517095481838228e-06, - "loss": 1.216, - "step": 1478 - }, - { - "epoch": 0.20050159289636008, - "grad_norm": 1.499680963000177, - "learning_rate": 1.8514793515797567e-06, - "loss": 1.2472, - "step": 1479 - }, - { - "epoch": 0.20063715854402495, - "grad_norm": 1.7540804653460536, - "learning_rate": 1.8512489907746193e-06, - "loss": 1.2421, - "step": 1480 - }, - { - "epoch": 0.20077272419168982, - "grad_norm": 1.721438621975217, - "learning_rate": 1.851018465812833e-06, - "loss": 1.1973, - "step": 1481 - }, - { - "epoch": 0.20090828983935471, - "grad_norm": 1.6258642392718168, - "learning_rate": 1.8507877767388531e-06, - "loss": 1.2275, - "step": 1482 - }, - { - "epoch": 0.20104385548701958, - "grad_norm": 1.8715188206680318, - "learning_rate": 1.8505569235971663e-06, - "loss": 1.213, - "step": 1483 - }, - { - "epoch": 0.20117942113468448, - "grad_norm": 2.466888309048399, - "learning_rate": 1.8503259064322907e-06, - "loss": 1.2428, - "step": 1484 - }, - { - "epoch": 0.20131498678234935, - "grad_norm": 2.1719231785945476, - "learning_rate": 1.8500947252887759e-06, - "loss": 1.223, - "step": 1485 - }, - { - "epoch": 0.20145055243001422, - "grad_norm": 1.6233843671621229, - "learning_rate": 1.8498633802112039e-06, - "loss": 1.2284, - "step": 1486 - }, - { - "epoch": 0.20158611807767912, - "grad_norm": 1.6635614136871975, - "learning_rate": 1.849631871244187e-06, - "loss": 1.2237, - "step": 1487 - }, - { - "epoch": 0.201721683725344, - "grad_norm": 2.3411804594157104, - "learning_rate": 1.8494001984323706e-06, - "loss": 1.2035, - "step": 1488 - }, - { - "epoch": 0.2018572493730089, - "grad_norm": 1.5795904821003797, - "learning_rate": 1.8491683618204307e-06, - "loss": 1.2218, - "step": 1489 - }, - { - "epoch": 0.20199281502067376, - "grad_norm": 1.6758742692242126, - "learning_rate": 1.848936361453075e-06, - "loss": 1.2017, - "step": 1490 - }, - { - "epoch": 0.20212838066833863, - "grad_norm": 2.8778155508246543, - "learning_rate": 1.8487041973750434e-06, - "loss": 1.2196, - "step": 1491 - }, - { - "epoch": 0.20226394631600353, - "grad_norm": 1.8233660087137162, - "learning_rate": 1.8484718696311063e-06, - "loss": 1.2507, - "step": 1492 - }, - { - "epoch": 0.2023995119636684, - "grad_norm": 1.9087829808650099, - "learning_rate": 1.8482393782660669e-06, - "loss": 1.2239, - "step": 1493 - }, - { - "epoch": 0.2025350776113333, - "grad_norm": 1.8402037864055822, - "learning_rate": 1.8480067233247584e-06, - "loss": 1.1705, - "step": 1494 - }, - { - "epoch": 0.20267064325899817, - "grad_norm": 1.637831807597322, - "learning_rate": 1.8477739048520475e-06, - "loss": 1.186, - "step": 1495 - }, - { - "epoch": 0.20280620890666304, - "grad_norm": 1.4963793505310783, - "learning_rate": 1.847540922892831e-06, - "loss": 1.2343, - "step": 1496 - }, - { - "epoch": 0.20294177455432794, - "grad_norm": 1.5651642181554637, - "learning_rate": 1.8473077774920377e-06, - "loss": 1.2131, - "step": 1497 - }, - { - "epoch": 0.2030773402019928, - "grad_norm": 1.8665762617411188, - "learning_rate": 1.8470744686946276e-06, - "loss": 1.1985, - "step": 1498 - }, - { - "epoch": 0.2032129058496577, - "grad_norm": 1.9371182550227313, - "learning_rate": 1.8468409965455924e-06, - "loss": 1.1995, - "step": 1499 - }, - { - "epoch": 0.20334847149732257, - "grad_norm": 1.5610907609843288, - "learning_rate": 1.8466073610899557e-06, - "loss": 1.1966, - "step": 1500 - }, - { - "epoch": 0.20348403714498747, - "grad_norm": 1.9716712695215797, - "learning_rate": 1.846373562372772e-06, - "loss": 1.1871, - "step": 1501 - }, - { - "epoch": 0.20361960279265234, - "grad_norm": 1.645614424587316, - "learning_rate": 1.846139600439128e-06, - "loss": 1.2632, - "step": 1502 - }, - { - "epoch": 0.2037551684403172, - "grad_norm": 1.6089787174082277, - "learning_rate": 1.845905475334141e-06, - "loss": 1.2345, - "step": 1503 - }, - { - "epoch": 0.2038907340879821, - "grad_norm": 2.0170454313410504, - "learning_rate": 1.84567118710296e-06, - "loss": 1.2238, - "step": 1504 - }, - { - "epoch": 0.20402629973564698, - "grad_norm": 2.1431420252537374, - "learning_rate": 1.8454367357907663e-06, - "loss": 1.2681, - "step": 1505 - }, - { - "epoch": 0.20416186538331188, - "grad_norm": 1.5245170012338731, - "learning_rate": 1.8452021214427713e-06, - "loss": 1.2217, - "step": 1506 - }, - { - "epoch": 0.20429743103097675, - "grad_norm": 1.840721614634791, - "learning_rate": 1.8449673441042188e-06, - "loss": 1.2326, - "step": 1507 - }, - { - "epoch": 0.20443299667864162, - "grad_norm": 2.2335259309065485, - "learning_rate": 1.8447324038203838e-06, - "loss": 1.2407, - "step": 1508 - }, - { - "epoch": 0.20456856232630652, - "grad_norm": 1.5869144023672115, - "learning_rate": 1.8444973006365724e-06, - "loss": 1.2057, - "step": 1509 - }, - { - "epoch": 0.2047041279739714, - "grad_norm": 2.0710730151773276, - "learning_rate": 1.844262034598123e-06, - "loss": 1.2384, - "step": 1510 - }, - { - "epoch": 0.2048396936216363, - "grad_norm": 2.0889404607634625, - "learning_rate": 1.8440266057504044e-06, - "loss": 1.2182, - "step": 1511 - }, - { - "epoch": 0.20497525926930116, - "grad_norm": 1.7642131891462585, - "learning_rate": 1.843791014138817e-06, - "loss": 1.1591, - "step": 1512 - }, - { - "epoch": 0.20511082491696603, - "grad_norm": 1.9746253772660132, - "learning_rate": 1.843555259808793e-06, - "loss": 1.2345, - "step": 1513 - }, - { - "epoch": 0.20524639056463093, - "grad_norm": 2.05351498901923, - "learning_rate": 1.8433193428057958e-06, - "loss": 1.2182, - "step": 1514 - }, - { - "epoch": 0.2053819562122958, - "grad_norm": 1.7040651734399646, - "learning_rate": 1.84308326317532e-06, - "loss": 1.2187, - "step": 1515 - }, - { - "epoch": 0.2055175218599607, - "grad_norm": 6.555490550965792, - "learning_rate": 1.842847020962892e-06, - "loss": 1.2135, - "step": 1516 - }, - { - "epoch": 0.20565308750762556, - "grad_norm": 4.530004862173186, - "learning_rate": 1.842610616214069e-06, - "loss": 1.2326, - "step": 1517 - }, - { - "epoch": 0.20578865315529046, - "grad_norm": 2.1179255363959735, - "learning_rate": 1.8423740489744399e-06, - "loss": 1.2216, - "step": 1518 - }, - { - "epoch": 0.20592421880295533, - "grad_norm": 1.71067410018918, - "learning_rate": 1.8421373192896248e-06, - "loss": 1.2169, - "step": 1519 - }, - { - "epoch": 0.2060597844506202, - "grad_norm": 3.7765904232240173, - "learning_rate": 1.841900427205275e-06, - "loss": 1.2289, - "step": 1520 - }, - { - "epoch": 0.2061953500982851, - "grad_norm": 1.6153259671425968, - "learning_rate": 1.8416633727670732e-06, - "loss": 1.2044, - "step": 1521 - }, - { - "epoch": 0.20633091574594997, - "grad_norm": 2.253939066988127, - "learning_rate": 1.8414261560207337e-06, - "loss": 1.2289, - "step": 1522 - }, - { - "epoch": 0.20646648139361487, - "grad_norm": 1.4792919881276108, - "learning_rate": 1.8411887770120021e-06, - "loss": 1.2055, - "step": 1523 - }, - { - "epoch": 0.20660204704127974, - "grad_norm": 9.172037241917007, - "learning_rate": 1.8409512357866546e-06, - "loss": 1.2303, - "step": 1524 - }, - { - "epoch": 0.2067376126889446, - "grad_norm": 2.6246347218183392, - "learning_rate": 1.8407135323904995e-06, - "loss": 1.1691, - "step": 1525 - }, - { - "epoch": 0.2068731783366095, - "grad_norm": 2.0098110816669217, - "learning_rate": 1.8404756668693758e-06, - "loss": 1.1696, - "step": 1526 - }, - { - "epoch": 0.20700874398427438, - "grad_norm": 2.0997443842574093, - "learning_rate": 1.8402376392691539e-06, - "loss": 1.216, - "step": 1527 - }, - { - "epoch": 0.20714430963193928, - "grad_norm": 1.5396270424978564, - "learning_rate": 1.8399994496357359e-06, - "loss": 1.223, - "step": 1528 - }, - { - "epoch": 0.20727987527960415, - "grad_norm": 11.132097027972966, - "learning_rate": 1.8397610980150544e-06, - "loss": 1.2237, - "step": 1529 - }, - { - "epoch": 0.20741544092726902, - "grad_norm": 2.2335897422201607, - "learning_rate": 1.8395225844530738e-06, - "loss": 1.2037, - "step": 1530 - }, - { - "epoch": 0.20755100657493392, - "grad_norm": 1.7503758665638165, - "learning_rate": 1.8392839089957897e-06, - "loss": 1.2171, - "step": 1531 - }, - { - "epoch": 0.2076865722225988, - "grad_norm": 1.7990343359003957, - "learning_rate": 1.8390450716892288e-06, - "loss": 1.2164, - "step": 1532 - }, - { - "epoch": 0.20782213787026368, - "grad_norm": 2.1807638500099538, - "learning_rate": 1.8388060725794485e-06, - "loss": 1.254, - "step": 1533 - }, - { - "epoch": 0.20795770351792855, - "grad_norm": 1.8761947647881143, - "learning_rate": 1.8385669117125385e-06, - "loss": 1.2012, - "step": 1534 - }, - { - "epoch": 0.20809326916559343, - "grad_norm": 1.3991005515032644, - "learning_rate": 1.8383275891346186e-06, - "loss": 1.2408, - "step": 1535 - }, - { - "epoch": 0.20822883481325832, - "grad_norm": 1.6401167163868693, - "learning_rate": 1.8380881048918404e-06, - "loss": 1.2022, - "step": 1536 - }, - { - "epoch": 0.2083644004609232, - "grad_norm": 2.0765176109563193, - "learning_rate": 1.837848459030387e-06, - "loss": 1.1872, - "step": 1537 - }, - { - "epoch": 0.2084999661085881, - "grad_norm": 1.562235070914228, - "learning_rate": 1.8376086515964716e-06, - "loss": 1.2129, - "step": 1538 - }, - { - "epoch": 0.20863553175625296, - "grad_norm": 1.7230220888420706, - "learning_rate": 1.8373686826363397e-06, - "loss": 1.2033, - "step": 1539 - }, - { - "epoch": 0.20877109740391786, - "grad_norm": 1.7555941069437508, - "learning_rate": 1.837128552196267e-06, - "loss": 1.1789, - "step": 1540 - }, - { - "epoch": 0.20890666305158273, - "grad_norm": 1.6150811884025469, - "learning_rate": 1.8368882603225609e-06, - "loss": 1.2518, - "step": 1541 - }, - { - "epoch": 0.2090422286992476, - "grad_norm": 1.7409068812620287, - "learning_rate": 1.8366478070615596e-06, - "loss": 1.2151, - "step": 1542 - }, - { - "epoch": 0.2091777943469125, - "grad_norm": 2.124381554882674, - "learning_rate": 1.8364071924596328e-06, - "loss": 1.224, - "step": 1543 - }, - { - "epoch": 0.20931335999457737, - "grad_norm": 1.540122056310556, - "learning_rate": 1.8361664165631817e-06, - "loss": 1.1896, - "step": 1544 - }, - { - "epoch": 0.20944892564224227, - "grad_norm": 2.156473230009947, - "learning_rate": 1.8359254794186368e-06, - "loss": 1.2153, - "step": 1545 - }, - { - "epoch": 0.20958449128990714, - "grad_norm": 1.5759770479318151, - "learning_rate": 1.835684381072462e-06, - "loss": 1.2311, - "step": 1546 - }, - { - "epoch": 0.209720056937572, - "grad_norm": 1.911148110436354, - "learning_rate": 1.8354431215711506e-06, - "loss": 1.1562, - "step": 1547 - }, - { - "epoch": 0.2098556225852369, - "grad_norm": 1.4412852715211495, - "learning_rate": 1.8352017009612276e-06, - "loss": 1.1735, - "step": 1548 - }, - { - "epoch": 0.20999118823290178, - "grad_norm": 1.7036443382857072, - "learning_rate": 1.8349601192892498e-06, - "loss": 1.2029, - "step": 1549 - }, - { - "epoch": 0.21012675388056667, - "grad_norm": 1.8248371454403671, - "learning_rate": 1.8347183766018033e-06, - "loss": 1.218, - "step": 1550 - }, - { - "epoch": 0.21026231952823154, - "grad_norm": 2.9805756530113974, - "learning_rate": 1.8344764729455066e-06, - "loss": 1.2506, - "step": 1551 - }, - { - "epoch": 0.21039788517589642, - "grad_norm": 1.6314382238558447, - "learning_rate": 1.8342344083670097e-06, - "loss": 1.2109, - "step": 1552 - }, - { - "epoch": 0.2105334508235613, - "grad_norm": 1.7643587659367264, - "learning_rate": 1.8339921829129916e-06, - "loss": 1.2087, - "step": 1553 - }, - { - "epoch": 0.21066901647122618, - "grad_norm": 2.221104940849202, - "learning_rate": 1.8337497966301645e-06, - "loss": 1.2461, - "step": 1554 - }, - { - "epoch": 0.21080458211889108, - "grad_norm": 2.2168916868475157, - "learning_rate": 1.8335072495652702e-06, - "loss": 1.1404, - "step": 1555 - }, - { - "epoch": 0.21094014776655595, - "grad_norm": 1.4438428207923866, - "learning_rate": 1.8332645417650822e-06, - "loss": 1.2275, - "step": 1556 - }, - { - "epoch": 0.21107571341422085, - "grad_norm": 1.9486399292582846, - "learning_rate": 1.8330216732764049e-06, - "loss": 1.2189, - "step": 1557 - }, - { - "epoch": 0.21121127906188572, - "grad_norm": 1.651497944948543, - "learning_rate": 1.832778644146073e-06, - "loss": 1.1959, - "step": 1558 - }, - { - "epoch": 0.2113468447095506, - "grad_norm": 1.739882139116549, - "learning_rate": 1.8325354544209532e-06, - "loss": 1.1642, - "step": 1559 - }, - { - "epoch": 0.2114824103572155, - "grad_norm": 1.9284959407745148, - "learning_rate": 1.832292104147943e-06, - "loss": 1.1948, - "step": 1560 - }, - { - "epoch": 0.21161797600488036, - "grad_norm": 1.8462793059878042, - "learning_rate": 1.8320485933739697e-06, - "loss": 1.2339, - "step": 1561 - }, - { - "epoch": 0.21175354165254526, - "grad_norm": 3.597084975474117, - "learning_rate": 1.8318049221459932e-06, - "loss": 1.2045, - "step": 1562 - }, - { - "epoch": 0.21188910730021013, - "grad_norm": 3.1520769588179247, - "learning_rate": 1.8315610905110032e-06, - "loss": 1.2243, - "step": 1563 - }, - { - "epoch": 0.212024672947875, - "grad_norm": 2.1083811358216784, - "learning_rate": 1.8313170985160213e-06, - "loss": 1.2008, - "step": 1564 - }, - { - "epoch": 0.2121602385955399, - "grad_norm": 2.1283563381761152, - "learning_rate": 1.8310729462080987e-06, - "loss": 1.2401, - "step": 1565 - }, - { - "epoch": 0.21229580424320477, - "grad_norm": 2.260862047413429, - "learning_rate": 1.8308286336343183e-06, - "loss": 1.2222, - "step": 1566 - }, - { - "epoch": 0.21243136989086966, - "grad_norm": 2.012657634351372, - "learning_rate": 1.8305841608417945e-06, - "loss": 1.1943, - "step": 1567 - }, - { - "epoch": 0.21256693553853453, - "grad_norm": 1.65164426225241, - "learning_rate": 1.8303395278776712e-06, - "loss": 1.2314, - "step": 1568 - }, - { - "epoch": 0.2127025011861994, - "grad_norm": 1.866727372579322, - "learning_rate": 1.830094734789124e-06, - "loss": 1.2328, - "step": 1569 - }, - { - "epoch": 0.2128380668338643, - "grad_norm": 1.5850788107753944, - "learning_rate": 1.82984978162336e-06, - "loss": 1.2272, - "step": 1570 - }, - { - "epoch": 0.21297363248152917, - "grad_norm": 2.0893640554753548, - "learning_rate": 1.8296046684276161e-06, - "loss": 1.247, - "step": 1571 - }, - { - "epoch": 0.21310919812919407, - "grad_norm": 2.6350475485991365, - "learning_rate": 1.8293593952491602e-06, - "loss": 1.2004, - "step": 1572 - }, - { - "epoch": 0.21324476377685894, - "grad_norm": 1.7362131277934978, - "learning_rate": 1.8291139621352913e-06, - "loss": 1.1897, - "step": 1573 - }, - { - "epoch": 0.2133803294245238, - "grad_norm": 1.4074132445337593, - "learning_rate": 1.8288683691333398e-06, - "loss": 1.2026, - "step": 1574 - }, - { - "epoch": 0.2135158950721887, - "grad_norm": 1.622099390491543, - "learning_rate": 1.8286226162906657e-06, - "loss": 1.2099, - "step": 1575 - }, - { - "epoch": 0.21365146071985358, - "grad_norm": 2.144061572448268, - "learning_rate": 1.8283767036546612e-06, - "loss": 1.2586, - "step": 1576 - }, - { - "epoch": 0.21378702636751848, - "grad_norm": 1.610406921811909, - "learning_rate": 1.8281306312727477e-06, - "loss": 1.2425, - "step": 1577 - }, - { - "epoch": 0.21392259201518335, - "grad_norm": 1.6768408922161657, - "learning_rate": 1.8278843991923791e-06, - "loss": 1.2419, - "step": 1578 - }, - { - "epoch": 0.21405815766284825, - "grad_norm": 1.5506307469239886, - "learning_rate": 1.8276380074610392e-06, - "loss": 1.1702, - "step": 1579 - }, - { - "epoch": 0.21419372331051312, - "grad_norm": 1.5472384508060257, - "learning_rate": 1.8273914561262422e-06, - "loss": 1.2223, - "step": 1580 - }, - { - "epoch": 0.214329288958178, - "grad_norm": 1.567723144777334, - "learning_rate": 1.8271447452355343e-06, - "loss": 1.2131, - "step": 1581 - }, - { - "epoch": 0.2144648546058429, - "grad_norm": 1.8999643404208981, - "learning_rate": 1.826897874836491e-06, - "loss": 1.2424, - "step": 1582 - }, - { - "epoch": 0.21460042025350776, - "grad_norm": 1.9854732745463948, - "learning_rate": 1.8266508449767196e-06, - "loss": 1.1983, - "step": 1583 - }, - { - "epoch": 0.21473598590117265, - "grad_norm": 1.7939774213202193, - "learning_rate": 1.8264036557038581e-06, - "loss": 1.2164, - "step": 1584 - }, - { - "epoch": 0.21487155154883752, - "grad_norm": 1.7386587016301276, - "learning_rate": 1.826156307065575e-06, - "loss": 1.2077, - "step": 1585 - }, - { - "epoch": 0.2150071171965024, - "grad_norm": 1.559098206226636, - "learning_rate": 1.8259087991095692e-06, - "loss": 1.2114, - "step": 1586 - }, - { - "epoch": 0.2151426828441673, - "grad_norm": 2.283958481665517, - "learning_rate": 1.8256611318835709e-06, - "loss": 1.1835, - "step": 1587 - }, - { - "epoch": 0.21527824849183216, - "grad_norm": 1.6044377530388516, - "learning_rate": 1.8254133054353406e-06, - "loss": 1.1789, - "step": 1588 - }, - { - "epoch": 0.21541381413949706, - "grad_norm": 2.281700217724224, - "learning_rate": 1.8251653198126697e-06, - "loss": 1.1928, - "step": 1589 - }, - { - "epoch": 0.21554937978716193, - "grad_norm": 1.4197043205795181, - "learning_rate": 1.8249171750633808e-06, - "loss": 1.154, - "step": 1590 - }, - { - "epoch": 0.2156849454348268, - "grad_norm": 1.6472509856827486, - "learning_rate": 1.8246688712353256e-06, - "loss": 1.2325, - "step": 1591 - }, - { - "epoch": 0.2158205110824917, - "grad_norm": 1.9151424997141822, - "learning_rate": 1.8244204083763886e-06, - "loss": 1.2151, - "step": 1592 - }, - { - "epoch": 0.21595607673015657, - "grad_norm": 1.6872629847644691, - "learning_rate": 1.824171786534483e-06, - "loss": 1.1989, - "step": 1593 - }, - { - "epoch": 0.21609164237782147, - "grad_norm": 1.7203183619501972, - "learning_rate": 1.823923005757554e-06, - "loss": 1.2195, - "step": 1594 - }, - { - "epoch": 0.21622720802548634, - "grad_norm": 1.7380644898352045, - "learning_rate": 1.8236740660935772e-06, - "loss": 1.1688, - "step": 1595 - }, - { - "epoch": 0.2163627736731512, - "grad_norm": 1.8182675923533507, - "learning_rate": 1.8234249675905584e-06, - "loss": 1.2465, - "step": 1596 - }, - { - "epoch": 0.2164983393208161, - "grad_norm": 1.4906130532205106, - "learning_rate": 1.8231757102965343e-06, - "loss": 1.1878, - "step": 1597 - }, - { - "epoch": 0.21663390496848098, - "grad_norm": 2.2010266864627246, - "learning_rate": 1.8229262942595724e-06, - "loss": 1.2065, - "step": 1598 - }, - { - "epoch": 0.21676947061614588, - "grad_norm": 6.04184868651582, - "learning_rate": 1.8226767195277702e-06, - "loss": 1.2099, - "step": 1599 - }, - { - "epoch": 0.21690503626381075, - "grad_norm": 1.6964479712475071, - "learning_rate": 1.8224269861492565e-06, - "loss": 1.2017, - "step": 1600 - }, - { - "epoch": 0.21704060191147564, - "grad_norm": 2.9689891706680815, - "learning_rate": 1.8221770941721904e-06, - "loss": 1.1988, - "step": 1601 - }, - { - "epoch": 0.21717616755914051, - "grad_norm": 1.6809818801973095, - "learning_rate": 1.8219270436447615e-06, - "loss": 1.2066, - "step": 1602 - }, - { - "epoch": 0.21731173320680539, - "grad_norm": 1.509991434974565, - "learning_rate": 1.8216768346151904e-06, - "loss": 1.2056, - "step": 1603 - }, - { - "epoch": 0.21744729885447028, - "grad_norm": 1.6976515942138783, - "learning_rate": 1.8214264671317272e-06, - "loss": 1.1842, - "step": 1604 - }, - { - "epoch": 0.21758286450213515, - "grad_norm": 1.8811310387719844, - "learning_rate": 1.821175941242654e-06, - "loss": 1.2177, - "step": 1605 - }, - { - "epoch": 0.21771843014980005, - "grad_norm": 2.159582871370095, - "learning_rate": 1.8209252569962828e-06, - "loss": 1.2417, - "step": 1606 - }, - { - "epoch": 0.21785399579746492, - "grad_norm": 1.8431441214637967, - "learning_rate": 1.8206744144409553e-06, - "loss": 1.2168, - "step": 1607 - }, - { - "epoch": 0.2179895614451298, - "grad_norm": 1.5470372411002222, - "learning_rate": 1.8204234136250452e-06, - "loss": 1.2267, - "step": 1608 - }, - { - "epoch": 0.2181251270927947, - "grad_norm": 1.5221054613018028, - "learning_rate": 1.8201722545969557e-06, - "loss": 1.1908, - "step": 1609 - }, - { - "epoch": 0.21826069274045956, - "grad_norm": 1.9339386726401724, - "learning_rate": 1.8199209374051212e-06, - "loss": 1.1958, - "step": 1610 - }, - { - "epoch": 0.21839625838812446, - "grad_norm": 1.692555567617147, - "learning_rate": 1.8196694620980058e-06, - "loss": 1.2115, - "step": 1611 - }, - { - "epoch": 0.21853182403578933, - "grad_norm": 1.6665501765281199, - "learning_rate": 1.8194178287241047e-06, - "loss": 1.2333, - "step": 1612 - }, - { - "epoch": 0.2186673896834542, - "grad_norm": 1.8455282451532584, - "learning_rate": 1.8191660373319433e-06, - "loss": 1.2046, - "step": 1613 - }, - { - "epoch": 0.2188029553311191, - "grad_norm": 1.5941480342456726, - "learning_rate": 1.8189140879700779e-06, - "loss": 1.1813, - "step": 1614 - }, - { - "epoch": 0.21893852097878397, - "grad_norm": 1.5293579906398593, - "learning_rate": 1.818661980687095e-06, - "loss": 1.1959, - "step": 1615 - }, - { - "epoch": 0.21907408662644887, - "grad_norm": 1.664067029089426, - "learning_rate": 1.8184097155316108e-06, - "loss": 1.2396, - "step": 1616 - }, - { - "epoch": 0.21920965227411374, - "grad_norm": 2.7542588268157995, - "learning_rate": 1.8181572925522732e-06, - "loss": 1.2153, - "step": 1617 - }, - { - "epoch": 0.21934521792177863, - "grad_norm": 2.1578123154700135, - "learning_rate": 1.81790471179776e-06, - "loss": 1.1845, - "step": 1618 - }, - { - "epoch": 0.2194807835694435, - "grad_norm": 1.5011819589909037, - "learning_rate": 1.8176519733167792e-06, - "loss": 1.1819, - "step": 1619 - }, - { - "epoch": 0.21961634921710838, - "grad_norm": 2.4242399691644483, - "learning_rate": 1.8173990771580694e-06, - "loss": 1.2058, - "step": 1620 - }, - { - "epoch": 0.21975191486477327, - "grad_norm": 1.9305810386033582, - "learning_rate": 1.8171460233704e-06, - "loss": 1.2263, - "step": 1621 - }, - { - "epoch": 0.21988748051243814, - "grad_norm": 1.979075536096811, - "learning_rate": 1.8168928120025698e-06, - "loss": 1.219, - "step": 1622 - }, - { - "epoch": 0.22002304616010304, - "grad_norm": 1.74170704713699, - "learning_rate": 1.816639443103409e-06, - "loss": 1.2265, - "step": 1623 - }, - { - "epoch": 0.2201586118077679, - "grad_norm": 4.677919739776005, - "learning_rate": 1.8163859167217778e-06, - "loss": 1.2273, - "step": 1624 - }, - { - "epoch": 0.22029417745543278, - "grad_norm": 3.3972992968313083, - "learning_rate": 1.816132232906567e-06, - "loss": 1.1934, - "step": 1625 - }, - { - "epoch": 0.22042974310309768, - "grad_norm": 1.7621936312130335, - "learning_rate": 1.815878391706697e-06, - "loss": 1.1917, - "step": 1626 - }, - { - "epoch": 0.22056530875076255, - "grad_norm": 2.323534281212155, - "learning_rate": 1.8156243931711194e-06, - "loss": 1.1932, - "step": 1627 - }, - { - "epoch": 0.22070087439842745, - "grad_norm": 2.00024925154576, - "learning_rate": 1.8153702373488157e-06, - "loss": 1.1897, - "step": 1628 - }, - { - "epoch": 0.22083644004609232, - "grad_norm": 1.6218703561558876, - "learning_rate": 1.815115924288798e-06, - "loss": 1.2068, - "step": 1629 - }, - { - "epoch": 0.2209720056937572, - "grad_norm": 1.7038042153241142, - "learning_rate": 1.8148614540401082e-06, - "loss": 1.1875, - "step": 1630 - }, - { - "epoch": 0.2211075713414221, - "grad_norm": 1.4999321828155547, - "learning_rate": 1.8146068266518193e-06, - "loss": 1.184, - "step": 1631 - }, - { - "epoch": 0.22124313698908696, - "grad_norm": 1.5002165086510584, - "learning_rate": 1.8143520421730338e-06, - "loss": 1.2146, - "step": 1632 - }, - { - "epoch": 0.22137870263675186, - "grad_norm": 2.024084773070431, - "learning_rate": 1.8140971006528854e-06, - "loss": 1.2147, - "step": 1633 - }, - { - "epoch": 0.22151426828441673, - "grad_norm": 1.577097388717524, - "learning_rate": 1.8138420021405367e-06, - "loss": 1.1781, - "step": 1634 - }, - { - "epoch": 0.2216498339320816, - "grad_norm": 1.6981537445208257, - "learning_rate": 1.8135867466851824e-06, - "loss": 1.2313, - "step": 1635 - }, - { - "epoch": 0.2217853995797465, - "grad_norm": 1.7633386064242647, - "learning_rate": 1.813331334336046e-06, - "loss": 1.1821, - "step": 1636 - }, - { - "epoch": 0.22192096522741137, - "grad_norm": 1.6132980774946981, - "learning_rate": 1.8130757651423817e-06, - "loss": 1.1947, - "step": 1637 - }, - { - "epoch": 0.22205653087507626, - "grad_norm": 3.459745828829997, - "learning_rate": 1.812820039153474e-06, - "loss": 1.1601, - "step": 1638 - }, - { - "epoch": 0.22219209652274113, - "grad_norm": 1.5651026472328764, - "learning_rate": 1.812564156418638e-06, - "loss": 1.2034, - "step": 1639 - }, - { - "epoch": 0.22232766217040603, - "grad_norm": 3.82426832267302, - "learning_rate": 1.8123081169872184e-06, - "loss": 1.2411, - "step": 1640 - }, - { - "epoch": 0.2224632278180709, - "grad_norm": 2.3754300597167433, - "learning_rate": 1.8120519209085905e-06, - "loss": 1.1913, - "step": 1641 - }, - { - "epoch": 0.22259879346573577, - "grad_norm": 1.4326386968510665, - "learning_rate": 1.8117955682321594e-06, - "loss": 1.2158, - "step": 1642 - }, - { - "epoch": 0.22273435911340067, - "grad_norm": 2.785327652972002, - "learning_rate": 1.811539059007361e-06, - "loss": 1.1938, - "step": 1643 - }, - { - "epoch": 0.22286992476106554, - "grad_norm": 1.7381143392316023, - "learning_rate": 1.8112823932836609e-06, - "loss": 1.1944, - "step": 1644 - }, - { - "epoch": 0.22300549040873044, - "grad_norm": 1.991953347758075, - "learning_rate": 1.8110255711105552e-06, - "loss": 1.2093, - "step": 1645 - }, - { - "epoch": 0.2231410560563953, - "grad_norm": 1.7441380485397322, - "learning_rate": 1.81076859253757e-06, - "loss": 1.1746, - "step": 1646 - }, - { - "epoch": 0.22327662170406018, - "grad_norm": 1.6096621385423762, - "learning_rate": 1.8105114576142615e-06, - "loss": 1.2429, - "step": 1647 - }, - { - "epoch": 0.22341218735172508, - "grad_norm": 1.4444640205791388, - "learning_rate": 1.810254166390216e-06, - "loss": 1.2142, - "step": 1648 - }, - { - "epoch": 0.22354775299938995, - "grad_norm": 2.441873773578424, - "learning_rate": 1.8099967189150505e-06, - "loss": 1.2055, - "step": 1649 - }, - { - "epoch": 0.22368331864705485, - "grad_norm": 1.8839335680738951, - "learning_rate": 1.8097391152384113e-06, - "loss": 1.2116, - "step": 1650 - }, - { - "epoch": 0.22381888429471972, - "grad_norm": 1.447198983726194, - "learning_rate": 1.8094813554099754e-06, - "loss": 1.2151, - "step": 1651 - }, - { - "epoch": 0.2239544499423846, - "grad_norm": 1.6709832235031776, - "learning_rate": 1.80922343947945e-06, - "loss": 1.1931, - "step": 1652 - }, - { - "epoch": 0.22409001559004949, - "grad_norm": 1.5188877017885898, - "learning_rate": 1.808965367496572e-06, - "loss": 1.1907, - "step": 1653 - }, - { - "epoch": 0.22422558123771436, - "grad_norm": 1.748022234056033, - "learning_rate": 1.808707139511108e-06, - "loss": 1.242, - "step": 1654 - }, - { - "epoch": 0.22436114688537925, - "grad_norm": 2.004557560228434, - "learning_rate": 1.808448755572856e-06, - "loss": 1.2201, - "step": 1655 - }, - { - "epoch": 0.22449671253304412, - "grad_norm": 2.791930348607405, - "learning_rate": 1.808190215731643e-06, - "loss": 1.2064, - "step": 1656 - }, - { - "epoch": 0.22463227818070902, - "grad_norm": 1.5331999877580431, - "learning_rate": 1.8079315200373265e-06, - "loss": 1.2305, - "step": 1657 - }, - { - "epoch": 0.2247678438283739, - "grad_norm": 1.6011126697086775, - "learning_rate": 1.8076726685397934e-06, - "loss": 1.2598, - "step": 1658 - }, - { - "epoch": 0.22490340947603876, - "grad_norm": 1.607604012665984, - "learning_rate": 1.8074136612889619e-06, - "loss": 1.2083, - "step": 1659 - }, - { - "epoch": 0.22503897512370366, - "grad_norm": 1.9054429482288957, - "learning_rate": 1.8071544983347791e-06, - "loss": 1.2293, - "step": 1660 - }, - { - "epoch": 0.22517454077136853, - "grad_norm": 1.5984487019449007, - "learning_rate": 1.8068951797272222e-06, - "loss": 1.2025, - "step": 1661 - }, - { - "epoch": 0.22531010641903343, - "grad_norm": 2.129379605367283, - "learning_rate": 1.8066357055162994e-06, - "loss": 1.2065, - "step": 1662 - }, - { - "epoch": 0.2254456720666983, - "grad_norm": 1.900584341832968, - "learning_rate": 1.8063760757520483e-06, - "loss": 1.202, - "step": 1663 - }, - { - "epoch": 0.22558123771436317, - "grad_norm": 2.9919570546051437, - "learning_rate": 1.8061162904845356e-06, - "loss": 1.1652, - "step": 1664 - }, - { - "epoch": 0.22571680336202807, - "grad_norm": 1.5793478282678424, - "learning_rate": 1.80585634976386e-06, - "loss": 1.2039, - "step": 1665 - }, - { - "epoch": 0.22585236900969294, - "grad_norm": 1.7971091848688938, - "learning_rate": 1.8055962536401479e-06, - "loss": 1.2263, - "step": 1666 - }, - { - "epoch": 0.22598793465735784, - "grad_norm": 1.6265570920417962, - "learning_rate": 1.8053360021635572e-06, - "loss": 1.2306, - "step": 1667 - }, - { - "epoch": 0.2261235003050227, - "grad_norm": 2.3622065514647974, - "learning_rate": 1.8050755953842757e-06, - "loss": 1.234, - "step": 1668 - }, - { - "epoch": 0.22625906595268758, - "grad_norm": 1.8138887005133384, - "learning_rate": 1.8048150333525206e-06, - "loss": 1.2004, - "step": 1669 - }, - { - "epoch": 0.22639463160035248, - "grad_norm": 2.1598857715331308, - "learning_rate": 1.8045543161185388e-06, - "loss": 1.2197, - "step": 1670 - }, - { - "epoch": 0.22653019724801735, - "grad_norm": 2.354101209300282, - "learning_rate": 1.8042934437326082e-06, - "loss": 1.2096, - "step": 1671 - }, - { - "epoch": 0.22666576289568224, - "grad_norm": 1.5437047653428955, - "learning_rate": 1.8040324162450355e-06, - "loss": 1.2114, - "step": 1672 - }, - { - "epoch": 0.2268013285433471, - "grad_norm": 2.625082248184305, - "learning_rate": 1.8037712337061582e-06, - "loss": 1.231, - "step": 1673 - }, - { - "epoch": 0.22693689419101198, - "grad_norm": 1.9642766299564542, - "learning_rate": 1.803509896166343e-06, - "loss": 1.2347, - "step": 1674 - }, - { - "epoch": 0.22707245983867688, - "grad_norm": 1.5206744748662435, - "learning_rate": 1.8032484036759866e-06, - "loss": 1.1841, - "step": 1675 - }, - { - "epoch": 0.22720802548634175, - "grad_norm": 1.6165088353155703, - "learning_rate": 1.8029867562855161e-06, - "loss": 1.1874, - "step": 1676 - }, - { - "epoch": 0.22734359113400665, - "grad_norm": 1.681966331734533, - "learning_rate": 1.8027249540453878e-06, - "loss": 1.206, - "step": 1677 - }, - { - "epoch": 0.22747915678167152, - "grad_norm": 1.6984957205583373, - "learning_rate": 1.802462997006089e-06, - "loss": 1.2496, - "step": 1678 - }, - { - "epoch": 0.22761472242933642, - "grad_norm": 1.593306209709851, - "learning_rate": 1.8022008852181351e-06, - "loss": 1.2189, - "step": 1679 - }, - { - "epoch": 0.2277502880770013, - "grad_norm": 1.7226803132480977, - "learning_rate": 1.801938618732073e-06, - "loss": 1.2002, - "step": 1680 - }, - { - "epoch": 0.22788585372466616, - "grad_norm": 1.7366445784058124, - "learning_rate": 1.801676197598478e-06, - "loss": 1.2147, - "step": 1681 - }, - { - "epoch": 0.22802141937233106, - "grad_norm": 1.5158680578171195, - "learning_rate": 1.8014136218679566e-06, - "loss": 1.2397, - "step": 1682 - }, - { - "epoch": 0.22815698501999593, - "grad_norm": 1.7485095621788613, - "learning_rate": 1.8011508915911441e-06, - "loss": 1.1553, - "step": 1683 - }, - { - "epoch": 0.22829255066766083, - "grad_norm": 1.5778678046223609, - "learning_rate": 1.800888006818706e-06, - "loss": 1.2074, - "step": 1684 - }, - { - "epoch": 0.2284281163153257, - "grad_norm": 3.3089925440327947, - "learning_rate": 1.8006249676013377e-06, - "loss": 1.2148, - "step": 1685 - }, - { - "epoch": 0.22856368196299057, - "grad_norm": 1.5306692223401936, - "learning_rate": 1.8003617739897642e-06, - "loss": 1.161, - "step": 1686 - }, - { - "epoch": 0.22869924761065547, - "grad_norm": 2.633873383338012, - "learning_rate": 1.8000984260347401e-06, - "loss": 1.2041, - "step": 1687 - }, - { - "epoch": 0.22883481325832034, - "grad_norm": 3.069460347978959, - "learning_rate": 1.7998349237870506e-06, - "loss": 1.1847, - "step": 1688 - }, - { - "epoch": 0.22897037890598523, - "grad_norm": 2.4260859375786987, - "learning_rate": 1.7995712672975088e-06, - "loss": 1.2024, - "step": 1689 - }, - { - "epoch": 0.2291059445536501, - "grad_norm": 4.529317923278312, - "learning_rate": 1.79930745661696e-06, - "loss": 1.2111, - "step": 1690 - }, - { - "epoch": 0.22924151020131497, - "grad_norm": 1.8021158291553345, - "learning_rate": 1.7990434917962776e-06, - "loss": 1.2091, - "step": 1691 - }, - { - "epoch": 0.22937707584897987, - "grad_norm": 2.767083589345307, - "learning_rate": 1.7987793728863649e-06, - "loss": 1.1838, - "step": 1692 - }, - { - "epoch": 0.22951264149664474, - "grad_norm": 2.084973231171956, - "learning_rate": 1.7985150999381553e-06, - "loss": 1.1868, - "step": 1693 - }, - { - "epoch": 0.22964820714430964, - "grad_norm": 3.862602475650728, - "learning_rate": 1.798250673002612e-06, - "loss": 1.1911, - "step": 1694 - }, - { - "epoch": 0.2297837727919745, - "grad_norm": 1.612358713168583, - "learning_rate": 1.797986092130727e-06, - "loss": 1.2086, - "step": 1695 - }, - { - "epoch": 0.2299193384396394, - "grad_norm": 1.77913259783897, - "learning_rate": 1.7977213573735234e-06, - "loss": 1.1593, - "step": 1696 - }, - { - "epoch": 0.23005490408730428, - "grad_norm": 1.9137547708032894, - "learning_rate": 1.7974564687820526e-06, - "loss": 1.2393, - "step": 1697 - }, - { - "epoch": 0.23019046973496915, - "grad_norm": 2.7372111318217223, - "learning_rate": 1.7971914264073967e-06, - "loss": 1.2138, - "step": 1698 - }, - { - "epoch": 0.23032603538263405, - "grad_norm": 1.9082125181838738, - "learning_rate": 1.796926230300667e-06, - "loss": 1.2221, - "step": 1699 - }, - { - "epoch": 0.23046160103029892, - "grad_norm": 1.3448794426563628, - "learning_rate": 1.7966608805130043e-06, - "loss": 1.1884, - "step": 1700 - }, - { - "epoch": 0.23059716667796382, - "grad_norm": 1.887980307745491, - "learning_rate": 1.7963953770955791e-06, - "loss": 1.1772, - "step": 1701 - }, - { - "epoch": 0.2307327323256287, - "grad_norm": 1.8523347765913138, - "learning_rate": 1.7961297200995917e-06, - "loss": 1.1775, - "step": 1702 - }, - { - "epoch": 0.23086829797329356, - "grad_norm": 1.5608201085120448, - "learning_rate": 1.7958639095762722e-06, - "loss": 1.1793, - "step": 1703 - }, - { - "epoch": 0.23100386362095846, - "grad_norm": 1.5941973850053575, - "learning_rate": 1.79559794557688e-06, - "loss": 1.2048, - "step": 1704 - }, - { - "epoch": 0.23113942926862333, - "grad_norm": 1.5359152745265474, - "learning_rate": 1.795331828152704e-06, - "loss": 1.1675, - "step": 1705 - }, - { - "epoch": 0.23127499491628822, - "grad_norm": 2.1062349941265603, - "learning_rate": 1.7950655573550627e-06, - "loss": 1.2207, - "step": 1706 - }, - { - "epoch": 0.2314105605639531, - "grad_norm": 1.5769604708602056, - "learning_rate": 1.7947991332353048e-06, - "loss": 1.2185, - "step": 1707 - }, - { - "epoch": 0.23154612621161796, - "grad_norm": 1.5213613248341018, - "learning_rate": 1.7945325558448078e-06, - "loss": 1.1756, - "step": 1708 - }, - { - "epoch": 0.23168169185928286, - "grad_norm": 1.9716780437529604, - "learning_rate": 1.7942658252349787e-06, - "loss": 1.1959, - "step": 1709 - }, - { - "epoch": 0.23181725750694773, - "grad_norm": 1.91008605650578, - "learning_rate": 1.7939989414572552e-06, - "loss": 1.196, - "step": 1710 - }, - { - "epoch": 0.23195282315461263, - "grad_norm": 1.9129720633085083, - "learning_rate": 1.7937319045631032e-06, - "loss": 1.1916, - "step": 1711 - }, - { - "epoch": 0.2320883888022775, - "grad_norm": 2.8607253245953155, - "learning_rate": 1.7934647146040185e-06, - "loss": 1.1805, - "step": 1712 - }, - { - "epoch": 0.23222395444994237, - "grad_norm": 2.1118796130412862, - "learning_rate": 1.793197371631527e-06, - "loss": 1.1755, - "step": 1713 - }, - { - "epoch": 0.23235952009760727, - "grad_norm": 1.5980823408314888, - "learning_rate": 1.7929298756971836e-06, - "loss": 1.2285, - "step": 1714 - }, - { - "epoch": 0.23249508574527214, - "grad_norm": 1.6635323161805202, - "learning_rate": 1.7926622268525725e-06, - "loss": 1.1972, - "step": 1715 - }, - { - "epoch": 0.23263065139293704, - "grad_norm": 1.6137798032606994, - "learning_rate": 1.792394425149308e-06, - "loss": 1.1726, - "step": 1716 - }, - { - "epoch": 0.2327662170406019, - "grad_norm": 2.1488517089444867, - "learning_rate": 1.792126470639033e-06, - "loss": 1.1891, - "step": 1717 - }, - { - "epoch": 0.2329017826882668, - "grad_norm": 1.6810790490647396, - "learning_rate": 1.7918583633734212e-06, - "loss": 1.1736, - "step": 1718 - }, - { - "epoch": 0.23303734833593168, - "grad_norm": 1.571631876275798, - "learning_rate": 1.7915901034041744e-06, - "loss": 1.2056, - "step": 1719 - }, - { - "epoch": 0.23317291398359655, - "grad_norm": 1.932655677976464, - "learning_rate": 1.7913216907830248e-06, - "loss": 1.2124, - "step": 1720 - }, - { - "epoch": 0.23330847963126145, - "grad_norm": 1.9050204153521608, - "learning_rate": 1.7910531255617332e-06, - "loss": 1.1958, - "step": 1721 - }, - { - "epoch": 0.23344404527892632, - "grad_norm": 2.252603124524, - "learning_rate": 1.7907844077920905e-06, - "loss": 1.2455, - "step": 1722 - }, - { - "epoch": 0.2335796109265912, - "grad_norm": 2.0949886425250437, - "learning_rate": 1.790515537525917e-06, - "loss": 1.1982, - "step": 1723 - }, - { - "epoch": 0.23371517657425608, - "grad_norm": 1.9383415455256918, - "learning_rate": 1.7902465148150623e-06, - "loss": 1.1766, - "step": 1724 - }, - { - "epoch": 0.23385074222192095, - "grad_norm": 1.6127418578636872, - "learning_rate": 1.7899773397114046e-06, - "loss": 1.1991, - "step": 1725 - }, - { - "epoch": 0.23398630786958585, - "grad_norm": 1.5109002611956859, - "learning_rate": 1.789708012266853e-06, - "loss": 1.2273, - "step": 1726 - }, - { - "epoch": 0.23412187351725072, - "grad_norm": 2.0823966834311505, - "learning_rate": 1.7894385325333444e-06, - "loss": 1.1681, - "step": 1727 - }, - { - "epoch": 0.23425743916491562, - "grad_norm": 1.5794320696903419, - "learning_rate": 1.7891689005628466e-06, - "loss": 1.2318, - "step": 1728 - }, - { - "epoch": 0.2343930048125805, - "grad_norm": 1.8782760432250274, - "learning_rate": 1.7888991164073554e-06, - "loss": 1.2182, - "step": 1729 - }, - { - "epoch": 0.23452857046024536, - "grad_norm": 1.5345817923067055, - "learning_rate": 1.7886291801188968e-06, - "loss": 1.2277, - "step": 1730 - }, - { - "epoch": 0.23466413610791026, - "grad_norm": 1.6130538576422029, - "learning_rate": 1.788359091749526e-06, - "loss": 1.2579, - "step": 1731 - }, - { - "epoch": 0.23479970175557513, - "grad_norm": 1.8952059093929858, - "learning_rate": 1.7880888513513272e-06, - "loss": 1.2334, - "step": 1732 - }, - { - "epoch": 0.23493526740324003, - "grad_norm": 1.9969935681567166, - "learning_rate": 1.7878184589764142e-06, - "loss": 1.241, - "step": 1733 - }, - { - "epoch": 0.2350708330509049, - "grad_norm": 1.669399572405858, - "learning_rate": 1.7875479146769303e-06, - "loss": 1.1892, - "step": 1734 - }, - { - "epoch": 0.23520639869856977, - "grad_norm": 2.961538890073287, - "learning_rate": 1.7872772185050474e-06, - "loss": 1.2032, - "step": 1735 - }, - { - "epoch": 0.23534196434623467, - "grad_norm": 2.502055703493878, - "learning_rate": 1.7870063705129672e-06, - "loss": 1.2078, - "step": 1736 - }, - { - "epoch": 0.23547752999389954, - "grad_norm": 1.5653153080453501, - "learning_rate": 1.786735370752921e-06, - "loss": 1.2293, - "step": 1737 - }, - { - "epoch": 0.23561309564156444, - "grad_norm": 1.5403161878728662, - "learning_rate": 1.7864642192771683e-06, - "loss": 1.2173, - "step": 1738 - }, - { - "epoch": 0.2357486612892293, - "grad_norm": 1.7150678812519777, - "learning_rate": 1.786192916137999e-06, - "loss": 1.2056, - "step": 1739 - }, - { - "epoch": 0.2358842269368942, - "grad_norm": 1.5368865527623417, - "learning_rate": 1.7859214613877316e-06, - "loss": 1.1691, - "step": 1740 - }, - { - "epoch": 0.23601979258455907, - "grad_norm": 1.5488523705781576, - "learning_rate": 1.7856498550787141e-06, - "loss": 1.1953, - "step": 1741 - }, - { - "epoch": 0.23615535823222394, - "grad_norm": 1.4922721206679952, - "learning_rate": 1.7853780972633239e-06, - "loss": 1.179, - "step": 1742 - }, - { - "epoch": 0.23629092387988884, - "grad_norm": 2.1613134248061656, - "learning_rate": 1.7851061879939669e-06, - "loss": 1.2082, - "step": 1743 - }, - { - "epoch": 0.2364264895275537, - "grad_norm": 2.1828051850654546, - "learning_rate": 1.7848341273230786e-06, - "loss": 1.2335, - "step": 1744 - }, - { - "epoch": 0.2365620551752186, - "grad_norm": 2.0082302386401407, - "learning_rate": 1.784561915303124e-06, - "loss": 1.1818, - "step": 1745 - }, - { - "epoch": 0.23669762082288348, - "grad_norm": 1.5342248812923145, - "learning_rate": 1.784289551986597e-06, - "loss": 1.2012, - "step": 1746 - }, - { - "epoch": 0.23683318647054835, - "grad_norm": 2.6614065442501142, - "learning_rate": 1.7840170374260206e-06, - "loss": 1.2011, - "step": 1747 - }, - { - "epoch": 0.23696875211821325, - "grad_norm": 1.6621065729122688, - "learning_rate": 1.7837443716739474e-06, - "loss": 1.189, - "step": 1748 - }, - { - "epoch": 0.23710431776587812, - "grad_norm": 2.251791095587067, - "learning_rate": 1.7834715547829584e-06, - "loss": 1.2043, - "step": 1749 - }, - { - "epoch": 0.23723988341354302, - "grad_norm": 3.1919404392213484, - "learning_rate": 1.7831985868056646e-06, - "loss": 1.1682, - "step": 1750 - }, - { - "epoch": 0.2373754490612079, - "grad_norm": 1.4308438946852087, - "learning_rate": 1.7829254677947054e-06, - "loss": 1.2074, - "step": 1751 - }, - { - "epoch": 0.23751101470887276, - "grad_norm": 2.0664863780825793, - "learning_rate": 1.7826521978027499e-06, - "loss": 1.1878, - "step": 1752 - }, - { - "epoch": 0.23764658035653766, - "grad_norm": 1.588363488616962, - "learning_rate": 1.7823787768824958e-06, - "loss": 1.2233, - "step": 1753 - }, - { - "epoch": 0.23778214600420253, - "grad_norm": 1.9712780021028689, - "learning_rate": 1.7821052050866703e-06, - "loss": 1.2213, - "step": 1754 - }, - { - "epoch": 0.23791771165186743, - "grad_norm": 1.713164839646067, - "learning_rate": 1.7818314824680298e-06, - "loss": 1.2271, - "step": 1755 - }, - { - "epoch": 0.2380532772995323, - "grad_norm": 2.0582344321464836, - "learning_rate": 1.7815576090793592e-06, - "loss": 1.18, - "step": 1756 - }, - { - "epoch": 0.2381888429471972, - "grad_norm": 1.709783999282695, - "learning_rate": 1.781283584973473e-06, - "loss": 1.2608, - "step": 1757 - }, - { - "epoch": 0.23832440859486206, - "grad_norm": 3.3576143130481215, - "learning_rate": 1.781009410203214e-06, - "loss": 1.1656, - "step": 1758 - }, - { - "epoch": 0.23845997424252693, - "grad_norm": 1.5737733190919279, - "learning_rate": 1.7807350848214557e-06, - "loss": 1.2036, - "step": 1759 - }, - { - "epoch": 0.23859553989019183, - "grad_norm": 1.6641856568527742, - "learning_rate": 1.780460608881099e-06, - "loss": 1.2188, - "step": 1760 - }, - { - "epoch": 0.2387311055378567, - "grad_norm": 1.5867628931576359, - "learning_rate": 1.7801859824350743e-06, - "loss": 1.2094, - "step": 1761 - }, - { - "epoch": 0.2388666711855216, - "grad_norm": 1.5714295066589938, - "learning_rate": 1.7799112055363415e-06, - "loss": 1.166, - "step": 1762 - }, - { - "epoch": 0.23900223683318647, - "grad_norm": 1.436096097500095, - "learning_rate": 1.7796362782378887e-06, - "loss": 1.1802, - "step": 1763 - }, - { - "epoch": 0.23913780248085134, - "grad_norm": 1.4277780313203803, - "learning_rate": 1.7793612005927337e-06, - "loss": 1.2173, - "step": 1764 - }, - { - "epoch": 0.23927336812851624, - "grad_norm": 1.6646991629245973, - "learning_rate": 1.7790859726539232e-06, - "loss": 1.1835, - "step": 1765 - }, - { - "epoch": 0.2394089337761811, - "grad_norm": 2.2543976930553735, - "learning_rate": 1.7788105944745325e-06, - "loss": 1.1929, - "step": 1766 - }, - { - "epoch": 0.239544499423846, - "grad_norm": 6.387713840112075, - "learning_rate": 1.7785350661076663e-06, - "loss": 1.1871, - "step": 1767 - }, - { - "epoch": 0.23968006507151088, - "grad_norm": 2.4139263835036613, - "learning_rate": 1.778259387606458e-06, - "loss": 1.2121, - "step": 1768 - }, - { - "epoch": 0.23981563071917575, - "grad_norm": 20.87516555750094, - "learning_rate": 1.7779835590240699e-06, - "loss": 1.1818, - "step": 1769 - }, - { - "epoch": 0.23995119636684065, - "grad_norm": 1.9206608843571586, - "learning_rate": 1.7777075804136938e-06, - "loss": 1.1768, - "step": 1770 - }, - { - "epoch": 0.24008676201450552, - "grad_norm": 2.045805922901558, - "learning_rate": 1.7774314518285492e-06, - "loss": 1.2018, - "step": 1771 - }, - { - "epoch": 0.24022232766217042, - "grad_norm": 2.5713845367875248, - "learning_rate": 1.777155173321886e-06, - "loss": 1.2031, - "step": 1772 - }, - { - "epoch": 0.24035789330983529, - "grad_norm": 1.723989865826696, - "learning_rate": 1.7768787449469823e-06, - "loss": 1.1655, - "step": 1773 - }, - { - "epoch": 0.24049345895750016, - "grad_norm": 1.5934280140319634, - "learning_rate": 1.7766021667571448e-06, - "loss": 1.2068, - "step": 1774 - }, - { - "epoch": 0.24062902460516505, - "grad_norm": 1.807203659933249, - "learning_rate": 1.7763254388057094e-06, - "loss": 1.2257, - "step": 1775 - }, - { - "epoch": 0.24076459025282992, - "grad_norm": 1.47426042796791, - "learning_rate": 1.7760485611460415e-06, - "loss": 1.1919, - "step": 1776 - }, - { - "epoch": 0.24090015590049482, - "grad_norm": 1.500017597255888, - "learning_rate": 1.7757715338315337e-06, - "loss": 1.2326, - "step": 1777 - }, - { - "epoch": 0.2410357215481597, - "grad_norm": 1.8745677117075954, - "learning_rate": 1.7754943569156096e-06, - "loss": 1.2207, - "step": 1778 - }, - { - "epoch": 0.2411712871958246, - "grad_norm": 1.8839605247005642, - "learning_rate": 1.7752170304517202e-06, - "loss": 1.2451, - "step": 1779 - }, - { - "epoch": 0.24130685284348946, - "grad_norm": 1.5916181777925038, - "learning_rate": 1.7749395544933455e-06, - "loss": 1.2075, - "step": 1780 - }, - { - "epoch": 0.24144241849115433, - "grad_norm": 1.8478201458957617, - "learning_rate": 1.7746619290939946e-06, - "loss": 1.1896, - "step": 1781 - }, - { - "epoch": 0.24157798413881923, - "grad_norm": 1.6319989047383878, - "learning_rate": 1.7743841543072055e-06, - "loss": 1.1797, - "step": 1782 - }, - { - "epoch": 0.2417135497864841, - "grad_norm": 1.9817309450200067, - "learning_rate": 1.7741062301865453e-06, - "loss": 1.1604, - "step": 1783 - }, - { - "epoch": 0.241849115434149, - "grad_norm": 2.1406734479760225, - "learning_rate": 1.7738281567856088e-06, - "loss": 1.2062, - "step": 1784 - }, - { - "epoch": 0.24198468108181387, - "grad_norm": 1.4184247001152535, - "learning_rate": 1.7735499341580203e-06, - "loss": 1.2202, - "step": 1785 - }, - { - "epoch": 0.24212024672947874, - "grad_norm": 2.290925986780862, - "learning_rate": 1.7732715623574333e-06, - "loss": 1.1747, - "step": 1786 - }, - { - "epoch": 0.24225581237714364, - "grad_norm": 1.9059915894155868, - "learning_rate": 1.772993041437529e-06, - "loss": 1.1741, - "step": 1787 - }, - { - "epoch": 0.2423913780248085, - "grad_norm": 2.0579543036916523, - "learning_rate": 1.7727143714520184e-06, - "loss": 1.2066, - "step": 1788 - }, - { - "epoch": 0.2425269436724734, - "grad_norm": 1.6285903985022356, - "learning_rate": 1.7724355524546409e-06, - "loss": 1.1801, - "step": 1789 - }, - { - "epoch": 0.24266250932013828, - "grad_norm": 1.682379930427517, - "learning_rate": 1.7721565844991641e-06, - "loss": 1.2031, - "step": 1790 - }, - { - "epoch": 0.24279807496780315, - "grad_norm": 2.09349970654746, - "learning_rate": 1.7718774676393852e-06, - "loss": 1.2128, - "step": 1791 - }, - { - "epoch": 0.24293364061546804, - "grad_norm": 2.1484355957257972, - "learning_rate": 1.7715982019291293e-06, - "loss": 1.1931, - "step": 1792 - }, - { - "epoch": 0.24306920626313291, - "grad_norm": 1.9753447983236694, - "learning_rate": 1.771318787422251e-06, - "loss": 1.2101, - "step": 1793 - }, - { - "epoch": 0.2432047719107978, - "grad_norm": 1.6962573271436958, - "learning_rate": 1.7710392241726328e-06, - "loss": 1.1998, - "step": 1794 - }, - { - "epoch": 0.24334033755846268, - "grad_norm": 1.7980419704907058, - "learning_rate": 1.7707595122341865e-06, - "loss": 1.2386, - "step": 1795 - }, - { - "epoch": 0.24347590320612758, - "grad_norm": 8.613993252694431, - "learning_rate": 1.7704796516608524e-06, - "loss": 1.2127, - "step": 1796 - }, - { - "epoch": 0.24361146885379245, - "grad_norm": 3.5718276657984545, - "learning_rate": 1.7701996425065992e-06, - "loss": 1.1714, - "step": 1797 - }, - { - "epoch": 0.24374703450145732, - "grad_norm": 1.870368356694085, - "learning_rate": 1.7699194848254244e-06, - "loss": 1.202, - "step": 1798 - }, - { - "epoch": 0.24388260014912222, - "grad_norm": 1.5707149587876872, - "learning_rate": 1.7696391786713545e-06, - "loss": 1.2042, - "step": 1799 - }, - { - "epoch": 0.2440181657967871, - "grad_norm": 1.566050580511284, - "learning_rate": 1.769358724098444e-06, - "loss": 1.1919, - "step": 1800 - }, - { - "epoch": 0.244153731444452, - "grad_norm": 1.6689480753183854, - "learning_rate": 1.7690781211607767e-06, - "loss": 1.1982, - "step": 1801 - }, - { - "epoch": 0.24428929709211686, - "grad_norm": 3.0455734368122402, - "learning_rate": 1.7687973699124643e-06, - "loss": 1.185, - "step": 1802 - }, - { - "epoch": 0.24442486273978173, - "grad_norm": 1.7733693962421544, - "learning_rate": 1.7685164704076476e-06, - "loss": 1.1707, - "step": 1803 - }, - { - "epoch": 0.24456042838744663, - "grad_norm": 3.85358938352622, - "learning_rate": 1.768235422700496e-06, - "loss": 1.2599, - "step": 1804 - }, - { - "epoch": 0.2446959940351115, - "grad_norm": 2.9734544688163878, - "learning_rate": 1.767954226845207e-06, - "loss": 1.209, - "step": 1805 - }, - { - "epoch": 0.2448315596827764, - "grad_norm": 1.6322176203724004, - "learning_rate": 1.7676728828960075e-06, - "loss": 1.1638, - "step": 1806 - }, - { - "epoch": 0.24496712533044127, - "grad_norm": 2.8151168345249187, - "learning_rate": 1.7673913909071523e-06, - "loss": 1.2148, - "step": 1807 - }, - { - "epoch": 0.24510269097810614, - "grad_norm": 1.8911683052883512, - "learning_rate": 1.7671097509329242e-06, - "loss": 1.1674, - "step": 1808 - }, - { - "epoch": 0.24523825662577103, - "grad_norm": 1.8360651026447021, - "learning_rate": 1.7668279630276364e-06, - "loss": 1.207, - "step": 1809 - }, - { - "epoch": 0.2453738222734359, - "grad_norm": 1.7404166480993435, - "learning_rate": 1.7665460272456287e-06, - "loss": 1.2138, - "step": 1810 - }, - { - "epoch": 0.2455093879211008, - "grad_norm": 1.731248513004992, - "learning_rate": 1.7662639436412703e-06, - "loss": 1.1933, - "step": 1811 - }, - { - "epoch": 0.24564495356876567, - "grad_norm": 1.7846574779238498, - "learning_rate": 1.7659817122689589e-06, - "loss": 1.1658, - "step": 1812 - }, - { - "epoch": 0.24578051921643054, - "grad_norm": 1.5990289844867998, - "learning_rate": 1.7656993331831208e-06, - "loss": 1.2048, - "step": 1813 - }, - { - "epoch": 0.24591608486409544, - "grad_norm": 2.7871185551958355, - "learning_rate": 1.76541680643821e-06, - "loss": 1.2181, - "step": 1814 - }, - { - "epoch": 0.2460516505117603, - "grad_norm": 1.669064959222726, - "learning_rate": 1.7651341320887102e-06, - "loss": 1.1683, - "step": 1815 - }, - { - "epoch": 0.2461872161594252, - "grad_norm": 1.632013968878007, - "learning_rate": 1.7648513101891325e-06, - "loss": 1.203, - "step": 1816 - }, - { - "epoch": 0.24632278180709008, - "grad_norm": 1.7004497817847526, - "learning_rate": 1.764568340794017e-06, - "loss": 1.2506, - "step": 1817 - }, - { - "epoch": 0.24645834745475498, - "grad_norm": 1.5278590534539576, - "learning_rate": 1.7642852239579323e-06, - "loss": 1.19, - "step": 1818 - }, - { - "epoch": 0.24659391310241985, - "grad_norm": 1.8935000858509914, - "learning_rate": 1.7640019597354747e-06, - "loss": 1.1937, - "step": 1819 - }, - { - "epoch": 0.24672947875008472, - "grad_norm": 1.5194441560760525, - "learning_rate": 1.76371854818127e-06, - "loss": 1.1924, - "step": 1820 - }, - { - "epoch": 0.24686504439774962, - "grad_norm": 1.8005219934015375, - "learning_rate": 1.7634349893499719e-06, - "loss": 1.2207, - "step": 1821 - }, - { - "epoch": 0.2470006100454145, - "grad_norm": 1.990527349272436, - "learning_rate": 1.7631512832962622e-06, - "loss": 1.196, - "step": 1822 - }, - { - "epoch": 0.24713617569307939, - "grad_norm": 2.060578909722346, - "learning_rate": 1.7628674300748511e-06, - "loss": 1.2021, - "step": 1823 - }, - { - "epoch": 0.24727174134074426, - "grad_norm": 1.5460374330086066, - "learning_rate": 1.7625834297404783e-06, - "loss": 1.2007, - "step": 1824 - }, - { - "epoch": 0.24740730698840913, - "grad_norm": 1.6275969963310644, - "learning_rate": 1.7622992823479103e-06, - "loss": 1.1983, - "step": 1825 - }, - { - "epoch": 0.24754287263607402, - "grad_norm": 1.6969775810897163, - "learning_rate": 1.7620149879519431e-06, - "loss": 1.1597, - "step": 1826 - }, - { - "epoch": 0.2476784382837389, - "grad_norm": 1.5664039432194394, - "learning_rate": 1.7617305466074002e-06, - "loss": 1.1873, - "step": 1827 - }, - { - "epoch": 0.2478140039314038, - "grad_norm": 1.7428389619900455, - "learning_rate": 1.7614459583691342e-06, - "loss": 1.2317, - "step": 1828 - }, - { - "epoch": 0.24794956957906866, - "grad_norm": 2.1372815312711837, - "learning_rate": 1.7611612232920258e-06, - "loss": 1.1918, - "step": 1829 - }, - { - "epoch": 0.24808513522673353, - "grad_norm": 1.7520161558403065, - "learning_rate": 1.7608763414309835e-06, - "loss": 1.2, - "step": 1830 - }, - { - "epoch": 0.24822070087439843, - "grad_norm": 2.3755529095277113, - "learning_rate": 1.7605913128409449e-06, - "loss": 1.1475, - "step": 1831 - }, - { - "epoch": 0.2483562665220633, - "grad_norm": 1.5309263233771964, - "learning_rate": 1.7603061375768754e-06, - "loss": 1.1743, - "step": 1832 - }, - { - "epoch": 0.2484918321697282, - "grad_norm": 1.7269799839440687, - "learning_rate": 1.7600208156937688e-06, - "loss": 1.1935, - "step": 1833 - }, - { - "epoch": 0.24862739781739307, - "grad_norm": 1.6732120570165234, - "learning_rate": 1.759735347246647e-06, - "loss": 1.1617, - "step": 1834 - }, - { - "epoch": 0.24876296346505797, - "grad_norm": 1.4891959458162647, - "learning_rate": 1.7594497322905603e-06, - "loss": 1.1989, - "step": 1835 - }, - { - "epoch": 0.24889852911272284, - "grad_norm": 1.6975475816490415, - "learning_rate": 1.759163970880588e-06, - "loss": 1.167, - "step": 1836 - }, - { - "epoch": 0.2490340947603877, - "grad_norm": 2.237393199567373, - "learning_rate": 1.7588780630718358e-06, - "loss": 1.2006, - "step": 1837 - }, - { - "epoch": 0.2491696604080526, - "grad_norm": 1.6064712565819388, - "learning_rate": 1.7585920089194394e-06, - "loss": 1.1902, - "step": 1838 - }, - { - "epoch": 0.24930522605571748, - "grad_norm": 1.9959067513247652, - "learning_rate": 1.7583058084785625e-06, - "loss": 1.2142, - "step": 1839 - }, - { - "epoch": 0.24944079170338238, - "grad_norm": 1.7500628474871012, - "learning_rate": 1.758019461804396e-06, - "loss": 1.1787, - "step": 1840 - }, - { - "epoch": 0.24957635735104725, - "grad_norm": 1.5591458931669742, - "learning_rate": 1.7577329689521596e-06, - "loss": 1.1815, - "step": 1841 - }, - { - "epoch": 0.24971192299871212, - "grad_norm": 1.9415918528662648, - "learning_rate": 1.7574463299771011e-06, - "loss": 1.1723, - "step": 1842 - }, - { - "epoch": 0.24984748864637701, - "grad_norm": 2.3490481854399845, - "learning_rate": 1.7571595449344972e-06, - "loss": 1.1979, - "step": 1843 - }, - { - "epoch": 0.24998305429404188, - "grad_norm": 2.086094533353555, - "learning_rate": 1.7568726138796515e-06, - "loss": 1.2109, - "step": 1844 - }, - { - "epoch": 0.25011861994170675, - "grad_norm": 1.7169559678402486, - "learning_rate": 1.7565855368678965e-06, - "loss": 1.2032, - "step": 1845 - }, - { - "epoch": 0.2502541855893717, - "grad_norm": 2.3590007267406334, - "learning_rate": 1.756298313954593e-06, - "loss": 1.2356, - "step": 1846 - }, - { - "epoch": 0.25038975123703655, - "grad_norm": 1.6563728680264345, - "learning_rate": 1.7560109451951295e-06, - "loss": 1.1719, - "step": 1847 - }, - { - "epoch": 0.2505253168847014, - "grad_norm": 2.076519506943116, - "learning_rate": 1.7557234306449227e-06, - "loss": 1.1914, - "step": 1848 - }, - { - "epoch": 0.2506608825323663, - "grad_norm": 1.7143262198778149, - "learning_rate": 1.7554357703594178e-06, - "loss": 1.2197, - "step": 1849 - }, - { - "epoch": 0.25079644818003116, - "grad_norm": 1.865420801327151, - "learning_rate": 1.7551479643940874e-06, - "loss": 1.1895, - "step": 1850 - }, - { - "epoch": 0.2509320138276961, - "grad_norm": 1.489585723070958, - "learning_rate": 1.7548600128044328e-06, - "loss": 1.1906, - "step": 1851 - }, - { - "epoch": 0.25106757947536096, - "grad_norm": 3.4311812015809053, - "learning_rate": 1.7545719156459835e-06, - "loss": 1.1868, - "step": 1852 - }, - { - "epoch": 0.25120314512302583, - "grad_norm": 1.6129139443062457, - "learning_rate": 1.7542836729742964e-06, - "loss": 1.1882, - "step": 1853 - }, - { - "epoch": 0.2513387107706907, - "grad_norm": 1.5706514257913424, - "learning_rate": 1.753995284844957e-06, - "loss": 1.21, - "step": 1854 - }, - { - "epoch": 0.25147427641835557, - "grad_norm": 4.569138060378896, - "learning_rate": 1.7537067513135787e-06, - "loss": 1.2375, - "step": 1855 - }, - { - "epoch": 0.2516098420660205, - "grad_norm": 1.7913134045219632, - "learning_rate": 1.7534180724358026e-06, - "loss": 1.1788, - "step": 1856 - }, - { - "epoch": 0.25174540771368537, - "grad_norm": 3.2890760601152285, - "learning_rate": 1.7531292482672982e-06, - "loss": 1.2394, - "step": 1857 - }, - { - "epoch": 0.25188097336135024, - "grad_norm": 1.71206368837618, - "learning_rate": 1.7528402788637633e-06, - "loss": 1.1845, - "step": 1858 - }, - { - "epoch": 0.2520165390090151, - "grad_norm": 1.528301103250045, - "learning_rate": 1.7525511642809232e-06, - "loss": 1.184, - "step": 1859 - }, - { - "epoch": 0.25215210465668, - "grad_norm": 1.806588701858939, - "learning_rate": 1.7522619045745312e-06, - "loss": 1.1938, - "step": 1860 - }, - { - "epoch": 0.2522876703043449, - "grad_norm": 1.559793363496674, - "learning_rate": 1.751972499800369e-06, - "loss": 1.1562, - "step": 1861 - }, - { - "epoch": 0.2524232359520098, - "grad_norm": 1.605726718859392, - "learning_rate": 1.7516829500142461e-06, - "loss": 1.1703, - "step": 1862 - }, - { - "epoch": 0.25255880159967464, - "grad_norm": 1.608485927710475, - "learning_rate": 1.7513932552719995e-06, - "loss": 1.1765, - "step": 1863 - }, - { - "epoch": 0.2526943672473395, - "grad_norm": 5.694720899321809, - "learning_rate": 1.7511034156294948e-06, - "loss": 1.1917, - "step": 1864 - }, - { - "epoch": 0.2528299328950044, - "grad_norm": 1.826878690726368, - "learning_rate": 1.7508134311426253e-06, - "loss": 1.1734, - "step": 1865 - }, - { - "epoch": 0.2529654985426693, - "grad_norm": 2.206755246750216, - "learning_rate": 1.750523301867312e-06, - "loss": 1.1864, - "step": 1866 - }, - { - "epoch": 0.2531010641903342, - "grad_norm": 1.8519559895031703, - "learning_rate": 1.7502330278595043e-06, - "loss": 1.2315, - "step": 1867 - }, - { - "epoch": 0.25323662983799905, - "grad_norm": 2.329847785735694, - "learning_rate": 1.7499426091751792e-06, - "loss": 1.191, - "step": 1868 - }, - { - "epoch": 0.2533721954856639, - "grad_norm": 1.8100596771426842, - "learning_rate": 1.7496520458703416e-06, - "loss": 1.2478, - "step": 1869 - }, - { - "epoch": 0.2535077611333288, - "grad_norm": 1.7301729282557006, - "learning_rate": 1.7493613380010244e-06, - "loss": 1.1818, - "step": 1870 - }, - { - "epoch": 0.2536433267809937, - "grad_norm": 1.6595545667590552, - "learning_rate": 1.7490704856232882e-06, - "loss": 1.1962, - "step": 1871 - }, - { - "epoch": 0.2537788924286586, - "grad_norm": 1.77899817473183, - "learning_rate": 1.7487794887932216e-06, - "loss": 1.1976, - "step": 1872 - }, - { - "epoch": 0.25391445807632346, - "grad_norm": 1.5608200297789305, - "learning_rate": 1.7484883475669412e-06, - "loss": 1.2147, - "step": 1873 - }, - { - "epoch": 0.2540500237239883, - "grad_norm": 1.9176783642045296, - "learning_rate": 1.748197062000591e-06, - "loss": 1.1977, - "step": 1874 - }, - { - "epoch": 0.2541855893716532, - "grad_norm": 2.4696813874017667, - "learning_rate": 1.7479056321503436e-06, - "loss": 1.1484, - "step": 1875 - }, - { - "epoch": 0.2543211550193181, - "grad_norm": 1.563125237635046, - "learning_rate": 1.7476140580723984e-06, - "loss": 1.157, - "step": 1876 - }, - { - "epoch": 0.254456720666983, - "grad_norm": 1.8531539890142796, - "learning_rate": 1.7473223398229836e-06, - "loss": 1.2018, - "step": 1877 - }, - { - "epoch": 0.25459228631464786, - "grad_norm": 1.6228733728381488, - "learning_rate": 1.7470304774583542e-06, - "loss": 1.2057, - "step": 1878 - }, - { - "epoch": 0.25472785196231273, - "grad_norm": 1.4814496947019944, - "learning_rate": 1.7467384710347943e-06, - "loss": 1.1746, - "step": 1879 - }, - { - "epoch": 0.2548634176099776, - "grad_norm": 2.2326865337126773, - "learning_rate": 1.7464463206086144e-06, - "loss": 1.2008, - "step": 1880 - }, - { - "epoch": 0.25499898325764253, - "grad_norm": 2.034774868453193, - "learning_rate": 1.7461540262361538e-06, - "loss": 1.203, - "step": 1881 - }, - { - "epoch": 0.2551345489053074, - "grad_norm": 1.4405408078617963, - "learning_rate": 1.7458615879737791e-06, - "loss": 1.2423, - "step": 1882 - }, - { - "epoch": 0.25527011455297227, - "grad_norm": 3.156298832770999, - "learning_rate": 1.7455690058778844e-06, - "loss": 1.1979, - "step": 1883 - }, - { - "epoch": 0.25540568020063714, - "grad_norm": 2.3055070326134324, - "learning_rate": 1.7452762800048924e-06, - "loss": 1.1804, - "step": 1884 - }, - { - "epoch": 0.25554124584830207, - "grad_norm": 2.229454277381315, - "learning_rate": 1.7449834104112525e-06, - "loss": 1.2092, - "step": 1885 - }, - { - "epoch": 0.25567681149596694, - "grad_norm": 1.5652240956628032, - "learning_rate": 1.7446903971534423e-06, - "loss": 1.1865, - "step": 1886 - }, - { - "epoch": 0.2558123771436318, - "grad_norm": 1.6660615301329724, - "learning_rate": 1.7443972402879674e-06, - "loss": 1.1908, - "step": 1887 - }, - { - "epoch": 0.2559479427912967, - "grad_norm": 1.6808585576327417, - "learning_rate": 1.7441039398713605e-06, - "loss": 1.1963, - "step": 1888 - }, - { - "epoch": 0.25608350843896155, - "grad_norm": 1.5467596849211303, - "learning_rate": 1.7438104959601826e-06, - "loss": 1.1519, - "step": 1889 - }, - { - "epoch": 0.2562190740866265, - "grad_norm": 3.69015853324842, - "learning_rate": 1.7435169086110217e-06, - "loss": 1.1797, - "step": 1890 - }, - { - "epoch": 0.25635463973429135, - "grad_norm": 1.7706617480142048, - "learning_rate": 1.743223177880494e-06, - "loss": 1.1908, - "step": 1891 - }, - { - "epoch": 0.2564902053819562, - "grad_norm": 1.6231691497162457, - "learning_rate": 1.742929303825243e-06, - "loss": 1.2308, - "step": 1892 - }, - { - "epoch": 0.2566257710296211, - "grad_norm": 1.9855269091945533, - "learning_rate": 1.7426352865019402e-06, - "loss": 1.1719, - "step": 1893 - }, - { - "epoch": 0.25676133667728596, - "grad_norm": 1.799176782751917, - "learning_rate": 1.7423411259672841e-06, - "loss": 1.1918, - "step": 1894 - }, - { - "epoch": 0.2568969023249509, - "grad_norm": 2.0465651162837526, - "learning_rate": 1.7420468222780017e-06, - "loss": 1.1817, - "step": 1895 - }, - { - "epoch": 0.25703246797261575, - "grad_norm": 1.4893487485090278, - "learning_rate": 1.7417523754908473e-06, - "loss": 1.2212, - "step": 1896 - }, - { - "epoch": 0.2571680336202806, - "grad_norm": 1.890896093423901, - "learning_rate": 1.741457785662602e-06, - "loss": 1.1908, - "step": 1897 - }, - { - "epoch": 0.2573035992679455, - "grad_norm": 1.6175566263019743, - "learning_rate": 1.7411630528500757e-06, - "loss": 1.184, - "step": 1898 - }, - { - "epoch": 0.25743916491561036, - "grad_norm": 1.6520556262686763, - "learning_rate": 1.7408681771101048e-06, - "loss": 1.2404, - "step": 1899 - }, - { - "epoch": 0.2575747305632753, - "grad_norm": 2.1122157217879396, - "learning_rate": 1.740573158499554e-06, - "loss": 1.236, - "step": 1900 - }, - { - "epoch": 0.25771029621094016, - "grad_norm": 1.5213514849602392, - "learning_rate": 1.7402779970753154e-06, - "loss": 1.2032, - "step": 1901 - }, - { - "epoch": 0.25784586185860503, - "grad_norm": 1.9520475976161087, - "learning_rate": 1.7399826928943084e-06, - "loss": 1.1938, - "step": 1902 - }, - { - "epoch": 0.2579814275062699, - "grad_norm": 1.3801054875397702, - "learning_rate": 1.7396872460134805e-06, - "loss": 1.2022, - "step": 1903 - }, - { - "epoch": 0.25811699315393477, - "grad_norm": 1.9008585718455056, - "learning_rate": 1.7393916564898055e-06, - "loss": 1.2163, - "step": 1904 - }, - { - "epoch": 0.2582525588015997, - "grad_norm": 1.5983095060650903, - "learning_rate": 1.739095924380286e-06, - "loss": 1.1785, - "step": 1905 - }, - { - "epoch": 0.25838812444926457, - "grad_norm": 2.228462564286304, - "learning_rate": 1.7388000497419518e-06, - "loss": 1.2412, - "step": 1906 - }, - { - "epoch": 0.25852369009692944, - "grad_norm": 1.6483064746175413, - "learning_rate": 1.7385040326318597e-06, - "loss": 1.1703, - "step": 1907 - }, - { - "epoch": 0.2586592557445943, - "grad_norm": 1.5791599364332414, - "learning_rate": 1.738207873107094e-06, - "loss": 1.1739, - "step": 1908 - }, - { - "epoch": 0.2587948213922592, - "grad_norm": 2.0036923709334964, - "learning_rate": 1.7379115712247675e-06, - "loss": 1.2011, - "step": 1909 - }, - { - "epoch": 0.2589303870399241, - "grad_norm": 1.4852610218076565, - "learning_rate": 1.7376151270420186e-06, - "loss": 1.1761, - "step": 1910 - }, - { - "epoch": 0.259065952687589, - "grad_norm": 1.3702449279427933, - "learning_rate": 1.737318540616015e-06, - "loss": 1.1806, - "step": 1911 - }, - { - "epoch": 0.25920151833525384, - "grad_norm": 2.119310443418174, - "learning_rate": 1.7370218120039512e-06, - "loss": 1.1565, - "step": 1912 - }, - { - "epoch": 0.2593370839829187, - "grad_norm": 1.4777002582062737, - "learning_rate": 1.7367249412630484e-06, - "loss": 1.1854, - "step": 1913 - }, - { - "epoch": 0.2594726496305836, - "grad_norm": 2.2001975424656997, - "learning_rate": 1.7364279284505564e-06, - "loss": 1.1993, - "step": 1914 - }, - { - "epoch": 0.2596082152782485, - "grad_norm": 2.458182272053733, - "learning_rate": 1.736130773623751e-06, - "loss": 1.1365, - "step": 1915 - }, - { - "epoch": 0.2597437809259134, - "grad_norm": 1.562020078578069, - "learning_rate": 1.7358334768399368e-06, - "loss": 1.2149, - "step": 1916 - }, - { - "epoch": 0.25987934657357825, - "grad_norm": 1.6105973046841557, - "learning_rate": 1.7355360381564449e-06, - "loss": 1.2031, - "step": 1917 - }, - { - "epoch": 0.2600149122212431, - "grad_norm": 1.5189268595765233, - "learning_rate": 1.7352384576306336e-06, - "loss": 1.1936, - "step": 1918 - }, - { - "epoch": 0.260150477868908, - "grad_norm": 1.880591860972907, - "learning_rate": 1.7349407353198898e-06, - "loss": 1.2086, - "step": 1919 - }, - { - "epoch": 0.2602860435165729, - "grad_norm": 1.6743979957835917, - "learning_rate": 1.7346428712816262e-06, - "loss": 1.1613, - "step": 1920 - }, - { - "epoch": 0.2604216091642378, - "grad_norm": 1.9872728341646062, - "learning_rate": 1.734344865573284e-06, - "loss": 1.2136, - "step": 1921 - }, - { - "epoch": 0.26055717481190266, - "grad_norm": 2.268637977879567, - "learning_rate": 1.734046718252331e-06, - "loss": 1.1816, - "step": 1922 - }, - { - "epoch": 0.26069274045956753, - "grad_norm": 1.8785686317423995, - "learning_rate": 1.7337484293762627e-06, - "loss": 1.1635, - "step": 1923 - }, - { - "epoch": 0.26082830610723245, - "grad_norm": 1.6830443513280406, - "learning_rate": 1.7334499990026014e-06, - "loss": 1.1738, - "step": 1924 - }, - { - "epoch": 0.2609638717548973, - "grad_norm": 1.5578132445282213, - "learning_rate": 1.7331514271888973e-06, - "loss": 1.1892, - "step": 1925 - }, - { - "epoch": 0.2610994374025622, - "grad_norm": 1.5266502040744918, - "learning_rate": 1.7328527139927278e-06, - "loss": 1.1657, - "step": 1926 - }, - { - "epoch": 0.26123500305022707, - "grad_norm": 1.653365193232319, - "learning_rate": 1.7325538594716971e-06, - "loss": 1.199, - "step": 1927 - }, - { - "epoch": 0.26137056869789194, - "grad_norm": 2.9146467579667625, - "learning_rate": 1.7322548636834372e-06, - "loss": 1.2631, - "step": 1928 - }, - { - "epoch": 0.26150613434555686, - "grad_norm": 9.030360399092245, - "learning_rate": 1.7319557266856067e-06, - "loss": 1.1873, - "step": 1929 - }, - { - "epoch": 0.26164169999322173, - "grad_norm": 1.8895664567206647, - "learning_rate": 1.731656448535892e-06, - "loss": 1.1889, - "step": 1930 - }, - { - "epoch": 0.2617772656408866, - "grad_norm": 1.69212756737808, - "learning_rate": 1.7313570292920065e-06, - "loss": 1.1456, - "step": 1931 - }, - { - "epoch": 0.2619128312885515, - "grad_norm": 1.55934895472438, - "learning_rate": 1.731057469011691e-06, - "loss": 1.1954, - "step": 1932 - }, - { - "epoch": 0.26204839693621634, - "grad_norm": 1.4238402245932757, - "learning_rate": 1.7307577677527135e-06, - "loss": 1.2062, - "step": 1933 - }, - { - "epoch": 0.26218396258388127, - "grad_norm": 1.7953814389761782, - "learning_rate": 1.7304579255728684e-06, - "loss": 1.2016, - "step": 1934 - }, - { - "epoch": 0.26231952823154614, - "grad_norm": 1.4109062293297472, - "learning_rate": 1.7301579425299782e-06, - "loss": 1.1712, - "step": 1935 - }, - { - "epoch": 0.262455093879211, - "grad_norm": 2.148570769901404, - "learning_rate": 1.7298578186818925e-06, - "loss": 1.1801, - "step": 1936 - }, - { - "epoch": 0.2625906595268759, - "grad_norm": 1.610953342059103, - "learning_rate": 1.7295575540864875e-06, - "loss": 1.1946, - "step": 1937 - }, - { - "epoch": 0.26272622517454075, - "grad_norm": 2.5846969144524516, - "learning_rate": 1.729257148801667e-06, - "loss": 1.1541, - "step": 1938 - }, - { - "epoch": 0.2628617908222057, - "grad_norm": 1.630289677762051, - "learning_rate": 1.7289566028853616e-06, - "loss": 1.1656, - "step": 1939 - }, - { - "epoch": 0.26299735646987055, - "grad_norm": 1.6695357196690213, - "learning_rate": 1.7286559163955297e-06, - "loss": 1.2318, - "step": 1940 - }, - { - "epoch": 0.2631329221175354, - "grad_norm": 1.5489853930953343, - "learning_rate": 1.7283550893901557e-06, - "loss": 1.1972, - "step": 1941 - }, - { - "epoch": 0.2632684877652003, - "grad_norm": 1.8694701251980057, - "learning_rate": 1.728054121927252e-06, - "loss": 1.1545, - "step": 1942 - }, - { - "epoch": 0.26340405341286516, - "grad_norm": 2.2065872356442786, - "learning_rate": 1.727753014064858e-06, - "loss": 1.2294, - "step": 1943 - }, - { - "epoch": 0.2635396190605301, - "grad_norm": 1.7749296194891162, - "learning_rate": 1.7274517658610397e-06, - "loss": 1.1787, - "step": 1944 - }, - { - "epoch": 0.26367518470819495, - "grad_norm": 1.5695302537397104, - "learning_rate": 1.7271503773738906e-06, - "loss": 1.1754, - "step": 1945 - }, - { - "epoch": 0.2638107503558598, - "grad_norm": 1.472538467148062, - "learning_rate": 1.7268488486615307e-06, - "loss": 1.1677, - "step": 1946 - }, - { - "epoch": 0.2639463160035247, - "grad_norm": 2.7061770699601477, - "learning_rate": 1.726547179782108e-06, - "loss": 1.1804, - "step": 1947 - }, - { - "epoch": 0.26408188165118957, - "grad_norm": 2.5972446727747522, - "learning_rate": 1.7262453707937964e-06, - "loss": 1.1761, - "step": 1948 - }, - { - "epoch": 0.2642174472988545, - "grad_norm": 1.5812911586825953, - "learning_rate": 1.725943421754798e-06, - "loss": 1.2051, - "step": 1949 - }, - { - "epoch": 0.26435301294651936, - "grad_norm": 1.7871580041661546, - "learning_rate": 1.7256413327233408e-06, - "loss": 1.1811, - "step": 1950 - }, - { - "epoch": 0.26448857859418423, - "grad_norm": 2.2977873622357703, - "learning_rate": 1.7253391037576806e-06, - "loss": 1.1679, - "step": 1951 - }, - { - "epoch": 0.2646241442418491, - "grad_norm": 1.4162446167856595, - "learning_rate": 1.7250367349160994e-06, - "loss": 1.1833, - "step": 1952 - }, - { - "epoch": 0.26475970988951397, - "grad_norm": 1.6948537226921172, - "learning_rate": 1.724734226256907e-06, - "loss": 1.2328, - "step": 1953 - }, - { - "epoch": 0.2648952755371789, - "grad_norm": 1.9185820877845787, - "learning_rate": 1.7244315778384403e-06, - "loss": 1.199, - "step": 1954 - }, - { - "epoch": 0.26503084118484377, - "grad_norm": 3.940088186294935, - "learning_rate": 1.7241287897190616e-06, - "loss": 1.1909, - "step": 1955 - }, - { - "epoch": 0.26516640683250864, - "grad_norm": 1.9741516417306701, - "learning_rate": 1.7238258619571616e-06, - "loss": 1.2153, - "step": 1956 - }, - { - "epoch": 0.2653019724801735, - "grad_norm": 1.659230328574816, - "learning_rate": 1.7235227946111582e-06, - "loss": 1.1843, - "step": 1957 - }, - { - "epoch": 0.2654375381278384, - "grad_norm": 1.4416609341995261, - "learning_rate": 1.7232195877394948e-06, - "loss": 1.2289, - "step": 1958 - }, - { - "epoch": 0.2655731037755033, - "grad_norm": 1.999871336464743, - "learning_rate": 1.7229162414006426e-06, - "loss": 1.2091, - "step": 1959 - }, - { - "epoch": 0.2657086694231682, - "grad_norm": 1.6407417007496632, - "learning_rate": 1.7226127556530997e-06, - "loss": 1.201, - "step": 1960 - }, - { - "epoch": 0.26584423507083305, - "grad_norm": 1.6533415972684002, - "learning_rate": 1.7223091305553905e-06, - "loss": 1.2276, - "step": 1961 - }, - { - "epoch": 0.2659798007184979, - "grad_norm": 1.668007442997153, - "learning_rate": 1.7220053661660673e-06, - "loss": 1.1978, - "step": 1962 - }, - { - "epoch": 0.2661153663661628, - "grad_norm": 1.9750930637928574, - "learning_rate": 1.7217014625437085e-06, - "loss": 1.1756, - "step": 1963 - }, - { - "epoch": 0.2662509320138277, - "grad_norm": 1.8416910598082321, - "learning_rate": 1.721397419746919e-06, - "loss": 1.1984, - "step": 1964 - }, - { - "epoch": 0.2663864976614926, - "grad_norm": 1.6742950333274575, - "learning_rate": 1.721093237834332e-06, - "loss": 1.1772, - "step": 1965 - }, - { - "epoch": 0.26652206330915745, - "grad_norm": 1.7342107952510437, - "learning_rate": 1.7207889168646056e-06, - "loss": 1.1949, - "step": 1966 - }, - { - "epoch": 0.2666576289568223, - "grad_norm": 1.5776670772389574, - "learning_rate": 1.7204844568964262e-06, - "loss": 1.1836, - "step": 1967 - }, - { - "epoch": 0.26679319460448725, - "grad_norm": 2.4337526266627307, - "learning_rate": 1.7201798579885067e-06, - "loss": 1.1801, - "step": 1968 - }, - { - "epoch": 0.2669287602521521, - "grad_norm": 1.5401607386204241, - "learning_rate": 1.7198751201995862e-06, - "loss": 1.2301, - "step": 1969 - }, - { - "epoch": 0.267064325899817, - "grad_norm": 1.97544889062139, - "learning_rate": 1.7195702435884312e-06, - "loss": 1.1522, - "step": 1970 - }, - { - "epoch": 0.26719989154748186, - "grad_norm": 1.3907376262451205, - "learning_rate": 1.7192652282138346e-06, - "loss": 1.1853, - "step": 1971 - }, - { - "epoch": 0.26733545719514673, - "grad_norm": 1.6056729771749296, - "learning_rate": 1.7189600741346164e-06, - "loss": 1.186, - "step": 1972 - }, - { - "epoch": 0.26747102284281166, - "grad_norm": 1.6254880721500145, - "learning_rate": 1.7186547814096232e-06, - "loss": 1.192, - "step": 1973 - }, - { - "epoch": 0.2676065884904765, - "grad_norm": 1.8042638019186525, - "learning_rate": 1.7183493500977275e-06, - "loss": 1.2228, - "step": 1974 - }, - { - "epoch": 0.2677421541381414, - "grad_norm": 2.1272322029119954, - "learning_rate": 1.7180437802578302e-06, - "loss": 1.2055, - "step": 1975 - }, - { - "epoch": 0.26787771978580627, - "grad_norm": 1.4275244900274735, - "learning_rate": 1.717738071948858e-06, - "loss": 1.188, - "step": 1976 - }, - { - "epoch": 0.26801328543347114, - "grad_norm": 1.8545478393390604, - "learning_rate": 1.7174322252297638e-06, - "loss": 1.2351, - "step": 1977 - }, - { - "epoch": 0.26814885108113606, - "grad_norm": 1.4872109487518286, - "learning_rate": 1.7171262401595282e-06, - "loss": 1.1411, - "step": 1978 - }, - { - "epoch": 0.26828441672880093, - "grad_norm": 1.6750643679294117, - "learning_rate": 1.7168201167971579e-06, - "loss": 1.1697, - "step": 1979 - }, - { - "epoch": 0.2684199823764658, - "grad_norm": 1.696007116366721, - "learning_rate": 1.7165138552016861e-06, - "loss": 1.2039, - "step": 1980 - }, - { - "epoch": 0.2685555480241307, - "grad_norm": 1.4211552375257703, - "learning_rate": 1.7162074554321736e-06, - "loss": 1.1858, - "step": 1981 - }, - { - "epoch": 0.26869111367179555, - "grad_norm": 1.5223419114179875, - "learning_rate": 1.7159009175477061e-06, - "loss": 1.1931, - "step": 1982 - }, - { - "epoch": 0.26882667931946047, - "grad_norm": 1.6713043753262893, - "learning_rate": 1.715594241607398e-06, - "loss": 1.1766, - "step": 1983 - }, - { - "epoch": 0.26896224496712534, - "grad_norm": 2.004327858016542, - "learning_rate": 1.7152874276703888e-06, - "loss": 1.2229, - "step": 1984 - }, - { - "epoch": 0.2690978106147902, - "grad_norm": 1.5088331168131832, - "learning_rate": 1.7149804757958456e-06, - "loss": 1.1647, - "step": 1985 - }, - { - "epoch": 0.2692333762624551, - "grad_norm": 4.393546907090314, - "learning_rate": 1.714673386042961e-06, - "loss": 1.1888, - "step": 1986 - }, - { - "epoch": 0.26936894191011995, - "grad_norm": 2.142208610405135, - "learning_rate": 1.7143661584709553e-06, - "loss": 1.2261, - "step": 1987 - }, - { - "epoch": 0.2695045075577849, - "grad_norm": 1.6431224218901093, - "learning_rate": 1.714058793139075e-06, - "loss": 1.1691, - "step": 1988 - }, - { - "epoch": 0.26964007320544975, - "grad_norm": 1.9581167821880678, - "learning_rate": 1.7137512901065924e-06, - "loss": 1.2012, - "step": 1989 - }, - { - "epoch": 0.2697756388531146, - "grad_norm": 1.936656157322999, - "learning_rate": 1.713443649432808e-06, - "loss": 1.1963, - "step": 1990 - }, - { - "epoch": 0.2699112045007795, - "grad_norm": 1.6017263986061223, - "learning_rate": 1.7131358711770472e-06, - "loss": 1.1818, - "step": 1991 - }, - { - "epoch": 0.27004677014844436, - "grad_norm": 1.4596365210906028, - "learning_rate": 1.7128279553986626e-06, - "loss": 1.2157, - "step": 1992 - }, - { - "epoch": 0.2701823357961093, - "grad_norm": 1.5463918163931358, - "learning_rate": 1.7125199021570339e-06, - "loss": 1.1643, - "step": 1993 - }, - { - "epoch": 0.27031790144377416, - "grad_norm": 2.598697279830573, - "learning_rate": 1.712211711511566e-06, - "loss": 1.1969, - "step": 1994 - }, - { - "epoch": 0.270453467091439, - "grad_norm": 5.697155207364408, - "learning_rate": 1.7119033835216916e-06, - "loss": 1.1602, - "step": 1995 - }, - { - "epoch": 0.2705890327391039, - "grad_norm": 1.4922258914912339, - "learning_rate": 1.7115949182468693e-06, - "loss": 1.1798, - "step": 1996 - }, - { - "epoch": 0.27072459838676877, - "grad_norm": 3.8196694220345786, - "learning_rate": 1.7112863157465838e-06, - "loss": 1.1809, - "step": 1997 - }, - { - "epoch": 0.2708601640344337, - "grad_norm": 2.4054557958232308, - "learning_rate": 1.7109775760803466e-06, - "loss": 1.1768, - "step": 1998 - }, - { - "epoch": 0.27099572968209856, - "grad_norm": 1.67102754116193, - "learning_rate": 1.7106686993076962e-06, - "loss": 1.1802, - "step": 1999 - }, - { - "epoch": 0.27113129532976343, - "grad_norm": 1.5835503580826054, - "learning_rate": 1.710359685488197e-06, - "loss": 1.1893, - "step": 2000 - }, - { - "epoch": 0.2712668609774283, - "grad_norm": 1.3898760657863136, - "learning_rate": 1.7100505346814396e-06, - "loss": 1.1888, - "step": 2001 - }, - { - "epoch": 0.2714024266250932, - "grad_norm": 1.7653024645489073, - "learning_rate": 1.709741246947041e-06, - "loss": 1.1528, - "step": 2002 - }, - { - "epoch": 0.2715379922727581, - "grad_norm": 1.4898545875198783, - "learning_rate": 1.709431822344646e-06, - "loss": 1.1568, - "step": 2003 - }, - { - "epoch": 0.27167355792042297, - "grad_norm": 1.4750672539757093, - "learning_rate": 1.7091222609339234e-06, - "loss": 1.1762, - "step": 2004 - }, - { - "epoch": 0.27180912356808784, - "grad_norm": 15.037890770712737, - "learning_rate": 1.7088125627745704e-06, - "loss": 1.1656, - "step": 2005 - }, - { - "epoch": 0.2719446892157527, - "grad_norm": 1.640911715201682, - "learning_rate": 1.7085027279263098e-06, - "loss": 1.1885, - "step": 2006 - }, - { - "epoch": 0.27208025486341764, - "grad_norm": 2.2533330416567585, - "learning_rate": 1.7081927564488908e-06, - "loss": 1.2016, - "step": 2007 - }, - { - "epoch": 0.2722158205110825, - "grad_norm": 1.5611126126275892, - "learning_rate": 1.7078826484020886e-06, - "loss": 1.2347, - "step": 2008 - }, - { - "epoch": 0.2723513861587474, - "grad_norm": 1.4630660388287986, - "learning_rate": 1.7075724038457053e-06, - "loss": 1.1271, - "step": 2009 - }, - { - "epoch": 0.27248695180641225, - "grad_norm": 1.715012287834882, - "learning_rate": 1.7072620228395693e-06, - "loss": 1.1695, - "step": 2010 - }, - { - "epoch": 0.2726225174540771, - "grad_norm": 1.8098977198841975, - "learning_rate": 1.7069515054435351e-06, - "loss": 1.2131, - "step": 2011 - }, - { - "epoch": 0.27275808310174204, - "grad_norm": 1.814793796728554, - "learning_rate": 1.7066408517174832e-06, - "loss": 1.2183, - "step": 2012 - }, - { - "epoch": 0.2728936487494069, - "grad_norm": 1.6197766881787514, - "learning_rate": 1.706330061721321e-06, - "loss": 1.1772, - "step": 2013 - }, - { - "epoch": 0.2730292143970718, - "grad_norm": 2.2079284857824, - "learning_rate": 1.7060191355149817e-06, - "loss": 1.1769, - "step": 2014 - }, - { - "epoch": 0.27316478004473665, - "grad_norm": 2.657007119554157, - "learning_rate": 1.7057080731584252e-06, - "loss": 1.2444, - "step": 2015 - }, - { - "epoch": 0.2733003456924015, - "grad_norm": 1.9038406041899838, - "learning_rate": 1.7053968747116374e-06, - "loss": 1.1678, - "step": 2016 - }, - { - "epoch": 0.27343591134006645, - "grad_norm": 1.4638568466325355, - "learning_rate": 1.7050855402346303e-06, - "loss": 1.1553, - "step": 2017 - }, - { - "epoch": 0.2735714769877313, - "grad_norm": 1.5759010306113495, - "learning_rate": 1.7047740697874425e-06, - "loss": 1.1912, - "step": 2018 - }, - { - "epoch": 0.2737070426353962, - "grad_norm": 1.942771339768464, - "learning_rate": 1.7044624634301382e-06, - "loss": 1.2057, - "step": 2019 - }, - { - "epoch": 0.27384260828306106, - "grad_norm": 1.8149382338391082, - "learning_rate": 1.7041507212228088e-06, - "loss": 1.178, - "step": 2020 - }, - { - "epoch": 0.27397817393072593, - "grad_norm": 1.7434494756144343, - "learning_rate": 1.7038388432255709e-06, - "loss": 1.1832, - "step": 2021 - }, - { - "epoch": 0.27411373957839086, - "grad_norm": 1.5760589969470857, - "learning_rate": 1.7035268294985677e-06, - "loss": 1.2014, - "step": 2022 - }, - { - "epoch": 0.27424930522605573, - "grad_norm": 1.6174131772668392, - "learning_rate": 1.703214680101969e-06, - "loss": 1.1904, - "step": 2023 - }, - { - "epoch": 0.2743848708737206, - "grad_norm": 2.1288264063097384, - "learning_rate": 1.70290239509597e-06, - "loss": 1.1987, - "step": 2024 - }, - { - "epoch": 0.27452043652138547, - "grad_norm": 1.6448856810238437, - "learning_rate": 1.7025899745407925e-06, - "loss": 1.1788, - "step": 2025 - }, - { - "epoch": 0.27465600216905034, - "grad_norm": 1.7298974951110495, - "learning_rate": 1.7022774184966845e-06, - "loss": 1.1861, - "step": 2026 - }, - { - "epoch": 0.27479156781671527, - "grad_norm": 1.8391302030430474, - "learning_rate": 1.7019647270239194e-06, - "loss": 1.2012, - "step": 2027 - }, - { - "epoch": 0.27492713346438014, - "grad_norm": 2.3091252882676665, - "learning_rate": 1.7016519001827977e-06, - "loss": 1.2049, - "step": 2028 - }, - { - "epoch": 0.275062699112045, - "grad_norm": 1.5317250361791064, - "learning_rate": 1.7013389380336458e-06, - "loss": 1.206, - "step": 2029 - }, - { - "epoch": 0.2751982647597099, - "grad_norm": 1.6881415669918651, - "learning_rate": 1.7010258406368157e-06, - "loss": 1.2019, - "step": 2030 - }, - { - "epoch": 0.27533383040737475, - "grad_norm": 3.378315573417716, - "learning_rate": 1.7007126080526857e-06, - "loss": 1.1634, - "step": 2031 - }, - { - "epoch": 0.2754693960550397, - "grad_norm": 1.6460833025406625, - "learning_rate": 1.7003992403416603e-06, - "loss": 1.2428, - "step": 2032 - }, - { - "epoch": 0.27560496170270454, - "grad_norm": 3.5171393792539116, - "learning_rate": 1.70008573756417e-06, - "loss": 1.1731, - "step": 2033 - }, - { - "epoch": 0.2757405273503694, - "grad_norm": 1.8206227223668643, - "learning_rate": 1.6997720997806714e-06, - "loss": 1.2453, - "step": 2034 - }, - { - "epoch": 0.2758760929980343, - "grad_norm": 2.3371221758725427, - "learning_rate": 1.699458327051647e-06, - "loss": 1.2155, - "step": 2035 - }, - { - "epoch": 0.27601165864569915, - "grad_norm": 2.6366083846899664, - "learning_rate": 1.6991444194376054e-06, - "loss": 1.2362, - "step": 2036 - }, - { - "epoch": 0.2761472242933641, - "grad_norm": 1.5174168858232495, - "learning_rate": 1.6988303769990813e-06, - "loss": 1.1502, - "step": 2037 - }, - { - "epoch": 0.27628278994102895, - "grad_norm": 1.949013473519719, - "learning_rate": 1.6985161997966352e-06, - "loss": 1.1827, - "step": 2038 - }, - { - "epoch": 0.2764183555886938, - "grad_norm": 1.6671072179711797, - "learning_rate": 1.6982018878908536e-06, - "loss": 1.2027, - "step": 2039 - }, - { - "epoch": 0.2765539212363587, - "grad_norm": 1.4350800723338488, - "learning_rate": 1.6978874413423495e-06, - "loss": 1.1893, - "step": 2040 - }, - { - "epoch": 0.27668948688402356, - "grad_norm": 1.7796166021201278, - "learning_rate": 1.6975728602117609e-06, - "loss": 1.1769, - "step": 2041 - }, - { - "epoch": 0.2768250525316885, - "grad_norm": 2.2447580322391776, - "learning_rate": 1.6972581445597527e-06, - "loss": 1.1975, - "step": 2042 - }, - { - "epoch": 0.27696061817935336, - "grad_norm": 1.7888584592527919, - "learning_rate": 1.6969432944470148e-06, - "loss": 1.2046, - "step": 2043 - }, - { - "epoch": 0.2770961838270182, - "grad_norm": 1.8046316654247059, - "learning_rate": 1.6966283099342643e-06, - "loss": 1.188, - "step": 2044 - }, - { - "epoch": 0.2772317494746831, - "grad_norm": 2.172512058120931, - "learning_rate": 1.6963131910822427e-06, - "loss": 1.1563, - "step": 2045 - }, - { - "epoch": 0.277367315122348, - "grad_norm": 2.528516575409953, - "learning_rate": 1.6959979379517186e-06, - "loss": 1.1568, - "step": 2046 - }, - { - "epoch": 0.2775028807700129, - "grad_norm": 1.7834674794986773, - "learning_rate": 1.6956825506034863e-06, - "loss": 1.1759, - "step": 2047 - }, - { - "epoch": 0.27763844641767776, - "grad_norm": 1.786700554001862, - "learning_rate": 1.6953670290983656e-06, - "loss": 1.1463, - "step": 2048 - }, - { - "epoch": 0.27777401206534263, - "grad_norm": 4.178710594563023, - "learning_rate": 1.6950513734972018e-06, - "loss": 1.1599, - "step": 2049 - }, - { - "epoch": 0.2779095777130075, - "grad_norm": 1.7127750032941875, - "learning_rate": 1.6947355838608672e-06, - "loss": 1.1477, - "step": 2050 - }, - { - "epoch": 0.27804514336067243, - "grad_norm": 4.095253908226092, - "learning_rate": 1.6944196602502593e-06, - "loss": 1.2064, - "step": 2051 - }, - { - "epoch": 0.2781807090083373, - "grad_norm": 1.5697711673112849, - "learning_rate": 1.694103602726301e-06, - "loss": 1.2093, - "step": 2052 - }, - { - "epoch": 0.27831627465600217, - "grad_norm": 4.893848928021499, - "learning_rate": 1.6937874113499425e-06, - "loss": 1.1714, - "step": 2053 - }, - { - "epoch": 0.27845184030366704, - "grad_norm": 9.28167250276754, - "learning_rate": 1.6934710861821575e-06, - "loss": 1.2232, - "step": 2054 - }, - { - "epoch": 0.2785874059513319, - "grad_norm": 1.686751735802921, - "learning_rate": 1.6931546272839477e-06, - "loss": 1.1992, - "step": 2055 - }, - { - "epoch": 0.27872297159899684, - "grad_norm": 3.0594556793124186, - "learning_rate": 1.6928380347163396e-06, - "loss": 1.2004, - "step": 2056 - }, - { - "epoch": 0.2788585372466617, - "grad_norm": 1.6990238126974933, - "learning_rate": 1.6925213085403849e-06, - "loss": 1.1935, - "step": 2057 - }, - { - "epoch": 0.2789941028943266, - "grad_norm": 1.5683180071753926, - "learning_rate": 1.6922044488171627e-06, - "loss": 1.192, - "step": 2058 - }, - { - "epoch": 0.27912966854199145, - "grad_norm": 3.3309688802269917, - "learning_rate": 1.6918874556077764e-06, - "loss": 1.2156, - "step": 2059 - }, - { - "epoch": 0.2792652341896563, - "grad_norm": 1.4490199165508548, - "learning_rate": 1.6915703289733558e-06, - "loss": 1.2215, - "step": 2060 - }, - { - "epoch": 0.27940079983732125, - "grad_norm": 1.5701985791839532, - "learning_rate": 1.6912530689750559e-06, - "loss": 1.2142, - "step": 2061 - }, - { - "epoch": 0.2795363654849861, - "grad_norm": 2.558326140837903, - "learning_rate": 1.6909356756740586e-06, - "loss": 1.118, - "step": 2062 - }, - { - "epoch": 0.279671931132651, - "grad_norm": 2.1780762527410613, - "learning_rate": 1.6906181491315697e-06, - "loss": 1.1775, - "step": 2063 - }, - { - "epoch": 0.27980749678031586, - "grad_norm": 1.8496798506907293, - "learning_rate": 1.6903004894088223e-06, - "loss": 1.1885, - "step": 2064 - }, - { - "epoch": 0.2799430624279807, - "grad_norm": 1.9290254902910118, - "learning_rate": 1.6899826965670742e-06, - "loss": 1.2211, - "step": 2065 - }, - { - "epoch": 0.28007862807564565, - "grad_norm": 1.6166362894190358, - "learning_rate": 1.6896647706676098e-06, - "loss": 1.2058, - "step": 2066 - }, - { - "epoch": 0.2802141937233105, - "grad_norm": 1.5203334244429207, - "learning_rate": 1.6893467117717383e-06, - "loss": 1.1915, - "step": 2067 - }, - { - "epoch": 0.2803497593709754, - "grad_norm": 2.280717729373391, - "learning_rate": 1.6890285199407945e-06, - "loss": 1.2263, - "step": 2068 - }, - { - "epoch": 0.28048532501864026, - "grad_norm": 5.010421966459476, - "learning_rate": 1.6887101952361395e-06, - "loss": 1.221, - "step": 2069 - }, - { - "epoch": 0.28062089066630513, - "grad_norm": 1.545694125094765, - "learning_rate": 1.6883917377191602e-06, - "loss": 1.1985, - "step": 2070 - }, - { - "epoch": 0.28075645631397006, - "grad_norm": 2.7954683249671595, - "learning_rate": 1.6880731474512677e-06, - "loss": 1.1948, - "step": 2071 - }, - { - "epoch": 0.28089202196163493, - "grad_norm": 2.0459396812208754, - "learning_rate": 1.6877544244938998e-06, - "loss": 1.1922, - "step": 2072 - }, - { - "epoch": 0.2810275876092998, - "grad_norm": 2.906883807657313, - "learning_rate": 1.6874355689085205e-06, - "loss": 1.1972, - "step": 2073 - }, - { - "epoch": 0.28116315325696467, - "grad_norm": 2.1161013906393933, - "learning_rate": 1.6871165807566174e-06, - "loss": 1.1629, - "step": 2074 - }, - { - "epoch": 0.28129871890462954, - "grad_norm": 3.9794767986569353, - "learning_rate": 1.686797460099706e-06, - "loss": 1.165, - "step": 2075 - }, - { - "epoch": 0.28143428455229447, - "grad_norm": 2.464204241781635, - "learning_rate": 1.6864782069993252e-06, - "loss": 1.1957, - "step": 2076 - }, - { - "epoch": 0.28156985019995934, - "grad_norm": 1.6119431308239913, - "learning_rate": 1.6861588215170413e-06, - "loss": 1.1992, - "step": 2077 - }, - { - "epoch": 0.2817054158476242, - "grad_norm": 1.717908910873342, - "learning_rate": 1.6858393037144447e-06, - "loss": 1.146, - "step": 2078 - }, - { - "epoch": 0.2818409814952891, - "grad_norm": 2.184480232540292, - "learning_rate": 1.6855196536531522e-06, - "loss": 1.2074, - "step": 2079 - }, - { - "epoch": 0.28197654714295395, - "grad_norm": 1.8856832755093356, - "learning_rate": 1.6851998713948055e-06, - "loss": 1.1717, - "step": 2080 - }, - { - "epoch": 0.2821121127906189, - "grad_norm": 2.0382240288207885, - "learning_rate": 1.6848799570010725e-06, - "loss": 1.2074, - "step": 2081 - }, - { - "epoch": 0.28224767843828374, - "grad_norm": 1.7280610321556569, - "learning_rate": 1.6845599105336456e-06, - "loss": 1.1928, - "step": 2082 - }, - { - "epoch": 0.2823832440859486, - "grad_norm": 1.5030412120601344, - "learning_rate": 1.6842397320542436e-06, - "loss": 1.1865, - "step": 2083 - }, - { - "epoch": 0.2825188097336135, - "grad_norm": 1.7962922884141603, - "learning_rate": 1.6839194216246107e-06, - "loss": 1.2085, - "step": 2084 - }, - { - "epoch": 0.2826543753812784, - "grad_norm": 1.8042564241210972, - "learning_rate": 1.6835989793065152e-06, - "loss": 1.172, - "step": 2085 - }, - { - "epoch": 0.2827899410289433, - "grad_norm": 1.6194942194558668, - "learning_rate": 1.683278405161753e-06, - "loss": 1.161, - "step": 2086 - }, - { - "epoch": 0.28292550667660815, - "grad_norm": 1.7400490532123518, - "learning_rate": 1.682957699252144e-06, - "loss": 1.1601, - "step": 2087 - }, - { - "epoch": 0.283061072324273, - "grad_norm": 3.237773363237634, - "learning_rate": 1.6826368616395331e-06, - "loss": 1.1862, - "step": 2088 - }, - { - "epoch": 0.2831966379719379, - "grad_norm": 1.7728150507346314, - "learning_rate": 1.6823158923857924e-06, - "loss": 1.1451, - "step": 2089 - }, - { - "epoch": 0.2833322036196028, - "grad_norm": 2.082199964289682, - "learning_rate": 1.6819947915528173e-06, - "loss": 1.1518, - "step": 2090 - }, - { - "epoch": 0.2834677692672677, - "grad_norm": 2.8217225438763345, - "learning_rate": 1.6816735592025303e-06, - "loss": 1.2393, - "step": 2091 - }, - { - "epoch": 0.28360333491493256, - "grad_norm": 1.8377490005443708, - "learning_rate": 1.681352195396878e-06, - "loss": 1.175, - "step": 2092 - }, - { - "epoch": 0.28373890056259743, - "grad_norm": 1.4920119925549888, - "learning_rate": 1.681030700197833e-06, - "loss": 1.1869, - "step": 2093 - }, - { - "epoch": 0.2838744662102623, - "grad_norm": 1.5529914400384028, - "learning_rate": 1.6807090736673932e-06, - "loss": 1.2275, - "step": 2094 - }, - { - "epoch": 0.2840100318579272, - "grad_norm": 1.645941063645527, - "learning_rate": 1.6803873158675823e-06, - "loss": 1.1629, - "step": 2095 - }, - { - "epoch": 0.2841455975055921, - "grad_norm": 1.6619067871978555, - "learning_rate": 1.6800654268604478e-06, - "loss": 1.1484, - "step": 2096 - }, - { - "epoch": 0.28428116315325697, - "grad_norm": 2.2615904965712037, - "learning_rate": 1.6797434067080635e-06, - "loss": 1.1769, - "step": 2097 - }, - { - "epoch": 0.28441672880092184, - "grad_norm": 1.6340220397476881, - "learning_rate": 1.679421255472529e-06, - "loss": 1.1831, - "step": 2098 - }, - { - "epoch": 0.2845522944485867, - "grad_norm": 1.775883142388447, - "learning_rate": 1.6790989732159685e-06, - "loss": 1.1621, - "step": 2099 - }, - { - "epoch": 0.28468786009625163, - "grad_norm": 3.8930587643355543, - "learning_rate": 1.6787765600005317e-06, - "loss": 1.2275, - "step": 2100 - }, - { - "epoch": 0.2848234257439165, - "grad_norm": 1.7523969781256725, - "learning_rate": 1.6784540158883928e-06, - "loss": 1.1634, - "step": 2101 - }, - { - "epoch": 0.2849589913915814, - "grad_norm": 1.5565211990477799, - "learning_rate": 1.6781313409417527e-06, - "loss": 1.1781, - "step": 2102 - }, - { - "epoch": 0.28509455703924624, - "grad_norm": 3.119861519653099, - "learning_rate": 1.6778085352228362e-06, - "loss": 1.2036, - "step": 2103 - }, - { - "epoch": 0.2852301226869111, - "grad_norm": 1.6790522687523852, - "learning_rate": 1.6774855987938938e-06, - "loss": 1.1884, - "step": 2104 - }, - { - "epoch": 0.28536568833457604, - "grad_norm": 1.4709659294610595, - "learning_rate": 1.6771625317172018e-06, - "loss": 1.1593, - "step": 2105 - }, - { - "epoch": 0.2855012539822409, - "grad_norm": 1.9091410608829393, - "learning_rate": 1.6768393340550607e-06, - "loss": 1.1675, - "step": 2106 - }, - { - "epoch": 0.2856368196299058, - "grad_norm": 1.4070639264123368, - "learning_rate": 1.6765160058697962e-06, - "loss": 1.1908, - "step": 2107 - }, - { - "epoch": 0.28577238527757065, - "grad_norm": 2.2128526491370284, - "learning_rate": 1.6761925472237604e-06, - "loss": 1.1719, - "step": 2108 - }, - { - "epoch": 0.2859079509252355, - "grad_norm": 1.4968119108618396, - "learning_rate": 1.6758689581793295e-06, - "loss": 1.1482, - "step": 2109 - }, - { - "epoch": 0.28604351657290045, - "grad_norm": 5.187870739938606, - "learning_rate": 1.675545238798905e-06, - "loss": 1.1709, - "step": 2110 - }, - { - "epoch": 0.2861790822205653, - "grad_norm": 1.4261266927896192, - "learning_rate": 1.6752213891449134e-06, - "loss": 1.185, - "step": 2111 - }, - { - "epoch": 0.2863146478682302, - "grad_norm": 5.845974474682294, - "learning_rate": 1.674897409279807e-06, - "loss": 1.2078, - "step": 2112 - }, - { - "epoch": 0.28645021351589506, - "grad_norm": 1.96166965591953, - "learning_rate": 1.6745732992660622e-06, - "loss": 1.2013, - "step": 2113 - }, - { - "epoch": 0.28658577916355993, - "grad_norm": 1.8087376311109828, - "learning_rate": 1.6742490591661817e-06, - "loss": 1.2061, - "step": 2114 - }, - { - "epoch": 0.28672134481122485, - "grad_norm": 1.7975921609168997, - "learning_rate": 1.6739246890426922e-06, - "loss": 1.2, - "step": 2115 - }, - { - "epoch": 0.2868569104588897, - "grad_norm": 1.6047152817047308, - "learning_rate": 1.673600188958146e-06, - "loss": 1.1654, - "step": 2116 - }, - { - "epoch": 0.2869924761065546, - "grad_norm": 4.062464731824118, - "learning_rate": 1.6732755589751208e-06, - "loss": 1.2023, - "step": 2117 - }, - { - "epoch": 0.28712804175421947, - "grad_norm": 1.5666667008158446, - "learning_rate": 1.6729507991562181e-06, - "loss": 1.1764, - "step": 2118 - }, - { - "epoch": 0.28726360740188434, - "grad_norm": 1.5268301253635783, - "learning_rate": 1.6726259095640663e-06, - "loss": 1.1531, - "step": 2119 - }, - { - "epoch": 0.28739917304954926, - "grad_norm": 1.6898355069265987, - "learning_rate": 1.6723008902613168e-06, - "loss": 1.1791, - "step": 2120 - }, - { - "epoch": 0.28753473869721413, - "grad_norm": 4.177735032764537, - "learning_rate": 1.6719757413106475e-06, - "loss": 1.1904, - "step": 2121 - }, - { - "epoch": 0.287670304344879, - "grad_norm": 1.380986445747835, - "learning_rate": 1.6716504627747608e-06, - "loss": 1.1832, - "step": 2122 - }, - { - "epoch": 0.2878058699925439, - "grad_norm": 1.4552098426724664, - "learning_rate": 1.6713250547163839e-06, - "loss": 1.1643, - "step": 2123 - }, - { - "epoch": 0.2879414356402088, - "grad_norm": 1.6387652859248818, - "learning_rate": 1.6709995171982697e-06, - "loss": 1.1878, - "step": 2124 - }, - { - "epoch": 0.28807700128787367, - "grad_norm": 1.6984165428851645, - "learning_rate": 1.6706738502831948e-06, - "loss": 1.145, - "step": 2125 - }, - { - "epoch": 0.28821256693553854, - "grad_norm": 2.6044731378958854, - "learning_rate": 1.6703480540339617e-06, - "loss": 1.1729, - "step": 2126 - }, - { - "epoch": 0.2883481325832034, - "grad_norm": 1.4222194919326705, - "learning_rate": 1.670022128513398e-06, - "loss": 1.1843, - "step": 2127 - }, - { - "epoch": 0.2884836982308683, - "grad_norm": 1.3728353188492077, - "learning_rate": 1.6696960737843556e-06, - "loss": 1.1385, - "step": 2128 - }, - { - "epoch": 0.2886192638785332, - "grad_norm": 1.5321557658790825, - "learning_rate": 1.6693698899097117e-06, - "loss": 1.1906, - "step": 2129 - }, - { - "epoch": 0.2887548295261981, - "grad_norm": 1.7766364015141909, - "learning_rate": 1.6690435769523684e-06, - "loss": 1.1931, - "step": 2130 - }, - { - "epoch": 0.28889039517386295, - "grad_norm": 1.6487778872864252, - "learning_rate": 1.668717134975252e-06, - "loss": 1.2152, - "step": 2131 - }, - { - "epoch": 0.2890259608215278, - "grad_norm": 4.019791972647947, - "learning_rate": 1.668390564041315e-06, - "loss": 1.2107, - "step": 2132 - }, - { - "epoch": 0.2891615264691927, - "grad_norm": 1.5591095992501522, - "learning_rate": 1.6680638642135334e-06, - "loss": 1.1818, - "step": 2133 - }, - { - "epoch": 0.2892970921168576, - "grad_norm": 1.3721178565236052, - "learning_rate": 1.667737035554909e-06, - "loss": 1.1982, - "step": 2134 - }, - { - "epoch": 0.2894326577645225, - "grad_norm": 2.1408560051119547, - "learning_rate": 1.6674100781284683e-06, - "loss": 1.2319, - "step": 2135 - }, - { - "epoch": 0.28956822341218735, - "grad_norm": 1.4730397269423732, - "learning_rate": 1.6670829919972622e-06, - "loss": 1.1836, - "step": 2136 - }, - { - "epoch": 0.2897037890598522, - "grad_norm": 1.8443844826724085, - "learning_rate": 1.6667557772243668e-06, - "loss": 1.1861, - "step": 2137 - }, - { - "epoch": 0.2898393547075171, - "grad_norm": 1.7111041133565996, - "learning_rate": 1.6664284338728824e-06, - "loss": 1.1807, - "step": 2138 - }, - { - "epoch": 0.289974920355182, - "grad_norm": 2.559662952107668, - "learning_rate": 1.6661009620059355e-06, - "loss": 1.1728, - "step": 2139 - }, - { - "epoch": 0.2901104860028469, - "grad_norm": 1.5372400928420669, - "learning_rate": 1.6657733616866755e-06, - "loss": 1.1953, - "step": 2140 - }, - { - "epoch": 0.29024605165051176, - "grad_norm": 1.6569429956121038, - "learning_rate": 1.6654456329782783e-06, - "loss": 1.1588, - "step": 2141 - }, - { - "epoch": 0.29038161729817663, - "grad_norm": 1.5884050184071286, - "learning_rate": 1.6651177759439432e-06, - "loss": 1.1682, - "step": 2142 - }, - { - "epoch": 0.2905171829458415, - "grad_norm": 2.6739860281851193, - "learning_rate": 1.6647897906468953e-06, - "loss": 1.1696, - "step": 2143 - }, - { - "epoch": 0.2906527485935064, - "grad_norm": 1.5579988696209868, - "learning_rate": 1.6644616771503838e-06, - "loss": 1.1611, - "step": 2144 - }, - { - "epoch": 0.2907883142411713, - "grad_norm": 1.7716224654329218, - "learning_rate": 1.6641334355176827e-06, - "loss": 1.2092, - "step": 2145 - }, - { - "epoch": 0.29092387988883617, - "grad_norm": 1.5655497093316786, - "learning_rate": 1.6638050658120913e-06, - "loss": 1.2361, - "step": 2146 - }, - { - "epoch": 0.29105944553650104, - "grad_norm": 1.7029908271407974, - "learning_rate": 1.6634765680969323e-06, - "loss": 1.1855, - "step": 2147 - }, - { - "epoch": 0.2911950111841659, - "grad_norm": 1.9260714273325592, - "learning_rate": 1.6631479424355548e-06, - "loss": 1.1692, - "step": 2148 - }, - { - "epoch": 0.29133057683183083, - "grad_norm": 1.5062672763471887, - "learning_rate": 1.6628191888913308e-06, - "loss": 1.197, - "step": 2149 - }, - { - "epoch": 0.2914661424794957, - "grad_norm": 3.11811261303228, - "learning_rate": 1.662490307527658e-06, - "loss": 1.1906, - "step": 2150 - }, - { - "epoch": 0.2916017081271606, - "grad_norm": 1.7081963715599247, - "learning_rate": 1.6621612984079592e-06, - "loss": 1.1999, - "step": 2151 - }, - { - "epoch": 0.29173727377482545, - "grad_norm": 2.395185711486267, - "learning_rate": 1.6618321615956808e-06, - "loss": 1.176, - "step": 2152 - }, - { - "epoch": 0.2918728394224903, - "grad_norm": 1.6723119294442945, - "learning_rate": 1.661502897154294e-06, - "loss": 1.183, - "step": 2153 - }, - { - "epoch": 0.29200840507015524, - "grad_norm": 1.7508338087184163, - "learning_rate": 1.6611735051472948e-06, - "loss": 1.1923, - "step": 2154 - }, - { - "epoch": 0.2921439707178201, - "grad_norm": 1.508588602519883, - "learning_rate": 1.6608439856382046e-06, - "loss": 1.1501, - "step": 2155 - }, - { - "epoch": 0.292279536365485, - "grad_norm": 3.1739705310889264, - "learning_rate": 1.660514338690568e-06, - "loss": 1.1949, - "step": 2156 - }, - { - "epoch": 0.29241510201314985, - "grad_norm": 1.5114834393965324, - "learning_rate": 1.6601845643679548e-06, - "loss": 1.1444, - "step": 2157 - }, - { - "epoch": 0.2925506676608147, - "grad_norm": 1.5722158081646018, - "learning_rate": 1.6598546627339598e-06, - "loss": 1.1815, - "step": 2158 - }, - { - "epoch": 0.29268623330847965, - "grad_norm": 1.470640863523126, - "learning_rate": 1.6595246338522016e-06, - "loss": 1.2152, - "step": 2159 - }, - { - "epoch": 0.2928217989561445, - "grad_norm": 1.5030645395086537, - "learning_rate": 1.6591944777863237e-06, - "loss": 1.2109, - "step": 2160 - }, - { - "epoch": 0.2929573646038094, - "grad_norm": 1.6135586666415747, - "learning_rate": 1.6588641945999937e-06, - "loss": 1.1725, - "step": 2161 - }, - { - "epoch": 0.29309293025147426, - "grad_norm": 1.5460996107594243, - "learning_rate": 1.658533784356905e-06, - "loss": 1.1998, - "step": 2162 - }, - { - "epoch": 0.2932284958991392, - "grad_norm": 1.8738597638642738, - "learning_rate": 1.658203247120774e-06, - "loss": 1.2696, - "step": 2163 - }, - { - "epoch": 0.29336406154680406, - "grad_norm": 1.6534484402706113, - "learning_rate": 1.6578725829553425e-06, - "loss": 1.1811, - "step": 2164 - }, - { - "epoch": 0.2934996271944689, - "grad_norm": 1.900974075409964, - "learning_rate": 1.6575417919243765e-06, - "loss": 1.1601, - "step": 2165 - }, - { - "epoch": 0.2936351928421338, - "grad_norm": 1.6522487226577396, - "learning_rate": 1.6572108740916657e-06, - "loss": 1.1678, - "step": 2166 - }, - { - "epoch": 0.29377075848979867, - "grad_norm": 1.718593012018825, - "learning_rate": 1.656879829521026e-06, - "loss": 1.197, - "step": 2167 - }, - { - "epoch": 0.2939063241374636, - "grad_norm": 2.111875872103038, - "learning_rate": 1.656548658276296e-06, - "loss": 1.1904, - "step": 2168 - }, - { - "epoch": 0.29404188978512846, - "grad_norm": 1.4324709795612764, - "learning_rate": 1.6562173604213396e-06, - "loss": 1.1802, - "step": 2169 - }, - { - "epoch": 0.29417745543279333, - "grad_norm": 1.4635490634486161, - "learning_rate": 1.6558859360200454e-06, - "loss": 1.1881, - "step": 2170 - }, - { - "epoch": 0.2943130210804582, - "grad_norm": 1.5151935076499172, - "learning_rate": 1.6555543851363256e-06, - "loss": 1.1768, - "step": 2171 - }, - { - "epoch": 0.2944485867281231, - "grad_norm": 1.4948681815967815, - "learning_rate": 1.6552227078341171e-06, - "loss": 1.1738, - "step": 2172 - }, - { - "epoch": 0.294584152375788, - "grad_norm": 1.4509915760047964, - "learning_rate": 1.6548909041773817e-06, - "loss": 1.1357, - "step": 2173 - }, - { - "epoch": 0.29471971802345287, - "grad_norm": 2.368909656427332, - "learning_rate": 1.6545589742301048e-06, - "loss": 1.1752, - "step": 2174 - }, - { - "epoch": 0.29485528367111774, - "grad_norm": 1.7545878134828194, - "learning_rate": 1.6542269180562961e-06, - "loss": 1.2181, - "step": 2175 - }, - { - "epoch": 0.2949908493187826, - "grad_norm": 1.680492589709372, - "learning_rate": 1.6538947357199907e-06, - "loss": 1.141, - "step": 2176 - }, - { - "epoch": 0.2951264149664475, - "grad_norm": 1.5475363547760452, - "learning_rate": 1.6535624272852471e-06, - "loss": 1.181, - "step": 2177 - }, - { - "epoch": 0.2952619806141124, - "grad_norm": 1.7041851493867874, - "learning_rate": 1.653229992816148e-06, - "loss": 1.16, - "step": 2178 - }, - { - "epoch": 0.2953975462617773, - "grad_norm": 3.644869217193937, - "learning_rate": 1.6528974323768016e-06, - "loss": 1.166, - "step": 2179 - }, - { - "epoch": 0.29553311190944215, - "grad_norm": 1.7230281952566826, - "learning_rate": 1.6525647460313388e-06, - "loss": 1.1692, - "step": 2180 - }, - { - "epoch": 0.295668677557107, - "grad_norm": 1.8032940899520207, - "learning_rate": 1.6522319338439156e-06, - "loss": 1.1904, - "step": 2181 - }, - { - "epoch": 0.2958042432047719, - "grad_norm": 2.1457730185841046, - "learning_rate": 1.6518989958787125e-06, - "loss": 1.1765, - "step": 2182 - }, - { - "epoch": 0.2959398088524368, - "grad_norm": 1.5941099345681002, - "learning_rate": 1.6515659321999337e-06, - "loss": 1.1971, - "step": 2183 - }, - { - "epoch": 0.2960753745001017, - "grad_norm": 1.3964662902888996, - "learning_rate": 1.6512327428718082e-06, - "loss": 1.1685, - "step": 2184 - }, - { - "epoch": 0.29621094014776655, - "grad_norm": 1.7166562236447311, - "learning_rate": 1.6508994279585885e-06, - "loss": 1.136, - "step": 2185 - }, - { - "epoch": 0.2963465057954314, - "grad_norm": 1.461270427278781, - "learning_rate": 1.6505659875245524e-06, - "loss": 1.1894, - "step": 2186 - }, - { - "epoch": 0.2964820714430963, - "grad_norm": 9.03191809812402, - "learning_rate": 1.6502324216340004e-06, - "loss": 1.1818, - "step": 2187 - }, - { - "epoch": 0.2966176370907612, - "grad_norm": 1.6312968356736324, - "learning_rate": 1.6498987303512588e-06, - "loss": 1.1686, - "step": 2188 - }, - { - "epoch": 0.2967532027384261, - "grad_norm": 2.0184580944241417, - "learning_rate": 1.649564913740677e-06, - "loss": 1.1684, - "step": 2189 - }, - { - "epoch": 0.29688876838609096, - "grad_norm": 1.5069293586223662, - "learning_rate": 1.6492309718666289e-06, - "loss": 1.1953, - "step": 2190 - }, - { - "epoch": 0.29702433403375583, - "grad_norm": 1.5504198492317949, - "learning_rate": 1.6488969047935125e-06, - "loss": 1.1642, - "step": 2191 - }, - { - "epoch": 0.2971598996814207, - "grad_norm": 1.682234718643412, - "learning_rate": 1.6485627125857504e-06, - "loss": 1.1725, - "step": 2192 - }, - { - "epoch": 0.29729546532908563, - "grad_norm": 1.9877740814869254, - "learning_rate": 1.6482283953077884e-06, - "loss": 1.1729, - "step": 2193 - }, - { - "epoch": 0.2974310309767505, - "grad_norm": 1.510768049065923, - "learning_rate": 1.6478939530240971e-06, - "loss": 1.1829, - "step": 2194 - }, - { - "epoch": 0.29756659662441537, - "grad_norm": 1.7763455433091624, - "learning_rate": 1.6475593857991714e-06, - "loss": 1.1851, - "step": 2195 - }, - { - "epoch": 0.29770216227208024, - "grad_norm": 7.674691708094931, - "learning_rate": 1.6472246936975293e-06, - "loss": 1.1766, - "step": 2196 - }, - { - "epoch": 0.2978377279197451, - "grad_norm": 1.593162385666502, - "learning_rate": 1.6468898767837142e-06, - "loss": 1.1717, - "step": 2197 - }, - { - "epoch": 0.29797329356741004, - "grad_norm": 1.5909070494527071, - "learning_rate": 1.6465549351222924e-06, - "loss": 1.2426, - "step": 2198 - }, - { - "epoch": 0.2981088592150749, - "grad_norm": 1.8738212662421183, - "learning_rate": 1.646219868777855e-06, - "loss": 1.145, - "step": 2199 - }, - { - "epoch": 0.2982444248627398, - "grad_norm": 1.620098181552219, - "learning_rate": 1.645884677815017e-06, - "loss": 1.1588, - "step": 2200 - }, - { - "epoch": 0.29837999051040465, - "grad_norm": 1.5399506366532598, - "learning_rate": 1.645549362298417e-06, - "loss": 1.1879, - "step": 2201 - }, - { - "epoch": 0.2985155561580696, - "grad_norm": 1.6601486043868814, - "learning_rate": 1.6452139222927181e-06, - "loss": 1.1962, - "step": 2202 - }, - { - "epoch": 0.29865112180573444, - "grad_norm": 1.5109089007318248, - "learning_rate": 1.6448783578626076e-06, - "loss": 1.1531, - "step": 2203 - }, - { - "epoch": 0.2987866874533993, - "grad_norm": 1.7941075209668806, - "learning_rate": 1.6445426690727959e-06, - "loss": 1.1493, - "step": 2204 - }, - { - "epoch": 0.2989222531010642, - "grad_norm": 1.5155919435176042, - "learning_rate": 1.6442068559880182e-06, - "loss": 1.167, - "step": 2205 - }, - { - "epoch": 0.29905781874872905, - "grad_norm": 1.5373517836757657, - "learning_rate": 1.6438709186730333e-06, - "loss": 1.1437, - "step": 2206 - }, - { - "epoch": 0.299193384396394, - "grad_norm": 2.865111793900049, - "learning_rate": 1.6435348571926245e-06, - "loss": 1.1712, - "step": 2207 - }, - { - "epoch": 0.29932895004405885, - "grad_norm": 1.8922695981444253, - "learning_rate": 1.6431986716115982e-06, - "loss": 1.1873, - "step": 2208 - }, - { - "epoch": 0.2994645156917237, - "grad_norm": 1.7455592257429309, - "learning_rate": 1.6428623619947848e-06, - "loss": 1.2345, - "step": 2209 - }, - { - "epoch": 0.2996000813393886, - "grad_norm": 1.4032804638091825, - "learning_rate": 1.6425259284070395e-06, - "loss": 1.1882, - "step": 2210 - }, - { - "epoch": 0.29973564698705346, - "grad_norm": 1.7367070218942253, - "learning_rate": 1.6421893709132405e-06, - "loss": 1.1394, - "step": 2211 - }, - { - "epoch": 0.2998712126347184, - "grad_norm": 1.5492656383007153, - "learning_rate": 1.641852689578291e-06, - "loss": 1.1957, - "step": 2212 - }, - { - "epoch": 0.30000677828238326, - "grad_norm": 1.8646349835115428, - "learning_rate": 1.6415158844671163e-06, - "loss": 1.1967, - "step": 2213 - }, - { - "epoch": 0.3001423439300481, - "grad_norm": 2.772527790230075, - "learning_rate": 1.6411789556446673e-06, - "loss": 1.1583, - "step": 2214 - }, - { - "epoch": 0.300277909577713, - "grad_norm": 1.4405273525513895, - "learning_rate": 1.640841903175918e-06, - "loss": 1.2045, - "step": 2215 - }, - { - "epoch": 0.30041347522537787, - "grad_norm": 1.5489151854552172, - "learning_rate": 1.640504727125866e-06, - "loss": 1.1688, - "step": 2216 - }, - { - "epoch": 0.3005490408730428, - "grad_norm": 2.007738357966311, - "learning_rate": 1.640167427559533e-06, - "loss": 1.2325, - "step": 2217 - }, - { - "epoch": 0.30068460652070766, - "grad_norm": 1.7308720487844722, - "learning_rate": 1.639830004541965e-06, - "loss": 1.1649, - "step": 2218 - }, - { - "epoch": 0.30082017216837253, - "grad_norm": 1.7311825807519343, - "learning_rate": 1.6394924581382312e-06, - "loss": 1.194, - "step": 2219 - }, - { - "epoch": 0.3009557378160374, - "grad_norm": 1.6473467606238767, - "learning_rate": 1.6391547884134247e-06, - "loss": 1.199, - "step": 2220 - }, - { - "epoch": 0.3010913034637023, - "grad_norm": 1.8465730520814907, - "learning_rate": 1.6388169954326623e-06, - "loss": 1.1959, - "step": 2221 - }, - { - "epoch": 0.3012268691113672, - "grad_norm": 1.5214972186551377, - "learning_rate": 1.6384790792610849e-06, - "loss": 1.1933, - "step": 2222 - }, - { - "epoch": 0.30136243475903207, - "grad_norm": 1.8155207582150754, - "learning_rate": 1.6381410399638571e-06, - "loss": 1.1555, - "step": 2223 - }, - { - "epoch": 0.30149800040669694, - "grad_norm": 1.7678329106837112, - "learning_rate": 1.6378028776061666e-06, - "loss": 1.203, - "step": 2224 - }, - { - "epoch": 0.3016335660543618, - "grad_norm": 4.5321879325399195, - "learning_rate": 1.6374645922532257e-06, - "loss": 1.1559, - "step": 2225 - }, - { - "epoch": 0.3017691317020267, - "grad_norm": 1.5634660993110618, - "learning_rate": 1.63712618397027e-06, - "loss": 1.1903, - "step": 2226 - }, - { - "epoch": 0.3019046973496916, - "grad_norm": 1.4662694618989305, - "learning_rate": 1.636787652822559e-06, - "loss": 1.1724, - "step": 2227 - }, - { - "epoch": 0.3020402629973565, - "grad_norm": 2.004891687870438, - "learning_rate": 1.6364489988753757e-06, - "loss": 1.1679, - "step": 2228 - }, - { - "epoch": 0.30217582864502135, - "grad_norm": 1.4021933123751527, - "learning_rate": 1.6361102221940268e-06, - "loss": 1.1911, - "step": 2229 - }, - { - "epoch": 0.3023113942926862, - "grad_norm": 1.551558396233573, - "learning_rate": 1.6357713228438428e-06, - "loss": 1.1306, - "step": 2230 - }, - { - "epoch": 0.3024469599403511, - "grad_norm": 1.4830443959178379, - "learning_rate": 1.6354323008901773e-06, - "loss": 1.1599, - "step": 2231 - }, - { - "epoch": 0.302582525588016, - "grad_norm": 1.7488268826619244, - "learning_rate": 1.6350931563984087e-06, - "loss": 1.1852, - "step": 2232 - }, - { - "epoch": 0.3027180912356809, - "grad_norm": 2.0679886265604455, - "learning_rate": 1.6347538894339379e-06, - "loss": 1.1634, - "step": 2233 - }, - { - "epoch": 0.30285365688334576, - "grad_norm": 1.6356591324010776, - "learning_rate": 1.6344145000621898e-06, - "loss": 1.1501, - "step": 2234 - }, - { - "epoch": 0.3029892225310106, - "grad_norm": 1.878891681889651, - "learning_rate": 1.6340749883486136e-06, - "loss": 1.2101, - "step": 2235 - }, - { - "epoch": 0.3031247881786755, - "grad_norm": 1.5239402565036233, - "learning_rate": 1.6337353543586808e-06, - "loss": 1.1959, - "step": 2236 - }, - { - "epoch": 0.3032603538263404, - "grad_norm": 1.6148709851262455, - "learning_rate": 1.6333955981578868e-06, - "loss": 1.189, - "step": 2237 - }, - { - "epoch": 0.3033959194740053, - "grad_norm": 2.124616178582552, - "learning_rate": 1.633055719811752e-06, - "loss": 1.1857, - "step": 2238 - }, - { - "epoch": 0.30353148512167016, - "grad_norm": 2.4759417794089758, - "learning_rate": 1.6327157193858182e-06, - "loss": 1.2211, - "step": 2239 - }, - { - "epoch": 0.30366705076933503, - "grad_norm": 2.074203608020143, - "learning_rate": 1.6323755969456526e-06, - "loss": 1.1702, - "step": 2240 - }, - { - "epoch": 0.30380261641699996, - "grad_norm": 1.5284584807615968, - "learning_rate": 1.6320353525568447e-06, - "loss": 1.1445, - "step": 2241 - }, - { - "epoch": 0.30393818206466483, - "grad_norm": 1.6983740572292625, - "learning_rate": 1.6316949862850082e-06, - "loss": 1.1657, - "step": 2242 - }, - { - "epoch": 0.3040737477123297, - "grad_norm": 1.7479912591183382, - "learning_rate": 1.6313544981957797e-06, - "loss": 1.215, - "step": 2243 - }, - { - "epoch": 0.30420931335999457, - "grad_norm": 1.6664489850291668, - "learning_rate": 1.6310138883548199e-06, - "loss": 1.162, - "step": 2244 - }, - { - "epoch": 0.30434487900765944, - "grad_norm": 2.0383598258729143, - "learning_rate": 1.6306731568278126e-06, - "loss": 1.1748, - "step": 2245 - }, - { - "epoch": 0.30448044465532437, - "grad_norm": 2.871595089557037, - "learning_rate": 1.6303323036804652e-06, - "loss": 1.2126, - "step": 2246 - }, - { - "epoch": 0.30461601030298924, - "grad_norm": 1.4793349557650004, - "learning_rate": 1.6299913289785087e-06, - "loss": 1.1531, - "step": 2247 - }, - { - "epoch": 0.3047515759506541, - "grad_norm": 3.9711181230727575, - "learning_rate": 1.6296502327876974e-06, - "loss": 1.1944, - "step": 2248 - }, - { - "epoch": 0.304887141598319, - "grad_norm": 2.028232427994707, - "learning_rate": 1.6293090151738086e-06, - "loss": 1.1784, - "step": 2249 - }, - { - "epoch": 0.30502270724598385, - "grad_norm": 1.9069654033596326, - "learning_rate": 1.6289676762026438e-06, - "loss": 1.1558, - "step": 2250 - }, - { - "epoch": 0.3051582728936488, - "grad_norm": 2.076427219593821, - "learning_rate": 1.6286262159400275e-06, - "loss": 1.1779, - "step": 2251 - }, - { - "epoch": 0.30529383854131364, - "grad_norm": 1.56477639107368, - "learning_rate": 1.6282846344518073e-06, - "loss": 1.1843, - "step": 2252 - }, - { - "epoch": 0.3054294041889785, - "grad_norm": 2.0519512216032108, - "learning_rate": 1.627942931803855e-06, - "loss": 1.1703, - "step": 2253 - }, - { - "epoch": 0.3055649698366434, - "grad_norm": 1.7306838426181153, - "learning_rate": 1.627601108062065e-06, - "loss": 1.1693, - "step": 2254 - }, - { - "epoch": 0.30570053548430826, - "grad_norm": 1.63197554643986, - "learning_rate": 1.6272591632923548e-06, - "loss": 1.1796, - "step": 2255 - }, - { - "epoch": 0.3058361011319732, - "grad_norm": 1.7189607065223398, - "learning_rate": 1.6269170975606665e-06, - "loss": 1.1537, - "step": 2256 - }, - { - "epoch": 0.30597166677963805, - "grad_norm": 1.5092914459386333, - "learning_rate": 1.6265749109329647e-06, - "loss": 1.185, - "step": 2257 - }, - { - "epoch": 0.3061072324273029, - "grad_norm": 1.4780163930503543, - "learning_rate": 1.6262326034752371e-06, - "loss": 1.1372, - "step": 2258 - }, - { - "epoch": 0.3062427980749678, - "grad_norm": 1.4498386758978106, - "learning_rate": 1.6258901752534947e-06, - "loss": 1.1997, - "step": 2259 - }, - { - "epoch": 0.30637836372263266, - "grad_norm": 1.6832735097674074, - "learning_rate": 1.625547626333773e-06, - "loss": 1.1954, - "step": 2260 - }, - { - "epoch": 0.3065139293702976, - "grad_norm": 1.8843429826472684, - "learning_rate": 1.6252049567821294e-06, - "loss": 1.1965, - "step": 2261 - }, - { - "epoch": 0.30664949501796246, - "grad_norm": 1.9281034762771578, - "learning_rate": 1.6248621666646448e-06, - "loss": 1.1686, - "step": 2262 - }, - { - "epoch": 0.30678506066562733, - "grad_norm": 1.7006632762529028, - "learning_rate": 1.6245192560474237e-06, - "loss": 1.1401, - "step": 2263 - }, - { - "epoch": 0.3069206263132922, - "grad_norm": 1.4912200993312832, - "learning_rate": 1.6241762249965935e-06, - "loss": 1.1645, - "step": 2264 - }, - { - "epoch": 0.30705619196095707, - "grad_norm": 1.7979518635513156, - "learning_rate": 1.6238330735783054e-06, - "loss": 1.132, - "step": 2265 - }, - { - "epoch": 0.307191757608622, - "grad_norm": 1.8731677505572497, - "learning_rate": 1.6234898018587336e-06, - "loss": 1.1831, - "step": 2266 - }, - { - "epoch": 0.30732732325628687, - "grad_norm": 1.6164338712643667, - "learning_rate": 1.6231464099040748e-06, - "loss": 1.1631, - "step": 2267 - }, - { - "epoch": 0.30746288890395174, - "grad_norm": 1.9767680923414377, - "learning_rate": 1.6228028977805495e-06, - "loss": 1.1824, - "step": 2268 - }, - { - "epoch": 0.3075984545516166, - "grad_norm": 1.4911346375283991, - "learning_rate": 1.6224592655544016e-06, - "loss": 1.1813, - "step": 2269 - }, - { - "epoch": 0.3077340201992815, - "grad_norm": 1.885006011944974, - "learning_rate": 1.6221155132918979e-06, - "loss": 1.2209, - "step": 2270 - }, - { - "epoch": 0.3078695858469464, - "grad_norm": 4.8203723055687, - "learning_rate": 1.6217716410593281e-06, - "loss": 1.1846, - "step": 2271 - }, - { - "epoch": 0.3080051514946113, - "grad_norm": 1.3461631958768916, - "learning_rate": 1.621427648923005e-06, - "loss": 1.1815, - "step": 2272 - }, - { - "epoch": 0.30814071714227614, - "grad_norm": 3.57680347768065, - "learning_rate": 1.6210835369492652e-06, - "loss": 1.1605, - "step": 2273 - }, - { - "epoch": 0.308276282789941, - "grad_norm": 1.9309547235826354, - "learning_rate": 1.6207393052044678e-06, - "loss": 1.1853, - "step": 2274 - }, - { - "epoch": 0.3084118484376059, - "grad_norm": 2.01111197167501, - "learning_rate": 1.6203949537549954e-06, - "loss": 1.1899, - "step": 2275 - }, - { - "epoch": 0.3085474140852708, - "grad_norm": 1.5124705205819138, - "learning_rate": 1.6200504826672533e-06, - "loss": 1.1737, - "step": 2276 - }, - { - "epoch": 0.3086829797329357, - "grad_norm": 1.408477043646839, - "learning_rate": 1.6197058920076696e-06, - "loss": 1.1879, - "step": 2277 - }, - { - "epoch": 0.30881854538060055, - "grad_norm": 4.409721317152355, - "learning_rate": 1.6193611818426968e-06, - "loss": 1.1963, - "step": 2278 - }, - { - "epoch": 0.3089541110282654, - "grad_norm": 1.5985974845674158, - "learning_rate": 1.6190163522388088e-06, - "loss": 1.1715, - "step": 2279 - }, - { - "epoch": 0.3090896766759303, - "grad_norm": 1.5413308932422158, - "learning_rate": 1.6186714032625033e-06, - "loss": 1.1363, - "step": 2280 - }, - { - "epoch": 0.3092252423235952, - "grad_norm": 1.685002545307349, - "learning_rate": 1.6183263349803014e-06, - "loss": 1.1964, - "step": 2281 - }, - { - "epoch": 0.3093608079712601, - "grad_norm": 1.691496011624455, - "learning_rate": 1.6179811474587464e-06, - "loss": 1.1899, - "step": 2282 - }, - { - "epoch": 0.30949637361892496, - "grad_norm": 2.2323438100246644, - "learning_rate": 1.6176358407644055e-06, - "loss": 1.1892, - "step": 2283 - }, - { - "epoch": 0.30963193926658983, - "grad_norm": 1.6909169945917093, - "learning_rate": 1.6172904149638677e-06, - "loss": 1.2243, - "step": 2284 - }, - { - "epoch": 0.30976750491425475, - "grad_norm": 1.8975948950418353, - "learning_rate": 1.616944870123746e-06, - "loss": 1.1819, - "step": 2285 - }, - { - "epoch": 0.3099030705619196, - "grad_norm": 1.5005015795620948, - "learning_rate": 1.616599206310676e-06, - "loss": 1.1792, - "step": 2286 - }, - { - "epoch": 0.3100386362095845, - "grad_norm": 1.839744109878868, - "learning_rate": 1.616253423591316e-06, - "loss": 1.1805, - "step": 2287 - }, - { - "epoch": 0.31017420185724937, - "grad_norm": 2.3728940794234297, - "learning_rate": 1.6159075220323482e-06, - "loss": 1.1871, - "step": 2288 - }, - { - "epoch": 0.31030976750491424, - "grad_norm": 1.7662184364437283, - "learning_rate": 1.6155615017004762e-06, - "loss": 1.1649, - "step": 2289 - }, - { - "epoch": 0.31044533315257916, - "grad_norm": 1.5642968713253589, - "learning_rate": 1.6152153626624275e-06, - "loss": 1.1907, - "step": 2290 - }, - { - "epoch": 0.31058089880024403, - "grad_norm": 1.5109249948360797, - "learning_rate": 1.6148691049849523e-06, - "loss": 1.1485, - "step": 2291 - }, - { - "epoch": 0.3107164644479089, - "grad_norm": 1.8389085247563501, - "learning_rate": 1.6145227287348238e-06, - "loss": 1.1912, - "step": 2292 - }, - { - "epoch": 0.3108520300955738, - "grad_norm": 2.111970094811141, - "learning_rate": 1.6141762339788376e-06, - "loss": 1.1853, - "step": 2293 - }, - { - "epoch": 0.31098759574323864, - "grad_norm": 1.6186001360672304, - "learning_rate": 1.6138296207838127e-06, - "loss": 1.2111, - "step": 2294 - }, - { - "epoch": 0.31112316139090357, - "grad_norm": 1.6850691507314608, - "learning_rate": 1.6134828892165907e-06, - "loss": 1.1937, - "step": 2295 - }, - { - "epoch": 0.31125872703856844, - "grad_norm": 1.6744835537414455, - "learning_rate": 1.6131360393440362e-06, - "loss": 1.1416, - "step": 2296 - }, - { - "epoch": 0.3113942926862333, - "grad_norm": 1.4488850293180109, - "learning_rate": 1.6127890712330364e-06, - "loss": 1.1585, - "step": 2297 - }, - { - "epoch": 0.3115298583338982, - "grad_norm": 1.723607085187055, - "learning_rate": 1.6124419849505013e-06, - "loss": 1.1602, - "step": 2298 - }, - { - "epoch": 0.31166542398156305, - "grad_norm": 1.5097934806168283, - "learning_rate": 1.6120947805633636e-06, - "loss": 1.1837, - "step": 2299 - }, - { - "epoch": 0.311800989629228, - "grad_norm": 6.539789627040813, - "learning_rate": 1.6117474581385788e-06, - "loss": 1.1544, - "step": 2300 - }, - { - "epoch": 0.31193655527689285, - "grad_norm": 2.4504632990273407, - "learning_rate": 1.611400017743126e-06, - "loss": 1.1586, - "step": 2301 - }, - { - "epoch": 0.3120721209245577, - "grad_norm": 1.9615578692670996, - "learning_rate": 1.6110524594440055e-06, - "loss": 1.1561, - "step": 2302 - }, - { - "epoch": 0.3122076865722226, - "grad_norm": 2.390178530444945, - "learning_rate": 1.6107047833082418e-06, - "loss": 1.1514, - "step": 2303 - }, - { - "epoch": 0.31234325221988746, - "grad_norm": 1.5989217776436109, - "learning_rate": 1.6103569894028813e-06, - "loss": 1.1907, - "step": 2304 - }, - { - "epoch": 0.3124788178675524, - "grad_norm": 1.6686825721764456, - "learning_rate": 1.6100090777949928e-06, - "loss": 1.1441, - "step": 2305 - }, - { - "epoch": 0.31261438351521725, - "grad_norm": 1.5825243599452143, - "learning_rate": 1.6096610485516693e-06, - "loss": 1.1769, - "step": 2306 - }, - { - "epoch": 0.3127499491628821, - "grad_norm": 1.5986638031577636, - "learning_rate": 1.6093129017400248e-06, - "loss": 1.1932, - "step": 2307 - }, - { - "epoch": 0.312885514810547, - "grad_norm": 1.7706742056834075, - "learning_rate": 1.6089646374271965e-06, - "loss": 1.2051, - "step": 2308 - }, - { - "epoch": 0.31302108045821186, - "grad_norm": 1.5425079632167962, - "learning_rate": 1.6086162556803453e-06, - "loss": 1.1647, - "step": 2309 - }, - { - "epoch": 0.3131566461058768, - "grad_norm": 1.941703076602271, - "learning_rate": 1.608267756566653e-06, - "loss": 1.176, - "step": 2310 - }, - { - "epoch": 0.31329221175354166, - "grad_norm": 2.137755517178777, - "learning_rate": 1.607919140153325e-06, - "loss": 1.1631, - "step": 2311 - }, - { - "epoch": 0.31342777740120653, - "grad_norm": 1.7538880700143313, - "learning_rate": 1.6075704065075897e-06, - "loss": 1.1917, - "step": 2312 - }, - { - "epoch": 0.3135633430488714, - "grad_norm": 1.6779084593612938, - "learning_rate": 1.6072215556966975e-06, - "loss": 1.2134, - "step": 2313 - }, - { - "epoch": 0.31369890869653627, - "grad_norm": 1.9731799999535071, - "learning_rate": 1.6068725877879213e-06, - "loss": 1.1526, - "step": 2314 - }, - { - "epoch": 0.3138344743442012, - "grad_norm": 1.3490883368221591, - "learning_rate": 1.6065235028485567e-06, - "loss": 1.1379, - "step": 2315 - }, - { - "epoch": 0.31397003999186607, - "grad_norm": 1.5038960888663697, - "learning_rate": 1.6061743009459225e-06, - "loss": 1.1801, - "step": 2316 - }, - { - "epoch": 0.31410560563953094, - "grad_norm": 1.5136817650087178, - "learning_rate": 1.605824982147359e-06, - "loss": 1.1607, - "step": 2317 - }, - { - "epoch": 0.3142411712871958, - "grad_norm": 1.824990317987802, - "learning_rate": 1.6054755465202296e-06, - "loss": 1.1551, - "step": 2318 - }, - { - "epoch": 0.3143767369348607, - "grad_norm": 1.8924377687369396, - "learning_rate": 1.6051259941319209e-06, - "loss": 1.1643, - "step": 2319 - }, - { - "epoch": 0.3145123025825256, - "grad_norm": 1.6092038909894297, - "learning_rate": 1.6047763250498405e-06, - "loss": 1.1862, - "step": 2320 - }, - { - "epoch": 0.3146478682301905, - "grad_norm": 3.031015956269415, - "learning_rate": 1.6044265393414196e-06, - "loss": 1.2246, - "step": 2321 - }, - { - "epoch": 0.31478343387785535, - "grad_norm": 1.5633691301533488, - "learning_rate": 1.6040766370741117e-06, - "loss": 1.1778, - "step": 2322 - }, - { - "epoch": 0.3149189995255202, - "grad_norm": 3.4225138557255486, - "learning_rate": 1.6037266183153925e-06, - "loss": 1.2127, - "step": 2323 - }, - { - "epoch": 0.31505456517318514, - "grad_norm": 1.4276908880565657, - "learning_rate": 1.6033764831327607e-06, - "loss": 1.2107, - "step": 2324 - }, - { - "epoch": 0.31519013082085, - "grad_norm": 1.5155189556066262, - "learning_rate": 1.6030262315937368e-06, - "loss": 1.2392, - "step": 2325 - }, - { - "epoch": 0.3153256964685149, - "grad_norm": 1.8993223174071954, - "learning_rate": 1.6026758637658642e-06, - "loss": 1.2028, - "step": 2326 - }, - { - "epoch": 0.31546126211617975, - "grad_norm": 1.9254866665681047, - "learning_rate": 1.6023253797167084e-06, - "loss": 1.1944, - "step": 2327 - }, - { - "epoch": 0.3155968277638446, - "grad_norm": 1.965991989257432, - "learning_rate": 1.6019747795138576e-06, - "loss": 1.1696, - "step": 2328 - }, - { - "epoch": 0.31573239341150955, - "grad_norm": 2.1414909685138013, - "learning_rate": 1.6016240632249222e-06, - "loss": 1.2023, - "step": 2329 - }, - { - "epoch": 0.3158679590591744, - "grad_norm": 1.524831322906403, - "learning_rate": 1.6012732309175356e-06, - "loss": 1.1878, - "step": 2330 - }, - { - "epoch": 0.3160035247068393, - "grad_norm": 1.8951859979922014, - "learning_rate": 1.600922282659352e-06, - "loss": 1.1598, - "step": 2331 - }, - { - "epoch": 0.31613909035450416, - "grad_norm": 1.5073687768359594, - "learning_rate": 1.60057121851805e-06, - "loss": 1.1498, - "step": 2332 - }, - { - "epoch": 0.31627465600216903, - "grad_norm": 1.5444546314373493, - "learning_rate": 1.600220038561329e-06, - "loss": 1.1875, - "step": 2333 - }, - { - "epoch": 0.31641022164983396, - "grad_norm": 1.8203232350187495, - "learning_rate": 1.5998687428569113e-06, - "loss": 1.187, - "step": 2334 - }, - { - "epoch": 0.3165457872974988, - "grad_norm": 1.4044320621981543, - "learning_rate": 1.5995173314725419e-06, - "loss": 1.1589, - "step": 2335 - }, - { - "epoch": 0.3166813529451637, - "grad_norm": 1.6866627277627506, - "learning_rate": 1.5991658044759871e-06, - "loss": 1.1758, - "step": 2336 - }, - { - "epoch": 0.31681691859282857, - "grad_norm": 2.2999233963117955, - "learning_rate": 1.5988141619350363e-06, - "loss": 1.1606, - "step": 2337 - }, - { - "epoch": 0.31695248424049344, - "grad_norm": 2.090396372651635, - "learning_rate": 1.5984624039175016e-06, - "loss": 1.2313, - "step": 2338 - }, - { - "epoch": 0.31708804988815836, - "grad_norm": 2.0072713314362884, - "learning_rate": 1.5981105304912159e-06, - "loss": 1.1887, - "step": 2339 - }, - { - "epoch": 0.31722361553582323, - "grad_norm": 1.6152399846838474, - "learning_rate": 1.5977585417240358e-06, - "loss": 1.1494, - "step": 2340 - }, - { - "epoch": 0.3173591811834881, - "grad_norm": 1.7517249866509867, - "learning_rate": 1.5974064376838392e-06, - "loss": 1.1634, - "step": 2341 - }, - { - "epoch": 0.317494746831153, - "grad_norm": 1.933758024227007, - "learning_rate": 1.5970542184385268e-06, - "loss": 1.1779, - "step": 2342 - }, - { - "epoch": 0.31763031247881784, - "grad_norm": 1.7450828836738248, - "learning_rate": 1.5967018840560212e-06, - "loss": 1.154, - "step": 2343 - }, - { - "epoch": 0.31776587812648277, - "grad_norm": 1.6042586475211897, - "learning_rate": 1.5963494346042674e-06, - "loss": 1.1915, - "step": 2344 - }, - { - "epoch": 0.31790144377414764, - "grad_norm": 1.887246980986904, - "learning_rate": 1.5959968701512326e-06, - "loss": 1.1731, - "step": 2345 - }, - { - "epoch": 0.3180370094218125, - "grad_norm": 1.7624973461811497, - "learning_rate": 1.5956441907649057e-06, - "loss": 1.1658, - "step": 2346 - }, - { - "epoch": 0.3181725750694774, - "grad_norm": 1.7648269486975643, - "learning_rate": 1.595291396513298e-06, - "loss": 1.1995, - "step": 2347 - }, - { - "epoch": 0.31830814071714225, - "grad_norm": 1.6450161068654188, - "learning_rate": 1.594938487464444e-06, - "loss": 1.176, - "step": 2348 - }, - { - "epoch": 0.3184437063648072, - "grad_norm": 2.197566408688202, - "learning_rate": 1.5945854636863987e-06, - "loss": 1.2011, - "step": 2349 - }, - { - "epoch": 0.31857927201247205, - "grad_norm": 2.0331364452339744, - "learning_rate": 1.59423232524724e-06, - "loss": 1.2104, - "step": 2350 - }, - { - "epoch": 0.3187148376601369, - "grad_norm": 1.8748133646770986, - "learning_rate": 1.593879072215068e-06, - "loss": 1.1867, - "step": 2351 - }, - { - "epoch": 0.3188504033078018, - "grad_norm": 3.3401255785047854, - "learning_rate": 1.5935257046580048e-06, - "loss": 1.1741, - "step": 2352 - }, - { - "epoch": 0.31898596895546666, - "grad_norm": 1.6990653858963403, - "learning_rate": 1.5931722226441945e-06, - "loss": 1.1815, - "step": 2353 - }, - { - "epoch": 0.3191215346031316, - "grad_norm": 1.6894254354442597, - "learning_rate": 1.5928186262418032e-06, - "loss": 1.1835, - "step": 2354 - }, - { - "epoch": 0.31925710025079646, - "grad_norm": 3.2985981614948887, - "learning_rate": 1.5924649155190191e-06, - "loss": 1.1814, - "step": 2355 - }, - { - "epoch": 0.3193926658984613, - "grad_norm": 1.6182994367226786, - "learning_rate": 1.5921110905440526e-06, - "loss": 1.2003, - "step": 2356 - }, - { - "epoch": 0.3195282315461262, - "grad_norm": 3.2417193514481237, - "learning_rate": 1.5917571513851364e-06, - "loss": 1.1754, - "step": 2357 - }, - { - "epoch": 0.31966379719379107, - "grad_norm": 5.2990410514098025, - "learning_rate": 1.5914030981105246e-06, - "loss": 1.1666, - "step": 2358 - }, - { - "epoch": 0.319799362841456, - "grad_norm": 2.306250304500467, - "learning_rate": 1.5910489307884936e-06, - "loss": 1.1297, - "step": 2359 - }, - { - "epoch": 0.31993492848912086, - "grad_norm": 2.2765956514294685, - "learning_rate": 1.5906946494873415e-06, - "loss": 1.1801, - "step": 2360 - }, - { - "epoch": 0.32007049413678573, - "grad_norm": 1.623784165003658, - "learning_rate": 1.590340254275389e-06, - "loss": 1.2186, - "step": 2361 - }, - { - "epoch": 0.3202060597844506, - "grad_norm": 1.7071773159171726, - "learning_rate": 1.5899857452209787e-06, - "loss": 1.1502, - "step": 2362 - }, - { - "epoch": 0.32034162543211553, - "grad_norm": 2.243936224892669, - "learning_rate": 1.589631122392474e-06, - "loss": 1.1745, - "step": 2363 - }, - { - "epoch": 0.3204771910797804, - "grad_norm": 1.3851470312317449, - "learning_rate": 1.5892763858582618e-06, - "loss": 1.1608, - "step": 2364 - }, - { - "epoch": 0.32061275672744527, - "grad_norm": 1.5858681806269863, - "learning_rate": 1.58892153568675e-06, - "loss": 1.1592, - "step": 2365 - }, - { - "epoch": 0.32074832237511014, - "grad_norm": 1.5875087502734546, - "learning_rate": 1.588566571946369e-06, - "loss": 1.1688, - "step": 2366 - }, - { - "epoch": 0.320883888022775, - "grad_norm": 1.5638234038575933, - "learning_rate": 1.58821149470557e-06, - "loss": 1.1585, - "step": 2367 - }, - { - "epoch": 0.32101945367043994, - "grad_norm": 1.4957368603944114, - "learning_rate": 1.5878563040328276e-06, - "loss": 1.1799, - "step": 2368 - }, - { - "epoch": 0.3211550193181048, - "grad_norm": 1.6655081246978516, - "learning_rate": 1.5875009999966371e-06, - "loss": 1.1893, - "step": 2369 - }, - { - "epoch": 0.3212905849657697, - "grad_norm": 1.8077750281122111, - "learning_rate": 1.5871455826655163e-06, - "loss": 1.1582, - "step": 2370 - }, - { - "epoch": 0.32142615061343455, - "grad_norm": 1.6220647991940846, - "learning_rate": 1.5867900521080044e-06, - "loss": 1.1661, - "step": 2371 - }, - { - "epoch": 0.3215617162610994, - "grad_norm": 1.5371450599673544, - "learning_rate": 1.586434408392663e-06, - "loss": 1.188, - "step": 2372 - }, - { - "epoch": 0.32169728190876434, - "grad_norm": 1.6296124512695802, - "learning_rate": 1.5860786515880745e-06, - "loss": 1.1622, - "step": 2373 - }, - { - "epoch": 0.3218328475564292, - "grad_norm": 1.457812276425042, - "learning_rate": 1.5857227817628447e-06, - "loss": 1.1849, - "step": 2374 - }, - { - "epoch": 0.3219684132040941, - "grad_norm": 1.9339609349430458, - "learning_rate": 1.5853667989855999e-06, - "loss": 1.1533, - "step": 2375 - }, - { - "epoch": 0.32210397885175895, - "grad_norm": 1.5226320037673682, - "learning_rate": 1.5850107033249884e-06, - "loss": 1.2102, - "step": 2376 - }, - { - "epoch": 0.3222395444994238, - "grad_norm": 1.8975768345338047, - "learning_rate": 1.5846544948496807e-06, - "loss": 1.1539, - "step": 2377 - }, - { - "epoch": 0.32237511014708875, - "grad_norm": 1.6742247894037718, - "learning_rate": 1.5842981736283685e-06, - "loss": 1.1751, - "step": 2378 - }, - { - "epoch": 0.3225106757947536, - "grad_norm": 1.8963518925654999, - "learning_rate": 1.5839417397297656e-06, - "loss": 1.1614, - "step": 2379 - }, - { - "epoch": 0.3226462414424185, - "grad_norm": 1.5578793019319777, - "learning_rate": 1.5835851932226074e-06, - "loss": 1.2505, - "step": 2380 - }, - { - "epoch": 0.32278180709008336, - "grad_norm": 2.054467570308797, - "learning_rate": 1.5832285341756517e-06, - "loss": 1.1718, - "step": 2381 - }, - { - "epoch": 0.32291737273774823, - "grad_norm": 2.1064876163794226, - "learning_rate": 1.5828717626576766e-06, - "loss": 1.1581, - "step": 2382 - }, - { - "epoch": 0.32305293838541316, - "grad_norm": 1.3495249853088787, - "learning_rate": 1.582514878737483e-06, - "loss": 1.1551, - "step": 2383 - }, - { - "epoch": 0.32318850403307803, - "grad_norm": 1.4937041585474513, - "learning_rate": 1.5821578824838932e-06, - "loss": 1.1608, - "step": 2384 - }, - { - "epoch": 0.3233240696807429, - "grad_norm": 2.894016989008943, - "learning_rate": 1.5818007739657512e-06, - "loss": 1.2006, - "step": 2385 - }, - { - "epoch": 0.32345963532840777, - "grad_norm": 1.8886973283213486, - "learning_rate": 1.5814435532519221e-06, - "loss": 1.1565, - "step": 2386 - }, - { - "epoch": 0.32359520097607264, - "grad_norm": 1.6017218377698488, - "learning_rate": 1.5810862204112933e-06, - "loss": 1.1723, - "step": 2387 - }, - { - "epoch": 0.32373076662373756, - "grad_norm": 1.3930688957896256, - "learning_rate": 1.580728775512774e-06, - "loss": 1.1771, - "step": 2388 - }, - { - "epoch": 0.32386633227140244, - "grad_norm": 1.4645276182434142, - "learning_rate": 1.5803712186252943e-06, - "loss": 1.1658, - "step": 2389 - }, - { - "epoch": 0.3240018979190673, - "grad_norm": 1.8249369847945909, - "learning_rate": 1.5800135498178065e-06, - "loss": 1.1976, - "step": 2390 - }, - { - "epoch": 0.3241374635667322, - "grad_norm": 1.6426730291732201, - "learning_rate": 1.5796557691592835e-06, - "loss": 1.1876, - "step": 2391 - }, - { - "epoch": 0.32427302921439705, - "grad_norm": 3.0008985068475953, - "learning_rate": 1.579297876718721e-06, - "loss": 1.1748, - "step": 2392 - }, - { - "epoch": 0.32440859486206197, - "grad_norm": 2.1120758083173192, - "learning_rate": 1.5789398725651358e-06, - "loss": 1.1522, - "step": 2393 - }, - { - "epoch": 0.32454416050972684, - "grad_norm": 1.4801114783726053, - "learning_rate": 1.5785817567675661e-06, - "loss": 1.1738, - "step": 2394 - }, - { - "epoch": 0.3246797261573917, - "grad_norm": 1.5150684369414056, - "learning_rate": 1.5782235293950717e-06, - "loss": 1.1512, - "step": 2395 - }, - { - "epoch": 0.3248152918050566, - "grad_norm": 2.0736155853334846, - "learning_rate": 1.5778651905167334e-06, - "loss": 1.1481, - "step": 2396 - }, - { - "epoch": 0.32495085745272145, - "grad_norm": 1.645577856732359, - "learning_rate": 1.577506740201655e-06, - "loss": 1.1541, - "step": 2397 - }, - { - "epoch": 0.3250864231003864, - "grad_norm": 1.6399533061237859, - "learning_rate": 1.5771481785189601e-06, - "loss": 1.1121, - "step": 2398 - }, - { - "epoch": 0.32522198874805125, - "grad_norm": 1.4212925617606895, - "learning_rate": 1.5767895055377948e-06, - "loss": 1.1868, - "step": 2399 - }, - { - "epoch": 0.3253575543957161, - "grad_norm": 1.8091578425409152, - "learning_rate": 1.5764307213273264e-06, - "loss": 1.1901, - "step": 2400 - }, - { - "epoch": 0.325493120043381, - "grad_norm": 1.535521360318581, - "learning_rate": 1.5760718259567432e-06, - "loss": 1.196, - "step": 2401 - }, - { - "epoch": 0.3256286856910459, - "grad_norm": 2.0655018866299146, - "learning_rate": 1.5757128194952557e-06, - "loss": 1.1482, - "step": 2402 - }, - { - "epoch": 0.3257642513387108, - "grad_norm": 2.056248585639207, - "learning_rate": 1.5753537020120952e-06, - "loss": 1.2007, - "step": 2403 - }, - { - "epoch": 0.32589981698637566, - "grad_norm": 2.1610810112457743, - "learning_rate": 1.5749944735765153e-06, - "loss": 1.1941, - "step": 2404 - }, - { - "epoch": 0.3260353826340405, - "grad_norm": 2.2927141040183274, - "learning_rate": 1.5746351342577895e-06, - "loss": 1.1849, - "step": 2405 - }, - { - "epoch": 0.3261709482817054, - "grad_norm": 1.8259938559396405, - "learning_rate": 1.5742756841252143e-06, - "loss": 1.1709, - "step": 2406 - }, - { - "epoch": 0.3263065139293703, - "grad_norm": 1.525156547300931, - "learning_rate": 1.573916123248106e-06, - "loss": 1.1799, - "step": 2407 - }, - { - "epoch": 0.3264420795770352, - "grad_norm": 1.5150914763073366, - "learning_rate": 1.5735564516958039e-06, - "loss": 1.1828, - "step": 2408 - }, - { - "epoch": 0.32657764522470006, - "grad_norm": 1.5417327140608577, - "learning_rate": 1.5731966695376672e-06, - "loss": 1.1764, - "step": 2409 - }, - { - "epoch": 0.32671321087236493, - "grad_norm": 1.6972880074054852, - "learning_rate": 1.5728367768430775e-06, - "loss": 1.1398, - "step": 2410 - }, - { - "epoch": 0.3268487765200298, - "grad_norm": 2.0346956846178736, - "learning_rate": 1.572476773681437e-06, - "loss": 1.1858, - "step": 2411 - }, - { - "epoch": 0.32698434216769473, - "grad_norm": 2.0227328838770573, - "learning_rate": 1.5721166601221695e-06, - "loss": 1.1843, - "step": 2412 - }, - { - "epoch": 0.3271199078153596, - "grad_norm": 2.011001560730901, - "learning_rate": 1.5717564362347203e-06, - "loss": 1.1855, - "step": 2413 - }, - { - "epoch": 0.32725547346302447, - "grad_norm": 1.6069357693962427, - "learning_rate": 1.5713961020885553e-06, - "loss": 1.1732, - "step": 2414 - }, - { - "epoch": 0.32739103911068934, - "grad_norm": 1.5054250754300262, - "learning_rate": 1.5710356577531628e-06, - "loss": 1.1523, - "step": 2415 - }, - { - "epoch": 0.3275266047583542, - "grad_norm": 2.743787149706714, - "learning_rate": 1.5706751032980506e-06, - "loss": 1.1865, - "step": 2416 - }, - { - "epoch": 0.32766217040601914, - "grad_norm": 2.302912367189527, - "learning_rate": 1.5703144387927499e-06, - "loss": 1.1821, - "step": 2417 - }, - { - "epoch": 0.327797736053684, - "grad_norm": 1.42021082169721, - "learning_rate": 1.5699536643068113e-06, - "loss": 1.2046, - "step": 2418 - }, - { - "epoch": 0.3279333017013489, - "grad_norm": 2.664306656732138, - "learning_rate": 1.5695927799098071e-06, - "loss": 1.188, - "step": 2419 - }, - { - "epoch": 0.32806886734901375, - "grad_norm": 2.1705036351352844, - "learning_rate": 1.5692317856713318e-06, - "loss": 1.1993, - "step": 2420 - }, - { - "epoch": 0.3282044329966786, - "grad_norm": 2.5842735758955317, - "learning_rate": 1.5688706816609995e-06, - "loss": 1.1808, - "step": 2421 - }, - { - "epoch": 0.32833999864434354, - "grad_norm": 1.8483898436947468, - "learning_rate": 1.5685094679484472e-06, - "loss": 1.1838, - "step": 2422 - }, - { - "epoch": 0.3284755642920084, - "grad_norm": 1.6030608777532764, - "learning_rate": 1.5681481446033312e-06, - "loss": 1.1961, - "step": 2423 - }, - { - "epoch": 0.3286111299396733, - "grad_norm": 2.6417704120693846, - "learning_rate": 1.56778671169533e-06, - "loss": 1.2259, - "step": 2424 - }, - { - "epoch": 0.32874669558733816, - "grad_norm": 1.5902744067164605, - "learning_rate": 1.5674251692941436e-06, - "loss": 1.1925, - "step": 2425 - }, - { - "epoch": 0.328882261235003, - "grad_norm": 1.5350277061523991, - "learning_rate": 1.5670635174694923e-06, - "loss": 1.1898, - "step": 2426 - }, - { - "epoch": 0.32901782688266795, - "grad_norm": 1.5788224172512797, - "learning_rate": 1.5667017562911176e-06, - "loss": 1.1882, - "step": 2427 - }, - { - "epoch": 0.3291533925303328, - "grad_norm": 1.4664354902078172, - "learning_rate": 1.5663398858287824e-06, - "loss": 1.1899, - "step": 2428 - }, - { - "epoch": 0.3292889581779977, - "grad_norm": 3.159941625186901, - "learning_rate": 1.565977906152271e-06, - "loss": 1.1313, - "step": 2429 - }, - { - "epoch": 0.32942452382566256, - "grad_norm": 1.5223465378928511, - "learning_rate": 1.5656158173313876e-06, - "loss": 1.1611, - "step": 2430 - }, - { - "epoch": 0.32956008947332743, - "grad_norm": 1.9838471669717157, - "learning_rate": 1.5652536194359586e-06, - "loss": 1.1907, - "step": 2431 - }, - { - "epoch": 0.32969565512099236, - "grad_norm": 1.5405536111166134, - "learning_rate": 1.5648913125358312e-06, - "loss": 1.1635, - "step": 2432 - }, - { - "epoch": 0.32983122076865723, - "grad_norm": 1.5667194011447718, - "learning_rate": 1.564528896700873e-06, - "loss": 1.188, - "step": 2433 - }, - { - "epoch": 0.3299667864163221, - "grad_norm": 1.5868210256804955, - "learning_rate": 1.5641663720009732e-06, - "loss": 1.1692, - "step": 2434 - }, - { - "epoch": 0.33010235206398697, - "grad_norm": 2.3149739011782713, - "learning_rate": 1.5638037385060416e-06, - "loss": 1.2011, - "step": 2435 - }, - { - "epoch": 0.33023791771165184, - "grad_norm": 1.695726942726153, - "learning_rate": 1.5634409962860096e-06, - "loss": 1.1867, - "step": 2436 - }, - { - "epoch": 0.33037348335931677, - "grad_norm": 1.5609236874808228, - "learning_rate": 1.5630781454108291e-06, - "loss": 1.1605, - "step": 2437 - }, - { - "epoch": 0.33050904900698164, - "grad_norm": 3.4510973065814867, - "learning_rate": 1.5627151859504726e-06, - "loss": 1.1863, - "step": 2438 - }, - { - "epoch": 0.3306446146546465, - "grad_norm": 1.6859709070395692, - "learning_rate": 1.5623521179749346e-06, - "loss": 1.1863, - "step": 2439 - }, - { - "epoch": 0.3307801803023114, - "grad_norm": 2.013174630471076, - "learning_rate": 1.5619889415542296e-06, - "loss": 1.1978, - "step": 2440 - }, - { - "epoch": 0.3309157459499763, - "grad_norm": 1.5723782647334572, - "learning_rate": 1.5616256567583932e-06, - "loss": 1.1935, - "step": 2441 - }, - { - "epoch": 0.3310513115976412, - "grad_norm": 1.623091182875469, - "learning_rate": 1.561262263657482e-06, - "loss": 1.2109, - "step": 2442 - }, - { - "epoch": 0.33118687724530604, - "grad_norm": 1.9675913954818742, - "learning_rate": 1.5608987623215736e-06, - "loss": 1.1789, - "step": 2443 - }, - { - "epoch": 0.3313224428929709, - "grad_norm": 1.4205579394382242, - "learning_rate": 1.5605351528207664e-06, - "loss": 1.1901, - "step": 2444 - }, - { - "epoch": 0.3314580085406358, - "grad_norm": 1.5363328390314617, - "learning_rate": 1.5601714352251798e-06, - "loss": 1.2263, - "step": 2445 - }, - { - "epoch": 0.3315935741883007, - "grad_norm": 2.6178367687286572, - "learning_rate": 1.5598076096049533e-06, - "loss": 1.1753, - "step": 2446 - }, - { - "epoch": 0.3317291398359656, - "grad_norm": 1.5437201157609632, - "learning_rate": 1.5594436760302483e-06, - "loss": 1.1678, - "step": 2447 - }, - { - "epoch": 0.33186470548363045, - "grad_norm": 1.519128811164236, - "learning_rate": 1.5590796345712465e-06, - "loss": 1.1851, - "step": 2448 - }, - { - "epoch": 0.3320002711312953, - "grad_norm": 2.0852787949411606, - "learning_rate": 1.55871548529815e-06, - "loss": 1.1467, - "step": 2449 - }, - { - "epoch": 0.3321358367789602, - "grad_norm": 1.9039829909751242, - "learning_rate": 1.5583512282811826e-06, - "loss": 1.1755, - "step": 2450 - }, - { - "epoch": 0.3322714024266251, - "grad_norm": 1.6213708410077348, - "learning_rate": 1.557986863590588e-06, - "loss": 1.1266, - "step": 2451 - }, - { - "epoch": 0.33240696807429, - "grad_norm": 1.6125414810141137, - "learning_rate": 1.5576223912966313e-06, - "loss": 1.1572, - "step": 2452 - }, - { - "epoch": 0.33254253372195486, - "grad_norm": 2.0045319330219704, - "learning_rate": 1.557257811469598e-06, - "loss": 1.1484, - "step": 2453 - }, - { - "epoch": 0.33267809936961973, - "grad_norm": 2.024883789818294, - "learning_rate": 1.5568931241797947e-06, - "loss": 1.1952, - "step": 2454 - }, - { - "epoch": 0.3328136650172846, - "grad_norm": 1.717904007534209, - "learning_rate": 1.556528329497548e-06, - "loss": 1.1652, - "step": 2455 - }, - { - "epoch": 0.3329492306649495, - "grad_norm": 1.4518009489660013, - "learning_rate": 1.5561634274932061e-06, - "loss": 1.1628, - "step": 2456 - }, - { - "epoch": 0.3330847963126144, - "grad_norm": 1.43067597014406, - "learning_rate": 1.555798418237137e-06, - "loss": 1.1693, - "step": 2457 - }, - { - "epoch": 0.33322036196027927, - "grad_norm": 4.8694453739639885, - "learning_rate": 1.5554333017997306e-06, - "loss": 1.185, - "step": 2458 - }, - { - "epoch": 0.33335592760794414, - "grad_norm": 1.8795835037245296, - "learning_rate": 1.5550680782513962e-06, - "loss": 1.1588, - "step": 2459 - }, - { - "epoch": 0.333491493255609, - "grad_norm": 1.7179070252605535, - "learning_rate": 1.554702747662564e-06, - "loss": 1.1931, - "step": 2460 - }, - { - "epoch": 0.33362705890327393, - "grad_norm": 3.77732312194548, - "learning_rate": 1.5543373101036856e-06, - "loss": 1.1846, - "step": 2461 - }, - { - "epoch": 0.3337626245509388, - "grad_norm": 1.4782603736428397, - "learning_rate": 1.5539717656452327e-06, - "loss": 1.1679, - "step": 2462 - }, - { - "epoch": 0.3338981901986037, - "grad_norm": 1.4162565188289116, - "learning_rate": 1.5536061143576978e-06, - "loss": 1.1697, - "step": 2463 - }, - { - "epoch": 0.33403375584626854, - "grad_norm": 1.9523545826049042, - "learning_rate": 1.5532403563115932e-06, - "loss": 1.1854, - "step": 2464 - }, - { - "epoch": 0.3341693214939334, - "grad_norm": 2.793284211111756, - "learning_rate": 1.5528744915774532e-06, - "loss": 1.1699, - "step": 2465 - }, - { - "epoch": 0.33430488714159834, - "grad_norm": 1.5375877435188245, - "learning_rate": 1.5525085202258316e-06, - "loss": 1.1743, - "step": 2466 - }, - { - "epoch": 0.3344404527892632, - "grad_norm": 1.458781594400697, - "learning_rate": 1.552142442327303e-06, - "loss": 1.1625, - "step": 2467 - }, - { - "epoch": 0.3345760184369281, - "grad_norm": 2.848634658433064, - "learning_rate": 1.5517762579524628e-06, - "loss": 1.1371, - "step": 2468 - }, - { - "epoch": 0.33471158408459295, - "grad_norm": 1.4733233348566235, - "learning_rate": 1.5514099671719267e-06, - "loss": 1.1903, - "step": 2469 - }, - { - "epoch": 0.3348471497322578, - "grad_norm": 1.6231390822779852, - "learning_rate": 1.551043570056331e-06, - "loss": 1.1904, - "step": 2470 - }, - { - "epoch": 0.33498271537992275, - "grad_norm": 1.387384446871848, - "learning_rate": 1.5506770666763324e-06, - "loss": 1.1425, - "step": 2471 - }, - { - "epoch": 0.3351182810275876, - "grad_norm": 1.789215845983205, - "learning_rate": 1.5503104571026084e-06, - "loss": 1.1885, - "step": 2472 - }, - { - "epoch": 0.3352538466752525, - "grad_norm": 1.6376428233867075, - "learning_rate": 1.5499437414058564e-06, - "loss": 1.1822, - "step": 2473 - }, - { - "epoch": 0.33538941232291736, - "grad_norm": 2.1071659509489256, - "learning_rate": 1.5495769196567955e-06, - "loss": 1.162, - "step": 2474 - }, - { - "epoch": 0.3355249779705822, - "grad_norm": 2.694708877945794, - "learning_rate": 1.5492099919261632e-06, - "loss": 1.2231, - "step": 2475 - }, - { - "epoch": 0.33566054361824715, - "grad_norm": 1.7996605542018287, - "learning_rate": 1.5488429582847192e-06, - "loss": 1.186, - "step": 2476 - }, - { - "epoch": 0.335796109265912, - "grad_norm": 1.4860243335394454, - "learning_rate": 1.5484758188032433e-06, - "loss": 1.1727, - "step": 2477 - }, - { - "epoch": 0.3359316749135769, - "grad_norm": 3.041463545264503, - "learning_rate": 1.5481085735525348e-06, - "loss": 1.2026, - "step": 2478 - }, - { - "epoch": 0.33606724056124176, - "grad_norm": 1.7865892972747621, - "learning_rate": 1.5477412226034145e-06, - "loss": 1.1745, - "step": 2479 - }, - { - "epoch": 0.3362028062089067, - "grad_norm": 2.724890390905319, - "learning_rate": 1.547373766026723e-06, - "loss": 1.2254, - "step": 2480 - }, - { - "epoch": 0.33633837185657156, - "grad_norm": 1.8847462630361331, - "learning_rate": 1.5470062038933213e-06, - "loss": 1.1825, - "step": 2481 - }, - { - "epoch": 0.33647393750423643, - "grad_norm": 1.7636095449982443, - "learning_rate": 1.5466385362740911e-06, - "loss": 1.1863, - "step": 2482 - }, - { - "epoch": 0.3366095031519013, - "grad_norm": 1.5912222753111827, - "learning_rate": 1.5462707632399342e-06, - "loss": 1.1385, - "step": 2483 - }, - { - "epoch": 0.33674506879956617, - "grad_norm": 2.0725028742949854, - "learning_rate": 1.5459028848617726e-06, - "loss": 1.1757, - "step": 2484 - }, - { - "epoch": 0.3368806344472311, - "grad_norm": 1.6811715023806568, - "learning_rate": 1.5455349012105486e-06, - "loss": 1.1996, - "step": 2485 - }, - { - "epoch": 0.33701620009489597, - "grad_norm": 2.138084763125434, - "learning_rate": 1.545166812357225e-06, - "loss": 1.1475, - "step": 2486 - }, - { - "epoch": 0.33715176574256084, - "grad_norm": 1.8614744277495263, - "learning_rate": 1.5447986183727852e-06, - "loss": 1.1936, - "step": 2487 - }, - { - "epoch": 0.3372873313902257, - "grad_norm": 1.8532908290761294, - "learning_rate": 1.5444303193282324e-06, - "loss": 1.1874, - "step": 2488 - }, - { - "epoch": 0.3374228970378906, - "grad_norm": 1.6072316612163118, - "learning_rate": 1.5440619152945896e-06, - "loss": 1.1238, - "step": 2489 - }, - { - "epoch": 0.3375584626855555, - "grad_norm": 1.779336481222247, - "learning_rate": 1.5436934063429013e-06, - "loss": 1.2176, - "step": 2490 - }, - { - "epoch": 0.3376940283332204, - "grad_norm": 1.866141743533603, - "learning_rate": 1.5433247925442308e-06, - "loss": 1.1547, - "step": 2491 - }, - { - "epoch": 0.33782959398088525, - "grad_norm": 1.677248425844946, - "learning_rate": 1.542956073969663e-06, - "loss": 1.1523, - "step": 2492 - }, - { - "epoch": 0.3379651596285501, - "grad_norm": 1.414584296905524, - "learning_rate": 1.5425872506903024e-06, - "loss": 1.1388, - "step": 2493 - }, - { - "epoch": 0.338100725276215, - "grad_norm": 1.5103118399048494, - "learning_rate": 1.542218322777273e-06, - "loss": 1.1656, - "step": 2494 - }, - { - "epoch": 0.3382362909238799, - "grad_norm": 1.6262971582968297, - "learning_rate": 1.5418492903017204e-06, - "loss": 1.1831, - "step": 2495 - }, - { - "epoch": 0.3383718565715448, - "grad_norm": 4.9528617762390095, - "learning_rate": 1.5414801533348091e-06, - "loss": 1.2187, - "step": 2496 - }, - { - "epoch": 0.33850742221920965, - "grad_norm": 1.7424828956375824, - "learning_rate": 1.5411109119477247e-06, - "loss": 1.2044, - "step": 2497 - }, - { - "epoch": 0.3386429878668745, - "grad_norm": 1.6747450334596354, - "learning_rate": 1.5407415662116718e-06, - "loss": 1.2084, - "step": 2498 - }, - { - "epoch": 0.3387785535145394, - "grad_norm": 1.4823345879102376, - "learning_rate": 1.5403721161978764e-06, - "loss": 1.155, - "step": 2499 - }, - { - "epoch": 0.3389141191622043, - "grad_norm": 2.1644350441087963, - "learning_rate": 1.5400025619775838e-06, - "loss": 1.1368, - "step": 2500 - }, - { - "epoch": 0.3390496848098692, - "grad_norm": 1.4653870772719526, - "learning_rate": 1.5396329036220598e-06, - "loss": 1.1756, - "step": 2501 - }, - { - "epoch": 0.33918525045753406, - "grad_norm": 1.395342112381541, - "learning_rate": 1.5392631412025898e-06, - "loss": 1.1704, - "step": 2502 - }, - { - "epoch": 0.33932081610519893, - "grad_norm": 1.4299158852173628, - "learning_rate": 1.5388932747904797e-06, - "loss": 1.1719, - "step": 2503 - }, - { - "epoch": 0.3394563817528638, - "grad_norm": 1.5549226997827177, - "learning_rate": 1.5385233044570554e-06, - "loss": 1.169, - "step": 2504 - }, - { - "epoch": 0.3395919474005287, - "grad_norm": 2.241707468736357, - "learning_rate": 1.5381532302736627e-06, - "loss": 1.1835, - "step": 2505 - }, - { - "epoch": 0.3397275130481936, - "grad_norm": 1.4914693436815392, - "learning_rate": 1.5377830523116675e-06, - "loss": 1.1728, - "step": 2506 - }, - { - "epoch": 0.33986307869585847, - "grad_norm": 2.8773001417485293, - "learning_rate": 1.5374127706424553e-06, - "loss": 1.1611, - "step": 2507 - }, - { - "epoch": 0.33999864434352334, - "grad_norm": 1.7267867579365823, - "learning_rate": 1.5370423853374325e-06, - "loss": 1.1762, - "step": 2508 - }, - { - "epoch": 0.3401342099911882, - "grad_norm": 2.259543963609326, - "learning_rate": 1.5366718964680253e-06, - "loss": 1.1483, - "step": 2509 - }, - { - "epoch": 0.34026977563885313, - "grad_norm": 1.4967040716016933, - "learning_rate": 1.5363013041056787e-06, - "loss": 1.1948, - "step": 2510 - }, - { - "epoch": 0.340405341286518, - "grad_norm": 1.3780298561868003, - "learning_rate": 1.5359306083218588e-06, - "loss": 1.1557, - "step": 2511 - }, - { - "epoch": 0.3405409069341829, - "grad_norm": 1.6046577039637295, - "learning_rate": 1.5355598091880517e-06, - "loss": 1.1429, - "step": 2512 - }, - { - "epoch": 0.34067647258184774, - "grad_norm": 1.5508988052623558, - "learning_rate": 1.5351889067757627e-06, - "loss": 1.2082, - "step": 2513 - }, - { - "epoch": 0.3408120382295126, - "grad_norm": 1.4921958529181265, - "learning_rate": 1.5348179011565176e-06, - "loss": 1.2185, - "step": 2514 - }, - { - "epoch": 0.34094760387717754, - "grad_norm": 2.8223432121667194, - "learning_rate": 1.5344467924018619e-06, - "loss": 1.119, - "step": 2515 - }, - { - "epoch": 0.3410831695248424, - "grad_norm": 1.756732120846369, - "learning_rate": 1.534075580583361e-06, - "loss": 1.1302, - "step": 2516 - }, - { - "epoch": 0.3412187351725073, - "grad_norm": 2.864295136149504, - "learning_rate": 1.5337042657726e-06, - "loss": 1.1926, - "step": 2517 - }, - { - "epoch": 0.34135430082017215, - "grad_norm": 1.6905958170357525, - "learning_rate": 1.5333328480411842e-06, - "loss": 1.1366, - "step": 2518 - }, - { - "epoch": 0.3414898664678371, - "grad_norm": 1.8048699616793222, - "learning_rate": 1.5329613274607387e-06, - "loss": 1.1926, - "step": 2519 - }, - { - "epoch": 0.34162543211550195, - "grad_norm": 1.8553406742078025, - "learning_rate": 1.5325897041029078e-06, - "loss": 1.1991, - "step": 2520 - }, - { - "epoch": 0.3417609977631668, - "grad_norm": 2.098655324178943, - "learning_rate": 1.5322179780393567e-06, - "loss": 1.1544, - "step": 2521 - }, - { - "epoch": 0.3418965634108317, - "grad_norm": 1.6852362315562086, - "learning_rate": 1.5318461493417694e-06, - "loss": 1.1837, - "step": 2522 - }, - { - "epoch": 0.34203212905849656, - "grad_norm": 1.5156483614554936, - "learning_rate": 1.5314742180818504e-06, - "loss": 1.2024, - "step": 2523 - }, - { - "epoch": 0.3421676947061615, - "grad_norm": 1.808453115831744, - "learning_rate": 1.5311021843313238e-06, - "loss": 1.1648, - "step": 2524 - }, - { - "epoch": 0.34230326035382636, - "grad_norm": 4.060134911734256, - "learning_rate": 1.5307300481619332e-06, - "loss": 1.2094, - "step": 2525 - }, - { - "epoch": 0.3424388260014912, - "grad_norm": 1.5878831808262936, - "learning_rate": 1.5303578096454422e-06, - "loss": 1.1558, - "step": 2526 - }, - { - "epoch": 0.3425743916491561, - "grad_norm": 1.9932507375469515, - "learning_rate": 1.5299854688536339e-06, - "loss": 1.1625, - "step": 2527 - }, - { - "epoch": 0.34270995729682097, - "grad_norm": 1.7845201018686696, - "learning_rate": 1.5296130258583113e-06, - "loss": 1.1752, - "step": 2528 - }, - { - "epoch": 0.3428455229444859, - "grad_norm": 1.6182848093081028, - "learning_rate": 1.5292404807312971e-06, - "loss": 1.1724, - "step": 2529 - }, - { - "epoch": 0.34298108859215076, - "grad_norm": 1.9114662878221582, - "learning_rate": 1.5288678335444342e-06, - "loss": 1.1783, - "step": 2530 - }, - { - "epoch": 0.34311665423981563, - "grad_norm": 2.13137595556096, - "learning_rate": 1.5284950843695838e-06, - "loss": 1.132, - "step": 2531 - }, - { - "epoch": 0.3432522198874805, - "grad_norm": 2.3929772499321214, - "learning_rate": 1.5281222332786282e-06, - "loss": 1.183, - "step": 2532 - }, - { - "epoch": 0.3433877855351454, - "grad_norm": 1.7716415564978079, - "learning_rate": 1.527749280343469e-06, - "loss": 1.1614, - "step": 2533 - }, - { - "epoch": 0.3435233511828103, - "grad_norm": 2.09980812283455, - "learning_rate": 1.527376225636026e-06, - "loss": 1.1854, - "step": 2534 - }, - { - "epoch": 0.34365891683047517, - "grad_norm": 1.5614142256696892, - "learning_rate": 1.5270030692282415e-06, - "loss": 1.166, - "step": 2535 - }, - { - "epoch": 0.34379448247814004, - "grad_norm": 1.5078887046774765, - "learning_rate": 1.526629811192075e-06, - "loss": 1.1267, - "step": 2536 - }, - { - "epoch": 0.3439300481258049, - "grad_norm": 1.642819765764812, - "learning_rate": 1.5262564515995062e-06, - "loss": 1.1978, - "step": 2537 - }, - { - "epoch": 0.3440656137734698, - "grad_norm": 1.5237180085200164, - "learning_rate": 1.5258829905225348e-06, - "loss": 1.1492, - "step": 2538 - }, - { - "epoch": 0.3442011794211347, - "grad_norm": 1.5141178622976035, - "learning_rate": 1.5255094280331795e-06, - "loss": 1.1766, - "step": 2539 - }, - { - "epoch": 0.3443367450687996, - "grad_norm": 5.619721061171953, - "learning_rate": 1.5251357642034793e-06, - "loss": 1.1449, - "step": 2540 - }, - { - "epoch": 0.34447231071646445, - "grad_norm": 1.6241353015196955, - "learning_rate": 1.524761999105492e-06, - "loss": 1.1358, - "step": 2541 - }, - { - "epoch": 0.3446078763641293, - "grad_norm": 1.6926103549732228, - "learning_rate": 1.5243881328112953e-06, - "loss": 1.1738, - "step": 2542 - }, - { - "epoch": 0.3447434420117942, - "grad_norm": 1.9681743824040814, - "learning_rate": 1.5240141653929868e-06, - "loss": 1.1917, - "step": 2543 - }, - { - "epoch": 0.3448790076594591, - "grad_norm": 2.217168395222342, - "learning_rate": 1.5236400969226828e-06, - "loss": 1.1587, - "step": 2544 - }, - { - "epoch": 0.345014573307124, - "grad_norm": 1.9416355869891382, - "learning_rate": 1.5232659274725195e-06, - "loss": 1.1598, - "step": 2545 - }, - { - "epoch": 0.34515013895478885, - "grad_norm": 1.4402059356037482, - "learning_rate": 1.5228916571146522e-06, - "loss": 1.1672, - "step": 2546 - }, - { - "epoch": 0.3452857046024537, - "grad_norm": 1.6183820138757468, - "learning_rate": 1.5225172859212565e-06, - "loss": 1.1933, - "step": 2547 - }, - { - "epoch": 0.3454212702501186, - "grad_norm": 1.8486492836004675, - "learning_rate": 1.5221428139645266e-06, - "loss": 1.1569, - "step": 2548 - }, - { - "epoch": 0.3455568358977835, - "grad_norm": 2.546437437519464, - "learning_rate": 1.5217682413166767e-06, - "loss": 1.1722, - "step": 2549 - }, - { - "epoch": 0.3456924015454484, - "grad_norm": 1.7171944837968884, - "learning_rate": 1.5213935680499397e-06, - "loss": 1.1202, - "step": 2550 - }, - { - "epoch": 0.34582796719311326, - "grad_norm": 1.4917218700641648, - "learning_rate": 1.521018794236569e-06, - "loss": 1.1391, - "step": 2551 - }, - { - "epoch": 0.34596353284077813, - "grad_norm": 1.6290184684525204, - "learning_rate": 1.5206439199488366e-06, - "loss": 1.1948, - "step": 2552 - }, - { - "epoch": 0.346099098488443, - "grad_norm": 2.8265169201285634, - "learning_rate": 1.5202689452590339e-06, - "loss": 1.1738, - "step": 2553 - }, - { - "epoch": 0.34623466413610793, - "grad_norm": 1.6065619320293492, - "learning_rate": 1.5198938702394717e-06, - "loss": 1.1376, - "step": 2554 - }, - { - "epoch": 0.3463702297837728, - "grad_norm": 1.556238190013172, - "learning_rate": 1.5195186949624804e-06, - "loss": 1.1587, - "step": 2555 - }, - { - "epoch": 0.34650579543143767, - "grad_norm": 1.421661396832942, - "learning_rate": 1.5191434195004098e-06, - "loss": 1.1451, - "step": 2556 - }, - { - "epoch": 0.34664136107910254, - "grad_norm": 1.5504893398726336, - "learning_rate": 1.5187680439256285e-06, - "loss": 1.1392, - "step": 2557 - }, - { - "epoch": 0.34677692672676746, - "grad_norm": 2.303879566344356, - "learning_rate": 1.5183925683105251e-06, - "loss": 1.1717, - "step": 2558 - }, - { - "epoch": 0.34691249237443234, - "grad_norm": 1.554204394951351, - "learning_rate": 1.5180169927275066e-06, - "loss": 1.1598, - "step": 2559 - }, - { - "epoch": 0.3470480580220972, - "grad_norm": 1.4422866830862724, - "learning_rate": 1.517641317249e-06, - "loss": 1.2145, - "step": 2560 - }, - { - "epoch": 0.3471836236697621, - "grad_norm": 1.7845357057228097, - "learning_rate": 1.5172655419474514e-06, - "loss": 1.1684, - "step": 2561 - }, - { - "epoch": 0.34731918931742695, - "grad_norm": 1.502789015374646, - "learning_rate": 1.5168896668953261e-06, - "loss": 1.15, - "step": 2562 - }, - { - "epoch": 0.34745475496509187, - "grad_norm": 1.7295365680916088, - "learning_rate": 1.5165136921651084e-06, - "loss": 1.147, - "step": 2563 - }, - { - "epoch": 0.34759032061275674, - "grad_norm": 1.444049198985322, - "learning_rate": 1.5161376178293028e-06, - "loss": 1.1719, - "step": 2564 - }, - { - "epoch": 0.3477258862604216, - "grad_norm": 1.7690747832997413, - "learning_rate": 1.5157614439604313e-06, - "loss": 1.1451, - "step": 2565 - }, - { - "epoch": 0.3478614519080865, - "grad_norm": 1.640830262567967, - "learning_rate": 1.5153851706310367e-06, - "loss": 1.2154, - "step": 2566 - }, - { - "epoch": 0.34799701755575135, - "grad_norm": 1.7095111064878008, - "learning_rate": 1.51500879791368e-06, - "loss": 1.1573, - "step": 2567 - }, - { - "epoch": 0.3481325832034163, - "grad_norm": 1.5583161837792532, - "learning_rate": 1.5146323258809423e-06, - "loss": 1.1586, - "step": 2568 - }, - { - "epoch": 0.34826814885108115, - "grad_norm": 1.5212248087193019, - "learning_rate": 1.5142557546054224e-06, - "loss": 1.2024, - "step": 2569 - }, - { - "epoch": 0.348403714498746, - "grad_norm": 1.815975967976842, - "learning_rate": 1.5138790841597398e-06, - "loss": 1.1905, - "step": 2570 - }, - { - "epoch": 0.3485392801464109, - "grad_norm": 1.7936911150149335, - "learning_rate": 1.5135023146165317e-06, - "loss": 1.2275, - "step": 2571 - }, - { - "epoch": 0.34867484579407576, - "grad_norm": 1.5879685888431645, - "learning_rate": 1.513125446048456e-06, - "loss": 1.198, - "step": 2572 - }, - { - "epoch": 0.3488104114417407, - "grad_norm": 1.812959768474053, - "learning_rate": 1.5127484785281884e-06, - "loss": 1.1703, - "step": 2573 - }, - { - "epoch": 0.34894597708940556, - "grad_norm": 9.456260472523303, - "learning_rate": 1.5123714121284237e-06, - "loss": 1.1845, - "step": 2574 - }, - { - "epoch": 0.3490815427370704, - "grad_norm": 1.555701307340489, - "learning_rate": 1.5119942469218768e-06, - "loss": 1.194, - "step": 2575 - }, - { - "epoch": 0.3492171083847353, - "grad_norm": 1.5805052149630412, - "learning_rate": 1.5116169829812807e-06, - "loss": 1.1989, - "step": 2576 - }, - { - "epoch": 0.34935267403240017, - "grad_norm": 2.006200982505679, - "learning_rate": 1.511239620379388e-06, - "loss": 1.1831, - "step": 2577 - }, - { - "epoch": 0.3494882396800651, - "grad_norm": 1.410119377687749, - "learning_rate": 1.51086215918897e-06, - "loss": 1.1841, - "step": 2578 - }, - { - "epoch": 0.34962380532772996, - "grad_norm": 1.6914081412295234, - "learning_rate": 1.510484599482817e-06, - "loss": 1.1409, - "step": 2579 - }, - { - "epoch": 0.34975937097539483, - "grad_norm": 2.229864755581548, - "learning_rate": 1.5101069413337386e-06, - "loss": 1.1461, - "step": 2580 - }, - { - "epoch": 0.3498949366230597, - "grad_norm": 2.4279656970330596, - "learning_rate": 1.5097291848145631e-06, - "loss": 1.1791, - "step": 2581 - }, - { - "epoch": 0.3500305022707246, - "grad_norm": 1.5196804626418745, - "learning_rate": 1.5093513299981378e-06, - "loss": 1.1623, - "step": 2582 - }, - { - "epoch": 0.3501660679183895, - "grad_norm": 1.6335671396799987, - "learning_rate": 1.5089733769573292e-06, - "loss": 1.2023, - "step": 2583 - }, - { - "epoch": 0.35030163356605437, - "grad_norm": 1.4514494445632573, - "learning_rate": 1.5085953257650223e-06, - "loss": 1.1579, - "step": 2584 - }, - { - "epoch": 0.35043719921371924, - "grad_norm": 1.382426184860584, - "learning_rate": 1.5082171764941216e-06, - "loss": 1.1788, - "step": 2585 - }, - { - "epoch": 0.3505727648613841, - "grad_norm": 1.582656130171159, - "learning_rate": 1.5078389292175499e-06, - "loss": 1.1792, - "step": 2586 - }, - { - "epoch": 0.350708330509049, - "grad_norm": 1.4269047092156713, - "learning_rate": 1.5074605840082494e-06, - "loss": 1.1553, - "step": 2587 - }, - { - "epoch": 0.3508438961567139, - "grad_norm": 1.8867114122806237, - "learning_rate": 1.5070821409391812e-06, - "loss": 1.1917, - "step": 2588 - }, - { - "epoch": 0.3509794618043788, - "grad_norm": 2.0566925428007834, - "learning_rate": 1.5067036000833242e-06, - "loss": 1.1725, - "step": 2589 - }, - { - "epoch": 0.35111502745204365, - "grad_norm": 2.2461253662337235, - "learning_rate": 1.5063249615136782e-06, - "loss": 1.1457, - "step": 2590 - }, - { - "epoch": 0.3512505930997085, - "grad_norm": 1.4567243202906368, - "learning_rate": 1.5059462253032595e-06, - "loss": 1.198, - "step": 2591 - }, - { - "epoch": 0.3513861587473734, - "grad_norm": 1.456370806969757, - "learning_rate": 1.5055673915251052e-06, - "loss": 1.1314, - "step": 2592 - }, - { - "epoch": 0.3515217243950383, - "grad_norm": 1.69199709753919, - "learning_rate": 1.5051884602522702e-06, - "loss": 1.1419, - "step": 2593 - }, - { - "epoch": 0.3516572900427032, - "grad_norm": 1.6902121083878445, - "learning_rate": 1.5048094315578284e-06, - "loss": 1.1767, - "step": 2594 - }, - { - "epoch": 0.35179285569036806, - "grad_norm": 2.829407612821414, - "learning_rate": 1.5044303055148722e-06, - "loss": 1.1298, - "step": 2595 - }, - { - "epoch": 0.3519284213380329, - "grad_norm": 1.8621016415936875, - "learning_rate": 1.5040510821965135e-06, - "loss": 1.1906, - "step": 2596 - }, - { - "epoch": 0.3520639869856978, - "grad_norm": 1.42644507353959, - "learning_rate": 1.5036717616758824e-06, - "loss": 1.1353, - "step": 2597 - }, - { - "epoch": 0.3521995526333627, - "grad_norm": 2.986860449195858, - "learning_rate": 1.5032923440261276e-06, - "loss": 1.1749, - "step": 2598 - }, - { - "epoch": 0.3523351182810276, - "grad_norm": 1.6744503535993989, - "learning_rate": 1.5029128293204174e-06, - "loss": 1.1748, - "step": 2599 - }, - { - "epoch": 0.35247068392869246, - "grad_norm": 1.9020136274752257, - "learning_rate": 1.5025332176319373e-06, - "loss": 1.1637, - "step": 2600 - }, - { - "epoch": 0.35260624957635733, - "grad_norm": 1.617007796143291, - "learning_rate": 1.5021535090338932e-06, - "loss": 1.1474, - "step": 2601 - }, - { - "epoch": 0.35274181522402226, - "grad_norm": 1.641163905145374, - "learning_rate": 1.5017737035995087e-06, - "loss": 1.1568, - "step": 2602 - }, - { - "epoch": 0.35287738087168713, - "grad_norm": 1.7116085402724026, - "learning_rate": 1.5013938014020262e-06, - "loss": 1.158, - "step": 2603 - }, - { - "epoch": 0.353012946519352, - "grad_norm": 1.6092847179237328, - "learning_rate": 1.501013802514707e-06, - "loss": 1.1586, - "step": 2604 - }, - { - "epoch": 0.35314851216701687, - "grad_norm": 1.3522334242409297, - "learning_rate": 1.5006337070108304e-06, - "loss": 1.226, - "step": 2605 - }, - { - "epoch": 0.35328407781468174, - "grad_norm": 1.949933208991432, - "learning_rate": 1.5002535149636952e-06, - "loss": 1.1681, - "step": 2606 - }, - { - "epoch": 0.35341964346234667, - "grad_norm": 1.5059996110551774, - "learning_rate": 1.4998732264466186e-06, - "loss": 1.1212, - "step": 2607 - }, - { - "epoch": 0.35355520911001154, - "grad_norm": 1.8096207275950944, - "learning_rate": 1.499492841532936e-06, - "loss": 1.1652, - "step": 2608 - }, - { - "epoch": 0.3536907747576764, - "grad_norm": 1.3935979112776484, - "learning_rate": 1.4991123602960017e-06, - "loss": 1.1858, - "step": 2609 - }, - { - "epoch": 0.3538263404053413, - "grad_norm": 1.6413332160041496, - "learning_rate": 1.4987317828091882e-06, - "loss": 1.194, - "step": 2610 - }, - { - "epoch": 0.35396190605300615, - "grad_norm": 1.5220159948431358, - "learning_rate": 1.4983511091458874e-06, - "loss": 1.1656, - "step": 2611 - }, - { - "epoch": 0.3540974717006711, - "grad_norm": 1.6628804682471179, - "learning_rate": 1.4979703393795086e-06, - "loss": 1.1838, - "step": 2612 - }, - { - "epoch": 0.35423303734833594, - "grad_norm": 1.7277515314341483, - "learning_rate": 1.4975894735834809e-06, - "loss": 1.1005, - "step": 2613 - }, - { - "epoch": 0.3543686029960008, - "grad_norm": 2.498333622509205, - "learning_rate": 1.4972085118312511e-06, - "loss": 1.1407, - "step": 2614 - }, - { - "epoch": 0.3545041686436657, - "grad_norm": 1.4829501892534938, - "learning_rate": 1.4968274541962845e-06, - "loss": 1.1729, - "step": 2615 - }, - { - "epoch": 0.35463973429133056, - "grad_norm": 1.8082218582473992, - "learning_rate": 1.4964463007520647e-06, - "loss": 1.1768, - "step": 2616 - }, - { - "epoch": 0.3547752999389955, - "grad_norm": 2.218279636327135, - "learning_rate": 1.4960650515720947e-06, - "loss": 1.1579, - "step": 2617 - }, - { - "epoch": 0.35491086558666035, - "grad_norm": 8.338122575372838, - "learning_rate": 1.4956837067298954e-06, - "loss": 1.1927, - "step": 2618 - }, - { - "epoch": 0.3550464312343252, - "grad_norm": 1.6389055402395662, - "learning_rate": 1.4953022662990057e-06, - "loss": 1.1424, - "step": 2619 - }, - { - "epoch": 0.3551819968819901, - "grad_norm": 1.8754516584790053, - "learning_rate": 1.4949207303529835e-06, - "loss": 1.1943, - "step": 2620 - }, - { - "epoch": 0.35531756252965496, - "grad_norm": 1.7544957315491456, - "learning_rate": 1.4945390989654054e-06, - "loss": 1.1813, - "step": 2621 - }, - { - "epoch": 0.3554531281773199, - "grad_norm": 1.5513847430497485, - "learning_rate": 1.4941573722098655e-06, - "loss": 1.1841, - "step": 2622 - }, - { - "epoch": 0.35558869382498476, - "grad_norm": 1.6228288687048842, - "learning_rate": 1.4937755501599772e-06, - "loss": 1.1401, - "step": 2623 - }, - { - "epoch": 0.35572425947264963, - "grad_norm": 1.732401383353003, - "learning_rate": 1.4933936328893714e-06, - "loss": 1.1638, - "step": 2624 - }, - { - "epoch": 0.3558598251203145, - "grad_norm": 1.6561927837817787, - "learning_rate": 1.4930116204716984e-06, - "loss": 1.1838, - "step": 2625 - }, - { - "epoch": 0.35599539076797937, - "grad_norm": 1.6215881394865508, - "learning_rate": 1.492629512980626e-06, - "loss": 1.144, - "step": 2626 - }, - { - "epoch": 0.3561309564156443, - "grad_norm": 1.517726743805834, - "learning_rate": 1.4922473104898404e-06, - "loss": 1.1547, - "step": 2627 - }, - { - "epoch": 0.35626652206330917, - "grad_norm": 1.4326048213493696, - "learning_rate": 1.4918650130730467e-06, - "loss": 1.1593, - "step": 2628 - }, - { - "epoch": 0.35640208771097404, - "grad_norm": 1.6211379927951852, - "learning_rate": 1.491482620803968e-06, - "loss": 1.1617, - "step": 2629 - }, - { - "epoch": 0.3565376533586389, - "grad_norm": 2.048579414086659, - "learning_rate": 1.491100133756345e-06, - "loss": 1.1657, - "step": 2630 - }, - { - "epoch": 0.3566732190063038, - "grad_norm": 1.4810531516457066, - "learning_rate": 1.490717552003938e-06, - "loss": 1.1905, - "step": 2631 - }, - { - "epoch": 0.3568087846539687, - "grad_norm": 2.054141332738419, - "learning_rate": 1.4903348756205242e-06, - "loss": 1.1781, - "step": 2632 - }, - { - "epoch": 0.3569443503016336, - "grad_norm": 1.636274616456946, - "learning_rate": 1.4899521046799005e-06, - "loss": 1.1906, - "step": 2633 - }, - { - "epoch": 0.35707991594929844, - "grad_norm": 1.4463960948216283, - "learning_rate": 1.4895692392558806e-06, - "loss": 1.1612, - "step": 2634 - }, - { - "epoch": 0.3572154815969633, - "grad_norm": 1.69005728935076, - "learning_rate": 1.4891862794222976e-06, - "loss": 1.1679, - "step": 2635 - }, - { - "epoch": 0.3573510472446282, - "grad_norm": 1.492695415241083, - "learning_rate": 1.4888032252530017e-06, - "loss": 1.1852, - "step": 2636 - }, - { - "epoch": 0.3574866128922931, - "grad_norm": 1.4455297598554233, - "learning_rate": 1.4884200768218625e-06, - "loss": 1.1925, - "step": 2637 - }, - { - "epoch": 0.357622178539958, - "grad_norm": 1.5109543644058567, - "learning_rate": 1.4880368342027665e-06, - "loss": 1.1737, - "step": 2638 - }, - { - "epoch": 0.35775774418762285, - "grad_norm": 1.5689054597352117, - "learning_rate": 1.4876534974696196e-06, - "loss": 1.1856, - "step": 2639 - }, - { - "epoch": 0.3578933098352877, - "grad_norm": 1.9962714194800582, - "learning_rate": 1.487270066696345e-06, - "loss": 1.1375, - "step": 2640 - }, - { - "epoch": 0.35802887548295265, - "grad_norm": 1.8058997121070783, - "learning_rate": 1.4868865419568841e-06, - "loss": 1.1196, - "step": 2641 - }, - { - "epoch": 0.3581644411306175, - "grad_norm": 1.5141522141726145, - "learning_rate": 1.4865029233251971e-06, - "loss": 1.168, - "step": 2642 - }, - { - "epoch": 0.3583000067782824, - "grad_norm": 1.476717533108141, - "learning_rate": 1.4861192108752617e-06, - "loss": 1.1935, - "step": 2643 - }, - { - "epoch": 0.35843557242594726, - "grad_norm": 2.703030261936946, - "learning_rate": 1.485735404681073e-06, - "loss": 1.1608, - "step": 2644 - }, - { - "epoch": 0.35857113807361213, - "grad_norm": 1.898779611935178, - "learning_rate": 1.4853515048166463e-06, - "loss": 1.1927, - "step": 2645 - }, - { - "epoch": 0.35870670372127705, - "grad_norm": 1.950449628391994, - "learning_rate": 1.4849675113560128e-06, - "loss": 1.125, - "step": 2646 - }, - { - "epoch": 0.3588422693689419, - "grad_norm": 7.609229216719428, - "learning_rate": 1.4845834243732228e-06, - "loss": 1.1733, - "step": 2647 - }, - { - "epoch": 0.3589778350166068, - "grad_norm": 1.6598753266371196, - "learning_rate": 1.4841992439423445e-06, - "loss": 1.1431, - "step": 2648 - }, - { - "epoch": 0.35911340066427166, - "grad_norm": 1.5982399382278576, - "learning_rate": 1.483814970137464e-06, - "loss": 1.1305, - "step": 2649 - }, - { - "epoch": 0.35924896631193654, - "grad_norm": 1.525270162105972, - "learning_rate": 1.4834306030326855e-06, - "loss": 1.1502, - "step": 2650 - }, - { - "epoch": 0.35938453195960146, - "grad_norm": 1.6641512463010923, - "learning_rate": 1.4830461427021311e-06, - "loss": 1.1942, - "step": 2651 - }, - { - "epoch": 0.35952009760726633, - "grad_norm": 1.3609225403667926, - "learning_rate": 1.4826615892199415e-06, - "loss": 1.1502, - "step": 2652 - }, - { - "epoch": 0.3596556632549312, - "grad_norm": 1.3240542645303324, - "learning_rate": 1.482276942660274e-06, - "loss": 1.1138, - "step": 2653 - }, - { - "epoch": 0.35979122890259607, - "grad_norm": 1.3975066078441705, - "learning_rate": 1.481892203097305e-06, - "loss": 1.1467, - "step": 2654 - }, - { - "epoch": 0.35992679455026094, - "grad_norm": 2.029336325609661, - "learning_rate": 1.481507370605228e-06, - "loss": 1.1797, - "step": 2655 - }, - { - "epoch": 0.36006236019792587, - "grad_norm": 1.5001549699594299, - "learning_rate": 1.481122445258256e-06, - "loss": 1.1843, - "step": 2656 - }, - { - "epoch": 0.36019792584559074, - "grad_norm": 1.618504789382162, - "learning_rate": 1.4807374271306182e-06, - "loss": 1.1735, - "step": 2657 - }, - { - "epoch": 0.3603334914932556, - "grad_norm": 1.9499612071448267, - "learning_rate": 1.4803523162965618e-06, - "loss": 1.1426, - "step": 2658 - }, - { - "epoch": 0.3604690571409205, - "grad_norm": 1.3618743902976063, - "learning_rate": 1.4799671128303533e-06, - "loss": 1.1408, - "step": 2659 - }, - { - "epoch": 0.36060462278858535, - "grad_norm": 1.788661516120287, - "learning_rate": 1.4795818168062755e-06, - "loss": 1.1535, - "step": 2660 - }, - { - "epoch": 0.3607401884362503, - "grad_norm": 4.736508785158781, - "learning_rate": 1.47919642829863e-06, - "loss": 1.1855, - "step": 2661 - }, - { - "epoch": 0.36087575408391515, - "grad_norm": 1.9843319143338536, - "learning_rate": 1.4788109473817359e-06, - "loss": 1.1813, - "step": 2662 - }, - { - "epoch": 0.36101131973158, - "grad_norm": 1.9136566941430528, - "learning_rate": 1.4784253741299298e-06, - "loss": 1.177, - "step": 2663 - }, - { - "epoch": 0.3611468853792449, - "grad_norm": 1.3647472889171828, - "learning_rate": 1.4780397086175672e-06, - "loss": 1.1285, - "step": 2664 - }, - { - "epoch": 0.36128245102690976, - "grad_norm": 1.3730753360843175, - "learning_rate": 1.4776539509190198e-06, - "loss": 1.1702, - "step": 2665 - }, - { - "epoch": 0.3614180166745747, - "grad_norm": 1.4336975348514496, - "learning_rate": 1.4772681011086788e-06, - "loss": 1.141, - "step": 2666 - }, - { - "epoch": 0.36155358232223955, - "grad_norm": 1.541120201273637, - "learning_rate": 1.4768821592609513e-06, - "loss": 1.1562, - "step": 2667 - }, - { - "epoch": 0.3616891479699044, - "grad_norm": 1.6555035906673434, - "learning_rate": 1.4764961254502639e-06, - "loss": 1.1991, - "step": 2668 - }, - { - "epoch": 0.3618247136175693, - "grad_norm": 1.8525512304244152, - "learning_rate": 1.47610999975106e-06, - "loss": 1.1596, - "step": 2669 - }, - { - "epoch": 0.36196027926523416, - "grad_norm": 1.7768786227129278, - "learning_rate": 1.4757237822378009e-06, - "loss": 1.1602, - "step": 2670 - }, - { - "epoch": 0.3620958449128991, - "grad_norm": 3.939037369301901, - "learning_rate": 1.4753374729849656e-06, - "loss": 1.1899, - "step": 2671 - }, - { - "epoch": 0.36223141056056396, - "grad_norm": 1.7575313204059744, - "learning_rate": 1.4749510720670503e-06, - "loss": 1.1343, - "step": 2672 - }, - { - "epoch": 0.36236697620822883, - "grad_norm": 1.578567985133941, - "learning_rate": 1.47456457955857e-06, - "loss": 1.1443, - "step": 2673 - }, - { - "epoch": 0.3625025418558937, - "grad_norm": 1.5396381306932578, - "learning_rate": 1.4741779955340565e-06, - "loss": 1.1262, - "step": 2674 - }, - { - "epoch": 0.36263810750355857, - "grad_norm": 2.8833840762682477, - "learning_rate": 1.4737913200680596e-06, - "loss": 1.1393, - "step": 2675 - }, - { - "epoch": 0.3627736731512235, - "grad_norm": 1.7680226556399328, - "learning_rate": 1.4734045532351463e-06, - "loss": 1.2197, - "step": 2676 - }, - { - "epoch": 0.36290923879888837, - "grad_norm": 1.7081557341720084, - "learning_rate": 1.473017695109902e-06, - "loss": 1.1425, - "step": 2677 - }, - { - "epoch": 0.36304480444655324, - "grad_norm": 1.8734224000525042, - "learning_rate": 1.472630745766929e-06, - "loss": 1.1902, - "step": 2678 - }, - { - "epoch": 0.3631803700942181, - "grad_norm": 2.293544196026003, - "learning_rate": 1.4722437052808472e-06, - "loss": 1.1724, - "step": 2679 - }, - { - "epoch": 0.36331593574188303, - "grad_norm": 1.6461253430764882, - "learning_rate": 1.4718565737262945e-06, - "loss": 1.1533, - "step": 2680 - }, - { - "epoch": 0.3634515013895479, - "grad_norm": 1.7012907618929891, - "learning_rate": 1.4714693511779262e-06, - "loss": 1.1618, - "step": 2681 - }, - { - "epoch": 0.3635870670372128, - "grad_norm": 1.6192203253896096, - "learning_rate": 1.471082037710415e-06, - "loss": 1.1833, - "step": 2682 - }, - { - "epoch": 0.36372263268487764, - "grad_norm": 1.4083848894603994, - "learning_rate": 1.4706946333984514e-06, - "loss": 1.1541, - "step": 2683 - }, - { - "epoch": 0.3638581983325425, - "grad_norm": 1.540624138247834, - "learning_rate": 1.4703071383167433e-06, - "loss": 1.1601, - "step": 2684 - }, - { - "epoch": 0.36399376398020744, - "grad_norm": 2.247881257183708, - "learning_rate": 1.4699195525400158e-06, - "loss": 1.1798, - "step": 2685 - }, - { - "epoch": 0.3641293296278723, - "grad_norm": 1.5638328371116674, - "learning_rate": 1.469531876143012e-06, - "loss": 1.1931, - "step": 2686 - }, - { - "epoch": 0.3642648952755372, - "grad_norm": 1.6333906232835949, - "learning_rate": 1.4691441092004921e-06, - "loss": 1.1425, - "step": 2687 - }, - { - "epoch": 0.36440046092320205, - "grad_norm": 1.6152983902891416, - "learning_rate": 1.4687562517872342e-06, - "loss": 1.1752, - "step": 2688 - }, - { - "epoch": 0.3645360265708669, - "grad_norm": 1.8313478628575959, - "learning_rate": 1.4683683039780328e-06, - "loss": 1.1909, - "step": 2689 - }, - { - "epoch": 0.36467159221853185, - "grad_norm": 2.679128731466459, - "learning_rate": 1.4679802658477013e-06, - "loss": 1.1697, - "step": 2690 - }, - { - "epoch": 0.3648071578661967, - "grad_norm": 6.753283924530054, - "learning_rate": 1.4675921374710696e-06, - "loss": 1.1629, - "step": 2691 - }, - { - "epoch": 0.3649427235138616, - "grad_norm": 1.4216753453823174, - "learning_rate": 1.467203918922985e-06, - "loss": 1.1626, - "step": 2692 - }, - { - "epoch": 0.36507828916152646, - "grad_norm": 1.9760819969022407, - "learning_rate": 1.4668156102783125e-06, - "loss": 1.1352, - "step": 2693 - }, - { - "epoch": 0.36521385480919133, - "grad_norm": 1.6073711014251932, - "learning_rate": 1.4664272116119345e-06, - "loss": 1.1484, - "step": 2694 - }, - { - "epoch": 0.36534942045685626, - "grad_norm": 1.6345763109718703, - "learning_rate": 1.4660387229987504e-06, - "loss": 1.1306, - "step": 2695 - }, - { - "epoch": 0.3654849861045211, - "grad_norm": 2.4928566282945708, - "learning_rate": 1.4656501445136774e-06, - "loss": 1.193, - "step": 2696 - }, - { - "epoch": 0.365620551752186, - "grad_norm": 2.1972041566387652, - "learning_rate": 1.4652614762316495e-06, - "loss": 1.1321, - "step": 2697 - }, - { - "epoch": 0.36575611739985087, - "grad_norm": 1.4602277677590003, - "learning_rate": 1.4648727182276186e-06, - "loss": 1.1898, - "step": 2698 - }, - { - "epoch": 0.36589168304751574, - "grad_norm": 1.401122869535631, - "learning_rate": 1.4644838705765534e-06, - "loss": 1.1607, - "step": 2699 - }, - { - "epoch": 0.36602724869518066, - "grad_norm": 1.549340835051294, - "learning_rate": 1.46409493335344e-06, - "loss": 1.1585, - "step": 2700 - }, - { - "epoch": 0.36616281434284553, - "grad_norm": 2.083453949802379, - "learning_rate": 1.4637059066332824e-06, - "loss": 1.1817, - "step": 2701 - }, - { - "epoch": 0.3662983799905104, - "grad_norm": 1.5353936920011488, - "learning_rate": 1.4633167904911008e-06, - "loss": 1.1849, - "step": 2702 - }, - { - "epoch": 0.3664339456381753, - "grad_norm": 1.4694783857342484, - "learning_rate": 1.4629275850019336e-06, - "loss": 1.167, - "step": 2703 - }, - { - "epoch": 0.36656951128584014, - "grad_norm": 1.8538216918000776, - "learning_rate": 1.4625382902408354e-06, - "loss": 1.1791, - "step": 2704 - }, - { - "epoch": 0.36670507693350507, - "grad_norm": 1.987606862086196, - "learning_rate": 1.4621489062828788e-06, - "loss": 1.1516, - "step": 2705 - }, - { - "epoch": 0.36684064258116994, - "grad_norm": 1.678344312207345, - "learning_rate": 1.461759433203154e-06, - "loss": 1.1449, - "step": 2706 - }, - { - "epoch": 0.3669762082288348, - "grad_norm": 1.7523857112068422, - "learning_rate": 1.4613698710767674e-06, - "loss": 1.1699, - "step": 2707 - }, - { - "epoch": 0.3671117738764997, - "grad_norm": 1.4245468151012477, - "learning_rate": 1.4609802199788427e-06, - "loss": 1.1605, - "step": 2708 - }, - { - "epoch": 0.36724733952416455, - "grad_norm": 1.5498498574434827, - "learning_rate": 1.4605904799845218e-06, - "loss": 1.1512, - "step": 2709 - }, - { - "epoch": 0.3673829051718295, - "grad_norm": 1.6221057289562324, - "learning_rate": 1.4602006511689623e-06, - "loss": 1.1799, - "step": 2710 - }, - { - "epoch": 0.36751847081949435, - "grad_norm": 1.560958347284891, - "learning_rate": 1.4598107336073396e-06, - "loss": 1.1352, - "step": 2711 - }, - { - "epoch": 0.3676540364671592, - "grad_norm": 2.0584691154729886, - "learning_rate": 1.4594207273748467e-06, - "loss": 1.1442, - "step": 2712 - }, - { - "epoch": 0.3677896021148241, - "grad_norm": 1.570543900221731, - "learning_rate": 1.459030632546693e-06, - "loss": 1.1917, - "step": 2713 - }, - { - "epoch": 0.36792516776248896, - "grad_norm": 1.7043370759474203, - "learning_rate": 1.458640449198105e-06, - "loss": 1.1886, - "step": 2714 - }, - { - "epoch": 0.3680607334101539, - "grad_norm": 1.5445081300652688, - "learning_rate": 1.4582501774043268e-06, - "loss": 1.2099, - "step": 2715 - }, - { - "epoch": 0.36819629905781875, - "grad_norm": 1.550816099092105, - "learning_rate": 1.4578598172406189e-06, - "loss": 1.1643, - "step": 2716 - }, - { - "epoch": 0.3683318647054836, - "grad_norm": 2.739707662416962, - "learning_rate": 1.4574693687822594e-06, - "loss": 1.1648, - "step": 2717 - }, - { - "epoch": 0.3684674303531485, - "grad_norm": 2.3007387096578467, - "learning_rate": 1.4570788321045432e-06, - "loss": 1.1352, - "step": 2718 - }, - { - "epoch": 0.3686029960008134, - "grad_norm": 1.625210015977814, - "learning_rate": 1.4566882072827824e-06, - "loss": 1.1404, - "step": 2719 - }, - { - "epoch": 0.3687385616484783, - "grad_norm": 1.3942560086227505, - "learning_rate": 1.4562974943923054e-06, - "loss": 1.1996, - "step": 2720 - }, - { - "epoch": 0.36887412729614316, - "grad_norm": 1.6253800163027943, - "learning_rate": 1.4559066935084588e-06, - "loss": 1.1483, - "step": 2721 - }, - { - "epoch": 0.36900969294380803, - "grad_norm": 1.4809094884735345, - "learning_rate": 1.4555158047066047e-06, - "loss": 1.1728, - "step": 2722 - }, - { - "epoch": 0.3691452585914729, - "grad_norm": 1.7007893145949957, - "learning_rate": 1.4551248280621234e-06, - "loss": 1.1505, - "step": 2723 - }, - { - "epoch": 0.36928082423913783, - "grad_norm": 3.170640423971189, - "learning_rate": 1.4547337636504116e-06, - "loss": 1.1973, - "step": 2724 - }, - { - "epoch": 0.3694163898868027, - "grad_norm": 1.9667372866432289, - "learning_rate": 1.4543426115468829e-06, - "loss": 1.1352, - "step": 2725 - }, - { - "epoch": 0.36955195553446757, - "grad_norm": 1.4645855255181286, - "learning_rate": 1.453951371826968e-06, - "loss": 1.1382, - "step": 2726 - }, - { - "epoch": 0.36968752118213244, - "grad_norm": 1.611839684995952, - "learning_rate": 1.4535600445661143e-06, - "loss": 1.2117, - "step": 2727 - }, - { - "epoch": 0.3698230868297973, - "grad_norm": 6.521376993789329, - "learning_rate": 1.453168629839786e-06, - "loss": 1.1844, - "step": 2728 - }, - { - "epoch": 0.36995865247746224, - "grad_norm": 1.6457127001467826, - "learning_rate": 1.4527771277234648e-06, - "loss": 1.2082, - "step": 2729 - }, - { - "epoch": 0.3700942181251271, - "grad_norm": 1.6601472053421304, - "learning_rate": 1.4523855382926483e-06, - "loss": 1.2099, - "step": 2730 - }, - { - "epoch": 0.370229783772792, - "grad_norm": 1.5007357714298144, - "learning_rate": 1.4519938616228518e-06, - "loss": 1.1591, - "step": 2731 - }, - { - "epoch": 0.37036534942045685, - "grad_norm": 1.6830521179323668, - "learning_rate": 1.4516020977896067e-06, - "loss": 1.1685, - "step": 2732 - }, - { - "epoch": 0.3705009150681217, - "grad_norm": 1.6553329746508367, - "learning_rate": 1.4512102468684621e-06, - "loss": 1.1799, - "step": 2733 - }, - { - "epoch": 0.37063648071578664, - "grad_norm": 1.4562088837869065, - "learning_rate": 1.4508183089349828e-06, - "loss": 1.1496, - "step": 2734 - }, - { - "epoch": 0.3707720463634515, - "grad_norm": 1.617960202713426, - "learning_rate": 1.4504262840647512e-06, - "loss": 1.1275, - "step": 2735 - }, - { - "epoch": 0.3709076120111164, - "grad_norm": 1.4177137596930662, - "learning_rate": 1.4500341723333663e-06, - "loss": 1.1855, - "step": 2736 - }, - { - "epoch": 0.37104317765878125, - "grad_norm": 1.3988461259865945, - "learning_rate": 1.4496419738164434e-06, - "loss": 1.1586, - "step": 2737 - }, - { - "epoch": 0.3711787433064461, - "grad_norm": 2.521803783387221, - "learning_rate": 1.449249688589615e-06, - "loss": 1.1288, - "step": 2738 - }, - { - "epoch": 0.37131430895411105, - "grad_norm": 2.194761253198728, - "learning_rate": 1.4488573167285307e-06, - "loss": 1.1664, - "step": 2739 - }, - { - "epoch": 0.3714498746017759, - "grad_norm": 1.864805239577867, - "learning_rate": 1.448464858308856e-06, - "loss": 1.178, - "step": 2740 - }, - { - "epoch": 0.3715854402494408, - "grad_norm": 1.7336757709708022, - "learning_rate": 1.4480723134062732e-06, - "loss": 1.1279, - "step": 2741 - }, - { - "epoch": 0.37172100589710566, - "grad_norm": 2.2076014722787325, - "learning_rate": 1.4476796820964814e-06, - "loss": 1.1564, - "step": 2742 - }, - { - "epoch": 0.37185657154477053, - "grad_norm": 2.2552869483442586, - "learning_rate": 1.4472869644551966e-06, - "loss": 1.1697, - "step": 2743 - }, - { - "epoch": 0.37199213719243546, - "grad_norm": 1.4144394525579016, - "learning_rate": 1.4468941605581518e-06, - "loss": 1.1388, - "step": 2744 - }, - { - "epoch": 0.3721277028401003, - "grad_norm": 2.385013353517025, - "learning_rate": 1.4465012704810952e-06, - "loss": 1.1782, - "step": 2745 - }, - { - "epoch": 0.3722632684877652, - "grad_norm": 1.7620320118311337, - "learning_rate": 1.4461082942997936e-06, - "loss": 1.1664, - "step": 2746 - }, - { - "epoch": 0.37239883413543007, - "grad_norm": 1.9821329333901359, - "learning_rate": 1.4457152320900283e-06, - "loss": 1.1646, - "step": 2747 - }, - { - "epoch": 0.37253439978309494, - "grad_norm": 1.6355533646171287, - "learning_rate": 1.445322083927599e-06, - "loss": 1.1546, - "step": 2748 - }, - { - "epoch": 0.37266996543075986, - "grad_norm": 1.4563067611909064, - "learning_rate": 1.444928849888321e-06, - "loss": 1.144, - "step": 2749 - }, - { - "epoch": 0.37280553107842473, - "grad_norm": 2.066800687620252, - "learning_rate": 1.4445355300480262e-06, - "loss": 1.105, - "step": 2750 - }, - { - "epoch": 0.3729410967260896, - "grad_norm": 1.5970715835534157, - "learning_rate": 1.4441421244825636e-06, - "loss": 1.1606, - "step": 2751 - }, - { - "epoch": 0.3730766623737545, - "grad_norm": 1.5288751793718627, - "learning_rate": 1.443748633267798e-06, - "loss": 1.179, - "step": 2752 - }, - { - "epoch": 0.37321222802141935, - "grad_norm": 1.4850752210606024, - "learning_rate": 1.443355056479611e-06, - "loss": 1.1857, - "step": 2753 - }, - { - "epoch": 0.37334779366908427, - "grad_norm": 2.999854428786684, - "learning_rate": 1.4429613941939016e-06, - "loss": 1.1613, - "step": 2754 - }, - { - "epoch": 0.37348335931674914, - "grad_norm": 1.5169384826054657, - "learning_rate": 1.4425676464865835e-06, - "loss": 1.2191, - "step": 2755 - }, - { - "epoch": 0.373618924964414, - "grad_norm": 1.8646520958786716, - "learning_rate": 1.442173813433588e-06, - "loss": 1.1693, - "step": 2756 - }, - { - "epoch": 0.3737544906120789, - "grad_norm": 2.6900789268856564, - "learning_rate": 1.4417798951108632e-06, - "loss": 1.2018, - "step": 2757 - }, - { - "epoch": 0.3738900562597438, - "grad_norm": 1.498842219490377, - "learning_rate": 1.4413858915943728e-06, - "loss": 1.1784, - "step": 2758 - }, - { - "epoch": 0.3740256219074087, - "grad_norm": 1.4582280748645529, - "learning_rate": 1.4409918029600972e-06, - "loss": 1.1986, - "step": 2759 - }, - { - "epoch": 0.37416118755507355, - "grad_norm": 2.802451553917144, - "learning_rate": 1.4405976292840332e-06, - "loss": 1.1657, - "step": 2760 - }, - { - "epoch": 0.3742967532027384, - "grad_norm": 1.6250789091739706, - "learning_rate": 1.4402033706421945e-06, - "loss": 1.1343, - "step": 2761 - }, - { - "epoch": 0.3744323188504033, - "grad_norm": 1.545422975223051, - "learning_rate": 1.4398090271106104e-06, - "loss": 1.1815, - "step": 2762 - }, - { - "epoch": 0.3745678844980682, - "grad_norm": 2.0476824632080595, - "learning_rate": 1.4394145987653272e-06, - "loss": 1.1729, - "step": 2763 - }, - { - "epoch": 0.3747034501457331, - "grad_norm": 1.5275166535597073, - "learning_rate": 1.4390200856824072e-06, - "loss": 1.1519, - "step": 2764 - }, - { - "epoch": 0.37483901579339796, - "grad_norm": 2.281607861973894, - "learning_rate": 1.438625487937929e-06, - "loss": 1.2188, - "step": 2765 - }, - { - "epoch": 0.3749745814410628, - "grad_norm": 1.3657368675486452, - "learning_rate": 1.4382308056079876e-06, - "loss": 1.1518, - "step": 2766 - }, - { - "epoch": 0.3751101470887277, - "grad_norm": 1.4402786953655446, - "learning_rate": 1.4378360387686948e-06, - "loss": 1.1871, - "step": 2767 - }, - { - "epoch": 0.3752457127363926, - "grad_norm": 1.507309898553813, - "learning_rate": 1.4374411874961777e-06, - "loss": 1.1639, - "step": 2768 - }, - { - "epoch": 0.3753812783840575, - "grad_norm": 1.3346213048229865, - "learning_rate": 1.437046251866581e-06, - "loss": 1.1583, - "step": 2769 - }, - { - "epoch": 0.37551684403172236, - "grad_norm": 1.8142667398115502, - "learning_rate": 1.436651231956064e-06, - "loss": 1.1631, - "step": 2770 - }, - { - "epoch": 0.37565240967938723, - "grad_norm": 1.3890426144393107, - "learning_rate": 1.4362561278408038e-06, - "loss": 1.1875, - "step": 2771 - }, - { - "epoch": 0.3757879753270521, - "grad_norm": 1.5638339865458988, - "learning_rate": 1.435860939596993e-06, - "loss": 1.1867, - "step": 2772 - }, - { - "epoch": 0.37592354097471703, - "grad_norm": 1.7711571853676347, - "learning_rate": 1.43546566730084e-06, - "loss": 1.1247, - "step": 2773 - }, - { - "epoch": 0.3760591066223819, - "grad_norm": 1.766265656206971, - "learning_rate": 1.4350703110285709e-06, - "loss": 1.1743, - "step": 2774 - }, - { - "epoch": 0.37619467227004677, - "grad_norm": 1.5642211881676202, - "learning_rate": 1.4346748708564264e-06, - "loss": 1.1943, - "step": 2775 - }, - { - "epoch": 0.37633023791771164, - "grad_norm": 2.047967602826698, - "learning_rate": 1.4342793468606643e-06, - "loss": 1.1559, - "step": 2776 - }, - { - "epoch": 0.3764658035653765, - "grad_norm": 2.006996651248735, - "learning_rate": 1.433883739117558e-06, - "loss": 1.1308, - "step": 2777 - }, - { - "epoch": 0.37660136921304144, - "grad_norm": 1.5186452992241666, - "learning_rate": 1.4334880477033976e-06, - "loss": 1.1715, - "step": 2778 - }, - { - "epoch": 0.3767369348607063, - "grad_norm": 1.6607362567734327, - "learning_rate": 1.4330922726944889e-06, - "loss": 1.1301, - "step": 2779 - }, - { - "epoch": 0.3768725005083712, - "grad_norm": 1.5453222295281377, - "learning_rate": 1.432696414167154e-06, - "loss": 1.2095, - "step": 2780 - }, - { - "epoch": 0.37700806615603605, - "grad_norm": 1.792396594101511, - "learning_rate": 1.4323004721977312e-06, - "loss": 1.1287, - "step": 2781 - }, - { - "epoch": 0.3771436318037009, - "grad_norm": 3.1082710768684962, - "learning_rate": 1.4319044468625748e-06, - "loss": 1.1102, - "step": 2782 - }, - { - "epoch": 0.37727919745136584, - "grad_norm": 1.9713543794933754, - "learning_rate": 1.4315083382380552e-06, - "loss": 1.1734, - "step": 2783 - }, - { - "epoch": 0.3774147630990307, - "grad_norm": 1.7442480204491544, - "learning_rate": 1.4311121464005582e-06, - "loss": 1.1717, - "step": 2784 - }, - { - "epoch": 0.3775503287466956, - "grad_norm": 1.5122265404592905, - "learning_rate": 1.430715871426487e-06, - "loss": 1.1914, - "step": 2785 - }, - { - "epoch": 0.37768589439436046, - "grad_norm": 1.5204944970032126, - "learning_rate": 1.43031951339226e-06, - "loss": 1.1836, - "step": 2786 - }, - { - "epoch": 0.3778214600420253, - "grad_norm": 1.9529502867331507, - "learning_rate": 1.4299230723743112e-06, - "loss": 1.2126, - "step": 2787 - }, - { - "epoch": 0.37795702568969025, - "grad_norm": 1.778852281482099, - "learning_rate": 1.4295265484490918e-06, - "loss": 1.1605, - "step": 2788 - }, - { - "epoch": 0.3780925913373551, - "grad_norm": 1.4864297961303838, - "learning_rate": 1.429129941693068e-06, - "loss": 1.1824, - "step": 2789 - }, - { - "epoch": 0.37822815698502, - "grad_norm": 1.5960510938654302, - "learning_rate": 1.428733252182722e-06, - "loss": 1.1389, - "step": 2790 - }, - { - "epoch": 0.37836372263268486, - "grad_norm": 1.5167815357069063, - "learning_rate": 1.4283364799945527e-06, - "loss": 1.1194, - "step": 2791 - }, - { - "epoch": 0.37849928828034973, - "grad_norm": 1.7848490350515618, - "learning_rate": 1.4279396252050747e-06, - "loss": 1.1639, - "step": 2792 - }, - { - "epoch": 0.37863485392801466, - "grad_norm": 1.3359202176708957, - "learning_rate": 1.4275426878908174e-06, - "loss": 1.1498, - "step": 2793 - }, - { - "epoch": 0.37877041957567953, - "grad_norm": 2.3674322986010736, - "learning_rate": 1.4271456681283275e-06, - "loss": 1.1603, - "step": 2794 - }, - { - "epoch": 0.3789059852233444, - "grad_norm": 1.359766048932401, - "learning_rate": 1.4267485659941676e-06, - "loss": 1.1589, - "step": 2795 - }, - { - "epoch": 0.37904155087100927, - "grad_norm": 1.5939151875662965, - "learning_rate": 1.4263513815649152e-06, - "loss": 1.1489, - "step": 2796 - }, - { - "epoch": 0.3791771165186742, - "grad_norm": 3.0727943411257757, - "learning_rate": 1.4259541149171643e-06, - "loss": 1.1617, - "step": 2797 - }, - { - "epoch": 0.37931268216633907, - "grad_norm": 3.3112961852151908, - "learning_rate": 1.4255567661275247e-06, - "loss": 1.1536, - "step": 2798 - }, - { - "epoch": 0.37944824781400394, - "grad_norm": 1.938616387147304, - "learning_rate": 1.4251593352726217e-06, - "loss": 1.1737, - "step": 2799 - }, - { - "epoch": 0.3795838134616688, - "grad_norm": 6.050452952233669, - "learning_rate": 1.4247618224290968e-06, - "loss": 1.1849, - "step": 2800 - }, - { - "epoch": 0.3797193791093337, - "grad_norm": 1.5210417409490518, - "learning_rate": 1.4243642276736076e-06, - "loss": 1.1666, - "step": 2801 - }, - { - "epoch": 0.3798549447569986, - "grad_norm": 2.062975898183793, - "learning_rate": 1.4239665510828266e-06, - "loss": 1.2055, - "step": 2802 - }, - { - "epoch": 0.3799905104046635, - "grad_norm": 1.605288343327243, - "learning_rate": 1.423568792733443e-06, - "loss": 1.1483, - "step": 2803 - }, - { - "epoch": 0.38012607605232834, - "grad_norm": 1.4507483767338765, - "learning_rate": 1.423170952702161e-06, - "loss": 1.1534, - "step": 2804 - }, - { - "epoch": 0.3802616416999932, - "grad_norm": 1.4515498793616715, - "learning_rate": 1.422773031065701e-06, - "loss": 1.1898, - "step": 2805 - }, - { - "epoch": 0.3803972073476581, - "grad_norm": 1.514980659908831, - "learning_rate": 1.4223750279007993e-06, - "loss": 1.1514, - "step": 2806 - }, - { - "epoch": 0.380532772995323, - "grad_norm": 1.556497991888125, - "learning_rate": 1.4219769432842075e-06, - "loss": 1.1817, - "step": 2807 - }, - { - "epoch": 0.3806683386429879, - "grad_norm": 1.6070197668394395, - "learning_rate": 1.4215787772926931e-06, - "loss": 1.1577, - "step": 2808 - }, - { - "epoch": 0.38080390429065275, - "grad_norm": 1.439184088336358, - "learning_rate": 1.4211805300030389e-06, - "loss": 1.132, - "step": 2809 - }, - { - "epoch": 0.3809394699383176, - "grad_norm": 1.5014660350209088, - "learning_rate": 1.4207822014920443e-06, - "loss": 1.1486, - "step": 2810 - }, - { - "epoch": 0.3810750355859825, - "grad_norm": 2.271999104279936, - "learning_rate": 1.420383791836524e-06, - "loss": 1.1726, - "step": 2811 - }, - { - "epoch": 0.3812106012336474, - "grad_norm": 1.593736085399741, - "learning_rate": 1.419985301113307e-06, - "loss": 1.1614, - "step": 2812 - }, - { - "epoch": 0.3813461668813123, - "grad_norm": 1.573277696128947, - "learning_rate": 1.4195867293992405e-06, - "loss": 1.1839, - "step": 2813 - }, - { - "epoch": 0.38148173252897716, - "grad_norm": 1.4693870652003027, - "learning_rate": 1.419188076771185e-06, - "loss": 1.1818, - "step": 2814 - }, - { - "epoch": 0.38161729817664203, - "grad_norm": 1.9838664104176267, - "learning_rate": 1.4187893433060176e-06, - "loss": 1.1466, - "step": 2815 - }, - { - "epoch": 0.3817528638243069, - "grad_norm": 1.6339054825705612, - "learning_rate": 1.4183905290806313e-06, - "loss": 1.1721, - "step": 2816 - }, - { - "epoch": 0.3818884294719718, - "grad_norm": 1.771101620208098, - "learning_rate": 1.4179916341719339e-06, - "loss": 1.1665, - "step": 2817 - }, - { - "epoch": 0.3820239951196367, - "grad_norm": 1.4680352031402837, - "learning_rate": 1.4175926586568493e-06, - "loss": 1.1578, - "step": 2818 - }, - { - "epoch": 0.38215956076730156, - "grad_norm": 1.8037288665695252, - "learning_rate": 1.4171936026123168e-06, - "loss": 1.1694, - "step": 2819 - }, - { - "epoch": 0.38229512641496644, - "grad_norm": 1.6703585960298812, - "learning_rate": 1.4167944661152911e-06, - "loss": 1.1833, - "step": 2820 - }, - { - "epoch": 0.3824306920626313, - "grad_norm": 1.9509561287145525, - "learning_rate": 1.4163952492427424e-06, - "loss": 1.1768, - "step": 2821 - }, - { - "epoch": 0.38256625771029623, - "grad_norm": 1.5296067377296414, - "learning_rate": 1.415995952071657e-06, - "loss": 1.1926, - "step": 2822 - }, - { - "epoch": 0.3827018233579611, - "grad_norm": 1.5787528569878315, - "learning_rate": 1.415596574679036e-06, - "loss": 1.1582, - "step": 2823 - }, - { - "epoch": 0.38283738900562597, - "grad_norm": 1.4771473276512048, - "learning_rate": 1.4151971171418959e-06, - "loss": 1.1471, - "step": 2824 - }, - { - "epoch": 0.38297295465329084, - "grad_norm": 1.462319358778104, - "learning_rate": 1.4147975795372694e-06, - "loss": 1.1279, - "step": 2825 - }, - { - "epoch": 0.3831085203009557, - "grad_norm": 2.272576307997905, - "learning_rate": 1.4143979619422035e-06, - "loss": 1.1804, - "step": 2826 - }, - { - "epoch": 0.38324408594862064, - "grad_norm": 1.3750311165685134, - "learning_rate": 1.4139982644337617e-06, - "loss": 1.1548, - "step": 2827 - }, - { - "epoch": 0.3833796515962855, - "grad_norm": 1.5630439347123788, - "learning_rate": 1.4135984870890228e-06, - "loss": 1.112, - "step": 2828 - }, - { - "epoch": 0.3835152172439504, - "grad_norm": 1.6064647658951947, - "learning_rate": 1.4131986299850803e-06, - "loss": 1.2068, - "step": 2829 - }, - { - "epoch": 0.38365078289161525, - "grad_norm": 1.6814441809724268, - "learning_rate": 1.4127986931990437e-06, - "loss": 1.1124, - "step": 2830 - }, - { - "epoch": 0.3837863485392801, - "grad_norm": 1.659809146381489, - "learning_rate": 1.4123986768080375e-06, - "loss": 1.1398, - "step": 2831 - }, - { - "epoch": 0.38392191418694505, - "grad_norm": 1.739252464929412, - "learning_rate": 1.4119985808892016e-06, - "loss": 1.1957, - "step": 2832 - }, - { - "epoch": 0.3840574798346099, - "grad_norm": 1.7205162995875132, - "learning_rate": 1.4115984055196918e-06, - "loss": 1.1416, - "step": 2833 - }, - { - "epoch": 0.3841930454822748, - "grad_norm": 2.419135482554706, - "learning_rate": 1.4111981507766782e-06, - "loss": 1.1631, - "step": 2834 - }, - { - "epoch": 0.38432861112993966, - "grad_norm": 2.430771888359287, - "learning_rate": 1.4107978167373469e-06, - "loss": 1.1893, - "step": 2835 - }, - { - "epoch": 0.3844641767776046, - "grad_norm": 1.4687859092262687, - "learning_rate": 1.4103974034788994e-06, - "loss": 1.1398, - "step": 2836 - }, - { - "epoch": 0.38459974242526945, - "grad_norm": 1.5287193327902664, - "learning_rate": 1.4099969110785521e-06, - "loss": 1.1674, - "step": 2837 - }, - { - "epoch": 0.3847353080729343, - "grad_norm": 1.663613394681787, - "learning_rate": 1.409596339613537e-06, - "loss": 1.1683, - "step": 2838 - }, - { - "epoch": 0.3848708737205992, - "grad_norm": 1.683140308729507, - "learning_rate": 1.409195689161101e-06, - "loss": 1.1778, - "step": 2839 - }, - { - "epoch": 0.38500643936826406, - "grad_norm": 1.55034548735741, - "learning_rate": 1.4087949597985062e-06, - "loss": 1.1898, - "step": 2840 - }, - { - "epoch": 0.385142005015929, - "grad_norm": 2.1240340575140007, - "learning_rate": 1.4083941516030303e-06, - "loss": 1.1963, - "step": 2841 - }, - { - "epoch": 0.38527757066359386, - "grad_norm": 1.6122656313664805, - "learning_rate": 1.407993264651966e-06, - "loss": 1.139, - "step": 2842 - }, - { - "epoch": 0.38541313631125873, - "grad_norm": 1.7973892877832507, - "learning_rate": 1.4075922990226209e-06, - "loss": 1.1571, - "step": 2843 - }, - { - "epoch": 0.3855487019589236, - "grad_norm": 1.5595611002580205, - "learning_rate": 1.407191254792318e-06, - "loss": 1.1208, - "step": 2844 - }, - { - "epoch": 0.38568426760658847, - "grad_norm": 1.7303972267487238, - "learning_rate": 1.4067901320383962e-06, - "loss": 1.1401, - "step": 2845 - }, - { - "epoch": 0.3858198332542534, - "grad_norm": 1.4602619799089978, - "learning_rate": 1.4063889308382084e-06, - "loss": 1.1356, - "step": 2846 - }, - { - "epoch": 0.38595539890191827, - "grad_norm": 5.912755060816053, - "learning_rate": 1.405987651269123e-06, - "loss": 1.1651, - "step": 2847 - }, - { - "epoch": 0.38609096454958314, - "grad_norm": 1.468187479131227, - "learning_rate": 1.4055862934085239e-06, - "loss": 1.1908, - "step": 2848 - }, - { - "epoch": 0.386226530197248, - "grad_norm": 2.032308973452589, - "learning_rate": 1.4051848573338095e-06, - "loss": 1.1506, - "step": 2849 - }, - { - "epoch": 0.3863620958449129, - "grad_norm": 1.9065344099718349, - "learning_rate": 1.4047833431223936e-06, - "loss": 1.1346, - "step": 2850 - }, - { - "epoch": 0.3864976614925778, - "grad_norm": 1.5646153956305318, - "learning_rate": 1.4043817508517053e-06, - "loss": 1.2145, - "step": 2851 - }, - { - "epoch": 0.3866332271402427, - "grad_norm": 2.3350765558369537, - "learning_rate": 1.4039800805991883e-06, - "loss": 1.1438, - "step": 2852 - }, - { - "epoch": 0.38676879278790754, - "grad_norm": 1.960640282858717, - "learning_rate": 1.403578332442302e-06, - "loss": 1.1888, - "step": 2853 - }, - { - "epoch": 0.3869043584355724, - "grad_norm": 1.722859454804138, - "learning_rate": 1.4031765064585196e-06, - "loss": 1.2039, - "step": 2854 - }, - { - "epoch": 0.3870399240832373, - "grad_norm": 4.763504354624624, - "learning_rate": 1.4027746027253301e-06, - "loss": 1.1486, - "step": 2855 - }, - { - "epoch": 0.3871754897309022, - "grad_norm": 1.953915042025117, - "learning_rate": 1.402372621320238e-06, - "loss": 1.2267, - "step": 2856 - }, - { - "epoch": 0.3873110553785671, - "grad_norm": 1.7048647882690227, - "learning_rate": 1.401970562320762e-06, - "loss": 1.1844, - "step": 2857 - }, - { - "epoch": 0.38744662102623195, - "grad_norm": 1.5869268974344535, - "learning_rate": 1.4015684258044363e-06, - "loss": 1.1266, - "step": 2858 - }, - { - "epoch": 0.3875821866738968, - "grad_norm": 2.1777112685571196, - "learning_rate": 1.401166211848809e-06, - "loss": 1.1201, - "step": 2859 - }, - { - "epoch": 0.3877177523215617, - "grad_norm": 1.5964885574440972, - "learning_rate": 1.4007639205314448e-06, - "loss": 1.1492, - "step": 2860 - }, - { - "epoch": 0.3878533179692266, - "grad_norm": 1.557816694305163, - "learning_rate": 1.4003615519299216e-06, - "loss": 1.1376, - "step": 2861 - }, - { - "epoch": 0.3879888836168915, - "grad_norm": 1.6541381384829568, - "learning_rate": 1.3999591061218334e-06, - "loss": 1.1471, - "step": 2862 - }, - { - "epoch": 0.38812444926455636, - "grad_norm": 1.4192962732636445, - "learning_rate": 1.399556583184789e-06, - "loss": 1.1747, - "step": 2863 - }, - { - "epoch": 0.38826001491222123, - "grad_norm": 1.8912671716612806, - "learning_rate": 1.3991539831964114e-06, - "loss": 1.1209, - "step": 2864 - }, - { - "epoch": 0.3883955805598861, - "grad_norm": 1.851004432886314, - "learning_rate": 1.3987513062343385e-06, - "loss": 1.1865, - "step": 2865 - }, - { - "epoch": 0.388531146207551, - "grad_norm": 3.714269895667919, - "learning_rate": 1.3983485523762243e-06, - "loss": 1.1715, - "step": 2866 - }, - { - "epoch": 0.3886667118552159, - "grad_norm": 1.7590417170206345, - "learning_rate": 1.3979457216997358e-06, - "loss": 1.1667, - "step": 2867 - }, - { - "epoch": 0.38880227750288077, - "grad_norm": 1.53919376052351, - "learning_rate": 1.397542814282556e-06, - "loss": 1.1451, - "step": 2868 - }, - { - "epoch": 0.38893784315054564, - "grad_norm": 2.1993843749852506, - "learning_rate": 1.3971398302023824e-06, - "loss": 1.1583, - "step": 2869 - }, - { - "epoch": 0.3890734087982105, - "grad_norm": 1.6972561291139334, - "learning_rate": 1.3967367695369276e-06, - "loss": 1.1885, - "step": 2870 - }, - { - "epoch": 0.38920897444587543, - "grad_norm": 1.6110741947190144, - "learning_rate": 1.3963336323639183e-06, - "loss": 1.1874, - "step": 2871 - }, - { - "epoch": 0.3893445400935403, - "grad_norm": 2.4217238783890216, - "learning_rate": 1.3959304187610967e-06, - "loss": 1.1262, - "step": 2872 - }, - { - "epoch": 0.3894801057412052, - "grad_norm": 3.6583943371089007, - "learning_rate": 1.3955271288062188e-06, - "loss": 1.1768, - "step": 2873 - }, - { - "epoch": 0.38961567138887004, - "grad_norm": 1.7348174040132258, - "learning_rate": 1.3951237625770564e-06, - "loss": 1.1436, - "step": 2874 - }, - { - "epoch": 0.3897512370365349, - "grad_norm": 2.2319838188109054, - "learning_rate": 1.3947203201513953e-06, - "loss": 1.167, - "step": 2875 - }, - { - "epoch": 0.38988680268419984, - "grad_norm": 1.3440004308366116, - "learning_rate": 1.3943168016070361e-06, - "loss": 1.1587, - "step": 2876 - }, - { - "epoch": 0.3900223683318647, - "grad_norm": 1.6829786304331738, - "learning_rate": 1.3939132070217942e-06, - "loss": 1.1479, - "step": 2877 - }, - { - "epoch": 0.3901579339795296, - "grad_norm": 1.5602532432458818, - "learning_rate": 1.3935095364734998e-06, - "loss": 1.1644, - "step": 2878 - }, - { - "epoch": 0.39029349962719445, - "grad_norm": 2.0652739771439044, - "learning_rate": 1.3931057900399976e-06, - "loss": 1.1777, - "step": 2879 - }, - { - "epoch": 0.3904290652748594, - "grad_norm": 1.6789162853314994, - "learning_rate": 1.3927019677991466e-06, - "loss": 1.1767, - "step": 2880 - }, - { - "epoch": 0.39056463092252425, - "grad_norm": 1.4786200085944008, - "learning_rate": 1.3922980698288212e-06, - "loss": 1.1443, - "step": 2881 - }, - { - "epoch": 0.3907001965701891, - "grad_norm": 1.6133007358015177, - "learning_rate": 1.3918940962069093e-06, - "loss": 1.2077, - "step": 2882 - }, - { - "epoch": 0.390835762217854, - "grad_norm": 2.077044504825278, - "learning_rate": 1.3914900470113144e-06, - "loss": 1.1566, - "step": 2883 - }, - { - "epoch": 0.39097132786551886, - "grad_norm": 1.5059348100292025, - "learning_rate": 1.3910859223199545e-06, - "loss": 1.182, - "step": 2884 - }, - { - "epoch": 0.3911068935131838, - "grad_norm": 1.6441667476776818, - "learning_rate": 1.3906817222107611e-06, - "loss": 1.1853, - "step": 2885 - }, - { - "epoch": 0.39124245916084865, - "grad_norm": 1.6273875730492553, - "learning_rate": 1.3902774467616817e-06, - "loss": 1.1535, - "step": 2886 - }, - { - "epoch": 0.3913780248085135, - "grad_norm": 2.1041340297572857, - "learning_rate": 1.3898730960506772e-06, - "loss": 1.193, - "step": 2887 - }, - { - "epoch": 0.3915135904561784, - "grad_norm": 2.3837881528394416, - "learning_rate": 1.3894686701557237e-06, - "loss": 1.1562, - "step": 2888 - }, - { - "epoch": 0.39164915610384327, - "grad_norm": 2.5358096117213336, - "learning_rate": 1.3890641691548113e-06, - "loss": 1.1878, - "step": 2889 - }, - { - "epoch": 0.3917847217515082, - "grad_norm": 1.769766446914845, - "learning_rate": 1.3886595931259451e-06, - "loss": 1.1568, - "step": 2890 - }, - { - "epoch": 0.39192028739917306, - "grad_norm": 1.697012730275507, - "learning_rate": 1.3882549421471442e-06, - "loss": 1.1964, - "step": 2891 - }, - { - "epoch": 0.39205585304683793, - "grad_norm": 2.008216127379292, - "learning_rate": 1.3878502162964422e-06, - "loss": 1.1401, - "step": 2892 - }, - { - "epoch": 0.3921914186945028, - "grad_norm": 2.131718553934897, - "learning_rate": 1.3874454156518877e-06, - "loss": 1.1467, - "step": 2893 - }, - { - "epoch": 0.3923269843421677, - "grad_norm": 1.8073738374387083, - "learning_rate": 1.3870405402915436e-06, - "loss": 1.1843, - "step": 2894 - }, - { - "epoch": 0.3924625499898326, - "grad_norm": 1.7597850311135184, - "learning_rate": 1.3866355902934856e-06, - "loss": 1.1494, - "step": 2895 - }, - { - "epoch": 0.39259811563749747, - "grad_norm": 1.7196738852766922, - "learning_rate": 1.3862305657358065e-06, - "loss": 1.1617, - "step": 2896 - }, - { - "epoch": 0.39273368128516234, - "grad_norm": 1.620564139548848, - "learning_rate": 1.385825466696611e-06, - "loss": 1.1435, - "step": 2897 - }, - { - "epoch": 0.3928692469328272, - "grad_norm": 1.5533674435527773, - "learning_rate": 1.3854202932540202e-06, - "loss": 1.1689, - "step": 2898 - }, - { - "epoch": 0.3930048125804921, - "grad_norm": 1.9526508040501527, - "learning_rate": 1.3850150454861682e-06, - "loss": 1.1692, - "step": 2899 - }, - { - "epoch": 0.393140378228157, - "grad_norm": 4.0511027052084305, - "learning_rate": 1.3846097234712034e-06, - "loss": 1.1305, - "step": 2900 - }, - { - "epoch": 0.3932759438758219, - "grad_norm": 1.3881770108330898, - "learning_rate": 1.3842043272872896e-06, - "loss": 1.1707, - "step": 2901 - }, - { - "epoch": 0.39341150952348675, - "grad_norm": 2.9452558568559226, - "learning_rate": 1.383798857012604e-06, - "loss": 1.1938, - "step": 2902 - }, - { - "epoch": 0.3935470751711516, - "grad_norm": 1.8139675021921795, - "learning_rate": 1.3833933127253383e-06, - "loss": 1.1583, - "step": 2903 - }, - { - "epoch": 0.3936826408188165, - "grad_norm": 1.6966298641360933, - "learning_rate": 1.3829876945036987e-06, - "loss": 1.1508, - "step": 2904 - }, - { - "epoch": 0.3938182064664814, - "grad_norm": 1.828601933779395, - "learning_rate": 1.3825820024259052e-06, - "loss": 1.1564, - "step": 2905 - }, - { - "epoch": 0.3939537721141463, - "grad_norm": 1.6276557530401048, - "learning_rate": 1.3821762365701926e-06, - "loss": 1.159, - "step": 2906 - }, - { - "epoch": 0.39408933776181115, - "grad_norm": 1.6373057377384896, - "learning_rate": 1.3817703970148092e-06, - "loss": 1.173, - "step": 2907 - }, - { - "epoch": 0.394224903409476, - "grad_norm": 1.5005677370245436, - "learning_rate": 1.3813644838380184e-06, - "loss": 1.1837, - "step": 2908 - }, - { - "epoch": 0.3943604690571409, - "grad_norm": 1.9014083895892977, - "learning_rate": 1.3809584971180975e-06, - "loss": 1.1602, - "step": 2909 - }, - { - "epoch": 0.3944960347048058, - "grad_norm": 1.4106759286088375, - "learning_rate": 1.3805524369333371e-06, - "loss": 1.1794, - "step": 2910 - }, - { - "epoch": 0.3946316003524707, - "grad_norm": 1.665078968449005, - "learning_rate": 1.3801463033620433e-06, - "loss": 1.1674, - "step": 2911 - }, - { - "epoch": 0.39476716600013556, - "grad_norm": 2.3539886591753616, - "learning_rate": 1.3797400964825357e-06, - "loss": 1.1628, - "step": 2912 - }, - { - "epoch": 0.39490273164780043, - "grad_norm": 1.5468405646208678, - "learning_rate": 1.3793338163731476e-06, - "loss": 1.172, - "step": 2913 - }, - { - "epoch": 0.3950382972954653, - "grad_norm": 1.3619832253614186, - "learning_rate": 1.3789274631122277e-06, - "loss": 1.1424, - "step": 2914 - }, - { - "epoch": 0.3951738629431302, - "grad_norm": 1.4089555660920028, - "learning_rate": 1.3785210367781375e-06, - "loss": 1.1659, - "step": 2915 - }, - { - "epoch": 0.3953094285907951, - "grad_norm": 1.6534910604019812, - "learning_rate": 1.378114537449253e-06, - "loss": 1.1539, - "step": 2916 - }, - { - "epoch": 0.39544499423845997, - "grad_norm": 1.6802042294786912, - "learning_rate": 1.3777079652039646e-06, - "loss": 1.1964, - "step": 2917 - }, - { - "epoch": 0.39558055988612484, - "grad_norm": 2.0181628931180993, - "learning_rate": 1.3773013201206768e-06, - "loss": 1.2102, - "step": 2918 - }, - { - "epoch": 0.39571612553378976, - "grad_norm": 1.6093728355565118, - "learning_rate": 1.3768946022778075e-06, - "loss": 1.186, - "step": 2919 - }, - { - "epoch": 0.39585169118145463, - "grad_norm": 2.2887939059251776, - "learning_rate": 1.3764878117537895e-06, - "loss": 1.1468, - "step": 2920 - }, - { - "epoch": 0.3959872568291195, - "grad_norm": 1.7763276159349957, - "learning_rate": 1.3760809486270684e-06, - "loss": 1.1688, - "step": 2921 - }, - { - "epoch": 0.3961228224767844, - "grad_norm": 1.7209920560787226, - "learning_rate": 1.3756740129761053e-06, - "loss": 1.2155, - "step": 2922 - }, - { - "epoch": 0.39625838812444925, - "grad_norm": 1.4698348934716852, - "learning_rate": 1.3752670048793743e-06, - "loss": 1.2136, - "step": 2923 - }, - { - "epoch": 0.39639395377211417, - "grad_norm": 1.4954826335745455, - "learning_rate": 1.3748599244153632e-06, - "loss": 1.1275, - "step": 2924 - }, - { - "epoch": 0.39652951941977904, - "grad_norm": 2.7486124319846947, - "learning_rate": 1.3744527716625746e-06, - "loss": 1.1518, - "step": 2925 - }, - { - "epoch": 0.3966650850674439, - "grad_norm": 1.858455766066703, - "learning_rate": 1.3740455466995248e-06, - "loss": 1.1443, - "step": 2926 - }, - { - "epoch": 0.3968006507151088, - "grad_norm": 1.5975256607675536, - "learning_rate": 1.373638249604744e-06, - "loss": 1.1919, - "step": 2927 - }, - { - "epoch": 0.39693621636277365, - "grad_norm": 1.9632931797466104, - "learning_rate": 1.3732308804567761e-06, - "loss": 1.1586, - "step": 2928 - }, - { - "epoch": 0.3970717820104386, - "grad_norm": 1.5821228807001735, - "learning_rate": 1.3728234393341789e-06, - "loss": 1.1025, - "step": 2929 - }, - { - "epoch": 0.39720734765810345, - "grad_norm": 1.7383154711250326, - "learning_rate": 1.3724159263155246e-06, - "loss": 1.1192, - "step": 2930 - }, - { - "epoch": 0.3973429133057683, - "grad_norm": 1.4810305802599066, - "learning_rate": 1.3720083414793984e-06, - "loss": 1.1696, - "step": 2931 - }, - { - "epoch": 0.3974784789534332, - "grad_norm": 1.4849575254618865, - "learning_rate": 1.3716006849043998e-06, - "loss": 1.1723, - "step": 2932 - }, - { - "epoch": 0.39761404460109806, - "grad_norm": 5.019997044318018, - "learning_rate": 1.3711929566691424e-06, - "loss": 1.1475, - "step": 2933 - }, - { - "epoch": 0.397749610248763, - "grad_norm": 1.6334647975835166, - "learning_rate": 1.3707851568522534e-06, - "loss": 1.1712, - "step": 2934 - }, - { - "epoch": 0.39788517589642786, - "grad_norm": 5.448050018329099, - "learning_rate": 1.3703772855323739e-06, - "loss": 1.1226, - "step": 2935 - }, - { - "epoch": 0.3980207415440927, - "grad_norm": 1.5252027590123771, - "learning_rate": 1.3699693427881582e-06, - "loss": 1.1475, - "step": 2936 - }, - { - "epoch": 0.3981563071917576, - "grad_norm": 1.5007526886757023, - "learning_rate": 1.3695613286982754e-06, - "loss": 1.1312, - "step": 2937 - }, - { - "epoch": 0.39829187283942247, - "grad_norm": 1.4671660672214968, - "learning_rate": 1.3691532433414073e-06, - "loss": 1.1763, - "step": 2938 - }, - { - "epoch": 0.3984274384870874, - "grad_norm": 1.456291440662813, - "learning_rate": 1.36874508679625e-06, - "loss": 1.1163, - "step": 2939 - }, - { - "epoch": 0.39856300413475226, - "grad_norm": 1.8556237199225252, - "learning_rate": 1.3683368591415137e-06, - "loss": 1.1167, - "step": 2940 - }, - { - "epoch": 0.39869856978241713, - "grad_norm": 1.5608833360170438, - "learning_rate": 1.3679285604559211e-06, - "loss": 1.1523, - "step": 2941 - }, - { - "epoch": 0.398834135430082, - "grad_norm": 1.6937871043535924, - "learning_rate": 1.3675201908182103e-06, - "loss": 1.1631, - "step": 2942 - }, - { - "epoch": 0.3989697010777469, - "grad_norm": 1.582177444873064, - "learning_rate": 1.3671117503071317e-06, - "loss": 1.1608, - "step": 2943 - }, - { - "epoch": 0.3991052667254118, - "grad_norm": 1.4342281188732642, - "learning_rate": 1.3667032390014497e-06, - "loss": 1.161, - "step": 2944 - }, - { - "epoch": 0.39924083237307667, - "grad_norm": 2.1830147250567, - "learning_rate": 1.3662946569799426e-06, - "loss": 1.1808, - "step": 2945 - }, - { - "epoch": 0.39937639802074154, - "grad_norm": 1.618749165415772, - "learning_rate": 1.3658860043214024e-06, - "loss": 1.2023, - "step": 2946 - }, - { - "epoch": 0.3995119636684064, - "grad_norm": 1.6403535266157367, - "learning_rate": 1.3654772811046344e-06, - "loss": 1.1531, - "step": 2947 - }, - { - "epoch": 0.3996475293160713, - "grad_norm": 1.6309386479020063, - "learning_rate": 1.3650684874084577e-06, - "loss": 1.165, - "step": 2948 - }, - { - "epoch": 0.3997830949637362, - "grad_norm": 1.3680705573137304, - "learning_rate": 1.3646596233117047e-06, - "loss": 1.1093, - "step": 2949 - }, - { - "epoch": 0.3999186606114011, - "grad_norm": 1.705299133856725, - "learning_rate": 1.364250688893222e-06, - "loss": 1.1386, - "step": 2950 - }, - { - "epoch": 0.40005422625906595, - "grad_norm": 1.817826824670329, - "learning_rate": 1.3638416842318691e-06, - "loss": 1.1404, - "step": 2951 - }, - { - "epoch": 0.4001897919067308, - "grad_norm": 1.543778521773758, - "learning_rate": 1.3634326094065194e-06, - "loss": 1.1666, - "step": 2952 - }, - { - "epoch": 0.4003253575543957, - "grad_norm": 1.4875320474474816, - "learning_rate": 1.3630234644960597e-06, - "loss": 1.1707, - "step": 2953 - }, - { - "epoch": 0.4004609232020606, - "grad_norm": 2.391640724631687, - "learning_rate": 1.3626142495793902e-06, - "loss": 1.1752, - "step": 2954 - }, - { - "epoch": 0.4005964888497255, - "grad_norm": 1.5074333318442203, - "learning_rate": 1.3622049647354252e-06, - "loss": 1.1693, - "step": 2955 - }, - { - "epoch": 0.40073205449739036, - "grad_norm": 2.423258827205519, - "learning_rate": 1.361795610043092e-06, - "loss": 1.1489, - "step": 2956 - }, - { - "epoch": 0.4008676201450552, - "grad_norm": 1.7766385485254983, - "learning_rate": 1.3613861855813308e-06, - "loss": 1.1778, - "step": 2957 - }, - { - "epoch": 0.40100318579272015, - "grad_norm": 1.4707697143132035, - "learning_rate": 1.3609766914290965e-06, - "loss": 1.1509, - "step": 2958 - }, - { - "epoch": 0.401138751440385, - "grad_norm": 1.9958112302291842, - "learning_rate": 1.3605671276653565e-06, - "loss": 1.183, - "step": 2959 - }, - { - "epoch": 0.4012743170880499, - "grad_norm": 1.949409904889143, - "learning_rate": 1.3601574943690924e-06, - "loss": 1.2306, - "step": 2960 - }, - { - "epoch": 0.40140988273571476, - "grad_norm": 1.5556201584728675, - "learning_rate": 1.3597477916192985e-06, - "loss": 1.1905, - "step": 2961 - }, - { - "epoch": 0.40154544838337963, - "grad_norm": 1.4379553521059147, - "learning_rate": 1.3593380194949823e-06, - "loss": 1.167, - "step": 2962 - }, - { - "epoch": 0.40168101403104456, - "grad_norm": 1.3911873519179774, - "learning_rate": 1.3589281780751659e-06, - "loss": 1.1426, - "step": 2963 - }, - { - "epoch": 0.40181657967870943, - "grad_norm": 1.5030159497242004, - "learning_rate": 1.358518267438883e-06, - "loss": 1.1624, - "step": 2964 - }, - { - "epoch": 0.4019521453263743, - "grad_norm": 1.6750322556222297, - "learning_rate": 1.3581082876651824e-06, - "loss": 1.206, - "step": 2965 - }, - { - "epoch": 0.40208771097403917, - "grad_norm": 1.9965189814847508, - "learning_rate": 1.3576982388331258e-06, - "loss": 1.1712, - "step": 2966 - }, - { - "epoch": 0.40222327662170404, - "grad_norm": 2.025000175823773, - "learning_rate": 1.3572881210217869e-06, - "loss": 1.1368, - "step": 2967 - }, - { - "epoch": 0.40235884226936897, - "grad_norm": 1.8972174296372128, - "learning_rate": 1.3568779343102539e-06, - "loss": 1.1891, - "step": 2968 - }, - { - "epoch": 0.40249440791703384, - "grad_norm": 1.4308764717888318, - "learning_rate": 1.3564676787776282e-06, - "loss": 1.1129, - "step": 2969 - }, - { - "epoch": 0.4026299735646987, - "grad_norm": 1.4537409126888967, - "learning_rate": 1.356057354503025e-06, - "loss": 1.1768, - "step": 2970 - }, - { - "epoch": 0.4027655392123636, - "grad_norm": 1.8119229602360758, - "learning_rate": 1.3556469615655713e-06, - "loss": 1.2113, - "step": 2971 - }, - { - "epoch": 0.40290110486002845, - "grad_norm": 4.364344594971192, - "learning_rate": 1.355236500044408e-06, - "loss": 1.1596, - "step": 2972 - }, - { - "epoch": 0.4030366705076934, - "grad_norm": 4.24985190217303, - "learning_rate": 1.3548259700186901e-06, - "loss": 1.1584, - "step": 2973 - }, - { - "epoch": 0.40317223615535824, - "grad_norm": 2.4172476085522594, - "learning_rate": 1.3544153715675848e-06, - "loss": 1.2085, - "step": 2974 - }, - { - "epoch": 0.4033078018030231, - "grad_norm": 1.414267639127324, - "learning_rate": 1.3540047047702725e-06, - "loss": 1.1452, - "step": 2975 - }, - { - "epoch": 0.403443367450688, - "grad_norm": 1.7153493047614972, - "learning_rate": 1.353593969705947e-06, - "loss": 1.1437, - "step": 2976 - }, - { - "epoch": 0.40357893309835285, - "grad_norm": 2.0081981841408205, - "learning_rate": 1.353183166453816e-06, - "loss": 1.187, - "step": 2977 - }, - { - "epoch": 0.4037144987460178, - "grad_norm": 2.579701491391784, - "learning_rate": 1.352772295093099e-06, - "loss": 1.1836, - "step": 2978 - }, - { - "epoch": 0.40385006439368265, - "grad_norm": 1.5218429184248605, - "learning_rate": 1.3523613557030298e-06, - "loss": 1.1693, - "step": 2979 - }, - { - "epoch": 0.4039856300413475, - "grad_norm": 1.482985648336377, - "learning_rate": 1.3519503483628541e-06, - "loss": 1.1582, - "step": 2980 - }, - { - "epoch": 0.4041211956890124, - "grad_norm": 1.6578596576152131, - "learning_rate": 1.351539273151832e-06, - "loss": 1.1295, - "step": 2981 - }, - { - "epoch": 0.40425676133667726, - "grad_norm": 1.5236561442131458, - "learning_rate": 1.3511281301492358e-06, - "loss": 1.1846, - "step": 2982 - }, - { - "epoch": 0.4043923269843422, - "grad_norm": 1.4209968689140218, - "learning_rate": 1.3507169194343514e-06, - "loss": 1.1618, - "step": 2983 - }, - { - "epoch": 0.40452789263200706, - "grad_norm": 1.7711222021577624, - "learning_rate": 1.3503056410864777e-06, - "loss": 1.1712, - "step": 2984 - }, - { - "epoch": 0.40466345827967193, - "grad_norm": 1.573985053656633, - "learning_rate": 1.349894295184926e-06, - "loss": 1.1726, - "step": 2985 - }, - { - "epoch": 0.4047990239273368, - "grad_norm": 1.8008048734857227, - "learning_rate": 1.3494828818090215e-06, - "loss": 1.1556, - "step": 2986 - }, - { - "epoch": 0.40493458957500167, - "grad_norm": 1.7127172276083944, - "learning_rate": 1.349071401038102e-06, - "loss": 1.1636, - "step": 2987 - }, - { - "epoch": 0.4050701552226666, - "grad_norm": 1.7790089198373586, - "learning_rate": 1.348659852951518e-06, - "loss": 1.1284, - "step": 2988 - }, - { - "epoch": 0.40520572087033147, - "grad_norm": 1.6540583780597542, - "learning_rate": 1.3482482376286338e-06, - "loss": 1.1365, - "step": 2989 - }, - { - "epoch": 0.40534128651799634, - "grad_norm": 1.7482892199023257, - "learning_rate": 1.3478365551488256e-06, - "loss": 1.1715, - "step": 2990 - }, - { - "epoch": 0.4054768521656612, - "grad_norm": 1.621260201466407, - "learning_rate": 1.3474248055914834e-06, - "loss": 1.1599, - "step": 2991 - }, - { - "epoch": 0.4056124178133261, - "grad_norm": 1.6008358173348922, - "learning_rate": 1.3470129890360103e-06, - "loss": 1.142, - "step": 2992 - }, - { - "epoch": 0.405747983460991, - "grad_norm": 1.4446118753847812, - "learning_rate": 1.3466011055618207e-06, - "loss": 1.1253, - "step": 2993 - }, - { - "epoch": 0.40588354910865587, - "grad_norm": 1.6821957075353624, - "learning_rate": 1.3461891552483442e-06, - "loss": 1.1678, - "step": 2994 - }, - { - "epoch": 0.40601911475632074, - "grad_norm": 2.4361166858420136, - "learning_rate": 1.3457771381750217e-06, - "loss": 1.1479, - "step": 2995 - }, - { - "epoch": 0.4061546804039856, - "grad_norm": 1.5904096485020944, - "learning_rate": 1.3453650544213076e-06, - "loss": 1.1559, - "step": 2996 - }, - { - "epoch": 0.40629024605165054, - "grad_norm": 1.8171258367911183, - "learning_rate": 1.344952904066669e-06, - "loss": 1.1524, - "step": 2997 - }, - { - "epoch": 0.4064258116993154, - "grad_norm": 1.490680958988263, - "learning_rate": 1.3445406871905855e-06, - "loss": 1.1572, - "step": 2998 - }, - { - "epoch": 0.4065613773469803, - "grad_norm": 1.5139356001230093, - "learning_rate": 1.34412840387255e-06, - "loss": 1.1474, - "step": 2999 - }, - { - "epoch": 0.40669694299464515, - "grad_norm": 1.6335278425770918, - "learning_rate": 1.3437160541920685e-06, - "loss": 1.1577, - "step": 3000 - }, - { - "epoch": 0.40683250864231, - "grad_norm": 1.4820501388631733, - "learning_rate": 1.3433036382286589e-06, - "loss": 1.1694, - "step": 3001 - }, - { - "epoch": 0.40696807428997495, - "grad_norm": 1.5655418855615892, - "learning_rate": 1.3428911560618525e-06, - "loss": 1.183, - "step": 3002 - }, - { - "epoch": 0.4071036399376398, - "grad_norm": 1.429386320774816, - "learning_rate": 1.3424786077711933e-06, - "loss": 1.1682, - "step": 3003 - }, - { - "epoch": 0.4072392055853047, - "grad_norm": 1.4824716555677548, - "learning_rate": 1.342065993436238e-06, - "loss": 1.1156, - "step": 3004 - }, - { - "epoch": 0.40737477123296956, - "grad_norm": 9.037844752533024, - "learning_rate": 1.3416533131365563e-06, - "loss": 1.15, - "step": 3005 - }, - { - "epoch": 0.4075103368806344, - "grad_norm": 1.4736608220927312, - "learning_rate": 1.3412405669517296e-06, - "loss": 1.157, - "step": 3006 - }, - { - "epoch": 0.40764590252829935, - "grad_norm": 1.913598186432291, - "learning_rate": 1.3408277549613534e-06, - "loss": 1.1517, - "step": 3007 - }, - { - "epoch": 0.4077814681759642, - "grad_norm": 1.319910064254687, - "learning_rate": 1.3404148772450348e-06, - "loss": 1.1707, - "step": 3008 - }, - { - "epoch": 0.4079170338236291, - "grad_norm": 1.3941334320343979, - "learning_rate": 1.340001933882394e-06, - "loss": 1.1348, - "step": 3009 - }, - { - "epoch": 0.40805259947129396, - "grad_norm": 1.5806683754088922, - "learning_rate": 1.3395889249530642e-06, - "loss": 1.1788, - "step": 3010 - }, - { - "epoch": 0.40818816511895883, - "grad_norm": 1.824847761334936, - "learning_rate": 1.339175850536691e-06, - "loss": 1.1533, - "step": 3011 - }, - { - "epoch": 0.40832373076662376, - "grad_norm": 1.50896343686145, - "learning_rate": 1.338762710712932e-06, - "loss": 1.1604, - "step": 3012 - }, - { - "epoch": 0.40845929641428863, - "grad_norm": 1.4420071746377094, - "learning_rate": 1.3383495055614586e-06, - "loss": 1.1482, - "step": 3013 - }, - { - "epoch": 0.4085948620619535, - "grad_norm": 1.426725595670434, - "learning_rate": 1.3379362351619537e-06, - "loss": 1.1684, - "step": 3014 - }, - { - "epoch": 0.40873042770961837, - "grad_norm": 1.4223784678318407, - "learning_rate": 1.3375228995941132e-06, - "loss": 1.2014, - "step": 3015 - }, - { - "epoch": 0.40886599335728324, - "grad_norm": 1.7523576359619732, - "learning_rate": 1.337109498937646e-06, - "loss": 1.1913, - "step": 3016 - }, - { - "epoch": 0.40900155900494817, - "grad_norm": 4.73984162821829, - "learning_rate": 1.3366960332722728e-06, - "loss": 1.1363, - "step": 3017 - }, - { - "epoch": 0.40913712465261304, - "grad_norm": 1.8833208454846375, - "learning_rate": 1.3362825026777272e-06, - "loss": 1.1915, - "step": 3018 - }, - { - "epoch": 0.4092726903002779, - "grad_norm": 1.410841023499633, - "learning_rate": 1.3358689072337554e-06, - "loss": 1.1661, - "step": 3019 - }, - { - "epoch": 0.4094082559479428, - "grad_norm": 1.619929943771246, - "learning_rate": 1.3354552470201161e-06, - "loss": 1.1318, - "step": 3020 - }, - { - "epoch": 0.40954382159560765, - "grad_norm": 2.779923050188363, - "learning_rate": 1.3350415221165805e-06, - "loss": 1.1537, - "step": 3021 - }, - { - "epoch": 0.4096793872432726, - "grad_norm": 1.5230873987998803, - "learning_rate": 1.3346277326029317e-06, - "loss": 1.1482, - "step": 3022 - }, - { - "epoch": 0.40981495289093745, - "grad_norm": 1.689604048714454, - "learning_rate": 1.3342138785589666e-06, - "loss": 1.1446, - "step": 3023 - }, - { - "epoch": 0.4099505185386023, - "grad_norm": 1.4062020003720674, - "learning_rate": 1.3337999600644928e-06, - "loss": 1.2011, - "step": 3024 - }, - { - "epoch": 0.4100860841862672, - "grad_norm": 3.0183277639508415, - "learning_rate": 1.3333859771993315e-06, - "loss": 1.1586, - "step": 3025 - }, - { - "epoch": 0.41022164983393206, - "grad_norm": 1.4482351939597709, - "learning_rate": 1.332971930043316e-06, - "loss": 1.1429, - "step": 3026 - }, - { - "epoch": 0.410357215481597, - "grad_norm": 2.1634725770068024, - "learning_rate": 1.3325578186762923e-06, - "loss": 1.1752, - "step": 3027 - }, - { - "epoch": 0.41049278112926185, - "grad_norm": 1.824843380878061, - "learning_rate": 1.3321436431781183e-06, - "loss": 1.1589, - "step": 3028 - }, - { - "epoch": 0.4106283467769267, - "grad_norm": 1.546505938653725, - "learning_rate": 1.3317294036286644e-06, - "loss": 1.1171, - "step": 3029 - }, - { - "epoch": 0.4107639124245916, - "grad_norm": 1.6712608849364268, - "learning_rate": 1.3313151001078135e-06, - "loss": 1.1344, - "step": 3030 - }, - { - "epoch": 0.41089947807225646, - "grad_norm": 2.013583970927868, - "learning_rate": 1.3309007326954608e-06, - "loss": 1.1347, - "step": 3031 - }, - { - "epoch": 0.4110350437199214, - "grad_norm": 1.506368310524576, - "learning_rate": 1.330486301471514e-06, - "loss": 1.1812, - "step": 3032 - }, - { - "epoch": 0.41117060936758626, - "grad_norm": 2.014179564165211, - "learning_rate": 1.3300718065158924e-06, - "loss": 1.1216, - "step": 3033 - }, - { - "epoch": 0.41130617501525113, - "grad_norm": 1.9893124243438511, - "learning_rate": 1.3296572479085284e-06, - "loss": 1.2167, - "step": 3034 - }, - { - "epoch": 0.411441740662916, - "grad_norm": 1.4361196392121307, - "learning_rate": 1.3292426257293668e-06, - "loss": 1.1328, - "step": 3035 - }, - { - "epoch": 0.4115773063105809, - "grad_norm": 2.0658066984512895, - "learning_rate": 1.3288279400583631e-06, - "loss": 1.172, - "step": 3036 - }, - { - "epoch": 0.4117128719582458, - "grad_norm": 1.5196156729294548, - "learning_rate": 1.3284131909754868e-06, - "loss": 1.1572, - "step": 3037 - }, - { - "epoch": 0.41184843760591067, - "grad_norm": 2.2117400603866093, - "learning_rate": 1.3279983785607192e-06, - "loss": 1.1556, - "step": 3038 - }, - { - "epoch": 0.41198400325357554, - "grad_norm": 1.6185009024302024, - "learning_rate": 1.327583502894053e-06, - "loss": 1.1278, - "step": 3039 - }, - { - "epoch": 0.4121195689012404, - "grad_norm": 1.5764578662916684, - "learning_rate": 1.3271685640554943e-06, - "loss": 1.1326, - "step": 3040 - }, - { - "epoch": 0.41225513454890533, - "grad_norm": 1.4127489966857139, - "learning_rate": 1.3267535621250604e-06, - "loss": 1.1457, - "step": 3041 - }, - { - "epoch": 0.4123907001965702, - "grad_norm": 1.6004494035225776, - "learning_rate": 1.3263384971827816e-06, - "loss": 1.1548, - "step": 3042 - }, - { - "epoch": 0.4125262658442351, - "grad_norm": 1.3806129811082926, - "learning_rate": 1.3259233693086993e-06, - "loss": 1.1428, - "step": 3043 - }, - { - "epoch": 0.41266183149189994, - "grad_norm": 1.7035808085016433, - "learning_rate": 1.3255081785828678e-06, - "loss": 1.1634, - "step": 3044 - }, - { - "epoch": 0.4127973971395648, - "grad_norm": 1.4791083051538811, - "learning_rate": 1.3250929250853537e-06, - "loss": 1.1793, - "step": 3045 - }, - { - "epoch": 0.41293296278722974, - "grad_norm": 1.9553326337111017, - "learning_rate": 1.324677608896235e-06, - "loss": 1.1555, - "step": 3046 - }, - { - "epoch": 0.4130685284348946, - "grad_norm": 1.4772360884770326, - "learning_rate": 1.3242622300956027e-06, - "loss": 1.1438, - "step": 3047 - }, - { - "epoch": 0.4132040940825595, - "grad_norm": 1.4811318763209484, - "learning_rate": 1.3238467887635583e-06, - "loss": 1.1618, - "step": 3048 - }, - { - "epoch": 0.41333965973022435, - "grad_norm": 3.351399129027451, - "learning_rate": 1.3234312849802173e-06, - "loss": 1.166, - "step": 3049 - }, - { - "epoch": 0.4134752253778892, - "grad_norm": 2.1841089130053395, - "learning_rate": 1.323015718825706e-06, - "loss": 1.133, - "step": 3050 - }, - { - "epoch": 0.41361079102555415, - "grad_norm": 2.7924505354012052, - "learning_rate": 1.3226000903801632e-06, - "loss": 1.1298, - "step": 3051 - }, - { - "epoch": 0.413746356673219, - "grad_norm": 1.8691480453541558, - "learning_rate": 1.322184399723739e-06, - "loss": 1.1872, - "step": 3052 - }, - { - "epoch": 0.4138819223208839, - "grad_norm": 1.4580262222271165, - "learning_rate": 1.3217686469365967e-06, - "loss": 1.1805, - "step": 3053 - }, - { - "epoch": 0.41401748796854876, - "grad_norm": 1.7338173901392382, - "learning_rate": 1.3213528320989107e-06, - "loss": 1.169, - "step": 3054 - }, - { - "epoch": 0.41415305361621363, - "grad_norm": 1.47936257799703, - "learning_rate": 1.3209369552908676e-06, - "loss": 1.127, - "step": 3055 - }, - { - "epoch": 0.41428861926387855, - "grad_norm": 2.5743683461216467, - "learning_rate": 1.320521016592666e-06, - "loss": 1.1396, - "step": 3056 - }, - { - "epoch": 0.4144241849115434, - "grad_norm": 1.727185728144914, - "learning_rate": 1.3201050160845164e-06, - "loss": 1.1605, - "step": 3057 - }, - { - "epoch": 0.4145597505592083, - "grad_norm": 1.8638213145180602, - "learning_rate": 1.3196889538466413e-06, - "loss": 1.13, - "step": 3058 - }, - { - "epoch": 0.41469531620687317, - "grad_norm": 1.5935752825833525, - "learning_rate": 1.319272829959275e-06, - "loss": 1.1442, - "step": 3059 - }, - { - "epoch": 0.41483088185453804, - "grad_norm": 2.3861144183150005, - "learning_rate": 1.3188566445026635e-06, - "loss": 1.1706, - "step": 3060 - }, - { - "epoch": 0.41496644750220296, - "grad_norm": 1.7932350091450062, - "learning_rate": 1.3184403975570648e-06, - "loss": 1.1385, - "step": 3061 - }, - { - "epoch": 0.41510201314986783, - "grad_norm": 1.4526245091815548, - "learning_rate": 1.3180240892027494e-06, - "loss": 1.1508, - "step": 3062 - }, - { - "epoch": 0.4152375787975327, - "grad_norm": 1.5430268659187045, - "learning_rate": 1.3176077195199984e-06, - "loss": 1.1505, - "step": 3063 - }, - { - "epoch": 0.4153731444451976, - "grad_norm": 1.772463411362096, - "learning_rate": 1.3171912885891061e-06, - "loss": 1.1704, - "step": 3064 - }, - { - "epoch": 0.41550871009286244, - "grad_norm": 1.5908375376393264, - "learning_rate": 1.3167747964903775e-06, - "loss": 1.183, - "step": 3065 - }, - { - "epoch": 0.41564427574052737, - "grad_norm": 2.356156565589939, - "learning_rate": 1.3163582433041296e-06, - "loss": 1.1567, - "step": 3066 - }, - { - "epoch": 0.41577984138819224, - "grad_norm": 1.8248926885676873, - "learning_rate": 1.3159416291106916e-06, - "loss": 1.1846, - "step": 3067 - }, - { - "epoch": 0.4159154070358571, - "grad_norm": 1.5008855269755847, - "learning_rate": 1.3155249539904049e-06, - "loss": 1.1746, - "step": 3068 - }, - { - "epoch": 0.416050972683522, - "grad_norm": 1.6132698774320378, - "learning_rate": 1.3151082180236209e-06, - "loss": 1.175, - "step": 3069 - }, - { - "epoch": 0.41618653833118685, - "grad_norm": 1.908583824256697, - "learning_rate": 1.3146914212907042e-06, - "loss": 1.1355, - "step": 3070 - }, - { - "epoch": 0.4163221039788518, - "grad_norm": 1.9766885636598193, - "learning_rate": 1.3142745638720314e-06, - "loss": 1.1541, - "step": 3071 - }, - { - "epoch": 0.41645766962651665, - "grad_norm": 1.5695378613134423, - "learning_rate": 1.3138576458479893e-06, - "loss": 1.1756, - "step": 3072 - }, - { - "epoch": 0.4165932352741815, - "grad_norm": 2.867925659609644, - "learning_rate": 1.3134406672989779e-06, - "loss": 1.1414, - "step": 3073 - }, - { - "epoch": 0.4167288009218464, - "grad_norm": 1.6486689906895649, - "learning_rate": 1.313023628305408e-06, - "loss": 1.1728, - "step": 3074 - }, - { - "epoch": 0.4168643665695113, - "grad_norm": 1.6532423690513778, - "learning_rate": 1.3126065289477019e-06, - "loss": 1.2033, - "step": 3075 - }, - { - "epoch": 0.4169999322171762, - "grad_norm": 1.5634942692465632, - "learning_rate": 1.3121893693062947e-06, - "loss": 1.1552, - "step": 3076 - }, - { - "epoch": 0.41713549786484105, - "grad_norm": 85.4996727769988, - "learning_rate": 1.3117721494616319e-06, - "loss": 1.1499, - "step": 3077 - }, - { - "epoch": 0.4172710635125059, - "grad_norm": 1.4608761150844305, - "learning_rate": 1.3113548694941708e-06, - "loss": 1.0957, - "step": 3078 - }, - { - "epoch": 0.4174066291601708, - "grad_norm": 1.5891287066359499, - "learning_rate": 1.3109375294843808e-06, - "loss": 1.1311, - "step": 3079 - }, - { - "epoch": 0.4175421948078357, - "grad_norm": 1.4566246421178983, - "learning_rate": 1.3105201295127426e-06, - "loss": 1.2088, - "step": 3080 - }, - { - "epoch": 0.4176777604555006, - "grad_norm": 1.6296087811308304, - "learning_rate": 1.3101026696597487e-06, - "loss": 1.1721, - "step": 3081 - }, - { - "epoch": 0.41781332610316546, - "grad_norm": 1.4840653142866052, - "learning_rate": 1.3096851500059028e-06, - "loss": 1.1334, - "step": 3082 - }, - { - "epoch": 0.41794889175083033, - "grad_norm": 2.2283947559752844, - "learning_rate": 1.3092675706317197e-06, - "loss": 1.1813, - "step": 3083 - }, - { - "epoch": 0.4180844573984952, - "grad_norm": 1.7710971902079036, - "learning_rate": 1.3088499316177272e-06, - "loss": 1.1977, - "step": 3084 - }, - { - "epoch": 0.4182200230461601, - "grad_norm": 1.7749363426409754, - "learning_rate": 1.3084322330444635e-06, - "loss": 1.1343, - "step": 3085 - }, - { - "epoch": 0.418355588693825, - "grad_norm": 2.3600671997237153, - "learning_rate": 1.3080144749924782e-06, - "loss": 1.1364, - "step": 3086 - }, - { - "epoch": 0.41849115434148987, - "grad_norm": 2.529652083597637, - "learning_rate": 1.3075966575423326e-06, - "loss": 1.1978, - "step": 3087 - }, - { - "epoch": 0.41862671998915474, - "grad_norm": 2.5869066570465766, - "learning_rate": 1.3071787807745996e-06, - "loss": 1.1489, - "step": 3088 - }, - { - "epoch": 0.4187622856368196, - "grad_norm": 1.7117098585518964, - "learning_rate": 1.3067608447698633e-06, - "loss": 1.1252, - "step": 3089 - }, - { - "epoch": 0.41889785128448453, - "grad_norm": 1.601865974853601, - "learning_rate": 1.3063428496087196e-06, - "loss": 1.1706, - "step": 3090 - }, - { - "epoch": 0.4190334169321494, - "grad_norm": 2.6335143970976085, - "learning_rate": 1.3059247953717758e-06, - "loss": 1.1729, - "step": 3091 - }, - { - "epoch": 0.4191689825798143, - "grad_norm": 2.1963969427207686, - "learning_rate": 1.3055066821396498e-06, - "loss": 1.1762, - "step": 3092 - }, - { - "epoch": 0.41930454822747915, - "grad_norm": 1.4590742760191215, - "learning_rate": 1.3050885099929716e-06, - "loss": 1.1318, - "step": 3093 - }, - { - "epoch": 0.419440113875144, - "grad_norm": 1.8051605357817924, - "learning_rate": 1.3046702790123824e-06, - "loss": 1.1271, - "step": 3094 - }, - { - "epoch": 0.41957567952280894, - "grad_norm": 1.4351830853783298, - "learning_rate": 1.3042519892785353e-06, - "loss": 1.1613, - "step": 3095 - }, - { - "epoch": 0.4197112451704738, - "grad_norm": 1.5343789029306965, - "learning_rate": 1.3038336408720932e-06, - "loss": 1.1435, - "step": 3096 - }, - { - "epoch": 0.4198468108181387, - "grad_norm": 2.424919846356393, - "learning_rate": 1.303415233873732e-06, - "loss": 1.1789, - "step": 3097 - }, - { - "epoch": 0.41998237646580355, - "grad_norm": 6.107785821685712, - "learning_rate": 1.3029967683641378e-06, - "loss": 1.1649, - "step": 3098 - }, - { - "epoch": 0.4201179421134684, - "grad_norm": 2.912580751317892, - "learning_rate": 1.3025782444240085e-06, - "loss": 1.1671, - "step": 3099 - }, - { - "epoch": 0.42025350776113335, - "grad_norm": 1.6444689255496552, - "learning_rate": 1.3021596621340533e-06, - "loss": 1.143, - "step": 3100 - }, - { - "epoch": 0.4203890734087982, - "grad_norm": 1.4708546316661175, - "learning_rate": 1.3017410215749924e-06, - "loss": 1.1267, - "step": 3101 - }, - { - "epoch": 0.4205246390564631, - "grad_norm": 1.563970051967962, - "learning_rate": 1.3013223228275571e-06, - "loss": 1.2191, - "step": 3102 - }, - { - "epoch": 0.42066020470412796, - "grad_norm": 1.8372974467957068, - "learning_rate": 1.3009035659724904e-06, - "loss": 1.171, - "step": 3103 - }, - { - "epoch": 0.42079577035179283, - "grad_norm": 1.6203704006667359, - "learning_rate": 1.3004847510905463e-06, - "loss": 1.1661, - "step": 3104 - }, - { - "epoch": 0.42093133599945776, - "grad_norm": 1.7299620928043133, - "learning_rate": 1.30006587826249e-06, - "loss": 1.1665, - "step": 3105 - }, - { - "epoch": 0.4210669016471226, - "grad_norm": 1.527243927916385, - "learning_rate": 1.2996469475690975e-06, - "loss": 1.1295, - "step": 3106 - }, - { - "epoch": 0.4212024672947875, - "grad_norm": 1.5459111647293593, - "learning_rate": 1.2992279590911563e-06, - "loss": 1.189, - "step": 3107 - }, - { - "epoch": 0.42133803294245237, - "grad_norm": 1.657715763685031, - "learning_rate": 1.298808912909465e-06, - "loss": 1.1381, - "step": 3108 - }, - { - "epoch": 0.42147359859011724, - "grad_norm": 1.4198176550989168, - "learning_rate": 1.298389809104834e-06, - "loss": 1.1699, - "step": 3109 - }, - { - "epoch": 0.42160916423778216, - "grad_norm": 1.3858170308571767, - "learning_rate": 1.297970647758083e-06, - "loss": 1.1491, - "step": 3110 - }, - { - "epoch": 0.42174472988544703, - "grad_norm": 1.7501739595821408, - "learning_rate": 1.2975514289500451e-06, - "loss": 1.1715, - "step": 3111 - }, - { - "epoch": 0.4218802955331119, - "grad_norm": 1.4504465095563333, - "learning_rate": 1.2971321527615629e-06, - "loss": 1.1978, - "step": 3112 - }, - { - "epoch": 0.4220158611807768, - "grad_norm": 1.5088708445488614, - "learning_rate": 1.2967128192734902e-06, - "loss": 1.1709, - "step": 3113 - }, - { - "epoch": 0.4221514268284417, - "grad_norm": 2.741104596764835, - "learning_rate": 1.2962934285666924e-06, - "loss": 1.1764, - "step": 3114 - }, - { - "epoch": 0.42228699247610657, - "grad_norm": 4.6771413040022765, - "learning_rate": 1.295873980722046e-06, - "loss": 1.1879, - "step": 3115 - }, - { - "epoch": 0.42242255812377144, - "grad_norm": 1.530633143429659, - "learning_rate": 1.2954544758204374e-06, - "loss": 1.17, - "step": 3116 - }, - { - "epoch": 0.4225581237714363, - "grad_norm": 1.4784226482616707, - "learning_rate": 1.2950349139427659e-06, - "loss": 1.1415, - "step": 3117 - }, - { - "epoch": 0.4226936894191012, - "grad_norm": 1.54450023954128, - "learning_rate": 1.2946152951699398e-06, - "loss": 1.1624, - "step": 3118 - }, - { - "epoch": 0.4228292550667661, - "grad_norm": 1.4511824616975326, - "learning_rate": 1.2941956195828797e-06, - "loss": 1.1734, - "step": 3119 - }, - { - "epoch": 0.422964820714431, - "grad_norm": 1.6147612751281493, - "learning_rate": 1.2937758872625166e-06, - "loss": 1.1502, - "step": 3120 - }, - { - "epoch": 0.42310038636209585, - "grad_norm": 1.5402848112110301, - "learning_rate": 1.2933560982897924e-06, - "loss": 1.1409, - "step": 3121 - }, - { - "epoch": 0.4232359520097607, - "grad_norm": 1.4737170827031436, - "learning_rate": 1.2929362527456604e-06, - "loss": 1.1757, - "step": 3122 - }, - { - "epoch": 0.4233715176574256, - "grad_norm": 1.4902436644037902, - "learning_rate": 1.2925163507110843e-06, - "loss": 1.1545, - "step": 3123 - }, - { - "epoch": 0.4235070833050905, - "grad_norm": 1.7205064561456682, - "learning_rate": 1.292096392267039e-06, - "loss": 1.1538, - "step": 3124 - }, - { - "epoch": 0.4236426489527554, - "grad_norm": 1.556970950006168, - "learning_rate": 1.2916763774945101e-06, - "loss": 1.1475, - "step": 3125 - }, - { - "epoch": 0.42377821460042026, - "grad_norm": 1.539234995742151, - "learning_rate": 1.2912563064744938e-06, - "loss": 1.1565, - "step": 3126 - }, - { - "epoch": 0.4239137802480851, - "grad_norm": 1.667291249478388, - "learning_rate": 1.2908361792879984e-06, - "loss": 1.1554, - "step": 3127 - }, - { - "epoch": 0.42404934589575, - "grad_norm": 1.3827416009142899, - "learning_rate": 1.2904159960160415e-06, - "loss": 1.1361, - "step": 3128 - }, - { - "epoch": 0.4241849115434149, - "grad_norm": 1.5181675078583197, - "learning_rate": 1.289995756739652e-06, - "loss": 1.1634, - "step": 3129 - }, - { - "epoch": 0.4243204771910798, - "grad_norm": 1.5866631101497217, - "learning_rate": 1.2895754615398697e-06, - "loss": 1.126, - "step": 3130 - }, - { - "epoch": 0.42445604283874466, - "grad_norm": 1.547753519187641, - "learning_rate": 1.2891551104977457e-06, - "loss": 1.1183, - "step": 3131 - }, - { - "epoch": 0.42459160848640953, - "grad_norm": 1.51790355562571, - "learning_rate": 1.2887347036943407e-06, - "loss": 1.1515, - "step": 3132 - }, - { - "epoch": 0.4247271741340744, - "grad_norm": 1.4547865896027543, - "learning_rate": 1.288314241210728e-06, - "loss": 1.1792, - "step": 3133 - }, - { - "epoch": 0.42486273978173933, - "grad_norm": 1.474068101032312, - "learning_rate": 1.2878937231279892e-06, - "loss": 1.1627, - "step": 3134 - }, - { - "epoch": 0.4249983054294042, - "grad_norm": 2.145227035254027, - "learning_rate": 1.2874731495272181e-06, - "loss": 1.1346, - "step": 3135 - }, - { - "epoch": 0.42513387107706907, - "grad_norm": 1.8826671200680443, - "learning_rate": 1.2870525204895197e-06, - "loss": 1.1925, - "step": 3136 - }, - { - "epoch": 0.42526943672473394, - "grad_norm": 1.5974365936444923, - "learning_rate": 1.2866318360960084e-06, - "loss": 1.1391, - "step": 3137 - }, - { - "epoch": 0.4254050023723988, - "grad_norm": 1.8383636896941988, - "learning_rate": 1.2862110964278102e-06, - "loss": 1.1609, - "step": 3138 - }, - { - "epoch": 0.42554056802006374, - "grad_norm": 2.1510786628948293, - "learning_rate": 1.2857903015660612e-06, - "loss": 1.1862, - "step": 3139 - }, - { - "epoch": 0.4256761336677286, - "grad_norm": 1.7471968913884803, - "learning_rate": 1.2853694515919082e-06, - "loss": 1.1658, - "step": 3140 - }, - { - "epoch": 0.4258116993153935, - "grad_norm": 2.524841813929566, - "learning_rate": 1.2849485465865092e-06, - "loss": 1.1512, - "step": 3141 - }, - { - "epoch": 0.42594726496305835, - "grad_norm": 6.996042888041468, - "learning_rate": 1.2845275866310324e-06, - "loss": 1.1706, - "step": 3142 - }, - { - "epoch": 0.4260828306107232, - "grad_norm": 1.5019775257781363, - "learning_rate": 1.2841065718066563e-06, - "loss": 1.1421, - "step": 3143 - }, - { - "epoch": 0.42621839625838814, - "grad_norm": 1.6731008909155871, - "learning_rate": 1.2836855021945705e-06, - "loss": 1.1647, - "step": 3144 - }, - { - "epoch": 0.426353961906053, - "grad_norm": 1.7948555567328346, - "learning_rate": 1.283264377875975e-06, - "loss": 1.1671, - "step": 3145 - }, - { - "epoch": 0.4264895275537179, - "grad_norm": 1.5185440132364287, - "learning_rate": 1.2828431989320797e-06, - "loss": 1.1316, - "step": 3146 - }, - { - "epoch": 0.42662509320138275, - "grad_norm": 1.808833546727928, - "learning_rate": 1.2824219654441067e-06, - "loss": 1.1414, - "step": 3147 - }, - { - "epoch": 0.4267606588490476, - "grad_norm": 1.4101271540775084, - "learning_rate": 1.2820006774932866e-06, - "loss": 1.1764, - "step": 3148 - }, - { - "epoch": 0.42689622449671255, - "grad_norm": 1.5825629389496412, - "learning_rate": 1.281579335160862e-06, - "loss": 1.1473, - "step": 3149 - }, - { - "epoch": 0.4270317901443774, - "grad_norm": 1.522857814055657, - "learning_rate": 1.281157938528085e-06, - "loss": 1.1283, - "step": 3150 - }, - { - "epoch": 0.4271673557920423, - "grad_norm": 1.6255331376117934, - "learning_rate": 1.280736487676219e-06, - "loss": 1.154, - "step": 3151 - }, - { - "epoch": 0.42730292143970716, - "grad_norm": 1.3665781703786546, - "learning_rate": 1.2803149826865375e-06, - "loss": 1.1532, - "step": 3152 - }, - { - "epoch": 0.4274384870873721, - "grad_norm": 1.7816047670527708, - "learning_rate": 1.279893423640324e-06, - "loss": 1.1437, - "step": 3153 - }, - { - "epoch": 0.42757405273503696, - "grad_norm": 1.8035484631902388, - "learning_rate": 1.2794718106188734e-06, - "loss": 1.1868, - "step": 3154 - }, - { - "epoch": 0.42770961838270183, - "grad_norm": 1.3529656519595523, - "learning_rate": 1.27905014370349e-06, - "loss": 1.1685, - "step": 3155 - }, - { - "epoch": 0.4278451840303667, - "grad_norm": 1.9089637435330438, - "learning_rate": 1.2786284229754892e-06, - "loss": 1.1399, - "step": 3156 - }, - { - "epoch": 0.42798074967803157, - "grad_norm": 1.7611622450196192, - "learning_rate": 1.2782066485161961e-06, - "loss": 1.1578, - "step": 3157 - }, - { - "epoch": 0.4281163153256965, - "grad_norm": 1.5898400788087077, - "learning_rate": 1.2777848204069473e-06, - "loss": 1.18, - "step": 3158 - }, - { - "epoch": 0.42825188097336137, - "grad_norm": 1.72988417636035, - "learning_rate": 1.2773629387290883e-06, - "loss": 1.1542, - "step": 3159 - }, - { - "epoch": 0.42838744662102624, - "grad_norm": 37.194487590736294, - "learning_rate": 1.276941003563976e-06, - "loss": 1.1229, - "step": 3160 - }, - { - "epoch": 0.4285230122686911, - "grad_norm": 1.3467072532638165, - "learning_rate": 1.276519014992977e-06, - "loss": 1.1755, - "step": 3161 - }, - { - "epoch": 0.428658577916356, - "grad_norm": 1.5473168372354684, - "learning_rate": 1.276096973097469e-06, - "loss": 1.1152, - "step": 3162 - }, - { - "epoch": 0.4287941435640209, - "grad_norm": 1.6144015928885658, - "learning_rate": 1.275674877958839e-06, - "loss": 1.1764, - "step": 3163 - }, - { - "epoch": 0.4289297092116858, - "grad_norm": 1.7478721157148958, - "learning_rate": 1.2752527296584847e-06, - "loss": 1.1493, - "step": 3164 - }, - { - "epoch": 0.42906527485935064, - "grad_norm": 1.697762983095258, - "learning_rate": 1.2748305282778142e-06, - "loss": 1.13, - "step": 3165 - }, - { - "epoch": 0.4292008405070155, - "grad_norm": 1.4184532505484515, - "learning_rate": 1.2744082738982457e-06, - "loss": 1.149, - "step": 3166 - }, - { - "epoch": 0.4293364061546804, - "grad_norm": 1.7300709602832642, - "learning_rate": 1.2739859666012076e-06, - "loss": 1.1586, - "step": 3167 - }, - { - "epoch": 0.4294719718023453, - "grad_norm": 1.6235020270879368, - "learning_rate": 1.2735636064681387e-06, - "loss": 1.1453, - "step": 3168 - }, - { - "epoch": 0.4296075374500102, - "grad_norm": 1.692007137402705, - "learning_rate": 1.2731411935804877e-06, - "loss": 1.1432, - "step": 3169 - }, - { - "epoch": 0.42974310309767505, - "grad_norm": 1.6773269126656398, - "learning_rate": 1.2727187280197133e-06, - "loss": 1.1398, - "step": 3170 - }, - { - "epoch": 0.4298786687453399, - "grad_norm": 1.7469366860660391, - "learning_rate": 1.272296209867285e-06, - "loss": 1.1742, - "step": 3171 - }, - { - "epoch": 0.4300142343930048, - "grad_norm": 4.558604276506105, - "learning_rate": 1.2718736392046824e-06, - "loss": 1.1503, - "step": 3172 - }, - { - "epoch": 0.4301498000406697, - "grad_norm": 1.5226736308947884, - "learning_rate": 1.271451016113394e-06, - "loss": 1.1235, - "step": 3173 - }, - { - "epoch": 0.4302853656883346, - "grad_norm": 1.4817786839337552, - "learning_rate": 1.27102834067492e-06, - "loss": 1.1595, - "step": 3174 - }, - { - "epoch": 0.43042093133599946, - "grad_norm": 1.559793427090105, - "learning_rate": 1.2706056129707703e-06, - "loss": 1.1848, - "step": 3175 - }, - { - "epoch": 0.4305564969836643, - "grad_norm": 2.3657512218012497, - "learning_rate": 1.2701828330824638e-06, - "loss": 1.1796, - "step": 3176 - }, - { - "epoch": 0.4306920626313292, - "grad_norm": 1.5075021426407098, - "learning_rate": 1.2697600010915306e-06, - "loss": 1.1917, - "step": 3177 - }, - { - "epoch": 0.4308276282789941, - "grad_norm": 1.6510234303800357, - "learning_rate": 1.2693371170795107e-06, - "loss": 1.1215, - "step": 3178 - }, - { - "epoch": 0.430963193926659, - "grad_norm": 1.4656668652059128, - "learning_rate": 1.2689141811279536e-06, - "loss": 1.1403, - "step": 3179 - }, - { - "epoch": 0.43109875957432386, - "grad_norm": 1.5060450817364808, - "learning_rate": 1.2684911933184193e-06, - "loss": 1.141, - "step": 3180 - }, - { - "epoch": 0.43123432522198873, - "grad_norm": 1.5336204307736274, - "learning_rate": 1.2680681537324779e-06, - "loss": 1.1674, - "step": 3181 - }, - { - "epoch": 0.4313698908696536, - "grad_norm": 1.4706252163066993, - "learning_rate": 1.267645062451709e-06, - "loss": 1.1216, - "step": 3182 - }, - { - "epoch": 0.43150545651731853, - "grad_norm": 1.548410665605616, - "learning_rate": 1.2672219195577023e-06, - "loss": 1.2431, - "step": 3183 - }, - { - "epoch": 0.4316410221649834, - "grad_norm": 2.189684840287999, - "learning_rate": 1.266798725132058e-06, - "loss": 1.1556, - "step": 3184 - }, - { - "epoch": 0.43177658781264827, - "grad_norm": 1.4528497420314503, - "learning_rate": 1.2663754792563852e-06, - "loss": 1.1307, - "step": 3185 - }, - { - "epoch": 0.43191215346031314, - "grad_norm": 1.563784882372342, - "learning_rate": 1.2659521820123042e-06, - "loss": 1.1812, - "step": 3186 - }, - { - "epoch": 0.432047719107978, - "grad_norm": 1.7687160879267911, - "learning_rate": 1.265528833481444e-06, - "loss": 1.1607, - "step": 3187 - }, - { - "epoch": 0.43218328475564294, - "grad_norm": 1.536900977789783, - "learning_rate": 1.2651054337454443e-06, - "loss": 1.1575, - "step": 3188 - }, - { - "epoch": 0.4323188504033078, - "grad_norm": 1.485390608522885, - "learning_rate": 1.2646819828859545e-06, - "loss": 1.1634, - "step": 3189 - }, - { - "epoch": 0.4324544160509727, - "grad_norm": 1.8918687038917237, - "learning_rate": 1.2642584809846333e-06, - "loss": 1.1568, - "step": 3190 - }, - { - "epoch": 0.43258998169863755, - "grad_norm": 2.602486985966941, - "learning_rate": 1.2638349281231503e-06, - "loss": 1.1788, - "step": 3191 - }, - { - "epoch": 0.4327255473463024, - "grad_norm": 1.579691753701753, - "learning_rate": 1.2634113243831836e-06, - "loss": 1.1737, - "step": 3192 - }, - { - "epoch": 0.43286111299396735, - "grad_norm": 1.6524643967184733, - "learning_rate": 1.2629876698464223e-06, - "loss": 1.1383, - "step": 3193 - }, - { - "epoch": 0.4329966786416322, - "grad_norm": 1.9185081776555426, - "learning_rate": 1.2625639645945652e-06, - "loss": 1.1631, - "step": 3194 - }, - { - "epoch": 0.4331322442892971, - "grad_norm": 1.5662633973464515, - "learning_rate": 1.2621402087093195e-06, - "loss": 1.1696, - "step": 3195 - }, - { - "epoch": 0.43326780993696196, - "grad_norm": 2.0359133323833984, - "learning_rate": 1.261716402272404e-06, - "loss": 1.1485, - "step": 3196 - }, - { - "epoch": 0.4334033755846269, - "grad_norm": 1.6597370912669163, - "learning_rate": 1.2612925453655462e-06, - "loss": 1.1382, - "step": 3197 - }, - { - "epoch": 0.43353894123229175, - "grad_norm": 1.555343681590784, - "learning_rate": 1.2608686380704838e-06, - "loss": 1.1022, - "step": 3198 - }, - { - "epoch": 0.4336745068799566, - "grad_norm": 3.2298364626811913, - "learning_rate": 1.2604446804689635e-06, - "loss": 1.1697, - "step": 3199 - }, - { - "epoch": 0.4338100725276215, - "grad_norm": 4.232577542417798, - "learning_rate": 1.2600206726427422e-06, - "loss": 1.1521, - "step": 3200 - }, - { - "epoch": 0.43394563817528636, - "grad_norm": 1.611605569922186, - "learning_rate": 1.2595966146735868e-06, - "loss": 1.1568, - "step": 3201 - }, - { - "epoch": 0.4340812038229513, - "grad_norm": 1.9346191649569062, - "learning_rate": 1.2591725066432734e-06, - "loss": 1.1403, - "step": 3202 - }, - { - "epoch": 0.43421676947061616, - "grad_norm": 1.596272402065424, - "learning_rate": 1.258748348633588e-06, - "loss": 1.1324, - "step": 3203 - }, - { - "epoch": 0.43435233511828103, - "grad_norm": 1.7013131973406388, - "learning_rate": 1.2583241407263259e-06, - "loss": 1.142, - "step": 3204 - }, - { - "epoch": 0.4344879007659459, - "grad_norm": 1.507977351316248, - "learning_rate": 1.2578998830032924e-06, - "loss": 1.1692, - "step": 3205 - }, - { - "epoch": 0.43462346641361077, - "grad_norm": 2.4878004295675478, - "learning_rate": 1.257475575546302e-06, - "loss": 1.1361, - "step": 3206 - }, - { - "epoch": 0.4347590320612757, - "grad_norm": 1.4530813183050282, - "learning_rate": 1.2570512184371796e-06, - "loss": 1.1372, - "step": 3207 - }, - { - "epoch": 0.43489459770894057, - "grad_norm": 5.642950804893734, - "learning_rate": 1.2566268117577583e-06, - "loss": 1.1584, - "step": 3208 - }, - { - "epoch": 0.43503016335660544, - "grad_norm": 1.5505491416945127, - "learning_rate": 1.2562023555898823e-06, - "loss": 1.1506, - "step": 3209 - }, - { - "epoch": 0.4351657290042703, - "grad_norm": 1.498696198310259, - "learning_rate": 1.2557778500154044e-06, - "loss": 1.1418, - "step": 3210 - }, - { - "epoch": 0.4353012946519352, - "grad_norm": 1.9902097112031991, - "learning_rate": 1.2553532951161868e-06, - "loss": 1.1913, - "step": 3211 - }, - { - "epoch": 0.4354368602996001, - "grad_norm": 1.4471133256828637, - "learning_rate": 1.2549286909741024e-06, - "loss": 1.1039, - "step": 3212 - }, - { - "epoch": 0.435572425947265, - "grad_norm": 1.4673989695038776, - "learning_rate": 1.254504037671032e-06, - "loss": 1.1837, - "step": 3213 - }, - { - "epoch": 0.43570799159492984, - "grad_norm": 6.571570594319828, - "learning_rate": 1.2540793352888667e-06, - "loss": 1.1284, - "step": 3214 - }, - { - "epoch": 0.4358435572425947, - "grad_norm": 1.4986502085861304, - "learning_rate": 1.2536545839095072e-06, - "loss": 1.1809, - "step": 3215 - }, - { - "epoch": 0.4359791228902596, - "grad_norm": 1.5705099429770013, - "learning_rate": 1.2532297836148636e-06, - "loss": 1.1369, - "step": 3216 - }, - { - "epoch": 0.4361146885379245, - "grad_norm": 1.4791293868196709, - "learning_rate": 1.2528049344868553e-06, - "loss": 1.158, - "step": 3217 - }, - { - "epoch": 0.4362502541855894, - "grad_norm": 1.6579404694495858, - "learning_rate": 1.2523800366074104e-06, - "loss": 1.1553, - "step": 3218 - }, - { - "epoch": 0.43638581983325425, - "grad_norm": 1.600338864299649, - "learning_rate": 1.251955090058468e-06, - "loss": 1.1705, - "step": 3219 - }, - { - "epoch": 0.4365213854809191, - "grad_norm": 2.312358584403603, - "learning_rate": 1.251530094921975e-06, - "loss": 1.1863, - "step": 3220 - }, - { - "epoch": 0.436656951128584, - "grad_norm": 1.7349811012669276, - "learning_rate": 1.2511050512798889e-06, - "loss": 1.1256, - "step": 3221 - }, - { - "epoch": 0.4367925167762489, - "grad_norm": 3.53424531356544, - "learning_rate": 1.2506799592141754e-06, - "loss": 1.1262, - "step": 3222 - }, - { - "epoch": 0.4369280824239138, - "grad_norm": 1.5239093203996437, - "learning_rate": 1.2502548188068109e-06, - "loss": 1.1756, - "step": 3223 - }, - { - "epoch": 0.43706364807157866, - "grad_norm": 1.4966105055694634, - "learning_rate": 1.24982963013978e-06, - "loss": 1.1218, - "step": 3224 - }, - { - "epoch": 0.43719921371924353, - "grad_norm": 1.55485872690755, - "learning_rate": 1.2494043932950768e-06, - "loss": 1.1682, - "step": 3225 - }, - { - "epoch": 0.4373347793669084, - "grad_norm": 1.601325191815758, - "learning_rate": 1.248979108354705e-06, - "loss": 1.1508, - "step": 3226 - }, - { - "epoch": 0.4374703450145733, - "grad_norm": 1.747882938437111, - "learning_rate": 1.2485537754006776e-06, - "loss": 1.1301, - "step": 3227 - }, - { - "epoch": 0.4376059106622382, - "grad_norm": 2.6330048791586886, - "learning_rate": 1.2481283945150164e-06, - "loss": 1.1722, - "step": 3228 - }, - { - "epoch": 0.43774147630990307, - "grad_norm": 2.0052923526765976, - "learning_rate": 1.2477029657797531e-06, - "loss": 1.1838, - "step": 3229 - }, - { - "epoch": 0.43787704195756794, - "grad_norm": 1.440005714740513, - "learning_rate": 1.247277489276928e-06, - "loss": 1.1789, - "step": 3230 - }, - { - "epoch": 0.4380126076052328, - "grad_norm": 1.8840509712574505, - "learning_rate": 1.2468519650885912e-06, - "loss": 1.1743, - "step": 3231 - }, - { - "epoch": 0.43814817325289773, - "grad_norm": 16.33458105594972, - "learning_rate": 1.2464263932968012e-06, - "loss": 1.1334, - "step": 3232 - }, - { - "epoch": 0.4382837389005626, - "grad_norm": 1.7766821117190994, - "learning_rate": 1.2460007739836265e-06, - "loss": 1.1638, - "step": 3233 - }, - { - "epoch": 0.4384193045482275, - "grad_norm": 1.8215923336766116, - "learning_rate": 1.2455751072311443e-06, - "loss": 1.1338, - "step": 3234 - }, - { - "epoch": 0.43855487019589234, - "grad_norm": 2.0103642597187235, - "learning_rate": 1.245149393121441e-06, - "loss": 1.1835, - "step": 3235 - }, - { - "epoch": 0.43869043584355727, - "grad_norm": 1.651919186041763, - "learning_rate": 1.2447236317366124e-06, - "loss": 1.164, - "step": 3236 - }, - { - "epoch": 0.43882600149122214, - "grad_norm": 1.7080742911521356, - "learning_rate": 1.2442978231587633e-06, - "loss": 1.1488, - "step": 3237 - }, - { - "epoch": 0.438961567138887, - "grad_norm": 1.5284977541376796, - "learning_rate": 1.2438719674700073e-06, - "loss": 1.1616, - "step": 3238 - }, - { - "epoch": 0.4390971327865519, - "grad_norm": 1.5481020576483937, - "learning_rate": 1.2434460647524675e-06, - "loss": 1.1114, - "step": 3239 - }, - { - "epoch": 0.43923269843421675, - "grad_norm": 2.071447032571576, - "learning_rate": 1.2430201150882755e-06, - "loss": 1.1277, - "step": 3240 - }, - { - "epoch": 0.4393682640818817, - "grad_norm": 1.4723496682261221, - "learning_rate": 1.2425941185595726e-06, - "loss": 1.1858, - "step": 3241 - }, - { - "epoch": 0.43950382972954655, - "grad_norm": 1.5376443649286868, - "learning_rate": 1.2421680752485092e-06, - "loss": 1.1505, - "step": 3242 - }, - { - "epoch": 0.4396393953772114, - "grad_norm": 2.30511740724904, - "learning_rate": 1.241741985237244e-06, - "loss": 1.0963, - "step": 3243 - }, - { - "epoch": 0.4397749610248763, - "grad_norm": 1.5235653493440653, - "learning_rate": 1.241315848607945e-06, - "loss": 1.1756, - "step": 3244 - }, - { - "epoch": 0.43991052667254116, - "grad_norm": 2.5077791047346016, - "learning_rate": 1.2408896654427894e-06, - "loss": 1.1488, - "step": 3245 - }, - { - "epoch": 0.4400460923202061, - "grad_norm": 1.9233372184503001, - "learning_rate": 1.2404634358239632e-06, - "loss": 1.209, - "step": 3246 - }, - { - "epoch": 0.44018165796787095, - "grad_norm": 1.6030113042174845, - "learning_rate": 1.2400371598336617e-06, - "loss": 1.1953, - "step": 3247 - }, - { - "epoch": 0.4403172236155358, - "grad_norm": 2.2097170084505966, - "learning_rate": 1.2396108375540885e-06, - "loss": 1.1729, - "step": 3248 - }, - { - "epoch": 0.4404527892632007, - "grad_norm": 1.7824253758388764, - "learning_rate": 1.2391844690674567e-06, - "loss": 1.1405, - "step": 3249 - }, - { - "epoch": 0.44058835491086557, - "grad_norm": 1.9686298213157343, - "learning_rate": 1.2387580544559881e-06, - "loss": 1.1471, - "step": 3250 - }, - { - "epoch": 0.4407239205585305, - "grad_norm": 1.6418082181826572, - "learning_rate": 1.2383315938019132e-06, - "loss": 1.1515, - "step": 3251 - }, - { - "epoch": 0.44085948620619536, - "grad_norm": 1.464348900551424, - "learning_rate": 1.2379050871874719e-06, - "loss": 1.1789, - "step": 3252 - }, - { - "epoch": 0.44099505185386023, - "grad_norm": 1.6436375450091674, - "learning_rate": 1.2374785346949125e-06, - "loss": 1.1246, - "step": 3253 - }, - { - "epoch": 0.4411306175015251, - "grad_norm": 2.0390049753146755, - "learning_rate": 1.2370519364064919e-06, - "loss": 1.1627, - "step": 3254 - }, - { - "epoch": 0.44126618314918997, - "grad_norm": 1.6895838627855406, - "learning_rate": 1.2366252924044767e-06, - "loss": 1.1499, - "step": 3255 - }, - { - "epoch": 0.4414017487968549, - "grad_norm": 1.7608190544120044, - "learning_rate": 1.236198602771142e-06, - "loss": 1.1505, - "step": 3256 - }, - { - "epoch": 0.44153731444451977, - "grad_norm": 3.991215205470924, - "learning_rate": 1.2357718675887707e-06, - "loss": 1.1434, - "step": 3257 - }, - { - "epoch": 0.44167288009218464, - "grad_norm": 1.6242560095037306, - "learning_rate": 1.235345086939656e-06, - "loss": 1.1144, - "step": 3258 - }, - { - "epoch": 0.4418084457398495, - "grad_norm": 1.4079850078321585, - "learning_rate": 1.234918260906099e-06, - "loss": 1.1523, - "step": 3259 - }, - { - "epoch": 0.4419440113875144, - "grad_norm": 2.1354154694076595, - "learning_rate": 1.2344913895704096e-06, - "loss": 1.1559, - "step": 3260 - }, - { - "epoch": 0.4420795770351793, - "grad_norm": 1.720468108687745, - "learning_rate": 1.234064473014907e-06, - "loss": 1.1666, - "step": 3261 - }, - { - "epoch": 0.4422151426828442, - "grad_norm": 2.5153818569895314, - "learning_rate": 1.2336375113219182e-06, - "loss": 1.1752, - "step": 3262 - }, - { - "epoch": 0.44235070833050905, - "grad_norm": 5.742628085912226, - "learning_rate": 1.2332105045737796e-06, - "loss": 1.1513, - "step": 3263 - }, - { - "epoch": 0.4424862739781739, - "grad_norm": 1.942575073983917, - "learning_rate": 1.2327834528528357e-06, - "loss": 1.1534, - "step": 3264 - }, - { - "epoch": 0.4426218396258388, - "grad_norm": 1.6247650040007169, - "learning_rate": 1.2323563562414407e-06, - "loss": 1.1502, - "step": 3265 - }, - { - "epoch": 0.4427574052735037, - "grad_norm": 1.4842343108399836, - "learning_rate": 1.2319292148219566e-06, - "loss": 1.1631, - "step": 3266 - }, - { - "epoch": 0.4428929709211686, - "grad_norm": 1.8318447518478602, - "learning_rate": 1.2315020286767538e-06, - "loss": 1.1519, - "step": 3267 - }, - { - "epoch": 0.44302853656883345, - "grad_norm": 1.4444951922378129, - "learning_rate": 1.2310747978882126e-06, - "loss": 1.1627, - "step": 3268 - }, - { - "epoch": 0.4431641022164983, - "grad_norm": 1.785447186010145, - "learning_rate": 1.2306475225387203e-06, - "loss": 1.1815, - "step": 3269 - }, - { - "epoch": 0.4432996678641632, - "grad_norm": 1.7508032686291954, - "learning_rate": 1.2302202027106739e-06, - "loss": 1.1502, - "step": 3270 - }, - { - "epoch": 0.4434352335118281, - "grad_norm": 2.5133820054776645, - "learning_rate": 1.2297928384864787e-06, - "loss": 1.1832, - "step": 3271 - }, - { - "epoch": 0.443570799159493, - "grad_norm": 2.9687017472843023, - "learning_rate": 1.2293654299485485e-06, - "loss": 1.1337, - "step": 3272 - }, - { - "epoch": 0.44370636480715786, - "grad_norm": 2.9283044129033486, - "learning_rate": 1.2289379771793059e-06, - "loss": 1.158, - "step": 3273 - }, - { - "epoch": 0.44384193045482273, - "grad_norm": 1.680816267725318, - "learning_rate": 1.2285104802611812e-06, - "loss": 1.1661, - "step": 3274 - }, - { - "epoch": 0.44397749610248766, - "grad_norm": 1.8214446284725856, - "learning_rate": 1.2280829392766143e-06, - "loss": 1.1957, - "step": 3275 - }, - { - "epoch": 0.4441130617501525, - "grad_norm": 1.6344554722226396, - "learning_rate": 1.2276553543080527e-06, - "loss": 1.1541, - "step": 3276 - }, - { - "epoch": 0.4442486273978174, - "grad_norm": 1.6416107902517707, - "learning_rate": 1.2272277254379533e-06, - "loss": 1.1536, - "step": 3277 - }, - { - "epoch": 0.44438419304548227, - "grad_norm": 1.7975116243119598, - "learning_rate": 1.2268000527487803e-06, - "loss": 1.1708, - "step": 3278 - }, - { - "epoch": 0.44451975869314714, - "grad_norm": 1.6721700640249624, - "learning_rate": 1.2263723363230076e-06, - "loss": 1.1538, - "step": 3279 - }, - { - "epoch": 0.44465532434081206, - "grad_norm": 1.5662034890834837, - "learning_rate": 1.2259445762431168e-06, - "loss": 1.1282, - "step": 3280 - }, - { - "epoch": 0.44479088998847693, - "grad_norm": 1.8444268726796649, - "learning_rate": 1.2255167725915981e-06, - "loss": 1.1584, - "step": 3281 - }, - { - "epoch": 0.4449264556361418, - "grad_norm": 1.607833260256158, - "learning_rate": 1.2250889254509496e-06, - "loss": 1.1673, - "step": 3282 - }, - { - "epoch": 0.4450620212838067, - "grad_norm": 1.7379573735629854, - "learning_rate": 1.2246610349036785e-06, - "loss": 1.1468, - "step": 3283 - }, - { - "epoch": 0.44519758693147155, - "grad_norm": 1.7223481273856827, - "learning_rate": 1.2242331010323005e-06, - "loss": 1.1536, - "step": 3284 - }, - { - "epoch": 0.44533315257913647, - "grad_norm": 1.7395335208965865, - "learning_rate": 1.2238051239193387e-06, - "loss": 1.1553, - "step": 3285 - }, - { - "epoch": 0.44546871822680134, - "grad_norm": 1.608388248875753, - "learning_rate": 1.2233771036473255e-06, - "loss": 1.1446, - "step": 3286 - }, - { - "epoch": 0.4456042838744662, - "grad_norm": 1.7390980593178567, - "learning_rate": 1.2229490402988014e-06, - "loss": 1.1595, - "step": 3287 - }, - { - "epoch": 0.4457398495221311, - "grad_norm": 1.7824982538032725, - "learning_rate": 1.2225209339563143e-06, - "loss": 1.1586, - "step": 3288 - }, - { - "epoch": 0.44587541516979595, - "grad_norm": 1.5331277766042948, - "learning_rate": 1.2220927847024218e-06, - "loss": 1.1766, - "step": 3289 - }, - { - "epoch": 0.4460109808174609, - "grad_norm": 1.6205217814494162, - "learning_rate": 1.2216645926196886e-06, - "loss": 1.1623, - "step": 3290 - }, - { - "epoch": 0.44614654646512575, - "grad_norm": 1.6425414034066592, - "learning_rate": 1.2212363577906889e-06, - "loss": 1.1265, - "step": 3291 - }, - { - "epoch": 0.4462821121127906, - "grad_norm": 1.7301649363924163, - "learning_rate": 1.2208080802980037e-06, - "loss": 1.1276, - "step": 3292 - }, - { - "epoch": 0.4464176777604555, - "grad_norm": 1.9506606813635394, - "learning_rate": 1.220379760224223e-06, - "loss": 1.1655, - "step": 3293 - }, - { - "epoch": 0.44655324340812036, - "grad_norm": 1.5954290294752294, - "learning_rate": 1.2199513976519451e-06, - "loss": 1.1423, - "step": 3294 - }, - { - "epoch": 0.4466888090557853, - "grad_norm": 2.0475042441318294, - "learning_rate": 1.2195229926637764e-06, - "loss": 1.1749, - "step": 3295 - }, - { - "epoch": 0.44682437470345016, - "grad_norm": 1.4886456816435272, - "learning_rate": 1.2190945453423315e-06, - "loss": 1.0945, - "step": 3296 - }, - { - "epoch": 0.446959940351115, - "grad_norm": 1.4685200540995849, - "learning_rate": 1.2186660557702328e-06, - "loss": 1.1816, - "step": 3297 - }, - { - "epoch": 0.4470955059987799, - "grad_norm": 1.5392291684623705, - "learning_rate": 1.2182375240301114e-06, - "loss": 1.1292, - "step": 3298 - }, - { - "epoch": 0.44723107164644477, - "grad_norm": 1.9044477523336485, - "learning_rate": 1.217808950204606e-06, - "loss": 1.1676, - "step": 3299 - }, - { - "epoch": 0.4473666372941097, - "grad_norm": 2.675918377513272, - "learning_rate": 1.217380334376364e-06, - "loss": 1.1525, - "step": 3300 - }, - { - "epoch": 0.44750220294177456, - "grad_norm": 1.7857823923563956, - "learning_rate": 1.2169516766280404e-06, - "loss": 1.1934, - "step": 3301 - }, - { - "epoch": 0.44763776858943943, - "grad_norm": 1.7439104273997204, - "learning_rate": 1.2165229770422986e-06, - "loss": 1.1987, - "step": 3302 - }, - { - "epoch": 0.4477733342371043, - "grad_norm": 1.7788413834454972, - "learning_rate": 1.2160942357018096e-06, - "loss": 1.1473, - "step": 3303 - }, - { - "epoch": 0.4479088998847692, - "grad_norm": 1.3903410097462903, - "learning_rate": 1.215665452689253e-06, - "loss": 1.135, - "step": 3304 - }, - { - "epoch": 0.4480444655324341, - "grad_norm": 1.4785474686672486, - "learning_rate": 1.2152366280873163e-06, - "loss": 1.1714, - "step": 3305 - }, - { - "epoch": 0.44818003118009897, - "grad_norm": 1.665863666355688, - "learning_rate": 1.2148077619786948e-06, - "loss": 1.1715, - "step": 3306 - }, - { - "epoch": 0.44831559682776384, - "grad_norm": 1.3672070056462164, - "learning_rate": 1.214378854446092e-06, - "loss": 1.144, - "step": 3307 - }, - { - "epoch": 0.4484511624754287, - "grad_norm": 1.5607947189036087, - "learning_rate": 1.2139499055722193e-06, - "loss": 1.1775, - "step": 3308 - }, - { - "epoch": 0.4485867281230936, - "grad_norm": 1.792482544467519, - "learning_rate": 1.213520915439796e-06, - "loss": 1.0883, - "step": 3309 - }, - { - "epoch": 0.4487222937707585, - "grad_norm": 1.8839115733404674, - "learning_rate": 1.2130918841315496e-06, - "loss": 1.1603, - "step": 3310 - }, - { - "epoch": 0.4488578594184234, - "grad_norm": 6.1033074495148165, - "learning_rate": 1.2126628117302156e-06, - "loss": 1.1326, - "step": 3311 - }, - { - "epoch": 0.44899342506608825, - "grad_norm": 1.7119958481581332, - "learning_rate": 1.212233698318537e-06, - "loss": 1.1645, - "step": 3312 - }, - { - "epoch": 0.4491289907137531, - "grad_norm": 1.547338697328333, - "learning_rate": 1.2118045439792648e-06, - "loss": 1.1554, - "step": 3313 - }, - { - "epoch": 0.44926455636141804, - "grad_norm": 2.964396021602258, - "learning_rate": 1.2113753487951584e-06, - "loss": 1.1437, - "step": 3314 - }, - { - "epoch": 0.4494001220090829, - "grad_norm": 1.7718205734956578, - "learning_rate": 1.2109461128489842e-06, - "loss": 1.1579, - "step": 3315 - }, - { - "epoch": 0.4495356876567478, - "grad_norm": 1.547725392783264, - "learning_rate": 1.2105168362235176e-06, - "loss": 1.152, - "step": 3316 - }, - { - "epoch": 0.44967125330441265, - "grad_norm": 1.7142684813329903, - "learning_rate": 1.2100875190015405e-06, - "loss": 1.1293, - "step": 3317 - }, - { - "epoch": 0.4498068189520775, - "grad_norm": 1.4330504597041882, - "learning_rate": 1.2096581612658438e-06, - "loss": 1.1734, - "step": 3318 - }, - { - "epoch": 0.44994238459974245, - "grad_norm": 1.7318090156575805, - "learning_rate": 1.2092287630992257e-06, - "loss": 1.1831, - "step": 3319 - }, - { - "epoch": 0.4500779502474073, - "grad_norm": 1.4397016206181021, - "learning_rate": 1.208799324584492e-06, - "loss": 1.1232, - "step": 3320 - }, - { - "epoch": 0.4502135158950722, - "grad_norm": 1.8340632720464851, - "learning_rate": 1.2083698458044572e-06, - "loss": 1.1886, - "step": 3321 - }, - { - "epoch": 0.45034908154273706, - "grad_norm": 1.8205679266246426, - "learning_rate": 1.207940326841942e-06, - "loss": 1.1451, - "step": 3322 - }, - { - "epoch": 0.45048464719040193, - "grad_norm": 1.5575606975658127, - "learning_rate": 1.2075107677797763e-06, - "loss": 1.1329, - "step": 3323 - }, - { - "epoch": 0.45062021283806686, - "grad_norm": 1.6471653336499343, - "learning_rate": 1.2070811687007969e-06, - "loss": 1.1407, - "step": 3324 - }, - { - "epoch": 0.45075577848573173, - "grad_norm": 1.6487416319869697, - "learning_rate": 1.2066515296878488e-06, - "loss": 1.1749, - "step": 3325 - }, - { - "epoch": 0.4508913441333966, - "grad_norm": 1.5447305743387372, - "learning_rate": 1.2062218508237845e-06, - "loss": 1.1831, - "step": 3326 - }, - { - "epoch": 0.45102690978106147, - "grad_norm": 1.552864351033013, - "learning_rate": 1.2057921321914638e-06, - "loss": 1.1056, - "step": 3327 - }, - { - "epoch": 0.45116247542872634, - "grad_norm": 1.4556329271799824, - "learning_rate": 1.205362373873755e-06, - "loss": 1.1276, - "step": 3328 - }, - { - "epoch": 0.45129804107639127, - "grad_norm": 1.547389685310996, - "learning_rate": 1.2049325759535334e-06, - "loss": 1.151, - "step": 3329 - }, - { - "epoch": 0.45143360672405614, - "grad_norm": 2.0746634710983995, - "learning_rate": 1.2045027385136823e-06, - "loss": 1.1345, - "step": 3330 - }, - { - "epoch": 0.451569172371721, - "grad_norm": 1.9199974406318976, - "learning_rate": 1.2040728616370924e-06, - "loss": 1.1524, - "step": 3331 - }, - { - "epoch": 0.4517047380193859, - "grad_norm": 2.7571291562833755, - "learning_rate": 1.2036429454066616e-06, - "loss": 1.1672, - "step": 3332 - }, - { - "epoch": 0.45184030366705075, - "grad_norm": 3.9844818432773272, - "learning_rate": 1.2032129899052965e-06, - "loss": 1.1983, - "step": 3333 - }, - { - "epoch": 0.4519758693147157, - "grad_norm": 1.7121079259425511, - "learning_rate": 1.2027829952159104e-06, - "loss": 1.1775, - "step": 3334 - }, - { - "epoch": 0.45211143496238054, - "grad_norm": 1.4977541180913634, - "learning_rate": 1.2023529614214242e-06, - "loss": 1.1428, - "step": 3335 - }, - { - "epoch": 0.4522470006100454, - "grad_norm": 1.80431070848047, - "learning_rate": 1.2019228886047666e-06, - "loss": 1.1642, - "step": 3336 - }, - { - "epoch": 0.4523825662577103, - "grad_norm": 2.1593292777049355, - "learning_rate": 1.2014927768488739e-06, - "loss": 1.1259, - "step": 3337 - }, - { - "epoch": 0.45251813190537515, - "grad_norm": 1.5109637946841203, - "learning_rate": 1.2010626262366896e-06, - "loss": 1.1413, - "step": 3338 - }, - { - "epoch": 0.4526536975530401, - "grad_norm": 2.8966970215984857, - "learning_rate": 1.2006324368511651e-06, - "loss": 1.1706, - "step": 3339 - }, - { - "epoch": 0.45278926320070495, - "grad_norm": 1.7841079914665035, - "learning_rate": 1.200202208775259e-06, - "loss": 1.1612, - "step": 3340 - }, - { - "epoch": 0.4529248288483698, - "grad_norm": 1.5216754077132102, - "learning_rate": 1.1997719420919368e-06, - "loss": 1.1553, - "step": 3341 - }, - { - "epoch": 0.4530603944960347, - "grad_norm": 1.557661184347607, - "learning_rate": 1.1993416368841727e-06, - "loss": 1.1772, - "step": 3342 - }, - { - "epoch": 0.45319596014369956, - "grad_norm": 2.2319256928960165, - "learning_rate": 1.1989112932349473e-06, - "loss": 1.1461, - "step": 3343 - }, - { - "epoch": 0.4533315257913645, - "grad_norm": 1.962236803021401, - "learning_rate": 1.1984809112272493e-06, - "loss": 1.1556, - "step": 3344 - }, - { - "epoch": 0.45346709143902936, - "grad_norm": 1.4760737564453223, - "learning_rate": 1.1980504909440743e-06, - "loss": 1.1526, - "step": 3345 - }, - { - "epoch": 0.4536026570866942, - "grad_norm": 2.01301904372849, - "learning_rate": 1.1976200324684253e-06, - "loss": 1.1867, - "step": 3346 - }, - { - "epoch": 0.4537382227343591, - "grad_norm": 1.4131492730910546, - "learning_rate": 1.197189535883313e-06, - "loss": 1.1582, - "step": 3347 - }, - { - "epoch": 0.45387378838202397, - "grad_norm": 1.5826239201200616, - "learning_rate": 1.1967590012717552e-06, - "loss": 1.0993, - "step": 3348 - }, - { - "epoch": 0.4540093540296889, - "grad_norm": 1.5335423911296349, - "learning_rate": 1.1963284287167772e-06, - "loss": 1.2, - "step": 3349 - }, - { - "epoch": 0.45414491967735376, - "grad_norm": 1.6420133495622085, - "learning_rate": 1.1958978183014111e-06, - "loss": 1.1589, - "step": 3350 - }, - { - "epoch": 0.45428048532501863, - "grad_norm": 1.5694745038522437, - "learning_rate": 1.1954671701086976e-06, - "loss": 1.1566, - "step": 3351 - }, - { - "epoch": 0.4544160509726835, - "grad_norm": 1.405865806749544, - "learning_rate": 1.195036484221683e-06, - "loss": 1.1825, - "step": 3352 - }, - { - "epoch": 0.45455161662034843, - "grad_norm": 2.9841549547383477, - "learning_rate": 1.194605760723422e-06, - "loss": 1.1437, - "step": 3353 - }, - { - "epoch": 0.4546871822680133, - "grad_norm": 1.4828440805288003, - "learning_rate": 1.1941749996969762e-06, - "loss": 1.1755, - "step": 3354 - }, - { - "epoch": 0.45482274791567817, - "grad_norm": 1.5813056478637613, - "learning_rate": 1.1937442012254144e-06, - "loss": 1.1478, - "step": 3355 - }, - { - "epoch": 0.45495831356334304, - "grad_norm": 2.045158831463965, - "learning_rate": 1.1933133653918126e-06, - "loss": 1.1681, - "step": 3356 - }, - { - "epoch": 0.4550938792110079, - "grad_norm": 2.537995643909469, - "learning_rate": 1.1928824922792543e-06, - "loss": 1.1682, - "step": 3357 - }, - { - "epoch": 0.45522944485867284, - "grad_norm": 1.4681169147828277, - "learning_rate": 1.1924515819708298e-06, - "loss": 1.1595, - "step": 3358 - }, - { - "epoch": 0.4553650105063377, - "grad_norm": 1.998353359519511, - "learning_rate": 1.1920206345496372e-06, - "loss": 1.1763, - "step": 3359 - }, - { - "epoch": 0.4555005761540026, - "grad_norm": 1.5564454495359157, - "learning_rate": 1.1915896500987809e-06, - "loss": 1.106, - "step": 3360 - }, - { - "epoch": 0.45563614180166745, - "grad_norm": 3.404020772293265, - "learning_rate": 1.1911586287013725e-06, - "loss": 1.1605, - "step": 3361 - }, - { - "epoch": 0.4557717074493323, - "grad_norm": 1.463561729281201, - "learning_rate": 1.1907275704405316e-06, - "loss": 1.1852, - "step": 3362 - }, - { - "epoch": 0.45590727309699725, - "grad_norm": 1.5366917880386286, - "learning_rate": 1.1902964753993842e-06, - "loss": 1.1574, - "step": 3363 - }, - { - "epoch": 0.4560428387446621, - "grad_norm": 2.630766547163037, - "learning_rate": 1.1898653436610637e-06, - "loss": 1.1451, - "step": 3364 - }, - { - "epoch": 0.456178404392327, - "grad_norm": 1.4356278082908327, - "learning_rate": 1.1894341753087105e-06, - "loss": 1.1286, - "step": 3365 - }, - { - "epoch": 0.45631397003999186, - "grad_norm": 2.1643592272010426, - "learning_rate": 1.1890029704254716e-06, - "loss": 1.1443, - "step": 3366 - }, - { - "epoch": 0.4564495356876567, - "grad_norm": 1.8542390086800153, - "learning_rate": 1.188571729094502e-06, - "loss": 1.1208, - "step": 3367 - }, - { - "epoch": 0.45658510133532165, - "grad_norm": 1.8375508848702908, - "learning_rate": 1.1881404513989629e-06, - "loss": 1.1546, - "step": 3368 - }, - { - "epoch": 0.4567206669829865, - "grad_norm": 1.6307993911120482, - "learning_rate": 1.1877091374220228e-06, - "loss": 1.1797, - "step": 3369 - }, - { - "epoch": 0.4568562326306514, - "grad_norm": 2.6750429257379342, - "learning_rate": 1.1872777872468572e-06, - "loss": 1.1434, - "step": 3370 - }, - { - "epoch": 0.45699179827831626, - "grad_norm": 2.985921028839723, - "learning_rate": 1.1868464009566485e-06, - "loss": 1.1472, - "step": 3371 - }, - { - "epoch": 0.45712736392598113, - "grad_norm": 1.6309629769090157, - "learning_rate": 1.1864149786345868e-06, - "loss": 1.135, - "step": 3372 - }, - { - "epoch": 0.45726292957364606, - "grad_norm": 1.4537892923876943, - "learning_rate": 1.1859835203638675e-06, - "loss": 1.1715, - "step": 3373 - }, - { - "epoch": 0.45739849522131093, - "grad_norm": 1.3830602721200975, - "learning_rate": 1.1855520262276943e-06, - "loss": 1.0847, - "step": 3374 - }, - { - "epoch": 0.4575340608689758, - "grad_norm": 1.623009000010444, - "learning_rate": 1.1851204963092775e-06, - "loss": 1.13, - "step": 3375 - }, - { - "epoch": 0.45766962651664067, - "grad_norm": 1.7215118270343857, - "learning_rate": 1.1846889306918344e-06, - "loss": 1.1661, - "step": 3376 - }, - { - "epoch": 0.45780519216430554, - "grad_norm": 1.681022466616578, - "learning_rate": 1.1842573294585889e-06, - "loss": 1.1693, - "step": 3377 - }, - { - "epoch": 0.45794075781197047, - "grad_norm": 1.6517533286039925, - "learning_rate": 1.1838256926927718e-06, - "loss": 1.1152, - "step": 3378 - }, - { - "epoch": 0.45807632345963534, - "grad_norm": 1.6332659624051278, - "learning_rate": 1.1833940204776208e-06, - "loss": 1.1756, - "step": 3379 - }, - { - "epoch": 0.4582118891073002, - "grad_norm": 3.5973810451632238, - "learning_rate": 1.1829623128963807e-06, - "loss": 1.1737, - "step": 3380 - }, - { - "epoch": 0.4583474547549651, - "grad_norm": 2.1063365399170224, - "learning_rate": 1.1825305700323025e-06, - "loss": 1.1402, - "step": 3381 - }, - { - "epoch": 0.45848302040262995, - "grad_norm": 1.7374132837656904, - "learning_rate": 1.182098791968645e-06, - "loss": 1.117, - "step": 3382 - }, - { - "epoch": 0.4586185860502949, - "grad_norm": 1.6492865015413294, - "learning_rate": 1.1816669787886727e-06, - "loss": 1.1144, - "step": 3383 - }, - { - "epoch": 0.45875415169795974, - "grad_norm": 2.748047782514574, - "learning_rate": 1.1812351305756575e-06, - "loss": 1.1702, - "step": 3384 - }, - { - "epoch": 0.4588897173456246, - "grad_norm": 1.539550841693305, - "learning_rate": 1.1808032474128782e-06, - "loss": 1.1779, - "step": 3385 - }, - { - "epoch": 0.4590252829932895, - "grad_norm": 1.6666799411348443, - "learning_rate": 1.1803713293836198e-06, - "loss": 1.1341, - "step": 3386 - }, - { - "epoch": 0.45916084864095436, - "grad_norm": 2.150674927245081, - "learning_rate": 1.179939376571174e-06, - "loss": 1.191, - "step": 3387 - }, - { - "epoch": 0.4592964142886193, - "grad_norm": 1.6829115178898237, - "learning_rate": 1.1795073890588401e-06, - "loss": 1.1577, - "step": 3388 - }, - { - "epoch": 0.45943197993628415, - "grad_norm": 2.7051111700749844, - "learning_rate": 1.179075366929923e-06, - "loss": 1.1416, - "step": 3389 - }, - { - "epoch": 0.459567545583949, - "grad_norm": 3.196928646978494, - "learning_rate": 1.1786433102677348e-06, - "loss": 1.1403, - "step": 3390 - }, - { - "epoch": 0.4597031112316139, - "grad_norm": 1.7499724696914283, - "learning_rate": 1.1782112191555946e-06, - "loss": 1.1539, - "step": 3391 - }, - { - "epoch": 0.4598386768792788, - "grad_norm": 1.883604360177047, - "learning_rate": 1.1777790936768272e-06, - "loss": 1.1766, - "step": 3392 - }, - { - "epoch": 0.4599742425269437, - "grad_norm": 1.947487441941586, - "learning_rate": 1.1773469339147653e-06, - "loss": 1.1304, - "step": 3393 - }, - { - "epoch": 0.46010980817460856, - "grad_norm": 1.5286862851894245, - "learning_rate": 1.1769147399527466e-06, - "loss": 1.1438, - "step": 3394 - }, - { - "epoch": 0.46024537382227343, - "grad_norm": 2.709978568337033, - "learning_rate": 1.176482511874117e-06, - "loss": 1.1828, - "step": 3395 - }, - { - "epoch": 0.4603809394699383, - "grad_norm": 1.4031025757964122, - "learning_rate": 1.1760502497622281e-06, - "loss": 1.1569, - "step": 3396 - }, - { - "epoch": 0.4605165051176032, - "grad_norm": 1.9752835552067582, - "learning_rate": 1.1756179537004383e-06, - "loss": 1.1641, - "step": 3397 - }, - { - "epoch": 0.4606520707652681, - "grad_norm": 2.0542190341804694, - "learning_rate": 1.175185623772112e-06, - "loss": 1.1227, - "step": 3398 - }, - { - "epoch": 0.46078763641293297, - "grad_norm": 1.8945649203690382, - "learning_rate": 1.1747532600606213e-06, - "loss": 1.1413, - "step": 3399 - }, - { - "epoch": 0.46092320206059784, - "grad_norm": 1.6039852749787311, - "learning_rate": 1.174320862649344e-06, - "loss": 1.1533, - "step": 3400 - }, - { - "epoch": 0.4610587677082627, - "grad_norm": 2.214731351963026, - "learning_rate": 1.173888431621664e-06, - "loss": 1.1535, - "step": 3401 - }, - { - "epoch": 0.46119433335592763, - "grad_norm": 1.513109189506774, - "learning_rate": 1.1734559670609727e-06, - "loss": 1.1252, - "step": 3402 - }, - { - "epoch": 0.4613298990035925, - "grad_norm": 1.4613202784349688, - "learning_rate": 1.1730234690506671e-06, - "loss": 1.1235, - "step": 3403 - }, - { - "epoch": 0.4614654646512574, - "grad_norm": 1.5618015867180446, - "learning_rate": 1.1725909376741515e-06, - "loss": 1.1321, - "step": 3404 - }, - { - "epoch": 0.46160103029892224, - "grad_norm": 1.6136561156407137, - "learning_rate": 1.1721583730148356e-06, - "loss": 1.1483, - "step": 3405 - }, - { - "epoch": 0.4617365959465871, - "grad_norm": 1.5026375219832422, - "learning_rate": 1.1717257751561367e-06, - "loss": 1.1403, - "step": 3406 - }, - { - "epoch": 0.46187216159425204, - "grad_norm": 1.5582720373011743, - "learning_rate": 1.1712931441814775e-06, - "loss": 1.1147, - "step": 3407 - }, - { - "epoch": 0.4620077272419169, - "grad_norm": 2.05741881952399, - "learning_rate": 1.1708604801742877e-06, - "loss": 1.158, - "step": 3408 - }, - { - "epoch": 0.4621432928895818, - "grad_norm": 1.713945328475039, - "learning_rate": 1.1704277832180027e-06, - "loss": 1.145, - "step": 3409 - }, - { - "epoch": 0.46227885853724665, - "grad_norm": 1.7931190270837247, - "learning_rate": 1.1699950533960652e-06, - "loss": 1.1443, - "step": 3410 - }, - { - "epoch": 0.4624144241849115, - "grad_norm": 1.5947320443964803, - "learning_rate": 1.1695622907919233e-06, - "loss": 1.1502, - "step": 3411 - }, - { - "epoch": 0.46254998983257645, - "grad_norm": 2.3789703888511182, - "learning_rate": 1.1691294954890323e-06, - "loss": 1.1518, - "step": 3412 - }, - { - "epoch": 0.4626855554802413, - "grad_norm": 1.4346717169947016, - "learning_rate": 1.168696667570853e-06, - "loss": 1.1454, - "step": 3413 - }, - { - "epoch": 0.4628211211279062, - "grad_norm": 1.4551661726266283, - "learning_rate": 1.1682638071208532e-06, - "loss": 1.1286, - "step": 3414 - }, - { - "epoch": 0.46295668677557106, - "grad_norm": 1.6380596965761713, - "learning_rate": 1.1678309142225062e-06, - "loss": 1.159, - "step": 3415 - }, - { - "epoch": 0.46309225242323593, - "grad_norm": 1.4816350233813613, - "learning_rate": 1.1673979889592923e-06, - "loss": 1.1453, - "step": 3416 - }, - { - "epoch": 0.46322781807090085, - "grad_norm": 2.0795376549099465, - "learning_rate": 1.1669650314146973e-06, - "loss": 1.1836, - "step": 3417 - }, - { - "epoch": 0.4633633837185657, - "grad_norm": 1.7188072156364993, - "learning_rate": 1.166532041672214e-06, - "loss": 1.183, - "step": 3418 - }, - { - "epoch": 0.4634989493662306, - "grad_norm": 1.9669977990323493, - "learning_rate": 1.166099019815341e-06, - "loss": 1.1177, - "step": 3419 - }, - { - "epoch": 0.46363451501389547, - "grad_norm": 1.5229918611264304, - "learning_rate": 1.1656659659275835e-06, - "loss": 1.1537, - "step": 3420 - }, - { - "epoch": 0.46377008066156034, - "grad_norm": 1.754452511779192, - "learning_rate": 1.1652328800924517e-06, - "loss": 1.1291, - "step": 3421 - }, - { - "epoch": 0.46390564630922526, - "grad_norm": 2.090700064945903, - "learning_rate": 1.1647997623934636e-06, - "loss": 1.1486, - "step": 3422 - }, - { - "epoch": 0.46404121195689013, - "grad_norm": 1.4184816729364818, - "learning_rate": 1.164366612914142e-06, - "loss": 1.137, - "step": 3423 - }, - { - "epoch": 0.464176777604555, - "grad_norm": 2.539394226587885, - "learning_rate": 1.1639334317380164e-06, - "loss": 1.1209, - "step": 3424 - }, - { - "epoch": 0.4643123432522199, - "grad_norm": 1.714356764268565, - "learning_rate": 1.1635002189486228e-06, - "loss": 1.1663, - "step": 3425 - }, - { - "epoch": 0.46444790889988474, - "grad_norm": 2.154329039942759, - "learning_rate": 1.1630669746295022e-06, - "loss": 1.1853, - "step": 3426 - }, - { - "epoch": 0.46458347454754967, - "grad_norm": 1.4852294414370182, - "learning_rate": 1.1626336988642029e-06, - "loss": 1.1504, - "step": 3427 - }, - { - "epoch": 0.46471904019521454, - "grad_norm": 2.305827956312661, - "learning_rate": 1.1622003917362788e-06, - "loss": 1.1335, - "step": 3428 - }, - { - "epoch": 0.4648546058428794, - "grad_norm": 1.6923961469874318, - "learning_rate": 1.1617670533292892e-06, - "loss": 1.1414, - "step": 3429 - }, - { - "epoch": 0.4649901714905443, - "grad_norm": 1.5822904527013693, - "learning_rate": 1.1613336837268001e-06, - "loss": 1.1421, - "step": 3430 - }, - { - "epoch": 0.4651257371382092, - "grad_norm": 1.892925694541542, - "learning_rate": 1.1609002830123837e-06, - "loss": 1.1676, - "step": 3431 - }, - { - "epoch": 0.4652613027858741, - "grad_norm": 2.2258424193966246, - "learning_rate": 1.1604668512696179e-06, - "loss": 1.1927, - "step": 3432 - }, - { - "epoch": 0.46539686843353895, - "grad_norm": 1.7296071023994972, - "learning_rate": 1.1600333885820867e-06, - "loss": 1.146, - "step": 3433 - }, - { - "epoch": 0.4655324340812038, - "grad_norm": 2.4518343035333463, - "learning_rate": 1.1595998950333793e-06, - "loss": 1.1488, - "step": 3434 - }, - { - "epoch": 0.4656679997288687, - "grad_norm": 1.7391401923269687, - "learning_rate": 1.159166370707092e-06, - "loss": 1.174, - "step": 3435 - }, - { - "epoch": 0.4658035653765336, - "grad_norm": 1.552300165, - "learning_rate": 1.1587328156868266e-06, - "loss": 1.2169, - "step": 3436 - }, - { - "epoch": 0.4659391310241985, - "grad_norm": 2.0678483330885933, - "learning_rate": 1.1582992300561906e-06, - "loss": 1.1984, - "step": 3437 - }, - { - "epoch": 0.46607469667186335, - "grad_norm": 3.927103582864385, - "learning_rate": 1.157865613898798e-06, - "loss": 1.1383, - "step": 3438 - }, - { - "epoch": 0.4662102623195282, - "grad_norm": 2.1019650022546186, - "learning_rate": 1.1574319672982673e-06, - "loss": 1.1243, - "step": 3439 - }, - { - "epoch": 0.4663458279671931, - "grad_norm": 1.420555724650725, - "learning_rate": 1.1569982903382247e-06, - "loss": 1.1385, - "step": 3440 - }, - { - "epoch": 0.466481393614858, - "grad_norm": 1.4983658405408697, - "learning_rate": 1.156564583102301e-06, - "loss": 1.0997, - "step": 3441 - }, - { - "epoch": 0.4666169592625229, - "grad_norm": 1.3944877221710248, - "learning_rate": 1.1561308456741336e-06, - "loss": 1.145, - "step": 3442 - }, - { - "epoch": 0.46675252491018776, - "grad_norm": 1.6939676094159934, - "learning_rate": 1.1556970781373648e-06, - "loss": 1.1692, - "step": 3443 - }, - { - "epoch": 0.46688809055785263, - "grad_norm": 1.6577709222647992, - "learning_rate": 1.1552632805756436e-06, - "loss": 1.1551, - "step": 3444 - }, - { - "epoch": 0.4670236562055175, - "grad_norm": 1.6221387141361685, - "learning_rate": 1.154829453072624e-06, - "loss": 1.1744, - "step": 3445 - }, - { - "epoch": 0.4671592218531824, - "grad_norm": 1.4934217912831786, - "learning_rate": 1.1543955957119667e-06, - "loss": 1.1541, - "step": 3446 - }, - { - "epoch": 0.4672947875008473, - "grad_norm": 1.5753337785725665, - "learning_rate": 1.1539617085773373e-06, - "loss": 1.1297, - "step": 3447 - }, - { - "epoch": 0.46743035314851217, - "grad_norm": 2.042005952754216, - "learning_rate": 1.1535277917524079e-06, - "loss": 1.1635, - "step": 3448 - }, - { - "epoch": 0.46756591879617704, - "grad_norm": 1.9486029801667553, - "learning_rate": 1.153093845320856e-06, - "loss": 1.1567, - "step": 3449 - }, - { - "epoch": 0.4677014844438419, - "grad_norm": 2.897086707218251, - "learning_rate": 1.152659869366364e-06, - "loss": 1.1482, - "step": 3450 - }, - { - "epoch": 0.46783705009150683, - "grad_norm": 1.675148485588197, - "learning_rate": 1.1522258639726215e-06, - "loss": 1.1651, - "step": 3451 - }, - { - "epoch": 0.4679726157391717, - "grad_norm": 1.449241037723842, - "learning_rate": 1.1517918292233226e-06, - "loss": 1.1369, - "step": 3452 - }, - { - "epoch": 0.4681081813868366, - "grad_norm": 2.42959256914325, - "learning_rate": 1.1513577652021678e-06, - "loss": 1.1761, - "step": 3453 - }, - { - "epoch": 0.46824374703450145, - "grad_norm": 2.146067508865502, - "learning_rate": 1.1509236719928627e-06, - "loss": 1.1762, - "step": 3454 - }, - { - "epoch": 0.4683793126821663, - "grad_norm": 1.5734806144850175, - "learning_rate": 1.1504895496791185e-06, - "loss": 1.1689, - "step": 3455 - }, - { - "epoch": 0.46851487832983124, - "grad_norm": 2.440662532404865, - "learning_rate": 1.1500553983446526e-06, - "loss": 1.1709, - "step": 3456 - }, - { - "epoch": 0.4686504439774961, - "grad_norm": 1.8476485891967291, - "learning_rate": 1.1496212180731877e-06, - "loss": 1.1846, - "step": 3457 - }, - { - "epoch": 0.468786009625161, - "grad_norm": 1.598923796792217, - "learning_rate": 1.149187008948452e-06, - "loss": 1.1983, - "step": 3458 - }, - { - "epoch": 0.46892157527282585, - "grad_norm": 1.5757200734586951, - "learning_rate": 1.1487527710541794e-06, - "loss": 1.1323, - "step": 3459 - }, - { - "epoch": 0.4690571409204907, - "grad_norm": 1.5817369966169483, - "learning_rate": 1.1483185044741088e-06, - "loss": 1.1255, - "step": 3460 - }, - { - "epoch": 0.46919270656815565, - "grad_norm": 1.4610154660583967, - "learning_rate": 1.1478842092919854e-06, - "loss": 1.1443, - "step": 3461 - }, - { - "epoch": 0.4693282722158205, - "grad_norm": 1.9515605073465088, - "learning_rate": 1.1474498855915596e-06, - "loss": 1.1242, - "step": 3462 - }, - { - "epoch": 0.4694638378634854, - "grad_norm": 1.9907881192487036, - "learning_rate": 1.1470155334565869e-06, - "loss": 1.1843, - "step": 3463 - }, - { - "epoch": 0.46959940351115026, - "grad_norm": 1.7901217422935565, - "learning_rate": 1.1465811529708295e-06, - "loss": 1.1478, - "step": 3464 - }, - { - "epoch": 0.46973496915881513, - "grad_norm": 2.5006089395308853, - "learning_rate": 1.1461467442180537e-06, - "loss": 1.1617, - "step": 3465 - }, - { - "epoch": 0.46987053480648006, - "grad_norm": 2.414645502083628, - "learning_rate": 1.1457123072820319e-06, - "loss": 1.1659, - "step": 3466 - }, - { - "epoch": 0.4700061004541449, - "grad_norm": 1.641511730048037, - "learning_rate": 1.1452778422465416e-06, - "loss": 1.1494, - "step": 3467 - }, - { - "epoch": 0.4701416661018098, - "grad_norm": 1.6361453295987942, - "learning_rate": 1.1448433491953665e-06, - "loss": 1.1805, - "step": 3468 - }, - { - "epoch": 0.47027723174947467, - "grad_norm": 1.5832396902165105, - "learning_rate": 1.1444088282122945e-06, - "loss": 1.1744, - "step": 3469 - }, - { - "epoch": 0.47041279739713954, - "grad_norm": 1.6795600103588808, - "learning_rate": 1.1439742793811205e-06, - "loss": 1.1667, - "step": 3470 - }, - { - "epoch": 0.47054836304480446, - "grad_norm": 1.9030930538895539, - "learning_rate": 1.1435397027856425e-06, - "loss": 1.1455, - "step": 3471 - }, - { - "epoch": 0.47068392869246933, - "grad_norm": 1.4848307805859566, - "learning_rate": 1.1431050985096663e-06, - "loss": 1.135, - "step": 3472 - }, - { - "epoch": 0.4708194943401342, - "grad_norm": 1.5981666669549186, - "learning_rate": 1.142670466637001e-06, - "loss": 1.1856, - "step": 3473 - }, - { - "epoch": 0.4709550599877991, - "grad_norm": 1.4113213201107069, - "learning_rate": 1.142235807251463e-06, - "loss": 1.1278, - "step": 3474 - }, - { - "epoch": 0.471090625635464, - "grad_norm": 2.1632224331304153, - "learning_rate": 1.1418011204368717e-06, - "loss": 1.1493, - "step": 3475 - }, - { - "epoch": 0.47122619128312887, - "grad_norm": 2.0273461490755165, - "learning_rate": 1.1413664062770538e-06, - "loss": 1.1192, - "step": 3476 - }, - { - "epoch": 0.47136175693079374, - "grad_norm": 1.5991104647776848, - "learning_rate": 1.1409316648558404e-06, - "loss": 1.1343, - "step": 3477 - }, - { - "epoch": 0.4714973225784586, - "grad_norm": 1.4689919523373078, - "learning_rate": 1.140496896257068e-06, - "loss": 1.1631, - "step": 3478 - }, - { - "epoch": 0.4716328882261235, - "grad_norm": 1.7555138779640804, - "learning_rate": 1.140062100564578e-06, - "loss": 1.1303, - "step": 3479 - }, - { - "epoch": 0.4717684538737884, - "grad_norm": 1.5018546144556142, - "learning_rate": 1.1396272778622175e-06, - "loss": 1.1531, - "step": 3480 - }, - { - "epoch": 0.4719040195214533, - "grad_norm": 1.4964562484280601, - "learning_rate": 1.1391924282338388e-06, - "loss": 1.1312, - "step": 3481 - }, - { - "epoch": 0.47203958516911815, - "grad_norm": 1.5680142847535083, - "learning_rate": 1.1387575517632987e-06, - "loss": 1.1093, - "step": 3482 - }, - { - "epoch": 0.472175150816783, - "grad_norm": 1.7117594713547797, - "learning_rate": 1.1383226485344604e-06, - "loss": 1.1148, - "step": 3483 - }, - { - "epoch": 0.4723107164644479, - "grad_norm": 1.5501612860609972, - "learning_rate": 1.137887718631191e-06, - "loss": 1.1743, - "step": 3484 - }, - { - "epoch": 0.4724462821121128, - "grad_norm": 1.5014904146999122, - "learning_rate": 1.1374527621373636e-06, - "loss": 1.1496, - "step": 3485 - }, - { - "epoch": 0.4725818477597777, - "grad_norm": 1.6575657562580006, - "learning_rate": 1.1370177791368558e-06, - "loss": 1.1271, - "step": 3486 - }, - { - "epoch": 0.47271741340744255, - "grad_norm": 1.5079871303232595, - "learning_rate": 1.136582769713551e-06, - "loss": 1.1437, - "step": 3487 - }, - { - "epoch": 0.4728529790551074, - "grad_norm": 1.3974102484443782, - "learning_rate": 1.136147733951337e-06, - "loss": 1.1295, - "step": 3488 - }, - { - "epoch": 0.4729885447027723, - "grad_norm": 1.6424085656564664, - "learning_rate": 1.1357126719341076e-06, - "loss": 1.1472, - "step": 3489 - }, - { - "epoch": 0.4731241103504372, - "grad_norm": 1.9094533572269357, - "learning_rate": 1.1352775837457605e-06, - "loss": 1.1398, - "step": 3490 - }, - { - "epoch": 0.4732596759981021, - "grad_norm": 1.8811731682995831, - "learning_rate": 1.134842469470199e-06, - "loss": 1.1599, - "step": 3491 - }, - { - "epoch": 0.47339524164576696, - "grad_norm": 1.5803695903490464, - "learning_rate": 1.1344073291913317e-06, - "loss": 1.1865, - "step": 3492 - }, - { - "epoch": 0.47353080729343183, - "grad_norm": 1.7041300096324976, - "learning_rate": 1.133972162993072e-06, - "loss": 1.1512, - "step": 3493 - }, - { - "epoch": 0.4736663729410967, - "grad_norm": 1.899278853905375, - "learning_rate": 1.1335369709593382e-06, - "loss": 1.1306, - "step": 3494 - }, - { - "epoch": 0.47380193858876163, - "grad_norm": 1.4353409014035914, - "learning_rate": 1.1331017531740533e-06, - "loss": 1.1394, - "step": 3495 - }, - { - "epoch": 0.4739375042364265, - "grad_norm": 1.7215127423678345, - "learning_rate": 1.132666509721146e-06, - "loss": 1.1692, - "step": 3496 - }, - { - "epoch": 0.47407306988409137, - "grad_norm": 1.4851389769314325, - "learning_rate": 1.1322312406845498e-06, - "loss": 1.1525, - "step": 3497 - }, - { - "epoch": 0.47420863553175624, - "grad_norm": 1.8688507473367009, - "learning_rate": 1.1317959461482028e-06, - "loss": 1.1738, - "step": 3498 - }, - { - "epoch": 0.4743442011794211, - "grad_norm": 1.7946664777533146, - "learning_rate": 1.1313606261960475e-06, - "loss": 1.1564, - "step": 3499 - }, - { - "epoch": 0.47447976682708604, - "grad_norm": 1.5953237606369206, - "learning_rate": 1.1309252809120324e-06, - "loss": 1.1249, - "step": 3500 - }, - { - "epoch": 0.4746153324747509, - "grad_norm": 1.591117908241102, - "learning_rate": 1.1304899103801105e-06, - "loss": 1.177, - "step": 3501 - }, - { - "epoch": 0.4747508981224158, - "grad_norm": 1.576464722521765, - "learning_rate": 1.1300545146842393e-06, - "loss": 1.1297, - "step": 3502 - }, - { - "epoch": 0.47488646377008065, - "grad_norm": 1.7142510863238611, - "learning_rate": 1.1296190939083815e-06, - "loss": 1.1366, - "step": 3503 - }, - { - "epoch": 0.4750220294177455, - "grad_norm": 1.669074597119181, - "learning_rate": 1.1291836481365045e-06, - "loss": 1.1491, - "step": 3504 - }, - { - "epoch": 0.47515759506541044, - "grad_norm": 1.555550288970278, - "learning_rate": 1.128748177452581e-06, - "loss": 1.1202, - "step": 3505 - }, - { - "epoch": 0.4752931607130753, - "grad_norm": 4.634173967072817, - "learning_rate": 1.1283126819405873e-06, - "loss": 1.1748, - "step": 3506 - }, - { - "epoch": 0.4754287263607402, - "grad_norm": 1.5116326540967229, - "learning_rate": 1.127877161684506e-06, - "loss": 1.1451, - "step": 3507 - }, - { - "epoch": 0.47556429200840505, - "grad_norm": 1.495431225140549, - "learning_rate": 1.1274416167683234e-06, - "loss": 1.1434, - "step": 3508 - }, - { - "epoch": 0.4756998576560699, - "grad_norm": 1.6959891766493962, - "learning_rate": 1.127006047276031e-06, - "loss": 1.1172, - "step": 3509 - }, - { - "epoch": 0.47583542330373485, - "grad_norm": 4.000128354779319, - "learning_rate": 1.126570453291625e-06, - "loss": 1.1503, - "step": 3510 - }, - { - "epoch": 0.4759709889513997, - "grad_norm": 2.8464616867088197, - "learning_rate": 1.126134834899106e-06, - "loss": 1.1731, - "step": 3511 - }, - { - "epoch": 0.4761065545990646, - "grad_norm": 1.6957230803588321, - "learning_rate": 1.1256991921824798e-06, - "loss": 1.1639, - "step": 3512 - }, - { - "epoch": 0.47624212024672946, - "grad_norm": 1.8968857885985984, - "learning_rate": 1.1252635252257567e-06, - "loss": 1.1343, - "step": 3513 - }, - { - "epoch": 0.4763776858943944, - "grad_norm": 1.6626059862878253, - "learning_rate": 1.1248278341129516e-06, - "loss": 1.0962, - "step": 3514 - }, - { - "epoch": 0.47651325154205926, - "grad_norm": 2.225410002522759, - "learning_rate": 1.1243921189280838e-06, - "loss": 1.1434, - "step": 3515 - }, - { - "epoch": 0.4766488171897241, - "grad_norm": 3.4144896655371038, - "learning_rate": 1.1239563797551777e-06, - "loss": 1.1508, - "step": 3516 - }, - { - "epoch": 0.476784382837389, - "grad_norm": 1.7368280834202425, - "learning_rate": 1.1235206166782622e-06, - "loss": 1.1438, - "step": 3517 - }, - { - "epoch": 0.47691994848505387, - "grad_norm": 2.0270855601006685, - "learning_rate": 1.1230848297813712e-06, - "loss": 1.1373, - "step": 3518 - }, - { - "epoch": 0.4770555141327188, - "grad_norm": 1.870470398959643, - "learning_rate": 1.122649019148542e-06, - "loss": 1.1197, - "step": 3519 - }, - { - "epoch": 0.47719107978038366, - "grad_norm": 1.5050994827951414, - "learning_rate": 1.122213184863818e-06, - "loss": 1.1555, - "step": 3520 - }, - { - "epoch": 0.47732664542804853, - "grad_norm": 3.4789121565400585, - "learning_rate": 1.1217773270112454e-06, - "loss": 1.2026, - "step": 3521 - }, - { - "epoch": 0.4774622110757134, - "grad_norm": 1.615078589655677, - "learning_rate": 1.121341445674877e-06, - "loss": 1.1274, - "step": 3522 - }, - { - "epoch": 0.4775977767233783, - "grad_norm": 1.5757036239671598, - "learning_rate": 1.1209055409387682e-06, - "loss": 1.1342, - "step": 3523 - }, - { - "epoch": 0.4777333423710432, - "grad_norm": 1.5746144315243455, - "learning_rate": 1.1204696128869803e-06, - "loss": 1.1086, - "step": 3524 - }, - { - "epoch": 0.47786890801870807, - "grad_norm": 1.8820150395686257, - "learning_rate": 1.1200336616035788e-06, - "loss": 1.1105, - "step": 3525 - }, - { - "epoch": 0.47800447366637294, - "grad_norm": 1.8742291476026685, - "learning_rate": 1.1195976871726332e-06, - "loss": 1.1635, - "step": 3526 - }, - { - "epoch": 0.4781400393140378, - "grad_norm": 1.951524037742861, - "learning_rate": 1.1191616896782172e-06, - "loss": 1.1339, - "step": 3527 - }, - { - "epoch": 0.4782756049617027, - "grad_norm": 1.6014674126459307, - "learning_rate": 1.1187256692044103e-06, - "loss": 1.1659, - "step": 3528 - }, - { - "epoch": 0.4784111706093676, - "grad_norm": 3.475180086771282, - "learning_rate": 1.1182896258352949e-06, - "loss": 1.1619, - "step": 3529 - }, - { - "epoch": 0.4785467362570325, - "grad_norm": 1.6395769996318221, - "learning_rate": 1.1178535596549592e-06, - "loss": 1.119, - "step": 3530 - }, - { - "epoch": 0.47868230190469735, - "grad_norm": 1.6589074269184587, - "learning_rate": 1.1174174707474947e-06, - "loss": 1.1031, - "step": 3531 - }, - { - "epoch": 0.4788178675523622, - "grad_norm": 1.634600205398327, - "learning_rate": 1.116981359196998e-06, - "loss": 1.1775, - "step": 3532 - }, - { - "epoch": 0.4789534332000271, - "grad_norm": 1.6424491905147218, - "learning_rate": 1.116545225087569e-06, - "loss": 1.1547, - "step": 3533 - }, - { - "epoch": 0.479088998847692, - "grad_norm": 1.7856758618082655, - "learning_rate": 1.1161090685033138e-06, - "loss": 1.1723, - "step": 3534 - }, - { - "epoch": 0.4792245644953569, - "grad_norm": 1.6994810065764525, - "learning_rate": 1.1156728895283412e-06, - "loss": 1.1534, - "step": 3535 - }, - { - "epoch": 0.47936013014302176, - "grad_norm": 1.6267079556303796, - "learning_rate": 1.1152366882467647e-06, - "loss": 1.1543, - "step": 3536 - }, - { - "epoch": 0.4794956957906866, - "grad_norm": 1.726620665201554, - "learning_rate": 1.1148004647427027e-06, - "loss": 1.1547, - "step": 3537 - }, - { - "epoch": 0.4796312614383515, - "grad_norm": 1.8660509226275437, - "learning_rate": 1.114364219100277e-06, - "loss": 1.1362, - "step": 3538 - }, - { - "epoch": 0.4797668270860164, - "grad_norm": 1.839066248027463, - "learning_rate": 1.1139279514036147e-06, - "loss": 1.1383, - "step": 3539 - }, - { - "epoch": 0.4799023927336813, - "grad_norm": 1.7585818599694187, - "learning_rate": 1.1134916617368464e-06, - "loss": 1.1361, - "step": 3540 - }, - { - "epoch": 0.48003795838134616, - "grad_norm": 1.6698461643494962, - "learning_rate": 1.1130553501841066e-06, - "loss": 1.1247, - "step": 3541 - }, - { - "epoch": 0.48017352402901103, - "grad_norm": 3.6819199121130897, - "learning_rate": 1.112619016829535e-06, - "loss": 1.1579, - "step": 3542 - }, - { - "epoch": 0.4803090896766759, - "grad_norm": 1.6637489696940353, - "learning_rate": 1.1121826617572752e-06, - "loss": 1.1257, - "step": 3543 - }, - { - "epoch": 0.48044465532434083, - "grad_norm": 14.079769457726842, - "learning_rate": 1.1117462850514744e-06, - "loss": 1.1685, - "step": 3544 - }, - { - "epoch": 0.4805802209720057, - "grad_norm": 1.7584390000318697, - "learning_rate": 1.1113098867962844e-06, - "loss": 1.1958, - "step": 3545 - }, - { - "epoch": 0.48071578661967057, - "grad_norm": 1.6944694402627782, - "learning_rate": 1.1108734670758616e-06, - "loss": 1.1016, - "step": 3546 - }, - { - "epoch": 0.48085135226733544, - "grad_norm": 1.8434701576201205, - "learning_rate": 1.1104370259743659e-06, - "loss": 1.1935, - "step": 3547 - }, - { - "epoch": 0.4809869179150003, - "grad_norm": 1.618594862144222, - "learning_rate": 1.1100005635759612e-06, - "loss": 1.1344, - "step": 3548 - }, - { - "epoch": 0.48112248356266524, - "grad_norm": 1.6581809867725898, - "learning_rate": 1.1095640799648162e-06, - "loss": 1.1199, - "step": 3549 - }, - { - "epoch": 0.4812580492103301, - "grad_norm": 1.5292023469937734, - "learning_rate": 1.1091275752251035e-06, - "loss": 1.1378, - "step": 3550 - }, - { - "epoch": 0.481393614857995, - "grad_norm": 2.547324884475726, - "learning_rate": 1.1086910494409993e-06, - "loss": 1.1242, - "step": 3551 - }, - { - "epoch": 0.48152918050565985, - "grad_norm": 1.4587412753588946, - "learning_rate": 1.1082545026966841e-06, - "loss": 1.1125, - "step": 3552 - }, - { - "epoch": 0.4816647461533248, - "grad_norm": 2.0356807597178372, - "learning_rate": 1.1078179350763424e-06, - "loss": 1.1318, - "step": 3553 - }, - { - "epoch": 0.48180031180098964, - "grad_norm": 1.5597202029501183, - "learning_rate": 1.107381346664163e-06, - "loss": 1.1278, - "step": 3554 - }, - { - "epoch": 0.4819358774486545, - "grad_norm": 1.7286622544531334, - "learning_rate": 1.1069447375443386e-06, - "loss": 1.1483, - "step": 3555 - }, - { - "epoch": 0.4820714430963194, - "grad_norm": 2.060587130749749, - "learning_rate": 1.106508107801066e-06, - "loss": 1.1212, - "step": 3556 - }, - { - "epoch": 0.48220700874398426, - "grad_norm": 1.7196926523993636, - "learning_rate": 1.1060714575185453e-06, - "loss": 1.1518, - "step": 3557 - }, - { - "epoch": 0.4823425743916492, - "grad_norm": 1.41171119181269, - "learning_rate": 1.105634786780981e-06, - "loss": 1.1396, - "step": 3558 - }, - { - "epoch": 0.48247814003931405, - "grad_norm": 1.599562223284275, - "learning_rate": 1.105198095672582e-06, - "loss": 1.119, - "step": 3559 - }, - { - "epoch": 0.4826137056869789, - "grad_norm": 1.4917147032442624, - "learning_rate": 1.104761384277561e-06, - "loss": 1.1186, - "step": 3560 - }, - { - "epoch": 0.4827492713346438, - "grad_norm": 1.6168949740774694, - "learning_rate": 1.1043246526801338e-06, - "loss": 1.1587, - "step": 3561 - }, - { - "epoch": 0.48288483698230866, - "grad_norm": 1.4859103520353631, - "learning_rate": 1.1038879009645205e-06, - "loss": 1.1045, - "step": 3562 - }, - { - "epoch": 0.4830204026299736, - "grad_norm": 3.885561354927812, - "learning_rate": 1.103451129214946e-06, - "loss": 1.161, - "step": 3563 - }, - { - "epoch": 0.48315596827763846, - "grad_norm": 1.9221148348198203, - "learning_rate": 1.1030143375156375e-06, - "loss": 1.1527, - "step": 3564 - }, - { - "epoch": 0.48329153392530333, - "grad_norm": 1.75989723732532, - "learning_rate": 1.1025775259508275e-06, - "loss": 1.1733, - "step": 3565 - }, - { - "epoch": 0.4834270995729682, - "grad_norm": 1.7103540616345116, - "learning_rate": 1.1021406946047508e-06, - "loss": 1.1465, - "step": 3566 - }, - { - "epoch": 0.48356266522063307, - "grad_norm": 1.8311931828727706, - "learning_rate": 1.101703843561648e-06, - "loss": 1.1101, - "step": 3567 - }, - { - "epoch": 0.483698230868298, - "grad_norm": 1.807395435795349, - "learning_rate": 1.1012669729057615e-06, - "loss": 1.199, - "step": 3568 - }, - { - "epoch": 0.48383379651596287, - "grad_norm": 1.855809180092538, - "learning_rate": 1.1008300827213385e-06, - "loss": 1.175, - "step": 3569 - }, - { - "epoch": 0.48396936216362774, - "grad_norm": 2.0357714369253372, - "learning_rate": 1.10039317309263e-06, - "loss": 1.1949, - "step": 3570 - }, - { - "epoch": 0.4841049278112926, - "grad_norm": 1.6849342557682192, - "learning_rate": 1.0999562441038909e-06, - "loss": 1.1453, - "step": 3571 - }, - { - "epoch": 0.4842404934589575, - "grad_norm": 1.5414945499009933, - "learning_rate": 1.0995192958393785e-06, - "loss": 1.1649, - "step": 3572 - }, - { - "epoch": 0.4843760591066224, - "grad_norm": 1.4245061912298957, - "learning_rate": 1.099082328383356e-06, - "loss": 1.1538, - "step": 3573 - }, - { - "epoch": 0.4845116247542873, - "grad_norm": 1.3855599084535017, - "learning_rate": 1.098645341820088e-06, - "loss": 1.1401, - "step": 3574 - }, - { - "epoch": 0.48464719040195214, - "grad_norm": 1.658349376318634, - "learning_rate": 1.098208336233845e-06, - "loss": 1.1459, - "step": 3575 - }, - { - "epoch": 0.484782756049617, - "grad_norm": 2.4413000616109803, - "learning_rate": 1.0977713117088994e-06, - "loss": 1.1445, - "step": 3576 - }, - { - "epoch": 0.4849183216972819, - "grad_norm": 2.196368411649948, - "learning_rate": 1.097334268329528e-06, - "loss": 1.2057, - "step": 3577 - }, - { - "epoch": 0.4850538873449468, - "grad_norm": 1.8163540428859475, - "learning_rate": 1.0968972061800115e-06, - "loss": 1.1323, - "step": 3578 - }, - { - "epoch": 0.4851894529926117, - "grad_norm": 1.5120990782538632, - "learning_rate": 1.0964601253446332e-06, - "loss": 1.115, - "step": 3579 - }, - { - "epoch": 0.48532501864027655, - "grad_norm": 2.3518273107092185, - "learning_rate": 1.0960230259076817e-06, - "loss": 1.1383, - "step": 3580 - }, - { - "epoch": 0.4854605842879414, - "grad_norm": 1.5239199306453117, - "learning_rate": 1.0955859079534473e-06, - "loss": 1.1647, - "step": 3581 - }, - { - "epoch": 0.4855961499356063, - "grad_norm": 2.2092938308389054, - "learning_rate": 1.0951487715662253e-06, - "loss": 1.1311, - "step": 3582 - }, - { - "epoch": 0.4857317155832712, - "grad_norm": 1.852310506404215, - "learning_rate": 1.0947116168303137e-06, - "loss": 1.1266, - "step": 3583 - }, - { - "epoch": 0.4858672812309361, - "grad_norm": 1.3835570279171496, - "learning_rate": 1.0942744438300141e-06, - "loss": 1.1334, - "step": 3584 - }, - { - "epoch": 0.48600284687860096, - "grad_norm": 1.6111095015817956, - "learning_rate": 1.0938372526496324e-06, - "loss": 1.1367, - "step": 3585 - }, - { - "epoch": 0.48613841252626583, - "grad_norm": 1.332272235781251, - "learning_rate": 1.0934000433734772e-06, - "loss": 1.125, - "step": 3586 - }, - { - "epoch": 0.4862739781739307, - "grad_norm": 1.5291006896304649, - "learning_rate": 1.0929628160858611e-06, - "loss": 1.1486, - "step": 3587 - }, - { - "epoch": 0.4864095438215956, - "grad_norm": 1.6830807334701, - "learning_rate": 1.0925255708710994e-06, - "loss": 1.1507, - "step": 3588 - }, - { - "epoch": 0.4865451094692605, - "grad_norm": 1.471632599906325, - "learning_rate": 1.0920883078135118e-06, - "loss": 1.1904, - "step": 3589 - }, - { - "epoch": 0.48668067511692537, - "grad_norm": 1.8609444338989496, - "learning_rate": 1.0916510269974208e-06, - "loss": 1.1764, - "step": 3590 - }, - { - "epoch": 0.48681624076459024, - "grad_norm": 1.7713957800441362, - "learning_rate": 1.091213728507153e-06, - "loss": 1.1315, - "step": 3591 - }, - { - "epoch": 0.48695180641225516, - "grad_norm": 2.4900453775048654, - "learning_rate": 1.0907764124270374e-06, - "loss": 1.1617, - "step": 3592 - }, - { - "epoch": 0.48708737205992003, - "grad_norm": 1.9271141262861093, - "learning_rate": 1.0903390788414072e-06, - "loss": 1.143, - "step": 3593 - }, - { - "epoch": 0.4872229377075849, - "grad_norm": 1.4566165417455617, - "learning_rate": 1.089901727834599e-06, - "loss": 1.0852, - "step": 3594 - }, - { - "epoch": 0.4873585033552498, - "grad_norm": 1.5508365977653122, - "learning_rate": 1.0894643594909518e-06, - "loss": 1.1504, - "step": 3595 - }, - { - "epoch": 0.48749406900291464, - "grad_norm": 1.8506040839194813, - "learning_rate": 1.0890269738948096e-06, - "loss": 1.1416, - "step": 3596 - }, - { - "epoch": 0.48762963465057957, - "grad_norm": 1.4606664964243519, - "learning_rate": 1.088589571130518e-06, - "loss": 1.1479, - "step": 3597 - }, - { - "epoch": 0.48776520029824444, - "grad_norm": 1.986660481361705, - "learning_rate": 1.0881521512824268e-06, - "loss": 1.1576, - "step": 3598 - }, - { - "epoch": 0.4879007659459093, - "grad_norm": 1.8391449564072078, - "learning_rate": 1.0877147144348892e-06, - "loss": 1.1328, - "step": 3599 - }, - { - "epoch": 0.4880363315935742, - "grad_norm": 1.5244862049633476, - "learning_rate": 1.087277260672261e-06, - "loss": 1.1766, - "step": 3600 - }, - { - "epoch": 0.48817189724123905, - "grad_norm": 4.585203862917811, - "learning_rate": 1.0868397900789024e-06, - "loss": 1.155, - "step": 3601 - }, - { - "epoch": 0.488307462888904, - "grad_norm": 1.694488797285962, - "learning_rate": 1.0864023027391753e-06, - "loss": 1.1434, - "step": 3602 - }, - { - "epoch": 0.48844302853656885, - "grad_norm": 10.115804396934161, - "learning_rate": 1.0859647987374464e-06, - "loss": 1.1413, - "step": 3603 - }, - { - "epoch": 0.4885785941842337, - "grad_norm": 2.1493452001448725, - "learning_rate": 1.0855272781580846e-06, - "loss": 1.1608, - "step": 3604 - }, - { - "epoch": 0.4887141598318986, - "grad_norm": 1.5723223528194172, - "learning_rate": 1.0850897410854624e-06, - "loss": 1.118, - "step": 3605 - }, - { - "epoch": 0.48884972547956346, - "grad_norm": 1.5913617391687054, - "learning_rate": 1.084652187603955e-06, - "loss": 1.1671, - "step": 3606 - }, - { - "epoch": 0.4889852911272284, - "grad_norm": 1.751777917364118, - "learning_rate": 1.0842146177979418e-06, - "loss": 1.1794, - "step": 3607 - }, - { - "epoch": 0.48912085677489325, - "grad_norm": 1.719324861346867, - "learning_rate": 1.0837770317518043e-06, - "loss": 1.168, - "step": 3608 - }, - { - "epoch": 0.4892564224225581, - "grad_norm": 1.7670869890836165, - "learning_rate": 1.083339429549927e-06, - "loss": 1.1475, - "step": 3609 - }, - { - "epoch": 0.489391988070223, - "grad_norm": 1.459870902763866, - "learning_rate": 1.0829018112766993e-06, - "loss": 1.1177, - "step": 3610 - }, - { - "epoch": 0.48952755371788786, - "grad_norm": 1.75756387402939, - "learning_rate": 1.0824641770165112e-06, - "loss": 1.1597, - "step": 3611 - }, - { - "epoch": 0.4896631193655528, - "grad_norm": 1.5133482408390362, - "learning_rate": 1.0820265268537578e-06, - "loss": 1.2001, - "step": 3612 - }, - { - "epoch": 0.48979868501321766, - "grad_norm": 1.5308530949473897, - "learning_rate": 1.0815888608728359e-06, - "loss": 1.174, - "step": 3613 - }, - { - "epoch": 0.48993425066088253, - "grad_norm": 1.7066146410517056, - "learning_rate": 1.0811511791581463e-06, - "loss": 1.1538, - "step": 3614 - }, - { - "epoch": 0.4900698163085474, - "grad_norm": 2.216916690854367, - "learning_rate": 1.0807134817940923e-06, - "loss": 1.1613, - "step": 3615 - }, - { - "epoch": 0.49020538195621227, - "grad_norm": 1.652733013116373, - "learning_rate": 1.0802757688650805e-06, - "loss": 1.1849, - "step": 3616 - }, - { - "epoch": 0.4903409476038772, - "grad_norm": 1.5380584692950303, - "learning_rate": 1.0798380404555203e-06, - "loss": 1.1391, - "step": 3617 - }, - { - "epoch": 0.49047651325154207, - "grad_norm": 1.4933788104817227, - "learning_rate": 1.0794002966498246e-06, - "loss": 1.1093, - "step": 3618 - }, - { - "epoch": 0.49061207889920694, - "grad_norm": 2.514099117082194, - "learning_rate": 1.0789625375324078e-06, - "loss": 1.107, - "step": 3619 - }, - { - "epoch": 0.4907476445468718, - "grad_norm": 3.9613565064765224, - "learning_rate": 1.0785247631876892e-06, - "loss": 1.158, - "step": 3620 - }, - { - "epoch": 0.4908832101945367, - "grad_norm": 1.455276633418966, - "learning_rate": 1.0780869737000898e-06, - "loss": 1.1166, - "step": 3621 - }, - { - "epoch": 0.4910187758422016, - "grad_norm": 1.9438681092867243, - "learning_rate": 1.0776491691540342e-06, - "loss": 1.1494, - "step": 3622 - }, - { - "epoch": 0.4911543414898665, - "grad_norm": 1.4988356625843238, - "learning_rate": 1.077211349633949e-06, - "loss": 1.1793, - "step": 3623 - }, - { - "epoch": 0.49128990713753135, - "grad_norm": 1.7715508198827887, - "learning_rate": 1.0767735152242646e-06, - "loss": 1.1659, - "step": 3624 - }, - { - "epoch": 0.4914254727851962, - "grad_norm": 1.480427068908792, - "learning_rate": 1.0763356660094139e-06, - "loss": 1.1432, - "step": 3625 - }, - { - "epoch": 0.4915610384328611, - "grad_norm": 1.9875808485211517, - "learning_rate": 1.0758978020738323e-06, - "loss": 1.16, - "step": 3626 - }, - { - "epoch": 0.491696604080526, - "grad_norm": 1.7478768703554663, - "learning_rate": 1.0754599235019586e-06, - "loss": 1.1718, - "step": 3627 - }, - { - "epoch": 0.4918321697281909, - "grad_norm": 1.9555723776927423, - "learning_rate": 1.0750220303782345e-06, - "loss": 1.1648, - "step": 3628 - }, - { - "epoch": 0.49196773537585575, - "grad_norm": 1.5870595506694363, - "learning_rate": 1.074584122787104e-06, - "loss": 1.1758, - "step": 3629 - }, - { - "epoch": 0.4921033010235206, - "grad_norm": 1.5006773241771592, - "learning_rate": 1.074146200813014e-06, - "loss": 1.153, - "step": 3630 - }, - { - "epoch": 0.49223886667118555, - "grad_norm": 1.5631614136815106, - "learning_rate": 1.0737082645404147e-06, - "loss": 1.1025, - "step": 3631 - }, - { - "epoch": 0.4923744323188504, - "grad_norm": 1.6497289138725941, - "learning_rate": 1.0732703140537583e-06, - "loss": 1.1409, - "step": 3632 - }, - { - "epoch": 0.4925099979665153, - "grad_norm": 2.510305790751615, - "learning_rate": 1.0728323494375e-06, - "loss": 1.1443, - "step": 3633 - }, - { - "epoch": 0.49264556361418016, - "grad_norm": 1.694873716676153, - "learning_rate": 1.0723943707760984e-06, - "loss": 1.1738, - "step": 3634 - }, - { - "epoch": 0.49278112926184503, - "grad_norm": 2.4941820561688637, - "learning_rate": 1.0719563781540135e-06, - "loss": 1.1368, - "step": 3635 - }, - { - "epoch": 0.49291669490950996, - "grad_norm": 1.571842359996238, - "learning_rate": 1.071518371655709e-06, - "loss": 1.1606, - "step": 3636 - }, - { - "epoch": 0.4930522605571748, - "grad_norm": 2.97197829610926, - "learning_rate": 1.0710803513656514e-06, - "loss": 1.1555, - "step": 3637 - }, - { - "epoch": 0.4931878262048397, - "grad_norm": 2.0427206215293348, - "learning_rate": 1.0706423173683092e-06, - "loss": 1.1412, - "step": 3638 - }, - { - "epoch": 0.49332339185250457, - "grad_norm": 1.5917610832403006, - "learning_rate": 1.0702042697481536e-06, - "loss": 1.1419, - "step": 3639 - }, - { - "epoch": 0.49345895750016944, - "grad_norm": 2.189765919938558, - "learning_rate": 1.0697662085896583e-06, - "loss": 1.1527, - "step": 3640 - }, - { - "epoch": 0.49359452314783436, - "grad_norm": 3.8075447735845334, - "learning_rate": 1.0693281339773009e-06, - "loss": 1.1606, - "step": 3641 - }, - { - "epoch": 0.49373008879549923, - "grad_norm": 1.4672893303343444, - "learning_rate": 1.0688900459955596e-06, - "loss": 1.1377, - "step": 3642 - }, - { - "epoch": 0.4938656544431641, - "grad_norm": 4.226370464822672, - "learning_rate": 1.0684519447289171e-06, - "loss": 1.1098, - "step": 3643 - }, - { - "epoch": 0.494001220090829, - "grad_norm": 1.8562753725188814, - "learning_rate": 1.0680138302618572e-06, - "loss": 1.1524, - "step": 3644 - }, - { - "epoch": 0.49413678573849384, - "grad_norm": 1.5195636530093366, - "learning_rate": 1.0675757026788672e-06, - "loss": 1.1975, - "step": 3645 - }, - { - "epoch": 0.49427235138615877, - "grad_norm": 1.5069791121545708, - "learning_rate": 1.0671375620644363e-06, - "loss": 1.1394, - "step": 3646 - }, - { - "epoch": 0.49440791703382364, - "grad_norm": 2.018290895387507, - "learning_rate": 1.0666994085030563e-06, - "loss": 1.1227, - "step": 3647 - }, - { - "epoch": 0.4945434826814885, - "grad_norm": 1.8320517007479566, - "learning_rate": 1.066261242079222e-06, - "loss": 1.1245, - "step": 3648 - }, - { - "epoch": 0.4946790483291534, - "grad_norm": 4.749633734151363, - "learning_rate": 1.0658230628774302e-06, - "loss": 1.2089, - "step": 3649 - }, - { - "epoch": 0.49481461397681825, - "grad_norm": 1.7624989145233199, - "learning_rate": 1.0653848709821806e-06, - "loss": 1.1467, - "step": 3650 - }, - { - "epoch": 0.4949501796244832, - "grad_norm": 1.61451018754693, - "learning_rate": 1.0649466664779744e-06, - "loss": 1.1513, - "step": 3651 - }, - { - "epoch": 0.49508574527214805, - "grad_norm": 1.5505637657610423, - "learning_rate": 1.0645084494493164e-06, - "loss": 1.1617, - "step": 3652 - }, - { - "epoch": 0.4952213109198129, - "grad_norm": 1.7064563990422226, - "learning_rate": 1.064070219980713e-06, - "loss": 1.103, - "step": 3653 - }, - { - "epoch": 0.4953568765674778, - "grad_norm": 1.4640474180440175, - "learning_rate": 1.0636319781566736e-06, - "loss": 1.1331, - "step": 3654 - }, - { - "epoch": 0.49549244221514266, - "grad_norm": 1.8161999973380896, - "learning_rate": 1.0631937240617093e-06, - "loss": 1.1356, - "step": 3655 - }, - { - "epoch": 0.4956280078628076, - "grad_norm": 1.8423959417134634, - "learning_rate": 1.062755457780334e-06, - "loss": 1.151, - "step": 3656 - }, - { - "epoch": 0.49576357351047246, - "grad_norm": 1.9149671267522905, - "learning_rate": 1.0623171793970642e-06, - "loss": 1.0889, - "step": 3657 - }, - { - "epoch": 0.4958991391581373, - "grad_norm": 1.528626502649572, - "learning_rate": 1.0618788889964182e-06, - "loss": 1.1644, - "step": 3658 - }, - { - "epoch": 0.4960347048058022, - "grad_norm": 1.7782142760661765, - "learning_rate": 1.061440586662917e-06, - "loss": 1.1793, - "step": 3659 - }, - { - "epoch": 0.49617027045346707, - "grad_norm": 1.5355297194402997, - "learning_rate": 1.0610022724810837e-06, - "loss": 1.1849, - "step": 3660 - }, - { - "epoch": 0.496305836101132, - "grad_norm": 1.4072073961900664, - "learning_rate": 1.0605639465354435e-06, - "loss": 1.1453, - "step": 3661 - }, - { - "epoch": 0.49644140174879686, - "grad_norm": 1.4463208894312496, - "learning_rate": 1.0601256089105242e-06, - "loss": 1.1659, - "step": 3662 - }, - { - "epoch": 0.49657696739646173, - "grad_norm": 3.2763275339156945, - "learning_rate": 1.059687259690856e-06, - "loss": 1.1483, - "step": 3663 - }, - { - "epoch": 0.4967125330441266, - "grad_norm": 1.7701030785086718, - "learning_rate": 1.0592488989609708e-06, - "loss": 1.1367, - "step": 3664 - }, - { - "epoch": 0.4968480986917915, - "grad_norm": 1.7907577389580676, - "learning_rate": 1.0588105268054032e-06, - "loss": 1.1433, - "step": 3665 - }, - { - "epoch": 0.4969836643394564, - "grad_norm": 1.5954810073016321, - "learning_rate": 1.0583721433086899e-06, - "loss": 1.1688, - "step": 3666 - }, - { - "epoch": 0.49711922998712127, - "grad_norm": 2.4753086292965114, - "learning_rate": 1.0579337485553695e-06, - "loss": 1.1478, - "step": 3667 - }, - { - "epoch": 0.49725479563478614, - "grad_norm": 1.5247498763310616, - "learning_rate": 1.0574953426299825e-06, - "loss": 1.1729, - "step": 3668 - }, - { - "epoch": 0.497390361282451, - "grad_norm": 1.5058008125974485, - "learning_rate": 1.057056925617073e-06, - "loss": 1.1473, - "step": 3669 - }, - { - "epoch": 0.49752592693011594, - "grad_norm": 1.574996421534381, - "learning_rate": 1.0566184976011855e-06, - "loss": 1.131, - "step": 3670 - }, - { - "epoch": 0.4976614925777808, - "grad_norm": 1.6220675743569348, - "learning_rate": 1.0561800586668678e-06, - "loss": 1.1561, - "step": 3671 - }, - { - "epoch": 0.4977970582254457, - "grad_norm": 1.6016358677118139, - "learning_rate": 1.0557416088986692e-06, - "loss": 1.12, - "step": 3672 - }, - { - "epoch": 0.49793262387311055, - "grad_norm": 1.6920729219608819, - "learning_rate": 1.0553031483811414e-06, - "loss": 1.1398, - "step": 3673 - }, - { - "epoch": 0.4980681895207754, - "grad_norm": 1.479376435842318, - "learning_rate": 1.054864677198838e-06, - "loss": 1.13, - "step": 3674 - }, - { - "epoch": 0.49820375516844034, - "grad_norm": 1.5016128915376805, - "learning_rate": 1.0544261954363146e-06, - "loss": 1.1522, - "step": 3675 - }, - { - "epoch": 0.4983393208161052, - "grad_norm": 2.462371473609971, - "learning_rate": 1.0539877031781289e-06, - "loss": 1.147, - "step": 3676 - }, - { - "epoch": 0.4984748864637701, - "grad_norm": 6.884988825597979, - "learning_rate": 1.053549200508841e-06, - "loss": 1.1284, - "step": 3677 - }, - { - "epoch": 0.49861045211143495, - "grad_norm": 1.7378347440513875, - "learning_rate": 1.0531106875130123e-06, - "loss": 1.1408, - "step": 3678 - }, - { - "epoch": 0.4987460177590998, - "grad_norm": 4.671052269168908, - "learning_rate": 1.0526721642752069e-06, - "loss": 1.13, - "step": 3679 - }, - { - "epoch": 0.49888158340676475, - "grad_norm": 1.5749102780323372, - "learning_rate": 1.0522336308799904e-06, - "loss": 1.1245, - "step": 3680 - }, - { - "epoch": 0.4990171490544296, - "grad_norm": 1.5131360196593595, - "learning_rate": 1.0517950874119304e-06, - "loss": 1.1318, - "step": 3681 - }, - { - "epoch": 0.4991527147020945, - "grad_norm": 2.562763393362881, - "learning_rate": 1.0513565339555965e-06, - "loss": 1.1854, - "step": 3682 - }, - { - "epoch": 0.49928828034975936, - "grad_norm": 2.4026849492594207, - "learning_rate": 1.0509179705955607e-06, - "loss": 1.1602, - "step": 3683 - }, - { - "epoch": 0.49942384599742423, - "grad_norm": 1.84264408437198, - "learning_rate": 1.050479397416396e-06, - "loss": 1.1653, - "step": 3684 - }, - { - "epoch": 0.49955941164508916, - "grad_norm": 1.5782361793280155, - "learning_rate": 1.050040814502678e-06, - "loss": 1.1463, - "step": 3685 - }, - { - "epoch": 0.49969497729275403, - "grad_norm": 1.6220058909962602, - "learning_rate": 1.049602221938984e-06, - "loss": 1.1352, - "step": 3686 - }, - { - "epoch": 0.4998305429404189, - "grad_norm": 1.9568684814025437, - "learning_rate": 1.0491636198098932e-06, - "loss": 1.1538, - "step": 3687 - }, - { - "epoch": 0.49996610858808377, - "grad_norm": 1.6565553467673286, - "learning_rate": 1.048725008199986e-06, - "loss": 1.1485, - "step": 3688 - }, - { - "epoch": 0.5001016742357487, - "grad_norm": 2.242811777789825, - "learning_rate": 1.0482863871938459e-06, - "loss": 1.1412, - "step": 3689 - }, - { - "epoch": 0.5002372398834135, - "grad_norm": 1.6241588631858979, - "learning_rate": 1.047847756876057e-06, - "loss": 1.1695, - "step": 3690 - }, - { - "epoch": 0.5003728055310784, - "grad_norm": 1.5891348004163717, - "learning_rate": 1.0474091173312058e-06, - "loss": 1.161, - "step": 3691 - }, - { - "epoch": 0.5005083711787434, - "grad_norm": 1.7482768549375916, - "learning_rate": 1.0469704686438807e-06, - "loss": 1.1465, - "step": 3692 - }, - { - "epoch": 0.5006439368264082, - "grad_norm": 1.7935797948865637, - "learning_rate": 1.0465318108986713e-06, - "loss": 1.1507, - "step": 3693 - }, - { - "epoch": 0.5007795024740731, - "grad_norm": 1.3745606635336294, - "learning_rate": 1.04609314418017e-06, - "loss": 1.1712, - "step": 3694 - }, - { - "epoch": 0.5009150681217379, - "grad_norm": 1.5633039679717373, - "learning_rate": 1.045654468572969e-06, - "loss": 1.1558, - "step": 3695 - }, - { - "epoch": 0.5010506337694028, - "grad_norm": 1.4327804371129096, - "learning_rate": 1.0452157841616645e-06, - "loss": 1.1095, - "step": 3696 - }, - { - "epoch": 0.5011861994170678, - "grad_norm": 1.60910734150205, - "learning_rate": 1.044777091030853e-06, - "loss": 1.1416, - "step": 3697 - }, - { - "epoch": 0.5013217650647326, - "grad_norm": 1.5456345367732929, - "learning_rate": 1.0443383892651325e-06, - "loss": 1.1227, - "step": 3698 - }, - { - "epoch": 0.5014573307123975, - "grad_norm": 1.8706089301247457, - "learning_rate": 1.043899678949104e-06, - "loss": 1.1406, - "step": 3699 - }, - { - "epoch": 0.5015928963600623, - "grad_norm": 2.1068679581829497, - "learning_rate": 1.0434609601673687e-06, - "loss": 1.1365, - "step": 3700 - }, - { - "epoch": 0.5017284620077272, - "grad_norm": 1.4875713543742959, - "learning_rate": 1.0430222330045304e-06, - "loss": 1.1426, - "step": 3701 - }, - { - "epoch": 0.5018640276553922, - "grad_norm": 2.145547164029504, - "learning_rate": 1.0425834975451942e-06, - "loss": 1.2324, - "step": 3702 - }, - { - "epoch": 0.501999593303057, - "grad_norm": 2.0197221463151322, - "learning_rate": 1.0421447538739664e-06, - "loss": 1.1614, - "step": 3703 - }, - { - "epoch": 0.5021351589507219, - "grad_norm": 1.622471554879056, - "learning_rate": 1.0417060020754555e-06, - "loss": 1.1015, - "step": 3704 - }, - { - "epoch": 0.5022707245983867, - "grad_norm": 1.4906523777739624, - "learning_rate": 1.0412672422342714e-06, - "loss": 1.1422, - "step": 3705 - }, - { - "epoch": 0.5024062902460517, - "grad_norm": 1.4515354655512587, - "learning_rate": 1.0408284744350255e-06, - "loss": 1.1034, - "step": 3706 - }, - { - "epoch": 0.5025418558937166, - "grad_norm": 2.060809831944587, - "learning_rate": 1.0403896987623304e-06, - "loss": 1.154, - "step": 3707 - }, - { - "epoch": 0.5026774215413814, - "grad_norm": 1.4577985200057395, - "learning_rate": 1.039950915300801e-06, - "loss": 1.1582, - "step": 3708 - }, - { - "epoch": 0.5028129871890463, - "grad_norm": 1.4740277895083194, - "learning_rate": 1.039512124135053e-06, - "loss": 1.0987, - "step": 3709 - }, - { - "epoch": 0.5029485528367111, - "grad_norm": 4.0482080494936765, - "learning_rate": 1.0390733253497033e-06, - "loss": 1.1539, - "step": 3710 - }, - { - "epoch": 0.5030841184843761, - "grad_norm": 8.210457208192071, - "learning_rate": 1.0386345190293714e-06, - "loss": 1.1181, - "step": 3711 - }, - { - "epoch": 0.503219684132041, - "grad_norm": 3.575353482584083, - "learning_rate": 1.0381957052586774e-06, - "loss": 1.1223, - "step": 3712 - }, - { - "epoch": 0.5033552497797058, - "grad_norm": 1.7502711882166442, - "learning_rate": 1.037756884122243e-06, - "loss": 1.1383, - "step": 3713 - }, - { - "epoch": 0.5034908154273707, - "grad_norm": 1.4819637585915433, - "learning_rate": 1.037318055704692e-06, - "loss": 1.1308, - "step": 3714 - }, - { - "epoch": 0.5036263810750355, - "grad_norm": 1.4303085087055922, - "learning_rate": 1.0368792200906482e-06, - "loss": 1.1203, - "step": 3715 - }, - { - "epoch": 0.5037619467227005, - "grad_norm": 1.7089172131133035, - "learning_rate": 1.0364403773647379e-06, - "loss": 1.1069, - "step": 3716 - }, - { - "epoch": 0.5038975123703654, - "grad_norm": 1.9181995171101505, - "learning_rate": 1.0360015276115888e-06, - "loss": 1.1598, - "step": 3717 - }, - { - "epoch": 0.5040330780180302, - "grad_norm": 1.7887625357502435, - "learning_rate": 1.035562670915829e-06, - "loss": 1.1428, - "step": 3718 - }, - { - "epoch": 0.5041686436656951, - "grad_norm": 1.4185976702587364, - "learning_rate": 1.0351238073620887e-06, - "loss": 1.1554, - "step": 3719 - }, - { - "epoch": 0.50430420931336, - "grad_norm": 1.8658457628501681, - "learning_rate": 1.0346849370349997e-06, - "loss": 1.1357, - "step": 3720 - }, - { - "epoch": 0.5044397749610249, - "grad_norm": 1.8978550323440646, - "learning_rate": 1.0342460600191942e-06, - "loss": 1.1603, - "step": 3721 - }, - { - "epoch": 0.5045753406086898, - "grad_norm": 2.4026477978676435, - "learning_rate": 1.0338071763993065e-06, - "loss": 1.1338, - "step": 3722 - }, - { - "epoch": 0.5047109062563546, - "grad_norm": 1.5817287276038865, - "learning_rate": 1.0333682862599714e-06, - "loss": 1.1603, - "step": 3723 - }, - { - "epoch": 0.5048464719040195, - "grad_norm": 1.5052837864743007, - "learning_rate": 1.032929389685826e-06, - "loss": 1.1054, - "step": 3724 - }, - { - "epoch": 0.5049820375516844, - "grad_norm": 1.6332221142612835, - "learning_rate": 1.0324904867615077e-06, - "loss": 1.1864, - "step": 3725 - }, - { - "epoch": 0.5051176031993493, - "grad_norm": 1.667317259692694, - "learning_rate": 1.0320515775716554e-06, - "loss": 1.1444, - "step": 3726 - }, - { - "epoch": 0.5052531688470142, - "grad_norm": 2.77999413096949, - "learning_rate": 1.0316126622009092e-06, - "loss": 1.1319, - "step": 3727 - }, - { - "epoch": 0.505388734494679, - "grad_norm": 2.4620416029528505, - "learning_rate": 1.0311737407339106e-06, - "loss": 1.1346, - "step": 3728 - }, - { - "epoch": 0.505524300142344, - "grad_norm": 1.8470012609819948, - "learning_rate": 1.0307348132553024e-06, - "loss": 1.1191, - "step": 3729 - }, - { - "epoch": 0.5056598657900088, - "grad_norm": 1.6012391633145808, - "learning_rate": 1.030295879849728e-06, - "loss": 1.1615, - "step": 3730 - }, - { - "epoch": 0.5057954314376737, - "grad_norm": 1.9865130268321027, - "learning_rate": 1.0298569406018325e-06, - "loss": 1.1564, - "step": 3731 - }, - { - "epoch": 0.5059309970853386, - "grad_norm": 1.7475169806821904, - "learning_rate": 1.0294179955962614e-06, - "loss": 1.1183, - "step": 3732 - }, - { - "epoch": 0.5060665627330034, - "grad_norm": 1.476561925148515, - "learning_rate": 1.0289790449176622e-06, - "loss": 1.1647, - "step": 3733 - }, - { - "epoch": 0.5062021283806684, - "grad_norm": 3.7322238048386684, - "learning_rate": 1.0285400886506828e-06, - "loss": 1.1303, - "step": 3734 - }, - { - "epoch": 0.5063376940283332, - "grad_norm": 1.5802359990046724, - "learning_rate": 1.0281011268799726e-06, - "loss": 1.124, - "step": 3735 - }, - { - "epoch": 0.5064732596759981, - "grad_norm": 1.7648532923324178, - "learning_rate": 1.0276621596901821e-06, - "loss": 1.1369, - "step": 3736 - }, - { - "epoch": 0.506608825323663, - "grad_norm": 1.9678365201587165, - "learning_rate": 1.0272231871659624e-06, - "loss": 1.1269, - "step": 3737 - }, - { - "epoch": 0.5067443909713278, - "grad_norm": 1.6460921394053207, - "learning_rate": 1.026784209391966e-06, - "loss": 1.1502, - "step": 3738 - }, - { - "epoch": 0.5068799566189928, - "grad_norm": 2.1078601090317513, - "learning_rate": 1.026345226452846e-06, - "loss": 1.1538, - "step": 3739 - }, - { - "epoch": 0.5070155222666576, - "grad_norm": 1.8495175400994377, - "learning_rate": 1.0259062384332573e-06, - "loss": 1.137, - "step": 3740 - }, - { - "epoch": 0.5071510879143225, - "grad_norm": 3.233473973081341, - "learning_rate": 1.0254672454178547e-06, - "loss": 1.1552, - "step": 3741 - }, - { - "epoch": 0.5072866535619874, - "grad_norm": 1.4502085510720206, - "learning_rate": 1.0250282474912952e-06, - "loss": 1.1478, - "step": 3742 - }, - { - "epoch": 0.5074222192096522, - "grad_norm": 1.5794972799973739, - "learning_rate": 1.0245892447382354e-06, - "loss": 1.1338, - "step": 3743 - }, - { - "epoch": 0.5075577848573172, - "grad_norm": 3.0150658976470686, - "learning_rate": 1.0241502372433342e-06, - "loss": 1.1419, - "step": 3744 - }, - { - "epoch": 0.507693350504982, - "grad_norm": 1.4966937717247666, - "learning_rate": 1.02371122509125e-06, - "loss": 1.1361, - "step": 3745 - }, - { - "epoch": 0.5078289161526469, - "grad_norm": 1.4680119866043455, - "learning_rate": 1.0232722083666435e-06, - "loss": 1.1444, - "step": 3746 - }, - { - "epoch": 0.5079644818003118, - "grad_norm": 1.4469558645020175, - "learning_rate": 1.022833187154175e-06, - "loss": 1.1421, - "step": 3747 - }, - { - "epoch": 0.5081000474479767, - "grad_norm": 1.7563004155939272, - "learning_rate": 1.022394161538507e-06, - "loss": 1.1327, - "step": 3748 - }, - { - "epoch": 0.5082356130956416, - "grad_norm": 15.751217277738931, - "learning_rate": 1.0219551316043016e-06, - "loss": 1.154, - "step": 3749 - }, - { - "epoch": 0.5083711787433064, - "grad_norm": 3.734338202354972, - "learning_rate": 1.0215160974362223e-06, - "loss": 1.0939, - "step": 3750 - }, - { - "epoch": 0.5085067443909713, - "grad_norm": 1.6821975704703676, - "learning_rate": 1.0210770591189333e-06, - "loss": 1.1108, - "step": 3751 - }, - { - "epoch": 0.5086423100386362, - "grad_norm": 1.5417800380967217, - "learning_rate": 1.0206380167371e-06, - "loss": 1.1462, - "step": 3752 - }, - { - "epoch": 0.5087778756863011, - "grad_norm": 3.676404480567771, - "learning_rate": 1.0201989703753881e-06, - "loss": 1.1555, - "step": 3753 - }, - { - "epoch": 0.508913441333966, - "grad_norm": 1.6165012238473813, - "learning_rate": 1.0197599201184642e-06, - "loss": 1.169, - "step": 3754 - }, - { - "epoch": 0.5090490069816308, - "grad_norm": 1.7526814345463846, - "learning_rate": 1.0193208660509956e-06, - "loss": 1.1454, - "step": 3755 - }, - { - "epoch": 0.5091845726292957, - "grad_norm": 1.761587529927443, - "learning_rate": 1.0188818082576505e-06, - "loss": 1.1381, - "step": 3756 - }, - { - "epoch": 0.5093201382769607, - "grad_norm": 2.379813359444368, - "learning_rate": 1.0184427468230976e-06, - "loss": 1.1485, - "step": 3757 - }, - { - "epoch": 0.5094557039246255, - "grad_norm": 1.739535264095292, - "learning_rate": 1.0180036818320067e-06, - "loss": 1.1512, - "step": 3758 - }, - { - "epoch": 0.5095912695722904, - "grad_norm": 1.5936619348469676, - "learning_rate": 1.0175646133690479e-06, - "loss": 1.1488, - "step": 3759 - }, - { - "epoch": 0.5097268352199552, - "grad_norm": 1.7558292578896764, - "learning_rate": 1.017125541518892e-06, - "loss": 1.142, - "step": 3760 - }, - { - "epoch": 0.5098624008676201, - "grad_norm": 1.6956825410098038, - "learning_rate": 1.0166864663662104e-06, - "loss": 1.1631, - "step": 3761 - }, - { - "epoch": 0.5099979665152851, - "grad_norm": 3.1181943013403512, - "learning_rate": 1.016247387995676e-06, - "loss": 1.1436, - "step": 3762 - }, - { - "epoch": 0.5101335321629499, - "grad_norm": 2.2854720111206923, - "learning_rate": 1.0158083064919605e-06, - "loss": 1.1074, - "step": 3763 - }, - { - "epoch": 0.5102690978106148, - "grad_norm": 1.515114121700042, - "learning_rate": 1.0153692219397385e-06, - "loss": 1.1482, - "step": 3764 - }, - { - "epoch": 0.5104046634582796, - "grad_norm": 1.5072502021012482, - "learning_rate": 1.014930134423683e-06, - "loss": 1.1436, - "step": 3765 - }, - { - "epoch": 0.5105402291059445, - "grad_norm": 2.841311540570994, - "learning_rate": 1.0144910440284689e-06, - "loss": 1.1345, - "step": 3766 - }, - { - "epoch": 0.5106757947536095, - "grad_norm": 1.644298285819212, - "learning_rate": 1.0140519508387713e-06, - "loss": 1.1344, - "step": 3767 - }, - { - "epoch": 0.5108113604012743, - "grad_norm": 1.812476427425424, - "learning_rate": 1.013612854939266e-06, - "loss": 1.1777, - "step": 3768 - }, - { - "epoch": 0.5109469260489392, - "grad_norm": 4.624759798781143, - "learning_rate": 1.013173756414629e-06, - "loss": 1.1257, - "step": 3769 - }, - { - "epoch": 0.5110824916966041, - "grad_norm": 1.3442891029306898, - "learning_rate": 1.0127346553495371e-06, - "loss": 1.1631, - "step": 3770 - }, - { - "epoch": 0.511218057344269, - "grad_norm": 2.07488525940021, - "learning_rate": 1.0122955518286672e-06, - "loss": 1.1076, - "step": 3771 - }, - { - "epoch": 0.5113536229919339, - "grad_norm": 1.6647965337098636, - "learning_rate": 1.0118564459366976e-06, - "loss": 1.1188, - "step": 3772 - }, - { - "epoch": 0.5114891886395987, - "grad_norm": 1.6867358228347287, - "learning_rate": 1.0114173377583057e-06, - "loss": 1.1411, - "step": 3773 - }, - { - "epoch": 0.5116247542872636, - "grad_norm": 1.3723920800727132, - "learning_rate": 1.0109782273781706e-06, - "loss": 1.1216, - "step": 3774 - }, - { - "epoch": 0.5117603199349285, - "grad_norm": 1.7163976377592385, - "learning_rate": 1.0105391148809707e-06, - "loss": 1.1576, - "step": 3775 - }, - { - "epoch": 0.5118958855825934, - "grad_norm": 3.476889427981893, - "learning_rate": 1.010100000351386e-06, - "loss": 1.15, - "step": 3776 - }, - { - "epoch": 0.5120314512302583, - "grad_norm": 2.017557696189528, - "learning_rate": 1.0096608838740956e-06, - "loss": 1.0844, - "step": 3777 - }, - { - "epoch": 0.5121670168779231, - "grad_norm": 1.6129077668893823, - "learning_rate": 1.0092217655337806e-06, - "loss": 1.1847, - "step": 3778 - }, - { - "epoch": 0.512302582525588, - "grad_norm": 1.6490547256148527, - "learning_rate": 1.0087826454151205e-06, - "loss": 1.196, - "step": 3779 - }, - { - "epoch": 0.512438148173253, - "grad_norm": 1.6108935087575438, - "learning_rate": 1.0083435236027967e-06, - "loss": 1.1439, - "step": 3780 - }, - { - "epoch": 0.5125737138209178, - "grad_norm": 1.8722766208353923, - "learning_rate": 1.00790440018149e-06, - "loss": 1.1361, - "step": 3781 - }, - { - "epoch": 0.5127092794685827, - "grad_norm": 1.5677020392575725, - "learning_rate": 1.0074652752358822e-06, - "loss": 1.1033, - "step": 3782 - }, - { - "epoch": 0.5128448451162475, - "grad_norm": 1.6967363382907665, - "learning_rate": 1.0070261488506551e-06, - "loss": 1.1271, - "step": 3783 - }, - { - "epoch": 0.5129804107639124, - "grad_norm": 1.9986352860376846, - "learning_rate": 1.0065870211104906e-06, - "loss": 1.1833, - "step": 3784 - }, - { - "epoch": 0.5131159764115774, - "grad_norm": 2.022572591020284, - "learning_rate": 1.006147892100071e-06, - "loss": 1.1528, - "step": 3785 - }, - { - "epoch": 0.5132515420592422, - "grad_norm": 1.6232828510313575, - "learning_rate": 1.0057087619040792e-06, - "loss": 1.1285, - "step": 3786 - }, - { - "epoch": 0.5133871077069071, - "grad_norm": 1.7491368409401762, - "learning_rate": 1.0052696306071974e-06, - "loss": 1.1485, - "step": 3787 - }, - { - "epoch": 0.5135226733545719, - "grad_norm": 1.498290936046994, - "learning_rate": 1.0048304982941089e-06, - "loss": 1.1584, - "step": 3788 - }, - { - "epoch": 0.5136582390022368, - "grad_norm": 1.840202118310834, - "learning_rate": 1.0043913650494972e-06, - "loss": 1.1388, - "step": 3789 - }, - { - "epoch": 0.5137938046499018, - "grad_norm": 2.124050355158528, - "learning_rate": 1.0039522309580453e-06, - "loss": 1.1352, - "step": 3790 - }, - { - "epoch": 0.5139293702975666, - "grad_norm": 1.39727836655907, - "learning_rate": 1.003513096104437e-06, - "loss": 1.1239, - "step": 3791 - }, - { - "epoch": 0.5140649359452315, - "grad_norm": 1.6178262479559251, - "learning_rate": 1.0030739605733557e-06, - "loss": 1.1634, - "step": 3792 - }, - { - "epoch": 0.5142005015928963, - "grad_norm": 1.7754006135384162, - "learning_rate": 1.0026348244494853e-06, - "loss": 1.159, - "step": 3793 - }, - { - "epoch": 0.5143360672405612, - "grad_norm": 2.8851718217555344, - "learning_rate": 1.0021956878175099e-06, - "loss": 1.13, - "step": 3794 - }, - { - "epoch": 0.5144716328882262, - "grad_norm": 2.420087597140344, - "learning_rate": 1.0017565507621135e-06, - "loss": 1.1298, - "step": 3795 - }, - { - "epoch": 0.514607198535891, - "grad_norm": 1.963089540298795, - "learning_rate": 1.0013174133679801e-06, - "loss": 1.17, - "step": 3796 - }, - { - "epoch": 0.5147427641835559, - "grad_norm": 1.8640347783058695, - "learning_rate": 1.0008782757197939e-06, - "loss": 1.1858, - "step": 3797 - }, - { - "epoch": 0.5148783298312207, - "grad_norm": 1.437103615637163, - "learning_rate": 1.000439137902239e-06, - "loss": 1.1458, - "step": 3798 - }, - { - "epoch": 0.5150138954788857, - "grad_norm": 1.6098972280538904, - "learning_rate": 1e-06, - "loss": 1.1381, - "step": 3799 - }, - { - "epoch": 0.5151494611265506, - "grad_norm": 1.6965140431276151, - "learning_rate": 9.995608620977612e-07, - "loss": 1.1373, - "step": 3800 - }, - { - "epoch": 0.5152850267742154, - "grad_norm": 1.485745212333801, - "learning_rate": 9.991217242802063e-07, - "loss": 1.157, - "step": 3801 - }, - { - "epoch": 0.5154205924218803, - "grad_norm": 2.6078019357956164, - "learning_rate": 9.986825866320202e-07, - "loss": 1.1616, - "step": 3802 - }, - { - "epoch": 0.5155561580695451, - "grad_norm": 1.6940971244328535, - "learning_rate": 9.982434492378864e-07, - "loss": 1.1847, - "step": 3803 - }, - { - "epoch": 0.5156917237172101, - "grad_norm": 1.6090642550238574, - "learning_rate": 9.978043121824903e-07, - "loss": 1.1241, - "step": 3804 - }, - { - "epoch": 0.515827289364875, - "grad_norm": 3.041758742831177, - "learning_rate": 9.973651755505146e-07, - "loss": 1.1145, - "step": 3805 - }, - { - "epoch": 0.5159628550125398, - "grad_norm": 1.5515238635834339, - "learning_rate": 9.969260394266446e-07, - "loss": 1.1687, - "step": 3806 - }, - { - "epoch": 0.5160984206602047, - "grad_norm": 1.7330144741535902, - "learning_rate": 9.96486903895563e-07, - "loss": 1.1734, - "step": 3807 - }, - { - "epoch": 0.5162339863078695, - "grad_norm": 1.7519483874025938, - "learning_rate": 9.960477690419548e-07, - "loss": 1.1349, - "step": 3808 - }, - { - "epoch": 0.5163695519555345, - "grad_norm": 2.1529271550425517, - "learning_rate": 9.956086349505027e-07, - "loss": 1.1421, - "step": 3809 - }, - { - "epoch": 0.5165051176031994, - "grad_norm": 1.7612404075346286, - "learning_rate": 9.95169501705891e-07, - "loss": 1.1223, - "step": 3810 - }, - { - "epoch": 0.5166406832508642, - "grad_norm": 1.3792800202113686, - "learning_rate": 9.947303693928026e-07, - "loss": 1.1614, - "step": 3811 - }, - { - "epoch": 0.5167762488985291, - "grad_norm": 1.6613793020034537, - "learning_rate": 9.94291238095921e-07, - "loss": 1.1562, - "step": 3812 - }, - { - "epoch": 0.516911814546194, - "grad_norm": 3.0935254998930994, - "learning_rate": 9.938521078999288e-07, - "loss": 1.1465, - "step": 3813 - }, - { - "epoch": 0.5170473801938589, - "grad_norm": 1.8286072135606555, - "learning_rate": 9.934129788895093e-07, - "loss": 1.1316, - "step": 3814 - }, - { - "epoch": 0.5171829458415238, - "grad_norm": 2.0204385965987184, - "learning_rate": 9.92973851149345e-07, - "loss": 1.1787, - "step": 3815 - }, - { - "epoch": 0.5173185114891886, - "grad_norm": 1.4790884584841835, - "learning_rate": 9.92534724764118e-07, - "loss": 1.1195, - "step": 3816 - }, - { - "epoch": 0.5174540771368535, - "grad_norm": 2.1788083105156906, - "learning_rate": 9.920955998185102e-07, - "loss": 1.1087, - "step": 3817 - }, - { - "epoch": 0.5175896427845184, - "grad_norm": 1.4886195522952153, - "learning_rate": 9.916564763972035e-07, - "loss": 1.1542, - "step": 3818 - }, - { - "epoch": 0.5177252084321833, - "grad_norm": 1.528986652175169, - "learning_rate": 9.912173545848796e-07, - "loss": 1.1071, - "step": 3819 - }, - { - "epoch": 0.5178607740798482, - "grad_norm": 1.840939645187074, - "learning_rate": 9.907782344662194e-07, - "loss": 1.14, - "step": 3820 - }, - { - "epoch": 0.517996339727513, - "grad_norm": 1.5429265154614986, - "learning_rate": 9.903391161259043e-07, - "loss": 1.1433, - "step": 3821 - }, - { - "epoch": 0.518131905375178, - "grad_norm": 4.156043806667673, - "learning_rate": 9.898999996486137e-07, - "loss": 1.1147, - "step": 3822 - }, - { - "epoch": 0.5182674710228428, - "grad_norm": 3.04561903971447, - "learning_rate": 9.894608851190292e-07, - "loss": 1.1237, - "step": 3823 - }, - { - "epoch": 0.5184030366705077, - "grad_norm": 1.4604267705917786, - "learning_rate": 9.890217726218293e-07, - "loss": 1.1276, - "step": 3824 - }, - { - "epoch": 0.5185386023181726, - "grad_norm": 1.8474085112993734, - "learning_rate": 9.885826622416942e-07, - "loss": 1.1663, - "step": 3825 - }, - { - "epoch": 0.5186741679658374, - "grad_norm": 1.4386563556170044, - "learning_rate": 9.88143554063302e-07, - "loss": 1.1567, - "step": 3826 - }, - { - "epoch": 0.5188097336135024, - "grad_norm": 1.805810281403571, - "learning_rate": 9.877044481713327e-07, - "loss": 1.1354, - "step": 3827 - }, - { - "epoch": 0.5189452992611672, - "grad_norm": 1.5454204218446463, - "learning_rate": 9.872653446504632e-07, - "loss": 1.1121, - "step": 3828 - }, - { - "epoch": 0.5190808649088321, - "grad_norm": 1.7025366363958732, - "learning_rate": 9.86826243585371e-07, - "loss": 1.1789, - "step": 3829 - }, - { - "epoch": 0.519216430556497, - "grad_norm": 1.4620737293193315, - "learning_rate": 9.863871450607342e-07, - "loss": 1.1474, - "step": 3830 - }, - { - "epoch": 0.5193519962041618, - "grad_norm": 2.028261936229309, - "learning_rate": 9.859480491612288e-07, - "loss": 1.1524, - "step": 3831 - }, - { - "epoch": 0.5194875618518268, - "grad_norm": 2.203600315270896, - "learning_rate": 9.855089559715314e-07, - "loss": 1.1313, - "step": 3832 - }, - { - "epoch": 0.5196231274994916, - "grad_norm": 4.836023954474209, - "learning_rate": 9.850698655763171e-07, - "loss": 1.1614, - "step": 3833 - }, - { - "epoch": 0.5197586931471565, - "grad_norm": 1.6069582580220267, - "learning_rate": 9.846307780602619e-07, - "loss": 1.1089, - "step": 3834 - }, - { - "epoch": 0.5198942587948214, - "grad_norm": 1.5927755451057266, - "learning_rate": 9.841916935080392e-07, - "loss": 1.1414, - "step": 3835 - }, - { - "epoch": 0.5200298244424862, - "grad_norm": 1.5722488451444716, - "learning_rate": 9.837526120043242e-07, - "loss": 1.1379, - "step": 3836 - }, - { - "epoch": 0.5201653900901512, - "grad_norm": 2.2233437686814463, - "learning_rate": 9.833135336337893e-07, - "loss": 1.1087, - "step": 3837 - }, - { - "epoch": 0.520300955737816, - "grad_norm": 6.931028654297517, - "learning_rate": 9.82874458481108e-07, - "loss": 1.1435, - "step": 3838 - }, - { - "epoch": 0.5204365213854809, - "grad_norm": 1.7395578328148433, - "learning_rate": 9.82435386630952e-07, - "loss": 1.1872, - "step": 3839 - }, - { - "epoch": 0.5205720870331458, - "grad_norm": 1.5829108907688485, - "learning_rate": 9.819963181679934e-07, - "loss": 1.1053, - "step": 3840 - }, - { - "epoch": 0.5207076526808107, - "grad_norm": 2.4979757414892223, - "learning_rate": 9.81557253176902e-07, - "loss": 1.0986, - "step": 3841 - }, - { - "epoch": 0.5208432183284756, - "grad_norm": 1.4742627658578178, - "learning_rate": 9.811181917423495e-07, - "loss": 1.1363, - "step": 3842 - }, - { - "epoch": 0.5209787839761404, - "grad_norm": 1.5919608816191824, - "learning_rate": 9.806791339490047e-07, - "loss": 1.1009, - "step": 3843 - }, - { - "epoch": 0.5211143496238053, - "grad_norm": 2.1331043987425358, - "learning_rate": 9.802400798815357e-07, - "loss": 1.1566, - "step": 3844 - }, - { - "epoch": 0.5212499152714702, - "grad_norm": 1.8358623117586705, - "learning_rate": 9.79801029624612e-07, - "loss": 1.1097, - "step": 3845 - }, - { - "epoch": 0.5213854809191351, - "grad_norm": 1.4660668135280408, - "learning_rate": 9.793619832629001e-07, - "loss": 1.1616, - "step": 3846 - }, - { - "epoch": 0.5215210465668, - "grad_norm": 1.7713203299248932, - "learning_rate": 9.789229408810668e-07, - "loss": 1.1395, - "step": 3847 - }, - { - "epoch": 0.5216566122144649, - "grad_norm": 1.4747368718706004, - "learning_rate": 9.784839025637778e-07, - "loss": 1.1309, - "step": 3848 - }, - { - "epoch": 0.5217921778621297, - "grad_norm": 1.6294665424592707, - "learning_rate": 9.780448683956983e-07, - "loss": 1.1483, - "step": 3849 - }, - { - "epoch": 0.5219277435097947, - "grad_norm": 1.5160659318156828, - "learning_rate": 9.77605838461493e-07, - "loss": 1.1691, - "step": 3850 - }, - { - "epoch": 0.5220633091574595, - "grad_norm": 1.592660670226848, - "learning_rate": 9.771668128458251e-07, - "loss": 1.089, - "step": 3851 - }, - { - "epoch": 0.5221988748051244, - "grad_norm": 1.8108746137965357, - "learning_rate": 9.767277916333564e-07, - "loss": 1.1121, - "step": 3852 - }, - { - "epoch": 0.5223344404527893, - "grad_norm": 1.7498308158802973, - "learning_rate": 9.762887749087501e-07, - "loss": 1.159, - "step": 3853 - }, - { - "epoch": 0.5224700061004541, - "grad_norm": 1.6004326265373434, - "learning_rate": 9.758497627566657e-07, - "loss": 1.1561, - "step": 3854 - }, - { - "epoch": 0.5226055717481191, - "grad_norm": 1.4932150282247383, - "learning_rate": 9.754107552617645e-07, - "loss": 1.1721, - "step": 3855 - }, - { - "epoch": 0.5227411373957839, - "grad_norm": 1.717654314344305, - "learning_rate": 9.749717525087051e-07, - "loss": 1.1486, - "step": 3856 - }, - { - "epoch": 0.5228767030434488, - "grad_norm": 1.7612172774392394, - "learning_rate": 9.745327545821452e-07, - "loss": 1.139, - "step": 3857 - }, - { - "epoch": 0.5230122686911137, - "grad_norm": 1.7505418906845625, - "learning_rate": 9.74093761566743e-07, - "loss": 1.0943, - "step": 3858 - }, - { - "epoch": 0.5231478343387785, - "grad_norm": 1.8461893927655517, - "learning_rate": 9.736547735471539e-07, - "loss": 1.1417, - "step": 3859 - }, - { - "epoch": 0.5232833999864435, - "grad_norm": 1.7405831498273523, - "learning_rate": 9.732157906080343e-07, - "loss": 1.1777, - "step": 3860 - }, - { - "epoch": 0.5234189656341083, - "grad_norm": 1.4327050090433808, - "learning_rate": 9.727768128340375e-07, - "loss": 1.1303, - "step": 3861 - }, - { - "epoch": 0.5235545312817732, - "grad_norm": 3.152626251475727, - "learning_rate": 9.72337840309818e-07, - "loss": 1.1461, - "step": 3862 - }, - { - "epoch": 0.5236900969294381, - "grad_norm": 1.6474954991029214, - "learning_rate": 9.718988731200271e-07, - "loss": 1.1682, - "step": 3863 - }, - { - "epoch": 0.523825662577103, - "grad_norm": 1.4055151125265204, - "learning_rate": 9.714599113493171e-07, - "loss": 1.1367, - "step": 3864 - }, - { - "epoch": 0.5239612282247679, - "grad_norm": 1.61269235458932, - "learning_rate": 9.710209550823375e-07, - "loss": 1.1397, - "step": 3865 - }, - { - "epoch": 0.5240967938724327, - "grad_norm": 1.7573813927770752, - "learning_rate": 9.705820044037387e-07, - "loss": 1.141, - "step": 3866 - }, - { - "epoch": 0.5242323595200976, - "grad_norm": 1.3891504257161602, - "learning_rate": 9.701430593981674e-07, - "loss": 1.1645, - "step": 3867 - }, - { - "epoch": 0.5243679251677625, - "grad_norm": 1.6418828498550277, - "learning_rate": 9.697041201502718e-07, - "loss": 1.2171, - "step": 3868 - }, - { - "epoch": 0.5245034908154274, - "grad_norm": 1.5063490966419066, - "learning_rate": 9.692651867446973e-07, - "loss": 1.1616, - "step": 3869 - }, - { - "epoch": 0.5246390564630923, - "grad_norm": 2.959613109664356, - "learning_rate": 9.688262592660893e-07, - "loss": 1.1233, - "step": 3870 - }, - { - "epoch": 0.5247746221107571, - "grad_norm": 1.4354524567733877, - "learning_rate": 9.68387337799091e-07, - "loss": 1.1301, - "step": 3871 - }, - { - "epoch": 0.524910187758422, - "grad_norm": 1.5812304587597648, - "learning_rate": 9.679484224283447e-07, - "loss": 1.1841, - "step": 3872 - }, - { - "epoch": 0.525045753406087, - "grad_norm": 1.6514985264557942, - "learning_rate": 9.675095132384927e-07, - "loss": 1.1123, - "step": 3873 - }, - { - "epoch": 0.5251813190537518, - "grad_norm": 1.7538776097634916, - "learning_rate": 9.67070610314174e-07, - "loss": 1.1342, - "step": 3874 - }, - { - "epoch": 0.5253168847014167, - "grad_norm": 2.373865766486846, - "learning_rate": 9.666317137400287e-07, - "loss": 1.1243, - "step": 3875 - }, - { - "epoch": 0.5254524503490815, - "grad_norm": 2.411804763155915, - "learning_rate": 9.661928236006936e-07, - "loss": 1.0929, - "step": 3876 - }, - { - "epoch": 0.5255880159967464, - "grad_norm": 1.7828671352491299, - "learning_rate": 9.65753939980806e-07, - "loss": 1.1973, - "step": 3877 - }, - { - "epoch": 0.5257235816444114, - "grad_norm": 5.300435496526742, - "learning_rate": 9.653150629650004e-07, - "loss": 1.1185, - "step": 3878 - }, - { - "epoch": 0.5258591472920762, - "grad_norm": 1.515301454230536, - "learning_rate": 9.648761926379112e-07, - "loss": 1.1647, - "step": 3879 - }, - { - "epoch": 0.5259947129397411, - "grad_norm": 1.977735375263373, - "learning_rate": 9.644373290841712e-07, - "loss": 1.1563, - "step": 3880 - }, - { - "epoch": 0.5261302785874059, - "grad_norm": 1.5203113143361526, - "learning_rate": 9.639984723884112e-07, - "loss": 1.1437, - "step": 3881 - }, - { - "epoch": 0.5262658442350708, - "grad_norm": 1.5218940097811937, - "learning_rate": 9.635596226352618e-07, - "loss": 1.1517, - "step": 3882 - }, - { - "epoch": 0.5264014098827358, - "grad_norm": 1.7701040326006894, - "learning_rate": 9.63120779909352e-07, - "loss": 1.1545, - "step": 3883 - }, - { - "epoch": 0.5265369755304006, - "grad_norm": 1.900904362418526, - "learning_rate": 9.626819442953081e-07, - "loss": 1.1659, - "step": 3884 - }, - { - "epoch": 0.5266725411780655, - "grad_norm": 1.946278591349942, - "learning_rate": 9.622431158777568e-07, - "loss": 1.1337, - "step": 3885 - }, - { - "epoch": 0.5268081068257303, - "grad_norm": 1.8151296094721412, - "learning_rate": 9.618042947413228e-07, - "loss": 1.1619, - "step": 3886 - }, - { - "epoch": 0.5269436724733952, - "grad_norm": 3.327973485879826, - "learning_rate": 9.613654809706288e-07, - "loss": 1.1231, - "step": 3887 - }, - { - "epoch": 0.5270792381210602, - "grad_norm": 1.4797915591583486, - "learning_rate": 9.60926674650297e-07, - "loss": 1.1413, - "step": 3888 - }, - { - "epoch": 0.527214803768725, - "grad_norm": 3.3569987720244434, - "learning_rate": 9.604878758649472e-07, - "loss": 1.1397, - "step": 3889 - }, - { - "epoch": 0.5273503694163899, - "grad_norm": 9.16176608546038, - "learning_rate": 9.60049084699199e-07, - "loss": 1.1078, - "step": 3890 - }, - { - "epoch": 0.5274859350640547, - "grad_norm": 2.2274185968694593, - "learning_rate": 9.596103012376695e-07, - "loss": 1.103, - "step": 3891 - }, - { - "epoch": 0.5276215007117196, - "grad_norm": 1.7646551931003238, - "learning_rate": 9.591715255649746e-07, - "loss": 1.1416, - "step": 3892 - }, - { - "epoch": 0.5277570663593846, - "grad_norm": 1.5041364638629717, - "learning_rate": 9.587327577657283e-07, - "loss": 1.1568, - "step": 3893 - }, - { - "epoch": 0.5278926320070494, - "grad_norm": 1.6128350375712481, - "learning_rate": 9.582939979245444e-07, - "loss": 1.172, - "step": 3894 - }, - { - "epoch": 0.5280281976547143, - "grad_norm": 2.4675676920849146, - "learning_rate": 9.578552461260335e-07, - "loss": 1.1397, - "step": 3895 - }, - { - "epoch": 0.5281637633023791, - "grad_norm": 1.8268944070007982, - "learning_rate": 9.57416502454806e-07, - "loss": 1.1611, - "step": 3896 - }, - { - "epoch": 0.5282993289500441, - "grad_norm": 1.6746158142852754, - "learning_rate": 9.569777669954693e-07, - "loss": 1.1563, - "step": 3897 - }, - { - "epoch": 0.528434894597709, - "grad_norm": 2.540946556557655, - "learning_rate": 9.565390398326312e-07, - "loss": 1.1479, - "step": 3898 - }, - { - "epoch": 0.5285704602453738, - "grad_norm": 1.7139158713676466, - "learning_rate": 9.561003210508963e-07, - "loss": 1.1435, - "step": 3899 - }, - { - "epoch": 0.5287060258930387, - "grad_norm": 1.5234907753564322, - "learning_rate": 9.556616107348675e-07, - "loss": 1.1234, - "step": 3900 - }, - { - "epoch": 0.5288415915407035, - "grad_norm": 1.733072083010232, - "learning_rate": 9.552229089691474e-07, - "loss": 1.1328, - "step": 3901 - }, - { - "epoch": 0.5289771571883685, - "grad_norm": 1.8316930578900048, - "learning_rate": 9.547842158383354e-07, - "loss": 1.1348, - "step": 3902 - }, - { - "epoch": 0.5291127228360334, - "grad_norm": 1.6319615679472268, - "learning_rate": 9.54345531427031e-07, - "loss": 1.1271, - "step": 3903 - }, - { - "epoch": 0.5292482884836982, - "grad_norm": 1.7308129970822055, - "learning_rate": 9.539068558198301e-07, - "loss": 1.169, - "step": 3904 - }, - { - "epoch": 0.5293838541313631, - "grad_norm": 1.824660247441993, - "learning_rate": 9.534681891013286e-07, - "loss": 1.1607, - "step": 3905 - }, - { - "epoch": 0.5295194197790279, - "grad_norm": 1.7022690390353408, - "learning_rate": 9.530295313561192e-07, - "loss": 1.151, - "step": 3906 - }, - { - "epoch": 0.5296549854266929, - "grad_norm": 1.6946162079989933, - "learning_rate": 9.525908826687943e-07, - "loss": 1.1746, - "step": 3907 - }, - { - "epoch": 0.5297905510743578, - "grad_norm": 1.7515640178595928, - "learning_rate": 9.521522431239429e-07, - "loss": 1.1191, - "step": 3908 - }, - { - "epoch": 0.5299261167220226, - "grad_norm": 1.4469400175871883, - "learning_rate": 9.517136128061543e-07, - "loss": 1.1173, - "step": 3909 - }, - { - "epoch": 0.5300616823696875, - "grad_norm": 2.1385794986294107, - "learning_rate": 9.51274991800014e-07, - "loss": 1.099, - "step": 3910 - }, - { - "epoch": 0.5301972480173524, - "grad_norm": 1.8758223337425535, - "learning_rate": 9.508363801901069e-07, - "loss": 1.1409, - "step": 3911 - }, - { - "epoch": 0.5303328136650173, - "grad_norm": 1.6057180421808077, - "learning_rate": 9.50397778061016e-07, - "loss": 1.1463, - "step": 3912 - }, - { - "epoch": 0.5304683793126822, - "grad_norm": 1.4747276510247216, - "learning_rate": 9.49959185497322e-07, - "loss": 1.0638, - "step": 3913 - }, - { - "epoch": 0.530603944960347, - "grad_norm": 1.8018572660113148, - "learning_rate": 9.49520602583604e-07, - "loss": 1.1388, - "step": 3914 - }, - { - "epoch": 0.5307395106080119, - "grad_norm": 2.0279776745253306, - "learning_rate": 9.490820294044394e-07, - "loss": 1.1292, - "step": 3915 - }, - { - "epoch": 0.5308750762556768, - "grad_norm": 1.4642492741737183, - "learning_rate": 9.486434660444034e-07, - "loss": 1.1788, - "step": 3916 - }, - { - "epoch": 0.5310106419033417, - "grad_norm": 1.5822183873636648, - "learning_rate": 9.482049125880697e-07, - "loss": 1.1742, - "step": 3917 - }, - { - "epoch": 0.5311462075510066, - "grad_norm": 1.6613854416523042, - "learning_rate": 9.477663691200099e-07, - "loss": 1.1373, - "step": 3918 - }, - { - "epoch": 0.5312817731986714, - "grad_norm": 1.6830385799985845, - "learning_rate": 9.47327835724793e-07, - "loss": 1.1154, - "step": 3919 - }, - { - "epoch": 0.5314173388463364, - "grad_norm": 1.7935667863136269, - "learning_rate": 9.468893124869878e-07, - "loss": 1.145, - "step": 3920 - }, - { - "epoch": 0.5315529044940012, - "grad_norm": 1.5401423126100569, - "learning_rate": 9.464507994911589e-07, - "loss": 1.136, - "step": 3921 - }, - { - "epoch": 0.5316884701416661, - "grad_norm": 5.3408133848874915, - "learning_rate": 9.460122968218711e-07, - "loss": 1.1145, - "step": 3922 - }, - { - "epoch": 0.531824035789331, - "grad_norm": 1.9105087984031632, - "learning_rate": 9.455738045636853e-07, - "loss": 1.1083, - "step": 3923 - }, - { - "epoch": 0.5319596014369958, - "grad_norm": 2.7354195011992917, - "learning_rate": 9.451353228011622e-07, - "loss": 1.1249, - "step": 3924 - }, - { - "epoch": 0.5320951670846608, - "grad_norm": 1.7967146302067714, - "learning_rate": 9.446968516188584e-07, - "loss": 1.1341, - "step": 3925 - }, - { - "epoch": 0.5322307327323256, - "grad_norm": 2.1393089981439015, - "learning_rate": 9.442583911013308e-07, - "loss": 1.1125, - "step": 3926 - }, - { - "epoch": 0.5323662983799905, - "grad_norm": 2.0798940982068768, - "learning_rate": 9.438199413331323e-07, - "loss": 1.1367, - "step": 3927 - }, - { - "epoch": 0.5325018640276554, - "grad_norm": 1.7805568946113108, - "learning_rate": 9.433815023988144e-07, - "loss": 1.153, - "step": 3928 - }, - { - "epoch": 0.5326374296753202, - "grad_norm": 1.8920733733394852, - "learning_rate": 9.429430743829272e-07, - "loss": 1.1012, - "step": 3929 - }, - { - "epoch": 0.5327729953229852, - "grad_norm": 1.5442828320690682, - "learning_rate": 9.425046573700174e-07, - "loss": 1.0842, - "step": 3930 - }, - { - "epoch": 0.5329085609706501, - "grad_norm": 3.769269250318628, - "learning_rate": 9.420662514446309e-07, - "loss": 1.1379, - "step": 3931 - }, - { - "epoch": 0.5330441266183149, - "grad_norm": 1.5437535084785903, - "learning_rate": 9.4162785669131e-07, - "loss": 1.1206, - "step": 3932 - }, - { - "epoch": 0.5331796922659798, - "grad_norm": 1.3756762066882513, - "learning_rate": 9.411894731945968e-07, - "loss": 1.1761, - "step": 3933 - }, - { - "epoch": 0.5333152579136446, - "grad_norm": 1.608099464622897, - "learning_rate": 9.40751101039029e-07, - "loss": 1.1443, - "step": 3934 - }, - { - "epoch": 0.5334508235613096, - "grad_norm": 1.5803458077068513, - "learning_rate": 9.403127403091441e-07, - "loss": 1.1366, - "step": 3935 - }, - { - "epoch": 0.5335863892089745, - "grad_norm": 1.7706034744522712, - "learning_rate": 9.398743910894755e-07, - "loss": 1.1707, - "step": 3936 - }, - { - "epoch": 0.5337219548566393, - "grad_norm": 1.6277929252896688, - "learning_rate": 9.394360534645566e-07, - "loss": 1.1345, - "step": 3937 - }, - { - "epoch": 0.5338575205043042, - "grad_norm": 1.5038275074635077, - "learning_rate": 9.389977275189163e-07, - "loss": 1.1376, - "step": 3938 - }, - { - "epoch": 0.533993086151969, - "grad_norm": 1.7403062547128598, - "learning_rate": 9.38559413337083e-07, - "loss": 1.1575, - "step": 3939 - }, - { - "epoch": 0.534128651799634, - "grad_norm": 1.8891470550503489, - "learning_rate": 9.381211110035819e-07, - "loss": 1.1461, - "step": 3940 - }, - { - "epoch": 0.5342642174472989, - "grad_norm": 2.0991645292708565, - "learning_rate": 9.376828206029358e-07, - "loss": 1.1286, - "step": 3941 - }, - { - "epoch": 0.5343997830949637, - "grad_norm": 2.7633237949695584, - "learning_rate": 9.372445422196662e-07, - "loss": 1.1221, - "step": 3942 - }, - { - "epoch": 0.5345353487426286, - "grad_norm": 1.5269555086304247, - "learning_rate": 9.368062759382908e-07, - "loss": 1.13, - "step": 3943 - }, - { - "epoch": 0.5346709143902935, - "grad_norm": 1.9311033237646111, - "learning_rate": 9.363680218433267e-07, - "loss": 1.1568, - "step": 3944 - }, - { - "epoch": 0.5348064800379584, - "grad_norm": 1.426954834413145, - "learning_rate": 9.359297800192871e-07, - "loss": 1.1546, - "step": 3945 - }, - { - "epoch": 0.5349420456856233, - "grad_norm": 2.8391062486333363, - "learning_rate": 9.354915505506838e-07, - "loss": 1.1552, - "step": 3946 - }, - { - "epoch": 0.5350776113332881, - "grad_norm": 1.8692108903209737, - "learning_rate": 9.350533335220256e-07, - "loss": 1.1529, - "step": 3947 - }, - { - "epoch": 0.535213176980953, - "grad_norm": 2.709043716441755, - "learning_rate": 9.346151290178195e-07, - "loss": 1.136, - "step": 3948 - }, - { - "epoch": 0.5353487426286179, - "grad_norm": 1.7462871120092351, - "learning_rate": 9.341769371225696e-07, - "loss": 1.1385, - "step": 3949 - }, - { - "epoch": 0.5354843082762828, - "grad_norm": 1.6094722767230376, - "learning_rate": 9.337387579207779e-07, - "loss": 1.124, - "step": 3950 - }, - { - "epoch": 0.5356198739239477, - "grad_norm": 1.4382934197572197, - "learning_rate": 9.333005914969434e-07, - "loss": 1.1345, - "step": 3951 - }, - { - "epoch": 0.5357554395716125, - "grad_norm": 1.7369710644799246, - "learning_rate": 9.328624379355639e-07, - "loss": 1.1309, - "step": 3952 - }, - { - "epoch": 0.5358910052192775, - "grad_norm": 1.7591233272958737, - "learning_rate": 9.324242973211326e-07, - "loss": 1.1018, - "step": 3953 - }, - { - "epoch": 0.5360265708669423, - "grad_norm": 1.7008116439505885, - "learning_rate": 9.319861697381427e-07, - "loss": 1.1309, - "step": 3954 - }, - { - "epoch": 0.5361621365146072, - "grad_norm": 1.7275809694711328, - "learning_rate": 9.315480552710832e-07, - "loss": 1.0833, - "step": 3955 - }, - { - "epoch": 0.5362977021622721, - "grad_norm": 1.8745886968108774, - "learning_rate": 9.311099540044402e-07, - "loss": 1.1408, - "step": 3956 - }, - { - "epoch": 0.5364332678099369, - "grad_norm": 3.2018032072797693, - "learning_rate": 9.306718660226996e-07, - "loss": 1.1729, - "step": 3957 - }, - { - "epoch": 0.5365688334576019, - "grad_norm": 1.9196333846058828, - "learning_rate": 9.302337914103416e-07, - "loss": 1.1636, - "step": 3958 - }, - { - "epoch": 0.5367043991052667, - "grad_norm": 1.5997020088413285, - "learning_rate": 9.297957302518469e-07, - "loss": 1.2061, - "step": 3959 - }, - { - "epoch": 0.5368399647529316, - "grad_norm": 3.1950367642490667, - "learning_rate": 9.293576826316909e-07, - "loss": 1.1211, - "step": 3960 - }, - { - "epoch": 0.5369755304005965, - "grad_norm": 1.5864621202101075, - "learning_rate": 9.289196486343487e-07, - "loss": 1.1369, - "step": 3961 - }, - { - "epoch": 0.5371110960482613, - "grad_norm": 1.5965549053563621, - "learning_rate": 9.284816283442907e-07, - "loss": 1.1458, - "step": 3962 - }, - { - "epoch": 0.5372466616959263, - "grad_norm": 1.4167124041054016, - "learning_rate": 9.280436218459866e-07, - "loss": 1.1271, - "step": 3963 - }, - { - "epoch": 0.5373822273435911, - "grad_norm": 1.6834201147055754, - "learning_rate": 9.276056292239016e-07, - "loss": 1.1664, - "step": 3964 - }, - { - "epoch": 0.537517792991256, - "grad_norm": 1.6157824738227264, - "learning_rate": 9.271676505625e-07, - "loss": 1.1269, - "step": 3965 - }, - { - "epoch": 0.5376533586389209, - "grad_norm": 1.9973078806661086, - "learning_rate": 9.267296859462416e-07, - "loss": 1.1337, - "step": 3966 - }, - { - "epoch": 0.5377889242865858, - "grad_norm": 1.4816613786203243, - "learning_rate": 9.262917354595854e-07, - "loss": 1.1697, - "step": 3967 - }, - { - "epoch": 0.5379244899342507, - "grad_norm": 1.4300663710638506, - "learning_rate": 9.258537991869861e-07, - "loss": 1.1311, - "step": 3968 - }, - { - "epoch": 0.5380600555819155, - "grad_norm": 4.197234708252887, - "learning_rate": 9.254158772128961e-07, - "loss": 1.1262, - "step": 3969 - }, - { - "epoch": 0.5381956212295804, - "grad_norm": 1.586103274298997, - "learning_rate": 9.249779696217658e-07, - "loss": 1.1712, - "step": 3970 - }, - { - "epoch": 0.5383311868772453, - "grad_norm": 1.4321070736924264, - "learning_rate": 9.245400764980413e-07, - "loss": 1.0968, - "step": 3971 - }, - { - "epoch": 0.5384667525249102, - "grad_norm": 1.6166836608920132, - "learning_rate": 9.241021979261681e-07, - "loss": 1.1487, - "step": 3972 - }, - { - "epoch": 0.5386023181725751, - "grad_norm": 1.8028273525427696, - "learning_rate": 9.236643339905863e-07, - "loss": 1.1279, - "step": 3973 - }, - { - "epoch": 0.5387378838202399, - "grad_norm": 1.4045469105452582, - "learning_rate": 9.232264847757356e-07, - "loss": 1.1622, - "step": 3974 - }, - { - "epoch": 0.5388734494679048, - "grad_norm": 1.5161538107787003, - "learning_rate": 9.227886503660509e-07, - "loss": 1.1037, - "step": 3975 - }, - { - "epoch": 0.5390090151155698, - "grad_norm": 2.384954535464977, - "learning_rate": 9.223508308459659e-07, - "loss": 1.1402, - "step": 3976 - }, - { - "epoch": 0.5391445807632346, - "grad_norm": 2.0705494141923455, - "learning_rate": 9.219130262999101e-07, - "loss": 1.1319, - "step": 3977 - }, - { - "epoch": 0.5392801464108995, - "grad_norm": 1.6172109674518025, - "learning_rate": 9.214752368123107e-07, - "loss": 1.1565, - "step": 3978 - }, - { - "epoch": 0.5394157120585643, - "grad_norm": 1.528251745885748, - "learning_rate": 9.21037462467592e-07, - "loss": 1.1373, - "step": 3979 - }, - { - "epoch": 0.5395512777062292, - "grad_norm": 1.697461598154436, - "learning_rate": 9.205997033501756e-07, - "loss": 1.1234, - "step": 3980 - }, - { - "epoch": 0.5396868433538942, - "grad_norm": 1.5594737282066502, - "learning_rate": 9.201619595444795e-07, - "loss": 1.1882, - "step": 3981 - }, - { - "epoch": 0.539822409001559, - "grad_norm": 2.0031835670564075, - "learning_rate": 9.197242311349195e-07, - "loss": 1.1535, - "step": 3982 - }, - { - "epoch": 0.5399579746492239, - "grad_norm": 1.720051511864373, - "learning_rate": 9.192865182059077e-07, - "loss": 1.1214, - "step": 3983 - }, - { - "epoch": 0.5400935402968887, - "grad_norm": 1.7846625465967871, - "learning_rate": 9.188488208418538e-07, - "loss": 1.1029, - "step": 3984 - }, - { - "epoch": 0.5402291059445536, - "grad_norm": 1.6655984906894223, - "learning_rate": 9.184111391271642e-07, - "loss": 1.1118, - "step": 3985 - }, - { - "epoch": 0.5403646715922186, - "grad_norm": 1.5668392025615916, - "learning_rate": 9.179734731462423e-07, - "loss": 1.1271, - "step": 3986 - }, - { - "epoch": 0.5405002372398834, - "grad_norm": 1.5530850637675997, - "learning_rate": 9.175358229834888e-07, - "loss": 1.0894, - "step": 3987 - }, - { - "epoch": 0.5406358028875483, - "grad_norm": 1.8115391858283925, - "learning_rate": 9.170981887233007e-07, - "loss": 1.1498, - "step": 3988 - }, - { - "epoch": 0.5407713685352131, - "grad_norm": 1.6326583011463607, - "learning_rate": 9.166605704500728e-07, - "loss": 1.111, - "step": 3989 - }, - { - "epoch": 0.540906934182878, - "grad_norm": 1.5550377536730526, - "learning_rate": 9.162229682481957e-07, - "loss": 1.1291, - "step": 3990 - }, - { - "epoch": 0.541042499830543, - "grad_norm": 1.8418227549409383, - "learning_rate": 9.157853822020582e-07, - "loss": 1.1521, - "step": 3991 - }, - { - "epoch": 0.5411780654782078, - "grad_norm": 1.578555286532687, - "learning_rate": 9.153478123960446e-07, - "loss": 1.1308, - "step": 3992 - }, - { - "epoch": 0.5413136311258727, - "grad_norm": 1.5970968320738483, - "learning_rate": 9.149102589145376e-07, - "loss": 1.1494, - "step": 3993 - }, - { - "epoch": 0.5414491967735375, - "grad_norm": 2.204999633261993, - "learning_rate": 9.144727218419151e-07, - "loss": 1.1749, - "step": 3994 - }, - { - "epoch": 0.5415847624212025, - "grad_norm": 1.598858448240508, - "learning_rate": 9.140352012625536e-07, - "loss": 1.1212, - "step": 3995 - }, - { - "epoch": 0.5417203280688674, - "grad_norm": 2.260286772764513, - "learning_rate": 9.135976972608248e-07, - "loss": 1.1425, - "step": 3996 - }, - { - "epoch": 0.5418558937165322, - "grad_norm": 1.5653492636481954, - "learning_rate": 9.131602099210978e-07, - "loss": 1.1776, - "step": 3997 - }, - { - "epoch": 0.5419914593641971, - "grad_norm": 1.7252343836341248, - "learning_rate": 9.127227393277391e-07, - "loss": 1.1547, - "step": 3998 - }, - { - "epoch": 0.5421270250118619, - "grad_norm": 1.905002121864447, - "learning_rate": 9.12285285565111e-07, - "loss": 1.1254, - "step": 3999 - }, - { - "epoch": 0.5422625906595269, - "grad_norm": 1.8944898898336646, - "learning_rate": 9.118478487175735e-07, - "loss": 1.1509, - "step": 4000 - }, - { - "epoch": 0.5423981563071918, - "grad_norm": 1.5256815678274867, - "learning_rate": 9.114104288694821e-07, - "loss": 1.1508, - "step": 4001 - }, - { - "epoch": 0.5425337219548566, - "grad_norm": 1.9864163950832316, - "learning_rate": 9.109730261051905e-07, - "loss": 1.1548, - "step": 4002 - }, - { - "epoch": 0.5426692876025215, - "grad_norm": 1.5936323097044178, - "learning_rate": 9.105356405090479e-07, - "loss": 1.1256, - "step": 4003 - }, - { - "epoch": 0.5428048532501863, - "grad_norm": 1.840236767391538, - "learning_rate": 9.100982721654011e-07, - "loss": 1.1385, - "step": 4004 - }, - { - "epoch": 0.5429404188978513, - "grad_norm": 1.9954352911855533, - "learning_rate": 9.096609211585926e-07, - "loss": 1.1368, - "step": 4005 - }, - { - "epoch": 0.5430759845455162, - "grad_norm": 1.5589498033981692, - "learning_rate": 9.092235875729627e-07, - "loss": 1.1303, - "step": 4006 - }, - { - "epoch": 0.543211550193181, - "grad_norm": 1.5130481889529435, - "learning_rate": 9.087862714928471e-07, - "loss": 1.1111, - "step": 4007 - }, - { - "epoch": 0.5433471158408459, - "grad_norm": 1.6393815854560134, - "learning_rate": 9.083489730025791e-07, - "loss": 1.144, - "step": 4008 - }, - { - "epoch": 0.5434826814885109, - "grad_norm": 1.9817906874716276, - "learning_rate": 9.079116921864883e-07, - "loss": 1.1393, - "step": 4009 - }, - { - "epoch": 0.5436182471361757, - "grad_norm": 1.4592579201377023, - "learning_rate": 9.074744291289007e-07, - "loss": 1.1237, - "step": 4010 - }, - { - "epoch": 0.5437538127838406, - "grad_norm": 1.6008523021898433, - "learning_rate": 9.070371839141393e-07, - "loss": 1.1033, - "step": 4011 - }, - { - "epoch": 0.5438893784315054, - "grad_norm": 2.4121636770114248, - "learning_rate": 9.065999566265229e-07, - "loss": 1.1397, - "step": 4012 - }, - { - "epoch": 0.5440249440791703, - "grad_norm": 1.7651489668498048, - "learning_rate": 9.061627473503677e-07, - "loss": 1.1795, - "step": 4013 - }, - { - "epoch": 0.5441605097268353, - "grad_norm": 2.6000111174092186, - "learning_rate": 9.057255561699859e-07, - "loss": 1.147, - "step": 4014 - }, - { - "epoch": 0.5442960753745001, - "grad_norm": 1.9657373138870953, - "learning_rate": 9.052883831696865e-07, - "loss": 1.1142, - "step": 4015 - }, - { - "epoch": 0.544431641022165, - "grad_norm": 2.1984837331989326, - "learning_rate": 9.048512284337747e-07, - "loss": 1.141, - "step": 4016 - }, - { - "epoch": 0.5445672066698298, - "grad_norm": 1.5013606336255727, - "learning_rate": 9.044140920465529e-07, - "loss": 1.1625, - "step": 4017 - }, - { - "epoch": 0.5447027723174948, - "grad_norm": 1.4853184594828879, - "learning_rate": 9.039769740923182e-07, - "loss": 1.1279, - "step": 4018 - }, - { - "epoch": 0.5448383379651597, - "grad_norm": 1.5855307159415042, - "learning_rate": 9.035398746553667e-07, - "loss": 1.127, - "step": 4019 - }, - { - "epoch": 0.5449739036128245, - "grad_norm": 1.7144798876393268, - "learning_rate": 9.031027938199884e-07, - "loss": 1.1511, - "step": 4020 - }, - { - "epoch": 0.5451094692604894, - "grad_norm": 1.8752141478407691, - "learning_rate": 9.02665731670472e-07, - "loss": 1.1254, - "step": 4021 - }, - { - "epoch": 0.5452450349081542, - "grad_norm": 1.655521426414369, - "learning_rate": 9.022286882911005e-07, - "loss": 1.1517, - "step": 4022 - }, - { - "epoch": 0.5453806005558192, - "grad_norm": 1.5877432929742874, - "learning_rate": 9.01791663766155e-07, - "loss": 1.1371, - "step": 4023 - }, - { - "epoch": 0.5455161662034841, - "grad_norm": 1.5740448168207701, - "learning_rate": 9.01354658179912e-07, - "loss": 1.1111, - "step": 4024 - }, - { - "epoch": 0.5456517318511489, - "grad_norm": 2.319005522267522, - "learning_rate": 9.009176716166442e-07, - "loss": 1.1273, - "step": 4025 - }, - { - "epoch": 0.5457872974988138, - "grad_norm": 1.4317388427112172, - "learning_rate": 9.004807041606217e-07, - "loss": 1.1312, - "step": 4026 - }, - { - "epoch": 0.5459228631464786, - "grad_norm": 1.51944930691087, - "learning_rate": 9.000437558961094e-07, - "loss": 1.0891, - "step": 4027 - }, - { - "epoch": 0.5460584287941436, - "grad_norm": 1.4334466206834038, - "learning_rate": 8.996068269073701e-07, - "loss": 1.1364, - "step": 4028 - }, - { - "epoch": 0.5461939944418085, - "grad_norm": 1.7291177231224637, - "learning_rate": 8.991699172786614e-07, - "loss": 1.1053, - "step": 4029 - }, - { - "epoch": 0.5463295600894733, - "grad_norm": 1.8521865637257795, - "learning_rate": 8.987330270942388e-07, - "loss": 1.1823, - "step": 4030 - }, - { - "epoch": 0.5464651257371382, - "grad_norm": 1.5103781134402934, - "learning_rate": 8.98296156438352e-07, - "loss": 1.143, - "step": 4031 - }, - { - "epoch": 0.546600691384803, - "grad_norm": 1.9161517374485035, - "learning_rate": 8.978593053952492e-07, - "loss": 1.1192, - "step": 4032 - }, - { - "epoch": 0.546736257032468, - "grad_norm": 1.5987047520662883, - "learning_rate": 8.974224740491725e-07, - "loss": 1.1456, - "step": 4033 - }, - { - "epoch": 0.5468718226801329, - "grad_norm": 1.5835721015942135, - "learning_rate": 8.969856624843625e-07, - "loss": 1.138, - "step": 4034 - }, - { - "epoch": 0.5470073883277977, - "grad_norm": 1.60784914215882, - "learning_rate": 8.965488707850539e-07, - "loss": 1.139, - "step": 4035 - }, - { - "epoch": 0.5471429539754626, - "grad_norm": 1.5528551580397438, - "learning_rate": 8.961120990354794e-07, - "loss": 1.1555, - "step": 4036 - }, - { - "epoch": 0.5472785196231275, - "grad_norm": 1.6469743510735977, - "learning_rate": 8.956753473198662e-07, - "loss": 1.1493, - "step": 4037 - }, - { - "epoch": 0.5474140852707924, - "grad_norm": 2.38839964712956, - "learning_rate": 8.952386157224391e-07, - "loss": 1.1395, - "step": 4038 - }, - { - "epoch": 0.5475496509184573, - "grad_norm": 1.776608236131249, - "learning_rate": 8.948019043274181e-07, - "loss": 1.1326, - "step": 4039 - }, - { - "epoch": 0.5476852165661221, - "grad_norm": 1.839164829703461, - "learning_rate": 8.943652132190189e-07, - "loss": 1.149, - "step": 4040 - }, - { - "epoch": 0.547820782213787, - "grad_norm": 2.387492187113473, - "learning_rate": 8.939285424814551e-07, - "loss": 1.1462, - "step": 4041 - }, - { - "epoch": 0.5479563478614519, - "grad_norm": 1.5765621283512716, - "learning_rate": 8.934918921989341e-07, - "loss": 1.1322, - "step": 4042 - }, - { - "epoch": 0.5480919135091168, - "grad_norm": 1.5783106090653327, - "learning_rate": 8.930552624556615e-07, - "loss": 1.1451, - "step": 4043 - }, - { - "epoch": 0.5482274791567817, - "grad_norm": 1.7076706248871436, - "learning_rate": 8.92618653335837e-07, - "loss": 1.1458, - "step": 4044 - }, - { - "epoch": 0.5483630448044465, - "grad_norm": 1.7964338042543342, - "learning_rate": 8.921820649236576e-07, - "loss": 1.1294, - "step": 4045 - }, - { - "epoch": 0.5484986104521115, - "grad_norm": 2.019596569141138, - "learning_rate": 8.917454973033161e-07, - "loss": 1.1157, - "step": 4046 - }, - { - "epoch": 0.5486341760997763, - "grad_norm": 1.6900258529138212, - "learning_rate": 8.913089505590007e-07, - "loss": 1.1533, - "step": 4047 - }, - { - "epoch": 0.5487697417474412, - "grad_norm": 1.669594833773368, - "learning_rate": 8.908724247748963e-07, - "loss": 1.1414, - "step": 4048 - }, - { - "epoch": 0.5489053073951061, - "grad_norm": 3.0280010844862546, - "learning_rate": 8.904359200351837e-07, - "loss": 1.1753, - "step": 4049 - }, - { - "epoch": 0.5490408730427709, - "grad_norm": 2.927262357133036, - "learning_rate": 8.899994364240385e-07, - "loss": 1.1306, - "step": 4050 - }, - { - "epoch": 0.5491764386904359, - "grad_norm": 1.5866021820075271, - "learning_rate": 8.895629740256343e-07, - "loss": 1.1184, - "step": 4051 - }, - { - "epoch": 0.5493120043381007, - "grad_norm": 1.5358775066203663, - "learning_rate": 8.891265329241387e-07, - "loss": 1.2029, - "step": 4052 - }, - { - "epoch": 0.5494475699857656, - "grad_norm": 1.7952816333831787, - "learning_rate": 8.886901132037155e-07, - "loss": 1.1744, - "step": 4053 - }, - { - "epoch": 0.5495831356334305, - "grad_norm": 1.6802241386413324, - "learning_rate": 8.88253714948526e-07, - "loss": 1.1664, - "step": 4054 - }, - { - "epoch": 0.5497187012810953, - "grad_norm": 1.8013425138862826, - "learning_rate": 8.87817338242725e-07, - "loss": 1.1747, - "step": 4055 - }, - { - "epoch": 0.5498542669287603, - "grad_norm": 1.5769747426029008, - "learning_rate": 8.873809831704652e-07, - "loss": 1.1463, - "step": 4056 - }, - { - "epoch": 0.5499898325764251, - "grad_norm": 1.7103010334023014, - "learning_rate": 8.869446498158935e-07, - "loss": 1.1267, - "step": 4057 - }, - { - "epoch": 0.55012539822409, - "grad_norm": 1.7753447197658734, - "learning_rate": 8.865083382631539e-07, - "loss": 1.1408, - "step": 4058 - }, - { - "epoch": 0.5502609638717549, - "grad_norm": 1.4197755602998843, - "learning_rate": 8.860720485963851e-07, - "loss": 1.1402, - "step": 4059 - }, - { - "epoch": 0.5503965295194198, - "grad_norm": 1.8720053106006902, - "learning_rate": 8.856357808997229e-07, - "loss": 1.1575, - "step": 4060 - }, - { - "epoch": 0.5505320951670847, - "grad_norm": 1.553173999315336, - "learning_rate": 8.851995352572972e-07, - "loss": 1.1548, - "step": 4061 - }, - { - "epoch": 0.5506676608147495, - "grad_norm": 1.6992505710878092, - "learning_rate": 8.847633117532353e-07, - "loss": 1.1412, - "step": 4062 - }, - { - "epoch": 0.5508032264624144, - "grad_norm": 1.6566246197547656, - "learning_rate": 8.843271104716588e-07, - "loss": 1.1824, - "step": 4063 - }, - { - "epoch": 0.5509387921100793, - "grad_norm": 1.536544493617469, - "learning_rate": 8.838909314966863e-07, - "loss": 1.1413, - "step": 4064 - }, - { - "epoch": 0.5510743577577442, - "grad_norm": 2.5137027779332204, - "learning_rate": 8.834547749124307e-07, - "loss": 1.1465, - "step": 4065 - }, - { - "epoch": 0.5512099234054091, - "grad_norm": 1.8263726324470377, - "learning_rate": 8.830186408030023e-07, - "loss": 1.1111, - "step": 4066 - }, - { - "epoch": 0.5513454890530739, - "grad_norm": 1.45775743083772, - "learning_rate": 8.825825292525056e-07, - "loss": 1.1779, - "step": 4067 - }, - { - "epoch": 0.5514810547007388, - "grad_norm": 1.5677916855743508, - "learning_rate": 8.821464403450408e-07, - "loss": 1.1543, - "step": 4068 - }, - { - "epoch": 0.5516166203484038, - "grad_norm": 2.8766285864445043, - "learning_rate": 8.817103741647052e-07, - "loss": 1.1138, - "step": 4069 - }, - { - "epoch": 0.5517521859960686, - "grad_norm": 1.6499324689095587, - "learning_rate": 8.812743307955899e-07, - "loss": 1.1673, - "step": 4070 - }, - { - "epoch": 0.5518877516437335, - "grad_norm": 1.7061261754153114, - "learning_rate": 8.80838310321783e-07, - "loss": 1.119, - "step": 4071 - }, - { - "epoch": 0.5520233172913983, - "grad_norm": 1.6810894644905399, - "learning_rate": 8.80402312827367e-07, - "loss": 1.1243, - "step": 4072 - }, - { - "epoch": 0.5521588829390632, - "grad_norm": 1.5871141876578345, - "learning_rate": 8.799663383964213e-07, - "loss": 1.1108, - "step": 4073 - }, - { - "epoch": 0.5522944485867282, - "grad_norm": 1.6560511097416506, - "learning_rate": 8.795303871130196e-07, - "loss": 1.1434, - "step": 4074 - }, - { - "epoch": 0.552430014234393, - "grad_norm": 1.4789641874559005, - "learning_rate": 8.790944590612318e-07, - "loss": 1.1275, - "step": 4075 - }, - { - "epoch": 0.5525655798820579, - "grad_norm": 1.5551134249759244, - "learning_rate": 8.786585543251232e-07, - "loss": 1.1423, - "step": 4076 - }, - { - "epoch": 0.5527011455297227, - "grad_norm": 1.8267683333062337, - "learning_rate": 8.782226729887546e-07, - "loss": 1.133, - "step": 4077 - }, - { - "epoch": 0.5528367111773876, - "grad_norm": 1.4594944535754646, - "learning_rate": 8.777868151361823e-07, - "loss": 1.1452, - "step": 4078 - }, - { - "epoch": 0.5529722768250526, - "grad_norm": 1.511131069141946, - "learning_rate": 8.773509808514581e-07, - "loss": 1.0993, - "step": 4079 - }, - { - "epoch": 0.5531078424727174, - "grad_norm": 1.5737444562049019, - "learning_rate": 8.769151702186289e-07, - "loss": 1.1499, - "step": 4080 - }, - { - "epoch": 0.5532434081203823, - "grad_norm": 3.1925465794375, - "learning_rate": 8.764793833217377e-07, - "loss": 1.1563, - "step": 4081 - }, - { - "epoch": 0.5533789737680471, - "grad_norm": 3.178844614030592, - "learning_rate": 8.760436202448223e-07, - "loss": 1.0882, - "step": 4082 - }, - { - "epoch": 0.553514539415712, - "grad_norm": 1.5358409438204397, - "learning_rate": 8.756078810719163e-07, - "loss": 1.1407, - "step": 4083 - }, - { - "epoch": 0.553650105063377, - "grad_norm": 1.5228624762339982, - "learning_rate": 8.751721658870488e-07, - "loss": 1.1338, - "step": 4084 - }, - { - "epoch": 0.5537856707110418, - "grad_norm": 1.8512435845546995, - "learning_rate": 8.747364747742433e-07, - "loss": 1.1122, - "step": 4085 - }, - { - "epoch": 0.5539212363587067, - "grad_norm": 1.7855864403529425, - "learning_rate": 8.743008078175202e-07, - "loss": 1.1651, - "step": 4086 - }, - { - "epoch": 0.5540568020063716, - "grad_norm": 4.237322228967542, - "learning_rate": 8.73865165100894e-07, - "loss": 1.1378, - "step": 4087 - }, - { - "epoch": 0.5541923676540365, - "grad_norm": 1.5079255229872401, - "learning_rate": 8.734295467083752e-07, - "loss": 1.1364, - "step": 4088 - }, - { - "epoch": 0.5543279333017014, - "grad_norm": 2.0962834264316443, - "learning_rate": 8.729939527239688e-07, - "loss": 1.1358, - "step": 4089 - }, - { - "epoch": 0.5544634989493662, - "grad_norm": 1.8220625360290157, - "learning_rate": 8.725583832316767e-07, - "loss": 1.1538, - "step": 4090 - }, - { - "epoch": 0.5545990645970311, - "grad_norm": 1.5646060382839637, - "learning_rate": 8.721228383154939e-07, - "loss": 1.1571, - "step": 4091 - }, - { - "epoch": 0.554734630244696, - "grad_norm": 1.8441176509926933, - "learning_rate": 8.716873180594128e-07, - "loss": 1.112, - "step": 4092 - }, - { - "epoch": 0.5548701958923609, - "grad_norm": 1.6309013026839787, - "learning_rate": 8.71251822547419e-07, - "loss": 1.1586, - "step": 4093 - }, - { - "epoch": 0.5550057615400258, - "grad_norm": 1.7096390223376987, - "learning_rate": 8.708163518634956e-07, - "loss": 1.155, - "step": 4094 - }, - { - "epoch": 0.5551413271876906, - "grad_norm": 1.531360610982817, - "learning_rate": 8.703809060916188e-07, - "loss": 1.1289, - "step": 4095 - }, - { - "epoch": 0.5552768928353555, - "grad_norm": 1.69197801219766, - "learning_rate": 8.699454853157608e-07, - "loss": 1.1492, - "step": 4096 - }, - { - "epoch": 0.5554124584830205, - "grad_norm": 1.540872990384378, - "learning_rate": 8.695100896198898e-07, - "loss": 1.1395, - "step": 4097 - }, - { - "epoch": 0.5555480241306853, - "grad_norm": 1.756706166765803, - "learning_rate": 8.690747190879676e-07, - "loss": 1.1524, - "step": 4098 - }, - { - "epoch": 0.5556835897783502, - "grad_norm": 1.5406728588137266, - "learning_rate": 8.686393738039527e-07, - "loss": 1.1243, - "step": 4099 - }, - { - "epoch": 0.555819155426015, - "grad_norm": 1.6981763023563432, - "learning_rate": 8.682040538517973e-07, - "loss": 1.1504, - "step": 4100 - }, - { - "epoch": 0.5559547210736799, - "grad_norm": 2.1741822333265977, - "learning_rate": 8.677687593154503e-07, - "loss": 1.1097, - "step": 4101 - }, - { - "epoch": 0.5560902867213449, - "grad_norm": 1.6945949565923084, - "learning_rate": 8.673334902788536e-07, - "loss": 1.1221, - "step": 4102 - }, - { - "epoch": 0.5562258523690097, - "grad_norm": 3.3362684873111466, - "learning_rate": 8.668982468259467e-07, - "loss": 1.1875, - "step": 4103 - }, - { - "epoch": 0.5563614180166746, - "grad_norm": 1.8061470829881572, - "learning_rate": 8.664630290406618e-07, - "loss": 1.1572, - "step": 4104 - }, - { - "epoch": 0.5564969836643394, - "grad_norm": 1.4574612916406884, - "learning_rate": 8.660278370069281e-07, - "loss": 1.1478, - "step": 4105 - }, - { - "epoch": 0.5566325493120043, - "grad_norm": 1.4337854513912798, - "learning_rate": 8.655926708086684e-07, - "loss": 1.1405, - "step": 4106 - }, - { - "epoch": 0.5567681149596693, - "grad_norm": 1.5151889264026215, - "learning_rate": 8.651575305298011e-07, - "loss": 1.1347, - "step": 4107 - }, - { - "epoch": 0.5569036806073341, - "grad_norm": 1.5882214040450653, - "learning_rate": 8.6472241625424e-07, - "loss": 1.1653, - "step": 4108 - }, - { - "epoch": 0.557039246254999, - "grad_norm": 1.816518684543613, - "learning_rate": 8.642873280658924e-07, - "loss": 1.1484, - "step": 4109 - }, - { - "epoch": 0.5571748119026638, - "grad_norm": 2.3204515837412205, - "learning_rate": 8.63852266048663e-07, - "loss": 1.1246, - "step": 4110 - }, - { - "epoch": 0.5573103775503288, - "grad_norm": 1.6007596147627656, - "learning_rate": 8.634172302864491e-07, - "loss": 1.1281, - "step": 4111 - }, - { - "epoch": 0.5574459431979937, - "grad_norm": 7.411235995537328, - "learning_rate": 8.629822208631442e-07, - "loss": 1.132, - "step": 4112 - }, - { - "epoch": 0.5575815088456585, - "grad_norm": 1.52127921297392, - "learning_rate": 8.625472378626365e-07, - "loss": 1.1268, - "step": 4113 - }, - { - "epoch": 0.5577170744933234, - "grad_norm": 1.5914875958245174, - "learning_rate": 8.62112281368809e-07, - "loss": 1.128, - "step": 4114 - }, - { - "epoch": 0.5578526401409882, - "grad_norm": 1.8158463922004142, - "learning_rate": 8.616773514655395e-07, - "loss": 1.1038, - "step": 4115 - }, - { - "epoch": 0.5579882057886532, - "grad_norm": 1.4161768772189804, - "learning_rate": 8.612424482367014e-07, - "loss": 1.1264, - "step": 4116 - }, - { - "epoch": 0.5581237714363181, - "grad_norm": 1.3852361117271006, - "learning_rate": 8.608075717661611e-07, - "loss": 1.1212, - "step": 4117 - }, - { - "epoch": 0.5582593370839829, - "grad_norm": 2.021346167164917, - "learning_rate": 8.603727221377826e-07, - "loss": 1.1638, - "step": 4118 - }, - { - "epoch": 0.5583949027316478, - "grad_norm": 1.758990582339411, - "learning_rate": 8.599378994354218e-07, - "loss": 1.1775, - "step": 4119 - }, - { - "epoch": 0.5585304683793126, - "grad_norm": 2.484619368477784, - "learning_rate": 8.595031037429321e-07, - "loss": 1.134, - "step": 4120 - }, - { - "epoch": 0.5586660340269776, - "grad_norm": 1.7540577414119942, - "learning_rate": 8.590683351441594e-07, - "loss": 1.1717, - "step": 4121 - }, - { - "epoch": 0.5588015996746425, - "grad_norm": 1.5048262816174063, - "learning_rate": 8.586335937229462e-07, - "loss": 1.1679, - "step": 4122 - }, - { - "epoch": 0.5589371653223073, - "grad_norm": 1.7221170563003156, - "learning_rate": 8.581988795631285e-07, - "loss": 1.1521, - "step": 4123 - }, - { - "epoch": 0.5590727309699722, - "grad_norm": 1.916362032197253, - "learning_rate": 8.577641927485373e-07, - "loss": 1.1292, - "step": 4124 - }, - { - "epoch": 0.559208296617637, - "grad_norm": 2.1496549172633936, - "learning_rate": 8.573295333629991e-07, - "loss": 1.1522, - "step": 4125 - }, - { - "epoch": 0.559343862265302, - "grad_norm": 2.075927915023682, - "learning_rate": 8.568949014903339e-07, - "loss": 1.1397, - "step": 4126 - }, - { - "epoch": 0.5594794279129669, - "grad_norm": 2.5310569930623776, - "learning_rate": 8.564602972143576e-07, - "loss": 1.1689, - "step": 4127 - }, - { - "epoch": 0.5596149935606317, - "grad_norm": 1.658375959252895, - "learning_rate": 8.560257206188797e-07, - "loss": 1.13, - "step": 4128 - }, - { - "epoch": 0.5597505592082966, - "grad_norm": 1.7098611553200727, - "learning_rate": 8.555911717877053e-07, - "loss": 1.1316, - "step": 4129 - }, - { - "epoch": 0.5598861248559615, - "grad_norm": 1.8879600901945481, - "learning_rate": 8.551566508046334e-07, - "loss": 1.1367, - "step": 4130 - }, - { - "epoch": 0.5600216905036264, - "grad_norm": 1.5815500581129023, - "learning_rate": 8.547221577534583e-07, - "loss": 1.1334, - "step": 4131 - }, - { - "epoch": 0.5601572561512913, - "grad_norm": 1.4989039763667082, - "learning_rate": 8.542876927179679e-07, - "loss": 1.1495, - "step": 4132 - }, - { - "epoch": 0.5602928217989561, - "grad_norm": 2.3770048467249536, - "learning_rate": 8.538532557819463e-07, - "loss": 1.113, - "step": 4133 - }, - { - "epoch": 0.560428387446621, - "grad_norm": 1.762354992705088, - "learning_rate": 8.534188470291704e-07, - "loss": 1.1304, - "step": 4134 - }, - { - "epoch": 0.5605639530942859, - "grad_norm": 1.4167257169555996, - "learning_rate": 8.529844665434129e-07, - "loss": 1.1572, - "step": 4135 - }, - { - "epoch": 0.5606995187419508, - "grad_norm": 1.880270593390615, - "learning_rate": 8.525501144084409e-07, - "loss": 1.1326, - "step": 4136 - }, - { - "epoch": 0.5608350843896157, - "grad_norm": 1.5883771049211937, - "learning_rate": 8.521157907080148e-07, - "loss": 1.1226, - "step": 4137 - }, - { - "epoch": 0.5609706500372805, - "grad_norm": 2.1708827611390524, - "learning_rate": 8.516814955258916e-07, - "loss": 1.11, - "step": 4138 - }, - { - "epoch": 0.5611062156849455, - "grad_norm": 1.6982236479696795, - "learning_rate": 8.512472289458208e-07, - "loss": 1.1804, - "step": 4139 - }, - { - "epoch": 0.5612417813326103, - "grad_norm": 3.7235935837303162, - "learning_rate": 8.508129910515482e-07, - "loss": 1.1351, - "step": 4140 - }, - { - "epoch": 0.5613773469802752, - "grad_norm": 1.745562815076997, - "learning_rate": 8.503787819268124e-07, - "loss": 1.1503, - "step": 4141 - }, - { - "epoch": 0.5615129126279401, - "grad_norm": 5.710598146962606, - "learning_rate": 8.499446016553473e-07, - "loss": 1.1433, - "step": 4142 - }, - { - "epoch": 0.5616484782756049, - "grad_norm": 1.557511812111272, - "learning_rate": 8.495104503208816e-07, - "loss": 1.1832, - "step": 4143 - }, - { - "epoch": 0.5617840439232699, - "grad_norm": 1.4614163830609677, - "learning_rate": 8.490763280071375e-07, - "loss": 1.1374, - "step": 4144 - }, - { - "epoch": 0.5619196095709347, - "grad_norm": 1.6929700092828959, - "learning_rate": 8.486422347978323e-07, - "loss": 1.1075, - "step": 4145 - }, - { - "epoch": 0.5620551752185996, - "grad_norm": 1.8496309688548889, - "learning_rate": 8.482081707766775e-07, - "loss": 1.104, - "step": 4146 - }, - { - "epoch": 0.5621907408662645, - "grad_norm": 2.531439762693102, - "learning_rate": 8.477741360273785e-07, - "loss": 1.1571, - "step": 4147 - }, - { - "epoch": 0.5623263065139293, - "grad_norm": 1.5116159642284632, - "learning_rate": 8.47340130633636e-07, - "loss": 1.1174, - "step": 4148 - }, - { - "epoch": 0.5624618721615943, - "grad_norm": 1.5365249262748184, - "learning_rate": 8.46906154679144e-07, - "loss": 1.148, - "step": 4149 - }, - { - "epoch": 0.5625974378092591, - "grad_norm": 1.513199881440175, - "learning_rate": 8.46472208247592e-07, - "loss": 1.1428, - "step": 4150 - }, - { - "epoch": 0.562733003456924, - "grad_norm": 1.686410703702844, - "learning_rate": 8.460382914226628e-07, - "loss": 1.1106, - "step": 4151 - }, - { - "epoch": 0.5628685691045889, - "grad_norm": 1.6509314242219109, - "learning_rate": 8.456044042880333e-07, - "loss": 1.1232, - "step": 4152 - }, - { - "epoch": 0.5630041347522537, - "grad_norm": 1.6649988015885475, - "learning_rate": 8.451705469273763e-07, - "loss": 1.1564, - "step": 4153 - }, - { - "epoch": 0.5631397003999187, - "grad_norm": 1.8351498647132205, - "learning_rate": 8.447367194243567e-07, - "loss": 1.1576, - "step": 4154 - }, - { - "epoch": 0.5632752660475835, - "grad_norm": 2.9326382783255562, - "learning_rate": 8.443029218626355e-07, - "loss": 1.1455, - "step": 4155 - }, - { - "epoch": 0.5634108316952484, - "grad_norm": 1.590875167527014, - "learning_rate": 8.438691543258665e-07, - "loss": 1.145, - "step": 4156 - }, - { - "epoch": 0.5635463973429133, - "grad_norm": 2.128508362687925, - "learning_rate": 8.434354168976989e-07, - "loss": 1.1449, - "step": 4157 - }, - { - "epoch": 0.5636819629905782, - "grad_norm": 2.0109811557312764, - "learning_rate": 8.430017096617751e-07, - "loss": 1.2012, - "step": 4158 - }, - { - "epoch": 0.5638175286382431, - "grad_norm": 1.6446048853103505, - "learning_rate": 8.425680327017326e-07, - "loss": 1.0808, - "step": 4159 - }, - { - "epoch": 0.5639530942859079, - "grad_norm": 1.8175384622989226, - "learning_rate": 8.42134386101202e-07, - "loss": 1.1216, - "step": 4160 - }, - { - "epoch": 0.5640886599335728, - "grad_norm": 1.5832224771485246, - "learning_rate": 8.417007699438093e-07, - "loss": 1.1436, - "step": 4161 - }, - { - "epoch": 0.5642242255812377, - "grad_norm": 2.0285999895458433, - "learning_rate": 8.412671843131731e-07, - "loss": 1.1062, - "step": 4162 - }, - { - "epoch": 0.5643597912289026, - "grad_norm": 8.683779842898055, - "learning_rate": 8.408336292929079e-07, - "loss": 1.114, - "step": 4163 - }, - { - "epoch": 0.5644953568765675, - "grad_norm": 1.843910468956719, - "learning_rate": 8.40400104966621e-07, - "loss": 1.1463, - "step": 4164 - }, - { - "epoch": 0.5646309225242324, - "grad_norm": 1.7587510345669792, - "learning_rate": 8.399666114179136e-07, - "loss": 1.1696, - "step": 4165 - }, - { - "epoch": 0.5647664881718972, - "grad_norm": 1.7402977025816908, - "learning_rate": 8.395331487303823e-07, - "loss": 1.1351, - "step": 4166 - }, - { - "epoch": 0.5649020538195622, - "grad_norm": 1.5026233189203588, - "learning_rate": 8.390997169876161e-07, - "loss": 1.1821, - "step": 4167 - }, - { - "epoch": 0.565037619467227, - "grad_norm": 1.6704764338106353, - "learning_rate": 8.386663162732001e-07, - "loss": 1.155, - "step": 4168 - }, - { - "epoch": 0.5651731851148919, - "grad_norm": 1.4111927132450768, - "learning_rate": 8.38232946670711e-07, - "loss": 1.1062, - "step": 4169 - }, - { - "epoch": 0.5653087507625568, - "grad_norm": 1.720555112228216, - "learning_rate": 8.377996082637215e-07, - "loss": 1.1148, - "step": 4170 - }, - { - "epoch": 0.5654443164102216, - "grad_norm": 1.8592739519489285, - "learning_rate": 8.37366301135797e-07, - "loss": 1.16, - "step": 4171 - }, - { - "epoch": 0.5655798820578866, - "grad_norm": 2.098706668119273, - "learning_rate": 8.369330253704979e-07, - "loss": 1.1736, - "step": 4172 - }, - { - "epoch": 0.5657154477055514, - "grad_norm": 1.7827167182987291, - "learning_rate": 8.364997810513774e-07, - "loss": 1.117, - "step": 4173 - }, - { - "epoch": 0.5658510133532163, - "grad_norm": 1.5536052660128805, - "learning_rate": 8.360665682619837e-07, - "loss": 1.103, - "step": 4174 - }, - { - "epoch": 0.5659865790008812, - "grad_norm": 2.163344098445573, - "learning_rate": 8.356333870858581e-07, - "loss": 1.1776, - "step": 4175 - }, - { - "epoch": 0.566122144648546, - "grad_norm": 1.5011990653505216, - "learning_rate": 8.352002376065364e-07, - "loss": 1.1465, - "step": 4176 - }, - { - "epoch": 0.566257710296211, - "grad_norm": 1.6712060417978678, - "learning_rate": 8.347671199075481e-07, - "loss": 1.0928, - "step": 4177 - }, - { - "epoch": 0.5663932759438758, - "grad_norm": 1.625782910802641, - "learning_rate": 8.343340340724168e-07, - "loss": 1.1443, - "step": 4178 - }, - { - "epoch": 0.5665288415915407, - "grad_norm": 1.7425650368043235, - "learning_rate": 8.339009801846589e-07, - "loss": 1.1129, - "step": 4179 - }, - { - "epoch": 0.5666644072392056, - "grad_norm": 1.5623086467890757, - "learning_rate": 8.334679583277859e-07, - "loss": 1.1029, - "step": 4180 - }, - { - "epoch": 0.5667999728868705, - "grad_norm": 1.669647325571345, - "learning_rate": 8.330349685853027e-07, - "loss": 1.1569, - "step": 4181 - }, - { - "epoch": 0.5669355385345354, - "grad_norm": 1.9363508850863382, - "learning_rate": 8.326020110407079e-07, - "loss": 1.1243, - "step": 4182 - }, - { - "epoch": 0.5670711041822002, - "grad_norm": 1.6614716632060702, - "learning_rate": 8.32169085777494e-07, - "loss": 1.1381, - "step": 4183 - }, - { - "epoch": 0.5672066698298651, - "grad_norm": 1.6305941756939906, - "learning_rate": 8.317361928791467e-07, - "loss": 1.1705, - "step": 4184 - }, - { - "epoch": 0.56734223547753, - "grad_norm": 3.337869544609601, - "learning_rate": 8.313033324291469e-07, - "loss": 1.1815, - "step": 4185 - }, - { - "epoch": 0.5674778011251949, - "grad_norm": 1.6743581746854872, - "learning_rate": 8.308705045109675e-07, - "loss": 1.1718, - "step": 4186 - }, - { - "epoch": 0.5676133667728598, - "grad_norm": 1.4448781247406894, - "learning_rate": 8.304377092080766e-07, - "loss": 1.1437, - "step": 4187 - }, - { - "epoch": 0.5677489324205246, - "grad_norm": 1.746530944790666, - "learning_rate": 8.300049466039346e-07, - "loss": 1.1674, - "step": 4188 - }, - { - "epoch": 0.5678844980681895, - "grad_norm": 1.4344432250346677, - "learning_rate": 8.295722167819973e-07, - "loss": 1.1006, - "step": 4189 - }, - { - "epoch": 0.5680200637158545, - "grad_norm": 1.5648035508012366, - "learning_rate": 8.291395198257122e-07, - "loss": 1.17, - "step": 4190 - }, - { - "epoch": 0.5681556293635193, - "grad_norm": 1.4939719733154513, - "learning_rate": 8.287068558185224e-07, - "loss": 1.1511, - "step": 4191 - }, - { - "epoch": 0.5682911950111842, - "grad_norm": 2.400169251643613, - "learning_rate": 8.282742248438634e-07, - "loss": 1.1088, - "step": 4192 - }, - { - "epoch": 0.568426760658849, - "grad_norm": 2.0472999005281998, - "learning_rate": 8.278416269851643e-07, - "loss": 1.1442, - "step": 4193 - }, - { - "epoch": 0.5685623263065139, - "grad_norm": 1.6461841698630155, - "learning_rate": 8.274090623258489e-07, - "loss": 1.1466, - "step": 4194 - }, - { - "epoch": 0.5686978919541789, - "grad_norm": 1.6796920467250323, - "learning_rate": 8.269765309493328e-07, - "loss": 1.0951, - "step": 4195 - }, - { - "epoch": 0.5688334576018437, - "grad_norm": 2.810966115088102, - "learning_rate": 8.265440329390276e-07, - "loss": 1.1468, - "step": 4196 - }, - { - "epoch": 0.5689690232495086, - "grad_norm": 1.4327547937825558, - "learning_rate": 8.261115683783361e-07, - "loss": 1.1323, - "step": 4197 - }, - { - "epoch": 0.5691045888971734, - "grad_norm": 1.9876818419593691, - "learning_rate": 8.256791373506563e-07, - "loss": 1.0961, - "step": 4198 - }, - { - "epoch": 0.5692401545448383, - "grad_norm": 2.013163599788195, - "learning_rate": 8.252467399393786e-07, - "loss": 1.1274, - "step": 4199 - }, - { - "epoch": 0.5693757201925033, - "grad_norm": 1.5928753457701152, - "learning_rate": 8.248143762278879e-07, - "loss": 1.1363, - "step": 4200 - }, - { - "epoch": 0.5695112858401681, - "grad_norm": 1.773243487553507, - "learning_rate": 8.243820462995617e-07, - "loss": 1.1018, - "step": 4201 - }, - { - "epoch": 0.569646851487833, - "grad_norm": 1.4601234580762357, - "learning_rate": 8.239497502377719e-07, - "loss": 1.1452, - "step": 4202 - }, - { - "epoch": 0.5697824171354978, - "grad_norm": 1.5806721320809127, - "learning_rate": 8.235174881258827e-07, - "loss": 1.1571, - "step": 4203 - }, - { - "epoch": 0.5699179827831627, - "grad_norm": 1.6735299791926972, - "learning_rate": 8.230852600472533e-07, - "loss": 1.153, - "step": 4204 - }, - { - "epoch": 0.5700535484308277, - "grad_norm": 1.7210041460245629, - "learning_rate": 8.226530660852349e-07, - "loss": 1.1267, - "step": 4205 - }, - { - "epoch": 0.5701891140784925, - "grad_norm": 1.5281874384083363, - "learning_rate": 8.222209063231727e-07, - "loss": 1.1164, - "step": 4206 - }, - { - "epoch": 0.5703246797261574, - "grad_norm": 1.654562702534214, - "learning_rate": 8.217887808444056e-07, - "loss": 1.1311, - "step": 4207 - }, - { - "epoch": 0.5704602453738222, - "grad_norm": 2.2514823073567034, - "learning_rate": 8.213566897322651e-07, - "loss": 1.1246, - "step": 4208 - }, - { - "epoch": 0.5705958110214872, - "grad_norm": 1.5410104711355437, - "learning_rate": 8.209246330700772e-07, - "loss": 1.1317, - "step": 4209 - }, - { - "epoch": 0.5707313766691521, - "grad_norm": 1.5038826963086989, - "learning_rate": 8.204926109411601e-07, - "loss": 1.1324, - "step": 4210 - }, - { - "epoch": 0.5708669423168169, - "grad_norm": 1.6181056261096318, - "learning_rate": 8.20060623428826e-07, - "loss": 1.1493, - "step": 4211 - }, - { - "epoch": 0.5710025079644818, - "grad_norm": 1.5867712151457343, - "learning_rate": 8.196286706163804e-07, - "loss": 1.1086, - "step": 4212 - }, - { - "epoch": 0.5711380736121466, - "grad_norm": 7.89937960169837, - "learning_rate": 8.191967525871219e-07, - "loss": 1.1306, - "step": 4213 - }, - { - "epoch": 0.5712736392598116, - "grad_norm": 2.7494990459522626, - "learning_rate": 8.187648694243423e-07, - "loss": 1.1096, - "step": 4214 - }, - { - "epoch": 0.5714092049074765, - "grad_norm": 2.596186279221983, - "learning_rate": 8.183330212113273e-07, - "loss": 1.1424, - "step": 4215 - }, - { - "epoch": 0.5715447705551413, - "grad_norm": 1.6942443075832543, - "learning_rate": 8.179012080313549e-07, - "loss": 1.1007, - "step": 4216 - }, - { - "epoch": 0.5716803362028062, - "grad_norm": 2.114614284549197, - "learning_rate": 8.174694299676974e-07, - "loss": 1.1406, - "step": 4217 - }, - { - "epoch": 0.571815901850471, - "grad_norm": 1.4953138130059591, - "learning_rate": 8.170376871036193e-07, - "loss": 1.1567, - "step": 4218 - }, - { - "epoch": 0.571951467498136, - "grad_norm": 2.2252632151734866, - "learning_rate": 8.166059795223793e-07, - "loss": 1.128, - "step": 4219 - }, - { - "epoch": 0.5720870331458009, - "grad_norm": 1.6702072450320868, - "learning_rate": 8.161743073072286e-07, - "loss": 1.1381, - "step": 4220 - }, - { - "epoch": 0.5722225987934657, - "grad_norm": 1.905048118662072, - "learning_rate": 8.157426705414113e-07, - "loss": 1.1606, - "step": 4221 - }, - { - "epoch": 0.5723581644411306, - "grad_norm": 1.7560752911915052, - "learning_rate": 8.153110693081657e-07, - "loss": 1.1227, - "step": 4222 - }, - { - "epoch": 0.5724937300887954, - "grad_norm": 1.6801407620829023, - "learning_rate": 8.148795036907224e-07, - "loss": 1.1755, - "step": 4223 - }, - { - "epoch": 0.5726292957364604, - "grad_norm": 1.4717622371875947, - "learning_rate": 8.144479737723058e-07, - "loss": 1.1179, - "step": 4224 - }, - { - "epoch": 0.5727648613841253, - "grad_norm": 2.496532982338593, - "learning_rate": 8.140164796361327e-07, - "loss": 1.1824, - "step": 4225 - }, - { - "epoch": 0.5729004270317901, - "grad_norm": 1.9908106237316874, - "learning_rate": 8.135850213654135e-07, - "loss": 1.1219, - "step": 4226 - }, - { - "epoch": 0.573035992679455, - "grad_norm": 2.7403888772431717, - "learning_rate": 8.131535990433513e-07, - "loss": 1.1293, - "step": 4227 - }, - { - "epoch": 0.5731715583271199, - "grad_norm": 1.6356470130812542, - "learning_rate": 8.127222127531429e-07, - "loss": 1.1601, - "step": 4228 - }, - { - "epoch": 0.5733071239747848, - "grad_norm": 1.463159596502155, - "learning_rate": 8.122908625779771e-07, - "loss": 1.1211, - "step": 4229 - }, - { - "epoch": 0.5734426896224497, - "grad_norm": 1.9284702580462807, - "learning_rate": 8.118595486010372e-07, - "loss": 1.1596, - "step": 4230 - }, - { - "epoch": 0.5735782552701145, - "grad_norm": 2.0724159859428926, - "learning_rate": 8.114282709054978e-07, - "loss": 1.138, - "step": 4231 - }, - { - "epoch": 0.5737138209177794, - "grad_norm": 1.6391778383200706, - "learning_rate": 8.109970295745284e-07, - "loss": 1.1556, - "step": 4232 - }, - { - "epoch": 0.5738493865654443, - "grad_norm": 1.5004457825960955, - "learning_rate": 8.105658246912895e-07, - "loss": 1.107, - "step": 4233 - }, - { - "epoch": 0.5739849522131092, - "grad_norm": 2.2948444050508128, - "learning_rate": 8.101346563389363e-07, - "loss": 1.0961, - "step": 4234 - }, - { - "epoch": 0.5741205178607741, - "grad_norm": 1.5472861971138319, - "learning_rate": 8.097035246006161e-07, - "loss": 1.1634, - "step": 4235 - }, - { - "epoch": 0.5742560835084389, - "grad_norm": 1.7251769045441083, - "learning_rate": 8.092724295594685e-07, - "loss": 1.1368, - "step": 4236 - }, - { - "epoch": 0.5743916491561039, - "grad_norm": 1.5063305982521753, - "learning_rate": 8.088413712986279e-07, - "loss": 1.1586, - "step": 4237 - }, - { - "epoch": 0.5745272148037687, - "grad_norm": 2.021975362500354, - "learning_rate": 8.084103499012194e-07, - "loss": 1.1386, - "step": 4238 - }, - { - "epoch": 0.5746627804514336, - "grad_norm": 1.4317890020690693, - "learning_rate": 8.07979365450363e-07, - "loss": 1.1565, - "step": 4239 - }, - { - "epoch": 0.5747983460990985, - "grad_norm": 1.4902287978800997, - "learning_rate": 8.075484180291701e-07, - "loss": 1.141, - "step": 4240 - }, - { - "epoch": 0.5749339117467633, - "grad_norm": 1.4252132564713151, - "learning_rate": 8.071175077207457e-07, - "loss": 1.1601, - "step": 4241 - }, - { - "epoch": 0.5750694773944283, - "grad_norm": 1.6458633630693131, - "learning_rate": 8.066866346081873e-07, - "loss": 1.1294, - "step": 4242 - }, - { - "epoch": 0.5752050430420931, - "grad_norm": 1.6721287224998935, - "learning_rate": 8.062557987745856e-07, - "loss": 1.1416, - "step": 4243 - }, - { - "epoch": 0.575340608689758, - "grad_norm": 2.322995240814629, - "learning_rate": 8.058250003030238e-07, - "loss": 1.2021, - "step": 4244 - }, - { - "epoch": 0.5754761743374229, - "grad_norm": 1.7332403179183402, - "learning_rate": 8.053942392765781e-07, - "loss": 1.1506, - "step": 4245 - }, - { - "epoch": 0.5756117399850877, - "grad_norm": 1.7796671486068016, - "learning_rate": 8.049635157783169e-07, - "loss": 1.1379, - "step": 4246 - }, - { - "epoch": 0.5757473056327527, - "grad_norm": 1.7032840107375538, - "learning_rate": 8.045328298913024e-07, - "loss": 1.1451, - "step": 4247 - }, - { - "epoch": 0.5758828712804176, - "grad_norm": 1.3640802540371904, - "learning_rate": 8.041021816985887e-07, - "loss": 1.1151, - "step": 4248 - }, - { - "epoch": 0.5760184369280824, - "grad_norm": 7.19413828799656, - "learning_rate": 8.03671571283223e-07, - "loss": 1.1518, - "step": 4249 - }, - { - "epoch": 0.5761540025757473, - "grad_norm": 1.620661897286451, - "learning_rate": 8.03240998728245e-07, - "loss": 1.1461, - "step": 4250 - }, - { - "epoch": 0.5762895682234122, - "grad_norm": 1.6139080968962773, - "learning_rate": 8.028104641166871e-07, - "loss": 1.0997, - "step": 4251 - }, - { - "epoch": 0.5764251338710771, - "grad_norm": 2.9019730310763703, - "learning_rate": 8.02379967531575e-07, - "loss": 1.1382, - "step": 4252 - }, - { - "epoch": 0.576560699518742, - "grad_norm": 2.0892329676796115, - "learning_rate": 8.019495090559257e-07, - "loss": 1.1344, - "step": 4253 - }, - { - "epoch": 0.5766962651664068, - "grad_norm": 1.4074684769987518, - "learning_rate": 8.015190887727509e-07, - "loss": 1.1345, - "step": 4254 - }, - { - "epoch": 0.5768318308140717, - "grad_norm": 1.5951014016177483, - "learning_rate": 8.010887067650526e-07, - "loss": 1.1202, - "step": 4255 - }, - { - "epoch": 0.5769673964617366, - "grad_norm": 1.897722770254828, - "learning_rate": 8.006583631158275e-07, - "loss": 1.1493, - "step": 4256 - }, - { - "epoch": 0.5771029621094015, - "grad_norm": 1.6301711918749067, - "learning_rate": 8.002280579080632e-07, - "loss": 1.1269, - "step": 4257 - }, - { - "epoch": 0.5772385277570664, - "grad_norm": 2.7468641364337913, - "learning_rate": 7.997977912247413e-07, - "loss": 1.1407, - "step": 4258 - }, - { - "epoch": 0.5773740934047312, - "grad_norm": 1.8393965981427962, - "learning_rate": 7.993675631488348e-07, - "loss": 1.1069, - "step": 4259 - }, - { - "epoch": 0.5775096590523962, - "grad_norm": 1.763603688185099, - "learning_rate": 7.989373737633103e-07, - "loss": 1.1303, - "step": 4260 - }, - { - "epoch": 0.577645224700061, - "grad_norm": 2.238083483726902, - "learning_rate": 7.985072231511259e-07, - "loss": 1.0923, - "step": 4261 - }, - { - "epoch": 0.5777807903477259, - "grad_norm": 1.6713738548695456, - "learning_rate": 7.980771113952335e-07, - "loss": 1.1364, - "step": 4262 - }, - { - "epoch": 0.5779163559953908, - "grad_norm": 1.4280508893524158, - "learning_rate": 7.976470385785762e-07, - "loss": 1.117, - "step": 4263 - }, - { - "epoch": 0.5780519216430556, - "grad_norm": 2.063406498628777, - "learning_rate": 7.972170047840898e-07, - "loss": 1.1438, - "step": 4264 - }, - { - "epoch": 0.5781874872907206, - "grad_norm": 1.7038284750585204, - "learning_rate": 7.967870100947038e-07, - "loss": 1.1733, - "step": 4265 - }, - { - "epoch": 0.5783230529383854, - "grad_norm": 1.4788911891554104, - "learning_rate": 7.963570545933384e-07, - "loss": 1.1096, - "step": 4266 - }, - { - "epoch": 0.5784586185860503, - "grad_norm": 1.5248524396637237, - "learning_rate": 7.95927138362908e-07, - "loss": 1.14, - "step": 4267 - }, - { - "epoch": 0.5785941842337152, - "grad_norm": 1.44808614981907, - "learning_rate": 7.954972614863177e-07, - "loss": 1.1193, - "step": 4268 - }, - { - "epoch": 0.57872974988138, - "grad_norm": 1.571796570683775, - "learning_rate": 7.950674240464667e-07, - "loss": 1.1617, - "step": 4269 - }, - { - "epoch": 0.578865315529045, - "grad_norm": 1.560266235482675, - "learning_rate": 7.946376261262449e-07, - "loss": 1.1643, - "step": 4270 - }, - { - "epoch": 0.5790008811767098, - "grad_norm": 1.7762953289875087, - "learning_rate": 7.942078678085363e-07, - "loss": 1.1632, - "step": 4271 - }, - { - "epoch": 0.5791364468243747, - "grad_norm": 1.7980997350686991, - "learning_rate": 7.937781491762156e-07, - "loss": 1.1612, - "step": 4272 - }, - { - "epoch": 0.5792720124720396, - "grad_norm": 1.5645631841721752, - "learning_rate": 7.933484703121513e-07, - "loss": 1.123, - "step": 4273 - }, - { - "epoch": 0.5794075781197044, - "grad_norm": 1.5648777726810812, - "learning_rate": 7.929188312992031e-07, - "loss": 1.1403, - "step": 4274 - }, - { - "epoch": 0.5795431437673694, - "grad_norm": 1.695827195206307, - "learning_rate": 7.924892322202236e-07, - "loss": 1.1333, - "step": 4275 - }, - { - "epoch": 0.5796787094150342, - "grad_norm": 1.4096401955599196, - "learning_rate": 7.920596731580582e-07, - "loss": 1.1352, - "step": 4276 - }, - { - "epoch": 0.5798142750626991, - "grad_norm": 1.8199379759691554, - "learning_rate": 7.91630154195543e-07, - "loss": 1.1977, - "step": 4277 - }, - { - "epoch": 0.579949840710364, - "grad_norm": 1.7295429106368632, - "learning_rate": 7.912006754155078e-07, - "loss": 1.1732, - "step": 4278 - }, - { - "epoch": 0.5800854063580289, - "grad_norm": 1.5592435144538888, - "learning_rate": 7.907712369007743e-07, - "loss": 1.1438, - "step": 4279 - }, - { - "epoch": 0.5802209720056938, - "grad_norm": 10.959757451632926, - "learning_rate": 7.903418387341564e-07, - "loss": 1.155, - "step": 4280 - }, - { - "epoch": 0.5803565376533586, - "grad_norm": 1.5203454718613547, - "learning_rate": 7.899124809984595e-07, - "loss": 1.1434, - "step": 4281 - }, - { - "epoch": 0.5804921033010235, - "grad_norm": 3.042771593135852, - "learning_rate": 7.894831637764828e-07, - "loss": 1.144, - "step": 4282 - }, - { - "epoch": 0.5806276689486884, - "grad_norm": 1.6249831308186309, - "learning_rate": 7.890538871510156e-07, - "loss": 1.1369, - "step": 4283 - }, - { - "epoch": 0.5807632345963533, - "grad_norm": 1.6681471294950783, - "learning_rate": 7.886246512048418e-07, - "loss": 1.1226, - "step": 4284 - }, - { - "epoch": 0.5808988002440182, - "grad_norm": 1.9167182135649241, - "learning_rate": 7.88195456020735e-07, - "loss": 1.0976, - "step": 4285 - }, - { - "epoch": 0.581034365891683, - "grad_norm": 1.5133723891841295, - "learning_rate": 7.87766301681463e-07, - "loss": 1.1605, - "step": 4286 - }, - { - "epoch": 0.5811699315393479, - "grad_norm": 1.6405692004700636, - "learning_rate": 7.873371882697841e-07, - "loss": 1.1416, - "step": 4287 - }, - { - "epoch": 0.5813054971870129, - "grad_norm": 1.5984149408446044, - "learning_rate": 7.869081158684503e-07, - "loss": 1.1172, - "step": 4288 - }, - { - "epoch": 0.5814410628346777, - "grad_norm": 1.7098628239447555, - "learning_rate": 7.864790845602038e-07, - "loss": 1.0889, - "step": 4289 - }, - { - "epoch": 0.5815766284823426, - "grad_norm": 2.0623568522022797, - "learning_rate": 7.860500944277809e-07, - "loss": 1.1511, - "step": 4290 - }, - { - "epoch": 0.5817121941300074, - "grad_norm": 2.2907250466284057, - "learning_rate": 7.856211455539084e-07, - "loss": 1.1398, - "step": 4291 - }, - { - "epoch": 0.5818477597776723, - "grad_norm": 2.2533290548071663, - "learning_rate": 7.851922380213053e-07, - "loss": 1.1545, - "step": 4292 - }, - { - "epoch": 0.5819833254253373, - "grad_norm": 1.8949249691713341, - "learning_rate": 7.847633719126839e-07, - "loss": 1.1011, - "step": 4293 - }, - { - "epoch": 0.5821188910730021, - "grad_norm": 1.4896049363347317, - "learning_rate": 7.84334547310747e-07, - "loss": 1.1417, - "step": 4294 - }, - { - "epoch": 0.582254456720667, - "grad_norm": 1.565041425811932, - "learning_rate": 7.839057642981905e-07, - "loss": 1.1219, - "step": 4295 - }, - { - "epoch": 0.5823900223683318, - "grad_norm": 1.4918027463042942, - "learning_rate": 7.834770229577015e-07, - "loss": 1.1652, - "step": 4296 - }, - { - "epoch": 0.5825255880159967, - "grad_norm": 1.7305034445431453, - "learning_rate": 7.830483233719597e-07, - "loss": 1.1184, - "step": 4297 - }, - { - "epoch": 0.5826611536636617, - "grad_norm": 2.5394717980821415, - "learning_rate": 7.826196656236357e-07, - "loss": 1.1215, - "step": 4298 - }, - { - "epoch": 0.5827967193113265, - "grad_norm": 2.035242049058492, - "learning_rate": 7.821910497953939e-07, - "loss": 1.0981, - "step": 4299 - }, - { - "epoch": 0.5829322849589914, - "grad_norm": 1.591090260012412, - "learning_rate": 7.817624759698884e-07, - "loss": 1.087, - "step": 4300 - }, - { - "epoch": 0.5830678506066562, - "grad_norm": 1.6905698274986207, - "learning_rate": 7.813339442297671e-07, - "loss": 1.1427, - "step": 4301 - }, - { - "epoch": 0.5832034162543211, - "grad_norm": 1.7891287815521557, - "learning_rate": 7.809054546576686e-07, - "loss": 1.1777, - "step": 4302 - }, - { - "epoch": 0.5833389819019861, - "grad_norm": 2.1210088268155483, - "learning_rate": 7.804770073362236e-07, - "loss": 1.1312, - "step": 4303 - }, - { - "epoch": 0.5834745475496509, - "grad_norm": 1.6021508024678994, - "learning_rate": 7.800486023480551e-07, - "loss": 1.1681, - "step": 4304 - }, - { - "epoch": 0.5836101131973158, - "grad_norm": 2.695259499524025, - "learning_rate": 7.796202397757771e-07, - "loss": 1.157, - "step": 4305 - }, - { - "epoch": 0.5837456788449806, - "grad_norm": 2.8578066258284798, - "learning_rate": 7.791919197019967e-07, - "loss": 1.1347, - "step": 4306 - }, - { - "epoch": 0.5838812444926456, - "grad_norm": 1.6464290721404722, - "learning_rate": 7.787636422093114e-07, - "loss": 1.157, - "step": 4307 - }, - { - "epoch": 0.5840168101403105, - "grad_norm": 2.0344201628254206, - "learning_rate": 7.783354073803114e-07, - "loss": 1.1787, - "step": 4308 - }, - { - "epoch": 0.5841523757879753, - "grad_norm": 1.6305861142085205, - "learning_rate": 7.779072152975783e-07, - "loss": 1.1397, - "step": 4309 - }, - { - "epoch": 0.5842879414356402, - "grad_norm": 1.524317147137655, - "learning_rate": 7.774790660436857e-07, - "loss": 1.1073, - "step": 4310 - }, - { - "epoch": 0.584423507083305, - "grad_norm": 1.3770611198606157, - "learning_rate": 7.770509597011986e-07, - "loss": 1.0929, - "step": 4311 - }, - { - "epoch": 0.58455907273097, - "grad_norm": 1.973469144958103, - "learning_rate": 7.766228963526744e-07, - "loss": 1.1385, - "step": 4312 - }, - { - "epoch": 0.5846946383786349, - "grad_norm": 1.520234655809796, - "learning_rate": 7.761948760806611e-07, - "loss": 1.1532, - "step": 4313 - }, - { - "epoch": 0.5848302040262997, - "grad_norm": 1.601687456898116, - "learning_rate": 7.757668989676995e-07, - "loss": 1.1051, - "step": 4314 - }, - { - "epoch": 0.5849657696739646, - "grad_norm": 1.9037796229353834, - "learning_rate": 7.753389650963212e-07, - "loss": 1.1151, - "step": 4315 - }, - { - "epoch": 0.5851013353216294, - "grad_norm": 2.672164755295703, - "learning_rate": 7.749110745490505e-07, - "loss": 1.1267, - "step": 4316 - }, - { - "epoch": 0.5852369009692944, - "grad_norm": 2.050725049135422, - "learning_rate": 7.744832274084019e-07, - "loss": 1.1322, - "step": 4317 - }, - { - "epoch": 0.5853724666169593, - "grad_norm": 1.5040309928471614, - "learning_rate": 7.740554237568832e-07, - "loss": 1.113, - "step": 4318 - }, - { - "epoch": 0.5855080322646241, - "grad_norm": 1.75903738912333, - "learning_rate": 7.736276636769925e-07, - "loss": 1.1729, - "step": 4319 - }, - { - "epoch": 0.585643597912289, - "grad_norm": 1.351364069317839, - "learning_rate": 7.731999472512196e-07, - "loss": 1.092, - "step": 4320 - }, - { - "epoch": 0.5857791635599539, - "grad_norm": 1.6309755618727242, - "learning_rate": 7.727722745620471e-07, - "loss": 1.1529, - "step": 4321 - }, - { - "epoch": 0.5859147292076188, - "grad_norm": 1.614764759357974, - "learning_rate": 7.723446456919473e-07, - "loss": 1.1533, - "step": 4322 - }, - { - "epoch": 0.5860502948552837, - "grad_norm": 1.7771194328284223, - "learning_rate": 7.719170607233861e-07, - "loss": 1.1292, - "step": 4323 - }, - { - "epoch": 0.5861858605029485, - "grad_norm": 1.7698688787390124, - "learning_rate": 7.714895197388188e-07, - "loss": 1.1332, - "step": 4324 - }, - { - "epoch": 0.5863214261506134, - "grad_norm": 1.6592989781969671, - "learning_rate": 7.710620228206944e-07, - "loss": 1.1577, - "step": 4325 - }, - { - "epoch": 0.5864569917982784, - "grad_norm": 1.5341375398369674, - "learning_rate": 7.706345700514512e-07, - "loss": 1.1573, - "step": 4326 - }, - { - "epoch": 0.5865925574459432, - "grad_norm": 1.6532436560906063, - "learning_rate": 7.702071615135212e-07, - "loss": 1.0902, - "step": 4327 - }, - { - "epoch": 0.5867281230936081, - "grad_norm": 1.5144982943009389, - "learning_rate": 7.697797972893258e-07, - "loss": 1.1175, - "step": 4328 - }, - { - "epoch": 0.5868636887412729, - "grad_norm": 1.8236322455638587, - "learning_rate": 7.693524774612797e-07, - "loss": 1.131, - "step": 4329 - }, - { - "epoch": 0.5869992543889379, - "grad_norm": 1.5506456064949399, - "learning_rate": 7.689252021117874e-07, - "loss": 1.1377, - "step": 4330 - }, - { - "epoch": 0.5871348200366028, - "grad_norm": 1.6163931298025707, - "learning_rate": 7.684979713232461e-07, - "loss": 1.1076, - "step": 4331 - }, - { - "epoch": 0.5872703856842676, - "grad_norm": 1.4818819329234871, - "learning_rate": 7.680707851780433e-07, - "loss": 1.1365, - "step": 4332 - }, - { - "epoch": 0.5874059513319325, - "grad_norm": 1.533424051460587, - "learning_rate": 7.676436437585593e-07, - "loss": 1.1286, - "step": 4333 - }, - { - "epoch": 0.5875415169795973, - "grad_norm": 2.299075859363382, - "learning_rate": 7.672165471471643e-07, - "loss": 1.151, - "step": 4334 - }, - { - "epoch": 0.5876770826272623, - "grad_norm": 4.251272787675027, - "learning_rate": 7.667894954262205e-07, - "loss": 1.1494, - "step": 4335 - }, - { - "epoch": 0.5878126482749272, - "grad_norm": 1.848765311380358, - "learning_rate": 7.66362488678082e-07, - "loss": 1.1604, - "step": 4336 - }, - { - "epoch": 0.587948213922592, - "grad_norm": 1.6877032713732425, - "learning_rate": 7.659355269850929e-07, - "loss": 1.1418, - "step": 4337 - }, - { - "epoch": 0.5880837795702569, - "grad_norm": 1.6989338325825334, - "learning_rate": 7.655086104295904e-07, - "loss": 1.1388, - "step": 4338 - }, - { - "epoch": 0.5882193452179217, - "grad_norm": 1.7903979148453333, - "learning_rate": 7.65081739093901e-07, - "loss": 1.1389, - "step": 4339 - }, - { - "epoch": 0.5883549108655867, - "grad_norm": 2.045324160508782, - "learning_rate": 7.646549130603439e-07, - "loss": 1.1631, - "step": 4340 - }, - { - "epoch": 0.5884904765132516, - "grad_norm": 1.7958708294981742, - "learning_rate": 7.642281324112292e-07, - "loss": 1.1305, - "step": 4341 - }, - { - "epoch": 0.5886260421609164, - "grad_norm": 1.5537707445583453, - "learning_rate": 7.638013972288581e-07, - "loss": 1.098, - "step": 4342 - }, - { - "epoch": 0.5887616078085813, - "grad_norm": 1.9699645418138658, - "learning_rate": 7.63374707595523e-07, - "loss": 1.1095, - "step": 4343 - }, - { - "epoch": 0.5888971734562461, - "grad_norm": 1.821184115159573, - "learning_rate": 7.629480635935082e-07, - "loss": 1.1399, - "step": 4344 - }, - { - "epoch": 0.5890327391039111, - "grad_norm": 1.7340863992677047, - "learning_rate": 7.625214653050874e-07, - "loss": 1.123, - "step": 4345 - }, - { - "epoch": 0.589168304751576, - "grad_norm": 2.0347253277602677, - "learning_rate": 7.620949128125282e-07, - "loss": 1.1293, - "step": 4346 - }, - { - "epoch": 0.5893038703992408, - "grad_norm": 3.352548394662374, - "learning_rate": 7.616684061980867e-07, - "loss": 1.1446, - "step": 4347 - }, - { - "epoch": 0.5894394360469057, - "grad_norm": 1.5710689847454289, - "learning_rate": 7.612419455440119e-07, - "loss": 1.1527, - "step": 4348 - }, - { - "epoch": 0.5895750016945706, - "grad_norm": 1.6919199296393415, - "learning_rate": 7.608155309325435e-07, - "loss": 1.1438, - "step": 4349 - }, - { - "epoch": 0.5897105673422355, - "grad_norm": 1.5288999874041473, - "learning_rate": 7.603891624459114e-07, - "loss": 1.1181, - "step": 4350 - }, - { - "epoch": 0.5898461329899004, - "grad_norm": 1.4895373443797895, - "learning_rate": 7.599628401663384e-07, - "loss": 1.168, - "step": 4351 - }, - { - "epoch": 0.5899816986375652, - "grad_norm": 1.6001538605322403, - "learning_rate": 7.595365641760367e-07, - "loss": 1.1533, - "step": 4352 - }, - { - "epoch": 0.5901172642852301, - "grad_norm": 1.5849582943377434, - "learning_rate": 7.591103345572109e-07, - "loss": 1.103, - "step": 4353 - }, - { - "epoch": 0.590252829932895, - "grad_norm": 1.613132409144807, - "learning_rate": 7.58684151392055e-07, - "loss": 1.1587, - "step": 4354 - }, - { - "epoch": 0.5903883955805599, - "grad_norm": 1.662308406391821, - "learning_rate": 7.582580147627562e-07, - "loss": 1.163, - "step": 4355 - }, - { - "epoch": 0.5905239612282248, - "grad_norm": 2.5664563299579024, - "learning_rate": 7.578319247514906e-07, - "loss": 1.0964, - "step": 4356 - }, - { - "epoch": 0.5906595268758896, - "grad_norm": 1.4563353973700353, - "learning_rate": 7.574058814404272e-07, - "loss": 1.1526, - "step": 4357 - }, - { - "epoch": 0.5907950925235546, - "grad_norm": 1.82174490414821, - "learning_rate": 7.569798849117241e-07, - "loss": 1.1545, - "step": 4358 - }, - { - "epoch": 0.5909306581712194, - "grad_norm": 1.8345189938798672, - "learning_rate": 7.565539352475325e-07, - "loss": 1.1422, - "step": 4359 - }, - { - "epoch": 0.5910662238188843, - "grad_norm": 1.6173024548892285, - "learning_rate": 7.561280325299924e-07, - "loss": 1.1083, - "step": 4360 - }, - { - "epoch": 0.5912017894665492, - "grad_norm": 1.6459614752831229, - "learning_rate": 7.557021768412366e-07, - "loss": 1.1533, - "step": 4361 - }, - { - "epoch": 0.591337355114214, - "grad_norm": 1.5593394005804868, - "learning_rate": 7.552763682633877e-07, - "loss": 1.1077, - "step": 4362 - }, - { - "epoch": 0.591472920761879, - "grad_norm": 1.9138300348443826, - "learning_rate": 7.548506068785589e-07, - "loss": 1.0824, - "step": 4363 - }, - { - "epoch": 0.5916084864095438, - "grad_norm": 2.1602719300077484, - "learning_rate": 7.544248927688561e-07, - "loss": 1.1568, - "step": 4364 - }, - { - "epoch": 0.5917440520572087, - "grad_norm": 1.7849413613539333, - "learning_rate": 7.539992260163735e-07, - "loss": 1.1214, - "step": 4365 - }, - { - "epoch": 0.5918796177048736, - "grad_norm": 1.8800649555140474, - "learning_rate": 7.535736067031991e-07, - "loss": 1.1737, - "step": 4366 - }, - { - "epoch": 0.5920151833525384, - "grad_norm": 1.6494114195479546, - "learning_rate": 7.531480349114088e-07, - "loss": 1.122, - "step": 4367 - }, - { - "epoch": 0.5921507490002034, - "grad_norm": 1.529138063293265, - "learning_rate": 7.527225107230721e-07, - "loss": 1.1351, - "step": 4368 - }, - { - "epoch": 0.5922863146478682, - "grad_norm": 1.5130405444146295, - "learning_rate": 7.52297034220247e-07, - "loss": 1.1283, - "step": 4369 - }, - { - "epoch": 0.5924218802955331, - "grad_norm": 4.706591519487228, - "learning_rate": 7.518716054849836e-07, - "loss": 1.1528, - "step": 4370 - }, - { - "epoch": 0.592557445943198, - "grad_norm": 1.7455720069843457, - "learning_rate": 7.514462245993225e-07, - "loss": 1.1469, - "step": 4371 - }, - { - "epoch": 0.5926930115908629, - "grad_norm": 1.4701551848668586, - "learning_rate": 7.51020891645295e-07, - "loss": 1.1142, - "step": 4372 - }, - { - "epoch": 0.5928285772385278, - "grad_norm": 2.3700004429718824, - "learning_rate": 7.505956067049232e-07, - "loss": 1.1661, - "step": 4373 - }, - { - "epoch": 0.5929641428861926, - "grad_norm": 1.6729472670703416, - "learning_rate": 7.501703698602202e-07, - "loss": 1.1408, - "step": 4374 - }, - { - "epoch": 0.5930997085338575, - "grad_norm": 1.8872346825549602, - "learning_rate": 7.497451811931891e-07, - "loss": 1.1285, - "step": 4375 - }, - { - "epoch": 0.5932352741815224, - "grad_norm": 1.5298969800969362, - "learning_rate": 7.493200407858245e-07, - "loss": 1.1283, - "step": 4376 - }, - { - "epoch": 0.5933708398291873, - "grad_norm": 2.273177413658046, - "learning_rate": 7.488949487201112e-07, - "loss": 1.1575, - "step": 4377 - }, - { - "epoch": 0.5935064054768522, - "grad_norm": 1.8967857670056962, - "learning_rate": 7.48469905078025e-07, - "loss": 1.1501, - "step": 4378 - }, - { - "epoch": 0.593641971124517, - "grad_norm": 1.5249667169070003, - "learning_rate": 7.480449099415322e-07, - "loss": 1.1399, - "step": 4379 - }, - { - "epoch": 0.5937775367721819, - "grad_norm": 1.8640791305739484, - "learning_rate": 7.476199633925894e-07, - "loss": 1.1326, - "step": 4380 - }, - { - "epoch": 0.5939131024198469, - "grad_norm": 1.5237063717471913, - "learning_rate": 7.471950655131451e-07, - "loss": 1.1193, - "step": 4381 - }, - { - "epoch": 0.5940486680675117, - "grad_norm": 1.5819151628531438, - "learning_rate": 7.467702163851363e-07, - "loss": 1.1204, - "step": 4382 - }, - { - "epoch": 0.5941842337151766, - "grad_norm": 1.6613672978583196, - "learning_rate": 7.463454160904927e-07, - "loss": 1.1269, - "step": 4383 - }, - { - "epoch": 0.5943197993628414, - "grad_norm": 8.20259787349857, - "learning_rate": 7.459206647111331e-07, - "loss": 1.1576, - "step": 4384 - }, - { - "epoch": 0.5944553650105063, - "grad_norm": 1.5810161301637762, - "learning_rate": 7.454959623289682e-07, - "loss": 1.1073, - "step": 4385 - }, - { - "epoch": 0.5945909306581713, - "grad_norm": 1.9423153561301443, - "learning_rate": 7.450713090258976e-07, - "loss": 1.0947, - "step": 4386 - }, - { - "epoch": 0.5947264963058361, - "grad_norm": 6.904488034547616, - "learning_rate": 7.44646704883813e-07, - "loss": 1.1489, - "step": 4387 - }, - { - "epoch": 0.594862061953501, - "grad_norm": 1.7378081900089435, - "learning_rate": 7.442221499845955e-07, - "loss": 1.1161, - "step": 4388 - }, - { - "epoch": 0.5949976276011658, - "grad_norm": 1.5454608770556548, - "learning_rate": 7.437976444101177e-07, - "loss": 1.1377, - "step": 4389 - }, - { - "epoch": 0.5951331932488307, - "grad_norm": 1.8708632468726125, - "learning_rate": 7.433731882422418e-07, - "loss": 1.1085, - "step": 4390 - }, - { - "epoch": 0.5952687588964957, - "grad_norm": 1.5704695990839246, - "learning_rate": 7.429487815628206e-07, - "loss": 1.1148, - "step": 4391 - }, - { - "epoch": 0.5954043245441605, - "grad_norm": 2.820885748485331, - "learning_rate": 7.425244244536981e-07, - "loss": 1.1692, - "step": 4392 - }, - { - "epoch": 0.5955398901918254, - "grad_norm": 1.6911282766156912, - "learning_rate": 7.421001169967076e-07, - "loss": 1.1603, - "step": 4393 - }, - { - "epoch": 0.5956754558394902, - "grad_norm": 1.7945142163729557, - "learning_rate": 7.416758592736742e-07, - "loss": 1.1097, - "step": 4394 - }, - { - "epoch": 0.5958110214871551, - "grad_norm": 1.7856664175357966, - "learning_rate": 7.41251651366412e-07, - "loss": 1.1334, - "step": 4395 - }, - { - "epoch": 0.5959465871348201, - "grad_norm": 2.9979440894555265, - "learning_rate": 7.408274933567267e-07, - "loss": 1.1217, - "step": 4396 - }, - { - "epoch": 0.5960821527824849, - "grad_norm": 1.4965119862095082, - "learning_rate": 7.404033853264131e-07, - "loss": 1.1179, - "step": 4397 - }, - { - "epoch": 0.5962177184301498, - "grad_norm": 1.5682233369074923, - "learning_rate": 7.399793273572578e-07, - "loss": 1.185, - "step": 4398 - }, - { - "epoch": 0.5963532840778146, - "grad_norm": 3.60571628703349, - "learning_rate": 7.395553195310364e-07, - "loss": 1.1406, - "step": 4399 - }, - { - "epoch": 0.5964888497254796, - "grad_norm": 1.532035864699447, - "learning_rate": 7.391313619295163e-07, - "loss": 1.1085, - "step": 4400 - }, - { - "epoch": 0.5966244153731445, - "grad_norm": 1.7899527918693714, - "learning_rate": 7.387074546344536e-07, - "loss": 1.1383, - "step": 4401 - }, - { - "epoch": 0.5967599810208093, - "grad_norm": 1.7814107476557575, - "learning_rate": 7.382835977275959e-07, - "loss": 1.1467, - "step": 4402 - }, - { - "epoch": 0.5968955466684742, - "grad_norm": 1.457756229401108, - "learning_rate": 7.378597912906805e-07, - "loss": 1.1338, - "step": 4403 - }, - { - "epoch": 0.5970311123161391, - "grad_norm": 2.794423734553538, - "learning_rate": 7.374360354054348e-07, - "loss": 1.1066, - "step": 4404 - }, - { - "epoch": 0.597166677963804, - "grad_norm": 1.6780093986153028, - "learning_rate": 7.370123301535777e-07, - "loss": 1.146, - "step": 4405 - }, - { - "epoch": 0.5973022436114689, - "grad_norm": 1.7690678007593121, - "learning_rate": 7.365886756168165e-07, - "loss": 1.1793, - "step": 4406 - }, - { - "epoch": 0.5974378092591337, - "grad_norm": 1.801333359381476, - "learning_rate": 7.3616507187685e-07, - "loss": 1.1563, - "step": 4407 - }, - { - "epoch": 0.5975733749067986, - "grad_norm": 1.604817352990756, - "learning_rate": 7.357415190153666e-07, - "loss": 1.0941, - "step": 4408 - }, - { - "epoch": 0.5977089405544636, - "grad_norm": 1.7223154704450543, - "learning_rate": 7.353180171140455e-07, - "loss": 1.1457, - "step": 4409 - }, - { - "epoch": 0.5978445062021284, - "grad_norm": 3.4203302021784294, - "learning_rate": 7.348945662545556e-07, - "loss": 1.1201, - "step": 4410 - }, - { - "epoch": 0.5979800718497933, - "grad_norm": 1.5902162714736003, - "learning_rate": 7.34471166518556e-07, - "loss": 1.1488, - "step": 4411 - }, - { - "epoch": 0.5981156374974581, - "grad_norm": 2.368560881960591, - "learning_rate": 7.340478179876957e-07, - "loss": 1.1618, - "step": 4412 - }, - { - "epoch": 0.598251203145123, - "grad_norm": 2.349930713769084, - "learning_rate": 7.336245207436147e-07, - "loss": 1.1374, - "step": 4413 - }, - { - "epoch": 0.598386768792788, - "grad_norm": 3.693893642274647, - "learning_rate": 7.332012748679419e-07, - "loss": 1.1097, - "step": 4414 - }, - { - "epoch": 0.5985223344404528, - "grad_norm": 2.123182372374166, - "learning_rate": 7.327780804422977e-07, - "loss": 1.1539, - "step": 4415 - }, - { - "epoch": 0.5986579000881177, - "grad_norm": 3.732657885234626, - "learning_rate": 7.32354937548291e-07, - "loss": 1.1636, - "step": 4416 - }, - { - "epoch": 0.5987934657357825, - "grad_norm": 1.5636966133659724, - "learning_rate": 7.319318462675223e-07, - "loss": 1.1135, - "step": 4417 - }, - { - "epoch": 0.5989290313834474, - "grad_norm": 2.1412063028498087, - "learning_rate": 7.315088066815809e-07, - "loss": 1.1184, - "step": 4418 - }, - { - "epoch": 0.5990645970311124, - "grad_norm": 2.10131816222588, - "learning_rate": 7.310858188720466e-07, - "loss": 1.1204, - "step": 4419 - }, - { - "epoch": 0.5992001626787772, - "grad_norm": 1.4857802694310593, - "learning_rate": 7.306628829204897e-07, - "loss": 1.128, - "step": 4420 - }, - { - "epoch": 0.5993357283264421, - "grad_norm": 1.6812756022268571, - "learning_rate": 7.302399989084695e-07, - "loss": 1.1633, - "step": 4421 - }, - { - "epoch": 0.5994712939741069, - "grad_norm": 2.6414115975868806, - "learning_rate": 7.298171669175365e-07, - "loss": 1.1341, - "step": 4422 - }, - { - "epoch": 0.5996068596217718, - "grad_norm": 1.5279092745997136, - "learning_rate": 7.293943870292299e-07, - "loss": 1.1402, - "step": 4423 - }, - { - "epoch": 0.5997424252694368, - "grad_norm": 1.6596064799672616, - "learning_rate": 7.289716593250798e-07, - "loss": 1.1523, - "step": 4424 - }, - { - "epoch": 0.5998779909171016, - "grad_norm": 1.5881054060334978, - "learning_rate": 7.285489838866057e-07, - "loss": 1.1569, - "step": 4425 - }, - { - "epoch": 0.6000135565647665, - "grad_norm": 1.5051428091503156, - "learning_rate": 7.281263607953177e-07, - "loss": 1.1046, - "step": 4426 - }, - { - "epoch": 0.6001491222124313, - "grad_norm": 1.6633557026924215, - "learning_rate": 7.277037901327145e-07, - "loss": 1.1129, - "step": 4427 - }, - { - "epoch": 0.6002846878600963, - "grad_norm": 1.6845907911493256, - "learning_rate": 7.272812719802865e-07, - "loss": 1.1451, - "step": 4428 - }, - { - "epoch": 0.6004202535077612, - "grad_norm": 1.7637691769981811, - "learning_rate": 7.268588064195122e-07, - "loss": 1.1296, - "step": 4429 - }, - { - "epoch": 0.600555819155426, - "grad_norm": 1.573491249240248, - "learning_rate": 7.264363935318612e-07, - "loss": 1.1393, - "step": 4430 - }, - { - "epoch": 0.6006913848030909, - "grad_norm": 1.6447310297629474, - "learning_rate": 7.260140333987925e-07, - "loss": 1.1392, - "step": 4431 - }, - { - "epoch": 0.6008269504507557, - "grad_norm": 1.5968441985542767, - "learning_rate": 7.255917261017543e-07, - "loss": 1.1326, - "step": 4432 - }, - { - "epoch": 0.6009625160984207, - "grad_norm": 1.9299702959857667, - "learning_rate": 7.25169471722186e-07, - "loss": 1.1295, - "step": 4433 - }, - { - "epoch": 0.6010980817460856, - "grad_norm": 1.8642916467071857, - "learning_rate": 7.247472703415154e-07, - "loss": 1.1499, - "step": 4434 - }, - { - "epoch": 0.6012336473937504, - "grad_norm": 1.5785690160837518, - "learning_rate": 7.243251220411612e-07, - "loss": 1.1971, - "step": 4435 - }, - { - "epoch": 0.6013692130414153, - "grad_norm": 1.4392031166462547, - "learning_rate": 7.23903026902531e-07, - "loss": 1.1415, - "step": 4436 - }, - { - "epoch": 0.6015047786890801, - "grad_norm": 7.716786804473418, - "learning_rate": 7.234809850070231e-07, - "loss": 1.0925, - "step": 4437 - }, - { - "epoch": 0.6016403443367451, - "grad_norm": 1.494721558862639, - "learning_rate": 7.230589964360242e-07, - "loss": 1.122, - "step": 4438 - }, - { - "epoch": 0.60177590998441, - "grad_norm": 1.6122072682965642, - "learning_rate": 7.226370612709119e-07, - "loss": 1.1408, - "step": 4439 - }, - { - "epoch": 0.6019114756320748, - "grad_norm": 1.7543570221656362, - "learning_rate": 7.222151795930528e-07, - "loss": 1.1208, - "step": 4440 - }, - { - "epoch": 0.6020470412797397, - "grad_norm": 1.7173564091678917, - "learning_rate": 7.21793351483804e-07, - "loss": 1.165, - "step": 4441 - }, - { - "epoch": 0.6021826069274046, - "grad_norm": 1.8338264709594905, - "learning_rate": 7.213715770245108e-07, - "loss": 1.1592, - "step": 4442 - }, - { - "epoch": 0.6023181725750695, - "grad_norm": 1.6184793816179404, - "learning_rate": 7.209498562965101e-07, - "loss": 1.1691, - "step": 4443 - }, - { - "epoch": 0.6024537382227344, - "grad_norm": 2.8146334167348135, - "learning_rate": 7.205281893811264e-07, - "loss": 1.1361, - "step": 4444 - }, - { - "epoch": 0.6025893038703992, - "grad_norm": 1.4896496195174955, - "learning_rate": 7.201065763596758e-07, - "loss": 1.1365, - "step": 4445 - }, - { - "epoch": 0.6027248695180641, - "grad_norm": 1.61393152059539, - "learning_rate": 7.196850173134628e-07, - "loss": 1.1163, - "step": 4446 - }, - { - "epoch": 0.602860435165729, - "grad_norm": 3.113226781607572, - "learning_rate": 7.192635123237809e-07, - "loss": 1.1189, - "step": 4447 - }, - { - "epoch": 0.6029960008133939, - "grad_norm": 1.5215118815895317, - "learning_rate": 7.188420614719152e-07, - "loss": 1.157, - "step": 4448 - }, - { - "epoch": 0.6031315664610588, - "grad_norm": 1.7208484890966995, - "learning_rate": 7.184206648391381e-07, - "loss": 1.1488, - "step": 4449 - }, - { - "epoch": 0.6032671321087236, - "grad_norm": 1.955896913348491, - "learning_rate": 7.179993225067136e-07, - "loss": 1.1671, - "step": 4450 - }, - { - "epoch": 0.6034026977563886, - "grad_norm": 2.05372816243167, - "learning_rate": 7.175780345558934e-07, - "loss": 1.1218, - "step": 4451 - }, - { - "epoch": 0.6035382634040534, - "grad_norm": 1.776834741907752, - "learning_rate": 7.171568010679203e-07, - "loss": 1.1305, - "step": 4452 - }, - { - "epoch": 0.6036738290517183, - "grad_norm": 1.837984027707176, - "learning_rate": 7.167356221240251e-07, - "loss": 1.126, - "step": 4453 - }, - { - "epoch": 0.6038093946993832, - "grad_norm": 1.4575844895021117, - "learning_rate": 7.163144978054296e-07, - "loss": 1.1416, - "step": 4454 - }, - { - "epoch": 0.603944960347048, - "grad_norm": 1.7511720658362409, - "learning_rate": 7.158934281933435e-07, - "loss": 1.1108, - "step": 4455 - }, - { - "epoch": 0.604080525994713, - "grad_norm": 1.5939746925819607, - "learning_rate": 7.154724133689676e-07, - "loss": 1.1491, - "step": 4456 - }, - { - "epoch": 0.6042160916423778, - "grad_norm": 1.5395649970225227, - "learning_rate": 7.150514534134905e-07, - "loss": 1.169, - "step": 4457 - }, - { - "epoch": 0.6043516572900427, - "grad_norm": 1.7011008395799496, - "learning_rate": 7.146305484080916e-07, - "loss": 1.1111, - "step": 4458 - }, - { - "epoch": 0.6044872229377076, - "grad_norm": 1.8370073996456635, - "learning_rate": 7.142096984339392e-07, - "loss": 1.1435, - "step": 4459 - }, - { - "epoch": 0.6046227885853724, - "grad_norm": 2.425330745885285, - "learning_rate": 7.137889035721898e-07, - "loss": 1.1135, - "step": 4460 - }, - { - "epoch": 0.6047583542330374, - "grad_norm": 1.8588056605775678, - "learning_rate": 7.133681639039917e-07, - "loss": 1.1276, - "step": 4461 - }, - { - "epoch": 0.6048939198807022, - "grad_norm": 1.6227868404709163, - "learning_rate": 7.129474795104802e-07, - "loss": 1.1399, - "step": 4462 - }, - { - "epoch": 0.6050294855283671, - "grad_norm": 1.6220416176634014, - "learning_rate": 7.12526850472782e-07, - "loss": 1.1435, - "step": 4463 - }, - { - "epoch": 0.605165051176032, - "grad_norm": 2.218505756586712, - "learning_rate": 7.121062768720109e-07, - "loss": 1.0991, - "step": 4464 - }, - { - "epoch": 0.6053006168236968, - "grad_norm": 1.6300872815605245, - "learning_rate": 7.116857587892724e-07, - "loss": 1.0958, - "step": 4465 - }, - { - "epoch": 0.6054361824713618, - "grad_norm": 1.5524200430634658, - "learning_rate": 7.112652963056589e-07, - "loss": 1.0963, - "step": 4466 - }, - { - "epoch": 0.6055717481190266, - "grad_norm": 2.0296785891948548, - "learning_rate": 7.108448895022544e-07, - "loss": 1.1001, - "step": 4467 - }, - { - "epoch": 0.6057073137666915, - "grad_norm": 1.75282875803731, - "learning_rate": 7.104245384601303e-07, - "loss": 1.1103, - "step": 4468 - }, - { - "epoch": 0.6058428794143564, - "grad_norm": 1.9988048030585461, - "learning_rate": 7.100042432603481e-07, - "loss": 1.1247, - "step": 4469 - }, - { - "epoch": 0.6059784450620213, - "grad_norm": 1.6007947113350756, - "learning_rate": 7.095840039839587e-07, - "loss": 1.137, - "step": 4470 - }, - { - "epoch": 0.6061140107096862, - "grad_norm": 1.7048483160488441, - "learning_rate": 7.091638207120015e-07, - "loss": 1.1557, - "step": 4471 - }, - { - "epoch": 0.606249576357351, - "grad_norm": 1.6530385538340406, - "learning_rate": 7.087436935255058e-07, - "loss": 1.1396, - "step": 4472 - }, - { - "epoch": 0.6063851420050159, - "grad_norm": 2.125404356642552, - "learning_rate": 7.083236225054901e-07, - "loss": 1.1581, - "step": 4473 - }, - { - "epoch": 0.6065207076526808, - "grad_norm": 1.8701296270779124, - "learning_rate": 7.079036077329612e-07, - "loss": 1.1368, - "step": 4474 - }, - { - "epoch": 0.6066562733003457, - "grad_norm": 1.3903795147450222, - "learning_rate": 7.074836492889158e-07, - "loss": 1.1501, - "step": 4475 - }, - { - "epoch": 0.6067918389480106, - "grad_norm": 1.7006662160514403, - "learning_rate": 7.070637472543397e-07, - "loss": 1.1423, - "step": 4476 - }, - { - "epoch": 0.6069274045956754, - "grad_norm": 1.5354216727414138, - "learning_rate": 7.066439017102076e-07, - "loss": 1.1383, - "step": 4477 - }, - { - "epoch": 0.6070629702433403, - "grad_norm": 3.238427008717177, - "learning_rate": 7.062241127374838e-07, - "loss": 1.0689, - "step": 4478 - }, - { - "epoch": 0.6071985358910053, - "grad_norm": 1.745015301833533, - "learning_rate": 7.058043804171203e-07, - "loss": 1.1508, - "step": 4479 - }, - { - "epoch": 0.6073341015386701, - "grad_norm": 1.5442179376694685, - "learning_rate": 7.053847048300603e-07, - "loss": 1.0993, - "step": 4480 - }, - { - "epoch": 0.607469667186335, - "grad_norm": 1.6567685812751853, - "learning_rate": 7.04965086057234e-07, - "loss": 1.1397, - "step": 4481 - }, - { - "epoch": 0.6076052328339999, - "grad_norm": 1.5174005411124134, - "learning_rate": 7.045455241795624e-07, - "loss": 1.1245, - "step": 4482 - }, - { - "epoch": 0.6077407984816647, - "grad_norm": 1.5344725671430652, - "learning_rate": 7.041260192779539e-07, - "loss": 1.1494, - "step": 4483 - }, - { - "epoch": 0.6078763641293297, - "grad_norm": 1.5032059523780748, - "learning_rate": 7.037065714333075e-07, - "loss": 1.1414, - "step": 4484 - }, - { - "epoch": 0.6080119297769945, - "grad_norm": 1.7617568134348471, - "learning_rate": 7.032871807265096e-07, - "loss": 1.1451, - "step": 4485 - }, - { - "epoch": 0.6081474954246594, - "grad_norm": 1.8124716188595564, - "learning_rate": 7.028678472384373e-07, - "loss": 1.1362, - "step": 4486 - }, - { - "epoch": 0.6082830610723243, - "grad_norm": 1.50831067470735, - "learning_rate": 7.02448571049955e-07, - "loss": 1.1301, - "step": 4487 - }, - { - "epoch": 0.6084186267199891, - "grad_norm": 1.6425129359874426, - "learning_rate": 7.020293522419168e-07, - "loss": 1.1432, - "step": 4488 - }, - { - "epoch": 0.6085541923676541, - "grad_norm": 1.6645939157235385, - "learning_rate": 7.016101908951663e-07, - "loss": 1.1608, - "step": 4489 - }, - { - "epoch": 0.6086897580153189, - "grad_norm": 1.6265322101399269, - "learning_rate": 7.011910870905349e-07, - "loss": 1.0763, - "step": 4490 - }, - { - "epoch": 0.6088253236629838, - "grad_norm": 2.8859283740483352, - "learning_rate": 7.00772040908844e-07, - "loss": 1.1158, - "step": 4491 - }, - { - "epoch": 0.6089608893106487, - "grad_norm": 1.8900874879735372, - "learning_rate": 7.003530524309025e-07, - "loss": 1.1095, - "step": 4492 - }, - { - "epoch": 0.6090964549583135, - "grad_norm": 1.5605791947669456, - "learning_rate": 6.999341217375103e-07, - "loss": 1.12, - "step": 4493 - }, - { - "epoch": 0.6092320206059785, - "grad_norm": 1.6171384525146804, - "learning_rate": 6.995152489094535e-07, - "loss": 1.1414, - "step": 4494 - }, - { - "epoch": 0.6093675862536433, - "grad_norm": 1.6629559288068192, - "learning_rate": 6.990964340275095e-07, - "loss": 1.1683, - "step": 4495 - }, - { - "epoch": 0.6095031519013082, - "grad_norm": 1.769845179887708, - "learning_rate": 6.986776771724427e-07, - "loss": 1.1658, - "step": 4496 - }, - { - "epoch": 0.6096387175489731, - "grad_norm": 1.4339589726480508, - "learning_rate": 6.982589784250077e-07, - "loss": 1.1551, - "step": 4497 - }, - { - "epoch": 0.609774283196638, - "grad_norm": 1.5568245261308535, - "learning_rate": 6.978403378659466e-07, - "loss": 1.1341, - "step": 4498 - }, - { - "epoch": 0.6099098488443029, - "grad_norm": 1.558099672003262, - "learning_rate": 6.974217555759913e-07, - "loss": 1.1273, - "step": 4499 - }, - { - "epoch": 0.6100454144919677, - "grad_norm": 2.955644305688091, - "learning_rate": 6.970032316358623e-07, - "loss": 1.1484, - "step": 4500 - }, - { - "epoch": 0.6101809801396326, - "grad_norm": 1.5434271191731803, - "learning_rate": 6.965847661262681e-07, - "loss": 1.157, - "step": 4501 - }, - { - "epoch": 0.6103165457872975, - "grad_norm": 2.186985335608288, - "learning_rate": 6.96166359127907e-07, - "loss": 1.1012, - "step": 4502 - }, - { - "epoch": 0.6104521114349624, - "grad_norm": 1.8578194716237213, - "learning_rate": 6.957480107214648e-07, - "loss": 1.1162, - "step": 4503 - }, - { - "epoch": 0.6105876770826273, - "grad_norm": 1.9292318932061505, - "learning_rate": 6.953297209876174e-07, - "loss": 1.1245, - "step": 4504 - }, - { - "epoch": 0.6107232427302921, - "grad_norm": 1.7113940673614543, - "learning_rate": 6.949114900070284e-07, - "loss": 1.0758, - "step": 4505 - }, - { - "epoch": 0.610858808377957, - "grad_norm": 1.5202679227228135, - "learning_rate": 6.944933178603503e-07, - "loss": 1.1344, - "step": 4506 - }, - { - "epoch": 0.610994374025622, - "grad_norm": 19.521904748691686, - "learning_rate": 6.940752046282242e-07, - "loss": 1.1485, - "step": 4507 - }, - { - "epoch": 0.6111299396732868, - "grad_norm": 2.2389932520466154, - "learning_rate": 6.936571503912803e-07, - "loss": 1.1467, - "step": 4508 - }, - { - "epoch": 0.6112655053209517, - "grad_norm": 2.8503555433087597, - "learning_rate": 6.932391552301366e-07, - "loss": 1.1738, - "step": 4509 - }, - { - "epoch": 0.6114010709686165, - "grad_norm": 2.268224624427505, - "learning_rate": 6.928212192254006e-07, - "loss": 1.1586, - "step": 4510 - }, - { - "epoch": 0.6115366366162814, - "grad_norm": 1.8580987853971451, - "learning_rate": 6.924033424576674e-07, - "loss": 1.1565, - "step": 4511 - }, - { - "epoch": 0.6116722022639464, - "grad_norm": 1.5224275384205892, - "learning_rate": 6.91985525007522e-07, - "loss": 1.098, - "step": 4512 - }, - { - "epoch": 0.6118077679116112, - "grad_norm": 1.5491055486086598, - "learning_rate": 6.915677669555363e-07, - "loss": 1.1669, - "step": 4513 - }, - { - "epoch": 0.6119433335592761, - "grad_norm": 1.481067836938021, - "learning_rate": 6.911500683822726e-07, - "loss": 1.1545, - "step": 4514 - }, - { - "epoch": 0.6120788992069409, - "grad_norm": 1.4307051433481928, - "learning_rate": 6.907324293682803e-07, - "loss": 1.1747, - "step": 4515 - }, - { - "epoch": 0.6122144648546058, - "grad_norm": 1.6389109473528913, - "learning_rate": 6.903148499940974e-07, - "loss": 1.1027, - "step": 4516 - }, - { - "epoch": 0.6123500305022708, - "grad_norm": 1.5816612654651598, - "learning_rate": 6.898973303402516e-07, - "loss": 1.1191, - "step": 4517 - }, - { - "epoch": 0.6124855961499356, - "grad_norm": 8.488043305219726, - "learning_rate": 6.894798704872574e-07, - "loss": 1.1243, - "step": 4518 - }, - { - "epoch": 0.6126211617976005, - "grad_norm": 1.5759959150172058, - "learning_rate": 6.890624705156194e-07, - "loss": 1.1314, - "step": 4519 - }, - { - "epoch": 0.6127567274452653, - "grad_norm": 1.7537504176390346, - "learning_rate": 6.886451305058293e-07, - "loss": 1.1226, - "step": 4520 - }, - { - "epoch": 0.6128922930929303, - "grad_norm": 2.948374758370644, - "learning_rate": 6.882278505383685e-07, - "loss": 1.1482, - "step": 4521 - }, - { - "epoch": 0.6130278587405952, - "grad_norm": 1.7591021939875564, - "learning_rate": 6.878106306937053e-07, - "loss": 1.1078, - "step": 4522 - }, - { - "epoch": 0.61316342438826, - "grad_norm": 1.7170267635501548, - "learning_rate": 6.873934710522979e-07, - "loss": 1.1214, - "step": 4523 - }, - { - "epoch": 0.6132989900359249, - "grad_norm": 4.484760436003515, - "learning_rate": 6.86976371694592e-07, - "loss": 1.1618, - "step": 4524 - }, - { - "epoch": 0.6134345556835897, - "grad_norm": 2.1110000417905073, - "learning_rate": 6.865593327010221e-07, - "loss": 1.0979, - "step": 4525 - }, - { - "epoch": 0.6135701213312547, - "grad_norm": 1.917532415879903, - "learning_rate": 6.861423541520104e-07, - "loss": 1.1821, - "step": 4526 - }, - { - "epoch": 0.6137056869789196, - "grad_norm": 2.2558159011680936, - "learning_rate": 6.857254361279688e-07, - "loss": 1.1161, - "step": 4527 - }, - { - "epoch": 0.6138412526265844, - "grad_norm": 1.6605723674120318, - "learning_rate": 6.853085787092956e-07, - "loss": 1.1253, - "step": 4528 - }, - { - "epoch": 0.6139768182742493, - "grad_norm": 4.592016835627234, - "learning_rate": 6.848917819763793e-07, - "loss": 1.1591, - "step": 4529 - }, - { - "epoch": 0.6141123839219141, - "grad_norm": 7.0868054944575585, - "learning_rate": 6.844750460095956e-07, - "loss": 1.1274, - "step": 4530 - }, - { - "epoch": 0.6142479495695791, - "grad_norm": 1.491859944055205, - "learning_rate": 6.840583708893083e-07, - "loss": 1.1541, - "step": 4531 - }, - { - "epoch": 0.614383515217244, - "grad_norm": 2.549133347606657, - "learning_rate": 6.836417566958707e-07, - "loss": 1.2007, - "step": 4532 - }, - { - "epoch": 0.6145190808649088, - "grad_norm": 1.9676764409402996, - "learning_rate": 6.832252035096227e-07, - "loss": 1.1304, - "step": 4533 - }, - { - "epoch": 0.6146546465125737, - "grad_norm": 1.9683209168651759, - "learning_rate": 6.82808711410894e-07, - "loss": 1.1834, - "step": 4534 - }, - { - "epoch": 0.6147902121602385, - "grad_norm": 1.6808770360947067, - "learning_rate": 6.823922804800016e-07, - "loss": 1.1323, - "step": 4535 - }, - { - "epoch": 0.6149257778079035, - "grad_norm": 2.05725443960613, - "learning_rate": 6.819759107972507e-07, - "loss": 1.1917, - "step": 4536 - }, - { - "epoch": 0.6150613434555684, - "grad_norm": 1.86544207070624, - "learning_rate": 6.815596024429351e-07, - "loss": 1.1303, - "step": 4537 - }, - { - "epoch": 0.6151969091032332, - "grad_norm": 2.215596263259654, - "learning_rate": 6.811433554973366e-07, - "loss": 1.1944, - "step": 4538 - }, - { - "epoch": 0.6153324747508981, - "grad_norm": 1.6601286191591305, - "learning_rate": 6.807271700407251e-07, - "loss": 1.1201, - "step": 4539 - }, - { - "epoch": 0.615468040398563, - "grad_norm": 1.6033505218651847, - "learning_rate": 6.803110461533587e-07, - "loss": 1.0886, - "step": 4540 - }, - { - "epoch": 0.6156036060462279, - "grad_norm": 1.8777171018448862, - "learning_rate": 6.798949839154834e-07, - "loss": 1.1467, - "step": 4541 - }, - { - "epoch": 0.6157391716938928, - "grad_norm": 1.8249390345085108, - "learning_rate": 6.79478983407334e-07, - "loss": 1.0962, - "step": 4542 - }, - { - "epoch": 0.6158747373415576, - "grad_norm": 1.683851942392328, - "learning_rate": 6.790630447091325e-07, - "loss": 1.1171, - "step": 4543 - }, - { - "epoch": 0.6160103029892225, - "grad_norm": 1.7769644365678379, - "learning_rate": 6.786471679010895e-07, - "loss": 1.1272, - "step": 4544 - }, - { - "epoch": 0.6161458686368874, - "grad_norm": 1.9699858745664762, - "learning_rate": 6.782313530634036e-07, - "loss": 1.1507, - "step": 4545 - }, - { - "epoch": 0.6162814342845523, - "grad_norm": 1.762647754126047, - "learning_rate": 6.77815600276261e-07, - "loss": 1.1285, - "step": 4546 - }, - { - "epoch": 0.6164169999322172, - "grad_norm": 1.6982032840341865, - "learning_rate": 6.773999096198373e-07, - "loss": 1.1237, - "step": 4547 - }, - { - "epoch": 0.616552565579882, - "grad_norm": 1.9456154018627647, - "learning_rate": 6.769842811742941e-07, - "loss": 1.1068, - "step": 4548 - }, - { - "epoch": 0.616688131227547, - "grad_norm": 1.5991101358985949, - "learning_rate": 6.765687150197827e-07, - "loss": 1.1624, - "step": 4549 - }, - { - "epoch": 0.6168236968752118, - "grad_norm": 1.8063471671469524, - "learning_rate": 6.761532112364414e-07, - "loss": 1.1338, - "step": 4550 - }, - { - "epoch": 0.6169592625228767, - "grad_norm": 1.5972258826500176, - "learning_rate": 6.757377699043976e-07, - "loss": 1.1379, - "step": 4551 - }, - { - "epoch": 0.6170948281705416, - "grad_norm": 1.9268050070278382, - "learning_rate": 6.753223911037646e-07, - "loss": 1.1646, - "step": 4552 - }, - { - "epoch": 0.6172303938182064, - "grad_norm": 1.9475270201040769, - "learning_rate": 6.749070749146461e-07, - "loss": 1.165, - "step": 4553 - }, - { - "epoch": 0.6173659594658714, - "grad_norm": 1.7539586088340586, - "learning_rate": 6.744918214171318e-07, - "loss": 1.139, - "step": 4554 - }, - { - "epoch": 0.6175015251135362, - "grad_norm": 1.5329982001525995, - "learning_rate": 6.740766306913007e-07, - "loss": 1.1336, - "step": 4555 - }, - { - "epoch": 0.6176370907612011, - "grad_norm": 1.513929808896686, - "learning_rate": 6.736615028172183e-07, - "loss": 1.1784, - "step": 4556 - }, - { - "epoch": 0.617772656408866, - "grad_norm": 1.7145403934734202, - "learning_rate": 6.732464378749394e-07, - "loss": 1.1271, - "step": 4557 - }, - { - "epoch": 0.6179082220565308, - "grad_norm": 1.9515474808079747, - "learning_rate": 6.728314359445058e-07, - "loss": 1.2104, - "step": 4558 - }, - { - "epoch": 0.6180437877041958, - "grad_norm": 1.5547261198585394, - "learning_rate": 6.724164971059469e-07, - "loss": 1.1305, - "step": 4559 - }, - { - "epoch": 0.6181793533518606, - "grad_norm": 3.7162217853946755, - "learning_rate": 6.720016214392812e-07, - "loss": 1.1204, - "step": 4560 - }, - { - "epoch": 0.6183149189995255, - "grad_norm": 1.7855180324318014, - "learning_rate": 6.715868090245131e-07, - "loss": 1.1388, - "step": 4561 - }, - { - "epoch": 0.6184504846471904, - "grad_norm": 1.6265696238556115, - "learning_rate": 6.711720599416373e-07, - "loss": 1.0836, - "step": 4562 - }, - { - "epoch": 0.6185860502948552, - "grad_norm": 1.4425001368028796, - "learning_rate": 6.707573742706334e-07, - "loss": 1.1385, - "step": 4563 - }, - { - "epoch": 0.6187216159425202, - "grad_norm": 1.4977853390354534, - "learning_rate": 6.703427520914715e-07, - "loss": 1.0878, - "step": 4564 - }, - { - "epoch": 0.6188571815901851, - "grad_norm": 1.9155385393453983, - "learning_rate": 6.699281934841073e-07, - "loss": 1.1536, - "step": 4565 - }, - { - "epoch": 0.6189927472378499, - "grad_norm": 1.8150698968005594, - "learning_rate": 6.69513698528486e-07, - "loss": 1.144, - "step": 4566 - }, - { - "epoch": 0.6191283128855148, - "grad_norm": 1.4826149373039823, - "learning_rate": 6.69099267304539e-07, - "loss": 1.1377, - "step": 4567 - }, - { - "epoch": 0.6192638785331797, - "grad_norm": 2.343604164951536, - "learning_rate": 6.686848998921864e-07, - "loss": 1.143, - "step": 4568 - }, - { - "epoch": 0.6193994441808446, - "grad_norm": 2.006597385725254, - "learning_rate": 6.682705963713355e-07, - "loss": 1.1504, - "step": 4569 - }, - { - "epoch": 0.6195350098285095, - "grad_norm": 1.457132840918752, - "learning_rate": 6.678563568218816e-07, - "loss": 1.1273, - "step": 4570 - }, - { - "epoch": 0.6196705754761743, - "grad_norm": 2.3102343287120735, - "learning_rate": 6.674421813237079e-07, - "loss": 1.185, - "step": 4571 - }, - { - "epoch": 0.6198061411238392, - "grad_norm": 1.695611581758358, - "learning_rate": 6.670280699566841e-07, - "loss": 1.1274, - "step": 4572 - }, - { - "epoch": 0.6199417067715041, - "grad_norm": 1.8052143776893101, - "learning_rate": 6.666140228006687e-07, - "loss": 1.0977, - "step": 4573 - }, - { - "epoch": 0.620077272419169, - "grad_norm": 1.6759866667264236, - "learning_rate": 6.662000399355075e-07, - "loss": 1.179, - "step": 4574 - }, - { - "epoch": 0.6202128380668339, - "grad_norm": 1.835166891807368, - "learning_rate": 6.657861214410338e-07, - "loss": 1.1354, - "step": 4575 - }, - { - "epoch": 0.6203484037144987, - "grad_norm": 1.8615790234295002, - "learning_rate": 6.653722673970681e-07, - "loss": 1.1423, - "step": 4576 - }, - { - "epoch": 0.6204839693621637, - "grad_norm": 1.9810717438620913, - "learning_rate": 6.649584778834196e-07, - "loss": 1.1087, - "step": 4577 - }, - { - "epoch": 0.6206195350098285, - "grad_norm": 1.5744846927761758, - "learning_rate": 6.645447529798838e-07, - "loss": 1.1291, - "step": 4578 - }, - { - "epoch": 0.6207551006574934, - "grad_norm": 1.8739954347538756, - "learning_rate": 6.641310927662447e-07, - "loss": 1.1196, - "step": 4579 - }, - { - "epoch": 0.6208906663051583, - "grad_norm": 2.2242537124786614, - "learning_rate": 6.637174973222727e-07, - "loss": 1.1314, - "step": 4580 - }, - { - "epoch": 0.6210262319528231, - "grad_norm": 1.579689258828854, - "learning_rate": 6.633039667277274e-07, - "loss": 1.1459, - "step": 4581 - }, - { - "epoch": 0.6211617976004881, - "grad_norm": 1.7703161291220928, - "learning_rate": 6.62890501062354e-07, - "loss": 1.139, - "step": 4582 - }, - { - "epoch": 0.6212973632481529, - "grad_norm": 1.8889693043926918, - "learning_rate": 6.624771004058868e-07, - "loss": 1.146, - "step": 4583 - }, - { - "epoch": 0.6214329288958178, - "grad_norm": 1.7957769411102924, - "learning_rate": 6.620637648380463e-07, - "loss": 1.1509, - "step": 4584 - }, - { - "epoch": 0.6215684945434827, - "grad_norm": 1.6506737581912285, - "learning_rate": 6.616504944385415e-07, - "loss": 1.1092, - "step": 4585 - }, - { - "epoch": 0.6217040601911475, - "grad_norm": 2.175826034115448, - "learning_rate": 6.612372892870681e-07, - "loss": 1.1601, - "step": 4586 - }, - { - "epoch": 0.6218396258388125, - "grad_norm": 1.7160388410887775, - "learning_rate": 6.608241494633092e-07, - "loss": 1.1489, - "step": 4587 - }, - { - "epoch": 0.6219751914864773, - "grad_norm": 1.8788963302019355, - "learning_rate": 6.604110750469358e-07, - "loss": 1.1409, - "step": 4588 - }, - { - "epoch": 0.6221107571341422, - "grad_norm": 1.8648010724030464, - "learning_rate": 6.599980661176059e-07, - "loss": 1.1391, - "step": 4589 - }, - { - "epoch": 0.6222463227818071, - "grad_norm": 2.805344339195097, - "learning_rate": 6.595851227549656e-07, - "loss": 1.1072, - "step": 4590 - }, - { - "epoch": 0.622381888429472, - "grad_norm": 1.6361349441993225, - "learning_rate": 6.591722450386468e-07, - "loss": 1.1272, - "step": 4591 - }, - { - "epoch": 0.6225174540771369, - "grad_norm": 1.7896488313057757, - "learning_rate": 6.587594330482707e-07, - "loss": 1.1919, - "step": 4592 - }, - { - "epoch": 0.6226530197248017, - "grad_norm": 2.35721217153974, - "learning_rate": 6.583466868634437e-07, - "loss": 1.1486, - "step": 4593 - }, - { - "epoch": 0.6227885853724666, - "grad_norm": 1.6036256350064935, - "learning_rate": 6.579340065637619e-07, - "loss": 1.1372, - "step": 4594 - }, - { - "epoch": 0.6229241510201315, - "grad_norm": 1.7433083723020044, - "learning_rate": 6.575213922288064e-07, - "loss": 1.1243, - "step": 4595 - }, - { - "epoch": 0.6230597166677964, - "grad_norm": 1.5592022801139758, - "learning_rate": 6.571088439381475e-07, - "loss": 1.1445, - "step": 4596 - }, - { - "epoch": 0.6231952823154613, - "grad_norm": 1.8056318698046991, - "learning_rate": 6.566963617713412e-07, - "loss": 1.1657, - "step": 4597 - }, - { - "epoch": 0.6233308479631261, - "grad_norm": 1.3731170495690361, - "learning_rate": 6.562839458079315e-07, - "loss": 1.1263, - "step": 4598 - }, - { - "epoch": 0.623466413610791, - "grad_norm": 2.2313794770411373, - "learning_rate": 6.558715961274501e-07, - "loss": 1.1256, - "step": 4599 - }, - { - "epoch": 0.623601979258456, - "grad_norm": 1.6042756249550607, - "learning_rate": 6.554593128094145e-07, - "loss": 1.1344, - "step": 4600 - }, - { - "epoch": 0.6237375449061208, - "grad_norm": 1.6680688658062552, - "learning_rate": 6.550470959333313e-07, - "loss": 1.1216, - "step": 4601 - }, - { - "epoch": 0.6238731105537857, - "grad_norm": 1.4154834557758997, - "learning_rate": 6.546349455786925e-07, - "loss": 1.129, - "step": 4602 - }, - { - "epoch": 0.6240086762014505, - "grad_norm": 1.5947726115204481, - "learning_rate": 6.542228618249784e-07, - "loss": 1.1752, - "step": 4603 - }, - { - "epoch": 0.6241442418491154, - "grad_norm": 1.8500559020546727, - "learning_rate": 6.538108447516557e-07, - "loss": 1.1497, - "step": 4604 - }, - { - "epoch": 0.6242798074967804, - "grad_norm": 1.4434021302904376, - "learning_rate": 6.533988944381792e-07, - "loss": 1.1521, - "step": 4605 - }, - { - "epoch": 0.6244153731444452, - "grad_norm": 2.3252465195067282, - "learning_rate": 6.529870109639899e-07, - "loss": 1.123, - "step": 4606 - }, - { - "epoch": 0.6245509387921101, - "grad_norm": 1.8281085663447667, - "learning_rate": 6.525751944085166e-07, - "loss": 1.1638, - "step": 4607 - }, - { - "epoch": 0.6246865044397749, - "grad_norm": 1.56780936385411, - "learning_rate": 6.521634448511743e-07, - "loss": 1.1461, - "step": 4608 - }, - { - "epoch": 0.6248220700874398, - "grad_norm": 2.0601089584628554, - "learning_rate": 6.517517623713664e-07, - "loss": 1.166, - "step": 4609 - }, - { - "epoch": 0.6249576357351048, - "grad_norm": 5.563411859330603, - "learning_rate": 6.513401470484817e-07, - "loss": 1.1106, - "step": 4610 - }, - { - "epoch": 0.6250932013827696, - "grad_norm": 1.6153040812743757, - "learning_rate": 6.50928598961898e-07, - "loss": 1.1485, - "step": 4611 - }, - { - "epoch": 0.6252287670304345, - "grad_norm": 1.4182788003380116, - "learning_rate": 6.505171181909782e-07, - "loss": 1.1334, - "step": 4612 - }, - { - "epoch": 0.6253643326780993, - "grad_norm": 1.6051666785918448, - "learning_rate": 6.501057048150738e-07, - "loss": 1.1463, - "step": 4613 - }, - { - "epoch": 0.6254998983257642, - "grad_norm": 1.614119426304828, - "learning_rate": 6.496943589135225e-07, - "loss": 1.1381, - "step": 4614 - }, - { - "epoch": 0.6256354639734292, - "grad_norm": 1.8133526089481435, - "learning_rate": 6.492830805656484e-07, - "loss": 1.1612, - "step": 4615 - }, - { - "epoch": 0.625771029621094, - "grad_norm": 1.4997820738639664, - "learning_rate": 6.488718698507643e-07, - "loss": 1.1178, - "step": 4616 - }, - { - "epoch": 0.6259065952687589, - "grad_norm": 1.8020889293543205, - "learning_rate": 6.484607268481681e-07, - "loss": 1.171, - "step": 4617 - }, - { - "epoch": 0.6260421609164237, - "grad_norm": 1.7157573490192808, - "learning_rate": 6.480496516371461e-07, - "loss": 1.1652, - "step": 4618 - }, - { - "epoch": 0.6261777265640887, - "grad_norm": 2.354921352569446, - "learning_rate": 6.476386442969703e-07, - "loss": 1.1259, - "step": 4619 - }, - { - "epoch": 0.6263132922117536, - "grad_norm": 1.9191437554191901, - "learning_rate": 6.472277049069011e-07, - "loss": 1.1101, - "step": 4620 - }, - { - "epoch": 0.6264488578594184, - "grad_norm": 1.6601145572823235, - "learning_rate": 6.468168335461839e-07, - "loss": 1.1522, - "step": 4621 - }, - { - "epoch": 0.6265844235070833, - "grad_norm": 1.6051424023930336, - "learning_rate": 6.464060302940528e-07, - "loss": 1.1131, - "step": 4622 - }, - { - "epoch": 0.6267199891547481, - "grad_norm": 1.767607454114888, - "learning_rate": 6.459952952297274e-07, - "loss": 1.1434, - "step": 4623 - }, - { - "epoch": 0.6268555548024131, - "grad_norm": 1.55958350797291, - "learning_rate": 6.455846284324153e-07, - "loss": 1.1534, - "step": 4624 - }, - { - "epoch": 0.626991120450078, - "grad_norm": 2.492372355041594, - "learning_rate": 6.451740299813097e-07, - "loss": 1.1373, - "step": 4625 - }, - { - "epoch": 0.6271266860977428, - "grad_norm": 1.98182353622362, - "learning_rate": 6.447634999555919e-07, - "loss": 1.1643, - "step": 4626 - }, - { - "epoch": 0.6272622517454077, - "grad_norm": 12.82056275963859, - "learning_rate": 6.443530384344291e-07, - "loss": 1.1725, - "step": 4627 - }, - { - "epoch": 0.6273978173930725, - "grad_norm": 1.7850317125290267, - "learning_rate": 6.439426454969752e-07, - "loss": 1.122, - "step": 4628 - }, - { - "epoch": 0.6275333830407375, - "grad_norm": 1.6106855005822434, - "learning_rate": 6.435323212223718e-07, - "loss": 1.1228, - "step": 4629 - }, - { - "epoch": 0.6276689486884024, - "grad_norm": 1.5305925093216468, - "learning_rate": 6.431220656897463e-07, - "loss": 1.0872, - "step": 4630 - }, - { - "epoch": 0.6278045143360672, - "grad_norm": 1.649052085570948, - "learning_rate": 6.427118789782136e-07, - "loss": 1.1606, - "step": 4631 - }, - { - "epoch": 0.6279400799837321, - "grad_norm": 1.5856030258830829, - "learning_rate": 6.423017611668744e-07, - "loss": 1.1256, - "step": 4632 - }, - { - "epoch": 0.628075645631397, - "grad_norm": 1.5363467683365424, - "learning_rate": 6.418917123348176e-07, - "loss": 1.1662, - "step": 4633 - }, - { - "epoch": 0.6282112112790619, - "grad_norm": 1.4294900637266494, - "learning_rate": 6.41481732561117e-07, - "loss": 1.1323, - "step": 4634 - }, - { - "epoch": 0.6283467769267268, - "grad_norm": 1.6287397354727207, - "learning_rate": 6.410718219248344e-07, - "loss": 1.1529, - "step": 4635 - }, - { - "epoch": 0.6284823425743916, - "grad_norm": 1.6348056530672597, - "learning_rate": 6.406619805050177e-07, - "loss": 1.1205, - "step": 4636 - }, - { - "epoch": 0.6286179082220565, - "grad_norm": 1.5619496923376825, - "learning_rate": 6.402522083807016e-07, - "loss": 1.1634, - "step": 4637 - }, - { - "epoch": 0.6287534738697214, - "grad_norm": 1.802975792221973, - "learning_rate": 6.398425056309073e-07, - "loss": 1.1303, - "step": 4638 - }, - { - "epoch": 0.6288890395173863, - "grad_norm": 1.6690348815072635, - "learning_rate": 6.394328723346433e-07, - "loss": 1.1457, - "step": 4639 - }, - { - "epoch": 0.6290246051650512, - "grad_norm": 1.6706408250776914, - "learning_rate": 6.390233085709034e-07, - "loss": 1.1516, - "step": 4640 - }, - { - "epoch": 0.629160170812716, - "grad_norm": 4.375835609605315, - "learning_rate": 6.386138144186693e-07, - "loss": 1.1406, - "step": 4641 - }, - { - "epoch": 0.629295736460381, - "grad_norm": 1.5263945532607583, - "learning_rate": 6.382043899569083e-07, - "loss": 1.1083, - "step": 4642 - }, - { - "epoch": 0.6294313021080459, - "grad_norm": 1.431262459980951, - "learning_rate": 6.377950352645748e-07, - "loss": 1.1074, - "step": 4643 - }, - { - "epoch": 0.6295668677557107, - "grad_norm": 1.6836188236534182, - "learning_rate": 6.373857504206099e-07, - "loss": 1.1443, - "step": 4644 - }, - { - "epoch": 0.6297024334033756, - "grad_norm": 1.8225218100296694, - "learning_rate": 6.369765355039405e-07, - "loss": 1.1345, - "step": 4645 - }, - { - "epoch": 0.6298379990510404, - "grad_norm": 2.2123773430005027, - "learning_rate": 6.365673905934809e-07, - "loss": 1.0915, - "step": 4646 - }, - { - "epoch": 0.6299735646987054, - "grad_norm": 4.097555054526147, - "learning_rate": 6.361583157681309e-07, - "loss": 1.1149, - "step": 4647 - }, - { - "epoch": 0.6301091303463703, - "grad_norm": 1.5832150835040408, - "learning_rate": 6.357493111067781e-07, - "loss": 1.1689, - "step": 4648 - }, - { - "epoch": 0.6302446959940351, - "grad_norm": 1.6145678294733874, - "learning_rate": 6.353403766882951e-07, - "loss": 1.1633, - "step": 4649 - }, - { - "epoch": 0.6303802616417, - "grad_norm": 1.965163899410995, - "learning_rate": 6.349315125915424e-07, - "loss": 1.1704, - "step": 4650 - }, - { - "epoch": 0.6305158272893648, - "grad_norm": 1.5180820903086434, - "learning_rate": 6.345227188953653e-07, - "loss": 1.1188, - "step": 4651 - }, - { - "epoch": 0.6306513929370298, - "grad_norm": 1.5367566013319123, - "learning_rate": 6.341139956785974e-07, - "loss": 1.1378, - "step": 4652 - }, - { - "epoch": 0.6307869585846947, - "grad_norm": 2.207456740524406, - "learning_rate": 6.337053430200571e-07, - "loss": 1.1632, - "step": 4653 - }, - { - "epoch": 0.6309225242323595, - "grad_norm": 16.20860539415881, - "learning_rate": 6.332967609985502e-07, - "loss": 1.1237, - "step": 4654 - }, - { - "epoch": 0.6310580898800244, - "grad_norm": 1.661158650124365, - "learning_rate": 6.328882496928685e-07, - "loss": 1.0898, - "step": 4655 - }, - { - "epoch": 0.6311936555276892, - "grad_norm": 1.5356206744852985, - "learning_rate": 6.324798091817897e-07, - "loss": 1.1439, - "step": 4656 - }, - { - "epoch": 0.6313292211753542, - "grad_norm": 2.1011570288322714, - "learning_rate": 6.320714395440789e-07, - "loss": 1.1174, - "step": 4657 - }, - { - "epoch": 0.6314647868230191, - "grad_norm": 2.6732494514444216, - "learning_rate": 6.316631408584865e-07, - "loss": 1.1335, - "step": 4658 - }, - { - "epoch": 0.6316003524706839, - "grad_norm": 1.4332297177470865, - "learning_rate": 6.312549132037501e-07, - "loss": 1.095, - "step": 4659 - }, - { - "epoch": 0.6317359181183488, - "grad_norm": 1.6472704030842156, - "learning_rate": 6.308467566585927e-07, - "loss": 1.1242, - "step": 4660 - }, - { - "epoch": 0.6318714837660137, - "grad_norm": 1.6485419576158205, - "learning_rate": 6.304386713017249e-07, - "loss": 1.1428, - "step": 4661 - }, - { - "epoch": 0.6320070494136786, - "grad_norm": 2.042215452039785, - "learning_rate": 6.300306572118417e-07, - "loss": 1.1292, - "step": 4662 - }, - { - "epoch": 0.6321426150613435, - "grad_norm": 20.10658789247313, - "learning_rate": 6.296227144676262e-07, - "loss": 1.1413, - "step": 4663 - }, - { - "epoch": 0.6322781807090083, - "grad_norm": 1.6087040865507323, - "learning_rate": 6.292148431477465e-07, - "loss": 1.1315, - "step": 4664 - }, - { - "epoch": 0.6324137463566732, - "grad_norm": 1.3815119697078648, - "learning_rate": 6.288070433308575e-07, - "loss": 1.1367, - "step": 4665 - }, - { - "epoch": 0.6325493120043381, - "grad_norm": 2.217856334669187, - "learning_rate": 6.283993150956002e-07, - "loss": 1.1402, - "step": 4666 - }, - { - "epoch": 0.632684877652003, - "grad_norm": 1.7701314919303819, - "learning_rate": 6.279916585206018e-07, - "loss": 1.1077, - "step": 4667 - }, - { - "epoch": 0.6328204432996679, - "grad_norm": 1.5174131112388105, - "learning_rate": 6.275840736844754e-07, - "loss": 1.1156, - "step": 4668 - }, - { - "epoch": 0.6329560089473327, - "grad_norm": 1.6118489874443953, - "learning_rate": 6.27176560665821e-07, - "loss": 1.113, - "step": 4669 - }, - { - "epoch": 0.6330915745949977, - "grad_norm": 2.0550219207908658, - "learning_rate": 6.267691195432239e-07, - "loss": 1.1311, - "step": 4670 - }, - { - "epoch": 0.6332271402426625, - "grad_norm": 1.4253436975563871, - "learning_rate": 6.263617503952559e-07, - "loss": 1.1468, - "step": 4671 - }, - { - "epoch": 0.6333627058903274, - "grad_norm": 6.9655790907520885, - "learning_rate": 6.259544533004751e-07, - "loss": 1.1878, - "step": 4672 - }, - { - "epoch": 0.6334982715379923, - "grad_norm": 1.5937455338257902, - "learning_rate": 6.255472283374253e-07, - "loss": 1.1152, - "step": 4673 - }, - { - "epoch": 0.6336338371856571, - "grad_norm": 2.0095629658355803, - "learning_rate": 6.251400755846371e-07, - "loss": 1.1497, - "step": 4674 - }, - { - "epoch": 0.6337694028333221, - "grad_norm": 1.4772086517309146, - "learning_rate": 6.247329951206259e-07, - "loss": 1.1321, - "step": 4675 - }, - { - "epoch": 0.6339049684809869, - "grad_norm": 1.6177941983639816, - "learning_rate": 6.243259870238948e-07, - "loss": 1.1245, - "step": 4676 - }, - { - "epoch": 0.6340405341286518, - "grad_norm": 1.7105982717904655, - "learning_rate": 6.239190513729313e-07, - "loss": 1.1156, - "step": 4677 - }, - { - "epoch": 0.6341760997763167, - "grad_norm": 1.6093805552788716, - "learning_rate": 6.235121882462107e-07, - "loss": 1.1478, - "step": 4678 - }, - { - "epoch": 0.6343116654239815, - "grad_norm": 1.4381519120867787, - "learning_rate": 6.23105397722192e-07, - "loss": 1.1458, - "step": 4679 - }, - { - "epoch": 0.6344472310716465, - "grad_norm": 1.7302899964056457, - "learning_rate": 6.226986798793231e-07, - "loss": 1.1125, - "step": 4680 - }, - { - "epoch": 0.6345827967193113, - "grad_norm": 2.1471012207023707, - "learning_rate": 6.22292034796035e-07, - "loss": 1.1207, - "step": 4681 - }, - { - "epoch": 0.6347183623669762, - "grad_norm": 1.7922990960906722, - "learning_rate": 6.21885462550747e-07, - "loss": 1.1396, - "step": 4682 - }, - { - "epoch": 0.6348539280146411, - "grad_norm": 2.1389509784101652, - "learning_rate": 6.214789632218628e-07, - "loss": 1.177, - "step": 4683 - }, - { - "epoch": 0.634989493662306, - "grad_norm": 2.027334924419264, - "learning_rate": 6.210725368877723e-07, - "loss": 1.0614, - "step": 4684 - }, - { - "epoch": 0.6351250593099709, - "grad_norm": 1.4843095727688944, - "learning_rate": 6.206661836268525e-07, - "loss": 1.1203, - "step": 4685 - }, - { - "epoch": 0.6352606249576357, - "grad_norm": 1.54520779913543, - "learning_rate": 6.202599035174645e-07, - "loss": 1.1026, - "step": 4686 - }, - { - "epoch": 0.6353961906053006, - "grad_norm": 1.6665335044803176, - "learning_rate": 6.19853696637957e-07, - "loss": 1.0938, - "step": 4687 - }, - { - "epoch": 0.6355317562529655, - "grad_norm": 2.213570921451466, - "learning_rate": 6.194475630666629e-07, - "loss": 1.1767, - "step": 4688 - }, - { - "epoch": 0.6356673219006304, - "grad_norm": 1.7015229885607612, - "learning_rate": 6.190415028819029e-07, - "loss": 1.0971, - "step": 4689 - }, - { - "epoch": 0.6358028875482953, - "grad_norm": 1.6044426966140521, - "learning_rate": 6.186355161619814e-07, - "loss": 1.1315, - "step": 4690 - }, - { - "epoch": 0.6359384531959601, - "grad_norm": 1.8635318509353649, - "learning_rate": 6.182296029851908e-07, - "loss": 1.1403, - "step": 4691 - }, - { - "epoch": 0.636074018843625, - "grad_norm": 3.197957759710942, - "learning_rate": 6.178237634298073e-07, - "loss": 1.1529, - "step": 4692 - }, - { - "epoch": 0.63620958449129, - "grad_norm": 2.0791145267831213, - "learning_rate": 6.174179975740949e-07, - "loss": 1.1402, - "step": 4693 - }, - { - "epoch": 0.6363451501389548, - "grad_norm": 1.7128785194045597, - "learning_rate": 6.170123054963012e-07, - "loss": 1.1092, - "step": 4694 - }, - { - "epoch": 0.6364807157866197, - "grad_norm": 1.590221654974441, - "learning_rate": 6.166066872746616e-07, - "loss": 1.1595, - "step": 4695 - }, - { - "epoch": 0.6366162814342845, - "grad_norm": 1.5794912366665248, - "learning_rate": 6.162011429873959e-07, - "loss": 1.1366, - "step": 4696 - }, - { - "epoch": 0.6367518470819494, - "grad_norm": 1.3919189143376318, - "learning_rate": 6.157956727127102e-07, - "loss": 1.1023, - "step": 4697 - }, - { - "epoch": 0.6368874127296144, - "grad_norm": 1.5029417389744626, - "learning_rate": 6.153902765287966e-07, - "loss": 1.1213, - "step": 4698 - }, - { - "epoch": 0.6370229783772792, - "grad_norm": 1.7704918248350094, - "learning_rate": 6.149849545138319e-07, - "loss": 1.0979, - "step": 4699 - }, - { - "epoch": 0.6371585440249441, - "grad_norm": 1.736574976808114, - "learning_rate": 6.145797067459799e-07, - "loss": 1.1276, - "step": 4700 - }, - { - "epoch": 0.6372941096726089, - "grad_norm": 1.5461458964233954, - "learning_rate": 6.141745333033889e-07, - "loss": 1.1086, - "step": 4701 - }, - { - "epoch": 0.6374296753202738, - "grad_norm": 1.7901803042219957, - "learning_rate": 6.137694342641937e-07, - "loss": 1.1261, - "step": 4702 - }, - { - "epoch": 0.6375652409679388, - "grad_norm": 1.778634607121448, - "learning_rate": 6.133644097065143e-07, - "loss": 1.1388, - "step": 4703 - }, - { - "epoch": 0.6377008066156036, - "grad_norm": 1.6734839179700316, - "learning_rate": 6.129594597084567e-07, - "loss": 1.1171, - "step": 4704 - }, - { - "epoch": 0.6378363722632685, - "grad_norm": 1.7462402796992553, - "learning_rate": 6.125545843481119e-07, - "loss": 1.0836, - "step": 4705 - }, - { - "epoch": 0.6379719379109333, - "grad_norm": 1.4530541675334887, - "learning_rate": 6.121497837035576e-07, - "loss": 1.0843, - "step": 4706 - }, - { - "epoch": 0.6381075035585982, - "grad_norm": 1.5630401903611495, - "learning_rate": 6.117450578528556e-07, - "loss": 1.1184, - "step": 4707 - }, - { - "epoch": 0.6382430692062632, - "grad_norm": 4.293753479795108, - "learning_rate": 6.11340406874055e-07, - "loss": 1.1605, - "step": 4708 - }, - { - "epoch": 0.638378634853928, - "grad_norm": 1.545013820740773, - "learning_rate": 6.109358308451885e-07, - "loss": 1.1107, - "step": 4709 - }, - { - "epoch": 0.6385142005015929, - "grad_norm": 1.4591484714184346, - "learning_rate": 6.105313298442764e-07, - "loss": 1.1374, - "step": 4710 - }, - { - "epoch": 0.6386497661492577, - "grad_norm": 1.7091342994456478, - "learning_rate": 6.10126903949323e-07, - "loss": 1.161, - "step": 4711 - }, - { - "epoch": 0.6387853317969227, - "grad_norm": 1.513614848739321, - "learning_rate": 6.097225532383184e-07, - "loss": 1.1042, - "step": 4712 - }, - { - "epoch": 0.6389208974445876, - "grad_norm": 1.6440060978913553, - "learning_rate": 6.093182777892392e-07, - "loss": 1.1379, - "step": 4713 - }, - { - "epoch": 0.6390564630922524, - "grad_norm": 1.5879174377576, - "learning_rate": 6.089140776800456e-07, - "loss": 1.1292, - "step": 4714 - }, - { - "epoch": 0.6391920287399173, - "grad_norm": 1.7243121830493096, - "learning_rate": 6.085099529886857e-07, - "loss": 1.1219, - "step": 4715 - }, - { - "epoch": 0.6393275943875821, - "grad_norm": 3.281395686382957, - "learning_rate": 6.081059037930907e-07, - "loss": 1.143, - "step": 4716 - }, - { - "epoch": 0.6394631600352471, - "grad_norm": 1.6399865390090904, - "learning_rate": 6.07701930171179e-07, - "loss": 1.135, - "step": 4717 - }, - { - "epoch": 0.639598725682912, - "grad_norm": 4.069768460085548, - "learning_rate": 6.072980322008532e-07, - "loss": 1.1121, - "step": 4718 - }, - { - "epoch": 0.6397342913305768, - "grad_norm": 1.7664382249474369, - "learning_rate": 6.068942099600025e-07, - "loss": 1.1206, - "step": 4719 - }, - { - "epoch": 0.6398698569782417, - "grad_norm": 1.9773410202404775, - "learning_rate": 6.064904635264999e-07, - "loss": 1.1282, - "step": 4720 - }, - { - "epoch": 0.6400054226259067, - "grad_norm": 1.5169785420591682, - "learning_rate": 6.060867929782057e-07, - "loss": 1.1125, - "step": 4721 - }, - { - "epoch": 0.6401409882735715, - "grad_norm": 1.7123588170228587, - "learning_rate": 6.056831983929638e-07, - "loss": 1.0986, - "step": 4722 - }, - { - "epoch": 0.6402765539212364, - "grad_norm": 1.373797065466126, - "learning_rate": 6.052796798486049e-07, - "loss": 1.1288, - "step": 4723 - }, - { - "epoch": 0.6404121195689012, - "grad_norm": 1.8121542652861544, - "learning_rate": 6.048762374229435e-07, - "loss": 1.1312, - "step": 4724 - }, - { - "epoch": 0.6405476852165661, - "grad_norm": 1.561042616135415, - "learning_rate": 6.044728711937812e-07, - "loss": 1.1254, - "step": 4725 - }, - { - "epoch": 0.6406832508642311, - "grad_norm": 2.4534404249585133, - "learning_rate": 6.040695812389036e-07, - "loss": 1.1507, - "step": 4726 - }, - { - "epoch": 0.6408188165118959, - "grad_norm": 3.0914819205445676, - "learning_rate": 6.036663676360816e-07, - "loss": 1.1142, - "step": 4727 - }, - { - "epoch": 0.6409543821595608, - "grad_norm": 1.64063592275413, - "learning_rate": 6.032632304630726e-07, - "loss": 1.1491, - "step": 4728 - }, - { - "epoch": 0.6410899478072256, - "grad_norm": 1.851200188184754, - "learning_rate": 6.028601697976175e-07, - "loss": 1.1334, - "step": 4729 - }, - { - "epoch": 0.6412255134548905, - "grad_norm": 1.517280575791147, - "learning_rate": 6.024571857174442e-07, - "loss": 1.1283, - "step": 4730 - }, - { - "epoch": 0.6413610791025555, - "grad_norm": 3.634174145098893, - "learning_rate": 6.020542783002643e-07, - "loss": 1.1651, - "step": 4731 - }, - { - "epoch": 0.6414966447502203, - "grad_norm": 1.3087396030520786, - "learning_rate": 6.01651447623776e-07, - "loss": 1.0855, - "step": 4732 - }, - { - "epoch": 0.6416322103978852, - "grad_norm": 1.5188945487899586, - "learning_rate": 6.012486937656613e-07, - "loss": 1.1287, - "step": 4733 - }, - { - "epoch": 0.64176777604555, - "grad_norm": 1.8050243655585028, - "learning_rate": 6.008460168035887e-07, - "loss": 1.159, - "step": 4734 - }, - { - "epoch": 0.641903341693215, - "grad_norm": 1.6359350392699559, - "learning_rate": 6.004434168152109e-07, - "loss": 1.1154, - "step": 4735 - }, - { - "epoch": 0.6420389073408799, - "grad_norm": 1.456111712271028, - "learning_rate": 6.000408938781665e-07, - "loss": 1.135, - "step": 4736 - }, - { - "epoch": 0.6421744729885447, - "grad_norm": 1.622122018698129, - "learning_rate": 5.996384480700783e-07, - "loss": 1.1304, - "step": 4737 - }, - { - "epoch": 0.6423100386362096, - "grad_norm": 1.9474343005938992, - "learning_rate": 5.992360794685554e-07, - "loss": 1.1186, - "step": 4738 - }, - { - "epoch": 0.6424456042838744, - "grad_norm": 1.6444491076014611, - "learning_rate": 5.988337881511909e-07, - "loss": 1.1348, - "step": 4739 - }, - { - "epoch": 0.6425811699315394, - "grad_norm": 5.260249591569377, - "learning_rate": 5.984315741955639e-07, - "loss": 1.1501, - "step": 4740 - }, - { - "epoch": 0.6427167355792043, - "grad_norm": 2.2533240837166537, - "learning_rate": 5.98029437679238e-07, - "loss": 1.1949, - "step": 4741 - }, - { - "epoch": 0.6428523012268691, - "grad_norm": 1.3841362051466948, - "learning_rate": 5.976273786797619e-07, - "loss": 1.1675, - "step": 4742 - }, - { - "epoch": 0.642987866874534, - "grad_norm": 1.7663484814430361, - "learning_rate": 5.972253972746701e-07, - "loss": 1.1477, - "step": 4743 - }, - { - "epoch": 0.6431234325221988, - "grad_norm": 1.6236263029833253, - "learning_rate": 5.968234935414807e-07, - "loss": 1.1011, - "step": 4744 - }, - { - "epoch": 0.6432589981698638, - "grad_norm": 1.4754618043916852, - "learning_rate": 5.964216675576983e-07, - "loss": 1.089, - "step": 4745 - }, - { - "epoch": 0.6433945638175287, - "grad_norm": 1.4385016088441636, - "learning_rate": 5.960199194008115e-07, - "loss": 1.1391, - "step": 4746 - }, - { - "epoch": 0.6435301294651935, - "grad_norm": 1.7842562045011214, - "learning_rate": 5.956182491482946e-07, - "loss": 1.1381, - "step": 4747 - }, - { - "epoch": 0.6436656951128584, - "grad_norm": 1.5534908489610553, - "learning_rate": 5.952166568776062e-07, - "loss": 1.0876, - "step": 4748 - }, - { - "epoch": 0.6438012607605232, - "grad_norm": 1.456589360284409, - "learning_rate": 5.948151426661904e-07, - "loss": 1.1559, - "step": 4749 - }, - { - "epoch": 0.6439368264081882, - "grad_norm": 1.7936609741484348, - "learning_rate": 5.944137065914759e-07, - "loss": 1.1087, - "step": 4750 - }, - { - "epoch": 0.6440723920558531, - "grad_norm": 1.8449583051647789, - "learning_rate": 5.94012348730877e-07, - "loss": 1.1351, - "step": 4751 - }, - { - "epoch": 0.6442079577035179, - "grad_norm": 1.5290777489414773, - "learning_rate": 5.936110691617915e-07, - "loss": 1.1397, - "step": 4752 - }, - { - "epoch": 0.6443435233511828, - "grad_norm": 1.5649258629166274, - "learning_rate": 5.932098679616038e-07, - "loss": 1.1161, - "step": 4753 - }, - { - "epoch": 0.6444790889988476, - "grad_norm": 1.5618133551614133, - "learning_rate": 5.928087452076821e-07, - "loss": 1.1021, - "step": 4754 - }, - { - "epoch": 0.6446146546465126, - "grad_norm": 2.469582549653953, - "learning_rate": 5.924077009773794e-07, - "loss": 1.143, - "step": 4755 - }, - { - "epoch": 0.6447502202941775, - "grad_norm": 2.0495477986063064, - "learning_rate": 5.920067353480345e-07, - "loss": 1.1024, - "step": 4756 - }, - { - "epoch": 0.6448857859418423, - "grad_norm": 1.840210373717833, - "learning_rate": 5.916058483969698e-07, - "loss": 1.1559, - "step": 4757 - }, - { - "epoch": 0.6450213515895072, - "grad_norm": 5.983948953427069, - "learning_rate": 5.912050402014941e-07, - "loss": 1.1158, - "step": 4758 - }, - { - "epoch": 0.6451569172371721, - "grad_norm": 1.5250035233833985, - "learning_rate": 5.908043108388989e-07, - "loss": 1.1256, - "step": 4759 - }, - { - "epoch": 0.645292482884837, - "grad_norm": 1.8014221589616437, - "learning_rate": 5.90403660386463e-07, - "loss": 1.1402, - "step": 4760 - }, - { - "epoch": 0.6454280485325019, - "grad_norm": 2.178085009790153, - "learning_rate": 5.900030889214476e-07, - "loss": 1.1459, - "step": 4761 - }, - { - "epoch": 0.6455636141801667, - "grad_norm": 1.7672833105832866, - "learning_rate": 5.896025965211005e-07, - "loss": 1.0965, - "step": 4762 - }, - { - "epoch": 0.6456991798278316, - "grad_norm": 1.9827291389676238, - "learning_rate": 5.89202183262653e-07, - "loss": 1.113, - "step": 4763 - }, - { - "epoch": 0.6458347454754965, - "grad_norm": 2.0989667300957597, - "learning_rate": 5.888018492233219e-07, - "loss": 1.1433, - "step": 4764 - }, - { - "epoch": 0.6459703111231614, - "grad_norm": 1.4925023613189676, - "learning_rate": 5.884015944803084e-07, - "loss": 1.1254, - "step": 4765 - }, - { - "epoch": 0.6461058767708263, - "grad_norm": 1.7983804038807079, - "learning_rate": 5.880014191107982e-07, - "loss": 1.1188, - "step": 4766 - }, - { - "epoch": 0.6462414424184911, - "grad_norm": 2.9109856295749967, - "learning_rate": 5.876013231919628e-07, - "loss": 1.1262, - "step": 4767 - }, - { - "epoch": 0.6463770080661561, - "grad_norm": 1.7268479507998833, - "learning_rate": 5.872013068009565e-07, - "loss": 1.1202, - "step": 4768 - }, - { - "epoch": 0.6465125737138209, - "grad_norm": 2.23156694876669, - "learning_rate": 5.868013700149197e-07, - "loss": 1.1006, - "step": 4769 - }, - { - "epoch": 0.6466481393614858, - "grad_norm": 1.4390360160047948, - "learning_rate": 5.864015129109771e-07, - "loss": 1.1427, - "step": 4770 - }, - { - "epoch": 0.6467837050091507, - "grad_norm": 1.7992384844511071, - "learning_rate": 5.860017355662381e-07, - "loss": 1.1763, - "step": 4771 - }, - { - "epoch": 0.6469192706568155, - "grad_norm": 1.394781329931129, - "learning_rate": 5.856020380577964e-07, - "loss": 1.106, - "step": 4772 - }, - { - "epoch": 0.6470548363044805, - "grad_norm": 1.5666544851153166, - "learning_rate": 5.852024204627308e-07, - "loss": 1.1172, - "step": 4773 - }, - { - "epoch": 0.6471904019521453, - "grad_norm": 1.7726511091335129, - "learning_rate": 5.84802882858104e-07, - "loss": 1.1376, - "step": 4774 - }, - { - "epoch": 0.6473259675998102, - "grad_norm": 1.8837075199723314, - "learning_rate": 5.844034253209641e-07, - "loss": 1.1044, - "step": 4775 - }, - { - "epoch": 0.6474615332474751, - "grad_norm": 1.4140192018698483, - "learning_rate": 5.840040479283428e-07, - "loss": 1.1207, - "step": 4776 - }, - { - "epoch": 0.6475970988951399, - "grad_norm": 2.027173616106004, - "learning_rate": 5.836047507572575e-07, - "loss": 1.1225, - "step": 4777 - }, - { - "epoch": 0.6477326645428049, - "grad_norm": 1.560800280468979, - "learning_rate": 5.832055338847089e-07, - "loss": 1.1192, - "step": 4778 - }, - { - "epoch": 0.6478682301904697, - "grad_norm": 1.5163969393808416, - "learning_rate": 5.828063973876833e-07, - "loss": 1.1313, - "step": 4779 - }, - { - "epoch": 0.6480037958381346, - "grad_norm": 1.5937114658688518, - "learning_rate": 5.824073413431507e-07, - "loss": 1.1015, - "step": 4780 - }, - { - "epoch": 0.6481393614857995, - "grad_norm": 1.4884182323335657, - "learning_rate": 5.820083658280661e-07, - "loss": 1.1212, - "step": 4781 - }, - { - "epoch": 0.6482749271334644, - "grad_norm": 1.3985217282443203, - "learning_rate": 5.816094709193688e-07, - "loss": 1.1238, - "step": 4782 - }, - { - "epoch": 0.6484104927811293, - "grad_norm": 1.5505452058544897, - "learning_rate": 5.812106566939824e-07, - "loss": 1.1192, - "step": 4783 - }, - { - "epoch": 0.6485460584287941, - "grad_norm": 1.774965276246935, - "learning_rate": 5.808119232288151e-07, - "loss": 1.1294, - "step": 4784 - }, - { - "epoch": 0.648681624076459, - "grad_norm": 1.9032722818287082, - "learning_rate": 5.804132706007597e-07, - "loss": 1.1548, - "step": 4785 - }, - { - "epoch": 0.6488171897241239, - "grad_norm": 1.751331187769959, - "learning_rate": 5.800146988866927e-07, - "loss": 1.1353, - "step": 4786 - }, - { - "epoch": 0.6489527553717888, - "grad_norm": 1.482689811907791, - "learning_rate": 5.796162081634761e-07, - "loss": 1.1205, - "step": 4787 - }, - { - "epoch": 0.6490883210194537, - "grad_norm": 1.7631365838050608, - "learning_rate": 5.792177985079558e-07, - "loss": 1.0928, - "step": 4788 - }, - { - "epoch": 0.6492238866671185, - "grad_norm": 1.6598386292132472, - "learning_rate": 5.788194699969608e-07, - "loss": 1.1539, - "step": 4789 - }, - { - "epoch": 0.6493594523147834, - "grad_norm": 1.5260592144897889, - "learning_rate": 5.784212227073073e-07, - "loss": 1.1258, - "step": 4790 - }, - { - "epoch": 0.6494950179624484, - "grad_norm": 1.6598695501727219, - "learning_rate": 5.780230567157924e-07, - "loss": 1.1726, - "step": 4791 - }, - { - "epoch": 0.6496305836101132, - "grad_norm": 1.674905777860736, - "learning_rate": 5.776249720992009e-07, - "loss": 1.1137, - "step": 4792 - }, - { - "epoch": 0.6497661492577781, - "grad_norm": 1.645837848639911, - "learning_rate": 5.772269689342988e-07, - "loss": 1.1131, - "step": 4793 - }, - { - "epoch": 0.6499017149054429, - "grad_norm": 1.887975526385798, - "learning_rate": 5.768290472978392e-07, - "loss": 1.0866, - "step": 4794 - }, - { - "epoch": 0.6500372805531078, - "grad_norm": 1.834810884157617, - "learning_rate": 5.764312072665574e-07, - "loss": 1.1586, - "step": 4795 - }, - { - "epoch": 0.6501728462007728, - "grad_norm": 1.6201278155326666, - "learning_rate": 5.760334489171735e-07, - "loss": 1.1236, - "step": 4796 - }, - { - "epoch": 0.6503084118484376, - "grad_norm": 1.5993838670254967, - "learning_rate": 5.756357723263926e-07, - "loss": 1.1498, - "step": 4797 - }, - { - "epoch": 0.6504439774961025, - "grad_norm": 1.6927508746378985, - "learning_rate": 5.752381775709032e-07, - "loss": 1.1423, - "step": 4798 - }, - { - "epoch": 0.6505795431437674, - "grad_norm": 1.3839523761065653, - "learning_rate": 5.748406647273784e-07, - "loss": 1.1216, - "step": 4799 - }, - { - "epoch": 0.6507151087914322, - "grad_norm": 2.0003171969273565, - "learning_rate": 5.744432338724754e-07, - "loss": 1.1294, - "step": 4800 - }, - { - "epoch": 0.6508506744390972, - "grad_norm": 1.5449604132150574, - "learning_rate": 5.740458850828356e-07, - "loss": 1.1168, - "step": 4801 - }, - { - "epoch": 0.650986240086762, - "grad_norm": 1.9171615869878587, - "learning_rate": 5.736486184350846e-07, - "loss": 1.1667, - "step": 4802 - }, - { - "epoch": 0.6511218057344269, - "grad_norm": 1.9458848163108826, - "learning_rate": 5.732514340058321e-07, - "loss": 1.0991, - "step": 4803 - }, - { - "epoch": 0.6512573713820918, - "grad_norm": 1.840157615495904, - "learning_rate": 5.728543318716721e-07, - "loss": 1.1912, - "step": 4804 - }, - { - "epoch": 0.6513929370297566, - "grad_norm": 1.6110570960689108, - "learning_rate": 5.724573121091825e-07, - "loss": 1.1832, - "step": 4805 - }, - { - "epoch": 0.6515285026774216, - "grad_norm": 1.7562075168769882, - "learning_rate": 5.720603747949253e-07, - "loss": 1.1925, - "step": 4806 - }, - { - "epoch": 0.6516640683250864, - "grad_norm": 2.2906608286258017, - "learning_rate": 5.716635200054469e-07, - "loss": 1.1217, - "step": 4807 - }, - { - "epoch": 0.6517996339727513, - "grad_norm": 1.7656550550543244, - "learning_rate": 5.712667478172776e-07, - "loss": 1.1791, - "step": 4808 - }, - { - "epoch": 0.6519351996204162, - "grad_norm": 1.4147632488668616, - "learning_rate": 5.708700583069319e-07, - "loss": 1.1003, - "step": 4809 - }, - { - "epoch": 0.652070765268081, - "grad_norm": 1.6987489598107293, - "learning_rate": 5.704734515509085e-07, - "loss": 1.1472, - "step": 4810 - }, - { - "epoch": 0.652206330915746, - "grad_norm": 1.5394448023122558, - "learning_rate": 5.700769276256886e-07, - "loss": 1.1505, - "step": 4811 - }, - { - "epoch": 0.6523418965634108, - "grad_norm": 3.409492384711982, - "learning_rate": 5.696804866077404e-07, - "loss": 1.159, - "step": 4812 - }, - { - "epoch": 0.6524774622110757, - "grad_norm": 1.385971446420812, - "learning_rate": 5.692841285735128e-07, - "loss": 1.1361, - "step": 4813 - }, - { - "epoch": 0.6526130278587406, - "grad_norm": 1.6786071088656378, - "learning_rate": 5.68887853599442e-07, - "loss": 1.1433, - "step": 4814 - }, - { - "epoch": 0.6527485935064055, - "grad_norm": 1.9365883718795547, - "learning_rate": 5.684916617619453e-07, - "loss": 1.1428, - "step": 4815 - }, - { - "epoch": 0.6528841591540704, - "grad_norm": 1.5444935150035093, - "learning_rate": 5.680955531374255e-07, - "loss": 1.1516, - "step": 4816 - }, - { - "epoch": 0.6530197248017352, - "grad_norm": 1.6008589819816113, - "learning_rate": 5.676995278022688e-07, - "loss": 1.1516, - "step": 4817 - }, - { - "epoch": 0.6531552904494001, - "grad_norm": 1.6389993935419649, - "learning_rate": 5.67303585832846e-07, - "loss": 1.1274, - "step": 4818 - }, - { - "epoch": 0.653290856097065, - "grad_norm": 1.422135320040644, - "learning_rate": 5.669077273055111e-07, - "loss": 1.1394, - "step": 4819 - }, - { - "epoch": 0.6534264217447299, - "grad_norm": 1.578812663583261, - "learning_rate": 5.665119522966024e-07, - "loss": 1.1387, - "step": 4820 - }, - { - "epoch": 0.6535619873923948, - "grad_norm": 1.790432260707421, - "learning_rate": 5.661162608824419e-07, - "loss": 1.1066, - "step": 4821 - }, - { - "epoch": 0.6536975530400596, - "grad_norm": 1.7637080109842824, - "learning_rate": 5.657206531393358e-07, - "loss": 1.1121, - "step": 4822 - }, - { - "epoch": 0.6538331186877245, - "grad_norm": 1.5391488266255702, - "learning_rate": 5.653251291435735e-07, - "loss": 1.1005, - "step": 4823 - }, - { - "epoch": 0.6539686843353895, - "grad_norm": 1.5857314613039635, - "learning_rate": 5.64929688971429e-07, - "loss": 1.1517, - "step": 4824 - }, - { - "epoch": 0.6541042499830543, - "grad_norm": 1.491047819331301, - "learning_rate": 5.645343326991602e-07, - "loss": 1.1052, - "step": 4825 - }, - { - "epoch": 0.6542398156307192, - "grad_norm": 1.9852371405976332, - "learning_rate": 5.641390604030072e-07, - "loss": 1.1229, - "step": 4826 - }, - { - "epoch": 0.654375381278384, - "grad_norm": 1.8605379437968697, - "learning_rate": 5.637438721591967e-07, - "loss": 1.1583, - "step": 4827 - }, - { - "epoch": 0.6545109469260489, - "grad_norm": 1.7932505097639544, - "learning_rate": 5.633487680439361e-07, - "loss": 1.1571, - "step": 4828 - }, - { - "epoch": 0.6546465125737139, - "grad_norm": 2.9109495541394494, - "learning_rate": 5.629537481334195e-07, - "loss": 1.116, - "step": 4829 - }, - { - "epoch": 0.6547820782213787, - "grad_norm": 1.5386671113458654, - "learning_rate": 5.625588125038221e-07, - "loss": 1.1381, - "step": 4830 - }, - { - "epoch": 0.6549176438690436, - "grad_norm": 2.5497132010942174, - "learning_rate": 5.621639612313056e-07, - "loss": 1.1533, - "step": 4831 - }, - { - "epoch": 0.6550532095167084, - "grad_norm": 6.117626161625898, - "learning_rate": 5.617691943920122e-07, - "loss": 1.0929, - "step": 4832 - }, - { - "epoch": 0.6551887751643733, - "grad_norm": 1.4349254725269918, - "learning_rate": 5.613745120620712e-07, - "loss": 1.1402, - "step": 4833 - }, - { - "epoch": 0.6553243408120383, - "grad_norm": 1.840040398916793, - "learning_rate": 5.609799143175927e-07, - "loss": 1.0646, - "step": 4834 - }, - { - "epoch": 0.6554599064597031, - "grad_norm": 1.525388982804501, - "learning_rate": 5.605854012346729e-07, - "loss": 1.097, - "step": 4835 - }, - { - "epoch": 0.655595472107368, - "grad_norm": 1.651461390341418, - "learning_rate": 5.601909728893892e-07, - "loss": 1.0922, - "step": 4836 - }, - { - "epoch": 0.6557310377550328, - "grad_norm": 1.9081693106616995, - "learning_rate": 5.597966293578055e-07, - "loss": 1.156, - "step": 4837 - }, - { - "epoch": 0.6558666034026978, - "grad_norm": 1.4906267108249123, - "learning_rate": 5.594023707159668e-07, - "loss": 1.1506, - "step": 4838 - }, - { - "epoch": 0.6560021690503627, - "grad_norm": 3.0652479519767293, - "learning_rate": 5.590081970399028e-07, - "loss": 1.1207, - "step": 4839 - }, - { - "epoch": 0.6561377346980275, - "grad_norm": 1.4695469643269363, - "learning_rate": 5.586141084056273e-07, - "loss": 1.1546, - "step": 4840 - }, - { - "epoch": 0.6562733003456924, - "grad_norm": 1.525083153588519, - "learning_rate": 5.582201048891367e-07, - "loss": 1.1202, - "step": 4841 - }, - { - "epoch": 0.6564088659933572, - "grad_norm": 1.66487690647815, - "learning_rate": 5.578261865664118e-07, - "loss": 1.1176, - "step": 4842 - }, - { - "epoch": 0.6565444316410222, - "grad_norm": 1.6860839277146589, - "learning_rate": 5.574323535134164e-07, - "loss": 1.1405, - "step": 4843 - }, - { - "epoch": 0.6566799972886871, - "grad_norm": 1.6772305329557835, - "learning_rate": 5.570386058060983e-07, - "loss": 1.0947, - "step": 4844 - }, - { - "epoch": 0.6568155629363519, - "grad_norm": 1.9718045851503474, - "learning_rate": 5.566449435203886e-07, - "loss": 1.1031, - "step": 4845 - }, - { - "epoch": 0.6569511285840168, - "grad_norm": 1.944762874328042, - "learning_rate": 5.562513667322018e-07, - "loss": 1.1137, - "step": 4846 - }, - { - "epoch": 0.6570866942316816, - "grad_norm": 2.2689253463486545, - "learning_rate": 5.558578755174363e-07, - "loss": 1.1394, - "step": 4847 - }, - { - "epoch": 0.6572222598793466, - "grad_norm": 1.4700970496743098, - "learning_rate": 5.554644699519735e-07, - "loss": 1.1488, - "step": 4848 - }, - { - "epoch": 0.6573578255270115, - "grad_norm": 1.4240240327451372, - "learning_rate": 5.550711501116788e-07, - "loss": 1.1765, - "step": 4849 - }, - { - "epoch": 0.6574933911746763, - "grad_norm": 2.8113261174521074, - "learning_rate": 5.546779160724012e-07, - "loss": 1.0962, - "step": 4850 - }, - { - "epoch": 0.6576289568223412, - "grad_norm": 1.773310944450205, - "learning_rate": 5.542847679099715e-07, - "loss": 1.1177, - "step": 4851 - }, - { - "epoch": 0.657764522470006, - "grad_norm": 1.5935818266799504, - "learning_rate": 5.538917057002069e-07, - "loss": 1.1171, - "step": 4852 - }, - { - "epoch": 0.657900088117671, - "grad_norm": 1.6361928036485163, - "learning_rate": 5.534987295189049e-07, - "loss": 1.1315, - "step": 4853 - }, - { - "epoch": 0.6580356537653359, - "grad_norm": 1.8985550571482281, - "learning_rate": 5.531058394418487e-07, - "loss": 1.1232, - "step": 4854 - }, - { - "epoch": 0.6581712194130007, - "grad_norm": 1.4871525864752673, - "learning_rate": 5.527130355448035e-07, - "loss": 1.1453, - "step": 4855 - }, - { - "epoch": 0.6583067850606656, - "grad_norm": 2.0306342238415533, - "learning_rate": 5.523203179035189e-07, - "loss": 1.09, - "step": 4856 - }, - { - "epoch": 0.6584423507083305, - "grad_norm": 1.6225203436305788, - "learning_rate": 5.519276865937272e-07, - "loss": 1.1061, - "step": 4857 - }, - { - "epoch": 0.6585779163559954, - "grad_norm": 1.6151284288386374, - "learning_rate": 5.515351416911442e-07, - "loss": 1.1419, - "step": 4858 - }, - { - "epoch": 0.6587134820036603, - "grad_norm": 1.4982969677734927, - "learning_rate": 5.511426832714694e-07, - "loss": 1.172, - "step": 4859 - }, - { - "epoch": 0.6588490476513251, - "grad_norm": 1.6125121319532338, - "learning_rate": 5.507503114103849e-07, - "loss": 1.204, - "step": 4860 - }, - { - "epoch": 0.65898461329899, - "grad_norm": 1.5093757944427297, - "learning_rate": 5.503580261835566e-07, - "loss": 1.1243, - "step": 4861 - }, - { - "epoch": 0.6591201789466549, - "grad_norm": 1.4065380966713095, - "learning_rate": 5.499658276666338e-07, - "loss": 1.123, - "step": 4862 - }, - { - "epoch": 0.6592557445943198, - "grad_norm": 2.3337697425529362, - "learning_rate": 5.495737159352487e-07, - "loss": 1.0961, - "step": 4863 - }, - { - "epoch": 0.6593913102419847, - "grad_norm": 1.5450043311446877, - "learning_rate": 5.491816910650171e-07, - "loss": 1.1274, - "step": 4864 - }, - { - "epoch": 0.6595268758896495, - "grad_norm": 2.3195342632177205, - "learning_rate": 5.48789753131538e-07, - "loss": 1.1236, - "step": 4865 - }, - { - "epoch": 0.6596624415373145, - "grad_norm": 3.24530023226589, - "learning_rate": 5.483979022103935e-07, - "loss": 1.1242, - "step": 4866 - }, - { - "epoch": 0.6597980071849793, - "grad_norm": 1.39663365273393, - "learning_rate": 5.480061383771481e-07, - "loss": 1.1763, - "step": 4867 - }, - { - "epoch": 0.6599335728326442, - "grad_norm": 1.9855353776043534, - "learning_rate": 5.476144617073519e-07, - "loss": 1.1247, - "step": 4868 - }, - { - "epoch": 0.6600691384803091, - "grad_norm": 1.7559768207005786, - "learning_rate": 5.472228722765351e-07, - "loss": 1.1648, - "step": 4869 - }, - { - "epoch": 0.6602047041279739, - "grad_norm": 2.1316261776080414, - "learning_rate": 5.46831370160214e-07, - "loss": 1.1217, - "step": 4870 - }, - { - "epoch": 0.6603402697756389, - "grad_norm": 1.7414733024030613, - "learning_rate": 5.464399554338856e-07, - "loss": 1.1025, - "step": 4871 - }, - { - "epoch": 0.6604758354233037, - "grad_norm": 1.473699092558706, - "learning_rate": 5.460486281730322e-07, - "loss": 1.1396, - "step": 4872 - }, - { - "epoch": 0.6606114010709686, - "grad_norm": 1.691059413651753, - "learning_rate": 5.456573884531168e-07, - "loss": 1.154, - "step": 4873 - }, - { - "epoch": 0.6607469667186335, - "grad_norm": 1.963653470652611, - "learning_rate": 5.452662363495884e-07, - "loss": 1.1557, - "step": 4874 - }, - { - "epoch": 0.6608825323662983, - "grad_norm": 1.5569111058000522, - "learning_rate": 5.448751719378762e-07, - "loss": 1.1109, - "step": 4875 - }, - { - "epoch": 0.6610180980139633, - "grad_norm": 1.7733760879529628, - "learning_rate": 5.444841952933953e-07, - "loss": 1.1567, - "step": 4876 - }, - { - "epoch": 0.6611536636616281, - "grad_norm": 1.5452544358024716, - "learning_rate": 5.440933064915413e-07, - "loss": 1.1167, - "step": 4877 - }, - { - "epoch": 0.661289229309293, - "grad_norm": 1.5477538676060125, - "learning_rate": 5.437025056076945e-07, - "loss": 1.1577, - "step": 4878 - }, - { - "epoch": 0.6614247949569579, - "grad_norm": 2.182703681354459, - "learning_rate": 5.433117927172176e-07, - "loss": 1.1226, - "step": 4879 - }, - { - "epoch": 0.6615603606046228, - "grad_norm": 1.9689939617306953, - "learning_rate": 5.429211678954566e-07, - "loss": 1.1316, - "step": 4880 - }, - { - "epoch": 0.6616959262522877, - "grad_norm": 1.5725579169508583, - "learning_rate": 5.425306312177404e-07, - "loss": 1.1293, - "step": 4881 - }, - { - "epoch": 0.6618314918999526, - "grad_norm": 1.6614086000587205, - "learning_rate": 5.421401827593812e-07, - "loss": 1.1635, - "step": 4882 - }, - { - "epoch": 0.6619670575476174, - "grad_norm": 1.4894274184387255, - "learning_rate": 5.417498225956734e-07, - "loss": 1.1633, - "step": 4883 - }, - { - "epoch": 0.6621026231952823, - "grad_norm": 2.046543305345037, - "learning_rate": 5.413595508018951e-07, - "loss": 1.1248, - "step": 4884 - }, - { - "epoch": 0.6622381888429472, - "grad_norm": 1.5149373726900681, - "learning_rate": 5.409693674533071e-07, - "loss": 1.163, - "step": 4885 - }, - { - "epoch": 0.6623737544906121, - "grad_norm": 2.0872600392110185, - "learning_rate": 5.405792726251532e-07, - "loss": 1.1295, - "step": 4886 - }, - { - "epoch": 0.662509320138277, - "grad_norm": 1.5495095681961184, - "learning_rate": 5.401892663926606e-07, - "loss": 1.1192, - "step": 4887 - }, - { - "epoch": 0.6626448857859418, - "grad_norm": 1.63120116165608, - "learning_rate": 5.397993488310378e-07, - "loss": 1.1671, - "step": 4888 - }, - { - "epoch": 0.6627804514336068, - "grad_norm": 1.5985680306594543, - "learning_rate": 5.394095200154786e-07, - "loss": 1.0818, - "step": 4889 - }, - { - "epoch": 0.6629160170812716, - "grad_norm": 1.547326956425439, - "learning_rate": 5.39019780021157e-07, - "loss": 1.1516, - "step": 4890 - }, - { - "epoch": 0.6630515827289365, - "grad_norm": 2.7125985141999402, - "learning_rate": 5.386301289232329e-07, - "loss": 1.1143, - "step": 4891 - }, - { - "epoch": 0.6631871483766014, - "grad_norm": 1.5602338878808106, - "learning_rate": 5.382405667968457e-07, - "loss": 1.0793, - "step": 4892 - }, - { - "epoch": 0.6633227140242662, - "grad_norm": 1.5499172130164718, - "learning_rate": 5.378510937171212e-07, - "loss": 1.1304, - "step": 4893 - }, - { - "epoch": 0.6634582796719312, - "grad_norm": 1.578329287390987, - "learning_rate": 5.37461709759165e-07, - "loss": 1.1404, - "step": 4894 - }, - { - "epoch": 0.663593845319596, - "grad_norm": 1.776867286731425, - "learning_rate": 5.370724149980668e-07, - "loss": 1.1079, - "step": 4895 - }, - { - "epoch": 0.6637294109672609, - "grad_norm": 7.07356222666573, - "learning_rate": 5.366832095088994e-07, - "loss": 1.1247, - "step": 4896 - }, - { - "epoch": 0.6638649766149258, - "grad_norm": 1.5821058266035843, - "learning_rate": 5.362940933667177e-07, - "loss": 1.1164, - "step": 4897 - }, - { - "epoch": 0.6640005422625906, - "grad_norm": 2.1923072693357417, - "learning_rate": 5.359050666465599e-07, - "loss": 1.1328, - "step": 4898 - }, - { - "epoch": 0.6641361079102556, - "grad_norm": 1.7945749701620397, - "learning_rate": 5.355161294234465e-07, - "loss": 1.1344, - "step": 4899 - }, - { - "epoch": 0.6642716735579204, - "grad_norm": 1.8130187936298026, - "learning_rate": 5.351272817723813e-07, - "loss": 1.1481, - "step": 4900 - }, - { - "epoch": 0.6644072392055853, - "grad_norm": 1.716975009757803, - "learning_rate": 5.347385237683504e-07, - "loss": 1.1433, - "step": 4901 - }, - { - "epoch": 0.6645428048532502, - "grad_norm": 1.8397952012077903, - "learning_rate": 5.343498554863225e-07, - "loss": 1.1376, - "step": 4902 - }, - { - "epoch": 0.664678370500915, - "grad_norm": 1.5560436072961044, - "learning_rate": 5.339612770012494e-07, - "loss": 1.1604, - "step": 4903 - }, - { - "epoch": 0.66481393614858, - "grad_norm": 1.473404711358916, - "learning_rate": 5.335727883880654e-07, - "loss": 1.1204, - "step": 4904 - }, - { - "epoch": 0.6649495017962448, - "grad_norm": 1.6837577652231523, - "learning_rate": 5.331843897216873e-07, - "loss": 1.1246, - "step": 4905 - }, - { - "epoch": 0.6650850674439097, - "grad_norm": 1.7003248053624211, - "learning_rate": 5.327960810770149e-07, - "loss": 1.0902, - "step": 4906 - }, - { - "epoch": 0.6652206330915746, - "grad_norm": 1.4544231159780867, - "learning_rate": 5.324078625289304e-07, - "loss": 1.1519, - "step": 4907 - }, - { - "epoch": 0.6653561987392395, - "grad_norm": 1.5815478445414703, - "learning_rate": 5.320197341522985e-07, - "loss": 1.1813, - "step": 4908 - }, - { - "epoch": 0.6654917643869044, - "grad_norm": 4.73791278421319, - "learning_rate": 5.316316960219673e-07, - "loss": 1.1214, - "step": 4909 - }, - { - "epoch": 0.6656273300345692, - "grad_norm": 1.5073378586420585, - "learning_rate": 5.312437482127659e-07, - "loss": 1.1605, - "step": 4910 - }, - { - "epoch": 0.6657628956822341, - "grad_norm": 1.4753460925312933, - "learning_rate": 5.30855890799508e-07, - "loss": 1.0966, - "step": 4911 - }, - { - "epoch": 0.665898461329899, - "grad_norm": 1.5866062493114987, - "learning_rate": 5.304681238569877e-07, - "loss": 1.1329, - "step": 4912 - }, - { - "epoch": 0.6660340269775639, - "grad_norm": 2.858366448986451, - "learning_rate": 5.300804474599842e-07, - "loss": 1.1459, - "step": 4913 - }, - { - "epoch": 0.6661695926252288, - "grad_norm": 1.8130524009981435, - "learning_rate": 5.296928616832568e-07, - "loss": 1.1098, - "step": 4914 - }, - { - "epoch": 0.6663051582728936, - "grad_norm": 2.1361444318176317, - "learning_rate": 5.293053666015485e-07, - "loss": 1.1651, - "step": 4915 - }, - { - "epoch": 0.6664407239205585, - "grad_norm": 1.666276228266202, - "learning_rate": 5.28917962289585e-07, - "loss": 1.111, - "step": 4916 - }, - { - "epoch": 0.6665762895682235, - "grad_norm": 1.9111815769145295, - "learning_rate": 5.28530648822074e-07, - "loss": 1.079, - "step": 4917 - }, - { - "epoch": 0.6667118552158883, - "grad_norm": 1.7978023797790923, - "learning_rate": 5.281434262737056e-07, - "loss": 1.159, - "step": 4918 - }, - { - "epoch": 0.6668474208635532, - "grad_norm": 1.7604711589371427, - "learning_rate": 5.277562947191529e-07, - "loss": 1.1245, - "step": 4919 - }, - { - "epoch": 0.666982986511218, - "grad_norm": 2.2046924113091975, - "learning_rate": 5.273692542330713e-07, - "loss": 1.1517, - "step": 4920 - }, - { - "epoch": 0.6671185521588829, - "grad_norm": 1.4505991656566142, - "learning_rate": 5.269823048900981e-07, - "loss": 1.1761, - "step": 4921 - }, - { - "epoch": 0.6672541178065479, - "grad_norm": 1.5723252503022231, - "learning_rate": 5.265954467648539e-07, - "loss": 1.1094, - "step": 4922 - }, - { - "epoch": 0.6673896834542127, - "grad_norm": 1.5739384728577925, - "learning_rate": 5.262086799319405e-07, - "loss": 1.1444, - "step": 4923 - }, - { - "epoch": 0.6675252491018776, - "grad_norm": 1.50472044754599, - "learning_rate": 5.258220044659438e-07, - "loss": 1.1235, - "step": 4924 - }, - { - "epoch": 0.6676608147495424, - "grad_norm": 1.7357482128233432, - "learning_rate": 5.2543542044143e-07, - "loss": 1.1078, - "step": 4925 - }, - { - "epoch": 0.6677963803972073, - "grad_norm": 1.8326018079334034, - "learning_rate": 5.2504892793295e-07, - "loss": 1.132, - "step": 4926 - }, - { - "epoch": 0.6679319460448723, - "grad_norm": 1.5611745200190938, - "learning_rate": 5.246625270150346e-07, - "loss": 1.1204, - "step": 4927 - }, - { - "epoch": 0.6680675116925371, - "grad_norm": 2.5191713115782095, - "learning_rate": 5.242762177621994e-07, - "loss": 1.188, - "step": 4928 - }, - { - "epoch": 0.668203077340202, - "grad_norm": 1.8831786794947099, - "learning_rate": 5.238900002489398e-07, - "loss": 1.1071, - "step": 4929 - }, - { - "epoch": 0.6683386429878668, - "grad_norm": 1.624339115395409, - "learning_rate": 5.235038745497363e-07, - "loss": 1.0947, - "step": 4930 - }, - { - "epoch": 0.6684742086355318, - "grad_norm": 1.5950439673565042, - "learning_rate": 5.231178407390484e-07, - "loss": 1.1275, - "step": 4931 - }, - { - "epoch": 0.6686097742831967, - "grad_norm": 1.8061238904627441, - "learning_rate": 5.227318988913216e-07, - "loss": 1.1267, - "step": 4932 - }, - { - "epoch": 0.6687453399308615, - "grad_norm": 1.480704036521496, - "learning_rate": 5.223460490809799e-07, - "loss": 1.1119, - "step": 4933 - }, - { - "epoch": 0.6688809055785264, - "grad_norm": 1.7133727492552788, - "learning_rate": 5.21960291382433e-07, - "loss": 1.1857, - "step": 4934 - }, - { - "epoch": 0.6690164712261912, - "grad_norm": 1.541130165019227, - "learning_rate": 5.215746258700698e-07, - "loss": 1.1089, - "step": 4935 - }, - { - "epoch": 0.6691520368738562, - "grad_norm": 1.5732731753014844, - "learning_rate": 5.211890526182642e-07, - "loss": 1.122, - "step": 4936 - }, - { - "epoch": 0.6692876025215211, - "grad_norm": 1.4119339177426045, - "learning_rate": 5.208035717013702e-07, - "loss": 1.1287, - "step": 4937 - }, - { - "epoch": 0.6694231681691859, - "grad_norm": 2.0549599581611164, - "learning_rate": 5.204181831937245e-07, - "loss": 1.1744, - "step": 4938 - }, - { - "epoch": 0.6695587338168508, - "grad_norm": 1.8005072196363703, - "learning_rate": 5.200328871696468e-07, - "loss": 1.1711, - "step": 4939 - }, - { - "epoch": 0.6696942994645156, - "grad_norm": 1.632659057878715, - "learning_rate": 5.19647683703438e-07, - "loss": 1.1384, - "step": 4940 - }, - { - "epoch": 0.6698298651121806, - "grad_norm": 2.2267577818323203, - "learning_rate": 5.192625728693819e-07, - "loss": 1.1461, - "step": 4941 - }, - { - "epoch": 0.6699654307598455, - "grad_norm": 1.8371558032101727, - "learning_rate": 5.188775547417439e-07, - "loss": 1.1431, - "step": 4942 - }, - { - "epoch": 0.6701009964075103, - "grad_norm": 2.3756173095374282, - "learning_rate": 5.184926293947716e-07, - "loss": 1.1705, - "step": 4943 - }, - { - "epoch": 0.6702365620551752, - "grad_norm": 2.2876009572906764, - "learning_rate": 5.181077969026951e-07, - "loss": 1.1258, - "step": 4944 - }, - { - "epoch": 0.67037212770284, - "grad_norm": 1.6043297028300725, - "learning_rate": 5.17723057339726e-07, - "loss": 1.1264, - "step": 4945 - }, - { - "epoch": 0.670507693350505, - "grad_norm": 3.0171732411674648, - "learning_rate": 5.173384107800585e-07, - "loss": 1.1354, - "step": 4946 - }, - { - "epoch": 0.6706432589981699, - "grad_norm": 1.5744043188639119, - "learning_rate": 5.169538572978684e-07, - "loss": 1.135, - "step": 4947 - }, - { - "epoch": 0.6707788246458347, - "grad_norm": 1.4175648420777738, - "learning_rate": 5.165693969673142e-07, - "loss": 1.1166, - "step": 4948 - }, - { - "epoch": 0.6709143902934996, - "grad_norm": 2.0736191441389367, - "learning_rate": 5.161850298625362e-07, - "loss": 1.0794, - "step": 4949 - }, - { - "epoch": 0.6710499559411645, - "grad_norm": 1.9750548096126308, - "learning_rate": 5.158007560576557e-07, - "loss": 1.136, - "step": 4950 - }, - { - "epoch": 0.6711855215888294, - "grad_norm": 1.7781073786350896, - "learning_rate": 5.154165756267774e-07, - "loss": 1.1729, - "step": 4951 - }, - { - "epoch": 0.6713210872364943, - "grad_norm": 1.6835518335811073, - "learning_rate": 5.150324886439874e-07, - "loss": 1.135, - "step": 4952 - }, - { - "epoch": 0.6714566528841591, - "grad_norm": 1.8107074866187132, - "learning_rate": 5.14648495183354e-07, - "loss": 1.1366, - "step": 4953 - }, - { - "epoch": 0.671592218531824, - "grad_norm": 1.4952984933271551, - "learning_rate": 5.142645953189271e-07, - "loss": 1.1236, - "step": 4954 - }, - { - "epoch": 0.6717277841794889, - "grad_norm": 1.9371337709341832, - "learning_rate": 5.138807891247388e-07, - "loss": 1.1417, - "step": 4955 - }, - { - "epoch": 0.6718633498271538, - "grad_norm": 5.6258013018276465, - "learning_rate": 5.13497076674803e-07, - "loss": 1.138, - "step": 4956 - }, - { - "epoch": 0.6719989154748187, - "grad_norm": 1.4379189315279797, - "learning_rate": 5.13113458043116e-07, - "loss": 1.1268, - "step": 4957 - }, - { - "epoch": 0.6721344811224835, - "grad_norm": 2.0713250268479495, - "learning_rate": 5.127299333036552e-07, - "loss": 1.1235, - "step": 4958 - }, - { - "epoch": 0.6722700467701485, - "grad_norm": 1.9608845164061142, - "learning_rate": 5.123465025303804e-07, - "loss": 1.1277, - "step": 4959 - }, - { - "epoch": 0.6724056124178134, - "grad_norm": 1.5553016074626629, - "learning_rate": 5.119631657972334e-07, - "loss": 1.0877, - "step": 4960 - }, - { - "epoch": 0.6725411780654782, - "grad_norm": 1.9863658037818084, - "learning_rate": 5.115799231781377e-07, - "loss": 1.1134, - "step": 4961 - }, - { - "epoch": 0.6726767437131431, - "grad_norm": 1.5175915489188927, - "learning_rate": 5.111967747469983e-07, - "loss": 1.1386, - "step": 4962 - }, - { - "epoch": 0.6728123093608079, - "grad_norm": 1.585868289908071, - "learning_rate": 5.108137205777026e-07, - "loss": 1.1365, - "step": 4963 - }, - { - "epoch": 0.6729478750084729, - "grad_norm": 1.8326415030101058, - "learning_rate": 5.104307607441193e-07, - "loss": 1.1182, - "step": 4964 - }, - { - "epoch": 0.6730834406561378, - "grad_norm": 1.4495713023596106, - "learning_rate": 5.100478953200999e-07, - "loss": 1.0752, - "step": 4965 - }, - { - "epoch": 0.6732190063038026, - "grad_norm": 1.9727004799061012, - "learning_rate": 5.096651243794756e-07, - "loss": 1.1024, - "step": 4966 - }, - { - "epoch": 0.6733545719514675, - "grad_norm": 1.6749910471147682, - "learning_rate": 5.092824479960625e-07, - "loss": 1.089, - "step": 4967 - }, - { - "epoch": 0.6734901375991323, - "grad_norm": 1.469564032873871, - "learning_rate": 5.088998662436548e-07, - "loss": 1.0934, - "step": 4968 - }, - { - "epoch": 0.6736257032467973, - "grad_norm": 6.342404211432201, - "learning_rate": 5.085173791960324e-07, - "loss": 1.0755, - "step": 4969 - }, - { - "epoch": 0.6737612688944622, - "grad_norm": 2.0620273549095147, - "learning_rate": 5.081349869269529e-07, - "loss": 1.1134, - "step": 4970 - }, - { - "epoch": 0.673896834542127, - "grad_norm": 1.4867427888242264, - "learning_rate": 5.077526895101596e-07, - "loss": 1.0899, - "step": 4971 - }, - { - "epoch": 0.6740324001897919, - "grad_norm": 4.598567487102877, - "learning_rate": 5.073704870193736e-07, - "loss": 1.0901, - "step": 4972 - }, - { - "epoch": 0.6741679658374568, - "grad_norm": 1.5937081704067992, - "learning_rate": 5.069883795283015e-07, - "loss": 1.1303, - "step": 4973 - }, - { - "epoch": 0.6743035314851217, - "grad_norm": 1.866566661943804, - "learning_rate": 5.066063671106281e-07, - "loss": 1.1403, - "step": 4974 - }, - { - "epoch": 0.6744390971327866, - "grad_norm": 2.0487427252542334, - "learning_rate": 5.062244498400228e-07, - "loss": 1.11, - "step": 4975 - }, - { - "epoch": 0.6745746627804514, - "grad_norm": 1.6636100433612249, - "learning_rate": 5.058426277901344e-07, - "loss": 1.1826, - "step": 4976 - }, - { - "epoch": 0.6747102284281163, - "grad_norm": 1.488384521401093, - "learning_rate": 5.054609010345947e-07, - "loss": 1.133, - "step": 4977 - }, - { - "epoch": 0.6748457940757812, - "grad_norm": 1.8084519697168833, - "learning_rate": 5.050792696470165e-07, - "loss": 1.1727, - "step": 4978 - }, - { - "epoch": 0.6749813597234461, - "grad_norm": 1.6474575645592657, - "learning_rate": 5.046977337009945e-07, - "loss": 1.0928, - "step": 4979 - }, - { - "epoch": 0.675116925371111, - "grad_norm": 2.0371243281747575, - "learning_rate": 5.043162932701048e-07, - "loss": 1.1291, - "step": 4980 - }, - { - "epoch": 0.6752524910187758, - "grad_norm": 1.5694867285406233, - "learning_rate": 5.039349484279053e-07, - "loss": 1.1316, - "step": 4981 - }, - { - "epoch": 0.6753880566664408, - "grad_norm": 1.6258280346986795, - "learning_rate": 5.035536992479352e-07, - "loss": 1.1382, - "step": 4982 - }, - { - "epoch": 0.6755236223141056, - "grad_norm": 1.8222504119619058, - "learning_rate": 5.031725458037157e-07, - "loss": 1.1334, - "step": 4983 - }, - { - "epoch": 0.6756591879617705, - "grad_norm": 5.453570530041879, - "learning_rate": 5.027914881687489e-07, - "loss": 1.1073, - "step": 4984 - }, - { - "epoch": 0.6757947536094354, - "grad_norm": 1.5692625113883847, - "learning_rate": 5.024105264165188e-07, - "loss": 1.1238, - "step": 4985 - }, - { - "epoch": 0.6759303192571002, - "grad_norm": 2.297705205606987, - "learning_rate": 5.020296606204915e-07, - "loss": 1.1403, - "step": 4986 - }, - { - "epoch": 0.6760658849047652, - "grad_norm": 1.700488453724067, - "learning_rate": 5.016488908541125e-07, - "loss": 1.1082, - "step": 4987 - }, - { - "epoch": 0.67620145055243, - "grad_norm": 1.5776329195978689, - "learning_rate": 5.01268217190812e-07, - "loss": 1.1208, - "step": 4988 - }, - { - "epoch": 0.6763370162000949, - "grad_norm": 4.587535454487696, - "learning_rate": 5.008876397039983e-07, - "loss": 1.1549, - "step": 4989 - }, - { - "epoch": 0.6764725818477598, - "grad_norm": 1.6405808091828626, - "learning_rate": 5.005071584670644e-07, - "loss": 1.1318, - "step": 4990 - }, - { - "epoch": 0.6766081474954246, - "grad_norm": 1.989999847994308, - "learning_rate": 5.001267735533811e-07, - "loss": 1.1369, - "step": 4991 - }, - { - "epoch": 0.6767437131430896, - "grad_norm": 1.4788568410602503, - "learning_rate": 4.997464850363049e-07, - "loss": 1.1577, - "step": 4992 - }, - { - "epoch": 0.6768792787907544, - "grad_norm": 1.5467800909427947, - "learning_rate": 4.993662929891698e-07, - "loss": 1.1846, - "step": 4993 - }, - { - "epoch": 0.6770148444384193, - "grad_norm": 1.8774359882506313, - "learning_rate": 4.989861974852934e-07, - "loss": 1.1405, - "step": 4994 - }, - { - "epoch": 0.6771504100860842, - "grad_norm": 2.01882573585862, - "learning_rate": 4.986061985979739e-07, - "loss": 1.1425, - "step": 4995 - }, - { - "epoch": 0.677285975733749, - "grad_norm": 1.648455401956844, - "learning_rate": 4.982262964004913e-07, - "loss": 1.0915, - "step": 4996 - }, - { - "epoch": 0.677421541381414, - "grad_norm": 1.6548294472028942, - "learning_rate": 4.978464909661067e-07, - "loss": 1.1051, - "step": 4997 - }, - { - "epoch": 0.6775571070290788, - "grad_norm": 2.544847698198007, - "learning_rate": 4.974667823680626e-07, - "loss": 1.1694, - "step": 4998 - }, - { - "epoch": 0.6776926726767437, - "grad_norm": 14.242079179833981, - "learning_rate": 4.970871706795827e-07, - "loss": 1.121, - "step": 4999 - }, - { - "epoch": 0.6778282383244086, - "grad_norm": 1.8892950052883828, - "learning_rate": 4.967076559738722e-07, - "loss": 1.1463, - "step": 5000 - }, - { - "epoch": 0.6779638039720735, - "grad_norm": 1.6879212454529837, - "learning_rate": 4.963282383241175e-07, - "loss": 1.1285, - "step": 5001 - }, - { - "epoch": 0.6780993696197384, - "grad_norm": 1.4369508277170684, - "learning_rate": 4.959489178034863e-07, - "loss": 1.1244, - "step": 5002 - }, - { - "epoch": 0.6782349352674032, - "grad_norm": 1.387495776319263, - "learning_rate": 4.955696944851276e-07, - "loss": 1.1152, - "step": 5003 - }, - { - "epoch": 0.6783705009150681, - "grad_norm": 1.5234262046566502, - "learning_rate": 4.951905684421716e-07, - "loss": 1.1578, - "step": 5004 - }, - { - "epoch": 0.678506066562733, - "grad_norm": 2.5900619591174063, - "learning_rate": 4.948115397477296e-07, - "loss": 1.1361, - "step": 5005 - }, - { - "epoch": 0.6786416322103979, - "grad_norm": 2.3919751395177715, - "learning_rate": 4.94432608474895e-07, - "loss": 1.1352, - "step": 5006 - }, - { - "epoch": 0.6787771978580628, - "grad_norm": 1.491447483398711, - "learning_rate": 4.940537746967403e-07, - "loss": 1.1206, - "step": 5007 - }, - { - "epoch": 0.6789127635057276, - "grad_norm": 2.499854131013477, - "learning_rate": 4.936750384863222e-07, - "loss": 1.107, - "step": 5008 - }, - { - "epoch": 0.6790483291533925, - "grad_norm": 1.573972456719156, - "learning_rate": 4.932963999166755e-07, - "loss": 1.146, - "step": 5009 - }, - { - "epoch": 0.6791838948010575, - "grad_norm": 7.060728208549765, - "learning_rate": 4.929178590608191e-07, - "loss": 1.152, - "step": 5010 - }, - { - "epoch": 0.6793194604487223, - "grad_norm": 1.709316596950634, - "learning_rate": 4.925394159917506e-07, - "loss": 1.104, - "step": 5011 - }, - { - "epoch": 0.6794550260963872, - "grad_norm": 2.313662758453731, - "learning_rate": 4.921610707824501e-07, - "loss": 1.1593, - "step": 5012 - }, - { - "epoch": 0.679590591744052, - "grad_norm": 1.7341284306983658, - "learning_rate": 4.917828235058785e-07, - "loss": 1.1525, - "step": 5013 - }, - { - "epoch": 0.6797261573917169, - "grad_norm": 1.6154296548619989, - "learning_rate": 4.914046742349777e-07, - "loss": 1.1105, - "step": 5014 - }, - { - "epoch": 0.6798617230393819, - "grad_norm": 2.2993263415272707, - "learning_rate": 4.910266230426708e-07, - "loss": 1.1239, - "step": 5015 - }, - { - "epoch": 0.6799972886870467, - "grad_norm": 1.5117160004057655, - "learning_rate": 4.906486700018622e-07, - "loss": 1.144, - "step": 5016 - }, - { - "epoch": 0.6801328543347116, - "grad_norm": 1.89462920924638, - "learning_rate": 4.90270815185437e-07, - "loss": 1.1482, - "step": 5017 - }, - { - "epoch": 0.6802684199823764, - "grad_norm": 1.4783794877219154, - "learning_rate": 4.898930586662614e-07, - "loss": 1.1256, - "step": 5018 - }, - { - "epoch": 0.6804039856300413, - "grad_norm": 1.8689579490955446, - "learning_rate": 4.89515400517183e-07, - "loss": 1.1333, - "step": 5019 - }, - { - "epoch": 0.6805395512777063, - "grad_norm": 1.7879082427758457, - "learning_rate": 4.891378408110301e-07, - "loss": 1.117, - "step": 5020 - }, - { - "epoch": 0.6806751169253711, - "grad_norm": 2.1126618076518273, - "learning_rate": 4.887603796206124e-07, - "loss": 1.1304, - "step": 5021 - }, - { - "epoch": 0.680810682573036, - "grad_norm": 1.733785098331935, - "learning_rate": 4.883830170187193e-07, - "loss": 1.1196, - "step": 5022 - }, - { - "epoch": 0.6809462482207008, - "grad_norm": 1.523921206359258, - "learning_rate": 4.880057530781237e-07, - "loss": 1.1375, - "step": 5023 - }, - { - "epoch": 0.6810818138683657, - "grad_norm": 1.7105298581218666, - "learning_rate": 4.876285878715763e-07, - "loss": 1.0992, - "step": 5024 - }, - { - "epoch": 0.6812173795160307, - "grad_norm": 1.927293677911524, - "learning_rate": 4.872515214718123e-07, - "loss": 1.1822, - "step": 5025 - }, - { - "epoch": 0.6813529451636955, - "grad_norm": 1.5267178977235765, - "learning_rate": 4.86874553951544e-07, - "loss": 1.1043, - "step": 5026 - }, - { - "epoch": 0.6814885108113604, - "grad_norm": 1.8709934643226926, - "learning_rate": 4.864976853834684e-07, - "loss": 1.1251, - "step": 5027 - }, - { - "epoch": 0.6816240764590252, - "grad_norm": 1.5878407763071387, - "learning_rate": 4.861209158402601e-07, - "loss": 1.1201, - "step": 5028 - }, - { - "epoch": 0.6817596421066902, - "grad_norm": 1.5137489418216632, - "learning_rate": 4.857442453945779e-07, - "loss": 1.1022, - "step": 5029 - }, - { - "epoch": 0.6818952077543551, - "grad_norm": 1.5158565706772829, - "learning_rate": 4.853676741190576e-07, - "loss": 1.114, - "step": 5030 - }, - { - "epoch": 0.6820307734020199, - "grad_norm": 1.4333889196114566, - "learning_rate": 4.849912020863198e-07, - "loss": 1.1566, - "step": 5031 - }, - { - "epoch": 0.6821663390496848, - "grad_norm": 1.614346978124686, - "learning_rate": 4.846148293689629e-07, - "loss": 1.1195, - "step": 5032 - }, - { - "epoch": 0.6823019046973496, - "grad_norm": 1.9958076213266087, - "learning_rate": 4.842385560395687e-07, - "loss": 1.1226, - "step": 5033 - }, - { - "epoch": 0.6824374703450146, - "grad_norm": 1.7569109844815372, - "learning_rate": 4.838623821706973e-07, - "loss": 1.1166, - "step": 5034 - }, - { - "epoch": 0.6825730359926795, - "grad_norm": 1.8302412138297257, - "learning_rate": 4.834863078348915e-07, - "loss": 1.1698, - "step": 5035 - }, - { - "epoch": 0.6827086016403443, - "grad_norm": 1.7437547676890182, - "learning_rate": 4.831103331046739e-07, - "loss": 1.1335, - "step": 5036 - }, - { - "epoch": 0.6828441672880092, - "grad_norm": 1.6852946364260437, - "learning_rate": 4.827344580525487e-07, - "loss": 1.1036, - "step": 5037 - }, - { - "epoch": 0.6829797329356742, - "grad_norm": 2.3816224138841084, - "learning_rate": 4.82358682751e-07, - "loss": 1.1026, - "step": 5038 - }, - { - "epoch": 0.683115298583339, - "grad_norm": 2.4570264849093713, - "learning_rate": 4.819830072724934e-07, - "loss": 1.1477, - "step": 5039 - }, - { - "epoch": 0.6832508642310039, - "grad_norm": 1.8938029011784432, - "learning_rate": 4.816074316894749e-07, - "loss": 1.1439, - "step": 5040 - }, - { - "epoch": 0.6833864298786687, - "grad_norm": 2.629711618674366, - "learning_rate": 4.812319560743713e-07, - "loss": 1.1714, - "step": 5041 - }, - { - "epoch": 0.6835219955263336, - "grad_norm": 1.7319043354465704, - "learning_rate": 4.8085658049959e-07, - "loss": 1.1069, - "step": 5042 - }, - { - "epoch": 0.6836575611739986, - "grad_norm": 1.7199438437713077, - "learning_rate": 4.804813050375194e-07, - "loss": 1.0981, - "step": 5043 - }, - { - "epoch": 0.6837931268216634, - "grad_norm": 2.3801132509817937, - "learning_rate": 4.801061297605282e-07, - "loss": 1.1271, - "step": 5044 - }, - { - "epoch": 0.6839286924693283, - "grad_norm": 1.4373562282067949, - "learning_rate": 4.797310547409661e-07, - "loss": 1.1011, - "step": 5045 - }, - { - "epoch": 0.6840642581169931, - "grad_norm": 1.4093543606547574, - "learning_rate": 4.793560800511634e-07, - "loss": 1.1363, - "step": 5046 - }, - { - "epoch": 0.684199823764658, - "grad_norm": 2.6787194991569585, - "learning_rate": 4.789812057634308e-07, - "loss": 1.1794, - "step": 5047 - }, - { - "epoch": 0.684335389412323, - "grad_norm": 1.7198131040542501, - "learning_rate": 4.786064319500604e-07, - "loss": 1.1294, - "step": 5048 - }, - { - "epoch": 0.6844709550599878, - "grad_norm": 1.6875094785507658, - "learning_rate": 4.782317586833236e-07, - "loss": 1.1117, - "step": 5049 - }, - { - "epoch": 0.6846065207076527, - "grad_norm": 1.9852535945707153, - "learning_rate": 4.778571860354737e-07, - "loss": 1.1318, - "step": 5050 - }, - { - "epoch": 0.6847420863553175, - "grad_norm": 1.7652776558460812, - "learning_rate": 4.774827140787437e-07, - "loss": 1.1446, - "step": 5051 - }, - { - "epoch": 0.6848776520029825, - "grad_norm": 1.7592656551920434, - "learning_rate": 4.77108342885348e-07, - "loss": 1.1332, - "step": 5052 - }, - { - "epoch": 0.6850132176506474, - "grad_norm": 1.7365095443821854, - "learning_rate": 4.767340725274809e-07, - "loss": 1.1109, - "step": 5053 - }, - { - "epoch": 0.6851487832983122, - "grad_norm": 1.5362023092957744, - "learning_rate": 4.763599030773173e-07, - "loss": 1.1521, - "step": 5054 - }, - { - "epoch": 0.6852843489459771, - "grad_norm": 1.5644655130046778, - "learning_rate": 4.7598583460701324e-07, - "loss": 1.1227, - "step": 5055 - }, - { - "epoch": 0.6854199145936419, - "grad_norm": 1.6267093382810747, - "learning_rate": 4.756118671887046e-07, - "loss": 1.1139, - "step": 5056 - }, - { - "epoch": 0.6855554802413069, - "grad_norm": 1.640278609310388, - "learning_rate": 4.7523800089450804e-07, - "loss": 1.1296, - "step": 5057 - }, - { - "epoch": 0.6856910458889718, - "grad_norm": 2.242040638782064, - "learning_rate": 4.748642357965208e-07, - "loss": 1.1437, - "step": 5058 - }, - { - "epoch": 0.6858266115366366, - "grad_norm": 1.6384593414804838, - "learning_rate": 4.7449057196682063e-07, - "loss": 1.1202, - "step": 5059 - }, - { - "epoch": 0.6859621771843015, - "grad_norm": 1.545152355349733, - "learning_rate": 4.7411700947746534e-07, - "loss": 1.138, - "step": 5060 - }, - { - "epoch": 0.6860977428319663, - "grad_norm": 9.415203748550557, - "learning_rate": 4.737435484004939e-07, - "loss": 1.1512, - "step": 5061 - }, - { - "epoch": 0.6862333084796313, - "grad_norm": 1.7113111348252474, - "learning_rate": 4.7337018880792544e-07, - "loss": 1.1493, - "step": 5062 - }, - { - "epoch": 0.6863688741272962, - "grad_norm": 1.515789461636531, - "learning_rate": 4.729969307717583e-07, - "loss": 1.0947, - "step": 5063 - }, - { - "epoch": 0.686504439774961, - "grad_norm": 1.7697422926944724, - "learning_rate": 4.7262377436397396e-07, - "loss": 1.1674, - "step": 5064 - }, - { - "epoch": 0.6866400054226259, - "grad_norm": 1.87253731164435, - "learning_rate": 4.722507196565311e-07, - "loss": 1.1189, - "step": 5065 - }, - { - "epoch": 0.6867755710702907, - "grad_norm": 1.7232275074282548, - "learning_rate": 4.718777667213719e-07, - "loss": 1.1561, - "step": 5066 - }, - { - "epoch": 0.6869111367179557, - "grad_norm": 1.717831271781981, - "learning_rate": 4.7150491563041597e-07, - "loss": 1.1223, - "step": 5067 - }, - { - "epoch": 0.6870467023656206, - "grad_norm": 1.349248746260784, - "learning_rate": 4.7113216645556606e-07, - "loss": 1.0871, - "step": 5068 - }, - { - "epoch": 0.6871822680132854, - "grad_norm": 1.575165426352292, - "learning_rate": 4.707595192687025e-07, - "loss": 1.1234, - "step": 5069 - }, - { - "epoch": 0.6873178336609503, - "grad_norm": 1.5237737124777995, - "learning_rate": 4.703869741416888e-07, - "loss": 1.1367, - "step": 5070 - }, - { - "epoch": 0.6874533993086152, - "grad_norm": 1.936097523162276, - "learning_rate": 4.700145311463659e-07, - "loss": 1.0725, - "step": 5071 - }, - { - "epoch": 0.6875889649562801, - "grad_norm": 3.019980141195836, - "learning_rate": 4.696421903545579e-07, - "loss": 1.1317, - "step": 5072 - }, - { - "epoch": 0.687724530603945, - "grad_norm": 1.7211989158285992, - "learning_rate": 4.692699518380664e-07, - "loss": 1.1334, - "step": 5073 - }, - { - "epoch": 0.6878600962516098, - "grad_norm": 1.6830052712317567, - "learning_rate": 4.6889781566867617e-07, - "loss": 1.1433, - "step": 5074 - }, - { - "epoch": 0.6879956618992747, - "grad_norm": 1.5471417561457257, - "learning_rate": 4.685257819181494e-07, - "loss": 1.1283, - "step": 5075 - }, - { - "epoch": 0.6881312275469396, - "grad_norm": 1.6789305393272496, - "learning_rate": 4.6815385065823053e-07, - "loss": 1.0979, - "step": 5076 - }, - { - "epoch": 0.6882667931946045, - "grad_norm": 1.6995569448742935, - "learning_rate": 4.677820219606433e-07, - "loss": 1.134, - "step": 5077 - }, - { - "epoch": 0.6884023588422694, - "grad_norm": 1.6147989514347159, - "learning_rate": 4.6741029589709216e-07, - "loss": 1.1362, - "step": 5078 - }, - { - "epoch": 0.6885379244899342, - "grad_norm": 2.276587095093122, - "learning_rate": 4.6703867253926144e-07, - "loss": 1.1033, - "step": 5079 - }, - { - "epoch": 0.6886734901375992, - "grad_norm": 1.670423625015705, - "learning_rate": 4.666671519588158e-07, - "loss": 1.1057, - "step": 5080 - }, - { - "epoch": 0.688809055785264, - "grad_norm": 1.7635757128817013, - "learning_rate": 4.662957342274e-07, - "loss": 1.1459, - "step": 5081 - }, - { - "epoch": 0.6889446214329289, - "grad_norm": 1.8977456402324373, - "learning_rate": 4.6592441941663896e-07, - "loss": 1.1593, - "step": 5082 - }, - { - "epoch": 0.6890801870805938, - "grad_norm": 1.5255802336835635, - "learning_rate": 4.655532075981383e-07, - "loss": 1.1287, - "step": 5083 - }, - { - "epoch": 0.6892157527282586, - "grad_norm": 2.053453475384124, - "learning_rate": 4.6518209884348227e-07, - "loss": 1.1358, - "step": 5084 - }, - { - "epoch": 0.6893513183759236, - "grad_norm": 1.7119566438481733, - "learning_rate": 4.648110932242375e-07, - "loss": 1.1347, - "step": 5085 - }, - { - "epoch": 0.6894868840235884, - "grad_norm": 1.6839460763947565, - "learning_rate": 4.644401908119482e-07, - "loss": 1.1282, - "step": 5086 - }, - { - "epoch": 0.6896224496712533, - "grad_norm": 1.5649669171286584, - "learning_rate": 4.640693916781414e-07, - "loss": 1.1246, - "step": 5087 - }, - { - "epoch": 0.6897580153189182, - "grad_norm": 1.5520216661043789, - "learning_rate": 4.636986958943212e-07, - "loss": 1.0894, - "step": 5088 - }, - { - "epoch": 0.689893580966583, - "grad_norm": 1.480994939720819, - "learning_rate": 4.6332810353197503e-07, - "loss": 1.1349, - "step": 5089 - }, - { - "epoch": 0.690029146614248, - "grad_norm": 1.8363490408408871, - "learning_rate": 4.629576146625674e-07, - "loss": 1.1623, - "step": 5090 - }, - { - "epoch": 0.6901647122619128, - "grad_norm": 2.3686811902379423, - "learning_rate": 4.625872293575448e-07, - "loss": 1.0866, - "step": 5091 - }, - { - "epoch": 0.6903002779095777, - "grad_norm": 1.529245460946844, - "learning_rate": 4.6221694768833276e-07, - "loss": 1.1194, - "step": 5092 - }, - { - "epoch": 0.6904358435572426, - "grad_norm": 1.767152784764307, - "learning_rate": 4.6184676972633753e-07, - "loss": 1.1201, - "step": 5093 - }, - { - "epoch": 0.6905714092049074, - "grad_norm": 1.5088201768766683, - "learning_rate": 4.614766955429447e-07, - "loss": 1.1429, - "step": 5094 - }, - { - "epoch": 0.6907069748525724, - "grad_norm": 3.804781154481856, - "learning_rate": 4.6110672520952033e-07, - "loss": 1.1266, - "step": 5095 - }, - { - "epoch": 0.6908425405002372, - "grad_norm": 1.4937794011732857, - "learning_rate": 4.607368587974102e-07, - "loss": 1.1376, - "step": 5096 - }, - { - "epoch": 0.6909781061479021, - "grad_norm": 1.7991873139240044, - "learning_rate": 4.6036709637794026e-07, - "loss": 1.1405, - "step": 5097 - }, - { - "epoch": 0.691113671795567, - "grad_norm": 1.4341544182929433, - "learning_rate": 4.599974380224161e-07, - "loss": 1.1648, - "step": 5098 - }, - { - "epoch": 0.6912492374432319, - "grad_norm": 1.6617796800786673, - "learning_rate": 4.5962788380212346e-07, - "loss": 1.126, - "step": 5099 - }, - { - "epoch": 0.6913848030908968, - "grad_norm": 1.427249386289172, - "learning_rate": 4.592584337883281e-07, - "loss": 1.1137, - "step": 5100 - }, - { - "epoch": 0.6915203687385616, - "grad_norm": 1.6209354731282715, - "learning_rate": 4.5888908805227536e-07, - "loss": 1.1461, - "step": 5101 - }, - { - "epoch": 0.6916559343862265, - "grad_norm": 1.7531249505814985, - "learning_rate": 4.585198466651907e-07, - "loss": 1.1335, - "step": 5102 - }, - { - "epoch": 0.6917915000338914, - "grad_norm": 2.1047497543628944, - "learning_rate": 4.581507096982794e-07, - "loss": 1.0979, - "step": 5103 - }, - { - "epoch": 0.6919270656815563, - "grad_norm": 1.6224796112440663, - "learning_rate": 4.5778167722272674e-07, - "loss": 1.1059, - "step": 5104 - }, - { - "epoch": 0.6920626313292212, - "grad_norm": 2.5418199582552874, - "learning_rate": 4.57412749309698e-07, - "loss": 1.0953, - "step": 5105 - }, - { - "epoch": 0.692198196976886, - "grad_norm": 6.533709851156899, - "learning_rate": 4.570439260303368e-07, - "loss": 1.1118, - "step": 5106 - }, - { - "epoch": 0.6923337626245509, - "grad_norm": 1.5579263753593178, - "learning_rate": 4.566752074557694e-07, - "loss": 1.1477, - "step": 5107 - }, - { - "epoch": 0.6924693282722159, - "grad_norm": 1.5024366427352795, - "learning_rate": 4.563065936570988e-07, - "loss": 1.1242, - "step": 5108 - }, - { - "epoch": 0.6926048939198807, - "grad_norm": 1.6618188818437418, - "learning_rate": 4.559380847054106e-07, - "loss": 1.1295, - "step": 5109 - }, - { - "epoch": 0.6927404595675456, - "grad_norm": 1.882921368972763, - "learning_rate": 4.555696806717679e-07, - "loss": 1.1504, - "step": 5110 - }, - { - "epoch": 0.6928760252152104, - "grad_norm": 1.724458687739725, - "learning_rate": 4.552013816272148e-07, - "loss": 1.106, - "step": 5111 - }, - { - "epoch": 0.6930115908628753, - "grad_norm": 1.784435397195635, - "learning_rate": 4.548331876427749e-07, - "loss": 1.1161, - "step": 5112 - }, - { - "epoch": 0.6931471565105403, - "grad_norm": 1.5563460942203795, - "learning_rate": 4.544650987894514e-07, - "loss": 1.1186, - "step": 5113 - }, - { - "epoch": 0.6932827221582051, - "grad_norm": 1.5468048580184683, - "learning_rate": 4.5409711513822745e-07, - "loss": 1.1652, - "step": 5114 - }, - { - "epoch": 0.69341828780587, - "grad_norm": 1.5813698509845677, - "learning_rate": 4.537292367600658e-07, - "loss": 1.1338, - "step": 5115 - }, - { - "epoch": 0.6935538534535349, - "grad_norm": 1.489228430410561, - "learning_rate": 4.5336146372590876e-07, - "loss": 1.1116, - "step": 5116 - }, - { - "epoch": 0.6936894191011997, - "grad_norm": 2.0498603993642983, - "learning_rate": 4.5299379610667865e-07, - "loss": 1.1904, - "step": 5117 - }, - { - "epoch": 0.6938249847488647, - "grad_norm": 2.074859003824733, - "learning_rate": 4.5262623397327706e-07, - "loss": 1.1272, - "step": 5118 - }, - { - "epoch": 0.6939605503965295, - "grad_norm": 1.5511335100253298, - "learning_rate": 4.522587773965856e-07, - "loss": 1.171, - "step": 5119 - }, - { - "epoch": 0.6940961160441944, - "grad_norm": 1.691992357473617, - "learning_rate": 4.518914264474657e-07, - "loss": 1.1502, - "step": 5120 - }, - { - "epoch": 0.6942316816918593, - "grad_norm": 1.992792281023209, - "learning_rate": 4.5152418119675684e-07, - "loss": 1.1105, - "step": 5121 - }, - { - "epoch": 0.6943672473395242, - "grad_norm": 1.4639211990523915, - "learning_rate": 4.5115704171528103e-07, - "loss": 1.0933, - "step": 5122 - }, - { - "epoch": 0.6945028129871891, - "grad_norm": 1.6191077508830551, - "learning_rate": 4.507900080738367e-07, - "loss": 1.1674, - "step": 5123 - }, - { - "epoch": 0.6946383786348539, - "grad_norm": 1.4545318571516928, - "learning_rate": 4.5042308034320487e-07, - "loss": 1.1586, - "step": 5124 - }, - { - "epoch": 0.6947739442825188, - "grad_norm": 2.270814683559817, - "learning_rate": 4.500562585941432e-07, - "loss": 1.0901, - "step": 5125 - }, - { - "epoch": 0.6949095099301837, - "grad_norm": 1.569916862722935, - "learning_rate": 4.496895428973917e-07, - "loss": 1.147, - "step": 5126 - }, - { - "epoch": 0.6950450755778486, - "grad_norm": 1.6426784695613386, - "learning_rate": 4.4932293332366733e-07, - "loss": 1.1513, - "step": 5127 - }, - { - "epoch": 0.6951806412255135, - "grad_norm": 1.5483762394791516, - "learning_rate": 4.489564299436691e-07, - "loss": 1.1255, - "step": 5128 - }, - { - "epoch": 0.6953162068731783, - "grad_norm": 3.14891976840678, - "learning_rate": 4.4859003282807305e-07, - "loss": 1.1031, - "step": 5129 - }, - { - "epoch": 0.6954517725208432, - "grad_norm": 1.484878898529198, - "learning_rate": 4.4822374204753734e-07, - "loss": 1.144, - "step": 5130 - }, - { - "epoch": 0.6955873381685082, - "grad_norm": 1.4134385728342282, - "learning_rate": 4.4785755767269675e-07, - "loss": 1.1242, - "step": 5131 - }, - { - "epoch": 0.695722903816173, - "grad_norm": 1.5299152786384362, - "learning_rate": 4.474914797741686e-07, - "loss": 1.1147, - "step": 5132 - }, - { - "epoch": 0.6958584694638379, - "grad_norm": 1.4642357989527712, - "learning_rate": 4.471255084225468e-07, - "loss": 1.114, - "step": 5133 - }, - { - "epoch": 0.6959940351115027, - "grad_norm": 1.8896965438528213, - "learning_rate": 4.467596436884068e-07, - "loss": 1.1296, - "step": 5134 - }, - { - "epoch": 0.6961296007591676, - "grad_norm": 1.7232441953908024, - "learning_rate": 4.463938856423023e-07, - "loss": 1.1357, - "step": 5135 - }, - { - "epoch": 0.6962651664068326, - "grad_norm": 1.5104092022431248, - "learning_rate": 4.4602823435476723e-07, - "loss": 1.1438, - "step": 5136 - }, - { - "epoch": 0.6964007320544974, - "grad_norm": 1.7490588616220661, - "learning_rate": 4.4566268989631427e-07, - "loss": 1.133, - "step": 5137 - }, - { - "epoch": 0.6965362977021623, - "grad_norm": 1.5685346061176184, - "learning_rate": 4.452972523374359e-07, - "loss": 1.1355, - "step": 5138 - }, - { - "epoch": 0.6966718633498271, - "grad_norm": 1.5020041858796862, - "learning_rate": 4.4493192174860394e-07, - "loss": 1.1573, - "step": 5139 - }, - { - "epoch": 0.696807428997492, - "grad_norm": 1.8323229978720112, - "learning_rate": 4.4456669820026935e-07, - "loss": 1.1429, - "step": 5140 - }, - { - "epoch": 0.696942994645157, - "grad_norm": 2.032889927678819, - "learning_rate": 4.442015817628627e-07, - "loss": 1.141, - "step": 5141 - }, - { - "epoch": 0.6970785602928218, - "grad_norm": 1.774093739919427, - "learning_rate": 4.438365725067937e-07, - "loss": 1.1512, - "step": 5142 - }, - { - "epoch": 0.6972141259404867, - "grad_norm": 1.9847054500465258, - "learning_rate": 4.434716705024518e-07, - "loss": 1.1214, - "step": 5143 - }, - { - "epoch": 0.6973496915881515, - "grad_norm": 1.6432638525407721, - "learning_rate": 4.4310687582020524e-07, - "loss": 1.1011, - "step": 5144 - }, - { - "epoch": 0.6974852572358164, - "grad_norm": 1.5559487196291886, - "learning_rate": 4.4274218853040213e-07, - "loss": 1.1156, - "step": 5145 - }, - { - "epoch": 0.6976208228834814, - "grad_norm": 1.5785875176425834, - "learning_rate": 4.4237760870336883e-07, - "loss": 1.1256, - "step": 5146 - }, - { - "epoch": 0.6977563885311462, - "grad_norm": 1.4689482446722029, - "learning_rate": 4.420131364094122e-07, - "loss": 1.0941, - "step": 5147 - }, - { - "epoch": 0.6978919541788111, - "grad_norm": 1.9923430989270816, - "learning_rate": 4.4164877171881765e-07, - "loss": 1.1894, - "step": 5148 - }, - { - "epoch": 0.6980275198264759, - "grad_norm": 2.294579829786927, - "learning_rate": 4.4128451470185013e-07, - "loss": 1.1651, - "step": 5149 - }, - { - "epoch": 0.6981630854741409, - "grad_norm": 1.95373740293365, - "learning_rate": 4.409203654287538e-07, - "loss": 1.1281, - "step": 5150 - }, - { - "epoch": 0.6982986511218058, - "grad_norm": 1.547597427144631, - "learning_rate": 4.4055632396975174e-07, - "loss": 1.1168, - "step": 5151 - }, - { - "epoch": 0.6984342167694706, - "grad_norm": 1.686671344083448, - "learning_rate": 4.4019239039504676e-07, - "loss": 1.1168, - "step": 5152 - }, - { - "epoch": 0.6985697824171355, - "grad_norm": 1.5510773000629237, - "learning_rate": 4.3982856477482034e-07, - "loss": 1.1239, - "step": 5153 - }, - { - "epoch": 0.6987053480648003, - "grad_norm": 3.5525914020915885, - "learning_rate": 4.394648471792335e-07, - "loss": 1.1472, - "step": 5154 - }, - { - "epoch": 0.6988409137124653, - "grad_norm": 2.494593967683602, - "learning_rate": 4.391012376784263e-07, - "loss": 1.1193, - "step": 5155 - }, - { - "epoch": 0.6989764793601302, - "grad_norm": 1.5056410194079155, - "learning_rate": 4.3873773634251796e-07, - "loss": 1.1046, - "step": 5156 - }, - { - "epoch": 0.699112045007795, - "grad_norm": 1.5187471277456848, - "learning_rate": 4.3837434324160684e-07, - "loss": 1.1389, - "step": 5157 - }, - { - "epoch": 0.6992476106554599, - "grad_norm": 1.7087411130894514, - "learning_rate": 4.380110584457705e-07, - "loss": 1.112, - "step": 5158 - }, - { - "epoch": 0.6993831763031247, - "grad_norm": 1.442902424367394, - "learning_rate": 4.376478820250653e-07, - "loss": 1.1115, - "step": 5159 - }, - { - "epoch": 0.6995187419507897, - "grad_norm": 1.4275466980614566, - "learning_rate": 4.3728481404952724e-07, - "loss": 1.1225, - "step": 5160 - }, - { - "epoch": 0.6996543075984546, - "grad_norm": 1.4685065150512506, - "learning_rate": 4.369218545891713e-07, - "loss": 1.1669, - "step": 5161 - }, - { - "epoch": 0.6997898732461194, - "grad_norm": 2.2108542333134733, - "learning_rate": 4.3655900371399025e-07, - "loss": 1.1329, - "step": 5162 - }, - { - "epoch": 0.6999254388937843, - "grad_norm": 3.344905293060164, - "learning_rate": 4.361962614939586e-07, - "loss": 1.1495, - "step": 5163 - }, - { - "epoch": 0.7000610045414492, - "grad_norm": 1.795965568742283, - "learning_rate": 4.358336279990268e-07, - "loss": 1.1217, - "step": 5164 - }, - { - "epoch": 0.7001965701891141, - "grad_norm": 1.5069226137106995, - "learning_rate": 4.354711032991273e-07, - "loss": 1.1108, - "step": 5165 - }, - { - "epoch": 0.700332135836779, - "grad_norm": 1.525596147377813, - "learning_rate": 4.3510868746416875e-07, - "loss": 1.1472, - "step": 5166 - }, - { - "epoch": 0.7004677014844438, - "grad_norm": 2.1280308714805742, - "learning_rate": 4.3474638056404146e-07, - "loss": 1.1299, - "step": 5167 - }, - { - "epoch": 0.7006032671321087, - "grad_norm": 1.5199261069620764, - "learning_rate": 4.343841826686121e-07, - "loss": 1.1285, - "step": 5168 - }, - { - "epoch": 0.7007388327797736, - "grad_norm": 1.80980919640645, - "learning_rate": 4.3402209384772925e-07, - "loss": 1.1455, - "step": 5169 - }, - { - "epoch": 0.7008743984274385, - "grad_norm": 1.7799040358788523, - "learning_rate": 4.336601141712172e-07, - "loss": 1.0864, - "step": 5170 - }, - { - "epoch": 0.7010099640751034, - "grad_norm": 1.7247317629129393, - "learning_rate": 4.332982437088825e-07, - "loss": 1.0857, - "step": 5171 - }, - { - "epoch": 0.7011455297227682, - "grad_norm": 2.366321711237902, - "learning_rate": 4.3293648253050786e-07, - "loss": 1.1197, - "step": 5172 - }, - { - "epoch": 0.7012810953704331, - "grad_norm": 2.041352476523526, - "learning_rate": 4.3257483070585644e-07, - "loss": 1.1545, - "step": 5173 - }, - { - "epoch": 0.701416661018098, - "grad_norm": 1.542656702842601, - "learning_rate": 4.3221328830466996e-07, - "loss": 1.1068, - "step": 5174 - }, - { - "epoch": 0.7015522266657629, - "grad_norm": 2.4787532864342756, - "learning_rate": 4.318518553966689e-07, - "loss": 1.1496, - "step": 5175 - }, - { - "epoch": 0.7016877923134278, - "grad_norm": 1.7336070677634021, - "learning_rate": 4.3149053205155295e-07, - "loss": 1.1471, - "step": 5176 - }, - { - "epoch": 0.7018233579610926, - "grad_norm": 2.0884633642661807, - "learning_rate": 4.3112931833900036e-07, - "loss": 1.1067, - "step": 5177 - }, - { - "epoch": 0.7019589236087576, - "grad_norm": 1.4989075297811785, - "learning_rate": 4.307682143286683e-07, - "loss": 1.1357, - "step": 5178 - }, - { - "epoch": 0.7020944892564224, - "grad_norm": 1.6157098889333126, - "learning_rate": 4.3040722009019284e-07, - "loss": 1.1037, - "step": 5179 - }, - { - "epoch": 0.7022300549040873, - "grad_norm": 1.6479421046060307, - "learning_rate": 4.300463356931888e-07, - "loss": 1.1304, - "step": 5180 - }, - { - "epoch": 0.7023656205517522, - "grad_norm": 2.638010571897554, - "learning_rate": 4.296855612072501e-07, - "loss": 1.0905, - "step": 5181 - }, - { - "epoch": 0.702501186199417, - "grad_norm": 3.003822875960002, - "learning_rate": 4.293248967019495e-07, - "loss": 1.1828, - "step": 5182 - }, - { - "epoch": 0.702636751847082, - "grad_norm": 1.4357844578088785, - "learning_rate": 4.289643422468372e-07, - "loss": 1.1237, - "step": 5183 - }, - { - "epoch": 0.7027723174947468, - "grad_norm": 1.5145957259865335, - "learning_rate": 4.286038979114447e-07, - "loss": 1.1183, - "step": 5184 - }, - { - "epoch": 0.7029078831424117, - "grad_norm": 1.7120519289252663, - "learning_rate": 4.282435637652795e-07, - "loss": 1.1539, - "step": 5185 - }, - { - "epoch": 0.7030434487900766, - "grad_norm": 1.756582006206239, - "learning_rate": 4.278833398778305e-07, - "loss": 1.1263, - "step": 5186 - }, - { - "epoch": 0.7031790144377414, - "grad_norm": 1.9604136553414062, - "learning_rate": 4.2752322631856275e-07, - "loss": 1.136, - "step": 5187 - }, - { - "epoch": 0.7033145800854064, - "grad_norm": 1.6260757702910649, - "learning_rate": 4.2716322315692266e-07, - "loss": 1.124, - "step": 5188 - }, - { - "epoch": 0.7034501457330712, - "grad_norm": 1.6124312898629183, - "learning_rate": 4.2680333046233286e-07, - "loss": 1.1135, - "step": 5189 - }, - { - "epoch": 0.7035857113807361, - "grad_norm": 1.4877343963859904, - "learning_rate": 4.2644354830419627e-07, - "loss": 1.116, - "step": 5190 - }, - { - "epoch": 0.703721277028401, - "grad_norm": 1.3871520063717433, - "learning_rate": 4.2608387675189404e-07, - "loss": 1.1025, - "step": 5191 - }, - { - "epoch": 0.7038568426760659, - "grad_norm": 1.5423904316836008, - "learning_rate": 4.2572431587478594e-07, - "loss": 1.1152, - "step": 5192 - }, - { - "epoch": 0.7039924083237308, - "grad_norm": 2.468598936334706, - "learning_rate": 4.253648657422105e-07, - "loss": 1.1469, - "step": 5193 - }, - { - "epoch": 0.7041279739713956, - "grad_norm": 1.6887204738130872, - "learning_rate": 4.2500552642348475e-07, - "loss": 1.1273, - "step": 5194 - }, - { - "epoch": 0.7042635396190605, - "grad_norm": 1.7092647778240457, - "learning_rate": 4.2464629798790453e-07, - "loss": 1.1223, - "step": 5195 - }, - { - "epoch": 0.7043991052667254, - "grad_norm": 1.9536531382368467, - "learning_rate": 4.242871805047442e-07, - "loss": 1.1514, - "step": 5196 - }, - { - "epoch": 0.7045346709143903, - "grad_norm": 1.7888068094399, - "learning_rate": 4.2392817404325665e-07, - "loss": 1.1368, - "step": 5197 - }, - { - "epoch": 0.7046702365620552, - "grad_norm": 1.9575359611723084, - "learning_rate": 4.2356927867267355e-07, - "loss": 1.1127, - "step": 5198 - }, - { - "epoch": 0.7048058022097201, - "grad_norm": 1.6471327778831848, - "learning_rate": 4.23210494462205e-07, - "loss": 1.1348, - "step": 5199 - }, - { - "epoch": 0.7049413678573849, - "grad_norm": 2.0491976447299853, - "learning_rate": 4.228518214810396e-07, - "loss": 1.1794, - "step": 5200 - }, - { - "epoch": 0.7050769335050499, - "grad_norm": 1.747783443617125, - "learning_rate": 4.2249325979834484e-07, - "loss": 1.1606, - "step": 5201 - }, - { - "epoch": 0.7052124991527147, - "grad_norm": 1.4726412126945436, - "learning_rate": 4.221348094832666e-07, - "loss": 1.1403, - "step": 5202 - }, - { - "epoch": 0.7053480648003796, - "grad_norm": 1.5457224743932823, - "learning_rate": 4.217764706049283e-07, - "loss": 1.1452, - "step": 5203 - }, - { - "epoch": 0.7054836304480445, - "grad_norm": 1.445285695012565, - "learning_rate": 4.2141824323243416e-07, - "loss": 1.1456, - "step": 5204 - }, - { - "epoch": 0.7056191960957093, - "grad_norm": 1.5510119995707496, - "learning_rate": 4.21060127434864e-07, - "loss": 1.083, - "step": 5205 - }, - { - "epoch": 0.7057547617433743, - "grad_norm": 1.5060515451345011, - "learning_rate": 4.207021232812792e-07, - "loss": 1.1071, - "step": 5206 - }, - { - "epoch": 0.7058903273910391, - "grad_norm": 2.345330398500057, - "learning_rate": 4.2034423084071637e-07, - "loss": 1.1186, - "step": 5207 - }, - { - "epoch": 0.706025893038704, - "grad_norm": 1.479765459274507, - "learning_rate": 4.199864501821939e-07, - "loss": 1.0955, - "step": 5208 - }, - { - "epoch": 0.7061614586863689, - "grad_norm": 1.5512866178670361, - "learning_rate": 4.196287813747058e-07, - "loss": 1.1168, - "step": 5209 - }, - { - "epoch": 0.7062970243340337, - "grad_norm": 1.9112686569861843, - "learning_rate": 4.1927122448722597e-07, - "loss": 1.1148, - "step": 5210 - }, - { - "epoch": 0.7064325899816987, - "grad_norm": 2.06121322631287, - "learning_rate": 4.1891377958870657e-07, - "loss": 1.1184, - "step": 5211 - }, - { - "epoch": 0.7065681556293635, - "grad_norm": 4.166803987071641, - "learning_rate": 4.18556446748078e-07, - "loss": 1.1622, - "step": 5212 - }, - { - "epoch": 0.7067037212770284, - "grad_norm": 1.697410459373497, - "learning_rate": 4.1819922603424895e-07, - "loss": 1.0854, - "step": 5213 - }, - { - "epoch": 0.7068392869246933, - "grad_norm": 2.44950345913227, - "learning_rate": 4.1784211751610675e-07, - "loss": 1.1633, - "step": 5214 - }, - { - "epoch": 0.7069748525723581, - "grad_norm": 1.5052845724092532, - "learning_rate": 4.174851212625169e-07, - "loss": 1.1258, - "step": 5215 - }, - { - "epoch": 0.7071104182200231, - "grad_norm": 1.595589654372437, - "learning_rate": 4.171282373423234e-07, - "loss": 1.1547, - "step": 5216 - }, - { - "epoch": 0.7072459838676879, - "grad_norm": 1.704528808987417, - "learning_rate": 4.167714658243486e-07, - "loss": 1.1545, - "step": 5217 - }, - { - "epoch": 0.7073815495153528, - "grad_norm": 1.7017848115767975, - "learning_rate": 4.1641480677739236e-07, - "loss": 1.1534, - "step": 5218 - }, - { - "epoch": 0.7075171151630177, - "grad_norm": 1.6415328387063495, - "learning_rate": 4.160582602702347e-07, - "loss": 1.1105, - "step": 5219 - }, - { - "epoch": 0.7076526808106826, - "grad_norm": 1.5720760624575498, - "learning_rate": 4.1570182637163153e-07, - "loss": 1.1034, - "step": 5220 - }, - { - "epoch": 0.7077882464583475, - "grad_norm": 1.539168852420745, - "learning_rate": 4.153455051503196e-07, - "loss": 1.1291, - "step": 5221 - }, - { - "epoch": 0.7079238121060123, - "grad_norm": 1.730582165648342, - "learning_rate": 4.149892966750114e-07, - "loss": 1.1475, - "step": 5222 - }, - { - "epoch": 0.7080593777536772, - "grad_norm": 1.8487646126120718, - "learning_rate": 4.1463320101440027e-07, - "loss": 1.1623, - "step": 5223 - }, - { - "epoch": 0.7081949434013421, - "grad_norm": 1.477493117274552, - "learning_rate": 4.1427721823715487e-07, - "loss": 1.1262, - "step": 5224 - }, - { - "epoch": 0.708330509049007, - "grad_norm": 1.5991883156065025, - "learning_rate": 4.1392134841192537e-07, - "loss": 1.1622, - "step": 5225 - }, - { - "epoch": 0.7084660746966719, - "grad_norm": 1.6985563623376927, - "learning_rate": 4.135655916073368e-07, - "loss": 1.1416, - "step": 5226 - }, - { - "epoch": 0.7086016403443367, - "grad_norm": 1.8339216229961295, - "learning_rate": 4.132099478919957e-07, - "loss": 1.1293, - "step": 5227 - }, - { - "epoch": 0.7087372059920016, - "grad_norm": 1.7125331897052676, - "learning_rate": 4.1285441733448344e-07, - "loss": 1.1168, - "step": 5228 - }, - { - "epoch": 0.7088727716396666, - "grad_norm": 1.6949293423518235, - "learning_rate": 4.124990000033629e-07, - "loss": 1.1279, - "step": 5229 - }, - { - "epoch": 0.7090083372873314, - "grad_norm": 1.5685737611907498, - "learning_rate": 4.1214369596717244e-07, - "loss": 1.1471, - "step": 5230 - }, - { - "epoch": 0.7091439029349963, - "grad_norm": 2.30986322913246, - "learning_rate": 4.1178850529442996e-07, - "loss": 1.1258, - "step": 5231 - }, - { - "epoch": 0.7092794685826611, - "grad_norm": 1.389786470593863, - "learning_rate": 4.1143342805363123e-07, - "loss": 1.103, - "step": 5232 - }, - { - "epoch": 0.709415034230326, - "grad_norm": 1.5880655493023719, - "learning_rate": 4.1107846431325e-07, - "loss": 1.1321, - "step": 5233 - }, - { - "epoch": 0.709550599877991, - "grad_norm": 1.5595836552473266, - "learning_rate": 4.1072361414173815e-07, - "loss": 1.1324, - "step": 5234 - }, - { - "epoch": 0.7096861655256558, - "grad_norm": 2.0203654320421878, - "learning_rate": 4.10368877607526e-07, - "loss": 1.1156, - "step": 5235 - }, - { - "epoch": 0.7098217311733207, - "grad_norm": 1.451772559225182, - "learning_rate": 4.100142547790214e-07, - "loss": 1.1199, - "step": 5236 - }, - { - "epoch": 0.7099572968209855, - "grad_norm": 1.5244509004737692, - "learning_rate": 4.096597457246108e-07, - "loss": 1.1141, - "step": 5237 - }, - { - "epoch": 0.7100928624686504, - "grad_norm": 1.4880395236746309, - "learning_rate": 4.0930535051265835e-07, - "loss": 1.1291, - "step": 5238 - }, - { - "epoch": 0.7102284281163154, - "grad_norm": 1.4373969986147546, - "learning_rate": 4.0895106921150644e-07, - "loss": 1.1674, - "step": 5239 - }, - { - "epoch": 0.7103639937639802, - "grad_norm": 2.3657876307551775, - "learning_rate": 4.0859690188947525e-07, - "loss": 1.1213, - "step": 5240 - }, - { - "epoch": 0.7104995594116451, - "grad_norm": 1.5824531213489121, - "learning_rate": 4.0824284861486346e-07, - "loss": 1.1642, - "step": 5241 - }, - { - "epoch": 0.7106351250593099, - "grad_norm": 2.2393930669605537, - "learning_rate": 4.0788890945594714e-07, - "loss": 1.1594, - "step": 5242 - }, - { - "epoch": 0.7107706907069749, - "grad_norm": 1.7762242367859529, - "learning_rate": 4.0753508448098085e-07, - "loss": 1.0915, - "step": 5243 - }, - { - "epoch": 0.7109062563546398, - "grad_norm": 1.65059641712582, - "learning_rate": 4.0718137375819717e-07, - "loss": 1.1188, - "step": 5244 - }, - { - "epoch": 0.7110418220023046, - "grad_norm": 4.130383397636464, - "learning_rate": 4.0682777735580586e-07, - "loss": 1.1292, - "step": 5245 - }, - { - "epoch": 0.7111773876499695, - "grad_norm": 1.5144684009256741, - "learning_rate": 4.064742953419954e-07, - "loss": 1.1455, - "step": 5246 - }, - { - "epoch": 0.7113129532976343, - "grad_norm": 1.7490407058242181, - "learning_rate": 4.061209277849321e-07, - "loss": 1.0959, - "step": 5247 - }, - { - "epoch": 0.7114485189452993, - "grad_norm": 1.4328449315089336, - "learning_rate": 4.057676747527601e-07, - "loss": 1.1321, - "step": 5248 - }, - { - "epoch": 0.7115840845929642, - "grad_norm": 1.8868452962395321, - "learning_rate": 4.054145363136013e-07, - "loss": 1.1463, - "step": 5249 - }, - { - "epoch": 0.711719650240629, - "grad_norm": 1.5472538221781234, - "learning_rate": 4.05061512535556e-07, - "loss": 1.1077, - "step": 5250 - }, - { - "epoch": 0.7118552158882939, - "grad_norm": 1.7648109620298167, - "learning_rate": 4.047086034867018e-07, - "loss": 1.106, - "step": 5251 - }, - { - "epoch": 0.7119907815359587, - "grad_norm": 11.78138489146233, - "learning_rate": 4.0435580923509436e-07, - "loss": 1.119, - "step": 5252 - }, - { - "epoch": 0.7121263471836237, - "grad_norm": 1.4804305933257895, - "learning_rate": 4.040031298487675e-07, - "loss": 1.1391, - "step": 5253 - }, - { - "epoch": 0.7122619128312886, - "grad_norm": 1.7336752431692217, - "learning_rate": 4.036505653957325e-07, - "loss": 1.1419, - "step": 5254 - }, - { - "epoch": 0.7123974784789534, - "grad_norm": 1.7013382498300442, - "learning_rate": 4.032981159439787e-07, - "loss": 1.1488, - "step": 5255 - }, - { - "epoch": 0.7125330441266183, - "grad_norm": 2.320387665209212, - "learning_rate": 4.029457815614731e-07, - "loss": 1.1306, - "step": 5256 - }, - { - "epoch": 0.7126686097742831, - "grad_norm": 1.7361153983083748, - "learning_rate": 4.025935623161607e-07, - "loss": 1.1393, - "step": 5257 - }, - { - "epoch": 0.7128041754219481, - "grad_norm": 1.6309816187284483, - "learning_rate": 4.022414582759646e-07, - "loss": 1.1475, - "step": 5258 - }, - { - "epoch": 0.712939741069613, - "grad_norm": 1.9102681715029068, - "learning_rate": 4.01889469508784e-07, - "loss": 1.1292, - "step": 5259 - }, - { - "epoch": 0.7130753067172778, - "grad_norm": 1.831065170140749, - "learning_rate": 4.0153759608249883e-07, - "loss": 1.1373, - "step": 5260 - }, - { - "epoch": 0.7132108723649427, - "grad_norm": 1.590231950569179, - "learning_rate": 4.011858380649634e-07, - "loss": 1.0675, - "step": 5261 - }, - { - "epoch": 0.7133464380126076, - "grad_norm": 1.6990801357412049, - "learning_rate": 4.008341955240132e-07, - "loss": 1.1557, - "step": 5262 - }, - { - "epoch": 0.7134820036602725, - "grad_norm": 1.56336966499735, - "learning_rate": 4.0048266852745815e-07, - "loss": 1.117, - "step": 5263 - }, - { - "epoch": 0.7136175693079374, - "grad_norm": 1.556608420894694, - "learning_rate": 4.0013125714308883e-07, - "loss": 1.1357, - "step": 5264 - }, - { - "epoch": 0.7137531349556022, - "grad_norm": 1.931719427342082, - "learning_rate": 3.9977996143867086e-07, - "loss": 1.1513, - "step": 5265 - }, - { - "epoch": 0.7138887006032671, - "grad_norm": 1.6665022988374465, - "learning_rate": 3.9942878148195015e-07, - "loss": 1.0905, - "step": 5266 - }, - { - "epoch": 0.714024266250932, - "grad_norm": 2.8245528476949686, - "learning_rate": 3.9907771734064756e-07, - "loss": 1.1542, - "step": 5267 - }, - { - "epoch": 0.7141598318985969, - "grad_norm": 1.6667423633274212, - "learning_rate": 3.987267690824646e-07, - "loss": 1.1561, - "step": 5268 - }, - { - "epoch": 0.7142953975462618, - "grad_norm": 1.429718527251034, - "learning_rate": 3.983759367750772e-07, - "loss": 1.104, - "step": 5269 - }, - { - "epoch": 0.7144309631939266, - "grad_norm": 1.6826403959477096, - "learning_rate": 3.980252204861423e-07, - "loss": 1.1263, - "step": 5270 - }, - { - "epoch": 0.7145665288415916, - "grad_norm": 1.6649721589858357, - "learning_rate": 3.9767462028329156e-07, - "loss": 1.1372, - "step": 5271 - }, - { - "epoch": 0.7147020944892564, - "grad_norm": 1.459311204651444, - "learning_rate": 3.973241362341357e-07, - "loss": 1.0844, - "step": 5272 - }, - { - "epoch": 0.7148376601369213, - "grad_norm": 1.5058131953409792, - "learning_rate": 3.9697376840626304e-07, - "loss": 1.1222, - "step": 5273 - }, - { - "epoch": 0.7149732257845862, - "grad_norm": 1.5133304395007847, - "learning_rate": 3.9662351686723914e-07, - "loss": 1.1564, - "step": 5274 - }, - { - "epoch": 0.715108791432251, - "grad_norm": 1.3918228432112048, - "learning_rate": 3.962733816846073e-07, - "loss": 1.0932, - "step": 5275 - }, - { - "epoch": 0.715244357079916, - "grad_norm": 1.7017032303140962, - "learning_rate": 3.9592336292588825e-07, - "loss": 1.0964, - "step": 5276 - }, - { - "epoch": 0.7153799227275809, - "grad_norm": 1.7718491721845813, - "learning_rate": 3.9557346065858034e-07, - "loss": 1.1332, - "step": 5277 - }, - { - "epoch": 0.7155154883752457, - "grad_norm": 1.5917459322224354, - "learning_rate": 3.952236749501594e-07, - "loss": 1.1402, - "step": 5278 - }, - { - "epoch": 0.7156510540229106, - "grad_norm": 1.6577029036195212, - "learning_rate": 3.948740058680791e-07, - "loss": 1.1481, - "step": 5279 - }, - { - "epoch": 0.7157866196705754, - "grad_norm": 1.8992902920006884, - "learning_rate": 3.9452445347977e-07, - "loss": 1.1325, - "step": 5280 - }, - { - "epoch": 0.7159221853182404, - "grad_norm": 1.509212087737686, - "learning_rate": 3.941750178526413e-07, - "loss": 1.1343, - "step": 5281 - }, - { - "epoch": 0.7160577509659053, - "grad_norm": 1.8484722074793531, - "learning_rate": 3.938256990540775e-07, - "loss": 1.149, - "step": 5282 - }, - { - "epoch": 0.7161933166135701, - "grad_norm": 2.215097372451185, - "learning_rate": 3.934764971514434e-07, - "loss": 1.1102, - "step": 5283 - }, - { - "epoch": 0.716328882261235, - "grad_norm": 2.5515179276865294, - "learning_rate": 3.931274122120786e-07, - "loss": 1.1552, - "step": 5284 - }, - { - "epoch": 0.7164644479088998, - "grad_norm": 1.7200462381235366, - "learning_rate": 3.9277844430330277e-07, - "loss": 1.1667, - "step": 5285 - }, - { - "epoch": 0.7166000135565648, - "grad_norm": 1.3937328072542279, - "learning_rate": 3.9242959349241036e-07, - "loss": 1.147, - "step": 5286 - }, - { - "epoch": 0.7167355792042297, - "grad_norm": 1.727350989625206, - "learning_rate": 3.9208085984667507e-07, - "loss": 1.1181, - "step": 5287 - }, - { - "epoch": 0.7168711448518945, - "grad_norm": 1.6230529665815427, - "learning_rate": 3.917322434333472e-07, - "loss": 1.1512, - "step": 5288 - }, - { - "epoch": 0.7170067104995594, - "grad_norm": 3.321688143915773, - "learning_rate": 3.913837443196549e-07, - "loss": 1.1162, - "step": 5289 - }, - { - "epoch": 0.7171422761472243, - "grad_norm": 1.4396734832756524, - "learning_rate": 3.9103536257280343e-07, - "loss": 1.1432, - "step": 5290 - }, - { - "epoch": 0.7172778417948892, - "grad_norm": 1.4511889116798282, - "learning_rate": 3.9068709825997534e-07, - "loss": 1.1474, - "step": 5291 - }, - { - "epoch": 0.7174134074425541, - "grad_norm": 10.073086145103824, - "learning_rate": 3.903389514483308e-07, - "loss": 1.1046, - "step": 5292 - }, - { - "epoch": 0.7175489730902189, - "grad_norm": 1.5696892194670138, - "learning_rate": 3.899909222050071e-07, - "loss": 1.097, - "step": 5293 - }, - { - "epoch": 0.7176845387378838, - "grad_norm": 1.5068312711052936, - "learning_rate": 3.896430105971188e-07, - "loss": 1.1382, - "step": 5294 - }, - { - "epoch": 0.7178201043855487, - "grad_norm": 2.0829325094650697, - "learning_rate": 3.8929521669175813e-07, - "loss": 1.161, - "step": 5295 - }, - { - "epoch": 0.7179556700332136, - "grad_norm": 1.5093566280218527, - "learning_rate": 3.889475405559943e-07, - "loss": 1.1286, - "step": 5296 - }, - { - "epoch": 0.7180912356808785, - "grad_norm": 2.2391027174337426, - "learning_rate": 3.88599982256874e-07, - "loss": 1.1762, - "step": 5297 - }, - { - "epoch": 0.7182268013285433, - "grad_norm": 1.5896870131509178, - "learning_rate": 3.8825254186142097e-07, - "loss": 1.1481, - "step": 5298 - }, - { - "epoch": 0.7183623669762083, - "grad_norm": 1.6447315278921162, - "learning_rate": 3.8790521943663633e-07, - "loss": 1.1166, - "step": 5299 - }, - { - "epoch": 0.7184979326238731, - "grad_norm": 1.7239418100330277, - "learning_rate": 3.875580150494986e-07, - "loss": 1.1328, - "step": 5300 - }, - { - "epoch": 0.718633498271538, - "grad_norm": 1.751061019257844, - "learning_rate": 3.8721092876696373e-07, - "loss": 1.1335, - "step": 5301 - }, - { - "epoch": 0.7187690639192029, - "grad_norm": 1.7450590644460298, - "learning_rate": 3.868639606559635e-07, - "loss": 1.1437, - "step": 5302 - }, - { - "epoch": 0.7189046295668677, - "grad_norm": 1.5232856056244652, - "learning_rate": 3.8651711078340923e-07, - "loss": 1.1325, - "step": 5303 - }, - { - "epoch": 0.7190401952145327, - "grad_norm": 1.6126366693121217, - "learning_rate": 3.86170379216187e-07, - "loss": 1.1042, - "step": 5304 - }, - { - "epoch": 0.7191757608621975, - "grad_norm": 1.5259163279994197, - "learning_rate": 3.8582376602116254e-07, - "loss": 1.1233, - "step": 5305 - }, - { - "epoch": 0.7193113265098624, - "grad_norm": 1.4473163978124917, - "learning_rate": 3.854772712651765e-07, - "loss": 1.1317, - "step": 5306 - }, - { - "epoch": 0.7194468921575273, - "grad_norm": 1.615976562993275, - "learning_rate": 3.8513089501504783e-07, - "loss": 1.114, - "step": 5307 - }, - { - "epoch": 0.7195824578051921, - "grad_norm": 1.4882343163753862, - "learning_rate": 3.847846373375726e-07, - "loss": 1.1016, - "step": 5308 - }, - { - "epoch": 0.7197180234528571, - "grad_norm": 2.582763622244887, - "learning_rate": 3.844384982995239e-07, - "loss": 1.1783, - "step": 5309 - }, - { - "epoch": 0.7198535891005219, - "grad_norm": 1.6540082073029063, - "learning_rate": 3.8409247796765185e-07, - "loss": 1.1486, - "step": 5310 - }, - { - "epoch": 0.7199891547481868, - "grad_norm": 1.601433990779869, - "learning_rate": 3.837465764086837e-07, - "loss": 1.0838, - "step": 5311 - }, - { - "epoch": 0.7201247203958517, - "grad_norm": 1.5069715491230624, - "learning_rate": 3.83400793689324e-07, - "loss": 1.0816, - "step": 5312 - }, - { - "epoch": 0.7202602860435166, - "grad_norm": 3.75770513362962, - "learning_rate": 3.83055129876254e-07, - "loss": 1.1634, - "step": 5313 - }, - { - "epoch": 0.7203958516911815, - "grad_norm": 1.633032408312539, - "learning_rate": 3.8270958503613225e-07, - "loss": 1.1203, - "step": 5314 - }, - { - "epoch": 0.7205314173388463, - "grad_norm": 1.5468424187150256, - "learning_rate": 3.8236415923559463e-07, - "loss": 1.1421, - "step": 5315 - }, - { - "epoch": 0.7206669829865112, - "grad_norm": 4.040558721220551, - "learning_rate": 3.820188525412538e-07, - "loss": 1.1195, - "step": 5316 - }, - { - "epoch": 0.7208025486341761, - "grad_norm": 1.4551998165788553, - "learning_rate": 3.8167366501969855e-07, - "loss": 1.1098, - "step": 5317 - }, - { - "epoch": 0.720938114281841, - "grad_norm": 1.6373174000956847, - "learning_rate": 3.8132859673749685e-07, - "loss": 1.1313, - "step": 5318 - }, - { - "epoch": 0.7210736799295059, - "grad_norm": 1.6588385393599137, - "learning_rate": 3.809836477611912e-07, - "loss": 1.0898, - "step": 5319 - }, - { - "epoch": 0.7212092455771707, - "grad_norm": 1.971090972747685, - "learning_rate": 3.806388181573035e-07, - "loss": 1.1081, - "step": 5320 - }, - { - "epoch": 0.7213448112248356, - "grad_norm": 1.5693387341522138, - "learning_rate": 3.8029410799233006e-07, - "loss": 1.1414, - "step": 5321 - }, - { - "epoch": 0.7214803768725006, - "grad_norm": 1.4543872603596673, - "learning_rate": 3.7994951733274695e-07, - "loss": 1.1339, - "step": 5322 - }, - { - "epoch": 0.7216159425201654, - "grad_norm": 1.6915110995361213, - "learning_rate": 3.7960504624500436e-07, - "loss": 1.1491, - "step": 5323 - }, - { - "epoch": 0.7217515081678303, - "grad_norm": 1.9628740381412688, - "learning_rate": 3.792606947955321e-07, - "loss": 1.1462, - "step": 5324 - }, - { - "epoch": 0.7218870738154951, - "grad_norm": 1.740884404802411, - "learning_rate": 3.7891646305073456e-07, - "loss": 1.1314, - "step": 5325 - }, - { - "epoch": 0.72202263946316, - "grad_norm": 1.8920527548328734, - "learning_rate": 3.78572351076995e-07, - "loss": 1.0977, - "step": 5326 - }, - { - "epoch": 0.722158205110825, - "grad_norm": 2.1901691268956696, - "learning_rate": 3.7822835894067185e-07, - "loss": 1.1042, - "step": 5327 - }, - { - "epoch": 0.7222937707584898, - "grad_norm": 4.8582837711244, - "learning_rate": 3.7788448670810225e-07, - "loss": 1.1256, - "step": 5328 - }, - { - "epoch": 0.7224293364061547, - "grad_norm": 4.2925483522460635, - "learning_rate": 3.775407344455984e-07, - "loss": 1.1539, - "step": 5329 - }, - { - "epoch": 0.7225649020538195, - "grad_norm": 1.7789777923480774, - "learning_rate": 3.7719710221945055e-07, - "loss": 1.151, - "step": 5330 - }, - { - "epoch": 0.7227004677014844, - "grad_norm": 1.6133117067537395, - "learning_rate": 3.768535900959253e-07, - "loss": 1.1012, - "step": 5331 - }, - { - "epoch": 0.7228360333491494, - "grad_norm": 1.6532868934925096, - "learning_rate": 3.765101981412665e-07, - "loss": 1.1533, - "step": 5332 - }, - { - "epoch": 0.7229715989968142, - "grad_norm": 1.7982502645113594, - "learning_rate": 3.7616692642169443e-07, - "loss": 1.1056, - "step": 5333 - }, - { - "epoch": 0.7231071646444791, - "grad_norm": 1.63032509701791, - "learning_rate": 3.7582377500340636e-07, - "loss": 1.1375, - "step": 5334 - }, - { - "epoch": 0.7232427302921439, - "grad_norm": 1.8477589761599433, - "learning_rate": 3.7548074395257634e-07, - "loss": 1.1342, - "step": 5335 - }, - { - "epoch": 0.7233782959398088, - "grad_norm": 1.676501131544696, - "learning_rate": 3.751378333353552e-07, - "loss": 1.1036, - "step": 5336 - }, - { - "epoch": 0.7235138615874738, - "grad_norm": 3.3375513840649615, - "learning_rate": 3.747950432178706e-07, - "loss": 1.133, - "step": 5337 - }, - { - "epoch": 0.7236494272351386, - "grad_norm": 2.0252146592194644, - "learning_rate": 3.744523736662267e-07, - "loss": 1.1347, - "step": 5338 - }, - { - "epoch": 0.7237849928828035, - "grad_norm": 1.6008889218275155, - "learning_rate": 3.7410982474650486e-07, - "loss": 1.1547, - "step": 5339 - }, - { - "epoch": 0.7239205585304683, - "grad_norm": 1.5289742876317323, - "learning_rate": 3.7376739652476287e-07, - "loss": 1.1435, - "step": 5340 - }, - { - "epoch": 0.7240561241781333, - "grad_norm": 1.3548548296898886, - "learning_rate": 3.734250890670352e-07, - "loss": 1.1266, - "step": 5341 - }, - { - "epoch": 0.7241916898257982, - "grad_norm": 1.4501088046025934, - "learning_rate": 3.730829024393333e-07, - "loss": 1.1159, - "step": 5342 - }, - { - "epoch": 0.724327255473463, - "grad_norm": 1.7586006276412973, - "learning_rate": 3.727408367076453e-07, - "loss": 1.1308, - "step": 5343 - }, - { - "epoch": 0.7244628211211279, - "grad_norm": 1.5768717898243372, - "learning_rate": 3.723988919379354e-07, - "loss": 1.1092, - "step": 5344 - }, - { - "epoch": 0.7245983867687927, - "grad_norm": 1.6089322018986985, - "learning_rate": 3.7205706819614527e-07, - "loss": 1.1126, - "step": 5345 - }, - { - "epoch": 0.7247339524164577, - "grad_norm": 1.648889357778547, - "learning_rate": 3.717153655481927e-07, - "loss": 1.147, - "step": 5346 - }, - { - "epoch": 0.7248695180641226, - "grad_norm": 1.4387618832484639, - "learning_rate": 3.7137378405997267e-07, - "loss": 1.1249, - "step": 5347 - }, - { - "epoch": 0.7250050837117874, - "grad_norm": 2.1508829398636142, - "learning_rate": 3.710323237973563e-07, - "loss": 1.1056, - "step": 5348 - }, - { - "epoch": 0.7251406493594523, - "grad_norm": 1.6359677630210596, - "learning_rate": 3.7069098482619145e-07, - "loss": 1.1365, - "step": 5349 - }, - { - "epoch": 0.7252762150071171, - "grad_norm": 1.6683256245325149, - "learning_rate": 3.703497672123026e-07, - "loss": 1.1613, - "step": 5350 - }, - { - "epoch": 0.7254117806547821, - "grad_norm": 1.5574589705677118, - "learning_rate": 3.7000867102149114e-07, - "loss": 1.1153, - "step": 5351 - }, - { - "epoch": 0.725547346302447, - "grad_norm": 1.7093488679766402, - "learning_rate": 3.6966769631953466e-07, - "loss": 1.0803, - "step": 5352 - }, - { - "epoch": 0.7256829119501118, - "grad_norm": 1.9779567844076673, - "learning_rate": 3.693268431721873e-07, - "loss": 1.1722, - "step": 5353 - }, - { - "epoch": 0.7258184775977767, - "grad_norm": 1.7946643803294549, - "learning_rate": 3.6898611164518e-07, - "loss": 1.1685, - "step": 5354 - }, - { - "epoch": 0.7259540432454417, - "grad_norm": 2.5495909084339865, - "learning_rate": 3.6864550180422014e-07, - "loss": 1.1362, - "step": 5355 - }, - { - "epoch": 0.7260896088931065, - "grad_norm": 1.5521536902378574, - "learning_rate": 3.683050137149918e-07, - "loss": 1.1442, - "step": 5356 - }, - { - "epoch": 0.7262251745407714, - "grad_norm": 1.713839041472764, - "learning_rate": 3.6796464744315545e-07, - "loss": 1.1196, - "step": 5357 - }, - { - "epoch": 0.7263607401884362, - "grad_norm": 2.3498739017507213, - "learning_rate": 3.6762440305434726e-07, - "loss": 1.1606, - "step": 5358 - }, - { - "epoch": 0.7264963058361011, - "grad_norm": 1.7127325697308675, - "learning_rate": 3.6728428061418195e-07, - "loss": 1.1088, - "step": 5359 - }, - { - "epoch": 0.7266318714837661, - "grad_norm": 1.5026447938416305, - "learning_rate": 3.66944280188248e-07, - "loss": 1.1482, - "step": 5360 - }, - { - "epoch": 0.7267674371314309, - "grad_norm": 1.900219968070367, - "learning_rate": 3.6660440184211326e-07, - "loss": 1.1287, - "step": 5361 - }, - { - "epoch": 0.7269030027790958, - "grad_norm": 1.5841686170573437, - "learning_rate": 3.662646456413193e-07, - "loss": 1.1629, - "step": 5362 - }, - { - "epoch": 0.7270385684267606, - "grad_norm": 1.5914643060159959, - "learning_rate": 3.6592501165138666e-07, - "loss": 1.1347, - "step": 5363 - }, - { - "epoch": 0.7271741340744255, - "grad_norm": 1.4799565453565167, - "learning_rate": 3.6558549993780985e-07, - "loss": 1.1525, - "step": 5364 - }, - { - "epoch": 0.7273096997220905, - "grad_norm": 1.6565926951218564, - "learning_rate": 3.6524611056606226e-07, - "loss": 1.1421, - "step": 5365 - }, - { - "epoch": 0.7274452653697553, - "grad_norm": 1.4556822576153634, - "learning_rate": 3.6490684360159106e-07, - "loss": 1.1108, - "step": 5366 - }, - { - "epoch": 0.7275808310174202, - "grad_norm": 2.5528233900067914, - "learning_rate": 3.6456769910982264e-07, - "loss": 1.1116, - "step": 5367 - }, - { - "epoch": 0.727716396665085, - "grad_norm": 1.4939420697250805, - "learning_rate": 3.6422867715615703e-07, - "loss": 1.1539, - "step": 5368 - }, - { - "epoch": 0.72785196231275, - "grad_norm": 1.4303025416609347, - "learning_rate": 3.638897778059732e-07, - "loss": 1.1593, - "step": 5369 - }, - { - "epoch": 0.7279875279604149, - "grad_norm": 1.6823315045135818, - "learning_rate": 3.6355100112462425e-07, - "loss": 1.1451, - "step": 5370 - }, - { - "epoch": 0.7281230936080797, - "grad_norm": 1.9308563775271876, - "learning_rate": 3.632123471774409e-07, - "loss": 1.1515, - "step": 5371 - }, - { - "epoch": 0.7282586592557446, - "grad_norm": 1.584642679373204, - "learning_rate": 3.628738160297299e-07, - "loss": 1.0938, - "step": 5372 - }, - { - "epoch": 0.7283942249034094, - "grad_norm": 1.9075297872239034, - "learning_rate": 3.625354077467743e-07, - "loss": 1.1418, - "step": 5373 - }, - { - "epoch": 0.7285297905510744, - "grad_norm": 1.7571528061291155, - "learning_rate": 3.6219712239383336e-07, - "loss": 1.0971, - "step": 5374 - }, - { - "epoch": 0.7286653561987393, - "grad_norm": 1.7138119599225152, - "learning_rate": 3.6185896003614303e-07, - "loss": 1.1258, - "step": 5375 - }, - { - "epoch": 0.7288009218464041, - "grad_norm": 1.591787000841555, - "learning_rate": 3.6152092073891504e-07, - "loss": 1.0777, - "step": 5376 - }, - { - "epoch": 0.728936487494069, - "grad_norm": 1.7388555023862784, - "learning_rate": 3.6118300456733764e-07, - "loss": 1.1951, - "step": 5377 - }, - { - "epoch": 0.7290720531417338, - "grad_norm": 1.4934167498004367, - "learning_rate": 3.6084521158657555e-07, - "loss": 1.0942, - "step": 5378 - }, - { - "epoch": 0.7292076187893988, - "grad_norm": 2.8021124413537755, - "learning_rate": 3.605075418617687e-07, - "loss": 1.1458, - "step": 5379 - }, - { - "epoch": 0.7293431844370637, - "grad_norm": 3.6190118745023088, - "learning_rate": 3.6016999545803504e-07, - "loss": 1.1489, - "step": 5380 - }, - { - "epoch": 0.7294787500847285, - "grad_norm": 1.407547746958454, - "learning_rate": 3.5983257244046674e-07, - "loss": 1.1335, - "step": 5381 - }, - { - "epoch": 0.7296143157323934, - "grad_norm": 1.97139438527592, - "learning_rate": 3.594952728741343e-07, - "loss": 1.1451, - "step": 5382 - }, - { - "epoch": 0.7297498813800583, - "grad_norm": 1.4901025037073317, - "learning_rate": 3.591580968240819e-07, - "loss": 1.1545, - "step": 5383 - }, - { - "epoch": 0.7298854470277232, - "grad_norm": 1.4100062862663427, - "learning_rate": 3.5882104435533276e-07, - "loss": 1.1456, - "step": 5384 - }, - { - "epoch": 0.7300210126753881, - "grad_norm": 1.7424813098664567, - "learning_rate": 3.584841155328837e-07, - "loss": 1.113, - "step": 5385 - }, - { - "epoch": 0.7301565783230529, - "grad_norm": 2.859959380240663, - "learning_rate": 3.581473104217092e-07, - "loss": 1.1337, - "step": 5386 - }, - { - "epoch": 0.7302921439707178, - "grad_norm": 1.459121443394398, - "learning_rate": 3.578106290867593e-07, - "loss": 1.1177, - "step": 5387 - }, - { - "epoch": 0.7304277096183827, - "grad_norm": 2.1778847993213333, - "learning_rate": 3.5747407159296063e-07, - "loss": 1.1188, - "step": 5388 - }, - { - "epoch": 0.7305632752660476, - "grad_norm": 3.389916580355758, - "learning_rate": 3.571376380052152e-07, - "loss": 1.1072, - "step": 5389 - }, - { - "epoch": 0.7306988409137125, - "grad_norm": 2.053505597413966, - "learning_rate": 3.5680132838840205e-07, - "loss": 1.0945, - "step": 5390 - }, - { - "epoch": 0.7308344065613773, - "grad_norm": 1.4669416084417857, - "learning_rate": 3.564651428073755e-07, - "loss": 1.1478, - "step": 5391 - }, - { - "epoch": 0.7309699722090423, - "grad_norm": 2.9409395337407642, - "learning_rate": 3.561290813269665e-07, - "loss": 1.1009, - "step": 5392 - }, - { - "epoch": 0.7311055378567071, - "grad_norm": 2.0139537786396544, - "learning_rate": 3.5579314401198166e-07, - "loss": 1.1601, - "step": 5393 - }, - { - "epoch": 0.731241103504372, - "grad_norm": 1.5668191357023813, - "learning_rate": 3.5545733092720396e-07, - "loss": 1.1251, - "step": 5394 - }, - { - "epoch": 0.7313766691520369, - "grad_norm": 1.5213604467066775, - "learning_rate": 3.551216421373924e-07, - "loss": 1.146, - "step": 5395 - }, - { - "epoch": 0.7315122347997017, - "grad_norm": 8.253879808306294, - "learning_rate": 3.5478607770728164e-07, - "loss": 1.1382, - "step": 5396 - }, - { - "epoch": 0.7316478004473667, - "grad_norm": 2.1856457477416376, - "learning_rate": 3.544506377015829e-07, - "loss": 1.1471, - "step": 5397 - }, - { - "epoch": 0.7317833660950315, - "grad_norm": 1.6628551061935597, - "learning_rate": 3.5411532218498296e-07, - "loss": 1.0925, - "step": 5398 - }, - { - "epoch": 0.7319189317426964, - "grad_norm": 2.906611581536199, - "learning_rate": 3.537801312221448e-07, - "loss": 1.1028, - "step": 5399 - }, - { - "epoch": 0.7320544973903613, - "grad_norm": 1.7158519900854976, - "learning_rate": 3.5344506487770774e-07, - "loss": 1.1147, - "step": 5400 - }, - { - "epoch": 0.7321900630380261, - "grad_norm": 1.6288067123258536, - "learning_rate": 3.5311012321628577e-07, - "loss": 1.1368, - "step": 5401 - }, - { - "epoch": 0.7323256286856911, - "grad_norm": 1.6751899073223848, - "learning_rate": 3.527753063024708e-07, - "loss": 1.1344, - "step": 5402 - }, - { - "epoch": 0.7324611943333559, - "grad_norm": 2.0010202474647434, - "learning_rate": 3.524406142008285e-07, - "loss": 1.1232, - "step": 5403 - }, - { - "epoch": 0.7325967599810208, - "grad_norm": 2.008241393520621, - "learning_rate": 3.5210604697590297e-07, - "loss": 1.1104, - "step": 5404 - }, - { - "epoch": 0.7327323256286857, - "grad_norm": 1.6555350496095416, - "learning_rate": 3.5177160469221176e-07, - "loss": 1.1669, - "step": 5405 - }, - { - "epoch": 0.7328678912763505, - "grad_norm": 1.5572893016974747, - "learning_rate": 3.514372874142497e-07, - "loss": 1.1751, - "step": 5406 - }, - { - "epoch": 0.7330034569240155, - "grad_norm": 1.5216618847835734, - "learning_rate": 3.511030952064874e-07, - "loss": 1.1446, - "step": 5407 - }, - { - "epoch": 0.7331390225716803, - "grad_norm": 2.433042915758862, - "learning_rate": 3.507690281333712e-07, - "loss": 1.1263, - "step": 5408 - }, - { - "epoch": 0.7332745882193452, - "grad_norm": 1.4352852945143089, - "learning_rate": 3.504350862593231e-07, - "loss": 1.1379, - "step": 5409 - }, - { - "epoch": 0.7334101538670101, - "grad_norm": 1.5042558645646191, - "learning_rate": 3.501012696487412e-07, - "loss": 1.1182, - "step": 5410 - }, - { - "epoch": 0.733545719514675, - "grad_norm": 1.8700099941989463, - "learning_rate": 3.497675783659995e-07, - "loss": 1.1316, - "step": 5411 - }, - { - "epoch": 0.7336812851623399, - "grad_norm": 2.046069674109708, - "learning_rate": 3.4943401247544766e-07, - "loss": 1.1412, - "step": 5412 - }, - { - "epoch": 0.7338168508100047, - "grad_norm": 2.0136486857969422, - "learning_rate": 3.491005720414113e-07, - "loss": 1.1091, - "step": 5413 - }, - { - "epoch": 0.7339524164576696, - "grad_norm": 1.6536559680445275, - "learning_rate": 3.487672571281918e-07, - "loss": 1.1083, - "step": 5414 - }, - { - "epoch": 0.7340879821053345, - "grad_norm": 2.4195219944589983, - "learning_rate": 3.4843406780006644e-07, - "loss": 1.1054, - "step": 5415 - }, - { - "epoch": 0.7342235477529994, - "grad_norm": 1.6820929332280206, - "learning_rate": 3.481010041212874e-07, - "loss": 1.1352, - "step": 5416 - }, - { - "epoch": 0.7343591134006643, - "grad_norm": 1.9443004503340873, - "learning_rate": 3.477680661560846e-07, - "loss": 1.1658, - "step": 5417 - }, - { - "epoch": 0.7344946790483291, - "grad_norm": 1.5192109441316397, - "learning_rate": 3.4743525396866114e-07, - "loss": 1.184, - "step": 5418 - }, - { - "epoch": 0.734630244695994, - "grad_norm": 1.5894226642342957, - "learning_rate": 3.471025676231986e-07, - "loss": 1.1256, - "step": 5419 - }, - { - "epoch": 0.734765810343659, - "grad_norm": 1.5362456898170755, - "learning_rate": 3.467700071838515e-07, - "loss": 1.1885, - "step": 5420 - }, - { - "epoch": 0.7349013759913238, - "grad_norm": 1.580365728095864, - "learning_rate": 3.4643757271475293e-07, - "loss": 1.1174, - "step": 5421 - }, - { - "epoch": 0.7350369416389887, - "grad_norm": 1.6461600129635068, - "learning_rate": 3.4610526428000897e-07, - "loss": 1.1348, - "step": 5422 - }, - { - "epoch": 0.7351725072866535, - "grad_norm": 1.4986721513113068, - "learning_rate": 3.457730819437038e-07, - "loss": 1.1457, - "step": 5423 - }, - { - "epoch": 0.7353080729343184, - "grad_norm": 1.638576972704897, - "learning_rate": 3.454410257698951e-07, - "loss": 1.1008, - "step": 5424 - }, - { - "epoch": 0.7354436385819834, - "grad_norm": 1.955956952511495, - "learning_rate": 3.451090958226184e-07, - "loss": 1.1385, - "step": 5425 - }, - { - "epoch": 0.7355792042296482, - "grad_norm": 1.5935517381885116, - "learning_rate": 3.447772921658825e-07, - "loss": 1.1199, - "step": 5426 - }, - { - "epoch": 0.7357147698773131, - "grad_norm": 2.170661477147692, - "learning_rate": 3.444456148636744e-07, - "loss": 1.1426, - "step": 5427 - }, - { - "epoch": 0.7358503355249779, - "grad_norm": 1.892813161112932, - "learning_rate": 3.441140639799546e-07, - "loss": 1.1322, - "step": 5428 - }, - { - "epoch": 0.7359859011726428, - "grad_norm": 1.4624303327186696, - "learning_rate": 3.4378263957866026e-07, - "loss": 1.1017, - "step": 5429 - }, - { - "epoch": 0.7361214668203078, - "grad_norm": 1.8566063392234644, - "learning_rate": 3.4345134172370407e-07, - "loss": 1.0855, - "step": 5430 - }, - { - "epoch": 0.7362570324679726, - "grad_norm": 1.693678681570148, - "learning_rate": 3.431201704789741e-07, - "loss": 1.1495, - "step": 5431 - }, - { - "epoch": 0.7363925981156375, - "grad_norm": 7.553133914609454, - "learning_rate": 3.427891259083342e-07, - "loss": 1.1118, - "step": 5432 - }, - { - "epoch": 0.7365281637633023, - "grad_norm": 1.6968663582405663, - "learning_rate": 3.4245820807562365e-07, - "loss": 1.1088, - "step": 5433 - }, - { - "epoch": 0.7366637294109672, - "grad_norm": 1.444493463599562, - "learning_rate": 3.4212741704465733e-07, - "loss": 1.131, - "step": 5434 - }, - { - "epoch": 0.7367992950586322, - "grad_norm": 1.48865289293312, - "learning_rate": 3.4179675287922573e-07, - "loss": 1.1233, - "step": 5435 - }, - { - "epoch": 0.736934860706297, - "grad_norm": 1.3760182778638879, - "learning_rate": 3.4146621564309476e-07, - "loss": 1.1286, - "step": 5436 - }, - { - "epoch": 0.7370704263539619, - "grad_norm": 2.254641591979723, - "learning_rate": 3.41135805400006e-07, - "loss": 1.1563, - "step": 5437 - }, - { - "epoch": 0.7372059920016268, - "grad_norm": 2.0481365711066237, - "learning_rate": 3.408055222136763e-07, - "loss": 1.1333, - "step": 5438 - }, - { - "epoch": 0.7373415576492917, - "grad_norm": 1.6866803225304787, - "learning_rate": 3.4047536614779837e-07, - "loss": 1.1451, - "step": 5439 - }, - { - "epoch": 0.7374771232969566, - "grad_norm": 1.5932886296946749, - "learning_rate": 3.4014533726604046e-07, - "loss": 1.1272, - "step": 5440 - }, - { - "epoch": 0.7376126889446214, - "grad_norm": 1.5722710480827515, - "learning_rate": 3.398154356320454e-07, - "loss": 1.0952, - "step": 5441 - }, - { - "epoch": 0.7377482545922863, - "grad_norm": 1.405599446950876, - "learning_rate": 3.394856613094322e-07, - "loss": 1.0729, - "step": 5442 - }, - { - "epoch": 0.7378838202399512, - "grad_norm": 1.5161537799119773, - "learning_rate": 3.3915601436179564e-07, - "loss": 1.0958, - "step": 5443 - }, - { - "epoch": 0.7380193858876161, - "grad_norm": 2.2141859200810377, - "learning_rate": 3.388264948527052e-07, - "loss": 1.1238, - "step": 5444 - }, - { - "epoch": 0.738154951535281, - "grad_norm": 1.691664708227661, - "learning_rate": 3.384971028457063e-07, - "loss": 1.1674, - "step": 5445 - }, - { - "epoch": 0.7382905171829458, - "grad_norm": 2.4787790792952458, - "learning_rate": 3.381678384043195e-07, - "loss": 1.1313, - "step": 5446 - }, - { - "epoch": 0.7384260828306107, - "grad_norm": 1.7111807498636238, - "learning_rate": 3.378387015920409e-07, - "loss": 1.1161, - "step": 5447 - }, - { - "epoch": 0.7385616484782757, - "grad_norm": 1.544324011946911, - "learning_rate": 3.3750969247234184e-07, - "loss": 1.1533, - "step": 5448 - }, - { - "epoch": 0.7386972141259405, - "grad_norm": 1.5623764280778365, - "learning_rate": 3.371808111086694e-07, - "loss": 1.1133, - "step": 5449 - }, - { - "epoch": 0.7388327797736054, - "grad_norm": 1.5266805840427737, - "learning_rate": 3.3685205756444534e-07, - "loss": 1.0878, - "step": 5450 - }, - { - "epoch": 0.7389683454212702, - "grad_norm": 1.884331026054299, - "learning_rate": 3.365234319030675e-07, - "loss": 1.1351, - "step": 5451 - }, - { - "epoch": 0.7391039110689351, - "grad_norm": 1.4951264048254902, - "learning_rate": 3.361949341879087e-07, - "loss": 1.1213, - "step": 5452 - }, - { - "epoch": 0.7392394767166001, - "grad_norm": 1.4771081448917478, - "learning_rate": 3.35866564482317e-07, - "loss": 1.1346, - "step": 5453 - }, - { - "epoch": 0.7393750423642649, - "grad_norm": 1.719214690349787, - "learning_rate": 3.3553832284961603e-07, - "loss": 1.1251, - "step": 5454 - }, - { - "epoch": 0.7395106080119298, - "grad_norm": 2.9246437480897503, - "learning_rate": 3.352102093531045e-07, - "loss": 1.1223, - "step": 5455 - }, - { - "epoch": 0.7396461736595946, - "grad_norm": 1.7356744286117596, - "learning_rate": 3.348822240560569e-07, - "loss": 1.1405, - "step": 5456 - }, - { - "epoch": 0.7397817393072595, - "grad_norm": 1.72362424367332, - "learning_rate": 3.345543670217217e-07, - "loss": 1.1217, - "step": 5457 - }, - { - "epoch": 0.7399173049549245, - "grad_norm": 1.640456352964763, - "learning_rate": 3.3422663831332477e-07, - "loss": 1.1193, - "step": 5458 - }, - { - "epoch": 0.7400528706025893, - "grad_norm": 1.6213249922530506, - "learning_rate": 3.338990379940646e-07, - "loss": 1.15, - "step": 5459 - }, - { - "epoch": 0.7401884362502542, - "grad_norm": 1.576244465381312, - "learning_rate": 3.335715661271178e-07, - "loss": 1.1146, - "step": 5460 - }, - { - "epoch": 0.740324001897919, - "grad_norm": 1.5347533552344916, - "learning_rate": 3.3324422277563326e-07, - "loss": 1.1356, - "step": 5461 - }, - { - "epoch": 0.740459567545584, - "grad_norm": 1.4942103187230449, - "learning_rate": 3.32917008002738e-07, - "loss": 1.1171, - "step": 5462 - }, - { - "epoch": 0.7405951331932489, - "grad_norm": 1.42505667346409, - "learning_rate": 3.3258992187153144e-07, - "loss": 1.1193, - "step": 5463 - }, - { - "epoch": 0.7407306988409137, - "grad_norm": 1.5574173274775787, - "learning_rate": 3.322629644450909e-07, - "loss": 1.1401, - "step": 5464 - }, - { - "epoch": 0.7408662644885786, - "grad_norm": 2.3096845814778977, - "learning_rate": 3.319361357864663e-07, - "loss": 1.1283, - "step": 5465 - }, - { - "epoch": 0.7410018301362434, - "grad_norm": 1.6516551431017594, - "learning_rate": 3.316094359586852e-07, - "loss": 1.101, - "step": 5466 - }, - { - "epoch": 0.7411373957839084, - "grad_norm": 1.8869829602288586, - "learning_rate": 3.3128286502474803e-07, - "loss": 1.1396, - "step": 5467 - }, - { - "epoch": 0.7412729614315733, - "grad_norm": 1.567032895238786, - "learning_rate": 3.3095642304763183e-07, - "loss": 1.1446, - "step": 5468 - }, - { - "epoch": 0.7414085270792381, - "grad_norm": 1.4800807474569573, - "learning_rate": 3.306301100902883e-07, - "loss": 1.0695, - "step": 5469 - }, - { - "epoch": 0.741544092726903, - "grad_norm": 1.794952602599827, - "learning_rate": 3.303039262156443e-07, - "loss": 1.0878, - "step": 5470 - }, - { - "epoch": 0.7416796583745678, - "grad_norm": 1.7002187965826974, - "learning_rate": 3.2997787148660195e-07, - "loss": 1.1043, - "step": 5471 - }, - { - "epoch": 0.7418152240222328, - "grad_norm": 1.9690895390758825, - "learning_rate": 3.296519459660383e-07, - "loss": 1.1793, - "step": 5472 - }, - { - "epoch": 0.7419507896698977, - "grad_norm": 1.4636092838014303, - "learning_rate": 3.293261497168054e-07, - "loss": 1.1188, - "step": 5473 - }, - { - "epoch": 0.7420863553175625, - "grad_norm": 2.195885692573736, - "learning_rate": 3.2900048280173055e-07, - "loss": 1.1042, - "step": 5474 - }, - { - "epoch": 0.7422219209652274, - "grad_norm": 2.6767345663038196, - "learning_rate": 3.2867494528361605e-07, - "loss": 1.1524, - "step": 5475 - }, - { - "epoch": 0.7423574866128922, - "grad_norm": 1.7499265386893947, - "learning_rate": 3.2834953722523915e-07, - "loss": 1.1093, - "step": 5476 - }, - { - "epoch": 0.7424930522605572, - "grad_norm": 1.4574293002971066, - "learning_rate": 3.2802425868935277e-07, - "loss": 1.1044, - "step": 5477 - }, - { - "epoch": 0.7426286179082221, - "grad_norm": 4.14219888688796, - "learning_rate": 3.276991097386831e-07, - "loss": 1.1355, - "step": 5478 - }, - { - "epoch": 0.7427641835558869, - "grad_norm": 1.760778820606215, - "learning_rate": 3.27374090435934e-07, - "loss": 1.1559, - "step": 5479 - }, - { - "epoch": 0.7428997492035518, - "grad_norm": 2.0734748228465776, - "learning_rate": 3.270492008437815e-07, - "loss": 1.0908, - "step": 5480 - }, - { - "epoch": 0.7430353148512167, - "grad_norm": 7.563311375904182, - "learning_rate": 3.267244410248794e-07, - "loss": 1.0963, - "step": 5481 - }, - { - "epoch": 0.7431708804988816, - "grad_norm": 1.743421472261632, - "learning_rate": 3.2639981104185355e-07, - "loss": 1.1263, - "step": 5482 - }, - { - "epoch": 0.7433064461465465, - "grad_norm": 2.359873876176534, - "learning_rate": 3.260753109573078e-07, - "loss": 1.1341, - "step": 5483 - }, - { - "epoch": 0.7434420117942113, - "grad_norm": 1.6341039815062548, - "learning_rate": 3.2575094083381837e-07, - "loss": 1.164, - "step": 5484 - }, - { - "epoch": 0.7435775774418762, - "grad_norm": 1.932144684498183, - "learning_rate": 3.2542670073393776e-07, - "loss": 1.0938, - "step": 5485 - }, - { - "epoch": 0.7437131430895411, - "grad_norm": 1.7038590039557413, - "learning_rate": 3.251025907201932e-07, - "loss": 1.1081, - "step": 5486 - }, - { - "epoch": 0.743848708737206, - "grad_norm": 1.7202618527776445, - "learning_rate": 3.247786108550866e-07, - "loss": 1.146, - "step": 5487 - }, - { - "epoch": 0.7439842743848709, - "grad_norm": 2.350135238490276, - "learning_rate": 3.244547612010952e-07, - "loss": 1.1415, - "step": 5488 - }, - { - "epoch": 0.7441198400325357, - "grad_norm": 1.7967400164209104, - "learning_rate": 3.241310418206705e-07, - "loss": 1.1565, - "step": 5489 - }, - { - "epoch": 0.7442554056802007, - "grad_norm": 1.5584964189336472, - "learning_rate": 3.238074527762394e-07, - "loss": 1.1089, - "step": 5490 - }, - { - "epoch": 0.7443909713278655, - "grad_norm": 1.7932036658188204, - "learning_rate": 3.2348399413020365e-07, - "loss": 1.1302, - "step": 5491 - }, - { - "epoch": 0.7445265369755304, - "grad_norm": 2.6328628802066296, - "learning_rate": 3.231606659449394e-07, - "loss": 1.166, - "step": 5492 - }, - { - "epoch": 0.7446621026231953, - "grad_norm": 3.578098917486883, - "learning_rate": 3.228374682827982e-07, - "loss": 1.1112, - "step": 5493 - }, - { - "epoch": 0.7447976682708601, - "grad_norm": 1.5832213671022848, - "learning_rate": 3.2251440120610596e-07, - "loss": 1.1482, - "step": 5494 - }, - { - "epoch": 0.7449332339185251, - "grad_norm": 1.6171499419171973, - "learning_rate": 3.2219146477716376e-07, - "loss": 1.1206, - "step": 5495 - }, - { - "epoch": 0.7450687995661899, - "grad_norm": 1.4703217493180636, - "learning_rate": 3.2186865905824724e-07, - "loss": 1.1065, - "step": 5496 - }, - { - "epoch": 0.7452043652138548, - "grad_norm": 1.5024557964552725, - "learning_rate": 3.215459841116073e-07, - "loss": 1.1073, - "step": 5497 - }, - { - "epoch": 0.7453399308615197, - "grad_norm": 1.9816720477552259, - "learning_rate": 3.212234399994682e-07, - "loss": 1.0972, - "step": 5498 - }, - { - "epoch": 0.7454754965091845, - "grad_norm": 2.3375007335841516, - "learning_rate": 3.209010267840315e-07, - "loss": 1.1611, - "step": 5499 - }, - { - "epoch": 0.7456110621568495, - "grad_norm": 1.9187251494649622, - "learning_rate": 3.205787445274707e-07, - "loss": 1.1659, - "step": 5500 - }, - { - "epoch": 0.7457466278045143, - "grad_norm": 1.6062590154827598, - "learning_rate": 3.2025659329193654e-07, - "loss": 1.1448, - "step": 5501 - }, - { - "epoch": 0.7458821934521792, - "grad_norm": 1.4939746924883037, - "learning_rate": 3.1993457313955217e-07, - "loss": 1.1405, - "step": 5502 - }, - { - "epoch": 0.7460177590998441, - "grad_norm": 1.8736251677387314, - "learning_rate": 3.19612684132418e-07, - "loss": 1.1028, - "step": 5503 - }, - { - "epoch": 0.746153324747509, - "grad_norm": 1.7377266897192405, - "learning_rate": 3.1929092633260667e-07, - "loss": 1.0818, - "step": 5504 - }, - { - "epoch": 0.7462888903951739, - "grad_norm": 2.4415260339332296, - "learning_rate": 3.1896929980216704e-07, - "loss": 1.1247, - "step": 5505 - }, - { - "epoch": 0.7464244560428387, - "grad_norm": 1.6368998999934332, - "learning_rate": 3.186478046031221e-07, - "loss": 1.1217, - "step": 5506 - }, - { - "epoch": 0.7465600216905036, - "grad_norm": 1.9540345543982602, - "learning_rate": 3.1832644079746984e-07, - "loss": 1.1567, - "step": 5507 - }, - { - "epoch": 0.7466955873381685, - "grad_norm": 1.4923984714052274, - "learning_rate": 3.180052084471827e-07, - "loss": 1.1425, - "step": 5508 - }, - { - "epoch": 0.7468311529858334, - "grad_norm": 1.7714277139015153, - "learning_rate": 3.176841076142077e-07, - "loss": 1.1684, - "step": 5509 - }, - { - "epoch": 0.7469667186334983, - "grad_norm": 1.8374084246756608, - "learning_rate": 3.173631383604667e-07, - "loss": 1.1301, - "step": 5510 - }, - { - "epoch": 0.7471022842811631, - "grad_norm": 1.5103891851512357, - "learning_rate": 3.170423007478561e-07, - "loss": 1.0982, - "step": 5511 - }, - { - "epoch": 0.747237849928828, - "grad_norm": 1.7962127606904872, - "learning_rate": 3.167215948382471e-07, - "loss": 1.107, - "step": 5512 - }, - { - "epoch": 0.747373415576493, - "grad_norm": 1.8715169750840128, - "learning_rate": 3.164010206934845e-07, - "loss": 1.1373, - "step": 5513 - }, - { - "epoch": 0.7475089812241578, - "grad_norm": 1.5311105600563946, - "learning_rate": 3.160805783753897e-07, - "loss": 1.1228, - "step": 5514 - }, - { - "epoch": 0.7476445468718227, - "grad_norm": 1.7059777609659832, - "learning_rate": 3.1576026794575615e-07, - "loss": 1.1462, - "step": 5515 - }, - { - "epoch": 0.7477801125194876, - "grad_norm": 1.7333260152947012, - "learning_rate": 3.154400894663546e-07, - "loss": 1.1689, - "step": 5516 - }, - { - "epoch": 0.7479156781671524, - "grad_norm": 1.5309773473111432, - "learning_rate": 3.1512004299892747e-07, - "loss": 1.134, - "step": 5517 - }, - { - "epoch": 0.7480512438148174, - "grad_norm": 1.4780125113362768, - "learning_rate": 3.1480012860519453e-07, - "loss": 1.1287, - "step": 5518 - }, - { - "epoch": 0.7481868094624822, - "grad_norm": 1.7576410215133556, - "learning_rate": 3.1448034634684764e-07, - "loss": 1.1155, - "step": 5519 - }, - { - "epoch": 0.7483223751101471, - "grad_norm": 1.793892781747249, - "learning_rate": 3.141606962855553e-07, - "loss": 1.1396, - "step": 5520 - }, - { - "epoch": 0.748457940757812, - "grad_norm": 1.6766186973303765, - "learning_rate": 3.1384117848295843e-07, - "loss": 1.1123, - "step": 5521 - }, - { - "epoch": 0.7485935064054768, - "grad_norm": 1.5836635428238968, - "learning_rate": 3.135217930006747e-07, - "loss": 1.1599, - "step": 5522 - }, - { - "epoch": 0.7487290720531418, - "grad_norm": 2.002128225237155, - "learning_rate": 3.1320253990029387e-07, - "loss": 1.1767, - "step": 5523 - }, - { - "epoch": 0.7488646377008066, - "grad_norm": 1.7963045424301964, - "learning_rate": 3.128834192433826e-07, - "loss": 1.1042, - "step": 5524 - }, - { - "epoch": 0.7490002033484715, - "grad_norm": 1.6688857983588012, - "learning_rate": 3.125644310914798e-07, - "loss": 1.1179, - "step": 5525 - }, - { - "epoch": 0.7491357689961364, - "grad_norm": 1.6052195466166834, - "learning_rate": 3.122455755061002e-07, - "loss": 1.1428, - "step": 5526 - }, - { - "epoch": 0.7492713346438012, - "grad_norm": 2.4567159738537, - "learning_rate": 3.1192685254873254e-07, - "loss": 1.1006, - "step": 5527 - }, - { - "epoch": 0.7494069002914662, - "grad_norm": 1.469671160722059, - "learning_rate": 3.1160826228084004e-07, - "loss": 1.1075, - "step": 5528 - }, - { - "epoch": 0.749542465939131, - "grad_norm": 1.847860214113767, - "learning_rate": 3.1128980476386035e-07, - "loss": 1.1639, - "step": 5529 - }, - { - "epoch": 0.7496780315867959, - "grad_norm": 1.8037843195191312, - "learning_rate": 3.109714800592055e-07, - "loss": 1.11, - "step": 5530 - }, - { - "epoch": 0.7498135972344608, - "grad_norm": 2.33180803362352, - "learning_rate": 3.106532882282618e-07, - "loss": 1.1144, - "step": 5531 - }, - { - "epoch": 0.7499491628821257, - "grad_norm": 1.816970734271735, - "learning_rate": 3.103352293323901e-07, - "loss": 1.1003, - "step": 5532 - }, - { - "epoch": 0.7500847285297906, - "grad_norm": 1.8820494767397826, - "learning_rate": 3.1001730343292556e-07, - "loss": 1.1249, - "step": 5533 - }, - { - "epoch": 0.7502202941774554, - "grad_norm": 1.788949789074081, - "learning_rate": 3.096995105911776e-07, - "loss": 1.1559, - "step": 5534 - }, - { - "epoch": 0.7503558598251203, - "grad_norm": 3.547484775501951, - "learning_rate": 3.093818508684302e-07, - "loss": 1.1213, - "step": 5535 - }, - { - "epoch": 0.7504914254727852, - "grad_norm": 1.6422240073416166, - "learning_rate": 3.090643243259414e-07, - "loss": 1.1403, - "step": 5536 - }, - { - "epoch": 0.7506269911204501, - "grad_norm": 1.5055879346738459, - "learning_rate": 3.0874693102494374e-07, - "loss": 1.1488, - "step": 5537 - }, - { - "epoch": 0.750762556768115, - "grad_norm": 1.7408642936774008, - "learning_rate": 3.084296710266441e-07, - "loss": 1.0942, - "step": 5538 - }, - { - "epoch": 0.7508981224157798, - "grad_norm": 1.5103707856914317, - "learning_rate": 3.081125443922237e-07, - "loss": 1.1576, - "step": 5539 - }, - { - "epoch": 0.7510336880634447, - "grad_norm": 1.630367856322379, - "learning_rate": 3.077955511828374e-07, - "loss": 1.1281, - "step": 5540 - }, - { - "epoch": 0.7511692537111097, - "grad_norm": 1.8022751393733152, - "learning_rate": 3.074786914596151e-07, - "loss": 1.1541, - "step": 5541 - }, - { - "epoch": 0.7513048193587745, - "grad_norm": 2.2325322477184217, - "learning_rate": 3.071619652836608e-07, - "loss": 1.121, - "step": 5542 - }, - { - "epoch": 0.7514403850064394, - "grad_norm": 1.6983989446516363, - "learning_rate": 3.068453727160525e-07, - "loss": 1.1466, - "step": 5543 - }, - { - "epoch": 0.7515759506541042, - "grad_norm": 1.99312177143774, - "learning_rate": 3.065289138178426e-07, - "loss": 1.097, - "step": 5544 - }, - { - "epoch": 0.7517115163017691, - "grad_norm": 4.040862394817943, - "learning_rate": 3.062125886500578e-07, - "loss": 1.1244, - "step": 5545 - }, - { - "epoch": 0.7518470819494341, - "grad_norm": 2.0421904605845627, - "learning_rate": 3.0589639727369886e-07, - "loss": 1.1341, - "step": 5546 - }, - { - "epoch": 0.7519826475970989, - "grad_norm": 1.6483650457347803, - "learning_rate": 3.0558033974974076e-07, - "loss": 1.1375, - "step": 5547 - }, - { - "epoch": 0.7521182132447638, - "grad_norm": 1.4401385045094528, - "learning_rate": 3.052644161391328e-07, - "loss": 1.1043, - "step": 5548 - }, - { - "epoch": 0.7522537788924286, - "grad_norm": 1.5110498533296846, - "learning_rate": 3.0494862650279816e-07, - "loss": 1.1329, - "step": 5549 - }, - { - "epoch": 0.7523893445400935, - "grad_norm": 1.857247983156151, - "learning_rate": 3.046329709016345e-07, - "loss": 1.1213, - "step": 5550 - }, - { - "epoch": 0.7525249101877585, - "grad_norm": 1.5415842021583062, - "learning_rate": 3.043174493965136e-07, - "loss": 1.1421, - "step": 5551 - }, - { - "epoch": 0.7526604758354233, - "grad_norm": 1.6041590719159964, - "learning_rate": 3.040020620482812e-07, - "loss": 1.1125, - "step": 5552 - }, - { - "epoch": 0.7527960414830882, - "grad_norm": 1.6455157484727976, - "learning_rate": 3.0368680891775755e-07, - "loss": 1.1376, - "step": 5553 - }, - { - "epoch": 0.752931607130753, - "grad_norm": 2.0410842432238767, - "learning_rate": 3.033716900657357e-07, - "loss": 1.1329, - "step": 5554 - }, - { - "epoch": 0.753067172778418, - "grad_norm": 1.7796914465872653, - "learning_rate": 3.0305670555298533e-07, - "loss": 1.1395, - "step": 5555 - }, - { - "epoch": 0.7532027384260829, - "grad_norm": 2.353766170597031, - "learning_rate": 3.027418554402473e-07, - "loss": 1.1058, - "step": 5556 - }, - { - "epoch": 0.7533383040737477, - "grad_norm": 1.648801329880353, - "learning_rate": 3.024271397882393e-07, - "loss": 1.1227, - "step": 5557 - }, - { - "epoch": 0.7534738697214126, - "grad_norm": 1.9133781142300537, - "learning_rate": 3.021125586576504e-07, - "loss": 1.1765, - "step": 5558 - }, - { - "epoch": 0.7536094353690774, - "grad_norm": 1.4686032469849908, - "learning_rate": 3.017981121091464e-07, - "loss": 1.1213, - "step": 5559 - }, - { - "epoch": 0.7537450010167424, - "grad_norm": 1.4542108394339408, - "learning_rate": 3.014838002033645e-07, - "loss": 1.1569, - "step": 5560 - }, - { - "epoch": 0.7538805666644073, - "grad_norm": 1.6358563065856728, - "learning_rate": 3.0116962300091876e-07, - "loss": 1.0795, - "step": 5561 - }, - { - "epoch": 0.7540161323120721, - "grad_norm": 2.2293608805753053, - "learning_rate": 3.0085558056239426e-07, - "loss": 1.1105, - "step": 5562 - }, - { - "epoch": 0.754151697959737, - "grad_norm": 1.696473482453051, - "learning_rate": 3.0054167294835306e-07, - "loss": 1.0829, - "step": 5563 - }, - { - "epoch": 0.7542872636074018, - "grad_norm": 1.550396622193765, - "learning_rate": 3.002279002193283e-07, - "loss": 1.1295, - "step": 5564 - }, - { - "epoch": 0.7544228292550668, - "grad_norm": 4.741270784454152, - "learning_rate": 2.9991426243583005e-07, - "loss": 1.0673, - "step": 5565 - }, - { - "epoch": 0.7545583949027317, - "grad_norm": 1.5387227356937365, - "learning_rate": 2.9960075965833974e-07, - "loss": 1.1619, - "step": 5566 - }, - { - "epoch": 0.7546939605503965, - "grad_norm": 1.5379903708314284, - "learning_rate": 2.9928739194731444e-07, - "loss": 1.1155, - "step": 5567 - }, - { - "epoch": 0.7548295261980614, - "grad_norm": 1.5600155533663091, - "learning_rate": 2.9897415936318436e-07, - "loss": 1.1436, - "step": 5568 - }, - { - "epoch": 0.7549650918457262, - "grad_norm": 1.863179639713212, - "learning_rate": 2.986610619663542e-07, - "loss": 1.0783, - "step": 5569 - }, - { - "epoch": 0.7551006574933912, - "grad_norm": 2.123255181421133, - "learning_rate": 2.983480998172022e-07, - "loss": 1.1309, - "step": 5570 - }, - { - "epoch": 0.7552362231410561, - "grad_norm": 2.2650315829197667, - "learning_rate": 2.980352729760807e-07, - "loss": 1.0829, - "step": 5571 - }, - { - "epoch": 0.7553717887887209, - "grad_norm": 1.8355617406344606, - "learning_rate": 2.9772258150331565e-07, - "loss": 1.1229, - "step": 5572 - }, - { - "epoch": 0.7555073544363858, - "grad_norm": 1.4940820904529237, - "learning_rate": 2.974100254592075e-07, - "loss": 1.1633, - "step": 5573 - }, - { - "epoch": 0.7556429200840507, - "grad_norm": 1.674237846961914, - "learning_rate": 2.970976049040299e-07, - "loss": 1.1323, - "step": 5574 - }, - { - "epoch": 0.7557784857317156, - "grad_norm": 2.1618443569652372, - "learning_rate": 2.967853198980309e-07, - "loss": 1.1154, - "step": 5575 - }, - { - "epoch": 0.7559140513793805, - "grad_norm": 2.229964321499073, - "learning_rate": 2.964731705014324e-07, - "loss": 1.1618, - "step": 5576 - }, - { - "epoch": 0.7560496170270453, - "grad_norm": 1.4518011096624115, - "learning_rate": 2.9616115677442897e-07, - "loss": 1.1322, - "step": 5577 - }, - { - "epoch": 0.7561851826747102, - "grad_norm": 27.813716316483962, - "learning_rate": 2.9584927877719145e-07, - "loss": 1.0867, - "step": 5578 - }, - { - "epoch": 0.7563207483223751, - "grad_norm": 3.6542636438948564, - "learning_rate": 2.9553753656986155e-07, - "loss": 1.1357, - "step": 5579 - }, - { - "epoch": 0.75645631397004, - "grad_norm": 3.0897147234302906, - "learning_rate": 2.952259302125578e-07, - "loss": 1.141, - "step": 5580 - }, - { - "epoch": 0.7565918796177049, - "grad_norm": 2.5318576847913934, - "learning_rate": 2.9491445976536977e-07, - "loss": 1.0947, - "step": 5581 - }, - { - "epoch": 0.7567274452653697, - "grad_norm": 1.6100171197760038, - "learning_rate": 2.9460312528836274e-07, - "loss": 1.0914, - "step": 5582 - }, - { - "epoch": 0.7568630109130347, - "grad_norm": 1.4494342102356041, - "learning_rate": 2.942919268415748e-07, - "loss": 1.0752, - "step": 5583 - }, - { - "epoch": 0.7569985765606995, - "grad_norm": 5.070365382599341, - "learning_rate": 2.9398086448501837e-07, - "loss": 1.1105, - "step": 5584 - }, - { - "epoch": 0.7571341422083644, - "grad_norm": 1.6857971647099654, - "learning_rate": 2.9366993827867913e-07, - "loss": 1.1375, - "step": 5585 - }, - { - "epoch": 0.7572697078560293, - "grad_norm": 1.5886395274692071, - "learning_rate": 2.9335914828251694e-07, - "loss": 1.1187, - "step": 5586 - }, - { - "epoch": 0.7574052735036941, - "grad_norm": 2.3394344435622756, - "learning_rate": 2.9304849455646505e-07, - "loss": 1.1509, - "step": 5587 - }, - { - "epoch": 0.7575408391513591, - "grad_norm": 1.704176868538531, - "learning_rate": 2.9273797716043067e-07, - "loss": 1.1166, - "step": 5588 - }, - { - "epoch": 0.7576764047990239, - "grad_norm": 3.348805901278821, - "learning_rate": 2.9242759615429467e-07, - "loss": 1.1174, - "step": 5589 - }, - { - "epoch": 0.7578119704466888, - "grad_norm": 1.7921985362996256, - "learning_rate": 2.9211735159791153e-07, - "loss": 1.1158, - "step": 5590 - }, - { - "epoch": 0.7579475360943537, - "grad_norm": 1.547289058710695, - "learning_rate": 2.918072435511093e-07, - "loss": 1.1522, - "step": 5591 - }, - { - "epoch": 0.7580831017420185, - "grad_norm": 1.8175384541299482, - "learning_rate": 2.914972720736901e-07, - "loss": 1.1357, - "step": 5592 - }, - { - "epoch": 0.7582186673896835, - "grad_norm": 1.7440840237851118, - "learning_rate": 2.9118743722542937e-07, - "loss": 1.1298, - "step": 5593 - }, - { - "epoch": 0.7583542330373484, - "grad_norm": 1.6703991785900456, - "learning_rate": 2.908777390660765e-07, - "loss": 1.1665, - "step": 5594 - }, - { - "epoch": 0.7584897986850132, - "grad_norm": 2.2444178070394205, - "learning_rate": 2.9056817765535404e-07, - "loss": 1.1238, - "step": 5595 - }, - { - "epoch": 0.7586253643326781, - "grad_norm": 1.9435359463398785, - "learning_rate": 2.9025875305295886e-07, - "loss": 1.1889, - "step": 5596 - }, - { - "epoch": 0.758760929980343, - "grad_norm": 1.5461033830150697, - "learning_rate": 2.8994946531856035e-07, - "loss": 1.1444, - "step": 5597 - }, - { - "epoch": 0.7588964956280079, - "grad_norm": 2.2814083513201395, - "learning_rate": 2.8964031451180316e-07, - "loss": 1.1478, - "step": 5598 - }, - { - "epoch": 0.7590320612756728, - "grad_norm": 2.074788460194466, - "learning_rate": 2.893313006923035e-07, - "loss": 1.1126, - "step": 5599 - }, - { - "epoch": 0.7591676269233376, - "grad_norm": 1.6604660365576887, - "learning_rate": 2.8902242391965335e-07, - "loss": 1.137, - "step": 5600 - }, - { - "epoch": 0.7593031925710025, - "grad_norm": 1.5846811716332339, - "learning_rate": 2.8871368425341634e-07, - "loss": 1.1765, - "step": 5601 - }, - { - "epoch": 0.7594387582186674, - "grad_norm": 2.4118306604915376, - "learning_rate": 2.8840508175313095e-07, - "loss": 1.1738, - "step": 5602 - }, - { - "epoch": 0.7595743238663323, - "grad_norm": 1.8835080628617322, - "learning_rate": 2.880966164783084e-07, - "loss": 1.117, - "step": 5603 - }, - { - "epoch": 0.7597098895139972, - "grad_norm": 1.4896125857720466, - "learning_rate": 2.87788288488434e-07, - "loss": 1.102, - "step": 5604 - }, - { - "epoch": 0.759845455161662, - "grad_norm": 1.6896861462113835, - "learning_rate": 2.8748009784296625e-07, - "loss": 1.1297, - "step": 5605 - }, - { - "epoch": 0.759981020809327, - "grad_norm": 1.492355632963475, - "learning_rate": 2.871720446013374e-07, - "loss": 1.1615, - "step": 5606 - }, - { - "epoch": 0.7601165864569918, - "grad_norm": 1.5984960491360272, - "learning_rate": 2.8686412882295287e-07, - "loss": 1.101, - "step": 5607 - }, - { - "epoch": 0.7602521521046567, - "grad_norm": 1.5063541086492231, - "learning_rate": 2.865563505671921e-07, - "loss": 1.1443, - "step": 5608 - }, - { - "epoch": 0.7603877177523216, - "grad_norm": 1.5717164639175574, - "learning_rate": 2.8624870989340757e-07, - "loss": 1.1126, - "step": 5609 - }, - { - "epoch": 0.7605232833999864, - "grad_norm": 2.9780807050683724, - "learning_rate": 2.8594120686092515e-07, - "loss": 1.1472, - "step": 5610 - }, - { - "epoch": 0.7606588490476514, - "grad_norm": 2.0606361176516867, - "learning_rate": 2.8563384152904503e-07, - "loss": 1.1506, - "step": 5611 - }, - { - "epoch": 0.7607944146953162, - "grad_norm": 1.8204321812960078, - "learning_rate": 2.8532661395703905e-07, - "loss": 1.1269, - "step": 5612 - }, - { - "epoch": 0.7609299803429811, - "grad_norm": 1.762175776467936, - "learning_rate": 2.8501952420415486e-07, - "loss": 1.1587, - "step": 5613 - }, - { - "epoch": 0.761065545990646, - "grad_norm": 1.664388342264442, - "learning_rate": 2.847125723296111e-07, - "loss": 1.1018, - "step": 5614 - }, - { - "epoch": 0.7612011116383108, - "grad_norm": 2.6581111935376995, - "learning_rate": 2.8440575839260227e-07, - "loss": 1.1162, - "step": 5615 - }, - { - "epoch": 0.7613366772859758, - "grad_norm": 1.682087628907367, - "learning_rate": 2.8409908245229374e-07, - "loss": 1.1267, - "step": 5616 - }, - { - "epoch": 0.7614722429336406, - "grad_norm": 3.8676968135518663, - "learning_rate": 2.8379254456782685e-07, - "loss": 1.1328, - "step": 5617 - }, - { - "epoch": 0.7616078085813055, - "grad_norm": 1.7437732954169305, - "learning_rate": 2.8348614479831367e-07, - "loss": 1.0913, - "step": 5618 - }, - { - "epoch": 0.7617433742289704, - "grad_norm": 2.0843382428616986, - "learning_rate": 2.8317988320284223e-07, - "loss": 1.1299, - "step": 5619 - }, - { - "epoch": 0.7618789398766352, - "grad_norm": 2.283955976947826, - "learning_rate": 2.828737598404716e-07, - "loss": 1.12, - "step": 5620 - }, - { - "epoch": 0.7620145055243002, - "grad_norm": 2.1084534241123056, - "learning_rate": 2.8256777477023617e-07, - "loss": 1.1115, - "step": 5621 - }, - { - "epoch": 0.762150071171965, - "grad_norm": 1.800005781123401, - "learning_rate": 2.822619280511418e-07, - "loss": 1.1505, - "step": 5622 - }, - { - "epoch": 0.7622856368196299, - "grad_norm": 2.3842002755511444, - "learning_rate": 2.8195621974216975e-07, - "loss": 1.1398, - "step": 5623 - }, - { - "epoch": 0.7624212024672948, - "grad_norm": 6.714243454529217, - "learning_rate": 2.816506499022725e-07, - "loss": 1.1331, - "step": 5624 - }, - { - "epoch": 0.7625567681149596, - "grad_norm": 1.7609729143708799, - "learning_rate": 2.8134521859037707e-07, - "loss": 1.141, - "step": 5625 - }, - { - "epoch": 0.7626923337626246, - "grad_norm": 1.5239622529603516, - "learning_rate": 2.810399258653836e-07, - "loss": 1.1615, - "step": 5626 - }, - { - "epoch": 0.7628278994102894, - "grad_norm": 3.5631276883319507, - "learning_rate": 2.807347717861653e-07, - "loss": 1.1349, - "step": 5627 - }, - { - "epoch": 0.7629634650579543, - "grad_norm": 1.8553489036147899, - "learning_rate": 2.8042975641156864e-07, - "loss": 1.1316, - "step": 5628 - }, - { - "epoch": 0.7630990307056192, - "grad_norm": 1.4772299209734274, - "learning_rate": 2.8012487980041354e-07, - "loss": 1.1407, - "step": 5629 - }, - { - "epoch": 0.7632345963532841, - "grad_norm": 3.9468636028690747, - "learning_rate": 2.798201420114931e-07, - "loss": 1.1265, - "step": 5630 - }, - { - "epoch": 0.763370162000949, - "grad_norm": 1.7134322414987744, - "learning_rate": 2.795155431035735e-07, - "loss": 1.0689, - "step": 5631 - }, - { - "epoch": 0.7635057276486138, - "grad_norm": 1.940253211228026, - "learning_rate": 2.7921108313539423e-07, - "loss": 1.1822, - "step": 5632 - }, - { - "epoch": 0.7636412932962787, - "grad_norm": 1.8064958921477863, - "learning_rate": 2.78906762165668e-07, - "loss": 1.1436, - "step": 5633 - }, - { - "epoch": 0.7637768589439436, - "grad_norm": 2.5909160911899316, - "learning_rate": 2.786025802530807e-07, - "loss": 1.1518, - "step": 5634 - }, - { - "epoch": 0.7639124245916085, - "grad_norm": 1.4751079906944076, - "learning_rate": 2.782985374562915e-07, - "loss": 1.1314, - "step": 5635 - }, - { - "epoch": 0.7640479902392734, - "grad_norm": 2.1731077109154935, - "learning_rate": 2.779946338339325e-07, - "loss": 1.121, - "step": 5636 - }, - { - "epoch": 0.7641835558869382, - "grad_norm": 2.285638389693649, - "learning_rate": 2.776908694446095e-07, - "loss": 1.1148, - "step": 5637 - }, - { - "epoch": 0.7643191215346031, - "grad_norm": 1.771322601359761, - "learning_rate": 2.773872443469005e-07, - "loss": 1.1027, - "step": 5638 - }, - { - "epoch": 0.7644546871822681, - "grad_norm": 2.500113336908096, - "learning_rate": 2.770837585993575e-07, - "loss": 1.0819, - "step": 5639 - }, - { - "epoch": 0.7645902528299329, - "grad_norm": 1.35215445226337, - "learning_rate": 2.767804122605053e-07, - "loss": 1.1048, - "step": 5640 - }, - { - "epoch": 0.7647258184775978, - "grad_norm": 1.9114997174522634, - "learning_rate": 2.764772053888419e-07, - "loss": 1.1342, - "step": 5641 - }, - { - "epoch": 0.7648613841252626, - "grad_norm": 1.7247238748679388, - "learning_rate": 2.7617413804283815e-07, - "loss": 1.1764, - "step": 5642 - }, - { - "epoch": 0.7649969497729275, - "grad_norm": 1.5345911202673956, - "learning_rate": 2.7587121028093853e-07, - "loss": 1.1229, - "step": 5643 - }, - { - "epoch": 0.7651325154205925, - "grad_norm": 1.676807640477458, - "learning_rate": 2.7556842216155996e-07, - "loss": 1.1299, - "step": 5644 - }, - { - "epoch": 0.7652680810682573, - "grad_norm": 1.4208565885683302, - "learning_rate": 2.752657737430928e-07, - "loss": 1.1024, - "step": 5645 - }, - { - "epoch": 0.7654036467159222, - "grad_norm": 1.5472630259370819, - "learning_rate": 2.749632650839006e-07, - "loss": 1.1518, - "step": 5646 - }, - { - "epoch": 0.765539212363587, - "grad_norm": 1.7392703394890774, - "learning_rate": 2.746608962423196e-07, - "loss": 1.1284, - "step": 5647 - }, - { - "epoch": 0.7656747780112519, - "grad_norm": 1.804932523558963, - "learning_rate": 2.7435866727665924e-07, - "loss": 1.117, - "step": 5648 - }, - { - "epoch": 0.7658103436589169, - "grad_norm": 1.5448921705768355, - "learning_rate": 2.74056578245202e-07, - "loss": 1.0989, - "step": 5649 - }, - { - "epoch": 0.7659459093065817, - "grad_norm": 1.7411987633088435, - "learning_rate": 2.7375462920620354e-07, - "loss": 1.1239, - "step": 5650 - }, - { - "epoch": 0.7660814749542466, - "grad_norm": 1.563886194147513, - "learning_rate": 2.7345282021789204e-07, - "loss": 1.1448, - "step": 5651 - }, - { - "epoch": 0.7662170406019114, - "grad_norm": 1.496557279490038, - "learning_rate": 2.731511513384696e-07, - "loss": 1.088, - "step": 5652 - }, - { - "epoch": 0.7663526062495764, - "grad_norm": 2.826096521012251, - "learning_rate": 2.7284962262610946e-07, - "loss": 1.1463, - "step": 5653 - }, - { - "epoch": 0.7664881718972413, - "grad_norm": 1.5571720066461916, - "learning_rate": 2.7254823413896056e-07, - "loss": 1.1188, - "step": 5654 - }, - { - "epoch": 0.7666237375449061, - "grad_norm": 1.989042467916981, - "learning_rate": 2.7224698593514183e-07, - "loss": 1.1461, - "step": 5655 - }, - { - "epoch": 0.766759303192571, - "grad_norm": 1.6413555754068816, - "learning_rate": 2.7194587807274803e-07, - "loss": 1.1165, - "step": 5656 - }, - { - "epoch": 0.7668948688402358, - "grad_norm": 1.6154400962522693, - "learning_rate": 2.7164491060984417e-07, - "loss": 1.133, - "step": 5657 - }, - { - "epoch": 0.7670304344879008, - "grad_norm": 1.8116939591841072, - "learning_rate": 2.713440836044705e-07, - "loss": 1.1414, - "step": 5658 - }, - { - "epoch": 0.7671660001355657, - "grad_norm": 1.7520666762063646, - "learning_rate": 2.710433971146381e-07, - "loss": 1.1196, - "step": 5659 - }, - { - "epoch": 0.7673015657832305, - "grad_norm": 2.150855315746426, - "learning_rate": 2.7074285119833315e-07, - "loss": 1.1631, - "step": 5660 - }, - { - "epoch": 0.7674371314308954, - "grad_norm": 1.7839445829121041, - "learning_rate": 2.704424459135123e-07, - "loss": 1.1165, - "step": 5661 - }, - { - "epoch": 0.7675726970785602, - "grad_norm": 1.9585759737062995, - "learning_rate": 2.701421813181076e-07, - "loss": 1.0909, - "step": 5662 - }, - { - "epoch": 0.7677082627262252, - "grad_norm": 1.443182086245629, - "learning_rate": 2.6984205747002153e-07, - "loss": 1.0767, - "step": 5663 - }, - { - "epoch": 0.7678438283738901, - "grad_norm": 1.7291665467284376, - "learning_rate": 2.6954207442713174e-07, - "loss": 1.1198, - "step": 5664 - }, - { - "epoch": 0.7679793940215549, - "grad_norm": 2.2361729653968676, - "learning_rate": 2.692422322472866e-07, - "loss": 1.1631, - "step": 5665 - }, - { - "epoch": 0.7681149596692198, - "grad_norm": 1.6686827436776654, - "learning_rate": 2.689425309883089e-07, - "loss": 1.1081, - "step": 5666 - }, - { - "epoch": 0.7682505253168846, - "grad_norm": 1.4675112332964075, - "learning_rate": 2.6864297070799336e-07, - "loss": 1.1015, - "step": 5667 - }, - { - "epoch": 0.7683860909645496, - "grad_norm": 1.3799723468907235, - "learning_rate": 2.6834355146410793e-07, - "loss": 1.1275, - "step": 5668 - }, - { - "epoch": 0.7685216566122145, - "grad_norm": 1.9767399474476277, - "learning_rate": 2.6804427331439327e-07, - "loss": 1.1201, - "step": 5669 - }, - { - "epoch": 0.7686572222598793, - "grad_norm": 2.8349438300724237, - "learning_rate": 2.677451363165628e-07, - "loss": 1.1522, - "step": 5670 - }, - { - "epoch": 0.7687927879075442, - "grad_norm": 2.5870445140764633, - "learning_rate": 2.674461405283027e-07, - "loss": 1.1792, - "step": 5671 - }, - { - "epoch": 0.7689283535552092, - "grad_norm": 1.5494674200035028, - "learning_rate": 2.671472860072721e-07, - "loss": 1.1132, - "step": 5672 - }, - { - "epoch": 0.769063919202874, - "grad_norm": 1.9497797552711735, - "learning_rate": 2.6684857281110286e-07, - "loss": 1.1911, - "step": 5673 - }, - { - "epoch": 0.7691994848505389, - "grad_norm": 2.0315771012224926, - "learning_rate": 2.6655000099739857e-07, - "loss": 1.1097, - "step": 5674 - }, - { - "epoch": 0.7693350504982037, - "grad_norm": 2.128669224633602, - "learning_rate": 2.662515706237376e-07, - "loss": 1.0866, - "step": 5675 - }, - { - "epoch": 0.7694706161458686, - "grad_norm": 2.3521127122017105, - "learning_rate": 2.6595328174766885e-07, - "loss": 1.104, - "step": 5676 - }, - { - "epoch": 0.7696061817935336, - "grad_norm": 1.8074692011542461, - "learning_rate": 2.656551344267162e-07, - "loss": 1.1474, - "step": 5677 - }, - { - "epoch": 0.7697417474411984, - "grad_norm": 1.70120445543075, - "learning_rate": 2.6535712871837357e-07, - "loss": 1.1084, - "step": 5678 - }, - { - "epoch": 0.7698773130888633, - "grad_norm": 1.680435659669913, - "learning_rate": 2.6505926468011044e-07, - "loss": 1.1348, - "step": 5679 - }, - { - "epoch": 0.7700128787365281, - "grad_norm": 1.4640943543878653, - "learning_rate": 2.6476154236936643e-07, - "loss": 1.1059, - "step": 5680 - }, - { - "epoch": 0.770148444384193, - "grad_norm": 1.5252418481508832, - "learning_rate": 2.6446396184355545e-07, - "loss": 1.1096, - "step": 5681 - }, - { - "epoch": 0.770284010031858, - "grad_norm": 1.8475514017426458, - "learning_rate": 2.641665231600634e-07, - "loss": 1.1397, - "step": 5682 - }, - { - "epoch": 0.7704195756795228, - "grad_norm": 1.840725333248945, - "learning_rate": 2.6386922637624906e-07, - "loss": 1.0626, - "step": 5683 - }, - { - "epoch": 0.7705551413271877, - "grad_norm": 1.561312042780428, - "learning_rate": 2.635720715494438e-07, - "loss": 1.1007, - "step": 5684 - }, - { - "epoch": 0.7706907069748525, - "grad_norm": 4.002909948897374, - "learning_rate": 2.6327505873695157e-07, - "loss": 1.1938, - "step": 5685 - }, - { - "epoch": 0.7708262726225175, - "grad_norm": 1.506512654400048, - "learning_rate": 2.629781879960488e-07, - "loss": 1.1125, - "step": 5686 - }, - { - "epoch": 0.7709618382701824, - "grad_norm": 1.84927581254869, - "learning_rate": 2.626814593839848e-07, - "loss": 1.1257, - "step": 5687 - }, - { - "epoch": 0.7710974039178472, - "grad_norm": 1.5106834986264215, - "learning_rate": 2.623848729579813e-07, - "loss": 1.1378, - "step": 5688 - }, - { - "epoch": 0.7712329695655121, - "grad_norm": 1.7846330766219878, - "learning_rate": 2.620884287752327e-07, - "loss": 1.1311, - "step": 5689 - }, - { - "epoch": 0.7713685352131769, - "grad_norm": 2.0817278138654167, - "learning_rate": 2.61792126892906e-07, - "loss": 1.128, - "step": 5690 - }, - { - "epoch": 0.7715041008608419, - "grad_norm": 1.4815320871194724, - "learning_rate": 2.614959673681404e-07, - "loss": 1.1165, - "step": 5691 - }, - { - "epoch": 0.7716396665085068, - "grad_norm": 1.4689885228887125, - "learning_rate": 2.611999502580482e-07, - "loss": 1.1211, - "step": 5692 - }, - { - "epoch": 0.7717752321561716, - "grad_norm": 1.7789227422220348, - "learning_rate": 2.6090407561971405e-07, - "loss": 1.0844, - "step": 5693 - }, - { - "epoch": 0.7719107978038365, - "grad_norm": 1.8759987628352057, - "learning_rate": 2.6060834351019433e-07, - "loss": 1.1269, - "step": 5694 - }, - { - "epoch": 0.7720463634515013, - "grad_norm": 2.029652783773119, - "learning_rate": 2.6031275398651986e-07, - "loss": 1.1544, - "step": 5695 - }, - { - "epoch": 0.7721819290991663, - "grad_norm": 1.4795274179946105, - "learning_rate": 2.6001730710569123e-07, - "loss": 1.1055, - "step": 5696 - }, - { - "epoch": 0.7723174947468312, - "grad_norm": 1.8751556444923918, - "learning_rate": 2.597220029246846e-07, - "loss": 1.1084, - "step": 5697 - }, - { - "epoch": 0.772453060394496, - "grad_norm": 1.5452740812167731, - "learning_rate": 2.594268415004457e-07, - "loss": 1.1588, - "step": 5698 - }, - { - "epoch": 0.7725886260421609, - "grad_norm": 1.753984398407572, - "learning_rate": 2.591318228898953e-07, - "loss": 1.212, - "step": 5699 - }, - { - "epoch": 0.7727241916898258, - "grad_norm": 2.0966738314670668, - "learning_rate": 2.5883694714992446e-07, - "loss": 1.1191, - "step": 5700 - }, - { - "epoch": 0.7728597573374907, - "grad_norm": 1.7720588288256773, - "learning_rate": 2.5854221433739797e-07, - "loss": 1.1519, - "step": 5701 - }, - { - "epoch": 0.7729953229851556, - "grad_norm": 1.7944920889074518, - "learning_rate": 2.582476245091527e-07, - "loss": 1.134, - "step": 5702 - }, - { - "epoch": 0.7731308886328204, - "grad_norm": 1.5100032796687892, - "learning_rate": 2.579531777219981e-07, - "loss": 1.1359, - "step": 5703 - }, - { - "epoch": 0.7732664542804853, - "grad_norm": 4.268247178252124, - "learning_rate": 2.576588740327158e-07, - "loss": 1.1025, - "step": 5704 - }, - { - "epoch": 0.7734020199281502, - "grad_norm": 1.8519197534584118, - "learning_rate": 2.573647134980599e-07, - "loss": 1.1196, - "step": 5705 - }, - { - "epoch": 0.7735375855758151, - "grad_norm": 1.627651099164805, - "learning_rate": 2.57070696174757e-07, - "loss": 1.1296, - "step": 5706 - }, - { - "epoch": 0.77367315122348, - "grad_norm": 1.648874773476019, - "learning_rate": 2.5677682211950604e-07, - "loss": 1.1166, - "step": 5707 - }, - { - "epoch": 0.7738087168711448, - "grad_norm": 1.624688645170778, - "learning_rate": 2.564830913889783e-07, - "loss": 1.1318, - "step": 5708 - }, - { - "epoch": 0.7739442825188098, - "grad_norm": 1.8252867587571606, - "learning_rate": 2.561895040398173e-07, - "loss": 1.112, - "step": 5709 - }, - { - "epoch": 0.7740798481664746, - "grad_norm": 1.9669579390099454, - "learning_rate": 2.5589606012863964e-07, - "loss": 1.129, - "step": 5710 - }, - { - "epoch": 0.7742154138141395, - "grad_norm": 1.5514123688804375, - "learning_rate": 2.556027597120325e-07, - "loss": 1.1353, - "step": 5711 - }, - { - "epoch": 0.7743509794618044, - "grad_norm": 1.4792583635683072, - "learning_rate": 2.553096028465578e-07, - "loss": 1.139, - "step": 5712 - }, - { - "epoch": 0.7744865451094692, - "grad_norm": 1.8677832853060468, - "learning_rate": 2.550165895887474e-07, - "loss": 1.1231, - "step": 5713 - }, - { - "epoch": 0.7746221107571342, - "grad_norm": 1.9840449952975443, - "learning_rate": 2.547237199951078e-07, - "loss": 1.1526, - "step": 5714 - }, - { - "epoch": 0.774757676404799, - "grad_norm": 1.753293586746801, - "learning_rate": 2.5443099412211535e-07, - "loss": 1.1365, - "step": 5715 - }, - { - "epoch": 0.7748932420524639, - "grad_norm": 1.9212116246410464, - "learning_rate": 2.54138412026221e-07, - "loss": 1.1553, - "step": 5716 - }, - { - "epoch": 0.7750288077001288, - "grad_norm": 1.5480432406054123, - "learning_rate": 2.5384597376384596e-07, - "loss": 1.1441, - "step": 5717 - }, - { - "epoch": 0.7751643733477936, - "grad_norm": 1.7774262470210713, - "learning_rate": 2.535536793913856e-07, - "loss": 1.1291, - "step": 5718 - }, - { - "epoch": 0.7752999389954586, - "grad_norm": 1.8670495106498213, - "learning_rate": 2.532615289652055e-07, - "loss": 1.1191, - "step": 5719 - }, - { - "epoch": 0.7754355046431234, - "grad_norm": 1.7477990052021781, - "learning_rate": 2.5296952254164573e-07, - "loss": 1.1097, - "step": 5720 - }, - { - "epoch": 0.7755710702907883, - "grad_norm": 1.379199762722718, - "learning_rate": 2.5267766017701664e-07, - "loss": 1.0891, - "step": 5721 - }, - { - "epoch": 0.7757066359384532, - "grad_norm": 2.0205197727021833, - "learning_rate": 2.5238594192760165e-07, - "loss": 1.1144, - "step": 5722 - }, - { - "epoch": 0.775842201586118, - "grad_norm": 2.215384073068831, - "learning_rate": 2.5209436784965657e-07, - "loss": 1.1222, - "step": 5723 - }, - { - "epoch": 0.775977767233783, - "grad_norm": 2.4827117882649254, - "learning_rate": 2.5180293799940886e-07, - "loss": 1.1059, - "step": 5724 - }, - { - "epoch": 0.7761133328814478, - "grad_norm": 1.7128538698623845, - "learning_rate": 2.5151165243305885e-07, - "loss": 1.1112, - "step": 5725 - }, - { - "epoch": 0.7762488985291127, - "grad_norm": 2.0575261789536765, - "learning_rate": 2.512205112067783e-07, - "loss": 1.1155, - "step": 5726 - }, - { - "epoch": 0.7763844641767776, - "grad_norm": 1.487284105844084, - "learning_rate": 2.5092951437671184e-07, - "loss": 1.1724, - "step": 5727 - }, - { - "epoch": 0.7765200298244425, - "grad_norm": 1.4568735818675593, - "learning_rate": 2.5063866199897556e-07, - "loss": 1.1122, - "step": 5728 - }, - { - "epoch": 0.7766555954721074, - "grad_norm": 1.5588949293132903, - "learning_rate": 2.5034795412965825e-07, - "loss": 1.0837, - "step": 5729 - }, - { - "epoch": 0.7767911611197722, - "grad_norm": 1.46462974501356, - "learning_rate": 2.500573908248207e-07, - "loss": 1.1533, - "step": 5730 - }, - { - "epoch": 0.7769267267674371, - "grad_norm": 1.576298091531953, - "learning_rate": 2.497669721404956e-07, - "loss": 1.1188, - "step": 5731 - }, - { - "epoch": 0.777062292415102, - "grad_norm": 3.251544230442681, - "learning_rate": 2.494766981326878e-07, - "loss": 1.1289, - "step": 5732 - }, - { - "epoch": 0.7771978580627669, - "grad_norm": 1.650595715420313, - "learning_rate": 2.4918656885737465e-07, - "loss": 1.0897, - "step": 5733 - }, - { - "epoch": 0.7773334237104318, - "grad_norm": 1.4876732255281484, - "learning_rate": 2.488965843705051e-07, - "loss": 1.1326, - "step": 5734 - }, - { - "epoch": 0.7774689893580966, - "grad_norm": 2.2302829572228986, - "learning_rate": 2.4860674472800036e-07, - "loss": 1.1242, - "step": 5735 - }, - { - "epoch": 0.7776045550057615, - "grad_norm": 1.6917192193483126, - "learning_rate": 2.483170499857541e-07, - "loss": 1.1694, - "step": 5736 - }, - { - "epoch": 0.7777401206534265, - "grad_norm": 1.5696538148860835, - "learning_rate": 2.48027500199631e-07, - "loss": 1.1318, - "step": 5737 - }, - { - "epoch": 0.7778756863010913, - "grad_norm": 1.9199400699972917, - "learning_rate": 2.477380954254689e-07, - "loss": 1.1123, - "step": 5738 - }, - { - "epoch": 0.7780112519487562, - "grad_norm": 1.7309498228480176, - "learning_rate": 2.4744883571907694e-07, - "loss": 1.1068, - "step": 5739 - }, - { - "epoch": 0.778146817596421, - "grad_norm": 1.628707553694707, - "learning_rate": 2.471597211362367e-07, - "loss": 1.1465, - "step": 5740 - }, - { - "epoch": 0.7782823832440859, - "grad_norm": 1.8569190470983585, - "learning_rate": 2.468707517327019e-07, - "loss": 1.1631, - "step": 5741 - }, - { - "epoch": 0.7784179488917509, - "grad_norm": 2.617797586839627, - "learning_rate": 2.465819275641976e-07, - "loss": 1.1282, - "step": 5742 - }, - { - "epoch": 0.7785535145394157, - "grad_norm": 1.5161008274572638, - "learning_rate": 2.462932486864215e-07, - "loss": 1.1432, - "step": 5743 - }, - { - "epoch": 0.7786890801870806, - "grad_norm": 1.9153637917214947, - "learning_rate": 2.4600471515504293e-07, - "loss": 1.0816, - "step": 5744 - }, - { - "epoch": 0.7788246458347454, - "grad_norm": 1.5500232730392414, - "learning_rate": 2.4571632702570356e-07, - "loss": 1.1131, - "step": 5745 - }, - { - "epoch": 0.7789602114824103, - "grad_norm": 2.4338918571073327, - "learning_rate": 2.454280843540164e-07, - "loss": 1.1392, - "step": 5746 - }, - { - "epoch": 0.7790957771300753, - "grad_norm": 2.0195569483396265, - "learning_rate": 2.4513998719556693e-07, - "loss": 1.1217, - "step": 5747 - }, - { - "epoch": 0.7792313427777401, - "grad_norm": 1.6217204083222712, - "learning_rate": 2.448520356059125e-07, - "loss": 1.1143, - "step": 5748 - }, - { - "epoch": 0.779366908425405, - "grad_norm": 3.0827091168540686, - "learning_rate": 2.4456422964058254e-07, - "loss": 1.1796, - "step": 5749 - }, - { - "epoch": 0.7795024740730698, - "grad_norm": 1.5752895318454925, - "learning_rate": 2.442765693550772e-07, - "loss": 1.1331, - "step": 5750 - }, - { - "epoch": 0.7796380397207348, - "grad_norm": 1.5859472303781381, - "learning_rate": 2.4398905480487073e-07, - "loss": 1.0746, - "step": 5751 - }, - { - "epoch": 0.7797736053683997, - "grad_norm": 1.5240579359040423, - "learning_rate": 2.4370168604540697e-07, - "loss": 1.1063, - "step": 5752 - }, - { - "epoch": 0.7799091710160645, - "grad_norm": 1.985519442423172, - "learning_rate": 2.4341446313210365e-07, - "loss": 1.1251, - "step": 5753 - }, - { - "epoch": 0.7800447366637294, - "grad_norm": 1.7941875747618343, - "learning_rate": 2.4312738612034843e-07, - "loss": 1.1262, - "step": 5754 - }, - { - "epoch": 0.7801803023113943, - "grad_norm": 1.787953134989362, - "learning_rate": 2.428404550655031e-07, - "loss": 1.1036, - "step": 5755 - }, - { - "epoch": 0.7803158679590592, - "grad_norm": 1.628650374980659, - "learning_rate": 2.425536700228986e-07, - "loss": 1.1296, - "step": 5756 - }, - { - "epoch": 0.7804514336067241, - "grad_norm": 1.5212046658863316, - "learning_rate": 2.422670310478406e-07, - "loss": 1.1315, - "step": 5757 - }, - { - "epoch": 0.7805869992543889, - "grad_norm": 2.3081437435452608, - "learning_rate": 2.4198053819560394e-07, - "loss": 1.1148, - "step": 5758 - }, - { - "epoch": 0.7807225649020538, - "grad_norm": 1.512742882172894, - "learning_rate": 2.4169419152143766e-07, - "loss": 1.1501, - "step": 5759 - }, - { - "epoch": 0.7808581305497188, - "grad_norm": 1.672194272131186, - "learning_rate": 2.414079910805601e-07, - "loss": 1.1313, - "step": 5760 - }, - { - "epoch": 0.7809936961973836, - "grad_norm": 1.5796308940057597, - "learning_rate": 2.4112193692816416e-07, - "loss": 1.1389, - "step": 5761 - }, - { - "epoch": 0.7811292618450485, - "grad_norm": 1.5432716383541651, - "learning_rate": 2.4083602911941224e-07, - "loss": 1.1454, - "step": 5762 - }, - { - "epoch": 0.7812648274927133, - "grad_norm": 1.6911481873925136, - "learning_rate": 2.405502677094395e-07, - "loss": 1.1039, - "step": 5763 - }, - { - "epoch": 0.7814003931403782, - "grad_norm": 1.3737472335887604, - "learning_rate": 2.4026465275335306e-07, - "loss": 1.1485, - "step": 5764 - }, - { - "epoch": 0.7815359587880432, - "grad_norm": 1.7058843150854925, - "learning_rate": 2.399791843062312e-07, - "loss": 1.1027, - "step": 5765 - }, - { - "epoch": 0.781671524435708, - "grad_norm": 1.5124896985253808, - "learning_rate": 2.396938624231245e-07, - "loss": 1.1042, - "step": 5766 - }, - { - "epoch": 0.7818070900833729, - "grad_norm": 1.3804768468587438, - "learning_rate": 2.3940868715905495e-07, - "loss": 1.1211, - "step": 5767 - }, - { - "epoch": 0.7819426557310377, - "grad_norm": 1.5493348935887008, - "learning_rate": 2.3912365856901627e-07, - "loss": 1.114, - "step": 5768 - }, - { - "epoch": 0.7820782213787026, - "grad_norm": 1.5666084906540247, - "learning_rate": 2.38838776707974e-07, - "loss": 1.1912, - "step": 5769 - }, - { - "epoch": 0.7822137870263676, - "grad_norm": 1.8368344195400135, - "learning_rate": 2.3855404163086556e-07, - "loss": 1.1027, - "step": 5770 - }, - { - "epoch": 0.7823493526740324, - "grad_norm": 1.6631617411416244, - "learning_rate": 2.3826945339259964e-07, - "loss": 1.1171, - "step": 5771 - }, - { - "epoch": 0.7824849183216973, - "grad_norm": 1.801587855160405, - "learning_rate": 2.379850120480571e-07, - "loss": 1.1797, - "step": 5772 - }, - { - "epoch": 0.7826204839693621, - "grad_norm": 1.4023732103273001, - "learning_rate": 2.3770071765208956e-07, - "loss": 1.1438, - "step": 5773 - }, - { - "epoch": 0.782756049617027, - "grad_norm": 2.121097580159299, - "learning_rate": 2.3741657025952188e-07, - "loss": 1.1524, - "step": 5774 - }, - { - "epoch": 0.782891615264692, - "grad_norm": 1.5601538355660463, - "learning_rate": 2.3713256992514853e-07, - "loss": 1.1668, - "step": 5775 - }, - { - "epoch": 0.7830271809123568, - "grad_norm": 1.6774335909097535, - "learning_rate": 2.3684871670373806e-07, - "loss": 1.1653, - "step": 5776 - }, - { - "epoch": 0.7831627465600217, - "grad_norm": 1.852304796795427, - "learning_rate": 2.365650106500282e-07, - "loss": 1.0917, - "step": 5777 - }, - { - "epoch": 0.7832983122076865, - "grad_norm": 1.6714038507911109, - "learning_rate": 2.3628145181872994e-07, - "loss": 1.1153, - "step": 5778 - }, - { - "epoch": 0.7834338778553515, - "grad_norm": 2.483408934314613, - "learning_rate": 2.359980402645253e-07, - "loss": 1.1694, - "step": 5779 - }, - { - "epoch": 0.7835694435030164, - "grad_norm": 1.6621037752215582, - "learning_rate": 2.3571477604206792e-07, - "loss": 1.1591, - "step": 5780 - }, - { - "epoch": 0.7837050091506812, - "grad_norm": 1.7767525583452555, - "learning_rate": 2.3543165920598308e-07, - "loss": 1.1401, - "step": 5781 - }, - { - "epoch": 0.7838405747983461, - "grad_norm": 1.4915365057069099, - "learning_rate": 2.3514868981086755e-07, - "loss": 1.0891, - "step": 5782 - }, - { - "epoch": 0.7839761404460109, - "grad_norm": 2.6703622455341955, - "learning_rate": 2.3486586791128982e-07, - "loss": 1.1821, - "step": 5783 - }, - { - "epoch": 0.7841117060936759, - "grad_norm": 1.7000377983798203, - "learning_rate": 2.345831935617899e-07, - "loss": 1.1108, - "step": 5784 - }, - { - "epoch": 0.7842472717413408, - "grad_norm": 1.7652439256719816, - "learning_rate": 2.3430066681687932e-07, - "loss": 1.1145, - "step": 5785 - }, - { - "epoch": 0.7843828373890056, - "grad_norm": 4.44678629393062, - "learning_rate": 2.3401828773104103e-07, - "loss": 1.0935, - "step": 5786 - }, - { - "epoch": 0.7845184030366705, - "grad_norm": 1.5774725151948086, - "learning_rate": 2.3373605635872972e-07, - "loss": 1.0985, - "step": 5787 - }, - { - "epoch": 0.7846539686843353, - "grad_norm": 2.324811105978859, - "learning_rate": 2.334539727543713e-07, - "loss": 1.1266, - "step": 5788 - }, - { - "epoch": 0.7847895343320003, - "grad_norm": 2.642141267442177, - "learning_rate": 2.3317203697236353e-07, - "loss": 1.1721, - "step": 5789 - }, - { - "epoch": 0.7849250999796652, - "grad_norm": 1.7969127762212427, - "learning_rate": 2.3289024906707555e-07, - "loss": 1.1484, - "step": 5790 - }, - { - "epoch": 0.78506066562733, - "grad_norm": 1.66294539772085, - "learning_rate": 2.3260860909284773e-07, - "loss": 1.1458, - "step": 5791 - }, - { - "epoch": 0.7851962312749949, - "grad_norm": 1.379065123482126, - "learning_rate": 2.3232711710399255e-07, - "loss": 1.1253, - "step": 5792 - }, - { - "epoch": 0.7853317969226598, - "grad_norm": 2.1729931226247063, - "learning_rate": 2.3204577315479269e-07, - "loss": 1.1468, - "step": 5793 - }, - { - "epoch": 0.7854673625703247, - "grad_norm": 1.693911973020694, - "learning_rate": 2.3176457729950417e-07, - "loss": 1.1089, - "step": 5794 - }, - { - "epoch": 0.7856029282179896, - "grad_norm": 2.029599191386079, - "learning_rate": 2.3148352959235218e-07, - "loss": 1.1337, - "step": 5795 - }, - { - "epoch": 0.7857384938656544, - "grad_norm": 6.1893071613152975, - "learning_rate": 2.3120263008753582e-07, - "loss": 1.1678, - "step": 5796 - }, - { - "epoch": 0.7858740595133193, - "grad_norm": 1.9856636833490904, - "learning_rate": 2.309218788392232e-07, - "loss": 1.1268, - "step": 5797 - }, - { - "epoch": 0.7860096251609842, - "grad_norm": 1.6490316321109244, - "learning_rate": 2.3064127590155603e-07, - "loss": 1.1241, - "step": 5798 - }, - { - "epoch": 0.7861451908086491, - "grad_norm": 1.6221237178949006, - "learning_rate": 2.3036082132864555e-07, - "loss": 1.1315, - "step": 5799 - }, - { - "epoch": 0.786280756456314, - "grad_norm": 2.065970292088523, - "learning_rate": 2.300805151745756e-07, - "loss": 1.1386, - "step": 5800 - }, - { - "epoch": 0.7864163221039788, - "grad_norm": 1.5007402191977883, - "learning_rate": 2.2980035749340088e-07, - "loss": 1.108, - "step": 5801 - }, - { - "epoch": 0.7865518877516438, - "grad_norm": 1.5967098929279577, - "learning_rate": 2.2952034833914757e-07, - "loss": 1.1317, - "step": 5802 - }, - { - "epoch": 0.7866874533993086, - "grad_norm": 2.045866777993252, - "learning_rate": 2.292404877658134e-07, - "loss": 1.1261, - "step": 5803 - }, - { - "epoch": 0.7868230190469735, - "grad_norm": 1.5838485648686977, - "learning_rate": 2.2896077582736705e-07, - "loss": 1.1258, - "step": 5804 - }, - { - "epoch": 0.7869585846946384, - "grad_norm": 1.4960913318219085, - "learning_rate": 2.2868121257774885e-07, - "loss": 1.116, - "step": 5805 - }, - { - "epoch": 0.7870941503423032, - "grad_norm": 1.5870582738861427, - "learning_rate": 2.2840179807087044e-07, - "loss": 1.1417, - "step": 5806 - }, - { - "epoch": 0.7872297159899682, - "grad_norm": 1.5177218730895383, - "learning_rate": 2.2812253236061497e-07, - "loss": 1.0956, - "step": 5807 - }, - { - "epoch": 0.787365281637633, - "grad_norm": 1.5463433476004085, - "learning_rate": 2.2784341550083574e-07, - "loss": 1.1064, - "step": 5808 - }, - { - "epoch": 0.7875008472852979, - "grad_norm": 1.5987714363973962, - "learning_rate": 2.275644475453593e-07, - "loss": 1.1371, - "step": 5809 - }, - { - "epoch": 0.7876364129329628, - "grad_norm": 1.5945550350623803, - "learning_rate": 2.272856285479814e-07, - "loss": 1.1789, - "step": 5810 - }, - { - "epoch": 0.7877719785806276, - "grad_norm": 1.6266187655936322, - "learning_rate": 2.2700695856247122e-07, - "loss": 1.1072, - "step": 5811 - }, - { - "epoch": 0.7879075442282926, - "grad_norm": 1.5349631432881738, - "learning_rate": 2.2672843764256678e-07, - "loss": 1.1081, - "step": 5812 - }, - { - "epoch": 0.7880431098759574, - "grad_norm": 1.438617189153611, - "learning_rate": 2.264500658419799e-07, - "loss": 1.1144, - "step": 5813 - }, - { - "epoch": 0.7881786755236223, - "grad_norm": 1.5935577961760994, - "learning_rate": 2.261718432143912e-07, - "loss": 1.1039, - "step": 5814 - }, - { - "epoch": 0.7883142411712872, - "grad_norm": 1.497581996745133, - "learning_rate": 2.2589376981345487e-07, - "loss": 1.0867, - "step": 5815 - }, - { - "epoch": 0.788449806818952, - "grad_norm": 2.1820441502487116, - "learning_rate": 2.25615845692794e-07, - "loss": 1.1277, - "step": 5816 - }, - { - "epoch": 0.788585372466617, - "grad_norm": 1.680364754583142, - "learning_rate": 2.253380709060053e-07, - "loss": 1.121, - "step": 5817 - }, - { - "epoch": 0.7887209381142818, - "grad_norm": 1.7075044724992658, - "learning_rate": 2.2506044550665438e-07, - "loss": 1.1312, - "step": 5818 - }, - { - "epoch": 0.7888565037619467, - "grad_norm": 1.6595850330958393, - "learning_rate": 2.247829695482799e-07, - "loss": 1.1111, - "step": 5819 - }, - { - "epoch": 0.7889920694096116, - "grad_norm": 1.6561852938020194, - "learning_rate": 2.2450564308439036e-07, - "loss": 1.1521, - "step": 5820 - }, - { - "epoch": 0.7891276350572765, - "grad_norm": 1.703730149412812, - "learning_rate": 2.2422846616846613e-07, - "loss": 1.0982, - "step": 5821 - }, - { - "epoch": 0.7892632007049414, - "grad_norm": 1.455996343973915, - "learning_rate": 2.2395143885395873e-07, - "loss": 1.1326, - "step": 5822 - }, - { - "epoch": 0.7893987663526062, - "grad_norm": 2.445082378018391, - "learning_rate": 2.236745611942905e-07, - "loss": 1.1546, - "step": 5823 - }, - { - "epoch": 0.7895343320002711, - "grad_norm": 1.6335140394546, - "learning_rate": 2.2339783324285523e-07, - "loss": 1.1349, - "step": 5824 - }, - { - "epoch": 0.789669897647936, - "grad_norm": 1.5790870980083669, - "learning_rate": 2.231212550530177e-07, - "loss": 1.1398, - "step": 5825 - }, - { - "epoch": 0.7898054632956009, - "grad_norm": 1.495593796504678, - "learning_rate": 2.2284482667811378e-07, - "loss": 1.1161, - "step": 5826 - }, - { - "epoch": 0.7899410289432658, - "grad_norm": 1.490608977572971, - "learning_rate": 2.2256854817145065e-07, - "loss": 1.1268, - "step": 5827 - }, - { - "epoch": 0.7900765945909306, - "grad_norm": 2.225633679226286, - "learning_rate": 2.2229241958630617e-07, - "loss": 1.1648, - "step": 5828 - }, - { - "epoch": 0.7902121602385955, - "grad_norm": 1.608798382841865, - "learning_rate": 2.2201644097592987e-07, - "loss": 1.1144, - "step": 5829 - }, - { - "epoch": 0.7903477258862605, - "grad_norm": 1.6854126328051005, - "learning_rate": 2.217406123935418e-07, - "loss": 1.147, - "step": 5830 - }, - { - "epoch": 0.7904832915339253, - "grad_norm": 1.5212861814490075, - "learning_rate": 2.2146493389233357e-07, - "loss": 1.0909, - "step": 5831 - }, - { - "epoch": 0.7906188571815902, - "grad_norm": 1.597020492811265, - "learning_rate": 2.211894055254673e-07, - "loss": 1.1394, - "step": 5832 - }, - { - "epoch": 0.7907544228292551, - "grad_norm": 1.383942920243841, - "learning_rate": 2.20914027346077e-07, - "loss": 1.148, - "step": 5833 - }, - { - "epoch": 0.7908899884769199, - "grad_norm": 2.7433711089379615, - "learning_rate": 2.206387994072665e-07, - "loss": 1.0979, - "step": 5834 - }, - { - "epoch": 0.7910255541245849, - "grad_norm": 1.4491503908746541, - "learning_rate": 2.2036372176211148e-07, - "loss": 1.1301, - "step": 5835 - }, - { - "epoch": 0.7911611197722497, - "grad_norm": 1.7447229529194401, - "learning_rate": 2.200887944636588e-07, - "loss": 1.1163, - "step": 5836 - }, - { - "epoch": 0.7912966854199146, - "grad_norm": 1.6152480231431734, - "learning_rate": 2.198140175649259e-07, - "loss": 1.1566, - "step": 5837 - }, - { - "epoch": 0.7914322510675795, - "grad_norm": 1.6331907977895017, - "learning_rate": 2.195393911189012e-07, - "loss": 1.1604, - "step": 5838 - }, - { - "epoch": 0.7915678167152443, - "grad_norm": 1.5008468615445212, - "learning_rate": 2.192649151785444e-07, - "loss": 1.1529, - "step": 5839 - }, - { - "epoch": 0.7917033823629093, - "grad_norm": 1.5324957666718024, - "learning_rate": 2.1899058979678586e-07, - "loss": 1.129, - "step": 5840 - }, - { - "epoch": 0.7918389480105741, - "grad_norm": 1.7896934796825126, - "learning_rate": 2.1871641502652728e-07, - "loss": 1.1239, - "step": 5841 - }, - { - "epoch": 0.791974513658239, - "grad_norm": 2.297925757499211, - "learning_rate": 2.1844239092064088e-07, - "loss": 1.1463, - "step": 5842 - }, - { - "epoch": 0.7921100793059039, - "grad_norm": 1.5063098639936239, - "learning_rate": 2.181685175319702e-07, - "loss": 1.102, - "step": 5843 - }, - { - "epoch": 0.7922456449535688, - "grad_norm": 3.3638544469355756, - "learning_rate": 2.1789479491332953e-07, - "loss": 1.1486, - "step": 5844 - }, - { - "epoch": 0.7923812106012337, - "grad_norm": 1.4557246055837676, - "learning_rate": 2.176212231175041e-07, - "loss": 1.1555, - "step": 5845 - }, - { - "epoch": 0.7925167762488985, - "grad_norm": 1.574502121674619, - "learning_rate": 2.1734780219725e-07, - "loss": 1.1182, - "step": 5846 - }, - { - "epoch": 0.7926523418965634, - "grad_norm": 1.8395205046317087, - "learning_rate": 2.1707453220529448e-07, - "loss": 1.1574, - "step": 5847 - }, - { - "epoch": 0.7927879075442283, - "grad_norm": 1.9893790208651607, - "learning_rate": 2.1680141319433564e-07, - "loss": 1.1256, - "step": 5848 - }, - { - "epoch": 0.7929234731918932, - "grad_norm": 1.6683323326084807, - "learning_rate": 2.165284452170415e-07, - "loss": 1.097, - "step": 5849 - }, - { - "epoch": 0.7930590388395581, - "grad_norm": 1.7409633875722939, - "learning_rate": 2.1625562832605281e-07, - "loss": 1.1431, - "step": 5850 - }, - { - "epoch": 0.7931946044872229, - "grad_norm": 1.6945682453886448, - "learning_rate": 2.159829625739793e-07, - "loss": 1.1549, - "step": 5851 - }, - { - "epoch": 0.7933301701348878, - "grad_norm": 1.7454408425338046, - "learning_rate": 2.157104480134032e-07, - "loss": 1.0971, - "step": 5852 - }, - { - "epoch": 0.7934657357825528, - "grad_norm": 1.6650570060088044, - "learning_rate": 2.1543808469687596e-07, - "loss": 1.0904, - "step": 5853 - }, - { - "epoch": 0.7936013014302176, - "grad_norm": 1.7093116132351402, - "learning_rate": 2.1516587267692165e-07, - "loss": 1.0765, - "step": 5854 - }, - { - "epoch": 0.7937368670778825, - "grad_norm": 1.902201932598715, - "learning_rate": 2.1489381200603307e-07, - "loss": 1.1715, - "step": 5855 - }, - { - "epoch": 0.7938724327255473, - "grad_norm": 1.7218029147146168, - "learning_rate": 2.1462190273667624e-07, - "loss": 1.1173, - "step": 5856 - }, - { - "epoch": 0.7940079983732122, - "grad_norm": 2.064520127862063, - "learning_rate": 2.1435014492128545e-07, - "loss": 1.078, - "step": 5857 - }, - { - "epoch": 0.7941435640208772, - "grad_norm": 1.710476011066988, - "learning_rate": 2.1407853861226833e-07, - "loss": 1.1195, - "step": 5858 - }, - { - "epoch": 0.794279129668542, - "grad_norm": 1.6510636449119345, - "learning_rate": 2.1380708386200075e-07, - "loss": 1.1151, - "step": 5859 - }, - { - "epoch": 0.7944146953162069, - "grad_norm": 1.9567311254020667, - "learning_rate": 2.1353578072283175e-07, - "loss": 1.1351, - "step": 5860 - }, - { - "epoch": 0.7945502609638717, - "grad_norm": 1.5287161173770623, - "learning_rate": 2.1326462924707912e-07, - "loss": 1.1217, - "step": 5861 - }, - { - "epoch": 0.7946858266115366, - "grad_norm": 1.698762325839484, - "learning_rate": 2.129936294870327e-07, - "loss": 1.139, - "step": 5862 - }, - { - "epoch": 0.7948213922592016, - "grad_norm": 1.6031500730598798, - "learning_rate": 2.127227814949526e-07, - "loss": 1.13, - "step": 5863 - }, - { - "epoch": 0.7949569579068664, - "grad_norm": 1.8139888321131024, - "learning_rate": 2.124520853230697e-07, - "loss": 1.1443, - "step": 5864 - }, - { - "epoch": 0.7950925235545313, - "grad_norm": 1.9264489056237715, - "learning_rate": 2.1218154102358554e-07, - "loss": 1.1543, - "step": 5865 - }, - { - "epoch": 0.7952280892021961, - "grad_norm": 2.1082107902066265, - "learning_rate": 2.1191114864867255e-07, - "loss": 1.1242, - "step": 5866 - }, - { - "epoch": 0.795363654849861, - "grad_norm": 3.0672126577495193, - "learning_rate": 2.1164090825047388e-07, - "loss": 1.1164, - "step": 5867 - }, - { - "epoch": 0.795499220497526, - "grad_norm": 1.4661560917481267, - "learning_rate": 2.1137081988110294e-07, - "loss": 1.137, - "step": 5868 - }, - { - "epoch": 0.7956347861451908, - "grad_norm": 1.6459527588650773, - "learning_rate": 2.1110088359264445e-07, - "loss": 1.1485, - "step": 5869 - }, - { - "epoch": 0.7957703517928557, - "grad_norm": 2.7996337293430176, - "learning_rate": 2.108310994371534e-07, - "loss": 1.142, - "step": 5870 - }, - { - "epoch": 0.7959059174405205, - "grad_norm": 2.9636247806762617, - "learning_rate": 2.105614674666556e-07, - "loss": 1.0834, - "step": 5871 - }, - { - "epoch": 0.7960414830881855, - "grad_norm": 2.376165704702495, - "learning_rate": 2.1029198773314693e-07, - "loss": 1.1439, - "step": 5872 - }, - { - "epoch": 0.7961770487358504, - "grad_norm": 3.815284781287962, - "learning_rate": 2.1002266028859539e-07, - "loss": 1.1318, - "step": 5873 - }, - { - "epoch": 0.7963126143835152, - "grad_norm": 1.5437890446851927, - "learning_rate": 2.0975348518493762e-07, - "loss": 1.1282, - "step": 5874 - }, - { - "epoch": 0.7964481800311801, - "grad_norm": 1.4232154067935234, - "learning_rate": 2.094844624740828e-07, - "loss": 1.1275, - "step": 5875 - }, - { - "epoch": 0.7965837456788449, - "grad_norm": 1.675773029907477, - "learning_rate": 2.092155922079093e-07, - "loss": 1.1347, - "step": 5876 - }, - { - "epoch": 0.7967193113265099, - "grad_norm": 4.015224857311656, - "learning_rate": 2.0894687443826675e-07, - "loss": 1.119, - "step": 5877 - }, - { - "epoch": 0.7968548769741748, - "grad_norm": 1.7876379771261166, - "learning_rate": 2.0867830921697527e-07, - "loss": 1.1547, - "step": 5878 - }, - { - "epoch": 0.7969904426218396, - "grad_norm": 1.6837450883213865, - "learning_rate": 2.0840989659582552e-07, - "loss": 1.0903, - "step": 5879 - }, - { - "epoch": 0.7971260082695045, - "grad_norm": 1.6586246175268393, - "learning_rate": 2.081416366265787e-07, - "loss": 1.1217, - "step": 5880 - }, - { - "epoch": 0.7972615739171693, - "grad_norm": 1.535166140827417, - "learning_rate": 2.078735293609668e-07, - "loss": 1.1281, - "step": 5881 - }, - { - "epoch": 0.7973971395648343, - "grad_norm": 1.5442562146302554, - "learning_rate": 2.0760557485069208e-07, - "loss": 1.0794, - "step": 5882 - }, - { - "epoch": 0.7975327052124992, - "grad_norm": 2.57548996881889, - "learning_rate": 2.073377731474275e-07, - "loss": 1.1199, - "step": 5883 - }, - { - "epoch": 0.797668270860164, - "grad_norm": 1.8245495433141452, - "learning_rate": 2.0707012430281646e-07, - "loss": 1.1432, - "step": 5884 - }, - { - "epoch": 0.7978038365078289, - "grad_norm": 1.402715529630314, - "learning_rate": 2.0680262836847294e-07, - "loss": 1.0975, - "step": 5885 - }, - { - "epoch": 0.7979394021554937, - "grad_norm": 2.519447357558393, - "learning_rate": 2.065352853959814e-07, - "loss": 1.1197, - "step": 5886 - }, - { - "epoch": 0.7980749678031587, - "grad_norm": 1.9611231377696992, - "learning_rate": 2.0626809543689682e-07, - "loss": 1.1227, - "step": 5887 - }, - { - "epoch": 0.7982105334508236, - "grad_norm": 2.0485625557210323, - "learning_rate": 2.0600105854274474e-07, - "loss": 1.0923, - "step": 5888 - }, - { - "epoch": 0.7983460990984884, - "grad_norm": 2.079130426944005, - "learning_rate": 2.0573417476502108e-07, - "loss": 1.1334, - "step": 5889 - }, - { - "epoch": 0.7984816647461533, - "grad_norm": 1.6897061609832829, - "learning_rate": 2.0546744415519223e-07, - "loss": 1.0915, - "step": 5890 - }, - { - "epoch": 0.7986172303938182, - "grad_norm": 1.6315960763555541, - "learning_rate": 2.052008667646954e-07, - "loss": 1.1506, - "step": 5891 - }, - { - "epoch": 0.7987527960414831, - "grad_norm": 1.54394959815121, - "learning_rate": 2.049344426449371e-07, - "loss": 1.0845, - "step": 5892 - }, - { - "epoch": 0.798888361689148, - "grad_norm": 1.6450757729192877, - "learning_rate": 2.0466817184729624e-07, - "loss": 1.0853, - "step": 5893 - }, - { - "epoch": 0.7990239273368128, - "grad_norm": 1.590901418774006, - "learning_rate": 2.0440205442311987e-07, - "loss": 1.1226, - "step": 5894 - }, - { - "epoch": 0.7991594929844777, - "grad_norm": 5.383892802791, - "learning_rate": 2.041360904237278e-07, - "loss": 1.1115, - "step": 5895 - }, - { - "epoch": 0.7992950586321426, - "grad_norm": 2.2364235403522956, - "learning_rate": 2.0387027990040827e-07, - "loss": 1.1136, - "step": 5896 - }, - { - "epoch": 0.7994306242798075, - "grad_norm": 1.4483243014809557, - "learning_rate": 2.0360462290442105e-07, - "loss": 1.0921, - "step": 5897 - }, - { - "epoch": 0.7995661899274724, - "grad_norm": 1.7909769512841927, - "learning_rate": 2.033391194869959e-07, - "loss": 1.1413, - "step": 5898 - }, - { - "epoch": 0.7997017555751372, - "grad_norm": 2.265014926645139, - "learning_rate": 2.03073769699333e-07, - "loss": 1.1386, - "step": 5899 - }, - { - "epoch": 0.7998373212228022, - "grad_norm": 1.8109907808204184, - "learning_rate": 2.0280857359260316e-07, - "loss": 1.1165, - "step": 5900 - }, - { - "epoch": 0.799972886870467, - "grad_norm": 1.6316682197190682, - "learning_rate": 2.025435312179472e-07, - "loss": 1.15, - "step": 5901 - }, - { - "epoch": 0.8001084525181319, - "grad_norm": 1.8799022561926828, - "learning_rate": 2.0227864262647664e-07, - "loss": 1.1258, - "step": 5902 - }, - { - "epoch": 0.8002440181657968, - "grad_norm": 1.8097054631037865, - "learning_rate": 2.0201390786927286e-07, - "loss": 1.1226, - "step": 5903 - }, - { - "epoch": 0.8003795838134616, - "grad_norm": 1.6626462382932796, - "learning_rate": 2.017493269973881e-07, - "loss": 1.1129, - "step": 5904 - }, - { - "epoch": 0.8005151494611266, - "grad_norm": 1.4890572749226454, - "learning_rate": 2.014849000618446e-07, - "loss": 1.1183, - "step": 5905 - }, - { - "epoch": 0.8006507151087914, - "grad_norm": 5.595374927760697, - "learning_rate": 2.012206271136353e-07, - "loss": 1.1269, - "step": 5906 - }, - { - "epoch": 0.8007862807564563, - "grad_norm": 2.118717689969066, - "learning_rate": 2.0095650820372234e-07, - "loss": 1.1451, - "step": 5907 - }, - { - "epoch": 0.8009218464041212, - "grad_norm": 1.4636488412353184, - "learning_rate": 2.006925433830401e-07, - "loss": 1.1109, - "step": 5908 - }, - { - "epoch": 0.801057412051786, - "grad_norm": 1.670185209462989, - "learning_rate": 2.0042873270249094e-07, - "loss": 1.1386, - "step": 5909 - }, - { - "epoch": 0.801192977699451, - "grad_norm": 1.5121969333613947, - "learning_rate": 2.0016507621294975e-07, - "loss": 1.104, - "step": 5910 - }, - { - "epoch": 0.8013285433471159, - "grad_norm": 3.1899102411378957, - "learning_rate": 1.9990157396525963e-07, - "loss": 1.1396, - "step": 5911 - }, - { - "epoch": 0.8014641089947807, - "grad_norm": 1.983345166157163, - "learning_rate": 1.9963822601023595e-07, - "loss": 1.125, - "step": 5912 - }, - { - "epoch": 0.8015996746424456, - "grad_norm": 1.4731488756207114, - "learning_rate": 1.9937503239866205e-07, - "loss": 1.1361, - "step": 5913 - }, - { - "epoch": 0.8017352402901105, - "grad_norm": 2.5629145780583995, - "learning_rate": 1.9911199318129403e-07, - "loss": 1.1394, - "step": 5914 - }, - { - "epoch": 0.8018708059377754, - "grad_norm": 1.698767346847256, - "learning_rate": 1.9884910840885571e-07, - "loss": 1.1318, - "step": 5915 - }, - { - "epoch": 0.8020063715854403, - "grad_norm": 2.6247620709556116, - "learning_rate": 1.9858637813204349e-07, - "loss": 1.1282, - "step": 5916 - }, - { - "epoch": 0.8021419372331051, - "grad_norm": 1.5675204088589259, - "learning_rate": 1.983238024015217e-07, - "loss": 1.1253, - "step": 5917 - }, - { - "epoch": 0.80227750288077, - "grad_norm": 1.7225519624920504, - "learning_rate": 1.9806138126792716e-07, - "loss": 1.1497, - "step": 5918 - }, - { - "epoch": 0.8024130685284349, - "grad_norm": 1.5817506981773852, - "learning_rate": 1.9779911478186485e-07, - "loss": 1.1387, - "step": 5919 - }, - { - "epoch": 0.8025486341760998, - "grad_norm": 2.050380438604846, - "learning_rate": 1.9753700299391107e-07, - "loss": 1.1004, - "step": 5920 - }, - { - "epoch": 0.8026841998237647, - "grad_norm": 1.5347275832982614, - "learning_rate": 1.9727504595461198e-07, - "loss": 1.0741, - "step": 5921 - }, - { - "epoch": 0.8028197654714295, - "grad_norm": 1.448949332619657, - "learning_rate": 1.970132437144839e-07, - "loss": 1.135, - "step": 5922 - }, - { - "epoch": 0.8029553311190945, - "grad_norm": 1.500497188838573, - "learning_rate": 1.967515963240135e-07, - "loss": 1.1028, - "step": 5923 - }, - { - "epoch": 0.8030908967667593, - "grad_norm": 1.9002963399426132, - "learning_rate": 1.9649010383365717e-07, - "loss": 1.1306, - "step": 5924 - }, - { - "epoch": 0.8032264624144242, - "grad_norm": 1.737722704996477, - "learning_rate": 1.962287662938419e-07, - "loss": 1.0952, - "step": 5925 - }, - { - "epoch": 0.8033620280620891, - "grad_norm": 1.5314961857640172, - "learning_rate": 1.9596758375496435e-07, - "loss": 1.1094, - "step": 5926 - }, - { - "epoch": 0.8034975937097539, - "grad_norm": 1.7356739357913282, - "learning_rate": 1.9570655626739176e-07, - "loss": 1.1271, - "step": 5927 - }, - { - "epoch": 0.8036331593574189, - "grad_norm": 1.4387784383487527, - "learning_rate": 1.9544568388146098e-07, - "loss": 1.1114, - "step": 5928 - }, - { - "epoch": 0.8037687250050837, - "grad_norm": 1.4790715597794624, - "learning_rate": 1.951849666474793e-07, - "loss": 1.1143, - "step": 5929 - }, - { - "epoch": 0.8039042906527486, - "grad_norm": 1.4890621640432262, - "learning_rate": 1.9492440461572401e-07, - "loss": 1.0994, - "step": 5930 - }, - { - "epoch": 0.8040398563004135, - "grad_norm": 1.9867567259411645, - "learning_rate": 1.9466399783644249e-07, - "loss": 1.1364, - "step": 5931 - }, - { - "epoch": 0.8041754219480783, - "grad_norm": 2.0917836928122, - "learning_rate": 1.9440374635985224e-07, - "loss": 1.1145, - "step": 5932 - }, - { - "epoch": 0.8043109875957433, - "grad_norm": 1.501505201952952, - "learning_rate": 1.941436502361402e-07, - "loss": 1.1464, - "step": 5933 - }, - { - "epoch": 0.8044465532434081, - "grad_norm": 1.696569752390788, - "learning_rate": 1.9388370951546428e-07, - "loss": 1.1357, - "step": 5934 - }, - { - "epoch": 0.804582118891073, - "grad_norm": 2.4447330932008597, - "learning_rate": 1.9362392424795183e-07, - "loss": 1.1319, - "step": 5935 - }, - { - "epoch": 0.8047176845387379, - "grad_norm": 1.646948690225786, - "learning_rate": 1.933642944837004e-07, - "loss": 1.1222, - "step": 5936 - }, - { - "epoch": 0.8048532501864027, - "grad_norm": 2.035494010563723, - "learning_rate": 1.9310482027277763e-07, - "loss": 1.1129, - "step": 5937 - }, - { - "epoch": 0.8049888158340677, - "grad_norm": 1.6951652257494048, - "learning_rate": 1.9284550166522108e-07, - "loss": 1.1208, - "step": 5938 - }, - { - "epoch": 0.8051243814817325, - "grad_norm": 1.8723600824037987, - "learning_rate": 1.9258633871103814e-07, - "loss": 1.137, - "step": 5939 - }, - { - "epoch": 0.8052599471293974, - "grad_norm": 1.5529951698634274, - "learning_rate": 1.923273314602065e-07, - "loss": 1.1222, - "step": 5940 - }, - { - "epoch": 0.8053955127770623, - "grad_norm": 1.7179537666837605, - "learning_rate": 1.920684799626736e-07, - "loss": 1.1076, - "step": 5941 - }, - { - "epoch": 0.8055310784247272, - "grad_norm": 1.3832473941481473, - "learning_rate": 1.9180978426835693e-07, - "loss": 1.1377, - "step": 5942 - }, - { - "epoch": 0.8056666440723921, - "grad_norm": 2.3701027615066326, - "learning_rate": 1.9155124442714387e-07, - "loss": 1.1709, - "step": 5943 - }, - { - "epoch": 0.8058022097200569, - "grad_norm": 1.5756754271746136, - "learning_rate": 1.912928604888918e-07, - "loss": 1.0928, - "step": 5944 - }, - { - "epoch": 0.8059377753677218, - "grad_norm": 2.073521022808377, - "learning_rate": 1.91034632503428e-07, - "loss": 1.1373, - "step": 5945 - }, - { - "epoch": 0.8060733410153867, - "grad_norm": 1.9263968056506517, - "learning_rate": 1.907765605205498e-07, - "loss": 1.1525, - "step": 5946 - }, - { - "epoch": 0.8062089066630516, - "grad_norm": 1.5709850823521951, - "learning_rate": 1.9051864459002454e-07, - "loss": 1.137, - "step": 5947 - }, - { - "epoch": 0.8063444723107165, - "grad_norm": 1.4540116611193388, - "learning_rate": 1.9026088476158851e-07, - "loss": 1.1012, - "step": 5948 - }, - { - "epoch": 0.8064800379583813, - "grad_norm": 1.751530905541245, - "learning_rate": 1.9000328108494967e-07, - "loss": 1.1079, - "step": 5949 - }, - { - "epoch": 0.8066156036060462, - "grad_norm": 1.5041963309521462, - "learning_rate": 1.897458336097838e-07, - "loss": 1.1722, - "step": 5950 - }, - { - "epoch": 0.8067511692537112, - "grad_norm": 1.763235699885478, - "learning_rate": 1.8948854238573874e-07, - "loss": 1.1282, - "step": 5951 - }, - { - "epoch": 0.806886734901376, - "grad_norm": 1.4910932712330793, - "learning_rate": 1.8923140746242994e-07, - "loss": 1.1714, - "step": 5952 - }, - { - "epoch": 0.8070223005490409, - "grad_norm": 1.7811854420489461, - "learning_rate": 1.8897442888944492e-07, - "loss": 1.1743, - "step": 5953 - }, - { - "epoch": 0.8071578661967057, - "grad_norm": 1.4643921808084872, - "learning_rate": 1.8871760671633895e-07, - "loss": 1.056, - "step": 5954 - }, - { - "epoch": 0.8072934318443706, - "grad_norm": 1.526181505132543, - "learning_rate": 1.884609409926391e-07, - "loss": 1.1208, - "step": 5955 - }, - { - "epoch": 0.8074289974920356, - "grad_norm": 1.852073557472295, - "learning_rate": 1.882044317678404e-07, - "loss": 1.1315, - "step": 5956 - }, - { - "epoch": 0.8075645631397004, - "grad_norm": 1.9343205902495728, - "learning_rate": 1.8794807909140963e-07, - "loss": 1.1551, - "step": 5957 - }, - { - "epoch": 0.8077001287873653, - "grad_norm": 1.809520918462569, - "learning_rate": 1.8769188301278126e-07, - "loss": 1.1787, - "step": 5958 - }, - { - "epoch": 0.8078356944350301, - "grad_norm": 2.0683614207771166, - "learning_rate": 1.8743584358136188e-07, - "loss": 1.1337, - "step": 5959 - }, - { - "epoch": 0.807971260082695, - "grad_norm": 4.800317612728052, - "learning_rate": 1.8717996084652587e-07, - "loss": 1.1541, - "step": 5960 - }, - { - "epoch": 0.80810682573036, - "grad_norm": 1.5978512553209485, - "learning_rate": 1.8692423485761833e-07, - "loss": 1.1025, - "step": 5961 - }, - { - "epoch": 0.8082423913780248, - "grad_norm": 1.8507475534815128, - "learning_rate": 1.86668665663954e-07, - "loss": 1.1092, - "step": 5962 - }, - { - "epoch": 0.8083779570256897, - "grad_norm": 1.8454431362164936, - "learning_rate": 1.8641325331481762e-07, - "loss": 1.1341, - "step": 5963 - }, - { - "epoch": 0.8085135226733545, - "grad_norm": 1.8377880324617795, - "learning_rate": 1.861579978594632e-07, - "loss": 1.0993, - "step": 5964 - }, - { - "epoch": 0.8086490883210194, - "grad_norm": 1.9545861166657241, - "learning_rate": 1.859028993471148e-07, - "loss": 1.1031, - "step": 5965 - }, - { - "epoch": 0.8087846539686844, - "grad_norm": 1.749398694057212, - "learning_rate": 1.8564795782696607e-07, - "loss": 1.1182, - "step": 5966 - }, - { - "epoch": 0.8089202196163492, - "grad_norm": 1.6565323386548123, - "learning_rate": 1.8539317334818072e-07, - "loss": 1.1007, - "step": 5967 - }, - { - "epoch": 0.8090557852640141, - "grad_norm": 1.6894258221706542, - "learning_rate": 1.8513854595989198e-07, - "loss": 1.1014, - "step": 5968 - }, - { - "epoch": 0.8091913509116789, - "grad_norm": 1.5956661698275836, - "learning_rate": 1.848840757112019e-07, - "loss": 1.1002, - "step": 5969 - }, - { - "epoch": 0.8093269165593439, - "grad_norm": 2.1483610270450963, - "learning_rate": 1.8462976265118436e-07, - "loss": 1.1371, - "step": 5970 - }, - { - "epoch": 0.8094624822070088, - "grad_norm": 1.419875148172435, - "learning_rate": 1.8437560682888043e-07, - "loss": 1.1209, - "step": 5971 - }, - { - "epoch": 0.8095980478546736, - "grad_norm": 2.0063314386953133, - "learning_rate": 1.8412160829330304e-07, - "loss": 1.1242, - "step": 5972 - }, - { - "epoch": 0.8097336135023385, - "grad_norm": 1.4499072495161034, - "learning_rate": 1.8386776709343278e-07, - "loss": 1.0517, - "step": 5973 - }, - { - "epoch": 0.8098691791500033, - "grad_norm": 1.6036227727168066, - "learning_rate": 1.8361408327822203e-07, - "loss": 1.1155, - "step": 5974 - }, - { - "epoch": 0.8100047447976683, - "grad_norm": 1.6140414943140244, - "learning_rate": 1.8336055689659091e-07, - "loss": 1.1382, - "step": 5975 - }, - { - "epoch": 0.8101403104453332, - "grad_norm": 1.5551060513512636, - "learning_rate": 1.831071879974302e-07, - "loss": 1.1246, - "step": 5976 - }, - { - "epoch": 0.810275876092998, - "grad_norm": 2.08953667574848, - "learning_rate": 1.8285397662960022e-07, - "loss": 1.1347, - "step": 5977 - }, - { - "epoch": 0.8104114417406629, - "grad_norm": 2.1326879707999806, - "learning_rate": 1.8260092284193062e-07, - "loss": 1.1154, - "step": 5978 - }, - { - "epoch": 0.8105470073883277, - "grad_norm": 1.5317621218182431, - "learning_rate": 1.823480266832209e-07, - "loss": 1.1389, - "step": 5979 - }, - { - "epoch": 0.8106825730359927, - "grad_norm": 1.660837399962454, - "learning_rate": 1.8209528820224008e-07, - "loss": 1.1475, - "step": 5980 - }, - { - "epoch": 0.8108181386836576, - "grad_norm": 1.8279320702147228, - "learning_rate": 1.8184270744772678e-07, - "loss": 1.1161, - "step": 5981 - }, - { - "epoch": 0.8109537043313224, - "grad_norm": 1.6996888472212568, - "learning_rate": 1.815902844683892e-07, - "loss": 1.1505, - "step": 5982 - }, - { - "epoch": 0.8110892699789873, - "grad_norm": 1.523338831842278, - "learning_rate": 1.8133801931290516e-07, - "loss": 1.1302, - "step": 5983 - }, - { - "epoch": 0.8112248356266522, - "grad_norm": 1.6959830344405085, - "learning_rate": 1.8108591202992195e-07, - "loss": 1.1414, - "step": 5984 - }, - { - "epoch": 0.8113604012743171, - "grad_norm": 1.5504338169240748, - "learning_rate": 1.808339626680565e-07, - "loss": 1.1243, - "step": 5985 - }, - { - "epoch": 0.811495966921982, - "grad_norm": 1.9464548006397318, - "learning_rate": 1.8058217127589526e-07, - "loss": 1.0959, - "step": 5986 - }, - { - "epoch": 0.8116315325696468, - "grad_norm": 1.63456231908327, - "learning_rate": 1.8033053790199415e-07, - "loss": 1.1594, - "step": 5987 - }, - { - "epoch": 0.8117670982173117, - "grad_norm": 2.1266680270254206, - "learning_rate": 1.8007906259487904e-07, - "loss": 1.1127, - "step": 5988 - }, - { - "epoch": 0.8119026638649767, - "grad_norm": 1.9741367429624257, - "learning_rate": 1.7982774540304402e-07, - "loss": 1.0971, - "step": 5989 - }, - { - "epoch": 0.8120382295126415, - "grad_norm": 2.4779406741131345, - "learning_rate": 1.7957658637495488e-07, - "loss": 1.1254, - "step": 5990 - }, - { - "epoch": 0.8121737951603064, - "grad_norm": 2.3206012117719363, - "learning_rate": 1.7932558555904453e-07, - "loss": 1.0816, - "step": 5991 - }, - { - "epoch": 0.8123093608079712, - "grad_norm": 1.5682214853043726, - "learning_rate": 1.790747430037174e-07, - "loss": 1.1314, - "step": 5992 - }, - { - "epoch": 0.8124449264556362, - "grad_norm": 2.517079855007105, - "learning_rate": 1.7882405875734564e-07, - "loss": 1.1567, - "step": 5993 - }, - { - "epoch": 0.8125804921033011, - "grad_norm": 1.9160382319120954, - "learning_rate": 1.785735328682727e-07, - "loss": 1.1031, - "step": 5994 - }, - { - "epoch": 0.8127160577509659, - "grad_norm": 1.7047405572571654, - "learning_rate": 1.7832316538480973e-07, - "loss": 1.1067, - "step": 5995 - }, - { - "epoch": 0.8128516233986308, - "grad_norm": 1.666998235386702, - "learning_rate": 1.7807295635523845e-07, - "loss": 1.0915, - "step": 5996 - }, - { - "epoch": 0.8129871890462956, - "grad_norm": 1.6917107959326478, - "learning_rate": 1.7782290582780958e-07, - "loss": 1.121, - "step": 5997 - }, - { - "epoch": 0.8131227546939606, - "grad_norm": 1.6576505387683373, - "learning_rate": 1.7757301385074342e-07, - "loss": 1.0803, - "step": 5998 - }, - { - "epoch": 0.8132583203416255, - "grad_norm": 2.4883424236793017, - "learning_rate": 1.7732328047222978e-07, - "loss": 1.1169, - "step": 5999 - }, - { - "epoch": 0.8133938859892903, - "grad_norm": 1.7548715703891027, - "learning_rate": 1.7707370574042769e-07, - "loss": 1.1191, - "step": 6000 - }, - { - "epoch": 0.8135294516369552, - "grad_norm": 1.6901260171636896, - "learning_rate": 1.7682428970346553e-07, - "loss": 1.1964, - "step": 6001 - }, - { - "epoch": 0.81366501728462, - "grad_norm": 1.761254168506445, - "learning_rate": 1.765750324094415e-07, - "loss": 1.141, - "step": 6002 - }, - { - "epoch": 0.813800582932285, - "grad_norm": 1.592841377498472, - "learning_rate": 1.763259339064226e-07, - "loss": 1.1255, - "step": 6003 - }, - { - "epoch": 0.8139361485799499, - "grad_norm": 1.6579102951077316, - "learning_rate": 1.7607699424244582e-07, - "loss": 1.153, - "step": 6004 - }, - { - "epoch": 0.8140717142276147, - "grad_norm": 2.0297871946534265, - "learning_rate": 1.7582821346551711e-07, - "loss": 1.0901, - "step": 6005 - }, - { - "epoch": 0.8142072798752796, - "grad_norm": 1.6026151658907208, - "learning_rate": 1.7557959162361148e-07, - "loss": 1.0782, - "step": 6006 - }, - { - "epoch": 0.8143428455229444, - "grad_norm": 1.8550548425805877, - "learning_rate": 1.753311287646745e-07, - "loss": 1.1788, - "step": 6007 - }, - { - "epoch": 0.8144784111706094, - "grad_norm": 1.8153339282926075, - "learning_rate": 1.7508282493661918e-07, - "loss": 1.1265, - "step": 6008 - }, - { - "epoch": 0.8146139768182743, - "grad_norm": 1.8125603525840703, - "learning_rate": 1.7483468018733017e-07, - "loss": 1.096, - "step": 6009 - }, - { - "epoch": 0.8147495424659391, - "grad_norm": 1.4746235114417292, - "learning_rate": 1.7458669456465914e-07, - "loss": 1.0833, - "step": 6010 - }, - { - "epoch": 0.814885108113604, - "grad_norm": 1.5107256601895256, - "learning_rate": 1.7433886811642916e-07, - "loss": 1.1048, - "step": 6011 - }, - { - "epoch": 0.8150206737612689, - "grad_norm": 1.6320236746013845, - "learning_rate": 1.740912008904305e-07, - "loss": 1.1393, - "step": 6012 - }, - { - "epoch": 0.8151562394089338, - "grad_norm": 1.434841448401946, - "learning_rate": 1.7384369293442501e-07, - "loss": 1.1373, - "step": 6013 - }, - { - "epoch": 0.8152918050565987, - "grad_norm": 1.635841738118453, - "learning_rate": 1.7359634429614145e-07, - "loss": 1.0925, - "step": 6014 - }, - { - "epoch": 0.8154273707042635, - "grad_norm": 1.5629268876785556, - "learning_rate": 1.7334915502328028e-07, - "loss": 1.1725, - "step": 6015 - }, - { - "epoch": 0.8155629363519284, - "grad_norm": 2.2876669362547184, - "learning_rate": 1.7310212516350908e-07, - "loss": 1.1441, - "step": 6016 - }, - { - "epoch": 0.8156985019995933, - "grad_norm": 1.6586244743858076, - "learning_rate": 1.7285525476446594e-07, - "loss": 1.1031, - "step": 6017 - }, - { - "epoch": 0.8158340676472582, - "grad_norm": 1.6571672478022463, - "learning_rate": 1.7260854387375778e-07, - "loss": 1.0911, - "step": 6018 - }, - { - "epoch": 0.8159696332949231, - "grad_norm": 1.703094100183799, - "learning_rate": 1.7236199253896089e-07, - "loss": 1.1311, - "step": 6019 - }, - { - "epoch": 0.8161051989425879, - "grad_norm": 2.1974611167135483, - "learning_rate": 1.7211560080762078e-07, - "loss": 1.1336, - "step": 6020 - }, - { - "epoch": 0.8162407645902529, - "grad_norm": 1.7363988494098888, - "learning_rate": 1.718693687272521e-07, - "loss": 1.1199, - "step": 6021 - }, - { - "epoch": 0.8163763302379177, - "grad_norm": 1.7315927508010474, - "learning_rate": 1.716232963453389e-07, - "loss": 1.16, - "step": 6022 - }, - { - "epoch": 0.8165118958855826, - "grad_norm": 1.8132590054904536, - "learning_rate": 1.7137738370933408e-07, - "loss": 1.1503, - "step": 6023 - }, - { - "epoch": 0.8166474615332475, - "grad_norm": 3.293230632754883, - "learning_rate": 1.7113163086666016e-07, - "loss": 1.1363, - "step": 6024 - }, - { - "epoch": 0.8167830271809123, - "grad_norm": 1.5818730261421958, - "learning_rate": 1.7088603786470845e-07, - "loss": 1.0948, - "step": 6025 - }, - { - "epoch": 0.8169185928285773, - "grad_norm": 1.587556193740831, - "learning_rate": 1.7064060475083975e-07, - "loss": 1.1307, - "step": 6026 - }, - { - "epoch": 0.8170541584762421, - "grad_norm": 1.4772631289257365, - "learning_rate": 1.7039533157238394e-07, - "loss": 1.1006, - "step": 6027 - }, - { - "epoch": 0.817189724123907, - "grad_norm": 1.753988032806564, - "learning_rate": 1.7015021837663979e-07, - "loss": 1.135, - "step": 6028 - }, - { - "epoch": 0.8173252897715719, - "grad_norm": 1.5899368651941173, - "learning_rate": 1.6990526521087567e-07, - "loss": 1.1357, - "step": 6029 - }, - { - "epoch": 0.8174608554192367, - "grad_norm": 1.8142258814073933, - "learning_rate": 1.696604721223288e-07, - "loss": 1.1234, - "step": 6030 - }, - { - "epoch": 0.8175964210669017, - "grad_norm": 1.674232229689965, - "learning_rate": 1.6941583915820578e-07, - "loss": 1.1587, - "step": 6031 - }, - { - "epoch": 0.8177319867145665, - "grad_norm": 1.4548381055913768, - "learning_rate": 1.6917136636568176e-07, - "loss": 1.122, - "step": 6032 - }, - { - "epoch": 0.8178675523622314, - "grad_norm": 1.6535245333858326, - "learning_rate": 1.6892705379190153e-07, - "loss": 1.0946, - "step": 6033 - }, - { - "epoch": 0.8180031180098963, - "grad_norm": 1.638372173850085, - "learning_rate": 1.6868290148397878e-07, - "loss": 1.1256, - "step": 6034 - }, - { - "epoch": 0.8181386836575611, - "grad_norm": 1.727856167991448, - "learning_rate": 1.6843890948899665e-07, - "loss": 1.1859, - "step": 6035 - }, - { - "epoch": 0.8182742493052261, - "grad_norm": 1.8081122270328929, - "learning_rate": 1.6819507785400677e-07, - "loss": 1.1568, - "step": 6036 - }, - { - "epoch": 0.8184098149528909, - "grad_norm": 1.8701119213305, - "learning_rate": 1.6795140662603026e-07, - "loss": 1.1347, - "step": 6037 - }, - { - "epoch": 0.8185453806005558, - "grad_norm": 2.0667241227409763, - "learning_rate": 1.6770789585205725e-07, - "loss": 1.0982, - "step": 6038 - }, - { - "epoch": 0.8186809462482207, - "grad_norm": 1.5141935297004685, - "learning_rate": 1.6746454557904677e-07, - "loss": 1.0652, - "step": 6039 - }, - { - "epoch": 0.8188165118958856, - "grad_norm": 1.4801753030981784, - "learning_rate": 1.6722135585392706e-07, - "loss": 1.1348, - "step": 6040 - }, - { - "epoch": 0.8189520775435505, - "grad_norm": 1.4458901064527596, - "learning_rate": 1.6697832672359525e-07, - "loss": 1.1252, - "step": 6041 - }, - { - "epoch": 0.8190876431912153, - "grad_norm": 1.649733523174716, - "learning_rate": 1.6673545823491774e-07, - "loss": 1.1311, - "step": 6042 - }, - { - "epoch": 0.8192232088388802, - "grad_norm": 1.8204788270225223, - "learning_rate": 1.6649275043472965e-07, - "loss": 1.1288, - "step": 6043 - }, - { - "epoch": 0.8193587744865451, - "grad_norm": 1.4475357958056674, - "learning_rate": 1.6625020336983565e-07, - "loss": 1.0954, - "step": 6044 - }, - { - "epoch": 0.81949434013421, - "grad_norm": 2.030120288504051, - "learning_rate": 1.6600781708700816e-07, - "loss": 1.1111, - "step": 6045 - }, - { - "epoch": 0.8196299057818749, - "grad_norm": 4.856077149277165, - "learning_rate": 1.6576559163299053e-07, - "loss": 1.1066, - "step": 6046 - }, - { - "epoch": 0.8197654714295397, - "grad_norm": 1.6769097955874899, - "learning_rate": 1.6552352705449302e-07, - "loss": 1.1158, - "step": 6047 - }, - { - "epoch": 0.8199010370772046, - "grad_norm": 4.39274403469556, - "learning_rate": 1.6528162339819685e-07, - "loss": 1.1041, - "step": 6048 - }, - { - "epoch": 0.8200366027248696, - "grad_norm": 1.788703436930725, - "learning_rate": 1.6503988071075026e-07, - "loss": 1.128, - "step": 6049 - }, - { - "epoch": 0.8201721683725344, - "grad_norm": 1.989861816449875, - "learning_rate": 1.647982990387724e-07, - "loss": 1.0957, - "step": 6050 - }, - { - "epoch": 0.8203077340201993, - "grad_norm": 1.7866908582179641, - "learning_rate": 1.6455687842884936e-07, - "loss": 1.0872, - "step": 6051 - }, - { - "epoch": 0.8204432996678641, - "grad_norm": 1.5320166535962116, - "learning_rate": 1.643156189275382e-07, - "loss": 1.1649, - "step": 6052 - }, - { - "epoch": 0.820578865315529, - "grad_norm": 1.4448362988513614, - "learning_rate": 1.6407452058136294e-07, - "loss": 1.1048, - "step": 6053 - }, - { - "epoch": 0.820714430963194, - "grad_norm": 1.6930877884647348, - "learning_rate": 1.6383358343681852e-07, - "loss": 1.16, - "step": 6054 - }, - { - "epoch": 0.8208499966108588, - "grad_norm": 1.9943306758951778, - "learning_rate": 1.6359280754036675e-07, - "loss": 1.0995, - "step": 6055 - }, - { - "epoch": 0.8209855622585237, - "grad_norm": 1.4679277413028868, - "learning_rate": 1.6335219293844038e-07, - "loss": 1.1093, - "step": 6056 - }, - { - "epoch": 0.8211211279061885, - "grad_norm": 1.6266838424335046, - "learning_rate": 1.6311173967743918e-07, - "loss": 1.1495, - "step": 6057 - }, - { - "epoch": 0.8212566935538534, - "grad_norm": 1.575516403761223, - "learning_rate": 1.6287144780373308e-07, - "loss": 1.1205, - "step": 6058 - }, - { - "epoch": 0.8213922592015184, - "grad_norm": 1.990377363997174, - "learning_rate": 1.6263131736366032e-07, - "loss": 1.1551, - "step": 6059 - }, - { - "epoch": 0.8215278248491832, - "grad_norm": 2.3818960381999053, - "learning_rate": 1.623913484035282e-07, - "loss": 1.1229, - "step": 6060 - }, - { - "epoch": 0.8216633904968481, - "grad_norm": 2.15637372045643, - "learning_rate": 1.6215154096961292e-07, - "loss": 1.1208, - "step": 6061 - }, - { - "epoch": 0.8217989561445129, - "grad_norm": 3.29782982353072, - "learning_rate": 1.619118951081594e-07, - "loss": 1.1226, - "step": 6062 - }, - { - "epoch": 0.8219345217921779, - "grad_norm": 1.5614304900400975, - "learning_rate": 1.616724108653813e-07, - "loss": 1.1218, - "step": 6063 - }, - { - "epoch": 0.8220700874398428, - "grad_norm": 2.6940536420391563, - "learning_rate": 1.614330882874616e-07, - "loss": 1.1303, - "step": 6064 - }, - { - "epoch": 0.8222056530875076, - "grad_norm": 2.4183661525643565, - "learning_rate": 1.611939274205515e-07, - "loss": 1.1414, - "step": 6065 - }, - { - "epoch": 0.8223412187351725, - "grad_norm": 1.7761563795006645, - "learning_rate": 1.6095492831077128e-07, - "loss": 1.1343, - "step": 6066 - }, - { - "epoch": 0.8224767843828373, - "grad_norm": 1.9231075356095122, - "learning_rate": 1.6071609100421048e-07, - "loss": 1.1259, - "step": 6067 - }, - { - "epoch": 0.8226123500305023, - "grad_norm": 1.4564961137741643, - "learning_rate": 1.6047741554692606e-07, - "loss": 1.1663, - "step": 6068 - }, - { - "epoch": 0.8227479156781672, - "grad_norm": 1.5621797665849535, - "learning_rate": 1.6023890198494584e-07, - "loss": 1.1571, - "step": 6069 - }, - { - "epoch": 0.822883481325832, - "grad_norm": 2.03909193309879, - "learning_rate": 1.6000055036426407e-07, - "loss": 1.1363, - "step": 6070 - }, - { - "epoch": 0.8230190469734969, - "grad_norm": 1.612741838396169, - "learning_rate": 1.5976236073084627e-07, - "loss": 1.1047, - "step": 6071 - }, - { - "epoch": 0.8231546126211619, - "grad_norm": 1.4611043610055217, - "learning_rate": 1.595243331306244e-07, - "loss": 1.149, - "step": 6072 - }, - { - "epoch": 0.8232901782688267, - "grad_norm": 1.748512264762146, - "learning_rate": 1.592864676095006e-07, - "loss": 1.1202, - "step": 6073 - }, - { - "epoch": 0.8234257439164916, - "grad_norm": 2.6827996334696893, - "learning_rate": 1.5904876421334534e-07, - "loss": 1.0798, - "step": 6074 - }, - { - "epoch": 0.8235613095641564, - "grad_norm": 1.6145908786602705, - "learning_rate": 1.5881122298799788e-07, - "loss": 1.1196, - "step": 6075 - }, - { - "epoch": 0.8236968752118213, - "grad_norm": 1.553126695005957, - "learning_rate": 1.585738439792661e-07, - "loss": 1.1356, - "step": 6076 - }, - { - "epoch": 0.8238324408594863, - "grad_norm": 1.7470861464852228, - "learning_rate": 1.5833662723292662e-07, - "loss": 1.12, - "step": 6077 - }, - { - "epoch": 0.8239680065071511, - "grad_norm": 1.7327131769954773, - "learning_rate": 1.5809957279472496e-07, - "loss": 1.1218, - "step": 6078 - }, - { - "epoch": 0.824103572154816, - "grad_norm": 1.4617269190312414, - "learning_rate": 1.578626807103751e-07, - "loss": 1.091, - "step": 6079 - }, - { - "epoch": 0.8242391378024808, - "grad_norm": 1.951678845101915, - "learning_rate": 1.5762595102555987e-07, - "loss": 1.108, - "step": 6080 - }, - { - "epoch": 0.8243747034501457, - "grad_norm": 1.6767657086384855, - "learning_rate": 1.5738938378593068e-07, - "loss": 1.1355, - "step": 6081 - }, - { - "epoch": 0.8245102690978107, - "grad_norm": 1.831663030179059, - "learning_rate": 1.5715297903710767e-07, - "loss": 1.1234, - "step": 6082 - }, - { - "epoch": 0.8246458347454755, - "grad_norm": 1.6789076886322105, - "learning_rate": 1.5691673682467967e-07, - "loss": 1.1298, - "step": 6083 - }, - { - "epoch": 0.8247814003931404, - "grad_norm": 1.498642986284596, - "learning_rate": 1.5668065719420398e-07, - "loss": 1.1588, - "step": 6084 - }, - { - "epoch": 0.8249169660408052, - "grad_norm": 1.748159225193498, - "learning_rate": 1.564447401912069e-07, - "loss": 1.0978, - "step": 6085 - }, - { - "epoch": 0.8250525316884701, - "grad_norm": 2.0560717279926677, - "learning_rate": 1.5620898586118292e-07, - "loss": 1.1257, - "step": 6086 - }, - { - "epoch": 0.8251880973361351, - "grad_norm": 1.8411205061859668, - "learning_rate": 1.5597339424959588e-07, - "loss": 1.1487, - "step": 6087 - }, - { - "epoch": 0.8253236629837999, - "grad_norm": 1.61984623707389, - "learning_rate": 1.557379654018769e-07, - "loss": 1.1323, - "step": 6088 - }, - { - "epoch": 0.8254592286314648, - "grad_norm": 1.7186574116629958, - "learning_rate": 1.555026993634275e-07, - "loss": 1.0973, - "step": 6089 - }, - { - "epoch": 0.8255947942791296, - "grad_norm": 1.5308616452342962, - "learning_rate": 1.5526759617961614e-07, - "loss": 1.1329, - "step": 6090 - }, - { - "epoch": 0.8257303599267946, - "grad_norm": 1.7169702015678046, - "learning_rate": 1.5503265589578128e-07, - "loss": 1.1696, - "step": 6091 - }, - { - "epoch": 0.8258659255744595, - "grad_norm": 1.5664658487613379, - "learning_rate": 1.5479787855722858e-07, - "loss": 1.0776, - "step": 6092 - }, - { - "epoch": 0.8260014912221243, - "grad_norm": 1.5810471357671791, - "learning_rate": 1.5456326420923382e-07, - "loss": 1.1247, - "step": 6093 - }, - { - "epoch": 0.8261370568697892, - "grad_norm": 1.619568704918383, - "learning_rate": 1.543288128970399e-07, - "loss": 1.1258, - "step": 6094 - }, - { - "epoch": 0.826272622517454, - "grad_norm": 2.389858300838128, - "learning_rate": 1.5409452466585903e-07, - "loss": 1.1396, - "step": 6095 - }, - { - "epoch": 0.826408188165119, - "grad_norm": 1.6504527549332588, - "learning_rate": 1.5386039956087194e-07, - "loss": 1.1296, - "step": 6096 - }, - { - "epoch": 0.8265437538127839, - "grad_norm": 1.6787119631197842, - "learning_rate": 1.5362643762722782e-07, - "loss": 1.1127, - "step": 6097 - }, - { - "epoch": 0.8266793194604487, - "grad_norm": 1.6650925507351901, - "learning_rate": 1.5339263891004427e-07, - "loss": 1.1382, - "step": 6098 - }, - { - "epoch": 0.8268148851081136, - "grad_norm": 2.8297276151867132, - "learning_rate": 1.5315900345440757e-07, - "loss": 1.1405, - "step": 6099 - }, - { - "epoch": 0.8269504507557784, - "grad_norm": 1.6427962325166952, - "learning_rate": 1.5292553130537255e-07, - "loss": 1.1021, - "step": 6100 - }, - { - "epoch": 0.8270860164034434, - "grad_norm": 4.02367471204802, - "learning_rate": 1.526922225079623e-07, - "loss": 1.1219, - "step": 6101 - }, - { - "epoch": 0.8272215820511083, - "grad_norm": 1.384677950944082, - "learning_rate": 1.524590771071691e-07, - "loss": 1.1328, - "step": 6102 - }, - { - "epoch": 0.8273571476987731, - "grad_norm": 1.700948190567225, - "learning_rate": 1.5222609514795225e-07, - "loss": 1.1157, - "step": 6103 - }, - { - "epoch": 0.827492713346438, - "grad_norm": 1.7557824455836417, - "learning_rate": 1.5199327667524154e-07, - "loss": 1.1378, - "step": 6104 - }, - { - "epoch": 0.8276282789941029, - "grad_norm": 6.652528948664602, - "learning_rate": 1.5176062173393312e-07, - "loss": 1.1081, - "step": 6105 - }, - { - "epoch": 0.8277638446417678, - "grad_norm": 1.6718092801979756, - "learning_rate": 1.5152813036889378e-07, - "loss": 1.1161, - "step": 6106 - }, - { - "epoch": 0.8278994102894327, - "grad_norm": 1.694300805007026, - "learning_rate": 1.5129580262495656e-07, - "loss": 1.1349, - "step": 6107 - }, - { - "epoch": 0.8280349759370975, - "grad_norm": 2.055905461130772, - "learning_rate": 1.5106363854692493e-07, - "loss": 1.1662, - "step": 6108 - }, - { - "epoch": 0.8281705415847624, - "grad_norm": 2.3554125500808727, - "learning_rate": 1.5083163817956913e-07, - "loss": 1.1205, - "step": 6109 - }, - { - "epoch": 0.8283061072324273, - "grad_norm": 1.5391223333915125, - "learning_rate": 1.5059980156762942e-07, - "loss": 1.1108, - "step": 6110 - }, - { - "epoch": 0.8284416728800922, - "grad_norm": 1.9838122081888856, - "learning_rate": 1.5036812875581274e-07, - "loss": 1.0907, - "step": 6111 - }, - { - "epoch": 0.8285772385277571, - "grad_norm": 1.4513873270097286, - "learning_rate": 1.5013661978879632e-07, - "loss": 1.1379, - "step": 6112 - }, - { - "epoch": 0.8287128041754219, - "grad_norm": 1.6337994129067452, - "learning_rate": 1.4990527471122382e-07, - "loss": 1.1001, - "step": 6113 - }, - { - "epoch": 0.8288483698230869, - "grad_norm": 1.6090167383690979, - "learning_rate": 1.4967409356770945e-07, - "loss": 1.1464, - "step": 6114 - }, - { - "epoch": 0.8289839354707517, - "grad_norm": 1.6598662153672805, - "learning_rate": 1.4944307640283382e-07, - "loss": 1.136, - "step": 6115 - }, - { - "epoch": 0.8291195011184166, - "grad_norm": 1.5111428225540444, - "learning_rate": 1.4921222326114692e-07, - "loss": 1.1426, - "step": 6116 - }, - { - "epoch": 0.8292550667660815, - "grad_norm": 1.4180524658545575, - "learning_rate": 1.4898153418716708e-07, - "loss": 1.1349, - "step": 6117 - }, - { - "epoch": 0.8293906324137463, - "grad_norm": 3.412055824367126, - "learning_rate": 1.4875100922538087e-07, - "loss": 1.0679, - "step": 6118 - }, - { - "epoch": 0.8295261980614113, - "grad_norm": 3.275464642569637, - "learning_rate": 1.4852064842024325e-07, - "loss": 1.1471, - "step": 6119 - }, - { - "epoch": 0.8296617637090761, - "grad_norm": 1.7230558705315062, - "learning_rate": 1.4829045181617727e-07, - "loss": 1.126, - "step": 6120 - }, - { - "epoch": 0.829797329356741, - "grad_norm": 1.803157081634202, - "learning_rate": 1.4806041945757474e-07, - "loss": 1.1628, - "step": 6121 - }, - { - "epoch": 0.8299328950044059, - "grad_norm": 1.718803731001436, - "learning_rate": 1.4783055138879562e-07, - "loss": 1.1717, - "step": 6122 - }, - { - "epoch": 0.8300684606520707, - "grad_norm": 1.7611381752627506, - "learning_rate": 1.476008476541679e-07, - "loss": 1.1849, - "step": 6123 - }, - { - "epoch": 0.8302040262997357, - "grad_norm": 1.684325632412977, - "learning_rate": 1.473713082979884e-07, - "loss": 1.0813, - "step": 6124 - }, - { - "epoch": 0.8303395919474005, - "grad_norm": 2.1483924780135086, - "learning_rate": 1.4714193336452174e-07, - "loss": 1.1131, - "step": 6125 - }, - { - "epoch": 0.8304751575950654, - "grad_norm": 1.5514688313505514, - "learning_rate": 1.4691272289800115e-07, - "loss": 1.111, - "step": 6126 - }, - { - "epoch": 0.8306107232427303, - "grad_norm": 1.6299864975334275, - "learning_rate": 1.4668367694262817e-07, - "loss": 1.1669, - "step": 6127 - }, - { - "epoch": 0.8307462888903951, - "grad_norm": 1.7165829109854134, - "learning_rate": 1.4645479554257267e-07, - "loss": 1.1155, - "step": 6128 - }, - { - "epoch": 0.8308818545380601, - "grad_norm": 1.651023582950642, - "learning_rate": 1.4622607874197214e-07, - "loss": 1.1202, - "step": 6129 - }, - { - "epoch": 0.8310174201857249, - "grad_norm": 1.413985248057129, - "learning_rate": 1.4599752658493304e-07, - "loss": 1.1295, - "step": 6130 - }, - { - "epoch": 0.8311529858333898, - "grad_norm": 1.782063026242228, - "learning_rate": 1.457691391155298e-07, - "loss": 1.1378, - "step": 6131 - }, - { - "epoch": 0.8312885514810547, - "grad_norm": 1.5384858641647392, - "learning_rate": 1.4554091637780518e-07, - "loss": 1.1319, - "step": 6132 - }, - { - "epoch": 0.8314241171287196, - "grad_norm": 1.5754456636573182, - "learning_rate": 1.4531285841577024e-07, - "loss": 1.1186, - "step": 6133 - }, - { - "epoch": 0.8315596827763845, - "grad_norm": 1.5320240876770022, - "learning_rate": 1.4508496527340398e-07, - "loss": 1.0807, - "step": 6134 - }, - { - "epoch": 0.8316952484240493, - "grad_norm": 1.5766773624671824, - "learning_rate": 1.448572369946539e-07, - "loss": 1.1196, - "step": 6135 - }, - { - "epoch": 0.8318308140717142, - "grad_norm": 2.110698734649517, - "learning_rate": 1.446296736234356e-07, - "loss": 1.1094, - "step": 6136 - }, - { - "epoch": 0.8319663797193791, - "grad_norm": 1.6997547408761438, - "learning_rate": 1.444022752036328e-07, - "loss": 1.1544, - "step": 6137 - }, - { - "epoch": 0.832101945367044, - "grad_norm": 1.5323370386113027, - "learning_rate": 1.4417504177909767e-07, - "loss": 1.1304, - "step": 6138 - }, - { - "epoch": 0.8322375110147089, - "grad_norm": 3.133218889509942, - "learning_rate": 1.4394797339365017e-07, - "loss": 1.1233, - "step": 6139 - }, - { - "epoch": 0.8323730766623737, - "grad_norm": 1.886917751452, - "learning_rate": 1.437210700910787e-07, - "loss": 1.1427, - "step": 6140 - }, - { - "epoch": 0.8325086423100386, - "grad_norm": 2.2682986523325654, - "learning_rate": 1.4349433191513994e-07, - "loss": 1.1459, - "step": 6141 - }, - { - "epoch": 0.8326442079577036, - "grad_norm": 1.6824181151020183, - "learning_rate": 1.4326775890955833e-07, - "loss": 1.1148, - "step": 6142 - }, - { - "epoch": 0.8327797736053684, - "grad_norm": 3.450355000946064, - "learning_rate": 1.4304135111802707e-07, - "loss": 1.1519, - "step": 6143 - }, - { - "epoch": 0.8329153392530333, - "grad_norm": 2.006705323580505, - "learning_rate": 1.4281510858420632e-07, - "loss": 1.1305, - "step": 6144 - }, - { - "epoch": 0.8330509049006981, - "grad_norm": 1.5645726092087568, - "learning_rate": 1.4258903135172605e-07, - "loss": 1.1392, - "step": 6145 - }, - { - "epoch": 0.833186470548363, - "grad_norm": 1.798705878832206, - "learning_rate": 1.423631194641828e-07, - "loss": 1.0889, - "step": 6146 - }, - { - "epoch": 0.833322036196028, - "grad_norm": 1.5600977009592263, - "learning_rate": 1.421373729651425e-07, - "loss": 1.1087, - "step": 6147 - }, - { - "epoch": 0.8334576018436928, - "grad_norm": 1.7743718820437313, - "learning_rate": 1.4191179189813796e-07, - "loss": 1.0966, - "step": 6148 - }, - { - "epoch": 0.8335931674913577, - "grad_norm": 1.8328980269411923, - "learning_rate": 1.4168637630667135e-07, - "loss": 1.1468, - "step": 6149 - }, - { - "epoch": 0.8337287331390226, - "grad_norm": 1.735205448540986, - "learning_rate": 1.4146112623421158e-07, - "loss": 1.1413, - "step": 6150 - }, - { - "epoch": 0.8338642987866874, - "grad_norm": 1.8543779820222022, - "learning_rate": 1.4123604172419713e-07, - "loss": 1.1042, - "step": 6151 - }, - { - "epoch": 0.8339998644343524, - "grad_norm": 1.5176632858509251, - "learning_rate": 1.410111228200329e-07, - "loss": 1.1759, - "step": 6152 - }, - { - "epoch": 0.8341354300820172, - "grad_norm": 1.5325031080475353, - "learning_rate": 1.407863695650936e-07, - "loss": 1.1416, - "step": 6153 - }, - { - "epoch": 0.8342709957296821, - "grad_norm": 1.7726809654790447, - "learning_rate": 1.405617820027204e-07, - "loss": 1.0972, - "step": 6154 - }, - { - "epoch": 0.834406561377347, - "grad_norm": 1.398630527000847, - "learning_rate": 1.4033736017622388e-07, - "loss": 1.1533, - "step": 6155 - }, - { - "epoch": 0.8345421270250118, - "grad_norm": 2.9571030107063887, - "learning_rate": 1.4011310412888145e-07, - "loss": 1.1518, - "step": 6156 - }, - { - "epoch": 0.8346776926726768, - "grad_norm": 2.205052938578223, - "learning_rate": 1.398890139039395e-07, - "loss": 1.1182, - "step": 6157 - }, - { - "epoch": 0.8348132583203416, - "grad_norm": 2.200818209860699, - "learning_rate": 1.3966508954461175e-07, - "loss": 1.0926, - "step": 6158 - }, - { - "epoch": 0.8349488239680065, - "grad_norm": 1.7132804423581414, - "learning_rate": 1.3944133109408053e-07, - "loss": 1.1624, - "step": 6159 - }, - { - "epoch": 0.8350843896156714, - "grad_norm": 1.7393363717302641, - "learning_rate": 1.3921773859549569e-07, - "loss": 1.1212, - "step": 6160 - }, - { - "epoch": 0.8352199552633363, - "grad_norm": 1.7546078175134212, - "learning_rate": 1.389943120919753e-07, - "loss": 1.1473, - "step": 6161 - }, - { - "epoch": 0.8353555209110012, - "grad_norm": 1.7912742656603087, - "learning_rate": 1.3877105162660564e-07, - "loss": 1.1125, - "step": 6162 - }, - { - "epoch": 0.835491086558666, - "grad_norm": 1.5202624716927995, - "learning_rate": 1.385479572424404e-07, - "loss": 1.1042, - "step": 6163 - }, - { - "epoch": 0.8356266522063309, - "grad_norm": 4.484749064666905, - "learning_rate": 1.3832502898250174e-07, - "loss": 1.1291, - "step": 6164 - }, - { - "epoch": 0.8357622178539958, - "grad_norm": 1.6526783397173164, - "learning_rate": 1.3810226688977967e-07, - "loss": 1.106, - "step": 6165 - }, - { - "epoch": 0.8358977835016607, - "grad_norm": 2.0526572075526435, - "learning_rate": 1.378796710072322e-07, - "loss": 1.1409, - "step": 6166 - }, - { - "epoch": 0.8360333491493256, - "grad_norm": 1.719264493168516, - "learning_rate": 1.3765724137778456e-07, - "loss": 1.131, - "step": 6167 - }, - { - "epoch": 0.8361689147969904, - "grad_norm": 1.558097538178257, - "learning_rate": 1.3743497804433147e-07, - "loss": 1.117, - "step": 6168 - }, - { - "epoch": 0.8363044804446553, - "grad_norm": 1.5545690325087547, - "learning_rate": 1.3721288104973372e-07, - "loss": 1.1111, - "step": 6169 - }, - { - "epoch": 0.8364400460923203, - "grad_norm": 1.6007552120049815, - "learning_rate": 1.3699095043682184e-07, - "loss": 1.0865, - "step": 6170 - }, - { - "epoch": 0.8365756117399851, - "grad_norm": 1.688400599851003, - "learning_rate": 1.3676918624839285e-07, - "loss": 1.1378, - "step": 6171 - }, - { - "epoch": 0.83671117738765, - "grad_norm": 1.9049372141172483, - "learning_rate": 1.3654758852721226e-07, - "loss": 1.1245, - "step": 6172 - }, - { - "epoch": 0.8368467430353148, - "grad_norm": 3.228393807771819, - "learning_rate": 1.363261573160136e-07, - "loss": 1.176, - "step": 6173 - }, - { - "epoch": 0.8369823086829797, - "grad_norm": 2.213699417561093, - "learning_rate": 1.3610489265749801e-07, - "loss": 1.1369, - "step": 6174 - }, - { - "epoch": 0.8371178743306447, - "grad_norm": 1.6452386654735738, - "learning_rate": 1.3588379459433485e-07, - "loss": 1.0655, - "step": 6175 - }, - { - "epoch": 0.8372534399783095, - "grad_norm": 1.7964541027447067, - "learning_rate": 1.3566286316916087e-07, - "loss": 1.1273, - "step": 6176 - }, - { - "epoch": 0.8373890056259744, - "grad_norm": 2.1710580259191117, - "learning_rate": 1.354420984245811e-07, - "loss": 1.1557, - "step": 6177 - }, - { - "epoch": 0.8375245712736392, - "grad_norm": 1.6005759778563469, - "learning_rate": 1.3522150040316826e-07, - "loss": 1.1327, - "step": 6178 - }, - { - "epoch": 0.8376601369213041, - "grad_norm": 2.2194474453615247, - "learning_rate": 1.350010691474629e-07, - "loss": 1.1404, - "step": 6179 - }, - { - "epoch": 0.8377957025689691, - "grad_norm": 1.5975518507396873, - "learning_rate": 1.3478080469997344e-07, - "loss": 1.1496, - "step": 6180 - }, - { - "epoch": 0.8379312682166339, - "grad_norm": 1.6438693437645342, - "learning_rate": 1.3456070710317624e-07, - "loss": 1.0932, - "step": 6181 - }, - { - "epoch": 0.8380668338642988, - "grad_norm": 1.633943879720907, - "learning_rate": 1.3434077639951525e-07, - "loss": 1.1177, - "step": 6182 - }, - { - "epoch": 0.8382023995119636, - "grad_norm": 1.6290968045572614, - "learning_rate": 1.341210126314024e-07, - "loss": 1.0998, - "step": 6183 - }, - { - "epoch": 0.8383379651596286, - "grad_norm": 2.5255227284641575, - "learning_rate": 1.3390141584121772e-07, - "loss": 1.1246, - "step": 6184 - }, - { - "epoch": 0.8384735308072935, - "grad_norm": 1.631872182427361, - "learning_rate": 1.33681986071308e-07, - "loss": 1.1414, - "step": 6185 - }, - { - "epoch": 0.8386090964549583, - "grad_norm": 1.5296933489929978, - "learning_rate": 1.3346272336398934e-07, - "loss": 1.1276, - "step": 6186 - }, - { - "epoch": 0.8387446621026232, - "grad_norm": 1.4717812743992367, - "learning_rate": 1.3324362776154408e-07, - "loss": 1.1403, - "step": 6187 - }, - { - "epoch": 0.838880227750288, - "grad_norm": 2.125996876433454, - "learning_rate": 1.3302469930622383e-07, - "loss": 1.163, - "step": 6188 - }, - { - "epoch": 0.839015793397953, - "grad_norm": 1.4454256738343119, - "learning_rate": 1.3280593804024642e-07, - "loss": 1.1252, - "step": 6189 - }, - { - "epoch": 0.8391513590456179, - "grad_norm": 19.67582792850445, - "learning_rate": 1.3258734400579908e-07, - "loss": 1.1159, - "step": 6190 - }, - { - "epoch": 0.8392869246932827, - "grad_norm": 1.4276577973301068, - "learning_rate": 1.323689172450353e-07, - "loss": 1.1268, - "step": 6191 - }, - { - "epoch": 0.8394224903409476, - "grad_norm": 1.308378592386755, - "learning_rate": 1.3215065780007718e-07, - "loss": 1.0916, - "step": 6192 - }, - { - "epoch": 0.8395580559886124, - "grad_norm": 2.390103187782756, - "learning_rate": 1.3193256571301426e-07, - "loss": 1.1205, - "step": 6193 - }, - { - "epoch": 0.8396936216362774, - "grad_norm": 1.6898130237681153, - "learning_rate": 1.3171464102590392e-07, - "loss": 1.0833, - "step": 6194 - }, - { - "epoch": 0.8398291872839423, - "grad_norm": 1.4058089475199333, - "learning_rate": 1.3149688378077128e-07, - "loss": 1.1454, - "step": 6195 - }, - { - "epoch": 0.8399647529316071, - "grad_norm": 7.200669098133073, - "learning_rate": 1.3127929401960903e-07, - "loss": 1.1045, - "step": 6196 - }, - { - "epoch": 0.840100318579272, - "grad_norm": 1.4582405872122313, - "learning_rate": 1.3106187178437768e-07, - "loss": 1.1343, - "step": 6197 - }, - { - "epoch": 0.8402358842269368, - "grad_norm": 1.6823753535645727, - "learning_rate": 1.3084461711700544e-07, - "loss": 1.0859, - "step": 6198 - }, - { - "epoch": 0.8403714498746018, - "grad_norm": 2.2710293629645446, - "learning_rate": 1.3062753005938798e-07, - "loss": 1.1199, - "step": 6199 - }, - { - "epoch": 0.8405070155222667, - "grad_norm": 1.532202158369591, - "learning_rate": 1.30410610653389e-07, - "loss": 1.14, - "step": 6200 - }, - { - "epoch": 0.8406425811699315, - "grad_norm": 1.718879209658983, - "learning_rate": 1.3019385894083988e-07, - "loss": 1.1189, - "step": 6201 - }, - { - "epoch": 0.8407781468175964, - "grad_norm": 1.6127396368357971, - "learning_rate": 1.2997727496353872e-07, - "loss": 1.1647, - "step": 6202 - }, - { - "epoch": 0.8409137124652613, - "grad_norm": 1.7460391366960477, - "learning_rate": 1.2976085876325303e-07, - "loss": 1.1411, - "step": 6203 - }, - { - "epoch": 0.8410492781129262, - "grad_norm": 1.4366207469057595, - "learning_rate": 1.2954461038171603e-07, - "loss": 1.0961, - "step": 6204 - }, - { - "epoch": 0.8411848437605911, - "grad_norm": 1.5083874877897854, - "learning_rate": 1.2932852986063046e-07, - "loss": 1.1587, - "step": 6205 - }, - { - "epoch": 0.8413204094082559, - "grad_norm": 1.7730101359987072, - "learning_rate": 1.2911261724166468e-07, - "loss": 1.1232, - "step": 6206 - }, - { - "epoch": 0.8414559750559208, - "grad_norm": 2.456315921922907, - "learning_rate": 1.2889687256645686e-07, - "loss": 1.1316, - "step": 6207 - }, - { - "epoch": 0.8415915407035857, - "grad_norm": 2.0651668089173896, - "learning_rate": 1.286812958766106e-07, - "loss": 1.1061, - "step": 6208 - }, - { - "epoch": 0.8417271063512506, - "grad_norm": 3.1351617565452092, - "learning_rate": 1.284658872136991e-07, - "loss": 1.1248, - "step": 6209 - }, - { - "epoch": 0.8418626719989155, - "grad_norm": 1.4653107949629878, - "learning_rate": 1.2825064661926133e-07, - "loss": 1.1437, - "step": 6210 - }, - { - "epoch": 0.8419982376465803, - "grad_norm": 1.4930767503593168, - "learning_rate": 1.280355741348056e-07, - "loss": 1.1723, - "step": 6211 - }, - { - "epoch": 0.8421338032942453, - "grad_norm": 1.6780531496579427, - "learning_rate": 1.278206698018064e-07, - "loss": 1.1269, - "step": 6212 - }, - { - "epoch": 0.8422693689419101, - "grad_norm": 1.5146909857110231, - "learning_rate": 1.2760593366170635e-07, - "loss": 1.116, - "step": 6213 - }, - { - "epoch": 0.842404934589575, - "grad_norm": 1.5311098680301287, - "learning_rate": 1.273913657559158e-07, - "loss": 1.1321, - "step": 6214 - }, - { - "epoch": 0.8425405002372399, - "grad_norm": 1.7836111166491453, - "learning_rate": 1.271769661258124e-07, - "loss": 1.1557, - "step": 6215 - }, - { - "epoch": 0.8426760658849047, - "grad_norm": 2.2227267123059473, - "learning_rate": 1.2696273481274144e-07, - "loss": 1.1285, - "step": 6216 - }, - { - "epoch": 0.8428116315325697, - "grad_norm": 1.5538795100954164, - "learning_rate": 1.2674867185801575e-07, - "loss": 1.1729, - "step": 6217 - }, - { - "epoch": 0.8429471971802345, - "grad_norm": 2.3077833117726665, - "learning_rate": 1.2653477730291563e-07, - "loss": 1.1098, - "step": 6218 - }, - { - "epoch": 0.8430827628278994, - "grad_norm": 1.8388898197698145, - "learning_rate": 1.2632105118868896e-07, - "loss": 1.1653, - "step": 6219 - }, - { - "epoch": 0.8432183284755643, - "grad_norm": 1.944809804747932, - "learning_rate": 1.2610749355655125e-07, - "loss": 1.1417, - "step": 6220 - }, - { - "epoch": 0.8433538941232291, - "grad_norm": 1.6500142001771374, - "learning_rate": 1.2589410444768522e-07, - "loss": 1.1234, - "step": 6221 - }, - { - "epoch": 0.8434894597708941, - "grad_norm": 2.7833530945077505, - "learning_rate": 1.256808839032415e-07, - "loss": 1.1255, - "step": 6222 - }, - { - "epoch": 0.8436250254185589, - "grad_norm": 1.3688588959249868, - "learning_rate": 1.2546783196433774e-07, - "loss": 1.071, - "step": 6223 - }, - { - "epoch": 0.8437605910662238, - "grad_norm": 1.7930877372133254, - "learning_rate": 1.2525494867205954e-07, - "loss": 1.1672, - "step": 6224 - }, - { - "epoch": 0.8438961567138887, - "grad_norm": 1.476620002331289, - "learning_rate": 1.2504223406745963e-07, - "loss": 1.1235, - "step": 6225 - }, - { - "epoch": 0.8440317223615535, - "grad_norm": 1.4673302599674496, - "learning_rate": 1.2482968819155837e-07, - "loss": 1.0821, - "step": 6226 - }, - { - "epoch": 0.8441672880092185, - "grad_norm": 1.820152050199136, - "learning_rate": 1.2461731108534378e-07, - "loss": 1.1336, - "step": 6227 - }, - { - "epoch": 0.8443028536568834, - "grad_norm": 1.7500932185488502, - "learning_rate": 1.244051027897708e-07, - "loss": 1.1312, - "step": 6228 - }, - { - "epoch": 0.8444384193045482, - "grad_norm": 1.6437164150957306, - "learning_rate": 1.2419306334576207e-07, - "loss": 1.081, - "step": 6229 - }, - { - "epoch": 0.8445739849522131, - "grad_norm": 3.3751704959978452, - "learning_rate": 1.2398119279420793e-07, - "loss": 1.1175, - "step": 6230 - }, - { - "epoch": 0.844709550599878, - "grad_norm": 1.9482431033804928, - "learning_rate": 1.2376949117596592e-07, - "loss": 1.1158, - "step": 6231 - }, - { - "epoch": 0.8448451162475429, - "grad_norm": 1.8754051142320427, - "learning_rate": 1.2355795853186102e-07, - "loss": 1.1059, - "step": 6232 - }, - { - "epoch": 0.8449806818952078, - "grad_norm": 1.6787405419264065, - "learning_rate": 1.233465949026855e-07, - "loss": 1.1288, - "step": 6233 - }, - { - "epoch": 0.8451162475428726, - "grad_norm": 4.924311003146424, - "learning_rate": 1.2313540032919935e-07, - "loss": 1.0824, - "step": 6234 - }, - { - "epoch": 0.8452518131905375, - "grad_norm": 2.2952780100036385, - "learning_rate": 1.2292437485212957e-07, - "loss": 1.1866, - "step": 6235 - }, - { - "epoch": 0.8453873788382024, - "grad_norm": 1.995885728239094, - "learning_rate": 1.2271351851217104e-07, - "loss": 1.0957, - "step": 6236 - }, - { - "epoch": 0.8455229444858673, - "grad_norm": 1.5156092398983478, - "learning_rate": 1.225028313499855e-07, - "loss": 1.1319, - "step": 6237 - }, - { - "epoch": 0.8456585101335322, - "grad_norm": 1.6414860723968474, - "learning_rate": 1.222923134062025e-07, - "loss": 1.1114, - "step": 6238 - }, - { - "epoch": 0.845794075781197, - "grad_norm": 1.6473392465517438, - "learning_rate": 1.220819647214185e-07, - "loss": 1.133, - "step": 6239 - }, - { - "epoch": 0.845929641428862, - "grad_norm": 1.9980226081775585, - "learning_rate": 1.2187178533619803e-07, - "loss": 1.1102, - "step": 6240 - }, - { - "epoch": 0.8460652070765268, - "grad_norm": 1.727032058981367, - "learning_rate": 1.216617752910718e-07, - "loss": 1.1115, - "step": 6241 - }, - { - "epoch": 0.8462007727241917, - "grad_norm": 1.6703729579460385, - "learning_rate": 1.2145193462653946e-07, - "loss": 1.0859, - "step": 6242 - }, - { - "epoch": 0.8463363383718566, - "grad_norm": 1.471639792965376, - "learning_rate": 1.212422633830663e-07, - "loss": 1.1789, - "step": 6243 - }, - { - "epoch": 0.8464719040195214, - "grad_norm": 1.9230926605998588, - "learning_rate": 1.2103276160108656e-07, - "loss": 1.1443, - "step": 6244 - }, - { - "epoch": 0.8466074696671864, - "grad_norm": 1.5813902754180822, - "learning_rate": 1.208234293210002e-07, - "loss": 1.1106, - "step": 6245 - }, - { - "epoch": 0.8467430353148512, - "grad_norm": 1.635859603729063, - "learning_rate": 1.2061426658317608e-07, - "loss": 1.1108, - "step": 6246 - }, - { - "epoch": 0.8468786009625161, - "grad_norm": 5.553105477202321, - "learning_rate": 1.2040527342794872e-07, - "loss": 1.1129, - "step": 6247 - }, - { - "epoch": 0.847014166610181, - "grad_norm": 1.954881241479395, - "learning_rate": 1.2019644989562184e-07, - "loss": 1.1284, - "step": 6248 - }, - { - "epoch": 0.8471497322578458, - "grad_norm": 1.5250089631064803, - "learning_rate": 1.1998779602646436e-07, - "loss": 1.1642, - "step": 6249 - }, - { - "epoch": 0.8472852979055108, - "grad_norm": 1.6253421575404319, - "learning_rate": 1.1977931186071443e-07, - "loss": 1.0662, - "step": 6250 - }, - { - "epoch": 0.8474208635531756, - "grad_norm": 1.6304258891235228, - "learning_rate": 1.1957099743857568e-07, - "loss": 1.1333, - "step": 6251 - }, - { - "epoch": 0.8475564292008405, - "grad_norm": 1.9456158664895915, - "learning_rate": 1.1936285280022096e-07, - "loss": 1.1252, - "step": 6252 - }, - { - "epoch": 0.8476919948485054, - "grad_norm": 2.705241450144285, - "learning_rate": 1.1915487798578816e-07, - "loss": 1.0974, - "step": 6253 - }, - { - "epoch": 0.8478275604961703, - "grad_norm": 2.9232509899137074, - "learning_rate": 1.1894707303538476e-07, - "loss": 1.115, - "step": 6254 - }, - { - "epoch": 0.8479631261438352, - "grad_norm": 1.5132327669603225, - "learning_rate": 1.1873943798908336e-07, - "loss": 1.1298, - "step": 6255 - }, - { - "epoch": 0.8480986917915, - "grad_norm": 1.5966311073941208, - "learning_rate": 1.1853197288692518e-07, - "loss": 1.1504, - "step": 6256 - }, - { - "epoch": 0.8482342574391649, - "grad_norm": 1.4883038586147965, - "learning_rate": 1.183246777689182e-07, - "loss": 1.092, - "step": 6257 - }, - { - "epoch": 0.8483698230868298, - "grad_norm": 1.5230143142774115, - "learning_rate": 1.1811755267503754e-07, - "loss": 1.1346, - "step": 6258 - }, - { - "epoch": 0.8485053887344947, - "grad_norm": 1.6496999718442382, - "learning_rate": 1.179105976452256e-07, - "loss": 1.1216, - "step": 6259 - }, - { - "epoch": 0.8486409543821596, - "grad_norm": 4.161538189691567, - "learning_rate": 1.1770381271939223e-07, - "loss": 1.1594, - "step": 6260 - }, - { - "epoch": 0.8487765200298244, - "grad_norm": 1.5798063574744565, - "learning_rate": 1.1749719793741409e-07, - "loss": 1.1266, - "step": 6261 - }, - { - "epoch": 0.8489120856774893, - "grad_norm": 1.6303827372921604, - "learning_rate": 1.172907533391353e-07, - "loss": 1.1404, - "step": 6262 - }, - { - "epoch": 0.8490476513251543, - "grad_norm": 1.4793187116187414, - "learning_rate": 1.1708447896436724e-07, - "loss": 1.091, - "step": 6263 - }, - { - "epoch": 0.8491832169728191, - "grad_norm": 1.4807350806595518, - "learning_rate": 1.1687837485288766e-07, - "loss": 1.1071, - "step": 6264 - }, - { - "epoch": 0.849318782620484, - "grad_norm": 1.6116553290378244, - "learning_rate": 1.1667244104444308e-07, - "loss": 1.0754, - "step": 6265 - }, - { - "epoch": 0.8494543482681488, - "grad_norm": 1.7157587303171171, - "learning_rate": 1.1646667757874507e-07, - "loss": 1.0826, - "step": 6266 - }, - { - "epoch": 0.8495899139158137, - "grad_norm": 1.6261982109352877, - "learning_rate": 1.1626108449547467e-07, - "loss": 1.1027, - "step": 6267 - }, - { - "epoch": 0.8497254795634787, - "grad_norm": 1.4633396453398662, - "learning_rate": 1.1605566183427807e-07, - "loss": 1.1095, - "step": 6268 - }, - { - "epoch": 0.8498610452111435, - "grad_norm": 2.1021104930554704, - "learning_rate": 1.1585040963476966e-07, - "loss": 1.1313, - "step": 6269 - }, - { - "epoch": 0.8499966108588084, - "grad_norm": 1.6818298299550982, - "learning_rate": 1.156453279365307e-07, - "loss": 1.0945, - "step": 6270 - }, - { - "epoch": 0.8501321765064732, - "grad_norm": 1.6267090927225414, - "learning_rate": 1.1544041677910954e-07, - "loss": 1.0861, - "step": 6271 - }, - { - "epoch": 0.8502677421541381, - "grad_norm": 1.5573856466774798, - "learning_rate": 1.152356762020218e-07, - "loss": 1.1427, - "step": 6272 - }, - { - "epoch": 0.8504033078018031, - "grad_norm": 1.5522228909560154, - "learning_rate": 1.1503110624474987e-07, - "loss": 1.1027, - "step": 6273 - }, - { - "epoch": 0.8505388734494679, - "grad_norm": 1.730029109785682, - "learning_rate": 1.1482670694674367e-07, - "loss": 1.1419, - "step": 6274 - }, - { - "epoch": 0.8506744390971328, - "grad_norm": 1.8419395512126264, - "learning_rate": 1.146224783474199e-07, - "loss": 1.0753, - "step": 6275 - }, - { - "epoch": 0.8508100047447976, - "grad_norm": 1.8328799406042335, - "learning_rate": 1.1441842048616234e-07, - "loss": 1.139, - "step": 6276 - }, - { - "epoch": 0.8509455703924625, - "grad_norm": 1.5614141208822658, - "learning_rate": 1.1421453340232213e-07, - "loss": 1.1118, - "step": 6277 - }, - { - "epoch": 0.8510811360401275, - "grad_norm": 1.4353533590609489, - "learning_rate": 1.140108171352172e-07, - "loss": 1.0995, - "step": 6278 - }, - { - "epoch": 0.8512167016877923, - "grad_norm": 1.6282186323200165, - "learning_rate": 1.1380727172413262e-07, - "loss": 1.1227, - "step": 6279 - }, - { - "epoch": 0.8513522673354572, - "grad_norm": 1.9631033167794292, - "learning_rate": 1.1360389720832042e-07, - "loss": 1.1283, - "step": 6280 - }, - { - "epoch": 0.851487832983122, - "grad_norm": 1.5401099238204563, - "learning_rate": 1.1340069362699988e-07, - "loss": 1.1383, - "step": 6281 - }, - { - "epoch": 0.851623398630787, - "grad_norm": 1.7075518189411314, - "learning_rate": 1.1319766101935724e-07, - "loss": 1.1356, - "step": 6282 - }, - { - "epoch": 0.8517589642784519, - "grad_norm": 2.3812545102224627, - "learning_rate": 1.1299479942454592e-07, - "loss": 1.1026, - "step": 6283 - }, - { - "epoch": 0.8518945299261167, - "grad_norm": 3.1285490193099506, - "learning_rate": 1.1279210888168544e-07, - "loss": 1.1126, - "step": 6284 - }, - { - "epoch": 0.8520300955737816, - "grad_norm": 1.4602836528779617, - "learning_rate": 1.1258958942986396e-07, - "loss": 1.1199, - "step": 6285 - }, - { - "epoch": 0.8521656612214464, - "grad_norm": 1.5619678341692986, - "learning_rate": 1.1238724110813502e-07, - "loss": 1.1536, - "step": 6286 - }, - { - "epoch": 0.8523012268691114, - "grad_norm": 1.6631009351194246, - "learning_rate": 1.1218506395552063e-07, - "loss": 1.0948, - "step": 6287 - }, - { - "epoch": 0.8524367925167763, - "grad_norm": 1.5962277167243162, - "learning_rate": 1.1198305801100827e-07, - "loss": 1.1392, - "step": 6288 - }, - { - "epoch": 0.8525723581644411, - "grad_norm": 2.0209338242252644, - "learning_rate": 1.11781223313554e-07, - "loss": 1.1542, - "step": 6289 - }, - { - "epoch": 0.852707923812106, - "grad_norm": 1.47768686943271, - "learning_rate": 1.1157955990207946e-07, - "loss": 1.0818, - "step": 6290 - }, - { - "epoch": 0.8528434894597708, - "grad_norm": 1.548541071112339, - "learning_rate": 1.1137806781547398e-07, - "loss": 1.1455, - "step": 6291 - }, - { - "epoch": 0.8529790551074358, - "grad_norm": 2.163590396585606, - "learning_rate": 1.1117674709259372e-07, - "loss": 1.1149, - "step": 6292 - }, - { - "epoch": 0.8531146207551007, - "grad_norm": 1.6336239760628883, - "learning_rate": 1.1097559777226196e-07, - "loss": 1.1001, - "step": 6293 - }, - { - "epoch": 0.8532501864027655, - "grad_norm": 2.544820886010292, - "learning_rate": 1.1077461989326864e-07, - "loss": 1.1451, - "step": 6294 - }, - { - "epoch": 0.8533857520504304, - "grad_norm": 1.6318164203877048, - "learning_rate": 1.1057381349437067e-07, - "loss": 1.0872, - "step": 6295 - }, - { - "epoch": 0.8535213176980952, - "grad_norm": 1.567352367273081, - "learning_rate": 1.1037317861429208e-07, - "loss": 1.1337, - "step": 6296 - }, - { - "epoch": 0.8536568833457602, - "grad_norm": 1.541311301467861, - "learning_rate": 1.1017271529172367e-07, - "loss": 1.1157, - "step": 6297 - }, - { - "epoch": 0.8537924489934251, - "grad_norm": 1.6164655522081692, - "learning_rate": 1.0997242356532333e-07, - "loss": 1.1349, - "step": 6298 - }, - { - "epoch": 0.8539280146410899, - "grad_norm": 1.4547456201867044, - "learning_rate": 1.0977230347371568e-07, - "loss": 1.0828, - "step": 6299 - }, - { - "epoch": 0.8540635802887548, - "grad_norm": 1.8223892030508726, - "learning_rate": 1.0957235505549233e-07, - "loss": 1.1244, - "step": 6300 - }, - { - "epoch": 0.8541991459364197, - "grad_norm": 1.9795860897177036, - "learning_rate": 1.0937257834921144e-07, - "loss": 1.1006, - "step": 6301 - }, - { - "epoch": 0.8543347115840846, - "grad_norm": 9.854992727714778, - "learning_rate": 1.0917297339339892e-07, - "loss": 1.1107, - "step": 6302 - }, - { - "epoch": 0.8544702772317495, - "grad_norm": 1.6417999665024943, - "learning_rate": 1.0897354022654648e-07, - "loss": 1.1046, - "step": 6303 - }, - { - "epoch": 0.8546058428794143, - "grad_norm": 1.9499446788634787, - "learning_rate": 1.0877427888711377e-07, - "loss": 1.0928, - "step": 6304 - }, - { - "epoch": 0.8547414085270792, - "grad_norm": 3.7487721970116734, - "learning_rate": 1.0857518941352605e-07, - "loss": 1.1524, - "step": 6305 - }, - { - "epoch": 0.8548769741747442, - "grad_norm": 1.7060023505437065, - "learning_rate": 1.0837627184417697e-07, - "loss": 1.1348, - "step": 6306 - }, - { - "epoch": 0.855012539822409, - "grad_norm": 3.220624508989493, - "learning_rate": 1.0817752621742537e-07, - "loss": 1.1125, - "step": 6307 - }, - { - "epoch": 0.8551481054700739, - "grad_norm": 3.253595108513089, - "learning_rate": 1.0797895257159872e-07, - "loss": 1.1694, - "step": 6308 - }, - { - "epoch": 0.8552836711177387, - "grad_norm": 1.548892536137069, - "learning_rate": 1.077805509449895e-07, - "loss": 1.1768, - "step": 6309 - }, - { - "epoch": 0.8554192367654037, - "grad_norm": 1.5148208002154036, - "learning_rate": 1.0758232137585854e-07, - "loss": 1.1047, - "step": 6310 - }, - { - "epoch": 0.8555548024130686, - "grad_norm": 1.7364217174041925, - "learning_rate": 1.073842639024325e-07, - "loss": 1.1174, - "step": 6311 - }, - { - "epoch": 0.8556903680607334, - "grad_norm": 1.4246133990243268, - "learning_rate": 1.0718637856290525e-07, - "loss": 1.1316, - "step": 6312 - }, - { - "epoch": 0.8558259337083983, - "grad_norm": 1.7146879491636184, - "learning_rate": 1.069886653954375e-07, - "loss": 1.1695, - "step": 6313 - }, - { - "epoch": 0.8559614993560631, - "grad_norm": 1.6907214126553551, - "learning_rate": 1.0679112443815652e-07, - "loss": 1.1251, - "step": 6314 - }, - { - "epoch": 0.8560970650037281, - "grad_norm": 1.5524538029749009, - "learning_rate": 1.0659375572915674e-07, - "loss": 1.1309, - "step": 6315 - }, - { - "epoch": 0.856232630651393, - "grad_norm": 2.191509423515223, - "learning_rate": 1.0639655930649894e-07, - "loss": 1.0771, - "step": 6316 - }, - { - "epoch": 0.8563681962990578, - "grad_norm": 3.090471286684315, - "learning_rate": 1.0619953520821112e-07, - "loss": 1.1625, - "step": 6317 - }, - { - "epoch": 0.8565037619467227, - "grad_norm": 1.8644628284636473, - "learning_rate": 1.0600268347228757e-07, - "loss": 1.1591, - "step": 6318 - }, - { - "epoch": 0.8566393275943875, - "grad_norm": 1.7284056918240733, - "learning_rate": 1.0580600413668983e-07, - "loss": 1.1316, - "step": 6319 - }, - { - "epoch": 0.8567748932420525, - "grad_norm": 1.9409456053437604, - "learning_rate": 1.0560949723934587e-07, - "loss": 1.1674, - "step": 6320 - }, - { - "epoch": 0.8569104588897174, - "grad_norm": 1.592536871114864, - "learning_rate": 1.0541316281815038e-07, - "loss": 1.1542, - "step": 6321 - }, - { - "epoch": 0.8570460245373822, - "grad_norm": 1.6339455814786095, - "learning_rate": 1.0521700091096508e-07, - "loss": 1.1136, - "step": 6322 - }, - { - "epoch": 0.8571815901850471, - "grad_norm": 1.778786289227493, - "learning_rate": 1.0502101155561816e-07, - "loss": 1.1326, - "step": 6323 - }, - { - "epoch": 0.857317155832712, - "grad_norm": 1.8168624935921425, - "learning_rate": 1.0482519478990481e-07, - "loss": 1.133, - "step": 6324 - }, - { - "epoch": 0.8574527214803769, - "grad_norm": 1.741366783806273, - "learning_rate": 1.0462955065158618e-07, - "loss": 1.1063, - "step": 6325 - }, - { - "epoch": 0.8575882871280418, - "grad_norm": 1.7821062835592214, - "learning_rate": 1.0443407917839141e-07, - "loss": 1.1074, - "step": 6326 - }, - { - "epoch": 0.8577238527757066, - "grad_norm": 1.6376777490056627, - "learning_rate": 1.0423878040801514e-07, - "loss": 1.1159, - "step": 6327 - }, - { - "epoch": 0.8578594184233715, - "grad_norm": 2.100545131929293, - "learning_rate": 1.0404365437811946e-07, - "loss": 1.0963, - "step": 6328 - }, - { - "epoch": 0.8579949840710364, - "grad_norm": 1.5965145181704496, - "learning_rate": 1.0384870112633271e-07, - "loss": 1.0871, - "step": 6329 - }, - { - "epoch": 0.8581305497187013, - "grad_norm": 1.638885476088325, - "learning_rate": 1.0365392069025014e-07, - "loss": 1.1306, - "step": 6330 - }, - { - "epoch": 0.8582661153663662, - "grad_norm": 1.4313764862985288, - "learning_rate": 1.034593131074336e-07, - "loss": 1.1079, - "step": 6331 - }, - { - "epoch": 0.858401681014031, - "grad_norm": 1.5111905717629663, - "learning_rate": 1.0326487841541176e-07, - "loss": 1.1328, - "step": 6332 - }, - { - "epoch": 0.858537246661696, - "grad_norm": 1.9422213098277183, - "learning_rate": 1.030706166516796e-07, - "loss": 1.1173, - "step": 6333 - }, - { - "epoch": 0.8586728123093608, - "grad_norm": 1.5494497591115421, - "learning_rate": 1.0287652785369916e-07, - "loss": 1.1783, - "step": 6334 - }, - { - "epoch": 0.8588083779570257, - "grad_norm": 1.572732219467658, - "learning_rate": 1.0268261205889894e-07, - "loss": 1.0715, - "step": 6335 - }, - { - "epoch": 0.8589439436046906, - "grad_norm": 1.464778192334496, - "learning_rate": 1.0248886930467393e-07, - "loss": 1.0864, - "step": 6336 - }, - { - "epoch": 0.8590795092523554, - "grad_norm": 1.5997813333873667, - "learning_rate": 1.022952996283859e-07, - "loss": 1.1138, - "step": 6337 - }, - { - "epoch": 0.8592150749000204, - "grad_norm": 1.735016614391985, - "learning_rate": 1.0210190306736333e-07, - "loss": 1.1045, - "step": 6338 - }, - { - "epoch": 0.8593506405476852, - "grad_norm": 1.5993598904790784, - "learning_rate": 1.0190867965890137e-07, - "loss": 1.1035, - "step": 6339 - }, - { - "epoch": 0.8594862061953501, - "grad_norm": 1.5750612626379088, - "learning_rate": 1.0171562944026102e-07, - "loss": 1.1198, - "step": 6340 - }, - { - "epoch": 0.859621771843015, - "grad_norm": 1.7343389159250835, - "learning_rate": 1.0152275244867137e-07, - "loss": 1.1496, - "step": 6341 - }, - { - "epoch": 0.8597573374906798, - "grad_norm": 1.666547425731249, - "learning_rate": 1.0133004872132623e-07, - "loss": 1.1291, - "step": 6342 - }, - { - "epoch": 0.8598929031383448, - "grad_norm": 1.611280149557868, - "learning_rate": 1.0113751829538808e-07, - "loss": 1.1483, - "step": 6343 - }, - { - "epoch": 0.8600284687860096, - "grad_norm": 2.9872102158826435, - "learning_rate": 1.009451612079838e-07, - "loss": 1.0874, - "step": 6344 - }, - { - "epoch": 0.8601640344336745, - "grad_norm": 1.6508402911678626, - "learning_rate": 1.0075297749620904e-07, - "loss": 1.1239, - "step": 6345 - }, - { - "epoch": 0.8602996000813394, - "grad_norm": 1.7200558705013373, - "learning_rate": 1.0056096719712382e-07, - "loss": 1.0982, - "step": 6346 - }, - { - "epoch": 0.8604351657290042, - "grad_norm": 1.7321216456667257, - "learning_rate": 1.0036913034775673e-07, - "loss": 1.1207, - "step": 6347 - }, - { - "epoch": 0.8605707313766692, - "grad_norm": 1.7213199002412487, - "learning_rate": 1.0017746698510122e-07, - "loss": 1.143, - "step": 6348 - }, - { - "epoch": 0.860706297024334, - "grad_norm": 5.1622274214134825, - "learning_rate": 9.998597714611889e-08, - "loss": 1.1293, - "step": 6349 - }, - { - "epoch": 0.8608418626719989, - "grad_norm": 1.4850919532836222, - "learning_rate": 9.979466086773614e-08, - "loss": 1.1327, - "step": 6350 - }, - { - "epoch": 0.8609774283196638, - "grad_norm": 2.851541919343992, - "learning_rate": 9.960351818684764e-08, - "loss": 1.0862, - "step": 6351 - }, - { - "epoch": 0.8611129939673287, - "grad_norm": 1.6864593373543244, - "learning_rate": 9.941254914031316e-08, - "loss": 1.1411, - "step": 6352 - }, - { - "epoch": 0.8612485596149936, - "grad_norm": 1.477035660514135, - "learning_rate": 9.922175376495979e-08, - "loss": 1.1021, - "step": 6353 - }, - { - "epoch": 0.8613841252626584, - "grad_norm": 1.9015682511360699, - "learning_rate": 9.903113209758096e-08, - "loss": 1.1565, - "step": 6354 - }, - { - "epoch": 0.8615196909103233, - "grad_norm": 1.5761767033644603, - "learning_rate": 9.88406841749364e-08, - "loss": 1.13, - "step": 6355 - }, - { - "epoch": 0.8616552565579882, - "grad_norm": 1.423066099577052, - "learning_rate": 9.865041003375263e-08, - "loss": 1.1318, - "step": 6356 - }, - { - "epoch": 0.8617908222056531, - "grad_norm": 1.5662259490509276, - "learning_rate": 9.846030971072239e-08, - "loss": 1.1275, - "step": 6357 - }, - { - "epoch": 0.861926387853318, - "grad_norm": 1.6547383993270737, - "learning_rate": 9.827038324250514e-08, - "loss": 1.1002, - "step": 6358 - }, - { - "epoch": 0.8620619535009828, - "grad_norm": 1.410535919626774, - "learning_rate": 9.80806306657267e-08, - "loss": 1.1377, - "step": 6359 - }, - { - "epoch": 0.8621975191486477, - "grad_norm": 1.4993788859521822, - "learning_rate": 9.789105201697923e-08, - "loss": 1.0864, - "step": 6360 - }, - { - "epoch": 0.8623330847963127, - "grad_norm": 3.776827733384264, - "learning_rate": 9.77016473328216e-08, - "loss": 1.1146, - "step": 6361 - }, - { - "epoch": 0.8624686504439775, - "grad_norm": 4.210679929612081, - "learning_rate": 9.751241664977927e-08, - "loss": 1.1558, - "step": 6362 - }, - { - "epoch": 0.8626042160916424, - "grad_norm": 2.214873095363268, - "learning_rate": 9.732336000434304e-08, - "loss": 1.149, - "step": 6363 - }, - { - "epoch": 0.8627397817393072, - "grad_norm": 1.5966397522088718, - "learning_rate": 9.713447743297198e-08, - "loss": 1.122, - "step": 6364 - }, - { - "epoch": 0.8628753473869721, - "grad_norm": 1.5767226426941463, - "learning_rate": 9.694576897208984e-08, - "loss": 1.1114, - "step": 6365 - }, - { - "epoch": 0.8630109130346371, - "grad_norm": 1.5075402504563102, - "learning_rate": 9.675723465808827e-08, - "loss": 1.1296, - "step": 6366 - }, - { - "epoch": 0.8631464786823019, - "grad_norm": 1.8732668935597623, - "learning_rate": 9.656887452732399e-08, - "loss": 1.1289, - "step": 6367 - }, - { - "epoch": 0.8632820443299668, - "grad_norm": 1.4234868383476011, - "learning_rate": 9.638068861612091e-08, - "loss": 1.0983, - "step": 6368 - }, - { - "epoch": 0.8634176099776316, - "grad_norm": 1.5193370913932285, - "learning_rate": 9.619267696076938e-08, - "loss": 1.0972, - "step": 6369 - }, - { - "epoch": 0.8635531756252965, - "grad_norm": 1.7949080854439359, - "learning_rate": 9.600483959752592e-08, - "loss": 1.1392, - "step": 6370 - }, - { - "epoch": 0.8636887412729615, - "grad_norm": 1.7846594073301987, - "learning_rate": 9.581717656261335e-08, - "loss": 1.1261, - "step": 6371 - }, - { - "epoch": 0.8638243069206263, - "grad_norm": 1.6292411271951053, - "learning_rate": 9.562968789222114e-08, - "loss": 1.1046, - "step": 6372 - }, - { - "epoch": 0.8639598725682912, - "grad_norm": 1.4561003212296701, - "learning_rate": 9.544237362250495e-08, - "loss": 1.1318, - "step": 6373 - }, - { - "epoch": 0.864095438215956, - "grad_norm": 1.6189768825916528, - "learning_rate": 9.525523378958688e-08, - "loss": 1.0715, - "step": 6374 - }, - { - "epoch": 0.864231003863621, - "grad_norm": 1.830106445433859, - "learning_rate": 9.50682684295554e-08, - "loss": 1.0843, - "step": 6375 - }, - { - "epoch": 0.8643665695112859, - "grad_norm": 1.6338487326753481, - "learning_rate": 9.488147757846521e-08, - "loss": 1.1031, - "step": 6376 - }, - { - "epoch": 0.8645021351589507, - "grad_norm": 1.5221827104693977, - "learning_rate": 9.46948612723375e-08, - "loss": 1.1286, - "step": 6377 - }, - { - "epoch": 0.8646377008066156, - "grad_norm": 1.6781266869043607, - "learning_rate": 9.450841954715971e-08, - "loss": 1.1519, - "step": 6378 - }, - { - "epoch": 0.8647732664542804, - "grad_norm": 1.5841083651714827, - "learning_rate": 9.432215243888575e-08, - "loss": 1.1138, - "step": 6379 - }, - { - "epoch": 0.8649088321019454, - "grad_norm": 1.580540196027783, - "learning_rate": 9.413605998343566e-08, - "loss": 1.1116, - "step": 6380 - }, - { - "epoch": 0.8650443977496103, - "grad_norm": 3.0152128575194985, - "learning_rate": 9.395014221669595e-08, - "loss": 1.1141, - "step": 6381 - }, - { - "epoch": 0.8651799633972751, - "grad_norm": 1.5937687110616348, - "learning_rate": 9.376439917451962e-08, - "loss": 1.1352, - "step": 6382 - }, - { - "epoch": 0.86531552904494, - "grad_norm": 1.96021769372135, - "learning_rate": 9.357883089272512e-08, - "loss": 1.1335, - "step": 6383 - }, - { - "epoch": 0.8654510946926048, - "grad_norm": 1.5463423149407587, - "learning_rate": 9.33934374070986e-08, - "loss": 1.1218, - "step": 6384 - }, - { - "epoch": 0.8655866603402698, - "grad_norm": 1.438391069747751, - "learning_rate": 9.320821875339091e-08, - "loss": 1.1109, - "step": 6385 - }, - { - "epoch": 0.8657222259879347, - "grad_norm": 4.094998112037752, - "learning_rate": 9.302317496732092e-08, - "loss": 1.1054, - "step": 6386 - }, - { - "epoch": 0.8658577916355995, - "grad_norm": 1.6774747804731347, - "learning_rate": 9.283830608457199e-08, - "loss": 1.116, - "step": 6387 - }, - { - "epoch": 0.8659933572832644, - "grad_norm": 1.6783253083081413, - "learning_rate": 9.265361214079548e-08, - "loss": 1.1463, - "step": 6388 - }, - { - "epoch": 0.8661289229309294, - "grad_norm": 1.6682801119119148, - "learning_rate": 9.246909317160744e-08, - "loss": 1.0983, - "step": 6389 - }, - { - "epoch": 0.8662644885785942, - "grad_norm": 1.6756325198676987, - "learning_rate": 9.228474921259121e-08, - "loss": 1.1543, - "step": 6390 - }, - { - "epoch": 0.8664000542262591, - "grad_norm": 1.8692533097077568, - "learning_rate": 9.210058029929602e-08, - "loss": 1.1294, - "step": 6391 - }, - { - "epoch": 0.8665356198739239, - "grad_norm": 1.879706266890187, - "learning_rate": 9.191658646723732e-08, - "loss": 1.1369, - "step": 6392 - }, - { - "epoch": 0.8666711855215888, - "grad_norm": 1.6351263282239437, - "learning_rate": 9.173276775189709e-08, - "loss": 1.1068, - "step": 6393 - }, - { - "epoch": 0.8668067511692538, - "grad_norm": 1.9559564931421691, - "learning_rate": 9.154912418872306e-08, - "loss": 1.1567, - "step": 6394 - }, - { - "epoch": 0.8669423168169186, - "grad_norm": 1.573599554712936, - "learning_rate": 9.136565581312961e-08, - "loss": 1.1184, - "step": 6395 - }, - { - "epoch": 0.8670778824645835, - "grad_norm": 1.5640936371708176, - "learning_rate": 9.118236266049705e-08, - "loss": 1.1109, - "step": 6396 - }, - { - "epoch": 0.8672134481122483, - "grad_norm": 1.514935764416863, - "learning_rate": 9.099924476617216e-08, - "loss": 1.0976, - "step": 6397 - }, - { - "epoch": 0.8673490137599132, - "grad_norm": 1.7377552574703037, - "learning_rate": 9.081630216546766e-08, - "loss": 1.1437, - "step": 6398 - }, - { - "epoch": 0.8674845794075782, - "grad_norm": 1.6797392733200085, - "learning_rate": 9.063353489366287e-08, - "loss": 1.1282, - "step": 6399 - }, - { - "epoch": 0.867620145055243, - "grad_norm": 1.6035725593940091, - "learning_rate": 9.045094298600232e-08, - "loss": 1.1495, - "step": 6400 - }, - { - "epoch": 0.8677557107029079, - "grad_norm": 2.145096031680423, - "learning_rate": 9.026852647769822e-08, - "loss": 1.1174, - "step": 6401 - }, - { - "epoch": 0.8678912763505727, - "grad_norm": 1.750686266794915, - "learning_rate": 9.008628540392749e-08, - "loss": 1.1135, - "step": 6402 - }, - { - "epoch": 0.8680268419982377, - "grad_norm": 1.6447062268026524, - "learning_rate": 8.990421979983465e-08, - "loss": 1.1407, - "step": 6403 - }, - { - "epoch": 0.8681624076459026, - "grad_norm": 2.2314175403362553, - "learning_rate": 8.972232970052873e-08, - "loss": 1.1089, - "step": 6404 - }, - { - "epoch": 0.8682979732935674, - "grad_norm": 1.7549058701411144, - "learning_rate": 8.954061514108657e-08, - "loss": 1.1472, - "step": 6405 - }, - { - "epoch": 0.8684335389412323, - "grad_norm": 1.7862171186578535, - "learning_rate": 8.93590761565497e-08, - "loss": 1.1347, - "step": 6406 - }, - { - "epoch": 0.8685691045888971, - "grad_norm": 1.5521060523500179, - "learning_rate": 8.917771278192709e-08, - "loss": 1.1086, - "step": 6407 - }, - { - "epoch": 0.8687046702365621, - "grad_norm": 2.2313625583718655, - "learning_rate": 8.899652505219279e-08, - "loss": 1.0601, - "step": 6408 - }, - { - "epoch": 0.868840235884227, - "grad_norm": 1.4838439351390538, - "learning_rate": 8.881551300228785e-08, - "loss": 1.1106, - "step": 6409 - }, - { - "epoch": 0.8689758015318918, - "grad_norm": 3.573960044210363, - "learning_rate": 8.863467666711865e-08, - "loss": 1.1148, - "step": 6410 - }, - { - "epoch": 0.8691113671795567, - "grad_norm": 1.5980857017045205, - "learning_rate": 8.845401608155822e-08, - "loss": 1.1292, - "step": 6411 - }, - { - "epoch": 0.8692469328272215, - "grad_norm": 1.5224713914577301, - "learning_rate": 8.827353128044535e-08, - "loss": 1.1382, - "step": 6412 - }, - { - "epoch": 0.8693824984748865, - "grad_norm": 1.629330009654892, - "learning_rate": 8.809322229858529e-08, - "loss": 1.111, - "step": 6413 - }, - { - "epoch": 0.8695180641225514, - "grad_norm": 2.3427064849808494, - "learning_rate": 8.791308917074925e-08, - "loss": 1.1287, - "step": 6414 - }, - { - "epoch": 0.8696536297702162, - "grad_norm": 1.6492587836816042, - "learning_rate": 8.773313193167431e-08, - "loss": 1.0701, - "step": 6415 - }, - { - "epoch": 0.8697891954178811, - "grad_norm": 1.5544271734385113, - "learning_rate": 8.755335061606383e-08, - "loss": 1.0729, - "step": 6416 - }, - { - "epoch": 0.869924761065546, - "grad_norm": 1.4916959341582237, - "learning_rate": 8.737374525858743e-08, - "loss": 1.1275, - "step": 6417 - }, - { - "epoch": 0.8700603267132109, - "grad_norm": 1.4423988466854738, - "learning_rate": 8.719431589388026e-08, - "loss": 1.0688, - "step": 6418 - }, - { - "epoch": 0.8701958923608758, - "grad_norm": 5.710962023922603, - "learning_rate": 8.701506255654411e-08, - "loss": 1.1412, - "step": 6419 - }, - { - "epoch": 0.8703314580085406, - "grad_norm": 1.5918331460336514, - "learning_rate": 8.683598528114644e-08, - "loss": 1.1074, - "step": 6420 - }, - { - "epoch": 0.8704670236562055, - "grad_norm": 2.1056262196579207, - "learning_rate": 8.665708410222095e-08, - "loss": 1.1172, - "step": 6421 - }, - { - "epoch": 0.8706025893038704, - "grad_norm": 2.092574407273337, - "learning_rate": 8.647835905426726e-08, - "loss": 1.1233, - "step": 6422 - }, - { - "epoch": 0.8707381549515353, - "grad_norm": 1.6715835349998467, - "learning_rate": 8.629981017175136e-08, - "loss": 1.1316, - "step": 6423 - }, - { - "epoch": 0.8708737205992002, - "grad_norm": 4.2751696839046405, - "learning_rate": 8.61214374891045e-08, - "loss": 1.1755, - "step": 6424 - }, - { - "epoch": 0.871009286246865, - "grad_norm": 1.918725093381928, - "learning_rate": 8.59432410407248e-08, - "loss": 1.1675, - "step": 6425 - }, - { - "epoch": 0.87114485189453, - "grad_norm": 1.7213970631259639, - "learning_rate": 8.576522086097593e-08, - "loss": 1.1208, - "step": 6426 - }, - { - "epoch": 0.8712804175421948, - "grad_norm": 1.8577495992291138, - "learning_rate": 8.55873769841876e-08, - "loss": 1.103, - "step": 6427 - }, - { - "epoch": 0.8714159831898597, - "grad_norm": 2.076637594476322, - "learning_rate": 8.540970944465575e-08, - "loss": 1.1214, - "step": 6428 - }, - { - "epoch": 0.8715515488375246, - "grad_norm": 1.6873613287343516, - "learning_rate": 8.523221827664206e-08, - "loss": 1.1126, - "step": 6429 - }, - { - "epoch": 0.8716871144851894, - "grad_norm": 1.7031558855161, - "learning_rate": 8.505490351437438e-08, - "loss": 1.1177, - "step": 6430 - }, - { - "epoch": 0.8718226801328544, - "grad_norm": 1.4959242177444296, - "learning_rate": 8.487776519204637e-08, - "loss": 1.0455, - "step": 6431 - }, - { - "epoch": 0.8719582457805192, - "grad_norm": 2.1308277993139657, - "learning_rate": 8.470080334381791e-08, - "loss": 1.1049, - "step": 6432 - }, - { - "epoch": 0.8720938114281841, - "grad_norm": 1.602873683766397, - "learning_rate": 8.452401800381448e-08, - "loss": 1.1347, - "step": 6433 - }, - { - "epoch": 0.872229377075849, - "grad_norm": 1.8421494433433585, - "learning_rate": 8.434740920612792e-08, - "loss": 1.0806, - "step": 6434 - }, - { - "epoch": 0.8723649427235138, - "grad_norm": 1.6191515164072836, - "learning_rate": 8.417097698481568e-08, - "loss": 1.114, - "step": 6435 - }, - { - "epoch": 0.8725005083711788, - "grad_norm": 1.5941849031294164, - "learning_rate": 8.399472137390152e-08, - "loss": 1.1216, - "step": 6436 - }, - { - "epoch": 0.8726360740188436, - "grad_norm": 2.502001203756435, - "learning_rate": 8.38186424073748e-08, - "loss": 1.1147, - "step": 6437 - }, - { - "epoch": 0.8727716396665085, - "grad_norm": 1.915386758991212, - "learning_rate": 8.364274011919114e-08, - "loss": 1.128, - "step": 6438 - }, - { - "epoch": 0.8729072053141734, - "grad_norm": 1.8675064571032283, - "learning_rate": 8.346701454327143e-08, - "loss": 1.1607, - "step": 6439 - }, - { - "epoch": 0.8730427709618382, - "grad_norm": 2.4419980812618816, - "learning_rate": 8.329146571350365e-08, - "loss": 1.1142, - "step": 6440 - }, - { - "epoch": 0.8731783366095032, - "grad_norm": 2.45899924666329, - "learning_rate": 8.311609366374028e-08, - "loss": 1.1517, - "step": 6441 - }, - { - "epoch": 0.873313902257168, - "grad_norm": 1.5104144741157344, - "learning_rate": 8.294089842780117e-08, - "loss": 1.1141, - "step": 6442 - }, - { - "epoch": 0.8734494679048329, - "grad_norm": 2.0206014286794582, - "learning_rate": 8.27658800394706e-08, - "loss": 1.0729, - "step": 6443 - }, - { - "epoch": 0.8735850335524978, - "grad_norm": 1.851641068563042, - "learning_rate": 8.259103853250027e-08, - "loss": 1.105, - "step": 6444 - }, - { - "epoch": 0.8737205992001627, - "grad_norm": 2.0836688494138325, - "learning_rate": 8.241637394060619e-08, - "loss": 1.1083, - "step": 6445 - }, - { - "epoch": 0.8738561648478276, - "grad_norm": 1.5127354912276167, - "learning_rate": 8.224188629747175e-08, - "loss": 1.0714, - "step": 6446 - }, - { - "epoch": 0.8739917304954924, - "grad_norm": 1.4759554596680053, - "learning_rate": 8.206757563674493e-08, - "loss": 1.1325, - "step": 6447 - }, - { - "epoch": 0.8741272961431573, - "grad_norm": 2.055183879963849, - "learning_rate": 8.189344199204073e-08, - "loss": 1.1479, - "step": 6448 - }, - { - "epoch": 0.8742628617908222, - "grad_norm": 1.531415587968917, - "learning_rate": 8.171948539693874e-08, - "loss": 1.1227, - "step": 6449 - }, - { - "epoch": 0.8743984274384871, - "grad_norm": 1.508478933850404, - "learning_rate": 8.154570588498599e-08, - "loss": 1.1192, - "step": 6450 - }, - { - "epoch": 0.874533993086152, - "grad_norm": 1.4352222907172287, - "learning_rate": 8.13721034896938e-08, - "loss": 1.1013, - "step": 6451 - }, - { - "epoch": 0.8746695587338168, - "grad_norm": 1.757337057814878, - "learning_rate": 8.119867824454018e-08, - "loss": 1.141, - "step": 6452 - }, - { - "epoch": 0.8748051243814817, - "grad_norm": 1.6423164595715511, - "learning_rate": 8.102543018296892e-08, - "loss": 1.1298, - "step": 6453 - }, - { - "epoch": 0.8749406900291467, - "grad_norm": 1.4762119510269074, - "learning_rate": 8.085235933838952e-08, - "loss": 1.0799, - "step": 6454 - }, - { - "epoch": 0.8750762556768115, - "grad_norm": 1.784798123628636, - "learning_rate": 8.067946574417739e-08, - "loss": 1.0858, - "step": 6455 - }, - { - "epoch": 0.8752118213244764, - "grad_norm": 2.0299916851958306, - "learning_rate": 8.050674943367352e-08, - "loss": 1.148, - "step": 6456 - }, - { - "epoch": 0.8753473869721412, - "grad_norm": 1.9293749139802183, - "learning_rate": 8.033421044018496e-08, - "loss": 1.1312, - "step": 6457 - }, - { - "epoch": 0.8754829526198061, - "grad_norm": 1.9635102917176244, - "learning_rate": 8.016184879698462e-08, - "loss": 1.1282, - "step": 6458 - }, - { - "epoch": 0.8756185182674711, - "grad_norm": 2.57172511487264, - "learning_rate": 7.998966453731093e-08, - "loss": 1.1295, - "step": 6459 - }, - { - "epoch": 0.8757540839151359, - "grad_norm": 1.7037032192283534, - "learning_rate": 7.981765769436833e-08, - "loss": 1.1244, - "step": 6460 - }, - { - "epoch": 0.8758896495628008, - "grad_norm": 1.7591214450534252, - "learning_rate": 7.964582830132704e-08, - "loss": 1.1157, - "step": 6461 - }, - { - "epoch": 0.8760252152104656, - "grad_norm": 1.8587698934734926, - "learning_rate": 7.94741763913227e-08, - "loss": 1.1586, - "step": 6462 - }, - { - "epoch": 0.8761607808581305, - "grad_norm": 1.7277271583504699, - "learning_rate": 7.930270199745748e-08, - "loss": 1.1252, - "step": 6463 - }, - { - "epoch": 0.8762963465057955, - "grad_norm": 1.7569150908258437, - "learning_rate": 7.913140515279837e-08, - "loss": 1.1128, - "step": 6464 - }, - { - "epoch": 0.8764319121534603, - "grad_norm": 1.7546395359769382, - "learning_rate": 7.896028589037929e-08, - "loss": 1.1359, - "step": 6465 - }, - { - "epoch": 0.8765674778011252, - "grad_norm": 1.9260725947377775, - "learning_rate": 7.87893442431985e-08, - "loss": 1.1409, - "step": 6466 - }, - { - "epoch": 0.8767030434487901, - "grad_norm": 1.5367032416373057, - "learning_rate": 7.86185802442212e-08, - "loss": 1.1171, - "step": 6467 - }, - { - "epoch": 0.876838609096455, - "grad_norm": 1.5652829522011904, - "learning_rate": 7.844799392637769e-08, - "loss": 1.1669, - "step": 6468 - }, - { - "epoch": 0.8769741747441199, - "grad_norm": 1.486704396032805, - "learning_rate": 7.827758532256435e-08, - "loss": 1.1087, - "step": 6469 - }, - { - "epoch": 0.8771097403917847, - "grad_norm": 2.054500893125875, - "learning_rate": 7.810735446564298e-08, - "loss": 1.1527, - "step": 6470 - }, - { - "epoch": 0.8772453060394496, - "grad_norm": 1.541235954778072, - "learning_rate": 7.793730138844134e-08, - "loss": 1.1374, - "step": 6471 - }, - { - "epoch": 0.8773808716871145, - "grad_norm": 2.078400558691309, - "learning_rate": 7.776742612375275e-08, - "loss": 1.1124, - "step": 6472 - }, - { - "epoch": 0.8775164373347794, - "grad_norm": 2.2751521317907795, - "learning_rate": 7.759772870433645e-08, - "loss": 1.1042, - "step": 6473 - }, - { - "epoch": 0.8776520029824443, - "grad_norm": 1.8472985746733446, - "learning_rate": 7.742820916291714e-08, - "loss": 1.1466, - "step": 6474 - }, - { - "epoch": 0.8777875686301091, - "grad_norm": 1.6454572414804935, - "learning_rate": 7.725886753218536e-08, - "loss": 1.1141, - "step": 6475 - }, - { - "epoch": 0.877923134277774, - "grad_norm": 1.487219573205965, - "learning_rate": 7.708970384479729e-08, - "loss": 1.0662, - "step": 6476 - }, - { - "epoch": 0.878058699925439, - "grad_norm": 1.953113331059195, - "learning_rate": 7.692071813337487e-08, - "loss": 1.1624, - "step": 6477 - }, - { - "epoch": 0.8781942655731038, - "grad_norm": 1.9945047284082038, - "learning_rate": 7.675191043050556e-08, - "loss": 1.1258, - "step": 6478 - }, - { - "epoch": 0.8783298312207687, - "grad_norm": 1.8376426812159854, - "learning_rate": 7.658328076874287e-08, - "loss": 1.0785, - "step": 6479 - }, - { - "epoch": 0.8784653968684335, - "grad_norm": 2.1485387245678584, - "learning_rate": 7.641482918060504e-08, - "loss": 1.1757, - "step": 6480 - }, - { - "epoch": 0.8786009625160984, - "grad_norm": 1.7398993968427678, - "learning_rate": 7.624655569857751e-08, - "loss": 1.0804, - "step": 6481 - }, - { - "epoch": 0.8787365281637634, - "grad_norm": 1.7653803715019276, - "learning_rate": 7.607846035510957e-08, - "loss": 1.1424, - "step": 6482 - }, - { - "epoch": 0.8788720938114282, - "grad_norm": 1.7571064217045664, - "learning_rate": 7.591054318261802e-08, - "loss": 1.163, - "step": 6483 - }, - { - "epoch": 0.8790076594590931, - "grad_norm": 3.6065209650070376, - "learning_rate": 7.574280421348356e-08, - "loss": 1.147, - "step": 6484 - }, - { - "epoch": 0.8791432251067579, - "grad_norm": 1.687405615165891, - "learning_rate": 7.557524348005395e-08, - "loss": 1.0979, - "step": 6485 - }, - { - "epoch": 0.8792787907544228, - "grad_norm": 1.8453909985956207, - "learning_rate": 7.540786101464136e-08, - "loss": 1.138, - "step": 6486 - }, - { - "epoch": 0.8794143564020878, - "grad_norm": 1.6353479209270256, - "learning_rate": 7.524065684952475e-08, - "loss": 1.1374, - "step": 6487 - }, - { - "epoch": 0.8795499220497526, - "grad_norm": 1.609596729728245, - "learning_rate": 7.507363101694775e-08, - "loss": 1.1187, - "step": 6488 - }, - { - "epoch": 0.8796854876974175, - "grad_norm": 2.228272856350824, - "learning_rate": 7.490678354912006e-08, - "loss": 1.164, - "step": 6489 - }, - { - "epoch": 0.8798210533450823, - "grad_norm": 2.0278306692436074, - "learning_rate": 7.474011447821704e-08, - "loss": 1.1168, - "step": 6490 - }, - { - "epoch": 0.8799566189927472, - "grad_norm": 4.581796471996039, - "learning_rate": 7.457362383637922e-08, - "loss": 1.0944, - "step": 6491 - }, - { - "epoch": 0.8800921846404122, - "grad_norm": 2.0269814257038687, - "learning_rate": 7.440731165571323e-08, - "loss": 1.12, - "step": 6492 - }, - { - "epoch": 0.880227750288077, - "grad_norm": 1.6052003278999012, - "learning_rate": 7.42411779682911e-08, - "loss": 1.1232, - "step": 6493 - }, - { - "epoch": 0.8803633159357419, - "grad_norm": 1.800211209198717, - "learning_rate": 7.407522280615019e-08, - "loss": 1.1177, - "step": 6494 - }, - { - "epoch": 0.8804988815834067, - "grad_norm": 1.4903145761544596, - "learning_rate": 7.39094462012938e-08, - "loss": 1.177, - "step": 6495 - }, - { - "epoch": 0.8806344472310716, - "grad_norm": 1.5975918659646278, - "learning_rate": 7.374384818569069e-08, - "loss": 1.1341, - "step": 6496 - }, - { - "epoch": 0.8807700128787366, - "grad_norm": 1.697869456508576, - "learning_rate": 7.357842879127474e-08, - "loss": 1.135, - "step": 6497 - }, - { - "epoch": 0.8809055785264014, - "grad_norm": 1.4625922143842016, - "learning_rate": 7.341318804994645e-08, - "loss": 1.1435, - "step": 6498 - }, - { - "epoch": 0.8810411441740663, - "grad_norm": 1.9808707770665517, - "learning_rate": 7.324812599357044e-08, - "loss": 1.0952, - "step": 6499 - }, - { - "epoch": 0.8811767098217311, - "grad_norm": 1.5403613043533388, - "learning_rate": 7.308324265397836e-08, - "loss": 1.1285, - "step": 6500 - }, - { - "epoch": 0.8813122754693961, - "grad_norm": 1.5294566700935053, - "learning_rate": 7.291853806296599e-08, - "loss": 1.1211, - "step": 6501 - }, - { - "epoch": 0.881447841117061, - "grad_norm": 1.7874321136532343, - "learning_rate": 7.275401225229583e-08, - "loss": 1.1163, - "step": 6502 - }, - { - "epoch": 0.8815834067647258, - "grad_norm": 4.6034592336414875, - "learning_rate": 7.258966525369492e-08, - "loss": 1.1388, - "step": 6503 - }, - { - "epoch": 0.8817189724123907, - "grad_norm": 1.5412881404295358, - "learning_rate": 7.242549709885693e-08, - "loss": 1.0738, - "step": 6504 - }, - { - "epoch": 0.8818545380600555, - "grad_norm": 1.570243090303494, - "learning_rate": 7.226150781943963e-08, - "loss": 1.0983, - "step": 6505 - }, - { - "epoch": 0.8819901037077205, - "grad_norm": 1.5524187396503986, - "learning_rate": 7.209769744706772e-08, - "loss": 1.0826, - "step": 6506 - }, - { - "epoch": 0.8821256693553854, - "grad_norm": 7.3066240006435645, - "learning_rate": 7.193406601333018e-08, - "loss": 1.1382, - "step": 6507 - }, - { - "epoch": 0.8822612350030502, - "grad_norm": 1.4795508860470992, - "learning_rate": 7.177061354978242e-08, - "loss": 1.1217, - "step": 6508 - }, - { - "epoch": 0.8823968006507151, - "grad_norm": 1.6846359798952926, - "learning_rate": 7.160734008794489e-08, - "loss": 1.1287, - "step": 6509 - }, - { - "epoch": 0.8825323662983799, - "grad_norm": 2.1559656470099897, - "learning_rate": 7.144424565930341e-08, - "loss": 1.1197, - "step": 6510 - }, - { - "epoch": 0.8826679319460449, - "grad_norm": 1.4599551189147213, - "learning_rate": 7.128133029530969e-08, - "loss": 1.0911, - "step": 6511 - }, - { - "epoch": 0.8828034975937098, - "grad_norm": 1.681446592917456, - "learning_rate": 7.111859402738052e-08, - "loss": 1.1397, - "step": 6512 - }, - { - "epoch": 0.8829390632413746, - "grad_norm": 1.342960545840645, - "learning_rate": 7.095603688689833e-08, - "loss": 1.0966, - "step": 6513 - }, - { - "epoch": 0.8830746288890395, - "grad_norm": 1.6262350766870117, - "learning_rate": 7.079365890521106e-08, - "loss": 1.104, - "step": 6514 - }, - { - "epoch": 0.8832101945367044, - "grad_norm": 1.7038080605770212, - "learning_rate": 7.063146011363186e-08, - "loss": 1.0894, - "step": 6515 - }, - { - "epoch": 0.8833457601843693, - "grad_norm": 2.1229868327639574, - "learning_rate": 7.046944054343961e-08, - "loss": 1.1436, - "step": 6516 - }, - { - "epoch": 0.8834813258320342, - "grad_norm": 1.6878251522268506, - "learning_rate": 7.030760022587856e-08, - "loss": 1.1467, - "step": 6517 - }, - { - "epoch": 0.883616891479699, - "grad_norm": 1.5264837565387395, - "learning_rate": 7.014593919215816e-08, - "loss": 1.1151, - "step": 6518 - }, - { - "epoch": 0.8837524571273639, - "grad_norm": 2.2838925219273913, - "learning_rate": 6.998445747345371e-08, - "loss": 1.1093, - "step": 6519 - }, - { - "epoch": 0.8838880227750288, - "grad_norm": 1.9093989734358912, - "learning_rate": 6.982315510090542e-08, - "loss": 1.1489, - "step": 6520 - }, - { - "epoch": 0.8840235884226937, - "grad_norm": 1.6044414148355972, - "learning_rate": 6.966203210561927e-08, - "loss": 1.0994, - "step": 6521 - }, - { - "epoch": 0.8841591540703586, - "grad_norm": 1.5606526390632829, - "learning_rate": 6.950108851866687e-08, - "loss": 1.1143, - "step": 6522 - }, - { - "epoch": 0.8842947197180234, - "grad_norm": 3.336924579399035, - "learning_rate": 6.934032437108439e-08, - "loss": 1.1371, - "step": 6523 - }, - { - "epoch": 0.8844302853656884, - "grad_norm": 2.774352303578399, - "learning_rate": 6.917973969387424e-08, - "loss": 1.1229, - "step": 6524 - }, - { - "epoch": 0.8845658510133532, - "grad_norm": 1.6234063372099086, - "learning_rate": 6.901933451800379e-08, - "loss": 1.1357, - "step": 6525 - }, - { - "epoch": 0.8847014166610181, - "grad_norm": 1.9393489893581717, - "learning_rate": 6.885910887440593e-08, - "loss": 1.1225, - "step": 6526 - }, - { - "epoch": 0.884836982308683, - "grad_norm": 1.4282132534972312, - "learning_rate": 6.869906279397897e-08, - "loss": 1.13, - "step": 6527 - }, - { - "epoch": 0.8849725479563478, - "grad_norm": 2.1060820606793373, - "learning_rate": 6.853919630758653e-08, - "loss": 1.142, - "step": 6528 - }, - { - "epoch": 0.8851081136040128, - "grad_norm": 1.6377494986904886, - "learning_rate": 6.837950944605763e-08, - "loss": 1.1013, - "step": 6529 - }, - { - "epoch": 0.8852436792516776, - "grad_norm": 1.8053029112290662, - "learning_rate": 6.822000224018653e-08, - "loss": 1.1043, - "step": 6530 - }, - { - "epoch": 0.8853792448993425, - "grad_norm": 2.388735035333081, - "learning_rate": 6.806067472073296e-08, - "loss": 1.1391, - "step": 6531 - }, - { - "epoch": 0.8855148105470074, - "grad_norm": 1.4558845823017883, - "learning_rate": 6.790152691842199e-08, - "loss": 1.1505, - "step": 6532 - }, - { - "epoch": 0.8856503761946722, - "grad_norm": 2.681199560891195, - "learning_rate": 6.774255886394397e-08, - "loss": 1.09, - "step": 6533 - }, - { - "epoch": 0.8857859418423372, - "grad_norm": 1.7982764247684924, - "learning_rate": 6.758377058795473e-08, - "loss": 1.1131, - "step": 6534 - }, - { - "epoch": 0.885921507490002, - "grad_norm": 1.510101997961337, - "learning_rate": 6.742516212107541e-08, - "loss": 1.1486, - "step": 6535 - }, - { - "epoch": 0.8860570731376669, - "grad_norm": 1.639166930198824, - "learning_rate": 6.726673349389201e-08, - "loss": 1.1461, - "step": 6536 - }, - { - "epoch": 0.8861926387853318, - "grad_norm": 4.790695385523879, - "learning_rate": 6.710848473695674e-08, - "loss": 1.1212, - "step": 6537 - }, - { - "epoch": 0.8863282044329966, - "grad_norm": 1.9082686754712395, - "learning_rate": 6.69504158807862e-08, - "loss": 1.1172, - "step": 6538 - }, - { - "epoch": 0.8864637700806616, - "grad_norm": 1.507042864579074, - "learning_rate": 6.679252695586312e-08, - "loss": 1.1326, - "step": 6539 - }, - { - "epoch": 0.8865993357283264, - "grad_norm": 1.510492195528489, - "learning_rate": 6.663481799263471e-08, - "loss": 1.0594, - "step": 6540 - }, - { - "epoch": 0.8867349013759913, - "grad_norm": 1.979015304287678, - "learning_rate": 6.647728902151428e-08, - "loss": 1.1141, - "step": 6541 - }, - { - "epoch": 0.8868704670236562, - "grad_norm": 1.5034532346119918, - "learning_rate": 6.631994007287966e-08, - "loss": 1.1256, - "step": 6542 - }, - { - "epoch": 0.887006032671321, - "grad_norm": 1.5552629058632617, - "learning_rate": 6.616277117707492e-08, - "loss": 1.1213, - "step": 6543 - }, - { - "epoch": 0.887141598318986, - "grad_norm": 1.8636455410051425, - "learning_rate": 6.600578236440812e-08, - "loss": 1.2007, - "step": 6544 - }, - { - "epoch": 0.8872771639666509, - "grad_norm": 1.824396659317219, - "learning_rate": 6.584897366515407e-08, - "loss": 1.1178, - "step": 6545 - }, - { - "epoch": 0.8874127296143157, - "grad_norm": 1.6705718443506907, - "learning_rate": 6.569234510955135e-08, - "loss": 1.1462, - "step": 6546 - }, - { - "epoch": 0.8875482952619806, - "grad_norm": 2.854156675551722, - "learning_rate": 6.553589672780524e-08, - "loss": 1.1232, - "step": 6547 - }, - { - "epoch": 0.8876838609096455, - "grad_norm": 1.782727800709161, - "learning_rate": 6.537962855008483e-08, - "loss": 1.1157, - "step": 6548 - }, - { - "epoch": 0.8878194265573104, - "grad_norm": 1.599482694358705, - "learning_rate": 6.522354060652602e-08, - "loss": 1.1315, - "step": 6549 - }, - { - "epoch": 0.8879549922049753, - "grad_norm": 1.5411346861761548, - "learning_rate": 6.50676329272285e-08, - "loss": 1.0893, - "step": 6550 - }, - { - "epoch": 0.8880905578526401, - "grad_norm": 1.885904126821212, - "learning_rate": 6.491190554225811e-08, - "loss": 1.0705, - "step": 6551 - }, - { - "epoch": 0.888226123500305, - "grad_norm": 1.5956734551968133, - "learning_rate": 6.475635848164562e-08, - "loss": 1.1128, - "step": 6552 - }, - { - "epoch": 0.8883616891479699, - "grad_norm": 1.9333745970363465, - "learning_rate": 6.460099177538703e-08, - "loss": 1.1318, - "step": 6553 - }, - { - "epoch": 0.8884972547956348, - "grad_norm": 1.5150821855116756, - "learning_rate": 6.444580545344358e-08, - "loss": 1.1119, - "step": 6554 - }, - { - "epoch": 0.8886328204432997, - "grad_norm": 2.2451059033610665, - "learning_rate": 6.429079954574168e-08, - "loss": 1.1169, - "step": 6555 - }, - { - "epoch": 0.8887683860909645, - "grad_norm": 1.8674666684941905, - "learning_rate": 6.413597408217309e-08, - "loss": 1.1223, - "step": 6556 - }, - { - "epoch": 0.8889039517386295, - "grad_norm": 1.7477754815929119, - "learning_rate": 6.398132909259457e-08, - "loss": 1.0745, - "step": 6557 - }, - { - "epoch": 0.8890395173862943, - "grad_norm": 1.606673370223641, - "learning_rate": 6.382686460682851e-08, - "loss": 1.1229, - "step": 6558 - }, - { - "epoch": 0.8891750830339592, - "grad_norm": 1.8129028718790536, - "learning_rate": 6.367258065466152e-08, - "loss": 1.121, - "step": 6559 - }, - { - "epoch": 0.8893106486816241, - "grad_norm": 1.6807824816579051, - "learning_rate": 6.35184772658468e-08, - "loss": 1.0931, - "step": 6560 - }, - { - "epoch": 0.8894462143292889, - "grad_norm": 4.200766277821165, - "learning_rate": 6.336455447010126e-08, - "loss": 1.1437, - "step": 6561 - }, - { - "epoch": 0.8895817799769539, - "grad_norm": 2.5245472176127493, - "learning_rate": 6.321081229710834e-08, - "loss": 1.1237, - "step": 6562 - }, - { - "epoch": 0.8897173456246187, - "grad_norm": 1.5748112443076114, - "learning_rate": 6.305725077651558e-08, - "loss": 1.1316, - "step": 6563 - }, - { - "epoch": 0.8898529112722836, - "grad_norm": 2.229825331387252, - "learning_rate": 6.290386993793617e-08, - "loss": 1.1055, - "step": 6564 - }, - { - "epoch": 0.8899884769199485, - "grad_norm": 12.754560782925731, - "learning_rate": 6.275066981094857e-08, - "loss": 1.1146, - "step": 6565 - }, - { - "epoch": 0.8901240425676133, - "grad_norm": 2.2862690206419094, - "learning_rate": 6.259765042509602e-08, - "loss": 1.1116, - "step": 6566 - }, - { - "epoch": 0.8902596082152783, - "grad_norm": 2.2029974214789467, - "learning_rate": 6.244481180988714e-08, - "loss": 1.108, - "step": 6567 - }, - { - "epoch": 0.8903951738629431, - "grad_norm": 1.630833353797896, - "learning_rate": 6.229215399479582e-08, - "loss": 1.139, - "step": 6568 - }, - { - "epoch": 0.890530739510608, - "grad_norm": 2.5594386025708165, - "learning_rate": 6.213967700926071e-08, - "loss": 1.1161, - "step": 6569 - }, - { - "epoch": 0.8906663051582729, - "grad_norm": 1.4683875385139797, - "learning_rate": 6.198738088268585e-08, - "loss": 1.1269, - "step": 6570 - }, - { - "epoch": 0.8908018708059378, - "grad_norm": 1.5196703561213696, - "learning_rate": 6.183526564444042e-08, - "loss": 1.1335, - "step": 6571 - }, - { - "epoch": 0.8909374364536027, - "grad_norm": 1.5780416222417675, - "learning_rate": 6.16833313238585e-08, - "loss": 1.1077, - "step": 6572 - }, - { - "epoch": 0.8910730021012675, - "grad_norm": 1.5593170715861027, - "learning_rate": 6.153157795023956e-08, - "loss": 1.1556, - "step": 6573 - }, - { - "epoch": 0.8912085677489324, - "grad_norm": 1.532784394030106, - "learning_rate": 6.138000555284806e-08, - "loss": 1.1073, - "step": 6574 - }, - { - "epoch": 0.8913441333965973, - "grad_norm": 2.0973810335118603, - "learning_rate": 6.12286141609134e-08, - "loss": 1.1264, - "step": 6575 - }, - { - "epoch": 0.8914796990442622, - "grad_norm": 4.358925305721833, - "learning_rate": 6.107740380363036e-08, - "loss": 1.1231, - "step": 6576 - }, - { - "epoch": 0.8916152646919271, - "grad_norm": 1.6006614276930178, - "learning_rate": 6.092637451015847e-08, - "loss": 1.1994, - "step": 6577 - }, - { - "epoch": 0.8917508303395919, - "grad_norm": 2.6028573593495827, - "learning_rate": 6.07755263096229e-08, - "loss": 1.1511, - "step": 6578 - }, - { - "epoch": 0.8918863959872568, - "grad_norm": 1.5880062223442666, - "learning_rate": 6.062485923111293e-08, - "loss": 1.1177, - "step": 6579 - }, - { - "epoch": 0.8920219616349218, - "grad_norm": 1.795462523303609, - "learning_rate": 6.047437330368421e-08, - "loss": 1.1418, - "step": 6580 - }, - { - "epoch": 0.8921575272825866, - "grad_norm": 1.9618468612272715, - "learning_rate": 6.032406855635619e-08, - "loss": 1.1787, - "step": 6581 - }, - { - "epoch": 0.8922930929302515, - "grad_norm": 2.5865581260675925, - "learning_rate": 6.017394501811445e-08, - "loss": 1.1669, - "step": 6582 - }, - { - "epoch": 0.8924286585779163, - "grad_norm": 1.7745883093409707, - "learning_rate": 6.002400271790864e-08, - "loss": 1.0519, - "step": 6583 - }, - { - "epoch": 0.8925642242255812, - "grad_norm": 2.017692585737126, - "learning_rate": 5.987424168465439e-08, - "loss": 1.1417, - "step": 6584 - }, - { - "epoch": 0.8926997898732462, - "grad_norm": 1.4640505395053558, - "learning_rate": 5.972466194723159e-08, - "loss": 1.1379, - "step": 6585 - }, - { - "epoch": 0.892835355520911, - "grad_norm": 1.3944689527040175, - "learning_rate": 5.957526353448572e-08, - "loss": 1.1472, - "step": 6586 - }, - { - "epoch": 0.8929709211685759, - "grad_norm": 1.8858666555645642, - "learning_rate": 5.9426046475226975e-08, - "loss": 1.1333, - "step": 6587 - }, - { - "epoch": 0.8931064868162407, - "grad_norm": 1.4607455102674543, - "learning_rate": 5.9277010798230666e-08, - "loss": 1.1289, - "step": 6588 - }, - { - "epoch": 0.8932420524639056, - "grad_norm": 1.55332974930977, - "learning_rate": 5.912815653223724e-08, - "loss": 1.068, - "step": 6589 - }, - { - "epoch": 0.8933776181115706, - "grad_norm": 1.6272422671388427, - "learning_rate": 5.897948370595207e-08, - "loss": 1.0957, - "step": 6590 - }, - { - "epoch": 0.8935131837592354, - "grad_norm": 1.7528428018353854, - "learning_rate": 5.8830992348045563e-08, - "loss": 1.1149, - "step": 6591 - }, - { - "epoch": 0.8936487494069003, - "grad_norm": 2.357288681272348, - "learning_rate": 5.8682682487152915e-08, - "loss": 1.0887, - "step": 6592 - }, - { - "epoch": 0.8937843150545651, - "grad_norm": 2.000239537972991, - "learning_rate": 5.8534554151874805e-08, - "loss": 1.1268, - "step": 6593 - }, - { - "epoch": 0.89391988070223, - "grad_norm": 1.682855820285893, - "learning_rate": 5.8386607370776274e-08, - "loss": 1.1056, - "step": 6594 - }, - { - "epoch": 0.894055446349895, - "grad_norm": 1.5236714134912441, - "learning_rate": 5.823884217238817e-08, - "loss": 1.1013, - "step": 6595 - }, - { - "epoch": 0.8941910119975598, - "grad_norm": 1.7235570801720563, - "learning_rate": 5.809125858520514e-08, - "loss": 1.106, - "step": 6596 - }, - { - "epoch": 0.8943265776452247, - "grad_norm": 1.952667024462507, - "learning_rate": 5.794385663768819e-08, - "loss": 1.1251, - "step": 6597 - }, - { - "epoch": 0.8944621432928895, - "grad_norm": 2.6003588343652675, - "learning_rate": 5.7796636358262155e-08, - "loss": 1.1193, - "step": 6598 - }, - { - "epoch": 0.8945977089405545, - "grad_norm": 3.240102439614192, - "learning_rate": 5.764959777531775e-08, - "loss": 1.1333, - "step": 6599 - }, - { - "epoch": 0.8947332745882194, - "grad_norm": 1.7217869921175282, - "learning_rate": 5.750274091720964e-08, - "loss": 1.1251, - "step": 6600 - }, - { - "epoch": 0.8948688402358842, - "grad_norm": 1.5449719470270684, - "learning_rate": 5.7356065812258604e-08, - "loss": 1.1077, - "step": 6601 - }, - { - "epoch": 0.8950044058835491, - "grad_norm": 1.8286130688132023, - "learning_rate": 5.720957248874925e-08, - "loss": 1.1133, - "step": 6602 - }, - { - "epoch": 0.8951399715312139, - "grad_norm": 1.5196358266934906, - "learning_rate": 5.706326097493219e-08, - "loss": 1.0904, - "step": 6603 - }, - { - "epoch": 0.8952755371788789, - "grad_norm": 1.7011250642183293, - "learning_rate": 5.691713129902187e-08, - "loss": 1.1234, - "step": 6604 - }, - { - "epoch": 0.8954111028265438, - "grad_norm": 2.5680575605405527, - "learning_rate": 5.677118348919874e-08, - "loss": 1.1181, - "step": 6605 - }, - { - "epoch": 0.8955466684742086, - "grad_norm": 1.9997922004416384, - "learning_rate": 5.662541757360739e-08, - "loss": 1.1375, - "step": 6606 - }, - { - "epoch": 0.8956822341218735, - "grad_norm": 2.0296472063616746, - "learning_rate": 5.6479833580357796e-08, - "loss": 1.1225, - "step": 6607 - }, - { - "epoch": 0.8958177997695383, - "grad_norm": 1.5975487436160027, - "learning_rate": 5.633443153752448e-08, - "loss": 1.0894, - "step": 6608 - }, - { - "epoch": 0.8959533654172033, - "grad_norm": 2.3213587414460366, - "learning_rate": 5.6189211473147256e-08, - "loss": 1.1155, - "step": 6609 - }, - { - "epoch": 0.8960889310648682, - "grad_norm": 1.6806542080930005, - "learning_rate": 5.60441734152306e-08, - "loss": 1.1376, - "step": 6610 - }, - { - "epoch": 0.896224496712533, - "grad_norm": 4.065279305333201, - "learning_rate": 5.5899317391744025e-08, - "loss": 1.1317, - "step": 6611 - }, - { - "epoch": 0.8963600623601979, - "grad_norm": 1.7509581061930606, - "learning_rate": 5.575464343062175e-08, - "loss": 1.1415, - "step": 6612 - }, - { - "epoch": 0.8964956280078628, - "grad_norm": 1.7699001989000014, - "learning_rate": 5.561015155976312e-08, - "loss": 1.1395, - "step": 6613 - }, - { - "epoch": 0.8966311936555277, - "grad_norm": 1.5073314441943841, - "learning_rate": 5.546584180703207e-08, - "loss": 1.1398, - "step": 6614 - }, - { - "epoch": 0.8967667593031926, - "grad_norm": 1.6073959306120074, - "learning_rate": 5.5321714200257884e-08, - "loss": 1.1091, - "step": 6615 - }, - { - "epoch": 0.8969023249508574, - "grad_norm": 1.6619968410178436, - "learning_rate": 5.5177768767234236e-08, - "loss": 1.1056, - "step": 6616 - }, - { - "epoch": 0.8970378905985223, - "grad_norm": 3.060407950129733, - "learning_rate": 5.50340055357198e-08, - "loss": 1.1299, - "step": 6617 - }, - { - "epoch": 0.8971734562461872, - "grad_norm": 1.5210213737044191, - "learning_rate": 5.4890424533438394e-08, - "loss": 1.1248, - "step": 6618 - }, - { - "epoch": 0.8973090218938521, - "grad_norm": 2.0061969673217717, - "learning_rate": 5.4747025788078546e-08, - "loss": 1.1616, - "step": 6619 - }, - { - "epoch": 0.897444587541517, - "grad_norm": 2.305878664406554, - "learning_rate": 5.460380932729303e-08, - "loss": 1.123, - "step": 6620 - }, - { - "epoch": 0.8975801531891818, - "grad_norm": 1.538660258877756, - "learning_rate": 5.4460775178700736e-08, - "loss": 1.0979, - "step": 6621 - }, - { - "epoch": 0.8977157188368468, - "grad_norm": 2.3266916872285894, - "learning_rate": 5.431792336988417e-08, - "loss": 1.1129, - "step": 6622 - }, - { - "epoch": 0.8978512844845117, - "grad_norm": 1.7931062896523229, - "learning_rate": 5.417525392839129e-08, - "loss": 1.1472, - "step": 6623 - }, - { - "epoch": 0.8979868501321765, - "grad_norm": 1.6612948725086316, - "learning_rate": 5.4032766881734745e-08, - "loss": 1.1031, - "step": 6624 - }, - { - "epoch": 0.8981224157798414, - "grad_norm": 1.902199405123414, - "learning_rate": 5.3890462257392246e-08, - "loss": 1.1298, - "step": 6625 - }, - { - "epoch": 0.8982579814275062, - "grad_norm": 1.8510286467002033, - "learning_rate": 5.3748340082805824e-08, - "loss": 1.1135, - "step": 6626 - }, - { - "epoch": 0.8983935470751712, - "grad_norm": 1.811136280052085, - "learning_rate": 5.360640038538278e-08, - "loss": 1.1118, - "step": 6627 - }, - { - "epoch": 0.8985291127228361, - "grad_norm": 1.6679526248469438, - "learning_rate": 5.3464643192495104e-08, - "loss": 1.1376, - "step": 6628 - }, - { - "epoch": 0.8986646783705009, - "grad_norm": 1.5006984794960647, - "learning_rate": 5.33230685314795e-08, - "loss": 1.1071, - "step": 6629 - }, - { - "epoch": 0.8988002440181658, - "grad_norm": 1.524877619884794, - "learning_rate": 5.3181676429637447e-08, - "loss": 1.0911, - "step": 6630 - }, - { - "epoch": 0.8989358096658306, - "grad_norm": 1.7831547786981703, - "learning_rate": 5.304046691423536e-08, - "loss": 1.1231, - "step": 6631 - }, - { - "epoch": 0.8990713753134956, - "grad_norm": 1.5159582753641183, - "learning_rate": 5.289944001250446e-08, - "loss": 1.1431, - "step": 6632 - }, - { - "epoch": 0.8992069409611605, - "grad_norm": 1.7800743892638564, - "learning_rate": 5.275859575164054e-08, - "loss": 1.1689, - "step": 6633 - }, - { - "epoch": 0.8993425066088253, - "grad_norm": 1.456790767864359, - "learning_rate": 5.2617934158804557e-08, - "loss": 1.1414, - "step": 6634 - }, - { - "epoch": 0.8994780722564902, - "grad_norm": 1.4960953037722031, - "learning_rate": 5.247745526112146e-08, - "loss": 1.1271, - "step": 6635 - }, - { - "epoch": 0.899613637904155, - "grad_norm": 2.1940513804178186, - "learning_rate": 5.233715908568215e-08, - "loss": 1.103, - "step": 6636 - }, - { - "epoch": 0.89974920355182, - "grad_norm": 1.5886660397575265, - "learning_rate": 5.219704565954097e-08, - "loss": 1.1026, - "step": 6637 - }, - { - "epoch": 0.8998847691994849, - "grad_norm": 1.5578701011822265, - "learning_rate": 5.2057115009718434e-08, - "loss": 1.1401, - "step": 6638 - }, - { - "epoch": 0.9000203348471497, - "grad_norm": 1.7148906444994907, - "learning_rate": 5.191736716319828e-08, - "loss": 1.1452, - "step": 6639 - }, - { - "epoch": 0.9001559004948146, - "grad_norm": 1.6014404782470708, - "learning_rate": 5.17778021469305e-08, - "loss": 1.1362, - "step": 6640 - }, - { - "epoch": 0.9002914661424795, - "grad_norm": 1.5479957352003657, - "learning_rate": 5.1638419987828365e-08, - "loss": 1.1384, - "step": 6641 - }, - { - "epoch": 0.9004270317901444, - "grad_norm": 2.2775526069232135, - "learning_rate": 5.149922071277146e-08, - "loss": 1.0796, - "step": 6642 - }, - { - "epoch": 0.9005625974378093, - "grad_norm": 1.6060022307579547, - "learning_rate": 5.136020434860244e-08, - "loss": 1.1016, - "step": 6643 - }, - { - "epoch": 0.9006981630854741, - "grad_norm": 1.4532670355378896, - "learning_rate": 5.122137092213019e-08, - "loss": 1.1185, - "step": 6644 - }, - { - "epoch": 0.900833728733139, - "grad_norm": 1.7822637550434186, - "learning_rate": 5.108272046012718e-08, - "loss": 1.1701, - "step": 6645 - }, - { - "epoch": 0.9009692943808039, - "grad_norm": 1.530284647918784, - "learning_rate": 5.094425298933136e-08, - "loss": 1.1434, - "step": 6646 - }, - { - "epoch": 0.9011048600284688, - "grad_norm": 1.7431303458661551, - "learning_rate": 5.080596853644492e-08, - "loss": 1.0955, - "step": 6647 - }, - { - "epoch": 0.9012404256761337, - "grad_norm": 1.5803202785204071, - "learning_rate": 5.066786712813498e-08, - "loss": 1.1587, - "step": 6648 - }, - { - "epoch": 0.9013759913237985, - "grad_norm": 1.386352113674172, - "learning_rate": 5.052994879103323e-08, - "loss": 1.1231, - "step": 6649 - }, - { - "epoch": 0.9015115569714635, - "grad_norm": 1.5567348013329179, - "learning_rate": 5.0392213551736176e-08, - "loss": 1.1231, - "step": 6650 - }, - { - "epoch": 0.9016471226191283, - "grad_norm": 1.5444360797583259, - "learning_rate": 5.0254661436805015e-08, - "loss": 1.1255, - "step": 6651 - }, - { - "epoch": 0.9017826882667932, - "grad_norm": 2.0903465670004513, - "learning_rate": 5.0117292472765635e-08, - "loss": 1.1167, - "step": 6652 - }, - { - "epoch": 0.9019182539144581, - "grad_norm": 1.9227015500862508, - "learning_rate": 4.9980106686108416e-08, - "loss": 1.1351, - "step": 6653 - }, - { - "epoch": 0.9020538195621229, - "grad_norm": 1.8182804879239725, - "learning_rate": 4.9843104103288625e-08, - "loss": 1.1097, - "step": 6654 - }, - { - "epoch": 0.9021893852097879, - "grad_norm": 1.8976278076365194, - "learning_rate": 4.9706284750726135e-08, - "loss": 1.1231, - "step": 6655 - }, - { - "epoch": 0.9023249508574527, - "grad_norm": 1.7457852220421306, - "learning_rate": 4.956964865480551e-08, - "loss": 1.1159, - "step": 6656 - }, - { - "epoch": 0.9024605165051176, - "grad_norm": 1.897936942282887, - "learning_rate": 4.9433195841875995e-08, - "loss": 1.1064, - "step": 6657 - }, - { - "epoch": 0.9025960821527825, - "grad_norm": 1.6608822132725811, - "learning_rate": 4.9296926338251e-08, - "loss": 1.082, - "step": 6658 - }, - { - "epoch": 0.9027316478004473, - "grad_norm": 2.0757131756801726, - "learning_rate": 4.916084017020972e-08, - "loss": 1.1053, - "step": 6659 - }, - { - "epoch": 0.9028672134481123, - "grad_norm": 1.56117956905002, - "learning_rate": 4.9024937363994714e-08, - "loss": 1.1733, - "step": 6660 - }, - { - "epoch": 0.9030027790957771, - "grad_norm": 2.1015768213541097, - "learning_rate": 4.888921794581424e-08, - "loss": 1.115, - "step": 6661 - }, - { - "epoch": 0.903138344743442, - "grad_norm": 1.5234965693332603, - "learning_rate": 4.875368194184026e-08, - "loss": 1.1115, - "step": 6662 - }, - { - "epoch": 0.9032739103911069, - "grad_norm": 1.6342897665718141, - "learning_rate": 4.8618329378210085e-08, - "loss": 1.1514, - "step": 6663 - }, - { - "epoch": 0.9034094760387718, - "grad_norm": 1.413906405943974, - "learning_rate": 4.848316028102539e-08, - "loss": 1.1137, - "step": 6664 - }, - { - "epoch": 0.9035450416864367, - "grad_norm": 2.6508059638420405, - "learning_rate": 4.834817467635233e-08, - "loss": 1.1449, - "step": 6665 - }, - { - "epoch": 0.9036806073341015, - "grad_norm": 5.016390010568439, - "learning_rate": 4.821337259022196e-08, - "loss": 1.0717, - "step": 6666 - }, - { - "epoch": 0.9038161729817664, - "grad_norm": 2.0720699804373455, - "learning_rate": 4.807875404862971e-08, - "loss": 1.1325, - "step": 6667 - }, - { - "epoch": 0.9039517386294313, - "grad_norm": 1.492958027759036, - "learning_rate": 4.794431907753571e-08, - "loss": 1.1052, - "step": 6668 - }, - { - "epoch": 0.9040873042770962, - "grad_norm": 1.7249110693672132, - "learning_rate": 4.781006770286478e-08, - "loss": 1.1398, - "step": 6669 - }, - { - "epoch": 0.9042228699247611, - "grad_norm": 2.060991366187916, - "learning_rate": 4.767599995050609e-08, - "loss": 1.1335, - "step": 6670 - }, - { - "epoch": 0.9043584355724259, - "grad_norm": 1.462603687870178, - "learning_rate": 4.7542115846313734e-08, - "loss": 1.098, - "step": 6671 - }, - { - "epoch": 0.9044940012200908, - "grad_norm": 1.6080861802235715, - "learning_rate": 4.740841541610596e-08, - "loss": 1.1165, - "step": 6672 - }, - { - "epoch": 0.9046295668677558, - "grad_norm": 1.6922030707148257, - "learning_rate": 4.727489868566603e-08, - "loss": 1.1301, - "step": 6673 - }, - { - "epoch": 0.9047651325154206, - "grad_norm": 1.5257125012030721, - "learning_rate": 4.714156568074157e-08, - "loss": 1.1202, - "step": 6674 - }, - { - "epoch": 0.9049006981630855, - "grad_norm": 1.7683038824532942, - "learning_rate": 4.700841642704478e-08, - "loss": 1.1116, - "step": 6675 - }, - { - "epoch": 0.9050362638107503, - "grad_norm": 1.4949725367256426, - "learning_rate": 4.687545095025225e-08, - "loss": 1.09, - "step": 6676 - }, - { - "epoch": 0.9051718294584152, - "grad_norm": 1.7149431658195196, - "learning_rate": 4.6742669276005786e-08, - "loss": 1.151, - "step": 6677 - }, - { - "epoch": 0.9053073951060802, - "grad_norm": 1.7856179239230516, - "learning_rate": 4.661007142991069e-08, - "loss": 1.1053, - "step": 6678 - }, - { - "epoch": 0.905442960753745, - "grad_norm": 1.7028035542955315, - "learning_rate": 4.6477657437537953e-08, - "loss": 1.089, - "step": 6679 - }, - { - "epoch": 0.9055785264014099, - "grad_norm": 2.289038129754105, - "learning_rate": 4.634542732442204e-08, - "loss": 1.1179, - "step": 6680 - }, - { - "epoch": 0.9057140920490747, - "grad_norm": 1.876258421438715, - "learning_rate": 4.62133811160631e-08, - "loss": 1.1063, - "step": 6681 - }, - { - "epoch": 0.9058496576967396, - "grad_norm": 1.4521907970302164, - "learning_rate": 4.608151883792466e-08, - "loss": 1.0833, - "step": 6682 - }, - { - "epoch": 0.9059852233444046, - "grad_norm": 1.470499112431176, - "learning_rate": 4.5949840515435715e-08, - "loss": 1.1551, - "step": 6683 - }, - { - "epoch": 0.9061207889920694, - "grad_norm": 1.9445477257198676, - "learning_rate": 4.581834617398916e-08, - "loss": 1.1042, - "step": 6684 - }, - { - "epoch": 0.9062563546397343, - "grad_norm": 3.6037135350854816, - "learning_rate": 4.568703583894262e-08, - "loss": 1.1003, - "step": 6685 - }, - { - "epoch": 0.9063919202873991, - "grad_norm": 2.020784928636139, - "learning_rate": 4.555590953561839e-08, - "loss": 1.1244, - "step": 6686 - }, - { - "epoch": 0.906527485935064, - "grad_norm": 1.6698337548714017, - "learning_rate": 4.542496728930301e-08, - "loss": 1.1125, - "step": 6687 - }, - { - "epoch": 0.906663051582729, - "grad_norm": 1.5326615595450963, - "learning_rate": 4.529420912524773e-08, - "loss": 1.0959, - "step": 6688 - }, - { - "epoch": 0.9067986172303938, - "grad_norm": 1.5172170760756922, - "learning_rate": 4.516363506866827e-08, - "loss": 1.0954, - "step": 6689 - }, - { - "epoch": 0.9069341828780587, - "grad_norm": 2.1908048598902936, - "learning_rate": 4.503324514474483e-08, - "loss": 1.1453, - "step": 6690 - }, - { - "epoch": 0.9070697485257235, - "grad_norm": 1.8507059242482646, - "learning_rate": 4.4903039378621945e-08, - "loss": 1.0883, - "step": 6691 - }, - { - "epoch": 0.9072053141733885, - "grad_norm": 1.5651735805164195, - "learning_rate": 4.477301779540887e-08, - "loss": 1.1299, - "step": 6692 - }, - { - "epoch": 0.9073408798210534, - "grad_norm": 1.5290292405470145, - "learning_rate": 4.4643180420179113e-08, - "loss": 1.1104, - "step": 6693 - }, - { - "epoch": 0.9074764454687182, - "grad_norm": 1.9330034554907178, - "learning_rate": 4.451352727797109e-08, - "loss": 1.0955, - "step": 6694 - }, - { - "epoch": 0.9076120111163831, - "grad_norm": 1.7245337868867912, - "learning_rate": 4.4384058393786895e-08, - "loss": 1.1443, - "step": 6695 - }, - { - "epoch": 0.9077475767640479, - "grad_norm": 1.6684575726657702, - "learning_rate": 4.425477379259424e-08, - "loss": 1.1144, - "step": 6696 - }, - { - "epoch": 0.9078831424117129, - "grad_norm": 1.752329573293536, - "learning_rate": 4.412567349932384e-08, - "loss": 1.1322, - "step": 6697 - }, - { - "epoch": 0.9080187080593778, - "grad_norm": 1.680912760156265, - "learning_rate": 4.399675753887244e-08, - "loss": 1.1251, - "step": 6698 - }, - { - "epoch": 0.9081542737070426, - "grad_norm": 1.9628110884394931, - "learning_rate": 4.386802593609984e-08, - "loss": 1.1361, - "step": 6699 - }, - { - "epoch": 0.9082898393547075, - "grad_norm": 1.971743044464529, - "learning_rate": 4.37394787158315e-08, - "loss": 1.1054, - "step": 6700 - }, - { - "epoch": 0.9084254050023723, - "grad_norm": 1.800632167910115, - "learning_rate": 4.3611115902856044e-08, - "loss": 1.1226, - "step": 6701 - }, - { - "epoch": 0.9085609706500373, - "grad_norm": 2.7047473475228334, - "learning_rate": 4.3482937521928e-08, - "loss": 1.1452, - "step": 6702 - }, - { - "epoch": 0.9086965362977022, - "grad_norm": 1.974768351493521, - "learning_rate": 4.335494359776493e-08, - "loss": 1.1381, - "step": 6703 - }, - { - "epoch": 0.908832101945367, - "grad_norm": 1.562607231411647, - "learning_rate": 4.322713415504975e-08, - "loss": 1.1037, - "step": 6704 - }, - { - "epoch": 0.9089676675930319, - "grad_norm": 1.9991811837218214, - "learning_rate": 4.3099509218429416e-08, - "loss": 1.0871, - "step": 6705 - }, - { - "epoch": 0.9091032332406969, - "grad_norm": 2.138281737846231, - "learning_rate": 4.297206881251547e-08, - "loss": 1.1217, - "step": 6706 - }, - { - "epoch": 0.9092387988883617, - "grad_norm": 1.6882284021645007, - "learning_rate": 4.284481296188369e-08, - "loss": 1.1379, - "step": 6707 - }, - { - "epoch": 0.9093743645360266, - "grad_norm": 1.6610950977317862, - "learning_rate": 4.271774169107445e-08, - "loss": 1.1086, - "step": 6708 - }, - { - "epoch": 0.9095099301836914, - "grad_norm": 1.6532040100652365, - "learning_rate": 4.259085502459236e-08, - "loss": 1.1369, - "step": 6709 - }, - { - "epoch": 0.9096454958313563, - "grad_norm": 1.6184984377051455, - "learning_rate": 4.246415298690653e-08, - "loss": 1.1221, - "step": 6710 - }, - { - "epoch": 0.9097810614790213, - "grad_norm": 5.9158141817397905, - "learning_rate": 4.2337635602450514e-08, - "loss": 1.0892, - "step": 6711 - }, - { - "epoch": 0.9099166271266861, - "grad_norm": 2.116888207803189, - "learning_rate": 4.2211302895622136e-08, - "loss": 1.123, - "step": 6712 - }, - { - "epoch": 0.910052192774351, - "grad_norm": 2.0291051413326193, - "learning_rate": 4.208515489078368e-08, - "loss": 1.1111, - "step": 6713 - }, - { - "epoch": 0.9101877584220158, - "grad_norm": 1.8600805813884354, - "learning_rate": 4.19591916122618e-08, - "loss": 1.1307, - "step": 6714 - }, - { - "epoch": 0.9103233240696808, - "grad_norm": 1.5303046370928024, - "learning_rate": 4.18334130843474e-08, - "loss": 1.0995, - "step": 6715 - }, - { - "epoch": 0.9104588897173457, - "grad_norm": 1.9234574774213533, - "learning_rate": 4.1707819331296076e-08, - "loss": 1.1495, - "step": 6716 - }, - { - "epoch": 0.9105944553650105, - "grad_norm": 3.740454162414902, - "learning_rate": 4.158241037732746e-08, - "loss": 1.1308, - "step": 6717 - }, - { - "epoch": 0.9107300210126754, - "grad_norm": 1.6030890673262763, - "learning_rate": 4.1457186246625863e-08, - "loss": 1.1221, - "step": 6718 - }, - { - "epoch": 0.9108655866603402, - "grad_norm": 1.5610624890784406, - "learning_rate": 4.133214696333942e-08, - "loss": 1.111, - "step": 6719 - }, - { - "epoch": 0.9110011523080052, - "grad_norm": 1.5318089766167327, - "learning_rate": 4.1207292551581284e-08, - "loss": 1.1166, - "step": 6720 - }, - { - "epoch": 0.9111367179556701, - "grad_norm": 1.5032125742660187, - "learning_rate": 4.1082623035428424e-08, - "loss": 1.1309, - "step": 6721 - }, - { - "epoch": 0.9112722836033349, - "grad_norm": 2.121962295313918, - "learning_rate": 4.095813843892259e-08, - "loss": 1.1084, - "step": 6722 - }, - { - "epoch": 0.9114078492509998, - "grad_norm": 2.0517419754756236, - "learning_rate": 4.08338387860695e-08, - "loss": 1.1345, - "step": 6723 - }, - { - "epoch": 0.9115434148986646, - "grad_norm": 1.7458090046833024, - "learning_rate": 4.0709724100839395e-08, - "loss": 1.1421, - "step": 6724 - }, - { - "epoch": 0.9116789805463296, - "grad_norm": 1.4331189853348463, - "learning_rate": 4.058579440716681e-08, - "loss": 1.1046, - "step": 6725 - }, - { - "epoch": 0.9118145461939945, - "grad_norm": 1.5257597427052438, - "learning_rate": 4.046204972895062e-08, - "loss": 1.135, - "step": 6726 - }, - { - "epoch": 0.9119501118416593, - "grad_norm": 2.228172151358896, - "learning_rate": 4.0338490090053966e-08, - "loss": 1.142, - "step": 6727 - }, - { - "epoch": 0.9120856774893242, - "grad_norm": 1.5574099420773155, - "learning_rate": 4.0215115514304456e-08, - "loss": 1.1405, - "step": 6728 - }, - { - "epoch": 0.912221243136989, - "grad_norm": 2.5602018393970223, - "learning_rate": 4.009192602549383e-08, - "loss": 1.138, - "step": 6729 - }, - { - "epoch": 0.912356808784654, - "grad_norm": 1.5824692749654645, - "learning_rate": 3.996892164737819e-08, - "loss": 1.1308, - "step": 6730 - }, - { - "epoch": 0.9124923744323189, - "grad_norm": 1.5090393442238643, - "learning_rate": 3.9846102403678027e-08, - "loss": 1.1131, - "step": 6731 - }, - { - "epoch": 0.9126279400799837, - "grad_norm": 2.1587481939478876, - "learning_rate": 3.972346831807793e-08, - "loss": 1.1723, - "step": 6732 - }, - { - "epoch": 0.9127635057276486, - "grad_norm": 1.84573197649397, - "learning_rate": 3.960101941422711e-08, - "loss": 1.1097, - "step": 6733 - }, - { - "epoch": 0.9128990713753135, - "grad_norm": 2.5887825311551302, - "learning_rate": 3.947875571573867e-08, - "loss": 1.1287, - "step": 6734 - }, - { - "epoch": 0.9130346370229784, - "grad_norm": 11.19250842158323, - "learning_rate": 3.93566772461904e-08, - "loss": 1.138, - "step": 6735 - }, - { - "epoch": 0.9131702026706433, - "grad_norm": 2.0790792653436423, - "learning_rate": 3.923478402912395e-08, - "loss": 1.1234, - "step": 6736 - }, - { - "epoch": 0.9133057683183081, - "grad_norm": 1.9683439079102498, - "learning_rate": 3.911307608804582e-08, - "loss": 1.0886, - "step": 6737 - }, - { - "epoch": 0.913441333965973, - "grad_norm": 2.717191193340322, - "learning_rate": 3.899155344642579e-08, - "loss": 1.1544, - "step": 6738 - }, - { - "epoch": 0.9135768996136379, - "grad_norm": 1.562624762112161, - "learning_rate": 3.887021612769936e-08, - "loss": 1.1224, - "step": 6739 - }, - { - "epoch": 0.9137124652613028, - "grad_norm": 2.165243127956083, - "learning_rate": 3.8749064155264685e-08, - "loss": 1.1087, - "step": 6740 - }, - { - "epoch": 0.9138480309089677, - "grad_norm": 2.034439418082233, - "learning_rate": 3.862809755248564e-08, - "loss": 1.128, - "step": 6741 - }, - { - "epoch": 0.9139835965566325, - "grad_norm": 1.9175213142748964, - "learning_rate": 3.850731634268911e-08, - "loss": 1.1051, - "step": 6742 - }, - { - "epoch": 0.9141191622042975, - "grad_norm": 1.9945286876189583, - "learning_rate": 3.838672054916725e-08, - "loss": 1.1407, - "step": 6743 - }, - { - "epoch": 0.9142547278519623, - "grad_norm": 1.7840672338425272, - "learning_rate": 3.826631019517568e-08, - "loss": 1.123, - "step": 6744 - }, - { - "epoch": 0.9143902934996272, - "grad_norm": 1.4609813868808128, - "learning_rate": 3.814608530393493e-08, - "loss": 1.1612, - "step": 6745 - }, - { - "epoch": 0.9145258591472921, - "grad_norm": 1.5340116197360563, - "learning_rate": 3.802604589862912e-08, - "loss": 1.1243, - "step": 6746 - }, - { - "epoch": 0.9146614247949569, - "grad_norm": 2.416001947618484, - "learning_rate": 3.790619200240697e-08, - "loss": 1.0812, - "step": 6747 - }, - { - "epoch": 0.9147969904426219, - "grad_norm": 1.4254261331293017, - "learning_rate": 3.7786523638381306e-08, - "loss": 1.1256, - "step": 6748 - }, - { - "epoch": 0.9149325560902867, - "grad_norm": 1.9716278196204002, - "learning_rate": 3.766704082962935e-08, - "loss": 1.1404, - "step": 6749 - }, - { - "epoch": 0.9150681217379516, - "grad_norm": 2.094521490049005, - "learning_rate": 3.754774359919244e-08, - "loss": 1.1515, - "step": 6750 - }, - { - "epoch": 0.9152036873856165, - "grad_norm": 1.558915646150576, - "learning_rate": 3.7428631970076065e-08, - "loss": 1.1473, - "step": 6751 - }, - { - "epoch": 0.9153392530332813, - "grad_norm": 1.5799693612173444, - "learning_rate": 3.730970596524985e-08, - "loss": 1.1157, - "step": 6752 - }, - { - "epoch": 0.9154748186809463, - "grad_norm": 2.194035223954067, - "learning_rate": 3.719096560764778e-08, - "loss": 1.0943, - "step": 6753 - }, - { - "epoch": 0.9156103843286111, - "grad_norm": 2.7299841919253627, - "learning_rate": 3.707241092016811e-08, - "loss": 1.1022, - "step": 6754 - }, - { - "epoch": 0.915745949976276, - "grad_norm": 1.4091904918431264, - "learning_rate": 3.69540419256732e-08, - "loss": 1.1554, - "step": 6755 - }, - { - "epoch": 0.9158815156239409, - "grad_norm": 1.6322601315230896, - "learning_rate": 3.683585864698946e-08, - "loss": 1.1531, - "step": 6756 - }, - { - "epoch": 0.9160170812716057, - "grad_norm": 1.4232384340422002, - "learning_rate": 3.6717861106907447e-08, - "loss": 1.1345, - "step": 6757 - }, - { - "epoch": 0.9161526469192707, - "grad_norm": 1.8531879047673163, - "learning_rate": 3.66000493281825e-08, - "loss": 1.0897, - "step": 6758 - }, - { - "epoch": 0.9162882125669355, - "grad_norm": 1.3953291605460312, - "learning_rate": 3.648242333353324e-08, - "loss": 1.1274, - "step": 6759 - }, - { - "epoch": 0.9164237782146004, - "grad_norm": 1.603409078962768, - "learning_rate": 3.6364983145643066e-08, - "loss": 1.1261, - "step": 6760 - }, - { - "epoch": 0.9165593438622653, - "grad_norm": 1.60890010857303, - "learning_rate": 3.624772878715954e-08, - "loss": 1.1184, - "step": 6761 - }, - { - "epoch": 0.9166949095099302, - "grad_norm": 1.682646441050896, - "learning_rate": 3.6130660280694005e-08, - "loss": 1.0704, - "step": 6762 - }, - { - "epoch": 0.9168304751575951, - "grad_norm": 1.5162503563823384, - "learning_rate": 3.6013777648822406e-08, - "loss": 1.0832, - "step": 6763 - }, - { - "epoch": 0.9169660408052599, - "grad_norm": 1.4596356350941322, - "learning_rate": 3.58970809140845e-08, - "loss": 1.129, - "step": 6764 - }, - { - "epoch": 0.9171016064529248, - "grad_norm": 1.932958168020106, - "learning_rate": 3.5780570098984273e-08, - "loss": 1.1319, - "step": 6765 - }, - { - "epoch": 0.9172371721005897, - "grad_norm": 2.0230214571770717, - "learning_rate": 3.5664245225990206e-08, - "loss": 1.1302, - "step": 6766 - }, - { - "epoch": 0.9173727377482546, - "grad_norm": 1.9827032968015892, - "learning_rate": 3.554810631753436e-08, - "loss": 1.1326, - "step": 6767 - }, - { - "epoch": 0.9175083033959195, - "grad_norm": 1.5951241793424613, - "learning_rate": 3.543215339601324e-08, - "loss": 1.1144, - "step": 6768 - }, - { - "epoch": 0.9176438690435843, - "grad_norm": 1.8770225814610406, - "learning_rate": 3.531638648378754e-08, - "loss": 1.0899, - "step": 6769 - }, - { - "epoch": 0.9177794346912492, - "grad_norm": 1.6157239779198629, - "learning_rate": 3.520080560318195e-08, - "loss": 1.1272, - "step": 6770 - }, - { - "epoch": 0.9179150003389142, - "grad_norm": 1.400026819480723, - "learning_rate": 3.508541077648541e-08, - "loss": 1.1713, - "step": 6771 - }, - { - "epoch": 0.918050565986579, - "grad_norm": 1.6138588854978515, - "learning_rate": 3.497020202595069e-08, - "loss": 1.1089, - "step": 6772 - }, - { - "epoch": 0.9181861316342439, - "grad_norm": 1.4210278852239016, - "learning_rate": 3.485517937379512e-08, - "loss": 1.1037, - "step": 6773 - }, - { - "epoch": 0.9183216972819087, - "grad_norm": 1.4710527517830612, - "learning_rate": 3.474034284219995e-08, - "loss": 1.1078, - "step": 6774 - }, - { - "epoch": 0.9184572629295736, - "grad_norm": 1.5194034262817644, - "learning_rate": 3.462569245331004e-08, - "loss": 1.1188, - "step": 6775 - }, - { - "epoch": 0.9185928285772386, - "grad_norm": 3.080599072232844, - "learning_rate": 3.451122822923547e-08, - "loss": 1.0935, - "step": 6776 - }, - { - "epoch": 0.9187283942249034, - "grad_norm": 2.3948632454021963, - "learning_rate": 3.4396950192049134e-08, - "loss": 1.0742, - "step": 6777 - }, - { - "epoch": 0.9188639598725683, - "grad_norm": 1.6677411665105937, - "learning_rate": 3.4282858363789194e-08, - "loss": 1.0985, - "step": 6778 - }, - { - "epoch": 0.9189995255202331, - "grad_norm": 1.8477414586683605, - "learning_rate": 3.4168952766456924e-08, - "loss": 1.1136, - "step": 6779 - }, - { - "epoch": 0.919135091167898, - "grad_norm": 1.5962607257626056, - "learning_rate": 3.405523342201855e-08, - "loss": 1.1059, - "step": 6780 - }, - { - "epoch": 0.919270656815563, - "grad_norm": 2.5660861461346585, - "learning_rate": 3.39417003524034e-08, - "loss": 1.1438, - "step": 6781 - }, - { - "epoch": 0.9194062224632278, - "grad_norm": 2.0075774509609157, - "learning_rate": 3.3828353579505975e-08, - "loss": 1.108, - "step": 6782 - }, - { - "epoch": 0.9195417881108927, - "grad_norm": 1.5729161408815533, - "learning_rate": 3.3715193125184005e-08, - "loss": 1.143, - "step": 6783 - }, - { - "epoch": 0.9196773537585576, - "grad_norm": 1.4682429376135833, - "learning_rate": 3.3602219011259595e-08, - "loss": 1.1074, - "step": 6784 - }, - { - "epoch": 0.9198129194062225, - "grad_norm": 3.390775636880124, - "learning_rate": 3.3489431259518975e-08, - "loss": 1.1153, - "step": 6785 - }, - { - "epoch": 0.9199484850538874, - "grad_norm": 1.741232804569642, - "learning_rate": 3.337682989171242e-08, - "loss": 1.1601, - "step": 6786 - }, - { - "epoch": 0.9200840507015522, - "grad_norm": 1.6820492376623872, - "learning_rate": 3.326441492955412e-08, - "loss": 1.0889, - "step": 6787 - }, - { - "epoch": 0.9202196163492171, - "grad_norm": 1.6799722808875552, - "learning_rate": 3.3152186394722506e-08, - "loss": 1.1226, - "step": 6788 - }, - { - "epoch": 0.920355181996882, - "grad_norm": 3.4961298688858453, - "learning_rate": 3.304014430885982e-08, - "loss": 1.143, - "step": 6789 - }, - { - "epoch": 0.9204907476445469, - "grad_norm": 2.144515505832102, - "learning_rate": 3.292828869357267e-08, - "loss": 1.1581, - "step": 6790 - }, - { - "epoch": 0.9206263132922118, - "grad_norm": 1.6128295019047643, - "learning_rate": 3.281661957043147e-08, - "loss": 1.1427, - "step": 6791 - }, - { - "epoch": 0.9207618789398766, - "grad_norm": 1.5161136403181574, - "learning_rate": 3.270513696097055e-08, - "loss": 1.1322, - "step": 6792 - }, - { - "epoch": 0.9208974445875415, - "grad_norm": 1.58184905370839, - "learning_rate": 3.2593840886688815e-08, - "loss": 1.1153, - "step": 6793 - }, - { - "epoch": 0.9210330102352065, - "grad_norm": 1.6405503786461217, - "learning_rate": 3.248273136904844e-08, - "loss": 1.1066, - "step": 6794 - }, - { - "epoch": 0.9211685758828713, - "grad_norm": 1.5565647045485356, - "learning_rate": 3.23718084294764e-08, - "loss": 1.129, - "step": 6795 - }, - { - "epoch": 0.9213041415305362, - "grad_norm": 1.5677495352039321, - "learning_rate": 3.226107208936279e-08, - "loss": 1.09, - "step": 6796 - }, - { - "epoch": 0.921439707178201, - "grad_norm": 2.6749829568042713, - "learning_rate": 3.2150522370062886e-08, - "loss": 1.0966, - "step": 6797 - }, - { - "epoch": 0.9215752728258659, - "grad_norm": 2.7098094494613245, - "learning_rate": 3.204015929289483e-08, - "loss": 1.1283, - "step": 6798 - }, - { - "epoch": 0.9217108384735309, - "grad_norm": 1.5895928781437476, - "learning_rate": 3.1929982879141613e-08, - "loss": 1.1278, - "step": 6799 - }, - { - "epoch": 0.9218464041211957, - "grad_norm": 1.4545884396137985, - "learning_rate": 3.181999315004946e-08, - "loss": 1.0909, - "step": 6800 - }, - { - "epoch": 0.9219819697688606, - "grad_norm": 1.5624784181603009, - "learning_rate": 3.171019012682952e-08, - "loss": 1.1426, - "step": 6801 - }, - { - "epoch": 0.9221175354165254, - "grad_norm": 2.148027098447075, - "learning_rate": 3.160057383065606e-08, - "loss": 1.1017, - "step": 6802 - }, - { - "epoch": 0.9222531010641903, - "grad_norm": 1.680919178352029, - "learning_rate": 3.149114428266786e-08, - "loss": 1.1381, - "step": 6803 - }, - { - "epoch": 0.9223886667118553, - "grad_norm": 3.2689270501863383, - "learning_rate": 3.138190150396758e-08, - "loss": 1.1152, - "step": 6804 - }, - { - "epoch": 0.9225242323595201, - "grad_norm": 1.7159018500201586, - "learning_rate": 3.1272845515621816e-08, - "loss": 1.1626, - "step": 6805 - }, - { - "epoch": 0.922659798007185, - "grad_norm": 1.4978633830360233, - "learning_rate": 3.116397633866108e-08, - "loss": 1.1306, - "step": 6806 - }, - { - "epoch": 0.9227953636548498, - "grad_norm": 2.641323732681733, - "learning_rate": 3.1055293994080024e-08, - "loss": 1.1408, - "step": 6807 - }, - { - "epoch": 0.9229309293025147, - "grad_norm": 1.9875510445856372, - "learning_rate": 3.09467985028371e-08, - "loss": 1.1744, - "step": 6808 - }, - { - "epoch": 0.9230664949501797, - "grad_norm": 1.6533342296447722, - "learning_rate": 3.08384898858548e-08, - "loss": 1.1296, - "step": 6809 - }, - { - "epoch": 0.9232020605978445, - "grad_norm": 1.849303513555864, - "learning_rate": 3.073036816401975e-08, - "loss": 1.1224, - "step": 6810 - }, - { - "epoch": 0.9233376262455094, - "grad_norm": 1.4655070424663963, - "learning_rate": 3.062243335818215e-08, - "loss": 1.1015, - "step": 6811 - }, - { - "epoch": 0.9234731918931742, - "grad_norm": 1.634029525598839, - "learning_rate": 3.051468548915648e-08, - "loss": 1.1259, - "step": 6812 - }, - { - "epoch": 0.9236087575408392, - "grad_norm": 1.477225045719399, - "learning_rate": 3.04071245777211e-08, - "loss": 1.1211, - "step": 6813 - }, - { - "epoch": 0.9237443231885041, - "grad_norm": 1.4288388723345116, - "learning_rate": 3.0299750644618205e-08, - "loss": 1.093, - "step": 6814 - }, - { - "epoch": 0.9238798888361689, - "grad_norm": 1.5997482608252016, - "learning_rate": 3.019256371055423e-08, - "loss": 1.1282, - "step": 6815 - }, - { - "epoch": 0.9240154544838338, - "grad_norm": 1.8060546247898834, - "learning_rate": 3.0085563796198866e-08, - "loss": 1.1571, - "step": 6816 - }, - { - "epoch": 0.9241510201314986, - "grad_norm": 1.6961291626064592, - "learning_rate": 2.997875092218671e-08, - "loss": 1.1118, - "step": 6817 - }, - { - "epoch": 0.9242865857791636, - "grad_norm": 1.636482620045243, - "learning_rate": 2.987212510911541e-08, - "loss": 1.1144, - "step": 6818 - }, - { - "epoch": 0.9244221514268285, - "grad_norm": 1.5105339516960778, - "learning_rate": 2.976568637754717e-08, - "loss": 1.112, - "step": 6819 - }, - { - "epoch": 0.9245577170744933, - "grad_norm": 1.6565107763605547, - "learning_rate": 2.9659434748007696e-08, - "loss": 1.0509, - "step": 6820 - }, - { - "epoch": 0.9246932827221582, - "grad_norm": 1.4157411514046387, - "learning_rate": 2.9553370240986808e-08, - "loss": 1.0991, - "step": 6821 - }, - { - "epoch": 0.924828848369823, - "grad_norm": 1.745320662701447, - "learning_rate": 2.944749287693815e-08, - "loss": 1.1286, - "step": 6822 - }, - { - "epoch": 0.924964414017488, - "grad_norm": 1.4368208619665475, - "learning_rate": 2.9341802676279505e-08, - "loss": 1.1242, - "step": 6823 - }, - { - "epoch": 0.9250999796651529, - "grad_norm": 1.5232438748581478, - "learning_rate": 2.923629965939234e-08, - "loss": 1.1082, - "step": 6824 - }, - { - "epoch": 0.9252355453128177, - "grad_norm": 1.5583921286989542, - "learning_rate": 2.913098384662205e-08, - "loss": 1.0806, - "step": 6825 - }, - { - "epoch": 0.9253711109604826, - "grad_norm": 1.8432630004862265, - "learning_rate": 2.902585525827783e-08, - "loss": 1.1239, - "step": 6826 - }, - { - "epoch": 0.9255066766081474, - "grad_norm": 1.6177331432956472, - "learning_rate": 2.8920913914633138e-08, - "loss": 1.118, - "step": 6827 - }, - { - "epoch": 0.9256422422558124, - "grad_norm": 1.60434720254282, - "learning_rate": 2.881615983592489e-08, - "loss": 1.1316, - "step": 6828 - }, - { - "epoch": 0.9257778079034773, - "grad_norm": 1.8222456556668976, - "learning_rate": 2.8711593042354154e-08, - "loss": 1.1048, - "step": 6829 - }, - { - "epoch": 0.9259133735511421, - "grad_norm": 1.5994195524589325, - "learning_rate": 2.8607213554086018e-08, - "loss": 1.1157, - "step": 6830 - }, - { - "epoch": 0.926048939198807, - "grad_norm": 1.638413625477002, - "learning_rate": 2.8503021391248718e-08, - "loss": 1.1348, - "step": 6831 - }, - { - "epoch": 0.9261845048464719, - "grad_norm": 1.6321007229891846, - "learning_rate": 2.839901657393551e-08, - "loss": 1.1535, - "step": 6832 - }, - { - "epoch": 0.9263200704941368, - "grad_norm": 1.4286735246647642, - "learning_rate": 2.829519912220235e-08, - "loss": 1.1073, - "step": 6833 - }, - { - "epoch": 0.9264556361418017, - "grad_norm": 1.6355812828921477, - "learning_rate": 2.819156905607012e-08, - "loss": 1.1676, - "step": 6834 - }, - { - "epoch": 0.9265912017894665, - "grad_norm": 1.689521513639891, - "learning_rate": 2.8088126395522495e-08, - "loss": 1.114, - "step": 6835 - }, - { - "epoch": 0.9267267674371314, - "grad_norm": 2.1147291582176746, - "learning_rate": 2.7984871160508185e-08, - "loss": 1.0799, - "step": 6836 - }, - { - "epoch": 0.9268623330847963, - "grad_norm": 1.7594243840013313, - "learning_rate": 2.7881803370938595e-08, - "loss": 1.1069, - "step": 6837 - }, - { - "epoch": 0.9269978987324612, - "grad_norm": 2.036067408633303, - "learning_rate": 2.777892304669005e-08, - "loss": 1.1386, - "step": 6838 - }, - { - "epoch": 0.9271334643801261, - "grad_norm": 1.7587187651146927, - "learning_rate": 2.7676230207601793e-08, - "loss": 1.1702, - "step": 6839 - }, - { - "epoch": 0.9272690300277909, - "grad_norm": 7.328543404608752, - "learning_rate": 2.757372487347753e-08, - "loss": 1.1124, - "step": 6840 - }, - { - "epoch": 0.9274045956754559, - "grad_norm": 1.682606575809816, - "learning_rate": 2.747140706408446e-08, - "loss": 1.1414, - "step": 6841 - }, - { - "epoch": 0.9275401613231207, - "grad_norm": 2.774167565569425, - "learning_rate": 2.7369276799154017e-08, - "loss": 1.1374, - "step": 6842 - }, - { - "epoch": 0.9276757269707856, - "grad_norm": 1.5950815517922945, - "learning_rate": 2.7267334098381e-08, - "loss": 1.1141, - "step": 6843 - }, - { - "epoch": 0.9278112926184505, - "grad_norm": 1.6738585479040335, - "learning_rate": 2.7165578981424354e-08, - "loss": 1.148, - "step": 6844 - }, - { - "epoch": 0.9279468582661153, - "grad_norm": 1.8693509903456538, - "learning_rate": 2.70640114679066e-08, - "loss": 1.1221, - "step": 6845 - }, - { - "epoch": 0.9280824239137803, - "grad_norm": 2.886684785742913, - "learning_rate": 2.696263157741441e-08, - "loss": 1.1559, - "step": 6846 - }, - { - "epoch": 0.9282179895614451, - "grad_norm": 1.5470843795694595, - "learning_rate": 2.6861439329498026e-08, - "loss": 1.1327, - "step": 6847 - }, - { - "epoch": 0.92835355520911, - "grad_norm": 1.447405442538674, - "learning_rate": 2.6760434743671623e-08, - "loss": 1.1196, - "step": 6848 - }, - { - "epoch": 0.9284891208567749, - "grad_norm": 2.430337625797498, - "learning_rate": 2.665961783941306e-08, - "loss": 1.1377, - "step": 6849 - }, - { - "epoch": 0.9286246865044397, - "grad_norm": 1.5863078858033046, - "learning_rate": 2.6558988636164127e-08, - "loss": 1.1268, - "step": 6850 - }, - { - "epoch": 0.9287602521521047, - "grad_norm": 1.6888681514019686, - "learning_rate": 2.645854715333029e-08, - "loss": 1.1449, - "step": 6851 - }, - { - "epoch": 0.9288958177997695, - "grad_norm": 1.5652525715059276, - "learning_rate": 2.6358293410281062e-08, - "loss": 1.1231, - "step": 6852 - }, - { - "epoch": 0.9290313834474344, - "grad_norm": 1.3425517431882683, - "learning_rate": 2.6258227426349533e-08, - "loss": 1.1221, - "step": 6853 - }, - { - "epoch": 0.9291669490950993, - "grad_norm": 1.5413801893099683, - "learning_rate": 2.6158349220832375e-08, - "loss": 1.1158, - "step": 6854 - }, - { - "epoch": 0.9293025147427642, - "grad_norm": 1.608722259612879, - "learning_rate": 2.605865881299074e-08, - "loss": 1.0877, - "step": 6855 - }, - { - "epoch": 0.9294380803904291, - "grad_norm": 2.288984903035039, - "learning_rate": 2.5959156222048805e-08, - "loss": 1.1438, - "step": 6856 - }, - { - "epoch": 0.9295736460380939, - "grad_norm": 2.1255699451864762, - "learning_rate": 2.585984146719511e-08, - "loss": 1.1132, - "step": 6857 - }, - { - "epoch": 0.9297092116857588, - "grad_norm": 2.307623303092103, - "learning_rate": 2.5760714567581554e-08, - "loss": 1.147, - "step": 6858 - }, - { - "epoch": 0.9298447773334237, - "grad_norm": 1.621981968585142, - "learning_rate": 2.566177554232396e-08, - "loss": 1.0997, - "step": 6859 - }, - { - "epoch": 0.9299803429810886, - "grad_norm": 1.8398464172735132, - "learning_rate": 2.5563024410501954e-08, - "loss": 1.1323, - "step": 6860 - }, - { - "epoch": 0.9301159086287535, - "grad_norm": 2.020775364850191, - "learning_rate": 2.546446119115908e-08, - "loss": 1.1548, - "step": 6861 - }, - { - "epoch": 0.9302514742764184, - "grad_norm": 8.45707872653921, - "learning_rate": 2.5366085903302247e-08, - "loss": 1.1592, - "step": 6862 - }, - { - "epoch": 0.9303870399240832, - "grad_norm": 1.4773130824265868, - "learning_rate": 2.5267898565902503e-08, - "loss": 1.0809, - "step": 6863 - }, - { - "epoch": 0.9305226055717482, - "grad_norm": 1.522975134675905, - "learning_rate": 2.5169899197894363e-08, - "loss": 1.1032, - "step": 6864 - }, - { - "epoch": 0.930658171219413, - "grad_norm": 1.423814298483085, - "learning_rate": 2.507208781817638e-08, - "loss": 1.135, - "step": 6865 - }, - { - "epoch": 0.9307937368670779, - "grad_norm": 1.8082580080641621, - "learning_rate": 2.4974464445610688e-08, - "loss": 1.1338, - "step": 6866 - }, - { - "epoch": 0.9309293025147428, - "grad_norm": 1.5391555850519485, - "learning_rate": 2.4877029099023116e-08, - "loss": 1.1127, - "step": 6867 - }, - { - "epoch": 0.9310648681624076, - "grad_norm": 1.8537119215160822, - "learning_rate": 2.4779781797203303e-08, - "loss": 1.1228, - "step": 6868 - }, - { - "epoch": 0.9312004338100726, - "grad_norm": 1.4158912025470152, - "learning_rate": 2.468272255890469e-08, - "loss": 1.0864, - "step": 6869 - }, - { - "epoch": 0.9313359994577374, - "grad_norm": 1.6358665742437404, - "learning_rate": 2.4585851402844305e-08, - "loss": 1.1055, - "step": 6870 - }, - { - "epoch": 0.9314715651054023, - "grad_norm": 1.8459475676037305, - "learning_rate": 2.4489168347703093e-08, - "loss": 1.1356, - "step": 6871 - }, - { - "epoch": 0.9316071307530672, - "grad_norm": 2.005949064511714, - "learning_rate": 2.4392673412125476e-08, - "loss": 1.1204, - "step": 6872 - }, - { - "epoch": 0.931742696400732, - "grad_norm": 1.7113416633950895, - "learning_rate": 2.429636661472001e-08, - "loss": 1.1178, - "step": 6873 - }, - { - "epoch": 0.931878262048397, - "grad_norm": 1.600431983673069, - "learning_rate": 2.4200247974058175e-08, - "loss": 1.1163, - "step": 6874 - }, - { - "epoch": 0.9320138276960618, - "grad_norm": 1.4908099862583746, - "learning_rate": 2.4104317508676363e-08, - "loss": 1.1376, - "step": 6875 - }, - { - "epoch": 0.9321493933437267, - "grad_norm": 1.695193115508233, - "learning_rate": 2.4008575237073335e-08, - "loss": 1.1364, - "step": 6876 - }, - { - "epoch": 0.9322849589913916, - "grad_norm": 2.1605756427016853, - "learning_rate": 2.3913021177712876e-08, - "loss": 1.1329, - "step": 6877 - }, - { - "epoch": 0.9324205246390564, - "grad_norm": 2.1616886567957994, - "learning_rate": 2.3817655349021247e-08, - "loss": 1.1315, - "step": 6878 - }, - { - "epoch": 0.9325560902867214, - "grad_norm": 1.8165654892241727, - "learning_rate": 2.3722477769389515e-08, - "loss": 1.114, - "step": 6879 - }, - { - "epoch": 0.9326916559343862, - "grad_norm": 1.646726270540223, - "learning_rate": 2.362748845717155e-08, - "loss": 1.1305, - "step": 6880 - }, - { - "epoch": 0.9328272215820511, - "grad_norm": 2.359973408379461, - "learning_rate": 2.3532687430685373e-08, - "loss": 1.091, - "step": 6881 - }, - { - "epoch": 0.932962787229716, - "grad_norm": 2.2144609703574, - "learning_rate": 2.3438074708212795e-08, - "loss": 1.1284, - "step": 6882 - }, - { - "epoch": 0.9330983528773809, - "grad_norm": 1.4799906001551226, - "learning_rate": 2.3343650307998896e-08, - "loss": 1.0994, - "step": 6883 - }, - { - "epoch": 0.9332339185250458, - "grad_norm": 1.7118231466801366, - "learning_rate": 2.3249414248252775e-08, - "loss": 1.165, - "step": 6884 - }, - { - "epoch": 0.9333694841727106, - "grad_norm": 2.5494578071929563, - "learning_rate": 2.3155366547147115e-08, - "loss": 1.1343, - "step": 6885 - }, - { - "epoch": 0.9335050498203755, - "grad_norm": 1.512774227766172, - "learning_rate": 2.30615072228183e-08, - "loss": 1.1041, - "step": 6886 - }, - { - "epoch": 0.9336406154680404, - "grad_norm": 1.6596160337896253, - "learning_rate": 2.2967836293366405e-08, - "loss": 1.0703, - "step": 6887 - }, - { - "epoch": 0.9337761811157053, - "grad_norm": 1.6712557712755436, - "learning_rate": 2.287435377685498e-08, - "loss": 1.13, - "step": 6888 - }, - { - "epoch": 0.9339117467633702, - "grad_norm": 2.9449259168928275, - "learning_rate": 2.2781059691311498e-08, - "loss": 1.1441, - "step": 6889 - }, - { - "epoch": 0.934047312411035, - "grad_norm": 1.7447365669804742, - "learning_rate": 2.268795405472701e-08, - "loss": 1.1434, - "step": 6890 - }, - { - "epoch": 0.9341828780586999, - "grad_norm": 2.0289538258635775, - "learning_rate": 2.259503688505593e-08, - "loss": 1.1486, - "step": 6891 - }, - { - "epoch": 0.9343184437063649, - "grad_norm": 1.7106501023448455, - "learning_rate": 2.2502308200217037e-08, - "loss": 1.1031, - "step": 6892 - }, - { - "epoch": 0.9344540093540297, - "grad_norm": 1.5278767753801876, - "learning_rate": 2.2409768018092024e-08, - "loss": 1.1064, - "step": 6893 - }, - { - "epoch": 0.9345895750016946, - "grad_norm": 1.603858760241135, - "learning_rate": 2.231741635652673e-08, - "loss": 1.0992, - "step": 6894 - }, - { - "epoch": 0.9347251406493594, - "grad_norm": 1.5649204003960822, - "learning_rate": 2.222525323333013e-08, - "loss": 1.0969, - "step": 6895 - }, - { - "epoch": 0.9348607062970243, - "grad_norm": 3.884934791868871, - "learning_rate": 2.2133278666275567e-08, - "loss": 1.1311, - "step": 6896 - }, - { - "epoch": 0.9349962719446893, - "grad_norm": 1.9786188918362608, - "learning_rate": 2.2041492673099182e-08, - "loss": 1.1389, - "step": 6897 - }, - { - "epoch": 0.9351318375923541, - "grad_norm": 1.7392238911593925, - "learning_rate": 2.1949895271501596e-08, - "loss": 1.0899, - "step": 6898 - }, - { - "epoch": 0.935267403240019, - "grad_norm": 1.5506541021135933, - "learning_rate": 2.1858486479146344e-08, - "loss": 1.1179, - "step": 6899 - }, - { - "epoch": 0.9354029688876838, - "grad_norm": 1.9640767209978547, - "learning_rate": 2.1767266313661102e-08, - "loss": 1.1342, - "step": 6900 - }, - { - "epoch": 0.9355385345353487, - "grad_norm": 1.7957256088385278, - "learning_rate": 2.1676234792636693e-08, - "loss": 1.1289, - "step": 6901 - }, - { - "epoch": 0.9356741001830137, - "grad_norm": 1.4143884645959615, - "learning_rate": 2.1585391933628073e-08, - "loss": 1.093, - "step": 6902 - }, - { - "epoch": 0.9358096658306785, - "grad_norm": 1.9548100079438269, - "learning_rate": 2.1494737754153558e-08, - "loss": 1.1602, - "step": 6903 - }, - { - "epoch": 0.9359452314783434, - "grad_norm": 1.5401514396780907, - "learning_rate": 2.1404272271694945e-08, - "loss": 1.1401, - "step": 6904 - }, - { - "epoch": 0.9360807971260082, - "grad_norm": 1.700248843676753, - "learning_rate": 2.1313995503697833e-08, - "loss": 1.1304, - "step": 6905 - }, - { - "epoch": 0.9362163627736731, - "grad_norm": 1.3904635692365201, - "learning_rate": 2.122390746757141e-08, - "loss": 1.1307, - "step": 6906 - }, - { - "epoch": 0.9363519284213381, - "grad_norm": 1.8917772963462942, - "learning_rate": 2.1134008180688445e-08, - "loss": 1.1148, - "step": 6907 - }, - { - "epoch": 0.9364874940690029, - "grad_norm": 2.0561125405443206, - "learning_rate": 2.1044297660385292e-08, - "loss": 1.1402, - "step": 6908 - }, - { - "epoch": 0.9366230597166678, - "grad_norm": 1.4956986766305207, - "learning_rate": 2.0954775923961997e-08, - "loss": 1.1191, - "step": 6909 - }, - { - "epoch": 0.9367586253643326, - "grad_norm": 1.6868920760843293, - "learning_rate": 2.086544298868198e-08, - "loss": 1.1484, - "step": 6910 - }, - { - "epoch": 0.9368941910119976, - "grad_norm": 1.939613091674005, - "learning_rate": 2.077629887177257e-08, - "loss": 1.1569, - "step": 6911 - }, - { - "epoch": 0.9370297566596625, - "grad_norm": 1.5096455755822666, - "learning_rate": 2.0687343590424232e-08, - "loss": 1.0928, - "step": 6912 - }, - { - "epoch": 0.9371653223073273, - "grad_norm": 1.6314963218453522, - "learning_rate": 2.0598577161791587e-08, - "loss": 1.1127, - "step": 6913 - }, - { - "epoch": 0.9373008879549922, - "grad_norm": 2.7060184102772964, - "learning_rate": 2.050999960299249e-08, - "loss": 1.1593, - "step": 6914 - }, - { - "epoch": 0.937436453602657, - "grad_norm": 1.797217227441614, - "learning_rate": 2.0421610931108168e-08, - "loss": 1.1814, - "step": 6915 - }, - { - "epoch": 0.937572019250322, - "grad_norm": 1.6543921223683837, - "learning_rate": 2.033341116318399e-08, - "loss": 1.1058, - "step": 6916 - }, - { - "epoch": 0.9377075848979869, - "grad_norm": 1.8599213769151823, - "learning_rate": 2.0245400316228344e-08, - "loss": 1.1363, - "step": 6917 - }, - { - "epoch": 0.9378431505456517, - "grad_norm": 1.6185017304008251, - "learning_rate": 2.015757840721366e-08, - "loss": 1.1175, - "step": 6918 - }, - { - "epoch": 0.9379787161933166, - "grad_norm": 4.115561270786498, - "learning_rate": 2.006994545307539e-08, - "loss": 1.105, - "step": 6919 - }, - { - "epoch": 0.9381142818409814, - "grad_norm": 2.449940974897843, - "learning_rate": 1.998250147071323e-08, - "loss": 1.1078, - "step": 6920 - }, - { - "epoch": 0.9382498474886464, - "grad_norm": 2.5762316343508314, - "learning_rate": 1.9895246476989703e-08, - "loss": 1.1293, - "step": 6921 - }, - { - "epoch": 0.9383854131363113, - "grad_norm": 1.5649930771073488, - "learning_rate": 1.9808180488731564e-08, - "loss": 1.1367, - "step": 6922 - }, - { - "epoch": 0.9385209787839761, - "grad_norm": 2.0288761922376595, - "learning_rate": 1.9721303522728605e-08, - "loss": 1.1469, - "step": 6923 - }, - { - "epoch": 0.938656544431641, - "grad_norm": 1.5312586474589398, - "learning_rate": 1.9634615595734316e-08, - "loss": 1.1309, - "step": 6924 - }, - { - "epoch": 0.9387921100793059, - "grad_norm": 1.4396189867975817, - "learning_rate": 1.954811672446599e-08, - "loss": 1.1139, - "step": 6925 - }, - { - "epoch": 0.9389276757269708, - "grad_norm": 2.781162420781618, - "learning_rate": 1.9461806925604064e-08, - "loss": 1.1456, - "step": 6926 - }, - { - "epoch": 0.9390632413746357, - "grad_norm": 1.7084542387278123, - "learning_rate": 1.9375686215792886e-08, - "loss": 1.089, - "step": 6927 - }, - { - "epoch": 0.9391988070223005, - "grad_norm": 1.6301217211637664, - "learning_rate": 1.9289754611639954e-08, - "loss": 1.1247, - "step": 6928 - }, - { - "epoch": 0.9393343726699654, - "grad_norm": 2.1881348703900625, - "learning_rate": 1.9204012129716672e-08, - "loss": 1.1086, - "step": 6929 - }, - { - "epoch": 0.9394699383176303, - "grad_norm": 2.0637641662419264, - "learning_rate": 1.911845878655749e-08, - "loss": 1.1188, - "step": 6930 - }, - { - "epoch": 0.9396055039652952, - "grad_norm": 1.3888443120808518, - "learning_rate": 1.9033094598661204e-08, - "loss": 1.0902, - "step": 6931 - }, - { - "epoch": 0.9397410696129601, - "grad_norm": 1.7014554261257817, - "learning_rate": 1.89479195824892e-08, - "loss": 1.1303, - "step": 6932 - }, - { - "epoch": 0.9398766352606249, - "grad_norm": 2.7352697108917043, - "learning_rate": 1.8862933754467013e-08, - "loss": 1.1665, - "step": 6933 - }, - { - "epoch": 0.9400122009082899, - "grad_norm": 1.5174995413619772, - "learning_rate": 1.8778137130983307e-08, - "loss": 1.1348, - "step": 6934 - }, - { - "epoch": 0.9401477665559547, - "grad_norm": 2.2347833559009027, - "learning_rate": 1.8693529728390667e-08, - "loss": 1.1635, - "step": 6935 - }, - { - "epoch": 0.9402833322036196, - "grad_norm": 4.080928535063441, - "learning_rate": 1.860911156300482e-08, - "loss": 1.0835, - "step": 6936 - }, - { - "epoch": 0.9404188978512845, - "grad_norm": 4.595009910780769, - "learning_rate": 1.8524882651105188e-08, - "loss": 1.0638, - "step": 6937 - }, - { - "epoch": 0.9405544634989493, - "grad_norm": 1.6484230087947616, - "learning_rate": 1.844084300893456e-08, - "loss": 1.0847, - "step": 6938 - }, - { - "epoch": 0.9406900291466143, - "grad_norm": 1.891204761805782, - "learning_rate": 1.835699265269963e-08, - "loss": 1.1291, - "step": 6939 - }, - { - "epoch": 0.9408255947942791, - "grad_norm": 2.1444987737733614, - "learning_rate": 1.827333159856981e-08, - "loss": 1.1543, - "step": 6940 - }, - { - "epoch": 0.940961160441944, - "grad_norm": 1.9801879201372212, - "learning_rate": 1.8189859862678848e-08, - "loss": 1.1511, - "step": 6941 - }, - { - "epoch": 0.9410967260896089, - "grad_norm": 1.5178660464089382, - "learning_rate": 1.8106577461123428e-08, - "loss": 1.0982, - "step": 6942 - }, - { - "epoch": 0.9412322917372737, - "grad_norm": 3.0225626309540576, - "learning_rate": 1.802348440996393e-08, - "loss": 1.1179, - "step": 6943 - }, - { - "epoch": 0.9413678573849387, - "grad_norm": 2.4083727305932485, - "learning_rate": 1.794058072522431e-08, - "loss": 1.1289, - "step": 6944 - }, - { - "epoch": 0.9415034230326036, - "grad_norm": 1.6656442020042423, - "learning_rate": 1.7857866422891665e-08, - "loss": 1.1493, - "step": 6945 - }, - { - "epoch": 0.9416389886802684, - "grad_norm": 1.4908323405848853, - "learning_rate": 1.777534151891702e-08, - "loss": 1.1445, - "step": 6946 - }, - { - "epoch": 0.9417745543279333, - "grad_norm": 1.5463777066632447, - "learning_rate": 1.7693006029214418e-08, - "loss": 1.119, - "step": 6947 - }, - { - "epoch": 0.9419101199755981, - "grad_norm": 1.533462198788721, - "learning_rate": 1.7610859969661827e-08, - "loss": 1.099, - "step": 6948 - }, - { - "epoch": 0.9420456856232631, - "grad_norm": 2.8519730681738906, - "learning_rate": 1.7528903356100466e-08, - "loss": 1.1045, - "step": 6949 - }, - { - "epoch": 0.942181251270928, - "grad_norm": 1.608257416663336, - "learning_rate": 1.74471362043348e-08, - "loss": 1.1208, - "step": 6950 - }, - { - "epoch": 0.9423168169185928, - "grad_norm": 1.901796756834136, - "learning_rate": 1.7365558530133218e-08, - "loss": 1.1041, - "step": 6951 - }, - { - "epoch": 0.9424523825662577, - "grad_norm": 1.4729875696270804, - "learning_rate": 1.7284170349227246e-08, - "loss": 1.1117, - "step": 6952 - }, - { - "epoch": 0.9425879482139226, - "grad_norm": 1.5625233190109538, - "learning_rate": 1.7202971677311774e-08, - "loss": 1.072, - "step": 6953 - }, - { - "epoch": 0.9427235138615875, - "grad_norm": 1.6385066947084346, - "learning_rate": 1.712196253004572e-08, - "loss": 1.1512, - "step": 6954 - }, - { - "epoch": 0.9428590795092524, - "grad_norm": 2.1042447321625466, - "learning_rate": 1.704114292305059e-08, - "loss": 1.1363, - "step": 6955 - }, - { - "epoch": 0.9429946451569172, - "grad_norm": 1.4335103444210129, - "learning_rate": 1.6960512871912246e-08, - "loss": 1.1006, - "step": 6956 - }, - { - "epoch": 0.9431302108045821, - "grad_norm": 1.949361592202849, - "learning_rate": 1.6880072392179146e-08, - "loss": 1.1136, - "step": 6957 - }, - { - "epoch": 0.943265776452247, - "grad_norm": 1.8686321771259207, - "learning_rate": 1.6799821499363987e-08, - "loss": 1.1293, - "step": 6958 - }, - { - "epoch": 0.9434013420999119, - "grad_norm": 1.5042476473841653, - "learning_rate": 1.671976020894228e-08, - "loss": 1.1266, - "step": 6959 - }, - { - "epoch": 0.9435369077475768, - "grad_norm": 1.4545882622975386, - "learning_rate": 1.663988853635323e-08, - "loss": 1.1134, - "step": 6960 - }, - { - "epoch": 0.9436724733952416, - "grad_norm": 1.6075648476738564, - "learning_rate": 1.6560206496999517e-08, - "loss": 1.1516, - "step": 6961 - }, - { - "epoch": 0.9438080390429066, - "grad_norm": 2.4707797772849758, - "learning_rate": 1.6480714106247186e-08, - "loss": 1.1035, - "step": 6962 - }, - { - "epoch": 0.9439436046905714, - "grad_norm": 1.5349305209211468, - "learning_rate": 1.6401411379425746e-08, - "loss": 1.1482, - "step": 6963 - }, - { - "epoch": 0.9440791703382363, - "grad_norm": 3.5192745565454544, - "learning_rate": 1.6322298331827967e-08, - "loss": 1.0724, - "step": 6964 - }, - { - "epoch": 0.9442147359859012, - "grad_norm": 1.6381455956064097, - "learning_rate": 1.624337497871042e-08, - "loss": 1.165, - "step": 6965 - }, - { - "epoch": 0.944350301633566, - "grad_norm": 1.8208935821051806, - "learning_rate": 1.6164641335292606e-08, - "loss": 1.0733, - "step": 6966 - }, - { - "epoch": 0.944485867281231, - "grad_norm": 1.7882265840482923, - "learning_rate": 1.6086097416757816e-08, - "loss": 1.1647, - "step": 6967 - }, - { - "epoch": 0.9446214329288958, - "grad_norm": 3.9461249045427995, - "learning_rate": 1.60077432382526e-08, - "loss": 1.0858, - "step": 6968 - }, - { - "epoch": 0.9447569985765607, - "grad_norm": 1.5157451771493287, - "learning_rate": 1.5929578814886878e-08, - "loss": 1.0931, - "step": 6969 - }, - { - "epoch": 0.9448925642242256, - "grad_norm": 1.418917649438975, - "learning_rate": 1.5851604161734256e-08, - "loss": 1.129, - "step": 6970 - }, - { - "epoch": 0.9450281298718904, - "grad_norm": 1.7160193541228337, - "learning_rate": 1.5773819293831148e-08, - "loss": 1.1086, - "step": 6971 - }, - { - "epoch": 0.9451636955195554, - "grad_norm": 2.598892821167392, - "learning_rate": 1.5696224226178224e-08, - "loss": 1.1071, - "step": 6972 - }, - { - "epoch": 0.9452992611672202, - "grad_norm": 4.10186290263118, - "learning_rate": 1.5618818973738625e-08, - "loss": 1.1671, - "step": 6973 - }, - { - "epoch": 0.9454348268148851, - "grad_norm": 1.9416681296511424, - "learning_rate": 1.554160355143974e-08, - "loss": 1.0822, - "step": 6974 - }, - { - "epoch": 0.94557039246255, - "grad_norm": 1.9706453159419404, - "learning_rate": 1.5464577974171554e-08, - "loss": 1.1002, - "step": 6975 - }, - { - "epoch": 0.9457059581102149, - "grad_norm": 1.4920111976838892, - "learning_rate": 1.5387742256788294e-08, - "loss": 1.1616, - "step": 6976 - }, - { - "epoch": 0.9458415237578798, - "grad_norm": 1.4232597052134524, - "learning_rate": 1.531109641410666e-08, - "loss": 1.1262, - "step": 6977 - }, - { - "epoch": 0.9459770894055446, - "grad_norm": 1.520322017523513, - "learning_rate": 1.523464046090761e-08, - "loss": 1.1008, - "step": 6978 - }, - { - "epoch": 0.9461126550532095, - "grad_norm": 1.40208941147488, - "learning_rate": 1.5158374411934793e-08, - "loss": 1.1034, - "step": 6979 - }, - { - "epoch": 0.9462482207008744, - "grad_norm": 1.4798192546542372, - "learning_rate": 1.5082298281895666e-08, - "loss": 1.0933, - "step": 6980 - }, - { - "epoch": 0.9463837863485393, - "grad_norm": 1.5488945366744495, - "learning_rate": 1.500641208546072e-08, - "loss": 1.0924, - "step": 6981 - }, - { - "epoch": 0.9465193519962042, - "grad_norm": 2.5382371368791476, - "learning_rate": 1.493071583726424e-08, - "loss": 1.1301, - "step": 6982 - }, - { - "epoch": 0.946654917643869, - "grad_norm": 1.9912244011108433, - "learning_rate": 1.4855209551903559e-08, - "loss": 1.1357, - "step": 6983 - }, - { - "epoch": 0.9467904832915339, - "grad_norm": 2.419854062804998, - "learning_rate": 1.4779893243939356e-08, - "loss": 1.1251, - "step": 6984 - }, - { - "epoch": 0.9469260489391989, - "grad_norm": 1.8277314370351745, - "learning_rate": 1.4704766927895907e-08, - "loss": 1.1586, - "step": 6985 - }, - { - "epoch": 0.9470616145868637, - "grad_norm": 1.550716254316847, - "learning_rate": 1.462983061826084e-08, - "loss": 1.0853, - "step": 6986 - }, - { - "epoch": 0.9471971802345286, - "grad_norm": 1.4670054580694132, - "learning_rate": 1.4555084329484713e-08, - "loss": 1.1384, - "step": 6987 - }, - { - "epoch": 0.9473327458821934, - "grad_norm": 1.8156367834146792, - "learning_rate": 1.4480528075982102e-08, - "loss": 1.1317, - "step": 6988 - }, - { - "epoch": 0.9474683115298583, - "grad_norm": 1.565948072820113, - "learning_rate": 1.4406161872130396e-08, - "loss": 1.1455, - "step": 6989 - }, - { - "epoch": 0.9476038771775233, - "grad_norm": 1.93530425929698, - "learning_rate": 1.4331985732270457e-08, - "loss": 1.1336, - "step": 6990 - }, - { - "epoch": 0.9477394428251881, - "grad_norm": 1.7384357313483854, - "learning_rate": 1.4257999670706844e-08, - "loss": 1.1466, - "step": 6991 - }, - { - "epoch": 0.947875008472853, - "grad_norm": 1.4668070126874586, - "learning_rate": 1.418420370170681e-08, - "loss": 1.0818, - "step": 6992 - }, - { - "epoch": 0.9480105741205178, - "grad_norm": 1.4765431492607977, - "learning_rate": 1.4110597839501748e-08, - "loss": 1.1269, - "step": 6993 - }, - { - "epoch": 0.9481461397681827, - "grad_norm": 1.9986061847297891, - "learning_rate": 1.4037182098285639e-08, - "loss": 1.1334, - "step": 6994 - }, - { - "epoch": 0.9482817054158477, - "grad_norm": 1.9552551526949897, - "learning_rate": 1.3963956492216377e-08, - "loss": 1.1559, - "step": 6995 - }, - { - "epoch": 0.9484172710635125, - "grad_norm": 1.6178021943041743, - "learning_rate": 1.389092103541456e-08, - "loss": 1.1132, - "step": 6996 - }, - { - "epoch": 0.9485528367111774, - "grad_norm": 1.719740347235922, - "learning_rate": 1.3818075741965029e-08, - "loss": 1.1453, - "step": 6997 - }, - { - "epoch": 0.9486884023588422, - "grad_norm": 1.4432484057948856, - "learning_rate": 1.3745420625914995e-08, - "loss": 1.1107, - "step": 6998 - }, - { - "epoch": 0.9488239680065071, - "grad_norm": 1.685466209092293, - "learning_rate": 1.3672955701275579e-08, - "loss": 1.1244, - "step": 6999 - }, - { - "epoch": 0.9489595336541721, - "grad_norm": 1.4737824858413833, - "learning_rate": 1.360068098202105e-08, - "loss": 1.0703, - "step": 7000 - }, - { - "epoch": 0.9490950993018369, - "grad_norm": 1.682379012966888, - "learning_rate": 1.3528596482089039e-08, - "loss": 1.0797, - "step": 7001 - }, - { - "epoch": 0.9492306649495018, - "grad_norm": 1.409310652916507, - "learning_rate": 1.3456702215380534e-08, - "loss": 1.0984, - "step": 7002 - }, - { - "epoch": 0.9493662305971666, - "grad_norm": 1.6126173864908036, - "learning_rate": 1.3384998195759667e-08, - "loss": 1.122, - "step": 7003 - }, - { - "epoch": 0.9495017962448316, - "grad_norm": 1.6882387965699783, - "learning_rate": 1.3313484437053935e-08, - "loss": 1.1561, - "step": 7004 - }, - { - "epoch": 0.9496373618924965, - "grad_norm": 1.4902724635447342, - "learning_rate": 1.3242160953054415e-08, - "loss": 1.1106, - "step": 7005 - }, - { - "epoch": 0.9497729275401613, - "grad_norm": 1.8774091094312704, - "learning_rate": 1.3171027757515107e-08, - "loss": 1.1282, - "step": 7006 - }, - { - "epoch": 0.9499084931878262, - "grad_norm": 1.8240364686125745, - "learning_rate": 1.3100084864153593e-08, - "loss": 1.1566, - "step": 7007 - }, - { - "epoch": 0.950044058835491, - "grad_norm": 1.4623987849287614, - "learning_rate": 1.3029332286650596e-08, - "loss": 1.1101, - "step": 7008 - }, - { - "epoch": 0.950179624483156, - "grad_norm": 1.5623855965835967, - "learning_rate": 1.295877003865009e-08, - "loss": 1.0816, - "step": 7009 - }, - { - "epoch": 0.9503151901308209, - "grad_norm": 1.9690247623136985, - "learning_rate": 1.2888398133759637e-08, - "loss": 1.1423, - "step": 7010 - }, - { - "epoch": 0.9504507557784857, - "grad_norm": 4.264784535213333, - "learning_rate": 1.2818216585549824e-08, - "loss": 1.0882, - "step": 7011 - }, - { - "epoch": 0.9505863214261506, - "grad_norm": 1.5774333995034693, - "learning_rate": 1.2748225407554603e-08, - "loss": 1.1355, - "step": 7012 - }, - { - "epoch": 0.9507218870738154, - "grad_norm": 2.2253130436577773, - "learning_rate": 1.2678424613271288e-08, - "loss": 1.1422, - "step": 7013 - }, - { - "epoch": 0.9508574527214804, - "grad_norm": 1.5217462288509849, - "learning_rate": 1.2608814216160223e-08, - "loss": 1.1199, - "step": 7014 - }, - { - "epoch": 0.9509930183691453, - "grad_norm": 1.7429813997846948, - "learning_rate": 1.253939422964545e-08, - "loss": 1.0967, - "step": 7015 - }, - { - "epoch": 0.9511285840168101, - "grad_norm": 1.4754466714386876, - "learning_rate": 1.2470164667113926e-08, - "loss": 1.1104, - "step": 7016 - }, - { - "epoch": 0.951264149664475, - "grad_norm": 1.59168026806483, - "learning_rate": 1.2401125541915968e-08, - "loss": 1.121, - "step": 7017 - }, - { - "epoch": 0.9513997153121398, - "grad_norm": 1.4194686319424232, - "learning_rate": 1.2332276867365377e-08, - "loss": 1.0976, - "step": 7018 - }, - { - "epoch": 0.9515352809598048, - "grad_norm": 1.9961022998179265, - "learning_rate": 1.2263618656739083e-08, - "loss": 1.1202, - "step": 7019 - }, - { - "epoch": 0.9516708466074697, - "grad_norm": 1.9676001929465468, - "learning_rate": 1.2195150923277054e-08, - "loss": 1.1055, - "step": 7020 - }, - { - "epoch": 0.9518064122551345, - "grad_norm": 1.539056033471364, - "learning_rate": 1.2126873680183058e-08, - "loss": 1.1202, - "step": 7021 - }, - { - "epoch": 0.9519419779027994, - "grad_norm": 1.940692829828634, - "learning_rate": 1.2058786940623678e-08, - "loss": 1.0999, - "step": 7022 - }, - { - "epoch": 0.9520775435504644, - "grad_norm": 1.561046281358634, - "learning_rate": 1.1990890717728852e-08, - "loss": 1.1289, - "step": 7023 - }, - { - "epoch": 0.9522131091981292, - "grad_norm": 1.6463275957616834, - "learning_rate": 1.1923185024591775e-08, - "loss": 1.1157, - "step": 7024 - }, - { - "epoch": 0.9523486748457941, - "grad_norm": 1.6088985186574614, - "learning_rate": 1.1855669874269225e-08, - "loss": 1.1177, - "step": 7025 - }, - { - "epoch": 0.9524842404934589, - "grad_norm": 2.9850588562226403, - "learning_rate": 1.1788345279780786e-08, - "loss": 1.1137, - "step": 7026 - }, - { - "epoch": 0.9526198061411238, - "grad_norm": 1.6013367272812562, - "learning_rate": 1.1721211254109408e-08, - "loss": 1.1292, - "step": 7027 - }, - { - "epoch": 0.9527553717887888, - "grad_norm": 1.692867673013077, - "learning_rate": 1.1654267810201512e-08, - "loss": 1.1381, - "step": 7028 - }, - { - "epoch": 0.9528909374364536, - "grad_norm": 1.7143692753878064, - "learning_rate": 1.1587514960966437e-08, - "loss": 1.1621, - "step": 7029 - }, - { - "epoch": 0.9530265030841185, - "grad_norm": 1.4412971579451728, - "learning_rate": 1.1520952719277222e-08, - "loss": 1.1649, - "step": 7030 - }, - { - "epoch": 0.9531620687317833, - "grad_norm": 1.5685835808405408, - "learning_rate": 1.1454581097969595e-08, - "loss": 1.1351, - "step": 7031 - }, - { - "epoch": 0.9532976343794483, - "grad_norm": 2.542030850939974, - "learning_rate": 1.1388400109842878e-08, - "loss": 1.106, - "step": 7032 - }, - { - "epoch": 0.9534332000271132, - "grad_norm": 2.060472190017656, - "learning_rate": 1.1322409767659525e-08, - "loss": 1.1542, - "step": 7033 - }, - { - "epoch": 0.953568765674778, - "grad_norm": 1.5997284460101098, - "learning_rate": 1.1256610084145468e-08, - "loss": 1.0845, - "step": 7034 - }, - { - "epoch": 0.9537043313224429, - "grad_norm": 1.6596142918303498, - "learning_rate": 1.1191001071989336e-08, - "loss": 1.1301, - "step": 7035 - }, - { - "epoch": 0.9538398969701077, - "grad_norm": 1.6565750681081113, - "learning_rate": 1.1125582743843564e-08, - "loss": 1.0916, - "step": 7036 - }, - { - "epoch": 0.9539754626177727, - "grad_norm": 1.8110036112163295, - "learning_rate": 1.1060355112323395e-08, - "loss": 1.184, - "step": 7037 - }, - { - "epoch": 0.9541110282654376, - "grad_norm": 1.667465905542004, - "learning_rate": 1.0995318190007652e-08, - "loss": 1.1164, - "step": 7038 - }, - { - "epoch": 0.9542465939131024, - "grad_norm": 1.697623659427002, - "learning_rate": 1.0930471989437862e-08, - "loss": 1.0817, - "step": 7039 - }, - { - "epoch": 0.9543821595607673, - "grad_norm": 3.935573748679422, - "learning_rate": 1.0865816523119464e-08, - "loss": 1.1434, - "step": 7040 - }, - { - "epoch": 0.9545177252084321, - "grad_norm": 1.5734379893153523, - "learning_rate": 1.0801351803520598e-08, - "loss": 1.1055, - "step": 7041 - }, - { - "epoch": 0.9546532908560971, - "grad_norm": 1.4948716022079365, - "learning_rate": 1.0737077843072762e-08, - "loss": 1.1059, - "step": 7042 - }, - { - "epoch": 0.954788856503762, - "grad_norm": 1.824357927718509, - "learning_rate": 1.0672994654170598e-08, - "loss": 1.1228, - "step": 7043 - }, - { - "epoch": 0.9549244221514268, - "grad_norm": 2.4757347757008885, - "learning_rate": 1.060910224917222e-08, - "loss": 1.1123, - "step": 7044 - }, - { - "epoch": 0.9550599877990917, - "grad_norm": 1.5901272981483545, - "learning_rate": 1.054540064039866e-08, - "loss": 1.1083, - "step": 7045 - }, - { - "epoch": 0.9551955534467566, - "grad_norm": 1.7479814841875148, - "learning_rate": 1.0481889840134428e-08, - "loss": 1.1084, - "step": 7046 - }, - { - "epoch": 0.9553311190944215, - "grad_norm": 1.8322311490789407, - "learning_rate": 1.0418569860626836e-08, - "loss": 1.1152, - "step": 7047 - }, - { - "epoch": 0.9554666847420864, - "grad_norm": 1.6550568489772073, - "learning_rate": 1.0355440714086782e-08, - "loss": 1.1354, - "step": 7048 - }, - { - "epoch": 0.9556022503897512, - "grad_norm": 1.7036847840594365, - "learning_rate": 1.0292502412688198e-08, - "loss": 1.116, - "step": 7049 - }, - { - "epoch": 0.9557378160374161, - "grad_norm": 1.5906833250912993, - "learning_rate": 1.0229754968568261e-08, - "loss": 1.1184, - "step": 7050 - }, - { - "epoch": 0.955873381685081, - "grad_norm": 1.783708674905589, - "learning_rate": 1.0167198393827403e-08, - "loss": 1.1177, - "step": 7051 - }, - { - "epoch": 0.9560089473327459, - "grad_norm": 1.764198613565205, - "learning_rate": 1.0104832700528975e-08, - "loss": 1.11, - "step": 7052 - }, - { - "epoch": 0.9561445129804108, - "grad_norm": 1.6596475034240916, - "learning_rate": 1.0042657900699803e-08, - "loss": 1.1342, - "step": 7053 - }, - { - "epoch": 0.9562800786280756, - "grad_norm": 1.6295800351997574, - "learning_rate": 9.980674006329848e-09, - "loss": 1.0919, - "step": 7054 - }, - { - "epoch": 0.9564156442757406, - "grad_norm": 2.8827260203681675, - "learning_rate": 9.918881029372106e-09, - "loss": 1.0943, - "step": 7055 - }, - { - "epoch": 0.9565512099234054, - "grad_norm": 2.587169892135705, - "learning_rate": 9.857278981742934e-09, - "loss": 1.1564, - "step": 7056 - }, - { - "epoch": 0.9566867755710703, - "grad_norm": 1.473161758738789, - "learning_rate": 9.795867875321829e-09, - "loss": 1.1241, - "step": 7057 - }, - { - "epoch": 0.9568223412187352, - "grad_norm": 1.615064137202894, - "learning_rate": 9.734647721951427e-09, - "loss": 1.1261, - "step": 7058 - }, - { - "epoch": 0.9569579068664, - "grad_norm": 1.7201019796528774, - "learning_rate": 9.673618533437511e-09, - "loss": 1.1389, - "step": 7059 - }, - { - "epoch": 0.957093472514065, - "grad_norm": 1.9422316613605863, - "learning_rate": 9.612780321549108e-09, - "loss": 1.1384, - "step": 7060 - }, - { - "epoch": 0.9572290381617298, - "grad_norm": 1.9306842905993578, - "learning_rate": 9.552133098018389e-09, - "loss": 1.1345, - "step": 7061 - }, - { - "epoch": 0.9573646038093947, - "grad_norm": 1.4638964129825902, - "learning_rate": 9.491676874540666e-09, - "loss": 1.0874, - "step": 7062 - }, - { - "epoch": 0.9575001694570596, - "grad_norm": 1.9466127588448845, - "learning_rate": 9.431411662774502e-09, - "loss": 1.1508, - "step": 7063 - }, - { - "epoch": 0.9576357351047244, - "grad_norm": 2.012846626331102, - "learning_rate": 9.37133747434149e-09, - "loss": 1.1348, - "step": 7064 - }, - { - "epoch": 0.9577713007523894, - "grad_norm": 1.5161946064349907, - "learning_rate": 9.311454320826473e-09, - "loss": 1.1384, - "step": 7065 - }, - { - "epoch": 0.9579068664000542, - "grad_norm": 2.141844283193209, - "learning_rate": 9.251762213777437e-09, - "loss": 1.137, - "step": 7066 - }, - { - "epoch": 0.9580424320477191, - "grad_norm": 1.6704077726058348, - "learning_rate": 9.192261164705617e-09, - "loss": 1.1371, - "step": 7067 - }, - { - "epoch": 0.958177997695384, - "grad_norm": 3.13955657952051, - "learning_rate": 9.132951185085281e-09, - "loss": 1.116, - "step": 7068 - }, - { - "epoch": 0.9583135633430488, - "grad_norm": 1.6198257434733616, - "learning_rate": 9.073832286353944e-09, - "loss": 1.1515, - "step": 7069 - }, - { - "epoch": 0.9584491289907138, - "grad_norm": 1.6574897957367236, - "learning_rate": 9.014904479912044e-09, - "loss": 1.1392, - "step": 7070 - }, - { - "epoch": 0.9585846946383786, - "grad_norm": 1.581170948353582, - "learning_rate": 8.956167777123602e-09, - "loss": 1.101, - "step": 7071 - }, - { - "epoch": 0.9587202602860435, - "grad_norm": 1.4310285073229456, - "learning_rate": 8.897622189315224e-09, - "loss": 1.1309, - "step": 7072 - }, - { - "epoch": 0.9588558259337084, - "grad_norm": 1.6116000467483946, - "learning_rate": 8.839267727777211e-09, - "loss": 1.1603, - "step": 7073 - }, - { - "epoch": 0.9589913915813733, - "grad_norm": 2.6239475966839567, - "learning_rate": 8.781104403762563e-09, - "loss": 1.0826, - "step": 7074 - }, - { - "epoch": 0.9591269572290382, - "grad_norm": 1.8855744227272089, - "learning_rate": 8.723132228487861e-09, - "loss": 1.1513, - "step": 7075 - }, - { - "epoch": 0.959262522876703, - "grad_norm": 2.3400778616605713, - "learning_rate": 8.665351213132278e-09, - "loss": 1.119, - "step": 7076 - }, - { - "epoch": 0.9593980885243679, - "grad_norm": 1.4148151668098479, - "learning_rate": 8.607761368838785e-09, - "loss": 1.1057, - "step": 7077 - }, - { - "epoch": 0.9595336541720328, - "grad_norm": 1.534889372354019, - "learning_rate": 8.550362706712832e-09, - "loss": 1.1322, - "step": 7078 - }, - { - "epoch": 0.9596692198196977, - "grad_norm": 2.6689759528307864, - "learning_rate": 8.493155237823347e-09, - "loss": 1.1548, - "step": 7079 - }, - { - "epoch": 0.9598047854673626, - "grad_norm": 2.7413091670938754, - "learning_rate": 8.4361389732025e-09, - "loss": 1.111, - "step": 7080 - }, - { - "epoch": 0.9599403511150274, - "grad_norm": 4.442462107929643, - "learning_rate": 8.379313923845277e-09, - "loss": 1.1088, - "step": 7081 - }, - { - "epoch": 0.9600759167626923, - "grad_norm": 2.834748788569348, - "learning_rate": 8.322680100710022e-09, - "loss": 1.0987, - "step": 7082 - }, - { - "epoch": 0.9602114824103573, - "grad_norm": 2.959571665425855, - "learning_rate": 8.266237514718e-09, - "loss": 1.1446, - "step": 7083 - }, - { - "epoch": 0.9603470480580221, - "grad_norm": 1.7645127785183479, - "learning_rate": 8.209986176753948e-09, - "loss": 1.0843, - "step": 7084 - }, - { - "epoch": 0.960482613705687, - "grad_norm": 1.6267555542610739, - "learning_rate": 8.153926097665186e-09, - "loss": 1.1277, - "step": 7085 - }, - { - "epoch": 0.9606181793533518, - "grad_norm": 4.284502447108667, - "learning_rate": 8.098057288262738e-09, - "loss": 1.1249, - "step": 7086 - }, - { - "epoch": 0.9607537450010167, - "grad_norm": 1.7273229768671976, - "learning_rate": 8.042379759320317e-09, - "loss": 1.1452, - "step": 7087 - }, - { - "epoch": 0.9608893106486817, - "grad_norm": 3.8945717788169163, - "learning_rate": 7.986893521574888e-09, - "loss": 1.1356, - "step": 7088 - }, - { - "epoch": 0.9610248762963465, - "grad_norm": 1.5994513965964476, - "learning_rate": 7.931598585726562e-09, - "loss": 1.1225, - "step": 7089 - }, - { - "epoch": 0.9611604419440114, - "grad_norm": 1.7687039554065647, - "learning_rate": 7.876494962438585e-09, - "loss": 1.1499, - "step": 7090 - }, - { - "epoch": 0.9612960075916762, - "grad_norm": 5.193396830910458, - "learning_rate": 7.821582662337123e-09, - "loss": 1.1165, - "step": 7091 - }, - { - "epoch": 0.9614315732393411, - "grad_norm": 2.1931260975269296, - "learning_rate": 7.766861696011816e-09, - "loss": 1.1086, - "step": 7092 - }, - { - "epoch": 0.9615671388870061, - "grad_norm": 2.20352639195792, - "learning_rate": 7.712332074014893e-09, - "loss": 1.1049, - "step": 7093 - }, - { - "epoch": 0.9617027045346709, - "grad_norm": 1.5174326014010209, - "learning_rate": 7.657993806862162e-09, - "loss": 1.1432, - "step": 7094 - }, - { - "epoch": 0.9618382701823358, - "grad_norm": 1.8046047865699633, - "learning_rate": 7.603846905032129e-09, - "loss": 1.1142, - "step": 7095 - }, - { - "epoch": 0.9619738358300006, - "grad_norm": 2.597758872012904, - "learning_rate": 7.549891378966888e-09, - "loss": 1.1371, - "step": 7096 - }, - { - "epoch": 0.9621094014776655, - "grad_norm": 1.8319171107002798, - "learning_rate": 7.496127239071003e-09, - "loss": 1.1382, - "step": 7097 - }, - { - "epoch": 0.9622449671253305, - "grad_norm": 1.6932803774634821, - "learning_rate": 7.442554495712738e-09, - "loss": 1.1157, - "step": 7098 - }, - { - "epoch": 0.9623805327729953, - "grad_norm": 1.3767774244766573, - "learning_rate": 7.3891731592230496e-09, - "loss": 1.0886, - "step": 7099 - }, - { - "epoch": 0.9625160984206602, - "grad_norm": 2.039447888089875, - "learning_rate": 7.335983239896148e-09, - "loss": 1.1239, - "step": 7100 - }, - { - "epoch": 0.9626516640683251, - "grad_norm": 1.4081469058227525, - "learning_rate": 7.282984747989163e-09, - "loss": 1.124, - "step": 7101 - }, - { - "epoch": 0.96278722971599, - "grad_norm": 3.252523134627172, - "learning_rate": 7.230177693722583e-09, - "loss": 1.1134, - "step": 7102 - }, - { - "epoch": 0.9629227953636549, - "grad_norm": 1.5921518814130362, - "learning_rate": 7.17756208727982e-09, - "loss": 1.1242, - "step": 7103 - }, - { - "epoch": 0.9630583610113197, - "grad_norm": 1.3698675871082773, - "learning_rate": 7.125137938807424e-09, - "loss": 1.105, - "step": 7104 - }, - { - "epoch": 0.9631939266589846, - "grad_norm": 2.4243470379992877, - "learning_rate": 7.072905258414752e-09, - "loss": 1.1147, - "step": 7105 - }, - { - "epoch": 0.9633294923066495, - "grad_norm": 5.523284199982062, - "learning_rate": 7.020864056174635e-09, - "loss": 1.085, - "step": 7106 - }, - { - "epoch": 0.9634650579543144, - "grad_norm": 1.5856766888613834, - "learning_rate": 6.969014342122825e-09, - "loss": 1.1012, - "step": 7107 - }, - { - "epoch": 0.9636006236019793, - "grad_norm": 1.6116208092663489, - "learning_rate": 6.9173561262581e-09, - "loss": 1.1546, - "step": 7108 - }, - { - "epoch": 0.9637361892496441, - "grad_norm": 1.833366073579942, - "learning_rate": 6.86588941854227e-09, - "loss": 1.1238, - "step": 7109 - }, - { - "epoch": 0.963871754897309, - "grad_norm": 1.5748170894715938, - "learning_rate": 6.814614228900506e-09, - "loss": 1.1208, - "step": 7110 - }, - { - "epoch": 0.964007320544974, - "grad_norm": 1.63186729891517, - "learning_rate": 6.763530567220455e-09, - "loss": 1.1544, - "step": 7111 - }, - { - "epoch": 0.9641428861926388, - "grad_norm": 1.648992437110244, - "learning_rate": 6.712638443353569e-09, - "loss": 1.1239, - "step": 7112 - }, - { - "epoch": 0.9642784518403037, - "grad_norm": 1.8048419655246692, - "learning_rate": 6.661937867113665e-09, - "loss": 1.1468, - "step": 7113 - }, - { - "epoch": 0.9644140174879685, - "grad_norm": 4.884367236613135, - "learning_rate": 6.611428848278256e-09, - "loss": 1.1271, - "step": 7114 - }, - { - "epoch": 0.9645495831356334, - "grad_norm": 1.7555428768901487, - "learning_rate": 6.5611113965873265e-09, - "loss": 1.1821, - "step": 7115 - }, - { - "epoch": 0.9646851487832984, - "grad_norm": 1.8133530777846618, - "learning_rate": 6.51098552174445e-09, - "loss": 1.1765, - "step": 7116 - }, - { - "epoch": 0.9648207144309632, - "grad_norm": 1.9610990748474797, - "learning_rate": 6.461051233415782e-09, - "loss": 1.1183, - "step": 7117 - }, - { - "epoch": 0.9649562800786281, - "grad_norm": 1.7801324177139817, - "learning_rate": 6.4113085412309535e-09, - "loss": 1.1273, - "step": 7118 - }, - { - "epoch": 0.9650918457262929, - "grad_norm": 2.758625678388622, - "learning_rate": 6.361757454782291e-09, - "loss": 1.1114, - "step": 7119 - }, - { - "epoch": 0.9652274113739578, - "grad_norm": 1.4349348408163982, - "learning_rate": 6.312397983625483e-09, - "loss": 1.1397, - "step": 7120 - }, - { - "epoch": 0.9653629770216228, - "grad_norm": 1.4629378043246057, - "learning_rate": 6.2632301372789185e-09, - "loss": 1.1284, - "step": 7121 - }, - { - "epoch": 0.9654985426692876, - "grad_norm": 1.4445981636057337, - "learning_rate": 6.214253925224455e-09, - "loss": 1.1088, - "step": 7122 - }, - { - "epoch": 0.9656341083169525, - "grad_norm": 2.432048643503999, - "learning_rate": 6.165469356906539e-09, - "loss": 1.1242, - "step": 7123 - }, - { - "epoch": 0.9657696739646173, - "grad_norm": 1.5410841880490678, - "learning_rate": 6.116876441733087e-09, - "loss": 1.1482, - "step": 7124 - }, - { - "epoch": 0.9659052396122823, - "grad_norm": 2.2142902560410387, - "learning_rate": 6.068475189074829e-09, - "loss": 1.1287, - "step": 7125 - }, - { - "epoch": 0.9660408052599472, - "grad_norm": 1.5111854228730028, - "learning_rate": 6.020265608265407e-09, - "loss": 1.1232, - "step": 7126 - }, - { - "epoch": 0.966176370907612, - "grad_norm": 1.6558155383163164, - "learning_rate": 5.97224770860183e-09, - "loss": 1.1807, - "step": 7127 - }, - { - "epoch": 0.9663119365552769, - "grad_norm": 1.615425679779229, - "learning_rate": 5.924421499343801e-09, - "loss": 1.1129, - "step": 7128 - }, - { - "epoch": 0.9664475022029417, - "grad_norm": 2.127723412201215, - "learning_rate": 5.8767869897145e-09, - "loss": 1.1624, - "step": 7129 - }, - { - "epoch": 0.9665830678506067, - "grad_norm": 2.8623555409848, - "learning_rate": 5.8293441888994655e-09, - "loss": 1.0965, - "step": 7130 - }, - { - "epoch": 0.9667186334982716, - "grad_norm": 1.6066327093785773, - "learning_rate": 5.7820931060481585e-09, - "loss": 1.1039, - "step": 7131 - }, - { - "epoch": 0.9668541991459364, - "grad_norm": 1.7174693726204402, - "learning_rate": 5.735033750272067e-09, - "loss": 1.1019, - "step": 7132 - }, - { - "epoch": 0.9669897647936013, - "grad_norm": 1.6579901600833538, - "learning_rate": 5.68816613064671e-09, - "loss": 1.1466, - "step": 7133 - }, - { - "epoch": 0.9671253304412661, - "grad_norm": 1.5610044045742024, - "learning_rate": 5.6414902562096356e-09, - "loss": 1.1381, - "step": 7134 - }, - { - "epoch": 0.9672608960889311, - "grad_norm": 1.746035875751545, - "learning_rate": 5.595006135962421e-09, - "loss": 1.1312, - "step": 7135 - }, - { - "epoch": 0.967396461736596, - "grad_norm": 1.7734437018590852, - "learning_rate": 5.548713778868786e-09, - "loss": 1.1387, - "step": 7136 - }, - { - "epoch": 0.9675320273842608, - "grad_norm": 1.771007087823352, - "learning_rate": 5.502613193856031e-09, - "loss": 1.1048, - "step": 7137 - }, - { - "epoch": 0.9676675930319257, - "grad_norm": 2.1095793839090997, - "learning_rate": 5.45670438981416e-09, - "loss": 1.1298, - "step": 7138 - }, - { - "epoch": 0.9678031586795905, - "grad_norm": 1.6212194984101325, - "learning_rate": 5.4109873755964205e-09, - "loss": 1.1277, - "step": 7139 - }, - { - "epoch": 0.9679387243272555, - "grad_norm": 1.8277827736060042, - "learning_rate": 5.365462160018985e-09, - "loss": 1.1247, - "step": 7140 - }, - { - "epoch": 0.9680742899749204, - "grad_norm": 1.932440151472279, - "learning_rate": 5.3201287518610525e-09, - "loss": 1.1012, - "step": 7141 - }, - { - "epoch": 0.9682098556225852, - "grad_norm": 1.605497132533525, - "learning_rate": 5.274987159864741e-09, - "loss": 1.1269, - "step": 7142 - }, - { - "epoch": 0.9683454212702501, - "grad_norm": 1.642131176520872, - "learning_rate": 5.2300373927351984e-09, - "loss": 1.1767, - "step": 7143 - }, - { - "epoch": 0.968480986917915, - "grad_norm": 2.129711126272144, - "learning_rate": 5.185279459140823e-09, - "loss": 1.1486, - "step": 7144 - }, - { - "epoch": 0.9686165525655799, - "grad_norm": 1.3656983531698006, - "learning_rate": 5.140713367712601e-09, - "loss": 1.1265, - "step": 7145 - }, - { - "epoch": 0.9687521182132448, - "grad_norm": 1.694345572492879, - "learning_rate": 5.09633912704488e-09, - "loss": 1.1495, - "step": 7146 - }, - { - "epoch": 0.9688876838609096, - "grad_norm": 3.6898666568615197, - "learning_rate": 5.052156745694924e-09, - "loss": 1.1192, - "step": 7147 - }, - { - "epoch": 0.9690232495085745, - "grad_norm": 1.7756295428082591, - "learning_rate": 5.00816623218292e-09, - "loss": 1.1683, - "step": 7148 - }, - { - "epoch": 0.9691588151562394, - "grad_norm": 1.6327934167760025, - "learning_rate": 4.964367594991969e-09, - "loss": 1.117, - "step": 7149 - }, - { - "epoch": 0.9692943808039043, - "grad_norm": 1.530117193891137, - "learning_rate": 4.920760842568539e-09, - "loss": 1.1185, - "step": 7150 - }, - { - "epoch": 0.9694299464515692, - "grad_norm": 1.7007134929227408, - "learning_rate": 4.877345983321568e-09, - "loss": 1.1529, - "step": 7151 - }, - { - "epoch": 0.969565512099234, - "grad_norm": 1.394109594785006, - "learning_rate": 4.834123025623471e-09, - "loss": 1.1608, - "step": 7152 - }, - { - "epoch": 0.969701077746899, - "grad_norm": 1.4413727284940347, - "learning_rate": 4.791091977809358e-09, - "loss": 1.1141, - "step": 7153 - }, - { - "epoch": 0.9698366433945638, - "grad_norm": 1.7147988336135565, - "learning_rate": 4.7482528481774805e-09, - "loss": 1.063, - "step": 7154 - }, - { - "epoch": 0.9699722090422287, - "grad_norm": 1.5485004614149296, - "learning_rate": 4.705605644988897e-09, - "loss": 1.14, - "step": 7155 - }, - { - "epoch": 0.9701077746898936, - "grad_norm": 1.5881198562929004, - "learning_rate": 4.663150376468028e-09, - "loss": 1.1091, - "step": 7156 - }, - { - "epoch": 0.9702433403375584, - "grad_norm": 8.21563052108238, - "learning_rate": 4.62088705080177e-09, - "loss": 1.1738, - "step": 7157 - }, - { - "epoch": 0.9703789059852234, - "grad_norm": 1.5318017690288654, - "learning_rate": 4.5788156761404906e-09, - "loss": 1.1361, - "step": 7158 - }, - { - "epoch": 0.9705144716328882, - "grad_norm": 1.6666308376379215, - "learning_rate": 4.536936260597257e-09, - "loss": 1.1286, - "step": 7159 - }, - { - "epoch": 0.9706500372805531, - "grad_norm": 1.4569601376523809, - "learning_rate": 4.495248812248054e-09, - "loss": 1.1168, - "step": 7160 - }, - { - "epoch": 0.970785602928218, - "grad_norm": 1.8935464034122065, - "learning_rate": 4.453753339132116e-09, - "loss": 1.1085, - "step": 7161 - }, - { - "epoch": 0.9709211685758828, - "grad_norm": 2.1801083624592277, - "learning_rate": 4.412449849251598e-09, - "loss": 1.11, - "step": 7162 - }, - { - "epoch": 0.9710567342235478, - "grad_norm": 1.765009061042792, - "learning_rate": 4.371338350571352e-09, - "loss": 1.1234, - "step": 7163 - }, - { - "epoch": 0.9711922998712126, - "grad_norm": 5.025516634475902, - "learning_rate": 4.3304188510194795e-09, - "loss": 1.1265, - "step": 7164 - }, - { - "epoch": 0.9713278655188775, - "grad_norm": 1.6272756298438467, - "learning_rate": 4.289691358486891e-09, - "loss": 1.1677, - "step": 7165 - }, - { - "epoch": 0.9714634311665424, - "grad_norm": 1.750756976530659, - "learning_rate": 4.249155880827859e-09, - "loss": 1.1239, - "step": 7166 - }, - { - "epoch": 0.9715989968142072, - "grad_norm": 1.7505323691800598, - "learning_rate": 4.2088124258590205e-09, - "loss": 1.128, - "step": 7167 - }, - { - "epoch": 0.9717345624618722, - "grad_norm": 1.6963514678904743, - "learning_rate": 4.168661001360485e-09, - "loss": 1.1118, - "step": 7168 - }, - { - "epoch": 0.971870128109537, - "grad_norm": 2.4482028367342936, - "learning_rate": 4.128701615074947e-09, - "loss": 1.1329, - "step": 7169 - }, - { - "epoch": 0.9720056937572019, - "grad_norm": 1.9912879939316792, - "learning_rate": 4.088934274708466e-09, - "loss": 1.1141, - "step": 7170 - }, - { - "epoch": 0.9721412594048668, - "grad_norm": 1.6844963241004025, - "learning_rate": 4.049358987929685e-09, - "loss": 1.1172, - "step": 7171 - }, - { - "epoch": 0.9722768250525317, - "grad_norm": 1.9139355370590987, - "learning_rate": 4.00997576237061e-09, - "loss": 1.1377, - "step": 7172 - }, - { - "epoch": 0.9724123907001966, - "grad_norm": 1.5628172468659727, - "learning_rate": 3.970784605625721e-09, - "loss": 1.1064, - "step": 7173 - }, - { - "epoch": 0.9725479563478614, - "grad_norm": 1.6457380420326853, - "learning_rate": 3.931785525252862e-09, - "loss": 1.0957, - "step": 7174 - }, - { - "epoch": 0.9726835219955263, - "grad_norm": 1.4870128775466216, - "learning_rate": 3.892978528772684e-09, - "loss": 1.1079, - "step": 7175 - }, - { - "epoch": 0.9728190876431912, - "grad_norm": 1.8757609147809848, - "learning_rate": 3.854363623668866e-09, - "loss": 1.1187, - "step": 7176 - }, - { - "epoch": 0.9729546532908561, - "grad_norm": 1.43305722069973, - "learning_rate": 3.815940817387786e-09, - "loss": 1.13, - "step": 7177 - }, - { - "epoch": 0.973090218938521, - "grad_norm": 1.7813322059908911, - "learning_rate": 3.777710117339183e-09, - "loss": 1.0977, - "step": 7178 - }, - { - "epoch": 0.9732257845861859, - "grad_norm": 1.9816928543233685, - "learning_rate": 3.739671530895605e-09, - "loss": 1.1264, - "step": 7179 - }, - { - "epoch": 0.9733613502338507, - "grad_norm": 2.443577714971343, - "learning_rate": 3.7018250653921834e-09, - "loss": 1.1205, - "step": 7180 - }, - { - "epoch": 0.9734969158815157, - "grad_norm": 1.698637249444351, - "learning_rate": 3.6641707281276357e-09, - "loss": 1.1235, - "step": 7181 - }, - { - "epoch": 0.9736324815291805, - "grad_norm": 5.880252266730391, - "learning_rate": 3.6267085263631537e-09, - "loss": 1.1034, - "step": 7182 - }, - { - "epoch": 0.9737680471768454, - "grad_norm": 4.669589598593262, - "learning_rate": 3.589438467322958e-09, - "loss": 1.1317, - "step": 7183 - }, - { - "epoch": 0.9739036128245103, - "grad_norm": 1.788161112332895, - "learning_rate": 3.5523605581944115e-09, - "loss": 1.137, - "step": 7184 - }, - { - "epoch": 0.9740391784721751, - "grad_norm": 1.5488471794422265, - "learning_rate": 3.5154748061276828e-09, - "loss": 1.1335, - "step": 7185 - }, - { - "epoch": 0.9741747441198401, - "grad_norm": 2.239395786147528, - "learning_rate": 3.47878121823586e-09, - "loss": 1.1334, - "step": 7186 - }, - { - "epoch": 0.9743103097675049, - "grad_norm": 1.456058777517878, - "learning_rate": 3.4422798015949496e-09, - "loss": 1.1102, - "step": 7187 - }, - { - "epoch": 0.9744458754151698, - "grad_norm": 1.6045105197740614, - "learning_rate": 3.405970563244098e-09, - "loss": 1.1207, - "step": 7188 - }, - { - "epoch": 0.9745814410628347, - "grad_norm": 1.7228629535159992, - "learning_rate": 3.36985351018515e-09, - "loss": 1.0564, - "step": 7189 - }, - { - "epoch": 0.9747170067104995, - "grad_norm": 1.662236536188821, - "learning_rate": 3.3339286493830886e-09, - "loss": 1.1506, - "step": 7190 - }, - { - "epoch": 0.9748525723581645, - "grad_norm": 1.4321641722132308, - "learning_rate": 3.2981959877657063e-09, - "loss": 1.1117, - "step": 7191 - }, - { - "epoch": 0.9749881380058293, - "grad_norm": 1.556961504461836, - "learning_rate": 3.2626555322236014e-09, - "loss": 1.1292, - "step": 7192 - }, - { - "epoch": 0.9751237036534942, - "grad_norm": 1.6177580413065933, - "learning_rate": 3.227307289610737e-09, - "loss": 1.0952, - "step": 7193 - }, - { - "epoch": 0.9752592693011591, - "grad_norm": 2.1008089168008084, - "learning_rate": 3.192151266743548e-09, - "loss": 1.1521, - "step": 7194 - }, - { - "epoch": 0.975394834948824, - "grad_norm": 1.625322566779412, - "learning_rate": 3.157187470401723e-09, - "loss": 1.1471, - "step": 7195 - }, - { - "epoch": 0.9755304005964889, - "grad_norm": 1.5960617355888707, - "learning_rate": 3.122415907327647e-09, - "loss": 1.1465, - "step": 7196 - }, - { - "epoch": 0.9756659662441537, - "grad_norm": 1.6375029793787959, - "learning_rate": 3.0878365842268437e-09, - "loss": 1.1421, - "step": 7197 - }, - { - "epoch": 0.9758015318918186, - "grad_norm": 1.7855815021636277, - "learning_rate": 3.053449507767536e-09, - "loss": 1.1312, - "step": 7198 - }, - { - "epoch": 0.9759370975394835, - "grad_norm": 1.712386526470286, - "learning_rate": 3.019254684581085e-09, - "loss": 1.1495, - "step": 7199 - }, - { - "epoch": 0.9760726631871484, - "grad_norm": 1.5076055258157215, - "learning_rate": 2.985252121261661e-09, - "loss": 1.1272, - "step": 7200 - }, - { - "epoch": 0.9762082288348133, - "grad_norm": 1.500264248540446, - "learning_rate": 2.951441824366463e-09, - "loss": 1.1059, - "step": 7201 - }, - { - "epoch": 0.9763437944824781, - "grad_norm": 2.151528438319856, - "learning_rate": 2.9178238004154975e-09, - "loss": 1.1089, - "step": 7202 - }, - { - "epoch": 0.976479360130143, - "grad_norm": 1.5893647377933937, - "learning_rate": 2.88439805589169e-09, - "loss": 1.1106, - "step": 7203 - }, - { - "epoch": 0.976614925777808, - "grad_norm": 1.4593149579414892, - "learning_rate": 2.851164597240996e-09, - "loss": 1.1258, - "step": 7204 - }, - { - "epoch": 0.9767504914254728, - "grad_norm": 1.4193178224789618, - "learning_rate": 2.8181234308721767e-09, - "loss": 1.0864, - "step": 7205 - }, - { - "epoch": 0.9768860570731377, - "grad_norm": 1.5094774160928792, - "learning_rate": 2.7852745631570253e-09, - "loss": 1.1078, - "step": 7206 - }, - { - "epoch": 0.9770216227208025, - "grad_norm": 1.5768918731611272, - "learning_rate": 2.7526180004300294e-09, - "loss": 1.1299, - "step": 7207 - }, - { - "epoch": 0.9771571883684674, - "grad_norm": 2.1060637361563193, - "learning_rate": 2.720153748988929e-09, - "loss": 1.1225, - "step": 7208 - }, - { - "epoch": 0.9772927540161324, - "grad_norm": 1.5345220052402073, - "learning_rate": 2.6878818150941616e-09, - "loss": 1.1418, - "step": 7209 - }, - { - "epoch": 0.9774283196637972, - "grad_norm": 1.866843451961597, - "learning_rate": 2.655802204968971e-09, - "loss": 1.14, - "step": 7210 - }, - { - "epoch": 0.9775638853114621, - "grad_norm": 3.1083991196602057, - "learning_rate": 2.6239149247999635e-09, - "loss": 1.1099, - "step": 7211 - }, - { - "epoch": 0.9776994509591269, - "grad_norm": 1.821442876582516, - "learning_rate": 2.592219980735999e-09, - "loss": 1.1172, - "step": 7212 - }, - { - "epoch": 0.9778350166067918, - "grad_norm": 1.5928813131612303, - "learning_rate": 2.5607173788894097e-09, - "loss": 1.1307, - "step": 7213 - }, - { - "epoch": 0.9779705822544568, - "grad_norm": 1.8277344017183261, - "learning_rate": 2.5294071253351146e-09, - "loss": 1.1187, - "step": 7214 - }, - { - "epoch": 0.9781061479021216, - "grad_norm": 2.7217508647669266, - "learning_rate": 2.498289226111061e-09, - "loss": 1.1008, - "step": 7215 - }, - { - "epoch": 0.9782417135497865, - "grad_norm": 1.51131839204187, - "learning_rate": 2.467363687218227e-09, - "loss": 1.1568, - "step": 7216 - }, - { - "epoch": 0.9783772791974513, - "grad_norm": 1.7286727654199492, - "learning_rate": 2.436630514620286e-09, - "loss": 1.1038, - "step": 7217 - }, - { - "epoch": 0.9785128448451162, - "grad_norm": 2.5352817004383734, - "learning_rate": 2.4060897142438308e-09, - "loss": 1.1291, - "step": 7218 - }, - { - "epoch": 0.9786484104927812, - "grad_norm": 4.092071001533522, - "learning_rate": 2.3757412919783725e-09, - "loss": 1.1472, - "step": 7219 - }, - { - "epoch": 0.978783976140446, - "grad_norm": 1.6961996712544052, - "learning_rate": 2.345585253676452e-09, - "loss": 1.1348, - "step": 7220 - }, - { - "epoch": 0.9789195417881109, - "grad_norm": 1.4335976369217418, - "learning_rate": 2.3156216051535284e-09, - "loss": 1.0832, - "step": 7221 - }, - { - "epoch": 0.9790551074357757, - "grad_norm": 1.524707655639051, - "learning_rate": 2.285850352187646e-09, - "loss": 1.1088, - "step": 7222 - }, - { - "epoch": 0.9791906730834407, - "grad_norm": 1.713964926537705, - "learning_rate": 2.2562715005201016e-09, - "loss": 1.1282, - "step": 7223 - }, - { - "epoch": 0.9793262387311056, - "grad_norm": 2.334572717755796, - "learning_rate": 2.226885055854777e-09, - "loss": 1.1664, - "step": 7224 - }, - { - "epoch": 0.9794618043787704, - "grad_norm": 1.703105475714722, - "learning_rate": 2.1976910238588055e-09, - "loss": 1.1466, - "step": 7225 - }, - { - "epoch": 0.9795973700264353, - "grad_norm": 1.4889680688647378, - "learning_rate": 2.168689410162017e-09, - "loss": 1.0979, - "step": 7226 - }, - { - "epoch": 0.9797329356741001, - "grad_norm": 2.092883462174913, - "learning_rate": 2.1398802203569375e-09, - "loss": 1.137, - "step": 7227 - }, - { - "epoch": 0.9798685013217651, - "grad_norm": 2.163030028758034, - "learning_rate": 2.111263459999457e-09, - "loss": 1.1038, - "step": 7228 - }, - { - "epoch": 0.98000406696943, - "grad_norm": 1.4846331439874394, - "learning_rate": 2.0828391346078277e-09, - "loss": 1.1189, - "step": 7229 - }, - { - "epoch": 0.9801396326170948, - "grad_norm": 1.7607602515102412, - "learning_rate": 2.054607249663665e-09, - "loss": 1.137, - "step": 7230 - }, - { - "epoch": 0.9802751982647597, - "grad_norm": 1.4728729316444726, - "learning_rate": 2.0265678106111685e-09, - "loss": 1.1491, - "step": 7231 - }, - { - "epoch": 0.9804107639124245, - "grad_norm": 3.402884054796431, - "learning_rate": 1.9987208228575693e-09, - "loss": 1.1125, - "step": 7232 - }, - { - "epoch": 0.9805463295600895, - "grad_norm": 1.8106424077540244, - "learning_rate": 1.971066291772905e-09, - "loss": 1.1457, - "step": 7233 - }, - { - "epoch": 0.9806818952077544, - "grad_norm": 2.1111346591818116, - "learning_rate": 1.9436042226901315e-09, - "loss": 1.1426, - "step": 7234 - }, - { - "epoch": 0.9808174608554192, - "grad_norm": 1.9638507957486508, - "learning_rate": 1.9163346209051246e-09, - "loss": 1.105, - "step": 7235 - }, - { - "epoch": 0.9809530265030841, - "grad_norm": 1.5324211078352512, - "learning_rate": 1.889257491676677e-09, - "loss": 1.0901, - "step": 7236 - }, - { - "epoch": 0.981088592150749, - "grad_norm": 1.4832408769560632, - "learning_rate": 1.8623728402261674e-09, - "loss": 1.0991, - "step": 7237 - }, - { - "epoch": 0.9812241577984139, - "grad_norm": 1.7422072779860218, - "learning_rate": 1.8356806717383377e-09, - "loss": 1.145, - "step": 7238 - }, - { - "epoch": 0.9813597234460788, - "grad_norm": 2.048130029994945, - "learning_rate": 1.809180991360404e-09, - "loss": 1.1252, - "step": 7239 - }, - { - "epoch": 0.9814952890937436, - "grad_norm": 1.9863326046610281, - "learning_rate": 1.7828738042027225e-09, - "loss": 1.1343, - "step": 7240 - }, - { - "epoch": 0.9816308547414085, - "grad_norm": 1.683067046540622, - "learning_rate": 1.7567591153383466e-09, - "loss": 1.1396, - "step": 7241 - }, - { - "epoch": 0.9817664203890734, - "grad_norm": 2.02405019811939, - "learning_rate": 1.7308369298033587e-09, - "loss": 1.1409, - "step": 7242 - }, - { - "epoch": 0.9819019860367383, - "grad_norm": 1.8052261782129018, - "learning_rate": 1.7051072525965382e-09, - "loss": 1.1325, - "step": 7243 - }, - { - "epoch": 0.9820375516844032, - "grad_norm": 1.8465800247652366, - "learning_rate": 1.6795700886798049e-09, - "loss": 1.1197, - "step": 7244 - }, - { - "epoch": 0.982173117332068, - "grad_norm": 2.877977040972751, - "learning_rate": 1.6542254429776636e-09, - "loss": 1.057, - "step": 7245 - }, - { - "epoch": 0.982308682979733, - "grad_norm": 2.2625360932983147, - "learning_rate": 1.6290733203776497e-09, - "loss": 1.1551, - "step": 7246 - }, - { - "epoch": 0.9824442486273978, - "grad_norm": 1.4464863003873267, - "learning_rate": 1.6041137257303272e-09, - "loss": 1.1121, - "step": 7247 - }, - { - "epoch": 0.9825798142750627, - "grad_norm": 1.6434167946249036, - "learning_rate": 1.5793466638486242e-09, - "loss": 1.1344, - "step": 7248 - }, - { - "epoch": 0.9827153799227276, - "grad_norm": 1.5923898962341945, - "learning_rate": 1.554772139509053e-09, - "loss": 1.1072, - "step": 7249 - }, - { - "epoch": 0.9828509455703924, - "grad_norm": 1.4401962007480966, - "learning_rate": 1.5303901574502675e-09, - "loss": 1.0999, - "step": 7250 - }, - { - "epoch": 0.9829865112180574, - "grad_norm": 1.5171494943306998, - "learning_rate": 1.5062007223743956e-09, - "loss": 1.1439, - "step": 7251 - }, - { - "epoch": 0.9831220768657222, - "grad_norm": 1.59426220009692, - "learning_rate": 1.482203838946039e-09, - "loss": 1.1106, - "step": 7252 - }, - { - "epoch": 0.9832576425133871, - "grad_norm": 2.3907140402653617, - "learning_rate": 1.4583995117929404e-09, - "loss": 1.1596, - "step": 7253 - }, - { - "epoch": 0.983393208161052, - "grad_norm": 1.653398457282989, - "learning_rate": 1.434787745505317e-09, - "loss": 1.1459, - "step": 7254 - }, - { - "epoch": 0.9835287738087168, - "grad_norm": 1.7115020176672784, - "learning_rate": 1.4113685446368595e-09, - "loss": 1.0971, - "step": 7255 - }, - { - "epoch": 0.9836643394563818, - "grad_norm": 1.7257730876789483, - "learning_rate": 1.388141913703511e-09, - "loss": 1.1313, - "step": 7256 - }, - { - "epoch": 0.9837999051040466, - "grad_norm": 2.1699962380813838, - "learning_rate": 1.3651078571844664e-09, - "loss": 1.1066, - "step": 7257 - }, - { - "epoch": 0.9839354707517115, - "grad_norm": 1.4911502093815314, - "learning_rate": 1.3422663795215062e-09, - "loss": 1.1194, - "step": 7258 - }, - { - "epoch": 0.9840710363993764, - "grad_norm": 1.6729600800754403, - "learning_rate": 1.3196174851196617e-09, - "loss": 1.1033, - "step": 7259 - }, - { - "epoch": 0.9842066020470412, - "grad_norm": 1.3830231095469774, - "learning_rate": 1.2971611783465507e-09, - "loss": 1.0896, - "step": 7260 - }, - { - "epoch": 0.9843421676947062, - "grad_norm": 3.681292708926958, - "learning_rate": 1.274897463532487e-09, - "loss": 1.1369, - "step": 7261 - }, - { - "epoch": 0.9844777333423711, - "grad_norm": 1.85426889268654, - "learning_rate": 1.2528263449710363e-09, - "loss": 1.1328, - "step": 7262 - }, - { - "epoch": 0.9846132989900359, - "grad_norm": 1.9431162663522894, - "learning_rate": 1.2309478269184602e-09, - "loss": 1.1313, - "step": 7263 - }, - { - "epoch": 0.9847488646377008, - "grad_norm": 1.4773329895320648, - "learning_rate": 1.2092619135937177e-09, - "loss": 1.163, - "step": 7264 - }, - { - "epoch": 0.9848844302853657, - "grad_norm": 1.6405068215678367, - "learning_rate": 1.1877686091787963e-09, - "loss": 1.1068, - "step": 7265 - }, - { - "epoch": 0.9850199959330306, - "grad_norm": 1.6989520793794661, - "learning_rate": 1.1664679178186032e-09, - "loss": 1.0761, - "step": 7266 - }, - { - "epoch": 0.9851555615806955, - "grad_norm": 1.702021528687978, - "learning_rate": 1.1453598436208522e-09, - "loss": 1.1465, - "step": 7267 - }, - { - "epoch": 0.9852911272283603, - "grad_norm": 1.7429534303034626, - "learning_rate": 1.1244443906558432e-09, - "loss": 1.1274, - "step": 7268 - }, - { - "epoch": 0.9854266928760252, - "grad_norm": 1.7406907942027194, - "learning_rate": 1.1037215629571272e-09, - "loss": 1.1362, - "step": 7269 - }, - { - "epoch": 0.9855622585236901, - "grad_norm": 1.8932931960084298, - "learning_rate": 1.0831913645209522e-09, - "loss": 1.1298, - "step": 7270 - }, - { - "epoch": 0.985697824171355, - "grad_norm": 1.7352427386614573, - "learning_rate": 1.0628537993063736e-09, - "loss": 1.1801, - "step": 7271 - }, - { - "epoch": 0.9858333898190199, - "grad_norm": 1.7239920723082987, - "learning_rate": 1.042708871235143e-09, - "loss": 1.1398, - "step": 7272 - }, - { - "epoch": 0.9859689554666847, - "grad_norm": 2.1901440698428303, - "learning_rate": 1.0227565841923746e-09, - "loss": 1.0969, - "step": 7273 - }, - { - "epoch": 0.9861045211143497, - "grad_norm": 1.7106245690977555, - "learning_rate": 1.002996942025547e-09, - "loss": 1.1243, - "step": 7274 - }, - { - "epoch": 0.9862400867620145, - "grad_norm": 1.685459944468041, - "learning_rate": 9.834299485450559e-10, - "loss": 1.1415, - "step": 7275 - }, - { - "epoch": 0.9863756524096794, - "grad_norm": 4.688780571079677, - "learning_rate": 9.640556075244388e-10, - "loss": 1.1465, - "step": 7276 - }, - { - "epoch": 0.9865112180573443, - "grad_norm": 1.7967560376704728, - "learning_rate": 9.448739226997072e-10, - "loss": 1.1331, - "step": 7277 - }, - { - "epoch": 0.9866467837050091, - "grad_norm": 1.928662704425858, - "learning_rate": 9.258848977700129e-10, - "loss": 1.1313, - "step": 7278 - }, - { - "epoch": 0.9867823493526741, - "grad_norm": 1.9613054597838901, - "learning_rate": 9.070885363972047e-10, - "loss": 1.1688, - "step": 7279 - }, - { - "epoch": 0.9869179150003389, - "grad_norm": 1.749659226495413, - "learning_rate": 8.884848422060498e-10, - "loss": 1.1608, - "step": 7280 - }, - { - "epoch": 0.9870534806480038, - "grad_norm": 1.6648974152295442, - "learning_rate": 8.700738187840118e-10, - "loss": 1.1392, - "step": 7281 - }, - { - "epoch": 0.9871890462956687, - "grad_norm": 1.892382211603729, - "learning_rate": 8.518554696815838e-10, - "loss": 1.1627, - "step": 7282 - }, - { - "epoch": 0.9873246119433335, - "grad_norm": 1.6501112844948553, - "learning_rate": 8.338297984121778e-10, - "loss": 1.1088, - "step": 7283 - }, - { - "epoch": 0.9874601775909985, - "grad_norm": 1.7644572051868976, - "learning_rate": 8.159968084515689e-10, - "loss": 1.1083, - "step": 7284 - }, - { - "epoch": 0.9875957432386633, - "grad_norm": 2.6966041013335196, - "learning_rate": 7.983565032390061e-10, - "loss": 1.0914, - "step": 7285 - }, - { - "epoch": 0.9877313088863282, - "grad_norm": 1.7506618671260525, - "learning_rate": 7.809088861762125e-10, - "loss": 1.0988, - "step": 7286 - }, - { - "epoch": 0.9878668745339931, - "grad_norm": 1.5435437759790647, - "learning_rate": 7.636539606277192e-10, - "loss": 1.1741, - "step": 7287 - }, - { - "epoch": 0.988002440181658, - "grad_norm": 1.758252587275288, - "learning_rate": 7.465917299210866e-10, - "loss": 1.1157, - "step": 7288 - }, - { - "epoch": 0.9881380058293229, - "grad_norm": 1.5287575916617557, - "learning_rate": 7.297221973465717e-10, - "loss": 1.1256, - "step": 7289 - }, - { - "epoch": 0.9882735714769877, - "grad_norm": 2.193629185375675, - "learning_rate": 7.130453661573499e-10, - "loss": 1.1283, - "step": 7290 - }, - { - "epoch": 0.9884091371246526, - "grad_norm": 1.6327581284090786, - "learning_rate": 6.965612395695153e-10, - "loss": 1.1579, - "step": 7291 - }, - { - "epoch": 0.9885447027723175, - "grad_norm": 1.5418186971137735, - "learning_rate": 6.802698207617474e-10, - "loss": 1.1256, - "step": 7292 - }, - { - "epoch": 0.9886802684199824, - "grad_norm": 2.863059737573039, - "learning_rate": 6.641711128758665e-10, - "loss": 1.0991, - "step": 7293 - }, - { - "epoch": 0.9888158340676473, - "grad_norm": 1.5473231142088129, - "learning_rate": 6.48265119016278e-10, - "loss": 1.0904, - "step": 7294 - }, - { - "epoch": 0.9889513997153121, - "grad_norm": 1.4516835053908697, - "learning_rate": 6.325518422503063e-10, - "loss": 1.1191, - "step": 7295 - }, - { - "epoch": 0.989086965362977, - "grad_norm": 2.556621016364256, - "learning_rate": 6.170312856083048e-10, - "loss": 1.1471, - "step": 7296 - }, - { - "epoch": 0.989222531010642, - "grad_norm": 1.6607784710513507, - "learning_rate": 6.017034520831021e-10, - "loss": 1.1646, - "step": 7297 - }, - { - "epoch": 0.9893580966583068, - "grad_norm": 1.715507417494631, - "learning_rate": 5.865683446305558e-10, - "loss": 1.1165, - "step": 7298 - }, - { - "epoch": 0.9894936623059717, - "grad_norm": 1.4574846145659213, - "learning_rate": 5.716259661695533e-10, - "loss": 1.1379, - "step": 7299 - }, - { - "epoch": 0.9896292279536365, - "grad_norm": 1.582271575839357, - "learning_rate": 5.568763195813453e-10, - "loss": 1.1191, - "step": 7300 - }, - { - "epoch": 0.9897647936013014, - "grad_norm": 1.8607327955016504, - "learning_rate": 5.423194077104343e-10, - "loss": 1.0755, - "step": 7301 - }, - { - "epoch": 0.9899003592489664, - "grad_norm": 1.8320853283583345, - "learning_rate": 5.279552333640191e-10, - "loss": 1.0832, - "step": 7302 - }, - { - "epoch": 0.9900359248966312, - "grad_norm": 1.4823390583347391, - "learning_rate": 5.137837993121064e-10, - "loss": 1.0983, - "step": 7303 - }, - { - "epoch": 0.9901714905442961, - "grad_norm": 3.715413220303096, - "learning_rate": 4.998051082875099e-10, - "loss": 1.2052, - "step": 7304 - }, - { - "epoch": 0.9903070561919609, - "grad_norm": 2.4589387946918704, - "learning_rate": 4.860191629859623e-10, - "loss": 1.1468, - "step": 7305 - }, - { - "epoch": 0.9904426218396258, - "grad_norm": 1.6344181332801768, - "learning_rate": 4.724259660658924e-10, - "loss": 1.1489, - "step": 7306 - }, - { - "epoch": 0.9905781874872908, - "grad_norm": 2.2737258503824194, - "learning_rate": 4.5902552014864815e-10, - "loss": 1.0974, - "step": 7307 - }, - { - "epoch": 0.9907137531349556, - "grad_norm": 2.105938488139748, - "learning_rate": 4.458178278184954e-10, - "loss": 1.1132, - "step": 7308 - }, - { - "epoch": 0.9908493187826205, - "grad_norm": 2.003598053946252, - "learning_rate": 4.328028916222859e-10, - "loss": 1.1324, - "step": 7309 - }, - { - "epoch": 0.9909848844302853, - "grad_norm": 2.0024953594704162, - "learning_rate": 4.199807140700118e-10, - "loss": 1.1435, - "step": 7310 - }, - { - "epoch": 0.9911204500779502, - "grad_norm": 1.5427283002394452, - "learning_rate": 4.073512976342508e-10, - "loss": 1.101, - "step": 7311 - }, - { - "epoch": 0.9912560157256152, - "grad_norm": 1.9290061735456894, - "learning_rate": 3.9491464475049916e-10, - "loss": 1.1491, - "step": 7312 - }, - { - "epoch": 0.99139158137328, - "grad_norm": 1.6138522356975329, - "learning_rate": 3.826707578170607e-10, - "loss": 1.1297, - "step": 7313 - }, - { - "epoch": 0.9915271470209449, - "grad_norm": 1.5536762712081174, - "learning_rate": 3.7061963919504667e-10, - "loss": 1.1368, - "step": 7314 - }, - { - "epoch": 0.9916627126686097, - "grad_norm": 1.4264188384787424, - "learning_rate": 3.5876129120837596e-10, - "loss": 1.1297, - "step": 7315 - }, - { - "epoch": 0.9917982783162747, - "grad_norm": 1.4121047127966044, - "learning_rate": 3.470957161439969e-10, - "loss": 1.1126, - "step": 7316 - }, - { - "epoch": 0.9919338439639396, - "grad_norm": 1.731028168799319, - "learning_rate": 3.3562291625133245e-10, - "loss": 1.1378, - "step": 7317 - }, - { - "epoch": 0.9920694096116044, - "grad_norm": 1.9230802792603376, - "learning_rate": 3.24342893742946e-10, - "loss": 1.1104, - "step": 7318 - }, - { - "epoch": 0.9922049752592693, - "grad_norm": 1.6136834036223806, - "learning_rate": 3.1325565079409755e-10, - "loss": 1.1243, - "step": 7319 - }, - { - "epoch": 0.9923405409069341, - "grad_norm": 1.4996058577826614, - "learning_rate": 3.023611895428546e-10, - "loss": 1.0996, - "step": 7320 - }, - { - "epoch": 0.9924761065545991, - "grad_norm": 1.6168617449142415, - "learning_rate": 2.9165951209020325e-10, - "loss": 1.1101, - "step": 7321 - }, - { - "epoch": 0.992611672202264, - "grad_norm": 1.686072185851146, - "learning_rate": 2.8115062049971493e-10, - "loss": 1.091, - "step": 7322 - }, - { - "epoch": 0.9927472378499288, - "grad_norm": 1.6629707746149576, - "learning_rate": 2.7083451679799084e-10, - "loss": 1.0818, - "step": 7323 - }, - { - "epoch": 0.9928828034975937, - "grad_norm": 1.7730890475593983, - "learning_rate": 2.6071120297443963e-10, - "loss": 1.1495, - "step": 7324 - }, - { - "epoch": 0.9930183691452585, - "grad_norm": 1.9655778763658645, - "learning_rate": 2.507806809813884e-10, - "loss": 1.1052, - "step": 7325 - }, - { - "epoch": 0.9931539347929235, - "grad_norm": 2.056507731685081, - "learning_rate": 2.410429527336388e-10, - "loss": 1.1324, - "step": 7326 - }, - { - "epoch": 0.9932895004405884, - "grad_norm": 1.4850184006335412, - "learning_rate": 2.3149802010913322e-10, - "loss": 1.137, - "step": 7327 - }, - { - "epoch": 0.9934250660882532, - "grad_norm": 2.0140448826711066, - "learning_rate": 2.221458849486213e-10, - "loss": 1.1509, - "step": 7328 - }, - { - "epoch": 0.9935606317359181, - "grad_norm": 2.232749960251188, - "learning_rate": 2.1298654905543834e-10, - "loss": 1.1046, - "step": 7329 - }, - { - "epoch": 0.993696197383583, - "grad_norm": 1.6397173392746007, - "learning_rate": 2.0402001419594917e-10, - "loss": 1.1108, - "step": 7330 - }, - { - "epoch": 0.9938317630312479, - "grad_norm": 1.6447243925517394, - "learning_rate": 1.9524628209943718e-10, - "loss": 1.1567, - "step": 7331 - }, - { - "epoch": 0.9939673286789128, - "grad_norm": 1.5853965296206602, - "learning_rate": 1.8666535445754917e-10, - "loss": 1.1256, - "step": 7332 - }, - { - "epoch": 0.9941028943265776, - "grad_norm": 1.7118124411706108, - "learning_rate": 1.7827723292518358e-10, - "loss": 1.1203, - "step": 7333 - }, - { - "epoch": 0.9942384599742425, - "grad_norm": 2.803321938934508, - "learning_rate": 1.7008191912004645e-10, - "loss": 1.1252, - "step": 7334 - }, - { - "epoch": 0.9943740256219074, - "grad_norm": 1.6795398287726964, - "learning_rate": 1.6207941462242912e-10, - "loss": 1.1239, - "step": 7335 - }, - { - "epoch": 0.9945095912695723, - "grad_norm": 3.0297576749773687, - "learning_rate": 1.5426972097543068e-10, - "loss": 1.1232, - "step": 7336 - }, - { - "epoch": 0.9946451569172372, - "grad_norm": 1.5705092717887783, - "learning_rate": 1.4665283968529062e-10, - "loss": 1.0783, - "step": 7337 - }, - { - "epoch": 0.994780722564902, - "grad_norm": 1.4910709312915966, - "learning_rate": 1.3922877222083407e-10, - "loss": 1.1426, - "step": 7338 - }, - { - "epoch": 0.994916288212567, - "grad_norm": 1.619772143822431, - "learning_rate": 1.3199752001369359e-10, - "loss": 1.1492, - "step": 7339 - }, - { - "epoch": 0.9950518538602319, - "grad_norm": 5.380196955303005, - "learning_rate": 1.2495908445830928e-10, - "loss": 1.1044, - "step": 7340 - }, - { - "epoch": 0.9951874195078967, - "grad_norm": 1.6881572309655417, - "learning_rate": 1.1811346691203982e-10, - "loss": 1.0976, - "step": 7341 - }, - { - "epoch": 0.9953229851555616, - "grad_norm": 1.8214927865906039, - "learning_rate": 1.1146066869494042e-10, - "loss": 1.1443, - "step": 7342 - }, - { - "epoch": 0.9954585508032264, - "grad_norm": 1.6384553030455569, - "learning_rate": 1.0500069109009579e-10, - "loss": 1.1383, - "step": 7343 - }, - { - "epoch": 0.9955941164508914, - "grad_norm": 1.5121953532912003, - "learning_rate": 9.873353534317619e-11, - "loss": 1.0981, - "step": 7344 - }, - { - "epoch": 0.9957296820985563, - "grad_norm": 2.038036255652666, - "learning_rate": 9.265920266265936e-11, - "loss": 1.1168, - "step": 7345 - }, - { - "epoch": 0.9958652477462211, - "grad_norm": 1.6261583637656811, - "learning_rate": 8.677769422005266e-11, - "loss": 1.1384, - "step": 7346 - }, - { - "epoch": 0.996000813393886, - "grad_norm": 2.1995897481442235, - "learning_rate": 8.108901114955991e-11, - "loss": 1.1236, - "step": 7347 - }, - { - "epoch": 0.9961363790415508, - "grad_norm": 1.6704269673792445, - "learning_rate": 7.559315454819249e-11, - "loss": 1.0988, - "step": 7348 - }, - { - "epoch": 0.9962719446892158, - "grad_norm": 2.1384871906587715, - "learning_rate": 7.029012547576929e-11, - "loss": 1.1313, - "step": 7349 - }, - { - "epoch": 0.9964075103368807, - "grad_norm": 2.811126866614931, - "learning_rate": 6.517992495491676e-11, - "loss": 1.1378, - "step": 7350 - }, - { - "epoch": 0.9965430759845455, - "grad_norm": 2.3065960737960696, - "learning_rate": 6.026255397106884e-11, - "loss": 1.1161, - "step": 7351 - }, - { - "epoch": 0.9966786416322104, - "grad_norm": 1.605578152159696, - "learning_rate": 5.553801347257803e-11, - "loss": 1.1476, - "step": 7352 - }, - { - "epoch": 0.9968142072798752, - "grad_norm": 1.4029879694161946, - "learning_rate": 5.1006304370493355e-11, - "loss": 1.1174, - "step": 7353 - }, - { - "epoch": 0.9969497729275402, - "grad_norm": 2.9832720144266087, - "learning_rate": 4.6667427538782386e-11, - "loss": 1.106, - "step": 7354 - }, - { - "epoch": 0.9970853385752051, - "grad_norm": 1.8103886482639782, - "learning_rate": 4.252138381399817e-11, - "loss": 1.1245, - "step": 7355 - }, - { - "epoch": 0.9972209042228699, - "grad_norm": 1.405773299696744, - "learning_rate": 3.856817399594536e-11, - "loss": 1.1186, - "step": 7356 - }, - { - "epoch": 0.9973564698705348, - "grad_norm": 2.404771544856134, - "learning_rate": 3.4807798846681055e-11, - "loss": 1.1553, - "step": 7357 - }, - { - "epoch": 0.9974920355181996, - "grad_norm": 1.9439275154917557, - "learning_rate": 3.124025909151395e-11, - "loss": 1.148, - "step": 7358 - }, - { - "epoch": 0.9976276011658646, - "grad_norm": 1.7945789729856365, - "learning_rate": 2.7865555418338238e-11, - "loss": 1.1742, - "step": 7359 - }, - { - "epoch": 0.9977631668135295, - "grad_norm": 1.6143669685941981, - "learning_rate": 2.4683688477966647e-11, - "loss": 1.1369, - "step": 7360 - }, - { - "epoch": 0.9978987324611943, - "grad_norm": 2.144184300108407, - "learning_rate": 2.1694658884130468e-11, - "loss": 1.1453, - "step": 7361 - }, - { - "epoch": 0.9980342981088592, - "grad_norm": 1.6121861406230185, - "learning_rate": 1.8898467213146473e-11, - "loss": 1.138, - "step": 7362 - }, - { - "epoch": 0.9981698637565241, - "grad_norm": 1.6096134437939005, - "learning_rate": 1.6295114004138965e-11, - "loss": 1.1529, - "step": 7363 - }, - { - "epoch": 0.998305429404189, - "grad_norm": 2.0572823140665686, - "learning_rate": 1.3884599759261818e-11, - "loss": 1.148, - "step": 7364 - }, - { - "epoch": 0.9984409950518539, - "grad_norm": 1.5146281984850485, - "learning_rate": 1.1666924943254386e-11, - "loss": 1.1227, - "step": 7365 - }, - { - "epoch": 0.9985765606995187, - "grad_norm": 1.7126984056723948, - "learning_rate": 9.642089983885604e-12, - "loss": 1.1393, - "step": 7366 - }, - { - "epoch": 0.9987121263471836, - "grad_norm": 1.7243573323349441, - "learning_rate": 7.810095271620908e-12, - "loss": 1.1261, - "step": 7367 - }, - { - "epoch": 0.9988476919948485, - "grad_norm": 1.5924815091108142, - "learning_rate": 6.170941159733267e-12, - "loss": 1.118, - "step": 7368 - }, - { - "epoch": 0.9989832576425134, - "grad_norm": 1.4906155752891566, - "learning_rate": 4.724627964303174e-12, - "loss": 1.1121, - "step": 7369 - }, - { - "epoch": 0.9991188232901783, - "grad_norm": 1.5462989120298993, - "learning_rate": 3.4711559642186527e-12, - "loss": 1.1042, - "step": 7370 - }, - { - "epoch": 0.9992543889378431, - "grad_norm": 2.7375283696759416, - "learning_rate": 2.4105254012862784e-12, - "loss": 1.0976, - "step": 7371 - }, - { - "epoch": 0.9993899545855081, - "grad_norm": 1.470073145713096, - "learning_rate": 1.5427364800091325e-12, - "loss": 1.1355, - "step": 7372 - }, - { - "epoch": 0.9995255202331729, - "grad_norm": 3.1378205058696995, - "learning_rate": 8.67789367586802e-13, - "loss": 1.1058, - "step": 7373 - }, - { - "epoch": 0.9996610858808378, - "grad_norm": 1.6191020535732017, - "learning_rate": 3.856841943594702e-13, - "loss": 1.1435, - "step": 7374 - }, - { - "epoch": 0.9997966515285027, - "grad_norm": 1.650802410263571, - "learning_rate": 9.642105325280425e-14, - "loss": 1.0968, - "step": 7375 - }, - { - "epoch": 0.9999322171761675, - "grad_norm": 1.660676723056901, - "learning_rate": 0.0, - "loss": 1.1592, - "step": 7376 - }, - { - "epoch": 0.9999322171761675, - "step": 7376, - "total_flos": 4.243207408718971e+17, - "train_loss": 1.182327052824797, - "train_runtime": 84350.6068, - "train_samples_per_second": 8.395, - "train_steps_per_second": 0.087 - } - ], - "logging_steps": 1.0, - "max_steps": 7376, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 100, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 4.243207408718971e+17, - "train_batch_size": 6, - "trial_name": null, - "trial_params": null -} diff --git a/pft/training_args.bin b/pft/training_args.bin deleted file mode 100644 index e25cc564efede918f7519e0466646845ce5f3806..0000000000000000000000000000000000000000 --- a/pft/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f1e11108b9ed715af3ef653385c30fabeab6a4f375c5a8908dd3b347ecff7b7f -size 7416 diff --git a/sft/hyperrouter/added_tokens.json b/sft/hyperrouter/added_tokens.json deleted file mode 100644 index c9d3d3a1b74d87e381e471f7b33784015d2dc0ea..0000000000000000000000000000000000000000 --- a/sft/hyperrouter/added_tokens.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "<|assistant|>": 32001, - "<|endoftext|>": 32000, - "<|end|>": 32007, - "<|placeholder1|>": 32002, - "<|placeholder2|>": 32003, - "<|placeholder3|>": 32004, - "<|placeholder4|>": 32005, - "<|placeholder5|>": 32008, - "<|placeholder6|>": 32009, - "<|system|>": 32006, - "<|user|>": 32010 -} diff --git a/sft/hyperrouter/config.json b/sft/hyperrouter/config.json deleted file mode 100644 index b541313e978cbaea2d0bf8e5d60f65ecaf1ab138..0000000000000000000000000000000000000000 --- a/sft/hyperrouter/config.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "_name_or_path": "/cm/archive/thongdt4/toolkitmoe/checkpoints/phi3mini-siglip224/pft", - "architectures": [ - "LlavaPhiForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" - }, - "balance_loss_coef": 0.1, - "bos_token_id": 1, - "clip_smoe": true, - "dropout": false, - "embd_pdrop": 0.0, - "eos_token_id": 32000, - "freeze_mm_mlp_adapter": false, - "hidden_act": "silu", - "hidden_size": 3072, - "image_aspect_ratio": "pad", - "initializer_range": 0.02, - "intermediate_size": 8192, - "local_rank": 0, - "max_position_embeddings": 4096, - "mlp_smoe": true, - "mm_hidden_size": 1152, - "mm_patch_merge_type": "flat", - "mm_projector_lr": null, - "mm_projector_type": "moe", - "mm_use_im_patch_token": false, - "mm_use_im_start_end": false, - "mm_vision_select_feature": "patch", - "mm_vision_select_layer": -2, - "mm_vision_tower": "google/siglip-so400m-patch14-224", - "model_type": "llava_phi", - "moe_name": "hyperrouter", - "num_attention_heads": 32, - "num_experts": 4, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "num_layers": 3, - "num_selected": 2, - "original_max_position_embeddings": 4096, - "pad_token_id": 32000, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "router_z_loss_coef": 0.01, - "scales": [ - 1, - 3 - ], - "sliding_window": 2047, - "tie_word_embeddings": false, - "tokenizer_model_max_length": 2048, - "tokenizer_padding_side": "right", - "topk_max": 2, - "topk_min": 1, - "torch_dtype": "bfloat16", - "training": true, - "transformers_version": "4.43.2", - "tune_mm_mlp_adapter": false, - "use_cache": true, - "use_mm_proj": true, - "vocab_size": 32064 -} diff --git a/sft/hyperrouter/generation_config.json b/sft/hyperrouter/generation_config.json deleted file mode 100644 index 3a20824ea777f1ebd11da590160a7209fe3b62c6..0000000000000000000000000000000000000000 --- a/sft/hyperrouter/generation_config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "_from_model_config": true, - "bos_token_id": 1, - "do_sample": true, - "eos_token_id": [ - 32000, - 32001, - 32007 - ], - "pad_token_id": 32000, - "transformers_version": "4.43.2" -} diff --git a/sft/hyperrouter/model-00001-of-00003.safetensors b/sft/hyperrouter/model-00001-of-00003.safetensors deleted file mode 100644 index 072f6639a482c4ab89c1c4e34eb90182f8bcefc9..0000000000000000000000000000000000000000 --- a/sft/hyperrouter/model-00001-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9b4d0844c655591f1968d2ce825d5e82df6c438775c090daee14b7412ccc7055 -size 4972489328 diff --git a/sft/hyperrouter/model-00002-of-00003.safetensors b/sft/hyperrouter/model-00002-of-00003.safetensors deleted file mode 100644 index fe1f39d3a5775abd746dbb96e9664b267f403990..0000000000000000000000000000000000000000 --- a/sft/hyperrouter/model-00002-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7ee48788c3759ffa9c2c4ed88ce6ee26a64e9e3c0e1386318292139ce2c500cf -size 4995022432 diff --git a/sft/hyperrouter/model-00003-of-00003.safetensors b/sft/hyperrouter/model-00003-of-00003.safetensors deleted file mode 100644 index 39dff8a3c8090a660479610eeea66711cfe24d0f..0000000000000000000000000000000000000000 --- a/sft/hyperrouter/model-00003-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:73a616bad6c479415ca0994b6c542dc194b7ded1ce11a763ca4163704544dd86 -size 342468696 diff --git a/sft/hyperrouter/model.safetensors.index.json b/sft/hyperrouter/model.safetensors.index.json deleted file mode 100644 index db030b0a892103170e46b0e3031402e452e46911..0000000000000000000000000000000000000000 --- a/sft/hyperrouter/model.safetensors.index.json +++ /dev/null @@ -1,1117 +0,0 @@ -{ - "metadata": { - "total_size": 10309820576 - }, - "weight_map": { - "lm_head.weight": "model-00003-of-00003.safetensors", - "model.embed_tokens.weight": "model-00001-of-00003.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.21.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.21.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.30.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.0.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.0.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.2.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.0.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.0.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.2.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.0.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.0.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.2.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.0.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.0.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.2.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.hypernet.0.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.hypernet.0.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.hypernet.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.hypernet.2.weight": "model-00003-of-00003.safetensors", - "model.norm.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.hyper_embedding": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.hypernet.0.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.hypernet.0.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.hypernet.2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.hypernet.2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00002-of-00003.safetensors" - } -} diff --git a/sft/hyperrouter/special_tokens_map.json b/sft/hyperrouter/special_tokens_map.json deleted file mode 100644 index 3e4d5a5bc1cb51753cc9ae0305ece0da60052b10..0000000000000000000000000000000000000000 --- a/sft/hyperrouter/special_tokens_map.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "bos_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "", - "unk_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - } -} diff --git a/sft/hyperrouter/tokenizer.model b/sft/hyperrouter/tokenizer.model deleted file mode 100644 index 6c00c742ce03c627d6cd5b795984876fa49fa899..0000000000000000000000000000000000000000 --- a/sft/hyperrouter/tokenizer.model +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 -size 499723 diff --git a/sft/hyperrouter/tokenizer_config.json b/sft/hyperrouter/tokenizer_config.json deleted file mode 100644 index 3bd56c6314b14d6a33a69cd1802e04dbc1e47840..0000000000000000000000000000000000000000 --- a/sft/hyperrouter/tokenizer_config.json +++ /dev/null @@ -1,132 +0,0 @@ -{ - "add_bos_token": true, - "add_eos_token": false, - "add_prefix_space": true, - "added_tokens_decoder": { - "0": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "1": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "2": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": false - }, - "32000": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "32001": { - "content": "<|assistant|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32002": { - "content": "<|placeholder1|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32003": { - "content": "<|placeholder2|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32004": { - "content": "<|placeholder3|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32005": { - "content": "<|placeholder4|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32006": { - "content": "<|system|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32007": { - "content": "<|end|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32008": { - "content": "<|placeholder5|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32009": { - "content": "<|placeholder6|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32010": { - "content": "<|user|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - } - }, - "bos_token": "", - "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", - "clean_up_tokenization_spaces": false, - "eos_token": "<|endoftext|>", - "legacy": false, - "model_max_length": 2048, - "pad_token": "", - "padding_side": "right", - "sp_model_kwargs": {}, - "spaces_between_special_tokens": false, - "tokenizer_class": "LlamaTokenizer", - "unk_token": "", - "use_default_system_prompt": false -} diff --git a/sft/hyperrouter/trainer_state.json b/sft/hyperrouter/trainer_state.json deleted file mode 100644 index c2e8264a9d8598980b10f3a8466a29ef4c6ab668..0000000000000000000000000000000000000000 --- a/sft/hyperrouter/trainer_state.json +++ /dev/null @@ -1,66571 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.9999398785546805, - "eval_steps": 500, - "global_step": 8316, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.00012024289063909097, - "grad_norm": 16.327665936250575, - "learning_rate": 1.6e-08, - "loss": 1.4003, - "num_input_tokens_seen": 20095, - "step": 1 - }, - { - "epoch": 0.00024048578127818193, - "grad_norm": 21.93773318199164, - "learning_rate": 3.2e-08, - "loss": 1.466, - "num_input_tokens_seen": 38475, - "step": 2 - }, - { - "epoch": 0.0003607286719172729, - "grad_norm": 16.203408261343995, - "learning_rate": 4.8e-08, - "loss": 1.309, - "num_input_tokens_seen": 56760, - "step": 3 - }, - { - "epoch": 0.00048097156255636386, - "grad_norm": 16.462381655575808, - "learning_rate": 6.4e-08, - "loss": 1.3787, - "num_input_tokens_seen": 75345, - "step": 4 - }, - { - "epoch": 0.0006012144531954548, - "grad_norm": 18.602705072838127, - "learning_rate": 8e-08, - "loss": 1.4706, - "num_input_tokens_seen": 92950, - "step": 5 - }, - { - "epoch": 0.0007214573438345458, - "grad_norm": 18.879179648098496, - "learning_rate": 9.6e-08, - "loss": 1.3408, - "num_input_tokens_seen": 112915, - "step": 6 - }, - { - "epoch": 0.0008417002344736367, - "grad_norm": 3.0044250937303145, - "learning_rate": 1.12e-07, - "loss": 0.7825, - "num_input_tokens_seen": 177630, - "step": 7 - }, - { - "epoch": 0.0009619431251127277, - "grad_norm": 28.316157789661276, - "learning_rate": 1.28e-07, - "loss": 1.5846, - "num_input_tokens_seen": 192850, - "step": 8 - }, - { - "epoch": 0.0010821860157518186, - "grad_norm": 15.87731397869836, - "learning_rate": 1.44e-07, - "loss": 1.5224, - "num_input_tokens_seen": 209115, - "step": 9 - }, - { - "epoch": 0.0012024289063909096, - "grad_norm": 15.283545522446454, - "learning_rate": 1.6e-07, - "loss": 1.5422, - "num_input_tokens_seen": 221905, - "step": 10 - }, - { - "epoch": 0.0013226717970300007, - "grad_norm": 14.726159898300184, - "learning_rate": 1.76e-07, - "loss": 1.3525, - "num_input_tokens_seen": 241555, - "step": 11 - }, - { - "epoch": 0.0014429146876690916, - "grad_norm": 12.070506814161272, - "learning_rate": 1.92e-07, - "loss": 1.2949, - "num_input_tokens_seen": 262405, - "step": 12 - }, - { - "epoch": 0.0015631575783081825, - "grad_norm": 15.037569175387873, - "learning_rate": 2.0799999999999998e-07, - "loss": 1.4472, - "num_input_tokens_seen": 279860, - "step": 13 - }, - { - "epoch": 0.0016834004689472734, - "grad_norm": 18.056233630077188, - "learning_rate": 2.24e-07, - "loss": 1.4063, - "num_input_tokens_seen": 301765, - "step": 14 - }, - { - "epoch": 0.0018036433595863645, - "grad_norm": 19.440810891818074, - "learning_rate": 2.4e-07, - "loss": 1.4412, - "num_input_tokens_seen": 323140, - "step": 15 - }, - { - "epoch": 0.0019238862502254555, - "grad_norm": 3.16247087930905, - "learning_rate": 2.56e-07, - "loss": 0.6797, - "num_input_tokens_seen": 378230, - "step": 16 - }, - { - "epoch": 0.0020441291408645466, - "grad_norm": 12.790671908296467, - "learning_rate": 2.72e-07, - "loss": 1.3939, - "num_input_tokens_seen": 396130, - "step": 17 - }, - { - "epoch": 0.0021643720315036373, - "grad_norm": 3.030801715669472, - "learning_rate": 2.88e-07, - "loss": 0.7646, - "num_input_tokens_seen": 457565, - "step": 18 - }, - { - "epoch": 0.0022846149221427284, - "grad_norm": 17.8083550486306, - "learning_rate": 3.0399999999999997e-07, - "loss": 1.3484, - "num_input_tokens_seen": 477960, - "step": 19 - }, - { - "epoch": 0.002404857812781819, - "grad_norm": 3.3889049949298844, - "learning_rate": 3.2e-07, - "loss": 0.8205, - "num_input_tokens_seen": 532020, - "step": 20 - }, - { - "epoch": 0.0025251007034209102, - "grad_norm": 15.467730560738998, - "learning_rate": 3.36e-07, - "loss": 1.3661, - "num_input_tokens_seen": 549880, - "step": 21 - }, - { - "epoch": 0.0026453435940600014, - "grad_norm": 21.911544960122527, - "learning_rate": 3.52e-07, - "loss": 1.3293, - "num_input_tokens_seen": 572290, - "step": 22 - }, - { - "epoch": 0.002765586484699092, - "grad_norm": 20.69427988265371, - "learning_rate": 3.6799999999999996e-07, - "loss": 1.4861, - "num_input_tokens_seen": 589705, - "step": 23 - }, - { - "epoch": 0.002885829375338183, - "grad_norm": 19.429603528947897, - "learning_rate": 3.84e-07, - "loss": 1.4827, - "num_input_tokens_seen": 608200, - "step": 24 - }, - { - "epoch": 0.003006072265977274, - "grad_norm": 17.7170545526128, - "learning_rate": 4e-07, - "loss": 1.3445, - "num_input_tokens_seen": 629060, - "step": 25 - }, - { - "epoch": 0.003126315156616365, - "grad_norm": 11.960001671848111, - "learning_rate": 4.1599999999999997e-07, - "loss": 1.2165, - "num_input_tokens_seen": 648295, - "step": 26 - }, - { - "epoch": 0.003246558047255456, - "grad_norm": 11.74622932889789, - "learning_rate": 4.3199999999999995e-07, - "loss": 1.2545, - "num_input_tokens_seen": 668170, - "step": 27 - }, - { - "epoch": 0.003366800937894547, - "grad_norm": 14.845658893922659, - "learning_rate": 4.48e-07, - "loss": 1.245, - "num_input_tokens_seen": 686765, - "step": 28 - }, - { - "epoch": 0.003487043828533638, - "grad_norm": 13.403104423982521, - "learning_rate": 4.64e-07, - "loss": 1.3309, - "num_input_tokens_seen": 706220, - "step": 29 - }, - { - "epoch": 0.003607286719172729, - "grad_norm": 11.242430165804379, - "learning_rate": 4.8e-07, - "loss": 1.258, - "num_input_tokens_seen": 726070, - "step": 30 - }, - { - "epoch": 0.00372752960981182, - "grad_norm": 14.977088957468919, - "learning_rate": 4.96e-07, - "loss": 1.279, - "num_input_tokens_seen": 744700, - "step": 31 - }, - { - "epoch": 0.003847772500450911, - "grad_norm": 14.752931309798909, - "learning_rate": 5.12e-07, - "loss": 1.354, - "num_input_tokens_seen": 763145, - "step": 32 - }, - { - "epoch": 0.003968015391090002, - "grad_norm": 13.515572929734024, - "learning_rate": 5.28e-07, - "loss": 1.331, - "num_input_tokens_seen": 779715, - "step": 33 - }, - { - "epoch": 0.004088258281729093, - "grad_norm": 11.770082249868278, - "learning_rate": 5.44e-07, - "loss": 1.1584, - "num_input_tokens_seen": 800710, - "step": 34 - }, - { - "epoch": 0.004208501172368184, - "grad_norm": 8.89325054688529, - "learning_rate": 5.6e-07, - "loss": 1.2711, - "num_input_tokens_seen": 819980, - "step": 35 - }, - { - "epoch": 0.0043287440630072746, - "grad_norm": 8.22879362274236, - "learning_rate": 5.76e-07, - "loss": 1.1883, - "num_input_tokens_seen": 838615, - "step": 36 - }, - { - "epoch": 0.004448986953646365, - "grad_norm": 2.6453796695003007, - "learning_rate": 5.919999999999999e-07, - "loss": 0.7104, - "num_input_tokens_seen": 896370, - "step": 37 - }, - { - "epoch": 0.004569229844285457, - "grad_norm": 7.717980340101501, - "learning_rate": 6.079999999999999e-07, - "loss": 1.2097, - "num_input_tokens_seen": 914485, - "step": 38 - }, - { - "epoch": 0.0046894727349245475, - "grad_norm": 9.793112299185252, - "learning_rate": 6.24e-07, - "loss": 1.1771, - "num_input_tokens_seen": 929050, - "step": 39 - }, - { - "epoch": 0.004809715625563638, - "grad_norm": 9.499707645033508, - "learning_rate": 6.4e-07, - "loss": 1.1611, - "num_input_tokens_seen": 946270, - "step": 40 - }, - { - "epoch": 0.00492995851620273, - "grad_norm": 7.216169069521672, - "learning_rate": 6.56e-07, - "loss": 1.1282, - "num_input_tokens_seen": 964925, - "step": 41 - }, - { - "epoch": 0.0050502014068418205, - "grad_norm": 6.939434270515214, - "learning_rate": 6.72e-07, - "loss": 1.2078, - "num_input_tokens_seen": 982930, - "step": 42 - }, - { - "epoch": 0.005170444297480911, - "grad_norm": 22.099305204217522, - "learning_rate": 6.879999999999999e-07, - "loss": 1.1456, - "num_input_tokens_seen": 1003575, - "step": 43 - }, - { - "epoch": 0.005290687188120003, - "grad_norm": 8.346215739815168, - "learning_rate": 7.04e-07, - "loss": 1.0522, - "num_input_tokens_seen": 1018935, - "step": 44 - }, - { - "epoch": 0.005410930078759093, - "grad_norm": 10.480855956190826, - "learning_rate": 7.2e-07, - "loss": 1.0691, - "num_input_tokens_seen": 1035695, - "step": 45 - }, - { - "epoch": 0.005531172969398184, - "grad_norm": 5.395219573536445, - "learning_rate": 7.359999999999999e-07, - "loss": 1.2168, - "num_input_tokens_seen": 1055045, - "step": 46 - }, - { - "epoch": 0.005651415860037276, - "grad_norm": 5.775332818730067, - "learning_rate": 7.52e-07, - "loss": 1.2571, - "num_input_tokens_seen": 1074900, - "step": 47 - }, - { - "epoch": 0.005771658750676366, - "grad_norm": 7.2837732367086865, - "learning_rate": 7.68e-07, - "loss": 1.2879, - "num_input_tokens_seen": 1090690, - "step": 48 - }, - { - "epoch": 0.005891901641315457, - "grad_norm": 5.24541908377467, - "learning_rate": 7.84e-07, - "loss": 1.1913, - "num_input_tokens_seen": 1109180, - "step": 49 - }, - { - "epoch": 0.006012144531954548, - "grad_norm": 5.952518299615795, - "learning_rate": 8e-07, - "loss": 1.0614, - "num_input_tokens_seen": 1126250, - "step": 50 - }, - { - "epoch": 0.006132387422593639, - "grad_norm": 3.896135622580414, - "learning_rate": 8.159999999999999e-07, - "loss": 1.0546, - "num_input_tokens_seen": 1146190, - "step": 51 - }, - { - "epoch": 0.00625263031323273, - "grad_norm": 41.40049844228182, - "learning_rate": 8.319999999999999e-07, - "loss": 1.1177, - "num_input_tokens_seen": 1163045, - "step": 52 - }, - { - "epoch": 0.006372873203871821, - "grad_norm": 3.8000165962503587, - "learning_rate": 8.48e-07, - "loss": 1.0098, - "num_input_tokens_seen": 1182895, - "step": 53 - }, - { - "epoch": 0.006493116094510912, - "grad_norm": 5.752494032489688, - "learning_rate": 8.639999999999999e-07, - "loss": 1.0687, - "num_input_tokens_seen": 1201215, - "step": 54 - }, - { - "epoch": 0.006613358985150003, - "grad_norm": 4.256298698460216, - "learning_rate": 8.799999999999999e-07, - "loss": 0.9817, - "num_input_tokens_seen": 1219080, - "step": 55 - }, - { - "epoch": 0.006733601875789094, - "grad_norm": 5.819299297378959, - "learning_rate": 8.96e-07, - "loss": 1.0443, - "num_input_tokens_seen": 1238190, - "step": 56 - }, - { - "epoch": 0.006853844766428185, - "grad_norm": 3.690848196547338, - "learning_rate": 9.12e-07, - "loss": 0.9853, - "num_input_tokens_seen": 1254185, - "step": 57 - }, - { - "epoch": 0.006974087657067276, - "grad_norm": 3.0774127965544498, - "learning_rate": 9.28e-07, - "loss": 1.1089, - "num_input_tokens_seen": 1275125, - "step": 58 - }, - { - "epoch": 0.007094330547706367, - "grad_norm": 2.9513806577729804, - "learning_rate": 9.439999999999999e-07, - "loss": 1.026, - "num_input_tokens_seen": 1295015, - "step": 59 - }, - { - "epoch": 0.007214573438345458, - "grad_norm": 3.654693188012229, - "learning_rate": 9.6e-07, - "loss": 1.0673, - "num_input_tokens_seen": 1312620, - "step": 60 - }, - { - "epoch": 0.007334816328984549, - "grad_norm": 3.573621302192546, - "learning_rate": 9.759999999999998e-07, - "loss": 1.068, - "num_input_tokens_seen": 1332885, - "step": 61 - }, - { - "epoch": 0.00745505921962364, - "grad_norm": 3.7230555195724224, - "learning_rate": 9.92e-07, - "loss": 0.8656, - "num_input_tokens_seen": 1351555, - "step": 62 - }, - { - "epoch": 0.007575302110262731, - "grad_norm": 4.149777613467972, - "learning_rate": 1.008e-06, - "loss": 1.0267, - "num_input_tokens_seen": 1369165, - "step": 63 - }, - { - "epoch": 0.007695545000901822, - "grad_norm": 6.0850187219007985, - "learning_rate": 1.024e-06, - "loss": 1.0908, - "num_input_tokens_seen": 1388270, - "step": 64 - }, - { - "epoch": 0.007815787891540913, - "grad_norm": 3.340632782893104, - "learning_rate": 1.04e-06, - "loss": 0.9867, - "num_input_tokens_seen": 1407300, - "step": 65 - }, - { - "epoch": 0.007936030782180003, - "grad_norm": 3.582789825081253, - "learning_rate": 1.056e-06, - "loss": 0.9946, - "num_input_tokens_seen": 1424470, - "step": 66 - }, - { - "epoch": 0.008056273672819095, - "grad_norm": 3.1541221437131344, - "learning_rate": 1.072e-06, - "loss": 0.98, - "num_input_tokens_seen": 1442155, - "step": 67 - }, - { - "epoch": 0.008176516563458186, - "grad_norm": 3.6902406400222256, - "learning_rate": 1.088e-06, - "loss": 1.1719, - "num_input_tokens_seen": 1459345, - "step": 68 - }, - { - "epoch": 0.008296759454097276, - "grad_norm": 3.4214569083423814, - "learning_rate": 1.1040000000000001e-06, - "loss": 1.0531, - "num_input_tokens_seen": 1478285, - "step": 69 - }, - { - "epoch": 0.008417002344736368, - "grad_norm": 4.228524063604063, - "learning_rate": 1.12e-06, - "loss": 0.7166, - "num_input_tokens_seen": 1541550, - "step": 70 - }, - { - "epoch": 0.008537245235375458, - "grad_norm": 2.6260912402022254, - "learning_rate": 1.1359999999999998e-06, - "loss": 1.0047, - "num_input_tokens_seen": 1560725, - "step": 71 - }, - { - "epoch": 0.008657488126014549, - "grad_norm": 2.5369467447112273, - "learning_rate": 1.152e-06, - "loss": 0.6681, - "num_input_tokens_seen": 1627375, - "step": 72 - }, - { - "epoch": 0.00877773101665364, - "grad_norm": 3.1859449969188502, - "learning_rate": 1.1679999999999999e-06, - "loss": 1.0805, - "num_input_tokens_seen": 1646330, - "step": 73 - }, - { - "epoch": 0.00889797390729273, - "grad_norm": 2.8657630964965906, - "learning_rate": 1.1839999999999998e-06, - "loss": 1.0692, - "num_input_tokens_seen": 1664480, - "step": 74 - }, - { - "epoch": 0.009018216797931822, - "grad_norm": 10.61256176415637, - "learning_rate": 1.2e-06, - "loss": 0.9123, - "num_input_tokens_seen": 1680835, - "step": 75 - }, - { - "epoch": 0.009138459688570914, - "grad_norm": 2.331018335054291, - "learning_rate": 1.2159999999999999e-06, - "loss": 1.0267, - "num_input_tokens_seen": 1697135, - "step": 76 - }, - { - "epoch": 0.009258702579210003, - "grad_norm": 2.5651943665540737, - "learning_rate": 1.232e-06, - "loss": 0.8549, - "num_input_tokens_seen": 1717210, - "step": 77 - }, - { - "epoch": 0.009378945469849095, - "grad_norm": 2.765305676893068, - "learning_rate": 1.248e-06, - "loss": 0.9875, - "num_input_tokens_seen": 1736200, - "step": 78 - }, - { - "epoch": 0.009499188360488187, - "grad_norm": 3.820065755361661, - "learning_rate": 1.2639999999999999e-06, - "loss": 0.875, - "num_input_tokens_seen": 1754585, - "step": 79 - }, - { - "epoch": 0.009619431251127276, - "grad_norm": 2.9095283464670327, - "learning_rate": 1.28e-06, - "loss": 0.835, - "num_input_tokens_seen": 1773515, - "step": 80 - }, - { - "epoch": 0.009739674141766368, - "grad_norm": 2.7823775247501747, - "learning_rate": 1.296e-06, - "loss": 0.9545, - "num_input_tokens_seen": 1791375, - "step": 81 - }, - { - "epoch": 0.00985991703240546, - "grad_norm": 2.640773573543164, - "learning_rate": 1.312e-06, - "loss": 0.9682, - "num_input_tokens_seen": 1811575, - "step": 82 - }, - { - "epoch": 0.00998015992304455, - "grad_norm": 3.7630226558544817, - "learning_rate": 1.328e-06, - "loss": 0.9128, - "num_input_tokens_seen": 1828625, - "step": 83 - }, - { - "epoch": 0.010100402813683641, - "grad_norm": 2.856679212156785, - "learning_rate": 1.344e-06, - "loss": 1.047, - "num_input_tokens_seen": 1845020, - "step": 84 - }, - { - "epoch": 0.010220645704322733, - "grad_norm": 2.359491250173105, - "learning_rate": 1.3600000000000001e-06, - "loss": 0.9729, - "num_input_tokens_seen": 1863380, - "step": 85 - }, - { - "epoch": 0.010340888594961822, - "grad_norm": 2.468919473260775, - "learning_rate": 1.3759999999999998e-06, - "loss": 1.0167, - "num_input_tokens_seen": 1880685, - "step": 86 - }, - { - "epoch": 0.010461131485600914, - "grad_norm": 2.3568124171488276, - "learning_rate": 1.3919999999999998e-06, - "loss": 1.0316, - "num_input_tokens_seen": 1898240, - "step": 87 - }, - { - "epoch": 0.010581374376240005, - "grad_norm": 2.2460480078903706, - "learning_rate": 1.408e-06, - "loss": 0.8916, - "num_input_tokens_seen": 1919310, - "step": 88 - }, - { - "epoch": 0.010701617266879095, - "grad_norm": 3.4220106666807295, - "learning_rate": 1.4239999999999998e-06, - "loss": 1.005, - "num_input_tokens_seen": 1937895, - "step": 89 - }, - { - "epoch": 0.010821860157518187, - "grad_norm": 3.3077822718427985, - "learning_rate": 1.44e-06, - "loss": 0.8353, - "num_input_tokens_seen": 1955380, - "step": 90 - }, - { - "epoch": 0.010942103048157278, - "grad_norm": 2.2062997048467285, - "learning_rate": 1.456e-06, - "loss": 0.9095, - "num_input_tokens_seen": 1974835, - "step": 91 - }, - { - "epoch": 0.011062345938796368, - "grad_norm": 2.057796965170375, - "learning_rate": 1.4719999999999998e-06, - "loss": 0.9946, - "num_input_tokens_seen": 1998000, - "step": 92 - }, - { - "epoch": 0.01118258882943546, - "grad_norm": 2.270378394421132, - "learning_rate": 1.488e-06, - "loss": 1.0119, - "num_input_tokens_seen": 2015855, - "step": 93 - }, - { - "epoch": 0.011302831720074551, - "grad_norm": 2.4562066154538016, - "learning_rate": 1.504e-06, - "loss": 0.8891, - "num_input_tokens_seen": 2035320, - "step": 94 - }, - { - "epoch": 0.011423074610713641, - "grad_norm": 3.495932623287209, - "learning_rate": 1.5199999999999998e-06, - "loss": 0.9402, - "num_input_tokens_seen": 2051655, - "step": 95 - }, - { - "epoch": 0.011543317501352733, - "grad_norm": 2.7479802049500868, - "learning_rate": 1.536e-06, - "loss": 0.868, - "num_input_tokens_seen": 2072610, - "step": 96 - }, - { - "epoch": 0.011663560391991824, - "grad_norm": 2.559521939773065, - "learning_rate": 1.552e-06, - "loss": 0.9809, - "num_input_tokens_seen": 2088965, - "step": 97 - }, - { - "epoch": 0.011783803282630914, - "grad_norm": 2.0619497793104133, - "learning_rate": 1.568e-06, - "loss": 0.9235, - "num_input_tokens_seen": 2107395, - "step": 98 - }, - { - "epoch": 0.011904046173270006, - "grad_norm": 3.228180736810277, - "learning_rate": 1.584e-06, - "loss": 1.0747, - "num_input_tokens_seen": 2124690, - "step": 99 - }, - { - "epoch": 0.012024289063909096, - "grad_norm": 4.501754251205946, - "learning_rate": 1.6e-06, - "loss": 1.0256, - "num_input_tokens_seen": 2144090, - "step": 100 - }, - { - "epoch": 0.012144531954548187, - "grad_norm": 3.564414236264299, - "learning_rate": 1.616e-06, - "loss": 1.017, - "num_input_tokens_seen": 2160740, - "step": 101 - }, - { - "epoch": 0.012264774845187279, - "grad_norm": 2.7003108999019814, - "learning_rate": 1.6319999999999998e-06, - "loss": 0.8935, - "num_input_tokens_seen": 2177060, - "step": 102 - }, - { - "epoch": 0.012385017735826368, - "grad_norm": 3.8518331275990394, - "learning_rate": 1.648e-06, - "loss": 0.9754, - "num_input_tokens_seen": 2190160, - "step": 103 - }, - { - "epoch": 0.01250526062646546, - "grad_norm": 2.542467424896015, - "learning_rate": 1.6639999999999999e-06, - "loss": 0.9297, - "num_input_tokens_seen": 2208670, - "step": 104 - }, - { - "epoch": 0.012625503517104552, - "grad_norm": 1.9717915662426795, - "learning_rate": 1.6799999999999998e-06, - "loss": 0.9041, - "num_input_tokens_seen": 2230565, - "step": 105 - }, - { - "epoch": 0.012745746407743641, - "grad_norm": 3.3319317561629416, - "learning_rate": 1.696e-06, - "loss": 0.981, - "num_input_tokens_seen": 2248930, - "step": 106 - }, - { - "epoch": 0.012865989298382733, - "grad_norm": 4.152197578092614, - "learning_rate": 1.7119999999999999e-06, - "loss": 0.9556, - "num_input_tokens_seen": 2267770, - "step": 107 - }, - { - "epoch": 0.012986232189021825, - "grad_norm": 2.4287397190036013, - "learning_rate": 1.7279999999999998e-06, - "loss": 1.0385, - "num_input_tokens_seen": 2285500, - "step": 108 - }, - { - "epoch": 0.013106475079660914, - "grad_norm": 9.633842873328389, - "learning_rate": 1.744e-06, - "loss": 0.9105, - "num_input_tokens_seen": 2303695, - "step": 109 - }, - { - "epoch": 0.013226717970300006, - "grad_norm": 3.3400616670912298, - "learning_rate": 1.7599999999999999e-06, - "loss": 1.0078, - "num_input_tokens_seen": 2322330, - "step": 110 - }, - { - "epoch": 0.013346960860939098, - "grad_norm": 2.070687517532242, - "learning_rate": 1.776e-06, - "loss": 0.8576, - "num_input_tokens_seen": 2342930, - "step": 111 - }, - { - "epoch": 0.013467203751578187, - "grad_norm": 2.174410827059307, - "learning_rate": 1.792e-06, - "loss": 0.8111, - "num_input_tokens_seen": 2363860, - "step": 112 - }, - { - "epoch": 0.013587446642217279, - "grad_norm": 2.7768033722885535, - "learning_rate": 1.8079999999999999e-06, - "loss": 1.0026, - "num_input_tokens_seen": 2381385, - "step": 113 - }, - { - "epoch": 0.01370768953285637, - "grad_norm": 2.1856467022399917, - "learning_rate": 1.824e-06, - "loss": 0.9446, - "num_input_tokens_seen": 2400780, - "step": 114 - }, - { - "epoch": 0.01382793242349546, - "grad_norm": 1.8558621114372431, - "learning_rate": 1.84e-06, - "loss": 0.8301, - "num_input_tokens_seen": 2422820, - "step": 115 - }, - { - "epoch": 0.013948175314134552, - "grad_norm": 2.9502438004652007, - "learning_rate": 1.856e-06, - "loss": 0.9762, - "num_input_tokens_seen": 2443295, - "step": 116 - }, - { - "epoch": 0.014068418204773643, - "grad_norm": 2.608620903998619, - "learning_rate": 1.872e-06, - "loss": 0.9259, - "num_input_tokens_seen": 2463610, - "step": 117 - }, - { - "epoch": 0.014188661095412733, - "grad_norm": 1.7840962844132313, - "learning_rate": 1.8879999999999998e-06, - "loss": 0.9212, - "num_input_tokens_seen": 2484605, - "step": 118 - }, - { - "epoch": 0.014308903986051825, - "grad_norm": 1.8457455302803532, - "learning_rate": 1.904e-06, - "loss": 0.9669, - "num_input_tokens_seen": 2504505, - "step": 119 - }, - { - "epoch": 0.014429146876690916, - "grad_norm": 5.241660970805831, - "learning_rate": 1.92e-06, - "loss": 0.9142, - "num_input_tokens_seen": 2522885, - "step": 120 - }, - { - "epoch": 0.014549389767330006, - "grad_norm": 1.9430717357879224, - "learning_rate": 1.9359999999999998e-06, - "loss": 0.8675, - "num_input_tokens_seen": 2540065, - "step": 121 - }, - { - "epoch": 0.014669632657969098, - "grad_norm": 2.294012710983687, - "learning_rate": 1.9519999999999997e-06, - "loss": 0.9447, - "num_input_tokens_seen": 2558535, - "step": 122 - }, - { - "epoch": 0.01478987554860819, - "grad_norm": 2.5445207274775177, - "learning_rate": 1.968e-06, - "loss": 0.9382, - "num_input_tokens_seen": 2576485, - "step": 123 - }, - { - "epoch": 0.01491011843924728, - "grad_norm": 3.440578373906021, - "learning_rate": 1.984e-06, - "loss": 0.8937, - "num_input_tokens_seen": 2594775, - "step": 124 - }, - { - "epoch": 0.01503036132988637, - "grad_norm": 4.179068113596611, - "learning_rate": 2e-06, - "loss": 0.9264, - "num_input_tokens_seen": 2610070, - "step": 125 - }, - { - "epoch": 0.015150604220525462, - "grad_norm": 2.0109128094553106, - "learning_rate": 2.016e-06, - "loss": 1.0141, - "num_input_tokens_seen": 2628545, - "step": 126 - }, - { - "epoch": 0.015270847111164552, - "grad_norm": 2.4370645100205577, - "learning_rate": 2.0319999999999998e-06, - "loss": 0.8733, - "num_input_tokens_seen": 2649150, - "step": 127 - }, - { - "epoch": 0.015391090001803644, - "grad_norm": 2.924916290529411, - "learning_rate": 2.048e-06, - "loss": 0.8337, - "num_input_tokens_seen": 2671155, - "step": 128 - }, - { - "epoch": 0.015511332892442733, - "grad_norm": 2.1589013481779813, - "learning_rate": 2.064e-06, - "loss": 0.8824, - "num_input_tokens_seen": 2690930, - "step": 129 - }, - { - "epoch": 0.015631575783081827, - "grad_norm": 5.078251625531773, - "learning_rate": 2.08e-06, - "loss": 0.9196, - "num_input_tokens_seen": 2708950, - "step": 130 - }, - { - "epoch": 0.015751818673720917, - "grad_norm": 2.5727333362203932, - "learning_rate": 2.096e-06, - "loss": 0.7704, - "num_input_tokens_seen": 2777515, - "step": 131 - }, - { - "epoch": 0.015872061564360006, - "grad_norm": 1.9660847199014126, - "learning_rate": 2.112e-06, - "loss": 0.8605, - "num_input_tokens_seen": 2797685, - "step": 132 - }, - { - "epoch": 0.0159923044549991, - "grad_norm": 8.993657795989597, - "learning_rate": 2.128e-06, - "loss": 0.9505, - "num_input_tokens_seen": 2816880, - "step": 133 - }, - { - "epoch": 0.01611254734563819, - "grad_norm": 2.6035258687730667, - "learning_rate": 2.144e-06, - "loss": 0.9127, - "num_input_tokens_seen": 2833810, - "step": 134 - }, - { - "epoch": 0.01623279023627728, - "grad_norm": 2.3695092892084557, - "learning_rate": 2.16e-06, - "loss": 0.8116, - "num_input_tokens_seen": 2850270, - "step": 135 - }, - { - "epoch": 0.016353033126916373, - "grad_norm": 2.1015483088611235, - "learning_rate": 2.176e-06, - "loss": 0.8841, - "num_input_tokens_seen": 2868385, - "step": 136 - }, - { - "epoch": 0.016473276017555463, - "grad_norm": 2.033342147665128, - "learning_rate": 2.192e-06, - "loss": 0.8097, - "num_input_tokens_seen": 2886555, - "step": 137 - }, - { - "epoch": 0.016593518908194552, - "grad_norm": 5.374117582207986, - "learning_rate": 2.2080000000000003e-06, - "loss": 0.872, - "num_input_tokens_seen": 2903490, - "step": 138 - }, - { - "epoch": 0.016713761798833646, - "grad_norm": 1.7660412787860578, - "learning_rate": 2.2240000000000002e-06, - "loss": 0.9242, - "num_input_tokens_seen": 2923825, - "step": 139 - }, - { - "epoch": 0.016834004689472735, - "grad_norm": 2.8295424624413834, - "learning_rate": 2.24e-06, - "loss": 0.9521, - "num_input_tokens_seen": 2942625, - "step": 140 - }, - { - "epoch": 0.016954247580111825, - "grad_norm": 2.632831787081112, - "learning_rate": 2.2559999999999997e-06, - "loss": 0.9146, - "num_input_tokens_seen": 2962145, - "step": 141 - }, - { - "epoch": 0.017074490470750915, - "grad_norm": 1.9878878790787027, - "learning_rate": 2.2719999999999996e-06, - "loss": 0.8116, - "num_input_tokens_seen": 2982295, - "step": 142 - }, - { - "epoch": 0.01719473336139001, - "grad_norm": 2.662017608521343, - "learning_rate": 2.2879999999999995e-06, - "loss": 0.9065, - "num_input_tokens_seen": 2999565, - "step": 143 - }, - { - "epoch": 0.017314976252029098, - "grad_norm": 3.3444672360880463, - "learning_rate": 2.304e-06, - "loss": 0.8412, - "num_input_tokens_seen": 3019085, - "step": 144 - }, - { - "epoch": 0.017435219142668188, - "grad_norm": 2.5321708153526066, - "learning_rate": 2.32e-06, - "loss": 1.0048, - "num_input_tokens_seen": 3037890, - "step": 145 - }, - { - "epoch": 0.01755546203330728, - "grad_norm": 2.9943159986207117, - "learning_rate": 2.3359999999999997e-06, - "loss": 0.6683, - "num_input_tokens_seen": 3090875, - "step": 146 - }, - { - "epoch": 0.01767570492394637, - "grad_norm": 21.870716491930853, - "learning_rate": 2.3519999999999997e-06, - "loss": 0.8908, - "num_input_tokens_seen": 3110875, - "step": 147 - }, - { - "epoch": 0.01779594781458546, - "grad_norm": 2.1260132216716134, - "learning_rate": 2.3679999999999996e-06, - "loss": 0.9163, - "num_input_tokens_seen": 3129185, - "step": 148 - }, - { - "epoch": 0.017916190705224554, - "grad_norm": 1.8728693818910558, - "learning_rate": 2.384e-06, - "loss": 0.9325, - "num_input_tokens_seen": 3146435, - "step": 149 - }, - { - "epoch": 0.018036433595863644, - "grad_norm": 2.0437753837505306, - "learning_rate": 2.4e-06, - "loss": 0.8984, - "num_input_tokens_seen": 3167015, - "step": 150 - }, - { - "epoch": 0.018156676486502734, - "grad_norm": 2.291205503660783, - "learning_rate": 2.416e-06, - "loss": 0.9711, - "num_input_tokens_seen": 3182675, - "step": 151 - }, - { - "epoch": 0.018276919377141827, - "grad_norm": 2.0189930539034338, - "learning_rate": 2.4319999999999998e-06, - "loss": 0.8963, - "num_input_tokens_seen": 3203770, - "step": 152 - }, - { - "epoch": 0.018397162267780917, - "grad_norm": 3.091421591798236, - "learning_rate": 2.4479999999999997e-06, - "loss": 0.9463, - "num_input_tokens_seen": 3221450, - "step": 153 - }, - { - "epoch": 0.018517405158420007, - "grad_norm": 3.2727675877354874, - "learning_rate": 2.464e-06, - "loss": 0.9849, - "num_input_tokens_seen": 3243945, - "step": 154 - }, - { - "epoch": 0.0186376480490591, - "grad_norm": 1.7930897339711367, - "learning_rate": 2.48e-06, - "loss": 0.9505, - "num_input_tokens_seen": 3262195, - "step": 155 - }, - { - "epoch": 0.01875789093969819, - "grad_norm": 2.641276078851638, - "learning_rate": 2.496e-06, - "loss": 0.7935, - "num_input_tokens_seen": 3282455, - "step": 156 - }, - { - "epoch": 0.01887813383033728, - "grad_norm": 2.33853955250005, - "learning_rate": 2.512e-06, - "loss": 1.0863, - "num_input_tokens_seen": 3299550, - "step": 157 - }, - { - "epoch": 0.018998376720976373, - "grad_norm": 3.0425475329245555, - "learning_rate": 2.5279999999999998e-06, - "loss": 0.874, - "num_input_tokens_seen": 3317085, - "step": 158 - }, - { - "epoch": 0.019118619611615463, - "grad_norm": 2.533432673460887, - "learning_rate": 2.544e-06, - "loss": 0.9242, - "num_input_tokens_seen": 3333405, - "step": 159 - }, - { - "epoch": 0.019238862502254553, - "grad_norm": 2.068036193078605, - "learning_rate": 2.56e-06, - "loss": 0.9028, - "num_input_tokens_seen": 3351535, - "step": 160 - }, - { - "epoch": 0.019359105392893646, - "grad_norm": 8.547892839133752, - "learning_rate": 2.576e-06, - "loss": 0.928, - "num_input_tokens_seen": 3368525, - "step": 161 - }, - { - "epoch": 0.019479348283532736, - "grad_norm": 3.0155686367278887, - "learning_rate": 2.592e-06, - "loss": 0.8641, - "num_input_tokens_seen": 3384280, - "step": 162 - }, - { - "epoch": 0.019599591174171826, - "grad_norm": 3.029801884905472, - "learning_rate": 2.608e-06, - "loss": 0.9857, - "num_input_tokens_seen": 3402485, - "step": 163 - }, - { - "epoch": 0.01971983406481092, - "grad_norm": 2.5536934830139133, - "learning_rate": 2.624e-06, - "loss": 0.9003, - "num_input_tokens_seen": 3420615, - "step": 164 - }, - { - "epoch": 0.01984007695545001, - "grad_norm": 3.315173991714741, - "learning_rate": 2.64e-06, - "loss": 0.7828, - "num_input_tokens_seen": 3437530, - "step": 165 - }, - { - "epoch": 0.0199603198460891, - "grad_norm": 4.120677509621033, - "learning_rate": 2.656e-06, - "loss": 0.8391, - "num_input_tokens_seen": 3457160, - "step": 166 - }, - { - "epoch": 0.020080562736728192, - "grad_norm": 2.0188829573316327, - "learning_rate": 2.672e-06, - "loss": 0.8717, - "num_input_tokens_seen": 3476250, - "step": 167 - }, - { - "epoch": 0.020200805627367282, - "grad_norm": 2.163243368932872, - "learning_rate": 2.688e-06, - "loss": 1.0022, - "num_input_tokens_seen": 3494085, - "step": 168 - }, - { - "epoch": 0.02032104851800637, - "grad_norm": 2.305938588044211, - "learning_rate": 2.704e-06, - "loss": 0.8716, - "num_input_tokens_seen": 3515380, - "step": 169 - }, - { - "epoch": 0.020441291408645465, - "grad_norm": 2.5376505925090993, - "learning_rate": 2.7200000000000002e-06, - "loss": 0.8751, - "num_input_tokens_seen": 3535190, - "step": 170 - }, - { - "epoch": 0.020561534299284555, - "grad_norm": 2.6634625991119214, - "learning_rate": 2.736e-06, - "loss": 0.8974, - "num_input_tokens_seen": 3553835, - "step": 171 - }, - { - "epoch": 0.020681777189923645, - "grad_norm": 2.0605741545982994, - "learning_rate": 2.7519999999999997e-06, - "loss": 1.0094, - "num_input_tokens_seen": 3572085, - "step": 172 - }, - { - "epoch": 0.020802020080562738, - "grad_norm": 1.7783133406681224, - "learning_rate": 2.7679999999999996e-06, - "loss": 0.8672, - "num_input_tokens_seen": 3593105, - "step": 173 - }, - { - "epoch": 0.020922262971201828, - "grad_norm": 2.7612219369538056, - "learning_rate": 2.7839999999999995e-06, - "loss": 0.9931, - "num_input_tokens_seen": 3611790, - "step": 174 - }, - { - "epoch": 0.021042505861840918, - "grad_norm": 1.7932511617528923, - "learning_rate": 2.8e-06, - "loss": 0.9382, - "num_input_tokens_seen": 3628505, - "step": 175 - }, - { - "epoch": 0.02116274875248001, - "grad_norm": 2.2964324675622207, - "learning_rate": 2.816e-06, - "loss": 0.9283, - "num_input_tokens_seen": 3647480, - "step": 176 - }, - { - "epoch": 0.0212829916431191, - "grad_norm": 8.618171898463725, - "learning_rate": 2.8319999999999997e-06, - "loss": 0.7585, - "num_input_tokens_seen": 3667905, - "step": 177 - }, - { - "epoch": 0.02140323453375819, - "grad_norm": 2.9829833791235507, - "learning_rate": 2.8479999999999997e-06, - "loss": 1.0223, - "num_input_tokens_seen": 3681600, - "step": 178 - }, - { - "epoch": 0.021523477424397284, - "grad_norm": 3.2757078309522942, - "learning_rate": 2.8639999999999996e-06, - "loss": 0.964, - "num_input_tokens_seen": 3694815, - "step": 179 - }, - { - "epoch": 0.021643720315036374, - "grad_norm": 5.576198688542104, - "learning_rate": 2.88e-06, - "loss": 0.9364, - "num_input_tokens_seen": 3712635, - "step": 180 - }, - { - "epoch": 0.021763963205675464, - "grad_norm": 1.8422395700543908, - "learning_rate": 2.896e-06, - "loss": 0.9657, - "num_input_tokens_seen": 3731985, - "step": 181 - }, - { - "epoch": 0.021884206096314557, - "grad_norm": 2.260902259036363, - "learning_rate": 2.912e-06, - "loss": 0.9113, - "num_input_tokens_seen": 3751475, - "step": 182 - }, - { - "epoch": 0.022004448986953647, - "grad_norm": 1.7065035724930062, - "learning_rate": 2.9279999999999997e-06, - "loss": 0.8515, - "num_input_tokens_seen": 3771250, - "step": 183 - }, - { - "epoch": 0.022124691877592736, - "grad_norm": 2.1608266208153526, - "learning_rate": 2.9439999999999997e-06, - "loss": 0.8319, - "num_input_tokens_seen": 3789620, - "step": 184 - }, - { - "epoch": 0.02224493476823183, - "grad_norm": 2.151624959169906, - "learning_rate": 2.96e-06, - "loss": 0.884, - "num_input_tokens_seen": 3810290, - "step": 185 - }, - { - "epoch": 0.02236517765887092, - "grad_norm": 2.7629721655525215, - "learning_rate": 2.976e-06, - "loss": 0.8081, - "num_input_tokens_seen": 3828945, - "step": 186 - }, - { - "epoch": 0.02248542054951001, - "grad_norm": 2.3923748982885664, - "learning_rate": 2.992e-06, - "loss": 0.8103, - "num_input_tokens_seen": 3852950, - "step": 187 - }, - { - "epoch": 0.022605663440149103, - "grad_norm": 2.0384417946723525, - "learning_rate": 3.008e-06, - "loss": 0.8736, - "num_input_tokens_seen": 3871625, - "step": 188 - }, - { - "epoch": 0.022725906330788193, - "grad_norm": 2.0998931061117774, - "learning_rate": 3.0239999999999998e-06, - "loss": 0.9131, - "num_input_tokens_seen": 3891910, - "step": 189 - }, - { - "epoch": 0.022846149221427282, - "grad_norm": 2.6885335566194954, - "learning_rate": 3.0399999999999997e-06, - "loss": 0.806, - "num_input_tokens_seen": 3912535, - "step": 190 - }, - { - "epoch": 0.022966392112066376, - "grad_norm": 2.528212138727624, - "learning_rate": 3.056e-06, - "loss": 0.925, - "num_input_tokens_seen": 3930495, - "step": 191 - }, - { - "epoch": 0.023086635002705466, - "grad_norm": 1.9604148235959722, - "learning_rate": 3.072e-06, - "loss": 0.6824, - "num_input_tokens_seen": 3989680, - "step": 192 - }, - { - "epoch": 0.023206877893344555, - "grad_norm": 2.199692256384985, - "learning_rate": 3.088e-06, - "loss": 0.875, - "num_input_tokens_seen": 4007710, - "step": 193 - }, - { - "epoch": 0.02332712078398365, - "grad_norm": 2.0800724959674075, - "learning_rate": 3.104e-06, - "loss": 0.8934, - "num_input_tokens_seen": 4028025, - "step": 194 - }, - { - "epoch": 0.02344736367462274, - "grad_norm": 2.501253824161815, - "learning_rate": 3.1199999999999998e-06, - "loss": 0.8741, - "num_input_tokens_seen": 4047170, - "step": 195 - }, - { - "epoch": 0.02356760656526183, - "grad_norm": 2.737793027306602, - "learning_rate": 3.136e-06, - "loss": 0.8622, - "num_input_tokens_seen": 4063890, - "step": 196 - }, - { - "epoch": 0.02368784945590092, - "grad_norm": 2.6920416062813595, - "learning_rate": 3.152e-06, - "loss": 0.8675, - "num_input_tokens_seen": 4085950, - "step": 197 - }, - { - "epoch": 0.02380809234654001, - "grad_norm": 2.385608531737704, - "learning_rate": 3.168e-06, - "loss": 0.8189, - "num_input_tokens_seen": 4105990, - "step": 198 - }, - { - "epoch": 0.0239283352371791, - "grad_norm": 4.449159379014996, - "learning_rate": 3.184e-06, - "loss": 0.793, - "num_input_tokens_seen": 4123540, - "step": 199 - }, - { - "epoch": 0.02404857812781819, - "grad_norm": 3.036401726967315, - "learning_rate": 3.2e-06, - "loss": 0.8932, - "num_input_tokens_seen": 4144640, - "step": 200 - }, - { - "epoch": 0.024168821018457284, - "grad_norm": 4.079062602335863, - "learning_rate": 3.216e-06, - "loss": 0.8859, - "num_input_tokens_seen": 4161890, - "step": 201 - }, - { - "epoch": 0.024289063909096374, - "grad_norm": 2.4279335564897044, - "learning_rate": 3.232e-06, - "loss": 0.9, - "num_input_tokens_seen": 4181210, - "step": 202 - }, - { - "epoch": 0.024409306799735464, - "grad_norm": 2.1161649705444936, - "learning_rate": 3.248e-06, - "loss": 0.8027, - "num_input_tokens_seen": 4199145, - "step": 203 - }, - { - "epoch": 0.024529549690374557, - "grad_norm": 3.388974815324258, - "learning_rate": 3.2639999999999996e-06, - "loss": 0.8541, - "num_input_tokens_seen": 4218915, - "step": 204 - }, - { - "epoch": 0.024649792581013647, - "grad_norm": 2.617284163672077, - "learning_rate": 3.2799999999999995e-06, - "loss": 0.9179, - "num_input_tokens_seen": 4238330, - "step": 205 - }, - { - "epoch": 0.024770035471652737, - "grad_norm": 2.1494673801549276, - "learning_rate": 3.296e-06, - "loss": 0.9498, - "num_input_tokens_seen": 4260270, - "step": 206 - }, - { - "epoch": 0.02489027836229183, - "grad_norm": 3.1290271939230396, - "learning_rate": 3.312e-06, - "loss": 0.9356, - "num_input_tokens_seen": 4279645, - "step": 207 - }, - { - "epoch": 0.02501052125293092, - "grad_norm": 1.835330343206158, - "learning_rate": 3.3279999999999997e-06, - "loss": 0.8781, - "num_input_tokens_seen": 4299870, - "step": 208 - }, - { - "epoch": 0.02513076414357001, - "grad_norm": 2.7546319081587716, - "learning_rate": 3.3439999999999997e-06, - "loss": 0.9351, - "num_input_tokens_seen": 4316435, - "step": 209 - }, - { - "epoch": 0.025251007034209103, - "grad_norm": 1.992628943271235, - "learning_rate": 3.3599999999999996e-06, - "loss": 0.947, - "num_input_tokens_seen": 4332735, - "step": 210 - }, - { - "epoch": 0.025371249924848193, - "grad_norm": 2.4268098972266694, - "learning_rate": 3.3759999999999995e-06, - "loss": 0.942, - "num_input_tokens_seen": 4349320, - "step": 211 - }, - { - "epoch": 0.025491492815487283, - "grad_norm": 2.1744302593160585, - "learning_rate": 3.392e-06, - "loss": 0.8166, - "num_input_tokens_seen": 4368480, - "step": 212 - }, - { - "epoch": 0.025611735706126376, - "grad_norm": 2.8711878058016747, - "learning_rate": 3.408e-06, - "loss": 0.9028, - "num_input_tokens_seen": 4387470, - "step": 213 - }, - { - "epoch": 0.025731978596765466, - "grad_norm": 2.269168587715047, - "learning_rate": 3.4239999999999997e-06, - "loss": 0.931, - "num_input_tokens_seen": 4404795, - "step": 214 - }, - { - "epoch": 0.025852221487404556, - "grad_norm": 1.6805178706742634, - "learning_rate": 3.4399999999999997e-06, - "loss": 0.9555, - "num_input_tokens_seen": 4424830, - "step": 215 - }, - { - "epoch": 0.02597246437804365, - "grad_norm": 2.0318722960568776, - "learning_rate": 3.4559999999999996e-06, - "loss": 0.8713, - "num_input_tokens_seen": 4445455, - "step": 216 - }, - { - "epoch": 0.02609270726868274, - "grad_norm": 1.7359027883562923, - "learning_rate": 3.472e-06, - "loss": 0.7779, - "num_input_tokens_seen": 4466285, - "step": 217 - }, - { - "epoch": 0.02621295015932183, - "grad_norm": 2.0583098476347024, - "learning_rate": 3.488e-06, - "loss": 0.8926, - "num_input_tokens_seen": 4485320, - "step": 218 - }, - { - "epoch": 0.026333193049960922, - "grad_norm": 7.091301056826507, - "learning_rate": 3.504e-06, - "loss": 0.7684, - "num_input_tokens_seen": 4504175, - "step": 219 - }, - { - "epoch": 0.026453435940600012, - "grad_norm": 2.115494348025387, - "learning_rate": 3.5199999999999998e-06, - "loss": 0.84, - "num_input_tokens_seen": 4521730, - "step": 220 - }, - { - "epoch": 0.026573678831239102, - "grad_norm": 3.0038967169121604, - "learning_rate": 3.5359999999999997e-06, - "loss": 0.9121, - "num_input_tokens_seen": 4538640, - "step": 221 - }, - { - "epoch": 0.026693921721878195, - "grad_norm": 2.408591637044203, - "learning_rate": 3.552e-06, - "loss": 0.869, - "num_input_tokens_seen": 4555180, - "step": 222 - }, - { - "epoch": 0.026814164612517285, - "grad_norm": 3.1693145932686453, - "learning_rate": 3.568e-06, - "loss": 0.801, - "num_input_tokens_seen": 4568380, - "step": 223 - }, - { - "epoch": 0.026934407503156375, - "grad_norm": 1.838139943358324, - "learning_rate": 3.584e-06, - "loss": 0.9592, - "num_input_tokens_seen": 4589265, - "step": 224 - }, - { - "epoch": 0.027054650393795468, - "grad_norm": 3.157467581599869, - "learning_rate": 3.6e-06, - "loss": 0.8215, - "num_input_tokens_seen": 4609295, - "step": 225 - }, - { - "epoch": 0.027174893284434558, - "grad_norm": 1.9012003366482346, - "learning_rate": 3.6159999999999998e-06, - "loss": 0.8722, - "num_input_tokens_seen": 4632785, - "step": 226 - }, - { - "epoch": 0.027295136175073648, - "grad_norm": 3.346106484239129, - "learning_rate": 3.632e-06, - "loss": 0.7924, - "num_input_tokens_seen": 4652505, - "step": 227 - }, - { - "epoch": 0.02741537906571274, - "grad_norm": 2.1370629666543968, - "learning_rate": 3.648e-06, - "loss": 0.6666, - "num_input_tokens_seen": 4712020, - "step": 228 - }, - { - "epoch": 0.02753562195635183, - "grad_norm": 3.7176260038177706, - "learning_rate": 3.664e-06, - "loss": 0.8575, - "num_input_tokens_seen": 4731715, - "step": 229 - }, - { - "epoch": 0.02765586484699092, - "grad_norm": 2.505145173064042, - "learning_rate": 3.68e-06, - "loss": 0.8268, - "num_input_tokens_seen": 4753060, - "step": 230 - }, - { - "epoch": 0.027776107737630014, - "grad_norm": 2.192119201297155, - "learning_rate": 3.696e-06, - "loss": 0.94, - "num_input_tokens_seen": 4770795, - "step": 231 - }, - { - "epoch": 0.027896350628269104, - "grad_norm": 2.0026943252555833, - "learning_rate": 3.712e-06, - "loss": 0.8762, - "num_input_tokens_seen": 4792800, - "step": 232 - }, - { - "epoch": 0.028016593518908194, - "grad_norm": 2.71220011604548, - "learning_rate": 3.728e-06, - "loss": 0.8741, - "num_input_tokens_seen": 4810615, - "step": 233 - }, - { - "epoch": 0.028136836409547287, - "grad_norm": 3.1390203467971576, - "learning_rate": 3.744e-06, - "loss": 0.8368, - "num_input_tokens_seen": 4827680, - "step": 234 - }, - { - "epoch": 0.028257079300186377, - "grad_norm": 2.3865331315851193, - "learning_rate": 3.7599999999999996e-06, - "loss": 0.8407, - "num_input_tokens_seen": 4847165, - "step": 235 - }, - { - "epoch": 0.028377322190825467, - "grad_norm": 2.3064731188423213, - "learning_rate": 3.7759999999999995e-06, - "loss": 0.8599, - "num_input_tokens_seen": 4867025, - "step": 236 - }, - { - "epoch": 0.02849756508146456, - "grad_norm": 2.4143007919638677, - "learning_rate": 3.7919999999999994e-06, - "loss": 0.9558, - "num_input_tokens_seen": 4883725, - "step": 237 - }, - { - "epoch": 0.02861780797210365, - "grad_norm": 2.160624400580004, - "learning_rate": 3.808e-06, - "loss": 0.8349, - "num_input_tokens_seen": 4901435, - "step": 238 - }, - { - "epoch": 0.02873805086274274, - "grad_norm": 2.4326143267020766, - "learning_rate": 3.823999999999999e-06, - "loss": 0.9177, - "num_input_tokens_seen": 4920435, - "step": 239 - }, - { - "epoch": 0.028858293753381833, - "grad_norm": 2.358785591466701, - "learning_rate": 3.84e-06, - "loss": 0.8991, - "num_input_tokens_seen": 4937480, - "step": 240 - }, - { - "epoch": 0.028978536644020923, - "grad_norm": 2.088270749436349, - "learning_rate": 3.856e-06, - "loss": 0.8987, - "num_input_tokens_seen": 4955485, - "step": 241 - }, - { - "epoch": 0.029098779534660012, - "grad_norm": 2.638493191469904, - "learning_rate": 3.8719999999999995e-06, - "loss": 0.9405, - "num_input_tokens_seen": 4974865, - "step": 242 - }, - { - "epoch": 0.029219022425299106, - "grad_norm": 3.1103169614571122, - "learning_rate": 3.888e-06, - "loss": 0.9672, - "num_input_tokens_seen": 4992340, - "step": 243 - }, - { - "epoch": 0.029339265315938196, - "grad_norm": 3.2183741580010072, - "learning_rate": 3.903999999999999e-06, - "loss": 0.8611, - "num_input_tokens_seen": 5010400, - "step": 244 - }, - { - "epoch": 0.029459508206577285, - "grad_norm": 2.0765174747318538, - "learning_rate": 3.92e-06, - "loss": 0.9179, - "num_input_tokens_seen": 5028005, - "step": 245 - }, - { - "epoch": 0.02957975109721638, - "grad_norm": 9.100222361512776, - "learning_rate": 3.936e-06, - "loss": 0.8626, - "num_input_tokens_seen": 5047405, - "step": 246 - }, - { - "epoch": 0.02969999398785547, - "grad_norm": 1.7199314276763102, - "learning_rate": 3.952e-06, - "loss": 0.8203, - "num_input_tokens_seen": 5067665, - "step": 247 - }, - { - "epoch": 0.02982023687849456, - "grad_norm": 2.5520564078883865, - "learning_rate": 3.968e-06, - "loss": 0.8387, - "num_input_tokens_seen": 5089190, - "step": 248 - }, - { - "epoch": 0.02994047976913365, - "grad_norm": 2.1312146138094055, - "learning_rate": 3.9839999999999995e-06, - "loss": 0.7641, - "num_input_tokens_seen": 5109790, - "step": 249 - }, - { - "epoch": 0.03006072265977274, - "grad_norm": 3.8187510687502186, - "learning_rate": 4e-06, - "loss": 0.8636, - "num_input_tokens_seen": 5129345, - "step": 250 - }, - { - "epoch": 0.03018096555041183, - "grad_norm": 3.0551705792419606, - "learning_rate": 3.999999848300794e-06, - "loss": 0.9252, - "num_input_tokens_seen": 5148050, - "step": 251 - }, - { - "epoch": 0.030301208441050925, - "grad_norm": 3.342254164173398, - "learning_rate": 3.999999393203203e-06, - "loss": 0.9036, - "num_input_tokens_seen": 5170180, - "step": 252 - }, - { - "epoch": 0.030421451331690014, - "grad_norm": 1.801950593877542, - "learning_rate": 3.999998634707293e-06, - "loss": 0.8607, - "num_input_tokens_seen": 5189450, - "step": 253 - }, - { - "epoch": 0.030541694222329104, - "grad_norm": 3.050840993158165, - "learning_rate": 3.999997572813182e-06, - "loss": 0.9817, - "num_input_tokens_seen": 5206980, - "step": 254 - }, - { - "epoch": 0.030661937112968194, - "grad_norm": 2.241968008255119, - "learning_rate": 3.999996207521028e-06, - "loss": 0.8901, - "num_input_tokens_seen": 5225410, - "step": 255 - }, - { - "epoch": 0.030782180003607287, - "grad_norm": 3.246824562679245, - "learning_rate": 3.999994538831039e-06, - "loss": 0.8363, - "num_input_tokens_seen": 5241715, - "step": 256 - }, - { - "epoch": 0.030902422894246377, - "grad_norm": 2.600954218048441, - "learning_rate": 3.99999256674347e-06, - "loss": 0.8627, - "num_input_tokens_seen": 5261585, - "step": 257 - }, - { - "epoch": 0.031022665784885467, - "grad_norm": 1.6355519479195977, - "learning_rate": 3.999990291258618e-06, - "loss": 0.5823, - "num_input_tokens_seen": 5319995, - "step": 258 - }, - { - "epoch": 0.03114290867552456, - "grad_norm": 2.603380132499256, - "learning_rate": 3.999987712376829e-06, - "loss": 0.8719, - "num_input_tokens_seen": 5338035, - "step": 259 - }, - { - "epoch": 0.031263151566163654, - "grad_norm": 2.0922252056673805, - "learning_rate": 3.999984830098494e-06, - "loss": 0.8356, - "num_input_tokens_seen": 5357335, - "step": 260 - }, - { - "epoch": 0.03138339445680274, - "grad_norm": 2.7727989478822352, - "learning_rate": 3.999981644424051e-06, - "loss": 0.9976, - "num_input_tokens_seen": 5371855, - "step": 261 - }, - { - "epoch": 0.03150363734744183, - "grad_norm": 2.262770203824872, - "learning_rate": 3.999978155353982e-06, - "loss": 0.8793, - "num_input_tokens_seen": 5388720, - "step": 262 - }, - { - "epoch": 0.03162388023808092, - "grad_norm": 2.279218115073108, - "learning_rate": 3.9999743628888186e-06, - "loss": 0.8065, - "num_input_tokens_seen": 5410230, - "step": 263 - }, - { - "epoch": 0.03174412312872001, - "grad_norm": 2.193294935735661, - "learning_rate": 3.999970267029133e-06, - "loss": 0.8987, - "num_input_tokens_seen": 5428910, - "step": 264 - }, - { - "epoch": 0.0318643660193591, - "grad_norm": 4.773916362267976, - "learning_rate": 3.999965867775548e-06, - "loss": 0.8056, - "num_input_tokens_seen": 5449025, - "step": 265 - }, - { - "epoch": 0.0319846089099982, - "grad_norm": 3.605386742796827, - "learning_rate": 3.9999611651287315e-06, - "loss": 0.8809, - "num_input_tokens_seen": 5466900, - "step": 266 - }, - { - "epoch": 0.03210485180063729, - "grad_norm": 2.9377025853461185, - "learning_rate": 3.999956159089396e-06, - "loss": 0.8182, - "num_input_tokens_seen": 5484070, - "step": 267 - }, - { - "epoch": 0.03222509469127638, - "grad_norm": 2.71557954022971, - "learning_rate": 3.999950849658302e-06, - "loss": 0.8119, - "num_input_tokens_seen": 5502710, - "step": 268 - }, - { - "epoch": 0.03234533758191547, - "grad_norm": 2.6379679571760133, - "learning_rate": 3.999945236836254e-06, - "loss": 0.8534, - "num_input_tokens_seen": 5521395, - "step": 269 - }, - { - "epoch": 0.03246558047255456, - "grad_norm": 3.064483366059667, - "learning_rate": 3.999939320624103e-06, - "loss": 0.9626, - "num_input_tokens_seen": 5536265, - "step": 270 - }, - { - "epoch": 0.03258582336319365, - "grad_norm": 1.8722943900274187, - "learning_rate": 3.999933101022749e-06, - "loss": 0.9041, - "num_input_tokens_seen": 5556390, - "step": 271 - }, - { - "epoch": 0.032706066253832745, - "grad_norm": 2.0240204821360033, - "learning_rate": 3.999926578033132e-06, - "loss": 0.8716, - "num_input_tokens_seen": 5575925, - "step": 272 - }, - { - "epoch": 0.032826309144471835, - "grad_norm": 2.985965330324514, - "learning_rate": 3.999919751656244e-06, - "loss": 0.6461, - "num_input_tokens_seen": 5602545, - "step": 273 - }, - { - "epoch": 0.032946552035110925, - "grad_norm": 2.428543330489089, - "learning_rate": 3.9999126218931195e-06, - "loss": 0.7741, - "num_input_tokens_seen": 5620300, - "step": 274 - }, - { - "epoch": 0.033066794925750015, - "grad_norm": 2.3527258567159324, - "learning_rate": 3.99990518874484e-06, - "loss": 0.9148, - "num_input_tokens_seen": 5636460, - "step": 275 - }, - { - "epoch": 0.033187037816389105, - "grad_norm": 2.3683697736305787, - "learning_rate": 3.999897452212534e-06, - "loss": 0.9356, - "num_input_tokens_seen": 5653510, - "step": 276 - }, - { - "epoch": 0.033307280707028195, - "grad_norm": 2.2864172720411413, - "learning_rate": 3.999889412297374e-06, - "loss": 1.005, - "num_input_tokens_seen": 5672655, - "step": 277 - }, - { - "epoch": 0.03342752359766729, - "grad_norm": 2.112918349242022, - "learning_rate": 3.999881069000581e-06, - "loss": 0.8048, - "num_input_tokens_seen": 5692105, - "step": 278 - }, - { - "epoch": 0.03354776648830638, - "grad_norm": 2.577115421343023, - "learning_rate": 3.99987242232342e-06, - "loss": 0.895, - "num_input_tokens_seen": 5706830, - "step": 279 - }, - { - "epoch": 0.03366800937894547, - "grad_norm": 3.1148692821528727, - "learning_rate": 3.9998634722672026e-06, - "loss": 0.8087, - "num_input_tokens_seen": 5726605, - "step": 280 - }, - { - "epoch": 0.03378825226958456, - "grad_norm": 2.206459668735334, - "learning_rate": 3.999854218833286e-06, - "loss": 0.8067, - "num_input_tokens_seen": 5747145, - "step": 281 - }, - { - "epoch": 0.03390849516022365, - "grad_norm": 3.7228930099786677, - "learning_rate": 3.999844662023075e-06, - "loss": 0.8417, - "num_input_tokens_seen": 5766740, - "step": 282 - }, - { - "epoch": 0.03402873805086274, - "grad_norm": 1.948517128806398, - "learning_rate": 3.999834801838018e-06, - "loss": 0.9347, - "num_input_tokens_seen": 5785440, - "step": 283 - }, - { - "epoch": 0.03414898094150183, - "grad_norm": 2.294143880301539, - "learning_rate": 3.9998246382796115e-06, - "loss": 0.7589, - "num_input_tokens_seen": 5804740, - "step": 284 - }, - { - "epoch": 0.03426922383214093, - "grad_norm": 2.391251086777688, - "learning_rate": 3.999814171349399e-06, - "loss": 0.9201, - "num_input_tokens_seen": 5822320, - "step": 285 - }, - { - "epoch": 0.03438946672278002, - "grad_norm": 2.1133667002300442, - "learning_rate": 3.9998034010489655e-06, - "loss": 0.7525, - "num_input_tokens_seen": 5845730, - "step": 286 - }, - { - "epoch": 0.03450970961341911, - "grad_norm": 3.314309450055164, - "learning_rate": 3.999792327379946e-06, - "loss": 0.7698, - "num_input_tokens_seen": 5864825, - "step": 287 - }, - { - "epoch": 0.034629952504058197, - "grad_norm": 2.949741690829669, - "learning_rate": 3.999780950344021e-06, - "loss": 0.9856, - "num_input_tokens_seen": 5882735, - "step": 288 - }, - { - "epoch": 0.034750195394697286, - "grad_norm": 1.868232974093652, - "learning_rate": 3.999769269942916e-06, - "loss": 0.8319, - "num_input_tokens_seen": 5902495, - "step": 289 - }, - { - "epoch": 0.034870438285336376, - "grad_norm": 1.9171755697676127, - "learning_rate": 3.999757286178402e-06, - "loss": 0.8277, - "num_input_tokens_seen": 5924650, - "step": 290 - }, - { - "epoch": 0.03499068117597547, - "grad_norm": 1.91351843615992, - "learning_rate": 3.999744999052299e-06, - "loss": 0.9143, - "num_input_tokens_seen": 5945760, - "step": 291 - }, - { - "epoch": 0.03511092406661456, - "grad_norm": 1.4049375044033747, - "learning_rate": 3.9997324085664675e-06, - "loss": 0.6745, - "num_input_tokens_seen": 6005710, - "step": 292 - }, - { - "epoch": 0.03523116695725365, - "grad_norm": 2.8311938625552555, - "learning_rate": 3.999719514722821e-06, - "loss": 0.9253, - "num_input_tokens_seen": 6025560, - "step": 293 - }, - { - "epoch": 0.03535140984789274, - "grad_norm": 2.177330379393852, - "learning_rate": 3.999706317523314e-06, - "loss": 0.7661, - "num_input_tokens_seen": 6043840, - "step": 294 - }, - { - "epoch": 0.03547165273853183, - "grad_norm": 1.9078416401645966, - "learning_rate": 3.999692816969948e-06, - "loss": 0.8689, - "num_input_tokens_seen": 6063095, - "step": 295 - }, - { - "epoch": 0.03559189562917092, - "grad_norm": 1.3296035902349783, - "learning_rate": 3.999679013064772e-06, - "loss": 0.717, - "num_input_tokens_seen": 6129560, - "step": 296 - }, - { - "epoch": 0.03571213851981002, - "grad_norm": 2.5049054173091, - "learning_rate": 3.99966490580988e-06, - "loss": 0.8737, - "num_input_tokens_seen": 6146640, - "step": 297 - }, - { - "epoch": 0.03583238141044911, - "grad_norm": 5.201711075237415, - "learning_rate": 3.999650495207411e-06, - "loss": 0.6842, - "num_input_tokens_seen": 6172385, - "step": 298 - }, - { - "epoch": 0.0359526243010882, - "grad_norm": 2.4201328370170963, - "learning_rate": 3.999635781259553e-06, - "loss": 0.9094, - "num_input_tokens_seen": 6187370, - "step": 299 - }, - { - "epoch": 0.03607286719172729, - "grad_norm": 1.3336515001859994, - "learning_rate": 3.999620763968535e-06, - "loss": 0.5733, - "num_input_tokens_seen": 6245965, - "step": 300 - }, - { - "epoch": 0.03619311008236638, - "grad_norm": 2.287233399432092, - "learning_rate": 3.999605443336638e-06, - "loss": 0.8769, - "num_input_tokens_seen": 6267815, - "step": 301 - }, - { - "epoch": 0.03631335297300547, - "grad_norm": 2.972305278115536, - "learning_rate": 3.999589819366185e-06, - "loss": 0.9133, - "num_input_tokens_seen": 6281325, - "step": 302 - }, - { - "epoch": 0.036433595863644565, - "grad_norm": 1.9292914270325559, - "learning_rate": 3.999573892059547e-06, - "loss": 0.8422, - "num_input_tokens_seen": 6300175, - "step": 303 - }, - { - "epoch": 0.036553838754283655, - "grad_norm": 2.211612608083357, - "learning_rate": 3.999557661419138e-06, - "loss": 0.8193, - "num_input_tokens_seen": 6320045, - "step": 304 - }, - { - "epoch": 0.036674081644922744, - "grad_norm": 2.091846611025725, - "learning_rate": 3.9995411274474225e-06, - "loss": 0.8118, - "num_input_tokens_seen": 6339045, - "step": 305 - }, - { - "epoch": 0.036794324535561834, - "grad_norm": 2.1567298014040865, - "learning_rate": 3.999524290146908e-06, - "loss": 0.8335, - "num_input_tokens_seen": 6358970, - "step": 306 - }, - { - "epoch": 0.036914567426200924, - "grad_norm": 2.545724521004469, - "learning_rate": 3.9995071495201485e-06, - "loss": 0.9203, - "num_input_tokens_seen": 6375795, - "step": 307 - }, - { - "epoch": 0.037034810316840014, - "grad_norm": 2.411243479714235, - "learning_rate": 3.999489705569744e-06, - "loss": 0.9852, - "num_input_tokens_seen": 6393215, - "step": 308 - }, - { - "epoch": 0.03715505320747911, - "grad_norm": 2.0455696199630515, - "learning_rate": 3.999471958298341e-06, - "loss": 0.8827, - "num_input_tokens_seen": 6411845, - "step": 309 - }, - { - "epoch": 0.0372752960981182, - "grad_norm": 1.7629648877215305, - "learning_rate": 3.999453907708631e-06, - "loss": 0.7689, - "num_input_tokens_seen": 6433970, - "step": 310 - }, - { - "epoch": 0.03739553898875729, - "grad_norm": 1.7179807281318327, - "learning_rate": 3.999435553803353e-06, - "loss": 0.8306, - "num_input_tokens_seen": 6453090, - "step": 311 - }, - { - "epoch": 0.03751578187939638, - "grad_norm": 2.9646680862818555, - "learning_rate": 3.999416896585292e-06, - "loss": 0.8518, - "num_input_tokens_seen": 6469840, - "step": 312 - }, - { - "epoch": 0.03763602477003547, - "grad_norm": 4.713428310853323, - "learning_rate": 3.9993979360572775e-06, - "loss": 0.8707, - "num_input_tokens_seen": 6489700, - "step": 313 - }, - { - "epoch": 0.03775626766067456, - "grad_norm": 5.195056705690607, - "learning_rate": 3.999378672222185e-06, - "loss": 0.8437, - "num_input_tokens_seen": 6507205, - "step": 314 - }, - { - "epoch": 0.03787651055131366, - "grad_norm": 2.2639651165503487, - "learning_rate": 3.9993591050829385e-06, - "loss": 0.8406, - "num_input_tokens_seen": 6524790, - "step": 315 - }, - { - "epoch": 0.037996753441952746, - "grad_norm": 1.8829600150311268, - "learning_rate": 3.999339234642506e-06, - "loss": 0.7988, - "num_input_tokens_seen": 6544260, - "step": 316 - }, - { - "epoch": 0.038116996332591836, - "grad_norm": 2.091952206463273, - "learning_rate": 3.9993190609038994e-06, - "loss": 0.8488, - "num_input_tokens_seen": 6562745, - "step": 317 - }, - { - "epoch": 0.038237239223230926, - "grad_norm": 1.8130077445805735, - "learning_rate": 3.999298583870182e-06, - "loss": 0.8513, - "num_input_tokens_seen": 6582050, - "step": 318 - }, - { - "epoch": 0.038357482113870016, - "grad_norm": 2.4000573010845656, - "learning_rate": 3.999277803544458e-06, - "loss": 0.7909, - "num_input_tokens_seen": 6601925, - "step": 319 - }, - { - "epoch": 0.038477725004509106, - "grad_norm": 1.044544796224368, - "learning_rate": 3.999256719929882e-06, - "loss": 0.6733, - "num_input_tokens_seen": 6662920, - "step": 320 - }, - { - "epoch": 0.0385979678951482, - "grad_norm": 1.4062611867353632, - "learning_rate": 3.999235333029651e-06, - "loss": 0.7547, - "num_input_tokens_seen": 6716580, - "step": 321 - }, - { - "epoch": 0.03871821078578729, - "grad_norm": 1.7787580536231962, - "learning_rate": 3.999213642847009e-06, - "loss": 0.8153, - "num_input_tokens_seen": 6736885, - "step": 322 - }, - { - "epoch": 0.03883845367642638, - "grad_norm": 2.3954289062976977, - "learning_rate": 3.999191649385247e-06, - "loss": 0.9194, - "num_input_tokens_seen": 6757780, - "step": 323 - }, - { - "epoch": 0.03895869656706547, - "grad_norm": 1.0371840965369292, - "learning_rate": 3.999169352647702e-06, - "loss": 0.6407, - "num_input_tokens_seen": 6818680, - "step": 324 - }, - { - "epoch": 0.03907893945770456, - "grad_norm": 1.8061138549551585, - "learning_rate": 3.999146752637755e-06, - "loss": 0.8295, - "num_input_tokens_seen": 6839445, - "step": 325 - }, - { - "epoch": 0.03919918234834365, - "grad_norm": 2.3364462339647702, - "learning_rate": 3.999123849358836e-06, - "loss": 0.9128, - "num_input_tokens_seen": 6856830, - "step": 326 - }, - { - "epoch": 0.03931942523898275, - "grad_norm": 2.173017484126889, - "learning_rate": 3.999100642814418e-06, - "loss": 0.761, - "num_input_tokens_seen": 6876990, - "step": 327 - }, - { - "epoch": 0.03943966812962184, - "grad_norm": 2.2828147302825417, - "learning_rate": 3.999077133008022e-06, - "loss": 0.9068, - "num_input_tokens_seen": 6895295, - "step": 328 - }, - { - "epoch": 0.03955991102026093, - "grad_norm": 1.8399729475566025, - "learning_rate": 3.9990533199432145e-06, - "loss": 0.9204, - "num_input_tokens_seen": 6916510, - "step": 329 - }, - { - "epoch": 0.03968015391090002, - "grad_norm": 2.2392228130067715, - "learning_rate": 3.999029203623608e-06, - "loss": 0.7745, - "num_input_tokens_seen": 6933950, - "step": 330 - }, - { - "epoch": 0.03980039680153911, - "grad_norm": 1.9495961584579014, - "learning_rate": 3.99900478405286e-06, - "loss": 0.872, - "num_input_tokens_seen": 6952980, - "step": 331 - }, - { - "epoch": 0.0399206396921782, - "grad_norm": 3.880844829169628, - "learning_rate": 3.998980061234676e-06, - "loss": 0.8359, - "num_input_tokens_seen": 6970615, - "step": 332 - }, - { - "epoch": 0.040040882582817294, - "grad_norm": 2.782601678691449, - "learning_rate": 3.9989550351728055e-06, - "loss": 0.7717, - "num_input_tokens_seen": 6987265, - "step": 333 - }, - { - "epoch": 0.040161125473456384, - "grad_norm": 2.2663500631062363, - "learning_rate": 3.998929705871046e-06, - "loss": 0.8401, - "num_input_tokens_seen": 7004340, - "step": 334 - }, - { - "epoch": 0.040281368364095474, - "grad_norm": 2.498865321604322, - "learning_rate": 3.99890407333324e-06, - "loss": 0.8988, - "num_input_tokens_seen": 7022590, - "step": 335 - }, - { - "epoch": 0.040401611254734564, - "grad_norm": 1.6995487788026244, - "learning_rate": 3.998878137563275e-06, - "loss": 0.8713, - "num_input_tokens_seen": 7041860, - "step": 336 - }, - { - "epoch": 0.040521854145373654, - "grad_norm": 2.7293390219193214, - "learning_rate": 3.998851898565085e-06, - "loss": 0.8619, - "num_input_tokens_seen": 7061385, - "step": 337 - }, - { - "epoch": 0.04064209703601274, - "grad_norm": 3.684342630032326, - "learning_rate": 3.998825356342653e-06, - "loss": 0.8402, - "num_input_tokens_seen": 7081280, - "step": 338 - }, - { - "epoch": 0.04076233992665183, - "grad_norm": 3.0892320542520904, - "learning_rate": 3.998798510900003e-06, - "loss": 0.7385, - "num_input_tokens_seen": 7103800, - "step": 339 - }, - { - "epoch": 0.04088258281729093, - "grad_norm": 3.4568619704056247, - "learning_rate": 3.998771362241207e-06, - "loss": 0.8523, - "num_input_tokens_seen": 7123925, - "step": 340 - }, - { - "epoch": 0.04100282570793002, - "grad_norm": 2.1653040587591437, - "learning_rate": 3.998743910370385e-06, - "loss": 0.8912, - "num_input_tokens_seen": 7142505, - "step": 341 - }, - { - "epoch": 0.04112306859856911, - "grad_norm": 2.07299718987642, - "learning_rate": 3.998716155291702e-06, - "loss": 0.7425, - "num_input_tokens_seen": 7160065, - "step": 342 - }, - { - "epoch": 0.0412433114892082, - "grad_norm": 3.1630435537184276, - "learning_rate": 3.998688097009366e-06, - "loss": 0.917, - "num_input_tokens_seen": 7180550, - "step": 343 - }, - { - "epoch": 0.04136355437984729, - "grad_norm": 5.364945260908992, - "learning_rate": 3.998659735527636e-06, - "loss": 0.8233, - "num_input_tokens_seen": 7199360, - "step": 344 - }, - { - "epoch": 0.04148379727048638, - "grad_norm": 1.96164837181541, - "learning_rate": 3.998631070850813e-06, - "loss": 0.785, - "num_input_tokens_seen": 7219700, - "step": 345 - }, - { - "epoch": 0.041604040161125476, - "grad_norm": 2.402471200441801, - "learning_rate": 3.9986021029832455e-06, - "loss": 0.8411, - "num_input_tokens_seen": 7236735, - "step": 346 - }, - { - "epoch": 0.041724283051764566, - "grad_norm": 2.4751875439210544, - "learning_rate": 3.9985728319293285e-06, - "loss": 0.9197, - "num_input_tokens_seen": 7250430, - "step": 347 - }, - { - "epoch": 0.041844525942403656, - "grad_norm": 2.1605151134772966, - "learning_rate": 3.998543257693501e-06, - "loss": 0.8586, - "num_input_tokens_seen": 7266905, - "step": 348 - }, - { - "epoch": 0.041964768833042745, - "grad_norm": 2.5648519501592015, - "learning_rate": 3.998513380280251e-06, - "loss": 0.8907, - "num_input_tokens_seen": 7286905, - "step": 349 - }, - { - "epoch": 0.042085011723681835, - "grad_norm": 4.04527688897182, - "learning_rate": 3.99848319969411e-06, - "loss": 0.9515, - "num_input_tokens_seen": 7304225, - "step": 350 - }, - { - "epoch": 0.042205254614320925, - "grad_norm": 2.424947683136834, - "learning_rate": 3.9984527159396564e-06, - "loss": 0.7904, - "num_input_tokens_seen": 7322585, - "step": 351 - }, - { - "epoch": 0.04232549750496002, - "grad_norm": 2.100428755097484, - "learning_rate": 3.9984219290215154e-06, - "loss": 0.8378, - "num_input_tokens_seen": 7342480, - "step": 352 - }, - { - "epoch": 0.04244574039559911, - "grad_norm": 1.6392051947588568, - "learning_rate": 3.998390838944356e-06, - "loss": 0.8912, - "num_input_tokens_seen": 7363705, - "step": 353 - }, - { - "epoch": 0.0425659832862382, - "grad_norm": 2.721800287622192, - "learning_rate": 3.998359445712895e-06, - "loss": 0.9063, - "num_input_tokens_seen": 7382530, - "step": 354 - }, - { - "epoch": 0.04268622617687729, - "grad_norm": 2.1972396776567025, - "learning_rate": 3.9983277493318955e-06, - "loss": 0.8206, - "num_input_tokens_seen": 7401545, - "step": 355 - }, - { - "epoch": 0.04280646906751638, - "grad_norm": 1.7791387808058512, - "learning_rate": 3.998295749806165e-06, - "loss": 0.8145, - "num_input_tokens_seen": 7422490, - "step": 356 - }, - { - "epoch": 0.04292671195815547, - "grad_norm": 2.350104202077775, - "learning_rate": 3.998263447140558e-06, - "loss": 0.8487, - "num_input_tokens_seen": 7442410, - "step": 357 - }, - { - "epoch": 0.04304695484879457, - "grad_norm": 2.017620277002353, - "learning_rate": 3.998230841339976e-06, - "loss": 0.8244, - "num_input_tokens_seen": 7464140, - "step": 358 - }, - { - "epoch": 0.04316719773943366, - "grad_norm": 2.085608740652392, - "learning_rate": 3.998197932409363e-06, - "loss": 0.8567, - "num_input_tokens_seen": 7481870, - "step": 359 - }, - { - "epoch": 0.04328744063007275, - "grad_norm": 3.9792600701715806, - "learning_rate": 3.9981647203537125e-06, - "loss": 0.8705, - "num_input_tokens_seen": 7499090, - "step": 360 - }, - { - "epoch": 0.04340768352071184, - "grad_norm": 1.9932683344172748, - "learning_rate": 3.998131205178063e-06, - "loss": 0.9655, - "num_input_tokens_seen": 7517280, - "step": 361 - }, - { - "epoch": 0.04352792641135093, - "grad_norm": 2.51843094924229, - "learning_rate": 3.998097386887498e-06, - "loss": 0.7806, - "num_input_tokens_seen": 7534075, - "step": 362 - }, - { - "epoch": 0.04364816930199002, - "grad_norm": 1.811306405222341, - "learning_rate": 3.998063265487148e-06, - "loss": 0.8502, - "num_input_tokens_seen": 7554845, - "step": 363 - }, - { - "epoch": 0.043768412192629114, - "grad_norm": 1.7882383407992861, - "learning_rate": 3.99802884098219e-06, - "loss": 0.8242, - "num_input_tokens_seen": 7572675, - "step": 364 - }, - { - "epoch": 0.043888655083268203, - "grad_norm": 2.579248167976788, - "learning_rate": 3.997994113377845e-06, - "loss": 0.825, - "num_input_tokens_seen": 7591295, - "step": 365 - }, - { - "epoch": 0.04400889797390729, - "grad_norm": 2.6396282439807726, - "learning_rate": 3.9979590826793815e-06, - "loss": 0.8469, - "num_input_tokens_seen": 7612205, - "step": 366 - }, - { - "epoch": 0.04412914086454638, - "grad_norm": 2.2554866451576343, - "learning_rate": 3.997923748892113e-06, - "loss": 0.8281, - "num_input_tokens_seen": 7631245, - "step": 367 - }, - { - "epoch": 0.04424938375518547, - "grad_norm": 1.6408659123853069, - "learning_rate": 3.9978881120214015e-06, - "loss": 0.8907, - "num_input_tokens_seen": 7652485, - "step": 368 - }, - { - "epoch": 0.04436962664582456, - "grad_norm": 1.9384176610863997, - "learning_rate": 3.997852172072652e-06, - "loss": 0.8049, - "num_input_tokens_seen": 7673420, - "step": 369 - }, - { - "epoch": 0.04448986953646366, - "grad_norm": 2.9730436133467797, - "learning_rate": 3.9978159290513155e-06, - "loss": 0.9066, - "num_input_tokens_seen": 7691220, - "step": 370 - }, - { - "epoch": 0.04461011242710275, - "grad_norm": 1.7167953630057466, - "learning_rate": 3.997779382962892e-06, - "loss": 0.8111, - "num_input_tokens_seen": 7713825, - "step": 371 - }, - { - "epoch": 0.04473035531774184, - "grad_norm": 2.0204905057262006, - "learning_rate": 3.997742533812924e-06, - "loss": 0.753, - "num_input_tokens_seen": 7736810, - "step": 372 - }, - { - "epoch": 0.04485059820838093, - "grad_norm": 2.76087899022977, - "learning_rate": 3.997705381607001e-06, - "loss": 0.9243, - "num_input_tokens_seen": 7753345, - "step": 373 - }, - { - "epoch": 0.04497084109902002, - "grad_norm": 1.1474367204063174, - "learning_rate": 3.997667926350761e-06, - "loss": 0.6379, - "num_input_tokens_seen": 7811395, - "step": 374 - }, - { - "epoch": 0.04509108398965911, - "grad_norm": 1.1441503455875692, - "learning_rate": 3.997630168049886e-06, - "loss": 0.6095, - "num_input_tokens_seen": 7869480, - "step": 375 - }, - { - "epoch": 0.045211326880298205, - "grad_norm": 3.3751714026683666, - "learning_rate": 3.997592106710101e-06, - "loss": 0.786, - "num_input_tokens_seen": 7888660, - "step": 376 - }, - { - "epoch": 0.045331569770937295, - "grad_norm": 3.1353310862799533, - "learning_rate": 3.997553742337182e-06, - "loss": 0.6727, - "num_input_tokens_seen": 7907805, - "step": 377 - }, - { - "epoch": 0.045451812661576385, - "grad_norm": 1.8457131451840292, - "learning_rate": 3.997515074936949e-06, - "loss": 0.9199, - "num_input_tokens_seen": 7928400, - "step": 378 - }, - { - "epoch": 0.045572055552215475, - "grad_norm": 3.0976728941209335, - "learning_rate": 3.997476104515268e-06, - "loss": 0.8734, - "num_input_tokens_seen": 7946310, - "step": 379 - }, - { - "epoch": 0.045692298442854565, - "grad_norm": 2.023514488361353, - "learning_rate": 3.9974368310780485e-06, - "loss": 0.7867, - "num_input_tokens_seen": 7963205, - "step": 380 - }, - { - "epoch": 0.045812541333493655, - "grad_norm": 2.539372544643226, - "learning_rate": 3.997397254631251e-06, - "loss": 0.7518, - "num_input_tokens_seen": 7983545, - "step": 381 - }, - { - "epoch": 0.04593278422413275, - "grad_norm": 0.9979931868716626, - "learning_rate": 3.997357375180878e-06, - "loss": 0.6555, - "num_input_tokens_seen": 8047545, - "step": 382 - }, - { - "epoch": 0.04605302711477184, - "grad_norm": 1.8558539879785372, - "learning_rate": 3.997317192732979e-06, - "loss": 0.7538, - "num_input_tokens_seen": 8066045, - "step": 383 - }, - { - "epoch": 0.04617327000541093, - "grad_norm": 2.165453528024373, - "learning_rate": 3.99727670729365e-06, - "loss": 0.8214, - "num_input_tokens_seen": 8084325, - "step": 384 - }, - { - "epoch": 0.04629351289605002, - "grad_norm": 1.7732804162030202, - "learning_rate": 3.997235918869033e-06, - "loss": 0.7825, - "num_input_tokens_seen": 8105080, - "step": 385 - }, - { - "epoch": 0.04641375578668911, - "grad_norm": 1.9076410478989632, - "learning_rate": 3.997194827465315e-06, - "loss": 0.8391, - "num_input_tokens_seen": 8123395, - "step": 386 - }, - { - "epoch": 0.0465339986773282, - "grad_norm": 2.843945838370176, - "learning_rate": 3.997153433088728e-06, - "loss": 0.929, - "num_input_tokens_seen": 8140240, - "step": 387 - }, - { - "epoch": 0.0466542415679673, - "grad_norm": 2.370632887481483, - "learning_rate": 3.997111735745554e-06, - "loss": 0.8082, - "num_input_tokens_seen": 8162930, - "step": 388 - }, - { - "epoch": 0.04677448445860639, - "grad_norm": 1.9790277251201742, - "learning_rate": 3.997069735442118e-06, - "loss": 0.8385, - "num_input_tokens_seen": 8182345, - "step": 389 - }, - { - "epoch": 0.04689472734924548, - "grad_norm": 1.4445478443904698, - "learning_rate": 3.997027432184792e-06, - "loss": 0.7969, - "num_input_tokens_seen": 8206725, - "step": 390 - }, - { - "epoch": 0.04701497023988457, - "grad_norm": 1.9265438364287897, - "learning_rate": 3.99698482597999e-06, - "loss": 0.8819, - "num_input_tokens_seen": 8224125, - "step": 391 - }, - { - "epoch": 0.04713521313052366, - "grad_norm": 0.9855986454469535, - "learning_rate": 3.99694191683418e-06, - "loss": 0.6634, - "num_input_tokens_seen": 8284645, - "step": 392 - }, - { - "epoch": 0.047255456021162746, - "grad_norm": 2.0243212319592274, - "learning_rate": 3.996898704753867e-06, - "loss": 0.833, - "num_input_tokens_seen": 8302315, - "step": 393 - }, - { - "epoch": 0.04737569891180184, - "grad_norm": 2.4106680007724735, - "learning_rate": 3.996855189745609e-06, - "loss": 0.8927, - "num_input_tokens_seen": 8321300, - "step": 394 - }, - { - "epoch": 0.04749594180244093, - "grad_norm": 2.0119202428119425, - "learning_rate": 3.996811371816007e-06, - "loss": 0.9257, - "num_input_tokens_seen": 8343445, - "step": 395 - }, - { - "epoch": 0.04761618469308002, - "grad_norm": 1.8566418594632506, - "learning_rate": 3.996767250971707e-06, - "loss": 0.7908, - "num_input_tokens_seen": 8365905, - "step": 396 - }, - { - "epoch": 0.04773642758371911, - "grad_norm": 2.000281657982543, - "learning_rate": 3.996722827219403e-06, - "loss": 0.8725, - "num_input_tokens_seen": 8387240, - "step": 397 - }, - { - "epoch": 0.0478566704743582, - "grad_norm": 2.545853532271167, - "learning_rate": 3.996678100565833e-06, - "loss": 0.8245, - "num_input_tokens_seen": 8406015, - "step": 398 - }, - { - "epoch": 0.04797691336499729, - "grad_norm": 2.762848078763912, - "learning_rate": 3.996633071017783e-06, - "loss": 0.8767, - "num_input_tokens_seen": 8422365, - "step": 399 - }, - { - "epoch": 0.04809715625563638, - "grad_norm": 2.342698481896991, - "learning_rate": 3.996587738582084e-06, - "loss": 0.8178, - "num_input_tokens_seen": 8438885, - "step": 400 - }, - { - "epoch": 0.04821739914627548, - "grad_norm": 2.2034345914585676, - "learning_rate": 3.9965421032656115e-06, - "loss": 0.8628, - "num_input_tokens_seen": 8458535, - "step": 401 - }, - { - "epoch": 0.04833764203691457, - "grad_norm": 2.715378292705908, - "learning_rate": 3.99649616507529e-06, - "loss": 0.9508, - "num_input_tokens_seen": 8477350, - "step": 402 - }, - { - "epoch": 0.04845788492755366, - "grad_norm": 1.0300050063232031, - "learning_rate": 3.996449924018088e-06, - "loss": 0.6779, - "num_input_tokens_seen": 8537530, - "step": 403 - }, - { - "epoch": 0.04857812781819275, - "grad_norm": 1.9694461933945187, - "learning_rate": 3.99640338010102e-06, - "loss": 0.8075, - "num_input_tokens_seen": 8556355, - "step": 404 - }, - { - "epoch": 0.04869837070883184, - "grad_norm": 2.128598282821002, - "learning_rate": 3.996356533331146e-06, - "loss": 0.792, - "num_input_tokens_seen": 8577945, - "step": 405 - }, - { - "epoch": 0.04881861359947093, - "grad_norm": 3.753623292226186, - "learning_rate": 3.996309383715573e-06, - "loss": 0.6196, - "num_input_tokens_seen": 8596445, - "step": 406 - }, - { - "epoch": 0.048938856490110025, - "grad_norm": 2.1457829206397796, - "learning_rate": 3.996261931261454e-06, - "loss": 0.7354, - "num_input_tokens_seen": 8614745, - "step": 407 - }, - { - "epoch": 0.049059099380749115, - "grad_norm": 1.7048124938654663, - "learning_rate": 3.996214175975987e-06, - "loss": 0.8649, - "num_input_tokens_seen": 8634985, - "step": 408 - }, - { - "epoch": 0.049179342271388204, - "grad_norm": 2.21201774714938, - "learning_rate": 3.996166117866417e-06, - "loss": 0.7991, - "num_input_tokens_seen": 8656640, - "step": 409 - }, - { - "epoch": 0.049299585162027294, - "grad_norm": 3.4935376058018184, - "learning_rate": 3.996117756940035e-06, - "loss": 0.8726, - "num_input_tokens_seen": 8673045, - "step": 410 - }, - { - "epoch": 0.049419828052666384, - "grad_norm": 2.106591600290592, - "learning_rate": 3.996069093204175e-06, - "loss": 0.9743, - "num_input_tokens_seen": 8688725, - "step": 411 - }, - { - "epoch": 0.049540070943305474, - "grad_norm": 2.481923273742918, - "learning_rate": 3.996020126666221e-06, - "loss": 0.891, - "num_input_tokens_seen": 8705425, - "step": 412 - }, - { - "epoch": 0.04966031383394457, - "grad_norm": 2.916968640282379, - "learning_rate": 3.995970857333601e-06, - "loss": 0.829, - "num_input_tokens_seen": 8725555, - "step": 413 - }, - { - "epoch": 0.04978055672458366, - "grad_norm": 2.038539611976774, - "learning_rate": 3.995921285213789e-06, - "loss": 0.8067, - "num_input_tokens_seen": 8745535, - "step": 414 - }, - { - "epoch": 0.04990079961522275, - "grad_norm": 2.3906471122837547, - "learning_rate": 3.995871410314305e-06, - "loss": 0.8173, - "num_input_tokens_seen": 8763815, - "step": 415 - }, - { - "epoch": 0.05002104250586184, - "grad_norm": 1.0956416157862419, - "learning_rate": 3.995821232642714e-06, - "loss": 0.6606, - "num_input_tokens_seen": 8821940, - "step": 416 - }, - { - "epoch": 0.05014128539650093, - "grad_norm": 2.7368031799510644, - "learning_rate": 3.995770752206629e-06, - "loss": 0.8287, - "num_input_tokens_seen": 8842735, - "step": 417 - }, - { - "epoch": 0.05026152828714002, - "grad_norm": 2.095156556256337, - "learning_rate": 3.995719969013709e-06, - "loss": 0.9669, - "num_input_tokens_seen": 8859635, - "step": 418 - }, - { - "epoch": 0.05038177117777912, - "grad_norm": 2.8008922263705243, - "learning_rate": 3.995668883071655e-06, - "loss": 0.8633, - "num_input_tokens_seen": 8875580, - "step": 419 - }, - { - "epoch": 0.050502014068418206, - "grad_norm": 2.4970550100186437, - "learning_rate": 3.995617494388219e-06, - "loss": 0.9164, - "num_input_tokens_seen": 8893420, - "step": 420 - }, - { - "epoch": 0.050622256959057296, - "grad_norm": 1.9457230550900073, - "learning_rate": 3.995565802971196e-06, - "loss": 0.8125, - "num_input_tokens_seen": 8913115, - "step": 421 - }, - { - "epoch": 0.050742499849696386, - "grad_norm": 2.0390095219509674, - "learning_rate": 3.995513808828427e-06, - "loss": 0.6809, - "num_input_tokens_seen": 8935630, - "step": 422 - }, - { - "epoch": 0.050862742740335476, - "grad_norm": 1.9830020846860377, - "learning_rate": 3.9954615119678e-06, - "loss": 0.7624, - "num_input_tokens_seen": 8953905, - "step": 423 - }, - { - "epoch": 0.050982985630974566, - "grad_norm": 1.9335259142108472, - "learning_rate": 3.995408912397248e-06, - "loss": 0.796, - "num_input_tokens_seen": 8971520, - "step": 424 - }, - { - "epoch": 0.05110322852161366, - "grad_norm": 2.46969107853472, - "learning_rate": 3.99535601012475e-06, - "loss": 0.927, - "num_input_tokens_seen": 8986570, - "step": 425 - }, - { - "epoch": 0.05122347141225275, - "grad_norm": 1.8121846701150468, - "learning_rate": 3.995302805158333e-06, - "loss": 0.7607, - "num_input_tokens_seen": 9008945, - "step": 426 - }, - { - "epoch": 0.05134371430289184, - "grad_norm": 2.1195635279438654, - "learning_rate": 3.9952492975060665e-06, - "loss": 0.8385, - "num_input_tokens_seen": 9028735, - "step": 427 - }, - { - "epoch": 0.05146395719353093, - "grad_norm": 2.498068536352519, - "learning_rate": 3.995195487176067e-06, - "loss": 0.8649, - "num_input_tokens_seen": 9048685, - "step": 428 - }, - { - "epoch": 0.05158420008417002, - "grad_norm": 1.9557368729466096, - "learning_rate": 3.995141374176499e-06, - "loss": 0.8566, - "num_input_tokens_seen": 9066800, - "step": 429 - }, - { - "epoch": 0.05170444297480911, - "grad_norm": 1.0325867068624763, - "learning_rate": 3.995086958515572e-06, - "loss": 0.6815, - "num_input_tokens_seen": 9124540, - "step": 430 - }, - { - "epoch": 0.05182468586544821, - "grad_norm": 1.0132728741287735, - "learning_rate": 3.995032240201538e-06, - "loss": 0.648, - "num_input_tokens_seen": 9186655, - "step": 431 - }, - { - "epoch": 0.0519449287560873, - "grad_norm": 1.1170326233416954, - "learning_rate": 3.9949772192427e-06, - "loss": 0.6585, - "num_input_tokens_seen": 9233000, - "step": 432 - }, - { - "epoch": 0.05206517164672639, - "grad_norm": 2.0169408873886088, - "learning_rate": 3.994921895647405e-06, - "loss": 0.8027, - "num_input_tokens_seen": 9250890, - "step": 433 - }, - { - "epoch": 0.05218541453736548, - "grad_norm": 0.8909107788343686, - "learning_rate": 3.994866269424043e-06, - "loss": 0.5729, - "num_input_tokens_seen": 9306980, - "step": 434 - }, - { - "epoch": 0.05230565742800457, - "grad_norm": 2.2576947765939206, - "learning_rate": 3.9948103405810545e-06, - "loss": 0.783, - "num_input_tokens_seen": 9325650, - "step": 435 - }, - { - "epoch": 0.05242590031864366, - "grad_norm": 2.022624134660986, - "learning_rate": 3.994754109126923e-06, - "loss": 0.8571, - "num_input_tokens_seen": 9346865, - "step": 436 - }, - { - "epoch": 0.052546143209282754, - "grad_norm": 1.742196396548821, - "learning_rate": 3.994697575070181e-06, - "loss": 0.9278, - "num_input_tokens_seen": 9366045, - "step": 437 - }, - { - "epoch": 0.052666386099921844, - "grad_norm": 2.9849960890835567, - "learning_rate": 3.994640738419402e-06, - "loss": 0.909, - "num_input_tokens_seen": 9385140, - "step": 438 - }, - { - "epoch": 0.052786628990560934, - "grad_norm": 1.9772482984084772, - "learning_rate": 3.9945835991832075e-06, - "loss": 0.8034, - "num_input_tokens_seen": 9406745, - "step": 439 - }, - { - "epoch": 0.052906871881200024, - "grad_norm": 2.5061167807327234, - "learning_rate": 3.994526157370268e-06, - "loss": 0.9321, - "num_input_tokens_seen": 9425080, - "step": 440 - }, - { - "epoch": 0.053027114771839114, - "grad_norm": 0.9495371009024924, - "learning_rate": 3.994468412989296e-06, - "loss": 0.6172, - "num_input_tokens_seen": 9486210, - "step": 441 - }, - { - "epoch": 0.053147357662478203, - "grad_norm": 2.0882140774216134, - "learning_rate": 3.994410366049052e-06, - "loss": 0.9484, - "num_input_tokens_seen": 9503790, - "step": 442 - }, - { - "epoch": 0.0532676005531173, - "grad_norm": 2.3515828496158893, - "learning_rate": 3.994352016558341e-06, - "loss": 0.8394, - "num_input_tokens_seen": 9520815, - "step": 443 - }, - { - "epoch": 0.05338784344375639, - "grad_norm": 2.1256973121918388, - "learning_rate": 3.994293364526014e-06, - "loss": 0.7441, - "num_input_tokens_seen": 9541420, - "step": 444 - }, - { - "epoch": 0.05350808633439548, - "grad_norm": 2.153764930048083, - "learning_rate": 3.99423440996097e-06, - "loss": 0.8435, - "num_input_tokens_seen": 9560680, - "step": 445 - }, - { - "epoch": 0.05362832922503457, - "grad_norm": 4.727095064478514, - "learning_rate": 3.994175152872152e-06, - "loss": 0.8155, - "num_input_tokens_seen": 9579485, - "step": 446 - }, - { - "epoch": 0.05374857211567366, - "grad_norm": 2.2550071022204383, - "learning_rate": 3.994115593268548e-06, - "loss": 0.7971, - "num_input_tokens_seen": 9598985, - "step": 447 - }, - { - "epoch": 0.05386881500631275, - "grad_norm": 2.0551462595466172, - "learning_rate": 3.994055731159195e-06, - "loss": 0.8194, - "num_input_tokens_seen": 9616175, - "step": 448 - }, - { - "epoch": 0.053989057896951846, - "grad_norm": 1.884791025630162, - "learning_rate": 3.993995566553172e-06, - "loss": 0.8782, - "num_input_tokens_seen": 9634860, - "step": 449 - }, - { - "epoch": 0.054109300787590936, - "grad_norm": 1.715468162095248, - "learning_rate": 3.993935099459607e-06, - "loss": 0.7708, - "num_input_tokens_seen": 9656195, - "step": 450 - }, - { - "epoch": 0.054229543678230026, - "grad_norm": 2.0018991408391567, - "learning_rate": 3.993874329887673e-06, - "loss": 0.75, - "num_input_tokens_seen": 9674570, - "step": 451 - }, - { - "epoch": 0.054349786568869116, - "grad_norm": 2.5245653679969604, - "learning_rate": 3.993813257846589e-06, - "loss": 0.8651, - "num_input_tokens_seen": 9691045, - "step": 452 - }, - { - "epoch": 0.054470029459508205, - "grad_norm": 5.073426511016169, - "learning_rate": 3.993751883345619e-06, - "loss": 0.9362, - "num_input_tokens_seen": 9709125, - "step": 453 - }, - { - "epoch": 0.054590272350147295, - "grad_norm": 2.738593803944557, - "learning_rate": 3.993690206394073e-06, - "loss": 0.8783, - "num_input_tokens_seen": 9725145, - "step": 454 - }, - { - "epoch": 0.054710515240786385, - "grad_norm": 2.2131290663029, - "learning_rate": 3.993628227001307e-06, - "loss": 0.8844, - "num_input_tokens_seen": 9743065, - "step": 455 - }, - { - "epoch": 0.05483075813142548, - "grad_norm": 3.708675805457093, - "learning_rate": 3.993565945176726e-06, - "loss": 0.7133, - "num_input_tokens_seen": 9763810, - "step": 456 - }, - { - "epoch": 0.05495100102206457, - "grad_norm": 2.5102640724122764, - "learning_rate": 3.993503360929776e-06, - "loss": 0.8478, - "num_input_tokens_seen": 9782415, - "step": 457 - }, - { - "epoch": 0.05507124391270366, - "grad_norm": 3.2399637815336764, - "learning_rate": 3.99344047426995e-06, - "loss": 0.8145, - "num_input_tokens_seen": 9803395, - "step": 458 - }, - { - "epoch": 0.05519148680334275, - "grad_norm": 2.2112175900612194, - "learning_rate": 3.993377285206789e-06, - "loss": 0.9372, - "num_input_tokens_seen": 9822900, - "step": 459 - }, - { - "epoch": 0.05531172969398184, - "grad_norm": 1.6419042211246246, - "learning_rate": 3.99331379374988e-06, - "loss": 0.8669, - "num_input_tokens_seen": 9846225, - "step": 460 - }, - { - "epoch": 0.05543197258462093, - "grad_norm": 2.262798783247126, - "learning_rate": 3.993249999908852e-06, - "loss": 0.8075, - "num_input_tokens_seen": 9866095, - "step": 461 - }, - { - "epoch": 0.05555221547526003, - "grad_norm": 1.9975957376622713, - "learning_rate": 3.993185903693384e-06, - "loss": 0.8714, - "num_input_tokens_seen": 9882615, - "step": 462 - }, - { - "epoch": 0.05567245836589912, - "grad_norm": 2.1838738078948317, - "learning_rate": 3.9931215051131995e-06, - "loss": 0.8334, - "num_input_tokens_seen": 9902980, - "step": 463 - }, - { - "epoch": 0.05579270125653821, - "grad_norm": 1.9815331584439932, - "learning_rate": 3.993056804178068e-06, - "loss": 0.8071, - "num_input_tokens_seen": 9924245, - "step": 464 - }, - { - "epoch": 0.0559129441471773, - "grad_norm": 2.0167152865221043, - "learning_rate": 3.992991800897803e-06, - "loss": 0.8468, - "num_input_tokens_seen": 9943770, - "step": 465 - }, - { - "epoch": 0.05603318703781639, - "grad_norm": 2.577183816569157, - "learning_rate": 3.9929264952822665e-06, - "loss": 0.8954, - "num_input_tokens_seen": 9961025, - "step": 466 - }, - { - "epoch": 0.05615342992845548, - "grad_norm": 2.3021964040444125, - "learning_rate": 3.992860887341366e-06, - "loss": 0.8805, - "num_input_tokens_seen": 9978915, - "step": 467 - }, - { - "epoch": 0.056273672819094574, - "grad_norm": 1.9914289743116045, - "learning_rate": 3.992794977085052e-06, - "loss": 0.8205, - "num_input_tokens_seen": 9996635, - "step": 468 - }, - { - "epoch": 0.056393915709733664, - "grad_norm": 2.3997097017497557, - "learning_rate": 3.992728764523326e-06, - "loss": 0.8459, - "num_input_tokens_seen": 10015300, - "step": 469 - }, - { - "epoch": 0.05651415860037275, - "grad_norm": 1.8025508944863202, - "learning_rate": 3.99266224966623e-06, - "loss": 0.8046, - "num_input_tokens_seen": 10035935, - "step": 470 - }, - { - "epoch": 0.05663440149101184, - "grad_norm": 1.8615208777759509, - "learning_rate": 3.992595432523855e-06, - "loss": 0.8732, - "num_input_tokens_seen": 10052945, - "step": 471 - }, - { - "epoch": 0.05675464438165093, - "grad_norm": 2.290649144336832, - "learning_rate": 3.992528313106338e-06, - "loss": 0.8604, - "num_input_tokens_seen": 10070865, - "step": 472 - }, - { - "epoch": 0.05687488727229002, - "grad_norm": 2.2578693713553504, - "learning_rate": 3.9924608914238595e-06, - "loss": 0.8155, - "num_input_tokens_seen": 10085580, - "step": 473 - }, - { - "epoch": 0.05699513016292912, - "grad_norm": 5.416889397401193, - "learning_rate": 3.992393167486648e-06, - "loss": 0.8417, - "num_input_tokens_seen": 10104450, - "step": 474 - }, - { - "epoch": 0.05711537305356821, - "grad_norm": 2.1768818390965174, - "learning_rate": 3.992325141304977e-06, - "loss": 0.8043, - "num_input_tokens_seen": 10122125, - "step": 475 - }, - { - "epoch": 0.0572356159442073, - "grad_norm": 3.5235808281468404, - "learning_rate": 3.992256812889166e-06, - "loss": 0.8623, - "num_input_tokens_seen": 10137950, - "step": 476 - }, - { - "epoch": 0.05735585883484639, - "grad_norm": 4.680692353486723, - "learning_rate": 3.992188182249582e-06, - "loss": 0.7706, - "num_input_tokens_seen": 10159565, - "step": 477 - }, - { - "epoch": 0.05747610172548548, - "grad_norm": 2.0088306482333, - "learning_rate": 3.992119249396633e-06, - "loss": 0.9114, - "num_input_tokens_seen": 10177970, - "step": 478 - }, - { - "epoch": 0.05759634461612457, - "grad_norm": 1.891449319167649, - "learning_rate": 3.992050014340778e-06, - "loss": 0.821, - "num_input_tokens_seen": 10198045, - "step": 479 - }, - { - "epoch": 0.057716587506763666, - "grad_norm": 1.8591412030440282, - "learning_rate": 3.99198047709252e-06, - "loss": 0.5827, - "num_input_tokens_seen": 10259285, - "step": 480 - }, - { - "epoch": 0.057836830397402755, - "grad_norm": 1.8712158366408569, - "learning_rate": 3.991910637662408e-06, - "loss": 0.7941, - "num_input_tokens_seen": 10279295, - "step": 481 - }, - { - "epoch": 0.057957073288041845, - "grad_norm": 1.867321617714949, - "learning_rate": 3.9918404960610355e-06, - "loss": 0.8111, - "num_input_tokens_seen": 10298045, - "step": 482 - }, - { - "epoch": 0.058077316178680935, - "grad_norm": 2.5490296345981354, - "learning_rate": 3.991770052299043e-06, - "loss": 0.7764, - "num_input_tokens_seen": 10315995, - "step": 483 - }, - { - "epoch": 0.058197559069320025, - "grad_norm": 2.654946995194073, - "learning_rate": 3.991699306387118e-06, - "loss": 0.8825, - "num_input_tokens_seen": 10334185, - "step": 484 - }, - { - "epoch": 0.058317801959959115, - "grad_norm": 1.8533565075365972, - "learning_rate": 3.991628258335991e-06, - "loss": 0.7883, - "num_input_tokens_seen": 10356110, - "step": 485 - }, - { - "epoch": 0.05843804485059821, - "grad_norm": 3.7283403379959834, - "learning_rate": 3.991556908156442e-06, - "loss": 0.879, - "num_input_tokens_seen": 10372355, - "step": 486 - }, - { - "epoch": 0.0585582877412373, - "grad_norm": 1.8900953341548485, - "learning_rate": 3.9914852558592914e-06, - "loss": 0.8653, - "num_input_tokens_seen": 10393125, - "step": 487 - }, - { - "epoch": 0.05867853063187639, - "grad_norm": 7.081801536948411, - "learning_rate": 3.991413301455413e-06, - "loss": 0.8098, - "num_input_tokens_seen": 10409295, - "step": 488 - }, - { - "epoch": 0.05879877352251548, - "grad_norm": 2.261731731221124, - "learning_rate": 3.991341044955719e-06, - "loss": 0.7744, - "num_input_tokens_seen": 10428770, - "step": 489 - }, - { - "epoch": 0.05891901641315457, - "grad_norm": 2.1536266199498333, - "learning_rate": 3.991268486371172e-06, - "loss": 0.8194, - "num_input_tokens_seen": 10447045, - "step": 490 - }, - { - "epoch": 0.05903925930379366, - "grad_norm": 2.4097094101941345, - "learning_rate": 3.991195625712779e-06, - "loss": 0.8746, - "num_input_tokens_seen": 10463730, - "step": 491 - }, - { - "epoch": 0.05915950219443276, - "grad_norm": 2.0052990238002812, - "learning_rate": 3.991122462991592e-06, - "loss": 0.8178, - "num_input_tokens_seen": 10482970, - "step": 492 - }, - { - "epoch": 0.05927974508507185, - "grad_norm": 3.124127433901469, - "learning_rate": 3.991048998218712e-06, - "loss": 0.8192, - "num_input_tokens_seen": 10495995, - "step": 493 - }, - { - "epoch": 0.05939998797571094, - "grad_norm": 2.8505665571866277, - "learning_rate": 3.990975231405281e-06, - "loss": 0.7643, - "num_input_tokens_seen": 10514165, - "step": 494 - }, - { - "epoch": 0.05952023086635003, - "grad_norm": 2.189328628985515, - "learning_rate": 3.990901162562491e-06, - "loss": 0.7884, - "num_input_tokens_seen": 10534575, - "step": 495 - }, - { - "epoch": 0.05964047375698912, - "grad_norm": 1.9509847279846109, - "learning_rate": 3.9908267917015765e-06, - "loss": 0.9065, - "num_input_tokens_seen": 10552355, - "step": 496 - }, - { - "epoch": 0.059760716647628206, - "grad_norm": 1.9308492860027955, - "learning_rate": 3.990752118833821e-06, - "loss": 0.9164, - "num_input_tokens_seen": 10569515, - "step": 497 - }, - { - "epoch": 0.0598809595382673, - "grad_norm": 1.8686151732403626, - "learning_rate": 3.990677143970553e-06, - "loss": 0.774, - "num_input_tokens_seen": 10590045, - "step": 498 - }, - { - "epoch": 0.06000120242890639, - "grad_norm": 3.2748339879636847, - "learning_rate": 3.990601867123144e-06, - "loss": 0.8052, - "num_input_tokens_seen": 10609490, - "step": 499 - }, - { - "epoch": 0.06012144531954548, - "grad_norm": 2.4337536752088513, - "learning_rate": 3.990526288303014e-06, - "loss": 0.8524, - "num_input_tokens_seen": 10628000, - "step": 500 - }, - { - "epoch": 0.06024168821018457, - "grad_norm": 1.739362748024375, - "learning_rate": 3.9904504075216295e-06, - "loss": 0.9034, - "num_input_tokens_seen": 10648480, - "step": 501 - }, - { - "epoch": 0.06036193110082366, - "grad_norm": 2.598974519715081, - "learning_rate": 3.990374224790501e-06, - "loss": 0.9345, - "num_input_tokens_seen": 10666405, - "step": 502 - }, - { - "epoch": 0.06048217399146275, - "grad_norm": 1.9783589926431029, - "learning_rate": 3.990297740121185e-06, - "loss": 0.7078, - "num_input_tokens_seen": 10684060, - "step": 503 - }, - { - "epoch": 0.06060241688210185, - "grad_norm": 1.8694123166580234, - "learning_rate": 3.990220953525284e-06, - "loss": 0.7721, - "num_input_tokens_seen": 10700890, - "step": 504 - }, - { - "epoch": 0.06072265977274094, - "grad_norm": 4.149100831355966, - "learning_rate": 3.9901438650144465e-06, - "loss": 0.7522, - "num_input_tokens_seen": 10716860, - "step": 505 - }, - { - "epoch": 0.06084290266338003, - "grad_norm": 2.9211339624220134, - "learning_rate": 3.990066474600367e-06, - "loss": 0.9181, - "num_input_tokens_seen": 10734550, - "step": 506 - }, - { - "epoch": 0.06096314555401912, - "grad_norm": 2.3361459304190144, - "learning_rate": 3.989988782294786e-06, - "loss": 0.6782, - "num_input_tokens_seen": 10754360, - "step": 507 - }, - { - "epoch": 0.06108338844465821, - "grad_norm": 1.9149143850380357, - "learning_rate": 3.989910788109489e-06, - "loss": 0.9458, - "num_input_tokens_seen": 10770730, - "step": 508 - }, - { - "epoch": 0.0612036313352973, - "grad_norm": 2.0719309128283157, - "learning_rate": 3.989832492056307e-06, - "loss": 0.7555, - "num_input_tokens_seen": 10791475, - "step": 509 - }, - { - "epoch": 0.06132387422593639, - "grad_norm": 3.213781318485671, - "learning_rate": 3.989753894147119e-06, - "loss": 0.808, - "num_input_tokens_seen": 10811320, - "step": 510 - }, - { - "epoch": 0.061444117116575485, - "grad_norm": 1.8575915014479352, - "learning_rate": 3.989674994393846e-06, - "loss": 0.7979, - "num_input_tokens_seen": 10830515, - "step": 511 - }, - { - "epoch": 0.061564360007214575, - "grad_norm": 2.2259339012668633, - "learning_rate": 3.98959579280846e-06, - "loss": 0.944, - "num_input_tokens_seen": 10848635, - "step": 512 - }, - { - "epoch": 0.061684602897853665, - "grad_norm": 2.9625590471088588, - "learning_rate": 3.989516289402973e-06, - "loss": 0.8301, - "num_input_tokens_seen": 10863985, - "step": 513 - }, - { - "epoch": 0.061804845788492754, - "grad_norm": 2.6729341212316524, - "learning_rate": 3.989436484189447e-06, - "loss": 0.8184, - "num_input_tokens_seen": 10881650, - "step": 514 - }, - { - "epoch": 0.061925088679131844, - "grad_norm": 2.7756938883123508, - "learning_rate": 3.9893563771799885e-06, - "loss": 0.8167, - "num_input_tokens_seen": 10897845, - "step": 515 - }, - { - "epoch": 0.062045331569770934, - "grad_norm": 2.2899433764014985, - "learning_rate": 3.989275968386749e-06, - "loss": 0.8699, - "num_input_tokens_seen": 10915475, - "step": 516 - }, - { - "epoch": 0.06216557446041003, - "grad_norm": 2.0005072297173645, - "learning_rate": 3.989195257821926e-06, - "loss": 0.7709, - "num_input_tokens_seen": 10933680, - "step": 517 - }, - { - "epoch": 0.06228581735104912, - "grad_norm": 2.2103394415461524, - "learning_rate": 3.989114245497765e-06, - "loss": 0.8482, - "num_input_tokens_seen": 10953200, - "step": 518 - }, - { - "epoch": 0.06240606024168821, - "grad_norm": 2.4226692730025374, - "learning_rate": 3.989032931426554e-06, - "loss": 0.9517, - "num_input_tokens_seen": 10970075, - "step": 519 - }, - { - "epoch": 0.06252630313232731, - "grad_norm": 1.9818754387454325, - "learning_rate": 3.9889513156206295e-06, - "loss": 0.8715, - "num_input_tokens_seen": 10989235, - "step": 520 - }, - { - "epoch": 0.06264654602296639, - "grad_norm": 3.5067303838808677, - "learning_rate": 3.988869398092371e-06, - "loss": 0.7246, - "num_input_tokens_seen": 11008865, - "step": 521 - }, - { - "epoch": 0.06276678891360549, - "grad_norm": 2.20054691021171, - "learning_rate": 3.988787178854206e-06, - "loss": 0.7829, - "num_input_tokens_seen": 11028120, - "step": 522 - }, - { - "epoch": 0.06288703180424457, - "grad_norm": 2.125064072691492, - "learning_rate": 3.988704657918608e-06, - "loss": 0.8785, - "num_input_tokens_seen": 11046900, - "step": 523 - }, - { - "epoch": 0.06300727469488367, - "grad_norm": 2.937737490325927, - "learning_rate": 3.988621835298094e-06, - "loss": 0.8109, - "num_input_tokens_seen": 11063835, - "step": 524 - }, - { - "epoch": 0.06312751758552275, - "grad_norm": 1.8389244440830004, - "learning_rate": 3.988538711005229e-06, - "loss": 0.9203, - "num_input_tokens_seen": 11083010, - "step": 525 - }, - { - "epoch": 0.06324776047616185, - "grad_norm": 2.4991849503375385, - "learning_rate": 3.988455285052622e-06, - "loss": 0.8956, - "num_input_tokens_seen": 11098910, - "step": 526 - }, - { - "epoch": 0.06336800336680094, - "grad_norm": 2.089901340841375, - "learning_rate": 3.98837155745293e-06, - "loss": 0.8299, - "num_input_tokens_seen": 11116670, - "step": 527 - }, - { - "epoch": 0.06348824625744003, - "grad_norm": 2.024543591355315, - "learning_rate": 3.988287528218854e-06, - "loss": 0.7704, - "num_input_tokens_seen": 11135175, - "step": 528 - }, - { - "epoch": 0.06360848914807912, - "grad_norm": 19.767599156326686, - "learning_rate": 3.98820319736314e-06, - "loss": 0.8956, - "num_input_tokens_seen": 11151510, - "step": 529 - }, - { - "epoch": 0.0637287320387182, - "grad_norm": 2.1993387451356927, - "learning_rate": 3.988118564898582e-06, - "loss": 0.8552, - "num_input_tokens_seen": 11170770, - "step": 530 - }, - { - "epoch": 0.0638489749293573, - "grad_norm": 2.637295695710938, - "learning_rate": 3.988033630838019e-06, - "loss": 0.893, - "num_input_tokens_seen": 11184530, - "step": 531 - }, - { - "epoch": 0.0639692178199964, - "grad_norm": 2.4154880255597475, - "learning_rate": 3.987948395194334e-06, - "loss": 0.8716, - "num_input_tokens_seen": 11206630, - "step": 532 - }, - { - "epoch": 0.06408946071063548, - "grad_norm": 3.1376336621433674, - "learning_rate": 3.987862857980458e-06, - "loss": 0.7761, - "num_input_tokens_seen": 11222295, - "step": 533 - }, - { - "epoch": 0.06420970360127458, - "grad_norm": 1.962397799320079, - "learning_rate": 3.987777019209368e-06, - "loss": 0.7658, - "num_input_tokens_seen": 11242530, - "step": 534 - }, - { - "epoch": 0.06432994649191366, - "grad_norm": 1.7555089945683016, - "learning_rate": 3.987690878894084e-06, - "loss": 0.8174, - "num_input_tokens_seen": 11261965, - "step": 535 - }, - { - "epoch": 0.06445018938255276, - "grad_norm": 2.3033790486068138, - "learning_rate": 3.987604437047673e-06, - "loss": 0.8471, - "num_input_tokens_seen": 11281485, - "step": 536 - }, - { - "epoch": 0.06457043227319184, - "grad_norm": 2.152247193422331, - "learning_rate": 3.987517693683251e-06, - "loss": 0.7808, - "num_input_tokens_seen": 11299780, - "step": 537 - }, - { - "epoch": 0.06469067516383094, - "grad_norm": 2.3594580487430146, - "learning_rate": 3.9874306488139745e-06, - "loss": 0.9531, - "num_input_tokens_seen": 11314760, - "step": 538 - }, - { - "epoch": 0.06481091805447003, - "grad_norm": 1.8652951661676307, - "learning_rate": 3.987343302453049e-06, - "loss": 0.8687, - "num_input_tokens_seen": 11335755, - "step": 539 - }, - { - "epoch": 0.06493116094510912, - "grad_norm": 1.974887311788785, - "learning_rate": 3.987255654613724e-06, - "loss": 0.8265, - "num_input_tokens_seen": 11359240, - "step": 540 - }, - { - "epoch": 0.06505140383574821, - "grad_norm": 2.578322020091757, - "learning_rate": 3.987167705309296e-06, - "loss": 0.7032, - "num_input_tokens_seen": 11378235, - "step": 541 - }, - { - "epoch": 0.0651716467263873, - "grad_norm": 2.171382508485462, - "learning_rate": 3.987079454553108e-06, - "loss": 0.9487, - "num_input_tokens_seen": 11395905, - "step": 542 - }, - { - "epoch": 0.0652918896170264, - "grad_norm": 2.031591960511134, - "learning_rate": 3.986990902358546e-06, - "loss": 0.914, - "num_input_tokens_seen": 11412565, - "step": 543 - }, - { - "epoch": 0.06541213250766549, - "grad_norm": 2.364654999890037, - "learning_rate": 3.986902048739045e-06, - "loss": 0.9279, - "num_input_tokens_seen": 11432230, - "step": 544 - }, - { - "epoch": 0.06553237539830457, - "grad_norm": 2.711368073516525, - "learning_rate": 3.986812893708082e-06, - "loss": 0.8021, - "num_input_tokens_seen": 11448140, - "step": 545 - }, - { - "epoch": 0.06565261828894367, - "grad_norm": 2.357659962486148, - "learning_rate": 3.9867234372791826e-06, - "loss": 0.8191, - "num_input_tokens_seen": 11465815, - "step": 546 - }, - { - "epoch": 0.06577286117958275, - "grad_norm": 1.6216400663048482, - "learning_rate": 3.986633679465918e-06, - "loss": 0.8736, - "num_input_tokens_seen": 11485690, - "step": 547 - }, - { - "epoch": 0.06589310407022185, - "grad_norm": 2.2378904325170477, - "learning_rate": 3.986543620281904e-06, - "loss": 0.8114, - "num_input_tokens_seen": 11505060, - "step": 548 - }, - { - "epoch": 0.06601334696086093, - "grad_norm": 1.7709272087727046, - "learning_rate": 3.986453259740802e-06, - "loss": 0.9079, - "num_input_tokens_seen": 11522950, - "step": 549 - }, - { - "epoch": 0.06613358985150003, - "grad_norm": 3.0325094513822615, - "learning_rate": 3.986362597856319e-06, - "loss": 0.7896, - "num_input_tokens_seen": 11539170, - "step": 550 - }, - { - "epoch": 0.06625383274213913, - "grad_norm": 2.629614271337295, - "learning_rate": 3.986271634642211e-06, - "loss": 0.8135, - "num_input_tokens_seen": 11555870, - "step": 551 - }, - { - "epoch": 0.06637407563277821, - "grad_norm": 2.4093144249983816, - "learning_rate": 3.986180370112274e-06, - "loss": 0.8199, - "num_input_tokens_seen": 11572110, - "step": 552 - }, - { - "epoch": 0.0664943185234173, - "grad_norm": 3.207173330731257, - "learning_rate": 3.986088804280354e-06, - "loss": 0.7452, - "num_input_tokens_seen": 11591560, - "step": 553 - }, - { - "epoch": 0.06661456141405639, - "grad_norm": 2.4998134604066484, - "learning_rate": 3.985996937160342e-06, - "loss": 0.935, - "num_input_tokens_seen": 11610470, - "step": 554 - }, - { - "epoch": 0.06673480430469549, - "grad_norm": 2.3329724698044307, - "learning_rate": 3.985904768766173e-06, - "loss": 0.6992, - "num_input_tokens_seen": 11632965, - "step": 555 - }, - { - "epoch": 0.06685504719533458, - "grad_norm": 3.3615753032798015, - "learning_rate": 3.98581229911183e-06, - "loss": 0.7624, - "num_input_tokens_seen": 11651605, - "step": 556 - }, - { - "epoch": 0.06697529008597367, - "grad_norm": 1.6775108627143491, - "learning_rate": 3.985719528211341e-06, - "loss": 0.9134, - "num_input_tokens_seen": 11670695, - "step": 557 - }, - { - "epoch": 0.06709553297661276, - "grad_norm": 0.9879548240861006, - "learning_rate": 3.985626456078777e-06, - "loss": 0.6735, - "num_input_tokens_seen": 11735070, - "step": 558 - }, - { - "epoch": 0.06721577586725185, - "grad_norm": 2.5028444587692573, - "learning_rate": 3.985533082728259e-06, - "loss": 0.8653, - "num_input_tokens_seen": 11750445, - "step": 559 - }, - { - "epoch": 0.06733601875789094, - "grad_norm": 3.0584516804082695, - "learning_rate": 3.985439408173951e-06, - "loss": 0.7492, - "num_input_tokens_seen": 11770390, - "step": 560 - }, - { - "epoch": 0.06745626164853002, - "grad_norm": 3.0339846946346958, - "learning_rate": 3.9853454324300634e-06, - "loss": 0.7143, - "num_input_tokens_seen": 11789320, - "step": 561 - }, - { - "epoch": 0.06757650453916912, - "grad_norm": 2.411848715752077, - "learning_rate": 3.985251155510852e-06, - "loss": 0.7864, - "num_input_tokens_seen": 11808070, - "step": 562 - }, - { - "epoch": 0.06769674742980822, - "grad_norm": 1.809311906223256, - "learning_rate": 3.98515657743062e-06, - "loss": 0.8087, - "num_input_tokens_seen": 11827255, - "step": 563 - }, - { - "epoch": 0.0678169903204473, - "grad_norm": 2.3796174930654455, - "learning_rate": 3.985061698203711e-06, - "loss": 0.7747, - "num_input_tokens_seen": 11844090, - "step": 564 - }, - { - "epoch": 0.0679372332110864, - "grad_norm": 0.9471033307838439, - "learning_rate": 3.984966517844523e-06, - "loss": 0.6705, - "num_input_tokens_seen": 11899055, - "step": 565 - }, - { - "epoch": 0.06805747610172548, - "grad_norm": 2.394016397515654, - "learning_rate": 3.984871036367492e-06, - "loss": 0.813, - "num_input_tokens_seen": 11918800, - "step": 566 - }, - { - "epoch": 0.06817771899236458, - "grad_norm": 2.7025524872217273, - "learning_rate": 3.984775253787102e-06, - "loss": 0.8334, - "num_input_tokens_seen": 11936810, - "step": 567 - }, - { - "epoch": 0.06829796188300366, - "grad_norm": 3.181706360496871, - "learning_rate": 3.984679170117885e-06, - "loss": 0.882, - "num_input_tokens_seen": 11952735, - "step": 568 - }, - { - "epoch": 0.06841820477364276, - "grad_norm": 4.039934765051067, - "learning_rate": 3.984582785374415e-06, - "loss": 0.788, - "num_input_tokens_seen": 11969895, - "step": 569 - }, - { - "epoch": 0.06853844766428185, - "grad_norm": 2.0723409886287025, - "learning_rate": 3.9844860995713155e-06, - "loss": 0.8134, - "num_input_tokens_seen": 11989155, - "step": 570 - }, - { - "epoch": 0.06865869055492094, - "grad_norm": 2.498957001948544, - "learning_rate": 3.9843891127232524e-06, - "loss": 0.8195, - "num_input_tokens_seen": 12006410, - "step": 571 - }, - { - "epoch": 0.06877893344556003, - "grad_norm": 2.6896757999777052, - "learning_rate": 3.984291824844938e-06, - "loss": 0.6745, - "num_input_tokens_seen": 12021225, - "step": 572 - }, - { - "epoch": 0.06889917633619912, - "grad_norm": 2.484003095385279, - "learning_rate": 3.984194235951132e-06, - "loss": 0.8459, - "num_input_tokens_seen": 12037090, - "step": 573 - }, - { - "epoch": 0.06901941922683821, - "grad_norm": 2.9793247518218493, - "learning_rate": 3.9840963460566375e-06, - "loss": 0.8451, - "num_input_tokens_seen": 12055590, - "step": 574 - }, - { - "epoch": 0.06913966211747731, - "grad_norm": 1.5502389198021764, - "learning_rate": 3.983998155176305e-06, - "loss": 0.8925, - "num_input_tokens_seen": 12075670, - "step": 575 - }, - { - "epoch": 0.06925990500811639, - "grad_norm": 0.9987332730721558, - "learning_rate": 3.9838996633250305e-06, - "loss": 0.5921, - "num_input_tokens_seen": 12135905, - "step": 576 - }, - { - "epoch": 0.06938014789875549, - "grad_norm": 2.426078566379721, - "learning_rate": 3.983800870517753e-06, - "loss": 0.8815, - "num_input_tokens_seen": 12152415, - "step": 577 - }, - { - "epoch": 0.06950039078939457, - "grad_norm": 3.500097985887705, - "learning_rate": 3.983701776769463e-06, - "loss": 0.7932, - "num_input_tokens_seen": 12169545, - "step": 578 - }, - { - "epoch": 0.06962063368003367, - "grad_norm": 2.1469261042102312, - "learning_rate": 3.9836023820951885e-06, - "loss": 0.8543, - "num_input_tokens_seen": 12188480, - "step": 579 - }, - { - "epoch": 0.06974087657067275, - "grad_norm": 2.1056036573009393, - "learning_rate": 3.983502686510011e-06, - "loss": 0.6852, - "num_input_tokens_seen": 12209030, - "step": 580 - }, - { - "epoch": 0.06986111946131185, - "grad_norm": 1.9860317082522783, - "learning_rate": 3.9834026900290525e-06, - "loss": 0.7263, - "num_input_tokens_seen": 12228145, - "step": 581 - }, - { - "epoch": 0.06998136235195095, - "grad_norm": 1.9386870743027589, - "learning_rate": 3.983302392667483e-06, - "loss": 0.9968, - "num_input_tokens_seen": 12248710, - "step": 582 - }, - { - "epoch": 0.07010160524259003, - "grad_norm": 1.7863563520140477, - "learning_rate": 3.983201794440517e-06, - "loss": 0.936, - "num_input_tokens_seen": 12268005, - "step": 583 - }, - { - "epoch": 0.07022184813322913, - "grad_norm": 1.7565383272743134, - "learning_rate": 3.9831008953634165e-06, - "loss": 0.6831, - "num_input_tokens_seen": 12287015, - "step": 584 - }, - { - "epoch": 0.07034209102386821, - "grad_norm": 4.215097231793874, - "learning_rate": 3.9829996954514864e-06, - "loss": 0.8131, - "num_input_tokens_seen": 12305875, - "step": 585 - }, - { - "epoch": 0.0704623339145073, - "grad_norm": 1.9596951197989274, - "learning_rate": 3.982898194720079e-06, - "loss": 0.8412, - "num_input_tokens_seen": 12326325, - "step": 586 - }, - { - "epoch": 0.0705825768051464, - "grad_norm": 2.8021330753838245, - "learning_rate": 3.982796393184592e-06, - "loss": 0.8184, - "num_input_tokens_seen": 12345125, - "step": 587 - }, - { - "epoch": 0.07070281969578548, - "grad_norm": 0.8474575318761562, - "learning_rate": 3.98269429086047e-06, - "loss": 0.6509, - "num_input_tokens_seen": 12402685, - "step": 588 - }, - { - "epoch": 0.07082306258642458, - "grad_norm": 2.8861483170817683, - "learning_rate": 3.982591887763199e-06, - "loss": 0.8649, - "num_input_tokens_seen": 12419865, - "step": 589 - }, - { - "epoch": 0.07094330547706366, - "grad_norm": 2.453987272615334, - "learning_rate": 3.982489183908316e-06, - "loss": 0.8157, - "num_input_tokens_seen": 12436005, - "step": 590 - }, - { - "epoch": 0.07106354836770276, - "grad_norm": 1.7210524960477205, - "learning_rate": 3.982386179311399e-06, - "loss": 0.84, - "num_input_tokens_seen": 12456245, - "step": 591 - }, - { - "epoch": 0.07118379125834184, - "grad_norm": 5.366920478723499, - "learning_rate": 3.982282873988075e-06, - "loss": 0.8739, - "num_input_tokens_seen": 12473840, - "step": 592 - }, - { - "epoch": 0.07130403414898094, - "grad_norm": 1.6965908445137974, - "learning_rate": 3.982179267954016e-06, - "loss": 0.8689, - "num_input_tokens_seen": 12493990, - "step": 593 - }, - { - "epoch": 0.07142427703962004, - "grad_norm": 2.346572727571377, - "learning_rate": 3.982075361224937e-06, - "loss": 0.9714, - "num_input_tokens_seen": 12512075, - "step": 594 - }, - { - "epoch": 0.07154451993025912, - "grad_norm": 2.5202770051089964, - "learning_rate": 3.981971153816602e-06, - "loss": 0.8871, - "num_input_tokens_seen": 12529400, - "step": 595 - }, - { - "epoch": 0.07166476282089822, - "grad_norm": 1.5841916320621197, - "learning_rate": 3.981866645744819e-06, - "loss": 0.9521, - "num_input_tokens_seen": 12549835, - "step": 596 - }, - { - "epoch": 0.0717850057115373, - "grad_norm": 2.3125913889850755, - "learning_rate": 3.9817618370254416e-06, - "loss": 0.8122, - "num_input_tokens_seen": 12566210, - "step": 597 - }, - { - "epoch": 0.0719052486021764, - "grad_norm": 2.9059936280908767, - "learning_rate": 3.9816567276743684e-06, - "loss": 0.8656, - "num_input_tokens_seen": 12585795, - "step": 598 - }, - { - "epoch": 0.0720254914928155, - "grad_norm": 2.3164204614290758, - "learning_rate": 3.9815513177075466e-06, - "loss": 0.7691, - "num_input_tokens_seen": 12604300, - "step": 599 - }, - { - "epoch": 0.07214573438345458, - "grad_norm": 1.6556249744709417, - "learning_rate": 3.9814456071409646e-06, - "loss": 0.7075, - "num_input_tokens_seen": 12624555, - "step": 600 - }, - { - "epoch": 0.07226597727409367, - "grad_norm": 2.458557845784842, - "learning_rate": 3.981339595990659e-06, - "loss": 0.8634, - "num_input_tokens_seen": 12642805, - "step": 601 - }, - { - "epoch": 0.07238622016473276, - "grad_norm": 2.154435173306513, - "learning_rate": 3.981233284272713e-06, - "loss": 0.8113, - "num_input_tokens_seen": 12662270, - "step": 602 - }, - { - "epoch": 0.07250646305537185, - "grad_norm": 1.5308595601567334, - "learning_rate": 3.981126672003253e-06, - "loss": 0.8908, - "num_input_tokens_seen": 12684665, - "step": 603 - }, - { - "epoch": 0.07262670594601094, - "grad_norm": 3.135527009694162, - "learning_rate": 3.981019759198451e-06, - "loss": 0.7884, - "num_input_tokens_seen": 12703335, - "step": 604 - }, - { - "epoch": 0.07274694883665003, - "grad_norm": 2.079649669982467, - "learning_rate": 3.980912545874528e-06, - "loss": 0.8344, - "num_input_tokens_seen": 12723220, - "step": 605 - }, - { - "epoch": 0.07286719172728913, - "grad_norm": 2.2494378998527247, - "learning_rate": 3.980805032047746e-06, - "loss": 0.8566, - "num_input_tokens_seen": 12744410, - "step": 606 - }, - { - "epoch": 0.07298743461792821, - "grad_norm": 2.3893920902041406, - "learning_rate": 3.980697217734415e-06, - "loss": 0.8056, - "num_input_tokens_seen": 12761870, - "step": 607 - }, - { - "epoch": 0.07310767750856731, - "grad_norm": 1.9937039003108203, - "learning_rate": 3.980589102950891e-06, - "loss": 0.9136, - "num_input_tokens_seen": 12779755, - "step": 608 - }, - { - "epoch": 0.07322792039920639, - "grad_norm": 2.523353089931453, - "learning_rate": 3.9804806877135755e-06, - "loss": 0.7694, - "num_input_tokens_seen": 12797520, - "step": 609 - }, - { - "epoch": 0.07334816328984549, - "grad_norm": 2.2228327105234498, - "learning_rate": 3.980371972038915e-06, - "loss": 0.8655, - "num_input_tokens_seen": 12817730, - "step": 610 - }, - { - "epoch": 0.07346840618048459, - "grad_norm": 1.7637150102749324, - "learning_rate": 3.980262955943399e-06, - "loss": 0.8363, - "num_input_tokens_seen": 12837115, - "step": 611 - }, - { - "epoch": 0.07358864907112367, - "grad_norm": 2.711290502876977, - "learning_rate": 3.980153639443569e-06, - "loss": 0.8821, - "num_input_tokens_seen": 12852820, - "step": 612 - }, - { - "epoch": 0.07370889196176277, - "grad_norm": 2.702649968237358, - "learning_rate": 3.980044022556005e-06, - "loss": 0.8009, - "num_input_tokens_seen": 12872225, - "step": 613 - }, - { - "epoch": 0.07382913485240185, - "grad_norm": 2.2520685274667596, - "learning_rate": 3.9799341052973375e-06, - "loss": 0.7245, - "num_input_tokens_seen": 12891780, - "step": 614 - }, - { - "epoch": 0.07394937774304094, - "grad_norm": 2.4478281279471057, - "learning_rate": 3.979823887684241e-06, - "loss": 0.7559, - "num_input_tokens_seen": 12910440, - "step": 615 - }, - { - "epoch": 0.07406962063368003, - "grad_norm": 2.356110600878282, - "learning_rate": 3.979713369733434e-06, - "loss": 0.8559, - "num_input_tokens_seen": 12928025, - "step": 616 - }, - { - "epoch": 0.07418986352431912, - "grad_norm": 2.026122785561901, - "learning_rate": 3.979602551461683e-06, - "loss": 0.841, - "num_input_tokens_seen": 12948525, - "step": 617 - }, - { - "epoch": 0.07431010641495822, - "grad_norm": 2.2039280579521723, - "learning_rate": 3.979491432885799e-06, - "loss": 0.9235, - "num_input_tokens_seen": 12964510, - "step": 618 - }, - { - "epoch": 0.0744303493055973, - "grad_norm": 2.009084833115377, - "learning_rate": 3.97938001402264e-06, - "loss": 0.8279, - "num_input_tokens_seen": 12983355, - "step": 619 - }, - { - "epoch": 0.0745505921962364, - "grad_norm": 3.41525478576904, - "learning_rate": 3.979268294889105e-06, - "loss": 0.812, - "num_input_tokens_seen": 12998625, - "step": 620 - }, - { - "epoch": 0.07467083508687548, - "grad_norm": 4.618832816567739, - "learning_rate": 3.979156275502143e-06, - "loss": 0.7523, - "num_input_tokens_seen": 13022005, - "step": 621 - }, - { - "epoch": 0.07479107797751458, - "grad_norm": 2.2994009478839437, - "learning_rate": 3.979043955878749e-06, - "loss": 0.9241, - "num_input_tokens_seen": 13039570, - "step": 622 - }, - { - "epoch": 0.07491132086815366, - "grad_norm": 2.667507308198865, - "learning_rate": 3.978931336035959e-06, - "loss": 0.8308, - "num_input_tokens_seen": 13058100, - "step": 623 - }, - { - "epoch": 0.07503156375879276, - "grad_norm": 2.4720470950896956, - "learning_rate": 3.9788184159908595e-06, - "loss": 0.8211, - "num_input_tokens_seen": 13074950, - "step": 624 - }, - { - "epoch": 0.07515180664943186, - "grad_norm": 3.2995386523457624, - "learning_rate": 3.97870519576058e-06, - "loss": 0.8234, - "num_input_tokens_seen": 13091095, - "step": 625 - }, - { - "epoch": 0.07527204954007094, - "grad_norm": 2.431466590709614, - "learning_rate": 3.978591675362295e-06, - "loss": 0.8008, - "num_input_tokens_seen": 13109530, - "step": 626 - }, - { - "epoch": 0.07539229243071004, - "grad_norm": 2.186595191657186, - "learning_rate": 3.978477854813226e-06, - "loss": 0.8785, - "num_input_tokens_seen": 13128590, - "step": 627 - }, - { - "epoch": 0.07551253532134912, - "grad_norm": 1.8913845495403419, - "learning_rate": 3.97836373413064e-06, - "loss": 0.8281, - "num_input_tokens_seen": 13146365, - "step": 628 - }, - { - "epoch": 0.07563277821198822, - "grad_norm": 1.899667587905074, - "learning_rate": 3.978249313331848e-06, - "loss": 0.75, - "num_input_tokens_seen": 13164315, - "step": 629 - }, - { - "epoch": 0.07575302110262731, - "grad_norm": 5.47117097086709, - "learning_rate": 3.978134592434208e-06, - "loss": 0.6289, - "num_input_tokens_seen": 13181785, - "step": 630 - }, - { - "epoch": 0.0758732639932664, - "grad_norm": 1.1013894868521719, - "learning_rate": 3.978019571455123e-06, - "loss": 0.6476, - "num_input_tokens_seen": 13233450, - "step": 631 - }, - { - "epoch": 0.07599350688390549, - "grad_norm": 2.1038314624286376, - "learning_rate": 3.977904250412042e-06, - "loss": 0.848, - "num_input_tokens_seen": 13252125, - "step": 632 - }, - { - "epoch": 0.07611374977454458, - "grad_norm": 2.6170011284608776, - "learning_rate": 3.97778862932246e-06, - "loss": 0.8636, - "num_input_tokens_seen": 13269010, - "step": 633 - }, - { - "epoch": 0.07623399266518367, - "grad_norm": 2.001324452203999, - "learning_rate": 3.9776727082039144e-06, - "loss": 0.9345, - "num_input_tokens_seen": 13285700, - "step": 634 - }, - { - "epoch": 0.07635423555582276, - "grad_norm": 0.9320747082599746, - "learning_rate": 3.977556487073991e-06, - "loss": 0.5866, - "num_input_tokens_seen": 13339975, - "step": 635 - }, - { - "epoch": 0.07647447844646185, - "grad_norm": 1.9892059577342283, - "learning_rate": 3.97743996595032e-06, - "loss": 0.8049, - "num_input_tokens_seen": 13359735, - "step": 636 - }, - { - "epoch": 0.07659472133710095, - "grad_norm": 1.664456914361808, - "learning_rate": 3.9773231448505804e-06, - "loss": 0.8174, - "num_input_tokens_seen": 13381245, - "step": 637 - }, - { - "epoch": 0.07671496422774003, - "grad_norm": 1.9811590021932886, - "learning_rate": 3.977206023792491e-06, - "loss": 0.7667, - "num_input_tokens_seen": 13400855, - "step": 638 - }, - { - "epoch": 0.07683520711837913, - "grad_norm": 2.905793672851696, - "learning_rate": 3.97708860279382e-06, - "loss": 0.8166, - "num_input_tokens_seen": 13418685, - "step": 639 - }, - { - "epoch": 0.07695545000901821, - "grad_norm": 1.7325124029352574, - "learning_rate": 3.97697088187238e-06, - "loss": 0.7888, - "num_input_tokens_seen": 13438920, - "step": 640 - }, - { - "epoch": 0.07707569289965731, - "grad_norm": 2.00514909654152, - "learning_rate": 3.976852861046029e-06, - "loss": 0.9079, - "num_input_tokens_seen": 13455255, - "step": 641 - }, - { - "epoch": 0.0771959357902964, - "grad_norm": 1.545971169790335, - "learning_rate": 3.97673454033267e-06, - "loss": 0.7941, - "num_input_tokens_seen": 13477075, - "step": 642 - }, - { - "epoch": 0.07731617868093549, - "grad_norm": 2.134137000040373, - "learning_rate": 3.976615919750254e-06, - "loss": 0.8182, - "num_input_tokens_seen": 13494495, - "step": 643 - }, - { - "epoch": 0.07743642157157458, - "grad_norm": 1.9974233890778168, - "learning_rate": 3.976496999316775e-06, - "loss": 0.8633, - "num_input_tokens_seen": 13512970, - "step": 644 - }, - { - "epoch": 0.07755666446221367, - "grad_norm": 2.819701164318358, - "learning_rate": 3.976377779050271e-06, - "loss": 0.8406, - "num_input_tokens_seen": 13530820, - "step": 645 - }, - { - "epoch": 0.07767690735285276, - "grad_norm": 2.3351755050819896, - "learning_rate": 3.976258258968831e-06, - "loss": 0.8353, - "num_input_tokens_seen": 13549085, - "step": 646 - }, - { - "epoch": 0.07779715024349185, - "grad_norm": 2.858789616542731, - "learning_rate": 3.976138439090583e-06, - "loss": 0.7446, - "num_input_tokens_seen": 13566885, - "step": 647 - }, - { - "epoch": 0.07791739313413094, - "grad_norm": 2.376160404782646, - "learning_rate": 3.976018319433706e-06, - "loss": 0.8438, - "num_input_tokens_seen": 13584150, - "step": 648 - }, - { - "epoch": 0.07803763602477004, - "grad_norm": 5.744370796872292, - "learning_rate": 3.9758979000164205e-06, - "loss": 0.9185, - "num_input_tokens_seen": 13600690, - "step": 649 - }, - { - "epoch": 0.07815787891540912, - "grad_norm": 2.751184724187577, - "learning_rate": 3.975777180856995e-06, - "loss": 0.7142, - "num_input_tokens_seen": 13619530, - "step": 650 - }, - { - "epoch": 0.07827812180604822, - "grad_norm": 2.471317199751255, - "learning_rate": 3.975656161973742e-06, - "loss": 0.8673, - "num_input_tokens_seen": 13638335, - "step": 651 - }, - { - "epoch": 0.0783983646966873, - "grad_norm": 2.5016421566209517, - "learning_rate": 3.9755348433850194e-06, - "loss": 0.8896, - "num_input_tokens_seen": 13653395, - "step": 652 - }, - { - "epoch": 0.0785186075873264, - "grad_norm": 1.1513181630416465, - "learning_rate": 3.975413225109232e-06, - "loss": 0.7172, - "num_input_tokens_seen": 13713665, - "step": 653 - }, - { - "epoch": 0.0786388504779655, - "grad_norm": 3.6696349499046605, - "learning_rate": 3.975291307164829e-06, - "loss": 0.9334, - "num_input_tokens_seen": 13732030, - "step": 654 - }, - { - "epoch": 0.07875909336860458, - "grad_norm": 2.140405776309736, - "learning_rate": 3.975169089570306e-06, - "loss": 0.8541, - "num_input_tokens_seen": 13750125, - "step": 655 - }, - { - "epoch": 0.07887933625924368, - "grad_norm": 3.300815460154189, - "learning_rate": 3.975046572344202e-06, - "loss": 0.9103, - "num_input_tokens_seen": 13766305, - "step": 656 - }, - { - "epoch": 0.07899957914988276, - "grad_norm": 2.1186246298588185, - "learning_rate": 3.974923755505103e-06, - "loss": 0.7223, - "num_input_tokens_seen": 13785255, - "step": 657 - }, - { - "epoch": 0.07911982204052186, - "grad_norm": 1.6538269995736954, - "learning_rate": 3.974800639071641e-06, - "loss": 0.9001, - "num_input_tokens_seen": 13805695, - "step": 658 - }, - { - "epoch": 0.07924006493116094, - "grad_norm": 2.356686144343739, - "learning_rate": 3.974677223062492e-06, - "loss": 1.0068, - "num_input_tokens_seen": 13822630, - "step": 659 - }, - { - "epoch": 0.07936030782180004, - "grad_norm": 8.20502877875863, - "learning_rate": 3.974553507496378e-06, - "loss": 0.7419, - "num_input_tokens_seen": 13840925, - "step": 660 - }, - { - "epoch": 0.07948055071243913, - "grad_norm": 2.133690918767308, - "learning_rate": 3.974429492392068e-06, - "loss": 0.8789, - "num_input_tokens_seen": 13860670, - "step": 661 - }, - { - "epoch": 0.07960079360307822, - "grad_norm": 2.516218640713848, - "learning_rate": 3.974305177768373e-06, - "loss": 0.9021, - "num_input_tokens_seen": 13878600, - "step": 662 - }, - { - "epoch": 0.07972103649371731, - "grad_norm": 2.4852367365637718, - "learning_rate": 3.974180563644152e-06, - "loss": 0.8472, - "num_input_tokens_seen": 13896885, - "step": 663 - }, - { - "epoch": 0.0798412793843564, - "grad_norm": 2.3134242343203257, - "learning_rate": 3.97405565003831e-06, - "loss": 0.8874, - "num_input_tokens_seen": 13912690, - "step": 664 - }, - { - "epoch": 0.07996152227499549, - "grad_norm": 2.5881119570541826, - "learning_rate": 3.973930436969794e-06, - "loss": 0.7938, - "num_input_tokens_seen": 13930865, - "step": 665 - }, - { - "epoch": 0.08008176516563459, - "grad_norm": 2.02909855328078, - "learning_rate": 3.973804924457602e-06, - "loss": 0.8578, - "num_input_tokens_seen": 13948665, - "step": 666 - }, - { - "epoch": 0.08020200805627367, - "grad_norm": 1.8463505585984528, - "learning_rate": 3.973679112520771e-06, - "loss": 0.8503, - "num_input_tokens_seen": 13970100, - "step": 667 - }, - { - "epoch": 0.08032225094691277, - "grad_norm": 1.9982400017381965, - "learning_rate": 3.973553001178389e-06, - "loss": 0.987, - "num_input_tokens_seen": 13987325, - "step": 668 - }, - { - "epoch": 0.08044249383755185, - "grad_norm": 2.1371043066152686, - "learning_rate": 3.973426590449585e-06, - "loss": 0.7554, - "num_input_tokens_seen": 14005000, - "step": 669 - }, - { - "epoch": 0.08056273672819095, - "grad_norm": 2.1604572631445897, - "learning_rate": 3.9732998803535364e-06, - "loss": 0.7592, - "num_input_tokens_seen": 14022780, - "step": 670 - }, - { - "epoch": 0.08068297961883003, - "grad_norm": 2.361880208337802, - "learning_rate": 3.973172870909465e-06, - "loss": 0.8566, - "num_input_tokens_seen": 14037265, - "step": 671 - }, - { - "epoch": 0.08080322250946913, - "grad_norm": 2.4549367924767385, - "learning_rate": 3.973045562136638e-06, - "loss": 0.8122, - "num_input_tokens_seen": 14053800, - "step": 672 - }, - { - "epoch": 0.08092346540010822, - "grad_norm": 2.2493861764505767, - "learning_rate": 3.972917954054368e-06, - "loss": 0.9059, - "num_input_tokens_seen": 14072075, - "step": 673 - }, - { - "epoch": 0.08104370829074731, - "grad_norm": 2.3594301558737754, - "learning_rate": 3.972790046682013e-06, - "loss": 0.8122, - "num_input_tokens_seen": 14090470, - "step": 674 - }, - { - "epoch": 0.0811639511813864, - "grad_norm": 3.098041091611056, - "learning_rate": 3.972661840038977e-06, - "loss": 0.796, - "num_input_tokens_seen": 14110480, - "step": 675 - }, - { - "epoch": 0.08128419407202549, - "grad_norm": 2.362030104678018, - "learning_rate": 3.972533334144707e-06, - "loss": 0.8344, - "num_input_tokens_seen": 14127125, - "step": 676 - }, - { - "epoch": 0.08140443696266458, - "grad_norm": 2.6912333012220215, - "learning_rate": 3.972404529018699e-06, - "loss": 0.7895, - "num_input_tokens_seen": 14146705, - "step": 677 - }, - { - "epoch": 0.08152467985330367, - "grad_norm": 1.8724244685298441, - "learning_rate": 3.972275424680493e-06, - "loss": 0.8552, - "num_input_tokens_seen": 14166535, - "step": 678 - }, - { - "epoch": 0.08164492274394276, - "grad_norm": 2.5566213312496706, - "learning_rate": 3.972146021149673e-06, - "loss": 0.9084, - "num_input_tokens_seen": 14184530, - "step": 679 - }, - { - "epoch": 0.08176516563458186, - "grad_norm": 2.6648812216977773, - "learning_rate": 3.972016318445868e-06, - "loss": 0.7945, - "num_input_tokens_seen": 14202250, - "step": 680 - }, - { - "epoch": 0.08188540852522094, - "grad_norm": 2.1998959665602746, - "learning_rate": 3.971886316588757e-06, - "loss": 0.9064, - "num_input_tokens_seen": 14222475, - "step": 681 - }, - { - "epoch": 0.08200565141586004, - "grad_norm": 3.0602546884036093, - "learning_rate": 3.9717560155980595e-06, - "loss": 0.755, - "num_input_tokens_seen": 14237845, - "step": 682 - }, - { - "epoch": 0.08212589430649912, - "grad_norm": 1.9067787928465365, - "learning_rate": 3.971625415493542e-06, - "loss": 0.9266, - "num_input_tokens_seen": 14255885, - "step": 683 - }, - { - "epoch": 0.08224613719713822, - "grad_norm": 1.9935229796720129, - "learning_rate": 3.971494516295017e-06, - "loss": 0.8697, - "num_input_tokens_seen": 14275055, - "step": 684 - }, - { - "epoch": 0.08236638008777732, - "grad_norm": 2.034360920203988, - "learning_rate": 3.971363318022341e-06, - "loss": 0.8461, - "num_input_tokens_seen": 14296115, - "step": 685 - }, - { - "epoch": 0.0824866229784164, - "grad_norm": 2.027756637615474, - "learning_rate": 3.971231820695417e-06, - "loss": 0.6818, - "num_input_tokens_seen": 14319450, - "step": 686 - }, - { - "epoch": 0.0826068658690555, - "grad_norm": 2.4887209020018317, - "learning_rate": 3.971100024334193e-06, - "loss": 0.8098, - "num_input_tokens_seen": 14336690, - "step": 687 - }, - { - "epoch": 0.08272710875969458, - "grad_norm": 2.0144004566929765, - "learning_rate": 3.970967928958663e-06, - "loss": 0.8564, - "num_input_tokens_seen": 14353525, - "step": 688 - }, - { - "epoch": 0.08284735165033368, - "grad_norm": 1.7497338639848277, - "learning_rate": 3.970835534588865e-06, - "loss": 0.8309, - "num_input_tokens_seen": 14370740, - "step": 689 - }, - { - "epoch": 0.08296759454097276, - "grad_norm": 1.7206593880512195, - "learning_rate": 3.970702841244883e-06, - "loss": 0.8536, - "num_input_tokens_seen": 14388780, - "step": 690 - }, - { - "epoch": 0.08308783743161186, - "grad_norm": 2.031411152548322, - "learning_rate": 3.970569848946847e-06, - "loss": 0.8273, - "num_input_tokens_seen": 14408315, - "step": 691 - }, - { - "epoch": 0.08320808032225095, - "grad_norm": 2.6741784887884057, - "learning_rate": 3.970436557714932e-06, - "loss": 0.825, - "num_input_tokens_seen": 14424555, - "step": 692 - }, - { - "epoch": 0.08332832321289003, - "grad_norm": 2.0452193555434426, - "learning_rate": 3.970302967569358e-06, - "loss": 0.8547, - "num_input_tokens_seen": 14442865, - "step": 693 - }, - { - "epoch": 0.08344856610352913, - "grad_norm": 1.963202225097715, - "learning_rate": 3.9701690785303896e-06, - "loss": 0.687, - "num_input_tokens_seen": 14461780, - "step": 694 - }, - { - "epoch": 0.08356880899416821, - "grad_norm": 2.6151209058241403, - "learning_rate": 3.970034890618339e-06, - "loss": 0.8796, - "num_input_tokens_seen": 14481190, - "step": 695 - }, - { - "epoch": 0.08368905188480731, - "grad_norm": 2.020238884687549, - "learning_rate": 3.969900403853562e-06, - "loss": 0.8734, - "num_input_tokens_seen": 14499950, - "step": 696 - }, - { - "epoch": 0.08380929477544641, - "grad_norm": 2.798472618251497, - "learning_rate": 3.96976561825646e-06, - "loss": 0.7787, - "num_input_tokens_seen": 14516760, - "step": 697 - }, - { - "epoch": 0.08392953766608549, - "grad_norm": 2.0945113681639334, - "learning_rate": 3.969630533847479e-06, - "loss": 0.8741, - "num_input_tokens_seen": 14535440, - "step": 698 - }, - { - "epoch": 0.08404978055672459, - "grad_norm": 1.9581714131200367, - "learning_rate": 3.969495150647113e-06, - "loss": 0.8508, - "num_input_tokens_seen": 14553330, - "step": 699 - }, - { - "epoch": 0.08417002344736367, - "grad_norm": 2.5493550795950877, - "learning_rate": 3.969359468675899e-06, - "loss": 0.7649, - "num_input_tokens_seen": 14573180, - "step": 700 - }, - { - "epoch": 0.08429026633800277, - "grad_norm": 1.9733296071230897, - "learning_rate": 3.969223487954418e-06, - "loss": 0.8948, - "num_input_tokens_seen": 14590360, - "step": 701 - }, - { - "epoch": 0.08441050922864185, - "grad_norm": 2.1040598215163953, - "learning_rate": 3.969087208503301e-06, - "loss": 0.8243, - "num_input_tokens_seen": 14610160, - "step": 702 - }, - { - "epoch": 0.08453075211928095, - "grad_norm": 3.247026909668937, - "learning_rate": 3.968950630343219e-06, - "loss": 0.8413, - "num_input_tokens_seen": 14626865, - "step": 703 - }, - { - "epoch": 0.08465099500992004, - "grad_norm": 3.107297568930794, - "learning_rate": 3.968813753494892e-06, - "loss": 0.9291, - "num_input_tokens_seen": 14644745, - "step": 704 - }, - { - "epoch": 0.08477123790055913, - "grad_norm": 14.507405002489918, - "learning_rate": 3.968676577979084e-06, - "loss": 0.7537, - "num_input_tokens_seen": 14664015, - "step": 705 - }, - { - "epoch": 0.08489148079119822, - "grad_norm": 3.2302177816524655, - "learning_rate": 3.968539103816605e-06, - "loss": 0.7812, - "num_input_tokens_seen": 14681535, - "step": 706 - }, - { - "epoch": 0.0850117236818373, - "grad_norm": 2.161604433236569, - "learning_rate": 3.9684013310283085e-06, - "loss": 0.8893, - "num_input_tokens_seen": 14699940, - "step": 707 - }, - { - "epoch": 0.0851319665724764, - "grad_norm": 3.902670916707156, - "learning_rate": 3.9682632596350956e-06, - "loss": 0.6461, - "num_input_tokens_seen": 14720825, - "step": 708 - }, - { - "epoch": 0.0852522094631155, - "grad_norm": 2.375580443107337, - "learning_rate": 3.968124889657911e-06, - "loss": 0.7813, - "num_input_tokens_seen": 14735645, - "step": 709 - }, - { - "epoch": 0.08537245235375458, - "grad_norm": 4.3180750225749795, - "learning_rate": 3.967986221117746e-06, - "loss": 0.9005, - "num_input_tokens_seen": 14751305, - "step": 710 - }, - { - "epoch": 0.08549269524439368, - "grad_norm": 3.2832451594733776, - "learning_rate": 3.967847254035635e-06, - "loss": 0.8662, - "num_input_tokens_seen": 14770410, - "step": 711 - }, - { - "epoch": 0.08561293813503276, - "grad_norm": 10.396563553214172, - "learning_rate": 3.967707988432661e-06, - "loss": 0.87, - "num_input_tokens_seen": 14787835, - "step": 712 - }, - { - "epoch": 0.08573318102567186, - "grad_norm": 2.7680758992832253, - "learning_rate": 3.967568424329949e-06, - "loss": 0.8809, - "num_input_tokens_seen": 14807980, - "step": 713 - }, - { - "epoch": 0.08585342391631094, - "grad_norm": 0.8830994370187513, - "learning_rate": 3.967428561748671e-06, - "loss": 0.5977, - "num_input_tokens_seen": 14875670, - "step": 714 - }, - { - "epoch": 0.08597366680695004, - "grad_norm": 3.127153628250816, - "learning_rate": 3.967288400710045e-06, - "loss": 0.8724, - "num_input_tokens_seen": 14894855, - "step": 715 - }, - { - "epoch": 0.08609390969758914, - "grad_norm": 2.7594761089139164, - "learning_rate": 3.9671479412353335e-06, - "loss": 0.8783, - "num_input_tokens_seen": 14913040, - "step": 716 - }, - { - "epoch": 0.08621415258822822, - "grad_norm": 2.4554005012340547, - "learning_rate": 3.967007183345843e-06, - "loss": 0.7454, - "num_input_tokens_seen": 14932615, - "step": 717 - }, - { - "epoch": 0.08633439547886732, - "grad_norm": 2.362434679121161, - "learning_rate": 3.966866127062927e-06, - "loss": 0.8987, - "num_input_tokens_seen": 14949460, - "step": 718 - }, - { - "epoch": 0.0864546383695064, - "grad_norm": 0.9745794720445365, - "learning_rate": 3.966724772407982e-06, - "loss": 0.6818, - "num_input_tokens_seen": 15006695, - "step": 719 - }, - { - "epoch": 0.0865748812601455, - "grad_norm": 23.01756729782883, - "learning_rate": 3.966583119402454e-06, - "loss": 0.8794, - "num_input_tokens_seen": 15023180, - "step": 720 - }, - { - "epoch": 0.08669512415078459, - "grad_norm": 1.6756533688020472, - "learning_rate": 3.9664411680678305e-06, - "loss": 0.8196, - "num_input_tokens_seen": 15044655, - "step": 721 - }, - { - "epoch": 0.08681536704142367, - "grad_norm": 0.9522742056414344, - "learning_rate": 3.966298918425644e-06, - "loss": 0.6426, - "num_input_tokens_seen": 15101865, - "step": 722 - }, - { - "epoch": 0.08693560993206277, - "grad_norm": 2.4526747326126492, - "learning_rate": 3.966156370497476e-06, - "loss": 0.8321, - "num_input_tokens_seen": 15125195, - "step": 723 - }, - { - "epoch": 0.08705585282270185, - "grad_norm": 1.9182045782137724, - "learning_rate": 3.96601352430495e-06, - "loss": 0.8852, - "num_input_tokens_seen": 15144685, - "step": 724 - }, - { - "epoch": 0.08717609571334095, - "grad_norm": 2.2229430420804173, - "learning_rate": 3.965870379869735e-06, - "loss": 0.8289, - "num_input_tokens_seen": 15166450, - "step": 725 - }, - { - "epoch": 0.08729633860398003, - "grad_norm": 6.086371157458483, - "learning_rate": 3.965726937213547e-06, - "loss": 0.8609, - "num_input_tokens_seen": 15184805, - "step": 726 - }, - { - "epoch": 0.08741658149461913, - "grad_norm": 2.522919711400508, - "learning_rate": 3.965583196358144e-06, - "loss": 0.798, - "num_input_tokens_seen": 15203560, - "step": 727 - }, - { - "epoch": 0.08753682438525823, - "grad_norm": 2.2648971305235137, - "learning_rate": 3.965439157325335e-06, - "loss": 0.7452, - "num_input_tokens_seen": 15220645, - "step": 728 - }, - { - "epoch": 0.08765706727589731, - "grad_norm": 2.5509349094510654, - "learning_rate": 3.965294820136968e-06, - "loss": 0.7569, - "num_input_tokens_seen": 15242165, - "step": 729 - }, - { - "epoch": 0.08777731016653641, - "grad_norm": 2.6537668532063443, - "learning_rate": 3.965150184814938e-06, - "loss": 0.8634, - "num_input_tokens_seen": 15261370, - "step": 730 - }, - { - "epoch": 0.08789755305717549, - "grad_norm": 3.0779170657643014, - "learning_rate": 3.965005251381189e-06, - "loss": 0.752, - "num_input_tokens_seen": 15279025, - "step": 731 - }, - { - "epoch": 0.08801779594781459, - "grad_norm": 0.9427581676011325, - "learning_rate": 3.964860019857705e-06, - "loss": 0.6676, - "num_input_tokens_seen": 15343660, - "step": 732 - }, - { - "epoch": 0.08813803883845367, - "grad_norm": 3.0093943758101096, - "learning_rate": 3.964714490266518e-06, - "loss": 0.8339, - "num_input_tokens_seen": 15364025, - "step": 733 - }, - { - "epoch": 0.08825828172909277, - "grad_norm": 0.9335780088441266, - "learning_rate": 3.964568662629706e-06, - "loss": 0.6608, - "num_input_tokens_seen": 15425050, - "step": 734 - }, - { - "epoch": 0.08837852461973186, - "grad_norm": 2.791396337546597, - "learning_rate": 3.9644225369693895e-06, - "loss": 0.8389, - "num_input_tokens_seen": 15445070, - "step": 735 - }, - { - "epoch": 0.08849876751037095, - "grad_norm": 2.35220968070547, - "learning_rate": 3.964276113307735e-06, - "loss": 0.8763, - "num_input_tokens_seen": 15464755, - "step": 736 - }, - { - "epoch": 0.08861901040101004, - "grad_norm": 2.2433476881964216, - "learning_rate": 3.9641293916669574e-06, - "loss": 0.8068, - "num_input_tokens_seen": 15483435, - "step": 737 - }, - { - "epoch": 0.08873925329164913, - "grad_norm": 1.8917270344456076, - "learning_rate": 3.9639823720693115e-06, - "loss": 0.8288, - "num_input_tokens_seen": 15505010, - "step": 738 - }, - { - "epoch": 0.08885949618228822, - "grad_norm": 0.9172565789983289, - "learning_rate": 3.963835054537102e-06, - "loss": 0.6523, - "num_input_tokens_seen": 15573695, - "step": 739 - }, - { - "epoch": 0.08897973907292732, - "grad_norm": 3.213698472725518, - "learning_rate": 3.963687439092676e-06, - "loss": 0.6137, - "num_input_tokens_seen": 15594100, - "step": 740 - }, - { - "epoch": 0.0890999819635664, - "grad_norm": 3.2958593131421465, - "learning_rate": 3.963539525758427e-06, - "loss": 0.8, - "num_input_tokens_seen": 15613380, - "step": 741 - }, - { - "epoch": 0.0892202248542055, - "grad_norm": 2.008049575068697, - "learning_rate": 3.9633913145567925e-06, - "loss": 0.6789, - "num_input_tokens_seen": 15633590, - "step": 742 - }, - { - "epoch": 0.08934046774484458, - "grad_norm": 2.429284120730918, - "learning_rate": 3.9632428055102575e-06, - "loss": 0.8113, - "num_input_tokens_seen": 15653320, - "step": 743 - }, - { - "epoch": 0.08946071063548368, - "grad_norm": 2.0947660088439384, - "learning_rate": 3.9630939986413495e-06, - "loss": 0.6739, - "num_input_tokens_seen": 15674840, - "step": 744 - }, - { - "epoch": 0.08958095352612276, - "grad_norm": 1.7908076624810088, - "learning_rate": 3.962944893972643e-06, - "loss": 0.7824, - "num_input_tokens_seen": 15693010, - "step": 745 - }, - { - "epoch": 0.08970119641676186, - "grad_norm": 3.521169557438839, - "learning_rate": 3.962795491526756e-06, - "loss": 0.9163, - "num_input_tokens_seen": 15709890, - "step": 746 - }, - { - "epoch": 0.08982143930740095, - "grad_norm": 3.377591863232413, - "learning_rate": 3.962645791326354e-06, - "loss": 0.891, - "num_input_tokens_seen": 15728865, - "step": 747 - }, - { - "epoch": 0.08994168219804004, - "grad_norm": 2.127211643559933, - "learning_rate": 3.962495793394146e-06, - "loss": 0.8281, - "num_input_tokens_seen": 15747775, - "step": 748 - }, - { - "epoch": 0.09006192508867913, - "grad_norm": 0.8535635028227297, - "learning_rate": 3.9623454977528864e-06, - "loss": 0.614, - "num_input_tokens_seen": 15806150, - "step": 749 - }, - { - "epoch": 0.09018216797931822, - "grad_norm": 2.1415739231061433, - "learning_rate": 3.962194904425375e-06, - "loss": 0.8434, - "num_input_tokens_seen": 15826500, - "step": 750 - }, - { - "epoch": 0.09030241086995731, - "grad_norm": 2.131746767776979, - "learning_rate": 3.9620440134344566e-06, - "loss": 0.6823, - "num_input_tokens_seen": 15844375, - "step": 751 - }, - { - "epoch": 0.09042265376059641, - "grad_norm": 2.1946167727437853, - "learning_rate": 3.9618928248030215e-06, - "loss": 0.8209, - "num_input_tokens_seen": 15863605, - "step": 752 - }, - { - "epoch": 0.0905428966512355, - "grad_norm": 3.333589034009174, - "learning_rate": 3.961741338554005e-06, - "loss": 0.8355, - "num_input_tokens_seen": 15881665, - "step": 753 - }, - { - "epoch": 0.09066313954187459, - "grad_norm": 2.691967012825218, - "learning_rate": 3.9615895547103865e-06, - "loss": 0.7532, - "num_input_tokens_seen": 15905030, - "step": 754 - }, - { - "epoch": 0.09078338243251367, - "grad_norm": 2.1934455670244337, - "learning_rate": 3.961437473295193e-06, - "loss": 0.7733, - "num_input_tokens_seen": 15924895, - "step": 755 - }, - { - "epoch": 0.09090362532315277, - "grad_norm": 2.6805744877034945, - "learning_rate": 3.961285094331495e-06, - "loss": 0.7131, - "num_input_tokens_seen": 15942530, - "step": 756 - }, - { - "epoch": 0.09102386821379185, - "grad_norm": 2.083728331683761, - "learning_rate": 3.961132417842406e-06, - "loss": 0.8555, - "num_input_tokens_seen": 15962035, - "step": 757 - }, - { - "epoch": 0.09114411110443095, - "grad_norm": 4.301934916923718, - "learning_rate": 3.960979443851089e-06, - "loss": 0.7509, - "num_input_tokens_seen": 15978780, - "step": 758 - }, - { - "epoch": 0.09126435399507005, - "grad_norm": 1.8769589630604666, - "learning_rate": 3.96082617238075e-06, - "loss": 0.7889, - "num_input_tokens_seen": 16001125, - "step": 759 - }, - { - "epoch": 0.09138459688570913, - "grad_norm": 3.416100826591661, - "learning_rate": 3.960672603454639e-06, - "loss": 0.7973, - "num_input_tokens_seen": 16020825, - "step": 760 - }, - { - "epoch": 0.09150483977634823, - "grad_norm": 3.035661154129103, - "learning_rate": 3.960518737096054e-06, - "loss": 0.7705, - "num_input_tokens_seen": 16040175, - "step": 761 - }, - { - "epoch": 0.09162508266698731, - "grad_norm": 2.568595606205626, - "learning_rate": 3.960364573328334e-06, - "loss": 0.7339, - "num_input_tokens_seen": 16059220, - "step": 762 - }, - { - "epoch": 0.0917453255576264, - "grad_norm": 3.238048025015355, - "learning_rate": 3.9602101121748675e-06, - "loss": 0.8778, - "num_input_tokens_seen": 16079435, - "step": 763 - }, - { - "epoch": 0.0918655684482655, - "grad_norm": 2.807019953961182, - "learning_rate": 3.960055353659085e-06, - "loss": 0.7216, - "num_input_tokens_seen": 16096265, - "step": 764 - }, - { - "epoch": 0.09198581133890459, - "grad_norm": 2.526834797827245, - "learning_rate": 3.959900297804465e-06, - "loss": 0.8372, - "num_input_tokens_seen": 16116155, - "step": 765 - }, - { - "epoch": 0.09210605422954368, - "grad_norm": 2.090176035795695, - "learning_rate": 3.9597449446345276e-06, - "loss": 0.7671, - "num_input_tokens_seen": 16133120, - "step": 766 - }, - { - "epoch": 0.09222629712018277, - "grad_norm": 2.4614232962855316, - "learning_rate": 3.95958929417284e-06, - "loss": 0.8304, - "num_input_tokens_seen": 16150995, - "step": 767 - }, - { - "epoch": 0.09234654001082186, - "grad_norm": 0.8098031810162347, - "learning_rate": 3.9594333464430145e-06, - "loss": 0.6114, - "num_input_tokens_seen": 16220205, - "step": 768 - }, - { - "epoch": 0.09246678290146094, - "grad_norm": 2.3417263053559667, - "learning_rate": 3.959277101468709e-06, - "loss": 0.8784, - "num_input_tokens_seen": 16239475, - "step": 769 - }, - { - "epoch": 0.09258702579210004, - "grad_norm": 2.8853235259600254, - "learning_rate": 3.959120559273624e-06, - "loss": 0.7945, - "num_input_tokens_seen": 16256980, - "step": 770 - }, - { - "epoch": 0.09270726868273914, - "grad_norm": 3.0027926659166178, - "learning_rate": 3.958963719881509e-06, - "loss": 0.8404, - "num_input_tokens_seen": 16274790, - "step": 771 - }, - { - "epoch": 0.09282751157337822, - "grad_norm": 2.2175153618136956, - "learning_rate": 3.958806583316154e-06, - "loss": 0.9344, - "num_input_tokens_seen": 16292480, - "step": 772 - }, - { - "epoch": 0.09294775446401732, - "grad_norm": 15.453202655575843, - "learning_rate": 3.9586491496013985e-06, - "loss": 0.7954, - "num_input_tokens_seen": 16314595, - "step": 773 - }, - { - "epoch": 0.0930679973546564, - "grad_norm": 2.940489711871276, - "learning_rate": 3.958491418761124e-06, - "loss": 0.8164, - "num_input_tokens_seen": 16331885, - "step": 774 - }, - { - "epoch": 0.0931882402452955, - "grad_norm": 3.8848968379515907, - "learning_rate": 3.958333390819258e-06, - "loss": 0.7305, - "num_input_tokens_seen": 16348535, - "step": 775 - }, - { - "epoch": 0.0933084831359346, - "grad_norm": 2.3641481097103534, - "learning_rate": 3.9581750657997754e-06, - "loss": 0.8015, - "num_input_tokens_seen": 16367620, - "step": 776 - }, - { - "epoch": 0.09342872602657368, - "grad_norm": 1.845269461294319, - "learning_rate": 3.95801644372669e-06, - "loss": 0.8904, - "num_input_tokens_seen": 16387245, - "step": 777 - }, - { - "epoch": 0.09354896891721277, - "grad_norm": 2.3842978254680824, - "learning_rate": 3.957857524624068e-06, - "loss": 0.8412, - "num_input_tokens_seen": 16405845, - "step": 778 - }, - { - "epoch": 0.09366921180785186, - "grad_norm": 1.8456349204348133, - "learning_rate": 3.957698308516016e-06, - "loss": 0.8984, - "num_input_tokens_seen": 16426865, - "step": 779 - }, - { - "epoch": 0.09378945469849095, - "grad_norm": 2.1804303757190144, - "learning_rate": 3.957538795426688e-06, - "loss": 0.8203, - "num_input_tokens_seen": 16444010, - "step": 780 - }, - { - "epoch": 0.09390969758913004, - "grad_norm": 2.5682612779600635, - "learning_rate": 3.9573789853802804e-06, - "loss": 0.7692, - "num_input_tokens_seen": 16462205, - "step": 781 - }, - { - "epoch": 0.09402994047976913, - "grad_norm": 3.000839422113299, - "learning_rate": 3.957218878401037e-06, - "loss": 0.7515, - "num_input_tokens_seen": 16480415, - "step": 782 - }, - { - "epoch": 0.09415018337040823, - "grad_norm": 3.5719921084625597, - "learning_rate": 3.957058474513246e-06, - "loss": 0.8985, - "num_input_tokens_seen": 16499990, - "step": 783 - }, - { - "epoch": 0.09427042626104731, - "grad_norm": 1.9410620635859839, - "learning_rate": 3.956897773741241e-06, - "loss": 0.7825, - "num_input_tokens_seen": 16518700, - "step": 784 - }, - { - "epoch": 0.09439066915168641, - "grad_norm": 1.8008475285382617, - "learning_rate": 3.956736776109398e-06, - "loss": 0.717, - "num_input_tokens_seen": 16539595, - "step": 785 - }, - { - "epoch": 0.09451091204232549, - "grad_norm": 2.308861285673119, - "learning_rate": 3.956575481642143e-06, - "loss": 0.8331, - "num_input_tokens_seen": 16558205, - "step": 786 - }, - { - "epoch": 0.09463115493296459, - "grad_norm": 4.338081045427215, - "learning_rate": 3.956413890363943e-06, - "loss": 0.7483, - "num_input_tokens_seen": 16574905, - "step": 787 - }, - { - "epoch": 0.09475139782360369, - "grad_norm": 2.424918293313003, - "learning_rate": 3.956252002299312e-06, - "loss": 0.8215, - "num_input_tokens_seen": 16590525, - "step": 788 - }, - { - "epoch": 0.09487164071424277, - "grad_norm": 2.3242193035105796, - "learning_rate": 3.956089817472807e-06, - "loss": 0.904, - "num_input_tokens_seen": 16607550, - "step": 789 - }, - { - "epoch": 0.09499188360488187, - "grad_norm": 2.6751752028985742, - "learning_rate": 3.955927335909032e-06, - "loss": 0.8571, - "num_input_tokens_seen": 16630480, - "step": 790 - }, - { - "epoch": 0.09511212649552095, - "grad_norm": 3.5030845084728113, - "learning_rate": 3.955764557632634e-06, - "loss": 0.76, - "num_input_tokens_seen": 16650010, - "step": 791 - }, - { - "epoch": 0.09523236938616005, - "grad_norm": 2.6085712476066827, - "learning_rate": 3.955601482668309e-06, - "loss": 0.9435, - "num_input_tokens_seen": 16667590, - "step": 792 - }, - { - "epoch": 0.09535261227679913, - "grad_norm": 2.196506061146064, - "learning_rate": 3.955438111040794e-06, - "loss": 0.8821, - "num_input_tokens_seen": 16685585, - "step": 793 - }, - { - "epoch": 0.09547285516743823, - "grad_norm": 2.1789414118550616, - "learning_rate": 3.955274442774873e-06, - "loss": 0.8053, - "num_input_tokens_seen": 16703885, - "step": 794 - }, - { - "epoch": 0.09559309805807732, - "grad_norm": 2.9322161737975017, - "learning_rate": 3.9551104778953725e-06, - "loss": 0.7192, - "num_input_tokens_seen": 16723900, - "step": 795 - }, - { - "epoch": 0.0957133409487164, - "grad_norm": 2.3579054752705932, - "learning_rate": 3.954946216427167e-06, - "loss": 0.8552, - "num_input_tokens_seen": 16744080, - "step": 796 - }, - { - "epoch": 0.0958335838393555, - "grad_norm": 0.848150156353711, - "learning_rate": 3.954781658395176e-06, - "loss": 0.6461, - "num_input_tokens_seen": 16800055, - "step": 797 - }, - { - "epoch": 0.09595382672999458, - "grad_norm": 2.065885571928616, - "learning_rate": 3.95461680382436e-06, - "loss": 0.9196, - "num_input_tokens_seen": 16818700, - "step": 798 - }, - { - "epoch": 0.09607406962063368, - "grad_norm": 2.774878814838025, - "learning_rate": 3.9544516527397295e-06, - "loss": 0.8571, - "num_input_tokens_seen": 16834770, - "step": 799 - }, - { - "epoch": 0.09619431251127276, - "grad_norm": 3.5550330091887403, - "learning_rate": 3.954286205166338e-06, - "loss": 0.8034, - "num_input_tokens_seen": 16855655, - "step": 800 - }, - { - "epoch": 0.09631455540191186, - "grad_norm": 2.7915733861373297, - "learning_rate": 3.954120461129282e-06, - "loss": 0.8364, - "num_input_tokens_seen": 16872785, - "step": 801 - }, - { - "epoch": 0.09643479829255096, - "grad_norm": 2.0158892148812684, - "learning_rate": 3.953954420653706e-06, - "loss": 0.8388, - "num_input_tokens_seen": 16889530, - "step": 802 - }, - { - "epoch": 0.09655504118319004, - "grad_norm": 1.8801797237659632, - "learning_rate": 3.953788083764798e-06, - "loss": 0.8761, - "num_input_tokens_seen": 16908485, - "step": 803 - }, - { - "epoch": 0.09667528407382914, - "grad_norm": 2.2157862217841924, - "learning_rate": 3.953621450487792e-06, - "loss": 0.9219, - "num_input_tokens_seen": 16926825, - "step": 804 - }, - { - "epoch": 0.09679552696446822, - "grad_norm": 0.8805269388389226, - "learning_rate": 3.953454520847964e-06, - "loss": 0.6507, - "num_input_tokens_seen": 16991390, - "step": 805 - }, - { - "epoch": 0.09691576985510732, - "grad_norm": 2.1653822360162933, - "learning_rate": 3.9532872948706395e-06, - "loss": 0.7341, - "num_input_tokens_seen": 17010605, - "step": 806 - }, - { - "epoch": 0.09703601274574641, - "grad_norm": 3.455326125989557, - "learning_rate": 3.9531197725811845e-06, - "loss": 0.8278, - "num_input_tokens_seen": 17025710, - "step": 807 - }, - { - "epoch": 0.0971562556363855, - "grad_norm": 1.995702753388604, - "learning_rate": 3.952951954005013e-06, - "loss": 0.8692, - "num_input_tokens_seen": 17045115, - "step": 808 - }, - { - "epoch": 0.0972764985270246, - "grad_norm": 1.79485665333028, - "learning_rate": 3.952783839167584e-06, - "loss": 0.8502, - "num_input_tokens_seen": 17064880, - "step": 809 - }, - { - "epoch": 0.09739674141766368, - "grad_norm": 4.291058457118844, - "learning_rate": 3.952615428094398e-06, - "loss": 0.7443, - "num_input_tokens_seen": 17084120, - "step": 810 - }, - { - "epoch": 0.09751698430830277, - "grad_norm": 1.9438052169624325, - "learning_rate": 3.952446720811004e-06, - "loss": 0.7379, - "num_input_tokens_seen": 17102165, - "step": 811 - }, - { - "epoch": 0.09763722719894186, - "grad_norm": 0.8610778902561823, - "learning_rate": 3.952277717342995e-06, - "loss": 0.6713, - "num_input_tokens_seen": 17168320, - "step": 812 - }, - { - "epoch": 0.09775747008958095, - "grad_norm": 6.33912804714025, - "learning_rate": 3.952108417716009e-06, - "loss": 0.8563, - "num_input_tokens_seen": 17187495, - "step": 813 - }, - { - "epoch": 0.09787771298022005, - "grad_norm": 2.1429295127776364, - "learning_rate": 3.951938821955727e-06, - "loss": 0.8494, - "num_input_tokens_seen": 17206615, - "step": 814 - }, - { - "epoch": 0.09799795587085913, - "grad_norm": 1.8863557679900163, - "learning_rate": 3.9517689300878786e-06, - "loss": 0.7577, - "num_input_tokens_seen": 17226070, - "step": 815 - }, - { - "epoch": 0.09811819876149823, - "grad_norm": 2.4820682398711233, - "learning_rate": 3.951598742138236e-06, - "loss": 0.7807, - "num_input_tokens_seen": 17244515, - "step": 816 - }, - { - "epoch": 0.09823844165213731, - "grad_norm": 3.2675054957132144, - "learning_rate": 3.951428258132615e-06, - "loss": 0.7863, - "num_input_tokens_seen": 17262355, - "step": 817 - }, - { - "epoch": 0.09835868454277641, - "grad_norm": 2.1469827370775434, - "learning_rate": 3.951257478096879e-06, - "loss": 0.8413, - "num_input_tokens_seen": 17280440, - "step": 818 - }, - { - "epoch": 0.0984789274334155, - "grad_norm": 2.9853358604500015, - "learning_rate": 3.951086402056936e-06, - "loss": 0.6787, - "num_input_tokens_seen": 17294760, - "step": 819 - }, - { - "epoch": 0.09859917032405459, - "grad_norm": 1.8856098284383407, - "learning_rate": 3.950915030038735e-06, - "loss": 0.8351, - "num_input_tokens_seen": 17314275, - "step": 820 - }, - { - "epoch": 0.09871941321469369, - "grad_norm": 2.958071816735845, - "learning_rate": 3.9507433620682765e-06, - "loss": 0.8371, - "num_input_tokens_seen": 17330930, - "step": 821 - }, - { - "epoch": 0.09883965610533277, - "grad_norm": 2.2258653599664338, - "learning_rate": 3.9505713981716e-06, - "loss": 0.8725, - "num_input_tokens_seen": 17353480, - "step": 822 - }, - { - "epoch": 0.09895989899597187, - "grad_norm": 1.8557577210763645, - "learning_rate": 3.950399138374795e-06, - "loss": 0.8091, - "num_input_tokens_seen": 17372280, - "step": 823 - }, - { - "epoch": 0.09908014188661095, - "grad_norm": 2.1965356814223607, - "learning_rate": 3.95022658270399e-06, - "loss": 0.7395, - "num_input_tokens_seen": 17392365, - "step": 824 - }, - { - "epoch": 0.09920038477725004, - "grad_norm": 2.6909562335043167, - "learning_rate": 3.9500537311853635e-06, - "loss": 0.7752, - "num_input_tokens_seen": 17410040, - "step": 825 - }, - { - "epoch": 0.09932062766788914, - "grad_norm": 2.850219724681345, - "learning_rate": 3.949880583845136e-06, - "loss": 0.8289, - "num_input_tokens_seen": 17427835, - "step": 826 - }, - { - "epoch": 0.09944087055852822, - "grad_norm": 2.4005268900928414, - "learning_rate": 3.949707140709575e-06, - "loss": 0.8102, - "num_input_tokens_seen": 17447285, - "step": 827 - }, - { - "epoch": 0.09956111344916732, - "grad_norm": 2.3337804237662905, - "learning_rate": 3.949533401804991e-06, - "loss": 0.8361, - "num_input_tokens_seen": 17463910, - "step": 828 - }, - { - "epoch": 0.0996813563398064, - "grad_norm": 2.3990471959789197, - "learning_rate": 3.949359367157739e-06, - "loss": 0.9032, - "num_input_tokens_seen": 17482325, - "step": 829 - }, - { - "epoch": 0.0998015992304455, - "grad_norm": 2.1356542482128553, - "learning_rate": 3.949185036794222e-06, - "loss": 0.7626, - "num_input_tokens_seen": 17500055, - "step": 830 - }, - { - "epoch": 0.0999218421210846, - "grad_norm": 1.6977171190632112, - "learning_rate": 3.949010410740884e-06, - "loss": 0.7766, - "num_input_tokens_seen": 17522600, - "step": 831 - }, - { - "epoch": 0.10004208501172368, - "grad_norm": 1.8981337506416434, - "learning_rate": 3.948835489024216e-06, - "loss": 0.8663, - "num_input_tokens_seen": 17542055, - "step": 832 - }, - { - "epoch": 0.10016232790236278, - "grad_norm": 1.9123374217461606, - "learning_rate": 3.948660271670755e-06, - "loss": 0.8918, - "num_input_tokens_seen": 17558925, - "step": 833 - }, - { - "epoch": 0.10028257079300186, - "grad_norm": 2.2080318984779277, - "learning_rate": 3.948484758707079e-06, - "loss": 0.8442, - "num_input_tokens_seen": 17578245, - "step": 834 - }, - { - "epoch": 0.10040281368364096, - "grad_norm": 2.594852234622079, - "learning_rate": 3.948308950159815e-06, - "loss": 0.8347, - "num_input_tokens_seen": 17596645, - "step": 835 - }, - { - "epoch": 0.10052305657428004, - "grad_norm": 2.430344276047469, - "learning_rate": 3.9481328460556326e-06, - "loss": 0.7641, - "num_input_tokens_seen": 17613585, - "step": 836 - }, - { - "epoch": 0.10064329946491914, - "grad_norm": 2.4197432529470997, - "learning_rate": 3.9479564464212455e-06, - "loss": 0.8894, - "num_input_tokens_seen": 17632465, - "step": 837 - }, - { - "epoch": 0.10076354235555823, - "grad_norm": 2.619392329816884, - "learning_rate": 3.947779751283414e-06, - "loss": 0.7641, - "num_input_tokens_seen": 17649355, - "step": 838 - }, - { - "epoch": 0.10088378524619732, - "grad_norm": 1.9800604616141009, - "learning_rate": 3.947602760668944e-06, - "loss": 0.7534, - "num_input_tokens_seen": 17668865, - "step": 839 - }, - { - "epoch": 0.10100402813683641, - "grad_norm": 1.9335559704439675, - "learning_rate": 3.947425474604684e-06, - "loss": 0.7134, - "num_input_tokens_seen": 17692520, - "step": 840 - }, - { - "epoch": 0.1011242710274755, - "grad_norm": 2.1978515842196957, - "learning_rate": 3.947247893117528e-06, - "loss": 0.9242, - "num_input_tokens_seen": 17710745, - "step": 841 - }, - { - "epoch": 0.10124451391811459, - "grad_norm": 7.388582666770699, - "learning_rate": 3.947070016234413e-06, - "loss": 0.6929, - "num_input_tokens_seen": 17726255, - "step": 842 - }, - { - "epoch": 0.10136475680875369, - "grad_norm": 2.6642207060516934, - "learning_rate": 3.946891843982326e-06, - "loss": 0.7443, - "num_input_tokens_seen": 17743640, - "step": 843 - }, - { - "epoch": 0.10148499969939277, - "grad_norm": 15.751139988427198, - "learning_rate": 3.9467133763882935e-06, - "loss": 0.742, - "num_input_tokens_seen": 17761825, - "step": 844 - }, - { - "epoch": 0.10160524259003187, - "grad_norm": 1.954345176234008, - "learning_rate": 3.9465346134793905e-06, - "loss": 0.8621, - "num_input_tokens_seen": 17781355, - "step": 845 - }, - { - "epoch": 0.10172548548067095, - "grad_norm": 2.024418013849752, - "learning_rate": 3.9463555552827335e-06, - "loss": 0.7997, - "num_input_tokens_seen": 17798245, - "step": 846 - }, - { - "epoch": 0.10184572837131005, - "grad_norm": 2.6641568638193243, - "learning_rate": 3.946176201825487e-06, - "loss": 0.8667, - "num_input_tokens_seen": 17816000, - "step": 847 - }, - { - "epoch": 0.10196597126194913, - "grad_norm": 2.868935123915064, - "learning_rate": 3.9459965531348575e-06, - "loss": 0.838, - "num_input_tokens_seen": 17835375, - "step": 848 - }, - { - "epoch": 0.10208621415258823, - "grad_norm": 2.2581213699127827, - "learning_rate": 3.945816609238098e-06, - "loss": 0.8584, - "num_input_tokens_seen": 17854505, - "step": 849 - }, - { - "epoch": 0.10220645704322733, - "grad_norm": 1.8912363106877021, - "learning_rate": 3.945636370162507e-06, - "loss": 0.8493, - "num_input_tokens_seen": 17874335, - "step": 850 - }, - { - "epoch": 0.10232669993386641, - "grad_norm": 1.7434850982417882, - "learning_rate": 3.945455835935425e-06, - "loss": 0.7915, - "num_input_tokens_seen": 17893240, - "step": 851 - }, - { - "epoch": 0.1024469428245055, - "grad_norm": 2.830988142985482, - "learning_rate": 3.94527500658424e-06, - "loss": 0.7414, - "num_input_tokens_seen": 17910625, - "step": 852 - }, - { - "epoch": 0.10256718571514459, - "grad_norm": 1.974619927085637, - "learning_rate": 3.945093882136382e-06, - "loss": 0.8126, - "num_input_tokens_seen": 17934120, - "step": 853 - }, - { - "epoch": 0.10268742860578368, - "grad_norm": 1.9756341736380256, - "learning_rate": 3.944912462619329e-06, - "loss": 0.8376, - "num_input_tokens_seen": 17952805, - "step": 854 - }, - { - "epoch": 0.10280767149642277, - "grad_norm": 2.1421202171865175, - "learning_rate": 3.9447307480606025e-06, - "loss": 0.809, - "num_input_tokens_seen": 17972610, - "step": 855 - }, - { - "epoch": 0.10292791438706186, - "grad_norm": 2.349347297268663, - "learning_rate": 3.944548738487767e-06, - "loss": 0.8997, - "num_input_tokens_seen": 17989845, - "step": 856 - }, - { - "epoch": 0.10304815727770096, - "grad_norm": 2.669528222583635, - "learning_rate": 3.944366433928434e-06, - "loss": 0.8976, - "num_input_tokens_seen": 18009545, - "step": 857 - }, - { - "epoch": 0.10316840016834004, - "grad_norm": 1.7060248167467518, - "learning_rate": 3.9441838344102594e-06, - "loss": 0.8274, - "num_input_tokens_seen": 18028990, - "step": 858 - }, - { - "epoch": 0.10328864305897914, - "grad_norm": 2.404724311856886, - "learning_rate": 3.944000939960943e-06, - "loss": 0.6744, - "num_input_tokens_seen": 18047435, - "step": 859 - }, - { - "epoch": 0.10340888594961822, - "grad_norm": 1.6697933656545882, - "learning_rate": 3.943817750608229e-06, - "loss": 0.8018, - "num_input_tokens_seen": 18069705, - "step": 860 - }, - { - "epoch": 0.10352912884025732, - "grad_norm": 2.7203473805398, - "learning_rate": 3.943634266379908e-06, - "loss": 0.822, - "num_input_tokens_seen": 18086320, - "step": 861 - }, - { - "epoch": 0.10364937173089642, - "grad_norm": 3.164258252910918, - "learning_rate": 3.943450487303815e-06, - "loss": 0.8487, - "num_input_tokens_seen": 18106535, - "step": 862 - }, - { - "epoch": 0.1037696146215355, - "grad_norm": 2.4741336976674484, - "learning_rate": 3.943266413407827e-06, - "loss": 0.8482, - "num_input_tokens_seen": 18125530, - "step": 863 - }, - { - "epoch": 0.1038898575121746, - "grad_norm": 1.752134948496544, - "learning_rate": 3.94308204471987e-06, - "loss": 0.8451, - "num_input_tokens_seen": 18144265, - "step": 864 - }, - { - "epoch": 0.10401010040281368, - "grad_norm": 10.398919262719307, - "learning_rate": 3.942897381267912e-06, - "loss": 0.7419, - "num_input_tokens_seen": 18160350, - "step": 865 - }, - { - "epoch": 0.10413034329345278, - "grad_norm": 2.7296546889668636, - "learning_rate": 3.942712423079965e-06, - "loss": 0.6748, - "num_input_tokens_seen": 18176460, - "step": 866 - }, - { - "epoch": 0.10425058618409186, - "grad_norm": 2.2475341257305628, - "learning_rate": 3.942527170184088e-06, - "loss": 0.8979, - "num_input_tokens_seen": 18192800, - "step": 867 - }, - { - "epoch": 0.10437082907473096, - "grad_norm": 5.726105634050731, - "learning_rate": 3.942341622608385e-06, - "loss": 0.7819, - "num_input_tokens_seen": 18209550, - "step": 868 - }, - { - "epoch": 0.10449107196537005, - "grad_norm": 1.5963926021940098, - "learning_rate": 3.942155780381001e-06, - "loss": 0.7691, - "num_input_tokens_seen": 18233005, - "step": 869 - }, - { - "epoch": 0.10461131485600914, - "grad_norm": 1.914487223740198, - "learning_rate": 3.94196964353013e-06, - "loss": 0.7613, - "num_input_tokens_seen": 18252175, - "step": 870 - }, - { - "epoch": 0.10473155774664823, - "grad_norm": 2.1994763345851345, - "learning_rate": 3.941783212084008e-06, - "loss": 0.8013, - "num_input_tokens_seen": 18269650, - "step": 871 - }, - { - "epoch": 0.10485180063728732, - "grad_norm": 4.017514211352629, - "learning_rate": 3.941596486070916e-06, - "loss": 0.775, - "num_input_tokens_seen": 18287415, - "step": 872 - }, - { - "epoch": 0.10497204352792641, - "grad_norm": 3.2320445141072556, - "learning_rate": 3.941409465519182e-06, - "loss": 0.5768, - "num_input_tokens_seen": 18307660, - "step": 873 - }, - { - "epoch": 0.10509228641856551, - "grad_norm": 1.7936104972660718, - "learning_rate": 3.941222150457176e-06, - "loss": 0.846, - "num_input_tokens_seen": 18330635, - "step": 874 - }, - { - "epoch": 0.10521252930920459, - "grad_norm": 3.2910709671512812, - "learning_rate": 3.941034540913311e-06, - "loss": 0.7261, - "num_input_tokens_seen": 18347885, - "step": 875 - }, - { - "epoch": 0.10533277219984369, - "grad_norm": 1.9848623920406179, - "learning_rate": 3.940846636916051e-06, - "loss": 0.8232, - "num_input_tokens_seen": 18367640, - "step": 876 - }, - { - "epoch": 0.10545301509048277, - "grad_norm": 2.407521735295685, - "learning_rate": 3.940658438493899e-06, - "loss": 0.8626, - "num_input_tokens_seen": 18385205, - "step": 877 - }, - { - "epoch": 0.10557325798112187, - "grad_norm": 4.9600778152505205, - "learning_rate": 3.940469945675405e-06, - "loss": 0.7583, - "num_input_tokens_seen": 18403310, - "step": 878 - }, - { - "epoch": 0.10569350087176095, - "grad_norm": 1.910106903459763, - "learning_rate": 3.940281158489163e-06, - "loss": 0.9092, - "num_input_tokens_seen": 18422260, - "step": 879 - }, - { - "epoch": 0.10581374376240005, - "grad_norm": 1.7108246591914542, - "learning_rate": 3.940092076963812e-06, - "loss": 0.8205, - "num_input_tokens_seen": 18439475, - "step": 880 - }, - { - "epoch": 0.10593398665303914, - "grad_norm": 5.191199562171951, - "learning_rate": 3.9399027011280355e-06, - "loss": 0.7905, - "num_input_tokens_seen": 18461290, - "step": 881 - }, - { - "epoch": 0.10605422954367823, - "grad_norm": 2.4991051124957524, - "learning_rate": 3.939713031010561e-06, - "loss": 0.7711, - "num_input_tokens_seen": 18479375, - "step": 882 - }, - { - "epoch": 0.10617447243431732, - "grad_norm": 2.8233917929338457, - "learning_rate": 3.939523066640163e-06, - "loss": 0.7797, - "num_input_tokens_seen": 18497990, - "step": 883 - }, - { - "epoch": 0.10629471532495641, - "grad_norm": 2.2332503412616016, - "learning_rate": 3.939332808045657e-06, - "loss": 0.8051, - "num_input_tokens_seen": 18517360, - "step": 884 - }, - { - "epoch": 0.1064149582155955, - "grad_norm": 2.545677704667773, - "learning_rate": 3.939142255255906e-06, - "loss": 0.8394, - "num_input_tokens_seen": 18537965, - "step": 885 - }, - { - "epoch": 0.1065352011062346, - "grad_norm": 5.0264005574938055, - "learning_rate": 3.938951408299817e-06, - "loss": 0.874, - "num_input_tokens_seen": 18556525, - "step": 886 - }, - { - "epoch": 0.10665544399687368, - "grad_norm": 0.853644560270433, - "learning_rate": 3.938760267206342e-06, - "loss": 0.577, - "num_input_tokens_seen": 18618065, - "step": 887 - }, - { - "epoch": 0.10677568688751278, - "grad_norm": 2.5750565493754607, - "learning_rate": 3.938568832004475e-06, - "loss": 0.7889, - "num_input_tokens_seen": 18636490, - "step": 888 - }, - { - "epoch": 0.10689592977815186, - "grad_norm": 2.2761256889057613, - "learning_rate": 3.938377102723257e-06, - "loss": 0.7513, - "num_input_tokens_seen": 18653345, - "step": 889 - }, - { - "epoch": 0.10701617266879096, - "grad_norm": 4.025603558156583, - "learning_rate": 3.938185079391774e-06, - "loss": 0.8382, - "num_input_tokens_seen": 18670110, - "step": 890 - }, - { - "epoch": 0.10713641555943004, - "grad_norm": 3.6265857661300474, - "learning_rate": 3.937992762039157e-06, - "loss": 1.0576, - "num_input_tokens_seen": 18683155, - "step": 891 - }, - { - "epoch": 0.10725665845006914, - "grad_norm": 1.8686906155825185, - "learning_rate": 3.937800150694577e-06, - "loss": 0.7968, - "num_input_tokens_seen": 18704050, - "step": 892 - }, - { - "epoch": 0.10737690134070824, - "grad_norm": 2.2303912319943313, - "learning_rate": 3.937607245387255e-06, - "loss": 0.7513, - "num_input_tokens_seen": 18723135, - "step": 893 - }, - { - "epoch": 0.10749714423134732, - "grad_norm": 2.220753653754816, - "learning_rate": 3.937414046146455e-06, - "loss": 0.7252, - "num_input_tokens_seen": 18740810, - "step": 894 - }, - { - "epoch": 0.10761738712198642, - "grad_norm": 1.9937587790455347, - "learning_rate": 3.9372205530014845e-06, - "loss": 0.7551, - "num_input_tokens_seen": 18759010, - "step": 895 - }, - { - "epoch": 0.1077376300126255, - "grad_norm": 2.1924064618632255, - "learning_rate": 3.937026765981696e-06, - "loss": 0.7207, - "num_input_tokens_seen": 18778800, - "step": 896 - }, - { - "epoch": 0.1078578729032646, - "grad_norm": 5.057603383128183, - "learning_rate": 3.936832685116488e-06, - "loss": 0.7882, - "num_input_tokens_seen": 18796615, - "step": 897 - }, - { - "epoch": 0.10797811579390369, - "grad_norm": 2.267029215323489, - "learning_rate": 3.936638310435301e-06, - "loss": 0.8897, - "num_input_tokens_seen": 18814200, - "step": 898 - }, - { - "epoch": 0.10809835868454278, - "grad_norm": 2.525104824510146, - "learning_rate": 3.936443641967623e-06, - "loss": 0.8212, - "num_input_tokens_seen": 18832750, - "step": 899 - }, - { - "epoch": 0.10821860157518187, - "grad_norm": 2.827751131493035, - "learning_rate": 3.936248679742983e-06, - "loss": 0.8215, - "num_input_tokens_seen": 18850965, - "step": 900 - }, - { - "epoch": 0.10833884446582095, - "grad_norm": 1.0736845773898236, - "learning_rate": 3.936053423790958e-06, - "loss": 0.7515, - "num_input_tokens_seen": 18899005, - "step": 901 - }, - { - "epoch": 0.10845908735646005, - "grad_norm": 3.4983442194194265, - "learning_rate": 3.935857874141168e-06, - "loss": 0.769, - "num_input_tokens_seen": 18917560, - "step": 902 - }, - { - "epoch": 0.10857933024709913, - "grad_norm": 2.4660467962538304, - "learning_rate": 3.935662030823279e-06, - "loss": 0.8318, - "num_input_tokens_seen": 18933465, - "step": 903 - }, - { - "epoch": 0.10869957313773823, - "grad_norm": 3.905884307399311, - "learning_rate": 3.935465893866998e-06, - "loss": 0.7177, - "num_input_tokens_seen": 18951410, - "step": 904 - }, - { - "epoch": 0.10881981602837733, - "grad_norm": 4.987969152361887, - "learning_rate": 3.935269463302079e-06, - "loss": 0.8016, - "num_input_tokens_seen": 18969335, - "step": 905 - }, - { - "epoch": 0.10894005891901641, - "grad_norm": 2.9447559752636643, - "learning_rate": 3.935072739158322e-06, - "loss": 0.7721, - "num_input_tokens_seen": 18988765, - "step": 906 - }, - { - "epoch": 0.10906030180965551, - "grad_norm": 2.216283978262971, - "learning_rate": 3.934875721465569e-06, - "loss": 0.793, - "num_input_tokens_seen": 19008905, - "step": 907 - }, - { - "epoch": 0.10918054470029459, - "grad_norm": 2.8980127532769284, - "learning_rate": 3.9346784102537076e-06, - "loss": 0.7044, - "num_input_tokens_seen": 19030760, - "step": 908 - }, - { - "epoch": 0.10930078759093369, - "grad_norm": 2.0406317290886573, - "learning_rate": 3.934480805552669e-06, - "loss": 0.7751, - "num_input_tokens_seen": 19051490, - "step": 909 - }, - { - "epoch": 0.10942103048157277, - "grad_norm": 2.9997279204440845, - "learning_rate": 3.93428290739243e-06, - "loss": 0.8799, - "num_input_tokens_seen": 19070580, - "step": 910 - }, - { - "epoch": 0.10954127337221187, - "grad_norm": 3.1081643562199774, - "learning_rate": 3.9340847158030125e-06, - "loss": 0.7933, - "num_input_tokens_seen": 19083880, - "step": 911 - }, - { - "epoch": 0.10966151626285096, - "grad_norm": 2.1473590426929823, - "learning_rate": 3.9338862308144814e-06, - "loss": 0.7521, - "num_input_tokens_seen": 19102420, - "step": 912 - }, - { - "epoch": 0.10978175915349005, - "grad_norm": 2.0226160343646176, - "learning_rate": 3.933687452456946e-06, - "loss": 0.8446, - "num_input_tokens_seen": 19122040, - "step": 913 - }, - { - "epoch": 0.10990200204412914, - "grad_norm": 3.1411197451802195, - "learning_rate": 3.933488380760563e-06, - "loss": 0.8606, - "num_input_tokens_seen": 19141120, - "step": 914 - }, - { - "epoch": 0.11002224493476823, - "grad_norm": 3.052820266998868, - "learning_rate": 3.9332890157555286e-06, - "loss": 0.873, - "num_input_tokens_seen": 19157775, - "step": 915 - }, - { - "epoch": 0.11014248782540732, - "grad_norm": 2.3795171308120757, - "learning_rate": 3.933089357472088e-06, - "loss": 0.768, - "num_input_tokens_seen": 19175525, - "step": 916 - }, - { - "epoch": 0.11026273071604642, - "grad_norm": 2.0161641117628393, - "learning_rate": 3.932889405940529e-06, - "loss": 0.8564, - "num_input_tokens_seen": 19193340, - "step": 917 - }, - { - "epoch": 0.1103829736066855, - "grad_norm": 2.794965972704058, - "learning_rate": 3.932689161191184e-06, - "loss": 0.8012, - "num_input_tokens_seen": 19210765, - "step": 918 - }, - { - "epoch": 0.1105032164973246, - "grad_norm": 6.322072739647685, - "learning_rate": 3.93248862325443e-06, - "loss": 0.8685, - "num_input_tokens_seen": 19229390, - "step": 919 - }, - { - "epoch": 0.11062345938796368, - "grad_norm": 1.0306669843172556, - "learning_rate": 3.932287792160688e-06, - "loss": 0.6544, - "num_input_tokens_seen": 19287570, - "step": 920 - }, - { - "epoch": 0.11074370227860278, - "grad_norm": 7.896774559115421, - "learning_rate": 3.932086667940424e-06, - "loss": 0.7967, - "num_input_tokens_seen": 19303995, - "step": 921 - }, - { - "epoch": 0.11086394516924186, - "grad_norm": 2.0862080976663204, - "learning_rate": 3.93188525062415e-06, - "loss": 0.8101, - "num_input_tokens_seen": 19324180, - "step": 922 - }, - { - "epoch": 0.11098418805988096, - "grad_norm": 2.89765987559848, - "learning_rate": 3.931683540242418e-06, - "loss": 0.8573, - "num_input_tokens_seen": 19344965, - "step": 923 - }, - { - "epoch": 0.11110443095052006, - "grad_norm": 5.435721533856892, - "learning_rate": 3.9314815368258295e-06, - "loss": 0.9106, - "num_input_tokens_seen": 19361165, - "step": 924 - }, - { - "epoch": 0.11122467384115914, - "grad_norm": 1.7220035487791467, - "learning_rate": 3.9312792404050275e-06, - "loss": 0.7829, - "num_input_tokens_seen": 19378940, - "step": 925 - }, - { - "epoch": 0.11134491673179824, - "grad_norm": 1.8868908922111025, - "learning_rate": 3.9310766510107e-06, - "loss": 0.7763, - "num_input_tokens_seen": 19397835, - "step": 926 - }, - { - "epoch": 0.11146515962243732, - "grad_norm": 2.324027861265247, - "learning_rate": 3.9308737686735806e-06, - "loss": 0.9125, - "num_input_tokens_seen": 19417515, - "step": 927 - }, - { - "epoch": 0.11158540251307641, - "grad_norm": 2.2059056940865562, - "learning_rate": 3.9306705934244455e-06, - "loss": 0.8182, - "num_input_tokens_seen": 19437315, - "step": 928 - }, - { - "epoch": 0.11170564540371551, - "grad_norm": 1.9078024371301123, - "learning_rate": 3.930467125294116e-06, - "loss": 0.8771, - "num_input_tokens_seen": 19456585, - "step": 929 - }, - { - "epoch": 0.1118258882943546, - "grad_norm": 1.0558164853706429, - "learning_rate": 3.930263364313458e-06, - "loss": 0.6383, - "num_input_tokens_seen": 19506875, - "step": 930 - }, - { - "epoch": 0.11194613118499369, - "grad_norm": 2.621749334394303, - "learning_rate": 3.930059310513384e-06, - "loss": 0.8334, - "num_input_tokens_seen": 19525635, - "step": 931 - }, - { - "epoch": 0.11206637407563277, - "grad_norm": 1.99741159031062, - "learning_rate": 3.929854963924846e-06, - "loss": 0.8276, - "num_input_tokens_seen": 19545620, - "step": 932 - }, - { - "epoch": 0.11218661696627187, - "grad_norm": 1.888590085152523, - "learning_rate": 3.929650324578845e-06, - "loss": 0.7692, - "num_input_tokens_seen": 19564805, - "step": 933 - }, - { - "epoch": 0.11230685985691095, - "grad_norm": 3.0609832268736654, - "learning_rate": 3.929445392506423e-06, - "loss": 0.8238, - "num_input_tokens_seen": 19582465, - "step": 934 - }, - { - "epoch": 0.11242710274755005, - "grad_norm": 2.151583165283819, - "learning_rate": 3.92924016773867e-06, - "loss": 0.7609, - "num_input_tokens_seen": 19598680, - "step": 935 - }, - { - "epoch": 0.11254734563818915, - "grad_norm": 2.765652973327632, - "learning_rate": 3.9290346503067175e-06, - "loss": 0.7417, - "num_input_tokens_seen": 19615065, - "step": 936 - }, - { - "epoch": 0.11266758852882823, - "grad_norm": 7.650724153254989, - "learning_rate": 3.9288288402417415e-06, - "loss": 0.7871, - "num_input_tokens_seen": 19641045, - "step": 937 - }, - { - "epoch": 0.11278783141946733, - "grad_norm": 3.689071534542488, - "learning_rate": 3.928622737574964e-06, - "loss": 0.6992, - "num_input_tokens_seen": 19656100, - "step": 938 - }, - { - "epoch": 0.11290807431010641, - "grad_norm": 2.3187054336088995, - "learning_rate": 3.928416342337652e-06, - "loss": 0.9029, - "num_input_tokens_seen": 19675555, - "step": 939 - }, - { - "epoch": 0.1130283172007455, - "grad_norm": 4.7892279579408825, - "learning_rate": 3.928209654561113e-06, - "loss": 0.828, - "num_input_tokens_seen": 19696110, - "step": 940 - }, - { - "epoch": 0.1131485600913846, - "grad_norm": 2.093727068657475, - "learning_rate": 3.928002674276703e-06, - "loss": 0.8032, - "num_input_tokens_seen": 19715220, - "step": 941 - }, - { - "epoch": 0.11326880298202369, - "grad_norm": 2.7878164036963633, - "learning_rate": 3.92779540151582e-06, - "loss": 0.7591, - "num_input_tokens_seen": 19732025, - "step": 942 - }, - { - "epoch": 0.11338904587266278, - "grad_norm": 2.0445347263254963, - "learning_rate": 3.927587836309907e-06, - "loss": 0.8567, - "num_input_tokens_seen": 19749575, - "step": 943 - }, - { - "epoch": 0.11350928876330187, - "grad_norm": 2.599662481174319, - "learning_rate": 3.927379978690452e-06, - "loss": 0.7825, - "num_input_tokens_seen": 19768560, - "step": 944 - }, - { - "epoch": 0.11362953165394096, - "grad_norm": 2.667051665108858, - "learning_rate": 3.927171828688987e-06, - "loss": 0.869, - "num_input_tokens_seen": 19787805, - "step": 945 - }, - { - "epoch": 0.11374977454458005, - "grad_norm": 7.916041774460033, - "learning_rate": 3.926963386337088e-06, - "loss": 0.8193, - "num_input_tokens_seen": 19805755, - "step": 946 - }, - { - "epoch": 0.11387001743521914, - "grad_norm": 2.2942675925445126, - "learning_rate": 3.926754651666375e-06, - "loss": 0.7011, - "num_input_tokens_seen": 19826035, - "step": 947 - }, - { - "epoch": 0.11399026032585824, - "grad_norm": 5.09330277816299, - "learning_rate": 3.926545624708513e-06, - "loss": 0.7891, - "num_input_tokens_seen": 19844995, - "step": 948 - }, - { - "epoch": 0.11411050321649732, - "grad_norm": 2.2372894025517556, - "learning_rate": 3.926336305495213e-06, - "loss": 0.8604, - "num_input_tokens_seen": 19863275, - "step": 949 - }, - { - "epoch": 0.11423074610713642, - "grad_norm": 2.2218400320876595, - "learning_rate": 3.926126694058226e-06, - "loss": 0.8908, - "num_input_tokens_seen": 19882145, - "step": 950 - }, - { - "epoch": 0.1143509889977755, - "grad_norm": 1.5071397877876678, - "learning_rate": 3.92591679042935e-06, - "loss": 0.8138, - "num_input_tokens_seen": 19901755, - "step": 951 - }, - { - "epoch": 0.1144712318884146, - "grad_norm": 1.6842076144983666, - "learning_rate": 3.925706594640429e-06, - "loss": 0.8203, - "num_input_tokens_seen": 19919535, - "step": 952 - }, - { - "epoch": 0.1145914747790537, - "grad_norm": 1.8638045607682039, - "learning_rate": 3.925496106723349e-06, - "loss": 0.7929, - "num_input_tokens_seen": 19936695, - "step": 953 - }, - { - "epoch": 0.11471171766969278, - "grad_norm": 2.3482208978510597, - "learning_rate": 3.9252853267100405e-06, - "loss": 0.8341, - "num_input_tokens_seen": 19955660, - "step": 954 - }, - { - "epoch": 0.11483196056033187, - "grad_norm": 1.8519764025922612, - "learning_rate": 3.9250742546324786e-06, - "loss": 0.8348, - "num_input_tokens_seen": 19975615, - "step": 955 - }, - { - "epoch": 0.11495220345097096, - "grad_norm": 2.1497665036345435, - "learning_rate": 3.924862890522683e-06, - "loss": 0.8615, - "num_input_tokens_seen": 19995345, - "step": 956 - }, - { - "epoch": 0.11507244634161005, - "grad_norm": 2.336480404613175, - "learning_rate": 3.9246512344127174e-06, - "loss": 0.8589, - "num_input_tokens_seen": 20012725, - "step": 957 - }, - { - "epoch": 0.11519268923224914, - "grad_norm": 2.126235469311675, - "learning_rate": 3.9244392863346895e-06, - "loss": 0.8184, - "num_input_tokens_seen": 20031850, - "step": 958 - }, - { - "epoch": 0.11531293212288823, - "grad_norm": 3.695723619848682, - "learning_rate": 3.9242270463207524e-06, - "loss": 0.9144, - "num_input_tokens_seen": 20049960, - "step": 959 - }, - { - "epoch": 0.11543317501352733, - "grad_norm": 3.419444421071364, - "learning_rate": 3.924014514403102e-06, - "loss": 0.8417, - "num_input_tokens_seen": 20065835, - "step": 960 - }, - { - "epoch": 0.11555341790416641, - "grad_norm": 3.286700961716625, - "learning_rate": 3.92380169061398e-06, - "loss": 0.9194, - "num_input_tokens_seen": 20083335, - "step": 961 - }, - { - "epoch": 0.11567366079480551, - "grad_norm": 2.292653429085566, - "learning_rate": 3.9235885749856705e-06, - "loss": 0.8381, - "num_input_tokens_seen": 20101735, - "step": 962 - }, - { - "epoch": 0.1157939036854446, - "grad_norm": 2.2215081064510747, - "learning_rate": 3.9233751675505035e-06, - "loss": 0.828, - "num_input_tokens_seen": 20120165, - "step": 963 - }, - { - "epoch": 0.11591414657608369, - "grad_norm": 3.520903868869938, - "learning_rate": 3.923161468340853e-06, - "loss": 0.8403, - "num_input_tokens_seen": 20139720, - "step": 964 - }, - { - "epoch": 0.11603438946672277, - "grad_norm": 2.77945729393548, - "learning_rate": 3.9229474773891374e-06, - "loss": 0.8103, - "num_input_tokens_seen": 20157980, - "step": 965 - }, - { - "epoch": 0.11615463235736187, - "grad_norm": 2.1320400900549217, - "learning_rate": 3.922733194727818e-06, - "loss": 0.8384, - "num_input_tokens_seen": 20177495, - "step": 966 - }, - { - "epoch": 0.11627487524800097, - "grad_norm": 2.6623691789189525, - "learning_rate": 3.922518620389402e-06, - "loss": 0.8605, - "num_input_tokens_seen": 20194080, - "step": 967 - }, - { - "epoch": 0.11639511813864005, - "grad_norm": 1.9312183690154034, - "learning_rate": 3.922303754406439e-06, - "loss": 0.9012, - "num_input_tokens_seen": 20211640, - "step": 968 - }, - { - "epoch": 0.11651536102927915, - "grad_norm": 1.9809686321716689, - "learning_rate": 3.922088596811526e-06, - "loss": 0.7813, - "num_input_tokens_seen": 20230490, - "step": 969 - }, - { - "epoch": 0.11663560391991823, - "grad_norm": 2.479195251601029, - "learning_rate": 3.9218731476373e-06, - "loss": 0.8561, - "num_input_tokens_seen": 20246395, - "step": 970 - }, - { - "epoch": 0.11675584681055733, - "grad_norm": 2.820202020849856, - "learning_rate": 3.9216574069164455e-06, - "loss": 0.8528, - "num_input_tokens_seen": 20265090, - "step": 971 - }, - { - "epoch": 0.11687608970119642, - "grad_norm": 5.011204560838788, - "learning_rate": 3.921441374681691e-06, - "loss": 0.8041, - "num_input_tokens_seen": 20284870, - "step": 972 - }, - { - "epoch": 0.1169963325918355, - "grad_norm": 1.8417517234955263, - "learning_rate": 3.921225050965808e-06, - "loss": 0.6458, - "num_input_tokens_seen": 20304475, - "step": 973 - }, - { - "epoch": 0.1171165754824746, - "grad_norm": 6.733886307733302, - "learning_rate": 3.921008435801612e-06, - "loss": 0.7405, - "num_input_tokens_seen": 20323280, - "step": 974 - }, - { - "epoch": 0.11723681837311369, - "grad_norm": 2.9710460195717996, - "learning_rate": 3.920791529221963e-06, - "loss": 0.7562, - "num_input_tokens_seen": 20341675, - "step": 975 - }, - { - "epoch": 0.11735706126375278, - "grad_norm": 1.7988373669345004, - "learning_rate": 3.920574331259768e-06, - "loss": 0.759, - "num_input_tokens_seen": 20362595, - "step": 976 - }, - { - "epoch": 0.11747730415439187, - "grad_norm": 2.4923285832027133, - "learning_rate": 3.9203568419479716e-06, - "loss": 0.7975, - "num_input_tokens_seen": 20382870, - "step": 977 - }, - { - "epoch": 0.11759754704503096, - "grad_norm": 2.134094773202001, - "learning_rate": 3.92013906131957e-06, - "loss": 0.7447, - "num_input_tokens_seen": 20401520, - "step": 978 - }, - { - "epoch": 0.11771778993567006, - "grad_norm": 1.6159386837941472, - "learning_rate": 3.9199209894076e-06, - "loss": 0.8181, - "num_input_tokens_seen": 20421555, - "step": 979 - }, - { - "epoch": 0.11783803282630914, - "grad_norm": 2.4867872755139033, - "learning_rate": 3.919702626245142e-06, - "loss": 0.8962, - "num_input_tokens_seen": 20440930, - "step": 980 - }, - { - "epoch": 0.11795827571694824, - "grad_norm": 4.779206276194037, - "learning_rate": 3.919483971865322e-06, - "loss": 0.6499, - "num_input_tokens_seen": 20460645, - "step": 981 - }, - { - "epoch": 0.11807851860758732, - "grad_norm": 2.323180913127229, - "learning_rate": 3.91926502630131e-06, - "loss": 0.8693, - "num_input_tokens_seen": 20480980, - "step": 982 - }, - { - "epoch": 0.11819876149822642, - "grad_norm": 1.9410805261900594, - "learning_rate": 3.91904578958632e-06, - "loss": 0.7243, - "num_input_tokens_seen": 20500115, - "step": 983 - }, - { - "epoch": 0.11831900438886551, - "grad_norm": 2.1025062805630483, - "learning_rate": 3.918826261753608e-06, - "loss": 0.847, - "num_input_tokens_seen": 20519415, - "step": 984 - }, - { - "epoch": 0.1184392472795046, - "grad_norm": 3.050376686172714, - "learning_rate": 3.918606442836478e-06, - "loss": 0.7016, - "num_input_tokens_seen": 20541355, - "step": 985 - }, - { - "epoch": 0.1185594901701437, - "grad_norm": 2.0459997571507005, - "learning_rate": 3.918386332868277e-06, - "loss": 0.7731, - "num_input_tokens_seen": 20559045, - "step": 986 - }, - { - "epoch": 0.11867973306078278, - "grad_norm": 3.8614565445852893, - "learning_rate": 3.918165931882394e-06, - "loss": 0.9354, - "num_input_tokens_seen": 20577165, - "step": 987 - }, - { - "epoch": 0.11879997595142187, - "grad_norm": 2.324334236100757, - "learning_rate": 3.917945239912264e-06, - "loss": 0.7612, - "num_input_tokens_seen": 20594360, - "step": 988 - }, - { - "epoch": 0.11892021884206096, - "grad_norm": 3.0301608156612527, - "learning_rate": 3.917724256991367e-06, - "loss": 0.762, - "num_input_tokens_seen": 20612825, - "step": 989 - }, - { - "epoch": 0.11904046173270005, - "grad_norm": 2.2905780921530687, - "learning_rate": 3.9175029831532245e-06, - "loss": 0.8111, - "num_input_tokens_seen": 20632060, - "step": 990 - }, - { - "epoch": 0.11916070462333915, - "grad_norm": 2.6714023035010817, - "learning_rate": 3.917281418431404e-06, - "loss": 0.8768, - "num_input_tokens_seen": 20650825, - "step": 991 - }, - { - "epoch": 0.11928094751397823, - "grad_norm": 2.6250784998794505, - "learning_rate": 3.917059562859516e-06, - "loss": 0.7691, - "num_input_tokens_seen": 20669870, - "step": 992 - }, - { - "epoch": 0.11940119040461733, - "grad_norm": 2.200459161672563, - "learning_rate": 3.916837416471218e-06, - "loss": 0.8788, - "num_input_tokens_seen": 20686210, - "step": 993 - }, - { - "epoch": 0.11952143329525641, - "grad_norm": 3.6026629209602627, - "learning_rate": 3.916614979300207e-06, - "loss": 0.7256, - "num_input_tokens_seen": 20700775, - "step": 994 - }, - { - "epoch": 0.11964167618589551, - "grad_norm": 1.8927753154406566, - "learning_rate": 3.9163922513802274e-06, - "loss": 0.7799, - "num_input_tokens_seen": 20722830, - "step": 995 - }, - { - "epoch": 0.1197619190765346, - "grad_norm": 4.283772290180489, - "learning_rate": 3.916169232745067e-06, - "loss": 0.8306, - "num_input_tokens_seen": 20740225, - "step": 996 - }, - { - "epoch": 0.11988216196717369, - "grad_norm": 2.917180180786399, - "learning_rate": 3.915945923428559e-06, - "loss": 0.915, - "num_input_tokens_seen": 20756470, - "step": 997 - }, - { - "epoch": 0.12000240485781279, - "grad_norm": 2.5285769235357507, - "learning_rate": 3.915722323464577e-06, - "loss": 0.8316, - "num_input_tokens_seen": 20774795, - "step": 998 - }, - { - "epoch": 0.12012264774845187, - "grad_norm": 2.701686022509613, - "learning_rate": 3.91549843288704e-06, - "loss": 0.7064, - "num_input_tokens_seen": 20798195, - "step": 999 - }, - { - "epoch": 0.12024289063909097, - "grad_norm": 2.649452779955016, - "learning_rate": 3.915274251729916e-06, - "loss": 0.7914, - "num_input_tokens_seen": 20819205, - "step": 1000 - }, - { - "epoch": 0.12036313352973005, - "grad_norm": 1.9241338945506175, - "learning_rate": 3.91504978002721e-06, - "loss": 0.9043, - "num_input_tokens_seen": 20837980, - "step": 1001 - }, - { - "epoch": 0.12048337642036915, - "grad_norm": 2.666476453767147, - "learning_rate": 3.914825017812974e-06, - "loss": 0.7677, - "num_input_tokens_seen": 20854350, - "step": 1002 - }, - { - "epoch": 0.12060361931100824, - "grad_norm": 7.471144322025861, - "learning_rate": 3.9145999651213065e-06, - "loss": 0.7273, - "num_input_tokens_seen": 20873310, - "step": 1003 - }, - { - "epoch": 0.12072386220164733, - "grad_norm": 2.302110803966868, - "learning_rate": 3.9143746219863465e-06, - "loss": 0.8869, - "num_input_tokens_seen": 20890135, - "step": 1004 - }, - { - "epoch": 0.12084410509228642, - "grad_norm": 1.0194162197377155, - "learning_rate": 3.914148988442278e-06, - "loss": 0.7181, - "num_input_tokens_seen": 20945645, - "step": 1005 - }, - { - "epoch": 0.1209643479829255, - "grad_norm": 2.7086078865004373, - "learning_rate": 3.91392306452333e-06, - "loss": 0.9599, - "num_input_tokens_seen": 20962440, - "step": 1006 - }, - { - "epoch": 0.1210845908735646, - "grad_norm": 3.1018479466761737, - "learning_rate": 3.913696850263774e-06, - "loss": 0.6682, - "num_input_tokens_seen": 20976525, - "step": 1007 - }, - { - "epoch": 0.1212048337642037, - "grad_norm": 3.538119670553433, - "learning_rate": 3.913470345697929e-06, - "loss": 0.7875, - "num_input_tokens_seen": 20994875, - "step": 1008 - }, - { - "epoch": 0.12132507665484278, - "grad_norm": 2.1362114783801416, - "learning_rate": 3.913243550860153e-06, - "loss": 0.8598, - "num_input_tokens_seen": 21012360, - "step": 1009 - }, - { - "epoch": 0.12144531954548188, - "grad_norm": 1.9169075846518047, - "learning_rate": 3.913016465784852e-06, - "loss": 0.7601, - "num_input_tokens_seen": 21032755, - "step": 1010 - }, - { - "epoch": 0.12156556243612096, - "grad_norm": 3.128298270693645, - "learning_rate": 3.912789090506474e-06, - "loss": 0.7149, - "num_input_tokens_seen": 21051735, - "step": 1011 - }, - { - "epoch": 0.12168580532676006, - "grad_norm": 3.752731760917993, - "learning_rate": 3.9125614250595114e-06, - "loss": 0.7249, - "num_input_tokens_seen": 21067665, - "step": 1012 - }, - { - "epoch": 0.12180604821739914, - "grad_norm": 6.600958391176295, - "learning_rate": 3.912333469478502e-06, - "loss": 0.8889, - "num_input_tokens_seen": 21085350, - "step": 1013 - }, - { - "epoch": 0.12192629110803824, - "grad_norm": 1.967355705997668, - "learning_rate": 3.912105223798025e-06, - "loss": 0.7822, - "num_input_tokens_seen": 21104490, - "step": 1014 - }, - { - "epoch": 0.12204653399867733, - "grad_norm": 1.1129023768347945, - "learning_rate": 3.9118766880527065e-06, - "loss": 0.72, - "num_input_tokens_seen": 21158645, - "step": 1015 - }, - { - "epoch": 0.12216677688931642, - "grad_norm": 2.140429764695701, - "learning_rate": 3.9116478622772145e-06, - "loss": 0.7227, - "num_input_tokens_seen": 21176940, - "step": 1016 - }, - { - "epoch": 0.12228701977995551, - "grad_norm": 1.8088031100412274, - "learning_rate": 3.911418746506261e-06, - "loss": 0.8741, - "num_input_tokens_seen": 21196790, - "step": 1017 - }, - { - "epoch": 0.1224072626705946, - "grad_norm": 2.165186597708197, - "learning_rate": 3.911189340774604e-06, - "loss": 0.7746, - "num_input_tokens_seen": 21216640, - "step": 1018 - }, - { - "epoch": 0.1225275055612337, - "grad_norm": 1.7605926864044437, - "learning_rate": 3.910959645117043e-06, - "loss": 0.7985, - "num_input_tokens_seen": 21235695, - "step": 1019 - }, - { - "epoch": 0.12264774845187278, - "grad_norm": 0.832855925571204, - "learning_rate": 3.910729659568423e-06, - "loss": 0.5911, - "num_input_tokens_seen": 21292600, - "step": 1020 - }, - { - "epoch": 0.12276799134251187, - "grad_norm": 4.733778872145136, - "learning_rate": 3.9104993841636344e-06, - "loss": 0.8259, - "num_input_tokens_seen": 21312890, - "step": 1021 - }, - { - "epoch": 0.12288823423315097, - "grad_norm": 2.1100773879392847, - "learning_rate": 3.910268818937608e-06, - "loss": 0.8065, - "num_input_tokens_seen": 21330765, - "step": 1022 - }, - { - "epoch": 0.12300847712379005, - "grad_norm": 3.069906921858533, - "learning_rate": 3.9100379639253196e-06, - "loss": 0.8746, - "num_input_tokens_seen": 21347205, - "step": 1023 - }, - { - "epoch": 0.12312872001442915, - "grad_norm": 3.255031621170376, - "learning_rate": 3.909806819161791e-06, - "loss": 0.856, - "num_input_tokens_seen": 21362400, - "step": 1024 - }, - { - "epoch": 0.12324896290506823, - "grad_norm": 1.9260925007559073, - "learning_rate": 3.909575384682086e-06, - "loss": 0.8602, - "num_input_tokens_seen": 21381000, - "step": 1025 - }, - { - "epoch": 0.12336920579570733, - "grad_norm": 1.8598593797901357, - "learning_rate": 3.9093436605213144e-06, - "loss": 0.687, - "num_input_tokens_seen": 21401220, - "step": 1026 - }, - { - "epoch": 0.12348944868634643, - "grad_norm": 3.4914720893430338, - "learning_rate": 3.909111646714627e-06, - "loss": 0.787, - "num_input_tokens_seen": 21421785, - "step": 1027 - }, - { - "epoch": 0.12360969157698551, - "grad_norm": 2.132347825699078, - "learning_rate": 3.9088793432972206e-06, - "loss": 0.7218, - "num_input_tokens_seen": 21440325, - "step": 1028 - }, - { - "epoch": 0.1237299344676246, - "grad_norm": 2.1913524532986215, - "learning_rate": 3.908646750304336e-06, - "loss": 0.8169, - "num_input_tokens_seen": 21457730, - "step": 1029 - }, - { - "epoch": 0.12385017735826369, - "grad_norm": 1.573529337128982, - "learning_rate": 3.908413867771257e-06, - "loss": 0.8606, - "num_input_tokens_seen": 21476360, - "step": 1030 - }, - { - "epoch": 0.12397042024890279, - "grad_norm": 2.4042228735044846, - "learning_rate": 3.908180695733311e-06, - "loss": 0.8079, - "num_input_tokens_seen": 21495570, - "step": 1031 - }, - { - "epoch": 0.12409066313954187, - "grad_norm": 3.0539586376361907, - "learning_rate": 3.907947234225871e-06, - "loss": 0.8273, - "num_input_tokens_seen": 21514300, - "step": 1032 - }, - { - "epoch": 0.12421090603018096, - "grad_norm": 2.0547908614224784, - "learning_rate": 3.907713483284352e-06, - "loss": 0.8697, - "num_input_tokens_seen": 21533495, - "step": 1033 - }, - { - "epoch": 0.12433114892082006, - "grad_norm": 2.850318331985092, - "learning_rate": 3.907479442944216e-06, - "loss": 0.9769, - "num_input_tokens_seen": 21551620, - "step": 1034 - }, - { - "epoch": 0.12445139181145914, - "grad_norm": 2.3318014283313295, - "learning_rate": 3.907245113240963e-06, - "loss": 0.9121, - "num_input_tokens_seen": 21569460, - "step": 1035 - }, - { - "epoch": 0.12457163470209824, - "grad_norm": 2.020499722546725, - "learning_rate": 3.907010494210144e-06, - "loss": 0.7464, - "num_input_tokens_seen": 21591840, - "step": 1036 - }, - { - "epoch": 0.12469187759273732, - "grad_norm": 2.3653044803214476, - "learning_rate": 3.9067755858873495e-06, - "loss": 0.9167, - "num_input_tokens_seen": 21608360, - "step": 1037 - }, - { - "epoch": 0.12481212048337642, - "grad_norm": 0.911710098495449, - "learning_rate": 3.906540388308214e-06, - "loss": 0.6611, - "num_input_tokens_seen": 21667665, - "step": 1038 - }, - { - "epoch": 0.12493236337401552, - "grad_norm": 2.1144263629763174, - "learning_rate": 3.906304901508417e-06, - "loss": 0.8162, - "num_input_tokens_seen": 21686285, - "step": 1039 - }, - { - "epoch": 0.12505260626465461, - "grad_norm": 2.741549915883754, - "learning_rate": 3.9060691255236835e-06, - "loss": 0.7501, - "num_input_tokens_seen": 21706570, - "step": 1040 - }, - { - "epoch": 0.1251728491552937, - "grad_norm": 2.553860671191147, - "learning_rate": 3.905833060389778e-06, - "loss": 0.8162, - "num_input_tokens_seen": 21730410, - "step": 1041 - }, - { - "epoch": 0.12529309204593278, - "grad_norm": 2.2693661185320333, - "learning_rate": 3.905596706142513e-06, - "loss": 0.7831, - "num_input_tokens_seen": 21751540, - "step": 1042 - }, - { - "epoch": 0.12541333493657186, - "grad_norm": 2.1536887635831037, - "learning_rate": 3.9053600628177435e-06, - "loss": 0.8558, - "num_input_tokens_seen": 21770870, - "step": 1043 - }, - { - "epoch": 0.12553357782721097, - "grad_norm": 2.289806123084169, - "learning_rate": 3.905123130451367e-06, - "loss": 0.8469, - "num_input_tokens_seen": 21791690, - "step": 1044 - }, - { - "epoch": 0.12565382071785006, - "grad_norm": 1.8934132417416718, - "learning_rate": 3.904885909079326e-06, - "loss": 0.7916, - "num_input_tokens_seen": 21810195, - "step": 1045 - }, - { - "epoch": 0.12577406360848914, - "grad_norm": 3.4966685366285666, - "learning_rate": 3.904648398737607e-06, - "loss": 0.7704, - "num_input_tokens_seen": 21828480, - "step": 1046 - }, - { - "epoch": 0.12589430649912825, - "grad_norm": 2.02358282457232, - "learning_rate": 3.9044105994622406e-06, - "loss": 0.7808, - "num_input_tokens_seen": 21849345, - "step": 1047 - }, - { - "epoch": 0.12601454938976733, - "grad_norm": 2.0805301061473367, - "learning_rate": 3.9041725112893005e-06, - "loss": 0.824, - "num_input_tokens_seen": 21870290, - "step": 1048 - }, - { - "epoch": 0.12613479228040642, - "grad_norm": 1.8387633852821261, - "learning_rate": 3.903934134254904e-06, - "loss": 0.751, - "num_input_tokens_seen": 21887800, - "step": 1049 - }, - { - "epoch": 0.1262550351710455, - "grad_norm": 2.211872450775569, - "learning_rate": 3.903695468395213e-06, - "loss": 0.8444, - "num_input_tokens_seen": 21905390, - "step": 1050 - }, - { - "epoch": 0.1263752780616846, - "grad_norm": 3.2615181254555936, - "learning_rate": 3.903456513746434e-06, - "loss": 0.5644, - "num_input_tokens_seen": 21926085, - "step": 1051 - }, - { - "epoch": 0.1264955209523237, - "grad_norm": 1.9405068365942182, - "learning_rate": 3.903217270344815e-06, - "loss": 0.8759, - "num_input_tokens_seen": 21946055, - "step": 1052 - }, - { - "epoch": 0.12661576384296278, - "grad_norm": 12.250098913373938, - "learning_rate": 3.902977738226648e-06, - "loss": 0.8227, - "num_input_tokens_seen": 21966510, - "step": 1053 - }, - { - "epoch": 0.12673600673360189, - "grad_norm": 2.665855025520238, - "learning_rate": 3.902737917428273e-06, - "loss": 0.9069, - "num_input_tokens_seen": 21984395, - "step": 1054 - }, - { - "epoch": 0.12685624962424097, - "grad_norm": 3.1758880872398545, - "learning_rate": 3.902497807986068e-06, - "loss": 0.837, - "num_input_tokens_seen": 22004135, - "step": 1055 - }, - { - "epoch": 0.12697649251488005, - "grad_norm": 3.7721605135438323, - "learning_rate": 3.902257409936458e-06, - "loss": 0.829, - "num_input_tokens_seen": 22024620, - "step": 1056 - }, - { - "epoch": 0.12709673540551916, - "grad_norm": 2.211102985639801, - "learning_rate": 3.902016723315912e-06, - "loss": 0.8384, - "num_input_tokens_seen": 22042280, - "step": 1057 - }, - { - "epoch": 0.12721697829615825, - "grad_norm": 6.10875660756262, - "learning_rate": 3.901775748160941e-06, - "loss": 0.6903, - "num_input_tokens_seen": 22061180, - "step": 1058 - }, - { - "epoch": 0.12733722118679733, - "grad_norm": 0.852554714532998, - "learning_rate": 3.901534484508101e-06, - "loss": 0.6373, - "num_input_tokens_seen": 22123575, - "step": 1059 - }, - { - "epoch": 0.1274574640774364, - "grad_norm": 2.495044616275202, - "learning_rate": 3.901292932393991e-06, - "loss": 0.7405, - "num_input_tokens_seen": 22142175, - "step": 1060 - }, - { - "epoch": 0.12757770696807552, - "grad_norm": 3.8646298932163345, - "learning_rate": 3.9010510918552555e-06, - "loss": 0.8501, - "num_input_tokens_seen": 22160970, - "step": 1061 - }, - { - "epoch": 0.1276979498587146, - "grad_norm": 6.437951000804665, - "learning_rate": 3.900808962928581e-06, - "loss": 0.7604, - "num_input_tokens_seen": 22178305, - "step": 1062 - }, - { - "epoch": 0.1278181927493537, - "grad_norm": 2.1712122903532634, - "learning_rate": 3.900566545650698e-06, - "loss": 0.885, - "num_input_tokens_seen": 22195695, - "step": 1063 - }, - { - "epoch": 0.1279384356399928, - "grad_norm": 2.5731375189546593, - "learning_rate": 3.900323840058381e-06, - "loss": 0.808, - "num_input_tokens_seen": 22213125, - "step": 1064 - }, - { - "epoch": 0.12805867853063188, - "grad_norm": 1.998690081017005, - "learning_rate": 3.900080846188449e-06, - "loss": 0.8126, - "num_input_tokens_seen": 22231435, - "step": 1065 - }, - { - "epoch": 0.12817892142127096, - "grad_norm": 1.87419399396287, - "learning_rate": 3.8998375640777625e-06, - "loss": 0.8097, - "num_input_tokens_seen": 22249025, - "step": 1066 - }, - { - "epoch": 0.12829916431191005, - "grad_norm": 0.7547878965101757, - "learning_rate": 3.899593993763228e-06, - "loss": 0.5505, - "num_input_tokens_seen": 22309705, - "step": 1067 - }, - { - "epoch": 0.12841940720254916, - "grad_norm": 3.25408413968592, - "learning_rate": 3.899350135281796e-06, - "loss": 0.8095, - "num_input_tokens_seen": 22330425, - "step": 1068 - }, - { - "epoch": 0.12853965009318824, - "grad_norm": 2.730455823675955, - "learning_rate": 3.899105988670458e-06, - "loss": 0.7862, - "num_input_tokens_seen": 22349650, - "step": 1069 - }, - { - "epoch": 0.12865989298382732, - "grad_norm": 2.9839235693342987, - "learning_rate": 3.898861553966252e-06, - "loss": 0.8201, - "num_input_tokens_seen": 22369020, - "step": 1070 - }, - { - "epoch": 0.12878013587446643, - "grad_norm": 1.7265713224793218, - "learning_rate": 3.898616831206257e-06, - "loss": 0.8716, - "num_input_tokens_seen": 22389165, - "step": 1071 - }, - { - "epoch": 0.12890037876510552, - "grad_norm": 2.426067571234945, - "learning_rate": 3.8983718204276e-06, - "loss": 0.7682, - "num_input_tokens_seen": 22411105, - "step": 1072 - }, - { - "epoch": 0.1290206216557446, - "grad_norm": 2.105795372440824, - "learning_rate": 3.898126521667446e-06, - "loss": 0.8403, - "num_input_tokens_seen": 22430980, - "step": 1073 - }, - { - "epoch": 0.12914086454638368, - "grad_norm": 8.424387604368214, - "learning_rate": 3.897880934963007e-06, - "loss": 0.8327, - "num_input_tokens_seen": 22450250, - "step": 1074 - }, - { - "epoch": 0.1292611074370228, - "grad_norm": 2.5104778899307876, - "learning_rate": 3.89763506035154e-06, - "loss": 0.781, - "num_input_tokens_seen": 22467820, - "step": 1075 - }, - { - "epoch": 0.12938135032766188, - "grad_norm": 1.91814868795964, - "learning_rate": 3.897388897870343e-06, - "loss": 0.8137, - "num_input_tokens_seen": 22488180, - "step": 1076 - }, - { - "epoch": 0.12950159321830096, - "grad_norm": 1.8650031206643212, - "learning_rate": 3.89714244755676e-06, - "loss": 0.7429, - "num_input_tokens_seen": 22509260, - "step": 1077 - }, - { - "epoch": 0.12962183610894007, - "grad_norm": 3.3126320206993563, - "learning_rate": 3.896895709448175e-06, - "loss": 0.8583, - "num_input_tokens_seen": 22528730, - "step": 1078 - }, - { - "epoch": 0.12974207899957915, - "grad_norm": 2.7257410358076313, - "learning_rate": 3.896648683582019e-06, - "loss": 0.7718, - "num_input_tokens_seen": 22543785, - "step": 1079 - }, - { - "epoch": 0.12986232189021824, - "grad_norm": 2.309038443497535, - "learning_rate": 3.896401369995766e-06, - "loss": 0.8048, - "num_input_tokens_seen": 22563310, - "step": 1080 - }, - { - "epoch": 0.12998256478085732, - "grad_norm": 1.7422256657293502, - "learning_rate": 3.896153768726932e-06, - "loss": 0.7867, - "num_input_tokens_seen": 22583340, - "step": 1081 - }, - { - "epoch": 0.13010280767149643, - "grad_norm": 6.013861478407679, - "learning_rate": 3.8959058798130806e-06, - "loss": 0.8733, - "num_input_tokens_seen": 22601035, - "step": 1082 - }, - { - "epoch": 0.1302230505621355, - "grad_norm": 2.109035342626544, - "learning_rate": 3.895657703291814e-06, - "loss": 0.7487, - "num_input_tokens_seen": 22620860, - "step": 1083 - }, - { - "epoch": 0.1303432934527746, - "grad_norm": 2.74506499055514, - "learning_rate": 3.895409239200781e-06, - "loss": 0.7993, - "num_input_tokens_seen": 22636465, - "step": 1084 - }, - { - "epoch": 0.1304635363434137, - "grad_norm": 2.459451829805312, - "learning_rate": 3.895160487577673e-06, - "loss": 0.9195, - "num_input_tokens_seen": 22653755, - "step": 1085 - }, - { - "epoch": 0.1305837792340528, - "grad_norm": 0.8034069745463782, - "learning_rate": 3.894911448460226e-06, - "loss": 0.6314, - "num_input_tokens_seen": 22712790, - "step": 1086 - }, - { - "epoch": 0.13070402212469187, - "grad_norm": 2.087647873250765, - "learning_rate": 3.8946621218862195e-06, - "loss": 0.7254, - "num_input_tokens_seen": 22733510, - "step": 1087 - }, - { - "epoch": 0.13082426501533098, - "grad_norm": 2.3356661577486677, - "learning_rate": 3.894412507893475e-06, - "loss": 0.8837, - "num_input_tokens_seen": 22753510, - "step": 1088 - }, - { - "epoch": 0.13094450790597006, - "grad_norm": 4.958812919249834, - "learning_rate": 3.894162606519859e-06, - "loss": 0.7124, - "num_input_tokens_seen": 22772180, - "step": 1089 - }, - { - "epoch": 0.13106475079660915, - "grad_norm": 3.8791353870661482, - "learning_rate": 3.893912417803282e-06, - "loss": 0.7715, - "num_input_tokens_seen": 22791615, - "step": 1090 - }, - { - "epoch": 0.13118499368724823, - "grad_norm": 2.155677741131767, - "learning_rate": 3.8936619417816975e-06, - "loss": 0.7655, - "num_input_tokens_seen": 22811665, - "step": 1091 - }, - { - "epoch": 0.13130523657788734, - "grad_norm": 4.0205827523181386, - "learning_rate": 3.8934111784931015e-06, - "loss": 0.7188, - "num_input_tokens_seen": 22828835, - "step": 1092 - }, - { - "epoch": 0.13142547946852642, - "grad_norm": 1.0558013032592701, - "learning_rate": 3.893160127975535e-06, - "loss": 0.6405, - "num_input_tokens_seen": 22889245, - "step": 1093 - }, - { - "epoch": 0.1315457223591655, - "grad_norm": 3.3651356696016794, - "learning_rate": 3.8929087902670826e-06, - "loss": 0.803, - "num_input_tokens_seen": 22910595, - "step": 1094 - }, - { - "epoch": 0.13166596524980462, - "grad_norm": 0.9509244316896768, - "learning_rate": 3.8926571654058715e-06, - "loss": 0.6285, - "num_input_tokens_seen": 22966960, - "step": 1095 - }, - { - "epoch": 0.1317862081404437, - "grad_norm": 3.37905317603145, - "learning_rate": 3.892405253430074e-06, - "loss": 0.7687, - "num_input_tokens_seen": 22984200, - "step": 1096 - }, - { - "epoch": 0.13190645103108278, - "grad_norm": 3.0841499998169977, - "learning_rate": 3.892153054377904e-06, - "loss": 0.8191, - "num_input_tokens_seen": 23001325, - "step": 1097 - }, - { - "epoch": 0.13202669392172187, - "grad_norm": 1.044069258755872, - "learning_rate": 3.891900568287619e-06, - "loss": 0.6378, - "num_input_tokens_seen": 23053430, - "step": 1098 - }, - { - "epoch": 0.13214693681236098, - "grad_norm": 3.69115157410813, - "learning_rate": 3.891647795197523e-06, - "loss": 0.727, - "num_input_tokens_seen": 23069190, - "step": 1099 - }, - { - "epoch": 0.13226717970300006, - "grad_norm": 2.7243687240948855, - "learning_rate": 3.8913947351459605e-06, - "loss": 0.6874, - "num_input_tokens_seen": 23086450, - "step": 1100 - }, - { - "epoch": 0.13238742259363914, - "grad_norm": 1.8820422997154056, - "learning_rate": 3.89114138817132e-06, - "loss": 0.6712, - "num_input_tokens_seen": 23102835, - "step": 1101 - }, - { - "epoch": 0.13250766548427825, - "grad_norm": 1.938249978049348, - "learning_rate": 3.890887754312035e-06, - "loss": 0.8472, - "num_input_tokens_seen": 23120800, - "step": 1102 - }, - { - "epoch": 0.13262790837491734, - "grad_norm": 1.7591436046001099, - "learning_rate": 3.890633833606581e-06, - "loss": 0.8703, - "num_input_tokens_seen": 23140210, - "step": 1103 - }, - { - "epoch": 0.13274815126555642, - "grad_norm": 2.832583402007141, - "learning_rate": 3.890379626093477e-06, - "loss": 0.6933, - "num_input_tokens_seen": 23159680, - "step": 1104 - }, - { - "epoch": 0.1328683941561955, - "grad_norm": 2.7227603818785644, - "learning_rate": 3.890125131811287e-06, - "loss": 0.914, - "num_input_tokens_seen": 23177450, - "step": 1105 - }, - { - "epoch": 0.1329886370468346, - "grad_norm": 1.9389941127744519, - "learning_rate": 3.889870350798618e-06, - "loss": 0.7541, - "num_input_tokens_seen": 23194515, - "step": 1106 - }, - { - "epoch": 0.1331088799374737, - "grad_norm": 2.737836930131518, - "learning_rate": 3.889615283094119e-06, - "loss": 0.7856, - "num_input_tokens_seen": 23213425, - "step": 1107 - }, - { - "epoch": 0.13322912282811278, - "grad_norm": 3.8473974026545963, - "learning_rate": 3.889359928736485e-06, - "loss": 0.8443, - "num_input_tokens_seen": 23231090, - "step": 1108 - }, - { - "epoch": 0.1333493657187519, - "grad_norm": 2.3842930921577516, - "learning_rate": 3.889104287764451e-06, - "loss": 0.9012, - "num_input_tokens_seen": 23251185, - "step": 1109 - }, - { - "epoch": 0.13346960860939097, - "grad_norm": 3.2756687011181267, - "learning_rate": 3.888848360216798e-06, - "loss": 0.9028, - "num_input_tokens_seen": 23268550, - "step": 1110 - }, - { - "epoch": 0.13358985150003005, - "grad_norm": 0.8176872946321924, - "learning_rate": 3.888592146132351e-06, - "loss": 0.5815, - "num_input_tokens_seen": 23329540, - "step": 1111 - }, - { - "epoch": 0.13371009439066917, - "grad_norm": 11.073247614933557, - "learning_rate": 3.888335645549978e-06, - "loss": 0.7844, - "num_input_tokens_seen": 23349680, - "step": 1112 - }, - { - "epoch": 0.13383033728130825, - "grad_norm": 2.7809865837993284, - "learning_rate": 3.888078858508588e-06, - "loss": 0.8233, - "num_input_tokens_seen": 23369260, - "step": 1113 - }, - { - "epoch": 0.13395058017194733, - "grad_norm": 5.213570399769174, - "learning_rate": 3.8878217850471365e-06, - "loss": 0.84, - "num_input_tokens_seen": 23388895, - "step": 1114 - }, - { - "epoch": 0.13407082306258641, - "grad_norm": 2.2506166832773884, - "learning_rate": 3.887564425204621e-06, - "loss": 0.7373, - "num_input_tokens_seen": 23410300, - "step": 1115 - }, - { - "epoch": 0.13419106595322552, - "grad_norm": 0.8916455472722215, - "learning_rate": 3.887306779020083e-06, - "loss": 0.5797, - "num_input_tokens_seen": 23464675, - "step": 1116 - }, - { - "epoch": 0.1343113088438646, - "grad_norm": 2.6633588282881187, - "learning_rate": 3.887048846532608e-06, - "loss": 0.7234, - "num_input_tokens_seen": 23481370, - "step": 1117 - }, - { - "epoch": 0.1344315517345037, - "grad_norm": 0.8345272787021525, - "learning_rate": 3.8867906277813224e-06, - "loss": 0.5951, - "num_input_tokens_seen": 23539245, - "step": 1118 - }, - { - "epoch": 0.1345517946251428, - "grad_norm": 2.1196222409841594, - "learning_rate": 3.886532122805399e-06, - "loss": 0.7429, - "num_input_tokens_seen": 23561445, - "step": 1119 - }, - { - "epoch": 0.13467203751578188, - "grad_norm": 3.642784471392457, - "learning_rate": 3.886273331644053e-06, - "loss": 0.8865, - "num_input_tokens_seen": 23580035, - "step": 1120 - }, - { - "epoch": 0.13479228040642097, - "grad_norm": 8.108535940319493, - "learning_rate": 3.886014254336542e-06, - "loss": 0.82, - "num_input_tokens_seen": 23596230, - "step": 1121 - }, - { - "epoch": 0.13491252329706005, - "grad_norm": 1.7142577001519765, - "learning_rate": 3.885754890922168e-06, - "loss": 0.9222, - "num_input_tokens_seen": 23616280, - "step": 1122 - }, - { - "epoch": 0.13503276618769916, - "grad_norm": 1.8992419122116653, - "learning_rate": 3.885495241440277e-06, - "loss": 0.7829, - "num_input_tokens_seen": 23640095, - "step": 1123 - }, - { - "epoch": 0.13515300907833824, - "grad_norm": 2.100107135973512, - "learning_rate": 3.885235305930257e-06, - "loss": 0.744, - "num_input_tokens_seen": 23658015, - "step": 1124 - }, - { - "epoch": 0.13527325196897733, - "grad_norm": 1.9459895809476313, - "learning_rate": 3.884975084431539e-06, - "loss": 0.8566, - "num_input_tokens_seen": 23672685, - "step": 1125 - }, - { - "epoch": 0.13539349485961644, - "grad_norm": 2.4850537161117248, - "learning_rate": 3.8847145769836e-06, - "loss": 0.9214, - "num_input_tokens_seen": 23688825, - "step": 1126 - }, - { - "epoch": 0.13551373775025552, - "grad_norm": 2.455654196382161, - "learning_rate": 3.884453783625959e-06, - "loss": 0.671, - "num_input_tokens_seen": 23706155, - "step": 1127 - }, - { - "epoch": 0.1356339806408946, - "grad_norm": 5.7450600171540085, - "learning_rate": 3.884192704398176e-06, - "loss": 0.8617, - "num_input_tokens_seen": 23723075, - "step": 1128 - }, - { - "epoch": 0.13575422353153369, - "grad_norm": 1.75469449157673, - "learning_rate": 3.883931339339858e-06, - "loss": 0.7453, - "num_input_tokens_seen": 23747180, - "step": 1129 - }, - { - "epoch": 0.1358744664221728, - "grad_norm": 2.3289690118552713, - "learning_rate": 3.883669688490654e-06, - "loss": 0.79, - "num_input_tokens_seen": 23764670, - "step": 1130 - }, - { - "epoch": 0.13599470931281188, - "grad_norm": 1.9796934311098506, - "learning_rate": 3.883407751890256e-06, - "loss": 0.8482, - "num_input_tokens_seen": 23782995, - "step": 1131 - }, - { - "epoch": 0.13611495220345096, - "grad_norm": 1.7913152849158285, - "learning_rate": 3.8831455295783994e-06, - "loss": 0.8452, - "num_input_tokens_seen": 23801965, - "step": 1132 - }, - { - "epoch": 0.13623519509409007, - "grad_norm": 1.761249765237672, - "learning_rate": 3.882883021594864e-06, - "loss": 0.7337, - "num_input_tokens_seen": 23819825, - "step": 1133 - }, - { - "epoch": 0.13635543798472916, - "grad_norm": 2.7216820945681186, - "learning_rate": 3.8826202279794705e-06, - "loss": 0.8683, - "num_input_tokens_seen": 23836605, - "step": 1134 - }, - { - "epoch": 0.13647568087536824, - "grad_norm": 2.6393650700210767, - "learning_rate": 3.882357148772085e-06, - "loss": 0.7014, - "num_input_tokens_seen": 23853750, - "step": 1135 - }, - { - "epoch": 0.13659592376600732, - "grad_norm": 2.6877366932657223, - "learning_rate": 3.882093784012617e-06, - "loss": 0.8511, - "num_input_tokens_seen": 23872110, - "step": 1136 - }, - { - "epoch": 0.13671616665664643, - "grad_norm": 2.424823620199298, - "learning_rate": 3.881830133741019e-06, - "loss": 0.8305, - "num_input_tokens_seen": 23890695, - "step": 1137 - }, - { - "epoch": 0.13683640954728551, - "grad_norm": 3.212446380043299, - "learning_rate": 3.881566197997285e-06, - "loss": 0.7615, - "num_input_tokens_seen": 23906850, - "step": 1138 - }, - { - "epoch": 0.1369566524379246, - "grad_norm": 1.8330378764762854, - "learning_rate": 3.881301976821456e-06, - "loss": 0.7467, - "num_input_tokens_seen": 23926600, - "step": 1139 - }, - { - "epoch": 0.1370768953285637, - "grad_norm": 2.2959177787613845, - "learning_rate": 3.881037470253612e-06, - "loss": 0.9021, - "num_input_tokens_seen": 23945835, - "step": 1140 - }, - { - "epoch": 0.1371971382192028, - "grad_norm": 2.5923940068779285, - "learning_rate": 3.88077267833388e-06, - "loss": 0.7963, - "num_input_tokens_seen": 23962070, - "step": 1141 - }, - { - "epoch": 0.13731738110984187, - "grad_norm": 2.120445260520303, - "learning_rate": 3.880507601102427e-06, - "loss": 0.8338, - "num_input_tokens_seen": 23979725, - "step": 1142 - }, - { - "epoch": 0.13743762400048098, - "grad_norm": 1.8588715922031163, - "learning_rate": 3.880242238599467e-06, - "loss": 0.8158, - "num_input_tokens_seen": 23995970, - "step": 1143 - }, - { - "epoch": 0.13755786689112007, - "grad_norm": 1.9458720166020516, - "learning_rate": 3.879976590865254e-06, - "loss": 0.8272, - "num_input_tokens_seen": 24015145, - "step": 1144 - }, - { - "epoch": 0.13767810978175915, - "grad_norm": 2.4217223962576906, - "learning_rate": 3.879710657940087e-06, - "loss": 0.8691, - "num_input_tokens_seen": 24033815, - "step": 1145 - }, - { - "epoch": 0.13779835267239823, - "grad_norm": 2.248425995860865, - "learning_rate": 3.879444439864308e-06, - "loss": 0.6979, - "num_input_tokens_seen": 24053110, - "step": 1146 - }, - { - "epoch": 0.13791859556303734, - "grad_norm": 1.680901706438742, - "learning_rate": 3.879177936678301e-06, - "loss": 0.8541, - "num_input_tokens_seen": 24071835, - "step": 1147 - }, - { - "epoch": 0.13803883845367643, - "grad_norm": 2.3156656239983002, - "learning_rate": 3.878911148422496e-06, - "loss": 0.7772, - "num_input_tokens_seen": 24093030, - "step": 1148 - }, - { - "epoch": 0.1381590813443155, - "grad_norm": 7.553188026840432, - "learning_rate": 3.878644075137364e-06, - "loss": 0.6975, - "num_input_tokens_seen": 24113400, - "step": 1149 - }, - { - "epoch": 0.13827932423495462, - "grad_norm": 2.4076782871457167, - "learning_rate": 3.878376716863418e-06, - "loss": 0.7869, - "num_input_tokens_seen": 24129420, - "step": 1150 - }, - { - "epoch": 0.1383995671255937, - "grad_norm": 2.8081918755301905, - "learning_rate": 3.878109073641218e-06, - "loss": 0.7126, - "num_input_tokens_seen": 24148170, - "step": 1151 - }, - { - "epoch": 0.13851981001623279, - "grad_norm": 2.2826032507747973, - "learning_rate": 3.877841145511366e-06, - "loss": 0.8057, - "num_input_tokens_seen": 24170630, - "step": 1152 - }, - { - "epoch": 0.13864005290687187, - "grad_norm": 1.7538648544210549, - "learning_rate": 3.8775729325145035e-06, - "loss": 0.8215, - "num_input_tokens_seen": 24189585, - "step": 1153 - }, - { - "epoch": 0.13876029579751098, - "grad_norm": 0.8428788057744127, - "learning_rate": 3.877304434691321e-06, - "loss": 0.6512, - "num_input_tokens_seen": 24256155, - "step": 1154 - }, - { - "epoch": 0.13888053868815006, - "grad_norm": 1.8341979651892155, - "learning_rate": 3.877035652082548e-06, - "loss": 0.7914, - "num_input_tokens_seen": 24275320, - "step": 1155 - }, - { - "epoch": 0.13900078157878915, - "grad_norm": 2.099726460213305, - "learning_rate": 3.87676658472896e-06, - "loss": 0.8435, - "num_input_tokens_seen": 24293850, - "step": 1156 - }, - { - "epoch": 0.13912102446942826, - "grad_norm": 2.593916888642099, - "learning_rate": 3.876497232671372e-06, - "loss": 0.8548, - "num_input_tokens_seen": 24313525, - "step": 1157 - }, - { - "epoch": 0.13924126736006734, - "grad_norm": 2.3880264166108436, - "learning_rate": 3.876227595950647e-06, - "loss": 0.8347, - "num_input_tokens_seen": 24332675, - "step": 1158 - }, - { - "epoch": 0.13936151025070642, - "grad_norm": 1.579093652675001, - "learning_rate": 3.875957674607686e-06, - "loss": 0.7818, - "num_input_tokens_seen": 24354670, - "step": 1159 - }, - { - "epoch": 0.1394817531413455, - "grad_norm": 1.989418947919792, - "learning_rate": 3.8756874686834386e-06, - "loss": 0.8812, - "num_input_tokens_seen": 24372605, - "step": 1160 - }, - { - "epoch": 0.13960199603198462, - "grad_norm": 1.795260215261753, - "learning_rate": 3.875416978218893e-06, - "loss": 0.8001, - "num_input_tokens_seen": 24395520, - "step": 1161 - }, - { - "epoch": 0.1397222389226237, - "grad_norm": 1.9946191462393907, - "learning_rate": 3.8751462032550835e-06, - "loss": 0.8245, - "num_input_tokens_seen": 24412245, - "step": 1162 - }, - { - "epoch": 0.13984248181326278, - "grad_norm": 3.345937205454328, - "learning_rate": 3.874875143833085e-06, - "loss": 0.8183, - "num_input_tokens_seen": 24430205, - "step": 1163 - }, - { - "epoch": 0.1399627247039019, - "grad_norm": 1.8249065040436567, - "learning_rate": 3.874603799994019e-06, - "loss": 0.6892, - "num_input_tokens_seen": 24453460, - "step": 1164 - }, - { - "epoch": 0.14008296759454097, - "grad_norm": 2.221714377845785, - "learning_rate": 3.874332171779046e-06, - "loss": 0.8753, - "num_input_tokens_seen": 24468060, - "step": 1165 - }, - { - "epoch": 0.14020321048518006, - "grad_norm": 2.2640638477222894, - "learning_rate": 3.874060259229373e-06, - "loss": 0.7578, - "num_input_tokens_seen": 24489355, - "step": 1166 - }, - { - "epoch": 0.14032345337581917, - "grad_norm": 2.40342453054275, - "learning_rate": 3.873788062386249e-06, - "loss": 0.9238, - "num_input_tokens_seen": 24507335, - "step": 1167 - }, - { - "epoch": 0.14044369626645825, - "grad_norm": 1.8149702578368532, - "learning_rate": 3.873515581290965e-06, - "loss": 0.8199, - "num_input_tokens_seen": 24531860, - "step": 1168 - }, - { - "epoch": 0.14056393915709733, - "grad_norm": 4.275014581038032, - "learning_rate": 3.8732428159848575e-06, - "loss": 0.753, - "num_input_tokens_seen": 24550555, - "step": 1169 - }, - { - "epoch": 0.14068418204773642, - "grad_norm": 2.3863887431943245, - "learning_rate": 3.872969766509304e-06, - "loss": 0.7874, - "num_input_tokens_seen": 24570830, - "step": 1170 - }, - { - "epoch": 0.14080442493837553, - "grad_norm": 0.8493337014314831, - "learning_rate": 3.872696432905726e-06, - "loss": 0.5836, - "num_input_tokens_seen": 24631370, - "step": 1171 - }, - { - "epoch": 0.1409246678290146, - "grad_norm": 2.19758273516171, - "learning_rate": 3.872422815215589e-06, - "loss": 0.7175, - "num_input_tokens_seen": 24650170, - "step": 1172 - }, - { - "epoch": 0.1410449107196537, - "grad_norm": 2.1380036691566966, - "learning_rate": 3.8721489134803994e-06, - "loss": 0.7451, - "num_input_tokens_seen": 24668680, - "step": 1173 - }, - { - "epoch": 0.1411651536102928, - "grad_norm": 2.965164562060746, - "learning_rate": 3.871874727741707e-06, - "loss": 0.7212, - "num_input_tokens_seen": 24685630, - "step": 1174 - }, - { - "epoch": 0.1412853965009319, - "grad_norm": 1.8456976649706474, - "learning_rate": 3.871600258041108e-06, - "loss": 0.9595, - "num_input_tokens_seen": 24704875, - "step": 1175 - }, - { - "epoch": 0.14140563939157097, - "grad_norm": 2.582217555983381, - "learning_rate": 3.8713255044202375e-06, - "loss": 0.8531, - "num_input_tokens_seen": 24723585, - "step": 1176 - }, - { - "epoch": 0.14152588228221005, - "grad_norm": 2.077489827008881, - "learning_rate": 3.871050466920776e-06, - "loss": 0.811, - "num_input_tokens_seen": 24743210, - "step": 1177 - }, - { - "epoch": 0.14164612517284916, - "grad_norm": 2.104516187419509, - "learning_rate": 3.870775145584447e-06, - "loss": 0.8026, - "num_input_tokens_seen": 24760710, - "step": 1178 - }, - { - "epoch": 0.14176636806348825, - "grad_norm": 3.075178549562538, - "learning_rate": 3.8704995404530145e-06, - "loss": 0.629, - "num_input_tokens_seen": 24776055, - "step": 1179 - }, - { - "epoch": 0.14188661095412733, - "grad_norm": 2.334877802904178, - "learning_rate": 3.870223651568289e-06, - "loss": 0.8465, - "num_input_tokens_seen": 24796490, - "step": 1180 - }, - { - "epoch": 0.14200685384476644, - "grad_norm": 2.0850337068461675, - "learning_rate": 3.869947478972123e-06, - "loss": 0.7993, - "num_input_tokens_seen": 24817235, - "step": 1181 - }, - { - "epoch": 0.14212709673540552, - "grad_norm": 3.1769878158552447, - "learning_rate": 3.869671022706412e-06, - "loss": 0.8095, - "num_input_tokens_seen": 24835685, - "step": 1182 - }, - { - "epoch": 0.1422473396260446, - "grad_norm": 2.0978937815177945, - "learning_rate": 3.869394282813092e-06, - "loss": 0.6501, - "num_input_tokens_seen": 24854605, - "step": 1183 - }, - { - "epoch": 0.1423675825166837, - "grad_norm": 2.6718273633011638, - "learning_rate": 3.869117259334147e-06, - "loss": 0.8945, - "num_input_tokens_seen": 24872250, - "step": 1184 - }, - { - "epoch": 0.1424878254073228, - "grad_norm": 1.9983488153671112, - "learning_rate": 3.868839952311599e-06, - "loss": 0.822, - "num_input_tokens_seen": 24889925, - "step": 1185 - }, - { - "epoch": 0.14260806829796188, - "grad_norm": 2.3074406235932354, - "learning_rate": 3.868562361787516e-06, - "loss": 0.8033, - "num_input_tokens_seen": 24908775, - "step": 1186 - }, - { - "epoch": 0.14272831118860096, - "grad_norm": 2.238242931165128, - "learning_rate": 3.868284487804009e-06, - "loss": 0.6936, - "num_input_tokens_seen": 24927725, - "step": 1187 - }, - { - "epoch": 0.14284855407924008, - "grad_norm": 1.8743448081365774, - "learning_rate": 3.86800633040323e-06, - "loss": 0.7796, - "num_input_tokens_seen": 24948035, - "step": 1188 - }, - { - "epoch": 0.14296879696987916, - "grad_norm": 2.3079073454308787, - "learning_rate": 3.867727889627376e-06, - "loss": 0.7833, - "num_input_tokens_seen": 24967370, - "step": 1189 - }, - { - "epoch": 0.14308903986051824, - "grad_norm": 3.4205602945925473, - "learning_rate": 3.867449165518687e-06, - "loss": 0.7817, - "num_input_tokens_seen": 24983560, - "step": 1190 - }, - { - "epoch": 0.14320928275115732, - "grad_norm": 1.917858774660838, - "learning_rate": 3.867170158119443e-06, - "loss": 0.7064, - "num_input_tokens_seen": 25002280, - "step": 1191 - }, - { - "epoch": 0.14332952564179643, - "grad_norm": 3.484199753147754, - "learning_rate": 3.866890867471972e-06, - "loss": 0.7537, - "num_input_tokens_seen": 25020470, - "step": 1192 - }, - { - "epoch": 0.14344976853243552, - "grad_norm": 2.5817815420350017, - "learning_rate": 3.86661129361864e-06, - "loss": 0.8962, - "num_input_tokens_seen": 25034680, - "step": 1193 - }, - { - "epoch": 0.1435700114230746, - "grad_norm": 4.966148827242535, - "learning_rate": 3.866331436601859e-06, - "loss": 0.8584, - "num_input_tokens_seen": 25052395, - "step": 1194 - }, - { - "epoch": 0.1436902543137137, - "grad_norm": 2.5516887621876903, - "learning_rate": 3.866051296464083e-06, - "loss": 0.7387, - "num_input_tokens_seen": 25070950, - "step": 1195 - }, - { - "epoch": 0.1438104972043528, - "grad_norm": 9.030350797148046, - "learning_rate": 3.86577087324781e-06, - "loss": 0.839, - "num_input_tokens_seen": 25087160, - "step": 1196 - }, - { - "epoch": 0.14393074009499188, - "grad_norm": 4.152997075747749, - "learning_rate": 3.865490166995578e-06, - "loss": 0.7686, - "num_input_tokens_seen": 25105110, - "step": 1197 - }, - { - "epoch": 0.144050982985631, - "grad_norm": 3.8598736239759592, - "learning_rate": 3.86520917774997e-06, - "loss": 0.835, - "num_input_tokens_seen": 25124265, - "step": 1198 - }, - { - "epoch": 0.14417122587627007, - "grad_norm": 2.280213964280314, - "learning_rate": 3.864927905553614e-06, - "loss": 0.7521, - "num_input_tokens_seen": 25141895, - "step": 1199 - }, - { - "epoch": 0.14429146876690915, - "grad_norm": 1.8087576353408834, - "learning_rate": 3.8646463504491765e-06, - "loss": 0.8816, - "num_input_tokens_seen": 25161750, - "step": 1200 - }, - { - "epoch": 0.14441171165754824, - "grad_norm": 2.434974778757965, - "learning_rate": 3.8643645124793705e-06, - "loss": 0.8337, - "num_input_tokens_seen": 25180370, - "step": 1201 - }, - { - "epoch": 0.14453195454818735, - "grad_norm": 1.9602187323739744, - "learning_rate": 3.8640823916869515e-06, - "loss": 0.7477, - "num_input_tokens_seen": 25204400, - "step": 1202 - }, - { - "epoch": 0.14465219743882643, - "grad_norm": 3.1561425020460825, - "learning_rate": 3.863799988114714e-06, - "loss": 0.7797, - "num_input_tokens_seen": 25226150, - "step": 1203 - }, - { - "epoch": 0.1447724403294655, - "grad_norm": 3.02076886404522, - "learning_rate": 3.863517301805502e-06, - "loss": 0.7006, - "num_input_tokens_seen": 25244260, - "step": 1204 - }, - { - "epoch": 0.14489268322010462, - "grad_norm": 2.5779882368855245, - "learning_rate": 3.863234332802196e-06, - "loss": 0.9654, - "num_input_tokens_seen": 25256185, - "step": 1205 - }, - { - "epoch": 0.1450129261107437, - "grad_norm": 3.602838935776098, - "learning_rate": 3.862951081147723e-06, - "loss": 0.742, - "num_input_tokens_seen": 25276070, - "step": 1206 - }, - { - "epoch": 0.1451331690013828, - "grad_norm": 2.04430040283684, - "learning_rate": 3.862667546885053e-06, - "loss": 0.7823, - "num_input_tokens_seen": 25294340, - "step": 1207 - }, - { - "epoch": 0.14525341189202187, - "grad_norm": 2.28504922120642, - "learning_rate": 3.8623837300571965e-06, - "loss": 0.728, - "num_input_tokens_seen": 25313045, - "step": 1208 - }, - { - "epoch": 0.14537365478266098, - "grad_norm": 2.2193805628405454, - "learning_rate": 3.8620996307072085e-06, - "loss": 0.8338, - "num_input_tokens_seen": 25333470, - "step": 1209 - }, - { - "epoch": 0.14549389767330007, - "grad_norm": 2.018436636492823, - "learning_rate": 3.861815248878188e-06, - "loss": 0.6379, - "num_input_tokens_seen": 25350675, - "step": 1210 - }, - { - "epoch": 0.14561414056393915, - "grad_norm": 2.4206716335751204, - "learning_rate": 3.861530584613274e-06, - "loss": 0.8005, - "num_input_tokens_seen": 25368395, - "step": 1211 - }, - { - "epoch": 0.14573438345457826, - "grad_norm": 3.1399369198095357, - "learning_rate": 3.86124563795565e-06, - "loss": 0.8182, - "num_input_tokens_seen": 25386930, - "step": 1212 - }, - { - "epoch": 0.14585462634521734, - "grad_norm": 2.0542085685361515, - "learning_rate": 3.860960408948543e-06, - "loss": 0.7042, - "num_input_tokens_seen": 25408400, - "step": 1213 - }, - { - "epoch": 0.14597486923585642, - "grad_norm": 2.3518370057811695, - "learning_rate": 3.860674897635222e-06, - "loss": 0.9048, - "num_input_tokens_seen": 25424605, - "step": 1214 - }, - { - "epoch": 0.1460951121264955, - "grad_norm": 1.786375568969041, - "learning_rate": 3.860389104058998e-06, - "loss": 0.8263, - "num_input_tokens_seen": 25442555, - "step": 1215 - }, - { - "epoch": 0.14621535501713462, - "grad_norm": 2.0782593913866605, - "learning_rate": 3.860103028263227e-06, - "loss": 0.7223, - "num_input_tokens_seen": 25465380, - "step": 1216 - }, - { - "epoch": 0.1463355979077737, - "grad_norm": 2.278154609023106, - "learning_rate": 3.859816670291304e-06, - "loss": 0.6918, - "num_input_tokens_seen": 25484195, - "step": 1217 - }, - { - "epoch": 0.14645584079841278, - "grad_norm": 2.378237717036832, - "learning_rate": 3.859530030186672e-06, - "loss": 0.8961, - "num_input_tokens_seen": 25500925, - "step": 1218 - }, - { - "epoch": 0.1465760836890519, - "grad_norm": 2.344043564337135, - "learning_rate": 3.859243107992813e-06, - "loss": 0.8223, - "num_input_tokens_seen": 25519450, - "step": 1219 - }, - { - "epoch": 0.14669632657969098, - "grad_norm": 5.219753942097025, - "learning_rate": 3.858955903753252e-06, - "loss": 0.784, - "num_input_tokens_seen": 25537810, - "step": 1220 - }, - { - "epoch": 0.14681656947033006, - "grad_norm": 1.669532118271603, - "learning_rate": 3.858668417511559e-06, - "loss": 0.8276, - "num_input_tokens_seen": 25560280, - "step": 1221 - }, - { - "epoch": 0.14693681236096917, - "grad_norm": 2.092838163063582, - "learning_rate": 3.8583806493113445e-06, - "loss": 0.7609, - "num_input_tokens_seen": 25578345, - "step": 1222 - }, - { - "epoch": 0.14705705525160825, - "grad_norm": 2.2600462017100504, - "learning_rate": 3.858092599196263e-06, - "loss": 0.813, - "num_input_tokens_seen": 25596020, - "step": 1223 - }, - { - "epoch": 0.14717729814224734, - "grad_norm": 2.482597700580532, - "learning_rate": 3.857804267210012e-06, - "loss": 0.8241, - "num_input_tokens_seen": 25615040, - "step": 1224 - }, - { - "epoch": 0.14729754103288642, - "grad_norm": 2.093665233275961, - "learning_rate": 3.857515653396331e-06, - "loss": 0.8799, - "num_input_tokens_seen": 25631970, - "step": 1225 - }, - { - "epoch": 0.14741778392352553, - "grad_norm": 2.544159244375698, - "learning_rate": 3.857226757799001e-06, - "loss": 0.863, - "num_input_tokens_seen": 25649245, - "step": 1226 - }, - { - "epoch": 0.1475380268141646, - "grad_norm": 2.28471473099243, - "learning_rate": 3.85693758046185e-06, - "loss": 0.7321, - "num_input_tokens_seen": 25667255, - "step": 1227 - }, - { - "epoch": 0.1476582697048037, - "grad_norm": 4.382976322352578, - "learning_rate": 3.8566481214287435e-06, - "loss": 0.8233, - "num_input_tokens_seen": 25685095, - "step": 1228 - }, - { - "epoch": 0.1477785125954428, - "grad_norm": 1.9292208842864993, - "learning_rate": 3.8563583807435935e-06, - "loss": 0.8977, - "num_input_tokens_seen": 25700960, - "step": 1229 - }, - { - "epoch": 0.1478987554860819, - "grad_norm": 2.059500723105536, - "learning_rate": 3.856068358450353e-06, - "loss": 0.7703, - "num_input_tokens_seen": 25720630, - "step": 1230 - }, - { - "epoch": 0.14801899837672097, - "grad_norm": 3.4310915179565304, - "learning_rate": 3.8557780545930186e-06, - "loss": 0.8527, - "num_input_tokens_seen": 25738765, - "step": 1231 - }, - { - "epoch": 0.14813924126736006, - "grad_norm": 2.729718138848321, - "learning_rate": 3.855487469215628e-06, - "loss": 0.7855, - "num_input_tokens_seen": 25757415, - "step": 1232 - }, - { - "epoch": 0.14825948415799917, - "grad_norm": 2.6386627463083996, - "learning_rate": 3.855196602362264e-06, - "loss": 0.7202, - "num_input_tokens_seen": 25780055, - "step": 1233 - }, - { - "epoch": 0.14837972704863825, - "grad_norm": 2.121744463035372, - "learning_rate": 3.854905454077051e-06, - "loss": 0.9422, - "num_input_tokens_seen": 25797385, - "step": 1234 - }, - { - "epoch": 0.14849996993927733, - "grad_norm": 1.9113785942100372, - "learning_rate": 3.854614024404155e-06, - "loss": 0.8782, - "num_input_tokens_seen": 25815415, - "step": 1235 - }, - { - "epoch": 0.14862021282991644, - "grad_norm": 1.9757040650042264, - "learning_rate": 3.8543223133877865e-06, - "loss": 0.8974, - "num_input_tokens_seen": 25833730, - "step": 1236 - }, - { - "epoch": 0.14874045572055553, - "grad_norm": 2.0258420940263413, - "learning_rate": 3.854030321072198e-06, - "loss": 0.8721, - "num_input_tokens_seen": 25853355, - "step": 1237 - }, - { - "epoch": 0.1488606986111946, - "grad_norm": 2.073113965953351, - "learning_rate": 3.853738047501682e-06, - "loss": 0.7294, - "num_input_tokens_seen": 25873635, - "step": 1238 - }, - { - "epoch": 0.1489809415018337, - "grad_norm": 2.8219345067994808, - "learning_rate": 3.85344549272058e-06, - "loss": 0.7733, - "num_input_tokens_seen": 25891335, - "step": 1239 - }, - { - "epoch": 0.1491011843924728, - "grad_norm": 1.8922606712380388, - "learning_rate": 3.853152656773269e-06, - "loss": 0.821, - "num_input_tokens_seen": 25912490, - "step": 1240 - }, - { - "epoch": 0.14922142728311188, - "grad_norm": 1.8515395462787125, - "learning_rate": 3.852859539704174e-06, - "loss": 0.8409, - "num_input_tokens_seen": 25931510, - "step": 1241 - }, - { - "epoch": 0.14934167017375097, - "grad_norm": 2.012879530583986, - "learning_rate": 3.852566141557759e-06, - "loss": 0.7568, - "num_input_tokens_seen": 25951360, - "step": 1242 - }, - { - "epoch": 0.14946191306439008, - "grad_norm": 2.442529972328362, - "learning_rate": 3.852272462378535e-06, - "loss": 0.7498, - "num_input_tokens_seen": 25968955, - "step": 1243 - }, - { - "epoch": 0.14958215595502916, - "grad_norm": 1.9992961144426993, - "learning_rate": 3.85197850221105e-06, - "loss": 0.7799, - "num_input_tokens_seen": 25984975, - "step": 1244 - }, - { - "epoch": 0.14970239884566824, - "grad_norm": 1.7058298419999713, - "learning_rate": 3.851684261099899e-06, - "loss": 0.7608, - "num_input_tokens_seen": 26006435, - "step": 1245 - }, - { - "epoch": 0.14982264173630733, - "grad_norm": 2.0742106175521626, - "learning_rate": 3.851389739089718e-06, - "loss": 0.8614, - "num_input_tokens_seen": 26022775, - "step": 1246 - }, - { - "epoch": 0.14994288462694644, - "grad_norm": 1.9388154032300862, - "learning_rate": 3.851094936225186e-06, - "loss": 0.7999, - "num_input_tokens_seen": 26043380, - "step": 1247 - }, - { - "epoch": 0.15006312751758552, - "grad_norm": 1.433384118925013, - "learning_rate": 3.850799852551024e-06, - "loss": 0.7703, - "num_input_tokens_seen": 26065520, - "step": 1248 - }, - { - "epoch": 0.1501833704082246, - "grad_norm": 2.5170145102294597, - "learning_rate": 3.850504488111995e-06, - "loss": 0.8604, - "num_input_tokens_seen": 26081915, - "step": 1249 - }, - { - "epoch": 0.15030361329886371, - "grad_norm": 1.654428917863714, - "learning_rate": 3.850208842952907e-06, - "loss": 0.8184, - "num_input_tokens_seen": 26100440, - "step": 1250 - }, - { - "epoch": 0.1504238561895028, - "grad_norm": 1.7280710308919371, - "learning_rate": 3.849912917118608e-06, - "loss": 0.7907, - "num_input_tokens_seen": 26121200, - "step": 1251 - }, - { - "epoch": 0.15054409908014188, - "grad_norm": 1.1027292970909603, - "learning_rate": 3.849616710653992e-06, - "loss": 0.6267, - "num_input_tokens_seen": 26182390, - "step": 1252 - }, - { - "epoch": 0.150664341970781, - "grad_norm": 1.7787948775832094, - "learning_rate": 3.84932022360399e-06, - "loss": 0.7491, - "num_input_tokens_seen": 26200775, - "step": 1253 - }, - { - "epoch": 0.15078458486142007, - "grad_norm": 2.852026570377626, - "learning_rate": 3.849023456013581e-06, - "loss": 0.8324, - "num_input_tokens_seen": 26218055, - "step": 1254 - }, - { - "epoch": 0.15090482775205916, - "grad_norm": 3.0744392624516963, - "learning_rate": 3.848726407927784e-06, - "loss": 0.6175, - "num_input_tokens_seen": 26238160, - "step": 1255 - }, - { - "epoch": 0.15102507064269824, - "grad_norm": 2.5917276064979964, - "learning_rate": 3.84842907939166e-06, - "loss": 0.8701, - "num_input_tokens_seen": 26257105, - "step": 1256 - }, - { - "epoch": 0.15114531353333735, - "grad_norm": 3.537445758643608, - "learning_rate": 3.8481314704503146e-06, - "loss": 0.7178, - "num_input_tokens_seen": 26276655, - "step": 1257 - }, - { - "epoch": 0.15126555642397643, - "grad_norm": 5.814397022407301, - "learning_rate": 3.847833581148895e-06, - "loss": 0.8826, - "num_input_tokens_seen": 26295285, - "step": 1258 - }, - { - "epoch": 0.15138579931461552, - "grad_norm": 1.9357352262497578, - "learning_rate": 3.84753541153259e-06, - "loss": 0.7958, - "num_input_tokens_seen": 26314575, - "step": 1259 - }, - { - "epoch": 0.15150604220525463, - "grad_norm": 1.659858690748384, - "learning_rate": 3.847236961646633e-06, - "loss": 0.8292, - "num_input_tokens_seen": 26333275, - "step": 1260 - }, - { - "epoch": 0.1516262850958937, - "grad_norm": 2.6075052615493886, - "learning_rate": 3.846938231536296e-06, - "loss": 0.7831, - "num_input_tokens_seen": 26348615, - "step": 1261 - }, - { - "epoch": 0.1517465279865328, - "grad_norm": 1.9914077629325395, - "learning_rate": 3.8466392212468995e-06, - "loss": 0.8069, - "num_input_tokens_seen": 26368525, - "step": 1262 - }, - { - "epoch": 0.15186677087717187, - "grad_norm": 0.8301618911821336, - "learning_rate": 3.8463399308238e-06, - "loss": 0.645, - "num_input_tokens_seen": 26427350, - "step": 1263 - }, - { - "epoch": 0.15198701376781099, - "grad_norm": 1.9567545730329718, - "learning_rate": 3.846040360312402e-06, - "loss": 0.6363, - "num_input_tokens_seen": 26450330, - "step": 1264 - }, - { - "epoch": 0.15210725665845007, - "grad_norm": 2.7513082621790277, - "learning_rate": 3.8457405097581485e-06, - "loss": 0.815, - "num_input_tokens_seen": 26469040, - "step": 1265 - }, - { - "epoch": 0.15222749954908915, - "grad_norm": 2.5777182047653016, - "learning_rate": 3.8454403792065275e-06, - "loss": 0.7785, - "num_input_tokens_seen": 26487580, - "step": 1266 - }, - { - "epoch": 0.15234774243972826, - "grad_norm": 2.0513283701240947, - "learning_rate": 3.845139968703068e-06, - "loss": 0.8525, - "num_input_tokens_seen": 26504820, - "step": 1267 - }, - { - "epoch": 0.15246798533036734, - "grad_norm": 2.0510147415541278, - "learning_rate": 3.844839278293342e-06, - "loss": 0.8252, - "num_input_tokens_seen": 26525390, - "step": 1268 - }, - { - "epoch": 0.15258822822100643, - "grad_norm": 2.5746028380602555, - "learning_rate": 3.8445383080229654e-06, - "loss": 0.7602, - "num_input_tokens_seen": 26541125, - "step": 1269 - }, - { - "epoch": 0.1527084711116455, - "grad_norm": 5.025945563031634, - "learning_rate": 3.844237057937593e-06, - "loss": 0.7308, - "num_input_tokens_seen": 26559850, - "step": 1270 - }, - { - "epoch": 0.15282871400228462, - "grad_norm": 3.504246336163769, - "learning_rate": 3.843935528082926e-06, - "loss": 0.7838, - "num_input_tokens_seen": 26580595, - "step": 1271 - }, - { - "epoch": 0.1529489568929237, - "grad_norm": 2.1537130538107094, - "learning_rate": 3.843633718504704e-06, - "loss": 0.8484, - "num_input_tokens_seen": 26598760, - "step": 1272 - }, - { - "epoch": 0.1530691997835628, - "grad_norm": 2.478490561573534, - "learning_rate": 3.843331629248715e-06, - "loss": 0.8919, - "num_input_tokens_seen": 26616080, - "step": 1273 - }, - { - "epoch": 0.1531894426742019, - "grad_norm": 2.272641781374053, - "learning_rate": 3.843029260360782e-06, - "loss": 0.7655, - "num_input_tokens_seen": 26634170, - "step": 1274 - }, - { - "epoch": 0.15330968556484098, - "grad_norm": 4.182756894710253, - "learning_rate": 3.8427266118867755e-06, - "loss": 0.7823, - "num_input_tokens_seen": 26653640, - "step": 1275 - }, - { - "epoch": 0.15342992845548006, - "grad_norm": 2.4697343284779634, - "learning_rate": 3.842423683872608e-06, - "loss": 0.8208, - "num_input_tokens_seen": 26673935, - "step": 1276 - }, - { - "epoch": 0.15355017134611917, - "grad_norm": 2.758921912262511, - "learning_rate": 3.842120476364232e-06, - "loss": 0.7847, - "num_input_tokens_seen": 26692105, - "step": 1277 - }, - { - "epoch": 0.15367041423675826, - "grad_norm": 2.2516647944219392, - "learning_rate": 3.841816989407644e-06, - "loss": 0.8296, - "num_input_tokens_seen": 26707315, - "step": 1278 - }, - { - "epoch": 0.15379065712739734, - "grad_norm": 2.0724141887527585, - "learning_rate": 3.841513223048884e-06, - "loss": 0.7612, - "num_input_tokens_seen": 26727720, - "step": 1279 - }, - { - "epoch": 0.15391090001803642, - "grad_norm": 3.016533848443925, - "learning_rate": 3.841209177334031e-06, - "loss": 0.7837, - "num_input_tokens_seen": 26745800, - "step": 1280 - }, - { - "epoch": 0.15403114290867553, - "grad_norm": 2.9306784155719496, - "learning_rate": 3.84090485230921e-06, - "loss": 0.7507, - "num_input_tokens_seen": 26763760, - "step": 1281 - }, - { - "epoch": 0.15415138579931462, - "grad_norm": 4.179257497991434, - "learning_rate": 3.840600248020588e-06, - "loss": 0.7629, - "num_input_tokens_seen": 26780420, - "step": 1282 - }, - { - "epoch": 0.1542716286899537, - "grad_norm": 2.4084822387968345, - "learning_rate": 3.840295364514371e-06, - "loss": 0.7933, - "num_input_tokens_seen": 26797520, - "step": 1283 - }, - { - "epoch": 0.1543918715805928, - "grad_norm": 2.437446661749323, - "learning_rate": 3.83999020183681e-06, - "loss": 0.7797, - "num_input_tokens_seen": 26815935, - "step": 1284 - }, - { - "epoch": 0.1545121144712319, - "grad_norm": 2.0890401264597958, - "learning_rate": 3.839684760034199e-06, - "loss": 0.7765, - "num_input_tokens_seen": 26833860, - "step": 1285 - }, - { - "epoch": 0.15463235736187098, - "grad_norm": 2.5797079363326687, - "learning_rate": 3.8393790391528716e-06, - "loss": 0.6503, - "num_input_tokens_seen": 26854275, - "step": 1286 - }, - { - "epoch": 0.15475260025251006, - "grad_norm": 2.1434082151630536, - "learning_rate": 3.8390730392392075e-06, - "loss": 0.8841, - "num_input_tokens_seen": 26873975, - "step": 1287 - }, - { - "epoch": 0.15487284314314917, - "grad_norm": 2.3177742584938477, - "learning_rate": 3.838766760339626e-06, - "loss": 0.794, - "num_input_tokens_seen": 26892220, - "step": 1288 - }, - { - "epoch": 0.15499308603378825, - "grad_norm": 2.796655175919591, - "learning_rate": 3.838460202500587e-06, - "loss": 0.7867, - "num_input_tokens_seen": 26907730, - "step": 1289 - }, - { - "epoch": 0.15511332892442733, - "grad_norm": 2.1343041116359167, - "learning_rate": 3.838153365768599e-06, - "loss": 0.7384, - "num_input_tokens_seen": 26923960, - "step": 1290 - }, - { - "epoch": 0.15523357181506645, - "grad_norm": 2.569057553779175, - "learning_rate": 3.837846250190206e-06, - "loss": 0.7481, - "num_input_tokens_seen": 26946545, - "step": 1291 - }, - { - "epoch": 0.15535381470570553, - "grad_norm": 2.3866090308302, - "learning_rate": 3.837538855811998e-06, - "loss": 0.773, - "num_input_tokens_seen": 26964440, - "step": 1292 - }, - { - "epoch": 0.1554740575963446, - "grad_norm": 2.799496795287829, - "learning_rate": 3.837231182680606e-06, - "loss": 0.6982, - "num_input_tokens_seen": 26982125, - "step": 1293 - }, - { - "epoch": 0.1555943004869837, - "grad_norm": 1.8906334803766507, - "learning_rate": 3.836923230842706e-06, - "loss": 0.7595, - "num_input_tokens_seen": 27000960, - "step": 1294 - }, - { - "epoch": 0.1557145433776228, - "grad_norm": 2.220523574827949, - "learning_rate": 3.836615000345011e-06, - "loss": 0.8046, - "num_input_tokens_seen": 27018860, - "step": 1295 - }, - { - "epoch": 0.1558347862682619, - "grad_norm": 2.207135707949427, - "learning_rate": 3.836306491234282e-06, - "loss": 0.7722, - "num_input_tokens_seen": 27036430, - "step": 1296 - }, - { - "epoch": 0.15595502915890097, - "grad_norm": 2.493064488433665, - "learning_rate": 3.835997703557317e-06, - "loss": 0.7408, - "num_input_tokens_seen": 27052890, - "step": 1297 - }, - { - "epoch": 0.15607527204954008, - "grad_norm": 2.552935961981001, - "learning_rate": 3.83568863736096e-06, - "loss": 0.8015, - "num_input_tokens_seen": 27071480, - "step": 1298 - }, - { - "epoch": 0.15619551494017916, - "grad_norm": 2.439422893072988, - "learning_rate": 3.8353792926920975e-06, - "loss": 0.8841, - "num_input_tokens_seen": 27089850, - "step": 1299 - }, - { - "epoch": 0.15631575783081825, - "grad_norm": 2.521620051677975, - "learning_rate": 3.835069669597655e-06, - "loss": 0.8187, - "num_input_tokens_seen": 27107960, - "step": 1300 - }, - { - "epoch": 0.15643600072145733, - "grad_norm": 2.1091572306019626, - "learning_rate": 3.834759768124603e-06, - "loss": 0.7879, - "num_input_tokens_seen": 27126555, - "step": 1301 - }, - { - "epoch": 0.15655624361209644, - "grad_norm": 76.8875280061683, - "learning_rate": 3.834449588319953e-06, - "loss": 0.7572, - "num_input_tokens_seen": 27144310, - "step": 1302 - }, - { - "epoch": 0.15667648650273552, - "grad_norm": 3.3633950303514397, - "learning_rate": 3.834139130230758e-06, - "loss": 0.8376, - "num_input_tokens_seen": 27163335, - "step": 1303 - }, - { - "epoch": 0.1567967293933746, - "grad_norm": 1.916679119412341, - "learning_rate": 3.833828393904117e-06, - "loss": 0.811, - "num_input_tokens_seen": 27183335, - "step": 1304 - }, - { - "epoch": 0.15691697228401372, - "grad_norm": 2.322982845974901, - "learning_rate": 3.833517379387165e-06, - "loss": 0.7683, - "num_input_tokens_seen": 27199510, - "step": 1305 - }, - { - "epoch": 0.1570372151746528, - "grad_norm": 1.8592348824121185, - "learning_rate": 3.833206086727085e-06, - "loss": 0.89, - "num_input_tokens_seen": 27218580, - "step": 1306 - }, - { - "epoch": 0.15715745806529188, - "grad_norm": 2.1543387627220256, - "learning_rate": 3.8328945159710994e-06, - "loss": 0.709, - "num_input_tokens_seen": 27238480, - "step": 1307 - }, - { - "epoch": 0.157277700955931, - "grad_norm": 2.0037505641022597, - "learning_rate": 3.832582667166473e-06, - "loss": 0.8788, - "num_input_tokens_seen": 27258010, - "step": 1308 - }, - { - "epoch": 0.15739794384657008, - "grad_norm": 2.0021781237442995, - "learning_rate": 3.8322705403605125e-06, - "loss": 0.8171, - "num_input_tokens_seen": 27278075, - "step": 1309 - }, - { - "epoch": 0.15751818673720916, - "grad_norm": 2.2334019858596665, - "learning_rate": 3.831958135600568e-06, - "loss": 0.8163, - "num_input_tokens_seen": 27295345, - "step": 1310 - }, - { - "epoch": 0.15763842962784824, - "grad_norm": 3.429400100664831, - "learning_rate": 3.831645452934032e-06, - "loss": 0.8001, - "num_input_tokens_seen": 27313495, - "step": 1311 - }, - { - "epoch": 0.15775867251848735, - "grad_norm": 2.5480806705361196, - "learning_rate": 3.831332492408336e-06, - "loss": 0.8052, - "num_input_tokens_seen": 27334625, - "step": 1312 - }, - { - "epoch": 0.15787891540912644, - "grad_norm": 1.9905738779330642, - "learning_rate": 3.831019254070957e-06, - "loss": 0.6825, - "num_input_tokens_seen": 27352130, - "step": 1313 - }, - { - "epoch": 0.15799915829976552, - "grad_norm": 2.5776251697101444, - "learning_rate": 3.8307057379694135e-06, - "loss": 0.9569, - "num_input_tokens_seen": 27371185, - "step": 1314 - }, - { - "epoch": 0.15811940119040463, - "grad_norm": 2.370712191880963, - "learning_rate": 3.830391944151264e-06, - "loss": 0.816, - "num_input_tokens_seen": 27386785, - "step": 1315 - }, - { - "epoch": 0.1582396440810437, - "grad_norm": 1.8733668286459888, - "learning_rate": 3.830077872664114e-06, - "loss": 0.6696, - "num_input_tokens_seen": 27407630, - "step": 1316 - }, - { - "epoch": 0.1583598869716828, - "grad_norm": 1.8680214783940308, - "learning_rate": 3.829763523555604e-06, - "loss": 0.7272, - "num_input_tokens_seen": 27427750, - "step": 1317 - }, - { - "epoch": 0.15848012986232188, - "grad_norm": 2.3373558070949296, - "learning_rate": 3.829448896873423e-06, - "loss": 0.7932, - "num_input_tokens_seen": 27446570, - "step": 1318 - }, - { - "epoch": 0.158600372752961, - "grad_norm": 2.201530039165448, - "learning_rate": 3.829133992665299e-06, - "loss": 0.7853, - "num_input_tokens_seen": 27465415, - "step": 1319 - }, - { - "epoch": 0.15872061564360007, - "grad_norm": 2.4817220948294927, - "learning_rate": 3.828818810979002e-06, - "loss": 0.8923, - "num_input_tokens_seen": 27483465, - "step": 1320 - }, - { - "epoch": 0.15884085853423915, - "grad_norm": 4.934492941257204, - "learning_rate": 3.8285033518623454e-06, - "loss": 0.7997, - "num_input_tokens_seen": 27503435, - "step": 1321 - }, - { - "epoch": 0.15896110142487826, - "grad_norm": 2.664115441836404, - "learning_rate": 3.8281876153631845e-06, - "loss": 0.8148, - "num_input_tokens_seen": 27519910, - "step": 1322 - }, - { - "epoch": 0.15908134431551735, - "grad_norm": 2.921151980929316, - "learning_rate": 3.827871601529416e-06, - "loss": 0.6482, - "num_input_tokens_seen": 27538150, - "step": 1323 - }, - { - "epoch": 0.15920158720615643, - "grad_norm": 8.772096420329104, - "learning_rate": 3.827555310408979e-06, - "loss": 0.8006, - "num_input_tokens_seen": 27557265, - "step": 1324 - }, - { - "epoch": 0.1593218300967955, - "grad_norm": 2.0999620719837644, - "learning_rate": 3.827238742049854e-06, - "loss": 0.8206, - "num_input_tokens_seen": 27577280, - "step": 1325 - }, - { - "epoch": 0.15944207298743462, - "grad_norm": 2.057588485196255, - "learning_rate": 3.826921896500066e-06, - "loss": 0.531, - "num_input_tokens_seen": 27598285, - "step": 1326 - }, - { - "epoch": 0.1595623158780737, - "grad_norm": 2.317607682973034, - "learning_rate": 3.826604773807678e-06, - "loss": 0.787, - "num_input_tokens_seen": 27615980, - "step": 1327 - }, - { - "epoch": 0.1596825587687128, - "grad_norm": 3.2685684682426785, - "learning_rate": 3.826287374020798e-06, - "loss": 0.7267, - "num_input_tokens_seen": 27630505, - "step": 1328 - }, - { - "epoch": 0.1598028016593519, - "grad_norm": 3.194547399945437, - "learning_rate": 3.825969697187575e-06, - "loss": 0.8198, - "num_input_tokens_seen": 27649555, - "step": 1329 - }, - { - "epoch": 0.15992304454999098, - "grad_norm": 1.9559845177944728, - "learning_rate": 3.8256517433562015e-06, - "loss": 0.699, - "num_input_tokens_seen": 27667215, - "step": 1330 - }, - { - "epoch": 0.16004328744063007, - "grad_norm": 3.3455049748582626, - "learning_rate": 3.82533351257491e-06, - "loss": 0.9236, - "num_input_tokens_seen": 27684885, - "step": 1331 - }, - { - "epoch": 0.16016353033126918, - "grad_norm": 2.1910572080448065, - "learning_rate": 3.825015004891975e-06, - "loss": 0.8847, - "num_input_tokens_seen": 27703345, - "step": 1332 - }, - { - "epoch": 0.16028377322190826, - "grad_norm": 2.730165213193173, - "learning_rate": 3.824696220355716e-06, - "loss": 0.7603, - "num_input_tokens_seen": 27724655, - "step": 1333 - }, - { - "epoch": 0.16040401611254734, - "grad_norm": 5.972470411195008, - "learning_rate": 3.824377159014491e-06, - "loss": 0.7926, - "num_input_tokens_seen": 27745270, - "step": 1334 - }, - { - "epoch": 0.16052425900318643, - "grad_norm": 5.431198173286477, - "learning_rate": 3.824057820916702e-06, - "loss": 0.8493, - "num_input_tokens_seen": 27762195, - "step": 1335 - }, - { - "epoch": 0.16064450189382554, - "grad_norm": 2.8398681747811123, - "learning_rate": 3.8237382061107904e-06, - "loss": 0.7159, - "num_input_tokens_seen": 27778635, - "step": 1336 - }, - { - "epoch": 0.16076474478446462, - "grad_norm": 3.616908841007189, - "learning_rate": 3.823418314645243e-06, - "loss": 0.7888, - "num_input_tokens_seen": 27797230, - "step": 1337 - }, - { - "epoch": 0.1608849876751037, - "grad_norm": 2.807757964799803, - "learning_rate": 3.823098146568588e-06, - "loss": 0.7547, - "num_input_tokens_seen": 27816655, - "step": 1338 - }, - { - "epoch": 0.1610052305657428, - "grad_norm": 3.513618632551631, - "learning_rate": 3.822777701929394e-06, - "loss": 0.717, - "num_input_tokens_seen": 27838200, - "step": 1339 - }, - { - "epoch": 0.1611254734563819, - "grad_norm": 3.1066323013685686, - "learning_rate": 3.8224569807762714e-06, - "loss": 0.7388, - "num_input_tokens_seen": 27857240, - "step": 1340 - }, - { - "epoch": 0.16124571634702098, - "grad_norm": 2.926871461601744, - "learning_rate": 3.822135983157873e-06, - "loss": 0.7695, - "num_input_tokens_seen": 27876235, - "step": 1341 - }, - { - "epoch": 0.16136595923766006, - "grad_norm": 2.7044087534467054, - "learning_rate": 3.821814709122896e-06, - "loss": 0.8404, - "num_input_tokens_seen": 27894005, - "step": 1342 - }, - { - "epoch": 0.16148620212829917, - "grad_norm": 6.947638090296441, - "learning_rate": 3.821493158720076e-06, - "loss": 0.8544, - "num_input_tokens_seen": 27912830, - "step": 1343 - }, - { - "epoch": 0.16160644501893826, - "grad_norm": 4.806349228038643, - "learning_rate": 3.821171331998191e-06, - "loss": 0.7316, - "num_input_tokens_seen": 27929080, - "step": 1344 - }, - { - "epoch": 0.16172668790957734, - "grad_norm": 0.8439097444460514, - "learning_rate": 3.820849229006064e-06, - "loss": 0.5889, - "num_input_tokens_seen": 27996550, - "step": 1345 - }, - { - "epoch": 0.16184693080021645, - "grad_norm": 2.631377025327218, - "learning_rate": 3.8205268497925564e-06, - "loss": 0.7126, - "num_input_tokens_seen": 28016740, - "step": 1346 - }, - { - "epoch": 0.16196717369085553, - "grad_norm": 3.31699266738122, - "learning_rate": 3.8202041944065725e-06, - "loss": 0.7799, - "num_input_tokens_seen": 28032280, - "step": 1347 - }, - { - "epoch": 0.16208741658149461, - "grad_norm": 2.2779831743242087, - "learning_rate": 3.819881262897061e-06, - "loss": 0.7373, - "num_input_tokens_seen": 28050135, - "step": 1348 - }, - { - "epoch": 0.1622076594721337, - "grad_norm": 5.853857014923446, - "learning_rate": 3.819558055313008e-06, - "loss": 0.7329, - "num_input_tokens_seen": 28070540, - "step": 1349 - }, - { - "epoch": 0.1623279023627728, - "grad_norm": 2.063878564910883, - "learning_rate": 3.819234571703444e-06, - "loss": 0.7672, - "num_input_tokens_seen": 28089085, - "step": 1350 - }, - { - "epoch": 0.1624481452534119, - "grad_norm": 4.151467106043026, - "learning_rate": 3.8189108121174435e-06, - "loss": 0.8506, - "num_input_tokens_seen": 28108570, - "step": 1351 - }, - { - "epoch": 0.16256838814405097, - "grad_norm": 1.8173507918921392, - "learning_rate": 3.818586776604118e-06, - "loss": 0.8305, - "num_input_tokens_seen": 28128930, - "step": 1352 - }, - { - "epoch": 0.16268863103469008, - "grad_norm": 6.114712393327527, - "learning_rate": 3.818262465212625e-06, - "loss": 0.6209, - "num_input_tokens_seen": 28148775, - "step": 1353 - }, - { - "epoch": 0.16280887392532917, - "grad_norm": 3.124103804029256, - "learning_rate": 3.817937877992161e-06, - "loss": 0.7696, - "num_input_tokens_seen": 28165790, - "step": 1354 - }, - { - "epoch": 0.16292911681596825, - "grad_norm": 2.807021774612654, - "learning_rate": 3.817613014991967e-06, - "loss": 0.8469, - "num_input_tokens_seen": 28181650, - "step": 1355 - }, - { - "epoch": 0.16304935970660733, - "grad_norm": 3.3745525051579333, - "learning_rate": 3.817287876261323e-06, - "loss": 0.756, - "num_input_tokens_seen": 28201705, - "step": 1356 - }, - { - "epoch": 0.16316960259724644, - "grad_norm": 2.3411486132923782, - "learning_rate": 3.816962461849553e-06, - "loss": 0.7932, - "num_input_tokens_seen": 28223295, - "step": 1357 - }, - { - "epoch": 0.16328984548788553, - "grad_norm": 4.4311518968104275, - "learning_rate": 3.8166367718060235e-06, - "loss": 0.8403, - "num_input_tokens_seen": 28242905, - "step": 1358 - }, - { - "epoch": 0.1634100883785246, - "grad_norm": 3.4719845149613344, - "learning_rate": 3.816310806180139e-06, - "loss": 0.7534, - "num_input_tokens_seen": 28261035, - "step": 1359 - }, - { - "epoch": 0.16353033126916372, - "grad_norm": 2.0423168503709865, - "learning_rate": 3.81598456502135e-06, - "loss": 0.799, - "num_input_tokens_seen": 28280775, - "step": 1360 - }, - { - "epoch": 0.1636505741598028, - "grad_norm": 5.1445463270225025, - "learning_rate": 3.8156580483791455e-06, - "loss": 0.8652, - "num_input_tokens_seen": 28295685, - "step": 1361 - }, - { - "epoch": 0.16377081705044189, - "grad_norm": 3.16540128174647, - "learning_rate": 3.815331256303059e-06, - "loss": 0.7629, - "num_input_tokens_seen": 28315435, - "step": 1362 - }, - { - "epoch": 0.163891059941081, - "grad_norm": 2.9248364321935614, - "learning_rate": 3.815004188842665e-06, - "loss": 0.7737, - "num_input_tokens_seen": 28333195, - "step": 1363 - }, - { - "epoch": 0.16401130283172008, - "grad_norm": 1.9862311408795017, - "learning_rate": 3.814676846047578e-06, - "loss": 0.7913, - "num_input_tokens_seen": 28353790, - "step": 1364 - }, - { - "epoch": 0.16413154572235916, - "grad_norm": 1.8935930062928639, - "learning_rate": 3.8143492279674565e-06, - "loss": 0.6979, - "num_input_tokens_seen": 28376205, - "step": 1365 - }, - { - "epoch": 0.16425178861299825, - "grad_norm": 0.9834768612405342, - "learning_rate": 3.8140213346519993e-06, - "loss": 0.6581, - "num_input_tokens_seen": 28426520, - "step": 1366 - }, - { - "epoch": 0.16437203150363736, - "grad_norm": 1.7672564952023309, - "learning_rate": 3.813693166150948e-06, - "loss": 0.7667, - "num_input_tokens_seen": 28446450, - "step": 1367 - }, - { - "epoch": 0.16449227439427644, - "grad_norm": 2.883140828353441, - "learning_rate": 3.813364722514086e-06, - "loss": 0.8555, - "num_input_tokens_seen": 28464505, - "step": 1368 - }, - { - "epoch": 0.16461251728491552, - "grad_norm": 2.2682538442601166, - "learning_rate": 3.8130360037912368e-06, - "loss": 0.8092, - "num_input_tokens_seen": 28480670, - "step": 1369 - }, - { - "epoch": 0.16473276017555463, - "grad_norm": 2.263892416476242, - "learning_rate": 3.812707010032268e-06, - "loss": 0.813, - "num_input_tokens_seen": 28499445, - "step": 1370 - }, - { - "epoch": 0.16485300306619372, - "grad_norm": 2.7725838228584827, - "learning_rate": 3.8123777412870863e-06, - "loss": 0.7919, - "num_input_tokens_seen": 28518665, - "step": 1371 - }, - { - "epoch": 0.1649732459568328, - "grad_norm": 2.317519558579017, - "learning_rate": 3.812048197605643e-06, - "loss": 0.7837, - "num_input_tokens_seen": 28537280, - "step": 1372 - }, - { - "epoch": 0.16509348884747188, - "grad_norm": 2.0705585709478598, - "learning_rate": 3.8117183790379277e-06, - "loss": 0.8027, - "num_input_tokens_seen": 28555450, - "step": 1373 - }, - { - "epoch": 0.165213731738111, - "grad_norm": 3.5330293605607874, - "learning_rate": 3.811388285633976e-06, - "loss": 0.938, - "num_input_tokens_seen": 28571155, - "step": 1374 - }, - { - "epoch": 0.16533397462875007, - "grad_norm": 2.2344139474291125, - "learning_rate": 3.811057917443861e-06, - "loss": 0.6152, - "num_input_tokens_seen": 28590140, - "step": 1375 - }, - { - "epoch": 0.16545421751938916, - "grad_norm": 0.8818225596152021, - "learning_rate": 3.8107272745177e-06, - "loss": 0.7024, - "num_input_tokens_seen": 28662190, - "step": 1376 - }, - { - "epoch": 0.16557446041002827, - "grad_norm": 2.0117484218355006, - "learning_rate": 3.8103963569056513e-06, - "loss": 0.7924, - "num_input_tokens_seen": 28681045, - "step": 1377 - }, - { - "epoch": 0.16569470330066735, - "grad_norm": 1.611483272360582, - "learning_rate": 3.8100651646579146e-06, - "loss": 0.8777, - "num_input_tokens_seen": 28699975, - "step": 1378 - }, - { - "epoch": 0.16581494619130643, - "grad_norm": 2.218300737493285, - "learning_rate": 3.8097336978247317e-06, - "loss": 0.9224, - "num_input_tokens_seen": 28716400, - "step": 1379 - }, - { - "epoch": 0.16593518908194552, - "grad_norm": 2.5779844843578634, - "learning_rate": 3.8094019564563854e-06, - "loss": 0.8914, - "num_input_tokens_seen": 28733050, - "step": 1380 - }, - { - "epoch": 0.16605543197258463, - "grad_norm": 3.9080921328117517, - "learning_rate": 3.809069940603201e-06, - "loss": 0.7602, - "num_input_tokens_seen": 28750725, - "step": 1381 - }, - { - "epoch": 0.1661756748632237, - "grad_norm": 2.594512634178134, - "learning_rate": 3.8087376503155452e-06, - "loss": 0.7653, - "num_input_tokens_seen": 28767930, - "step": 1382 - }, - { - "epoch": 0.1662959177538628, - "grad_norm": 1.00309695066994, - "learning_rate": 3.808405085643826e-06, - "loss": 0.6039, - "num_input_tokens_seen": 28832530, - "step": 1383 - }, - { - "epoch": 0.1664161606445019, - "grad_norm": 2.059007914928114, - "learning_rate": 3.8080722466384925e-06, - "loss": 0.8947, - "num_input_tokens_seen": 28850100, - "step": 1384 - }, - { - "epoch": 0.166536403535141, - "grad_norm": 2.1208931077949362, - "learning_rate": 3.8077391333500376e-06, - "loss": 0.6986, - "num_input_tokens_seen": 28868960, - "step": 1385 - }, - { - "epoch": 0.16665664642578007, - "grad_norm": 1.9186469174437826, - "learning_rate": 3.8074057458289934e-06, - "loss": 0.765, - "num_input_tokens_seen": 28889370, - "step": 1386 - }, - { - "epoch": 0.16677688931641918, - "grad_norm": 2.286303493467645, - "learning_rate": 3.807072084125934e-06, - "loss": 0.8121, - "num_input_tokens_seen": 28910940, - "step": 1387 - }, - { - "epoch": 0.16689713220705826, - "grad_norm": 2.7432692759711776, - "learning_rate": 3.806738148291477e-06, - "loss": 0.8034, - "num_input_tokens_seen": 28927485, - "step": 1388 - }, - { - "epoch": 0.16701737509769735, - "grad_norm": 2.0296425136256553, - "learning_rate": 3.806403938376279e-06, - "loss": 0.7118, - "num_input_tokens_seen": 28949570, - "step": 1389 - }, - { - "epoch": 0.16713761798833643, - "grad_norm": 2.254303355473648, - "learning_rate": 3.8060694544310396e-06, - "loss": 0.7697, - "num_input_tokens_seen": 28967800, - "step": 1390 - }, - { - "epoch": 0.16725786087897554, - "grad_norm": 2.0867246596688096, - "learning_rate": 3.8057346965065006e-06, - "loss": 0.788, - "num_input_tokens_seen": 28988750, - "step": 1391 - }, - { - "epoch": 0.16737810376961462, - "grad_norm": 1.6522322160282428, - "learning_rate": 3.805399664653443e-06, - "loss": 0.8461, - "num_input_tokens_seen": 29010610, - "step": 1392 - }, - { - "epoch": 0.1674983466602537, - "grad_norm": 2.61312289615138, - "learning_rate": 3.805064358922692e-06, - "loss": 0.7552, - "num_input_tokens_seen": 29028620, - "step": 1393 - }, - { - "epoch": 0.16761858955089282, - "grad_norm": 2.24430929747971, - "learning_rate": 3.8047287793651136e-06, - "loss": 0.8024, - "num_input_tokens_seen": 29049785, - "step": 1394 - }, - { - "epoch": 0.1677388324415319, - "grad_norm": 2.4872347511290394, - "learning_rate": 3.8043929260316133e-06, - "loss": 0.8857, - "num_input_tokens_seen": 29067660, - "step": 1395 - }, - { - "epoch": 0.16785907533217098, - "grad_norm": 1.974567189010661, - "learning_rate": 3.8040567989731417e-06, - "loss": 0.8287, - "num_input_tokens_seen": 29085325, - "step": 1396 - }, - { - "epoch": 0.16797931822281006, - "grad_norm": 4.516959561007454, - "learning_rate": 3.8037203982406876e-06, - "loss": 0.8011, - "num_input_tokens_seen": 29103210, - "step": 1397 - }, - { - "epoch": 0.16809956111344918, - "grad_norm": 2.321208463719716, - "learning_rate": 3.8033837238852835e-06, - "loss": 0.7282, - "num_input_tokens_seen": 29119630, - "step": 1398 - }, - { - "epoch": 0.16821980400408826, - "grad_norm": 1.9948011370396532, - "learning_rate": 3.8030467759580017e-06, - "loss": 0.6946, - "num_input_tokens_seen": 29140270, - "step": 1399 - }, - { - "epoch": 0.16834004689472734, - "grad_norm": 2.55117562694287, - "learning_rate": 3.802709554509958e-06, - "loss": 0.8709, - "num_input_tokens_seen": 29157790, - "step": 1400 - }, - { - "epoch": 0.16846028978536645, - "grad_norm": 1.9504766751299423, - "learning_rate": 3.8023720595923083e-06, - "loss": 0.7829, - "num_input_tokens_seen": 29176765, - "step": 1401 - }, - { - "epoch": 0.16858053267600553, - "grad_norm": 2.4182892059139087, - "learning_rate": 3.80203429125625e-06, - "loss": 0.8736, - "num_input_tokens_seen": 29194660, - "step": 1402 - }, - { - "epoch": 0.16870077556664462, - "grad_norm": 1.9877660607277075, - "learning_rate": 3.8016962495530225e-06, - "loss": 0.7031, - "num_input_tokens_seen": 29213570, - "step": 1403 - }, - { - "epoch": 0.1688210184572837, - "grad_norm": 3.720472325750388, - "learning_rate": 3.8013579345339063e-06, - "loss": 0.7607, - "num_input_tokens_seen": 29228155, - "step": 1404 - }, - { - "epoch": 0.1689412613479228, - "grad_norm": 1.9936002083985014, - "learning_rate": 3.801019346250224e-06, - "loss": 0.6983, - "num_input_tokens_seen": 29248020, - "step": 1405 - }, - { - "epoch": 0.1690615042385619, - "grad_norm": 2.6521733438559085, - "learning_rate": 3.8006804847533395e-06, - "loss": 0.8325, - "num_input_tokens_seen": 29267255, - "step": 1406 - }, - { - "epoch": 0.16918174712920098, - "grad_norm": 2.111681775362625, - "learning_rate": 3.8003413500946556e-06, - "loss": 0.8454, - "num_input_tokens_seen": 29287085, - "step": 1407 - }, - { - "epoch": 0.1693019900198401, - "grad_norm": 3.3929131761768496, - "learning_rate": 3.8000019423256216e-06, - "loss": 0.8302, - "num_input_tokens_seen": 29304570, - "step": 1408 - }, - { - "epoch": 0.16942223291047917, - "grad_norm": 2.173272743604148, - "learning_rate": 3.7996622614977234e-06, - "loss": 0.877, - "num_input_tokens_seen": 29325480, - "step": 1409 - }, - { - "epoch": 0.16954247580111825, - "grad_norm": 2.1476726294793718, - "learning_rate": 3.799322307662492e-06, - "loss": 0.7899, - "num_input_tokens_seen": 29343020, - "step": 1410 - }, - { - "epoch": 0.16966271869175734, - "grad_norm": 2.754964428139544, - "learning_rate": 3.798982080871496e-06, - "loss": 0.8396, - "num_input_tokens_seen": 29357880, - "step": 1411 - }, - { - "epoch": 0.16978296158239645, - "grad_norm": 2.0320749070398714, - "learning_rate": 3.798641581176349e-06, - "loss": 0.6807, - "num_input_tokens_seen": 29379880, - "step": 1412 - }, - { - "epoch": 0.16990320447303553, - "grad_norm": 2.0666430109772405, - "learning_rate": 3.7983008086287044e-06, - "loss": 0.7441, - "num_input_tokens_seen": 29400920, - "step": 1413 - }, - { - "epoch": 0.1700234473636746, - "grad_norm": 2.4848960376377316, - "learning_rate": 3.797959763280257e-06, - "loss": 0.8016, - "num_input_tokens_seen": 29419325, - "step": 1414 - }, - { - "epoch": 0.17014369025431372, - "grad_norm": 2.3103150987471985, - "learning_rate": 3.797618445182743e-06, - "loss": 0.7874, - "num_input_tokens_seen": 29440440, - "step": 1415 - }, - { - "epoch": 0.1702639331449528, - "grad_norm": 3.26892625245738, - "learning_rate": 3.79727685438794e-06, - "loss": 0.8457, - "num_input_tokens_seen": 29454350, - "step": 1416 - }, - { - "epoch": 0.1703841760355919, - "grad_norm": 0.895684263423864, - "learning_rate": 3.796934990947667e-06, - "loss": 0.6347, - "num_input_tokens_seen": 29515755, - "step": 1417 - }, - { - "epoch": 0.170504418926231, - "grad_norm": 0.9889567298394493, - "learning_rate": 3.7965928549137854e-06, - "loss": 0.6544, - "num_input_tokens_seen": 29572290, - "step": 1418 - }, - { - "epoch": 0.17062466181687008, - "grad_norm": 2.1182777479787775, - "learning_rate": 3.7962504463381953e-06, - "loss": 0.769, - "num_input_tokens_seen": 29593500, - "step": 1419 - }, - { - "epoch": 0.17074490470750917, - "grad_norm": 1.9667559456931794, - "learning_rate": 3.7959077652728412e-06, - "loss": 0.7832, - "num_input_tokens_seen": 29611675, - "step": 1420 - }, - { - "epoch": 0.17086514759814825, - "grad_norm": 2.0891537355877974, - "learning_rate": 3.795564811769707e-06, - "loss": 0.7554, - "num_input_tokens_seen": 29629750, - "step": 1421 - }, - { - "epoch": 0.17098539048878736, - "grad_norm": 2.1678915640683454, - "learning_rate": 3.795221585880818e-06, - "loss": 0.7817, - "num_input_tokens_seen": 29650150, - "step": 1422 - }, - { - "epoch": 0.17110563337942644, - "grad_norm": 1.76719599723931, - "learning_rate": 3.794878087658242e-06, - "loss": 0.912, - "num_input_tokens_seen": 29667640, - "step": 1423 - }, - { - "epoch": 0.17122587627006552, - "grad_norm": 3.1538638824249245, - "learning_rate": 3.7945343171540873e-06, - "loss": 0.7808, - "num_input_tokens_seen": 29688235, - "step": 1424 - }, - { - "epoch": 0.17134611916070464, - "grad_norm": 2.234589477674962, - "learning_rate": 3.7941902744205033e-06, - "loss": 0.7888, - "num_input_tokens_seen": 29708990, - "step": 1425 - }, - { - "epoch": 0.17146636205134372, - "grad_norm": 2.213027658667225, - "learning_rate": 3.7938459595096817e-06, - "loss": 0.8351, - "num_input_tokens_seen": 29727255, - "step": 1426 - }, - { - "epoch": 0.1715866049419828, - "grad_norm": 4.949509936531085, - "learning_rate": 3.7935013724738545e-06, - "loss": 0.8586, - "num_input_tokens_seen": 29747475, - "step": 1427 - }, - { - "epoch": 0.17170684783262188, - "grad_norm": 1.9819250305976224, - "learning_rate": 3.7931565133652945e-06, - "loss": 0.7762, - "num_input_tokens_seen": 29767270, - "step": 1428 - }, - { - "epoch": 0.171827090723261, - "grad_norm": 3.10670289614236, - "learning_rate": 3.792811382236317e-06, - "loss": 0.6751, - "num_input_tokens_seen": 29785500, - "step": 1429 - }, - { - "epoch": 0.17194733361390008, - "grad_norm": 4.828839008089451, - "learning_rate": 3.792465979139279e-06, - "loss": 0.7759, - "num_input_tokens_seen": 29807825, - "step": 1430 - }, - { - "epoch": 0.17206757650453916, - "grad_norm": 1.0843571136356336, - "learning_rate": 3.792120304126576e-06, - "loss": 0.733, - "num_input_tokens_seen": 29870920, - "step": 1431 - }, - { - "epoch": 0.17218781939517827, - "grad_norm": 1.892758435013736, - "learning_rate": 3.791774357250649e-06, - "loss": 0.8348, - "num_input_tokens_seen": 29889470, - "step": 1432 - }, - { - "epoch": 0.17230806228581735, - "grad_norm": 2.384290095308587, - "learning_rate": 3.7914281385639757e-06, - "loss": 0.791, - "num_input_tokens_seen": 29907065, - "step": 1433 - }, - { - "epoch": 0.17242830517645644, - "grad_norm": 1.8831846089619875, - "learning_rate": 3.7910816481190784e-06, - "loss": 0.7903, - "num_input_tokens_seen": 29926600, - "step": 1434 - }, - { - "epoch": 0.17254854806709552, - "grad_norm": 1.8458086399803142, - "learning_rate": 3.7907348859685193e-06, - "loss": 0.7479, - "num_input_tokens_seen": 29948025, - "step": 1435 - }, - { - "epoch": 0.17266879095773463, - "grad_norm": 2.6620837148082814, - "learning_rate": 3.790387852164902e-06, - "loss": 0.8019, - "num_input_tokens_seen": 29968475, - "step": 1436 - }, - { - "epoch": 0.1727890338483737, - "grad_norm": 2.242297848365326, - "learning_rate": 3.7900405467608707e-06, - "loss": 0.7739, - "num_input_tokens_seen": 29987740, - "step": 1437 - }, - { - "epoch": 0.1729092767390128, - "grad_norm": 3.2953358127133536, - "learning_rate": 3.7896929698091114e-06, - "loss": 0.7797, - "num_input_tokens_seen": 30000275, - "step": 1438 - }, - { - "epoch": 0.1730295196296519, - "grad_norm": 5.49889199371149, - "learning_rate": 3.7893451213623518e-06, - "loss": 0.6952, - "num_input_tokens_seen": 30017225, - "step": 1439 - }, - { - "epoch": 0.173149762520291, - "grad_norm": 2.2572037389059427, - "learning_rate": 3.7889970014733606e-06, - "loss": 0.8153, - "num_input_tokens_seen": 30036050, - "step": 1440 - }, - { - "epoch": 0.17327000541093007, - "grad_norm": 1.7027231033888086, - "learning_rate": 3.7886486101949463e-06, - "loss": 0.7696, - "num_input_tokens_seen": 30056950, - "step": 1441 - }, - { - "epoch": 0.17339024830156918, - "grad_norm": 2.36995084415686, - "learning_rate": 3.7882999475799594e-06, - "loss": 0.8704, - "num_input_tokens_seen": 30074705, - "step": 1442 - }, - { - "epoch": 0.17351049119220827, - "grad_norm": 2.0609998726922147, - "learning_rate": 3.787951013681293e-06, - "loss": 0.8061, - "num_input_tokens_seen": 30092470, - "step": 1443 - }, - { - "epoch": 0.17363073408284735, - "grad_norm": 1.9826865241432492, - "learning_rate": 3.787601808551879e-06, - "loss": 0.7778, - "num_input_tokens_seen": 30112005, - "step": 1444 - }, - { - "epoch": 0.17375097697348643, - "grad_norm": 2.5125451356639577, - "learning_rate": 3.7872523322446926e-06, - "loss": 0.8422, - "num_input_tokens_seen": 30130610, - "step": 1445 - }, - { - "epoch": 0.17387121986412554, - "grad_norm": 2.5177695768721593, - "learning_rate": 3.7869025848127478e-06, - "loss": 0.6008, - "num_input_tokens_seen": 30154525, - "step": 1446 - }, - { - "epoch": 0.17399146275476463, - "grad_norm": 2.5390598202422927, - "learning_rate": 3.7865525663091018e-06, - "loss": 0.8061, - "num_input_tokens_seen": 30172455, - "step": 1447 - }, - { - "epoch": 0.1741117056454037, - "grad_norm": 2.3620137624981137, - "learning_rate": 3.7862022767868517e-06, - "loss": 0.865, - "num_input_tokens_seen": 30189765, - "step": 1448 - }, - { - "epoch": 0.17423194853604282, - "grad_norm": 1.914694058538217, - "learning_rate": 3.7858517162991367e-06, - "loss": 0.8463, - "num_input_tokens_seen": 30209560, - "step": 1449 - }, - { - "epoch": 0.1743521914266819, - "grad_norm": 3.4358997147705153, - "learning_rate": 3.7855008848991363e-06, - "loss": 0.6043, - "num_input_tokens_seen": 30227485, - "step": 1450 - }, - { - "epoch": 0.17447243431732098, - "grad_norm": 2.0538998885345703, - "learning_rate": 3.7851497826400714e-06, - "loss": 0.7785, - "num_input_tokens_seen": 30247345, - "step": 1451 - }, - { - "epoch": 0.17459267720796007, - "grad_norm": 2.200192153615084, - "learning_rate": 3.7847984095752034e-06, - "loss": 0.7525, - "num_input_tokens_seen": 30270520, - "step": 1452 - }, - { - "epoch": 0.17471292009859918, - "grad_norm": 2.0587147703885846, - "learning_rate": 3.784446765757836e-06, - "loss": 0.8058, - "num_input_tokens_seen": 30288885, - "step": 1453 - }, - { - "epoch": 0.17483316298923826, - "grad_norm": 2.1658740572742463, - "learning_rate": 3.7840948512413133e-06, - "loss": 0.7723, - "num_input_tokens_seen": 30306190, - "step": 1454 - }, - { - "epoch": 0.17495340587987734, - "grad_norm": 2.2002313052141362, - "learning_rate": 3.7837426660790196e-06, - "loss": 0.7779, - "num_input_tokens_seen": 30327325, - "step": 1455 - }, - { - "epoch": 0.17507364877051645, - "grad_norm": 2.4941086292106673, - "learning_rate": 3.783390210324382e-06, - "loss": 0.8161, - "num_input_tokens_seen": 30346770, - "step": 1456 - }, - { - "epoch": 0.17519389166115554, - "grad_norm": 2.5452965406995958, - "learning_rate": 3.7830374840308676e-06, - "loss": 0.7281, - "num_input_tokens_seen": 30366645, - "step": 1457 - }, - { - "epoch": 0.17531413455179462, - "grad_norm": 2.560689283840395, - "learning_rate": 3.7826844872519842e-06, - "loss": 0.8361, - "num_input_tokens_seen": 30384220, - "step": 1458 - }, - { - "epoch": 0.1754343774424337, - "grad_norm": 2.074247891424234, - "learning_rate": 3.782331220041282e-06, - "loss": 0.7278, - "num_input_tokens_seen": 30404005, - "step": 1459 - }, - { - "epoch": 0.17555462033307281, - "grad_norm": 3.1996771160907143, - "learning_rate": 3.7819776824523504e-06, - "loss": 0.82, - "num_input_tokens_seen": 30421590, - "step": 1460 - }, - { - "epoch": 0.1756748632237119, - "grad_norm": 3.0689989716256143, - "learning_rate": 3.7816238745388213e-06, - "loss": 0.8375, - "num_input_tokens_seen": 30440855, - "step": 1461 - }, - { - "epoch": 0.17579510611435098, - "grad_norm": 2.0200199989520735, - "learning_rate": 3.781269796354367e-06, - "loss": 0.8758, - "num_input_tokens_seen": 30460195, - "step": 1462 - }, - { - "epoch": 0.1759153490049901, - "grad_norm": 2.435885321014921, - "learning_rate": 3.7809154479527006e-06, - "loss": 0.8689, - "num_input_tokens_seen": 30479120, - "step": 1463 - }, - { - "epoch": 0.17603559189562917, - "grad_norm": 2.6333516128504497, - "learning_rate": 3.780560829387577e-06, - "loss": 0.83, - "num_input_tokens_seen": 30497340, - "step": 1464 - }, - { - "epoch": 0.17615583478626826, - "grad_norm": 0.9006979444395685, - "learning_rate": 3.7802059407127915e-06, - "loss": 0.6157, - "num_input_tokens_seen": 30555610, - "step": 1465 - }, - { - "epoch": 0.17627607767690734, - "grad_norm": 2.443150298852987, - "learning_rate": 3.7798507819821797e-06, - "loss": 0.8597, - "num_input_tokens_seen": 30572455, - "step": 1466 - }, - { - "epoch": 0.17639632056754645, - "grad_norm": 2.4955983915757836, - "learning_rate": 3.7794953532496197e-06, - "loss": 0.7954, - "num_input_tokens_seen": 30588080, - "step": 1467 - }, - { - "epoch": 0.17651656345818553, - "grad_norm": 2.3345481875803054, - "learning_rate": 3.7791396545690295e-06, - "loss": 0.6404, - "num_input_tokens_seen": 30649035, - "step": 1468 - }, - { - "epoch": 0.17663680634882462, - "grad_norm": 2.0690896275679527, - "learning_rate": 3.7787836859943685e-06, - "loss": 0.8019, - "num_input_tokens_seen": 30667480, - "step": 1469 - }, - { - "epoch": 0.17675704923946373, - "grad_norm": 2.49706896908787, - "learning_rate": 3.7784274475796363e-06, - "loss": 0.7893, - "num_input_tokens_seen": 30685830, - "step": 1470 - }, - { - "epoch": 0.1768772921301028, - "grad_norm": 2.427799774441815, - "learning_rate": 3.7780709393788745e-06, - "loss": 0.7697, - "num_input_tokens_seen": 30706025, - "step": 1471 - }, - { - "epoch": 0.1769975350207419, - "grad_norm": 2.2401360563273083, - "learning_rate": 3.7777141614461647e-06, - "loss": 0.7484, - "num_input_tokens_seen": 30725450, - "step": 1472 - }, - { - "epoch": 0.177117777911381, - "grad_norm": 2.5582807268762053, - "learning_rate": 3.7773571138356304e-06, - "loss": 0.6875, - "num_input_tokens_seen": 30745340, - "step": 1473 - }, - { - "epoch": 0.17723802080202009, - "grad_norm": 2.3214489684297677, - "learning_rate": 3.776999796601435e-06, - "loss": 0.8938, - "num_input_tokens_seen": 30763820, - "step": 1474 - }, - { - "epoch": 0.17735826369265917, - "grad_norm": 2.0717794265190355, - "learning_rate": 3.776642209797783e-06, - "loss": 0.7166, - "num_input_tokens_seen": 30785370, - "step": 1475 - }, - { - "epoch": 0.17747850658329825, - "grad_norm": 3.816360025997515, - "learning_rate": 3.7762843534789205e-06, - "loss": 0.7768, - "num_input_tokens_seen": 30803840, - "step": 1476 - }, - { - "epoch": 0.17759874947393736, - "grad_norm": 2.5972658467495053, - "learning_rate": 3.7759262276991343e-06, - "loss": 0.8728, - "num_input_tokens_seen": 30821170, - "step": 1477 - }, - { - "epoch": 0.17771899236457644, - "grad_norm": 2.421399770959549, - "learning_rate": 3.7755678325127506e-06, - "loss": 0.8029, - "num_input_tokens_seen": 30838570, - "step": 1478 - }, - { - "epoch": 0.17783923525521553, - "grad_norm": 1.856629464823198, - "learning_rate": 3.7752091679741393e-06, - "loss": 0.7541, - "num_input_tokens_seen": 30856080, - "step": 1479 - }, - { - "epoch": 0.17795947814585464, - "grad_norm": 3.0543108103926246, - "learning_rate": 3.774850234137708e-06, - "loss": 0.7765, - "num_input_tokens_seen": 30873095, - "step": 1480 - }, - { - "epoch": 0.17807972103649372, - "grad_norm": 2.96939523560821, - "learning_rate": 3.7744910310579076e-06, - "loss": 0.8255, - "num_input_tokens_seen": 30891740, - "step": 1481 - }, - { - "epoch": 0.1781999639271328, - "grad_norm": 2.8133808228960855, - "learning_rate": 3.774131558789229e-06, - "loss": 0.848, - "num_input_tokens_seen": 30910790, - "step": 1482 - }, - { - "epoch": 0.1783202068177719, - "grad_norm": 3.394090806278892, - "learning_rate": 3.773771817386203e-06, - "loss": 0.6914, - "num_input_tokens_seen": 30927840, - "step": 1483 - }, - { - "epoch": 0.178440449708411, - "grad_norm": 3.9206975838561537, - "learning_rate": 3.773411806903403e-06, - "loss": 0.7942, - "num_input_tokens_seen": 30946640, - "step": 1484 - }, - { - "epoch": 0.17856069259905008, - "grad_norm": 2.1575725985502188, - "learning_rate": 3.7730515273954415e-06, - "loss": 0.9333, - "num_input_tokens_seen": 30964970, - "step": 1485 - }, - { - "epoch": 0.17868093548968916, - "grad_norm": 3.038165178394511, - "learning_rate": 3.772690978916973e-06, - "loss": 0.8353, - "num_input_tokens_seen": 30984445, - "step": 1486 - }, - { - "epoch": 0.17880117838032827, - "grad_norm": 3.5700927006787975, - "learning_rate": 3.772330161522693e-06, - "loss": 0.8741, - "num_input_tokens_seen": 31002075, - "step": 1487 - }, - { - "epoch": 0.17892142127096736, - "grad_norm": 2.47945334178659, - "learning_rate": 3.7719690752673365e-06, - "loss": 0.7982, - "num_input_tokens_seen": 31022590, - "step": 1488 - }, - { - "epoch": 0.17904166416160644, - "grad_norm": 2.7410631767727565, - "learning_rate": 3.7716077202056796e-06, - "loss": 0.7901, - "num_input_tokens_seen": 31040785, - "step": 1489 - }, - { - "epoch": 0.17916190705224552, - "grad_norm": 4.627595195793667, - "learning_rate": 3.7712460963925404e-06, - "loss": 0.9243, - "num_input_tokens_seen": 31056445, - "step": 1490 - }, - { - "epoch": 0.17928214994288463, - "grad_norm": 2.0240293846166386, - "learning_rate": 3.7708842038827775e-06, - "loss": 0.7474, - "num_input_tokens_seen": 31075125, - "step": 1491 - }, - { - "epoch": 0.17940239283352372, - "grad_norm": 1.8725844728970533, - "learning_rate": 3.770522042731288e-06, - "loss": 0.8479, - "num_input_tokens_seen": 31096740, - "step": 1492 - }, - { - "epoch": 0.1795226357241628, - "grad_norm": 2.4820110686434935, - "learning_rate": 3.7701596129930122e-06, - "loss": 0.8761, - "num_input_tokens_seen": 31115185, - "step": 1493 - }, - { - "epoch": 0.1796428786148019, - "grad_norm": 2.3434476543630147, - "learning_rate": 3.7697969147229315e-06, - "loss": 0.7254, - "num_input_tokens_seen": 31133065, - "step": 1494 - }, - { - "epoch": 0.179763121505441, - "grad_norm": 2.231594758620423, - "learning_rate": 3.7694339479760647e-06, - "loss": 0.8438, - "num_input_tokens_seen": 31151815, - "step": 1495 - }, - { - "epoch": 0.17988336439608008, - "grad_norm": 0.817100011807163, - "learning_rate": 3.769070712807476e-06, - "loss": 0.6057, - "num_input_tokens_seen": 31213565, - "step": 1496 - }, - { - "epoch": 0.18000360728671919, - "grad_norm": 1.8113649945854873, - "learning_rate": 3.768707209272266e-06, - "loss": 0.7844, - "num_input_tokens_seen": 31233415, - "step": 1497 - }, - { - "epoch": 0.18012385017735827, - "grad_norm": 4.42906657152381, - "learning_rate": 3.768343437425579e-06, - "loss": 0.761, - "num_input_tokens_seen": 31251705, - "step": 1498 - }, - { - "epoch": 0.18024409306799735, - "grad_norm": 2.3559640083407154, - "learning_rate": 3.7679793973225987e-06, - "loss": 0.8584, - "num_input_tokens_seen": 31267235, - "step": 1499 - }, - { - "epoch": 0.18036433595863643, - "grad_norm": 0.9283757103183825, - "learning_rate": 3.767615089018549e-06, - "loss": 0.6409, - "num_input_tokens_seen": 31329300, - "step": 1500 - }, - { - "epoch": 0.18048457884927555, - "grad_norm": 1.983195447459231, - "learning_rate": 3.7672505125686966e-06, - "loss": 0.85, - "num_input_tokens_seen": 31345385, - "step": 1501 - }, - { - "epoch": 0.18060482173991463, - "grad_norm": 2.9594970320730645, - "learning_rate": 3.7668856680283455e-06, - "loss": 0.863, - "num_input_tokens_seen": 31362130, - "step": 1502 - }, - { - "epoch": 0.1807250646305537, - "grad_norm": 2.2318074820686102, - "learning_rate": 3.7665205554528437e-06, - "loss": 0.8214, - "num_input_tokens_seen": 31381205, - "step": 1503 - }, - { - "epoch": 0.18084530752119282, - "grad_norm": 2.361084715862096, - "learning_rate": 3.7661551748975782e-06, - "loss": 0.7509, - "num_input_tokens_seen": 31399100, - "step": 1504 - }, - { - "epoch": 0.1809655504118319, - "grad_norm": 0.8497449757372831, - "learning_rate": 3.7657895264179772e-06, - "loss": 0.6147, - "num_input_tokens_seen": 31454795, - "step": 1505 - }, - { - "epoch": 0.181085793302471, - "grad_norm": 2.657925669348378, - "learning_rate": 3.765423610069509e-06, - "loss": 0.7475, - "num_input_tokens_seen": 31479905, - "step": 1506 - }, - { - "epoch": 0.18120603619311007, - "grad_norm": 2.7126668062439423, - "learning_rate": 3.765057425907683e-06, - "loss": 0.7256, - "num_input_tokens_seen": 31501085, - "step": 1507 - }, - { - "epoch": 0.18132627908374918, - "grad_norm": 2.211402235578531, - "learning_rate": 3.764690973988048e-06, - "loss": 0.78, - "num_input_tokens_seen": 31521145, - "step": 1508 - }, - { - "epoch": 0.18144652197438826, - "grad_norm": 3.0580549500636818, - "learning_rate": 3.7643242543661963e-06, - "loss": 0.7427, - "num_input_tokens_seen": 31543525, - "step": 1509 - }, - { - "epoch": 0.18156676486502735, - "grad_norm": 0.8741238408541412, - "learning_rate": 3.7639572670977573e-06, - "loss": 0.6402, - "num_input_tokens_seen": 31598740, - "step": 1510 - }, - { - "epoch": 0.18168700775566646, - "grad_norm": 1.6119891590307953, - "learning_rate": 3.7635900122384042e-06, - "loss": 0.7635, - "num_input_tokens_seen": 31621455, - "step": 1511 - }, - { - "epoch": 0.18180725064630554, - "grad_norm": 2.562713905486325, - "learning_rate": 3.7632224898438477e-06, - "loss": 0.865, - "num_input_tokens_seen": 31637650, - "step": 1512 - }, - { - "epoch": 0.18192749353694462, - "grad_norm": 1.900212068018805, - "learning_rate": 3.762854699969842e-06, - "loss": 0.7919, - "num_input_tokens_seen": 31657880, - "step": 1513 - }, - { - "epoch": 0.1820477364275837, - "grad_norm": 2.0324910335456874, - "learning_rate": 3.762486642672179e-06, - "loss": 0.7216, - "num_input_tokens_seen": 31674540, - "step": 1514 - }, - { - "epoch": 0.18216797931822282, - "grad_norm": 2.8958883369781443, - "learning_rate": 3.7621183180066946e-06, - "loss": 0.8647, - "num_input_tokens_seen": 31692220, - "step": 1515 - }, - { - "epoch": 0.1822882222088619, - "grad_norm": 1.6972364002689149, - "learning_rate": 3.7617497260292625e-06, - "loss": 0.7358, - "num_input_tokens_seen": 31713995, - "step": 1516 - }, - { - "epoch": 0.18240846509950098, - "grad_norm": 3.5413388547440685, - "learning_rate": 3.7613808667957967e-06, - "loss": 0.797, - "num_input_tokens_seen": 31726405, - "step": 1517 - }, - { - "epoch": 0.1825287079901401, - "grad_norm": 2.4730921681730065, - "learning_rate": 3.7610117403622547e-06, - "loss": 0.9085, - "num_input_tokens_seen": 31742685, - "step": 1518 - }, - { - "epoch": 0.18264895088077918, - "grad_norm": 1.9939354510108345, - "learning_rate": 3.7606423467846313e-06, - "loss": 0.897, - "num_input_tokens_seen": 31762010, - "step": 1519 - }, - { - "epoch": 0.18276919377141826, - "grad_norm": 1.541853824738568, - "learning_rate": 3.760272686118964e-06, - "loss": 0.7938, - "num_input_tokens_seen": 31779950, - "step": 1520 - }, - { - "epoch": 0.18288943666205737, - "grad_norm": 1.9722612599950973, - "learning_rate": 3.7599027584213297e-06, - "loss": 0.9158, - "num_input_tokens_seen": 31798550, - "step": 1521 - }, - { - "epoch": 0.18300967955269645, - "grad_norm": 2.3414316911970934, - "learning_rate": 3.7595325637478465e-06, - "loss": 0.774, - "num_input_tokens_seen": 31816295, - "step": 1522 - }, - { - "epoch": 0.18312992244333554, - "grad_norm": 1.8754532727653939, - "learning_rate": 3.7591621021546723e-06, - "loss": 0.8173, - "num_input_tokens_seen": 31838010, - "step": 1523 - }, - { - "epoch": 0.18325016533397462, - "grad_norm": 2.3308493616537382, - "learning_rate": 3.7587913736980062e-06, - "loss": 0.8117, - "num_input_tokens_seen": 31857370, - "step": 1524 - }, - { - "epoch": 0.18337040822461373, - "grad_norm": 2.0447281442595626, - "learning_rate": 3.7584203784340865e-06, - "loss": 0.8438, - "num_input_tokens_seen": 31876260, - "step": 1525 - }, - { - "epoch": 0.1834906511152528, - "grad_norm": 2.9268539782783476, - "learning_rate": 3.7580491164191938e-06, - "loss": 0.8485, - "num_input_tokens_seen": 31894290, - "step": 1526 - }, - { - "epoch": 0.1836108940058919, - "grad_norm": 0.7961140701586151, - "learning_rate": 3.757677587709648e-06, - "loss": 0.6337, - "num_input_tokens_seen": 31957275, - "step": 1527 - }, - { - "epoch": 0.183731136896531, - "grad_norm": 2.538589584797089, - "learning_rate": 3.7573057923618095e-06, - "loss": 0.7563, - "num_input_tokens_seen": 31977090, - "step": 1528 - }, - { - "epoch": 0.1838513797871701, - "grad_norm": 2.486400045525193, - "learning_rate": 3.7569337304320793e-06, - "loss": 0.7415, - "num_input_tokens_seen": 31996395, - "step": 1529 - }, - { - "epoch": 0.18397162267780917, - "grad_norm": 0.8670439149400267, - "learning_rate": 3.756561401976899e-06, - "loss": 0.6564, - "num_input_tokens_seen": 32055820, - "step": 1530 - }, - { - "epoch": 0.18409186556844825, - "grad_norm": 2.2571183394544994, - "learning_rate": 3.7561888070527514e-06, - "loss": 0.8182, - "num_input_tokens_seen": 32077580, - "step": 1531 - }, - { - "epoch": 0.18421210845908736, - "grad_norm": 2.324072075766497, - "learning_rate": 3.7558159457161577e-06, - "loss": 0.7989, - "num_input_tokens_seen": 32095265, - "step": 1532 - }, - { - "epoch": 0.18433235134972645, - "grad_norm": 2.5874483375876918, - "learning_rate": 3.755442818023681e-06, - "loss": 0.7754, - "num_input_tokens_seen": 32114610, - "step": 1533 - }, - { - "epoch": 0.18445259424036553, - "grad_norm": 2.1318883676994735, - "learning_rate": 3.7550694240319246e-06, - "loss": 0.7596, - "num_input_tokens_seen": 32132205, - "step": 1534 - }, - { - "epoch": 0.18457283713100464, - "grad_norm": 2.292800877515025, - "learning_rate": 3.7546957637975326e-06, - "loss": 0.7752, - "num_input_tokens_seen": 32149335, - "step": 1535 - }, - { - "epoch": 0.18469308002164372, - "grad_norm": 1.676362903220582, - "learning_rate": 3.7543218373771873e-06, - "loss": 0.7427, - "num_input_tokens_seen": 32168380, - "step": 1536 - }, - { - "epoch": 0.1848133229122828, - "grad_norm": 1.3722554624399752, - "learning_rate": 3.7539476448276145e-06, - "loss": 0.779, - "num_input_tokens_seen": 32191560, - "step": 1537 - }, - { - "epoch": 0.1849335658029219, - "grad_norm": 0.8277266087378189, - "learning_rate": 3.753573186205579e-06, - "loss": 0.5996, - "num_input_tokens_seen": 32259400, - "step": 1538 - }, - { - "epoch": 0.185053808693561, - "grad_norm": 2.5593223668236327, - "learning_rate": 3.753198461567885e-06, - "loss": 0.7781, - "num_input_tokens_seen": 32276365, - "step": 1539 - }, - { - "epoch": 0.18517405158420008, - "grad_norm": 1.8610055825487877, - "learning_rate": 3.7528234709713783e-06, - "loss": 0.9165, - "num_input_tokens_seen": 32298830, - "step": 1540 - }, - { - "epoch": 0.18529429447483917, - "grad_norm": 2.1749754840799818, - "learning_rate": 3.7524482144729447e-06, - "loss": 0.8385, - "num_input_tokens_seen": 32318005, - "step": 1541 - }, - { - "epoch": 0.18541453736547828, - "grad_norm": 2.2872127328970913, - "learning_rate": 3.7520726921295106e-06, - "loss": 0.8395, - "num_input_tokens_seen": 32334445, - "step": 1542 - }, - { - "epoch": 0.18553478025611736, - "grad_norm": 1.981363897263376, - "learning_rate": 3.751696903998042e-06, - "loss": 0.7258, - "num_input_tokens_seen": 32352800, - "step": 1543 - }, - { - "epoch": 0.18565502314675644, - "grad_norm": 1.699634041629413, - "learning_rate": 3.7513208501355456e-06, - "loss": 0.6974, - "num_input_tokens_seen": 32373625, - "step": 1544 - }, - { - "epoch": 0.18577526603739553, - "grad_norm": 1.969548087294321, - "learning_rate": 3.750944530599069e-06, - "loss": 0.8388, - "num_input_tokens_seen": 32392915, - "step": 1545 - }, - { - "epoch": 0.18589550892803464, - "grad_norm": 2.5525588910784704, - "learning_rate": 3.7505679454456992e-06, - "loss": 0.8125, - "num_input_tokens_seen": 32409245, - "step": 1546 - }, - { - "epoch": 0.18601575181867372, - "grad_norm": 2.39695210265176, - "learning_rate": 3.750191094732564e-06, - "loss": 0.6997, - "num_input_tokens_seen": 32429830, - "step": 1547 - }, - { - "epoch": 0.1861359947093128, - "grad_norm": 2.601566560511127, - "learning_rate": 3.749813978516831e-06, - "loss": 0.7416, - "num_input_tokens_seen": 32450155, - "step": 1548 - }, - { - "epoch": 0.1862562375999519, - "grad_norm": 1.8289719031512919, - "learning_rate": 3.749436596855709e-06, - "loss": 0.7796, - "num_input_tokens_seen": 32469175, - "step": 1549 - }, - { - "epoch": 0.186376480490591, - "grad_norm": 1.8658897214878047, - "learning_rate": 3.749058949806446e-06, - "loss": 0.9114, - "num_input_tokens_seen": 32485620, - "step": 1550 - }, - { - "epoch": 0.18649672338123008, - "grad_norm": 1.9598513917463747, - "learning_rate": 3.748681037426331e-06, - "loss": 0.833, - "num_input_tokens_seen": 32504550, - "step": 1551 - }, - { - "epoch": 0.1866169662718692, - "grad_norm": 2.3075000493749407, - "learning_rate": 3.748302859772693e-06, - "loss": 0.913, - "num_input_tokens_seen": 32521040, - "step": 1552 - }, - { - "epoch": 0.18673720916250827, - "grad_norm": 2.673151895887735, - "learning_rate": 3.7479244169029017e-06, - "loss": 0.619, - "num_input_tokens_seen": 32540550, - "step": 1553 - }, - { - "epoch": 0.18685745205314735, - "grad_norm": 3.721390958786071, - "learning_rate": 3.7475457088743658e-06, - "loss": 0.7414, - "num_input_tokens_seen": 32557520, - "step": 1554 - }, - { - "epoch": 0.18697769494378644, - "grad_norm": 3.6636415545223953, - "learning_rate": 3.7471667357445348e-06, - "loss": 0.7419, - "num_input_tokens_seen": 32577070, - "step": 1555 - }, - { - "epoch": 0.18709793783442555, - "grad_norm": 2.137534855952005, - "learning_rate": 3.7467874975709003e-06, - "loss": 0.7225, - "num_input_tokens_seen": 32597595, - "step": 1556 - }, - { - "epoch": 0.18721818072506463, - "grad_norm": 2.8209618866363457, - "learning_rate": 3.7464079944109904e-06, - "loss": 0.7827, - "num_input_tokens_seen": 32619175, - "step": 1557 - }, - { - "epoch": 0.18733842361570371, - "grad_norm": 2.42780704004583, - "learning_rate": 3.746028226322376e-06, - "loss": 0.7748, - "num_input_tokens_seen": 32634775, - "step": 1558 - }, - { - "epoch": 0.18745866650634282, - "grad_norm": 2.1734248595822696, - "learning_rate": 3.7456481933626686e-06, - "loss": 0.7576, - "num_input_tokens_seen": 32653850, - "step": 1559 - }, - { - "epoch": 0.1875789093969819, - "grad_norm": 3.9478239557000108, - "learning_rate": 3.745267895589518e-06, - "loss": 0.7294, - "num_input_tokens_seen": 32672110, - "step": 1560 - }, - { - "epoch": 0.187699152287621, - "grad_norm": 1.9869960281018462, - "learning_rate": 3.7448873330606154e-06, - "loss": 0.8117, - "num_input_tokens_seen": 32689600, - "step": 1561 - }, - { - "epoch": 0.18781939517826007, - "grad_norm": 2.5533182962354406, - "learning_rate": 3.7445065058336914e-06, - "loss": 0.8758, - "num_input_tokens_seen": 32708190, - "step": 1562 - }, - { - "epoch": 0.18793963806889918, - "grad_norm": 2.0350589320206147, - "learning_rate": 3.7441254139665176e-06, - "loss": 0.8614, - "num_input_tokens_seen": 32724095, - "step": 1563 - }, - { - "epoch": 0.18805988095953827, - "grad_norm": 2.07771725044608, - "learning_rate": 3.743744057516905e-06, - "loss": 0.8237, - "num_input_tokens_seen": 32741875, - "step": 1564 - }, - { - "epoch": 0.18818012385017735, - "grad_norm": 3.5048968201694155, - "learning_rate": 3.743362436542706e-06, - "loss": 0.8811, - "num_input_tokens_seen": 32756285, - "step": 1565 - }, - { - "epoch": 0.18830036674081646, - "grad_norm": 2.218619394655088, - "learning_rate": 3.7429805511018115e-06, - "loss": 0.7671, - "num_input_tokens_seen": 32777665, - "step": 1566 - }, - { - "epoch": 0.18842060963145554, - "grad_norm": 1.8716675557089433, - "learning_rate": 3.742598401252153e-06, - "loss": 0.7753, - "num_input_tokens_seen": 32797585, - "step": 1567 - }, - { - "epoch": 0.18854085252209463, - "grad_norm": 0.7568653270311523, - "learning_rate": 3.7422159870517025e-06, - "loss": 0.6264, - "num_input_tokens_seen": 32862560, - "step": 1568 - }, - { - "epoch": 0.1886610954127337, - "grad_norm": 1.6156463420438107, - "learning_rate": 3.7418333085584717e-06, - "loss": 0.7854, - "num_input_tokens_seen": 32883465, - "step": 1569 - }, - { - "epoch": 0.18878133830337282, - "grad_norm": 2.020256887929179, - "learning_rate": 3.7414503658305128e-06, - "loss": 0.914, - "num_input_tokens_seen": 32900420, - "step": 1570 - }, - { - "epoch": 0.1889015811940119, - "grad_norm": 6.8800146059166, - "learning_rate": 3.7410671589259185e-06, - "loss": 0.773, - "num_input_tokens_seen": 32918740, - "step": 1571 - }, - { - "epoch": 0.18902182408465099, - "grad_norm": 1.8617271403653202, - "learning_rate": 3.7406836879028205e-06, - "loss": 0.7906, - "num_input_tokens_seen": 32938685, - "step": 1572 - }, - { - "epoch": 0.1891420669752901, - "grad_norm": 2.1303595084970004, - "learning_rate": 3.7402999528193907e-06, - "loss": 0.7732, - "num_input_tokens_seen": 32957905, - "step": 1573 - }, - { - "epoch": 0.18926230986592918, - "grad_norm": 4.408069293320368, - "learning_rate": 3.739915953733842e-06, - "loss": 0.8498, - "num_input_tokens_seen": 32975670, - "step": 1574 - }, - { - "epoch": 0.18938255275656826, - "grad_norm": 3.2798886078506246, - "learning_rate": 3.7395316907044264e-06, - "loss": 0.8096, - "num_input_tokens_seen": 32996175, - "step": 1575 - }, - { - "epoch": 0.18950279564720737, - "grad_norm": 1.8304855598548924, - "learning_rate": 3.7391471637894364e-06, - "loss": 0.7841, - "num_input_tokens_seen": 33018160, - "step": 1576 - }, - { - "epoch": 0.18962303853784646, - "grad_norm": 5.080045571426592, - "learning_rate": 3.7387623730472046e-06, - "loss": 0.8523, - "num_input_tokens_seen": 33037800, - "step": 1577 - }, - { - "epoch": 0.18974328142848554, - "grad_norm": 1.657702952235094, - "learning_rate": 3.738377318536103e-06, - "loss": 0.8264, - "num_input_tokens_seen": 33057405, - "step": 1578 - }, - { - "epoch": 0.18986352431912462, - "grad_norm": 2.556705142139931, - "learning_rate": 3.7379920003145447e-06, - "loss": 0.7176, - "num_input_tokens_seen": 33071400, - "step": 1579 - }, - { - "epoch": 0.18998376720976373, - "grad_norm": 3.5880439538801547, - "learning_rate": 3.7376064184409817e-06, - "loss": 0.8379, - "num_input_tokens_seen": 33090700, - "step": 1580 - }, - { - "epoch": 0.19010401010040281, - "grad_norm": 1.747660119237241, - "learning_rate": 3.7372205729739063e-06, - "loss": 0.8632, - "num_input_tokens_seen": 33112235, - "step": 1581 - }, - { - "epoch": 0.1902242529910419, - "grad_norm": 2.2904560904065843, - "learning_rate": 3.7368344639718514e-06, - "loss": 0.7161, - "num_input_tokens_seen": 33129890, - "step": 1582 - }, - { - "epoch": 0.190344495881681, - "grad_norm": 1.6127607413479657, - "learning_rate": 3.7364480914933895e-06, - "loss": 0.8049, - "num_input_tokens_seen": 33149850, - "step": 1583 - }, - { - "epoch": 0.1904647387723201, - "grad_norm": 2.594618153342351, - "learning_rate": 3.7360614555971325e-06, - "loss": 0.8129, - "num_input_tokens_seen": 33169225, - "step": 1584 - }, - { - "epoch": 0.19058498166295917, - "grad_norm": 2.4926917021728574, - "learning_rate": 3.735674556341733e-06, - "loss": 0.8523, - "num_input_tokens_seen": 33188560, - "step": 1585 - }, - { - "epoch": 0.19070522455359826, - "grad_norm": 2.3769816338372762, - "learning_rate": 3.7352873937858835e-06, - "loss": 0.8316, - "num_input_tokens_seen": 33209815, - "step": 1586 - }, - { - "epoch": 0.19082546744423737, - "grad_norm": 2.150547181117884, - "learning_rate": 3.7348999679883155e-06, - "loss": 0.717, - "num_input_tokens_seen": 33227715, - "step": 1587 - }, - { - "epoch": 0.19094571033487645, - "grad_norm": 1.9412753327084251, - "learning_rate": 3.7345122790078026e-06, - "loss": 0.8399, - "num_input_tokens_seen": 33245000, - "step": 1588 - }, - { - "epoch": 0.19106595322551553, - "grad_norm": 2.31855575847196, - "learning_rate": 3.7341243269031556e-06, - "loss": 0.9352, - "num_input_tokens_seen": 33263710, - "step": 1589 - }, - { - "epoch": 0.19118619611615464, - "grad_norm": 1.9440815032483427, - "learning_rate": 3.7337361117332275e-06, - "loss": 0.7733, - "num_input_tokens_seen": 33285170, - "step": 1590 - }, - { - "epoch": 0.19130643900679373, - "grad_norm": 2.0384982790293864, - "learning_rate": 3.7333476335569087e-06, - "loss": 0.7621, - "num_input_tokens_seen": 33302890, - "step": 1591 - }, - { - "epoch": 0.1914266818974328, - "grad_norm": 2.499944301921766, - "learning_rate": 3.7329588924331325e-06, - "loss": 0.6631, - "num_input_tokens_seen": 33323815, - "step": 1592 - }, - { - "epoch": 0.1915469247880719, - "grad_norm": 1.8056664987383, - "learning_rate": 3.732569888420871e-06, - "loss": 0.8186, - "num_input_tokens_seen": 33343070, - "step": 1593 - }, - { - "epoch": 0.191667167678711, - "grad_norm": 2.727366731428961, - "learning_rate": 3.732180621579134e-06, - "loss": 0.8272, - "num_input_tokens_seen": 33362005, - "step": 1594 - }, - { - "epoch": 0.1917874105693501, - "grad_norm": 2.4255683870081017, - "learning_rate": 3.731791091966974e-06, - "loss": 0.8092, - "num_input_tokens_seen": 33382920, - "step": 1595 - }, - { - "epoch": 0.19190765345998917, - "grad_norm": 2.680562863715639, - "learning_rate": 3.7314012996434826e-06, - "loss": 0.7626, - "num_input_tokens_seen": 33401370, - "step": 1596 - }, - { - "epoch": 0.19202789635062828, - "grad_norm": 1.9848195910386617, - "learning_rate": 3.7310112446677907e-06, - "loss": 0.8062, - "num_input_tokens_seen": 33419000, - "step": 1597 - }, - { - "epoch": 0.19214813924126736, - "grad_norm": 2.7456396578817506, - "learning_rate": 3.7306209270990695e-06, - "loss": 0.6846, - "num_input_tokens_seen": 33436725, - "step": 1598 - }, - { - "epoch": 0.19226838213190645, - "grad_norm": 3.2930485429403435, - "learning_rate": 3.730230346996529e-06, - "loss": 0.8623, - "num_input_tokens_seen": 33455985, - "step": 1599 - }, - { - "epoch": 0.19238862502254553, - "grad_norm": 11.831647017186912, - "learning_rate": 3.7298395044194206e-06, - "loss": 0.7112, - "num_input_tokens_seen": 33474515, - "step": 1600 - }, - { - "epoch": 0.19250886791318464, - "grad_norm": 2.237600805877356, - "learning_rate": 3.7294483994270356e-06, - "loss": 0.9328, - "num_input_tokens_seen": 33492560, - "step": 1601 - }, - { - "epoch": 0.19262911080382372, - "grad_norm": 2.1683943949036655, - "learning_rate": 3.7290570320787033e-06, - "loss": 0.7714, - "num_input_tokens_seen": 33511860, - "step": 1602 - }, - { - "epoch": 0.1927493536944628, - "grad_norm": 1.99225558453428, - "learning_rate": 3.728665402433793e-06, - "loss": 0.7078, - "num_input_tokens_seen": 33530150, - "step": 1603 - }, - { - "epoch": 0.19286959658510192, - "grad_norm": 3.7084352496584523, - "learning_rate": 3.7282735105517164e-06, - "loss": 0.8591, - "num_input_tokens_seen": 33547995, - "step": 1604 - }, - { - "epoch": 0.192989839475741, - "grad_norm": 3.0591177755805847, - "learning_rate": 3.727881356491922e-06, - "loss": 0.6855, - "num_input_tokens_seen": 33566125, - "step": 1605 - }, - { - "epoch": 0.19311008236638008, - "grad_norm": 2.484198078876229, - "learning_rate": 3.7274889403139002e-06, - "loss": 0.7461, - "num_input_tokens_seen": 33583470, - "step": 1606 - }, - { - "epoch": 0.1932303252570192, - "grad_norm": 2.55931531512891, - "learning_rate": 3.727096262077179e-06, - "loss": 0.7837, - "num_input_tokens_seen": 33602185, - "step": 1607 - }, - { - "epoch": 0.19335056814765827, - "grad_norm": 2.1256388651417333, - "learning_rate": 3.7267033218413285e-06, - "loss": 0.8565, - "num_input_tokens_seen": 33619700, - "step": 1608 - }, - { - "epoch": 0.19347081103829736, - "grad_norm": 2.1351915308048968, - "learning_rate": 3.726310119665957e-06, - "loss": 0.8084, - "num_input_tokens_seen": 33635755, - "step": 1609 - }, - { - "epoch": 0.19359105392893644, - "grad_norm": 1.7371156523740492, - "learning_rate": 3.725916655610713e-06, - "loss": 0.8533, - "num_input_tokens_seen": 33654805, - "step": 1610 - }, - { - "epoch": 0.19371129681957555, - "grad_norm": 3.3087696257322485, - "learning_rate": 3.725522929735284e-06, - "loss": 0.754, - "num_input_tokens_seen": 33671460, - "step": 1611 - }, - { - "epoch": 0.19383153971021463, - "grad_norm": 15.445245229828124, - "learning_rate": 3.725128942099399e-06, - "loss": 0.7426, - "num_input_tokens_seen": 33691580, - "step": 1612 - }, - { - "epoch": 0.19395178260085372, - "grad_norm": 1.7887488418049415, - "learning_rate": 3.7247346927628245e-06, - "loss": 0.8037, - "num_input_tokens_seen": 33711235, - "step": 1613 - }, - { - "epoch": 0.19407202549149283, - "grad_norm": 2.262937169285758, - "learning_rate": 3.7243401817853694e-06, - "loss": 0.7811, - "num_input_tokens_seen": 33731645, - "step": 1614 - }, - { - "epoch": 0.1941922683821319, - "grad_norm": 1.8610910742503932, - "learning_rate": 3.723945409226879e-06, - "loss": 0.729, - "num_input_tokens_seen": 33749855, - "step": 1615 - }, - { - "epoch": 0.194312511272771, - "grad_norm": 2.278757183184574, - "learning_rate": 3.723550375147241e-06, - "loss": 0.8015, - "num_input_tokens_seen": 33764350, - "step": 1616 - }, - { - "epoch": 0.19443275416341008, - "grad_norm": 2.312731662091948, - "learning_rate": 3.723155079606381e-06, - "loss": 0.8052, - "num_input_tokens_seen": 33784080, - "step": 1617 - }, - { - "epoch": 0.1945529970540492, - "grad_norm": 1.7175504066804508, - "learning_rate": 3.722759522664266e-06, - "loss": 0.6448, - "num_input_tokens_seen": 33801100, - "step": 1618 - }, - { - "epoch": 0.19467323994468827, - "grad_norm": 1.8096838385150495, - "learning_rate": 3.7223637043809016e-06, - "loss": 0.8033, - "num_input_tokens_seen": 33819800, - "step": 1619 - }, - { - "epoch": 0.19479348283532735, - "grad_norm": 2.0151905275269693, - "learning_rate": 3.7219676248163322e-06, - "loss": 0.8569, - "num_input_tokens_seen": 33836685, - "step": 1620 - }, - { - "epoch": 0.19491372572596646, - "grad_norm": 1.993810459595368, - "learning_rate": 3.7215712840306428e-06, - "loss": 0.9196, - "num_input_tokens_seen": 33856215, - "step": 1621 - }, - { - "epoch": 0.19503396861660555, - "grad_norm": 2.0166029183667162, - "learning_rate": 3.721174682083959e-06, - "loss": 0.7883, - "num_input_tokens_seen": 33873030, - "step": 1622 - }, - { - "epoch": 0.19515421150724463, - "grad_norm": 1.7962274802802138, - "learning_rate": 3.7207778190364437e-06, - "loss": 0.8138, - "num_input_tokens_seen": 33891175, - "step": 1623 - }, - { - "epoch": 0.1952744543978837, - "grad_norm": 2.062505413255298, - "learning_rate": 3.720380694948302e-06, - "loss": 0.7386, - "num_input_tokens_seen": 33913780, - "step": 1624 - }, - { - "epoch": 0.19539469728852282, - "grad_norm": 1.0062495249915844, - "learning_rate": 3.719983309879777e-06, - "loss": 0.7392, - "num_input_tokens_seen": 33973280, - "step": 1625 - }, - { - "epoch": 0.1955149401791619, - "grad_norm": 1.968081626200981, - "learning_rate": 3.719585663891151e-06, - "loss": 0.7791, - "num_input_tokens_seen": 33990535, - "step": 1626 - }, - { - "epoch": 0.195635183069801, - "grad_norm": 2.145030036503394, - "learning_rate": 3.719187757042747e-06, - "loss": 0.7844, - "num_input_tokens_seen": 34008075, - "step": 1627 - }, - { - "epoch": 0.1957554259604401, - "grad_norm": 0.8194014281998503, - "learning_rate": 3.7187895893949275e-06, - "loss": 0.5991, - "num_input_tokens_seen": 34074265, - "step": 1628 - }, - { - "epoch": 0.19587566885107918, - "grad_norm": 2.365389004919559, - "learning_rate": 3.7183911610080937e-06, - "loss": 0.7569, - "num_input_tokens_seen": 34090850, - "step": 1629 - }, - { - "epoch": 0.19599591174171827, - "grad_norm": 2.6153362527154163, - "learning_rate": 3.7179924719426872e-06, - "loss": 0.7498, - "num_input_tokens_seen": 34108465, - "step": 1630 - }, - { - "epoch": 0.19611615463235738, - "grad_norm": 7.721002012720539, - "learning_rate": 3.7175935222591885e-06, - "loss": 0.758, - "num_input_tokens_seen": 34127485, - "step": 1631 - }, - { - "epoch": 0.19623639752299646, - "grad_norm": 1.8716485317669629, - "learning_rate": 3.717194312018118e-06, - "loss": 0.7481, - "num_input_tokens_seen": 34146190, - "step": 1632 - }, - { - "epoch": 0.19635664041363554, - "grad_norm": 2.1600212081022145, - "learning_rate": 3.716794841280036e-06, - "loss": 0.7621, - "num_input_tokens_seen": 34164615, - "step": 1633 - }, - { - "epoch": 0.19647688330427462, - "grad_norm": 2.632772414073602, - "learning_rate": 3.7163951101055407e-06, - "loss": 0.7763, - "num_input_tokens_seen": 34182395, - "step": 1634 - }, - { - "epoch": 0.19659712619491373, - "grad_norm": 1.8967059228638845, - "learning_rate": 3.715995118555273e-06, - "loss": 0.7919, - "num_input_tokens_seen": 34202090, - "step": 1635 - }, - { - "epoch": 0.19671736908555282, - "grad_norm": 2.785715465416871, - "learning_rate": 3.7155948666899095e-06, - "loss": 0.855, - "num_input_tokens_seen": 34220670, - "step": 1636 - }, - { - "epoch": 0.1968376119761919, - "grad_norm": 1.959296963404671, - "learning_rate": 3.715194354570169e-06, - "loss": 0.7715, - "num_input_tokens_seen": 34240395, - "step": 1637 - }, - { - "epoch": 0.196957854866831, - "grad_norm": 2.5559562882847415, - "learning_rate": 3.714793582256809e-06, - "loss": 0.828, - "num_input_tokens_seen": 34257180, - "step": 1638 - }, - { - "epoch": 0.1970780977574701, - "grad_norm": 2.6840248804523643, - "learning_rate": 3.7143925498106253e-06, - "loss": 0.8388, - "num_input_tokens_seen": 34275440, - "step": 1639 - }, - { - "epoch": 0.19719834064810918, - "grad_norm": 2.040932250147423, - "learning_rate": 3.7139912572924558e-06, - "loss": 0.7885, - "num_input_tokens_seen": 34294190, - "step": 1640 - }, - { - "epoch": 0.19731858353874826, - "grad_norm": 2.6976264910270173, - "learning_rate": 3.7135897047631744e-06, - "loss": 0.8119, - "num_input_tokens_seen": 34311795, - "step": 1641 - }, - { - "epoch": 0.19743882642938737, - "grad_norm": 2.2230732259828865, - "learning_rate": 3.713187892283698e-06, - "loss": 0.7511, - "num_input_tokens_seen": 34331125, - "step": 1642 - }, - { - "epoch": 0.19755906932002645, - "grad_norm": 2.309092914572269, - "learning_rate": 3.7127858199149796e-06, - "loss": 0.8655, - "num_input_tokens_seen": 34346705, - "step": 1643 - }, - { - "epoch": 0.19767931221066554, - "grad_norm": 2.1362879798713603, - "learning_rate": 3.712383487718015e-06, - "loss": 0.7879, - "num_input_tokens_seen": 34364665, - "step": 1644 - }, - { - "epoch": 0.19779955510130465, - "grad_norm": 1.820599807234752, - "learning_rate": 3.7119808957538365e-06, - "loss": 0.8645, - "num_input_tokens_seen": 34383380, - "step": 1645 - }, - { - "epoch": 0.19791979799194373, - "grad_norm": 1.8362251314843832, - "learning_rate": 3.711578044083517e-06, - "loss": 0.8006, - "num_input_tokens_seen": 34399900, - "step": 1646 - }, - { - "epoch": 0.1980400408825828, - "grad_norm": 1.85253181699229, - "learning_rate": 3.7111749327681694e-06, - "loss": 0.7443, - "num_input_tokens_seen": 34419655, - "step": 1647 - }, - { - "epoch": 0.1981602837732219, - "grad_norm": 2.2160189791189353, - "learning_rate": 3.7107715618689455e-06, - "loss": 0.862, - "num_input_tokens_seen": 34438350, - "step": 1648 - }, - { - "epoch": 0.198280526663861, - "grad_norm": 1.431735504671451, - "learning_rate": 3.710367931447035e-06, - "loss": 0.8286, - "num_input_tokens_seen": 34459850, - "step": 1649 - }, - { - "epoch": 0.1984007695545001, - "grad_norm": 2.3948659806251933, - "learning_rate": 3.7099640415636695e-06, - "loss": 0.8616, - "num_input_tokens_seen": 34479205, - "step": 1650 - }, - { - "epoch": 0.19852101244513917, - "grad_norm": 1.7392946334656993, - "learning_rate": 3.7095598922801187e-06, - "loss": 0.722, - "num_input_tokens_seen": 34501000, - "step": 1651 - }, - { - "epoch": 0.19864125533577828, - "grad_norm": 1.993913314530379, - "learning_rate": 3.7091554836576914e-06, - "loss": 0.7495, - "num_input_tokens_seen": 34517395, - "step": 1652 - }, - { - "epoch": 0.19876149822641737, - "grad_norm": 1.983106055896671, - "learning_rate": 3.708750815757736e-06, - "loss": 0.8284, - "num_input_tokens_seen": 34537885, - "step": 1653 - }, - { - "epoch": 0.19888174111705645, - "grad_norm": 2.492828382722311, - "learning_rate": 3.7083458886416407e-06, - "loss": 0.7329, - "num_input_tokens_seen": 34556800, - "step": 1654 - }, - { - "epoch": 0.19900198400769553, - "grad_norm": 2.5221885356656517, - "learning_rate": 3.707940702370832e-06, - "loss": 0.8774, - "num_input_tokens_seen": 34577365, - "step": 1655 - }, - { - "epoch": 0.19912222689833464, - "grad_norm": 0.7797848497755522, - "learning_rate": 3.707535257006777e-06, - "loss": 0.5964, - "num_input_tokens_seen": 34642710, - "step": 1656 - }, - { - "epoch": 0.19924246978897373, - "grad_norm": 2.2654916235977716, - "learning_rate": 3.707129552610981e-06, - "loss": 0.8856, - "num_input_tokens_seen": 34661080, - "step": 1657 - }, - { - "epoch": 0.1993627126796128, - "grad_norm": 1.9222574941794814, - "learning_rate": 3.70672358924499e-06, - "loss": 0.7357, - "num_input_tokens_seen": 34680040, - "step": 1658 - }, - { - "epoch": 0.19948295557025192, - "grad_norm": 2.080149965547982, - "learning_rate": 3.706317366970386e-06, - "loss": 0.7813, - "num_input_tokens_seen": 34700760, - "step": 1659 - }, - { - "epoch": 0.199603198460891, - "grad_norm": 2.11917690030436, - "learning_rate": 3.705910885848795e-06, - "loss": 0.8374, - "num_input_tokens_seen": 34718855, - "step": 1660 - }, - { - "epoch": 0.19972344135153008, - "grad_norm": 1.9858654369525628, - "learning_rate": 3.705504145941879e-06, - "loss": 0.8424, - "num_input_tokens_seen": 34736745, - "step": 1661 - }, - { - "epoch": 0.1998436842421692, - "grad_norm": 1.9287523000063362, - "learning_rate": 3.7050971473113403e-06, - "loss": 0.7866, - "num_input_tokens_seen": 34756240, - "step": 1662 - }, - { - "epoch": 0.19996392713280828, - "grad_norm": 1.7340725728749529, - "learning_rate": 3.7046898900189196e-06, - "loss": 0.7956, - "num_input_tokens_seen": 34780295, - "step": 1663 - }, - { - "epoch": 0.20008417002344736, - "grad_norm": 3.0674734332445617, - "learning_rate": 3.704282374126398e-06, - "loss": 0.8377, - "num_input_tokens_seen": 34799695, - "step": 1664 - }, - { - "epoch": 0.20020441291408644, - "grad_norm": 1.6831879873041657, - "learning_rate": 3.703874599695595e-06, - "loss": 0.8753, - "num_input_tokens_seen": 34818760, - "step": 1665 - }, - { - "epoch": 0.20032465580472555, - "grad_norm": 2.67746229574641, - "learning_rate": 3.703466566788371e-06, - "loss": 0.7199, - "num_input_tokens_seen": 34837610, - "step": 1666 - }, - { - "epoch": 0.20044489869536464, - "grad_norm": 2.040016308116333, - "learning_rate": 3.703058275466622e-06, - "loss": 0.7426, - "num_input_tokens_seen": 34856565, - "step": 1667 - }, - { - "epoch": 0.20056514158600372, - "grad_norm": 1.688512910100869, - "learning_rate": 3.7026497257922877e-06, - "loss": 0.7754, - "num_input_tokens_seen": 34876595, - "step": 1668 - }, - { - "epoch": 0.20068538447664283, - "grad_norm": 1.619946769116153, - "learning_rate": 3.7022409178273436e-06, - "loss": 0.8427, - "num_input_tokens_seen": 34897295, - "step": 1669 - }, - { - "epoch": 0.2008056273672819, - "grad_norm": 1.7765693512836875, - "learning_rate": 3.7018318516338054e-06, - "loss": 0.7764, - "num_input_tokens_seen": 34916175, - "step": 1670 - }, - { - "epoch": 0.200925870257921, - "grad_norm": 2.190236093410836, - "learning_rate": 3.7014225272737284e-06, - "loss": 0.8159, - "num_input_tokens_seen": 34935120, - "step": 1671 - }, - { - "epoch": 0.20104611314856008, - "grad_norm": 2.3242927807428377, - "learning_rate": 3.7010129448092067e-06, - "loss": 0.7368, - "num_input_tokens_seen": 34951955, - "step": 1672 - }, - { - "epoch": 0.2011663560391992, - "grad_norm": 1.9562228173520317, - "learning_rate": 3.700603104302374e-06, - "loss": 0.7797, - "num_input_tokens_seen": 34971485, - "step": 1673 - }, - { - "epoch": 0.20128659892983827, - "grad_norm": 0.8988801713496993, - "learning_rate": 3.7001930058154027e-06, - "loss": 0.583, - "num_input_tokens_seen": 35036165, - "step": 1674 - }, - { - "epoch": 0.20140684182047736, - "grad_norm": 2.7150023880566065, - "learning_rate": 3.6997826494105037e-06, - "loss": 0.7982, - "num_input_tokens_seen": 35056330, - "step": 1675 - }, - { - "epoch": 0.20152708471111647, - "grad_norm": 2.1370815749037835, - "learning_rate": 3.6993720351499286e-06, - "loss": 0.697, - "num_input_tokens_seen": 35077175, - "step": 1676 - }, - { - "epoch": 0.20164732760175555, - "grad_norm": 1.9898782038150444, - "learning_rate": 3.6989611630959666e-06, - "loss": 0.7743, - "num_input_tokens_seen": 35095450, - "step": 1677 - }, - { - "epoch": 0.20176757049239463, - "grad_norm": 0.9131897228849142, - "learning_rate": 3.6985500333109474e-06, - "loss": 0.6156, - "num_input_tokens_seen": 35163500, - "step": 1678 - }, - { - "epoch": 0.20188781338303372, - "grad_norm": 2.425556843619448, - "learning_rate": 3.6981386458572385e-06, - "loss": 0.7614, - "num_input_tokens_seen": 35181195, - "step": 1679 - }, - { - "epoch": 0.20200805627367283, - "grad_norm": 2.9925600203932308, - "learning_rate": 3.6977270007972468e-06, - "loss": 0.7666, - "num_input_tokens_seen": 35198450, - "step": 1680 - }, - { - "epoch": 0.2021282991643119, - "grad_norm": 5.495708211916301, - "learning_rate": 3.6973150981934196e-06, - "loss": 0.7163, - "num_input_tokens_seen": 35219400, - "step": 1681 - }, - { - "epoch": 0.202248542054951, - "grad_norm": 2.638097405345924, - "learning_rate": 3.6969029381082415e-06, - "loss": 0.8319, - "num_input_tokens_seen": 35235115, - "step": 1682 - }, - { - "epoch": 0.2023687849455901, - "grad_norm": 1.8448858296220345, - "learning_rate": 3.6964905206042365e-06, - "loss": 0.7978, - "num_input_tokens_seen": 35253525, - "step": 1683 - }, - { - "epoch": 0.20248902783622919, - "grad_norm": 1.817013406531175, - "learning_rate": 3.696077845743968e-06, - "loss": 0.802, - "num_input_tokens_seen": 35272835, - "step": 1684 - }, - { - "epoch": 0.20260927072686827, - "grad_norm": 2.3667986526461675, - "learning_rate": 3.69566491359004e-06, - "loss": 0.7302, - "num_input_tokens_seen": 35289200, - "step": 1685 - }, - { - "epoch": 0.20272951361750738, - "grad_norm": 1.8917840455382113, - "learning_rate": 3.695251724205092e-06, - "loss": 0.706, - "num_input_tokens_seen": 35313280, - "step": 1686 - }, - { - "epoch": 0.20284975650814646, - "grad_norm": 1.96974395180062, - "learning_rate": 3.6948382776518054e-06, - "loss": 0.8509, - "num_input_tokens_seen": 35333705, - "step": 1687 - }, - { - "epoch": 0.20296999939878554, - "grad_norm": 2.0377540080513974, - "learning_rate": 3.6944245739929e-06, - "loss": 0.7889, - "num_input_tokens_seen": 35349585, - "step": 1688 - }, - { - "epoch": 0.20309024228942463, - "grad_norm": 2.0466242480256893, - "learning_rate": 3.694010613291133e-06, - "loss": 0.7181, - "num_input_tokens_seen": 35366490, - "step": 1689 - }, - { - "epoch": 0.20321048518006374, - "grad_norm": 1.9592121639298246, - "learning_rate": 3.6935963956093037e-06, - "loss": 0.8798, - "num_input_tokens_seen": 35386295, - "step": 1690 - }, - { - "epoch": 0.20333072807070282, - "grad_norm": 1.6865200037692185, - "learning_rate": 3.6931819210102474e-06, - "loss": 0.6836, - "num_input_tokens_seen": 35405410, - "step": 1691 - }, - { - "epoch": 0.2034509709613419, - "grad_norm": 2.1367374524467926, - "learning_rate": 3.6927671895568402e-06, - "loss": 0.8421, - "num_input_tokens_seen": 35424190, - "step": 1692 - }, - { - "epoch": 0.20357121385198101, - "grad_norm": 1.9639575355473116, - "learning_rate": 3.692352201311996e-06, - "loss": 0.8704, - "num_input_tokens_seen": 35442760, - "step": 1693 - }, - { - "epoch": 0.2036914567426201, - "grad_norm": 2.1035098711192806, - "learning_rate": 3.6919369563386687e-06, - "loss": 0.7565, - "num_input_tokens_seen": 35462280, - "step": 1694 - }, - { - "epoch": 0.20381169963325918, - "grad_norm": 2.2386580809874848, - "learning_rate": 3.69152145469985e-06, - "loss": 0.7954, - "num_input_tokens_seen": 35479045, - "step": 1695 - }, - { - "epoch": 0.20393194252389826, - "grad_norm": 2.5156604878031694, - "learning_rate": 3.691105696458572e-06, - "loss": 0.8119, - "num_input_tokens_seen": 35496060, - "step": 1696 - }, - { - "epoch": 0.20405218541453737, - "grad_norm": 4.767966355245232, - "learning_rate": 3.690689681677904e-06, - "loss": 0.6739, - "num_input_tokens_seen": 35514250, - "step": 1697 - }, - { - "epoch": 0.20417242830517646, - "grad_norm": 1.88629218774707, - "learning_rate": 3.690273410420956e-06, - "loss": 0.8777, - "num_input_tokens_seen": 35533735, - "step": 1698 - }, - { - "epoch": 0.20429267119581554, - "grad_norm": 2.515516650736705, - "learning_rate": 3.689856882750875e-06, - "loss": 0.7702, - "num_input_tokens_seen": 35548655, - "step": 1699 - }, - { - "epoch": 0.20441291408645465, - "grad_norm": 1.696633191333543, - "learning_rate": 3.6894400987308486e-06, - "loss": 0.7829, - "num_input_tokens_seen": 35565895, - "step": 1700 - }, - { - "epoch": 0.20453315697709373, - "grad_norm": 2.621583851450548, - "learning_rate": 3.6890230584241024e-06, - "loss": 0.85, - "num_input_tokens_seen": 35582545, - "step": 1701 - }, - { - "epoch": 0.20465339986773282, - "grad_norm": 0.9313295863936389, - "learning_rate": 3.6886057618939016e-06, - "loss": 0.6997, - "num_input_tokens_seen": 35645085, - "step": 1702 - }, - { - "epoch": 0.2047736427583719, - "grad_norm": 2.166878862803526, - "learning_rate": 3.6881882092035492e-06, - "loss": 0.6989, - "num_input_tokens_seen": 35666190, - "step": 1703 - }, - { - "epoch": 0.204893885649011, - "grad_norm": 1.155973037261191, - "learning_rate": 3.6877704004163873e-06, - "loss": 0.6745, - "num_input_tokens_seen": 35726315, - "step": 1704 - }, - { - "epoch": 0.2050141285396501, - "grad_norm": 2.209310694153699, - "learning_rate": 3.687352335595798e-06, - "loss": 0.7788, - "num_input_tokens_seen": 35745035, - "step": 1705 - }, - { - "epoch": 0.20513437143028918, - "grad_norm": 1.0940077982506569, - "learning_rate": 3.686934014805201e-06, - "loss": 0.7194, - "num_input_tokens_seen": 35795385, - "step": 1706 - }, - { - "epoch": 0.20525461432092829, - "grad_norm": 2.9484250928601945, - "learning_rate": 3.6865154381080552e-06, - "loss": 0.8058, - "num_input_tokens_seen": 35815790, - "step": 1707 - }, - { - "epoch": 0.20537485721156737, - "grad_norm": 3.0038241820001104, - "learning_rate": 3.6860966055678585e-06, - "loss": 0.8181, - "num_input_tokens_seen": 35831865, - "step": 1708 - }, - { - "epoch": 0.20549510010220645, - "grad_norm": 1.7218405523027227, - "learning_rate": 3.685677517248147e-06, - "loss": 0.8546, - "num_input_tokens_seen": 35850475, - "step": 1709 - }, - { - "epoch": 0.20561534299284553, - "grad_norm": 1.8806139356743412, - "learning_rate": 3.6852581732124967e-06, - "loss": 0.7962, - "num_input_tokens_seen": 35867540, - "step": 1710 - }, - { - "epoch": 0.20573558588348465, - "grad_norm": 2.285485242152226, - "learning_rate": 3.6848385735245213e-06, - "loss": 0.7523, - "num_input_tokens_seen": 35886350, - "step": 1711 - }, - { - "epoch": 0.20585582877412373, - "grad_norm": 1.779203810836933, - "learning_rate": 3.6844187182478734e-06, - "loss": 0.8597, - "num_input_tokens_seen": 35906925, - "step": 1712 - }, - { - "epoch": 0.2059760716647628, - "grad_norm": 1.7618907288865344, - "learning_rate": 3.683998607446246e-06, - "loss": 0.7518, - "num_input_tokens_seen": 35925295, - "step": 1713 - }, - { - "epoch": 0.20609631455540192, - "grad_norm": 2.122945836973529, - "learning_rate": 3.6835782411833686e-06, - "loss": 0.742, - "num_input_tokens_seen": 35944535, - "step": 1714 - }, - { - "epoch": 0.206216557446041, - "grad_norm": 1.8805161860115227, - "learning_rate": 3.68315761952301e-06, - "loss": 0.7445, - "num_input_tokens_seen": 35961485, - "step": 1715 - }, - { - "epoch": 0.2063368003366801, - "grad_norm": 2.031283104399946, - "learning_rate": 3.6827367425289797e-06, - "loss": 0.8286, - "num_input_tokens_seen": 35980980, - "step": 1716 - }, - { - "epoch": 0.2064570432273192, - "grad_norm": 2.621724774069149, - "learning_rate": 3.6823156102651225e-06, - "loss": 0.7222, - "num_input_tokens_seen": 35998855, - "step": 1717 - }, - { - "epoch": 0.20657728611795828, - "grad_norm": 1.870451445254049, - "learning_rate": 3.6818942227953257e-06, - "loss": 0.7155, - "num_input_tokens_seen": 36019120, - "step": 1718 - }, - { - "epoch": 0.20669752900859736, - "grad_norm": 2.202148694033474, - "learning_rate": 3.681472580183512e-06, - "loss": 0.6899, - "num_input_tokens_seen": 36037490, - "step": 1719 - }, - { - "epoch": 0.20681777189923645, - "grad_norm": 2.2518439241820363, - "learning_rate": 3.6810506824936455e-06, - "loss": 0.86, - "num_input_tokens_seen": 36055290, - "step": 1720 - }, - { - "epoch": 0.20693801478987556, - "grad_norm": 1.1709678226815556, - "learning_rate": 3.680628529789726e-06, - "loss": 0.6612, - "num_input_tokens_seen": 36107420, - "step": 1721 - }, - { - "epoch": 0.20705825768051464, - "grad_norm": 1.9557401094809275, - "learning_rate": 3.680206122135796e-06, - "loss": 0.8575, - "num_input_tokens_seen": 36127745, - "step": 1722 - }, - { - "epoch": 0.20717850057115372, - "grad_norm": 1.6784370235543111, - "learning_rate": 3.6797834595959323e-06, - "loss": 0.7773, - "num_input_tokens_seen": 36147365, - "step": 1723 - }, - { - "epoch": 0.20729874346179283, - "grad_norm": 2.401998375912347, - "learning_rate": 3.679360542234254e-06, - "loss": 0.7718, - "num_input_tokens_seen": 36166430, - "step": 1724 - }, - { - "epoch": 0.20741898635243192, - "grad_norm": 1.624799421370761, - "learning_rate": 3.678937370114916e-06, - "loss": 0.7169, - "num_input_tokens_seen": 36185955, - "step": 1725 - }, - { - "epoch": 0.207539229243071, - "grad_norm": 1.9325096176461283, - "learning_rate": 3.678513943302114e-06, - "loss": 0.7833, - "num_input_tokens_seen": 36202450, - "step": 1726 - }, - { - "epoch": 0.20765947213371008, - "grad_norm": 1.6361413769553386, - "learning_rate": 3.6780902618600816e-06, - "loss": 0.8473, - "num_input_tokens_seen": 36221900, - "step": 1727 - }, - { - "epoch": 0.2077797150243492, - "grad_norm": 2.518197382896608, - "learning_rate": 3.6776663258530906e-06, - "loss": 0.7795, - "num_input_tokens_seen": 36240270, - "step": 1728 - }, - { - "epoch": 0.20789995791498828, - "grad_norm": 1.8278081803733086, - "learning_rate": 3.6772421353454516e-06, - "loss": 0.7095, - "num_input_tokens_seen": 36258585, - "step": 1729 - }, - { - "epoch": 0.20802020080562736, - "grad_norm": 1.901420879136906, - "learning_rate": 3.6768176904015153e-06, - "loss": 0.8723, - "num_input_tokens_seen": 36278110, - "step": 1730 - }, - { - "epoch": 0.20814044369626647, - "grad_norm": 1.9760401118248234, - "learning_rate": 3.6763929910856674e-06, - "loss": 0.5992, - "num_input_tokens_seen": 36296280, - "step": 1731 - }, - { - "epoch": 0.20826068658690555, - "grad_norm": 2.837331355067723, - "learning_rate": 3.6759680374623365e-06, - "loss": 0.7747, - "num_input_tokens_seen": 36313915, - "step": 1732 - }, - { - "epoch": 0.20838092947754464, - "grad_norm": 2.6931341563366087, - "learning_rate": 3.675542829595986e-06, - "loss": 0.751, - "num_input_tokens_seen": 36333300, - "step": 1733 - }, - { - "epoch": 0.20850117236818372, - "grad_norm": 1.5158485298869635, - "learning_rate": 3.6751173675511213e-06, - "loss": 0.7885, - "num_input_tokens_seen": 36355065, - "step": 1734 - }, - { - "epoch": 0.20862141525882283, - "grad_norm": 2.079115273145643, - "learning_rate": 3.674691651392283e-06, - "loss": 0.8725, - "num_input_tokens_seen": 36372455, - "step": 1735 - }, - { - "epoch": 0.2087416581494619, - "grad_norm": 2.1540501285476856, - "learning_rate": 3.674265681184053e-06, - "loss": 0.7638, - "num_input_tokens_seen": 36395435, - "step": 1736 - }, - { - "epoch": 0.208861901040101, - "grad_norm": 1.7858104004973228, - "learning_rate": 3.6738394569910504e-06, - "loss": 0.8643, - "num_input_tokens_seen": 36415695, - "step": 1737 - }, - { - "epoch": 0.2089821439307401, - "grad_norm": 2.1969425779859773, - "learning_rate": 3.6734129788779333e-06, - "loss": 0.827, - "num_input_tokens_seen": 36434590, - "step": 1738 - }, - { - "epoch": 0.2091023868213792, - "grad_norm": 1.7744586532390394, - "learning_rate": 3.6729862469093976e-06, - "loss": 0.8965, - "num_input_tokens_seen": 36453405, - "step": 1739 - }, - { - "epoch": 0.20922262971201827, - "grad_norm": 2.2876353711637107, - "learning_rate": 3.6725592611501782e-06, - "loss": 0.8223, - "num_input_tokens_seen": 36471800, - "step": 1740 - }, - { - "epoch": 0.20934287260265738, - "grad_norm": 1.7983044959309142, - "learning_rate": 3.672132021665049e-06, - "loss": 0.7587, - "num_input_tokens_seen": 36492135, - "step": 1741 - }, - { - "epoch": 0.20946311549329646, - "grad_norm": 2.2044283306396917, - "learning_rate": 3.6717045285188215e-06, - "loss": 0.8371, - "num_input_tokens_seen": 36509550, - "step": 1742 - }, - { - "epoch": 0.20958335838393555, - "grad_norm": 2.2578066303733966, - "learning_rate": 3.671276781776346e-06, - "loss": 0.8527, - "num_input_tokens_seen": 36527925, - "step": 1743 - }, - { - "epoch": 0.20970360127457463, - "grad_norm": 2.676591364821894, - "learning_rate": 3.6708487815025124e-06, - "loss": 0.6656, - "num_input_tokens_seen": 36548225, - "step": 1744 - }, - { - "epoch": 0.20982384416521374, - "grad_norm": 2.0946013542465525, - "learning_rate": 3.6704205277622463e-06, - "loss": 0.7485, - "num_input_tokens_seen": 36566385, - "step": 1745 - }, - { - "epoch": 0.20994408705585282, - "grad_norm": 1.665079232889727, - "learning_rate": 3.6699920206205146e-06, - "loss": 0.7932, - "num_input_tokens_seen": 36586845, - "step": 1746 - }, - { - "epoch": 0.2100643299464919, - "grad_norm": 1.676139984296906, - "learning_rate": 3.669563260142321e-06, - "loss": 0.8226, - "num_input_tokens_seen": 36605455, - "step": 1747 - }, - { - "epoch": 0.21018457283713102, - "grad_norm": 2.4656115651985586, - "learning_rate": 3.6691342463927083e-06, - "loss": 0.8423, - "num_input_tokens_seen": 36624170, - "step": 1748 - }, - { - "epoch": 0.2103048157277701, - "grad_norm": 2.165749739334828, - "learning_rate": 3.6687049794367574e-06, - "loss": 0.8116, - "num_input_tokens_seen": 36643985, - "step": 1749 - }, - { - "epoch": 0.21042505861840918, - "grad_norm": 1.9593749771324847, - "learning_rate": 3.668275459339588e-06, - "loss": 0.7849, - "num_input_tokens_seen": 36662185, - "step": 1750 - }, - { - "epoch": 0.21054530150904827, - "grad_norm": 1.8050103002391567, - "learning_rate": 3.667845686166358e-06, - "loss": 0.807, - "num_input_tokens_seen": 36678830, - "step": 1751 - }, - { - "epoch": 0.21066554439968738, - "grad_norm": 1.6123897525637478, - "learning_rate": 3.6674156599822634e-06, - "loss": 0.8518, - "num_input_tokens_seen": 36694345, - "step": 1752 - }, - { - "epoch": 0.21078578729032646, - "grad_norm": 5.378396038318722, - "learning_rate": 3.666985380852539e-06, - "loss": 0.8073, - "num_input_tokens_seen": 36713070, - "step": 1753 - }, - { - "epoch": 0.21090603018096554, - "grad_norm": 2.6517511505227205, - "learning_rate": 3.6665548488424576e-06, - "loss": 0.7486, - "num_input_tokens_seen": 36731550, - "step": 1754 - }, - { - "epoch": 0.21102627307160465, - "grad_norm": 1.762766041219322, - "learning_rate": 3.6661240640173307e-06, - "loss": 0.8715, - "num_input_tokens_seen": 36752740, - "step": 1755 - }, - { - "epoch": 0.21114651596224374, - "grad_norm": 0.9534343848868646, - "learning_rate": 3.665693026442508e-06, - "loss": 0.6392, - "num_input_tokens_seen": 36816505, - "step": 1756 - }, - { - "epoch": 0.21126675885288282, - "grad_norm": 1.8700503001592552, - "learning_rate": 3.665261736183378e-06, - "loss": 0.7586, - "num_input_tokens_seen": 36836260, - "step": 1757 - }, - { - "epoch": 0.2113870017435219, - "grad_norm": 3.9147321659655487, - "learning_rate": 3.664830193305366e-06, - "loss": 0.8845, - "num_input_tokens_seen": 36853755, - "step": 1758 - }, - { - "epoch": 0.211507244634161, - "grad_norm": 14.051091440765614, - "learning_rate": 3.6643983978739373e-06, - "loss": 0.7575, - "num_input_tokens_seen": 36870090, - "step": 1759 - }, - { - "epoch": 0.2116274875248001, - "grad_norm": 2.024336694811304, - "learning_rate": 3.6639663499545958e-06, - "loss": 0.8135, - "num_input_tokens_seen": 36889990, - "step": 1760 - }, - { - "epoch": 0.21174773041543918, - "grad_norm": 0.772616748576978, - "learning_rate": 3.6635340496128816e-06, - "loss": 0.6151, - "num_input_tokens_seen": 36946640, - "step": 1761 - }, - { - "epoch": 0.2118679733060783, - "grad_norm": 1.5823800804774302, - "learning_rate": 3.6631014969143747e-06, - "loss": 0.917, - "num_input_tokens_seen": 36966050, - "step": 1762 - }, - { - "epoch": 0.21198821619671737, - "grad_norm": 3.036715949957443, - "learning_rate": 3.6626686919246925e-06, - "loss": 0.8766, - "num_input_tokens_seen": 36986820, - "step": 1763 - }, - { - "epoch": 0.21210845908735645, - "grad_norm": 2.0885069023325116, - "learning_rate": 3.6622356347094927e-06, - "loss": 0.7151, - "num_input_tokens_seen": 37008105, - "step": 1764 - }, - { - "epoch": 0.21222870197799554, - "grad_norm": 2.5122354961197035, - "learning_rate": 3.6618023253344684e-06, - "loss": 0.7779, - "num_input_tokens_seen": 37026685, - "step": 1765 - }, - { - "epoch": 0.21234894486863465, - "grad_norm": 1.8589895640069298, - "learning_rate": 3.6613687638653522e-06, - "loss": 0.8277, - "num_input_tokens_seen": 37044575, - "step": 1766 - }, - { - "epoch": 0.21246918775927373, - "grad_norm": 1.7594423186919539, - "learning_rate": 3.660934950367916e-06, - "loss": 0.7754, - "num_input_tokens_seen": 37063540, - "step": 1767 - }, - { - "epoch": 0.21258943064991281, - "grad_norm": 1.7019040773505065, - "learning_rate": 3.660500884907968e-06, - "loss": 0.8281, - "num_input_tokens_seen": 37084000, - "step": 1768 - }, - { - "epoch": 0.21270967354055192, - "grad_norm": 0.8558150428626566, - "learning_rate": 3.660066567551356e-06, - "loss": 0.625, - "num_input_tokens_seen": 37143865, - "step": 1769 - }, - { - "epoch": 0.212829916431191, - "grad_norm": 2.3487661698096005, - "learning_rate": 3.6596319983639657e-06, - "loss": 0.8221, - "num_input_tokens_seen": 37162165, - "step": 1770 - }, - { - "epoch": 0.2129501593218301, - "grad_norm": 1.5846460651320358, - "learning_rate": 3.659197177411721e-06, - "loss": 0.855, - "num_input_tokens_seen": 37184860, - "step": 1771 - }, - { - "epoch": 0.2130704022124692, - "grad_norm": 2.1984613137248874, - "learning_rate": 3.6587621047605833e-06, - "loss": 0.8099, - "num_input_tokens_seen": 37201750, - "step": 1772 - }, - { - "epoch": 0.21319064510310828, - "grad_norm": 2.3392578497592074, - "learning_rate": 3.6583267804765542e-06, - "loss": 0.8674, - "num_input_tokens_seen": 37215805, - "step": 1773 - }, - { - "epoch": 0.21331088799374737, - "grad_norm": 2.6227986588778824, - "learning_rate": 3.65789120462567e-06, - "loss": 0.8494, - "num_input_tokens_seen": 37234045, - "step": 1774 - }, - { - "epoch": 0.21343113088438645, - "grad_norm": 2.361543801373876, - "learning_rate": 3.6574553772740083e-06, - "loss": 0.7623, - "num_input_tokens_seen": 37251695, - "step": 1775 - }, - { - "epoch": 0.21355137377502556, - "grad_norm": 0.8780642632998993, - "learning_rate": 3.657019298487684e-06, - "loss": 0.6593, - "num_input_tokens_seen": 37316425, - "step": 1776 - }, - { - "epoch": 0.21367161666566464, - "grad_norm": 1.675448065795171, - "learning_rate": 3.6565829683328495e-06, - "loss": 0.8283, - "num_input_tokens_seen": 37338770, - "step": 1777 - }, - { - "epoch": 0.21379185955630373, - "grad_norm": 2.0976154727276493, - "learning_rate": 3.656146386875696e-06, - "loss": 0.8529, - "num_input_tokens_seen": 37357190, - "step": 1778 - }, - { - "epoch": 0.21391210244694284, - "grad_norm": 2.32082506889008, - "learning_rate": 3.6557095541824527e-06, - "loss": 0.7725, - "num_input_tokens_seen": 37377250, - "step": 1779 - }, - { - "epoch": 0.21403234533758192, - "grad_norm": 1.7766199493748616, - "learning_rate": 3.6552724703193855e-06, - "loss": 0.8421, - "num_input_tokens_seen": 37394160, - "step": 1780 - }, - { - "epoch": 0.214152588228221, - "grad_norm": 0.7985597271306865, - "learning_rate": 3.654835135352801e-06, - "loss": 0.5794, - "num_input_tokens_seen": 37448690, - "step": 1781 - }, - { - "epoch": 0.21427283111886009, - "grad_norm": 1.7653736663925563, - "learning_rate": 3.6543975493490424e-06, - "loss": 0.8677, - "num_input_tokens_seen": 37465785, - "step": 1782 - }, - { - "epoch": 0.2143930740094992, - "grad_norm": 1.962107813728839, - "learning_rate": 3.653959712374491e-06, - "loss": 0.7514, - "num_input_tokens_seen": 37483610, - "step": 1783 - }, - { - "epoch": 0.21451331690013828, - "grad_norm": 1.6642107113663052, - "learning_rate": 3.6535216244955663e-06, - "loss": 0.8222, - "num_input_tokens_seen": 37503225, - "step": 1784 - }, - { - "epoch": 0.21463355979077736, - "grad_norm": 1.8214602724421463, - "learning_rate": 3.6530832857787253e-06, - "loss": 0.7053, - "num_input_tokens_seen": 37524315, - "step": 1785 - }, - { - "epoch": 0.21475380268141647, - "grad_norm": 2.176438180297038, - "learning_rate": 3.6526446962904653e-06, - "loss": 0.7991, - "num_input_tokens_seen": 37542750, - "step": 1786 - }, - { - "epoch": 0.21487404557205556, - "grad_norm": 1.7588275994460607, - "learning_rate": 3.652205856097318e-06, - "loss": 0.741, - "num_input_tokens_seen": 37565655, - "step": 1787 - }, - { - "epoch": 0.21499428846269464, - "grad_norm": 2.1164002231632706, - "learning_rate": 3.651766765265856e-06, - "loss": 0.7869, - "num_input_tokens_seen": 37582385, - "step": 1788 - }, - { - "epoch": 0.21511453135333372, - "grad_norm": 2.24463223110277, - "learning_rate": 3.65132742386269e-06, - "loss": 0.7997, - "num_input_tokens_seen": 37597325, - "step": 1789 - }, - { - "epoch": 0.21523477424397283, - "grad_norm": 1.715490416839966, - "learning_rate": 3.6508878319544656e-06, - "loss": 0.844, - "num_input_tokens_seen": 37617260, - "step": 1790 - }, - { - "epoch": 0.21535501713461191, - "grad_norm": 2.447114568916636, - "learning_rate": 3.65044798960787e-06, - "loss": 0.8061, - "num_input_tokens_seen": 37635320, - "step": 1791 - }, - { - "epoch": 0.215475260025251, - "grad_norm": 1.8517650271010826, - "learning_rate": 3.650007896889627e-06, - "loss": 0.7735, - "num_input_tokens_seen": 37653620, - "step": 1792 - }, - { - "epoch": 0.2155955029158901, - "grad_norm": 1.8689050831744136, - "learning_rate": 3.6495675538664974e-06, - "loss": 0.8053, - "num_input_tokens_seen": 37672355, - "step": 1793 - }, - { - "epoch": 0.2157157458065292, - "grad_norm": 1.6525833162112833, - "learning_rate": 3.649126960605282e-06, - "loss": 0.8183, - "num_input_tokens_seen": 37693060, - "step": 1794 - }, - { - "epoch": 0.21583598869716827, - "grad_norm": 2.279537363859075, - "learning_rate": 3.6486861171728174e-06, - "loss": 0.8315, - "num_input_tokens_seen": 37711175, - "step": 1795 - }, - { - "epoch": 0.21595623158780738, - "grad_norm": 1.7906862766897846, - "learning_rate": 3.64824502363598e-06, - "loss": 0.7831, - "num_input_tokens_seen": 37732750, - "step": 1796 - }, - { - "epoch": 0.21607647447844647, - "grad_norm": 3.01915372612889, - "learning_rate": 3.647803680061683e-06, - "loss": 0.7701, - "num_input_tokens_seen": 37752885, - "step": 1797 - }, - { - "epoch": 0.21619671736908555, - "grad_norm": 2.8847506398591807, - "learning_rate": 3.6473620865168776e-06, - "loss": 0.7598, - "num_input_tokens_seen": 37769475, - "step": 1798 - }, - { - "epoch": 0.21631696025972463, - "grad_norm": 2.1950726556323614, - "learning_rate": 3.646920243068554e-06, - "loss": 0.8177, - "num_input_tokens_seen": 37787090, - "step": 1799 - }, - { - "epoch": 0.21643720315036374, - "grad_norm": 1.6332063099557796, - "learning_rate": 3.6464781497837384e-06, - "loss": 0.737, - "num_input_tokens_seen": 37808785, - "step": 1800 - }, - { - "epoch": 0.21655744604100283, - "grad_norm": 1.6423910968289752, - "learning_rate": 3.6460358067294965e-06, - "loss": 0.7295, - "num_input_tokens_seen": 37829735, - "step": 1801 - }, - { - "epoch": 0.2166776889316419, - "grad_norm": 1.9761402097314489, - "learning_rate": 3.645593213972932e-06, - "loss": 0.7753, - "num_input_tokens_seen": 37848360, - "step": 1802 - }, - { - "epoch": 0.21679793182228102, - "grad_norm": 3.1337263419242958, - "learning_rate": 3.6451503715811852e-06, - "loss": 0.7982, - "num_input_tokens_seen": 37866390, - "step": 1803 - }, - { - "epoch": 0.2169181747129201, - "grad_norm": 2.3076125154929867, - "learning_rate": 3.6447072796214345e-06, - "loss": 0.7987, - "num_input_tokens_seen": 37884675, - "step": 1804 - }, - { - "epoch": 0.21703841760355919, - "grad_norm": 0.9544705238557111, - "learning_rate": 3.644263938160898e-06, - "loss": 0.6662, - "num_input_tokens_seen": 37940360, - "step": 1805 - }, - { - "epoch": 0.21715866049419827, - "grad_norm": 1.9577700934723972, - "learning_rate": 3.6438203472668293e-06, - "loss": 0.7089, - "num_input_tokens_seen": 37959725, - "step": 1806 - }, - { - "epoch": 0.21727890338483738, - "grad_norm": 2.0163355174307718, - "learning_rate": 3.6433765070065206e-06, - "loss": 0.8171, - "num_input_tokens_seen": 37977235, - "step": 1807 - }, - { - "epoch": 0.21739914627547646, - "grad_norm": 2.1502965419176037, - "learning_rate": 3.6429324174473025e-06, - "loss": 0.8748, - "num_input_tokens_seen": 37990495, - "step": 1808 - }, - { - "epoch": 0.21751938916611555, - "grad_norm": 1.9979477753045656, - "learning_rate": 3.6424880786565425e-06, - "loss": 0.8512, - "num_input_tokens_seen": 38006360, - "step": 1809 - }, - { - "epoch": 0.21763963205675466, - "grad_norm": 2.098481125344564, - "learning_rate": 3.642043490701648e-06, - "loss": 0.7949, - "num_input_tokens_seen": 38025770, - "step": 1810 - }, - { - "epoch": 0.21775987494739374, - "grad_norm": 1.6266061774061915, - "learning_rate": 3.6415986536500606e-06, - "loss": 0.8127, - "num_input_tokens_seen": 38043820, - "step": 1811 - }, - { - "epoch": 0.21788011783803282, - "grad_norm": 1.825934314096968, - "learning_rate": 3.641153567569263e-06, - "loss": 0.8105, - "num_input_tokens_seen": 38061855, - "step": 1812 - }, - { - "epoch": 0.2180003607286719, - "grad_norm": 2.0152629563030557, - "learning_rate": 3.640708232526774e-06, - "loss": 0.9426, - "num_input_tokens_seen": 38080230, - "step": 1813 - }, - { - "epoch": 0.21812060361931102, - "grad_norm": 2.0198996935154465, - "learning_rate": 3.6402626485901504e-06, - "loss": 0.7772, - "num_input_tokens_seen": 38099045, - "step": 1814 - }, - { - "epoch": 0.2182408465099501, - "grad_norm": 1.8633676708640283, - "learning_rate": 3.639816815826988e-06, - "loss": 0.7784, - "num_input_tokens_seen": 38118090, - "step": 1815 - }, - { - "epoch": 0.21836108940058918, - "grad_norm": 3.03055645298261, - "learning_rate": 3.6393707343049176e-06, - "loss": 0.7762, - "num_input_tokens_seen": 38138140, - "step": 1816 - }, - { - "epoch": 0.2184813322912283, - "grad_norm": 2.6308854968969317, - "learning_rate": 3.6389244040916104e-06, - "loss": 0.7352, - "num_input_tokens_seen": 38156935, - "step": 1817 - }, - { - "epoch": 0.21860157518186737, - "grad_norm": 1.96231934566628, - "learning_rate": 3.6384778252547747e-06, - "loss": 0.7853, - "num_input_tokens_seen": 38172535, - "step": 1818 - }, - { - "epoch": 0.21872181807250646, - "grad_norm": 2.16040105604102, - "learning_rate": 3.638030997862155e-06, - "loss": 0.7693, - "num_input_tokens_seen": 38191190, - "step": 1819 - }, - { - "epoch": 0.21884206096314554, - "grad_norm": 0.8546912444371211, - "learning_rate": 3.6375839219815356e-06, - "loss": 0.6249, - "num_input_tokens_seen": 38248710, - "step": 1820 - }, - { - "epoch": 0.21896230385378465, - "grad_norm": 2.0572362342852375, - "learning_rate": 3.637136597680737e-06, - "loss": 0.8227, - "num_input_tokens_seen": 38268825, - "step": 1821 - }, - { - "epoch": 0.21908254674442373, - "grad_norm": 2.011467520872459, - "learning_rate": 3.6366890250276185e-06, - "loss": 0.8345, - "num_input_tokens_seen": 38289500, - "step": 1822 - }, - { - "epoch": 0.21920278963506282, - "grad_norm": 2.1812186305445453, - "learning_rate": 3.6362412040900764e-06, - "loss": 0.8991, - "num_input_tokens_seen": 38309010, - "step": 1823 - }, - { - "epoch": 0.21932303252570193, - "grad_norm": 1.9498735884910756, - "learning_rate": 3.635793134936044e-06, - "loss": 0.7969, - "num_input_tokens_seen": 38329740, - "step": 1824 - }, - { - "epoch": 0.219443275416341, - "grad_norm": 1.8313355744695072, - "learning_rate": 3.635344817633494e-06, - "loss": 0.727, - "num_input_tokens_seen": 38348775, - "step": 1825 - }, - { - "epoch": 0.2195635183069801, - "grad_norm": 2.2279761128753965, - "learning_rate": 3.634896252250436e-06, - "loss": 0.753, - "num_input_tokens_seen": 38365260, - "step": 1826 - }, - { - "epoch": 0.2196837611976192, - "grad_norm": 1.7546277055684447, - "learning_rate": 3.6344474388549157e-06, - "loss": 0.8228, - "num_input_tokens_seen": 38384635, - "step": 1827 - }, - { - "epoch": 0.2198040040882583, - "grad_norm": 2.305943209207984, - "learning_rate": 3.6339983775150183e-06, - "loss": 0.8083, - "num_input_tokens_seen": 38400915, - "step": 1828 - }, - { - "epoch": 0.21992424697889737, - "grad_norm": 2.4417597642599165, - "learning_rate": 3.6335490682988664e-06, - "loss": 0.8438, - "num_input_tokens_seen": 38416245, - "step": 1829 - }, - { - "epoch": 0.22004448986953645, - "grad_norm": 2.1356834544559655, - "learning_rate": 3.63309951127462e-06, - "loss": 0.8269, - "num_input_tokens_seen": 38432875, - "step": 1830 - }, - { - "epoch": 0.22016473276017556, - "grad_norm": 2.0113820904986315, - "learning_rate": 3.6326497065104757e-06, - "loss": 0.7496, - "num_input_tokens_seen": 38453060, - "step": 1831 - }, - { - "epoch": 0.22028497565081465, - "grad_norm": 2.6705658766359264, - "learning_rate": 3.6321996540746693e-06, - "loss": 0.7782, - "num_input_tokens_seen": 38471855, - "step": 1832 - }, - { - "epoch": 0.22040521854145373, - "grad_norm": 1.8164125356603502, - "learning_rate": 3.631749354035473e-06, - "loss": 0.797, - "num_input_tokens_seen": 38494990, - "step": 1833 - }, - { - "epoch": 0.22052546143209284, - "grad_norm": 2.2040712353406158, - "learning_rate": 3.6312988064611976e-06, - "loss": 0.7754, - "num_input_tokens_seen": 38513020, - "step": 1834 - }, - { - "epoch": 0.22064570432273192, - "grad_norm": 1.8801019026834391, - "learning_rate": 3.6308480114201896e-06, - "loss": 0.7994, - "num_input_tokens_seen": 38534660, - "step": 1835 - }, - { - "epoch": 0.220765947213371, - "grad_norm": 1.7848474606803468, - "learning_rate": 3.630396968980835e-06, - "loss": 0.7617, - "num_input_tokens_seen": 38552255, - "step": 1836 - }, - { - "epoch": 0.2208861901040101, - "grad_norm": 2.6160608806691155, - "learning_rate": 3.6299456792115575e-06, - "loss": 0.844, - "num_input_tokens_seen": 38573230, - "step": 1837 - }, - { - "epoch": 0.2210064329946492, - "grad_norm": 1.9115691628978044, - "learning_rate": 3.629494142180815e-06, - "loss": 0.8073, - "num_input_tokens_seen": 38591695, - "step": 1838 - }, - { - "epoch": 0.22112667588528828, - "grad_norm": 2.2216648717181644, - "learning_rate": 3.6290423579571075e-06, - "loss": 0.8489, - "num_input_tokens_seen": 38607955, - "step": 1839 - }, - { - "epoch": 0.22124691877592736, - "grad_norm": 1.631429787222244, - "learning_rate": 3.6285903266089694e-06, - "loss": 0.7907, - "num_input_tokens_seen": 38626950, - "step": 1840 - }, - { - "epoch": 0.22136716166656648, - "grad_norm": 1.912494853652078, - "learning_rate": 3.628138048204974e-06, - "loss": 0.7678, - "num_input_tokens_seen": 38647355, - "step": 1841 - }, - { - "epoch": 0.22148740455720556, - "grad_norm": 1.9831575817336016, - "learning_rate": 3.6276855228137304e-06, - "loss": 0.7753, - "num_input_tokens_seen": 38665280, - "step": 1842 - }, - { - "epoch": 0.22160764744784464, - "grad_norm": 2.3080394577680328, - "learning_rate": 3.6272327505038874e-06, - "loss": 0.8124, - "num_input_tokens_seen": 38681465, - "step": 1843 - }, - { - "epoch": 0.22172789033848372, - "grad_norm": 1.7988845554334703, - "learning_rate": 3.6267797313441304e-06, - "loss": 0.7793, - "num_input_tokens_seen": 38700975, - "step": 1844 - }, - { - "epoch": 0.22184813322912283, - "grad_norm": 2.5296873394124937, - "learning_rate": 3.6263264654031814e-06, - "loss": 0.8511, - "num_input_tokens_seen": 38717595, - "step": 1845 - }, - { - "epoch": 0.22196837611976192, - "grad_norm": 0.7340154602236587, - "learning_rate": 3.6258729527498008e-06, - "loss": 0.6068, - "num_input_tokens_seen": 38778160, - "step": 1846 - }, - { - "epoch": 0.222088619010401, - "grad_norm": 2.1475370237623843, - "learning_rate": 3.6254191934527854e-06, - "loss": 0.6347, - "num_input_tokens_seen": 38797235, - "step": 1847 - }, - { - "epoch": 0.2222088619010401, - "grad_norm": 1.8191542802363727, - "learning_rate": 3.6249651875809715e-06, - "loss": 0.6392, - "num_input_tokens_seen": 38816835, - "step": 1848 - }, - { - "epoch": 0.2223291047916792, - "grad_norm": 1.8674553736196544, - "learning_rate": 3.62451093520323e-06, - "loss": 0.8902, - "num_input_tokens_seen": 38834460, - "step": 1849 - }, - { - "epoch": 0.22244934768231828, - "grad_norm": 2.0623193064801244, - "learning_rate": 3.6240564363884714e-06, - "loss": 0.8996, - "num_input_tokens_seen": 38854125, - "step": 1850 - }, - { - "epoch": 0.2225695905729574, - "grad_norm": 1.908793957900236, - "learning_rate": 3.6236016912056425e-06, - "loss": 0.7039, - "num_input_tokens_seen": 38872920, - "step": 1851 - }, - { - "epoch": 0.22268983346359647, - "grad_norm": 1.9006776964486918, - "learning_rate": 3.623146699723729e-06, - "loss": 0.8097, - "num_input_tokens_seen": 38892100, - "step": 1852 - }, - { - "epoch": 0.22281007635423555, - "grad_norm": 1.7176681396304596, - "learning_rate": 3.6226914620117507e-06, - "loss": 0.78, - "num_input_tokens_seen": 38910440, - "step": 1853 - }, - { - "epoch": 0.22293031924487464, - "grad_norm": 3.7533601845539586, - "learning_rate": 3.622235978138768e-06, - "loss": 0.8017, - "num_input_tokens_seen": 38927785, - "step": 1854 - }, - { - "epoch": 0.22305056213551375, - "grad_norm": 2.50729892045481, - "learning_rate": 3.621780248173877e-06, - "loss": 0.8164, - "num_input_tokens_seen": 38945705, - "step": 1855 - }, - { - "epoch": 0.22317080502615283, - "grad_norm": 1.1892572803908956, - "learning_rate": 3.6213242721862125e-06, - "loss": 0.65, - "num_input_tokens_seen": 39003880, - "step": 1856 - }, - { - "epoch": 0.2232910479167919, - "grad_norm": 1.9978649573635496, - "learning_rate": 3.6208680502449444e-06, - "loss": 0.7452, - "num_input_tokens_seen": 39024080, - "step": 1857 - }, - { - "epoch": 0.22341129080743102, - "grad_norm": 2.5519758207140133, - "learning_rate": 3.6204115824192817e-06, - "loss": 0.7694, - "num_input_tokens_seen": 39041275, - "step": 1858 - }, - { - "epoch": 0.2235315336980701, - "grad_norm": 2.221701684658989, - "learning_rate": 3.619954868778471e-06, - "loss": 0.774, - "num_input_tokens_seen": 39057690, - "step": 1859 - }, - { - "epoch": 0.2236517765887092, - "grad_norm": 1.9258829886510123, - "learning_rate": 3.6194979093917944e-06, - "loss": 0.8206, - "num_input_tokens_seen": 39076825, - "step": 1860 - }, - { - "epoch": 0.22377201947934827, - "grad_norm": 2.5631935892351754, - "learning_rate": 3.6190407043285724e-06, - "loss": 0.8754, - "num_input_tokens_seen": 39094280, - "step": 1861 - }, - { - "epoch": 0.22389226236998738, - "grad_norm": 1.9336247433549711, - "learning_rate": 3.618583253658163e-06, - "loss": 0.7404, - "num_input_tokens_seen": 39114100, - "step": 1862 - }, - { - "epoch": 0.22401250526062647, - "grad_norm": 1.812798208582827, - "learning_rate": 3.618125557449961e-06, - "loss": 0.8671, - "num_input_tokens_seen": 39131875, - "step": 1863 - }, - { - "epoch": 0.22413274815126555, - "grad_norm": 1.9761938795379543, - "learning_rate": 3.6176676157733983e-06, - "loss": 0.8285, - "num_input_tokens_seen": 39146605, - "step": 1864 - }, - { - "epoch": 0.22425299104190466, - "grad_norm": 2.0903878119532586, - "learning_rate": 3.6172094286979443e-06, - "loss": 0.7448, - "num_input_tokens_seen": 39163695, - "step": 1865 - }, - { - "epoch": 0.22437323393254374, - "grad_norm": 1.3381082508435713, - "learning_rate": 3.6167509962931064e-06, - "loss": 0.8006, - "num_input_tokens_seen": 39189115, - "step": 1866 - }, - { - "epoch": 0.22449347682318282, - "grad_norm": 2.3726250221171066, - "learning_rate": 3.6162923186284276e-06, - "loss": 0.7662, - "num_input_tokens_seen": 39204795, - "step": 1867 - }, - { - "epoch": 0.2246137197138219, - "grad_norm": 2.4969097510737392, - "learning_rate": 3.6158333957734888e-06, - "loss": 0.859, - "num_input_tokens_seen": 39223105, - "step": 1868 - }, - { - "epoch": 0.22473396260446102, - "grad_norm": 2.1247521939150404, - "learning_rate": 3.6153742277979088e-06, - "loss": 0.8344, - "num_input_tokens_seen": 39240255, - "step": 1869 - }, - { - "epoch": 0.2248542054951001, - "grad_norm": 2.168341137545459, - "learning_rate": 3.6149148147713434e-06, - "loss": 0.7852, - "num_input_tokens_seen": 39258210, - "step": 1870 - }, - { - "epoch": 0.22497444838573918, - "grad_norm": 1.7610091189798507, - "learning_rate": 3.614455156763484e-06, - "loss": 0.8603, - "num_input_tokens_seen": 39276235, - "step": 1871 - }, - { - "epoch": 0.2250946912763783, - "grad_norm": 2.0717717211434974, - "learning_rate": 3.613995253844061e-06, - "loss": 0.7109, - "num_input_tokens_seen": 39293635, - "step": 1872 - }, - { - "epoch": 0.22521493416701738, - "grad_norm": 2.408855250361453, - "learning_rate": 3.6135351060828405e-06, - "loss": 0.8072, - "num_input_tokens_seen": 39313830, - "step": 1873 - }, - { - "epoch": 0.22533517705765646, - "grad_norm": 2.869815449067798, - "learning_rate": 3.6130747135496285e-06, - "loss": 0.6872, - "num_input_tokens_seen": 39332550, - "step": 1874 - }, - { - "epoch": 0.22545541994829554, - "grad_norm": 1.7999228418991604, - "learning_rate": 3.6126140763142646e-06, - "loss": 0.6594, - "num_input_tokens_seen": 39357300, - "step": 1875 - }, - { - "epoch": 0.22557566283893465, - "grad_norm": 2.7960108686994563, - "learning_rate": 3.6121531944466275e-06, - "loss": 0.847, - "num_input_tokens_seen": 39374345, - "step": 1876 - }, - { - "epoch": 0.22569590572957374, - "grad_norm": 1.8832018494830414, - "learning_rate": 3.611692068016633e-06, - "loss": 0.7783, - "num_input_tokens_seen": 39390395, - "step": 1877 - }, - { - "epoch": 0.22581614862021282, - "grad_norm": 2.1249052589571793, - "learning_rate": 3.611230697094233e-06, - "loss": 0.7417, - "num_input_tokens_seen": 39406815, - "step": 1878 - }, - { - "epoch": 0.22593639151085193, - "grad_norm": 1.913001581318875, - "learning_rate": 3.610769081749417e-06, - "loss": 0.8721, - "num_input_tokens_seen": 39426755, - "step": 1879 - }, - { - "epoch": 0.226056634401491, - "grad_norm": 2.610814296566517, - "learning_rate": 3.6103072220522117e-06, - "loss": 0.705, - "num_input_tokens_seen": 39442005, - "step": 1880 - }, - { - "epoch": 0.2261768772921301, - "grad_norm": 1.8198415178625762, - "learning_rate": 3.609845118072682e-06, - "loss": 0.9099, - "num_input_tokens_seen": 39460395, - "step": 1881 - }, - { - "epoch": 0.2262971201827692, - "grad_norm": 2.9192030319729048, - "learning_rate": 3.6093827698809276e-06, - "loss": 0.7915, - "num_input_tokens_seen": 39479215, - "step": 1882 - }, - { - "epoch": 0.2264173630734083, - "grad_norm": 2.5983784499890774, - "learning_rate": 3.6089201775470864e-06, - "loss": 0.8402, - "num_input_tokens_seen": 39494390, - "step": 1883 - }, - { - "epoch": 0.22653760596404737, - "grad_norm": 1.4230749118343458, - "learning_rate": 3.6084573411413334e-06, - "loss": 0.7761, - "num_input_tokens_seen": 39513505, - "step": 1884 - }, - { - "epoch": 0.22665784885468646, - "grad_norm": 2.8641327338007176, - "learning_rate": 3.607994260733881e-06, - "loss": 0.8118, - "num_input_tokens_seen": 39532465, - "step": 1885 - }, - { - "epoch": 0.22677809174532557, - "grad_norm": 1.5481582678222425, - "learning_rate": 3.6075309363949776e-06, - "loss": 0.7425, - "num_input_tokens_seen": 39551355, - "step": 1886 - }, - { - "epoch": 0.22689833463596465, - "grad_norm": 3.1816065893436445, - "learning_rate": 3.607067368194909e-06, - "loss": 0.8027, - "num_input_tokens_seen": 39569440, - "step": 1887 - }, - { - "epoch": 0.22701857752660373, - "grad_norm": 1.9406530019100705, - "learning_rate": 3.606603556203999e-06, - "loss": 0.8073, - "num_input_tokens_seen": 39594105, - "step": 1888 - }, - { - "epoch": 0.22713882041724284, - "grad_norm": 1.8902317384700433, - "learning_rate": 3.6061395004926066e-06, - "loss": 0.8358, - "num_input_tokens_seen": 39612760, - "step": 1889 - }, - { - "epoch": 0.22725906330788193, - "grad_norm": 2.0761888540522486, - "learning_rate": 3.6056752011311285e-06, - "loss": 0.8437, - "num_input_tokens_seen": 39630940, - "step": 1890 - }, - { - "epoch": 0.227379306198521, - "grad_norm": 3.1060045921748887, - "learning_rate": 3.60521065819e-06, - "loss": 0.7979, - "num_input_tokens_seen": 39647970, - "step": 1891 - }, - { - "epoch": 0.2274995490891601, - "grad_norm": 1.7324612644895736, - "learning_rate": 3.60474587173969e-06, - "loss": 0.8657, - "num_input_tokens_seen": 39666175, - "step": 1892 - }, - { - "epoch": 0.2276197919797992, - "grad_norm": 1.9323524270200998, - "learning_rate": 3.6042808418507084e-06, - "loss": 0.8293, - "num_input_tokens_seen": 39683580, - "step": 1893 - }, - { - "epoch": 0.22774003487043828, - "grad_norm": 1.941570105420506, - "learning_rate": 3.6038155685935976e-06, - "loss": 0.7653, - "num_input_tokens_seen": 39699870, - "step": 1894 - }, - { - "epoch": 0.22786027776107737, - "grad_norm": 2.4576588742409817, - "learning_rate": 3.60335005203894e-06, - "loss": 0.6998, - "num_input_tokens_seen": 39716260, - "step": 1895 - }, - { - "epoch": 0.22798052065171648, - "grad_norm": 0.9042930779975276, - "learning_rate": 3.6028842922573553e-06, - "loss": 0.6839, - "num_input_tokens_seen": 39780125, - "step": 1896 - }, - { - "epoch": 0.22810076354235556, - "grad_norm": 0.8898505458941157, - "learning_rate": 3.602418289319497e-06, - "loss": 0.6598, - "num_input_tokens_seen": 39838400, - "step": 1897 - }, - { - "epoch": 0.22822100643299464, - "grad_norm": 1.8621562627362105, - "learning_rate": 3.601952043296059e-06, - "loss": 0.727, - "num_input_tokens_seen": 39858115, - "step": 1898 - }, - { - "epoch": 0.22834124932363373, - "grad_norm": 1.9052214302192667, - "learning_rate": 3.6014855542577696e-06, - "loss": 0.8039, - "num_input_tokens_seen": 39875045, - "step": 1899 - }, - { - "epoch": 0.22846149221427284, - "grad_norm": 2.547643582378809, - "learning_rate": 3.6010188222753943e-06, - "loss": 0.8392, - "num_input_tokens_seen": 39895535, - "step": 1900 - }, - { - "epoch": 0.22858173510491192, - "grad_norm": 0.9733291840628823, - "learning_rate": 3.6005518474197372e-06, - "loss": 0.6705, - "num_input_tokens_seen": 39947300, - "step": 1901 - }, - { - "epoch": 0.228701977995551, - "grad_norm": 2.0508446792495802, - "learning_rate": 3.6000846297616373e-06, - "loss": 0.7875, - "num_input_tokens_seen": 39965320, - "step": 1902 - }, - { - "epoch": 0.22882222088619011, - "grad_norm": 2.3883241773732675, - "learning_rate": 3.5996171693719717e-06, - "loss": 0.7304, - "num_input_tokens_seen": 39981135, - "step": 1903 - }, - { - "epoch": 0.2289424637768292, - "grad_norm": 0.908203707794976, - "learning_rate": 3.5991494663216528e-06, - "loss": 0.6674, - "num_input_tokens_seen": 40043840, - "step": 1904 - }, - { - "epoch": 0.22906270666746828, - "grad_norm": 2.7397214990218233, - "learning_rate": 3.5986815206816314e-06, - "loss": 0.8742, - "num_input_tokens_seen": 40062380, - "step": 1905 - }, - { - "epoch": 0.2291829495581074, - "grad_norm": 1.7228749480474173, - "learning_rate": 3.598213332522895e-06, - "loss": 0.7424, - "num_input_tokens_seen": 40082130, - "step": 1906 - }, - { - "epoch": 0.22930319244874647, - "grad_norm": 1.946403900680886, - "learning_rate": 3.597744901916466e-06, - "loss": 0.7714, - "num_input_tokens_seen": 40103135, - "step": 1907 - }, - { - "epoch": 0.22942343533938556, - "grad_norm": 1.871246327206096, - "learning_rate": 3.5972762289334058e-06, - "loss": 0.7642, - "num_input_tokens_seen": 40122485, - "step": 1908 - }, - { - "epoch": 0.22954367823002464, - "grad_norm": 2.073440700065059, - "learning_rate": 3.5968073136448116e-06, - "loss": 0.8482, - "num_input_tokens_seen": 40140225, - "step": 1909 - }, - { - "epoch": 0.22966392112066375, - "grad_norm": 2.0128507389733117, - "learning_rate": 3.596338156121818e-06, - "loss": 0.9019, - "num_input_tokens_seen": 40158830, - "step": 1910 - }, - { - "epoch": 0.22978416401130283, - "grad_norm": 0.7991249344780432, - "learning_rate": 3.595868756435595e-06, - "loss": 0.6227, - "num_input_tokens_seen": 40226230, - "step": 1911 - }, - { - "epoch": 0.22990440690194192, - "grad_norm": 2.152751563852422, - "learning_rate": 3.5953991146573504e-06, - "loss": 0.7911, - "num_input_tokens_seen": 40244595, - "step": 1912 - }, - { - "epoch": 0.23002464979258103, - "grad_norm": 2.382317929602197, - "learning_rate": 3.5949292308583294e-06, - "loss": 0.8227, - "num_input_tokens_seen": 40257560, - "step": 1913 - }, - { - "epoch": 0.2301448926832201, - "grad_norm": 2.14384655963776, - "learning_rate": 3.5944591051098113e-06, - "loss": 0.8011, - "num_input_tokens_seen": 40276460, - "step": 1914 - }, - { - "epoch": 0.2302651355738592, - "grad_norm": 2.059664286123235, - "learning_rate": 3.593988737483115e-06, - "loss": 0.8117, - "num_input_tokens_seen": 40296120, - "step": 1915 - }, - { - "epoch": 0.23038537846449827, - "grad_norm": 2.2813009452218607, - "learning_rate": 3.5935181280495947e-06, - "loss": 0.7799, - "num_input_tokens_seen": 40314420, - "step": 1916 - }, - { - "epoch": 0.23050562135513739, - "grad_norm": 0.9350073377708853, - "learning_rate": 3.5930472768806412e-06, - "loss": 0.5852, - "num_input_tokens_seen": 40372810, - "step": 1917 - }, - { - "epoch": 0.23062586424577647, - "grad_norm": 1.9011411373269642, - "learning_rate": 3.5925761840476826e-06, - "loss": 0.7668, - "num_input_tokens_seen": 40391140, - "step": 1918 - }, - { - "epoch": 0.23074610713641555, - "grad_norm": 1.9402603982195683, - "learning_rate": 3.592104849622183e-06, - "loss": 0.8141, - "num_input_tokens_seen": 40413115, - "step": 1919 - }, - { - "epoch": 0.23086635002705466, - "grad_norm": 1.5434925609479349, - "learning_rate": 3.591633273675644e-06, - "loss": 0.7293, - "num_input_tokens_seen": 40435070, - "step": 1920 - }, - { - "epoch": 0.23098659291769374, - "grad_norm": 1.0316945798435038, - "learning_rate": 3.591161456279602e-06, - "loss": 0.624, - "num_input_tokens_seen": 40480335, - "step": 1921 - }, - { - "epoch": 0.23110683580833283, - "grad_norm": 1.5438785500942025, - "learning_rate": 3.590689397505633e-06, - "loss": 0.7928, - "num_input_tokens_seen": 40500965, - "step": 1922 - }, - { - "epoch": 0.2312270786989719, - "grad_norm": 1.9166282684196445, - "learning_rate": 3.590217097425347e-06, - "loss": 0.8615, - "num_input_tokens_seen": 40520585, - "step": 1923 - }, - { - "epoch": 0.23134732158961102, - "grad_norm": 2.359851919679069, - "learning_rate": 3.589744556110391e-06, - "loss": 0.7084, - "num_input_tokens_seen": 40538295, - "step": 1924 - }, - { - "epoch": 0.2314675644802501, - "grad_norm": 1.6994741035968677, - "learning_rate": 3.5892717736324504e-06, - "loss": 0.8419, - "num_input_tokens_seen": 40560840, - "step": 1925 - }, - { - "epoch": 0.2315878073708892, - "grad_norm": 1.9013900870818812, - "learning_rate": 3.5887987500632447e-06, - "loss": 0.7293, - "num_input_tokens_seen": 40578565, - "step": 1926 - }, - { - "epoch": 0.2317080502615283, - "grad_norm": 2.1498371085491463, - "learning_rate": 3.5883254854745325e-06, - "loss": 0.8286, - "num_input_tokens_seen": 40596675, - "step": 1927 - }, - { - "epoch": 0.23182829315216738, - "grad_norm": 2.032336523439975, - "learning_rate": 3.587851979938107e-06, - "loss": 0.7539, - "num_input_tokens_seen": 40613285, - "step": 1928 - }, - { - "epoch": 0.23194853604280646, - "grad_norm": 3.3820340120023413, - "learning_rate": 3.5873782335257985e-06, - "loss": 0.7689, - "num_input_tokens_seen": 40631170, - "step": 1929 - }, - { - "epoch": 0.23206877893344555, - "grad_norm": 2.1114283118630945, - "learning_rate": 3.5869042463094744e-06, - "loss": 0.7872, - "num_input_tokens_seen": 40648605, - "step": 1930 - }, - { - "epoch": 0.23218902182408466, - "grad_norm": 2.309807461470747, - "learning_rate": 3.586430018361038e-06, - "loss": 0.7675, - "num_input_tokens_seen": 40668095, - "step": 1931 - }, - { - "epoch": 0.23230926471472374, - "grad_norm": 2.0155603281029335, - "learning_rate": 3.5859555497524283e-06, - "loss": 0.7661, - "num_input_tokens_seen": 40685050, - "step": 1932 - }, - { - "epoch": 0.23242950760536282, - "grad_norm": 2.16417221299743, - "learning_rate": 3.5854808405556237e-06, - "loss": 0.9129, - "num_input_tokens_seen": 40704005, - "step": 1933 - }, - { - "epoch": 0.23254975049600193, - "grad_norm": 2.4675576852434182, - "learning_rate": 3.585005890842635e-06, - "loss": 0.7536, - "num_input_tokens_seen": 40722275, - "step": 1934 - }, - { - "epoch": 0.23266999338664102, - "grad_norm": 1.8692484351588736, - "learning_rate": 3.584530700685514e-06, - "loss": 0.8464, - "num_input_tokens_seen": 40742255, - "step": 1935 - }, - { - "epoch": 0.2327902362772801, - "grad_norm": 2.3222816585076345, - "learning_rate": 3.584055270156345e-06, - "loss": 0.8953, - "num_input_tokens_seen": 40758175, - "step": 1936 - }, - { - "epoch": 0.2329104791679192, - "grad_norm": 2.6789768470412003, - "learning_rate": 3.5835795993272513e-06, - "loss": 0.8155, - "num_input_tokens_seen": 40776180, - "step": 1937 - }, - { - "epoch": 0.2330307220585583, - "grad_norm": 1.9214433835965385, - "learning_rate": 3.583103688270391e-06, - "loss": 0.7092, - "num_input_tokens_seen": 40795680, - "step": 1938 - }, - { - "epoch": 0.23315096494919738, - "grad_norm": 2.04890629295157, - "learning_rate": 3.58262753705796e-06, - "loss": 0.8824, - "num_input_tokens_seen": 40810290, - "step": 1939 - }, - { - "epoch": 0.23327120783983646, - "grad_norm": 0.7816616211251448, - "learning_rate": 3.5821511457621902e-06, - "loss": 0.5702, - "num_input_tokens_seen": 40867310, - "step": 1940 - }, - { - "epoch": 0.23339145073047557, - "grad_norm": 4.122146312504833, - "learning_rate": 3.5816745144553497e-06, - "loss": 0.8075, - "num_input_tokens_seen": 40882350, - "step": 1941 - }, - { - "epoch": 0.23351169362111465, - "grad_norm": 2.0814030411958226, - "learning_rate": 3.5811976432097424e-06, - "loss": 0.7522, - "num_input_tokens_seen": 40899740, - "step": 1942 - }, - { - "epoch": 0.23363193651175373, - "grad_norm": 2.078498202941445, - "learning_rate": 3.58072053209771e-06, - "loss": 0.8438, - "num_input_tokens_seen": 40916015, - "step": 1943 - }, - { - "epoch": 0.23375217940239285, - "grad_norm": 2.231386415973234, - "learning_rate": 3.5802431811916296e-06, - "loss": 0.7906, - "num_input_tokens_seen": 40932345, - "step": 1944 - }, - { - "epoch": 0.23387242229303193, - "grad_norm": 2.0200699406527702, - "learning_rate": 3.579765590563916e-06, - "loss": 0.7942, - "num_input_tokens_seen": 40951465, - "step": 1945 - }, - { - "epoch": 0.233992665183671, - "grad_norm": 2.374967798747567, - "learning_rate": 3.579287760287017e-06, - "loss": 0.8102, - "num_input_tokens_seen": 40971935, - "step": 1946 - }, - { - "epoch": 0.2341129080743101, - "grad_norm": 1.7044282873928702, - "learning_rate": 3.5788096904334214e-06, - "loss": 0.722, - "num_input_tokens_seen": 40993365, - "step": 1947 - }, - { - "epoch": 0.2342331509649492, - "grad_norm": 2.611296468352227, - "learning_rate": 3.578331381075651e-06, - "loss": 0.8012, - "num_input_tokens_seen": 41013585, - "step": 1948 - }, - { - "epoch": 0.2343533938555883, - "grad_norm": 2.1889266988398375, - "learning_rate": 3.5778528322862646e-06, - "loss": 0.6941, - "num_input_tokens_seen": 41032125, - "step": 1949 - }, - { - "epoch": 0.23447363674622737, - "grad_norm": 1.9381579271819027, - "learning_rate": 3.577374044137858e-06, - "loss": 0.8577, - "num_input_tokens_seen": 41052600, - "step": 1950 - }, - { - "epoch": 0.23459387963686648, - "grad_norm": 2.1886096039608622, - "learning_rate": 3.5768950167030633e-06, - "loss": 0.7324, - "num_input_tokens_seen": 41077020, - "step": 1951 - }, - { - "epoch": 0.23471412252750556, - "grad_norm": 1.8588150875321676, - "learning_rate": 3.576415750054548e-06, - "loss": 0.7776, - "num_input_tokens_seen": 41096860, - "step": 1952 - }, - { - "epoch": 0.23483436541814465, - "grad_norm": 1.8334784066444243, - "learning_rate": 3.5759362442650172e-06, - "loss": 0.8447, - "num_input_tokens_seen": 41113330, - "step": 1953 - }, - { - "epoch": 0.23495460830878373, - "grad_norm": 2.378774743979848, - "learning_rate": 3.5754564994072113e-06, - "loss": 0.8479, - "num_input_tokens_seen": 41131890, - "step": 1954 - }, - { - "epoch": 0.23507485119942284, - "grad_norm": 2.534952939312894, - "learning_rate": 3.5749765155539067e-06, - "loss": 0.6115, - "num_input_tokens_seen": 41152095, - "step": 1955 - }, - { - "epoch": 0.23519509409006192, - "grad_norm": 2.562524259204442, - "learning_rate": 3.574496292777917e-06, - "loss": 0.9201, - "num_input_tokens_seen": 41170025, - "step": 1956 - }, - { - "epoch": 0.235315336980701, - "grad_norm": 1.834295416569206, - "learning_rate": 3.574015831152092e-06, - "loss": 0.7077, - "num_input_tokens_seen": 41190160, - "step": 1957 - }, - { - "epoch": 0.23543557987134012, - "grad_norm": 3.2013452052103997, - "learning_rate": 3.5735351307493166e-06, - "loss": 0.8434, - "num_input_tokens_seen": 41207830, - "step": 1958 - }, - { - "epoch": 0.2355558227619792, - "grad_norm": 1.6488427306440858, - "learning_rate": 3.5730541916425127e-06, - "loss": 0.7357, - "num_input_tokens_seen": 41229030, - "step": 1959 - }, - { - "epoch": 0.23567606565261828, - "grad_norm": 2.0596323723126404, - "learning_rate": 3.572573013904639e-06, - "loss": 0.8533, - "num_input_tokens_seen": 41248660, - "step": 1960 - }, - { - "epoch": 0.2357963085432574, - "grad_norm": 1.9912405631338357, - "learning_rate": 3.572091597608689e-06, - "loss": 0.9165, - "num_input_tokens_seen": 41266505, - "step": 1961 - }, - { - "epoch": 0.23591655143389648, - "grad_norm": 2.295139618794565, - "learning_rate": 3.571609942827694e-06, - "loss": 0.7327, - "num_input_tokens_seen": 41285340, - "step": 1962 - }, - { - "epoch": 0.23603679432453556, - "grad_norm": 1.8240603745795938, - "learning_rate": 3.57112804963472e-06, - "loss": 0.8712, - "num_input_tokens_seen": 41303275, - "step": 1963 - }, - { - "epoch": 0.23615703721517464, - "grad_norm": 1.8664697395555647, - "learning_rate": 3.57064591810287e-06, - "loss": 0.7622, - "num_input_tokens_seen": 41320495, - "step": 1964 - }, - { - "epoch": 0.23627728010581375, - "grad_norm": 2.51726234462876, - "learning_rate": 3.570163548305284e-06, - "loss": 0.8031, - "num_input_tokens_seen": 41339145, - "step": 1965 - }, - { - "epoch": 0.23639752299645284, - "grad_norm": 2.6233812697566448, - "learning_rate": 3.569680940315135e-06, - "loss": 0.7007, - "num_input_tokens_seen": 41355265, - "step": 1966 - }, - { - "epoch": 0.23651776588709192, - "grad_norm": 1.8418930640849012, - "learning_rate": 3.5691980942056356e-06, - "loss": 0.8124, - "num_input_tokens_seen": 41374355, - "step": 1967 - }, - { - "epoch": 0.23663800877773103, - "grad_norm": 1.7873392222538373, - "learning_rate": 3.5687150100500332e-06, - "loss": 0.788, - "num_input_tokens_seen": 41393775, - "step": 1968 - }, - { - "epoch": 0.2367582516683701, - "grad_norm": 1.9145089141168465, - "learning_rate": 3.568231687921611e-06, - "loss": 0.7407, - "num_input_tokens_seen": 41413670, - "step": 1969 - }, - { - "epoch": 0.2368784945590092, - "grad_norm": 1.5665581282558378, - "learning_rate": 3.5677481278936883e-06, - "loss": 0.8039, - "num_input_tokens_seen": 41432970, - "step": 1970 - }, - { - "epoch": 0.23699873744964828, - "grad_norm": 0.8248098712175799, - "learning_rate": 3.5672643300396214e-06, - "loss": 0.5901, - "num_input_tokens_seen": 41501835, - "step": 1971 - }, - { - "epoch": 0.2371189803402874, - "grad_norm": 2.6703042533564743, - "learning_rate": 3.566780294432802e-06, - "loss": 0.6846, - "num_input_tokens_seen": 41518730, - "step": 1972 - }, - { - "epoch": 0.23723922323092647, - "grad_norm": 2.716035205999358, - "learning_rate": 3.566296021146657e-06, - "loss": 0.7399, - "num_input_tokens_seen": 41537830, - "step": 1973 - }, - { - "epoch": 0.23735946612156555, - "grad_norm": 1.8389660109622008, - "learning_rate": 3.565811510254652e-06, - "loss": 0.7292, - "num_input_tokens_seen": 41558430, - "step": 1974 - }, - { - "epoch": 0.23747970901220466, - "grad_norm": 0.7631641606922571, - "learning_rate": 3.5653267618302845e-06, - "loss": 0.5905, - "num_input_tokens_seen": 41625730, - "step": 1975 - }, - { - "epoch": 0.23759995190284375, - "grad_norm": 2.6548712039004365, - "learning_rate": 3.564841775947093e-06, - "loss": 0.855, - "num_input_tokens_seen": 41646340, - "step": 1976 - }, - { - "epoch": 0.23772019479348283, - "grad_norm": 2.0541591140911724, - "learning_rate": 3.5643565526786475e-06, - "loss": 0.7587, - "num_input_tokens_seen": 41666000, - "step": 1977 - }, - { - "epoch": 0.2378404376841219, - "grad_norm": 1.5183100645913723, - "learning_rate": 3.5638710920985574e-06, - "loss": 0.7697, - "num_input_tokens_seen": 41687180, - "step": 1978 - }, - { - "epoch": 0.23796068057476102, - "grad_norm": 2.2118660110579227, - "learning_rate": 3.563385394280465e-06, - "loss": 0.8153, - "num_input_tokens_seen": 41705225, - "step": 1979 - }, - { - "epoch": 0.2380809234654001, - "grad_norm": 2.085147084130721, - "learning_rate": 3.5628994592980527e-06, - "loss": 0.7709, - "num_input_tokens_seen": 41722850, - "step": 1980 - }, - { - "epoch": 0.2382011663560392, - "grad_norm": 1.868855569274737, - "learning_rate": 3.562413287225034e-06, - "loss": 0.7002, - "num_input_tokens_seen": 41740680, - "step": 1981 - }, - { - "epoch": 0.2383214092466783, - "grad_norm": 2.3100230194182365, - "learning_rate": 3.5619268781351623e-06, - "loss": 0.8874, - "num_input_tokens_seen": 41758470, - "step": 1982 - }, - { - "epoch": 0.23844165213731738, - "grad_norm": 1.8363520100131123, - "learning_rate": 3.5614402321022256e-06, - "loss": 0.7709, - "num_input_tokens_seen": 41776020, - "step": 1983 - }, - { - "epoch": 0.23856189502795647, - "grad_norm": 1.8507315502145791, - "learning_rate": 3.5609533492000463e-06, - "loss": 0.8678, - "num_input_tokens_seen": 41794630, - "step": 1984 - }, - { - "epoch": 0.23868213791859555, - "grad_norm": 2.299590355263758, - "learning_rate": 3.560466229502485e-06, - "loss": 0.7784, - "num_input_tokens_seen": 41813695, - "step": 1985 - }, - { - "epoch": 0.23880238080923466, - "grad_norm": 2.186898759084893, - "learning_rate": 3.5599788730834384e-06, - "loss": 0.8909, - "num_input_tokens_seen": 41831375, - "step": 1986 - }, - { - "epoch": 0.23892262369987374, - "grad_norm": 2.3807540324637033, - "learning_rate": 3.559491280016836e-06, - "loss": 0.8024, - "num_input_tokens_seen": 41849040, - "step": 1987 - }, - { - "epoch": 0.23904286659051283, - "grad_norm": 1.924623316523647, - "learning_rate": 3.5590034503766465e-06, - "loss": 0.715, - "num_input_tokens_seen": 41868425, - "step": 1988 - }, - { - "epoch": 0.23916310948115194, - "grad_norm": 2.274216111557667, - "learning_rate": 3.558515384236874e-06, - "loss": 0.8194, - "num_input_tokens_seen": 41885575, - "step": 1989 - }, - { - "epoch": 0.23928335237179102, - "grad_norm": 1.812759918663933, - "learning_rate": 3.558027081671556e-06, - "loss": 0.8336, - "num_input_tokens_seen": 41902280, - "step": 1990 - }, - { - "epoch": 0.2394035952624301, - "grad_norm": 2.152535117884136, - "learning_rate": 3.557538542754769e-06, - "loss": 0.6907, - "num_input_tokens_seen": 41921695, - "step": 1991 - }, - { - "epoch": 0.2395238381530692, - "grad_norm": 1.943737888704646, - "learning_rate": 3.557049767560623e-06, - "loss": 0.6659, - "num_input_tokens_seen": 41940330, - "step": 1992 - }, - { - "epoch": 0.2396440810437083, - "grad_norm": 1.9323410978081577, - "learning_rate": 3.5565607561632655e-06, - "loss": 0.8579, - "num_input_tokens_seen": 41958890, - "step": 1993 - }, - { - "epoch": 0.23976432393434738, - "grad_norm": 2.3876692544241798, - "learning_rate": 3.5560715086368787e-06, - "loss": 0.788, - "num_input_tokens_seen": 41976480, - "step": 1994 - }, - { - "epoch": 0.23988456682498646, - "grad_norm": 1.8914006405504875, - "learning_rate": 3.5555820250556816e-06, - "loss": 0.8175, - "num_input_tokens_seen": 41993400, - "step": 1995 - }, - { - "epoch": 0.24000480971562557, - "grad_norm": 2.3573687831034613, - "learning_rate": 3.5550923054939278e-06, - "loss": 0.6909, - "num_input_tokens_seen": 42012575, - "step": 1996 - }, - { - "epoch": 0.24012505260626466, - "grad_norm": 1.9824455732222286, - "learning_rate": 3.554602350025908e-06, - "loss": 0.7427, - "num_input_tokens_seen": 42033390, - "step": 1997 - }, - { - "epoch": 0.24024529549690374, - "grad_norm": 2.180520785259545, - "learning_rate": 3.5541121587259477e-06, - "loss": 0.8042, - "num_input_tokens_seen": 42050945, - "step": 1998 - }, - { - "epoch": 0.24036553838754285, - "grad_norm": 0.8148133872401061, - "learning_rate": 3.553621731668408e-06, - "loss": 0.5895, - "num_input_tokens_seen": 42113875, - "step": 1999 - }, - { - "epoch": 0.24048578127818193, - "grad_norm": 1.6484687125744413, - "learning_rate": 3.553131068927688e-06, - "loss": 0.8321, - "num_input_tokens_seen": 42132000, - "step": 2000 - }, - { - "epoch": 0.24060602416882101, - "grad_norm": 1.5957627459394856, - "learning_rate": 3.552640170578219e-06, - "loss": 0.8017, - "num_input_tokens_seen": 42151970, - "step": 2001 - }, - { - "epoch": 0.2407262670594601, - "grad_norm": 1.8658965983823987, - "learning_rate": 3.5521490366944703e-06, - "loss": 0.77, - "num_input_tokens_seen": 42169340, - "step": 2002 - }, - { - "epoch": 0.2408465099500992, - "grad_norm": 2.0297332442341562, - "learning_rate": 3.5516576673509474e-06, - "loss": 0.7948, - "num_input_tokens_seen": 42187060, - "step": 2003 - }, - { - "epoch": 0.2409667528407383, - "grad_norm": 1.6637318474426894, - "learning_rate": 3.5511660626221896e-06, - "loss": 0.8544, - "num_input_tokens_seen": 42207420, - "step": 2004 - }, - { - "epoch": 0.24108699573137737, - "grad_norm": 2.418551717755083, - "learning_rate": 3.5506742225827744e-06, - "loss": 0.8789, - "num_input_tokens_seen": 42223995, - "step": 2005 - }, - { - "epoch": 0.24120723862201648, - "grad_norm": 2.1766767127385402, - "learning_rate": 3.5501821473073116e-06, - "loss": 0.8983, - "num_input_tokens_seen": 42240300, - "step": 2006 - }, - { - "epoch": 0.24132748151265557, - "grad_norm": 2.2011696445638007, - "learning_rate": 3.54968983687045e-06, - "loss": 0.8575, - "num_input_tokens_seen": 42256890, - "step": 2007 - }, - { - "epoch": 0.24144772440329465, - "grad_norm": 2.3247580656665994, - "learning_rate": 3.5491972913468717e-06, - "loss": 0.898, - "num_input_tokens_seen": 42273135, - "step": 2008 - }, - { - "epoch": 0.24156796729393373, - "grad_norm": 2.1757664170620674, - "learning_rate": 3.548704510811297e-06, - "loss": 0.791, - "num_input_tokens_seen": 42292050, - "step": 2009 - }, - { - "epoch": 0.24168821018457284, - "grad_norm": 2.282454304731145, - "learning_rate": 3.5482114953384787e-06, - "loss": 0.7397, - "num_input_tokens_seen": 42311000, - "step": 2010 - }, - { - "epoch": 0.24180845307521193, - "grad_norm": 2.9543059651519874, - "learning_rate": 3.5477182450032077e-06, - "loss": 0.8391, - "num_input_tokens_seen": 42329320, - "step": 2011 - }, - { - "epoch": 0.241928695965851, - "grad_norm": 1.9156207666066267, - "learning_rate": 3.5472247598803097e-06, - "loss": 0.8301, - "num_input_tokens_seen": 42348385, - "step": 2012 - }, - { - "epoch": 0.24204893885649012, - "grad_norm": 2.2988084307208756, - "learning_rate": 3.546731040044645e-06, - "loss": 0.8533, - "num_input_tokens_seen": 42363275, - "step": 2013 - }, - { - "epoch": 0.2421691817471292, - "grad_norm": 2.313373638790436, - "learning_rate": 3.546237085571112e-06, - "loss": 0.7484, - "num_input_tokens_seen": 42381430, - "step": 2014 - }, - { - "epoch": 0.24228942463776829, - "grad_norm": 2.0930367563196532, - "learning_rate": 3.5457428965346425e-06, - "loss": 0.7244, - "num_input_tokens_seen": 42400090, - "step": 2015 - }, - { - "epoch": 0.2424096675284074, - "grad_norm": 1.5929314870914237, - "learning_rate": 3.545248473010205e-06, - "loss": 0.7415, - "num_input_tokens_seen": 42422615, - "step": 2016 - }, - { - "epoch": 0.24252991041904648, - "grad_norm": 1.9203239408810884, - "learning_rate": 3.544753815072802e-06, - "loss": 0.8637, - "num_input_tokens_seen": 42440990, - "step": 2017 - }, - { - "epoch": 0.24265015330968556, - "grad_norm": 1.98105279902387, - "learning_rate": 3.544258922797474e-06, - "loss": 0.8844, - "num_input_tokens_seen": 42458830, - "step": 2018 - }, - { - "epoch": 0.24277039620032465, - "grad_norm": 1.6324632982825704, - "learning_rate": 3.543763796259295e-06, - "loss": 0.7763, - "num_input_tokens_seen": 42478505, - "step": 2019 - }, - { - "epoch": 0.24289063909096376, - "grad_norm": 1.7098462790156814, - "learning_rate": 3.5432684355333754e-06, - "loss": 0.9031, - "num_input_tokens_seen": 42496880, - "step": 2020 - }, - { - "epoch": 0.24301088198160284, - "grad_norm": 2.0373781607688772, - "learning_rate": 3.5427728406948613e-06, - "loss": 0.7584, - "num_input_tokens_seen": 42515715, - "step": 2021 - }, - { - "epoch": 0.24313112487224192, - "grad_norm": 0.7904769503625179, - "learning_rate": 3.5422770118189336e-06, - "loss": 0.5995, - "num_input_tokens_seen": 42579270, - "step": 2022 - }, - { - "epoch": 0.24325136776288103, - "grad_norm": 2.4704779280396734, - "learning_rate": 3.54178094898081e-06, - "loss": 0.738, - "num_input_tokens_seen": 42600600, - "step": 2023 - }, - { - "epoch": 0.24337161065352012, - "grad_norm": 1.850751660477363, - "learning_rate": 3.5412846522557422e-06, - "loss": 0.7248, - "num_input_tokens_seen": 42621210, - "step": 2024 - }, - { - "epoch": 0.2434918535441592, - "grad_norm": 2.5380020233235903, - "learning_rate": 3.540788121719018e-06, - "loss": 0.739, - "num_input_tokens_seen": 42639350, - "step": 2025 - }, - { - "epoch": 0.24361209643479828, - "grad_norm": 1.7385055779837548, - "learning_rate": 3.5402913574459604e-06, - "loss": 0.8224, - "num_input_tokens_seen": 42658975, - "step": 2026 - }, - { - "epoch": 0.2437323393254374, - "grad_norm": 1.646546972204318, - "learning_rate": 3.5397943595119297e-06, - "loss": 0.8484, - "num_input_tokens_seen": 42680115, - "step": 2027 - }, - { - "epoch": 0.24385258221607647, - "grad_norm": 2.7384630475939957, - "learning_rate": 3.5392971279923177e-06, - "loss": 0.7692, - "num_input_tokens_seen": 42698055, - "step": 2028 - }, - { - "epoch": 0.24397282510671556, - "grad_norm": 2.1581138909219804, - "learning_rate": 3.5387996629625557e-06, - "loss": 0.8293, - "num_input_tokens_seen": 42715365, - "step": 2029 - }, - { - "epoch": 0.24409306799735467, - "grad_norm": 0.9004886639915187, - "learning_rate": 3.5383019644981083e-06, - "loss": 0.5971, - "num_input_tokens_seen": 42778780, - "step": 2030 - }, - { - "epoch": 0.24421331088799375, - "grad_norm": 2.453630244589166, - "learning_rate": 3.5378040326744763e-06, - "loss": 0.7286, - "num_input_tokens_seen": 42797985, - "step": 2031 - }, - { - "epoch": 0.24433355377863283, - "grad_norm": 3.5193544304400852, - "learning_rate": 3.5373058675671946e-06, - "loss": 0.8585, - "num_input_tokens_seen": 42815710, - "step": 2032 - }, - { - "epoch": 0.24445379666927192, - "grad_norm": 1.8330589959800159, - "learning_rate": 3.536807469251836e-06, - "loss": 0.7272, - "num_input_tokens_seen": 42834585, - "step": 2033 - }, - { - "epoch": 0.24457403955991103, - "grad_norm": 1.884339570486781, - "learning_rate": 3.5363088378040055e-06, - "loss": 0.8145, - "num_input_tokens_seen": 42853195, - "step": 2034 - }, - { - "epoch": 0.2446942824505501, - "grad_norm": 0.8071806796159787, - "learning_rate": 3.5358099732993463e-06, - "loss": 0.6803, - "num_input_tokens_seen": 42912025, - "step": 2035 - }, - { - "epoch": 0.2448145253411892, - "grad_norm": 2.0116741979293393, - "learning_rate": 3.5353108758135345e-06, - "loss": 0.8927, - "num_input_tokens_seen": 42930140, - "step": 2036 - }, - { - "epoch": 0.2449347682318283, - "grad_norm": 1.7226274994237825, - "learning_rate": 3.5348115454222843e-06, - "loss": 0.8095, - "num_input_tokens_seen": 42952445, - "step": 2037 - }, - { - "epoch": 0.2450550111224674, - "grad_norm": 1.890449788576488, - "learning_rate": 3.5343119822013425e-06, - "loss": 0.8568, - "num_input_tokens_seen": 42971275, - "step": 2038 - }, - { - "epoch": 0.24517525401310647, - "grad_norm": 1.8181680870509245, - "learning_rate": 3.533812186226493e-06, - "loss": 0.766, - "num_input_tokens_seen": 42991705, - "step": 2039 - }, - { - "epoch": 0.24529549690374555, - "grad_norm": 1.856803025547529, - "learning_rate": 3.5333121575735545e-06, - "loss": 0.7609, - "num_input_tokens_seen": 43011065, - "step": 2040 - }, - { - "epoch": 0.24541573979438466, - "grad_norm": 1.9609392417679323, - "learning_rate": 3.532811896318381e-06, - "loss": 0.7505, - "num_input_tokens_seen": 43032855, - "step": 2041 - }, - { - "epoch": 0.24553598268502375, - "grad_norm": 2.286515650532021, - "learning_rate": 3.5323114025368615e-06, - "loss": 0.8074, - "num_input_tokens_seen": 43047640, - "step": 2042 - }, - { - "epoch": 0.24565622557566283, - "grad_norm": 2.914654354511826, - "learning_rate": 3.53181067630492e-06, - "loss": 0.8096, - "num_input_tokens_seen": 43064830, - "step": 2043 - }, - { - "epoch": 0.24577646846630194, - "grad_norm": 1.8052415755406959, - "learning_rate": 3.5313097176985175e-06, - "loss": 0.7582, - "num_input_tokens_seen": 43082860, - "step": 2044 - }, - { - "epoch": 0.24589671135694102, - "grad_norm": 1.9229918510804753, - "learning_rate": 3.5308085267936482e-06, - "loss": 0.8005, - "num_input_tokens_seen": 43100295, - "step": 2045 - }, - { - "epoch": 0.2460169542475801, - "grad_norm": 1.8163909885262082, - "learning_rate": 3.530307103666342e-06, - "loss": 0.898, - "num_input_tokens_seen": 43119095, - "step": 2046 - }, - { - "epoch": 0.24613719713821922, - "grad_norm": 1.602627473312504, - "learning_rate": 3.5298054483926658e-06, - "loss": 0.8, - "num_input_tokens_seen": 43139510, - "step": 2047 - }, - { - "epoch": 0.2462574400288583, - "grad_norm": 2.415646835562988, - "learning_rate": 3.5293035610487187e-06, - "loss": 0.8306, - "num_input_tokens_seen": 43158595, - "step": 2048 - }, - { - "epoch": 0.24637768291949738, - "grad_norm": 0.7493648449419144, - "learning_rate": 3.5288014417106374e-06, - "loss": 0.6413, - "num_input_tokens_seen": 43224335, - "step": 2049 - }, - { - "epoch": 0.24649792581013646, - "grad_norm": 1.8568123621801542, - "learning_rate": 3.528299090454593e-06, - "loss": 0.7474, - "num_input_tokens_seen": 43244590, - "step": 2050 - }, - { - "epoch": 0.24661816870077558, - "grad_norm": 2.407486119507478, - "learning_rate": 3.527796507356792e-06, - "loss": 0.8165, - "num_input_tokens_seen": 43258200, - "step": 2051 - }, - { - "epoch": 0.24673841159141466, - "grad_norm": 2.7678091843740513, - "learning_rate": 3.527293692493475e-06, - "loss": 0.9039, - "num_input_tokens_seen": 43273785, - "step": 2052 - }, - { - "epoch": 0.24685865448205374, - "grad_norm": 2.840227984479663, - "learning_rate": 3.52679064594092e-06, - "loss": 0.7365, - "num_input_tokens_seen": 43290845, - "step": 2053 - }, - { - "epoch": 0.24697889737269285, - "grad_norm": 3.3607301221165264, - "learning_rate": 3.5262873677754375e-06, - "loss": 0.7391, - "num_input_tokens_seen": 43308570, - "step": 2054 - }, - { - "epoch": 0.24709914026333193, - "grad_norm": 1.6237964565075589, - "learning_rate": 3.5257838580733745e-06, - "loss": 0.8015, - "num_input_tokens_seen": 43327895, - "step": 2055 - }, - { - "epoch": 0.24721938315397102, - "grad_norm": 1.9121722110074375, - "learning_rate": 3.5252801169111138e-06, - "loss": 0.8675, - "num_input_tokens_seen": 43345280, - "step": 2056 - }, - { - "epoch": 0.2473396260446101, - "grad_norm": 1.6931106581847013, - "learning_rate": 3.524776144365072e-06, - "loss": 0.7968, - "num_input_tokens_seen": 43363455, - "step": 2057 - }, - { - "epoch": 0.2474598689352492, - "grad_norm": 1.6304278620037065, - "learning_rate": 3.5242719405117012e-06, - "loss": 0.7933, - "num_input_tokens_seen": 43382980, - "step": 2058 - }, - { - "epoch": 0.2475801118258883, - "grad_norm": 3.8068261928314855, - "learning_rate": 3.5237675054274893e-06, - "loss": 0.7508, - "num_input_tokens_seen": 43401900, - "step": 2059 - }, - { - "epoch": 0.24770035471652738, - "grad_norm": 2.305675794792676, - "learning_rate": 3.5232628391889584e-06, - "loss": 0.7935, - "num_input_tokens_seen": 43419910, - "step": 2060 - }, - { - "epoch": 0.2478205976071665, - "grad_norm": 2.6892557416943705, - "learning_rate": 3.522757941872666e-06, - "loss": 0.6339, - "num_input_tokens_seen": 43437785, - "step": 2061 - }, - { - "epoch": 0.24794084049780557, - "grad_norm": 1.609201304057219, - "learning_rate": 3.5222528135552042e-06, - "loss": 0.8237, - "num_input_tokens_seen": 43458965, - "step": 2062 - }, - { - "epoch": 0.24806108338844465, - "grad_norm": 1.8515480458279074, - "learning_rate": 3.5217474543132007e-06, - "loss": 0.8056, - "num_input_tokens_seen": 43477365, - "step": 2063 - }, - { - "epoch": 0.24818132627908374, - "grad_norm": 2.3621856143985607, - "learning_rate": 3.521241864223319e-06, - "loss": 0.6693, - "num_input_tokens_seen": 43496045, - "step": 2064 - }, - { - "epoch": 0.24830156916972285, - "grad_norm": 0.8180592572425286, - "learning_rate": 3.5207360433622552e-06, - "loss": 0.6341, - "num_input_tokens_seen": 43557765, - "step": 2065 - }, - { - "epoch": 0.24842181206036193, - "grad_norm": 1.5793706840060322, - "learning_rate": 3.5202299918067437e-06, - "loss": 0.7377, - "num_input_tokens_seen": 43581080, - "step": 2066 - }, - { - "epoch": 0.248542054951001, - "grad_norm": 2.2629269704749038, - "learning_rate": 3.519723709633551e-06, - "loss": 0.6948, - "num_input_tokens_seen": 43599560, - "step": 2067 - }, - { - "epoch": 0.24866229784164012, - "grad_norm": 1.880290825264862, - "learning_rate": 3.519217196919479e-06, - "loss": 0.8276, - "num_input_tokens_seen": 43618265, - "step": 2068 - }, - { - "epoch": 0.2487825407322792, - "grad_norm": 1.7902536325574798, - "learning_rate": 3.5187104537413664e-06, - "loss": 0.7233, - "num_input_tokens_seen": 43637185, - "step": 2069 - }, - { - "epoch": 0.2489027836229183, - "grad_norm": 2.0169731780268365, - "learning_rate": 3.518203480176086e-06, - "loss": 0.6741, - "num_input_tokens_seen": 43655835, - "step": 2070 - }, - { - "epoch": 0.2490230265135574, - "grad_norm": 1.6276325073230646, - "learning_rate": 3.517696276300545e-06, - "loss": 0.7944, - "num_input_tokens_seen": 43677095, - "step": 2071 - }, - { - "epoch": 0.24914326940419648, - "grad_norm": 2.3480156156059078, - "learning_rate": 3.517188842191685e-06, - "loss": 0.6989, - "num_input_tokens_seen": 43694965, - "step": 2072 - }, - { - "epoch": 0.24926351229483557, - "grad_norm": 2.033355337487096, - "learning_rate": 3.5166811779264837e-06, - "loss": 0.7504, - "num_input_tokens_seen": 43715005, - "step": 2073 - }, - { - "epoch": 0.24938375518547465, - "grad_norm": 1.8799020063600251, - "learning_rate": 3.5161732835819545e-06, - "loss": 0.7792, - "num_input_tokens_seen": 43734035, - "step": 2074 - }, - { - "epoch": 0.24950399807611376, - "grad_norm": 1.8782366078855302, - "learning_rate": 3.515665159235143e-06, - "loss": 0.8252, - "num_input_tokens_seen": 43752640, - "step": 2075 - }, - { - "epoch": 0.24962424096675284, - "grad_norm": 1.7172366300940103, - "learning_rate": 3.5151568049631318e-06, - "loss": 0.7514, - "num_input_tokens_seen": 43771075, - "step": 2076 - }, - { - "epoch": 0.24974448385739192, - "grad_norm": 1.535927721949452, - "learning_rate": 3.514648220843038e-06, - "loss": 0.7942, - "num_input_tokens_seen": 43792625, - "step": 2077 - }, - { - "epoch": 0.24986472674803104, - "grad_norm": 2.409551539587793, - "learning_rate": 3.514139406952014e-06, - "loss": 0.6723, - "num_input_tokens_seen": 43814370, - "step": 2078 - }, - { - "epoch": 0.24998496963867012, - "grad_norm": 1.6476746401070432, - "learning_rate": 3.5136303633672454e-06, - "loss": 0.8353, - "num_input_tokens_seen": 43834220, - "step": 2079 - }, - { - "epoch": 0.25010521252930923, - "grad_norm": 1.7154318910086155, - "learning_rate": 3.5131210901659544e-06, - "loss": 0.7453, - "num_input_tokens_seen": 43855695, - "step": 2080 - }, - { - "epoch": 0.2502254554199483, - "grad_norm": 4.150297074338366, - "learning_rate": 3.5126115874253967e-06, - "loss": 0.8182, - "num_input_tokens_seen": 43874970, - "step": 2081 - }, - { - "epoch": 0.2503456983105874, - "grad_norm": 2.6762236521400276, - "learning_rate": 3.5121018552228644e-06, - "loss": 0.7993, - "num_input_tokens_seen": 43893195, - "step": 2082 - }, - { - "epoch": 0.2504659412012265, - "grad_norm": 2.159700302791591, - "learning_rate": 3.5115918936356827e-06, - "loss": 0.7593, - "num_input_tokens_seen": 43909670, - "step": 2083 - }, - { - "epoch": 0.25058618409186556, - "grad_norm": 1.9639538826739673, - "learning_rate": 3.5110817027412123e-06, - "loss": 0.7825, - "num_input_tokens_seen": 43928480, - "step": 2084 - }, - { - "epoch": 0.25070642698250467, - "grad_norm": 2.1870331538462207, - "learning_rate": 3.5105712826168493e-06, - "loss": 0.6893, - "num_input_tokens_seen": 43947850, - "step": 2085 - }, - { - "epoch": 0.2508266698731437, - "grad_norm": 1.7433921199255855, - "learning_rate": 3.5100606333400235e-06, - "loss": 0.7001, - "num_input_tokens_seen": 43964705, - "step": 2086 - }, - { - "epoch": 0.25094691276378284, - "grad_norm": 2.107617501671543, - "learning_rate": 3.5095497549882006e-06, - "loss": 0.7707, - "num_input_tokens_seen": 43982870, - "step": 2087 - }, - { - "epoch": 0.25106715565442195, - "grad_norm": 3.1158067235897913, - "learning_rate": 3.5090386476388796e-06, - "loss": 0.7208, - "num_input_tokens_seen": 44003380, - "step": 2088 - }, - { - "epoch": 0.251187398545061, - "grad_norm": 2.085876155159922, - "learning_rate": 3.5085273113695965e-06, - "loss": 0.7588, - "num_input_tokens_seen": 44027670, - "step": 2089 - }, - { - "epoch": 0.2513076414357001, - "grad_norm": 1.9147780392185196, - "learning_rate": 3.508015746257919e-06, - "loss": 0.7847, - "num_input_tokens_seen": 44046430, - "step": 2090 - }, - { - "epoch": 0.2514278843263392, - "grad_norm": 2.010849773456429, - "learning_rate": 3.5075039523814518e-06, - "loss": 0.8265, - "num_input_tokens_seen": 44065340, - "step": 2091 - }, - { - "epoch": 0.2515481272169783, - "grad_norm": 2.477381487997849, - "learning_rate": 3.5069919298178335e-06, - "loss": 0.8226, - "num_input_tokens_seen": 44081780, - "step": 2092 - }, - { - "epoch": 0.2516683701076174, - "grad_norm": 1.6739047191476184, - "learning_rate": 3.506479678644738e-06, - "loss": 0.8202, - "num_input_tokens_seen": 44101895, - "step": 2093 - }, - { - "epoch": 0.2517886129982565, - "grad_norm": 2.639004105867083, - "learning_rate": 3.505967198939873e-06, - "loss": 0.7356, - "num_input_tokens_seen": 44118655, - "step": 2094 - }, - { - "epoch": 0.25190885588889556, - "grad_norm": 1.8571230490588497, - "learning_rate": 3.5054544907809813e-06, - "loss": 0.7777, - "num_input_tokens_seen": 44138875, - "step": 2095 - }, - { - "epoch": 0.25202909877953467, - "grad_norm": 1.9237551894281173, - "learning_rate": 3.5049415542458397e-06, - "loss": 0.7946, - "num_input_tokens_seen": 44157500, - "step": 2096 - }, - { - "epoch": 0.2521493416701738, - "grad_norm": 1.701850570795859, - "learning_rate": 3.504428389412262e-06, - "loss": 0.8305, - "num_input_tokens_seen": 44178030, - "step": 2097 - }, - { - "epoch": 0.25226958456081283, - "grad_norm": 2.471204438608791, - "learning_rate": 3.5039149963580927e-06, - "loss": 0.7345, - "num_input_tokens_seen": 44197770, - "step": 2098 - }, - { - "epoch": 0.25238982745145194, - "grad_norm": 2.3895430131241664, - "learning_rate": 3.503401375161215e-06, - "loss": 0.6947, - "num_input_tokens_seen": 44217235, - "step": 2099 - }, - { - "epoch": 0.252510070342091, - "grad_norm": 2.3929994729936674, - "learning_rate": 3.502887525899544e-06, - "loss": 0.8345, - "num_input_tokens_seen": 44235935, - "step": 2100 - }, - { - "epoch": 0.2526303132327301, - "grad_norm": 1.7154447329042763, - "learning_rate": 3.50237344865103e-06, - "loss": 0.8208, - "num_input_tokens_seen": 44256655, - "step": 2101 - }, - { - "epoch": 0.2527505561233692, - "grad_norm": 2.188366363478398, - "learning_rate": 3.501859143493658e-06, - "loss": 0.7692, - "num_input_tokens_seen": 44277005, - "step": 2102 - }, - { - "epoch": 0.2528707990140083, - "grad_norm": 0.9268280160926616, - "learning_rate": 3.5013446105054484e-06, - "loss": 0.6457, - "num_input_tokens_seen": 44329645, - "step": 2103 - }, - { - "epoch": 0.2529910419046474, - "grad_norm": 2.148727841953047, - "learning_rate": 3.5008298497644555e-06, - "loss": 0.7508, - "num_input_tokens_seen": 44348410, - "step": 2104 - }, - { - "epoch": 0.2531112847952865, - "grad_norm": 1.7830399982444798, - "learning_rate": 3.500314861348767e-06, - "loss": 0.8726, - "num_input_tokens_seen": 44368765, - "step": 2105 - }, - { - "epoch": 0.25323152768592555, - "grad_norm": 1.8745524804015687, - "learning_rate": 3.499799645336507e-06, - "loss": 0.7678, - "num_input_tokens_seen": 44385380, - "step": 2106 - }, - { - "epoch": 0.25335177057656466, - "grad_norm": 1.4415840588501692, - "learning_rate": 3.4992842018058336e-06, - "loss": 0.8633, - "num_input_tokens_seen": 44408000, - "step": 2107 - }, - { - "epoch": 0.25347201346720377, - "grad_norm": 2.9254912526342935, - "learning_rate": 3.4987685308349384e-06, - "loss": 0.8665, - "num_input_tokens_seen": 44425450, - "step": 2108 - }, - { - "epoch": 0.2535922563578428, - "grad_norm": 2.270618612635359, - "learning_rate": 3.4982526325020497e-06, - "loss": 0.615, - "num_input_tokens_seen": 44442140, - "step": 2109 - }, - { - "epoch": 0.25371249924848194, - "grad_norm": 2.4502802225620157, - "learning_rate": 3.497736506885427e-06, - "loss": 0.819, - "num_input_tokens_seen": 44457480, - "step": 2110 - }, - { - "epoch": 0.25383274213912105, - "grad_norm": 1.7968232571779221, - "learning_rate": 3.4972201540633676e-06, - "loss": 0.7292, - "num_input_tokens_seen": 44476555, - "step": 2111 - }, - { - "epoch": 0.2539529850297601, - "grad_norm": 1.8596786372081398, - "learning_rate": 3.4967035741142008e-06, - "loss": 0.8454, - "num_input_tokens_seen": 44495095, - "step": 2112 - }, - { - "epoch": 0.2540732279203992, - "grad_norm": 1.940512766138527, - "learning_rate": 3.4961867671162917e-06, - "loss": 0.815, - "num_input_tokens_seen": 44514745, - "step": 2113 - }, - { - "epoch": 0.2541934708110383, - "grad_norm": 2.5143612349076188, - "learning_rate": 3.4956697331480402e-06, - "loss": 0.7747, - "num_input_tokens_seen": 44533035, - "step": 2114 - }, - { - "epoch": 0.2543137137016774, - "grad_norm": 1.5744780532127773, - "learning_rate": 3.495152472287879e-06, - "loss": 0.7951, - "num_input_tokens_seen": 44553465, - "step": 2115 - }, - { - "epoch": 0.2544339565923165, - "grad_norm": 34.26379690078498, - "learning_rate": 3.4946349846142766e-06, - "loss": 0.7322, - "num_input_tokens_seen": 44572325, - "step": 2116 - }, - { - "epoch": 0.25455419948295555, - "grad_norm": 2.1448054262698215, - "learning_rate": 3.4941172702057353e-06, - "loss": 0.7509, - "num_input_tokens_seen": 44592105, - "step": 2117 - }, - { - "epoch": 0.25467444237359466, - "grad_norm": 1.8188379838192985, - "learning_rate": 3.4935993291407924e-06, - "loss": 0.7985, - "num_input_tokens_seen": 44610650, - "step": 2118 - }, - { - "epoch": 0.25479468526423377, - "grad_norm": 2.334983122139885, - "learning_rate": 3.4930811614980183e-06, - "loss": 0.7053, - "num_input_tokens_seen": 44632065, - "step": 2119 - }, - { - "epoch": 0.2549149281548728, - "grad_norm": 1.7362084582043864, - "learning_rate": 3.4925627673560198e-06, - "loss": 0.7891, - "num_input_tokens_seen": 44652445, - "step": 2120 - }, - { - "epoch": 0.25503517104551193, - "grad_norm": 1.6921461899369696, - "learning_rate": 3.4920441467934357e-06, - "loss": 0.8832, - "num_input_tokens_seen": 44672680, - "step": 2121 - }, - { - "epoch": 0.25515541393615104, - "grad_norm": 2.1800507449511457, - "learning_rate": 3.491525299888941e-06, - "loss": 0.8308, - "num_input_tokens_seen": 44691245, - "step": 2122 - }, - { - "epoch": 0.2552756568267901, - "grad_norm": 0.9948294463111278, - "learning_rate": 3.491006226721244e-06, - "loss": 0.6831, - "num_input_tokens_seen": 44755175, - "step": 2123 - }, - { - "epoch": 0.2553958997174292, - "grad_norm": 2.555251923405769, - "learning_rate": 3.4904869273690882e-06, - "loss": 0.7694, - "num_input_tokens_seen": 44772785, - "step": 2124 - }, - { - "epoch": 0.2555161426080683, - "grad_norm": 2.052406816960019, - "learning_rate": 3.4899674019112506e-06, - "loss": 0.8899, - "num_input_tokens_seen": 44791805, - "step": 2125 - }, - { - "epoch": 0.2556363854987074, - "grad_norm": 1.6574644626872266, - "learning_rate": 3.4894476504265428e-06, - "loss": 0.6899, - "num_input_tokens_seen": 44815765, - "step": 2126 - }, - { - "epoch": 0.2557566283893465, - "grad_norm": 0.7862676542900094, - "learning_rate": 3.4889276729938104e-06, - "loss": 0.5734, - "num_input_tokens_seen": 44874015, - "step": 2127 - }, - { - "epoch": 0.2558768712799856, - "grad_norm": 1.9774783479580482, - "learning_rate": 3.488407469691934e-06, - "loss": 0.8014, - "num_input_tokens_seen": 44894430, - "step": 2128 - }, - { - "epoch": 0.25599711417062465, - "grad_norm": 2.3910767444223384, - "learning_rate": 3.487887040599828e-06, - "loss": 0.8051, - "num_input_tokens_seen": 44913950, - "step": 2129 - }, - { - "epoch": 0.25611735706126376, - "grad_norm": 2.6924668493362702, - "learning_rate": 3.4873663857964407e-06, - "loss": 0.7505, - "num_input_tokens_seen": 44930885, - "step": 2130 - }, - { - "epoch": 0.2562375999519028, - "grad_norm": 1.8992595283427995, - "learning_rate": 3.4868455053607556e-06, - "loss": 0.6676, - "num_input_tokens_seen": 44950220, - "step": 2131 - }, - { - "epoch": 0.2563578428425419, - "grad_norm": 2.37519934545032, - "learning_rate": 3.4863243993717887e-06, - "loss": 0.71, - "num_input_tokens_seen": 44969240, - "step": 2132 - }, - { - "epoch": 0.25647808573318104, - "grad_norm": 1.863093317516603, - "learning_rate": 3.485803067908593e-06, - "loss": 0.7773, - "num_input_tokens_seen": 44988470, - "step": 2133 - }, - { - "epoch": 0.2565983286238201, - "grad_norm": 1.7910847964400878, - "learning_rate": 3.485281511050253e-06, - "loss": 0.7882, - "num_input_tokens_seen": 45010325, - "step": 2134 - }, - { - "epoch": 0.2567185715144592, - "grad_norm": 3.058592255023808, - "learning_rate": 3.484759728875889e-06, - "loss": 0.8919, - "num_input_tokens_seen": 45025410, - "step": 2135 - }, - { - "epoch": 0.2568388144050983, - "grad_norm": 1.8227084894311985, - "learning_rate": 3.484237721464654e-06, - "loss": 0.8051, - "num_input_tokens_seen": 45043425, - "step": 2136 - }, - { - "epoch": 0.25695905729573737, - "grad_norm": 2.4415837073030637, - "learning_rate": 3.483715488895737e-06, - "loss": 0.6643, - "num_input_tokens_seen": 45063475, - "step": 2137 - }, - { - "epoch": 0.2570793001863765, - "grad_norm": 2.197391872815811, - "learning_rate": 3.48319303124836e-06, - "loss": 0.7885, - "num_input_tokens_seen": 45083575, - "step": 2138 - }, - { - "epoch": 0.2571995430770156, - "grad_norm": 2.405762954180929, - "learning_rate": 3.4826703486017798e-06, - "loss": 0.6764, - "num_input_tokens_seen": 45102920, - "step": 2139 - }, - { - "epoch": 0.25731978596765465, - "grad_norm": 1.630444781653425, - "learning_rate": 3.4821474410352862e-06, - "loss": 0.7602, - "num_input_tokens_seen": 45121300, - "step": 2140 - }, - { - "epoch": 0.25744002885829376, - "grad_norm": 0.968172411862686, - "learning_rate": 3.481624308628205e-06, - "loss": 0.6683, - "num_input_tokens_seen": 45182390, - "step": 2141 - }, - { - "epoch": 0.25756027174893287, - "grad_norm": 3.4037471300743953, - "learning_rate": 3.481100951459893e-06, - "loss": 0.9978, - "num_input_tokens_seen": 45195130, - "step": 2142 - }, - { - "epoch": 0.2576805146395719, - "grad_norm": 1.621048231219969, - "learning_rate": 3.480577369609745e-06, - "loss": 0.7818, - "num_input_tokens_seen": 45215740, - "step": 2143 - }, - { - "epoch": 0.25780075753021103, - "grad_norm": 2.034880649045596, - "learning_rate": 3.4800535631571874e-06, - "loss": 0.8729, - "num_input_tokens_seen": 45230990, - "step": 2144 - }, - { - "epoch": 0.25792100042085014, - "grad_norm": 2.2247593353905457, - "learning_rate": 3.4795295321816804e-06, - "loss": 0.7703, - "num_input_tokens_seen": 45249535, - "step": 2145 - }, - { - "epoch": 0.2580412433114892, - "grad_norm": 2.0019763486429216, - "learning_rate": 3.47900527676272e-06, - "loss": 0.9097, - "num_input_tokens_seen": 45267590, - "step": 2146 - }, - { - "epoch": 0.2581614862021283, - "grad_norm": 1.9973985384800115, - "learning_rate": 3.478480796979835e-06, - "loss": 0.88, - "num_input_tokens_seen": 45285195, - "step": 2147 - }, - { - "epoch": 0.25828172909276736, - "grad_norm": 1.5622650017445285, - "learning_rate": 3.477956092912589e-06, - "loss": 0.7697, - "num_input_tokens_seen": 45306460, - "step": 2148 - }, - { - "epoch": 0.2584019719834065, - "grad_norm": 0.7082370874638909, - "learning_rate": 3.4774311646405783e-06, - "loss": 0.5816, - "num_input_tokens_seen": 45376085, - "step": 2149 - }, - { - "epoch": 0.2585222148740456, - "grad_norm": 1.963988692512629, - "learning_rate": 3.476906012243435e-06, - "loss": 0.8306, - "num_input_tokens_seen": 45394715, - "step": 2150 - }, - { - "epoch": 0.25864245776468464, - "grad_norm": 2.4758094485214253, - "learning_rate": 3.4763806358008235e-06, - "loss": 0.816, - "num_input_tokens_seen": 45415635, - "step": 2151 - }, - { - "epoch": 0.25876270065532375, - "grad_norm": 2.5172415041472256, - "learning_rate": 3.475855035392444e-06, - "loss": 0.8597, - "num_input_tokens_seen": 45430675, - "step": 2152 - }, - { - "epoch": 0.25888294354596286, - "grad_norm": 1.6722701755583247, - "learning_rate": 3.475329211098029e-06, - "loss": 0.7008, - "num_input_tokens_seen": 45453550, - "step": 2153 - }, - { - "epoch": 0.2590031864366019, - "grad_norm": 1.6905553467223597, - "learning_rate": 3.474803162997345e-06, - "loss": 0.8191, - "num_input_tokens_seen": 45474000, - "step": 2154 - }, - { - "epoch": 0.25912342932724103, - "grad_norm": 0.8806568624122615, - "learning_rate": 3.4742768911701944e-06, - "loss": 0.5767, - "num_input_tokens_seen": 45536415, - "step": 2155 - }, - { - "epoch": 0.25924367221788014, - "grad_norm": 3.683229347470357, - "learning_rate": 3.4737503956964113e-06, - "loss": 0.6925, - "num_input_tokens_seen": 45548440, - "step": 2156 - }, - { - "epoch": 0.2593639151085192, - "grad_norm": 2.2002151234468927, - "learning_rate": 3.473223676655865e-06, - "loss": 0.6697, - "num_input_tokens_seen": 45566160, - "step": 2157 - }, - { - "epoch": 0.2594841579991583, - "grad_norm": 1.7902954293342122, - "learning_rate": 3.4726967341284585e-06, - "loss": 0.7967, - "num_input_tokens_seen": 45583745, - "step": 2158 - }, - { - "epoch": 0.2596044008897974, - "grad_norm": 1.923754365866425, - "learning_rate": 3.4721695681941282e-06, - "loss": 0.7524, - "num_input_tokens_seen": 45602505, - "step": 2159 - }, - { - "epoch": 0.25972464378043647, - "grad_norm": 2.071586187874828, - "learning_rate": 3.471642178932845e-06, - "loss": 0.832, - "num_input_tokens_seen": 45620870, - "step": 2160 - }, - { - "epoch": 0.2598448866710756, - "grad_norm": 1.8995520874012908, - "learning_rate": 3.471114566424613e-06, - "loss": 0.8949, - "num_input_tokens_seen": 45639050, - "step": 2161 - }, - { - "epoch": 0.25996512956171464, - "grad_norm": 1.946944032000019, - "learning_rate": 3.4705867307494715e-06, - "loss": 0.7633, - "num_input_tokens_seen": 45657840, - "step": 2162 - }, - { - "epoch": 0.26008537245235375, - "grad_norm": 2.3324971376385024, - "learning_rate": 3.470058671987492e-06, - "loss": 0.8426, - "num_input_tokens_seen": 45675825, - "step": 2163 - }, - { - "epoch": 0.26020561534299286, - "grad_norm": 2.0286072220221314, - "learning_rate": 3.4695303902187805e-06, - "loss": 0.8294, - "num_input_tokens_seen": 45695100, - "step": 2164 - }, - { - "epoch": 0.2603258582336319, - "grad_norm": 1.7980597260221758, - "learning_rate": 3.4690018855234775e-06, - "loss": 0.7817, - "num_input_tokens_seen": 45715540, - "step": 2165 - }, - { - "epoch": 0.260446101124271, - "grad_norm": 1.7662292637463965, - "learning_rate": 3.4684731579817568e-06, - "loss": 0.8059, - "num_input_tokens_seen": 45736250, - "step": 2166 - }, - { - "epoch": 0.26056634401491013, - "grad_norm": 1.6959706584943053, - "learning_rate": 3.4679442076738247e-06, - "loss": 0.7668, - "num_input_tokens_seen": 45755685, - "step": 2167 - }, - { - "epoch": 0.2606865869055492, - "grad_norm": 4.693639325318077, - "learning_rate": 3.467415034679924e-06, - "loss": 0.8306, - "num_input_tokens_seen": 45775105, - "step": 2168 - }, - { - "epoch": 0.2608068297961883, - "grad_norm": 2.1487949100743715, - "learning_rate": 3.4668856390803295e-06, - "loss": 0.7999, - "num_input_tokens_seen": 45792705, - "step": 2169 - }, - { - "epoch": 0.2609270726868274, - "grad_norm": 1.9495231218974878, - "learning_rate": 3.4663560209553495e-06, - "loss": 0.892, - "num_input_tokens_seen": 45810490, - "step": 2170 - }, - { - "epoch": 0.26104731557746647, - "grad_norm": 1.7341364471547513, - "learning_rate": 3.4658261803853267e-06, - "loss": 0.785, - "num_input_tokens_seen": 45828135, - "step": 2171 - }, - { - "epoch": 0.2611675584681056, - "grad_norm": 2.270725660903174, - "learning_rate": 3.4652961174506383e-06, - "loss": 0.8033, - "num_input_tokens_seen": 45847725, - "step": 2172 - }, - { - "epoch": 0.2612878013587447, - "grad_norm": 1.052839797496255, - "learning_rate": 3.464765832231694e-06, - "loss": 0.6039, - "num_input_tokens_seen": 45901610, - "step": 2173 - }, - { - "epoch": 0.26140804424938374, - "grad_norm": 1.6998793311022538, - "learning_rate": 3.4642353248089373e-06, - "loss": 0.703, - "num_input_tokens_seen": 45920090, - "step": 2174 - }, - { - "epoch": 0.26152828714002285, - "grad_norm": 1.8263511866976727, - "learning_rate": 3.463704595262846e-06, - "loss": 0.8006, - "num_input_tokens_seen": 45940690, - "step": 2175 - }, - { - "epoch": 0.26164853003066196, - "grad_norm": 1.6755540018575477, - "learning_rate": 3.463173643673931e-06, - "loss": 0.6986, - "num_input_tokens_seen": 45962935, - "step": 2176 - }, - { - "epoch": 0.261768772921301, - "grad_norm": 0.9759673344215334, - "learning_rate": 3.4626424701227387e-06, - "loss": 0.6617, - "num_input_tokens_seen": 46017715, - "step": 2177 - }, - { - "epoch": 0.26188901581194013, - "grad_norm": 0.875338679096256, - "learning_rate": 3.4621110746898452e-06, - "loss": 0.6194, - "num_input_tokens_seen": 46085295, - "step": 2178 - }, - { - "epoch": 0.2620092587025792, - "grad_norm": 1.6743386784600551, - "learning_rate": 3.4615794574558654e-06, - "loss": 0.7411, - "num_input_tokens_seen": 46104025, - "step": 2179 - }, - { - "epoch": 0.2621295015932183, - "grad_norm": 2.4061105282545574, - "learning_rate": 3.4610476185014436e-06, - "loss": 0.8492, - "num_input_tokens_seen": 46121005, - "step": 2180 - }, - { - "epoch": 0.2622497444838574, - "grad_norm": 2.5406562160632533, - "learning_rate": 3.4605155579072597e-06, - "loss": 0.7914, - "num_input_tokens_seen": 46140580, - "step": 2181 - }, - { - "epoch": 0.26236998737449646, - "grad_norm": 1.8753165110430303, - "learning_rate": 3.459983275754027e-06, - "loss": 0.7093, - "num_input_tokens_seen": 46159195, - "step": 2182 - }, - { - "epoch": 0.26249023026513557, - "grad_norm": 2.8662073220461406, - "learning_rate": 3.4594507721224918e-06, - "loss": 0.7887, - "num_input_tokens_seen": 46177565, - "step": 2183 - }, - { - "epoch": 0.2626104731557747, - "grad_norm": 2.0352399382495685, - "learning_rate": 3.4589180470934353e-06, - "loss": 0.8151, - "num_input_tokens_seen": 46197150, - "step": 2184 - }, - { - "epoch": 0.26273071604641374, - "grad_norm": 2.404176957693624, - "learning_rate": 3.4583851007476713e-06, - "loss": 0.7681, - "num_input_tokens_seen": 46215340, - "step": 2185 - }, - { - "epoch": 0.26285095893705285, - "grad_norm": 2.617946147317752, - "learning_rate": 3.4578519331660464e-06, - "loss": 0.6916, - "num_input_tokens_seen": 46232055, - "step": 2186 - }, - { - "epoch": 0.26297120182769196, - "grad_norm": 1.8247657448747576, - "learning_rate": 3.4573185444294426e-06, - "loss": 0.8213, - "num_input_tokens_seen": 46250140, - "step": 2187 - }, - { - "epoch": 0.263091444718331, - "grad_norm": 7.264731591927456, - "learning_rate": 3.456784934618774e-06, - "loss": 0.7814, - "num_input_tokens_seen": 46271025, - "step": 2188 - }, - { - "epoch": 0.2632116876089701, - "grad_norm": 2.04998011172163, - "learning_rate": 3.4562511038149897e-06, - "loss": 0.7903, - "num_input_tokens_seen": 46286240, - "step": 2189 - }, - { - "epoch": 0.26333193049960923, - "grad_norm": 1.2344818286963726, - "learning_rate": 3.4557170520990705e-06, - "loss": 0.5989, - "num_input_tokens_seen": 46346635, - "step": 2190 - }, - { - "epoch": 0.2634521733902483, - "grad_norm": 1.487749041657066, - "learning_rate": 3.455182779552032e-06, - "loss": 0.8583, - "num_input_tokens_seen": 46369240, - "step": 2191 - }, - { - "epoch": 0.2635724162808874, - "grad_norm": 1.8305847418538992, - "learning_rate": 3.4546482862549226e-06, - "loss": 0.8371, - "num_input_tokens_seen": 46389275, - "step": 2192 - }, - { - "epoch": 0.2636926591715265, - "grad_norm": 2.20253937856688, - "learning_rate": 3.454113572288825e-06, - "loss": 0.7821, - "num_input_tokens_seen": 46405585, - "step": 2193 - }, - { - "epoch": 0.26381290206216557, - "grad_norm": 1.8257681357010076, - "learning_rate": 3.453578637734854e-06, - "loss": 0.8047, - "num_input_tokens_seen": 46426495, - "step": 2194 - }, - { - "epoch": 0.2639331449528047, - "grad_norm": 2.334486540073954, - "learning_rate": 3.4530434826741605e-06, - "loss": 0.7854, - "num_input_tokens_seen": 46447155, - "step": 2195 - }, - { - "epoch": 0.26405338784344373, - "grad_norm": 1.893350803765931, - "learning_rate": 3.452508107187926e-06, - "loss": 0.6949, - "num_input_tokens_seen": 46470250, - "step": 2196 - }, - { - "epoch": 0.26417363073408284, - "grad_norm": 1.6633260682290516, - "learning_rate": 3.451972511357366e-06, - "loss": 0.7588, - "num_input_tokens_seen": 46489515, - "step": 2197 - }, - { - "epoch": 0.26429387362472195, - "grad_norm": 1.6705127026126372, - "learning_rate": 3.4514366952637296e-06, - "loss": 0.847, - "num_input_tokens_seen": 46508995, - "step": 2198 - }, - { - "epoch": 0.264414116515361, - "grad_norm": 0.8430303941519564, - "learning_rate": 3.450900658988302e-06, - "loss": 0.6352, - "num_input_tokens_seen": 46570265, - "step": 2199 - }, - { - "epoch": 0.2645343594060001, - "grad_norm": 1.9688282072250518, - "learning_rate": 3.450364402612397e-06, - "loss": 0.7795, - "num_input_tokens_seen": 46587140, - "step": 2200 - }, - { - "epoch": 0.26465460229663923, - "grad_norm": 2.0738004212785186, - "learning_rate": 3.449827926217366e-06, - "loss": 0.8291, - "num_input_tokens_seen": 46606295, - "step": 2201 - }, - { - "epoch": 0.2647748451872783, - "grad_norm": 2.16193287978649, - "learning_rate": 3.449291229884591e-06, - "loss": 0.801, - "num_input_tokens_seen": 46627255, - "step": 2202 - }, - { - "epoch": 0.2648950880779174, - "grad_norm": 2.4941842073282823, - "learning_rate": 3.4487543136954887e-06, - "loss": 0.861, - "num_input_tokens_seen": 46646595, - "step": 2203 - }, - { - "epoch": 0.2650153309685565, - "grad_norm": 1.9009527232872498, - "learning_rate": 3.448217177731509e-06, - "loss": 0.9067, - "num_input_tokens_seen": 46666800, - "step": 2204 - }, - { - "epoch": 0.26513557385919556, - "grad_norm": 1.9754661053663947, - "learning_rate": 3.4476798220741348e-06, - "loss": 0.7693, - "num_input_tokens_seen": 46685400, - "step": 2205 - }, - { - "epoch": 0.26525581674983467, - "grad_norm": 1.6107570961172946, - "learning_rate": 3.4471422468048826e-06, - "loss": 0.782, - "num_input_tokens_seen": 46703845, - "step": 2206 - }, - { - "epoch": 0.2653760596404738, - "grad_norm": 4.460285902430027, - "learning_rate": 3.4466044520053022e-06, - "loss": 0.7344, - "num_input_tokens_seen": 46722570, - "step": 2207 - }, - { - "epoch": 0.26549630253111284, - "grad_norm": 2.0064457896255057, - "learning_rate": 3.446066437756977e-06, - "loss": 0.6027, - "num_input_tokens_seen": 46741495, - "step": 2208 - }, - { - "epoch": 0.26561654542175195, - "grad_norm": 2.2774316458225368, - "learning_rate": 3.4455282041415224e-06, - "loss": 0.7534, - "num_input_tokens_seen": 46760425, - "step": 2209 - }, - { - "epoch": 0.265736788312391, - "grad_norm": 2.849207733363642, - "learning_rate": 3.4449897512405894e-06, - "loss": 0.8709, - "num_input_tokens_seen": 46779295, - "step": 2210 - }, - { - "epoch": 0.2658570312030301, - "grad_norm": 1.889225322529489, - "learning_rate": 3.444451079135859e-06, - "loss": 0.7404, - "num_input_tokens_seen": 46798525, - "step": 2211 - }, - { - "epoch": 0.2659772740936692, - "grad_norm": 1.8949274588165255, - "learning_rate": 3.4439121879090485e-06, - "loss": 0.7347, - "num_input_tokens_seen": 46816025, - "step": 2212 - }, - { - "epoch": 0.2660975169843083, - "grad_norm": 1.860559261096679, - "learning_rate": 3.443373077641908e-06, - "loss": 0.8213, - "num_input_tokens_seen": 46834670, - "step": 2213 - }, - { - "epoch": 0.2662177598749474, - "grad_norm": 8.146773699127522, - "learning_rate": 3.4428337484162183e-06, - "loss": 0.8008, - "num_input_tokens_seen": 46855200, - "step": 2214 - }, - { - "epoch": 0.2663380027655865, - "grad_norm": 2.182734401590511, - "learning_rate": 3.4422942003137967e-06, - "loss": 0.8469, - "num_input_tokens_seen": 46872950, - "step": 2215 - }, - { - "epoch": 0.26645824565622556, - "grad_norm": 0.9227837422763205, - "learning_rate": 3.4417544334164916e-06, - "loss": 0.5806, - "num_input_tokens_seen": 46936815, - "step": 2216 - }, - { - "epoch": 0.26657848854686467, - "grad_norm": 1.6901293441811094, - "learning_rate": 3.4412144478061854e-06, - "loss": 0.7766, - "num_input_tokens_seen": 46958945, - "step": 2217 - }, - { - "epoch": 0.2666987314375038, - "grad_norm": 1.8534360895325481, - "learning_rate": 3.4406742435647925e-06, - "loss": 0.7498, - "num_input_tokens_seen": 46978730, - "step": 2218 - }, - { - "epoch": 0.26681897432814283, - "grad_norm": 2.932088770184242, - "learning_rate": 3.440133820774263e-06, - "loss": 0.7847, - "num_input_tokens_seen": 46998260, - "step": 2219 - }, - { - "epoch": 0.26693921721878194, - "grad_norm": 2.739173324547336, - "learning_rate": 3.439593179516578e-06, - "loss": 0.8108, - "num_input_tokens_seen": 47017890, - "step": 2220 - }, - { - "epoch": 0.26705946010942105, - "grad_norm": 1.8765620181825116, - "learning_rate": 3.4390523198737524e-06, - "loss": 0.8069, - "num_input_tokens_seen": 47036770, - "step": 2221 - }, - { - "epoch": 0.2671797030000601, - "grad_norm": 1.7216434298191865, - "learning_rate": 3.4385112419278333e-06, - "loss": 0.7278, - "num_input_tokens_seen": 47057715, - "step": 2222 - }, - { - "epoch": 0.2672999458906992, - "grad_norm": 0.8408885734508484, - "learning_rate": 3.437969945760903e-06, - "loss": 0.6955, - "num_input_tokens_seen": 47115260, - "step": 2223 - }, - { - "epoch": 0.26742018878133833, - "grad_norm": 1.8580465936065493, - "learning_rate": 3.4374284314550755e-06, - "loss": 0.8932, - "num_input_tokens_seen": 47134020, - "step": 2224 - }, - { - "epoch": 0.2675404316719774, - "grad_norm": 2.1219419616278925, - "learning_rate": 3.436886699092498e-06, - "loss": 0.8051, - "num_input_tokens_seen": 47152255, - "step": 2225 - }, - { - "epoch": 0.2676606745626165, - "grad_norm": 2.8806147659039283, - "learning_rate": 3.4363447487553502e-06, - "loss": 0.7135, - "num_input_tokens_seen": 47165290, - "step": 2226 - }, - { - "epoch": 0.26778091745325555, - "grad_norm": 2.2812779444416376, - "learning_rate": 3.4358025805258455e-06, - "loss": 0.7813, - "num_input_tokens_seen": 47184715, - "step": 2227 - }, - { - "epoch": 0.26790116034389466, - "grad_norm": 3.3352759992147303, - "learning_rate": 3.435260194486232e-06, - "loss": 0.8294, - "num_input_tokens_seen": 47202405, - "step": 2228 - }, - { - "epoch": 0.2680214032345338, - "grad_norm": 2.047436263158215, - "learning_rate": 3.4347175907187875e-06, - "loss": 0.8114, - "num_input_tokens_seen": 47219115, - "step": 2229 - }, - { - "epoch": 0.26814164612517283, - "grad_norm": 1.9031646212382474, - "learning_rate": 3.4341747693058254e-06, - "loss": 0.869, - "num_input_tokens_seen": 47237310, - "step": 2230 - }, - { - "epoch": 0.26826188901581194, - "grad_norm": 1.815388871146807, - "learning_rate": 3.4336317303296916e-06, - "loss": 0.7626, - "num_input_tokens_seen": 47258005, - "step": 2231 - }, - { - "epoch": 0.26838213190645105, - "grad_norm": 2.5098328407837944, - "learning_rate": 3.4330884738727635e-06, - "loss": 0.7513, - "num_input_tokens_seen": 47275900, - "step": 2232 - }, - { - "epoch": 0.2685023747970901, - "grad_norm": 1.8592202056204403, - "learning_rate": 3.4325450000174535e-06, - "loss": 0.7088, - "num_input_tokens_seen": 47292260, - "step": 2233 - }, - { - "epoch": 0.2686226176877292, - "grad_norm": 3.6834760310652723, - "learning_rate": 3.4320013088462063e-06, - "loss": 0.7368, - "num_input_tokens_seen": 47309340, - "step": 2234 - }, - { - "epoch": 0.2687428605783683, - "grad_norm": 1.6239923813927342, - "learning_rate": 3.4314574004414987e-06, - "loss": 0.8172, - "num_input_tokens_seen": 47329455, - "step": 2235 - }, - { - "epoch": 0.2688631034690074, - "grad_norm": 1.094267471694779, - "learning_rate": 3.4309132748858424e-06, - "loss": 0.6914, - "num_input_tokens_seen": 47390165, - "step": 2236 - }, - { - "epoch": 0.2689833463596465, - "grad_norm": 1.7073026828010218, - "learning_rate": 3.430368932261779e-06, - "loss": 0.8366, - "num_input_tokens_seen": 47410240, - "step": 2237 - }, - { - "epoch": 0.2691035892502856, - "grad_norm": 2.2535789342823027, - "learning_rate": 3.429824372651886e-06, - "loss": 0.7468, - "num_input_tokens_seen": 47428110, - "step": 2238 - }, - { - "epoch": 0.26922383214092466, - "grad_norm": 2.156970536661827, - "learning_rate": 3.4292795961387732e-06, - "loss": 0.8373, - "num_input_tokens_seen": 47445730, - "step": 2239 - }, - { - "epoch": 0.26934407503156377, - "grad_norm": 2.8140608201981485, - "learning_rate": 3.4287346028050818e-06, - "loss": 0.8706, - "num_input_tokens_seen": 47461520, - "step": 2240 - }, - { - "epoch": 0.2694643179222028, - "grad_norm": 1.4800634493951534, - "learning_rate": 3.4281893927334866e-06, - "loss": 0.792, - "num_input_tokens_seen": 47481150, - "step": 2241 - }, - { - "epoch": 0.26958456081284193, - "grad_norm": 2.26960457523604, - "learning_rate": 3.4276439660066963e-06, - "loss": 0.7464, - "num_input_tokens_seen": 47500570, - "step": 2242 - }, - { - "epoch": 0.26970480370348104, - "grad_norm": 2.4117836098713807, - "learning_rate": 3.427098322707452e-06, - "loss": 0.8347, - "num_input_tokens_seen": 47516255, - "step": 2243 - }, - { - "epoch": 0.2698250465941201, - "grad_norm": 2.075858671628424, - "learning_rate": 3.426552462918526e-06, - "loss": 0.8938, - "num_input_tokens_seen": 47533910, - "step": 2244 - }, - { - "epoch": 0.2699452894847592, - "grad_norm": 2.564195606133789, - "learning_rate": 3.426006386722726e-06, - "loss": 0.7322, - "num_input_tokens_seen": 47551690, - "step": 2245 - }, - { - "epoch": 0.2700655323753983, - "grad_norm": 2.3469277653166585, - "learning_rate": 3.425460094202891e-06, - "loss": 0.9055, - "num_input_tokens_seen": 47569285, - "step": 2246 - }, - { - "epoch": 0.2701857752660374, - "grad_norm": 2.0713905260041887, - "learning_rate": 3.424913585441893e-06, - "loss": 0.8122, - "num_input_tokens_seen": 47586840, - "step": 2247 - }, - { - "epoch": 0.2703060181566765, - "grad_norm": 2.133451806300382, - "learning_rate": 3.424366860522637e-06, - "loss": 0.8697, - "num_input_tokens_seen": 47603585, - "step": 2248 - }, - { - "epoch": 0.2704262610473156, - "grad_norm": 2.8016176917746347, - "learning_rate": 3.423819919528061e-06, - "loss": 0.8336, - "num_input_tokens_seen": 47621390, - "step": 2249 - }, - { - "epoch": 0.27054650393795465, - "grad_norm": 1.7331452914084589, - "learning_rate": 3.4232727625411355e-06, - "loss": 0.7846, - "num_input_tokens_seen": 47640215, - "step": 2250 - }, - { - "epoch": 0.27066674682859376, - "grad_norm": 1.6610563637184759, - "learning_rate": 3.4227253896448626e-06, - "loss": 0.8614, - "num_input_tokens_seen": 47657795, - "step": 2251 - }, - { - "epoch": 0.2707869897192329, - "grad_norm": 2.272445659605438, - "learning_rate": 3.42217780092228e-06, - "loss": 0.8053, - "num_input_tokens_seen": 47675855, - "step": 2252 - }, - { - "epoch": 0.27090723260987193, - "grad_norm": 0.8459225567920692, - "learning_rate": 3.4216299964564554e-06, - "loss": 0.6326, - "num_input_tokens_seen": 47734195, - "step": 2253 - }, - { - "epoch": 0.27102747550051104, - "grad_norm": 1.864517718352496, - "learning_rate": 3.421081976330491e-06, - "loss": 0.8182, - "num_input_tokens_seen": 47752430, - "step": 2254 - }, - { - "epoch": 0.27114771839115015, - "grad_norm": 1.9946337360967077, - "learning_rate": 3.4205337406275207e-06, - "loss": 0.8606, - "num_input_tokens_seen": 47772270, - "step": 2255 - }, - { - "epoch": 0.2712679612817892, - "grad_norm": 2.5037745810711116, - "learning_rate": 3.419985289430711e-06, - "loss": 0.756, - "num_input_tokens_seen": 47788740, - "step": 2256 - }, - { - "epoch": 0.2713882041724283, - "grad_norm": 2.515454140819365, - "learning_rate": 3.419436622823262e-06, - "loss": 0.7843, - "num_input_tokens_seen": 47809180, - "step": 2257 - }, - { - "epoch": 0.27150844706306737, - "grad_norm": 1.6423051647653797, - "learning_rate": 3.4188877408884063e-06, - "loss": 0.7307, - "num_input_tokens_seen": 47829605, - "step": 2258 - }, - { - "epoch": 0.2716286899537065, - "grad_norm": 3.011537137280228, - "learning_rate": 3.4183386437094084e-06, - "loss": 0.6504, - "num_input_tokens_seen": 47845990, - "step": 2259 - }, - { - "epoch": 0.2717489328443456, - "grad_norm": 2.7906706769725194, - "learning_rate": 3.417789331369565e-06, - "loss": 0.8177, - "num_input_tokens_seen": 47861500, - "step": 2260 - }, - { - "epoch": 0.27186917573498465, - "grad_norm": 1.9057125778202275, - "learning_rate": 3.4172398039522088e-06, - "loss": 0.9029, - "num_input_tokens_seen": 47882505, - "step": 2261 - }, - { - "epoch": 0.27198941862562376, - "grad_norm": 1.9142659183243795, - "learning_rate": 3.4166900615407e-06, - "loss": 0.7845, - "num_input_tokens_seen": 47900140, - "step": 2262 - }, - { - "epoch": 0.27210966151626287, - "grad_norm": 2.1066841094433126, - "learning_rate": 3.416140104218436e-06, - "loss": 0.741, - "num_input_tokens_seen": 47919225, - "step": 2263 - }, - { - "epoch": 0.2722299044069019, - "grad_norm": 0.8934330366747231, - "learning_rate": 3.4155899320688437e-06, - "loss": 0.7417, - "num_input_tokens_seen": 47985020, - "step": 2264 - }, - { - "epoch": 0.27235014729754103, - "grad_norm": 2.0616460731994066, - "learning_rate": 3.415039545175384e-06, - "loss": 0.7285, - "num_input_tokens_seen": 48000465, - "step": 2265 - }, - { - "epoch": 0.27247039018818014, - "grad_norm": 2.328166864699395, - "learning_rate": 3.414488943621551e-06, - "loss": 0.6555, - "num_input_tokens_seen": 48018850, - "step": 2266 - }, - { - "epoch": 0.2725906330788192, - "grad_norm": 2.1410102835554357, - "learning_rate": 3.41393812749087e-06, - "loss": 0.7303, - "num_input_tokens_seen": 48036615, - "step": 2267 - }, - { - "epoch": 0.2727108759694583, - "grad_norm": 2.8251924852307178, - "learning_rate": 3.4133870968668984e-06, - "loss": 0.7224, - "num_input_tokens_seen": 48051135, - "step": 2268 - }, - { - "epoch": 0.2728311188600974, - "grad_norm": 1.6637437816768221, - "learning_rate": 3.412835851833229e-06, - "loss": 0.7771, - "num_input_tokens_seen": 48073050, - "step": 2269 - }, - { - "epoch": 0.2729513617507365, - "grad_norm": 1.862581230659382, - "learning_rate": 3.4122843924734834e-06, - "loss": 0.7712, - "num_input_tokens_seen": 48095070, - "step": 2270 - }, - { - "epoch": 0.2730716046413756, - "grad_norm": 2.0034830680973386, - "learning_rate": 3.411732718871319e-06, - "loss": 0.8828, - "num_input_tokens_seen": 48110630, - "step": 2271 - }, - { - "epoch": 0.27319184753201464, - "grad_norm": 1.464338193958209, - "learning_rate": 3.4111808311104227e-06, - "loss": 0.7769, - "num_input_tokens_seen": 48132665, - "step": 2272 - }, - { - "epoch": 0.27331209042265375, - "grad_norm": 1.7927893769858916, - "learning_rate": 3.4106287292745174e-06, - "loss": 0.6859, - "num_input_tokens_seen": 48153905, - "step": 2273 - }, - { - "epoch": 0.27343233331329286, - "grad_norm": 2.043283388861086, - "learning_rate": 3.4100764134473546e-06, - "loss": 0.8202, - "num_input_tokens_seen": 48172910, - "step": 2274 - }, - { - "epoch": 0.2735525762039319, - "grad_norm": 2.6610707435458765, - "learning_rate": 3.4095238837127215e-06, - "loss": 0.8544, - "num_input_tokens_seen": 48191770, - "step": 2275 - }, - { - "epoch": 0.27367281909457103, - "grad_norm": 1.9454266750925413, - "learning_rate": 3.4089711401544355e-06, - "loss": 0.7939, - "num_input_tokens_seen": 48209085, - "step": 2276 - }, - { - "epoch": 0.27379306198521014, - "grad_norm": 2.151313775388218, - "learning_rate": 3.4084181828563486e-06, - "loss": 0.671, - "num_input_tokens_seen": 48225525, - "step": 2277 - }, - { - "epoch": 0.2739133048758492, - "grad_norm": 1.9683967155289295, - "learning_rate": 3.4078650119023423e-06, - "loss": 0.7022, - "num_input_tokens_seen": 48243560, - "step": 2278 - }, - { - "epoch": 0.2740335477664883, - "grad_norm": 2.0107365978161567, - "learning_rate": 3.4073116273763337e-06, - "loss": 0.7442, - "num_input_tokens_seen": 48257725, - "step": 2279 - }, - { - "epoch": 0.2741537906571274, - "grad_norm": 2.291958059631235, - "learning_rate": 3.40675802936227e-06, - "loss": 0.8057, - "num_input_tokens_seen": 48278230, - "step": 2280 - }, - { - "epoch": 0.27427403354776647, - "grad_norm": 1.9577781658485498, - "learning_rate": 3.4062042179441318e-06, - "loss": 0.7076, - "num_input_tokens_seen": 48298420, - "step": 2281 - }, - { - "epoch": 0.2743942764384056, - "grad_norm": 1.8264070487480006, - "learning_rate": 3.4056501932059314e-06, - "loss": 0.8075, - "num_input_tokens_seen": 48316215, - "step": 2282 - }, - { - "epoch": 0.2745145193290447, - "grad_norm": 0.8532713687739912, - "learning_rate": 3.405095955231715e-06, - "loss": 0.6085, - "num_input_tokens_seen": 48367590, - "step": 2283 - }, - { - "epoch": 0.27463476221968375, - "grad_norm": 2.051499555812841, - "learning_rate": 3.4045415041055585e-06, - "loss": 0.9423, - "num_input_tokens_seen": 48382950, - "step": 2284 - }, - { - "epoch": 0.27475500511032286, - "grad_norm": 2.890748148251801, - "learning_rate": 3.403986839911573e-06, - "loss": 0.7982, - "num_input_tokens_seen": 48397310, - "step": 2285 - }, - { - "epoch": 0.27487524800096197, - "grad_norm": 1.8465120291999164, - "learning_rate": 3.4034319627339003e-06, - "loss": 0.7982, - "num_input_tokens_seen": 48413895, - "step": 2286 - }, - { - "epoch": 0.274995490891601, - "grad_norm": 2.4133902134408074, - "learning_rate": 3.402876872656715e-06, - "loss": 0.6887, - "num_input_tokens_seen": 48431935, - "step": 2287 - }, - { - "epoch": 0.27511573378224013, - "grad_norm": 2.9979038913290865, - "learning_rate": 3.402321569764223e-06, - "loss": 0.8912, - "num_input_tokens_seen": 48450960, - "step": 2288 - }, - { - "epoch": 0.2752359766728792, - "grad_norm": 1.7818168492005624, - "learning_rate": 3.4017660541406635e-06, - "loss": 0.8285, - "num_input_tokens_seen": 48466745, - "step": 2289 - }, - { - "epoch": 0.2753562195635183, - "grad_norm": 1.901667196276985, - "learning_rate": 3.4012103258703092e-06, - "loss": 0.7387, - "num_input_tokens_seen": 48485220, - "step": 2290 - }, - { - "epoch": 0.2754764624541574, - "grad_norm": 2.423705664961733, - "learning_rate": 3.4006543850374616e-06, - "loss": 0.8227, - "num_input_tokens_seen": 48499990, - "step": 2291 - }, - { - "epoch": 0.27559670534479647, - "grad_norm": 2.160714636846373, - "learning_rate": 3.4000982317264577e-06, - "loss": 0.7537, - "num_input_tokens_seen": 48516810, - "step": 2292 - }, - { - "epoch": 0.2757169482354356, - "grad_norm": 1.9161741555199356, - "learning_rate": 3.3995418660216657e-06, - "loss": 0.8733, - "num_input_tokens_seen": 48533985, - "step": 2293 - }, - { - "epoch": 0.2758371911260747, - "grad_norm": 2.316333046390821, - "learning_rate": 3.3989852880074848e-06, - "loss": 0.8086, - "num_input_tokens_seen": 48555135, - "step": 2294 - }, - { - "epoch": 0.27595743401671374, - "grad_norm": 0.9008967053694367, - "learning_rate": 3.398428497768348e-06, - "loss": 0.6403, - "num_input_tokens_seen": 48620025, - "step": 2295 - }, - { - "epoch": 0.27607767690735285, - "grad_norm": 1.8778933751956248, - "learning_rate": 3.3978714953887205e-06, - "loss": 0.7131, - "num_input_tokens_seen": 48639500, - "step": 2296 - }, - { - "epoch": 0.27619791979799196, - "grad_norm": 6.191129429766541, - "learning_rate": 3.397314280953098e-06, - "loss": 0.8588, - "num_input_tokens_seen": 48660045, - "step": 2297 - }, - { - "epoch": 0.276318162688631, - "grad_norm": 2.001912299228255, - "learning_rate": 3.3967568545460108e-06, - "loss": 0.7938, - "num_input_tokens_seen": 48679305, - "step": 2298 - }, - { - "epoch": 0.27643840557927013, - "grad_norm": 1.7700210410343649, - "learning_rate": 3.396199216252019e-06, - "loss": 0.7997, - "num_input_tokens_seen": 48697650, - "step": 2299 - }, - { - "epoch": 0.27655864846990924, - "grad_norm": 2.1802939960234893, - "learning_rate": 3.3956413661557156e-06, - "loss": 0.7199, - "num_input_tokens_seen": 48717545, - "step": 2300 - }, - { - "epoch": 0.2766788913605483, - "grad_norm": 2.81793463226604, - "learning_rate": 3.3950833043417273e-06, - "loss": 0.6572, - "num_input_tokens_seen": 48735410, - "step": 2301 - }, - { - "epoch": 0.2767991342511874, - "grad_norm": 3.328374460944523, - "learning_rate": 3.3945250308947105e-06, - "loss": 0.7133, - "num_input_tokens_seen": 48751435, - "step": 2302 - }, - { - "epoch": 0.2769193771418265, - "grad_norm": 1.3003075002858855, - "learning_rate": 3.3939665458993556e-06, - "loss": 0.7, - "num_input_tokens_seen": 48805575, - "step": 2303 - }, - { - "epoch": 0.27703962003246557, - "grad_norm": 2.744390874938876, - "learning_rate": 3.393407849440384e-06, - "loss": 0.7626, - "num_input_tokens_seen": 48824870, - "step": 2304 - }, - { - "epoch": 0.2771598629231047, - "grad_norm": 3.6795682436540846, - "learning_rate": 3.3928489416025495e-06, - "loss": 0.8048, - "num_input_tokens_seen": 48845435, - "step": 2305 - }, - { - "epoch": 0.27728010581374374, - "grad_norm": 2.203084394611633, - "learning_rate": 3.392289822470638e-06, - "loss": 0.7825, - "num_input_tokens_seen": 48863135, - "step": 2306 - }, - { - "epoch": 0.27740034870438285, - "grad_norm": 2.140315436788674, - "learning_rate": 3.3917304921294674e-06, - "loss": 0.7575, - "num_input_tokens_seen": 48881020, - "step": 2307 - }, - { - "epoch": 0.27752059159502196, - "grad_norm": 1.654651789632888, - "learning_rate": 3.3911709506638876e-06, - "loss": 0.8068, - "num_input_tokens_seen": 48900050, - "step": 2308 - }, - { - "epoch": 0.277640834485661, - "grad_norm": 2.103030776941744, - "learning_rate": 3.390611198158781e-06, - "loss": 0.8044, - "num_input_tokens_seen": 48917645, - "step": 2309 - }, - { - "epoch": 0.2777610773763001, - "grad_norm": 2.6317138466191543, - "learning_rate": 3.3900512346990612e-06, - "loss": 0.8928, - "num_input_tokens_seen": 48933355, - "step": 2310 - }, - { - "epoch": 0.27788132026693924, - "grad_norm": 2.1834016492938506, - "learning_rate": 3.389491060369674e-06, - "loss": 0.654, - "num_input_tokens_seen": 48958750, - "step": 2311 - }, - { - "epoch": 0.2780015631575783, - "grad_norm": 2.0215184062906264, - "learning_rate": 3.388930675255598e-06, - "loss": 0.8906, - "num_input_tokens_seen": 48978320, - "step": 2312 - }, - { - "epoch": 0.2781218060482174, - "grad_norm": 8.766957940392066, - "learning_rate": 3.388370079441843e-06, - "loss": 0.7838, - "num_input_tokens_seen": 48993555, - "step": 2313 - }, - { - "epoch": 0.2782420489388565, - "grad_norm": 2.0819072496483115, - "learning_rate": 3.3878092730134505e-06, - "loss": 0.9243, - "num_input_tokens_seen": 49011260, - "step": 2314 - }, - { - "epoch": 0.27836229182949557, - "grad_norm": 1.7454940400959307, - "learning_rate": 3.3872482560554947e-06, - "loss": 0.8007, - "num_input_tokens_seen": 49029755, - "step": 2315 - }, - { - "epoch": 0.2784825347201347, - "grad_norm": 0.8398309645351633, - "learning_rate": 3.386687028653082e-06, - "loss": 0.5924, - "num_input_tokens_seen": 49092320, - "step": 2316 - }, - { - "epoch": 0.2786027776107738, - "grad_norm": 2.0516560227825167, - "learning_rate": 3.386125590891349e-06, - "loss": 0.8418, - "num_input_tokens_seen": 49108915, - "step": 2317 - }, - { - "epoch": 0.27872302050141284, - "grad_norm": 2.195437433171907, - "learning_rate": 3.3855639428554657e-06, - "loss": 0.8304, - "num_input_tokens_seen": 49126165, - "step": 2318 - }, - { - "epoch": 0.27884326339205195, - "grad_norm": 2.0714147772337754, - "learning_rate": 3.385002084630635e-06, - "loss": 0.8106, - "num_input_tokens_seen": 49144855, - "step": 2319 - }, - { - "epoch": 0.278963506282691, - "grad_norm": 2.5695153849915977, - "learning_rate": 3.384440016302088e-06, - "loss": 0.8394, - "num_input_tokens_seen": 49163250, - "step": 2320 - }, - { - "epoch": 0.2790837491733301, - "grad_norm": 2.87457309783845, - "learning_rate": 3.3838777379550923e-06, - "loss": 0.6156, - "num_input_tokens_seen": 49182415, - "step": 2321 - }, - { - "epoch": 0.27920399206396923, - "grad_norm": 2.0630528174721885, - "learning_rate": 3.3833152496749434e-06, - "loss": 0.787, - "num_input_tokens_seen": 49200700, - "step": 2322 - }, - { - "epoch": 0.2793242349546083, - "grad_norm": 2.948427561828374, - "learning_rate": 3.3827525515469715e-06, - "loss": 0.8547, - "num_input_tokens_seen": 49215325, - "step": 2323 - }, - { - "epoch": 0.2794444778452474, - "grad_norm": 2.216959619145687, - "learning_rate": 3.3821896436565367e-06, - "loss": 0.7018, - "num_input_tokens_seen": 49234705, - "step": 2324 - }, - { - "epoch": 0.2795647207358865, - "grad_norm": 1.9411968826961243, - "learning_rate": 3.381626526089032e-06, - "loss": 0.6943, - "num_input_tokens_seen": 49253990, - "step": 2325 - }, - { - "epoch": 0.27968496362652556, - "grad_norm": 2.456723916220405, - "learning_rate": 3.3810631989298815e-06, - "loss": 0.7844, - "num_input_tokens_seen": 49273320, - "step": 2326 - }, - { - "epoch": 0.2798052065171647, - "grad_norm": 2.653837432607224, - "learning_rate": 3.3804996622645423e-06, - "loss": 0.8482, - "num_input_tokens_seen": 49291040, - "step": 2327 - }, - { - "epoch": 0.2799254494078038, - "grad_norm": 1.735249267645477, - "learning_rate": 3.3799359161785015e-06, - "loss": 0.882, - "num_input_tokens_seen": 49310410, - "step": 2328 - }, - { - "epoch": 0.28004569229844284, - "grad_norm": 1.6241011759720148, - "learning_rate": 3.3793719607572798e-06, - "loss": 0.8458, - "num_input_tokens_seen": 49331095, - "step": 2329 - }, - { - "epoch": 0.28016593518908195, - "grad_norm": 1.9786870178526337, - "learning_rate": 3.378807796086428e-06, - "loss": 0.7723, - "num_input_tokens_seen": 49353675, - "step": 2330 - }, - { - "epoch": 0.28028617807972106, - "grad_norm": 2.1043044457698215, - "learning_rate": 3.37824342225153e-06, - "loss": 0.7596, - "num_input_tokens_seen": 49369815, - "step": 2331 - }, - { - "epoch": 0.2804064209703601, - "grad_norm": 1.8449009584367904, - "learning_rate": 3.3776788393382006e-06, - "loss": 0.7674, - "num_input_tokens_seen": 49389015, - "step": 2332 - }, - { - "epoch": 0.2805266638609992, - "grad_norm": 2.3814322280863753, - "learning_rate": 3.3771140474320872e-06, - "loss": 0.7667, - "num_input_tokens_seen": 49408685, - "step": 2333 - }, - { - "epoch": 0.28064690675163834, - "grad_norm": 2.1504838537722177, - "learning_rate": 3.3765490466188664e-06, - "loss": 0.7865, - "num_input_tokens_seen": 49425805, - "step": 2334 - }, - { - "epoch": 0.2807671496422774, - "grad_norm": 2.6568371678436744, - "learning_rate": 3.375983836984251e-06, - "loss": 0.7336, - "num_input_tokens_seen": 49443600, - "step": 2335 - }, - { - "epoch": 0.2808873925329165, - "grad_norm": 2.1006114655073653, - "learning_rate": 3.3754184186139807e-06, - "loss": 0.7305, - "num_input_tokens_seen": 49462345, - "step": 2336 - }, - { - "epoch": 0.28100763542355556, - "grad_norm": 2.193732071946392, - "learning_rate": 3.374852791593831e-06, - "loss": 0.8277, - "num_input_tokens_seen": 49478265, - "step": 2337 - }, - { - "epoch": 0.28112787831419467, - "grad_norm": 4.2055493465037745, - "learning_rate": 3.3742869560096047e-06, - "loss": 0.5424, - "num_input_tokens_seen": 49496550, - "step": 2338 - }, - { - "epoch": 0.2812481212048338, - "grad_norm": 1.9337409973651492, - "learning_rate": 3.3737209119471405e-06, - "loss": 0.759, - "num_input_tokens_seen": 49512780, - "step": 2339 - }, - { - "epoch": 0.28136836409547283, - "grad_norm": 3.6805333674022838, - "learning_rate": 3.373154659492306e-06, - "loss": 0.6438, - "num_input_tokens_seen": 49530640, - "step": 2340 - }, - { - "epoch": 0.28148860698611194, - "grad_norm": 1.9683957509887322, - "learning_rate": 3.3725881987310016e-06, - "loss": 0.8383, - "num_input_tokens_seen": 49547895, - "step": 2341 - }, - { - "epoch": 0.28160884987675106, - "grad_norm": 2.2372899926232854, - "learning_rate": 3.372021529749159e-06, - "loss": 0.8735, - "num_input_tokens_seen": 49566675, - "step": 2342 - }, - { - "epoch": 0.2817290927673901, - "grad_norm": 1.7335200184222752, - "learning_rate": 3.3714546526327405e-06, - "loss": 0.9167, - "num_input_tokens_seen": 49584395, - "step": 2343 - }, - { - "epoch": 0.2818493356580292, - "grad_norm": 2.1711987886453206, - "learning_rate": 3.3708875674677423e-06, - "loss": 0.8815, - "num_input_tokens_seen": 49602090, - "step": 2344 - }, - { - "epoch": 0.28196957854866833, - "grad_norm": 2.051798326515003, - "learning_rate": 3.37032027434019e-06, - "loss": 0.825, - "num_input_tokens_seen": 49621330, - "step": 2345 - }, - { - "epoch": 0.2820898214393074, - "grad_norm": 1.8933125668368698, - "learning_rate": 3.369752773336141e-06, - "loss": 0.8329, - "num_input_tokens_seen": 49640530, - "step": 2346 - }, - { - "epoch": 0.2822100643299465, - "grad_norm": 1.593694445478885, - "learning_rate": 3.3691850645416864e-06, - "loss": 0.7748, - "num_input_tokens_seen": 49659960, - "step": 2347 - }, - { - "epoch": 0.2823303072205856, - "grad_norm": 4.588622696084842, - "learning_rate": 3.368617148042945e-06, - "loss": 0.8283, - "num_input_tokens_seen": 49677350, - "step": 2348 - }, - { - "epoch": 0.28245055011122466, - "grad_norm": 1.7289211220494785, - "learning_rate": 3.3680490239260707e-06, - "loss": 0.8434, - "num_input_tokens_seen": 49696065, - "step": 2349 - }, - { - "epoch": 0.2825707930018638, - "grad_norm": 1.5039041006252167, - "learning_rate": 3.3674806922772476e-06, - "loss": 0.8287, - "num_input_tokens_seen": 49716670, - "step": 2350 - }, - { - "epoch": 0.28269103589250283, - "grad_norm": 1.7468866401604508, - "learning_rate": 3.366912153182691e-06, - "loss": 0.7398, - "num_input_tokens_seen": 49737370, - "step": 2351 - }, - { - "epoch": 0.28281127878314194, - "grad_norm": 2.2261955112367935, - "learning_rate": 3.366343406728647e-06, - "loss": 0.8306, - "num_input_tokens_seen": 49756540, - "step": 2352 - }, - { - "epoch": 0.28293152167378105, - "grad_norm": 2.0896194097100445, - "learning_rate": 3.365774453001395e-06, - "loss": 0.6811, - "num_input_tokens_seen": 49775495, - "step": 2353 - }, - { - "epoch": 0.2830517645644201, - "grad_norm": 2.1200228273944766, - "learning_rate": 3.3652052920872437e-06, - "loss": 0.7157, - "num_input_tokens_seen": 49798080, - "step": 2354 - }, - { - "epoch": 0.2831720074550592, - "grad_norm": 2.9015622918028314, - "learning_rate": 3.3646359240725355e-06, - "loss": 0.8491, - "num_input_tokens_seen": 49816990, - "step": 2355 - }, - { - "epoch": 0.2832922503456983, - "grad_norm": 2.0546911706134647, - "learning_rate": 3.364066349043643e-06, - "loss": 0.6794, - "num_input_tokens_seen": 49837915, - "step": 2356 - }, - { - "epoch": 0.2834124932363374, - "grad_norm": 1.625026343171864, - "learning_rate": 3.3634965670869695e-06, - "loss": 0.8225, - "num_input_tokens_seen": 49854730, - "step": 2357 - }, - { - "epoch": 0.2835327361269765, - "grad_norm": 2.0339618866187283, - "learning_rate": 3.3629265782889506e-06, - "loss": 0.7495, - "num_input_tokens_seen": 49876275, - "step": 2358 - }, - { - "epoch": 0.2836529790176156, - "grad_norm": 2.331083866823628, - "learning_rate": 3.362356382736054e-06, - "loss": 0.7115, - "num_input_tokens_seen": 49896600, - "step": 2359 - }, - { - "epoch": 0.28377322190825466, - "grad_norm": 2.3154376182729304, - "learning_rate": 3.361785980514777e-06, - "loss": 0.9085, - "num_input_tokens_seen": 49912520, - "step": 2360 - }, - { - "epoch": 0.28389346479889377, - "grad_norm": 1.9159869804799416, - "learning_rate": 3.361215371711649e-06, - "loss": 0.7672, - "num_input_tokens_seen": 49931335, - "step": 2361 - }, - { - "epoch": 0.2840137076895329, - "grad_norm": 1.7415243885138814, - "learning_rate": 3.3606445564132326e-06, - "loss": 0.8238, - "num_input_tokens_seen": 49948350, - "step": 2362 - }, - { - "epoch": 0.28413395058017193, - "grad_norm": 2.1226279951787825, - "learning_rate": 3.360073534706118e-06, - "loss": 0.8088, - "num_input_tokens_seen": 49965225, - "step": 2363 - }, - { - "epoch": 0.28425419347081105, - "grad_norm": 2.048902717683217, - "learning_rate": 3.35950230667693e-06, - "loss": 0.759, - "num_input_tokens_seen": 49986640, - "step": 2364 - }, - { - "epoch": 0.28437443636145016, - "grad_norm": 2.180278951163515, - "learning_rate": 3.358930872412323e-06, - "loss": 0.8594, - "num_input_tokens_seen": 50003525, - "step": 2365 - }, - { - "epoch": 0.2844946792520892, - "grad_norm": 1.6277711045186845, - "learning_rate": 3.3583592319989825e-06, - "loss": 0.8023, - "num_input_tokens_seen": 50022615, - "step": 2366 - }, - { - "epoch": 0.2846149221427283, - "grad_norm": 2.995159725165637, - "learning_rate": 3.357787385523627e-06, - "loss": 0.6801, - "num_input_tokens_seen": 50043740, - "step": 2367 - }, - { - "epoch": 0.2847351650333674, - "grad_norm": 1.8263151665286457, - "learning_rate": 3.3572153330730048e-06, - "loss": 0.8348, - "num_input_tokens_seen": 50064555, - "step": 2368 - }, - { - "epoch": 0.2848554079240065, - "grad_norm": 0.8245052233677317, - "learning_rate": 3.3566430747338956e-06, - "loss": 0.6663, - "num_input_tokens_seen": 50119480, - "step": 2369 - }, - { - "epoch": 0.2849756508146456, - "grad_norm": 2.037816991470415, - "learning_rate": 3.35607061059311e-06, - "loss": 0.8651, - "num_input_tokens_seen": 50134130, - "step": 2370 - }, - { - "epoch": 0.28509589370528465, - "grad_norm": 2.0180644576480113, - "learning_rate": 3.3554979407374917e-06, - "loss": 0.7397, - "num_input_tokens_seen": 50155960, - "step": 2371 - }, - { - "epoch": 0.28521613659592376, - "grad_norm": 1.458449352781362, - "learning_rate": 3.354925065253913e-06, - "loss": 0.7357, - "num_input_tokens_seen": 50174775, - "step": 2372 - }, - { - "epoch": 0.2853363794865629, - "grad_norm": 1.8653316341988153, - "learning_rate": 3.3543519842292794e-06, - "loss": 0.8135, - "num_input_tokens_seen": 50194150, - "step": 2373 - }, - { - "epoch": 0.28545662237720193, - "grad_norm": 1.9161792419834263, - "learning_rate": 3.353778697750527e-06, - "loss": 0.8361, - "num_input_tokens_seen": 50212275, - "step": 2374 - }, - { - "epoch": 0.28557686526784104, - "grad_norm": 2.31139332373254, - "learning_rate": 3.3532052059046224e-06, - "loss": 0.886, - "num_input_tokens_seen": 50231105, - "step": 2375 - }, - { - "epoch": 0.28569710815848015, - "grad_norm": 2.03694748948585, - "learning_rate": 3.3526315087785637e-06, - "loss": 0.7238, - "num_input_tokens_seen": 50251940, - "step": 2376 - }, - { - "epoch": 0.2858173510491192, - "grad_norm": 1.9488155469221076, - "learning_rate": 3.3520576064593805e-06, - "loss": 0.8044, - "num_input_tokens_seen": 50271615, - "step": 2377 - }, - { - "epoch": 0.2859375939397583, - "grad_norm": 2.6390625272226833, - "learning_rate": 3.3514834990341337e-06, - "loss": 0.8176, - "num_input_tokens_seen": 50291660, - "step": 2378 - }, - { - "epoch": 0.2860578368303974, - "grad_norm": 3.4807822414807865, - "learning_rate": 3.3509091865899144e-06, - "loss": 0.9207, - "num_input_tokens_seen": 50306570, - "step": 2379 - }, - { - "epoch": 0.2861780797210365, - "grad_norm": 1.8557746554040935, - "learning_rate": 3.350334669213846e-06, - "loss": 0.6995, - "num_input_tokens_seen": 50323695, - "step": 2380 - }, - { - "epoch": 0.2862983226116756, - "grad_norm": 2.205574963023131, - "learning_rate": 3.3497599469930816e-06, - "loss": 0.765, - "num_input_tokens_seen": 50341625, - "step": 2381 - }, - { - "epoch": 0.28641856550231465, - "grad_norm": 2.8725603321002007, - "learning_rate": 3.349185020014807e-06, - "loss": 0.8266, - "num_input_tokens_seen": 50358610, - "step": 2382 - }, - { - "epoch": 0.28653880839295376, - "grad_norm": 4.057371987433365, - "learning_rate": 3.348609888366237e-06, - "loss": 0.7447, - "num_input_tokens_seen": 50377345, - "step": 2383 - }, - { - "epoch": 0.28665905128359287, - "grad_norm": 2.129213760912391, - "learning_rate": 3.348034552134619e-06, - "loss": 0.6367, - "num_input_tokens_seen": 50396470, - "step": 2384 - }, - { - "epoch": 0.2867792941742319, - "grad_norm": 1.8525430496598638, - "learning_rate": 3.3474590114072316e-06, - "loss": 0.8422, - "num_input_tokens_seen": 50414190, - "step": 2385 - }, - { - "epoch": 0.28689953706487104, - "grad_norm": 2.111382287681849, - "learning_rate": 3.3468832662713836e-06, - "loss": 0.8302, - "num_input_tokens_seen": 50432155, - "step": 2386 - }, - { - "epoch": 0.28701977995551015, - "grad_norm": 2.2048254544299914, - "learning_rate": 3.346307316814415e-06, - "loss": 0.8395, - "num_input_tokens_seen": 50447045, - "step": 2387 - }, - { - "epoch": 0.2871400228461492, - "grad_norm": 2.0161948321955636, - "learning_rate": 3.3457311631236965e-06, - "loss": 0.7584, - "num_input_tokens_seen": 50467750, - "step": 2388 - }, - { - "epoch": 0.2872602657367883, - "grad_norm": 1.6782248205404353, - "learning_rate": 3.345154805286631e-06, - "loss": 0.841, - "num_input_tokens_seen": 50487730, - "step": 2389 - }, - { - "epoch": 0.2873805086274274, - "grad_norm": 2.3343942253584467, - "learning_rate": 3.344578243390651e-06, - "loss": 0.7561, - "num_input_tokens_seen": 50503010, - "step": 2390 - }, - { - "epoch": 0.2875007515180665, - "grad_norm": 2.4883317336903685, - "learning_rate": 3.3440014775232206e-06, - "loss": 0.7791, - "num_input_tokens_seen": 50520785, - "step": 2391 - }, - { - "epoch": 0.2876209944087056, - "grad_norm": 1.8567321108212271, - "learning_rate": 3.343424507771834e-06, - "loss": 0.716, - "num_input_tokens_seen": 50538715, - "step": 2392 - }, - { - "epoch": 0.2877412372993447, - "grad_norm": 1.7237803187885317, - "learning_rate": 3.342847334224018e-06, - "loss": 0.8664, - "num_input_tokens_seen": 50555835, - "step": 2393 - }, - { - "epoch": 0.28786148018998375, - "grad_norm": 0.8601963083552077, - "learning_rate": 3.342269956967329e-06, - "loss": 0.6685, - "num_input_tokens_seen": 50617460, - "step": 2394 - }, - { - "epoch": 0.28798172308062286, - "grad_norm": 2.756407830338849, - "learning_rate": 3.341692376089355e-06, - "loss": 0.7159, - "num_input_tokens_seen": 50632735, - "step": 2395 - }, - { - "epoch": 0.288101965971262, - "grad_norm": 4.997115905569317, - "learning_rate": 3.3411145916777146e-06, - "loss": 0.8358, - "num_input_tokens_seen": 50646615, - "step": 2396 - }, - { - "epoch": 0.28822220886190103, - "grad_norm": 2.8799385138002234, - "learning_rate": 3.3405366038200566e-06, - "loss": 0.9075, - "num_input_tokens_seen": 50665270, - "step": 2397 - }, - { - "epoch": 0.28834245175254014, - "grad_norm": 3.1408771963981925, - "learning_rate": 3.3399584126040617e-06, - "loss": 0.8429, - "num_input_tokens_seen": 50684490, - "step": 2398 - }, - { - "epoch": 0.2884626946431792, - "grad_norm": 1.7716305417768738, - "learning_rate": 3.339380018117441e-06, - "loss": 0.8947, - "num_input_tokens_seen": 50705045, - "step": 2399 - }, - { - "epoch": 0.2885829375338183, - "grad_norm": 2.3509406794548253, - "learning_rate": 3.3388014204479366e-06, - "loss": 0.7867, - "num_input_tokens_seen": 50722570, - "step": 2400 - }, - { - "epoch": 0.2887031804244574, - "grad_norm": 1.9856856668262237, - "learning_rate": 3.338222619683321e-06, - "loss": 0.9074, - "num_input_tokens_seen": 50742255, - "step": 2401 - }, - { - "epoch": 0.2888234233150965, - "grad_norm": 2.0710029298337296, - "learning_rate": 3.337643615911398e-06, - "loss": 0.7392, - "num_input_tokens_seen": 50761600, - "step": 2402 - }, - { - "epoch": 0.2889436662057356, - "grad_norm": 2.271643134441523, - "learning_rate": 3.3370644092200026e-06, - "loss": 0.7879, - "num_input_tokens_seen": 50778595, - "step": 2403 - }, - { - "epoch": 0.2890639090963747, - "grad_norm": 1.8935822366119175, - "learning_rate": 3.3364849996969985e-06, - "loss": 0.7848, - "num_input_tokens_seen": 50798335, - "step": 2404 - }, - { - "epoch": 0.28918415198701375, - "grad_norm": 2.203306085437626, - "learning_rate": 3.335905387430283e-06, - "loss": 0.8455, - "num_input_tokens_seen": 50819490, - "step": 2405 - }, - { - "epoch": 0.28930439487765286, - "grad_norm": 2.4950887348299546, - "learning_rate": 3.335325572507782e-06, - "loss": 0.8191, - "num_input_tokens_seen": 50839710, - "step": 2406 - }, - { - "epoch": 0.28942463776829197, - "grad_norm": 1.6024700502263713, - "learning_rate": 3.3347455550174537e-06, - "loss": 0.7422, - "num_input_tokens_seen": 50858770, - "step": 2407 - }, - { - "epoch": 0.289544880658931, - "grad_norm": 2.1834084625158177, - "learning_rate": 3.3341653350472864e-06, - "loss": 0.684, - "num_input_tokens_seen": 50875320, - "step": 2408 - }, - { - "epoch": 0.28966512354957014, - "grad_norm": 2.7441756714514915, - "learning_rate": 3.333584912685298e-06, - "loss": 0.7012, - "num_input_tokens_seen": 50893660, - "step": 2409 - }, - { - "epoch": 0.28978536644020925, - "grad_norm": 0.9030873691811463, - "learning_rate": 3.3330042880195385e-06, - "loss": 0.5874, - "num_input_tokens_seen": 50947730, - "step": 2410 - }, - { - "epoch": 0.2899056093308483, - "grad_norm": 2.7922675411835867, - "learning_rate": 3.3324234611380888e-06, - "loss": 0.7797, - "num_input_tokens_seen": 50966180, - "step": 2411 - }, - { - "epoch": 0.2900258522214874, - "grad_norm": 1.7662829775399878, - "learning_rate": 3.3318424321290596e-06, - "loss": 0.8154, - "num_input_tokens_seen": 50985615, - "step": 2412 - }, - { - "epoch": 0.2901460951121265, - "grad_norm": 0.8971180930924121, - "learning_rate": 3.3312612010805917e-06, - "loss": 0.6452, - "num_input_tokens_seen": 51044910, - "step": 2413 - }, - { - "epoch": 0.2902663380027656, - "grad_norm": 1.8514742773590152, - "learning_rate": 3.330679768080858e-06, - "loss": 0.6922, - "num_input_tokens_seen": 51068515, - "step": 2414 - }, - { - "epoch": 0.2903865808934047, - "grad_norm": 2.396960076832458, - "learning_rate": 3.3300981332180627e-06, - "loss": 0.8371, - "num_input_tokens_seen": 51087440, - "step": 2415 - }, - { - "epoch": 0.29050682378404374, - "grad_norm": 2.0592319946698776, - "learning_rate": 3.3295162965804373e-06, - "loss": 0.8008, - "num_input_tokens_seen": 51105655, - "step": 2416 - }, - { - "epoch": 0.29062706667468285, - "grad_norm": 2.0730644262668476, - "learning_rate": 3.328934258256247e-06, - "loss": 0.7721, - "num_input_tokens_seen": 51123440, - "step": 2417 - }, - { - "epoch": 0.29074730956532197, - "grad_norm": 2.315997863697858, - "learning_rate": 3.3283520183337856e-06, - "loss": 0.6665, - "num_input_tokens_seen": 51142865, - "step": 2418 - }, - { - "epoch": 0.290867552455961, - "grad_norm": 1.6396967508811393, - "learning_rate": 3.32776957690138e-06, - "loss": 0.692, - "num_input_tokens_seen": 51162030, - "step": 2419 - }, - { - "epoch": 0.29098779534660013, - "grad_norm": 1.9017461653737526, - "learning_rate": 3.327186934047385e-06, - "loss": 0.7664, - "num_input_tokens_seen": 51180445, - "step": 2420 - }, - { - "epoch": 0.29110803823723924, - "grad_norm": 11.288210420598688, - "learning_rate": 3.3266040898601877e-06, - "loss": 0.6601, - "num_input_tokens_seen": 51198000, - "step": 2421 - }, - { - "epoch": 0.2912282811278783, - "grad_norm": 1.8089073991980655, - "learning_rate": 3.3260210444282045e-06, - "loss": 0.7775, - "num_input_tokens_seen": 51215675, - "step": 2422 - }, - { - "epoch": 0.2913485240185174, - "grad_norm": 2.067262619173759, - "learning_rate": 3.325437797839883e-06, - "loss": 0.7237, - "num_input_tokens_seen": 51233765, - "step": 2423 - }, - { - "epoch": 0.2914687669091565, - "grad_norm": 3.9177865054011183, - "learning_rate": 3.3248543501837015e-06, - "loss": 0.7518, - "num_input_tokens_seen": 51250690, - "step": 2424 - }, - { - "epoch": 0.2915890097997956, - "grad_norm": 2.1706685490162774, - "learning_rate": 3.3242707015481684e-06, - "loss": 0.7682, - "num_input_tokens_seen": 51270345, - "step": 2425 - }, - { - "epoch": 0.2917092526904347, - "grad_norm": 1.7188174916915615, - "learning_rate": 3.323686852021823e-06, - "loss": 0.8081, - "num_input_tokens_seen": 51287575, - "step": 2426 - }, - { - "epoch": 0.2918294955810738, - "grad_norm": 2.288009234157425, - "learning_rate": 3.323102801693235e-06, - "loss": 0.7929, - "num_input_tokens_seen": 51306060, - "step": 2427 - }, - { - "epoch": 0.29194973847171285, - "grad_norm": 2.153413574201045, - "learning_rate": 3.3225185506510025e-06, - "loss": 0.7951, - "num_input_tokens_seen": 51325090, - "step": 2428 - }, - { - "epoch": 0.29206998136235196, - "grad_norm": 1.8872514057508358, - "learning_rate": 3.3219340989837586e-06, - "loss": 0.8141, - "num_input_tokens_seen": 51344800, - "step": 2429 - }, - { - "epoch": 0.292190224252991, - "grad_norm": 1.9173651755054195, - "learning_rate": 3.3213494467801625e-06, - "loss": 0.8064, - "num_input_tokens_seen": 51363695, - "step": 2430 - }, - { - "epoch": 0.2923104671436301, - "grad_norm": 2.1108424370028094, - "learning_rate": 3.3207645941289063e-06, - "loss": 0.7172, - "num_input_tokens_seen": 51381760, - "step": 2431 - }, - { - "epoch": 0.29243071003426924, - "grad_norm": 3.879031149823903, - "learning_rate": 3.320179541118711e-06, - "loss": 0.8025, - "num_input_tokens_seen": 51403980, - "step": 2432 - }, - { - "epoch": 0.2925509529249083, - "grad_norm": 1.0909535952449185, - "learning_rate": 3.3195942878383293e-06, - "loss": 0.6435, - "num_input_tokens_seen": 51459800, - "step": 2433 - }, - { - "epoch": 0.2926711958155474, - "grad_norm": 1.7813342450566485, - "learning_rate": 3.319008834376543e-06, - "loss": 0.7814, - "num_input_tokens_seen": 51479210, - "step": 2434 - }, - { - "epoch": 0.2927914387061865, - "grad_norm": 2.4602465599161265, - "learning_rate": 3.3184231808221654e-06, - "loss": 0.8752, - "num_input_tokens_seen": 51493255, - "step": 2435 - }, - { - "epoch": 0.29291168159682557, - "grad_norm": 2.400940382068107, - "learning_rate": 3.3178373272640394e-06, - "loss": 0.6267, - "num_input_tokens_seen": 51512070, - "step": 2436 - }, - { - "epoch": 0.2930319244874647, - "grad_norm": 2.1780229300924576, - "learning_rate": 3.317251273791039e-06, - "loss": 0.8618, - "num_input_tokens_seen": 51529300, - "step": 2437 - }, - { - "epoch": 0.2931521673781038, - "grad_norm": 2.2260427300901813, - "learning_rate": 3.316665020492067e-06, - "loss": 0.882, - "num_input_tokens_seen": 51550190, - "step": 2438 - }, - { - "epoch": 0.29327241026874284, - "grad_norm": 1.627963780846525, - "learning_rate": 3.316078567456059e-06, - "loss": 0.8149, - "num_input_tokens_seen": 51567750, - "step": 2439 - }, - { - "epoch": 0.29339265315938196, - "grad_norm": 1.860293357065652, - "learning_rate": 3.3154919147719786e-06, - "loss": 0.7649, - "num_input_tokens_seen": 51588485, - "step": 2440 - }, - { - "epoch": 0.29351289605002107, - "grad_norm": 1.8099545327764344, - "learning_rate": 3.3149050625288206e-06, - "loss": 0.8714, - "num_input_tokens_seen": 51607585, - "step": 2441 - }, - { - "epoch": 0.2936331389406601, - "grad_norm": 1.740896376267365, - "learning_rate": 3.31431801081561e-06, - "loss": 0.8383, - "num_input_tokens_seen": 51626240, - "step": 2442 - }, - { - "epoch": 0.29375338183129923, - "grad_norm": 1.0780926283131673, - "learning_rate": 3.313730759721402e-06, - "loss": 0.6853, - "num_input_tokens_seen": 51688890, - "step": 2443 - }, - { - "epoch": 0.29387362472193834, - "grad_norm": 2.05148341379875, - "learning_rate": 3.313143309335282e-06, - "loss": 0.8552, - "num_input_tokens_seen": 51707100, - "step": 2444 - }, - { - "epoch": 0.2939938676125774, - "grad_norm": 1.7016368137711781, - "learning_rate": 3.3125556597463665e-06, - "loss": 0.8336, - "num_input_tokens_seen": 51726125, - "step": 2445 - }, - { - "epoch": 0.2941141105032165, - "grad_norm": 1.4523775894637847, - "learning_rate": 3.311967811043801e-06, - "loss": 0.6535, - "num_input_tokens_seen": 51747765, - "step": 2446 - }, - { - "epoch": 0.29423435339385556, - "grad_norm": 2.612657832494984, - "learning_rate": 3.3113797633167617e-06, - "loss": 0.8184, - "num_input_tokens_seen": 51765780, - "step": 2447 - }, - { - "epoch": 0.2943545962844947, - "grad_norm": 2.112544091494324, - "learning_rate": 3.310791516654455e-06, - "loss": 0.683, - "num_input_tokens_seen": 51782560, - "step": 2448 - }, - { - "epoch": 0.2944748391751338, - "grad_norm": 2.1895601178464603, - "learning_rate": 3.3102030711461177e-06, - "loss": 0.7939, - "num_input_tokens_seen": 51801855, - "step": 2449 - }, - { - "epoch": 0.29459508206577284, - "grad_norm": 2.035623488088594, - "learning_rate": 3.3096144268810156e-06, - "loss": 0.6792, - "num_input_tokens_seen": 51820335, - "step": 2450 - }, - { - "epoch": 0.29471532495641195, - "grad_norm": 2.5482851314735293, - "learning_rate": 3.3090255839484462e-06, - "loss": 0.7168, - "num_input_tokens_seen": 51838050, - "step": 2451 - }, - { - "epoch": 0.29483556784705106, - "grad_norm": 1.7241812729777966, - "learning_rate": 3.3084365424377366e-06, - "loss": 0.8518, - "num_input_tokens_seen": 51856535, - "step": 2452 - }, - { - "epoch": 0.2949558107376901, - "grad_norm": 0.7890756144280393, - "learning_rate": 3.307847302438245e-06, - "loss": 0.5799, - "num_input_tokens_seen": 51910235, - "step": 2453 - }, - { - "epoch": 0.2950760536283292, - "grad_norm": 2.339193335608346, - "learning_rate": 3.307257864039356e-06, - "loss": 0.7758, - "num_input_tokens_seen": 51927290, - "step": 2454 - }, - { - "epoch": 0.29519629651896834, - "grad_norm": 1.7898455396707815, - "learning_rate": 3.306668227330489e-06, - "loss": 0.7877, - "num_input_tokens_seen": 51944655, - "step": 2455 - }, - { - "epoch": 0.2953165394096074, - "grad_norm": 1.9943872322131644, - "learning_rate": 3.3060783924010904e-06, - "loss": 0.7724, - "num_input_tokens_seen": 51962300, - "step": 2456 - }, - { - "epoch": 0.2954367823002465, - "grad_norm": 2.1246258453013955, - "learning_rate": 3.3054883593406387e-06, - "loss": 0.8452, - "num_input_tokens_seen": 51976770, - "step": 2457 - }, - { - "epoch": 0.2955570251908856, - "grad_norm": 2.648904710154184, - "learning_rate": 3.3048981282386404e-06, - "loss": 0.6413, - "num_input_tokens_seen": 51997800, - "step": 2458 - }, - { - "epoch": 0.29567726808152467, - "grad_norm": 2.185750167176726, - "learning_rate": 3.304307699184634e-06, - "loss": 0.8218, - "num_input_tokens_seen": 52016110, - "step": 2459 - }, - { - "epoch": 0.2957975109721638, - "grad_norm": 1.5885380689557118, - "learning_rate": 3.3037170722681866e-06, - "loss": 0.7912, - "num_input_tokens_seen": 52036665, - "step": 2460 - }, - { - "epoch": 0.29591775386280283, - "grad_norm": 1.9911603431610132, - "learning_rate": 3.3031262475788956e-06, - "loss": 0.6869, - "num_input_tokens_seen": 52053325, - "step": 2461 - }, - { - "epoch": 0.29603799675344195, - "grad_norm": 1.990018127730373, - "learning_rate": 3.3025352252063897e-06, - "loss": 0.7368, - "num_input_tokens_seen": 52071740, - "step": 2462 - }, - { - "epoch": 0.29615823964408106, - "grad_norm": 2.485793276782081, - "learning_rate": 3.3019440052403252e-06, - "loss": 0.7478, - "num_input_tokens_seen": 52091325, - "step": 2463 - }, - { - "epoch": 0.2962784825347201, - "grad_norm": 2.474399644067521, - "learning_rate": 3.30135258777039e-06, - "loss": 0.7083, - "num_input_tokens_seen": 52110415, - "step": 2464 - }, - { - "epoch": 0.2963987254253592, - "grad_norm": 2.1437613636519677, - "learning_rate": 3.3007609728863024e-06, - "loss": 0.6958, - "num_input_tokens_seen": 52128225, - "step": 2465 - }, - { - "epoch": 0.29651896831599833, - "grad_norm": 1.7764703039792589, - "learning_rate": 3.300169160677809e-06, - "loss": 0.7265, - "num_input_tokens_seen": 52151860, - "step": 2466 - }, - { - "epoch": 0.2966392112066374, - "grad_norm": 2.4051179307053316, - "learning_rate": 3.299577151234688e-06, - "loss": 0.7712, - "num_input_tokens_seen": 52169930, - "step": 2467 - }, - { - "epoch": 0.2967594540972765, - "grad_norm": 2.5196428611488666, - "learning_rate": 3.298984944646746e-06, - "loss": 0.7293, - "num_input_tokens_seen": 52188330, - "step": 2468 - }, - { - "epoch": 0.2968796969879156, - "grad_norm": 1.9804512974836999, - "learning_rate": 3.298392541003822e-06, - "loss": 0.8051, - "num_input_tokens_seen": 52207455, - "step": 2469 - }, - { - "epoch": 0.29699993987855466, - "grad_norm": 1.6473123172237887, - "learning_rate": 3.2977999403957806e-06, - "loss": 0.8867, - "num_input_tokens_seen": 52225935, - "step": 2470 - }, - { - "epoch": 0.2971201827691938, - "grad_norm": 2.152949661252587, - "learning_rate": 3.2972071429125207e-06, - "loss": 0.6688, - "num_input_tokens_seen": 52246875, - "step": 2471 - }, - { - "epoch": 0.2972404256598329, - "grad_norm": 2.0907392195718306, - "learning_rate": 3.2966141486439682e-06, - "loss": 0.8814, - "num_input_tokens_seen": 52265785, - "step": 2472 - }, - { - "epoch": 0.29736066855047194, - "grad_norm": 3.0497037353033973, - "learning_rate": 3.29602095768008e-06, - "loss": 0.6507, - "num_input_tokens_seen": 52286020, - "step": 2473 - }, - { - "epoch": 0.29748091144111105, - "grad_norm": 1.7916149757883848, - "learning_rate": 3.2954275701108437e-06, - "loss": 0.6354, - "num_input_tokens_seen": 52306920, - "step": 2474 - }, - { - "epoch": 0.29760115433175016, - "grad_norm": 3.04305122713625, - "learning_rate": 3.294833986026275e-06, - "loss": 0.6865, - "num_input_tokens_seen": 52329880, - "step": 2475 - }, - { - "epoch": 0.2977213972223892, - "grad_norm": 1.911358301830498, - "learning_rate": 3.2942402055164197e-06, - "loss": 0.8544, - "num_input_tokens_seen": 52348235, - "step": 2476 - }, - { - "epoch": 0.2978416401130283, - "grad_norm": 3.4287036945148817, - "learning_rate": 3.2936462286713546e-06, - "loss": 0.7068, - "num_input_tokens_seen": 52366305, - "step": 2477 - }, - { - "epoch": 0.2979618830036674, - "grad_norm": 1.857681706163987, - "learning_rate": 3.2930520555811846e-06, - "loss": 0.7702, - "num_input_tokens_seen": 52385650, - "step": 2478 - }, - { - "epoch": 0.2980821258943065, - "grad_norm": 1.9187220620943757, - "learning_rate": 3.292457686336046e-06, - "loss": 0.7943, - "num_input_tokens_seen": 52404690, - "step": 2479 - }, - { - "epoch": 0.2982023687849456, - "grad_norm": 0.83889210324733, - "learning_rate": 3.291863121026105e-06, - "loss": 0.647, - "num_input_tokens_seen": 52468190, - "step": 2480 - }, - { - "epoch": 0.29832261167558466, - "grad_norm": 2.08143999539811, - "learning_rate": 3.291268359741555e-06, - "loss": 0.7573, - "num_input_tokens_seen": 52491995, - "step": 2481 - }, - { - "epoch": 0.29844285456622377, - "grad_norm": 2.0939528029122765, - "learning_rate": 3.2906734025726213e-06, - "loss": 0.7832, - "num_input_tokens_seen": 52510980, - "step": 2482 - }, - { - "epoch": 0.2985630974568629, - "grad_norm": 2.482796718990891, - "learning_rate": 3.290078249609559e-06, - "loss": 0.8749, - "num_input_tokens_seen": 52530120, - "step": 2483 - }, - { - "epoch": 0.29868334034750194, - "grad_norm": 2.287437119430872, - "learning_rate": 3.2894829009426514e-06, - "loss": 0.8733, - "num_input_tokens_seen": 52547675, - "step": 2484 - }, - { - "epoch": 0.29880358323814105, - "grad_norm": 2.0172152854300367, - "learning_rate": 3.288887356662213e-06, - "loss": 0.7711, - "num_input_tokens_seen": 52568730, - "step": 2485 - }, - { - "epoch": 0.29892382612878016, - "grad_norm": 0.9688866212572336, - "learning_rate": 3.288291616858588e-06, - "loss": 0.6127, - "num_input_tokens_seen": 52623840, - "step": 2486 - }, - { - "epoch": 0.2990440690194192, - "grad_norm": 1.751736459862644, - "learning_rate": 3.287695681622149e-06, - "loss": 0.7675, - "num_input_tokens_seen": 52642910, - "step": 2487 - }, - { - "epoch": 0.2991643119100583, - "grad_norm": 11.101667752029197, - "learning_rate": 3.2870995510432982e-06, - "loss": 0.8052, - "num_input_tokens_seen": 52661110, - "step": 2488 - }, - { - "epoch": 0.29928455480069743, - "grad_norm": 1.9221717110801153, - "learning_rate": 3.2865032252124697e-06, - "loss": 0.7636, - "num_input_tokens_seen": 52681345, - "step": 2489 - }, - { - "epoch": 0.2994047976913365, - "grad_norm": 1.604462359663118, - "learning_rate": 3.2859067042201243e-06, - "loss": 0.7684, - "num_input_tokens_seen": 52703105, - "step": 2490 - }, - { - "epoch": 0.2995250405819756, - "grad_norm": 2.015002945199685, - "learning_rate": 3.2853099881567544e-06, - "loss": 0.768, - "num_input_tokens_seen": 52721225, - "step": 2491 - }, - { - "epoch": 0.29964528347261465, - "grad_norm": 2.4415352218107262, - "learning_rate": 3.284713077112881e-06, - "loss": 0.7875, - "num_input_tokens_seen": 52740375, - "step": 2492 - }, - { - "epoch": 0.29976552636325376, - "grad_norm": 3.0967529356376633, - "learning_rate": 3.284115971179056e-06, - "loss": 0.8609, - "num_input_tokens_seen": 52754125, - "step": 2493 - }, - { - "epoch": 0.2998857692538929, - "grad_norm": 1.8933973354510039, - "learning_rate": 3.283518670445859e-06, - "loss": 0.7945, - "num_input_tokens_seen": 52771755, - "step": 2494 - }, - { - "epoch": 0.30000601214453193, - "grad_norm": 0.7803843287154192, - "learning_rate": 3.2829211750038995e-06, - "loss": 0.5716, - "num_input_tokens_seen": 52840105, - "step": 2495 - }, - { - "epoch": 0.30012625503517104, - "grad_norm": 1.8411967158197522, - "learning_rate": 3.2823234849438183e-06, - "loss": 0.8758, - "num_input_tokens_seen": 52857860, - "step": 2496 - }, - { - "epoch": 0.30024649792581015, - "grad_norm": 3.748644010109616, - "learning_rate": 3.281725600356284e-06, - "loss": 0.7408, - "num_input_tokens_seen": 52877955, - "step": 2497 - }, - { - "epoch": 0.3003667408164492, - "grad_norm": 2.0917554133314273, - "learning_rate": 3.281127521331995e-06, - "loss": 0.6641, - "num_input_tokens_seen": 52898855, - "step": 2498 - }, - { - "epoch": 0.3004869837070883, - "grad_norm": 0.8967966368909067, - "learning_rate": 3.2805292479616798e-06, - "loss": 0.6378, - "num_input_tokens_seen": 52957440, - "step": 2499 - }, - { - "epoch": 0.30060722659772743, - "grad_norm": 3.4630790154255675, - "learning_rate": 3.2799307803360955e-06, - "loss": 0.9137, - "num_input_tokens_seen": 52973090, - "step": 2500 - }, - { - "epoch": 0.3007274694883665, - "grad_norm": 1.4162701957226582, - "learning_rate": 3.27933211854603e-06, - "loss": 0.811, - "num_input_tokens_seen": 52991865, - "step": 2501 - }, - { - "epoch": 0.3008477123790056, - "grad_norm": 1.7011882807860528, - "learning_rate": 3.278733262682299e-06, - "loss": 0.8623, - "num_input_tokens_seen": 53009440, - "step": 2502 - }, - { - "epoch": 0.3009679552696447, - "grad_norm": 2.3751784450082543, - "learning_rate": 3.278134212835749e-06, - "loss": 0.8151, - "num_input_tokens_seen": 53028515, - "step": 2503 - }, - { - "epoch": 0.30108819816028376, - "grad_norm": 2.1668155791787616, - "learning_rate": 3.2775349690972547e-06, - "loss": 0.8051, - "num_input_tokens_seen": 53042385, - "step": 2504 - }, - { - "epoch": 0.30120844105092287, - "grad_norm": 0.9486798203615799, - "learning_rate": 3.276935531557722e-06, - "loss": 0.5637, - "num_input_tokens_seen": 53107325, - "step": 2505 - }, - { - "epoch": 0.301328683941562, - "grad_norm": 2.4167899835553324, - "learning_rate": 3.2763359003080833e-06, - "loss": 0.7956, - "num_input_tokens_seen": 53124000, - "step": 2506 - }, - { - "epoch": 0.30144892683220104, - "grad_norm": 0.9779399931882427, - "learning_rate": 3.2757360754393047e-06, - "loss": 0.6735, - "num_input_tokens_seen": 53187790, - "step": 2507 - }, - { - "epoch": 0.30156916972284015, - "grad_norm": 2.378923066630368, - "learning_rate": 3.2751360570423767e-06, - "loss": 0.637, - "num_input_tokens_seen": 53205895, - "step": 2508 - }, - { - "epoch": 0.3016894126134792, - "grad_norm": 2.431824990597903, - "learning_rate": 3.2745358452083236e-06, - "loss": 0.7553, - "num_input_tokens_seen": 53228515, - "step": 2509 - }, - { - "epoch": 0.3018096555041183, - "grad_norm": 1.337845004814104, - "learning_rate": 3.2739354400281955e-06, - "loss": 0.8118, - "num_input_tokens_seen": 53249455, - "step": 2510 - }, - { - "epoch": 0.3019298983947574, - "grad_norm": 0.9845911362696367, - "learning_rate": 3.2733348415930744e-06, - "loss": 0.6883, - "num_input_tokens_seen": 53311045, - "step": 2511 - }, - { - "epoch": 0.3020501412853965, - "grad_norm": 2.1019031194051676, - "learning_rate": 3.27273404999407e-06, - "loss": 0.814, - "num_input_tokens_seen": 53332985, - "step": 2512 - }, - { - "epoch": 0.3021703841760356, - "grad_norm": 0.8197240349656524, - "learning_rate": 3.272133065322322e-06, - "loss": 0.6264, - "num_input_tokens_seen": 53390975, - "step": 2513 - }, - { - "epoch": 0.3022906270666747, - "grad_norm": 1.7445627268185064, - "learning_rate": 3.271531887669e-06, - "loss": 0.7833, - "num_input_tokens_seen": 53410755, - "step": 2514 - }, - { - "epoch": 0.30241086995731375, - "grad_norm": 2.4318481507289484, - "learning_rate": 3.2709305171253015e-06, - "loss": 0.6357, - "num_input_tokens_seen": 53430595, - "step": 2515 - }, - { - "epoch": 0.30253111284795287, - "grad_norm": 1.8127750855080975, - "learning_rate": 3.270328953782453e-06, - "loss": 0.7777, - "num_input_tokens_seen": 53450115, - "step": 2516 - }, - { - "epoch": 0.302651355738592, - "grad_norm": 3.0239995682327114, - "learning_rate": 3.2697271977317137e-06, - "loss": 0.7878, - "num_input_tokens_seen": 53462600, - "step": 2517 - }, - { - "epoch": 0.30277159862923103, - "grad_norm": 1.7550830901960661, - "learning_rate": 3.269125249064367e-06, - "loss": 0.7742, - "num_input_tokens_seen": 53482015, - "step": 2518 - }, - { - "epoch": 0.30289184151987014, - "grad_norm": 6.035178215518512, - "learning_rate": 3.26852310787173e-06, - "loss": 0.8282, - "num_input_tokens_seen": 53501925, - "step": 2519 - }, - { - "epoch": 0.30301208441050925, - "grad_norm": 1.98512020886652, - "learning_rate": 3.267920774245145e-06, - "loss": 0.7412, - "num_input_tokens_seen": 53521050, - "step": 2520 - }, - { - "epoch": 0.3031323273011483, - "grad_norm": 3.2800418889949605, - "learning_rate": 3.267318248275988e-06, - "loss": 0.841, - "num_input_tokens_seen": 53539885, - "step": 2521 - }, - { - "epoch": 0.3032525701917874, - "grad_norm": 2.500616261269978, - "learning_rate": 3.266715530055659e-06, - "loss": 0.6561, - "num_input_tokens_seen": 53557755, - "step": 2522 - }, - { - "epoch": 0.30337281308242653, - "grad_norm": 1.7281600443888308, - "learning_rate": 3.2661126196755927e-06, - "loss": 0.8001, - "num_input_tokens_seen": 53576585, - "step": 2523 - }, - { - "epoch": 0.3034930559730656, - "grad_norm": 1.1425042884390375, - "learning_rate": 3.265509517227248e-06, - "loss": 0.6048, - "num_input_tokens_seen": 53633120, - "step": 2524 - }, - { - "epoch": 0.3036132988637047, - "grad_norm": 1.9615512724426913, - "learning_rate": 3.2649062228021154e-06, - "loss": 0.7983, - "num_input_tokens_seen": 53650690, - "step": 2525 - }, - { - "epoch": 0.30373354175434375, - "grad_norm": 2.1809817491609684, - "learning_rate": 3.2643027364917145e-06, - "loss": 0.7762, - "num_input_tokens_seen": 53670530, - "step": 2526 - }, - { - "epoch": 0.30385378464498286, - "grad_norm": 1.8760831990023663, - "learning_rate": 3.263699058387594e-06, - "loss": 0.8585, - "num_input_tokens_seen": 53687685, - "step": 2527 - }, - { - "epoch": 0.30397402753562197, - "grad_norm": 2.1256814571681844, - "learning_rate": 3.2630951885813315e-06, - "loss": 0.9042, - "num_input_tokens_seen": 53704800, - "step": 2528 - }, - { - "epoch": 0.304094270426261, - "grad_norm": 1.9710145323797088, - "learning_rate": 3.262491127164533e-06, - "loss": 0.7785, - "num_input_tokens_seen": 53723335, - "step": 2529 - }, - { - "epoch": 0.30421451331690014, - "grad_norm": 2.47045534128996, - "learning_rate": 3.2618868742288337e-06, - "loss": 0.7915, - "num_input_tokens_seen": 53739980, - "step": 2530 - }, - { - "epoch": 0.30433475620753925, - "grad_norm": 3.483552643808775, - "learning_rate": 3.261282429865899e-06, - "loss": 0.7268, - "num_input_tokens_seen": 53757705, - "step": 2531 - }, - { - "epoch": 0.3044549990981783, - "grad_norm": 1.8065887059284331, - "learning_rate": 3.2606777941674225e-06, - "loss": 0.723, - "num_input_tokens_seen": 53776080, - "step": 2532 - }, - { - "epoch": 0.3045752419888174, - "grad_norm": 2.155245326436034, - "learning_rate": 3.2600729672251276e-06, - "loss": 0.8364, - "num_input_tokens_seen": 53793515, - "step": 2533 - }, - { - "epoch": 0.3046954848794565, - "grad_norm": 2.7798033321988713, - "learning_rate": 3.259467949130765e-06, - "loss": 0.6544, - "num_input_tokens_seen": 53814645, - "step": 2534 - }, - { - "epoch": 0.3048157277700956, - "grad_norm": 3.329007330186855, - "learning_rate": 3.2588627399761164e-06, - "loss": 0.8325, - "num_input_tokens_seen": 53830360, - "step": 2535 - }, - { - "epoch": 0.3049359706607347, - "grad_norm": 1.6784768787855624, - "learning_rate": 3.2582573398529903e-06, - "loss": 0.7078, - "num_input_tokens_seen": 53847435, - "step": 2536 - }, - { - "epoch": 0.3050562135513738, - "grad_norm": 2.726315746049457, - "learning_rate": 3.2576517488532265e-06, - "loss": 0.7432, - "num_input_tokens_seen": 53863505, - "step": 2537 - }, - { - "epoch": 0.30517645644201286, - "grad_norm": 1.6952580132360224, - "learning_rate": 3.257045967068692e-06, - "loss": 0.8602, - "num_input_tokens_seen": 53882480, - "step": 2538 - }, - { - "epoch": 0.30529669933265197, - "grad_norm": 1.8713597780020776, - "learning_rate": 3.2564399945912848e-06, - "loss": 0.8181, - "num_input_tokens_seen": 53901990, - "step": 2539 - }, - { - "epoch": 0.305416942223291, - "grad_norm": 2.372692098960834, - "learning_rate": 3.2558338315129287e-06, - "loss": 0.8159, - "num_input_tokens_seen": 53919855, - "step": 2540 - }, - { - "epoch": 0.30553718511393013, - "grad_norm": 1.9393419318240728, - "learning_rate": 3.2552274779255785e-06, - "loss": 0.759, - "num_input_tokens_seen": 53940505, - "step": 2541 - }, - { - "epoch": 0.30565742800456924, - "grad_norm": 2.2690890857295574, - "learning_rate": 3.2546209339212184e-06, - "loss": 0.7682, - "num_input_tokens_seen": 53959245, - "step": 2542 - }, - { - "epoch": 0.3057776708952083, - "grad_norm": 1.8853416978386144, - "learning_rate": 3.25401419959186e-06, - "loss": 0.7641, - "num_input_tokens_seen": 53979575, - "step": 2543 - }, - { - "epoch": 0.3058979137858474, - "grad_norm": 1.9475239188249012, - "learning_rate": 3.253407275029545e-06, - "loss": 0.7596, - "num_input_tokens_seen": 53998200, - "step": 2544 - }, - { - "epoch": 0.3060181566764865, - "grad_norm": 2.1327806900429143, - "learning_rate": 3.2528001603263425e-06, - "loss": 0.7951, - "num_input_tokens_seen": 54019990, - "step": 2545 - }, - { - "epoch": 0.3061383995671256, - "grad_norm": 1.863831689282036, - "learning_rate": 3.2521928555743514e-06, - "loss": 0.8104, - "num_input_tokens_seen": 54037055, - "step": 2546 - }, - { - "epoch": 0.3062586424577647, - "grad_norm": 2.3708063763844365, - "learning_rate": 3.251585360865701e-06, - "loss": 0.6816, - "num_input_tokens_seen": 54054775, - "step": 2547 - }, - { - "epoch": 0.3063788853484038, - "grad_norm": 2.6478345952131472, - "learning_rate": 3.250977676292545e-06, - "loss": 0.7493, - "num_input_tokens_seen": 54072735, - "step": 2548 - }, - { - "epoch": 0.30649912823904285, - "grad_norm": 2.013453104655136, - "learning_rate": 3.2503698019470712e-06, - "loss": 0.7878, - "num_input_tokens_seen": 54088225, - "step": 2549 - }, - { - "epoch": 0.30661937112968196, - "grad_norm": 2.2244452127710765, - "learning_rate": 3.249761737921492e-06, - "loss": 0.7737, - "num_input_tokens_seen": 54104475, - "step": 2550 - }, - { - "epoch": 0.30673961402032107, - "grad_norm": 2.0166115706203906, - "learning_rate": 3.249153484308051e-06, - "loss": 0.7423, - "num_input_tokens_seen": 54122810, - "step": 2551 - }, - { - "epoch": 0.3068598569109601, - "grad_norm": 3.05913576240648, - "learning_rate": 3.2485450411990194e-06, - "loss": 0.7744, - "num_input_tokens_seen": 54141885, - "step": 2552 - }, - { - "epoch": 0.30698009980159924, - "grad_norm": 6.694322438531677, - "learning_rate": 3.2479364086866983e-06, - "loss": 0.8206, - "num_input_tokens_seen": 54161860, - "step": 2553 - }, - { - "epoch": 0.30710034269223835, - "grad_norm": 1.9947849475630965, - "learning_rate": 3.247327586863416e-06, - "loss": 0.8066, - "num_input_tokens_seen": 54182460, - "step": 2554 - }, - { - "epoch": 0.3072205855828774, - "grad_norm": 3.2180216038254907, - "learning_rate": 3.2467185758215304e-06, - "loss": 0.7784, - "num_input_tokens_seen": 54201920, - "step": 2555 - }, - { - "epoch": 0.3073408284735165, - "grad_norm": 2.6023250757146714, - "learning_rate": 3.246109375653428e-06, - "loss": 0.8481, - "num_input_tokens_seen": 54218405, - "step": 2556 - }, - { - "epoch": 0.30746107136415557, - "grad_norm": 1.9423469434797664, - "learning_rate": 3.2454999864515243e-06, - "loss": 0.7801, - "num_input_tokens_seen": 54237500, - "step": 2557 - }, - { - "epoch": 0.3075813142547947, - "grad_norm": 2.021568294685621, - "learning_rate": 3.244890408308263e-06, - "loss": 0.6856, - "num_input_tokens_seen": 54257925, - "step": 2558 - }, - { - "epoch": 0.3077015571454338, - "grad_norm": 3.0515948112502467, - "learning_rate": 3.2442806413161165e-06, - "loss": 0.6104, - "num_input_tokens_seen": 54277290, - "step": 2559 - }, - { - "epoch": 0.30782180003607285, - "grad_norm": 2.1490474422507195, - "learning_rate": 3.2436706855675856e-06, - "loss": 0.7711, - "num_input_tokens_seen": 54294410, - "step": 2560 - }, - { - "epoch": 0.30794204292671196, - "grad_norm": 3.010655875980436, - "learning_rate": 3.2430605411552012e-06, - "loss": 0.8026, - "num_input_tokens_seen": 54314245, - "step": 2561 - }, - { - "epoch": 0.30806228581735107, - "grad_norm": 0.8946030648815743, - "learning_rate": 3.2424502081715205e-06, - "loss": 0.7157, - "num_input_tokens_seen": 54377080, - "step": 2562 - }, - { - "epoch": 0.3081825287079901, - "grad_norm": 2.2138366673970027, - "learning_rate": 3.241839686709132e-06, - "loss": 0.7773, - "num_input_tokens_seen": 54397735, - "step": 2563 - }, - { - "epoch": 0.30830277159862923, - "grad_norm": 3.0243852354493845, - "learning_rate": 3.2412289768606495e-06, - "loss": 0.8195, - "num_input_tokens_seen": 54414025, - "step": 2564 - }, - { - "epoch": 0.30842301448926834, - "grad_norm": 1.6179158188721627, - "learning_rate": 3.240618078718718e-06, - "loss": 0.8168, - "num_input_tokens_seen": 54435205, - "step": 2565 - }, - { - "epoch": 0.3085432573799074, - "grad_norm": 2.0768916323215336, - "learning_rate": 3.240006992376011e-06, - "loss": 0.7389, - "num_input_tokens_seen": 54454550, - "step": 2566 - }, - { - "epoch": 0.3086635002705465, - "grad_norm": 2.473560702463057, - "learning_rate": 3.2393957179252284e-06, - "loss": 0.7532, - "num_input_tokens_seen": 54470805, - "step": 2567 - }, - { - "epoch": 0.3087837431611856, - "grad_norm": 2.591371038379765, - "learning_rate": 3.2387842554591016e-06, - "loss": 0.805, - "num_input_tokens_seen": 54491340, - "step": 2568 - }, - { - "epoch": 0.3089039860518247, - "grad_norm": 2.248663049410497, - "learning_rate": 3.238172605070388e-06, - "loss": 0.8656, - "num_input_tokens_seen": 54506475, - "step": 2569 - }, - { - "epoch": 0.3090242289424638, - "grad_norm": 2.407108476749192, - "learning_rate": 3.2375607668518745e-06, - "loss": 0.7811, - "num_input_tokens_seen": 54519230, - "step": 2570 - }, - { - "epoch": 0.30914447183310284, - "grad_norm": 2.1303557265605098, - "learning_rate": 3.236948740896377e-06, - "loss": 0.8958, - "num_input_tokens_seen": 54533750, - "step": 2571 - }, - { - "epoch": 0.30926471472374195, - "grad_norm": 1.3948547590926945, - "learning_rate": 3.2363365272967384e-06, - "loss": 0.836, - "num_input_tokens_seen": 54556040, - "step": 2572 - }, - { - "epoch": 0.30938495761438106, - "grad_norm": 2.564899293169109, - "learning_rate": 3.235724126145832e-06, - "loss": 0.8058, - "num_input_tokens_seen": 54571795, - "step": 2573 - }, - { - "epoch": 0.3095052005050201, - "grad_norm": 1.7378805506607162, - "learning_rate": 3.235111537536558e-06, - "loss": 0.7723, - "num_input_tokens_seen": 54592330, - "step": 2574 - }, - { - "epoch": 0.30962544339565923, - "grad_norm": 2.289403195162928, - "learning_rate": 3.2344987615618456e-06, - "loss": 0.8289, - "num_input_tokens_seen": 54611885, - "step": 2575 - }, - { - "epoch": 0.30974568628629834, - "grad_norm": 2.024184283736538, - "learning_rate": 3.2338857983146533e-06, - "loss": 0.7819, - "num_input_tokens_seen": 54633105, - "step": 2576 - }, - { - "epoch": 0.3098659291769374, - "grad_norm": 2.2308105918247305, - "learning_rate": 3.233272647887966e-06, - "loss": 0.7624, - "num_input_tokens_seen": 54651715, - "step": 2577 - }, - { - "epoch": 0.3099861720675765, - "grad_norm": 1.4902083919867717, - "learning_rate": 3.2326593103747985e-06, - "loss": 0.8881, - "num_input_tokens_seen": 54670450, - "step": 2578 - }, - { - "epoch": 0.3101064149582156, - "grad_norm": 2.287505695932403, - "learning_rate": 3.2320457858681936e-06, - "loss": 0.8442, - "num_input_tokens_seen": 54688560, - "step": 2579 - }, - { - "epoch": 0.31022665784885467, - "grad_norm": 2.482510842208662, - "learning_rate": 3.2314320744612228e-06, - "loss": 0.8551, - "num_input_tokens_seen": 54703580, - "step": 2580 - }, - { - "epoch": 0.3103469007394938, - "grad_norm": 1.9490418576007904, - "learning_rate": 3.2308181762469854e-06, - "loss": 0.7565, - "num_input_tokens_seen": 54721245, - "step": 2581 - }, - { - "epoch": 0.3104671436301329, - "grad_norm": 8.407402748575718, - "learning_rate": 3.230204091318609e-06, - "loss": 0.7869, - "num_input_tokens_seen": 54741505, - "step": 2582 - }, - { - "epoch": 0.31058738652077195, - "grad_norm": 2.0418739515989945, - "learning_rate": 3.2295898197692503e-06, - "loss": 0.8416, - "num_input_tokens_seen": 54760185, - "step": 2583 - }, - { - "epoch": 0.31070762941141106, - "grad_norm": 1.6905667288903958, - "learning_rate": 3.2289753616920935e-06, - "loss": 0.7874, - "num_input_tokens_seen": 54780925, - "step": 2584 - }, - { - "epoch": 0.31082787230205017, - "grad_norm": 3.1509423699755432, - "learning_rate": 3.228360717180352e-06, - "loss": 0.795, - "num_input_tokens_seen": 54798170, - "step": 2585 - }, - { - "epoch": 0.3109481151926892, - "grad_norm": 0.8804646717585519, - "learning_rate": 3.227745886327266e-06, - "loss": 0.6421, - "num_input_tokens_seen": 54856585, - "step": 2586 - }, - { - "epoch": 0.31106835808332833, - "grad_norm": 0.8218910219540508, - "learning_rate": 3.227130869226105e-06, - "loss": 0.5873, - "num_input_tokens_seen": 54913465, - "step": 2587 - }, - { - "epoch": 0.3111886009739674, - "grad_norm": 2.663260104192462, - "learning_rate": 3.226515665970167e-06, - "loss": 0.8147, - "num_input_tokens_seen": 54930725, - "step": 2588 - }, - { - "epoch": 0.3113088438646065, - "grad_norm": 2.3708506181821973, - "learning_rate": 3.225900276652777e-06, - "loss": 0.8579, - "num_input_tokens_seen": 54947220, - "step": 2589 - }, - { - "epoch": 0.3114290867552456, - "grad_norm": 1.7211735780761468, - "learning_rate": 3.2252847013672906e-06, - "loss": 0.753, - "num_input_tokens_seen": 54969600, - "step": 2590 - }, - { - "epoch": 0.31154932964588467, - "grad_norm": 2.3995555681460847, - "learning_rate": 3.224668940207089e-06, - "loss": 0.7624, - "num_input_tokens_seen": 54988305, - "step": 2591 - }, - { - "epoch": 0.3116695725365238, - "grad_norm": 2.493748193692999, - "learning_rate": 3.2240529932655828e-06, - "loss": 0.8646, - "num_input_tokens_seen": 55007290, - "step": 2592 - }, - { - "epoch": 0.3117898154271629, - "grad_norm": 3.5062824869753424, - "learning_rate": 3.223436860636211e-06, - "loss": 0.8855, - "num_input_tokens_seen": 55022645, - "step": 2593 - }, - { - "epoch": 0.31191005831780194, - "grad_norm": 1.7783212237888943, - "learning_rate": 3.22282054241244e-06, - "loss": 0.7327, - "num_input_tokens_seen": 55045520, - "step": 2594 - }, - { - "epoch": 0.31203030120844105, - "grad_norm": 7.757792925429537, - "learning_rate": 3.222204038687765e-06, - "loss": 0.74, - "num_input_tokens_seen": 55058375, - "step": 2595 - }, - { - "epoch": 0.31215054409908016, - "grad_norm": 1.510014610148318, - "learning_rate": 3.221587349555709e-06, - "loss": 0.8743, - "num_input_tokens_seen": 55078355, - "step": 2596 - }, - { - "epoch": 0.3122707869897192, - "grad_norm": 2.004749260195451, - "learning_rate": 3.2209704751098236e-06, - "loss": 0.685, - "num_input_tokens_seen": 55097105, - "step": 2597 - }, - { - "epoch": 0.31239102988035833, - "grad_norm": 2.870185452203878, - "learning_rate": 3.2203534154436875e-06, - "loss": 0.8292, - "num_input_tokens_seen": 55111180, - "step": 2598 - }, - { - "epoch": 0.31251127277099744, - "grad_norm": 2.723044984494789, - "learning_rate": 3.219736170650909e-06, - "loss": 0.7569, - "num_input_tokens_seen": 55131655, - "step": 2599 - }, - { - "epoch": 0.3126315156616365, - "grad_norm": 3.0518605470923363, - "learning_rate": 3.2191187408251228e-06, - "loss": 0.8437, - "num_input_tokens_seen": 55148535, - "step": 2600 - }, - { - "epoch": 0.3127517585522756, - "grad_norm": 2.2688737869722426, - "learning_rate": 3.218501126059993e-06, - "loss": 0.7781, - "num_input_tokens_seen": 55163650, - "step": 2601 - }, - { - "epoch": 0.31287200144291466, - "grad_norm": 2.886450743997347, - "learning_rate": 3.2178833264492116e-06, - "loss": 0.8054, - "num_input_tokens_seen": 55182075, - "step": 2602 - }, - { - "epoch": 0.31299224433355377, - "grad_norm": 1.9127437455544876, - "learning_rate": 3.217265342086498e-06, - "loss": 0.7585, - "num_input_tokens_seen": 55202285, - "step": 2603 - }, - { - "epoch": 0.3131124872241929, - "grad_norm": 2.3684801627086416, - "learning_rate": 3.216647173065599e-06, - "loss": 0.7293, - "num_input_tokens_seen": 55217470, - "step": 2604 - }, - { - "epoch": 0.31323273011483194, - "grad_norm": 3.034930382059138, - "learning_rate": 3.216028819480292e-06, - "loss": 0.7337, - "num_input_tokens_seen": 55238530, - "step": 2605 - }, - { - "epoch": 0.31335297300547105, - "grad_norm": 2.298032041945607, - "learning_rate": 3.2154102814243793e-06, - "loss": 0.7669, - "num_input_tokens_seen": 55257390, - "step": 2606 - }, - { - "epoch": 0.31347321589611016, - "grad_norm": 4.813548964592026, - "learning_rate": 3.2147915589916937e-06, - "loss": 0.671, - "num_input_tokens_seen": 55278670, - "step": 2607 - }, - { - "epoch": 0.3135934587867492, - "grad_norm": 2.156926740136252, - "learning_rate": 3.2141726522760938e-06, - "loss": 0.8229, - "num_input_tokens_seen": 55296450, - "step": 2608 - }, - { - "epoch": 0.3137137016773883, - "grad_norm": 0.7859195132797734, - "learning_rate": 3.2135535613714693e-06, - "loss": 0.5573, - "num_input_tokens_seen": 55359905, - "step": 2609 - }, - { - "epoch": 0.31383394456802743, - "grad_norm": 2.8678079467420643, - "learning_rate": 3.212934286371733e-06, - "loss": 0.9567, - "num_input_tokens_seen": 55376335, - "step": 2610 - }, - { - "epoch": 0.3139541874586665, - "grad_norm": 2.9905998009294628, - "learning_rate": 3.2123148273708304e-06, - "loss": 0.8356, - "num_input_tokens_seen": 55396245, - "step": 2611 - }, - { - "epoch": 0.3140744303493056, - "grad_norm": 2.2035238407951168, - "learning_rate": 3.211695184462733e-06, - "loss": 0.7571, - "num_input_tokens_seen": 55417140, - "step": 2612 - }, - { - "epoch": 0.3141946732399447, - "grad_norm": 0.8666997879792196, - "learning_rate": 3.2110753577414388e-06, - "loss": 0.6396, - "num_input_tokens_seen": 55478440, - "step": 2613 - }, - { - "epoch": 0.31431491613058377, - "grad_norm": 2.3540436886758336, - "learning_rate": 3.2104553473009753e-06, - "loss": 0.7882, - "num_input_tokens_seen": 55496280, - "step": 2614 - }, - { - "epoch": 0.3144351590212229, - "grad_norm": 2.1637888882836487, - "learning_rate": 3.209835153235399e-06, - "loss": 0.672, - "num_input_tokens_seen": 55517555, - "step": 2615 - }, - { - "epoch": 0.314555401911862, - "grad_norm": 2.1392589128444257, - "learning_rate": 3.2092147756387916e-06, - "loss": 0.6819, - "num_input_tokens_seen": 55537600, - "step": 2616 - }, - { - "epoch": 0.31467564480250104, - "grad_norm": 2.253962196281221, - "learning_rate": 3.208594214605264e-06, - "loss": 0.8303, - "num_input_tokens_seen": 55555865, - "step": 2617 - }, - { - "epoch": 0.31479588769314015, - "grad_norm": 2.1776750893003722, - "learning_rate": 3.2079734702289553e-06, - "loss": 0.7799, - "num_input_tokens_seen": 55574480, - "step": 2618 - }, - { - "epoch": 0.3149161305837792, - "grad_norm": 0.8662034544883165, - "learning_rate": 3.207352542604031e-06, - "loss": 0.6405, - "num_input_tokens_seen": 55636535, - "step": 2619 - }, - { - "epoch": 0.3150363734744183, - "grad_norm": 2.0336266554056017, - "learning_rate": 3.2067314318246864e-06, - "loss": 0.7775, - "num_input_tokens_seen": 55656970, - "step": 2620 - }, - { - "epoch": 0.31515661636505743, - "grad_norm": 2.3024180578021176, - "learning_rate": 3.206110137985143e-06, - "loss": 0.7704, - "num_input_tokens_seen": 55676895, - "step": 2621 - }, - { - "epoch": 0.3152768592556965, - "grad_norm": 2.9483211351038663, - "learning_rate": 3.2054886611796505e-06, - "loss": 0.9147, - "num_input_tokens_seen": 55695610, - "step": 2622 - }, - { - "epoch": 0.3153971021463356, - "grad_norm": 0.9808980100879209, - "learning_rate": 3.204867001502487e-06, - "loss": 0.6881, - "num_input_tokens_seen": 55753985, - "step": 2623 - }, - { - "epoch": 0.3155173450369747, - "grad_norm": 1.9068825281597623, - "learning_rate": 3.2042451590479567e-06, - "loss": 0.8052, - "num_input_tokens_seen": 55774220, - "step": 2624 - }, - { - "epoch": 0.31563758792761376, - "grad_norm": 1.6909574445999993, - "learning_rate": 3.203623133910394e-06, - "loss": 0.8541, - "num_input_tokens_seen": 55792245, - "step": 2625 - }, - { - "epoch": 0.31575783081825287, - "grad_norm": 2.2947670621289595, - "learning_rate": 3.203000926184158e-06, - "loss": 0.7688, - "num_input_tokens_seen": 55810890, - "step": 2626 - }, - { - "epoch": 0.315878073708892, - "grad_norm": 1.8833738170149354, - "learning_rate": 3.202378535963639e-06, - "loss": 0.7689, - "num_input_tokens_seen": 55831525, - "step": 2627 - }, - { - "epoch": 0.31599831659953104, - "grad_norm": 1.5875752667451526, - "learning_rate": 3.2017559633432512e-06, - "loss": 0.8321, - "num_input_tokens_seen": 55850875, - "step": 2628 - }, - { - "epoch": 0.31611855949017015, - "grad_norm": 1.848815010166794, - "learning_rate": 3.2011332084174398e-06, - "loss": 0.6571, - "num_input_tokens_seen": 55871465, - "step": 2629 - }, - { - "epoch": 0.31623880238080926, - "grad_norm": 1.6679081573850048, - "learning_rate": 3.2005102712806756e-06, - "loss": 0.8888, - "num_input_tokens_seen": 55890015, - "step": 2630 - }, - { - "epoch": 0.3163590452714483, - "grad_norm": 2.630733947119322, - "learning_rate": 3.1998871520274575e-06, - "loss": 0.7225, - "num_input_tokens_seen": 55905070, - "step": 2631 - }, - { - "epoch": 0.3164792881620874, - "grad_norm": 1.9738616773715132, - "learning_rate": 3.199263850752312e-06, - "loss": 0.8435, - "num_input_tokens_seen": 55925625, - "step": 2632 - }, - { - "epoch": 0.31659953105272653, - "grad_norm": 2.31469742893676, - "learning_rate": 3.198640367549795e-06, - "loss": 0.8622, - "num_input_tokens_seen": 55944240, - "step": 2633 - }, - { - "epoch": 0.3167197739433656, - "grad_norm": 1.9467912583415112, - "learning_rate": 3.198016702514487e-06, - "loss": 0.8582, - "num_input_tokens_seen": 55964240, - "step": 2634 - }, - { - "epoch": 0.3168400168340047, - "grad_norm": 1.983358707807838, - "learning_rate": 3.1973928557409977e-06, - "loss": 0.8415, - "num_input_tokens_seen": 55982000, - "step": 2635 - }, - { - "epoch": 0.31696025972464376, - "grad_norm": 2.3048669051762034, - "learning_rate": 3.1967688273239636e-06, - "loss": 0.6992, - "num_input_tokens_seen": 56001525, - "step": 2636 - }, - { - "epoch": 0.31708050261528287, - "grad_norm": 2.55983154032076, - "learning_rate": 3.1961446173580503e-06, - "loss": 0.8111, - "num_input_tokens_seen": 56018185, - "step": 2637 - }, - { - "epoch": 0.317200745505922, - "grad_norm": 1.961665145226088, - "learning_rate": 3.1955202259379502e-06, - "loss": 0.7676, - "num_input_tokens_seen": 56039635, - "step": 2638 - }, - { - "epoch": 0.31732098839656103, - "grad_norm": 1.9511712643348056, - "learning_rate": 3.194895653158381e-06, - "loss": 0.8116, - "num_input_tokens_seen": 56058295, - "step": 2639 - }, - { - "epoch": 0.31744123128720014, - "grad_norm": 0.7836212932774443, - "learning_rate": 3.194270899114093e-06, - "loss": 0.5941, - "num_input_tokens_seen": 56123810, - "step": 2640 - }, - { - "epoch": 0.31756147417783925, - "grad_norm": 1.7294662182718141, - "learning_rate": 3.1936459638998575e-06, - "loss": 0.8195, - "num_input_tokens_seen": 56141145, - "step": 2641 - }, - { - "epoch": 0.3176817170684783, - "grad_norm": 1.9321852015725185, - "learning_rate": 3.193020847610479e-06, - "loss": 0.8276, - "num_input_tokens_seen": 56161185, - "step": 2642 - }, - { - "epoch": 0.3178019599591174, - "grad_norm": 2.6795087348816193, - "learning_rate": 3.1923955503407875e-06, - "loss": 0.7154, - "num_input_tokens_seen": 56178855, - "step": 2643 - }, - { - "epoch": 0.31792220284975653, - "grad_norm": 1.9949795294196624, - "learning_rate": 3.191770072185638e-06, - "loss": 0.7671, - "num_input_tokens_seen": 56195570, - "step": 2644 - }, - { - "epoch": 0.3180424457403956, - "grad_norm": 2.807456405073622, - "learning_rate": 3.1911444132399165e-06, - "loss": 0.7292, - "num_input_tokens_seen": 56211860, - "step": 2645 - }, - { - "epoch": 0.3181626886310347, - "grad_norm": 2.153447572373795, - "learning_rate": 3.190518573598534e-06, - "loss": 0.8698, - "num_input_tokens_seen": 56228185, - "step": 2646 - }, - { - "epoch": 0.3182829315216738, - "grad_norm": 1.5792194095603191, - "learning_rate": 3.1898925533564308e-06, - "loss": 0.7718, - "num_input_tokens_seen": 56249375, - "step": 2647 - }, - { - "epoch": 0.31840317441231286, - "grad_norm": 8.325502809464872, - "learning_rate": 3.1892663526085733e-06, - "loss": 0.6427, - "num_input_tokens_seen": 56267470, - "step": 2648 - }, - { - "epoch": 0.31852341730295197, - "grad_norm": 0.7764030664179364, - "learning_rate": 3.188639971449956e-06, - "loss": 0.5965, - "num_input_tokens_seen": 56333240, - "step": 2649 - }, - { - "epoch": 0.318643660193591, - "grad_norm": 2.1659826537997584, - "learning_rate": 3.1880134099755995e-06, - "loss": 0.7244, - "num_input_tokens_seen": 56352595, - "step": 2650 - }, - { - "epoch": 0.31876390308423014, - "grad_norm": 2.0736932254822813, - "learning_rate": 3.1873866682805535e-06, - "loss": 0.6956, - "num_input_tokens_seen": 56373010, - "step": 2651 - }, - { - "epoch": 0.31888414597486925, - "grad_norm": 1.8221722209252493, - "learning_rate": 3.186759746459894e-06, - "loss": 0.8844, - "num_input_tokens_seen": 56391840, - "step": 2652 - }, - { - "epoch": 0.3190043888655083, - "grad_norm": 1.8086018648329212, - "learning_rate": 3.186132644608725e-06, - "loss": 0.7903, - "num_input_tokens_seen": 56410300, - "step": 2653 - }, - { - "epoch": 0.3191246317561474, - "grad_norm": 6.54584810830888, - "learning_rate": 3.1855053628221763e-06, - "loss": 0.712, - "num_input_tokens_seen": 56429275, - "step": 2654 - }, - { - "epoch": 0.3192448746467865, - "grad_norm": 2.3628032075870515, - "learning_rate": 3.184877901195407e-06, - "loss": 0.8966, - "num_input_tokens_seen": 56445690, - "step": 2655 - }, - { - "epoch": 0.3193651175374256, - "grad_norm": 0.8533714042087319, - "learning_rate": 3.184250259823602e-06, - "loss": 0.6629, - "num_input_tokens_seen": 56507940, - "step": 2656 - }, - { - "epoch": 0.3194853604280647, - "grad_norm": 2.403070835149598, - "learning_rate": 3.1836224388019744e-06, - "loss": 0.8079, - "num_input_tokens_seen": 56522950, - "step": 2657 - }, - { - "epoch": 0.3196056033187038, - "grad_norm": 2.069428387188427, - "learning_rate": 3.1829944382257633e-06, - "loss": 0.7631, - "num_input_tokens_seen": 56540800, - "step": 2658 - }, - { - "epoch": 0.31972584620934286, - "grad_norm": 2.30651719670016, - "learning_rate": 3.1823662581902373e-06, - "loss": 0.823, - "num_input_tokens_seen": 56558205, - "step": 2659 - }, - { - "epoch": 0.31984608909998197, - "grad_norm": 2.6735213260650372, - "learning_rate": 3.1817378987906896e-06, - "loss": 0.7462, - "num_input_tokens_seen": 56577430, - "step": 2660 - }, - { - "epoch": 0.3199663319906211, - "grad_norm": 4.34219838385865, - "learning_rate": 3.181109360122442e-06, - "loss": 0.7931, - "num_input_tokens_seen": 56594740, - "step": 2661 - }, - { - "epoch": 0.32008657488126013, - "grad_norm": 2.2130761536745718, - "learning_rate": 3.180480642280844e-06, - "loss": 0.7815, - "num_input_tokens_seen": 56611595, - "step": 2662 - }, - { - "epoch": 0.32020681777189924, - "grad_norm": 1.567823631727516, - "learning_rate": 3.1798517453612714e-06, - "loss": 0.7243, - "num_input_tokens_seen": 56631120, - "step": 2663 - }, - { - "epoch": 0.32032706066253835, - "grad_norm": 1.9712893537711884, - "learning_rate": 3.1792226694591265e-06, - "loss": 0.749, - "num_input_tokens_seen": 56652225, - "step": 2664 - }, - { - "epoch": 0.3204473035531774, - "grad_norm": 1.7701556814905914, - "learning_rate": 3.178593414669841e-06, - "loss": 0.8008, - "num_input_tokens_seen": 56670530, - "step": 2665 - }, - { - "epoch": 0.3205675464438165, - "grad_norm": 2.2098971223214807, - "learning_rate": 3.1779639810888707e-06, - "loss": 0.702, - "num_input_tokens_seen": 56689845, - "step": 2666 - }, - { - "epoch": 0.3206877893344556, - "grad_norm": 1.7246063743039204, - "learning_rate": 3.1773343688117017e-06, - "loss": 0.7581, - "num_input_tokens_seen": 56710475, - "step": 2667 - }, - { - "epoch": 0.3208080322250947, - "grad_norm": 2.178756099863218, - "learning_rate": 3.1767045779338445e-06, - "loss": 0.8376, - "num_input_tokens_seen": 56727855, - "step": 2668 - }, - { - "epoch": 0.3209282751157338, - "grad_norm": 2.3564056461653387, - "learning_rate": 3.176074608550839e-06, - "loss": 0.9053, - "num_input_tokens_seen": 56743395, - "step": 2669 - }, - { - "epoch": 0.32104851800637285, - "grad_norm": 2.470435478527772, - "learning_rate": 3.17544446075825e-06, - "loss": 0.8199, - "num_input_tokens_seen": 56762280, - "step": 2670 - }, - { - "epoch": 0.32116876089701196, - "grad_norm": 1.5967077732301016, - "learning_rate": 3.174814134651671e-06, - "loss": 0.7087, - "num_input_tokens_seen": 56784550, - "step": 2671 - }, - { - "epoch": 0.3212890037876511, - "grad_norm": 1.6741571286119414, - "learning_rate": 3.1741836303267215e-06, - "loss": 0.8023, - "num_input_tokens_seen": 56803805, - "step": 2672 - }, - { - "epoch": 0.32140924667829013, - "grad_norm": 1.7881467202150503, - "learning_rate": 3.1735529478790496e-06, - "loss": 0.753, - "num_input_tokens_seen": 56821515, - "step": 2673 - }, - { - "epoch": 0.32152948956892924, - "grad_norm": 2.0331375900292885, - "learning_rate": 3.1729220874043277e-06, - "loss": 0.7954, - "num_input_tokens_seen": 56843495, - "step": 2674 - }, - { - "epoch": 0.32164973245956835, - "grad_norm": 0.7922160094215054, - "learning_rate": 3.172291048998259e-06, - "loss": 0.5831, - "num_input_tokens_seen": 56903575, - "step": 2675 - }, - { - "epoch": 0.3217699753502074, - "grad_norm": 2.0327577553204743, - "learning_rate": 3.1716598327565694e-06, - "loss": 0.8011, - "num_input_tokens_seen": 56922935, - "step": 2676 - }, - { - "epoch": 0.3218902182408465, - "grad_norm": 1.4745353666774943, - "learning_rate": 3.171028438775015e-06, - "loss": 0.8375, - "num_input_tokens_seen": 56941850, - "step": 2677 - }, - { - "epoch": 0.3220104611314856, - "grad_norm": 6.3525311680619705, - "learning_rate": 3.170396867149377e-06, - "loss": 0.8444, - "num_input_tokens_seen": 56959575, - "step": 2678 - }, - { - "epoch": 0.3221307040221247, - "grad_norm": 2.071384362681274, - "learning_rate": 3.1697651179754657e-06, - "loss": 0.8522, - "num_input_tokens_seen": 56977955, - "step": 2679 - }, - { - "epoch": 0.3222509469127638, - "grad_norm": 1.8745238138653095, - "learning_rate": 3.169133191349115e-06, - "loss": 0.7322, - "num_input_tokens_seen": 57000245, - "step": 2680 - }, - { - "epoch": 0.32237118980340285, - "grad_norm": 2.139584523458616, - "learning_rate": 3.1685010873661898e-06, - "loss": 0.8389, - "num_input_tokens_seen": 57019140, - "step": 2681 - }, - { - "epoch": 0.32249143269404196, - "grad_norm": 2.426915584321933, - "learning_rate": 3.167868806122578e-06, - "loss": 0.7983, - "num_input_tokens_seen": 57037910, - "step": 2682 - }, - { - "epoch": 0.32261167558468107, - "grad_norm": 1.8521185395363011, - "learning_rate": 3.1672363477141968e-06, - "loss": 0.6615, - "num_input_tokens_seen": 57056925, - "step": 2683 - }, - { - "epoch": 0.3227319184753201, - "grad_norm": 2.2596497437861354, - "learning_rate": 3.16660371223699e-06, - "loss": 0.8503, - "num_input_tokens_seen": 57077305, - "step": 2684 - }, - { - "epoch": 0.32285216136595923, - "grad_norm": 2.0402950920581726, - "learning_rate": 3.1659708997869278e-06, - "loss": 0.8573, - "num_input_tokens_seen": 57094940, - "step": 2685 - }, - { - "epoch": 0.32297240425659834, - "grad_norm": 1.652098663387129, - "learning_rate": 3.1653379104600067e-06, - "loss": 0.7417, - "num_input_tokens_seen": 57114805, - "step": 2686 - }, - { - "epoch": 0.3230926471472374, - "grad_norm": 1.9652968726869164, - "learning_rate": 3.1647047443522516e-06, - "loss": 0.6963, - "num_input_tokens_seen": 57135330, - "step": 2687 - }, - { - "epoch": 0.3232128900378765, - "grad_norm": 1.639736119044479, - "learning_rate": 3.164071401559713e-06, - "loss": 0.806, - "num_input_tokens_seen": 57152450, - "step": 2688 - }, - { - "epoch": 0.3233331329285156, - "grad_norm": 1.819350762997877, - "learning_rate": 3.1634378821784678e-06, - "loss": 0.7048, - "num_input_tokens_seen": 57172385, - "step": 2689 - }, - { - "epoch": 0.3234533758191547, - "grad_norm": 6.549150051219702, - "learning_rate": 3.1628041863046208e-06, - "loss": 0.7366, - "num_input_tokens_seen": 57189520, - "step": 2690 - }, - { - "epoch": 0.3235736187097938, - "grad_norm": 2.0620647112057653, - "learning_rate": 3.162170314034304e-06, - "loss": 0.908, - "num_input_tokens_seen": 57206655, - "step": 2691 - }, - { - "epoch": 0.3236938616004329, - "grad_norm": 1.6422682829267043, - "learning_rate": 3.1615362654636738e-06, - "loss": 0.7972, - "num_input_tokens_seen": 57227115, - "step": 2692 - }, - { - "epoch": 0.32381410449107195, - "grad_norm": 2.9180452439611364, - "learning_rate": 3.1609020406889163e-06, - "loss": 0.8648, - "num_input_tokens_seen": 57244270, - "step": 2693 - }, - { - "epoch": 0.32393434738171106, - "grad_norm": 2.147408519500242, - "learning_rate": 3.1602676398062416e-06, - "loss": 0.8423, - "num_input_tokens_seen": 57262900, - "step": 2694 - }, - { - "epoch": 0.3240545902723502, - "grad_norm": 2.8768352107211665, - "learning_rate": 3.1596330629118886e-06, - "loss": 0.6135, - "num_input_tokens_seen": 57282590, - "step": 2695 - }, - { - "epoch": 0.32417483316298923, - "grad_norm": 2.1293200529911225, - "learning_rate": 3.158998310102122e-06, - "loss": 0.7291, - "num_input_tokens_seen": 57300940, - "step": 2696 - }, - { - "epoch": 0.32429507605362834, - "grad_norm": 2.025354213499799, - "learning_rate": 3.1583633814732337e-06, - "loss": 0.8374, - "num_input_tokens_seen": 57320180, - "step": 2697 - }, - { - "epoch": 0.3244153189442674, - "grad_norm": 2.6121707740462736, - "learning_rate": 3.157728277121541e-06, - "loss": 0.7132, - "num_input_tokens_seen": 57338075, - "step": 2698 - }, - { - "epoch": 0.3245355618349065, - "grad_norm": 2.6456435470681865, - "learning_rate": 3.1570929971433897e-06, - "loss": 0.7804, - "num_input_tokens_seen": 57353580, - "step": 2699 - }, - { - "epoch": 0.3246558047255456, - "grad_norm": 1.9632892797007728, - "learning_rate": 3.1564575416351504e-06, - "loss": 0.8239, - "num_input_tokens_seen": 57372000, - "step": 2700 - }, - { - "epoch": 0.32477604761618467, - "grad_norm": 2.5675351813695864, - "learning_rate": 3.1558219106932215e-06, - "loss": 0.7435, - "num_input_tokens_seen": 57391135, - "step": 2701 - }, - { - "epoch": 0.3248962905068238, - "grad_norm": 1.6371075018688064, - "learning_rate": 3.155186104414027e-06, - "loss": 0.8532, - "num_input_tokens_seen": 57410490, - "step": 2702 - }, - { - "epoch": 0.3250165333974629, - "grad_norm": 1.7372905081356436, - "learning_rate": 3.15455012289402e-06, - "loss": 0.7671, - "num_input_tokens_seen": 57429855, - "step": 2703 - }, - { - "epoch": 0.32513677628810195, - "grad_norm": 1.7288619320609986, - "learning_rate": 3.153913966229677e-06, - "loss": 0.8342, - "num_input_tokens_seen": 57448695, - "step": 2704 - }, - { - "epoch": 0.32525701917874106, - "grad_norm": 0.8349752911309698, - "learning_rate": 3.1532776345175027e-06, - "loss": 0.5158, - "num_input_tokens_seen": 57513560, - "step": 2705 - }, - { - "epoch": 0.32537726206938017, - "grad_norm": 2.0010251145225255, - "learning_rate": 3.152641127854028e-06, - "loss": 0.7826, - "num_input_tokens_seen": 57531710, - "step": 2706 - }, - { - "epoch": 0.3254975049600192, - "grad_norm": 2.3279306915581137, - "learning_rate": 3.1520044463358116e-06, - "loss": 0.8108, - "num_input_tokens_seen": 57548160, - "step": 2707 - }, - { - "epoch": 0.32561774785065833, - "grad_norm": 1.6226381097249127, - "learning_rate": 3.1513675900594354e-06, - "loss": 0.7943, - "num_input_tokens_seen": 57566305, - "step": 2708 - }, - { - "epoch": 0.32573799074129745, - "grad_norm": 2.298219233863723, - "learning_rate": 3.1507305591215117e-06, - "loss": 0.8636, - "num_input_tokens_seen": 57583935, - "step": 2709 - }, - { - "epoch": 0.3258582336319365, - "grad_norm": 0.727161206180856, - "learning_rate": 3.150093353618677e-06, - "loss": 0.5815, - "num_input_tokens_seen": 57648385, - "step": 2710 - }, - { - "epoch": 0.3259784765225756, - "grad_norm": 3.163989019330951, - "learning_rate": 3.149455973647596e-06, - "loss": 0.8708, - "num_input_tokens_seen": 57666165, - "step": 2711 - }, - { - "epoch": 0.32609871941321467, - "grad_norm": 2.176791539098969, - "learning_rate": 3.1488184193049563e-06, - "loss": 0.7658, - "num_input_tokens_seen": 57685420, - "step": 2712 - }, - { - "epoch": 0.3262189623038538, - "grad_norm": 1.7633302677195908, - "learning_rate": 3.1481806906874767e-06, - "loss": 0.7272, - "num_input_tokens_seen": 57706450, - "step": 2713 - }, - { - "epoch": 0.3263392051944929, - "grad_norm": 2.1733410738226047, - "learning_rate": 3.147542787891899e-06, - "loss": 0.8722, - "num_input_tokens_seen": 57725515, - "step": 2714 - }, - { - "epoch": 0.32645944808513194, - "grad_norm": 2.247417633752184, - "learning_rate": 3.1469047110149926e-06, - "loss": 0.7485, - "num_input_tokens_seen": 57743975, - "step": 2715 - }, - { - "epoch": 0.32657969097577105, - "grad_norm": 1.8267752316455788, - "learning_rate": 3.1462664601535537e-06, - "loss": 0.8442, - "num_input_tokens_seen": 57763405, - "step": 2716 - }, - { - "epoch": 0.32669993386641016, - "grad_norm": 1.8744550556417996, - "learning_rate": 3.145628035404404e-06, - "loss": 0.7952, - "num_input_tokens_seen": 57782325, - "step": 2717 - }, - { - "epoch": 0.3268201767570492, - "grad_norm": 0.8716495105232366, - "learning_rate": 3.1449894368643922e-06, - "loss": 0.6015, - "num_input_tokens_seen": 57844360, - "step": 2718 - }, - { - "epoch": 0.32694041964768833, - "grad_norm": 1.5618477147636627, - "learning_rate": 3.1443506646303934e-06, - "loss": 0.7182, - "num_input_tokens_seen": 57865380, - "step": 2719 - }, - { - "epoch": 0.32706066253832744, - "grad_norm": 2.7422734548256553, - "learning_rate": 3.1437117187993086e-06, - "loss": 0.6703, - "num_input_tokens_seen": 57887420, - "step": 2720 - }, - { - "epoch": 0.3271809054289665, - "grad_norm": 1.6234747529148854, - "learning_rate": 3.143072599468065e-06, - "loss": 0.7907, - "num_input_tokens_seen": 57906965, - "step": 2721 - }, - { - "epoch": 0.3273011483196056, - "grad_norm": 1.6064520349385707, - "learning_rate": 3.1424333067336174e-06, - "loss": 0.7516, - "num_input_tokens_seen": 57929450, - "step": 2722 - }, - { - "epoch": 0.3274213912102447, - "grad_norm": 1.8748318849504562, - "learning_rate": 3.141793840692945e-06, - "loss": 0.7659, - "num_input_tokens_seen": 57949920, - "step": 2723 - }, - { - "epoch": 0.32754163410088377, - "grad_norm": 2.1334997271929996, - "learning_rate": 3.1411542014430553e-06, - "loss": 0.6101, - "num_input_tokens_seen": 57970720, - "step": 2724 - }, - { - "epoch": 0.3276618769915229, - "grad_norm": 1.7573760131612186, - "learning_rate": 3.1405143890809804e-06, - "loss": 0.8196, - "num_input_tokens_seen": 57989735, - "step": 2725 - }, - { - "epoch": 0.327782119882162, - "grad_norm": 4.459579605018711, - "learning_rate": 3.1398744037037796e-06, - "loss": 0.7013, - "num_input_tokens_seen": 58008790, - "step": 2726 - }, - { - "epoch": 0.32790236277280105, - "grad_norm": 1.9377888080148526, - "learning_rate": 3.139234245408538e-06, - "loss": 0.832, - "num_input_tokens_seen": 58027390, - "step": 2727 - }, - { - "epoch": 0.32802260566344016, - "grad_norm": 1.4230882857102058, - "learning_rate": 3.1385939142923666e-06, - "loss": 0.7622, - "num_input_tokens_seen": 58049500, - "step": 2728 - }, - { - "epoch": 0.3281428485540792, - "grad_norm": 2.328767722205342, - "learning_rate": 3.137953410452405e-06, - "loss": 0.7796, - "num_input_tokens_seen": 58069490, - "step": 2729 - }, - { - "epoch": 0.3282630914447183, - "grad_norm": 2.0193660226844967, - "learning_rate": 3.137312733985814e-06, - "loss": 0.7459, - "num_input_tokens_seen": 58091810, - "step": 2730 - }, - { - "epoch": 0.32838333433535744, - "grad_norm": 1.909184868042113, - "learning_rate": 3.136671884989787e-06, - "loss": 0.7432, - "num_input_tokens_seen": 58111440, - "step": 2731 - }, - { - "epoch": 0.3285035772259965, - "grad_norm": 3.250984534581054, - "learning_rate": 3.1360308635615383e-06, - "loss": 0.8685, - "num_input_tokens_seen": 58129700, - "step": 2732 - }, - { - "epoch": 0.3286238201166356, - "grad_norm": 1.8013503046713124, - "learning_rate": 3.135389669798311e-06, - "loss": 0.7823, - "num_input_tokens_seen": 58147480, - "step": 2733 - }, - { - "epoch": 0.3287440630072747, - "grad_norm": 1.8713527684415947, - "learning_rate": 3.134748303797373e-06, - "loss": 0.7975, - "num_input_tokens_seen": 58164570, - "step": 2734 - }, - { - "epoch": 0.32886430589791377, - "grad_norm": 2.6957095076688375, - "learning_rate": 3.1341067656560203e-06, - "loss": 0.804, - "num_input_tokens_seen": 58182135, - "step": 2735 - }, - { - "epoch": 0.3289845487885529, - "grad_norm": 2.741633372466873, - "learning_rate": 3.133465055471572e-06, - "loss": 0.8631, - "num_input_tokens_seen": 58201640, - "step": 2736 - }, - { - "epoch": 0.329104791679192, - "grad_norm": 3.47478827457579, - "learning_rate": 3.1328231733413767e-06, - "loss": 0.6489, - "num_input_tokens_seen": 58218000, - "step": 2737 - }, - { - "epoch": 0.32922503456983104, - "grad_norm": 2.4625185237992993, - "learning_rate": 3.1321811193628067e-06, - "loss": 0.9008, - "num_input_tokens_seen": 58235865, - "step": 2738 - }, - { - "epoch": 0.32934527746047015, - "grad_norm": 2.501407061504325, - "learning_rate": 3.131538893633261e-06, - "loss": 0.7063, - "num_input_tokens_seen": 58255145, - "step": 2739 - }, - { - "epoch": 0.32946552035110926, - "grad_norm": 2.1514585083941826, - "learning_rate": 3.1308964962501648e-06, - "loss": 0.7775, - "num_input_tokens_seen": 58274690, - "step": 2740 - }, - { - "epoch": 0.3295857632417483, - "grad_norm": 2.5706237545260247, - "learning_rate": 3.1302539273109693e-06, - "loss": 0.8631, - "num_input_tokens_seen": 58291235, - "step": 2741 - }, - { - "epoch": 0.32970600613238743, - "grad_norm": 1.7458017510375092, - "learning_rate": 3.1296111869131513e-06, - "loss": 0.8046, - "num_input_tokens_seen": 58308380, - "step": 2742 - }, - { - "epoch": 0.32982624902302654, - "grad_norm": 2.756484539653937, - "learning_rate": 3.1289682751542153e-06, - "loss": 0.8444, - "num_input_tokens_seen": 58327660, - "step": 2743 - }, - { - "epoch": 0.3299464919136656, - "grad_norm": 3.848639538076117, - "learning_rate": 3.1283251921316883e-06, - "loss": 0.7141, - "num_input_tokens_seen": 58345125, - "step": 2744 - }, - { - "epoch": 0.3300667348043047, - "grad_norm": 2.6946446718393697, - "learning_rate": 3.127681937943128e-06, - "loss": 0.8133, - "num_input_tokens_seen": 58362935, - "step": 2745 - }, - { - "epoch": 0.33018697769494376, - "grad_norm": 2.949483909575551, - "learning_rate": 3.1270385126861134e-06, - "loss": 0.7524, - "num_input_tokens_seen": 58380640, - "step": 2746 - }, - { - "epoch": 0.3303072205855829, - "grad_norm": 2.1452245056967523, - "learning_rate": 3.1263949164582533e-06, - "loss": 0.8206, - "num_input_tokens_seen": 58400010, - "step": 2747 - }, - { - "epoch": 0.330427463476222, - "grad_norm": 2.174426347375206, - "learning_rate": 3.1257511493571797e-06, - "loss": 0.7787, - "num_input_tokens_seen": 58418235, - "step": 2748 - }, - { - "epoch": 0.33054770636686104, - "grad_norm": 2.5200604964587408, - "learning_rate": 3.125107211480552e-06, - "loss": 0.7806, - "num_input_tokens_seen": 58437890, - "step": 2749 - }, - { - "epoch": 0.33066794925750015, - "grad_norm": 1.856137017745213, - "learning_rate": 3.1244631029260536e-06, - "loss": 0.7902, - "num_input_tokens_seen": 58456945, - "step": 2750 - }, - { - "epoch": 0.33078819214813926, - "grad_norm": 0.7748134796518019, - "learning_rate": 3.1238188237913984e-06, - "loss": 0.6264, - "num_input_tokens_seen": 58521205, - "step": 2751 - }, - { - "epoch": 0.3309084350387783, - "grad_norm": 2.554187944704898, - "learning_rate": 3.12317437417432e-06, - "loss": 0.7616, - "num_input_tokens_seen": 58540430, - "step": 2752 - }, - { - "epoch": 0.3310286779294174, - "grad_norm": 2.7429809662048465, - "learning_rate": 3.122529754172582e-06, - "loss": 0.8333, - "num_input_tokens_seen": 58557035, - "step": 2753 - }, - { - "epoch": 0.33114892082005654, - "grad_norm": 2.0071136049488625, - "learning_rate": 3.1218849638839736e-06, - "loss": 0.7247, - "num_input_tokens_seen": 58576015, - "step": 2754 - }, - { - "epoch": 0.3312691637106956, - "grad_norm": 2.0085487201826986, - "learning_rate": 3.121240003406308e-06, - "loss": 0.7751, - "num_input_tokens_seen": 58594585, - "step": 2755 - }, - { - "epoch": 0.3313894066013347, - "grad_norm": 2.7553226068053096, - "learning_rate": 3.120594872837425e-06, - "loss": 0.7155, - "num_input_tokens_seen": 58612975, - "step": 2756 - }, - { - "epoch": 0.3315096494919738, - "grad_norm": 0.8672724726438519, - "learning_rate": 3.1199495722751906e-06, - "loss": 0.6446, - "num_input_tokens_seen": 58672225, - "step": 2757 - }, - { - "epoch": 0.33162989238261287, - "grad_norm": 1.61481718679592, - "learning_rate": 3.1193041018174972e-06, - "loss": 0.8393, - "num_input_tokens_seen": 58692660, - "step": 2758 - }, - { - "epoch": 0.331750135273252, - "grad_norm": 2.149748696279514, - "learning_rate": 3.118658461562261e-06, - "loss": 0.9451, - "num_input_tokens_seen": 58708480, - "step": 2759 - }, - { - "epoch": 0.33187037816389103, - "grad_norm": 2.038008284711372, - "learning_rate": 3.1180126516074254e-06, - "loss": 0.8438, - "num_input_tokens_seen": 58729805, - "step": 2760 - }, - { - "epoch": 0.33199062105453014, - "grad_norm": 3.352699363379197, - "learning_rate": 3.1173666720509603e-06, - "loss": 0.83, - "num_input_tokens_seen": 58746460, - "step": 2761 - }, - { - "epoch": 0.33211086394516925, - "grad_norm": 1.884559777331994, - "learning_rate": 3.1167205229908586e-06, - "loss": 0.6822, - "num_input_tokens_seen": 58767055, - "step": 2762 - }, - { - "epoch": 0.3322311068358083, - "grad_norm": 2.572815908871451, - "learning_rate": 3.116074204525142e-06, - "loss": 0.6225, - "num_input_tokens_seen": 58784950, - "step": 2763 - }, - { - "epoch": 0.3323513497264474, - "grad_norm": 1.6029764448841326, - "learning_rate": 3.1154277167518553e-06, - "loss": 0.826, - "num_input_tokens_seen": 58806285, - "step": 2764 - }, - { - "epoch": 0.33247159261708653, - "grad_norm": 0.8608217438447363, - "learning_rate": 3.114781059769072e-06, - "loss": 0.6219, - "num_input_tokens_seen": 58857330, - "step": 2765 - }, - { - "epoch": 0.3325918355077256, - "grad_norm": 3.2520588667326584, - "learning_rate": 3.1141342336748874e-06, - "loss": 0.6841, - "num_input_tokens_seen": 58876610, - "step": 2766 - }, - { - "epoch": 0.3327120783983647, - "grad_norm": 1.5406510525943908, - "learning_rate": 3.1134872385674253e-06, - "loss": 0.812, - "num_input_tokens_seen": 58900485, - "step": 2767 - }, - { - "epoch": 0.3328323212890038, - "grad_norm": 2.4035155038083045, - "learning_rate": 3.112840074544835e-06, - "loss": 0.8587, - "num_input_tokens_seen": 58919585, - "step": 2768 - }, - { - "epoch": 0.33295256417964286, - "grad_norm": 2.1754588966571236, - "learning_rate": 3.11219274170529e-06, - "loss": 0.6265, - "num_input_tokens_seen": 58941115, - "step": 2769 - }, - { - "epoch": 0.333072807070282, - "grad_norm": 2.167767765999262, - "learning_rate": 3.1115452401469903e-06, - "loss": 0.8087, - "num_input_tokens_seen": 58961235, - "step": 2770 - }, - { - "epoch": 0.3331930499609211, - "grad_norm": 1.931759854733021, - "learning_rate": 3.1108975699681613e-06, - "loss": 0.858, - "num_input_tokens_seen": 58978350, - "step": 2771 - }, - { - "epoch": 0.33331329285156014, - "grad_norm": 2.019200170330616, - "learning_rate": 3.1102497312670542e-06, - "loss": 0.7111, - "num_input_tokens_seen": 58996075, - "step": 2772 - }, - { - "epoch": 0.33343353574219925, - "grad_norm": 4.258261403095521, - "learning_rate": 3.109601724141946e-06, - "loss": 0.8, - "num_input_tokens_seen": 59014790, - "step": 2773 - }, - { - "epoch": 0.33355377863283836, - "grad_norm": 1.6806437356975537, - "learning_rate": 3.108953548691138e-06, - "loss": 0.681, - "num_input_tokens_seen": 59034595, - "step": 2774 - }, - { - "epoch": 0.3336740215234774, - "grad_norm": 2.8488263989856635, - "learning_rate": 3.108305205012959e-06, - "loss": 0.7168, - "num_input_tokens_seen": 59055010, - "step": 2775 - }, - { - "epoch": 0.3337942644141165, - "grad_norm": 2.1918573878030116, - "learning_rate": 3.107656693205761e-06, - "loss": 0.8728, - "num_input_tokens_seen": 59074170, - "step": 2776 - }, - { - "epoch": 0.3339145073047556, - "grad_norm": 2.5064266077010506, - "learning_rate": 3.107008013367924e-06, - "loss": 0.6937, - "num_input_tokens_seen": 59092685, - "step": 2777 - }, - { - "epoch": 0.3340347501953947, - "grad_norm": 3.7241341908238352, - "learning_rate": 3.1063591655978507e-06, - "loss": 0.8622, - "num_input_tokens_seen": 59108355, - "step": 2778 - }, - { - "epoch": 0.3341549930860338, - "grad_norm": 1.8034720128038548, - "learning_rate": 3.105710149993972e-06, - "loss": 0.7858, - "num_input_tokens_seen": 59127405, - "step": 2779 - }, - { - "epoch": 0.33427523597667286, - "grad_norm": 1.7007959847279115, - "learning_rate": 3.1050609666547427e-06, - "loss": 0.8508, - "num_input_tokens_seen": 59146685, - "step": 2780 - }, - { - "epoch": 0.33439547886731197, - "grad_norm": 3.635785976873577, - "learning_rate": 3.104411615678644e-06, - "loss": 0.7639, - "num_input_tokens_seen": 59165255, - "step": 2781 - }, - { - "epoch": 0.3345157217579511, - "grad_norm": 3.132579974044228, - "learning_rate": 3.1037620971641803e-06, - "loss": 0.7332, - "num_input_tokens_seen": 59184765, - "step": 2782 - }, - { - "epoch": 0.33463596464859013, - "grad_norm": 3.3596132386151862, - "learning_rate": 3.1031124112098844e-06, - "loss": 0.6493, - "num_input_tokens_seen": 59202695, - "step": 2783 - }, - { - "epoch": 0.33475620753922924, - "grad_norm": 2.413872215076397, - "learning_rate": 3.1024625579143127e-06, - "loss": 0.7169, - "num_input_tokens_seen": 59219935, - "step": 2784 - }, - { - "epoch": 0.33487645042986836, - "grad_norm": 1.734038045042645, - "learning_rate": 3.101812537376048e-06, - "loss": 0.7197, - "num_input_tokens_seen": 59238675, - "step": 2785 - }, - { - "epoch": 0.3349966933205074, - "grad_norm": 2.0553162598388326, - "learning_rate": 3.101162349693697e-06, - "loss": 0.8379, - "num_input_tokens_seen": 59256690, - "step": 2786 - }, - { - "epoch": 0.3351169362111465, - "grad_norm": 1.729847794193637, - "learning_rate": 3.100511994965893e-06, - "loss": 0.695, - "num_input_tokens_seen": 59276365, - "step": 2787 - }, - { - "epoch": 0.33523717910178563, - "grad_norm": 1.9528519302845946, - "learning_rate": 3.0998614732912947e-06, - "loss": 0.8521, - "num_input_tokens_seen": 59295460, - "step": 2788 - }, - { - "epoch": 0.3353574219924247, - "grad_norm": 1.8970150449988303, - "learning_rate": 3.0992107847685855e-06, - "loss": 0.673, - "num_input_tokens_seen": 59312895, - "step": 2789 - }, - { - "epoch": 0.3354776648830638, - "grad_norm": 1.6691314417713952, - "learning_rate": 3.0985599294964736e-06, - "loss": 0.7879, - "num_input_tokens_seen": 59332170, - "step": 2790 - }, - { - "epoch": 0.33559790777370285, - "grad_norm": 4.688649555518731, - "learning_rate": 3.097908907573695e-06, - "loss": 0.7003, - "num_input_tokens_seen": 59349870, - "step": 2791 - }, - { - "epoch": 0.33571815066434196, - "grad_norm": 2.1372373782661604, - "learning_rate": 3.0972577190990067e-06, - "loss": 0.8875, - "num_input_tokens_seen": 59368070, - "step": 2792 - }, - { - "epoch": 0.3358383935549811, - "grad_norm": 2.3743460348214316, - "learning_rate": 3.096606364171196e-06, - "loss": 0.7991, - "num_input_tokens_seen": 59387580, - "step": 2793 - }, - { - "epoch": 0.33595863644562013, - "grad_norm": 1.9269947028243861, - "learning_rate": 3.0959548428890703e-06, - "loss": 0.8412, - "num_input_tokens_seen": 59406170, - "step": 2794 - }, - { - "epoch": 0.33607887933625924, - "grad_norm": 1.8237253708863697, - "learning_rate": 3.095303155351468e-06, - "loss": 0.8312, - "num_input_tokens_seen": 59426095, - "step": 2795 - }, - { - "epoch": 0.33619912222689835, - "grad_norm": 3.2811700261462478, - "learning_rate": 3.0946513016572464e-06, - "loss": 0.7871, - "num_input_tokens_seen": 59444720, - "step": 2796 - }, - { - "epoch": 0.3363193651175374, - "grad_norm": 3.8493121665902885, - "learning_rate": 3.0939992819052938e-06, - "loss": 0.7625, - "num_input_tokens_seen": 59461950, - "step": 2797 - }, - { - "epoch": 0.3364396080081765, - "grad_norm": 2.335749678065399, - "learning_rate": 3.0933470961945193e-06, - "loss": 0.811, - "num_input_tokens_seen": 59479965, - "step": 2798 - }, - { - "epoch": 0.3365598508988156, - "grad_norm": 2.30020559902466, - "learning_rate": 3.09269474462386e-06, - "loss": 0.6896, - "num_input_tokens_seen": 59499255, - "step": 2799 - }, - { - "epoch": 0.3366800937894547, - "grad_norm": 2.438991638638325, - "learning_rate": 3.092042227292276e-06, - "loss": 0.8197, - "num_input_tokens_seen": 59515810, - "step": 2800 - }, - { - "epoch": 0.3368003366800938, - "grad_norm": 2.302191166023087, - "learning_rate": 3.0913895442987557e-06, - "loss": 0.8764, - "num_input_tokens_seen": 59536495, - "step": 2801 - }, - { - "epoch": 0.3369205795707329, - "grad_norm": 1.6902246842435145, - "learning_rate": 3.090736695742308e-06, - "loss": 0.8435, - "num_input_tokens_seen": 59557345, - "step": 2802 - }, - { - "epoch": 0.33704082246137196, - "grad_norm": 2.709622204578694, - "learning_rate": 3.0900836817219713e-06, - "loss": 0.5072, - "num_input_tokens_seen": 59573495, - "step": 2803 - }, - { - "epoch": 0.33716106535201107, - "grad_norm": 1.949156738012, - "learning_rate": 3.089430502336807e-06, - "loss": 0.8351, - "num_input_tokens_seen": 59593185, - "step": 2804 - }, - { - "epoch": 0.3372813082426502, - "grad_norm": 2.8325630355072677, - "learning_rate": 3.088777157685902e-06, - "loss": 0.8919, - "num_input_tokens_seen": 59608495, - "step": 2805 - }, - { - "epoch": 0.33740155113328923, - "grad_norm": 2.026935667165047, - "learning_rate": 3.088123647868367e-06, - "loss": 0.8535, - "num_input_tokens_seen": 59624765, - "step": 2806 - }, - { - "epoch": 0.33752179402392835, - "grad_norm": 2.3353906795228587, - "learning_rate": 3.0874699729833405e-06, - "loss": 0.81, - "num_input_tokens_seen": 59645855, - "step": 2807 - }, - { - "epoch": 0.3376420369145674, - "grad_norm": 1.6741238948668111, - "learning_rate": 3.086816133129983e-06, - "loss": 0.7956, - "num_input_tokens_seen": 59665835, - "step": 2808 - }, - { - "epoch": 0.3377622798052065, - "grad_norm": 1.9207040290975084, - "learning_rate": 3.0861621284074826e-06, - "loss": 0.7652, - "num_input_tokens_seen": 59686080, - "step": 2809 - }, - { - "epoch": 0.3378825226958456, - "grad_norm": 1.5637863864427821, - "learning_rate": 3.085507958915051e-06, - "loss": 0.7299, - "num_input_tokens_seen": 59704230, - "step": 2810 - }, - { - "epoch": 0.3380027655864847, - "grad_norm": 1.798883797434924, - "learning_rate": 3.0848536247519253e-06, - "loss": 0.707, - "num_input_tokens_seen": 59725535, - "step": 2811 - }, - { - "epoch": 0.3381230084771238, - "grad_norm": 3.018216391490923, - "learning_rate": 3.0841991260173663e-06, - "loss": 0.8514, - "num_input_tokens_seen": 59745160, - "step": 2812 - }, - { - "epoch": 0.3382432513677629, - "grad_norm": 1.7386044483791776, - "learning_rate": 3.0835444628106634e-06, - "loss": 0.7916, - "num_input_tokens_seen": 59763860, - "step": 2813 - }, - { - "epoch": 0.33836349425840195, - "grad_norm": 1.9071974860174172, - "learning_rate": 3.082889635231126e-06, - "loss": 0.8262, - "num_input_tokens_seen": 59782240, - "step": 2814 - }, - { - "epoch": 0.33848373714904106, - "grad_norm": 4.490045600224103, - "learning_rate": 3.0822346433780925e-06, - "loss": 0.7647, - "num_input_tokens_seen": 59802685, - "step": 2815 - }, - { - "epoch": 0.3386039800396802, - "grad_norm": 2.495872181672995, - "learning_rate": 3.0815794873509237e-06, - "loss": 0.8673, - "num_input_tokens_seen": 59820690, - "step": 2816 - }, - { - "epoch": 0.33872422293031923, - "grad_norm": 1.970151639515993, - "learning_rate": 3.0809241672490066e-06, - "loss": 0.7248, - "num_input_tokens_seen": 59838580, - "step": 2817 - }, - { - "epoch": 0.33884446582095834, - "grad_norm": 1.5991942417581513, - "learning_rate": 3.080268683171753e-06, - "loss": 0.8408, - "num_input_tokens_seen": 59858590, - "step": 2818 - }, - { - "epoch": 0.33896470871159745, - "grad_norm": 2.7415043629581843, - "learning_rate": 3.0796130352185985e-06, - "loss": 0.8872, - "num_input_tokens_seen": 59875165, - "step": 2819 - }, - { - "epoch": 0.3390849516022365, - "grad_norm": 1.8129485181822191, - "learning_rate": 3.0789572234890057e-06, - "loss": 0.6669, - "num_input_tokens_seen": 59896525, - "step": 2820 - }, - { - "epoch": 0.3392051944928756, - "grad_norm": 2.4069161910217174, - "learning_rate": 3.0783012480824596e-06, - "loss": 0.7745, - "num_input_tokens_seen": 59915390, - "step": 2821 - }, - { - "epoch": 0.33932543738351467, - "grad_norm": 5.3240901826034195, - "learning_rate": 3.077645109098471e-06, - "loss": 0.7397, - "num_input_tokens_seen": 59931380, - "step": 2822 - }, - { - "epoch": 0.3394456802741538, - "grad_norm": 1.722209468626472, - "learning_rate": 3.076988806636577e-06, - "loss": 0.709, - "num_input_tokens_seen": 59948860, - "step": 2823 - }, - { - "epoch": 0.3395659231647929, - "grad_norm": 2.113534641354055, - "learning_rate": 3.076332340796337e-06, - "loss": 0.878, - "num_input_tokens_seen": 59968190, - "step": 2824 - }, - { - "epoch": 0.33968616605543195, - "grad_norm": 1.7360650334619434, - "learning_rate": 3.075675711677337e-06, - "loss": 0.7998, - "num_input_tokens_seen": 59988005, - "step": 2825 - }, - { - "epoch": 0.33980640894607106, - "grad_norm": 2.161625221142269, - "learning_rate": 3.0750189193791865e-06, - "loss": 0.7698, - "num_input_tokens_seen": 60007310, - "step": 2826 - }, - { - "epoch": 0.33992665183671017, - "grad_norm": 3.5400258198806966, - "learning_rate": 3.0743619640015207e-06, - "loss": 0.7054, - "num_input_tokens_seen": 60027280, - "step": 2827 - }, - { - "epoch": 0.3400468947273492, - "grad_norm": 1.8901926877729285, - "learning_rate": 3.073704845643999e-06, - "loss": 0.9171, - "num_input_tokens_seen": 60044125, - "step": 2828 - }, - { - "epoch": 0.34016713761798834, - "grad_norm": 3.352970335615011, - "learning_rate": 3.0730475644063063e-06, - "loss": 0.7793, - "num_input_tokens_seen": 60058945, - "step": 2829 - }, - { - "epoch": 0.34028738050862745, - "grad_norm": 1.6493481265206238, - "learning_rate": 3.072390120388151e-06, - "loss": 0.6534, - "num_input_tokens_seen": 60076990, - "step": 2830 - }, - { - "epoch": 0.3404076233992665, - "grad_norm": 2.474475380315649, - "learning_rate": 3.071732513689267e-06, - "loss": 0.709, - "num_input_tokens_seen": 60095245, - "step": 2831 - }, - { - "epoch": 0.3405278662899056, - "grad_norm": 6.846865583761615, - "learning_rate": 3.0710747444094125e-06, - "loss": 0.6727, - "num_input_tokens_seen": 60112995, - "step": 2832 - }, - { - "epoch": 0.3406481091805447, - "grad_norm": 2.1503353467810804, - "learning_rate": 3.070416812648372e-06, - "loss": 0.6458, - "num_input_tokens_seen": 60136165, - "step": 2833 - }, - { - "epoch": 0.3407683520711838, - "grad_norm": 2.031735930049402, - "learning_rate": 3.069758718505951e-06, - "loss": 0.6598, - "num_input_tokens_seen": 60157625, - "step": 2834 - }, - { - "epoch": 0.3408885949618229, - "grad_norm": 1.7753580427927367, - "learning_rate": 3.0691004620819836e-06, - "loss": 0.804, - "num_input_tokens_seen": 60177475, - "step": 2835 - }, - { - "epoch": 0.341008837852462, - "grad_norm": 0.8385604745430449, - "learning_rate": 3.0684420434763254e-06, - "loss": 0.6361, - "num_input_tokens_seen": 60243380, - "step": 2836 - }, - { - "epoch": 0.34112908074310105, - "grad_norm": 1.8362738978232802, - "learning_rate": 3.06778346278886e-06, - "loss": 0.7655, - "num_input_tokens_seen": 60261935, - "step": 2837 - }, - { - "epoch": 0.34124932363374016, - "grad_norm": 2.2381543922175866, - "learning_rate": 3.0671247201194906e-06, - "loss": 0.7804, - "num_input_tokens_seen": 60283790, - "step": 2838 - }, - { - "epoch": 0.3413695665243792, - "grad_norm": 1.9379672220776607, - "learning_rate": 3.066465815568151e-06, - "loss": 0.7468, - "num_input_tokens_seen": 60304340, - "step": 2839 - }, - { - "epoch": 0.34148980941501833, - "grad_norm": 1.7784290958254294, - "learning_rate": 3.0658067492347947e-06, - "loss": 0.6779, - "num_input_tokens_seen": 60326700, - "step": 2840 - }, - { - "epoch": 0.34161005230565744, - "grad_norm": 2.1823654584586563, - "learning_rate": 3.0651475212194023e-06, - "loss": 0.6668, - "num_input_tokens_seen": 60345675, - "step": 2841 - }, - { - "epoch": 0.3417302951962965, - "grad_norm": 1.5510014926499225, - "learning_rate": 3.064488131621977e-06, - "loss": 0.7465, - "num_input_tokens_seen": 60368720, - "step": 2842 - }, - { - "epoch": 0.3418505380869356, - "grad_norm": 1.779202623474596, - "learning_rate": 3.063828580542549e-06, - "loss": 0.7341, - "num_input_tokens_seen": 60389635, - "step": 2843 - }, - { - "epoch": 0.3419707809775747, - "grad_norm": 1.7754439046263575, - "learning_rate": 3.0631688680811706e-06, - "loss": 0.7209, - "num_input_tokens_seen": 60408980, - "step": 2844 - }, - { - "epoch": 0.3420910238682138, - "grad_norm": 1.9932715738635667, - "learning_rate": 3.062508994337921e-06, - "loss": 0.7512, - "num_input_tokens_seen": 60428305, - "step": 2845 - }, - { - "epoch": 0.3422112667588529, - "grad_norm": 2.874751239830218, - "learning_rate": 3.0618489594129013e-06, - "loss": 0.7822, - "num_input_tokens_seen": 60446165, - "step": 2846 - }, - { - "epoch": 0.342331509649492, - "grad_norm": 2.0439749162215923, - "learning_rate": 3.061188763406239e-06, - "loss": 0.705, - "num_input_tokens_seen": 60462030, - "step": 2847 - }, - { - "epoch": 0.34245175254013105, - "grad_norm": 2.5900072155005884, - "learning_rate": 3.060528406418085e-06, - "loss": 0.8229, - "num_input_tokens_seen": 60481600, - "step": 2848 - }, - { - "epoch": 0.34257199543077016, - "grad_norm": 1.7684958294198165, - "learning_rate": 3.0598678885486145e-06, - "loss": 0.6206, - "num_input_tokens_seen": 60503860, - "step": 2849 - }, - { - "epoch": 0.34269223832140927, - "grad_norm": 1.9303883221766656, - "learning_rate": 3.0592072098980282e-06, - "loss": 0.7377, - "num_input_tokens_seen": 60523240, - "step": 2850 - }, - { - "epoch": 0.3428124812120483, - "grad_norm": 2.129521971104975, - "learning_rate": 3.0585463705665514e-06, - "loss": 0.7275, - "num_input_tokens_seen": 60543335, - "step": 2851 - }, - { - "epoch": 0.34293272410268744, - "grad_norm": 17.322176662358288, - "learning_rate": 3.0578853706544304e-06, - "loss": 0.7083, - "num_input_tokens_seen": 60560445, - "step": 2852 - }, - { - "epoch": 0.34305296699332655, - "grad_norm": 2.0827704907175724, - "learning_rate": 3.0572242102619404e-06, - "loss": 0.6452, - "num_input_tokens_seen": 60577320, - "step": 2853 - }, - { - "epoch": 0.3431732098839656, - "grad_norm": 1.8626898159940488, - "learning_rate": 3.0565628894893776e-06, - "loss": 0.7999, - "num_input_tokens_seen": 60597675, - "step": 2854 - }, - { - "epoch": 0.3432934527746047, - "grad_norm": 2.1442553149012427, - "learning_rate": 3.055901408437066e-06, - "loss": 0.7426, - "num_input_tokens_seen": 60615920, - "step": 2855 - }, - { - "epoch": 0.34341369566524377, - "grad_norm": 1.7409326367148166, - "learning_rate": 3.055239767205349e-06, - "loss": 0.7797, - "num_input_tokens_seen": 60637390, - "step": 2856 - }, - { - "epoch": 0.3435339385558829, - "grad_norm": 1.9046985936687577, - "learning_rate": 3.054577965894599e-06, - "loss": 0.7671, - "num_input_tokens_seen": 60653255, - "step": 2857 - }, - { - "epoch": 0.343654181446522, - "grad_norm": 1.5901725571863656, - "learning_rate": 3.0539160046052094e-06, - "loss": 0.6985, - "num_input_tokens_seen": 60672675, - "step": 2858 - }, - { - "epoch": 0.34377442433716104, - "grad_norm": 2.408357923079108, - "learning_rate": 3.0532538834376003e-06, - "loss": 0.6956, - "num_input_tokens_seen": 60691955, - "step": 2859 - }, - { - "epoch": 0.34389466722780015, - "grad_norm": 2.143233525566655, - "learning_rate": 3.0525916024922143e-06, - "loss": 0.7784, - "num_input_tokens_seen": 60710860, - "step": 2860 - }, - { - "epoch": 0.34401491011843927, - "grad_norm": 2.621784356142421, - "learning_rate": 3.0519291618695193e-06, - "loss": 0.8386, - "num_input_tokens_seen": 60727980, - "step": 2861 - }, - { - "epoch": 0.3441351530090783, - "grad_norm": 1.822456067354058, - "learning_rate": 3.051266561670007e-06, - "loss": 0.757, - "num_input_tokens_seen": 60746765, - "step": 2862 - }, - { - "epoch": 0.34425539589971743, - "grad_norm": 1.8990869297627924, - "learning_rate": 3.0506038019941933e-06, - "loss": 0.8942, - "num_input_tokens_seen": 60766495, - "step": 2863 - }, - { - "epoch": 0.34437563879035654, - "grad_norm": 2.2645115204256214, - "learning_rate": 3.049940882942617e-06, - "loss": 0.676, - "num_input_tokens_seen": 60785000, - "step": 2864 - }, - { - "epoch": 0.3444958816809956, - "grad_norm": 1.9680746893813048, - "learning_rate": 3.0492778046158448e-06, - "loss": 0.7942, - "num_input_tokens_seen": 60806140, - "step": 2865 - }, - { - "epoch": 0.3446161245716347, - "grad_norm": 3.3283965873512136, - "learning_rate": 3.0486145671144633e-06, - "loss": 0.7623, - "num_input_tokens_seen": 60825650, - "step": 2866 - }, - { - "epoch": 0.3447363674622738, - "grad_norm": 1.9987305200009258, - "learning_rate": 3.047951170539086e-06, - "loss": 0.7607, - "num_input_tokens_seen": 60844995, - "step": 2867 - }, - { - "epoch": 0.3448566103529129, - "grad_norm": 1.844060554489792, - "learning_rate": 3.047287614990349e-06, - "loss": 0.8364, - "num_input_tokens_seen": 60862635, - "step": 2868 - }, - { - "epoch": 0.344976853243552, - "grad_norm": 2.2583330194911873, - "learning_rate": 3.046623900568914e-06, - "loss": 0.6115, - "num_input_tokens_seen": 60884920, - "step": 2869 - }, - { - "epoch": 0.34509709613419104, - "grad_norm": 2.5237396072914366, - "learning_rate": 3.045960027375465e-06, - "loss": 0.6877, - "num_input_tokens_seen": 60902475, - "step": 2870 - }, - { - "epoch": 0.34521733902483015, - "grad_norm": 12.745458521410642, - "learning_rate": 3.045295995510712e-06, - "loss": 0.815, - "num_input_tokens_seen": 60919165, - "step": 2871 - }, - { - "epoch": 0.34533758191546926, - "grad_norm": 1.7615115863204773, - "learning_rate": 3.0446318050753865e-06, - "loss": 0.7282, - "num_input_tokens_seen": 60939365, - "step": 2872 - }, - { - "epoch": 0.3454578248061083, - "grad_norm": 2.0581348142581812, - "learning_rate": 3.0439674561702474e-06, - "loss": 0.7802, - "num_input_tokens_seen": 60958585, - "step": 2873 - }, - { - "epoch": 0.3455780676967474, - "grad_norm": 2.144542562852442, - "learning_rate": 3.0433029488960756e-06, - "loss": 0.8803, - "num_input_tokens_seen": 60976910, - "step": 2874 - }, - { - "epoch": 0.34569831058738654, - "grad_norm": 2.140651205472323, - "learning_rate": 3.0426382833536756e-06, - "loss": 0.6014, - "num_input_tokens_seen": 60999985, - "step": 2875 - }, - { - "epoch": 0.3458185534780256, - "grad_norm": 2.6059404639916135, - "learning_rate": 3.041973459643877e-06, - "loss": 0.7745, - "num_input_tokens_seen": 61019160, - "step": 2876 - }, - { - "epoch": 0.3459387963686647, - "grad_norm": 2.3503444344744016, - "learning_rate": 3.041308477867534e-06, - "loss": 0.6683, - "num_input_tokens_seen": 61040130, - "step": 2877 - }, - { - "epoch": 0.3460590392593038, - "grad_norm": 2.2131216137972465, - "learning_rate": 3.0406433381255214e-06, - "loss": 0.8356, - "num_input_tokens_seen": 61057885, - "step": 2878 - }, - { - "epoch": 0.34617928214994287, - "grad_norm": 2.47492379675046, - "learning_rate": 3.0399780405187425e-06, - "loss": 0.8165, - "num_input_tokens_seen": 61076600, - "step": 2879 - }, - { - "epoch": 0.346299525040582, - "grad_norm": 1.989623315912117, - "learning_rate": 3.0393125851481216e-06, - "loss": 0.7786, - "num_input_tokens_seen": 61096195, - "step": 2880 - }, - { - "epoch": 0.3464197679312211, - "grad_norm": 3.8850219727060677, - "learning_rate": 3.038646972114608e-06, - "loss": 0.8624, - "num_input_tokens_seen": 61112240, - "step": 2881 - }, - { - "epoch": 0.34654001082186014, - "grad_norm": 1.8425940482598202, - "learning_rate": 3.037981201519174e-06, - "loss": 0.6665, - "num_input_tokens_seen": 61132560, - "step": 2882 - }, - { - "epoch": 0.34666025371249926, - "grad_norm": 2.0319774137362763, - "learning_rate": 3.0373152734628175e-06, - "loss": 0.7051, - "num_input_tokens_seen": 61150560, - "step": 2883 - }, - { - "epoch": 0.34678049660313837, - "grad_norm": 1.866237152941598, - "learning_rate": 3.0366491880465593e-06, - "loss": 0.7542, - "num_input_tokens_seen": 61168300, - "step": 2884 - }, - { - "epoch": 0.3469007394937774, - "grad_norm": 1.8519316468060965, - "learning_rate": 3.035982945371443e-06, - "loss": 0.8136, - "num_input_tokens_seen": 61189715, - "step": 2885 - }, - { - "epoch": 0.34702098238441653, - "grad_norm": 8.631024270860742, - "learning_rate": 3.035316545538537e-06, - "loss": 0.8454, - "num_input_tokens_seen": 61208230, - "step": 2886 - }, - { - "epoch": 0.3471412252750556, - "grad_norm": 2.5055331254599857, - "learning_rate": 3.034649988648935e-06, - "loss": 0.7953, - "num_input_tokens_seen": 61227715, - "step": 2887 - }, - { - "epoch": 0.3472614681656947, - "grad_norm": 1.8982922427974478, - "learning_rate": 3.033983274803752e-06, - "loss": 0.8086, - "num_input_tokens_seen": 61247225, - "step": 2888 - }, - { - "epoch": 0.3473817110563338, - "grad_norm": 2.183143383289986, - "learning_rate": 3.0333164041041283e-06, - "loss": 0.7082, - "num_input_tokens_seen": 61263260, - "step": 2889 - }, - { - "epoch": 0.34750195394697286, - "grad_norm": 2.341490343468796, - "learning_rate": 3.0326493766512277e-06, - "loss": 0.7246, - "num_input_tokens_seen": 61282400, - "step": 2890 - }, - { - "epoch": 0.347622196837612, - "grad_norm": 2.8541983930137103, - "learning_rate": 3.0319821925462377e-06, - "loss": 0.7553, - "num_input_tokens_seen": 61305215, - "step": 2891 - }, - { - "epoch": 0.3477424397282511, - "grad_norm": 3.1364684002491825, - "learning_rate": 3.0313148518903696e-06, - "loss": 0.9432, - "num_input_tokens_seen": 61324760, - "step": 2892 - }, - { - "epoch": 0.34786268261889014, - "grad_norm": 2.65460936854414, - "learning_rate": 3.0306473547848593e-06, - "loss": 0.8011, - "num_input_tokens_seen": 61341520, - "step": 2893 - }, - { - "epoch": 0.34798292550952925, - "grad_norm": 1.9559459850183425, - "learning_rate": 3.029979701330964e-06, - "loss": 0.7681, - "num_input_tokens_seen": 61360665, - "step": 2894 - }, - { - "epoch": 0.34810316840016836, - "grad_norm": 2.7312612243689376, - "learning_rate": 3.0293118916299668e-06, - "loss": 0.8029, - "num_input_tokens_seen": 61378840, - "step": 2895 - }, - { - "epoch": 0.3482234112908074, - "grad_norm": 1.8766845125557394, - "learning_rate": 3.0286439257831735e-06, - "loss": 0.7357, - "num_input_tokens_seen": 61398030, - "step": 2896 - }, - { - "epoch": 0.3483436541814465, - "grad_norm": 2.121773912955864, - "learning_rate": 3.0279758038919156e-06, - "loss": 0.7043, - "num_input_tokens_seen": 61415975, - "step": 2897 - }, - { - "epoch": 0.34846389707208564, - "grad_norm": 2.3054469818517993, - "learning_rate": 3.0273075260575455e-06, - "loss": 0.7788, - "num_input_tokens_seen": 61434595, - "step": 2898 - }, - { - "epoch": 0.3485841399627247, - "grad_norm": 1.8807126879633407, - "learning_rate": 3.0266390923814404e-06, - "loss": 0.7919, - "num_input_tokens_seen": 61452375, - "step": 2899 - }, - { - "epoch": 0.3487043828533638, - "grad_norm": 1.7987034315937522, - "learning_rate": 3.0259705029650008e-06, - "loss": 0.8192, - "num_input_tokens_seen": 61470025, - "step": 2900 - }, - { - "epoch": 0.34882462574400286, - "grad_norm": 1.7613147697472733, - "learning_rate": 3.025301757909652e-06, - "loss": 0.7245, - "num_input_tokens_seen": 61489940, - "step": 2901 - }, - { - "epoch": 0.34894486863464197, - "grad_norm": 2.25089082353769, - "learning_rate": 3.0246328573168414e-06, - "loss": 0.802, - "num_input_tokens_seen": 61510975, - "step": 2902 - }, - { - "epoch": 0.3490651115252811, - "grad_norm": 2.0417213987288814, - "learning_rate": 3.0239638012880412e-06, - "loss": 0.781, - "num_input_tokens_seen": 61530590, - "step": 2903 - }, - { - "epoch": 0.34918535441592014, - "grad_norm": 2.7389571660604446, - "learning_rate": 3.023294589924746e-06, - "loss": 0.8024, - "num_input_tokens_seen": 61547245, - "step": 2904 - }, - { - "epoch": 0.34930559730655925, - "grad_norm": 2.2009726955070246, - "learning_rate": 3.022625223328476e-06, - "loss": 0.7805, - "num_input_tokens_seen": 61568705, - "step": 2905 - }, - { - "epoch": 0.34942584019719836, - "grad_norm": 1.4464638904550626, - "learning_rate": 3.0219557016007727e-06, - "loss": 0.6865, - "num_input_tokens_seen": 61588555, - "step": 2906 - }, - { - "epoch": 0.3495460830878374, - "grad_norm": 2.0068703189783434, - "learning_rate": 3.021286024843202e-06, - "loss": 0.6964, - "num_input_tokens_seen": 61606470, - "step": 2907 - }, - { - "epoch": 0.3496663259784765, - "grad_norm": 1.1287761145393838, - "learning_rate": 3.0206161931573526e-06, - "loss": 0.6994, - "num_input_tokens_seen": 61658740, - "step": 2908 - }, - { - "epoch": 0.34978656886911563, - "grad_norm": 1.7189752630444677, - "learning_rate": 3.0199462066448388e-06, - "loss": 0.9246, - "num_input_tokens_seen": 61680655, - "step": 2909 - }, - { - "epoch": 0.3499068117597547, - "grad_norm": 1.7926214310382942, - "learning_rate": 3.019276065407296e-06, - "loss": 0.6894, - "num_input_tokens_seen": 61699495, - "step": 2910 - }, - { - "epoch": 0.3500270546503938, - "grad_norm": 2.2809386669017786, - "learning_rate": 3.018605769546385e-06, - "loss": 0.8036, - "num_input_tokens_seen": 61719770, - "step": 2911 - }, - { - "epoch": 0.3501472975410329, - "grad_norm": 5.424755331499995, - "learning_rate": 3.0179353191637876e-06, - "loss": 0.7982, - "num_input_tokens_seen": 61738450, - "step": 2912 - }, - { - "epoch": 0.35026754043167196, - "grad_norm": 1.777364050476785, - "learning_rate": 3.0172647143612125e-06, - "loss": 0.7082, - "num_input_tokens_seen": 61757820, - "step": 2913 - }, - { - "epoch": 0.3503877833223111, - "grad_norm": 1.9990078594526504, - "learning_rate": 3.016593955240389e-06, - "loss": 0.8118, - "num_input_tokens_seen": 61776230, - "step": 2914 - }, - { - "epoch": 0.3505080262129502, - "grad_norm": 0.821546981645845, - "learning_rate": 3.015923041903071e-06, - "loss": 0.6555, - "num_input_tokens_seen": 61842075, - "step": 2915 - }, - { - "epoch": 0.35062826910358924, - "grad_norm": 2.109389742120155, - "learning_rate": 3.0152519744510347e-06, - "loss": 0.8299, - "num_input_tokens_seen": 61861595, - "step": 2916 - }, - { - "epoch": 0.35074851199422835, - "grad_norm": 1.82926646141423, - "learning_rate": 3.014580752986082e-06, - "loss": 0.8287, - "num_input_tokens_seen": 61880190, - "step": 2917 - }, - { - "epoch": 0.3508687548848674, - "grad_norm": 2.1995226057289767, - "learning_rate": 3.0139093776100345e-06, - "loss": 0.7872, - "num_input_tokens_seen": 61896500, - "step": 2918 - }, - { - "epoch": 0.3509889977755065, - "grad_norm": 1.7525718147909617, - "learning_rate": 3.013237848424741e-06, - "loss": 0.7463, - "num_input_tokens_seen": 61915605, - "step": 2919 - }, - { - "epoch": 0.35110924066614563, - "grad_norm": 2.198443869401436, - "learning_rate": 3.012566165532072e-06, - "loss": 0.7486, - "num_input_tokens_seen": 61934115, - "step": 2920 - }, - { - "epoch": 0.3512294835567847, - "grad_norm": 2.455083748335562, - "learning_rate": 3.0118943290339207e-06, - "loss": 0.7628, - "num_input_tokens_seen": 61954045, - "step": 2921 - }, - { - "epoch": 0.3513497264474238, - "grad_norm": 1.744022161688745, - "learning_rate": 3.011222339032204e-06, - "loss": 0.6749, - "num_input_tokens_seen": 61971915, - "step": 2922 - }, - { - "epoch": 0.3514699693380629, - "grad_norm": 2.386340831268044, - "learning_rate": 3.0105501956288626e-06, - "loss": 0.6872, - "num_input_tokens_seen": 61992105, - "step": 2923 - }, - { - "epoch": 0.35159021222870196, - "grad_norm": 3.1679533381969858, - "learning_rate": 3.0098778989258594e-06, - "loss": 0.7361, - "num_input_tokens_seen": 62010435, - "step": 2924 - }, - { - "epoch": 0.35171045511934107, - "grad_norm": 2.300553385752973, - "learning_rate": 3.009205449025183e-06, - "loss": 0.8762, - "num_input_tokens_seen": 62026350, - "step": 2925 - }, - { - "epoch": 0.3518306980099802, - "grad_norm": 2.0321620335432033, - "learning_rate": 3.0085328460288415e-06, - "loss": 0.6233, - "num_input_tokens_seen": 62042830, - "step": 2926 - }, - { - "epoch": 0.35195094090061924, - "grad_norm": 2.3221737163131704, - "learning_rate": 3.0078600900388694e-06, - "loss": 0.706, - "num_input_tokens_seen": 62062855, - "step": 2927 - }, - { - "epoch": 0.35207118379125835, - "grad_norm": 1.8354561557972358, - "learning_rate": 3.007187181157323e-06, - "loss": 0.7342, - "num_input_tokens_seen": 62082585, - "step": 2928 - }, - { - "epoch": 0.35219142668189746, - "grad_norm": 2.705029204385408, - "learning_rate": 3.006514119486282e-06, - "loss": 0.6695, - "num_input_tokens_seen": 62099135, - "step": 2929 - }, - { - "epoch": 0.3523116695725365, - "grad_norm": 2.421581983842595, - "learning_rate": 3.005840905127849e-06, - "loss": 0.6886, - "num_input_tokens_seen": 62115760, - "step": 2930 - }, - { - "epoch": 0.3524319124631756, - "grad_norm": 2.4685443049028115, - "learning_rate": 3.0051675381841516e-06, - "loss": 0.866, - "num_input_tokens_seen": 62132790, - "step": 2931 - }, - { - "epoch": 0.3525521553538147, - "grad_norm": 1.5695941660375252, - "learning_rate": 3.0044940187573363e-06, - "loss": 0.7638, - "num_input_tokens_seen": 62153520, - "step": 2932 - }, - { - "epoch": 0.3526723982444538, - "grad_norm": 2.016889098332623, - "learning_rate": 3.003820346949578e-06, - "loss": 0.6576, - "num_input_tokens_seen": 62171320, - "step": 2933 - }, - { - "epoch": 0.3527926411350929, - "grad_norm": 2.681639029178212, - "learning_rate": 3.0031465228630708e-06, - "loss": 0.7949, - "num_input_tokens_seen": 62191925, - "step": 2934 - }, - { - "epoch": 0.35291288402573195, - "grad_norm": 2.6267459798754023, - "learning_rate": 3.0024725466000337e-06, - "loss": 0.8599, - "num_input_tokens_seen": 62211600, - "step": 2935 - }, - { - "epoch": 0.35303312691637107, - "grad_norm": 2.145509044175393, - "learning_rate": 3.0017984182627087e-06, - "loss": 0.7887, - "num_input_tokens_seen": 62230645, - "step": 2936 - }, - { - "epoch": 0.3531533698070102, - "grad_norm": 1.9341048340175657, - "learning_rate": 3.00112413795336e-06, - "loss": 0.8163, - "num_input_tokens_seen": 62250200, - "step": 2937 - }, - { - "epoch": 0.35327361269764923, - "grad_norm": 2.2827976822780376, - "learning_rate": 3.000449705774275e-06, - "loss": 0.801, - "num_input_tokens_seen": 62268160, - "step": 2938 - }, - { - "epoch": 0.35339385558828834, - "grad_norm": 2.105138400041192, - "learning_rate": 2.9997751218277663e-06, - "loss": 0.7094, - "num_input_tokens_seen": 62286035, - "step": 2939 - }, - { - "epoch": 0.35351409847892745, - "grad_norm": 2.34830310939989, - "learning_rate": 2.9991003862161655e-06, - "loss": 0.7661, - "num_input_tokens_seen": 62304695, - "step": 2940 - }, - { - "epoch": 0.3536343413695665, - "grad_norm": 1.8490820719029954, - "learning_rate": 2.998425499041831e-06, - "loss": 0.7409, - "num_input_tokens_seen": 62324930, - "step": 2941 - }, - { - "epoch": 0.3537545842602056, - "grad_norm": 0.9134622313556636, - "learning_rate": 2.997750460407142e-06, - "loss": 0.6281, - "num_input_tokens_seen": 62386005, - "step": 2942 - }, - { - "epoch": 0.35387482715084473, - "grad_norm": 2.844586058302895, - "learning_rate": 2.9970752704145014e-06, - "loss": 0.7043, - "num_input_tokens_seen": 62402940, - "step": 2943 - }, - { - "epoch": 0.3539950700414838, - "grad_norm": 0.739508397987474, - "learning_rate": 2.9963999291663347e-06, - "loss": 0.5973, - "num_input_tokens_seen": 62468440, - "step": 2944 - }, - { - "epoch": 0.3541153129321229, - "grad_norm": 2.776464374577834, - "learning_rate": 2.9957244367650915e-06, - "loss": 0.7467, - "num_input_tokens_seen": 62484405, - "step": 2945 - }, - { - "epoch": 0.354235555822762, - "grad_norm": 3.3124182071216297, - "learning_rate": 2.995048793313242e-06, - "loss": 0.8318, - "num_input_tokens_seen": 62501540, - "step": 2946 - }, - { - "epoch": 0.35435579871340106, - "grad_norm": 2.2550764404008317, - "learning_rate": 2.994372998913283e-06, - "loss": 0.7139, - "num_input_tokens_seen": 62519765, - "step": 2947 - }, - { - "epoch": 0.35447604160404017, - "grad_norm": 2.519482606541746, - "learning_rate": 2.9936970536677297e-06, - "loss": 0.6328, - "num_input_tokens_seen": 62539730, - "step": 2948 - }, - { - "epoch": 0.3545962844946792, - "grad_norm": 2.371282385388547, - "learning_rate": 2.9930209576791244e-06, - "loss": 0.8133, - "num_input_tokens_seen": 62557925, - "step": 2949 - }, - { - "epoch": 0.35471652738531834, - "grad_norm": 2.3488908393388663, - "learning_rate": 2.9923447110500285e-06, - "loss": 0.6346, - "num_input_tokens_seen": 62576390, - "step": 2950 - }, - { - "epoch": 0.35483677027595745, - "grad_norm": 1.6458689680897258, - "learning_rate": 2.9916683138830295e-06, - "loss": 0.7464, - "num_input_tokens_seen": 62596775, - "step": 2951 - }, - { - "epoch": 0.3549570131665965, - "grad_norm": 2.8413688045660117, - "learning_rate": 2.9909917662807353e-06, - "loss": 0.8089, - "num_input_tokens_seen": 62614295, - "step": 2952 - }, - { - "epoch": 0.3550772560572356, - "grad_norm": 2.2302194121121053, - "learning_rate": 2.9903150683457783e-06, - "loss": 0.6954, - "num_input_tokens_seen": 62632560, - "step": 2953 - }, - { - "epoch": 0.3551974989478747, - "grad_norm": 1.7761226409755375, - "learning_rate": 2.9896382201808126e-06, - "loss": 0.6512, - "num_input_tokens_seen": 62649680, - "step": 2954 - }, - { - "epoch": 0.3553177418385138, - "grad_norm": 2.1304844349914287, - "learning_rate": 2.988961221888516e-06, - "loss": 0.8026, - "num_input_tokens_seen": 62666075, - "step": 2955 - }, - { - "epoch": 0.3554379847291529, - "grad_norm": 2.232578825519733, - "learning_rate": 2.9882840735715884e-06, - "loss": 0.785, - "num_input_tokens_seen": 62681880, - "step": 2956 - }, - { - "epoch": 0.355558227619792, - "grad_norm": 2.479938573774159, - "learning_rate": 2.9876067753327523e-06, - "loss": 0.7244, - "num_input_tokens_seen": 62699330, - "step": 2957 - }, - { - "epoch": 0.35567847051043106, - "grad_norm": 2.1983717197979606, - "learning_rate": 2.986929327274754e-06, - "loss": 0.795, - "num_input_tokens_seen": 62719630, - "step": 2958 - }, - { - "epoch": 0.35579871340107017, - "grad_norm": 1.770533305287497, - "learning_rate": 2.9862517295003617e-06, - "loss": 0.7774, - "num_input_tokens_seen": 62739765, - "step": 2959 - }, - { - "epoch": 0.3559189562917093, - "grad_norm": 1.6030427383627186, - "learning_rate": 2.9855739821123654e-06, - "loss": 0.7241, - "num_input_tokens_seen": 62761065, - "step": 2960 - }, - { - "epoch": 0.35603919918234833, - "grad_norm": 1.9943763604621525, - "learning_rate": 2.9848960852135803e-06, - "loss": 0.8108, - "num_input_tokens_seen": 62780725, - "step": 2961 - }, - { - "epoch": 0.35615944207298744, - "grad_norm": 2.3115840141860677, - "learning_rate": 2.9842180389068417e-06, - "loss": 0.7901, - "num_input_tokens_seen": 62797755, - "step": 2962 - }, - { - "epoch": 0.35627968496362655, - "grad_norm": 0.8163308754617485, - "learning_rate": 2.98353984329501e-06, - "loss": 0.6227, - "num_input_tokens_seen": 62861820, - "step": 2963 - }, - { - "epoch": 0.3563999278542656, - "grad_norm": 1.7175358141494248, - "learning_rate": 2.982861498480965e-06, - "loss": 0.7102, - "num_input_tokens_seen": 62883920, - "step": 2964 - }, - { - "epoch": 0.3565201707449047, - "grad_norm": 2.0082443680284277, - "learning_rate": 2.9821830045676126e-06, - "loss": 0.8155, - "num_input_tokens_seen": 62903340, - "step": 2965 - }, - { - "epoch": 0.3566404136355438, - "grad_norm": 1.8173087076135495, - "learning_rate": 2.9815043616578793e-06, - "loss": 0.7123, - "num_input_tokens_seen": 62923855, - "step": 2966 - }, - { - "epoch": 0.3567606565261829, - "grad_norm": 2.056619677034643, - "learning_rate": 2.9808255698547145e-06, - "loss": 0.7715, - "num_input_tokens_seen": 62946375, - "step": 2967 - }, - { - "epoch": 0.356880899416822, - "grad_norm": 2.0887074427988335, - "learning_rate": 2.980146629261091e-06, - "loss": 0.7839, - "num_input_tokens_seen": 62965980, - "step": 2968 - }, - { - "epoch": 0.35700114230746105, - "grad_norm": 2.0145808948915787, - "learning_rate": 2.979467539980003e-06, - "loss": 0.8084, - "num_input_tokens_seen": 62982490, - "step": 2969 - }, - { - "epoch": 0.35712138519810016, - "grad_norm": 2.0953020651916066, - "learning_rate": 2.9787883021144675e-06, - "loss": 0.7652, - "num_input_tokens_seen": 62999325, - "step": 2970 - }, - { - "epoch": 0.35724162808873927, - "grad_norm": 2.2707959005445884, - "learning_rate": 2.9781089157675255e-06, - "loss": 0.8141, - "num_input_tokens_seen": 63017505, - "step": 2971 - }, - { - "epoch": 0.3573618709793783, - "grad_norm": 1.6867566023035991, - "learning_rate": 2.9774293810422384e-06, - "loss": 0.8743, - "num_input_tokens_seen": 63037900, - "step": 2972 - }, - { - "epoch": 0.35748211387001744, - "grad_norm": 2.776112663456812, - "learning_rate": 2.9767496980416913e-06, - "loss": 0.8936, - "num_input_tokens_seen": 63056915, - "step": 2973 - }, - { - "epoch": 0.35760235676065655, - "grad_norm": 2.630724038701508, - "learning_rate": 2.9760698668689914e-06, - "loss": 0.8093, - "num_input_tokens_seen": 63072860, - "step": 2974 - }, - { - "epoch": 0.3577225996512956, - "grad_norm": 2.0763864326175367, - "learning_rate": 2.975389887627269e-06, - "loss": 0.7103, - "num_input_tokens_seen": 63095180, - "step": 2975 - }, - { - "epoch": 0.3578428425419347, - "grad_norm": 2.6509259135292416, - "learning_rate": 2.9747097604196764e-06, - "loss": 0.8914, - "num_input_tokens_seen": 63111545, - "step": 2976 - }, - { - "epoch": 0.3579630854325738, - "grad_norm": 0.6986299447828513, - "learning_rate": 2.9740294853493875e-06, - "loss": 0.5956, - "num_input_tokens_seen": 63182825, - "step": 2977 - }, - { - "epoch": 0.3580833283232129, - "grad_norm": 2.1230706515879, - "learning_rate": 2.9733490625196004e-06, - "loss": 0.6761, - "num_input_tokens_seen": 63202405, - "step": 2978 - }, - { - "epoch": 0.358203571213852, - "grad_norm": 5.338732050837974, - "learning_rate": 2.9726684920335344e-06, - "loss": 0.7591, - "num_input_tokens_seen": 63219990, - "step": 2979 - }, - { - "epoch": 0.35832381410449105, - "grad_norm": 2.1166918865470423, - "learning_rate": 2.971987773994432e-06, - "loss": 0.8144, - "num_input_tokens_seen": 63235895, - "step": 2980 - }, - { - "epoch": 0.35844405699513016, - "grad_norm": 2.05126906180908, - "learning_rate": 2.9713069085055566e-06, - "loss": 0.8266, - "num_input_tokens_seen": 63253925, - "step": 2981 - }, - { - "epoch": 0.35856429988576927, - "grad_norm": 2.7956062423960777, - "learning_rate": 2.9706258956701958e-06, - "loss": 0.7866, - "num_input_tokens_seen": 63273635, - "step": 2982 - }, - { - "epoch": 0.3586845427764083, - "grad_norm": 2.318693226356334, - "learning_rate": 2.969944735591658e-06, - "loss": 0.7689, - "num_input_tokens_seen": 63292165, - "step": 2983 - }, - { - "epoch": 0.35880478566704743, - "grad_norm": 3.218758764542374, - "learning_rate": 2.9692634283732747e-06, - "loss": 0.7398, - "num_input_tokens_seen": 63310235, - "step": 2984 - }, - { - "epoch": 0.35892502855768654, - "grad_norm": 2.33091356567361, - "learning_rate": 2.9685819741184007e-06, - "loss": 0.7861, - "num_input_tokens_seen": 63328395, - "step": 2985 - }, - { - "epoch": 0.3590452714483256, - "grad_norm": 2.2145966297723865, - "learning_rate": 2.967900372930411e-06, - "loss": 0.6835, - "num_input_tokens_seen": 63346625, - "step": 2986 - }, - { - "epoch": 0.3591655143389647, - "grad_norm": 4.425013318949799, - "learning_rate": 2.9672186249127046e-06, - "loss": 0.7923, - "num_input_tokens_seen": 63365810, - "step": 2987 - }, - { - "epoch": 0.3592857572296038, - "grad_norm": 1.879413631296592, - "learning_rate": 2.9665367301687014e-06, - "loss": 0.7779, - "num_input_tokens_seen": 63383775, - "step": 2988 - }, - { - "epoch": 0.3594060001202429, - "grad_norm": 1.7996845906629475, - "learning_rate": 2.965854688801845e-06, - "loss": 0.7585, - "num_input_tokens_seen": 63405555, - "step": 2989 - }, - { - "epoch": 0.359526243010882, - "grad_norm": 1.9114645875708873, - "learning_rate": 2.9651725009156e-06, - "loss": 0.7557, - "num_input_tokens_seen": 63423020, - "step": 2990 - }, - { - "epoch": 0.3596464859015211, - "grad_norm": 1.7552493677084657, - "learning_rate": 2.964490166613454e-06, - "loss": 0.7399, - "num_input_tokens_seen": 63442665, - "step": 2991 - }, - { - "epoch": 0.35976672879216015, - "grad_norm": 0.9002352766720059, - "learning_rate": 2.9638076859989167e-06, - "loss": 0.6019, - "num_input_tokens_seen": 63498250, - "step": 2992 - }, - { - "epoch": 0.35988697168279926, - "grad_norm": 1.7374379380919434, - "learning_rate": 2.9631250591755196e-06, - "loss": 0.7791, - "num_input_tokens_seen": 63520685, - "step": 2993 - }, - { - "epoch": 0.36000721457343837, - "grad_norm": 1.7819665656384225, - "learning_rate": 2.9624422862468174e-06, - "loss": 0.5795, - "num_input_tokens_seen": 63543235, - "step": 2994 - }, - { - "epoch": 0.3601274574640774, - "grad_norm": 1.566950416111105, - "learning_rate": 2.9617593673163853e-06, - "loss": 0.6988, - "num_input_tokens_seen": 63561775, - "step": 2995 - }, - { - "epoch": 0.36024770035471654, - "grad_norm": 2.6489034606829023, - "learning_rate": 2.9610763024878216e-06, - "loss": 0.7661, - "num_input_tokens_seen": 63577000, - "step": 2996 - }, - { - "epoch": 0.3603679432453556, - "grad_norm": 1.8572348243700725, - "learning_rate": 2.960393091864747e-06, - "loss": 0.9045, - "num_input_tokens_seen": 63595100, - "step": 2997 - }, - { - "epoch": 0.3604881861359947, - "grad_norm": 1.6965993778822916, - "learning_rate": 2.959709735550804e-06, - "loss": 0.7416, - "num_input_tokens_seen": 63614415, - "step": 2998 - }, - { - "epoch": 0.3606084290266338, - "grad_norm": 2.272856191762267, - "learning_rate": 2.9590262336496575e-06, - "loss": 0.755, - "num_input_tokens_seen": 63633865, - "step": 2999 - }, - { - "epoch": 0.36072867191727287, - "grad_norm": 8.09684440520056, - "learning_rate": 2.958342586264993e-06, - "loss": 0.8395, - "num_input_tokens_seen": 63651720, - "step": 3000 - }, - { - "epoch": 0.360848914807912, - "grad_norm": 2.1448108093479075, - "learning_rate": 2.957658793500521e-06, - "loss": 0.7411, - "num_input_tokens_seen": 63669520, - "step": 3001 - }, - { - "epoch": 0.3609691576985511, - "grad_norm": 2.7568503637191433, - "learning_rate": 2.9569748554599713e-06, - "loss": 0.7197, - "num_input_tokens_seen": 63684850, - "step": 3002 - }, - { - "epoch": 0.36108940058919015, - "grad_norm": 2.59302159519918, - "learning_rate": 2.956290772247097e-06, - "loss": 0.7159, - "num_input_tokens_seen": 63703245, - "step": 3003 - }, - { - "epoch": 0.36120964347982926, - "grad_norm": 1.6845370843586114, - "learning_rate": 2.9556065439656724e-06, - "loss": 0.7384, - "num_input_tokens_seen": 63722015, - "step": 3004 - }, - { - "epoch": 0.36132988637046837, - "grad_norm": 1.8873139956097023, - "learning_rate": 2.9549221707194952e-06, - "loss": 0.8198, - "num_input_tokens_seen": 63740585, - "step": 3005 - }, - { - "epoch": 0.3614501292611074, - "grad_norm": 2.627524458220418, - "learning_rate": 2.9542376526123835e-06, - "loss": 0.7312, - "num_input_tokens_seen": 63759355, - "step": 3006 - }, - { - "epoch": 0.36157037215174653, - "grad_norm": 2.0020328864982626, - "learning_rate": 2.9535529897481796e-06, - "loss": 0.8393, - "num_input_tokens_seen": 63776620, - "step": 3007 - }, - { - "epoch": 0.36169061504238564, - "grad_norm": 2.5911731016441015, - "learning_rate": 2.9528681822307446e-06, - "loss": 0.7728, - "num_input_tokens_seen": 63793190, - "step": 3008 - }, - { - "epoch": 0.3618108579330247, - "grad_norm": 2.653503032899075, - "learning_rate": 2.9521832301639642e-06, - "loss": 0.8189, - "num_input_tokens_seen": 63812485, - "step": 3009 - }, - { - "epoch": 0.3619311008236638, - "grad_norm": 2.654552430917696, - "learning_rate": 2.9514981336517448e-06, - "loss": 0.7346, - "num_input_tokens_seen": 63831975, - "step": 3010 - }, - { - "epoch": 0.36205134371430286, - "grad_norm": 2.0408650024544466, - "learning_rate": 2.950812892798015e-06, - "loss": 0.8074, - "num_input_tokens_seen": 63852590, - "step": 3011 - }, - { - "epoch": 0.362171586604942, - "grad_norm": 4.129123537617905, - "learning_rate": 2.950127507706725e-06, - "loss": 0.8669, - "num_input_tokens_seen": 63872930, - "step": 3012 - }, - { - "epoch": 0.3622918294955811, - "grad_norm": 1.5023418424140664, - "learning_rate": 2.949441978481848e-06, - "loss": 0.8802, - "num_input_tokens_seen": 63893550, - "step": 3013 - }, - { - "epoch": 0.36241207238622014, - "grad_norm": 1.9226042728650827, - "learning_rate": 2.9487563052273774e-06, - "loss": 0.7923, - "num_input_tokens_seen": 63910030, - "step": 3014 - }, - { - "epoch": 0.36253231527685925, - "grad_norm": 1.8449124732240965, - "learning_rate": 2.94807048804733e-06, - "loss": 0.8552, - "num_input_tokens_seen": 63929370, - "step": 3015 - }, - { - "epoch": 0.36265255816749836, - "grad_norm": 1.9996652290596735, - "learning_rate": 2.9473845270457434e-06, - "loss": 0.8923, - "num_input_tokens_seen": 63945905, - "step": 3016 - }, - { - "epoch": 0.3627728010581374, - "grad_norm": 2.536462746047324, - "learning_rate": 2.946698422326677e-06, - "loss": 0.697, - "num_input_tokens_seen": 63963085, - "step": 3017 - }, - { - "epoch": 0.36289304394877653, - "grad_norm": 2.1359093062228625, - "learning_rate": 2.946012173994213e-06, - "loss": 0.7911, - "num_input_tokens_seen": 63982590, - "step": 3018 - }, - { - "epoch": 0.36301328683941564, - "grad_norm": 1.5035475918395433, - "learning_rate": 2.945325782152454e-06, - "loss": 0.6826, - "num_input_tokens_seen": 64005345, - "step": 3019 - }, - { - "epoch": 0.3631335297300547, - "grad_norm": 2.2323894326599247, - "learning_rate": 2.9446392469055257e-06, - "loss": 0.79, - "num_input_tokens_seen": 64023100, - "step": 3020 - }, - { - "epoch": 0.3632537726206938, - "grad_norm": 1.737221133606062, - "learning_rate": 2.9439525683575745e-06, - "loss": 0.7973, - "num_input_tokens_seen": 64041740, - "step": 3021 - }, - { - "epoch": 0.3633740155113329, - "grad_norm": 2.08503699609348, - "learning_rate": 2.943265746612769e-06, - "loss": 0.7453, - "num_input_tokens_seen": 64061030, - "step": 3022 - }, - { - "epoch": 0.36349425840197197, - "grad_norm": 1.7919308736027637, - "learning_rate": 2.9425787817753007e-06, - "loss": 0.7648, - "num_input_tokens_seen": 64079410, - "step": 3023 - }, - { - "epoch": 0.3636145012926111, - "grad_norm": 1.7915654004491517, - "learning_rate": 2.94189167394938e-06, - "loss": 0.7058, - "num_input_tokens_seen": 64101565, - "step": 3024 - }, - { - "epoch": 0.3637347441832502, - "grad_norm": 2.0363889965813677, - "learning_rate": 2.941204423239241e-06, - "loss": 0.8063, - "num_input_tokens_seen": 64120160, - "step": 3025 - }, - { - "epoch": 0.36385498707388925, - "grad_norm": 2.0367616428703386, - "learning_rate": 2.9405170297491395e-06, - "loss": 0.7624, - "num_input_tokens_seen": 64139875, - "step": 3026 - }, - { - "epoch": 0.36397522996452836, - "grad_norm": 2.0612594899023007, - "learning_rate": 2.939829493583353e-06, - "loss": 0.7947, - "num_input_tokens_seen": 64156240, - "step": 3027 - }, - { - "epoch": 0.3640954728551674, - "grad_norm": 2.7556091863319243, - "learning_rate": 2.9391418148461785e-06, - "loss": 0.8385, - "num_input_tokens_seen": 64173375, - "step": 3028 - }, - { - "epoch": 0.3642157157458065, - "grad_norm": 2.1433199359107844, - "learning_rate": 2.938453993641938e-06, - "loss": 0.8139, - "num_input_tokens_seen": 64191470, - "step": 3029 - }, - { - "epoch": 0.36433595863644563, - "grad_norm": 4.405542301298672, - "learning_rate": 2.937766030074973e-06, - "loss": 0.7076, - "num_input_tokens_seen": 64208445, - "step": 3030 - }, - { - "epoch": 0.3644562015270847, - "grad_norm": 2.4287224704479686, - "learning_rate": 2.937077924249647e-06, - "loss": 0.8191, - "num_input_tokens_seen": 64230755, - "step": 3031 - }, - { - "epoch": 0.3645764444177238, - "grad_norm": 3.2121224738148015, - "learning_rate": 2.9363896762703443e-06, - "loss": 0.7531, - "num_input_tokens_seen": 64247540, - "step": 3032 - }, - { - "epoch": 0.3646966873083629, - "grad_norm": 1.913137721761046, - "learning_rate": 2.9357012862414725e-06, - "loss": 0.8344, - "num_input_tokens_seen": 64266620, - "step": 3033 - }, - { - "epoch": 0.36481693019900197, - "grad_norm": 1.9202922316932172, - "learning_rate": 2.935012754267459e-06, - "loss": 0.7069, - "num_input_tokens_seen": 64288550, - "step": 3034 - }, - { - "epoch": 0.3649371730896411, - "grad_norm": 2.5904623241121456, - "learning_rate": 2.934324080452755e-06, - "loss": 0.7549, - "num_input_tokens_seen": 64306060, - "step": 3035 - }, - { - "epoch": 0.3650574159802802, - "grad_norm": 1.4694981981703936, - "learning_rate": 2.93363526490183e-06, - "loss": 0.77, - "num_input_tokens_seen": 64325850, - "step": 3036 - }, - { - "epoch": 0.36517765887091924, - "grad_norm": 1.963954593361555, - "learning_rate": 2.9329463077191783e-06, - "loss": 0.6972, - "num_input_tokens_seen": 64348945, - "step": 3037 - }, - { - "epoch": 0.36529790176155835, - "grad_norm": 2.438383738817773, - "learning_rate": 2.9322572090093135e-06, - "loss": 0.6448, - "num_input_tokens_seen": 64367370, - "step": 3038 - }, - { - "epoch": 0.36541814465219746, - "grad_norm": 2.894055218806962, - "learning_rate": 2.9315679688767713e-06, - "loss": 0.7557, - "num_input_tokens_seen": 64385100, - "step": 3039 - }, - { - "epoch": 0.3655383875428365, - "grad_norm": 1.6316060157478773, - "learning_rate": 2.9308785874261085e-06, - "loss": 0.6618, - "num_input_tokens_seen": 64405010, - "step": 3040 - }, - { - "epoch": 0.36565863043347563, - "grad_norm": 1.876012026933462, - "learning_rate": 2.9301890647619045e-06, - "loss": 0.8103, - "num_input_tokens_seen": 64424025, - "step": 3041 - }, - { - "epoch": 0.36577887332411474, - "grad_norm": 2.7787159407877757, - "learning_rate": 2.929499400988759e-06, - "loss": 0.8046, - "num_input_tokens_seen": 64444905, - "step": 3042 - }, - { - "epoch": 0.3658991162147538, - "grad_norm": 2.1899876800885205, - "learning_rate": 2.9288095962112927e-06, - "loss": 0.65, - "num_input_tokens_seen": 64465330, - "step": 3043 - }, - { - "epoch": 0.3660193591053929, - "grad_norm": 1.7544531307199034, - "learning_rate": 2.92811965053415e-06, - "loss": 0.8411, - "num_input_tokens_seen": 64482220, - "step": 3044 - }, - { - "epoch": 0.36613960199603196, - "grad_norm": 2.0742417733507046, - "learning_rate": 2.9274295640619946e-06, - "loss": 0.7826, - "num_input_tokens_seen": 64499070, - "step": 3045 - }, - { - "epoch": 0.36625984488667107, - "grad_norm": 1.7883302662293092, - "learning_rate": 2.9267393368995103e-06, - "loss": 0.7836, - "num_input_tokens_seen": 64518020, - "step": 3046 - }, - { - "epoch": 0.3663800877773102, - "grad_norm": 2.3428617523356845, - "learning_rate": 2.926048969151407e-06, - "loss": 0.7489, - "num_input_tokens_seen": 64535025, - "step": 3047 - }, - { - "epoch": 0.36650033066794924, - "grad_norm": 2.0296602825414576, - "learning_rate": 2.92535846092241e-06, - "loss": 0.6777, - "num_input_tokens_seen": 64553760, - "step": 3048 - }, - { - "epoch": 0.36662057355858835, - "grad_norm": 1.7049836248754247, - "learning_rate": 2.9246678123172704e-06, - "loss": 0.8222, - "num_input_tokens_seen": 64573570, - "step": 3049 - }, - { - "epoch": 0.36674081644922746, - "grad_norm": 2.515448628718151, - "learning_rate": 2.923977023440759e-06, - "loss": 0.7385, - "num_input_tokens_seen": 64591595, - "step": 3050 - }, - { - "epoch": 0.3668610593398665, - "grad_norm": 1.627641199869569, - "learning_rate": 2.9232860943976686e-06, - "loss": 0.679, - "num_input_tokens_seen": 64612050, - "step": 3051 - }, - { - "epoch": 0.3669813022305056, - "grad_norm": 1.6979178840805242, - "learning_rate": 2.9225950252928115e-06, - "loss": 0.834, - "num_input_tokens_seen": 64632620, - "step": 3052 - }, - { - "epoch": 0.36710154512114473, - "grad_norm": 2.498423249056174, - "learning_rate": 2.9219038162310225e-06, - "loss": 0.8139, - "num_input_tokens_seen": 64650540, - "step": 3053 - }, - { - "epoch": 0.3672217880117838, - "grad_norm": 2.489071678827203, - "learning_rate": 2.921212467317157e-06, - "loss": 0.818, - "num_input_tokens_seen": 64669705, - "step": 3054 - }, - { - "epoch": 0.3673420309024229, - "grad_norm": 2.8663521346021597, - "learning_rate": 2.920520978656093e-06, - "loss": 0.8045, - "num_input_tokens_seen": 64686390, - "step": 3055 - }, - { - "epoch": 0.367462273793062, - "grad_norm": 2.279010603084803, - "learning_rate": 2.9198293503527286e-06, - "loss": 0.7607, - "num_input_tokens_seen": 64707715, - "step": 3056 - }, - { - "epoch": 0.36758251668370107, - "grad_norm": 0.7723522210277356, - "learning_rate": 2.919137582511983e-06, - "loss": 0.6206, - "num_input_tokens_seen": 64763875, - "step": 3057 - }, - { - "epoch": 0.3677027595743402, - "grad_norm": 2.201065780820753, - "learning_rate": 2.9184456752387964e-06, - "loss": 0.6371, - "num_input_tokens_seen": 64780520, - "step": 3058 - }, - { - "epoch": 0.36782300246497923, - "grad_norm": 1.93935933391005, - "learning_rate": 2.917753628638132e-06, - "loss": 0.6978, - "num_input_tokens_seen": 64800545, - "step": 3059 - }, - { - "epoch": 0.36794324535561834, - "grad_norm": 2.9581608736066745, - "learning_rate": 2.9170614428149716e-06, - "loss": 0.694, - "num_input_tokens_seen": 64818600, - "step": 3060 - }, - { - "epoch": 0.36806348824625745, - "grad_norm": 3.8907633849036585, - "learning_rate": 2.9163691178743195e-06, - "loss": 0.8667, - "num_input_tokens_seen": 64836970, - "step": 3061 - }, - { - "epoch": 0.3681837311368965, - "grad_norm": 2.3375353506131606, - "learning_rate": 2.9156766539212006e-06, - "loss": 0.7759, - "num_input_tokens_seen": 64854335, - "step": 3062 - }, - { - "epoch": 0.3683039740275356, - "grad_norm": 2.1947106055976895, - "learning_rate": 2.9149840510606614e-06, - "loss": 0.7144, - "num_input_tokens_seen": 64872710, - "step": 3063 - }, - { - "epoch": 0.36842421691817473, - "grad_norm": 1.0359801152085388, - "learning_rate": 2.914291309397769e-06, - "loss": 0.686, - "num_input_tokens_seen": 64929900, - "step": 3064 - }, - { - "epoch": 0.3685444598088138, - "grad_norm": 2.264296314002788, - "learning_rate": 2.9135984290376117e-06, - "loss": 0.7822, - "num_input_tokens_seen": 64948485, - "step": 3065 - }, - { - "epoch": 0.3686647026994529, - "grad_norm": 1.7814301011671463, - "learning_rate": 2.9129054100853e-06, - "loss": 0.8244, - "num_input_tokens_seen": 64967045, - "step": 3066 - }, - { - "epoch": 0.368784945590092, - "grad_norm": 7.660560302742403, - "learning_rate": 2.912212252645963e-06, - "loss": 0.7573, - "num_input_tokens_seen": 64989350, - "step": 3067 - }, - { - "epoch": 0.36890518848073106, - "grad_norm": 2.7484492038557082, - "learning_rate": 2.9115189568247523e-06, - "loss": 0.7556, - "num_input_tokens_seen": 65006630, - "step": 3068 - }, - { - "epoch": 0.36902543137137017, - "grad_norm": 2.517116487646983, - "learning_rate": 2.910825522726841e-06, - "loss": 0.9067, - "num_input_tokens_seen": 65023875, - "step": 3069 - }, - { - "epoch": 0.3691456742620093, - "grad_norm": 2.1179414415213693, - "learning_rate": 2.9101319504574215e-06, - "loss": 0.761, - "num_input_tokens_seen": 65040035, - "step": 3070 - }, - { - "epoch": 0.36926591715264834, - "grad_norm": 2.2375768482085525, - "learning_rate": 2.909438240121709e-06, - "loss": 0.7596, - "num_input_tokens_seen": 65060030, - "step": 3071 - }, - { - "epoch": 0.36938616004328745, - "grad_norm": 1.8559233221752391, - "learning_rate": 2.9087443918249385e-06, - "loss": 0.6966, - "num_input_tokens_seen": 65080770, - "step": 3072 - }, - { - "epoch": 0.36950640293392656, - "grad_norm": 2.140031799912235, - "learning_rate": 2.908050405672367e-06, - "loss": 0.7873, - "num_input_tokens_seen": 65100035, - "step": 3073 - }, - { - "epoch": 0.3696266458245656, - "grad_norm": 1.9494531830024715, - "learning_rate": 2.9073562817692703e-06, - "loss": 0.7851, - "num_input_tokens_seen": 65118440, - "step": 3074 - }, - { - "epoch": 0.3697468887152047, - "grad_norm": 0.7905469202754124, - "learning_rate": 2.906662020220947e-06, - "loss": 0.6066, - "num_input_tokens_seen": 65180650, - "step": 3075 - }, - { - "epoch": 0.3698671316058438, - "grad_norm": 3.0442508619622384, - "learning_rate": 2.905967621132716e-06, - "loss": 0.7762, - "num_input_tokens_seen": 65197980, - "step": 3076 - }, - { - "epoch": 0.3699873744964829, - "grad_norm": 2.283642788158564, - "learning_rate": 2.905273084609918e-06, - "loss": 0.7504, - "num_input_tokens_seen": 65219045, - "step": 3077 - }, - { - "epoch": 0.370107617387122, - "grad_norm": 0.9049793891796653, - "learning_rate": 2.904578410757912e-06, - "loss": 0.6469, - "num_input_tokens_seen": 65278870, - "step": 3078 - }, - { - "epoch": 0.37022786027776106, - "grad_norm": 2.0538122310865927, - "learning_rate": 2.9038835996820807e-06, - "loss": 0.6639, - "num_input_tokens_seen": 65296200, - "step": 3079 - }, - { - "epoch": 0.37034810316840017, - "grad_norm": 5.630774452643147, - "learning_rate": 2.9031886514878258e-06, - "loss": 0.7854, - "num_input_tokens_seen": 65314475, - "step": 3080 - }, - { - "epoch": 0.3704683460590393, - "grad_norm": 2.2202352077880287, - "learning_rate": 2.902493566280571e-06, - "loss": 0.866, - "num_input_tokens_seen": 65332300, - "step": 3081 - }, - { - "epoch": 0.37058858894967833, - "grad_norm": 1.8727087801933924, - "learning_rate": 2.9017983441657595e-06, - "loss": 0.8118, - "num_input_tokens_seen": 65349350, - "step": 3082 - }, - { - "epoch": 0.37070883184031744, - "grad_norm": 3.2382469697965375, - "learning_rate": 2.9011029852488564e-06, - "loss": 0.7556, - "num_input_tokens_seen": 65366305, - "step": 3083 - }, - { - "epoch": 0.37082907473095655, - "grad_norm": 1.01107184616564, - "learning_rate": 2.9004074896353465e-06, - "loss": 0.6761, - "num_input_tokens_seen": 65420025, - "step": 3084 - }, - { - "epoch": 0.3709493176215956, - "grad_norm": 1.9543161183791637, - "learning_rate": 2.8997118574307362e-06, - "loss": 0.8105, - "num_input_tokens_seen": 65436700, - "step": 3085 - }, - { - "epoch": 0.3710695605122347, - "grad_norm": 2.155761927349376, - "learning_rate": 2.899016088740553e-06, - "loss": 0.737, - "num_input_tokens_seen": 65454530, - "step": 3086 - }, - { - "epoch": 0.37118980340287383, - "grad_norm": 2.4060505105930634, - "learning_rate": 2.898320183670344e-06, - "loss": 0.7902, - "num_input_tokens_seen": 65471665, - "step": 3087 - }, - { - "epoch": 0.3713100462935129, - "grad_norm": 7.531004831037133, - "learning_rate": 2.8976241423256767e-06, - "loss": 0.8779, - "num_input_tokens_seen": 65491480, - "step": 3088 - }, - { - "epoch": 0.371430289184152, - "grad_norm": 2.3132560982893535, - "learning_rate": 2.896927964812142e-06, - "loss": 0.6782, - "num_input_tokens_seen": 65511765, - "step": 3089 - }, - { - "epoch": 0.37155053207479105, - "grad_norm": 2.598919673939607, - "learning_rate": 2.8962316512353465e-06, - "loss": 0.7524, - "num_input_tokens_seen": 65529030, - "step": 3090 - }, - { - "epoch": 0.37167077496543016, - "grad_norm": 1.8580821851049534, - "learning_rate": 2.8955352017009233e-06, - "loss": 0.7461, - "num_input_tokens_seen": 65547995, - "step": 3091 - }, - { - "epoch": 0.3717910178560693, - "grad_norm": 2.1379644631233687, - "learning_rate": 2.8948386163145212e-06, - "loss": 0.7674, - "num_input_tokens_seen": 65566925, - "step": 3092 - }, - { - "epoch": 0.3719112607467083, - "grad_norm": 1.885953940845471, - "learning_rate": 2.8941418951818135e-06, - "loss": 0.793, - "num_input_tokens_seen": 65586205, - "step": 3093 - }, - { - "epoch": 0.37203150363734744, - "grad_norm": 2.275389973348851, - "learning_rate": 2.89344503840849e-06, - "loss": 0.7089, - "num_input_tokens_seen": 65603440, - "step": 3094 - }, - { - "epoch": 0.37215174652798655, - "grad_norm": 2.4022548280684246, - "learning_rate": 2.8927480461002653e-06, - "loss": 0.7087, - "num_input_tokens_seen": 65623130, - "step": 3095 - }, - { - "epoch": 0.3722719894186256, - "grad_norm": 11.143873019262802, - "learning_rate": 2.892050918362872e-06, - "loss": 0.8512, - "num_input_tokens_seen": 65637905, - "step": 3096 - }, - { - "epoch": 0.3723922323092647, - "grad_norm": 0.9478235789567104, - "learning_rate": 2.891353655302063e-06, - "loss": 0.6145, - "num_input_tokens_seen": 65691680, - "step": 3097 - }, - { - "epoch": 0.3725124751999038, - "grad_norm": 2.0488815779033938, - "learning_rate": 2.8906562570236137e-06, - "loss": 0.847, - "num_input_tokens_seen": 65709310, - "step": 3098 - }, - { - "epoch": 0.3726327180905429, - "grad_norm": 1.5379489531203288, - "learning_rate": 2.8899587236333186e-06, - "loss": 0.7588, - "num_input_tokens_seen": 65727970, - "step": 3099 - }, - { - "epoch": 0.372752960981182, - "grad_norm": 1.9187932948780124, - "learning_rate": 2.8892610552369917e-06, - "loss": 0.7368, - "num_input_tokens_seen": 65749905, - "step": 3100 - }, - { - "epoch": 0.3728732038718211, - "grad_norm": 1.956083552989559, - "learning_rate": 2.8885632519404704e-06, - "loss": 0.8148, - "num_input_tokens_seen": 65769895, - "step": 3101 - }, - { - "epoch": 0.37299344676246016, - "grad_norm": 1.9128049549934065, - "learning_rate": 2.8878653138496102e-06, - "loss": 0.7477, - "num_input_tokens_seen": 65790110, - "step": 3102 - }, - { - "epoch": 0.37311368965309927, - "grad_norm": 2.5534878452091028, - "learning_rate": 2.8871672410702878e-06, - "loss": 0.7457, - "num_input_tokens_seen": 65807190, - "step": 3103 - }, - { - "epoch": 0.3732339325437384, - "grad_norm": 1.7605907227540802, - "learning_rate": 2.8864690337084008e-06, - "loss": 0.813, - "num_input_tokens_seen": 65826185, - "step": 3104 - }, - { - "epoch": 0.37335417543437743, - "grad_norm": 3.741086890953342, - "learning_rate": 2.885770691869866e-06, - "loss": 0.7714, - "num_input_tokens_seen": 65846785, - "step": 3105 - }, - { - "epoch": 0.37347441832501654, - "grad_norm": 2.7954387323893894, - "learning_rate": 2.8850722156606207e-06, - "loss": 0.7436, - "num_input_tokens_seen": 65864895, - "step": 3106 - }, - { - "epoch": 0.3735946612156556, - "grad_norm": 2.2239892431697506, - "learning_rate": 2.8843736051866252e-06, - "loss": 0.6685, - "num_input_tokens_seen": 65883540, - "step": 3107 - }, - { - "epoch": 0.3737149041062947, - "grad_norm": 1.5507169942103427, - "learning_rate": 2.8836748605538557e-06, - "loss": 0.6847, - "num_input_tokens_seen": 65904900, - "step": 3108 - }, - { - "epoch": 0.3738351469969338, - "grad_norm": 4.082018640371153, - "learning_rate": 2.882975981868313e-06, - "loss": 0.6283, - "num_input_tokens_seen": 65925005, - "step": 3109 - }, - { - "epoch": 0.3739553898875729, - "grad_norm": 2.344429963585106, - "learning_rate": 2.8822769692360165e-06, - "loss": 0.6876, - "num_input_tokens_seen": 65946085, - "step": 3110 - }, - { - "epoch": 0.374075632778212, - "grad_norm": 2.3160869694092936, - "learning_rate": 2.881577822763005e-06, - "loss": 0.7651, - "num_input_tokens_seen": 65963755, - "step": 3111 - }, - { - "epoch": 0.3741958756688511, - "grad_norm": 2.1012165506469582, - "learning_rate": 2.880878542555338e-06, - "loss": 0.8759, - "num_input_tokens_seen": 65981240, - "step": 3112 - }, - { - "epoch": 0.37431611855949015, - "grad_norm": 2.249785145647857, - "learning_rate": 2.8801791287190976e-06, - "loss": 0.8004, - "num_input_tokens_seen": 65998955, - "step": 3113 - }, - { - "epoch": 0.37443636145012926, - "grad_norm": 3.0832348730518295, - "learning_rate": 2.8794795813603817e-06, - "loss": 0.8509, - "num_input_tokens_seen": 66014140, - "step": 3114 - }, - { - "epoch": 0.3745566043407684, - "grad_norm": 2.1879430614656044, - "learning_rate": 2.878779900585314e-06, - "loss": 0.8146, - "num_input_tokens_seen": 66031700, - "step": 3115 - }, - { - "epoch": 0.37467684723140743, - "grad_norm": 2.144839827741358, - "learning_rate": 2.8780800865000328e-06, - "loss": 0.7543, - "num_input_tokens_seen": 66052730, - "step": 3116 - }, - { - "epoch": 0.37479709012204654, - "grad_norm": 1.033878072715553, - "learning_rate": 2.877380139210702e-06, - "loss": 0.6525, - "num_input_tokens_seen": 66111120, - "step": 3117 - }, - { - "epoch": 0.37491733301268565, - "grad_norm": 6.552799514834817, - "learning_rate": 2.876680058823501e-06, - "loss": 0.7464, - "num_input_tokens_seen": 66131240, - "step": 3118 - }, - { - "epoch": 0.3750375759033247, - "grad_norm": 2.3640126969618147, - "learning_rate": 2.8759798454446323e-06, - "loss": 0.6613, - "num_input_tokens_seen": 66154125, - "step": 3119 - }, - { - "epoch": 0.3751578187939638, - "grad_norm": 4.519550186111322, - "learning_rate": 2.8752794991803173e-06, - "loss": 0.8101, - "num_input_tokens_seen": 66171530, - "step": 3120 - }, - { - "epoch": 0.37527806168460287, - "grad_norm": 7.341990834089306, - "learning_rate": 2.874579020136798e-06, - "loss": 0.7484, - "num_input_tokens_seen": 66187005, - "step": 3121 - }, - { - "epoch": 0.375398304575242, - "grad_norm": 2.2947574508359625, - "learning_rate": 2.873878408420337e-06, - "loss": 0.8453, - "num_input_tokens_seen": 66206800, - "step": 3122 - }, - { - "epoch": 0.3755185474658811, - "grad_norm": 1.7964611695745767, - "learning_rate": 2.873177664137216e-06, - "loss": 0.7837, - "num_input_tokens_seen": 66227450, - "step": 3123 - }, - { - "epoch": 0.37563879035652015, - "grad_norm": 2.0733710424574454, - "learning_rate": 2.872476787393738e-06, - "loss": 0.6866, - "num_input_tokens_seen": 66251290, - "step": 3124 - }, - { - "epoch": 0.37575903324715926, - "grad_norm": 2.783782881930464, - "learning_rate": 2.871775778296225e-06, - "loss": 0.8738, - "num_input_tokens_seen": 66268100, - "step": 3125 - }, - { - "epoch": 0.37587927613779837, - "grad_norm": 2.526670701982994, - "learning_rate": 2.8710746369510196e-06, - "loss": 0.7877, - "num_input_tokens_seen": 66285805, - "step": 3126 - }, - { - "epoch": 0.3759995190284374, - "grad_norm": 2.4288263163627817, - "learning_rate": 2.8703733634644846e-06, - "loss": 0.8293, - "num_input_tokens_seen": 66300280, - "step": 3127 - }, - { - "epoch": 0.37611976191907653, - "grad_norm": 1.6937571026810703, - "learning_rate": 2.8696719579430014e-06, - "loss": 0.7883, - "num_input_tokens_seen": 66319155, - "step": 3128 - }, - { - "epoch": 0.37624000480971564, - "grad_norm": 3.0614512671743634, - "learning_rate": 2.8689704204929747e-06, - "loss": 0.7343, - "num_input_tokens_seen": 66338055, - "step": 3129 - }, - { - "epoch": 0.3763602477003547, - "grad_norm": 1.942790693679138, - "learning_rate": 2.8682687512208253e-06, - "loss": 0.795, - "num_input_tokens_seen": 66356785, - "step": 3130 - }, - { - "epoch": 0.3764804905909938, - "grad_norm": 1.9450081062139966, - "learning_rate": 2.8675669502329972e-06, - "loss": 0.8071, - "num_input_tokens_seen": 66378035, - "step": 3131 - }, - { - "epoch": 0.3766007334816329, - "grad_norm": 7.847783767388033, - "learning_rate": 2.8668650176359524e-06, - "loss": 0.8428, - "num_input_tokens_seen": 66395575, - "step": 3132 - }, - { - "epoch": 0.376720976372272, - "grad_norm": 2.4890157923517484, - "learning_rate": 2.866162953536174e-06, - "loss": 0.7881, - "num_input_tokens_seen": 66416265, - "step": 3133 - }, - { - "epoch": 0.3768412192629111, - "grad_norm": 1.7937575236750953, - "learning_rate": 2.8654607580401634e-06, - "loss": 0.746, - "num_input_tokens_seen": 66435720, - "step": 3134 - }, - { - "epoch": 0.3769614621535502, - "grad_norm": 0.9339269846878173, - "learning_rate": 2.8647584312544446e-06, - "loss": 0.6817, - "num_input_tokens_seen": 66500645, - "step": 3135 - }, - { - "epoch": 0.37708170504418925, - "grad_norm": 1.8014387929617797, - "learning_rate": 2.864055973285559e-06, - "loss": 0.8514, - "num_input_tokens_seen": 66522365, - "step": 3136 - }, - { - "epoch": 0.37720194793482836, - "grad_norm": 1.852460490666136, - "learning_rate": 2.8633533842400698e-06, - "loss": 0.8661, - "num_input_tokens_seen": 66542285, - "step": 3137 - }, - { - "epoch": 0.3773221908254674, - "grad_norm": 1.903543331292333, - "learning_rate": 2.8626506642245576e-06, - "loss": 0.7759, - "num_input_tokens_seen": 66560855, - "step": 3138 - }, - { - "epoch": 0.37744243371610653, - "grad_norm": 1.488639823456665, - "learning_rate": 2.8619478133456265e-06, - "loss": 0.6979, - "num_input_tokens_seen": 66583275, - "step": 3139 - }, - { - "epoch": 0.37756267660674564, - "grad_norm": 2.0544944680788535, - "learning_rate": 2.8612448317098974e-06, - "loss": 0.7192, - "num_input_tokens_seen": 66603330, - "step": 3140 - }, - { - "epoch": 0.3776829194973847, - "grad_norm": 2.2425091155970116, - "learning_rate": 2.860541719424012e-06, - "loss": 0.8321, - "num_input_tokens_seen": 66621410, - "step": 3141 - }, - { - "epoch": 0.3778031623880238, - "grad_norm": 3.547883488470654, - "learning_rate": 2.8598384765946315e-06, - "loss": 0.792, - "num_input_tokens_seen": 66639785, - "step": 3142 - }, - { - "epoch": 0.3779234052786629, - "grad_norm": 2.2029576359245446, - "learning_rate": 2.859135103328438e-06, - "loss": 0.7191, - "num_input_tokens_seen": 66659235, - "step": 3143 - }, - { - "epoch": 0.37804364816930197, - "grad_norm": 3.861109233663844, - "learning_rate": 2.8584315997321325e-06, - "loss": 0.834, - "num_input_tokens_seen": 66677960, - "step": 3144 - }, - { - "epoch": 0.3781638910599411, - "grad_norm": 3.412038625841604, - "learning_rate": 2.8577279659124356e-06, - "loss": 0.7739, - "num_input_tokens_seen": 66695355, - "step": 3145 - }, - { - "epoch": 0.3782841339505802, - "grad_norm": 1.8578129467557964, - "learning_rate": 2.8570242019760885e-06, - "loss": 0.8228, - "num_input_tokens_seen": 66712635, - "step": 3146 - }, - { - "epoch": 0.37840437684121925, - "grad_norm": 5.292381263964881, - "learning_rate": 2.8563203080298516e-06, - "loss": 0.7368, - "num_input_tokens_seen": 66733130, - "step": 3147 - }, - { - "epoch": 0.37852461973185836, - "grad_norm": 2.503168178923822, - "learning_rate": 2.855616284180505e-06, - "loss": 0.8857, - "num_input_tokens_seen": 66749900, - "step": 3148 - }, - { - "epoch": 0.37864486262249747, - "grad_norm": 0.9636375568937535, - "learning_rate": 2.8549121305348477e-06, - "loss": 0.7285, - "num_input_tokens_seen": 66809405, - "step": 3149 - }, - { - "epoch": 0.3787651055131365, - "grad_norm": 2.860839388834743, - "learning_rate": 2.8542078471997e-06, - "loss": 0.8223, - "num_input_tokens_seen": 66826740, - "step": 3150 - }, - { - "epoch": 0.37888534840377563, - "grad_norm": 1.842984744524347, - "learning_rate": 2.8535034342819013e-06, - "loss": 0.7537, - "num_input_tokens_seen": 66843870, - "step": 3151 - }, - { - "epoch": 0.37900559129441475, - "grad_norm": 1.5723033472035364, - "learning_rate": 2.85279889188831e-06, - "loss": 0.7161, - "num_input_tokens_seen": 66863965, - "step": 3152 - }, - { - "epoch": 0.3791258341850538, - "grad_norm": 1.782499440148793, - "learning_rate": 2.852094220125805e-06, - "loss": 0.8012, - "num_input_tokens_seen": 66883195, - "step": 3153 - }, - { - "epoch": 0.3792460770756929, - "grad_norm": 2.823779504213679, - "learning_rate": 2.851389419101285e-06, - "loss": 0.7022, - "num_input_tokens_seen": 66901895, - "step": 3154 - }, - { - "epoch": 0.37936631996633197, - "grad_norm": 2.359293594206734, - "learning_rate": 2.8506844889216664e-06, - "loss": 0.7786, - "num_input_tokens_seen": 66921000, - "step": 3155 - }, - { - "epoch": 0.3794865628569711, - "grad_norm": 0.9069679432226965, - "learning_rate": 2.849979429693887e-06, - "loss": 0.6675, - "num_input_tokens_seen": 66981705, - "step": 3156 - }, - { - "epoch": 0.3796068057476102, - "grad_norm": 2.5502282171571755, - "learning_rate": 2.8492742415249042e-06, - "loss": 0.7398, - "num_input_tokens_seen": 66999070, - "step": 3157 - }, - { - "epoch": 0.37972704863824924, - "grad_norm": 1.9709765330500568, - "learning_rate": 2.848568924521694e-06, - "loss": 0.7593, - "num_input_tokens_seen": 67019570, - "step": 3158 - }, - { - "epoch": 0.37984729152888835, - "grad_norm": 2.2528051229234825, - "learning_rate": 2.8478634787912526e-06, - "loss": 0.7264, - "num_input_tokens_seen": 67037345, - "step": 3159 - }, - { - "epoch": 0.37996753441952746, - "grad_norm": 2.940462319070035, - "learning_rate": 2.8471579044405954e-06, - "loss": 0.7635, - "num_input_tokens_seen": 67056795, - "step": 3160 - }, - { - "epoch": 0.3800877773101665, - "grad_norm": 1.686815102329516, - "learning_rate": 2.846452201576758e-06, - "loss": 0.7463, - "num_input_tokens_seen": 67075890, - "step": 3161 - }, - { - "epoch": 0.38020802020080563, - "grad_norm": 1.0648944070946078, - "learning_rate": 2.845746370306795e-06, - "loss": 0.6504, - "num_input_tokens_seen": 67140800, - "step": 3162 - }, - { - "epoch": 0.38032826309144474, - "grad_norm": 2.1074651090939116, - "learning_rate": 2.84504041073778e-06, - "loss": 0.7808, - "num_input_tokens_seen": 67158935, - "step": 3163 - }, - { - "epoch": 0.3804485059820838, - "grad_norm": 2.198766616879029, - "learning_rate": 2.844334322976806e-06, - "loss": 0.798, - "num_input_tokens_seen": 67178870, - "step": 3164 - }, - { - "epoch": 0.3805687488727229, - "grad_norm": 9.09663277233536, - "learning_rate": 2.843628107130987e-06, - "loss": 0.8278, - "num_input_tokens_seen": 67197130, - "step": 3165 - }, - { - "epoch": 0.380688991763362, - "grad_norm": 0.7830222457122498, - "learning_rate": 2.8429217633074545e-06, - "loss": 0.5527, - "num_input_tokens_seen": 67259660, - "step": 3166 - }, - { - "epoch": 0.38080923465400107, - "grad_norm": 2.037505354391273, - "learning_rate": 2.842215291613361e-06, - "loss": 0.8273, - "num_input_tokens_seen": 67277760, - "step": 3167 - }, - { - "epoch": 0.3809294775446402, - "grad_norm": 0.8465271274401072, - "learning_rate": 2.841508692155877e-06, - "loss": 0.6425, - "num_input_tokens_seen": 67340905, - "step": 3168 - }, - { - "epoch": 0.38104972043527924, - "grad_norm": 1.7681966864986254, - "learning_rate": 2.840801965042194e-06, - "loss": 0.7772, - "num_input_tokens_seen": 67360085, - "step": 3169 - }, - { - "epoch": 0.38116996332591835, - "grad_norm": 2.2557065318987655, - "learning_rate": 2.840095110379521e-06, - "loss": 0.8351, - "num_input_tokens_seen": 67379325, - "step": 3170 - }, - { - "epoch": 0.38129020621655746, - "grad_norm": 0.7234750766036715, - "learning_rate": 2.8393881282750884e-06, - "loss": 0.5503, - "num_input_tokens_seen": 67441875, - "step": 3171 - }, - { - "epoch": 0.3814104491071965, - "grad_norm": 2.9612366795459892, - "learning_rate": 2.8386810188361435e-06, - "loss": 0.7808, - "num_input_tokens_seen": 67458915, - "step": 3172 - }, - { - "epoch": 0.3815306919978356, - "grad_norm": 2.4768780469564975, - "learning_rate": 2.837973782169955e-06, - "loss": 0.7694, - "num_input_tokens_seen": 67477010, - "step": 3173 - }, - { - "epoch": 0.38165093488847474, - "grad_norm": 0.8930919426773212, - "learning_rate": 2.8372664183838096e-06, - "loss": 0.6238, - "num_input_tokens_seen": 67539750, - "step": 3174 - }, - { - "epoch": 0.3817711777791138, - "grad_norm": 2.2688095836356656, - "learning_rate": 2.836558927585015e-06, - "loss": 0.6863, - "num_input_tokens_seen": 67556440, - "step": 3175 - }, - { - "epoch": 0.3818914206697529, - "grad_norm": 2.9819648635912137, - "learning_rate": 2.8358513098808957e-06, - "loss": 0.8098, - "num_input_tokens_seen": 67576475, - "step": 3176 - }, - { - "epoch": 0.382011663560392, - "grad_norm": 2.1356580426955025, - "learning_rate": 2.835143565378798e-06, - "loss": 0.7634, - "num_input_tokens_seen": 67596660, - "step": 3177 - }, - { - "epoch": 0.38213190645103107, - "grad_norm": 2.2040490822486816, - "learning_rate": 2.8344356941860847e-06, - "loss": 0.7749, - "num_input_tokens_seen": 67616010, - "step": 3178 - }, - { - "epoch": 0.3822521493416702, - "grad_norm": 2.6126826308733335, - "learning_rate": 2.8337276964101403e-06, - "loss": 0.6625, - "num_input_tokens_seen": 67636170, - "step": 3179 - }, - { - "epoch": 0.3823723922323093, - "grad_norm": 5.052353377952411, - "learning_rate": 2.833019572158367e-06, - "loss": 0.7501, - "num_input_tokens_seen": 67654325, - "step": 3180 - }, - { - "epoch": 0.38249263512294834, - "grad_norm": 2.997275011567839, - "learning_rate": 2.8323113215381872e-06, - "loss": 0.8052, - "num_input_tokens_seen": 67672390, - "step": 3181 - }, - { - "epoch": 0.38261287801358745, - "grad_norm": 2.166972875921478, - "learning_rate": 2.8316029446570416e-06, - "loss": 0.7525, - "num_input_tokens_seen": 67690190, - "step": 3182 - }, - { - "epoch": 0.38273312090422656, - "grad_norm": 2.6722048896581603, - "learning_rate": 2.8308944416223904e-06, - "loss": 0.7398, - "num_input_tokens_seen": 67706560, - "step": 3183 - }, - { - "epoch": 0.3828533637948656, - "grad_norm": 2.8594806509272312, - "learning_rate": 2.8301858125417134e-06, - "loss": 0.7899, - "num_input_tokens_seen": 67726120, - "step": 3184 - }, - { - "epoch": 0.38297360668550473, - "grad_norm": 3.683428415865929, - "learning_rate": 2.8294770575225087e-06, - "loss": 0.7413, - "num_input_tokens_seen": 67745970, - "step": 3185 - }, - { - "epoch": 0.3830938495761438, - "grad_norm": 5.559087873767548, - "learning_rate": 2.828768176672293e-06, - "loss": 0.8346, - "num_input_tokens_seen": 67764805, - "step": 3186 - }, - { - "epoch": 0.3832140924667829, - "grad_norm": 2.447176294654098, - "learning_rate": 2.8280591700986048e-06, - "loss": 0.7202, - "num_input_tokens_seen": 67786390, - "step": 3187 - }, - { - "epoch": 0.383334335357422, - "grad_norm": 2.4761889783888043, - "learning_rate": 2.8273500379089986e-06, - "loss": 0.7514, - "num_input_tokens_seen": 67805550, - "step": 3188 - }, - { - "epoch": 0.38345457824806106, - "grad_norm": 3.866308095564575, - "learning_rate": 2.8266407802110496e-06, - "loss": 0.7896, - "num_input_tokens_seen": 67823525, - "step": 3189 - }, - { - "epoch": 0.3835748211387002, - "grad_norm": 2.356838230972668, - "learning_rate": 2.8259313971123506e-06, - "loss": 0.7537, - "num_input_tokens_seen": 67844365, - "step": 3190 - }, - { - "epoch": 0.3836950640293393, - "grad_norm": 2.388726334124802, - "learning_rate": 2.825221888720517e-06, - "loss": 0.7694, - "num_input_tokens_seen": 67864775, - "step": 3191 - }, - { - "epoch": 0.38381530691997834, - "grad_norm": 1.812982963796719, - "learning_rate": 2.824512255143178e-06, - "loss": 0.8074, - "num_input_tokens_seen": 67883730, - "step": 3192 - }, - { - "epoch": 0.38393554981061745, - "grad_norm": 1.9273416151562353, - "learning_rate": 2.8238024964879855e-06, - "loss": 0.7896, - "num_input_tokens_seen": 67904345, - "step": 3193 - }, - { - "epoch": 0.38405579270125656, - "grad_norm": 2.3238009444830365, - "learning_rate": 2.823092612862609e-06, - "loss": 0.7657, - "num_input_tokens_seen": 67922560, - "step": 3194 - }, - { - "epoch": 0.3841760355918956, - "grad_norm": 2.4195216232002754, - "learning_rate": 2.822382604374738e-06, - "loss": 0.7852, - "num_input_tokens_seen": 67941205, - "step": 3195 - }, - { - "epoch": 0.3842962784825347, - "grad_norm": 2.7879406733664625, - "learning_rate": 2.8216724711320793e-06, - "loss": 0.6478, - "num_input_tokens_seen": 67960050, - "step": 3196 - }, - { - "epoch": 0.38441652137317384, - "grad_norm": 1.9042961595927328, - "learning_rate": 2.820962213242361e-06, - "loss": 0.7962, - "num_input_tokens_seen": 67979100, - "step": 3197 - }, - { - "epoch": 0.3845367642638129, - "grad_norm": 2.4164326987298734, - "learning_rate": 2.8202518308133273e-06, - "loss": 0.8441, - "num_input_tokens_seen": 67996095, - "step": 3198 - }, - { - "epoch": 0.384657007154452, - "grad_norm": 2.008362776419249, - "learning_rate": 2.8195413239527426e-06, - "loss": 0.7297, - "num_input_tokens_seen": 68015555, - "step": 3199 - }, - { - "epoch": 0.38477725004509106, - "grad_norm": 3.177418803460515, - "learning_rate": 2.8188306927683906e-06, - "loss": 0.8037, - "num_input_tokens_seen": 68034745, - "step": 3200 - }, - { - "epoch": 0.38489749293573017, - "grad_norm": 3.1801369650805147, - "learning_rate": 2.818119937368074e-06, - "loss": 0.7538, - "num_input_tokens_seen": 68053100, - "step": 3201 - }, - { - "epoch": 0.3850177358263693, - "grad_norm": 2.501343470982094, - "learning_rate": 2.817409057859613e-06, - "loss": 0.6515, - "num_input_tokens_seen": 68071810, - "step": 3202 - }, - { - "epoch": 0.38513797871700833, - "grad_norm": 2.7637374189971657, - "learning_rate": 2.8166980543508482e-06, - "loss": 0.7845, - "num_input_tokens_seen": 68087420, - "step": 3203 - }, - { - "epoch": 0.38525822160764744, - "grad_norm": 2.117494427375198, - "learning_rate": 2.8159869269496375e-06, - "loss": 0.7976, - "num_input_tokens_seen": 68105640, - "step": 3204 - }, - { - "epoch": 0.38537846449828655, - "grad_norm": 1.801854297618171, - "learning_rate": 2.8152756757638593e-06, - "loss": 0.7956, - "num_input_tokens_seen": 68123860, - "step": 3205 - }, - { - "epoch": 0.3854987073889256, - "grad_norm": 2.1129966074856363, - "learning_rate": 2.8145643009014093e-06, - "loss": 0.8353, - "num_input_tokens_seen": 68142075, - "step": 3206 - }, - { - "epoch": 0.3856189502795647, - "grad_norm": 1.8512615219835713, - "learning_rate": 2.8138528024702023e-06, - "loss": 0.7825, - "num_input_tokens_seen": 68159690, - "step": 3207 - }, - { - "epoch": 0.38573919317020383, - "grad_norm": 2.7392680086253827, - "learning_rate": 2.8131411805781717e-06, - "loss": 0.7191, - "num_input_tokens_seen": 68179535, - "step": 3208 - }, - { - "epoch": 0.3858594360608429, - "grad_norm": 3.189225758197121, - "learning_rate": 2.8124294353332716e-06, - "loss": 0.6427, - "num_input_tokens_seen": 68197930, - "step": 3209 - }, - { - "epoch": 0.385979678951482, - "grad_norm": 2.8845284035144494, - "learning_rate": 2.811717566843471e-06, - "loss": 0.771, - "num_input_tokens_seen": 68217310, - "step": 3210 - }, - { - "epoch": 0.3860999218421211, - "grad_norm": 2.4025780283711846, - "learning_rate": 2.811005575216762e-06, - "loss": 0.6919, - "num_input_tokens_seen": 68235745, - "step": 3211 - }, - { - "epoch": 0.38622016473276016, - "grad_norm": 1.8744117195874066, - "learning_rate": 2.810293460561151e-06, - "loss": 0.7781, - "num_input_tokens_seen": 68257100, - "step": 3212 - }, - { - "epoch": 0.3863404076233993, - "grad_norm": 2.067944246693746, - "learning_rate": 2.8095812229846674e-06, - "loss": 0.6743, - "num_input_tokens_seen": 68276780, - "step": 3213 - }, - { - "epoch": 0.3864606505140384, - "grad_norm": 2.3616425111489168, - "learning_rate": 2.808868862595355e-06, - "loss": 0.691, - "num_input_tokens_seen": 68296745, - "step": 3214 - }, - { - "epoch": 0.38658089340467744, - "grad_norm": 2.228010454743298, - "learning_rate": 2.8081563795012795e-06, - "loss": 0.7955, - "num_input_tokens_seen": 68316090, - "step": 3215 - }, - { - "epoch": 0.38670113629531655, - "grad_norm": 2.4642472329205027, - "learning_rate": 2.807443773810524e-06, - "loss": 0.7312, - "num_input_tokens_seen": 68337070, - "step": 3216 - }, - { - "epoch": 0.3868213791859556, - "grad_norm": 19.586500644173306, - "learning_rate": 2.80673104563119e-06, - "loss": 0.8905, - "num_input_tokens_seen": 68357415, - "step": 3217 - }, - { - "epoch": 0.3869416220765947, - "grad_norm": 1.935875430167441, - "learning_rate": 2.8060181950713976e-06, - "loss": 0.7745, - "num_input_tokens_seen": 68373925, - "step": 3218 - }, - { - "epoch": 0.3870618649672338, - "grad_norm": 2.826078153465582, - "learning_rate": 2.805305222239286e-06, - "loss": 0.8065, - "num_input_tokens_seen": 68390900, - "step": 3219 - }, - { - "epoch": 0.3871821078578729, - "grad_norm": 2.056429348415702, - "learning_rate": 2.8045921272430126e-06, - "loss": 0.7305, - "num_input_tokens_seen": 68410300, - "step": 3220 - }, - { - "epoch": 0.387302350748512, - "grad_norm": 2.299329305554979, - "learning_rate": 2.803878910190753e-06, - "loss": 0.7619, - "num_input_tokens_seen": 68426940, - "step": 3221 - }, - { - "epoch": 0.3874225936391511, - "grad_norm": 3.4718564099401212, - "learning_rate": 2.8031655711907017e-06, - "loss": 0.8131, - "num_input_tokens_seen": 68440365, - "step": 3222 - }, - { - "epoch": 0.38754283652979016, - "grad_norm": 2.1749807856426346, - "learning_rate": 2.8024521103510723e-06, - "loss": 0.8059, - "num_input_tokens_seen": 68456855, - "step": 3223 - }, - { - "epoch": 0.38766307942042927, - "grad_norm": 1.8249165888820273, - "learning_rate": 2.8017385277800952e-06, - "loss": 0.7469, - "num_input_tokens_seen": 68474930, - "step": 3224 - }, - { - "epoch": 0.3877833223110684, - "grad_norm": 2.006745255579459, - "learning_rate": 2.8010248235860213e-06, - "loss": 0.7377, - "num_input_tokens_seen": 68494765, - "step": 3225 - }, - { - "epoch": 0.38790356520170743, - "grad_norm": 0.8561899931852645, - "learning_rate": 2.800310997877119e-06, - "loss": 0.665, - "num_input_tokens_seen": 68555650, - "step": 3226 - }, - { - "epoch": 0.38802380809234654, - "grad_norm": 2.149592460969267, - "learning_rate": 2.799597050761674e-06, - "loss": 0.782, - "num_input_tokens_seen": 68571575, - "step": 3227 - }, - { - "epoch": 0.38814405098298566, - "grad_norm": 1.8775909686476138, - "learning_rate": 2.7988829823479924e-06, - "loss": 0.788, - "num_input_tokens_seen": 68589685, - "step": 3228 - }, - { - "epoch": 0.3882642938736247, - "grad_norm": 2.2605340733905304, - "learning_rate": 2.7981687927443976e-06, - "loss": 0.6408, - "num_input_tokens_seen": 68606205, - "step": 3229 - }, - { - "epoch": 0.3883845367642638, - "grad_norm": 1.9181067704478187, - "learning_rate": 2.797454482059231e-06, - "loss": 0.852, - "num_input_tokens_seen": 68626080, - "step": 3230 - }, - { - "epoch": 0.3885047796549029, - "grad_norm": 1.647765499404315, - "learning_rate": 2.796740050400854e-06, - "loss": 0.8394, - "num_input_tokens_seen": 68645100, - "step": 3231 - }, - { - "epoch": 0.388625022545542, - "grad_norm": 0.8532408791801639, - "learning_rate": 2.7960254978776448e-06, - "loss": 0.6178, - "num_input_tokens_seen": 68706910, - "step": 3232 - }, - { - "epoch": 0.3887452654361811, - "grad_norm": 2.435094191181028, - "learning_rate": 2.7953108245980006e-06, - "loss": 0.8108, - "num_input_tokens_seen": 68725145, - "step": 3233 - }, - { - "epoch": 0.38886550832682015, - "grad_norm": 1.5641339285657376, - "learning_rate": 2.7945960306703365e-06, - "loss": 0.7398, - "num_input_tokens_seen": 68747850, - "step": 3234 - }, - { - "epoch": 0.38898575121745926, - "grad_norm": 1.8871605136948788, - "learning_rate": 2.793881116203087e-06, - "loss": 0.657, - "num_input_tokens_seen": 68767835, - "step": 3235 - }, - { - "epoch": 0.3891059941080984, - "grad_norm": 2.7825940514451557, - "learning_rate": 2.793166081304702e-06, - "loss": 0.8235, - "num_input_tokens_seen": 68788050, - "step": 3236 - }, - { - "epoch": 0.38922623699873743, - "grad_norm": 2.211229418210362, - "learning_rate": 2.7924509260836543e-06, - "loss": 0.8199, - "num_input_tokens_seen": 68806895, - "step": 3237 - }, - { - "epoch": 0.38934647988937654, - "grad_norm": 1.9152738637345454, - "learning_rate": 2.7917356506484302e-06, - "loss": 0.6831, - "num_input_tokens_seen": 68825735, - "step": 3238 - }, - { - "epoch": 0.38946672278001565, - "grad_norm": 2.357229773764005, - "learning_rate": 2.791020255107538e-06, - "loss": 0.7443, - "num_input_tokens_seen": 68842825, - "step": 3239 - }, - { - "epoch": 0.3895869656706547, - "grad_norm": 1.5063397346567262, - "learning_rate": 2.790304739569502e-06, - "loss": 0.7938, - "num_input_tokens_seen": 68862445, - "step": 3240 - }, - { - "epoch": 0.3897072085612938, - "grad_norm": 1.8553786764134097, - "learning_rate": 2.789589104142865e-06, - "loss": 0.8961, - "num_input_tokens_seen": 68879790, - "step": 3241 - }, - { - "epoch": 0.3898274514519329, - "grad_norm": 1.6941341482165353, - "learning_rate": 2.78887334893619e-06, - "loss": 0.7666, - "num_input_tokens_seen": 68897925, - "step": 3242 - }, - { - "epoch": 0.389947694342572, - "grad_norm": 0.8165173741996621, - "learning_rate": 2.788157474058054e-06, - "loss": 0.6475, - "num_input_tokens_seen": 68959920, - "step": 3243 - }, - { - "epoch": 0.3900679372332111, - "grad_norm": 1.9749278910624048, - "learning_rate": 2.7874414796170555e-06, - "loss": 0.699, - "num_input_tokens_seen": 68981130, - "step": 3244 - }, - { - "epoch": 0.3901881801238502, - "grad_norm": 4.2546129160662165, - "learning_rate": 2.7867253657218113e-06, - "loss": 0.833, - "num_input_tokens_seen": 68994740, - "step": 3245 - }, - { - "epoch": 0.39030842301448926, - "grad_norm": 1.8386320100310074, - "learning_rate": 2.7860091324809544e-06, - "loss": 0.7296, - "num_input_tokens_seen": 69015520, - "step": 3246 - }, - { - "epoch": 0.39042866590512837, - "grad_norm": 2.0772288424866585, - "learning_rate": 2.7852927800031377e-06, - "loss": 0.8031, - "num_input_tokens_seen": 69035405, - "step": 3247 - }, - { - "epoch": 0.3905489087957674, - "grad_norm": 1.91006723307839, - "learning_rate": 2.7845763083970293e-06, - "loss": 0.8217, - "num_input_tokens_seen": 69055525, - "step": 3248 - }, - { - "epoch": 0.39066915168640653, - "grad_norm": 2.617120465654301, - "learning_rate": 2.78385971777132e-06, - "loss": 0.8192, - "num_input_tokens_seen": 69076335, - "step": 3249 - }, - { - "epoch": 0.39078939457704565, - "grad_norm": 1.7490050289284655, - "learning_rate": 2.7831430082347143e-06, - "loss": 0.7324, - "num_input_tokens_seen": 69095260, - "step": 3250 - }, - { - "epoch": 0.3909096374676847, - "grad_norm": 2.046149020869084, - "learning_rate": 2.7824261798959373e-06, - "loss": 0.8202, - "num_input_tokens_seen": 69113160, - "step": 3251 - }, - { - "epoch": 0.3910298803583238, - "grad_norm": 1.8219220612114093, - "learning_rate": 2.78170923286373e-06, - "loss": 0.7903, - "num_input_tokens_seen": 69132480, - "step": 3252 - }, - { - "epoch": 0.3911501232489629, - "grad_norm": 2.6743505168232025, - "learning_rate": 2.780992167246855e-06, - "loss": 0.8292, - "num_input_tokens_seen": 69149725, - "step": 3253 - }, - { - "epoch": 0.391270366139602, - "grad_norm": 1.0283085145022817, - "learning_rate": 2.780274983154088e-06, - "loss": 0.7747, - "num_input_tokens_seen": 69208345, - "step": 3254 - }, - { - "epoch": 0.3913906090302411, - "grad_norm": 2.2577370138408432, - "learning_rate": 2.7795576806942268e-06, - "loss": 0.8156, - "num_input_tokens_seen": 69226870, - "step": 3255 - }, - { - "epoch": 0.3915108519208802, - "grad_norm": 0.8175112549171334, - "learning_rate": 2.778840259976085e-06, - "loss": 0.5768, - "num_input_tokens_seen": 69281820, - "step": 3256 - }, - { - "epoch": 0.39163109481151925, - "grad_norm": 1.9688295861626535, - "learning_rate": 2.7781227211084955e-06, - "loss": 0.7773, - "num_input_tokens_seen": 69299770, - "step": 3257 - }, - { - "epoch": 0.39175133770215836, - "grad_norm": 1.9496064702054825, - "learning_rate": 2.7774050642003076e-06, - "loss": 0.8803, - "num_input_tokens_seen": 69320300, - "step": 3258 - }, - { - "epoch": 0.3918715805927975, - "grad_norm": 1.8700093173411128, - "learning_rate": 2.7766872893603896e-06, - "loss": 0.935, - "num_input_tokens_seen": 69339995, - "step": 3259 - }, - { - "epoch": 0.39199182348343653, - "grad_norm": 1.8084285975671681, - "learning_rate": 2.775969396697627e-06, - "loss": 0.7285, - "num_input_tokens_seen": 69358220, - "step": 3260 - }, - { - "epoch": 0.39211206637407564, - "grad_norm": 1.9947594011564918, - "learning_rate": 2.7752513863209242e-06, - "loss": 0.8458, - "num_input_tokens_seen": 69376520, - "step": 3261 - }, - { - "epoch": 0.39223230926471475, - "grad_norm": 1.539650753480743, - "learning_rate": 2.7745332583392024e-06, - "loss": 0.8267, - "num_input_tokens_seen": 69393700, - "step": 3262 - }, - { - "epoch": 0.3923525521553538, - "grad_norm": 2.2492313911851465, - "learning_rate": 2.7738150128614014e-06, - "loss": 0.7913, - "num_input_tokens_seen": 69410825, - "step": 3263 - }, - { - "epoch": 0.3924727950459929, - "grad_norm": 3.7170439930092716, - "learning_rate": 2.773096649996478e-06, - "loss": 0.8912, - "num_input_tokens_seen": 69427495, - "step": 3264 - }, - { - "epoch": 0.39259303793663197, - "grad_norm": 2.998718104645421, - "learning_rate": 2.772378169853408e-06, - "loss": 0.7961, - "num_input_tokens_seen": 69444785, - "step": 3265 - }, - { - "epoch": 0.3927132808272711, - "grad_norm": 1.9705067567947931, - "learning_rate": 2.771659572541183e-06, - "loss": 0.7371, - "num_input_tokens_seen": 69462435, - "step": 3266 - }, - { - "epoch": 0.3928335237179102, - "grad_norm": 1.9960138044789726, - "learning_rate": 2.7709408581688143e-06, - "loss": 0.8694, - "num_input_tokens_seen": 69482140, - "step": 3267 - }, - { - "epoch": 0.39295376660854925, - "grad_norm": 1.579739037093937, - "learning_rate": 2.7702220268453307e-06, - "loss": 0.8754, - "num_input_tokens_seen": 69502220, - "step": 3268 - }, - { - "epoch": 0.39307400949918836, - "grad_norm": 1.9979618949720819, - "learning_rate": 2.7695030786797785e-06, - "loss": 0.8414, - "num_input_tokens_seen": 69517835, - "step": 3269 - }, - { - "epoch": 0.39319425238982747, - "grad_norm": 2.46552842637992, - "learning_rate": 2.76878401378122e-06, - "loss": 0.7347, - "num_input_tokens_seen": 69535640, - "step": 3270 - }, - { - "epoch": 0.3933144952804665, - "grad_norm": 0.8294253344023248, - "learning_rate": 2.768064832258739e-06, - "loss": 0.6347, - "num_input_tokens_seen": 69600235, - "step": 3271 - }, - { - "epoch": 0.39343473817110564, - "grad_norm": 1.9541891967771021, - "learning_rate": 2.7673455342214334e-06, - "loss": 0.8101, - "num_input_tokens_seen": 69616945, - "step": 3272 - }, - { - "epoch": 0.39355498106174475, - "grad_norm": 2.150477408039174, - "learning_rate": 2.7666261197784198e-06, - "loss": 0.7571, - "num_input_tokens_seen": 69635480, - "step": 3273 - }, - { - "epoch": 0.3936752239523838, - "grad_norm": 2.3944691953020665, - "learning_rate": 2.7659065890388336e-06, - "loss": 0.7693, - "num_input_tokens_seen": 69651200, - "step": 3274 - }, - { - "epoch": 0.3937954668430229, - "grad_norm": 4.83584702275678, - "learning_rate": 2.765186942111827e-06, - "loss": 0.8442, - "num_input_tokens_seen": 69667530, - "step": 3275 - }, - { - "epoch": 0.393915709733662, - "grad_norm": 1.8070150301717234, - "learning_rate": 2.764467179106569e-06, - "loss": 0.8216, - "num_input_tokens_seen": 69687955, - "step": 3276 - }, - { - "epoch": 0.3940359526243011, - "grad_norm": 2.325226101712111, - "learning_rate": 2.763747300132249e-06, - "loss": 0.7577, - "num_input_tokens_seen": 69705115, - "step": 3277 - }, - { - "epoch": 0.3941561955149402, - "grad_norm": 1.653860604550936, - "learning_rate": 2.7630273052980704e-06, - "loss": 0.8619, - "num_input_tokens_seen": 69725425, - "step": 3278 - }, - { - "epoch": 0.39427643840557924, - "grad_norm": 2.6488817239056406, - "learning_rate": 2.762307194713256e-06, - "loss": 0.6706, - "num_input_tokens_seen": 69742175, - "step": 3279 - }, - { - "epoch": 0.39439668129621835, - "grad_norm": 1.97649731825759, - "learning_rate": 2.7615869684870458e-06, - "loss": 0.7776, - "num_input_tokens_seen": 69761205, - "step": 3280 - }, - { - "epoch": 0.39451692418685746, - "grad_norm": 2.426535129401768, - "learning_rate": 2.7608666267286986e-06, - "loss": 0.8348, - "num_input_tokens_seen": 69781155, - "step": 3281 - }, - { - "epoch": 0.3946371670774965, - "grad_norm": 2.3596359702074126, - "learning_rate": 2.7601461695474888e-06, - "loss": 0.8561, - "num_input_tokens_seen": 69797640, - "step": 3282 - }, - { - "epoch": 0.39475740996813563, - "grad_norm": 1.5514249441449879, - "learning_rate": 2.75942559705271e-06, - "loss": 0.7615, - "num_input_tokens_seen": 69817095, - "step": 3283 - }, - { - "epoch": 0.39487765285877474, - "grad_norm": 2.6687006281010945, - "learning_rate": 2.7587049093536713e-06, - "loss": 0.8772, - "num_input_tokens_seen": 69833145, - "step": 3284 - }, - { - "epoch": 0.3949978957494138, - "grad_norm": 1.7031599929084906, - "learning_rate": 2.757984106559701e-06, - "loss": 0.8008, - "num_input_tokens_seen": 69851850, - "step": 3285 - }, - { - "epoch": 0.3951181386400529, - "grad_norm": 2.4336544278473333, - "learning_rate": 2.757263188780145e-06, - "loss": 0.7149, - "num_input_tokens_seen": 69873195, - "step": 3286 - }, - { - "epoch": 0.395238381530692, - "grad_norm": 1.7044943689675445, - "learning_rate": 2.7565421561243654e-06, - "loss": 0.7494, - "num_input_tokens_seen": 69891080, - "step": 3287 - }, - { - "epoch": 0.3953586244213311, - "grad_norm": 2.5191738399306574, - "learning_rate": 2.7558210087017413e-06, - "loss": 0.8233, - "num_input_tokens_seen": 69910735, - "step": 3288 - }, - { - "epoch": 0.3954788673119702, - "grad_norm": 2.170256057836073, - "learning_rate": 2.7550997466216724e-06, - "loss": 0.7386, - "num_input_tokens_seen": 69928250, - "step": 3289 - }, - { - "epoch": 0.3955991102026093, - "grad_norm": 2.2323220819837166, - "learning_rate": 2.7543783699935714e-06, - "loss": 0.8041, - "num_input_tokens_seen": 69946000, - "step": 3290 - }, - { - "epoch": 0.39571935309324835, - "grad_norm": 2.40132013766084, - "learning_rate": 2.753656878926872e-06, - "loss": 0.8528, - "num_input_tokens_seen": 69961600, - "step": 3291 - }, - { - "epoch": 0.39583959598388746, - "grad_norm": 1.8374856154153099, - "learning_rate": 2.7529352735310226e-06, - "loss": 0.737, - "num_input_tokens_seen": 69979470, - "step": 3292 - }, - { - "epoch": 0.39595983887452657, - "grad_norm": 2.325397445467587, - "learning_rate": 2.7522135539154914e-06, - "loss": 0.7908, - "num_input_tokens_seen": 69997545, - "step": 3293 - }, - { - "epoch": 0.3960800817651656, - "grad_norm": 1.131151958426341, - "learning_rate": 2.751491720189762e-06, - "loss": 0.6817, - "num_input_tokens_seen": 70055375, - "step": 3294 - }, - { - "epoch": 0.39620032465580474, - "grad_norm": 2.669612614441195, - "learning_rate": 2.7507697724633364e-06, - "loss": 0.9081, - "num_input_tokens_seen": 70071855, - "step": 3295 - }, - { - "epoch": 0.3963205675464438, - "grad_norm": 0.8652166978901629, - "learning_rate": 2.7500477108457327e-06, - "loss": 0.5659, - "num_input_tokens_seen": 70123585, - "step": 3296 - }, - { - "epoch": 0.3964408104370829, - "grad_norm": 1.8499369610659868, - "learning_rate": 2.749325535446488e-06, - "loss": 0.7999, - "num_input_tokens_seen": 70141115, - "step": 3297 - }, - { - "epoch": 0.396561053327722, - "grad_norm": 1.7507039295732016, - "learning_rate": 2.7486032463751555e-06, - "loss": 0.7539, - "num_input_tokens_seen": 70158850, - "step": 3298 - }, - { - "epoch": 0.39668129621836107, - "grad_norm": 2.1185347097908513, - "learning_rate": 2.7478808437413055e-06, - "loss": 0.6919, - "num_input_tokens_seen": 70177980, - "step": 3299 - }, - { - "epoch": 0.3968015391090002, - "grad_norm": 1.780913118824692, - "learning_rate": 2.7471583276545263e-06, - "loss": 0.6542, - "num_input_tokens_seen": 70198360, - "step": 3300 - }, - { - "epoch": 0.3969217819996393, - "grad_norm": 2.0239469972871236, - "learning_rate": 2.746435698224423e-06, - "loss": 0.6997, - "num_input_tokens_seen": 70216080, - "step": 3301 - }, - { - "epoch": 0.39704202489027834, - "grad_norm": 0.8377937703109886, - "learning_rate": 2.745712955560617e-06, - "loss": 0.6431, - "num_input_tokens_seen": 70272005, - "step": 3302 - }, - { - "epoch": 0.39716226778091746, - "grad_norm": 2.431012901034257, - "learning_rate": 2.7449900997727496e-06, - "loss": 0.772, - "num_input_tokens_seen": 70289835, - "step": 3303 - }, - { - "epoch": 0.39728251067155657, - "grad_norm": 1.641958929823671, - "learning_rate": 2.7442671309704754e-06, - "loss": 0.8325, - "num_input_tokens_seen": 70309280, - "step": 3304 - }, - { - "epoch": 0.3974027535621956, - "grad_norm": 1.810880890923636, - "learning_rate": 2.7435440492634697e-06, - "loss": 0.7628, - "num_input_tokens_seen": 70328325, - "step": 3305 - }, - { - "epoch": 0.39752299645283473, - "grad_norm": 2.36911539646419, - "learning_rate": 2.7428208547614223e-06, - "loss": 0.6628, - "num_input_tokens_seen": 70347540, - "step": 3306 - }, - { - "epoch": 0.39764323934347384, - "grad_norm": 2.289460460435136, - "learning_rate": 2.742097547574043e-06, - "loss": 0.769, - "num_input_tokens_seen": 70365485, - "step": 3307 - }, - { - "epoch": 0.3977634822341129, - "grad_norm": 17.143966592699762, - "learning_rate": 2.741374127811055e-06, - "loss": 0.7685, - "num_input_tokens_seen": 70383895, - "step": 3308 - }, - { - "epoch": 0.397883725124752, - "grad_norm": 2.7658846322547666, - "learning_rate": 2.7406505955822016e-06, - "loss": 0.6815, - "num_input_tokens_seen": 70404640, - "step": 3309 - }, - { - "epoch": 0.39800396801539106, - "grad_norm": 2.3642394700964733, - "learning_rate": 2.7399269509972415e-06, - "loss": 0.6575, - "num_input_tokens_seen": 70418515, - "step": 3310 - }, - { - "epoch": 0.3981242109060302, - "grad_norm": 2.628236673925707, - "learning_rate": 2.7392031941659514e-06, - "loss": 0.8344, - "num_input_tokens_seen": 70436080, - "step": 3311 - }, - { - "epoch": 0.3982444537966693, - "grad_norm": 1.7531765776602481, - "learning_rate": 2.7384793251981244e-06, - "loss": 0.8506, - "num_input_tokens_seen": 70454785, - "step": 3312 - }, - { - "epoch": 0.39836469668730834, - "grad_norm": 1.7100082239913483, - "learning_rate": 2.737755344203571e-06, - "loss": 0.804, - "num_input_tokens_seen": 70474455, - "step": 3313 - }, - { - "epoch": 0.39848493957794745, - "grad_norm": 1.7263494720033836, - "learning_rate": 2.7370312512921177e-06, - "loss": 0.7936, - "num_input_tokens_seen": 70495955, - "step": 3314 - }, - { - "epoch": 0.39860518246858656, - "grad_norm": 2.7479809507713586, - "learning_rate": 2.7363070465736106e-06, - "loss": 0.76, - "num_input_tokens_seen": 70511545, - "step": 3315 - }, - { - "epoch": 0.3987254253592256, - "grad_norm": 2.041973070662389, - "learning_rate": 2.73558273015791e-06, - "loss": 0.8165, - "num_input_tokens_seen": 70531095, - "step": 3316 - }, - { - "epoch": 0.3988456682498647, - "grad_norm": 2.285318278607479, - "learning_rate": 2.734858302154894e-06, - "loss": 0.7082, - "num_input_tokens_seen": 70552315, - "step": 3317 - }, - { - "epoch": 0.39896591114050384, - "grad_norm": 2.1358721970732013, - "learning_rate": 2.734133762674457e-06, - "loss": 0.755, - "num_input_tokens_seen": 70571625, - "step": 3318 - }, - { - "epoch": 0.3990861540311429, - "grad_norm": 2.9805138918900362, - "learning_rate": 2.7334091118265124e-06, - "loss": 0.7053, - "num_input_tokens_seen": 70593240, - "step": 3319 - }, - { - "epoch": 0.399206396921782, - "grad_norm": 0.6737915514037378, - "learning_rate": 2.732684349720989e-06, - "loss": 0.5862, - "num_input_tokens_seen": 70660920, - "step": 3320 - }, - { - "epoch": 0.3993266398124211, - "grad_norm": 1.9217949778581944, - "learning_rate": 2.7319594764678318e-06, - "loss": 0.7448, - "num_input_tokens_seen": 70682740, - "step": 3321 - }, - { - "epoch": 0.39944688270306017, - "grad_norm": 1.7321146337550002, - "learning_rate": 2.7312344921770044e-06, - "loss": 0.8339, - "num_input_tokens_seen": 70704160, - "step": 3322 - }, - { - "epoch": 0.3995671255936993, - "grad_norm": 2.2221370787413295, - "learning_rate": 2.730509396958486e-06, - "loss": 0.7821, - "num_input_tokens_seen": 70722705, - "step": 3323 - }, - { - "epoch": 0.3996873684843384, - "grad_norm": 1.7741069563217309, - "learning_rate": 2.729784190922272e-06, - "loss": 0.7893, - "num_input_tokens_seen": 70743860, - "step": 3324 - }, - { - "epoch": 0.39980761137497745, - "grad_norm": 0.7990992189193481, - "learning_rate": 2.729058874178378e-06, - "loss": 0.5923, - "num_input_tokens_seen": 70814260, - "step": 3325 - }, - { - "epoch": 0.39992785426561656, - "grad_norm": 2.0606626556923238, - "learning_rate": 2.728333446836831e-06, - "loss": 0.6919, - "num_input_tokens_seen": 70835260, - "step": 3326 - }, - { - "epoch": 0.4000480971562556, - "grad_norm": 2.0781642183073745, - "learning_rate": 2.72760790900768e-06, - "loss": 0.7261, - "num_input_tokens_seen": 70851565, - "step": 3327 - }, - { - "epoch": 0.4001683400468947, - "grad_norm": 1.7296033608493382, - "learning_rate": 2.726882260800987e-06, - "loss": 0.7854, - "num_input_tokens_seen": 70870660, - "step": 3328 - }, - { - "epoch": 0.40028858293753383, - "grad_norm": 2.2164326382091377, - "learning_rate": 2.726156502326834e-06, - "loss": 0.7813, - "num_input_tokens_seen": 70891680, - "step": 3329 - }, - { - "epoch": 0.4004088258281729, - "grad_norm": 0.7209349574618269, - "learning_rate": 2.7254306336953165e-06, - "loss": 0.6264, - "num_input_tokens_seen": 70954480, - "step": 3330 - }, - { - "epoch": 0.400529068718812, - "grad_norm": 0.9488210746982167, - "learning_rate": 2.7247046550165485e-06, - "loss": 0.6233, - "num_input_tokens_seen": 71006325, - "step": 3331 - }, - { - "epoch": 0.4006493116094511, - "grad_norm": 7.116124344666175, - "learning_rate": 2.7239785664006606e-06, - "loss": 0.748, - "num_input_tokens_seen": 71029585, - "step": 3332 - }, - { - "epoch": 0.40076955450009016, - "grad_norm": 1.346597498015084, - "learning_rate": 2.7232523679578002e-06, - "loss": 0.6645, - "num_input_tokens_seen": 71092385, - "step": 3333 - }, - { - "epoch": 0.4008897973907293, - "grad_norm": 2.6243993211882013, - "learning_rate": 2.7225260597981295e-06, - "loss": 0.8011, - "num_input_tokens_seen": 71109810, - "step": 3334 - }, - { - "epoch": 0.4010100402813684, - "grad_norm": 2.82306580058909, - "learning_rate": 2.721799642031831e-06, - "loss": 0.7808, - "num_input_tokens_seen": 71125700, - "step": 3335 - }, - { - "epoch": 0.40113028317200744, - "grad_norm": 1.903367933987269, - "learning_rate": 2.7210731147691006e-06, - "loss": 0.7745, - "num_input_tokens_seen": 71143095, - "step": 3336 - }, - { - "epoch": 0.40125052606264655, - "grad_norm": 1.7018305039058201, - "learning_rate": 2.720346478120152e-06, - "loss": 0.7489, - "num_input_tokens_seen": 71162130, - "step": 3337 - }, - { - "epoch": 0.40137076895328566, - "grad_norm": 3.3172086444882303, - "learning_rate": 2.719619732195215e-06, - "loss": 0.7724, - "num_input_tokens_seen": 71183490, - "step": 3338 - }, - { - "epoch": 0.4014910118439247, - "grad_norm": 1.3452891425888045, - "learning_rate": 2.7188928771045377e-06, - "loss": 0.7244, - "num_input_tokens_seen": 71204530, - "step": 3339 - }, - { - "epoch": 0.4016112547345638, - "grad_norm": 2.1125488827217875, - "learning_rate": 2.7181659129583815e-06, - "loss": 0.7899, - "num_input_tokens_seen": 71223840, - "step": 3340 - }, - { - "epoch": 0.4017314976252029, - "grad_norm": 2.167538896620741, - "learning_rate": 2.717438839867028e-06, - "loss": 0.771, - "num_input_tokens_seen": 71242740, - "step": 3341 - }, - { - "epoch": 0.401851740515842, - "grad_norm": 1.8810478843528182, - "learning_rate": 2.716711657940772e-06, - "loss": 0.9098, - "num_input_tokens_seen": 71263470, - "step": 3342 - }, - { - "epoch": 0.4019719834064811, - "grad_norm": 0.8605382082240843, - "learning_rate": 2.7159843672899284e-06, - "loss": 0.6014, - "num_input_tokens_seen": 71327390, - "step": 3343 - }, - { - "epoch": 0.40209222629712016, - "grad_norm": 2.427243314264212, - "learning_rate": 2.715256968024825e-06, - "loss": 0.8108, - "num_input_tokens_seen": 71344185, - "step": 3344 - }, - { - "epoch": 0.40221246918775927, - "grad_norm": 1.5906958118657648, - "learning_rate": 2.714529460255809e-06, - "loss": 0.8188, - "num_input_tokens_seen": 71364615, - "step": 3345 - }, - { - "epoch": 0.4023327120783984, - "grad_norm": 2.1269068837307636, - "learning_rate": 2.713801844093241e-06, - "loss": 0.699, - "num_input_tokens_seen": 71385485, - "step": 3346 - }, - { - "epoch": 0.40245295496903744, - "grad_norm": 2.46450221200613, - "learning_rate": 2.7130741196475014e-06, - "loss": 0.8828, - "num_input_tokens_seen": 71403335, - "step": 3347 - }, - { - "epoch": 0.40257319785967655, - "grad_norm": 2.135534097827691, - "learning_rate": 2.7123462870289843e-06, - "loss": 0.8027, - "num_input_tokens_seen": 71423105, - "step": 3348 - }, - { - "epoch": 0.40269344075031566, - "grad_norm": 2.2663431012150768, - "learning_rate": 2.711618346348102e-06, - "loss": 0.802, - "num_input_tokens_seen": 71443350, - "step": 3349 - }, - { - "epoch": 0.4028136836409547, - "grad_norm": 1.794293838293988, - "learning_rate": 2.7108902977152825e-06, - "loss": 0.6223, - "num_input_tokens_seen": 71460970, - "step": 3350 - }, - { - "epoch": 0.4029339265315938, - "grad_norm": 2.041535216498876, - "learning_rate": 2.7101621412409704e-06, - "loss": 0.7408, - "num_input_tokens_seen": 71480175, - "step": 3351 - }, - { - "epoch": 0.40305416942223293, - "grad_norm": 1.944953994684264, - "learning_rate": 2.7094338770356256e-06, - "loss": 0.859, - "num_input_tokens_seen": 71498980, - "step": 3352 - }, - { - "epoch": 0.403174412312872, - "grad_norm": 3.004637121133846, - "learning_rate": 2.708705505209726e-06, - "loss": 0.6378, - "num_input_tokens_seen": 71519475, - "step": 3353 - }, - { - "epoch": 0.4032946552035111, - "grad_norm": 11.794291245593165, - "learning_rate": 2.7079770258737646e-06, - "loss": 0.9034, - "num_input_tokens_seen": 71537105, - "step": 3354 - }, - { - "epoch": 0.4034148980941502, - "grad_norm": 2.0043669583208135, - "learning_rate": 2.707248439138251e-06, - "loss": 0.7451, - "num_input_tokens_seen": 71553060, - "step": 3355 - }, - { - "epoch": 0.40353514098478926, - "grad_norm": 1.9080100617484956, - "learning_rate": 2.7065197451137114e-06, - "loss": 0.6505, - "num_input_tokens_seen": 71574160, - "step": 3356 - }, - { - "epoch": 0.4036553838754284, - "grad_norm": 2.701141185746899, - "learning_rate": 2.7057909439106894e-06, - "loss": 0.6768, - "num_input_tokens_seen": 71591735, - "step": 3357 - }, - { - "epoch": 0.40377562676606743, - "grad_norm": 2.579293453042113, - "learning_rate": 2.7050620356397413e-06, - "loss": 0.774, - "num_input_tokens_seen": 71610405, - "step": 3358 - }, - { - "epoch": 0.40389586965670654, - "grad_norm": 1.7218811201584583, - "learning_rate": 2.7043330204114437e-06, - "loss": 0.7224, - "num_input_tokens_seen": 71628835, - "step": 3359 - }, - { - "epoch": 0.40401611254734565, - "grad_norm": 1.9132685637761047, - "learning_rate": 2.7036038983363862e-06, - "loss": 0.8516, - "num_input_tokens_seen": 71645160, - "step": 3360 - }, - { - "epoch": 0.4041363554379847, - "grad_norm": 1.5614400481429616, - "learning_rate": 2.702874669525177e-06, - "loss": 0.8303, - "num_input_tokens_seen": 71663360, - "step": 3361 - }, - { - "epoch": 0.4042565983286238, - "grad_norm": 1.9208657977896524, - "learning_rate": 2.7021453340884394e-06, - "loss": 0.6895, - "num_input_tokens_seen": 71680805, - "step": 3362 - }, - { - "epoch": 0.40437684121926293, - "grad_norm": 2.3903682709742213, - "learning_rate": 2.7014158921368125e-06, - "loss": 0.728, - "num_input_tokens_seen": 71698850, - "step": 3363 - }, - { - "epoch": 0.404497084109902, - "grad_norm": 1.9227937481201445, - "learning_rate": 2.7006863437809525e-06, - "loss": 0.849, - "num_input_tokens_seen": 71718440, - "step": 3364 - }, - { - "epoch": 0.4046173270005411, - "grad_norm": 1.7791675423717466, - "learning_rate": 2.699956689131532e-06, - "loss": 0.8822, - "num_input_tokens_seen": 71738145, - "step": 3365 - }, - { - "epoch": 0.4047375698911802, - "grad_norm": 2.1601317625976537, - "learning_rate": 2.699226928299238e-06, - "loss": 0.8417, - "num_input_tokens_seen": 71755885, - "step": 3366 - }, - { - "epoch": 0.40485781278181926, - "grad_norm": 2.227614434519331, - "learning_rate": 2.698497061394775e-06, - "loss": 0.7938, - "num_input_tokens_seen": 71774090, - "step": 3367 - }, - { - "epoch": 0.40497805567245837, - "grad_norm": 2.4416102457337, - "learning_rate": 2.6977670885288627e-06, - "loss": 0.797, - "num_input_tokens_seen": 71795210, - "step": 3368 - }, - { - "epoch": 0.4050982985630975, - "grad_norm": 2.1329745140035494, - "learning_rate": 2.6970370098122378e-06, - "loss": 0.7473, - "num_input_tokens_seen": 71811915, - "step": 3369 - }, - { - "epoch": 0.40521854145373654, - "grad_norm": 1.5300164233140587, - "learning_rate": 2.696306825355653e-06, - "loss": 0.8537, - "num_input_tokens_seen": 71833020, - "step": 3370 - }, - { - "epoch": 0.40533878434437565, - "grad_norm": 2.547414563651443, - "learning_rate": 2.6955765352698763e-06, - "loss": 0.8463, - "num_input_tokens_seen": 71852885, - "step": 3371 - }, - { - "epoch": 0.40545902723501476, - "grad_norm": 2.7094369070598305, - "learning_rate": 2.6948461396656923e-06, - "loss": 0.7298, - "num_input_tokens_seen": 71870015, - "step": 3372 - }, - { - "epoch": 0.4055792701256538, - "grad_norm": 4.592481967398067, - "learning_rate": 2.6941156386539013e-06, - "loss": 0.7476, - "num_input_tokens_seen": 71889685, - "step": 3373 - }, - { - "epoch": 0.4056995130162929, - "grad_norm": 3.3914769032649903, - "learning_rate": 2.6933850323453203e-06, - "loss": 0.8099, - "num_input_tokens_seen": 71907850, - "step": 3374 - }, - { - "epoch": 0.405819755906932, - "grad_norm": 1.7676202574110453, - "learning_rate": 2.6926543208507806e-06, - "loss": 0.7428, - "num_input_tokens_seen": 71926250, - "step": 3375 - }, - { - "epoch": 0.4059399987975711, - "grad_norm": 5.777958806098189, - "learning_rate": 2.6919235042811316e-06, - "loss": 0.7945, - "num_input_tokens_seen": 71944755, - "step": 3376 - }, - { - "epoch": 0.4060602416882102, - "grad_norm": 5.030816000614874, - "learning_rate": 2.691192582747237e-06, - "loss": 0.7481, - "num_input_tokens_seen": 71964105, - "step": 3377 - }, - { - "epoch": 0.40618048457884925, - "grad_norm": 1.8463383960981143, - "learning_rate": 2.6904615563599765e-06, - "loss": 0.7271, - "num_input_tokens_seen": 71983625, - "step": 3378 - }, - { - "epoch": 0.40630072746948837, - "grad_norm": 6.913559298439623, - "learning_rate": 2.6897304252302477e-06, - "loss": 0.8278, - "num_input_tokens_seen": 72000665, - "step": 3379 - }, - { - "epoch": 0.4064209703601275, - "grad_norm": 0.8113896624976898, - "learning_rate": 2.6889991894689614e-06, - "loss": 0.5665, - "num_input_tokens_seen": 72056815, - "step": 3380 - }, - { - "epoch": 0.40654121325076653, - "grad_norm": 2.5257474518195138, - "learning_rate": 2.6882678491870464e-06, - "loss": 0.7468, - "num_input_tokens_seen": 72076970, - "step": 3381 - }, - { - "epoch": 0.40666145614140564, - "grad_norm": 1.591360558399751, - "learning_rate": 2.6875364044954453e-06, - "loss": 0.7064, - "num_input_tokens_seen": 72096920, - "step": 3382 - }, - { - "epoch": 0.40678169903204475, - "grad_norm": 1.6875527925298854, - "learning_rate": 2.6868048555051185e-06, - "loss": 0.8075, - "num_input_tokens_seen": 72118170, - "step": 3383 - }, - { - "epoch": 0.4069019419226838, - "grad_norm": 3.0730493759885644, - "learning_rate": 2.686073202327041e-06, - "loss": 0.8541, - "num_input_tokens_seen": 72136890, - "step": 3384 - }, - { - "epoch": 0.4070221848133229, - "grad_norm": 1.5875785528798778, - "learning_rate": 2.6853414450722043e-06, - "loss": 0.7334, - "num_input_tokens_seen": 72156275, - "step": 3385 - }, - { - "epoch": 0.40714242770396203, - "grad_norm": 1.8434888944843437, - "learning_rate": 2.684609583851615e-06, - "loss": 0.8428, - "num_input_tokens_seen": 72174170, - "step": 3386 - }, - { - "epoch": 0.4072626705946011, - "grad_norm": 1.5530172579210408, - "learning_rate": 2.683877618776297e-06, - "loss": 0.7983, - "num_input_tokens_seen": 72196145, - "step": 3387 - }, - { - "epoch": 0.4073829134852402, - "grad_norm": 2.4212531539117, - "learning_rate": 2.6831455499572876e-06, - "loss": 0.7365, - "num_input_tokens_seen": 72213800, - "step": 3388 - }, - { - "epoch": 0.40750315637587925, - "grad_norm": 2.8054542417657453, - "learning_rate": 2.6824133775056415e-06, - "loss": 0.7776, - "num_input_tokens_seen": 72232325, - "step": 3389 - }, - { - "epoch": 0.40762339926651836, - "grad_norm": 1.8316823881615452, - "learning_rate": 2.6816811015324284e-06, - "loss": 0.7604, - "num_input_tokens_seen": 72250095, - "step": 3390 - }, - { - "epoch": 0.40774364215715747, - "grad_norm": 0.8035028203670925, - "learning_rate": 2.6809487221487343e-06, - "loss": 0.6183, - "num_input_tokens_seen": 72309300, - "step": 3391 - }, - { - "epoch": 0.4078638850477965, - "grad_norm": 3.1388870643945537, - "learning_rate": 2.68021623946566e-06, - "loss": 0.8218, - "num_input_tokens_seen": 72325730, - "step": 3392 - }, - { - "epoch": 0.40798412793843564, - "grad_norm": 1.7038207540118961, - "learning_rate": 2.679483653594324e-06, - "loss": 0.7244, - "num_input_tokens_seen": 72347220, - "step": 3393 - }, - { - "epoch": 0.40810437082907475, - "grad_norm": 2.3126524594406415, - "learning_rate": 2.678750964645857e-06, - "loss": 0.759, - "num_input_tokens_seen": 72366020, - "step": 3394 - }, - { - "epoch": 0.4082246137197138, - "grad_norm": 3.0687312899024475, - "learning_rate": 2.6780181727314094e-06, - "loss": 0.8448, - "num_input_tokens_seen": 72380645, - "step": 3395 - }, - { - "epoch": 0.4083448566103529, - "grad_norm": 1.7945888033474224, - "learning_rate": 2.6772852779621435e-06, - "loss": 0.7745, - "num_input_tokens_seen": 72398225, - "step": 3396 - }, - { - "epoch": 0.408465099500992, - "grad_norm": 2.557111457315078, - "learning_rate": 2.676552280449239e-06, - "loss": 0.8435, - "num_input_tokens_seen": 72417830, - "step": 3397 - }, - { - "epoch": 0.4085853423916311, - "grad_norm": 2.3227998784196187, - "learning_rate": 2.6758191803038917e-06, - "loss": 0.7573, - "num_input_tokens_seen": 72436045, - "step": 3398 - }, - { - "epoch": 0.4087055852822702, - "grad_norm": 1.7948690312614335, - "learning_rate": 2.6750859776373125e-06, - "loss": 0.8255, - "num_input_tokens_seen": 72455220, - "step": 3399 - }, - { - "epoch": 0.4088258281729093, - "grad_norm": 1.1402348242436562, - "learning_rate": 2.674352672560727e-06, - "loss": 0.6191, - "num_input_tokens_seen": 72516385, - "step": 3400 - }, - { - "epoch": 0.40894607106354836, - "grad_norm": 1.5677046797627825, - "learning_rate": 2.673619265185377e-06, - "loss": 0.765, - "num_input_tokens_seen": 72535945, - "step": 3401 - }, - { - "epoch": 0.40906631395418747, - "grad_norm": 1.7747198813534124, - "learning_rate": 2.672885755622521e-06, - "loss": 0.7655, - "num_input_tokens_seen": 72558080, - "step": 3402 - }, - { - "epoch": 0.4091865568448266, - "grad_norm": 2.29491756702475, - "learning_rate": 2.67215214398343e-06, - "loss": 0.6968, - "num_input_tokens_seen": 72577815, - "step": 3403 - }, - { - "epoch": 0.40930679973546563, - "grad_norm": 2.0762255367772293, - "learning_rate": 2.671418430379393e-06, - "loss": 0.7703, - "num_input_tokens_seen": 72596220, - "step": 3404 - }, - { - "epoch": 0.40942704262610474, - "grad_norm": 2.5708911522977465, - "learning_rate": 2.670684614921715e-06, - "loss": 0.822, - "num_input_tokens_seen": 72614915, - "step": 3405 - }, - { - "epoch": 0.4095472855167438, - "grad_norm": 2.38551915398837, - "learning_rate": 2.6699506977217128e-06, - "loss": 0.6906, - "num_input_tokens_seen": 72634810, - "step": 3406 - }, - { - "epoch": 0.4096675284073829, - "grad_norm": 2.428873090592351, - "learning_rate": 2.6692166788907233e-06, - "loss": 0.6914, - "num_input_tokens_seen": 72654725, - "step": 3407 - }, - { - "epoch": 0.409787771298022, - "grad_norm": 1.8644545769128333, - "learning_rate": 2.668482558540095e-06, - "loss": 0.7601, - "num_input_tokens_seen": 72673390, - "step": 3408 - }, - { - "epoch": 0.4099080141886611, - "grad_norm": 0.9027901520175735, - "learning_rate": 2.6677483367811947e-06, - "loss": 0.7176, - "num_input_tokens_seen": 72733150, - "step": 3409 - }, - { - "epoch": 0.4100282570793002, - "grad_norm": 1.717554401635684, - "learning_rate": 2.6670140137254028e-06, - "loss": 0.7495, - "num_input_tokens_seen": 72752345, - "step": 3410 - }, - { - "epoch": 0.4101484999699393, - "grad_norm": 2.3632587715431055, - "learning_rate": 2.666279589484115e-06, - "loss": 0.8718, - "num_input_tokens_seen": 72769965, - "step": 3411 - }, - { - "epoch": 0.41026874286057835, - "grad_norm": 1.9324442147914254, - "learning_rate": 2.6655450641687435e-06, - "loss": 0.8043, - "num_input_tokens_seen": 72787250, - "step": 3412 - }, - { - "epoch": 0.41038898575121746, - "grad_norm": 1.766564394940495, - "learning_rate": 2.664810437890715e-06, - "loss": 0.6841, - "num_input_tokens_seen": 72808640, - "step": 3413 - }, - { - "epoch": 0.41050922864185657, - "grad_norm": 1.9560068747597648, - "learning_rate": 2.664075710761471e-06, - "loss": 0.7967, - "num_input_tokens_seen": 72826455, - "step": 3414 - }, - { - "epoch": 0.4106294715324956, - "grad_norm": 2.5234970454556436, - "learning_rate": 2.6633408828924697e-06, - "loss": 0.7017, - "num_input_tokens_seen": 72845040, - "step": 3415 - }, - { - "epoch": 0.41074971442313474, - "grad_norm": 1.6584124478998463, - "learning_rate": 2.6626059543951844e-06, - "loss": 0.6936, - "num_input_tokens_seen": 72864720, - "step": 3416 - }, - { - "epoch": 0.41086995731377385, - "grad_norm": 1.6685018982143598, - "learning_rate": 2.6618709253811027e-06, - "loss": 0.8305, - "num_input_tokens_seen": 72883895, - "step": 3417 - }, - { - "epoch": 0.4109902002044129, - "grad_norm": 1.5649469194109542, - "learning_rate": 2.6611357959617277e-06, - "loss": 0.8692, - "num_input_tokens_seen": 72903235, - "step": 3418 - }, - { - "epoch": 0.411110443095052, - "grad_norm": 2.0446665323212616, - "learning_rate": 2.660400566248578e-06, - "loss": 0.9001, - "num_input_tokens_seen": 72921080, - "step": 3419 - }, - { - "epoch": 0.41123068598569107, - "grad_norm": 2.700654350729606, - "learning_rate": 2.6596652363531876e-06, - "loss": 0.6686, - "num_input_tokens_seen": 72936675, - "step": 3420 - }, - { - "epoch": 0.4113509288763302, - "grad_norm": 1.7712489812446686, - "learning_rate": 2.6589298063871055e-06, - "loss": 0.7783, - "num_input_tokens_seen": 72956570, - "step": 3421 - }, - { - "epoch": 0.4114711717669693, - "grad_norm": 2.1367957345825053, - "learning_rate": 2.658194276461895e-06, - "loss": 0.6995, - "num_input_tokens_seen": 72974215, - "step": 3422 - }, - { - "epoch": 0.41159141465760835, - "grad_norm": 2.1442624824450736, - "learning_rate": 2.6574586466891368e-06, - "loss": 0.6713, - "num_input_tokens_seen": 72994410, - "step": 3423 - }, - { - "epoch": 0.41171165754824746, - "grad_norm": 2.008027261075277, - "learning_rate": 2.6567229171804247e-06, - "loss": 0.6416, - "num_input_tokens_seen": 73012015, - "step": 3424 - }, - { - "epoch": 0.41183190043888657, - "grad_norm": 2.374680694092268, - "learning_rate": 2.655987088047368e-06, - "loss": 0.8729, - "num_input_tokens_seen": 73030080, - "step": 3425 - }, - { - "epoch": 0.4119521433295256, - "grad_norm": 3.753176033590248, - "learning_rate": 2.6552511594015912e-06, - "loss": 0.7778, - "num_input_tokens_seen": 73050190, - "step": 3426 - }, - { - "epoch": 0.41207238622016473, - "grad_norm": 2.716260329618953, - "learning_rate": 2.654515131354735e-06, - "loss": 0.8486, - "num_input_tokens_seen": 73068175, - "step": 3427 - }, - { - "epoch": 0.41219262911080384, - "grad_norm": 1.9204232488094897, - "learning_rate": 2.653779004018453e-06, - "loss": 0.8492, - "num_input_tokens_seen": 73088460, - "step": 3428 - }, - { - "epoch": 0.4123128720014429, - "grad_norm": 3.0030937122011734, - "learning_rate": 2.653042777504417e-06, - "loss": 0.8115, - "num_input_tokens_seen": 73110770, - "step": 3429 - }, - { - "epoch": 0.412433114892082, - "grad_norm": 1.8567094102953141, - "learning_rate": 2.65230645192431e-06, - "loss": 0.793, - "num_input_tokens_seen": 73130060, - "step": 3430 - }, - { - "epoch": 0.4125533577827211, - "grad_norm": 2.7712165325077045, - "learning_rate": 2.6515700273898333e-06, - "loss": 0.7851, - "num_input_tokens_seen": 73147655, - "step": 3431 - }, - { - "epoch": 0.4126736006733602, - "grad_norm": 2.1073799195345386, - "learning_rate": 2.6508335040127018e-06, - "loss": 0.68, - "num_input_tokens_seen": 73167070, - "step": 3432 - }, - { - "epoch": 0.4127938435639993, - "grad_norm": 2.511661563899412, - "learning_rate": 2.650096881904645e-06, - "loss": 0.7689, - "num_input_tokens_seen": 73187090, - "step": 3433 - }, - { - "epoch": 0.4129140864546384, - "grad_norm": 2.7349122445133958, - "learning_rate": 2.649360161177408e-06, - "loss": 0.5957, - "num_input_tokens_seen": 73201870, - "step": 3434 - }, - { - "epoch": 0.41303432934527745, - "grad_norm": 2.449098280609841, - "learning_rate": 2.6486233419427504e-06, - "loss": 0.7332, - "num_input_tokens_seen": 73221405, - "step": 3435 - }, - { - "epoch": 0.41315457223591656, - "grad_norm": 2.5354329871752146, - "learning_rate": 2.647886424312448e-06, - "loss": 0.7536, - "num_input_tokens_seen": 73240790, - "step": 3436 - }, - { - "epoch": 0.4132748151265556, - "grad_norm": 1.7620370453665812, - "learning_rate": 2.6471494083982903e-06, - "loss": 0.8434, - "num_input_tokens_seen": 73259895, - "step": 3437 - }, - { - "epoch": 0.4133950580171947, - "grad_norm": 1.7658132694452524, - "learning_rate": 2.6464122943120813e-06, - "loss": 0.7431, - "num_input_tokens_seen": 73279840, - "step": 3438 - }, - { - "epoch": 0.41351530090783384, - "grad_norm": 3.6806907473492183, - "learning_rate": 2.645675082165642e-06, - "loss": 0.8197, - "num_input_tokens_seen": 73295770, - "step": 3439 - }, - { - "epoch": 0.4136355437984729, - "grad_norm": 3.014424186075861, - "learning_rate": 2.644937772070806e-06, - "loss": 0.7445, - "num_input_tokens_seen": 73313935, - "step": 3440 - }, - { - "epoch": 0.413755786689112, - "grad_norm": 2.3055111545007203, - "learning_rate": 2.6442003641394225e-06, - "loss": 0.8297, - "num_input_tokens_seen": 73331250, - "step": 3441 - }, - { - "epoch": 0.4138760295797511, - "grad_norm": 1.6922175703552575, - "learning_rate": 2.643462858483356e-06, - "loss": 0.8313, - "num_input_tokens_seen": 73351255, - "step": 3442 - }, - { - "epoch": 0.41399627247039017, - "grad_norm": 2.240576647271395, - "learning_rate": 2.6427252552144856e-06, - "loss": 0.7277, - "num_input_tokens_seen": 73369625, - "step": 3443 - }, - { - "epoch": 0.4141165153610293, - "grad_norm": 1.932192897198705, - "learning_rate": 2.6419875544447044e-06, - "loss": 0.7492, - "num_input_tokens_seen": 73390745, - "step": 3444 - }, - { - "epoch": 0.4142367582516684, - "grad_norm": 1.641507784456658, - "learning_rate": 2.6412497562859218e-06, - "loss": 0.717, - "num_input_tokens_seen": 73411745, - "step": 3445 - }, - { - "epoch": 0.41435700114230745, - "grad_norm": 2.2158735478095166, - "learning_rate": 2.6405118608500617e-06, - "loss": 0.7503, - "num_input_tokens_seen": 73430290, - "step": 3446 - }, - { - "epoch": 0.41447724403294656, - "grad_norm": 1.8071482376672585, - "learning_rate": 2.6397738682490613e-06, - "loss": 0.797, - "num_input_tokens_seen": 73450910, - "step": 3447 - }, - { - "epoch": 0.41459748692358567, - "grad_norm": 1.6503833926226124, - "learning_rate": 2.6390357785948734e-06, - "loss": 0.7475, - "num_input_tokens_seen": 73467745, - "step": 3448 - }, - { - "epoch": 0.4147177298142247, - "grad_norm": 1.8316376925084643, - "learning_rate": 2.6382975919994667e-06, - "loss": 0.7956, - "num_input_tokens_seen": 73488040, - "step": 3449 - }, - { - "epoch": 0.41483797270486383, - "grad_norm": 1.783113943258856, - "learning_rate": 2.637559308574822e-06, - "loss": 0.7193, - "num_input_tokens_seen": 73507505, - "step": 3450 - }, - { - "epoch": 0.4149582155955029, - "grad_norm": 1.9927709868414059, - "learning_rate": 2.6368209284329376e-06, - "loss": 0.7173, - "num_input_tokens_seen": 73527855, - "step": 3451 - }, - { - "epoch": 0.415078458486142, - "grad_norm": 1.9887636836649296, - "learning_rate": 2.6360824516858244e-06, - "loss": 0.753, - "num_input_tokens_seen": 73545775, - "step": 3452 - }, - { - "epoch": 0.4151987013767811, - "grad_norm": 1.6567403783392922, - "learning_rate": 2.635343878445509e-06, - "loss": 0.8546, - "num_input_tokens_seen": 73568780, - "step": 3453 - }, - { - "epoch": 0.41531894426742016, - "grad_norm": 2.6656749830158812, - "learning_rate": 2.6346052088240326e-06, - "loss": 0.702, - "num_input_tokens_seen": 73588020, - "step": 3454 - }, - { - "epoch": 0.4154391871580593, - "grad_norm": 3.8473370892630014, - "learning_rate": 2.633866442933451e-06, - "loss": 0.7696, - "num_input_tokens_seen": 73604085, - "step": 3455 - }, - { - "epoch": 0.4155594300486984, - "grad_norm": 2.2345336269226723, - "learning_rate": 2.633127580885833e-06, - "loss": 0.8311, - "num_input_tokens_seen": 73618305, - "step": 3456 - }, - { - "epoch": 0.41567967293933744, - "grad_norm": 2.037844493782449, - "learning_rate": 2.632388622793265e-06, - "loss": 0.6485, - "num_input_tokens_seen": 73637180, - "step": 3457 - }, - { - "epoch": 0.41579991582997655, - "grad_norm": 1.850081695004342, - "learning_rate": 2.6316495687678457e-06, - "loss": 0.6773, - "num_input_tokens_seen": 73655550, - "step": 3458 - }, - { - "epoch": 0.41592015872061566, - "grad_norm": 2.4370931376098244, - "learning_rate": 2.6309104189216887e-06, - "loss": 0.7565, - "num_input_tokens_seen": 73672835, - "step": 3459 - }, - { - "epoch": 0.4160404016112547, - "grad_norm": 2.549368032416684, - "learning_rate": 2.6301711733669226e-06, - "loss": 0.7405, - "num_input_tokens_seen": 73688355, - "step": 3460 - }, - { - "epoch": 0.41616064450189383, - "grad_norm": 2.72231647067919, - "learning_rate": 2.629431832215691e-06, - "loss": 0.7455, - "num_input_tokens_seen": 73702880, - "step": 3461 - }, - { - "epoch": 0.41628088739253294, - "grad_norm": 4.329501779951195, - "learning_rate": 2.628692395580151e-06, - "loss": 0.8692, - "num_input_tokens_seen": 73722690, - "step": 3462 - }, - { - "epoch": 0.416401130283172, - "grad_norm": 1.782933477662036, - "learning_rate": 2.6279528635724747e-06, - "loss": 0.7911, - "num_input_tokens_seen": 73742565, - "step": 3463 - }, - { - "epoch": 0.4165213731738111, - "grad_norm": 2.789999562999678, - "learning_rate": 2.6272132363048478e-06, - "loss": 0.7839, - "num_input_tokens_seen": 73759085, - "step": 3464 - }, - { - "epoch": 0.4166416160644502, - "grad_norm": 2.2255415727290995, - "learning_rate": 2.626473513889472e-06, - "loss": 0.6949, - "num_input_tokens_seen": 73781185, - "step": 3465 - }, - { - "epoch": 0.41676185895508927, - "grad_norm": 1.9701508551618336, - "learning_rate": 2.625733696438562e-06, - "loss": 0.8217, - "num_input_tokens_seen": 73798410, - "step": 3466 - }, - { - "epoch": 0.4168821018457284, - "grad_norm": 1.7359499319730178, - "learning_rate": 2.6249937840643476e-06, - "loss": 0.755, - "num_input_tokens_seen": 73816435, - "step": 3467 - }, - { - "epoch": 0.41700234473636744, - "grad_norm": 2.463008962257231, - "learning_rate": 2.6242537768790733e-06, - "loss": 0.6718, - "num_input_tokens_seen": 73835310, - "step": 3468 - }, - { - "epoch": 0.41712258762700655, - "grad_norm": 2.094473969489827, - "learning_rate": 2.6235136749949975e-06, - "loss": 0.6861, - "num_input_tokens_seen": 73858480, - "step": 3469 - }, - { - "epoch": 0.41724283051764566, - "grad_norm": 2.366165033959666, - "learning_rate": 2.6227734785243924e-06, - "loss": 0.6131, - "num_input_tokens_seen": 73878160, - "step": 3470 - }, - { - "epoch": 0.4173630734082847, - "grad_norm": 1.96040709455256, - "learning_rate": 2.6220331875795466e-06, - "loss": 0.7845, - "num_input_tokens_seen": 73897230, - "step": 3471 - }, - { - "epoch": 0.4174833162989238, - "grad_norm": 1.6332146106739969, - "learning_rate": 2.62129280227276e-06, - "loss": 0.7507, - "num_input_tokens_seen": 73916950, - "step": 3472 - }, - { - "epoch": 0.41760355918956293, - "grad_norm": 7.142381751001548, - "learning_rate": 2.62055232271635e-06, - "loss": 0.6875, - "num_input_tokens_seen": 73943855, - "step": 3473 - }, - { - "epoch": 0.417723802080202, - "grad_norm": 2.1875974686545967, - "learning_rate": 2.619811749022645e-06, - "loss": 0.8795, - "num_input_tokens_seen": 73958885, - "step": 3474 - }, - { - "epoch": 0.4178440449708411, - "grad_norm": 2.6680836482395422, - "learning_rate": 2.6190710813039917e-06, - "loss": 0.7098, - "num_input_tokens_seen": 73971730, - "step": 3475 - }, - { - "epoch": 0.4179642878614802, - "grad_norm": 2.7845246937491948, - "learning_rate": 2.618330319672747e-06, - "loss": 0.8372, - "num_input_tokens_seen": 73990870, - "step": 3476 - }, - { - "epoch": 0.41808453075211927, - "grad_norm": 1.9065108652369176, - "learning_rate": 2.6175894642412846e-06, - "loss": 0.9097, - "num_input_tokens_seen": 74004990, - "step": 3477 - }, - { - "epoch": 0.4182047736427584, - "grad_norm": 2.0716710654423336, - "learning_rate": 2.6168485151219914e-06, - "loss": 0.7319, - "num_input_tokens_seen": 74024330, - "step": 3478 - }, - { - "epoch": 0.4183250165333975, - "grad_norm": 3.2746381334993773, - "learning_rate": 2.616107472427269e-06, - "loss": 0.702, - "num_input_tokens_seen": 74038745, - "step": 3479 - }, - { - "epoch": 0.41844525942403654, - "grad_norm": 2.4956682460770603, - "learning_rate": 2.6153663362695325e-06, - "loss": 0.7632, - "num_input_tokens_seen": 74052130, - "step": 3480 - }, - { - "epoch": 0.41856550231467565, - "grad_norm": 2.686935105171513, - "learning_rate": 2.6146251067612126e-06, - "loss": 0.7919, - "num_input_tokens_seen": 74067325, - "step": 3481 - }, - { - "epoch": 0.41868574520531476, - "grad_norm": 1.8929240802347667, - "learning_rate": 2.613883784014752e-06, - "loss": 0.8097, - "num_input_tokens_seen": 74086080, - "step": 3482 - }, - { - "epoch": 0.4188059880959538, - "grad_norm": 3.320399801687348, - "learning_rate": 2.6131423681426103e-06, - "loss": 0.7729, - "num_input_tokens_seen": 74101715, - "step": 3483 - }, - { - "epoch": 0.41892623098659293, - "grad_norm": 1.738595311398414, - "learning_rate": 2.6124008592572587e-06, - "loss": 0.7293, - "num_input_tokens_seen": 74125420, - "step": 3484 - }, - { - "epoch": 0.419046473877232, - "grad_norm": 2.2315833168098234, - "learning_rate": 2.6116592574711835e-06, - "loss": 0.8081, - "num_input_tokens_seen": 74143440, - "step": 3485 - }, - { - "epoch": 0.4191667167678711, - "grad_norm": 2.458298281802693, - "learning_rate": 2.610917562896885e-06, - "loss": 0.8297, - "num_input_tokens_seen": 74162925, - "step": 3486 - }, - { - "epoch": 0.4192869596585102, - "grad_norm": 10.526570370811093, - "learning_rate": 2.610175775646878e-06, - "loss": 0.8334, - "num_input_tokens_seen": 74181225, - "step": 3487 - }, - { - "epoch": 0.41940720254914926, - "grad_norm": 2.23643778482333, - "learning_rate": 2.6094338958336907e-06, - "loss": 0.7422, - "num_input_tokens_seen": 74199615, - "step": 3488 - }, - { - "epoch": 0.41952744543978837, - "grad_norm": 2.1117603086178605, - "learning_rate": 2.608691923569867e-06, - "loss": 0.8173, - "num_input_tokens_seen": 74216210, - "step": 3489 - }, - { - "epoch": 0.4196476883304275, - "grad_norm": 1.6016034418373954, - "learning_rate": 2.6079498589679616e-06, - "loss": 0.7524, - "num_input_tokens_seen": 74237020, - "step": 3490 - }, - { - "epoch": 0.41976793122106654, - "grad_norm": 1.9207048359616539, - "learning_rate": 2.6072077021405465e-06, - "loss": 0.761, - "num_input_tokens_seen": 74255575, - "step": 3491 - }, - { - "epoch": 0.41988817411170565, - "grad_norm": 1.7194715663200137, - "learning_rate": 2.6064654532002054e-06, - "loss": 0.6904, - "num_input_tokens_seen": 74274305, - "step": 3492 - }, - { - "epoch": 0.42000841700234476, - "grad_norm": 1.6324013058500164, - "learning_rate": 2.6057231122595375e-06, - "loss": 0.7512, - "num_input_tokens_seen": 74295335, - "step": 3493 - }, - { - "epoch": 0.4201286598929838, - "grad_norm": 1.9937085385972362, - "learning_rate": 2.604980679431154e-06, - "loss": 0.7276, - "num_input_tokens_seen": 74313295, - "step": 3494 - }, - { - "epoch": 0.4202489027836229, - "grad_norm": 2.1162838096830985, - "learning_rate": 2.604238154827684e-06, - "loss": 0.7467, - "num_input_tokens_seen": 74329640, - "step": 3495 - }, - { - "epoch": 0.42036914567426203, - "grad_norm": 3.4738411531268016, - "learning_rate": 2.6034955385617652e-06, - "loss": 0.7306, - "num_input_tokens_seen": 74347690, - "step": 3496 - }, - { - "epoch": 0.4204893885649011, - "grad_norm": 1.4938675897660767, - "learning_rate": 2.6027528307460536e-06, - "loss": 0.6636, - "num_input_tokens_seen": 74411415, - "step": 3497 - }, - { - "epoch": 0.4206096314555402, - "grad_norm": 2.1061221610536487, - "learning_rate": 2.602010031493217e-06, - "loss": 0.8542, - "num_input_tokens_seen": 74429365, - "step": 3498 - }, - { - "epoch": 0.42072987434617926, - "grad_norm": 2.079517926701649, - "learning_rate": 2.6012671409159376e-06, - "loss": 0.8688, - "num_input_tokens_seen": 74450420, - "step": 3499 - }, - { - "epoch": 0.42085011723681837, - "grad_norm": 2.0728196568071477, - "learning_rate": 2.6005241591269097e-06, - "loss": 0.818, - "num_input_tokens_seen": 74469510, - "step": 3500 - }, - { - "epoch": 0.4209703601274575, - "grad_norm": 1.640490933828694, - "learning_rate": 2.5997810862388454e-06, - "loss": 0.7981, - "num_input_tokens_seen": 74489070, - "step": 3501 - }, - { - "epoch": 0.42109060301809653, - "grad_norm": 3.9024981982776588, - "learning_rate": 2.5990379223644666e-06, - "loss": 0.7599, - "num_input_tokens_seen": 74507690, - "step": 3502 - }, - { - "epoch": 0.42121084590873564, - "grad_norm": 2.4385864910852524, - "learning_rate": 2.5982946676165112e-06, - "loss": 0.7502, - "num_input_tokens_seen": 74527180, - "step": 3503 - }, - { - "epoch": 0.42133108879937475, - "grad_norm": 0.7853580114040855, - "learning_rate": 2.597551322107731e-06, - "loss": 0.6048, - "num_input_tokens_seen": 74590870, - "step": 3504 - }, - { - "epoch": 0.4214513316900138, - "grad_norm": 2.2241793963271643, - "learning_rate": 2.5968078859508897e-06, - "loss": 0.8761, - "num_input_tokens_seen": 74607790, - "step": 3505 - }, - { - "epoch": 0.4215715745806529, - "grad_norm": 2.8289086951487823, - "learning_rate": 2.5960643592587673e-06, - "loss": 0.7954, - "num_input_tokens_seen": 74624920, - "step": 3506 - }, - { - "epoch": 0.42169181747129203, - "grad_norm": 1.9406197368302194, - "learning_rate": 2.5953207421441553e-06, - "loss": 0.8188, - "num_input_tokens_seen": 74643240, - "step": 3507 - }, - { - "epoch": 0.4218120603619311, - "grad_norm": 2.6330363717547542, - "learning_rate": 2.59457703471986e-06, - "loss": 0.7409, - "num_input_tokens_seen": 74661115, - "step": 3508 - }, - { - "epoch": 0.4219323032525702, - "grad_norm": 1.8702127292555886, - "learning_rate": 2.593833237098701e-06, - "loss": 0.8265, - "num_input_tokens_seen": 74678435, - "step": 3509 - }, - { - "epoch": 0.4220525461432093, - "grad_norm": 2.2151419581980987, - "learning_rate": 2.593089349393512e-06, - "loss": 0.6328, - "num_input_tokens_seen": 74698645, - "step": 3510 - }, - { - "epoch": 0.42217278903384836, - "grad_norm": 4.290503011914125, - "learning_rate": 2.592345371717141e-06, - "loss": 0.8388, - "num_input_tokens_seen": 74717895, - "step": 3511 - }, - { - "epoch": 0.42229303192448747, - "grad_norm": 2.2050791920259982, - "learning_rate": 2.591601304182448e-06, - "loss": 0.7029, - "num_input_tokens_seen": 74735585, - "step": 3512 - }, - { - "epoch": 0.4224132748151266, - "grad_norm": 1.6329320103893197, - "learning_rate": 2.5908571469023067e-06, - "loss": 0.7848, - "num_input_tokens_seen": 74754790, - "step": 3513 - }, - { - "epoch": 0.42253351770576564, - "grad_norm": 2.2098417963556685, - "learning_rate": 2.5901128999896067e-06, - "loss": 0.7544, - "num_input_tokens_seen": 74769940, - "step": 3514 - }, - { - "epoch": 0.42265376059640475, - "grad_norm": 1.6964963258270764, - "learning_rate": 2.5893685635572487e-06, - "loss": 0.6849, - "num_input_tokens_seen": 74790510, - "step": 3515 - }, - { - "epoch": 0.4227740034870438, - "grad_norm": 2.0269413478013485, - "learning_rate": 2.5886241377181483e-06, - "loss": 0.6947, - "num_input_tokens_seen": 74809100, - "step": 3516 - }, - { - "epoch": 0.4228942463776829, - "grad_norm": 1.7284362468897982, - "learning_rate": 2.587879622585234e-06, - "loss": 0.8055, - "num_input_tokens_seen": 74827420, - "step": 3517 - }, - { - "epoch": 0.423014489268322, - "grad_norm": 2.0690724226656383, - "learning_rate": 2.587135018271448e-06, - "loss": 0.7567, - "num_input_tokens_seen": 74848020, - "step": 3518 - }, - { - "epoch": 0.4231347321589611, - "grad_norm": 1.9716679129922656, - "learning_rate": 2.5863903248897475e-06, - "loss": 0.8021, - "num_input_tokens_seen": 74863640, - "step": 3519 - }, - { - "epoch": 0.4232549750496002, - "grad_norm": 2.415037916956387, - "learning_rate": 2.5856455425531003e-06, - "loss": 0.6616, - "num_input_tokens_seen": 74884835, - "step": 3520 - }, - { - "epoch": 0.4233752179402393, - "grad_norm": 1.8311904524340907, - "learning_rate": 2.5849006713744906e-06, - "loss": 0.8039, - "num_input_tokens_seen": 74903350, - "step": 3521 - }, - { - "epoch": 0.42349546083087836, - "grad_norm": 2.5168038229864433, - "learning_rate": 2.5841557114669135e-06, - "loss": 0.7268, - "num_input_tokens_seen": 74919930, - "step": 3522 - }, - { - "epoch": 0.42361570372151747, - "grad_norm": 15.38933095735968, - "learning_rate": 2.58341066294338e-06, - "loss": 0.6724, - "num_input_tokens_seen": 74936315, - "step": 3523 - }, - { - "epoch": 0.4237359466121566, - "grad_norm": 2.567370607508526, - "learning_rate": 2.582665525916912e-06, - "loss": 0.8557, - "num_input_tokens_seen": 74954690, - "step": 3524 - }, - { - "epoch": 0.42385618950279563, - "grad_norm": 2.4248226776410298, - "learning_rate": 2.5819203005005475e-06, - "loss": 0.8986, - "num_input_tokens_seen": 74971745, - "step": 3525 - }, - { - "epoch": 0.42397643239343474, - "grad_norm": 1.8552888965772123, - "learning_rate": 2.5811749868073355e-06, - "loss": 0.777, - "num_input_tokens_seen": 74991700, - "step": 3526 - }, - { - "epoch": 0.42409667528407385, - "grad_norm": 2.2740155805463145, - "learning_rate": 2.5804295849503414e-06, - "loss": 0.9069, - "num_input_tokens_seen": 75007170, - "step": 3527 - }, - { - "epoch": 0.4242169181747129, - "grad_norm": 2.3355500094581076, - "learning_rate": 2.5796840950426397e-06, - "loss": 0.6543, - "num_input_tokens_seen": 75023975, - "step": 3528 - }, - { - "epoch": 0.424337161065352, - "grad_norm": 1.831363347479086, - "learning_rate": 2.578938517197322e-06, - "loss": 0.6523, - "num_input_tokens_seen": 75041790, - "step": 3529 - }, - { - "epoch": 0.4244574039559911, - "grad_norm": 2.5770680635795666, - "learning_rate": 2.578192851527491e-06, - "loss": 0.6278, - "num_input_tokens_seen": 75060230, - "step": 3530 - }, - { - "epoch": 0.4245776468466302, - "grad_norm": 3.521474364436042, - "learning_rate": 2.577447098146265e-06, - "loss": 0.6805, - "num_input_tokens_seen": 75077125, - "step": 3531 - }, - { - "epoch": 0.4246978897372693, - "grad_norm": 1.6822951602955885, - "learning_rate": 2.5767012571667724e-06, - "loss": 0.78, - "num_input_tokens_seen": 75096325, - "step": 3532 - }, - { - "epoch": 0.42481813262790835, - "grad_norm": 2.007936744264856, - "learning_rate": 2.5759553287021587e-06, - "loss": 0.6795, - "num_input_tokens_seen": 75114375, - "step": 3533 - }, - { - "epoch": 0.42493837551854746, - "grad_norm": 2.1073572231889295, - "learning_rate": 2.5752093128655786e-06, - "loss": 0.7664, - "num_input_tokens_seen": 75132340, - "step": 3534 - }, - { - "epoch": 0.4250586184091866, - "grad_norm": 2.204380204621364, - "learning_rate": 2.574463209770204e-06, - "loss": 0.73, - "num_input_tokens_seen": 75151375, - "step": 3535 - }, - { - "epoch": 0.42517886129982563, - "grad_norm": 1.8377186689738227, - "learning_rate": 2.5737170195292165e-06, - "loss": 0.7852, - "num_input_tokens_seen": 75174430, - "step": 3536 - }, - { - "epoch": 0.42529910419046474, - "grad_norm": 2.0979206057256055, - "learning_rate": 2.572970742255814e-06, - "loss": 0.7718, - "num_input_tokens_seen": 75192640, - "step": 3537 - }, - { - "epoch": 0.42541934708110385, - "grad_norm": 1.8470224395050348, - "learning_rate": 2.5722243780632046e-06, - "loss": 0.8093, - "num_input_tokens_seen": 75210625, - "step": 3538 - }, - { - "epoch": 0.4255395899717429, - "grad_norm": 0.8357680552186567, - "learning_rate": 2.5714779270646125e-06, - "loss": 0.6544, - "num_input_tokens_seen": 75271115, - "step": 3539 - }, - { - "epoch": 0.425659832862382, - "grad_norm": 2.379488153454117, - "learning_rate": 2.570731389373273e-06, - "loss": 0.7611, - "num_input_tokens_seen": 75289375, - "step": 3540 - }, - { - "epoch": 0.4257800757530211, - "grad_norm": 3.1027385366722586, - "learning_rate": 2.5699847651024356e-06, - "loss": 0.7652, - "num_input_tokens_seen": 75309735, - "step": 3541 - }, - { - "epoch": 0.4259003186436602, - "grad_norm": 2.398128664671438, - "learning_rate": 2.5692380543653627e-06, - "loss": 0.7661, - "num_input_tokens_seen": 75327610, - "step": 3542 - }, - { - "epoch": 0.4260205615342993, - "grad_norm": 2.1937646833162625, - "learning_rate": 2.5684912572753298e-06, - "loss": 0.695, - "num_input_tokens_seen": 75343005, - "step": 3543 - }, - { - "epoch": 0.4261408044249384, - "grad_norm": 1.948438725942936, - "learning_rate": 2.5677443739456245e-06, - "loss": 0.8421, - "num_input_tokens_seen": 75364385, - "step": 3544 - }, - { - "epoch": 0.42626104731557746, - "grad_norm": 2.6506802196316026, - "learning_rate": 2.5669974044895495e-06, - "loss": 0.7887, - "num_input_tokens_seen": 75380500, - "step": 3545 - }, - { - "epoch": 0.42638129020621657, - "grad_norm": 1.7416516251374892, - "learning_rate": 2.5662503490204187e-06, - "loss": 0.7834, - "num_input_tokens_seen": 75400385, - "step": 3546 - }, - { - "epoch": 0.4265015330968556, - "grad_norm": 2.1656062333468022, - "learning_rate": 2.5655032076515603e-06, - "loss": 0.7597, - "num_input_tokens_seen": 75419430, - "step": 3547 - }, - { - "epoch": 0.42662177598749473, - "grad_norm": 2.5049421971210832, - "learning_rate": 2.564755980496315e-06, - "loss": 0.8146, - "num_input_tokens_seen": 75439080, - "step": 3548 - }, - { - "epoch": 0.42674201887813384, - "grad_norm": 1.8973682687438247, - "learning_rate": 2.5640086676680372e-06, - "loss": 0.7834, - "num_input_tokens_seen": 75460295, - "step": 3549 - }, - { - "epoch": 0.4268622617687729, - "grad_norm": 2.354202737052597, - "learning_rate": 2.5632612692800923e-06, - "loss": 0.8059, - "num_input_tokens_seen": 75479080, - "step": 3550 - }, - { - "epoch": 0.426982504659412, - "grad_norm": 2.6146409794521417, - "learning_rate": 2.5625137854458603e-06, - "loss": 0.7371, - "num_input_tokens_seen": 75497815, - "step": 3551 - }, - { - "epoch": 0.4271027475500511, - "grad_norm": 1.9667034197764448, - "learning_rate": 2.561766216278735e-06, - "loss": 0.799, - "num_input_tokens_seen": 75515130, - "step": 3552 - }, - { - "epoch": 0.4272229904406902, - "grad_norm": 2.0130897890201247, - "learning_rate": 2.561018561892121e-06, - "loss": 0.8034, - "num_input_tokens_seen": 75533990, - "step": 3553 - }, - { - "epoch": 0.4273432333313293, - "grad_norm": 1.6399691209264875, - "learning_rate": 2.5602708223994363e-06, - "loss": 0.7525, - "num_input_tokens_seen": 75555575, - "step": 3554 - }, - { - "epoch": 0.4274634762219684, - "grad_norm": 3.0607598747278395, - "learning_rate": 2.559522997914115e-06, - "loss": 0.6791, - "num_input_tokens_seen": 75574875, - "step": 3555 - }, - { - "epoch": 0.42758371911260745, - "grad_norm": 2.0648131201784183, - "learning_rate": 2.558775088549599e-06, - "loss": 0.8433, - "num_input_tokens_seen": 75594175, - "step": 3556 - }, - { - "epoch": 0.42770396200324656, - "grad_norm": 3.0557185461790772, - "learning_rate": 2.5580270944193467e-06, - "loss": 0.6676, - "num_input_tokens_seen": 75610715, - "step": 3557 - }, - { - "epoch": 0.4278242048938857, - "grad_norm": 1.633482738245169, - "learning_rate": 2.557279015636827e-06, - "loss": 0.5734, - "num_input_tokens_seen": 75670845, - "step": 3558 - }, - { - "epoch": 0.42794444778452473, - "grad_norm": 1.0701234787689877, - "learning_rate": 2.5565308523155245e-06, - "loss": 0.656, - "num_input_tokens_seen": 75730165, - "step": 3559 - }, - { - "epoch": 0.42806469067516384, - "grad_norm": 3.8148951486982634, - "learning_rate": 2.5557826045689336e-06, - "loss": 0.8222, - "num_input_tokens_seen": 75746125, - "step": 3560 - }, - { - "epoch": 0.4281849335658029, - "grad_norm": 1.108997071902187, - "learning_rate": 2.5550342725105643e-06, - "loss": 0.6081, - "num_input_tokens_seen": 75804010, - "step": 3561 - }, - { - "epoch": 0.428305176456442, - "grad_norm": 2.0069417929721305, - "learning_rate": 2.554285856253936e-06, - "loss": 0.8151, - "num_input_tokens_seen": 75822565, - "step": 3562 - }, - { - "epoch": 0.4284254193470811, - "grad_norm": 3.16011663722786, - "learning_rate": 2.553537355912585e-06, - "loss": 0.7696, - "num_input_tokens_seen": 75842650, - "step": 3563 - }, - { - "epoch": 0.42854566223772017, - "grad_norm": 1.9063930268992315, - "learning_rate": 2.552788771600057e-06, - "loss": 0.8142, - "num_input_tokens_seen": 75862680, - "step": 3564 - }, - { - "epoch": 0.4286659051283593, - "grad_norm": 1.9796588727113358, - "learning_rate": 2.552040103429912e-06, - "loss": 0.8146, - "num_input_tokens_seen": 75880160, - "step": 3565 - }, - { - "epoch": 0.4287861480189984, - "grad_norm": 1.9757256652959743, - "learning_rate": 2.551291351515722e-06, - "loss": 0.8633, - "num_input_tokens_seen": 75896895, - "step": 3566 - }, - { - "epoch": 0.42890639090963745, - "grad_norm": 1.53470517289562, - "learning_rate": 2.5505425159710726e-06, - "loss": 0.8569, - "num_input_tokens_seen": 75916425, - "step": 3567 - }, - { - "epoch": 0.42902663380027656, - "grad_norm": 1.9348507717518264, - "learning_rate": 2.5497935969095607e-06, - "loss": 0.8331, - "num_input_tokens_seen": 75934765, - "step": 3568 - }, - { - "epoch": 0.42914687669091567, - "grad_norm": 2.7428743786368486, - "learning_rate": 2.5490445944447976e-06, - "loss": 0.6609, - "num_input_tokens_seen": 75952980, - "step": 3569 - }, - { - "epoch": 0.4292671195815547, - "grad_norm": 2.091621906058011, - "learning_rate": 2.5482955086904056e-06, - "loss": 0.6594, - "num_input_tokens_seen": 75973995, - "step": 3570 - }, - { - "epoch": 0.42938736247219383, - "grad_norm": 1.7252700140295156, - "learning_rate": 2.547546339760022e-06, - "loss": 0.7579, - "num_input_tokens_seen": 75993795, - "step": 3571 - }, - { - "epoch": 0.42950760536283294, - "grad_norm": 2.022439570264024, - "learning_rate": 2.546797087767293e-06, - "loss": 0.7689, - "num_input_tokens_seen": 76013640, - "step": 3572 - }, - { - "epoch": 0.429627848253472, - "grad_norm": 1.857559768547678, - "learning_rate": 2.546047752825881e-06, - "loss": 0.8731, - "num_input_tokens_seen": 76033965, - "step": 3573 - }, - { - "epoch": 0.4297480911441111, - "grad_norm": 2.280151426834863, - "learning_rate": 2.545298335049459e-06, - "loss": 0.9338, - "num_input_tokens_seen": 76049240, - "step": 3574 - }, - { - "epoch": 0.4298683340347502, - "grad_norm": 2.1349635101751367, - "learning_rate": 2.544548834551713e-06, - "loss": 0.6479, - "num_input_tokens_seen": 76067965, - "step": 3575 - }, - { - "epoch": 0.4299885769253893, - "grad_norm": 2.3711969082986077, - "learning_rate": 2.543799251446342e-06, - "loss": 0.9358, - "num_input_tokens_seen": 76081010, - "step": 3576 - }, - { - "epoch": 0.4301088198160284, - "grad_norm": 1.6524779215895846, - "learning_rate": 2.5430495858470565e-06, - "loss": 0.8754, - "num_input_tokens_seen": 76100200, - "step": 3577 - }, - { - "epoch": 0.43022906270666744, - "grad_norm": 3.5605133985727857, - "learning_rate": 2.5422998378675815e-06, - "loss": 0.7661, - "num_input_tokens_seen": 76117865, - "step": 3578 - }, - { - "epoch": 0.43034930559730655, - "grad_norm": 2.6345596470293304, - "learning_rate": 2.541550007621651e-06, - "loss": 0.8396, - "num_input_tokens_seen": 76136075, - "step": 3579 - }, - { - "epoch": 0.43046954848794566, - "grad_norm": 1.7355202612383454, - "learning_rate": 2.5408000952230156e-06, - "loss": 0.793, - "num_input_tokens_seen": 76154585, - "step": 3580 - }, - { - "epoch": 0.4305897913785847, - "grad_norm": 1.8773640690264586, - "learning_rate": 2.5400501007854357e-06, - "loss": 0.8958, - "num_input_tokens_seen": 76173750, - "step": 3581 - }, - { - "epoch": 0.43071003426922383, - "grad_norm": 1.821732752755133, - "learning_rate": 2.539300024422685e-06, - "loss": 0.7548, - "num_input_tokens_seen": 76191415, - "step": 3582 - }, - { - "epoch": 0.43083027715986294, - "grad_norm": 0.8273045872785468, - "learning_rate": 2.538549866248549e-06, - "loss": 0.6378, - "num_input_tokens_seen": 76246115, - "step": 3583 - }, - { - "epoch": 0.430950520050502, - "grad_norm": 2.1396259372566644, - "learning_rate": 2.5377996263768265e-06, - "loss": 0.8156, - "num_input_tokens_seen": 76263915, - "step": 3584 - }, - { - "epoch": 0.4310707629411411, - "grad_norm": 1.9521410900541882, - "learning_rate": 2.5370493049213285e-06, - "loss": 0.6778, - "num_input_tokens_seen": 76283280, - "step": 3585 - }, - { - "epoch": 0.4311910058317802, - "grad_norm": 2.2755428940754165, - "learning_rate": 2.536298901995878e-06, - "loss": 0.7943, - "num_input_tokens_seen": 76302210, - "step": 3586 - }, - { - "epoch": 0.43131124872241927, - "grad_norm": 1.8481039970468383, - "learning_rate": 2.535548417714311e-06, - "loss": 0.798, - "num_input_tokens_seen": 76321230, - "step": 3587 - }, - { - "epoch": 0.4314314916130584, - "grad_norm": 1.497113061996737, - "learning_rate": 2.534797852190474e-06, - "loss": 0.8583, - "num_input_tokens_seen": 76341130, - "step": 3588 - }, - { - "epoch": 0.4315517345036975, - "grad_norm": 3.1375881868608766, - "learning_rate": 2.5340472055382287e-06, - "loss": 0.8066, - "num_input_tokens_seen": 76356880, - "step": 3589 - }, - { - "epoch": 0.43167197739433655, - "grad_norm": 2.2734636433953668, - "learning_rate": 2.5332964778714463e-06, - "loss": 0.8056, - "num_input_tokens_seen": 76373785, - "step": 3590 - }, - { - "epoch": 0.43179222028497566, - "grad_norm": 1.86802820388375, - "learning_rate": 2.5325456693040123e-06, - "loss": 0.6638, - "num_input_tokens_seen": 76390700, - "step": 3591 - }, - { - "epoch": 0.43191246317561477, - "grad_norm": 2.2976409629793046, - "learning_rate": 2.531794779949824e-06, - "loss": 0.7522, - "num_input_tokens_seen": 76408320, - "step": 3592 - }, - { - "epoch": 0.4320327060662538, - "grad_norm": 2.094413064199808, - "learning_rate": 2.5310438099227907e-06, - "loss": 0.8718, - "num_input_tokens_seen": 76425305, - "step": 3593 - }, - { - "epoch": 0.43215294895689293, - "grad_norm": 1.3208136464733782, - "learning_rate": 2.530292759336833e-06, - "loss": 0.5576, - "num_input_tokens_seen": 76485760, - "step": 3594 - }, - { - "epoch": 0.432273191847532, - "grad_norm": 2.7364501627253808, - "learning_rate": 2.5295416283058855e-06, - "loss": 0.704, - "num_input_tokens_seen": 76504345, - "step": 3595 - }, - { - "epoch": 0.4323934347381711, - "grad_norm": 1.71068111164653, - "learning_rate": 2.5287904169438943e-06, - "loss": 0.6573, - "num_input_tokens_seen": 76523270, - "step": 3596 - }, - { - "epoch": 0.4325136776288102, - "grad_norm": 2.6516865699439744, - "learning_rate": 2.528039125364817e-06, - "loss": 0.6491, - "num_input_tokens_seen": 76541795, - "step": 3597 - }, - { - "epoch": 0.43263392051944927, - "grad_norm": 2.1088631903856583, - "learning_rate": 2.527287753682624e-06, - "loss": 0.7513, - "num_input_tokens_seen": 76560310, - "step": 3598 - }, - { - "epoch": 0.4327541634100884, - "grad_norm": 3.1322998025485775, - "learning_rate": 2.5265363020112986e-06, - "loss": 0.6933, - "num_input_tokens_seen": 76580350, - "step": 3599 - }, - { - "epoch": 0.4328744063007275, - "grad_norm": 2.4521763991357903, - "learning_rate": 2.5257847704648348e-06, - "loss": 0.8348, - "num_input_tokens_seen": 76601300, - "step": 3600 - }, - { - "epoch": 0.43299464919136654, - "grad_norm": 2.0146789461852106, - "learning_rate": 2.525033159157239e-06, - "loss": 0.7634, - "num_input_tokens_seen": 76617335, - "step": 3601 - }, - { - "epoch": 0.43311489208200565, - "grad_norm": 1.9533350692708533, - "learning_rate": 2.52428146820253e-06, - "loss": 0.7759, - "num_input_tokens_seen": 76635310, - "step": 3602 - }, - { - "epoch": 0.43323513497264476, - "grad_norm": 2.1354105268103307, - "learning_rate": 2.52352969771474e-06, - "loss": 0.8207, - "num_input_tokens_seen": 76654255, - "step": 3603 - }, - { - "epoch": 0.4333553778632838, - "grad_norm": 18.34475598522582, - "learning_rate": 2.5227778478079106e-06, - "loss": 0.8761, - "num_input_tokens_seen": 76673385, - "step": 3604 - }, - { - "epoch": 0.43347562075392293, - "grad_norm": 1.6200273330478285, - "learning_rate": 2.522025918596098e-06, - "loss": 0.7626, - "num_input_tokens_seen": 76691405, - "step": 3605 - }, - { - "epoch": 0.43359586364456204, - "grad_norm": 2.4481792368894895, - "learning_rate": 2.5212739101933674e-06, - "loss": 0.6531, - "num_input_tokens_seen": 76714305, - "step": 3606 - }, - { - "epoch": 0.4337161065352011, - "grad_norm": 3.8021683757632614, - "learning_rate": 2.5205218227138e-06, - "loss": 0.867, - "num_input_tokens_seen": 76726980, - "step": 3607 - }, - { - "epoch": 0.4338363494258402, - "grad_norm": 2.1433950268519095, - "learning_rate": 2.519769656271486e-06, - "loss": 0.782, - "num_input_tokens_seen": 76744120, - "step": 3608 - }, - { - "epoch": 0.43395659231647926, - "grad_norm": 2.689230813678168, - "learning_rate": 2.5190174109805285e-06, - "loss": 0.6817, - "num_input_tokens_seen": 76763665, - "step": 3609 - }, - { - "epoch": 0.43407683520711837, - "grad_norm": 2.499055485090714, - "learning_rate": 2.518265086955042e-06, - "loss": 0.6307, - "num_input_tokens_seen": 76781105, - "step": 3610 - }, - { - "epoch": 0.4341970780977575, - "grad_norm": 2.8876975409281487, - "learning_rate": 2.5175126843091538e-06, - "loss": 0.8375, - "num_input_tokens_seen": 76800195, - "step": 3611 - }, - { - "epoch": 0.43431732098839654, - "grad_norm": 1.9085992647345504, - "learning_rate": 2.5167602031570026e-06, - "loss": 0.7391, - "num_input_tokens_seen": 76820100, - "step": 3612 - }, - { - "epoch": 0.43443756387903565, - "grad_norm": 1.9185820414804613, - "learning_rate": 2.51600764361274e-06, - "loss": 0.731, - "num_input_tokens_seen": 76841345, - "step": 3613 - }, - { - "epoch": 0.43455780676967476, - "grad_norm": 2.570985670695853, - "learning_rate": 2.5152550057905283e-06, - "loss": 0.7814, - "num_input_tokens_seen": 76860955, - "step": 3614 - }, - { - "epoch": 0.4346780496603138, - "grad_norm": 4.317111983263118, - "learning_rate": 2.514502289804542e-06, - "loss": 0.7602, - "num_input_tokens_seen": 76879860, - "step": 3615 - }, - { - "epoch": 0.4347982925509529, - "grad_norm": 2.9784663084731866, - "learning_rate": 2.5137494957689664e-06, - "loss": 0.8964, - "num_input_tokens_seen": 76895190, - "step": 3616 - }, - { - "epoch": 0.43491853544159204, - "grad_norm": 0.7686802857635239, - "learning_rate": 2.5129966237980016e-06, - "loss": 0.5976, - "num_input_tokens_seen": 76957905, - "step": 3617 - }, - { - "epoch": 0.4350387783322311, - "grad_norm": 2.0116047661381584, - "learning_rate": 2.5122436740058565e-06, - "loss": 0.7814, - "num_input_tokens_seen": 76976990, - "step": 3618 - }, - { - "epoch": 0.4351590212228702, - "grad_norm": 2.175968598712834, - "learning_rate": 2.5114906465067537e-06, - "loss": 0.853, - "num_input_tokens_seen": 76997695, - "step": 3619 - }, - { - "epoch": 0.4352792641135093, - "grad_norm": 2.433118670613242, - "learning_rate": 2.510737541414926e-06, - "loss": 0.761, - "num_input_tokens_seen": 77016660, - "step": 3620 - }, - { - "epoch": 0.43539950700414837, - "grad_norm": 2.5019388654942802, - "learning_rate": 2.5099843588446197e-06, - "loss": 0.7163, - "num_input_tokens_seen": 77034700, - "step": 3621 - }, - { - "epoch": 0.4355197498947875, - "grad_norm": 2.034688791974804, - "learning_rate": 2.5092310989100916e-06, - "loss": 0.6202, - "num_input_tokens_seen": 77054290, - "step": 3622 - }, - { - "epoch": 0.4356399927854266, - "grad_norm": 2.4958012752962753, - "learning_rate": 2.508477761725611e-06, - "loss": 0.7466, - "num_input_tokens_seen": 77072285, - "step": 3623 - }, - { - "epoch": 0.43576023567606564, - "grad_norm": 2.071654594007142, - "learning_rate": 2.507724347405458e-06, - "loss": 0.8019, - "num_input_tokens_seen": 77089955, - "step": 3624 - }, - { - "epoch": 0.43588047856670475, - "grad_norm": 1.9638713136687704, - "learning_rate": 2.5069708560639243e-06, - "loss": 0.8131, - "num_input_tokens_seen": 77107585, - "step": 3625 - }, - { - "epoch": 0.4360007214573438, - "grad_norm": 2.1562380783606545, - "learning_rate": 2.5062172878153158e-06, - "loss": 0.6119, - "num_input_tokens_seen": 77126580, - "step": 3626 - }, - { - "epoch": 0.4361209643479829, - "grad_norm": 1.8232627785844573, - "learning_rate": 2.505463642773947e-06, - "loss": 0.8696, - "num_input_tokens_seen": 77146265, - "step": 3627 - }, - { - "epoch": 0.43624120723862203, - "grad_norm": 2.3733395529526455, - "learning_rate": 2.5047099210541455e-06, - "loss": 0.7451, - "num_input_tokens_seen": 77162800, - "step": 3628 - }, - { - "epoch": 0.4363614501292611, - "grad_norm": 2.463057211515184, - "learning_rate": 2.50395612277025e-06, - "loss": 0.8362, - "num_input_tokens_seen": 77178375, - "step": 3629 - }, - { - "epoch": 0.4364816930199002, - "grad_norm": 3.2358110808639786, - "learning_rate": 2.503202248036612e-06, - "loss": 0.7287, - "num_input_tokens_seen": 77196950, - "step": 3630 - }, - { - "epoch": 0.4366019359105393, - "grad_norm": 1.7480023190368053, - "learning_rate": 2.5024482969675927e-06, - "loss": 0.7301, - "num_input_tokens_seen": 77216625, - "step": 3631 - }, - { - "epoch": 0.43672217880117836, - "grad_norm": 1.9845791676509061, - "learning_rate": 2.501694269677566e-06, - "loss": 0.8411, - "num_input_tokens_seen": 77234115, - "step": 3632 - }, - { - "epoch": 0.4368424216918175, - "grad_norm": 2.912556517794492, - "learning_rate": 2.500940166280918e-06, - "loss": 0.8046, - "num_input_tokens_seen": 77252265, - "step": 3633 - }, - { - "epoch": 0.4369626645824566, - "grad_norm": 2.094067371107186, - "learning_rate": 2.5001859868920447e-06, - "loss": 0.7894, - "num_input_tokens_seen": 77271470, - "step": 3634 - }, - { - "epoch": 0.43708290747309564, - "grad_norm": 2.5203751968746846, - "learning_rate": 2.499431731625355e-06, - "loss": 0.7683, - "num_input_tokens_seen": 77290215, - "step": 3635 - }, - { - "epoch": 0.43720315036373475, - "grad_norm": 2.0647099265792463, - "learning_rate": 2.4986774005952686e-06, - "loss": 0.7942, - "num_input_tokens_seen": 77312310, - "step": 3636 - }, - { - "epoch": 0.43732339325437386, - "grad_norm": 2.1886424785256406, - "learning_rate": 2.4979229939162175e-06, - "loss": 0.8363, - "num_input_tokens_seen": 77330810, - "step": 3637 - }, - { - "epoch": 0.4374436361450129, - "grad_norm": 2.107284108286668, - "learning_rate": 2.4971685117026433e-06, - "loss": 0.7933, - "num_input_tokens_seen": 77350295, - "step": 3638 - }, - { - "epoch": 0.437563879035652, - "grad_norm": 1.6971732145332628, - "learning_rate": 2.4964139540690018e-06, - "loss": 0.7639, - "num_input_tokens_seen": 77373350, - "step": 3639 - }, - { - "epoch": 0.4376841219262911, - "grad_norm": 1.9521561129031817, - "learning_rate": 2.4956593211297576e-06, - "loss": 0.7198, - "num_input_tokens_seen": 77390815, - "step": 3640 - }, - { - "epoch": 0.4378043648169302, - "grad_norm": 2.1932654525756385, - "learning_rate": 2.494904612999389e-06, - "loss": 0.7575, - "num_input_tokens_seen": 77409245, - "step": 3641 - }, - { - "epoch": 0.4379246077075693, - "grad_norm": 0.8179128352244903, - "learning_rate": 2.494149829792384e-06, - "loss": 0.6056, - "num_input_tokens_seen": 77469535, - "step": 3642 - }, - { - "epoch": 0.43804485059820836, - "grad_norm": 1.7350490027753196, - "learning_rate": 2.4933949716232424e-06, - "loss": 0.699, - "num_input_tokens_seen": 77486780, - "step": 3643 - }, - { - "epoch": 0.43816509348884747, - "grad_norm": 2.2946995033763287, - "learning_rate": 2.4926400386064763e-06, - "loss": 0.7326, - "num_input_tokens_seen": 77504865, - "step": 3644 - }, - { - "epoch": 0.4382853363794866, - "grad_norm": 1.9483651769910342, - "learning_rate": 2.491885030856608e-06, - "loss": 0.7841, - "num_input_tokens_seen": 77522680, - "step": 3645 - }, - { - "epoch": 0.43840557927012563, - "grad_norm": 22.566946495723524, - "learning_rate": 2.4911299484881713e-06, - "loss": 0.828, - "num_input_tokens_seen": 77539930, - "step": 3646 - }, - { - "epoch": 0.43852582216076474, - "grad_norm": 1.5670642119020959, - "learning_rate": 2.490374791615712e-06, - "loss": 0.8049, - "num_input_tokens_seen": 77559675, - "step": 3647 - }, - { - "epoch": 0.43864606505140386, - "grad_norm": 2.9081269667873637, - "learning_rate": 2.4896195603537867e-06, - "loss": 0.7766, - "num_input_tokens_seen": 77574005, - "step": 3648 - }, - { - "epoch": 0.4387663079420429, - "grad_norm": 2.1272161158330625, - "learning_rate": 2.488864254816964e-06, - "loss": 0.7446, - "num_input_tokens_seen": 77592415, - "step": 3649 - }, - { - "epoch": 0.438886550832682, - "grad_norm": 3.5252548198920475, - "learning_rate": 2.4881088751198213e-06, - "loss": 0.6746, - "num_input_tokens_seen": 77610295, - "step": 3650 - }, - { - "epoch": 0.43900679372332113, - "grad_norm": 4.572775541434697, - "learning_rate": 2.4873534213769517e-06, - "loss": 0.6426, - "num_input_tokens_seen": 77625245, - "step": 3651 - }, - { - "epoch": 0.4391270366139602, - "grad_norm": 1.8663018221376073, - "learning_rate": 2.4865978937029547e-06, - "loss": 0.7078, - "num_input_tokens_seen": 77643945, - "step": 3652 - }, - { - "epoch": 0.4392472795045993, - "grad_norm": 1.7993624891513738, - "learning_rate": 2.485842292212445e-06, - "loss": 0.6652, - "num_input_tokens_seen": 77664880, - "step": 3653 - }, - { - "epoch": 0.4393675223952384, - "grad_norm": 2.0300455020310286, - "learning_rate": 2.485086617020045e-06, - "loss": 0.7954, - "num_input_tokens_seen": 77683095, - "step": 3654 - }, - { - "epoch": 0.43948776528587746, - "grad_norm": 2.288980954422264, - "learning_rate": 2.4843308682403903e-06, - "loss": 0.8124, - "num_input_tokens_seen": 77699730, - "step": 3655 - }, - { - "epoch": 0.4396080081765166, - "grad_norm": 1.727497387055552, - "learning_rate": 2.483575045988129e-06, - "loss": 0.8238, - "num_input_tokens_seen": 77716075, - "step": 3656 - }, - { - "epoch": 0.43972825106715563, - "grad_norm": 3.714351063851641, - "learning_rate": 2.4828191503779177e-06, - "loss": 0.8084, - "num_input_tokens_seen": 77733895, - "step": 3657 - }, - { - "epoch": 0.43984849395779474, - "grad_norm": 2.1617231967722716, - "learning_rate": 2.482063181524425e-06, - "loss": 0.8881, - "num_input_tokens_seen": 77749515, - "step": 3658 - }, - { - "epoch": 0.43996873684843385, - "grad_norm": 2.2832959228087484, - "learning_rate": 2.481307139542331e-06, - "loss": 0.8055, - "num_input_tokens_seen": 77766800, - "step": 3659 - }, - { - "epoch": 0.4400889797390729, - "grad_norm": 2.0174947780635413, - "learning_rate": 2.4805510245463263e-06, - "loss": 0.6452, - "num_input_tokens_seen": 77786675, - "step": 3660 - }, - { - "epoch": 0.440209222629712, - "grad_norm": 3.1793437415229024, - "learning_rate": 2.4797948366511137e-06, - "loss": 0.5921, - "num_input_tokens_seen": 77806105, - "step": 3661 - }, - { - "epoch": 0.4403294655203511, - "grad_norm": 2.108618988979303, - "learning_rate": 2.479038575971405e-06, - "loss": 0.7656, - "num_input_tokens_seen": 77824890, - "step": 3662 - }, - { - "epoch": 0.4404497084109902, - "grad_norm": 3.037734146247169, - "learning_rate": 2.478282242621926e-06, - "loss": 0.7168, - "num_input_tokens_seen": 77845070, - "step": 3663 - }, - { - "epoch": 0.4405699513016293, - "grad_norm": 0.9227886542991343, - "learning_rate": 2.4775258367174108e-06, - "loss": 0.6361, - "num_input_tokens_seen": 77912555, - "step": 3664 - }, - { - "epoch": 0.4406901941922684, - "grad_norm": 2.4583004285564085, - "learning_rate": 2.476769358372606e-06, - "loss": 0.789, - "num_input_tokens_seen": 77933925, - "step": 3665 - }, - { - "epoch": 0.44081043708290746, - "grad_norm": 2.370898869345898, - "learning_rate": 2.4760128077022687e-06, - "loss": 0.745, - "num_input_tokens_seen": 77951780, - "step": 3666 - }, - { - "epoch": 0.44093067997354657, - "grad_norm": 1.5650845197581849, - "learning_rate": 2.4752561848211672e-06, - "loss": 0.6769, - "num_input_tokens_seen": 77973900, - "step": 3667 - }, - { - "epoch": 0.4410509228641857, - "grad_norm": 4.884257453505635, - "learning_rate": 2.4744994898440797e-06, - "loss": 0.7146, - "num_input_tokens_seen": 77992410, - "step": 3668 - }, - { - "epoch": 0.44117116575482473, - "grad_norm": 2.2495014818216394, - "learning_rate": 2.473742722885797e-06, - "loss": 0.827, - "num_input_tokens_seen": 78011150, - "step": 3669 - }, - { - "epoch": 0.44129140864546385, - "grad_norm": 2.26725516168272, - "learning_rate": 2.4729858840611197e-06, - "loss": 0.6588, - "num_input_tokens_seen": 78029780, - "step": 3670 - }, - { - "epoch": 0.4414116515361029, - "grad_norm": 2.1326809384514123, - "learning_rate": 2.4722289734848605e-06, - "loss": 0.7204, - "num_input_tokens_seen": 78049965, - "step": 3671 - }, - { - "epoch": 0.441531894426742, - "grad_norm": 2.032060004611709, - "learning_rate": 2.4714719912718405e-06, - "loss": 0.7761, - "num_input_tokens_seen": 78066810, - "step": 3672 - }, - { - "epoch": 0.4416521373173811, - "grad_norm": 2.002417550788984, - "learning_rate": 2.470714937536895e-06, - "loss": 0.7916, - "num_input_tokens_seen": 78085255, - "step": 3673 - }, - { - "epoch": 0.4417723802080202, - "grad_norm": 2.018039938066539, - "learning_rate": 2.469957812394868e-06, - "loss": 0.7059, - "num_input_tokens_seen": 78103785, - "step": 3674 - }, - { - "epoch": 0.4418926230986593, - "grad_norm": 2.192917684356724, - "learning_rate": 2.4692006159606148e-06, - "loss": 0.7607, - "num_input_tokens_seen": 78121035, - "step": 3675 - }, - { - "epoch": 0.4420128659892984, - "grad_norm": 1.717614081107856, - "learning_rate": 2.468443348349e-06, - "loss": 0.7824, - "num_input_tokens_seen": 78138630, - "step": 3676 - }, - { - "epoch": 0.44213310887993745, - "grad_norm": 2.907081507203634, - "learning_rate": 2.467686009674903e-06, - "loss": 0.8194, - "num_input_tokens_seen": 78152800, - "step": 3677 - }, - { - "epoch": 0.44225335177057656, - "grad_norm": 2.5397604118202453, - "learning_rate": 2.4669286000532085e-06, - "loss": 0.8437, - "num_input_tokens_seen": 78167825, - "step": 3678 - }, - { - "epoch": 0.4423735946612157, - "grad_norm": 3.665972131634199, - "learning_rate": 2.466171119598818e-06, - "loss": 0.7134, - "num_input_tokens_seen": 78187515, - "step": 3679 - }, - { - "epoch": 0.44249383755185473, - "grad_norm": 1.9922627420813293, - "learning_rate": 2.465413568426639e-06, - "loss": 0.7693, - "num_input_tokens_seen": 78208185, - "step": 3680 - }, - { - "epoch": 0.44261408044249384, - "grad_norm": 1.6473804742234324, - "learning_rate": 2.464655946651592e-06, - "loss": 0.8107, - "num_input_tokens_seen": 78226910, - "step": 3681 - }, - { - "epoch": 0.44273432333313295, - "grad_norm": 2.3946407310600355, - "learning_rate": 2.4638982543886065e-06, - "loss": 0.7965, - "num_input_tokens_seen": 78246670, - "step": 3682 - }, - { - "epoch": 0.442854566223772, - "grad_norm": 2.3871891828944394, - "learning_rate": 2.4631404917526254e-06, - "loss": 0.8701, - "num_input_tokens_seen": 78263345, - "step": 3683 - }, - { - "epoch": 0.4429748091144111, - "grad_norm": 2.0195508221209066, - "learning_rate": 2.4623826588585995e-06, - "loss": 0.7825, - "num_input_tokens_seen": 78283335, - "step": 3684 - }, - { - "epoch": 0.4430950520050502, - "grad_norm": 1.4742967155449034, - "learning_rate": 2.461624755821492e-06, - "loss": 0.8225, - "num_input_tokens_seen": 78302535, - "step": 3685 - }, - { - "epoch": 0.4432152948956893, - "grad_norm": 1.7813800763465129, - "learning_rate": 2.460866782756276e-06, - "loss": 0.7654, - "num_input_tokens_seen": 78321585, - "step": 3686 - }, - { - "epoch": 0.4433355377863284, - "grad_norm": 1.891004880523317, - "learning_rate": 2.460108739777936e-06, - "loss": 0.8899, - "num_input_tokens_seen": 78340440, - "step": 3687 - }, - { - "epoch": 0.44345578067696745, - "grad_norm": 2.0281705254109124, - "learning_rate": 2.4593506270014656e-06, - "loss": 0.7548, - "num_input_tokens_seen": 78359130, - "step": 3688 - }, - { - "epoch": 0.44357602356760656, - "grad_norm": 4.169041820865772, - "learning_rate": 2.45859244454187e-06, - "loss": 0.8156, - "num_input_tokens_seen": 78378640, - "step": 3689 - }, - { - "epoch": 0.44369626645824567, - "grad_norm": 2.0260239058025675, - "learning_rate": 2.4578341925141655e-06, - "loss": 0.6545, - "num_input_tokens_seen": 78397575, - "step": 3690 - }, - { - "epoch": 0.4438165093488847, - "grad_norm": 2.7234866321685485, - "learning_rate": 2.457075871033378e-06, - "loss": 0.7192, - "num_input_tokens_seen": 78419170, - "step": 3691 - }, - { - "epoch": 0.44393675223952384, - "grad_norm": 2.112027591235452, - "learning_rate": 2.4563174802145445e-06, - "loss": 0.879, - "num_input_tokens_seen": 78436140, - "step": 3692 - }, - { - "epoch": 0.44405699513016295, - "grad_norm": 3.7680237600967557, - "learning_rate": 2.455559020172712e-06, - "loss": 0.4955, - "num_input_tokens_seen": 78503215, - "step": 3693 - }, - { - "epoch": 0.444177238020802, - "grad_norm": 1.876866831753697, - "learning_rate": 2.454800491022938e-06, - "loss": 0.8955, - "num_input_tokens_seen": 78520510, - "step": 3694 - }, - { - "epoch": 0.4442974809114411, - "grad_norm": 1.7125622645485121, - "learning_rate": 2.4540418928802913e-06, - "loss": 0.8648, - "num_input_tokens_seen": 78538965, - "step": 3695 - }, - { - "epoch": 0.4444177238020802, - "grad_norm": 2.2758106352468, - "learning_rate": 2.4532832258598506e-06, - "loss": 0.6614, - "num_input_tokens_seen": 78556515, - "step": 3696 - }, - { - "epoch": 0.4445379666927193, - "grad_norm": 2.026183457346423, - "learning_rate": 2.4525244900767047e-06, - "loss": 0.8038, - "num_input_tokens_seen": 78577050, - "step": 3697 - }, - { - "epoch": 0.4446582095833584, - "grad_norm": 0.8441549179958148, - "learning_rate": 2.4517656856459536e-06, - "loss": 0.6295, - "num_input_tokens_seen": 78642615, - "step": 3698 - }, - { - "epoch": 0.4447784524739975, - "grad_norm": 2.020079458676657, - "learning_rate": 2.4510068126827073e-06, - "loss": 0.6748, - "num_input_tokens_seen": 78663335, - "step": 3699 - }, - { - "epoch": 0.44489869536463655, - "grad_norm": 2.4130134941921186, - "learning_rate": 2.4502478713020854e-06, - "loss": 0.8211, - "num_input_tokens_seen": 78680830, - "step": 3700 - }, - { - "epoch": 0.44501893825527566, - "grad_norm": 16.391931520600966, - "learning_rate": 2.44948886161922e-06, - "loss": 0.8309, - "num_input_tokens_seen": 78699565, - "step": 3701 - }, - { - "epoch": 0.4451391811459148, - "grad_norm": 1.6452690252463997, - "learning_rate": 2.4487297837492524e-06, - "loss": 0.8438, - "num_input_tokens_seen": 78718450, - "step": 3702 - }, - { - "epoch": 0.44525942403655383, - "grad_norm": 1.9895799845443682, - "learning_rate": 2.4479706378073327e-06, - "loss": 0.6096, - "num_input_tokens_seen": 78736710, - "step": 3703 - }, - { - "epoch": 0.44537966692719294, - "grad_norm": 3.7198373861827783, - "learning_rate": 2.447211423908623e-06, - "loss": 0.8363, - "num_input_tokens_seen": 78756475, - "step": 3704 - }, - { - "epoch": 0.445499909817832, - "grad_norm": 2.985763883121609, - "learning_rate": 2.4464521421682966e-06, - "loss": 0.7402, - "num_input_tokens_seen": 78773785, - "step": 3705 - }, - { - "epoch": 0.4456201527084711, - "grad_norm": 1.413429154065012, - "learning_rate": 2.445692792701534e-06, - "loss": 0.868, - "num_input_tokens_seen": 78794545, - "step": 3706 - }, - { - "epoch": 0.4457403955991102, - "grad_norm": 2.6214275513829484, - "learning_rate": 2.4449333756235307e-06, - "loss": 0.7553, - "num_input_tokens_seen": 78810980, - "step": 3707 - }, - { - "epoch": 0.4458606384897493, - "grad_norm": 2.5251702152819546, - "learning_rate": 2.4441738910494867e-06, - "loss": 0.7864, - "num_input_tokens_seen": 78825435, - "step": 3708 - }, - { - "epoch": 0.4459808813803884, - "grad_norm": 1.9437303643528219, - "learning_rate": 2.4434143390946176e-06, - "loss": 0.8175, - "num_input_tokens_seen": 78843965, - "step": 3709 - }, - { - "epoch": 0.4461011242710275, - "grad_norm": 2.1728020176654326, - "learning_rate": 2.4426547198741457e-06, - "loss": 0.8507, - "num_input_tokens_seen": 78861890, - "step": 3710 - }, - { - "epoch": 0.44622136716166655, - "grad_norm": 2.582708958514176, - "learning_rate": 2.441895033503305e-06, - "loss": 0.7471, - "num_input_tokens_seen": 78879530, - "step": 3711 - }, - { - "epoch": 0.44634161005230566, - "grad_norm": 2.4183288616389196, - "learning_rate": 2.4411352800973375e-06, - "loss": 0.8222, - "num_input_tokens_seen": 78897685, - "step": 3712 - }, - { - "epoch": 0.44646185294294477, - "grad_norm": 3.45301780210301, - "learning_rate": 2.4403754597715005e-06, - "loss": 0.75, - "num_input_tokens_seen": 78916850, - "step": 3713 - }, - { - "epoch": 0.4465820958335838, - "grad_norm": 3.234722379197625, - "learning_rate": 2.4396155726410553e-06, - "loss": 0.919, - "num_input_tokens_seen": 78935180, - "step": 3714 - }, - { - "epoch": 0.44670233872422294, - "grad_norm": 2.770958662360123, - "learning_rate": 2.438855618821278e-06, - "loss": 0.9143, - "num_input_tokens_seen": 78950700, - "step": 3715 - }, - { - "epoch": 0.44682258161486205, - "grad_norm": 2.1322103001409896, - "learning_rate": 2.4380955984274513e-06, - "loss": 0.6712, - "num_input_tokens_seen": 78969075, - "step": 3716 - }, - { - "epoch": 0.4469428245055011, - "grad_norm": 2.030468718166149, - "learning_rate": 2.4373355115748716e-06, - "loss": 0.7717, - "num_input_tokens_seen": 78989625, - "step": 3717 - }, - { - "epoch": 0.4470630673961402, - "grad_norm": 1.9443336452359934, - "learning_rate": 2.436575358378842e-06, - "loss": 0.723, - "num_input_tokens_seen": 79008835, - "step": 3718 - }, - { - "epoch": 0.44718331028677927, - "grad_norm": 6.235313351045172, - "learning_rate": 2.4358151389546782e-06, - "loss": 0.8288, - "num_input_tokens_seen": 79025240, - "step": 3719 - }, - { - "epoch": 0.4473035531774184, - "grad_norm": 2.356420714200738, - "learning_rate": 2.4350548534177035e-06, - "loss": 0.753, - "num_input_tokens_seen": 79041790, - "step": 3720 - }, - { - "epoch": 0.4474237960680575, - "grad_norm": 1.741783537337227, - "learning_rate": 2.434294501883254e-06, - "loss": 0.6694, - "num_input_tokens_seen": 79064605, - "step": 3721 - }, - { - "epoch": 0.44754403895869654, - "grad_norm": 1.734957073196937, - "learning_rate": 2.4335340844666737e-06, - "loss": 0.6583, - "num_input_tokens_seen": 79083545, - "step": 3722 - }, - { - "epoch": 0.44766428184933565, - "grad_norm": 1.9204499828183228, - "learning_rate": 2.4327736012833178e-06, - "loss": 0.7117, - "num_input_tokens_seen": 79104985, - "step": 3723 - }, - { - "epoch": 0.44778452473997477, - "grad_norm": 2.118016722638842, - "learning_rate": 2.4320130524485506e-06, - "loss": 0.7586, - "num_input_tokens_seen": 79123500, - "step": 3724 - }, - { - "epoch": 0.4479047676306138, - "grad_norm": 2.22545907522413, - "learning_rate": 2.4312524380777466e-06, - "loss": 0.7922, - "num_input_tokens_seen": 79142720, - "step": 3725 - }, - { - "epoch": 0.44802501052125293, - "grad_norm": 5.704156673247014, - "learning_rate": 2.4304917582862906e-06, - "loss": 0.7659, - "num_input_tokens_seen": 79161620, - "step": 3726 - }, - { - "epoch": 0.44814525341189204, - "grad_norm": 2.3372078822668563, - "learning_rate": 2.4297310131895774e-06, - "loss": 0.8781, - "num_input_tokens_seen": 79179885, - "step": 3727 - }, - { - "epoch": 0.4482654963025311, - "grad_norm": 2.4597314985903473, - "learning_rate": 2.428970202903011e-06, - "loss": 0.7615, - "num_input_tokens_seen": 79197075, - "step": 3728 - }, - { - "epoch": 0.4483857391931702, - "grad_norm": 1.8642653413137424, - "learning_rate": 2.4282093275420057e-06, - "loss": 0.8176, - "num_input_tokens_seen": 79215825, - "step": 3729 - }, - { - "epoch": 0.4485059820838093, - "grad_norm": 2.668147409563175, - "learning_rate": 2.427448387221986e-06, - "loss": 0.6877, - "num_input_tokens_seen": 79232905, - "step": 3730 - }, - { - "epoch": 0.4486262249744484, - "grad_norm": 1.8122634292229343, - "learning_rate": 2.426687382058386e-06, - "loss": 0.9255, - "num_input_tokens_seen": 79250905, - "step": 3731 - }, - { - "epoch": 0.4487464678650875, - "grad_norm": 0.9969375684357404, - "learning_rate": 2.425926312166649e-06, - "loss": 0.6162, - "num_input_tokens_seen": 79303500, - "step": 3732 - }, - { - "epoch": 0.4488667107557266, - "grad_norm": 2.5817944547085805, - "learning_rate": 2.42516517766223e-06, - "loss": 0.7212, - "num_input_tokens_seen": 79321300, - "step": 3733 - }, - { - "epoch": 0.44898695364636565, - "grad_norm": 1.875285052008389, - "learning_rate": 2.4244039786605907e-06, - "loss": 0.6709, - "num_input_tokens_seen": 79342025, - "step": 3734 - }, - { - "epoch": 0.44910719653700476, - "grad_norm": 2.3611656241427452, - "learning_rate": 2.4236427152772055e-06, - "loss": 0.8193, - "num_input_tokens_seen": 79360150, - "step": 3735 - }, - { - "epoch": 0.4492274394276438, - "grad_norm": 0.9030233703970052, - "learning_rate": 2.422881387627557e-06, - "loss": 0.6061, - "num_input_tokens_seen": 79412320, - "step": 3736 - }, - { - "epoch": 0.4493476823182829, - "grad_norm": 1.4718579608470002, - "learning_rate": 2.422119995827139e-06, - "loss": 0.7644, - "num_input_tokens_seen": 79432165, - "step": 3737 - }, - { - "epoch": 0.44946792520892204, - "grad_norm": 2.4124441950882725, - "learning_rate": 2.4213585399914523e-06, - "loss": 0.7336, - "num_input_tokens_seen": 79449090, - "step": 3738 - }, - { - "epoch": 0.4495881680995611, - "grad_norm": 1.6798794610633387, - "learning_rate": 2.4205970202360113e-06, - "loss": 0.8412, - "num_input_tokens_seen": 79468375, - "step": 3739 - }, - { - "epoch": 0.4497084109902002, - "grad_norm": 4.745589986704857, - "learning_rate": 2.4198354366763354e-06, - "loss": 0.7799, - "num_input_tokens_seen": 79486735, - "step": 3740 - }, - { - "epoch": 0.4498286538808393, - "grad_norm": 2.217209334774954, - "learning_rate": 2.4190737894279587e-06, - "loss": 0.7769, - "num_input_tokens_seen": 79503825, - "step": 3741 - }, - { - "epoch": 0.44994889677147837, - "grad_norm": 2.196018819808298, - "learning_rate": 2.4183120786064203e-06, - "loss": 0.7954, - "num_input_tokens_seen": 79520420, - "step": 3742 - }, - { - "epoch": 0.4500691396621175, - "grad_norm": 2.9469151327662706, - "learning_rate": 2.417550304327273e-06, - "loss": 0.8439, - "num_input_tokens_seen": 79538180, - "step": 3743 - }, - { - "epoch": 0.4501893825527566, - "grad_norm": 1.6444190561494996, - "learning_rate": 2.416788466706076e-06, - "loss": 0.7529, - "num_input_tokens_seen": 79560610, - "step": 3744 - }, - { - "epoch": 0.45030962544339564, - "grad_norm": 6.1292978724456155, - "learning_rate": 2.4160265658584e-06, - "loss": 0.8693, - "num_input_tokens_seen": 79575220, - "step": 3745 - }, - { - "epoch": 0.45042986833403476, - "grad_norm": 2.260887095181136, - "learning_rate": 2.4152646018998253e-06, - "loss": 0.6816, - "num_input_tokens_seen": 79593890, - "step": 3746 - }, - { - "epoch": 0.45055011122467387, - "grad_norm": 1.76589947857936, - "learning_rate": 2.4145025749459407e-06, - "loss": 0.7122, - "num_input_tokens_seen": 79614635, - "step": 3747 - }, - { - "epoch": 0.4506703541153129, - "grad_norm": 2.233100156065296, - "learning_rate": 2.413740485112344e-06, - "loss": 0.7016, - "num_input_tokens_seen": 79632695, - "step": 3748 - }, - { - "epoch": 0.45079059700595203, - "grad_norm": 1.6602487327608608, - "learning_rate": 2.412978332514646e-06, - "loss": 0.818, - "num_input_tokens_seen": 79651195, - "step": 3749 - }, - { - "epoch": 0.4509108398965911, - "grad_norm": 2.348598629174502, - "learning_rate": 2.412216117268462e-06, - "loss": 0.712, - "num_input_tokens_seen": 79671710, - "step": 3750 - }, - { - "epoch": 0.4510310827872302, - "grad_norm": 2.7683436094340053, - "learning_rate": 2.4114538394894216e-06, - "loss": 0.8268, - "num_input_tokens_seen": 79689070, - "step": 3751 - }, - { - "epoch": 0.4511513256778693, - "grad_norm": 1.9232511779175339, - "learning_rate": 2.4106914992931605e-06, - "loss": 0.8305, - "num_input_tokens_seen": 79706945, - "step": 3752 - }, - { - "epoch": 0.45127156856850836, - "grad_norm": 1.5574925932576171, - "learning_rate": 2.409929096795326e-06, - "loss": 0.7373, - "num_input_tokens_seen": 79727035, - "step": 3753 - }, - { - "epoch": 0.4513918114591475, - "grad_norm": 2.0787002980553804, - "learning_rate": 2.409166632111573e-06, - "loss": 0.786, - "num_input_tokens_seen": 79744890, - "step": 3754 - }, - { - "epoch": 0.4515120543497866, - "grad_norm": 1.8420448826829345, - "learning_rate": 2.4084041053575674e-06, - "loss": 0.7901, - "num_input_tokens_seen": 79764030, - "step": 3755 - }, - { - "epoch": 0.45163229724042564, - "grad_norm": 1.9298420370521412, - "learning_rate": 2.4076415166489834e-06, - "loss": 0.7165, - "num_input_tokens_seen": 79783160, - "step": 3756 - }, - { - "epoch": 0.45175254013106475, - "grad_norm": 1.6077993217628501, - "learning_rate": 2.406878866101506e-06, - "loss": 0.7901, - "num_input_tokens_seen": 79801845, - "step": 3757 - }, - { - "epoch": 0.45187278302170386, - "grad_norm": 3.165426706991801, - "learning_rate": 2.4061161538308273e-06, - "loss": 0.7745, - "num_input_tokens_seen": 79818410, - "step": 3758 - }, - { - "epoch": 0.4519930259123429, - "grad_norm": 2.002430782558665, - "learning_rate": 2.4053533799526523e-06, - "loss": 0.884, - "num_input_tokens_seen": 79833850, - "step": 3759 - }, - { - "epoch": 0.452113268802982, - "grad_norm": 1.7448498090995037, - "learning_rate": 2.404590544582691e-06, - "loss": 0.8562, - "num_input_tokens_seen": 79851805, - "step": 3760 - }, - { - "epoch": 0.45223351169362114, - "grad_norm": 1.7781375410708375, - "learning_rate": 2.403827647836666e-06, - "loss": 0.8025, - "num_input_tokens_seen": 79872080, - "step": 3761 - }, - { - "epoch": 0.4523537545842602, - "grad_norm": 2.096053595024568, - "learning_rate": 2.4030646898303075e-06, - "loss": 0.6886, - "num_input_tokens_seen": 79893290, - "step": 3762 - }, - { - "epoch": 0.4524739974748993, - "grad_norm": 2.048966028515164, - "learning_rate": 2.4023016706793566e-06, - "loss": 0.8191, - "num_input_tokens_seen": 79912805, - "step": 3763 - }, - { - "epoch": 0.4525942403655384, - "grad_norm": 0.8614607412046528, - "learning_rate": 2.401538590499561e-06, - "loss": 0.6052, - "num_input_tokens_seen": 79972980, - "step": 3764 - }, - { - "epoch": 0.45271448325617747, - "grad_norm": 2.3897817943561463, - "learning_rate": 2.400775449406682e-06, - "loss": 0.7043, - "num_input_tokens_seen": 79995895, - "step": 3765 - }, - { - "epoch": 0.4528347261468166, - "grad_norm": 2.3967319642451623, - "learning_rate": 2.4000122475164846e-06, - "loss": 0.7193, - "num_input_tokens_seen": 80016180, - "step": 3766 - }, - { - "epoch": 0.45295496903745563, - "grad_norm": 1.7532425034180619, - "learning_rate": 2.3992489849447484e-06, - "loss": 0.8918, - "num_input_tokens_seen": 80034355, - "step": 3767 - }, - { - "epoch": 0.45307521192809475, - "grad_norm": 2.757451334205097, - "learning_rate": 2.3984856618072584e-06, - "loss": 0.784, - "num_input_tokens_seen": 80054110, - "step": 3768 - }, - { - "epoch": 0.45319545481873386, - "grad_norm": 2.017176844906747, - "learning_rate": 2.39772227821981e-06, - "loss": 0.7341, - "num_input_tokens_seen": 80072465, - "step": 3769 - }, - { - "epoch": 0.4533156977093729, - "grad_norm": 2.2361428429504406, - "learning_rate": 2.3969588342982077e-06, - "loss": 0.7498, - "num_input_tokens_seen": 80091560, - "step": 3770 - }, - { - "epoch": 0.453435940600012, - "grad_norm": 1.7505164110371585, - "learning_rate": 2.396195330158267e-06, - "loss": 0.7185, - "num_input_tokens_seen": 80111170, - "step": 3771 - }, - { - "epoch": 0.45355618349065113, - "grad_norm": 2.7928530648112733, - "learning_rate": 2.395431765915809e-06, - "loss": 0.7878, - "num_input_tokens_seen": 80131225, - "step": 3772 - }, - { - "epoch": 0.4536764263812902, - "grad_norm": 0.9032585638216502, - "learning_rate": 2.394668141686667e-06, - "loss": 0.6225, - "num_input_tokens_seen": 80192910, - "step": 3773 - }, - { - "epoch": 0.4537966692719293, - "grad_norm": 1.9911977149613673, - "learning_rate": 2.393904457586681e-06, - "loss": 0.6884, - "num_input_tokens_seen": 80215380, - "step": 3774 - }, - { - "epoch": 0.4539169121625684, - "grad_norm": 2.396334800499764, - "learning_rate": 2.3931407137317024e-06, - "loss": 0.7528, - "num_input_tokens_seen": 80235255, - "step": 3775 - }, - { - "epoch": 0.45403715505320746, - "grad_norm": 2.0798927502133946, - "learning_rate": 2.3923769102375907e-06, - "loss": 0.8479, - "num_input_tokens_seen": 80253840, - "step": 3776 - }, - { - "epoch": 0.4541573979438466, - "grad_norm": 2.0454314347648315, - "learning_rate": 2.391613047220213e-06, - "loss": 0.783, - "num_input_tokens_seen": 80273460, - "step": 3777 - }, - { - "epoch": 0.4542776408344857, - "grad_norm": 139.94738138022433, - "learning_rate": 2.390849124795447e-06, - "loss": 0.7889, - "num_input_tokens_seen": 80289180, - "step": 3778 - }, - { - "epoch": 0.45439788372512474, - "grad_norm": 2.4334082567935567, - "learning_rate": 2.3900851430791804e-06, - "loss": 0.8383, - "num_input_tokens_seen": 80306920, - "step": 3779 - }, - { - "epoch": 0.45451812661576385, - "grad_norm": 2.3866613316663896, - "learning_rate": 2.389321102187307e-06, - "loss": 0.8456, - "num_input_tokens_seen": 80325420, - "step": 3780 - }, - { - "epoch": 0.4546383695064029, - "grad_norm": 1.7062829687281988, - "learning_rate": 2.3885570022357326e-06, - "loss": 0.8202, - "num_input_tokens_seen": 80344270, - "step": 3781 - }, - { - "epoch": 0.454758612397042, - "grad_norm": 0.8254330817330938, - "learning_rate": 2.38779284334037e-06, - "loss": 0.6272, - "num_input_tokens_seen": 80408965, - "step": 3782 - }, - { - "epoch": 0.4548788552876811, - "grad_norm": 2.9667837032790576, - "learning_rate": 2.387028625617141e-06, - "loss": 0.7805, - "num_input_tokens_seen": 80427900, - "step": 3783 - }, - { - "epoch": 0.4549990981783202, - "grad_norm": 8.112005825289943, - "learning_rate": 2.3862643491819766e-06, - "loss": 0.8462, - "num_input_tokens_seen": 80446185, - "step": 3784 - }, - { - "epoch": 0.4551193410689593, - "grad_norm": 1.824956936349683, - "learning_rate": 2.3855000141508186e-06, - "loss": 0.8323, - "num_input_tokens_seen": 80466060, - "step": 3785 - }, - { - "epoch": 0.4552395839595984, - "grad_norm": 3.843058233263913, - "learning_rate": 2.3847356206396143e-06, - "loss": 0.8361, - "num_input_tokens_seen": 80483090, - "step": 3786 - }, - { - "epoch": 0.45535982685023746, - "grad_norm": 2.507757479655883, - "learning_rate": 2.3839711687643227e-06, - "loss": 0.7821, - "num_input_tokens_seen": 80504035, - "step": 3787 - }, - { - "epoch": 0.45548006974087657, - "grad_norm": 2.2504358524027532, - "learning_rate": 2.3832066586409097e-06, - "loss": 0.7375, - "num_input_tokens_seen": 80523105, - "step": 3788 - }, - { - "epoch": 0.4556003126315157, - "grad_norm": 1.7567314217925787, - "learning_rate": 2.3824420903853516e-06, - "loss": 0.8136, - "num_input_tokens_seen": 80541290, - "step": 3789 - }, - { - "epoch": 0.45572055552215474, - "grad_norm": 2.629940388333714, - "learning_rate": 2.3816774641136324e-06, - "loss": 0.8103, - "num_input_tokens_seen": 80558265, - "step": 3790 - }, - { - "epoch": 0.45584079841279385, - "grad_norm": 3.1647475183819287, - "learning_rate": 2.380912779941745e-06, - "loss": 0.7176, - "num_input_tokens_seen": 80581105, - "step": 3791 - }, - { - "epoch": 0.45596104130343296, - "grad_norm": 1.9372692586238671, - "learning_rate": 2.3801480379856918e-06, - "loss": 0.8232, - "num_input_tokens_seen": 80602535, - "step": 3792 - }, - { - "epoch": 0.456081284194072, - "grad_norm": 1.6754338468828784, - "learning_rate": 2.379383238361484e-06, - "loss": 0.8323, - "num_input_tokens_seen": 80621615, - "step": 3793 - }, - { - "epoch": 0.4562015270847111, - "grad_norm": 2.1655691707219087, - "learning_rate": 2.3786183811851403e-06, - "loss": 0.799, - "num_input_tokens_seen": 80642040, - "step": 3794 - }, - { - "epoch": 0.45632176997535023, - "grad_norm": 1.9917808540285618, - "learning_rate": 2.3778534665726892e-06, - "loss": 0.8007, - "num_input_tokens_seen": 80658590, - "step": 3795 - }, - { - "epoch": 0.4564420128659893, - "grad_norm": 1.8134233830419824, - "learning_rate": 2.3770884946401677e-06, - "loss": 0.7242, - "num_input_tokens_seen": 80680060, - "step": 3796 - }, - { - "epoch": 0.4565622557566284, - "grad_norm": 1.867800770801713, - "learning_rate": 2.3763234655036216e-06, - "loss": 0.78, - "num_input_tokens_seen": 80698980, - "step": 3797 - }, - { - "epoch": 0.45668249864726745, - "grad_norm": 2.247743058879139, - "learning_rate": 2.3755583792791046e-06, - "loss": 0.8616, - "num_input_tokens_seen": 80718570, - "step": 3798 - }, - { - "epoch": 0.45680274153790656, - "grad_norm": 2.4839603848860268, - "learning_rate": 2.3747932360826803e-06, - "loss": 0.7409, - "num_input_tokens_seen": 80735220, - "step": 3799 - }, - { - "epoch": 0.4569229844285457, - "grad_norm": 1.8296477558558883, - "learning_rate": 2.37402803603042e-06, - "loss": 0.8129, - "num_input_tokens_seen": 80752665, - "step": 3800 - }, - { - "epoch": 0.45704322731918473, - "grad_norm": 1.698678050773504, - "learning_rate": 2.3732627792384038e-06, - "loss": 0.6789, - "num_input_tokens_seen": 80773455, - "step": 3801 - }, - { - "epoch": 0.45716347020982384, - "grad_norm": 1.9137999886451664, - "learning_rate": 2.3724974658227207e-06, - "loss": 0.7452, - "num_input_tokens_seen": 80793965, - "step": 3802 - }, - { - "epoch": 0.45728371310046295, - "grad_norm": 1.9631892776388056, - "learning_rate": 2.3717320958994687e-06, - "loss": 0.7041, - "num_input_tokens_seen": 80811245, - "step": 3803 - }, - { - "epoch": 0.457403955991102, - "grad_norm": 3.1920370240549323, - "learning_rate": 2.3709666695847534e-06, - "loss": 0.6925, - "num_input_tokens_seen": 80829145, - "step": 3804 - }, - { - "epoch": 0.4575241988817411, - "grad_norm": 2.060836246666886, - "learning_rate": 2.370201186994689e-06, - "loss": 0.7039, - "num_input_tokens_seen": 80852550, - "step": 3805 - }, - { - "epoch": 0.45764444177238023, - "grad_norm": 1.7283738489376945, - "learning_rate": 2.369435648245399e-06, - "loss": 0.6883, - "num_input_tokens_seen": 80872485, - "step": 3806 - }, - { - "epoch": 0.4577646846630193, - "grad_norm": 2.3010550185635608, - "learning_rate": 2.368670053453015e-06, - "loss": 0.8464, - "num_input_tokens_seen": 80893895, - "step": 3807 - }, - { - "epoch": 0.4578849275536584, - "grad_norm": 2.7277756514888947, - "learning_rate": 2.3679044027336757e-06, - "loss": 0.7334, - "num_input_tokens_seen": 80909505, - "step": 3808 - }, - { - "epoch": 0.4580051704442975, - "grad_norm": 2.8588057214012985, - "learning_rate": 2.3671386962035326e-06, - "loss": 0.6884, - "num_input_tokens_seen": 80926695, - "step": 3809 - }, - { - "epoch": 0.45812541333493656, - "grad_norm": 1.9092598109887318, - "learning_rate": 2.36637293397874e-06, - "loss": 0.6893, - "num_input_tokens_seen": 80943350, - "step": 3810 - }, - { - "epoch": 0.45824565622557567, - "grad_norm": 2.8806544266460046, - "learning_rate": 2.3656071161754657e-06, - "loss": 0.7196, - "num_input_tokens_seen": 80958495, - "step": 3811 - }, - { - "epoch": 0.4583658991162148, - "grad_norm": 2.7351544236256715, - "learning_rate": 2.3648412429098825e-06, - "loss": 0.6706, - "num_input_tokens_seen": 80976565, - "step": 3812 - }, - { - "epoch": 0.45848614200685384, - "grad_norm": 1.87232444334563, - "learning_rate": 2.3640753142981725e-06, - "loss": 0.8183, - "num_input_tokens_seen": 80993740, - "step": 3813 - }, - { - "epoch": 0.45860638489749295, - "grad_norm": 2.912523605110929, - "learning_rate": 2.3633093304565267e-06, - "loss": 0.7004, - "num_input_tokens_seen": 81012515, - "step": 3814 - }, - { - "epoch": 0.458726627788132, - "grad_norm": 2.2363559468469583, - "learning_rate": 2.3625432915011443e-06, - "loss": 0.6323, - "num_input_tokens_seen": 81034145, - "step": 3815 - }, - { - "epoch": 0.4588468706787711, - "grad_norm": 1.6676404979652715, - "learning_rate": 2.361777197548233e-06, - "loss": 0.6493, - "num_input_tokens_seen": 81052695, - "step": 3816 - }, - { - "epoch": 0.4589671135694102, - "grad_norm": 2.057095052356832, - "learning_rate": 2.3610110487140083e-06, - "loss": 0.7465, - "num_input_tokens_seen": 81070850, - "step": 3817 - }, - { - "epoch": 0.4590873564600493, - "grad_norm": 1.6818940299341805, - "learning_rate": 2.3602448451146944e-06, - "loss": 0.8064, - "num_input_tokens_seen": 81090190, - "step": 3818 - }, - { - "epoch": 0.4592075993506884, - "grad_norm": 2.54877569479225, - "learning_rate": 2.3594785868665245e-06, - "loss": 0.6913, - "num_input_tokens_seen": 81106215, - "step": 3819 - }, - { - "epoch": 0.4593278422413275, - "grad_norm": 2.372858756803921, - "learning_rate": 2.3587122740857386e-06, - "loss": 0.7958, - "num_input_tokens_seen": 81123035, - "step": 3820 - }, - { - "epoch": 0.45944808513196655, - "grad_norm": 3.2960880943826423, - "learning_rate": 2.357945906888586e-06, - "loss": 0.7841, - "num_input_tokens_seen": 81142195, - "step": 3821 - }, - { - "epoch": 0.45956832802260567, - "grad_norm": 2.7107916839648376, - "learning_rate": 2.3571794853913234e-06, - "loss": 0.7942, - "num_input_tokens_seen": 81159770, - "step": 3822 - }, - { - "epoch": 0.4596885709132448, - "grad_norm": 1.9438614848448112, - "learning_rate": 2.3564130097102173e-06, - "loss": 0.8496, - "num_input_tokens_seen": 81179145, - "step": 3823 - }, - { - "epoch": 0.45980881380388383, - "grad_norm": 1.740798754292395, - "learning_rate": 2.355646479961541e-06, - "loss": 0.7343, - "num_input_tokens_seen": 81198175, - "step": 3824 - }, - { - "epoch": 0.45992905669452294, - "grad_norm": 2.241563384165876, - "learning_rate": 2.354879896261576e-06, - "loss": 0.7128, - "num_input_tokens_seen": 81218105, - "step": 3825 - }, - { - "epoch": 0.46004929958516205, - "grad_norm": 1.9300561725741652, - "learning_rate": 2.3541132587266133e-06, - "loss": 0.5714, - "num_input_tokens_seen": 81240545, - "step": 3826 - }, - { - "epoch": 0.4601695424758011, - "grad_norm": 1.89391879044887, - "learning_rate": 2.3533465674729515e-06, - "loss": 0.6911, - "num_input_tokens_seen": 81257495, - "step": 3827 - }, - { - "epoch": 0.4602897853664402, - "grad_norm": 2.1205842654379787, - "learning_rate": 2.352579822616895e-06, - "loss": 0.7311, - "num_input_tokens_seen": 81274650, - "step": 3828 - }, - { - "epoch": 0.4604100282570793, - "grad_norm": 2.0948566434895315, - "learning_rate": 2.351813024274761e-06, - "loss": 0.7772, - "num_input_tokens_seen": 81295725, - "step": 3829 - }, - { - "epoch": 0.4605302711477184, - "grad_norm": 2.552269444440281, - "learning_rate": 2.3510461725628693e-06, - "loss": 0.7306, - "num_input_tokens_seen": 81315910, - "step": 3830 - }, - { - "epoch": 0.4606505140383575, - "grad_norm": 2.0580959540314914, - "learning_rate": 2.350279267597554e-06, - "loss": 0.7004, - "num_input_tokens_seen": 81336270, - "step": 3831 - }, - { - "epoch": 0.46077075692899655, - "grad_norm": 2.2330723115707527, - "learning_rate": 2.349512309495151e-06, - "loss": 0.8259, - "num_input_tokens_seen": 81354335, - "step": 3832 - }, - { - "epoch": 0.46089099981963566, - "grad_norm": 5.726609525964799, - "learning_rate": 2.348745298372009e-06, - "loss": 0.7503, - "num_input_tokens_seen": 81377600, - "step": 3833 - }, - { - "epoch": 0.46101124271027477, - "grad_norm": 1.885454183726305, - "learning_rate": 2.347978234344483e-06, - "loss": 0.7873, - "num_input_tokens_seen": 81393525, - "step": 3834 - }, - { - "epoch": 0.4611314856009138, - "grad_norm": 2.35352169795574, - "learning_rate": 2.3472111175289354e-06, - "loss": 0.6922, - "num_input_tokens_seen": 81415545, - "step": 3835 - }, - { - "epoch": 0.46125172849155294, - "grad_norm": 1.6657784260102466, - "learning_rate": 2.3464439480417374e-06, - "loss": 0.7095, - "num_input_tokens_seen": 81434785, - "step": 3836 - }, - { - "epoch": 0.46137197138219205, - "grad_norm": 3.987890317499909, - "learning_rate": 2.3456767259992676e-06, - "loss": 0.765, - "num_input_tokens_seen": 81452150, - "step": 3837 - }, - { - "epoch": 0.4614922142728311, - "grad_norm": 2.416955459657737, - "learning_rate": 2.344909451517913e-06, - "loss": 0.8783, - "num_input_tokens_seen": 81469330, - "step": 3838 - }, - { - "epoch": 0.4616124571634702, - "grad_norm": 1.7602869456078403, - "learning_rate": 2.34414212471407e-06, - "loss": 0.8074, - "num_input_tokens_seen": 81488845, - "step": 3839 - }, - { - "epoch": 0.4617327000541093, - "grad_norm": 2.1242046268862427, - "learning_rate": 2.343374745704139e-06, - "loss": 0.7263, - "num_input_tokens_seen": 81507270, - "step": 3840 - }, - { - "epoch": 0.4618529429447484, - "grad_norm": 2.0528273999808495, - "learning_rate": 2.342607314604533e-06, - "loss": 0.8399, - "num_input_tokens_seen": 81526740, - "step": 3841 - }, - { - "epoch": 0.4619731858353875, - "grad_norm": 1.7722429331940264, - "learning_rate": 2.3418398315316694e-06, - "loss": 0.8417, - "num_input_tokens_seen": 81544280, - "step": 3842 - }, - { - "epoch": 0.4620934287260266, - "grad_norm": 2.690618721842699, - "learning_rate": 2.3410722966019755e-06, - "loss": 0.7817, - "num_input_tokens_seen": 81559115, - "step": 3843 - }, - { - "epoch": 0.46221367161666566, - "grad_norm": 1.7863766648521076, - "learning_rate": 2.3403047099318844e-06, - "loss": 0.659, - "num_input_tokens_seen": 81582905, - "step": 3844 - }, - { - "epoch": 0.46233391450730477, - "grad_norm": 4.1728082994628535, - "learning_rate": 2.3395370716378405e-06, - "loss": 0.7426, - "num_input_tokens_seen": 81600070, - "step": 3845 - }, - { - "epoch": 0.4624541573979438, - "grad_norm": 2.288279965799928, - "learning_rate": 2.338769381836292e-06, - "loss": 0.7181, - "num_input_tokens_seen": 81619400, - "step": 3846 - }, - { - "epoch": 0.46257440028858293, - "grad_norm": 2.2435270205080378, - "learning_rate": 2.3380016406436984e-06, - "loss": 0.7285, - "num_input_tokens_seen": 81636600, - "step": 3847 - }, - { - "epoch": 0.46269464317922204, - "grad_norm": 2.1006268801042793, - "learning_rate": 2.337233848176524e-06, - "loss": 0.8105, - "num_input_tokens_seen": 81654090, - "step": 3848 - }, - { - "epoch": 0.4628148860698611, - "grad_norm": 2.9068527049208135, - "learning_rate": 2.3364660045512435e-06, - "loss": 0.8172, - "num_input_tokens_seen": 81672570, - "step": 3849 - }, - { - "epoch": 0.4629351289605002, - "grad_norm": 0.8045715669279327, - "learning_rate": 2.335698109884337e-06, - "loss": 0.6268, - "num_input_tokens_seen": 81737495, - "step": 3850 - }, - { - "epoch": 0.4630553718511393, - "grad_norm": 0.8581761167215042, - "learning_rate": 2.334930164292294e-06, - "loss": 0.6516, - "num_input_tokens_seen": 81799765, - "step": 3851 - }, - { - "epoch": 0.4631756147417784, - "grad_norm": 2.173689605762162, - "learning_rate": 2.334162167891612e-06, - "loss": 0.7946, - "num_input_tokens_seen": 81816750, - "step": 3852 - }, - { - "epoch": 0.4632958576324175, - "grad_norm": 2.812076533210322, - "learning_rate": 2.333394120798795e-06, - "loss": 0.7357, - "num_input_tokens_seen": 81835205, - "step": 3853 - }, - { - "epoch": 0.4634161005230566, - "grad_norm": 3.3263053764128316, - "learning_rate": 2.332626023130354e-06, - "loss": 0.7212, - "num_input_tokens_seen": 81853525, - "step": 3854 - }, - { - "epoch": 0.46353634341369565, - "grad_norm": 1.8021511067658027, - "learning_rate": 2.3318578750028107e-06, - "loss": 0.8688, - "num_input_tokens_seen": 81871845, - "step": 3855 - }, - { - "epoch": 0.46365658630433476, - "grad_norm": 1.8845017657351006, - "learning_rate": 2.3310896765326916e-06, - "loss": 0.7592, - "num_input_tokens_seen": 81892565, - "step": 3856 - }, - { - "epoch": 0.46377682919497387, - "grad_norm": 1.6916032474828888, - "learning_rate": 2.3303214278365317e-06, - "loss": 0.8299, - "num_input_tokens_seen": 81914155, - "step": 3857 - }, - { - "epoch": 0.4638970720856129, - "grad_norm": 1.857656959469593, - "learning_rate": 2.3295531290308733e-06, - "loss": 0.8228, - "num_input_tokens_seen": 81932025, - "step": 3858 - }, - { - "epoch": 0.46401731497625204, - "grad_norm": 3.8669411752125904, - "learning_rate": 2.3287847802322678e-06, - "loss": 0.757, - "num_input_tokens_seen": 81947315, - "step": 3859 - }, - { - "epoch": 0.4641375578668911, - "grad_norm": 2.0894538407598287, - "learning_rate": 2.328016381557272e-06, - "loss": 0.8323, - "num_input_tokens_seen": 81967630, - "step": 3860 - }, - { - "epoch": 0.4642578007575302, - "grad_norm": 2.322639889941078, - "learning_rate": 2.3272479331224522e-06, - "loss": 0.7583, - "num_input_tokens_seen": 81984780, - "step": 3861 - }, - { - "epoch": 0.4643780436481693, - "grad_norm": 2.0956056372655714, - "learning_rate": 2.3264794350443813e-06, - "loss": 0.7715, - "num_input_tokens_seen": 82006595, - "step": 3862 - }, - { - "epoch": 0.46449828653880837, - "grad_norm": 2.1145336423211387, - "learning_rate": 2.32571088743964e-06, - "loss": 0.7807, - "num_input_tokens_seen": 82027410, - "step": 3863 - }, - { - "epoch": 0.4646185294294475, - "grad_norm": 2.543642922154108, - "learning_rate": 2.3249422904248152e-06, - "loss": 0.74, - "num_input_tokens_seen": 82045565, - "step": 3864 - }, - { - "epoch": 0.4647387723200866, - "grad_norm": 2.0808119822700957, - "learning_rate": 2.324173644116504e-06, - "loss": 0.8646, - "num_input_tokens_seen": 82068135, - "step": 3865 - }, - { - "epoch": 0.46485901521072565, - "grad_norm": 1.877704267265373, - "learning_rate": 2.3234049486313083e-06, - "loss": 0.8099, - "num_input_tokens_seen": 82089305, - "step": 3866 - }, - { - "epoch": 0.46497925810136476, - "grad_norm": 2.090849298199001, - "learning_rate": 2.322636204085839e-06, - "loss": 0.7633, - "num_input_tokens_seen": 82109095, - "step": 3867 - }, - { - "epoch": 0.46509950099200387, - "grad_norm": 2.2182248601223264, - "learning_rate": 2.3218674105967143e-06, - "loss": 0.7762, - "num_input_tokens_seen": 82127080, - "step": 3868 - }, - { - "epoch": 0.4652197438826429, - "grad_norm": 1.882616032141527, - "learning_rate": 2.3210985682805593e-06, - "loss": 0.8291, - "num_input_tokens_seen": 82148580, - "step": 3869 - }, - { - "epoch": 0.46533998677328203, - "grad_norm": 2.658482988574435, - "learning_rate": 2.320329677254007e-06, - "loss": 0.6847, - "num_input_tokens_seen": 82165630, - "step": 3870 - }, - { - "epoch": 0.46546022966392114, - "grad_norm": 2.9410593200771435, - "learning_rate": 2.319560737633697e-06, - "loss": 0.7363, - "num_input_tokens_seen": 82184070, - "step": 3871 - }, - { - "epoch": 0.4655804725545602, - "grad_norm": 1.579394409184122, - "learning_rate": 2.3187917495362775e-06, - "loss": 0.6788, - "num_input_tokens_seen": 82208200, - "step": 3872 - }, - { - "epoch": 0.4657007154451993, - "grad_norm": 2.614772487432404, - "learning_rate": 2.318022713078403e-06, - "loss": 0.7564, - "num_input_tokens_seen": 82222500, - "step": 3873 - }, - { - "epoch": 0.4658209583358384, - "grad_norm": 4.3244900004509175, - "learning_rate": 2.3172536283767354e-06, - "loss": 0.8414, - "num_input_tokens_seen": 82235980, - "step": 3874 - }, - { - "epoch": 0.4659412012264775, - "grad_norm": 2.197490117156266, - "learning_rate": 2.3164844955479447e-06, - "loss": 0.8109, - "num_input_tokens_seen": 82251510, - "step": 3875 - }, - { - "epoch": 0.4660614441171166, - "grad_norm": 2.291343684695357, - "learning_rate": 2.315715314708708e-06, - "loss": 0.7007, - "num_input_tokens_seen": 82273120, - "step": 3876 - }, - { - "epoch": 0.46618168700775564, - "grad_norm": 1.8805075195723835, - "learning_rate": 2.314946085975709e-06, - "loss": 0.8319, - "num_input_tokens_seen": 82291820, - "step": 3877 - }, - { - "epoch": 0.46630192989839475, - "grad_norm": 1.816090353056169, - "learning_rate": 2.3141768094656393e-06, - "loss": 0.8214, - "num_input_tokens_seen": 82310115, - "step": 3878 - }, - { - "epoch": 0.46642217278903386, - "grad_norm": 2.4714507697822126, - "learning_rate": 2.313407485295197e-06, - "loss": 0.8162, - "num_input_tokens_seen": 82326425, - "step": 3879 - }, - { - "epoch": 0.4665424156796729, - "grad_norm": 1.730216099059028, - "learning_rate": 2.312638113581088e-06, - "loss": 0.7805, - "num_input_tokens_seen": 82346630, - "step": 3880 - }, - { - "epoch": 0.46666265857031203, - "grad_norm": 3.3854740760973914, - "learning_rate": 2.311868694440027e-06, - "loss": 0.7803, - "num_input_tokens_seen": 82360770, - "step": 3881 - }, - { - "epoch": 0.46678290146095114, - "grad_norm": 0.7424071643105359, - "learning_rate": 2.311099227988732e-06, - "loss": 0.6427, - "num_input_tokens_seen": 82432415, - "step": 3882 - }, - { - "epoch": 0.4669031443515902, - "grad_norm": 2.999664269308277, - "learning_rate": 2.310329714343932e-06, - "loss": 0.8409, - "num_input_tokens_seen": 82448285, - "step": 3883 - }, - { - "epoch": 0.4670233872422293, - "grad_norm": 2.047351590002029, - "learning_rate": 2.3095601536223605e-06, - "loss": 0.8184, - "num_input_tokens_seen": 82464915, - "step": 3884 - }, - { - "epoch": 0.4671436301328684, - "grad_norm": 2.5253079723960954, - "learning_rate": 2.3087905459407607e-06, - "loss": 0.7418, - "num_input_tokens_seen": 82483575, - "step": 3885 - }, - { - "epoch": 0.46726387302350747, - "grad_norm": 0.8625044948753109, - "learning_rate": 2.3080208914158795e-06, - "loss": 0.6787, - "num_input_tokens_seen": 82546295, - "step": 3886 - }, - { - "epoch": 0.4673841159141466, - "grad_norm": 2.1081035558333787, - "learning_rate": 2.3072511901644753e-06, - "loss": 0.7083, - "num_input_tokens_seen": 82565085, - "step": 3887 - }, - { - "epoch": 0.4675043588047857, - "grad_norm": 2.451618076372208, - "learning_rate": 2.306481442303308e-06, - "loss": 0.8067, - "num_input_tokens_seen": 82584380, - "step": 3888 - }, - { - "epoch": 0.46762460169542475, - "grad_norm": 1.7214738650356083, - "learning_rate": 2.3057116479491515e-06, - "loss": 0.7275, - "num_input_tokens_seen": 82603510, - "step": 3889 - }, - { - "epoch": 0.46774484458606386, - "grad_norm": 3.3682413642582505, - "learning_rate": 2.30494180721878e-06, - "loss": 0.7644, - "num_input_tokens_seen": 82620570, - "step": 3890 - }, - { - "epoch": 0.4678650874767029, - "grad_norm": 1.8958691148341253, - "learning_rate": 2.3041719202289794e-06, - "loss": 0.8897, - "num_input_tokens_seen": 82636465, - "step": 3891 - }, - { - "epoch": 0.467985330367342, - "grad_norm": 1.725798958955176, - "learning_rate": 2.30340198709654e-06, - "loss": 0.7997, - "num_input_tokens_seen": 82656020, - "step": 3892 - }, - { - "epoch": 0.46810557325798113, - "grad_norm": 2.623375700332464, - "learning_rate": 2.3026320079382605e-06, - "loss": 0.7489, - "num_input_tokens_seen": 82672675, - "step": 3893 - }, - { - "epoch": 0.4682258161486202, - "grad_norm": 1.8846448949013452, - "learning_rate": 2.3018619828709454e-06, - "loss": 0.7676, - "num_input_tokens_seen": 82693935, - "step": 3894 - }, - { - "epoch": 0.4683460590392593, - "grad_norm": 2.3363588650319977, - "learning_rate": 2.3010919120114084e-06, - "loss": 0.811, - "num_input_tokens_seen": 82710185, - "step": 3895 - }, - { - "epoch": 0.4684663019298984, - "grad_norm": 2.431443103580903, - "learning_rate": 2.3003217954764672e-06, - "loss": 0.654, - "num_input_tokens_seen": 82724610, - "step": 3896 - }, - { - "epoch": 0.46858654482053747, - "grad_norm": 2.08968901187863, - "learning_rate": 2.299551633382949e-06, - "loss": 0.7893, - "num_input_tokens_seen": 82744640, - "step": 3897 - }, - { - "epoch": 0.4687067877111766, - "grad_norm": 2.8573269069951373, - "learning_rate": 2.298781425847685e-06, - "loss": 0.8466, - "num_input_tokens_seen": 82762160, - "step": 3898 - }, - { - "epoch": 0.4688270306018157, - "grad_norm": 2.6821777141700838, - "learning_rate": 2.2980111729875173e-06, - "loss": 0.6679, - "num_input_tokens_seen": 82778130, - "step": 3899 - }, - { - "epoch": 0.46894727349245474, - "grad_norm": 1.720827964546958, - "learning_rate": 2.2972408749192917e-06, - "loss": 0.8213, - "num_input_tokens_seen": 82795580, - "step": 3900 - }, - { - "epoch": 0.46906751638309385, - "grad_norm": 1.8366391718665733, - "learning_rate": 2.2964705317598613e-06, - "loss": 0.6695, - "num_input_tokens_seen": 82813400, - "step": 3901 - }, - { - "epoch": 0.46918775927373296, - "grad_norm": 2.317822908815742, - "learning_rate": 2.2957001436260866e-06, - "loss": 0.7948, - "num_input_tokens_seen": 82830180, - "step": 3902 - }, - { - "epoch": 0.469308002164372, - "grad_norm": 2.344160572719799, - "learning_rate": 2.294929710634836e-06, - "loss": 0.7182, - "num_input_tokens_seen": 82847990, - "step": 3903 - }, - { - "epoch": 0.46942824505501113, - "grad_norm": 2.3645641591555546, - "learning_rate": 2.294159232902982e-06, - "loss": 0.6058, - "num_input_tokens_seen": 82868815, - "step": 3904 - }, - { - "epoch": 0.46954848794565024, - "grad_norm": 1.9159592729954404, - "learning_rate": 2.2933887105474067e-06, - "loss": 0.7821, - "num_input_tokens_seen": 82886710, - "step": 3905 - }, - { - "epoch": 0.4696687308362893, - "grad_norm": 3.244506162913963, - "learning_rate": 2.2926181436849974e-06, - "loss": 0.8062, - "num_input_tokens_seen": 82905785, - "step": 3906 - }, - { - "epoch": 0.4697889737269284, - "grad_norm": 1.803376994550711, - "learning_rate": 2.291847532432648e-06, - "loss": 0.7256, - "num_input_tokens_seen": 82925225, - "step": 3907 - }, - { - "epoch": 0.46990921661756746, - "grad_norm": 2.418841835450677, - "learning_rate": 2.2910768769072603e-06, - "loss": 0.898, - "num_input_tokens_seen": 82943725, - "step": 3908 - }, - { - "epoch": 0.47002945950820657, - "grad_norm": 2.279320742691122, - "learning_rate": 2.2903061772257417e-06, - "loss": 0.756, - "num_input_tokens_seen": 82961430, - "step": 3909 - }, - { - "epoch": 0.4701497023988457, - "grad_norm": 1.7397273815456287, - "learning_rate": 2.289535433505007e-06, - "loss": 0.782, - "num_input_tokens_seen": 82982505, - "step": 3910 - }, - { - "epoch": 0.47026994528948474, - "grad_norm": 5.749203446013382, - "learning_rate": 2.2887646458619767e-06, - "loss": 0.6271, - "num_input_tokens_seen": 83003590, - "step": 3911 - }, - { - "epoch": 0.47039018818012385, - "grad_norm": 2.194035243044513, - "learning_rate": 2.2879938144135792e-06, - "loss": 0.7645, - "num_input_tokens_seen": 83019415, - "step": 3912 - }, - { - "epoch": 0.47051043107076296, - "grad_norm": 2.3539811010527805, - "learning_rate": 2.2872229392767496e-06, - "loss": 0.7604, - "num_input_tokens_seen": 83039240, - "step": 3913 - }, - { - "epoch": 0.470630673961402, - "grad_norm": 1.5649595653330772, - "learning_rate": 2.286452020568428e-06, - "loss": 0.7468, - "num_input_tokens_seen": 83057035, - "step": 3914 - }, - { - "epoch": 0.4707509168520411, - "grad_norm": 2.039568447136854, - "learning_rate": 2.2856810584055637e-06, - "loss": 0.7339, - "num_input_tokens_seen": 83074290, - "step": 3915 - }, - { - "epoch": 0.47087115974268023, - "grad_norm": 3.0708734225523733, - "learning_rate": 2.2849100529051085e-06, - "loss": 0.676, - "num_input_tokens_seen": 83100945, - "step": 3916 - }, - { - "epoch": 0.4709914026333193, - "grad_norm": 2.8641044071739445, - "learning_rate": 2.284139004184026e-06, - "loss": 0.7961, - "num_input_tokens_seen": 83117895, - "step": 3917 - }, - { - "epoch": 0.4711116455239584, - "grad_norm": 2.5565827131555814, - "learning_rate": 2.2833679123592814e-06, - "loss": 0.7364, - "num_input_tokens_seen": 83134875, - "step": 3918 - }, - { - "epoch": 0.4712318884145975, - "grad_norm": 2.3514728520517383, - "learning_rate": 2.2825967775478508e-06, - "loss": 0.6414, - "num_input_tokens_seen": 83155695, - "step": 3919 - }, - { - "epoch": 0.47135213130523657, - "grad_norm": 2.805163767773821, - "learning_rate": 2.281825599866713e-06, - "loss": 0.8321, - "num_input_tokens_seen": 83173925, - "step": 3920 - }, - { - "epoch": 0.4714723741958757, - "grad_norm": 1.7072142845197524, - "learning_rate": 2.281054379432856e-06, - "loss": 0.7858, - "num_input_tokens_seen": 83192680, - "step": 3921 - }, - { - "epoch": 0.4715926170865148, - "grad_norm": 1.7755542004236387, - "learning_rate": 2.2802831163632735e-06, - "loss": 0.8139, - "num_input_tokens_seen": 83211120, - "step": 3922 - }, - { - "epoch": 0.47171285997715384, - "grad_norm": 1.684280722739875, - "learning_rate": 2.279511810774965e-06, - "loss": 0.7364, - "num_input_tokens_seen": 83232370, - "step": 3923 - }, - { - "epoch": 0.47183310286779295, - "grad_norm": 2.210677256333796, - "learning_rate": 2.2787404627849364e-06, - "loss": 0.7129, - "num_input_tokens_seen": 83251300, - "step": 3924 - }, - { - "epoch": 0.471953345758432, - "grad_norm": 2.431902189779806, - "learning_rate": 2.277969072510202e-06, - "loss": 0.7855, - "num_input_tokens_seen": 83270000, - "step": 3925 - }, - { - "epoch": 0.4720735886490711, - "grad_norm": 1.6858464378133364, - "learning_rate": 2.27719764006778e-06, - "loss": 0.8079, - "num_input_tokens_seen": 83288550, - "step": 3926 - }, - { - "epoch": 0.47219383153971023, - "grad_norm": 1.8263414488137624, - "learning_rate": 2.2764261655746965e-06, - "loss": 0.7809, - "num_input_tokens_seen": 83305765, - "step": 3927 - }, - { - "epoch": 0.4723140744303493, - "grad_norm": 1.6280657967675856, - "learning_rate": 2.2756546491479832e-06, - "loss": 0.7526, - "num_input_tokens_seen": 83326400, - "step": 3928 - }, - { - "epoch": 0.4724343173209884, - "grad_norm": 2.6485678907979313, - "learning_rate": 2.2748830909046793e-06, - "loss": 0.8081, - "num_input_tokens_seen": 83343885, - "step": 3929 - }, - { - "epoch": 0.4725545602116275, - "grad_norm": 2.170293108173598, - "learning_rate": 2.2741114909618283e-06, - "loss": 0.6688, - "num_input_tokens_seen": 83359500, - "step": 3930 - }, - { - "epoch": 0.47267480310226656, - "grad_norm": 1.954242188692323, - "learning_rate": 2.2733398494364828e-06, - "loss": 0.7142, - "num_input_tokens_seen": 83378465, - "step": 3931 - }, - { - "epoch": 0.47279504599290567, - "grad_norm": 2.8175475748822705, - "learning_rate": 2.2725681664456986e-06, - "loss": 0.8427, - "num_input_tokens_seen": 83396750, - "step": 3932 - }, - { - "epoch": 0.4729152888835448, - "grad_norm": 3.3686721436493974, - "learning_rate": 2.271796442106541e-06, - "loss": 0.65, - "num_input_tokens_seen": 83415825, - "step": 3933 - }, - { - "epoch": 0.47303553177418384, - "grad_norm": 0.8263059994767797, - "learning_rate": 2.2710246765360788e-06, - "loss": 0.6081, - "num_input_tokens_seen": 83475805, - "step": 3934 - }, - { - "epoch": 0.47315577466482295, - "grad_norm": 2.207122402478002, - "learning_rate": 2.2702528698513894e-06, - "loss": 0.7347, - "num_input_tokens_seen": 83496650, - "step": 3935 - }, - { - "epoch": 0.47327601755546206, - "grad_norm": 3.0425235715313215, - "learning_rate": 2.269481022169554e-06, - "loss": 0.7815, - "num_input_tokens_seen": 83514965, - "step": 3936 - }, - { - "epoch": 0.4733962604461011, - "grad_norm": 2.148851477634378, - "learning_rate": 2.2687091336076614e-06, - "loss": 0.8039, - "num_input_tokens_seen": 83534025, - "step": 3937 - }, - { - "epoch": 0.4735165033367402, - "grad_norm": 2.088015704959444, - "learning_rate": 2.267937204282807e-06, - "loss": 0.793, - "num_input_tokens_seen": 83550885, - "step": 3938 - }, - { - "epoch": 0.4736367462273793, - "grad_norm": 3.482601147055524, - "learning_rate": 2.2671652343120926e-06, - "loss": 0.7862, - "num_input_tokens_seen": 83571080, - "step": 3939 - }, - { - "epoch": 0.4737569891180184, - "grad_norm": 1.9590040524768826, - "learning_rate": 2.2663932238126236e-06, - "loss": 0.7954, - "num_input_tokens_seen": 83589360, - "step": 3940 - }, - { - "epoch": 0.4738772320086575, - "grad_norm": 2.177625535220625, - "learning_rate": 2.265621172901515e-06, - "loss": 0.7991, - "num_input_tokens_seen": 83612195, - "step": 3941 - }, - { - "epoch": 0.47399747489929656, - "grad_norm": 10.273132087407363, - "learning_rate": 2.264849081695885e-06, - "loss": 0.7148, - "num_input_tokens_seen": 83632910, - "step": 3942 - }, - { - "epoch": 0.47411771778993567, - "grad_norm": 2.40031148998413, - "learning_rate": 2.2640769503128606e-06, - "loss": 0.724, - "num_input_tokens_seen": 83651440, - "step": 3943 - }, - { - "epoch": 0.4742379606805748, - "grad_norm": 1.978315041758226, - "learning_rate": 2.2633047788695727e-06, - "loss": 0.8163, - "num_input_tokens_seen": 83671465, - "step": 3944 - }, - { - "epoch": 0.47435820357121383, - "grad_norm": 4.773291690080734, - "learning_rate": 2.262532567483159e-06, - "loss": 0.637, - "num_input_tokens_seen": 83689745, - "step": 3945 - }, - { - "epoch": 0.47447844646185294, - "grad_norm": 2.034375632627942, - "learning_rate": 2.2617603162707635e-06, - "loss": 0.8026, - "num_input_tokens_seen": 83709875, - "step": 3946 - }, - { - "epoch": 0.47459868935249205, - "grad_norm": 2.6294339345118707, - "learning_rate": 2.2609880253495363e-06, - "loss": 0.8101, - "num_input_tokens_seen": 83729230, - "step": 3947 - }, - { - "epoch": 0.4747189322431311, - "grad_norm": 2.0270839386020802, - "learning_rate": 2.2602156948366326e-06, - "loss": 0.861, - "num_input_tokens_seen": 83748125, - "step": 3948 - }, - { - "epoch": 0.4748391751337702, - "grad_norm": 2.282539378260951, - "learning_rate": 2.2594433248492157e-06, - "loss": 0.642, - "num_input_tokens_seen": 83766820, - "step": 3949 - }, - { - "epoch": 0.47495941802440933, - "grad_norm": 6.6982016438128404, - "learning_rate": 2.2586709155044527e-06, - "loss": 0.7893, - "num_input_tokens_seen": 83787140, - "step": 3950 - }, - { - "epoch": 0.4750796609150484, - "grad_norm": 1.758180981032041, - "learning_rate": 2.257898466919517e-06, - "loss": 0.7524, - "num_input_tokens_seen": 83807825, - "step": 3951 - }, - { - "epoch": 0.4751999038056875, - "grad_norm": 2.0325777523517807, - "learning_rate": 2.2571259792115887e-06, - "loss": 0.6643, - "num_input_tokens_seen": 83828765, - "step": 3952 - }, - { - "epoch": 0.4753201466963266, - "grad_norm": 1.9718263332115764, - "learning_rate": 2.2563534524978544e-06, - "loss": 0.7886, - "num_input_tokens_seen": 83845955, - "step": 3953 - }, - { - "epoch": 0.47544038958696566, - "grad_norm": 2.0720144051374256, - "learning_rate": 2.255580886895505e-06, - "loss": 0.7177, - "num_input_tokens_seen": 83867805, - "step": 3954 - }, - { - "epoch": 0.47556063247760477, - "grad_norm": 2.3896432691769074, - "learning_rate": 2.254808282521738e-06, - "loss": 0.724, - "num_input_tokens_seen": 83886275, - "step": 3955 - }, - { - "epoch": 0.4756808753682438, - "grad_norm": 1.8772941262218292, - "learning_rate": 2.2540356394937573e-06, - "loss": 0.804, - "num_input_tokens_seen": 83904695, - "step": 3956 - }, - { - "epoch": 0.47580111825888294, - "grad_norm": 2.68324119875583, - "learning_rate": 2.253262957928772e-06, - "loss": 0.8334, - "num_input_tokens_seen": 83921300, - "step": 3957 - }, - { - "epoch": 0.47592136114952205, - "grad_norm": 2.0472759145508306, - "learning_rate": 2.2524902379439976e-06, - "loss": 0.7181, - "num_input_tokens_seen": 83939690, - "step": 3958 - }, - { - "epoch": 0.4760416040401611, - "grad_norm": 0.7756131613398751, - "learning_rate": 2.251717479656655e-06, - "loss": 0.6541, - "num_input_tokens_seen": 84004205, - "step": 3959 - }, - { - "epoch": 0.4761618469308002, - "grad_norm": 1.8651759657432803, - "learning_rate": 2.2509446831839704e-06, - "loss": 0.755, - "num_input_tokens_seen": 84023365, - "step": 3960 - }, - { - "epoch": 0.4762820898214393, - "grad_norm": 7.514273013188539, - "learning_rate": 2.250171848643177e-06, - "loss": 0.8097, - "num_input_tokens_seen": 84040375, - "step": 3961 - }, - { - "epoch": 0.4764023327120784, - "grad_norm": 2.5101103493019044, - "learning_rate": 2.249398976151513e-06, - "loss": 0.8695, - "num_input_tokens_seen": 84057645, - "step": 3962 - }, - { - "epoch": 0.4765225756027175, - "grad_norm": 3.5166128883694507, - "learning_rate": 2.248626065826223e-06, - "loss": 0.7886, - "num_input_tokens_seen": 84075570, - "step": 3963 - }, - { - "epoch": 0.4766428184933566, - "grad_norm": 0.7760054816626786, - "learning_rate": 2.247853117784556e-06, - "loss": 0.6413, - "num_input_tokens_seen": 84136285, - "step": 3964 - }, - { - "epoch": 0.47676306138399566, - "grad_norm": 1.9389613937702037, - "learning_rate": 2.2470801321437686e-06, - "loss": 0.8467, - "num_input_tokens_seen": 84158360, - "step": 3965 - }, - { - "epoch": 0.47688330427463477, - "grad_norm": 3.4526233067465695, - "learning_rate": 2.246307109021121e-06, - "loss": 0.6928, - "num_input_tokens_seen": 84175485, - "step": 3966 - }, - { - "epoch": 0.4770035471652739, - "grad_norm": 1.9243364395237526, - "learning_rate": 2.2455340485338817e-06, - "loss": 0.8186, - "num_input_tokens_seen": 84192840, - "step": 3967 - }, - { - "epoch": 0.47712379005591293, - "grad_norm": 2.3013075010670123, - "learning_rate": 2.244760950799322e-06, - "loss": 0.6767, - "num_input_tokens_seen": 84210830, - "step": 3968 - }, - { - "epoch": 0.47724403294655204, - "grad_norm": 2.050949645339831, - "learning_rate": 2.2439878159347203e-06, - "loss": 0.7237, - "num_input_tokens_seen": 84229975, - "step": 3969 - }, - { - "epoch": 0.4773642758371911, - "grad_norm": 0.8479051670346899, - "learning_rate": 2.2432146440573612e-06, - "loss": 0.6274, - "num_input_tokens_seen": 84295655, - "step": 3970 - }, - { - "epoch": 0.4774845187278302, - "grad_norm": 1.8499564084366906, - "learning_rate": 2.242441435284534e-06, - "loss": 0.6591, - "num_input_tokens_seen": 84314250, - "step": 3971 - }, - { - "epoch": 0.4776047616184693, - "grad_norm": 2.4479667811541863, - "learning_rate": 2.2416681897335337e-06, - "loss": 0.85, - "num_input_tokens_seen": 84332120, - "step": 3972 - }, - { - "epoch": 0.4777250045091084, - "grad_norm": 2.389861872400696, - "learning_rate": 2.2408949075216616e-06, - "loss": 0.6658, - "num_input_tokens_seen": 84350920, - "step": 3973 - }, - { - "epoch": 0.4778452473997475, - "grad_norm": 2.154008097914997, - "learning_rate": 2.240121588766223e-06, - "loss": 0.6394, - "num_input_tokens_seen": 84370690, - "step": 3974 - }, - { - "epoch": 0.4779654902903866, - "grad_norm": 2.9392093162559845, - "learning_rate": 2.239348233584531e-06, - "loss": 0.7008, - "num_input_tokens_seen": 84391265, - "step": 3975 - }, - { - "epoch": 0.47808573318102565, - "grad_norm": 2.0706680432012634, - "learning_rate": 2.238574842093901e-06, - "loss": 0.8021, - "num_input_tokens_seen": 84410180, - "step": 3976 - }, - { - "epoch": 0.47820597607166476, - "grad_norm": 2.1677767253567906, - "learning_rate": 2.2378014144116583e-06, - "loss": 0.7262, - "num_input_tokens_seen": 84428710, - "step": 3977 - }, - { - "epoch": 0.4783262189623039, - "grad_norm": 2.055705001606301, - "learning_rate": 2.2370279506551295e-06, - "loss": 0.7907, - "num_input_tokens_seen": 84448010, - "step": 3978 - }, - { - "epoch": 0.47844646185294293, - "grad_norm": 0.9957046896111164, - "learning_rate": 2.2362544509416493e-06, - "loss": 0.7057, - "num_input_tokens_seen": 84499845, - "step": 3979 - }, - { - "epoch": 0.47856670474358204, - "grad_norm": 3.8507252777473995, - "learning_rate": 2.2354809153885572e-06, - "loss": 0.8246, - "num_input_tokens_seen": 84516635, - "step": 3980 - }, - { - "epoch": 0.47868694763422115, - "grad_norm": 1.9571372408487326, - "learning_rate": 2.234707344113197e-06, - "loss": 0.8171, - "num_input_tokens_seen": 84534450, - "step": 3981 - }, - { - "epoch": 0.4788071905248602, - "grad_norm": 1.7771968260322124, - "learning_rate": 2.233933737232919e-06, - "loss": 0.7737, - "num_input_tokens_seen": 84551950, - "step": 3982 - }, - { - "epoch": 0.4789274334154993, - "grad_norm": 2.02263000661705, - "learning_rate": 2.2331600948650793e-06, - "loss": 0.7777, - "num_input_tokens_seen": 84571815, - "step": 3983 - }, - { - "epoch": 0.4790476763061384, - "grad_norm": 1.6973146922126954, - "learning_rate": 2.2323864171270386e-06, - "loss": 0.7969, - "num_input_tokens_seen": 84592805, - "step": 3984 - }, - { - "epoch": 0.4791679191967775, - "grad_norm": 2.020238730697779, - "learning_rate": 2.231612704136164e-06, - "loss": 0.7202, - "num_input_tokens_seen": 84612895, - "step": 3985 - }, - { - "epoch": 0.4792881620874166, - "grad_norm": 2.171744126260886, - "learning_rate": 2.230838956009825e-06, - "loss": 0.7457, - "num_input_tokens_seen": 84628990, - "step": 3986 - }, - { - "epoch": 0.47940840497805565, - "grad_norm": 4.12476011126497, - "learning_rate": 2.2300651728654003e-06, - "loss": 0.7584, - "num_input_tokens_seen": 84643970, - "step": 3987 - }, - { - "epoch": 0.47952864786869476, - "grad_norm": 0.7654208974200336, - "learning_rate": 2.229291354820272e-06, - "loss": 0.6326, - "num_input_tokens_seen": 84704700, - "step": 3988 - }, - { - "epoch": 0.47964889075933387, - "grad_norm": 2.0509428912487686, - "learning_rate": 2.228517501991828e-06, - "loss": 0.7646, - "num_input_tokens_seen": 84723220, - "step": 3989 - }, - { - "epoch": 0.4797691336499729, - "grad_norm": 0.8928135228727033, - "learning_rate": 2.22774361449746e-06, - "loss": 0.6406, - "num_input_tokens_seen": 84779420, - "step": 3990 - }, - { - "epoch": 0.47988937654061203, - "grad_norm": 3.116934679164819, - "learning_rate": 2.2269696924545668e-06, - "loss": 0.7034, - "num_input_tokens_seen": 84796970, - "step": 3991 - }, - { - "epoch": 0.48000961943125114, - "grad_norm": 3.4014357969814744, - "learning_rate": 2.226195735980552e-06, - "loss": 0.7776, - "num_input_tokens_seen": 84813925, - "step": 3992 - }, - { - "epoch": 0.4801298623218902, - "grad_norm": 1.9242909249967262, - "learning_rate": 2.225421745192823e-06, - "loss": 0.737, - "num_input_tokens_seen": 84833800, - "step": 3993 - }, - { - "epoch": 0.4802501052125293, - "grad_norm": 2.5393244345059065, - "learning_rate": 2.2246477202087955e-06, - "loss": 0.7824, - "num_input_tokens_seen": 84854200, - "step": 3994 - }, - { - "epoch": 0.4803703481031684, - "grad_norm": 1.7164779414116171, - "learning_rate": 2.2238736611458875e-06, - "loss": 0.8264, - "num_input_tokens_seen": 84873975, - "step": 3995 - }, - { - "epoch": 0.4804905909938075, - "grad_norm": 1.6664675581919821, - "learning_rate": 2.2230995681215226e-06, - "loss": 0.7082, - "num_input_tokens_seen": 84893220, - "step": 3996 - }, - { - "epoch": 0.4806108338844466, - "grad_norm": 2.709812950415109, - "learning_rate": 2.2223254412531305e-06, - "loss": 0.7865, - "num_input_tokens_seen": 84910310, - "step": 3997 - }, - { - "epoch": 0.4807310767750857, - "grad_norm": 1.901966989291102, - "learning_rate": 2.2215512806581458e-06, - "loss": 0.8147, - "num_input_tokens_seen": 84929090, - "step": 3998 - }, - { - "epoch": 0.48085131966572475, - "grad_norm": 1.9683802677453521, - "learning_rate": 2.2207770864540085e-06, - "loss": 0.7336, - "num_input_tokens_seen": 84947785, - "step": 3999 - }, - { - "epoch": 0.48097156255636386, - "grad_norm": 2.193141402276056, - "learning_rate": 2.2200028587581617e-06, - "loss": 0.7184, - "num_input_tokens_seen": 84965495, - "step": 4000 - }, - { - "epoch": 0.481091805447003, - "grad_norm": 0.8616572911286942, - "learning_rate": 2.2192285976880573e-06, - "loss": 0.6231, - "num_input_tokens_seen": 85029470, - "step": 4001 - }, - { - "epoch": 0.48121204833764203, - "grad_norm": 2.1288868970971944, - "learning_rate": 2.2184543033611485e-06, - "loss": 0.7998, - "num_input_tokens_seen": 85050270, - "step": 4002 - }, - { - "epoch": 0.48133229122828114, - "grad_norm": 2.3965664991241313, - "learning_rate": 2.2176799758948957e-06, - "loss": 0.822, - "num_input_tokens_seen": 85070150, - "step": 4003 - }, - { - "epoch": 0.4814525341189202, - "grad_norm": 3.3311139437323876, - "learning_rate": 2.2169056154067635e-06, - "loss": 0.7221, - "num_input_tokens_seen": 85093790, - "step": 4004 - }, - { - "epoch": 0.4815727770095593, - "grad_norm": 2.7847100909440914, - "learning_rate": 2.216131222014222e-06, - "loss": 0.8181, - "num_input_tokens_seen": 85111585, - "step": 4005 - }, - { - "epoch": 0.4816930199001984, - "grad_norm": 2.330108599202827, - "learning_rate": 2.2153567958347455e-06, - "loss": 0.8027, - "num_input_tokens_seen": 85127515, - "step": 4006 - }, - { - "epoch": 0.48181326279083747, - "grad_norm": 3.725724356300552, - "learning_rate": 2.214582336985815e-06, - "loss": 0.7929, - "num_input_tokens_seen": 85145135, - "step": 4007 - }, - { - "epoch": 0.4819335056814766, - "grad_norm": 2.35969221135231, - "learning_rate": 2.213807845584914e-06, - "loss": 0.6532, - "num_input_tokens_seen": 85162850, - "step": 4008 - }, - { - "epoch": 0.4820537485721157, - "grad_norm": 2.1874587913433223, - "learning_rate": 2.213033321749533e-06, - "loss": 0.7905, - "num_input_tokens_seen": 85181740, - "step": 4009 - }, - { - "epoch": 0.48217399146275475, - "grad_norm": 4.524478683433816, - "learning_rate": 2.2122587655971665e-06, - "loss": 0.6708, - "num_input_tokens_seen": 85196405, - "step": 4010 - }, - { - "epoch": 0.48229423435339386, - "grad_norm": 1.7152819901044982, - "learning_rate": 2.211484177245314e-06, - "loss": 0.6353, - "num_input_tokens_seen": 85215715, - "step": 4011 - }, - { - "epoch": 0.48241447724403297, - "grad_norm": 2.3709313826938097, - "learning_rate": 2.21070955681148e-06, - "loss": 0.7279, - "num_input_tokens_seen": 85234540, - "step": 4012 - }, - { - "epoch": 0.482534720134672, - "grad_norm": 2.1165116851285486, - "learning_rate": 2.2099349044131736e-06, - "loss": 0.7685, - "num_input_tokens_seen": 85255865, - "step": 4013 - }, - { - "epoch": 0.48265496302531113, - "grad_norm": 2.1725383969700403, - "learning_rate": 2.2091602201679095e-06, - "loss": 0.7109, - "num_input_tokens_seen": 85275195, - "step": 4014 - }, - { - "epoch": 0.48277520591595025, - "grad_norm": 2.4907957430021748, - "learning_rate": 2.208385504193206e-06, - "loss": 0.8343, - "num_input_tokens_seen": 85292415, - "step": 4015 - }, - { - "epoch": 0.4828954488065893, - "grad_norm": 2.2364668413140265, - "learning_rate": 2.2076107566065873e-06, - "loss": 0.8091, - "num_input_tokens_seen": 85309920, - "step": 4016 - }, - { - "epoch": 0.4830156916972284, - "grad_norm": 2.8206617945332395, - "learning_rate": 2.206835977525582e-06, - "loss": 0.7474, - "num_input_tokens_seen": 85327950, - "step": 4017 - }, - { - "epoch": 0.48313593458786747, - "grad_norm": 3.7942707788886505, - "learning_rate": 2.206061167067723e-06, - "loss": 0.7761, - "num_input_tokens_seen": 85345780, - "step": 4018 - }, - { - "epoch": 0.4832561774785066, - "grad_norm": 2.543631195380871, - "learning_rate": 2.205286325350549e-06, - "loss": 0.7895, - "num_input_tokens_seen": 85364565, - "step": 4019 - }, - { - "epoch": 0.4833764203691457, - "grad_norm": 4.7489850007640815, - "learning_rate": 2.2045114524916025e-06, - "loss": 0.7224, - "num_input_tokens_seen": 85380910, - "step": 4020 - }, - { - "epoch": 0.48349666325978474, - "grad_norm": 1.9599063030864785, - "learning_rate": 2.2037365486084316e-06, - "loss": 0.7451, - "num_input_tokens_seen": 85403870, - "step": 4021 - }, - { - "epoch": 0.48361690615042385, - "grad_norm": 2.132908766041915, - "learning_rate": 2.202961613818588e-06, - "loss": 0.7728, - "num_input_tokens_seen": 85422590, - "step": 4022 - }, - { - "epoch": 0.48373714904106296, - "grad_norm": 2.2826361280887726, - "learning_rate": 2.202186648239629e-06, - "loss": 0.8243, - "num_input_tokens_seen": 85442245, - "step": 4023 - }, - { - "epoch": 0.483857391931702, - "grad_norm": 2.144311663967737, - "learning_rate": 2.2014116519891166e-06, - "loss": 0.7136, - "num_input_tokens_seen": 85463945, - "step": 4024 - }, - { - "epoch": 0.48397763482234113, - "grad_norm": 1.9919023560553297, - "learning_rate": 2.2006366251846167e-06, - "loss": 0.778, - "num_input_tokens_seen": 85484305, - "step": 4025 - }, - { - "epoch": 0.48409787771298024, - "grad_norm": 1.8952301187257234, - "learning_rate": 2.1998615679436997e-06, - "loss": 0.7452, - "num_input_tokens_seen": 85501565, - "step": 4026 - }, - { - "epoch": 0.4842181206036193, - "grad_norm": 4.178083829411189, - "learning_rate": 2.199086480383942e-06, - "loss": 0.7654, - "num_input_tokens_seen": 85520660, - "step": 4027 - }, - { - "epoch": 0.4843383634942584, - "grad_norm": 3.8547450751253147, - "learning_rate": 2.1983113626229234e-06, - "loss": 0.6644, - "num_input_tokens_seen": 85539630, - "step": 4028 - }, - { - "epoch": 0.4844586063848975, - "grad_norm": 2.1334829684527747, - "learning_rate": 2.1975362147782293e-06, - "loss": 0.7759, - "num_input_tokens_seen": 85558545, - "step": 4029 - }, - { - "epoch": 0.48457884927553657, - "grad_norm": 0.7764013679281967, - "learning_rate": 2.1967610369674476e-06, - "loss": 0.5684, - "num_input_tokens_seen": 85626230, - "step": 4030 - }, - { - "epoch": 0.4846990921661757, - "grad_norm": 3.4520457125485304, - "learning_rate": 2.1959858293081743e-06, - "loss": 0.7772, - "num_input_tokens_seen": 85645085, - "step": 4031 - }, - { - "epoch": 0.4848193350568148, - "grad_norm": 2.1264535195242686, - "learning_rate": 2.1952105919180056e-06, - "loss": 0.7602, - "num_input_tokens_seen": 85664060, - "step": 4032 - }, - { - "epoch": 0.48493957794745385, - "grad_norm": 2.6980222493951445, - "learning_rate": 2.1944353249145456e-06, - "loss": 0.6778, - "num_input_tokens_seen": 85682890, - "step": 4033 - }, - { - "epoch": 0.48505982083809296, - "grad_norm": 1.833618303271678, - "learning_rate": 2.193660028415401e-06, - "loss": 0.7427, - "num_input_tokens_seen": 85703390, - "step": 4034 - }, - { - "epoch": 0.485180063728732, - "grad_norm": 1.870785606655225, - "learning_rate": 2.192884702538185e-06, - "loss": 0.8149, - "num_input_tokens_seen": 85723715, - "step": 4035 - }, - { - "epoch": 0.4853003066193711, - "grad_norm": 1.9949015439646607, - "learning_rate": 2.1921093474005118e-06, - "loss": 0.8383, - "num_input_tokens_seen": 85743650, - "step": 4036 - }, - { - "epoch": 0.48542054951001024, - "grad_norm": 2.0294309289803536, - "learning_rate": 2.191333963120004e-06, - "loss": 0.7853, - "num_input_tokens_seen": 85762350, - "step": 4037 - }, - { - "epoch": 0.4855407924006493, - "grad_norm": 2.510524773136468, - "learning_rate": 2.190558549814286e-06, - "loss": 0.6941, - "num_input_tokens_seen": 85782230, - "step": 4038 - }, - { - "epoch": 0.4856610352912884, - "grad_norm": 2.5748970904994652, - "learning_rate": 2.1897831076009877e-06, - "loss": 0.7883, - "num_input_tokens_seen": 85801590, - "step": 4039 - }, - { - "epoch": 0.4857812781819275, - "grad_norm": 1.8775042628356595, - "learning_rate": 2.1890076365977426e-06, - "loss": 0.7979, - "num_input_tokens_seen": 85821135, - "step": 4040 - }, - { - "epoch": 0.48590152107256657, - "grad_norm": 0.8973110757229292, - "learning_rate": 2.188232136922189e-06, - "loss": 0.5613, - "num_input_tokens_seen": 85878975, - "step": 4041 - }, - { - "epoch": 0.4860217639632057, - "grad_norm": 1.9957205318414821, - "learning_rate": 2.1874566086919704e-06, - "loss": 0.7586, - "num_input_tokens_seen": 85897570, - "step": 4042 - }, - { - "epoch": 0.4861420068538448, - "grad_norm": 2.0863338870701327, - "learning_rate": 2.1866810520247334e-06, - "loss": 0.8718, - "num_input_tokens_seen": 85916160, - "step": 4043 - }, - { - "epoch": 0.48626224974448384, - "grad_norm": 2.509750142977052, - "learning_rate": 2.1859054670381285e-06, - "loss": 0.6482, - "num_input_tokens_seen": 85934785, - "step": 4044 - }, - { - "epoch": 0.48638249263512295, - "grad_norm": 1.7320036008282027, - "learning_rate": 2.1851298538498127e-06, - "loss": 0.7745, - "num_input_tokens_seen": 85954220, - "step": 4045 - }, - { - "epoch": 0.48650273552576206, - "grad_norm": 3.962868497273255, - "learning_rate": 2.1843542125774458e-06, - "loss": 0.7881, - "num_input_tokens_seen": 85974245, - "step": 4046 - }, - { - "epoch": 0.4866229784164011, - "grad_norm": 3.3559082693801114, - "learning_rate": 2.1835785433386907e-06, - "loss": 0.6286, - "num_input_tokens_seen": 85992780, - "step": 4047 - }, - { - "epoch": 0.48674322130704023, - "grad_norm": 2.1645281281371003, - "learning_rate": 2.182802846251216e-06, - "loss": 0.648, - "num_input_tokens_seen": 86012770, - "step": 4048 - }, - { - "epoch": 0.4868634641976793, - "grad_norm": 2.394288272597297, - "learning_rate": 2.182027121432696e-06, - "loss": 0.7224, - "num_input_tokens_seen": 86033115, - "step": 4049 - }, - { - "epoch": 0.4869837070883184, - "grad_norm": 3.705333413781921, - "learning_rate": 2.1812513690008054e-06, - "loss": 0.8106, - "num_input_tokens_seen": 86051955, - "step": 4050 - }, - { - "epoch": 0.4871039499789575, - "grad_norm": 2.896595064472955, - "learning_rate": 2.180475589073227e-06, - "loss": 0.7997, - "num_input_tokens_seen": 86069375, - "step": 4051 - }, - { - "epoch": 0.48722419286959656, - "grad_norm": 1.87295892736607, - "learning_rate": 2.1796997817676452e-06, - "loss": 0.7319, - "num_input_tokens_seen": 86090105, - "step": 4052 - }, - { - "epoch": 0.4873444357602357, - "grad_norm": 1.8030410483364006, - "learning_rate": 2.1789239472017494e-06, - "loss": 0.6706, - "num_input_tokens_seen": 86111475, - "step": 4053 - }, - { - "epoch": 0.4874646786508748, - "grad_norm": 2.805445121700547, - "learning_rate": 2.1781480854932326e-06, - "loss": 0.7227, - "num_input_tokens_seen": 86130960, - "step": 4054 - }, - { - "epoch": 0.48758492154151384, - "grad_norm": 2.1706092976400706, - "learning_rate": 2.1773721967597933e-06, - "loss": 0.7861, - "num_input_tokens_seen": 86149130, - "step": 4055 - }, - { - "epoch": 0.48770516443215295, - "grad_norm": 0.9057332053408153, - "learning_rate": 2.1765962811191322e-06, - "loss": 0.6298, - "num_input_tokens_seen": 86203315, - "step": 4056 - }, - { - "epoch": 0.48782540732279206, - "grad_norm": 0.9385802643009273, - "learning_rate": 2.175820338688956e-06, - "loss": 0.684, - "num_input_tokens_seen": 86265805, - "step": 4057 - }, - { - "epoch": 0.4879456502134311, - "grad_norm": 2.193576747209621, - "learning_rate": 2.175044369586974e-06, - "loss": 0.836, - "num_input_tokens_seen": 86281095, - "step": 4058 - }, - { - "epoch": 0.4880658931040702, - "grad_norm": 1.7538532861930534, - "learning_rate": 2.174268373930901e-06, - "loss": 0.8467, - "num_input_tokens_seen": 86298330, - "step": 4059 - }, - { - "epoch": 0.48818613599470934, - "grad_norm": 4.719760448090094, - "learning_rate": 2.1734923518384537e-06, - "loss": 0.7992, - "num_input_tokens_seen": 86314655, - "step": 4060 - }, - { - "epoch": 0.4883063788853484, - "grad_norm": 2.0171074911475873, - "learning_rate": 2.172716303427355e-06, - "loss": 0.8175, - "num_input_tokens_seen": 86332540, - "step": 4061 - }, - { - "epoch": 0.4884266217759875, - "grad_norm": 2.5119551309686465, - "learning_rate": 2.17194022881533e-06, - "loss": 0.7759, - "num_input_tokens_seen": 86348350, - "step": 4062 - }, - { - "epoch": 0.4885468646666266, - "grad_norm": 1.7473587416142473, - "learning_rate": 2.1711641281201092e-06, - "loss": 0.667, - "num_input_tokens_seen": 86368000, - "step": 4063 - }, - { - "epoch": 0.48866710755726567, - "grad_norm": 2.3072115942313136, - "learning_rate": 2.170388001459426e-06, - "loss": 0.7889, - "num_input_tokens_seen": 86385310, - "step": 4064 - }, - { - "epoch": 0.4887873504479048, - "grad_norm": 53.61829120016976, - "learning_rate": 2.1696118489510182e-06, - "loss": 0.7237, - "num_input_tokens_seen": 86405960, - "step": 4065 - }, - { - "epoch": 0.48890759333854383, - "grad_norm": 7.055357257253438, - "learning_rate": 2.168835670712628e-06, - "loss": 0.7189, - "num_input_tokens_seen": 86425300, - "step": 4066 - }, - { - "epoch": 0.48902783622918294, - "grad_norm": 2.1585127671406954, - "learning_rate": 2.168059466862001e-06, - "loss": 0.6948, - "num_input_tokens_seen": 86443170, - "step": 4067 - }, - { - "epoch": 0.48914807911982205, - "grad_norm": 2.407857100025039, - "learning_rate": 2.1672832375168867e-06, - "loss": 0.8107, - "num_input_tokens_seen": 86461165, - "step": 4068 - }, - { - "epoch": 0.4892683220104611, - "grad_norm": 2.043004033923676, - "learning_rate": 2.1665069827950383e-06, - "loss": 0.748, - "num_input_tokens_seen": 86478170, - "step": 4069 - }, - { - "epoch": 0.4893885649011002, - "grad_norm": 3.653390309101017, - "learning_rate": 2.1657307028142126e-06, - "loss": 0.8606, - "num_input_tokens_seen": 86495430, - "step": 4070 - }, - { - "epoch": 0.48950880779173933, - "grad_norm": 2.092922282495773, - "learning_rate": 2.164954397692171e-06, - "loss": 0.6683, - "num_input_tokens_seen": 86514575, - "step": 4071 - }, - { - "epoch": 0.4896290506823784, - "grad_norm": 1.101250165544609, - "learning_rate": 2.164178067546678e-06, - "loss": 0.8276, - "num_input_tokens_seen": 86573460, - "step": 4072 - }, - { - "epoch": 0.4897492935730175, - "grad_norm": 2.1594961789616023, - "learning_rate": 2.163401712495504e-06, - "loss": 0.9055, - "num_input_tokens_seen": 86590875, - "step": 4073 - }, - { - "epoch": 0.4898695364636566, - "grad_norm": 1.8216365870371665, - "learning_rate": 2.162625332656419e-06, - "loss": 0.7884, - "num_input_tokens_seen": 86609545, - "step": 4074 - }, - { - "epoch": 0.48998977935429566, - "grad_norm": 2.0084334242948776, - "learning_rate": 2.161848928147201e-06, - "loss": 0.769, - "num_input_tokens_seen": 86629535, - "step": 4075 - }, - { - "epoch": 0.4901100222449348, - "grad_norm": 2.162922578281108, - "learning_rate": 2.161072499085629e-06, - "loss": 0.807, - "num_input_tokens_seen": 86648250, - "step": 4076 - }, - { - "epoch": 0.4902302651355739, - "grad_norm": 2.3969922174836853, - "learning_rate": 2.160296045589487e-06, - "loss": 0.8224, - "num_input_tokens_seen": 86671430, - "step": 4077 - }, - { - "epoch": 0.49035050802621294, - "grad_norm": 1.9511942024447229, - "learning_rate": 2.159519567776562e-06, - "loss": 0.6905, - "num_input_tokens_seen": 86690800, - "step": 4078 - }, - { - "epoch": 0.49047075091685205, - "grad_norm": 2.7683633296270123, - "learning_rate": 2.158743065764646e-06, - "loss": 0.697, - "num_input_tokens_seen": 86703955, - "step": 4079 - }, - { - "epoch": 0.4905909938074911, - "grad_norm": 2.2170510855719585, - "learning_rate": 2.1579665396715326e-06, - "loss": 0.7772, - "num_input_tokens_seen": 86723315, - "step": 4080 - }, - { - "epoch": 0.4907112366981302, - "grad_norm": 3.814077948130425, - "learning_rate": 2.157189989615021e-06, - "loss": 0.6608, - "num_input_tokens_seen": 86741625, - "step": 4081 - }, - { - "epoch": 0.4908314795887693, - "grad_norm": 2.184306966192034, - "learning_rate": 2.156413415712913e-06, - "loss": 0.7445, - "num_input_tokens_seen": 86763395, - "step": 4082 - }, - { - "epoch": 0.4909517224794084, - "grad_norm": 1.970678818119009, - "learning_rate": 2.1556368180830144e-06, - "loss": 0.7709, - "num_input_tokens_seen": 86784485, - "step": 4083 - }, - { - "epoch": 0.4910719653700475, - "grad_norm": 2.44523123345503, - "learning_rate": 2.154860196843134e-06, - "loss": 0.8383, - "num_input_tokens_seen": 86803400, - "step": 4084 - }, - { - "epoch": 0.4911922082606866, - "grad_norm": 1.9484428319632539, - "learning_rate": 2.154083552111085e-06, - "loss": 0.7599, - "num_input_tokens_seen": 86822290, - "step": 4085 - }, - { - "epoch": 0.49131245115132566, - "grad_norm": 1.9132718309552519, - "learning_rate": 2.153306884004683e-06, - "loss": 0.8118, - "num_input_tokens_seen": 86842275, - "step": 4086 - }, - { - "epoch": 0.49143269404196477, - "grad_norm": 2.594276396172624, - "learning_rate": 2.152530192641749e-06, - "loss": 0.6065, - "num_input_tokens_seen": 86856905, - "step": 4087 - }, - { - "epoch": 0.4915529369326039, - "grad_norm": 1.8860865038356716, - "learning_rate": 2.1517534781401063e-06, - "loss": 0.7118, - "num_input_tokens_seen": 86874505, - "step": 4088 - }, - { - "epoch": 0.49167317982324293, - "grad_norm": 2.3161062018129237, - "learning_rate": 2.150976740617581e-06, - "loss": 0.687, - "num_input_tokens_seen": 86890785, - "step": 4089 - }, - { - "epoch": 0.49179342271388204, - "grad_norm": 2.3066504983486418, - "learning_rate": 2.1501999801920055e-06, - "loss": 0.7204, - "num_input_tokens_seen": 86909625, - "step": 4090 - }, - { - "epoch": 0.49191366560452116, - "grad_norm": 2.9919550686542973, - "learning_rate": 2.1494231969812114e-06, - "loss": 0.8104, - "num_input_tokens_seen": 86928335, - "step": 4091 - }, - { - "epoch": 0.4920339084951602, - "grad_norm": 2.4962750541457406, - "learning_rate": 2.1486463911030372e-06, - "loss": 0.8067, - "num_input_tokens_seen": 86948705, - "step": 4092 - }, - { - "epoch": 0.4921541513857993, - "grad_norm": 2.702063246045616, - "learning_rate": 2.147869562675324e-06, - "loss": 0.7367, - "num_input_tokens_seen": 86967395, - "step": 4093 - }, - { - "epoch": 0.49227439427643843, - "grad_norm": 15.30632331128531, - "learning_rate": 2.147092711815915e-06, - "loss": 0.7129, - "num_input_tokens_seen": 86986465, - "step": 4094 - }, - { - "epoch": 0.4923946371670775, - "grad_norm": 2.7398572650106554, - "learning_rate": 2.1463158386426593e-06, - "loss": 0.8588, - "num_input_tokens_seen": 87003995, - "step": 4095 - }, - { - "epoch": 0.4925148800577166, - "grad_norm": 8.738477506568747, - "learning_rate": 2.145538943273407e-06, - "loss": 0.7785, - "num_input_tokens_seen": 87023990, - "step": 4096 - }, - { - "epoch": 0.49263512294835565, - "grad_norm": 1.7592507678083682, - "learning_rate": 2.144762025826013e-06, - "loss": 0.7128, - "num_input_tokens_seen": 87042800, - "step": 4097 - }, - { - "epoch": 0.49275536583899476, - "grad_norm": 2.176938157652189, - "learning_rate": 2.143985086418334e-06, - "loss": 0.8634, - "num_input_tokens_seen": 87057700, - "step": 4098 - }, - { - "epoch": 0.4928756087296339, - "grad_norm": 1.8635329891005548, - "learning_rate": 2.1432081251682324e-06, - "loss": 0.7652, - "num_input_tokens_seen": 87077790, - "step": 4099 - }, - { - "epoch": 0.49299585162027293, - "grad_norm": 1.7899470456238205, - "learning_rate": 2.142431142193572e-06, - "loss": 0.8574, - "num_input_tokens_seen": 87095290, - "step": 4100 - }, - { - "epoch": 0.49311609451091204, - "grad_norm": 2.8560385404588695, - "learning_rate": 2.1416541376122207e-06, - "loss": 0.7173, - "num_input_tokens_seen": 87115190, - "step": 4101 - }, - { - "epoch": 0.49323633740155115, - "grad_norm": 2.022316662730115, - "learning_rate": 2.1408771115420488e-06, - "loss": 0.7236, - "num_input_tokens_seen": 87134770, - "step": 4102 - }, - { - "epoch": 0.4933565802921902, - "grad_norm": 2.304175189096094, - "learning_rate": 2.140100064100932e-06, - "loss": 0.6418, - "num_input_tokens_seen": 87150465, - "step": 4103 - }, - { - "epoch": 0.4934768231828293, - "grad_norm": 3.1159184717463106, - "learning_rate": 2.139322995406746e-06, - "loss": 0.7501, - "num_input_tokens_seen": 87167820, - "step": 4104 - }, - { - "epoch": 0.4935970660734684, - "grad_norm": 3.0053407868676048, - "learning_rate": 2.138545905577373e-06, - "loss": 0.7994, - "num_input_tokens_seen": 87185730, - "step": 4105 - }, - { - "epoch": 0.4937173089641075, - "grad_norm": 2.089165838391317, - "learning_rate": 2.137768794730696e-06, - "loss": 0.7363, - "num_input_tokens_seen": 87208900, - "step": 4106 - }, - { - "epoch": 0.4938375518547466, - "grad_norm": 2.557553193540661, - "learning_rate": 2.1369916629846026e-06, - "loss": 0.7975, - "num_input_tokens_seen": 87228370, - "step": 4107 - }, - { - "epoch": 0.4939577947453857, - "grad_norm": 2.054041484905405, - "learning_rate": 2.136214510456982e-06, - "loss": 0.7443, - "num_input_tokens_seen": 87246545, - "step": 4108 - }, - { - "epoch": 0.49407803763602476, - "grad_norm": 1.0363077091313688, - "learning_rate": 2.1354373372657296e-06, - "loss": 0.7052, - "num_input_tokens_seen": 87304705, - "step": 4109 - }, - { - "epoch": 0.49419828052666387, - "grad_norm": 1.8600743719006412, - "learning_rate": 2.13466014352874e-06, - "loss": 0.7018, - "num_input_tokens_seen": 87326695, - "step": 4110 - }, - { - "epoch": 0.494318523417303, - "grad_norm": 1.9646913734794522, - "learning_rate": 2.1338829293639144e-06, - "loss": 0.7963, - "num_input_tokens_seen": 87346775, - "step": 4111 - }, - { - "epoch": 0.49443876630794203, - "grad_norm": 2.6443894618127706, - "learning_rate": 2.1331056948891547e-06, - "loss": 0.8261, - "num_input_tokens_seen": 87363595, - "step": 4112 - }, - { - "epoch": 0.49455900919858115, - "grad_norm": 2.6113874791891223, - "learning_rate": 2.1323284402223666e-06, - "loss": 0.7587, - "num_input_tokens_seen": 87379305, - "step": 4113 - }, - { - "epoch": 0.4946792520892202, - "grad_norm": 2.227278235725536, - "learning_rate": 2.1315511654814597e-06, - "loss": 0.8718, - "num_input_tokens_seen": 87397435, - "step": 4114 - }, - { - "epoch": 0.4947994949798593, - "grad_norm": 1.7866041715286072, - "learning_rate": 2.1307738707843456e-06, - "loss": 0.7755, - "num_input_tokens_seen": 87416820, - "step": 4115 - }, - { - "epoch": 0.4949197378704984, - "grad_norm": 2.8598250254887145, - "learning_rate": 2.1299965562489385e-06, - "loss": 0.6876, - "num_input_tokens_seen": 87436345, - "step": 4116 - }, - { - "epoch": 0.4950399807611375, - "grad_norm": 1.5440965830243054, - "learning_rate": 2.129219221993158e-06, - "loss": 0.785, - "num_input_tokens_seen": 87460850, - "step": 4117 - }, - { - "epoch": 0.4951602236517766, - "grad_norm": 0.8636435645482484, - "learning_rate": 2.128441868134924e-06, - "loss": 0.645, - "num_input_tokens_seen": 87522505, - "step": 4118 - }, - { - "epoch": 0.4952804665424157, - "grad_norm": 2.4562523951896575, - "learning_rate": 2.1276644947921606e-06, - "loss": 0.8261, - "num_input_tokens_seen": 87541140, - "step": 4119 - }, - { - "epoch": 0.49540070943305475, - "grad_norm": 2.1970344098413483, - "learning_rate": 2.126887102082795e-06, - "loss": 0.8229, - "num_input_tokens_seen": 87560885, - "step": 4120 - }, - { - "epoch": 0.49552095232369386, - "grad_norm": 5.336959057457993, - "learning_rate": 2.126109690124757e-06, - "loss": 0.6976, - "num_input_tokens_seen": 87581420, - "step": 4121 - }, - { - "epoch": 0.495641195214333, - "grad_norm": 1.939125325047874, - "learning_rate": 2.1253322590359786e-06, - "loss": 0.7107, - "num_input_tokens_seen": 87600475, - "step": 4122 - }, - { - "epoch": 0.49576143810497203, - "grad_norm": 2.6349657660950188, - "learning_rate": 2.124554808934397e-06, - "loss": 0.7356, - "num_input_tokens_seen": 87620775, - "step": 4123 - }, - { - "epoch": 0.49588168099561114, - "grad_norm": 2.454464280317344, - "learning_rate": 2.123777339937949e-06, - "loss": 0.7294, - "num_input_tokens_seen": 87641460, - "step": 4124 - }, - { - "epoch": 0.49600192388625025, - "grad_norm": 1.9384378019179296, - "learning_rate": 2.122999852164578e-06, - "loss": 0.866, - "num_input_tokens_seen": 87661800, - "step": 4125 - }, - { - "epoch": 0.4961221667768893, - "grad_norm": 2.4404306942510847, - "learning_rate": 2.122222345732227e-06, - "loss": 0.5704, - "num_input_tokens_seen": 87681435, - "step": 4126 - }, - { - "epoch": 0.4962424096675284, - "grad_norm": 2.6704386941591354, - "learning_rate": 2.1214448207588434e-06, - "loss": 0.828, - "num_input_tokens_seen": 87699795, - "step": 4127 - }, - { - "epoch": 0.49636265255816747, - "grad_norm": 3.7554765886963457, - "learning_rate": 2.120667277362376e-06, - "loss": 0.7695, - "num_input_tokens_seen": 87718230, - "step": 4128 - }, - { - "epoch": 0.4964828954488066, - "grad_norm": 2.3844792465803053, - "learning_rate": 2.1198897156607796e-06, - "loss": 0.8418, - "num_input_tokens_seen": 87735305, - "step": 4129 - }, - { - "epoch": 0.4966031383394457, - "grad_norm": 4.067079400434869, - "learning_rate": 2.119112135772008e-06, - "loss": 0.7343, - "num_input_tokens_seen": 87753085, - "step": 4130 - }, - { - "epoch": 0.49672338123008475, - "grad_norm": 1.8424606088018047, - "learning_rate": 2.1183345378140206e-06, - "loss": 0.7425, - "num_input_tokens_seen": 87772550, - "step": 4131 - }, - { - "epoch": 0.49684362412072386, - "grad_norm": 0.982729697096362, - "learning_rate": 2.117556921904778e-06, - "loss": 0.6602, - "num_input_tokens_seen": 87833710, - "step": 4132 - }, - { - "epoch": 0.49696386701136297, - "grad_norm": 6.820503470932465, - "learning_rate": 2.1167792881622437e-06, - "loss": 0.7222, - "num_input_tokens_seen": 87852450, - "step": 4133 - }, - { - "epoch": 0.497084109902002, - "grad_norm": 2.4952973125165623, - "learning_rate": 2.116001636704384e-06, - "loss": 0.8009, - "num_input_tokens_seen": 87872555, - "step": 4134 - }, - { - "epoch": 0.49720435279264114, - "grad_norm": 2.110032017168059, - "learning_rate": 2.1152239676491685e-06, - "loss": 0.8069, - "num_input_tokens_seen": 87890380, - "step": 4135 - }, - { - "epoch": 0.49732459568328025, - "grad_norm": 13.799250221120738, - "learning_rate": 2.1144462811145685e-06, - "loss": 0.7375, - "num_input_tokens_seen": 87909120, - "step": 4136 - }, - { - "epoch": 0.4974448385739193, - "grad_norm": 2.466884498108347, - "learning_rate": 2.1136685772185587e-06, - "loss": 0.7511, - "num_input_tokens_seen": 87927865, - "step": 4137 - }, - { - "epoch": 0.4975650814645584, - "grad_norm": 1.7264769270221016, - "learning_rate": 2.1128908560791163e-06, - "loss": 0.7722, - "num_input_tokens_seen": 87947415, - "step": 4138 - }, - { - "epoch": 0.4976853243551975, - "grad_norm": 3.3374382461582988, - "learning_rate": 2.1121131178142203e-06, - "loss": 0.7786, - "num_input_tokens_seen": 87966500, - "step": 4139 - }, - { - "epoch": 0.4978055672458366, - "grad_norm": 2.1306517597334236, - "learning_rate": 2.1113353625418544e-06, - "loss": 0.8156, - "num_input_tokens_seen": 87984770, - "step": 4140 - }, - { - "epoch": 0.4979258101364757, - "grad_norm": 2.5348808693605753, - "learning_rate": 2.1105575903800017e-06, - "loss": 0.7886, - "num_input_tokens_seen": 88003210, - "step": 4141 - }, - { - "epoch": 0.4980460530271148, - "grad_norm": 1.855583746941516, - "learning_rate": 2.1097798014466502e-06, - "loss": 0.846, - "num_input_tokens_seen": 88022530, - "step": 4142 - }, - { - "epoch": 0.49816629591775385, - "grad_norm": 2.489770694251267, - "learning_rate": 2.109001995859791e-06, - "loss": 0.5809, - "num_input_tokens_seen": 88041150, - "step": 4143 - }, - { - "epoch": 0.49828653880839296, - "grad_norm": 0.7857729028641107, - "learning_rate": 2.108224173737415e-06, - "loss": 0.6299, - "num_input_tokens_seen": 88104170, - "step": 4144 - }, - { - "epoch": 0.498406781699032, - "grad_norm": 3.8151541126502857, - "learning_rate": 2.1074463351975183e-06, - "loss": 0.7579, - "num_input_tokens_seen": 88122775, - "step": 4145 - }, - { - "epoch": 0.49852702458967113, - "grad_norm": 2.2004693243475564, - "learning_rate": 2.1066684803580977e-06, - "loss": 0.7108, - "num_input_tokens_seen": 88142720, - "step": 4146 - }, - { - "epoch": 0.49864726748031024, - "grad_norm": 1.591730928756832, - "learning_rate": 2.1058906093371536e-06, - "loss": 0.7049, - "num_input_tokens_seen": 88160955, - "step": 4147 - }, - { - "epoch": 0.4987675103709493, - "grad_norm": 0.7027569594949499, - "learning_rate": 2.1051127222526883e-06, - "loss": 0.645, - "num_input_tokens_seen": 88232790, - "step": 4148 - }, - { - "epoch": 0.4988877532615884, - "grad_norm": 1.8119690419736563, - "learning_rate": 2.104334819222707e-06, - "loss": 0.797, - "num_input_tokens_seen": 88252880, - "step": 4149 - }, - { - "epoch": 0.4990079961522275, - "grad_norm": 2.5851918849174464, - "learning_rate": 2.1035569003652156e-06, - "loss": 0.6268, - "num_input_tokens_seen": 88271230, - "step": 4150 - }, - { - "epoch": 0.4991282390428666, - "grad_norm": 2.307667873597635, - "learning_rate": 2.1027789657982255e-06, - "loss": 0.813, - "num_input_tokens_seen": 88285165, - "step": 4151 - }, - { - "epoch": 0.4992484819335057, - "grad_norm": 1.9705379995932804, - "learning_rate": 2.102001015639748e-06, - "loss": 0.768, - "num_input_tokens_seen": 88302105, - "step": 4152 - }, - { - "epoch": 0.4993687248241448, - "grad_norm": 1.9656643552084356, - "learning_rate": 2.101223050007797e-06, - "loss": 0.7752, - "num_input_tokens_seen": 88320375, - "step": 4153 - }, - { - "epoch": 0.49948896771478385, - "grad_norm": 0.8792469235928111, - "learning_rate": 2.10044506902039e-06, - "loss": 0.5815, - "num_input_tokens_seen": 88376175, - "step": 4154 - }, - { - "epoch": 0.49960921060542296, - "grad_norm": 1.0007593181860748, - "learning_rate": 2.099667072795546e-06, - "loss": 0.7138, - "num_input_tokens_seen": 88438015, - "step": 4155 - }, - { - "epoch": 0.49972945349606207, - "grad_norm": 2.3208804633713, - "learning_rate": 2.0988890614512864e-06, - "loss": 0.7933, - "num_input_tokens_seen": 88457625, - "step": 4156 - }, - { - "epoch": 0.4998496963867011, - "grad_norm": 2.0836713806088243, - "learning_rate": 2.098111035105635e-06, - "loss": 0.8279, - "num_input_tokens_seen": 88475770, - "step": 4157 - }, - { - "epoch": 0.49996993927734024, - "grad_norm": 2.190331275261078, - "learning_rate": 2.0973329938766172e-06, - "loss": 0.7283, - "num_input_tokens_seen": 88492920, - "step": 4158 - }, - { - "epoch": 0.5000901821679793, - "grad_norm": 3.094957335929545, - "learning_rate": 2.0965549378822618e-06, - "loss": 0.7854, - "num_input_tokens_seen": 88513930, - "step": 4159 - }, - { - "epoch": 0.5002104250586185, - "grad_norm": 3.703561861804555, - "learning_rate": 2.095776867240599e-06, - "loss": 0.8331, - "num_input_tokens_seen": 88530640, - "step": 4160 - }, - { - "epoch": 0.5003306679492575, - "grad_norm": 3.85246810192429, - "learning_rate": 2.094998782069661e-06, - "loss": 0.8244, - "num_input_tokens_seen": 88548065, - "step": 4161 - }, - { - "epoch": 0.5004509108398966, - "grad_norm": 2.2560065038443073, - "learning_rate": 2.0942206824874845e-06, - "loss": 0.743, - "num_input_tokens_seen": 88570560, - "step": 4162 - }, - { - "epoch": 0.5005711537305357, - "grad_norm": 5.897810970123794, - "learning_rate": 2.093442568612105e-06, - "loss": 0.7934, - "num_input_tokens_seen": 88588085, - "step": 4163 - }, - { - "epoch": 0.5006913966211748, - "grad_norm": 2.8639942633022284, - "learning_rate": 2.0926644405615613e-06, - "loss": 0.8428, - "num_input_tokens_seen": 88608705, - "step": 4164 - }, - { - "epoch": 0.5008116395118138, - "grad_norm": 2.233737780371001, - "learning_rate": 2.091886298453897e-06, - "loss": 0.8155, - "num_input_tokens_seen": 88626610, - "step": 4165 - }, - { - "epoch": 0.500931882402453, - "grad_norm": 3.3045622628933837, - "learning_rate": 2.091108142407153e-06, - "loss": 0.7345, - "num_input_tokens_seen": 88645070, - "step": 4166 - }, - { - "epoch": 0.5010521252930921, - "grad_norm": 0.9508381683313963, - "learning_rate": 2.090329972539377e-06, - "loss": 0.6933, - "num_input_tokens_seen": 88703355, - "step": 4167 - }, - { - "epoch": 0.5011723681837311, - "grad_norm": 2.559090510837635, - "learning_rate": 2.0895517889686155e-06, - "loss": 0.6781, - "num_input_tokens_seen": 88721040, - "step": 4168 - }, - { - "epoch": 0.5012926110743702, - "grad_norm": 0.9205286407463413, - "learning_rate": 2.0887735918129194e-06, - "loss": 0.6486, - "num_input_tokens_seen": 88777325, - "step": 4169 - }, - { - "epoch": 0.5014128539650093, - "grad_norm": 11.974238113996211, - "learning_rate": 2.0879953811903396e-06, - "loss": 0.849, - "num_input_tokens_seen": 88791930, - "step": 4170 - }, - { - "epoch": 0.5015330968556484, - "grad_norm": 5.20233103910194, - "learning_rate": 2.0872171572189305e-06, - "loss": 0.7786, - "num_input_tokens_seen": 88810975, - "step": 4171 - }, - { - "epoch": 0.5016533397462875, - "grad_norm": 2.587323056128286, - "learning_rate": 2.0864389200167477e-06, - "loss": 0.7649, - "num_input_tokens_seen": 88828950, - "step": 4172 - }, - { - "epoch": 0.5017735826369266, - "grad_norm": 3.5283126849474202, - "learning_rate": 2.0856606697018504e-06, - "loss": 0.7872, - "num_input_tokens_seen": 88846680, - "step": 4173 - }, - { - "epoch": 0.5018938255275657, - "grad_norm": 31.681178241362144, - "learning_rate": 2.0848824063922966e-06, - "loss": 0.732, - "num_input_tokens_seen": 88864360, - "step": 4174 - }, - { - "epoch": 0.5020140684182047, - "grad_norm": 5.457804274314919, - "learning_rate": 2.0841041302061496e-06, - "loss": 0.6969, - "num_input_tokens_seen": 88883540, - "step": 4175 - }, - { - "epoch": 0.5021343113088439, - "grad_norm": 2.8333727765424563, - "learning_rate": 2.0833258412614728e-06, - "loss": 0.748, - "num_input_tokens_seen": 88902320, - "step": 4176 - }, - { - "epoch": 0.502254554199483, - "grad_norm": 3.121180037803544, - "learning_rate": 2.0825475396763322e-06, - "loss": 0.6662, - "num_input_tokens_seen": 88922690, - "step": 4177 - }, - { - "epoch": 0.502374797090122, - "grad_norm": 1.7486369089352134, - "learning_rate": 2.081769225568796e-06, - "loss": 0.6466, - "num_input_tokens_seen": 88944860, - "step": 4178 - }, - { - "epoch": 0.5024950399807612, - "grad_norm": 2.129365363679905, - "learning_rate": 2.0809908990569327e-06, - "loss": 0.7573, - "num_input_tokens_seen": 88966360, - "step": 4179 - }, - { - "epoch": 0.5026152828714002, - "grad_norm": 4.08739389311478, - "learning_rate": 2.080212560258814e-06, - "loss": 0.7927, - "num_input_tokens_seen": 88985345, - "step": 4180 - }, - { - "epoch": 0.5027355257620393, - "grad_norm": 2.173552467331007, - "learning_rate": 2.0794342092925146e-06, - "loss": 0.6672, - "num_input_tokens_seen": 89006200, - "step": 4181 - }, - { - "epoch": 0.5028557686526784, - "grad_norm": 2.9070891497657763, - "learning_rate": 2.078655846276108e-06, - "loss": 0.6817, - "num_input_tokens_seen": 89026250, - "step": 4182 - }, - { - "epoch": 0.5029760115433175, - "grad_norm": 2.7557653643091298, - "learning_rate": 2.0778774713276727e-06, - "loss": 0.6739, - "num_input_tokens_seen": 89045445, - "step": 4183 - }, - { - "epoch": 0.5030962544339566, - "grad_norm": 2.4909336152406008, - "learning_rate": 2.077099084565287e-06, - "loss": 0.6604, - "num_input_tokens_seen": 89062570, - "step": 4184 - }, - { - "epoch": 0.5032164973245957, - "grad_norm": 3.236765329882337, - "learning_rate": 2.0763206861070313e-06, - "loss": 0.6406, - "num_input_tokens_seen": 89081350, - "step": 4185 - }, - { - "epoch": 0.5033367402152348, - "grad_norm": 3.128946858176071, - "learning_rate": 2.0755422760709876e-06, - "loss": 0.7448, - "num_input_tokens_seen": 89098470, - "step": 4186 - }, - { - "epoch": 0.5034569831058738, - "grad_norm": 6.150868167371143, - "learning_rate": 2.0747638545752417e-06, - "loss": 0.7603, - "num_input_tokens_seen": 89116750, - "step": 4187 - }, - { - "epoch": 0.503577225996513, - "grad_norm": 2.913340756215938, - "learning_rate": 2.073985421737878e-06, - "loss": 0.8221, - "num_input_tokens_seen": 89133780, - "step": 4188 - }, - { - "epoch": 0.5036974688871521, - "grad_norm": 6.559936612330904, - "learning_rate": 2.0732069776769844e-06, - "loss": 0.7446, - "num_input_tokens_seen": 89150910, - "step": 4189 - }, - { - "epoch": 0.5038177117777911, - "grad_norm": 2.754252862776433, - "learning_rate": 2.0724285225106505e-06, - "loss": 0.725, - "num_input_tokens_seen": 89167195, - "step": 4190 - }, - { - "epoch": 0.5039379546684303, - "grad_norm": 2.8177142561799164, - "learning_rate": 2.0716500563569677e-06, - "loss": 0.7598, - "num_input_tokens_seen": 89184455, - "step": 4191 - }, - { - "epoch": 0.5040581975590693, - "grad_norm": 3.155598690182525, - "learning_rate": 2.070871579334028e-06, - "loss": 0.7944, - "num_input_tokens_seen": 89203285, - "step": 4192 - }, - { - "epoch": 0.5041784404497084, - "grad_norm": 2.849103600292471, - "learning_rate": 2.070093091559927e-06, - "loss": 0.7204, - "num_input_tokens_seen": 89222735, - "step": 4193 - }, - { - "epoch": 0.5042986833403476, - "grad_norm": 2.374926093173233, - "learning_rate": 2.0693145931527583e-06, - "loss": 0.7771, - "num_input_tokens_seen": 89238935, - "step": 4194 - }, - { - "epoch": 0.5044189262309866, - "grad_norm": 3.5498692524042124, - "learning_rate": 2.068536084230622e-06, - "loss": 0.7797, - "num_input_tokens_seen": 89260520, - "step": 4195 - }, - { - "epoch": 0.5045391691216257, - "grad_norm": 3.1018330060739374, - "learning_rate": 2.0677575649116155e-06, - "loss": 0.8775, - "num_input_tokens_seen": 89278815, - "step": 4196 - }, - { - "epoch": 0.5046594120122648, - "grad_norm": 5.103455401470044, - "learning_rate": 2.0669790353138407e-06, - "loss": 0.9209, - "num_input_tokens_seen": 89297500, - "step": 4197 - }, - { - "epoch": 0.5047796549029039, - "grad_norm": 3.757458134993652, - "learning_rate": 2.066200495555399e-06, - "loss": 0.7203, - "num_input_tokens_seen": 89316920, - "step": 4198 - }, - { - "epoch": 0.5048998977935429, - "grad_norm": 2.7735056121167405, - "learning_rate": 2.065421945754396e-06, - "loss": 0.7561, - "num_input_tokens_seen": 89334370, - "step": 4199 - }, - { - "epoch": 0.505020140684182, - "grad_norm": 2.4737853612715717, - "learning_rate": 2.0646433860289344e-06, - "loss": 0.7731, - "num_input_tokens_seen": 89353015, - "step": 4200 - }, - { - "epoch": 0.5051403835748212, - "grad_norm": 2.4757781453171863, - "learning_rate": 2.0638648164971233e-06, - "loss": 0.8141, - "num_input_tokens_seen": 89371200, - "step": 4201 - }, - { - "epoch": 0.5052606264654602, - "grad_norm": 10.480835296454176, - "learning_rate": 2.0630862372770697e-06, - "loss": 0.8837, - "num_input_tokens_seen": 89391020, - "step": 4202 - }, - { - "epoch": 0.5053808693560993, - "grad_norm": 3.0684361374878746, - "learning_rate": 2.0623076484868846e-06, - "loss": 0.7495, - "num_input_tokens_seen": 89408195, - "step": 4203 - }, - { - "epoch": 0.5055011122467384, - "grad_norm": 0.9356533116447043, - "learning_rate": 2.061529050244679e-06, - "loss": 0.6841, - "num_input_tokens_seen": 89467660, - "step": 4204 - }, - { - "epoch": 0.5056213551373775, - "grad_norm": 2.8189475198886607, - "learning_rate": 2.060750442668565e-06, - "loss": 0.7507, - "num_input_tokens_seen": 89485135, - "step": 4205 - }, - { - "epoch": 0.5057415980280165, - "grad_norm": 3.7122716323904834, - "learning_rate": 2.059971825876657e-06, - "loss": 0.6385, - "num_input_tokens_seen": 89499365, - "step": 4206 - }, - { - "epoch": 0.5058618409186557, - "grad_norm": 2.4775335641735197, - "learning_rate": 2.0591931999870713e-06, - "loss": 0.7571, - "num_input_tokens_seen": 89518010, - "step": 4207 - }, - { - "epoch": 0.5059820838092948, - "grad_norm": 0.9226974977271406, - "learning_rate": 2.0584145651179234e-06, - "loss": 0.6363, - "num_input_tokens_seen": 89573440, - "step": 4208 - }, - { - "epoch": 0.5061023266999338, - "grad_norm": 2.992854677917813, - "learning_rate": 2.0576359213873327e-06, - "loss": 0.7768, - "num_input_tokens_seen": 89588310, - "step": 4209 - }, - { - "epoch": 0.506222569590573, - "grad_norm": 3.279759014846503, - "learning_rate": 2.056857268913419e-06, - "loss": 0.6963, - "num_input_tokens_seen": 89608080, - "step": 4210 - }, - { - "epoch": 0.506342812481212, - "grad_norm": 8.527151961052574, - "learning_rate": 2.056078607814303e-06, - "loss": 0.85, - "num_input_tokens_seen": 89623585, - "step": 4211 - }, - { - "epoch": 0.5064630553718511, - "grad_norm": 2.4326448439217234, - "learning_rate": 2.0552999382081054e-06, - "loss": 0.7922, - "num_input_tokens_seen": 89644295, - "step": 4212 - }, - { - "epoch": 0.5065832982624903, - "grad_norm": 1.855576288503193, - "learning_rate": 2.054521260212952e-06, - "loss": 0.8532, - "num_input_tokens_seen": 89663870, - "step": 4213 - }, - { - "epoch": 0.5067035411531293, - "grad_norm": 5.396526033370523, - "learning_rate": 2.0537425739469673e-06, - "loss": 0.6712, - "num_input_tokens_seen": 89682525, - "step": 4214 - }, - { - "epoch": 0.5068237840437684, - "grad_norm": 0.9435496088658525, - "learning_rate": 2.0529638795282763e-06, - "loss": 0.6457, - "num_input_tokens_seen": 89742115, - "step": 4215 - }, - { - "epoch": 0.5069440269344075, - "grad_norm": 2.4606522737107883, - "learning_rate": 2.052185177075007e-06, - "loss": 0.7545, - "num_input_tokens_seen": 89761405, - "step": 4216 - }, - { - "epoch": 0.5070642698250466, - "grad_norm": 1.8464865415622307, - "learning_rate": 2.051406466705288e-06, - "loss": 0.8219, - "num_input_tokens_seen": 89780665, - "step": 4217 - }, - { - "epoch": 0.5071845127156857, - "grad_norm": 2.5010700291381553, - "learning_rate": 2.050627748537248e-06, - "loss": 0.8018, - "num_input_tokens_seen": 89799210, - "step": 4218 - }, - { - "epoch": 0.5073047556063248, - "grad_norm": 2.6240284567193934, - "learning_rate": 2.04984902268902e-06, - "loss": 0.6622, - "num_input_tokens_seen": 89816130, - "step": 4219 - }, - { - "epoch": 0.5074249984969639, - "grad_norm": 2.696130012035538, - "learning_rate": 2.049070289278734e-06, - "loss": 0.7446, - "num_input_tokens_seen": 89834910, - "step": 4220 - }, - { - "epoch": 0.5075452413876029, - "grad_norm": 3.9335262363590084, - "learning_rate": 2.048291548424525e-06, - "loss": 0.6217, - "num_input_tokens_seen": 89856250, - "step": 4221 - }, - { - "epoch": 0.5076654842782421, - "grad_norm": 5.20491427694413, - "learning_rate": 2.047512800244526e-06, - "loss": 0.8326, - "num_input_tokens_seen": 89871235, - "step": 4222 - }, - { - "epoch": 0.5077857271688812, - "grad_norm": 2.5926388069727664, - "learning_rate": 2.046734044856873e-06, - "loss": 0.783, - "num_input_tokens_seen": 89890365, - "step": 4223 - }, - { - "epoch": 0.5079059700595202, - "grad_norm": 2.643388562606605, - "learning_rate": 2.0459552823797018e-06, - "loss": 0.7984, - "num_input_tokens_seen": 89908745, - "step": 4224 - }, - { - "epoch": 0.5080262129501594, - "grad_norm": 5.759068285360543, - "learning_rate": 2.045176512931152e-06, - "loss": 0.7588, - "num_input_tokens_seen": 89923095, - "step": 4225 - }, - { - "epoch": 0.5081464558407984, - "grad_norm": 2.440216555795652, - "learning_rate": 2.0443977366293604e-06, - "loss": 0.7588, - "num_input_tokens_seen": 89940855, - "step": 4226 - }, - { - "epoch": 0.5082666987314375, - "grad_norm": 3.3024203056878245, - "learning_rate": 2.043618953592468e-06, - "loss": 0.7714, - "num_input_tokens_seen": 89963030, - "step": 4227 - }, - { - "epoch": 0.5083869416220766, - "grad_norm": 2.4467352158198383, - "learning_rate": 2.0428401639386144e-06, - "loss": 0.8066, - "num_input_tokens_seen": 89983315, - "step": 4228 - }, - { - "epoch": 0.5085071845127157, - "grad_norm": 0.9282028657554723, - "learning_rate": 2.042061367785943e-06, - "loss": 0.6527, - "num_input_tokens_seen": 90036175, - "step": 4229 - }, - { - "epoch": 0.5086274274033548, - "grad_norm": 3.620002678549815, - "learning_rate": 2.041282565252594e-06, - "loss": 0.7437, - "num_input_tokens_seen": 90056060, - "step": 4230 - }, - { - "epoch": 0.5087476702939938, - "grad_norm": 2.1149812001619335, - "learning_rate": 2.040503756456714e-06, - "loss": 0.7584, - "num_input_tokens_seen": 90074990, - "step": 4231 - }, - { - "epoch": 0.508867913184633, - "grad_norm": 2.9484106170940403, - "learning_rate": 2.0397249415164456e-06, - "loss": 0.788, - "num_input_tokens_seen": 90092065, - "step": 4232 - }, - { - "epoch": 0.508988156075272, - "grad_norm": 2.042825478319309, - "learning_rate": 2.0389461205499354e-06, - "loss": 0.7995, - "num_input_tokens_seen": 90110920, - "step": 4233 - }, - { - "epoch": 0.5091083989659111, - "grad_norm": 2.5303272342809384, - "learning_rate": 2.0381672936753297e-06, - "loss": 0.7227, - "num_input_tokens_seen": 90128795, - "step": 4234 - }, - { - "epoch": 0.5092286418565503, - "grad_norm": 2.5000064404450244, - "learning_rate": 2.037388461010776e-06, - "loss": 0.7042, - "num_input_tokens_seen": 90148830, - "step": 4235 - }, - { - "epoch": 0.5093488847471893, - "grad_norm": 9.663893296080847, - "learning_rate": 2.0366096226744225e-06, - "loss": 0.6832, - "num_input_tokens_seen": 90163745, - "step": 4236 - }, - { - "epoch": 0.5094691276378284, - "grad_norm": 1.9980892402011279, - "learning_rate": 2.0358307787844183e-06, - "loss": 0.7706, - "num_input_tokens_seen": 90184140, - "step": 4237 - }, - { - "epoch": 0.5095893705284675, - "grad_norm": 3.3981728942634724, - "learning_rate": 2.0350519294589134e-06, - "loss": 0.7957, - "num_input_tokens_seen": 90201900, - "step": 4238 - }, - { - "epoch": 0.5097096134191066, - "grad_norm": 1.924500560688671, - "learning_rate": 2.0342730748160588e-06, - "loss": 0.8234, - "num_input_tokens_seen": 90222085, - "step": 4239 - }, - { - "epoch": 0.5098298563097456, - "grad_norm": 2.6739341027765677, - "learning_rate": 2.0334942149740054e-06, - "loss": 0.7038, - "num_input_tokens_seen": 90242950, - "step": 4240 - }, - { - "epoch": 0.5099500992003848, - "grad_norm": 2.389200320237709, - "learning_rate": 2.0327153500509067e-06, - "loss": 0.8296, - "num_input_tokens_seen": 90261695, - "step": 4241 - }, - { - "epoch": 0.5100703420910239, - "grad_norm": 2.0488100064388166, - "learning_rate": 2.0319364801649154e-06, - "loss": 0.8486, - "num_input_tokens_seen": 90279155, - "step": 4242 - }, - { - "epoch": 0.5101905849816629, - "grad_norm": 2.986931179423849, - "learning_rate": 2.031157605434186e-06, - "loss": 0.8018, - "num_input_tokens_seen": 90299490, - "step": 4243 - }, - { - "epoch": 0.5103108278723021, - "grad_norm": 1.9928428142399661, - "learning_rate": 2.0303787259768715e-06, - "loss": 0.6288, - "num_input_tokens_seen": 90320110, - "step": 4244 - }, - { - "epoch": 0.5104310707629411, - "grad_norm": 4.167819751351299, - "learning_rate": 2.0295998419111294e-06, - "loss": 0.6817, - "num_input_tokens_seen": 90337120, - "step": 4245 - }, - { - "epoch": 0.5105513136535802, - "grad_norm": 5.911066595048062, - "learning_rate": 2.0288209533551144e-06, - "loss": 0.7251, - "num_input_tokens_seen": 90354940, - "step": 4246 - }, - { - "epoch": 0.5106715565442194, - "grad_norm": 2.3408366349855956, - "learning_rate": 2.0280420604269834e-06, - "loss": 0.7798, - "num_input_tokens_seen": 90374200, - "step": 4247 - }, - { - "epoch": 0.5107917994348584, - "grad_norm": 0.7985233431917704, - "learning_rate": 2.0272631632448945e-06, - "loss": 0.6404, - "num_input_tokens_seen": 90443045, - "step": 4248 - }, - { - "epoch": 0.5109120423254975, - "grad_norm": 3.4141041193505712, - "learning_rate": 2.026484261927005e-06, - "loss": 0.7376, - "num_input_tokens_seen": 90462260, - "step": 4249 - }, - { - "epoch": 0.5110322852161366, - "grad_norm": 2.989711923922118, - "learning_rate": 2.025705356591475e-06, - "loss": 0.7239, - "num_input_tokens_seen": 90479670, - "step": 4250 - }, - { - "epoch": 0.5111525281067757, - "grad_norm": 0.8871605833744901, - "learning_rate": 2.024926447356462e-06, - "loss": 0.6188, - "num_input_tokens_seen": 90541675, - "step": 4251 - }, - { - "epoch": 0.5112727709974147, - "grad_norm": 2.1606374572066414, - "learning_rate": 2.024147534340127e-06, - "loss": 0.7842, - "num_input_tokens_seen": 90559255, - "step": 4252 - }, - { - "epoch": 0.5113930138880539, - "grad_norm": 2.477900914295781, - "learning_rate": 2.02336861766063e-06, - "loss": 0.7903, - "num_input_tokens_seen": 90578035, - "step": 4253 - }, - { - "epoch": 0.511513256778693, - "grad_norm": 2.143114257516258, - "learning_rate": 2.0225896974361327e-06, - "loss": 0.7778, - "num_input_tokens_seen": 90597860, - "step": 4254 - }, - { - "epoch": 0.511633499669332, - "grad_norm": 2.432274467029066, - "learning_rate": 2.0218107737847962e-06, - "loss": 0.656, - "num_input_tokens_seen": 90659625, - "step": 4255 - }, - { - "epoch": 0.5117537425599712, - "grad_norm": 2.2495354488202306, - "learning_rate": 2.021031846824782e-06, - "loss": 0.7588, - "num_input_tokens_seen": 90678315, - "step": 4256 - }, - { - "epoch": 0.5118739854506102, - "grad_norm": 2.194469637942036, - "learning_rate": 2.020252916674254e-06, - "loss": 0.8138, - "num_input_tokens_seen": 90697150, - "step": 4257 - }, - { - "epoch": 0.5119942283412493, - "grad_norm": 2.876658687226483, - "learning_rate": 2.019473983451375e-06, - "loss": 0.803, - "num_input_tokens_seen": 90715290, - "step": 4258 - }, - { - "epoch": 0.5121144712318885, - "grad_norm": 3.2773293239250374, - "learning_rate": 2.0186950472743076e-06, - "loss": 0.7005, - "num_input_tokens_seen": 90734915, - "step": 4259 - }, - { - "epoch": 0.5122347141225275, - "grad_norm": 1.9817477030496025, - "learning_rate": 2.0179161082612162e-06, - "loss": 0.7387, - "num_input_tokens_seen": 90754025, - "step": 4260 - }, - { - "epoch": 0.5123549570131666, - "grad_norm": 5.938833149544115, - "learning_rate": 2.017137166530266e-06, - "loss": 0.7218, - "num_input_tokens_seen": 90773280, - "step": 4261 - }, - { - "epoch": 0.5124751999038056, - "grad_norm": 2.609474253535179, - "learning_rate": 2.016358222199621e-06, - "loss": 0.8007, - "num_input_tokens_seen": 90791375, - "step": 4262 - }, - { - "epoch": 0.5125954427944448, - "grad_norm": 3.0604825711032912, - "learning_rate": 2.015579275387446e-06, - "loss": 0.678, - "num_input_tokens_seen": 90816415, - "step": 4263 - }, - { - "epoch": 0.5127156856850839, - "grad_norm": 3.2214540387017343, - "learning_rate": 2.0148003262119085e-06, - "loss": 0.6872, - "num_input_tokens_seen": 90837105, - "step": 4264 - }, - { - "epoch": 0.5128359285757229, - "grad_norm": 2.2730404991139816, - "learning_rate": 2.014021374791173e-06, - "loss": 0.7649, - "num_input_tokens_seen": 90855360, - "step": 4265 - }, - { - "epoch": 0.5129561714663621, - "grad_norm": 2.3979203686641064, - "learning_rate": 2.013242421243406e-06, - "loss": 0.7984, - "num_input_tokens_seen": 90873985, - "step": 4266 - }, - { - "epoch": 0.5130764143570011, - "grad_norm": 3.8914629187626013, - "learning_rate": 2.012463465686774e-06, - "loss": 0.7827, - "num_input_tokens_seen": 90893455, - "step": 4267 - }, - { - "epoch": 0.5131966572476402, - "grad_norm": 1.2787630849663625, - "learning_rate": 2.0116845082394442e-06, - "loss": 0.598, - "num_input_tokens_seen": 90958010, - "step": 4268 - }, - { - "epoch": 0.5133169001382794, - "grad_norm": 2.5066625973920234, - "learning_rate": 2.0109055490195836e-06, - "loss": 0.788, - "num_input_tokens_seen": 90976185, - "step": 4269 - }, - { - "epoch": 0.5134371430289184, - "grad_norm": 2.698562216743721, - "learning_rate": 2.01012658814536e-06, - "loss": 0.6388, - "num_input_tokens_seen": 90994380, - "step": 4270 - }, - { - "epoch": 0.5135573859195575, - "grad_norm": 2.446307156922511, - "learning_rate": 2.009347625734941e-06, - "loss": 0.7896, - "num_input_tokens_seen": 91014010, - "step": 4271 - }, - { - "epoch": 0.5136776288101966, - "grad_norm": 3.0333839865120917, - "learning_rate": 2.0085686619064954e-06, - "loss": 0.7554, - "num_input_tokens_seen": 91030170, - "step": 4272 - }, - { - "epoch": 0.5137978717008357, - "grad_norm": 5.422883352406179, - "learning_rate": 2.00778969677819e-06, - "loss": 0.831, - "num_input_tokens_seen": 91046925, - "step": 4273 - }, - { - "epoch": 0.5139181145914747, - "grad_norm": 3.0214315908554625, - "learning_rate": 2.0070107304681934e-06, - "loss": 0.6454, - "num_input_tokens_seen": 91065600, - "step": 4274 - }, - { - "epoch": 0.5140383574821139, - "grad_norm": 2.1202631617984693, - "learning_rate": 2.006231763094675e-06, - "loss": 0.7775, - "num_input_tokens_seen": 91086340, - "step": 4275 - }, - { - "epoch": 0.514158600372753, - "grad_norm": 3.3392471000212978, - "learning_rate": 2.0054527947758027e-06, - "loss": 0.8599, - "num_input_tokens_seen": 91104860, - "step": 4276 - }, - { - "epoch": 0.514278843263392, - "grad_norm": 0.8365510281458459, - "learning_rate": 2.004673825629746e-06, - "loss": 0.5912, - "num_input_tokens_seen": 91165360, - "step": 4277 - }, - { - "epoch": 0.5143990861540312, - "grad_norm": 2.1751455784980798, - "learning_rate": 2.003894855774674e-06, - "loss": 0.7132, - "num_input_tokens_seen": 91186935, - "step": 4278 - }, - { - "epoch": 0.5145193290446702, - "grad_norm": 3.086526452217173, - "learning_rate": 2.0031158853287554e-06, - "loss": 0.748, - "num_input_tokens_seen": 91207090, - "step": 4279 - }, - { - "epoch": 0.5146395719353093, - "grad_norm": 2.4887210848649204, - "learning_rate": 2.0023369144101593e-06, - "loss": 0.7176, - "num_input_tokens_seen": 91224980, - "step": 4280 - }, - { - "epoch": 0.5147598148259485, - "grad_norm": 2.4324375921213983, - "learning_rate": 2.0015579431370555e-06, - "loss": 0.7606, - "num_input_tokens_seen": 91246380, - "step": 4281 - }, - { - "epoch": 0.5148800577165875, - "grad_norm": 17.028705456928314, - "learning_rate": 2.000778971627612e-06, - "loss": 0.6981, - "num_input_tokens_seen": 91265565, - "step": 4282 - }, - { - "epoch": 0.5150003006072266, - "grad_norm": 2.840245832403917, - "learning_rate": 2e-06, - "loss": 0.8942, - "num_input_tokens_seen": 91282880, - "step": 4283 - }, - { - "epoch": 0.5151205434978657, - "grad_norm": 2.124973058379237, - "learning_rate": 1.9992210283723878e-06, - "loss": 0.8569, - "num_input_tokens_seen": 91299840, - "step": 4284 - }, - { - "epoch": 0.5152407863885048, - "grad_norm": 2.475802755275781, - "learning_rate": 1.998442056862945e-06, - "loss": 0.782, - "num_input_tokens_seen": 91322325, - "step": 4285 - }, - { - "epoch": 0.5153610292791438, - "grad_norm": 2.6236899410785215, - "learning_rate": 1.9976630855898405e-06, - "loss": 0.7784, - "num_input_tokens_seen": 91339800, - "step": 4286 - }, - { - "epoch": 0.515481272169783, - "grad_norm": 2.739997182162152, - "learning_rate": 1.996884114671245e-06, - "loss": 0.7381, - "num_input_tokens_seen": 91359135, - "step": 4287 - }, - { - "epoch": 0.5156015150604221, - "grad_norm": 1.5960066151988939, - "learning_rate": 1.9961051442253263e-06, - "loss": 0.7014, - "num_input_tokens_seen": 91379090, - "step": 4288 - }, - { - "epoch": 0.5157217579510611, - "grad_norm": 2.242619769685269, - "learning_rate": 1.9953261743702543e-06, - "loss": 0.7913, - "num_input_tokens_seen": 91397910, - "step": 4289 - }, - { - "epoch": 0.5158420008417003, - "grad_norm": 1.8925339847482041, - "learning_rate": 1.9945472052241967e-06, - "loss": 0.7203, - "num_input_tokens_seen": 91416535, - "step": 4290 - }, - { - "epoch": 0.5159622437323393, - "grad_norm": 3.823087621358085, - "learning_rate": 1.993768236905325e-06, - "loss": 0.6744, - "num_input_tokens_seen": 91436925, - "step": 4291 - }, - { - "epoch": 0.5160824866229784, - "grad_norm": 2.4781187606963093, - "learning_rate": 1.992989269531807e-06, - "loss": 0.6552, - "num_input_tokens_seen": 91455455, - "step": 4292 - }, - { - "epoch": 0.5162027295136175, - "grad_norm": 3.631057327550693, - "learning_rate": 1.9922103032218104e-06, - "loss": 0.6757, - "num_input_tokens_seen": 91471980, - "step": 4293 - }, - { - "epoch": 0.5163229724042566, - "grad_norm": 1.6061926751872557, - "learning_rate": 1.991431338093505e-06, - "loss": 0.8034, - "num_input_tokens_seen": 91494055, - "step": 4294 - }, - { - "epoch": 0.5164432152948957, - "grad_norm": 2.757170702023657, - "learning_rate": 1.9906523742650587e-06, - "loss": 0.7867, - "num_input_tokens_seen": 91512635, - "step": 4295 - }, - { - "epoch": 0.5165634581855347, - "grad_norm": 2.4268439601317127, - "learning_rate": 1.98987341185464e-06, - "loss": 0.7585, - "num_input_tokens_seen": 91532305, - "step": 4296 - }, - { - "epoch": 0.5166837010761739, - "grad_norm": 1.7062827603353805, - "learning_rate": 1.9890944509804166e-06, - "loss": 0.7998, - "num_input_tokens_seen": 91552125, - "step": 4297 - }, - { - "epoch": 0.516803943966813, - "grad_norm": 2.607260410747192, - "learning_rate": 1.9883154917605556e-06, - "loss": 0.7642, - "num_input_tokens_seen": 91571055, - "step": 4298 - }, - { - "epoch": 0.516924186857452, - "grad_norm": 2.708475364401952, - "learning_rate": 1.9875365343132262e-06, - "loss": 0.8188, - "num_input_tokens_seen": 91587895, - "step": 4299 - }, - { - "epoch": 0.5170444297480912, - "grad_norm": 2.667900023407321, - "learning_rate": 1.9867575787565946e-06, - "loss": 0.8405, - "num_input_tokens_seen": 91602275, - "step": 4300 - }, - { - "epoch": 0.5171646726387302, - "grad_norm": 4.990030954550193, - "learning_rate": 1.9859786252088275e-06, - "loss": 0.8577, - "num_input_tokens_seen": 91619175, - "step": 4301 - }, - { - "epoch": 0.5172849155293693, - "grad_norm": 3.047202311858612, - "learning_rate": 1.9851996737880914e-06, - "loss": 0.6628, - "num_input_tokens_seen": 91634080, - "step": 4302 - }, - { - "epoch": 0.5174051584200084, - "grad_norm": 3.739120074222469, - "learning_rate": 1.9844207246125537e-06, - "loss": 0.7443, - "num_input_tokens_seen": 91650380, - "step": 4303 - }, - { - "epoch": 0.5175254013106475, - "grad_norm": 2.4108688483548226, - "learning_rate": 1.9836417778003794e-06, - "loss": 0.6864, - "num_input_tokens_seen": 91672745, - "step": 4304 - }, - { - "epoch": 0.5176456442012866, - "grad_norm": 0.8169154214823664, - "learning_rate": 1.9828628334697347e-06, - "loss": 0.6249, - "num_input_tokens_seen": 91739675, - "step": 4305 - }, - { - "epoch": 0.5177658870919257, - "grad_norm": 0.8102509857012096, - "learning_rate": 1.9820838917387836e-06, - "loss": 0.597, - "num_input_tokens_seen": 91800265, - "step": 4306 - }, - { - "epoch": 0.5178861299825648, - "grad_norm": 1.6899432779489791, - "learning_rate": 1.9813049527256923e-06, - "loss": 0.8221, - "num_input_tokens_seen": 91820380, - "step": 4307 - }, - { - "epoch": 0.5180063728732038, - "grad_norm": 4.034563796011272, - "learning_rate": 1.9805260165486252e-06, - "loss": 0.8274, - "num_input_tokens_seen": 91839470, - "step": 4308 - }, - { - "epoch": 0.518126615763843, - "grad_norm": 2.2360450308254514, - "learning_rate": 1.979747083325746e-06, - "loss": 0.8644, - "num_input_tokens_seen": 91858890, - "step": 4309 - }, - { - "epoch": 0.5182468586544821, - "grad_norm": 2.7616577243437024, - "learning_rate": 1.9789681531752177e-06, - "loss": 0.7753, - "num_input_tokens_seen": 91878830, - "step": 4310 - }, - { - "epoch": 0.5183671015451211, - "grad_norm": 1.821260709369397, - "learning_rate": 1.978189226215204e-06, - "loss": 0.7255, - "num_input_tokens_seen": 91899095, - "step": 4311 - }, - { - "epoch": 0.5184873444357603, - "grad_norm": 2.9685653504103025, - "learning_rate": 1.9774103025638675e-06, - "loss": 0.766, - "num_input_tokens_seen": 91916940, - "step": 4312 - }, - { - "epoch": 0.5186075873263993, - "grad_norm": 1.6913024463278183, - "learning_rate": 1.97663138233937e-06, - "loss": 0.7578, - "num_input_tokens_seen": 91937525, - "step": 4313 - }, - { - "epoch": 0.5187278302170384, - "grad_norm": 2.991297168858125, - "learning_rate": 1.9758524656598724e-06, - "loss": 0.6839, - "num_input_tokens_seen": 91953225, - "step": 4314 - }, - { - "epoch": 0.5188480731076776, - "grad_norm": 21.56889296111843, - "learning_rate": 1.9750735526435377e-06, - "loss": 0.6998, - "num_input_tokens_seen": 91969890, - "step": 4315 - }, - { - "epoch": 0.5189683159983166, - "grad_norm": 3.460673895993, - "learning_rate": 1.974294643408525e-06, - "loss": 0.7953, - "num_input_tokens_seen": 91987405, - "step": 4316 - }, - { - "epoch": 0.5190885588889557, - "grad_norm": 2.409393181049484, - "learning_rate": 1.9735157380729947e-06, - "loss": 0.6662, - "num_input_tokens_seen": 92007535, - "step": 4317 - }, - { - "epoch": 0.5192088017795948, - "grad_norm": 2.8245472072811784, - "learning_rate": 1.9727368367551053e-06, - "loss": 0.8355, - "num_input_tokens_seen": 92025805, - "step": 4318 - }, - { - "epoch": 0.5193290446702339, - "grad_norm": 2.454149967834717, - "learning_rate": 1.9719579395730164e-06, - "loss": 0.6882, - "num_input_tokens_seen": 92044900, - "step": 4319 - }, - { - "epoch": 0.5194492875608729, - "grad_norm": 4.000719061143896, - "learning_rate": 1.971179046644886e-06, - "loss": 0.9226, - "num_input_tokens_seen": 92058640, - "step": 4320 - }, - { - "epoch": 0.5195695304515121, - "grad_norm": 2.4336888395977296, - "learning_rate": 1.970400158088871e-06, - "loss": 0.6992, - "num_input_tokens_seen": 92077100, - "step": 4321 - }, - { - "epoch": 0.5196897733421512, - "grad_norm": 2.7565022621310002, - "learning_rate": 1.969621274023128e-06, - "loss": 0.8674, - "num_input_tokens_seen": 92095470, - "step": 4322 - }, - { - "epoch": 0.5198100162327902, - "grad_norm": 2.8388527492490403, - "learning_rate": 1.968842394565814e-06, - "loss": 0.8194, - "num_input_tokens_seen": 92116055, - "step": 4323 - }, - { - "epoch": 0.5199302591234293, - "grad_norm": 8.03449850856374, - "learning_rate": 1.968063519835085e-06, - "loss": 0.7186, - "num_input_tokens_seen": 92135485, - "step": 4324 - }, - { - "epoch": 0.5200505020140684, - "grad_norm": 3.218019743700349, - "learning_rate": 1.9672846499490935e-06, - "loss": 0.7213, - "num_input_tokens_seen": 92154415, - "step": 4325 - }, - { - "epoch": 0.5201707449047075, - "grad_norm": 2.360547308356082, - "learning_rate": 1.966505785025994e-06, - "loss": 0.7157, - "num_input_tokens_seen": 92176040, - "step": 4326 - }, - { - "epoch": 0.5202909877953465, - "grad_norm": 3.221811797575669, - "learning_rate": 1.965726925183941e-06, - "loss": 0.7594, - "num_input_tokens_seen": 92198865, - "step": 4327 - }, - { - "epoch": 0.5204112306859857, - "grad_norm": 3.7145767317145304, - "learning_rate": 1.964948070541087e-06, - "loss": 0.84, - "num_input_tokens_seen": 92217245, - "step": 4328 - }, - { - "epoch": 0.5205314735766248, - "grad_norm": 2.8580420021938053, - "learning_rate": 1.964169221215582e-06, - "loss": 0.6834, - "num_input_tokens_seen": 92234730, - "step": 4329 - }, - { - "epoch": 0.5206517164672638, - "grad_norm": 2.5171657976586643, - "learning_rate": 1.9633903773255777e-06, - "loss": 0.7267, - "num_input_tokens_seen": 92256765, - "step": 4330 - }, - { - "epoch": 0.520771959357903, - "grad_norm": 4.164258559088051, - "learning_rate": 1.962611538989224e-06, - "loss": 0.7475, - "num_input_tokens_seen": 92277275, - "step": 4331 - }, - { - "epoch": 0.520892202248542, - "grad_norm": 2.3137798832037504, - "learning_rate": 1.9618327063246705e-06, - "loss": 0.8498, - "num_input_tokens_seen": 92296845, - "step": 4332 - }, - { - "epoch": 0.5210124451391811, - "grad_norm": 3.486670104636567, - "learning_rate": 1.961053879450065e-06, - "loss": 0.7809, - "num_input_tokens_seen": 92316115, - "step": 4333 - }, - { - "epoch": 0.5211326880298203, - "grad_norm": 0.8254579896791291, - "learning_rate": 1.960275058483554e-06, - "loss": 0.6308, - "num_input_tokens_seen": 92381770, - "step": 4334 - }, - { - "epoch": 0.5212529309204593, - "grad_norm": 2.538645826597001, - "learning_rate": 1.959496243543286e-06, - "loss": 0.8275, - "num_input_tokens_seen": 92399370, - "step": 4335 - }, - { - "epoch": 0.5213731738110984, - "grad_norm": 4.1577397829328, - "learning_rate": 1.9587174347474057e-06, - "loss": 0.7914, - "num_input_tokens_seen": 92415600, - "step": 4336 - }, - { - "epoch": 0.5214934167017375, - "grad_norm": 2.862591332216673, - "learning_rate": 1.957938632214058e-06, - "loss": 0.8078, - "num_input_tokens_seen": 92431000, - "step": 4337 - }, - { - "epoch": 0.5216136595923766, - "grad_norm": 2.2818172747199643, - "learning_rate": 1.9571598360613854e-06, - "loss": 0.8018, - "num_input_tokens_seen": 92453595, - "step": 4338 - }, - { - "epoch": 0.5217339024830157, - "grad_norm": 2.4460848874558736, - "learning_rate": 1.956381046407532e-06, - "loss": 0.695, - "num_input_tokens_seen": 92473610, - "step": 4339 - }, - { - "epoch": 0.5218541453736548, - "grad_norm": 2.339742394442145, - "learning_rate": 1.95560226337064e-06, - "loss": 0.8515, - "num_input_tokens_seen": 92492120, - "step": 4340 - }, - { - "epoch": 0.5219743882642939, - "grad_norm": 3.411939574288475, - "learning_rate": 1.9548234870688486e-06, - "loss": 0.7916, - "num_input_tokens_seen": 92512050, - "step": 4341 - }, - { - "epoch": 0.5220946311549329, - "grad_norm": 2.1472633873801192, - "learning_rate": 1.9540447176202976e-06, - "loss": 0.8067, - "num_input_tokens_seen": 92533015, - "step": 4342 - }, - { - "epoch": 0.5222148740455721, - "grad_norm": 0.8598588637250604, - "learning_rate": 1.9532659551431272e-06, - "loss": 0.6514, - "num_input_tokens_seen": 92599765, - "step": 4343 - }, - { - "epoch": 0.5223351169362112, - "grad_norm": 1.8888903945012134, - "learning_rate": 1.9524871997554744e-06, - "loss": 0.658, - "num_input_tokens_seen": 92627245, - "step": 4344 - }, - { - "epoch": 0.5224553598268502, - "grad_norm": 2.926895730972248, - "learning_rate": 1.951708451575475e-06, - "loss": 0.795, - "num_input_tokens_seen": 92644030, - "step": 4345 - }, - { - "epoch": 0.5225756027174894, - "grad_norm": 2.7329165232551715, - "learning_rate": 1.9509297107212657e-06, - "loss": 0.8102, - "num_input_tokens_seen": 92660520, - "step": 4346 - }, - { - "epoch": 0.5226958456081284, - "grad_norm": 5.009365425022103, - "learning_rate": 1.95015097731098e-06, - "loss": 0.7895, - "num_input_tokens_seen": 92679730, - "step": 4347 - }, - { - "epoch": 0.5228160884987675, - "grad_norm": 3.1052224467568603, - "learning_rate": 1.9493722514627516e-06, - "loss": 0.814, - "num_input_tokens_seen": 92696865, - "step": 4348 - }, - { - "epoch": 0.5229363313894067, - "grad_norm": 3.2363247045039794, - "learning_rate": 1.9485935332947124e-06, - "loss": 0.8208, - "num_input_tokens_seen": 92714495, - "step": 4349 - }, - { - "epoch": 0.5230565742800457, - "grad_norm": 6.421452198815501, - "learning_rate": 1.9478148229249926e-06, - "loss": 0.8319, - "num_input_tokens_seen": 92731725, - "step": 4350 - }, - { - "epoch": 0.5231768171706848, - "grad_norm": 6.96818827027789, - "learning_rate": 1.9470361204717236e-06, - "loss": 0.8216, - "num_input_tokens_seen": 92750585, - "step": 4351 - }, - { - "epoch": 0.5232970600613239, - "grad_norm": 1.9420980659554217, - "learning_rate": 1.9462574260530326e-06, - "loss": 0.8055, - "num_input_tokens_seen": 92770585, - "step": 4352 - }, - { - "epoch": 0.523417302951963, - "grad_norm": 4.144984391168081, - "learning_rate": 1.9454787397870477e-06, - "loss": 0.8025, - "num_input_tokens_seen": 92787625, - "step": 4353 - }, - { - "epoch": 0.523537545842602, - "grad_norm": 2.6038864724338526, - "learning_rate": 1.944700061791894e-06, - "loss": 0.7168, - "num_input_tokens_seen": 92805740, - "step": 4354 - }, - { - "epoch": 0.5236577887332411, - "grad_norm": 3.5043455093740294, - "learning_rate": 1.943921392185698e-06, - "loss": 0.6464, - "num_input_tokens_seen": 92824085, - "step": 4355 - }, - { - "epoch": 0.5237780316238803, - "grad_norm": 16.549584717435856, - "learning_rate": 1.9431427310865814e-06, - "loss": 0.7674, - "num_input_tokens_seen": 92843410, - "step": 4356 - }, - { - "epoch": 0.5238982745145193, - "grad_norm": 2.470064852139736, - "learning_rate": 1.9423640786126676e-06, - "loss": 0.7873, - "num_input_tokens_seen": 92861860, - "step": 4357 - }, - { - "epoch": 0.5240185174051584, - "grad_norm": 2.2567559949489535, - "learning_rate": 1.941585434882076e-06, - "loss": 0.7453, - "num_input_tokens_seen": 92881430, - "step": 4358 - }, - { - "epoch": 0.5241387602957975, - "grad_norm": 2.506739240107256, - "learning_rate": 1.940806800012929e-06, - "loss": 0.6662, - "num_input_tokens_seen": 92901220, - "step": 4359 - }, - { - "epoch": 0.5242590031864366, - "grad_norm": 1.9487721000759703, - "learning_rate": 1.940028174123343e-06, - "loss": 0.6378, - "num_input_tokens_seen": 92925830, - "step": 4360 - }, - { - "epoch": 0.5243792460770756, - "grad_norm": 0.7067978514468166, - "learning_rate": 1.939249557331435e-06, - "loss": 0.5567, - "num_input_tokens_seen": 92991365, - "step": 4361 - }, - { - "epoch": 0.5244994889677148, - "grad_norm": 3.1522807992849913, - "learning_rate": 1.938470949755321e-06, - "loss": 0.7216, - "num_input_tokens_seen": 93010965, - "step": 4362 - }, - { - "epoch": 0.5246197318583539, - "grad_norm": 0.9263953067355369, - "learning_rate": 1.937692351513115e-06, - "loss": 0.6222, - "num_input_tokens_seen": 93069680, - "step": 4363 - }, - { - "epoch": 0.5247399747489929, - "grad_norm": 4.631820319099404, - "learning_rate": 1.93691376272293e-06, - "loss": 0.7974, - "num_input_tokens_seen": 93087800, - "step": 4364 - }, - { - "epoch": 0.5248602176396321, - "grad_norm": 3.4619780138004383, - "learning_rate": 1.9361351835028773e-06, - "loss": 0.8767, - "num_input_tokens_seen": 93104820, - "step": 4365 - }, - { - "epoch": 0.5249804605302711, - "grad_norm": 4.573756916794191, - "learning_rate": 1.9353566139710654e-06, - "loss": 0.8068, - "num_input_tokens_seen": 93125200, - "step": 4366 - }, - { - "epoch": 0.5251007034209102, - "grad_norm": 2.4023745048422, - "learning_rate": 1.9345780542456043e-06, - "loss": 0.7671, - "num_input_tokens_seen": 93144295, - "step": 4367 - }, - { - "epoch": 0.5252209463115494, - "grad_norm": 2.7027041859486407, - "learning_rate": 1.933799504444601e-06, - "loss": 0.7223, - "num_input_tokens_seen": 93162855, - "step": 4368 - }, - { - "epoch": 0.5253411892021884, - "grad_norm": 2.9043143765723407, - "learning_rate": 1.93302096468616e-06, - "loss": 0.7912, - "num_input_tokens_seen": 93181725, - "step": 4369 - }, - { - "epoch": 0.5254614320928275, - "grad_norm": 3.1500876552118307, - "learning_rate": 1.9322424350883843e-06, - "loss": 0.7728, - "num_input_tokens_seen": 93203280, - "step": 4370 - }, - { - "epoch": 0.5255816749834666, - "grad_norm": 2.109027213606183, - "learning_rate": 1.9314639157693784e-06, - "loss": 0.7743, - "num_input_tokens_seen": 93223115, - "step": 4371 - }, - { - "epoch": 0.5257019178741057, - "grad_norm": 3.183577255607226, - "learning_rate": 1.930685406847242e-06, - "loss": 0.7327, - "num_input_tokens_seen": 93237410, - "step": 4372 - }, - { - "epoch": 0.5258221607647448, - "grad_norm": 10.76185720688949, - "learning_rate": 1.929906908440074e-06, - "loss": 0.8189, - "num_input_tokens_seen": 93257990, - "step": 4373 - }, - { - "epoch": 0.5259424036553839, - "grad_norm": 2.224820145878844, - "learning_rate": 1.9291284206659717e-06, - "loss": 0.6875, - "num_input_tokens_seen": 93275895, - "step": 4374 - }, - { - "epoch": 0.526062646546023, - "grad_norm": 2.8559530889567992, - "learning_rate": 1.9283499436430325e-06, - "loss": 0.7111, - "num_input_tokens_seen": 93294715, - "step": 4375 - }, - { - "epoch": 0.526182889436662, - "grad_norm": 3.751191388187443, - "learning_rate": 1.9275714774893497e-06, - "loss": 0.8274, - "num_input_tokens_seen": 93313890, - "step": 4376 - }, - { - "epoch": 0.5263031323273012, - "grad_norm": 2.9434411965998355, - "learning_rate": 1.926793022323016e-06, - "loss": 0.7354, - "num_input_tokens_seen": 93332085, - "step": 4377 - }, - { - "epoch": 0.5264233752179402, - "grad_norm": 2.7650948054815605, - "learning_rate": 1.926014578262122e-06, - "loss": 0.7879, - "num_input_tokens_seen": 93349585, - "step": 4378 - }, - { - "epoch": 0.5265436181085793, - "grad_norm": 1.9827055758628263, - "learning_rate": 1.925236145424758e-06, - "loss": 0.8736, - "num_input_tokens_seen": 93368125, - "step": 4379 - }, - { - "epoch": 0.5266638609992185, - "grad_norm": 0.726743589331589, - "learning_rate": 1.924457723929012e-06, - "loss": 0.6011, - "num_input_tokens_seen": 93438655, - "step": 4380 - }, - { - "epoch": 0.5267841038898575, - "grad_norm": 1.758951297155496, - "learning_rate": 1.923679313892969e-06, - "loss": 0.8214, - "num_input_tokens_seen": 93457645, - "step": 4381 - }, - { - "epoch": 0.5269043467804966, - "grad_norm": 6.167261646418311, - "learning_rate": 1.922900915434713e-06, - "loss": 0.8021, - "num_input_tokens_seen": 93474955, - "step": 4382 - }, - { - "epoch": 0.5270245896711357, - "grad_norm": 3.64843063781641, - "learning_rate": 1.922122528672327e-06, - "loss": 0.8064, - "num_input_tokens_seen": 93493340, - "step": 4383 - }, - { - "epoch": 0.5271448325617748, - "grad_norm": 7.284114148568296, - "learning_rate": 1.921344153723892e-06, - "loss": 0.781, - "num_input_tokens_seen": 93509935, - "step": 4384 - }, - { - "epoch": 0.5272650754524139, - "grad_norm": 0.9994149535033161, - "learning_rate": 1.9205657907074856e-06, - "loss": 0.6495, - "num_input_tokens_seen": 93575045, - "step": 4385 - }, - { - "epoch": 0.527385318343053, - "grad_norm": 4.1900659965811755, - "learning_rate": 1.9197874397411853e-06, - "loss": 0.6649, - "num_input_tokens_seen": 93591395, - "step": 4386 - }, - { - "epoch": 0.5275055612336921, - "grad_norm": 4.722446008605398, - "learning_rate": 1.919009100943067e-06, - "loss": 0.6554, - "num_input_tokens_seen": 93606805, - "step": 4387 - }, - { - "epoch": 0.5276258041243311, - "grad_norm": 4.956860703755058, - "learning_rate": 1.9182307744312043e-06, - "loss": 0.6569, - "num_input_tokens_seen": 93623630, - "step": 4388 - }, - { - "epoch": 0.5277460470149702, - "grad_norm": 2.694895329348522, - "learning_rate": 1.9174524603236676e-06, - "loss": 0.7595, - "num_input_tokens_seen": 93642300, - "step": 4389 - }, - { - "epoch": 0.5278662899056094, - "grad_norm": 3.474000544801508, - "learning_rate": 1.916674158738527e-06, - "loss": 0.7588, - "num_input_tokens_seen": 93660925, - "step": 4390 - }, - { - "epoch": 0.5279865327962484, - "grad_norm": 2.396671485024513, - "learning_rate": 1.9158958697938506e-06, - "loss": 0.6016, - "num_input_tokens_seen": 93679025, - "step": 4391 - }, - { - "epoch": 0.5281067756868875, - "grad_norm": 5.854960807129127, - "learning_rate": 1.9151175936077036e-06, - "loss": 0.8541, - "num_input_tokens_seen": 93693715, - "step": 4392 - }, - { - "epoch": 0.5282270185775266, - "grad_norm": 2.304777815690915, - "learning_rate": 1.9143393302981507e-06, - "loss": 0.7887, - "num_input_tokens_seen": 93711120, - "step": 4393 - }, - { - "epoch": 0.5283472614681657, - "grad_norm": 3.51146715120032, - "learning_rate": 1.9135610799832517e-06, - "loss": 0.8285, - "num_input_tokens_seen": 93729665, - "step": 4394 - }, - { - "epoch": 0.5284675043588047, - "grad_norm": 5.6267238045277, - "learning_rate": 1.9127828427810693e-06, - "loss": 0.749, - "num_input_tokens_seen": 93749950, - "step": 4395 - }, - { - "epoch": 0.5285877472494439, - "grad_norm": 2.4455011451216047, - "learning_rate": 1.9120046188096607e-06, - "loss": 0.8049, - "num_input_tokens_seen": 93767715, - "step": 4396 - }, - { - "epoch": 0.528707990140083, - "grad_norm": 4.489227879512186, - "learning_rate": 1.911226408187081e-06, - "loss": 0.746, - "num_input_tokens_seen": 93785825, - "step": 4397 - }, - { - "epoch": 0.528828233030722, - "grad_norm": 2.579181557626125, - "learning_rate": 1.9104482110313843e-06, - "loss": 0.7501, - "num_input_tokens_seen": 93805135, - "step": 4398 - }, - { - "epoch": 0.5289484759213612, - "grad_norm": 2.3864483014280924, - "learning_rate": 1.909670027460623e-06, - "loss": 0.7413, - "num_input_tokens_seen": 93822155, - "step": 4399 - }, - { - "epoch": 0.5290687188120002, - "grad_norm": 2.442530661179652, - "learning_rate": 1.908891857592847e-06, - "loss": 0.7153, - "num_input_tokens_seen": 93842945, - "step": 4400 - }, - { - "epoch": 0.5291889617026393, - "grad_norm": 2.5753357883674592, - "learning_rate": 1.9081137015461038e-06, - "loss": 0.8878, - "num_input_tokens_seen": 93858740, - "step": 4401 - }, - { - "epoch": 0.5293092045932785, - "grad_norm": 2.217216870881063, - "learning_rate": 1.9073355594384379e-06, - "loss": 0.8977, - "num_input_tokens_seen": 93876700, - "step": 4402 - }, - { - "epoch": 0.5294294474839175, - "grad_norm": 2.742685642382288, - "learning_rate": 1.906557431387895e-06, - "loss": 0.801, - "num_input_tokens_seen": 93895410, - "step": 4403 - }, - { - "epoch": 0.5295496903745566, - "grad_norm": 2.2487668565730914, - "learning_rate": 1.9057793175125156e-06, - "loss": 0.7741, - "num_input_tokens_seen": 93912675, - "step": 4404 - }, - { - "epoch": 0.5296699332651957, - "grad_norm": 2.796240715408067, - "learning_rate": 1.9050012179303385e-06, - "loss": 0.8027, - "num_input_tokens_seen": 93930905, - "step": 4405 - }, - { - "epoch": 0.5297901761558348, - "grad_norm": 3.2172355322152653, - "learning_rate": 1.904223132759401e-06, - "loss": 0.683, - "num_input_tokens_seen": 93949225, - "step": 4406 - }, - { - "epoch": 0.5299104190464738, - "grad_norm": 3.0226313565650127, - "learning_rate": 1.9034450621177383e-06, - "loss": 0.687, - "num_input_tokens_seen": 93967265, - "step": 4407 - }, - { - "epoch": 0.530030661937113, - "grad_norm": 3.0912232833601005, - "learning_rate": 1.902667006123383e-06, - "loss": 0.7052, - "num_input_tokens_seen": 93984420, - "step": 4408 - }, - { - "epoch": 0.5301509048277521, - "grad_norm": 4.621179612881732, - "learning_rate": 1.9018889648943655e-06, - "loss": 0.8048, - "num_input_tokens_seen": 94003180, - "step": 4409 - }, - { - "epoch": 0.5302711477183911, - "grad_norm": 8.082062097438557, - "learning_rate": 1.901110938548713e-06, - "loss": 0.6784, - "num_input_tokens_seen": 94024150, - "step": 4410 - }, - { - "epoch": 0.5303913906090303, - "grad_norm": 2.2548373898608003, - "learning_rate": 1.900332927204454e-06, - "loss": 0.6484, - "num_input_tokens_seen": 94042320, - "step": 4411 - }, - { - "epoch": 0.5305116334996693, - "grad_norm": 2.3480190408862742, - "learning_rate": 1.89955493097961e-06, - "loss": 0.7671, - "num_input_tokens_seen": 94061345, - "step": 4412 - }, - { - "epoch": 0.5306318763903084, - "grad_norm": 2.0355745790844555, - "learning_rate": 1.8987769499922032e-06, - "loss": 0.7568, - "num_input_tokens_seen": 94080035, - "step": 4413 - }, - { - "epoch": 0.5307521192809476, - "grad_norm": 2.6908883799514327, - "learning_rate": 1.897998984360252e-06, - "loss": 0.7043, - "num_input_tokens_seen": 94098725, - "step": 4414 - }, - { - "epoch": 0.5308723621715866, - "grad_norm": 3.629613308427711, - "learning_rate": 1.8972210342017746e-06, - "loss": 0.778, - "num_input_tokens_seen": 94122185, - "step": 4415 - }, - { - "epoch": 0.5309926050622257, - "grad_norm": 1.782050217652631, - "learning_rate": 1.8964430996347842e-06, - "loss": 0.6626, - "num_input_tokens_seen": 94143455, - "step": 4416 - }, - { - "epoch": 0.5311128479528648, - "grad_norm": 2.48351731557814, - "learning_rate": 1.8956651807772936e-06, - "loss": 0.821, - "num_input_tokens_seen": 94161210, - "step": 4417 - }, - { - "epoch": 0.5312330908435039, - "grad_norm": 2.3983382985017316, - "learning_rate": 1.8948872777473115e-06, - "loss": 0.829, - "num_input_tokens_seen": 94178885, - "step": 4418 - }, - { - "epoch": 0.531353333734143, - "grad_norm": 2.398346175339401, - "learning_rate": 1.8941093906628462e-06, - "loss": 0.6333, - "num_input_tokens_seen": 94196390, - "step": 4419 - }, - { - "epoch": 0.531473576624782, - "grad_norm": 2.5024207529338316, - "learning_rate": 1.8933315196419024e-06, - "loss": 0.7117, - "num_input_tokens_seen": 94218255, - "step": 4420 - }, - { - "epoch": 0.5315938195154212, - "grad_norm": 5.498818319451968, - "learning_rate": 1.892553664802482e-06, - "loss": 0.7395, - "num_input_tokens_seen": 94235395, - "step": 4421 - }, - { - "epoch": 0.5317140624060602, - "grad_norm": 17.04042722055294, - "learning_rate": 1.8917758262625845e-06, - "loss": 0.756, - "num_input_tokens_seen": 94255355, - "step": 4422 - }, - { - "epoch": 0.5318343052966993, - "grad_norm": 2.3259055292546487, - "learning_rate": 1.8909980041402089e-06, - "loss": 0.8066, - "num_input_tokens_seen": 94273670, - "step": 4423 - }, - { - "epoch": 0.5319545481873384, - "grad_norm": 5.70026526891228, - "learning_rate": 1.8902201985533494e-06, - "loss": 0.6591, - "num_input_tokens_seen": 94290655, - "step": 4424 - }, - { - "epoch": 0.5320747910779775, - "grad_norm": 2.995304165481853, - "learning_rate": 1.8894424096199987e-06, - "loss": 0.747, - "num_input_tokens_seen": 94309580, - "step": 4425 - }, - { - "epoch": 0.5321950339686166, - "grad_norm": 2.574047933770709, - "learning_rate": 1.8886646374581459e-06, - "loss": 0.8605, - "num_input_tokens_seen": 94328525, - "step": 4426 - }, - { - "epoch": 0.5323152768592557, - "grad_norm": 2.359365978602531, - "learning_rate": 1.8878868821857795e-06, - "loss": 0.6995, - "num_input_tokens_seen": 94347895, - "step": 4427 - }, - { - "epoch": 0.5324355197498948, - "grad_norm": 2.648724418021193, - "learning_rate": 1.8871091439208842e-06, - "loss": 0.741, - "num_input_tokens_seen": 94369225, - "step": 4428 - }, - { - "epoch": 0.5325557626405338, - "grad_norm": 5.05780777383036, - "learning_rate": 1.8863314227814418e-06, - "loss": 0.7712, - "num_input_tokens_seen": 94387255, - "step": 4429 - }, - { - "epoch": 0.532676005531173, - "grad_norm": 2.732734031889, - "learning_rate": 1.8855537188854313e-06, - "loss": 0.4789, - "num_input_tokens_seen": 94405950, - "step": 4430 - }, - { - "epoch": 0.5327962484218121, - "grad_norm": 2.3070939620803133, - "learning_rate": 1.8847760323508315e-06, - "loss": 0.7778, - "num_input_tokens_seen": 94424575, - "step": 4431 - }, - { - "epoch": 0.5329164913124511, - "grad_norm": 1.8927473150470764, - "learning_rate": 1.883998363295616e-06, - "loss": 0.7501, - "num_input_tokens_seen": 94441775, - "step": 4432 - }, - { - "epoch": 0.5330367342030903, - "grad_norm": 0.98667060284152, - "learning_rate": 1.8832207118377565e-06, - "loss": 0.6823, - "num_input_tokens_seen": 94496865, - "step": 4433 - }, - { - "epoch": 0.5331569770937293, - "grad_norm": 3.269748394264652, - "learning_rate": 1.882443078095222e-06, - "loss": 0.6892, - "num_input_tokens_seen": 94515465, - "step": 4434 - }, - { - "epoch": 0.5332772199843684, - "grad_norm": 0.8712139923144947, - "learning_rate": 1.8816654621859794e-06, - "loss": 0.6882, - "num_input_tokens_seen": 94574850, - "step": 4435 - }, - { - "epoch": 0.5333974628750076, - "grad_norm": 2.257311872312223, - "learning_rate": 1.880887864227992e-06, - "loss": 0.7193, - "num_input_tokens_seen": 94589975, - "step": 4436 - }, - { - "epoch": 0.5335177057656466, - "grad_norm": 3.0373034547459032, - "learning_rate": 1.8801102843392209e-06, - "loss": 0.6486, - "num_input_tokens_seen": 94609100, - "step": 4437 - }, - { - "epoch": 0.5336379486562857, - "grad_norm": 2.8821916088537316, - "learning_rate": 1.8793327226376234e-06, - "loss": 0.8491, - "num_input_tokens_seen": 94628140, - "step": 4438 - }, - { - "epoch": 0.5337581915469248, - "grad_norm": 2.35753029770418, - "learning_rate": 1.8785551792411569e-06, - "loss": 0.7969, - "num_input_tokens_seen": 94646870, - "step": 4439 - }, - { - "epoch": 0.5338784344375639, - "grad_norm": 2.438078951391267, - "learning_rate": 1.8777776542677733e-06, - "loss": 0.8262, - "num_input_tokens_seen": 94664640, - "step": 4440 - }, - { - "epoch": 0.5339986773282029, - "grad_norm": 3.295531280338399, - "learning_rate": 1.877000147835422e-06, - "loss": 0.7146, - "num_input_tokens_seen": 94684035, - "step": 4441 - }, - { - "epoch": 0.5341189202188421, - "grad_norm": 2.644824057408577, - "learning_rate": 1.8762226600620504e-06, - "loss": 0.8283, - "num_input_tokens_seen": 94702370, - "step": 4442 - }, - { - "epoch": 0.5342391631094812, - "grad_norm": 7.1359933724098035, - "learning_rate": 1.8754451910656031e-06, - "loss": 0.5976, - "num_input_tokens_seen": 94715990, - "step": 4443 - }, - { - "epoch": 0.5343594060001202, - "grad_norm": 2.000797199205094, - "learning_rate": 1.8746677409640212e-06, - "loss": 0.8187, - "num_input_tokens_seen": 94732810, - "step": 4444 - }, - { - "epoch": 0.5344796488907594, - "grad_norm": 2.2001629176456166, - "learning_rate": 1.8738903098752437e-06, - "loss": 0.8386, - "num_input_tokens_seen": 94751660, - "step": 4445 - }, - { - "epoch": 0.5345998917813984, - "grad_norm": 2.3222989271348458, - "learning_rate": 1.8731128979172048e-06, - "loss": 0.7332, - "num_input_tokens_seen": 94770580, - "step": 4446 - }, - { - "epoch": 0.5347201346720375, - "grad_norm": 2.2798175222388437, - "learning_rate": 1.8723355052078394e-06, - "loss": 0.6559, - "num_input_tokens_seen": 94790335, - "step": 4447 - }, - { - "epoch": 0.5348403775626767, - "grad_norm": 6.289137556229466, - "learning_rate": 1.871558131865076e-06, - "loss": 0.7708, - "num_input_tokens_seen": 94809110, - "step": 4448 - }, - { - "epoch": 0.5349606204533157, - "grad_norm": 4.297781075734802, - "learning_rate": 1.8707807780068429e-06, - "loss": 0.8131, - "num_input_tokens_seen": 94826645, - "step": 4449 - }, - { - "epoch": 0.5350808633439548, - "grad_norm": 2.273705193317834, - "learning_rate": 1.8700034437510611e-06, - "loss": 0.6569, - "num_input_tokens_seen": 94846460, - "step": 4450 - }, - { - "epoch": 0.5352011062345938, - "grad_norm": 3.877558780218437, - "learning_rate": 1.8692261292156549e-06, - "loss": 0.8002, - "num_input_tokens_seen": 94865415, - "step": 4451 - }, - { - "epoch": 0.535321349125233, - "grad_norm": 2.1554374582619804, - "learning_rate": 1.8684488345185405e-06, - "loss": 0.8179, - "num_input_tokens_seen": 94885310, - "step": 4452 - }, - { - "epoch": 0.535441592015872, - "grad_norm": 3.642572747656797, - "learning_rate": 1.8676715597776336e-06, - "loss": 0.7899, - "num_input_tokens_seen": 94903375, - "step": 4453 - }, - { - "epoch": 0.5355618349065111, - "grad_norm": 1.8366259682792494, - "learning_rate": 1.8668943051108455e-06, - "loss": 0.7571, - "num_input_tokens_seen": 94920400, - "step": 4454 - }, - { - "epoch": 0.5356820777971503, - "grad_norm": 2.4695849171586404, - "learning_rate": 1.8661170706360856e-06, - "loss": 0.7584, - "num_input_tokens_seen": 94939285, - "step": 4455 - }, - { - "epoch": 0.5358023206877893, - "grad_norm": 2.1278996738246057, - "learning_rate": 1.8653398564712598e-06, - "loss": 0.8146, - "num_input_tokens_seen": 94957950, - "step": 4456 - }, - { - "epoch": 0.5359225635784284, - "grad_norm": 1.6637854996793302, - "learning_rate": 1.8645626627342708e-06, - "loss": 0.8171, - "num_input_tokens_seen": 94978435, - "step": 4457 - }, - { - "epoch": 0.5360428064690675, - "grad_norm": 2.231268407188041, - "learning_rate": 1.8637854895430172e-06, - "loss": 0.7995, - "num_input_tokens_seen": 94997420, - "step": 4458 - }, - { - "epoch": 0.5361630493597066, - "grad_norm": 3.1239707106360344, - "learning_rate": 1.8630083370153974e-06, - "loss": 0.6809, - "num_input_tokens_seen": 95016780, - "step": 4459 - }, - { - "epoch": 0.5362832922503457, - "grad_norm": 0.8387809675883565, - "learning_rate": 1.8622312052693041e-06, - "loss": 0.5895, - "num_input_tokens_seen": 95077680, - "step": 4460 - }, - { - "epoch": 0.5364035351409848, - "grad_norm": 5.63156720194881, - "learning_rate": 1.861454094422627e-06, - "loss": 0.7131, - "num_input_tokens_seen": 95094070, - "step": 4461 - }, - { - "epoch": 0.5365237780316239, - "grad_norm": 5.824647863542392, - "learning_rate": 1.8606770045932537e-06, - "loss": 0.678, - "num_input_tokens_seen": 95112905, - "step": 4462 - }, - { - "epoch": 0.5366440209222629, - "grad_norm": 4.120999983008074, - "learning_rate": 1.8598999358990684e-06, - "loss": 0.8144, - "num_input_tokens_seen": 95132480, - "step": 4463 - }, - { - "epoch": 0.5367642638129021, - "grad_norm": 2.3812548353314806, - "learning_rate": 1.859122888457951e-06, - "loss": 0.7897, - "num_input_tokens_seen": 95150695, - "step": 4464 - }, - { - "epoch": 0.5368845067035412, - "grad_norm": 2.0256451081564997, - "learning_rate": 1.85834586238778e-06, - "loss": 0.8144, - "num_input_tokens_seen": 95169515, - "step": 4465 - }, - { - "epoch": 0.5370047495941802, - "grad_norm": 2.067079322522307, - "learning_rate": 1.8575688578064277e-06, - "loss": 0.738, - "num_input_tokens_seen": 95187360, - "step": 4466 - }, - { - "epoch": 0.5371249924848194, - "grad_norm": 2.1133976624849606, - "learning_rate": 1.8567918748317674e-06, - "loss": 0.7562, - "num_input_tokens_seen": 95206430, - "step": 4467 - }, - { - "epoch": 0.5372452353754584, - "grad_norm": 2.4447912198549564, - "learning_rate": 1.8560149135816659e-06, - "loss": 0.8199, - "num_input_tokens_seen": 95222985, - "step": 4468 - }, - { - "epoch": 0.5373654782660975, - "grad_norm": 2.563339531592118, - "learning_rate": 1.8552379741739877e-06, - "loss": 0.8421, - "num_input_tokens_seen": 95240050, - "step": 4469 - }, - { - "epoch": 0.5374857211567367, - "grad_norm": 0.9263359863004152, - "learning_rate": 1.854461056726593e-06, - "loss": 0.5806, - "num_input_tokens_seen": 95293710, - "step": 4470 - }, - { - "epoch": 0.5376059640473757, - "grad_norm": 3.269741620668846, - "learning_rate": 1.853684161357341e-06, - "loss": 0.838, - "num_input_tokens_seen": 95311090, - "step": 4471 - }, - { - "epoch": 0.5377262069380148, - "grad_norm": 3.3909582674245407, - "learning_rate": 1.852907288184085e-06, - "loss": 0.775, - "num_input_tokens_seen": 95329695, - "step": 4472 - }, - { - "epoch": 0.5378464498286539, - "grad_norm": 10.617191930527659, - "learning_rate": 1.8521304373246766e-06, - "loss": 0.7, - "num_input_tokens_seen": 95350460, - "step": 4473 - }, - { - "epoch": 0.537966692719293, - "grad_norm": 4.418473177654474, - "learning_rate": 1.8513536088969626e-06, - "loss": 0.8814, - "num_input_tokens_seen": 95367845, - "step": 4474 - }, - { - "epoch": 0.538086935609932, - "grad_norm": 2.629785559357314, - "learning_rate": 1.8505768030187884e-06, - "loss": 0.7901, - "num_input_tokens_seen": 95387695, - "step": 4475 - }, - { - "epoch": 0.5382071785005712, - "grad_norm": 1.9645438551795233, - "learning_rate": 1.849800019807995e-06, - "loss": 0.7924, - "num_input_tokens_seen": 95408640, - "step": 4476 - }, - { - "epoch": 0.5383274213912103, - "grad_norm": 2.3472180828043574, - "learning_rate": 1.8490232593824186e-06, - "loss": 0.7132, - "num_input_tokens_seen": 95424815, - "step": 4477 - }, - { - "epoch": 0.5384476642818493, - "grad_norm": 1.7984980452169808, - "learning_rate": 1.8482465218598935e-06, - "loss": 0.8371, - "num_input_tokens_seen": 95444480, - "step": 4478 - }, - { - "epoch": 0.5385679071724885, - "grad_norm": 1.960688534881271, - "learning_rate": 1.8474698073582508e-06, - "loss": 0.8346, - "num_input_tokens_seen": 95465570, - "step": 4479 - }, - { - "epoch": 0.5386881500631275, - "grad_norm": 2.440468062830047, - "learning_rate": 1.846693115995317e-06, - "loss": 0.8626, - "num_input_tokens_seen": 95481925, - "step": 4480 - }, - { - "epoch": 0.5388083929537666, - "grad_norm": 1.8743435857275081, - "learning_rate": 1.8459164478889158e-06, - "loss": 0.8332, - "num_input_tokens_seen": 95503040, - "step": 4481 - }, - { - "epoch": 0.5389286358444056, - "grad_norm": 2.1722445227544838, - "learning_rate": 1.8451398031568658e-06, - "loss": 0.7568, - "num_input_tokens_seen": 95522385, - "step": 4482 - }, - { - "epoch": 0.5390488787350448, - "grad_norm": 1.7483135591373813, - "learning_rate": 1.8443631819169856e-06, - "loss": 0.7363, - "num_input_tokens_seen": 95542830, - "step": 4483 - }, - { - "epoch": 0.5391691216256839, - "grad_norm": 3.1262200639516045, - "learning_rate": 1.8435865842870868e-06, - "loss": 0.8255, - "num_input_tokens_seen": 95560490, - "step": 4484 - }, - { - "epoch": 0.5392893645163229, - "grad_norm": 2.3602068511998233, - "learning_rate": 1.842810010384979e-06, - "loss": 0.716, - "num_input_tokens_seen": 95580005, - "step": 4485 - }, - { - "epoch": 0.5394096074069621, - "grad_norm": 2.444235480450681, - "learning_rate": 1.842033460328467e-06, - "loss": 0.7137, - "num_input_tokens_seen": 95598445, - "step": 4486 - }, - { - "epoch": 0.5395298502976011, - "grad_norm": 2.2407812315153977, - "learning_rate": 1.8412569342353541e-06, - "loss": 0.7481, - "num_input_tokens_seen": 95618320, - "step": 4487 - }, - { - "epoch": 0.5396500931882402, - "grad_norm": 2.3805927645388314, - "learning_rate": 1.840480432223438e-06, - "loss": 0.8446, - "num_input_tokens_seen": 95637045, - "step": 4488 - }, - { - "epoch": 0.5397703360788794, - "grad_norm": 2.6192858027677888, - "learning_rate": 1.8397039544105136e-06, - "loss": 0.7702, - "num_input_tokens_seen": 95655850, - "step": 4489 - }, - { - "epoch": 0.5398905789695184, - "grad_norm": 2.23756559739194, - "learning_rate": 1.8389275009143707e-06, - "loss": 0.6995, - "num_input_tokens_seen": 95675310, - "step": 4490 - }, - { - "epoch": 0.5400108218601575, - "grad_norm": 2.5963487295155923, - "learning_rate": 1.8381510718527988e-06, - "loss": 0.7292, - "num_input_tokens_seen": 95694640, - "step": 4491 - }, - { - "epoch": 0.5401310647507966, - "grad_norm": 2.3130556921590806, - "learning_rate": 1.8373746673435812e-06, - "loss": 0.6297, - "num_input_tokens_seen": 95715385, - "step": 4492 - }, - { - "epoch": 0.5402513076414357, - "grad_norm": 3.315596086758766, - "learning_rate": 1.8365982875044968e-06, - "loss": 0.7799, - "num_input_tokens_seen": 95735415, - "step": 4493 - }, - { - "epoch": 0.5403715505320748, - "grad_norm": 2.894893736384203, - "learning_rate": 1.8358219324533212e-06, - "loss": 0.7533, - "num_input_tokens_seen": 95755400, - "step": 4494 - }, - { - "epoch": 0.5404917934227139, - "grad_norm": 2.3653403758497604, - "learning_rate": 1.8350456023078292e-06, - "loss": 0.6929, - "num_input_tokens_seen": 95777495, - "step": 4495 - }, - { - "epoch": 0.540612036313353, - "grad_norm": 2.812174444332911, - "learning_rate": 1.8342692971857879e-06, - "loss": 0.7732, - "num_input_tokens_seen": 95796415, - "step": 4496 - }, - { - "epoch": 0.540732279203992, - "grad_norm": 2.8317094289971045, - "learning_rate": 1.8334930172049624e-06, - "loss": 0.7069, - "num_input_tokens_seen": 95816240, - "step": 4497 - }, - { - "epoch": 0.5408525220946312, - "grad_norm": 2.522146148860825, - "learning_rate": 1.8327167624831134e-06, - "loss": 0.7661, - "num_input_tokens_seen": 95833690, - "step": 4498 - }, - { - "epoch": 0.5409727649852702, - "grad_norm": 1.8753000241432358, - "learning_rate": 1.831940533137999e-06, - "loss": 0.6998, - "num_input_tokens_seen": 95852315, - "step": 4499 - }, - { - "epoch": 0.5410930078759093, - "grad_norm": 2.3991958453254223, - "learning_rate": 1.8311643292873723e-06, - "loss": 0.7149, - "num_input_tokens_seen": 95870855, - "step": 4500 - }, - { - "epoch": 0.5412132507665485, - "grad_norm": 2.1620078352298546, - "learning_rate": 1.8303881510489822e-06, - "loss": 0.8782, - "num_input_tokens_seen": 95888965, - "step": 4501 - }, - { - "epoch": 0.5413334936571875, - "grad_norm": 2.127286289833415, - "learning_rate": 1.829611998540574e-06, - "loss": 0.6906, - "num_input_tokens_seen": 95909890, - "step": 4502 - }, - { - "epoch": 0.5414537365478266, - "grad_norm": 10.78141914026094, - "learning_rate": 1.828835871879891e-06, - "loss": 0.7982, - "num_input_tokens_seen": 95928800, - "step": 4503 - }, - { - "epoch": 0.5415739794384657, - "grad_norm": 2.41228774844088, - "learning_rate": 1.8280597711846703e-06, - "loss": 0.7212, - "num_input_tokens_seen": 95946760, - "step": 4504 - }, - { - "epoch": 0.5416942223291048, - "grad_norm": 2.3437457531770627, - "learning_rate": 1.8272836965726455e-06, - "loss": 0.8326, - "num_input_tokens_seen": 95965415, - "step": 4505 - }, - { - "epoch": 0.5418144652197439, - "grad_norm": 4.082541764448734, - "learning_rate": 1.8265076481615461e-06, - "loss": 0.7748, - "num_input_tokens_seen": 95985050, - "step": 4506 - }, - { - "epoch": 0.541934708110383, - "grad_norm": 2.4423541465414953, - "learning_rate": 1.8257316260690991e-06, - "loss": 0.8663, - "num_input_tokens_seen": 96002555, - "step": 4507 - }, - { - "epoch": 0.5420549510010221, - "grad_norm": 1.6724831441693595, - "learning_rate": 1.8249556304130258e-06, - "loss": 0.7583, - "num_input_tokens_seen": 96023555, - "step": 4508 - }, - { - "epoch": 0.5421751938916611, - "grad_norm": 2.6205232693030083, - "learning_rate": 1.8241796613110443e-06, - "loss": 0.6772, - "num_input_tokens_seen": 96042025, - "step": 4509 - }, - { - "epoch": 0.5422954367823003, - "grad_norm": 2.394894924883063, - "learning_rate": 1.8234037188808676e-06, - "loss": 0.7919, - "num_input_tokens_seen": 96060505, - "step": 4510 - }, - { - "epoch": 0.5424156796729394, - "grad_norm": 2.186440693775744, - "learning_rate": 1.822627803240207e-06, - "loss": 0.6603, - "num_input_tokens_seen": 96082555, - "step": 4511 - }, - { - "epoch": 0.5425359225635784, - "grad_norm": 3.1415320784749183, - "learning_rate": 1.8218519145067675e-06, - "loss": 0.8423, - "num_input_tokens_seen": 96097895, - "step": 4512 - }, - { - "epoch": 0.5426561654542175, - "grad_norm": 2.4460330056835984, - "learning_rate": 1.8210760527982512e-06, - "loss": 0.8933, - "num_input_tokens_seen": 96117900, - "step": 4513 - }, - { - "epoch": 0.5427764083448566, - "grad_norm": 2.6044980489508154, - "learning_rate": 1.8203002182323548e-06, - "loss": 0.7415, - "num_input_tokens_seen": 96135175, - "step": 4514 - }, - { - "epoch": 0.5428966512354957, - "grad_norm": 2.4114133682685392, - "learning_rate": 1.819524410926773e-06, - "loss": 0.7534, - "num_input_tokens_seen": 96152575, - "step": 4515 - }, - { - "epoch": 0.5430168941261347, - "grad_norm": 3.295945531207163, - "learning_rate": 1.8187486309991944e-06, - "loss": 0.7673, - "num_input_tokens_seen": 96173175, - "step": 4516 - }, - { - "epoch": 0.5431371370167739, - "grad_norm": 3.34917098162259, - "learning_rate": 1.8179728785673044e-06, - "loss": 0.7682, - "num_input_tokens_seen": 96191550, - "step": 4517 - }, - { - "epoch": 0.543257379907413, - "grad_norm": 2.5901773411606768, - "learning_rate": 1.8171971537487834e-06, - "loss": 0.7532, - "num_input_tokens_seen": 96209920, - "step": 4518 - }, - { - "epoch": 0.543377622798052, - "grad_norm": 3.053474104806019, - "learning_rate": 1.8164214566613093e-06, - "loss": 0.7976, - "num_input_tokens_seen": 96228265, - "step": 4519 - }, - { - "epoch": 0.5434978656886912, - "grad_norm": 4.070444044362186, - "learning_rate": 1.8156457874225547e-06, - "loss": 0.6416, - "num_input_tokens_seen": 96246445, - "step": 4520 - }, - { - "epoch": 0.5436181085793302, - "grad_norm": 2.2413919105501496, - "learning_rate": 1.814870146150187e-06, - "loss": 0.8002, - "num_input_tokens_seen": 96264275, - "step": 4521 - }, - { - "epoch": 0.5437383514699693, - "grad_norm": 3.204435880886595, - "learning_rate": 1.814094532961871e-06, - "loss": 0.7935, - "num_input_tokens_seen": 96282570, - "step": 4522 - }, - { - "epoch": 0.5438585943606085, - "grad_norm": 4.201664213550183, - "learning_rate": 1.8133189479752666e-06, - "loss": 0.8269, - "num_input_tokens_seen": 96301220, - "step": 4523 - }, - { - "epoch": 0.5439788372512475, - "grad_norm": 2.417096310683502, - "learning_rate": 1.8125433913080296e-06, - "loss": 0.809, - "num_input_tokens_seen": 96318640, - "step": 4524 - }, - { - "epoch": 0.5440990801418866, - "grad_norm": 2.410189694031847, - "learning_rate": 1.811767863077811e-06, - "loss": 0.8251, - "num_input_tokens_seen": 96337310, - "step": 4525 - }, - { - "epoch": 0.5442193230325257, - "grad_norm": 1.7308144573206885, - "learning_rate": 1.8109923634022573e-06, - "loss": 0.7752, - "num_input_tokens_seen": 96357055, - "step": 4526 - }, - { - "epoch": 0.5443395659231648, - "grad_norm": 2.3333614558463855, - "learning_rate": 1.8102168923990124e-06, - "loss": 0.8529, - "num_input_tokens_seen": 96370320, - "step": 4527 - }, - { - "epoch": 0.5444598088138038, - "grad_norm": 2.381400413939986, - "learning_rate": 1.809441450185714e-06, - "loss": 0.7915, - "num_input_tokens_seen": 96388525, - "step": 4528 - }, - { - "epoch": 0.544580051704443, - "grad_norm": 3.6277852093939207, - "learning_rate": 1.8086660368799963e-06, - "loss": 0.7378, - "num_input_tokens_seen": 96406295, - "step": 4529 - }, - { - "epoch": 0.5447002945950821, - "grad_norm": 1.8384391980530215, - "learning_rate": 1.807890652599488e-06, - "loss": 0.7598, - "num_input_tokens_seen": 96430400, - "step": 4530 - }, - { - "epoch": 0.5448205374857211, - "grad_norm": 3.1386592652514937, - "learning_rate": 1.8071152974618156e-06, - "loss": 0.8146, - "num_input_tokens_seen": 96447920, - "step": 4531 - }, - { - "epoch": 0.5449407803763603, - "grad_norm": 3.0048425241233163, - "learning_rate": 1.806339971584599e-06, - "loss": 0.7785, - "num_input_tokens_seen": 96464300, - "step": 4532 - }, - { - "epoch": 0.5450610232669993, - "grad_norm": 3.1812703582218136, - "learning_rate": 1.805564675085455e-06, - "loss": 0.8495, - "num_input_tokens_seen": 96483530, - "step": 4533 - }, - { - "epoch": 0.5451812661576384, - "grad_norm": 3.0389818427033974, - "learning_rate": 1.804789408081994e-06, - "loss": 0.8116, - "num_input_tokens_seen": 96500500, - "step": 4534 - }, - { - "epoch": 0.5453015090482776, - "grad_norm": 0.8047200938155699, - "learning_rate": 1.8040141706918258e-06, - "loss": 0.6643, - "num_input_tokens_seen": 96561460, - "step": 4535 - }, - { - "epoch": 0.5454217519389166, - "grad_norm": 3.0918020637606984, - "learning_rate": 1.8032389630325525e-06, - "loss": 0.7593, - "num_input_tokens_seen": 96579930, - "step": 4536 - }, - { - "epoch": 0.5455419948295557, - "grad_norm": 1.971746415360197, - "learning_rate": 1.8024637852217711e-06, - "loss": 0.751, - "num_input_tokens_seen": 96599375, - "step": 4537 - }, - { - "epoch": 0.5456622377201948, - "grad_norm": 1.890882133359788, - "learning_rate": 1.801688637377076e-06, - "loss": 0.8409, - "num_input_tokens_seen": 96617610, - "step": 4538 - }, - { - "epoch": 0.5457824806108339, - "grad_norm": 2.178145395482317, - "learning_rate": 1.8009135196160579e-06, - "loss": 0.7768, - "num_input_tokens_seen": 96636205, - "step": 4539 - }, - { - "epoch": 0.545902723501473, - "grad_norm": 4.253572513350592, - "learning_rate": 1.8001384320563004e-06, - "loss": 0.8348, - "num_input_tokens_seen": 96656180, - "step": 4540 - }, - { - "epoch": 0.5460229663921121, - "grad_norm": 0.8635187798253072, - "learning_rate": 1.7993633748153838e-06, - "loss": 0.6133, - "num_input_tokens_seen": 96710505, - "step": 4541 - }, - { - "epoch": 0.5461432092827512, - "grad_norm": 1.9087505469664325, - "learning_rate": 1.7985883480108834e-06, - "loss": 0.7198, - "num_input_tokens_seen": 96727860, - "step": 4542 - }, - { - "epoch": 0.5462634521733902, - "grad_norm": 2.5984113510996187, - "learning_rate": 1.797813351760371e-06, - "loss": 0.7171, - "num_input_tokens_seen": 96749285, - "step": 4543 - }, - { - "epoch": 0.5463836950640293, - "grad_norm": 2.004202562171678, - "learning_rate": 1.797038386181412e-06, - "loss": 0.7791, - "num_input_tokens_seen": 96768775, - "step": 4544 - }, - { - "epoch": 0.5465039379546685, - "grad_norm": 2.9186098632741073, - "learning_rate": 1.7962634513915689e-06, - "loss": 0.7374, - "num_input_tokens_seen": 96785845, - "step": 4545 - }, - { - "epoch": 0.5466241808453075, - "grad_norm": 3.6968052624059795, - "learning_rate": 1.7954885475083969e-06, - "loss": 0.7907, - "num_input_tokens_seen": 96803235, - "step": 4546 - }, - { - "epoch": 0.5467444237359466, - "grad_norm": 2.4142627767014875, - "learning_rate": 1.7947136746494509e-06, - "loss": 0.7345, - "num_input_tokens_seen": 96823870, - "step": 4547 - }, - { - "epoch": 0.5468646666265857, - "grad_norm": 2.560287438975104, - "learning_rate": 1.793938832932277e-06, - "loss": 0.8705, - "num_input_tokens_seen": 96841700, - "step": 4548 - }, - { - "epoch": 0.5469849095172248, - "grad_norm": 2.4788540133021013, - "learning_rate": 1.7931640224744185e-06, - "loss": 0.692, - "num_input_tokens_seen": 96861970, - "step": 4549 - }, - { - "epoch": 0.5471051524078638, - "grad_norm": 3.386966434795847, - "learning_rate": 1.7923892433934127e-06, - "loss": 0.7338, - "num_input_tokens_seen": 96882765, - "step": 4550 - }, - { - "epoch": 0.547225395298503, - "grad_norm": 2.85734832936579, - "learning_rate": 1.7916144958067943e-06, - "loss": 0.783, - "num_input_tokens_seen": 96900345, - "step": 4551 - }, - { - "epoch": 0.5473456381891421, - "grad_norm": 2.3233810216917252, - "learning_rate": 1.790839779832091e-06, - "loss": 0.7814, - "num_input_tokens_seen": 96919800, - "step": 4552 - }, - { - "epoch": 0.5474658810797811, - "grad_norm": 2.782166333885925, - "learning_rate": 1.790065095586827e-06, - "loss": 0.7377, - "num_input_tokens_seen": 96939165, - "step": 4553 - }, - { - "epoch": 0.5475861239704203, - "grad_norm": 2.45252649003263, - "learning_rate": 1.7892904431885198e-06, - "loss": 0.7625, - "num_input_tokens_seen": 96966060, - "step": 4554 - }, - { - "epoch": 0.5477063668610593, - "grad_norm": 2.6418683922961272, - "learning_rate": 1.788515822754686e-06, - "loss": 0.7549, - "num_input_tokens_seen": 96986200, - "step": 4555 - }, - { - "epoch": 0.5478266097516984, - "grad_norm": 3.639128229011245, - "learning_rate": 1.7877412344028335e-06, - "loss": 0.7842, - "num_input_tokens_seen": 97005725, - "step": 4556 - }, - { - "epoch": 0.5479468526423376, - "grad_norm": 2.66724802219536, - "learning_rate": 1.7869666782504673e-06, - "loss": 0.7737, - "num_input_tokens_seen": 97022025, - "step": 4557 - }, - { - "epoch": 0.5480670955329766, - "grad_norm": 2.1979104104713496, - "learning_rate": 1.7861921544150862e-06, - "loss": 0.6906, - "num_input_tokens_seen": 97040595, - "step": 4558 - }, - { - "epoch": 0.5481873384236157, - "grad_norm": 5.390991213395312, - "learning_rate": 1.7854176630141856e-06, - "loss": 0.7629, - "num_input_tokens_seen": 97057450, - "step": 4559 - }, - { - "epoch": 0.5483075813142548, - "grad_norm": 2.8387650721681896, - "learning_rate": 1.784643204165255e-06, - "loss": 0.8408, - "num_input_tokens_seen": 97076490, - "step": 4560 - }, - { - "epoch": 0.5484278242048939, - "grad_norm": 2.129319554576394, - "learning_rate": 1.7838687779857788e-06, - "loss": 0.7628, - "num_input_tokens_seen": 97094085, - "step": 4561 - }, - { - "epoch": 0.5485480670955329, - "grad_norm": 6.801854309750558, - "learning_rate": 1.7830943845932366e-06, - "loss": 0.6376, - "num_input_tokens_seen": 97113130, - "step": 4562 - }, - { - "epoch": 0.5486683099861721, - "grad_norm": 2.0142045892946583, - "learning_rate": 1.7823200241051044e-06, - "loss": 0.7422, - "num_input_tokens_seen": 97131765, - "step": 4563 - }, - { - "epoch": 0.5487885528768112, - "grad_norm": 2.548632231539975, - "learning_rate": 1.7815456966388513e-06, - "loss": 0.8026, - "num_input_tokens_seen": 97150580, - "step": 4564 - }, - { - "epoch": 0.5489087957674502, - "grad_norm": 2.328563739640445, - "learning_rate": 1.780771402311943e-06, - "loss": 0.8162, - "num_input_tokens_seen": 97169135, - "step": 4565 - }, - { - "epoch": 0.5490290386580894, - "grad_norm": 7.859507943904961, - "learning_rate": 1.7799971412418374e-06, - "loss": 0.7834, - "num_input_tokens_seen": 97190250, - "step": 4566 - }, - { - "epoch": 0.5491492815487284, - "grad_norm": 2.65226546756694, - "learning_rate": 1.7792229135459918e-06, - "loss": 0.7378, - "num_input_tokens_seen": 97206620, - "step": 4567 - }, - { - "epoch": 0.5492695244393675, - "grad_norm": 0.8319925777710446, - "learning_rate": 1.7784487193418542e-06, - "loss": 0.6405, - "num_input_tokens_seen": 97264190, - "step": 4568 - }, - { - "epoch": 0.5493897673300067, - "grad_norm": 4.044800536792994, - "learning_rate": 1.7776745587468698e-06, - "loss": 0.606, - "num_input_tokens_seen": 97281335, - "step": 4569 - }, - { - "epoch": 0.5495100102206457, - "grad_norm": 2.619569129379258, - "learning_rate": 1.7769004318784772e-06, - "loss": 0.8187, - "num_input_tokens_seen": 97298700, - "step": 4570 - }, - { - "epoch": 0.5496302531112848, - "grad_norm": 1.818039195860528, - "learning_rate": 1.7761263388541125e-06, - "loss": 0.8021, - "num_input_tokens_seen": 97316210, - "step": 4571 - }, - { - "epoch": 0.5497504960019239, - "grad_norm": 1.9504988214525416, - "learning_rate": 1.7753522797912044e-06, - "loss": 0.8335, - "num_input_tokens_seen": 97336015, - "step": 4572 - }, - { - "epoch": 0.549870738892563, - "grad_norm": 2.4627141351404487, - "learning_rate": 1.7745782548071769e-06, - "loss": 0.7017, - "num_input_tokens_seen": 97352630, - "step": 4573 - }, - { - "epoch": 0.549990981783202, - "grad_norm": 1.7276097396731485, - "learning_rate": 1.7738042640194482e-06, - "loss": 0.7331, - "num_input_tokens_seen": 97372015, - "step": 4574 - }, - { - "epoch": 0.5501112246738411, - "grad_norm": 1.9824607268051193, - "learning_rate": 1.7730303075454335e-06, - "loss": 0.7033, - "num_input_tokens_seen": 97390625, - "step": 4575 - }, - { - "epoch": 0.5502314675644803, - "grad_norm": 2.6737970907316426, - "learning_rate": 1.7722563855025402e-06, - "loss": 0.8421, - "num_input_tokens_seen": 97408375, - "step": 4576 - }, - { - "epoch": 0.5503517104551193, - "grad_norm": 2.621610651640046, - "learning_rate": 1.7714824980081725e-06, - "loss": 0.7012, - "num_input_tokens_seen": 97427390, - "step": 4577 - }, - { - "epoch": 0.5504719533457584, - "grad_norm": 2.4628521470387637, - "learning_rate": 1.7707086451797272e-06, - "loss": 0.7359, - "num_input_tokens_seen": 97447985, - "step": 4578 - }, - { - "epoch": 0.5505921962363975, - "grad_norm": 0.7072043028030115, - "learning_rate": 1.7699348271345997e-06, - "loss": 0.5395, - "num_input_tokens_seen": 97510330, - "step": 4579 - }, - { - "epoch": 0.5507124391270366, - "grad_norm": 0.7212288128862477, - "learning_rate": 1.7691610439901753e-06, - "loss": 0.5641, - "num_input_tokens_seen": 97572985, - "step": 4580 - }, - { - "epoch": 0.5508326820176757, - "grad_norm": 1.9523288888624444, - "learning_rate": 1.768387295863837e-06, - "loss": 0.7562, - "num_input_tokens_seen": 97591585, - "step": 4581 - }, - { - "epoch": 0.5509529249083148, - "grad_norm": 2.277103540951851, - "learning_rate": 1.767613582872961e-06, - "loss": 0.8372, - "num_input_tokens_seen": 97611015, - "step": 4582 - }, - { - "epoch": 0.5510731677989539, - "grad_norm": 2.468157347012842, - "learning_rate": 1.7668399051349205e-06, - "loss": 0.8248, - "num_input_tokens_seen": 97630415, - "step": 4583 - }, - { - "epoch": 0.5511934106895929, - "grad_norm": 4.3533864891513225, - "learning_rate": 1.766066262767081e-06, - "loss": 0.8196, - "num_input_tokens_seen": 97647975, - "step": 4584 - }, - { - "epoch": 0.5513136535802321, - "grad_norm": 3.9866515939481806, - "learning_rate": 1.7652926558868035e-06, - "loss": 0.7768, - "num_input_tokens_seen": 97666340, - "step": 4585 - }, - { - "epoch": 0.5514338964708712, - "grad_norm": 3.6574408166005337, - "learning_rate": 1.764519084611443e-06, - "loss": 0.7028, - "num_input_tokens_seen": 97686515, - "step": 4586 - }, - { - "epoch": 0.5515541393615102, - "grad_norm": 2.6701065441217184, - "learning_rate": 1.7637455490583505e-06, - "loss": 0.7719, - "num_input_tokens_seen": 97705560, - "step": 4587 - }, - { - "epoch": 0.5516743822521494, - "grad_norm": 2.600611771425595, - "learning_rate": 1.7629720493448706e-06, - "loss": 0.7679, - "num_input_tokens_seen": 97722575, - "step": 4588 - }, - { - "epoch": 0.5517946251427884, - "grad_norm": 2.7786709025685976, - "learning_rate": 1.7621985855883422e-06, - "loss": 0.8436, - "num_input_tokens_seen": 97738995, - "step": 4589 - }, - { - "epoch": 0.5519148680334275, - "grad_norm": 2.1653372250359006, - "learning_rate": 1.7614251579060983e-06, - "loss": 0.7245, - "num_input_tokens_seen": 97757310, - "step": 4590 - }, - { - "epoch": 0.5520351109240667, - "grad_norm": 2.1356490786964986, - "learning_rate": 1.7606517664154693e-06, - "loss": 0.8404, - "num_input_tokens_seen": 97779740, - "step": 4591 - }, - { - "epoch": 0.5521553538147057, - "grad_norm": 2.009791898315628, - "learning_rate": 1.759878411233777e-06, - "loss": 0.7708, - "num_input_tokens_seen": 97797920, - "step": 4592 - }, - { - "epoch": 0.5522755967053448, - "grad_norm": 2.7608515867367402, - "learning_rate": 1.7591050924783388e-06, - "loss": 0.7529, - "num_input_tokens_seen": 97814830, - "step": 4593 - }, - { - "epoch": 0.5523958395959839, - "grad_norm": 0.9190290365883461, - "learning_rate": 1.7583318102664661e-06, - "loss": 0.6283, - "num_input_tokens_seen": 97882115, - "step": 4594 - }, - { - "epoch": 0.552516082486623, - "grad_norm": 2.1414458485954846, - "learning_rate": 1.757558564715466e-06, - "loss": 0.7901, - "num_input_tokens_seen": 97899910, - "step": 4595 - }, - { - "epoch": 0.552636325377262, - "grad_norm": 3.3702286309477674, - "learning_rate": 1.756785355942639e-06, - "loss": 0.7369, - "num_input_tokens_seen": 97916680, - "step": 4596 - }, - { - "epoch": 0.5527565682679012, - "grad_norm": 2.3405612207399407, - "learning_rate": 1.7560121840652801e-06, - "loss": 0.7452, - "num_input_tokens_seen": 97935785, - "step": 4597 - }, - { - "epoch": 0.5528768111585403, - "grad_norm": 3.4159267622632257, - "learning_rate": 1.7552390492006778e-06, - "loss": 0.6939, - "num_input_tokens_seen": 97953825, - "step": 4598 - }, - { - "epoch": 0.5529970540491793, - "grad_norm": 2.393271359955657, - "learning_rate": 1.7544659514661184e-06, - "loss": 0.6517, - "num_input_tokens_seen": 97976635, - "step": 4599 - }, - { - "epoch": 0.5531172969398185, - "grad_norm": 2.2223138923828514, - "learning_rate": 1.7536928909788786e-06, - "loss": 0.7895, - "num_input_tokens_seen": 97995660, - "step": 4600 - }, - { - "epoch": 0.5532375398304575, - "grad_norm": 0.9079854887168407, - "learning_rate": 1.7529198678562317e-06, - "loss": 0.6382, - "num_input_tokens_seen": 98047025, - "step": 4601 - }, - { - "epoch": 0.5533577827210966, - "grad_norm": 2.287128317680286, - "learning_rate": 1.7521468822154436e-06, - "loss": 0.7744, - "num_input_tokens_seen": 98065660, - "step": 4602 - }, - { - "epoch": 0.5534780256117358, - "grad_norm": 2.131467136692197, - "learning_rate": 1.751373934173777e-06, - "loss": 0.7351, - "num_input_tokens_seen": 98088125, - "step": 4603 - }, - { - "epoch": 0.5535982685023748, - "grad_norm": 2.6375927004399946, - "learning_rate": 1.750601023848487e-06, - "loss": 0.7277, - "num_input_tokens_seen": 98108570, - "step": 4604 - }, - { - "epoch": 0.5537185113930139, - "grad_norm": 7.943641189541467, - "learning_rate": 1.7498281513568233e-06, - "loss": 0.7341, - "num_input_tokens_seen": 98128485, - "step": 4605 - }, - { - "epoch": 0.553838754283653, - "grad_norm": 1.9589051071615995, - "learning_rate": 1.7490553168160292e-06, - "loss": 0.7477, - "num_input_tokens_seen": 98149275, - "step": 4606 - }, - { - "epoch": 0.5539589971742921, - "grad_norm": 2.751107397056313, - "learning_rate": 1.748282520343345e-06, - "loss": 0.759, - "num_input_tokens_seen": 98168025, - "step": 4607 - }, - { - "epoch": 0.5540792400649311, - "grad_norm": 4.7398017392329574, - "learning_rate": 1.7475097620560023e-06, - "loss": 0.7828, - "num_input_tokens_seen": 98187810, - "step": 4608 - }, - { - "epoch": 0.5541994829555702, - "grad_norm": 2.32946365279131, - "learning_rate": 1.746737042071228e-06, - "loss": 0.6955, - "num_input_tokens_seen": 98206035, - "step": 4609 - }, - { - "epoch": 0.5543197258462094, - "grad_norm": 2.286728538534861, - "learning_rate": 1.7459643605062424e-06, - "loss": 0.7921, - "num_input_tokens_seen": 98223015, - "step": 4610 - }, - { - "epoch": 0.5544399687368484, - "grad_norm": 3.617949768722936, - "learning_rate": 1.745191717478262e-06, - "loss": 0.807, - "num_input_tokens_seen": 98241315, - "step": 4611 - }, - { - "epoch": 0.5545602116274875, - "grad_norm": 2.142886975268269, - "learning_rate": 1.7444191131044952e-06, - "loss": 0.7961, - "num_input_tokens_seen": 98261310, - "step": 4612 - }, - { - "epoch": 0.5546804545181266, - "grad_norm": 2.0224893838038374, - "learning_rate": 1.743646547502146e-06, - "loss": 0.7178, - "num_input_tokens_seen": 98281080, - "step": 4613 - }, - { - "epoch": 0.5548006974087657, - "grad_norm": 2.399871588061633, - "learning_rate": 1.7428740207884107e-06, - "loss": 0.7002, - "num_input_tokens_seen": 98301680, - "step": 4614 - }, - { - "epoch": 0.5549209402994048, - "grad_norm": 2.1087184573684725, - "learning_rate": 1.742101533080483e-06, - "loss": 0.6115, - "num_input_tokens_seen": 98321320, - "step": 4615 - }, - { - "epoch": 0.5550411831900439, - "grad_norm": 2.0228526046229565, - "learning_rate": 1.7413290844955475e-06, - "loss": 0.7237, - "num_input_tokens_seen": 98341070, - "step": 4616 - }, - { - "epoch": 0.555161426080683, - "grad_norm": 2.468785719319241, - "learning_rate": 1.7405566751507848e-06, - "loss": 0.7773, - "num_input_tokens_seen": 98358835, - "step": 4617 - }, - { - "epoch": 0.555281668971322, - "grad_norm": 1.8245892629041438, - "learning_rate": 1.7397843051633668e-06, - "loss": 0.6721, - "num_input_tokens_seen": 98381250, - "step": 4618 - }, - { - "epoch": 0.5554019118619612, - "grad_norm": 1.7072485014369077, - "learning_rate": 1.739011974650464e-06, - "loss": 0.7074, - "num_input_tokens_seen": 98400300, - "step": 4619 - }, - { - "epoch": 0.5555221547526003, - "grad_norm": 3.4370309849372958, - "learning_rate": 1.7382396837292365e-06, - "loss": 0.7715, - "num_input_tokens_seen": 98420480, - "step": 4620 - }, - { - "epoch": 0.5556423976432393, - "grad_norm": 2.055057315638427, - "learning_rate": 1.7374674325168414e-06, - "loss": 0.7395, - "num_input_tokens_seen": 98440300, - "step": 4621 - }, - { - "epoch": 0.5557626405338785, - "grad_norm": 2.6919249385100374, - "learning_rate": 1.7366952211304274e-06, - "loss": 0.7354, - "num_input_tokens_seen": 98457865, - "step": 4622 - }, - { - "epoch": 0.5558828834245175, - "grad_norm": 4.059086203916531, - "learning_rate": 1.7359230496871392e-06, - "loss": 0.8252, - "num_input_tokens_seen": 98474160, - "step": 4623 - }, - { - "epoch": 0.5560031263151566, - "grad_norm": 1.699107537094162, - "learning_rate": 1.7351509183041149e-06, - "loss": 0.7433, - "num_input_tokens_seen": 98494210, - "step": 4624 - }, - { - "epoch": 0.5561233692057957, - "grad_norm": 2.3238791412693143, - "learning_rate": 1.7343788270984856e-06, - "loss": 0.7222, - "num_input_tokens_seen": 98513070, - "step": 4625 - }, - { - "epoch": 0.5562436120964348, - "grad_norm": 1.932713218314441, - "learning_rate": 1.733606776187376e-06, - "loss": 0.7315, - "num_input_tokens_seen": 98535215, - "step": 4626 - }, - { - "epoch": 0.5563638549870739, - "grad_norm": 2.7704153670930913, - "learning_rate": 1.7328347656879076e-06, - "loss": 0.7553, - "num_input_tokens_seen": 98554795, - "step": 4627 - }, - { - "epoch": 0.556484097877713, - "grad_norm": 3.145226618649204, - "learning_rate": 1.7320627957171927e-06, - "loss": 0.6764, - "num_input_tokens_seen": 98569175, - "step": 4628 - }, - { - "epoch": 0.5566043407683521, - "grad_norm": 1.8068028262777365, - "learning_rate": 1.7312908663923386e-06, - "loss": 0.7995, - "num_input_tokens_seen": 98585070, - "step": 4629 - }, - { - "epoch": 0.5567245836589911, - "grad_norm": 2.630490418375558, - "learning_rate": 1.7305189778304463e-06, - "loss": 0.6675, - "num_input_tokens_seen": 98602965, - "step": 4630 - }, - { - "epoch": 0.5568448265496303, - "grad_norm": 2.587094026970457, - "learning_rate": 1.729747130148611e-06, - "loss": 0.7959, - "num_input_tokens_seen": 98621880, - "step": 4631 - }, - { - "epoch": 0.5569650694402694, - "grad_norm": 2.4483123974819576, - "learning_rate": 1.7289753234639213e-06, - "loss": 0.7585, - "num_input_tokens_seen": 98640575, - "step": 4632 - }, - { - "epoch": 0.5570853123309084, - "grad_norm": 1.92245706126497, - "learning_rate": 1.7282035578934596e-06, - "loss": 0.7517, - "num_input_tokens_seen": 98658460, - "step": 4633 - }, - { - "epoch": 0.5572055552215476, - "grad_norm": 1.9486684671640153, - "learning_rate": 1.727431833554301e-06, - "loss": 0.7814, - "num_input_tokens_seen": 98676655, - "step": 4634 - }, - { - "epoch": 0.5573257981121866, - "grad_norm": 2.348891844585534, - "learning_rate": 1.7266601505635175e-06, - "loss": 0.7665, - "num_input_tokens_seen": 98693715, - "step": 4635 - }, - { - "epoch": 0.5574460410028257, - "grad_norm": 2.399017925617153, - "learning_rate": 1.7258885090381717e-06, - "loss": 0.757, - "num_input_tokens_seen": 98711475, - "step": 4636 - }, - { - "epoch": 0.5575662838934649, - "grad_norm": 2.081514680426162, - "learning_rate": 1.7251169090953213e-06, - "loss": 0.7795, - "num_input_tokens_seen": 98731670, - "step": 4637 - }, - { - "epoch": 0.5576865267841039, - "grad_norm": 2.856826903110264, - "learning_rate": 1.7243453508520168e-06, - "loss": 0.758, - "num_input_tokens_seen": 98748375, - "step": 4638 - }, - { - "epoch": 0.557806769674743, - "grad_norm": 2.4484151050637473, - "learning_rate": 1.7235738344253038e-06, - "loss": 0.8361, - "num_input_tokens_seen": 98761725, - "step": 4639 - }, - { - "epoch": 0.557927012565382, - "grad_norm": 2.2418588312995844, - "learning_rate": 1.7228023599322204e-06, - "loss": 0.8172, - "num_input_tokens_seen": 98779750, - "step": 4640 - }, - { - "epoch": 0.5580472554560212, - "grad_norm": 2.6003058315021583, - "learning_rate": 1.7220309274897983e-06, - "loss": 0.6932, - "num_input_tokens_seen": 98796750, - "step": 4641 - }, - { - "epoch": 0.5581674983466602, - "grad_norm": 2.288405646894083, - "learning_rate": 1.721259537215063e-06, - "loss": 0.7386, - "num_input_tokens_seen": 98816450, - "step": 4642 - }, - { - "epoch": 0.5582877412372993, - "grad_norm": 3.3409610807535572, - "learning_rate": 1.720488189225035e-06, - "loss": 0.7296, - "num_input_tokens_seen": 98833870, - "step": 4643 - }, - { - "epoch": 0.5584079841279385, - "grad_norm": 2.7983596583535006, - "learning_rate": 1.7197168836367265e-06, - "loss": 0.788, - "num_input_tokens_seen": 98850400, - "step": 4644 - }, - { - "epoch": 0.5585282270185775, - "grad_norm": 3.5772989624295537, - "learning_rate": 1.7189456205671437e-06, - "loss": 0.8143, - "num_input_tokens_seen": 98868965, - "step": 4645 - }, - { - "epoch": 0.5586484699092166, - "grad_norm": 2.7775518455902763, - "learning_rate": 1.7181744001332866e-06, - "loss": 0.8142, - "num_input_tokens_seen": 98887295, - "step": 4646 - }, - { - "epoch": 0.5587687127998557, - "grad_norm": 2.328807090449223, - "learning_rate": 1.7174032224521493e-06, - "loss": 0.6336, - "num_input_tokens_seen": 98905725, - "step": 4647 - }, - { - "epoch": 0.5588889556904948, - "grad_norm": 2.440446316656876, - "learning_rate": 1.7166320876407184e-06, - "loss": 0.6936, - "num_input_tokens_seen": 98924865, - "step": 4648 - }, - { - "epoch": 0.5590091985811338, - "grad_norm": 2.553572799425318, - "learning_rate": 1.7158609958159746e-06, - "loss": 0.674, - "num_input_tokens_seen": 98941990, - "step": 4649 - }, - { - "epoch": 0.559129441471773, - "grad_norm": 2.433166228039536, - "learning_rate": 1.7150899470948907e-06, - "loss": 0.7811, - "num_input_tokens_seen": 98956975, - "step": 4650 - }, - { - "epoch": 0.5592496843624121, - "grad_norm": 0.8130248712780672, - "learning_rate": 1.7143189415944365e-06, - "loss": 0.5968, - "num_input_tokens_seen": 99021155, - "step": 4651 - }, - { - "epoch": 0.5593699272530511, - "grad_norm": 2.100087619942274, - "learning_rate": 1.7135479794315714e-06, - "loss": 0.7569, - "num_input_tokens_seen": 99037830, - "step": 4652 - }, - { - "epoch": 0.5594901701436903, - "grad_norm": 2.957547300753262, - "learning_rate": 1.7127770607232502e-06, - "loss": 0.78, - "num_input_tokens_seen": 99056095, - "step": 4653 - }, - { - "epoch": 0.5596104130343293, - "grad_norm": 3.2637453569110533, - "learning_rate": 1.7120061855864204e-06, - "loss": 0.7945, - "num_input_tokens_seen": 99075825, - "step": 4654 - }, - { - "epoch": 0.5597306559249684, - "grad_norm": 2.186395482101678, - "learning_rate": 1.7112353541380233e-06, - "loss": 0.7154, - "num_input_tokens_seen": 99095405, - "step": 4655 - }, - { - "epoch": 0.5598508988156076, - "grad_norm": 1.729936473836241, - "learning_rate": 1.7104645664949935e-06, - "loss": 0.7172, - "num_input_tokens_seen": 99117595, - "step": 4656 - }, - { - "epoch": 0.5599711417062466, - "grad_norm": 3.5900431691580956, - "learning_rate": 1.7096938227742588e-06, - "loss": 0.7211, - "num_input_tokens_seen": 99138445, - "step": 4657 - }, - { - "epoch": 0.5600913845968857, - "grad_norm": 2.0502035553852913, - "learning_rate": 1.7089231230927395e-06, - "loss": 0.8315, - "num_input_tokens_seen": 99156055, - "step": 4658 - }, - { - "epoch": 0.5602116274875248, - "grad_norm": 2.7385146753783998, - "learning_rate": 1.7081524675673518e-06, - "loss": 0.6723, - "num_input_tokens_seen": 99171265, - "step": 4659 - }, - { - "epoch": 0.5603318703781639, - "grad_norm": 0.893477876897292, - "learning_rate": 1.707381856315003e-06, - "loss": 0.6414, - "num_input_tokens_seen": 99233065, - "step": 4660 - }, - { - "epoch": 0.560452113268803, - "grad_norm": 2.9800139101710004, - "learning_rate": 1.706611289452594e-06, - "loss": 0.8646, - "num_input_tokens_seen": 99250865, - "step": 4661 - }, - { - "epoch": 0.5605723561594421, - "grad_norm": 2.126245528566991, - "learning_rate": 1.7058407670970177e-06, - "loss": 0.7235, - "num_input_tokens_seen": 99272060, - "step": 4662 - }, - { - "epoch": 0.5606925990500812, - "grad_norm": 3.498600359393606, - "learning_rate": 1.7050702893651643e-06, - "loss": 0.611, - "num_input_tokens_seen": 99291360, - "step": 4663 - }, - { - "epoch": 0.5608128419407202, - "grad_norm": 2.6478961158511494, - "learning_rate": 1.7042998563739134e-06, - "loss": 0.7474, - "num_input_tokens_seen": 99309430, - "step": 4664 - }, - { - "epoch": 0.5609330848313594, - "grad_norm": 2.677276122384816, - "learning_rate": 1.7035294682401394e-06, - "loss": 0.7097, - "num_input_tokens_seen": 99328020, - "step": 4665 - }, - { - "epoch": 0.5610533277219985, - "grad_norm": 3.2005083765255287, - "learning_rate": 1.7027591250807088e-06, - "loss": 0.7405, - "num_input_tokens_seen": 99344915, - "step": 4666 - }, - { - "epoch": 0.5611735706126375, - "grad_norm": 2.91488459926229, - "learning_rate": 1.701988827012483e-06, - "loss": 0.841, - "num_input_tokens_seen": 99361800, - "step": 4667 - }, - { - "epoch": 0.5612938135032767, - "grad_norm": 3.880008334058756, - "learning_rate": 1.701218574152315e-06, - "loss": 0.8098, - "num_input_tokens_seen": 99377845, - "step": 4668 - }, - { - "epoch": 0.5614140563939157, - "grad_norm": 2.3294073073337755, - "learning_rate": 1.700448366617052e-06, - "loss": 0.6208, - "num_input_tokens_seen": 99398060, - "step": 4669 - }, - { - "epoch": 0.5615342992845548, - "grad_norm": 3.087004830021334, - "learning_rate": 1.6996782045235326e-06, - "loss": 0.8037, - "num_input_tokens_seen": 99417645, - "step": 4670 - }, - { - "epoch": 0.5616545421751938, - "grad_norm": 4.244046296640389, - "learning_rate": 1.6989080879885918e-06, - "loss": 0.6879, - "num_input_tokens_seen": 99435225, - "step": 4671 - }, - { - "epoch": 0.561774785065833, - "grad_norm": 0.995728679876948, - "learning_rate": 1.6981380171290544e-06, - "loss": 0.6373, - "num_input_tokens_seen": 99495970, - "step": 4672 - }, - { - "epoch": 0.5618950279564721, - "grad_norm": 2.6484523022181303, - "learning_rate": 1.69736799206174e-06, - "loss": 0.7384, - "num_input_tokens_seen": 99513225, - "step": 4673 - }, - { - "epoch": 0.5620152708471111, - "grad_norm": 10.702345713206823, - "learning_rate": 1.6965980129034603e-06, - "loss": 0.8471, - "num_input_tokens_seen": 99530330, - "step": 4674 - }, - { - "epoch": 0.5621355137377503, - "grad_norm": 1.6272927703435291, - "learning_rate": 1.6958280797710209e-06, - "loss": 0.7585, - "num_input_tokens_seen": 99551975, - "step": 4675 - }, - { - "epoch": 0.5622557566283893, - "grad_norm": 0.748868144638413, - "learning_rate": 1.6950581927812202e-06, - "loss": 0.5574, - "num_input_tokens_seen": 99611265, - "step": 4676 - }, - { - "epoch": 0.5623759995190284, - "grad_norm": 2.8826021047167494, - "learning_rate": 1.694288352050849e-06, - "loss": 0.7848, - "num_input_tokens_seen": 99629720, - "step": 4677 - }, - { - "epoch": 0.5624962424096676, - "grad_norm": 2.6061119480622605, - "learning_rate": 1.693518557696691e-06, - "loss": 0.7759, - "num_input_tokens_seen": 99648580, - "step": 4678 - }, - { - "epoch": 0.5626164853003066, - "grad_norm": 2.3687967539788364, - "learning_rate": 1.6927488098355252e-06, - "loss": 0.8802, - "num_input_tokens_seen": 99665930, - "step": 4679 - }, - { - "epoch": 0.5627367281909457, - "grad_norm": 0.9109420691961535, - "learning_rate": 1.6919791085841201e-06, - "loss": 0.6759, - "num_input_tokens_seen": 99723060, - "step": 4680 - }, - { - "epoch": 0.5628569710815848, - "grad_norm": 3.486596242675488, - "learning_rate": 1.6912094540592396e-06, - "loss": 0.7915, - "num_input_tokens_seen": 99738300, - "step": 4681 - }, - { - "epoch": 0.5629772139722239, - "grad_norm": 2.474714027044474, - "learning_rate": 1.6904398463776393e-06, - "loss": 0.7919, - "num_input_tokens_seen": 99751820, - "step": 4682 - }, - { - "epoch": 0.5630974568628629, - "grad_norm": 2.253690521874694, - "learning_rate": 1.6896702856560683e-06, - "loss": 0.7214, - "num_input_tokens_seen": 99770635, - "step": 4683 - }, - { - "epoch": 0.5632176997535021, - "grad_norm": 4.310262499383267, - "learning_rate": 1.688900772011268e-06, - "loss": 0.6884, - "num_input_tokens_seen": 99788100, - "step": 4684 - }, - { - "epoch": 0.5633379426441412, - "grad_norm": 1.8696980579151923, - "learning_rate": 1.6881313055599734e-06, - "loss": 0.7751, - "num_input_tokens_seen": 99807750, - "step": 4685 - }, - { - "epoch": 0.5634581855347802, - "grad_norm": 3.1088012571700845, - "learning_rate": 1.687361886418911e-06, - "loss": 0.809, - "num_input_tokens_seen": 99823240, - "step": 4686 - }, - { - "epoch": 0.5635784284254194, - "grad_norm": 3.5329863794713483, - "learning_rate": 1.686592514704803e-06, - "loss": 0.7769, - "num_input_tokens_seen": 99840355, - "step": 4687 - }, - { - "epoch": 0.5636986713160584, - "grad_norm": 3.007595584657204, - "learning_rate": 1.685823190534361e-06, - "loss": 0.6897, - "num_input_tokens_seen": 99858315, - "step": 4688 - }, - { - "epoch": 0.5638189142066975, - "grad_norm": 2.2467799706758105, - "learning_rate": 1.6850539140242913e-06, - "loss": 0.8259, - "num_input_tokens_seen": 99877295, - "step": 4689 - }, - { - "epoch": 0.5639391570973367, - "grad_norm": 2.1387424014555334, - "learning_rate": 1.684284685291292e-06, - "loss": 0.8125, - "num_input_tokens_seen": 99898660, - "step": 4690 - }, - { - "epoch": 0.5640593999879757, - "grad_norm": 3.4426414998710575, - "learning_rate": 1.683515504452055e-06, - "loss": 0.7992, - "num_input_tokens_seen": 99915755, - "step": 4691 - }, - { - "epoch": 0.5641796428786148, - "grad_norm": 2.0982403644815433, - "learning_rate": 1.6827463716232648e-06, - "loss": 0.6572, - "num_input_tokens_seen": 99936135, - "step": 4692 - }, - { - "epoch": 0.5642998857692539, - "grad_norm": 2.5682011332678822, - "learning_rate": 1.6819772869215976e-06, - "loss": 0.744, - "num_input_tokens_seen": 99954935, - "step": 4693 - }, - { - "epoch": 0.564420128659893, - "grad_norm": 2.0625392868648333, - "learning_rate": 1.6812082504637223e-06, - "loss": 0.8152, - "num_input_tokens_seen": 99975975, - "step": 4694 - }, - { - "epoch": 0.564540371550532, - "grad_norm": 1.8386542305886315, - "learning_rate": 1.6804392623663025e-06, - "loss": 0.7414, - "num_input_tokens_seen": 99996900, - "step": 4695 - }, - { - "epoch": 0.5646606144411712, - "grad_norm": 2.268285779114741, - "learning_rate": 1.679670322745993e-06, - "loss": 0.7724, - "num_input_tokens_seen": 100014575, - "step": 4696 - }, - { - "epoch": 0.5647808573318103, - "grad_norm": 2.3209607325838655, - "learning_rate": 1.6789014317194407e-06, - "loss": 0.752, - "num_input_tokens_seen": 100035775, - "step": 4697 - }, - { - "epoch": 0.5649011002224493, - "grad_norm": 2.9827854843983523, - "learning_rate": 1.6781325894032853e-06, - "loss": 0.7236, - "num_input_tokens_seen": 100054455, - "step": 4698 - }, - { - "epoch": 0.5650213431130885, - "grad_norm": 2.5487859154302153, - "learning_rate": 1.6773637959141608e-06, - "loss": 0.9062, - "num_input_tokens_seen": 100071150, - "step": 4699 - }, - { - "epoch": 0.5651415860037275, - "grad_norm": 4.150395540936103, - "learning_rate": 1.6765950513686917e-06, - "loss": 0.6591, - "num_input_tokens_seen": 100088980, - "step": 4700 - }, - { - "epoch": 0.5652618288943666, - "grad_norm": 2.012919905937243, - "learning_rate": 1.6758263558834963e-06, - "loss": 0.7625, - "num_input_tokens_seen": 100107915, - "step": 4701 - }, - { - "epoch": 0.5653820717850057, - "grad_norm": 3.0719535535141844, - "learning_rate": 1.6750577095751844e-06, - "loss": 0.7886, - "num_input_tokens_seen": 100126745, - "step": 4702 - }, - { - "epoch": 0.5655023146756448, - "grad_norm": 2.1765740281078574, - "learning_rate": 1.67428911256036e-06, - "loss": 0.7247, - "num_input_tokens_seen": 100147370, - "step": 4703 - }, - { - "epoch": 0.5656225575662839, - "grad_norm": 2.1326518121074014, - "learning_rate": 1.673520564955619e-06, - "loss": 0.7114, - "num_input_tokens_seen": 100166960, - "step": 4704 - }, - { - "epoch": 0.5657428004569229, - "grad_norm": 1.9497628301241579, - "learning_rate": 1.672752066877548e-06, - "loss": 0.8387, - "num_input_tokens_seen": 100186965, - "step": 4705 - }, - { - "epoch": 0.5658630433475621, - "grad_norm": 1.8369030137304878, - "learning_rate": 1.6719836184427275e-06, - "loss": 0.7472, - "num_input_tokens_seen": 100206990, - "step": 4706 - }, - { - "epoch": 0.5659832862382012, - "grad_norm": 2.1361175471219203, - "learning_rate": 1.6712152197677325e-06, - "loss": 0.6434, - "num_input_tokens_seen": 100226170, - "step": 4707 - }, - { - "epoch": 0.5661035291288402, - "grad_norm": 2.280137482340126, - "learning_rate": 1.670446870969127e-06, - "loss": 0.7584, - "num_input_tokens_seen": 100243555, - "step": 4708 - }, - { - "epoch": 0.5662237720194794, - "grad_norm": 2.8577436853610183, - "learning_rate": 1.669678572163469e-06, - "loss": 0.8032, - "num_input_tokens_seen": 100257760, - "step": 4709 - }, - { - "epoch": 0.5663440149101184, - "grad_norm": 2.6087569199749625, - "learning_rate": 1.6689103234673086e-06, - "loss": 0.7228, - "num_input_tokens_seen": 100275800, - "step": 4710 - }, - { - "epoch": 0.5664642578007575, - "grad_norm": 3.010724151913435, - "learning_rate": 1.6681421249971895e-06, - "loss": 0.7666, - "num_input_tokens_seen": 100295180, - "step": 4711 - }, - { - "epoch": 0.5665845006913967, - "grad_norm": 0.7763559321278201, - "learning_rate": 1.6673739768696457e-06, - "loss": 0.6276, - "num_input_tokens_seen": 100361470, - "step": 4712 - }, - { - "epoch": 0.5667047435820357, - "grad_norm": 1.8815636913253833, - "learning_rate": 1.6666058792012056e-06, - "loss": 0.7681, - "num_input_tokens_seen": 100382075, - "step": 4713 - }, - { - "epoch": 0.5668249864726748, - "grad_norm": 0.8886862743750386, - "learning_rate": 1.6658378321083874e-06, - "loss": 0.7013, - "num_input_tokens_seen": 100446125, - "step": 4714 - }, - { - "epoch": 0.5669452293633139, - "grad_norm": 3.766664952998253, - "learning_rate": 1.6650698357077055e-06, - "loss": 0.8148, - "num_input_tokens_seen": 100462890, - "step": 4715 - }, - { - "epoch": 0.567065472253953, - "grad_norm": 2.8099670810823345, - "learning_rate": 1.6643018901156632e-06, - "loss": 0.8, - "num_input_tokens_seen": 100481705, - "step": 4716 - }, - { - "epoch": 0.567185715144592, - "grad_norm": 2.94318610588146, - "learning_rate": 1.663533995448757e-06, - "loss": 0.7817, - "num_input_tokens_seen": 100497300, - "step": 4717 - }, - { - "epoch": 0.5673059580352312, - "grad_norm": 1.892527479553599, - "learning_rate": 1.6627661518234758e-06, - "loss": 0.8205, - "num_input_tokens_seen": 100516275, - "step": 4718 - }, - { - "epoch": 0.5674262009258703, - "grad_norm": 2.044476209301902, - "learning_rate": 1.661998359356302e-06, - "loss": 0.8351, - "num_input_tokens_seen": 100535025, - "step": 4719 - }, - { - "epoch": 0.5675464438165093, - "grad_norm": 0.841166206280218, - "learning_rate": 1.6612306181637081e-06, - "loss": 0.5928, - "num_input_tokens_seen": 100594070, - "step": 4720 - }, - { - "epoch": 0.5676666867071485, - "grad_norm": 2.6185538357674534, - "learning_rate": 1.6604629283621604e-06, - "loss": 0.6586, - "num_input_tokens_seen": 100611720, - "step": 4721 - }, - { - "epoch": 0.5677869295977875, - "grad_norm": 2.3964198008744093, - "learning_rate": 1.6596952900681152e-06, - "loss": 0.746, - "num_input_tokens_seen": 100632200, - "step": 4722 - }, - { - "epoch": 0.5679071724884266, - "grad_norm": 2.5021946362775016, - "learning_rate": 1.658927703398025e-06, - "loss": 0.8202, - "num_input_tokens_seen": 100651985, - "step": 4723 - }, - { - "epoch": 0.5680274153790658, - "grad_norm": 2.3453428445957787, - "learning_rate": 1.6581601684683309e-06, - "loss": 0.7738, - "num_input_tokens_seen": 100672130, - "step": 4724 - }, - { - "epoch": 0.5681476582697048, - "grad_norm": 3.230107739551636, - "learning_rate": 1.6573926853954674e-06, - "loss": 0.6821, - "num_input_tokens_seen": 100689435, - "step": 4725 - }, - { - "epoch": 0.5682679011603439, - "grad_norm": 4.2877522687975524, - "learning_rate": 1.6566252542958608e-06, - "loss": 0.8222, - "num_input_tokens_seen": 100708655, - "step": 4726 - }, - { - "epoch": 0.568388144050983, - "grad_norm": 3.1070111416941155, - "learning_rate": 1.6558578752859305e-06, - "loss": 0.7778, - "num_input_tokens_seen": 100727335, - "step": 4727 - }, - { - "epoch": 0.5685083869416221, - "grad_norm": 2.738361123409445, - "learning_rate": 1.6550905484820867e-06, - "loss": 0.7788, - "num_input_tokens_seen": 100745515, - "step": 4728 - }, - { - "epoch": 0.5686286298322611, - "grad_norm": 3.982191595958614, - "learning_rate": 1.6543232740007328e-06, - "loss": 0.7939, - "num_input_tokens_seen": 100762350, - "step": 4729 - }, - { - "epoch": 0.5687488727229003, - "grad_norm": 7.7677271441508156, - "learning_rate": 1.6535560519582626e-06, - "loss": 0.6655, - "num_input_tokens_seen": 100781750, - "step": 4730 - }, - { - "epoch": 0.5688691156135394, - "grad_norm": 5.060103201206567, - "learning_rate": 1.6527888824710642e-06, - "loss": 0.7311, - "num_input_tokens_seen": 100801070, - "step": 4731 - }, - { - "epoch": 0.5689893585041784, - "grad_norm": 3.339791122579335, - "learning_rate": 1.6520217656555166e-06, - "loss": 0.7617, - "num_input_tokens_seen": 100820080, - "step": 4732 - }, - { - "epoch": 0.5691096013948175, - "grad_norm": 1.8199434351839006, - "learning_rate": 1.6512547016279905e-06, - "loss": 0.7063, - "num_input_tokens_seen": 100840155, - "step": 4733 - }, - { - "epoch": 0.5692298442854566, - "grad_norm": 2.369494032971793, - "learning_rate": 1.6504876905048485e-06, - "loss": 0.6963, - "num_input_tokens_seen": 100856835, - "step": 4734 - }, - { - "epoch": 0.5693500871760957, - "grad_norm": 3.842521411276859, - "learning_rate": 1.6497207324024464e-06, - "loss": 0.7248, - "num_input_tokens_seen": 100875455, - "step": 4735 - }, - { - "epoch": 0.5694703300667348, - "grad_norm": 1.9911215986332345, - "learning_rate": 1.6489538274371305e-06, - "loss": 0.8214, - "num_input_tokens_seen": 100893780, - "step": 4736 - }, - { - "epoch": 0.5695905729573739, - "grad_norm": 2.548021081688492, - "learning_rate": 1.64818697572524e-06, - "loss": 0.8222, - "num_input_tokens_seen": 100911835, - "step": 4737 - }, - { - "epoch": 0.569710815848013, - "grad_norm": 1.49836542829536, - "learning_rate": 1.6474201773831047e-06, - "loss": 0.7131, - "num_input_tokens_seen": 100934425, - "step": 4738 - }, - { - "epoch": 0.569831058738652, - "grad_norm": 2.133852703510713, - "learning_rate": 1.646653432527049e-06, - "loss": 0.7307, - "num_input_tokens_seen": 100954785, - "step": 4739 - }, - { - "epoch": 0.5699513016292912, - "grad_norm": 1.5879879796960523, - "learning_rate": 1.6458867412733865e-06, - "loss": 0.7418, - "num_input_tokens_seen": 100976320, - "step": 4740 - }, - { - "epoch": 0.5700715445199303, - "grad_norm": 8.443052490242112, - "learning_rate": 1.645120103738424e-06, - "loss": 0.7413, - "num_input_tokens_seen": 100993550, - "step": 4741 - }, - { - "epoch": 0.5701917874105693, - "grad_norm": 2.2819956511790593, - "learning_rate": 1.6443535200384591e-06, - "loss": 0.8333, - "num_input_tokens_seen": 101011445, - "step": 4742 - }, - { - "epoch": 0.5703120303012085, - "grad_norm": 1.809403089738324, - "learning_rate": 1.6435869902897827e-06, - "loss": 0.6997, - "num_input_tokens_seen": 101029745, - "step": 4743 - }, - { - "epoch": 0.5704322731918475, - "grad_norm": 0.8329593107263714, - "learning_rate": 1.6428205146086769e-06, - "loss": 0.6472, - "num_input_tokens_seen": 101091445, - "step": 4744 - }, - { - "epoch": 0.5705525160824866, - "grad_norm": 1.6313255063108463, - "learning_rate": 1.6420540931114146e-06, - "loss": 0.6978, - "num_input_tokens_seen": 101111755, - "step": 4745 - }, - { - "epoch": 0.5706727589731257, - "grad_norm": 3.0912751483316834, - "learning_rate": 1.6412877259142612e-06, - "loss": 0.7914, - "num_input_tokens_seen": 101131395, - "step": 4746 - }, - { - "epoch": 0.5707930018637648, - "grad_norm": 2.6910238741825587, - "learning_rate": 1.6405214131334757e-06, - "loss": 0.7438, - "num_input_tokens_seen": 101149640, - "step": 4747 - }, - { - "epoch": 0.5709132447544039, - "grad_norm": 2.422751478236722, - "learning_rate": 1.6397551548853056e-06, - "loss": 0.7893, - "num_input_tokens_seen": 101167525, - "step": 4748 - }, - { - "epoch": 0.571033487645043, - "grad_norm": 2.30627703476244, - "learning_rate": 1.6389889512859921e-06, - "loss": 0.7003, - "num_input_tokens_seen": 101186905, - "step": 4749 - }, - { - "epoch": 0.5711537305356821, - "grad_norm": 0.896744373482785, - "learning_rate": 1.638222802451767e-06, - "loss": 0.652, - "num_input_tokens_seen": 101248105, - "step": 4750 - }, - { - "epoch": 0.5712739734263211, - "grad_norm": 2.2774020046789674, - "learning_rate": 1.6374567084988557e-06, - "loss": 0.7479, - "num_input_tokens_seen": 101269010, - "step": 4751 - }, - { - "epoch": 0.5713942163169603, - "grad_norm": 2.6025776408135477, - "learning_rate": 1.6366906695434738e-06, - "loss": 0.7634, - "num_input_tokens_seen": 101291250, - "step": 4752 - }, - { - "epoch": 0.5715144592075994, - "grad_norm": 3.427391158594594, - "learning_rate": 1.6359246857018282e-06, - "loss": 0.8534, - "num_input_tokens_seen": 101308500, - "step": 4753 - }, - { - "epoch": 0.5716347020982384, - "grad_norm": 1.9613229460632502, - "learning_rate": 1.6351587570901178e-06, - "loss": 0.7721, - "num_input_tokens_seen": 101328345, - "step": 4754 - }, - { - "epoch": 0.5717549449888776, - "grad_norm": 3.22808589114031, - "learning_rate": 1.6343928838245344e-06, - "loss": 0.7442, - "num_input_tokens_seen": 101340065, - "step": 4755 - }, - { - "epoch": 0.5718751878795166, - "grad_norm": 2.03266459375331, - "learning_rate": 1.63362706602126e-06, - "loss": 0.6838, - "num_input_tokens_seen": 101361380, - "step": 4756 - }, - { - "epoch": 0.5719954307701557, - "grad_norm": 3.5285262606304415, - "learning_rate": 1.632861303796468e-06, - "loss": 0.6587, - "num_input_tokens_seen": 101384165, - "step": 4757 - }, - { - "epoch": 0.5721156736607949, - "grad_norm": 2.227232349426142, - "learning_rate": 1.6320955972663237e-06, - "loss": 0.6779, - "num_input_tokens_seen": 101403480, - "step": 4758 - }, - { - "epoch": 0.5722359165514339, - "grad_norm": 2.3870076280270935, - "learning_rate": 1.6313299465469857e-06, - "loss": 0.6491, - "num_input_tokens_seen": 101425930, - "step": 4759 - }, - { - "epoch": 0.572356159442073, - "grad_norm": 3.939530810206527, - "learning_rate": 1.6305643517546014e-06, - "loss": 0.7867, - "num_input_tokens_seen": 101441030, - "step": 4760 - }, - { - "epoch": 0.5724764023327121, - "grad_norm": 2.1623517362724543, - "learning_rate": 1.6297988130053116e-06, - "loss": 0.8418, - "num_input_tokens_seen": 101460470, - "step": 4761 - }, - { - "epoch": 0.5725966452233512, - "grad_norm": 2.080168075052579, - "learning_rate": 1.6290333304152469e-06, - "loss": 0.7037, - "num_input_tokens_seen": 101480065, - "step": 4762 - }, - { - "epoch": 0.5727168881139902, - "grad_norm": 3.125384086160007, - "learning_rate": 1.6282679041005314e-06, - "loss": 0.5689, - "num_input_tokens_seen": 101505375, - "step": 4763 - }, - { - "epoch": 0.5728371310046293, - "grad_norm": 2.244403543248274, - "learning_rate": 1.6275025341772793e-06, - "loss": 0.8676, - "num_input_tokens_seen": 101521400, - "step": 4764 - }, - { - "epoch": 0.5729573738952685, - "grad_norm": 5.964647442346836, - "learning_rate": 1.6267372207615965e-06, - "loss": 0.8224, - "num_input_tokens_seen": 101538585, - "step": 4765 - }, - { - "epoch": 0.5730776167859075, - "grad_norm": 3.0481800204566665, - "learning_rate": 1.62597196396958e-06, - "loss": 0.7857, - "num_input_tokens_seen": 101556475, - "step": 4766 - }, - { - "epoch": 0.5731978596765466, - "grad_norm": 2.743014184261631, - "learning_rate": 1.6252067639173197e-06, - "loss": 0.8503, - "num_input_tokens_seen": 101578105, - "step": 4767 - }, - { - "epoch": 0.5733181025671857, - "grad_norm": 2.670306404747414, - "learning_rate": 1.6244416207208956e-06, - "loss": 0.6942, - "num_input_tokens_seen": 101598760, - "step": 4768 - }, - { - "epoch": 0.5734383454578248, - "grad_norm": 1.8075204728786771, - "learning_rate": 1.6236765344963787e-06, - "loss": 0.7358, - "num_input_tokens_seen": 101619740, - "step": 4769 - }, - { - "epoch": 0.5735585883484638, - "grad_norm": 3.0653207775171425, - "learning_rate": 1.6229115053598322e-06, - "loss": 0.6939, - "num_input_tokens_seen": 101641215, - "step": 4770 - }, - { - "epoch": 0.573678831239103, - "grad_norm": 2.9889363357792944, - "learning_rate": 1.6221465334273108e-06, - "loss": 0.7106, - "num_input_tokens_seen": 101660145, - "step": 4771 - }, - { - "epoch": 0.5737990741297421, - "grad_norm": 2.5190817645108803, - "learning_rate": 1.6213816188148597e-06, - "loss": 0.6036, - "num_input_tokens_seen": 101678570, - "step": 4772 - }, - { - "epoch": 0.5739193170203811, - "grad_norm": 1.9760531538922232, - "learning_rate": 1.6206167616385162e-06, - "loss": 0.7627, - "num_input_tokens_seen": 101699355, - "step": 4773 - }, - { - "epoch": 0.5740395599110203, - "grad_norm": 3.404496126595559, - "learning_rate": 1.6198519620143074e-06, - "loss": 0.7397, - "num_input_tokens_seen": 101716230, - "step": 4774 - }, - { - "epoch": 0.5741598028016593, - "grad_norm": 2.149950119420719, - "learning_rate": 1.6190872200582546e-06, - "loss": 0.7725, - "num_input_tokens_seen": 101737690, - "step": 4775 - }, - { - "epoch": 0.5742800456922984, - "grad_norm": 3.078191430272785, - "learning_rate": 1.6183225358863676e-06, - "loss": 0.7764, - "num_input_tokens_seen": 101754305, - "step": 4776 - }, - { - "epoch": 0.5744002885829376, - "grad_norm": 2.764595124398925, - "learning_rate": 1.6175579096146485e-06, - "loss": 0.7048, - "num_input_tokens_seen": 101773460, - "step": 4777 - }, - { - "epoch": 0.5745205314735766, - "grad_norm": 1.9478939482039526, - "learning_rate": 1.6167933413590899e-06, - "loss": 0.8504, - "num_input_tokens_seen": 101792085, - "step": 4778 - }, - { - "epoch": 0.5746407743642157, - "grad_norm": 3.915496492629683, - "learning_rate": 1.6160288312356773e-06, - "loss": 0.9071, - "num_input_tokens_seen": 101808935, - "step": 4779 - }, - { - "epoch": 0.5747610172548548, - "grad_norm": 3.2983233051837852, - "learning_rate": 1.6152643793603857e-06, - "loss": 0.8188, - "num_input_tokens_seen": 101829005, - "step": 4780 - }, - { - "epoch": 0.5748812601454939, - "grad_norm": 2.192637542769304, - "learning_rate": 1.6144999858491819e-06, - "loss": 0.8691, - "num_input_tokens_seen": 101847355, - "step": 4781 - }, - { - "epoch": 0.575001503036133, - "grad_norm": 1.889045816775095, - "learning_rate": 1.6137356508180227e-06, - "loss": 0.8513, - "num_input_tokens_seen": 101868785, - "step": 4782 - }, - { - "epoch": 0.5751217459267721, - "grad_norm": 2.423801981230503, - "learning_rate": 1.6129713743828593e-06, - "loss": 0.8106, - "num_input_tokens_seen": 101887515, - "step": 4783 - }, - { - "epoch": 0.5752419888174112, - "grad_norm": 1.5807037643188067, - "learning_rate": 1.6122071566596302e-06, - "loss": 0.7551, - "num_input_tokens_seen": 101907510, - "step": 4784 - }, - { - "epoch": 0.5753622317080502, - "grad_norm": 2.4985192169439205, - "learning_rate": 1.6114429977642678e-06, - "loss": 0.8194, - "num_input_tokens_seen": 101921735, - "step": 4785 - }, - { - "epoch": 0.5754824745986894, - "grad_norm": 1.9433843077420119, - "learning_rate": 1.6106788978126926e-06, - "loss": 0.7313, - "num_input_tokens_seen": 101940430, - "step": 4786 - }, - { - "epoch": 0.5756027174893285, - "grad_norm": 4.223511918574018, - "learning_rate": 1.6099148569208196e-06, - "loss": 0.7785, - "num_input_tokens_seen": 101957370, - "step": 4787 - }, - { - "epoch": 0.5757229603799675, - "grad_norm": 1.9386536826596157, - "learning_rate": 1.609150875204553e-06, - "loss": 0.6265, - "num_input_tokens_seen": 101977970, - "step": 4788 - }, - { - "epoch": 0.5758432032706067, - "grad_norm": 2.0811855703967903, - "learning_rate": 1.6083869527797875e-06, - "loss": 0.8564, - "num_input_tokens_seen": 101997060, - "step": 4789 - }, - { - "epoch": 0.5759634461612457, - "grad_norm": 2.4288178422566076, - "learning_rate": 1.6076230897624098e-06, - "loss": 0.7425, - "num_input_tokens_seen": 102018985, - "step": 4790 - }, - { - "epoch": 0.5760836890518848, - "grad_norm": 3.5922269408610146, - "learning_rate": 1.6068592862682974e-06, - "loss": 0.7761, - "num_input_tokens_seen": 102036860, - "step": 4791 - }, - { - "epoch": 0.576203931942524, - "grad_norm": 2.5200497048020476, - "learning_rate": 1.6060955424133191e-06, - "loss": 0.7395, - "num_input_tokens_seen": 102057505, - "step": 4792 - }, - { - "epoch": 0.576324174833163, - "grad_norm": 2.1441582970490916, - "learning_rate": 1.6053318583133336e-06, - "loss": 0.8984, - "num_input_tokens_seen": 102078095, - "step": 4793 - }, - { - "epoch": 0.5764444177238021, - "grad_norm": 3.0081400330823747, - "learning_rate": 1.6045682340841907e-06, - "loss": 0.7522, - "num_input_tokens_seen": 102096740, - "step": 4794 - }, - { - "epoch": 0.5765646606144411, - "grad_norm": 0.8042994637477046, - "learning_rate": 1.6038046698417332e-06, - "loss": 0.6163, - "num_input_tokens_seen": 102157355, - "step": 4795 - }, - { - "epoch": 0.5766849035050803, - "grad_norm": 2.4414848786422176, - "learning_rate": 1.6030411657017919e-06, - "loss": 0.6901, - "num_input_tokens_seen": 102176730, - "step": 4796 - }, - { - "epoch": 0.5768051463957193, - "grad_norm": 2.47665713085787, - "learning_rate": 1.6022777217801907e-06, - "loss": 0.8422, - "num_input_tokens_seen": 102193405, - "step": 4797 - }, - { - "epoch": 0.5769253892863584, - "grad_norm": 2.0361900406680213, - "learning_rate": 1.601514338192742e-06, - "loss": 0.7259, - "num_input_tokens_seen": 102213055, - "step": 4798 - }, - { - "epoch": 0.5770456321769976, - "grad_norm": 3.437548489150342, - "learning_rate": 1.6007510150552518e-06, - "loss": 0.7085, - "num_input_tokens_seen": 102230835, - "step": 4799 - }, - { - "epoch": 0.5771658750676366, - "grad_norm": 2.16951101487682, - "learning_rate": 1.5999877524835154e-06, - "loss": 0.6228, - "num_input_tokens_seen": 102255000, - "step": 4800 - }, - { - "epoch": 0.5772861179582757, - "grad_norm": 3.2188623855678995, - "learning_rate": 1.5992245505933188e-06, - "loss": 0.6706, - "num_input_tokens_seen": 102274420, - "step": 4801 - }, - { - "epoch": 0.5774063608489148, - "grad_norm": 2.77323124188225, - "learning_rate": 1.5984614095004382e-06, - "loss": 0.7033, - "num_input_tokens_seen": 102295275, - "step": 4802 - }, - { - "epoch": 0.5775266037395539, - "grad_norm": 2.4737891455268706, - "learning_rate": 1.5976983293206438e-06, - "loss": 0.8092, - "num_input_tokens_seen": 102310800, - "step": 4803 - }, - { - "epoch": 0.577646846630193, - "grad_norm": 1.7917683031291272, - "learning_rate": 1.5969353101696928e-06, - "loss": 0.7097, - "num_input_tokens_seen": 102328960, - "step": 4804 - }, - { - "epoch": 0.5777670895208321, - "grad_norm": 1.7597343711219515, - "learning_rate": 1.5961723521633346e-06, - "loss": 0.7942, - "num_input_tokens_seen": 102349920, - "step": 4805 - }, - { - "epoch": 0.5778873324114712, - "grad_norm": 4.2148472966845825, - "learning_rate": 1.595409455417309e-06, - "loss": 0.9018, - "num_input_tokens_seen": 102367630, - "step": 4806 - }, - { - "epoch": 0.5780075753021102, - "grad_norm": 2.709987246892763, - "learning_rate": 1.5946466200473482e-06, - "loss": 0.7905, - "num_input_tokens_seen": 102385260, - "step": 4807 - }, - { - "epoch": 0.5781278181927494, - "grad_norm": 2.1272388902853074, - "learning_rate": 1.5938838461691723e-06, - "loss": 0.8303, - "num_input_tokens_seen": 102401890, - "step": 4808 - }, - { - "epoch": 0.5782480610833884, - "grad_norm": 3.6465495443497047, - "learning_rate": 1.5931211338984944e-06, - "loss": 0.8312, - "num_input_tokens_seen": 102418815, - "step": 4809 - }, - { - "epoch": 0.5783683039740275, - "grad_norm": 2.311343798011985, - "learning_rate": 1.592358483351016e-06, - "loss": 0.7821, - "num_input_tokens_seen": 102438710, - "step": 4810 - }, - { - "epoch": 0.5784885468646667, - "grad_norm": 2.1380002062817223, - "learning_rate": 1.5915958946424326e-06, - "loss": 0.7167, - "num_input_tokens_seen": 102457115, - "step": 4811 - }, - { - "epoch": 0.5786087897553057, - "grad_norm": 2.5982033638435826, - "learning_rate": 1.5908333678884271e-06, - "loss": 0.7377, - "num_input_tokens_seen": 102483255, - "step": 4812 - }, - { - "epoch": 0.5787290326459448, - "grad_norm": 2.3516452356233244, - "learning_rate": 1.5900709032046743e-06, - "loss": 0.7368, - "num_input_tokens_seen": 102501050, - "step": 4813 - }, - { - "epoch": 0.5788492755365839, - "grad_norm": 2.2369910595999243, - "learning_rate": 1.5893085007068391e-06, - "loss": 0.7791, - "num_input_tokens_seen": 102518330, - "step": 4814 - }, - { - "epoch": 0.578969518427223, - "grad_norm": 1.8778943552202543, - "learning_rate": 1.5885461605105786e-06, - "loss": 0.7048, - "num_input_tokens_seen": 102539650, - "step": 4815 - }, - { - "epoch": 0.579089761317862, - "grad_norm": 2.1204666426980716, - "learning_rate": 1.587783882731538e-06, - "loss": 0.767, - "num_input_tokens_seen": 102557915, - "step": 4816 - }, - { - "epoch": 0.5792100042085012, - "grad_norm": 11.031681896377048, - "learning_rate": 1.587021667485355e-06, - "loss": 0.6919, - "num_input_tokens_seen": 102577005, - "step": 4817 - }, - { - "epoch": 0.5793302470991403, - "grad_norm": 2.2090396212456915, - "learning_rate": 1.5862595148876554e-06, - "loss": 0.7774, - "num_input_tokens_seen": 102596830, - "step": 4818 - }, - { - "epoch": 0.5794504899897793, - "grad_norm": 3.1309532003069016, - "learning_rate": 1.5854974250540595e-06, - "loss": 0.7587, - "num_input_tokens_seen": 102611295, - "step": 4819 - }, - { - "epoch": 0.5795707328804185, - "grad_norm": 2.8950045941247535, - "learning_rate": 1.5847353981001747e-06, - "loss": 0.7547, - "num_input_tokens_seen": 102628195, - "step": 4820 - }, - { - "epoch": 0.5796909757710575, - "grad_norm": 1.6182434235722363, - "learning_rate": 1.5839734341415997e-06, - "loss": 0.6862, - "num_input_tokens_seen": 102650115, - "step": 4821 - }, - { - "epoch": 0.5798112186616966, - "grad_norm": 1.9753134092030766, - "learning_rate": 1.5832115332939238e-06, - "loss": 0.7618, - "num_input_tokens_seen": 102668275, - "step": 4822 - }, - { - "epoch": 0.5799314615523358, - "grad_norm": 3.5992823794245634, - "learning_rate": 1.5824496956727272e-06, - "loss": 0.7521, - "num_input_tokens_seen": 102685200, - "step": 4823 - }, - { - "epoch": 0.5800517044429748, - "grad_norm": 2.419276183619453, - "learning_rate": 1.5816879213935797e-06, - "loss": 0.721, - "num_input_tokens_seen": 102703730, - "step": 4824 - }, - { - "epoch": 0.5801719473336139, - "grad_norm": 2.1693232174276345, - "learning_rate": 1.580926210572042e-06, - "loss": 0.7846, - "num_input_tokens_seen": 102724490, - "step": 4825 - }, - { - "epoch": 0.580292190224253, - "grad_norm": 2.0034353019539655, - "learning_rate": 1.580164563323664e-06, - "loss": 0.7903, - "num_input_tokens_seen": 102745195, - "step": 4826 - }, - { - "epoch": 0.5804124331148921, - "grad_norm": 2.3195682505585333, - "learning_rate": 1.579402979763989e-06, - "loss": 0.7696, - "num_input_tokens_seen": 102765250, - "step": 4827 - }, - { - "epoch": 0.5805326760055312, - "grad_norm": 2.430777259244859, - "learning_rate": 1.578641460008548e-06, - "loss": 0.8031, - "num_input_tokens_seen": 102782705, - "step": 4828 - }, - { - "epoch": 0.5806529188961702, - "grad_norm": 2.2615825411291492, - "learning_rate": 1.5778800041728617e-06, - "loss": 0.6688, - "num_input_tokens_seen": 102798715, - "step": 4829 - }, - { - "epoch": 0.5807731617868094, - "grad_norm": 1.7188285956099998, - "learning_rate": 1.5771186123724426e-06, - "loss": 0.6585, - "num_input_tokens_seen": 102820275, - "step": 4830 - }, - { - "epoch": 0.5808934046774484, - "grad_norm": 1.9729082198831696, - "learning_rate": 1.5763572847227943e-06, - "loss": 0.706, - "num_input_tokens_seen": 102840880, - "step": 4831 - }, - { - "epoch": 0.5810136475680875, - "grad_norm": 2.3762129676769703, - "learning_rate": 1.5755960213394096e-06, - "loss": 0.804, - "num_input_tokens_seen": 102857700, - "step": 4832 - }, - { - "epoch": 0.5811338904587267, - "grad_norm": 2.3037636435955355, - "learning_rate": 1.5748348223377707e-06, - "loss": 0.7789, - "num_input_tokens_seen": 102874975, - "step": 4833 - }, - { - "epoch": 0.5812541333493657, - "grad_norm": 1.829828029407949, - "learning_rate": 1.5740736878333507e-06, - "loss": 0.7773, - "num_input_tokens_seen": 102892535, - "step": 4834 - }, - { - "epoch": 0.5813743762400048, - "grad_norm": 2.6787328061838296, - "learning_rate": 1.5733126179416143e-06, - "loss": 0.7758, - "num_input_tokens_seen": 102906740, - "step": 4835 - }, - { - "epoch": 0.5814946191306439, - "grad_norm": 2.4523824946009314, - "learning_rate": 1.5725516127780144e-06, - "loss": 0.7191, - "num_input_tokens_seen": 102928595, - "step": 4836 - }, - { - "epoch": 0.581614862021283, - "grad_norm": 3.0008383040067512, - "learning_rate": 1.5717906724579947e-06, - "loss": 0.8835, - "num_input_tokens_seen": 102945375, - "step": 4837 - }, - { - "epoch": 0.581735104911922, - "grad_norm": 2.4488302291988027, - "learning_rate": 1.571029797096989e-06, - "loss": 0.6769, - "num_input_tokens_seen": 102966200, - "step": 4838 - }, - { - "epoch": 0.5818553478025612, - "grad_norm": 1.8453097831632193, - "learning_rate": 1.5702689868104227e-06, - "loss": 0.7837, - "num_input_tokens_seen": 102985815, - "step": 4839 - }, - { - "epoch": 0.5819755906932003, - "grad_norm": 2.2388402217495105, - "learning_rate": 1.5695082417137096e-06, - "loss": 0.7473, - "num_input_tokens_seen": 103003410, - "step": 4840 - }, - { - "epoch": 0.5820958335838393, - "grad_norm": 1.8119756428290785, - "learning_rate": 1.5687475619222539e-06, - "loss": 0.7479, - "num_input_tokens_seen": 103023085, - "step": 4841 - }, - { - "epoch": 0.5822160764744785, - "grad_norm": 3.056226364776706, - "learning_rate": 1.5679869475514496e-06, - "loss": 0.7324, - "num_input_tokens_seen": 103039740, - "step": 4842 - }, - { - "epoch": 0.5823363193651175, - "grad_norm": 2.899919038780509, - "learning_rate": 1.5672263987166825e-06, - "loss": 0.8141, - "num_input_tokens_seen": 103059375, - "step": 4843 - }, - { - "epoch": 0.5824565622557566, - "grad_norm": 2.370603530474994, - "learning_rate": 1.5664659155333263e-06, - "loss": 0.6185, - "num_input_tokens_seen": 103081125, - "step": 4844 - }, - { - "epoch": 0.5825768051463958, - "grad_norm": 2.5721388576486297, - "learning_rate": 1.5657054981167463e-06, - "loss": 0.8808, - "num_input_tokens_seen": 103099740, - "step": 4845 - }, - { - "epoch": 0.5826970480370348, - "grad_norm": 2.03662999121406, - "learning_rate": 1.564945146582296e-06, - "loss": 0.6727, - "num_input_tokens_seen": 103120850, - "step": 4846 - }, - { - "epoch": 0.5828172909276739, - "grad_norm": 2.1717091557638364, - "learning_rate": 1.5641848610453218e-06, - "loss": 0.8256, - "num_input_tokens_seen": 103139230, - "step": 4847 - }, - { - "epoch": 0.582937533818313, - "grad_norm": 2.983228714278711, - "learning_rate": 1.563424641621158e-06, - "loss": 0.855, - "num_input_tokens_seen": 103158130, - "step": 4848 - }, - { - "epoch": 0.5830577767089521, - "grad_norm": 2.296346511754696, - "learning_rate": 1.5626644884251286e-06, - "loss": 0.6972, - "num_input_tokens_seen": 103177370, - "step": 4849 - }, - { - "epoch": 0.5831780195995911, - "grad_norm": 1.816630278458807, - "learning_rate": 1.5619044015725481e-06, - "loss": 0.8778, - "num_input_tokens_seen": 103196780, - "step": 4850 - }, - { - "epoch": 0.5832982624902303, - "grad_norm": 2.5257346420752373, - "learning_rate": 1.5611443811787224e-06, - "loss": 0.8681, - "num_input_tokens_seen": 103210625, - "step": 4851 - }, - { - "epoch": 0.5834185053808694, - "grad_norm": 2.423216048826171, - "learning_rate": 1.560384427358945e-06, - "loss": 0.6901, - "num_input_tokens_seen": 103229890, - "step": 4852 - }, - { - "epoch": 0.5835387482715084, - "grad_norm": 1.8768853969488881, - "learning_rate": 1.5596245402285002e-06, - "loss": 0.7224, - "num_input_tokens_seen": 103253135, - "step": 4853 - }, - { - "epoch": 0.5836589911621476, - "grad_norm": 2.1376558427561334, - "learning_rate": 1.5588647199026619e-06, - "loss": 0.814, - "num_input_tokens_seen": 103270590, - "step": 4854 - }, - { - "epoch": 0.5837792340527866, - "grad_norm": 2.2887586113960556, - "learning_rate": 1.5581049664966956e-06, - "loss": 0.8773, - "num_input_tokens_seen": 103288070, - "step": 4855 - }, - { - "epoch": 0.5838994769434257, - "grad_norm": 3.6478355470861517, - "learning_rate": 1.5573452801258545e-06, - "loss": 0.6902, - "num_input_tokens_seen": 103334960, - "step": 4856 - }, - { - "epoch": 0.5840197198340649, - "grad_norm": 4.4360439224691985, - "learning_rate": 1.5565856609053824e-06, - "loss": 0.632, - "num_input_tokens_seen": 103353475, - "step": 4857 - }, - { - "epoch": 0.5841399627247039, - "grad_norm": 1.9122823239521842, - "learning_rate": 1.5558261089505127e-06, - "loss": 0.7905, - "num_input_tokens_seen": 103371925, - "step": 4858 - }, - { - "epoch": 0.584260205615343, - "grad_norm": 2.240434628717895, - "learning_rate": 1.5550666243764697e-06, - "loss": 0.7901, - "num_input_tokens_seen": 103389805, - "step": 4859 - }, - { - "epoch": 0.584380448505982, - "grad_norm": 2.5336226641015713, - "learning_rate": 1.5543072072984655e-06, - "loss": 0.7636, - "num_input_tokens_seen": 103407785, - "step": 4860 - }, - { - "epoch": 0.5845006913966212, - "grad_norm": 1.9300210680071765, - "learning_rate": 1.553547857831704e-06, - "loss": 0.7883, - "num_input_tokens_seen": 103424015, - "step": 4861 - }, - { - "epoch": 0.5846209342872603, - "grad_norm": 1.0540954534540454, - "learning_rate": 1.5527885760913767e-06, - "loss": 0.7196, - "num_input_tokens_seen": 103473625, - "step": 4862 - }, - { - "epoch": 0.5847411771778993, - "grad_norm": 2.22492688902478, - "learning_rate": 1.5520293621926675e-06, - "loss": 0.7555, - "num_input_tokens_seen": 103492605, - "step": 4863 - }, - { - "epoch": 0.5848614200685385, - "grad_norm": 2.336872192051636, - "learning_rate": 1.5512702162507478e-06, - "loss": 0.7201, - "num_input_tokens_seen": 103512640, - "step": 4864 - }, - { - "epoch": 0.5849816629591775, - "grad_norm": 1.1429192558315644, - "learning_rate": 1.5505111383807796e-06, - "loss": 0.5699, - "num_input_tokens_seen": 103575030, - "step": 4865 - }, - { - "epoch": 0.5851019058498166, - "grad_norm": 1.7594670781130073, - "learning_rate": 1.5497521286979138e-06, - "loss": 0.7984, - "num_input_tokens_seen": 103594990, - "step": 4866 - }, - { - "epoch": 0.5852221487404557, - "grad_norm": 2.8975044302600956, - "learning_rate": 1.5489931873172927e-06, - "loss": 0.7423, - "num_input_tokens_seen": 103616030, - "step": 4867 - }, - { - "epoch": 0.5853423916310948, - "grad_norm": 3.5836437823057663, - "learning_rate": 1.5482343143540467e-06, - "loss": 0.7891, - "num_input_tokens_seen": 103637015, - "step": 4868 - }, - { - "epoch": 0.5854626345217339, - "grad_norm": 2.0902790807598213, - "learning_rate": 1.5474755099232956e-06, - "loss": 0.8298, - "num_input_tokens_seen": 103653775, - "step": 4869 - }, - { - "epoch": 0.585582877412373, - "grad_norm": 0.7667064686663994, - "learning_rate": 1.546716774140149e-06, - "loss": 0.5902, - "num_input_tokens_seen": 103714975, - "step": 4870 - }, - { - "epoch": 0.5857031203030121, - "grad_norm": 2.947524138475556, - "learning_rate": 1.5459581071197083e-06, - "loss": 0.7067, - "num_input_tokens_seen": 103730355, - "step": 4871 - }, - { - "epoch": 0.5858233631936511, - "grad_norm": 5.5961687268706, - "learning_rate": 1.5451995089770624e-06, - "loss": 0.8211, - "num_input_tokens_seen": 103749860, - "step": 4872 - }, - { - "epoch": 0.5859436060842903, - "grad_norm": 1.672344560749858, - "learning_rate": 1.544440979827289e-06, - "loss": 0.7125, - "num_input_tokens_seen": 103773670, - "step": 4873 - }, - { - "epoch": 0.5860638489749294, - "grad_norm": 2.1147885425560426, - "learning_rate": 1.5436825197854555e-06, - "loss": 0.8, - "num_input_tokens_seen": 103791870, - "step": 4874 - }, - { - "epoch": 0.5861840918655684, - "grad_norm": 3.3177634559123317, - "learning_rate": 1.5429241289666219e-06, - "loss": 0.795, - "num_input_tokens_seen": 103809090, - "step": 4875 - }, - { - "epoch": 0.5863043347562076, - "grad_norm": 3.3844530342066674, - "learning_rate": 1.5421658074858346e-06, - "loss": 0.694, - "num_input_tokens_seen": 103826915, - "step": 4876 - }, - { - "epoch": 0.5864245776468466, - "grad_norm": 2.739114405757076, - "learning_rate": 1.5414075554581302e-06, - "loss": 0.6577, - "num_input_tokens_seen": 103844680, - "step": 4877 - }, - { - "epoch": 0.5865448205374857, - "grad_norm": 2.3510347997538448, - "learning_rate": 1.5406493729985348e-06, - "loss": 0.7743, - "num_input_tokens_seen": 103863595, - "step": 4878 - }, - { - "epoch": 0.5866650634281249, - "grad_norm": 5.748427421319987, - "learning_rate": 1.5398912602220644e-06, - "loss": 0.7175, - "num_input_tokens_seen": 103882590, - "step": 4879 - }, - { - "epoch": 0.5867853063187639, - "grad_norm": 2.9413356827540103, - "learning_rate": 1.5391332172437243e-06, - "loss": 0.7797, - "num_input_tokens_seen": 103899330, - "step": 4880 - }, - { - "epoch": 0.586905549209403, - "grad_norm": 5.00974686559913, - "learning_rate": 1.5383752441785085e-06, - "loss": 0.7478, - "num_input_tokens_seen": 103918275, - "step": 4881 - }, - { - "epoch": 0.5870257921000421, - "grad_norm": 3.0010134130858557, - "learning_rate": 1.5376173411414003e-06, - "loss": 0.854, - "num_input_tokens_seen": 103936035, - "step": 4882 - }, - { - "epoch": 0.5871460349906812, - "grad_norm": 2.00597889705922, - "learning_rate": 1.5368595082473748e-06, - "loss": 0.7793, - "num_input_tokens_seen": 103954055, - "step": 4883 - }, - { - "epoch": 0.5872662778813202, - "grad_norm": 1.8146919527807026, - "learning_rate": 1.5361017456113935e-06, - "loss": 0.774, - "num_input_tokens_seen": 103974125, - "step": 4884 - }, - { - "epoch": 0.5873865207719594, - "grad_norm": 2.538178161739637, - "learning_rate": 1.5353440533484085e-06, - "loss": 0.8532, - "num_input_tokens_seen": 103992700, - "step": 4885 - }, - { - "epoch": 0.5875067636625985, - "grad_norm": 2.146481903250869, - "learning_rate": 1.534586431573361e-06, - "loss": 0.653, - "num_input_tokens_seen": 104017360, - "step": 4886 - }, - { - "epoch": 0.5876270065532375, - "grad_norm": 2.520778897230332, - "learning_rate": 1.533828880401182e-06, - "loss": 0.7733, - "num_input_tokens_seen": 104036580, - "step": 4887 - }, - { - "epoch": 0.5877472494438767, - "grad_norm": 2.6093016659075845, - "learning_rate": 1.5330713999467915e-06, - "loss": 0.7124, - "num_input_tokens_seen": 104055045, - "step": 4888 - }, - { - "epoch": 0.5878674923345157, - "grad_norm": 2.083236178913225, - "learning_rate": 1.532313990325098e-06, - "loss": 0.5748, - "num_input_tokens_seen": 104075370, - "step": 4889 - }, - { - "epoch": 0.5879877352251548, - "grad_norm": 1.928090608522909, - "learning_rate": 1.5315566516509997e-06, - "loss": 0.7606, - "num_input_tokens_seen": 104093260, - "step": 4890 - }, - { - "epoch": 0.5881079781157939, - "grad_norm": 2.1035431120775727, - "learning_rate": 1.5307993840393857e-06, - "loss": 0.6745, - "num_input_tokens_seen": 104111060, - "step": 4891 - }, - { - "epoch": 0.588228221006433, - "grad_norm": 2.2642621463515162, - "learning_rate": 1.530042187605132e-06, - "loss": 0.8052, - "num_input_tokens_seen": 104130035, - "step": 4892 - }, - { - "epoch": 0.5883484638970721, - "grad_norm": 1.7798274793622242, - "learning_rate": 1.5292850624631046e-06, - "loss": 0.8342, - "num_input_tokens_seen": 104151950, - "step": 4893 - }, - { - "epoch": 0.5884687067877111, - "grad_norm": 2.5993270618500466, - "learning_rate": 1.5285280087281589e-06, - "loss": 0.7857, - "num_input_tokens_seen": 104172400, - "step": 4894 - }, - { - "epoch": 0.5885889496783503, - "grad_norm": 0.7173286231951608, - "learning_rate": 1.5277710265151398e-06, - "loss": 0.5678, - "num_input_tokens_seen": 104241600, - "step": 4895 - }, - { - "epoch": 0.5887091925689893, - "grad_norm": 2.5862250478235302, - "learning_rate": 1.5270141159388803e-06, - "loss": 0.7687, - "num_input_tokens_seen": 104258340, - "step": 4896 - }, - { - "epoch": 0.5888294354596284, - "grad_norm": 2.2463954899326204, - "learning_rate": 1.5262572771142036e-06, - "loss": 0.792, - "num_input_tokens_seen": 104279135, - "step": 4897 - }, - { - "epoch": 0.5889496783502676, - "grad_norm": 1.9731000110447356, - "learning_rate": 1.5255005101559201e-06, - "loss": 0.8031, - "num_input_tokens_seen": 104296465, - "step": 4898 - }, - { - "epoch": 0.5890699212409066, - "grad_norm": 2.0015844887810768, - "learning_rate": 1.524743815178833e-06, - "loss": 0.7649, - "num_input_tokens_seen": 104314145, - "step": 4899 - }, - { - "epoch": 0.5891901641315457, - "grad_norm": 1.9543064685316924, - "learning_rate": 1.5239871922977315e-06, - "loss": 0.8049, - "num_input_tokens_seen": 104333780, - "step": 4900 - }, - { - "epoch": 0.5893104070221848, - "grad_norm": 2.4083839217209677, - "learning_rate": 1.523230641627394e-06, - "loss": 0.8912, - "num_input_tokens_seen": 104352485, - "step": 4901 - }, - { - "epoch": 0.5894306499128239, - "grad_norm": 3.824197071551223, - "learning_rate": 1.5224741632825888e-06, - "loss": 0.7293, - "num_input_tokens_seen": 104372395, - "step": 4902 - }, - { - "epoch": 0.589550892803463, - "grad_norm": 1.654199761882125, - "learning_rate": 1.521717757378074e-06, - "loss": 0.6843, - "num_input_tokens_seen": 104392660, - "step": 4903 - }, - { - "epoch": 0.5896711356941021, - "grad_norm": 2.722285510590189, - "learning_rate": 1.520961424028595e-06, - "loss": 0.6847, - "num_input_tokens_seen": 104410035, - "step": 4904 - }, - { - "epoch": 0.5897913785847412, - "grad_norm": 2.2078585100402215, - "learning_rate": 1.520205163348887e-06, - "loss": 0.8481, - "num_input_tokens_seen": 104427690, - "step": 4905 - }, - { - "epoch": 0.5899116214753802, - "grad_norm": 0.783418064196027, - "learning_rate": 1.5194489754536735e-06, - "loss": 0.5761, - "num_input_tokens_seen": 104482510, - "step": 4906 - }, - { - "epoch": 0.5900318643660194, - "grad_norm": 2.8402461955712437, - "learning_rate": 1.5186928604576692e-06, - "loss": 0.755, - "num_input_tokens_seen": 104499425, - "step": 4907 - }, - { - "epoch": 0.5901521072566585, - "grad_norm": 2.5059622064635487, - "learning_rate": 1.5179368184755752e-06, - "loss": 0.7715, - "num_input_tokens_seen": 104517230, - "step": 4908 - }, - { - "epoch": 0.5902723501472975, - "grad_norm": 1.7122431194773249, - "learning_rate": 1.5171808496220825e-06, - "loss": 0.8233, - "num_input_tokens_seen": 104535705, - "step": 4909 - }, - { - "epoch": 0.5903925930379367, - "grad_norm": 1.72795648900781, - "learning_rate": 1.5164249540118708e-06, - "loss": 0.8092, - "num_input_tokens_seen": 104554550, - "step": 4910 - }, - { - "epoch": 0.5905128359285757, - "grad_norm": 1.871686079541397, - "learning_rate": 1.5156691317596093e-06, - "loss": 0.8253, - "num_input_tokens_seen": 104575695, - "step": 4911 - }, - { - "epoch": 0.5906330788192148, - "grad_norm": 2.5585508312082736, - "learning_rate": 1.5149133829799556e-06, - "loss": 0.6701, - "num_input_tokens_seen": 104593410, - "step": 4912 - }, - { - "epoch": 0.590753321709854, - "grad_norm": 5.096875372963243, - "learning_rate": 1.514157707787556e-06, - "loss": 0.7938, - "num_input_tokens_seen": 104610455, - "step": 4913 - }, - { - "epoch": 0.590873564600493, - "grad_norm": 2.296656236793456, - "learning_rate": 1.5134021062970447e-06, - "loss": 0.7194, - "num_input_tokens_seen": 104628555, - "step": 4914 - }, - { - "epoch": 0.5909938074911321, - "grad_norm": 1.9009599285599679, - "learning_rate": 1.5126465786230488e-06, - "loss": 0.8038, - "num_input_tokens_seen": 104645050, - "step": 4915 - }, - { - "epoch": 0.5911140503817712, - "grad_norm": 3.185264952966568, - "learning_rate": 1.5118911248801787e-06, - "loss": 0.8085, - "num_input_tokens_seen": 104662780, - "step": 4916 - }, - { - "epoch": 0.5912342932724103, - "grad_norm": 4.3621336470149465, - "learning_rate": 1.5111357451830368e-06, - "loss": 0.7928, - "num_input_tokens_seen": 104681195, - "step": 4917 - }, - { - "epoch": 0.5913545361630493, - "grad_norm": 2.8599092328203297, - "learning_rate": 1.5103804396462127e-06, - "loss": 0.7122, - "num_input_tokens_seen": 104700850, - "step": 4918 - }, - { - "epoch": 0.5914747790536885, - "grad_norm": 1.9355973174636198, - "learning_rate": 1.5096252083842877e-06, - "loss": 0.7974, - "num_input_tokens_seen": 104719780, - "step": 4919 - }, - { - "epoch": 0.5915950219443276, - "grad_norm": 3.9243221278078355, - "learning_rate": 1.508870051511829e-06, - "loss": 0.8318, - "num_input_tokens_seen": 104738820, - "step": 4920 - }, - { - "epoch": 0.5917152648349666, - "grad_norm": 2.0825569427567308, - "learning_rate": 1.5081149691433923e-06, - "loss": 0.6637, - "num_input_tokens_seen": 104758525, - "step": 4921 - }, - { - "epoch": 0.5918355077256057, - "grad_norm": 1.564880547265013, - "learning_rate": 1.5073599613935238e-06, - "loss": 0.7718, - "num_input_tokens_seen": 104780365, - "step": 4922 - }, - { - "epoch": 0.5919557506162448, - "grad_norm": 1.833275328046626, - "learning_rate": 1.5066050283767574e-06, - "loss": 0.5751, - "num_input_tokens_seen": 104800765, - "step": 4923 - }, - { - "epoch": 0.5920759935068839, - "grad_norm": 2.296490476919945, - "learning_rate": 1.5058501702076166e-06, - "loss": 0.831, - "num_input_tokens_seen": 104817350, - "step": 4924 - }, - { - "epoch": 0.592196236397523, - "grad_norm": 2.5717329395143396, - "learning_rate": 1.5050953870006112e-06, - "loss": 0.7747, - "num_input_tokens_seen": 104839370, - "step": 4925 - }, - { - "epoch": 0.5923164792881621, - "grad_norm": 3.265502379326082, - "learning_rate": 1.504340678870242e-06, - "loss": 0.7431, - "num_input_tokens_seen": 104857305, - "step": 4926 - }, - { - "epoch": 0.5924367221788012, - "grad_norm": 2.1611825172239927, - "learning_rate": 1.5035860459309985e-06, - "loss": 0.8906, - "num_input_tokens_seen": 104874740, - "step": 4927 - }, - { - "epoch": 0.5925569650694402, - "grad_norm": 2.291722737396256, - "learning_rate": 1.5028314882973568e-06, - "loss": 0.629, - "num_input_tokens_seen": 104894865, - "step": 4928 - }, - { - "epoch": 0.5926772079600794, - "grad_norm": 2.0991735159093627, - "learning_rate": 1.502077006083783e-06, - "loss": 0.8422, - "num_input_tokens_seen": 104913245, - "step": 4929 - }, - { - "epoch": 0.5927974508507184, - "grad_norm": 2.173472667579059, - "learning_rate": 1.5013225994047315e-06, - "loss": 0.7719, - "num_input_tokens_seen": 104930595, - "step": 4930 - }, - { - "epoch": 0.5929176937413575, - "grad_norm": 2.234205711893548, - "learning_rate": 1.5005682683746452e-06, - "loss": 0.8013, - "num_input_tokens_seen": 104948830, - "step": 4931 - }, - { - "epoch": 0.5930379366319967, - "grad_norm": 2.1343571324635477, - "learning_rate": 1.4998140131079555e-06, - "loss": 0.7213, - "num_input_tokens_seen": 104964640, - "step": 4932 - }, - { - "epoch": 0.5931581795226357, - "grad_norm": 2.995082430092919, - "learning_rate": 1.4990598337190825e-06, - "loss": 0.7346, - "num_input_tokens_seen": 104980715, - "step": 4933 - }, - { - "epoch": 0.5932784224132748, - "grad_norm": 2.262631210577442, - "learning_rate": 1.4983057303224336e-06, - "loss": 0.6737, - "num_input_tokens_seen": 105000250, - "step": 4934 - }, - { - "epoch": 0.5933986653039139, - "grad_norm": 1.787158088418598, - "learning_rate": 1.4975517030324072e-06, - "loss": 0.8608, - "num_input_tokens_seen": 105017980, - "step": 4935 - }, - { - "epoch": 0.593518908194553, - "grad_norm": 0.8413624732657579, - "learning_rate": 1.4967977519633882e-06, - "loss": 0.6514, - "num_input_tokens_seen": 105075160, - "step": 4936 - }, - { - "epoch": 0.593639151085192, - "grad_norm": 2.3749357686263144, - "learning_rate": 1.4960438772297498e-06, - "loss": 0.7866, - "num_input_tokens_seen": 105091925, - "step": 4937 - }, - { - "epoch": 0.5937593939758312, - "grad_norm": 2.4666011378311086, - "learning_rate": 1.4952900789458545e-06, - "loss": 0.7338, - "num_input_tokens_seen": 105111410, - "step": 4938 - }, - { - "epoch": 0.5938796368664703, - "grad_norm": 2.010896699673101, - "learning_rate": 1.4945363572260529e-06, - "loss": 0.7343, - "num_input_tokens_seen": 105132125, - "step": 4939 - }, - { - "epoch": 0.5939998797571093, - "grad_norm": 2.5062262267315596, - "learning_rate": 1.4937827121846845e-06, - "loss": 0.6732, - "num_input_tokens_seen": 105152100, - "step": 4940 - }, - { - "epoch": 0.5941201226477485, - "grad_norm": 1.9252044893689988, - "learning_rate": 1.4930291439360759e-06, - "loss": 0.7357, - "num_input_tokens_seen": 105174385, - "step": 4941 - }, - { - "epoch": 0.5942403655383875, - "grad_norm": 2.104554654000872, - "learning_rate": 1.492275652594542e-06, - "loss": 0.7944, - "num_input_tokens_seen": 105193415, - "step": 4942 - }, - { - "epoch": 0.5943606084290266, - "grad_norm": 0.791358672645102, - "learning_rate": 1.4915222382743894e-06, - "loss": 0.6184, - "num_input_tokens_seen": 105251970, - "step": 4943 - }, - { - "epoch": 0.5944808513196658, - "grad_norm": 3.019251210229282, - "learning_rate": 1.4907689010899085e-06, - "loss": 0.7233, - "num_input_tokens_seen": 105269270, - "step": 4944 - }, - { - "epoch": 0.5946010942103048, - "grad_norm": 2.1853988812922385, - "learning_rate": 1.4900156411553804e-06, - "loss": 0.6249, - "num_input_tokens_seen": 105288820, - "step": 4945 - }, - { - "epoch": 0.5947213371009439, - "grad_norm": 3.5142679560549976, - "learning_rate": 1.4892624585850739e-06, - "loss": 0.8562, - "num_input_tokens_seen": 105306895, - "step": 4946 - }, - { - "epoch": 0.594841579991583, - "grad_norm": 2.085009922060645, - "learning_rate": 1.4885093534932465e-06, - "loss": 0.785, - "num_input_tokens_seen": 105324580, - "step": 4947 - }, - { - "epoch": 0.5949618228822221, - "grad_norm": 2.596691222226965, - "learning_rate": 1.4877563259941438e-06, - "loss": 0.7166, - "num_input_tokens_seen": 105342155, - "step": 4948 - }, - { - "epoch": 0.5950820657728612, - "grad_norm": 2.2077376289137582, - "learning_rate": 1.4870033762019988e-06, - "loss": 0.6753, - "num_input_tokens_seen": 105362040, - "step": 4949 - }, - { - "epoch": 0.5952023086635003, - "grad_norm": 1.7313282846120863, - "learning_rate": 1.4862505042310332e-06, - "loss": 0.7236, - "num_input_tokens_seen": 105381045, - "step": 4950 - }, - { - "epoch": 0.5953225515541394, - "grad_norm": 1.8760905122987144, - "learning_rate": 1.4854977101954585e-06, - "loss": 0.6886, - "num_input_tokens_seen": 105402985, - "step": 4951 - }, - { - "epoch": 0.5954427944447784, - "grad_norm": 2.4088574329530688, - "learning_rate": 1.4847449942094716e-06, - "loss": 0.8514, - "num_input_tokens_seen": 105421585, - "step": 4952 - }, - { - "epoch": 0.5955630373354175, - "grad_norm": 2.1860964710129083, - "learning_rate": 1.4839923563872602e-06, - "loss": 0.8621, - "num_input_tokens_seen": 105439845, - "step": 4953 - }, - { - "epoch": 0.5956832802260567, - "grad_norm": 2.348196619975476, - "learning_rate": 1.483239796842997e-06, - "loss": 0.7483, - "num_input_tokens_seen": 105457595, - "step": 4954 - }, - { - "epoch": 0.5958035231166957, - "grad_norm": 4.222363764859216, - "learning_rate": 1.4824873156908462e-06, - "loss": 0.8312, - "num_input_tokens_seen": 105475240, - "step": 4955 - }, - { - "epoch": 0.5959237660073348, - "grad_norm": 1.8225371821980323, - "learning_rate": 1.4817349130449584e-06, - "loss": 0.7493, - "num_input_tokens_seen": 105494680, - "step": 4956 - }, - { - "epoch": 0.5960440088979739, - "grad_norm": 2.0119491302410757, - "learning_rate": 1.4809825890194722e-06, - "loss": 0.8249, - "num_input_tokens_seen": 105513070, - "step": 4957 - }, - { - "epoch": 0.596164251788613, - "grad_norm": 1.8592741939123307, - "learning_rate": 1.4802303437285139e-06, - "loss": 0.7752, - "num_input_tokens_seen": 105530060, - "step": 4958 - }, - { - "epoch": 0.596284494679252, - "grad_norm": 2.5430385199884515, - "learning_rate": 1.4794781772861998e-06, - "loss": 0.8006, - "num_input_tokens_seen": 105546275, - "step": 4959 - }, - { - "epoch": 0.5964047375698912, - "grad_norm": 2.1087308391482207, - "learning_rate": 1.4787260898066324e-06, - "loss": 0.6654, - "num_input_tokens_seen": 105565995, - "step": 4960 - }, - { - "epoch": 0.5965249804605303, - "grad_norm": 2.6171970854192477, - "learning_rate": 1.4779740814039028e-06, - "loss": 0.8491, - "num_input_tokens_seen": 105585800, - "step": 4961 - }, - { - "epoch": 0.5966452233511693, - "grad_norm": 2.308017448727003, - "learning_rate": 1.477222152192089e-06, - "loss": 0.6778, - "num_input_tokens_seen": 105605545, - "step": 4962 - }, - { - "epoch": 0.5967654662418085, - "grad_norm": 5.042651786862969, - "learning_rate": 1.4764703022852598e-06, - "loss": 0.7328, - "num_input_tokens_seen": 105625785, - "step": 4963 - }, - { - "epoch": 0.5968857091324475, - "grad_norm": 2.0326451157730454, - "learning_rate": 1.4757185317974696e-06, - "loss": 0.7709, - "num_input_tokens_seen": 105643890, - "step": 4964 - }, - { - "epoch": 0.5970059520230866, - "grad_norm": 3.803606333410535, - "learning_rate": 1.4749668408427614e-06, - "loss": 0.7102, - "num_input_tokens_seen": 105663190, - "step": 4965 - }, - { - "epoch": 0.5971261949137258, - "grad_norm": 1.9901004225711854, - "learning_rate": 1.4742152295351655e-06, - "loss": 0.8616, - "num_input_tokens_seen": 105682065, - "step": 4966 - }, - { - "epoch": 0.5972464378043648, - "grad_norm": 4.005787820222646, - "learning_rate": 1.4734636979887016e-06, - "loss": 0.6366, - "num_input_tokens_seen": 105699245, - "step": 4967 - }, - { - "epoch": 0.5973666806950039, - "grad_norm": 2.1013218709649744, - "learning_rate": 1.472712246317376e-06, - "loss": 0.9003, - "num_input_tokens_seen": 105717495, - "step": 4968 - }, - { - "epoch": 0.597486923585643, - "grad_norm": 2.212790811263203, - "learning_rate": 1.4719608746351834e-06, - "loss": 0.6415, - "num_input_tokens_seen": 105736775, - "step": 4969 - }, - { - "epoch": 0.5976071664762821, - "grad_norm": 3.212131886540118, - "learning_rate": 1.4712095830561055e-06, - "loss": 0.6975, - "num_input_tokens_seen": 105754985, - "step": 4970 - }, - { - "epoch": 0.5977274093669211, - "grad_norm": 2.429881239909068, - "learning_rate": 1.4704583716941143e-06, - "loss": 0.8048, - "num_input_tokens_seen": 105773570, - "step": 4971 - }, - { - "epoch": 0.5978476522575603, - "grad_norm": 2.421290157295964, - "learning_rate": 1.4697072406631672e-06, - "loss": 0.7152, - "num_input_tokens_seen": 105793195, - "step": 4972 - }, - { - "epoch": 0.5979678951481994, - "grad_norm": 2.004255516897595, - "learning_rate": 1.4689561900772097e-06, - "loss": 0.7265, - "num_input_tokens_seen": 105812975, - "step": 4973 - }, - { - "epoch": 0.5980881380388384, - "grad_norm": 2.8174139107313603, - "learning_rate": 1.4682052200501758e-06, - "loss": 0.7247, - "num_input_tokens_seen": 105829900, - "step": 4974 - }, - { - "epoch": 0.5982083809294776, - "grad_norm": 1.998967924695995, - "learning_rate": 1.4674543306959876e-06, - "loss": 0.7883, - "num_input_tokens_seen": 105849090, - "step": 4975 - }, - { - "epoch": 0.5983286238201166, - "grad_norm": 3.2864567987040973, - "learning_rate": 1.466703522128554e-06, - "loss": 0.8465, - "num_input_tokens_seen": 105866450, - "step": 4976 - }, - { - "epoch": 0.5984488667107557, - "grad_norm": 1.9857091564113523, - "learning_rate": 1.465952794461772e-06, - "loss": 0.7335, - "num_input_tokens_seen": 105886115, - "step": 4977 - }, - { - "epoch": 0.5985691096013949, - "grad_norm": 2.405782674242532, - "learning_rate": 1.4652021478095255e-06, - "loss": 0.7656, - "num_input_tokens_seen": 105904330, - "step": 4978 - }, - { - "epoch": 0.5986893524920339, - "grad_norm": 2.0556270955512534, - "learning_rate": 1.4644515822856888e-06, - "loss": 0.7522, - "num_input_tokens_seen": 105922485, - "step": 4979 - }, - { - "epoch": 0.598809595382673, - "grad_norm": 0.8015344465263697, - "learning_rate": 1.4637010980041215e-06, - "loss": 0.5871, - "num_input_tokens_seen": 105984315, - "step": 4980 - }, - { - "epoch": 0.5989298382733121, - "grad_norm": 3.114296651820016, - "learning_rate": 1.4629506950786711e-06, - "loss": 0.8972, - "num_input_tokens_seen": 106000215, - "step": 4981 - }, - { - "epoch": 0.5990500811639512, - "grad_norm": 0.8202103382333706, - "learning_rate": 1.4622003736231729e-06, - "loss": 0.5862, - "num_input_tokens_seen": 106058925, - "step": 4982 - }, - { - "epoch": 0.5991703240545903, - "grad_norm": 2.05724219160363, - "learning_rate": 1.461450133751451e-06, - "loss": 0.7968, - "num_input_tokens_seen": 106076715, - "step": 4983 - }, - { - "epoch": 0.5992905669452293, - "grad_norm": 1.9559227176330563, - "learning_rate": 1.4606999755773153e-06, - "loss": 0.7619, - "num_input_tokens_seen": 106097640, - "step": 4984 - }, - { - "epoch": 0.5994108098358685, - "grad_norm": 3.50697128798293, - "learning_rate": 1.4599498992145647e-06, - "loss": 0.8124, - "num_input_tokens_seen": 106117385, - "step": 4985 - }, - { - "epoch": 0.5995310527265075, - "grad_norm": 2.030876906961989, - "learning_rate": 1.459199904776984e-06, - "loss": 0.7078, - "num_input_tokens_seen": 106135960, - "step": 4986 - }, - { - "epoch": 0.5996512956171466, - "grad_norm": 2.1728562897198325, - "learning_rate": 1.4584499923783486e-06, - "loss": 0.7499, - "num_input_tokens_seen": 106154260, - "step": 4987 - }, - { - "epoch": 0.5997715385077858, - "grad_norm": 2.0056817603173416, - "learning_rate": 1.457700162132419e-06, - "loss": 0.7569, - "num_input_tokens_seen": 106170970, - "step": 4988 - }, - { - "epoch": 0.5998917813984248, - "grad_norm": 2.1248362469267583, - "learning_rate": 1.4569504141529433e-06, - "loss": 0.7218, - "num_input_tokens_seen": 106188525, - "step": 4989 - }, - { - "epoch": 0.6000120242890639, - "grad_norm": 3.6119619649063766, - "learning_rate": 1.456200748553658e-06, - "loss": 0.7142, - "num_input_tokens_seen": 106206240, - "step": 4990 - }, - { - "epoch": 0.600132267179703, - "grad_norm": 1.8177384356022526, - "learning_rate": 1.455451165448287e-06, - "loss": 0.7778, - "num_input_tokens_seen": 106228615, - "step": 4991 - }, - { - "epoch": 0.6002525100703421, - "grad_norm": 2.5897948939405757, - "learning_rate": 1.4547016649505414e-06, - "loss": 0.7233, - "num_input_tokens_seen": 106246345, - "step": 4992 - }, - { - "epoch": 0.6003727529609811, - "grad_norm": 12.424633892144515, - "learning_rate": 1.4539522471741193e-06, - "loss": 0.846, - "num_input_tokens_seen": 106263490, - "step": 4993 - }, - { - "epoch": 0.6004929958516203, - "grad_norm": 2.9335995155779537, - "learning_rate": 1.4532029122327063e-06, - "loss": 0.7032, - "num_input_tokens_seen": 106279995, - "step": 4994 - }, - { - "epoch": 0.6006132387422594, - "grad_norm": 3.504969285172157, - "learning_rate": 1.4524536602399779e-06, - "loss": 0.7538, - "num_input_tokens_seen": 106298805, - "step": 4995 - }, - { - "epoch": 0.6007334816328984, - "grad_norm": 3.7458148002615292, - "learning_rate": 1.4517044913095942e-06, - "loss": 0.7607, - "num_input_tokens_seen": 106318945, - "step": 4996 - }, - { - "epoch": 0.6008537245235376, - "grad_norm": 1.9575211756643291, - "learning_rate": 1.4509554055552026e-06, - "loss": 0.8038, - "num_input_tokens_seen": 106338895, - "step": 4997 - }, - { - "epoch": 0.6009739674141766, - "grad_norm": 3.1167229266359033, - "learning_rate": 1.450206403090439e-06, - "loss": 0.839, - "num_input_tokens_seen": 106356810, - "step": 4998 - }, - { - "epoch": 0.6010942103048157, - "grad_norm": 2.2724159543601865, - "learning_rate": 1.4494574840289274e-06, - "loss": 0.8564, - "num_input_tokens_seen": 106373645, - "step": 4999 - }, - { - "epoch": 0.6012144531954549, - "grad_norm": 2.0981074439106813, - "learning_rate": 1.4487086484842782e-06, - "loss": 0.7429, - "num_input_tokens_seen": 106392010, - "step": 5000 - }, - { - "epoch": 0.6013346960860939, - "grad_norm": 2.6937774748522996, - "learning_rate": 1.4479598965700883e-06, - "loss": 0.599, - "num_input_tokens_seen": 106408995, - "step": 5001 - }, - { - "epoch": 0.601454938976733, - "grad_norm": 2.6482098193718477, - "learning_rate": 1.4472112283999427e-06, - "loss": 0.6819, - "num_input_tokens_seen": 106427370, - "step": 5002 - }, - { - "epoch": 0.6015751818673721, - "grad_norm": 3.2228786947589034, - "learning_rate": 1.4464626440874147e-06, - "loss": 0.6956, - "num_input_tokens_seen": 106446205, - "step": 5003 - }, - { - "epoch": 0.6016954247580112, - "grad_norm": 6.177074473220978, - "learning_rate": 1.4457141437460636e-06, - "loss": 0.741, - "num_input_tokens_seen": 106463150, - "step": 5004 - }, - { - "epoch": 0.6018156676486502, - "grad_norm": 2.3888212067419667, - "learning_rate": 1.4449657274894364e-06, - "loss": 0.7246, - "num_input_tokens_seen": 106482315, - "step": 5005 - }, - { - "epoch": 0.6019359105392894, - "grad_norm": 2.070862719620687, - "learning_rate": 1.4442173954310656e-06, - "loss": 0.6252, - "num_input_tokens_seen": 106504575, - "step": 5006 - }, - { - "epoch": 0.6020561534299285, - "grad_norm": 0.8439720855225405, - "learning_rate": 1.4434691476844755e-06, - "loss": 0.5844, - "num_input_tokens_seen": 106565270, - "step": 5007 - }, - { - "epoch": 0.6021763963205675, - "grad_norm": 2.370974446366025, - "learning_rate": 1.4427209843631729e-06, - "loss": 0.6641, - "num_input_tokens_seen": 106582040, - "step": 5008 - }, - { - "epoch": 0.6022966392112067, - "grad_norm": 2.430251315853662, - "learning_rate": 1.4419729055806538e-06, - "loss": 0.8157, - "num_input_tokens_seen": 106601195, - "step": 5009 - }, - { - "epoch": 0.6024168821018457, - "grad_norm": 2.2779119577947817, - "learning_rate": 1.441224911450401e-06, - "loss": 0.8232, - "num_input_tokens_seen": 106616870, - "step": 5010 - }, - { - "epoch": 0.6025371249924848, - "grad_norm": 1.861604255120992, - "learning_rate": 1.4404770020858851e-06, - "loss": 0.8248, - "num_input_tokens_seen": 106636075, - "step": 5011 - }, - { - "epoch": 0.602657367883124, - "grad_norm": 1.7258341523817926, - "learning_rate": 1.4397291776005633e-06, - "loss": 0.857, - "num_input_tokens_seen": 106656290, - "step": 5012 - }, - { - "epoch": 0.602777610773763, - "grad_norm": 2.6731758872586284, - "learning_rate": 1.4389814381078797e-06, - "loss": 0.7226, - "num_input_tokens_seen": 106675250, - "step": 5013 - }, - { - "epoch": 0.6028978536644021, - "grad_norm": 2.364498912842888, - "learning_rate": 1.438233783721265e-06, - "loss": 0.791, - "num_input_tokens_seen": 106691135, - "step": 5014 - }, - { - "epoch": 0.6030180965550412, - "grad_norm": 2.408715271742756, - "learning_rate": 1.4374862145541395e-06, - "loss": 0.7779, - "num_input_tokens_seen": 106707290, - "step": 5015 - }, - { - "epoch": 0.6031383394456803, - "grad_norm": 2.627943053028283, - "learning_rate": 1.4367387307199082e-06, - "loss": 0.7964, - "num_input_tokens_seen": 106723860, - "step": 5016 - }, - { - "epoch": 0.6032585823363193, - "grad_norm": 2.3517121038524085, - "learning_rate": 1.4359913323319632e-06, - "loss": 0.8201, - "num_input_tokens_seen": 106740750, - "step": 5017 - }, - { - "epoch": 0.6033788252269584, - "grad_norm": 2.234584789360631, - "learning_rate": 1.4352440195036847e-06, - "loss": 0.7752, - "num_input_tokens_seen": 106760645, - "step": 5018 - }, - { - "epoch": 0.6034990681175976, - "grad_norm": 2.5540391739281776, - "learning_rate": 1.4344967923484395e-06, - "loss": 0.7974, - "num_input_tokens_seen": 106782335, - "step": 5019 - }, - { - "epoch": 0.6036193110082366, - "grad_norm": 10.972469959501462, - "learning_rate": 1.4337496509795814e-06, - "loss": 0.7177, - "num_input_tokens_seen": 106802040, - "step": 5020 - }, - { - "epoch": 0.6037395538988757, - "grad_norm": 2.6401056483469008, - "learning_rate": 1.433002595510451e-06, - "loss": 0.6853, - "num_input_tokens_seen": 106820540, - "step": 5021 - }, - { - "epoch": 0.6038597967895148, - "grad_norm": 1.7705639102595552, - "learning_rate": 1.4322556260543753e-06, - "loss": 0.7132, - "num_input_tokens_seen": 106836835, - "step": 5022 - }, - { - "epoch": 0.6039800396801539, - "grad_norm": 0.9678063972551598, - "learning_rate": 1.4315087427246703e-06, - "loss": 0.657, - "num_input_tokens_seen": 106890380, - "step": 5023 - }, - { - "epoch": 0.604100282570793, - "grad_norm": 0.9313413350856867, - "learning_rate": 1.4307619456346372e-06, - "loss": 0.6199, - "num_input_tokens_seen": 106934405, - "step": 5024 - }, - { - "epoch": 0.6042205254614321, - "grad_norm": 5.4036484591824765, - "learning_rate": 1.4300152348975645e-06, - "loss": 0.7282, - "num_input_tokens_seen": 106957405, - "step": 5025 - }, - { - "epoch": 0.6043407683520712, - "grad_norm": 1.9999655933491143, - "learning_rate": 1.429268610626727e-06, - "loss": 0.6585, - "num_input_tokens_seen": 106979975, - "step": 5026 - }, - { - "epoch": 0.6044610112427102, - "grad_norm": 1.7010107526145257, - "learning_rate": 1.4285220729353876e-06, - "loss": 0.7604, - "num_input_tokens_seen": 106998235, - "step": 5027 - }, - { - "epoch": 0.6045812541333494, - "grad_norm": 2.500426602665383, - "learning_rate": 1.4277756219367957e-06, - "loss": 0.7773, - "num_input_tokens_seen": 107014980, - "step": 5028 - }, - { - "epoch": 0.6047014970239885, - "grad_norm": 2.8071285986868566, - "learning_rate": 1.4270292577441866e-06, - "loss": 0.7951, - "num_input_tokens_seen": 107034205, - "step": 5029 - }, - { - "epoch": 0.6048217399146275, - "grad_norm": 1.8683207219837716, - "learning_rate": 1.4262829804707831e-06, - "loss": 0.7124, - "num_input_tokens_seen": 107055915, - "step": 5030 - }, - { - "epoch": 0.6049419828052667, - "grad_norm": 1.9613628675111048, - "learning_rate": 1.4255367902297958e-06, - "loss": 0.6903, - "num_input_tokens_seen": 107076965, - "step": 5031 - }, - { - "epoch": 0.6050622256959057, - "grad_norm": 2.415337000943828, - "learning_rate": 1.424790687134421e-06, - "loss": 0.7846, - "num_input_tokens_seen": 107092080, - "step": 5032 - }, - { - "epoch": 0.6051824685865448, - "grad_norm": 2.760119057147909, - "learning_rate": 1.4240446712978415e-06, - "loss": 0.751, - "num_input_tokens_seen": 107110785, - "step": 5033 - }, - { - "epoch": 0.605302711477184, - "grad_norm": 2.191495633778257, - "learning_rate": 1.423298742833227e-06, - "loss": 0.734, - "num_input_tokens_seen": 107129165, - "step": 5034 - }, - { - "epoch": 0.605422954367823, - "grad_norm": 1.9591306095223673, - "learning_rate": 1.4225529018537352e-06, - "loss": 0.7163, - "num_input_tokens_seen": 107144390, - "step": 5035 - }, - { - "epoch": 0.6055431972584621, - "grad_norm": 1.7703099064437682, - "learning_rate": 1.4218071484725086e-06, - "loss": 0.7765, - "num_input_tokens_seen": 107166230, - "step": 5036 - }, - { - "epoch": 0.6056634401491012, - "grad_norm": 2.4377035068953288, - "learning_rate": 1.4210614828026786e-06, - "loss": 0.7551, - "num_input_tokens_seen": 107183800, - "step": 5037 - }, - { - "epoch": 0.6057836830397403, - "grad_norm": 1.8072833674767195, - "learning_rate": 1.4203159049573601e-06, - "loss": 0.7404, - "num_input_tokens_seen": 107204755, - "step": 5038 - }, - { - "epoch": 0.6059039259303793, - "grad_norm": 2.961817696679899, - "learning_rate": 1.4195704150496589e-06, - "loss": 0.8743, - "num_input_tokens_seen": 107222190, - "step": 5039 - }, - { - "epoch": 0.6060241688210185, - "grad_norm": 1.9461971441292651, - "learning_rate": 1.4188250131926643e-06, - "loss": 0.7301, - "num_input_tokens_seen": 107240710, - "step": 5040 - }, - { - "epoch": 0.6061444117116576, - "grad_norm": 2.2240655078578695, - "learning_rate": 1.418079699499453e-06, - "loss": 0.8134, - "num_input_tokens_seen": 107257845, - "step": 5041 - }, - { - "epoch": 0.6062646546022966, - "grad_norm": 3.0376143383356085, - "learning_rate": 1.4173344740830877e-06, - "loss": 0.7142, - "num_input_tokens_seen": 107276695, - "step": 5042 - }, - { - "epoch": 0.6063848974929358, - "grad_norm": 1.99583096715096, - "learning_rate": 1.4165893370566202e-06, - "loss": 0.7001, - "num_input_tokens_seen": 107300170, - "step": 5043 - }, - { - "epoch": 0.6065051403835748, - "grad_norm": 3.0366590795962103, - "learning_rate": 1.4158442885330865e-06, - "loss": 0.768, - "num_input_tokens_seen": 107318460, - "step": 5044 - }, - { - "epoch": 0.6066253832742139, - "grad_norm": 1.9997730250866659, - "learning_rate": 1.4150993286255098e-06, - "loss": 0.7882, - "num_input_tokens_seen": 107337430, - "step": 5045 - }, - { - "epoch": 0.6067456261648531, - "grad_norm": 2.2530438950249616, - "learning_rate": 1.4143544574468993e-06, - "loss": 0.7833, - "num_input_tokens_seen": 107355510, - "step": 5046 - }, - { - "epoch": 0.6068658690554921, - "grad_norm": 1.9389585403274003, - "learning_rate": 1.4136096751102527e-06, - "loss": 0.8134, - "num_input_tokens_seen": 107373560, - "step": 5047 - }, - { - "epoch": 0.6069861119461312, - "grad_norm": 2.5696302055732496, - "learning_rate": 1.4128649817285516e-06, - "loss": 0.8193, - "num_input_tokens_seen": 107391415, - "step": 5048 - }, - { - "epoch": 0.6071063548367702, - "grad_norm": 2.1162437236382994, - "learning_rate": 1.4121203774147663e-06, - "loss": 0.6263, - "num_input_tokens_seen": 107411325, - "step": 5049 - }, - { - "epoch": 0.6072265977274094, - "grad_norm": 1.8490427762859487, - "learning_rate": 1.4113758622818517e-06, - "loss": 0.6976, - "num_input_tokens_seen": 107431110, - "step": 5050 - }, - { - "epoch": 0.6073468406180484, - "grad_norm": 2.505605132691777, - "learning_rate": 1.410631436442751e-06, - "loss": 0.8274, - "num_input_tokens_seen": 107449625, - "step": 5051 - }, - { - "epoch": 0.6074670835086875, - "grad_norm": 2.3471302473028164, - "learning_rate": 1.4098871000103936e-06, - "loss": 0.8537, - "num_input_tokens_seen": 107467945, - "step": 5052 - }, - { - "epoch": 0.6075873263993267, - "grad_norm": 1.8458454664124124, - "learning_rate": 1.4091428530976935e-06, - "loss": 0.8181, - "num_input_tokens_seen": 107487905, - "step": 5053 - }, - { - "epoch": 0.6077075692899657, - "grad_norm": 3.7489760041746147, - "learning_rate": 1.4083986958175524e-06, - "loss": 0.794, - "num_input_tokens_seen": 107504850, - "step": 5054 - }, - { - "epoch": 0.6078278121806048, - "grad_norm": 2.7510365566388324, - "learning_rate": 1.4076546282828593e-06, - "loss": 0.6922, - "num_input_tokens_seen": 107527425, - "step": 5055 - }, - { - "epoch": 0.6079480550712439, - "grad_norm": 2.326156358615473, - "learning_rate": 1.4069106506064878e-06, - "loss": 0.6553, - "num_input_tokens_seen": 107548570, - "step": 5056 - }, - { - "epoch": 0.608068297961883, - "grad_norm": 2.1852317646184027, - "learning_rate": 1.4061667629012993e-06, - "loss": 0.7764, - "num_input_tokens_seen": 107568960, - "step": 5057 - }, - { - "epoch": 0.608188540852522, - "grad_norm": 1.705825279562018, - "learning_rate": 1.40542296528014e-06, - "loss": 0.8298, - "num_input_tokens_seen": 107588340, - "step": 5058 - }, - { - "epoch": 0.6083087837431612, - "grad_norm": 2.8962502974448907, - "learning_rate": 1.4046792578558445e-06, - "loss": 0.7527, - "num_input_tokens_seen": 107605955, - "step": 5059 - }, - { - "epoch": 0.6084290266338003, - "grad_norm": 2.738061258426552, - "learning_rate": 1.4039356407412325e-06, - "loss": 0.7595, - "num_input_tokens_seen": 107618915, - "step": 5060 - }, - { - "epoch": 0.6085492695244393, - "grad_norm": 0.878888277036582, - "learning_rate": 1.40319211404911e-06, - "loss": 0.6189, - "num_input_tokens_seen": 107673635, - "step": 5061 - }, - { - "epoch": 0.6086695124150785, - "grad_norm": 2.7188654767061893, - "learning_rate": 1.4024486778922691e-06, - "loss": 0.8939, - "num_input_tokens_seen": 107691670, - "step": 5062 - }, - { - "epoch": 0.6087897553057176, - "grad_norm": 2.004022488511787, - "learning_rate": 1.4017053323834884e-06, - "loss": 0.7706, - "num_input_tokens_seen": 107711220, - "step": 5063 - }, - { - "epoch": 0.6089099981963566, - "grad_norm": 2.44263165344024, - "learning_rate": 1.4009620776355337e-06, - "loss": 0.7607, - "num_input_tokens_seen": 107732540, - "step": 5064 - }, - { - "epoch": 0.6090302410869958, - "grad_norm": 1.9338179863543867, - "learning_rate": 1.4002189137611553e-06, - "loss": 0.7868, - "num_input_tokens_seen": 107751600, - "step": 5065 - }, - { - "epoch": 0.6091504839776348, - "grad_norm": 2.294318138425732, - "learning_rate": 1.3994758408730897e-06, - "loss": 0.6898, - "num_input_tokens_seen": 107770505, - "step": 5066 - }, - { - "epoch": 0.6092707268682739, - "grad_norm": 2.3712981821250163, - "learning_rate": 1.3987328590840629e-06, - "loss": 0.7629, - "num_input_tokens_seen": 107791170, - "step": 5067 - }, - { - "epoch": 0.609390969758913, - "grad_norm": 2.464814302351344, - "learning_rate": 1.397989968506783e-06, - "loss": 0.8586, - "num_input_tokens_seen": 107809900, - "step": 5068 - }, - { - "epoch": 0.6095112126495521, - "grad_norm": 2.3178203751821562, - "learning_rate": 1.3972471692539462e-06, - "loss": 0.7236, - "num_input_tokens_seen": 107824335, - "step": 5069 - }, - { - "epoch": 0.6096314555401912, - "grad_norm": 2.1312076206288593, - "learning_rate": 1.3965044614382344e-06, - "loss": 0.7493, - "num_input_tokens_seen": 107839505, - "step": 5070 - }, - { - "epoch": 0.6097516984308303, - "grad_norm": 3.1315784753043903, - "learning_rate": 1.3957618451723162e-06, - "loss": 0.7433, - "num_input_tokens_seen": 107855255, - "step": 5071 - }, - { - "epoch": 0.6098719413214694, - "grad_norm": 1.8880179293800088, - "learning_rate": 1.3950193205688457e-06, - "loss": 0.6955, - "num_input_tokens_seen": 107874700, - "step": 5072 - }, - { - "epoch": 0.6099921842121084, - "grad_norm": 2.2981766263854224, - "learning_rate": 1.3942768877404631e-06, - "loss": 0.8294, - "num_input_tokens_seen": 107893385, - "step": 5073 - }, - { - "epoch": 0.6101124271027476, - "grad_norm": 1.808031152046907, - "learning_rate": 1.3935345467997946e-06, - "loss": 0.738, - "num_input_tokens_seen": 107912805, - "step": 5074 - }, - { - "epoch": 0.6102326699933867, - "grad_norm": 1.868917717542659, - "learning_rate": 1.3927922978594536e-06, - "loss": 0.6716, - "num_input_tokens_seen": 107933610, - "step": 5075 - }, - { - "epoch": 0.6103529128840257, - "grad_norm": 0.8637580117701023, - "learning_rate": 1.3920501410320382e-06, - "loss": 0.623, - "num_input_tokens_seen": 107989445, - "step": 5076 - }, - { - "epoch": 0.6104731557746649, - "grad_norm": 2.4600870170206157, - "learning_rate": 1.3913080764301333e-06, - "loss": 0.7565, - "num_input_tokens_seen": 108006125, - "step": 5077 - }, - { - "epoch": 0.6105933986653039, - "grad_norm": 1.9191960858975163, - "learning_rate": 1.3905661041663085e-06, - "loss": 0.7144, - "num_input_tokens_seen": 108027030, - "step": 5078 - }, - { - "epoch": 0.610713641555943, - "grad_norm": 2.7507603481408656, - "learning_rate": 1.389824224353122e-06, - "loss": 0.6434, - "num_input_tokens_seen": 108048340, - "step": 5079 - }, - { - "epoch": 0.610833884446582, - "grad_norm": 1.8112865265708167, - "learning_rate": 1.3890824371031151e-06, - "loss": 0.7621, - "num_input_tokens_seen": 108067330, - "step": 5080 - }, - { - "epoch": 0.6109541273372212, - "grad_norm": 2.0813021630729747, - "learning_rate": 1.3883407425288172e-06, - "loss": 0.778, - "num_input_tokens_seen": 108087385, - "step": 5081 - }, - { - "epoch": 0.6110743702278603, - "grad_norm": 2.3960643168273226, - "learning_rate": 1.3875991407427413e-06, - "loss": 0.7855, - "num_input_tokens_seen": 108105330, - "step": 5082 - }, - { - "epoch": 0.6111946131184993, - "grad_norm": 0.7976938898291167, - "learning_rate": 1.38685763185739e-06, - "loss": 0.6082, - "num_input_tokens_seen": 108158710, - "step": 5083 - }, - { - "epoch": 0.6113148560091385, - "grad_norm": 3.933877773617746, - "learning_rate": 1.386116215985248e-06, - "loss": 0.6704, - "num_input_tokens_seen": 108176565, - "step": 5084 - }, - { - "epoch": 0.6114350988997775, - "grad_norm": 1.9176158619545272, - "learning_rate": 1.3853748932387879e-06, - "loss": 0.7944, - "num_input_tokens_seen": 108196925, - "step": 5085 - }, - { - "epoch": 0.6115553417904166, - "grad_norm": 2.5881255362235573, - "learning_rate": 1.3846336637304671e-06, - "loss": 0.7507, - "num_input_tokens_seen": 108214915, - "step": 5086 - }, - { - "epoch": 0.6116755846810558, - "grad_norm": 2.074502626765727, - "learning_rate": 1.3838925275727312e-06, - "loss": 0.825, - "num_input_tokens_seen": 108235375, - "step": 5087 - }, - { - "epoch": 0.6117958275716948, - "grad_norm": 1.9143854331843104, - "learning_rate": 1.3831514848780089e-06, - "loss": 0.7878, - "num_input_tokens_seen": 108254670, - "step": 5088 - }, - { - "epoch": 0.6119160704623339, - "grad_norm": 2.5227524593987023, - "learning_rate": 1.3824105357587157e-06, - "loss": 0.9193, - "num_input_tokens_seen": 108271495, - "step": 5089 - }, - { - "epoch": 0.612036313352973, - "grad_norm": 1.4661906185120732, - "learning_rate": 1.381669680327253e-06, - "loss": 0.8132, - "num_input_tokens_seen": 108292895, - "step": 5090 - }, - { - "epoch": 0.6121565562436121, - "grad_norm": 2.329621967642062, - "learning_rate": 1.3809289186960085e-06, - "loss": 0.7058, - "num_input_tokens_seen": 108311385, - "step": 5091 - }, - { - "epoch": 0.6122767991342511, - "grad_norm": 2.37929946891987, - "learning_rate": 1.3801882509773548e-06, - "loss": 0.7034, - "num_input_tokens_seen": 108328965, - "step": 5092 - }, - { - "epoch": 0.6123970420248903, - "grad_norm": 2.1880234398938825, - "learning_rate": 1.3794476772836507e-06, - "loss": 0.8099, - "num_input_tokens_seen": 108349785, - "step": 5093 - }, - { - "epoch": 0.6125172849155294, - "grad_norm": 2.2261573745486998, - "learning_rate": 1.3787071977272398e-06, - "loss": 0.8322, - "num_input_tokens_seen": 108368765, - "step": 5094 - }, - { - "epoch": 0.6126375278061684, - "grad_norm": 4.73253345130025, - "learning_rate": 1.3779668124204535e-06, - "loss": 0.7178, - "num_input_tokens_seen": 108384900, - "step": 5095 - }, - { - "epoch": 0.6127577706968076, - "grad_norm": 1.7417625466686495, - "learning_rate": 1.3772265214756074e-06, - "loss": 0.8041, - "num_input_tokens_seen": 108404380, - "step": 5096 - }, - { - "epoch": 0.6128780135874466, - "grad_norm": 6.476177335190081, - "learning_rate": 1.376486325005003e-06, - "loss": 0.747, - "num_input_tokens_seen": 108422340, - "step": 5097 - }, - { - "epoch": 0.6129982564780857, - "grad_norm": 1.9089059341196482, - "learning_rate": 1.3757462231209267e-06, - "loss": 0.7997, - "num_input_tokens_seen": 108442365, - "step": 5098 - }, - { - "epoch": 0.6131184993687249, - "grad_norm": 1.9468412238329391, - "learning_rate": 1.3750062159356525e-06, - "loss": 0.8847, - "num_input_tokens_seen": 108461435, - "step": 5099 - }, - { - "epoch": 0.6132387422593639, - "grad_norm": 2.185121448268844, - "learning_rate": 1.3742663035614386e-06, - "loss": 0.8219, - "num_input_tokens_seen": 108478525, - "step": 5100 - }, - { - "epoch": 0.613358985150003, - "grad_norm": 1.954276696659958, - "learning_rate": 1.3735264861105287e-06, - "loss": 0.7963, - "num_input_tokens_seen": 108498885, - "step": 5101 - }, - { - "epoch": 0.6134792280406421, - "grad_norm": 2.4360455721530463, - "learning_rate": 1.372786763695152e-06, - "loss": 0.7772, - "num_input_tokens_seen": 108517365, - "step": 5102 - }, - { - "epoch": 0.6135994709312812, - "grad_norm": 2.890445004239762, - "learning_rate": 1.3720471364275253e-06, - "loss": 0.7696, - "num_input_tokens_seen": 108536730, - "step": 5103 - }, - { - "epoch": 0.6137197138219203, - "grad_norm": 4.70729623187718, - "learning_rate": 1.3713076044198486e-06, - "loss": 0.7614, - "num_input_tokens_seen": 108553260, - "step": 5104 - }, - { - "epoch": 0.6138399567125594, - "grad_norm": 4.302229399118108, - "learning_rate": 1.3705681677843086e-06, - "loss": 0.803, - "num_input_tokens_seen": 108571575, - "step": 5105 - }, - { - "epoch": 0.6139601996031985, - "grad_norm": 1.0486411974678187, - "learning_rate": 1.3698288266330768e-06, - "loss": 0.6341, - "num_input_tokens_seen": 108631920, - "step": 5106 - }, - { - "epoch": 0.6140804424938375, - "grad_norm": 2.3031076818195966, - "learning_rate": 1.3690895810783113e-06, - "loss": 0.7248, - "num_input_tokens_seen": 108650435, - "step": 5107 - }, - { - "epoch": 0.6142006853844767, - "grad_norm": 2.461920486158964, - "learning_rate": 1.3683504312321548e-06, - "loss": 0.7219, - "num_input_tokens_seen": 108670490, - "step": 5108 - }, - { - "epoch": 0.6143209282751158, - "grad_norm": 2.513553099661042, - "learning_rate": 1.3676113772067355e-06, - "loss": 0.7899, - "num_input_tokens_seen": 108687265, - "step": 5109 - }, - { - "epoch": 0.6144411711657548, - "grad_norm": 2.8415894165921163, - "learning_rate": 1.3668724191141667e-06, - "loss": 0.7134, - "num_input_tokens_seen": 108706255, - "step": 5110 - }, - { - "epoch": 0.6145614140563939, - "grad_norm": 2.398817076850106, - "learning_rate": 1.3661335570665493e-06, - "loss": 0.6609, - "num_input_tokens_seen": 108723885, - "step": 5111 - }, - { - "epoch": 0.614681656947033, - "grad_norm": 2.668097805964205, - "learning_rate": 1.3653947911759676e-06, - "loss": 0.7009, - "num_input_tokens_seen": 108741155, - "step": 5112 - }, - { - "epoch": 0.6148018998376721, - "grad_norm": 2.9196111607981154, - "learning_rate": 1.3646561215544909e-06, - "loss": 0.7357, - "num_input_tokens_seen": 108765515, - "step": 5113 - }, - { - "epoch": 0.6149221427283111, - "grad_norm": 2.4142253734809955, - "learning_rate": 1.3639175483141756e-06, - "loss": 0.7919, - "num_input_tokens_seen": 108784500, - "step": 5114 - }, - { - "epoch": 0.6150423856189503, - "grad_norm": 3.0514754111793874, - "learning_rate": 1.3631790715670626e-06, - "loss": 0.7289, - "num_input_tokens_seen": 108802625, - "step": 5115 - }, - { - "epoch": 0.6151626285095894, - "grad_norm": 3.470882610971371, - "learning_rate": 1.3624406914251783e-06, - "loss": 0.854, - "num_input_tokens_seen": 108819465, - "step": 5116 - }, - { - "epoch": 0.6152828714002284, - "grad_norm": 2.337631280806907, - "learning_rate": 1.361702408000534e-06, - "loss": 0.872, - "num_input_tokens_seen": 108836085, - "step": 5117 - }, - { - "epoch": 0.6154031142908676, - "grad_norm": 2.130656489460872, - "learning_rate": 1.3609642214051262e-06, - "loss": 0.7367, - "num_input_tokens_seen": 108860030, - "step": 5118 - }, - { - "epoch": 0.6155233571815066, - "grad_norm": 2.674008879175692, - "learning_rate": 1.3602261317509385e-06, - "loss": 0.6592, - "num_input_tokens_seen": 108876410, - "step": 5119 - }, - { - "epoch": 0.6156436000721457, - "grad_norm": 3.3278163004699244, - "learning_rate": 1.3594881391499383e-06, - "loss": 0.8071, - "num_input_tokens_seen": 108895045, - "step": 5120 - }, - { - "epoch": 0.6157638429627849, - "grad_norm": 2.2754206884747274, - "learning_rate": 1.3587502437140783e-06, - "loss": 0.7872, - "num_input_tokens_seen": 108912930, - "step": 5121 - }, - { - "epoch": 0.6158840858534239, - "grad_norm": 2.377261135494553, - "learning_rate": 1.3580124455552952e-06, - "loss": 0.8492, - "num_input_tokens_seen": 108932015, - "step": 5122 - }, - { - "epoch": 0.616004328744063, - "grad_norm": 1.8012426057885011, - "learning_rate": 1.3572747447855148e-06, - "loss": 0.8588, - "num_input_tokens_seen": 108952145, - "step": 5123 - }, - { - "epoch": 0.6161245716347021, - "grad_norm": 2.197149964904538, - "learning_rate": 1.3565371415166444e-06, - "loss": 0.6903, - "num_input_tokens_seen": 108969285, - "step": 5124 - }, - { - "epoch": 0.6162448145253412, - "grad_norm": 2.4192402461858205, - "learning_rate": 1.355799635860578e-06, - "loss": 0.6178, - "num_input_tokens_seen": 108988925, - "step": 5125 - }, - { - "epoch": 0.6163650574159802, - "grad_norm": 2.2475032023258694, - "learning_rate": 1.3550622279291941e-06, - "loss": 0.6939, - "num_input_tokens_seen": 109006790, - "step": 5126 - }, - { - "epoch": 0.6164853003066194, - "grad_norm": 1.477741756577462, - "learning_rate": 1.354324917834358e-06, - "loss": 0.8266, - "num_input_tokens_seen": 109027755, - "step": 5127 - }, - { - "epoch": 0.6166055431972585, - "grad_norm": 1.8326367285881393, - "learning_rate": 1.3535877056879183e-06, - "loss": 0.7611, - "num_input_tokens_seen": 109045650, - "step": 5128 - }, - { - "epoch": 0.6167257860878975, - "grad_norm": 3.0332064234729663, - "learning_rate": 1.3528505916017102e-06, - "loss": 0.7199, - "num_input_tokens_seen": 109063070, - "step": 5129 - }, - { - "epoch": 0.6168460289785367, - "grad_norm": 2.070687900871443, - "learning_rate": 1.3521135756875514e-06, - "loss": 0.8738, - "num_input_tokens_seen": 109079105, - "step": 5130 - }, - { - "epoch": 0.6169662718691757, - "grad_norm": 2.3074418713180274, - "learning_rate": 1.3513766580572492e-06, - "loss": 0.8549, - "num_input_tokens_seen": 109101645, - "step": 5131 - }, - { - "epoch": 0.6170865147598148, - "grad_norm": 2.228157943273862, - "learning_rate": 1.3506398388225924e-06, - "loss": 0.7655, - "num_input_tokens_seen": 109118685, - "step": 5132 - }, - { - "epoch": 0.617206757650454, - "grad_norm": 2.1776296404202062, - "learning_rate": 1.3499031180953554e-06, - "loss": 0.7051, - "num_input_tokens_seen": 109137540, - "step": 5133 - }, - { - "epoch": 0.617327000541093, - "grad_norm": 2.122194730063685, - "learning_rate": 1.349166495987298e-06, - "loss": 0.7313, - "num_input_tokens_seen": 109155825, - "step": 5134 - }, - { - "epoch": 0.6174472434317321, - "grad_norm": 0.9460531365172357, - "learning_rate": 1.3484299726101665e-06, - "loss": 0.6732, - "num_input_tokens_seen": 109219850, - "step": 5135 - }, - { - "epoch": 0.6175674863223712, - "grad_norm": 0.9078130896997626, - "learning_rate": 1.3476935480756901e-06, - "loss": 0.6099, - "num_input_tokens_seen": 109276320, - "step": 5136 - }, - { - "epoch": 0.6176877292130103, - "grad_norm": 3.958298093492208, - "learning_rate": 1.3469572224955833e-06, - "loss": 0.7471, - "num_input_tokens_seen": 109293835, - "step": 5137 - }, - { - "epoch": 0.6178079721036493, - "grad_norm": 3.0116191923119016, - "learning_rate": 1.3462209959815462e-06, - "loss": 0.7117, - "num_input_tokens_seen": 109308295, - "step": 5138 - }, - { - "epoch": 0.6179282149942885, - "grad_norm": 2.5285091245357907, - "learning_rate": 1.345484868645265e-06, - "loss": 0.7324, - "num_input_tokens_seen": 109326825, - "step": 5139 - }, - { - "epoch": 0.6180484578849276, - "grad_norm": 2.4294778288751253, - "learning_rate": 1.3447488405984088e-06, - "loss": 0.7742, - "num_input_tokens_seen": 109344805, - "step": 5140 - }, - { - "epoch": 0.6181687007755666, - "grad_norm": 2.8709401510474573, - "learning_rate": 1.3440129119526327e-06, - "loss": 0.6862, - "num_input_tokens_seen": 109366950, - "step": 5141 - }, - { - "epoch": 0.6182889436662057, - "grad_norm": 0.9983529920984433, - "learning_rate": 1.3432770828195757e-06, - "loss": 0.5663, - "num_input_tokens_seen": 109427655, - "step": 5142 - }, - { - "epoch": 0.6184091865568448, - "grad_norm": 3.9876579218549963, - "learning_rate": 1.3425413533108635e-06, - "loss": 0.7132, - "num_input_tokens_seen": 109445975, - "step": 5143 - }, - { - "epoch": 0.6185294294474839, - "grad_norm": 3.3593234662234357, - "learning_rate": 1.341805723538105e-06, - "loss": 0.7067, - "num_input_tokens_seen": 109465800, - "step": 5144 - }, - { - "epoch": 0.618649672338123, - "grad_norm": 1.7613245857542201, - "learning_rate": 1.3410701936128952e-06, - "loss": 0.768, - "num_input_tokens_seen": 109488300, - "step": 5145 - }, - { - "epoch": 0.6187699152287621, - "grad_norm": 4.476894421143781, - "learning_rate": 1.340334763646812e-06, - "loss": 0.8495, - "num_input_tokens_seen": 109502155, - "step": 5146 - }, - { - "epoch": 0.6188901581194012, - "grad_norm": 2.198068838673797, - "learning_rate": 1.3395994337514218e-06, - "loss": 0.743, - "num_input_tokens_seen": 109522045, - "step": 5147 - }, - { - "epoch": 0.6190104010100402, - "grad_norm": 1.7720964647502353, - "learning_rate": 1.3388642040382725e-06, - "loss": 0.7765, - "num_input_tokens_seen": 109542190, - "step": 5148 - }, - { - "epoch": 0.6191306439006794, - "grad_norm": 1.907353934457241, - "learning_rate": 1.3381290746188975e-06, - "loss": 0.8361, - "num_input_tokens_seen": 109561280, - "step": 5149 - }, - { - "epoch": 0.6192508867913185, - "grad_norm": 7.693214080085688, - "learning_rate": 1.3373940456048152e-06, - "loss": 0.6719, - "num_input_tokens_seen": 109581025, - "step": 5150 - }, - { - "epoch": 0.6193711296819575, - "grad_norm": 1.8089479306400549, - "learning_rate": 1.3366591171075299e-06, - "loss": 0.587, - "num_input_tokens_seen": 109604250, - "step": 5151 - }, - { - "epoch": 0.6194913725725967, - "grad_norm": 1.9219118460603057, - "learning_rate": 1.3359242892385293e-06, - "loss": 0.8976, - "num_input_tokens_seen": 109623180, - "step": 5152 - }, - { - "epoch": 0.6196116154632357, - "grad_norm": 2.040736516095926, - "learning_rate": 1.3351895621092859e-06, - "loss": 0.7609, - "num_input_tokens_seen": 109643245, - "step": 5153 - }, - { - "epoch": 0.6197318583538748, - "grad_norm": 2.455665928742031, - "learning_rate": 1.3344549358312565e-06, - "loss": 0.7694, - "num_input_tokens_seen": 109661365, - "step": 5154 - }, - { - "epoch": 0.619852101244514, - "grad_norm": 2.789083886177181, - "learning_rate": 1.3337204105158847e-06, - "loss": 0.7794, - "num_input_tokens_seen": 109679955, - "step": 5155 - }, - { - "epoch": 0.619972344135153, - "grad_norm": 2.61961396288061, - "learning_rate": 1.332985986274597e-06, - "loss": 0.7213, - "num_input_tokens_seen": 109697305, - "step": 5156 - }, - { - "epoch": 0.6200925870257921, - "grad_norm": 2.2157051448615426, - "learning_rate": 1.332251663218805e-06, - "loss": 0.7502, - "num_input_tokens_seen": 109713920, - "step": 5157 - }, - { - "epoch": 0.6202128299164312, - "grad_norm": 1.8940261801803904, - "learning_rate": 1.3315174414599045e-06, - "loss": 0.6736, - "num_input_tokens_seen": 109734960, - "step": 5158 - }, - { - "epoch": 0.6203330728070703, - "grad_norm": 2.0284374734956896, - "learning_rate": 1.3307833211092768e-06, - "loss": 0.7456, - "num_input_tokens_seen": 109753345, - "step": 5159 - }, - { - "epoch": 0.6204533156977093, - "grad_norm": 2.283071985077825, - "learning_rate": 1.3300493022782873e-06, - "loss": 0.7404, - "num_input_tokens_seen": 109773635, - "step": 5160 - }, - { - "epoch": 0.6205735585883485, - "grad_norm": 2.3228423976327925, - "learning_rate": 1.3293153850782859e-06, - "loss": 0.7262, - "num_input_tokens_seen": 109791675, - "step": 5161 - }, - { - "epoch": 0.6206938014789876, - "grad_norm": 2.396554790449731, - "learning_rate": 1.3285815696206065e-06, - "loss": 0.7034, - "num_input_tokens_seen": 109812940, - "step": 5162 - }, - { - "epoch": 0.6208140443696266, - "grad_norm": 2.5119891701219306, - "learning_rate": 1.32784785601657e-06, - "loss": 0.7633, - "num_input_tokens_seen": 109832070, - "step": 5163 - }, - { - "epoch": 0.6209342872602658, - "grad_norm": 2.087007035465976, - "learning_rate": 1.3271142443774794e-06, - "loss": 0.7345, - "num_input_tokens_seen": 109854025, - "step": 5164 - }, - { - "epoch": 0.6210545301509048, - "grad_norm": 4.797031861823209, - "learning_rate": 1.3263807348146233e-06, - "loss": 0.815, - "num_input_tokens_seen": 109873600, - "step": 5165 - }, - { - "epoch": 0.6211747730415439, - "grad_norm": 1.9231491035555146, - "learning_rate": 1.3256473274392727e-06, - "loss": 0.7352, - "num_input_tokens_seen": 109894665, - "step": 5166 - }, - { - "epoch": 0.6212950159321831, - "grad_norm": 1.9195123469973472, - "learning_rate": 1.3249140223626873e-06, - "loss": 0.6984, - "num_input_tokens_seen": 109916005, - "step": 5167 - }, - { - "epoch": 0.6214152588228221, - "grad_norm": 1.9849459265124092, - "learning_rate": 1.3241808196961081e-06, - "loss": 0.7588, - "num_input_tokens_seen": 109936850, - "step": 5168 - }, - { - "epoch": 0.6215355017134612, - "grad_norm": 2.2043648781899225, - "learning_rate": 1.3234477195507613e-06, - "loss": 0.7142, - "num_input_tokens_seen": 109955400, - "step": 5169 - }, - { - "epoch": 0.6216557446041003, - "grad_norm": 2.2469087902340252, - "learning_rate": 1.322714722037857e-06, - "loss": 0.6291, - "num_input_tokens_seen": 109976565, - "step": 5170 - }, - { - "epoch": 0.6217759874947394, - "grad_norm": 2.264959369627215, - "learning_rate": 1.321981827268591e-06, - "loss": 0.7678, - "num_input_tokens_seen": 109996940, - "step": 5171 - }, - { - "epoch": 0.6218962303853784, - "grad_norm": 1.7791049045340406, - "learning_rate": 1.321249035354143e-06, - "loss": 0.8104, - "num_input_tokens_seen": 110018920, - "step": 5172 - }, - { - "epoch": 0.6220164732760175, - "grad_norm": 1.987540557729061, - "learning_rate": 1.3205163464056766e-06, - "loss": 0.7973, - "num_input_tokens_seen": 110035245, - "step": 5173 - }, - { - "epoch": 0.6221367161666567, - "grad_norm": 2.0960684848173954, - "learning_rate": 1.319783760534339e-06, - "loss": 0.7203, - "num_input_tokens_seen": 110054210, - "step": 5174 - }, - { - "epoch": 0.6222569590572957, - "grad_norm": 2.2064178653816136, - "learning_rate": 1.3190512778512655e-06, - "loss": 0.7486, - "num_input_tokens_seen": 110070215, - "step": 5175 - }, - { - "epoch": 0.6223772019479348, - "grad_norm": 2.107536350264193, - "learning_rate": 1.3183188984675716e-06, - "loss": 0.8396, - "num_input_tokens_seen": 110088300, - "step": 5176 - }, - { - "epoch": 0.6224974448385739, - "grad_norm": 3.023436930587385, - "learning_rate": 1.3175866224943586e-06, - "loss": 0.7115, - "num_input_tokens_seen": 110106740, - "step": 5177 - }, - { - "epoch": 0.622617687729213, - "grad_norm": 2.431301220875932, - "learning_rate": 1.316854450042712e-06, - "loss": 0.7282, - "num_input_tokens_seen": 110124400, - "step": 5178 - }, - { - "epoch": 0.622737930619852, - "grad_norm": 2.310650671842693, - "learning_rate": 1.3161223812237028e-06, - "loss": 0.7358, - "num_input_tokens_seen": 110143475, - "step": 5179 - }, - { - "epoch": 0.6228581735104912, - "grad_norm": 2.6336933873647035, - "learning_rate": 1.3153904161483846e-06, - "loss": 0.8473, - "num_input_tokens_seen": 110158495, - "step": 5180 - }, - { - "epoch": 0.6229784164011303, - "grad_norm": 3.1637773947991827, - "learning_rate": 1.3146585549277957e-06, - "loss": 0.8532, - "num_input_tokens_seen": 110176855, - "step": 5181 - }, - { - "epoch": 0.6230986592917693, - "grad_norm": 2.575921187182607, - "learning_rate": 1.3139267976729587e-06, - "loss": 0.7791, - "num_input_tokens_seen": 110196765, - "step": 5182 - }, - { - "epoch": 0.6232189021824085, - "grad_norm": 3.3904206149608704, - "learning_rate": 1.3131951444948815e-06, - "loss": 0.7052, - "num_input_tokens_seen": 110215885, - "step": 5183 - }, - { - "epoch": 0.6233391450730476, - "grad_norm": 2.3995504764809126, - "learning_rate": 1.3124635955045546e-06, - "loss": 0.7474, - "num_input_tokens_seen": 110235420, - "step": 5184 - }, - { - "epoch": 0.6234593879636866, - "grad_norm": 3.2262578827880524, - "learning_rate": 1.311732150812954e-06, - "loss": 0.8349, - "num_input_tokens_seen": 110253220, - "step": 5185 - }, - { - "epoch": 0.6235796308543258, - "grad_norm": 3.606543327874877, - "learning_rate": 1.3110008105310384e-06, - "loss": 0.7565, - "num_input_tokens_seen": 110272760, - "step": 5186 - }, - { - "epoch": 0.6236998737449648, - "grad_norm": 2.0556612667600356, - "learning_rate": 1.3102695747697526e-06, - "loss": 0.7731, - "num_input_tokens_seen": 110295350, - "step": 5187 - }, - { - "epoch": 0.6238201166356039, - "grad_norm": 5.111913267016819, - "learning_rate": 1.3095384436400237e-06, - "loss": 0.8986, - "num_input_tokens_seen": 110306600, - "step": 5188 - }, - { - "epoch": 0.623940359526243, - "grad_norm": 45.930871334175414, - "learning_rate": 1.3088074172527637e-06, - "loss": 0.8258, - "num_input_tokens_seen": 110323450, - "step": 5189 - }, - { - "epoch": 0.6240606024168821, - "grad_norm": 2.030760971136488, - "learning_rate": 1.3080764957188684e-06, - "loss": 0.7158, - "num_input_tokens_seen": 110343415, - "step": 5190 - }, - { - "epoch": 0.6241808453075212, - "grad_norm": 2.193745687130762, - "learning_rate": 1.3073456791492192e-06, - "loss": 0.7002, - "num_input_tokens_seen": 110362845, - "step": 5191 - }, - { - "epoch": 0.6243010881981603, - "grad_norm": 1.8849290094762998, - "learning_rate": 1.3066149676546801e-06, - "loss": 0.7838, - "num_input_tokens_seen": 110380745, - "step": 5192 - }, - { - "epoch": 0.6244213310887994, - "grad_norm": 1.748856858841141, - "learning_rate": 1.3058843613460985e-06, - "loss": 0.6469, - "num_input_tokens_seen": 110398405, - "step": 5193 - }, - { - "epoch": 0.6245415739794384, - "grad_norm": 2.2374547719770335, - "learning_rate": 1.3051538603343075e-06, - "loss": 0.7421, - "num_input_tokens_seen": 110416055, - "step": 5194 - }, - { - "epoch": 0.6246618168700776, - "grad_norm": 1.8904234910984918, - "learning_rate": 1.3044234647301235e-06, - "loss": 0.6724, - "num_input_tokens_seen": 110433800, - "step": 5195 - }, - { - "epoch": 0.6247820597607167, - "grad_norm": 2.0445874268778077, - "learning_rate": 1.3036931746443474e-06, - "loss": 0.7158, - "num_input_tokens_seen": 110450995, - "step": 5196 - }, - { - "epoch": 0.6249023026513557, - "grad_norm": 4.7571959269373005, - "learning_rate": 1.3029629901877625e-06, - "loss": 0.8008, - "num_input_tokens_seen": 110470090, - "step": 5197 - }, - { - "epoch": 0.6250225455419949, - "grad_norm": 2.9820216325560542, - "learning_rate": 1.3022329114711371e-06, - "loss": 0.7758, - "num_input_tokens_seen": 110488520, - "step": 5198 - }, - { - "epoch": 0.6251427884326339, - "grad_norm": 2.59725265414932, - "learning_rate": 1.3015029386052252e-06, - "loss": 0.6942, - "num_input_tokens_seen": 110508410, - "step": 5199 - }, - { - "epoch": 0.625263031323273, - "grad_norm": 2.011996771573217, - "learning_rate": 1.3007730717007622e-06, - "loss": 0.7213, - "num_input_tokens_seen": 110528945, - "step": 5200 - }, - { - "epoch": 0.6253832742139122, - "grad_norm": 2.9973102063433426, - "learning_rate": 1.300043310868468e-06, - "loss": 0.7536, - "num_input_tokens_seen": 110549165, - "step": 5201 - }, - { - "epoch": 0.6255035171045512, - "grad_norm": 2.473410629263274, - "learning_rate": 1.2993136562190467e-06, - "loss": 0.7952, - "num_input_tokens_seen": 110568005, - "step": 5202 - }, - { - "epoch": 0.6256237599951903, - "grad_norm": 1.564251548673153, - "learning_rate": 1.2985841078631871e-06, - "loss": 0.7007, - "num_input_tokens_seen": 110587045, - "step": 5203 - }, - { - "epoch": 0.6257440028858293, - "grad_norm": 2.0462399026820357, - "learning_rate": 1.2978546659115608e-06, - "loss": 0.7742, - "num_input_tokens_seen": 110604845, - "step": 5204 - }, - { - "epoch": 0.6258642457764685, - "grad_norm": 2.1250060517098364, - "learning_rate": 1.2971253304748234e-06, - "loss": 0.8496, - "num_input_tokens_seen": 110622280, - "step": 5205 - }, - { - "epoch": 0.6259844886671075, - "grad_norm": 1.7850933096364225, - "learning_rate": 1.2963961016636136e-06, - "loss": 0.7483, - "num_input_tokens_seen": 110638560, - "step": 5206 - }, - { - "epoch": 0.6261047315577466, - "grad_norm": 2.318819752791727, - "learning_rate": 1.2956669795885565e-06, - "loss": 0.8369, - "num_input_tokens_seen": 110654910, - "step": 5207 - }, - { - "epoch": 0.6262249744483858, - "grad_norm": 2.3073701442344055, - "learning_rate": 1.294937964360259e-06, - "loss": 0.6897, - "num_input_tokens_seen": 110674900, - "step": 5208 - }, - { - "epoch": 0.6263452173390248, - "grad_norm": 2.4655920038086943, - "learning_rate": 1.2942090560893112e-06, - "loss": 0.6928, - "num_input_tokens_seen": 110694025, - "step": 5209 - }, - { - "epoch": 0.6264654602296639, - "grad_norm": 2.2813960820589454, - "learning_rate": 1.2934802548862878e-06, - "loss": 0.5986, - "num_input_tokens_seen": 110716530, - "step": 5210 - }, - { - "epoch": 0.626585703120303, - "grad_norm": 3.2720546801358266, - "learning_rate": 1.292751560861749e-06, - "loss": 0.8187, - "num_input_tokens_seen": 110731155, - "step": 5211 - }, - { - "epoch": 0.6267059460109421, - "grad_norm": 1.9641602027685314, - "learning_rate": 1.2920229741262356e-06, - "loss": 0.7927, - "num_input_tokens_seen": 110748880, - "step": 5212 - }, - { - "epoch": 0.6268261889015811, - "grad_norm": 2.48002937166285, - "learning_rate": 1.2912944947902745e-06, - "loss": 0.74, - "num_input_tokens_seen": 110765085, - "step": 5213 - }, - { - "epoch": 0.6269464317922203, - "grad_norm": 3.052319556127872, - "learning_rate": 1.2905661229643742e-06, - "loss": 0.7162, - "num_input_tokens_seen": 110784565, - "step": 5214 - }, - { - "epoch": 0.6270666746828594, - "grad_norm": 2.395072844226662, - "learning_rate": 1.2898378587590299e-06, - "loss": 0.8397, - "num_input_tokens_seen": 110800885, - "step": 5215 - }, - { - "epoch": 0.6271869175734984, - "grad_norm": 2.356659738438972, - "learning_rate": 1.2891097022847177e-06, - "loss": 0.8693, - "num_input_tokens_seen": 110817950, - "step": 5216 - }, - { - "epoch": 0.6273071604641376, - "grad_norm": 2.3337986702990676, - "learning_rate": 1.288381653651898e-06, - "loss": 0.6687, - "num_input_tokens_seen": 110838810, - "step": 5217 - }, - { - "epoch": 0.6274274033547766, - "grad_norm": 2.413457694513621, - "learning_rate": 1.2876537129710155e-06, - "loss": 0.8184, - "num_input_tokens_seen": 110856260, - "step": 5218 - }, - { - "epoch": 0.6275476462454157, - "grad_norm": 2.358554182318023, - "learning_rate": 1.2869258803524987e-06, - "loss": 0.7451, - "num_input_tokens_seen": 110874840, - "step": 5219 - }, - { - "epoch": 0.6276678891360549, - "grad_norm": 1.7908847530506782, - "learning_rate": 1.2861981559067592e-06, - "loss": 0.6999, - "num_input_tokens_seen": 110895165, - "step": 5220 - }, - { - "epoch": 0.6277881320266939, - "grad_norm": 2.031556287969863, - "learning_rate": 1.2854705397441917e-06, - "loss": 0.7995, - "num_input_tokens_seen": 110910425, - "step": 5221 - }, - { - "epoch": 0.627908374917333, - "grad_norm": 3.001299653552301, - "learning_rate": 1.2847430319751747e-06, - "loss": 0.7721, - "num_input_tokens_seen": 110928240, - "step": 5222 - }, - { - "epoch": 0.6280286178079721, - "grad_norm": 2.7303845128361237, - "learning_rate": 1.2840156327100716e-06, - "loss": 0.6697, - "num_input_tokens_seen": 110945085, - "step": 5223 - }, - { - "epoch": 0.6281488606986112, - "grad_norm": 4.18336343067612, - "learning_rate": 1.2832883420592278e-06, - "loss": 0.7175, - "num_input_tokens_seen": 110963700, - "step": 5224 - }, - { - "epoch": 0.6282691035892503, - "grad_norm": 2.282553383531497, - "learning_rate": 1.2825611601329725e-06, - "loss": 0.638, - "num_input_tokens_seen": 110983940, - "step": 5225 - }, - { - "epoch": 0.6283893464798894, - "grad_norm": 2.082559058799252, - "learning_rate": 1.2818340870416182e-06, - "loss": 0.805, - "num_input_tokens_seen": 111004795, - "step": 5226 - }, - { - "epoch": 0.6285095893705285, - "grad_norm": 4.089667592479948, - "learning_rate": 1.2811071228954626e-06, - "loss": 0.7512, - "num_input_tokens_seen": 111023150, - "step": 5227 - }, - { - "epoch": 0.6286298322611675, - "grad_norm": 2.106316160231089, - "learning_rate": 1.2803802678047846e-06, - "loss": 0.8082, - "num_input_tokens_seen": 111043020, - "step": 5228 - }, - { - "epoch": 0.6287500751518067, - "grad_norm": 2.1346415468226847, - "learning_rate": 1.2796535218798483e-06, - "loss": 0.73, - "num_input_tokens_seen": 111062805, - "step": 5229 - }, - { - "epoch": 0.6288703180424458, - "grad_norm": 2.250278938404751, - "learning_rate": 1.2789268852308992e-06, - "loss": 0.8384, - "num_input_tokens_seen": 111077735, - "step": 5230 - }, - { - "epoch": 0.6289905609330848, - "grad_norm": 2.4570592454405453, - "learning_rate": 1.2782003579681688e-06, - "loss": 0.6954, - "num_input_tokens_seen": 111096985, - "step": 5231 - }, - { - "epoch": 0.629110803823724, - "grad_norm": 2.5659629633958296, - "learning_rate": 1.2774739402018701e-06, - "loss": 0.7351, - "num_input_tokens_seen": 111117540, - "step": 5232 - }, - { - "epoch": 0.629231046714363, - "grad_norm": 1.8538971436082687, - "learning_rate": 1.2767476320422006e-06, - "loss": 0.7267, - "num_input_tokens_seen": 111137185, - "step": 5233 - }, - { - "epoch": 0.6293512896050021, - "grad_norm": 0.7297993800686974, - "learning_rate": 1.2760214335993392e-06, - "loss": 0.5962, - "num_input_tokens_seen": 111203550, - "step": 5234 - }, - { - "epoch": 0.6294715324956413, - "grad_norm": 2.06204278575427, - "learning_rate": 1.2752953449834514e-06, - "loss": 0.586, - "num_input_tokens_seen": 111225720, - "step": 5235 - }, - { - "epoch": 0.6295917753862803, - "grad_norm": 1.9061635460802566, - "learning_rate": 1.2745693663046836e-06, - "loss": 0.7962, - "num_input_tokens_seen": 111244510, - "step": 5236 - }, - { - "epoch": 0.6297120182769194, - "grad_norm": 1.9899008352941812, - "learning_rate": 1.2738434976731662e-06, - "loss": 0.7991, - "num_input_tokens_seen": 111262415, - "step": 5237 - }, - { - "epoch": 0.6298322611675584, - "grad_norm": 1.738945131494004, - "learning_rate": 1.2731177391990125e-06, - "loss": 0.7481, - "num_input_tokens_seen": 111282060, - "step": 5238 - }, - { - "epoch": 0.6299525040581976, - "grad_norm": 16.045329240179676, - "learning_rate": 1.2723920909923203e-06, - "loss": 0.8142, - "num_input_tokens_seen": 111297525, - "step": 5239 - }, - { - "epoch": 0.6300727469488366, - "grad_norm": 0.9340783517036609, - "learning_rate": 1.2716665531631692e-06, - "loss": 0.6518, - "num_input_tokens_seen": 111351530, - "step": 5240 - }, - { - "epoch": 0.6301929898394757, - "grad_norm": 2.1276929111388667, - "learning_rate": 1.270941125821623e-06, - "loss": 0.7663, - "num_input_tokens_seen": 111371675, - "step": 5241 - }, - { - "epoch": 0.6303132327301149, - "grad_norm": 1.7322007291532513, - "learning_rate": 1.2702158090777273e-06, - "loss": 0.7538, - "num_input_tokens_seen": 111392485, - "step": 5242 - }, - { - "epoch": 0.6304334756207539, - "grad_norm": 2.2456092856542367, - "learning_rate": 1.2694906030415141e-06, - "loss": 0.744, - "num_input_tokens_seen": 111409950, - "step": 5243 - }, - { - "epoch": 0.630553718511393, - "grad_norm": 2.712737074675974, - "learning_rate": 1.2687655078229958e-06, - "loss": 0.8081, - "num_input_tokens_seen": 111424000, - "step": 5244 - }, - { - "epoch": 0.6306739614020321, - "grad_norm": 2.1064104580625087, - "learning_rate": 1.2680405235321683e-06, - "loss": 0.6929, - "num_input_tokens_seen": 111445055, - "step": 5245 - }, - { - "epoch": 0.6307942042926712, - "grad_norm": 10.412549822211044, - "learning_rate": 1.267315650279011e-06, - "loss": 0.7828, - "num_input_tokens_seen": 111463245, - "step": 5246 - }, - { - "epoch": 0.6309144471833102, - "grad_norm": 2.3720885630859287, - "learning_rate": 1.2665908881734874e-06, - "loss": 0.74, - "num_input_tokens_seen": 111481800, - "step": 5247 - }, - { - "epoch": 0.6310346900739494, - "grad_norm": 2.6979891231226505, - "learning_rate": 1.2658662373255432e-06, - "loss": 0.846, - "num_input_tokens_seen": 111499910, - "step": 5248 - }, - { - "epoch": 0.6311549329645885, - "grad_norm": 0.8806303462501649, - "learning_rate": 1.265141697845107e-06, - "loss": 0.5723, - "num_input_tokens_seen": 111565015, - "step": 5249 - }, - { - "epoch": 0.6312751758552275, - "grad_norm": 2.2954593267314936, - "learning_rate": 1.2644172698420899e-06, - "loss": 0.6465, - "num_input_tokens_seen": 111586840, - "step": 5250 - }, - { - "epoch": 0.6313954187458667, - "grad_norm": 1.8471206967788458, - "learning_rate": 1.2636929534263894e-06, - "loss": 0.8412, - "num_input_tokens_seen": 111605545, - "step": 5251 - }, - { - "epoch": 0.6315156616365057, - "grad_norm": 2.3994975481123513, - "learning_rate": 1.2629687487078821e-06, - "loss": 0.7681, - "num_input_tokens_seen": 111624075, - "step": 5252 - }, - { - "epoch": 0.6316359045271448, - "grad_norm": 4.065423323777952, - "learning_rate": 1.2622446557964298e-06, - "loss": 0.7555, - "num_input_tokens_seen": 111641800, - "step": 5253 - }, - { - "epoch": 0.631756147417784, - "grad_norm": 1.6867561317674031, - "learning_rate": 1.2615206748018757e-06, - "loss": 0.7012, - "num_input_tokens_seen": 111662115, - "step": 5254 - }, - { - "epoch": 0.631876390308423, - "grad_norm": 2.980653683989346, - "learning_rate": 1.2607968058340488e-06, - "loss": 0.7213, - "num_input_tokens_seen": 111681530, - "step": 5255 - }, - { - "epoch": 0.6319966331990621, - "grad_norm": 2.410032744526805, - "learning_rate": 1.2600730490027586e-06, - "loss": 0.7286, - "num_input_tokens_seen": 111701490, - "step": 5256 - }, - { - "epoch": 0.6321168760897012, - "grad_norm": 1.8658546078770732, - "learning_rate": 1.2593494044177986e-06, - "loss": 0.799, - "num_input_tokens_seen": 111719515, - "step": 5257 - }, - { - "epoch": 0.6322371189803403, - "grad_norm": 2.3220802559303566, - "learning_rate": 1.2586258721889448e-06, - "loss": 0.7953, - "num_input_tokens_seen": 111736585, - "step": 5258 - }, - { - "epoch": 0.6323573618709794, - "grad_norm": 2.2001562839333415, - "learning_rate": 1.2579024524259573e-06, - "loss": 0.8064, - "num_input_tokens_seen": 111752565, - "step": 5259 - }, - { - "epoch": 0.6324776047616185, - "grad_norm": 1.8832792685980189, - "learning_rate": 1.2571791452385775e-06, - "loss": 0.9057, - "num_input_tokens_seen": 111769550, - "step": 5260 - }, - { - "epoch": 0.6325978476522576, - "grad_norm": 1.6084239787433037, - "learning_rate": 1.2564559507365306e-06, - "loss": 0.7635, - "num_input_tokens_seen": 111791675, - "step": 5261 - }, - { - "epoch": 0.6327180905428966, - "grad_norm": 2.06494265719428, - "learning_rate": 1.2557328690295244e-06, - "loss": 0.7877, - "num_input_tokens_seen": 111809585, - "step": 5262 - }, - { - "epoch": 0.6328383334335358, - "grad_norm": 6.204705968815963, - "learning_rate": 1.2550099002272506e-06, - "loss": 0.7563, - "num_input_tokens_seen": 111828330, - "step": 5263 - }, - { - "epoch": 0.6329585763241748, - "grad_norm": 2.4256622602510536, - "learning_rate": 1.254287044439383e-06, - "loss": 0.7926, - "num_input_tokens_seen": 111847655, - "step": 5264 - }, - { - "epoch": 0.6330788192148139, - "grad_norm": 0.8313187081462069, - "learning_rate": 1.2535643017755776e-06, - "loss": 0.5632, - "num_input_tokens_seen": 111909565, - "step": 5265 - }, - { - "epoch": 0.6331990621054531, - "grad_norm": 2.443150304967501, - "learning_rate": 1.2528416723454737e-06, - "loss": 0.7132, - "num_input_tokens_seen": 111925955, - "step": 5266 - }, - { - "epoch": 0.6333193049960921, - "grad_norm": 7.38401961084352, - "learning_rate": 1.2521191562586945e-06, - "loss": 0.7059, - "num_input_tokens_seen": 111949325, - "step": 5267 - }, - { - "epoch": 0.6334395478867312, - "grad_norm": 2.217107078064154, - "learning_rate": 1.251396753624845e-06, - "loss": 0.7686, - "num_input_tokens_seen": 111965365, - "step": 5268 - }, - { - "epoch": 0.6335597907773702, - "grad_norm": 2.739152607110941, - "learning_rate": 1.2506744645535122e-06, - "loss": 0.8047, - "num_input_tokens_seen": 111985515, - "step": 5269 - }, - { - "epoch": 0.6336800336680094, - "grad_norm": 4.005794505068641, - "learning_rate": 1.2499522891542667e-06, - "loss": 0.5896, - "num_input_tokens_seen": 112005275, - "step": 5270 - }, - { - "epoch": 0.6338002765586485, - "grad_norm": 1.8565679058295572, - "learning_rate": 1.2492302275366635e-06, - "loss": 0.7554, - "num_input_tokens_seen": 112024670, - "step": 5271 - }, - { - "epoch": 0.6339205194492875, - "grad_norm": 3.329445595786218, - "learning_rate": 1.2485082798102377e-06, - "loss": 0.6509, - "num_input_tokens_seen": 112044805, - "step": 5272 - }, - { - "epoch": 0.6340407623399267, - "grad_norm": 5.293613099723363, - "learning_rate": 1.2477864460845086e-06, - "loss": 0.687, - "num_input_tokens_seen": 112060925, - "step": 5273 - }, - { - "epoch": 0.6341610052305657, - "grad_norm": 5.0474857942866045, - "learning_rate": 1.247064726468977e-06, - "loss": 0.7385, - "num_input_tokens_seen": 112079125, - "step": 5274 - }, - { - "epoch": 0.6342812481212048, - "grad_norm": 3.908715415333567, - "learning_rate": 1.2463431210731282e-06, - "loss": 0.7127, - "num_input_tokens_seen": 112098430, - "step": 5275 - }, - { - "epoch": 0.634401491011844, - "grad_norm": 2.3496489092615525, - "learning_rate": 1.2456216300064289e-06, - "loss": 0.7557, - "num_input_tokens_seen": 112115700, - "step": 5276 - }, - { - "epoch": 0.634521733902483, - "grad_norm": 2.366313895944972, - "learning_rate": 1.2449002533783284e-06, - "loss": 0.775, - "num_input_tokens_seen": 112135475, - "step": 5277 - }, - { - "epoch": 0.6346419767931221, - "grad_norm": 2.324993340810823, - "learning_rate": 1.2441789912982579e-06, - "loss": 0.6925, - "num_input_tokens_seen": 112152280, - "step": 5278 - }, - { - "epoch": 0.6347622196837612, - "grad_norm": 2.3095453146778793, - "learning_rate": 1.2434578438756346e-06, - "loss": 0.6462, - "num_input_tokens_seen": 112172430, - "step": 5279 - }, - { - "epoch": 0.6348824625744003, - "grad_norm": 2.5763976446510513, - "learning_rate": 1.242736811219855e-06, - "loss": 0.7824, - "num_input_tokens_seen": 112198110, - "step": 5280 - }, - { - "epoch": 0.6350027054650393, - "grad_norm": 1.8010468456990276, - "learning_rate": 1.2420158934402988e-06, - "loss": 0.8148, - "num_input_tokens_seen": 112218445, - "step": 5281 - }, - { - "epoch": 0.6351229483556785, - "grad_norm": 1.9541332765371024, - "learning_rate": 1.2412950906463286e-06, - "loss": 0.8303, - "num_input_tokens_seen": 112235470, - "step": 5282 - }, - { - "epoch": 0.6352431912463176, - "grad_norm": 1.932823796223964, - "learning_rate": 1.2405744029472902e-06, - "loss": 0.8892, - "num_input_tokens_seen": 112254675, - "step": 5283 - }, - { - "epoch": 0.6353634341369566, - "grad_norm": 2.3826998521820535, - "learning_rate": 1.2398538304525113e-06, - "loss": 0.7591, - "num_input_tokens_seen": 112273020, - "step": 5284 - }, - { - "epoch": 0.6354836770275958, - "grad_norm": 2.3542005417520295, - "learning_rate": 1.2391333732713016e-06, - "loss": 0.7527, - "num_input_tokens_seen": 112290545, - "step": 5285 - }, - { - "epoch": 0.6356039199182348, - "grad_norm": 3.218954584587067, - "learning_rate": 1.2384130315129536e-06, - "loss": 0.7778, - "num_input_tokens_seen": 112308590, - "step": 5286 - }, - { - "epoch": 0.6357241628088739, - "grad_norm": 2.6630769999364166, - "learning_rate": 1.2376928052867442e-06, - "loss": 0.7259, - "num_input_tokens_seen": 112327430, - "step": 5287 - }, - { - "epoch": 0.6358444056995131, - "grad_norm": 2.591191587501824, - "learning_rate": 1.2369726947019299e-06, - "loss": 0.7827, - "num_input_tokens_seen": 112347625, - "step": 5288 - }, - { - "epoch": 0.6359646485901521, - "grad_norm": 2.454139769225393, - "learning_rate": 1.2362526998677516e-06, - "loss": 0.6645, - "num_input_tokens_seen": 112363710, - "step": 5289 - }, - { - "epoch": 0.6360848914807912, - "grad_norm": 1.9795957021037067, - "learning_rate": 1.2355328208934301e-06, - "loss": 0.8417, - "num_input_tokens_seen": 112382305, - "step": 5290 - }, - { - "epoch": 0.6362051343714303, - "grad_norm": 1.849343974248388, - "learning_rate": 1.2348130578881728e-06, - "loss": 0.7272, - "num_input_tokens_seen": 112400245, - "step": 5291 - }, - { - "epoch": 0.6363253772620694, - "grad_norm": 4.805070229333753, - "learning_rate": 1.2340934109611664e-06, - "loss": 0.7536, - "num_input_tokens_seen": 112420725, - "step": 5292 - }, - { - "epoch": 0.6364456201527084, - "grad_norm": 3.299814878926741, - "learning_rate": 1.2333738802215803e-06, - "loss": 0.6807, - "num_input_tokens_seen": 112440665, - "step": 5293 - }, - { - "epoch": 0.6365658630433476, - "grad_norm": 2.514701341713591, - "learning_rate": 1.2326544657785668e-06, - "loss": 0.8081, - "num_input_tokens_seen": 112460075, - "step": 5294 - }, - { - "epoch": 0.6366861059339867, - "grad_norm": 2.5878788315120347, - "learning_rate": 1.2319351677412612e-06, - "loss": 0.7374, - "num_input_tokens_seen": 112476840, - "step": 5295 - }, - { - "epoch": 0.6368063488246257, - "grad_norm": 1.9853188416144236, - "learning_rate": 1.2312159862187796e-06, - "loss": 0.7359, - "num_input_tokens_seen": 112494970, - "step": 5296 - }, - { - "epoch": 0.6369265917152649, - "grad_norm": 1.7032026923010202, - "learning_rate": 1.2304969213202221e-06, - "loss": 0.7533, - "num_input_tokens_seen": 112515950, - "step": 5297 - }, - { - "epoch": 0.6370468346059039, - "grad_norm": 2.9977384166703778, - "learning_rate": 1.2297779731546687e-06, - "loss": 0.7892, - "num_input_tokens_seen": 112534765, - "step": 5298 - }, - { - "epoch": 0.637167077496543, - "grad_norm": 2.1589928030699017, - "learning_rate": 1.2290591418311853e-06, - "loss": 0.7768, - "num_input_tokens_seen": 112551880, - "step": 5299 - }, - { - "epoch": 0.637287320387182, - "grad_norm": 1.9339938587171805, - "learning_rate": 1.2283404274588172e-06, - "loss": 0.704, - "num_input_tokens_seen": 112570545, - "step": 5300 - }, - { - "epoch": 0.6374075632778212, - "grad_norm": 0.7951509615224558, - "learning_rate": 1.2276218301465925e-06, - "loss": 0.5556, - "num_input_tokens_seen": 112625630, - "step": 5301 - }, - { - "epoch": 0.6375278061684603, - "grad_norm": 1.8959663808941072, - "learning_rate": 1.2269033500035217e-06, - "loss": 0.7835, - "num_input_tokens_seen": 112645485, - "step": 5302 - }, - { - "epoch": 0.6376480490590993, - "grad_norm": 1.946591753994788, - "learning_rate": 1.2261849871385988e-06, - "loss": 0.7383, - "num_input_tokens_seen": 112666310, - "step": 5303 - }, - { - "epoch": 0.6377682919497385, - "grad_norm": 2.4535601168950643, - "learning_rate": 1.2254667416607976e-06, - "loss": 0.6121, - "num_input_tokens_seen": 112687630, - "step": 5304 - }, - { - "epoch": 0.6378885348403776, - "grad_norm": 2.0176746018851457, - "learning_rate": 1.2247486136790762e-06, - "loss": 0.8319, - "num_input_tokens_seen": 112706830, - "step": 5305 - }, - { - "epoch": 0.6380087777310166, - "grad_norm": 2.2291941657011063, - "learning_rate": 1.2240306033023726e-06, - "loss": 0.7976, - "num_input_tokens_seen": 112724375, - "step": 5306 - }, - { - "epoch": 0.6381290206216558, - "grad_norm": 2.0293592734779153, - "learning_rate": 1.2233127106396106e-06, - "loss": 0.7174, - "num_input_tokens_seen": 112742815, - "step": 5307 - }, - { - "epoch": 0.6382492635122948, - "grad_norm": 3.711336783216749, - "learning_rate": 1.2225949357996928e-06, - "loss": 0.8587, - "num_input_tokens_seen": 112760660, - "step": 5308 - }, - { - "epoch": 0.6383695064029339, - "grad_norm": 9.492569741622521, - "learning_rate": 1.221877278891505e-06, - "loss": 0.8002, - "num_input_tokens_seen": 112779635, - "step": 5309 - }, - { - "epoch": 0.638489749293573, - "grad_norm": 4.011802883703717, - "learning_rate": 1.221159740023915e-06, - "loss": 0.7077, - "num_input_tokens_seen": 112799185, - "step": 5310 - }, - { - "epoch": 0.6386099921842121, - "grad_norm": 2.446543728621817, - "learning_rate": 1.2204423193057735e-06, - "loss": 0.7302, - "num_input_tokens_seen": 112817735, - "step": 5311 - }, - { - "epoch": 0.6387302350748512, - "grad_norm": 0.9916017969666181, - "learning_rate": 1.2197250168459126e-06, - "loss": 0.6863, - "num_input_tokens_seen": 112873855, - "step": 5312 - }, - { - "epoch": 0.6388504779654903, - "grad_norm": 2.4507939987983143, - "learning_rate": 1.2190078327531458e-06, - "loss": 0.7452, - "num_input_tokens_seen": 112889820, - "step": 5313 - }, - { - "epoch": 0.6389707208561294, - "grad_norm": 2.067654429337552, - "learning_rate": 1.2182907671362693e-06, - "loss": 0.7231, - "num_input_tokens_seen": 112910235, - "step": 5314 - }, - { - "epoch": 0.6390909637467684, - "grad_norm": 1.9994166148385504, - "learning_rate": 1.2175738201040626e-06, - "loss": 0.7751, - "num_input_tokens_seen": 112926995, - "step": 5315 - }, - { - "epoch": 0.6392112066374076, - "grad_norm": 2.0051756709731685, - "learning_rate": 1.2168569917652855e-06, - "loss": 0.7893, - "num_input_tokens_seen": 112946570, - "step": 5316 - }, - { - "epoch": 0.6393314495280467, - "grad_norm": 1.9420777912835931, - "learning_rate": 1.2161402822286802e-06, - "loss": 0.6367, - "num_input_tokens_seen": 112966975, - "step": 5317 - }, - { - "epoch": 0.6394516924186857, - "grad_norm": 2.2461965804328914, - "learning_rate": 1.2154236916029698e-06, - "loss": 0.7903, - "num_input_tokens_seen": 112984670, - "step": 5318 - }, - { - "epoch": 0.6395719353093249, - "grad_norm": 2.9939395699251117, - "learning_rate": 1.2147072199968627e-06, - "loss": 0.7246, - "num_input_tokens_seen": 113003025, - "step": 5319 - }, - { - "epoch": 0.6396921781999639, - "grad_norm": 2.50220961228838, - "learning_rate": 1.2139908675190454e-06, - "loss": 0.7146, - "num_input_tokens_seen": 113021955, - "step": 5320 - }, - { - "epoch": 0.639812421090603, - "grad_norm": 2.4236534102059886, - "learning_rate": 1.2132746342781887e-06, - "loss": 0.7459, - "num_input_tokens_seen": 113042835, - "step": 5321 - }, - { - "epoch": 0.6399326639812422, - "grad_norm": 3.673772215391414, - "learning_rate": 1.2125585203829437e-06, - "loss": 0.785, - "num_input_tokens_seen": 113058195, - "step": 5322 - }, - { - "epoch": 0.6400529068718812, - "grad_norm": 2.3936903601375445, - "learning_rate": 1.211842525941946e-06, - "loss": 0.7313, - "num_input_tokens_seen": 113077710, - "step": 5323 - }, - { - "epoch": 0.6401731497625203, - "grad_norm": 2.3263710461321, - "learning_rate": 1.2111266510638105e-06, - "loss": 0.7936, - "num_input_tokens_seen": 113100355, - "step": 5324 - }, - { - "epoch": 0.6402933926531594, - "grad_norm": 1.7408512655587947, - "learning_rate": 1.2104108958571346e-06, - "loss": 0.7958, - "num_input_tokens_seen": 113118345, - "step": 5325 - }, - { - "epoch": 0.6404136355437985, - "grad_norm": 2.354395345675336, - "learning_rate": 1.2096952604304975e-06, - "loss": 0.751, - "num_input_tokens_seen": 113138495, - "step": 5326 - }, - { - "epoch": 0.6405338784344375, - "grad_norm": 2.6098000250523166, - "learning_rate": 1.2089797448924616e-06, - "loss": 0.7016, - "num_input_tokens_seen": 113162090, - "step": 5327 - }, - { - "epoch": 0.6406541213250767, - "grad_norm": 2.3030690169309738, - "learning_rate": 1.2082643493515696e-06, - "loss": 0.651, - "num_input_tokens_seen": 113180130, - "step": 5328 - }, - { - "epoch": 0.6407743642157158, - "grad_norm": 2.149240351075889, - "learning_rate": 1.207549073916346e-06, - "loss": 0.814, - "num_input_tokens_seen": 113200785, - "step": 5329 - }, - { - "epoch": 0.6408946071063548, - "grad_norm": 2.540532925962855, - "learning_rate": 1.2068339186952974e-06, - "loss": 0.7836, - "num_input_tokens_seen": 113218045, - "step": 5330 - }, - { - "epoch": 0.6410148499969939, - "grad_norm": 2.700466189649394, - "learning_rate": 1.2061188837969133e-06, - "loss": 0.7238, - "num_input_tokens_seen": 113237375, - "step": 5331 - }, - { - "epoch": 0.641135092887633, - "grad_norm": 4.889911437451184, - "learning_rate": 1.2054039693296631e-06, - "loss": 0.8389, - "num_input_tokens_seen": 113255090, - "step": 5332 - }, - { - "epoch": 0.6412553357782721, - "grad_norm": 1.8542335704279096, - "learning_rate": 1.2046891754019996e-06, - "loss": 0.8063, - "num_input_tokens_seen": 113275420, - "step": 5333 - }, - { - "epoch": 0.6413755786689112, - "grad_norm": 3.3089048526070197, - "learning_rate": 1.2039745021223548e-06, - "loss": 0.8181, - "num_input_tokens_seen": 113292560, - "step": 5334 - }, - { - "epoch": 0.6414958215595503, - "grad_norm": 0.912853591214701, - "learning_rate": 1.2032599495991456e-06, - "loss": 0.6196, - "num_input_tokens_seen": 113357020, - "step": 5335 - }, - { - "epoch": 0.6416160644501894, - "grad_norm": 2.3202516007485223, - "learning_rate": 1.2025455179407685e-06, - "loss": 0.694, - "num_input_tokens_seen": 113377900, - "step": 5336 - }, - { - "epoch": 0.6417363073408284, - "grad_norm": 2.407591367601122, - "learning_rate": 1.2018312072556027e-06, - "loss": 0.7331, - "num_input_tokens_seen": 113396120, - "step": 5337 - }, - { - "epoch": 0.6418565502314676, - "grad_norm": 2.8326555720773103, - "learning_rate": 1.2011170176520077e-06, - "loss": 0.7399, - "num_input_tokens_seen": 113416755, - "step": 5338 - }, - { - "epoch": 0.6419767931221066, - "grad_norm": 9.54917696404613, - "learning_rate": 1.200402949238326e-06, - "loss": 0.8065, - "num_input_tokens_seen": 113437815, - "step": 5339 - }, - { - "epoch": 0.6420970360127457, - "grad_norm": 2.056981520561676, - "learning_rate": 1.1996890021228814e-06, - "loss": 0.7382, - "num_input_tokens_seen": 113454310, - "step": 5340 - }, - { - "epoch": 0.6422172789033849, - "grad_norm": 2.585042162478798, - "learning_rate": 1.198975176413979e-06, - "loss": 0.6928, - "num_input_tokens_seen": 113477680, - "step": 5341 - }, - { - "epoch": 0.6423375217940239, - "grad_norm": 1.8762190567855141, - "learning_rate": 1.198261472219904e-06, - "loss": 0.8191, - "num_input_tokens_seen": 113498575, - "step": 5342 - }, - { - "epoch": 0.642457764684663, - "grad_norm": 2.175959659945122, - "learning_rate": 1.1975478896489276e-06, - "loss": 0.7711, - "num_input_tokens_seen": 113516130, - "step": 5343 - }, - { - "epoch": 0.6425780075753021, - "grad_norm": 2.472418910018756, - "learning_rate": 1.1968344288092981e-06, - "loss": 0.7587, - "num_input_tokens_seen": 113532430, - "step": 5344 - }, - { - "epoch": 0.6426982504659412, - "grad_norm": 2.07724398552733, - "learning_rate": 1.1961210898092473e-06, - "loss": 0.6407, - "num_input_tokens_seen": 113551100, - "step": 5345 - }, - { - "epoch": 0.6428184933565803, - "grad_norm": 6.145346676051869, - "learning_rate": 1.1954078727569874e-06, - "loss": 0.8011, - "num_input_tokens_seen": 113568120, - "step": 5346 - }, - { - "epoch": 0.6429387362472194, - "grad_norm": 3.6960047360292507, - "learning_rate": 1.1946947777607141e-06, - "loss": 0.777, - "num_input_tokens_seen": 113588975, - "step": 5347 - }, - { - "epoch": 0.6430589791378585, - "grad_norm": 1.9334417258068015, - "learning_rate": 1.1939818049286028e-06, - "loss": 0.7936, - "num_input_tokens_seen": 113606855, - "step": 5348 - }, - { - "epoch": 0.6431792220284975, - "grad_norm": 1.8516167601704034, - "learning_rate": 1.1932689543688103e-06, - "loss": 0.7372, - "num_input_tokens_seen": 113627680, - "step": 5349 - }, - { - "epoch": 0.6432994649191367, - "grad_norm": 2.465884311197901, - "learning_rate": 1.1925562261894756e-06, - "loss": 0.7201, - "num_input_tokens_seen": 113646480, - "step": 5350 - }, - { - "epoch": 0.6434197078097758, - "grad_norm": 1.8920295170593262, - "learning_rate": 1.1918436204987203e-06, - "loss": 0.7704, - "num_input_tokens_seen": 113668060, - "step": 5351 - }, - { - "epoch": 0.6435399507004148, - "grad_norm": 2.3995250093597424, - "learning_rate": 1.191131137404645e-06, - "loss": 0.8122, - "num_input_tokens_seen": 113684520, - "step": 5352 - }, - { - "epoch": 0.643660193591054, - "grad_norm": 2.3709927214297157, - "learning_rate": 1.190418777015333e-06, - "loss": 0.7698, - "num_input_tokens_seen": 113703150, - "step": 5353 - }, - { - "epoch": 0.643780436481693, - "grad_norm": 3.104706877201076, - "learning_rate": 1.1897065394388487e-06, - "loss": 0.7301, - "num_input_tokens_seen": 113723310, - "step": 5354 - }, - { - "epoch": 0.6439006793723321, - "grad_norm": 1.933048973726292, - "learning_rate": 1.1889944247832385e-06, - "loss": 0.7603, - "num_input_tokens_seen": 113743270, - "step": 5355 - }, - { - "epoch": 0.6440209222629713, - "grad_norm": 2.620356832022428, - "learning_rate": 1.188282433156529e-06, - "loss": 0.7003, - "num_input_tokens_seen": 113762450, - "step": 5356 - }, - { - "epoch": 0.6441411651536103, - "grad_norm": 3.012886796589671, - "learning_rate": 1.187570564666729e-06, - "loss": 0.8862, - "num_input_tokens_seen": 113780060, - "step": 5357 - }, - { - "epoch": 0.6442614080442494, - "grad_norm": 2.7104177992956417, - "learning_rate": 1.1868588194218277e-06, - "loss": 0.747, - "num_input_tokens_seen": 113800160, - "step": 5358 - }, - { - "epoch": 0.6443816509348885, - "grad_norm": 1.7538525126651472, - "learning_rate": 1.1861471975297979e-06, - "loss": 0.7333, - "num_input_tokens_seen": 113821575, - "step": 5359 - }, - { - "epoch": 0.6445018938255276, - "grad_norm": 1.829430267584359, - "learning_rate": 1.185435699098591e-06, - "loss": 0.7015, - "num_input_tokens_seen": 113847490, - "step": 5360 - }, - { - "epoch": 0.6446221367161666, - "grad_norm": 9.654782899790336, - "learning_rate": 1.1847243242361407e-06, - "loss": 0.767, - "num_input_tokens_seen": 113865800, - "step": 5361 - }, - { - "epoch": 0.6447423796068057, - "grad_norm": 1.8164042710850974, - "learning_rate": 1.184013073050362e-06, - "loss": 0.782, - "num_input_tokens_seen": 113886800, - "step": 5362 - }, - { - "epoch": 0.6448626224974449, - "grad_norm": 1.9237541469885715, - "learning_rate": 1.1833019456491518e-06, - "loss": 0.7482, - "num_input_tokens_seen": 113908050, - "step": 5363 - }, - { - "epoch": 0.6449828653880839, - "grad_norm": 2.2789041149074185, - "learning_rate": 1.1825909421403871e-06, - "loss": 0.7729, - "num_input_tokens_seen": 113926865, - "step": 5364 - }, - { - "epoch": 0.645103108278723, - "grad_norm": 2.044262204553878, - "learning_rate": 1.1818800626319263e-06, - "loss": 0.7537, - "num_input_tokens_seen": 113945920, - "step": 5365 - }, - { - "epoch": 0.6452233511693621, - "grad_norm": 2.7351556874246166, - "learning_rate": 1.181169307231609e-06, - "loss": 0.8573, - "num_input_tokens_seen": 113963320, - "step": 5366 - }, - { - "epoch": 0.6453435940600012, - "grad_norm": 3.6908690635817285, - "learning_rate": 1.1804586760472574e-06, - "loss": 0.8317, - "num_input_tokens_seen": 113979505, - "step": 5367 - }, - { - "epoch": 0.6454638369506402, - "grad_norm": 2.076550799308343, - "learning_rate": 1.1797481691866732e-06, - "loss": 0.7823, - "num_input_tokens_seen": 113996450, - "step": 5368 - }, - { - "epoch": 0.6455840798412794, - "grad_norm": 2.736922152546507, - "learning_rate": 1.1790377867576393e-06, - "loss": 0.8174, - "num_input_tokens_seen": 114013920, - "step": 5369 - }, - { - "epoch": 0.6457043227319185, - "grad_norm": 2.1888097470898153, - "learning_rate": 1.1783275288679203e-06, - "loss": 0.7607, - "num_input_tokens_seen": 114030805, - "step": 5370 - }, - { - "epoch": 0.6458245656225575, - "grad_norm": 0.9497446653973504, - "learning_rate": 1.177617395625262e-06, - "loss": 0.6493, - "num_input_tokens_seen": 114088500, - "step": 5371 - }, - { - "epoch": 0.6459448085131967, - "grad_norm": 1.9630646231636049, - "learning_rate": 1.176907387137391e-06, - "loss": 0.7597, - "num_input_tokens_seen": 114108425, - "step": 5372 - }, - { - "epoch": 0.6460650514038357, - "grad_norm": 1.7904798573865792, - "learning_rate": 1.176197503512015e-06, - "loss": 0.8381, - "num_input_tokens_seen": 114127860, - "step": 5373 - }, - { - "epoch": 0.6461852942944748, - "grad_norm": 2.593414773562563, - "learning_rate": 1.175487744856822e-06, - "loss": 0.8212, - "num_input_tokens_seen": 114147035, - "step": 5374 - }, - { - "epoch": 0.646305537185114, - "grad_norm": 4.997265095623739, - "learning_rate": 1.1747781112794833e-06, - "loss": 0.897, - "num_input_tokens_seen": 114163250, - "step": 5375 - }, - { - "epoch": 0.646425780075753, - "grad_norm": 1.8633750597736216, - "learning_rate": 1.1740686028876492e-06, - "loss": 0.8165, - "num_input_tokens_seen": 114181835, - "step": 5376 - }, - { - "epoch": 0.6465460229663921, - "grad_norm": 2.7343963932742574, - "learning_rate": 1.173359219788951e-06, - "loss": 0.7448, - "num_input_tokens_seen": 114198465, - "step": 5377 - }, - { - "epoch": 0.6466662658570312, - "grad_norm": 5.201720885059953, - "learning_rate": 1.1726499620910014e-06, - "loss": 0.7143, - "num_input_tokens_seen": 114218465, - "step": 5378 - }, - { - "epoch": 0.6467865087476703, - "grad_norm": 2.225503499233112, - "learning_rate": 1.171940829901395e-06, - "loss": 0.759, - "num_input_tokens_seen": 114236910, - "step": 5379 - }, - { - "epoch": 0.6469067516383094, - "grad_norm": 22.63692718935013, - "learning_rate": 1.1712318233277067e-06, - "loss": 0.7567, - "num_input_tokens_seen": 114255650, - "step": 5380 - }, - { - "epoch": 0.6470269945289485, - "grad_norm": 0.7747348307908427, - "learning_rate": 1.1705229424774918e-06, - "loss": 0.5908, - "num_input_tokens_seen": 114309640, - "step": 5381 - }, - { - "epoch": 0.6471472374195876, - "grad_norm": 1.7503124099654213, - "learning_rate": 1.1698141874582867e-06, - "loss": 0.6379, - "num_input_tokens_seen": 114330405, - "step": 5382 - }, - { - "epoch": 0.6472674803102266, - "grad_norm": 2.482338896064831, - "learning_rate": 1.1691055583776094e-06, - "loss": 0.7176, - "num_input_tokens_seen": 114350215, - "step": 5383 - }, - { - "epoch": 0.6473877232008658, - "grad_norm": 2.9346558193922503, - "learning_rate": 1.1683970553429587e-06, - "loss": 0.7752, - "num_input_tokens_seen": 114371390, - "step": 5384 - }, - { - "epoch": 0.6475079660915048, - "grad_norm": 2.3210669462163787, - "learning_rate": 1.1676886784618128e-06, - "loss": 0.8106, - "num_input_tokens_seen": 114387775, - "step": 5385 - }, - { - "epoch": 0.6476282089821439, - "grad_norm": 2.3177982054961106, - "learning_rate": 1.1669804278416332e-06, - "loss": 0.8234, - "num_input_tokens_seen": 114402220, - "step": 5386 - }, - { - "epoch": 0.6477484518727831, - "grad_norm": 1.9091213905273532, - "learning_rate": 1.1662723035898602e-06, - "loss": 0.709, - "num_input_tokens_seen": 114421700, - "step": 5387 - }, - { - "epoch": 0.6478686947634221, - "grad_norm": 2.026477861519642, - "learning_rate": 1.1655643058139158e-06, - "loss": 0.8125, - "num_input_tokens_seen": 114440420, - "step": 5388 - }, - { - "epoch": 0.6479889376540612, - "grad_norm": 1.737474323674123, - "learning_rate": 1.164856434621203e-06, - "loss": 0.8099, - "num_input_tokens_seen": 114459260, - "step": 5389 - }, - { - "epoch": 0.6481091805447003, - "grad_norm": 2.137194915622085, - "learning_rate": 1.164148690119104e-06, - "loss": 0.7585, - "num_input_tokens_seen": 114480260, - "step": 5390 - }, - { - "epoch": 0.6482294234353394, - "grad_norm": 2.113936030754424, - "learning_rate": 1.163441072414985e-06, - "loss": 0.7372, - "num_input_tokens_seen": 114500185, - "step": 5391 - }, - { - "epoch": 0.6483496663259785, - "grad_norm": 2.181219042106376, - "learning_rate": 1.16273358161619e-06, - "loss": 0.6891, - "num_input_tokens_seen": 114520240, - "step": 5392 - }, - { - "epoch": 0.6484699092166175, - "grad_norm": 2.0493167137015176, - "learning_rate": 1.1620262178300455e-06, - "loss": 0.8415, - "num_input_tokens_seen": 114538575, - "step": 5393 - }, - { - "epoch": 0.6485901521072567, - "grad_norm": 1.8379586571441586, - "learning_rate": 1.1613189811638563e-06, - "loss": 0.7531, - "num_input_tokens_seen": 114560020, - "step": 5394 - }, - { - "epoch": 0.6487103949978957, - "grad_norm": 3.8303575091283943, - "learning_rate": 1.1606118717249117e-06, - "loss": 0.7745, - "num_input_tokens_seen": 114579840, - "step": 5395 - }, - { - "epoch": 0.6488306378885348, - "grad_norm": 2.138371651536261, - "learning_rate": 1.1599048896204787e-06, - "loss": 0.679, - "num_input_tokens_seen": 114599440, - "step": 5396 - }, - { - "epoch": 0.648950880779174, - "grad_norm": 1.834884138733889, - "learning_rate": 1.1591980349578061e-06, - "loss": 0.803, - "num_input_tokens_seen": 114617830, - "step": 5397 - }, - { - "epoch": 0.649071123669813, - "grad_norm": 0.8080736411275956, - "learning_rate": 1.1584913078441222e-06, - "loss": 0.5745, - "num_input_tokens_seen": 114677470, - "step": 5398 - }, - { - "epoch": 0.6491913665604521, - "grad_norm": 2.7017641710080182, - "learning_rate": 1.1577847083866387e-06, - "loss": 0.8382, - "num_input_tokens_seen": 114696225, - "step": 5399 - }, - { - "epoch": 0.6493116094510912, - "grad_norm": 2.833949755276881, - "learning_rate": 1.1570782366925453e-06, - "loss": 0.725, - "num_input_tokens_seen": 114714460, - "step": 5400 - }, - { - "epoch": 0.6494318523417303, - "grad_norm": 6.20291758446645, - "learning_rate": 1.1563718928690132e-06, - "loss": 0.7527, - "num_input_tokens_seen": 114731615, - "step": 5401 - }, - { - "epoch": 0.6495520952323693, - "grad_norm": 2.5196673123363316, - "learning_rate": 1.1556656770231942e-06, - "loss": 0.7039, - "num_input_tokens_seen": 114747530, - "step": 5402 - }, - { - "epoch": 0.6496723381230085, - "grad_norm": 2.299612062614275, - "learning_rate": 1.1549595892622207e-06, - "loss": 0.7569, - "num_input_tokens_seen": 114766020, - "step": 5403 - }, - { - "epoch": 0.6497925810136476, - "grad_norm": 0.8593892284736993, - "learning_rate": 1.1542536296932053e-06, - "loss": 0.6337, - "num_input_tokens_seen": 114829275, - "step": 5404 - }, - { - "epoch": 0.6499128239042866, - "grad_norm": 2.1367610561197155, - "learning_rate": 1.1535477984232423e-06, - "loss": 0.6917, - "num_input_tokens_seen": 114848870, - "step": 5405 - }, - { - "epoch": 0.6500330667949258, - "grad_norm": 2.240752533806768, - "learning_rate": 1.152842095559404e-06, - "loss": 0.7634, - "num_input_tokens_seen": 114869250, - "step": 5406 - }, - { - "epoch": 0.6501533096855648, - "grad_norm": 1.857743792423614, - "learning_rate": 1.1521365212087474e-06, - "loss": 0.7573, - "num_input_tokens_seen": 114888955, - "step": 5407 - }, - { - "epoch": 0.6502735525762039, - "grad_norm": 1.8777365174935885, - "learning_rate": 1.1514310754783062e-06, - "loss": 0.7005, - "num_input_tokens_seen": 114911625, - "step": 5408 - }, - { - "epoch": 0.6503937954668431, - "grad_norm": 1.8850690756719408, - "learning_rate": 1.1507257584750964e-06, - "loss": 0.7272, - "num_input_tokens_seen": 114931525, - "step": 5409 - }, - { - "epoch": 0.6505140383574821, - "grad_norm": 1.9504362481009716, - "learning_rate": 1.150020570306113e-06, - "loss": 0.7632, - "num_input_tokens_seen": 114950385, - "step": 5410 - }, - { - "epoch": 0.6506342812481212, - "grad_norm": 2.786111719453489, - "learning_rate": 1.1493155110783338e-06, - "loss": 0.7494, - "num_input_tokens_seen": 114968630, - "step": 5411 - }, - { - "epoch": 0.6507545241387603, - "grad_norm": 2.743658247547822, - "learning_rate": 1.1486105808987155e-06, - "loss": 0.7047, - "num_input_tokens_seen": 114989840, - "step": 5412 - }, - { - "epoch": 0.6508747670293994, - "grad_norm": 1.9627647652122042, - "learning_rate": 1.1479057798741947e-06, - "loss": 0.803, - "num_input_tokens_seen": 115007615, - "step": 5413 - }, - { - "epoch": 0.6509950099200384, - "grad_norm": 0.8519248943625141, - "learning_rate": 1.1472011081116893e-06, - "loss": 0.5777, - "num_input_tokens_seen": 115064565, - "step": 5414 - }, - { - "epoch": 0.6511152528106776, - "grad_norm": 2.5983168388011895, - "learning_rate": 1.146496565718098e-06, - "loss": 0.7624, - "num_input_tokens_seen": 115084855, - "step": 5415 - }, - { - "epoch": 0.6512354957013167, - "grad_norm": 4.170478578604415, - "learning_rate": 1.1457921528002996e-06, - "loss": 0.7485, - "num_input_tokens_seen": 115103010, - "step": 5416 - }, - { - "epoch": 0.6513557385919557, - "grad_norm": 3.1160852940465325, - "learning_rate": 1.145087869465153e-06, - "loss": 0.7185, - "num_input_tokens_seen": 115123295, - "step": 5417 - }, - { - "epoch": 0.6514759814825949, - "grad_norm": 5.550907757235389, - "learning_rate": 1.1443837158194954e-06, - "loss": 0.6221, - "num_input_tokens_seen": 115138160, - "step": 5418 - }, - { - "epoch": 0.651596224373234, - "grad_norm": 2.3418374344765738, - "learning_rate": 1.1436796919701484e-06, - "loss": 0.7352, - "num_input_tokens_seen": 115156595, - "step": 5419 - }, - { - "epoch": 0.651716467263873, - "grad_norm": 2.1147793267346837, - "learning_rate": 1.1429757980239115e-06, - "loss": 0.6148, - "num_input_tokens_seen": 115176740, - "step": 5420 - }, - { - "epoch": 0.6518367101545122, - "grad_norm": 2.750431851853045, - "learning_rate": 1.1422720340875644e-06, - "loss": 0.8059, - "num_input_tokens_seen": 115195210, - "step": 5421 - }, - { - "epoch": 0.6519569530451512, - "grad_norm": 2.7279162412048805, - "learning_rate": 1.1415684002678671e-06, - "loss": 0.7871, - "num_input_tokens_seen": 115213690, - "step": 5422 - }, - { - "epoch": 0.6520771959357903, - "grad_norm": 3.6482507766970644, - "learning_rate": 1.1408648966715617e-06, - "loss": 0.7803, - "num_input_tokens_seen": 115230930, - "step": 5423 - }, - { - "epoch": 0.6521974388264293, - "grad_norm": 3.831495756765936, - "learning_rate": 1.1401615234053683e-06, - "loss": 0.7197, - "num_input_tokens_seen": 115249470, - "step": 5424 - }, - { - "epoch": 0.6523176817170685, - "grad_norm": 1.842908548710883, - "learning_rate": 1.1394582805759885e-06, - "loss": 0.7512, - "num_input_tokens_seen": 115268470, - "step": 5425 - }, - { - "epoch": 0.6524379246077076, - "grad_norm": 1.8582930045716146, - "learning_rate": 1.1387551682901022e-06, - "loss": 0.7572, - "num_input_tokens_seen": 115288795, - "step": 5426 - }, - { - "epoch": 0.6525581674983466, - "grad_norm": 2.7718112736944365, - "learning_rate": 1.138052186654373e-06, - "loss": 0.7008, - "num_input_tokens_seen": 115305985, - "step": 5427 - }, - { - "epoch": 0.6526784103889858, - "grad_norm": 2.951645136073294, - "learning_rate": 1.1373493357754417e-06, - "loss": 0.8755, - "num_input_tokens_seen": 115324610, - "step": 5428 - }, - { - "epoch": 0.6527986532796248, - "grad_norm": 2.0342297387896298, - "learning_rate": 1.1366466157599303e-06, - "loss": 0.7677, - "num_input_tokens_seen": 115343605, - "step": 5429 - }, - { - "epoch": 0.6529188961702639, - "grad_norm": 3.0844687272804356, - "learning_rate": 1.1359440267144412e-06, - "loss": 0.7539, - "num_input_tokens_seen": 115360780, - "step": 5430 - }, - { - "epoch": 0.653039139060903, - "grad_norm": 1.8415268492795913, - "learning_rate": 1.1352415687455556e-06, - "loss": 0.7384, - "num_input_tokens_seen": 115381760, - "step": 5431 - }, - { - "epoch": 0.6531593819515421, - "grad_norm": 3.175285154046051, - "learning_rate": 1.1345392419598368e-06, - "loss": 0.6365, - "num_input_tokens_seen": 115400360, - "step": 5432 - }, - { - "epoch": 0.6532796248421812, - "grad_norm": 1.9449097702459905, - "learning_rate": 1.133837046463827e-06, - "loss": 0.7082, - "num_input_tokens_seen": 115419480, - "step": 5433 - }, - { - "epoch": 0.6533998677328203, - "grad_norm": 2.860088198945128, - "learning_rate": 1.1331349823640474e-06, - "loss": 0.6358, - "num_input_tokens_seen": 115436630, - "step": 5434 - }, - { - "epoch": 0.6535201106234594, - "grad_norm": 2.6007283199817963, - "learning_rate": 1.132433049767003e-06, - "loss": 0.7828, - "num_input_tokens_seen": 115454265, - "step": 5435 - }, - { - "epoch": 0.6536403535140984, - "grad_norm": 1.628706954462291, - "learning_rate": 1.1317312487791748e-06, - "loss": 0.7998, - "num_input_tokens_seen": 115475635, - "step": 5436 - }, - { - "epoch": 0.6537605964047376, - "grad_norm": 2.8244763712729517, - "learning_rate": 1.131029579507026e-06, - "loss": 0.7192, - "num_input_tokens_seen": 115495295, - "step": 5437 - }, - { - "epoch": 0.6538808392953767, - "grad_norm": 1.974859420721518, - "learning_rate": 1.1303280420569982e-06, - "loss": 0.7994, - "num_input_tokens_seen": 115516900, - "step": 5438 - }, - { - "epoch": 0.6540010821860157, - "grad_norm": 2.1207385191914523, - "learning_rate": 1.1296266365355158e-06, - "loss": 0.7622, - "num_input_tokens_seen": 115540005, - "step": 5439 - }, - { - "epoch": 0.6541213250766549, - "grad_norm": 2.2726856058740488, - "learning_rate": 1.1289253630489806e-06, - "loss": 0.7257, - "num_input_tokens_seen": 115560775, - "step": 5440 - }, - { - "epoch": 0.6542415679672939, - "grad_norm": 2.4532842367711343, - "learning_rate": 1.1282242217037753e-06, - "loss": 0.7382, - "num_input_tokens_seen": 115577995, - "step": 5441 - }, - { - "epoch": 0.654361810857933, - "grad_norm": 2.756443333504138, - "learning_rate": 1.1275232126062614e-06, - "loss": 0.6213, - "num_input_tokens_seen": 115600540, - "step": 5442 - }, - { - "epoch": 0.6544820537485722, - "grad_norm": 1.9539630289725078, - "learning_rate": 1.1268223358627835e-06, - "loss": 0.7241, - "num_input_tokens_seen": 115622750, - "step": 5443 - }, - { - "epoch": 0.6546022966392112, - "grad_norm": 2.1462075815416015, - "learning_rate": 1.126121591579663e-06, - "loss": 0.7056, - "num_input_tokens_seen": 115641675, - "step": 5444 - }, - { - "epoch": 0.6547225395298503, - "grad_norm": 2.7809840030810147, - "learning_rate": 1.1254209798632018e-06, - "loss": 0.6862, - "num_input_tokens_seen": 115662415, - "step": 5445 - }, - { - "epoch": 0.6548427824204894, - "grad_norm": 1.8071513463625968, - "learning_rate": 1.124720500819683e-06, - "loss": 0.8428, - "num_input_tokens_seen": 115680290, - "step": 5446 - }, - { - "epoch": 0.6549630253111285, - "grad_norm": 2.4942841805542235, - "learning_rate": 1.1240201545553682e-06, - "loss": 0.8185, - "num_input_tokens_seen": 115697810, - "step": 5447 - }, - { - "epoch": 0.6550832682017675, - "grad_norm": 7.42106113027378, - "learning_rate": 1.1233199411764996e-06, - "loss": 0.7293, - "num_input_tokens_seen": 115716965, - "step": 5448 - }, - { - "epoch": 0.6552035110924067, - "grad_norm": 2.9914338132911285, - "learning_rate": 1.1226198607892987e-06, - "loss": 0.6882, - "num_input_tokens_seen": 115737245, - "step": 5449 - }, - { - "epoch": 0.6553237539830458, - "grad_norm": 1.9920378915034498, - "learning_rate": 1.1219199134999664e-06, - "loss": 0.795, - "num_input_tokens_seen": 115755465, - "step": 5450 - }, - { - "epoch": 0.6554439968736848, - "grad_norm": 2.1506121379245746, - "learning_rate": 1.1212200994146863e-06, - "loss": 0.7762, - "num_input_tokens_seen": 115772940, - "step": 5451 - }, - { - "epoch": 0.655564239764324, - "grad_norm": 2.6884195725406212, - "learning_rate": 1.120520418639618e-06, - "loss": 0.7485, - "num_input_tokens_seen": 115791195, - "step": 5452 - }, - { - "epoch": 0.655684482654963, - "grad_norm": 2.026326990547198, - "learning_rate": 1.119820871280903e-06, - "loss": 0.8287, - "num_input_tokens_seen": 115811990, - "step": 5453 - }, - { - "epoch": 0.6558047255456021, - "grad_norm": 6.090404650070804, - "learning_rate": 1.1191214574446614e-06, - "loss": 0.7275, - "num_input_tokens_seen": 115831955, - "step": 5454 - }, - { - "epoch": 0.6559249684362413, - "grad_norm": 1.6746053391414106, - "learning_rate": 1.118422177236995e-06, - "loss": 0.7937, - "num_input_tokens_seen": 115853500, - "step": 5455 - }, - { - "epoch": 0.6560452113268803, - "grad_norm": 2.4658539440548206, - "learning_rate": 1.1177230307639835e-06, - "loss": 0.8467, - "num_input_tokens_seen": 115870760, - "step": 5456 - }, - { - "epoch": 0.6561654542175194, - "grad_norm": 1.993763917924796, - "learning_rate": 1.1170240181316865e-06, - "loss": 0.7862, - "num_input_tokens_seen": 115891925, - "step": 5457 - }, - { - "epoch": 0.6562856971081584, - "grad_norm": 2.5433038801142924, - "learning_rate": 1.1163251394461433e-06, - "loss": 0.795, - "num_input_tokens_seen": 115910125, - "step": 5458 - }, - { - "epoch": 0.6564059399987976, - "grad_norm": 2.931023486959919, - "learning_rate": 1.1156263948133746e-06, - "loss": 0.8196, - "num_input_tokens_seen": 115926500, - "step": 5459 - }, - { - "epoch": 0.6565261828894366, - "grad_norm": 1.9998085982412905, - "learning_rate": 1.1149277843393793e-06, - "loss": 0.7746, - "num_input_tokens_seen": 115947380, - "step": 5460 - }, - { - "epoch": 0.6566464257800757, - "grad_norm": 2.454902955488945, - "learning_rate": 1.114229308130135e-06, - "loss": 0.6329, - "num_input_tokens_seen": 115964980, - "step": 5461 - }, - { - "epoch": 0.6567666686707149, - "grad_norm": 2.0762901053238747, - "learning_rate": 1.1135309662915995e-06, - "loss": 0.6685, - "num_input_tokens_seen": 115984865, - "step": 5462 - }, - { - "epoch": 0.6568869115613539, - "grad_norm": 2.792002177719222, - "learning_rate": 1.112832758929712e-06, - "loss": 0.5957, - "num_input_tokens_seen": 116007195, - "step": 5463 - }, - { - "epoch": 0.657007154451993, - "grad_norm": 2.917205676280278, - "learning_rate": 1.11213468615039e-06, - "loss": 0.7398, - "num_input_tokens_seen": 116026345, - "step": 5464 - }, - { - "epoch": 0.6571273973426321, - "grad_norm": 7.936242447778208, - "learning_rate": 1.1114367480595299e-06, - "loss": 0.7554, - "num_input_tokens_seen": 116047145, - "step": 5465 - }, - { - "epoch": 0.6572476402332712, - "grad_norm": 2.71814201176872, - "learning_rate": 1.1107389447630077e-06, - "loss": 0.8098, - "num_input_tokens_seen": 116065565, - "step": 5466 - }, - { - "epoch": 0.6573678831239103, - "grad_norm": 2.2333780829947396, - "learning_rate": 1.1100412763666818e-06, - "loss": 0.7749, - "num_input_tokens_seen": 116080545, - "step": 5467 - }, - { - "epoch": 0.6574881260145494, - "grad_norm": 3.4091599597747435, - "learning_rate": 1.1093437429763865e-06, - "loss": 0.7922, - "num_input_tokens_seen": 116100530, - "step": 5468 - }, - { - "epoch": 0.6576083689051885, - "grad_norm": 2.651966925931211, - "learning_rate": 1.108646344697937e-06, - "loss": 0.7401, - "num_input_tokens_seen": 116118600, - "step": 5469 - }, - { - "epoch": 0.6577286117958275, - "grad_norm": 2.4425894714104186, - "learning_rate": 1.1079490816371277e-06, - "loss": 0.7596, - "num_input_tokens_seen": 116138085, - "step": 5470 - }, - { - "epoch": 0.6578488546864667, - "grad_norm": 3.157342247876814, - "learning_rate": 1.1072519538997346e-06, - "loss": 0.7314, - "num_input_tokens_seen": 116156945, - "step": 5471 - }, - { - "epoch": 0.6579690975771058, - "grad_norm": 1.6831217598836876, - "learning_rate": 1.1065549615915095e-06, - "loss": 0.8115, - "num_input_tokens_seen": 116176495, - "step": 5472 - }, - { - "epoch": 0.6580893404677448, - "grad_norm": 2.9241770060946326, - "learning_rate": 1.105858104818187e-06, - "loss": 0.7717, - "num_input_tokens_seen": 116197370, - "step": 5473 - }, - { - "epoch": 0.658209583358384, - "grad_norm": 3.39827239466364, - "learning_rate": 1.1051613836854788e-06, - "loss": 0.7431, - "num_input_tokens_seen": 116213475, - "step": 5474 - }, - { - "epoch": 0.658329826249023, - "grad_norm": 0.7825908881542003, - "learning_rate": 1.1044647982990771e-06, - "loss": 0.5992, - "num_input_tokens_seen": 116275080, - "step": 5475 - }, - { - "epoch": 0.6584500691396621, - "grad_norm": 2.688796430741427, - "learning_rate": 1.1037683487646536e-06, - "loss": 0.6326, - "num_input_tokens_seen": 116295085, - "step": 5476 - }, - { - "epoch": 0.6585703120303013, - "grad_norm": 1.9504613669587676, - "learning_rate": 1.1030720351878594e-06, - "loss": 0.7665, - "num_input_tokens_seen": 116312925, - "step": 5477 - }, - { - "epoch": 0.6586905549209403, - "grad_norm": 0.8279952753306795, - "learning_rate": 1.102375857674323e-06, - "loss": 0.6164, - "num_input_tokens_seen": 116374560, - "step": 5478 - }, - { - "epoch": 0.6588107978115794, - "grad_norm": 2.096631672149992, - "learning_rate": 1.1016798163296561e-06, - "loss": 0.8975, - "num_input_tokens_seen": 116393480, - "step": 5479 - }, - { - "epoch": 0.6589310407022185, - "grad_norm": 2.2720620266549147, - "learning_rate": 1.1009839112594471e-06, - "loss": 0.6553, - "num_input_tokens_seen": 116411225, - "step": 5480 - }, - { - "epoch": 0.6590512835928576, - "grad_norm": 2.874935116488654, - "learning_rate": 1.1002881425692638e-06, - "loss": 0.7156, - "num_input_tokens_seen": 116431375, - "step": 5481 - }, - { - "epoch": 0.6591715264834966, - "grad_norm": 1.8068882832227608, - "learning_rate": 1.0995925103646532e-06, - "loss": 0.7487, - "num_input_tokens_seen": 116449695, - "step": 5482 - }, - { - "epoch": 0.6592917693741358, - "grad_norm": 2.7713059094046475, - "learning_rate": 1.0988970147511437e-06, - "loss": 0.6666, - "num_input_tokens_seen": 116471295, - "step": 5483 - }, - { - "epoch": 0.6594120122647749, - "grad_norm": 2.6744358497419354, - "learning_rate": 1.0982016558342405e-06, - "loss": 0.8009, - "num_input_tokens_seen": 116489985, - "step": 5484 - }, - { - "epoch": 0.6595322551554139, - "grad_norm": 2.321274491254443, - "learning_rate": 1.0975064337194291e-06, - "loss": 0.7104, - "num_input_tokens_seen": 116507750, - "step": 5485 - }, - { - "epoch": 0.6596524980460531, - "grad_norm": 2.062603096622019, - "learning_rate": 1.0968113485121734e-06, - "loss": 0.6973, - "num_input_tokens_seen": 116527060, - "step": 5486 - }, - { - "epoch": 0.6597727409366921, - "grad_norm": 2.0816052496221196, - "learning_rate": 1.0961164003179185e-06, - "loss": 0.797, - "num_input_tokens_seen": 116545290, - "step": 5487 - }, - { - "epoch": 0.6598929838273312, - "grad_norm": 2.302974991219862, - "learning_rate": 1.0954215892420875e-06, - "loss": 0.8307, - "num_input_tokens_seen": 116565710, - "step": 5488 - }, - { - "epoch": 0.6600132267179702, - "grad_norm": 2.353437844330326, - "learning_rate": 1.094726915390082e-06, - "loss": 0.7012, - "num_input_tokens_seen": 116583765, - "step": 5489 - }, - { - "epoch": 0.6601334696086094, - "grad_norm": 1.8788638675061986, - "learning_rate": 1.0940323788672836e-06, - "loss": 0.694, - "num_input_tokens_seen": 116602660, - "step": 5490 - }, - { - "epoch": 0.6602537124992485, - "grad_norm": 2.212091276382403, - "learning_rate": 1.093337979779053e-06, - "loss": 0.7387, - "num_input_tokens_seen": 116621795, - "step": 5491 - }, - { - "epoch": 0.6603739553898875, - "grad_norm": 2.5279204872176817, - "learning_rate": 1.0926437182307302e-06, - "loss": 0.7096, - "num_input_tokens_seen": 116640325, - "step": 5492 - }, - { - "epoch": 0.6604941982805267, - "grad_norm": 2.161517934802307, - "learning_rate": 1.0919495943276338e-06, - "loss": 0.778, - "num_input_tokens_seen": 116661065, - "step": 5493 - }, - { - "epoch": 0.6606144411711657, - "grad_norm": 3.0094488106314845, - "learning_rate": 1.0912556081750611e-06, - "loss": 0.7554, - "num_input_tokens_seen": 116678715, - "step": 5494 - }, - { - "epoch": 0.6607346840618048, - "grad_norm": 2.008382762873296, - "learning_rate": 1.0905617598782909e-06, - "loss": 0.7611, - "num_input_tokens_seen": 116698640, - "step": 5495 - }, - { - "epoch": 0.660854926952444, - "grad_norm": 2.7774383930739583, - "learning_rate": 1.0898680495425786e-06, - "loss": 0.8082, - "num_input_tokens_seen": 116716650, - "step": 5496 - }, - { - "epoch": 0.660975169843083, - "grad_norm": 1.879071591427866, - "learning_rate": 1.0891744772731594e-06, - "loss": 0.7989, - "num_input_tokens_seen": 116734185, - "step": 5497 - }, - { - "epoch": 0.6610954127337221, - "grad_norm": 2.1500111413072323, - "learning_rate": 1.0884810431752473e-06, - "loss": 0.6432, - "num_input_tokens_seen": 116754475, - "step": 5498 - }, - { - "epoch": 0.6612156556243612, - "grad_norm": 2.0618194358256523, - "learning_rate": 1.0877877473540368e-06, - "loss": 0.7525, - "num_input_tokens_seen": 116774780, - "step": 5499 - }, - { - "epoch": 0.6613358985150003, - "grad_norm": 1.8766934680817506, - "learning_rate": 1.0870945899147002e-06, - "loss": 0.7217, - "num_input_tokens_seen": 116791145, - "step": 5500 - }, - { - "epoch": 0.6614561414056394, - "grad_norm": 1.8937098029064863, - "learning_rate": 1.0864015709623879e-06, - "loss": 0.7456, - "num_input_tokens_seen": 116811735, - "step": 5501 - }, - { - "epoch": 0.6615763842962785, - "grad_norm": 2.669832105247532, - "learning_rate": 1.0857086906022303e-06, - "loss": 0.7879, - "num_input_tokens_seen": 116829790, - "step": 5502 - }, - { - "epoch": 0.6616966271869176, - "grad_norm": 2.367858427789993, - "learning_rate": 1.0850159489393388e-06, - "loss": 0.7238, - "num_input_tokens_seen": 116848770, - "step": 5503 - }, - { - "epoch": 0.6618168700775566, - "grad_norm": 2.0560595715073786, - "learning_rate": 1.0843233460788e-06, - "loss": 0.815, - "num_input_tokens_seen": 116865705, - "step": 5504 - }, - { - "epoch": 0.6619371129681958, - "grad_norm": 1.9499404117032744, - "learning_rate": 1.0836308821256812e-06, - "loss": 0.7744, - "num_input_tokens_seen": 116886225, - "step": 5505 - }, - { - "epoch": 0.6620573558588349, - "grad_norm": 2.0850437278002825, - "learning_rate": 1.0829385571850282e-06, - "loss": 0.7796, - "num_input_tokens_seen": 116902925, - "step": 5506 - }, - { - "epoch": 0.6621775987494739, - "grad_norm": 2.945327206187434, - "learning_rate": 1.0822463713618679e-06, - "loss": 0.8314, - "num_input_tokens_seen": 116919500, - "step": 5507 - }, - { - "epoch": 0.6622978416401131, - "grad_norm": 4.135214924993876, - "learning_rate": 1.0815543247612034e-06, - "loss": 0.8365, - "num_input_tokens_seen": 116936290, - "step": 5508 - }, - { - "epoch": 0.6624180845307521, - "grad_norm": 2.57782346587537, - "learning_rate": 1.0808624174880174e-06, - "loss": 0.8222, - "num_input_tokens_seen": 116956660, - "step": 5509 - }, - { - "epoch": 0.6625383274213912, - "grad_norm": 2.5597878091921125, - "learning_rate": 1.0801706496472714e-06, - "loss": 0.7892, - "num_input_tokens_seen": 116976185, - "step": 5510 - }, - { - "epoch": 0.6626585703120303, - "grad_norm": 1.7783624732037169, - "learning_rate": 1.0794790213439068e-06, - "loss": 0.6667, - "num_input_tokens_seen": 117002805, - "step": 5511 - }, - { - "epoch": 0.6627788132026694, - "grad_norm": 2.2096679364253484, - "learning_rate": 1.078787532682843e-06, - "loss": 0.7752, - "num_input_tokens_seen": 117020000, - "step": 5512 - }, - { - "epoch": 0.6628990560933085, - "grad_norm": 2.9892144218478927, - "learning_rate": 1.0780961837689781e-06, - "loss": 0.7551, - "num_input_tokens_seen": 117039230, - "step": 5513 - }, - { - "epoch": 0.6630192989839476, - "grad_norm": 1.6151765442348993, - "learning_rate": 1.0774049747071883e-06, - "loss": 0.6941, - "num_input_tokens_seen": 117056830, - "step": 5514 - }, - { - "epoch": 0.6631395418745867, - "grad_norm": 2.484489142523293, - "learning_rate": 1.0767139056023312e-06, - "loss": 0.6822, - "num_input_tokens_seen": 117077125, - "step": 5515 - }, - { - "epoch": 0.6632597847652257, - "grad_norm": 1.9147662923813924, - "learning_rate": 1.07602297655924e-06, - "loss": 0.8019, - "num_input_tokens_seen": 117095165, - "step": 5516 - }, - { - "epoch": 0.6633800276558649, - "grad_norm": 2.1325139450852117, - "learning_rate": 1.0753321876827292e-06, - "loss": 0.7988, - "num_input_tokens_seen": 117114170, - "step": 5517 - }, - { - "epoch": 0.663500270546504, - "grad_norm": 2.5324050566788823, - "learning_rate": 1.0746415390775902e-06, - "loss": 0.7323, - "num_input_tokens_seen": 117132020, - "step": 5518 - }, - { - "epoch": 0.663620513437143, - "grad_norm": 2.0002547362609895, - "learning_rate": 1.0739510308485939e-06, - "loss": 0.7746, - "num_input_tokens_seen": 117148955, - "step": 5519 - }, - { - "epoch": 0.6637407563277821, - "grad_norm": 0.8236460222148144, - "learning_rate": 1.07326066310049e-06, - "loss": 0.6436, - "num_input_tokens_seen": 117212800, - "step": 5520 - }, - { - "epoch": 0.6638609992184212, - "grad_norm": 2.749800989513713, - "learning_rate": 1.0725704359380065e-06, - "loss": 0.7962, - "num_input_tokens_seen": 117232375, - "step": 5521 - }, - { - "epoch": 0.6639812421090603, - "grad_norm": 2.3778164296012148, - "learning_rate": 1.0718803494658497e-06, - "loss": 0.7137, - "num_input_tokens_seen": 117250985, - "step": 5522 - }, - { - "epoch": 0.6641014849996993, - "grad_norm": 2.2148159103921485, - "learning_rate": 1.071190403788707e-06, - "loss": 0.8342, - "num_input_tokens_seen": 117266010, - "step": 5523 - }, - { - "epoch": 0.6642217278903385, - "grad_norm": 3.858551778709007, - "learning_rate": 1.0705005990112415e-06, - "loss": 0.7454, - "num_input_tokens_seen": 117285510, - "step": 5524 - }, - { - "epoch": 0.6643419707809776, - "grad_norm": 3.325488403273317, - "learning_rate": 1.0698109352380957e-06, - "loss": 0.7419, - "num_input_tokens_seen": 117302830, - "step": 5525 - }, - { - "epoch": 0.6644622136716166, - "grad_norm": 2.587646484770462, - "learning_rate": 1.0691214125738909e-06, - "loss": 0.7732, - "num_input_tokens_seen": 117322755, - "step": 5526 - }, - { - "epoch": 0.6645824565622558, - "grad_norm": 2.2338633944812245, - "learning_rate": 1.0684320311232287e-06, - "loss": 0.6143, - "num_input_tokens_seen": 117385380, - "step": 5527 - }, - { - "epoch": 0.6647026994528948, - "grad_norm": 1.9672865995361937, - "learning_rate": 1.0677427909906865e-06, - "loss": 0.8057, - "num_input_tokens_seen": 117405550, - "step": 5528 - }, - { - "epoch": 0.6648229423435339, - "grad_norm": 2.807091670308767, - "learning_rate": 1.0670536922808216e-06, - "loss": 0.7129, - "num_input_tokens_seen": 117425395, - "step": 5529 - }, - { - "epoch": 0.6649431852341731, - "grad_norm": 2.5961994790189977, - "learning_rate": 1.066364735098169e-06, - "loss": 0.7127, - "num_input_tokens_seen": 117441495, - "step": 5530 - }, - { - "epoch": 0.6650634281248121, - "grad_norm": 2.32410710757479, - "learning_rate": 1.0656759195472447e-06, - "loss": 0.8046, - "num_input_tokens_seen": 117458505, - "step": 5531 - }, - { - "epoch": 0.6651836710154512, - "grad_norm": 0.8313125604807928, - "learning_rate": 1.0649872457325403e-06, - "loss": 0.6416, - "num_input_tokens_seen": 117519510, - "step": 5532 - }, - { - "epoch": 0.6653039139060903, - "grad_norm": 0.9143975828236155, - "learning_rate": 1.0642987137585278e-06, - "loss": 0.6133, - "num_input_tokens_seen": 117578755, - "step": 5533 - }, - { - "epoch": 0.6654241567967294, - "grad_norm": 1.822386305081586, - "learning_rate": 1.0636103237296561e-06, - "loss": 0.8164, - "num_input_tokens_seen": 117597400, - "step": 5534 - }, - { - "epoch": 0.6655443996873684, - "grad_norm": 2.059920949425601, - "learning_rate": 1.0629220757503538e-06, - "loss": 0.8303, - "num_input_tokens_seen": 117617135, - "step": 5535 - }, - { - "epoch": 0.6656646425780076, - "grad_norm": 2.7126568025008493, - "learning_rate": 1.0622339699250274e-06, - "loss": 0.7142, - "num_input_tokens_seen": 117634775, - "step": 5536 - }, - { - "epoch": 0.6657848854686467, - "grad_norm": 1.8709695712420376, - "learning_rate": 1.0615460063580624e-06, - "loss": 0.7928, - "num_input_tokens_seen": 117652970, - "step": 5537 - }, - { - "epoch": 0.6659051283592857, - "grad_norm": 2.4054106377231697, - "learning_rate": 1.060858185153821e-06, - "loss": 0.7311, - "num_input_tokens_seen": 117670790, - "step": 5538 - }, - { - "epoch": 0.6660253712499249, - "grad_norm": 2.6512031915456378, - "learning_rate": 1.0601705064166474e-06, - "loss": 0.7622, - "num_input_tokens_seen": 117688905, - "step": 5539 - }, - { - "epoch": 0.666145614140564, - "grad_norm": 2.304515260912785, - "learning_rate": 1.0594829702508605e-06, - "loss": 0.7267, - "num_input_tokens_seen": 117706340, - "step": 5540 - }, - { - "epoch": 0.666265857031203, - "grad_norm": 1.977406769513987, - "learning_rate": 1.0587955767607592e-06, - "loss": 0.547, - "num_input_tokens_seen": 117727920, - "step": 5541 - }, - { - "epoch": 0.6663860999218422, - "grad_norm": 3.67318750815592, - "learning_rate": 1.0581083260506198e-06, - "loss": 0.7754, - "num_input_tokens_seen": 117744425, - "step": 5542 - }, - { - "epoch": 0.6665063428124812, - "grad_norm": 2.5154150695839004, - "learning_rate": 1.0574212182246993e-06, - "loss": 0.7631, - "num_input_tokens_seen": 117762840, - "step": 5543 - }, - { - "epoch": 0.6666265857031203, - "grad_norm": 2.588424178863202, - "learning_rate": 1.0567342533872303e-06, - "loss": 0.7466, - "num_input_tokens_seen": 117782590, - "step": 5544 - }, - { - "epoch": 0.6667468285937594, - "grad_norm": 1.9228600443687214, - "learning_rate": 1.0560474316424255e-06, - "loss": 0.8096, - "num_input_tokens_seen": 117802070, - "step": 5545 - }, - { - "epoch": 0.6668670714843985, - "grad_norm": 4.824440139837768, - "learning_rate": 1.0553607530944746e-06, - "loss": 0.7376, - "num_input_tokens_seen": 117819845, - "step": 5546 - }, - { - "epoch": 0.6669873143750376, - "grad_norm": 2.1490731892797927, - "learning_rate": 1.0546742178475463e-06, - "loss": 0.8884, - "num_input_tokens_seen": 117838560, - "step": 5547 - }, - { - "epoch": 0.6671075572656767, - "grad_norm": 1.97939604325987, - "learning_rate": 1.0539878260057874e-06, - "loss": 0.8603, - "num_input_tokens_seen": 117857320, - "step": 5548 - }, - { - "epoch": 0.6672278001563158, - "grad_norm": 2.865090115922973, - "learning_rate": 1.0533015776733237e-06, - "loss": 0.6731, - "num_input_tokens_seen": 117873190, - "step": 5549 - }, - { - "epoch": 0.6673480430469548, - "grad_norm": 2.942809741309313, - "learning_rate": 1.0526154729542566e-06, - "loss": 0.7784, - "num_input_tokens_seen": 117892970, - "step": 5550 - }, - { - "epoch": 0.6674682859375939, - "grad_norm": 3.428035061954582, - "learning_rate": 1.0519295119526699e-06, - "loss": 0.7931, - "num_input_tokens_seen": 117908995, - "step": 5551 - }, - { - "epoch": 0.667588528828233, - "grad_norm": 1.9148807291909227, - "learning_rate": 1.0512436947726227e-06, - "loss": 0.8235, - "num_input_tokens_seen": 117930130, - "step": 5552 - }, - { - "epoch": 0.6677087717188721, - "grad_norm": 2.528009267599202, - "learning_rate": 1.0505580215181525e-06, - "loss": 0.6555, - "num_input_tokens_seen": 117948090, - "step": 5553 - }, - { - "epoch": 0.6678290146095112, - "grad_norm": 0.8537304903581836, - "learning_rate": 1.0498724922932746e-06, - "loss": 0.5915, - "num_input_tokens_seen": 118005925, - "step": 5554 - }, - { - "epoch": 0.6679492575001503, - "grad_norm": 2.4357237326526895, - "learning_rate": 1.0491871072019851e-06, - "loss": 0.8545, - "num_input_tokens_seen": 118023535, - "step": 5555 - }, - { - "epoch": 0.6680695003907894, - "grad_norm": 2.0398163692095124, - "learning_rate": 1.0485018663482555e-06, - "loss": 0.6301, - "num_input_tokens_seen": 118043275, - "step": 5556 - }, - { - "epoch": 0.6681897432814284, - "grad_norm": 3.05609966831787, - "learning_rate": 1.0478167698360362e-06, - "loss": 0.7031, - "num_input_tokens_seen": 118062295, - "step": 5557 - }, - { - "epoch": 0.6683099861720676, - "grad_norm": 2.602022834880324, - "learning_rate": 1.047131817769255e-06, - "loss": 0.697, - "num_input_tokens_seen": 118082315, - "step": 5558 - }, - { - "epoch": 0.6684302290627067, - "grad_norm": 2.7578551688172204, - "learning_rate": 1.0464470102518203e-06, - "loss": 0.7472, - "num_input_tokens_seen": 118099365, - "step": 5559 - }, - { - "epoch": 0.6685504719533457, - "grad_norm": 2.163731678669961, - "learning_rate": 1.0457623473876157e-06, - "loss": 0.757, - "num_input_tokens_seen": 118118590, - "step": 5560 - }, - { - "epoch": 0.6686707148439849, - "grad_norm": 2.337917324343052, - "learning_rate": 1.0450778292805046e-06, - "loss": 0.6988, - "num_input_tokens_seen": 118138295, - "step": 5561 - }, - { - "epoch": 0.6687909577346239, - "grad_norm": 2.437667662726632, - "learning_rate": 1.0443934560343276e-06, - "loss": 0.7861, - "num_input_tokens_seen": 118159425, - "step": 5562 - }, - { - "epoch": 0.668911200625263, - "grad_norm": 2.1442833880309693, - "learning_rate": 1.0437092277529034e-06, - "loss": 0.7709, - "num_input_tokens_seen": 118178400, - "step": 5563 - }, - { - "epoch": 0.6690314435159022, - "grad_norm": 2.27078078330895, - "learning_rate": 1.0430251445400292e-06, - "loss": 0.7406, - "num_input_tokens_seen": 118196165, - "step": 5564 - }, - { - "epoch": 0.6691516864065412, - "grad_norm": 3.3023129754038925, - "learning_rate": 1.0423412064994794e-06, - "loss": 0.6225, - "num_input_tokens_seen": 118216655, - "step": 5565 - }, - { - "epoch": 0.6692719292971803, - "grad_norm": 2.96278896963116, - "learning_rate": 1.0416574137350064e-06, - "loss": 0.7394, - "num_input_tokens_seen": 118237080, - "step": 5566 - }, - { - "epoch": 0.6693921721878194, - "grad_norm": 2.939401981641236, - "learning_rate": 1.0409737663503428e-06, - "loss": 0.8103, - "num_input_tokens_seen": 118255180, - "step": 5567 - }, - { - "epoch": 0.6695124150784585, - "grad_norm": 2.8569625755783408, - "learning_rate": 1.040290264449196e-06, - "loss": 0.836, - "num_input_tokens_seen": 118273005, - "step": 5568 - }, - { - "epoch": 0.6696326579690975, - "grad_norm": 2.2391590844751557, - "learning_rate": 1.0396069081352532e-06, - "loss": 0.629, - "num_input_tokens_seen": 118291880, - "step": 5569 - }, - { - "epoch": 0.6697529008597367, - "grad_norm": 0.8826317822324508, - "learning_rate": 1.0389236975121782e-06, - "loss": 0.596, - "num_input_tokens_seen": 118346450, - "step": 5570 - }, - { - "epoch": 0.6698731437503758, - "grad_norm": 3.313814000089316, - "learning_rate": 1.0382406326836147e-06, - "loss": 0.7161, - "num_input_tokens_seen": 118365315, - "step": 5571 - }, - { - "epoch": 0.6699933866410148, - "grad_norm": 2.4235946257177177, - "learning_rate": 1.0375577137531828e-06, - "loss": 0.7435, - "num_input_tokens_seen": 118383595, - "step": 5572 - }, - { - "epoch": 0.670113629531654, - "grad_norm": 1.813027822895774, - "learning_rate": 1.0368749408244802e-06, - "loss": 0.7149, - "num_input_tokens_seen": 118406235, - "step": 5573 - }, - { - "epoch": 0.670233872422293, - "grad_norm": 2.259117521308527, - "learning_rate": 1.0361923140010827e-06, - "loss": 0.781, - "num_input_tokens_seen": 118424440, - "step": 5574 - }, - { - "epoch": 0.6703541153129321, - "grad_norm": 3.0086824393775347, - "learning_rate": 1.0355098333865455e-06, - "loss": 0.6374, - "num_input_tokens_seen": 118443390, - "step": 5575 - }, - { - "epoch": 0.6704743582035713, - "grad_norm": 2.1281985344684466, - "learning_rate": 1.0348274990844e-06, - "loss": 0.6864, - "num_input_tokens_seen": 118465870, - "step": 5576 - }, - { - "epoch": 0.6705946010942103, - "grad_norm": 2.1619763019913876, - "learning_rate": 1.034145311198155e-06, - "loss": 0.721, - "num_input_tokens_seen": 118485605, - "step": 5577 - }, - { - "epoch": 0.6707148439848494, - "grad_norm": 2.019244425422779, - "learning_rate": 1.0334632698312989e-06, - "loss": 0.636, - "num_input_tokens_seen": 118506120, - "step": 5578 - }, - { - "epoch": 0.6708350868754885, - "grad_norm": 2.208751621630716, - "learning_rate": 1.0327813750872958e-06, - "loss": 0.7459, - "num_input_tokens_seen": 118525740, - "step": 5579 - }, - { - "epoch": 0.6709553297661276, - "grad_norm": 1.6084535602024201, - "learning_rate": 1.0320996270695891e-06, - "loss": 0.6659, - "num_input_tokens_seen": 118546530, - "step": 5580 - }, - { - "epoch": 0.6710755726567667, - "grad_norm": 3.7151223899624943, - "learning_rate": 1.0314180258815998e-06, - "loss": 0.7297, - "num_input_tokens_seen": 118564890, - "step": 5581 - }, - { - "epoch": 0.6711958155474057, - "grad_norm": 2.110729006405776, - "learning_rate": 1.0307365716267247e-06, - "loss": 0.7411, - "num_input_tokens_seen": 118585055, - "step": 5582 - }, - { - "epoch": 0.6713160584380449, - "grad_norm": 3.1181906940330326, - "learning_rate": 1.0300552644083423e-06, - "loss": 0.7748, - "num_input_tokens_seen": 118603700, - "step": 5583 - }, - { - "epoch": 0.6714363013286839, - "grad_norm": 3.2785691103822248, - "learning_rate": 1.0293741043298045e-06, - "loss": 0.7331, - "num_input_tokens_seen": 118621770, - "step": 5584 - }, - { - "epoch": 0.671556544219323, - "grad_norm": 3.567506504420239, - "learning_rate": 1.0286930914944436e-06, - "loss": 0.7147, - "num_input_tokens_seen": 118641305, - "step": 5585 - }, - { - "epoch": 0.6716767871099621, - "grad_norm": 2.571966391192147, - "learning_rate": 1.0280122260055678e-06, - "loss": 0.7703, - "num_input_tokens_seen": 118656735, - "step": 5586 - }, - { - "epoch": 0.6717970300006012, - "grad_norm": 2.1708265279817085, - "learning_rate": 1.0273315079664652e-06, - "loss": 0.8136, - "num_input_tokens_seen": 118674410, - "step": 5587 - }, - { - "epoch": 0.6719172728912403, - "grad_norm": 2.196258295026403, - "learning_rate": 1.0266509374803992e-06, - "loss": 0.7523, - "num_input_tokens_seen": 118695290, - "step": 5588 - }, - { - "epoch": 0.6720375157818794, - "grad_norm": 7.56596757654837, - "learning_rate": 1.0259705146506123e-06, - "loss": 0.8359, - "num_input_tokens_seen": 118709905, - "step": 5589 - }, - { - "epoch": 0.6721577586725185, - "grad_norm": 2.066362511497974, - "learning_rate": 1.025290239580324e-06, - "loss": 0.7665, - "num_input_tokens_seen": 118730295, - "step": 5590 - }, - { - "epoch": 0.6722780015631575, - "grad_norm": 2.116341385128596, - "learning_rate": 1.0246101123727313e-06, - "loss": 0.7529, - "num_input_tokens_seen": 118748995, - "step": 5591 - }, - { - "epoch": 0.6723982444537967, - "grad_norm": 2.3846078621139855, - "learning_rate": 1.023930133131009e-06, - "loss": 0.7827, - "num_input_tokens_seen": 118766335, - "step": 5592 - }, - { - "epoch": 0.6725184873444358, - "grad_norm": 1.6127821865552134, - "learning_rate": 1.0232503019583094e-06, - "loss": 0.8776, - "num_input_tokens_seen": 118785665, - "step": 5593 - }, - { - "epoch": 0.6726387302350748, - "grad_norm": 2.17079772582495, - "learning_rate": 1.0225706189577619e-06, - "loss": 0.6981, - "num_input_tokens_seen": 118803910, - "step": 5594 - }, - { - "epoch": 0.672758973125714, - "grad_norm": 2.879632471789694, - "learning_rate": 1.021891084232475e-06, - "loss": 0.7378, - "num_input_tokens_seen": 118821565, - "step": 5595 - }, - { - "epoch": 0.672879216016353, - "grad_norm": 4.997962772332968, - "learning_rate": 1.0212116978855325e-06, - "loss": 0.7956, - "num_input_tokens_seen": 118839300, - "step": 5596 - }, - { - "epoch": 0.6729994589069921, - "grad_norm": 1.7452230524892114, - "learning_rate": 1.0205324600199976e-06, - "loss": 0.7824, - "num_input_tokens_seen": 118858270, - "step": 5597 - }, - { - "epoch": 0.6731197017976313, - "grad_norm": 2.0223611672689428, - "learning_rate": 1.0198533707389088e-06, - "loss": 0.6926, - "num_input_tokens_seen": 118878865, - "step": 5598 - }, - { - "epoch": 0.6732399446882703, - "grad_norm": 2.0880100859394335, - "learning_rate": 1.0191744301452853e-06, - "loss": 0.731, - "num_input_tokens_seen": 118897885, - "step": 5599 - }, - { - "epoch": 0.6733601875789094, - "grad_norm": 3.3222466948532134, - "learning_rate": 1.0184956383421208e-06, - "loss": 0.6989, - "num_input_tokens_seen": 118916255, - "step": 5600 - }, - { - "epoch": 0.6734804304695485, - "grad_norm": 3.004318834775927, - "learning_rate": 1.0178169954323876e-06, - "loss": 0.6486, - "num_input_tokens_seen": 118935075, - "step": 5601 - }, - { - "epoch": 0.6736006733601876, - "grad_norm": 1.9209239396004463, - "learning_rate": 1.0171385015190347e-06, - "loss": 0.7374, - "num_input_tokens_seen": 118954655, - "step": 5602 - }, - { - "epoch": 0.6737209162508266, - "grad_norm": 2.292331758203086, - "learning_rate": 1.0164601567049902e-06, - "loss": 0.7219, - "num_input_tokens_seen": 118972905, - "step": 5603 - }, - { - "epoch": 0.6738411591414658, - "grad_norm": 2.223441180967998, - "learning_rate": 1.015781961093158e-06, - "loss": 0.7982, - "num_input_tokens_seen": 118991945, - "step": 5604 - }, - { - "epoch": 0.6739614020321049, - "grad_norm": 1.8999873790586812, - "learning_rate": 1.0151039147864197e-06, - "loss": 0.764, - "num_input_tokens_seen": 119011640, - "step": 5605 - }, - { - "epoch": 0.6740816449227439, - "grad_norm": 2.341380340720333, - "learning_rate": 1.0144260178876342e-06, - "loss": 0.6659, - "num_input_tokens_seen": 119030705, - "step": 5606 - }, - { - "epoch": 0.6742018878133831, - "grad_norm": 2.7409618712656276, - "learning_rate": 1.0137482704996388e-06, - "loss": 0.6684, - "num_input_tokens_seen": 119044775, - "step": 5607 - }, - { - "epoch": 0.6743221307040221, - "grad_norm": 6.963618248197937, - "learning_rate": 1.0130706727252461e-06, - "loss": 0.7938, - "num_input_tokens_seen": 119061550, - "step": 5608 - }, - { - "epoch": 0.6744423735946612, - "grad_norm": 3.1627852451343337, - "learning_rate": 1.0123932246672477e-06, - "loss": 0.673, - "num_input_tokens_seen": 119075415, - "step": 5609 - }, - { - "epoch": 0.6745626164853004, - "grad_norm": 0.814674407097078, - "learning_rate": 1.0117159264284114e-06, - "loss": 0.5849, - "num_input_tokens_seen": 119138305, - "step": 5610 - }, - { - "epoch": 0.6746828593759394, - "grad_norm": 2.018503852648043, - "learning_rate": 1.0110387781114837e-06, - "loss": 0.7708, - "num_input_tokens_seen": 119156640, - "step": 5611 - }, - { - "epoch": 0.6748031022665785, - "grad_norm": 3.414565340989417, - "learning_rate": 1.0103617798191872e-06, - "loss": 0.761, - "num_input_tokens_seen": 119175835, - "step": 5612 - }, - { - "epoch": 0.6749233451572175, - "grad_norm": 2.4903377724153843, - "learning_rate": 1.0096849316542217e-06, - "loss": 0.82, - "num_input_tokens_seen": 119192105, - "step": 5613 - }, - { - "epoch": 0.6750435880478567, - "grad_norm": 2.798613139488539, - "learning_rate": 1.0090082337192643e-06, - "loss": 0.7407, - "num_input_tokens_seen": 119211470, - "step": 5614 - }, - { - "epoch": 0.6751638309384957, - "grad_norm": 5.579463810680022, - "learning_rate": 1.0083316861169705e-06, - "loss": 0.7785, - "num_input_tokens_seen": 119229925, - "step": 5615 - }, - { - "epoch": 0.6752840738291348, - "grad_norm": 3.2200899173212356, - "learning_rate": 1.0076552889499713e-06, - "loss": 0.7205, - "num_input_tokens_seen": 119250410, - "step": 5616 - }, - { - "epoch": 0.675404316719774, - "grad_norm": 2.230974019375234, - "learning_rate": 1.006979042320876e-06, - "loss": 0.7267, - "num_input_tokens_seen": 119270345, - "step": 5617 - }, - { - "epoch": 0.675524559610413, - "grad_norm": 3.4822170858000105, - "learning_rate": 1.0063029463322693e-06, - "loss": 0.6313, - "num_input_tokens_seen": 119290340, - "step": 5618 - }, - { - "epoch": 0.6756448025010521, - "grad_norm": 3.7485509339466563, - "learning_rate": 1.0056270010867164e-06, - "loss": 0.7446, - "num_input_tokens_seen": 119307630, - "step": 5619 - }, - { - "epoch": 0.6757650453916912, - "grad_norm": 3.752538983738534, - "learning_rate": 1.004951206686757e-06, - "loss": 0.7733, - "num_input_tokens_seen": 119325625, - "step": 5620 - }, - { - "epoch": 0.6758852882823303, - "grad_norm": 4.203164371883197, - "learning_rate": 1.0042755632349087e-06, - "loss": 0.7127, - "num_input_tokens_seen": 119342235, - "step": 5621 - }, - { - "epoch": 0.6760055311729694, - "grad_norm": 2.1761723454268687, - "learning_rate": 1.0036000708336653e-06, - "loss": 0.6199, - "num_input_tokens_seen": 119361085, - "step": 5622 - }, - { - "epoch": 0.6761257740636085, - "grad_norm": 5.062662890212761, - "learning_rate": 1.0029247295854992e-06, - "loss": 0.7917, - "num_input_tokens_seen": 119377425, - "step": 5623 - }, - { - "epoch": 0.6762460169542476, - "grad_norm": 2.517789843598912, - "learning_rate": 1.0022495395928588e-06, - "loss": 0.7104, - "num_input_tokens_seen": 119395625, - "step": 5624 - }, - { - "epoch": 0.6763662598448866, - "grad_norm": 0.8184953966886332, - "learning_rate": 1.0015745009581697e-06, - "loss": 0.639, - "num_input_tokens_seen": 119456950, - "step": 5625 - }, - { - "epoch": 0.6764865027355258, - "grad_norm": 2.308923968912805, - "learning_rate": 1.0008996137838343e-06, - "loss": 0.6617, - "num_input_tokens_seen": 119475645, - "step": 5626 - }, - { - "epoch": 0.6766067456261649, - "grad_norm": 2.3129380116268927, - "learning_rate": 1.000224878172234e-06, - "loss": 0.7946, - "num_input_tokens_seen": 119494490, - "step": 5627 - }, - { - "epoch": 0.6767269885168039, - "grad_norm": 2.2867345690262533, - "learning_rate": 9.995502942257248e-07, - "loss": 0.7289, - "num_input_tokens_seen": 119513365, - "step": 5628 - }, - { - "epoch": 0.6768472314074431, - "grad_norm": 2.850174201235272, - "learning_rate": 9.988758620466402e-07, - "loss": 0.7105, - "num_input_tokens_seen": 119531955, - "step": 5629 - }, - { - "epoch": 0.6769674742980821, - "grad_norm": 2.145077494151295, - "learning_rate": 9.982015817372909e-07, - "loss": 0.7579, - "num_input_tokens_seen": 119552115, - "step": 5630 - }, - { - "epoch": 0.6770877171887212, - "grad_norm": 2.3290565314353384, - "learning_rate": 9.975274533999657e-07, - "loss": 0.8142, - "num_input_tokens_seen": 119571365, - "step": 5631 - }, - { - "epoch": 0.6772079600793603, - "grad_norm": 3.41008237538907, - "learning_rate": 9.96853477136929e-07, - "loss": 0.8395, - "num_input_tokens_seen": 119585830, - "step": 5632 - }, - { - "epoch": 0.6773282029699994, - "grad_norm": 2.4093251328529974, - "learning_rate": 9.96179653050422e-07, - "loss": 0.7395, - "num_input_tokens_seen": 119605710, - "step": 5633 - }, - { - "epoch": 0.6774484458606385, - "grad_norm": 2.3429182986714996, - "learning_rate": 9.955059812426635e-07, - "loss": 0.7267, - "num_input_tokens_seen": 119622960, - "step": 5634 - }, - { - "epoch": 0.6775686887512776, - "grad_norm": 2.4273938589357074, - "learning_rate": 9.948324618158493e-07, - "loss": 0.818, - "num_input_tokens_seen": 119643020, - "step": 5635 - }, - { - "epoch": 0.6776889316419167, - "grad_norm": 3.4230700719679716, - "learning_rate": 9.941590948721513e-07, - "loss": 0.7751, - "num_input_tokens_seen": 119659940, - "step": 5636 - }, - { - "epoch": 0.6778091745325557, - "grad_norm": 2.0615651878489665, - "learning_rate": 9.934858805137188e-07, - "loss": 0.7551, - "num_input_tokens_seen": 119680310, - "step": 5637 - }, - { - "epoch": 0.6779294174231949, - "grad_norm": 2.0823722460691925, - "learning_rate": 9.92812818842677e-07, - "loss": 0.8002, - "num_input_tokens_seen": 119699205, - "step": 5638 - }, - { - "epoch": 0.678049660313834, - "grad_norm": 2.173764720267259, - "learning_rate": 9.921399099611306e-07, - "loss": 0.6341, - "num_input_tokens_seen": 119720090, - "step": 5639 - }, - { - "epoch": 0.678169903204473, - "grad_norm": 1.7570937163770215, - "learning_rate": 9.914671539711588e-07, - "loss": 0.6918, - "num_input_tokens_seen": 119739330, - "step": 5640 - }, - { - "epoch": 0.6782901460951122, - "grad_norm": 2.4106060797124567, - "learning_rate": 9.907945509748176e-07, - "loss": 0.7802, - "num_input_tokens_seen": 119759445, - "step": 5641 - }, - { - "epoch": 0.6784103889857512, - "grad_norm": 2.945091421478644, - "learning_rate": 9.9012210107414e-07, - "loss": 0.8103, - "num_input_tokens_seen": 119778485, - "step": 5642 - }, - { - "epoch": 0.6785306318763903, - "grad_norm": 2.31250801659447, - "learning_rate": 9.894498043711375e-07, - "loss": 0.7438, - "num_input_tokens_seen": 119799950, - "step": 5643 - }, - { - "epoch": 0.6786508747670293, - "grad_norm": 2.447018036923878, - "learning_rate": 9.887776609677962e-07, - "loss": 0.6914, - "num_input_tokens_seen": 119821040, - "step": 5644 - }, - { - "epoch": 0.6787711176576685, - "grad_norm": 5.23400249343789, - "learning_rate": 9.881056709660796e-07, - "loss": 0.7185, - "num_input_tokens_seen": 119839220, - "step": 5645 - }, - { - "epoch": 0.6788913605483076, - "grad_norm": 1.9128059623042541, - "learning_rate": 9.874338344679274e-07, - "loss": 0.7797, - "num_input_tokens_seen": 119854785, - "step": 5646 - }, - { - "epoch": 0.6790116034389466, - "grad_norm": 2.44979856250486, - "learning_rate": 9.867621515752582e-07, - "loss": 0.7396, - "num_input_tokens_seen": 119874500, - "step": 5647 - }, - { - "epoch": 0.6791318463295858, - "grad_norm": 1.5706120592930803, - "learning_rate": 9.860906223899651e-07, - "loss": 0.7901, - "num_input_tokens_seen": 119893615, - "step": 5648 - }, - { - "epoch": 0.6792520892202248, - "grad_norm": 2.088331952034577, - "learning_rate": 9.854192470139184e-07, - "loss": 0.749, - "num_input_tokens_seen": 119914815, - "step": 5649 - }, - { - "epoch": 0.6793723321108639, - "grad_norm": 2.6482426846029066, - "learning_rate": 9.847480255489653e-07, - "loss": 0.7096, - "num_input_tokens_seen": 119933560, - "step": 5650 - }, - { - "epoch": 0.6794925750015031, - "grad_norm": 1.8001434363990196, - "learning_rate": 9.840769580969295e-07, - "loss": 0.69, - "num_input_tokens_seen": 119953720, - "step": 5651 - }, - { - "epoch": 0.6796128178921421, - "grad_norm": 2.223310195039357, - "learning_rate": 9.834060447596114e-07, - "loss": 0.7929, - "num_input_tokens_seen": 119972710, - "step": 5652 - }, - { - "epoch": 0.6797330607827812, - "grad_norm": 3.0762282727713717, - "learning_rate": 9.82735285638788e-07, - "loss": 0.7713, - "num_input_tokens_seen": 119992140, - "step": 5653 - }, - { - "epoch": 0.6798533036734203, - "grad_norm": 0.8182982149600501, - "learning_rate": 9.820646808362118e-07, - "loss": 0.6533, - "num_input_tokens_seen": 120058115, - "step": 5654 - }, - { - "epoch": 0.6799735465640594, - "grad_norm": 2.0630662587600384, - "learning_rate": 9.813942304536154e-07, - "loss": 0.7171, - "num_input_tokens_seen": 120075805, - "step": 5655 - }, - { - "epoch": 0.6800937894546984, - "grad_norm": 1.9501571398177167, - "learning_rate": 9.807239345927043e-07, - "loss": 0.6368, - "num_input_tokens_seen": 120095535, - "step": 5656 - }, - { - "epoch": 0.6802140323453376, - "grad_norm": 3.621713051897509, - "learning_rate": 9.80053793355162e-07, - "loss": 0.7285, - "num_input_tokens_seen": 120113950, - "step": 5657 - }, - { - "epoch": 0.6803342752359767, - "grad_norm": 2.695622203290369, - "learning_rate": 9.793838068426472e-07, - "loss": 0.7407, - "num_input_tokens_seen": 120131365, - "step": 5658 - }, - { - "epoch": 0.6804545181266157, - "grad_norm": 2.546395038835068, - "learning_rate": 9.787139751567983e-07, - "loss": 0.6013, - "num_input_tokens_seen": 120146950, - "step": 5659 - }, - { - "epoch": 0.6805747610172549, - "grad_norm": 1.91082587396936, - "learning_rate": 9.780442983992273e-07, - "loss": 0.7166, - "num_input_tokens_seen": 120165185, - "step": 5660 - }, - { - "epoch": 0.680695003907894, - "grad_norm": 1.907277149391533, - "learning_rate": 9.773747766715238e-07, - "loss": 0.71, - "num_input_tokens_seen": 120185725, - "step": 5661 - }, - { - "epoch": 0.680815246798533, - "grad_norm": 5.563003309588458, - "learning_rate": 9.76705410075253e-07, - "loss": 0.8025, - "num_input_tokens_seen": 120205395, - "step": 5662 - }, - { - "epoch": 0.6809354896891722, - "grad_norm": 2.1069139067868754, - "learning_rate": 9.760361987119584e-07, - "loss": 0.8049, - "num_input_tokens_seen": 120222850, - "step": 5663 - }, - { - "epoch": 0.6810557325798112, - "grad_norm": 2.5796659771674517, - "learning_rate": 9.753671426831584e-07, - "loss": 0.6777, - "num_input_tokens_seen": 120238585, - "step": 5664 - }, - { - "epoch": 0.6811759754704503, - "grad_norm": 5.37964584866951, - "learning_rate": 9.746982420903483e-07, - "loss": 0.7882, - "num_input_tokens_seen": 120256500, - "step": 5665 - }, - { - "epoch": 0.6812962183610894, - "grad_norm": 1.8747188413475615, - "learning_rate": 9.740294970349993e-07, - "loss": 0.7399, - "num_input_tokens_seen": 120272635, - "step": 5666 - }, - { - "epoch": 0.6814164612517285, - "grad_norm": 0.9969339598575847, - "learning_rate": 9.733609076185602e-07, - "loss": 0.68, - "num_input_tokens_seen": 120328760, - "step": 5667 - }, - { - "epoch": 0.6815367041423676, - "grad_norm": 2.338583704568072, - "learning_rate": 9.72692473942455e-07, - "loss": 0.8331, - "num_input_tokens_seen": 120345705, - "step": 5668 - }, - { - "epoch": 0.6816569470330067, - "grad_norm": 1.825224673788321, - "learning_rate": 9.720241961080849e-07, - "loss": 0.7728, - "num_input_tokens_seen": 120364740, - "step": 5669 - }, - { - "epoch": 0.6817771899236458, - "grad_norm": 2.5813101895172883, - "learning_rate": 9.713560742168259e-07, - "loss": 0.7149, - "num_input_tokens_seen": 120387085, - "step": 5670 - }, - { - "epoch": 0.6818974328142848, - "grad_norm": 2.17815696213839, - "learning_rate": 9.706881083700333e-07, - "loss": 0.7101, - "num_input_tokens_seen": 120406490, - "step": 5671 - }, - { - "epoch": 0.682017675704924, - "grad_norm": 2.2964241827977125, - "learning_rate": 9.700202986690364e-07, - "loss": 0.8173, - "num_input_tokens_seen": 120424510, - "step": 5672 - }, - { - "epoch": 0.682137918595563, - "grad_norm": 3.135653361618271, - "learning_rate": 9.693526452151413e-07, - "loss": 0.6511, - "num_input_tokens_seen": 120443280, - "step": 5673 - }, - { - "epoch": 0.6822581614862021, - "grad_norm": 2.5123060235516554, - "learning_rate": 9.686851481096296e-07, - "loss": 0.755, - "num_input_tokens_seen": 120464310, - "step": 5674 - }, - { - "epoch": 0.6823784043768413, - "grad_norm": 2.7598060704314764, - "learning_rate": 9.68017807453762e-07, - "loss": 0.726, - "num_input_tokens_seen": 120482775, - "step": 5675 - }, - { - "epoch": 0.6824986472674803, - "grad_norm": 1.8674442909494517, - "learning_rate": 9.673506233487721e-07, - "loss": 0.7258, - "num_input_tokens_seen": 120500460, - "step": 5676 - }, - { - "epoch": 0.6826188901581194, - "grad_norm": 2.118331678142326, - "learning_rate": 9.666835958958717e-07, - "loss": 0.8595, - "num_input_tokens_seen": 120519500, - "step": 5677 - }, - { - "epoch": 0.6827391330487584, - "grad_norm": 2.084150152897892, - "learning_rate": 9.660167251962484e-07, - "loss": 0.7899, - "num_input_tokens_seen": 120537580, - "step": 5678 - }, - { - "epoch": 0.6828593759393976, - "grad_norm": 1.7788692722215675, - "learning_rate": 9.653500113510654e-07, - "loss": 0.7711, - "num_input_tokens_seen": 120556415, - "step": 5679 - }, - { - "epoch": 0.6829796188300367, - "grad_norm": 4.2533523454507245, - "learning_rate": 9.646834544614635e-07, - "loss": 0.6635, - "num_input_tokens_seen": 120576635, - "step": 5680 - }, - { - "epoch": 0.6830998617206757, - "grad_norm": 2.4656761102258433, - "learning_rate": 9.64017054628558e-07, - "loss": 0.7544, - "num_input_tokens_seen": 120595180, - "step": 5681 - }, - { - "epoch": 0.6832201046113149, - "grad_norm": 2.135077869251625, - "learning_rate": 9.63350811953441e-07, - "loss": 0.7854, - "num_input_tokens_seen": 120615275, - "step": 5682 - }, - { - "epoch": 0.6833403475019539, - "grad_norm": 2.644328645466541, - "learning_rate": 9.626847265371826e-07, - "loss": 0.6986, - "num_input_tokens_seen": 120634315, - "step": 5683 - }, - { - "epoch": 0.683460590392593, - "grad_norm": 3.2994741178463634, - "learning_rate": 9.620187984808262e-07, - "loss": 0.7801, - "num_input_tokens_seen": 120652835, - "step": 5684 - }, - { - "epoch": 0.6835808332832322, - "grad_norm": 2.6794205210352104, - "learning_rate": 9.613530278853927e-07, - "loss": 0.8511, - "num_input_tokens_seen": 120672530, - "step": 5685 - }, - { - "epoch": 0.6837010761738712, - "grad_norm": 2.5252554782011836, - "learning_rate": 9.606874148518782e-07, - "loss": 0.739, - "num_input_tokens_seen": 120693255, - "step": 5686 - }, - { - "epoch": 0.6838213190645103, - "grad_norm": 2.349412548068551, - "learning_rate": 9.600219594812575e-07, - "loss": 0.7674, - "num_input_tokens_seen": 120710915, - "step": 5687 - }, - { - "epoch": 0.6839415619551494, - "grad_norm": 2.126578202663835, - "learning_rate": 9.593566618744786e-07, - "loss": 0.7199, - "num_input_tokens_seen": 120730785, - "step": 5688 - }, - { - "epoch": 0.6840618048457885, - "grad_norm": 1.9560459136567503, - "learning_rate": 9.586915221324668e-07, - "loss": 0.729, - "num_input_tokens_seen": 120749315, - "step": 5689 - }, - { - "epoch": 0.6841820477364275, - "grad_norm": 2.3849725891692786, - "learning_rate": 9.580265403561222e-07, - "loss": 0.8371, - "num_input_tokens_seen": 120767300, - "step": 5690 - }, - { - "epoch": 0.6843022906270667, - "grad_norm": 1.8902344500481807, - "learning_rate": 9.57361716646324e-07, - "loss": 0.8566, - "num_input_tokens_seen": 120788235, - "step": 5691 - }, - { - "epoch": 0.6844225335177058, - "grad_norm": 2.0329187542770875, - "learning_rate": 9.56697051103924e-07, - "loss": 0.5945, - "num_input_tokens_seen": 120805395, - "step": 5692 - }, - { - "epoch": 0.6845427764083448, - "grad_norm": 2.419546725692858, - "learning_rate": 9.560325438297522e-07, - "loss": 0.8009, - "num_input_tokens_seen": 120823425, - "step": 5693 - }, - { - "epoch": 0.684663019298984, - "grad_norm": 3.189173872260607, - "learning_rate": 9.553681949246134e-07, - "loss": 0.8701, - "num_input_tokens_seen": 120840770, - "step": 5694 - }, - { - "epoch": 0.684783262189623, - "grad_norm": 2.485866181990579, - "learning_rate": 9.547040044892886e-07, - "loss": 0.7519, - "num_input_tokens_seen": 120868005, - "step": 5695 - }, - { - "epoch": 0.6849035050802621, - "grad_norm": 0.8976487129122176, - "learning_rate": 9.540399726245354e-07, - "loss": 0.6448, - "num_input_tokens_seen": 120924430, - "step": 5696 - }, - { - "epoch": 0.6850237479709013, - "grad_norm": 2.4773478305203485, - "learning_rate": 9.533760994310867e-07, - "loss": 0.6865, - "num_input_tokens_seen": 120944550, - "step": 5697 - }, - { - "epoch": 0.6851439908615403, - "grad_norm": 3.7819043827613443, - "learning_rate": 9.527123850096508e-07, - "loss": 0.742, - "num_input_tokens_seen": 120962630, - "step": 5698 - }, - { - "epoch": 0.6852642337521794, - "grad_norm": 2.004535920611483, - "learning_rate": 9.520488294609142e-07, - "loss": 0.7115, - "num_input_tokens_seen": 120981130, - "step": 5699 - }, - { - "epoch": 0.6853844766428185, - "grad_norm": 0.9791123132195761, - "learning_rate": 9.513854328855368e-07, - "loss": 0.6, - "num_input_tokens_seen": 121038725, - "step": 5700 - }, - { - "epoch": 0.6855047195334576, - "grad_norm": 2.2853095570772792, - "learning_rate": 9.507221953841558e-07, - "loss": 0.8114, - "num_input_tokens_seen": 121056075, - "step": 5701 - }, - { - "epoch": 0.6856249624240967, - "grad_norm": 2.6403895812060156, - "learning_rate": 9.500591170573824e-07, - "loss": 0.7716, - "num_input_tokens_seen": 121075815, - "step": 5702 - }, - { - "epoch": 0.6857452053147358, - "grad_norm": 2.527892227599656, - "learning_rate": 9.49396198005807e-07, - "loss": 0.7381, - "num_input_tokens_seen": 121093130, - "step": 5703 - }, - { - "epoch": 0.6858654482053749, - "grad_norm": 3.9979097183038603, - "learning_rate": 9.48733438329993e-07, - "loss": 0.6802, - "num_input_tokens_seen": 121113115, - "step": 5704 - }, - { - "epoch": 0.6859856910960139, - "grad_norm": 2.2221661064097593, - "learning_rate": 9.480708381304807e-07, - "loss": 0.7336, - "num_input_tokens_seen": 121134130, - "step": 5705 - }, - { - "epoch": 0.6861059339866531, - "grad_norm": 2.164600640702911, - "learning_rate": 9.474083975077851e-07, - "loss": 0.8336, - "num_input_tokens_seen": 121150975, - "step": 5706 - }, - { - "epoch": 0.6862261768772921, - "grad_norm": 3.5821268171544793, - "learning_rate": 9.467461165623994e-07, - "loss": 0.7939, - "num_input_tokens_seen": 121169745, - "step": 5707 - }, - { - "epoch": 0.6863464197679312, - "grad_norm": 3.203362841298789, - "learning_rate": 9.460839953947903e-07, - "loss": 0.7871, - "num_input_tokens_seen": 121187275, - "step": 5708 - }, - { - "epoch": 0.6864666626585703, - "grad_norm": 2.835659700586604, - "learning_rate": 9.45422034105402e-07, - "loss": 0.6274, - "num_input_tokens_seen": 121211780, - "step": 5709 - }, - { - "epoch": 0.6865869055492094, - "grad_norm": 2.0545390856391763, - "learning_rate": 9.447602327946512e-07, - "loss": 0.8033, - "num_input_tokens_seen": 121230140, - "step": 5710 - }, - { - "epoch": 0.6867071484398485, - "grad_norm": 3.951160177290665, - "learning_rate": 9.440985915629345e-07, - "loss": 0.7663, - "num_input_tokens_seen": 121247190, - "step": 5711 - }, - { - "epoch": 0.6868273913304875, - "grad_norm": 2.154865349124956, - "learning_rate": 9.434371105106223e-07, - "loss": 0.7187, - "num_input_tokens_seen": 121264510, - "step": 5712 - }, - { - "epoch": 0.6869476342211267, - "grad_norm": 3.12363571666529, - "learning_rate": 9.427757897380602e-07, - "loss": 0.7088, - "num_input_tokens_seen": 121283630, - "step": 5713 - }, - { - "epoch": 0.6870678771117658, - "grad_norm": 10.19452029343035, - "learning_rate": 9.421146293455695e-07, - "loss": 0.8482, - "num_input_tokens_seen": 121299090, - "step": 5714 - }, - { - "epoch": 0.6871881200024048, - "grad_norm": 1.9349666369499914, - "learning_rate": 9.414536294334489e-07, - "loss": 0.6757, - "num_input_tokens_seen": 121318830, - "step": 5715 - }, - { - "epoch": 0.687308362893044, - "grad_norm": 2.1640124836854375, - "learning_rate": 9.407927901019714e-07, - "loss": 0.6974, - "num_input_tokens_seen": 121337680, - "step": 5716 - }, - { - "epoch": 0.687428605783683, - "grad_norm": 4.211496220563102, - "learning_rate": 9.401321114513854e-07, - "loss": 0.7645, - "num_input_tokens_seen": 121356295, - "step": 5717 - }, - { - "epoch": 0.6875488486743221, - "grad_norm": 1.758534795181603, - "learning_rate": 9.394715935819146e-07, - "loss": 0.7452, - "num_input_tokens_seen": 121376405, - "step": 5718 - }, - { - "epoch": 0.6876690915649613, - "grad_norm": 2.501188733728939, - "learning_rate": 9.388112365937608e-07, - "loss": 0.6215, - "num_input_tokens_seen": 121395590, - "step": 5719 - }, - { - "epoch": 0.6877893344556003, - "grad_norm": 32.62870847194971, - "learning_rate": 9.381510405870985e-07, - "loss": 0.8191, - "num_input_tokens_seen": 121414325, - "step": 5720 - }, - { - "epoch": 0.6879095773462394, - "grad_norm": 2.728350377519243, - "learning_rate": 9.374910056620791e-07, - "loss": 0.7683, - "num_input_tokens_seen": 121433110, - "step": 5721 - }, - { - "epoch": 0.6880298202368785, - "grad_norm": 2.4241332673249163, - "learning_rate": 9.368311319188293e-07, - "loss": 0.811, - "num_input_tokens_seen": 121450645, - "step": 5722 - }, - { - "epoch": 0.6881500631275176, - "grad_norm": 2.0276284322463924, - "learning_rate": 9.361714194574515e-07, - "loss": 0.7926, - "num_input_tokens_seen": 121472700, - "step": 5723 - }, - { - "epoch": 0.6882703060181566, - "grad_norm": 0.765729410977009, - "learning_rate": 9.355118683780234e-07, - "loss": 0.5957, - "num_input_tokens_seen": 121542490, - "step": 5724 - }, - { - "epoch": 0.6883905489087958, - "grad_norm": 2.4658602553767643, - "learning_rate": 9.348524787805987e-07, - "loss": 0.7897, - "num_input_tokens_seen": 121557400, - "step": 5725 - }, - { - "epoch": 0.6885107917994349, - "grad_norm": 3.8896327875127072, - "learning_rate": 9.341932507652053e-07, - "loss": 0.8489, - "num_input_tokens_seen": 121571610, - "step": 5726 - }, - { - "epoch": 0.6886310346900739, - "grad_norm": 2.1344892834610545, - "learning_rate": 9.335341844318489e-07, - "loss": 0.7764, - "num_input_tokens_seen": 121591470, - "step": 5727 - }, - { - "epoch": 0.6887512775807131, - "grad_norm": 2.132128088241014, - "learning_rate": 9.328752798805091e-07, - "loss": 0.7328, - "num_input_tokens_seen": 121609660, - "step": 5728 - }, - { - "epoch": 0.6888715204713521, - "grad_norm": 3.1093780408853133, - "learning_rate": 9.322165372111405e-07, - "loss": 0.7468, - "num_input_tokens_seen": 121627525, - "step": 5729 - }, - { - "epoch": 0.6889917633619912, - "grad_norm": 2.1333715568588474, - "learning_rate": 9.315579565236737e-07, - "loss": 0.7515, - "num_input_tokens_seen": 121646350, - "step": 5730 - }, - { - "epoch": 0.6891120062526304, - "grad_norm": 8.139317368291874, - "learning_rate": 9.308995379180162e-07, - "loss": 0.743, - "num_input_tokens_seen": 121665625, - "step": 5731 - }, - { - "epoch": 0.6892322491432694, - "grad_norm": 0.8707265602570968, - "learning_rate": 9.302412814940488e-07, - "loss": 0.6356, - "num_input_tokens_seen": 121728120, - "step": 5732 - }, - { - "epoch": 0.6893524920339085, - "grad_norm": 2.6750356340592796, - "learning_rate": 9.295831873516282e-07, - "loss": 0.6991, - "num_input_tokens_seen": 121747115, - "step": 5733 - }, - { - "epoch": 0.6894727349245476, - "grad_norm": 1.538774631478507, - "learning_rate": 9.289252555905865e-07, - "loss": 0.758, - "num_input_tokens_seen": 121766915, - "step": 5734 - }, - { - "epoch": 0.6895929778151867, - "grad_norm": 2.377370084709072, - "learning_rate": 9.282674863107325e-07, - "loss": 0.7549, - "num_input_tokens_seen": 121784450, - "step": 5735 - }, - { - "epoch": 0.6897132207058257, - "grad_norm": 2.8749584527365637, - "learning_rate": 9.276098796118488e-07, - "loss": 0.7581, - "num_input_tokens_seen": 121800655, - "step": 5736 - }, - { - "epoch": 0.6898334635964649, - "grad_norm": 2.1607664353012934, - "learning_rate": 9.269524355936938e-07, - "loss": 0.6632, - "num_input_tokens_seen": 121823555, - "step": 5737 - }, - { - "epoch": 0.689953706487104, - "grad_norm": 1.768322661526969, - "learning_rate": 9.262951543560009e-07, - "loss": 0.8447, - "num_input_tokens_seen": 121842500, - "step": 5738 - }, - { - "epoch": 0.690073949377743, - "grad_norm": 2.691020657196348, - "learning_rate": 9.256380359984795e-07, - "loss": 0.8529, - "num_input_tokens_seen": 121859330, - "step": 5739 - }, - { - "epoch": 0.6901941922683821, - "grad_norm": 2.1714322533165076, - "learning_rate": 9.249810806208139e-07, - "loss": 0.7415, - "num_input_tokens_seen": 121878315, - "step": 5740 - }, - { - "epoch": 0.6903144351590212, - "grad_norm": 2.3190317057241585, - "learning_rate": 9.243242883226636e-07, - "loss": 0.8114, - "num_input_tokens_seen": 121897130, - "step": 5741 - }, - { - "epoch": 0.6904346780496603, - "grad_norm": 2.76753980877324, - "learning_rate": 9.236676592036628e-07, - "loss": 0.6934, - "num_input_tokens_seen": 121916525, - "step": 5742 - }, - { - "epoch": 0.6905549209402994, - "grad_norm": 2.1303453618743755, - "learning_rate": 9.230111933634228e-07, - "loss": 0.7232, - "num_input_tokens_seen": 121937840, - "step": 5743 - }, - { - "epoch": 0.6906751638309385, - "grad_norm": 1.6923010245041947, - "learning_rate": 9.223548909015288e-07, - "loss": 0.8017, - "num_input_tokens_seen": 121959250, - "step": 5744 - }, - { - "epoch": 0.6907954067215776, - "grad_norm": 3.130097704411959, - "learning_rate": 9.216987519175407e-07, - "loss": 0.7072, - "num_input_tokens_seen": 121979145, - "step": 5745 - }, - { - "epoch": 0.6909156496122166, - "grad_norm": 2.501090917564905, - "learning_rate": 9.210427765109942e-07, - "loss": 0.6834, - "num_input_tokens_seen": 121998540, - "step": 5746 - }, - { - "epoch": 0.6910358925028558, - "grad_norm": 2.726701896582755, - "learning_rate": 9.203869647814011e-07, - "loss": 0.8069, - "num_input_tokens_seen": 122016280, - "step": 5747 - }, - { - "epoch": 0.6911561353934949, - "grad_norm": 3.140027158250298, - "learning_rate": 9.197313168282472e-07, - "loss": 0.8403, - "num_input_tokens_seen": 122033445, - "step": 5748 - }, - { - "epoch": 0.6912763782841339, - "grad_norm": 5.375275732614449, - "learning_rate": 9.190758327509935e-07, - "loss": 0.7152, - "num_input_tokens_seen": 122051910, - "step": 5749 - }, - { - "epoch": 0.6913966211747731, - "grad_norm": 0.9157513124102706, - "learning_rate": 9.184205126490761e-07, - "loss": 0.6711, - "num_input_tokens_seen": 122100525, - "step": 5750 - }, - { - "epoch": 0.6915168640654121, - "grad_norm": 0.945610153369629, - "learning_rate": 9.177653566219075e-07, - "loss": 0.6404, - "num_input_tokens_seen": 122154970, - "step": 5751 - }, - { - "epoch": 0.6916371069560512, - "grad_norm": 2.8989492381994553, - "learning_rate": 9.171103647688738e-07, - "loss": 0.7533, - "num_input_tokens_seen": 122173430, - "step": 5752 - }, - { - "epoch": 0.6917573498466904, - "grad_norm": 2.1568265652970733, - "learning_rate": 9.164555371893375e-07, - "loss": 0.6863, - "num_input_tokens_seen": 122193080, - "step": 5753 - }, - { - "epoch": 0.6918775927373294, - "grad_norm": 2.1854520104076025, - "learning_rate": 9.158008739826333e-07, - "loss": 0.7483, - "num_input_tokens_seen": 122210400, - "step": 5754 - }, - { - "epoch": 0.6919978356279685, - "grad_norm": 1.7443003470287874, - "learning_rate": 9.15146375248075e-07, - "loss": 0.8532, - "num_input_tokens_seen": 122228850, - "step": 5755 - }, - { - "epoch": 0.6921180785186076, - "grad_norm": 4.070780877577215, - "learning_rate": 9.144920410849493e-07, - "loss": 0.7947, - "num_input_tokens_seen": 122249805, - "step": 5756 - }, - { - "epoch": 0.6922383214092467, - "grad_norm": 3.038691955820551, - "learning_rate": 9.138378715925176e-07, - "loss": 0.7957, - "num_input_tokens_seen": 122268620, - "step": 5757 - }, - { - "epoch": 0.6923585642998857, - "grad_norm": 1.833583005051259, - "learning_rate": 9.131838668700167e-07, - "loss": 0.8057, - "num_input_tokens_seen": 122288410, - "step": 5758 - }, - { - "epoch": 0.6924788071905249, - "grad_norm": 3.469021084412814, - "learning_rate": 9.125300270166598e-07, - "loss": 0.8607, - "num_input_tokens_seen": 122308735, - "step": 5759 - }, - { - "epoch": 0.692599050081164, - "grad_norm": 1.920577696716791, - "learning_rate": 9.11876352131633e-07, - "loss": 0.8645, - "num_input_tokens_seen": 122329030, - "step": 5760 - }, - { - "epoch": 0.692719292971803, - "grad_norm": 1.8447309435312118, - "learning_rate": 9.112228423140987e-07, - "loss": 0.7534, - "num_input_tokens_seen": 122347670, - "step": 5761 - }, - { - "epoch": 0.6928395358624422, - "grad_norm": 2.603976261751542, - "learning_rate": 9.105694976631926e-07, - "loss": 0.856, - "num_input_tokens_seen": 122365300, - "step": 5762 - }, - { - "epoch": 0.6929597787530812, - "grad_norm": 2.760815713949478, - "learning_rate": 9.099163182780283e-07, - "loss": 0.7245, - "num_input_tokens_seen": 122383175, - "step": 5763 - }, - { - "epoch": 0.6930800216437203, - "grad_norm": 3.018832470128621, - "learning_rate": 9.092633042576916e-07, - "loss": 0.4917, - "num_input_tokens_seen": 122400160, - "step": 5764 - }, - { - "epoch": 0.6932002645343595, - "grad_norm": 2.470595298487512, - "learning_rate": 9.086104557012446e-07, - "loss": 0.5699, - "num_input_tokens_seen": 122420450, - "step": 5765 - }, - { - "epoch": 0.6933205074249985, - "grad_norm": 1.9883446091056796, - "learning_rate": 9.079577727077239e-07, - "loss": 0.6535, - "num_input_tokens_seen": 122439000, - "step": 5766 - }, - { - "epoch": 0.6934407503156376, - "grad_norm": 2.8386987672233785, - "learning_rate": 9.073052553761404e-07, - "loss": 0.7159, - "num_input_tokens_seen": 122458085, - "step": 5767 - }, - { - "epoch": 0.6935609932062767, - "grad_norm": 3.5855480970972162, - "learning_rate": 9.066529038054811e-07, - "loss": 0.771, - "num_input_tokens_seen": 122477870, - "step": 5768 - }, - { - "epoch": 0.6936812360969158, - "grad_norm": 2.015515930702198, - "learning_rate": 9.060007180947071e-07, - "loss": 0.7381, - "num_input_tokens_seen": 122495645, - "step": 5769 - }, - { - "epoch": 0.6938014789875548, - "grad_norm": 2.20097854396206, - "learning_rate": 9.053486983427534e-07, - "loss": 0.7263, - "num_input_tokens_seen": 122516615, - "step": 5770 - }, - { - "epoch": 0.6939217218781939, - "grad_norm": 2.054274043760421, - "learning_rate": 9.046968446485326e-07, - "loss": 0.7021, - "num_input_tokens_seen": 122534740, - "step": 5771 - }, - { - "epoch": 0.6940419647688331, - "grad_norm": 2.6846041985592817, - "learning_rate": 9.040451571109295e-07, - "loss": 0.692, - "num_input_tokens_seen": 122550080, - "step": 5772 - }, - { - "epoch": 0.6941622076594721, - "grad_norm": 0.9973684702210112, - "learning_rate": 9.03393635828805e-07, - "loss": 0.6507, - "num_input_tokens_seen": 122603535, - "step": 5773 - }, - { - "epoch": 0.6942824505501112, - "grad_norm": 1.8399738115287108, - "learning_rate": 9.02742280900993e-07, - "loss": 0.8167, - "num_input_tokens_seen": 122623200, - "step": 5774 - }, - { - "epoch": 0.6944026934407503, - "grad_norm": 1.904868735645822, - "learning_rate": 9.020910924263054e-07, - "loss": 0.8266, - "num_input_tokens_seen": 122641445, - "step": 5775 - }, - { - "epoch": 0.6945229363313894, - "grad_norm": 0.9423214519573473, - "learning_rate": 9.014400705035261e-07, - "loss": 0.6202, - "num_input_tokens_seen": 122698070, - "step": 5776 - }, - { - "epoch": 0.6946431792220285, - "grad_norm": 5.401724289010299, - "learning_rate": 9.007892152314147e-07, - "loss": 0.7654, - "num_input_tokens_seen": 122716185, - "step": 5777 - }, - { - "epoch": 0.6947634221126676, - "grad_norm": 10.13534991058867, - "learning_rate": 9.001385267087047e-07, - "loss": 0.8203, - "num_input_tokens_seen": 122735050, - "step": 5778 - }, - { - "epoch": 0.6948836650033067, - "grad_norm": 2.31126092840529, - "learning_rate": 8.994880050341064e-07, - "loss": 0.6992, - "num_input_tokens_seen": 122754875, - "step": 5779 - }, - { - "epoch": 0.6950039078939457, - "grad_norm": 1.9673895105245716, - "learning_rate": 8.988376503063026e-07, - "loss": 0.7743, - "num_input_tokens_seen": 122775855, - "step": 5780 - }, - { - "epoch": 0.6951241507845849, - "grad_norm": 2.5967403933541786, - "learning_rate": 8.981874626239521e-07, - "loss": 0.8184, - "num_input_tokens_seen": 122794150, - "step": 5781 - }, - { - "epoch": 0.695244393675224, - "grad_norm": 2.696509190022972, - "learning_rate": 8.975374420856872e-07, - "loss": 0.8731, - "num_input_tokens_seen": 122810765, - "step": 5782 - }, - { - "epoch": 0.695364636565863, - "grad_norm": 2.361041820320028, - "learning_rate": 8.968875887901157e-07, - "loss": 0.7191, - "num_input_tokens_seen": 122827865, - "step": 5783 - }, - { - "epoch": 0.6954848794565022, - "grad_norm": 2.6530293684449435, - "learning_rate": 8.9623790283582e-07, - "loss": 0.6246, - "num_input_tokens_seen": 122845465, - "step": 5784 - }, - { - "epoch": 0.6956051223471412, - "grad_norm": 2.7323740234991307, - "learning_rate": 8.955883843213569e-07, - "loss": 0.759, - "num_input_tokens_seen": 122864200, - "step": 5785 - }, - { - "epoch": 0.6957253652377803, - "grad_norm": 2.0600057429751257, - "learning_rate": 8.949390333452569e-07, - "loss": 0.8745, - "num_input_tokens_seen": 122881865, - "step": 5786 - }, - { - "epoch": 0.6958456081284194, - "grad_norm": 2.0904027714530717, - "learning_rate": 8.942898500060279e-07, - "loss": 0.6737, - "num_input_tokens_seen": 122901300, - "step": 5787 - }, - { - "epoch": 0.6959658510190585, - "grad_norm": 2.875111124277208, - "learning_rate": 8.936408344021493e-07, - "loss": 0.7222, - "num_input_tokens_seen": 122917935, - "step": 5788 - }, - { - "epoch": 0.6960860939096976, - "grad_norm": 2.5961743687634904, - "learning_rate": 8.929919866320765e-07, - "loss": 0.7058, - "num_input_tokens_seen": 122938470, - "step": 5789 - }, - { - "epoch": 0.6962063368003367, - "grad_norm": 3.278340845592569, - "learning_rate": 8.923433067942385e-07, - "loss": 0.8029, - "num_input_tokens_seen": 122956755, - "step": 5790 - }, - { - "epoch": 0.6963265796909758, - "grad_norm": 7.182160304427614, - "learning_rate": 8.916947949870409e-07, - "loss": 0.6816, - "num_input_tokens_seen": 122976140, - "step": 5791 - }, - { - "epoch": 0.6964468225816148, - "grad_norm": 0.8692665563349373, - "learning_rate": 8.910464513088615e-07, - "loss": 0.635, - "num_input_tokens_seen": 123039900, - "step": 5792 - }, - { - "epoch": 0.696567065472254, - "grad_norm": 2.078111162928809, - "learning_rate": 8.903982758580542e-07, - "loss": 0.785, - "num_input_tokens_seen": 123058560, - "step": 5793 - }, - { - "epoch": 0.696687308362893, - "grad_norm": 2.7022916338500855, - "learning_rate": 8.89750268732945e-07, - "loss": 0.801, - "num_input_tokens_seen": 123078080, - "step": 5794 - }, - { - "epoch": 0.6968075512535321, - "grad_norm": 4.931016050874534, - "learning_rate": 8.891024300318382e-07, - "loss": 0.7961, - "num_input_tokens_seen": 123096370, - "step": 5795 - }, - { - "epoch": 0.6969277941441713, - "grad_norm": 3.611687484505827, - "learning_rate": 8.884547598530103e-07, - "loss": 0.7502, - "num_input_tokens_seen": 123116660, - "step": 5796 - }, - { - "epoch": 0.6970480370348103, - "grad_norm": 1.8958862359598438, - "learning_rate": 8.878072582947107e-07, - "loss": 0.7456, - "num_input_tokens_seen": 123134285, - "step": 5797 - }, - { - "epoch": 0.6971682799254494, - "grad_norm": 3.378430523870369, - "learning_rate": 8.87159925455165e-07, - "loss": 0.7658, - "num_input_tokens_seen": 123153835, - "step": 5798 - }, - { - "epoch": 0.6972885228160886, - "grad_norm": 2.3402846008513745, - "learning_rate": 8.865127614325745e-07, - "loss": 0.7276, - "num_input_tokens_seen": 123171985, - "step": 5799 - }, - { - "epoch": 0.6974087657067276, - "grad_norm": 2.526308242551654, - "learning_rate": 8.85865766325113e-07, - "loss": 0.6703, - "num_input_tokens_seen": 123195635, - "step": 5800 - }, - { - "epoch": 0.6975290085973667, - "grad_norm": 3.89629240584875, - "learning_rate": 8.852189402309287e-07, - "loss": 0.7201, - "num_input_tokens_seen": 123214540, - "step": 5801 - }, - { - "epoch": 0.6976492514880057, - "grad_norm": 4.553674622989023, - "learning_rate": 8.845722832481441e-07, - "loss": 0.7328, - "num_input_tokens_seen": 123229690, - "step": 5802 - }, - { - "epoch": 0.6977694943786449, - "grad_norm": 2.286421551144912, - "learning_rate": 8.83925795474858e-07, - "loss": 0.7728, - "num_input_tokens_seen": 123249535, - "step": 5803 - }, - { - "epoch": 0.6978897372692839, - "grad_norm": 2.5241828144253957, - "learning_rate": 8.832794770091414e-07, - "loss": 0.6059, - "num_input_tokens_seen": 123270090, - "step": 5804 - }, - { - "epoch": 0.698009980159923, - "grad_norm": 2.779768389048756, - "learning_rate": 8.826333279490401e-07, - "loss": 0.8236, - "num_input_tokens_seen": 123290445, - "step": 5805 - }, - { - "epoch": 0.6981302230505622, - "grad_norm": 2.438945536551706, - "learning_rate": 8.81987348392574e-07, - "loss": 0.6807, - "num_input_tokens_seen": 123307285, - "step": 5806 - }, - { - "epoch": 0.6982504659412012, - "grad_norm": 2.7875573101398365, - "learning_rate": 8.81341538437739e-07, - "loss": 0.7471, - "num_input_tokens_seen": 123325295, - "step": 5807 - }, - { - "epoch": 0.6983707088318403, - "grad_norm": 2.4612091218673657, - "learning_rate": 8.80695898182503e-07, - "loss": 0.6812, - "num_input_tokens_seen": 123345995, - "step": 5808 - }, - { - "epoch": 0.6984909517224794, - "grad_norm": 1.165436515940839, - "learning_rate": 8.800504277248093e-07, - "loss": 0.6885, - "num_input_tokens_seen": 123410465, - "step": 5809 - }, - { - "epoch": 0.6986111946131185, - "grad_norm": 1.7971155093654674, - "learning_rate": 8.794051271625753e-07, - "loss": 0.7471, - "num_input_tokens_seen": 123427820, - "step": 5810 - }, - { - "epoch": 0.6987314375037575, - "grad_norm": 1.99024624343518, - "learning_rate": 8.787599965936925e-07, - "loss": 0.8272, - "num_input_tokens_seen": 123448470, - "step": 5811 - }, - { - "epoch": 0.6988516803943967, - "grad_norm": 1.6917874846028231, - "learning_rate": 8.781150361160268e-07, - "loss": 0.7109, - "num_input_tokens_seen": 123470100, - "step": 5812 - }, - { - "epoch": 0.6989719232850358, - "grad_norm": 1.935236985201567, - "learning_rate": 8.774702458274181e-07, - "loss": 0.7289, - "num_input_tokens_seen": 123490225, - "step": 5813 - }, - { - "epoch": 0.6990921661756748, - "grad_norm": 13.469183196271489, - "learning_rate": 8.768256258256799e-07, - "loss": 0.7016, - "num_input_tokens_seen": 123506570, - "step": 5814 - }, - { - "epoch": 0.699212409066314, - "grad_norm": 2.0798211833419034, - "learning_rate": 8.76181176208602e-07, - "loss": 0.7408, - "num_input_tokens_seen": 123524390, - "step": 5815 - }, - { - "epoch": 0.699332651956953, - "grad_norm": 2.0795534428333426, - "learning_rate": 8.755368970739461e-07, - "loss": 0.7288, - "num_input_tokens_seen": 123543470, - "step": 5816 - }, - { - "epoch": 0.6994528948475921, - "grad_norm": 3.395109582403701, - "learning_rate": 8.748927885194489e-07, - "loss": 0.6144, - "num_input_tokens_seen": 123561495, - "step": 5817 - }, - { - "epoch": 0.6995731377382313, - "grad_norm": 0.7578202913830686, - "learning_rate": 8.742488506428201e-07, - "loss": 0.5865, - "num_input_tokens_seen": 123620305, - "step": 5818 - }, - { - "epoch": 0.6996933806288703, - "grad_norm": 2.1310537261469715, - "learning_rate": 8.736050835417466e-07, - "loss": 0.7847, - "num_input_tokens_seen": 123640065, - "step": 5819 - }, - { - "epoch": 0.6998136235195094, - "grad_norm": 2.843505119455081, - "learning_rate": 8.729614873138862e-07, - "loss": 0.6168, - "num_input_tokens_seen": 123657420, - "step": 5820 - }, - { - "epoch": 0.6999338664101485, - "grad_norm": 2.4205749685245967, - "learning_rate": 8.723180620568722e-07, - "loss": 0.7751, - "num_input_tokens_seen": 123676395, - "step": 5821 - }, - { - "epoch": 0.7000541093007876, - "grad_norm": 2.143577863912355, - "learning_rate": 8.716748078683107e-07, - "loss": 0.8562, - "num_input_tokens_seen": 123692890, - "step": 5822 - }, - { - "epoch": 0.7001743521914267, - "grad_norm": 2.453009840366571, - "learning_rate": 8.710317248457846e-07, - "loss": 0.685, - "num_input_tokens_seen": 123712225, - "step": 5823 - }, - { - "epoch": 0.7002945950820658, - "grad_norm": 2.295224754772024, - "learning_rate": 8.703888130868482e-07, - "loss": 0.7092, - "num_input_tokens_seen": 123733795, - "step": 5824 - }, - { - "epoch": 0.7004148379727049, - "grad_norm": 2.224199911024683, - "learning_rate": 8.697460726890307e-07, - "loss": 0.8128, - "num_input_tokens_seen": 123750660, - "step": 5825 - }, - { - "epoch": 0.7005350808633439, - "grad_norm": 2.4100870241860126, - "learning_rate": 8.691035037498354e-07, - "loss": 0.8991, - "num_input_tokens_seen": 123766370, - "step": 5826 - }, - { - "epoch": 0.7006553237539831, - "grad_norm": 3.264172559169138, - "learning_rate": 8.684611063667391e-07, - "loss": 0.7224, - "num_input_tokens_seen": 123786555, - "step": 5827 - }, - { - "epoch": 0.7007755666446221, - "grad_norm": 2.9347790919562566, - "learning_rate": 8.678188806371935e-07, - "loss": 0.7581, - "num_input_tokens_seen": 123808310, - "step": 5828 - }, - { - "epoch": 0.7008958095352612, - "grad_norm": 2.7243321885056693, - "learning_rate": 8.671768266586234e-07, - "loss": 0.845, - "num_input_tokens_seen": 123826155, - "step": 5829 - }, - { - "epoch": 0.7010160524259004, - "grad_norm": 1.9201810822097873, - "learning_rate": 8.665349445284275e-07, - "loss": 0.7779, - "num_input_tokens_seen": 123845615, - "step": 5830 - }, - { - "epoch": 0.7011362953165394, - "grad_norm": 1.5057307526088324, - "learning_rate": 8.658932343439799e-07, - "loss": 0.8041, - "num_input_tokens_seen": 123865120, - "step": 5831 - }, - { - "epoch": 0.7012565382071785, - "grad_norm": 2.1458664815422135, - "learning_rate": 8.65251696202627e-07, - "loss": 0.7627, - "num_input_tokens_seen": 123881220, - "step": 5832 - }, - { - "epoch": 0.7013767810978175, - "grad_norm": 3.8215086064687727, - "learning_rate": 8.646103302016896e-07, - "loss": 0.8735, - "num_input_tokens_seen": 123899910, - "step": 5833 - }, - { - "epoch": 0.7014970239884567, - "grad_norm": 1.8775991210681255, - "learning_rate": 8.639691364384614e-07, - "loss": 0.8826, - "num_input_tokens_seen": 123917255, - "step": 5834 - }, - { - "epoch": 0.7016172668790958, - "grad_norm": 2.199387815460875, - "learning_rate": 8.63328115010213e-07, - "loss": 0.7205, - "num_input_tokens_seen": 123933825, - "step": 5835 - }, - { - "epoch": 0.7017375097697348, - "grad_norm": 2.7935219445064425, - "learning_rate": 8.626872660141855e-07, - "loss": 0.6746, - "num_input_tokens_seen": 123951455, - "step": 5836 - }, - { - "epoch": 0.701857752660374, - "grad_norm": 1.8000102053350795, - "learning_rate": 8.620465895475957e-07, - "loss": 0.7397, - "num_input_tokens_seen": 123969395, - "step": 5837 - }, - { - "epoch": 0.701977995551013, - "grad_norm": 1.6164830658483664, - "learning_rate": 8.614060857076326e-07, - "loss": 0.7487, - "num_input_tokens_seen": 123989785, - "step": 5838 - }, - { - "epoch": 0.7020982384416521, - "grad_norm": 2.043937670169765, - "learning_rate": 8.607657545914626e-07, - "loss": 0.7384, - "num_input_tokens_seen": 124009200, - "step": 5839 - }, - { - "epoch": 0.7022184813322913, - "grad_norm": 2.149622226934407, - "learning_rate": 8.601255962962211e-07, - "loss": 0.7193, - "num_input_tokens_seen": 124027930, - "step": 5840 - }, - { - "epoch": 0.7023387242229303, - "grad_norm": 2.7498104005409623, - "learning_rate": 8.594856109190203e-07, - "loss": 0.7205, - "num_input_tokens_seen": 124044680, - "step": 5841 - }, - { - "epoch": 0.7024589671135694, - "grad_norm": 3.8107181027584915, - "learning_rate": 8.588457985569446e-07, - "loss": 0.6908, - "num_input_tokens_seen": 124067310, - "step": 5842 - }, - { - "epoch": 0.7025792100042085, - "grad_norm": 2.4042925662751626, - "learning_rate": 8.582061593070548e-07, - "loss": 0.7062, - "num_input_tokens_seen": 124087760, - "step": 5843 - }, - { - "epoch": 0.7026994528948476, - "grad_norm": 2.2601402006842264, - "learning_rate": 8.57566693266383e-07, - "loss": 0.7641, - "num_input_tokens_seen": 124105170, - "step": 5844 - }, - { - "epoch": 0.7028196957854866, - "grad_norm": 2.5387976499141582, - "learning_rate": 8.569274005319354e-07, - "loss": 0.694, - "num_input_tokens_seen": 124123290, - "step": 5845 - }, - { - "epoch": 0.7029399386761258, - "grad_norm": 2.2072677006018595, - "learning_rate": 8.562882812006913e-07, - "loss": 0.7957, - "num_input_tokens_seen": 124140500, - "step": 5846 - }, - { - "epoch": 0.7030601815667649, - "grad_norm": 2.0332175134146375, - "learning_rate": 8.556493353696066e-07, - "loss": 0.7639, - "num_input_tokens_seen": 124159220, - "step": 5847 - }, - { - "epoch": 0.7031804244574039, - "grad_norm": 2.8633720301501513, - "learning_rate": 8.550105631356077e-07, - "loss": 0.6747, - "num_input_tokens_seen": 124178665, - "step": 5848 - }, - { - "epoch": 0.7033006673480431, - "grad_norm": 2.3808748099815484, - "learning_rate": 8.543719645955961e-07, - "loss": 0.7597, - "num_input_tokens_seen": 124196715, - "step": 5849 - }, - { - "epoch": 0.7034209102386821, - "grad_norm": 1.6396057514756623, - "learning_rate": 8.537335398464458e-07, - "loss": 0.74, - "num_input_tokens_seen": 124216755, - "step": 5850 - }, - { - "epoch": 0.7035411531293212, - "grad_norm": 3.390915774377116, - "learning_rate": 8.53095288985007e-07, - "loss": 0.849, - "num_input_tokens_seen": 124230210, - "step": 5851 - }, - { - "epoch": 0.7036613960199604, - "grad_norm": 2.326238211452944, - "learning_rate": 8.524572121081009e-07, - "loss": 0.8187, - "num_input_tokens_seen": 124250030, - "step": 5852 - }, - { - "epoch": 0.7037816389105994, - "grad_norm": 2.9606863369593506, - "learning_rate": 8.518193093125232e-07, - "loss": 0.6237, - "num_input_tokens_seen": 124268805, - "step": 5853 - }, - { - "epoch": 0.7039018818012385, - "grad_norm": 1.6307357093683321, - "learning_rate": 8.511815806950436e-07, - "loss": 0.7935, - "num_input_tokens_seen": 124289555, - "step": 5854 - }, - { - "epoch": 0.7040221246918776, - "grad_norm": 1.8601970997053092, - "learning_rate": 8.505440263524044e-07, - "loss": 0.771, - "num_input_tokens_seen": 124308120, - "step": 5855 - }, - { - "epoch": 0.7041423675825167, - "grad_norm": 4.216998523722609, - "learning_rate": 8.499066463813227e-07, - "loss": 0.8699, - "num_input_tokens_seen": 124320675, - "step": 5856 - }, - { - "epoch": 0.7042626104731557, - "grad_norm": 1.9165562234346236, - "learning_rate": 8.492694408784884e-07, - "loss": 0.712, - "num_input_tokens_seen": 124340650, - "step": 5857 - }, - { - "epoch": 0.7043828533637949, - "grad_norm": 7.135690130874974, - "learning_rate": 8.486324099405642e-07, - "loss": 0.6191, - "num_input_tokens_seen": 124357215, - "step": 5858 - }, - { - "epoch": 0.704503096254434, - "grad_norm": 1.7074100395502942, - "learning_rate": 8.479955536641887e-07, - "loss": 0.7452, - "num_input_tokens_seen": 124378430, - "step": 5859 - }, - { - "epoch": 0.704623339145073, - "grad_norm": 2.4059061847083765, - "learning_rate": 8.473588721459716e-07, - "loss": 0.6517, - "num_input_tokens_seen": 124398060, - "step": 5860 - }, - { - "epoch": 0.7047435820357122, - "grad_norm": 2.929225172075417, - "learning_rate": 8.467223654824975e-07, - "loss": 0.7057, - "num_input_tokens_seen": 124417235, - "step": 5861 - }, - { - "epoch": 0.7048638249263512, - "grad_norm": 2.2320970155392987, - "learning_rate": 8.460860337703227e-07, - "loss": 0.6269, - "num_input_tokens_seen": 124437560, - "step": 5862 - }, - { - "epoch": 0.7049840678169903, - "grad_norm": 2.441890084258046, - "learning_rate": 8.454498771059797e-07, - "loss": 0.7073, - "num_input_tokens_seen": 124456655, - "step": 5863 - }, - { - "epoch": 0.7051043107076294, - "grad_norm": 2.3044616157977336, - "learning_rate": 8.448138955859725e-07, - "loss": 0.8303, - "num_input_tokens_seen": 124472960, - "step": 5864 - }, - { - "epoch": 0.7052245535982685, - "grad_norm": 2.863272245794222, - "learning_rate": 8.44178089306779e-07, - "loss": 0.8954, - "num_input_tokens_seen": 124490615, - "step": 5865 - }, - { - "epoch": 0.7053447964889076, - "grad_norm": 1.9999592525800536, - "learning_rate": 8.435424583648494e-07, - "loss": 0.7695, - "num_input_tokens_seen": 124508780, - "step": 5866 - }, - { - "epoch": 0.7054650393795466, - "grad_norm": 5.116959911996318, - "learning_rate": 8.429070028566101e-07, - "loss": 0.7216, - "num_input_tokens_seen": 124529810, - "step": 5867 - }, - { - "epoch": 0.7055852822701858, - "grad_norm": 2.0174898039859763, - "learning_rate": 8.422717228784586e-07, - "loss": 0.7439, - "num_input_tokens_seen": 124546405, - "step": 5868 - }, - { - "epoch": 0.7057055251608249, - "grad_norm": 1.9496718860492182, - "learning_rate": 8.416366185267663e-07, - "loss": 0.685, - "num_input_tokens_seen": 124563625, - "step": 5869 - }, - { - "epoch": 0.7058257680514639, - "grad_norm": 2.4153914601803, - "learning_rate": 8.410016898978778e-07, - "loss": 0.7703, - "num_input_tokens_seen": 124580820, - "step": 5870 - }, - { - "epoch": 0.7059460109421031, - "grad_norm": 1.8264799683224753, - "learning_rate": 8.403669370881115e-07, - "loss": 0.7889, - "num_input_tokens_seen": 124599275, - "step": 5871 - }, - { - "epoch": 0.7060662538327421, - "grad_norm": 1.702911751238476, - "learning_rate": 8.397323601937587e-07, - "loss": 0.7708, - "num_input_tokens_seen": 124618895, - "step": 5872 - }, - { - "epoch": 0.7061864967233812, - "grad_norm": 2.7795835997745932, - "learning_rate": 8.390979593110845e-07, - "loss": 0.7669, - "num_input_tokens_seen": 124640745, - "step": 5873 - }, - { - "epoch": 0.7063067396140204, - "grad_norm": 2.628525904524467, - "learning_rate": 8.384637345363262e-07, - "loss": 0.8031, - "num_input_tokens_seen": 124659655, - "step": 5874 - }, - { - "epoch": 0.7064269825046594, - "grad_norm": 2.2233804096283962, - "learning_rate": 8.378296859656964e-07, - "loss": 0.7635, - "num_input_tokens_seen": 124680530, - "step": 5875 - }, - { - "epoch": 0.7065472253952985, - "grad_norm": 2.4898693497953763, - "learning_rate": 8.371958136953792e-07, - "loss": 0.6727, - "num_input_tokens_seen": 124700280, - "step": 5876 - }, - { - "epoch": 0.7066674682859376, - "grad_norm": 2.9232944411278474, - "learning_rate": 8.365621178215326e-07, - "loss": 0.653, - "num_input_tokens_seen": 124716470, - "step": 5877 - }, - { - "epoch": 0.7067877111765767, - "grad_norm": 2.689336504326027, - "learning_rate": 8.359285984402871e-07, - "loss": 0.7455, - "num_input_tokens_seen": 124733455, - "step": 5878 - }, - { - "epoch": 0.7069079540672157, - "grad_norm": 2.156907680758512, - "learning_rate": 8.352952556477481e-07, - "loss": 0.737, - "num_input_tokens_seen": 124751085, - "step": 5879 - }, - { - "epoch": 0.7070281969578549, - "grad_norm": 2.0682501183752624, - "learning_rate": 8.34662089539993e-07, - "loss": 0.7691, - "num_input_tokens_seen": 124770315, - "step": 5880 - }, - { - "epoch": 0.707148439848494, - "grad_norm": 3.187723238264632, - "learning_rate": 8.340291002130722e-07, - "loss": 0.786, - "num_input_tokens_seen": 124789225, - "step": 5881 - }, - { - "epoch": 0.707268682739133, - "grad_norm": 3.5202132722902872, - "learning_rate": 8.3339628776301e-07, - "loss": 0.7909, - "num_input_tokens_seen": 124807085, - "step": 5882 - }, - { - "epoch": 0.7073889256297722, - "grad_norm": 3.5763798492428855, - "learning_rate": 8.327636522858033e-07, - "loss": 0.5628, - "num_input_tokens_seen": 124826410, - "step": 5883 - }, - { - "epoch": 0.7075091685204112, - "grad_norm": 2.28302761732246, - "learning_rate": 8.321311938774225e-07, - "loss": 0.7599, - "num_input_tokens_seen": 124845220, - "step": 5884 - }, - { - "epoch": 0.7076294114110503, - "grad_norm": 3.099892554351801, - "learning_rate": 8.31498912633811e-07, - "loss": 0.7839, - "num_input_tokens_seen": 124864950, - "step": 5885 - }, - { - "epoch": 0.7077496543016895, - "grad_norm": 1.8951283269948798, - "learning_rate": 8.308668086508847e-07, - "loss": 0.8389, - "num_input_tokens_seen": 124882750, - "step": 5886 - }, - { - "epoch": 0.7078698971923285, - "grad_norm": 2.189770710557123, - "learning_rate": 8.302348820245349e-07, - "loss": 0.7355, - "num_input_tokens_seen": 124905035, - "step": 5887 - }, - { - "epoch": 0.7079901400829676, - "grad_norm": 2.958003808312968, - "learning_rate": 8.296031328506232e-07, - "loss": 0.7008, - "num_input_tokens_seen": 124924505, - "step": 5888 - }, - { - "epoch": 0.7081103829736067, - "grad_norm": 2.4783905215207422, - "learning_rate": 8.289715612249857e-07, - "loss": 0.7507, - "num_input_tokens_seen": 124944840, - "step": 5889 - }, - { - "epoch": 0.7082306258642458, - "grad_norm": 5.134662107702385, - "learning_rate": 8.283401672434305e-07, - "loss": 0.7663, - "num_input_tokens_seen": 124959785, - "step": 5890 - }, - { - "epoch": 0.7083508687548848, - "grad_norm": 2.0960331757124306, - "learning_rate": 8.277089510017412e-07, - "loss": 0.7043, - "num_input_tokens_seen": 124980310, - "step": 5891 - }, - { - "epoch": 0.708471111645524, - "grad_norm": 2.045932090879382, - "learning_rate": 8.270779125956719e-07, - "loss": 0.8175, - "num_input_tokens_seen": 125000410, - "step": 5892 - }, - { - "epoch": 0.7085913545361631, - "grad_norm": 2.334997301712223, - "learning_rate": 8.264470521209505e-07, - "loss": 0.7908, - "num_input_tokens_seen": 125018495, - "step": 5893 - }, - { - "epoch": 0.7087115974268021, - "grad_norm": 3.274519333048488, - "learning_rate": 8.258163696732779e-07, - "loss": 0.7587, - "num_input_tokens_seen": 125035805, - "step": 5894 - }, - { - "epoch": 0.7088318403174413, - "grad_norm": 2.4864310177079334, - "learning_rate": 8.251858653483288e-07, - "loss": 0.7667, - "num_input_tokens_seen": 125053690, - "step": 5895 - }, - { - "epoch": 0.7089520832080803, - "grad_norm": 2.236903660523214, - "learning_rate": 8.245555392417501e-07, - "loss": 0.8505, - "num_input_tokens_seen": 125068065, - "step": 5896 - }, - { - "epoch": 0.7090723260987194, - "grad_norm": 2.1861391571755275, - "learning_rate": 8.239253914491613e-07, - "loss": 0.7866, - "num_input_tokens_seen": 125086110, - "step": 5897 - }, - { - "epoch": 0.7091925689893585, - "grad_norm": 2.0798696769264153, - "learning_rate": 8.232954220661556e-07, - "loss": 0.7436, - "num_input_tokens_seen": 125108565, - "step": 5898 - }, - { - "epoch": 0.7093128118799976, - "grad_norm": 3.2558864905838205, - "learning_rate": 8.226656311882989e-07, - "loss": 0.6879, - "num_input_tokens_seen": 125127595, - "step": 5899 - }, - { - "epoch": 0.7094330547706367, - "grad_norm": 6.148354324078775, - "learning_rate": 8.220360189111298e-07, - "loss": 0.7619, - "num_input_tokens_seen": 125145345, - "step": 5900 - }, - { - "epoch": 0.7095532976612757, - "grad_norm": 3.6400111607903662, - "learning_rate": 8.214065853301599e-07, - "loss": 0.7953, - "num_input_tokens_seen": 125160595, - "step": 5901 - }, - { - "epoch": 0.7096735405519149, - "grad_norm": 0.8190312990669477, - "learning_rate": 8.207773305408734e-07, - "loss": 0.6026, - "num_input_tokens_seen": 125227535, - "step": 5902 - }, - { - "epoch": 0.709793783442554, - "grad_norm": 2.3134780933985466, - "learning_rate": 8.201482546387288e-07, - "loss": 0.7894, - "num_input_tokens_seen": 125246730, - "step": 5903 - }, - { - "epoch": 0.709914026333193, - "grad_norm": 1.8639194181343757, - "learning_rate": 8.195193577191559e-07, - "loss": 0.9075, - "num_input_tokens_seen": 125268280, - "step": 5904 - }, - { - "epoch": 0.7100342692238322, - "grad_norm": 1.917515582073336, - "learning_rate": 8.188906398775579e-07, - "loss": 0.8346, - "num_input_tokens_seen": 125288545, - "step": 5905 - }, - { - "epoch": 0.7101545121144712, - "grad_norm": 2.0250249711543935, - "learning_rate": 8.1826210120931e-07, - "loss": 0.6875, - "num_input_tokens_seen": 125307475, - "step": 5906 - }, - { - "epoch": 0.7102747550051103, - "grad_norm": 4.702882089338399, - "learning_rate": 8.176337418097626e-07, - "loss": 0.6945, - "num_input_tokens_seen": 125327665, - "step": 5907 - }, - { - "epoch": 0.7103949978957494, - "grad_norm": 2.2365564054696176, - "learning_rate": 8.170055617742364e-07, - "loss": 0.7919, - "num_input_tokens_seen": 125344665, - "step": 5908 - }, - { - "epoch": 0.7105152407863885, - "grad_norm": 2.369596687233931, - "learning_rate": 8.163775611980259e-07, - "loss": 0.7049, - "num_input_tokens_seen": 125363495, - "step": 5909 - }, - { - "epoch": 0.7106354836770276, - "grad_norm": 3.2606317493847694, - "learning_rate": 8.157497401763976e-07, - "loss": 0.7844, - "num_input_tokens_seen": 125380880, - "step": 5910 - }, - { - "epoch": 0.7107557265676667, - "grad_norm": 2.2332964344941524, - "learning_rate": 8.151220988045928e-07, - "loss": 0.7738, - "num_input_tokens_seen": 125399855, - "step": 5911 - }, - { - "epoch": 0.7108759694583058, - "grad_norm": 2.269868914998486, - "learning_rate": 8.144946371778234e-07, - "loss": 0.8249, - "num_input_tokens_seen": 125419685, - "step": 5912 - }, - { - "epoch": 0.7109962123489448, - "grad_norm": 2.1570535202995544, - "learning_rate": 8.138673553912751e-07, - "loss": 0.771, - "num_input_tokens_seen": 125439965, - "step": 5913 - }, - { - "epoch": 0.711116455239584, - "grad_norm": 3.1449954650966006, - "learning_rate": 8.132402535401059e-07, - "loss": 0.5656, - "num_input_tokens_seen": 125460940, - "step": 5914 - }, - { - "epoch": 0.711236698130223, - "grad_norm": 1.8001175951779036, - "learning_rate": 8.126133317194465e-07, - "loss": 0.7433, - "num_input_tokens_seen": 125480850, - "step": 5915 - }, - { - "epoch": 0.7113569410208621, - "grad_norm": 2.8179978075944367, - "learning_rate": 8.11986590024401e-07, - "loss": 0.7325, - "num_input_tokens_seen": 125500310, - "step": 5916 - }, - { - "epoch": 0.7114771839115013, - "grad_norm": 1.9585462457148632, - "learning_rate": 8.113600285500448e-07, - "loss": 0.6745, - "num_input_tokens_seen": 125520240, - "step": 5917 - }, - { - "epoch": 0.7115974268021403, - "grad_norm": 2.0153665071086633, - "learning_rate": 8.107336473914268e-07, - "loss": 0.733, - "num_input_tokens_seen": 125538590, - "step": 5918 - }, - { - "epoch": 0.7117176696927794, - "grad_norm": 0.9830103952212915, - "learning_rate": 8.101074466435694e-07, - "loss": 0.6027, - "num_input_tokens_seen": 125597785, - "step": 5919 - }, - { - "epoch": 0.7118379125834186, - "grad_norm": 2.755434164992065, - "learning_rate": 8.094814264014662e-07, - "loss": 0.6719, - "num_input_tokens_seen": 125616260, - "step": 5920 - }, - { - "epoch": 0.7119581554740576, - "grad_norm": 2.642542072806552, - "learning_rate": 8.088555867600844e-07, - "loss": 0.8173, - "num_input_tokens_seen": 125632145, - "step": 5921 - }, - { - "epoch": 0.7120783983646967, - "grad_norm": 2.2963964485546855, - "learning_rate": 8.08229927814362e-07, - "loss": 0.6014, - "num_input_tokens_seen": 125654755, - "step": 5922 - }, - { - "epoch": 0.7121986412553358, - "grad_norm": 1.8868883795227693, - "learning_rate": 8.076044496592127e-07, - "loss": 0.6429, - "num_input_tokens_seen": 125676325, - "step": 5923 - }, - { - "epoch": 0.7123188841459749, - "grad_norm": 2.3393160007064018, - "learning_rate": 8.069791523895204e-07, - "loss": 0.7754, - "num_input_tokens_seen": 125692495, - "step": 5924 - }, - { - "epoch": 0.7124391270366139, - "grad_norm": 2.1264029617422993, - "learning_rate": 8.063540361001422e-07, - "loss": 0.7751, - "num_input_tokens_seen": 125710785, - "step": 5925 - }, - { - "epoch": 0.7125593699272531, - "grad_norm": 4.318504675685387, - "learning_rate": 8.057291008859075e-07, - "loss": 0.7951, - "num_input_tokens_seen": 125728665, - "step": 5926 - }, - { - "epoch": 0.7126796128178922, - "grad_norm": 2.0216689069860263, - "learning_rate": 8.051043468416187e-07, - "loss": 0.6735, - "num_input_tokens_seen": 125749635, - "step": 5927 - }, - { - "epoch": 0.7127998557085312, - "grad_norm": 3.190755346379996, - "learning_rate": 8.044797740620506e-07, - "loss": 0.8195, - "num_input_tokens_seen": 125767960, - "step": 5928 - }, - { - "epoch": 0.7129200985991703, - "grad_norm": 2.2690636682016407, - "learning_rate": 8.0385538264195e-07, - "loss": 0.7793, - "num_input_tokens_seen": 125786390, - "step": 5929 - }, - { - "epoch": 0.7130403414898094, - "grad_norm": 1.9017918148792083, - "learning_rate": 8.032311726760364e-07, - "loss": 0.8023, - "num_input_tokens_seen": 125807330, - "step": 5930 - }, - { - "epoch": 0.7131605843804485, - "grad_norm": 3.0389515470673842, - "learning_rate": 8.026071442590028e-07, - "loss": 0.6874, - "num_input_tokens_seen": 125833980, - "step": 5931 - }, - { - "epoch": 0.7132808272710875, - "grad_norm": 2.2587520762098476, - "learning_rate": 8.019832974855134e-07, - "loss": 0.8127, - "num_input_tokens_seen": 125851660, - "step": 5932 - }, - { - "epoch": 0.7134010701617267, - "grad_norm": 3.210069659489253, - "learning_rate": 8.013596324502052e-07, - "loss": 0.8175, - "num_input_tokens_seen": 125869845, - "step": 5933 - }, - { - "epoch": 0.7135213130523658, - "grad_norm": 7.198537849727589, - "learning_rate": 8.007361492476872e-07, - "loss": 0.7776, - "num_input_tokens_seen": 125888890, - "step": 5934 - }, - { - "epoch": 0.7136415559430048, - "grad_norm": 1.5668746728659018, - "learning_rate": 8.001128479725426e-07, - "loss": 0.78, - "num_input_tokens_seen": 125910515, - "step": 5935 - }, - { - "epoch": 0.713761798833644, - "grad_norm": 2.437616022721662, - "learning_rate": 7.994897287193248e-07, - "loss": 0.8013, - "num_input_tokens_seen": 125929615, - "step": 5936 - }, - { - "epoch": 0.713882041724283, - "grad_norm": 2.3862970082853283, - "learning_rate": 7.988667915825605e-07, - "loss": 0.839, - "num_input_tokens_seen": 125946400, - "step": 5937 - }, - { - "epoch": 0.7140022846149221, - "grad_norm": 3.3095438249269162, - "learning_rate": 7.982440366567485e-07, - "loss": 0.7516, - "num_input_tokens_seen": 125964610, - "step": 5938 - }, - { - "epoch": 0.7141225275055613, - "grad_norm": 1.8872712834661791, - "learning_rate": 7.97621464036361e-07, - "loss": 0.7497, - "num_input_tokens_seen": 125986090, - "step": 5939 - }, - { - "epoch": 0.7142427703962003, - "grad_norm": 2.7646027532956055, - "learning_rate": 7.969990738158417e-07, - "loss": 0.6787, - "num_input_tokens_seen": 126004220, - "step": 5940 - }, - { - "epoch": 0.7143630132868394, - "grad_norm": 2.454530292894201, - "learning_rate": 7.963768660896062e-07, - "loss": 0.8344, - "num_input_tokens_seen": 126022350, - "step": 5941 - }, - { - "epoch": 0.7144832561774785, - "grad_norm": 2.478187868166997, - "learning_rate": 7.957548409520432e-07, - "loss": 0.8194, - "num_input_tokens_seen": 126041295, - "step": 5942 - }, - { - "epoch": 0.7146034990681176, - "grad_norm": 2.0563859570904963, - "learning_rate": 7.951329984975135e-07, - "loss": 0.8405, - "num_input_tokens_seen": 126057955, - "step": 5943 - }, - { - "epoch": 0.7147237419587567, - "grad_norm": 0.7367238794582575, - "learning_rate": 7.945113388203497e-07, - "loss": 0.5617, - "num_input_tokens_seen": 126119980, - "step": 5944 - }, - { - "epoch": 0.7148439848493958, - "grad_norm": 3.2841200464990017, - "learning_rate": 7.938898620148575e-07, - "loss": 0.7807, - "num_input_tokens_seen": 126137460, - "step": 5945 - }, - { - "epoch": 0.7149642277400349, - "grad_norm": 2.6035488552783077, - "learning_rate": 7.932685681753135e-07, - "loss": 0.7029, - "num_input_tokens_seen": 126154460, - "step": 5946 - }, - { - "epoch": 0.7150844706306739, - "grad_norm": 2.566913677399377, - "learning_rate": 7.92647457395969e-07, - "loss": 0.6211, - "num_input_tokens_seen": 126176005, - "step": 5947 - }, - { - "epoch": 0.7152047135213131, - "grad_norm": 3.6668935326290213, - "learning_rate": 7.920265297710451e-07, - "loss": 0.735, - "num_input_tokens_seen": 126193115, - "step": 5948 - }, - { - "epoch": 0.7153249564119522, - "grad_norm": 1.9827154045629922, - "learning_rate": 7.914057853947363e-07, - "loss": 0.7173, - "num_input_tokens_seen": 126212015, - "step": 5949 - }, - { - "epoch": 0.7154451993025912, - "grad_norm": 1.8503598201708058, - "learning_rate": 7.907852243612083e-07, - "loss": 0.6291, - "num_input_tokens_seen": 126232140, - "step": 5950 - }, - { - "epoch": 0.7155654421932304, - "grad_norm": 2.2508461389296377, - "learning_rate": 7.901648467646009e-07, - "loss": 0.7137, - "num_input_tokens_seen": 126250800, - "step": 5951 - }, - { - "epoch": 0.7156856850838694, - "grad_norm": 1.7510362527105234, - "learning_rate": 7.895446526990244e-07, - "loss": 0.7187, - "num_input_tokens_seen": 126270535, - "step": 5952 - }, - { - "epoch": 0.7158059279745085, - "grad_norm": 1.6663469947012228, - "learning_rate": 7.889246422585616e-07, - "loss": 0.7477, - "num_input_tokens_seen": 126289640, - "step": 5953 - }, - { - "epoch": 0.7159261708651476, - "grad_norm": 1.8898179682963823, - "learning_rate": 7.883048155372669e-07, - "loss": 0.7327, - "num_input_tokens_seen": 126307875, - "step": 5954 - }, - { - "epoch": 0.7160464137557867, - "grad_norm": 2.5333450951029226, - "learning_rate": 7.876851726291691e-07, - "loss": 0.7047, - "num_input_tokens_seen": 126325895, - "step": 5955 - }, - { - "epoch": 0.7161666566464258, - "grad_norm": 2.4790833965747487, - "learning_rate": 7.870657136282666e-07, - "loss": 0.7807, - "num_input_tokens_seen": 126344475, - "step": 5956 - }, - { - "epoch": 0.7162868995370649, - "grad_norm": 1.7677675413444143, - "learning_rate": 7.86446438628531e-07, - "loss": 0.8102, - "num_input_tokens_seen": 126365265, - "step": 5957 - }, - { - "epoch": 0.716407142427704, - "grad_norm": 0.8362269586633847, - "learning_rate": 7.858273477239059e-07, - "loss": 0.5977, - "num_input_tokens_seen": 126433405, - "step": 5958 - }, - { - "epoch": 0.716527385318343, - "grad_norm": 1.9054625662871856, - "learning_rate": 7.852084410083067e-07, - "loss": 0.713, - "num_input_tokens_seen": 126451945, - "step": 5959 - }, - { - "epoch": 0.7166476282089821, - "grad_norm": 1.7789717298763061, - "learning_rate": 7.84589718575621e-07, - "loss": 0.628, - "num_input_tokens_seen": 126472110, - "step": 5960 - }, - { - "epoch": 0.7167678710996213, - "grad_norm": 2.333946754621867, - "learning_rate": 7.839711805197087e-07, - "loss": 0.6816, - "num_input_tokens_seen": 126490685, - "step": 5961 - }, - { - "epoch": 0.7168881139902603, - "grad_norm": 2.935143616386971, - "learning_rate": 7.833528269344008e-07, - "loss": 0.7433, - "num_input_tokens_seen": 126510310, - "step": 5962 - }, - { - "epoch": 0.7170083568808994, - "grad_norm": 3.432118378663752, - "learning_rate": 7.827346579135023e-07, - "loss": 0.7725, - "num_input_tokens_seen": 126527370, - "step": 5963 - }, - { - "epoch": 0.7171285997715385, - "grad_norm": 2.7708960766688504, - "learning_rate": 7.821166735507885e-07, - "loss": 0.8279, - "num_input_tokens_seen": 126546120, - "step": 5964 - }, - { - "epoch": 0.7172488426621776, - "grad_norm": 44.32332475648218, - "learning_rate": 7.81498873940007e-07, - "loss": 0.6812, - "num_input_tokens_seen": 126563055, - "step": 5965 - }, - { - "epoch": 0.7173690855528166, - "grad_norm": 10.181157326224408, - "learning_rate": 7.808812591748768e-07, - "loss": 0.7645, - "num_input_tokens_seen": 126583155, - "step": 5966 - }, - { - "epoch": 0.7174893284434558, - "grad_norm": 2.751923502991377, - "learning_rate": 7.802638293490908e-07, - "loss": 0.649, - "num_input_tokens_seen": 126602520, - "step": 5967 - }, - { - "epoch": 0.7176095713340949, - "grad_norm": 2.501735916087633, - "learning_rate": 7.796465845563123e-07, - "loss": 0.7735, - "num_input_tokens_seen": 126621115, - "step": 5968 - }, - { - "epoch": 0.7177298142247339, - "grad_norm": 2.250301904435027, - "learning_rate": 7.790295248901766e-07, - "loss": 0.7968, - "num_input_tokens_seen": 126641965, - "step": 5969 - }, - { - "epoch": 0.7178500571153731, - "grad_norm": 2.0454599439646612, - "learning_rate": 7.78412650444291e-07, - "loss": 0.6241, - "num_input_tokens_seen": 126664915, - "step": 5970 - }, - { - "epoch": 0.7179703000060121, - "grad_norm": 1.939846278150598, - "learning_rate": 7.777959613122351e-07, - "loss": 0.6713, - "num_input_tokens_seen": 126684460, - "step": 5971 - }, - { - "epoch": 0.7180905428966512, - "grad_norm": 1.9554624487795176, - "learning_rate": 7.771794575875604e-07, - "loss": 0.773, - "num_input_tokens_seen": 126706050, - "step": 5972 - }, - { - "epoch": 0.7182107857872904, - "grad_norm": 3.1908312814053374, - "learning_rate": 7.765631393637894e-07, - "loss": 0.7719, - "num_input_tokens_seen": 126723965, - "step": 5973 - }, - { - "epoch": 0.7183310286779294, - "grad_norm": 3.0188150875785333, - "learning_rate": 7.75947006734417e-07, - "loss": 0.4717, - "num_input_tokens_seen": 126741465, - "step": 5974 - }, - { - "epoch": 0.7184512715685685, - "grad_norm": 2.252119704361162, - "learning_rate": 7.753310597929108e-07, - "loss": 0.821, - "num_input_tokens_seen": 126757825, - "step": 5975 - }, - { - "epoch": 0.7185715144592076, - "grad_norm": 0.8101836214369763, - "learning_rate": 7.747152986327095e-07, - "loss": 0.5865, - "num_input_tokens_seen": 126818090, - "step": 5976 - }, - { - "epoch": 0.7186917573498467, - "grad_norm": 2.0468655191869036, - "learning_rate": 7.740997233472228e-07, - "loss": 0.6764, - "num_input_tokens_seen": 126835430, - "step": 5977 - }, - { - "epoch": 0.7188120002404857, - "grad_norm": 2.5491412583530177, - "learning_rate": 7.734843340298329e-07, - "loss": 0.7033, - "num_input_tokens_seen": 126854975, - "step": 5978 - }, - { - "epoch": 0.7189322431311249, - "grad_norm": 2.2320724391262643, - "learning_rate": 7.72869130773895e-07, - "loss": 0.7556, - "num_input_tokens_seen": 126875295, - "step": 5979 - }, - { - "epoch": 0.719052486021764, - "grad_norm": 0.8149386333211902, - "learning_rate": 7.722541136727343e-07, - "loss": 0.6088, - "num_input_tokens_seen": 126931030, - "step": 5980 - }, - { - "epoch": 0.719172728912403, - "grad_norm": 2.176573895538796, - "learning_rate": 7.716392828196483e-07, - "loss": 0.8061, - "num_input_tokens_seen": 126948550, - "step": 5981 - }, - { - "epoch": 0.7192929718030422, - "grad_norm": 3.2493379123144703, - "learning_rate": 7.710246383079057e-07, - "loss": 0.7648, - "num_input_tokens_seen": 126963655, - "step": 5982 - }, - { - "epoch": 0.7194132146936812, - "grad_norm": 2.6072413778681582, - "learning_rate": 7.704101802307492e-07, - "loss": 0.9128, - "num_input_tokens_seen": 126975675, - "step": 5983 - }, - { - "epoch": 0.7195334575843203, - "grad_norm": 2.452466744129387, - "learning_rate": 7.697959086813906e-07, - "loss": 0.8713, - "num_input_tokens_seen": 126991560, - "step": 5984 - }, - { - "epoch": 0.7196537004749595, - "grad_norm": 1.9079363467480595, - "learning_rate": 7.691818237530145e-07, - "loss": 0.7955, - "num_input_tokens_seen": 127010140, - "step": 5985 - }, - { - "epoch": 0.7197739433655985, - "grad_norm": 3.080624927405265, - "learning_rate": 7.685679255387774e-07, - "loss": 0.773, - "num_input_tokens_seen": 127028175, - "step": 5986 - }, - { - "epoch": 0.7198941862562376, - "grad_norm": 2.337735198455269, - "learning_rate": 7.679542141318065e-07, - "loss": 0.7647, - "num_input_tokens_seen": 127045000, - "step": 5987 - }, - { - "epoch": 0.7200144291468767, - "grad_norm": 1.8635313567547989, - "learning_rate": 7.67340689625202e-07, - "loss": 0.7591, - "num_input_tokens_seen": 127066095, - "step": 5988 - }, - { - "epoch": 0.7201346720375158, - "grad_norm": 1.6116332146169243, - "learning_rate": 7.667273521120347e-07, - "loss": 0.7771, - "num_input_tokens_seen": 127085375, - "step": 5989 - }, - { - "epoch": 0.7202549149281549, - "grad_norm": 2.262156162347373, - "learning_rate": 7.661142016853468e-07, - "loss": 0.7934, - "num_input_tokens_seen": 127102455, - "step": 5990 - }, - { - "epoch": 0.7203751578187939, - "grad_norm": 2.0128542565893155, - "learning_rate": 7.655012384381543e-07, - "loss": 0.7439, - "num_input_tokens_seen": 127121660, - "step": 5991 - }, - { - "epoch": 0.7204954007094331, - "grad_norm": 1.9066759849473476, - "learning_rate": 7.648884624634422e-07, - "loss": 0.8086, - "num_input_tokens_seen": 127139930, - "step": 5992 - }, - { - "epoch": 0.7206156436000721, - "grad_norm": 1.9261636133864954, - "learning_rate": 7.642758738541683e-07, - "loss": 0.8779, - "num_input_tokens_seen": 127156230, - "step": 5993 - }, - { - "epoch": 0.7207358864907112, - "grad_norm": 0.8004235625357764, - "learning_rate": 7.636634727032613e-07, - "loss": 0.6186, - "num_input_tokens_seen": 127213055, - "step": 5994 - }, - { - "epoch": 0.7208561293813504, - "grad_norm": 2.592785243194575, - "learning_rate": 7.630512591036231e-07, - "loss": 0.7904, - "num_input_tokens_seen": 127232085, - "step": 5995 - }, - { - "epoch": 0.7209763722719894, - "grad_norm": 7.701021332679958, - "learning_rate": 7.624392331481255e-07, - "loss": 0.6483, - "num_input_tokens_seen": 127249460, - "step": 5996 - }, - { - "epoch": 0.7210966151626285, - "grad_norm": 0.7452960272034347, - "learning_rate": 7.618273949296121e-07, - "loss": 0.5415, - "num_input_tokens_seen": 127308690, - "step": 5997 - }, - { - "epoch": 0.7212168580532676, - "grad_norm": 2.2312588143596397, - "learning_rate": 7.612157445408977e-07, - "loss": 0.6805, - "num_input_tokens_seen": 127326220, - "step": 5998 - }, - { - "epoch": 0.7213371009439067, - "grad_norm": 3.1190137399266855, - "learning_rate": 7.606042820747709e-07, - "loss": 0.7373, - "num_input_tokens_seen": 127342345, - "step": 5999 - }, - { - "epoch": 0.7214573438345457, - "grad_norm": 1.8428060407422815, - "learning_rate": 7.599930076239889e-07, - "loss": 0.8471, - "num_input_tokens_seen": 127359350, - "step": 6000 - }, - { - "epoch": 0.7215775867251849, - "grad_norm": 1.932430630127869, - "learning_rate": 7.593819212812818e-07, - "loss": 0.7065, - "num_input_tokens_seen": 127380650, - "step": 6001 - }, - { - "epoch": 0.721697829615824, - "grad_norm": 3.019556453587002, - "learning_rate": 7.587710231393508e-07, - "loss": 0.7231, - "num_input_tokens_seen": 127398725, - "step": 6002 - }, - { - "epoch": 0.721818072506463, - "grad_norm": 2.693911332211301, - "learning_rate": 7.581603132908685e-07, - "loss": 0.8292, - "num_input_tokens_seen": 127416415, - "step": 6003 - }, - { - "epoch": 0.7219383153971022, - "grad_norm": 3.948674689801315, - "learning_rate": 7.575497918284795e-07, - "loss": 0.7734, - "num_input_tokens_seen": 127433680, - "step": 6004 - }, - { - "epoch": 0.7220585582877412, - "grad_norm": 2.3923381033490476, - "learning_rate": 7.569394588447992e-07, - "loss": 0.7424, - "num_input_tokens_seen": 127450415, - "step": 6005 - }, - { - "epoch": 0.7221788011783803, - "grad_norm": 4.883190723785703, - "learning_rate": 7.563293144324139e-07, - "loss": 0.774, - "num_input_tokens_seen": 127465685, - "step": 6006 - }, - { - "epoch": 0.7222990440690195, - "grad_norm": 2.171551114174458, - "learning_rate": 7.557193586838834e-07, - "loss": 0.7974, - "num_input_tokens_seen": 127480770, - "step": 6007 - }, - { - "epoch": 0.7224192869596585, - "grad_norm": 2.429654985195171, - "learning_rate": 7.551095916917371e-07, - "loss": 0.7025, - "num_input_tokens_seen": 127497820, - "step": 6008 - }, - { - "epoch": 0.7225395298502976, - "grad_norm": 4.001595389917697, - "learning_rate": 7.545000135484758e-07, - "loss": 0.6615, - "num_input_tokens_seen": 127514975, - "step": 6009 - }, - { - "epoch": 0.7226597727409367, - "grad_norm": 2.282572083469935, - "learning_rate": 7.538906243465714e-07, - "loss": 0.6249, - "num_input_tokens_seen": 127534830, - "step": 6010 - }, - { - "epoch": 0.7227800156315758, - "grad_norm": 2.257202023658201, - "learning_rate": 7.532814241784693e-07, - "loss": 0.7762, - "num_input_tokens_seen": 127551315, - "step": 6011 - }, - { - "epoch": 0.7229002585222148, - "grad_norm": 1.962389888024602, - "learning_rate": 7.526724131365838e-07, - "loss": 0.688, - "num_input_tokens_seen": 127571990, - "step": 6012 - }, - { - "epoch": 0.723020501412854, - "grad_norm": 3.1777409189049, - "learning_rate": 7.520635913133017e-07, - "loss": 0.6979, - "num_input_tokens_seen": 127590340, - "step": 6013 - }, - { - "epoch": 0.7231407443034931, - "grad_norm": 3.8676112323254332, - "learning_rate": 7.514549588009804e-07, - "loss": 0.8145, - "num_input_tokens_seen": 127610935, - "step": 6014 - }, - { - "epoch": 0.7232609871941321, - "grad_norm": 2.142341987214896, - "learning_rate": 7.508465156919492e-07, - "loss": 0.7036, - "num_input_tokens_seen": 127634165, - "step": 6015 - }, - { - "epoch": 0.7233812300847713, - "grad_norm": 3.271809125721559, - "learning_rate": 7.502382620785083e-07, - "loss": 0.6215, - "num_input_tokens_seen": 127650435, - "step": 6016 - }, - { - "epoch": 0.7235014729754103, - "grad_norm": 0.8811509860409997, - "learning_rate": 7.496301980529296e-07, - "loss": 0.6887, - "num_input_tokens_seen": 127713365, - "step": 6017 - }, - { - "epoch": 0.7236217158660494, - "grad_norm": 2.4687664104283806, - "learning_rate": 7.490223237074547e-07, - "loss": 0.741, - "num_input_tokens_seen": 127732795, - "step": 6018 - }, - { - "epoch": 0.7237419587566886, - "grad_norm": 3.480842721464305, - "learning_rate": 7.484146391342996e-07, - "loss": 0.6526, - "num_input_tokens_seen": 127752310, - "step": 6019 - }, - { - "epoch": 0.7238622016473276, - "grad_norm": 4.125243374523593, - "learning_rate": 7.478071444256484e-07, - "loss": 0.567, - "num_input_tokens_seen": 127769790, - "step": 6020 - }, - { - "epoch": 0.7239824445379667, - "grad_norm": 2.947257535081137, - "learning_rate": 7.471998396736579e-07, - "loss": 0.7804, - "num_input_tokens_seen": 127789890, - "step": 6021 - }, - { - "epoch": 0.7241026874286057, - "grad_norm": 1.7203305881308386, - "learning_rate": 7.465927249704549e-07, - "loss": 0.7466, - "num_input_tokens_seen": 127807495, - "step": 6022 - }, - { - "epoch": 0.7242229303192449, - "grad_norm": 1.781019346072082, - "learning_rate": 7.459858004081398e-07, - "loss": 0.7651, - "num_input_tokens_seen": 127825185, - "step": 6023 - }, - { - "epoch": 0.724343173209884, - "grad_norm": 1.4043195305498013, - "learning_rate": 7.453790660787815e-07, - "loss": 0.5991, - "num_input_tokens_seen": 127893000, - "step": 6024 - }, - { - "epoch": 0.724463416100523, - "grad_norm": 2.346777454868482, - "learning_rate": 7.447725220744214e-07, - "loss": 0.6317, - "num_input_tokens_seen": 127914965, - "step": 6025 - }, - { - "epoch": 0.7245836589911622, - "grad_norm": 2.6436261936115035, - "learning_rate": 7.44166168487071e-07, - "loss": 0.7584, - "num_input_tokens_seen": 127934940, - "step": 6026 - }, - { - "epoch": 0.7247039018818012, - "grad_norm": 1.9341843939480519, - "learning_rate": 7.435600054087152e-07, - "loss": 0.8082, - "num_input_tokens_seen": 127956825, - "step": 6027 - }, - { - "epoch": 0.7248241447724403, - "grad_norm": 2.5057000785449564, - "learning_rate": 7.429540329313074e-07, - "loss": 0.7425, - "num_input_tokens_seen": 127977585, - "step": 6028 - }, - { - "epoch": 0.7249443876630794, - "grad_norm": 2.821246576160181, - "learning_rate": 7.423482511467733e-07, - "loss": 0.7348, - "num_input_tokens_seen": 127998075, - "step": 6029 - }, - { - "epoch": 0.7250646305537185, - "grad_norm": 2.7519864013125233, - "learning_rate": 7.417426601470099e-07, - "loss": 0.6494, - "num_input_tokens_seen": 128018155, - "step": 6030 - }, - { - "epoch": 0.7251848734443576, - "grad_norm": 4.728718352846354, - "learning_rate": 7.411372600238841e-07, - "loss": 0.7788, - "num_input_tokens_seen": 128038490, - "step": 6031 - }, - { - "epoch": 0.7253051163349967, - "grad_norm": 2.9919865971125645, - "learning_rate": 7.405320508692352e-07, - "loss": 0.734, - "num_input_tokens_seen": 128056950, - "step": 6032 - }, - { - "epoch": 0.7254253592256358, - "grad_norm": 2.294866685366273, - "learning_rate": 7.399270327748727e-07, - "loss": 0.7509, - "num_input_tokens_seen": 128074330, - "step": 6033 - }, - { - "epoch": 0.7255456021162748, - "grad_norm": 2.573428054503482, - "learning_rate": 7.39322205832577e-07, - "loss": 0.7407, - "num_input_tokens_seen": 128094940, - "step": 6034 - }, - { - "epoch": 0.725665845006914, - "grad_norm": 2.4812717277175724, - "learning_rate": 7.387175701341009e-07, - "loss": 0.798, - "num_input_tokens_seen": 128113330, - "step": 6035 - }, - { - "epoch": 0.7257860878975531, - "grad_norm": 3.1899022408767865, - "learning_rate": 7.381131257711666e-07, - "loss": 0.7333, - "num_input_tokens_seen": 128130155, - "step": 6036 - }, - { - "epoch": 0.7259063307881921, - "grad_norm": 2.156626242718935, - "learning_rate": 7.375088728354677e-07, - "loss": 0.8318, - "num_input_tokens_seen": 128144905, - "step": 6037 - }, - { - "epoch": 0.7260265736788313, - "grad_norm": 3.436898749245299, - "learning_rate": 7.369048114186685e-07, - "loss": 0.6653, - "num_input_tokens_seen": 128165670, - "step": 6038 - }, - { - "epoch": 0.7261468165694703, - "grad_norm": 1.9228060106765044, - "learning_rate": 7.363009416124055e-07, - "loss": 0.828, - "num_input_tokens_seen": 128184715, - "step": 6039 - }, - { - "epoch": 0.7262670594601094, - "grad_norm": 2.84561545439624, - "learning_rate": 7.356972635082852e-07, - "loss": 0.6314, - "num_input_tokens_seen": 128203290, - "step": 6040 - }, - { - "epoch": 0.7263873023507486, - "grad_norm": 4.085337008285573, - "learning_rate": 7.350937771978847e-07, - "loss": 0.7476, - "num_input_tokens_seen": 128223080, - "step": 6041 - }, - { - "epoch": 0.7265075452413876, - "grad_norm": 3.6370958890663085, - "learning_rate": 7.344904827727519e-07, - "loss": 0.8457, - "num_input_tokens_seen": 128239980, - "step": 6042 - }, - { - "epoch": 0.7266277881320267, - "grad_norm": 2.978948940095128, - "learning_rate": 7.33887380324407e-07, - "loss": 0.7256, - "num_input_tokens_seen": 128254935, - "step": 6043 - }, - { - "epoch": 0.7267480310226658, - "grad_norm": 1.8045441250723215, - "learning_rate": 7.332844699443401e-07, - "loss": 0.7952, - "num_input_tokens_seen": 128273255, - "step": 6044 - }, - { - "epoch": 0.7268682739133049, - "grad_norm": 2.891263978504804, - "learning_rate": 7.326817517240121e-07, - "loss": 0.7424, - "num_input_tokens_seen": 128294680, - "step": 6045 - }, - { - "epoch": 0.7269885168039439, - "grad_norm": 4.3115409786657555, - "learning_rate": 7.320792257548545e-07, - "loss": 0.8312, - "num_input_tokens_seen": 128315575, - "step": 6046 - }, - { - "epoch": 0.7271087596945831, - "grad_norm": 3.3347672501136456, - "learning_rate": 7.314768921282704e-07, - "loss": 0.7662, - "num_input_tokens_seen": 128335950, - "step": 6047 - }, - { - "epoch": 0.7272290025852222, - "grad_norm": 2.7446353530954477, - "learning_rate": 7.30874750935633e-07, - "loss": 0.7098, - "num_input_tokens_seen": 128355355, - "step": 6048 - }, - { - "epoch": 0.7273492454758612, - "grad_norm": 2.2518717189056576, - "learning_rate": 7.302728022682869e-07, - "loss": 0.7836, - "num_input_tokens_seen": 128372070, - "step": 6049 - }, - { - "epoch": 0.7274694883665004, - "grad_norm": 3.1131514248189145, - "learning_rate": 7.296710462175464e-07, - "loss": 0.7572, - "num_input_tokens_seen": 128390900, - "step": 6050 - }, - { - "epoch": 0.7275897312571394, - "grad_norm": 2.6819960937452736, - "learning_rate": 7.290694828746988e-07, - "loss": 0.8155, - "num_input_tokens_seen": 128410285, - "step": 6051 - }, - { - "epoch": 0.7277099741477785, - "grad_norm": 3.520379790824816, - "learning_rate": 7.284681123310004e-07, - "loss": 0.8554, - "num_input_tokens_seen": 128428720, - "step": 6052 - }, - { - "epoch": 0.7278302170384175, - "grad_norm": 5.851062002035487, - "learning_rate": 7.27866934677678e-07, - "loss": 0.7872, - "num_input_tokens_seen": 128448110, - "step": 6053 - }, - { - "epoch": 0.7279504599290567, - "grad_norm": 1.9870206113182822, - "learning_rate": 7.272659500059297e-07, - "loss": 0.7735, - "num_input_tokens_seen": 128465170, - "step": 6054 - }, - { - "epoch": 0.7280707028196958, - "grad_norm": 2.0897166472497037, - "learning_rate": 7.266651584069256e-07, - "loss": 0.7982, - "num_input_tokens_seen": 128482555, - "step": 6055 - }, - { - "epoch": 0.7281909457103348, - "grad_norm": 1.7866459548501132, - "learning_rate": 7.260645599718045e-07, - "loss": 0.5687, - "num_input_tokens_seen": 128508630, - "step": 6056 - }, - { - "epoch": 0.728311188600974, - "grad_norm": 3.220914906018947, - "learning_rate": 7.254641547916767e-07, - "loss": 0.6661, - "num_input_tokens_seen": 128525845, - "step": 6057 - }, - { - "epoch": 0.728431431491613, - "grad_norm": 2.0605856054673386, - "learning_rate": 7.248639429576234e-07, - "loss": 0.6863, - "num_input_tokens_seen": 128545020, - "step": 6058 - }, - { - "epoch": 0.7285516743822521, - "grad_norm": 2.6631959703913024, - "learning_rate": 7.242639245606959e-07, - "loss": 0.7232, - "num_input_tokens_seen": 128564530, - "step": 6059 - }, - { - "epoch": 0.7286719172728913, - "grad_norm": 1.989737404280523, - "learning_rate": 7.236640996919168e-07, - "loss": 0.8245, - "num_input_tokens_seen": 128583295, - "step": 6060 - }, - { - "epoch": 0.7287921601635303, - "grad_norm": 2.00108215447217, - "learning_rate": 7.230644684422789e-07, - "loss": 0.7062, - "num_input_tokens_seen": 128603245, - "step": 6061 - }, - { - "epoch": 0.7289124030541694, - "grad_norm": 2.051632005993278, - "learning_rate": 7.224650309027451e-07, - "loss": 0.8138, - "num_input_tokens_seen": 128622715, - "step": 6062 - }, - { - "epoch": 0.7290326459448085, - "grad_norm": 2.6032836186423465, - "learning_rate": 7.218657871642512e-07, - "loss": 0.6767, - "num_input_tokens_seen": 128641240, - "step": 6063 - }, - { - "epoch": 0.7291528888354476, - "grad_norm": 3.3610758079283727, - "learning_rate": 7.212667373177012e-07, - "loss": 0.6113, - "num_input_tokens_seen": 128655955, - "step": 6064 - }, - { - "epoch": 0.7292731317260867, - "grad_norm": 2.200698457498222, - "learning_rate": 7.206678814539704e-07, - "loss": 0.7538, - "num_input_tokens_seen": 128673975, - "step": 6065 - }, - { - "epoch": 0.7293933746167258, - "grad_norm": 2.226091148949567, - "learning_rate": 7.20069219663904e-07, - "loss": 0.7241, - "num_input_tokens_seen": 128693580, - "step": 6066 - }, - { - "epoch": 0.7295136175073649, - "grad_norm": 2.4209448392144757, - "learning_rate": 7.1947075203832e-07, - "loss": 0.7952, - "num_input_tokens_seen": 128713280, - "step": 6067 - }, - { - "epoch": 0.7296338603980039, - "grad_norm": 1.1248126970217327, - "learning_rate": 7.188724786680049e-07, - "loss": 0.6134, - "num_input_tokens_seen": 128773470, - "step": 6068 - }, - { - "epoch": 0.7297541032886431, - "grad_norm": 1.9710274931489284, - "learning_rate": 7.182743996437162e-07, - "loss": 0.7444, - "num_input_tokens_seen": 128792725, - "step": 6069 - }, - { - "epoch": 0.7298743461792822, - "grad_norm": 2.2890650565063684, - "learning_rate": 7.176765150561812e-07, - "loss": 0.6804, - "num_input_tokens_seen": 128811050, - "step": 6070 - }, - { - "epoch": 0.7299945890699212, - "grad_norm": 2.4091861150171057, - "learning_rate": 7.170788249961002e-07, - "loss": 0.7909, - "num_input_tokens_seen": 128829280, - "step": 6071 - }, - { - "epoch": 0.7301148319605604, - "grad_norm": 2.313346913900591, - "learning_rate": 7.164813295541412e-07, - "loss": 0.8797, - "num_input_tokens_seen": 128848565, - "step": 6072 - }, - { - "epoch": 0.7302350748511994, - "grad_norm": 1.97986860280755, - "learning_rate": 7.15884028820944e-07, - "loss": 0.6945, - "num_input_tokens_seen": 128867340, - "step": 6073 - }, - { - "epoch": 0.7303553177418385, - "grad_norm": 2.3942194688134135, - "learning_rate": 7.152869228871185e-07, - "loss": 0.5954, - "num_input_tokens_seen": 128889545, - "step": 6074 - }, - { - "epoch": 0.7304755606324776, - "grad_norm": 2.4428380423786877, - "learning_rate": 7.146900118432457e-07, - "loss": 0.715, - "num_input_tokens_seen": 128909010, - "step": 6075 - }, - { - "epoch": 0.7305958035231167, - "grad_norm": 1.8614967404043345, - "learning_rate": 7.140932957798759e-07, - "loss": 0.8486, - "num_input_tokens_seen": 128927170, - "step": 6076 - }, - { - "epoch": 0.7307160464137558, - "grad_norm": 1.9805047861872567, - "learning_rate": 7.134967747875309e-07, - "loss": 0.7096, - "num_input_tokens_seen": 128945100, - "step": 6077 - }, - { - "epoch": 0.7308362893043949, - "grad_norm": 2.4014442499278386, - "learning_rate": 7.129004489567014e-07, - "loss": 0.8063, - "num_input_tokens_seen": 128962300, - "step": 6078 - }, - { - "epoch": 0.730956532195034, - "grad_norm": 41.363265287859434, - "learning_rate": 7.123043183778512e-07, - "loss": 0.7822, - "num_input_tokens_seen": 128979350, - "step": 6079 - }, - { - "epoch": 0.731076775085673, - "grad_norm": 1.6456871501273476, - "learning_rate": 7.117083831414122e-07, - "loss": 0.6405, - "num_input_tokens_seen": 128998345, - "step": 6080 - }, - { - "epoch": 0.7311970179763122, - "grad_norm": 2.4294938073202097, - "learning_rate": 7.11112643337787e-07, - "loss": 0.7043, - "num_input_tokens_seen": 129017110, - "step": 6081 - }, - { - "epoch": 0.7313172608669513, - "grad_norm": 2.450763682414957, - "learning_rate": 7.105170990573484e-07, - "loss": 0.7574, - "num_input_tokens_seen": 129033780, - "step": 6082 - }, - { - "epoch": 0.7314375037575903, - "grad_norm": 3.102626626507124, - "learning_rate": 7.099217503904411e-07, - "loss": 0.6097, - "num_input_tokens_seen": 129051355, - "step": 6083 - }, - { - "epoch": 0.7315577466482295, - "grad_norm": 9.44806186414828, - "learning_rate": 7.093265974273788e-07, - "loss": 0.8978, - "num_input_tokens_seen": 129068970, - "step": 6084 - }, - { - "epoch": 0.7316779895388685, - "grad_norm": 2.009054671319429, - "learning_rate": 7.087316402584453e-07, - "loss": 0.7181, - "num_input_tokens_seen": 129087515, - "step": 6085 - }, - { - "epoch": 0.7317982324295076, - "grad_norm": 2.016204630139146, - "learning_rate": 7.081368789738947e-07, - "loss": 0.8483, - "num_input_tokens_seen": 129104435, - "step": 6086 - }, - { - "epoch": 0.7319184753201466, - "grad_norm": 2.8238217052094017, - "learning_rate": 7.075423136639531e-07, - "loss": 0.7691, - "num_input_tokens_seen": 129123410, - "step": 6087 - }, - { - "epoch": 0.7320387182107858, - "grad_norm": 2.455112707077979, - "learning_rate": 7.069479444188149e-07, - "loss": 0.7408, - "num_input_tokens_seen": 129143720, - "step": 6088 - }, - { - "epoch": 0.7321589611014249, - "grad_norm": 2.417089871530588, - "learning_rate": 7.063537713286461e-07, - "loss": 0.8213, - "num_input_tokens_seen": 129161120, - "step": 6089 - }, - { - "epoch": 0.7322792039920639, - "grad_norm": 1.9645444849915148, - "learning_rate": 7.057597944835803e-07, - "loss": 0.803, - "num_input_tokens_seen": 129180115, - "step": 6090 - }, - { - "epoch": 0.7323994468827031, - "grad_norm": 2.064679686669884, - "learning_rate": 7.051660139737253e-07, - "loss": 0.7375, - "num_input_tokens_seen": 129198055, - "step": 6091 - }, - { - "epoch": 0.7325196897733421, - "grad_norm": 1.9606667900970483, - "learning_rate": 7.045724298891565e-07, - "loss": 0.7554, - "num_input_tokens_seen": 129217245, - "step": 6092 - }, - { - "epoch": 0.7326399326639812, - "grad_norm": 2.189003655264423, - "learning_rate": 7.039790423199198e-07, - "loss": 0.6905, - "num_input_tokens_seen": 129236605, - "step": 6093 - }, - { - "epoch": 0.7327601755546204, - "grad_norm": 2.385283375288238, - "learning_rate": 7.033858513560316e-07, - "loss": 0.7796, - "num_input_tokens_seen": 129252620, - "step": 6094 - }, - { - "epoch": 0.7328804184452594, - "grad_norm": 3.082280751904044, - "learning_rate": 7.027928570874794e-07, - "loss": 0.7711, - "num_input_tokens_seen": 129270530, - "step": 6095 - }, - { - "epoch": 0.7330006613358985, - "grad_norm": 1.9275694232687495, - "learning_rate": 7.022000596042194e-07, - "loss": 0.8428, - "num_input_tokens_seen": 129287350, - "step": 6096 - }, - { - "epoch": 0.7331209042265376, - "grad_norm": 3.919498117069979, - "learning_rate": 7.016074589961784e-07, - "loss": 0.8139, - "num_input_tokens_seen": 129305635, - "step": 6097 - }, - { - "epoch": 0.7332411471171767, - "grad_norm": 2.580664585653789, - "learning_rate": 7.01015055353253e-07, - "loss": 0.6596, - "num_input_tokens_seen": 129327780, - "step": 6098 - }, - { - "epoch": 0.7333613900078157, - "grad_norm": 2.008725279965256, - "learning_rate": 7.004228487653116e-07, - "loss": 0.7698, - "num_input_tokens_seen": 129348305, - "step": 6099 - }, - { - "epoch": 0.7334816328984549, - "grad_norm": 2.2350749318053045, - "learning_rate": 6.998308393221906e-07, - "loss": 0.7746, - "num_input_tokens_seen": 129366430, - "step": 6100 - }, - { - "epoch": 0.733601875789094, - "grad_norm": 3.3344335588747582, - "learning_rate": 6.992390271136977e-07, - "loss": 0.7096, - "num_input_tokens_seen": 129381860, - "step": 6101 - }, - { - "epoch": 0.733722118679733, - "grad_norm": 2.124030678251846, - "learning_rate": 6.9864741222961e-07, - "loss": 0.8545, - "num_input_tokens_seen": 129400695, - "step": 6102 - }, - { - "epoch": 0.7338423615703722, - "grad_norm": 2.204175519286464, - "learning_rate": 6.980559947596751e-07, - "loss": 0.7238, - "num_input_tokens_seen": 129418955, - "step": 6103 - }, - { - "epoch": 0.7339626044610112, - "grad_norm": 2.6375335083590876, - "learning_rate": 6.974647747936109e-07, - "loss": 0.7578, - "num_input_tokens_seen": 129437060, - "step": 6104 - }, - { - "epoch": 0.7340828473516503, - "grad_norm": 2.2435324145256006, - "learning_rate": 6.968737524211046e-07, - "loss": 0.8193, - "num_input_tokens_seen": 129453590, - "step": 6105 - }, - { - "epoch": 0.7342030902422895, - "grad_norm": 2.6369733767013317, - "learning_rate": 6.962829277318132e-07, - "loss": 0.7981, - "num_input_tokens_seen": 129472905, - "step": 6106 - }, - { - "epoch": 0.7343233331329285, - "grad_norm": 2.376747445052866, - "learning_rate": 6.956923008153659e-07, - "loss": 0.8244, - "num_input_tokens_seen": 129492390, - "step": 6107 - }, - { - "epoch": 0.7344435760235676, - "grad_norm": 2.4372994325587625, - "learning_rate": 6.951018717613593e-07, - "loss": 0.8416, - "num_input_tokens_seen": 129511125, - "step": 6108 - }, - { - "epoch": 0.7345638189142067, - "grad_norm": 1.9203232744908312, - "learning_rate": 6.945116406593614e-07, - "loss": 0.7753, - "num_input_tokens_seen": 129529700, - "step": 6109 - }, - { - "epoch": 0.7346840618048458, - "grad_norm": 22.235917557628696, - "learning_rate": 6.939216075989089e-07, - "loss": 0.7364, - "num_input_tokens_seen": 129547350, - "step": 6110 - }, - { - "epoch": 0.7348043046954849, - "grad_norm": 2.2380365069196477, - "learning_rate": 6.933317726695109e-07, - "loss": 0.6586, - "num_input_tokens_seen": 129568300, - "step": 6111 - }, - { - "epoch": 0.734924547586124, - "grad_norm": 2.89637256690497, - "learning_rate": 6.92742135960644e-07, - "loss": 0.7912, - "num_input_tokens_seen": 129585720, - "step": 6112 - }, - { - "epoch": 0.7350447904767631, - "grad_norm": 0.9017764840904604, - "learning_rate": 6.921526975617556e-07, - "loss": 0.5997, - "num_input_tokens_seen": 129644900, - "step": 6113 - }, - { - "epoch": 0.7351650333674021, - "grad_norm": 1.8310775935069004, - "learning_rate": 6.915634575622625e-07, - "loss": 0.7367, - "num_input_tokens_seen": 129663135, - "step": 6114 - }, - { - "epoch": 0.7352852762580413, - "grad_norm": 2.834489786226201, - "learning_rate": 6.909744160515532e-07, - "loss": 0.7119, - "num_input_tokens_seen": 129680995, - "step": 6115 - }, - { - "epoch": 0.7354055191486804, - "grad_norm": 2.537388304611972, - "learning_rate": 6.903855731189843e-07, - "loss": 0.6792, - "num_input_tokens_seen": 129703350, - "step": 6116 - }, - { - "epoch": 0.7355257620393194, - "grad_norm": 2.3967596416043957, - "learning_rate": 6.897969288538825e-07, - "loss": 0.8184, - "num_input_tokens_seen": 129721015, - "step": 6117 - }, - { - "epoch": 0.7356460049299585, - "grad_norm": 2.42968571050616, - "learning_rate": 6.892084833455452e-07, - "loss": 0.8069, - "num_input_tokens_seen": 129740305, - "step": 6118 - }, - { - "epoch": 0.7357662478205976, - "grad_norm": 1.7636107275414388, - "learning_rate": 6.886202366832384e-07, - "loss": 0.8381, - "num_input_tokens_seen": 129761710, - "step": 6119 - }, - { - "epoch": 0.7358864907112367, - "grad_norm": 2.430707664843906, - "learning_rate": 6.880321889561993e-07, - "loss": 0.7371, - "num_input_tokens_seen": 129779405, - "step": 6120 - }, - { - "epoch": 0.7360067336018757, - "grad_norm": 2.3829015864766547, - "learning_rate": 6.874443402536338e-07, - "loss": 0.659, - "num_input_tokens_seen": 129798215, - "step": 6121 - }, - { - "epoch": 0.7361269764925149, - "grad_norm": 1.8503110283205804, - "learning_rate": 6.868566906647177e-07, - "loss": 0.7964, - "num_input_tokens_seen": 129818885, - "step": 6122 - }, - { - "epoch": 0.736247219383154, - "grad_norm": 1.9334544049962614, - "learning_rate": 6.862692402785984e-07, - "loss": 0.8233, - "num_input_tokens_seen": 129838855, - "step": 6123 - }, - { - "epoch": 0.736367462273793, - "grad_norm": 0.706384267566739, - "learning_rate": 6.856819891843905e-07, - "loss": 0.5159, - "num_input_tokens_seen": 129903280, - "step": 6124 - }, - { - "epoch": 0.7364877051644322, - "grad_norm": 2.4974335458414822, - "learning_rate": 6.8509493747118e-07, - "loss": 0.722, - "num_input_tokens_seen": 129921810, - "step": 6125 - }, - { - "epoch": 0.7366079480550712, - "grad_norm": 4.167968514206307, - "learning_rate": 6.845080852280213e-07, - "loss": 0.8793, - "num_input_tokens_seen": 129938600, - "step": 6126 - }, - { - "epoch": 0.7367281909457103, - "grad_norm": 1.80061949395276, - "learning_rate": 6.839214325439409e-07, - "loss": 0.7429, - "num_input_tokens_seen": 129956015, - "step": 6127 - }, - { - "epoch": 0.7368484338363495, - "grad_norm": 1.6820896811155979, - "learning_rate": 6.833349795079327e-07, - "loss": 0.7096, - "num_input_tokens_seen": 129974845, - "step": 6128 - }, - { - "epoch": 0.7369686767269885, - "grad_norm": 2.0374385285789915, - "learning_rate": 6.827487262089613e-07, - "loss": 0.6802, - "num_input_tokens_seen": 129995070, - "step": 6129 - }, - { - "epoch": 0.7370889196176276, - "grad_norm": 0.9163140055993958, - "learning_rate": 6.8216267273596e-07, - "loss": 0.6001, - "num_input_tokens_seen": 130060350, - "step": 6130 - }, - { - "epoch": 0.7372091625082667, - "grad_norm": 4.372017083883281, - "learning_rate": 6.815768191778342e-07, - "loss": 0.7747, - "num_input_tokens_seen": 130078150, - "step": 6131 - }, - { - "epoch": 0.7373294053989058, - "grad_norm": 1.9991369768946987, - "learning_rate": 6.809911656234575e-07, - "loss": 0.7307, - "num_input_tokens_seen": 130099845, - "step": 6132 - }, - { - "epoch": 0.7374496482895448, - "grad_norm": 2.7718994211318235, - "learning_rate": 6.804057121616713e-07, - "loss": 0.7835, - "num_input_tokens_seen": 130117770, - "step": 6133 - }, - { - "epoch": 0.737569891180184, - "grad_norm": 2.091485395373309, - "learning_rate": 6.798204588812888e-07, - "loss": 0.7171, - "num_input_tokens_seen": 130136905, - "step": 6134 - }, - { - "epoch": 0.7376901340708231, - "grad_norm": 2.1228414387598336, - "learning_rate": 6.792354058710937e-07, - "loss": 0.7477, - "num_input_tokens_seen": 130154095, - "step": 6135 - }, - { - "epoch": 0.7378103769614621, - "grad_norm": 5.784542254557947, - "learning_rate": 6.786505532198374e-07, - "loss": 0.6553, - "num_input_tokens_seen": 130172760, - "step": 6136 - }, - { - "epoch": 0.7379306198521013, - "grad_norm": 2.19090763445533, - "learning_rate": 6.780659010162417e-07, - "loss": 0.8445, - "num_input_tokens_seen": 130191430, - "step": 6137 - }, - { - "epoch": 0.7380508627427403, - "grad_norm": 1.849804107875879, - "learning_rate": 6.774814493489969e-07, - "loss": 0.8252, - "num_input_tokens_seen": 130208825, - "step": 6138 - }, - { - "epoch": 0.7381711056333794, - "grad_norm": 2.114469309547878, - "learning_rate": 6.768971983067655e-07, - "loss": 0.661, - "num_input_tokens_seen": 130228875, - "step": 6139 - }, - { - "epoch": 0.7382913485240186, - "grad_norm": 1.0841302311203715, - "learning_rate": 6.763131479781772e-07, - "loss": 0.7047, - "num_input_tokens_seen": 130278355, - "step": 6140 - }, - { - "epoch": 0.7384115914146576, - "grad_norm": 3.246889732029036, - "learning_rate": 6.757292984518316e-07, - "loss": 0.774, - "num_input_tokens_seen": 130297475, - "step": 6141 - }, - { - "epoch": 0.7385318343052967, - "grad_norm": 0.8745190247783067, - "learning_rate": 6.751456498162981e-07, - "loss": 0.615, - "num_input_tokens_seen": 130356230, - "step": 6142 - }, - { - "epoch": 0.7386520771959358, - "grad_norm": 2.164061349621602, - "learning_rate": 6.745622021601167e-07, - "loss": 0.8491, - "num_input_tokens_seen": 130372975, - "step": 6143 - }, - { - "epoch": 0.7387723200865749, - "grad_norm": 2.9392690147388443, - "learning_rate": 6.739789555717954e-07, - "loss": 0.704, - "num_input_tokens_seen": 130389670, - "step": 6144 - }, - { - "epoch": 0.738892562977214, - "grad_norm": 2.438339528576635, - "learning_rate": 6.733959101398124e-07, - "loss": 0.7777, - "num_input_tokens_seen": 130407520, - "step": 6145 - }, - { - "epoch": 0.7390128058678531, - "grad_norm": 2.0102901008691823, - "learning_rate": 6.72813065952615e-07, - "loss": 0.8091, - "num_input_tokens_seen": 130425050, - "step": 6146 - }, - { - "epoch": 0.7391330487584922, - "grad_norm": 2.7105356116443384, - "learning_rate": 6.7223042309862e-07, - "loss": 0.7025, - "num_input_tokens_seen": 130444970, - "step": 6147 - }, - { - "epoch": 0.7392532916491312, - "grad_norm": 2.4451829151999136, - "learning_rate": 6.716479816662144e-07, - "loss": 0.7272, - "num_input_tokens_seen": 130466420, - "step": 6148 - }, - { - "epoch": 0.7393735345397703, - "grad_norm": 2.072380817722059, - "learning_rate": 6.710657417437537e-07, - "loss": 0.728, - "num_input_tokens_seen": 130485845, - "step": 6149 - }, - { - "epoch": 0.7394937774304094, - "grad_norm": 4.157813101906924, - "learning_rate": 6.704837034195628e-07, - "loss": 0.7874, - "num_input_tokens_seen": 130504030, - "step": 6150 - }, - { - "epoch": 0.7396140203210485, - "grad_norm": 1.8346196805104165, - "learning_rate": 6.699018667819376e-07, - "loss": 0.845, - "num_input_tokens_seen": 130523150, - "step": 6151 - }, - { - "epoch": 0.7397342632116876, - "grad_norm": 2.038622815045475, - "learning_rate": 6.693202319191415e-07, - "loss": 0.7203, - "num_input_tokens_seen": 130544605, - "step": 6152 - }, - { - "epoch": 0.7398545061023267, - "grad_norm": 1.9652806355905164, - "learning_rate": 6.687387989194084e-07, - "loss": 0.7332, - "num_input_tokens_seen": 130563840, - "step": 6153 - }, - { - "epoch": 0.7399747489929658, - "grad_norm": 2.2835269910615086, - "learning_rate": 6.681575678709404e-07, - "loss": 0.7958, - "num_input_tokens_seen": 130582250, - "step": 6154 - }, - { - "epoch": 0.7400949918836048, - "grad_norm": 2.1808278160421475, - "learning_rate": 6.67576538861911e-07, - "loss": 0.6989, - "num_input_tokens_seen": 130600545, - "step": 6155 - }, - { - "epoch": 0.740215234774244, - "grad_norm": 1.7902682349162393, - "learning_rate": 6.669957119804612e-07, - "loss": 0.8192, - "num_input_tokens_seen": 130621900, - "step": 6156 - }, - { - "epoch": 0.7403354776648831, - "grad_norm": 3.2241967800642453, - "learning_rate": 6.66415087314702e-07, - "loss": 0.7087, - "num_input_tokens_seen": 130636575, - "step": 6157 - }, - { - "epoch": 0.7404557205555221, - "grad_norm": 2.750336163648373, - "learning_rate": 6.658346649527133e-07, - "loss": 0.7246, - "num_input_tokens_seen": 130653745, - "step": 6158 - }, - { - "epoch": 0.7405759634461613, - "grad_norm": 2.0806408185318297, - "learning_rate": 6.652544449825457e-07, - "loss": 0.7493, - "num_input_tokens_seen": 130673720, - "step": 6159 - }, - { - "epoch": 0.7406962063368003, - "grad_norm": 1.7759439799619565, - "learning_rate": 6.646744274922176e-07, - "loss": 0.7578, - "num_input_tokens_seen": 130691885, - "step": 6160 - }, - { - "epoch": 0.7408164492274394, - "grad_norm": 6.522752526671609, - "learning_rate": 6.640946125697171e-07, - "loss": 0.7544, - "num_input_tokens_seen": 130709135, - "step": 6161 - }, - { - "epoch": 0.7409366921180786, - "grad_norm": 2.888736431926943, - "learning_rate": 6.635150003030017e-07, - "loss": 0.7567, - "num_input_tokens_seen": 130727380, - "step": 6162 - }, - { - "epoch": 0.7410569350087176, - "grad_norm": 2.613375665571984, - "learning_rate": 6.629355907799981e-07, - "loss": 0.8561, - "num_input_tokens_seen": 130746905, - "step": 6163 - }, - { - "epoch": 0.7411771778993567, - "grad_norm": 2.1444508926581087, - "learning_rate": 6.623563840886022e-07, - "loss": 0.6979, - "num_input_tokens_seen": 130767550, - "step": 6164 - }, - { - "epoch": 0.7412974207899958, - "grad_norm": 2.6413550402617556, - "learning_rate": 6.617773803166795e-07, - "loss": 0.6929, - "num_input_tokens_seen": 130785595, - "step": 6165 - }, - { - "epoch": 0.7414176636806349, - "grad_norm": 2.3734336601310226, - "learning_rate": 6.611985795520634e-07, - "loss": 0.8176, - "num_input_tokens_seen": 130803860, - "step": 6166 - }, - { - "epoch": 0.7415379065712739, - "grad_norm": 2.3865155461574923, - "learning_rate": 6.606199818825588e-07, - "loss": 0.7603, - "num_input_tokens_seen": 130824035, - "step": 6167 - }, - { - "epoch": 0.7416581494619131, - "grad_norm": 2.35215065475073, - "learning_rate": 6.600415873959384e-07, - "loss": 0.806, - "num_input_tokens_seen": 130841630, - "step": 6168 - }, - { - "epoch": 0.7417783923525522, - "grad_norm": 2.965403989764952, - "learning_rate": 6.594633961799437e-07, - "loss": 0.6441, - "num_input_tokens_seen": 130860390, - "step": 6169 - }, - { - "epoch": 0.7418986352431912, - "grad_norm": 2.230060175680007, - "learning_rate": 6.58885408322285e-07, - "loss": 0.8202, - "num_input_tokens_seen": 130879545, - "step": 6170 - }, - { - "epoch": 0.7420188781338304, - "grad_norm": 2.2890149871782937, - "learning_rate": 6.583076239106444e-07, - "loss": 0.8019, - "num_input_tokens_seen": 130897770, - "step": 6171 - }, - { - "epoch": 0.7421391210244694, - "grad_norm": 2.7357449577245387, - "learning_rate": 6.577300430326707e-07, - "loss": 0.7512, - "num_input_tokens_seen": 130912435, - "step": 6172 - }, - { - "epoch": 0.7422593639151085, - "grad_norm": 2.670546819823486, - "learning_rate": 6.571526657759821e-07, - "loss": 0.7135, - "num_input_tokens_seen": 130927895, - "step": 6173 - }, - { - "epoch": 0.7423796068057477, - "grad_norm": 3.842777133018692, - "learning_rate": 6.565754922281657e-07, - "loss": 0.7093, - "num_input_tokens_seen": 130949860, - "step": 6174 - }, - { - "epoch": 0.7424998496963867, - "grad_norm": 1.897159116202194, - "learning_rate": 6.559985224767801e-07, - "loss": 0.7776, - "num_input_tokens_seen": 130967455, - "step": 6175 - }, - { - "epoch": 0.7426200925870258, - "grad_norm": 2.7516521150344198, - "learning_rate": 6.554217566093496e-07, - "loss": 0.7465, - "num_input_tokens_seen": 130985430, - "step": 6176 - }, - { - "epoch": 0.7427403354776649, - "grad_norm": 3.0097296119077526, - "learning_rate": 6.548451947133698e-07, - "loss": 0.7837, - "num_input_tokens_seen": 131006100, - "step": 6177 - }, - { - "epoch": 0.742860578368304, - "grad_norm": 2.196136065511712, - "learning_rate": 6.542688368763034e-07, - "loss": 0.8008, - "num_input_tokens_seen": 131024225, - "step": 6178 - }, - { - "epoch": 0.742980821258943, - "grad_norm": 1.901052777838635, - "learning_rate": 6.536926831855854e-07, - "loss": 0.7697, - "num_input_tokens_seen": 131043110, - "step": 6179 - }, - { - "epoch": 0.7431010641495821, - "grad_norm": 2.746691240813948, - "learning_rate": 6.531167337286165e-07, - "loss": 0.7266, - "num_input_tokens_seen": 131062850, - "step": 6180 - }, - { - "epoch": 0.7432213070402213, - "grad_norm": 1.6885432823452455, - "learning_rate": 6.525409885927686e-07, - "loss": 0.7884, - "num_input_tokens_seen": 131083590, - "step": 6181 - }, - { - "epoch": 0.7433415499308603, - "grad_norm": 2.0787481087993265, - "learning_rate": 6.519654478653806e-07, - "loss": 0.8332, - "num_input_tokens_seen": 131101675, - "step": 6182 - }, - { - "epoch": 0.7434617928214994, - "grad_norm": 0.7801077334796087, - "learning_rate": 6.51390111633763e-07, - "loss": 0.5811, - "num_input_tokens_seen": 131166670, - "step": 6183 - }, - { - "epoch": 0.7435820357121385, - "grad_norm": 2.0190433369509146, - "learning_rate": 6.508149799851932e-07, - "loss": 0.7552, - "num_input_tokens_seen": 131188055, - "step": 6184 - }, - { - "epoch": 0.7437022786027776, - "grad_norm": 2.309625367798866, - "learning_rate": 6.502400530069183e-07, - "loss": 0.6165, - "num_input_tokens_seen": 131207660, - "step": 6185 - }, - { - "epoch": 0.7438225214934167, - "grad_norm": 2.1925091187398533, - "learning_rate": 6.496653307861535e-07, - "loss": 0.68, - "num_input_tokens_seen": 131228050, - "step": 6186 - }, - { - "epoch": 0.7439427643840558, - "grad_norm": 1.9996484675225485, - "learning_rate": 6.49090813410085e-07, - "loss": 0.6531, - "num_input_tokens_seen": 131246235, - "step": 6187 - }, - { - "epoch": 0.7440630072746949, - "grad_norm": 2.2901946076725483, - "learning_rate": 6.48516500965866e-07, - "loss": 0.6906, - "num_input_tokens_seen": 131265890, - "step": 6188 - }, - { - "epoch": 0.7441832501653339, - "grad_norm": 1.8128198889936369, - "learning_rate": 6.479423935406192e-07, - "loss": 0.8118, - "num_input_tokens_seen": 131285595, - "step": 6189 - }, - { - "epoch": 0.7443034930559731, - "grad_norm": 0.9215484257310398, - "learning_rate": 6.473684912214363e-07, - "loss": 0.7075, - "num_input_tokens_seen": 131348875, - "step": 6190 - }, - { - "epoch": 0.7444237359466122, - "grad_norm": 2.307173296223332, - "learning_rate": 6.467947940953778e-07, - "loss": 0.6909, - "num_input_tokens_seen": 131367120, - "step": 6191 - }, - { - "epoch": 0.7445439788372512, - "grad_norm": 2.478181516923358, - "learning_rate": 6.462213022494732e-07, - "loss": 0.7186, - "num_input_tokens_seen": 131386085, - "step": 6192 - }, - { - "epoch": 0.7446642217278904, - "grad_norm": 0.8337086010664749, - "learning_rate": 6.456480157707207e-07, - "loss": 0.6557, - "num_input_tokens_seen": 131450580, - "step": 6193 - }, - { - "epoch": 0.7447844646185294, - "grad_norm": 2.0336677824027034, - "learning_rate": 6.450749347460866e-07, - "loss": 0.8509, - "num_input_tokens_seen": 131467275, - "step": 6194 - }, - { - "epoch": 0.7449047075091685, - "grad_norm": 1.8449635820557584, - "learning_rate": 6.445020592625083e-07, - "loss": 0.7871, - "num_input_tokens_seen": 131487645, - "step": 6195 - }, - { - "epoch": 0.7450249503998077, - "grad_norm": 2.2208985712182034, - "learning_rate": 6.4392938940689e-07, - "loss": 0.8044, - "num_input_tokens_seen": 131502780, - "step": 6196 - }, - { - "epoch": 0.7451451932904467, - "grad_norm": 2.5486925102851847, - "learning_rate": 6.433569252661049e-07, - "loss": 0.7006, - "num_input_tokens_seen": 131520500, - "step": 6197 - }, - { - "epoch": 0.7452654361810858, - "grad_norm": 2.1374560083020366, - "learning_rate": 6.427846669269952e-07, - "loss": 0.7058, - "num_input_tokens_seen": 131537840, - "step": 6198 - }, - { - "epoch": 0.7453856790717249, - "grad_norm": 1.9680436428453851, - "learning_rate": 6.422126144763729e-07, - "loss": 0.818, - "num_input_tokens_seen": 131556950, - "step": 6199 - }, - { - "epoch": 0.745505921962364, - "grad_norm": 2.252747848797241, - "learning_rate": 6.416407680010174e-07, - "loss": 0.7718, - "num_input_tokens_seen": 131571030, - "step": 6200 - }, - { - "epoch": 0.745626164853003, - "grad_norm": 2.074550575069196, - "learning_rate": 6.410691275876774e-07, - "loss": 0.8076, - "num_input_tokens_seen": 131590170, - "step": 6201 - }, - { - "epoch": 0.7457464077436422, - "grad_norm": 7.644304659026054, - "learning_rate": 6.404976933230696e-07, - "loss": 0.7584, - "num_input_tokens_seen": 131606410, - "step": 6202 - }, - { - "epoch": 0.7458666506342813, - "grad_norm": 2.153671702211841, - "learning_rate": 6.399264652938813e-07, - "loss": 0.7226, - "num_input_tokens_seen": 131627035, - "step": 6203 - }, - { - "epoch": 0.7459868935249203, - "grad_norm": 2.1276632030606537, - "learning_rate": 6.393554435867672e-07, - "loss": 0.7374, - "num_input_tokens_seen": 131647605, - "step": 6204 - }, - { - "epoch": 0.7461071364155595, - "grad_norm": 2.5335659087954143, - "learning_rate": 6.387846282883502e-07, - "loss": 0.8255, - "num_input_tokens_seen": 131663855, - "step": 6205 - }, - { - "epoch": 0.7462273793061985, - "grad_norm": 2.443258329221693, - "learning_rate": 6.38214019485223e-07, - "loss": 0.7656, - "num_input_tokens_seen": 131682400, - "step": 6206 - }, - { - "epoch": 0.7463476221968376, - "grad_norm": 2.185481759928335, - "learning_rate": 6.376436172639461e-07, - "loss": 0.7106, - "num_input_tokens_seen": 131699965, - "step": 6207 - }, - { - "epoch": 0.7464678650874768, - "grad_norm": 3.725363092407717, - "learning_rate": 6.370734217110494e-07, - "loss": 0.6447, - "num_input_tokens_seen": 131718430, - "step": 6208 - }, - { - "epoch": 0.7465881079781158, - "grad_norm": 19.256440167348703, - "learning_rate": 6.36503432913031e-07, - "loss": 0.6384, - "num_input_tokens_seen": 131741295, - "step": 6209 - }, - { - "epoch": 0.7467083508687549, - "grad_norm": 1.9628870552078443, - "learning_rate": 6.359336509563569e-07, - "loss": 0.6761, - "num_input_tokens_seen": 131757035, - "step": 6210 - }, - { - "epoch": 0.7468285937593939, - "grad_norm": 2.071013534539995, - "learning_rate": 6.353640759274641e-07, - "loss": 0.8013, - "num_input_tokens_seen": 131775645, - "step": 6211 - }, - { - "epoch": 0.7469488366500331, - "grad_norm": 3.4086281004602874, - "learning_rate": 6.34794707912756e-07, - "loss": 0.7437, - "num_input_tokens_seen": 131793265, - "step": 6212 - }, - { - "epoch": 0.7470690795406721, - "grad_norm": 2.50271502462732, - "learning_rate": 6.342255469986053e-07, - "loss": 0.7654, - "num_input_tokens_seen": 131811730, - "step": 6213 - }, - { - "epoch": 0.7471893224313112, - "grad_norm": 2.0791887329503833, - "learning_rate": 6.336565932713527e-07, - "loss": 0.773, - "num_input_tokens_seen": 131830875, - "step": 6214 - }, - { - "epoch": 0.7473095653219504, - "grad_norm": 1.7651660965190452, - "learning_rate": 6.330878468173088e-07, - "loss": 0.7732, - "num_input_tokens_seen": 131850660, - "step": 6215 - }, - { - "epoch": 0.7474298082125894, - "grad_norm": 1.9421687644878305, - "learning_rate": 6.32519307722752e-07, - "loss": 0.7249, - "num_input_tokens_seen": 131868275, - "step": 6216 - }, - { - "epoch": 0.7475500511032285, - "grad_norm": 0.8144640220043486, - "learning_rate": 6.31950976073929e-07, - "loss": 0.5802, - "num_input_tokens_seen": 131922085, - "step": 6217 - }, - { - "epoch": 0.7476702939938676, - "grad_norm": 2.653064343602838, - "learning_rate": 6.31382851957055e-07, - "loss": 0.7967, - "num_input_tokens_seen": 131938625, - "step": 6218 - }, - { - "epoch": 0.7477905368845067, - "grad_norm": 10.286464837577718, - "learning_rate": 6.308149354583143e-07, - "loss": 0.7037, - "num_input_tokens_seen": 131957750, - "step": 6219 - }, - { - "epoch": 0.7479107797751458, - "grad_norm": 2.950586283588881, - "learning_rate": 6.302472266638592e-07, - "loss": 0.8139, - "num_input_tokens_seen": 131978010, - "step": 6220 - }, - { - "epoch": 0.7480310226657849, - "grad_norm": 2.1960507784738965, - "learning_rate": 6.296797256598107e-07, - "loss": 0.6991, - "num_input_tokens_seen": 131999210, - "step": 6221 - }, - { - "epoch": 0.748151265556424, - "grad_norm": 2.621399342220879, - "learning_rate": 6.291124325322576e-07, - "loss": 0.8008, - "num_input_tokens_seen": 132019055, - "step": 6222 - }, - { - "epoch": 0.748271508447063, - "grad_norm": 1.8982444736940969, - "learning_rate": 6.285453473672595e-07, - "loss": 0.619, - "num_input_tokens_seen": 132041345, - "step": 6223 - }, - { - "epoch": 0.7483917513377022, - "grad_norm": 4.290460144155665, - "learning_rate": 6.279784702508415e-07, - "loss": 0.7567, - "num_input_tokens_seen": 132061815, - "step": 6224 - }, - { - "epoch": 0.7485119942283412, - "grad_norm": 0.8330735577887192, - "learning_rate": 6.274118012689987e-07, - "loss": 0.6456, - "num_input_tokens_seen": 132123435, - "step": 6225 - }, - { - "epoch": 0.7486322371189803, - "grad_norm": 2.0693548885609068, - "learning_rate": 6.268453405076937e-07, - "loss": 0.6787, - "num_input_tokens_seen": 132145550, - "step": 6226 - }, - { - "epoch": 0.7487524800096195, - "grad_norm": 2.1435393605445463, - "learning_rate": 6.262790880528592e-07, - "loss": 0.8136, - "num_input_tokens_seen": 132162890, - "step": 6227 - }, - { - "epoch": 0.7488727229002585, - "grad_norm": 3.4252993367512374, - "learning_rate": 6.257130439903951e-07, - "loss": 0.7912, - "num_input_tokens_seen": 132179105, - "step": 6228 - }, - { - "epoch": 0.7489929657908976, - "grad_norm": 2.143155115115386, - "learning_rate": 6.251472084061695e-07, - "loss": 0.8011, - "num_input_tokens_seen": 132197745, - "step": 6229 - }, - { - "epoch": 0.7491132086815367, - "grad_norm": 2.17579804041867, - "learning_rate": 6.245815813860184e-07, - "loss": 0.8936, - "num_input_tokens_seen": 132212975, - "step": 6230 - }, - { - "epoch": 0.7492334515721758, - "grad_norm": 5.344181930879343, - "learning_rate": 6.240161630157487e-07, - "loss": 0.6909, - "num_input_tokens_seen": 132232050, - "step": 6231 - }, - { - "epoch": 0.7493536944628149, - "grad_norm": 2.563653592848581, - "learning_rate": 6.23450953381133e-07, - "loss": 0.6937, - "num_input_tokens_seen": 132249860, - "step": 6232 - }, - { - "epoch": 0.749473937353454, - "grad_norm": 2.14881737953873, - "learning_rate": 6.228859525679131e-07, - "loss": 0.6751, - "num_input_tokens_seen": 132263995, - "step": 6233 - }, - { - "epoch": 0.7495941802440931, - "grad_norm": 2.1536892318808394, - "learning_rate": 6.223211606617993e-07, - "loss": 0.7959, - "num_input_tokens_seen": 132282135, - "step": 6234 - }, - { - "epoch": 0.7497144231347321, - "grad_norm": 1.8020189379318385, - "learning_rate": 6.217565777484701e-07, - "loss": 0.8229, - "num_input_tokens_seen": 132300950, - "step": 6235 - }, - { - "epoch": 0.7498346660253713, - "grad_norm": 1.7648875950356748, - "learning_rate": 6.211922039135722e-07, - "loss": 0.7942, - "num_input_tokens_seen": 132320815, - "step": 6236 - }, - { - "epoch": 0.7499549089160104, - "grad_norm": 2.6101216125564037, - "learning_rate": 6.206280392427208e-07, - "loss": 0.7996, - "num_input_tokens_seen": 132340120, - "step": 6237 - }, - { - "epoch": 0.7500751518066494, - "grad_norm": 1.8213365546893499, - "learning_rate": 6.200640838214983e-07, - "loss": 0.7292, - "num_input_tokens_seen": 132362615, - "step": 6238 - }, - { - "epoch": 0.7501953946972886, - "grad_norm": 2.1930796733989286, - "learning_rate": 6.195003377354578e-07, - "loss": 0.6683, - "num_input_tokens_seen": 132381605, - "step": 6239 - }, - { - "epoch": 0.7503156375879276, - "grad_norm": 3.4603565266700507, - "learning_rate": 6.189368010701183e-07, - "loss": 0.728, - "num_input_tokens_seen": 132398385, - "step": 6240 - }, - { - "epoch": 0.7504358804785667, - "grad_norm": 2.0900273184119484, - "learning_rate": 6.183734739109683e-07, - "loss": 0.753, - "num_input_tokens_seen": 132415925, - "step": 6241 - }, - { - "epoch": 0.7505561233692057, - "grad_norm": 2.5381114127317614, - "learning_rate": 6.178103563434629e-07, - "loss": 0.6837, - "num_input_tokens_seen": 132434645, - "step": 6242 - }, - { - "epoch": 0.7506763662598449, - "grad_norm": 1.7457561623563378, - "learning_rate": 6.172474484530283e-07, - "loss": 0.8338, - "num_input_tokens_seen": 132453100, - "step": 6243 - }, - { - "epoch": 0.750796609150484, - "grad_norm": 1.9637201652485372, - "learning_rate": 6.166847503250563e-07, - "loss": 0.75, - "num_input_tokens_seen": 132475060, - "step": 6244 - }, - { - "epoch": 0.750916852041123, - "grad_norm": 3.1392941287431344, - "learning_rate": 6.161222620449078e-07, - "loss": 0.7938, - "num_input_tokens_seen": 132493555, - "step": 6245 - }, - { - "epoch": 0.7510370949317622, - "grad_norm": 4.0444749547274945, - "learning_rate": 6.155599836979111e-07, - "loss": 0.8016, - "num_input_tokens_seen": 132511960, - "step": 6246 - }, - { - "epoch": 0.7511573378224012, - "grad_norm": 2.18621966273293, - "learning_rate": 6.149979153693649e-07, - "loss": 0.8045, - "num_input_tokens_seen": 132528935, - "step": 6247 - }, - { - "epoch": 0.7512775807130403, - "grad_norm": 7.65615635466436, - "learning_rate": 6.144360571445337e-07, - "loss": 0.7565, - "num_input_tokens_seen": 132547800, - "step": 6248 - }, - { - "epoch": 0.7513978236036795, - "grad_norm": 2.1028147941652455, - "learning_rate": 6.138744091086509e-07, - "loss": 0.7912, - "num_input_tokens_seen": 132567105, - "step": 6249 - }, - { - "epoch": 0.7515180664943185, - "grad_norm": 9.293610789068884, - "learning_rate": 6.133129713469183e-07, - "loss": 0.7285, - "num_input_tokens_seen": 132586030, - "step": 6250 - }, - { - "epoch": 0.7516383093849576, - "grad_norm": 3.501417704553043, - "learning_rate": 6.127517439445053e-07, - "loss": 0.6323, - "num_input_tokens_seen": 132606595, - "step": 6251 - }, - { - "epoch": 0.7517585522755967, - "grad_norm": 2.2087797664612903, - "learning_rate": 6.121907269865498e-07, - "loss": 0.8223, - "num_input_tokens_seen": 132625805, - "step": 6252 - }, - { - "epoch": 0.7518787951662358, - "grad_norm": 0.9794511107462682, - "learning_rate": 6.116299205581577e-07, - "loss": 0.7298, - "num_input_tokens_seen": 132680355, - "step": 6253 - }, - { - "epoch": 0.7519990380568748, - "grad_norm": 12.222096153516329, - "learning_rate": 6.110693247444018e-07, - "loss": 0.6843, - "num_input_tokens_seen": 132701910, - "step": 6254 - }, - { - "epoch": 0.752119280947514, - "grad_norm": 2.6435225226564913, - "learning_rate": 6.105089396303258e-07, - "loss": 0.8152, - "num_input_tokens_seen": 132720020, - "step": 6255 - }, - { - "epoch": 0.7522395238381531, - "grad_norm": 2.543789548541477, - "learning_rate": 6.09948765300939e-07, - "loss": 0.753, - "num_input_tokens_seen": 132739085, - "step": 6256 - }, - { - "epoch": 0.7523597667287921, - "grad_norm": 2.436047192948676, - "learning_rate": 6.093888018412192e-07, - "loss": 0.83, - "num_input_tokens_seen": 132754995, - "step": 6257 - }, - { - "epoch": 0.7524800096194313, - "grad_norm": 0.7746759661345062, - "learning_rate": 6.088290493361119e-07, - "loss": 0.5791, - "num_input_tokens_seen": 132819600, - "step": 6258 - }, - { - "epoch": 0.7526002525100703, - "grad_norm": 3.3903081888929067, - "learning_rate": 6.082695078705322e-07, - "loss": 0.7157, - "num_input_tokens_seen": 132836800, - "step": 6259 - }, - { - "epoch": 0.7527204954007094, - "grad_norm": 5.823284895127585, - "learning_rate": 6.077101775293618e-07, - "loss": 0.6779, - "num_input_tokens_seen": 132855345, - "step": 6260 - }, - { - "epoch": 0.7528407382913486, - "grad_norm": 2.5486632210317373, - "learning_rate": 6.071510583974504e-07, - "loss": 0.8224, - "num_input_tokens_seen": 132870250, - "step": 6261 - }, - { - "epoch": 0.7529609811819876, - "grad_norm": 2.0739337043893973, - "learning_rate": 6.065921505596161e-07, - "loss": 0.7117, - "num_input_tokens_seen": 132888250, - "step": 6262 - }, - { - "epoch": 0.7530812240726267, - "grad_norm": 2.030532307697674, - "learning_rate": 6.060334541006445e-07, - "loss": 0.7671, - "num_input_tokens_seen": 132906465, - "step": 6263 - }, - { - "epoch": 0.7532014669632658, - "grad_norm": 1.5603893390424808, - "learning_rate": 6.054749691052896e-07, - "loss": 0.6772, - "num_input_tokens_seen": 132929175, - "step": 6264 - }, - { - "epoch": 0.7533217098539049, - "grad_norm": 2.6149667986905802, - "learning_rate": 6.049166956582732e-07, - "loss": 0.7295, - "num_input_tokens_seen": 132947160, - "step": 6265 - }, - { - "epoch": 0.753441952744544, - "grad_norm": 2.792615985038066, - "learning_rate": 6.043586338442841e-07, - "loss": 0.8612, - "num_input_tokens_seen": 132965935, - "step": 6266 - }, - { - "epoch": 0.7535621956351831, - "grad_norm": 1.4436900666320727, - "learning_rate": 6.038007837479815e-07, - "loss": 0.7231, - "num_input_tokens_seen": 132986760, - "step": 6267 - }, - { - "epoch": 0.7536824385258222, - "grad_norm": 2.3453243820403724, - "learning_rate": 6.032431454539897e-07, - "loss": 0.6303, - "num_input_tokens_seen": 133005325, - "step": 6268 - }, - { - "epoch": 0.7538026814164612, - "grad_norm": 2.2050019327520127, - "learning_rate": 6.026857190469022e-07, - "loss": 0.8037, - "num_input_tokens_seen": 133026800, - "step": 6269 - }, - { - "epoch": 0.7539229243071004, - "grad_norm": 2.9678922303506803, - "learning_rate": 6.021285046112794e-07, - "loss": 0.7417, - "num_input_tokens_seen": 133045640, - "step": 6270 - }, - { - "epoch": 0.7540431671977395, - "grad_norm": 2.350658345750166, - "learning_rate": 6.015715022316516e-07, - "loss": 0.7505, - "num_input_tokens_seen": 133063340, - "step": 6271 - }, - { - "epoch": 0.7541634100883785, - "grad_norm": 2.8055025198113523, - "learning_rate": 6.010147119925154e-07, - "loss": 0.7783, - "num_input_tokens_seen": 133080815, - "step": 6272 - }, - { - "epoch": 0.7542836529790176, - "grad_norm": 2.3274050714884793, - "learning_rate": 6.004581339783348e-07, - "loss": 0.6489, - "num_input_tokens_seen": 133098855, - "step": 6273 - }, - { - "epoch": 0.7544038958696567, - "grad_norm": 2.9049557388342926, - "learning_rate": 5.999017682735419e-07, - "loss": 0.6763, - "num_input_tokens_seen": 133114965, - "step": 6274 - }, - { - "epoch": 0.7545241387602958, - "grad_norm": 2.2010233390423077, - "learning_rate": 5.993456149625382e-07, - "loss": 0.6582, - "num_input_tokens_seen": 133135835, - "step": 6275 - }, - { - "epoch": 0.7546443816509348, - "grad_norm": 2.186178802353043, - "learning_rate": 5.987896741296909e-07, - "loss": 0.8092, - "num_input_tokens_seen": 133153295, - "step": 6276 - }, - { - "epoch": 0.754764624541574, - "grad_norm": 2.3978810453707293, - "learning_rate": 5.982339458593361e-07, - "loss": 0.7779, - "num_input_tokens_seen": 133172955, - "step": 6277 - }, - { - "epoch": 0.7548848674322131, - "grad_norm": 3.6616735672445806, - "learning_rate": 5.976784302357773e-07, - "loss": 0.8384, - "num_input_tokens_seen": 133193240, - "step": 6278 - }, - { - "epoch": 0.7550051103228521, - "grad_norm": 10.076436554867167, - "learning_rate": 5.971231273432855e-07, - "loss": 0.7224, - "num_input_tokens_seen": 133212445, - "step": 6279 - }, - { - "epoch": 0.7551253532134913, - "grad_norm": 0.839676214768804, - "learning_rate": 5.965680372661e-07, - "loss": 0.5816, - "num_input_tokens_seen": 133269730, - "step": 6280 - }, - { - "epoch": 0.7552455961041303, - "grad_norm": 1.9735119962533987, - "learning_rate": 5.960131600884273e-07, - "loss": 0.5609, - "num_input_tokens_seen": 133288720, - "step": 6281 - }, - { - "epoch": 0.7553658389947694, - "grad_norm": 2.4628861132618143, - "learning_rate": 5.954584958944413e-07, - "loss": 0.7513, - "num_input_tokens_seen": 133307105, - "step": 6282 - }, - { - "epoch": 0.7554860818854086, - "grad_norm": 2.63888668777687, - "learning_rate": 5.949040447682854e-07, - "loss": 0.8064, - "num_input_tokens_seen": 133326650, - "step": 6283 - }, - { - "epoch": 0.7556063247760476, - "grad_norm": 2.766594810246037, - "learning_rate": 5.943498067940686e-07, - "loss": 0.6852, - "num_input_tokens_seen": 133343395, - "step": 6284 - }, - { - "epoch": 0.7557265676666867, - "grad_norm": 1.899263174678764, - "learning_rate": 5.937957820558686e-07, - "loss": 0.8126, - "num_input_tokens_seen": 133362460, - "step": 6285 - }, - { - "epoch": 0.7558468105573258, - "grad_norm": 0.9006774994163477, - "learning_rate": 5.932419706377296e-07, - "loss": 0.6776, - "num_input_tokens_seen": 133420485, - "step": 6286 - }, - { - "epoch": 0.7559670534479649, - "grad_norm": 2.6403849656575433, - "learning_rate": 5.92688372623666e-07, - "loss": 0.7363, - "num_input_tokens_seen": 133438910, - "step": 6287 - }, - { - "epoch": 0.7560872963386039, - "grad_norm": 2.6468925376835033, - "learning_rate": 5.921349880976574e-07, - "loss": 0.7339, - "num_input_tokens_seen": 133456465, - "step": 6288 - }, - { - "epoch": 0.7562075392292431, - "grad_norm": 2.0771945384229156, - "learning_rate": 5.915818171436515e-07, - "loss": 0.8131, - "num_input_tokens_seen": 133475520, - "step": 6289 - }, - { - "epoch": 0.7563277821198822, - "grad_norm": 2.3145208987277868, - "learning_rate": 5.910288598455637e-07, - "loss": 0.7385, - "num_input_tokens_seen": 133494590, - "step": 6290 - }, - { - "epoch": 0.7564480250105212, - "grad_norm": 2.9164621711051555, - "learning_rate": 5.90476116287278e-07, - "loss": 0.7332, - "num_input_tokens_seen": 133511910, - "step": 6291 - }, - { - "epoch": 0.7565682679011604, - "grad_norm": 2.0452487514656696, - "learning_rate": 5.899235865526448e-07, - "loss": 0.6733, - "num_input_tokens_seen": 133530925, - "step": 6292 - }, - { - "epoch": 0.7566885107917994, - "grad_norm": 1.704670311944046, - "learning_rate": 5.893712707254825e-07, - "loss": 0.8086, - "num_input_tokens_seen": 133548105, - "step": 6293 - }, - { - "epoch": 0.7568087536824385, - "grad_norm": 6.10587203076784, - "learning_rate": 5.888191688895769e-07, - "loss": 0.652, - "num_input_tokens_seen": 133565085, - "step": 6294 - }, - { - "epoch": 0.7569289965730777, - "grad_norm": 2.4617329476836214, - "learning_rate": 5.882672811286813e-07, - "loss": 0.6164, - "num_input_tokens_seen": 133581085, - "step": 6295 - }, - { - "epoch": 0.7570492394637167, - "grad_norm": 2.1563886958390728, - "learning_rate": 5.877156075265166e-07, - "loss": 0.6989, - "num_input_tokens_seen": 133597070, - "step": 6296 - }, - { - "epoch": 0.7571694823543558, - "grad_norm": 2.975366519091935, - "learning_rate": 5.871641481667715e-07, - "loss": 0.6873, - "num_input_tokens_seen": 133611235, - "step": 6297 - }, - { - "epoch": 0.7572897252449949, - "grad_norm": 2.0131947287565217, - "learning_rate": 5.866129031331011e-07, - "loss": 0.8368, - "num_input_tokens_seen": 133630610, - "step": 6298 - }, - { - "epoch": 0.757409968135634, - "grad_norm": 3.063705082157713, - "learning_rate": 5.8606187250913e-07, - "loss": 0.8253, - "num_input_tokens_seen": 133648380, - "step": 6299 - }, - { - "epoch": 0.757530211026273, - "grad_norm": 2.2738609262671976, - "learning_rate": 5.855110563784488e-07, - "loss": 0.8367, - "num_input_tokens_seen": 133666635, - "step": 6300 - }, - { - "epoch": 0.7576504539169122, - "grad_norm": 2.06961783322287, - "learning_rate": 5.849604548246156e-07, - "loss": 0.6371, - "num_input_tokens_seen": 133687465, - "step": 6301 - }, - { - "epoch": 0.7577706968075513, - "grad_norm": 2.8338165550698586, - "learning_rate": 5.844100679311559e-07, - "loss": 0.7941, - "num_input_tokens_seen": 133706145, - "step": 6302 - }, - { - "epoch": 0.7578909396981903, - "grad_norm": 2.2453796282595073, - "learning_rate": 5.838598957815637e-07, - "loss": 0.7573, - "num_input_tokens_seen": 133723095, - "step": 6303 - }, - { - "epoch": 0.7580111825888295, - "grad_norm": 1.5239320188420045, - "learning_rate": 5.833099384592996e-07, - "loss": 0.8522, - "num_input_tokens_seen": 133743390, - "step": 6304 - }, - { - "epoch": 0.7581314254794685, - "grad_norm": 2.174260235563152, - "learning_rate": 5.827601960477913e-07, - "loss": 0.6993, - "num_input_tokens_seen": 133761035, - "step": 6305 - }, - { - "epoch": 0.7582516683701076, - "grad_norm": 2.0347380545057954, - "learning_rate": 5.822106686304344e-07, - "loss": 0.7045, - "num_input_tokens_seen": 133780045, - "step": 6306 - }, - { - "epoch": 0.7583719112607467, - "grad_norm": 2.2108613534897463, - "learning_rate": 5.816613562905919e-07, - "loss": 0.5712, - "num_input_tokens_seen": 133800950, - "step": 6307 - }, - { - "epoch": 0.7584921541513858, - "grad_norm": 1.6758909541225415, - "learning_rate": 5.81112259111594e-07, - "loss": 0.7057, - "num_input_tokens_seen": 133821655, - "step": 6308 - }, - { - "epoch": 0.7586123970420249, - "grad_norm": 3.8633948154619615, - "learning_rate": 5.805633771767382e-07, - "loss": 0.7164, - "num_input_tokens_seen": 133838770, - "step": 6309 - }, - { - "epoch": 0.7587326399326639, - "grad_norm": 2.185368817915525, - "learning_rate": 5.800147105692888e-07, - "loss": 0.7761, - "num_input_tokens_seen": 133858065, - "step": 6310 - }, - { - "epoch": 0.7588528828233031, - "grad_norm": 2.0287861015399997, - "learning_rate": 5.794662593724795e-07, - "loss": 0.7918, - "num_input_tokens_seen": 133876790, - "step": 6311 - }, - { - "epoch": 0.7589731257139422, - "grad_norm": 3.0625557041677363, - "learning_rate": 5.789180236695091e-07, - "loss": 0.748, - "num_input_tokens_seen": 133893365, - "step": 6312 - }, - { - "epoch": 0.7590933686045812, - "grad_norm": 2.0374945047089454, - "learning_rate": 5.783700035435446e-07, - "loss": 0.8472, - "num_input_tokens_seen": 133911840, - "step": 6313 - }, - { - "epoch": 0.7592136114952204, - "grad_norm": 2.161303217221468, - "learning_rate": 5.778221990777197e-07, - "loss": 0.8308, - "num_input_tokens_seen": 133929300, - "step": 6314 - }, - { - "epoch": 0.7593338543858594, - "grad_norm": 2.0704439596362607, - "learning_rate": 5.772746103551372e-07, - "loss": 0.8179, - "num_input_tokens_seen": 133944415, - "step": 6315 - }, - { - "epoch": 0.7594540972764985, - "grad_norm": 1.7474266966923675, - "learning_rate": 5.767272374588648e-07, - "loss": 0.7164, - "num_input_tokens_seen": 133965540, - "step": 6316 - }, - { - "epoch": 0.7595743401671377, - "grad_norm": 3.3065380836355547, - "learning_rate": 5.76180080471939e-07, - "loss": 0.7779, - "num_input_tokens_seen": 133988430, - "step": 6317 - }, - { - "epoch": 0.7596945830577767, - "grad_norm": 3.366626368727072, - "learning_rate": 5.756331394773623e-07, - "loss": 0.7111, - "num_input_tokens_seen": 134004365, - "step": 6318 - }, - { - "epoch": 0.7598148259484158, - "grad_norm": 2.30264668025299, - "learning_rate": 5.750864145581065e-07, - "loss": 0.7619, - "num_input_tokens_seen": 134023305, - "step": 6319 - }, - { - "epoch": 0.7599350688390549, - "grad_norm": 5.985573394851742, - "learning_rate": 5.745399057971085e-07, - "loss": 0.846, - "num_input_tokens_seen": 134044160, - "step": 6320 - }, - { - "epoch": 0.760055311729694, - "grad_norm": 2.2643468370251956, - "learning_rate": 5.739936132772738e-07, - "loss": 0.7541, - "num_input_tokens_seen": 134062445, - "step": 6321 - }, - { - "epoch": 0.760175554620333, - "grad_norm": 2.8697174241943224, - "learning_rate": 5.734475370814737e-07, - "loss": 0.7444, - "num_input_tokens_seen": 134081845, - "step": 6322 - }, - { - "epoch": 0.7602957975109722, - "grad_norm": 1.90973632003252, - "learning_rate": 5.729016772925483e-07, - "loss": 0.7714, - "num_input_tokens_seen": 134103140, - "step": 6323 - }, - { - "epoch": 0.7604160404016113, - "grad_norm": 1.7974974356915343, - "learning_rate": 5.723560339933038e-07, - "loss": 0.7011, - "num_input_tokens_seen": 134123195, - "step": 6324 - }, - { - "epoch": 0.7605362832922503, - "grad_norm": 2.1259658033756375, - "learning_rate": 5.718106072665136e-07, - "loss": 0.641, - "num_input_tokens_seen": 134141500, - "step": 6325 - }, - { - "epoch": 0.7606565261828895, - "grad_norm": 2.744882177130346, - "learning_rate": 5.712653971949184e-07, - "loss": 0.5986, - "num_input_tokens_seen": 134159340, - "step": 6326 - }, - { - "epoch": 0.7607767690735285, - "grad_norm": 2.6108161945061474, - "learning_rate": 5.707204038612268e-07, - "loss": 0.759, - "num_input_tokens_seen": 134176490, - "step": 6327 - }, - { - "epoch": 0.7608970119641676, - "grad_norm": 3.1723513692747405, - "learning_rate": 5.701756273481138e-07, - "loss": 0.7262, - "num_input_tokens_seen": 134193630, - "step": 6328 - }, - { - "epoch": 0.7610172548548068, - "grad_norm": 1.6257986294807119, - "learning_rate": 5.696310677382212e-07, - "loss": 0.7305, - "num_input_tokens_seen": 134214745, - "step": 6329 - }, - { - "epoch": 0.7611374977454458, - "grad_norm": 0.8682098760207265, - "learning_rate": 5.690867251141576e-07, - "loss": 0.6511, - "num_input_tokens_seen": 134281120, - "step": 6330 - }, - { - "epoch": 0.7612577406360849, - "grad_norm": 2.213534489430463, - "learning_rate": 5.685425995585009e-07, - "loss": 0.9084, - "num_input_tokens_seen": 134298765, - "step": 6331 - }, - { - "epoch": 0.761377983526724, - "grad_norm": 0.7863598053935742, - "learning_rate": 5.679986911537935e-07, - "loss": 0.6236, - "num_input_tokens_seen": 134366015, - "step": 6332 - }, - { - "epoch": 0.7614982264173631, - "grad_norm": 2.126246227801962, - "learning_rate": 5.674549999825462e-07, - "loss": 0.6739, - "num_input_tokens_seen": 134388550, - "step": 6333 - }, - { - "epoch": 0.7616184693080021, - "grad_norm": 1.0510224346764374, - "learning_rate": 5.669115261272359e-07, - "loss": 0.7738, - "num_input_tokens_seen": 134448590, - "step": 6334 - }, - { - "epoch": 0.7617387121986413, - "grad_norm": 2.3599441280967044, - "learning_rate": 5.663682696703081e-07, - "loss": 0.7248, - "num_input_tokens_seen": 134466575, - "step": 6335 - }, - { - "epoch": 0.7618589550892804, - "grad_norm": 2.11980395708863, - "learning_rate": 5.65825230694174e-07, - "loss": 0.8251, - "num_input_tokens_seen": 134485615, - "step": 6336 - }, - { - "epoch": 0.7619791979799194, - "grad_norm": 2.8284980776438275, - "learning_rate": 5.65282409281212e-07, - "loss": 0.758, - "num_input_tokens_seen": 134502800, - "step": 6337 - }, - { - "epoch": 0.7620994408705585, - "grad_norm": 2.8148622652435833, - "learning_rate": 5.64739805513768e-07, - "loss": 0.6917, - "num_input_tokens_seen": 134520065, - "step": 6338 - }, - { - "epoch": 0.7622196837611976, - "grad_norm": 0.8457363162198092, - "learning_rate": 5.641974194741541e-07, - "loss": 0.5855, - "num_input_tokens_seen": 134575470, - "step": 6339 - }, - { - "epoch": 0.7623399266518367, - "grad_norm": 0.7661169667163336, - "learning_rate": 5.636552512446502e-07, - "loss": 0.6509, - "num_input_tokens_seen": 134636245, - "step": 6340 - }, - { - "epoch": 0.7624601695424758, - "grad_norm": 2.08578809747483, - "learning_rate": 5.631133009075027e-07, - "loss": 0.7831, - "num_input_tokens_seen": 134655150, - "step": 6341 - }, - { - "epoch": 0.7625804124331149, - "grad_norm": 2.231950114295946, - "learning_rate": 5.625715685449242e-07, - "loss": 0.6905, - "num_input_tokens_seen": 134672975, - "step": 6342 - }, - { - "epoch": 0.762700655323754, - "grad_norm": 1.7613145502041365, - "learning_rate": 5.620300542390966e-07, - "loss": 0.7133, - "num_input_tokens_seen": 134693740, - "step": 6343 - }, - { - "epoch": 0.762820898214393, - "grad_norm": 2.313018382918317, - "learning_rate": 5.614887580721666e-07, - "loss": 0.8484, - "num_input_tokens_seen": 134713605, - "step": 6344 - }, - { - "epoch": 0.7629411411050322, - "grad_norm": 2.730684423706499, - "learning_rate": 5.609476801262481e-07, - "loss": 0.7398, - "num_input_tokens_seen": 134728185, - "step": 6345 - }, - { - "epoch": 0.7630613839956712, - "grad_norm": 3.195982214878335, - "learning_rate": 5.604068204834215e-07, - "loss": 0.6444, - "num_input_tokens_seen": 134744800, - "step": 6346 - }, - { - "epoch": 0.7631816268863103, - "grad_norm": 3.463030339719599, - "learning_rate": 5.598661792257367e-07, - "loss": 0.7472, - "num_input_tokens_seen": 134761565, - "step": 6347 - }, - { - "epoch": 0.7633018697769495, - "grad_norm": 2.1872474599821197, - "learning_rate": 5.593257564352071e-07, - "loss": 0.7634, - "num_input_tokens_seen": 134779725, - "step": 6348 - }, - { - "epoch": 0.7634221126675885, - "grad_norm": 1.6377286611500093, - "learning_rate": 5.58785552193815e-07, - "loss": 0.7506, - "num_input_tokens_seen": 134799690, - "step": 6349 - }, - { - "epoch": 0.7635423555582276, - "grad_norm": 2.1302826171642923, - "learning_rate": 5.582455665835086e-07, - "loss": 0.751, - "num_input_tokens_seen": 134819705, - "step": 6350 - }, - { - "epoch": 0.7636625984488667, - "grad_norm": 5.179631870952496, - "learning_rate": 5.577057996862036e-07, - "loss": 0.7292, - "num_input_tokens_seen": 134837050, - "step": 6351 - }, - { - "epoch": 0.7637828413395058, - "grad_norm": 2.4177652355882375, - "learning_rate": 5.571662515837818e-07, - "loss": 0.7536, - "num_input_tokens_seen": 134858730, - "step": 6352 - }, - { - "epoch": 0.7639030842301449, - "grad_norm": 2.0240989134851124, - "learning_rate": 5.566269223580926e-07, - "loss": 0.8352, - "num_input_tokens_seen": 134880160, - "step": 6353 - }, - { - "epoch": 0.764023327120784, - "grad_norm": 1.6718112131723575, - "learning_rate": 5.560878120909511e-07, - "loss": 0.7403, - "num_input_tokens_seen": 134902480, - "step": 6354 - }, - { - "epoch": 0.7641435700114231, - "grad_norm": 0.9038247306596882, - "learning_rate": 5.55548920864141e-07, - "loss": 0.6478, - "num_input_tokens_seen": 134962855, - "step": 6355 - }, - { - "epoch": 0.7642638129020621, - "grad_norm": 1.8227317918894237, - "learning_rate": 5.550102487594113e-07, - "loss": 0.7637, - "num_input_tokens_seen": 134981245, - "step": 6356 - }, - { - "epoch": 0.7643840557927013, - "grad_norm": 1.9256008769006938, - "learning_rate": 5.544717958584776e-07, - "loss": 0.7111, - "num_input_tokens_seen": 135001035, - "step": 6357 - }, - { - "epoch": 0.7645042986833404, - "grad_norm": 2.6707409407821396, - "learning_rate": 5.539335622430227e-07, - "loss": 0.8288, - "num_input_tokens_seen": 135019375, - "step": 6358 - }, - { - "epoch": 0.7646245415739794, - "grad_norm": 2.0996248617636364, - "learning_rate": 5.533955479946975e-07, - "loss": 0.7407, - "num_input_tokens_seen": 135037875, - "step": 6359 - }, - { - "epoch": 0.7647447844646186, - "grad_norm": 0.8931092337628109, - "learning_rate": 5.528577531951173e-07, - "loss": 0.6762, - "num_input_tokens_seen": 135098000, - "step": 6360 - }, - { - "epoch": 0.7648650273552576, - "grad_norm": 2.494854709978254, - "learning_rate": 5.523201779258653e-07, - "loss": 0.7418, - "num_input_tokens_seen": 135116695, - "step": 6361 - }, - { - "epoch": 0.7649852702458967, - "grad_norm": 1.989201457230547, - "learning_rate": 5.517828222684906e-07, - "loss": 0.8398, - "num_input_tokens_seen": 135137070, - "step": 6362 - }, - { - "epoch": 0.7651055131365359, - "grad_norm": 0.793777499821597, - "learning_rate": 5.512456863045109e-07, - "loss": 0.6047, - "num_input_tokens_seen": 135197480, - "step": 6363 - }, - { - "epoch": 0.7652257560271749, - "grad_norm": 2.2512515513123526, - "learning_rate": 5.507087701154089e-07, - "loss": 0.7335, - "num_input_tokens_seen": 135217120, - "step": 6364 - }, - { - "epoch": 0.765345998917814, - "grad_norm": 2.4449229420965257, - "learning_rate": 5.50172073782634e-07, - "loss": 0.7468, - "num_input_tokens_seen": 135234820, - "step": 6365 - }, - { - "epoch": 0.7654662418084531, - "grad_norm": 2.469374293426904, - "learning_rate": 5.49635597387603e-07, - "loss": 0.8621, - "num_input_tokens_seen": 135253795, - "step": 6366 - }, - { - "epoch": 0.7655864846990922, - "grad_norm": 1.9648030689397533, - "learning_rate": 5.490993410116984e-07, - "loss": 0.7087, - "num_input_tokens_seen": 135276505, - "step": 6367 - }, - { - "epoch": 0.7657067275897312, - "grad_norm": 1.9197839390277576, - "learning_rate": 5.485633047362704e-07, - "loss": 0.7009, - "num_input_tokens_seen": 135298230, - "step": 6368 - }, - { - "epoch": 0.7658269704803703, - "grad_norm": 2.154869834587019, - "learning_rate": 5.480274886426349e-07, - "loss": 0.7773, - "num_input_tokens_seen": 135314590, - "step": 6369 - }, - { - "epoch": 0.7659472133710095, - "grad_norm": 2.707946763944997, - "learning_rate": 5.474918928120744e-07, - "loss": 0.7772, - "num_input_tokens_seen": 135330805, - "step": 6370 - }, - { - "epoch": 0.7660674562616485, - "grad_norm": 1.8624321786494242, - "learning_rate": 5.469565173258392e-07, - "loss": 0.8722, - "num_input_tokens_seen": 135349040, - "step": 6371 - }, - { - "epoch": 0.7661876991522876, - "grad_norm": 1.9315928820242891, - "learning_rate": 5.464213622651454e-07, - "loss": 0.6391, - "num_input_tokens_seen": 135366575, - "step": 6372 - }, - { - "epoch": 0.7663079420429267, - "grad_norm": 2.131534258398, - "learning_rate": 5.458864277111753e-07, - "loss": 0.8352, - "num_input_tokens_seen": 135384130, - "step": 6373 - }, - { - "epoch": 0.7664281849335658, - "grad_norm": 2.7329928404504593, - "learning_rate": 5.453517137450769e-07, - "loss": 0.6867, - "num_input_tokens_seen": 135400425, - "step": 6374 - }, - { - "epoch": 0.7665484278242048, - "grad_norm": 1.9557421048202888, - "learning_rate": 5.448172204479677e-07, - "loss": 0.7515, - "num_input_tokens_seen": 135419425, - "step": 6375 - }, - { - "epoch": 0.766668670714844, - "grad_norm": 2.1165919264466795, - "learning_rate": 5.442829479009294e-07, - "loss": 0.7421, - "num_input_tokens_seen": 135437925, - "step": 6376 - }, - { - "epoch": 0.7667889136054831, - "grad_norm": 2.7468194789400147, - "learning_rate": 5.437488961850103e-07, - "loss": 0.7134, - "num_input_tokens_seen": 135457445, - "step": 6377 - }, - { - "epoch": 0.7669091564961221, - "grad_norm": 2.3414434543991227, - "learning_rate": 5.432150653812253e-07, - "loss": 0.7527, - "num_input_tokens_seen": 135477200, - "step": 6378 - }, - { - "epoch": 0.7670293993867613, - "grad_norm": 2.6579669881075523, - "learning_rate": 5.42681455570557e-07, - "loss": 0.8282, - "num_input_tokens_seen": 135493450, - "step": 6379 - }, - { - "epoch": 0.7671496422774003, - "grad_norm": 2.179744610404988, - "learning_rate": 5.421480668339533e-07, - "loss": 0.6442, - "num_input_tokens_seen": 135512415, - "step": 6380 - }, - { - "epoch": 0.7672698851680394, - "grad_norm": 2.212817525249641, - "learning_rate": 5.416148992523289e-07, - "loss": 0.7572, - "num_input_tokens_seen": 135530710, - "step": 6381 - }, - { - "epoch": 0.7673901280586786, - "grad_norm": 1.8596636510581415, - "learning_rate": 5.410819529065644e-07, - "loss": 0.7829, - "num_input_tokens_seen": 135548385, - "step": 6382 - }, - { - "epoch": 0.7675103709493176, - "grad_norm": 2.8472899830798735, - "learning_rate": 5.405492278775079e-07, - "loss": 0.6477, - "num_input_tokens_seen": 135567885, - "step": 6383 - }, - { - "epoch": 0.7676306138399567, - "grad_norm": 4.507056836227131, - "learning_rate": 5.400167242459732e-07, - "loss": 0.7938, - "num_input_tokens_seen": 135586565, - "step": 6384 - }, - { - "epoch": 0.7677508567305958, - "grad_norm": 1.7234290338446134, - "learning_rate": 5.394844420927405e-07, - "loss": 0.7978, - "num_input_tokens_seen": 135605895, - "step": 6385 - }, - { - "epoch": 0.7678710996212349, - "grad_norm": 2.263809891326768, - "learning_rate": 5.389523814985562e-07, - "loss": 0.7201, - "num_input_tokens_seen": 135625035, - "step": 6386 - }, - { - "epoch": 0.767991342511874, - "grad_norm": 8.227022186249583, - "learning_rate": 5.384205425441344e-07, - "loss": 0.754, - "num_input_tokens_seen": 135645665, - "step": 6387 - }, - { - "epoch": 0.7681115854025131, - "grad_norm": 2.6188421603062584, - "learning_rate": 5.378889253101542e-07, - "loss": 0.8362, - "num_input_tokens_seen": 135665940, - "step": 6388 - }, - { - "epoch": 0.7682318282931522, - "grad_norm": 1.7735668920390248, - "learning_rate": 5.373575298772617e-07, - "loss": 0.7951, - "num_input_tokens_seen": 135684780, - "step": 6389 - }, - { - "epoch": 0.7683520711837912, - "grad_norm": 0.7374682030310578, - "learning_rate": 5.368263563260682e-07, - "loss": 0.62, - "num_input_tokens_seen": 135749635, - "step": 6390 - }, - { - "epoch": 0.7684723140744304, - "grad_norm": 2.557667045463059, - "learning_rate": 5.362954047371537e-07, - "loss": 0.6383, - "num_input_tokens_seen": 135768465, - "step": 6391 - }, - { - "epoch": 0.7685925569650695, - "grad_norm": 2.499681258431957, - "learning_rate": 5.357646751910627e-07, - "loss": 0.7163, - "num_input_tokens_seen": 135789365, - "step": 6392 - }, - { - "epoch": 0.7687127998557085, - "grad_norm": 4.272197991257629, - "learning_rate": 5.352341677683061e-07, - "loss": 0.7948, - "num_input_tokens_seen": 135810385, - "step": 6393 - }, - { - "epoch": 0.7688330427463477, - "grad_norm": 1.995636035652429, - "learning_rate": 5.347038825493617e-07, - "loss": 0.7821, - "num_input_tokens_seen": 135831635, - "step": 6394 - }, - { - "epoch": 0.7689532856369867, - "grad_norm": 2.6701079900926112, - "learning_rate": 5.341738196146732e-07, - "loss": 0.6731, - "num_input_tokens_seen": 135849700, - "step": 6395 - }, - { - "epoch": 0.7690735285276258, - "grad_norm": 2.459553700301667, - "learning_rate": 5.33643979044651e-07, - "loss": 0.728, - "num_input_tokens_seen": 135868520, - "step": 6396 - }, - { - "epoch": 0.769193771418265, - "grad_norm": 2.0732176520505803, - "learning_rate": 5.331143609196711e-07, - "loss": 0.6224, - "num_input_tokens_seen": 135892055, - "step": 6397 - }, - { - "epoch": 0.769314014308904, - "grad_norm": 1.976923347515186, - "learning_rate": 5.325849653200758e-07, - "loss": 0.7653, - "num_input_tokens_seen": 135915725, - "step": 6398 - }, - { - "epoch": 0.7694342571995431, - "grad_norm": 2.6583830368322254, - "learning_rate": 5.32055792326175e-07, - "loss": 0.7563, - "num_input_tokens_seen": 135933870, - "step": 6399 - }, - { - "epoch": 0.7695545000901821, - "grad_norm": 3.6193365256209304, - "learning_rate": 5.315268420182437e-07, - "loss": 0.7229, - "num_input_tokens_seen": 135952265, - "step": 6400 - }, - { - "epoch": 0.7696747429808213, - "grad_norm": 2.0388274747217854, - "learning_rate": 5.309981144765225e-07, - "loss": 0.7599, - "num_input_tokens_seen": 135972130, - "step": 6401 - }, - { - "epoch": 0.7697949858714603, - "grad_norm": 2.6418384952674465, - "learning_rate": 5.304696097812191e-07, - "loss": 0.7477, - "num_input_tokens_seen": 135988450, - "step": 6402 - }, - { - "epoch": 0.7699152287620994, - "grad_norm": 3.8519158979226367, - "learning_rate": 5.299413280125078e-07, - "loss": 0.5921, - "num_input_tokens_seen": 136006480, - "step": 6403 - }, - { - "epoch": 0.7700354716527386, - "grad_norm": 5.576130817088323, - "learning_rate": 5.294132692505284e-07, - "loss": 0.7258, - "num_input_tokens_seen": 136024610, - "step": 6404 - }, - { - "epoch": 0.7701557145433776, - "grad_norm": 2.350712528257476, - "learning_rate": 5.288854335753868e-07, - "loss": 0.7841, - "num_input_tokens_seen": 136042590, - "step": 6405 - }, - { - "epoch": 0.7702759574340167, - "grad_norm": 3.4448411809848025, - "learning_rate": 5.283578210671545e-07, - "loss": 0.7575, - "num_input_tokens_seen": 136064550, - "step": 6406 - }, - { - "epoch": 0.7703962003246558, - "grad_norm": 2.393596155989619, - "learning_rate": 5.278304318058713e-07, - "loss": 0.7614, - "num_input_tokens_seen": 136082125, - "step": 6407 - }, - { - "epoch": 0.7705164432152949, - "grad_norm": 2.2142736632796027, - "learning_rate": 5.273032658715411e-07, - "loss": 0.7915, - "num_input_tokens_seen": 136104655, - "step": 6408 - }, - { - "epoch": 0.7706366861059339, - "grad_norm": 6.339771914321342, - "learning_rate": 5.267763233441347e-07, - "loss": 0.7664, - "num_input_tokens_seen": 136125005, - "step": 6409 - }, - { - "epoch": 0.7707569289965731, - "grad_norm": 2.4807597346937857, - "learning_rate": 5.262496043035885e-07, - "loss": 0.694, - "num_input_tokens_seen": 136143230, - "step": 6410 - }, - { - "epoch": 0.7708771718872122, - "grad_norm": 2.373775723891977, - "learning_rate": 5.257231088298057e-07, - "loss": 0.7752, - "num_input_tokens_seen": 136161360, - "step": 6411 - }, - { - "epoch": 0.7709974147778512, - "grad_norm": 1.2244398579784608, - "learning_rate": 5.25196837002655e-07, - "loss": 0.5723, - "num_input_tokens_seen": 136220790, - "step": 6412 - }, - { - "epoch": 0.7711176576684904, - "grad_norm": 2.117101016430545, - "learning_rate": 5.246707889019715e-07, - "loss": 0.6871, - "num_input_tokens_seen": 136243600, - "step": 6413 - }, - { - "epoch": 0.7712379005591294, - "grad_norm": 2.4156496445622158, - "learning_rate": 5.241449646075557e-07, - "loss": 0.681, - "num_input_tokens_seen": 136266545, - "step": 6414 - }, - { - "epoch": 0.7713581434497685, - "grad_norm": 5.062791395610434, - "learning_rate": 5.236193641991762e-07, - "loss": 0.7196, - "num_input_tokens_seen": 136284195, - "step": 6415 - }, - { - "epoch": 0.7714783863404077, - "grad_norm": 3.5130192356481333, - "learning_rate": 5.23093987756565e-07, - "loss": 0.7024, - "num_input_tokens_seen": 136302610, - "step": 6416 - }, - { - "epoch": 0.7715986292310467, - "grad_norm": 1.9611568210320338, - "learning_rate": 5.225688353594217e-07, - "loss": 0.7558, - "num_input_tokens_seen": 136321960, - "step": 6417 - }, - { - "epoch": 0.7717188721216858, - "grad_norm": 3.1634505213160145, - "learning_rate": 5.220439070874108e-07, - "loss": 0.7761, - "num_input_tokens_seen": 136340920, - "step": 6418 - }, - { - "epoch": 0.7718391150123249, - "grad_norm": 2.0177819571696576, - "learning_rate": 5.215192030201645e-07, - "loss": 0.7116, - "num_input_tokens_seen": 136361630, - "step": 6419 - }, - { - "epoch": 0.771959357902964, - "grad_norm": 3.428402878299086, - "learning_rate": 5.209947232372798e-07, - "loss": 0.8588, - "num_input_tokens_seen": 136378840, - "step": 6420 - }, - { - "epoch": 0.772079600793603, - "grad_norm": 1.8664875569644048, - "learning_rate": 5.204704678183196e-07, - "loss": 0.8009, - "num_input_tokens_seen": 136397295, - "step": 6421 - }, - { - "epoch": 0.7721998436842422, - "grad_norm": 2.2075968706803404, - "learning_rate": 5.199464368428124e-07, - "loss": 0.8419, - "num_input_tokens_seen": 136414145, - "step": 6422 - }, - { - "epoch": 0.7723200865748813, - "grad_norm": 1.910598148910355, - "learning_rate": 5.194226303902546e-07, - "loss": 0.68, - "num_input_tokens_seen": 136433600, - "step": 6423 - }, - { - "epoch": 0.7724403294655203, - "grad_norm": 1.8156191521669256, - "learning_rate": 5.188990485401066e-07, - "loss": 0.706, - "num_input_tokens_seen": 136452525, - "step": 6424 - }, - { - "epoch": 0.7725605723561595, - "grad_norm": 2.325674322472472, - "learning_rate": 5.183756913717958e-07, - "loss": 0.8531, - "num_input_tokens_seen": 136472020, - "step": 6425 - }, - { - "epoch": 0.7726808152467985, - "grad_norm": 3.2641462093122073, - "learning_rate": 5.178525589647136e-07, - "loss": 0.7309, - "num_input_tokens_seen": 136493380, - "step": 6426 - }, - { - "epoch": 0.7728010581374376, - "grad_norm": 2.232113012897507, - "learning_rate": 5.173296513982201e-07, - "loss": 0.7813, - "num_input_tokens_seen": 136511625, - "step": 6427 - }, - { - "epoch": 0.7729213010280768, - "grad_norm": 3.3984708156592336, - "learning_rate": 5.168069687516398e-07, - "loss": 0.6484, - "num_input_tokens_seen": 136531115, - "step": 6428 - }, - { - "epoch": 0.7730415439187158, - "grad_norm": 2.461511298269615, - "learning_rate": 5.16284511104263e-07, - "loss": 0.7127, - "num_input_tokens_seen": 136549970, - "step": 6429 - }, - { - "epoch": 0.7731617868093549, - "grad_norm": 3.394391373177479, - "learning_rate": 5.157622785353457e-07, - "loss": 0.8005, - "num_input_tokens_seen": 136567805, - "step": 6430 - }, - { - "epoch": 0.7732820296999939, - "grad_norm": 0.6800467868587783, - "learning_rate": 5.152402711241113e-07, - "loss": 0.6218, - "num_input_tokens_seen": 136635430, - "step": 6431 - }, - { - "epoch": 0.7734022725906331, - "grad_norm": 2.0045139502992817, - "learning_rate": 5.147184889497471e-07, - "loss": 0.8312, - "num_input_tokens_seen": 136654620, - "step": 6432 - }, - { - "epoch": 0.7735225154812722, - "grad_norm": 3.5323197025817774, - "learning_rate": 5.141969320914072e-07, - "loss": 0.7902, - "num_input_tokens_seen": 136671845, - "step": 6433 - }, - { - "epoch": 0.7736427583719112, - "grad_norm": 3.749421956043653, - "learning_rate": 5.136756006282108e-07, - "loss": 0.6299, - "num_input_tokens_seen": 136690230, - "step": 6434 - }, - { - "epoch": 0.7737630012625504, - "grad_norm": 2.612260793205545, - "learning_rate": 5.131544946392446e-07, - "loss": 0.8417, - "num_input_tokens_seen": 136705230, - "step": 6435 - }, - { - "epoch": 0.7738832441531894, - "grad_norm": 3.074551949162788, - "learning_rate": 5.126336142035592e-07, - "loss": 0.6384, - "num_input_tokens_seen": 136724985, - "step": 6436 - }, - { - "epoch": 0.7740034870438285, - "grad_norm": 3.385516497942366, - "learning_rate": 5.121129594001721e-07, - "loss": 0.7123, - "num_input_tokens_seen": 136738970, - "step": 6437 - }, - { - "epoch": 0.7741237299344677, - "grad_norm": 1.9370530758292988, - "learning_rate": 5.115925303080661e-07, - "loss": 0.8106, - "num_input_tokens_seen": 136758400, - "step": 6438 - }, - { - "epoch": 0.7742439728251067, - "grad_norm": 3.7438483385105057, - "learning_rate": 5.110723270061899e-07, - "loss": 0.7855, - "num_input_tokens_seen": 136774610, - "step": 6439 - }, - { - "epoch": 0.7743642157157458, - "grad_norm": 2.4063009868142498, - "learning_rate": 5.105523495734576e-07, - "loss": 0.7914, - "num_input_tokens_seen": 136791730, - "step": 6440 - }, - { - "epoch": 0.7744844586063849, - "grad_norm": 1.5599379362955605, - "learning_rate": 5.100325980887499e-07, - "loss": 0.736, - "num_input_tokens_seen": 136811375, - "step": 6441 - }, - { - "epoch": 0.774604701497024, - "grad_norm": 2.175839337697373, - "learning_rate": 5.095130726309116e-07, - "loss": 0.8241, - "num_input_tokens_seen": 136831270, - "step": 6442 - }, - { - "epoch": 0.774724944387663, - "grad_norm": 0.8999989931838549, - "learning_rate": 5.089937732787559e-07, - "loss": 0.6675, - "num_input_tokens_seen": 136895550, - "step": 6443 - }, - { - "epoch": 0.7748451872783022, - "grad_norm": 4.730448172191009, - "learning_rate": 5.084747001110592e-07, - "loss": 0.6623, - "num_input_tokens_seen": 136914895, - "step": 6444 - }, - { - "epoch": 0.7749654301689413, - "grad_norm": 1.9356166607084275, - "learning_rate": 5.079558532065646e-07, - "loss": 0.6898, - "num_input_tokens_seen": 136939320, - "step": 6445 - }, - { - "epoch": 0.7750856730595803, - "grad_norm": 1.955323697211826, - "learning_rate": 5.074372326439802e-07, - "loss": 0.7077, - "num_input_tokens_seen": 136962050, - "step": 6446 - }, - { - "epoch": 0.7752059159502195, - "grad_norm": 3.216235087068157, - "learning_rate": 5.069188385019814e-07, - "loss": 0.7354, - "num_input_tokens_seen": 136979470, - "step": 6447 - }, - { - "epoch": 0.7753261588408585, - "grad_norm": 3.7768500726745438, - "learning_rate": 5.064006708592077e-07, - "loss": 0.6157, - "num_input_tokens_seen": 136995435, - "step": 6448 - }, - { - "epoch": 0.7754464017314976, - "grad_norm": 2.8616665406475312, - "learning_rate": 5.058827297942647e-07, - "loss": 0.7502, - "num_input_tokens_seen": 137010260, - "step": 6449 - }, - { - "epoch": 0.7755666446221368, - "grad_norm": 4.600530760313983, - "learning_rate": 5.053650153857229e-07, - "loss": 0.7375, - "num_input_tokens_seen": 137028990, - "step": 6450 - }, - { - "epoch": 0.7756868875127758, - "grad_norm": 1.755189198874425, - "learning_rate": 5.048475277121207e-07, - "loss": 0.6943, - "num_input_tokens_seen": 137045925, - "step": 6451 - }, - { - "epoch": 0.7758071304034149, - "grad_norm": 1.9175797372736918, - "learning_rate": 5.043302668519598e-07, - "loss": 0.7641, - "num_input_tokens_seen": 137064980, - "step": 6452 - }, - { - "epoch": 0.775927373294054, - "grad_norm": 1.9843691088440378, - "learning_rate": 5.038132328837079e-07, - "loss": 0.7188, - "num_input_tokens_seen": 137083090, - "step": 6453 - }, - { - "epoch": 0.7760476161846931, - "grad_norm": 2.069665213275956, - "learning_rate": 5.032964258857993e-07, - "loss": 0.7323, - "num_input_tokens_seen": 137102905, - "step": 6454 - }, - { - "epoch": 0.7761678590753321, - "grad_norm": 2.7698051160249983, - "learning_rate": 5.027798459366329e-07, - "loss": 0.6818, - "num_input_tokens_seen": 137127990, - "step": 6455 - }, - { - "epoch": 0.7762881019659713, - "grad_norm": 3.2233197082646234, - "learning_rate": 5.02263493114573e-07, - "loss": 0.6316, - "num_input_tokens_seen": 137149505, - "step": 6456 - }, - { - "epoch": 0.7764083448566104, - "grad_norm": 2.847564099744299, - "learning_rate": 5.017473674979509e-07, - "loss": 0.7634, - "num_input_tokens_seen": 137165250, - "step": 6457 - }, - { - "epoch": 0.7765285877472494, - "grad_norm": 0.7789420496969789, - "learning_rate": 5.01231469165061e-07, - "loss": 0.5974, - "num_input_tokens_seen": 137220795, - "step": 6458 - }, - { - "epoch": 0.7766488306378886, - "grad_norm": 1.892357678647014, - "learning_rate": 5.007157981941663e-07, - "loss": 0.6145, - "num_input_tokens_seen": 137285875, - "step": 6459 - }, - { - "epoch": 0.7767690735285276, - "grad_norm": 0.9138976902236747, - "learning_rate": 5.002003546634928e-07, - "loss": 0.6994, - "num_input_tokens_seen": 137341695, - "step": 6460 - }, - { - "epoch": 0.7768893164191667, - "grad_norm": 2.6393848820023265, - "learning_rate": 4.996851386512331e-07, - "loss": 0.7559, - "num_input_tokens_seen": 137360120, - "step": 6461 - }, - { - "epoch": 0.7770095593098058, - "grad_norm": 1.7820880241764918, - "learning_rate": 4.991701502355444e-07, - "loss": 0.8253, - "num_input_tokens_seen": 137380305, - "step": 6462 - }, - { - "epoch": 0.7771298022004449, - "grad_norm": 1.9623330895479028, - "learning_rate": 4.986553894945511e-07, - "loss": 0.7586, - "num_input_tokens_seen": 137401235, - "step": 6463 - }, - { - "epoch": 0.777250045091084, - "grad_norm": 3.4448093715462997, - "learning_rate": 4.981408565063416e-07, - "loss": 0.8614, - "num_input_tokens_seen": 137420900, - "step": 6464 - }, - { - "epoch": 0.777370287981723, - "grad_norm": 2.429690082460131, - "learning_rate": 4.976265513489701e-07, - "loss": 0.7537, - "num_input_tokens_seen": 137440590, - "step": 6465 - }, - { - "epoch": 0.7774905308723622, - "grad_norm": 2.7243379708507773, - "learning_rate": 4.971124741004558e-07, - "loss": 0.8043, - "num_input_tokens_seen": 137459310, - "step": 6466 - }, - { - "epoch": 0.7776107737630013, - "grad_norm": 2.0105311463703166, - "learning_rate": 4.965986248387846e-07, - "loss": 0.7571, - "num_input_tokens_seen": 137477345, - "step": 6467 - }, - { - "epoch": 0.7777310166536403, - "grad_norm": 1.6472463248806992, - "learning_rate": 4.960850036419073e-07, - "loss": 0.7627, - "num_input_tokens_seen": 137496165, - "step": 6468 - }, - { - "epoch": 0.7778512595442795, - "grad_norm": 2.2132686495040432, - "learning_rate": 4.955716105877386e-07, - "loss": 0.7906, - "num_input_tokens_seen": 137514655, - "step": 6469 - }, - { - "epoch": 0.7779715024349185, - "grad_norm": 2.0253479672148753, - "learning_rate": 4.950584457541598e-07, - "loss": 0.8271, - "num_input_tokens_seen": 137532840, - "step": 6470 - }, - { - "epoch": 0.7780917453255576, - "grad_norm": 1.6716101950465727, - "learning_rate": 4.945455092190187e-07, - "loss": 0.8174, - "num_input_tokens_seen": 137553815, - "step": 6471 - }, - { - "epoch": 0.7782119882161967, - "grad_norm": 0.73251137181082, - "learning_rate": 4.940328010601271e-07, - "loss": 0.5756, - "num_input_tokens_seen": 137618450, - "step": 6472 - }, - { - "epoch": 0.7783322311068358, - "grad_norm": 2.4365344690004163, - "learning_rate": 4.935203213552621e-07, - "loss": 0.7533, - "num_input_tokens_seen": 137641910, - "step": 6473 - }, - { - "epoch": 0.7784524739974749, - "grad_norm": 2.28594232703087, - "learning_rate": 4.930080701821662e-07, - "loss": 0.6544, - "num_input_tokens_seen": 137659095, - "step": 6474 - }, - { - "epoch": 0.778572716888114, - "grad_norm": 2.0915009232905843, - "learning_rate": 4.92496047618548e-07, - "loss": 0.767, - "num_input_tokens_seen": 137678575, - "step": 6475 - }, - { - "epoch": 0.7786929597787531, - "grad_norm": 2.6397487718854613, - "learning_rate": 4.919842537420811e-07, - "loss": 0.7682, - "num_input_tokens_seen": 137695410, - "step": 6476 - }, - { - "epoch": 0.7788132026693921, - "grad_norm": 3.4215923177787007, - "learning_rate": 4.91472688630404e-07, - "loss": 0.7938, - "num_input_tokens_seen": 137715870, - "step": 6477 - }, - { - "epoch": 0.7789334455600313, - "grad_norm": 2.6457797796889, - "learning_rate": 4.909613523611198e-07, - "loss": 0.7383, - "num_input_tokens_seen": 137732470, - "step": 6478 - }, - { - "epoch": 0.7790536884506704, - "grad_norm": 2.1519768105567856, - "learning_rate": 4.904502450117991e-07, - "loss": 0.743, - "num_input_tokens_seen": 137753150, - "step": 6479 - }, - { - "epoch": 0.7791739313413094, - "grad_norm": 2.4090559515328316, - "learning_rate": 4.899393666599762e-07, - "loss": 0.7253, - "num_input_tokens_seen": 137769445, - "step": 6480 - }, - { - "epoch": 0.7792941742319486, - "grad_norm": 4.288459063556391, - "learning_rate": 4.894287173831506e-07, - "loss": 0.7148, - "num_input_tokens_seen": 137785125, - "step": 6481 - }, - { - "epoch": 0.7794144171225876, - "grad_norm": 2.774863982885005, - "learning_rate": 4.889182972587877e-07, - "loss": 0.8375, - "num_input_tokens_seen": 137804140, - "step": 6482 - }, - { - "epoch": 0.7795346600132267, - "grad_norm": 2.317646733257671, - "learning_rate": 4.884081063643177e-07, - "loss": 0.6648, - "num_input_tokens_seen": 137822520, - "step": 6483 - }, - { - "epoch": 0.7796549029038659, - "grad_norm": 0.9363650201554515, - "learning_rate": 4.87898144777136e-07, - "loss": 0.5688, - "num_input_tokens_seen": 137876620, - "step": 6484 - }, - { - "epoch": 0.7797751457945049, - "grad_norm": 2.3436783865556774, - "learning_rate": 4.873884125746035e-07, - "loss": 0.7271, - "num_input_tokens_seen": 137898015, - "step": 6485 - }, - { - "epoch": 0.779895388685144, - "grad_norm": 2.882342492659769, - "learning_rate": 4.868789098340456e-07, - "loss": 0.7233, - "num_input_tokens_seen": 137915640, - "step": 6486 - }, - { - "epoch": 0.7800156315757831, - "grad_norm": 2.9574250572369576, - "learning_rate": 4.863696366327543e-07, - "loss": 0.7103, - "num_input_tokens_seen": 137934530, - "step": 6487 - }, - { - "epoch": 0.7801358744664222, - "grad_norm": 2.7056854074565324, - "learning_rate": 4.85860593047986e-07, - "loss": 0.7758, - "num_input_tokens_seen": 137954315, - "step": 6488 - }, - { - "epoch": 0.7802561173570612, - "grad_norm": 2.175799088930615, - "learning_rate": 4.853517791569617e-07, - "loss": 0.7405, - "num_input_tokens_seen": 137976215, - "step": 6489 - }, - { - "epoch": 0.7803763602477004, - "grad_norm": 2.070546437842019, - "learning_rate": 4.848431950368676e-07, - "loss": 0.6547, - "num_input_tokens_seen": 137998495, - "step": 6490 - }, - { - "epoch": 0.7804966031383395, - "grad_norm": 0.7628411031412166, - "learning_rate": 4.843348407648569e-07, - "loss": 0.589, - "num_input_tokens_seen": 138059495, - "step": 6491 - }, - { - "epoch": 0.7806168460289785, - "grad_norm": 2.1230722837911347, - "learning_rate": 4.838267164180457e-07, - "loss": 0.8174, - "num_input_tokens_seen": 138074885, - "step": 6492 - }, - { - "epoch": 0.7807370889196176, - "grad_norm": 2.12148655330612, - "learning_rate": 4.833188220735161e-07, - "loss": 0.8411, - "num_input_tokens_seen": 138094275, - "step": 6493 - }, - { - "epoch": 0.7808573318102567, - "grad_norm": 2.114751918650088, - "learning_rate": 4.828111578083147e-07, - "loss": 0.7372, - "num_input_tokens_seen": 138110900, - "step": 6494 - }, - { - "epoch": 0.7809775747008958, - "grad_norm": 3.0055485461883515, - "learning_rate": 4.823037236994549e-07, - "loss": 0.8025, - "num_input_tokens_seen": 138128785, - "step": 6495 - }, - { - "epoch": 0.7810978175915348, - "grad_norm": 0.7794904402319447, - "learning_rate": 4.817965198239136e-07, - "loss": 0.5907, - "num_input_tokens_seen": 138194965, - "step": 6496 - }, - { - "epoch": 0.781218060482174, - "grad_norm": 3.105727893500193, - "learning_rate": 4.812895462586331e-07, - "loss": 0.7316, - "num_input_tokens_seen": 138212510, - "step": 6497 - }, - { - "epoch": 0.7813383033728131, - "grad_norm": 3.0005052159454064, - "learning_rate": 4.807828030805207e-07, - "loss": 0.8173, - "num_input_tokens_seen": 138231220, - "step": 6498 - }, - { - "epoch": 0.7814585462634521, - "grad_norm": 2.256648918402157, - "learning_rate": 4.802762903664495e-07, - "loss": 0.6722, - "num_input_tokens_seen": 138250120, - "step": 6499 - }, - { - "epoch": 0.7815787891540913, - "grad_norm": 6.7909059596204955, - "learning_rate": 4.797700081932565e-07, - "loss": 0.7252, - "num_input_tokens_seen": 138267705, - "step": 6500 - }, - { - "epoch": 0.7816990320447303, - "grad_norm": 2.8290493567344797, - "learning_rate": 4.792639566377448e-07, - "loss": 0.8132, - "num_input_tokens_seen": 138284835, - "step": 6501 - }, - { - "epoch": 0.7818192749353694, - "grad_norm": 2.002130291538481, - "learning_rate": 4.78758135776681e-07, - "loss": 0.7754, - "num_input_tokens_seen": 138304410, - "step": 6502 - }, - { - "epoch": 0.7819395178260086, - "grad_norm": 4.537871945859692, - "learning_rate": 4.782525456867989e-07, - "loss": 0.7837, - "num_input_tokens_seen": 138322985, - "step": 6503 - }, - { - "epoch": 0.7820597607166476, - "grad_norm": 1.8152758483097473, - "learning_rate": 4.777471864447959e-07, - "loss": 0.8226, - "num_input_tokens_seen": 138343445, - "step": 6504 - }, - { - "epoch": 0.7821800036072867, - "grad_norm": 2.288650847117772, - "learning_rate": 4.772420581273344e-07, - "loss": 0.7967, - "num_input_tokens_seen": 138360650, - "step": 6505 - }, - { - "epoch": 0.7823002464979258, - "grad_norm": 2.4116869185501844, - "learning_rate": 4.7673716081104134e-07, - "loss": 0.7636, - "num_input_tokens_seen": 138380545, - "step": 6506 - }, - { - "epoch": 0.7824204893885649, - "grad_norm": 1.8313960262361968, - "learning_rate": 4.762324945725102e-07, - "loss": 0.8379, - "num_input_tokens_seen": 138399710, - "step": 6507 - }, - { - "epoch": 0.782540732279204, - "grad_norm": 1.969528082994351, - "learning_rate": 4.7572805948829844e-07, - "loss": 0.7502, - "num_input_tokens_seen": 138419690, - "step": 6508 - }, - { - "epoch": 0.7826609751698431, - "grad_norm": 1.8777736120112787, - "learning_rate": 4.7522385563492795e-07, - "loss": 0.7043, - "num_input_tokens_seen": 138439710, - "step": 6509 - }, - { - "epoch": 0.7827812180604822, - "grad_norm": 2.557912300760641, - "learning_rate": 4.747198830888857e-07, - "loss": 0.694, - "num_input_tokens_seen": 138459300, - "step": 6510 - }, - { - "epoch": 0.7829014609511212, - "grad_norm": 2.1996694476284353, - "learning_rate": 4.742161419266255e-07, - "loss": 0.6777, - "num_input_tokens_seen": 138478180, - "step": 6511 - }, - { - "epoch": 0.7830217038417604, - "grad_norm": 2.5795612568071102, - "learning_rate": 4.7371263222456304e-07, - "loss": 0.6497, - "num_input_tokens_seen": 138495220, - "step": 6512 - }, - { - "epoch": 0.7831419467323995, - "grad_norm": 0.8287779299546597, - "learning_rate": 4.732093540590807e-07, - "loss": 0.6469, - "num_input_tokens_seen": 138555810, - "step": 6513 - }, - { - "epoch": 0.7832621896230385, - "grad_norm": 3.1322762495802983, - "learning_rate": 4.7270630750652475e-07, - "loss": 0.8185, - "num_input_tokens_seen": 138571485, - "step": 6514 - }, - { - "epoch": 0.7833824325136777, - "grad_norm": 1.750614157913882, - "learning_rate": 4.7220349264320815e-07, - "loss": 0.8038, - "num_input_tokens_seen": 138590290, - "step": 6515 - }, - { - "epoch": 0.7835026754043167, - "grad_norm": 0.7989667014940743, - "learning_rate": 4.71700909545407e-07, - "loss": 0.5939, - "num_input_tokens_seen": 138652955, - "step": 6516 - }, - { - "epoch": 0.7836229182949558, - "grad_norm": 1.9881447994755221, - "learning_rate": 4.711985582893627e-07, - "loss": 0.7616, - "num_input_tokens_seen": 138671195, - "step": 6517 - }, - { - "epoch": 0.783743161185595, - "grad_norm": 2.002830156145519, - "learning_rate": 4.706964389512811e-07, - "loss": 0.7131, - "num_input_tokens_seen": 138690950, - "step": 6518 - }, - { - "epoch": 0.783863404076234, - "grad_norm": 2.367181348621602, - "learning_rate": 4.701945516073345e-07, - "loss": 0.8669, - "num_input_tokens_seen": 138708145, - "step": 6519 - }, - { - "epoch": 0.7839836469668731, - "grad_norm": 2.5491382204757387, - "learning_rate": 4.696928963336577e-07, - "loss": 0.7486, - "num_input_tokens_seen": 138727295, - "step": 6520 - }, - { - "epoch": 0.7841038898575122, - "grad_norm": 0.8887021278182728, - "learning_rate": 4.6919147320635224e-07, - "loss": 0.6364, - "num_input_tokens_seen": 138789725, - "step": 6521 - }, - { - "epoch": 0.7842241327481513, - "grad_norm": 5.195074971803579, - "learning_rate": 4.6869028230148223e-07, - "loss": 0.7239, - "num_input_tokens_seen": 138807240, - "step": 6522 - }, - { - "epoch": 0.7843443756387903, - "grad_norm": 3.9206470905707484, - "learning_rate": 4.6818932369507957e-07, - "loss": 0.6044, - "num_input_tokens_seen": 138826460, - "step": 6523 - }, - { - "epoch": 0.7844646185294295, - "grad_norm": 2.5691218680073744, - "learning_rate": 4.676885974631386e-07, - "loss": 0.8762, - "num_input_tokens_seen": 138844540, - "step": 6524 - }, - { - "epoch": 0.7845848614200686, - "grad_norm": 2.053408858133002, - "learning_rate": 4.67188103681619e-07, - "loss": 0.7984, - "num_input_tokens_seen": 138864045, - "step": 6525 - }, - { - "epoch": 0.7847051043107076, - "grad_norm": 2.2931376213854793, - "learning_rate": 4.666878424264453e-07, - "loss": 0.6857, - "num_input_tokens_seen": 138883720, - "step": 6526 - }, - { - "epoch": 0.7848253472013467, - "grad_norm": 1.8381895291923052, - "learning_rate": 4.661878137735069e-07, - "loss": 0.7316, - "num_input_tokens_seen": 138901630, - "step": 6527 - }, - { - "epoch": 0.7849455900919858, - "grad_norm": 5.045677826744305, - "learning_rate": 4.656880177986577e-07, - "loss": 0.7439, - "num_input_tokens_seen": 138919895, - "step": 6528 - }, - { - "epoch": 0.7850658329826249, - "grad_norm": 2.22738545499811, - "learning_rate": 4.6518845457771607e-07, - "loss": 0.8044, - "num_input_tokens_seen": 138938475, - "step": 6529 - }, - { - "epoch": 0.7851860758732639, - "grad_norm": 2.391111716310641, - "learning_rate": 4.646891241864652e-07, - "loss": 0.7918, - "num_input_tokens_seen": 138956760, - "step": 6530 - }, - { - "epoch": 0.7853063187639031, - "grad_norm": 2.1300471992083487, - "learning_rate": 4.6419002670065397e-07, - "loss": 0.7291, - "num_input_tokens_seen": 138976060, - "step": 6531 - }, - { - "epoch": 0.7854265616545422, - "grad_norm": 2.310930386919058, - "learning_rate": 4.6369116219599445e-07, - "loss": 0.8458, - "num_input_tokens_seen": 138991765, - "step": 6532 - }, - { - "epoch": 0.7855468045451812, - "grad_norm": 1.9163906904286707, - "learning_rate": 4.631925307481643e-07, - "loss": 0.7889, - "num_input_tokens_seen": 139011300, - "step": 6533 - }, - { - "epoch": 0.7856670474358204, - "grad_norm": 2.633796967505484, - "learning_rate": 4.6269413243280464e-07, - "loss": 0.7434, - "num_input_tokens_seen": 139030440, - "step": 6534 - }, - { - "epoch": 0.7857872903264594, - "grad_norm": 2.9064898087318687, - "learning_rate": 4.621959673255236e-07, - "loss": 0.7443, - "num_input_tokens_seen": 139046460, - "step": 6535 - }, - { - "epoch": 0.7859075332170985, - "grad_norm": 2.5458334201319612, - "learning_rate": 4.6169803550189135e-07, - "loss": 0.8992, - "num_input_tokens_seen": 139061875, - "step": 6536 - }, - { - "epoch": 0.7860277761077377, - "grad_norm": 9.78434268023342, - "learning_rate": 4.6120033703744424e-07, - "loss": 0.7599, - "num_input_tokens_seen": 139080490, - "step": 6537 - }, - { - "epoch": 0.7861480189983767, - "grad_norm": 2.2762288060598403, - "learning_rate": 4.6070287200768177e-07, - "loss": 0.778, - "num_input_tokens_seen": 139096890, - "step": 6538 - }, - { - "epoch": 0.7862682618890158, - "grad_norm": 2.0139993938300944, - "learning_rate": 4.602056404880703e-07, - "loss": 0.7266, - "num_input_tokens_seen": 139114285, - "step": 6539 - }, - { - "epoch": 0.7863885047796549, - "grad_norm": 5.9096370463733034, - "learning_rate": 4.5970864255403883e-07, - "loss": 0.716, - "num_input_tokens_seen": 139135530, - "step": 6540 - }, - { - "epoch": 0.786508747670294, - "grad_norm": 2.607097106719047, - "learning_rate": 4.59211878280982e-07, - "loss": 0.8151, - "num_input_tokens_seen": 139154765, - "step": 6541 - }, - { - "epoch": 0.786628990560933, - "grad_norm": 2.378131763862302, - "learning_rate": 4.587153477442578e-07, - "loss": 0.7007, - "num_input_tokens_seen": 139170800, - "step": 6542 - }, - { - "epoch": 0.7867492334515722, - "grad_norm": 2.462117859171312, - "learning_rate": 4.582190510191899e-07, - "loss": 0.8153, - "num_input_tokens_seen": 139189180, - "step": 6543 - }, - { - "epoch": 0.7868694763422113, - "grad_norm": 2.4289066476263925, - "learning_rate": 4.5772298818106625e-07, - "loss": 0.858, - "num_input_tokens_seen": 139204690, - "step": 6544 - }, - { - "epoch": 0.7869897192328503, - "grad_norm": 4.065258907706046, - "learning_rate": 4.572271593051389e-07, - "loss": 0.7228, - "num_input_tokens_seen": 139221765, - "step": 6545 - }, - { - "epoch": 0.7871099621234895, - "grad_norm": 1.684615657524531, - "learning_rate": 4.567315644666245e-07, - "loss": 0.767, - "num_input_tokens_seen": 139240280, - "step": 6546 - }, - { - "epoch": 0.7872302050141285, - "grad_norm": 2.2279816301929496, - "learning_rate": 4.5623620374070507e-07, - "loss": 0.8383, - "num_input_tokens_seen": 139259315, - "step": 6547 - }, - { - "epoch": 0.7873504479047676, - "grad_norm": 0.8115200289333083, - "learning_rate": 4.557410772025263e-07, - "loss": 0.6282, - "num_input_tokens_seen": 139320985, - "step": 6548 - }, - { - "epoch": 0.7874706907954068, - "grad_norm": 18.92609819888793, - "learning_rate": 4.5524618492719803e-07, - "loss": 0.655, - "num_input_tokens_seen": 139339925, - "step": 6549 - }, - { - "epoch": 0.7875909336860458, - "grad_norm": 1.46626843560371, - "learning_rate": 4.54751526989795e-07, - "loss": 0.7739, - "num_input_tokens_seen": 139361485, - "step": 6550 - }, - { - "epoch": 0.7877111765766849, - "grad_norm": 2.56102401660103, - "learning_rate": 4.5425710346535706e-07, - "loss": 0.7876, - "num_input_tokens_seen": 139379150, - "step": 6551 - }, - { - "epoch": 0.787831419467324, - "grad_norm": 2.3511993593764, - "learning_rate": 4.537629144288877e-07, - "loss": 0.8102, - "num_input_tokens_seen": 139396325, - "step": 6552 - }, - { - "epoch": 0.7879516623579631, - "grad_norm": 2.761978460632437, - "learning_rate": 4.5326895995535477e-07, - "loss": 0.7415, - "num_input_tokens_seen": 139414945, - "step": 6553 - }, - { - "epoch": 0.7880719052486022, - "grad_norm": 3.343338701952212, - "learning_rate": 4.527752401196907e-07, - "loss": 0.8386, - "num_input_tokens_seen": 139432680, - "step": 6554 - }, - { - "epoch": 0.7881921481392413, - "grad_norm": 2.5455675086628125, - "learning_rate": 4.5228175499679254e-07, - "loss": 0.6679, - "num_input_tokens_seen": 139451985, - "step": 6555 - }, - { - "epoch": 0.7883123910298804, - "grad_norm": 0.8657004409253339, - "learning_rate": 4.5178850466152174e-07, - "loss": 0.5633, - "num_input_tokens_seen": 139510535, - "step": 6556 - }, - { - "epoch": 0.7884326339205194, - "grad_norm": 2.1726706978780914, - "learning_rate": 4.5129548918870377e-07, - "loss": 0.8123, - "num_input_tokens_seen": 139528555, - "step": 6557 - }, - { - "epoch": 0.7885528768111585, - "grad_norm": 2.6732285107988605, - "learning_rate": 4.5080270865312806e-07, - "loss": 0.832, - "num_input_tokens_seen": 139545470, - "step": 6558 - }, - { - "epoch": 0.7886731197017977, - "grad_norm": 3.4994461583746768, - "learning_rate": 4.5031016312955027e-07, - "loss": 0.7032, - "num_input_tokens_seen": 139563505, - "step": 6559 - }, - { - "epoch": 0.7887933625924367, - "grad_norm": 2.164115294854905, - "learning_rate": 4.498178526926886e-07, - "loss": 0.7444, - "num_input_tokens_seen": 139584090, - "step": 6560 - }, - { - "epoch": 0.7889136054830758, - "grad_norm": 2.5859696583032643, - "learning_rate": 4.4932577741722635e-07, - "loss": 0.7263, - "num_input_tokens_seen": 139602340, - "step": 6561 - }, - { - "epoch": 0.7890338483737149, - "grad_norm": 1.7693188400056055, - "learning_rate": 4.4883393737780985e-07, - "loss": 0.7453, - "num_input_tokens_seen": 139623010, - "step": 6562 - }, - { - "epoch": 0.789154091264354, - "grad_norm": 4.134475029366514, - "learning_rate": 4.4834233264905254e-07, - "loss": 0.7756, - "num_input_tokens_seen": 139639745, - "step": 6563 - }, - { - "epoch": 0.789274334154993, - "grad_norm": 3.8503971787461686, - "learning_rate": 4.478509633055294e-07, - "loss": 0.7165, - "num_input_tokens_seen": 139657175, - "step": 6564 - }, - { - "epoch": 0.7893945770456322, - "grad_norm": 3.925838407472922, - "learning_rate": 4.473598294217813e-07, - "loss": 0.8013, - "num_input_tokens_seen": 139672320, - "step": 6565 - }, - { - "epoch": 0.7895148199362713, - "grad_norm": 2.168090397666778, - "learning_rate": 4.4686893107231196e-07, - "loss": 0.7197, - "num_input_tokens_seen": 139689855, - "step": 6566 - }, - { - "epoch": 0.7896350628269103, - "grad_norm": 2.532593047945066, - "learning_rate": 4.463782683315913e-07, - "loss": 0.7759, - "num_input_tokens_seen": 139708580, - "step": 6567 - }, - { - "epoch": 0.7897553057175495, - "grad_norm": 2.233097236943236, - "learning_rate": 4.458878412740523e-07, - "loss": 0.7193, - "num_input_tokens_seen": 139727080, - "step": 6568 - }, - { - "epoch": 0.7898755486081885, - "grad_norm": 3.611853276789595, - "learning_rate": 4.453976499740919e-07, - "loss": 0.7702, - "num_input_tokens_seen": 139744445, - "step": 6569 - }, - { - "epoch": 0.7899957914988276, - "grad_norm": 3.8846004019161797, - "learning_rate": 4.4490769450607215e-07, - "loss": 0.7772, - "num_input_tokens_seen": 139761790, - "step": 6570 - }, - { - "epoch": 0.7901160343894668, - "grad_norm": 3.077231986599135, - "learning_rate": 4.4441797494431845e-07, - "loss": 0.7345, - "num_input_tokens_seen": 139783315, - "step": 6571 - }, - { - "epoch": 0.7902362772801058, - "grad_norm": 4.375585990050049, - "learning_rate": 4.439284913631214e-07, - "loss": 0.7807, - "num_input_tokens_seen": 139800245, - "step": 6572 - }, - { - "epoch": 0.7903565201707449, - "grad_norm": 2.7973099914915642, - "learning_rate": 4.434392438367347e-07, - "loss": 0.8303, - "num_input_tokens_seen": 139819390, - "step": 6573 - }, - { - "epoch": 0.790476763061384, - "grad_norm": 2.166016007312907, - "learning_rate": 4.4295023243937677e-07, - "loss": 0.7337, - "num_input_tokens_seen": 139839315, - "step": 6574 - }, - { - "epoch": 0.7905970059520231, - "grad_norm": 1.9267302561393633, - "learning_rate": 4.4246145724523123e-07, - "loss": 0.8028, - "num_input_tokens_seen": 139856780, - "step": 6575 - }, - { - "epoch": 0.7907172488426621, - "grad_norm": 2.833445551831673, - "learning_rate": 4.41972918328444e-07, - "loss": 0.7606, - "num_input_tokens_seen": 139873935, - "step": 6576 - }, - { - "epoch": 0.7908374917333013, - "grad_norm": 2.921887831434381, - "learning_rate": 4.4148461576312646e-07, - "loss": 0.7776, - "num_input_tokens_seen": 139893320, - "step": 6577 - }, - { - "epoch": 0.7909577346239404, - "grad_norm": 1.5517627113205803, - "learning_rate": 4.4099654962335274e-07, - "loss": 0.7404, - "num_input_tokens_seen": 139913490, - "step": 6578 - }, - { - "epoch": 0.7910779775145794, - "grad_norm": 1.8679197766264184, - "learning_rate": 4.405087199831636e-07, - "loss": 0.7367, - "num_input_tokens_seen": 139933450, - "step": 6579 - }, - { - "epoch": 0.7911982204052186, - "grad_norm": 10.883326488497756, - "learning_rate": 4.400211269165619e-07, - "loss": 0.6714, - "num_input_tokens_seen": 139949625, - "step": 6580 - }, - { - "epoch": 0.7913184632958576, - "grad_norm": 1.6273304937248816, - "learning_rate": 4.3953377049751463e-07, - "loss": 0.7656, - "num_input_tokens_seen": 139969770, - "step": 6581 - }, - { - "epoch": 0.7914387061864967, - "grad_norm": 2.8756865821185653, - "learning_rate": 4.390466507999533e-07, - "loss": 0.7708, - "num_input_tokens_seen": 139985240, - "step": 6582 - }, - { - "epoch": 0.7915589490771359, - "grad_norm": 2.4951737358532795, - "learning_rate": 4.385597678977744e-07, - "loss": 0.7607, - "num_input_tokens_seen": 140003795, - "step": 6583 - }, - { - "epoch": 0.7916791919677749, - "grad_norm": 3.6922783720166055, - "learning_rate": 4.3807312186483726e-07, - "loss": 0.7416, - "num_input_tokens_seen": 140024235, - "step": 6584 - }, - { - "epoch": 0.791799434858414, - "grad_norm": 2.1978319902726327, - "learning_rate": 4.375867127749655e-07, - "loss": 0.7759, - "num_input_tokens_seen": 140042230, - "step": 6585 - }, - { - "epoch": 0.7919196777490531, - "grad_norm": 2.469862047310355, - "learning_rate": 4.3710054070194744e-07, - "loss": 0.6755, - "num_input_tokens_seen": 140061645, - "step": 6586 - }, - { - "epoch": 0.7920399206396922, - "grad_norm": 3.935687673705815, - "learning_rate": 4.3661460571953455e-07, - "loss": 0.6613, - "num_input_tokens_seen": 140078100, - "step": 6587 - }, - { - "epoch": 0.7921601635303313, - "grad_norm": 1.7208626897339487, - "learning_rate": 4.36128907901443e-07, - "loss": 0.6739, - "num_input_tokens_seen": 140097415, - "step": 6588 - }, - { - "epoch": 0.7922804064209703, - "grad_norm": 3.0157235650548704, - "learning_rate": 4.356434473213526e-07, - "loss": 0.7123, - "num_input_tokens_seen": 140114585, - "step": 6589 - }, - { - "epoch": 0.7924006493116095, - "grad_norm": 2.38162014564134, - "learning_rate": 4.351582240529068e-07, - "loss": 0.7847, - "num_input_tokens_seen": 140135135, - "step": 6590 - }, - { - "epoch": 0.7925208922022485, - "grad_norm": 0.7069074921835033, - "learning_rate": 4.346732381697149e-07, - "loss": 0.6098, - "num_input_tokens_seen": 140198985, - "step": 6591 - }, - { - "epoch": 0.7926411350928876, - "grad_norm": 1.8791145206138866, - "learning_rate": 4.3418848974534825e-07, - "loss": 0.8117, - "num_input_tokens_seen": 140215645, - "step": 6592 - }, - { - "epoch": 0.7927613779835267, - "grad_norm": 1.7709507319081672, - "learning_rate": 4.3370397885334276e-07, - "loss": 0.6802, - "num_input_tokens_seen": 140235995, - "step": 6593 - }, - { - "epoch": 0.7928816208741658, - "grad_norm": 2.191721012237967, - "learning_rate": 4.3321970556719777e-07, - "loss": 0.7479, - "num_input_tokens_seen": 140254010, - "step": 6594 - }, - { - "epoch": 0.7930018637648049, - "grad_norm": 3.232595447172109, - "learning_rate": 4.3273566996037814e-07, - "loss": 0.7112, - "num_input_tokens_seen": 140270425, - "step": 6595 - }, - { - "epoch": 0.793122106655444, - "grad_norm": 2.2419566080581093, - "learning_rate": 4.322518721063113e-07, - "loss": 0.792, - "num_input_tokens_seen": 140288695, - "step": 6596 - }, - { - "epoch": 0.7932423495460831, - "grad_norm": 2.0667869479864116, - "learning_rate": 4.3176831207838906e-07, - "loss": 0.6923, - "num_input_tokens_seen": 140311825, - "step": 6597 - }, - { - "epoch": 0.7933625924367221, - "grad_norm": 1.967228453792871, - "learning_rate": 4.3128498994996685e-07, - "loss": 0.7486, - "num_input_tokens_seen": 140331020, - "step": 6598 - }, - { - "epoch": 0.7934828353273613, - "grad_norm": 2.2617734828760048, - "learning_rate": 4.308019057943646e-07, - "loss": 0.7077, - "num_input_tokens_seen": 140352465, - "step": 6599 - }, - { - "epoch": 0.7936030782180004, - "grad_norm": 2.1427991752682125, - "learning_rate": 4.3031905968486535e-07, - "loss": 0.7455, - "num_input_tokens_seen": 140373015, - "step": 6600 - }, - { - "epoch": 0.7937233211086394, - "grad_norm": 2.2559952354583865, - "learning_rate": 4.298364516947168e-07, - "loss": 0.6849, - "num_input_tokens_seen": 140389965, - "step": 6601 - }, - { - "epoch": 0.7938435639992786, - "grad_norm": 2.2881709286565264, - "learning_rate": 4.293540818971295e-07, - "loss": 0.6617, - "num_input_tokens_seen": 140407490, - "step": 6602 - }, - { - "epoch": 0.7939638068899176, - "grad_norm": 2.216010863510391, - "learning_rate": 4.2887195036527976e-07, - "loss": 0.7699, - "num_input_tokens_seen": 140426015, - "step": 6603 - }, - { - "epoch": 0.7940840497805567, - "grad_norm": 2.924610516895714, - "learning_rate": 4.28390057172306e-07, - "loss": 0.7301, - "num_input_tokens_seen": 140442240, - "step": 6604 - }, - { - "epoch": 0.7942042926711959, - "grad_norm": 2.9420251858647166, - "learning_rate": 4.279084023913111e-07, - "loss": 0.7234, - "num_input_tokens_seen": 140459835, - "step": 6605 - }, - { - "epoch": 0.7943245355618349, - "grad_norm": 2.110265184849847, - "learning_rate": 4.2742698609536096e-07, - "loss": 0.687, - "num_input_tokens_seen": 140477865, - "step": 6606 - }, - { - "epoch": 0.794444778452474, - "grad_norm": 5.3948542484579685, - "learning_rate": 4.2694580835748706e-07, - "loss": 0.7781, - "num_input_tokens_seen": 140497445, - "step": 6607 - }, - { - "epoch": 0.7945650213431131, - "grad_norm": 2.0683105884559136, - "learning_rate": 4.264648692506836e-07, - "loss": 0.7324, - "num_input_tokens_seen": 140515955, - "step": 6608 - }, - { - "epoch": 0.7946852642337522, - "grad_norm": 1.9142453139761246, - "learning_rate": 4.2598416884790824e-07, - "loss": 0.715, - "num_input_tokens_seen": 140534725, - "step": 6609 - }, - { - "epoch": 0.7948055071243912, - "grad_norm": 2.7881176658288855, - "learning_rate": 4.255037072220824e-07, - "loss": 0.8062, - "num_input_tokens_seen": 140555815, - "step": 6610 - }, - { - "epoch": 0.7949257500150304, - "grad_norm": 2.101215362922217, - "learning_rate": 4.2502348444609293e-07, - "loss": 0.711, - "num_input_tokens_seen": 140575155, - "step": 6611 - }, - { - "epoch": 0.7950459929056695, - "grad_norm": 2.4649507122318344, - "learning_rate": 4.2454350059278844e-07, - "loss": 0.6813, - "num_input_tokens_seen": 140595935, - "step": 6612 - }, - { - "epoch": 0.7951662357963085, - "grad_norm": 1.8590295887028097, - "learning_rate": 4.240637557349824e-07, - "loss": 0.8384, - "num_input_tokens_seen": 140612870, - "step": 6613 - }, - { - "epoch": 0.7952864786869477, - "grad_norm": 2.150851352209119, - "learning_rate": 4.235842499454516e-07, - "loss": 0.6536, - "num_input_tokens_seen": 140632505, - "step": 6614 - }, - { - "epoch": 0.7954067215775867, - "grad_norm": 1.9800560810694914, - "learning_rate": 4.2310498329693687e-07, - "loss": 0.8183, - "num_input_tokens_seen": 140653125, - "step": 6615 - }, - { - "epoch": 0.7955269644682258, - "grad_norm": 1.775172580027822, - "learning_rate": 4.226259558621421e-07, - "loss": 0.8039, - "num_input_tokens_seen": 140673940, - "step": 6616 - }, - { - "epoch": 0.795647207358865, - "grad_norm": 2.1544108526428, - "learning_rate": 4.221471677137358e-07, - "loss": 0.7653, - "num_input_tokens_seen": 140694475, - "step": 6617 - }, - { - "epoch": 0.795767450249504, - "grad_norm": 1.6828833693622285, - "learning_rate": 4.216686189243492e-07, - "loss": 0.6981, - "num_input_tokens_seen": 140712985, - "step": 6618 - }, - { - "epoch": 0.7958876931401431, - "grad_norm": 1.7448310175183948, - "learning_rate": 4.211903095665785e-07, - "loss": 0.7218, - "num_input_tokens_seen": 140732090, - "step": 6619 - }, - { - "epoch": 0.7960079360307821, - "grad_norm": 2.202566601572069, - "learning_rate": 4.2071223971298277e-07, - "loss": 0.7494, - "num_input_tokens_seen": 140748995, - "step": 6620 - }, - { - "epoch": 0.7961281789214213, - "grad_norm": 2.629815763780873, - "learning_rate": 4.2023440943608433e-07, - "loss": 0.6088, - "num_input_tokens_seen": 140768680, - "step": 6621 - }, - { - "epoch": 0.7962484218120603, - "grad_norm": 2.204287002448897, - "learning_rate": 4.1975681880836954e-07, - "loss": 0.7839, - "num_input_tokens_seen": 140788405, - "step": 6622 - }, - { - "epoch": 0.7963686647026994, - "grad_norm": 1.7594970002206367, - "learning_rate": 4.192794679022895e-07, - "loss": 0.8225, - "num_input_tokens_seen": 140806450, - "step": 6623 - }, - { - "epoch": 0.7964889075933386, - "grad_norm": 2.0018840328485648, - "learning_rate": 4.1880235679025743e-07, - "loss": 0.7132, - "num_input_tokens_seen": 140826265, - "step": 6624 - }, - { - "epoch": 0.7966091504839776, - "grad_norm": 2.8301330020851494, - "learning_rate": 4.1832548554465054e-07, - "loss": 0.6271, - "num_input_tokens_seen": 140844280, - "step": 6625 - }, - { - "epoch": 0.7967293933746167, - "grad_norm": 0.7893052969496102, - "learning_rate": 4.1784885423780934e-07, - "loss": 0.6107, - "num_input_tokens_seen": 140901580, - "step": 6626 - }, - { - "epoch": 0.7968496362652558, - "grad_norm": 2.066843155078559, - "learning_rate": 4.173724629420394e-07, - "loss": 0.8829, - "num_input_tokens_seen": 140922660, - "step": 6627 - }, - { - "epoch": 0.7969698791558949, - "grad_norm": 4.259539999530233, - "learning_rate": 4.168963117296087e-07, - "loss": 0.6791, - "num_input_tokens_seen": 140939715, - "step": 6628 - }, - { - "epoch": 0.797090122046534, - "grad_norm": 2.479631822273694, - "learning_rate": 4.1642040067274876e-07, - "loss": 0.7512, - "num_input_tokens_seen": 140959105, - "step": 6629 - }, - { - "epoch": 0.7972103649371731, - "grad_norm": 1.8333417997372683, - "learning_rate": 4.1594472984365493e-07, - "loss": 0.7173, - "num_input_tokens_seen": 140977510, - "step": 6630 - }, - { - "epoch": 0.7973306078278122, - "grad_norm": 4.717198677937778, - "learning_rate": 4.154692993144862e-07, - "loss": 0.7657, - "num_input_tokens_seen": 140997000, - "step": 6631 - }, - { - "epoch": 0.7974508507184512, - "grad_norm": 2.2805903769213933, - "learning_rate": 4.1499410915736476e-07, - "loss": 0.7103, - "num_input_tokens_seen": 141015650, - "step": 6632 - }, - { - "epoch": 0.7975710936090904, - "grad_norm": 0.823809378840752, - "learning_rate": 4.145191594443769e-07, - "loss": 0.7079, - "num_input_tokens_seen": 141079725, - "step": 6633 - }, - { - "epoch": 0.7976913364997295, - "grad_norm": 1.9422318021280922, - "learning_rate": 4.140444502475713e-07, - "loss": 0.7054, - "num_input_tokens_seen": 141098995, - "step": 6634 - }, - { - "epoch": 0.7978115793903685, - "grad_norm": 2.4784895766777892, - "learning_rate": 4.1356998163896216e-07, - "loss": 0.6927, - "num_input_tokens_seen": 141115765, - "step": 6635 - }, - { - "epoch": 0.7979318222810077, - "grad_norm": 2.5803522743587157, - "learning_rate": 4.130957536905255e-07, - "loss": 0.739, - "num_input_tokens_seen": 141133500, - "step": 6636 - }, - { - "epoch": 0.7980520651716467, - "grad_norm": 3.7813008882868284, - "learning_rate": 4.1262176647420134e-07, - "loss": 0.7084, - "num_input_tokens_seen": 141151385, - "step": 6637 - }, - { - "epoch": 0.7981723080622858, - "grad_norm": 18.140299554166404, - "learning_rate": 4.121480200618923e-07, - "loss": 0.7953, - "num_input_tokens_seen": 141170760, - "step": 6638 - }, - { - "epoch": 0.798292550952925, - "grad_norm": 2.4006875597946125, - "learning_rate": 4.11674514525467e-07, - "loss": 0.793, - "num_input_tokens_seen": 141190015, - "step": 6639 - }, - { - "epoch": 0.798412793843564, - "grad_norm": 0.8185702746196547, - "learning_rate": 4.1120124993675476e-07, - "loss": 0.6126, - "num_input_tokens_seen": 141254165, - "step": 6640 - }, - { - "epoch": 0.7985330367342031, - "grad_norm": 2.166649431357531, - "learning_rate": 4.107282263675498e-07, - "loss": 0.6214, - "num_input_tokens_seen": 141271555, - "step": 6641 - }, - { - "epoch": 0.7986532796248422, - "grad_norm": 0.7493303631700791, - "learning_rate": 4.1025544388960907e-07, - "loss": 0.5432, - "num_input_tokens_seen": 141332315, - "step": 6642 - }, - { - "epoch": 0.7987735225154813, - "grad_norm": 2.154479093536911, - "learning_rate": 4.097829025746538e-07, - "loss": 0.7131, - "num_input_tokens_seen": 141353580, - "step": 6643 - }, - { - "epoch": 0.7988937654061203, - "grad_norm": 0.7051672616595179, - "learning_rate": 4.0931060249436757e-07, - "loss": 0.6227, - "num_input_tokens_seen": 141417140, - "step": 6644 - }, - { - "epoch": 0.7990140082967595, - "grad_norm": 3.846159148833426, - "learning_rate": 4.088385437203983e-07, - "loss": 0.6845, - "num_input_tokens_seen": 141433870, - "step": 6645 - }, - { - "epoch": 0.7991342511873986, - "grad_norm": 2.736133380293792, - "learning_rate": 4.083667263243564e-07, - "loss": 0.7689, - "num_input_tokens_seen": 141451935, - "step": 6646 - }, - { - "epoch": 0.7992544940780376, - "grad_norm": 2.0825976885317905, - "learning_rate": 4.0789515037781696e-07, - "loss": 0.7053, - "num_input_tokens_seen": 141472380, - "step": 6647 - }, - { - "epoch": 0.7993747369686768, - "grad_norm": 6.517627822534658, - "learning_rate": 4.0742381595231755e-07, - "loss": 0.8236, - "num_input_tokens_seen": 141488825, - "step": 6648 - }, - { - "epoch": 0.7994949798593158, - "grad_norm": 1.7369255979642453, - "learning_rate": 4.06952723119359e-07, - "loss": 0.7822, - "num_input_tokens_seen": 141508420, - "step": 6649 - }, - { - "epoch": 0.7996152227499549, - "grad_norm": 3.697304491371467, - "learning_rate": 4.0648187195040504e-07, - "loss": 0.6653, - "num_input_tokens_seen": 141530345, - "step": 6650 - }, - { - "epoch": 0.799735465640594, - "grad_norm": 1.042984619199664, - "learning_rate": 4.060112625168848e-07, - "loss": 0.7246, - "num_input_tokens_seen": 141595175, - "step": 6651 - }, - { - "epoch": 0.7998557085312331, - "grad_norm": 2.2622730295235596, - "learning_rate": 4.055408948901886e-07, - "loss": 0.7381, - "num_input_tokens_seen": 141616295, - "step": 6652 - }, - { - "epoch": 0.7999759514218722, - "grad_norm": 2.8461654667839715, - "learning_rate": 4.050707691416708e-07, - "loss": 0.7078, - "num_input_tokens_seen": 141637325, - "step": 6653 - }, - { - "epoch": 0.8000961943125112, - "grad_norm": 0.7349926385703843, - "learning_rate": 4.046008853426488e-07, - "loss": 0.6211, - "num_input_tokens_seen": 141700360, - "step": 6654 - }, - { - "epoch": 0.8002164372031504, - "grad_norm": 2.4138838760615267, - "learning_rate": 4.0413124356440464e-07, - "loss": 0.6276, - "num_input_tokens_seen": 141724125, - "step": 6655 - }, - { - "epoch": 0.8003366800937894, - "grad_norm": 2.0200897689092625, - "learning_rate": 4.036618438781818e-07, - "loss": 0.8227, - "num_input_tokens_seen": 141742305, - "step": 6656 - }, - { - "epoch": 0.8004569229844285, - "grad_norm": 2.061170798551594, - "learning_rate": 4.0319268635518797e-07, - "loss": 0.849, - "num_input_tokens_seen": 141762600, - "step": 6657 - }, - { - "epoch": 0.8005771658750677, - "grad_norm": 1.9053033050912034, - "learning_rate": 4.027237710665943e-07, - "loss": 0.7444, - "num_input_tokens_seen": 141780785, - "step": 6658 - }, - { - "epoch": 0.8006974087657067, - "grad_norm": 2.1208929488561132, - "learning_rate": 4.022550980835344e-07, - "loss": 0.6943, - "num_input_tokens_seen": 141802750, - "step": 6659 - }, - { - "epoch": 0.8008176516563458, - "grad_norm": 4.977877012078649, - "learning_rate": 4.017866674771058e-07, - "loss": 0.8022, - "num_input_tokens_seen": 141819955, - "step": 6660 - }, - { - "epoch": 0.8009378945469849, - "grad_norm": 2.014074460767353, - "learning_rate": 4.013184793183688e-07, - "loss": 0.7443, - "num_input_tokens_seen": 141841770, - "step": 6661 - }, - { - "epoch": 0.801058137437624, - "grad_norm": 2.1151315127676122, - "learning_rate": 4.008505336783472e-07, - "loss": 0.7188, - "num_input_tokens_seen": 141859215, - "step": 6662 - }, - { - "epoch": 0.801178380328263, - "grad_norm": 2.0670748493270903, - "learning_rate": 4.003828306280284e-07, - "loss": 0.817, - "num_input_tokens_seen": 141876610, - "step": 6663 - }, - { - "epoch": 0.8012986232189022, - "grad_norm": 1.845973965206068, - "learning_rate": 3.999153702383626e-07, - "loss": 0.7728, - "num_input_tokens_seen": 141894220, - "step": 6664 - }, - { - "epoch": 0.8014188661095413, - "grad_norm": 2.174803822126223, - "learning_rate": 3.9944815258026263e-07, - "loss": 0.7274, - "num_input_tokens_seen": 141915760, - "step": 6665 - }, - { - "epoch": 0.8015391090001803, - "grad_norm": 13.06832285281222, - "learning_rate": 3.9898117772460505e-07, - "loss": 0.8269, - "num_input_tokens_seen": 141935650, - "step": 6666 - }, - { - "epoch": 0.8016593518908195, - "grad_norm": 0.921291621580826, - "learning_rate": 3.985144457422305e-07, - "loss": 0.7001, - "num_input_tokens_seen": 141989655, - "step": 6667 - }, - { - "epoch": 0.8017795947814585, - "grad_norm": 4.0151380443713105, - "learning_rate": 3.9804795670394096e-07, - "loss": 0.754, - "num_input_tokens_seen": 142009500, - "step": 6668 - }, - { - "epoch": 0.8018998376720976, - "grad_norm": 2.5819981014056443, - "learning_rate": 3.975817106805026e-07, - "loss": 0.7068, - "num_input_tokens_seen": 142027920, - "step": 6669 - }, - { - "epoch": 0.8020200805627368, - "grad_norm": 2.212655538936499, - "learning_rate": 3.9711570774264433e-07, - "loss": 0.6394, - "num_input_tokens_seen": 142048315, - "step": 6670 - }, - { - "epoch": 0.8021403234533758, - "grad_norm": 2.371809147509461, - "learning_rate": 3.966499479610592e-07, - "loss": 0.6497, - "num_input_tokens_seen": 142066130, - "step": 6671 - }, - { - "epoch": 0.8022605663440149, - "grad_norm": 1.9087701317367574, - "learning_rate": 3.9618443140640225e-07, - "loss": 0.6502, - "num_input_tokens_seen": 142084760, - "step": 6672 - }, - { - "epoch": 0.802380809234654, - "grad_norm": 0.759228835808509, - "learning_rate": 3.957191581492918e-07, - "loss": 0.5507, - "num_input_tokens_seen": 142145240, - "step": 6673 - }, - { - "epoch": 0.8025010521252931, - "grad_norm": 5.620310477800154, - "learning_rate": 3.952541282603097e-07, - "loss": 0.6981, - "num_input_tokens_seen": 142160065, - "step": 6674 - }, - { - "epoch": 0.8026212950159322, - "grad_norm": 1.865219317954989, - "learning_rate": 3.9478934181000013e-07, - "loss": 0.8288, - "num_input_tokens_seen": 142179810, - "step": 6675 - }, - { - "epoch": 0.8027415379065713, - "grad_norm": 6.216344304846314, - "learning_rate": 3.943247988688714e-07, - "loss": 0.8395, - "num_input_tokens_seen": 142198225, - "step": 6676 - }, - { - "epoch": 0.8028617807972104, - "grad_norm": 2.3350731271176586, - "learning_rate": 3.9386049950739377e-07, - "loss": 0.7117, - "num_input_tokens_seen": 142216415, - "step": 6677 - }, - { - "epoch": 0.8029820236878494, - "grad_norm": 3.6487187548592233, - "learning_rate": 3.933964437960009e-07, - "loss": 0.6579, - "num_input_tokens_seen": 142235965, - "step": 6678 - }, - { - "epoch": 0.8031022665784886, - "grad_norm": 2.7355194831009553, - "learning_rate": 3.929326318050907e-07, - "loss": 0.7095, - "num_input_tokens_seen": 142253355, - "step": 6679 - }, - { - "epoch": 0.8032225094691277, - "grad_norm": 2.020066783805988, - "learning_rate": 3.924690636050225e-07, - "loss": 0.7792, - "num_input_tokens_seen": 142270485, - "step": 6680 - }, - { - "epoch": 0.8033427523597667, - "grad_norm": 3.2068520478362346, - "learning_rate": 3.9200573926611915e-07, - "loss": 0.7212, - "num_input_tokens_seen": 142291620, - "step": 6681 - }, - { - "epoch": 0.8034629952504058, - "grad_norm": 2.10061308169592, - "learning_rate": 3.9154265885866613e-07, - "loss": 0.7193, - "num_input_tokens_seen": 142310650, - "step": 6682 - }, - { - "epoch": 0.8035832381410449, - "grad_norm": 3.317079256855228, - "learning_rate": 3.910798224529135e-07, - "loss": 0.7452, - "num_input_tokens_seen": 142328495, - "step": 6683 - }, - { - "epoch": 0.803703481031684, - "grad_norm": 5.584756791538455, - "learning_rate": 3.9061723011907245e-07, - "loss": 0.7653, - "num_input_tokens_seen": 142347570, - "step": 6684 - }, - { - "epoch": 0.803823723922323, - "grad_norm": 2.4808570259105167, - "learning_rate": 3.901548819273179e-07, - "loss": 0.7808, - "num_input_tokens_seen": 142367305, - "step": 6685 - }, - { - "epoch": 0.8039439668129622, - "grad_norm": 2.038494087397355, - "learning_rate": 3.896927779477881e-07, - "loss": 0.6844, - "num_input_tokens_seen": 142386285, - "step": 6686 - }, - { - "epoch": 0.8040642097036013, - "grad_norm": 2.8535043409312704, - "learning_rate": 3.892309182505833e-07, - "loss": 0.6729, - "num_input_tokens_seen": 142403820, - "step": 6687 - }, - { - "epoch": 0.8041844525942403, - "grad_norm": 4.054940566723818, - "learning_rate": 3.887693029057675e-07, - "loss": 0.8568, - "num_input_tokens_seen": 142423050, - "step": 6688 - }, - { - "epoch": 0.8043046954848795, - "grad_norm": 2.1832544728071235, - "learning_rate": 3.8830793198336753e-07, - "loss": 0.8067, - "num_input_tokens_seen": 142442360, - "step": 6689 - }, - { - "epoch": 0.8044249383755185, - "grad_norm": 4.666573500188218, - "learning_rate": 3.878468055533721e-07, - "loss": 0.7005, - "num_input_tokens_seen": 142464620, - "step": 6690 - }, - { - "epoch": 0.8045451812661576, - "grad_norm": 3.0258409366219223, - "learning_rate": 3.8738592368573507e-07, - "loss": 0.8473, - "num_input_tokens_seen": 142481895, - "step": 6691 - }, - { - "epoch": 0.8046654241567968, - "grad_norm": 2.1204446880215864, - "learning_rate": 3.8692528645037137e-07, - "loss": 0.8707, - "num_input_tokens_seen": 142500795, - "step": 6692 - }, - { - "epoch": 0.8047856670474358, - "grad_norm": 2.5912381251580676, - "learning_rate": 3.8646489391715907e-07, - "loss": 0.7707, - "num_input_tokens_seen": 142514810, - "step": 6693 - }, - { - "epoch": 0.8049059099380749, - "grad_norm": 2.5851729898860936, - "learning_rate": 3.8600474615593903e-07, - "loss": 0.8777, - "num_input_tokens_seen": 142529145, - "step": 6694 - }, - { - "epoch": 0.805026152828714, - "grad_norm": 0.9188288201349036, - "learning_rate": 3.8554484323651605e-07, - "loss": 0.6557, - "num_input_tokens_seen": 142590735, - "step": 6695 - }, - { - "epoch": 0.8051463957193531, - "grad_norm": 1.9903972938415635, - "learning_rate": 3.85085185228657e-07, - "loss": 0.7848, - "num_input_tokens_seen": 142609425, - "step": 6696 - }, - { - "epoch": 0.8052666386099921, - "grad_norm": 3.2140251742275936, - "learning_rate": 3.8462577220209114e-07, - "loss": 0.7364, - "num_input_tokens_seen": 142629520, - "step": 6697 - }, - { - "epoch": 0.8053868815006313, - "grad_norm": 0.7113146974068315, - "learning_rate": 3.841666042265106e-07, - "loss": 0.6071, - "num_input_tokens_seen": 142698890, - "step": 6698 - }, - { - "epoch": 0.8055071243912704, - "grad_norm": 2.6706431701741256, - "learning_rate": 3.837076813715723e-07, - "loss": 0.6825, - "num_input_tokens_seen": 142718495, - "step": 6699 - }, - { - "epoch": 0.8056273672819094, - "grad_norm": 2.148499868023375, - "learning_rate": 3.832490037068934e-07, - "loss": 0.7483, - "num_input_tokens_seen": 142737005, - "step": 6700 - }, - { - "epoch": 0.8057476101725486, - "grad_norm": 2.1737305236238154, - "learning_rate": 3.827905713020554e-07, - "loss": 0.7522, - "num_input_tokens_seen": 142754370, - "step": 6701 - }, - { - "epoch": 0.8058678530631876, - "grad_norm": 2.365473207320476, - "learning_rate": 3.823323842266017e-07, - "loss": 0.6735, - "num_input_tokens_seen": 142773485, - "step": 6702 - }, - { - "epoch": 0.8059880959538267, - "grad_norm": 2.6862700972810596, - "learning_rate": 3.818744425500393e-07, - "loss": 0.733, - "num_input_tokens_seen": 142791220, - "step": 6703 - }, - { - "epoch": 0.8061083388444659, - "grad_norm": 2.241068931896184, - "learning_rate": 3.814167463418372e-07, - "loss": 0.8029, - "num_input_tokens_seen": 142809970, - "step": 6704 - }, - { - "epoch": 0.8062285817351049, - "grad_norm": 9.429142067582834, - "learning_rate": 3.809592956714278e-07, - "loss": 0.6463, - "num_input_tokens_seen": 142832925, - "step": 6705 - }, - { - "epoch": 0.806348824625744, - "grad_norm": 2.0593662389606386, - "learning_rate": 3.805020906082057e-07, - "loss": 0.736, - "num_input_tokens_seen": 142851220, - "step": 6706 - }, - { - "epoch": 0.8064690675163831, - "grad_norm": 2.638522533626239, - "learning_rate": 3.8004513122152917e-07, - "loss": 0.8029, - "num_input_tokens_seen": 142869250, - "step": 6707 - }, - { - "epoch": 0.8065893104070222, - "grad_norm": 1.902348412841278, - "learning_rate": 3.79588417580718e-07, - "loss": 0.6648, - "num_input_tokens_seen": 142887080, - "step": 6708 - }, - { - "epoch": 0.8067095532976613, - "grad_norm": 2.0590983231847724, - "learning_rate": 3.791319497550558e-07, - "loss": 0.7595, - "num_input_tokens_seen": 142904630, - "step": 6709 - }, - { - "epoch": 0.8068297961883004, - "grad_norm": 2.219228161743034, - "learning_rate": 3.7867572781378755e-07, - "loss": 0.7104, - "num_input_tokens_seen": 142921915, - "step": 6710 - }, - { - "epoch": 0.8069500390789395, - "grad_norm": 1.6897252461476986, - "learning_rate": 3.782197518261225e-07, - "loss": 0.7326, - "num_input_tokens_seen": 142941075, - "step": 6711 - }, - { - "epoch": 0.8070702819695785, - "grad_norm": 2.480708620891473, - "learning_rate": 3.777640218612319e-07, - "loss": 0.9579, - "num_input_tokens_seen": 142958780, - "step": 6712 - }, - { - "epoch": 0.8071905248602176, - "grad_norm": 2.7879108565817585, - "learning_rate": 3.7730853798824945e-07, - "loss": 0.7068, - "num_input_tokens_seen": 142977555, - "step": 6713 - }, - { - "epoch": 0.8073107677508568, - "grad_norm": 2.42795345404584, - "learning_rate": 3.768533002762708e-07, - "loss": 0.7598, - "num_input_tokens_seen": 143000810, - "step": 6714 - }, - { - "epoch": 0.8074310106414958, - "grad_norm": 2.0449347474493926, - "learning_rate": 3.763983087943567e-07, - "loss": 0.7563, - "num_input_tokens_seen": 143019920, - "step": 6715 - }, - { - "epoch": 0.8075512535321349, - "grad_norm": 1.7563104276740333, - "learning_rate": 3.759435636115282e-07, - "loss": 0.7932, - "num_input_tokens_seen": 143040425, - "step": 6716 - }, - { - "epoch": 0.807671496422774, - "grad_norm": 1.832554866791834, - "learning_rate": 3.7548906479676967e-07, - "loss": 0.738, - "num_input_tokens_seen": 143059740, - "step": 6717 - }, - { - "epoch": 0.8077917393134131, - "grad_norm": 2.50729886812423, - "learning_rate": 3.7503481241902855e-07, - "loss": 0.7072, - "num_input_tokens_seen": 143079435, - "step": 6718 - }, - { - "epoch": 0.8079119822040521, - "grad_norm": 4.582345259405457, - "learning_rate": 3.745808065472145e-07, - "loss": 0.7975, - "num_input_tokens_seen": 143096450, - "step": 6719 - }, - { - "epoch": 0.8080322250946913, - "grad_norm": 1.7226459971937955, - "learning_rate": 3.741270472501994e-07, - "loss": 0.7556, - "num_input_tokens_seen": 143116810, - "step": 6720 - }, - { - "epoch": 0.8081524679853304, - "grad_norm": 1.975239213208688, - "learning_rate": 3.736735345968187e-07, - "loss": 0.7246, - "num_input_tokens_seen": 143136140, - "step": 6721 - }, - { - "epoch": 0.8082727108759694, - "grad_norm": 8.697357206168736, - "learning_rate": 3.732202686558692e-07, - "loss": 0.7869, - "num_input_tokens_seen": 143154895, - "step": 6722 - }, - { - "epoch": 0.8083929537666086, - "grad_norm": 2.0376427754196493, - "learning_rate": 3.7276724949611206e-07, - "loss": 0.7223, - "num_input_tokens_seen": 143174725, - "step": 6723 - }, - { - "epoch": 0.8085131966572476, - "grad_norm": 2.352813015761238, - "learning_rate": 3.723144771862694e-07, - "loss": 0.7377, - "num_input_tokens_seen": 143195085, - "step": 6724 - }, - { - "epoch": 0.8086334395478867, - "grad_norm": 1.6784396475362502, - "learning_rate": 3.718619517950263e-07, - "loss": 0.7625, - "num_input_tokens_seen": 143215400, - "step": 6725 - }, - { - "epoch": 0.8087536824385259, - "grad_norm": 2.2099908650210236, - "learning_rate": 3.714096733910301e-07, - "loss": 0.765, - "num_input_tokens_seen": 143232645, - "step": 6726 - }, - { - "epoch": 0.8088739253291649, - "grad_norm": 3.883394270349313, - "learning_rate": 3.7095764204289216e-07, - "loss": 0.6982, - "num_input_tokens_seen": 143253165, - "step": 6727 - }, - { - "epoch": 0.808994168219804, - "grad_norm": 2.56278938015523, - "learning_rate": 3.7050585781918463e-07, - "loss": 0.7315, - "num_input_tokens_seen": 143273185, - "step": 6728 - }, - { - "epoch": 0.8091144111104431, - "grad_norm": 2.2712109289236997, - "learning_rate": 3.700543207884428e-07, - "loss": 0.6861, - "num_input_tokens_seen": 143289815, - "step": 6729 - }, - { - "epoch": 0.8092346540010822, - "grad_norm": 2.66601702341341, - "learning_rate": 3.6960303101916466e-07, - "loss": 0.7029, - "num_input_tokens_seen": 143309450, - "step": 6730 - }, - { - "epoch": 0.8093548968917212, - "grad_norm": 0.8214335467969754, - "learning_rate": 3.6915198857981047e-07, - "loss": 0.5876, - "num_input_tokens_seen": 143374370, - "step": 6731 - }, - { - "epoch": 0.8094751397823604, - "grad_norm": 1.9391259163086911, - "learning_rate": 3.687011935388027e-07, - "loss": 0.6796, - "num_input_tokens_seen": 143396985, - "step": 6732 - }, - { - "epoch": 0.8095953826729995, - "grad_norm": 2.4245265938286367, - "learning_rate": 3.6825064596452715e-07, - "loss": 0.7216, - "num_input_tokens_seen": 143417050, - "step": 6733 - }, - { - "epoch": 0.8097156255636385, - "grad_norm": 1.8143374295696109, - "learning_rate": 3.678003459253305e-07, - "loss": 0.7034, - "num_input_tokens_seen": 143437620, - "step": 6734 - }, - { - "epoch": 0.8098358684542777, - "grad_norm": 2.1423278422042458, - "learning_rate": 3.673502934895241e-07, - "loss": 0.7367, - "num_input_tokens_seen": 143456845, - "step": 6735 - }, - { - "epoch": 0.8099561113449167, - "grad_norm": 0.7168994638594164, - "learning_rate": 3.669004887253802e-07, - "loss": 0.5935, - "num_input_tokens_seen": 143522855, - "step": 6736 - }, - { - "epoch": 0.8100763542355558, - "grad_norm": 1.7156257068197367, - "learning_rate": 3.664509317011335e-07, - "loss": 0.7827, - "num_input_tokens_seen": 143542910, - "step": 6737 - }, - { - "epoch": 0.810196597126195, - "grad_norm": 1.895946684944535, - "learning_rate": 3.6600162248498134e-07, - "loss": 0.7267, - "num_input_tokens_seen": 143566260, - "step": 6738 - }, - { - "epoch": 0.810316840016834, - "grad_norm": 1.9241933921021666, - "learning_rate": 3.6555256114508426e-07, - "loss": 0.7593, - "num_input_tokens_seen": 143585775, - "step": 6739 - }, - { - "epoch": 0.8104370829074731, - "grad_norm": 1.9645312809474171, - "learning_rate": 3.651037477495642e-07, - "loss": 0.7224, - "num_input_tokens_seen": 143606945, - "step": 6740 - }, - { - "epoch": 0.8105573257981122, - "grad_norm": 2.7667999849248037, - "learning_rate": 3.6465518236650584e-07, - "loss": 0.6747, - "num_input_tokens_seen": 143626810, - "step": 6741 - }, - { - "epoch": 0.8106775686887513, - "grad_norm": 1.7810624871962215, - "learning_rate": 3.642068650639554e-07, - "loss": 0.7737, - "num_input_tokens_seen": 143646275, - "step": 6742 - }, - { - "epoch": 0.8107978115793903, - "grad_norm": 2.61235974471993, - "learning_rate": 3.6375879590992334e-07, - "loss": 0.6424, - "num_input_tokens_seen": 143666340, - "step": 6743 - }, - { - "epoch": 0.8109180544700295, - "grad_norm": 2.2109984483867255, - "learning_rate": 3.6331097497238104e-07, - "loss": 0.7997, - "num_input_tokens_seen": 143685505, - "step": 6744 - }, - { - "epoch": 0.8110382973606686, - "grad_norm": 3.9619369668625457, - "learning_rate": 3.628634023192627e-07, - "loss": 0.7908, - "num_input_tokens_seen": 143705470, - "step": 6745 - }, - { - "epoch": 0.8111585402513076, - "grad_norm": 2.6808505791682546, - "learning_rate": 3.624160780184644e-07, - "loss": 0.7533, - "num_input_tokens_seen": 143722405, - "step": 6746 - }, - { - "epoch": 0.8112787831419467, - "grad_norm": 2.3236594670981168, - "learning_rate": 3.6196900213784496e-07, - "loss": 0.7431, - "num_input_tokens_seen": 143741440, - "step": 6747 - }, - { - "epoch": 0.8113990260325858, - "grad_norm": 2.187873408270472, - "learning_rate": 3.6152217474522575e-07, - "loss": 0.858, - "num_input_tokens_seen": 143757975, - "step": 6748 - }, - { - "epoch": 0.8115192689232249, - "grad_norm": 1.7824908365274366, - "learning_rate": 3.6107559590838975e-07, - "loss": 0.7219, - "num_input_tokens_seen": 143776680, - "step": 6749 - }, - { - "epoch": 0.811639511813864, - "grad_norm": 4.215220747446123, - "learning_rate": 3.606292656950822e-07, - "loss": 0.6579, - "num_input_tokens_seen": 143794810, - "step": 6750 - }, - { - "epoch": 0.8117597547045031, - "grad_norm": 2.2951263267094584, - "learning_rate": 3.601831841730121e-07, - "loss": 0.8562, - "num_input_tokens_seen": 143812450, - "step": 6751 - }, - { - "epoch": 0.8118799975951422, - "grad_norm": 2.0775748802028513, - "learning_rate": 3.5973735140984916e-07, - "loss": 0.7325, - "num_input_tokens_seen": 143832340, - "step": 6752 - }, - { - "epoch": 0.8120002404857812, - "grad_norm": 2.4919784637661975, - "learning_rate": 3.5929176747322607e-07, - "loss": 0.7853, - "num_input_tokens_seen": 143851165, - "step": 6753 - }, - { - "epoch": 0.8121204833764204, - "grad_norm": 0.839488909640401, - "learning_rate": 3.588464324307365e-07, - "loss": 0.5734, - "num_input_tokens_seen": 143914510, - "step": 6754 - }, - { - "epoch": 0.8122407262670595, - "grad_norm": 2.225475316344406, - "learning_rate": 3.584013463499391e-07, - "loss": 0.7515, - "num_input_tokens_seen": 143932850, - "step": 6755 - }, - { - "epoch": 0.8123609691576985, - "grad_norm": 0.7295797115039838, - "learning_rate": 3.579565092983521e-07, - "loss": 0.6505, - "num_input_tokens_seen": 143993690, - "step": 6756 - }, - { - "epoch": 0.8124812120483377, - "grad_norm": 3.08029310634769, - "learning_rate": 3.57511921343457e-07, - "loss": 0.8313, - "num_input_tokens_seen": 144011925, - "step": 6757 - }, - { - "epoch": 0.8126014549389767, - "grad_norm": 2.0872777291460576, - "learning_rate": 3.5706758255269696e-07, - "loss": 0.8095, - "num_input_tokens_seen": 144030100, - "step": 6758 - }, - { - "epoch": 0.8127216978296158, - "grad_norm": 1.8481565537196298, - "learning_rate": 3.5662349299347906e-07, - "loss": 0.6939, - "num_input_tokens_seen": 144049020, - "step": 6759 - }, - { - "epoch": 0.812841940720255, - "grad_norm": 1.5939232972372117, - "learning_rate": 3.561796527331706e-07, - "loss": 0.716, - "num_input_tokens_seen": 144070415, - "step": 6760 - }, - { - "epoch": 0.812962183610894, - "grad_norm": 1.8820097197013812, - "learning_rate": 3.557360618391023e-07, - "loss": 0.7662, - "num_input_tokens_seen": 144090140, - "step": 6761 - }, - { - "epoch": 0.8130824265015331, - "grad_norm": 2.142511248968733, - "learning_rate": 3.5529272037856493e-07, - "loss": 0.7764, - "num_input_tokens_seen": 144108075, - "step": 6762 - }, - { - "epoch": 0.8132026693921722, - "grad_norm": 0.8245767328882193, - "learning_rate": 3.548496284188149e-07, - "loss": 0.5792, - "num_input_tokens_seen": 144168000, - "step": 6763 - }, - { - "epoch": 0.8133229122828113, - "grad_norm": 1.9470619800707958, - "learning_rate": 3.544067860270681e-07, - "loss": 0.7835, - "num_input_tokens_seen": 144185295, - "step": 6764 - }, - { - "epoch": 0.8134431551734503, - "grad_norm": 2.7052414425551303, - "learning_rate": 3.539641932705035e-07, - "loss": 0.7065, - "num_input_tokens_seen": 144203495, - "step": 6765 - }, - { - "epoch": 0.8135633980640895, - "grad_norm": 2.740916746181709, - "learning_rate": 3.535218502162614e-07, - "loss": 0.7533, - "num_input_tokens_seen": 144222785, - "step": 6766 - }, - { - "epoch": 0.8136836409547286, - "grad_norm": 2.006321378693079, - "learning_rate": 3.530797569314461e-07, - "loss": 0.7662, - "num_input_tokens_seen": 144241530, - "step": 6767 - }, - { - "epoch": 0.8138038838453676, - "grad_norm": 3.5750600330424738, - "learning_rate": 3.5263791348312235e-07, - "loss": 0.7726, - "num_input_tokens_seen": 144260445, - "step": 6768 - }, - { - "epoch": 0.8139241267360068, - "grad_norm": 2.3435695748073626, - "learning_rate": 3.521963199383171e-07, - "loss": 0.7013, - "num_input_tokens_seen": 144283120, - "step": 6769 - }, - { - "epoch": 0.8140443696266458, - "grad_norm": 3.762055943219793, - "learning_rate": 3.517549763640197e-07, - "loss": 0.7682, - "num_input_tokens_seen": 144300480, - "step": 6770 - }, - { - "epoch": 0.8141646125172849, - "grad_norm": 2.553451781079344, - "learning_rate": 3.5131388282718224e-07, - "loss": 0.7102, - "num_input_tokens_seen": 144320070, - "step": 6771 - }, - { - "epoch": 0.8142848554079241, - "grad_norm": 3.4638544394923065, - "learning_rate": 3.508730393947179e-07, - "loss": 0.6972, - "num_input_tokens_seen": 144343045, - "step": 6772 - }, - { - "epoch": 0.8144050982985631, - "grad_norm": 1.8236744885837812, - "learning_rate": 3.504324461335024e-07, - "loss": 0.7118, - "num_input_tokens_seen": 144362875, - "step": 6773 - }, - { - "epoch": 0.8145253411892022, - "grad_norm": 3.0073805622307894, - "learning_rate": 3.499921031103732e-07, - "loss": 0.8685, - "num_input_tokens_seen": 144383365, - "step": 6774 - }, - { - "epoch": 0.8146455840798413, - "grad_norm": 1.956143532793424, - "learning_rate": 3.4955201039212987e-07, - "loss": 0.7733, - "num_input_tokens_seen": 144404005, - "step": 6775 - }, - { - "epoch": 0.8147658269704804, - "grad_norm": 2.6089722193477565, - "learning_rate": 3.4911216804553465e-07, - "loss": 0.641, - "num_input_tokens_seen": 144422625, - "step": 6776 - }, - { - "epoch": 0.8148860698611194, - "grad_norm": 2.291035423071957, - "learning_rate": 3.486725761373106e-07, - "loss": 0.707, - "num_input_tokens_seen": 144441540, - "step": 6777 - }, - { - "epoch": 0.8150063127517585, - "grad_norm": 1.6912839146547407, - "learning_rate": 3.4823323473414343e-07, - "loss": 0.8455, - "num_input_tokens_seen": 144460780, - "step": 6778 - }, - { - "epoch": 0.8151265556423977, - "grad_norm": 2.4085970245324115, - "learning_rate": 3.477941439026819e-07, - "loss": 0.7547, - "num_input_tokens_seen": 144478720, - "step": 6779 - }, - { - "epoch": 0.8152467985330367, - "grad_norm": 2.2644124108576675, - "learning_rate": 3.473553037095349e-07, - "loss": 0.7233, - "num_input_tokens_seen": 144497465, - "step": 6780 - }, - { - "epoch": 0.8153670414236758, - "grad_norm": 2.0315189680606407, - "learning_rate": 3.469167142212743e-07, - "loss": 0.8208, - "num_input_tokens_seen": 144519030, - "step": 6781 - }, - { - "epoch": 0.8154872843143149, - "grad_norm": 4.655670528311071, - "learning_rate": 3.4647837550443337e-07, - "loss": 0.6272, - "num_input_tokens_seen": 144537315, - "step": 6782 - }, - { - "epoch": 0.815607527204954, - "grad_norm": 1.896099603273935, - "learning_rate": 3.460402876255086e-07, - "loss": 0.7372, - "num_input_tokens_seen": 144554425, - "step": 6783 - }, - { - "epoch": 0.815727770095593, - "grad_norm": 4.07434327339687, - "learning_rate": 3.456024506509574e-07, - "loss": 0.7128, - "num_input_tokens_seen": 144575065, - "step": 6784 - }, - { - "epoch": 0.8158480129862322, - "grad_norm": 2.1230070914579504, - "learning_rate": 3.4516486464719873e-07, - "loss": 0.737, - "num_input_tokens_seen": 144594175, - "step": 6785 - }, - { - "epoch": 0.8159682558768713, - "grad_norm": 1.7757839812508975, - "learning_rate": 3.4472752968061403e-07, - "loss": 0.6219, - "num_input_tokens_seen": 144618325, - "step": 6786 - }, - { - "epoch": 0.8160884987675103, - "grad_norm": 2.407705812064104, - "learning_rate": 3.442904458175475e-07, - "loss": 0.7313, - "num_input_tokens_seen": 144635365, - "step": 6787 - }, - { - "epoch": 0.8162087416581495, - "grad_norm": 1.5783639154366238, - "learning_rate": 3.438536131243037e-07, - "loss": 0.7516, - "num_input_tokens_seen": 144656245, - "step": 6788 - }, - { - "epoch": 0.8163289845487885, - "grad_norm": 2.245599028765818, - "learning_rate": 3.434170316671503e-07, - "loss": 0.6054, - "num_input_tokens_seen": 144680995, - "step": 6789 - }, - { - "epoch": 0.8164492274394276, - "grad_norm": 2.385280338446686, - "learning_rate": 3.4298070151231583e-07, - "loss": 0.8965, - "num_input_tokens_seen": 144696115, - "step": 6790 - }, - { - "epoch": 0.8165694703300668, - "grad_norm": 2.8780795058389823, - "learning_rate": 3.425446227259916e-07, - "loss": 0.594, - "num_input_tokens_seen": 144716800, - "step": 6791 - }, - { - "epoch": 0.8166897132207058, - "grad_norm": 2.4027732823822645, - "learning_rate": 3.4210879537433023e-07, - "loss": 0.8221, - "num_input_tokens_seen": 144736285, - "step": 6792 - }, - { - "epoch": 0.8168099561113449, - "grad_norm": 2.516120194959324, - "learning_rate": 3.416732195234464e-07, - "loss": 0.797, - "num_input_tokens_seen": 144756060, - "step": 6793 - }, - { - "epoch": 0.816930199001984, - "grad_norm": 1.5330840700177697, - "learning_rate": 3.4123789523941613e-07, - "loss": 0.7862, - "num_input_tokens_seen": 144775605, - "step": 6794 - }, - { - "epoch": 0.8170504418926231, - "grad_norm": 1.9323610795302726, - "learning_rate": 3.4080282258827884e-07, - "loss": 0.6327, - "num_input_tokens_seen": 144793700, - "step": 6795 - }, - { - "epoch": 0.8171706847832622, - "grad_norm": 3.2559633552140026, - "learning_rate": 3.403680016360342e-07, - "loss": 0.7135, - "num_input_tokens_seen": 144812025, - "step": 6796 - }, - { - "epoch": 0.8172909276739013, - "grad_norm": 3.4548089502689283, - "learning_rate": 3.3993343244864403e-07, - "loss": 0.6709, - "num_input_tokens_seen": 144831335, - "step": 6797 - }, - { - "epoch": 0.8174111705645404, - "grad_norm": 1.817604040064371, - "learning_rate": 3.3949911509203167e-07, - "loss": 0.718, - "num_input_tokens_seen": 144854175, - "step": 6798 - }, - { - "epoch": 0.8175314134551794, - "grad_norm": 2.3568813145078793, - "learning_rate": 3.3906504963208396e-07, - "loss": 0.7387, - "num_input_tokens_seen": 144870590, - "step": 6799 - }, - { - "epoch": 0.8176516563458186, - "grad_norm": 2.953986018636422, - "learning_rate": 3.3863123613464774e-07, - "loss": 0.651, - "num_input_tokens_seen": 144889210, - "step": 6800 - }, - { - "epoch": 0.8177718992364577, - "grad_norm": 2.5861307961050017, - "learning_rate": 3.381976746655317e-07, - "loss": 0.7336, - "num_input_tokens_seen": 144908685, - "step": 6801 - }, - { - "epoch": 0.8178921421270967, - "grad_norm": 2.160117599437323, - "learning_rate": 3.3776436529050687e-07, - "loss": 0.6738, - "num_input_tokens_seen": 144927955, - "step": 6802 - }, - { - "epoch": 0.8180123850177359, - "grad_norm": 2.8496104303781524, - "learning_rate": 3.3733130807530684e-07, - "loss": 0.7155, - "num_input_tokens_seen": 144951735, - "step": 6803 - }, - { - "epoch": 0.8181326279083749, - "grad_norm": 2.573090701006446, - "learning_rate": 3.3689850308562574e-07, - "loss": 0.7705, - "num_input_tokens_seen": 144971900, - "step": 6804 - }, - { - "epoch": 0.818252870799014, - "grad_norm": 1.9294438025692988, - "learning_rate": 3.364659503871188e-07, - "loss": 0.7709, - "num_input_tokens_seen": 144989555, - "step": 6805 - }, - { - "epoch": 0.8183731136896532, - "grad_norm": 2.045228053642145, - "learning_rate": 3.3603365004540417e-07, - "loss": 0.8286, - "num_input_tokens_seen": 145007570, - "step": 6806 - }, - { - "epoch": 0.8184933565802922, - "grad_norm": 2.4923176712314157, - "learning_rate": 3.356016021260624e-07, - "loss": 0.7604, - "num_input_tokens_seen": 145027620, - "step": 6807 - }, - { - "epoch": 0.8186135994709313, - "grad_norm": 3.066955069842093, - "learning_rate": 3.35169806694634e-07, - "loss": 0.641, - "num_input_tokens_seen": 145045590, - "step": 6808 - }, - { - "epoch": 0.8187338423615703, - "grad_norm": 0.7377240889855464, - "learning_rate": 3.347382638166223e-07, - "loss": 0.6213, - "num_input_tokens_seen": 145116450, - "step": 6809 - }, - { - "epoch": 0.8188540852522095, - "grad_norm": 2.1274000878465413, - "learning_rate": 3.343069735574917e-07, - "loss": 0.81, - "num_input_tokens_seen": 145133860, - "step": 6810 - }, - { - "epoch": 0.8189743281428485, - "grad_norm": 3.7813880538337137, - "learning_rate": 3.3387593598266907e-07, - "loss": 0.7389, - "num_input_tokens_seen": 145150190, - "step": 6811 - }, - { - "epoch": 0.8190945710334876, - "grad_norm": 1.992086818031112, - "learning_rate": 3.3344515115754225e-07, - "loss": 0.7772, - "num_input_tokens_seen": 145168890, - "step": 6812 - }, - { - "epoch": 0.8192148139241268, - "grad_norm": 2.9252191007406734, - "learning_rate": 3.33014619147461e-07, - "loss": 0.7869, - "num_input_tokens_seen": 145186635, - "step": 6813 - }, - { - "epoch": 0.8193350568147658, - "grad_norm": 2.4391270049403926, - "learning_rate": 3.325843400177362e-07, - "loss": 0.712, - "num_input_tokens_seen": 145207695, - "step": 6814 - }, - { - "epoch": 0.8194552997054049, - "grad_norm": 2.1512663405099866, - "learning_rate": 3.3215431383364156e-07, - "loss": 0.7268, - "num_input_tokens_seen": 145227570, - "step": 6815 - }, - { - "epoch": 0.819575542596044, - "grad_norm": 2.362265162117775, - "learning_rate": 3.3172454066041164e-07, - "loss": 0.6034, - "num_input_tokens_seen": 145246795, - "step": 6816 - }, - { - "epoch": 0.8196957854866831, - "grad_norm": 2.530884814567089, - "learning_rate": 3.3129502056324234e-07, - "loss": 0.7545, - "num_input_tokens_seen": 145267880, - "step": 6817 - }, - { - "epoch": 0.8198160283773221, - "grad_norm": 0.8186426484697743, - "learning_rate": 3.3086575360729165e-07, - "loss": 0.6282, - "num_input_tokens_seen": 145325135, - "step": 6818 - }, - { - "epoch": 0.8199362712679613, - "grad_norm": 1.9541116357457324, - "learning_rate": 3.3043673985767906e-07, - "loss": 0.7085, - "num_input_tokens_seen": 145343920, - "step": 6819 - }, - { - "epoch": 0.8200565141586004, - "grad_norm": 1.995718698350831, - "learning_rate": 3.3000797937948564e-07, - "loss": 0.7695, - "num_input_tokens_seen": 145361935, - "step": 6820 - }, - { - "epoch": 0.8201767570492394, - "grad_norm": 0.9576204151446579, - "learning_rate": 3.2957947223775384e-07, - "loss": 0.6772, - "num_input_tokens_seen": 145425260, - "step": 6821 - }, - { - "epoch": 0.8202969999398786, - "grad_norm": 1.9824797352389605, - "learning_rate": 3.291512184974876e-07, - "loss": 0.7895, - "num_input_tokens_seen": 145445370, - "step": 6822 - }, - { - "epoch": 0.8204172428305176, - "grad_norm": 2.0286719449556863, - "learning_rate": 3.2872321822365346e-07, - "loss": 0.6679, - "num_input_tokens_seen": 145465305, - "step": 6823 - }, - { - "epoch": 0.8205374857211567, - "grad_norm": 2.0360447715407815, - "learning_rate": 3.282954714811783e-07, - "loss": 0.7324, - "num_input_tokens_seen": 145483930, - "step": 6824 - }, - { - "epoch": 0.8206577286117959, - "grad_norm": 2.4382427972761773, - "learning_rate": 3.2786797833495093e-07, - "loss": 0.702, - "num_input_tokens_seen": 145499005, - "step": 6825 - }, - { - "epoch": 0.8207779715024349, - "grad_norm": 2.2596869139181464, - "learning_rate": 3.274407388498213e-07, - "loss": 0.7199, - "num_input_tokens_seen": 145516855, - "step": 6826 - }, - { - "epoch": 0.820898214393074, - "grad_norm": 3.26530753405961, - "learning_rate": 3.270137530906021e-07, - "loss": 0.7368, - "num_input_tokens_seen": 145535810, - "step": 6827 - }, - { - "epoch": 0.8210184572837131, - "grad_norm": 2.1316511086561487, - "learning_rate": 3.265870211220665e-07, - "loss": 0.8287, - "num_input_tokens_seen": 145553365, - "step": 6828 - }, - { - "epoch": 0.8211387001743522, - "grad_norm": 3.775283647238993, - "learning_rate": 3.2616054300894934e-07, - "loss": 0.8091, - "num_input_tokens_seen": 145572535, - "step": 6829 - }, - { - "epoch": 0.8212589430649913, - "grad_norm": 2.1094774104931644, - "learning_rate": 3.257343188159465e-07, - "loss": 0.841, - "num_input_tokens_seen": 145591800, - "step": 6830 - }, - { - "epoch": 0.8213791859556304, - "grad_norm": 2.5451732459221934, - "learning_rate": 3.2530834860771663e-07, - "loss": 0.6527, - "num_input_tokens_seen": 145610900, - "step": 6831 - }, - { - "epoch": 0.8214994288462695, - "grad_norm": 3.1006880998731217, - "learning_rate": 3.248826324488789e-07, - "loss": 0.7307, - "num_input_tokens_seen": 145627915, - "step": 6832 - }, - { - "epoch": 0.8216196717369085, - "grad_norm": 2.857064873872957, - "learning_rate": 3.244571704040138e-07, - "loss": 0.8705, - "num_input_tokens_seen": 145647795, - "step": 6833 - }, - { - "epoch": 0.8217399146275477, - "grad_norm": 2.197935822779527, - "learning_rate": 3.2403196253766374e-07, - "loss": 0.7306, - "num_input_tokens_seen": 145666595, - "step": 6834 - }, - { - "epoch": 0.8218601575181868, - "grad_norm": 2.4457567958888453, - "learning_rate": 3.2360700891433254e-07, - "loss": 0.7832, - "num_input_tokens_seen": 145685340, - "step": 6835 - }, - { - "epoch": 0.8219804004088258, - "grad_norm": 0.8258212032887101, - "learning_rate": 3.2318230959848513e-07, - "loss": 0.5951, - "num_input_tokens_seen": 145739700, - "step": 6836 - }, - { - "epoch": 0.822100643299465, - "grad_norm": 2.9037616186578186, - "learning_rate": 3.2275786465454814e-07, - "loss": 0.7471, - "num_input_tokens_seen": 145756070, - "step": 6837 - }, - { - "epoch": 0.822220886190104, - "grad_norm": 2.4724078103708926, - "learning_rate": 3.2233367414690917e-07, - "loss": 0.7573, - "num_input_tokens_seen": 145777980, - "step": 6838 - }, - { - "epoch": 0.8223411290807431, - "grad_norm": 2.2402347064016834, - "learning_rate": 3.219097381399183e-07, - "loss": 0.8355, - "num_input_tokens_seen": 145794875, - "step": 6839 - }, - { - "epoch": 0.8224613719713821, - "grad_norm": 1.8410913141495147, - "learning_rate": 3.2148605669788584e-07, - "loss": 0.7968, - "num_input_tokens_seen": 145814485, - "step": 6840 - }, - { - "epoch": 0.8225816148620213, - "grad_norm": 20.45698117660499, - "learning_rate": 3.2106262988508405e-07, - "loss": 0.7653, - "num_input_tokens_seen": 145832255, - "step": 6841 - }, - { - "epoch": 0.8227018577526604, - "grad_norm": 3.438138768387695, - "learning_rate": 3.206394577657461e-07, - "loss": 0.7331, - "num_input_tokens_seen": 145849755, - "step": 6842 - }, - { - "epoch": 0.8228221006432994, - "grad_norm": 3.1820139605565383, - "learning_rate": 3.202165404040675e-07, - "loss": 0.712, - "num_input_tokens_seen": 145867395, - "step": 6843 - }, - { - "epoch": 0.8229423435339386, - "grad_norm": 3.006719561106939, - "learning_rate": 3.1979387786420396e-07, - "loss": 0.7427, - "num_input_tokens_seen": 145887355, - "step": 6844 - }, - { - "epoch": 0.8230625864245776, - "grad_norm": 2.439902438076832, - "learning_rate": 3.1937147021027346e-07, - "loss": 0.818, - "num_input_tokens_seen": 145905530, - "step": 6845 - }, - { - "epoch": 0.8231828293152167, - "grad_norm": 9.404427719808812, - "learning_rate": 3.189493175063542e-07, - "loss": 0.7611, - "num_input_tokens_seen": 145922485, - "step": 6846 - }, - { - "epoch": 0.8233030722058559, - "grad_norm": 2.2164797040193887, - "learning_rate": 3.1852741981648776e-07, - "loss": 0.6694, - "num_input_tokens_seen": 145940855, - "step": 6847 - }, - { - "epoch": 0.8234233150964949, - "grad_norm": 2.614384598470918, - "learning_rate": 3.1810577720467446e-07, - "loss": 0.6945, - "num_input_tokens_seen": 145962305, - "step": 6848 - }, - { - "epoch": 0.823543557987134, - "grad_norm": 1.6186250296599518, - "learning_rate": 3.176843897348773e-07, - "loss": 0.564, - "num_input_tokens_seen": 145985220, - "step": 6849 - }, - { - "epoch": 0.8236638008777731, - "grad_norm": 6.218079243556821, - "learning_rate": 3.1726325747102034e-07, - "loss": 0.7526, - "num_input_tokens_seen": 146003315, - "step": 6850 - }, - { - "epoch": 0.8237840437684122, - "grad_norm": 1.555671543908552, - "learning_rate": 3.1684238047698974e-07, - "loss": 0.6364, - "num_input_tokens_seen": 146031305, - "step": 6851 - }, - { - "epoch": 0.8239042866590512, - "grad_norm": 2.7926815256262767, - "learning_rate": 3.1642175881663155e-07, - "loss": 0.5268, - "num_input_tokens_seen": 146050755, - "step": 6852 - }, - { - "epoch": 0.8240245295496904, - "grad_norm": 2.084756321585468, - "learning_rate": 3.1600139255375413e-07, - "loss": 0.8388, - "num_input_tokens_seen": 146071310, - "step": 6853 - }, - { - "epoch": 0.8241447724403295, - "grad_norm": 7.215655114401023, - "learning_rate": 3.1558128175212615e-07, - "loss": 0.7508, - "num_input_tokens_seen": 146091405, - "step": 6854 - }, - { - "epoch": 0.8242650153309685, - "grad_norm": 2.3119603509379276, - "learning_rate": 3.151614264754787e-07, - "loss": 0.7701, - "num_input_tokens_seen": 146109070, - "step": 6855 - }, - { - "epoch": 0.8243852582216077, - "grad_norm": 2.440012365661594, - "learning_rate": 3.147418267875035e-07, - "loss": 0.7875, - "num_input_tokens_seen": 146126920, - "step": 6856 - }, - { - "epoch": 0.8245055011122467, - "grad_norm": 3.1584048500821402, - "learning_rate": 3.1432248275185315e-07, - "loss": 0.6533, - "num_input_tokens_seen": 146147150, - "step": 6857 - }, - { - "epoch": 0.8246257440028858, - "grad_norm": 2.470926391087847, - "learning_rate": 3.139033944321412e-07, - "loss": 0.7737, - "num_input_tokens_seen": 146164230, - "step": 6858 - }, - { - "epoch": 0.824745986893525, - "grad_norm": 2.1079992570074575, - "learning_rate": 3.134845618919444e-07, - "loss": 0.784, - "num_input_tokens_seen": 146184410, - "step": 6859 - }, - { - "epoch": 0.824866229784164, - "grad_norm": 1.7752250146199624, - "learning_rate": 3.1306598519479876e-07, - "loss": 0.8232, - "num_input_tokens_seen": 146203950, - "step": 6860 - }, - { - "epoch": 0.8249864726748031, - "grad_norm": 1.7049616475349059, - "learning_rate": 3.1264766440420177e-07, - "loss": 0.7793, - "num_input_tokens_seen": 146226140, - "step": 6861 - }, - { - "epoch": 0.8251067155654422, - "grad_norm": 2.177157480033531, - "learning_rate": 3.122295995836124e-07, - "loss": 0.672, - "num_input_tokens_seen": 146245730, - "step": 6862 - }, - { - "epoch": 0.8252269584560813, - "grad_norm": 2.7597382259785492, - "learning_rate": 3.118117907964508e-07, - "loss": 0.7781, - "num_input_tokens_seen": 146267395, - "step": 6863 - }, - { - "epoch": 0.8253472013467203, - "grad_norm": 2.3260151631472286, - "learning_rate": 3.1139423810609856e-07, - "loss": 0.801, - "num_input_tokens_seen": 146283810, - "step": 6864 - }, - { - "epoch": 0.8254674442373595, - "grad_norm": 3.1430570449193564, - "learning_rate": 3.109769415758976e-07, - "loss": 0.753, - "num_input_tokens_seen": 146303415, - "step": 6865 - }, - { - "epoch": 0.8255876871279986, - "grad_norm": 5.230790794251164, - "learning_rate": 3.105599012691511e-07, - "loss": 0.7574, - "num_input_tokens_seen": 146321565, - "step": 6866 - }, - { - "epoch": 0.8257079300186376, - "grad_norm": 1.6259356643546288, - "learning_rate": 3.101431172491249e-07, - "loss": 0.8168, - "num_input_tokens_seen": 146342830, - "step": 6867 - }, - { - "epoch": 0.8258281729092768, - "grad_norm": 2.649303264881949, - "learning_rate": 3.097265895790444e-07, - "loss": 0.7146, - "num_input_tokens_seen": 146360760, - "step": 6868 - }, - { - "epoch": 0.8259484157999158, - "grad_norm": 2.2360640517528574, - "learning_rate": 3.093103183220962e-07, - "loss": 0.8294, - "num_input_tokens_seen": 146380525, - "step": 6869 - }, - { - "epoch": 0.8260686586905549, - "grad_norm": 0.9414163219170635, - "learning_rate": 3.0889430354142796e-07, - "loss": 0.6524, - "num_input_tokens_seen": 146441755, - "step": 6870 - }, - { - "epoch": 0.826188901581194, - "grad_norm": 4.690599122356733, - "learning_rate": 3.084785453001497e-07, - "loss": 0.6928, - "num_input_tokens_seen": 146462390, - "step": 6871 - }, - { - "epoch": 0.8263091444718331, - "grad_norm": 2.4681720492792527, - "learning_rate": 3.080630436613314e-07, - "loss": 0.809, - "num_input_tokens_seen": 146479880, - "step": 6872 - }, - { - "epoch": 0.8264293873624722, - "grad_norm": 2.6533995089520195, - "learning_rate": 3.076477986880039e-07, - "loss": 0.8509, - "num_input_tokens_seen": 146497395, - "step": 6873 - }, - { - "epoch": 0.8265496302531112, - "grad_norm": 2.0374102606125555, - "learning_rate": 3.072328104431594e-07, - "loss": 0.6846, - "num_input_tokens_seen": 146519070, - "step": 6874 - }, - { - "epoch": 0.8266698731437504, - "grad_norm": 2.310733761696155, - "learning_rate": 3.068180789897521e-07, - "loss": 0.7606, - "num_input_tokens_seen": 146537200, - "step": 6875 - }, - { - "epoch": 0.8267901160343895, - "grad_norm": 1.6052174509603416, - "learning_rate": 3.064036043906961e-07, - "loss": 0.8095, - "num_input_tokens_seen": 146560360, - "step": 6876 - }, - { - "epoch": 0.8269103589250285, - "grad_norm": 2.173445408744171, - "learning_rate": 3.059893867088668e-07, - "loss": 0.6718, - "num_input_tokens_seen": 146584225, - "step": 6877 - }, - { - "epoch": 0.8270306018156677, - "grad_norm": 2.170178064455718, - "learning_rate": 3.055754260071004e-07, - "loss": 0.6628, - "num_input_tokens_seen": 146606240, - "step": 6878 - }, - { - "epoch": 0.8271508447063067, - "grad_norm": 2.031145438682488, - "learning_rate": 3.051617223481948e-07, - "loss": 0.7335, - "num_input_tokens_seen": 146627280, - "step": 6879 - }, - { - "epoch": 0.8272710875969458, - "grad_norm": 2.6585038131449275, - "learning_rate": 3.0474827579490825e-07, - "loss": 0.7477, - "num_input_tokens_seen": 146644630, - "step": 6880 - }, - { - "epoch": 0.827391330487585, - "grad_norm": 1.9611839149006123, - "learning_rate": 3.043350864099605e-07, - "loss": 0.8422, - "num_input_tokens_seen": 146662910, - "step": 6881 - }, - { - "epoch": 0.827511573378224, - "grad_norm": 4.792019379462137, - "learning_rate": 3.039221542560315e-07, - "loss": 0.805, - "num_input_tokens_seen": 146679195, - "step": 6882 - }, - { - "epoch": 0.8276318162688631, - "grad_norm": 2.2181429408365365, - "learning_rate": 3.0350947939576356e-07, - "loss": 0.742, - "num_input_tokens_seen": 146698070, - "step": 6883 - }, - { - "epoch": 0.8277520591595022, - "grad_norm": 1.8573367472872755, - "learning_rate": 3.0309706189175876e-07, - "loss": 0.7212, - "num_input_tokens_seen": 146717625, - "step": 6884 - }, - { - "epoch": 0.8278723020501413, - "grad_norm": 0.8388458037637345, - "learning_rate": 3.0268490180658045e-07, - "loss": 0.5944, - "num_input_tokens_seen": 146780125, - "step": 6885 - }, - { - "epoch": 0.8279925449407803, - "grad_norm": 2.239293896097615, - "learning_rate": 3.0227299920275263e-07, - "loss": 0.7805, - "num_input_tokens_seen": 146796160, - "step": 6886 - }, - { - "epoch": 0.8281127878314195, - "grad_norm": 3.2239081546695596, - "learning_rate": 3.018613541427613e-07, - "loss": 0.8492, - "num_input_tokens_seen": 146815400, - "step": 6887 - }, - { - "epoch": 0.8282330307220586, - "grad_norm": 31.248229298434207, - "learning_rate": 3.0144996668905243e-07, - "loss": 0.7338, - "num_input_tokens_seen": 146832500, - "step": 6888 - }, - { - "epoch": 0.8283532736126976, - "grad_norm": 3.4196753874493604, - "learning_rate": 3.010388369040331e-07, - "loss": 0.8136, - "num_input_tokens_seen": 146850880, - "step": 6889 - }, - { - "epoch": 0.8284735165033368, - "grad_norm": 2.144420070344818, - "learning_rate": 3.006279648500709e-07, - "loss": 0.8215, - "num_input_tokens_seen": 146871540, - "step": 6890 - }, - { - "epoch": 0.8285937593939758, - "grad_norm": 3.698852984919603, - "learning_rate": 3.002173505894965e-07, - "loss": 0.6444, - "num_input_tokens_seen": 146890410, - "step": 6891 - }, - { - "epoch": 0.8287140022846149, - "grad_norm": 3.25734162111977, - "learning_rate": 2.9980699418459774e-07, - "loss": 0.6195, - "num_input_tokens_seen": 146909200, - "step": 6892 - }, - { - "epoch": 0.8288342451752541, - "grad_norm": 4.654758041637784, - "learning_rate": 2.993968956976263e-07, - "loss": 0.6173, - "num_input_tokens_seen": 146976665, - "step": 6893 - }, - { - "epoch": 0.8289544880658931, - "grad_norm": 3.3273314773725917, - "learning_rate": 2.9898705519079313e-07, - "loss": 0.6898, - "num_input_tokens_seen": 146995490, - "step": 6894 - }, - { - "epoch": 0.8290747309565322, - "grad_norm": 1.9948104495682495, - "learning_rate": 2.985774727262715e-07, - "loss": 0.7367, - "num_input_tokens_seen": 147014055, - "step": 6895 - }, - { - "epoch": 0.8291949738471713, - "grad_norm": 2.10140458208863, - "learning_rate": 2.981681483661949e-07, - "loss": 0.8057, - "num_input_tokens_seen": 147033360, - "step": 6896 - }, - { - "epoch": 0.8293152167378104, - "grad_norm": 1.6378127293717482, - "learning_rate": 2.9775908217265675e-07, - "loss": 0.7027, - "num_input_tokens_seen": 147058315, - "step": 6897 - }, - { - "epoch": 0.8294354596284494, - "grad_norm": 0.8402322842744447, - "learning_rate": 2.973502742077121e-07, - "loss": 0.524, - "num_input_tokens_seen": 147118370, - "step": 6898 - }, - { - "epoch": 0.8295557025190886, - "grad_norm": 1.9221327873232499, - "learning_rate": 2.969417245333774e-07, - "loss": 0.7092, - "num_input_tokens_seen": 147137470, - "step": 6899 - }, - { - "epoch": 0.8296759454097277, - "grad_norm": 4.40503988085388, - "learning_rate": 2.9653343321162915e-07, - "loss": 0.7789, - "num_input_tokens_seen": 147156700, - "step": 6900 - }, - { - "epoch": 0.8297961883003667, - "grad_norm": 2.268618319649715, - "learning_rate": 2.9612540030440446e-07, - "loss": 0.6381, - "num_input_tokens_seen": 147176965, - "step": 6901 - }, - { - "epoch": 0.8299164311910058, - "grad_norm": 0.8806013932452963, - "learning_rate": 2.957176258736016e-07, - "loss": 0.6473, - "num_input_tokens_seen": 147233070, - "step": 6902 - }, - { - "epoch": 0.8300366740816449, - "grad_norm": 1.7415016845063431, - "learning_rate": 2.953101099810802e-07, - "loss": 0.7363, - "num_input_tokens_seen": 147252395, - "step": 6903 - }, - { - "epoch": 0.830156916972284, - "grad_norm": 6.696914305963923, - "learning_rate": 2.9490285268865965e-07, - "loss": 0.8357, - "num_input_tokens_seen": 147269605, - "step": 6904 - }, - { - "epoch": 0.830277159862923, - "grad_norm": 2.3844055815975507, - "learning_rate": 2.9449585405812085e-07, - "loss": 0.7999, - "num_input_tokens_seen": 147286705, - "step": 6905 - }, - { - "epoch": 0.8303974027535622, - "grad_norm": 2.1866853018681685, - "learning_rate": 2.940891141512047e-07, - "loss": 0.7401, - "num_input_tokens_seen": 147304445, - "step": 6906 - }, - { - "epoch": 0.8305176456442013, - "grad_norm": 2.519838971183083, - "learning_rate": 2.9368263302961385e-07, - "loss": 0.7092, - "num_input_tokens_seen": 147322865, - "step": 6907 - }, - { - "epoch": 0.8306378885348403, - "grad_norm": 2.8178309203404464, - "learning_rate": 2.9327641075501075e-07, - "loss": 0.7944, - "num_input_tokens_seen": 147341575, - "step": 6908 - }, - { - "epoch": 0.8307581314254795, - "grad_norm": 3.795649059275563, - "learning_rate": 2.9287044738901913e-07, - "loss": 0.6518, - "num_input_tokens_seen": 147359280, - "step": 6909 - }, - { - "epoch": 0.8308783743161186, - "grad_norm": 9.297190464101524, - "learning_rate": 2.9246474299322274e-07, - "loss": 0.9062, - "num_input_tokens_seen": 147374560, - "step": 6910 - }, - { - "epoch": 0.8309986172067576, - "grad_norm": 0.9338561477490201, - "learning_rate": 2.920592976291678e-07, - "loss": 0.6629, - "num_input_tokens_seen": 147431610, - "step": 6911 - }, - { - "epoch": 0.8311188600973968, - "grad_norm": 2.24408649308667, - "learning_rate": 2.916541113583595e-07, - "loss": 0.8054, - "num_input_tokens_seen": 147449830, - "step": 6912 - }, - { - "epoch": 0.8312391029880358, - "grad_norm": 3.7041626483805925, - "learning_rate": 2.912491842422642e-07, - "loss": 0.6528, - "num_input_tokens_seen": 147467255, - "step": 6913 - }, - { - "epoch": 0.8313593458786749, - "grad_norm": 1.734805722474592, - "learning_rate": 2.9084451634230857e-07, - "loss": 0.6973, - "num_input_tokens_seen": 147486275, - "step": 6914 - }, - { - "epoch": 0.831479588769314, - "grad_norm": 2.259316725953428, - "learning_rate": 2.9044010771988125e-07, - "loss": 0.7114, - "num_input_tokens_seen": 147505810, - "step": 6915 - }, - { - "epoch": 0.8315998316599531, - "grad_norm": 1.9088973247391254, - "learning_rate": 2.900359584363303e-07, - "loss": 0.7218, - "num_input_tokens_seen": 147528635, - "step": 6916 - }, - { - "epoch": 0.8317200745505922, - "grad_norm": 2.821963192012367, - "learning_rate": 2.8963206855296494e-07, - "loss": 0.8317, - "num_input_tokens_seen": 147544595, - "step": 6917 - }, - { - "epoch": 0.8318403174412313, - "grad_norm": 1.9870771864690584, - "learning_rate": 2.8922843813105437e-07, - "loss": 0.7682, - "num_input_tokens_seen": 147565730, - "step": 6918 - }, - { - "epoch": 0.8319605603318704, - "grad_norm": 2.9016051919697614, - "learning_rate": 2.888250672318302e-07, - "loss": 0.707, - "num_input_tokens_seen": 147582850, - "step": 6919 - }, - { - "epoch": 0.8320808032225094, - "grad_norm": 4.27053055477984, - "learning_rate": 2.8842195591648243e-07, - "loss": 0.6867, - "num_input_tokens_seen": 147605715, - "step": 6920 - }, - { - "epoch": 0.8322010461131486, - "grad_norm": 2.0286754341757165, - "learning_rate": 2.880191042461635e-07, - "loss": 0.8067, - "num_input_tokens_seen": 147621375, - "step": 6921 - }, - { - "epoch": 0.8323212890037877, - "grad_norm": 2.398742455431569, - "learning_rate": 2.876165122819849e-07, - "loss": 0.7981, - "num_input_tokens_seen": 147639075, - "step": 6922 - }, - { - "epoch": 0.8324415318944267, - "grad_norm": 1.903549917531399, - "learning_rate": 2.872141800850201e-07, - "loss": 0.7856, - "num_input_tokens_seen": 147655970, - "step": 6923 - }, - { - "epoch": 0.8325617747850659, - "grad_norm": 1.8428676634987566, - "learning_rate": 2.868121077163024e-07, - "loss": 0.7261, - "num_input_tokens_seen": 147675245, - "step": 6924 - }, - { - "epoch": 0.8326820176757049, - "grad_norm": 1.8544383886565183, - "learning_rate": 2.864102952368257e-07, - "loss": 0.7153, - "num_input_tokens_seen": 147692890, - "step": 6925 - }, - { - "epoch": 0.832802260566344, - "grad_norm": 1.4953249974212524, - "learning_rate": 2.860087427075444e-07, - "loss": 0.5917, - "num_input_tokens_seen": 147716860, - "step": 6926 - }, - { - "epoch": 0.8329225034569832, - "grad_norm": 2.702330756249681, - "learning_rate": 2.856074501893744e-07, - "loss": 0.8583, - "num_input_tokens_seen": 147731780, - "step": 6927 - }, - { - "epoch": 0.8330427463476222, - "grad_norm": 1.8647738549899766, - "learning_rate": 2.8520641774319097e-07, - "loss": 0.8111, - "num_input_tokens_seen": 147749590, - "step": 6928 - }, - { - "epoch": 0.8331629892382613, - "grad_norm": 2.2931058763702103, - "learning_rate": 2.848056454298309e-07, - "loss": 0.7469, - "num_input_tokens_seen": 147766635, - "step": 6929 - }, - { - "epoch": 0.8332832321289004, - "grad_norm": 4.024077278275758, - "learning_rate": 2.844051333100901e-07, - "loss": 0.652, - "num_input_tokens_seen": 147783900, - "step": 6930 - }, - { - "epoch": 0.8334034750195395, - "grad_norm": 2.398267877039075, - "learning_rate": 2.840048814447269e-07, - "loss": 0.8361, - "num_input_tokens_seen": 147801785, - "step": 6931 - }, - { - "epoch": 0.8335237179101785, - "grad_norm": 2.6760525134882385, - "learning_rate": 2.836048898944587e-07, - "loss": 0.7302, - "num_input_tokens_seen": 147819930, - "step": 6932 - }, - { - "epoch": 0.8336439608008177, - "grad_norm": 3.639357120635123, - "learning_rate": 2.832051587199642e-07, - "loss": 0.7155, - "num_input_tokens_seen": 147836905, - "step": 6933 - }, - { - "epoch": 0.8337642036914568, - "grad_norm": 0.8003977797803753, - "learning_rate": 2.828056879818821e-07, - "loss": 0.6068, - "num_input_tokens_seen": 147895700, - "step": 6934 - }, - { - "epoch": 0.8338844465820958, - "grad_norm": 2.6400155919357906, - "learning_rate": 2.824064777408117e-07, - "loss": 0.8272, - "num_input_tokens_seen": 147915210, - "step": 6935 - }, - { - "epoch": 0.8340046894727349, - "grad_norm": 2.7805958423433967, - "learning_rate": 2.820075280573131e-07, - "loss": 0.7531, - "num_input_tokens_seen": 147937920, - "step": 6936 - }, - { - "epoch": 0.834124932363374, - "grad_norm": 2.284618252767419, - "learning_rate": 2.8160883899190667e-07, - "loss": 0.8012, - "num_input_tokens_seen": 147960910, - "step": 6937 - }, - { - "epoch": 0.8342451752540131, - "grad_norm": 2.429735895223449, - "learning_rate": 2.8121041060507234e-07, - "loss": 0.736, - "num_input_tokens_seen": 147979660, - "step": 6938 - }, - { - "epoch": 0.8343654181446521, - "grad_norm": 1.703920002955549, - "learning_rate": 2.808122429572528e-07, - "loss": 0.7079, - "num_input_tokens_seen": 147999585, - "step": 6939 - }, - { - "epoch": 0.8344856610352913, - "grad_norm": 11.686142348938322, - "learning_rate": 2.804143361088489e-07, - "loss": 0.7478, - "num_input_tokens_seen": 148018485, - "step": 6940 - }, - { - "epoch": 0.8346059039259304, - "grad_norm": 2.320912860524297, - "learning_rate": 2.800166901202232e-07, - "loss": 0.7622, - "num_input_tokens_seen": 148036175, - "step": 6941 - }, - { - "epoch": 0.8347261468165694, - "grad_norm": 1.823295875907801, - "learning_rate": 2.796193050516975e-07, - "loss": 0.6926, - "num_input_tokens_seen": 148060140, - "step": 6942 - }, - { - "epoch": 0.8348463897072086, - "grad_norm": 4.279533183587005, - "learning_rate": 2.792221809635558e-07, - "loss": 0.7553, - "num_input_tokens_seen": 148080490, - "step": 6943 - }, - { - "epoch": 0.8349666325978476, - "grad_norm": 2.5403554696079134, - "learning_rate": 2.788253179160411e-07, - "loss": 0.7446, - "num_input_tokens_seen": 148101370, - "step": 6944 - }, - { - "epoch": 0.8350868754884867, - "grad_norm": 2.103267811249552, - "learning_rate": 2.7842871596935725e-07, - "loss": 0.6489, - "num_input_tokens_seen": 148119605, - "step": 6945 - }, - { - "epoch": 0.8352071183791259, - "grad_norm": 1.7235735954889084, - "learning_rate": 2.780323751836677e-07, - "loss": 0.6837, - "num_input_tokens_seen": 148140540, - "step": 6946 - }, - { - "epoch": 0.8353273612697649, - "grad_norm": 1.8385923208278383, - "learning_rate": 2.776362956190983e-07, - "loss": 0.7853, - "num_input_tokens_seen": 148161090, - "step": 6947 - }, - { - "epoch": 0.835447604160404, - "grad_norm": 2.1937308038126324, - "learning_rate": 2.772404773357335e-07, - "loss": 0.7618, - "num_input_tokens_seen": 148180215, - "step": 6948 - }, - { - "epoch": 0.8355678470510431, - "grad_norm": 6.144809029585824, - "learning_rate": 2.7684492039361853e-07, - "loss": 0.7701, - "num_input_tokens_seen": 148199160, - "step": 6949 - }, - { - "epoch": 0.8356880899416822, - "grad_norm": 2.101532973335265, - "learning_rate": 2.76449624852759e-07, - "loss": 0.832, - "num_input_tokens_seen": 148217855, - "step": 6950 - }, - { - "epoch": 0.8358083328323213, - "grad_norm": 3.5760020255416882, - "learning_rate": 2.760545907731211e-07, - "loss": 0.776, - "num_input_tokens_seen": 148238150, - "step": 6951 - }, - { - "epoch": 0.8359285757229604, - "grad_norm": 2.441141431187933, - "learning_rate": 2.75659818214631e-07, - "loss": 0.6717, - "num_input_tokens_seen": 148258975, - "step": 6952 - }, - { - "epoch": 0.8360488186135995, - "grad_norm": 2.063119426133086, - "learning_rate": 2.7526530723717534e-07, - "loss": 0.7829, - "num_input_tokens_seen": 148278130, - "step": 6953 - }, - { - "epoch": 0.8361690615042385, - "grad_norm": 2.2532227426526137, - "learning_rate": 2.7487105790060105e-07, - "loss": 0.7393, - "num_input_tokens_seen": 148297260, - "step": 6954 - }, - { - "epoch": 0.8362893043948777, - "grad_norm": 2.2361013395692475, - "learning_rate": 2.7447707026471587e-07, - "loss": 0.6872, - "num_input_tokens_seen": 148319955, - "step": 6955 - }, - { - "epoch": 0.8364095472855168, - "grad_norm": 1.9918158716260648, - "learning_rate": 2.740833443892874e-07, - "loss": 0.7962, - "num_input_tokens_seen": 148337845, - "step": 6956 - }, - { - "epoch": 0.8365297901761558, - "grad_norm": 1.9507490717495841, - "learning_rate": 2.7368988033404327e-07, - "loss": 0.7852, - "num_input_tokens_seen": 148355080, - "step": 6957 - }, - { - "epoch": 0.836650033066795, - "grad_norm": 1.5961202174944367, - "learning_rate": 2.732966781586712e-07, - "loss": 0.8453, - "num_input_tokens_seen": 148374545, - "step": 6958 - }, - { - "epoch": 0.836770275957434, - "grad_norm": 1.9101106254508742, - "learning_rate": 2.729037379228205e-07, - "loss": 0.6685, - "num_input_tokens_seen": 148394450, - "step": 6959 - }, - { - "epoch": 0.8368905188480731, - "grad_norm": 1.7039840853833346, - "learning_rate": 2.725110596860998e-07, - "loss": 0.7996, - "num_input_tokens_seen": 148414850, - "step": 6960 - }, - { - "epoch": 0.8370107617387123, - "grad_norm": 2.502650459534539, - "learning_rate": 2.7211864350807776e-07, - "loss": 0.6962, - "num_input_tokens_seen": 148432770, - "step": 6961 - }, - { - "epoch": 0.8371310046293513, - "grad_norm": 2.026286214006468, - "learning_rate": 2.717264894482831e-07, - "loss": 0.7325, - "num_input_tokens_seen": 148452830, - "step": 6962 - }, - { - "epoch": 0.8372512475199904, - "grad_norm": 4.324551923176521, - "learning_rate": 2.7133459756620646e-07, - "loss": 0.8035, - "num_input_tokens_seen": 148469745, - "step": 6963 - }, - { - "epoch": 0.8373714904106295, - "grad_norm": 1.9730274315620218, - "learning_rate": 2.709429679212969e-07, - "loss": 0.7293, - "num_input_tokens_seen": 148489065, - "step": 6964 - }, - { - "epoch": 0.8374917333012686, - "grad_norm": 1.9750490967147278, - "learning_rate": 2.7055160057296424e-07, - "loss": 0.7488, - "num_input_tokens_seen": 148506025, - "step": 6965 - }, - { - "epoch": 0.8376119761919076, - "grad_norm": 2.0732451084698806, - "learning_rate": 2.7016049558057896e-07, - "loss": 0.7181, - "num_input_tokens_seen": 148527705, - "step": 6966 - }, - { - "epoch": 0.8377322190825467, - "grad_norm": 1.9453842065679938, - "learning_rate": 2.6976965300347074e-07, - "loss": 0.7105, - "num_input_tokens_seen": 148550035, - "step": 6967 - }, - { - "epoch": 0.8378524619731859, - "grad_norm": 3.396684710335882, - "learning_rate": 2.693790729009309e-07, - "loss": 0.688, - "num_input_tokens_seen": 148571365, - "step": 6968 - }, - { - "epoch": 0.8379727048638249, - "grad_norm": 4.80072991963761, - "learning_rate": 2.6898875533220946e-07, - "loss": 0.8736, - "num_input_tokens_seen": 148590390, - "step": 6969 - }, - { - "epoch": 0.838092947754464, - "grad_norm": 1.9100846336874302, - "learning_rate": 2.685987003565171e-07, - "loss": 0.8083, - "num_input_tokens_seen": 148608150, - "step": 6970 - }, - { - "epoch": 0.8382131906451031, - "grad_norm": 2.5960785425030175, - "learning_rate": 2.6820890803302566e-07, - "loss": 0.7613, - "num_input_tokens_seen": 148623395, - "step": 6971 - }, - { - "epoch": 0.8383334335357422, - "grad_norm": 2.146671274393012, - "learning_rate": 2.67819378420866e-07, - "loss": 0.8112, - "num_input_tokens_seen": 148641905, - "step": 6972 - }, - { - "epoch": 0.8384536764263812, - "grad_norm": 3.5919793867400522, - "learning_rate": 2.6743011157912933e-07, - "loss": 0.6808, - "num_input_tokens_seen": 148661345, - "step": 6973 - }, - { - "epoch": 0.8385739193170204, - "grad_norm": 2.3003579604372875, - "learning_rate": 2.6704110756686683e-07, - "loss": 0.6504, - "num_input_tokens_seen": 148681890, - "step": 6974 - }, - { - "epoch": 0.8386941622076595, - "grad_norm": 2.2560659493105275, - "learning_rate": 2.6665236644309085e-07, - "loss": 0.8339, - "num_input_tokens_seen": 148701920, - "step": 6975 - }, - { - "epoch": 0.8388144050982985, - "grad_norm": 2.183155857114762, - "learning_rate": 2.662638882667727e-07, - "loss": 0.7898, - "num_input_tokens_seen": 148720580, - "step": 6976 - }, - { - "epoch": 0.8389346479889377, - "grad_norm": 2.1207740955438683, - "learning_rate": 2.658756730968443e-07, - "loss": 0.721, - "num_input_tokens_seen": 148738765, - "step": 6977 - }, - { - "epoch": 0.8390548908795767, - "grad_norm": 2.620582790305174, - "learning_rate": 2.654877209921975e-07, - "loss": 0.8787, - "num_input_tokens_seen": 148756020, - "step": 6978 - }, - { - "epoch": 0.8391751337702158, - "grad_norm": 2.7957844633839644, - "learning_rate": 2.651000320116843e-07, - "loss": 0.6283, - "num_input_tokens_seen": 148776625, - "step": 6979 - }, - { - "epoch": 0.839295376660855, - "grad_norm": 2.2994686652372653, - "learning_rate": 2.647126062141167e-07, - "loss": 0.7538, - "num_input_tokens_seen": 148795420, - "step": 6980 - }, - { - "epoch": 0.839415619551494, - "grad_norm": 2.185893216462019, - "learning_rate": 2.643254436582674e-07, - "loss": 0.8266, - "num_input_tokens_seen": 148814630, - "step": 6981 - }, - { - "epoch": 0.8395358624421331, - "grad_norm": 2.1147213452862674, - "learning_rate": 2.6393854440286743e-07, - "loss": 0.8202, - "num_input_tokens_seen": 148833520, - "step": 6982 - }, - { - "epoch": 0.8396561053327722, - "grad_norm": 2.2342781966876424, - "learning_rate": 2.6355190850661045e-07, - "loss": 0.7008, - "num_input_tokens_seen": 148850075, - "step": 6983 - }, - { - "epoch": 0.8397763482234113, - "grad_norm": 2.4874857158277583, - "learning_rate": 2.631655360281486e-07, - "loss": 0.8566, - "num_input_tokens_seen": 148869470, - "step": 6984 - }, - { - "epoch": 0.8398965911140504, - "grad_norm": 2.7039312951341965, - "learning_rate": 2.6277942702609366e-07, - "loss": 0.6566, - "num_input_tokens_seen": 148888670, - "step": 6985 - }, - { - "epoch": 0.8400168340046895, - "grad_norm": 3.1963195216116365, - "learning_rate": 2.6239358155901816e-07, - "loss": 0.8676, - "num_input_tokens_seen": 148906770, - "step": 6986 - }, - { - "epoch": 0.8401370768953286, - "grad_norm": 3.1957382170888673, - "learning_rate": 2.6200799968545516e-07, - "loss": 0.8082, - "num_input_tokens_seen": 148926785, - "step": 6987 - }, - { - "epoch": 0.8402573197859676, - "grad_norm": 0.8141094580973, - "learning_rate": 2.616226814638969e-07, - "loss": 0.593, - "num_input_tokens_seen": 148991610, - "step": 6988 - }, - { - "epoch": 0.8403775626766068, - "grad_norm": 2.0061494102156794, - "learning_rate": 2.612376269527954e-07, - "loss": 0.7696, - "num_input_tokens_seen": 149011035, - "step": 6989 - }, - { - "epoch": 0.8404978055672458, - "grad_norm": 24.261070551092935, - "learning_rate": 2.608528362105631e-07, - "loss": 0.6775, - "num_input_tokens_seen": 149030125, - "step": 6990 - }, - { - "epoch": 0.8406180484578849, - "grad_norm": 2.46070628071618, - "learning_rate": 2.6046830929557327e-07, - "loss": 0.7262, - "num_input_tokens_seen": 149049495, - "step": 6991 - }, - { - "epoch": 0.8407382913485241, - "grad_norm": 2.344915412832524, - "learning_rate": 2.6008404626615776e-07, - "loss": 0.8426, - "num_input_tokens_seen": 149067715, - "step": 6992 - }, - { - "epoch": 0.8408585342391631, - "grad_norm": 2.754917772900143, - "learning_rate": 2.597000471806092e-07, - "loss": 0.7293, - "num_input_tokens_seen": 149084000, - "step": 6993 - }, - { - "epoch": 0.8409787771298022, - "grad_norm": 2.5364283389798, - "learning_rate": 2.5931631209717976e-07, - "loss": 0.7272, - "num_input_tokens_seen": 149102585, - "step": 6994 - }, - { - "epoch": 0.8410990200204413, - "grad_norm": 2.0493255185944728, - "learning_rate": 2.5893284107408165e-07, - "loss": 0.6816, - "num_input_tokens_seen": 149119675, - "step": 6995 - }, - { - "epoch": 0.8412192629110804, - "grad_norm": 2.2518907794419074, - "learning_rate": 2.5854963416948726e-07, - "loss": 0.7776, - "num_input_tokens_seen": 149141660, - "step": 6996 - }, - { - "epoch": 0.8413395058017195, - "grad_norm": 1.7050513470355861, - "learning_rate": 2.581666914415286e-07, - "loss": 0.6865, - "num_input_tokens_seen": 149162560, - "step": 6997 - }, - { - "epoch": 0.8414597486923585, - "grad_norm": 0.9404238787506684, - "learning_rate": 2.5778401294829777e-07, - "loss": 0.73, - "num_input_tokens_seen": 149221020, - "step": 6998 - }, - { - "epoch": 0.8415799915829977, - "grad_norm": 2.7058926233499103, - "learning_rate": 2.574015987478473e-07, - "loss": 0.644, - "num_input_tokens_seen": 149238870, - "step": 6999 - }, - { - "epoch": 0.8417002344736367, - "grad_norm": 7.603321120843654, - "learning_rate": 2.570194488981887e-07, - "loss": 0.8555, - "num_input_tokens_seen": 149255135, - "step": 7000 - }, - { - "epoch": 0.8418204773642758, - "grad_norm": 0.9231476941463659, - "learning_rate": 2.566375634572939e-07, - "loss": 0.6408, - "num_input_tokens_seen": 149315495, - "step": 7001 - }, - { - "epoch": 0.841940720254915, - "grad_norm": 2.2160526855752862, - "learning_rate": 2.562559424830943e-07, - "loss": 0.7522, - "num_input_tokens_seen": 149333175, - "step": 7002 - }, - { - "epoch": 0.842060963145554, - "grad_norm": 3.7530695055914203, - "learning_rate": 2.558745860334821e-07, - "loss": 0.6983, - "num_input_tokens_seen": 149350185, - "step": 7003 - }, - { - "epoch": 0.8421812060361931, - "grad_norm": 2.4648088736051403, - "learning_rate": 2.554934941663085e-07, - "loss": 0.8362, - "num_input_tokens_seen": 149367440, - "step": 7004 - }, - { - "epoch": 0.8423014489268322, - "grad_norm": 8.561751198289116, - "learning_rate": 2.5511266693938484e-07, - "loss": 0.7233, - "num_input_tokens_seen": 149385620, - "step": 7005 - }, - { - "epoch": 0.8424216918174713, - "grad_norm": 1.5788227851948324, - "learning_rate": 2.5473210441048176e-07, - "loss": 0.7632, - "num_input_tokens_seen": 149406835, - "step": 7006 - }, - { - "epoch": 0.8425419347081103, - "grad_norm": 2.11376861019725, - "learning_rate": 2.5435180663733113e-07, - "loss": 0.7667, - "num_input_tokens_seen": 149426855, - "step": 7007 - }, - { - "epoch": 0.8426621775987495, - "grad_norm": 2.8285857509830254, - "learning_rate": 2.539717736776237e-07, - "loss": 0.7099, - "num_input_tokens_seen": 149442800, - "step": 7008 - }, - { - "epoch": 0.8427824204893886, - "grad_norm": 3.4524010749935616, - "learning_rate": 2.535920055890097e-07, - "loss": 0.7529, - "num_input_tokens_seen": 149463815, - "step": 7009 - }, - { - "epoch": 0.8429026633800276, - "grad_norm": 2.4412854795045345, - "learning_rate": 2.5321250242910006e-07, - "loss": 0.6423, - "num_input_tokens_seen": 149481450, - "step": 7010 - }, - { - "epoch": 0.8430229062706668, - "grad_norm": 2.969574117544732, - "learning_rate": 2.5283326425546493e-07, - "loss": 0.8536, - "num_input_tokens_seen": 149500280, - "step": 7011 - }, - { - "epoch": 0.8431431491613058, - "grad_norm": 2.700931634059641, - "learning_rate": 2.5245429112563443e-07, - "loss": 0.6875, - "num_input_tokens_seen": 149520675, - "step": 7012 - }, - { - "epoch": 0.8432633920519449, - "grad_norm": 2.0078023355510495, - "learning_rate": 2.5207558309709865e-07, - "loss": 0.8188, - "num_input_tokens_seen": 149540130, - "step": 7013 - }, - { - "epoch": 0.8433836349425841, - "grad_norm": 0.7264068538051787, - "learning_rate": 2.516971402273065e-07, - "loss": 0.585, - "num_input_tokens_seen": 149605915, - "step": 7014 - }, - { - "epoch": 0.8435038778332231, - "grad_norm": 1.8956640360411938, - "learning_rate": 2.513189625736687e-07, - "loss": 0.6632, - "num_input_tokens_seen": 149622530, - "step": 7015 - }, - { - "epoch": 0.8436241207238622, - "grad_norm": 2.2422708545220043, - "learning_rate": 2.5094105019355385e-07, - "loss": 0.7011, - "num_input_tokens_seen": 149637885, - "step": 7016 - }, - { - "epoch": 0.8437443636145013, - "grad_norm": 3.5569036399022353, - "learning_rate": 2.5056340314429116e-07, - "loss": 0.749, - "num_input_tokens_seen": 149655070, - "step": 7017 - }, - { - "epoch": 0.8438646065051404, - "grad_norm": 2.364431627049494, - "learning_rate": 2.5018602148316857e-07, - "loss": 0.8016, - "num_input_tokens_seen": 149670825, - "step": 7018 - }, - { - "epoch": 0.8439848493957794, - "grad_norm": 1.7690261723177203, - "learning_rate": 2.498089052674359e-07, - "loss": 0.7918, - "num_input_tokens_seen": 149688520, - "step": 7019 - }, - { - "epoch": 0.8441050922864186, - "grad_norm": 2.140767648819049, - "learning_rate": 2.494320545543007e-07, - "loss": 0.7533, - "num_input_tokens_seen": 149707810, - "step": 7020 - }, - { - "epoch": 0.8442253351770577, - "grad_norm": 2.2221936890966374, - "learning_rate": 2.490554694009308e-07, - "loss": 0.6698, - "num_input_tokens_seen": 149728395, - "step": 7021 - }, - { - "epoch": 0.8443455780676967, - "grad_norm": 1.5737884257485262, - "learning_rate": 2.4867914986445426e-07, - "loss": 0.7841, - "num_input_tokens_seen": 149750505, - "step": 7022 - }, - { - "epoch": 0.8444658209583359, - "grad_norm": 3.302121640607582, - "learning_rate": 2.483030960019581e-07, - "loss": 0.7002, - "num_input_tokens_seen": 149774155, - "step": 7023 - }, - { - "epoch": 0.8445860638489749, - "grad_norm": 0.7568110950777192, - "learning_rate": 2.4792730787048956e-07, - "loss": 0.5684, - "num_input_tokens_seen": 149827240, - "step": 7024 - }, - { - "epoch": 0.844706306739614, - "grad_norm": 0.8060351232851412, - "learning_rate": 2.475517855270552e-07, - "loss": 0.6647, - "num_input_tokens_seen": 149887040, - "step": 7025 - }, - { - "epoch": 0.8448265496302532, - "grad_norm": 1.896761020819705, - "learning_rate": 2.4717652902862143e-07, - "loss": 0.7253, - "num_input_tokens_seen": 149905735, - "step": 7026 - }, - { - "epoch": 0.8449467925208922, - "grad_norm": 1.8544717123263836, - "learning_rate": 2.4680153843211495e-07, - "loss": 0.813, - "num_input_tokens_seen": 149925385, - "step": 7027 - }, - { - "epoch": 0.8450670354115313, - "grad_norm": 3.0420278636111466, - "learning_rate": 2.464268137944212e-07, - "loss": 0.7144, - "num_input_tokens_seen": 149946400, - "step": 7028 - }, - { - "epoch": 0.8451872783021703, - "grad_norm": 1.9624355658659327, - "learning_rate": 2.460523551723854e-07, - "loss": 0.7761, - "num_input_tokens_seen": 149964160, - "step": 7029 - }, - { - "epoch": 0.8453075211928095, - "grad_norm": 1.9029728300252038, - "learning_rate": 2.456781626228124e-07, - "loss": 0.7451, - "num_input_tokens_seen": 149983385, - "step": 7030 - }, - { - "epoch": 0.8454277640834486, - "grad_norm": 1.0699732595512892, - "learning_rate": 2.453042362024675e-07, - "loss": 0.7477, - "num_input_tokens_seen": 150036350, - "step": 7031 - }, - { - "epoch": 0.8455480069740876, - "grad_norm": 1.8955352264829122, - "learning_rate": 2.449305759680751e-07, - "loss": 0.728, - "num_input_tokens_seen": 150057395, - "step": 7032 - }, - { - "epoch": 0.8456682498647268, - "grad_norm": 1.8172100038432952, - "learning_rate": 2.445571819763188e-07, - "loss": 0.7454, - "num_input_tokens_seen": 150079415, - "step": 7033 - }, - { - "epoch": 0.8457884927553658, - "grad_norm": 2.699841256259373, - "learning_rate": 2.441840542838418e-07, - "loss": 0.5799, - "num_input_tokens_seen": 150099345, - "step": 7034 - }, - { - "epoch": 0.8459087356460049, - "grad_norm": 3.118410657928793, - "learning_rate": 2.4381119294724816e-07, - "loss": 0.7179, - "num_input_tokens_seen": 150116510, - "step": 7035 - }, - { - "epoch": 0.846028978536644, - "grad_norm": 3.316587881044612, - "learning_rate": 2.434385980231004e-07, - "loss": 0.546, - "num_input_tokens_seen": 150135070, - "step": 7036 - }, - { - "epoch": 0.8461492214272831, - "grad_norm": 1.8737851121679632, - "learning_rate": 2.4306626956792043e-07, - "loss": 0.6469, - "num_input_tokens_seen": 150159735, - "step": 7037 - }, - { - "epoch": 0.8462694643179222, - "grad_norm": 4.876373211076824, - "learning_rate": 2.426942076381906e-07, - "loss": 0.7547, - "num_input_tokens_seen": 150177500, - "step": 7038 - }, - { - "epoch": 0.8463897072085613, - "grad_norm": 4.263012352849394, - "learning_rate": 2.4232241229035223e-07, - "loss": 0.826, - "num_input_tokens_seen": 150194975, - "step": 7039 - }, - { - "epoch": 0.8465099500992004, - "grad_norm": 0.8406978610807823, - "learning_rate": 2.419508835808064e-07, - "loss": 0.5975, - "num_input_tokens_seen": 150251250, - "step": 7040 - }, - { - "epoch": 0.8466301929898394, - "grad_norm": 17.58985235175416, - "learning_rate": 2.415796215659136e-07, - "loss": 0.6236, - "num_input_tokens_seen": 150267675, - "step": 7041 - }, - { - "epoch": 0.8467504358804786, - "grad_norm": 5.951262392195484, - "learning_rate": 2.412086263019939e-07, - "loss": 0.7728, - "num_input_tokens_seen": 150285420, - "step": 7042 - }, - { - "epoch": 0.8468706787711177, - "grad_norm": 1.8287172526399715, - "learning_rate": 2.408378978453276e-07, - "loss": 0.797, - "num_input_tokens_seen": 150305260, - "step": 7043 - }, - { - "epoch": 0.8469909216617567, - "grad_norm": 0.8153187237949509, - "learning_rate": 2.404674362521533e-07, - "loss": 0.6629, - "num_input_tokens_seen": 150363475, - "step": 7044 - }, - { - "epoch": 0.8471111645523959, - "grad_norm": 2.6547907029001907, - "learning_rate": 2.4009724157866997e-07, - "loss": 0.7424, - "num_input_tokens_seen": 150380255, - "step": 7045 - }, - { - "epoch": 0.8472314074430349, - "grad_norm": 2.07571613263205, - "learning_rate": 2.3972731388103564e-07, - "loss": 0.7596, - "num_input_tokens_seen": 150398455, - "step": 7046 - }, - { - "epoch": 0.847351650333674, - "grad_norm": 0.8263367032748078, - "learning_rate": 2.393576532153683e-07, - "loss": 0.6476, - "num_input_tokens_seen": 150461960, - "step": 7047 - }, - { - "epoch": 0.8474718932243132, - "grad_norm": 0.9807950941046557, - "learning_rate": 2.389882596377453e-07, - "loss": 0.6267, - "num_input_tokens_seen": 150515945, - "step": 7048 - }, - { - "epoch": 0.8475921361149522, - "grad_norm": 1.9082715675593804, - "learning_rate": 2.386191332042031e-07, - "loss": 0.758, - "num_input_tokens_seen": 150537560, - "step": 7049 - }, - { - "epoch": 0.8477123790055913, - "grad_norm": 1.8911575917975172, - "learning_rate": 2.382502739707375e-07, - "loss": 0.7269, - "num_input_tokens_seen": 150557755, - "step": 7050 - }, - { - "epoch": 0.8478326218962304, - "grad_norm": 2.1422948224332776, - "learning_rate": 2.3788168199330515e-07, - "loss": 0.6628, - "num_input_tokens_seen": 150579035, - "step": 7051 - }, - { - "epoch": 0.8479528647868695, - "grad_norm": 2.3746617085126958, - "learning_rate": 2.375133573278205e-07, - "loss": 0.7253, - "num_input_tokens_seen": 150600015, - "step": 7052 - }, - { - "epoch": 0.8480731076775085, - "grad_norm": 2.2291697052645327, - "learning_rate": 2.371453000301582e-07, - "loss": 0.7887, - "num_input_tokens_seen": 150618420, - "step": 7053 - }, - { - "epoch": 0.8481933505681477, - "grad_norm": 1.8937414140644757, - "learning_rate": 2.3677751015615222e-07, - "loss": 0.7347, - "num_input_tokens_seen": 150640215, - "step": 7054 - }, - { - "epoch": 0.8483135934587868, - "grad_norm": 1.9748290387350074, - "learning_rate": 2.3640998776159593e-07, - "loss": 0.8482, - "num_input_tokens_seen": 150657440, - "step": 7055 - }, - { - "epoch": 0.8484338363494258, - "grad_norm": 1.741265989049003, - "learning_rate": 2.3604273290224253e-07, - "loss": 0.8049, - "num_input_tokens_seen": 150677875, - "step": 7056 - }, - { - "epoch": 0.848554079240065, - "grad_norm": 2.685245561921207, - "learning_rate": 2.356757456338039e-07, - "loss": 0.7406, - "num_input_tokens_seen": 150695080, - "step": 7057 - }, - { - "epoch": 0.848674322130704, - "grad_norm": 0.853456328368847, - "learning_rate": 2.3530902601195147e-07, - "loss": 0.6532, - "num_input_tokens_seen": 150763290, - "step": 7058 - }, - { - "epoch": 0.8487945650213431, - "grad_norm": 2.510092769480272, - "learning_rate": 2.34942574092317e-07, - "loss": 0.7783, - "num_input_tokens_seen": 150778260, - "step": 7059 - }, - { - "epoch": 0.8489148079119821, - "grad_norm": 4.460202404886829, - "learning_rate": 2.345763899304909e-07, - "loss": 0.7607, - "num_input_tokens_seen": 150795970, - "step": 7060 - }, - { - "epoch": 0.8490350508026213, - "grad_norm": 2.4355810019979374, - "learning_rate": 2.3421047358202252e-07, - "loss": 0.6425, - "num_input_tokens_seen": 150814540, - "step": 7061 - }, - { - "epoch": 0.8491552936932604, - "grad_norm": 3.0332326048770257, - "learning_rate": 2.33844825102421e-07, - "loss": 0.8206, - "num_input_tokens_seen": 150832120, - "step": 7062 - }, - { - "epoch": 0.8492755365838994, - "grad_norm": 3.188543223196614, - "learning_rate": 2.3347944454715575e-07, - "loss": 0.7682, - "num_input_tokens_seen": 150848230, - "step": 7063 - }, - { - "epoch": 0.8493957794745386, - "grad_norm": 2.3233737441554725, - "learning_rate": 2.331143319716542e-07, - "loss": 0.6634, - "num_input_tokens_seen": 150867480, - "step": 7064 - }, - { - "epoch": 0.8495160223651776, - "grad_norm": 4.462323509616469, - "learning_rate": 2.3274948743130363e-07, - "loss": 0.6467, - "num_input_tokens_seen": 150887035, - "step": 7065 - }, - { - "epoch": 0.8496362652558167, - "grad_norm": 1.6797241525802804, - "learning_rate": 2.3238491098145085e-07, - "loss": 0.7897, - "num_input_tokens_seen": 150906285, - "step": 7066 - }, - { - "epoch": 0.8497565081464559, - "grad_norm": 2.4242288837038406, - "learning_rate": 2.3202060267740141e-07, - "loss": 0.7303, - "num_input_tokens_seen": 150923530, - "step": 7067 - }, - { - "epoch": 0.8498767510370949, - "grad_norm": 2.4316967109254835, - "learning_rate": 2.316565625744209e-07, - "loss": 0.7703, - "num_input_tokens_seen": 150941770, - "step": 7068 - }, - { - "epoch": 0.849996993927734, - "grad_norm": 4.575553863754309, - "learning_rate": 2.31292790727734e-07, - "loss": 0.8961, - "num_input_tokens_seen": 150959055, - "step": 7069 - }, - { - "epoch": 0.8501172368183731, - "grad_norm": 2.842109170699587, - "learning_rate": 2.3092928719252392e-07, - "loss": 0.7945, - "num_input_tokens_seen": 150977175, - "step": 7070 - }, - { - "epoch": 0.8502374797090122, - "grad_norm": 2.0921256149427565, - "learning_rate": 2.3056605202393475e-07, - "loss": 0.7757, - "num_input_tokens_seen": 150994455, - "step": 7071 - }, - { - "epoch": 0.8503577225996513, - "grad_norm": 2.4569211708733656, - "learning_rate": 2.3020308527706888e-07, - "loss": 0.6662, - "num_input_tokens_seen": 151013590, - "step": 7072 - }, - { - "epoch": 0.8504779654902904, - "grad_norm": 1.872827926695392, - "learning_rate": 2.298403870069876e-07, - "loss": 0.8809, - "num_input_tokens_seen": 151032620, - "step": 7073 - }, - { - "epoch": 0.8505982083809295, - "grad_norm": 1.9564084635512284, - "learning_rate": 2.2947795726871177e-07, - "loss": 0.7853, - "num_input_tokens_seen": 151053365, - "step": 7074 - }, - { - "epoch": 0.8507184512715685, - "grad_norm": 1.7907204789757285, - "learning_rate": 2.2911579611722253e-07, - "loss": 0.848, - "num_input_tokens_seen": 151072230, - "step": 7075 - }, - { - "epoch": 0.8508386941622077, - "grad_norm": 1.9292736461567053, - "learning_rate": 2.2875390360745905e-07, - "loss": 0.8688, - "num_input_tokens_seen": 151091355, - "step": 7076 - }, - { - "epoch": 0.8509589370528468, - "grad_norm": 3.558941402147183, - "learning_rate": 2.2839227979432008e-07, - "loss": 0.7716, - "num_input_tokens_seen": 151108725, - "step": 7077 - }, - { - "epoch": 0.8510791799434858, - "grad_norm": 2.0825181234485863, - "learning_rate": 2.2803092473266328e-07, - "loss": 0.8398, - "num_input_tokens_seen": 151125970, - "step": 7078 - }, - { - "epoch": 0.851199422834125, - "grad_norm": 2.678802518549154, - "learning_rate": 2.2766983847730682e-07, - "loss": 0.8593, - "num_input_tokens_seen": 151145360, - "step": 7079 - }, - { - "epoch": 0.851319665724764, - "grad_norm": 2.1370111705562023, - "learning_rate": 2.2730902108302663e-07, - "loss": 0.6653, - "num_input_tokens_seen": 151161995, - "step": 7080 - }, - { - "epoch": 0.8514399086154031, - "grad_norm": 1.6101808947683822, - "learning_rate": 2.269484726045583e-07, - "loss": 0.6863, - "num_input_tokens_seen": 151180630, - "step": 7081 - }, - { - "epoch": 0.8515601515060423, - "grad_norm": 1.932912260384305, - "learning_rate": 2.2658819309659715e-07, - "loss": 0.7822, - "num_input_tokens_seen": 151200550, - "step": 7082 - }, - { - "epoch": 0.8516803943966813, - "grad_norm": 2.139634794704658, - "learning_rate": 2.2622818261379706e-07, - "loss": 0.8365, - "num_input_tokens_seen": 151217290, - "step": 7083 - }, - { - "epoch": 0.8518006372873204, - "grad_norm": 4.433274764354832, - "learning_rate": 2.2586844121077142e-07, - "loss": 0.7463, - "num_input_tokens_seen": 151235520, - "step": 7084 - }, - { - "epoch": 0.8519208801779595, - "grad_norm": 2.2363025787205655, - "learning_rate": 2.255089689420926e-07, - "loss": 0.7127, - "num_input_tokens_seen": 151254755, - "step": 7085 - }, - { - "epoch": 0.8520411230685986, - "grad_norm": 0.6980516998447036, - "learning_rate": 2.2514976586229184e-07, - "loss": 0.59, - "num_input_tokens_seen": 151322420, - "step": 7086 - }, - { - "epoch": 0.8521613659592376, - "grad_norm": 0.9274846236901506, - "learning_rate": 2.247908320258609e-07, - "loss": 0.6288, - "num_input_tokens_seen": 151382230, - "step": 7087 - }, - { - "epoch": 0.8522816088498768, - "grad_norm": 2.5354770140202394, - "learning_rate": 2.2443216748724914e-07, - "loss": 0.7875, - "num_input_tokens_seen": 151402660, - "step": 7088 - }, - { - "epoch": 0.8524018517405159, - "grad_norm": 2.3561222562200204, - "learning_rate": 2.2407377230086588e-07, - "loss": 0.7432, - "num_input_tokens_seen": 151424735, - "step": 7089 - }, - { - "epoch": 0.8525220946311549, - "grad_norm": 2.5433032758430936, - "learning_rate": 2.23715646521079e-07, - "loss": 0.8287, - "num_input_tokens_seen": 151441975, - "step": 7090 - }, - { - "epoch": 0.852642337521794, - "grad_norm": 2.3356782846759097, - "learning_rate": 2.233577902022168e-07, - "loss": 0.831, - "num_input_tokens_seen": 151458315, - "step": 7091 - }, - { - "epoch": 0.8527625804124331, - "grad_norm": 0.8857943101204383, - "learning_rate": 2.2300020339856497e-07, - "loss": 0.6239, - "num_input_tokens_seen": 151520720, - "step": 7092 - }, - { - "epoch": 0.8528828233030722, - "grad_norm": 3.4988663917784457, - "learning_rate": 2.2264288616436966e-07, - "loss": 0.7704, - "num_input_tokens_seen": 151540695, - "step": 7093 - }, - { - "epoch": 0.8530030661937112, - "grad_norm": 2.1399794375545587, - "learning_rate": 2.2228583855383464e-07, - "loss": 0.7326, - "num_input_tokens_seen": 151557215, - "step": 7094 - }, - { - "epoch": 0.8531233090843504, - "grad_norm": 1.8371527139517003, - "learning_rate": 2.2192906062112527e-07, - "loss": 0.6724, - "num_input_tokens_seen": 151576810, - "step": 7095 - }, - { - "epoch": 0.8532435519749895, - "grad_norm": 1.553842364359297, - "learning_rate": 2.2157255242036333e-07, - "loss": 0.7, - "num_input_tokens_seen": 151600195, - "step": 7096 - }, - { - "epoch": 0.8533637948656285, - "grad_norm": 2.1987024811894735, - "learning_rate": 2.2121631400563178e-07, - "loss": 0.7394, - "num_input_tokens_seen": 151619745, - "step": 7097 - }, - { - "epoch": 0.8534840377562677, - "grad_norm": 0.8445379874529323, - "learning_rate": 2.208603454309701e-07, - "loss": 0.6057, - "num_input_tokens_seen": 151677555, - "step": 7098 - }, - { - "epoch": 0.8536042806469067, - "grad_norm": 2.920393185815172, - "learning_rate": 2.2050464675037994e-07, - "loss": 0.7077, - "num_input_tokens_seen": 151695900, - "step": 7099 - }, - { - "epoch": 0.8537245235375458, - "grad_norm": 2.7386850105403147, - "learning_rate": 2.2014921801782016e-07, - "loss": 0.7269, - "num_input_tokens_seen": 151715110, - "step": 7100 - }, - { - "epoch": 0.853844766428185, - "grad_norm": 2.1101372242820666, - "learning_rate": 2.1979405928720872e-07, - "loss": 0.732, - "num_input_tokens_seen": 151734485, - "step": 7101 - }, - { - "epoch": 0.853965009318824, - "grad_norm": 2.6447096604926554, - "learning_rate": 2.1943917061242257e-07, - "loss": 0.7855, - "num_input_tokens_seen": 151754060, - "step": 7102 - }, - { - "epoch": 0.8540852522094631, - "grad_norm": 2.191214286158684, - "learning_rate": 2.1908455204729903e-07, - "loss": 0.6596, - "num_input_tokens_seen": 151772930, - "step": 7103 - }, - { - "epoch": 0.8542054951001022, - "grad_norm": 2.096211094482049, - "learning_rate": 2.187302036456331e-07, - "loss": 0.7732, - "num_input_tokens_seen": 151791715, - "step": 7104 - }, - { - "epoch": 0.8543257379907413, - "grad_norm": 3.504360039067773, - "learning_rate": 2.183761254611789e-07, - "loss": 0.7576, - "num_input_tokens_seen": 151811760, - "step": 7105 - }, - { - "epoch": 0.8544459808813804, - "grad_norm": 2.0663234275142783, - "learning_rate": 2.1802231754764945e-07, - "loss": 0.7039, - "num_input_tokens_seen": 151836920, - "step": 7106 - }, - { - "epoch": 0.8545662237720195, - "grad_norm": 2.0362714465032403, - "learning_rate": 2.17668779958718e-07, - "loss": 0.7565, - "num_input_tokens_seen": 151859220, - "step": 7107 - }, - { - "epoch": 0.8546864666626586, - "grad_norm": 2.362050105227234, - "learning_rate": 2.1731551274801553e-07, - "loss": 0.7973, - "num_input_tokens_seen": 151875380, - "step": 7108 - }, - { - "epoch": 0.8548067095532976, - "grad_norm": 2.0477178248243044, - "learning_rate": 2.169625159691324e-07, - "loss": 0.604, - "num_input_tokens_seen": 151894975, - "step": 7109 - }, - { - "epoch": 0.8549269524439368, - "grad_norm": 2.8749073584890485, - "learning_rate": 2.1660978967561784e-07, - "loss": 0.7452, - "num_input_tokens_seen": 151914030, - "step": 7110 - }, - { - "epoch": 0.8550471953345758, - "grad_norm": 2.6451303424907833, - "learning_rate": 2.1625733392098035e-07, - "loss": 0.7835, - "num_input_tokens_seen": 151929360, - "step": 7111 - }, - { - "epoch": 0.8551674382252149, - "grad_norm": 2.225476234905436, - "learning_rate": 2.1590514875868692e-07, - "loss": 0.7907, - "num_input_tokens_seen": 151949210, - "step": 7112 - }, - { - "epoch": 0.8552876811158541, - "grad_norm": 2.924170868868411, - "learning_rate": 2.155532342421642e-07, - "loss": 0.7225, - "num_input_tokens_seen": 151966930, - "step": 7113 - }, - { - "epoch": 0.8554079240064931, - "grad_norm": 2.0468407863242066, - "learning_rate": 2.1520159042479636e-07, - "loss": 0.7794, - "num_input_tokens_seen": 151984940, - "step": 7114 - }, - { - "epoch": 0.8555281668971322, - "grad_norm": 2.6096756245830206, - "learning_rate": 2.148502173599287e-07, - "loss": 0.7035, - "num_input_tokens_seen": 152002800, - "step": 7115 - }, - { - "epoch": 0.8556484097877713, - "grad_norm": 4.887049003488496, - "learning_rate": 2.1449911510086372e-07, - "loss": 0.6491, - "num_input_tokens_seen": 152021990, - "step": 7116 - }, - { - "epoch": 0.8557686526784104, - "grad_norm": 2.0839366022197363, - "learning_rate": 2.1414828370086324e-07, - "loss": 0.7665, - "num_input_tokens_seen": 152042250, - "step": 7117 - }, - { - "epoch": 0.8558888955690495, - "grad_norm": 1.831528610812624, - "learning_rate": 2.1379772321314782e-07, - "loss": 0.7146, - "num_input_tokens_seen": 152060015, - "step": 7118 - }, - { - "epoch": 0.8560091384596886, - "grad_norm": 2.3503315951839143, - "learning_rate": 2.1344743369089802e-07, - "loss": 0.8155, - "num_input_tokens_seen": 152075515, - "step": 7119 - }, - { - "epoch": 0.8561293813503277, - "grad_norm": 2.050192104853889, - "learning_rate": 2.130974151872522e-07, - "loss": 0.8123, - "num_input_tokens_seen": 152095570, - "step": 7120 - }, - { - "epoch": 0.8562496242409667, - "grad_norm": 2.102795715196339, - "learning_rate": 2.1274766775530773e-07, - "loss": 0.7822, - "num_input_tokens_seen": 152115155, - "step": 7121 - }, - { - "epoch": 0.8563698671316058, - "grad_norm": 1.9510248557037422, - "learning_rate": 2.1239819144812056e-07, - "loss": 0.7877, - "num_input_tokens_seen": 152129335, - "step": 7122 - }, - { - "epoch": 0.856490110022245, - "grad_norm": 1.9979452963810218, - "learning_rate": 2.120489863187067e-07, - "loss": 0.6888, - "num_input_tokens_seen": 152153945, - "step": 7123 - }, - { - "epoch": 0.856610352912884, - "grad_norm": 1.9357833165382563, - "learning_rate": 2.1170005242004006e-07, - "loss": 0.7623, - "num_input_tokens_seen": 152175015, - "step": 7124 - }, - { - "epoch": 0.8567305958035231, - "grad_norm": 2.386084644553693, - "learning_rate": 2.1135138980505384e-07, - "loss": 0.7753, - "num_input_tokens_seen": 152195405, - "step": 7125 - }, - { - "epoch": 0.8568508386941622, - "grad_norm": 1.8508099347400397, - "learning_rate": 2.110029985266395e-07, - "loss": 0.7262, - "num_input_tokens_seen": 152214830, - "step": 7126 - }, - { - "epoch": 0.8569710815848013, - "grad_norm": 1.747903818307868, - "learning_rate": 2.1065487863764787e-07, - "loss": 0.7275, - "num_input_tokens_seen": 152232895, - "step": 7127 - }, - { - "epoch": 0.8570913244754403, - "grad_norm": 1.6592736947031055, - "learning_rate": 2.1030703019088846e-07, - "loss": 0.8503, - "num_input_tokens_seen": 152253245, - "step": 7128 - }, - { - "epoch": 0.8572115673660795, - "grad_norm": 2.2555919744515083, - "learning_rate": 2.0995945323912956e-07, - "loss": 0.7051, - "num_input_tokens_seen": 152271650, - "step": 7129 - }, - { - "epoch": 0.8573318102567186, - "grad_norm": 1.6952760273826992, - "learning_rate": 2.0961214783509806e-07, - "loss": 0.78, - "num_input_tokens_seen": 152294250, - "step": 7130 - }, - { - "epoch": 0.8574520531473576, - "grad_norm": 3.3150091970414177, - "learning_rate": 2.0926511403148051e-07, - "loss": 0.7409, - "num_input_tokens_seen": 152312935, - "step": 7131 - }, - { - "epoch": 0.8575722960379968, - "grad_norm": 2.320398321897453, - "learning_rate": 2.0891835188092143e-07, - "loss": 0.7567, - "num_input_tokens_seen": 152329655, - "step": 7132 - }, - { - "epoch": 0.8576925389286358, - "grad_norm": 1.908287808946306, - "learning_rate": 2.0857186143602434e-07, - "loss": 0.8096, - "num_input_tokens_seen": 152348020, - "step": 7133 - }, - { - "epoch": 0.8578127818192749, - "grad_norm": 2.0352865886651688, - "learning_rate": 2.0822564274935094e-07, - "loss": 0.6746, - "num_input_tokens_seen": 152367165, - "step": 7134 - }, - { - "epoch": 0.8579330247099141, - "grad_norm": 2.9696702151476138, - "learning_rate": 2.0787969587342346e-07, - "loss": 0.6672, - "num_input_tokens_seen": 152389605, - "step": 7135 - }, - { - "epoch": 0.8580532676005531, - "grad_norm": 2.166406478957432, - "learning_rate": 2.0753402086072124e-07, - "loss": 0.739, - "num_input_tokens_seen": 152407955, - "step": 7136 - }, - { - "epoch": 0.8581735104911922, - "grad_norm": 2.709845163384798, - "learning_rate": 2.071886177636828e-07, - "loss": 0.7557, - "num_input_tokens_seen": 152424460, - "step": 7137 - }, - { - "epoch": 0.8582937533818313, - "grad_norm": 2.033111600699077, - "learning_rate": 2.068434866347053e-07, - "loss": 0.8177, - "num_input_tokens_seen": 152444360, - "step": 7138 - }, - { - "epoch": 0.8584139962724704, - "grad_norm": 2.0407494257726317, - "learning_rate": 2.0649862752614555e-07, - "loss": 0.6129, - "num_input_tokens_seen": 152462790, - "step": 7139 - }, - { - "epoch": 0.8585342391631094, - "grad_norm": 0.8049465740917714, - "learning_rate": 2.0615404049031838e-07, - "loss": 0.5945, - "num_input_tokens_seen": 152519480, - "step": 7140 - }, - { - "epoch": 0.8586544820537486, - "grad_norm": 3.2630454824190163, - "learning_rate": 2.058097255794966e-07, - "loss": 0.7785, - "num_input_tokens_seen": 152534290, - "step": 7141 - }, - { - "epoch": 0.8587747249443877, - "grad_norm": 0.8115580668202989, - "learning_rate": 2.054656828459125e-07, - "loss": 0.5502, - "num_input_tokens_seen": 152598120, - "step": 7142 - }, - { - "epoch": 0.8588949678350267, - "grad_norm": 4.92193482901055, - "learning_rate": 2.051219123417578e-07, - "loss": 0.7671, - "num_input_tokens_seen": 152617900, - "step": 7143 - }, - { - "epoch": 0.8590152107256659, - "grad_norm": 2.9717207988956065, - "learning_rate": 2.0477841411918196e-07, - "loss": 0.5957, - "num_input_tokens_seen": 152637145, - "step": 7144 - }, - { - "epoch": 0.859135453616305, - "grad_norm": 2.216565312955087, - "learning_rate": 2.0443518823029326e-07, - "loss": 0.7448, - "num_input_tokens_seen": 152657405, - "step": 7145 - }, - { - "epoch": 0.859255696506944, - "grad_norm": 2.791394646109027, - "learning_rate": 2.0409223472715854e-07, - "loss": 0.7626, - "num_input_tokens_seen": 152674270, - "step": 7146 - }, - { - "epoch": 0.8593759393975832, - "grad_norm": 1.9852725640866236, - "learning_rate": 2.0374955366180434e-07, - "loss": 0.7413, - "num_input_tokens_seen": 152691630, - "step": 7147 - }, - { - "epoch": 0.8594961822882222, - "grad_norm": 1.8941941624099565, - "learning_rate": 2.034071450862147e-07, - "loss": 0.7219, - "num_input_tokens_seen": 152708820, - "step": 7148 - }, - { - "epoch": 0.8596164251788613, - "grad_norm": 2.006034916351908, - "learning_rate": 2.030650090523327e-07, - "loss": 0.7632, - "num_input_tokens_seen": 152727730, - "step": 7149 - }, - { - "epoch": 0.8597366680695004, - "grad_norm": 2.1913441206873965, - "learning_rate": 2.027231456120595e-07, - "loss": 0.5931, - "num_input_tokens_seen": 152747845, - "step": 7150 - }, - { - "epoch": 0.8598569109601395, - "grad_norm": 1.8126852530663988, - "learning_rate": 2.023815548172567e-07, - "loss": 0.7231, - "num_input_tokens_seen": 152767635, - "step": 7151 - }, - { - "epoch": 0.8599771538507786, - "grad_norm": 2.3543047215394055, - "learning_rate": 2.0204023671974267e-07, - "loss": 0.6587, - "num_input_tokens_seen": 152786740, - "step": 7152 - }, - { - "epoch": 0.8600973967414177, - "grad_norm": 2.266877085644117, - "learning_rate": 2.0169919137129532e-07, - "loss": 0.8086, - "num_input_tokens_seen": 152804900, - "step": 7153 - }, - { - "epoch": 0.8602176396320568, - "grad_norm": 2.24249984969273, - "learning_rate": 2.013584188236508e-07, - "loss": 0.6874, - "num_input_tokens_seen": 152822525, - "step": 7154 - }, - { - "epoch": 0.8603378825226958, - "grad_norm": 2.1326073804878236, - "learning_rate": 2.0101791912850396e-07, - "loss": 0.7917, - "num_input_tokens_seen": 152841785, - "step": 7155 - }, - { - "epoch": 0.8604581254133349, - "grad_norm": 1.933624357412785, - "learning_rate": 2.0067769233750842e-07, - "loss": 0.6266, - "num_input_tokens_seen": 152863160, - "step": 7156 - }, - { - "epoch": 0.860578368303974, - "grad_norm": 2.383261236466245, - "learning_rate": 2.003377385022764e-07, - "loss": 0.6947, - "num_input_tokens_seen": 152881705, - "step": 7157 - }, - { - "epoch": 0.8606986111946131, - "grad_norm": 2.2464755370106944, - "learning_rate": 1.9999805767437826e-07, - "loss": 0.7629, - "num_input_tokens_seen": 152900315, - "step": 7158 - }, - { - "epoch": 0.8608188540852522, - "grad_norm": 1.9152654052759959, - "learning_rate": 1.9965864990534386e-07, - "loss": 0.7124, - "num_input_tokens_seen": 152920560, - "step": 7159 - }, - { - "epoch": 0.8609390969758913, - "grad_norm": 2.0099014811244, - "learning_rate": 1.9931951524666092e-07, - "loss": 0.7726, - "num_input_tokens_seen": 152941370, - "step": 7160 - }, - { - "epoch": 0.8610593398665304, - "grad_norm": 1.730678366576472, - "learning_rate": 1.989806537497758e-07, - "loss": 0.8021, - "num_input_tokens_seen": 152961295, - "step": 7161 - }, - { - "epoch": 0.8611795827571694, - "grad_norm": 2.3178195240729074, - "learning_rate": 1.9864206546609297e-07, - "loss": 0.7217, - "num_input_tokens_seen": 152979855, - "step": 7162 - }, - { - "epoch": 0.8612998256478086, - "grad_norm": 2.0057397641487835, - "learning_rate": 1.983037504469771e-07, - "loss": 0.8386, - "num_input_tokens_seen": 152998285, - "step": 7163 - }, - { - "epoch": 0.8614200685384477, - "grad_norm": 2.4475923873306864, - "learning_rate": 1.9796570874374984e-07, - "loss": 0.6637, - "num_input_tokens_seen": 153018110, - "step": 7164 - }, - { - "epoch": 0.8615403114290867, - "grad_norm": 2.3754437351453275, - "learning_rate": 1.976279404076917e-07, - "loss": 0.7671, - "num_input_tokens_seen": 153037230, - "step": 7165 - }, - { - "epoch": 0.8616605543197259, - "grad_norm": 3.32312976187681, - "learning_rate": 1.9729044549004148e-07, - "loss": 0.7548, - "num_input_tokens_seen": 153058335, - "step": 7166 - }, - { - "epoch": 0.8617807972103649, - "grad_norm": 2.1710292128460456, - "learning_rate": 1.9695322404199798e-07, - "loss": 0.6986, - "num_input_tokens_seen": 153080100, - "step": 7167 - }, - { - "epoch": 0.861901040101004, - "grad_norm": 2.0820525311060445, - "learning_rate": 1.9661627611471654e-07, - "loss": 0.8197, - "num_input_tokens_seen": 153099615, - "step": 7168 - }, - { - "epoch": 0.8620212829916432, - "grad_norm": 2.05609631521287, - "learning_rate": 1.9627960175931246e-07, - "loss": 0.7029, - "num_input_tokens_seen": 153124035, - "step": 7169 - }, - { - "epoch": 0.8621415258822822, - "grad_norm": 2.926873636571819, - "learning_rate": 1.9594320102685847e-07, - "loss": 0.7405, - "num_input_tokens_seen": 153143025, - "step": 7170 - }, - { - "epoch": 0.8622617687729213, - "grad_norm": 2.5005060277344726, - "learning_rate": 1.956070739683864e-07, - "loss": 0.6348, - "num_input_tokens_seen": 153162080, - "step": 7171 - }, - { - "epoch": 0.8623820116635604, - "grad_norm": 1.819274772706039, - "learning_rate": 1.9527122063488678e-07, - "loss": 0.7375, - "num_input_tokens_seen": 153182915, - "step": 7172 - }, - { - "epoch": 0.8625022545541995, - "grad_norm": 1.8947983161705129, - "learning_rate": 1.9493564107730797e-07, - "loss": 0.7972, - "num_input_tokens_seen": 153202635, - "step": 7173 - }, - { - "epoch": 0.8626224974448385, - "grad_norm": 2.3719586764524063, - "learning_rate": 1.9460033534655684e-07, - "loss": 0.6112, - "num_input_tokens_seen": 153221715, - "step": 7174 - }, - { - "epoch": 0.8627427403354777, - "grad_norm": 3.153760040971219, - "learning_rate": 1.9426530349349978e-07, - "loss": 0.8304, - "num_input_tokens_seen": 153241885, - "step": 7175 - }, - { - "epoch": 0.8628629832261168, - "grad_norm": 2.0797077003325457, - "learning_rate": 1.9393054556896038e-07, - "loss": 0.6505, - "num_input_tokens_seen": 153259305, - "step": 7176 - }, - { - "epoch": 0.8629832261167558, - "grad_norm": 2.473491592330941, - "learning_rate": 1.9359606162372133e-07, - "loss": 0.6877, - "num_input_tokens_seen": 153280630, - "step": 7177 - }, - { - "epoch": 0.863103469007395, - "grad_norm": 2.0471905583743077, - "learning_rate": 1.9326185170852293e-07, - "loss": 0.7069, - "num_input_tokens_seen": 153299315, - "step": 7178 - }, - { - "epoch": 0.863223711898034, - "grad_norm": 2.7662760660244743, - "learning_rate": 1.9292791587406553e-07, - "loss": 0.712, - "num_input_tokens_seen": 153317895, - "step": 7179 - }, - { - "epoch": 0.8633439547886731, - "grad_norm": 2.234619315226899, - "learning_rate": 1.9259425417100661e-07, - "loss": 0.8585, - "num_input_tokens_seen": 153333730, - "step": 7180 - }, - { - "epoch": 0.8634641976793123, - "grad_norm": 3.7684167119657914, - "learning_rate": 1.9226086664996234e-07, - "loss": 0.7386, - "num_input_tokens_seen": 153351695, - "step": 7181 - }, - { - "epoch": 0.8635844405699513, - "grad_norm": 2.3184627211060347, - "learning_rate": 1.9192775336150667e-07, - "loss": 0.7411, - "num_input_tokens_seen": 153371715, - "step": 7182 - }, - { - "epoch": 0.8637046834605904, - "grad_norm": 0.7894201711477891, - "learning_rate": 1.9159491435617415e-07, - "loss": 0.5742, - "num_input_tokens_seen": 153426110, - "step": 7183 - }, - { - "epoch": 0.8638249263512295, - "grad_norm": 1.9464572640896318, - "learning_rate": 1.9126234968445498e-07, - "loss": 0.7678, - "num_input_tokens_seen": 153445520, - "step": 7184 - }, - { - "epoch": 0.8639451692418686, - "grad_norm": 2.533004740365005, - "learning_rate": 1.9093005939679907e-07, - "loss": 0.6631, - "num_input_tokens_seen": 153467195, - "step": 7185 - }, - { - "epoch": 0.8640654121325076, - "grad_norm": 2.101528304251122, - "learning_rate": 1.9059804354361452e-07, - "loss": 0.7585, - "num_input_tokens_seen": 153484690, - "step": 7186 - }, - { - "epoch": 0.8641856550231467, - "grad_norm": 2.021698217452282, - "learning_rate": 1.902663021752684e-07, - "loss": 0.7045, - "num_input_tokens_seen": 153505840, - "step": 7187 - }, - { - "epoch": 0.8643058979137859, - "grad_norm": 2.5000420485830075, - "learning_rate": 1.8993483534208556e-07, - "loss": 0.811, - "num_input_tokens_seen": 153524470, - "step": 7188 - }, - { - "epoch": 0.8644261408044249, - "grad_norm": 2.6667501661910396, - "learning_rate": 1.8960364309434884e-07, - "loss": 0.7369, - "num_input_tokens_seen": 153541685, - "step": 7189 - }, - { - "epoch": 0.864546383695064, - "grad_norm": 1.8983065177083056, - "learning_rate": 1.8927272548229967e-07, - "loss": 0.7805, - "num_input_tokens_seen": 153561095, - "step": 7190 - }, - { - "epoch": 0.8646666265857031, - "grad_norm": 1.7037925639123406, - "learning_rate": 1.8894208255613876e-07, - "loss": 0.8205, - "num_input_tokens_seen": 153580130, - "step": 7191 - }, - { - "epoch": 0.8647868694763422, - "grad_norm": 4.952810886922987, - "learning_rate": 1.8861171436602397e-07, - "loss": 0.7662, - "num_input_tokens_seen": 153596965, - "step": 7192 - }, - { - "epoch": 0.8649071123669813, - "grad_norm": 2.6416261396699854, - "learning_rate": 1.882816209620719e-07, - "loss": 0.8028, - "num_input_tokens_seen": 153613395, - "step": 7193 - }, - { - "epoch": 0.8650273552576204, - "grad_norm": 1.861466738641778, - "learning_rate": 1.8795180239435693e-07, - "loss": 0.7631, - "num_input_tokens_seen": 153631970, - "step": 7194 - }, - { - "epoch": 0.8651475981482595, - "grad_norm": 2.9463203968605125, - "learning_rate": 1.8762225871291348e-07, - "loss": 0.7543, - "num_input_tokens_seen": 153647565, - "step": 7195 - }, - { - "epoch": 0.8652678410388985, - "grad_norm": 1.8297474270851621, - "learning_rate": 1.8729298996773201e-07, - "loss": 0.7981, - "num_input_tokens_seen": 153666035, - "step": 7196 - }, - { - "epoch": 0.8653880839295377, - "grad_norm": 0.8650190380246979, - "learning_rate": 1.8696399620876301e-07, - "loss": 0.6366, - "num_input_tokens_seen": 153722785, - "step": 7197 - }, - { - "epoch": 0.8655083268201768, - "grad_norm": 2.7651777171582173, - "learning_rate": 1.866352774859141e-07, - "loss": 0.7877, - "num_input_tokens_seen": 153737730, - "step": 7198 - }, - { - "epoch": 0.8656285697108158, - "grad_norm": 3.871772522618278, - "learning_rate": 1.8630683384905188e-07, - "loss": 0.6964, - "num_input_tokens_seen": 153756780, - "step": 7199 - }, - { - "epoch": 0.865748812601455, - "grad_norm": 2.0663543380812, - "learning_rate": 1.859786653480009e-07, - "loss": 0.8864, - "num_input_tokens_seen": 153771615, - "step": 7200 - }, - { - "epoch": 0.865869055492094, - "grad_norm": 2.0056519617050177, - "learning_rate": 1.8565077203254398e-07, - "loss": 0.7388, - "num_input_tokens_seen": 153796795, - "step": 7201 - }, - { - "epoch": 0.8659892983827331, - "grad_norm": 3.8256225415782117, - "learning_rate": 1.8532315395242203e-07, - "loss": 0.7218, - "num_input_tokens_seen": 153812965, - "step": 7202 - }, - { - "epoch": 0.8661095412733723, - "grad_norm": 2.364187639422691, - "learning_rate": 1.849958111573353e-07, - "loss": 0.7145, - "num_input_tokens_seen": 153831290, - "step": 7203 - }, - { - "epoch": 0.8662297841640113, - "grad_norm": 1.9510397215728377, - "learning_rate": 1.8466874369694074e-07, - "loss": 0.6328, - "num_input_tokens_seen": 153848705, - "step": 7204 - }, - { - "epoch": 0.8663500270546504, - "grad_norm": 2.417359695290982, - "learning_rate": 1.8434195162085443e-07, - "loss": 0.7007, - "num_input_tokens_seen": 153865350, - "step": 7205 - }, - { - "epoch": 0.8664702699452895, - "grad_norm": 3.1851356552454932, - "learning_rate": 1.8401543497865023e-07, - "loss": 0.7861, - "num_input_tokens_seen": 153883070, - "step": 7206 - }, - { - "epoch": 0.8665905128359286, - "grad_norm": 12.605545248843363, - "learning_rate": 1.836891938198608e-07, - "loss": 0.6415, - "num_input_tokens_seen": 153903215, - "step": 7207 - }, - { - "epoch": 0.8667107557265676, - "grad_norm": 3.5488315507197026, - "learning_rate": 1.8336322819397677e-07, - "loss": 0.7152, - "num_input_tokens_seen": 153920470, - "step": 7208 - }, - { - "epoch": 0.8668309986172068, - "grad_norm": 2.4241916386651905, - "learning_rate": 1.8303753815044654e-07, - "loss": 0.6199, - "num_input_tokens_seen": 153939495, - "step": 7209 - }, - { - "epoch": 0.8669512415078459, - "grad_norm": 2.429468190799404, - "learning_rate": 1.8271212373867684e-07, - "loss": 0.7005, - "num_input_tokens_seen": 153956660, - "step": 7210 - }, - { - "epoch": 0.8670714843984849, - "grad_norm": 3.7225790900409974, - "learning_rate": 1.823869850080333e-07, - "loss": 0.7489, - "num_input_tokens_seen": 153969145, - "step": 7211 - }, - { - "epoch": 0.8671917272891241, - "grad_norm": 0.8766079965587327, - "learning_rate": 1.820621220078391e-07, - "loss": 0.6246, - "num_input_tokens_seen": 154032775, - "step": 7212 - }, - { - "epoch": 0.8673119701797631, - "grad_norm": 2.625262119250147, - "learning_rate": 1.8173753478737553e-07, - "loss": 0.669, - "num_input_tokens_seen": 154052930, - "step": 7213 - }, - { - "epoch": 0.8674322130704022, - "grad_norm": 3.478789691036733, - "learning_rate": 1.8141322339588205e-07, - "loss": 0.7898, - "num_input_tokens_seen": 154069990, - "step": 7214 - }, - { - "epoch": 0.8675524559610414, - "grad_norm": 2.5694640397871886, - "learning_rate": 1.810891878825569e-07, - "loss": 0.6895, - "num_input_tokens_seen": 154089685, - "step": 7215 - }, - { - "epoch": 0.8676726988516804, - "grad_norm": 2.6537659561055045, - "learning_rate": 1.8076542829655561e-07, - "loss": 0.7122, - "num_input_tokens_seen": 154108210, - "step": 7216 - }, - { - "epoch": 0.8677929417423195, - "grad_norm": 2.521058556490146, - "learning_rate": 1.8044194468699248e-07, - "loss": 0.7989, - "num_input_tokens_seen": 154125240, - "step": 7217 - }, - { - "epoch": 0.8679131846329585, - "grad_norm": 2.6363711273523633, - "learning_rate": 1.8011873710293912e-07, - "loss": 0.7483, - "num_input_tokens_seen": 154143465, - "step": 7218 - }, - { - "epoch": 0.8680334275235977, - "grad_norm": 3.260746694393361, - "learning_rate": 1.7979580559342677e-07, - "loss": 0.6885, - "num_input_tokens_seen": 154163915, - "step": 7219 - }, - { - "epoch": 0.8681536704142367, - "grad_norm": 1.9330496047579002, - "learning_rate": 1.7947315020744358e-07, - "loss": 0.6584, - "num_input_tokens_seen": 154184730, - "step": 7220 - }, - { - "epoch": 0.8682739133048758, - "grad_norm": 1.880700272046773, - "learning_rate": 1.7915077099393594e-07, - "loss": 0.7866, - "num_input_tokens_seen": 154201050, - "step": 7221 - }, - { - "epoch": 0.868394156195515, - "grad_norm": 2.3584848776473586, - "learning_rate": 1.788286680018083e-07, - "loss": 0.7369, - "num_input_tokens_seen": 154219480, - "step": 7222 - }, - { - "epoch": 0.868514399086154, - "grad_norm": 2.3088326095588028, - "learning_rate": 1.7850684127992398e-07, - "loss": 0.7179, - "num_input_tokens_seen": 154238945, - "step": 7223 - }, - { - "epoch": 0.8686346419767931, - "grad_norm": 1.9794273381585965, - "learning_rate": 1.7818529087710378e-07, - "loss": 0.6992, - "num_input_tokens_seen": 154259020, - "step": 7224 - }, - { - "epoch": 0.8687548848674322, - "grad_norm": 1.9152934112428484, - "learning_rate": 1.7786401684212637e-07, - "loss": 0.8426, - "num_input_tokens_seen": 154277570, - "step": 7225 - }, - { - "epoch": 0.8688751277580713, - "grad_norm": 0.7503385091815515, - "learning_rate": 1.7754301922372838e-07, - "loss": 0.5712, - "num_input_tokens_seen": 154326935, - "step": 7226 - }, - { - "epoch": 0.8689953706487104, - "grad_norm": 2.053629416093874, - "learning_rate": 1.7722229807060617e-07, - "loss": 0.8031, - "num_input_tokens_seen": 154345235, - "step": 7227 - }, - { - "epoch": 0.8691156135393495, - "grad_norm": 3.3843710108863205, - "learning_rate": 1.7690185343141172e-07, - "loss": 0.8131, - "num_input_tokens_seen": 154364870, - "step": 7228 - }, - { - "epoch": 0.8692358564299886, - "grad_norm": 2.1988372194241936, - "learning_rate": 1.7658168535475636e-07, - "loss": 0.7065, - "num_input_tokens_seen": 154382375, - "step": 7229 - }, - { - "epoch": 0.8693560993206276, - "grad_norm": 1.7677365937711709, - "learning_rate": 1.7626179388920948e-07, - "loss": 0.642, - "num_input_tokens_seen": 154403375, - "step": 7230 - }, - { - "epoch": 0.8694763422112668, - "grad_norm": 2.157889907551522, - "learning_rate": 1.7594217908329866e-07, - "loss": 0.8004, - "num_input_tokens_seen": 154425280, - "step": 7231 - }, - { - "epoch": 0.8695965851019059, - "grad_norm": 2.5372761874371497, - "learning_rate": 1.7562284098550895e-07, - "loss": 0.73, - "num_input_tokens_seen": 154444710, - "step": 7232 - }, - { - "epoch": 0.8697168279925449, - "grad_norm": 0.8987151687924567, - "learning_rate": 1.753037796442838e-07, - "loss": 0.6638, - "num_input_tokens_seen": 154503870, - "step": 7233 - }, - { - "epoch": 0.8698370708831841, - "grad_norm": 2.707181722592954, - "learning_rate": 1.74984995108024e-07, - "loss": 0.7423, - "num_input_tokens_seen": 154521520, - "step": 7234 - }, - { - "epoch": 0.8699573137738231, - "grad_norm": 2.193273155476818, - "learning_rate": 1.7466648742508981e-07, - "loss": 0.8238, - "num_input_tokens_seen": 154537425, - "step": 7235 - }, - { - "epoch": 0.8700775566644622, - "grad_norm": 2.1199136904213507, - "learning_rate": 1.7434825664379837e-07, - "loss": 0.832, - "num_input_tokens_seen": 154555650, - "step": 7236 - }, - { - "epoch": 0.8701977995551013, - "grad_norm": 3.637289849030688, - "learning_rate": 1.740303028124246e-07, - "loss": 0.8603, - "num_input_tokens_seen": 154571430, - "step": 7237 - }, - { - "epoch": 0.8703180424457404, - "grad_norm": 2.0837906820959997, - "learning_rate": 1.7371262597920188e-07, - "loss": 0.753, - "num_input_tokens_seen": 154593210, - "step": 7238 - }, - { - "epoch": 0.8704382853363795, - "grad_norm": 1.5109766343947568, - "learning_rate": 1.7339522619232195e-07, - "loss": 0.7523, - "num_input_tokens_seen": 154611310, - "step": 7239 - }, - { - "epoch": 0.8705585282270186, - "grad_norm": 4.104608892162011, - "learning_rate": 1.730781034999338e-07, - "loss": 0.7466, - "num_input_tokens_seen": 154632610, - "step": 7240 - }, - { - "epoch": 0.8706787711176577, - "grad_norm": 2.2682240701066587, - "learning_rate": 1.7276125795014497e-07, - "loss": 0.7365, - "num_input_tokens_seen": 154650780, - "step": 7241 - }, - { - "epoch": 0.8707990140082967, - "grad_norm": 2.187120399509654, - "learning_rate": 1.7244468959102054e-07, - "loss": 0.6667, - "num_input_tokens_seen": 154667555, - "step": 7242 - }, - { - "epoch": 0.8709192568989359, - "grad_norm": 2.3911697489612855, - "learning_rate": 1.7212839847058348e-07, - "loss": 0.8442, - "num_input_tokens_seen": 154682405, - "step": 7243 - }, - { - "epoch": 0.871039499789575, - "grad_norm": 2.362377550081417, - "learning_rate": 1.7181238463681514e-07, - "loss": 0.7363, - "num_input_tokens_seen": 154701170, - "step": 7244 - }, - { - "epoch": 0.871159742680214, - "grad_norm": 2.0016769998469623, - "learning_rate": 1.714966481376543e-07, - "loss": 0.7155, - "num_input_tokens_seen": 154717570, - "step": 7245 - }, - { - "epoch": 0.8712799855708532, - "grad_norm": 1.9892272558102755, - "learning_rate": 1.7118118902099797e-07, - "loss": 0.8197, - "num_input_tokens_seen": 154735375, - "step": 7246 - }, - { - "epoch": 0.8714002284614922, - "grad_norm": 2.009736885711811, - "learning_rate": 1.7086600733470146e-07, - "loss": 0.8023, - "num_input_tokens_seen": 154755765, - "step": 7247 - }, - { - "epoch": 0.8715204713521313, - "grad_norm": 1.8908046441173751, - "learning_rate": 1.7055110312657738e-07, - "loss": 0.7585, - "num_input_tokens_seen": 154774980, - "step": 7248 - }, - { - "epoch": 0.8716407142427703, - "grad_norm": 2.503460605775502, - "learning_rate": 1.702364764443962e-07, - "loss": 0.7369, - "num_input_tokens_seen": 154793775, - "step": 7249 - }, - { - "epoch": 0.8717609571334095, - "grad_norm": 2.9099414206939205, - "learning_rate": 1.699221273358864e-07, - "loss": 0.7205, - "num_input_tokens_seen": 154813160, - "step": 7250 - }, - { - "epoch": 0.8718812000240486, - "grad_norm": 2.199683174645047, - "learning_rate": 1.6960805584873538e-07, - "loss": 0.7473, - "num_input_tokens_seen": 154830880, - "step": 7251 - }, - { - "epoch": 0.8720014429146876, - "grad_norm": 2.371451260425662, - "learning_rate": 1.6929426203058684e-07, - "loss": 0.7776, - "num_input_tokens_seen": 154851025, - "step": 7252 - }, - { - "epoch": 0.8721216858053268, - "grad_norm": 2.5995821005727184, - "learning_rate": 1.689807459290431e-07, - "loss": 0.8036, - "num_input_tokens_seen": 154869400, - "step": 7253 - }, - { - "epoch": 0.8722419286959658, - "grad_norm": 2.348181247702717, - "learning_rate": 1.6866750759166392e-07, - "loss": 0.6999, - "num_input_tokens_seen": 154889100, - "step": 7254 - }, - { - "epoch": 0.8723621715866049, - "grad_norm": 2.724670660934635, - "learning_rate": 1.683545470659684e-07, - "loss": 0.7675, - "num_input_tokens_seen": 154906650, - "step": 7255 - }, - { - "epoch": 0.8724824144772441, - "grad_norm": 2.2240112864833455, - "learning_rate": 1.680418643994317e-07, - "loss": 0.7382, - "num_input_tokens_seen": 154924940, - "step": 7256 - }, - { - "epoch": 0.8726026573678831, - "grad_norm": 0.9865027062594387, - "learning_rate": 1.6772945963948738e-07, - "loss": 0.6932, - "num_input_tokens_seen": 154982825, - "step": 7257 - }, - { - "epoch": 0.8727229002585222, - "grad_norm": 2.5453450184035367, - "learning_rate": 1.6741733283352733e-07, - "loss": 0.7684, - "num_input_tokens_seen": 155000150, - "step": 7258 - }, - { - "epoch": 0.8728431431491613, - "grad_norm": 2.8346749231499566, - "learning_rate": 1.6710548402890102e-07, - "loss": 0.8298, - "num_input_tokens_seen": 155020395, - "step": 7259 - }, - { - "epoch": 0.8729633860398004, - "grad_norm": 1.997145367734175, - "learning_rate": 1.6679391327291527e-07, - "loss": 0.6625, - "num_input_tokens_seen": 155041320, - "step": 7260 - }, - { - "epoch": 0.8730836289304394, - "grad_norm": 3.165372802917187, - "learning_rate": 1.6648262061283535e-07, - "loss": 0.6792, - "num_input_tokens_seen": 155056340, - "step": 7261 - }, - { - "epoch": 0.8732038718210786, - "grad_norm": 2.0410575566125853, - "learning_rate": 1.6617160609588353e-07, - "loss": 0.7278, - "num_input_tokens_seen": 155075235, - "step": 7262 - }, - { - "epoch": 0.8733241147117177, - "grad_norm": 2.535664053738104, - "learning_rate": 1.6586086976924163e-07, - "loss": 0.7046, - "num_input_tokens_seen": 155090455, - "step": 7263 - }, - { - "epoch": 0.8734443576023567, - "grad_norm": 2.1026735246017703, - "learning_rate": 1.6555041168004747e-07, - "loss": 0.786, - "num_input_tokens_seen": 155109495, - "step": 7264 - }, - { - "epoch": 0.8735646004929959, - "grad_norm": 2.1188259079447227, - "learning_rate": 1.6524023187539715e-07, - "loss": 0.6854, - "num_input_tokens_seen": 155127500, - "step": 7265 - }, - { - "epoch": 0.873684843383635, - "grad_norm": 2.612217863088034, - "learning_rate": 1.649303304023446e-07, - "loss": 0.7468, - "num_input_tokens_seen": 155146975, - "step": 7266 - }, - { - "epoch": 0.873805086274274, - "grad_norm": 1.7716806477454652, - "learning_rate": 1.6462070730790222e-07, - "loss": 0.7783, - "num_input_tokens_seen": 155165855, - "step": 7267 - }, - { - "epoch": 0.8739253291649132, - "grad_norm": 25.477779563158002, - "learning_rate": 1.6431136263903912e-07, - "loss": 0.7831, - "num_input_tokens_seen": 155184575, - "step": 7268 - }, - { - "epoch": 0.8740455720555522, - "grad_norm": 1.9790379960731481, - "learning_rate": 1.6400229644268282e-07, - "loss": 0.7358, - "num_input_tokens_seen": 155202650, - "step": 7269 - }, - { - "epoch": 0.8741658149461913, - "grad_norm": 2.0632569306880577, - "learning_rate": 1.6369350876571852e-07, - "loss": 0.8054, - "num_input_tokens_seen": 155220525, - "step": 7270 - }, - { - "epoch": 0.8742860578368304, - "grad_norm": 4.119014060933731, - "learning_rate": 1.6338499965498874e-07, - "loss": 0.8055, - "num_input_tokens_seen": 155238975, - "step": 7271 - }, - { - "epoch": 0.8744063007274695, - "grad_norm": 1.67449513296944, - "learning_rate": 1.630767691572943e-07, - "loss": 0.7696, - "num_input_tokens_seen": 155258715, - "step": 7272 - }, - { - "epoch": 0.8745265436181086, - "grad_norm": 0.7726221207645896, - "learning_rate": 1.627688173193935e-07, - "loss": 0.564, - "num_input_tokens_seen": 155320325, - "step": 7273 - }, - { - "epoch": 0.8746467865087477, - "grad_norm": 2.142295318549826, - "learning_rate": 1.6246114418800193e-07, - "loss": 0.7525, - "num_input_tokens_seen": 155340325, - "step": 7274 - }, - { - "epoch": 0.8747670293993868, - "grad_norm": 2.241612257790113, - "learning_rate": 1.6215374980979423e-07, - "loss": 0.7604, - "num_input_tokens_seen": 155360455, - "step": 7275 - }, - { - "epoch": 0.8748872722900258, - "grad_norm": 5.116313393513584, - "learning_rate": 1.6184663423140133e-07, - "loss": 0.6847, - "num_input_tokens_seen": 155380475, - "step": 7276 - }, - { - "epoch": 0.875007515180665, - "grad_norm": 2.5639926860542843, - "learning_rate": 1.615397974994126e-07, - "loss": 0.6433, - "num_input_tokens_seen": 155398000, - "step": 7277 - }, - { - "epoch": 0.875127758071304, - "grad_norm": 1.6899886406677598, - "learning_rate": 1.6123323966037438e-07, - "loss": 0.7947, - "num_input_tokens_seen": 155416240, - "step": 7278 - }, - { - "epoch": 0.8752480009619431, - "grad_norm": 2.6514341977765112, - "learning_rate": 1.6092696076079216e-07, - "loss": 0.7765, - "num_input_tokens_seen": 155434335, - "step": 7279 - }, - { - "epoch": 0.8753682438525822, - "grad_norm": 2.1219837291097585, - "learning_rate": 1.6062096084712785e-07, - "loss": 0.7303, - "num_input_tokens_seen": 155455405, - "step": 7280 - }, - { - "epoch": 0.8754884867432213, - "grad_norm": 2.0267282598001586, - "learning_rate": 1.6031523996580098e-07, - "loss": 0.7034, - "num_input_tokens_seen": 155472685, - "step": 7281 - }, - { - "epoch": 0.8756087296338604, - "grad_norm": 2.452921777603213, - "learning_rate": 1.600097981631894e-07, - "loss": 0.6552, - "num_input_tokens_seen": 155490870, - "step": 7282 - }, - { - "epoch": 0.8757289725244994, - "grad_norm": 2.6973055406511013, - "learning_rate": 1.5970463548562886e-07, - "loss": 0.7507, - "num_input_tokens_seen": 155509745, - "step": 7283 - }, - { - "epoch": 0.8758492154151386, - "grad_norm": 2.3713795344776427, - "learning_rate": 1.5939975197941192e-07, - "loss": 0.71, - "num_input_tokens_seen": 155531120, - "step": 7284 - }, - { - "epoch": 0.8759694583057777, - "grad_norm": 0.8371535968498088, - "learning_rate": 1.5909514769078892e-07, - "loss": 0.5623, - "num_input_tokens_seen": 155595945, - "step": 7285 - }, - { - "epoch": 0.8760897011964167, - "grad_norm": 1.7353195151683674, - "learning_rate": 1.5879082266596867e-07, - "loss": 0.7687, - "num_input_tokens_seen": 155617005, - "step": 7286 - }, - { - "epoch": 0.8762099440870559, - "grad_norm": 3.8741979204224317, - "learning_rate": 1.5848677695111645e-07, - "loss": 0.7137, - "num_input_tokens_seen": 155638325, - "step": 7287 - }, - { - "epoch": 0.8763301869776949, - "grad_norm": 3.1890496768097476, - "learning_rate": 1.5818301059235607e-07, - "loss": 0.7032, - "num_input_tokens_seen": 155653220, - "step": 7288 - }, - { - "epoch": 0.876450429868334, - "grad_norm": 1.7995767709116486, - "learning_rate": 1.578795236357684e-07, - "loss": 0.811, - "num_input_tokens_seen": 155674405, - "step": 7289 - }, - { - "epoch": 0.8765706727589732, - "grad_norm": 2.8384049598305965, - "learning_rate": 1.5757631612739218e-07, - "loss": 0.8448, - "num_input_tokens_seen": 155687670, - "step": 7290 - }, - { - "epoch": 0.8766909156496122, - "grad_norm": 0.8806307760284282, - "learning_rate": 1.572733881132242e-07, - "loss": 0.6745, - "num_input_tokens_seen": 155748035, - "step": 7291 - }, - { - "epoch": 0.8768111585402513, - "grad_norm": 0.7984432667858014, - "learning_rate": 1.5697073963921814e-07, - "loss": 0.6136, - "num_input_tokens_seen": 155806995, - "step": 7292 - }, - { - "epoch": 0.8769314014308904, - "grad_norm": 2.3234911821240414, - "learning_rate": 1.566683707512857e-07, - "loss": 0.8472, - "num_input_tokens_seen": 155824390, - "step": 7293 - }, - { - "epoch": 0.8770516443215295, - "grad_norm": 2.372146328883043, - "learning_rate": 1.5636628149529508e-07, - "loss": 0.7875, - "num_input_tokens_seen": 155841900, - "step": 7294 - }, - { - "epoch": 0.8771718872121685, - "grad_norm": 2.4177423173776704, - "learning_rate": 1.560644719170743e-07, - "loss": 0.7818, - "num_input_tokens_seen": 155862490, - "step": 7295 - }, - { - "epoch": 0.8772921301028077, - "grad_norm": 2.247949290823861, - "learning_rate": 1.5576294206240692e-07, - "loss": 0.7076, - "num_input_tokens_seen": 155881735, - "step": 7296 - }, - { - "epoch": 0.8774123729934468, - "grad_norm": 2.028692490742412, - "learning_rate": 1.5546169197703507e-07, - "loss": 0.6831, - "num_input_tokens_seen": 155907730, - "step": 7297 - }, - { - "epoch": 0.8775326158840858, - "grad_norm": 4.657685726373577, - "learning_rate": 1.551607217066575e-07, - "loss": 0.7663, - "num_input_tokens_seen": 155925420, - "step": 7298 - }, - { - "epoch": 0.877652858774725, - "grad_norm": 1.7297149813438113, - "learning_rate": 1.5486003129693193e-07, - "loss": 0.8534, - "num_input_tokens_seen": 155942505, - "step": 7299 - }, - { - "epoch": 0.877773101665364, - "grad_norm": 2.006525888370653, - "learning_rate": 1.545596207934725e-07, - "loss": 0.7692, - "num_input_tokens_seen": 155960710, - "step": 7300 - }, - { - "epoch": 0.8778933445560031, - "grad_norm": 1.9001422590215593, - "learning_rate": 1.5425949024185147e-07, - "loss": 0.771, - "num_input_tokens_seen": 155980455, - "step": 7301 - }, - { - "epoch": 0.8780135874466423, - "grad_norm": 2.7000747616830045, - "learning_rate": 1.5395963968759818e-07, - "loss": 0.6772, - "num_input_tokens_seen": 156000450, - "step": 7302 - }, - { - "epoch": 0.8781338303372813, - "grad_norm": 1.702062347682195, - "learning_rate": 1.536600691761998e-07, - "loss": 0.6364, - "num_input_tokens_seen": 156026000, - "step": 7303 - }, - { - "epoch": 0.8782540732279204, - "grad_norm": 1.8085981974383063, - "learning_rate": 1.5336077875310084e-07, - "loss": 0.7056, - "num_input_tokens_seen": 156044945, - "step": 7304 - }, - { - "epoch": 0.8783743161185595, - "grad_norm": 3.598424529556889, - "learning_rate": 1.5306176846370345e-07, - "loss": 0.7386, - "num_input_tokens_seen": 156062810, - "step": 7305 - }, - { - "epoch": 0.8784945590091986, - "grad_norm": 6.992587157182235, - "learning_rate": 1.5276303835336712e-07, - "loss": 0.7397, - "num_input_tokens_seen": 156083070, - "step": 7306 - }, - { - "epoch": 0.8786148018998376, - "grad_norm": 0.7998410538781606, - "learning_rate": 1.524645884674094e-07, - "loss": 0.5562, - "num_input_tokens_seen": 156139720, - "step": 7307 - }, - { - "epoch": 0.8787350447904768, - "grad_norm": 2.398837101047178, - "learning_rate": 1.521664188511047e-07, - "loss": 0.7854, - "num_input_tokens_seen": 156159465, - "step": 7308 - }, - { - "epoch": 0.8788552876811159, - "grad_norm": 2.5265979537210215, - "learning_rate": 1.518685295496851e-07, - "loss": 0.8103, - "num_input_tokens_seen": 156177045, - "step": 7309 - }, - { - "epoch": 0.8789755305717549, - "grad_norm": 1.7898697532212067, - "learning_rate": 1.5157092060833975e-07, - "loss": 0.8439, - "num_input_tokens_seen": 156196415, - "step": 7310 - }, - { - "epoch": 0.879095773462394, - "grad_norm": 1.7132263754821253, - "learning_rate": 1.5127359207221635e-07, - "loss": 0.6529, - "num_input_tokens_seen": 156215615, - "step": 7311 - }, - { - "epoch": 0.8792160163530331, - "grad_norm": 2.093428046917588, - "learning_rate": 1.5097654398641923e-07, - "loss": 0.7187, - "num_input_tokens_seen": 156233240, - "step": 7312 - }, - { - "epoch": 0.8793362592436722, - "grad_norm": 1.403515511192551, - "learning_rate": 1.5067977639601014e-07, - "loss": 0.7241, - "num_input_tokens_seen": 156255720, - "step": 7313 - }, - { - "epoch": 0.8794565021343113, - "grad_norm": 3.071228494945204, - "learning_rate": 1.5038328934600864e-07, - "loss": 0.7069, - "num_input_tokens_seen": 156272075, - "step": 7314 - }, - { - "epoch": 0.8795767450249504, - "grad_norm": 2.002934039991062, - "learning_rate": 1.5008708288139161e-07, - "loss": 0.6971, - "num_input_tokens_seen": 156294155, - "step": 7315 - }, - { - "epoch": 0.8796969879155895, - "grad_norm": 2.0579384232195688, - "learning_rate": 1.497911570470931e-07, - "loss": 0.724, - "num_input_tokens_seen": 156313880, - "step": 7316 - }, - { - "epoch": 0.8798172308062285, - "grad_norm": 1.7319519990520393, - "learning_rate": 1.4949551188800502e-07, - "loss": 0.8483, - "num_input_tokens_seen": 156334585, - "step": 7317 - }, - { - "epoch": 0.8799374736968677, - "grad_norm": 1.6536701667612526, - "learning_rate": 1.4920014744897634e-07, - "loss": 0.7219, - "num_input_tokens_seen": 156353720, - "step": 7318 - }, - { - "epoch": 0.8800577165875068, - "grad_norm": 2.7264760224295217, - "learning_rate": 1.4890506377481392e-07, - "loss": 0.8608, - "num_input_tokens_seen": 156372530, - "step": 7319 - }, - { - "epoch": 0.8801779594781458, - "grad_norm": 1.8113670099325259, - "learning_rate": 1.486102609102815e-07, - "loss": 0.6342, - "num_input_tokens_seen": 156392800, - "step": 7320 - }, - { - "epoch": 0.880298202368785, - "grad_norm": 3.2734731952769187, - "learning_rate": 1.483157389001004e-07, - "loss": 0.8486, - "num_input_tokens_seen": 156410080, - "step": 7321 - }, - { - "epoch": 0.880418445259424, - "grad_norm": 2.767476507298718, - "learning_rate": 1.4802149778894933e-07, - "loss": 0.7811, - "num_input_tokens_seen": 156428590, - "step": 7322 - }, - { - "epoch": 0.8805386881500631, - "grad_norm": 1.655509141066757, - "learning_rate": 1.4772753762146484e-07, - "loss": 0.8698, - "num_input_tokens_seen": 156447565, - "step": 7323 - }, - { - "epoch": 0.8806589310407023, - "grad_norm": 1.9936651901005138, - "learning_rate": 1.474338584422401e-07, - "loss": 0.696, - "num_input_tokens_seen": 156472495, - "step": 7324 - }, - { - "epoch": 0.8807791739313413, - "grad_norm": 2.30308829396143, - "learning_rate": 1.4714046029582595e-07, - "loss": 0.7511, - "num_input_tokens_seen": 156491280, - "step": 7325 - }, - { - "epoch": 0.8808994168219804, - "grad_norm": 7.504067307446644, - "learning_rate": 1.4684734322673075e-07, - "loss": 0.7518, - "num_input_tokens_seen": 156512040, - "step": 7326 - }, - { - "epoch": 0.8810196597126195, - "grad_norm": 3.17519253007266, - "learning_rate": 1.465545072794203e-07, - "loss": 0.6877, - "num_input_tokens_seen": 156529635, - "step": 7327 - }, - { - "epoch": 0.8811399026032586, - "grad_norm": 1.725685443873617, - "learning_rate": 1.4626195249831753e-07, - "loss": 0.75, - "num_input_tokens_seen": 156550255, - "step": 7328 - }, - { - "epoch": 0.8812601454938976, - "grad_norm": 2.1093932448411263, - "learning_rate": 1.4596967892780244e-07, - "loss": 0.7118, - "num_input_tokens_seen": 156566305, - "step": 7329 - }, - { - "epoch": 0.8813803883845368, - "grad_norm": 2.2395660462486964, - "learning_rate": 1.4567768661221314e-07, - "loss": 0.744, - "num_input_tokens_seen": 156586595, - "step": 7330 - }, - { - "epoch": 0.8815006312751759, - "grad_norm": 2.553811892529049, - "learning_rate": 1.4538597559584442e-07, - "loss": 0.7359, - "num_input_tokens_seen": 156604105, - "step": 7331 - }, - { - "epoch": 0.8816208741658149, - "grad_norm": 2.1129890530321607, - "learning_rate": 1.4509454592294868e-07, - "loss": 0.7674, - "num_input_tokens_seen": 156624310, - "step": 7332 - }, - { - "epoch": 0.8817411170564541, - "grad_norm": 1.9864245695085363, - "learning_rate": 1.448033976377354e-07, - "loss": 0.7879, - "num_input_tokens_seen": 156639015, - "step": 7333 - }, - { - "epoch": 0.8818613599470931, - "grad_norm": 2.850459602558032, - "learning_rate": 1.445125307843713e-07, - "loss": 0.7315, - "num_input_tokens_seen": 156656960, - "step": 7334 - }, - { - "epoch": 0.8819816028377322, - "grad_norm": 5.7091869677963345, - "learning_rate": 1.442219454069813e-07, - "loss": 0.7445, - "num_input_tokens_seen": 156677705, - "step": 7335 - }, - { - "epoch": 0.8821018457283714, - "grad_norm": 2.4291813698257987, - "learning_rate": 1.4393164154964676e-07, - "loss": 0.6596, - "num_input_tokens_seen": 156696955, - "step": 7336 - }, - { - "epoch": 0.8822220886190104, - "grad_norm": 1.8521979586937316, - "learning_rate": 1.4364161925640649e-07, - "loss": 0.928, - "num_input_tokens_seen": 156718075, - "step": 7337 - }, - { - "epoch": 0.8823423315096495, - "grad_norm": 2.4251544194735564, - "learning_rate": 1.4335187857125618e-07, - "loss": 0.8438, - "num_input_tokens_seen": 156736495, - "step": 7338 - }, - { - "epoch": 0.8824625744002886, - "grad_norm": 2.143483609906486, - "learning_rate": 1.4306241953815023e-07, - "loss": 0.744, - "num_input_tokens_seen": 156757275, - "step": 7339 - }, - { - "epoch": 0.8825828172909277, - "grad_norm": 2.5192539981486455, - "learning_rate": 1.4277324220099862e-07, - "loss": 0.7025, - "num_input_tokens_seen": 156778905, - "step": 7340 - }, - { - "epoch": 0.8827030601815667, - "grad_norm": 2.8641593289907576, - "learning_rate": 1.4248434660366938e-07, - "loss": 0.7343, - "num_input_tokens_seen": 156798100, - "step": 7341 - }, - { - "epoch": 0.8828233030722058, - "grad_norm": 5.4814248673686174, - "learning_rate": 1.4219573278998765e-07, - "loss": 0.7033, - "num_input_tokens_seen": 156816280, - "step": 7342 - }, - { - "epoch": 0.882943545962845, - "grad_norm": 2.445820209226076, - "learning_rate": 1.4190740080373664e-07, - "loss": 0.648, - "num_input_tokens_seen": 156836280, - "step": 7343 - }, - { - "epoch": 0.883063788853484, - "grad_norm": 2.047721290995996, - "learning_rate": 1.4161935068865538e-07, - "loss": 0.8384, - "num_input_tokens_seen": 156851145, - "step": 7344 - }, - { - "epoch": 0.8831840317441231, - "grad_norm": 2.3130893788321925, - "learning_rate": 1.4133158248844113e-07, - "loss": 0.747, - "num_input_tokens_seen": 156869770, - "step": 7345 - }, - { - "epoch": 0.8833042746347622, - "grad_norm": 2.0716009581216324, - "learning_rate": 1.4104409624674785e-07, - "loss": 0.724, - "num_input_tokens_seen": 156889275, - "step": 7346 - }, - { - "epoch": 0.8834245175254013, - "grad_norm": 1.890763442574728, - "learning_rate": 1.407568920071873e-07, - "loss": 0.7789, - "num_input_tokens_seen": 156907860, - "step": 7347 - }, - { - "epoch": 0.8835447604160404, - "grad_norm": 2.35163211453435, - "learning_rate": 1.4046996981332782e-07, - "loss": 0.6696, - "num_input_tokens_seen": 156927465, - "step": 7348 - }, - { - "epoch": 0.8836650033066795, - "grad_norm": 2.142423432451787, - "learning_rate": 1.4018332970869561e-07, - "loss": 0.7712, - "num_input_tokens_seen": 156945125, - "step": 7349 - }, - { - "epoch": 0.8837852461973186, - "grad_norm": 2.1744484176091605, - "learning_rate": 1.3989697173677305e-07, - "loss": 0.8389, - "num_input_tokens_seen": 156966170, - "step": 7350 - }, - { - "epoch": 0.8839054890879576, - "grad_norm": 2.030314446021492, - "learning_rate": 1.396108959410014e-07, - "loss": 0.7615, - "num_input_tokens_seen": 156985105, - "step": 7351 - }, - { - "epoch": 0.8840257319785968, - "grad_norm": 1.9026112079934179, - "learning_rate": 1.3932510236477745e-07, - "loss": 0.8023, - "num_input_tokens_seen": 157005495, - "step": 7352 - }, - { - "epoch": 0.8841459748692359, - "grad_norm": 2.083546629977031, - "learning_rate": 1.3903959105145636e-07, - "loss": 0.5489, - "num_input_tokens_seen": 157025705, - "step": 7353 - }, - { - "epoch": 0.8842662177598749, - "grad_norm": 2.2061970657174483, - "learning_rate": 1.387543620443492e-07, - "loss": 0.8213, - "num_input_tokens_seen": 157042270, - "step": 7354 - }, - { - "epoch": 0.8843864606505141, - "grad_norm": 1.861115844720276, - "learning_rate": 1.3846941538672564e-07, - "loss": 0.832, - "num_input_tokens_seen": 157060695, - "step": 7355 - }, - { - "epoch": 0.8845067035411531, - "grad_norm": 2.42261155932535, - "learning_rate": 1.3818475112181193e-07, - "loss": 0.8073, - "num_input_tokens_seen": 157079210, - "step": 7356 - }, - { - "epoch": 0.8846269464317922, - "grad_norm": 2.167692392567915, - "learning_rate": 1.3790036929279091e-07, - "loss": 0.784, - "num_input_tokens_seen": 157096085, - "step": 7357 - }, - { - "epoch": 0.8847471893224313, - "grad_norm": 2.763826260668255, - "learning_rate": 1.3761626994280363e-07, - "loss": 0.5815, - "num_input_tokens_seen": 157113275, - "step": 7358 - }, - { - "epoch": 0.8848674322130704, - "grad_norm": 1.9188592581836388, - "learning_rate": 1.3733245311494735e-07, - "loss": 0.7286, - "num_input_tokens_seen": 157135650, - "step": 7359 - }, - { - "epoch": 0.8849876751037095, - "grad_norm": 8.317519376224942, - "learning_rate": 1.3704891885227676e-07, - "loss": 0.707, - "num_input_tokens_seen": 157155415, - "step": 7360 - }, - { - "epoch": 0.8851079179943486, - "grad_norm": 2.245657821116547, - "learning_rate": 1.3676566719780414e-07, - "loss": 0.7663, - "num_input_tokens_seen": 157172600, - "step": 7361 - }, - { - "epoch": 0.8852281608849877, - "grad_norm": 1.974864910709764, - "learning_rate": 1.36482698194498e-07, - "loss": 0.7406, - "num_input_tokens_seen": 157188865, - "step": 7362 - }, - { - "epoch": 0.8853484037756267, - "grad_norm": 2.455228309467207, - "learning_rate": 1.3620001188528506e-07, - "loss": 0.7096, - "num_input_tokens_seen": 157209305, - "step": 7363 - }, - { - "epoch": 0.8854686466662659, - "grad_norm": 2.5781171158206084, - "learning_rate": 1.3591760831304865e-07, - "loss": 0.7227, - "num_input_tokens_seen": 157226715, - "step": 7364 - }, - { - "epoch": 0.885588889556905, - "grad_norm": 9.118166295748768, - "learning_rate": 1.356354875206287e-07, - "loss": 0.7956, - "num_input_tokens_seen": 157244270, - "step": 7365 - }, - { - "epoch": 0.885709132447544, - "grad_norm": 2.1402908511079652, - "learning_rate": 1.3535364955082296e-07, - "loss": 0.6854, - "num_input_tokens_seen": 157263840, - "step": 7366 - }, - { - "epoch": 0.8858293753381832, - "grad_norm": 1.8522966181911544, - "learning_rate": 1.3507209444638613e-07, - "loss": 0.6328, - "num_input_tokens_seen": 157285560, - "step": 7367 - }, - { - "epoch": 0.8859496182288222, - "grad_norm": 2.3928223980956136, - "learning_rate": 1.347908222500298e-07, - "loss": 0.7404, - "num_input_tokens_seen": 157305355, - "step": 7368 - }, - { - "epoch": 0.8860698611194613, - "grad_norm": 2.7846449558826594, - "learning_rate": 1.3450983300442276e-07, - "loss": 0.695, - "num_input_tokens_seen": 157324305, - "step": 7369 - }, - { - "epoch": 0.8861901040101005, - "grad_norm": 1.938876681017681, - "learning_rate": 1.3422912675219044e-07, - "loss": 0.7266, - "num_input_tokens_seen": 157343780, - "step": 7370 - }, - { - "epoch": 0.8863103469007395, - "grad_norm": 2.1202783117260533, - "learning_rate": 1.339487035359166e-07, - "loss": 0.7763, - "num_input_tokens_seen": 157363870, - "step": 7371 - }, - { - "epoch": 0.8864305897913786, - "grad_norm": 2.0667737806587216, - "learning_rate": 1.3366856339814049e-07, - "loss": 0.8444, - "num_input_tokens_seen": 157384675, - "step": 7372 - }, - { - "epoch": 0.8865508326820177, - "grad_norm": 2.2264483313600723, - "learning_rate": 1.333887063813597e-07, - "loss": 0.7374, - "num_input_tokens_seen": 157402500, - "step": 7373 - }, - { - "epoch": 0.8866710755726568, - "grad_norm": 6.017501354892182, - "learning_rate": 1.331091325280278e-07, - "loss": 0.6595, - "num_input_tokens_seen": 157421190, - "step": 7374 - }, - { - "epoch": 0.8867913184632958, - "grad_norm": 2.4631218893722178, - "learning_rate": 1.3282984188055625e-07, - "loss": 0.7792, - "num_input_tokens_seen": 157440700, - "step": 7375 - }, - { - "epoch": 0.8869115613539349, - "grad_norm": 2.1149320955758952, - "learning_rate": 1.325508344813131e-07, - "loss": 0.7856, - "num_input_tokens_seen": 157459465, - "step": 7376 - }, - { - "epoch": 0.8870318042445741, - "grad_norm": 2.4888044248203482, - "learning_rate": 1.3227211037262365e-07, - "loss": 0.781, - "num_input_tokens_seen": 157476425, - "step": 7377 - }, - { - "epoch": 0.8871520471352131, - "grad_norm": 2.317447241394007, - "learning_rate": 1.319936695967696e-07, - "loss": 0.8553, - "num_input_tokens_seen": 157493970, - "step": 7378 - }, - { - "epoch": 0.8872722900258522, - "grad_norm": 2.5840236606613813, - "learning_rate": 1.3171551219599097e-07, - "loss": 0.8141, - "num_input_tokens_seen": 157512215, - "step": 7379 - }, - { - "epoch": 0.8873925329164913, - "grad_norm": 2.3898624801931456, - "learning_rate": 1.3143763821248377e-07, - "loss": 0.7702, - "num_input_tokens_seen": 157529020, - "step": 7380 - }, - { - "epoch": 0.8875127758071304, - "grad_norm": 1.8238026448901838, - "learning_rate": 1.3116004768840118e-07, - "loss": 0.7129, - "num_input_tokens_seen": 157547115, - "step": 7381 - }, - { - "epoch": 0.8876330186977694, - "grad_norm": 1.8422589716224764, - "learning_rate": 1.3088274066585303e-07, - "loss": 0.7262, - "num_input_tokens_seen": 157564445, - "step": 7382 - }, - { - "epoch": 0.8877532615884086, - "grad_norm": 2.2649777083044045, - "learning_rate": 1.3060571718690749e-07, - "loss": 0.8965, - "num_input_tokens_seen": 157581660, - "step": 7383 - }, - { - "epoch": 0.8878735044790477, - "grad_norm": 0.7839944321922209, - "learning_rate": 1.3032897729358805e-07, - "loss": 0.5974, - "num_input_tokens_seen": 157642335, - "step": 7384 - }, - { - "epoch": 0.8879937473696867, - "grad_norm": 2.6732217857331855, - "learning_rate": 1.3005252102787645e-07, - "loss": 0.7899, - "num_input_tokens_seen": 157660995, - "step": 7385 - }, - { - "epoch": 0.8881139902603259, - "grad_norm": 1.7803093473260332, - "learning_rate": 1.2977634843171025e-07, - "loss": 0.7313, - "num_input_tokens_seen": 157679010, - "step": 7386 - }, - { - "epoch": 0.888234233150965, - "grad_norm": 2.4494793090637015, - "learning_rate": 1.295004595469853e-07, - "loss": 0.7028, - "num_input_tokens_seen": 157696565, - "step": 7387 - }, - { - "epoch": 0.888354476041604, - "grad_norm": 2.362765108782187, - "learning_rate": 1.2922485441555343e-07, - "loss": 0.7468, - "num_input_tokens_seen": 157715365, - "step": 7388 - }, - { - "epoch": 0.8884747189322432, - "grad_norm": 2.3954695551139538, - "learning_rate": 1.2894953307922363e-07, - "loss": 0.8136, - "num_input_tokens_seen": 157734045, - "step": 7389 - }, - { - "epoch": 0.8885949618228822, - "grad_norm": 2.0646279259217453, - "learning_rate": 1.2867449557976208e-07, - "loss": 0.8347, - "num_input_tokens_seen": 157751865, - "step": 7390 - }, - { - "epoch": 0.8887152047135213, - "grad_norm": 2.3890209817780286, - "learning_rate": 1.283997419588916e-07, - "loss": 0.755, - "num_input_tokens_seen": 157771055, - "step": 7391 - }, - { - "epoch": 0.8888354476041604, - "grad_norm": 1.9512523927845635, - "learning_rate": 1.2812527225829216e-07, - "loss": 0.6118, - "num_input_tokens_seen": 157789000, - "step": 7392 - }, - { - "epoch": 0.8889556904947995, - "grad_norm": 7.857630504034833, - "learning_rate": 1.2785108651960076e-07, - "loss": 0.7555, - "num_input_tokens_seen": 157810355, - "step": 7393 - }, - { - "epoch": 0.8890759333854386, - "grad_norm": 2.7157426842107015, - "learning_rate": 1.275771847844105e-07, - "loss": 0.7885, - "num_input_tokens_seen": 157830820, - "step": 7394 - }, - { - "epoch": 0.8891961762760777, - "grad_norm": 1.900291048993054, - "learning_rate": 1.2730356709427302e-07, - "loss": 0.7691, - "num_input_tokens_seen": 157849220, - "step": 7395 - }, - { - "epoch": 0.8893164191667168, - "grad_norm": 2.16600915867931, - "learning_rate": 1.2703023349069542e-07, - "loss": 0.5902, - "num_input_tokens_seen": 157873790, - "step": 7396 - }, - { - "epoch": 0.8894366620573558, - "grad_norm": 2.4687199533663216, - "learning_rate": 1.2675718401514223e-07, - "loss": 0.6115, - "num_input_tokens_seen": 157897690, - "step": 7397 - }, - { - "epoch": 0.889556904947995, - "grad_norm": 2.477721961456672, - "learning_rate": 1.264844187090346e-07, - "loss": 0.7387, - "num_input_tokens_seen": 157914535, - "step": 7398 - }, - { - "epoch": 0.889677147838634, - "grad_norm": 1.812463232675726, - "learning_rate": 1.2621193761375116e-07, - "loss": 0.7456, - "num_input_tokens_seen": 157935315, - "step": 7399 - }, - { - "epoch": 0.8897973907292731, - "grad_norm": 1.807733222729297, - "learning_rate": 1.2593974077062707e-07, - "loss": 0.8404, - "num_input_tokens_seen": 157956655, - "step": 7400 - }, - { - "epoch": 0.8899176336199123, - "grad_norm": 1.6680510105211415, - "learning_rate": 1.2566782822095423e-07, - "loss": 0.6183, - "num_input_tokens_seen": 157976630, - "step": 7401 - }, - { - "epoch": 0.8900378765105513, - "grad_norm": 3.7925560935072222, - "learning_rate": 1.2539620000598162e-07, - "loss": 0.7152, - "num_input_tokens_seen": 157995685, - "step": 7402 - }, - { - "epoch": 0.8901581194011904, - "grad_norm": 1.8352876282218142, - "learning_rate": 1.2512485616691492e-07, - "loss": 0.7905, - "num_input_tokens_seen": 158012460, - "step": 7403 - }, - { - "epoch": 0.8902783622918296, - "grad_norm": 2.6441627331042903, - "learning_rate": 1.2485379674491681e-07, - "loss": 0.7992, - "num_input_tokens_seen": 158038375, - "step": 7404 - }, - { - "epoch": 0.8903986051824686, - "grad_norm": 2.5340851572212753, - "learning_rate": 1.2458302178110702e-07, - "loss": 0.7878, - "num_input_tokens_seen": 158056460, - "step": 7405 - }, - { - "epoch": 0.8905188480731077, - "grad_norm": 2.099011793590973, - "learning_rate": 1.2431253131656118e-07, - "loss": 0.8178, - "num_input_tokens_seen": 158075655, - "step": 7406 - }, - { - "epoch": 0.8906390909637467, - "grad_norm": 2.7639861033090947, - "learning_rate": 1.240423253923133e-07, - "loss": 0.7528, - "num_input_tokens_seen": 158094980, - "step": 7407 - }, - { - "epoch": 0.8907593338543859, - "grad_norm": 3.278476556308301, - "learning_rate": 1.237724040493533e-07, - "loss": 0.6816, - "num_input_tokens_seen": 158113325, - "step": 7408 - }, - { - "epoch": 0.8908795767450249, - "grad_norm": 2.9028990544002333, - "learning_rate": 1.2350276732862773e-07, - "loss": 0.7308, - "num_input_tokens_seen": 158134070, - "step": 7409 - }, - { - "epoch": 0.890999819635664, - "grad_norm": 0.8592370892965188, - "learning_rate": 1.2323341527103993e-07, - "loss": 0.6169, - "num_input_tokens_seen": 158188990, - "step": 7410 - }, - { - "epoch": 0.8911200625263032, - "grad_norm": 5.5175932348302235, - "learning_rate": 1.2296434791745135e-07, - "loss": 0.8417, - "num_input_tokens_seen": 158207160, - "step": 7411 - }, - { - "epoch": 0.8912403054169422, - "grad_norm": 4.249353177600977, - "learning_rate": 1.2269556530867875e-07, - "loss": 0.7654, - "num_input_tokens_seen": 158225435, - "step": 7412 - }, - { - "epoch": 0.8913605483075813, - "grad_norm": 2.3149239649135134, - "learning_rate": 1.2242706748549614e-07, - "loss": 0.815, - "num_input_tokens_seen": 158243150, - "step": 7413 - }, - { - "epoch": 0.8914807911982204, - "grad_norm": 2.452027937539289, - "learning_rate": 1.2215885448863428e-07, - "loss": 0.8135, - "num_input_tokens_seen": 158263745, - "step": 7414 - }, - { - "epoch": 0.8916010340888595, - "grad_norm": 2.0436152163925168, - "learning_rate": 1.2189092635878152e-07, - "loss": 0.7981, - "num_input_tokens_seen": 158284915, - "step": 7415 - }, - { - "epoch": 0.8917212769794985, - "grad_norm": 1.8952763475669507, - "learning_rate": 1.21623283136582e-07, - "loss": 0.7725, - "num_input_tokens_seen": 158303580, - "step": 7416 - }, - { - "epoch": 0.8918415198701377, - "grad_norm": 2.2136266171035612, - "learning_rate": 1.2135592486263678e-07, - "loss": 0.8058, - "num_input_tokens_seen": 158322550, - "step": 7417 - }, - { - "epoch": 0.8919617627607768, - "grad_norm": 1.7865943064220071, - "learning_rate": 1.2108885157750415e-07, - "loss": 0.6047, - "num_input_tokens_seen": 158344630, - "step": 7418 - }, - { - "epoch": 0.8920820056514158, - "grad_norm": 1.80477949447005, - "learning_rate": 1.2082206332169897e-07, - "loss": 0.7949, - "num_input_tokens_seen": 158364445, - "step": 7419 - }, - { - "epoch": 0.892202248542055, - "grad_norm": 2.4332201211360287, - "learning_rate": 1.2055556013569246e-07, - "loss": 0.7244, - "num_input_tokens_seen": 158379675, - "step": 7420 - }, - { - "epoch": 0.892322491432694, - "grad_norm": 1.72965925276919, - "learning_rate": 1.2028934205991315e-07, - "loss": 0.8071, - "num_input_tokens_seen": 158398715, - "step": 7421 - }, - { - "epoch": 0.8924427343233331, - "grad_norm": 1.7336509221232885, - "learning_rate": 1.2002340913474607e-07, - "loss": 0.7608, - "num_input_tokens_seen": 158422070, - "step": 7422 - }, - { - "epoch": 0.8925629772139723, - "grad_norm": 2.078205066315998, - "learning_rate": 1.1975776140053317e-07, - "loss": 0.736, - "num_input_tokens_seen": 158441760, - "step": 7423 - }, - { - "epoch": 0.8926832201046113, - "grad_norm": 2.6634472117488865, - "learning_rate": 1.194923988975729e-07, - "loss": 0.7294, - "num_input_tokens_seen": 158461080, - "step": 7424 - }, - { - "epoch": 0.8928034629952504, - "grad_norm": 2.977311174455832, - "learning_rate": 1.192273216661206e-07, - "loss": 0.7316, - "num_input_tokens_seen": 158478890, - "step": 7425 - }, - { - "epoch": 0.8929237058858895, - "grad_norm": 0.779681279661497, - "learning_rate": 1.1896252974638787e-07, - "loss": 0.6079, - "num_input_tokens_seen": 158540300, - "step": 7426 - }, - { - "epoch": 0.8930439487765286, - "grad_norm": 2.592134327599314, - "learning_rate": 1.1869802317854394e-07, - "loss": 0.79, - "num_input_tokens_seen": 158563805, - "step": 7427 - }, - { - "epoch": 0.8931641916671677, - "grad_norm": 2.0683654205797732, - "learning_rate": 1.1843380200271425e-07, - "loss": 0.7218, - "num_input_tokens_seen": 158582725, - "step": 7428 - }, - { - "epoch": 0.8932844345578068, - "grad_norm": 2.2712199177089634, - "learning_rate": 1.1816986625898073e-07, - "loss": 0.7995, - "num_input_tokens_seen": 158602030, - "step": 7429 - }, - { - "epoch": 0.8934046774484459, - "grad_norm": 2.043293741541181, - "learning_rate": 1.1790621598738204e-07, - "loss": 0.7538, - "num_input_tokens_seen": 158620065, - "step": 7430 - }, - { - "epoch": 0.8935249203390849, - "grad_norm": 2.3460672567136887, - "learning_rate": 1.176428512279144e-07, - "loss": 0.7501, - "num_input_tokens_seen": 158640505, - "step": 7431 - }, - { - "epoch": 0.8936451632297241, - "grad_norm": 2.142117129832284, - "learning_rate": 1.173797720205294e-07, - "loss": 0.7694, - "num_input_tokens_seen": 158658260, - "step": 7432 - }, - { - "epoch": 0.8937654061203631, - "grad_norm": 4.410563840176427, - "learning_rate": 1.1711697840513646e-07, - "loss": 0.7188, - "num_input_tokens_seen": 158677415, - "step": 7433 - }, - { - "epoch": 0.8938856490110022, - "grad_norm": 2.981879446490988, - "learning_rate": 1.1685447042160012e-07, - "loss": 0.6999, - "num_input_tokens_seen": 158695170, - "step": 7434 - }, - { - "epoch": 0.8940058919016414, - "grad_norm": 2.0064303955234886, - "learning_rate": 1.1659224810974367e-07, - "loss": 0.7063, - "num_input_tokens_seen": 158714850, - "step": 7435 - }, - { - "epoch": 0.8941261347922804, - "grad_norm": 1.6583843680478008, - "learning_rate": 1.1633031150934591e-07, - "loss": 0.6771, - "num_input_tokens_seen": 158737600, - "step": 7436 - }, - { - "epoch": 0.8942463776829195, - "grad_norm": 2.4075523325168495, - "learning_rate": 1.1606866066014199e-07, - "loss": 0.7962, - "num_input_tokens_seen": 158756370, - "step": 7437 - }, - { - "epoch": 0.8943666205735585, - "grad_norm": 2.4275949545876614, - "learning_rate": 1.1580729560182412e-07, - "loss": 0.7429, - "num_input_tokens_seen": 158771945, - "step": 7438 - }, - { - "epoch": 0.8944868634641977, - "grad_norm": 2.4113990118787743, - "learning_rate": 1.1554621637404171e-07, - "loss": 0.7096, - "num_input_tokens_seen": 158789755, - "step": 7439 - }, - { - "epoch": 0.8946071063548368, - "grad_norm": 2.1845927393315874, - "learning_rate": 1.1528542301639999e-07, - "loss": 0.6025, - "num_input_tokens_seen": 158806265, - "step": 7440 - }, - { - "epoch": 0.8947273492454758, - "grad_norm": 3.303139965580428, - "learning_rate": 1.1502491556846105e-07, - "loss": 0.8148, - "num_input_tokens_seen": 158824480, - "step": 7441 - }, - { - "epoch": 0.894847592136115, - "grad_norm": 2.746326716284547, - "learning_rate": 1.1476469406974331e-07, - "loss": 0.8141, - "num_input_tokens_seen": 158839800, - "step": 7442 - }, - { - "epoch": 0.894967835026754, - "grad_norm": 2.1581172915131446, - "learning_rate": 1.1450475855972297e-07, - "loss": 0.7645, - "num_input_tokens_seen": 158860310, - "step": 7443 - }, - { - "epoch": 0.8950880779173931, - "grad_norm": 2.127983045321389, - "learning_rate": 1.1424510907783158e-07, - "loss": 0.6992, - "num_input_tokens_seen": 158877310, - "step": 7444 - }, - { - "epoch": 0.8952083208080323, - "grad_norm": 1.6929488287054206, - "learning_rate": 1.1398574566345787e-07, - "loss": 0.8171, - "num_input_tokens_seen": 158897665, - "step": 7445 - }, - { - "epoch": 0.8953285636986713, - "grad_norm": 2.2375815494616984, - "learning_rate": 1.1372666835594702e-07, - "loss": 0.8229, - "num_input_tokens_seen": 158915710, - "step": 7446 - }, - { - "epoch": 0.8954488065893104, - "grad_norm": 2.9188268774038146, - "learning_rate": 1.1346787719460071e-07, - "loss": 0.7136, - "num_input_tokens_seen": 158934315, - "step": 7447 - }, - { - "epoch": 0.8955690494799495, - "grad_norm": 1.961671483476635, - "learning_rate": 1.1320937221867732e-07, - "loss": 0.7241, - "num_input_tokens_seen": 158951615, - "step": 7448 - }, - { - "epoch": 0.8956892923705886, - "grad_norm": 1.8541193162208953, - "learning_rate": 1.1295115346739215e-07, - "loss": 0.7884, - "num_input_tokens_seen": 158971335, - "step": 7449 - }, - { - "epoch": 0.8958095352612276, - "grad_norm": 2.5213796887906352, - "learning_rate": 1.1269322097991629e-07, - "loss": 0.7333, - "num_input_tokens_seen": 158994340, - "step": 7450 - }, - { - "epoch": 0.8959297781518668, - "grad_norm": 2.2469988960220384, - "learning_rate": 1.1243557479537869e-07, - "loss": 0.679, - "num_input_tokens_seen": 159013950, - "step": 7451 - }, - { - "epoch": 0.8960500210425059, - "grad_norm": 1.9917502640688571, - "learning_rate": 1.121782149528634e-07, - "loss": 0.6835, - "num_input_tokens_seen": 159030770, - "step": 7452 - }, - { - "epoch": 0.8961702639331449, - "grad_norm": 2.165014867472244, - "learning_rate": 1.1192114149141208e-07, - "loss": 0.7872, - "num_input_tokens_seen": 159050125, - "step": 7453 - }, - { - "epoch": 0.8962905068237841, - "grad_norm": 3.1238865484205225, - "learning_rate": 1.1166435445002197e-07, - "loss": 0.6501, - "num_input_tokens_seen": 159067515, - "step": 7454 - }, - { - "epoch": 0.8964107497144231, - "grad_norm": 3.1551304464595242, - "learning_rate": 1.1140785386764818e-07, - "loss": 0.6768, - "num_input_tokens_seen": 159085935, - "step": 7455 - }, - { - "epoch": 0.8965309926050622, - "grad_norm": 2.2656326700830607, - "learning_rate": 1.1115163978320153e-07, - "loss": 0.6961, - "num_input_tokens_seen": 159104385, - "step": 7456 - }, - { - "epoch": 0.8966512354957014, - "grad_norm": 2.098923854081124, - "learning_rate": 1.1089571223554917e-07, - "loss": 0.8196, - "num_input_tokens_seen": 159124990, - "step": 7457 - }, - { - "epoch": 0.8967714783863404, - "grad_norm": 1.821318755187835, - "learning_rate": 1.1064007126351494e-07, - "loss": 0.8347, - "num_input_tokens_seen": 159145425, - "step": 7458 - }, - { - "epoch": 0.8968917212769795, - "grad_norm": 2.150388695466973, - "learning_rate": 1.1038471690588003e-07, - "loss": 0.753, - "num_input_tokens_seen": 159164290, - "step": 7459 - }, - { - "epoch": 0.8970119641676186, - "grad_norm": 1.9845461245954064, - "learning_rate": 1.1012964920138124e-07, - "loss": 0.7941, - "num_input_tokens_seen": 159183595, - "step": 7460 - }, - { - "epoch": 0.8971322070582577, - "grad_norm": 2.8961545752178472, - "learning_rate": 1.0987486818871205e-07, - "loss": 0.7521, - "num_input_tokens_seen": 159206905, - "step": 7461 - }, - { - "epoch": 0.8972524499488967, - "grad_norm": 6.701494773359898, - "learning_rate": 1.0962037390652245e-07, - "loss": 0.7246, - "num_input_tokens_seen": 159225645, - "step": 7462 - }, - { - "epoch": 0.8973726928395359, - "grad_norm": 2.24292520802731, - "learning_rate": 1.0936616639341911e-07, - "loss": 0.719, - "num_input_tokens_seen": 159245655, - "step": 7463 - }, - { - "epoch": 0.897492935730175, - "grad_norm": 0.9956366844433466, - "learning_rate": 1.0911224568796496e-07, - "loss": 0.5653, - "num_input_tokens_seen": 159303570, - "step": 7464 - }, - { - "epoch": 0.897613178620814, - "grad_norm": 2.110120525028927, - "learning_rate": 1.0885861182867984e-07, - "loss": 0.7025, - "num_input_tokens_seen": 159321395, - "step": 7465 - }, - { - "epoch": 0.8977334215114532, - "grad_norm": 2.1731514485303545, - "learning_rate": 1.0860526485403942e-07, - "loss": 0.7007, - "num_input_tokens_seen": 159342390, - "step": 7466 - }, - { - "epoch": 0.8978536644020922, - "grad_norm": 1.7461191273470114, - "learning_rate": 1.0835220480247675e-07, - "loss": 0.7707, - "num_input_tokens_seen": 159360605, - "step": 7467 - }, - { - "epoch": 0.8979739072927313, - "grad_norm": 2.5862737864272782, - "learning_rate": 1.0809943171238067e-07, - "loss": 0.8323, - "num_input_tokens_seen": 159378250, - "step": 7468 - }, - { - "epoch": 0.8980941501833704, - "grad_norm": 2.5868554760031923, - "learning_rate": 1.078469456220965e-07, - "loss": 0.6342, - "num_input_tokens_seen": 159398125, - "step": 7469 - }, - { - "epoch": 0.8982143930740095, - "grad_norm": 2.414086964815238, - "learning_rate": 1.0759474656992584e-07, - "loss": 0.6861, - "num_input_tokens_seen": 159420615, - "step": 7470 - }, - { - "epoch": 0.8983346359646486, - "grad_norm": 2.588448483697621, - "learning_rate": 1.0734283459412785e-07, - "loss": 0.7756, - "num_input_tokens_seen": 159437185, - "step": 7471 - }, - { - "epoch": 0.8984548788552876, - "grad_norm": 1.870896329250706, - "learning_rate": 1.0709120973291707e-07, - "loss": 0.7982, - "num_input_tokens_seen": 159456685, - "step": 7472 - }, - { - "epoch": 0.8985751217459268, - "grad_norm": 3.038158459974717, - "learning_rate": 1.0683987202446475e-07, - "loss": 0.7763, - "num_input_tokens_seen": 159474590, - "step": 7473 - }, - { - "epoch": 0.8986953646365659, - "grad_norm": 2.0296187293502714, - "learning_rate": 1.0658882150689841e-07, - "loss": 0.7007, - "num_input_tokens_seen": 159493170, - "step": 7474 - }, - { - "epoch": 0.8988156075272049, - "grad_norm": 3.680987775662778, - "learning_rate": 1.0633805821830243e-07, - "loss": 0.7767, - "num_input_tokens_seen": 159509575, - "step": 7475 - }, - { - "epoch": 0.8989358504178441, - "grad_norm": 2.6005847246806426, - "learning_rate": 1.0608758219671798e-07, - "loss": 0.8269, - "num_input_tokens_seen": 159528335, - "step": 7476 - }, - { - "epoch": 0.8990560933084831, - "grad_norm": 1.7060250119074871, - "learning_rate": 1.0583739348014087e-07, - "loss": 0.7032, - "num_input_tokens_seen": 159549140, - "step": 7477 - }, - { - "epoch": 0.8991763361991222, - "grad_norm": 4.536282916458728, - "learning_rate": 1.0558749210652518e-07, - "loss": 0.841, - "num_input_tokens_seen": 159568790, - "step": 7478 - }, - { - "epoch": 0.8992965790897613, - "grad_norm": 1.6573231953673202, - "learning_rate": 1.053378781137808e-07, - "loss": 0.8475, - "num_input_tokens_seen": 159589430, - "step": 7479 - }, - { - "epoch": 0.8994168219804004, - "grad_norm": 1.862817181323433, - "learning_rate": 1.0508855153977392e-07, - "loss": 0.7689, - "num_input_tokens_seen": 159605615, - "step": 7480 - }, - { - "epoch": 0.8995370648710395, - "grad_norm": 2.860157491264734, - "learning_rate": 1.0483951242232714e-07, - "loss": 0.6626, - "num_input_tokens_seen": 159625810, - "step": 7481 - }, - { - "epoch": 0.8996573077616786, - "grad_norm": 1.0992852768122545, - "learning_rate": 1.0459076079921913e-07, - "loss": 0.6085, - "num_input_tokens_seen": 159678190, - "step": 7482 - }, - { - "epoch": 0.8997775506523177, - "grad_norm": 2.191421284110555, - "learning_rate": 1.0434229670818618e-07, - "loss": 0.8363, - "num_input_tokens_seen": 159694585, - "step": 7483 - }, - { - "epoch": 0.8998977935429567, - "grad_norm": 2.6993337303124676, - "learning_rate": 1.0409412018691944e-07, - "loss": 0.7942, - "num_input_tokens_seen": 159714770, - "step": 7484 - }, - { - "epoch": 0.9000180364335959, - "grad_norm": 2.538244067681212, - "learning_rate": 1.0384623127306724e-07, - "loss": 0.7521, - "num_input_tokens_seen": 159731835, - "step": 7485 - }, - { - "epoch": 0.900138279324235, - "grad_norm": 1.8307027562518343, - "learning_rate": 1.0359863000423397e-07, - "loss": 0.7813, - "num_input_tokens_seen": 159749690, - "step": 7486 - }, - { - "epoch": 0.900258522214874, - "grad_norm": 1.7498627808467078, - "learning_rate": 1.0335131641798089e-07, - "loss": 0.7109, - "num_input_tokens_seen": 159771370, - "step": 7487 - }, - { - "epoch": 0.9003787651055132, - "grad_norm": 0.8594861350953386, - "learning_rate": 1.0310429055182512e-07, - "loss": 0.6277, - "num_input_tokens_seen": 159825410, - "step": 7488 - }, - { - "epoch": 0.9004990079961522, - "grad_norm": 2.2680041214321682, - "learning_rate": 1.0285755244324024e-07, - "loss": 0.7254, - "num_input_tokens_seen": 159845875, - "step": 7489 - }, - { - "epoch": 0.9006192508867913, - "grad_norm": 1.5113856022716663, - "learning_rate": 1.0261110212965629e-07, - "loss": 0.6827, - "num_input_tokens_seen": 159867390, - "step": 7490 - }, - { - "epoch": 0.9007394937774305, - "grad_norm": 3.054985057124256, - "learning_rate": 1.023649396484596e-07, - "loss": 0.7891, - "num_input_tokens_seen": 159886165, - "step": 7491 - }, - { - "epoch": 0.9008597366680695, - "grad_norm": 2.624098147592181, - "learning_rate": 1.0211906503699275e-07, - "loss": 0.6699, - "num_input_tokens_seen": 159908860, - "step": 7492 - }, - { - "epoch": 0.9009799795587086, - "grad_norm": 3.0606136626489064, - "learning_rate": 1.0187347833255477e-07, - "loss": 0.8175, - "num_input_tokens_seen": 159924485, - "step": 7493 - }, - { - "epoch": 0.9011002224493477, - "grad_norm": 1.8862268761230878, - "learning_rate": 1.0162817957240056e-07, - "loss": 0.7954, - "num_input_tokens_seen": 159944100, - "step": 7494 - }, - { - "epoch": 0.9012204653399868, - "grad_norm": 0.9996754349651223, - "learning_rate": 1.0138316879374253e-07, - "loss": 0.6832, - "num_input_tokens_seen": 160013110, - "step": 7495 - }, - { - "epoch": 0.9013407082306258, - "grad_norm": 2.3090022248454027, - "learning_rate": 1.0113844603374833e-07, - "loss": 0.7294, - "num_input_tokens_seen": 160029355, - "step": 7496 - }, - { - "epoch": 0.901460951121265, - "grad_norm": 2.7184849437388854, - "learning_rate": 1.0089401132954178e-07, - "loss": 0.713, - "num_input_tokens_seen": 160047055, - "step": 7497 - }, - { - "epoch": 0.9015811940119041, - "grad_norm": 3.1639211591771494, - "learning_rate": 1.006498647182037e-07, - "loss": 0.7247, - "num_input_tokens_seen": 160065430, - "step": 7498 - }, - { - "epoch": 0.9017014369025431, - "grad_norm": 2.515584669544843, - "learning_rate": 1.004060062367713e-07, - "loss": 0.7159, - "num_input_tokens_seen": 160086245, - "step": 7499 - }, - { - "epoch": 0.9018216797931822, - "grad_norm": 2.207496324924907, - "learning_rate": 1.0016243592223728e-07, - "loss": 0.6892, - "num_input_tokens_seen": 160106365, - "step": 7500 - }, - { - "epoch": 0.9019419226838213, - "grad_norm": 2.0020856950507615, - "learning_rate": 9.991915381155114e-08, - "loss": 0.6456, - "num_input_tokens_seen": 160129065, - "step": 7501 - }, - { - "epoch": 0.9020621655744604, - "grad_norm": 6.1518308439457785, - "learning_rate": 9.967615994161849e-08, - "loss": 0.7562, - "num_input_tokens_seen": 160148445, - "step": 7502 - }, - { - "epoch": 0.9021824084650995, - "grad_norm": 2.881232592612134, - "learning_rate": 9.943345434930161e-08, - "loss": 0.7753, - "num_input_tokens_seen": 160168415, - "step": 7503 - }, - { - "epoch": 0.9023026513557386, - "grad_norm": 2.3371083590113426, - "learning_rate": 9.919103707141862e-08, - "loss": 0.6815, - "num_input_tokens_seen": 160187015, - "step": 7504 - }, - { - "epoch": 0.9024228942463777, - "grad_norm": 15.34603127945343, - "learning_rate": 9.89489081447441e-08, - "loss": 0.7644, - "num_input_tokens_seen": 160203935, - "step": 7505 - }, - { - "epoch": 0.9025431371370167, - "grad_norm": 2.2168655036174623, - "learning_rate": 9.870706760600844e-08, - "loss": 0.8242, - "num_input_tokens_seen": 160223605, - "step": 7506 - }, - { - "epoch": 0.9026633800276559, - "grad_norm": 2.6545732032829754, - "learning_rate": 9.846551549189918e-08, - "loss": 0.7292, - "num_input_tokens_seen": 160242930, - "step": 7507 - }, - { - "epoch": 0.902783622918295, - "grad_norm": 2.516511140837101, - "learning_rate": 9.822425183905925e-08, - "loss": 0.6799, - "num_input_tokens_seen": 160263175, - "step": 7508 - }, - { - "epoch": 0.902903865808934, - "grad_norm": 0.9948757554707756, - "learning_rate": 9.798327668408823e-08, - "loss": 0.7757, - "num_input_tokens_seen": 160324530, - "step": 7509 - }, - { - "epoch": 0.9030241086995732, - "grad_norm": 3.2401104748490774, - "learning_rate": 9.774259006354158e-08, - "loss": 0.6864, - "num_input_tokens_seen": 160344320, - "step": 7510 - }, - { - "epoch": 0.9031443515902122, - "grad_norm": 2.502940523264334, - "learning_rate": 9.750219201393184e-08, - "loss": 0.753, - "num_input_tokens_seen": 160364005, - "step": 7511 - }, - { - "epoch": 0.9032645944808513, - "grad_norm": 1.8601252712663567, - "learning_rate": 9.726208257172697e-08, - "loss": 0.7745, - "num_input_tokens_seen": 160385420, - "step": 7512 - }, - { - "epoch": 0.9033848373714904, - "grad_norm": 2.3912634199003127, - "learning_rate": 9.702226177335115e-08, - "loss": 0.7459, - "num_input_tokens_seen": 160403635, - "step": 7513 - }, - { - "epoch": 0.9035050802621295, - "grad_norm": 1.7436473053112613, - "learning_rate": 9.678272965518508e-08, - "loss": 0.719, - "num_input_tokens_seen": 160424640, - "step": 7514 - }, - { - "epoch": 0.9036253231527686, - "grad_norm": 2.7474470451241904, - "learning_rate": 9.65434862535659e-08, - "loss": 0.6687, - "num_input_tokens_seen": 160443730, - "step": 7515 - }, - { - "epoch": 0.9037455660434077, - "grad_norm": 4.535659387411721, - "learning_rate": 9.630453160478635e-08, - "loss": 0.6416, - "num_input_tokens_seen": 160458805, - "step": 7516 - }, - { - "epoch": 0.9038658089340468, - "grad_norm": 1.969192699228269, - "learning_rate": 9.60658657450959e-08, - "loss": 0.8143, - "num_input_tokens_seen": 160478825, - "step": 7517 - }, - { - "epoch": 0.9039860518246858, - "grad_norm": 1.802472414416805, - "learning_rate": 9.582748871069957e-08, - "loss": 0.7881, - "num_input_tokens_seen": 160497985, - "step": 7518 - }, - { - "epoch": 0.904106294715325, - "grad_norm": 2.0273494435806922, - "learning_rate": 9.558940053775954e-08, - "loss": 0.8164, - "num_input_tokens_seen": 160516345, - "step": 7519 - }, - { - "epoch": 0.904226537605964, - "grad_norm": 2.229178075447839, - "learning_rate": 9.535160126239317e-08, - "loss": 0.6803, - "num_input_tokens_seen": 160532690, - "step": 7520 - }, - { - "epoch": 0.9043467804966031, - "grad_norm": 1.6338859475216079, - "learning_rate": 9.511409092067446e-08, - "loss": 0.7059, - "num_input_tokens_seen": 160552765, - "step": 7521 - }, - { - "epoch": 0.9044670233872423, - "grad_norm": 1.9095735221204646, - "learning_rate": 9.487686954863327e-08, - "loss": 0.6673, - "num_input_tokens_seen": 160572205, - "step": 7522 - }, - { - "epoch": 0.9045872662778813, - "grad_norm": 2.2797108697620443, - "learning_rate": 9.46399371822566e-08, - "loss": 0.7589, - "num_input_tokens_seen": 160591700, - "step": 7523 - }, - { - "epoch": 0.9047075091685204, - "grad_norm": 4.848339218755644, - "learning_rate": 9.440329385748657e-08, - "loss": 0.7122, - "num_input_tokens_seen": 160608490, - "step": 7524 - }, - { - "epoch": 0.9048277520591596, - "grad_norm": 1.8366682425941812, - "learning_rate": 9.41669396102216e-08, - "loss": 0.7065, - "num_input_tokens_seen": 160626460, - "step": 7525 - }, - { - "epoch": 0.9049479949497986, - "grad_norm": 2.1311443679812627, - "learning_rate": 9.393087447631631e-08, - "loss": 0.7696, - "num_input_tokens_seen": 160644460, - "step": 7526 - }, - { - "epoch": 0.9050682378404377, - "grad_norm": 1.9598539362045724, - "learning_rate": 9.36950984915823e-08, - "loss": 0.7187, - "num_input_tokens_seen": 160662535, - "step": 7527 - }, - { - "epoch": 0.9051884807310768, - "grad_norm": 2.1223043316954446, - "learning_rate": 9.345961169178607e-08, - "loss": 0.6981, - "num_input_tokens_seen": 160681940, - "step": 7528 - }, - { - "epoch": 0.9053087236217159, - "grad_norm": 1.568713773382654, - "learning_rate": 9.322441411265081e-08, - "loss": 0.7197, - "num_input_tokens_seen": 160702645, - "step": 7529 - }, - { - "epoch": 0.9054289665123549, - "grad_norm": 2.146973649963694, - "learning_rate": 9.298950578985554e-08, - "loss": 0.7263, - "num_input_tokens_seen": 160719440, - "step": 7530 - }, - { - "epoch": 0.905549209402994, - "grad_norm": 6.157937870752905, - "learning_rate": 9.275488675903642e-08, - "loss": 0.7066, - "num_input_tokens_seen": 160738105, - "step": 7531 - }, - { - "epoch": 0.9056694522936332, - "grad_norm": 2.1803037634972084, - "learning_rate": 9.252055705578454e-08, - "loss": 0.7271, - "num_input_tokens_seen": 160757325, - "step": 7532 - }, - { - "epoch": 0.9057896951842722, - "grad_norm": 1.7875094097835558, - "learning_rate": 9.228651671564747e-08, - "loss": 0.7097, - "num_input_tokens_seen": 160779075, - "step": 7533 - }, - { - "epoch": 0.9059099380749113, - "grad_norm": 1.6822523971466088, - "learning_rate": 9.205276577412901e-08, - "loss": 0.7753, - "num_input_tokens_seen": 160801575, - "step": 7534 - }, - { - "epoch": 0.9060301809655504, - "grad_norm": 4.3546995619916595, - "learning_rate": 9.181930426668905e-08, - "loss": 0.7557, - "num_input_tokens_seen": 160818090, - "step": 7535 - }, - { - "epoch": 0.9061504238561895, - "grad_norm": 1.7066634306694526, - "learning_rate": 9.158613222874346e-08, - "loss": 0.6745, - "num_input_tokens_seen": 160839435, - "step": 7536 - }, - { - "epoch": 0.9062706667468285, - "grad_norm": 1.7270450184082642, - "learning_rate": 9.135324969566416e-08, - "loss": 0.8138, - "num_input_tokens_seen": 160858655, - "step": 7537 - }, - { - "epoch": 0.9063909096374677, - "grad_norm": 2.2086411011526232, - "learning_rate": 9.112065670277913e-08, - "loss": 0.7436, - "num_input_tokens_seen": 160874740, - "step": 7538 - }, - { - "epoch": 0.9065111525281068, - "grad_norm": 2.0603104100715526, - "learning_rate": 9.088835328537303e-08, - "loss": 0.7189, - "num_input_tokens_seen": 160896050, - "step": 7539 - }, - { - "epoch": 0.9066313954187458, - "grad_norm": 3.1993694370955117, - "learning_rate": 9.065633947868568e-08, - "loss": 0.7104, - "num_input_tokens_seen": 160915375, - "step": 7540 - }, - { - "epoch": 0.906751638309385, - "grad_norm": 11.465692091360646, - "learning_rate": 9.042461531791379e-08, - "loss": 0.7926, - "num_input_tokens_seen": 160933515, - "step": 7541 - }, - { - "epoch": 0.906871881200024, - "grad_norm": 1.867101866808219, - "learning_rate": 9.019318083820903e-08, - "loss": 0.7713, - "num_input_tokens_seen": 160951815, - "step": 7542 - }, - { - "epoch": 0.9069921240906631, - "grad_norm": 2.134552912810663, - "learning_rate": 8.996203607468045e-08, - "loss": 0.8428, - "num_input_tokens_seen": 160970535, - "step": 7543 - }, - { - "epoch": 0.9071123669813023, - "grad_norm": 1.776671957933852, - "learning_rate": 8.973118106239241e-08, - "loss": 0.7514, - "num_input_tokens_seen": 160992860, - "step": 7544 - }, - { - "epoch": 0.9072326098719413, - "grad_norm": 2.1123227885970226, - "learning_rate": 8.95006158363656e-08, - "loss": 0.94, - "num_input_tokens_seen": 161012765, - "step": 7545 - }, - { - "epoch": 0.9073528527625804, - "grad_norm": 5.205004338763424, - "learning_rate": 8.927034043157577e-08, - "loss": 0.7693, - "num_input_tokens_seen": 161031575, - "step": 7546 - }, - { - "epoch": 0.9074730956532195, - "grad_norm": 2.5892651612581736, - "learning_rate": 8.904035488295658e-08, - "loss": 0.7298, - "num_input_tokens_seen": 161050795, - "step": 7547 - }, - { - "epoch": 0.9075933385438586, - "grad_norm": 0.7162675044292636, - "learning_rate": 8.881065922539588e-08, - "loss": 0.5642, - "num_input_tokens_seen": 161110955, - "step": 7548 - }, - { - "epoch": 0.9077135814344977, - "grad_norm": 1.9819170542957976, - "learning_rate": 8.85812534937389e-08, - "loss": 0.7366, - "num_input_tokens_seen": 161128775, - "step": 7549 - }, - { - "epoch": 0.9078338243251368, - "grad_norm": 2.641616198754066, - "learning_rate": 8.835213772278583e-08, - "loss": 0.6626, - "num_input_tokens_seen": 161145350, - "step": 7550 - }, - { - "epoch": 0.9079540672157759, - "grad_norm": 1.9311448888048481, - "learning_rate": 8.812331194729373e-08, - "loss": 0.7791, - "num_input_tokens_seen": 161164715, - "step": 7551 - }, - { - "epoch": 0.9080743101064149, - "grad_norm": 2.3363297893957315, - "learning_rate": 8.789477620197484e-08, - "loss": 0.7186, - "num_input_tokens_seen": 161183960, - "step": 7552 - }, - { - "epoch": 0.9081945529970541, - "grad_norm": 9.535324200458785, - "learning_rate": 8.766653052149831e-08, - "loss": 0.7884, - "num_input_tokens_seen": 161198865, - "step": 7553 - }, - { - "epoch": 0.9083147958876931, - "grad_norm": 2.1869030122515847, - "learning_rate": 8.743857494048823e-08, - "loss": 0.7364, - "num_input_tokens_seen": 161215400, - "step": 7554 - }, - { - "epoch": 0.9084350387783322, - "grad_norm": 2.4098919368440184, - "learning_rate": 8.721090949352605e-08, - "loss": 0.6317, - "num_input_tokens_seen": 161231360, - "step": 7555 - }, - { - "epoch": 0.9085552816689714, - "grad_norm": 2.3997348097620343, - "learning_rate": 8.698353421514793e-08, - "loss": 0.7227, - "num_input_tokens_seen": 161249455, - "step": 7556 - }, - { - "epoch": 0.9086755245596104, - "grad_norm": 6.159752386165655, - "learning_rate": 8.67564491398467e-08, - "loss": 0.7919, - "num_input_tokens_seen": 161266180, - "step": 7557 - }, - { - "epoch": 0.9087957674502495, - "grad_norm": 2.521642549208355, - "learning_rate": 8.652965430207082e-08, - "loss": 0.7367, - "num_input_tokens_seen": 161283805, - "step": 7558 - }, - { - "epoch": 0.9089160103408886, - "grad_norm": 2.690236497598766, - "learning_rate": 8.630314973622521e-08, - "loss": 0.6493, - "num_input_tokens_seen": 161301070, - "step": 7559 - }, - { - "epoch": 0.9090362532315277, - "grad_norm": 2.8108221454400737, - "learning_rate": 8.607693547666995e-08, - "loss": 0.7066, - "num_input_tokens_seen": 161323330, - "step": 7560 - }, - { - "epoch": 0.9091564961221668, - "grad_norm": 0.9286950482000047, - "learning_rate": 8.585101155772201e-08, - "loss": 0.625, - "num_input_tokens_seen": 161385170, - "step": 7561 - }, - { - "epoch": 0.9092767390128058, - "grad_norm": 1.9051620376967369, - "learning_rate": 8.562537801365354e-08, - "loss": 0.6815, - "num_input_tokens_seen": 161404625, - "step": 7562 - }, - { - "epoch": 0.909396981903445, - "grad_norm": 1.8887367327230244, - "learning_rate": 8.540003487869362e-08, - "loss": 0.6924, - "num_input_tokens_seen": 161426015, - "step": 7563 - }, - { - "epoch": 0.909517224794084, - "grad_norm": 2.272450348568467, - "learning_rate": 8.51749821870258e-08, - "loss": 0.7888, - "num_input_tokens_seen": 161443665, - "step": 7564 - }, - { - "epoch": 0.9096374676847231, - "grad_norm": 2.3608990691186404, - "learning_rate": 8.495021997279073e-08, - "loss": 0.6935, - "num_input_tokens_seen": 161461410, - "step": 7565 - }, - { - "epoch": 0.9097577105753623, - "grad_norm": 2.2850992885359744, - "learning_rate": 8.472574827008428e-08, - "loss": 0.6565, - "num_input_tokens_seen": 161482015, - "step": 7566 - }, - { - "epoch": 0.9098779534660013, - "grad_norm": 1.6910648294287771, - "learning_rate": 8.450156711295942e-08, - "loss": 0.8321, - "num_input_tokens_seen": 161501905, - "step": 7567 - }, - { - "epoch": 0.9099981963566404, - "grad_norm": 2.382902294918769, - "learning_rate": 8.427767653542383e-08, - "loss": 0.8612, - "num_input_tokens_seen": 161516795, - "step": 7568 - }, - { - "epoch": 0.9101184392472795, - "grad_norm": 2.672725355428453, - "learning_rate": 8.405407657144148e-08, - "loss": 0.7011, - "num_input_tokens_seen": 161535675, - "step": 7569 - }, - { - "epoch": 0.9102386821379186, - "grad_norm": 2.1751604996742224, - "learning_rate": 8.38307672549321e-08, - "loss": 0.7159, - "num_input_tokens_seen": 161552715, - "step": 7570 - }, - { - "epoch": 0.9103589250285576, - "grad_norm": 2.1264823940838316, - "learning_rate": 8.360774861977216e-08, - "loss": 0.6738, - "num_input_tokens_seen": 161571555, - "step": 7571 - }, - { - "epoch": 0.9104791679191968, - "grad_norm": 2.160953622723792, - "learning_rate": 8.338502069979281e-08, - "loss": 0.7446, - "num_input_tokens_seen": 161591585, - "step": 7572 - }, - { - "epoch": 0.9105994108098359, - "grad_norm": 4.216202780987863, - "learning_rate": 8.316258352878214e-08, - "loss": 0.7924, - "num_input_tokens_seen": 161607725, - "step": 7573 - }, - { - "epoch": 0.9107196537004749, - "grad_norm": 3.191875191782117, - "learning_rate": 8.294043714048315e-08, - "loss": 0.7023, - "num_input_tokens_seen": 161626525, - "step": 7574 - }, - { - "epoch": 0.9108398965911141, - "grad_norm": 0.8626935519681387, - "learning_rate": 8.271858156859602e-08, - "loss": 0.6451, - "num_input_tokens_seen": 161691615, - "step": 7575 - }, - { - "epoch": 0.9109601394817531, - "grad_norm": 2.8407299383022813, - "learning_rate": 8.249701684677557e-08, - "loss": 0.7335, - "num_input_tokens_seen": 161712660, - "step": 7576 - }, - { - "epoch": 0.9110803823723922, - "grad_norm": 2.0580438973069257, - "learning_rate": 8.227574300863294e-08, - "loss": 0.8051, - "num_input_tokens_seen": 161732550, - "step": 7577 - }, - { - "epoch": 0.9112006252630314, - "grad_norm": 2.019764434310867, - "learning_rate": 8.205476008773548e-08, - "loss": 0.7011, - "num_input_tokens_seen": 161756270, - "step": 7578 - }, - { - "epoch": 0.9113208681536704, - "grad_norm": 2.420110162429577, - "learning_rate": 8.183406811760596e-08, - "loss": 0.8228, - "num_input_tokens_seen": 161775720, - "step": 7579 - }, - { - "epoch": 0.9114411110443095, - "grad_norm": 1.5226931927080167, - "learning_rate": 8.161366713172313e-08, - "loss": 0.7372, - "num_input_tokens_seen": 161797830, - "step": 7580 - }, - { - "epoch": 0.9115613539349486, - "grad_norm": 3.487394882747052, - "learning_rate": 8.13935571635218e-08, - "loss": 0.847, - "num_input_tokens_seen": 161812390, - "step": 7581 - }, - { - "epoch": 0.9116815968255877, - "grad_norm": 2.350631755582932, - "learning_rate": 8.117373824639196e-08, - "loss": 0.6951, - "num_input_tokens_seen": 161832375, - "step": 7582 - }, - { - "epoch": 0.9118018397162267, - "grad_norm": 0.7445561490452474, - "learning_rate": 8.095421041368067e-08, - "loss": 0.6092, - "num_input_tokens_seen": 161891510, - "step": 7583 - }, - { - "epoch": 0.9119220826068659, - "grad_norm": 2.45747923774838, - "learning_rate": 8.073497369868999e-08, - "loss": 0.7074, - "num_input_tokens_seen": 161909690, - "step": 7584 - }, - { - "epoch": 0.912042325497505, - "grad_norm": 2.0020318037415072, - "learning_rate": 8.051602813467772e-08, - "loss": 0.7447, - "num_input_tokens_seen": 161931265, - "step": 7585 - }, - { - "epoch": 0.912162568388144, - "grad_norm": 1.870569300231878, - "learning_rate": 8.029737375485756e-08, - "loss": 0.7106, - "num_input_tokens_seen": 161950215, - "step": 7586 - }, - { - "epoch": 0.9122828112787832, - "grad_norm": 2.7125430342196126, - "learning_rate": 8.007901059239986e-08, - "loss": 0.7243, - "num_input_tokens_seen": 161969215, - "step": 7587 - }, - { - "epoch": 0.9124030541694222, - "grad_norm": 1.7769672611276608, - "learning_rate": 7.986093868042964e-08, - "loss": 0.7967, - "num_input_tokens_seen": 161989180, - "step": 7588 - }, - { - "epoch": 0.9125232970600613, - "grad_norm": 2.133263869857011, - "learning_rate": 7.964315805202826e-08, - "loss": 0.6742, - "num_input_tokens_seen": 162009480, - "step": 7589 - }, - { - "epoch": 0.9126435399507005, - "grad_norm": 3.2160695358914064, - "learning_rate": 7.94256687402326e-08, - "loss": 0.7297, - "num_input_tokens_seen": 162028385, - "step": 7590 - }, - { - "epoch": 0.9127637828413395, - "grad_norm": 2.4259216170256592, - "learning_rate": 7.920847077803649e-08, - "loss": 0.6948, - "num_input_tokens_seen": 162045895, - "step": 7591 - }, - { - "epoch": 0.9128840257319786, - "grad_norm": 2.1342458075863804, - "learning_rate": 7.899156419838804e-08, - "loss": 0.8181, - "num_input_tokens_seen": 162064585, - "step": 7592 - }, - { - "epoch": 0.9130042686226177, - "grad_norm": 2.820724667989107, - "learning_rate": 7.87749490341918e-08, - "loss": 0.6505, - "num_input_tokens_seen": 162084580, - "step": 7593 - }, - { - "epoch": 0.9131245115132568, - "grad_norm": 2.0430614147947286, - "learning_rate": 7.855862531830836e-08, - "loss": 0.8324, - "num_input_tokens_seen": 162100410, - "step": 7594 - }, - { - "epoch": 0.9132447544038959, - "grad_norm": 2.894161402487206, - "learning_rate": 7.834259308355373e-08, - "loss": 0.7194, - "num_input_tokens_seen": 162118895, - "step": 7595 - }, - { - "epoch": 0.9133649972945349, - "grad_norm": 2.0246382779070777, - "learning_rate": 7.812685236269989e-08, - "loss": 0.7387, - "num_input_tokens_seen": 162137275, - "step": 7596 - }, - { - "epoch": 0.9134852401851741, - "grad_norm": 0.8816646999174246, - "learning_rate": 7.791140318847445e-08, - "loss": 0.6149, - "num_input_tokens_seen": 162195130, - "step": 7597 - }, - { - "epoch": 0.9136054830758131, - "grad_norm": 1.787764871639484, - "learning_rate": 7.769624559356081e-08, - "loss": 0.7889, - "num_input_tokens_seen": 162218245, - "step": 7598 - }, - { - "epoch": 0.9137257259664522, - "grad_norm": 3.2829866993559094, - "learning_rate": 7.748137961059842e-08, - "loss": 0.7483, - "num_input_tokens_seen": 162231945, - "step": 7599 - }, - { - "epoch": 0.9138459688570914, - "grad_norm": 3.0265571629836345, - "learning_rate": 7.726680527218211e-08, - "loss": 0.6552, - "num_input_tokens_seen": 162248705, - "step": 7600 - }, - { - "epoch": 0.9139662117477304, - "grad_norm": 2.1066769458871057, - "learning_rate": 7.70525226108627e-08, - "loss": 0.7506, - "num_input_tokens_seen": 162272095, - "step": 7601 - }, - { - "epoch": 0.9140864546383695, - "grad_norm": 2.127204793214638, - "learning_rate": 7.683853165914644e-08, - "loss": 0.7939, - "num_input_tokens_seen": 162289585, - "step": 7602 - }, - { - "epoch": 0.9142066975290086, - "grad_norm": 1.797980066794584, - "learning_rate": 7.662483244949602e-08, - "loss": 0.7693, - "num_input_tokens_seen": 162306565, - "step": 7603 - }, - { - "epoch": 0.9143269404196477, - "grad_norm": 2.4006741565354615, - "learning_rate": 7.641142501432951e-08, - "loss": 0.7972, - "num_input_tokens_seen": 162322480, - "step": 7604 - }, - { - "epoch": 0.9144471833102867, - "grad_norm": 2.48748038242498, - "learning_rate": 7.619830938602013e-08, - "loss": 0.7323, - "num_input_tokens_seen": 162343425, - "step": 7605 - }, - { - "epoch": 0.9145674262009259, - "grad_norm": 2.829063516006072, - "learning_rate": 7.598548559689777e-08, - "loss": 0.8067, - "num_input_tokens_seen": 162361545, - "step": 7606 - }, - { - "epoch": 0.914687669091565, - "grad_norm": 2.823479185294803, - "learning_rate": 7.577295367924751e-08, - "loss": 0.8043, - "num_input_tokens_seen": 162377665, - "step": 7607 - }, - { - "epoch": 0.914807911982204, - "grad_norm": 1.889590800438572, - "learning_rate": 7.556071366531025e-08, - "loss": 0.8193, - "num_input_tokens_seen": 162398355, - "step": 7608 - }, - { - "epoch": 0.9149281548728432, - "grad_norm": 4.336539853757118, - "learning_rate": 7.534876558728242e-08, - "loss": 0.7776, - "num_input_tokens_seen": 162417245, - "step": 7609 - }, - { - "epoch": 0.9150483977634822, - "grad_norm": 4.194804596968461, - "learning_rate": 7.513710947731656e-08, - "loss": 0.7378, - "num_input_tokens_seen": 162438175, - "step": 7610 - }, - { - "epoch": 0.9151686406541213, - "grad_norm": 2.033333265548982, - "learning_rate": 7.492574536752095e-08, - "loss": 0.8443, - "num_input_tokens_seen": 162457885, - "step": 7611 - }, - { - "epoch": 0.9152888835447605, - "grad_norm": 1.7872056373206395, - "learning_rate": 7.471467328995907e-08, - "loss": 0.7841, - "num_input_tokens_seen": 162476415, - "step": 7612 - }, - { - "epoch": 0.9154091264353995, - "grad_norm": 4.248516437476192, - "learning_rate": 7.450389327665041e-08, - "loss": 0.6096, - "num_input_tokens_seen": 162493970, - "step": 7613 - }, - { - "epoch": 0.9155293693260386, - "grad_norm": 2.6887946778669503, - "learning_rate": 7.429340535957007e-08, - "loss": 0.6774, - "num_input_tokens_seen": 162508885, - "step": 7614 - }, - { - "epoch": 0.9156496122166777, - "grad_norm": 2.5126813821631684, - "learning_rate": 7.40832095706494e-08, - "loss": 0.7062, - "num_input_tokens_seen": 162525300, - "step": 7615 - }, - { - "epoch": 0.9157698551073168, - "grad_norm": 1.9773764498642252, - "learning_rate": 7.387330594177443e-08, - "loss": 0.7947, - "num_input_tokens_seen": 162547095, - "step": 7616 - }, - { - "epoch": 0.9158900979979558, - "grad_norm": 2.463664845115131, - "learning_rate": 7.366369450478749e-08, - "loss": 0.7805, - "num_input_tokens_seen": 162567925, - "step": 7617 - }, - { - "epoch": 0.916010340888595, - "grad_norm": 1.7945315841860985, - "learning_rate": 7.345437529148623e-08, - "loss": 0.6497, - "num_input_tokens_seen": 162586655, - "step": 7618 - }, - { - "epoch": 0.9161305837792341, - "grad_norm": 2.7694333079269744, - "learning_rate": 7.324534833362461e-08, - "loss": 0.7283, - "num_input_tokens_seen": 162603950, - "step": 7619 - }, - { - "epoch": 0.9162508266698731, - "grad_norm": 1.8925551101515905, - "learning_rate": 7.303661366291192e-08, - "loss": 0.6785, - "num_input_tokens_seen": 162624340, - "step": 7620 - }, - { - "epoch": 0.9163710695605123, - "grad_norm": 2.2886122427724906, - "learning_rate": 7.28281713110126e-08, - "loss": 0.8145, - "num_input_tokens_seen": 162642135, - "step": 7621 - }, - { - "epoch": 0.9164913124511513, - "grad_norm": 2.4022685341152537, - "learning_rate": 7.262002130954759e-08, - "loss": 0.7693, - "num_input_tokens_seen": 162660310, - "step": 7622 - }, - { - "epoch": 0.9166115553417904, - "grad_norm": 2.004384622669353, - "learning_rate": 7.241216369009296e-08, - "loss": 0.784, - "num_input_tokens_seen": 162680215, - "step": 7623 - }, - { - "epoch": 0.9167317982324296, - "grad_norm": 2.402848142844697, - "learning_rate": 7.220459848418037e-08, - "loss": 0.6618, - "num_input_tokens_seen": 162700010, - "step": 7624 - }, - { - "epoch": 0.9168520411230686, - "grad_norm": 2.023539090655207, - "learning_rate": 7.199732572329731e-08, - "loss": 0.7881, - "num_input_tokens_seen": 162717630, - "step": 7625 - }, - { - "epoch": 0.9169722840137077, - "grad_norm": 3.0870319218966245, - "learning_rate": 7.179034543888684e-08, - "loss": 0.762, - "num_input_tokens_seen": 162736855, - "step": 7626 - }, - { - "epoch": 0.9170925269043467, - "grad_norm": 4.534386764551792, - "learning_rate": 7.158365766234808e-08, - "loss": 0.7745, - "num_input_tokens_seen": 162755425, - "step": 7627 - }, - { - "epoch": 0.9172127697949859, - "grad_norm": 2.0964458591415425, - "learning_rate": 7.137726242503527e-08, - "loss": 0.7147, - "num_input_tokens_seen": 162774065, - "step": 7628 - }, - { - "epoch": 0.917333012685625, - "grad_norm": 2.8927536128091584, - "learning_rate": 7.11711597582585e-08, - "loss": 0.779, - "num_input_tokens_seen": 162791145, - "step": 7629 - }, - { - "epoch": 0.917453255576264, - "grad_norm": 2.616532091973877, - "learning_rate": 7.096534969328271e-08, - "loss": 0.7988, - "num_input_tokens_seen": 162808310, - "step": 7630 - }, - { - "epoch": 0.9175734984669032, - "grad_norm": 2.444107560883417, - "learning_rate": 7.075983226132987e-08, - "loss": 0.8364, - "num_input_tokens_seen": 162826960, - "step": 7631 - }, - { - "epoch": 0.9176937413575422, - "grad_norm": 2.830071813843808, - "learning_rate": 7.055460749357656e-08, - "loss": 0.7823, - "num_input_tokens_seen": 162842960, - "step": 7632 - }, - { - "epoch": 0.9178139842481813, - "grad_norm": 2.5152038799735332, - "learning_rate": 7.034967542115521e-08, - "loss": 0.6992, - "num_input_tokens_seen": 162860945, - "step": 7633 - }, - { - "epoch": 0.9179342271388204, - "grad_norm": 2.463994410315292, - "learning_rate": 7.014503607515366e-08, - "loss": 0.7455, - "num_input_tokens_seen": 162879970, - "step": 7634 - }, - { - "epoch": 0.9180544700294595, - "grad_norm": 2.3132634060940616, - "learning_rate": 6.994068948661592e-08, - "loss": 0.6691, - "num_input_tokens_seen": 162897845, - "step": 7635 - }, - { - "epoch": 0.9181747129200986, - "grad_norm": 2.900983597196399, - "learning_rate": 6.97366356865412e-08, - "loss": 0.7539, - "num_input_tokens_seen": 162915270, - "step": 7636 - }, - { - "epoch": 0.9182949558107377, - "grad_norm": 2.322618345785149, - "learning_rate": 6.953287470588386e-08, - "loss": 0.6467, - "num_input_tokens_seen": 162932945, - "step": 7637 - }, - { - "epoch": 0.9184151987013768, - "grad_norm": 3.0418981070969644, - "learning_rate": 6.932940657555452e-08, - "loss": 0.8516, - "num_input_tokens_seen": 162948795, - "step": 7638 - }, - { - "epoch": 0.9185354415920158, - "grad_norm": 2.082465536662542, - "learning_rate": 6.912623132641938e-08, - "loss": 0.7494, - "num_input_tokens_seen": 162973605, - "step": 7639 - }, - { - "epoch": 0.918655684482655, - "grad_norm": 1.902650248504533, - "learning_rate": 6.892334898929952e-08, - "loss": 0.7512, - "num_input_tokens_seen": 162993570, - "step": 7640 - }, - { - "epoch": 0.918775927373294, - "grad_norm": 3.0512414338728653, - "learning_rate": 6.872075959497236e-08, - "loss": 0.8343, - "num_input_tokens_seen": 163012065, - "step": 7641 - }, - { - "epoch": 0.9188961702639331, - "grad_norm": 2.6949847214219194, - "learning_rate": 6.85184631741702e-08, - "loss": 0.8193, - "num_input_tokens_seen": 163032350, - "step": 7642 - }, - { - "epoch": 0.9190164131545723, - "grad_norm": 2.4017060426118664, - "learning_rate": 6.831645975758161e-08, - "loss": 0.7731, - "num_input_tokens_seen": 163050010, - "step": 7643 - }, - { - "epoch": 0.9191366560452113, - "grad_norm": 2.6698715287134633, - "learning_rate": 6.811474937585026e-08, - "loss": 0.6744, - "num_input_tokens_seen": 163069520, - "step": 7644 - }, - { - "epoch": 0.9192568989358504, - "grad_norm": 1.8434418050130366, - "learning_rate": 6.79133320595755e-08, - "loss": 0.7777, - "num_input_tokens_seen": 163089160, - "step": 7645 - }, - { - "epoch": 0.9193771418264896, - "grad_norm": 2.1281752298736496, - "learning_rate": 6.771220783931175e-08, - "loss": 0.7466, - "num_input_tokens_seen": 163109040, - "step": 7646 - }, - { - "epoch": 0.9194973847171286, - "grad_norm": 0.8726606417077498, - "learning_rate": 6.751137674556994e-08, - "loss": 0.6767, - "num_input_tokens_seen": 163169145, - "step": 7647 - }, - { - "epoch": 0.9196176276077677, - "grad_norm": 2.1728873456451234, - "learning_rate": 6.731083880881572e-08, - "loss": 0.7745, - "num_input_tokens_seen": 163185085, - "step": 7648 - }, - { - "epoch": 0.9197378704984068, - "grad_norm": 2.3115927579783695, - "learning_rate": 6.711059405947072e-08, - "loss": 0.8054, - "num_input_tokens_seen": 163202995, - "step": 7649 - }, - { - "epoch": 0.9198581133890459, - "grad_norm": 2.560097991471572, - "learning_rate": 6.691064252791156e-08, - "loss": 0.7694, - "num_input_tokens_seen": 163222190, - "step": 7650 - }, - { - "epoch": 0.9199783562796849, - "grad_norm": 1.9551818290424663, - "learning_rate": 6.67109842444713e-08, - "loss": 0.7699, - "num_input_tokens_seen": 163240840, - "step": 7651 - }, - { - "epoch": 0.9200985991703241, - "grad_norm": 3.25240196965659, - "learning_rate": 6.651161923943726e-08, - "loss": 0.7629, - "num_input_tokens_seen": 163258465, - "step": 7652 - }, - { - "epoch": 0.9202188420609632, - "grad_norm": 3.024121756037315, - "learning_rate": 6.631254754305349e-08, - "loss": 0.7619, - "num_input_tokens_seen": 163277645, - "step": 7653 - }, - { - "epoch": 0.9203390849516022, - "grad_norm": 2.3864893769323388, - "learning_rate": 6.611376918551848e-08, - "loss": 0.7778, - "num_input_tokens_seen": 163296150, - "step": 7654 - }, - { - "epoch": 0.9204593278422414, - "grad_norm": 2.4106302106160684, - "learning_rate": 6.591528419698744e-08, - "loss": 0.7919, - "num_input_tokens_seen": 163315655, - "step": 7655 - }, - { - "epoch": 0.9205795707328804, - "grad_norm": 2.983989664660224, - "learning_rate": 6.571709260756986e-08, - "loss": 0.8333, - "num_input_tokens_seen": 163332020, - "step": 7656 - }, - { - "epoch": 0.9206998136235195, - "grad_norm": 3.445360355747961, - "learning_rate": 6.551919444733145e-08, - "loss": 0.7543, - "num_input_tokens_seen": 163349555, - "step": 7657 - }, - { - "epoch": 0.9208200565141585, - "grad_norm": 1.9893530282535243, - "learning_rate": 6.532158974629287e-08, - "loss": 0.6436, - "num_input_tokens_seen": 163373030, - "step": 7658 - }, - { - "epoch": 0.9209402994047977, - "grad_norm": 2.275459375063756, - "learning_rate": 6.512427853443103e-08, - "loss": 0.758, - "num_input_tokens_seen": 163394830, - "step": 7659 - }, - { - "epoch": 0.9210605422954368, - "grad_norm": 1.6606415919907909, - "learning_rate": 6.492726084167799e-08, - "loss": 0.7541, - "num_input_tokens_seen": 163416665, - "step": 7660 - }, - { - "epoch": 0.9211807851860758, - "grad_norm": 0.807823609834835, - "learning_rate": 6.473053669792072e-08, - "loss": 0.5804, - "num_input_tokens_seen": 163471075, - "step": 7661 - }, - { - "epoch": 0.921301028076715, - "grad_norm": 2.606458243927914, - "learning_rate": 6.453410613300225e-08, - "loss": 0.7275, - "num_input_tokens_seen": 163488725, - "step": 7662 - }, - { - "epoch": 0.921421270967354, - "grad_norm": 1.8033922760765775, - "learning_rate": 6.433796917672118e-08, - "loss": 0.578, - "num_input_tokens_seen": 163507650, - "step": 7663 - }, - { - "epoch": 0.9215415138579931, - "grad_norm": 0.7869964700232591, - "learning_rate": 6.414212585883105e-08, - "loss": 0.6168, - "num_input_tokens_seen": 163570000, - "step": 7664 - }, - { - "epoch": 0.9216617567486323, - "grad_norm": 2.07232525265648, - "learning_rate": 6.394657620904143e-08, - "loss": 0.6949, - "num_input_tokens_seen": 163592830, - "step": 7665 - }, - { - "epoch": 0.9217819996392713, - "grad_norm": 1.9840034656730698, - "learning_rate": 6.375132025701657e-08, - "loss": 0.7092, - "num_input_tokens_seen": 163614850, - "step": 7666 - }, - { - "epoch": 0.9219022425299104, - "grad_norm": 7.31759322867845, - "learning_rate": 6.355635803237724e-08, - "loss": 0.6872, - "num_input_tokens_seen": 163630270, - "step": 7667 - }, - { - "epoch": 0.9220224854205495, - "grad_norm": 2.2343063216639405, - "learning_rate": 6.336168956469867e-08, - "loss": 0.7996, - "num_input_tokens_seen": 163648465, - "step": 7668 - }, - { - "epoch": 0.9221427283111886, - "grad_norm": 1.767091412407001, - "learning_rate": 6.31673148835119e-08, - "loss": 0.715, - "num_input_tokens_seen": 163669375, - "step": 7669 - }, - { - "epoch": 0.9222629712018277, - "grad_norm": 1.784182565960609, - "learning_rate": 6.297323401830334e-08, - "loss": 0.6345, - "num_input_tokens_seen": 163687880, - "step": 7670 - }, - { - "epoch": 0.9223832140924668, - "grad_norm": 2.3693360289981107, - "learning_rate": 6.277944699851523e-08, - "loss": 0.6881, - "num_input_tokens_seen": 163707120, - "step": 7671 - }, - { - "epoch": 0.9225034569831059, - "grad_norm": 1.9841378315237865, - "learning_rate": 6.25859538535447e-08, - "loss": 0.7275, - "num_input_tokens_seen": 163727635, - "step": 7672 - }, - { - "epoch": 0.9226236998737449, - "grad_norm": 3.5849558296867365, - "learning_rate": 6.239275461274474e-08, - "loss": 0.7663, - "num_input_tokens_seen": 163743730, - "step": 7673 - }, - { - "epoch": 0.9227439427643841, - "grad_norm": 2.0079527815440104, - "learning_rate": 6.219984930542299e-08, - "loss": 0.8525, - "num_input_tokens_seen": 163764190, - "step": 7674 - }, - { - "epoch": 0.9228641856550232, - "grad_norm": 2.567093971340067, - "learning_rate": 6.200723796084362e-08, - "loss": 0.7538, - "num_input_tokens_seen": 163782005, - "step": 7675 - }, - { - "epoch": 0.9229844285456622, - "grad_norm": 0.7644028197374373, - "learning_rate": 6.181492060822546e-08, - "loss": 0.6434, - "num_input_tokens_seen": 163841900, - "step": 7676 - }, - { - "epoch": 0.9231046714363014, - "grad_norm": 2.210626430229041, - "learning_rate": 6.162289727674274e-08, - "loss": 0.8144, - "num_input_tokens_seen": 163859300, - "step": 7677 - }, - { - "epoch": 0.9232249143269404, - "grad_norm": 2.9426182512994665, - "learning_rate": 6.143116799552506e-08, - "loss": 0.8769, - "num_input_tokens_seen": 163875265, - "step": 7678 - }, - { - "epoch": 0.9233451572175795, - "grad_norm": 2.5919334311516073, - "learning_rate": 6.123973279365802e-08, - "loss": 0.5499, - "num_input_tokens_seen": 163893960, - "step": 7679 - }, - { - "epoch": 0.9234654001082186, - "grad_norm": 1.8493410164895827, - "learning_rate": 6.104859170018218e-08, - "loss": 0.7756, - "num_input_tokens_seen": 163911535, - "step": 7680 - }, - { - "epoch": 0.9235856429988577, - "grad_norm": 1.689468347824574, - "learning_rate": 6.085774474409322e-08, - "loss": 0.802, - "num_input_tokens_seen": 163931815, - "step": 7681 - }, - { - "epoch": 0.9237058858894968, - "grad_norm": 2.169122796669728, - "learning_rate": 6.066719195434267e-08, - "loss": 0.6983, - "num_input_tokens_seen": 163949335, - "step": 7682 - }, - { - "epoch": 0.9238261287801359, - "grad_norm": 4.430542826929758, - "learning_rate": 6.047693335983717e-08, - "loss": 0.661, - "num_input_tokens_seen": 163971400, - "step": 7683 - }, - { - "epoch": 0.923946371670775, - "grad_norm": 4.4135184896564414, - "learning_rate": 6.028696898943853e-08, - "loss": 0.817, - "num_input_tokens_seen": 163990180, - "step": 7684 - }, - { - "epoch": 0.924066614561414, - "grad_norm": 4.047769178950823, - "learning_rate": 6.00972988719648e-08, - "loss": 0.7062, - "num_input_tokens_seen": 164008135, - "step": 7685 - }, - { - "epoch": 0.9241868574520532, - "grad_norm": 6.575575374632869, - "learning_rate": 5.990792303618807e-08, - "loss": 0.7103, - "num_input_tokens_seen": 164027435, - "step": 7686 - }, - { - "epoch": 0.9243071003426923, - "grad_norm": 1.8826012948425552, - "learning_rate": 5.971884151083695e-08, - "loss": 0.6934, - "num_input_tokens_seen": 164049565, - "step": 7687 - }, - { - "epoch": 0.9244273432333313, - "grad_norm": 1.829255598550161, - "learning_rate": 5.9530054324595124e-08, - "loss": 0.7398, - "num_input_tokens_seen": 164069400, - "step": 7688 - }, - { - "epoch": 0.9245475861239704, - "grad_norm": 0.7772465622589103, - "learning_rate": 5.934156150610103e-08, - "loss": 0.5959, - "num_input_tokens_seen": 164126485, - "step": 7689 - }, - { - "epoch": 0.9246678290146095, - "grad_norm": 2.4899874784998315, - "learning_rate": 5.915336308394891e-08, - "loss": 0.7754, - "num_input_tokens_seen": 164145040, - "step": 7690 - }, - { - "epoch": 0.9247880719052486, - "grad_norm": 1.6930480601185507, - "learning_rate": 5.89654590866886e-08, - "loss": 0.7695, - "num_input_tokens_seen": 164164260, - "step": 7691 - }, - { - "epoch": 0.9249083147958876, - "grad_norm": 2.3324529964104532, - "learning_rate": 5.877784954282483e-08, - "loss": 0.8775, - "num_input_tokens_seen": 164183320, - "step": 7692 - }, - { - "epoch": 0.9250285576865268, - "grad_norm": 3.111661559376203, - "learning_rate": 5.8590534480817963e-08, - "loss": 0.7191, - "num_input_tokens_seen": 164204765, - "step": 7693 - }, - { - "epoch": 0.9251488005771659, - "grad_norm": 2.9735658744086466, - "learning_rate": 5.840351392908349e-08, - "loss": 0.723, - "num_input_tokens_seen": 164220205, - "step": 7694 - }, - { - "epoch": 0.9252690434678049, - "grad_norm": 3.2971726652754314, - "learning_rate": 5.821678791599205e-08, - "loss": 0.7095, - "num_input_tokens_seen": 164239370, - "step": 7695 - }, - { - "epoch": 0.9253892863584441, - "grad_norm": 1.8403975449503616, - "learning_rate": 5.803035646986987e-08, - "loss": 0.801, - "num_input_tokens_seen": 164258425, - "step": 7696 - }, - { - "epoch": 0.9255095292490831, - "grad_norm": 2.825252842672205, - "learning_rate": 5.7844219618998766e-08, - "loss": 0.6687, - "num_input_tokens_seen": 164272470, - "step": 7697 - }, - { - "epoch": 0.9256297721397222, - "grad_norm": 2.2738473960864876, - "learning_rate": 5.765837739161505e-08, - "loss": 0.7096, - "num_input_tokens_seen": 164291310, - "step": 7698 - }, - { - "epoch": 0.9257500150303614, - "grad_norm": 3.8134690384264545, - "learning_rate": 5.7472829815911504e-08, - "loss": 0.7451, - "num_input_tokens_seen": 164309855, - "step": 7699 - }, - { - "epoch": 0.9258702579210004, - "grad_norm": 1.7736622015278536, - "learning_rate": 5.7287576920035164e-08, - "loss": 0.8023, - "num_input_tokens_seen": 164328590, - "step": 7700 - }, - { - "epoch": 0.9259905008116395, - "grad_norm": 1.9273500005798785, - "learning_rate": 5.710261873208866e-08, - "loss": 0.7544, - "num_input_tokens_seen": 164347640, - "step": 7701 - }, - { - "epoch": 0.9261107437022786, - "grad_norm": 1.6368246176197416, - "learning_rate": 5.691795528012999e-08, - "loss": 0.7347, - "num_input_tokens_seen": 164368840, - "step": 7702 - }, - { - "epoch": 0.9262309865929177, - "grad_norm": 2.5428603472362052, - "learning_rate": 5.6733586592172755e-08, - "loss": 0.7208, - "num_input_tokens_seen": 164388055, - "step": 7703 - }, - { - "epoch": 0.9263512294835567, - "grad_norm": 2.8726336453957617, - "learning_rate": 5.6549512696185244e-08, - "loss": 0.8029, - "num_input_tokens_seen": 164406275, - "step": 7704 - }, - { - "epoch": 0.9264714723741959, - "grad_norm": 2.180029138224243, - "learning_rate": 5.636573362009156e-08, - "loss": 0.6788, - "num_input_tokens_seen": 164426055, - "step": 7705 - }, - { - "epoch": 0.926591715264835, - "grad_norm": 2.1096575202205017, - "learning_rate": 5.618224939177052e-08, - "loss": 0.7558, - "num_input_tokens_seen": 164443680, - "step": 7706 - }, - { - "epoch": 0.926711958155474, - "grad_norm": 2.5465581893520937, - "learning_rate": 5.5999060039056964e-08, - "loss": 0.6965, - "num_input_tokens_seen": 164465945, - "step": 7707 - }, - { - "epoch": 0.9268322010461132, - "grad_norm": 2.590806558290182, - "learning_rate": 5.581616558974023e-08, - "loss": 0.8204, - "num_input_tokens_seen": 164484230, - "step": 7708 - }, - { - "epoch": 0.9269524439367522, - "grad_norm": 3.7070702873445693, - "learning_rate": 5.5633566071565444e-08, - "loss": 0.7849, - "num_input_tokens_seen": 164503190, - "step": 7709 - }, - { - "epoch": 0.9270726868273913, - "grad_norm": 2.8166443618556927, - "learning_rate": 5.5451261512232896e-08, - "loss": 0.6937, - "num_input_tokens_seen": 164525590, - "step": 7710 - }, - { - "epoch": 0.9271929297180305, - "grad_norm": 2.451214690879537, - "learning_rate": 5.5269251939397576e-08, - "loss": 0.618, - "num_input_tokens_seen": 164544825, - "step": 7711 - }, - { - "epoch": 0.9273131726086695, - "grad_norm": 2.2009864936452934, - "learning_rate": 5.508753738067073e-08, - "loss": 0.759, - "num_input_tokens_seen": 164564085, - "step": 7712 - }, - { - "epoch": 0.9274334154993086, - "grad_norm": 2.0569710206288003, - "learning_rate": 5.4906117863617875e-08, - "loss": 0.7829, - "num_input_tokens_seen": 164583190, - "step": 7713 - }, - { - "epoch": 0.9275536583899477, - "grad_norm": 1.8967905050436942, - "learning_rate": 5.4724993415760533e-08, - "loss": 0.7811, - "num_input_tokens_seen": 164601265, - "step": 7714 - }, - { - "epoch": 0.9276739012805868, - "grad_norm": 3.4088639460890398, - "learning_rate": 5.454416406457496e-08, - "loss": 0.7467, - "num_input_tokens_seen": 164620080, - "step": 7715 - }, - { - "epoch": 0.9277941441712259, - "grad_norm": 3.5453436908898968, - "learning_rate": 5.436362983749299e-08, - "loss": 0.7404, - "num_input_tokens_seen": 164634970, - "step": 7716 - }, - { - "epoch": 0.927914387061865, - "grad_norm": 2.306579451410043, - "learning_rate": 5.418339076190137e-08, - "loss": 0.6332, - "num_input_tokens_seen": 164654200, - "step": 7717 - }, - { - "epoch": 0.9280346299525041, - "grad_norm": 2.281436237660145, - "learning_rate": 5.400344686514202e-08, - "loss": 0.8822, - "num_input_tokens_seen": 164671505, - "step": 7718 - }, - { - "epoch": 0.9281548728431431, - "grad_norm": 2.0002020547021266, - "learning_rate": 5.382379817451288e-08, - "loss": 0.6597, - "num_input_tokens_seen": 164689340, - "step": 7719 - }, - { - "epoch": 0.9282751157337822, - "grad_norm": 1.7624345531522285, - "learning_rate": 5.364444471726592e-08, - "loss": 0.8065, - "num_input_tokens_seen": 164708265, - "step": 7720 - }, - { - "epoch": 0.9283953586244214, - "grad_norm": 2.751545909416791, - "learning_rate": 5.346538652060939e-08, - "loss": 0.7954, - "num_input_tokens_seen": 164729340, - "step": 7721 - }, - { - "epoch": 0.9285156015150604, - "grad_norm": 2.249500590737962, - "learning_rate": 5.3286623611705775e-08, - "loss": 0.6981, - "num_input_tokens_seen": 164747105, - "step": 7722 - }, - { - "epoch": 0.9286358444056995, - "grad_norm": 0.9058493096277834, - "learning_rate": 5.3108156017673824e-08, - "loss": 0.6502, - "num_input_tokens_seen": 164808585, - "step": 7723 - }, - { - "epoch": 0.9287560872963386, - "grad_norm": 2.6429313806698795, - "learning_rate": 5.292998376558655e-08, - "loss": 0.7077, - "num_input_tokens_seen": 164827085, - "step": 7724 - }, - { - "epoch": 0.9288763301869777, - "grad_norm": 1.9818057872456156, - "learning_rate": 5.275210688247278e-08, - "loss": 0.6238, - "num_input_tokens_seen": 164847130, - "step": 7725 - }, - { - "epoch": 0.9289965730776167, - "grad_norm": 2.813342292186378, - "learning_rate": 5.257452539531604e-08, - "loss": 0.8399, - "num_input_tokens_seen": 164863920, - "step": 7726 - }, - { - "epoch": 0.9291168159682559, - "grad_norm": 2.1237894101746386, - "learning_rate": 5.2397239331055445e-08, - "loss": 0.6806, - "num_input_tokens_seen": 164885640, - "step": 7727 - }, - { - "epoch": 0.929237058858895, - "grad_norm": 2.6320667404706826, - "learning_rate": 5.2220248716585036e-08, - "loss": 0.7978, - "num_input_tokens_seen": 164903040, - "step": 7728 - }, - { - "epoch": 0.929357301749534, - "grad_norm": 3.2097207003638486, - "learning_rate": 5.204355357875445e-08, - "loss": 0.7491, - "num_input_tokens_seen": 164921105, - "step": 7729 - }, - { - "epoch": 0.9294775446401732, - "grad_norm": 3.2066326279463175, - "learning_rate": 5.1867153944367584e-08, - "loss": 0.701, - "num_input_tokens_seen": 164937215, - "step": 7730 - }, - { - "epoch": 0.9295977875308122, - "grad_norm": 1.6386494531643085, - "learning_rate": 5.16910498401848e-08, - "loss": 0.7311, - "num_input_tokens_seen": 164956385, - "step": 7731 - }, - { - "epoch": 0.9297180304214513, - "grad_norm": 2.12365505333182, - "learning_rate": 5.151524129292073e-08, - "loss": 0.831, - "num_input_tokens_seen": 164974000, - "step": 7732 - }, - { - "epoch": 0.9298382733120905, - "grad_norm": 4.194903200666486, - "learning_rate": 5.1339728329245155e-08, - "loss": 0.6703, - "num_input_tokens_seen": 164994285, - "step": 7733 - }, - { - "epoch": 0.9299585162027295, - "grad_norm": 3.9698574765343566, - "learning_rate": 5.116451097578345e-08, - "loss": 0.7847, - "num_input_tokens_seen": 165013045, - "step": 7734 - }, - { - "epoch": 0.9300787590933686, - "grad_norm": 1.8423210063791695, - "learning_rate": 5.0989589259115895e-08, - "loss": 0.7401, - "num_input_tokens_seen": 165033650, - "step": 7735 - }, - { - "epoch": 0.9301990019840077, - "grad_norm": 1.9752022694647853, - "learning_rate": 5.081496320577816e-08, - "loss": 0.7021, - "num_input_tokens_seen": 165050490, - "step": 7736 - }, - { - "epoch": 0.9303192448746468, - "grad_norm": 0.9637446798467478, - "learning_rate": 5.0640632842260835e-08, - "loss": 0.6601, - "num_input_tokens_seen": 165110470, - "step": 7737 - }, - { - "epoch": 0.9304394877652858, - "grad_norm": 1.5982502814379467, - "learning_rate": 5.0466598195009426e-08, - "loss": 0.7147, - "num_input_tokens_seen": 165137060, - "step": 7738 - }, - { - "epoch": 0.930559730655925, - "grad_norm": 2.3424280890928078, - "learning_rate": 5.0292859290425036e-08, - "loss": 0.7005, - "num_input_tokens_seen": 165154650, - "step": 7739 - }, - { - "epoch": 0.9306799735465641, - "grad_norm": 2.0358421300537564, - "learning_rate": 5.01194161548637e-08, - "loss": 0.773, - "num_input_tokens_seen": 165173485, - "step": 7740 - }, - { - "epoch": 0.9308002164372031, - "grad_norm": 2.112256078091361, - "learning_rate": 4.994626881463659e-08, - "loss": 0.8452, - "num_input_tokens_seen": 165189460, - "step": 7741 - }, - { - "epoch": 0.9309204593278423, - "grad_norm": 1.832913599036993, - "learning_rate": 4.9773417296009814e-08, - "loss": 0.7021, - "num_input_tokens_seen": 165210700, - "step": 7742 - }, - { - "epoch": 0.9310407022184813, - "grad_norm": 2.0711501273961463, - "learning_rate": 4.960086162520527e-08, - "loss": 0.6507, - "num_input_tokens_seen": 165230510, - "step": 7743 - }, - { - "epoch": 0.9311609451091204, - "grad_norm": 2.9479820749355037, - "learning_rate": 4.942860182839936e-08, - "loss": 0.8139, - "num_input_tokens_seen": 165248575, - "step": 7744 - }, - { - "epoch": 0.9312811879997596, - "grad_norm": 2.006368254204446, - "learning_rate": 4.925663793172363e-08, - "loss": 0.7832, - "num_input_tokens_seen": 165266255, - "step": 7745 - }, - { - "epoch": 0.9314014308903986, - "grad_norm": 0.8604809808810056, - "learning_rate": 4.9084969961264544e-08, - "loss": 0.6107, - "num_input_tokens_seen": 165329435, - "step": 7746 - }, - { - "epoch": 0.9315216737810377, - "grad_norm": 1.6442784922257239, - "learning_rate": 4.89135979430646e-08, - "loss": 0.7568, - "num_input_tokens_seen": 165349200, - "step": 7747 - }, - { - "epoch": 0.9316419166716768, - "grad_norm": 1.8634660754024868, - "learning_rate": 4.874252190312078e-08, - "loss": 0.8363, - "num_input_tokens_seen": 165369305, - "step": 7748 - }, - { - "epoch": 0.9317621595623159, - "grad_norm": 4.262197833330716, - "learning_rate": 4.857174186738477e-08, - "loss": 0.643, - "num_input_tokens_seen": 165392375, - "step": 7749 - }, - { - "epoch": 0.931882402452955, - "grad_norm": 3.0136117953515273, - "learning_rate": 4.840125786176385e-08, - "loss": 0.7314, - "num_input_tokens_seen": 165408300, - "step": 7750 - }, - { - "epoch": 0.932002645343594, - "grad_norm": 1.9877469090677302, - "learning_rate": 4.823106991212045e-08, - "loss": 0.7627, - "num_input_tokens_seen": 165427260, - "step": 7751 - }, - { - "epoch": 0.9321228882342332, - "grad_norm": 2.082490565239733, - "learning_rate": 4.806117804427212e-08, - "loss": 0.8362, - "num_input_tokens_seen": 165444915, - "step": 7752 - }, - { - "epoch": 0.9322431311248722, - "grad_norm": 1.9981519634381384, - "learning_rate": 4.7891582283990926e-08, - "loss": 0.6407, - "num_input_tokens_seen": 165463360, - "step": 7753 - }, - { - "epoch": 0.9323633740155113, - "grad_norm": 1.70360463337487, - "learning_rate": 4.772228265700473e-08, - "loss": 0.7194, - "num_input_tokens_seen": 165483940, - "step": 7754 - }, - { - "epoch": 0.9324836169061504, - "grad_norm": 2.17087333477564, - "learning_rate": 4.75532791889961e-08, - "loss": 0.7446, - "num_input_tokens_seen": 165500360, - "step": 7755 - }, - { - "epoch": 0.9326038597967895, - "grad_norm": 2.1014668839916117, - "learning_rate": 4.738457190560252e-08, - "loss": 0.6518, - "num_input_tokens_seen": 165519190, - "step": 7756 - }, - { - "epoch": 0.9327241026874286, - "grad_norm": 2.5190607165203907, - "learning_rate": 4.721616083241686e-08, - "loss": 0.7841, - "num_input_tokens_seen": 165537165, - "step": 7757 - }, - { - "epoch": 0.9328443455780677, - "grad_norm": 1.7832295340407653, - "learning_rate": 4.7048045994986684e-08, - "loss": 0.7673, - "num_input_tokens_seen": 165557745, - "step": 7758 - }, - { - "epoch": 0.9329645884687068, - "grad_norm": 2.2463172532169464, - "learning_rate": 4.688022741881559e-08, - "loss": 0.9088, - "num_input_tokens_seen": 165577990, - "step": 7759 - }, - { - "epoch": 0.9330848313593458, - "grad_norm": 1.676237432646866, - "learning_rate": 4.671270512936076e-08, - "loss": 0.753, - "num_input_tokens_seen": 165596870, - "step": 7760 - }, - { - "epoch": 0.933205074249985, - "grad_norm": 1.886040018968966, - "learning_rate": 4.6545479152035884e-08, - "loss": 0.8234, - "num_input_tokens_seen": 165615760, - "step": 7761 - }, - { - "epoch": 0.9333253171406241, - "grad_norm": 2.2079434040633323, - "learning_rate": 4.637854951220821e-08, - "loss": 0.7679, - "num_input_tokens_seen": 165632265, - "step": 7762 - }, - { - "epoch": 0.9334455600312631, - "grad_norm": 1.9321950544389397, - "learning_rate": 4.6211916235201485e-08, - "loss": 0.7409, - "num_input_tokens_seen": 165650415, - "step": 7763 - }, - { - "epoch": 0.9335658029219023, - "grad_norm": 4.065464584551626, - "learning_rate": 4.604557934629372e-08, - "loss": 0.8343, - "num_input_tokens_seen": 165669210, - "step": 7764 - }, - { - "epoch": 0.9336860458125413, - "grad_norm": 1.9331438800195957, - "learning_rate": 4.587953887071805e-08, - "loss": 0.7987, - "num_input_tokens_seen": 165688750, - "step": 7765 - }, - { - "epoch": 0.9338062887031804, - "grad_norm": 2.164590352950927, - "learning_rate": 4.5713794833662336e-08, - "loss": 0.8618, - "num_input_tokens_seen": 165707685, - "step": 7766 - }, - { - "epoch": 0.9339265315938196, - "grad_norm": 2.1502681466433566, - "learning_rate": 4.5548347260270236e-08, - "loss": 0.6328, - "num_input_tokens_seen": 165726695, - "step": 7767 - }, - { - "epoch": 0.9340467744844586, - "grad_norm": 2.0988134568412047, - "learning_rate": 4.5383196175639905e-08, - "loss": 0.6946, - "num_input_tokens_seen": 165745435, - "step": 7768 - }, - { - "epoch": 0.9341670173750977, - "grad_norm": 2.231215763484151, - "learning_rate": 4.521834160482485e-08, - "loss": 0.742, - "num_input_tokens_seen": 165763895, - "step": 7769 - }, - { - "epoch": 0.9342872602657368, - "grad_norm": 2.0708266523925687, - "learning_rate": 4.5053783572832846e-08, - "loss": 0.8084, - "num_input_tokens_seen": 165783795, - "step": 7770 - }, - { - "epoch": 0.9344075031563759, - "grad_norm": 1.8226344806387074, - "learning_rate": 4.488952210462771e-08, - "loss": 0.7541, - "num_input_tokens_seen": 165803720, - "step": 7771 - }, - { - "epoch": 0.9345277460470149, - "grad_norm": 2.109034799002808, - "learning_rate": 4.4725557225127495e-08, - "loss": 0.8507, - "num_input_tokens_seen": 165821780, - "step": 7772 - }, - { - "epoch": 0.9346479889376541, - "grad_norm": 1.6380201568195163, - "learning_rate": 4.456188895920565e-08, - "loss": 0.7895, - "num_input_tokens_seen": 165843255, - "step": 7773 - }, - { - "epoch": 0.9347682318282932, - "grad_norm": 1.9813355877921004, - "learning_rate": 4.439851733169031e-08, - "loss": 0.8417, - "num_input_tokens_seen": 165860765, - "step": 7774 - }, - { - "epoch": 0.9348884747189322, - "grad_norm": 2.75155907317542, - "learning_rate": 4.4235442367365204e-08, - "loss": 0.6947, - "num_input_tokens_seen": 165880795, - "step": 7775 - }, - { - "epoch": 0.9350087176095714, - "grad_norm": 2.182436864339903, - "learning_rate": 4.4072664090968327e-08, - "loss": 0.7823, - "num_input_tokens_seen": 165898900, - "step": 7776 - }, - { - "epoch": 0.9351289605002104, - "grad_norm": 2.1022582666502005, - "learning_rate": 4.391018252719347e-08, - "loss": 0.838, - "num_input_tokens_seen": 165918415, - "step": 7777 - }, - { - "epoch": 0.9352492033908495, - "grad_norm": 2.1710050756791532, - "learning_rate": 4.3747997700688264e-08, - "loss": 0.685, - "num_input_tokens_seen": 165934810, - "step": 7778 - }, - { - "epoch": 0.9353694462814887, - "grad_norm": 3.0018462457921795, - "learning_rate": 4.358610963605658e-08, - "loss": 0.7398, - "num_input_tokens_seen": 165954980, - "step": 7779 - }, - { - "epoch": 0.9354896891721277, - "grad_norm": 2.4629164433005224, - "learning_rate": 4.342451835785677e-08, - "loss": 0.679, - "num_input_tokens_seen": 165975610, - "step": 7780 - }, - { - "epoch": 0.9356099320627668, - "grad_norm": 2.1366441601098596, - "learning_rate": 4.3263223890601665e-08, - "loss": 0.7443, - "num_input_tokens_seen": 165994040, - "step": 7781 - }, - { - "epoch": 0.9357301749534058, - "grad_norm": 1.7920647430720333, - "learning_rate": 4.31022262587597e-08, - "loss": 0.7955, - "num_input_tokens_seen": 166012435, - "step": 7782 - }, - { - "epoch": 0.935850417844045, - "grad_norm": 1.8015132620110743, - "learning_rate": 4.2941525486754225e-08, - "loss": 0.6537, - "num_input_tokens_seen": 166032475, - "step": 7783 - }, - { - "epoch": 0.935970660734684, - "grad_norm": 2.5462421615259325, - "learning_rate": 4.2781121598963076e-08, - "loss": 0.7851, - "num_input_tokens_seen": 166050035, - "step": 7784 - }, - { - "epoch": 0.9360909036253231, - "grad_norm": 13.549006132997718, - "learning_rate": 4.2621014619719896e-08, - "loss": 0.6718, - "num_input_tokens_seen": 166067520, - "step": 7785 - }, - { - "epoch": 0.9362111465159623, - "grad_norm": 0.7700988321416368, - "learning_rate": 4.246120457331215e-08, - "loss": 0.6298, - "num_input_tokens_seen": 166129415, - "step": 7786 - }, - { - "epoch": 0.9363313894066013, - "grad_norm": 2.145353666808575, - "learning_rate": 4.2301691483983325e-08, - "loss": 0.7176, - "num_input_tokens_seen": 166149255, - "step": 7787 - }, - { - "epoch": 0.9364516322972404, - "grad_norm": 5.790223092642301, - "learning_rate": 4.214247537593163e-08, - "loss": 0.75, - "num_input_tokens_seen": 166168225, - "step": 7788 - }, - { - "epoch": 0.9365718751878795, - "grad_norm": 2.111044250953407, - "learning_rate": 4.198355627330952e-08, - "loss": 0.7964, - "num_input_tokens_seen": 166186695, - "step": 7789 - }, - { - "epoch": 0.9366921180785186, - "grad_norm": 3.2707046007380423, - "learning_rate": 4.1824934200225034e-08, - "loss": 0.6911, - "num_input_tokens_seen": 166202085, - "step": 7790 - }, - { - "epoch": 0.9368123609691577, - "grad_norm": 1.9799627666173143, - "learning_rate": 4.166660918074139e-08, - "loss": 0.7791, - "num_input_tokens_seen": 166221710, - "step": 7791 - }, - { - "epoch": 0.9369326038597968, - "grad_norm": 1.606768299346578, - "learning_rate": 4.15085812388758e-08, - "loss": 0.7276, - "num_input_tokens_seen": 166243650, - "step": 7792 - }, - { - "epoch": 0.9370528467504359, - "grad_norm": 1.7967776462907175, - "learning_rate": 4.135085039860153e-08, - "loss": 0.7818, - "num_input_tokens_seen": 166262770, - "step": 7793 - }, - { - "epoch": 0.9371730896410749, - "grad_norm": 2.8206937712409825, - "learning_rate": 4.119341668384568e-08, - "loss": 0.787, - "num_input_tokens_seen": 166281420, - "step": 7794 - }, - { - "epoch": 0.9372933325317141, - "grad_norm": 2.5563290745335956, - "learning_rate": 4.103628011849136e-08, - "loss": 0.8232, - "num_input_tokens_seen": 166296500, - "step": 7795 - }, - { - "epoch": 0.9374135754223532, - "grad_norm": 2.056509023377302, - "learning_rate": 4.0879440726375506e-08, - "loss": 0.749, - "num_input_tokens_seen": 166314005, - "step": 7796 - }, - { - "epoch": 0.9375338183129922, - "grad_norm": 3.0503071618314284, - "learning_rate": 4.0722898531291074e-08, - "loss": 0.5528, - "num_input_tokens_seen": 166330965, - "step": 7797 - }, - { - "epoch": 0.9376540612036314, - "grad_norm": 2.241635628607921, - "learning_rate": 4.056665355698508e-08, - "loss": 0.7577, - "num_input_tokens_seen": 166351230, - "step": 7798 - }, - { - "epoch": 0.9377743040942704, - "grad_norm": 6.259066509423454, - "learning_rate": 4.0410705827159886e-08, - "loss": 0.8079, - "num_input_tokens_seen": 166368245, - "step": 7799 - }, - { - "epoch": 0.9378945469849095, - "grad_norm": 2.5048287502838043, - "learning_rate": 4.0255055365472356e-08, - "loss": 0.7063, - "num_input_tokens_seen": 166386060, - "step": 7800 - }, - { - "epoch": 0.9380147898755486, - "grad_norm": 2.3440089017728196, - "learning_rate": 4.0099702195534935e-08, - "loss": 0.7327, - "num_input_tokens_seen": 166402730, - "step": 7801 - }, - { - "epoch": 0.9381350327661877, - "grad_norm": 3.328941824586175, - "learning_rate": 3.99446463409141e-08, - "loss": 0.7586, - "num_input_tokens_seen": 166420305, - "step": 7802 - }, - { - "epoch": 0.9382552756568268, - "grad_norm": 3.0479777481105814, - "learning_rate": 3.978988782513215e-08, - "loss": 0.6842, - "num_input_tokens_seen": 166437520, - "step": 7803 - }, - { - "epoch": 0.9383755185474659, - "grad_norm": 2.086627613233314, - "learning_rate": 3.963542667166586e-08, - "loss": 0.7552, - "num_input_tokens_seen": 166457345, - "step": 7804 - }, - { - "epoch": 0.938495761438105, - "grad_norm": 2.2312363176614536, - "learning_rate": 3.9481262903946486e-08, - "loss": 0.6846, - "num_input_tokens_seen": 166476510, - "step": 7805 - }, - { - "epoch": 0.938616004328744, - "grad_norm": 0.8010731541405713, - "learning_rate": 3.932739654536066e-08, - "loss": 0.5702, - "num_input_tokens_seen": 166538930, - "step": 7806 - }, - { - "epoch": 0.9387362472193832, - "grad_norm": 2.2624080324645086, - "learning_rate": 3.917382761925014e-08, - "loss": 0.7303, - "num_input_tokens_seen": 166554485, - "step": 7807 - }, - { - "epoch": 0.9388564901100223, - "grad_norm": 1.798461760517054, - "learning_rate": 3.9020556148910754e-08, - "loss": 0.782, - "num_input_tokens_seen": 166573560, - "step": 7808 - }, - { - "epoch": 0.9389767330006613, - "grad_norm": 0.7378359564794622, - "learning_rate": 3.8867582157593895e-08, - "loss": 0.589, - "num_input_tokens_seen": 166627485, - "step": 7809 - }, - { - "epoch": 0.9390969758913005, - "grad_norm": 2.026459683623381, - "learning_rate": 3.871490566850544e-08, - "loss": 0.7587, - "num_input_tokens_seen": 166651415, - "step": 7810 - }, - { - "epoch": 0.9392172187819395, - "grad_norm": 1.8556379036847632, - "learning_rate": 3.856252670480642e-08, - "loss": 0.6996, - "num_input_tokens_seen": 166669795, - "step": 7811 - }, - { - "epoch": 0.9393374616725786, - "grad_norm": 6.922869819428638, - "learning_rate": 3.841044528961279e-08, - "loss": 0.8079, - "num_input_tokens_seen": 166687310, - "step": 7812 - }, - { - "epoch": 0.9394577045632178, - "grad_norm": 2.5436438358092097, - "learning_rate": 3.825866144599499e-08, - "loss": 0.7846, - "num_input_tokens_seen": 166706085, - "step": 7813 - }, - { - "epoch": 0.9395779474538568, - "grad_norm": 2.460778425754977, - "learning_rate": 3.8107175196978145e-08, - "loss": 0.7486, - "num_input_tokens_seen": 166722110, - "step": 7814 - }, - { - "epoch": 0.9396981903444959, - "grad_norm": 2.311435586361119, - "learning_rate": 3.7955986565542996e-08, - "loss": 0.7607, - "num_input_tokens_seen": 166739910, - "step": 7815 - }, - { - "epoch": 0.9398184332351349, - "grad_norm": 2.3373259681092944, - "learning_rate": 3.780509557462497e-08, - "loss": 0.6843, - "num_input_tokens_seen": 166759830, - "step": 7816 - }, - { - "epoch": 0.9399386761257741, - "grad_norm": 8.600736525993172, - "learning_rate": 3.765450224711375e-08, - "loss": 0.7501, - "num_input_tokens_seen": 166780055, - "step": 7817 - }, - { - "epoch": 0.9400589190164131, - "grad_norm": 1.7273285346485665, - "learning_rate": 3.750420660585396e-08, - "loss": 0.7868, - "num_input_tokens_seen": 166801715, - "step": 7818 - }, - { - "epoch": 0.9401791619070522, - "grad_norm": 3.6524712755973527, - "learning_rate": 3.735420867364603e-08, - "loss": 0.7935, - "num_input_tokens_seen": 166822415, - "step": 7819 - }, - { - "epoch": 0.9402994047976914, - "grad_norm": 2.1609752360013883, - "learning_rate": 3.720450847324397e-08, - "loss": 0.6153, - "num_input_tokens_seen": 166845760, - "step": 7820 - }, - { - "epoch": 0.9404196476883304, - "grad_norm": 2.3865701095750778, - "learning_rate": 3.7055106027357395e-08, - "loss": 0.6867, - "num_input_tokens_seen": 166865345, - "step": 7821 - }, - { - "epoch": 0.9405398905789695, - "grad_norm": 2.893641042497842, - "learning_rate": 3.690600135865041e-08, - "loss": 0.7156, - "num_input_tokens_seen": 166881990, - "step": 7822 - }, - { - "epoch": 0.9406601334696086, - "grad_norm": 0.7907660928297727, - "learning_rate": 3.675719448974246e-08, - "loss": 0.6011, - "num_input_tokens_seen": 166946800, - "step": 7823 - }, - { - "epoch": 0.9407803763602477, - "grad_norm": 2.3144180039820137, - "learning_rate": 3.6608685443207054e-08, - "loss": 0.5961, - "num_input_tokens_seen": 166965670, - "step": 7824 - }, - { - "epoch": 0.9409006192508867, - "grad_norm": 2.3973680170974956, - "learning_rate": 3.646047424157306e-08, - "loss": 0.6692, - "num_input_tokens_seen": 166982365, - "step": 7825 - }, - { - "epoch": 0.9410208621415259, - "grad_norm": 3.0587815088750583, - "learning_rate": 3.631256090732382e-08, - "loss": 0.6694, - "num_input_tokens_seen": 167002545, - "step": 7826 - }, - { - "epoch": 0.941141105032165, - "grad_norm": 2.3181352039744834, - "learning_rate": 3.6164945462897833e-08, - "loss": 0.8168, - "num_input_tokens_seen": 167021555, - "step": 7827 - }, - { - "epoch": 0.941261347922804, - "grad_norm": 1.9469453572826758, - "learning_rate": 3.6017627930688074e-08, - "loss": 0.7461, - "num_input_tokens_seen": 167041100, - "step": 7828 - }, - { - "epoch": 0.9413815908134432, - "grad_norm": 2.4890781555091306, - "learning_rate": 3.587060833304267e-08, - "loss": 0.7656, - "num_input_tokens_seen": 167059010, - "step": 7829 - }, - { - "epoch": 0.9415018337040822, - "grad_norm": 2.2299327797370063, - "learning_rate": 3.5723886692264225e-08, - "loss": 0.6369, - "num_input_tokens_seen": 167076270, - "step": 7830 - }, - { - "epoch": 0.9416220765947213, - "grad_norm": 2.354287789934089, - "learning_rate": 3.557746303061071e-08, - "loss": 0.6112, - "num_input_tokens_seen": 167097745, - "step": 7831 - }, - { - "epoch": 0.9417423194853605, - "grad_norm": 2.2445783405849786, - "learning_rate": 3.543133737029391e-08, - "loss": 0.716, - "num_input_tokens_seen": 167117975, - "step": 7832 - }, - { - "epoch": 0.9418625623759995, - "grad_norm": 1.964919552161831, - "learning_rate": 3.5285509733481214e-08, - "loss": 0.676, - "num_input_tokens_seen": 167137420, - "step": 7833 - }, - { - "epoch": 0.9419828052666386, - "grad_norm": 2.0875254969405788, - "learning_rate": 3.513998014229447e-08, - "loss": 0.7632, - "num_input_tokens_seen": 167156090, - "step": 7834 - }, - { - "epoch": 0.9421030481572777, - "grad_norm": 4.967629022609805, - "learning_rate": 3.499474861881069e-08, - "loss": 0.8554, - "num_input_tokens_seen": 167173035, - "step": 7835 - }, - { - "epoch": 0.9422232910479168, - "grad_norm": 2.4773409967926403, - "learning_rate": 3.4849815185061136e-08, - "loss": 0.677, - "num_input_tokens_seen": 167192645, - "step": 7836 - }, - { - "epoch": 0.9423435339385559, - "grad_norm": 2.378971646462686, - "learning_rate": 3.470517986303223e-08, - "loss": 0.7582, - "num_input_tokens_seen": 167211350, - "step": 7837 - }, - { - "epoch": 0.942463776829195, - "grad_norm": 1.8114841893128897, - "learning_rate": 3.4560842674664856e-08, - "loss": 0.7873, - "num_input_tokens_seen": 167229585, - "step": 7838 - }, - { - "epoch": 0.9425840197198341, - "grad_norm": 6.504006656085906, - "learning_rate": 3.441680364185484e-08, - "loss": 0.7485, - "num_input_tokens_seen": 167249175, - "step": 7839 - }, - { - "epoch": 0.9427042626104731, - "grad_norm": 3.1991518237384624, - "learning_rate": 3.427306278645314e-08, - "loss": 0.7424, - "num_input_tokens_seen": 167267350, - "step": 7840 - }, - { - "epoch": 0.9428245055011123, - "grad_norm": 2.005488035228287, - "learning_rate": 3.4129620130264767e-08, - "loss": 0.7217, - "num_input_tokens_seen": 167285430, - "step": 7841 - }, - { - "epoch": 0.9429447483917514, - "grad_norm": 2.810382928468579, - "learning_rate": 3.398647569505009e-08, - "loss": 0.7835, - "num_input_tokens_seen": 167302575, - "step": 7842 - }, - { - "epoch": 0.9430649912823904, - "grad_norm": 3.051985669490127, - "learning_rate": 3.384362950252373e-08, - "loss": 0.7376, - "num_input_tokens_seen": 167319265, - "step": 7843 - }, - { - "epoch": 0.9431852341730296, - "grad_norm": 2.718053253896293, - "learning_rate": 3.3701081574355473e-08, - "loss": 0.5579, - "num_input_tokens_seen": 167340945, - "step": 7844 - }, - { - "epoch": 0.9433054770636686, - "grad_norm": 0.685433690681999, - "learning_rate": 3.355883193217002e-08, - "loss": 0.536, - "num_input_tokens_seen": 167409335, - "step": 7845 - }, - { - "epoch": 0.9434257199543077, - "grad_norm": 1.9728851464632355, - "learning_rate": 3.341688059754588e-08, - "loss": 0.8762, - "num_input_tokens_seen": 167424710, - "step": 7846 - }, - { - "epoch": 0.9435459628449467, - "grad_norm": 2.602949673862356, - "learning_rate": 3.327522759201762e-08, - "loss": 0.7741, - "num_input_tokens_seen": 167444300, - "step": 7847 - }, - { - "epoch": 0.9436662057355859, - "grad_norm": 4.027816666404336, - "learning_rate": 3.313387293707359e-08, - "loss": 0.66, - "num_input_tokens_seen": 167462725, - "step": 7848 - }, - { - "epoch": 0.943786448626225, - "grad_norm": 2.8430588210180856, - "learning_rate": 3.29928166541571e-08, - "loss": 0.677, - "num_input_tokens_seen": 167481400, - "step": 7849 - }, - { - "epoch": 0.943906691516864, - "grad_norm": 2.330422352990951, - "learning_rate": 3.2852058764666346e-08, - "loss": 0.7952, - "num_input_tokens_seen": 167500220, - "step": 7850 - }, - { - "epoch": 0.9440269344075032, - "grad_norm": 2.373335213428048, - "learning_rate": 3.2711599289954264e-08, - "loss": 0.685, - "num_input_tokens_seen": 167523975, - "step": 7851 - }, - { - "epoch": 0.9441471772981422, - "grad_norm": 2.001050152499835, - "learning_rate": 3.257143825132847e-08, - "loss": 0.7717, - "num_input_tokens_seen": 167541865, - "step": 7852 - }, - { - "epoch": 0.9442674201887813, - "grad_norm": 1.8185334276795595, - "learning_rate": 3.243157567005106e-08, - "loss": 0.7503, - "num_input_tokens_seen": 167559765, - "step": 7853 - }, - { - "epoch": 0.9443876630794205, - "grad_norm": 2.3825594706741646, - "learning_rate": 3.2292011567339296e-08, - "loss": 0.6356, - "num_input_tokens_seen": 167577290, - "step": 7854 - }, - { - "epoch": 0.9445079059700595, - "grad_norm": 3.7214351567185737, - "learning_rate": 3.21527459643649e-08, - "loss": 0.5607, - "num_input_tokens_seen": 167593895, - "step": 7855 - }, - { - "epoch": 0.9446281488606986, - "grad_norm": 4.870754460671157, - "learning_rate": 3.2013778882254536e-08, - "loss": 0.7355, - "num_input_tokens_seen": 167612410, - "step": 7856 - }, - { - "epoch": 0.9447483917513377, - "grad_norm": 2.0263256189947194, - "learning_rate": 3.1875110342088895e-08, - "loss": 0.7541, - "num_input_tokens_seen": 167633580, - "step": 7857 - }, - { - "epoch": 0.9448686346419768, - "grad_norm": 1.8548097932559486, - "learning_rate": 3.1736740364904035e-08, - "loss": 0.6521, - "num_input_tokens_seen": 167653830, - "step": 7858 - }, - { - "epoch": 0.9449888775326158, - "grad_norm": 2.861439742553642, - "learning_rate": 3.159866897169094e-08, - "loss": 0.761, - "num_input_tokens_seen": 167671750, - "step": 7859 - }, - { - "epoch": 0.945109120423255, - "grad_norm": 2.3923390008519343, - "learning_rate": 3.146089618339487e-08, - "loss": 0.7494, - "num_input_tokens_seen": 167688325, - "step": 7860 - }, - { - "epoch": 0.9452293633138941, - "grad_norm": 2.1232775061426215, - "learning_rate": 3.132342202091554e-08, - "loss": 0.6732, - "num_input_tokens_seen": 167708270, - "step": 7861 - }, - { - "epoch": 0.9453496062045331, - "grad_norm": 2.9503404982755304, - "learning_rate": 3.1186246505107595e-08, - "loss": 0.6729, - "num_input_tokens_seen": 167727130, - "step": 7862 - }, - { - "epoch": 0.9454698490951723, - "grad_norm": 1.788639537107173, - "learning_rate": 3.104936965678084e-08, - "loss": 0.8316, - "num_input_tokens_seen": 167745180, - "step": 7863 - }, - { - "epoch": 0.9455900919858113, - "grad_norm": 2.3924124210275846, - "learning_rate": 3.091279149669934e-08, - "loss": 0.8039, - "num_input_tokens_seen": 167763690, - "step": 7864 - }, - { - "epoch": 0.9457103348764504, - "grad_norm": 2.0877745496750193, - "learning_rate": 3.0776512045581624e-08, - "loss": 0.7323, - "num_input_tokens_seen": 167782200, - "step": 7865 - }, - { - "epoch": 0.9458305777670896, - "grad_norm": 2.892406012889742, - "learning_rate": 3.064053132410116e-08, - "loss": 0.7642, - "num_input_tokens_seen": 167799685, - "step": 7866 - }, - { - "epoch": 0.9459508206577286, - "grad_norm": 1.8794005960632765, - "learning_rate": 3.0504849352886554e-08, - "loss": 0.7503, - "num_input_tokens_seen": 167817550, - "step": 7867 - }, - { - "epoch": 0.9460710635483677, - "grad_norm": 2.5103010199407363, - "learning_rate": 3.036946615252023e-08, - "loss": 0.7067, - "num_input_tokens_seen": 167832800, - "step": 7868 - }, - { - "epoch": 0.9461913064390068, - "grad_norm": 2.1213907429428223, - "learning_rate": 3.0234381743539984e-08, - "loss": 0.6585, - "num_input_tokens_seen": 167850135, - "step": 7869 - }, - { - "epoch": 0.9463115493296459, - "grad_norm": 2.2415656602949827, - "learning_rate": 3.0099596146437863e-08, - "loss": 0.7976, - "num_input_tokens_seen": 167866960, - "step": 7870 - }, - { - "epoch": 0.946431792220285, - "grad_norm": 0.9844997154264823, - "learning_rate": 2.996510938166086e-08, - "loss": 0.6279, - "num_input_tokens_seen": 167929655, - "step": 7871 - }, - { - "epoch": 0.9465520351109241, - "grad_norm": 1.9779463855153512, - "learning_rate": 2.9830921469610196e-08, - "loss": 0.726, - "num_input_tokens_seen": 167946720, - "step": 7872 - }, - { - "epoch": 0.9466722780015632, - "grad_norm": 2.2817562370736724, - "learning_rate": 2.9697032430642256e-08, - "loss": 0.7932, - "num_input_tokens_seen": 167964655, - "step": 7873 - }, - { - "epoch": 0.9467925208922022, - "grad_norm": 2.4811427145918126, - "learning_rate": 2.9563442285067906e-08, - "loss": 0.7329, - "num_input_tokens_seen": 167981420, - "step": 7874 - }, - { - "epoch": 0.9469127637828414, - "grad_norm": 3.238490785175216, - "learning_rate": 2.943015105315294e-08, - "loss": 0.7958, - "num_input_tokens_seen": 168001335, - "step": 7875 - }, - { - "epoch": 0.9470330066734804, - "grad_norm": 2.4895358851466507, - "learning_rate": 2.929715875511718e-08, - "loss": 0.6601, - "num_input_tokens_seen": 168020090, - "step": 7876 - }, - { - "epoch": 0.9471532495641195, - "grad_norm": 2.465899396047182, - "learning_rate": 2.9164465411135375e-08, - "loss": 0.6909, - "num_input_tokens_seen": 168039580, - "step": 7877 - }, - { - "epoch": 0.9472734924547586, - "grad_norm": 2.2963694950006883, - "learning_rate": 2.9032071041337204e-08, - "loss": 0.7963, - "num_input_tokens_seen": 168057535, - "step": 7878 - }, - { - "epoch": 0.9473937353453977, - "grad_norm": 1.9506390774048077, - "learning_rate": 2.889997566580704e-08, - "loss": 0.7248, - "num_input_tokens_seen": 168075410, - "step": 7879 - }, - { - "epoch": 0.9475139782360368, - "grad_norm": 1.874611849228241, - "learning_rate": 2.8768179304583086e-08, - "loss": 0.6946, - "num_input_tokens_seen": 168097185, - "step": 7880 - }, - { - "epoch": 0.9476342211266758, - "grad_norm": 1.8285824138980884, - "learning_rate": 2.8636681977659117e-08, - "loss": 0.733, - "num_input_tokens_seen": 168116555, - "step": 7881 - }, - { - "epoch": 0.947754464017315, - "grad_norm": 2.3750887309025224, - "learning_rate": 2.850548370498296e-08, - "loss": 0.7717, - "num_input_tokens_seen": 168134115, - "step": 7882 - }, - { - "epoch": 0.9478747069079541, - "grad_norm": 1.859595708140476, - "learning_rate": 2.8374584506457577e-08, - "loss": 0.7057, - "num_input_tokens_seen": 168155110, - "step": 7883 - }, - { - "epoch": 0.9479949497985931, - "grad_norm": 2.9710440866155983, - "learning_rate": 2.824398440193998e-08, - "loss": 0.667, - "num_input_tokens_seen": 168173630, - "step": 7884 - }, - { - "epoch": 0.9481151926892323, - "grad_norm": 3.345057298641073, - "learning_rate": 2.811368341124232e-08, - "loss": 0.7059, - "num_input_tokens_seen": 168192420, - "step": 7885 - }, - { - "epoch": 0.9482354355798713, - "grad_norm": 4.299865995872961, - "learning_rate": 2.7983681554131222e-08, - "loss": 0.6728, - "num_input_tokens_seen": 168212400, - "step": 7886 - }, - { - "epoch": 0.9483556784705104, - "grad_norm": 3.42910105571297, - "learning_rate": 2.7853978850327365e-08, - "loss": 0.692, - "num_input_tokens_seen": 168231290, - "step": 7887 - }, - { - "epoch": 0.9484759213611496, - "grad_norm": 1.9063330145742587, - "learning_rate": 2.7724575319507225e-08, - "loss": 0.8663, - "num_input_tokens_seen": 168250720, - "step": 7888 - }, - { - "epoch": 0.9485961642517886, - "grad_norm": 1.8641401795724934, - "learning_rate": 2.759547098130066e-08, - "loss": 0.7735, - "num_input_tokens_seen": 168269170, - "step": 7889 - }, - { - "epoch": 0.9487164071424277, - "grad_norm": 2.137271093436953, - "learning_rate": 2.746666585529267e-08, - "loss": 0.7615, - "num_input_tokens_seen": 168289165, - "step": 7890 - }, - { - "epoch": 0.9488366500330668, - "grad_norm": 2.2389786577764266, - "learning_rate": 2.73381599610234e-08, - "loss": 0.7319, - "num_input_tokens_seen": 168309285, - "step": 7891 - }, - { - "epoch": 0.9489568929237059, - "grad_norm": 2.0373743053437936, - "learning_rate": 2.7209953317987033e-08, - "loss": 0.7076, - "num_input_tokens_seen": 168330045, - "step": 7892 - }, - { - "epoch": 0.9490771358143449, - "grad_norm": 2.259652604021654, - "learning_rate": 2.7082045945631793e-08, - "loss": 0.7655, - "num_input_tokens_seen": 168351980, - "step": 7893 - }, - { - "epoch": 0.9491973787049841, - "grad_norm": 2.7159464477700173, - "learning_rate": 2.6954437863361712e-08, - "loss": 0.6915, - "num_input_tokens_seen": 168369615, - "step": 7894 - }, - { - "epoch": 0.9493176215956232, - "grad_norm": 7.62534997736172, - "learning_rate": 2.682712909053464e-08, - "loss": 0.7091, - "num_input_tokens_seen": 168389635, - "step": 7895 - }, - { - "epoch": 0.9494378644862622, - "grad_norm": 2.098902253118414, - "learning_rate": 2.670011964646335e-08, - "loss": 0.7761, - "num_input_tokens_seen": 168408035, - "step": 7896 - }, - { - "epoch": 0.9495581073769014, - "grad_norm": 2.420571720420862, - "learning_rate": 2.657340955041487e-08, - "loss": 0.6844, - "num_input_tokens_seen": 168426530, - "step": 7897 - }, - { - "epoch": 0.9496783502675404, - "grad_norm": 2.2621318880658197, - "learning_rate": 2.6446998821610945e-08, - "loss": 0.7103, - "num_input_tokens_seen": 168446445, - "step": 7898 - }, - { - "epoch": 0.9497985931581795, - "grad_norm": 3.5804442966929555, - "learning_rate": 2.632088747922845e-08, - "loss": 0.7121, - "num_input_tokens_seen": 168462765, - "step": 7899 - }, - { - "epoch": 0.9499188360488187, - "grad_norm": 3.055979145460257, - "learning_rate": 2.619507554239786e-08, - "loss": 0.7175, - "num_input_tokens_seen": 168481045, - "step": 7900 - }, - { - "epoch": 0.9500390789394577, - "grad_norm": 2.0833230995363703, - "learning_rate": 2.606956303020502e-08, - "loss": 0.6992, - "num_input_tokens_seen": 168501570, - "step": 7901 - }, - { - "epoch": 0.9501593218300968, - "grad_norm": 1.8408385206878377, - "learning_rate": 2.5944349961690036e-08, - "loss": 0.8311, - "num_input_tokens_seen": 168518310, - "step": 7902 - }, - { - "epoch": 0.9502795647207359, - "grad_norm": 1.7946286642215257, - "learning_rate": 2.581943635584749e-08, - "loss": 0.7257, - "num_input_tokens_seen": 168540860, - "step": 7903 - }, - { - "epoch": 0.950399807611375, - "grad_norm": 2.104113168371056, - "learning_rate": 2.569482223162689e-08, - "loss": 0.6541, - "num_input_tokens_seen": 168564555, - "step": 7904 - }, - { - "epoch": 0.950520050502014, - "grad_norm": 1.831970054004136, - "learning_rate": 2.5570507607932e-08, - "loss": 0.7189, - "num_input_tokens_seen": 168584190, - "step": 7905 - }, - { - "epoch": 0.9506402933926532, - "grad_norm": 3.36053132209125, - "learning_rate": 2.54464925036213e-08, - "loss": 0.6264, - "num_input_tokens_seen": 168601200, - "step": 7906 - }, - { - "epoch": 0.9507605362832923, - "grad_norm": 2.5850478358333913, - "learning_rate": 2.532277693750773e-08, - "loss": 0.597, - "num_input_tokens_seen": 168621845, - "step": 7907 - }, - { - "epoch": 0.9508807791739313, - "grad_norm": 4.271286290063512, - "learning_rate": 2.5199360928358726e-08, - "loss": 0.7499, - "num_input_tokens_seen": 168638800, - "step": 7908 - }, - { - "epoch": 0.9510010220645704, - "grad_norm": 2.051570321444406, - "learning_rate": 2.507624449489665e-08, - "loss": 0.862, - "num_input_tokens_seen": 168657150, - "step": 7909 - }, - { - "epoch": 0.9511212649552095, - "grad_norm": 4.908450471661576, - "learning_rate": 2.4953427655797888e-08, - "loss": 0.6457, - "num_input_tokens_seen": 168675530, - "step": 7910 - }, - { - "epoch": 0.9512415078458486, - "grad_norm": 1.845008486164713, - "learning_rate": 2.4830910429693984e-08, - "loss": 0.7082, - "num_input_tokens_seen": 168695210, - "step": 7911 - }, - { - "epoch": 0.9513617507364877, - "grad_norm": 2.2125042004841315, - "learning_rate": 2.470869283517052e-08, - "loss": 0.7845, - "num_input_tokens_seen": 168712965, - "step": 7912 - }, - { - "epoch": 0.9514819936271268, - "grad_norm": 1.8260647504810248, - "learning_rate": 2.458677489076777e-08, - "loss": 0.7666, - "num_input_tokens_seen": 168733695, - "step": 7913 - }, - { - "epoch": 0.9516022365177659, - "grad_norm": 1.8879815643061244, - "learning_rate": 2.446515661498072e-08, - "loss": 0.8238, - "num_input_tokens_seen": 168752745, - "step": 7914 - }, - { - "epoch": 0.9517224794084049, - "grad_norm": 5.020952357132159, - "learning_rate": 2.434383802625861e-08, - "loss": 0.735, - "num_input_tokens_seen": 168771420, - "step": 7915 - }, - { - "epoch": 0.9518427222990441, - "grad_norm": 2.288130826957981, - "learning_rate": 2.422281914300539e-08, - "loss": 0.7349, - "num_input_tokens_seen": 168790735, - "step": 7916 - }, - { - "epoch": 0.9519629651896832, - "grad_norm": 2.1950592436228598, - "learning_rate": 2.4102099983579706e-08, - "loss": 0.8019, - "num_input_tokens_seen": 168809605, - "step": 7917 - }, - { - "epoch": 0.9520832080803222, - "grad_norm": 1.8318396159718264, - "learning_rate": 2.3981680566294236e-08, - "loss": 0.7549, - "num_input_tokens_seen": 168828925, - "step": 7918 - }, - { - "epoch": 0.9522034509709614, - "grad_norm": 2.0990833969288776, - "learning_rate": 2.3861560909416822e-08, - "loss": 0.7312, - "num_input_tokens_seen": 168848195, - "step": 7919 - }, - { - "epoch": 0.9523236938616004, - "grad_norm": 2.3775611187163412, - "learning_rate": 2.3741741031169325e-08, - "loss": 0.8179, - "num_input_tokens_seen": 168867485, - "step": 7920 - }, - { - "epoch": 0.9524439367522395, - "grad_norm": 2.161232264195119, - "learning_rate": 2.3622220949728544e-08, - "loss": 0.7035, - "num_input_tokens_seen": 168886090, - "step": 7921 - }, - { - "epoch": 0.9525641796428787, - "grad_norm": 3.072965574445768, - "learning_rate": 2.3503000683225304e-08, - "loss": 0.6154, - "num_input_tokens_seen": 168903525, - "step": 7922 - }, - { - "epoch": 0.9526844225335177, - "grad_norm": 2.4445056856672696, - "learning_rate": 2.3384080249745585e-08, - "loss": 0.8349, - "num_input_tokens_seen": 168921135, - "step": 7923 - }, - { - "epoch": 0.9528046654241568, - "grad_norm": 2.8082466760971396, - "learning_rate": 2.3265459667329178e-08, - "loss": 0.8351, - "num_input_tokens_seen": 168940345, - "step": 7924 - }, - { - "epoch": 0.9529249083147959, - "grad_norm": 3.2794835538733422, - "learning_rate": 2.31471389539708e-08, - "loss": 0.8589, - "num_input_tokens_seen": 168957190, - "step": 7925 - }, - { - "epoch": 0.953045151205435, - "grad_norm": 2.3487699881912922, - "learning_rate": 2.302911812761965e-08, - "loss": 0.7259, - "num_input_tokens_seen": 168976625, - "step": 7926 - }, - { - "epoch": 0.953165394096074, - "grad_norm": 2.244578565167704, - "learning_rate": 2.2911397206179628e-08, - "loss": 0.8589, - "num_input_tokens_seen": 168993095, - "step": 7927 - }, - { - "epoch": 0.9532856369867132, - "grad_norm": 5.287023861445611, - "learning_rate": 2.279397620750845e-08, - "loss": 0.6197, - "num_input_tokens_seen": 169011860, - "step": 7928 - }, - { - "epoch": 0.9534058798773523, - "grad_norm": 3.2592722599664192, - "learning_rate": 2.2676855149419195e-08, - "loss": 0.7823, - "num_input_tokens_seen": 169028750, - "step": 7929 - }, - { - "epoch": 0.9535261227679913, - "grad_norm": 2.4184075683096546, - "learning_rate": 2.2560034049678766e-08, - "loss": 0.7406, - "num_input_tokens_seen": 169042820, - "step": 7930 - }, - { - "epoch": 0.9536463656586305, - "grad_norm": 2.767569650434561, - "learning_rate": 2.2443512926008988e-08, - "loss": 0.7494, - "num_input_tokens_seen": 169061870, - "step": 7931 - }, - { - "epoch": 0.9537666085492695, - "grad_norm": 2.571659299413914, - "learning_rate": 2.2327291796085946e-08, - "loss": 0.7, - "num_input_tokens_seen": 169079950, - "step": 7932 - }, - { - "epoch": 0.9538868514399086, - "grad_norm": 4.5221436510617705, - "learning_rate": 2.221137067754042e-08, - "loss": 0.7643, - "num_input_tokens_seen": 169096195, - "step": 7933 - }, - { - "epoch": 0.9540070943305478, - "grad_norm": 3.122665925840265, - "learning_rate": 2.2095749587957012e-08, - "loss": 0.7753, - "num_input_tokens_seen": 169113820, - "step": 7934 - }, - { - "epoch": 0.9541273372211868, - "grad_norm": 2.371007277750419, - "learning_rate": 2.1980428544876138e-08, - "loss": 0.6927, - "num_input_tokens_seen": 169132180, - "step": 7935 - }, - { - "epoch": 0.9542475801118259, - "grad_norm": 1.7739209527619382, - "learning_rate": 2.1865407565791584e-08, - "loss": 0.7308, - "num_input_tokens_seen": 169153470, - "step": 7936 - }, - { - "epoch": 0.954367823002465, - "grad_norm": 2.2624711270871396, - "learning_rate": 2.175068666815183e-08, - "loss": 0.7673, - "num_input_tokens_seen": 169174030, - "step": 7937 - }, - { - "epoch": 0.9544880658931041, - "grad_norm": 2.5346677480768722, - "learning_rate": 2.163626586935985e-08, - "loss": 0.7883, - "num_input_tokens_seen": 169190290, - "step": 7938 - }, - { - "epoch": 0.9546083087837431, - "grad_norm": 2.474563057907463, - "learning_rate": 2.1522145186773533e-08, - "loss": 0.6266, - "num_input_tokens_seen": 169208930, - "step": 7939 - }, - { - "epoch": 0.9547285516743822, - "grad_norm": 2.131475109895167, - "learning_rate": 2.140832463770481e-08, - "loss": 0.8506, - "num_input_tokens_seen": 169227845, - "step": 7940 - }, - { - "epoch": 0.9548487945650214, - "grad_norm": 8.565388463467404, - "learning_rate": 2.129480423941987e-08, - "loss": 0.7501, - "num_input_tokens_seen": 169244235, - "step": 7941 - }, - { - "epoch": 0.9549690374556604, - "grad_norm": 2.497887772340388, - "learning_rate": 2.1181584009140052e-08, - "loss": 0.8, - "num_input_tokens_seen": 169263495, - "step": 7942 - }, - { - "epoch": 0.9550892803462995, - "grad_norm": 2.6260386267379134, - "learning_rate": 2.10686639640405e-08, - "loss": 0.8307, - "num_input_tokens_seen": 169277305, - "step": 7943 - }, - { - "epoch": 0.9552095232369386, - "grad_norm": 2.307565353533796, - "learning_rate": 2.0956044121251294e-08, - "loss": 0.8082, - "num_input_tokens_seen": 169295810, - "step": 7944 - }, - { - "epoch": 0.9553297661275777, - "grad_norm": 2.353113366858493, - "learning_rate": 2.084372449785654e-08, - "loss": 0.8096, - "num_input_tokens_seen": 169315365, - "step": 7945 - }, - { - "epoch": 0.9554500090182168, - "grad_norm": 1.8942030976690745, - "learning_rate": 2.0731705110895282e-08, - "loss": 0.6725, - "num_input_tokens_seen": 169332575, - "step": 7946 - }, - { - "epoch": 0.9555702519088559, - "grad_norm": 2.0011680526915065, - "learning_rate": 2.0619985977360587e-08, - "loss": 0.8651, - "num_input_tokens_seen": 169350615, - "step": 7947 - }, - { - "epoch": 0.955690494799495, - "grad_norm": 2.0123845546260317, - "learning_rate": 2.0508567114200237e-08, - "loss": 0.7664, - "num_input_tokens_seen": 169370250, - "step": 7948 - }, - { - "epoch": 0.955810737690134, - "grad_norm": 2.8528708311849935, - "learning_rate": 2.0397448538316485e-08, - "loss": 0.7735, - "num_input_tokens_seen": 169391010, - "step": 7949 - }, - { - "epoch": 0.9559309805807732, - "grad_norm": 2.1818762262611218, - "learning_rate": 2.028663026656563e-08, - "loss": 0.6527, - "num_input_tokens_seen": 169409585, - "step": 7950 - }, - { - "epoch": 0.9560512234714122, - "grad_norm": 2.187762253747632, - "learning_rate": 2.0176112315758885e-08, - "loss": 0.7079, - "num_input_tokens_seen": 169427095, - "step": 7951 - }, - { - "epoch": 0.9561714663620513, - "grad_norm": 3.565405894107871, - "learning_rate": 2.0065894702661957e-08, - "loss": 0.6929, - "num_input_tokens_seen": 169443490, - "step": 7952 - }, - { - "epoch": 0.9562917092526905, - "grad_norm": 2.251478513492884, - "learning_rate": 1.9955977443994577e-08, - "loss": 0.7781, - "num_input_tokens_seen": 169463200, - "step": 7953 - }, - { - "epoch": 0.9564119521433295, - "grad_norm": 2.674929553048704, - "learning_rate": 1.9846360556430965e-08, - "loss": 0.6315, - "num_input_tokens_seen": 169481220, - "step": 7954 - }, - { - "epoch": 0.9565321950339686, - "grad_norm": 2.6161275933809938, - "learning_rate": 1.973704405660004e-08, - "loss": 0.614, - "num_input_tokens_seen": 169502055, - "step": 7955 - }, - { - "epoch": 0.9566524379246077, - "grad_norm": 1.8275144177077192, - "learning_rate": 1.9628027961085203e-08, - "loss": 0.7686, - "num_input_tokens_seen": 169525005, - "step": 7956 - }, - { - "epoch": 0.9567726808152468, - "grad_norm": 1.971944240910578, - "learning_rate": 1.9519312286423894e-08, - "loss": 0.8267, - "num_input_tokens_seen": 169547920, - "step": 7957 - }, - { - "epoch": 0.9568929237058859, - "grad_norm": 4.072879737707969, - "learning_rate": 1.9410897049108255e-08, - "loss": 0.7689, - "num_input_tokens_seen": 169566920, - "step": 7958 - }, - { - "epoch": 0.957013166596525, - "grad_norm": 2.1094889135384163, - "learning_rate": 1.9302782265584905e-08, - "loss": 0.9061, - "num_input_tokens_seen": 169587305, - "step": 7959 - }, - { - "epoch": 0.9571334094871641, - "grad_norm": 2.391583435135503, - "learning_rate": 1.9194967952254504e-08, - "loss": 0.8609, - "num_input_tokens_seen": 169605600, - "step": 7960 - }, - { - "epoch": 0.9572536523778031, - "grad_norm": 2.4597325502862613, - "learning_rate": 1.9087454125472635e-08, - "loss": 0.7983, - "num_input_tokens_seen": 169619795, - "step": 7961 - }, - { - "epoch": 0.9573738952684423, - "grad_norm": 2.1077675693187388, - "learning_rate": 1.8980240801548696e-08, - "loss": 0.7799, - "num_input_tokens_seen": 169638705, - "step": 7962 - }, - { - "epoch": 0.9574941381590814, - "grad_norm": 1.8438501368236029, - "learning_rate": 1.8873327996747458e-08, - "loss": 0.7348, - "num_input_tokens_seen": 169656925, - "step": 7963 - }, - { - "epoch": 0.9576143810497204, - "grad_norm": 2.150654740561062, - "learning_rate": 1.8766715727287053e-08, - "loss": 0.6595, - "num_input_tokens_seen": 169678350, - "step": 7964 - }, - { - "epoch": 0.9577346239403596, - "grad_norm": 1.8844785279048308, - "learning_rate": 1.8660404009340546e-08, - "loss": 0.7811, - "num_input_tokens_seen": 169698520, - "step": 7965 - }, - { - "epoch": 0.9578548668309986, - "grad_norm": 0.9324936308729534, - "learning_rate": 1.8554392859035485e-08, - "loss": 0.664, - "num_input_tokens_seen": 169755990, - "step": 7966 - }, - { - "epoch": 0.9579751097216377, - "grad_norm": 1.919559263784753, - "learning_rate": 1.8448682292453444e-08, - "loss": 0.7821, - "num_input_tokens_seen": 169774785, - "step": 7967 - }, - { - "epoch": 0.9580953526122769, - "grad_norm": 2.4468980376831193, - "learning_rate": 1.8343272325631154e-08, - "loss": 0.6675, - "num_input_tokens_seen": 169793450, - "step": 7968 - }, - { - "epoch": 0.9582155955029159, - "grad_norm": 2.9481665710402356, - "learning_rate": 1.8238162974558492e-08, - "loss": 0.7684, - "num_input_tokens_seen": 169807100, - "step": 7969 - }, - { - "epoch": 0.958335838393555, - "grad_norm": 2.0919724748888733, - "learning_rate": 1.8133354255180922e-08, - "loss": 0.7409, - "num_input_tokens_seen": 169827135, - "step": 7970 - }, - { - "epoch": 0.958456081284194, - "grad_norm": 3.1962032399822204, - "learning_rate": 1.8028846183397727e-08, - "loss": 0.7532, - "num_input_tokens_seen": 169845660, - "step": 7971 - }, - { - "epoch": 0.9585763241748332, - "grad_norm": 5.430738491819635, - "learning_rate": 1.7924638775062894e-08, - "loss": 0.8062, - "num_input_tokens_seen": 169864500, - "step": 7972 - }, - { - "epoch": 0.9586965670654722, - "grad_norm": 1.9866314431317613, - "learning_rate": 1.7820732045984444e-08, - "loss": 0.8141, - "num_input_tokens_seen": 169884365, - "step": 7973 - }, - { - "epoch": 0.9588168099561113, - "grad_norm": 1.9135452002848408, - "learning_rate": 1.7717126011924655e-08, - "loss": 0.7381, - "num_input_tokens_seen": 169905670, - "step": 7974 - }, - { - "epoch": 0.9589370528467505, - "grad_norm": 3.0833277736629165, - "learning_rate": 1.7613820688600957e-08, - "loss": 0.7561, - "num_input_tokens_seen": 169921295, - "step": 7975 - }, - { - "epoch": 0.9590572957373895, - "grad_norm": 2.8037767955591897, - "learning_rate": 1.7510816091684588e-08, - "loss": 0.7831, - "num_input_tokens_seen": 169940940, - "step": 7976 - }, - { - "epoch": 0.9591775386280286, - "grad_norm": 3.1708717613445248, - "learning_rate": 1.7408112236801053e-08, - "loss": 0.7924, - "num_input_tokens_seen": 169957515, - "step": 7977 - }, - { - "epoch": 0.9592977815186677, - "grad_norm": 13.37495886578553, - "learning_rate": 1.7305709139530334e-08, - "loss": 0.7396, - "num_input_tokens_seen": 169976015, - "step": 7978 - }, - { - "epoch": 0.9594180244093068, - "grad_norm": 2.423448386526746, - "learning_rate": 1.7203606815407334e-08, - "loss": 0.7463, - "num_input_tokens_seen": 169990330, - "step": 7979 - }, - { - "epoch": 0.9595382672999458, - "grad_norm": 2.0307758473377198, - "learning_rate": 1.7101805279920557e-08, - "loss": 0.7916, - "num_input_tokens_seen": 170008210, - "step": 7980 - }, - { - "epoch": 0.959658510190585, - "grad_norm": 3.0735222189430376, - "learning_rate": 1.7000304548513643e-08, - "loss": 0.8102, - "num_input_tokens_seen": 170028035, - "step": 7981 - }, - { - "epoch": 0.9597787530812241, - "grad_norm": 2.580253512112275, - "learning_rate": 1.6899104636583394e-08, - "loss": 0.812, - "num_input_tokens_seen": 170045805, - "step": 7982 - }, - { - "epoch": 0.9598989959718631, - "grad_norm": 0.763674509277108, - "learning_rate": 1.6798205559482638e-08, - "loss": 0.6557, - "num_input_tokens_seen": 170107905, - "step": 7983 - }, - { - "epoch": 0.9600192388625023, - "grad_norm": 2.5518254776811484, - "learning_rate": 1.669760733251713e-08, - "loss": 0.7605, - "num_input_tokens_seen": 170126500, - "step": 7984 - }, - { - "epoch": 0.9601394817531413, - "grad_norm": 2.094349304433427, - "learning_rate": 1.659730997094755e-08, - "loss": 0.8205, - "num_input_tokens_seen": 170144710, - "step": 7985 - }, - { - "epoch": 0.9602597246437804, - "grad_norm": 1.8968075376209723, - "learning_rate": 1.6497313489989283e-08, - "loss": 0.6127, - "num_input_tokens_seen": 170164255, - "step": 7986 - }, - { - "epoch": 0.9603799675344196, - "grad_norm": 3.5113935915170638, - "learning_rate": 1.639761790481131e-08, - "loss": 0.6948, - "num_input_tokens_seen": 170184855, - "step": 7987 - }, - { - "epoch": 0.9605002104250586, - "grad_norm": 2.3703774336825965, - "learning_rate": 1.6298223230537754e-08, - "loss": 0.7852, - "num_input_tokens_seen": 170202375, - "step": 7988 - }, - { - "epoch": 0.9606204533156977, - "grad_norm": 2.4605232139940822, - "learning_rate": 1.6199129482246333e-08, - "loss": 0.6846, - "num_input_tokens_seen": 170223300, - "step": 7989 - }, - { - "epoch": 0.9607406962063368, - "grad_norm": 3.7221899342562614, - "learning_rate": 1.6100336674969682e-08, - "loss": 0.6013, - "num_input_tokens_seen": 170241860, - "step": 7990 - }, - { - "epoch": 0.9608609390969759, - "grad_norm": 2.5758386934124604, - "learning_rate": 1.600184482369449e-08, - "loss": 0.766, - "num_input_tokens_seen": 170261495, - "step": 7991 - }, - { - "epoch": 0.960981181987615, - "grad_norm": 2.376932590939063, - "learning_rate": 1.5903653943362126e-08, - "loss": 0.894, - "num_input_tokens_seen": 170280210, - "step": 7992 - }, - { - "epoch": 0.9611014248782541, - "grad_norm": 2.7639449677992003, - "learning_rate": 1.580576404886802e-08, - "loss": 0.7566, - "num_input_tokens_seen": 170298460, - "step": 7993 - }, - { - "epoch": 0.9612216677688932, - "grad_norm": 1.9954917169327204, - "learning_rate": 1.570817515506162e-08, - "loss": 0.7937, - "num_input_tokens_seen": 170316870, - "step": 7994 - }, - { - "epoch": 0.9613419106595322, - "grad_norm": 2.1146164838727146, - "learning_rate": 1.561088727674753e-08, - "loss": 0.804, - "num_input_tokens_seen": 170330800, - "step": 7995 - }, - { - "epoch": 0.9614621535501714, - "grad_norm": 2.8287339152070974, - "learning_rate": 1.551390042868417e-08, - "loss": 0.7035, - "num_input_tokens_seen": 170352290, - "step": 7996 - }, - { - "epoch": 0.9615823964408104, - "grad_norm": 2.0839700740286173, - "learning_rate": 1.5417214625584207e-08, - "loss": 0.7027, - "num_input_tokens_seen": 170369665, - "step": 7997 - }, - { - "epoch": 0.9617026393314495, - "grad_norm": 1.8568633179769516, - "learning_rate": 1.5320829882114584e-08, - "loss": 0.8479, - "num_input_tokens_seen": 170387460, - "step": 7998 - }, - { - "epoch": 0.9618228822220887, - "grad_norm": 2.121928857208219, - "learning_rate": 1.5224746212897378e-08, - "loss": 0.7788, - "num_input_tokens_seen": 170406475, - "step": 7999 - }, - { - "epoch": 0.9619431251127277, - "grad_norm": 1.660845754178301, - "learning_rate": 1.512896363250804e-08, - "loss": 0.7621, - "num_input_tokens_seen": 170426305, - "step": 8000 - }, - { - "epoch": 0.9620633680033668, - "grad_norm": 2.331924161128743, - "learning_rate": 1.503348215547673e-08, - "loss": 0.7476, - "num_input_tokens_seen": 170447115, - "step": 8001 - }, - { - "epoch": 0.962183610894006, - "grad_norm": 1.8891233275701773, - "learning_rate": 1.4938301796288078e-08, - "loss": 0.7957, - "num_input_tokens_seen": 170463405, - "step": 8002 - }, - { - "epoch": 0.962303853784645, - "grad_norm": 2.712257359943641, - "learning_rate": 1.4843422569380537e-08, - "loss": 0.8182, - "num_input_tokens_seen": 170479880, - "step": 8003 - }, - { - "epoch": 0.9624240966752841, - "grad_norm": 2.1430669697397913, - "learning_rate": 1.4748844489147483e-08, - "loss": 0.8223, - "num_input_tokens_seen": 170496590, - "step": 8004 - }, - { - "epoch": 0.9625443395659231, - "grad_norm": 2.2377112735916644, - "learning_rate": 1.4654567569936326e-08, - "loss": 0.7039, - "num_input_tokens_seen": 170513885, - "step": 8005 - }, - { - "epoch": 0.9626645824565623, - "grad_norm": 2.8734296156319075, - "learning_rate": 1.456059182604874e-08, - "loss": 0.8187, - "num_input_tokens_seen": 170532410, - "step": 8006 - }, - { - "epoch": 0.9627848253472013, - "grad_norm": 2.8486158705629547, - "learning_rate": 1.4466917271740653e-08, - "loss": 0.7558, - "num_input_tokens_seen": 170550330, - "step": 8007 - }, - { - "epoch": 0.9629050682378404, - "grad_norm": 2.3415190061961337, - "learning_rate": 1.4373543921222697e-08, - "loss": 0.6699, - "num_input_tokens_seen": 170569635, - "step": 8008 - }, - { - "epoch": 0.9630253111284796, - "grad_norm": 3.1784240889830198, - "learning_rate": 1.428047178865932e-08, - "loss": 0.7731, - "num_input_tokens_seen": 170586145, - "step": 8009 - }, - { - "epoch": 0.9631455540191186, - "grad_norm": 3.8905884786682123, - "learning_rate": 1.418770088816923e-08, - "loss": 0.7343, - "num_input_tokens_seen": 170605040, - "step": 8010 - }, - { - "epoch": 0.9632657969097577, - "grad_norm": 0.833427001999858, - "learning_rate": 1.40952312338265e-08, - "loss": 0.6487, - "num_input_tokens_seen": 170669405, - "step": 8011 - }, - { - "epoch": 0.9633860398003968, - "grad_norm": 1.6966099832892405, - "learning_rate": 1.4003062839657909e-08, - "loss": 0.678, - "num_input_tokens_seen": 170691605, - "step": 8012 - }, - { - "epoch": 0.9635062826910359, - "grad_norm": 1.7189036580018384, - "learning_rate": 1.391119571964583e-08, - "loss": 0.7925, - "num_input_tokens_seen": 170712265, - "step": 8013 - }, - { - "epoch": 0.9636265255816749, - "grad_norm": 1.9626285073365846, - "learning_rate": 1.3819629887726003e-08, - "loss": 0.7282, - "num_input_tokens_seen": 170730075, - "step": 8014 - }, - { - "epoch": 0.9637467684723141, - "grad_norm": 2.4162597051472328, - "learning_rate": 1.3728365357789317e-08, - "loss": 0.763, - "num_input_tokens_seen": 170749160, - "step": 8015 - }, - { - "epoch": 0.9638670113629532, - "grad_norm": 3.891502961263289, - "learning_rate": 1.3637402143680254e-08, - "loss": 0.7608, - "num_input_tokens_seen": 170763780, - "step": 8016 - }, - { - "epoch": 0.9639872542535922, - "grad_norm": 0.7841700082856119, - "learning_rate": 1.3546740259197998e-08, - "loss": 0.5744, - "num_input_tokens_seen": 170816310, - "step": 8017 - }, - { - "epoch": 0.9641074971442314, - "grad_norm": 2.339438057956562, - "learning_rate": 1.3456379718095989e-08, - "loss": 0.6929, - "num_input_tokens_seen": 170836445, - "step": 8018 - }, - { - "epoch": 0.9642277400348704, - "grad_norm": 0.8922137860030963, - "learning_rate": 1.3366320534081487e-08, - "loss": 0.6545, - "num_input_tokens_seen": 170898845, - "step": 8019 - }, - { - "epoch": 0.9643479829255095, - "grad_norm": 2.8914530739701556, - "learning_rate": 1.3276562720816675e-08, - "loss": 0.7499, - "num_input_tokens_seen": 170920075, - "step": 8020 - }, - { - "epoch": 0.9644682258161487, - "grad_norm": 3.2364401260642817, - "learning_rate": 1.3187106291917549e-08, - "loss": 0.8274, - "num_input_tokens_seen": 170936785, - "step": 8021 - }, - { - "epoch": 0.9645884687067877, - "grad_norm": 1.9971944082193316, - "learning_rate": 1.3097951260954809e-08, - "loss": 0.6966, - "num_input_tokens_seen": 170954805, - "step": 8022 - }, - { - "epoch": 0.9647087115974268, - "grad_norm": 2.4455279274616686, - "learning_rate": 1.3009097641453192e-08, - "loss": 0.8022, - "num_input_tokens_seen": 170972375, - "step": 8023 - }, - { - "epoch": 0.9648289544880659, - "grad_norm": 1.8223879618476428, - "learning_rate": 1.2920545446891474e-08, - "loss": 0.7577, - "num_input_tokens_seen": 170988815, - "step": 8024 - }, - { - "epoch": 0.964949197378705, - "grad_norm": 2.0937659587575155, - "learning_rate": 1.2832294690703127e-08, - "loss": 0.6937, - "num_input_tokens_seen": 171007510, - "step": 8025 - }, - { - "epoch": 0.965069440269344, - "grad_norm": 2.3079748720877977, - "learning_rate": 1.2744345386275668e-08, - "loss": 0.7692, - "num_input_tokens_seen": 171026770, - "step": 8026 - }, - { - "epoch": 0.9651896831599832, - "grad_norm": 10.118050113344793, - "learning_rate": 1.265669754695109e-08, - "loss": 0.7771, - "num_input_tokens_seen": 171046060, - "step": 8027 - }, - { - "epoch": 0.9653099260506223, - "grad_norm": 4.050076615553602, - "learning_rate": 1.2569351186025201e-08, - "loss": 0.815, - "num_input_tokens_seen": 171064235, - "step": 8028 - }, - { - "epoch": 0.9654301689412613, - "grad_norm": 1.6154677376918176, - "learning_rate": 1.2482306316748737e-08, - "loss": 0.7453, - "num_input_tokens_seen": 171084400, - "step": 8029 - }, - { - "epoch": 0.9655504118319005, - "grad_norm": 2.1825478964128235, - "learning_rate": 1.2395562952326021e-08, - "loss": 0.7827, - "num_input_tokens_seen": 171101280, - "step": 8030 - }, - { - "epoch": 0.9656706547225395, - "grad_norm": 2.6160818897027065, - "learning_rate": 1.2309121105916309e-08, - "loss": 0.8131, - "num_input_tokens_seen": 171119290, - "step": 8031 - }, - { - "epoch": 0.9657908976131786, - "grad_norm": 2.454567307861694, - "learning_rate": 1.222298079063222e-08, - "loss": 0.6807, - "num_input_tokens_seen": 171140150, - "step": 8032 - }, - { - "epoch": 0.9659111405038178, - "grad_norm": 2.145079813702674, - "learning_rate": 1.2137142019541747e-08, - "loss": 0.724, - "num_input_tokens_seen": 171158425, - "step": 8033 - }, - { - "epoch": 0.9660313833944568, - "grad_norm": 2.496751086261041, - "learning_rate": 1.2051604805666027e-08, - "loss": 0.7339, - "num_input_tokens_seen": 171175270, - "step": 8034 - }, - { - "epoch": 0.9661516262850959, - "grad_norm": 2.232208227466493, - "learning_rate": 1.196636916198135e-08, - "loss": 0.7759, - "num_input_tokens_seen": 171192530, - "step": 8035 - }, - { - "epoch": 0.9662718691757349, - "grad_norm": 2.2418081602637034, - "learning_rate": 1.1881435101418036e-08, - "loss": 0.768, - "num_input_tokens_seen": 171211665, - "step": 8036 - }, - { - "epoch": 0.9663921120663741, - "grad_norm": 0.7890566035850896, - "learning_rate": 1.1796802636860003e-08, - "loss": 0.7021, - "num_input_tokens_seen": 171279915, - "step": 8037 - }, - { - "epoch": 0.9665123549570132, - "grad_norm": 2.633004589320094, - "learning_rate": 1.1712471781146316e-08, - "loss": 0.7292, - "num_input_tokens_seen": 171298970, - "step": 8038 - }, - { - "epoch": 0.9666325978476522, - "grad_norm": 2.2410098803118297, - "learning_rate": 1.1628442547069628e-08, - "loss": 0.6734, - "num_input_tokens_seen": 171320890, - "step": 8039 - }, - { - "epoch": 0.9667528407382914, - "grad_norm": 3.9047966056289662, - "learning_rate": 1.1544714947377521e-08, - "loss": 0.7543, - "num_input_tokens_seen": 171338295, - "step": 8040 - }, - { - "epoch": 0.9668730836289304, - "grad_norm": 4.617361842263025, - "learning_rate": 1.1461288994770945e-08, - "loss": 0.699, - "num_input_tokens_seen": 171357090, - "step": 8041 - }, - { - "epoch": 0.9669933265195695, - "grad_norm": 1.8365444211448254, - "learning_rate": 1.1378164701905778e-08, - "loss": 0.7725, - "num_input_tokens_seen": 171378575, - "step": 8042 - }, - { - "epoch": 0.9671135694102087, - "grad_norm": 2.049513339049453, - "learning_rate": 1.1295342081392156e-08, - "loss": 0.6593, - "num_input_tokens_seen": 171397655, - "step": 8043 - }, - { - "epoch": 0.9672338123008477, - "grad_norm": 1.8331255606877064, - "learning_rate": 1.1212821145793804e-08, - "loss": 0.6853, - "num_input_tokens_seen": 171416990, - "step": 8044 - }, - { - "epoch": 0.9673540551914868, - "grad_norm": 2.105240127533223, - "learning_rate": 1.1130601907629156e-08, - "loss": 0.7801, - "num_input_tokens_seen": 171434440, - "step": 8045 - }, - { - "epoch": 0.9674742980821259, - "grad_norm": 0.8652868216238374, - "learning_rate": 1.1048684379370899e-08, - "loss": 0.6807, - "num_input_tokens_seen": 171494845, - "step": 8046 - }, - { - "epoch": 0.967594540972765, - "grad_norm": 2.3931982480796345, - "learning_rate": 1.0967068573445759e-08, - "loss": 0.7433, - "num_input_tokens_seen": 171512050, - "step": 8047 - }, - { - "epoch": 0.967714783863404, - "grad_norm": 2.633423996656163, - "learning_rate": 1.0885754502234945e-08, - "loss": 0.6513, - "num_input_tokens_seen": 171531430, - "step": 8048 - }, - { - "epoch": 0.9678350267540432, - "grad_norm": 2.1158135533162103, - "learning_rate": 1.08047421780737e-08, - "loss": 0.7794, - "num_input_tokens_seen": 171550340, - "step": 8049 - }, - { - "epoch": 0.9679552696446823, - "grad_norm": 2.312157510222528, - "learning_rate": 1.0724031613251305e-08, - "loss": 0.7414, - "num_input_tokens_seen": 171567960, - "step": 8050 - }, - { - "epoch": 0.9680755125353213, - "grad_norm": 2.778771965009616, - "learning_rate": 1.0643622820011744e-08, - "loss": 0.6581, - "num_input_tokens_seen": 171588735, - "step": 8051 - }, - { - "epoch": 0.9681957554259605, - "grad_norm": 3.6330454600610715, - "learning_rate": 1.0563515810552814e-08, - "loss": 0.6762, - "num_input_tokens_seen": 171605425, - "step": 8052 - }, - { - "epoch": 0.9683159983165995, - "grad_norm": 1.5918218422794614, - "learning_rate": 1.0483710597026795e-08, - "loss": 0.7289, - "num_input_tokens_seen": 171625005, - "step": 8053 - }, - { - "epoch": 0.9684362412072386, - "grad_norm": 2.4468582438185225, - "learning_rate": 1.0404207191540004e-08, - "loss": 0.7333, - "num_input_tokens_seen": 171645180, - "step": 8054 - }, - { - "epoch": 0.9685564840978778, - "grad_norm": 2.169848576469213, - "learning_rate": 1.0325005606153236e-08, - "loss": 0.7491, - "num_input_tokens_seen": 171664360, - "step": 8055 - }, - { - "epoch": 0.9686767269885168, - "grad_norm": 2.9045212662637767, - "learning_rate": 1.0246105852881104e-08, - "loss": 0.778, - "num_input_tokens_seen": 171679180, - "step": 8056 - }, - { - "epoch": 0.9687969698791559, - "grad_norm": 1.922016499138457, - "learning_rate": 1.0167507943692476e-08, - "loss": 0.7812, - "num_input_tokens_seen": 171697985, - "step": 8057 - }, - { - "epoch": 0.968917212769795, - "grad_norm": 2.5707928562644673, - "learning_rate": 1.008921189051093e-08, - "loss": 0.707, - "num_input_tokens_seen": 171715050, - "step": 8058 - }, - { - "epoch": 0.9690374556604341, - "grad_norm": 2.018293448098207, - "learning_rate": 1.0011217705213848e-08, - "loss": 0.7664, - "num_input_tokens_seen": 171732645, - "step": 8059 - }, - { - "epoch": 0.9691576985510731, - "grad_norm": 1.8592714532205175, - "learning_rate": 9.933525399632658e-09, - "loss": 0.7486, - "num_input_tokens_seen": 171750600, - "step": 8060 - }, - { - "epoch": 0.9692779414417123, - "grad_norm": 2.051628554743265, - "learning_rate": 9.856134985553488e-09, - "loss": 0.6485, - "num_input_tokens_seen": 171770045, - "step": 8061 - }, - { - "epoch": 0.9693981843323514, - "grad_norm": 2.3525745578273565, - "learning_rate": 9.77904647471628e-09, - "loss": 0.7299, - "num_input_tokens_seen": 171792945, - "step": 8062 - }, - { - "epoch": 0.9695184272229904, - "grad_norm": 2.18426259739792, - "learning_rate": 9.702259878815454e-09, - "loss": 0.7336, - "num_input_tokens_seen": 171812990, - "step": 8063 - }, - { - "epoch": 0.9696386701136296, - "grad_norm": 3.2437377377041323, - "learning_rate": 9.625775209499254e-09, - "loss": 0.74, - "num_input_tokens_seen": 171832715, - "step": 8064 - }, - { - "epoch": 0.9697589130042686, - "grad_norm": 2.1817471257718837, - "learning_rate": 9.549592478370394e-09, - "loss": 0.7373, - "num_input_tokens_seen": 171850615, - "step": 8065 - }, - { - "epoch": 0.9698791558949077, - "grad_norm": 2.17472122246022, - "learning_rate": 9.473711696985632e-09, - "loss": 0.7955, - "num_input_tokens_seen": 171869665, - "step": 8066 - }, - { - "epoch": 0.9699993987855468, - "grad_norm": 2.5050576842799046, - "learning_rate": 9.398132876856201e-09, - "loss": 0.7515, - "num_input_tokens_seen": 171888350, - "step": 8067 - }, - { - "epoch": 0.9701196416761859, - "grad_norm": 0.7904499167935078, - "learning_rate": 9.322856029447379e-09, - "loss": 0.6453, - "num_input_tokens_seen": 171949255, - "step": 8068 - }, - { - "epoch": 0.970239884566825, - "grad_norm": 3.8226679822290865, - "learning_rate": 9.247881166178695e-09, - "loss": 0.7931, - "num_input_tokens_seen": 171967685, - "step": 8069 - }, - { - "epoch": 0.970360127457464, - "grad_norm": 2.8873106668989172, - "learning_rate": 9.173208298423274e-09, - "loss": 0.7713, - "num_input_tokens_seen": 171988610, - "step": 8070 - }, - { - "epoch": 0.9704803703481032, - "grad_norm": 1.9418277279599057, - "learning_rate": 9.098837437509389e-09, - "loss": 0.7564, - "num_input_tokens_seen": 172011220, - "step": 8071 - }, - { - "epoch": 0.9706006132387422, - "grad_norm": 1.7997187337770832, - "learning_rate": 9.024768594719124e-09, - "loss": 0.8343, - "num_input_tokens_seen": 172029320, - "step": 8072 - }, - { - "epoch": 0.9707208561293813, - "grad_norm": 2.4917085762968383, - "learning_rate": 8.95100178128816e-09, - "loss": 0.7198, - "num_input_tokens_seen": 172048180, - "step": 8073 - }, - { - "epoch": 0.9708410990200205, - "grad_norm": 2.2418718082442832, - "learning_rate": 8.877537008407321e-09, - "loss": 0.6997, - "num_input_tokens_seen": 172067950, - "step": 8074 - }, - { - "epoch": 0.9709613419106595, - "grad_norm": 4.469347936666569, - "learning_rate": 8.804374287221028e-09, - "loss": 0.6742, - "num_input_tokens_seen": 172088905, - "step": 8075 - }, - { - "epoch": 0.9710815848012986, - "grad_norm": 1.6330953968385515, - "learning_rate": 8.731513628827958e-09, - "loss": 0.8399, - "num_input_tokens_seen": 172107990, - "step": 8076 - }, - { - "epoch": 0.9712018276919377, - "grad_norm": 2.041194962644168, - "learning_rate": 8.658955044281047e-09, - "loss": 0.8179, - "num_input_tokens_seen": 172126635, - "step": 8077 - }, - { - "epoch": 0.9713220705825768, - "grad_norm": 1.7451976900278796, - "learning_rate": 8.586698544587268e-09, - "loss": 0.7642, - "num_input_tokens_seen": 172147965, - "step": 8078 - }, - { - "epoch": 0.9714423134732159, - "grad_norm": 2.08309210406066, - "learning_rate": 8.514744140707853e-09, - "loss": 0.7359, - "num_input_tokens_seen": 172166825, - "step": 8079 - }, - { - "epoch": 0.971562556363855, - "grad_norm": 1.87159604320456, - "learning_rate": 8.443091843558515e-09, - "loss": 0.7573, - "num_input_tokens_seen": 172185630, - "step": 8080 - }, - { - "epoch": 0.9716827992544941, - "grad_norm": 2.6323068351592824, - "learning_rate": 8.37174166400878e-09, - "loss": 0.6443, - "num_input_tokens_seen": 172200925, - "step": 8081 - }, - { - "epoch": 0.9718030421451331, - "grad_norm": 2.196851156171053, - "learning_rate": 8.300693612881992e-09, - "loss": 0.8477, - "num_input_tokens_seen": 172220710, - "step": 8082 - }, - { - "epoch": 0.9719232850357723, - "grad_norm": 2.5740158929288337, - "learning_rate": 8.22994770095664e-09, - "loss": 0.8008, - "num_input_tokens_seen": 172239005, - "step": 8083 - }, - { - "epoch": 0.9720435279264114, - "grad_norm": 2.593024829472518, - "learning_rate": 8.159503938964585e-09, - "loss": 0.7484, - "num_input_tokens_seen": 172256045, - "step": 8084 - }, - { - "epoch": 0.9721637708170504, - "grad_norm": 1.7659990580690959, - "learning_rate": 8.089362337592164e-09, - "loss": 0.7007, - "num_input_tokens_seen": 172279390, - "step": 8085 - }, - { - "epoch": 0.9722840137076896, - "grad_norm": 1.9805584641378822, - "learning_rate": 8.019522907479536e-09, - "loss": 0.7165, - "num_input_tokens_seen": 172299470, - "step": 8086 - }, - { - "epoch": 0.9724042565983286, - "grad_norm": 2.135805961351635, - "learning_rate": 7.949985659221558e-09, - "loss": 0.7629, - "num_input_tokens_seen": 172316455, - "step": 8087 - }, - { - "epoch": 0.9725244994889677, - "grad_norm": 2.2411719132482713, - "learning_rate": 7.880750603366904e-09, - "loss": 0.7826, - "num_input_tokens_seen": 172335045, - "step": 8088 - }, - { - "epoch": 0.9726447423796069, - "grad_norm": 2.420216177479266, - "learning_rate": 7.811817750418282e-09, - "loss": 0.7917, - "num_input_tokens_seen": 172353525, - "step": 8089 - }, - { - "epoch": 0.9727649852702459, - "grad_norm": 2.1522786532070413, - "learning_rate": 7.743187110833105e-09, - "loss": 0.792, - "num_input_tokens_seen": 172376005, - "step": 8090 - }, - { - "epoch": 0.972885228160885, - "grad_norm": 1.7732818278657758, - "learning_rate": 7.674858695022602e-09, - "loss": 0.8026, - "num_input_tokens_seen": 172394080, - "step": 8091 - }, - { - "epoch": 0.9730054710515241, - "grad_norm": 2.6817188526706093, - "learning_rate": 7.606832513351591e-09, - "loss": 0.7572, - "num_input_tokens_seen": 172411750, - "step": 8092 - }, - { - "epoch": 0.9731257139421632, - "grad_norm": 0.8172461126494237, - "learning_rate": 7.539108576140264e-09, - "loss": 0.6763, - "num_input_tokens_seen": 172475580, - "step": 8093 - }, - { - "epoch": 0.9732459568328022, - "grad_norm": 2.8911897303082528, - "learning_rate": 7.471686893661732e-09, - "loss": 0.6897, - "num_input_tokens_seen": 172493595, - "step": 8094 - }, - { - "epoch": 0.9733661997234414, - "grad_norm": 2.151796859380502, - "learning_rate": 7.4045674761442636e-09, - "loss": 0.6357, - "num_input_tokens_seen": 172510645, - "step": 8095 - }, - { - "epoch": 0.9734864426140805, - "grad_norm": 2.183968291740034, - "learning_rate": 7.337750333769488e-09, - "loss": 0.7258, - "num_input_tokens_seen": 172530170, - "step": 8096 - }, - { - "epoch": 0.9736066855047195, - "grad_norm": 1.7555191558854748, - "learning_rate": 7.2712354766737425e-09, - "loss": 0.7269, - "num_input_tokens_seen": 172550220, - "step": 8097 - }, - { - "epoch": 0.9737269283953586, - "grad_norm": 2.02609369359964, - "learning_rate": 7.2050229149469565e-09, - "loss": 0.7998, - "num_input_tokens_seen": 172569950, - "step": 8098 - }, - { - "epoch": 0.9738471712859977, - "grad_norm": 2.188116142239428, - "learning_rate": 7.139112658633984e-09, - "loss": 0.6264, - "num_input_tokens_seen": 172589820, - "step": 8099 - }, - { - "epoch": 0.9739674141766368, - "grad_norm": 2.079628933827158, - "learning_rate": 7.073504717733048e-09, - "loss": 0.6986, - "num_input_tokens_seen": 172609105, - "step": 8100 - }, - { - "epoch": 0.9740876570672758, - "grad_norm": 0.7790619474049189, - "learning_rate": 7.008199102196855e-09, - "loss": 0.5921, - "num_input_tokens_seen": 172670250, - "step": 8101 - }, - { - "epoch": 0.974207899957915, - "grad_norm": 0.8477342095568586, - "learning_rate": 6.9431958219321464e-09, - "loss": 0.6195, - "num_input_tokens_seen": 172726135, - "step": 8102 - }, - { - "epoch": 0.9743281428485541, - "grad_norm": 2.02706254065749, - "learning_rate": 6.878494886800146e-09, - "loss": 0.7777, - "num_input_tokens_seen": 172746630, - "step": 8103 - }, - { - "epoch": 0.9744483857391931, - "grad_norm": 2.753348006769035, - "learning_rate": 6.814096306615669e-09, - "loss": 0.7568, - "num_input_tokens_seen": 172764490, - "step": 8104 - }, - { - "epoch": 0.9745686286298323, - "grad_norm": 2.696879116593512, - "learning_rate": 6.750000091148234e-09, - "loss": 0.6456, - "num_input_tokens_seen": 172781505, - "step": 8105 - }, - { - "epoch": 0.9746888715204713, - "grad_norm": 2.0069405232496274, - "learning_rate": 6.686206250120729e-09, - "loss": 0.733, - "num_input_tokens_seen": 172802720, - "step": 8106 - }, - { - "epoch": 0.9748091144111104, - "grad_norm": 2.1308811603440865, - "learning_rate": 6.622714793210749e-09, - "loss": 0.743, - "num_input_tokens_seen": 172821360, - "step": 8107 - }, - { - "epoch": 0.9749293573017496, - "grad_norm": 2.48559094604535, - "learning_rate": 6.559525730050364e-09, - "loss": 0.7749, - "num_input_tokens_seen": 172841180, - "step": 8108 - }, - { - "epoch": 0.9750496001923886, - "grad_norm": 1.8725943292313862, - "learning_rate": 6.496639070224796e-09, - "loss": 0.7528, - "num_input_tokens_seen": 172859385, - "step": 8109 - }, - { - "epoch": 0.9751698430830277, - "grad_norm": 3.3564167404108565, - "learning_rate": 6.4340548232739714e-09, - "loss": 0.824, - "num_input_tokens_seen": 172875305, - "step": 8110 - }, - { - "epoch": 0.9752900859736668, - "grad_norm": 1.7237646268025921, - "learning_rate": 6.371772998692071e-09, - "loss": 0.7886, - "num_input_tokens_seen": 172894280, - "step": 8111 - }, - { - "epoch": 0.9754103288643059, - "grad_norm": 7.091303962462014, - "learning_rate": 6.309793605927094e-09, - "loss": 0.6467, - "num_input_tokens_seen": 172912320, - "step": 8112 - }, - { - "epoch": 0.975530571754945, - "grad_norm": 2.292543861897221, - "learning_rate": 6.248116654381297e-09, - "loss": 0.7944, - "num_input_tokens_seen": 172930510, - "step": 8113 - }, - { - "epoch": 0.9756508146455841, - "grad_norm": 2.511923641487094, - "learning_rate": 6.186742153410751e-09, - "loss": 0.7235, - "num_input_tokens_seen": 172949725, - "step": 8114 - }, - { - "epoch": 0.9757710575362232, - "grad_norm": 2.5089775571322877, - "learning_rate": 6.125670112326453e-09, - "loss": 0.8584, - "num_input_tokens_seen": 172968705, - "step": 8115 - }, - { - "epoch": 0.9758913004268622, - "grad_norm": 2.0001391148349614, - "learning_rate": 6.064900540392548e-09, - "loss": 0.6976, - "num_input_tokens_seen": 172990520, - "step": 8116 - }, - { - "epoch": 0.9760115433175014, - "grad_norm": 2.852656165969772, - "learning_rate": 6.0044334468278835e-09, - "loss": 0.7909, - "num_input_tokens_seen": 173009585, - "step": 8117 - }, - { - "epoch": 0.9761317862081405, - "grad_norm": 2.2101290299613274, - "learning_rate": 5.944268840805345e-09, - "loss": 0.7169, - "num_input_tokens_seen": 173030050, - "step": 8118 - }, - { - "epoch": 0.9762520290987795, - "grad_norm": 2.8577039029166764, - "learning_rate": 5.88440673145163e-09, - "loss": 0.6348, - "num_input_tokens_seen": 173050820, - "step": 8119 - }, - { - "epoch": 0.9763722719894187, - "grad_norm": 2.5105576609568274, - "learning_rate": 5.824847127848142e-09, - "loss": 0.8205, - "num_input_tokens_seen": 173069065, - "step": 8120 - }, - { - "epoch": 0.9764925148800577, - "grad_norm": 2.2402784511274585, - "learning_rate": 5.765590039029433e-09, - "loss": 0.7782, - "num_input_tokens_seen": 173088105, - "step": 8121 - }, - { - "epoch": 0.9766127577706968, - "grad_norm": 1.9066163550843596, - "learning_rate": 5.706635473985422e-09, - "loss": 0.705, - "num_input_tokens_seen": 173111695, - "step": 8122 - }, - { - "epoch": 0.976733000661336, - "grad_norm": 2.2632997316209478, - "learning_rate": 5.647983441658955e-09, - "loss": 0.8472, - "num_input_tokens_seen": 173130775, - "step": 8123 - }, - { - "epoch": 0.976853243551975, - "grad_norm": 2.74453420957537, - "learning_rate": 5.589633950947803e-09, - "loss": 0.6699, - "num_input_tokens_seen": 173147995, - "step": 8124 - }, - { - "epoch": 0.9769734864426141, - "grad_norm": 2.2149294735564493, - "learning_rate": 5.5315870107035535e-09, - "loss": 0.6926, - "num_input_tokens_seen": 173165765, - "step": 8125 - }, - { - "epoch": 0.9770937293332532, - "grad_norm": 2.1642668845232995, - "learning_rate": 5.473842629731607e-09, - "loss": 0.7792, - "num_input_tokens_seen": 173183985, - "step": 8126 - }, - { - "epoch": 0.9772139722238923, - "grad_norm": 2.4800352386580244, - "learning_rate": 5.416400816792066e-09, - "loss": 0.7822, - "num_input_tokens_seen": 173201220, - "step": 8127 - }, - { - "epoch": 0.9773342151145313, - "grad_norm": 2.6603243783657478, - "learning_rate": 5.359261580598407e-09, - "loss": 0.7745, - "num_input_tokens_seen": 173216780, - "step": 8128 - }, - { - "epoch": 0.9774544580051704, - "grad_norm": 2.9479008075999946, - "learning_rate": 5.302424929819027e-09, - "loss": 0.7791, - "num_input_tokens_seen": 173230510, - "step": 8129 - }, - { - "epoch": 0.9775747008958096, - "grad_norm": 2.638121312000568, - "learning_rate": 5.24589087307592e-09, - "loss": 0.7194, - "num_input_tokens_seen": 173247850, - "step": 8130 - }, - { - "epoch": 0.9776949437864486, - "grad_norm": 1.8325908802789879, - "learning_rate": 5.189659418944891e-09, - "loss": 0.6484, - "num_input_tokens_seen": 173277745, - "step": 8131 - }, - { - "epoch": 0.9778151866770877, - "grad_norm": 2.0465207692860234, - "learning_rate": 5.133730575956674e-09, - "loss": 0.7629, - "num_input_tokens_seen": 173297135, - "step": 8132 - }, - { - "epoch": 0.9779354295677268, - "grad_norm": 2.5631169101329028, - "learning_rate": 5.0781043525953696e-09, - "loss": 0.7163, - "num_input_tokens_seen": 173314920, - "step": 8133 - }, - { - "epoch": 0.9780556724583659, - "grad_norm": 2.387397494384778, - "learning_rate": 5.0227807572995605e-09, - "loss": 0.7244, - "num_input_tokens_seen": 173336615, - "step": 8134 - }, - { - "epoch": 0.9781759153490049, - "grad_norm": 2.1916600276962397, - "learning_rate": 4.967759798461646e-09, - "loss": 0.6678, - "num_input_tokens_seen": 173354680, - "step": 8135 - }, - { - "epoch": 0.9782961582396441, - "grad_norm": 2.9123066526660715, - "learning_rate": 4.913041484428282e-09, - "loss": 0.7351, - "num_input_tokens_seen": 173374875, - "step": 8136 - }, - { - "epoch": 0.9784164011302832, - "grad_norm": 2.3403729152970825, - "learning_rate": 4.858625823500384e-09, - "loss": 0.7366, - "num_input_tokens_seen": 173392295, - "step": 8137 - }, - { - "epoch": 0.9785366440209222, - "grad_norm": 2.5410462260144833, - "learning_rate": 4.80451282393246e-09, - "loss": 0.7344, - "num_input_tokens_seen": 173412000, - "step": 8138 - }, - { - "epoch": 0.9786568869115614, - "grad_norm": 2.6484215176247243, - "learning_rate": 4.750702493933722e-09, - "loss": 0.6724, - "num_input_tokens_seen": 173431605, - "step": 8139 - }, - { - "epoch": 0.9787771298022004, - "grad_norm": 2.4341760697815125, - "learning_rate": 4.697194841666974e-09, - "loss": 0.8481, - "num_input_tokens_seen": 173450250, - "step": 8140 - }, - { - "epoch": 0.9788973726928395, - "grad_norm": 2.0426261787081046, - "learning_rate": 4.6439898752492764e-09, - "loss": 0.8124, - "num_input_tokens_seen": 173470110, - "step": 8141 - }, - { - "epoch": 0.9790176155834787, - "grad_norm": 0.7648630205831016, - "learning_rate": 4.591087602751731e-09, - "loss": 0.6391, - "num_input_tokens_seen": 173531690, - "step": 8142 - }, - { - "epoch": 0.9791378584741177, - "grad_norm": 1.777440161350849, - "learning_rate": 4.538488032199916e-09, - "loss": 0.7188, - "num_input_tokens_seen": 173549510, - "step": 8143 - }, - { - "epoch": 0.9792581013647568, - "grad_norm": 3.5043439688867357, - "learning_rate": 4.486191171572784e-09, - "loss": 0.6789, - "num_input_tokens_seen": 173566500, - "step": 8144 - }, - { - "epoch": 0.9793783442553959, - "grad_norm": 1.6145476880619811, - "learning_rate": 4.434197028803766e-09, - "loss": 0.7736, - "num_input_tokens_seen": 173585445, - "step": 8145 - }, - { - "epoch": 0.979498587146035, - "grad_norm": 2.2120591621396035, - "learning_rate": 4.38250561178033e-09, - "loss": 0.8134, - "num_input_tokens_seen": 173601050, - "step": 8146 - }, - { - "epoch": 0.979618830036674, - "grad_norm": 2.874269653901541, - "learning_rate": 4.331116928344425e-09, - "loss": 0.7989, - "num_input_tokens_seen": 173617085, - "step": 8147 - }, - { - "epoch": 0.9797390729273132, - "grad_norm": 2.920578558212431, - "learning_rate": 4.28003098629115e-09, - "loss": 0.6279, - "num_input_tokens_seen": 173632940, - "step": 8148 - }, - { - "epoch": 0.9798593158179523, - "grad_norm": 2.2739781890537714, - "learning_rate": 4.229247793370305e-09, - "loss": 0.79, - "num_input_tokens_seen": 173651785, - "step": 8149 - }, - { - "epoch": 0.9799795587085913, - "grad_norm": 2.022791279758367, - "learning_rate": 4.178767357285951e-09, - "loss": 0.7045, - "num_input_tokens_seen": 173673135, - "step": 8150 - }, - { - "epoch": 0.9800998015992305, - "grad_norm": 2.222109900524572, - "learning_rate": 4.128589685695516e-09, - "loss": 0.7013, - "num_input_tokens_seen": 173693280, - "step": 8151 - }, - { - "epoch": 0.9802200444898695, - "grad_norm": 2.8337398786263743, - "learning_rate": 4.078714786211135e-09, - "loss": 0.8374, - "num_input_tokens_seen": 173708850, - "step": 8152 - }, - { - "epoch": 0.9803402873805086, - "grad_norm": 1.7054149618946184, - "learning_rate": 4.029142666398977e-09, - "loss": 0.7587, - "num_input_tokens_seen": 173728735, - "step": 8153 - }, - { - "epoch": 0.9804605302711478, - "grad_norm": 2.291803388693196, - "learning_rate": 3.979873333778805e-09, - "loss": 0.7918, - "num_input_tokens_seen": 173746630, - "step": 8154 - }, - { - "epoch": 0.9805807731617868, - "grad_norm": 2.5327666698827453, - "learning_rate": 3.930906795824862e-09, - "loss": 0.7396, - "num_input_tokens_seen": 173767025, - "step": 8155 - }, - { - "epoch": 0.9807010160524259, - "grad_norm": 2.270367188752037, - "learning_rate": 3.882243059965207e-09, - "loss": 0.7688, - "num_input_tokens_seen": 173784460, - "step": 8156 - }, - { - "epoch": 0.980821258943065, - "grad_norm": 2.987200053417994, - "learning_rate": 3.833882133582156e-09, - "loss": 0.6633, - "num_input_tokens_seen": 173799840, - "step": 8157 - }, - { - "epoch": 0.9809415018337041, - "grad_norm": 1.750130174657618, - "learning_rate": 3.785824024012285e-09, - "loss": 0.7695, - "num_input_tokens_seen": 173818560, - "step": 8158 - }, - { - "epoch": 0.9810617447243432, - "grad_norm": 1.6644756653090327, - "learning_rate": 3.738068738545541e-09, - "loss": 0.7795, - "num_input_tokens_seen": 173837365, - "step": 8159 - }, - { - "epoch": 0.9811819876149822, - "grad_norm": 7.848257978691657, - "learning_rate": 3.6906162844265733e-09, - "loss": 0.7729, - "num_input_tokens_seen": 173854170, - "step": 8160 - }, - { - "epoch": 0.9813022305056214, - "grad_norm": 2.160243322855058, - "learning_rate": 3.643466668853845e-09, - "loss": 0.7056, - "num_input_tokens_seen": 173871915, - "step": 8161 - }, - { - "epoch": 0.9814224733962604, - "grad_norm": 2.1169723195939536, - "learning_rate": 3.59661989898008e-09, - "loss": 0.7451, - "num_input_tokens_seen": 173892690, - "step": 8162 - }, - { - "epoch": 0.9815427162868995, - "grad_norm": 2.306559728903708, - "learning_rate": 3.5500759819115934e-09, - "loss": 0.7599, - "num_input_tokens_seen": 173912775, - "step": 8163 - }, - { - "epoch": 0.9816629591775387, - "grad_norm": 2.720169889083904, - "learning_rate": 3.5038349247094034e-09, - "loss": 0.811, - "num_input_tokens_seen": 173929755, - "step": 8164 - }, - { - "epoch": 0.9817832020681777, - "grad_norm": 2.4729755387539165, - "learning_rate": 3.4578967343878994e-09, - "loss": 0.7628, - "num_input_tokens_seen": 173945680, - "step": 8165 - }, - { - "epoch": 0.9819034449588168, - "grad_norm": 2.0508586812392013, - "learning_rate": 3.4122614179161733e-09, - "loss": 0.7983, - "num_input_tokens_seen": 173965360, - "step": 8166 - }, - { - "epoch": 0.9820236878494559, - "grad_norm": 1.7245063401288443, - "learning_rate": 3.36692898221691e-09, - "loss": 0.7669, - "num_input_tokens_seen": 173983445, - "step": 8167 - }, - { - "epoch": 0.982143930740095, - "grad_norm": 2.0767389731463624, - "learning_rate": 3.3218994341668305e-09, - "loss": 0.7332, - "num_input_tokens_seen": 174002095, - "step": 8168 - }, - { - "epoch": 0.982264173630734, - "grad_norm": 1.7552384827444198, - "learning_rate": 3.2771727805971373e-09, - "loss": 0.75, - "num_input_tokens_seen": 174023200, - "step": 8169 - }, - { - "epoch": 0.9823844165213732, - "grad_norm": 1.9469663833398747, - "learning_rate": 3.232749028292847e-09, - "loss": 0.7666, - "num_input_tokens_seen": 174039885, - "step": 8170 - }, - { - "epoch": 0.9825046594120123, - "grad_norm": 1.8509515365385438, - "learning_rate": 3.188628183992792e-09, - "loss": 0.8757, - "num_input_tokens_seen": 174059870, - "step": 8171 - }, - { - "epoch": 0.9826249023026513, - "grad_norm": 0.7985777011380686, - "learning_rate": 3.1448102543902844e-09, - "loss": 0.655, - "num_input_tokens_seen": 174123505, - "step": 8172 - }, - { - "epoch": 0.9827451451932905, - "grad_norm": 2.207216559422279, - "learning_rate": 3.1012952461324515e-09, - "loss": 0.6682, - "num_input_tokens_seen": 174142200, - "step": 8173 - }, - { - "epoch": 0.9828653880839295, - "grad_norm": 2.136622668066744, - "learning_rate": 3.0580831658202354e-09, - "loss": 0.7395, - "num_input_tokens_seen": 174159500, - "step": 8174 - }, - { - "epoch": 0.9829856309745686, - "grad_norm": 2.009022276249234, - "learning_rate": 3.015174020009281e-09, - "loss": 0.7772, - "num_input_tokens_seen": 174178545, - "step": 8175 - }, - { - "epoch": 0.9831058738652078, - "grad_norm": 2.930687565141807, - "learning_rate": 2.972567815208382e-09, - "loss": 0.747, - "num_input_tokens_seen": 174196835, - "step": 8176 - }, - { - "epoch": 0.9832261167558468, - "grad_norm": 3.464233973715276, - "learning_rate": 2.930264557881257e-09, - "loss": 0.8159, - "num_input_tokens_seen": 174211740, - "step": 8177 - }, - { - "epoch": 0.9833463596464859, - "grad_norm": 0.8292809613255099, - "learning_rate": 2.8882642544452163e-09, - "loss": 0.6299, - "num_input_tokens_seen": 174276185, - "step": 8178 - }, - { - "epoch": 0.983466602537125, - "grad_norm": 7.170591166620345, - "learning_rate": 2.8465669112716083e-09, - "loss": 0.7411, - "num_input_tokens_seen": 174293430, - "step": 8179 - }, - { - "epoch": 0.9835868454277641, - "grad_norm": 2.451753247009581, - "learning_rate": 2.8051725346858177e-09, - "loss": 0.7508, - "num_input_tokens_seen": 174313410, - "step": 8180 - }, - { - "epoch": 0.9837070883184031, - "grad_norm": 2.3576880398151445, - "learning_rate": 2.7640811309674883e-09, - "loss": 0.7003, - "num_input_tokens_seen": 174332630, - "step": 8181 - }, - { - "epoch": 0.9838273312090423, - "grad_norm": 1.7012050999043942, - "learning_rate": 2.7232927063498557e-09, - "loss": 0.7945, - "num_input_tokens_seen": 174352725, - "step": 8182 - }, - { - "epoch": 0.9839475740996814, - "grad_norm": 2.2260371973360313, - "learning_rate": 2.682807267020859e-09, - "loss": 0.6782, - "num_input_tokens_seen": 174375205, - "step": 8183 - }, - { - "epoch": 0.9840678169903204, - "grad_norm": 1.6781314806344834, - "learning_rate": 2.642624819121808e-09, - "loss": 0.6214, - "num_input_tokens_seen": 174395075, - "step": 8184 - }, - { - "epoch": 0.9841880598809596, - "grad_norm": 2.1025618045242886, - "learning_rate": 2.6027453687487154e-09, - "loss": 0.6116, - "num_input_tokens_seen": 174411885, - "step": 8185 - }, - { - "epoch": 0.9843083027715986, - "grad_norm": 3.023044858553157, - "learning_rate": 2.5631689219507422e-09, - "loss": 0.5248, - "num_input_tokens_seen": 174430285, - "step": 8186 - }, - { - "epoch": 0.9844285456622377, - "grad_norm": 1.8129161082976373, - "learning_rate": 2.523895484732197e-09, - "loss": 0.8294, - "num_input_tokens_seen": 174449460, - "step": 8187 - }, - { - "epoch": 0.9845487885528769, - "grad_norm": 2.169234175647031, - "learning_rate": 2.4849250630505357e-09, - "loss": 0.7429, - "num_input_tokens_seen": 174467425, - "step": 8188 - }, - { - "epoch": 0.9846690314435159, - "grad_norm": 1.959938241126848, - "learning_rate": 2.4462576628172528e-09, - "loss": 0.7305, - "num_input_tokens_seen": 174485775, - "step": 8189 - }, - { - "epoch": 0.984789274334155, - "grad_norm": 2.554926060490561, - "learning_rate": 2.407893289898544e-09, - "loss": 0.73, - "num_input_tokens_seen": 174504525, - "step": 8190 - }, - { - "epoch": 0.984909517224794, - "grad_norm": 2.1015829064329, - "learning_rate": 2.3698319501144202e-09, - "loss": 0.832, - "num_input_tokens_seen": 174525230, - "step": 8191 - }, - { - "epoch": 0.9850297601154332, - "grad_norm": 1.803213035332049, - "learning_rate": 2.3320736492382644e-09, - "loss": 0.7242, - "num_input_tokens_seen": 174543785, - "step": 8192 - }, - { - "epoch": 0.9851500030060723, - "grad_norm": 1.8538934348030842, - "learning_rate": 2.29461839299816e-09, - "loss": 0.6762, - "num_input_tokens_seen": 174563220, - "step": 8193 - }, - { - "epoch": 0.9852702458967113, - "grad_norm": 1.817544588049787, - "learning_rate": 2.257466187076229e-09, - "loss": 0.7956, - "num_input_tokens_seen": 174582145, - "step": 8194 - }, - { - "epoch": 0.9853904887873505, - "grad_norm": 2.2617389240460244, - "learning_rate": 2.2206170371081854e-09, - "loss": 0.702, - "num_input_tokens_seen": 174600450, - "step": 8195 - }, - { - "epoch": 0.9855107316779895, - "grad_norm": 1.6744999911213394, - "learning_rate": 2.1840709486842247e-09, - "loss": 0.8375, - "num_input_tokens_seen": 174619790, - "step": 8196 - }, - { - "epoch": 0.9856309745686286, - "grad_norm": 2.942772035092758, - "learning_rate": 2.1478279273481335e-09, - "loss": 0.7918, - "num_input_tokens_seen": 174637995, - "step": 8197 - }, - { - "epoch": 0.9857512174592677, - "grad_norm": 2.8771065626776515, - "learning_rate": 2.1118879785981815e-09, - "loss": 0.8024, - "num_input_tokens_seen": 174657855, - "step": 8198 - }, - { - "epoch": 0.9858714603499068, - "grad_norm": 1.7558341214960185, - "learning_rate": 2.0762511078862288e-09, - "loss": 0.7857, - "num_input_tokens_seen": 174677920, - "step": 8199 - }, - { - "epoch": 0.9859917032405459, - "grad_norm": 2.4751821131737866, - "learning_rate": 2.0409173206186183e-09, - "loss": 0.6524, - "num_input_tokens_seen": 174696880, - "step": 8200 - }, - { - "epoch": 0.986111946131185, - "grad_norm": 2.138024592105558, - "learning_rate": 2.0058866221550617e-09, - "loss": 0.8705, - "num_input_tokens_seen": 174714840, - "step": 8201 - }, - { - "epoch": 0.9862321890218241, - "grad_norm": 3.8660672270306793, - "learning_rate": 1.971159017809976e-09, - "loss": 0.7465, - "num_input_tokens_seen": 174732850, - "step": 8202 - }, - { - "epoch": 0.9863524319124631, - "grad_norm": 2.6995616277124594, - "learning_rate": 1.93673451285159e-09, - "loss": 0.7798, - "num_input_tokens_seen": 174751620, - "step": 8203 - }, - { - "epoch": 0.9864726748031023, - "grad_norm": 0.7724341943356106, - "learning_rate": 1.9026131125019495e-09, - "loss": 0.5964, - "num_input_tokens_seen": 174808710, - "step": 8204 - }, - { - "epoch": 0.9865929176937414, - "grad_norm": 1.8469702633006309, - "learning_rate": 1.8687948219371363e-09, - "loss": 0.8516, - "num_input_tokens_seen": 174827655, - "step": 8205 - }, - { - "epoch": 0.9867131605843804, - "grad_norm": 3.293176856132798, - "learning_rate": 1.835279646287491e-09, - "loss": 0.8766, - "num_input_tokens_seen": 174845385, - "step": 8206 - }, - { - "epoch": 0.9868334034750196, - "grad_norm": 3.5308385935089834, - "learning_rate": 1.8020675906371685e-09, - "loss": 0.766, - "num_input_tokens_seen": 174864500, - "step": 8207 - }, - { - "epoch": 0.9869536463656586, - "grad_norm": 3.0348022487946267, - "learning_rate": 1.7691586600243612e-09, - "loss": 0.7438, - "num_input_tokens_seen": 174883120, - "step": 8208 - }, - { - "epoch": 0.9870738892562977, - "grad_norm": 3.361978996228318, - "learning_rate": 1.7365528594415202e-09, - "loss": 0.8547, - "num_input_tokens_seen": 174896910, - "step": 8209 - }, - { - "epoch": 0.9871941321469369, - "grad_norm": 2.0596399155289107, - "learning_rate": 1.7042501938346888e-09, - "loss": 0.669, - "num_input_tokens_seen": 174919360, - "step": 8210 - }, - { - "epoch": 0.9873143750375759, - "grad_norm": 2.3605871324865277, - "learning_rate": 1.6722506681043913e-09, - "loss": 0.7636, - "num_input_tokens_seen": 174938040, - "step": 8211 - }, - { - "epoch": 0.987434617928215, - "grad_norm": 3.4691202377151793, - "learning_rate": 1.640554287104745e-09, - "loss": 0.6839, - "num_input_tokens_seen": 174956035, - "step": 8212 - }, - { - "epoch": 0.9875548608188541, - "grad_norm": 2.6814377921700676, - "learning_rate": 1.609161055644348e-09, - "loss": 0.7933, - "num_input_tokens_seen": 174971680, - "step": 8213 - }, - { - "epoch": 0.9876751037094932, - "grad_norm": 2.0702336996413226, - "learning_rate": 1.5780709784849467e-09, - "loss": 0.6672, - "num_input_tokens_seen": 174988420, - "step": 8214 - }, - { - "epoch": 0.9877953466001322, - "grad_norm": 2.091158107277124, - "learning_rate": 1.5472840603436565e-09, - "loss": 0.812, - "num_input_tokens_seen": 175005370, - "step": 8215 - }, - { - "epoch": 0.9879155894907714, - "grad_norm": 4.941459196577964, - "learning_rate": 1.5168003058900757e-09, - "loss": 0.7906, - "num_input_tokens_seen": 175023090, - "step": 8216 - }, - { - "epoch": 0.9880358323814105, - "grad_norm": 2.0764351282813474, - "learning_rate": 1.4866197197491715e-09, - "loss": 0.9155, - "num_input_tokens_seen": 175042170, - "step": 8217 - }, - { - "epoch": 0.9881560752720495, - "grad_norm": 4.152594873888854, - "learning_rate": 1.4567423064988371e-09, - "loss": 0.7841, - "num_input_tokens_seen": 175059240, - "step": 8218 - }, - { - "epoch": 0.9882763181626887, - "grad_norm": 2.4157356594834867, - "learning_rate": 1.4271680706718913e-09, - "loss": 0.7689, - "num_input_tokens_seen": 175076635, - "step": 8219 - }, - { - "epoch": 0.9883965610533277, - "grad_norm": 2.2641177734521936, - "learning_rate": 1.3978970167543013e-09, - "loss": 0.8187, - "num_input_tokens_seen": 175096535, - "step": 8220 - }, - { - "epoch": 0.9885168039439668, - "grad_norm": 3.0761114086101187, - "learning_rate": 1.3689291491867372e-09, - "loss": 0.7672, - "num_input_tokens_seen": 175114570, - "step": 8221 - }, - { - "epoch": 0.988637046834606, - "grad_norm": 2.188723533124547, - "learning_rate": 1.3402644723636836e-09, - "loss": 0.7336, - "num_input_tokens_seen": 175136320, - "step": 8222 - }, - { - "epoch": 0.988757289725245, - "grad_norm": 2.349230568079875, - "learning_rate": 1.311902990633218e-09, - "loss": 0.8308, - "num_input_tokens_seen": 175155005, - "step": 8223 - }, - { - "epoch": 0.9888775326158841, - "grad_norm": 1.769861787822553, - "learning_rate": 1.2838447082978987e-09, - "loss": 0.7063, - "num_input_tokens_seen": 175175880, - "step": 8224 - }, - { - "epoch": 0.9889977755065231, - "grad_norm": 3.293334393032793, - "learning_rate": 1.2560896296143208e-09, - "loss": 0.8243, - "num_input_tokens_seen": 175194065, - "step": 8225 - }, - { - "epoch": 0.9891180183971623, - "grad_norm": 2.7623080149193124, - "learning_rate": 1.2286377587926722e-09, - "loss": 0.8184, - "num_input_tokens_seen": 175210575, - "step": 8226 - }, - { - "epoch": 0.9892382612878013, - "grad_norm": 2.2055527264777157, - "learning_rate": 1.2014890999973992e-09, - "loss": 0.7467, - "num_input_tokens_seen": 175227215, - "step": 8227 - }, - { - "epoch": 0.9893585041784404, - "grad_norm": 1.7809228354404718, - "learning_rate": 1.1746436573472073e-09, - "loss": 0.7756, - "num_input_tokens_seen": 175248670, - "step": 8228 - }, - { - "epoch": 0.9894787470690796, - "grad_norm": 2.5171829428685375, - "learning_rate": 1.1481014349141726e-09, - "loss": 0.6869, - "num_input_tokens_seen": 175265610, - "step": 8229 - }, - { - "epoch": 0.9895989899597186, - "grad_norm": 2.383116077565131, - "learning_rate": 1.121862436724852e-09, - "loss": 0.8338, - "num_input_tokens_seen": 175284170, - "step": 8230 - }, - { - "epoch": 0.9897192328503577, - "grad_norm": 1.9637235341596146, - "learning_rate": 1.0959266667598388e-09, - "loss": 0.7032, - "num_input_tokens_seen": 175302705, - "step": 8231 - }, - { - "epoch": 0.9898394757409968, - "grad_norm": 2.7769046920525757, - "learning_rate": 1.0702941289533196e-09, - "loss": 0.7402, - "num_input_tokens_seen": 175321100, - "step": 8232 - }, - { - "epoch": 0.9899597186316359, - "grad_norm": 2.12520476244948, - "learning_rate": 1.0449648271939615e-09, - "loss": 0.8761, - "num_input_tokens_seen": 175337165, - "step": 8233 - }, - { - "epoch": 0.990079961522275, - "grad_norm": 1.627505953714871, - "learning_rate": 1.0199387653240243e-09, - "loss": 0.7257, - "num_input_tokens_seen": 175356575, - "step": 8234 - }, - { - "epoch": 0.9902002044129141, - "grad_norm": 1.7087624079813422, - "learning_rate": 9.952159471400267e-10, - "loss": 0.6997, - "num_input_tokens_seen": 175373335, - "step": 8235 - }, - { - "epoch": 0.9903204473035532, - "grad_norm": 1.9484826577151628, - "learning_rate": 9.707963763925241e-10, - "loss": 0.8267, - "num_input_tokens_seen": 175392105, - "step": 8236 - }, - { - "epoch": 0.9904406901941922, - "grad_norm": 2.0765197558588726, - "learning_rate": 9.466800567856648e-10, - "loss": 0.7802, - "num_input_tokens_seen": 175410425, - "step": 8237 - }, - { - "epoch": 0.9905609330848314, - "grad_norm": 2.3984590820603398, - "learning_rate": 9.228669919778553e-10, - "loss": 0.683, - "num_input_tokens_seen": 175429070, - "step": 8238 - }, - { - "epoch": 0.9906811759754705, - "grad_norm": 2.1221046028586144, - "learning_rate": 8.993571855817617e-10, - "loss": 0.7896, - "num_input_tokens_seen": 175447620, - "step": 8239 - }, - { - "epoch": 0.9908014188661095, - "grad_norm": 2.0415281793379165, - "learning_rate": 8.761506411638642e-10, - "loss": 0.7344, - "num_input_tokens_seen": 175466805, - "step": 8240 - }, - { - "epoch": 0.9909216617567487, - "grad_norm": 2.2644088759270757, - "learning_rate": 8.53247362244236e-10, - "loss": 0.7372, - "num_input_tokens_seen": 175485335, - "step": 8241 - }, - { - "epoch": 0.9910419046473877, - "grad_norm": 4.391329814701583, - "learning_rate": 8.306473522976532e-10, - "loss": 0.6787, - "num_input_tokens_seen": 175504460, - "step": 8242 - }, - { - "epoch": 0.9911621475380268, - "grad_norm": 2.2949354290347594, - "learning_rate": 8.083506147522623e-10, - "loss": 0.7135, - "num_input_tokens_seen": 175523575, - "step": 8243 - }, - { - "epoch": 0.991282390428666, - "grad_norm": 2.321622358009521, - "learning_rate": 7.863571529906909e-10, - "loss": 0.8519, - "num_input_tokens_seen": 175538880, - "step": 8244 - }, - { - "epoch": 0.991402633319305, - "grad_norm": 0.8000434019367024, - "learning_rate": 7.646669703489372e-10, - "loss": 0.6527, - "num_input_tokens_seen": 175602910, - "step": 8245 - }, - { - "epoch": 0.9915228762099441, - "grad_norm": 2.4792683851780657, - "learning_rate": 7.432800701177023e-10, - "loss": 0.578, - "num_input_tokens_seen": 175620630, - "step": 8246 - }, - { - "epoch": 0.9916431191005832, - "grad_norm": 0.8829869352441092, - "learning_rate": 7.221964555415017e-10, - "loss": 0.6037, - "num_input_tokens_seen": 175680010, - "step": 8247 - }, - { - "epoch": 0.9917633619912223, - "grad_norm": 3.7492249374019573, - "learning_rate": 7.01416129818222e-10, - "loss": 0.7428, - "num_input_tokens_seen": 175697350, - "step": 8248 - }, - { - "epoch": 0.9918836048818613, - "grad_norm": 2.415236024482971, - "learning_rate": 6.809390961006745e-10, - "loss": 0.5816, - "num_input_tokens_seen": 175717200, - "step": 8249 - }, - { - "epoch": 0.9920038477725005, - "grad_norm": 2.5684401977483278, - "learning_rate": 6.607653574948191e-10, - "loss": 0.6831, - "num_input_tokens_seen": 175737700, - "step": 8250 - }, - { - "epoch": 0.9921240906631396, - "grad_norm": 2.0867190167079914, - "learning_rate": 6.408949170613187e-10, - "loss": 0.8155, - "num_input_tokens_seen": 175756685, - "step": 8251 - }, - { - "epoch": 0.9922443335537786, - "grad_norm": 1.810709188480728, - "learning_rate": 6.213277778144288e-10, - "loss": 0.8132, - "num_input_tokens_seen": 175778050, - "step": 8252 - }, - { - "epoch": 0.9923645764444178, - "grad_norm": 2.040245725371067, - "learning_rate": 6.020639427224416e-10, - "loss": 0.6673, - "num_input_tokens_seen": 175795415, - "step": 8253 - }, - { - "epoch": 0.9924848193350568, - "grad_norm": 2.8215961746092946, - "learning_rate": 5.831034147076864e-10, - "loss": 0.7175, - "num_input_tokens_seen": 175812385, - "step": 8254 - }, - { - "epoch": 0.9926050622256959, - "grad_norm": 0.7216869926770196, - "learning_rate": 5.644461966463065e-10, - "loss": 0.575, - "num_input_tokens_seen": 175879715, - "step": 8255 - }, - { - "epoch": 0.9927253051163349, - "grad_norm": 1.9879932919293617, - "learning_rate": 5.460922913687049e-10, - "loss": 0.7509, - "num_input_tokens_seen": 175898525, - "step": 8256 - }, - { - "epoch": 0.9928455480069741, - "grad_norm": 2.3696605000102378, - "learning_rate": 5.280417016593208e-10, - "loss": 0.747, - "num_input_tokens_seen": 175918035, - "step": 8257 - }, - { - "epoch": 0.9929657908976132, - "grad_norm": 2.006487433174183, - "learning_rate": 5.102944302559642e-10, - "loss": 0.7475, - "num_input_tokens_seen": 175935250, - "step": 8258 - }, - { - "epoch": 0.9930860337882522, - "grad_norm": 2.091255332901187, - "learning_rate": 4.9285047985137e-10, - "loss": 0.7821, - "num_input_tokens_seen": 175954390, - "step": 8259 - }, - { - "epoch": 0.9932062766788914, - "grad_norm": 2.091468950178844, - "learning_rate": 4.757098530916436e-10, - "loss": 0.7424, - "num_input_tokens_seen": 175974555, - "step": 8260 - }, - { - "epoch": 0.9933265195695304, - "grad_norm": 3.5271336042439265, - "learning_rate": 4.5887255257670563e-10, - "loss": 0.7754, - "num_input_tokens_seen": 175991315, - "step": 8261 - }, - { - "epoch": 0.9934467624601695, - "grad_norm": 2.5085280957515375, - "learning_rate": 4.4233858086117906e-10, - "loss": 0.7603, - "num_input_tokens_seen": 176009560, - "step": 8262 - }, - { - "epoch": 0.9935670053508087, - "grad_norm": 2.7292772982230034, - "learning_rate": 4.261079404528356e-10, - "loss": 0.6809, - "num_input_tokens_seen": 176028760, - "step": 8263 - }, - { - "epoch": 0.9936872482414477, - "grad_norm": 2.4280748400474295, - "learning_rate": 4.1018063381437205e-10, - "loss": 0.6805, - "num_input_tokens_seen": 176048865, - "step": 8264 - }, - { - "epoch": 0.9938074911320868, - "grad_norm": 1.0118182579815589, - "learning_rate": 3.9455666336141167e-10, - "loss": 0.6633, - "num_input_tokens_seen": 176112365, - "step": 8265 - }, - { - "epoch": 0.9939277340227259, - "grad_norm": 3.054157089563749, - "learning_rate": 3.7923603146450267e-10, - "loss": 0.8114, - "num_input_tokens_seen": 176128145, - "step": 8266 - }, - { - "epoch": 0.994047976913365, - "grad_norm": 2.0958864446894894, - "learning_rate": 3.642187404473418e-10, - "loss": 0.8026, - "num_input_tokens_seen": 176146025, - "step": 8267 - }, - { - "epoch": 0.994168219804004, - "grad_norm": 2.2062681475225663, - "learning_rate": 3.495047925885508e-10, - "loss": 0.8478, - "num_input_tokens_seen": 176164080, - "step": 8268 - }, - { - "epoch": 0.9942884626946432, - "grad_norm": 2.4037161592757816, - "learning_rate": 3.350941901199e-10, - "loss": 0.832, - "num_input_tokens_seen": 176180720, - "step": 8269 - }, - { - "epoch": 0.9944087055852823, - "grad_norm": 2.8931073750826344, - "learning_rate": 3.2098693522764066e-10, - "loss": 0.8236, - "num_input_tokens_seen": 176193640, - "step": 8270 - }, - { - "epoch": 0.9945289484759213, - "grad_norm": 2.653038071448288, - "learning_rate": 3.071830300516165e-10, - "loss": 0.8127, - "num_input_tokens_seen": 176211190, - "step": 8271 - }, - { - "epoch": 0.9946491913665605, - "grad_norm": 2.675827430428048, - "learning_rate": 2.9368247668615234e-10, - "loss": 0.7031, - "num_input_tokens_seen": 176229500, - "step": 8272 - }, - { - "epoch": 0.9947694342571995, - "grad_norm": 4.744550491672129, - "learning_rate": 2.804852771789434e-10, - "loss": 0.6143, - "num_input_tokens_seen": 176242520, - "step": 8273 - }, - { - "epoch": 0.9948896771478386, - "grad_norm": 2.1101443353275293, - "learning_rate": 2.675914335321661e-10, - "loss": 0.5556, - "num_input_tokens_seen": 176260995, - "step": 8274 - }, - { - "epoch": 0.9950099200384778, - "grad_norm": 3.3603475336718596, - "learning_rate": 2.550009477018111e-10, - "loss": 0.7888, - "num_input_tokens_seen": 176279485, - "step": 8275 - }, - { - "epoch": 0.9951301629291168, - "grad_norm": 2.76719365977582, - "learning_rate": 2.4271382159790634e-10, - "loss": 0.6217, - "num_input_tokens_seen": 176296635, - "step": 8276 - }, - { - "epoch": 0.9952504058197559, - "grad_norm": 1.740750725358956, - "learning_rate": 2.3073005708429406e-10, - "loss": 0.855, - "num_input_tokens_seen": 176316000, - "step": 8277 - }, - { - "epoch": 0.995370648710395, - "grad_norm": 1.908443667053566, - "learning_rate": 2.190496559788535e-10, - "loss": 0.7113, - "num_input_tokens_seen": 176334005, - "step": 8278 - }, - { - "epoch": 0.9954908916010341, - "grad_norm": 3.2127900523222133, - "learning_rate": 2.0767262005372265e-10, - "loss": 0.7617, - "num_input_tokens_seen": 176351240, - "step": 8279 - }, - { - "epoch": 0.9956111344916732, - "grad_norm": 1.8994265271107622, - "learning_rate": 1.965989510346322e-10, - "loss": 0.7444, - "num_input_tokens_seen": 176370080, - "step": 8280 - }, - { - "epoch": 0.9957313773823123, - "grad_norm": 4.47759596491919, - "learning_rate": 1.8582865060134955e-10, - "loss": 0.6966, - "num_input_tokens_seen": 176387990, - "step": 8281 - }, - { - "epoch": 0.9958516202729514, - "grad_norm": 0.8166389173617845, - "learning_rate": 1.7536172038790098e-10, - "loss": 0.5902, - "num_input_tokens_seen": 176448020, - "step": 8282 - }, - { - "epoch": 0.9959718631635904, - "grad_norm": 2.2943255952352968, - "learning_rate": 1.651981619819054e-10, - "loss": 0.6954, - "num_input_tokens_seen": 176464890, - "step": 8283 - }, - { - "epoch": 0.9960921060542296, - "grad_norm": 2.5726579921884793, - "learning_rate": 1.5533797692546257e-10, - "loss": 0.6965, - "num_input_tokens_seen": 176483345, - "step": 8284 - }, - { - "epoch": 0.9962123489448687, - "grad_norm": 2.15599553412446, - "learning_rate": 1.4578116671404296e-10, - "loss": 0.8288, - "num_input_tokens_seen": 176501345, - "step": 8285 - }, - { - "epoch": 0.9963325918355077, - "grad_norm": 2.583962588903221, - "learning_rate": 1.3652773279759777e-10, - "loss": 0.7118, - "num_input_tokens_seen": 176517715, - "step": 8286 - }, - { - "epoch": 0.9964528347261468, - "grad_norm": 2.343758824603163, - "learning_rate": 1.2757767657989305e-10, - "loss": 0.6193, - "num_input_tokens_seen": 176541225, - "step": 8287 - }, - { - "epoch": 0.9965730776167859, - "grad_norm": 2.7563916119811016, - "learning_rate": 1.1893099941850948e-10, - "loss": 0.8599, - "num_input_tokens_seen": 176559840, - "step": 8288 - }, - { - "epoch": 0.996693320507425, - "grad_norm": 2.567794781252191, - "learning_rate": 1.105877026252866e-10, - "loss": 0.7654, - "num_input_tokens_seen": 176577890, - "step": 8289 - }, - { - "epoch": 0.996813563398064, - "grad_norm": 2.312729168030679, - "learning_rate": 1.0254778746565663e-10, - "loss": 0.7162, - "num_input_tokens_seen": 176592885, - "step": 8290 - }, - { - "epoch": 0.9969338062887032, - "grad_norm": 3.1137882306079594, - "learning_rate": 9.481125515953259e-11, - "loss": 0.726, - "num_input_tokens_seen": 176610665, - "step": 8291 - }, - { - "epoch": 0.9970540491793423, - "grad_norm": 3.103135461880461, - "learning_rate": 8.737810688064228e-11, - "loss": 0.7931, - "num_input_tokens_seen": 176630220, - "step": 8292 - }, - { - "epoch": 0.9971742920699813, - "grad_norm": 4.054154418615418, - "learning_rate": 8.024834375608414e-11, - "loss": 0.7865, - "num_input_tokens_seen": 176648530, - "step": 8293 - }, - { - "epoch": 0.9972945349606205, - "grad_norm": 0.8539698343127142, - "learning_rate": 7.342196686788149e-11, - "loss": 0.6648, - "num_input_tokens_seen": 176701415, - "step": 8294 - }, - { - "epoch": 0.9974147778512595, - "grad_norm": 4.617238943334054, - "learning_rate": 6.689897725142834e-11, - "loss": 0.6789, - "num_input_tokens_seen": 176720610, - "step": 8295 - }, - { - "epoch": 0.9975350207418986, - "grad_norm": 2.4320007278956117, - "learning_rate": 6.067937589615545e-11, - "loss": 0.8744, - "num_input_tokens_seen": 176738405, - "step": 8296 - }, - { - "epoch": 0.9976552636325378, - "grad_norm": 0.8037430908240526, - "learning_rate": 5.476316374575241e-11, - "loss": 0.5846, - "num_input_tokens_seen": 176801610, - "step": 8297 - }, - { - "epoch": 0.9977755065231768, - "grad_norm": 3.0512985585844783, - "learning_rate": 4.9150341697723476e-11, - "loss": 0.7263, - "num_input_tokens_seen": 176821220, - "step": 8298 - }, - { - "epoch": 0.9978957494138159, - "grad_norm": 1.6951311378268001, - "learning_rate": 4.384091060338768e-11, - "loss": 0.6563, - "num_input_tokens_seen": 176841410, - "step": 8299 - }, - { - "epoch": 0.998015992304455, - "grad_norm": 2.514276611531191, - "learning_rate": 3.883487126810081e-11, - "loss": 0.7359, - "num_input_tokens_seen": 176860390, - "step": 8300 - }, - { - "epoch": 0.9981362351950941, - "grad_norm": 2.1396989651389244, - "learning_rate": 3.41322244516995e-11, - "loss": 0.7934, - "num_input_tokens_seen": 176878055, - "step": 8301 - }, - { - "epoch": 0.9982564780857331, - "grad_norm": 1.7317634566852855, - "learning_rate": 2.9732970866946925e-11, - "loss": 0.6223, - "num_input_tokens_seen": 176897655, - "step": 8302 - }, - { - "epoch": 0.9983767209763723, - "grad_norm": 3.3719932002171005, - "learning_rate": 2.563711118175327e-11, - "loss": 0.7808, - "num_input_tokens_seen": 176914260, - "step": 8303 - }, - { - "epoch": 0.9984969638670114, - "grad_norm": 1.8855250152951066, - "learning_rate": 2.184464601717728e-11, - "loss": 0.8316, - "num_input_tokens_seen": 176932295, - "step": 8304 - }, - { - "epoch": 0.9986172067576504, - "grad_norm": 2.6088639848298802, - "learning_rate": 1.8355575948758585e-11, - "loss": 0.771, - "num_input_tokens_seen": 176950000, - "step": 8305 - }, - { - "epoch": 0.9987374496482896, - "grad_norm": 2.4449209720783576, - "learning_rate": 1.5169901505407424e-11, - "loss": 0.7396, - "num_input_tokens_seen": 176966785, - "step": 8306 - }, - { - "epoch": 0.9988576925389286, - "grad_norm": 1.8933684917611164, - "learning_rate": 1.228762317073695e-11, - "loss": 0.7304, - "num_input_tokens_seen": 176985335, - "step": 8307 - }, - { - "epoch": 0.9989779354295677, - "grad_norm": 2.2195442228715208, - "learning_rate": 9.70874138195299e-12, - "loss": 0.781, - "num_input_tokens_seen": 177006965, - "step": 8308 - }, - { - "epoch": 0.9990981783202069, - "grad_norm": 2.4100321606138677, - "learning_rate": 7.433256530076093e-12, - "loss": 0.7375, - "num_input_tokens_seen": 177026640, - "step": 8309 - }, - { - "epoch": 0.9992184212108459, - "grad_norm": 2.3830769448293965, - "learning_rate": 5.46116896038562e-12, - "loss": 0.742, - "num_input_tokens_seen": 177040770, - "step": 8310 - }, - { - "epoch": 0.999338664101485, - "grad_norm": 2.2288285164145263, - "learning_rate": 3.792478972197699e-12, - "loss": 0.6167, - "num_input_tokens_seen": 177061075, - "step": 8311 - }, - { - "epoch": 0.9994589069921241, - "grad_norm": 3.4773882566335352, - "learning_rate": 2.4271868181990895e-12, - "loss": 0.6946, - "num_input_tokens_seen": 177077960, - "step": 8312 - }, - { - "epoch": 0.9995791498827632, - "grad_norm": 2.313151740493287, - "learning_rate": 1.3652927060014973e-12, - "loss": 0.796, - "num_input_tokens_seen": 177093275, - "step": 8313 - }, - { - "epoch": 0.9996993927734023, - "grad_norm": 3.5257222933850607, - "learning_rate": 6.067967965872612e-13, - "loss": 0.6378, - "num_input_tokens_seen": 177112605, - "step": 8314 - }, - { - "epoch": 0.9998196356640414, - "grad_norm": 1.6849569618192852, - "learning_rate": 1.5169920497548615e-13, - "loss": 0.7645, - "num_input_tokens_seen": 177136945, - "step": 8315 - }, - { - "epoch": 0.9999398785546805, - "grad_norm": 1.1960316289490573, - "learning_rate": 0.0, - "loss": 0.5808, - "num_input_tokens_seen": 177185545, - "step": 8316 - }, - { - "epoch": 0.9999398785546805, - "num_input_tokens_seen": 177185545, - "step": 8316, - "total_flos": 6.901864404405453e+17, - "train_loss": 0.7678336634006335, - "train_runtime": 60017.2753, - "train_samples_per_second": 5.543, - "train_steps_per_second": 0.139 - } - ], - "logging_steps": 1.0, - "max_steps": 8316, - "num_input_tokens_seen": 177185545, - "num_train_epochs": 1, - "save_steps": 832, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 6.901864404405453e+17, - "train_batch_size": 5, - "trial_name": null, - "trial_params": null -} diff --git a/sft/hyperrouter/training_args.bin b/sft/hyperrouter/training_args.bin deleted file mode 100644 index 0c2b00786f8549360488645f4433aaf0bd1ad624..0000000000000000000000000000000000000000 --- a/sft/hyperrouter/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c09427a177e86daca87c380071eb69bb649199ac37ec8b4066e07e5d8e69e1bb -size 7352 diff --git a/sft/smoe/added_tokens.json b/sft/smoe/added_tokens.json deleted file mode 100644 index c9d3d3a1b74d87e381e471f7b33784015d2dc0ea..0000000000000000000000000000000000000000 --- a/sft/smoe/added_tokens.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "<|assistant|>": 32001, - "<|endoftext|>": 32000, - "<|end|>": 32007, - "<|placeholder1|>": 32002, - "<|placeholder2|>": 32003, - "<|placeholder3|>": 32004, - "<|placeholder4|>": 32005, - "<|placeholder5|>": 32008, - "<|placeholder6|>": 32009, - "<|system|>": 32006, - "<|user|>": 32010 -} diff --git a/sft/smoe/config.json b/sft/smoe/config.json deleted file mode 100644 index 5a927b3aac26cbe4aba220e62b8c5ccb98517c28..0000000000000000000000000000000000000000 --- a/sft/smoe/config.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "_name_or_path": "/cm/archive/thongdt4/toolkitmoe/checkpoints/phi3mini-siglip224/pft", - "architectures": [ - "LlavaPhiForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" - }, - "balance_loss_coef": 0.1, - "bos_token_id": 1, - "clip_smoe": true, - "dropout": false, - "embd_pdrop": 0.0, - "eos_token_id": 32000, - "freeze_mm_mlp_adapter": false, - "hidden_act": "silu", - "hidden_size": 3072, - "image_aspect_ratio": "pad", - "initializer_range": 0.02, - "intermediate_size": 8192, - "local_rank": 0, - "max_position_embeddings": 4096, - "mlp_smoe": true, - "mm_hidden_size": 1152, - "mm_patch_merge_type": "flat", - "mm_projector_lr": null, - "mm_projector_type": "moe", - "mm_use_im_patch_token": false, - "mm_use_im_start_end": false, - "mm_vision_select_feature": "patch", - "mm_vision_select_layer": -2, - "mm_vision_tower": "google/siglip-so400m-patch14-224", - "model_type": "llava_phi", - "moe_name": "smoe", - "num_attention_heads": 32, - "num_experts": 4, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "num_layers": 3, - "num_selected": 2, - "original_max_position_embeddings": 4096, - "pad_token_id": 32000, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "router_z_loss_coef": 0.01, - "scales": [ - 1, - 3 - ], - "sliding_window": 2047, - "tie_word_embeddings": false, - "tokenizer_model_max_length": 2048, - "tokenizer_padding_side": "right", - "torch_dtype": "bfloat16", - "training": true, - "transformers_version": "4.43.2", - "tune_mm_mlp_adapter": false, - "use_cache": true, - "use_mm_proj": true, - "vocab_size": 32064 -} diff --git a/sft/smoe/generation_config.json b/sft/smoe/generation_config.json deleted file mode 100644 index 3a20824ea777f1ebd11da590160a7209fe3b62c6..0000000000000000000000000000000000000000 --- a/sft/smoe/generation_config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "_from_model_config": true, - "bos_token_id": 1, - "do_sample": true, - "eos_token_id": [ - 32000, - 32001, - 32007 - ], - "pad_token_id": 32000, - "transformers_version": "4.43.2" -} diff --git a/sft/smoe/model-00001-of-00003.safetensors b/sft/smoe/model-00001-of-00003.safetensors deleted file mode 100644 index 97cf974e7dc90a95e1206771df2ea4019386466c..0000000000000000000000000000000000000000 --- a/sft/smoe/model-00001-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:69feaff0e4b709ada48a0f2e32927ef45315c40a21da51238da2c77e5ed7a093 -size 4972489328 diff --git a/sft/smoe/model-00002-of-00003.safetensors b/sft/smoe/model-00002-of-00003.safetensors deleted file mode 100644 index 6e1bc3b16bac25e3beb20f88b2d7c87fdd178b6b..0000000000000000000000000000000000000000 --- a/sft/smoe/model-00002-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:10e2c7781c5462f9ee1bc3167c625026078097aa3b5151085a195e18a6302acd -size 4985529648 diff --git a/sft/smoe/model-00003-of-00003.safetensors b/sft/smoe/model-00003-of-00003.safetensors deleted file mode 100644 index 6f11883917b12b23f2313c17af7aa3ae15b1b917..0000000000000000000000000000000000000000 --- a/sft/smoe/model-00003-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e4138152dfc9641daf414984226e43a0d470aab2c4bab6bf7d9888918cdeee2e -size 248943552 diff --git a/sft/smoe/model.safetensors.index.json b/sft/smoe/model.safetensors.index.json deleted file mode 100644 index aa54419fc0a3eab502aa7c4ad974dca52ed10803..0000000000000000000000000000000000000000 --- a/sft/smoe/model.safetensors.index.json +++ /dev/null @@ -1,1005 +0,0 @@ -{ - "metadata": { - "total_size": 10206819456 - }, - "weight_map": { - "lm_head.weight": "model-00003-of-00003.safetensors", - "model.embed_tokens.weight": "model-00001-of-00003.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.21.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.21.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.30.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.2.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.2.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.2.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.2.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.2.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.0.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.0.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.2.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.gate.weight": "model-00003-of-00003.safetensors", - "model.norm.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00002-of-00003.safetensors" - } -} diff --git a/sft/smoe/special_tokens_map.json b/sft/smoe/special_tokens_map.json deleted file mode 100644 index 3e4d5a5bc1cb51753cc9ae0305ece0da60052b10..0000000000000000000000000000000000000000 --- a/sft/smoe/special_tokens_map.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "bos_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "", - "unk_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - } -} diff --git a/sft/smoe/tokenizer.model b/sft/smoe/tokenizer.model deleted file mode 100644 index 6c00c742ce03c627d6cd5b795984876fa49fa899..0000000000000000000000000000000000000000 --- a/sft/smoe/tokenizer.model +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 -size 499723 diff --git a/sft/smoe/tokenizer_config.json b/sft/smoe/tokenizer_config.json deleted file mode 100644 index 3bd56c6314b14d6a33a69cd1802e04dbc1e47840..0000000000000000000000000000000000000000 --- a/sft/smoe/tokenizer_config.json +++ /dev/null @@ -1,132 +0,0 @@ -{ - "add_bos_token": true, - "add_eos_token": false, - "add_prefix_space": true, - "added_tokens_decoder": { - "0": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "1": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "2": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": false - }, - "32000": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "32001": { - "content": "<|assistant|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32002": { - "content": "<|placeholder1|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32003": { - "content": "<|placeholder2|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32004": { - "content": "<|placeholder3|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32005": { - "content": "<|placeholder4|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32006": { - "content": "<|system|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32007": { - "content": "<|end|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32008": { - "content": "<|placeholder5|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32009": { - "content": "<|placeholder6|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32010": { - "content": "<|user|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - } - }, - "bos_token": "", - "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", - "clean_up_tokenization_spaces": false, - "eos_token": "<|endoftext|>", - "legacy": false, - "model_max_length": 2048, - "pad_token": "", - "padding_side": "right", - "sp_model_kwargs": {}, - "spaces_between_special_tokens": false, - "tokenizer_class": "LlamaTokenizer", - "unk_token": "", - "use_default_system_prompt": false -} diff --git a/sft/smoe/trainer_state.json b/sft/smoe/trainer_state.json deleted file mode 100644 index f2b7a779ddd50d01bb3f842f8045a76b3be9747e..0000000000000000000000000000000000000000 --- a/sft/smoe/trainer_state.json +++ /dev/null @@ -1,58254 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.9999398785546805, - "eval_steps": 500, - "global_step": 8316, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.00012024289063909097, - "grad_norm": 17.609243289214618, - "learning_rate": 0.0, - "loss": 1.9307, - "step": 1 - }, - { - "epoch": 0.00024048578127818193, - "grad_norm": 19.208107285676192, - "learning_rate": 5.021476677069823e-07, - "loss": 1.8918, - "step": 2 - }, - { - "epoch": 0.0003607286719172729, - "grad_norm": 13.850924662767893, - "learning_rate": 7.958852231401551e-07, - "loss": 1.7577, - "step": 3 - }, - { - "epoch": 0.00048097156255636386, - "grad_norm": 13.833681462208517, - "learning_rate": 1.0042953354139647e-06, - "loss": 1.8249, - "step": 4 - }, - { - "epoch": 0.0006012144531954548, - "grad_norm": 15.953775390757825, - "learning_rate": 1.1659507774310057e-06, - "loss": 1.898, - "step": 5 - }, - { - "epoch": 0.0007214573438345458, - "grad_norm": 16.244578234644454, - "learning_rate": 1.2980328908471373e-06, - "loss": 1.7571, - "step": 6 - }, - { - "epoch": 0.0008417002344736367, - "grad_norm": 8.811005161199464, - "learning_rate": 1.4097067265369432e-06, - "loss": 1.2999, - "step": 7 - }, - { - "epoch": 0.0009619431251127277, - "grad_norm": 16.577148408735273, - "learning_rate": 1.506443003120947e-06, - "loss": 1.7049, - "step": 8 - }, - { - "epoch": 0.0010821860157518186, - "grad_norm": 9.843884857707188, - "learning_rate": 1.5917704462803102e-06, - "loss": 1.8102, - "step": 9 - }, - { - "epoch": 0.0012024289063909096, - "grad_norm": 11.020876061705824, - "learning_rate": 1.6680984451379884e-06, - "loss": 1.8447, - "step": 10 - }, - { - "epoch": 0.0013226717970300007, - "grad_norm": 8.509569951632031, - "learning_rate": 1.7371455188905097e-06, - "loss": 1.6365, - "step": 11 - }, - { - "epoch": 0.0014429146876690916, - "grad_norm": 7.559538458404822, - "learning_rate": 1.8001805585541196e-06, - "loss": 1.6239, - "step": 12 - }, - { - "epoch": 0.0015631575783081825, - "grad_norm": 6.93614994693928, - "learning_rate": 1.8581671739548328e-06, - "loss": 1.6679, - "step": 13 - }, - { - "epoch": 0.0016834004689472734, - "grad_norm": 6.633344756968005, - "learning_rate": 1.9118543942439254e-06, - "loss": 1.5154, - "step": 14 - }, - { - "epoch": 0.0018036433595863645, - "grad_norm": 6.394248774400009, - "learning_rate": 1.961836000571161e-06, - "loss": 1.5176, - "step": 15 - }, - { - "epoch": 0.0019238862502254555, - "grad_norm": 6.610155857574448, - "learning_rate": 2.0085906708279293e-06, - "loss": 1.0882, - "step": 16 - }, - { - "epoch": 0.0020441291408645466, - "grad_norm": 5.805847955587404, - "learning_rate": 2.0525099325728135e-06, - "loss": 1.5513, - "step": 17 - }, - { - "epoch": 0.0021643720315036373, - "grad_norm": 6.025098123978103, - "learning_rate": 2.0939181139872922e-06, - "loss": 1.1497, - "step": 18 - }, - { - "epoch": 0.0022846149221427284, - "grad_norm": 5.829483323929712, - "learning_rate": 2.1330868934640175e-06, - "loss": 1.3919, - "step": 19 - }, - { - "epoch": 0.002404857812781819, - "grad_norm": 5.018746708813036, - "learning_rate": 2.170246112844971e-06, - "loss": 1.1702, - "step": 20 - }, - { - "epoch": 0.0025251007034209102, - "grad_norm": 4.914543383043674, - "learning_rate": 2.2055919496770983e-06, - "loss": 1.3851, - "step": 21 - }, - { - "epoch": 0.0026453435940600014, - "grad_norm": 4.483936673849529, - "learning_rate": 2.2392931865974923e-06, - "loss": 1.2649, - "step": 22 - }, - { - "epoch": 0.002765586484699092, - "grad_norm": 4.231657951218799, - "learning_rate": 2.271496085962064e-06, - "loss": 1.367, - "step": 23 - }, - { - "epoch": 0.002885829375338183, - "grad_norm": 5.010121971975559, - "learning_rate": 2.3023282262611022e-06, - "loss": 1.3863, - "step": 24 - }, - { - "epoch": 0.003006072265977274, - "grad_norm": 5.657264913478025, - "learning_rate": 2.3319015548620114e-06, - "loss": 1.2734, - "step": 25 - }, - { - "epoch": 0.003126315156616365, - "grad_norm": 3.944202108207152, - "learning_rate": 2.3603148416618152e-06, - "loss": 1.2699, - "step": 26 - }, - { - "epoch": 0.003246558047255456, - "grad_norm": 4.9652686248671225, - "learning_rate": 2.3876556694204647e-06, - "loss": 1.3434, - "step": 27 - }, - { - "epoch": 0.003366800937894547, - "grad_norm": 3.503733142672055, - "learning_rate": 2.414002061950908e-06, - "loss": 1.2295, - "step": 28 - }, - { - "epoch": 0.003487043828533638, - "grad_norm": 3.2660285663838806, - "learning_rate": 2.4394238264681557e-06, - "loss": 1.3241, - "step": 29 - }, - { - "epoch": 0.003607286719172729, - "grad_norm": 2.945949981264831, - "learning_rate": 2.4639836682781433e-06, - "loss": 1.3146, - "step": 30 - }, - { - "epoch": 0.00372752960981182, - "grad_norm": 3.2656994678822433, - "learning_rate": 2.487738122623307e-06, - "loss": 1.3157, - "step": 31 - }, - { - "epoch": 0.003847772500450911, - "grad_norm": 2.9652882520464363, - "learning_rate": 2.510738338534912e-06, - "loss": 1.2776, - "step": 32 - }, - { - "epoch": 0.003968015391090002, - "grad_norm": 2.8898509045852907, - "learning_rate": 2.5330307420306648e-06, - "loss": 1.3101, - "step": 33 - }, - { - "epoch": 0.004088258281729093, - "grad_norm": 2.170097117382273, - "learning_rate": 2.554657600279796e-06, - "loss": 1.1518, - "step": 34 - }, - { - "epoch": 0.004208501172368184, - "grad_norm": 2.760675084715759, - "learning_rate": 2.5756575039679493e-06, - "loss": 1.3154, - "step": 35 - }, - { - "epoch": 0.0043287440630072746, - "grad_norm": 2.1187811340025027, - "learning_rate": 2.5960657816942747e-06, - "loss": 1.2249, - "step": 36 - }, - { - "epoch": 0.004448986953646365, - "grad_norm": 2.9331285130633993, - "learning_rate": 2.6159148575788668e-06, - "loss": 0.9605, - "step": 37 - }, - { - "epoch": 0.004569229844285457, - "grad_norm": 3.805024619705815, - "learning_rate": 2.635234561171e-06, - "loss": 1.2614, - "step": 38 - }, - { - "epoch": 0.0046894727349245475, - "grad_norm": 2.329117429027455, - "learning_rate": 2.6540523970949877e-06, - "loss": 1.2121, - "step": 39 - }, - { - "epoch": 0.004809715625563638, - "grad_norm": 2.574402867680595, - "learning_rate": 2.6723937805519533e-06, - "loss": 1.1976, - "step": 40 - }, - { - "epoch": 0.00492995851620273, - "grad_norm": 2.1996843241579107, - "learning_rate": 2.690282243737839e-06, - "loss": 1.2071, - "step": 41 - }, - { - "epoch": 0.0050502014068418205, - "grad_norm": 2.485404898740073, - "learning_rate": 2.7077396173840807e-06, - "loss": 1.2557, - "step": 42 - }, - { - "epoch": 0.005170444297480911, - "grad_norm": 2.251618227245878, - "learning_rate": 2.7247861909342594e-06, - "loss": 1.1963, - "step": 43 - }, - { - "epoch": 0.005290687188120003, - "grad_norm": 2.3977627379733644, - "learning_rate": 2.7414408543044743e-06, - "loss": 1.0872, - "step": 44 - }, - { - "epoch": 0.005410930078759093, - "grad_norm": 3.0054191281828504, - "learning_rate": 2.7577212237113157e-06, - "loss": 1.0598, - "step": 45 - }, - { - "epoch": 0.005531172969398184, - "grad_norm": 1.9301185280964537, - "learning_rate": 2.7736437536690466e-06, - "loss": 1.3063, - "step": 46 - }, - { - "epoch": 0.005651415860037276, - "grad_norm": 1.908319381873871, - "learning_rate": 2.789223836941131e-06, - "loss": 1.3395, - "step": 47 - }, - { - "epoch": 0.005771658750676366, - "grad_norm": 2.9571647664292477, - "learning_rate": 2.8044758939680847e-06, - "loss": 1.3334, - "step": 48 - }, - { - "epoch": 0.005891901641315457, - "grad_norm": 2.4943300801062698, - "learning_rate": 2.8194134530738863e-06, - "loss": 1.2689, - "step": 49 - }, - { - "epoch": 0.006012144531954548, - "grad_norm": 2.8719524319144285, - "learning_rate": 2.834049222568994e-06, - "loss": 1.1557, - "step": 50 - }, - { - "epoch": 0.006132387422593639, - "grad_norm": 1.939364305442756, - "learning_rate": 2.848395155712969e-06, - "loss": 1.1894, - "step": 51 - }, - { - "epoch": 0.00625263031323273, - "grad_norm": 2.092270741362975, - "learning_rate": 2.8624625093687977e-06, - "loss": 1.2448, - "step": 52 - }, - { - "epoch": 0.006372873203871821, - "grad_norm": 2.0515456164813655, - "learning_rate": 2.876261897070029e-06, - "loss": 1.1407, - "step": 53 - }, - { - "epoch": 0.006493116094510912, - "grad_norm": 44.10842160455657, - "learning_rate": 2.889803337127447e-06, - "loss": 1.1795, - "step": 54 - }, - { - "epoch": 0.006613358985150003, - "grad_norm": 2.684373856034662, - "learning_rate": 2.903096296321516e-06, - "loss": 1.1082, - "step": 55 - }, - { - "epoch": 0.006733601875789094, - "grad_norm": 1.827795863070128, - "learning_rate": 2.9161497296578907e-06, - "loss": 1.1781, - "step": 56 - }, - { - "epoch": 0.006853844766428185, - "grad_norm": 2.2293497408501897, - "learning_rate": 2.928972116604173e-06, - "loss": 1.1145, - "step": 57 - }, - { - "epoch": 0.006974087657067276, - "grad_norm": 2.1454155771451897, - "learning_rate": 2.9415714941751377e-06, - "loss": 1.2637, - "step": 58 - }, - { - "epoch": 0.007094330547706367, - "grad_norm": 1.9598817452938044, - "learning_rate": 2.9539554871897396e-06, - "loss": 1.1911, - "step": 59 - }, - { - "epoch": 0.007214573438345458, - "grad_norm": 1.9171418001904172, - "learning_rate": 2.9661313359851253e-06, - "loss": 1.2293, - "step": 60 - }, - { - "epoch": 0.007334816328984549, - "grad_norm": 1.9408997272138075, - "learning_rate": 2.978105921839922e-06, - "loss": 1.2041, - "step": 61 - }, - { - "epoch": 0.00745505921962364, - "grad_norm": 2.7097246166665987, - "learning_rate": 2.9898857903302893e-06, - "loss": 0.998, - "step": 62 - }, - { - "epoch": 0.007575302110262731, - "grad_norm": 2.796367151622545, - "learning_rate": 3.001477172817253e-06, - "loss": 1.1388, - "step": 63 - }, - { - "epoch": 0.007695545000901822, - "grad_norm": 2.275861149546558, - "learning_rate": 3.012886006241894e-06, - "loss": 1.2207, - "step": 64 - }, - { - "epoch": 0.007815787891540913, - "grad_norm": 2.009434857225924, - "learning_rate": 3.0241179513858383e-06, - "loss": 1.1535, - "step": 65 - }, - { - "epoch": 0.007936030782180003, - "grad_norm": 2.1318701594109606, - "learning_rate": 3.035178409737647e-06, - "loss": 1.1369, - "step": 66 - }, - { - "epoch": 0.008056273672819095, - "grad_norm": 2.1700658055185245, - "learning_rate": 3.046072539090907e-06, - "loss": 1.1393, - "step": 67 - }, - { - "epoch": 0.008176516563458186, - "grad_norm": 2.438271131558677, - "learning_rate": 3.056805267986779e-06, - "loss": 1.2966, - "step": 68 - }, - { - "epoch": 0.008296759454097276, - "grad_norm": 2.108669473843792, - "learning_rate": 3.0673813091022194e-06, - "loss": 1.1999, - "step": 69 - }, - { - "epoch": 0.008417002344736368, - "grad_norm": 2.1694731020801834, - "learning_rate": 3.0778051716749317e-06, - "loss": 0.9424, - "step": 70 - }, - { - "epoch": 0.008537245235375458, - "grad_norm": 1.9905463545997066, - "learning_rate": 3.0880811730470094e-06, - "loss": 1.1642, - "step": 71 - }, - { - "epoch": 0.008657488126014549, - "grad_norm": 1.725377431296128, - "learning_rate": 3.098213449401257e-06, - "loss": 0.8838, - "step": 72 - }, - { - "epoch": 0.00877773101665364, - "grad_norm": 2.4810746061277307, - "learning_rate": 3.1082059657570015e-06, - "loss": 1.2326, - "step": 73 - }, - { - "epoch": 0.00889797390729273, - "grad_norm": 2.039400503856615, - "learning_rate": 3.1180625252858496e-06, - "loss": 1.2108, - "step": 74 - }, - { - "epoch": 0.009018216797931822, - "grad_norm": 2.7228064256186815, - "learning_rate": 3.1277867780021663e-06, - "loss": 1.0565, - "step": 75 - }, - { - "epoch": 0.009138459688570914, - "grad_norm": 2.578841508655372, - "learning_rate": 3.1373822288779824e-06, - "loss": 1.2015, - "step": 76 - }, - { - "epoch": 0.009258702579210003, - "grad_norm": 1.8897178932850751, - "learning_rate": 3.1468522454274533e-06, - "loss": 1.0402, - "step": 77 - }, - { - "epoch": 0.009378945469849095, - "grad_norm": 1.7904181529035903, - "learning_rate": 3.15620006480197e-06, - "loss": 1.1622, - "step": 78 - }, - { - "epoch": 0.009499188360488187, - "grad_norm": 2.4675722813865426, - "learning_rate": 3.1654288004333087e-06, - "loss": 0.9966, - "step": 79 - }, - { - "epoch": 0.009619431251127276, - "grad_norm": 2.226233742519093, - "learning_rate": 3.1745414482589353e-06, - "loss": 0.9996, - "step": 80 - }, - { - "epoch": 0.009739674141766368, - "grad_norm": 2.0784758078108068, - "learning_rate": 3.1835408925606204e-06, - "loss": 1.111, - "step": 81 - }, - { - "epoch": 0.00985991703240546, - "grad_norm": 3.4773533982310405, - "learning_rate": 3.1924299114448214e-06, - "loss": 1.1301, - "step": 82 - }, - { - "epoch": 0.00998015992304455, - "grad_norm": 2.7331568156985098, - "learning_rate": 3.2012111819909055e-06, - "loss": 1.0894, - "step": 83 - }, - { - "epoch": 0.010100402813683641, - "grad_norm": 1.8415452738781086, - "learning_rate": 3.2098872850910627e-06, - "loss": 1.1886, - "step": 84 - }, - { - "epoch": 0.010220645704322733, - "grad_norm": 1.8543890773237575, - "learning_rate": 3.2184607100038194e-06, - "loss": 1.1356, - "step": 85 - }, - { - "epoch": 0.010340888594961822, - "grad_norm": 2.049713961486718, - "learning_rate": 3.2269338586412414e-06, - "loss": 1.1738, - "step": 86 - }, - { - "epoch": 0.010461131485600914, - "grad_norm": 2.149754678791419, - "learning_rate": 3.2353090496083106e-06, - "loss": 1.2108, - "step": 87 - }, - { - "epoch": 0.010581374376240005, - "grad_norm": 1.803480264714883, - "learning_rate": 3.2435885220114572e-06, - "loss": 1.0635, - "step": 88 - }, - { - "epoch": 0.010701617266879095, - "grad_norm": 1.950072260275819, - "learning_rate": 3.2517744390519113e-06, - "loss": 1.1792, - "step": 89 - }, - { - "epoch": 0.010821860157518187, - "grad_norm": 2.68984359754506, - "learning_rate": 3.259868891418298e-06, - "loss": 0.9877, - "step": 90 - }, - { - "epoch": 0.010942103048157278, - "grad_norm": 1.7599696507871647, - "learning_rate": 3.2678739004917757e-06, - "loss": 1.0839, - "step": 91 - }, - { - "epoch": 0.011062345938796368, - "grad_norm": 1.6257520549303044, - "learning_rate": 3.275791421376029e-06, - "loss": 1.1589, - "step": 92 - }, - { - "epoch": 0.01118258882943546, - "grad_norm": 1.8633734402352224, - "learning_rate": 3.2836233457634622e-06, - "loss": 1.1948, - "step": 93 - }, - { - "epoch": 0.011302831720074551, - "grad_norm": 1.8967386187273159, - "learning_rate": 3.2913715046481135e-06, - "loss": 1.0932, - "step": 94 - }, - { - "epoch": 0.011423074610713641, - "grad_norm": 2.311460312826287, - "learning_rate": 3.299037670895023e-06, - "loss": 1.1306, - "step": 95 - }, - { - "epoch": 0.011543317501352733, - "grad_norm": 1.726598885959746, - "learning_rate": 3.3066235616750667e-06, - "loss": 1.0428, - "step": 96 - }, - { - "epoch": 0.011663560391991824, - "grad_norm": 2.0599759002312847, - "learning_rate": 3.3141308407736276e-06, - "loss": 1.15, - "step": 97 - }, - { - "epoch": 0.011783803282630914, - "grad_norm": 1.8478759247920038, - "learning_rate": 3.321561120780869e-06, - "loss": 1.0982, - "step": 98 - }, - { - "epoch": 0.011904046173270006, - "grad_norm": 2.1961724711299926, - "learning_rate": 3.3289159651708192e-06, - "loss": 1.2567, - "step": 99 - }, - { - "epoch": 0.012024289063909096, - "grad_norm": 1.8693106483292914, - "learning_rate": 3.3361968902759768e-06, - "loss": 1.212, - "step": 100 - }, - { - "epoch": 0.012144531954548187, - "grad_norm": 2.671528933237574, - "learning_rate": 3.343405367163663e-06, - "loss": 1.1857, - "step": 101 - }, - { - "epoch": 0.012264774845187279, - "grad_norm": 2.666459065542104, - "learning_rate": 3.350542823419951e-06, - "loss": 1.0472, - "step": 102 - }, - { - "epoch": 0.012385017735826368, - "grad_norm": 3.2453912188221636, - "learning_rate": 3.3576106448465615e-06, - "loss": 1.1273, - "step": 103 - }, - { - "epoch": 0.01250526062646546, - "grad_norm": 2.200395448863266, - "learning_rate": 3.3646101770757797e-06, - "loss": 1.1158, - "step": 104 - }, - { - "epoch": 0.012625503517104552, - "grad_norm": 1.804123166846866, - "learning_rate": 3.371542727108104e-06, - "loss": 1.0884, - "step": 105 - }, - { - "epoch": 0.012745746407743641, - "grad_norm": 2.522690228339417, - "learning_rate": 3.3784095647770114e-06, - "loss": 1.1445, - "step": 106 - }, - { - "epoch": 0.012865989298382733, - "grad_norm": 2.092300368691051, - "learning_rate": 3.3852119241449547e-06, - "loss": 1.1251, - "step": 107 - }, - { - "epoch": 0.012986232189021825, - "grad_norm": 2.3072022247774613, - "learning_rate": 3.3919510048344295e-06, - "loss": 1.2013, - "step": 108 - }, - { - "epoch": 0.013106475079660914, - "grad_norm": 1.8568665710278454, - "learning_rate": 3.3986279732976907e-06, - "loss": 1.1038, - "step": 109 - }, - { - "epoch": 0.013226717970300006, - "grad_norm": 2.2324095931762438, - "learning_rate": 3.4052439640284983e-06, - "loss": 1.1911, - "step": 110 - }, - { - "epoch": 0.013346960860939098, - "grad_norm": 1.989569885314976, - "learning_rate": 3.4118000807190217e-06, - "loss": 1.0528, - "step": 111 - }, - { - "epoch": 0.013467203751578187, - "grad_norm": 2.1482109921435457, - "learning_rate": 3.4182973973648723e-06, - "loss": 1.0027, - "step": 112 - }, - { - "epoch": 0.013587446642217279, - "grad_norm": 2.867150579313547, - "learning_rate": 3.424736959321014e-06, - "loss": 1.1795, - "step": 113 - }, - { - "epoch": 0.01370768953285637, - "grad_norm": 1.7701600159766562, - "learning_rate": 3.431119784311155e-06, - "loss": 1.1239, - "step": 114 - }, - { - "epoch": 0.01382793242349546, - "grad_norm": 1.8026854404077561, - "learning_rate": 3.43744686339307e-06, - "loss": 1.0153, - "step": 115 - }, - { - "epoch": 0.013948175314134552, - "grad_norm": 1.9768353300308312, - "learning_rate": 3.44371916188212e-06, - "loss": 1.1543, - "step": 116 - }, - { - "epoch": 0.014068418204773643, - "grad_norm": 8.552494189436878, - "learning_rate": 3.449937620235143e-06, - "loss": 1.1038, - "step": 117 - }, - { - "epoch": 0.014188661095412733, - "grad_norm": 1.5488176573968473, - "learning_rate": 3.456103154896722e-06, - "loss": 1.1139, - "step": 118 - }, - { - "epoch": 0.014308903986051825, - "grad_norm": 1.7767205455738415, - "learning_rate": 3.462216659109757e-06, - "loss": 1.1561, - "step": 119 - }, - { - "epoch": 0.014429146876690916, - "grad_norm": 2.935319293978933, - "learning_rate": 3.4682790036921077e-06, - "loss": 1.0883, - "step": 120 - }, - { - "epoch": 0.014549389767330006, - "grad_norm": 1.688919741714039, - "learning_rate": 3.4742910377810193e-06, - "loss": 1.0596, - "step": 121 - }, - { - "epoch": 0.014669632657969098, - "grad_norm": 3.575943060846196, - "learning_rate": 3.4802535895469042e-06, - "loss": 1.126, - "step": 122 - }, - { - "epoch": 0.01478987554860819, - "grad_norm": 1.8040663405694195, - "learning_rate": 3.4861674668779934e-06, - "loss": 1.1312, - "step": 123 - }, - { - "epoch": 0.01491011843924728, - "grad_norm": 2.143006162911372, - "learning_rate": 3.492033458037272e-06, - "loss": 1.0774, - "step": 124 - }, - { - "epoch": 0.01503036132988637, - "grad_norm": 2.9405939568756967, - "learning_rate": 3.497852332293018e-06, - "loss": 1.1006, - "step": 125 - }, - { - "epoch": 0.015150604220525462, - "grad_norm": 1.8774391839022933, - "learning_rate": 3.5036248405242356e-06, - "loss": 1.2016, - "step": 126 - }, - { - "epoch": 0.015270847111164552, - "grad_norm": 1.9483821197201514, - "learning_rate": 3.509351715802146e-06, - "loss": 1.0624, - "step": 127 - }, - { - "epoch": 0.015391090001803644, - "grad_norm": 1.9916759357742138, - "learning_rate": 3.5150336739488763e-06, - "loss": 1.0182, - "step": 128 - }, - { - "epoch": 0.015511332892442733, - "grad_norm": 3.101302666914586, - "learning_rate": 3.5206714140744143e-06, - "loss": 1.0541, - "step": 129 - }, - { - "epoch": 0.015631575783081827, - "grad_norm": 2.4110519253468494, - "learning_rate": 3.5262656190928208e-06, - "loss": 1.1149, - "step": 130 - }, - { - "epoch": 0.015751818673720917, - "grad_norm": 1.447740319945736, - "learning_rate": 3.5318169562186737e-06, - "loss": 0.9512, - "step": 131 - }, - { - "epoch": 0.015872061564360006, - "grad_norm": 1.7508986754149494, - "learning_rate": 3.5373260774446292e-06, - "loss": 1.0565, - "step": 132 - }, - { - "epoch": 0.0159923044549991, - "grad_norm": 2.7902370512147234, - "learning_rate": 3.542793620000961e-06, - "loss": 1.1401, - "step": 133 - }, - { - "epoch": 0.01611254734563819, - "grad_norm": 2.024954679947446, - "learning_rate": 3.5482202067978894e-06, - "loss": 1.0923, - "step": 134 - }, - { - "epoch": 0.01623279023627728, - "grad_norm": 2.6177497829116207, - "learning_rate": 3.553606446851471e-06, - "loss": 0.9837, - "step": 135 - }, - { - "epoch": 0.016353033126916373, - "grad_norm": 1.8142707330623729, - "learning_rate": 3.5589529356937613e-06, - "loss": 1.0596, - "step": 136 - }, - { - "epoch": 0.016473276017555463, - "grad_norm": 1.844302983521318, - "learning_rate": 3.5642602557679627e-06, - "loss": 1.006, - "step": 137 - }, - { - "epoch": 0.016593518908194552, - "grad_norm": 1.8956867273117886, - "learning_rate": 3.569528976809202e-06, - "loss": 1.0679, - "step": 138 - }, - { - "epoch": 0.016713761798833646, - "grad_norm": 1.8716831115290082, - "learning_rate": 3.5747596562115522e-06, - "loss": 1.1123, - "step": 139 - }, - { - "epoch": 0.016834004689472735, - "grad_norm": 2.504608389116113, - "learning_rate": 3.5799528393819138e-06, - "loss": 1.1322, - "step": 140 - }, - { - "epoch": 0.016954247580111825, - "grad_norm": 1.9410984708717396, - "learning_rate": 3.585109060081286e-06, - "loss": 1.0926, - "step": 141 - }, - { - "epoch": 0.017074490470750915, - "grad_norm": 1.7192714940389269, - "learning_rate": 3.590228840753992e-06, - "loss": 1.0043, - "step": 142 - }, - { - "epoch": 0.01719473336139001, - "grad_norm": 2.375727893314234, - "learning_rate": 3.5953126928453423e-06, - "loss": 1.095, - "step": 143 - }, - { - "epoch": 0.017314976252029098, - "grad_norm": 2.403506808463941, - "learning_rate": 3.600361117108239e-06, - "loss": 1.0371, - "step": 144 - }, - { - "epoch": 0.017435219142668188, - "grad_norm": 2.026122914493647, - "learning_rate": 3.6053746038991616e-06, - "loss": 1.1939, - "step": 145 - }, - { - "epoch": 0.01755546203330728, - "grad_norm": 1.4312786982295291, - "learning_rate": 3.6103536334639843e-06, - "loss": 0.8475, - "step": 146 - }, - { - "epoch": 0.01767570492394637, - "grad_norm": 2.119265279311837, - "learning_rate": 3.615298676214041e-06, - "loss": 1.0746, - "step": 147 - }, - { - "epoch": 0.01779594781458546, - "grad_norm": 2.035374254496081, - "learning_rate": 3.6202101929928317e-06, - "loss": 1.1121, - "step": 148 - }, - { - "epoch": 0.017916190705224554, - "grad_norm": 1.664853763057934, - "learning_rate": 3.6250886353337413e-06, - "loss": 1.1095, - "step": 149 - }, - { - "epoch": 0.018036433595863644, - "grad_norm": 1.8965603059915326, - "learning_rate": 3.6299344457091488e-06, - "loss": 1.085, - "step": 150 - }, - { - "epoch": 0.018156676486502734, - "grad_norm": 10.347425101820885, - "learning_rate": 3.634748057771256e-06, - "loss": 1.1529, - "step": 151 - }, - { - "epoch": 0.018276919377141827, - "grad_norm": 1.563033813842987, - "learning_rate": 3.639529896584965e-06, - "loss": 1.0849, - "step": 152 - }, - { - "epoch": 0.018397162267780917, - "grad_norm": 4.218974693813773, - "learning_rate": 3.6442803788531233e-06, - "loss": 1.111, - "step": 153 - }, - { - "epoch": 0.018517405158420007, - "grad_norm": 6.885767602177074, - "learning_rate": 3.6489999131344357e-06, - "loss": 1.1729, - "step": 154 - }, - { - "epoch": 0.0186376480490591, - "grad_norm": 1.6911901463689816, - "learning_rate": 3.653688900054313e-06, - "loss": 1.1315, - "step": 155 - }, - { - "epoch": 0.01875789093969819, - "grad_norm": 2.0795479398770365, - "learning_rate": 3.6583477325089526e-06, - "loss": 0.981, - "step": 156 - }, - { - "epoch": 0.01887813383033728, - "grad_norm": 6.132856040848623, - "learning_rate": 3.6629767958628916e-06, - "loss": 1.2548, - "step": 157 - }, - { - "epoch": 0.018998376720976373, - "grad_norm": 2.0244515135374477, - "learning_rate": 3.667576468140291e-06, - "loss": 1.0748, - "step": 158 - }, - { - "epoch": 0.019118619611615463, - "grad_norm": 2.8671795171891596, - "learning_rate": 3.672147120210184e-06, - "loss": 1.108, - "step": 159 - }, - { - "epoch": 0.019238862502254553, - "grad_norm": 2.2811337794132283, - "learning_rate": 3.6766891159659177e-06, - "loss": 1.0931, - "step": 160 - }, - { - "epoch": 0.019359105392893646, - "grad_norm": 2.5892349839526005, - "learning_rate": 3.6812028124990075e-06, - "loss": 1.0884, - "step": 161 - }, - { - "epoch": 0.019479348283532736, - "grad_norm": 2.6680741072890166, - "learning_rate": 3.6856885602676016e-06, - "loss": 1.0425, - "step": 162 - }, - { - "epoch": 0.019599591174171826, - "grad_norm": 2.1274878580215337, - "learning_rate": 3.6901467032597733e-06, - "loss": 1.1652, - "step": 163 - }, - { - "epoch": 0.01971983406481092, - "grad_norm": 2.6191784929216264, - "learning_rate": 3.694577579151804e-06, - "loss": 1.0992, - "step": 164 - }, - { - "epoch": 0.01984007695545001, - "grad_norm": 1.987898814176429, - "learning_rate": 3.6989815194616703e-06, - "loss": 0.9624, - "step": 165 - }, - { - "epoch": 0.0199603198460891, - "grad_norm": 6.520259389105674, - "learning_rate": 3.703358849697888e-06, - "loss": 1.0325, - "step": 166 - }, - { - "epoch": 0.020080562736728192, - "grad_norm": 1.671625635677403, - "learning_rate": 3.7077098895038803e-06, - "loss": 1.0412, - "step": 167 - }, - { - "epoch": 0.020200805627367282, - "grad_norm": 7.483861726430549, - "learning_rate": 3.712034952798045e-06, - "loss": 1.1904, - "step": 168 - }, - { - "epoch": 0.02032104851800637, - "grad_norm": 2.185551129457153, - "learning_rate": 3.7163343479096656e-06, - "loss": 1.066, - "step": 169 - }, - { - "epoch": 0.020441291408645465, - "grad_norm": 2.0796097348962133, - "learning_rate": 3.720608377710802e-06, - "loss": 1.0432, - "step": 170 - }, - { - "epoch": 0.020561534299284555, - "grad_norm": 2.5164688306273137, - "learning_rate": 3.7248573397443277e-06, - "loss": 1.0851, - "step": 171 - }, - { - "epoch": 0.020681777189923645, - "grad_norm": 1.8974806018385637, - "learning_rate": 3.729081526348224e-06, - "loss": 1.2003, - "step": 172 - }, - { - "epoch": 0.020802020080562738, - "grad_norm": 1.6229686511638242, - "learning_rate": 3.7332812247762777e-06, - "loss": 1.0612, - "step": 173 - }, - { - "epoch": 0.020922262971201828, - "grad_norm": 2.1594336406512853, - "learning_rate": 3.737456717315293e-06, - "loss": 1.1698, - "step": 174 - }, - { - "epoch": 0.021042505861840918, - "grad_norm": 1.6224759379573208, - "learning_rate": 3.7416082813989552e-06, - "loss": 1.1262, - "step": 175 - }, - { - "epoch": 0.02116274875248001, - "grad_norm": 2.0779449843833158, - "learning_rate": 3.745736189718439e-06, - "loss": 1.1171, - "step": 176 - }, - { - "epoch": 0.0212829916431191, - "grad_norm": 2.3855798879250227, - "learning_rate": 3.749840710329894e-06, - "loss": 0.9504, - "step": 177 - }, - { - "epoch": 0.02140323453375819, - "grad_norm": 2.7355961280015566, - "learning_rate": 3.7539221067588938e-06, - "loss": 1.1998, - "step": 178 - }, - { - "epoch": 0.021523477424397284, - "grad_norm": 2.979935944656014, - "learning_rate": 3.757980638101964e-06, - "loss": 1.1595, - "step": 179 - }, - { - "epoch": 0.021643720315036374, - "grad_norm": 2.0567309295449006, - "learning_rate": 3.7620165591252806e-06, - "loss": 1.12, - "step": 180 - }, - { - "epoch": 0.021763963205675464, - "grad_norm": 1.9746288747607588, - "learning_rate": 3.766030120360636e-06, - "loss": 1.1635, - "step": 181 - }, - { - "epoch": 0.021884206096314557, - "grad_norm": 2.4432273481453466, - "learning_rate": 3.7700215681987578e-06, - "loss": 1.1026, - "step": 182 - }, - { - "epoch": 0.022004448986953647, - "grad_norm": 1.73694482070694, - "learning_rate": 3.7739911449800767e-06, - "loss": 1.0411, - "step": 183 - }, - { - "epoch": 0.022124691877592736, - "grad_norm": 1.7353523868350602, - "learning_rate": 3.7779390890830114e-06, - "loss": 1.0205, - "step": 184 - }, - { - "epoch": 0.02224493476823183, - "grad_norm": 3.5919318866091476, - "learning_rate": 3.7818656350098723e-06, - "loss": 1.0742, - "step": 185 - }, - { - "epoch": 0.02236517765887092, - "grad_norm": 2.6323389368878782, - "learning_rate": 3.7857710134704447e-06, - "loss": 0.9897, - "step": 186 - }, - { - "epoch": 0.02248542054951001, - "grad_norm": 2.332520502292686, - "learning_rate": 3.7896554514633234e-06, - "loss": 1.0159, - "step": 187 - }, - { - "epoch": 0.022605663440149103, - "grad_norm": 1.9130078582642152, - "learning_rate": 3.7935191723550955e-06, - "loss": 1.0612, - "step": 188 - }, - { - "epoch": 0.022725906330788193, - "grad_norm": 2.4594118263366664, - "learning_rate": 3.797362395957408e-06, - "loss": 1.1099, - "step": 189 - }, - { - "epoch": 0.022846149221427282, - "grad_norm": 2.3289675628054276, - "learning_rate": 3.8011853386020055e-06, - "loss": 1.0016, - "step": 190 - }, - { - "epoch": 0.022966392112066376, - "grad_norm": 2.826981286887328, - "learning_rate": 3.804988213213804e-06, - "loss": 1.1125, - "step": 191 - }, - { - "epoch": 0.023086635002705466, - "grad_norm": 1.322544085483537, - "learning_rate": 3.808771229382049e-06, - "loss": 0.8668, - "step": 192 - }, - { - "epoch": 0.023206877893344555, - "grad_norm": 2.4160554924206634, - "learning_rate": 3.8125345934296324e-06, - "loss": 1.071, - "step": 193 - }, - { - "epoch": 0.02332712078398365, - "grad_norm": 4.109521667415922, - "learning_rate": 3.81627850848061e-06, - "loss": 1.098, - "step": 194 - }, - { - "epoch": 0.02344736367462274, - "grad_norm": 2.41173947853347, - "learning_rate": 3.820003174525994e-06, - "loss": 1.07, - "step": 195 - }, - { - "epoch": 0.02356760656526183, - "grad_norm": 2.1214786974079916, - "learning_rate": 3.823708788487851e-06, - "loss": 1.0571, - "step": 196 - }, - { - "epoch": 0.02368784945590092, - "grad_norm": 4.544758202360157, - "learning_rate": 3.827395544281781e-06, - "loss": 1.0627, - "step": 197 - }, - { - "epoch": 0.02380809234654001, - "grad_norm": 1.7725075455519776, - "learning_rate": 3.831063632877802e-06, - "loss": 1.0223, - "step": 198 - }, - { - "epoch": 0.0239283352371791, - "grad_norm": 2.105598403250892, - "learning_rate": 3.834713242359712e-06, - "loss": 0.9806, - "step": 199 - }, - { - "epoch": 0.02404857812781819, - "grad_norm": 2.1239574343193164, - "learning_rate": 3.838344557982959e-06, - "loss": 1.0913, - "step": 200 - }, - { - "epoch": 0.024168821018457284, - "grad_norm": 3.588437208511007, - "learning_rate": 3.841957762231063e-06, - "loss": 1.0719, - "step": 201 - }, - { - "epoch": 0.024289063909096374, - "grad_norm": 1.8818497471681281, - "learning_rate": 3.8455530348706454e-06, - "loss": 1.085, - "step": 202 - }, - { - "epoch": 0.024409306799735464, - "grad_norm": 1.7625196712830034, - "learning_rate": 3.849130553005099e-06, - "loss": 0.9921, - "step": 203 - }, - { - "epoch": 0.024529549690374557, - "grad_norm": 8.394296762662638, - "learning_rate": 3.852690491126933e-06, - "loss": 1.0502, - "step": 204 - }, - { - "epoch": 0.024649792581013647, - "grad_norm": 2.4257816431410713, - "learning_rate": 3.856233021168845e-06, - "loss": 1.122, - "step": 205 - }, - { - "epoch": 0.024770035471652737, - "grad_norm": 2.0251242187453284, - "learning_rate": 3.859758312553544e-06, - "loss": 1.128, - "step": 206 - }, - { - "epoch": 0.02489027836229183, - "grad_norm": 1.7759354262211506, - "learning_rate": 3.8632665322423735e-06, - "loss": 1.1424, - "step": 207 - }, - { - "epoch": 0.02501052125293092, - "grad_norm": 1.714155958578329, - "learning_rate": 3.866757844782762e-06, - "loss": 1.0718, - "step": 208 - }, - { - "epoch": 0.02513076414357001, - "grad_norm": 2.2072641435473117, - "learning_rate": 3.870232412354527e-06, - "loss": 1.1246, - "step": 209 - }, - { - "epoch": 0.025251007034209103, - "grad_norm": 1.8066345544661524, - "learning_rate": 3.873690394815086e-06, - "loss": 1.1424, - "step": 210 - }, - { - "epoch": 0.025371249924848193, - "grad_norm": 2.3071447688773468, - "learning_rate": 3.877131949743587e-06, - "loss": 1.1347, - "step": 211 - }, - { - "epoch": 0.025491492815487283, - "grad_norm": 2.0676140239228906, - "learning_rate": 3.880557232483993e-06, - "loss": 1.01, - "step": 212 - }, - { - "epoch": 0.025611735706126376, - "grad_norm": 5.2376130522041375, - "learning_rate": 3.883966396187164e-06, - "loss": 1.0886, - "step": 213 - }, - { - "epoch": 0.025731978596765466, - "grad_norm": 2.075833196998611, - "learning_rate": 3.887359591851937e-06, - "loss": 1.1192, - "step": 214 - }, - { - "epoch": 0.025852221487404556, - "grad_norm": 1.8847851687984452, - "learning_rate": 3.890736968365265e-06, - "loss": 1.1457, - "step": 215 - }, - { - "epoch": 0.02597246437804365, - "grad_norm": 1.8682377626724513, - "learning_rate": 3.894098672541412e-06, - "loss": 1.0644, - "step": 216 - }, - { - "epoch": 0.02609270726868274, - "grad_norm": 1.6518813042984053, - "learning_rate": 3.89744484916025e-06, - "loss": 0.9718, - "step": 217 - }, - { - "epoch": 0.02621295015932183, - "grad_norm": 1.8261107735433184, - "learning_rate": 3.900775641004673e-06, - "loss": 1.0884, - "step": 218 - }, - { - "epoch": 0.026333193049960922, - "grad_norm": 2.7154744904383534, - "learning_rate": 3.904091188897156e-06, - "loss": 0.9629, - "step": 219 - }, - { - "epoch": 0.026453435940600012, - "grad_norm": 2.015173573176588, - "learning_rate": 3.90739163173548e-06, - "loss": 1.039, - "step": 220 - }, - { - "epoch": 0.026573678831239102, - "grad_norm": 3.4790170298546332, - "learning_rate": 3.910677106527646e-06, - "loss": 1.1085, - "step": 221 - }, - { - "epoch": 0.026693921721878195, - "grad_norm": 2.5354615954825164, - "learning_rate": 3.913947748426004e-06, - "loss": 1.0559, - "step": 222 - }, - { - "epoch": 0.026814164612517285, - "grad_norm": 2.9633075820553456, - "learning_rate": 3.9172036907606136e-06, - "loss": 0.9837, - "step": 223 - }, - { - "epoch": 0.026934407503156375, - "grad_norm": 1.7059933368065279, - "learning_rate": 3.920445065071855e-06, - "loss": 1.1541, - "step": 224 - }, - { - "epoch": 0.027054650393795468, - "grad_norm": 2.1260813037617066, - "learning_rate": 3.923672001142322e-06, - "loss": 1.0095, - "step": 225 - }, - { - "epoch": 0.027174893284434558, - "grad_norm": 1.7157733457039497, - "learning_rate": 3.926884627027996e-06, - "loss": 1.0647, - "step": 226 - }, - { - "epoch": 0.027295136175073648, - "grad_norm": 2.4207169255836942, - "learning_rate": 3.930083069088744e-06, - "loss": 0.9915, - "step": 227 - }, - { - "epoch": 0.02741537906571274, - "grad_norm": 1.2657052944756468, - "learning_rate": 3.933267452018137e-06, - "loss": 0.847, - "step": 228 - }, - { - "epoch": 0.02753562195635183, - "grad_norm": 2.723551226816785, - "learning_rate": 3.936437898872622e-06, - "loss": 1.0595, - "step": 229 - }, - { - "epoch": 0.02765586484699092, - "grad_norm": 2.3784493945682286, - "learning_rate": 3.9395945311000525e-06, - "loss": 1.0149, - "step": 230 - }, - { - "epoch": 0.027776107737630014, - "grad_norm": 2.3069008753877456, - "learning_rate": 3.942737468567608e-06, - "loss": 1.1311, - "step": 231 - }, - { - "epoch": 0.027896350628269104, - "grad_norm": 1.7560147735434786, - "learning_rate": 3.9458668295891026e-06, - "loss": 1.073, - "step": 232 - }, - { - "epoch": 0.028016593518908194, - "grad_norm": 2.6996124907850816, - "learning_rate": 3.948982730951712e-06, - "loss": 1.0834, - "step": 233 - }, - { - "epoch": 0.028136836409547287, - "grad_norm": 1.901022951913625, - "learning_rate": 3.9520852879421254e-06, - "loss": 1.0342, - "step": 234 - }, - { - "epoch": 0.028257079300186377, - "grad_norm": 2.1081634432931655, - "learning_rate": 3.955174614372137e-06, - "loss": 1.0313, - "step": 235 - }, - { - "epoch": 0.028377322190825467, - "grad_norm": 2.0597136200290573, - "learning_rate": 3.9582508226037045e-06, - "loss": 1.066, - "step": 236 - }, - { - "epoch": 0.02849756508146456, - "grad_norm": 2.438429435179858, - "learning_rate": 3.9613140235734636e-06, - "loss": 1.1484, - "step": 237 - }, - { - "epoch": 0.02861780797210365, - "grad_norm": 1.855786812274431, - "learning_rate": 3.96436432681674e-06, - "loss": 1.0296, - "step": 238 - }, - { - "epoch": 0.02873805086274274, - "grad_norm": 1.821652422701241, - "learning_rate": 3.967401840491044e-06, - "loss": 1.1192, - "step": 239 - }, - { - "epoch": 0.028858293753381833, - "grad_norm": 1.9171570965906295, - "learning_rate": 3.97042667139909e-06, - "loss": 1.0935, - "step": 240 - }, - { - "epoch": 0.028978536644020923, - "grad_norm": 1.8930830306611417, - "learning_rate": 3.973438925011327e-06, - "loss": 1.0902, - "step": 241 - }, - { - "epoch": 0.029098779534660012, - "grad_norm": 2.236227694592612, - "learning_rate": 3.976438705488002e-06, - "loss": 1.1319, - "step": 242 - }, - { - "epoch": 0.029219022425299106, - "grad_norm": 2.7847833370395136, - "learning_rate": 3.9794261157007744e-06, - "loss": 1.1503, - "step": 243 - }, - { - "epoch": 0.029339265315938196, - "grad_norm": 1.9728559638422651, - "learning_rate": 3.982401257253887e-06, - "loss": 1.068, - "step": 244 - }, - { - "epoch": 0.029459508206577285, - "grad_norm": 2.4938648283485563, - "learning_rate": 3.985364230504893e-06, - "loss": 1.1061, - "step": 245 - }, - { - "epoch": 0.02957975109721638, - "grad_norm": 2.0342128565991735, - "learning_rate": 3.988315134584976e-06, - "loss": 1.0553, - "step": 246 - }, - { - "epoch": 0.02969999398785547, - "grad_norm": 1.5839054262027967, - "learning_rate": 3.991254067418851e-06, - "loss": 1.0159, - "step": 247 - }, - { - "epoch": 0.02982023687849456, - "grad_norm": 1.9049339128780456, - "learning_rate": 3.994181125744254e-06, - "loss": 1.0353, - "step": 248 - }, - { - "epoch": 0.02994047976913365, - "grad_norm": 1.916664565817746, - "learning_rate": 3.99709640513106e-06, - "loss": 0.9642, - "step": 249 - }, - { - "epoch": 0.03006072265977274, - "grad_norm": 2.079552755139304, - "learning_rate": 4e-06, - "loss": 1.0632, - "step": 250 - }, - { - "epoch": 0.03018096555041183, - "grad_norm": 3.738932490051289, - "learning_rate": 3.999999848300794e-06, - "loss": 1.1082, - "step": 251 - }, - { - "epoch": 0.030301208441050925, - "grad_norm": 1.5601784052682113, - "learning_rate": 3.999999393203203e-06, - "loss": 1.1028, - "step": 252 - }, - { - "epoch": 0.030421451331690014, - "grad_norm": 1.6835326429283448, - "learning_rate": 3.999998634707293e-06, - "loss": 1.0653, - "step": 253 - }, - { - "epoch": 0.030541694222329104, - "grad_norm": 2.318954121532687, - "learning_rate": 3.999997572813182e-06, - "loss": 1.1812, - "step": 254 - }, - { - "epoch": 0.030661937112968194, - "grad_norm": 3.284199288383747, - "learning_rate": 3.999996207521028e-06, - "loss": 1.0861, - "step": 255 - }, - { - "epoch": 0.030782180003607287, - "grad_norm": 2.167769359320484, - "learning_rate": 3.999994538831039e-06, - "loss": 1.03, - "step": 256 - }, - { - "epoch": 0.030902422894246377, - "grad_norm": 2.49726479630417, - "learning_rate": 3.99999256674347e-06, - "loss": 1.058, - "step": 257 - }, - { - "epoch": 0.031022665784885467, - "grad_norm": 1.1427844260702404, - "learning_rate": 3.999990291258618e-06, - "loss": 0.767, - "step": 258 - }, - { - "epoch": 0.03114290867552456, - "grad_norm": 2.18450777580779, - "learning_rate": 3.999987712376829e-06, - "loss": 1.0737, - "step": 259 - }, - { - "epoch": 0.031263151566163654, - "grad_norm": 2.395628237584029, - "learning_rate": 3.999984830098494e-06, - "loss": 1.0266, - "step": 260 - }, - { - "epoch": 0.03138339445680274, - "grad_norm": 3.439859586112196, - "learning_rate": 3.999981644424051e-06, - "loss": 1.1859, - "step": 261 - }, - { - "epoch": 0.03150363734744183, - "grad_norm": 2.3241326837071568, - "learning_rate": 3.999978155353982e-06, - "loss": 1.0756, - "step": 262 - }, - { - "epoch": 0.03162388023808092, - "grad_norm": 2.201054025793754, - "learning_rate": 3.9999743628888186e-06, - "loss": 1.0181, - "step": 263 - }, - { - "epoch": 0.03174412312872001, - "grad_norm": 2.1746781735906136, - "learning_rate": 3.999970267029133e-06, - "loss": 1.1017, - "step": 264 - }, - { - "epoch": 0.0318643660193591, - "grad_norm": 1.6487941265780817, - "learning_rate": 3.999965867775548e-06, - "loss": 1.0075, - "step": 265 - }, - { - "epoch": 0.0319846089099982, - "grad_norm": 2.2000841298056435, - "learning_rate": 3.9999611651287315e-06, - "loss": 1.0742, - "step": 266 - }, - { - "epoch": 0.03210485180063729, - "grad_norm": 2.259358968717438, - "learning_rate": 3.999956159089396e-06, - "loss": 1.0143, - "step": 267 - }, - { - "epoch": 0.03222509469127638, - "grad_norm": 2.1370857104980354, - "learning_rate": 3.999950849658302e-06, - "loss": 1.0076, - "step": 268 - }, - { - "epoch": 0.03234533758191547, - "grad_norm": 2.225159105762883, - "learning_rate": 3.999945236836254e-06, - "loss": 1.058, - "step": 269 - }, - { - "epoch": 0.03246558047255456, - "grad_norm": 2.3758283475593625, - "learning_rate": 3.999939320624103e-06, - "loss": 1.1654, - "step": 270 - }, - { - "epoch": 0.03258582336319365, - "grad_norm": 1.773325643498854, - "learning_rate": 3.999933101022749e-06, - "loss": 1.1114, - "step": 271 - }, - { - "epoch": 0.032706066253832745, - "grad_norm": 2.1745015461087953, - "learning_rate": 3.999926578033132e-06, - "loss": 1.0702, - "step": 272 - }, - { - "epoch": 0.032826309144471835, - "grad_norm": 1.8977370363128634, - "learning_rate": 3.999919751656244e-06, - "loss": 0.8498, - "step": 273 - }, - { - "epoch": 0.032946552035110925, - "grad_norm": 2.0407535950595044, - "learning_rate": 3.9999126218931195e-06, - "loss": 0.9769, - "step": 274 - }, - { - "epoch": 0.033066794925750015, - "grad_norm": 2.03462806920697, - "learning_rate": 3.99990518874484e-06, - "loss": 1.1102, - "step": 275 - }, - { - "epoch": 0.033187037816389105, - "grad_norm": 2.101073367690247, - "learning_rate": 3.999897452212534e-06, - "loss": 1.1333, - "step": 276 - }, - { - "epoch": 0.033307280707028195, - "grad_norm": 2.1103364236933864, - "learning_rate": 3.999889412297374e-06, - "loss": 1.2138, - "step": 277 - }, - { - "epoch": 0.03342752359766729, - "grad_norm": 1.9202968661957684, - "learning_rate": 3.999881069000581e-06, - "loss": 1.003, - "step": 278 - }, - { - "epoch": 0.03354776648830638, - "grad_norm": 2.2449065943872126, - "learning_rate": 3.99987242232342e-06, - "loss": 1.085, - "step": 279 - }, - { - "epoch": 0.03366800937894547, - "grad_norm": 1.8918202302866738, - "learning_rate": 3.9998634722672026e-06, - "loss": 1.0035, - "step": 280 - }, - { - "epoch": 0.03378825226958456, - "grad_norm": 1.8905179332335584, - "learning_rate": 3.999854218833286e-06, - "loss": 1.0037, - "step": 281 - }, - { - "epoch": 0.03390849516022365, - "grad_norm": 1.9041059349386278, - "learning_rate": 3.999844662023075e-06, - "loss": 1.0376, - "step": 282 - }, - { - "epoch": 0.03402873805086274, - "grad_norm": 1.6946733386140749, - "learning_rate": 3.999834801838018e-06, - "loss": 1.128, - "step": 283 - }, - { - "epoch": 0.03414898094150183, - "grad_norm": 1.9444662804361441, - "learning_rate": 3.9998246382796115e-06, - "loss": 0.9535, - "step": 284 - }, - { - "epoch": 0.03426922383214093, - "grad_norm": 2.092397794061868, - "learning_rate": 3.999814171349399e-06, - "loss": 1.1222, - "step": 285 - }, - { - "epoch": 0.03438946672278002, - "grad_norm": 1.748331568886467, - "learning_rate": 3.9998034010489655e-06, - "loss": 0.952, - "step": 286 - }, - { - "epoch": 0.03450970961341911, - "grad_norm": 2.6977936312827464, - "learning_rate": 3.999792327379946e-06, - "loss": 0.9762, - "step": 287 - }, - { - "epoch": 0.034629952504058197, - "grad_norm": 1.9776850592326278, - "learning_rate": 3.999780950344021e-06, - "loss": 1.1882, - "step": 288 - }, - { - "epoch": 0.034750195394697286, - "grad_norm": 1.6294815182007922, - "learning_rate": 3.999769269942916e-06, - "loss": 1.0293, - "step": 289 - }, - { - "epoch": 0.034870438285336376, - "grad_norm": 1.6539963558484378, - "learning_rate": 3.999757286178402e-06, - "loss": 1.0257, - "step": 290 - }, - { - "epoch": 0.03499068117597547, - "grad_norm": 1.8490645543780575, - "learning_rate": 3.999744999052299e-06, - "loss": 1.1236, - "step": 291 - }, - { - "epoch": 0.03511092406661456, - "grad_norm": 1.2103531935174383, - "learning_rate": 3.9997324085664675e-06, - "loss": 0.868, - "step": 292 - }, - { - "epoch": 0.03523116695725365, - "grad_norm": 2.0349818061558924, - "learning_rate": 3.999719514722821e-06, - "loss": 1.1303, - "step": 293 - }, - { - "epoch": 0.03535140984789274, - "grad_norm": 2.7202594995128573, - "learning_rate": 3.999706317523314e-06, - "loss": 0.9607, - "step": 294 - }, - { - "epoch": 0.03547165273853183, - "grad_norm": 2.1561358877753354, - "learning_rate": 3.999692816969948e-06, - "loss": 1.07, - "step": 295 - }, - { - "epoch": 0.03559189562917092, - "grad_norm": 1.0415663416551577, - "learning_rate": 3.999679013064772e-06, - "loss": 0.9131, - "step": 296 - }, - { - "epoch": 0.03571213851981002, - "grad_norm": 2.7992793995160925, - "learning_rate": 3.99966490580988e-06, - "loss": 1.0718, - "step": 297 - }, - { - "epoch": 0.03583238141044911, - "grad_norm": 2.3628642573424967, - "learning_rate": 3.999650495207411e-06, - "loss": 0.87, - "step": 298 - }, - { - "epoch": 0.0359526243010882, - "grad_norm": 3.0078123414671225, - "learning_rate": 3.999635781259553e-06, - "loss": 1.1142, - "step": 299 - }, - { - "epoch": 0.03607286719172729, - "grad_norm": 1.0120581569395872, - "learning_rate": 3.999620763968535e-06, - "loss": 0.7633, - "step": 300 - }, - { - "epoch": 0.03619311008236638, - "grad_norm": 1.674093325603322, - "learning_rate": 3.999605443336638e-06, - "loss": 1.0801, - "step": 301 - }, - { - "epoch": 0.03631335297300547, - "grad_norm": 2.4081209513681943, - "learning_rate": 3.999589819366185e-06, - "loss": 1.1085, - "step": 302 - }, - { - "epoch": 0.036433595863644565, - "grad_norm": 1.8075336950176801, - "learning_rate": 3.999573892059547e-06, - "loss": 1.047, - "step": 303 - }, - { - "epoch": 0.036553838754283655, - "grad_norm": 1.8160634363370722, - "learning_rate": 3.999557661419138e-06, - "loss": 1.02, - "step": 304 - }, - { - "epoch": 0.036674081644922744, - "grad_norm": 2.0229363133752907, - "learning_rate": 3.9995411274474225e-06, - "loss": 1.0157, - "step": 305 - }, - { - "epoch": 0.036794324535561834, - "grad_norm": 1.6866482598477697, - "learning_rate": 3.999524290146908e-06, - "loss": 1.0279, - "step": 306 - }, - { - "epoch": 0.036914567426200924, - "grad_norm": 2.6930422744917517, - "learning_rate": 3.9995071495201485e-06, - "loss": 1.1167, - "step": 307 - }, - { - "epoch": 0.037034810316840014, - "grad_norm": 2.1494561017512575, - "learning_rate": 3.999489705569744e-06, - "loss": 1.182, - "step": 308 - }, - { - "epoch": 0.03715505320747911, - "grad_norm": 1.7850139402598504, - "learning_rate": 3.999471958298341e-06, - "loss": 1.0749, - "step": 309 - }, - { - "epoch": 0.0372752960981182, - "grad_norm": 1.968956830498376, - "learning_rate": 3.999453907708631e-06, - "loss": 0.973, - "step": 310 - }, - { - "epoch": 0.03739553898875729, - "grad_norm": 1.74510461837947, - "learning_rate": 3.999435553803353e-06, - "loss": 1.0275, - "step": 311 - }, - { - "epoch": 0.03751578187939638, - "grad_norm": 2.211287615657207, - "learning_rate": 3.999416896585292e-06, - "loss": 1.046, - "step": 312 - }, - { - "epoch": 0.03763602477003547, - "grad_norm": 2.8567554585898853, - "learning_rate": 3.9993979360572775e-06, - "loss": 1.0786, - "step": 313 - }, - { - "epoch": 0.03775626766067456, - "grad_norm": 2.139382663170354, - "learning_rate": 3.999378672222185e-06, - "loss": 1.0455, - "step": 314 - }, - { - "epoch": 0.03787651055131366, - "grad_norm": 2.0241739118668085, - "learning_rate": 3.9993591050829385e-06, - "loss": 1.0439, - "step": 315 - }, - { - "epoch": 0.037996753441952746, - "grad_norm": 1.8555088400274253, - "learning_rate": 3.999339234642506e-06, - "loss": 1.008, - "step": 316 - }, - { - "epoch": 0.038116996332591836, - "grad_norm": 1.8301915283242165, - "learning_rate": 3.9993190609038994e-06, - "loss": 1.054, - "step": 317 - }, - { - "epoch": 0.038237239223230926, - "grad_norm": 1.7079389938876743, - "learning_rate": 3.999298583870182e-06, - "loss": 1.0533, - "step": 318 - }, - { - "epoch": 0.038357482113870016, - "grad_norm": 1.6834876077919283, - "learning_rate": 3.999277803544458e-06, - "loss": 0.9868, - "step": 319 - }, - { - "epoch": 0.038477725004509106, - "grad_norm": 1.1075458221772818, - "learning_rate": 3.999256719929882e-06, - "loss": 0.8709, - "step": 320 - }, - { - "epoch": 0.0385979678951482, - "grad_norm": 1.3423772941929943, - "learning_rate": 3.999235333029651e-06, - "loss": 0.9501, - "step": 321 - }, - { - "epoch": 0.03871821078578729, - "grad_norm": 1.6837141166550618, - "learning_rate": 3.999213642847009e-06, - "loss": 1.0271, - "step": 322 - }, - { - "epoch": 0.03883845367642638, - "grad_norm": 1.6652470344135637, - "learning_rate": 3.999191649385247e-06, - "loss": 1.1276, - "step": 323 - }, - { - "epoch": 0.03895869656706547, - "grad_norm": 1.0954220661210665, - "learning_rate": 3.999169352647702e-06, - "loss": 0.8392, - "step": 324 - }, - { - "epoch": 0.03907893945770456, - "grad_norm": 1.6084383859404057, - "learning_rate": 3.999146752637755e-06, - "loss": 1.0355, - "step": 325 - }, - { - "epoch": 0.03919918234834365, - "grad_norm": 2.604305619665272, - "learning_rate": 3.999123849358836e-06, - "loss": 1.1216, - "step": 326 - }, - { - "epoch": 0.03931942523898275, - "grad_norm": 2.0735925566154094, - "learning_rate": 3.999100642814418e-06, - "loss": 0.9666, - "step": 327 - }, - { - "epoch": 0.03943966812962184, - "grad_norm": 1.9182041124801135, - "learning_rate": 3.999077133008022e-06, - "loss": 1.1044, - "step": 328 - }, - { - "epoch": 0.03955991102026093, - "grad_norm": 2.2484239780637094, - "learning_rate": 3.9990533199432145e-06, - "loss": 1.1204, - "step": 329 - }, - { - "epoch": 0.03968015391090002, - "grad_norm": 1.934262123573809, - "learning_rate": 3.999029203623608e-06, - "loss": 0.9775, - "step": 330 - }, - { - "epoch": 0.03980039680153911, - "grad_norm": 1.8237933494012393, - "learning_rate": 3.99900478405286e-06, - "loss": 1.077, - "step": 331 - }, - { - "epoch": 0.0399206396921782, - "grad_norm": 2.2928965880494414, - "learning_rate": 3.998980061234676e-06, - "loss": 1.0448, - "step": 332 - }, - { - "epoch": 0.040040882582817294, - "grad_norm": 2.1534518454725293, - "learning_rate": 3.9989550351728055e-06, - "loss": 0.9851, - "step": 333 - }, - { - "epoch": 0.040161125473456384, - "grad_norm": 2.044563677439154, - "learning_rate": 3.998929705871046e-06, - "loss": 1.05, - "step": 334 - }, - { - "epoch": 0.040281368364095474, - "grad_norm": 2.3318255412046285, - "learning_rate": 3.99890407333324e-06, - "loss": 1.0954, - "step": 335 - }, - { - "epoch": 0.040401611254734564, - "grad_norm": 1.8228422603862353, - "learning_rate": 3.998878137563275e-06, - "loss": 1.0764, - "step": 336 - }, - { - "epoch": 0.040521854145373654, - "grad_norm": 1.9468936851915153, - "learning_rate": 3.998851898565085e-06, - "loss": 1.0652, - "step": 337 - }, - { - "epoch": 0.04064209703601274, - "grad_norm": 1.7600647463593357, - "learning_rate": 3.998825356342653e-06, - "loss": 1.0462, - "step": 338 - }, - { - "epoch": 0.04076233992665183, - "grad_norm": 2.2607565300570958, - "learning_rate": 3.998798510900003e-06, - "loss": 0.9424, - "step": 339 - }, - { - "epoch": 0.04088258281729093, - "grad_norm": 2.085294817382624, - "learning_rate": 3.998771362241207e-06, - "loss": 1.0626, - "step": 340 - }, - { - "epoch": 0.04100282570793002, - "grad_norm": 1.7864356600228073, - "learning_rate": 3.998743910370385e-06, - "loss": 1.0865, - "step": 341 - }, - { - "epoch": 0.04112306859856911, - "grad_norm": 1.7909712291943385, - "learning_rate": 3.998716155291702e-06, - "loss": 0.9441, - "step": 342 - }, - { - "epoch": 0.0412433114892082, - "grad_norm": 1.497273430728047, - "learning_rate": 3.998688097009366e-06, - "loss": 1.116, - "step": 343 - }, - { - "epoch": 0.04136355437984729, - "grad_norm": 1.950908776798496, - "learning_rate": 3.998659735527636e-06, - "loss": 1.0169, - "step": 344 - }, - { - "epoch": 0.04148379727048638, - "grad_norm": 1.436331896326977, - "learning_rate": 3.998631070850813e-06, - "loss": 0.9811, - "step": 345 - }, - { - "epoch": 0.041604040161125476, - "grad_norm": 2.0888017855513503, - "learning_rate": 3.9986021029832455e-06, - "loss": 1.0432, - "step": 346 - }, - { - "epoch": 0.041724283051764566, - "grad_norm": 2.3892858407091606, - "learning_rate": 3.9985728319293285e-06, - "loss": 1.1217, - "step": 347 - }, - { - "epoch": 0.041844525942403656, - "grad_norm": 2.057251465481097, - "learning_rate": 3.998543257693501e-06, - "loss": 1.0635, - "step": 348 - }, - { - "epoch": 0.041964768833042745, - "grad_norm": 1.6973747683639475, - "learning_rate": 3.998513380280251e-06, - "loss": 1.0896, - "step": 349 - }, - { - "epoch": 0.042085011723681835, - "grad_norm": 2.313451622715403, - "learning_rate": 3.99848319969411e-06, - "loss": 1.1664, - "step": 350 - }, - { - "epoch": 0.042205254614320925, - "grad_norm": 2.6635917180480386, - "learning_rate": 3.9984527159396564e-06, - "loss": 0.995, - "step": 351 - }, - { - "epoch": 0.04232549750496002, - "grad_norm": 1.9605120315085285, - "learning_rate": 3.9984219290215154e-06, - "loss": 1.0459, - "step": 352 - }, - { - "epoch": 0.04244574039559911, - "grad_norm": 1.4204040296415223, - "learning_rate": 3.998390838944356e-06, - "loss": 1.0996, - "step": 353 - }, - { - "epoch": 0.0425659832862382, - "grad_norm": 2.4900912374610145, - "learning_rate": 3.998359445712895e-06, - "loss": 1.1039, - "step": 354 - }, - { - "epoch": 0.04268622617687729, - "grad_norm": 2.1532457957273774, - "learning_rate": 3.9983277493318955e-06, - "loss": 1.0202, - "step": 355 - }, - { - "epoch": 0.04280646906751638, - "grad_norm": 1.6028183641956768, - "learning_rate": 3.998295749806165e-06, - "loss": 1.0213, - "step": 356 - }, - { - "epoch": 0.04292671195815547, - "grad_norm": 2.054035039660432, - "learning_rate": 3.998263447140558e-06, - "loss": 1.0472, - "step": 357 - }, - { - "epoch": 0.04304695484879457, - "grad_norm": 1.884858472049231, - "learning_rate": 3.998230841339976e-06, - "loss": 1.0344, - "step": 358 - }, - { - "epoch": 0.04316719773943366, - "grad_norm": 2.040594351335331, - "learning_rate": 3.998197932409363e-06, - "loss": 1.0574, - "step": 359 - }, - { - "epoch": 0.04328744063007275, - "grad_norm": 1.84770283226989, - "learning_rate": 3.9981647203537125e-06, - "loss": 1.0739, - "step": 360 - }, - { - "epoch": 0.04340768352071184, - "grad_norm": 1.7271621836419722, - "learning_rate": 3.998131205178063e-06, - "loss": 1.1648, - "step": 361 - }, - { - "epoch": 0.04352792641135093, - "grad_norm": 2.5601971210643244, - "learning_rate": 3.998097386887498e-06, - "loss": 0.9741, - "step": 362 - }, - { - "epoch": 0.04364816930199002, - "grad_norm": 1.899364794408425, - "learning_rate": 3.998063265487148e-06, - "loss": 1.0451, - "step": 363 - }, - { - "epoch": 0.043768412192629114, - "grad_norm": 3.056664648967092, - "learning_rate": 3.99802884098219e-06, - "loss": 1.0207, - "step": 364 - }, - { - "epoch": 0.043888655083268203, - "grad_norm": 2.149308461598322, - "learning_rate": 3.997994113377845e-06, - "loss": 1.0321, - "step": 365 - }, - { - "epoch": 0.04400889797390729, - "grad_norm": 1.9386579683301968, - "learning_rate": 3.9979590826793815e-06, - "loss": 1.0393, - "step": 366 - }, - { - "epoch": 0.04412914086454638, - "grad_norm": 2.3708411495390527, - "learning_rate": 3.997923748892113e-06, - "loss": 1.0209, - "step": 367 - }, - { - "epoch": 0.04424938375518547, - "grad_norm": 1.5885532399574145, - "learning_rate": 3.9978881120214015e-06, - "loss": 1.0949, - "step": 368 - }, - { - "epoch": 0.04436962664582456, - "grad_norm": 1.788114671460655, - "learning_rate": 3.997852172072652e-06, - "loss": 1.0045, - "step": 369 - }, - { - "epoch": 0.04448986953646366, - "grad_norm": 2.425591158828626, - "learning_rate": 3.9978159290513155e-06, - "loss": 1.1059, - "step": 370 - }, - { - "epoch": 0.04461011242710275, - "grad_norm": 1.632650339935672, - "learning_rate": 3.997779382962892e-06, - "loss": 1.017, - "step": 371 - }, - { - "epoch": 0.04473035531774184, - "grad_norm": 1.9621327317746473, - "learning_rate": 3.997742533812924e-06, - "loss": 0.947, - "step": 372 - }, - { - "epoch": 0.04485059820838093, - "grad_norm": 2.2896775600862664, - "learning_rate": 3.997705381607001e-06, - "loss": 1.1272, - "step": 373 - }, - { - "epoch": 0.04497084109902002, - "grad_norm": 1.1414387556665588, - "learning_rate": 3.997667926350761e-06, - "loss": 0.8357, - "step": 374 - }, - { - "epoch": 0.04509108398965911, - "grad_norm": 0.9612116228833609, - "learning_rate": 3.997630168049886e-06, - "loss": 0.8059, - "step": 375 - }, - { - "epoch": 0.045211326880298205, - "grad_norm": 1.71535406144674, - "learning_rate": 3.997592106710101e-06, - "loss": 0.9843, - "step": 376 - }, - { - "epoch": 0.045331569770937295, - "grad_norm": 2.1646250740655284, - "learning_rate": 3.997553742337182e-06, - "loss": 0.873, - "step": 377 - }, - { - "epoch": 0.045451812661576385, - "grad_norm": 1.8704879512730213, - "learning_rate": 3.997515074936949e-06, - "loss": 1.1236, - "step": 378 - }, - { - "epoch": 0.045572055552215475, - "grad_norm": 2.49236562445096, - "learning_rate": 3.997476104515268e-06, - "loss": 1.0692, - "step": 379 - }, - { - "epoch": 0.045692298442854565, - "grad_norm": 1.7334324793092164, - "learning_rate": 3.9974368310780485e-06, - "loss": 0.9858, - "step": 380 - }, - { - "epoch": 0.045812541333493655, - "grad_norm": 2.798976683949854, - "learning_rate": 3.997397254631251e-06, - "loss": 0.9538, - "step": 381 - }, - { - "epoch": 0.04593278422413275, - "grad_norm": 1.0669667095149955, - "learning_rate": 3.997357375180878e-06, - "loss": 0.8547, - "step": 382 - }, - { - "epoch": 0.04605302711477184, - "grad_norm": 1.7604371545566269, - "learning_rate": 3.997317192732979e-06, - "loss": 0.959, - "step": 383 - }, - { - "epoch": 0.04617327000541093, - "grad_norm": 2.030572631573953, - "learning_rate": 3.99727670729365e-06, - "loss": 1.0261, - "step": 384 - }, - { - "epoch": 0.04629351289605002, - "grad_norm": 1.9139026808676232, - "learning_rate": 3.997235918869033e-06, - "loss": 0.9847, - "step": 385 - }, - { - "epoch": 0.04641375578668911, - "grad_norm": 1.8340917521315057, - "learning_rate": 3.997194827465315e-06, - "loss": 1.0393, - "step": 386 - }, - { - "epoch": 0.0465339986773282, - "grad_norm": 2.645804117196643, - "learning_rate": 3.997153433088728e-06, - "loss": 1.1343, - "step": 387 - }, - { - "epoch": 0.0466542415679673, - "grad_norm": 1.9147105462860332, - "learning_rate": 3.997111735745554e-06, - "loss": 1.0143, - "step": 388 - }, - { - "epoch": 0.04677448445860639, - "grad_norm": 1.7904838679486272, - "learning_rate": 3.997069735442118e-06, - "loss": 1.0358, - "step": 389 - }, - { - "epoch": 0.04689472734924548, - "grad_norm": 2.708453620782563, - "learning_rate": 3.997027432184792e-06, - "loss": 1.0031, - "step": 390 - }, - { - "epoch": 0.04701497023988457, - "grad_norm": 1.7641850904470633, - "learning_rate": 3.99698482597999e-06, - "loss": 1.091, - "step": 391 - }, - { - "epoch": 0.04713521313052366, - "grad_norm": 1.1037930030265066, - "learning_rate": 3.99694191683418e-06, - "loss": 0.8599, - "step": 392 - }, - { - "epoch": 0.047255456021162746, - "grad_norm": 1.9179916225087303, - "learning_rate": 3.996898704753867e-06, - "loss": 1.0363, - "step": 393 - }, - { - "epoch": 0.04737569891180184, - "grad_norm": 3.0535618105476368, - "learning_rate": 3.996855189745609e-06, - "loss": 1.0988, - "step": 394 - }, - { - "epoch": 0.04749594180244093, - "grad_norm": 2.4306636701486224, - "learning_rate": 3.996811371816007e-06, - "loss": 1.1319, - "step": 395 - }, - { - "epoch": 0.04761618469308002, - "grad_norm": 1.920290285855736, - "learning_rate": 3.996767250971707e-06, - "loss": 0.9934, - "step": 396 - }, - { - "epoch": 0.04773642758371911, - "grad_norm": 1.6853178538374103, - "learning_rate": 3.996722827219403e-06, - "loss": 1.0741, - "step": 397 - }, - { - "epoch": 0.0478566704743582, - "grad_norm": 1.9224995784486227, - "learning_rate": 3.996678100565833e-06, - "loss": 1.0301, - "step": 398 - }, - { - "epoch": 0.04797691336499729, - "grad_norm": 2.4761862484053965, - "learning_rate": 3.996633071017783e-06, - "loss": 1.0958, - "step": 399 - }, - { - "epoch": 0.04809715625563638, - "grad_norm": 2.494311727869749, - "learning_rate": 3.996587738582084e-06, - "loss": 1.0276, - "step": 400 - }, - { - "epoch": 0.04821739914627548, - "grad_norm": 2.5648697850895537, - "learning_rate": 3.9965421032656115e-06, - "loss": 1.0624, - "step": 401 - }, - { - "epoch": 0.04833764203691457, - "grad_norm": 2.118083581235261, - "learning_rate": 3.99649616507529e-06, - "loss": 1.1527, - "step": 402 - }, - { - "epoch": 0.04845788492755366, - "grad_norm": 1.2815910443706238, - "learning_rate": 3.996449924018088e-06, - "loss": 0.8764, - "step": 403 - }, - { - "epoch": 0.04857812781819275, - "grad_norm": 1.9780639253320897, - "learning_rate": 3.99640338010102e-06, - "loss": 1.0089, - "step": 404 - }, - { - "epoch": 0.04869837070883184, - "grad_norm": 1.693002841467143, - "learning_rate": 3.996356533331146e-06, - "loss": 0.9898, - "step": 405 - }, - { - "epoch": 0.04881861359947093, - "grad_norm": 2.1224220570697483, - "learning_rate": 3.996309383715573e-06, - "loss": 0.8241, - "step": 406 - }, - { - "epoch": 0.048938856490110025, - "grad_norm": 1.7948828227015277, - "learning_rate": 3.996261931261454e-06, - "loss": 0.9399, - "step": 407 - }, - { - "epoch": 0.049059099380749115, - "grad_norm": 1.7067179446911294, - "learning_rate": 3.996214175975987e-06, - "loss": 1.071, - "step": 408 - }, - { - "epoch": 0.049179342271388204, - "grad_norm": 1.8618956718308877, - "learning_rate": 3.996166117866417e-06, - "loss": 0.9997, - "step": 409 - }, - { - "epoch": 0.049299585162027294, - "grad_norm": 3.4633902992309613, - "learning_rate": 3.996117756940035e-06, - "loss": 1.0782, - "step": 410 - }, - { - "epoch": 0.049419828052666384, - "grad_norm": 1.8067207277427852, - "learning_rate": 3.996069093204175e-06, - "loss": 1.1754, - "step": 411 - }, - { - "epoch": 0.049540070943305474, - "grad_norm": 1.955698256030346, - "learning_rate": 3.996020126666221e-06, - "loss": 1.0849, - "step": 412 - }, - { - "epoch": 0.04966031383394457, - "grad_norm": 1.8635532012747378, - "learning_rate": 3.995970857333601e-06, - "loss": 1.0287, - "step": 413 - }, - { - "epoch": 0.04978055672458366, - "grad_norm": 1.6933903498825058, - "learning_rate": 3.995921285213789e-06, - "loss": 1.0056, - "step": 414 - }, - { - "epoch": 0.04990079961522275, - "grad_norm": 2.8870860938463974, - "learning_rate": 3.995871410314305e-06, - "loss": 1.0188, - "step": 415 - }, - { - "epoch": 0.05002104250586184, - "grad_norm": 1.159383034485485, - "learning_rate": 3.995821232642714e-06, - "loss": 0.8594, - "step": 416 - }, - { - "epoch": 0.05014128539650093, - "grad_norm": 1.814780280735437, - "learning_rate": 3.995770752206629e-06, - "loss": 1.0328, - "step": 417 - }, - { - "epoch": 0.05026152828714002, - "grad_norm": 1.8980019152102037, - "learning_rate": 3.995719969013709e-06, - "loss": 1.1756, - "step": 418 - }, - { - "epoch": 0.05038177117777912, - "grad_norm": 3.0623217161016436, - "learning_rate": 3.995668883071655e-06, - "loss": 1.0764, - "step": 419 - }, - { - "epoch": 0.050502014068418206, - "grad_norm": 2.139618908649217, - "learning_rate": 3.995617494388219e-06, - "loss": 1.1206, - "step": 420 - }, - { - "epoch": 0.050622256959057296, - "grad_norm": 2.2533171890553243, - "learning_rate": 3.995565802971196e-06, - "loss": 1.0154, - "step": 421 - }, - { - "epoch": 0.050742499849696386, - "grad_norm": 1.7204425454250905, - "learning_rate": 3.995513808828427e-06, - "loss": 0.8868, - "step": 422 - }, - { - "epoch": 0.050862742740335476, - "grad_norm": 2.249473616167837, - "learning_rate": 3.9954615119678e-06, - "loss": 0.9714, - "step": 423 - }, - { - "epoch": 0.050982985630974566, - "grad_norm": 2.141335473879405, - "learning_rate": 3.995408912397248e-06, - "loss": 1.0035, - "step": 424 - }, - { - "epoch": 0.05110322852161366, - "grad_norm": 2.91195168634306, - "learning_rate": 3.99535601012475e-06, - "loss": 1.1355, - "step": 425 - }, - { - "epoch": 0.05122347141225275, - "grad_norm": 1.647631335081414, - "learning_rate": 3.995302805158333e-06, - "loss": 0.9621, - "step": 426 - }, - { - "epoch": 0.05134371430289184, - "grad_norm": 1.7390390180320259, - "learning_rate": 3.9952492975060665e-06, - "loss": 1.0453, - "step": 427 - }, - { - "epoch": 0.05146395719353093, - "grad_norm": 2.2257118072335067, - "learning_rate": 3.995195487176067e-06, - "loss": 1.0639, - "step": 428 - }, - { - "epoch": 0.05158420008417002, - "grad_norm": 1.7823246173796539, - "learning_rate": 3.995141374176499e-06, - "loss": 1.0661, - "step": 429 - }, - { - "epoch": 0.05170444297480911, - "grad_norm": 1.0823196412412788, - "learning_rate": 3.995086958515572e-06, - "loss": 0.8775, - "step": 430 - }, - { - "epoch": 0.05182468586544821, - "grad_norm": 1.0910205571889107, - "learning_rate": 3.995032240201538e-06, - "loss": 0.8451, - "step": 431 - }, - { - "epoch": 0.0519449287560873, - "grad_norm": 1.0725210750616587, - "learning_rate": 3.9949772192427e-06, - "loss": 0.8536, - "step": 432 - }, - { - "epoch": 0.05206517164672639, - "grad_norm": 2.013397642123288, - "learning_rate": 3.994921895647405e-06, - "loss": 1.007, - "step": 433 - }, - { - "epoch": 0.05218541453736548, - "grad_norm": 1.1269333439352007, - "learning_rate": 3.994866269424043e-06, - "loss": 0.7722, - "step": 434 - }, - { - "epoch": 0.05230565742800457, - "grad_norm": 2.055600735737761, - "learning_rate": 3.9948103405810545e-06, - "loss": 0.9887, - "step": 435 - }, - { - "epoch": 0.05242590031864366, - "grad_norm": 1.7442484071584052, - "learning_rate": 3.994754109126923e-06, - "loss": 1.0586, - "step": 436 - }, - { - "epoch": 0.052546143209282754, - "grad_norm": 1.882020015849293, - "learning_rate": 3.994697575070181e-06, - "loss": 1.1381, - "step": 437 - }, - { - "epoch": 0.052666386099921844, - "grad_norm": 1.917737150485131, - "learning_rate": 3.994640738419402e-06, - "loss": 1.1161, - "step": 438 - }, - { - "epoch": 0.052786628990560934, - "grad_norm": 2.2734114917434143, - "learning_rate": 3.9945835991832075e-06, - "loss": 1.0139, - "step": 439 - }, - { - "epoch": 0.052906871881200024, - "grad_norm": 2.518991054614434, - "learning_rate": 3.994526157370268e-06, - "loss": 1.1284, - "step": 440 - }, - { - "epoch": 0.053027114771839114, - "grad_norm": 1.1997205945413856, - "learning_rate": 3.994468412989296e-06, - "loss": 0.816, - "step": 441 - }, - { - "epoch": 0.053147357662478203, - "grad_norm": 1.9423443314320747, - "learning_rate": 3.994410366049052e-06, - "loss": 1.1467, - "step": 442 - }, - { - "epoch": 0.0532676005531173, - "grad_norm": 1.9618011639601205, - "learning_rate": 3.994352016558341e-06, - "loss": 1.0464, - "step": 443 - }, - { - "epoch": 0.05338784344375639, - "grad_norm": 1.8341507678777775, - "learning_rate": 3.994293364526014e-06, - "loss": 0.9464, - "step": 444 - }, - { - "epoch": 0.05350808633439548, - "grad_norm": 1.8890251768099402, - "learning_rate": 3.99423440996097e-06, - "loss": 1.0432, - "step": 445 - }, - { - "epoch": 0.05362832922503457, - "grad_norm": 1.9754687513517237, - "learning_rate": 3.994175152872152e-06, - "loss": 1.022, - "step": 446 - }, - { - "epoch": 0.05374857211567366, - "grad_norm": 1.9011032791256297, - "learning_rate": 3.994115593268548e-06, - "loss": 1.002, - "step": 447 - }, - { - "epoch": 0.05386881500631275, - "grad_norm": 1.7505358829171058, - "learning_rate": 3.994055731159195e-06, - "loss": 1.0314, - "step": 448 - }, - { - "epoch": 0.053989057896951846, - "grad_norm": 1.865804758788738, - "learning_rate": 3.993995566553172e-06, - "loss": 1.0756, - "step": 449 - }, - { - "epoch": 0.054109300787590936, - "grad_norm": 1.7227432332817312, - "learning_rate": 3.993935099459607e-06, - "loss": 0.9755, - "step": 450 - }, - { - "epoch": 0.054229543678230026, - "grad_norm": 1.8996010813295714, - "learning_rate": 3.993874329887673e-06, - "loss": 0.9477, - "step": 451 - }, - { - "epoch": 0.054349786568869116, - "grad_norm": 2.0262756474052535, - "learning_rate": 3.993813257846589e-06, - "loss": 1.0701, - "step": 452 - }, - { - "epoch": 0.054470029459508205, - "grad_norm": 2.01528620275233, - "learning_rate": 3.993751883345619e-06, - "loss": 1.1416, - "step": 453 - }, - { - "epoch": 0.054590272350147295, - "grad_norm": 2.219196973023829, - "learning_rate": 3.993690206394073e-06, - "loss": 1.0877, - "step": 454 - }, - { - "epoch": 0.054710515240786385, - "grad_norm": 2.2675603558668023, - "learning_rate": 3.993628227001307e-06, - "loss": 1.0841, - "step": 455 - }, - { - "epoch": 0.05483075813142548, - "grad_norm": 1.7848083023442443, - "learning_rate": 3.993565945176726e-06, - "loss": 0.9177, - "step": 456 - }, - { - "epoch": 0.05495100102206457, - "grad_norm": 1.7503219717021818, - "learning_rate": 3.993503360929776e-06, - "loss": 1.0443, - "step": 457 - }, - { - "epoch": 0.05507124391270366, - "grad_norm": 1.4993871390521951, - "learning_rate": 3.99344047426995e-06, - "loss": 1.0204, - "step": 458 - }, - { - "epoch": 0.05519148680334275, - "grad_norm": 1.8609744973912985, - "learning_rate": 3.993377285206789e-06, - "loss": 1.1386, - "step": 459 - }, - { - "epoch": 0.05531172969398184, - "grad_norm": 1.5819193482559712, - "learning_rate": 3.99331379374988e-06, - "loss": 1.0782, - "step": 460 - }, - { - "epoch": 0.05543197258462093, - "grad_norm": 1.7873127472439096, - "learning_rate": 3.993249999908852e-06, - "loss": 1.0159, - "step": 461 - }, - { - "epoch": 0.05555221547526003, - "grad_norm": 1.678376861937162, - "learning_rate": 3.993185903693384e-06, - "loss": 1.0769, - "step": 462 - }, - { - "epoch": 0.05567245836589912, - "grad_norm": 1.9090463108781925, - "learning_rate": 3.9931215051131995e-06, - "loss": 1.0365, - "step": 463 - }, - { - "epoch": 0.05579270125653821, - "grad_norm": 1.4752674377579598, - "learning_rate": 3.993056804178068e-06, - "loss": 1.0091, - "step": 464 - }, - { - "epoch": 0.0559129441471773, - "grad_norm": 1.988695082850335, - "learning_rate": 3.992991800897803e-06, - "loss": 1.049, - "step": 465 - }, - { - "epoch": 0.05603318703781639, - "grad_norm": 2.1459743737346515, - "learning_rate": 3.9929264952822665e-06, - "loss": 1.1008, - "step": 466 - }, - { - "epoch": 0.05615342992845548, - "grad_norm": 1.7845237496234032, - "learning_rate": 3.992860887341366e-06, - "loss": 1.0933, - "step": 467 - }, - { - "epoch": 0.056273672819094574, - "grad_norm": 1.9087336260599788, - "learning_rate": 3.992794977085052e-06, - "loss": 1.0256, - "step": 468 - }, - { - "epoch": 0.056393915709733664, - "grad_norm": 1.7779881912457205, - "learning_rate": 3.992728764523326e-06, - "loss": 1.0532, - "step": 469 - }, - { - "epoch": 0.05651415860037275, - "grad_norm": 1.492764426342315, - "learning_rate": 3.99266224966623e-06, - "loss": 1.0098, - "step": 470 - }, - { - "epoch": 0.05663440149101184, - "grad_norm": 1.7809661505222467, - "learning_rate": 3.992595432523855e-06, - "loss": 1.0808, - "step": 471 - }, - { - "epoch": 0.05675464438165093, - "grad_norm": 1.8064945966110275, - "learning_rate": 3.992528313106338e-06, - "loss": 1.068, - "step": 472 - }, - { - "epoch": 0.05687488727229002, - "grad_norm": 2.1308833189964314, - "learning_rate": 3.9924608914238595e-06, - "loss": 1.027, - "step": 473 - }, - { - "epoch": 0.05699513016292912, - "grad_norm": 2.15509776413643, - "learning_rate": 3.992393167486648e-06, - "loss": 1.0465, - "step": 474 - }, - { - "epoch": 0.05711537305356821, - "grad_norm": 2.3945148212311107, - "learning_rate": 3.992325141304977e-06, - "loss": 1.0141, - "step": 475 - }, - { - "epoch": 0.0572356159442073, - "grad_norm": 2.9205995111477714, - "learning_rate": 3.992256812889166e-06, - "loss": 1.0652, - "step": 476 - }, - { - "epoch": 0.05735585883484639, - "grad_norm": 2.0618396048559022, - "learning_rate": 3.992188182249582e-06, - "loss": 0.9776, - "step": 477 - }, - { - "epoch": 0.05747610172548548, - "grad_norm": 2.0983716146276263, - "learning_rate": 3.992119249396633e-06, - "loss": 1.122, - "step": 478 - }, - { - "epoch": 0.05759634461612457, - "grad_norm": 1.659290761300107, - "learning_rate": 3.992050014340778e-06, - "loss": 1.0265, - "step": 479 - }, - { - "epoch": 0.057716587506763666, - "grad_norm": 1.325998061732491, - "learning_rate": 3.99198047709252e-06, - "loss": 0.7745, - "step": 480 - }, - { - "epoch": 0.057836830397402755, - "grad_norm": 1.718087155233018, - "learning_rate": 3.991910637662408e-06, - "loss": 0.9905, - "step": 481 - }, - { - "epoch": 0.057957073288041845, - "grad_norm": 1.6571326872673289, - "learning_rate": 3.9918404960610355e-06, - "loss": 1.0132, - "step": 482 - }, - { - "epoch": 0.058077316178680935, - "grad_norm": 2.000171534811616, - "learning_rate": 3.991770052299043e-06, - "loss": 0.9792, - "step": 483 - }, - { - "epoch": 0.058197559069320025, - "grad_norm": 2.049960540763886, - "learning_rate": 3.991699306387118e-06, - "loss": 1.0862, - "step": 484 - }, - { - "epoch": 0.058317801959959115, - "grad_norm": 1.5895538440667103, - "learning_rate": 3.991628258335991e-06, - "loss": 0.995, - "step": 485 - }, - { - "epoch": 0.05843804485059821, - "grad_norm": 2.738108935627923, - "learning_rate": 3.991556908156442e-06, - "loss": 1.0828, - "step": 486 - }, - { - "epoch": 0.0585582877412373, - "grad_norm": 1.6801053259394185, - "learning_rate": 3.9914852558592914e-06, - "loss": 1.0742, - "step": 487 - }, - { - "epoch": 0.05867853063187639, - "grad_norm": 3.1475007814826026, - "learning_rate": 3.991413301455413e-06, - "loss": 1.0263, - "step": 488 - }, - { - "epoch": 0.05879877352251548, - "grad_norm": 1.9825354632829242, - "learning_rate": 3.991341044955719e-06, - "loss": 0.9779, - "step": 489 - }, - { - "epoch": 0.05891901641315457, - "grad_norm": 2.0778015573379536, - "learning_rate": 3.991268486371172e-06, - "loss": 1.023, - "step": 490 - }, - { - "epoch": 0.05903925930379366, - "grad_norm": 2.250788020845747, - "learning_rate": 3.991195625712779e-06, - "loss": 1.0761, - "step": 491 - }, - { - "epoch": 0.05915950219443276, - "grad_norm": 1.7910958275262543, - "learning_rate": 3.991122462991592e-06, - "loss": 1.0231, - "step": 492 - }, - { - "epoch": 0.05927974508507185, - "grad_norm": 5.3742602526812755, - "learning_rate": 3.991048998218712e-06, - "loss": 1.0161, - "step": 493 - }, - { - "epoch": 0.05939998797571094, - "grad_norm": 2.166809101802645, - "learning_rate": 3.990975231405281e-06, - "loss": 0.9666, - "step": 494 - }, - { - "epoch": 0.05952023086635003, - "grad_norm": 2.0195549791960015, - "learning_rate": 3.990901162562491e-06, - "loss": 0.9865, - "step": 495 - }, - { - "epoch": 0.05964047375698912, - "grad_norm": 2.178410090830978, - "learning_rate": 3.9908267917015765e-06, - "loss": 1.1037, - "step": 496 - }, - { - "epoch": 0.059760716647628206, - "grad_norm": 1.861178317050652, - "learning_rate": 3.990752118833821e-06, - "loss": 1.1301, - "step": 497 - }, - { - "epoch": 0.0598809595382673, - "grad_norm": 1.7089584958978068, - "learning_rate": 3.990677143970553e-06, - "loss": 0.984, - "step": 498 - }, - { - "epoch": 0.06000120242890639, - "grad_norm": 2.3107061903549164, - "learning_rate": 3.990601867123144e-06, - "loss": 1.0168, - "step": 499 - }, - { - "epoch": 0.06012144531954548, - "grad_norm": 3.55149251698572, - "learning_rate": 3.990526288303014e-06, - "loss": 1.0597, - "step": 500 - }, - { - "epoch": 0.06024168821018457, - "grad_norm": 1.7458996419521589, - "learning_rate": 3.9904504075216295e-06, - "loss": 1.1088, - "step": 501 - }, - { - "epoch": 0.06036193110082366, - "grad_norm": 1.988849551180643, - "learning_rate": 3.990374224790501e-06, - "loss": 1.144, - "step": 502 - }, - { - "epoch": 0.06048217399146275, - "grad_norm": 1.789043709602128, - "learning_rate": 3.990297740121185e-06, - "loss": 0.914, - "step": 503 - }, - { - "epoch": 0.06060241688210185, - "grad_norm": 1.6378232738749725, - "learning_rate": 3.990220953525284e-06, - "loss": 0.9855, - "step": 504 - }, - { - "epoch": 0.06072265977274094, - "grad_norm": 2.2013957494501124, - "learning_rate": 3.9901438650144465e-06, - "loss": 0.9516, - "step": 505 - }, - { - "epoch": 0.06084290266338003, - "grad_norm": 2.555245341486408, - "learning_rate": 3.990066474600367e-06, - "loss": 1.1241, - "step": 506 - }, - { - "epoch": 0.06096314555401912, - "grad_norm": 1.6613236216240324, - "learning_rate": 3.989988782294786e-06, - "loss": 0.8822, - "step": 507 - }, - { - "epoch": 0.06108338844465821, - "grad_norm": 1.6908935051607117, - "learning_rate": 3.989910788109489e-06, - "loss": 1.1508, - "step": 508 - }, - { - "epoch": 0.0612036313352973, - "grad_norm": 1.9977711536718206, - "learning_rate": 3.989832492056307e-06, - "loss": 0.9539, - "step": 509 - }, - { - "epoch": 0.06132387422593639, - "grad_norm": 1.8294353516596589, - "learning_rate": 3.989753894147119e-06, - "loss": 1.0094, - "step": 510 - }, - { - "epoch": 0.061444117116575485, - "grad_norm": 1.6562524471624758, - "learning_rate": 3.989674994393846e-06, - "loss": 0.9993, - "step": 511 - }, - { - "epoch": 0.061564360007214575, - "grad_norm": 1.772883324434723, - "learning_rate": 3.98959579280846e-06, - "loss": 1.1443, - "step": 512 - }, - { - "epoch": 0.061684602897853665, - "grad_norm": 2.070339217103553, - "learning_rate": 3.989516289402973e-06, - "loss": 1.0375, - "step": 513 - }, - { - "epoch": 0.061804845788492754, - "grad_norm": 2.216877214093497, - "learning_rate": 3.989436484189447e-06, - "loss": 1.0244, - "step": 514 - }, - { - "epoch": 0.061925088679131844, - "grad_norm": 2.548903517503267, - "learning_rate": 3.9893563771799885e-06, - "loss": 1.0182, - "step": 515 - }, - { - "epoch": 0.062045331569770934, - "grad_norm": 1.9236611850554441, - "learning_rate": 3.989275968386749e-06, - "loss": 1.067, - "step": 516 - }, - { - "epoch": 0.06216557446041003, - "grad_norm": 1.9615844631986172, - "learning_rate": 3.989195257821926e-06, - "loss": 0.9706, - "step": 517 - }, - { - "epoch": 0.06228581735104912, - "grad_norm": 1.8957668404568782, - "learning_rate": 3.989114245497765e-06, - "loss": 1.0541, - "step": 518 - }, - { - "epoch": 0.06240606024168821, - "grad_norm": 1.960995616279616, - "learning_rate": 3.989032931426554e-06, - "loss": 1.1572, - "step": 519 - }, - { - "epoch": 0.06252630313232731, - "grad_norm": 1.8417160971151463, - "learning_rate": 3.9889513156206295e-06, - "loss": 1.0735, - "step": 520 - }, - { - "epoch": 0.06264654602296639, - "grad_norm": 3.854367238917299, - "learning_rate": 3.988869398092371e-06, - "loss": 0.9268, - "step": 521 - }, - { - "epoch": 0.06276678891360549, - "grad_norm": 2.559018817565589, - "learning_rate": 3.988787178854206e-06, - "loss": 0.9934, - "step": 522 - }, - { - "epoch": 0.06288703180424457, - "grad_norm": 1.8880655919173794, - "learning_rate": 3.988704657918608e-06, - "loss": 1.0793, - "step": 523 - }, - { - "epoch": 0.06300727469488367, - "grad_norm": 2.838849214930639, - "learning_rate": 3.988621835298094e-06, - "loss": 1.0151, - "step": 524 - }, - { - "epoch": 0.06312751758552275, - "grad_norm": 1.8525043779905739, - "learning_rate": 3.988538711005229e-06, - "loss": 1.1209, - "step": 525 - }, - { - "epoch": 0.06324776047616185, - "grad_norm": 2.2965024269667578, - "learning_rate": 3.988455285052622e-06, - "loss": 1.09, - "step": 526 - }, - { - "epoch": 0.06336800336680094, - "grad_norm": 1.9513808129966694, - "learning_rate": 3.98837155745293e-06, - "loss": 1.0367, - "step": 527 - }, - { - "epoch": 0.06348824625744003, - "grad_norm": 1.826041836112044, - "learning_rate": 3.988287528218854e-06, - "loss": 0.9727, - "step": 528 - }, - { - "epoch": 0.06360848914807912, - "grad_norm": 1.8196602500042574, - "learning_rate": 3.98820319736314e-06, - "loss": 1.1055, - "step": 529 - }, - { - "epoch": 0.0637287320387182, - "grad_norm": 1.9885253156892635, - "learning_rate": 3.988118564898582e-06, - "loss": 1.0556, - "step": 530 - }, - { - "epoch": 0.0638489749293573, - "grad_norm": 2.4418907722332843, - "learning_rate": 3.988033630838019e-06, - "loss": 1.0974, - "step": 531 - }, - { - "epoch": 0.0639692178199964, - "grad_norm": 1.8494315459697743, - "learning_rate": 3.987948395194334e-06, - "loss": 1.0777, - "step": 532 - }, - { - "epoch": 0.06408946071063548, - "grad_norm": 2.1320155404904013, - "learning_rate": 3.987862857980458e-06, - "loss": 0.9888, - "step": 533 - }, - { - "epoch": 0.06420970360127458, - "grad_norm": 4.407448896348746, - "learning_rate": 3.987777019209368e-06, - "loss": 0.9756, - "step": 534 - }, - { - "epoch": 0.06432994649191366, - "grad_norm": 1.6292641518671698, - "learning_rate": 3.987690878894084e-06, - "loss": 1.0125, - "step": 535 - }, - { - "epoch": 0.06445018938255276, - "grad_norm": 2.6058437186256738, - "learning_rate": 3.987604437047673e-06, - "loss": 1.0509, - "step": 536 - }, - { - "epoch": 0.06457043227319184, - "grad_norm": 1.8439305103969759, - "learning_rate": 3.987517693683251e-06, - "loss": 0.9872, - "step": 537 - }, - { - "epoch": 0.06469067516383094, - "grad_norm": 2.2841170597175346, - "learning_rate": 3.9874306488139745e-06, - "loss": 1.1654, - "step": 538 - }, - { - "epoch": 0.06481091805447003, - "grad_norm": 1.8724861459257742, - "learning_rate": 3.987343302453049e-06, - "loss": 1.0759, - "step": 539 - }, - { - "epoch": 0.06493116094510912, - "grad_norm": 1.6497956236062727, - "learning_rate": 3.987255654613724e-06, - "loss": 1.0345, - "step": 540 - }, - { - "epoch": 0.06505140383574821, - "grad_norm": 2.3230654636047947, - "learning_rate": 3.987167705309296e-06, - "loss": 0.9101, - "step": 541 - }, - { - "epoch": 0.0651716467263873, - "grad_norm": 1.7621636008779535, - "learning_rate": 3.987079454553108e-06, - "loss": 1.1541, - "step": 542 - }, - { - "epoch": 0.0652918896170264, - "grad_norm": 1.7161022252131695, - "learning_rate": 3.986990902358546e-06, - "loss": 1.1243, - "step": 543 - }, - { - "epoch": 0.06541213250766549, - "grad_norm": 1.9956090529008064, - "learning_rate": 3.986902048739045e-06, - "loss": 1.1287, - "step": 544 - }, - { - "epoch": 0.06553237539830457, - "grad_norm": 2.767935009050059, - "learning_rate": 3.986812893708082e-06, - "loss": 1.0028, - "step": 545 - }, - { - "epoch": 0.06565261828894367, - "grad_norm": 2.0128729907017924, - "learning_rate": 3.9867234372791826e-06, - "loss": 1.0204, - "step": 546 - }, - { - "epoch": 0.06577286117958275, - "grad_norm": 1.9617185215131414, - "learning_rate": 3.986633679465918e-06, - "loss": 1.0774, - "step": 547 - }, - { - "epoch": 0.06589310407022185, - "grad_norm": 2.156237063161769, - "learning_rate": 3.986543620281904e-06, - "loss": 1.0187, - "step": 548 - }, - { - "epoch": 0.06601334696086093, - "grad_norm": 1.4923216275155171, - "learning_rate": 3.986453259740802e-06, - "loss": 1.1103, - "step": 549 - }, - { - "epoch": 0.06613358985150003, - "grad_norm": 2.8630157805609824, - "learning_rate": 3.986362597856319e-06, - "loss": 0.9982, - "step": 550 - }, - { - "epoch": 0.06625383274213913, - "grad_norm": 2.8106401015799105, - "learning_rate": 3.986271634642211e-06, - "loss": 1.0143, - "step": 551 - }, - { - "epoch": 0.06637407563277821, - "grad_norm": 2.3833653543450866, - "learning_rate": 3.986180370112274e-06, - "loss": 1.0211, - "step": 552 - }, - { - "epoch": 0.0664943185234173, - "grad_norm": 10.719558551865603, - "learning_rate": 3.986088804280354e-06, - "loss": 0.953, - "step": 553 - }, - { - "epoch": 0.06661456141405639, - "grad_norm": 2.33408662580023, - "learning_rate": 3.985996937160342e-06, - "loss": 1.1358, - "step": 554 - }, - { - "epoch": 0.06673480430469549, - "grad_norm": 2.3629751464337443, - "learning_rate": 3.985904768766173e-06, - "loss": 0.9067, - "step": 555 - }, - { - "epoch": 0.06685504719533458, - "grad_norm": 2.4606421883825464, - "learning_rate": 3.98581229911183e-06, - "loss": 0.9675, - "step": 556 - }, - { - "epoch": 0.06697529008597367, - "grad_norm": 1.6367710442029935, - "learning_rate": 3.985719528211341e-06, - "loss": 1.1158, - "step": 557 - }, - { - "epoch": 0.06709553297661276, - "grad_norm": 1.0203845666083857, - "learning_rate": 3.985626456078777e-06, - "loss": 0.8704, - "step": 558 - }, - { - "epoch": 0.06721577586725185, - "grad_norm": 2.068845293783316, - "learning_rate": 3.985533082728259e-06, - "loss": 1.0699, - "step": 559 - }, - { - "epoch": 0.06733601875789094, - "grad_norm": 1.6842447602092994, - "learning_rate": 3.985439408173951e-06, - "loss": 0.9522, - "step": 560 - }, - { - "epoch": 0.06745626164853002, - "grad_norm": 2.0123062609185345, - "learning_rate": 3.9853454324300634e-06, - "loss": 0.9133, - "step": 561 - }, - { - "epoch": 0.06757650453916912, - "grad_norm": 2.2121720927726196, - "learning_rate": 3.985251155510852e-06, - "loss": 0.9817, - "step": 562 - }, - { - "epoch": 0.06769674742980822, - "grad_norm": 1.9412184073683185, - "learning_rate": 3.98515657743062e-06, - "loss": 1.0147, - "step": 563 - }, - { - "epoch": 0.0678169903204473, - "grad_norm": 1.8356520410558532, - "learning_rate": 3.985061698203711e-06, - "loss": 0.9761, - "step": 564 - }, - { - "epoch": 0.0679372332110864, - "grad_norm": 0.934293221559773, - "learning_rate": 3.984966517844523e-06, - "loss": 0.8681, - "step": 565 - }, - { - "epoch": 0.06805747610172548, - "grad_norm": 2.592837897180128, - "learning_rate": 3.984871036367492e-06, - "loss": 1.0131, - "step": 566 - }, - { - "epoch": 0.06817771899236458, - "grad_norm": 2.0144916275543494, - "learning_rate": 3.984775253787102e-06, - "loss": 1.0423, - "step": 567 - }, - { - "epoch": 0.06829796188300366, - "grad_norm": 3.3659981665059453, - "learning_rate": 3.984679170117885e-06, - "loss": 1.0882, - "step": 568 - }, - { - "epoch": 0.06841820477364276, - "grad_norm": 2.593156310007618, - "learning_rate": 3.984582785374415e-06, - "loss": 0.9864, - "step": 569 - }, - { - "epoch": 0.06853844766428185, - "grad_norm": 2.3094883458631554, - "learning_rate": 3.9844860995713155e-06, - "loss": 1.0136, - "step": 570 - }, - { - "epoch": 0.06865869055492094, - "grad_norm": 2.285695382448776, - "learning_rate": 3.9843891127232524e-06, - "loss": 1.0201, - "step": 571 - }, - { - "epoch": 0.06877893344556003, - "grad_norm": 2.3953927561036106, - "learning_rate": 3.984291824844938e-06, - "loss": 0.8747, - "step": 572 - }, - { - "epoch": 0.06889917633619912, - "grad_norm": 2.2318852564806515, - "learning_rate": 3.984194235951132e-06, - "loss": 1.0587, - "step": 573 - }, - { - "epoch": 0.06901941922683821, - "grad_norm": 2.6253045223347815, - "learning_rate": 3.9840963460566375e-06, - "loss": 1.0529, - "step": 574 - }, - { - "epoch": 0.06913966211747731, - "grad_norm": 1.5239928969320335, - "learning_rate": 3.983998155176305e-06, - "loss": 1.0959, - "step": 575 - }, - { - "epoch": 0.06925990500811639, - "grad_norm": 0.9899959239490483, - "learning_rate": 3.9838996633250305e-06, - "loss": 0.7902, - "step": 576 - }, - { - "epoch": 0.06938014789875549, - "grad_norm": 1.952462350968349, - "learning_rate": 3.983800870517753e-06, - "loss": 1.0865, - "step": 577 - }, - { - "epoch": 0.06950039078939457, - "grad_norm": 3.418665805037931, - "learning_rate": 3.983701776769463e-06, - "loss": 0.9964, - "step": 578 - }, - { - "epoch": 0.06962063368003367, - "grad_norm": 1.7433620992199474, - "learning_rate": 3.9836023820951885e-06, - "loss": 1.0495, - "step": 579 - }, - { - "epoch": 0.06974087657067275, - "grad_norm": 2.047034221018365, - "learning_rate": 3.983502686510011e-06, - "loss": 0.8962, - "step": 580 - }, - { - "epoch": 0.06986111946131185, - "grad_norm": 1.758147347664539, - "learning_rate": 3.9834026900290525e-06, - "loss": 0.9299, - "step": 581 - }, - { - "epoch": 0.06998136235195095, - "grad_norm": 1.8124406739095353, - "learning_rate": 3.983302392667482e-06, - "loss": 1.2008, - "step": 582 - }, - { - "epoch": 0.07010160524259003, - "grad_norm": 1.6261094780743477, - "learning_rate": 3.983201794440517e-06, - "loss": 1.1408, - "step": 583 - }, - { - "epoch": 0.07022184813322913, - "grad_norm": 2.3738842401216718, - "learning_rate": 3.9831008953634165e-06, - "loss": 0.8793, - "step": 584 - }, - { - "epoch": 0.07034209102386821, - "grad_norm": 1.69642672861329, - "learning_rate": 3.9829996954514864e-06, - "loss": 1.0132, - "step": 585 - }, - { - "epoch": 0.0704623339145073, - "grad_norm": 2.430910544735772, - "learning_rate": 3.982898194720079e-06, - "loss": 1.0426, - "step": 586 - }, - { - "epoch": 0.0705825768051464, - "grad_norm": 1.7704543512250546, - "learning_rate": 3.982796393184592e-06, - "loss": 1.0194, - "step": 587 - }, - { - "epoch": 0.07070281969578548, - "grad_norm": 0.942795421027133, - "learning_rate": 3.98269429086047e-06, - "loss": 0.8495, - "step": 588 - }, - { - "epoch": 0.07082306258642458, - "grad_norm": 2.094945879216674, - "learning_rate": 3.982591887763199e-06, - "loss": 1.0747, - "step": 589 - }, - { - "epoch": 0.07094330547706366, - "grad_norm": 2.212194402247176, - "learning_rate": 3.982489183908316e-06, - "loss": 1.0196, - "step": 590 - }, - { - "epoch": 0.07106354836770276, - "grad_norm": 1.739522700671225, - "learning_rate": 3.982386179311399e-06, - "loss": 1.0467, - "step": 591 - }, - { - "epoch": 0.07118379125834184, - "grad_norm": 2.1661576137664182, - "learning_rate": 3.982282873988075e-06, - "loss": 1.0819, - "step": 592 - }, - { - "epoch": 0.07130403414898094, - "grad_norm": 1.5159027002508465, - "learning_rate": 3.982179267954016e-06, - "loss": 1.075, - "step": 593 - }, - { - "epoch": 0.07142427703962004, - "grad_norm": 4.318362245942623, - "learning_rate": 3.982075361224937e-06, - "loss": 1.1778, - "step": 594 - }, - { - "epoch": 0.07154451993025912, - "grad_norm": 1.723112015756551, - "learning_rate": 3.981971153816602e-06, - "loss": 1.0885, - "step": 595 - }, - { - "epoch": 0.07166476282089822, - "grad_norm": 1.4246164240661394, - "learning_rate": 3.981866645744819e-06, - "loss": 1.1575, - "step": 596 - }, - { - "epoch": 0.0717850057115373, - "grad_norm": 2.1709292980614077, - "learning_rate": 3.9817618370254416e-06, - "loss": 1.0201, - "step": 597 - }, - { - "epoch": 0.0719052486021764, - "grad_norm": 2.140316683430537, - "learning_rate": 3.9816567276743684e-06, - "loss": 1.0739, - "step": 598 - }, - { - "epoch": 0.0720254914928155, - "grad_norm": 1.9654680780745781, - "learning_rate": 3.9815513177075466e-06, - "loss": 0.9743, - "step": 599 - }, - { - "epoch": 0.07214573438345458, - "grad_norm": 1.7070593929807891, - "learning_rate": 3.9814456071409646e-06, - "loss": 0.91, - "step": 600 - }, - { - "epoch": 0.07226597727409367, - "grad_norm": 2.603689246602039, - "learning_rate": 3.981339595990659e-06, - "loss": 1.072, - "step": 601 - }, - { - "epoch": 0.07238622016473276, - "grad_norm": 2.211404164807273, - "learning_rate": 3.981233284272713e-06, - "loss": 1.0138, - "step": 602 - }, - { - "epoch": 0.07250646305537185, - "grad_norm": 1.5304139638133094, - "learning_rate": 3.981126672003253e-06, - "loss": 1.1029, - "step": 603 - }, - { - "epoch": 0.07262670594601094, - "grad_norm": 2.36287031147632, - "learning_rate": 3.981019759198451e-06, - "loss": 0.9947, - "step": 604 - }, - { - "epoch": 0.07274694883665003, - "grad_norm": 1.9493568386786382, - "learning_rate": 3.980912545874528e-06, - "loss": 1.0409, - "step": 605 - }, - { - "epoch": 0.07286719172728913, - "grad_norm": 1.7176665619289297, - "learning_rate": 3.980805032047746e-06, - "loss": 1.0639, - "step": 606 - }, - { - "epoch": 0.07298743461792821, - "grad_norm": 1.848463531336639, - "learning_rate": 3.980697217734415e-06, - "loss": 1.0145, - "step": 607 - }, - { - "epoch": 0.07310767750856731, - "grad_norm": 1.5706933162357324, - "learning_rate": 3.980589102950891e-06, - "loss": 1.1213, - "step": 608 - }, - { - "epoch": 0.07322792039920639, - "grad_norm": 3.5596201449701135, - "learning_rate": 3.9804806877135755e-06, - "loss": 0.9718, - "step": 609 - }, - { - "epoch": 0.07334816328984549, - "grad_norm": 1.8328026812746507, - "learning_rate": 3.980371972038915e-06, - "loss": 1.066, - "step": 610 - }, - { - "epoch": 0.07346840618048459, - "grad_norm": 1.5439187517481325, - "learning_rate": 3.980262955943399e-06, - "loss": 1.0405, - "step": 611 - }, - { - "epoch": 0.07358864907112367, - "grad_norm": 2.503718471776147, - "learning_rate": 3.980153639443569e-06, - "loss": 1.0739, - "step": 612 - }, - { - "epoch": 0.07370889196176277, - "grad_norm": 1.8378434697100523, - "learning_rate": 3.980044022556005e-06, - "loss": 1.0083, - "step": 613 - }, - { - "epoch": 0.07382913485240185, - "grad_norm": 2.1284460287745155, - "learning_rate": 3.9799341052973375e-06, - "loss": 0.9306, - "step": 614 - }, - { - "epoch": 0.07394937774304094, - "grad_norm": 2.1435642528972227, - "learning_rate": 3.979823887684241e-06, - "loss": 0.9621, - "step": 615 - }, - { - "epoch": 0.07406962063368003, - "grad_norm": 2.0976651154229558, - "learning_rate": 3.979713369733434e-06, - "loss": 1.0596, - "step": 616 - }, - { - "epoch": 0.07418986352431912, - "grad_norm": 1.98923971460993, - "learning_rate": 3.979602551461683e-06, - "loss": 1.0516, - "step": 617 - }, - { - "epoch": 0.07431010641495822, - "grad_norm": 2.044047258790019, - "learning_rate": 3.979491432885799e-06, - "loss": 1.127, - "step": 618 - }, - { - "epoch": 0.0744303493055973, - "grad_norm": 2.4313142978825035, - "learning_rate": 3.97938001402264e-06, - "loss": 1.0373, - "step": 619 - }, - { - "epoch": 0.0745505921962364, - "grad_norm": 2.740211753180857, - "learning_rate": 3.979268294889105e-06, - "loss": 1.0208, - "step": 620 - }, - { - "epoch": 0.07467083508687548, - "grad_norm": 1.6316646209771513, - "learning_rate": 3.979156275502143e-06, - "loss": 0.9541, - "step": 621 - }, - { - "epoch": 0.07479107797751458, - "grad_norm": 2.6895869823601455, - "learning_rate": 3.979043955878749e-06, - "loss": 1.1247, - "step": 622 - }, - { - "epoch": 0.07491132086815366, - "grad_norm": 2.7027915793059205, - "learning_rate": 3.978931336035959e-06, - "loss": 1.0355, - "step": 623 - }, - { - "epoch": 0.07503156375879276, - "grad_norm": 2.044511318437607, - "learning_rate": 3.9788184159908595e-06, - "loss": 1.0361, - "step": 624 - }, - { - "epoch": 0.07515180664943186, - "grad_norm": 2.1398443204635966, - "learning_rate": 3.97870519576058e-06, - "loss": 1.031, - "step": 625 - }, - { - "epoch": 0.07527204954007094, - "grad_norm": 2.864084170228814, - "learning_rate": 3.978591675362295e-06, - "loss": 1.0118, - "step": 626 - }, - { - "epoch": 0.07539229243071004, - "grad_norm": 1.6538971797530426, - "learning_rate": 3.978477854813226e-06, - "loss": 1.079, - "step": 627 - }, - { - "epoch": 0.07551253532134912, - "grad_norm": 2.3839003784243396, - "learning_rate": 3.97836373413064e-06, - "loss": 1.032, - "step": 628 - }, - { - "epoch": 0.07563277821198822, - "grad_norm": 1.5634614656614196, - "learning_rate": 3.978249313331848e-06, - "loss": 0.9504, - "step": 629 - }, - { - "epoch": 0.07575302110262731, - "grad_norm": 2.5361700396767186, - "learning_rate": 3.978134592434208e-06, - "loss": 0.8293, - "step": 630 - }, - { - "epoch": 0.0758732639932664, - "grad_norm": 1.1322710519064556, - "learning_rate": 3.978019571455123e-06, - "loss": 0.8445, - "step": 631 - }, - { - "epoch": 0.07599350688390549, - "grad_norm": 4.167993232633695, - "learning_rate": 3.977904250412042e-06, - "loss": 1.0452, - "step": 632 - }, - { - "epoch": 0.07611374977454458, - "grad_norm": 2.269896591116607, - "learning_rate": 3.97778862932246e-06, - "loss": 1.058, - "step": 633 - }, - { - "epoch": 0.07623399266518367, - "grad_norm": 1.9966669920516134, - "learning_rate": 3.9776727082039144e-06, - "loss": 1.1406, - "step": 634 - }, - { - "epoch": 0.07635423555582276, - "grad_norm": 0.9738134524590039, - "learning_rate": 3.977556487073991e-06, - "loss": 0.7811, - "step": 635 - }, - { - "epoch": 0.07647447844646185, - "grad_norm": 1.6575569897918923, - "learning_rate": 3.97743996595032e-06, - "loss": 1.0141, - "step": 636 - }, - { - "epoch": 0.07659472133710095, - "grad_norm": 1.4762895119827641, - "learning_rate": 3.9773231448505804e-06, - "loss": 1.0214, - "step": 637 - }, - { - "epoch": 0.07671496422774003, - "grad_norm": 1.7819741099025646, - "learning_rate": 3.977206023792491e-06, - "loss": 0.9794, - "step": 638 - }, - { - "epoch": 0.07683520711837913, - "grad_norm": 3.625361852014835, - "learning_rate": 3.97708860279382e-06, - "loss": 1.0176, - "step": 639 - }, - { - "epoch": 0.07695545000901821, - "grad_norm": 1.7509776518052942, - "learning_rate": 3.97697088187238e-06, - "loss": 0.9961, - "step": 640 - }, - { - "epoch": 0.07707569289965731, - "grad_norm": 2.1616116634048588, - "learning_rate": 3.976852861046029e-06, - "loss": 1.1193, - "step": 641 - }, - { - "epoch": 0.0771959357902964, - "grad_norm": 1.4960586902305195, - "learning_rate": 3.97673454033267e-06, - "loss": 1.0009, - "step": 642 - }, - { - "epoch": 0.07731617868093549, - "grad_norm": 1.782239940018287, - "learning_rate": 3.976615919750254e-06, - "loss": 1.0224, - "step": 643 - }, - { - "epoch": 0.07743642157157458, - "grad_norm": 2.02920452711855, - "learning_rate": 3.976496999316775e-06, - "loss": 1.0768, - "step": 644 - }, - { - "epoch": 0.07755666446221367, - "grad_norm": 2.6333103331837004, - "learning_rate": 3.976377779050271e-06, - "loss": 1.0424, - "step": 645 - }, - { - "epoch": 0.07767690735285276, - "grad_norm": 2.0083549746327414, - "learning_rate": 3.976258258968831e-06, - "loss": 1.042, - "step": 646 - }, - { - "epoch": 0.07779715024349185, - "grad_norm": 1.9823233744386781, - "learning_rate": 3.976138439090583e-06, - "loss": 0.9515, - "step": 647 - }, - { - "epoch": 0.07791739313413094, - "grad_norm": 1.8380684031853134, - "learning_rate": 3.976018319433706e-06, - "loss": 1.0466, - "step": 648 - }, - { - "epoch": 0.07803763602477004, - "grad_norm": 2.3361821381421577, - "learning_rate": 3.9758979000164205e-06, - "loss": 1.1304, - "step": 649 - }, - { - "epoch": 0.07815787891540912, - "grad_norm": 2.649857671982157, - "learning_rate": 3.975777180856995e-06, - "loss": 0.927, - "step": 650 - }, - { - "epoch": 0.07827812180604822, - "grad_norm": 2.0845214380528456, - "learning_rate": 3.975656161973742e-06, - "loss": 1.0741, - "step": 651 - }, - { - "epoch": 0.0783983646966873, - "grad_norm": 2.2231491963525896, - "learning_rate": 3.9755348433850194e-06, - "loss": 1.0985, - "step": 652 - }, - { - "epoch": 0.0785186075873264, - "grad_norm": 1.2291808639396113, - "learning_rate": 3.975413225109232e-06, - "loss": 0.9128, - "step": 653 - }, - { - "epoch": 0.0786388504779655, - "grad_norm": 5.272543326969028, - "learning_rate": 3.975291307164829e-06, - "loss": 1.1367, - "step": 654 - }, - { - "epoch": 0.07875909336860458, - "grad_norm": 1.8246148473795987, - "learning_rate": 3.975169089570306e-06, - "loss": 1.0596, - "step": 655 - }, - { - "epoch": 0.07887933625924368, - "grad_norm": 1.8921631160207606, - "learning_rate": 3.975046572344202e-06, - "loss": 1.1208, - "step": 656 - }, - { - "epoch": 0.07899957914988276, - "grad_norm": 1.9390618581716996, - "learning_rate": 3.974923755505103e-06, - "loss": 0.9225, - "step": 657 - }, - { - "epoch": 0.07911982204052186, - "grad_norm": 1.647381846774275, - "learning_rate": 3.974800639071641e-06, - "loss": 1.1125, - "step": 658 - }, - { - "epoch": 0.07924006493116094, - "grad_norm": 1.9079414742728584, - "learning_rate": 3.974677223062492e-06, - "loss": 1.2103, - "step": 659 - }, - { - "epoch": 0.07936030782180004, - "grad_norm": 1.897924597256363, - "learning_rate": 3.974553507496378e-06, - "loss": 0.9508, - "step": 660 - }, - { - "epoch": 0.07948055071243913, - "grad_norm": 2.0829157855902, - "learning_rate": 3.974429492392068e-06, - "loss": 1.0881, - "step": 661 - }, - { - "epoch": 0.07960079360307822, - "grad_norm": 1.8941782199671189, - "learning_rate": 3.974305177768373e-06, - "loss": 1.1046, - "step": 662 - }, - { - "epoch": 0.07972103649371731, - "grad_norm": 2.0307820881589937, - "learning_rate": 3.974180563644152e-06, - "loss": 1.0566, - "step": 663 - }, - { - "epoch": 0.0798412793843564, - "grad_norm": 2.3236127303744176, - "learning_rate": 3.97405565003831e-06, - "loss": 1.1, - "step": 664 - }, - { - "epoch": 0.07996152227499549, - "grad_norm": 2.016636557656404, - "learning_rate": 3.973930436969794e-06, - "loss": 0.9976, - "step": 665 - }, - { - "epoch": 0.08008176516563459, - "grad_norm": 1.6622492264074418, - "learning_rate": 3.973804924457602e-06, - "loss": 1.0631, - "step": 666 - }, - { - "epoch": 0.08020200805627367, - "grad_norm": 1.5525473428040606, - "learning_rate": 3.973679112520771e-06, - "loss": 1.0567, - "step": 667 - }, - { - "epoch": 0.08032225094691277, - "grad_norm": 1.7454042033229136, - "learning_rate": 3.973553001178389e-06, - "loss": 1.1947, - "step": 668 - }, - { - "epoch": 0.08044249383755185, - "grad_norm": 1.8083916503831623, - "learning_rate": 3.973426590449585e-06, - "loss": 0.9548, - "step": 669 - }, - { - "epoch": 0.08056273672819095, - "grad_norm": 1.769876745318378, - "learning_rate": 3.9732998803535364e-06, - "loss": 0.9611, - "step": 670 - }, - { - "epoch": 0.08068297961883003, - "grad_norm": 1.9855489187100626, - "learning_rate": 3.973172870909465e-06, - "loss": 1.0668, - "step": 671 - }, - { - "epoch": 0.08080322250946913, - "grad_norm": 2.358949736612309, - "learning_rate": 3.973045562136638e-06, - "loss": 1.0231, - "step": 672 - }, - { - "epoch": 0.08092346540010822, - "grad_norm": 1.8200924457923748, - "learning_rate": 3.972917954054368e-06, - "loss": 1.1147, - "step": 673 - }, - { - "epoch": 0.08104370829074731, - "grad_norm": 2.208056186583102, - "learning_rate": 3.972790046682013e-06, - "loss": 1.0108, - "step": 674 - }, - { - "epoch": 0.0811639511813864, - "grad_norm": 1.5087890625, - "learning_rate": 3.972661840038977e-06, - "loss": 0.9944, - "step": 675 - }, - { - "epoch": 0.08128419407202549, - "grad_norm": 1.8940153382998481, - "learning_rate": 3.972533334144707e-06, - "loss": 1.0368, - "step": 676 - }, - { - "epoch": 0.08140443696266458, - "grad_norm": 1.9247540874207403, - "learning_rate": 3.972404529018699e-06, - "loss": 0.9942, - "step": 677 - }, - { - "epoch": 0.08152467985330367, - "grad_norm": 1.7258768921197507, - "learning_rate": 3.972275424680493e-06, - "loss": 1.0609, - "step": 678 - }, - { - "epoch": 0.08164492274394276, - "grad_norm": 1.81477246384644, - "learning_rate": 3.972146021149673e-06, - "loss": 1.1165, - "step": 679 - }, - { - "epoch": 0.08176516563458186, - "grad_norm": 1.9734287071485193, - "learning_rate": 3.972016318445868e-06, - "loss": 0.9974, - "step": 680 - }, - { - "epoch": 0.08188540852522094, - "grad_norm": 1.6869561060614695, - "learning_rate": 3.971886316588757e-06, - "loss": 1.1207, - "step": 681 - }, - { - "epoch": 0.08200565141586004, - "grad_norm": 2.3116475157802787, - "learning_rate": 3.9717560155980595e-06, - "loss": 0.9524, - "step": 682 - }, - { - "epoch": 0.08212589430649912, - "grad_norm": 1.781362295793793, - "learning_rate": 3.971625415493542e-06, - "loss": 1.1266, - "step": 683 - }, - { - "epoch": 0.08224613719713822, - "grad_norm": 1.8345865026974106, - "learning_rate": 3.971494516295017e-06, - "loss": 1.0763, - "step": 684 - }, - { - "epoch": 0.08236638008777732, - "grad_norm": 1.907048355269616, - "learning_rate": 3.971363318022341e-06, - "loss": 1.0561, - "step": 685 - }, - { - "epoch": 0.0824866229784164, - "grad_norm": 1.718543161604363, - "learning_rate": 3.971231820695417e-06, - "loss": 0.8893, - "step": 686 - }, - { - "epoch": 0.0826068658690555, - "grad_norm": 1.7432175401023136, - "learning_rate": 3.971100024334193e-06, - "loss": 1.0195, - "step": 687 - }, - { - "epoch": 0.08272710875969458, - "grad_norm": 1.767261388230687, - "learning_rate": 3.970967928958663e-06, - "loss": 1.0652, - "step": 688 - }, - { - "epoch": 0.08284735165033368, - "grad_norm": 1.5910501987251318, - "learning_rate": 3.970835534588865e-06, - "loss": 1.0393, - "step": 689 - }, - { - "epoch": 0.08296759454097276, - "grad_norm": 1.5867047638419938, - "learning_rate": 3.970702841244883e-06, - "loss": 1.0636, - "step": 690 - }, - { - "epoch": 0.08308783743161186, - "grad_norm": 1.6986584671432443, - "learning_rate": 3.970569848946847e-06, - "loss": 1.0302, - "step": 691 - }, - { - "epoch": 0.08320808032225095, - "grad_norm": 1.976884416141089, - "learning_rate": 3.970436557714932e-06, - "loss": 1.0318, - "step": 692 - }, - { - "epoch": 0.08332832321289003, - "grad_norm": 1.8235571762338292, - "learning_rate": 3.970302967569358e-06, - "loss": 1.0602, - "step": 693 - }, - { - "epoch": 0.08344856610352913, - "grad_norm": 1.8095454107754376, - "learning_rate": 3.9701690785303896e-06, - "loss": 0.8924, - "step": 694 - }, - { - "epoch": 0.08356880899416821, - "grad_norm": 2.0544363818443974, - "learning_rate": 3.970034890618339e-06, - "loss": 1.0892, - "step": 695 - }, - { - "epoch": 0.08368905188480731, - "grad_norm": 1.8902633573324783, - "learning_rate": 3.969900403853562e-06, - "loss": 1.083, - "step": 696 - }, - { - "epoch": 0.08380929477544641, - "grad_norm": 1.5405814329103875, - "learning_rate": 3.96976561825646e-06, - "loss": 0.9869, - "step": 697 - }, - { - "epoch": 0.08392953766608549, - "grad_norm": 2.1301562637892086, - "learning_rate": 3.969630533847479e-06, - "loss": 1.076, - "step": 698 - }, - { - "epoch": 0.08404978055672459, - "grad_norm": 1.904120522182731, - "learning_rate": 3.969495150647113e-06, - "loss": 1.0536, - "step": 699 - }, - { - "epoch": 0.08417002344736367, - "grad_norm": 1.5169254504892187, - "learning_rate": 3.969359468675899e-06, - "loss": 0.9687, - "step": 700 - }, - { - "epoch": 0.08429026633800277, - "grad_norm": 1.845260130989473, - "learning_rate": 3.969223487954418e-06, - "loss": 1.098, - "step": 701 - }, - { - "epoch": 0.08441050922864185, - "grad_norm": 1.9059260358839358, - "learning_rate": 3.969087208503301e-06, - "loss": 1.0261, - "step": 702 - }, - { - "epoch": 0.08453075211928095, - "grad_norm": 2.3513093222760415, - "learning_rate": 3.968950630343219e-06, - "loss": 1.0455, - "step": 703 - }, - { - "epoch": 0.08465099500992004, - "grad_norm": 1.8111278338463128, - "learning_rate": 3.968813753494892e-06, - "loss": 1.137, - "step": 704 - }, - { - "epoch": 0.08477123790055913, - "grad_norm": 1.9715819199966536, - "learning_rate": 3.968676577979084e-06, - "loss": 0.9553, - "step": 705 - }, - { - "epoch": 0.08489148079119822, - "grad_norm": 1.870711890897288, - "learning_rate": 3.968539103816605e-06, - "loss": 0.9814, - "step": 706 - }, - { - "epoch": 0.0850117236818373, - "grad_norm": 1.7785754285276063, - "learning_rate": 3.9684013310283085e-06, - "loss": 1.0979, - "step": 707 - }, - { - "epoch": 0.0851319665724764, - "grad_norm": 1.8261744208496626, - "learning_rate": 3.9682632596350956e-06, - "loss": 0.8499, - "step": 708 - }, - { - "epoch": 0.0852522094631155, - "grad_norm": 1.7533953969846945, - "learning_rate": 3.968124889657911e-06, - "loss": 0.9868, - "step": 709 - }, - { - "epoch": 0.08537245235375458, - "grad_norm": 2.4171190386535546, - "learning_rate": 3.967986221117746e-06, - "loss": 1.111, - "step": 710 - }, - { - "epoch": 0.08549269524439368, - "grad_norm": 2.1547956122813106, - "learning_rate": 3.967847254035635e-06, - "loss": 1.0704, - "step": 711 - }, - { - "epoch": 0.08561293813503276, - "grad_norm": 2.52658527201226, - "learning_rate": 3.967707988432661e-06, - "loss": 1.0743, - "step": 712 - }, - { - "epoch": 0.08573318102567186, - "grad_norm": 2.2635001906556456, - "learning_rate": 3.967568424329949e-06, - "loss": 1.0794, - "step": 713 - }, - { - "epoch": 0.08585342391631094, - "grad_norm": 0.909968737494909, - "learning_rate": 3.967428561748671e-06, - "loss": 0.7939, - "step": 714 - }, - { - "epoch": 0.08597366680695004, - "grad_norm": 1.8773645114901596, - "learning_rate": 3.967288400710045e-06, - "loss": 1.076, - "step": 715 - }, - { - "epoch": 0.08609390969758914, - "grad_norm": 2.230166225995015, - "learning_rate": 3.9671479412353335e-06, - "loss": 1.0832, - "step": 716 - }, - { - "epoch": 0.08621415258822822, - "grad_norm": 1.9832295994384375, - "learning_rate": 3.967007183345843e-06, - "loss": 0.9564, - "step": 717 - }, - { - "epoch": 0.08633439547886732, - "grad_norm": 2.1478767911784438, - "learning_rate": 3.966866127062927e-06, - "loss": 1.1017, - "step": 718 - }, - { - "epoch": 0.0864546383695064, - "grad_norm": 1.0307819575893813, - "learning_rate": 3.966724772407982e-06, - "loss": 0.8793, - "step": 719 - }, - { - "epoch": 0.0865748812601455, - "grad_norm": 1.8140279314820098, - "learning_rate": 3.966583119402454e-06, - "loss": 1.0875, - "step": 720 - }, - { - "epoch": 0.08669512415078459, - "grad_norm": 1.6450838805034265, - "learning_rate": 3.9664411680678305e-06, - "loss": 1.0198, - "step": 721 - }, - { - "epoch": 0.08681536704142367, - "grad_norm": 1.2030829880550473, - "learning_rate": 3.966298918425644e-06, - "loss": 0.8402, - "step": 722 - }, - { - "epoch": 0.08693560993206277, - "grad_norm": 1.6044606851746153, - "learning_rate": 3.966156370497476e-06, - "loss": 1.0358, - "step": 723 - }, - { - "epoch": 0.08705585282270185, - "grad_norm": 1.6302081789579308, - "learning_rate": 3.96601352430495e-06, - "loss": 1.0882, - "step": 724 - }, - { - "epoch": 0.08717609571334095, - "grad_norm": 1.9373520825435524, - "learning_rate": 3.965870379869735e-06, - "loss": 1.0356, - "step": 725 - }, - { - "epoch": 0.08729633860398003, - "grad_norm": 1.83333107196784, - "learning_rate": 3.965726937213547e-06, - "loss": 1.0633, - "step": 726 - }, - { - "epoch": 0.08741658149461913, - "grad_norm": 3.817494856975216, - "learning_rate": 3.965583196358144e-06, - "loss": 1.0162, - "step": 727 - }, - { - "epoch": 0.08753682438525823, - "grad_norm": 1.9410705890270177, - "learning_rate": 3.965439157325335e-06, - "loss": 0.9577, - "step": 728 - }, - { - "epoch": 0.08765706727589731, - "grad_norm": 1.9290328054214345, - "learning_rate": 3.965294820136968e-06, - "loss": 0.9641, - "step": 729 - }, - { - "epoch": 0.08777731016653641, - "grad_norm": 1.8398727123143386, - "learning_rate": 3.965150184814938e-06, - "loss": 1.0684, - "step": 730 - }, - { - "epoch": 0.08789755305717549, - "grad_norm": 2.0028935005568202, - "learning_rate": 3.965005251381189e-06, - "loss": 0.9604, - "step": 731 - }, - { - "epoch": 0.08801779594781459, - "grad_norm": 0.8959033554271493, - "learning_rate": 3.964860019857705e-06, - "loss": 0.8641, - "step": 732 - }, - { - "epoch": 0.08813803883845367, - "grad_norm": 1.687666001808678, - "learning_rate": 3.964714490266518e-06, - "loss": 1.035, - "step": 733 - }, - { - "epoch": 0.08825828172909277, - "grad_norm": 0.9416542201499619, - "learning_rate": 3.964568662629706e-06, - "loss": 0.8576, - "step": 734 - }, - { - "epoch": 0.08837852461973186, - "grad_norm": 1.9595664653285954, - "learning_rate": 3.9644225369693895e-06, - "loss": 1.0433, - "step": 735 - }, - { - "epoch": 0.08849876751037095, - "grad_norm": 1.8421542487407412, - "learning_rate": 3.964276113307735e-06, - "loss": 1.0752, - "step": 736 - }, - { - "epoch": 0.08861901040101004, - "grad_norm": 1.7763352435796032, - "learning_rate": 3.9641293916669574e-06, - "loss": 1.0094, - "step": 737 - }, - { - "epoch": 0.08873925329164913, - "grad_norm": 1.794962520367466, - "learning_rate": 3.9639823720693115e-06, - "loss": 1.0322, - "step": 738 - }, - { - "epoch": 0.08885949618228822, - "grad_norm": 1.1943242594162813, - "learning_rate": 3.963835054537102e-06, - "loss": 0.8502, - "step": 739 - }, - { - "epoch": 0.08897973907292732, - "grad_norm": 2.1188018387385927, - "learning_rate": 3.963687439092676e-06, - "loss": 0.8163, - "step": 740 - }, - { - "epoch": 0.0890999819635664, - "grad_norm": 2.4098610473083264, - "learning_rate": 3.963539525758427e-06, - "loss": 1.0063, - "step": 741 - }, - { - "epoch": 0.0892202248542055, - "grad_norm": 1.8048723134450748, - "learning_rate": 3.9633913145567925e-06, - "loss": 0.8864, - "step": 742 - }, - { - "epoch": 0.08934046774484458, - "grad_norm": 1.7329109129550686, - "learning_rate": 3.9632428055102575e-06, - "loss": 1.0186, - "step": 743 - }, - { - "epoch": 0.08946071063548368, - "grad_norm": 2.337820461562018, - "learning_rate": 3.9630939986413495e-06, - "loss": 0.8832, - "step": 744 - }, - { - "epoch": 0.08958095352612276, - "grad_norm": 1.729195705614266, - "learning_rate": 3.962944893972643e-06, - "loss": 0.9838, - "step": 745 - }, - { - "epoch": 0.08970119641676186, - "grad_norm": 8.320544487669606, - "learning_rate": 3.962795491526756e-06, - "loss": 1.127, - "step": 746 - }, - { - "epoch": 0.08982143930740095, - "grad_norm": 3.406799552052661, - "learning_rate": 3.962645791326354e-06, - "loss": 1.0979, - "step": 747 - }, - { - "epoch": 0.08994168219804004, - "grad_norm": 2.2577780750646586, - "learning_rate": 3.962495793394146e-06, - "loss": 1.0353, - "step": 748 - }, - { - "epoch": 0.09006192508867913, - "grad_norm": 0.9429987184428854, - "learning_rate": 3.9623454977528864e-06, - "loss": 0.81, - "step": 749 - }, - { - "epoch": 0.09018216797931822, - "grad_norm": 1.6112065679764813, - "learning_rate": 3.962194904425375e-06, - "loss": 1.0558, - "step": 750 - }, - { - "epoch": 0.09030241086995731, - "grad_norm": 1.8175376075936063, - "learning_rate": 3.9620440134344566e-06, - "loss": 0.881, - "step": 751 - }, - { - "epoch": 0.09042265376059641, - "grad_norm": 2.167569290338366, - "learning_rate": 3.9618928248030215e-06, - "loss": 1.0277, - "step": 752 - }, - { - "epoch": 0.0905428966512355, - "grad_norm": 2.5546372901578684, - "learning_rate": 3.961741338554005e-06, - "loss": 1.0357, - "step": 753 - }, - { - "epoch": 0.09066313954187459, - "grad_norm": 2.0259835856790445, - "learning_rate": 3.9615895547103865e-06, - "loss": 0.9566, - "step": 754 - }, - { - "epoch": 0.09078338243251367, - "grad_norm": 1.8159765904383964, - "learning_rate": 3.961437473295193e-06, - "loss": 0.9795, - "step": 755 - }, - { - "epoch": 0.09090362532315277, - "grad_norm": 1.9833977162572545, - "learning_rate": 3.961285094331495e-06, - "loss": 0.9201, - "step": 756 - }, - { - "epoch": 0.09102386821379185, - "grad_norm": 1.7641942801977537, - "learning_rate": 3.961132417842406e-06, - "loss": 1.0572, - "step": 757 - }, - { - "epoch": 0.09114411110443095, - "grad_norm": 2.687174222851038, - "learning_rate": 3.960979443851089e-06, - "loss": 0.9665, - "step": 758 - }, - { - "epoch": 0.09126435399507005, - "grad_norm": 1.6077317672642353, - "learning_rate": 3.96082617238075e-06, - "loss": 0.998, - "step": 759 - }, - { - "epoch": 0.09138459688570913, - "grad_norm": 2.1572751911393167, - "learning_rate": 3.960672603454639e-06, - "loss": 0.9994, - "step": 760 - }, - { - "epoch": 0.09150483977634823, - "grad_norm": 2.9320727594671117, - "learning_rate": 3.960518737096054e-06, - "loss": 0.9795, - "step": 761 - }, - { - "epoch": 0.09162508266698731, - "grad_norm": 2.579320457035257, - "learning_rate": 3.960364573328334e-06, - "loss": 0.9463, - "step": 762 - }, - { - "epoch": 0.0917453255576264, - "grad_norm": 1.7756160848918083, - "learning_rate": 3.9602101121748675e-06, - "loss": 1.0942, - "step": 763 - }, - { - "epoch": 0.0918655684482655, - "grad_norm": 2.008601408461248, - "learning_rate": 3.960055353659085e-06, - "loss": 0.9248, - "step": 764 - }, - { - "epoch": 0.09198581133890459, - "grad_norm": 1.689629376870224, - "learning_rate": 3.959900297804465e-06, - "loss": 1.0456, - "step": 765 - }, - { - "epoch": 0.09210605422954368, - "grad_norm": 1.80932299287203, - "learning_rate": 3.9597449446345276e-06, - "loss": 0.9657, - "step": 766 - }, - { - "epoch": 0.09222629712018277, - "grad_norm": 2.2074500302699755, - "learning_rate": 3.95958929417284e-06, - "loss": 1.0385, - "step": 767 - }, - { - "epoch": 0.09234654001082186, - "grad_norm": 1.4732978698006658, - "learning_rate": 3.9594333464430145e-06, - "loss": 0.8117, - "step": 768 - }, - { - "epoch": 0.09246678290146094, - "grad_norm": 2.092493050038453, - "learning_rate": 3.959277101468709e-06, - "loss": 1.0772, - "step": 769 - }, - { - "epoch": 0.09258702579210004, - "grad_norm": 2.3424827710213236, - "learning_rate": 3.959120559273624e-06, - "loss": 0.9899, - "step": 770 - }, - { - "epoch": 0.09270726868273914, - "grad_norm": 1.8563501453036801, - "learning_rate": 3.958963719881509e-06, - "loss": 1.0402, - "step": 771 - }, - { - "epoch": 0.09282751157337822, - "grad_norm": 2.0897022805504832, - "learning_rate": 3.958806583316154e-06, - "loss": 1.1429, - "step": 772 - }, - { - "epoch": 0.09294775446401732, - "grad_norm": 1.8386260131277112, - "learning_rate": 3.9586491496013985e-06, - "loss": 0.9947, - "step": 773 - }, - { - "epoch": 0.0930679973546564, - "grad_norm": 1.9513932752578809, - "learning_rate": 3.958491418761124e-06, - "loss": 1.0257, - "step": 774 - }, - { - "epoch": 0.0931882402452955, - "grad_norm": 2.602559674666265, - "learning_rate": 3.958333390819258e-06, - "loss": 0.9442, - "step": 775 - }, - { - "epoch": 0.0933084831359346, - "grad_norm": 2.2289254482871823, - "learning_rate": 3.9581750657997754e-06, - "loss": 0.997, - "step": 776 - }, - { - "epoch": 0.09342872602657368, - "grad_norm": 1.9894065445518208, - "learning_rate": 3.95801644372669e-06, - "loss": 1.1032, - "step": 777 - }, - { - "epoch": 0.09354896891721277, - "grad_norm": 1.7810329505789537, - "learning_rate": 3.957857524624068e-06, - "loss": 1.0398, - "step": 778 - }, - { - "epoch": 0.09366921180785186, - "grad_norm": 1.4927895015664259, - "learning_rate": 3.957698308516016e-06, - "loss": 1.1026, - "step": 779 - }, - { - "epoch": 0.09378945469849095, - "grad_norm": 1.8625964299783293, - "learning_rate": 3.957538795426688e-06, - "loss": 1.0279, - "step": 780 - }, - { - "epoch": 0.09390969758913004, - "grad_norm": 2.000259978563297, - "learning_rate": 3.9573789853802804e-06, - "loss": 0.9727, - "step": 781 - }, - { - "epoch": 0.09402994047976913, - "grad_norm": 1.964490612292075, - "learning_rate": 3.957218878401037e-06, - "loss": 0.9564, - "step": 782 - }, - { - "epoch": 0.09415018337040823, - "grad_norm": 1.9926099740649914, - "learning_rate": 3.957058474513246e-06, - "loss": 1.105, - "step": 783 - }, - { - "epoch": 0.09427042626104731, - "grad_norm": 1.7113306299333628, - "learning_rate": 3.956897773741241e-06, - "loss": 0.9922, - "step": 784 - }, - { - "epoch": 0.09439066915168641, - "grad_norm": 1.8250062864012422, - "learning_rate": 3.956736776109398e-06, - "loss": 0.9194, - "step": 785 - }, - { - "epoch": 0.09451091204232549, - "grad_norm": 1.724663704419231, - "learning_rate": 3.956575481642143e-06, - "loss": 1.0391, - "step": 786 - }, - { - "epoch": 0.09463115493296459, - "grad_norm": 3.0204715029744547, - "learning_rate": 3.956413890363943e-06, - "loss": 0.9504, - "step": 787 - }, - { - "epoch": 0.09475139782360369, - "grad_norm": 1.8476378619385938, - "learning_rate": 3.956252002299312e-06, - "loss": 1.0315, - "step": 788 - }, - { - "epoch": 0.09487164071424277, - "grad_norm": 1.856401518215355, - "learning_rate": 3.956089817472807e-06, - "loss": 1.1092, - "step": 789 - }, - { - "epoch": 0.09499188360488187, - "grad_norm": 12.93807613545824, - "learning_rate": 3.955927335909032e-06, - "loss": 1.067, - "step": 790 - }, - { - "epoch": 0.09511212649552095, - "grad_norm": 2.597422562033232, - "learning_rate": 3.955764557632634e-06, - "loss": 0.9584, - "step": 791 - }, - { - "epoch": 0.09523236938616005, - "grad_norm": 4.24007086172809, - "learning_rate": 3.955601482668309e-06, - "loss": 1.1491, - "step": 792 - }, - { - "epoch": 0.09535261227679913, - "grad_norm": 1.7168249273340956, - "learning_rate": 3.955438111040794e-06, - "loss": 1.0813, - "step": 793 - }, - { - "epoch": 0.09547285516743823, - "grad_norm": 1.752377325817121, - "learning_rate": 3.955274442774873e-06, - "loss": 1.0095, - "step": 794 - }, - { - "epoch": 0.09559309805807732, - "grad_norm": 2.1408000790913886, - "learning_rate": 3.9551104778953725e-06, - "loss": 0.9225, - "step": 795 - }, - { - "epoch": 0.0957133409487164, - "grad_norm": 2.2606479040139913, - "learning_rate": 3.954946216427167e-06, - "loss": 1.0664, - "step": 796 - }, - { - "epoch": 0.0958335838393555, - "grad_norm": 1.0196736554191779, - "learning_rate": 3.954781658395176e-06, - "loss": 0.8457, - "step": 797 - }, - { - "epoch": 0.09595382672999458, - "grad_norm": 1.7592805284846624, - "learning_rate": 3.95461680382436e-06, - "loss": 1.1236, - "step": 798 - }, - { - "epoch": 0.09607406962063368, - "grad_norm": 4.959332255976696, - "learning_rate": 3.9544516527397295e-06, - "loss": 1.0622, - "step": 799 - }, - { - "epoch": 0.09619431251127276, - "grad_norm": 1.6910884990397377, - "learning_rate": 3.954286205166338e-06, - "loss": 1.0092, - "step": 800 - }, - { - "epoch": 0.09631455540191186, - "grad_norm": 2.1802302952835997, - "learning_rate": 3.954120461129282e-06, - "loss": 1.0405, - "step": 801 - }, - { - "epoch": 0.09643479829255096, - "grad_norm": 2.0073569408098257, - "learning_rate": 3.953954420653706e-06, - "loss": 1.0481, - "step": 802 - }, - { - "epoch": 0.09655504118319004, - "grad_norm": 1.8393215078079967, - "learning_rate": 3.953788083764798e-06, - "loss": 1.0828, - "step": 803 - }, - { - "epoch": 0.09667528407382914, - "grad_norm": 1.9778731753824204, - "learning_rate": 3.953621450487792e-06, - "loss": 1.1223, - "step": 804 - }, - { - "epoch": 0.09679552696446822, - "grad_norm": 0.9470223025824642, - "learning_rate": 3.953454520847964e-06, - "loss": 0.8469, - "step": 805 - }, - { - "epoch": 0.09691576985510732, - "grad_norm": 1.9515287666673342, - "learning_rate": 3.9532872948706395e-06, - "loss": 0.9445, - "step": 806 - }, - { - "epoch": 0.09703601274574641, - "grad_norm": 2.134600326685888, - "learning_rate": 3.9531197725811845e-06, - "loss": 1.0353, - "step": 807 - }, - { - "epoch": 0.0971562556363855, - "grad_norm": 1.8085718668526283, - "learning_rate": 3.952951954005013e-06, - "loss": 1.0744, - "step": 808 - }, - { - "epoch": 0.0972764985270246, - "grad_norm": 2.796673900684311, - "learning_rate": 3.952783839167584e-06, - "loss": 1.0514, - "step": 809 - }, - { - "epoch": 0.09739674141766368, - "grad_norm": 2.6807441074114826, - "learning_rate": 3.952615428094398e-06, - "loss": 0.9553, - "step": 810 - }, - { - "epoch": 0.09751698430830277, - "grad_norm": 1.5839121245852634, - "learning_rate": 3.952446720811004e-06, - "loss": 0.946, - "step": 811 - }, - { - "epoch": 0.09763722719894186, - "grad_norm": 1.0646816743244019, - "learning_rate": 3.952277717342995e-06, - "loss": 0.8701, - "step": 812 - }, - { - "epoch": 0.09775747008958095, - "grad_norm": 2.0559090004015568, - "learning_rate": 3.952108417716009e-06, - "loss": 1.0617, - "step": 813 - }, - { - "epoch": 0.09787771298022005, - "grad_norm": 1.6959375732151942, - "learning_rate": 3.951938821955727e-06, - "loss": 1.0577, - "step": 814 - }, - { - "epoch": 0.09799795587085913, - "grad_norm": 1.5063862750823473, - "learning_rate": 3.9517689300878786e-06, - "loss": 0.9638, - "step": 815 - }, - { - "epoch": 0.09811819876149823, - "grad_norm": 1.5865633626908848, - "learning_rate": 3.951598742138236e-06, - "loss": 0.9853, - "step": 816 - }, - { - "epoch": 0.09823844165213731, - "grad_norm": 2.0884586485791266, - "learning_rate": 3.951428258132615e-06, - "loss": 0.9942, - "step": 817 - }, - { - "epoch": 0.09835868454277641, - "grad_norm": 1.8947146612048589, - "learning_rate": 3.951257478096879e-06, - "loss": 1.0504, - "step": 818 - }, - { - "epoch": 0.0984789274334155, - "grad_norm": 2.8329143962429275, - "learning_rate": 3.951086402056936e-06, - "loss": 0.8866, - "step": 819 - }, - { - "epoch": 0.09859917032405459, - "grad_norm": 1.4605958916600206, - "learning_rate": 3.950915030038735e-06, - "loss": 1.0394, - "step": 820 - }, - { - "epoch": 0.09871941321469369, - "grad_norm": 2.012815543132318, - "learning_rate": 3.9507433620682765e-06, - "loss": 1.0446, - "step": 821 - }, - { - "epoch": 0.09883965610533277, - "grad_norm": 1.456256814048444, - "learning_rate": 3.9505713981716e-06, - "loss": 1.0806, - "step": 822 - }, - { - "epoch": 0.09895989899597187, - "grad_norm": 1.716670356055187, - "learning_rate": 3.950399138374795e-06, - "loss": 1.0132, - "step": 823 - }, - { - "epoch": 0.09908014188661095, - "grad_norm": 1.5526599026460728, - "learning_rate": 3.95022658270399e-06, - "loss": 0.9439, - "step": 824 - }, - { - "epoch": 0.09920038477725004, - "grad_norm": 1.997873129053725, - "learning_rate": 3.9500537311853635e-06, - "loss": 0.9849, - "step": 825 - }, - { - "epoch": 0.09932062766788914, - "grad_norm": 2.1063031470048466, - "learning_rate": 3.949880583845136e-06, - "loss": 1.0341, - "step": 826 - }, - { - "epoch": 0.09944087055852822, - "grad_norm": 1.7113332073081347, - "learning_rate": 3.949707140709575e-06, - "loss": 1.0197, - "step": 827 - }, - { - "epoch": 0.09956111344916732, - "grad_norm": 3.4067168309598634, - "learning_rate": 3.949533401804991e-06, - "loss": 1.0418, - "step": 828 - }, - { - "epoch": 0.0996813563398064, - "grad_norm": 1.9619839980029907, - "learning_rate": 3.949359367157739e-06, - "loss": 1.1134, - "step": 829 - }, - { - "epoch": 0.0998015992304455, - "grad_norm": 2.0567584026698724, - "learning_rate": 3.949185036794222e-06, - "loss": 0.9615, - "step": 830 - }, - { - "epoch": 0.0999218421210846, - "grad_norm": 1.5653945814207297, - "learning_rate": 3.949010410740884e-06, - "loss": 0.9816, - "step": 831 - }, - { - "epoch": 0.10004208501172368, - "grad_norm": 2.560460837727866, - "learning_rate": 3.948835489024216e-06, - "loss": 1.0708, - "step": 832 - }, - { - "epoch": 0.10016232790236278, - "grad_norm": 1.797326537080267, - "learning_rate": 3.948660271670755e-06, - "loss": 1.0971, - "step": 833 - }, - { - "epoch": 0.10028257079300186, - "grad_norm": 1.9432532506748947, - "learning_rate": 3.948484758707079e-06, - "loss": 1.0447, - "step": 834 - }, - { - "epoch": 0.10040281368364096, - "grad_norm": 1.8741263579492986, - "learning_rate": 3.948308950159815e-06, - "loss": 1.0384, - "step": 835 - }, - { - "epoch": 0.10052305657428004, - "grad_norm": 2.0851061589501376, - "learning_rate": 3.9481328460556326e-06, - "loss": 0.9671, - "step": 836 - }, - { - "epoch": 0.10064329946491914, - "grad_norm": 1.698522666332974, - "learning_rate": 3.9479564464212455e-06, - "loss": 1.0966, - "step": 837 - }, - { - "epoch": 0.10076354235555823, - "grad_norm": 2.2130661277837143, - "learning_rate": 3.947779751283414e-06, - "loss": 0.9742, - "step": 838 - }, - { - "epoch": 0.10088378524619732, - "grad_norm": 1.7185998330839687, - "learning_rate": 3.947602760668944e-06, - "loss": 0.9617, - "step": 839 - }, - { - "epoch": 0.10100402813683641, - "grad_norm": 1.7502381298990544, - "learning_rate": 3.947425474604684e-06, - "loss": 0.9186, - "step": 840 - }, - { - "epoch": 0.1011242710274755, - "grad_norm": 1.9341874877562848, - "learning_rate": 3.947247893117528e-06, - "loss": 1.125, - "step": 841 - }, - { - "epoch": 0.10124451391811459, - "grad_norm": 4.5472118344683805, - "learning_rate": 3.947070016234413e-06, - "loss": 0.9013, - "step": 842 - }, - { - "epoch": 0.10136475680875369, - "grad_norm": 2.022536972141775, - "learning_rate": 3.946891843982326e-06, - "loss": 0.9364, - "step": 843 - }, - { - "epoch": 0.10148499969939277, - "grad_norm": 1.940384655503353, - "learning_rate": 3.9467133763882935e-06, - "loss": 0.9491, - "step": 844 - }, - { - "epoch": 0.10160524259003187, - "grad_norm": 1.8814901559832216, - "learning_rate": 3.9465346134793905e-06, - "loss": 1.0713, - "step": 845 - }, - { - "epoch": 0.10172548548067095, - "grad_norm": 1.744655554477422, - "learning_rate": 3.9463555552827335e-06, - "loss": 0.9979, - "step": 846 - }, - { - "epoch": 0.10184572837131005, - "grad_norm": 2.4905311557220218, - "learning_rate": 3.946176201825487e-06, - "loss": 1.0748, - "step": 847 - }, - { - "epoch": 0.10196597126194913, - "grad_norm": 1.8682704961023298, - "learning_rate": 3.9459965531348575e-06, - "loss": 1.0365, - "step": 848 - }, - { - "epoch": 0.10208621415258823, - "grad_norm": 2.030559246281953, - "learning_rate": 3.945816609238098e-06, - "loss": 1.0624, - "step": 849 - }, - { - "epoch": 0.10220645704322733, - "grad_norm": 2.409691962187411, - "learning_rate": 3.945636370162507e-06, - "loss": 1.0563, - "step": 850 - }, - { - "epoch": 0.10232669993386641, - "grad_norm": 1.745395870830066, - "learning_rate": 3.945455835935425e-06, - "loss": 0.9983, - "step": 851 - }, - { - "epoch": 0.1024469428245055, - "grad_norm": 1.9684394712630529, - "learning_rate": 3.94527500658424e-06, - "loss": 0.9471, - "step": 852 - }, - { - "epoch": 0.10256718571514459, - "grad_norm": 1.7337228047081412, - "learning_rate": 3.945093882136382e-06, - "loss": 1.0171, - "step": 853 - }, - { - "epoch": 0.10268742860578368, - "grad_norm": 1.7819708319309442, - "learning_rate": 3.944912462619329e-06, - "loss": 1.0461, - "step": 854 - }, - { - "epoch": 0.10280767149642277, - "grad_norm": 2.557406317373826, - "learning_rate": 3.9447307480606025e-06, - "loss": 1.0141, - "step": 855 - }, - { - "epoch": 0.10292791438706186, - "grad_norm": 1.8369572830591177, - "learning_rate": 3.944548738487767e-06, - "loss": 1.1072, - "step": 856 - }, - { - "epoch": 0.10304815727770096, - "grad_norm": 1.6042080498183677, - "learning_rate": 3.944366433928434e-06, - "loss": 1.1064, - "step": 857 - }, - { - "epoch": 0.10316840016834004, - "grad_norm": 1.3987762211446118, - "learning_rate": 3.9441838344102594e-06, - "loss": 1.0302, - "step": 858 - }, - { - "epoch": 0.10328864305897914, - "grad_norm": 1.9966532600517408, - "learning_rate": 3.944000939960943e-06, - "loss": 0.8837, - "step": 859 - }, - { - "epoch": 0.10340888594961822, - "grad_norm": 1.4543592891983868, - "learning_rate": 3.943817750608229e-06, - "loss": 1.0091, - "step": 860 - }, - { - "epoch": 0.10352912884025732, - "grad_norm": 2.0004392379994895, - "learning_rate": 3.943634266379908e-06, - "loss": 1.0233, - "step": 861 - }, - { - "epoch": 0.10364937173089642, - "grad_norm": 1.8202137408664305, - "learning_rate": 3.943450487303815e-06, - "loss": 1.0533, - "step": 862 - }, - { - "epoch": 0.1037696146215355, - "grad_norm": 1.5543784596366712, - "learning_rate": 3.943266413407827e-06, - "loss": 1.0549, - "step": 863 - }, - { - "epoch": 0.1038898575121746, - "grad_norm": 1.6281147962019804, - "learning_rate": 3.94308204471987e-06, - "loss": 1.0494, - "step": 864 - }, - { - "epoch": 0.10401010040281368, - "grad_norm": 2.2555513774455207, - "learning_rate": 3.942897381267912e-06, - "loss": 0.9482, - "step": 865 - }, - { - "epoch": 0.10413034329345278, - "grad_norm": 2.1693055026858605, - "learning_rate": 3.942712423079965e-06, - "loss": 0.8708, - "step": 866 - }, - { - "epoch": 0.10425058618409186, - "grad_norm": 1.9551784249566937, - "learning_rate": 3.942527170184088e-06, - "loss": 1.1017, - "step": 867 - }, - { - "epoch": 0.10437082907473096, - "grad_norm": 2.181225633895011, - "learning_rate": 3.942341622608385e-06, - "loss": 0.9916, - "step": 868 - }, - { - "epoch": 0.10449107196537005, - "grad_norm": 1.3982205782106505, - "learning_rate": 3.942155780381001e-06, - "loss": 0.9736, - "step": 869 - }, - { - "epoch": 0.10461131485600914, - "grad_norm": 1.6865947555898335, - "learning_rate": 3.94196964353013e-06, - "loss": 0.9658, - "step": 870 - }, - { - "epoch": 0.10473155774664823, - "grad_norm": 1.903916227982163, - "learning_rate": 3.941783212084008e-06, - "loss": 1.0016, - "step": 871 - }, - { - "epoch": 0.10485180063728732, - "grad_norm": 2.1345201300545074, - "learning_rate": 3.941596486070916e-06, - "loss": 0.9745, - "step": 872 - }, - { - "epoch": 0.10497204352792641, - "grad_norm": 2.8629974599301353, - "learning_rate": 3.941409465519182e-06, - "loss": 0.784, - "step": 873 - }, - { - "epoch": 0.10509228641856551, - "grad_norm": 1.4381696551045025, - "learning_rate": 3.941222150457176e-06, - "loss": 1.055, - "step": 874 - }, - { - "epoch": 0.10521252930920459, - "grad_norm": 2.7547786981001927, - "learning_rate": 3.941034540913311e-06, - "loss": 0.926, - "step": 875 - }, - { - "epoch": 0.10533277219984369, - "grad_norm": 1.5055049969081244, - "learning_rate": 3.940846636916051e-06, - "loss": 1.0284, - "step": 876 - }, - { - "epoch": 0.10545301509048277, - "grad_norm": 1.8066936753190863, - "learning_rate": 3.940658438493899e-06, - "loss": 1.0667, - "step": 877 - }, - { - "epoch": 0.10557325798112187, - "grad_norm": 1.852077106798631, - "learning_rate": 3.940469945675405e-06, - "loss": 0.964, - "step": 878 - }, - { - "epoch": 0.10569350087176095, - "grad_norm": 1.7789926111755117, - "learning_rate": 3.940281158489163e-06, - "loss": 1.1138, - "step": 879 - }, - { - "epoch": 0.10581374376240005, - "grad_norm": 1.7943278960154383, - "learning_rate": 3.940092076963812e-06, - "loss": 1.0298, - "step": 880 - }, - { - "epoch": 0.10593398665303914, - "grad_norm": 2.005508466445449, - "learning_rate": 3.9399027011280355e-06, - "loss": 0.9926, - "step": 881 - }, - { - "epoch": 0.10605422954367823, - "grad_norm": 2.8376703354043933, - "learning_rate": 3.939713031010561e-06, - "loss": 0.9733, - "step": 882 - }, - { - "epoch": 0.10617447243431732, - "grad_norm": 1.9159026350794792, - "learning_rate": 3.939523066640163e-06, - "loss": 0.9847, - "step": 883 - }, - { - "epoch": 0.10629471532495641, - "grad_norm": 1.7957871834592114, - "learning_rate": 3.939332808045657e-06, - "loss": 1.01, - "step": 884 - }, - { - "epoch": 0.1064149582155955, - "grad_norm": 1.738421350898184, - "learning_rate": 3.939142255255906e-06, - "loss": 1.0442, - "step": 885 - }, - { - "epoch": 0.1065352011062346, - "grad_norm": 1.9681800138283836, - "learning_rate": 3.938951408299817e-06, - "loss": 1.0836, - "step": 886 - }, - { - "epoch": 0.10665544399687368, - "grad_norm": 1.0680013611174064, - "learning_rate": 3.938760267206342e-06, - "loss": 0.7757, - "step": 887 - }, - { - "epoch": 0.10677568688751278, - "grad_norm": 2.3558708594182542, - "learning_rate": 3.938568832004475e-06, - "loss": 0.9965, - "step": 888 - }, - { - "epoch": 0.10689592977815186, - "grad_norm": 2.0047339204332926, - "learning_rate": 3.938377102723257e-06, - "loss": 0.9555, - "step": 889 - }, - { - "epoch": 0.10701617266879096, - "grad_norm": 1.9201558184974392, - "learning_rate": 3.938185079391774e-06, - "loss": 1.0467, - "step": 890 - }, - { - "epoch": 0.10713641555943004, - "grad_norm": 7.135153779990403, - "learning_rate": 3.937992762039157e-06, - "loss": 1.2706, - "step": 891 - }, - { - "epoch": 0.10725665845006914, - "grad_norm": 1.555930556024953, - "learning_rate": 3.937800150694577e-06, - "loss": 1.0049, - "step": 892 - }, - { - "epoch": 0.10737690134070824, - "grad_norm": 1.8227807857187996, - "learning_rate": 3.937607245387255e-06, - "loss": 0.9524, - "step": 893 - }, - { - "epoch": 0.10749714423134732, - "grad_norm": 2.86145644176871, - "learning_rate": 3.937414046146455e-06, - "loss": 0.9276, - "step": 894 - }, - { - "epoch": 0.10761738712198642, - "grad_norm": 1.8562798905454578, - "learning_rate": 3.9372205530014845e-06, - "loss": 0.9637, - "step": 895 - }, - { - "epoch": 0.1077376300126255, - "grad_norm": 1.7992353775640846, - "learning_rate": 3.937026765981696e-06, - "loss": 0.9274, - "step": 896 - }, - { - "epoch": 0.1078578729032646, - "grad_norm": 1.9795940205483538, - "learning_rate": 3.936832685116488e-06, - "loss": 0.9969, - "step": 897 - }, - { - "epoch": 0.10797811579390369, - "grad_norm": 2.0391782530452973, - "learning_rate": 3.936638310435301e-06, - "loss": 1.0999, - "step": 898 - }, - { - "epoch": 0.10809835868454278, - "grad_norm": 1.9754901132995175, - "learning_rate": 3.936443641967623e-06, - "loss": 1.027, - "step": 899 - }, - { - "epoch": 0.10821860157518187, - "grad_norm": 1.7871981572678712, - "learning_rate": 3.936248679742983e-06, - "loss": 1.0285, - "step": 900 - }, - { - "epoch": 0.10833884446582095, - "grad_norm": 1.1253395098088093, - "learning_rate": 3.936053423790959e-06, - "loss": 0.9487, - "step": 901 - }, - { - "epoch": 0.10845908735646005, - "grad_norm": 1.61522234505137, - "learning_rate": 3.935857874141168e-06, - "loss": 0.9725, - "step": 902 - }, - { - "epoch": 0.10857933024709913, - "grad_norm": 2.1731775343620026, - "learning_rate": 3.935662030823279e-06, - "loss": 1.0367, - "step": 903 - }, - { - "epoch": 0.10869957313773823, - "grad_norm": 2.0849782045172, - "learning_rate": 3.935465893866998e-06, - "loss": 0.923, - "step": 904 - }, - { - "epoch": 0.10881981602837733, - "grad_norm": 1.779945163310744, - "learning_rate": 3.935269463302079e-06, - "loss": 1.0046, - "step": 905 - }, - { - "epoch": 0.10894005891901641, - "grad_norm": 1.8146638778638935, - "learning_rate": 3.935072739158322e-06, - "loss": 0.9766, - "step": 906 - }, - { - "epoch": 0.10906030180965551, - "grad_norm": 1.6664578386450608, - "learning_rate": 3.934875721465569e-06, - "loss": 0.9957, - "step": 907 - }, - { - "epoch": 0.10918054470029459, - "grad_norm": 2.2149677544651327, - "learning_rate": 3.9346784102537076e-06, - "loss": 0.915, - "step": 908 - }, - { - "epoch": 0.10930078759093369, - "grad_norm": 1.6226666644390757, - "learning_rate": 3.934480805552669e-06, - "loss": 0.9826, - "step": 909 - }, - { - "epoch": 0.10942103048157277, - "grad_norm": 1.9952979843818495, - "learning_rate": 3.93428290739243e-06, - "loss": 1.0805, - "step": 910 - }, - { - "epoch": 0.10954127337221187, - "grad_norm": 2.357907982290233, - "learning_rate": 3.9340847158030125e-06, - "loss": 0.9923, - "step": 911 - }, - { - "epoch": 0.10966151626285096, - "grad_norm": 1.6060181951430355, - "learning_rate": 3.9338862308144814e-06, - "loss": 0.9577, - "step": 912 - }, - { - "epoch": 0.10978175915349005, - "grad_norm": 2.245096479521996, - "learning_rate": 3.933687452456946e-06, - "loss": 1.0504, - "step": 913 - }, - { - "epoch": 0.10990200204412914, - "grad_norm": 1.9561822385904597, - "learning_rate": 3.933488380760562e-06, - "loss": 1.0646, - "step": 914 - }, - { - "epoch": 0.11002224493476823, - "grad_norm": 1.7447970565788562, - "learning_rate": 3.9332890157555286e-06, - "loss": 1.0788, - "step": 915 - }, - { - "epoch": 0.11014248782540732, - "grad_norm": 1.7738467513103828, - "learning_rate": 3.933089357472088e-06, - "loss": 0.9657, - "step": 916 - }, - { - "epoch": 0.11026273071604642, - "grad_norm": 1.8287066325053796, - "learning_rate": 3.932889405940529e-06, - "loss": 1.0654, - "step": 917 - }, - { - "epoch": 0.1103829736066855, - "grad_norm": 1.9974697916661404, - "learning_rate": 3.932689161191184e-06, - "loss": 1.0058, - "step": 918 - }, - { - "epoch": 0.1105032164973246, - "grad_norm": 1.9949214351930689, - "learning_rate": 3.93248862325443e-06, - "loss": 1.0754, - "step": 919 - }, - { - "epoch": 0.11062345938796368, - "grad_norm": 1.1062612220496693, - "learning_rate": 3.932287792160688e-06, - "loss": 0.8521, - "step": 920 - }, - { - "epoch": 0.11074370227860278, - "grad_norm": 2.0766161266604146, - "learning_rate": 3.932086667940424e-06, - "loss": 1.0073, - "step": 921 - }, - { - "epoch": 0.11086394516924186, - "grad_norm": 1.8316823143098313, - "learning_rate": 3.93188525062415e-06, - "loss": 1.0126, - "step": 922 - }, - { - "epoch": 0.11098418805988096, - "grad_norm": 1.7702996814933714, - "learning_rate": 3.931683540242418e-06, - "loss": 1.0662, - "step": 923 - }, - { - "epoch": 0.11110443095052006, - "grad_norm": 2.4683639610840746, - "learning_rate": 3.9314815368258295e-06, - "loss": 1.1105, - "step": 924 - }, - { - "epoch": 0.11122467384115914, - "grad_norm": 1.6103944234886964, - "learning_rate": 3.9312792404050275e-06, - "loss": 0.9842, - "step": 925 - }, - { - "epoch": 0.11134491673179824, - "grad_norm": 1.5747210361538078, - "learning_rate": 3.9310766510107e-06, - "loss": 0.9783, - "step": 926 - }, - { - "epoch": 0.11146515962243732, - "grad_norm": 1.8489724243773318, - "learning_rate": 3.9308737686735806e-06, - "loss": 1.122, - "step": 927 - }, - { - "epoch": 0.11158540251307641, - "grad_norm": 2.0463018415685017, - "learning_rate": 3.9306705934244455e-06, - "loss": 1.0289, - "step": 928 - }, - { - "epoch": 0.11170564540371551, - "grad_norm": 1.6610428181916224, - "learning_rate": 3.930467125294116e-06, - "loss": 1.0852, - "step": 929 - }, - { - "epoch": 0.1118258882943546, - "grad_norm": 1.0830610312069355, - "learning_rate": 3.930263364313458e-06, - "loss": 0.8327, - "step": 930 - }, - { - "epoch": 0.11194613118499369, - "grad_norm": 2.1715649719707293, - "learning_rate": 3.930059310513384e-06, - "loss": 1.0421, - "step": 931 - }, - { - "epoch": 0.11206637407563277, - "grad_norm": 1.6080197303614814, - "learning_rate": 3.929854963924846e-06, - "loss": 1.0293, - "step": 932 - }, - { - "epoch": 0.11218661696627187, - "grad_norm": 1.9143945522723416, - "learning_rate": 3.929650324578845e-06, - "loss": 0.9761, - "step": 933 - }, - { - "epoch": 0.11230685985691095, - "grad_norm": 2.3760541283128647, - "learning_rate": 3.929445392506423e-06, - "loss": 1.0357, - "step": 934 - }, - { - "epoch": 0.11242710274755005, - "grad_norm": 1.7526421356244994, - "learning_rate": 3.92924016773867e-06, - "loss": 0.9699, - "step": 935 - }, - { - "epoch": 0.11254734563818915, - "grad_norm": 3.0708347487910066, - "learning_rate": 3.9290346503067175e-06, - "loss": 0.9471, - "step": 936 - }, - { - "epoch": 0.11266758852882823, - "grad_norm": 1.7992714202101932, - "learning_rate": 3.9288288402417415e-06, - "loss": 0.9983, - "step": 937 - }, - { - "epoch": 0.11278783141946733, - "grad_norm": 2.0400523855925647, - "learning_rate": 3.928622737574964e-06, - "loss": 0.9019, - "step": 938 - }, - { - "epoch": 0.11290807431010641, - "grad_norm": 1.6962876569690493, - "learning_rate": 3.928416342337652e-06, - "loss": 1.1128, - "step": 939 - }, - { - "epoch": 0.1130283172007455, - "grad_norm": 1.7607920169391114, - "learning_rate": 3.928209654561113e-06, - "loss": 1.0343, - "step": 940 - }, - { - "epoch": 0.1131485600913846, - "grad_norm": 1.8871256071737716, - "learning_rate": 3.928002674276703e-06, - "loss": 1.0073, - "step": 941 - }, - { - "epoch": 0.11326880298202369, - "grad_norm": 2.059618237129115, - "learning_rate": 3.92779540151582e-06, - "loss": 0.9614, - "step": 942 - }, - { - "epoch": 0.11338904587266278, - "grad_norm": 1.7609746676891522, - "learning_rate": 3.927587836309907e-06, - "loss": 1.0647, - "step": 943 - }, - { - "epoch": 0.11350928876330187, - "grad_norm": 1.7971063879550666, - "learning_rate": 3.927379978690452e-06, - "loss": 0.9869, - "step": 944 - }, - { - "epoch": 0.11362953165394096, - "grad_norm": 1.9675900508722912, - "learning_rate": 3.927171828688987e-06, - "loss": 1.0688, - "step": 945 - }, - { - "epoch": 0.11374977454458005, - "grad_norm": 2.0669733722302253, - "learning_rate": 3.926963386337088e-06, - "loss": 1.0167, - "step": 946 - }, - { - "epoch": 0.11387001743521914, - "grad_norm": 2.983981441092369, - "learning_rate": 3.926754651666375e-06, - "loss": 0.9036, - "step": 947 - }, - { - "epoch": 0.11399026032585824, - "grad_norm": 2.6049610197991164, - "learning_rate": 3.926545624708513e-06, - "loss": 0.9872, - "step": 948 - }, - { - "epoch": 0.11411050321649732, - "grad_norm": 1.7273058089597266, - "learning_rate": 3.926336305495213e-06, - "loss": 1.0622, - "step": 949 - }, - { - "epoch": 0.11423074610713642, - "grad_norm": 1.984155717883876, - "learning_rate": 3.926126694058226e-06, - "loss": 1.0882, - "step": 950 - }, - { - "epoch": 0.1143509889977755, - "grad_norm": 1.3730622856393138, - "learning_rate": 3.92591679042935e-06, - "loss": 1.0182, - "step": 951 - }, - { - "epoch": 0.1144712318884146, - "grad_norm": 1.7116515877247391, - "learning_rate": 3.92570659464043e-06, - "loss": 1.0235, - "step": 952 - }, - { - "epoch": 0.1145914747790537, - "grad_norm": 1.796591031399869, - "learning_rate": 3.925496106723349e-06, - "loss": 0.9978, - "step": 953 - }, - { - "epoch": 0.11471171766969278, - "grad_norm": 2.014569383143582, - "learning_rate": 3.9252853267100405e-06, - "loss": 1.0343, - "step": 954 - }, - { - "epoch": 0.11483196056033187, - "grad_norm": 1.7618288399074868, - "learning_rate": 3.9250742546324786e-06, - "loss": 1.0395, - "step": 955 - }, - { - "epoch": 0.11495220345097096, - "grad_norm": 1.5945701732678892, - "learning_rate": 3.924862890522683e-06, - "loss": 1.0648, - "step": 956 - }, - { - "epoch": 0.11507244634161005, - "grad_norm": 2.1498069144197833, - "learning_rate": 3.9246512344127174e-06, - "loss": 1.0629, - "step": 957 - }, - { - "epoch": 0.11519268923224914, - "grad_norm": 1.7243925241241476, - "learning_rate": 3.9244392863346895e-06, - "loss": 1.0222, - "step": 958 - }, - { - "epoch": 0.11531293212288823, - "grad_norm": 1.8114354526097687, - "learning_rate": 3.9242270463207524e-06, - "loss": 1.116, - "step": 959 - }, - { - "epoch": 0.11543317501352733, - "grad_norm": 3.049092898946946, - "learning_rate": 3.924014514403102e-06, - "loss": 1.0647, - "step": 960 - }, - { - "epoch": 0.11555341790416641, - "grad_norm": 2.9896924644692957, - "learning_rate": 3.92380169061398e-06, - "loss": 1.1208, - "step": 961 - }, - { - "epoch": 0.11567366079480551, - "grad_norm": 3.8213336102613558, - "learning_rate": 3.9235885749856705e-06, - "loss": 1.0502, - "step": 962 - }, - { - "epoch": 0.1157939036854446, - "grad_norm": 1.864533201712288, - "learning_rate": 3.9233751675505035e-06, - "loss": 1.0371, - "step": 963 - }, - { - "epoch": 0.11591414657608369, - "grad_norm": 1.958765597692146, - "learning_rate": 3.923161468340853e-06, - "loss": 1.0369, - "step": 964 - }, - { - "epoch": 0.11603438946672277, - "grad_norm": 2.124380133483197, - "learning_rate": 3.9229474773891374e-06, - "loss": 1.0169, - "step": 965 - }, - { - "epoch": 0.11615463235736187, - "grad_norm": 1.7242516290237455, - "learning_rate": 3.922733194727818e-06, - "loss": 1.0443, - "step": 966 - }, - { - "epoch": 0.11627487524800097, - "grad_norm": 2.0293226047680077, - "learning_rate": 3.922518620389402e-06, - "loss": 1.0677, - "step": 967 - }, - { - "epoch": 0.11639511813864005, - "grad_norm": 1.6564089860723514, - "learning_rate": 3.922303754406439e-06, - "loss": 1.1061, - "step": 968 - }, - { - "epoch": 0.11651536102927915, - "grad_norm": 1.7161849562064404, - "learning_rate": 3.922088596811526e-06, - "loss": 0.9895, - "step": 969 - }, - { - "epoch": 0.11663560391991823, - "grad_norm": 1.9164141474800966, - "learning_rate": 3.9218731476373e-06, - "loss": 1.063, - "step": 970 - }, - { - "epoch": 0.11675584681055733, - "grad_norm": 1.7445317392651785, - "learning_rate": 3.9216574069164455e-06, - "loss": 1.0542, - "step": 971 - }, - { - "epoch": 0.11687608970119642, - "grad_norm": 1.425469936716158, - "learning_rate": 3.921441374681691e-06, - "loss": 1.0053, - "step": 972 - }, - { - "epoch": 0.1169963325918355, - "grad_norm": 1.8010769350986673, - "learning_rate": 3.921225050965808e-06, - "loss": 0.8487, - "step": 973 - }, - { - "epoch": 0.1171165754824746, - "grad_norm": 2.495163626341326, - "learning_rate": 3.921008435801612e-06, - "loss": 0.9505, - "step": 974 - }, - { - "epoch": 0.11723681837311369, - "grad_norm": 2.343189121527929, - "learning_rate": 3.920791529221963e-06, - "loss": 0.9635, - "step": 975 - }, - { - "epoch": 0.11735706126375278, - "grad_norm": 1.6968598928885525, - "learning_rate": 3.920574331259768e-06, - "loss": 0.963, - "step": 976 - }, - { - "epoch": 0.11747730415439187, - "grad_norm": 2.2629782113355064, - "learning_rate": 3.9203568419479716e-06, - "loss": 1.0061, - "step": 977 - }, - { - "epoch": 0.11759754704503096, - "grad_norm": 1.838782650406788, - "learning_rate": 3.92013906131957e-06, - "loss": 0.9564, - "step": 978 - }, - { - "epoch": 0.11771778993567006, - "grad_norm": 1.5182077483754233, - "learning_rate": 3.9199209894076e-06, - "loss": 1.0224, - "step": 979 - }, - { - "epoch": 0.11783803282630914, - "grad_norm": 1.7221962796926933, - "learning_rate": 3.919702626245142e-06, - "loss": 1.1058, - "step": 980 - }, - { - "epoch": 0.11795827571694824, - "grad_norm": 2.9108813720140634, - "learning_rate": 3.919483971865322e-06, - "loss": 0.8582, - "step": 981 - }, - { - "epoch": 0.11807851860758732, - "grad_norm": 1.7411276839673309, - "learning_rate": 3.91926502630131e-06, - "loss": 1.0734, - "step": 982 - }, - { - "epoch": 0.11819876149822642, - "grad_norm": 2.0760156926203277, - "learning_rate": 3.91904578958632e-06, - "loss": 0.9275, - "step": 983 - }, - { - "epoch": 0.11831900438886551, - "grad_norm": 1.898077471467448, - "learning_rate": 3.918826261753608e-06, - "loss": 1.0502, - "step": 984 - }, - { - "epoch": 0.1184392472795046, - "grad_norm": 2.3865641092752283, - "learning_rate": 3.918606442836478e-06, - "loss": 0.9065, - "step": 985 - }, - { - "epoch": 0.1185594901701437, - "grad_norm": 1.828857927038418, - "learning_rate": 3.918386332868277e-06, - "loss": 0.9773, - "step": 986 - }, - { - "epoch": 0.11867973306078278, - "grad_norm": 1.6461143575644832, - "learning_rate": 3.918165931882394e-06, - "loss": 1.1465, - "step": 987 - }, - { - "epoch": 0.11879997595142187, - "grad_norm": 2.203760799353369, - "learning_rate": 3.917945239912264e-06, - "loss": 0.9651, - "step": 988 - }, - { - "epoch": 0.11892021884206096, - "grad_norm": 2.0257379717751625, - "learning_rate": 3.917724256991367e-06, - "loss": 0.9614, - "step": 989 - }, - { - "epoch": 0.11904046173270005, - "grad_norm": 2.1948846692002038, - "learning_rate": 3.9175029831532245e-06, - "loss": 1.0173, - "step": 990 - }, - { - "epoch": 0.11916070462333915, - "grad_norm": 1.9111808416055422, - "learning_rate": 3.917281418431404e-06, - "loss": 1.0833, - "step": 991 - }, - { - "epoch": 0.11928094751397823, - "grad_norm": 13.24908328933789, - "learning_rate": 3.917059562859516e-06, - "loss": 0.979, - "step": 992 - }, - { - "epoch": 0.11940119040461733, - "grad_norm": 1.9544310208627649, - "learning_rate": 3.916837416471218e-06, - "loss": 1.0921, - "step": 993 - }, - { - "epoch": 0.11952143329525641, - "grad_norm": 2.4016112164412715, - "learning_rate": 3.916614979300207e-06, - "loss": 0.9333, - "step": 994 - }, - { - "epoch": 0.11964167618589551, - "grad_norm": 1.456744863883071, - "learning_rate": 3.9163922513802274e-06, - "loss": 0.9897, - "step": 995 - }, - { - "epoch": 0.1197619190765346, - "grad_norm": 2.427681834111917, - "learning_rate": 3.916169232745067e-06, - "loss": 1.0298, - "step": 996 - }, - { - "epoch": 0.11988216196717369, - "grad_norm": 2.830757559757031, - "learning_rate": 3.915945923428559e-06, - "loss": 1.1171, - "step": 997 - }, - { - "epoch": 0.12000240485781279, - "grad_norm": 2.3599236463059654, - "learning_rate": 3.915722323464577e-06, - "loss": 1.037, - "step": 998 - }, - { - "epoch": 0.12012264774845187, - "grad_norm": 3.39373973753556, - "learning_rate": 3.91549843288704e-06, - "loss": 0.9161, - "step": 999 - }, - { - "epoch": 0.12024289063909097, - "grad_norm": 1.9928032017120467, - "learning_rate": 3.915274251729916e-06, - "loss": 0.9985, - "step": 1000 - }, - { - "epoch": 0.12036313352973005, - "grad_norm": 1.8236398043686919, - "learning_rate": 3.91504978002721e-06, - "loss": 1.1076, - "step": 1001 - }, - { - "epoch": 0.12048337642036915, - "grad_norm": 2.034922756631751, - "learning_rate": 3.914825017812974e-06, - "loss": 0.9749, - "step": 1002 - }, - { - "epoch": 0.12060361931100824, - "grad_norm": 2.0223612040082104, - "learning_rate": 3.9145999651213065e-06, - "loss": 0.935, - "step": 1003 - }, - { - "epoch": 0.12072386220164733, - "grad_norm": 2.319411696953475, - "learning_rate": 3.9143746219863465e-06, - "loss": 1.0897, - "step": 1004 - }, - { - "epoch": 0.12084410509228642, - "grad_norm": 1.1278552802838493, - "learning_rate": 3.914148988442278e-06, - "loss": 0.9147, - "step": 1005 - }, - { - "epoch": 0.1209643479829255, - "grad_norm": 2.2577193613872195, - "learning_rate": 3.91392306452333e-06, - "loss": 1.1587, - "step": 1006 - }, - { - "epoch": 0.1210845908735646, - "grad_norm": 2.989892462898501, - "learning_rate": 3.913696850263774e-06, - "loss": 0.8809, - "step": 1007 - }, - { - "epoch": 0.1212048337642037, - "grad_norm": 2.0137837598114943, - "learning_rate": 3.913470345697929e-06, - "loss": 0.9889, - "step": 1008 - }, - { - "epoch": 0.12132507665484278, - "grad_norm": 1.923353789279698, - "learning_rate": 3.913243550860153e-06, - "loss": 1.0608, - "step": 1009 - }, - { - "epoch": 0.12144531954548188, - "grad_norm": 1.628522210166173, - "learning_rate": 3.913016465784852e-06, - "loss": 0.9624, - "step": 1010 - }, - { - "epoch": 0.12156556243612096, - "grad_norm": 2.344878980202009, - "learning_rate": 3.912789090506474e-06, - "loss": 0.9327, - "step": 1011 - }, - { - "epoch": 0.12168580532676006, - "grad_norm": 4.640332350831336, - "learning_rate": 3.9125614250595114e-06, - "loss": 0.9297, - "step": 1012 - }, - { - "epoch": 0.12180604821739914, - "grad_norm": 8.929787035343118, - "learning_rate": 3.912333469478502e-06, - "loss": 1.0912, - "step": 1013 - }, - { - "epoch": 0.12192629110803824, - "grad_norm": 1.725675052744518, - "learning_rate": 3.912105223798025e-06, - "loss": 0.9836, - "step": 1014 - }, - { - "epoch": 0.12204653399867733, - "grad_norm": 1.158420227185446, - "learning_rate": 3.9118766880527065e-06, - "loss": 0.9165, - "step": 1015 - }, - { - "epoch": 0.12216677688931642, - "grad_norm": 1.5364687989304142, - "learning_rate": 3.9116478622772145e-06, - "loss": 0.9303, - "step": 1016 - }, - { - "epoch": 0.12228701977995551, - "grad_norm": 1.614801092742482, - "learning_rate": 3.911418746506261e-06, - "loss": 1.0738, - "step": 1017 - }, - { - "epoch": 0.1224072626705946, - "grad_norm": 1.662781128023744, - "learning_rate": 3.911189340774604e-06, - "loss": 0.9861, - "step": 1018 - }, - { - "epoch": 0.1225275055612337, - "grad_norm": 1.668773876690871, - "learning_rate": 3.910959645117043e-06, - "loss": 1.0028, - "step": 1019 - }, - { - "epoch": 0.12264774845187278, - "grad_norm": 1.005119036461946, - "learning_rate": 3.910729659568423e-06, - "loss": 0.7911, - "step": 1020 - }, - { - "epoch": 0.12276799134251187, - "grad_norm": 1.7322472244528944, - "learning_rate": 3.9104993841636344e-06, - "loss": 1.0226, - "step": 1021 - }, - { - "epoch": 0.12288823423315097, - "grad_norm": 1.7910325976184274, - "learning_rate": 3.910268818937608e-06, - "loss": 1.0123, - "step": 1022 - }, - { - "epoch": 0.12300847712379005, - "grad_norm": 2.408967304494001, - "learning_rate": 3.9100379639253196e-06, - "loss": 1.0791, - "step": 1023 - }, - { - "epoch": 0.12312872001442915, - "grad_norm": 2.540173847827164, - "learning_rate": 3.909806819161791e-06, - "loss": 1.0539, - "step": 1024 - }, - { - "epoch": 0.12324896290506823, - "grad_norm": 2.0941398599257135, - "learning_rate": 3.909575384682086e-06, - "loss": 1.0706, - "step": 1025 - }, - { - "epoch": 0.12336920579570733, - "grad_norm": 1.8501249116534546, - "learning_rate": 3.9093436605213144e-06, - "loss": 0.8954, - "step": 1026 - }, - { - "epoch": 0.12348944868634643, - "grad_norm": 1.6794404313885019, - "learning_rate": 3.909111646714627e-06, - "loss": 0.9892, - "step": 1027 - }, - { - "epoch": 0.12360969157698551, - "grad_norm": 2.6879066337083217, - "learning_rate": 3.9088793432972206e-06, - "loss": 0.9241, - "step": 1028 - }, - { - "epoch": 0.1237299344676246, - "grad_norm": 2.624665738576593, - "learning_rate": 3.908646750304336e-06, - "loss": 1.025, - "step": 1029 - }, - { - "epoch": 0.12385017735826369, - "grad_norm": 1.5149262380741249, - "learning_rate": 3.908413867771257e-06, - "loss": 1.0679, - "step": 1030 - }, - { - "epoch": 0.12397042024890279, - "grad_norm": 1.6558796450559208, - "learning_rate": 3.908180695733311e-06, - "loss": 1.0151, - "step": 1031 - }, - { - "epoch": 0.12409066313954187, - "grad_norm": 2.1006127235623633, - "learning_rate": 3.907947234225871e-06, - "loss": 1.0274, - "step": 1032 - }, - { - "epoch": 0.12421090603018096, - "grad_norm": 1.8124322549721756, - "learning_rate": 3.907713483284352e-06, - "loss": 1.0754, - "step": 1033 - }, - { - "epoch": 0.12433114892082006, - "grad_norm": 2.009452061339735, - "learning_rate": 3.907479442944216e-06, - "loss": 1.1878, - "step": 1034 - }, - { - "epoch": 0.12445139181145914, - "grad_norm": 1.87953584714063, - "learning_rate": 3.907245113240963e-06, - "loss": 1.1195, - "step": 1035 - }, - { - "epoch": 0.12457163470209824, - "grad_norm": 1.7818217112943269, - "learning_rate": 3.907010494210144e-06, - "loss": 0.9481, - "step": 1036 - }, - { - "epoch": 0.12469187759273732, - "grad_norm": 1.943476902460626, - "learning_rate": 3.9067755858873495e-06, - "loss": 1.1291, - "step": 1037 - }, - { - "epoch": 0.12481212048337642, - "grad_norm": 1.0471945886875744, - "learning_rate": 3.906540388308214e-06, - "loss": 0.8593, - "step": 1038 - }, - { - "epoch": 0.12493236337401552, - "grad_norm": 1.7291787464991468, - "learning_rate": 3.906304901508417e-06, - "loss": 1.0194, - "step": 1039 - }, - { - "epoch": 0.12505260626465461, - "grad_norm": 1.8553990401173106, - "learning_rate": 3.9060691255236835e-06, - "loss": 0.9588, - "step": 1040 - }, - { - "epoch": 0.1251728491552937, - "grad_norm": 1.8456952453122144, - "learning_rate": 3.905833060389778e-06, - "loss": 1.0226, - "step": 1041 - }, - { - "epoch": 0.12529309204593278, - "grad_norm": 2.2226603261578397, - "learning_rate": 3.905596706142513e-06, - "loss": 0.9854, - "step": 1042 - }, - { - "epoch": 0.12541333493657186, - "grad_norm": 1.8797975198792072, - "learning_rate": 3.9053600628177435e-06, - "loss": 1.0568, - "step": 1043 - }, - { - "epoch": 0.12553357782721097, - "grad_norm": 1.8710411876709994, - "learning_rate": 3.905123130451367e-06, - "loss": 1.0508, - "step": 1044 - }, - { - "epoch": 0.12565382071785006, - "grad_norm": 1.949834508475813, - "learning_rate": 3.904885909079326e-06, - "loss": 0.9946, - "step": 1045 - }, - { - "epoch": 0.12577406360848914, - "grad_norm": 4.150068461761165, - "learning_rate": 3.904648398737607e-06, - "loss": 0.9708, - "step": 1046 - }, - { - "epoch": 0.12589430649912825, - "grad_norm": 1.7354766508667685, - "learning_rate": 3.9044105994622406e-06, - "loss": 0.9834, - "step": 1047 - }, - { - "epoch": 0.12601454938976733, - "grad_norm": 1.7963291831936903, - "learning_rate": 3.9041725112893005e-06, - "loss": 1.0199, - "step": 1048 - }, - { - "epoch": 0.12613479228040642, - "grad_norm": 1.5380782644897895, - "learning_rate": 3.903934134254904e-06, - "loss": 0.9546, - "step": 1049 - }, - { - "epoch": 0.1262550351710455, - "grad_norm": 2.0062449232375896, - "learning_rate": 3.903695468395213e-06, - "loss": 1.0444, - "step": 1050 - }, - { - "epoch": 0.1263752780616846, - "grad_norm": 1.80523594040699, - "learning_rate": 3.903456513746434e-06, - "loss": 0.7638, - "step": 1051 - }, - { - "epoch": 0.1264955209523237, - "grad_norm": 1.7900595073292014, - "learning_rate": 3.903217270344815e-06, - "loss": 1.0842, - "step": 1052 - }, - { - "epoch": 0.12661576384296278, - "grad_norm": 1.8132831591555412, - "learning_rate": 3.902977738226648e-06, - "loss": 1.0304, - "step": 1053 - }, - { - "epoch": 0.12673600673360189, - "grad_norm": 1.9099994119173913, - "learning_rate": 3.902737917428273e-06, - "loss": 1.1161, - "step": 1054 - }, - { - "epoch": 0.12685624962424097, - "grad_norm": 1.7069378091827847, - "learning_rate": 3.902497807986068e-06, - "loss": 1.0392, - "step": 1055 - }, - { - "epoch": 0.12697649251488005, - "grad_norm": 1.5434981970892367, - "learning_rate": 3.902257409936458e-06, - "loss": 1.0377, - "step": 1056 - }, - { - "epoch": 0.12709673540551916, - "grad_norm": 1.951908007076094, - "learning_rate": 3.902016723315912e-06, - "loss": 1.0446, - "step": 1057 - }, - { - "epoch": 0.12721697829615825, - "grad_norm": 2.272075336758661, - "learning_rate": 3.901775748160941e-06, - "loss": 0.8981, - "step": 1058 - }, - { - "epoch": 0.12733722118679733, - "grad_norm": 0.9539878566616867, - "learning_rate": 3.901534484508101e-06, - "loss": 0.8345, - "step": 1059 - }, - { - "epoch": 0.1274574640774364, - "grad_norm": 1.890551478950459, - "learning_rate": 3.901292932393991e-06, - "loss": 0.9483, - "step": 1060 - }, - { - "epoch": 0.12757770696807552, - "grad_norm": 2.0571308185777664, - "learning_rate": 3.9010510918552555e-06, - "loss": 1.0547, - "step": 1061 - }, - { - "epoch": 0.1276979498587146, - "grad_norm": 2.6007287691366883, - "learning_rate": 3.900808962928581e-06, - "loss": 0.9601, - "step": 1062 - }, - { - "epoch": 0.1278181927493537, - "grad_norm": 2.019147294104144, - "learning_rate": 3.900566545650698e-06, - "loss": 1.0966, - "step": 1063 - }, - { - "epoch": 0.1279384356399928, - "grad_norm": 2.240163068625953, - "learning_rate": 3.900323840058381e-06, - "loss": 1.0124, - "step": 1064 - }, - { - "epoch": 0.12805867853063188, - "grad_norm": 1.78876649498415, - "learning_rate": 3.900080846188449e-06, - "loss": 1.0159, - "step": 1065 - }, - { - "epoch": 0.12817892142127096, - "grad_norm": 1.7180183934302902, - "learning_rate": 3.8998375640777625e-06, - "loss": 1.0127, - "step": 1066 - }, - { - "epoch": 0.12829916431191005, - "grad_norm": 0.969052944191592, - "learning_rate": 3.899593993763229e-06, - "loss": 0.7488, - "step": 1067 - }, - { - "epoch": 0.12841940720254916, - "grad_norm": 2.2201377934143696, - "learning_rate": 3.899350135281796e-06, - "loss": 1.0183, - "step": 1068 - }, - { - "epoch": 0.12853965009318824, - "grad_norm": 2.117875441378192, - "learning_rate": 3.8991059886704585e-06, - "loss": 0.9916, - "step": 1069 - }, - { - "epoch": 0.12865989298382732, - "grad_norm": 2.0506718488304494, - "learning_rate": 3.898861553966252e-06, - "loss": 1.0259, - "step": 1070 - }, - { - "epoch": 0.12878013587446643, - "grad_norm": 1.6741656716424325, - "learning_rate": 3.898616831206257e-06, - "loss": 1.08, - "step": 1071 - }, - { - "epoch": 0.12890037876510552, - "grad_norm": 2.490270086294186, - "learning_rate": 3.8983718204276e-06, - "loss": 0.9764, - "step": 1072 - }, - { - "epoch": 0.1290206216557446, - "grad_norm": 1.6598199481293536, - "learning_rate": 3.898126521667446e-06, - "loss": 1.0338, - "step": 1073 - }, - { - "epoch": 0.12914086454638368, - "grad_norm": 1.6660243545070976, - "learning_rate": 3.897880934963007e-06, - "loss": 1.0405, - "step": 1074 - }, - { - "epoch": 0.1292611074370228, - "grad_norm": 1.948232764505721, - "learning_rate": 3.89763506035154e-06, - "loss": 0.9824, - "step": 1075 - }, - { - "epoch": 0.12938135032766188, - "grad_norm": 1.63255994385846, - "learning_rate": 3.897388897870343e-06, - "loss": 1.0181, - "step": 1076 - }, - { - "epoch": 0.12950159321830096, - "grad_norm": 1.9747121842633424, - "learning_rate": 3.89714244755676e-06, - "loss": 0.9475, - "step": 1077 - }, - { - "epoch": 0.12962183610894007, - "grad_norm": 2.16726623700161, - "learning_rate": 3.896895709448175e-06, - "loss": 1.0641, - "step": 1078 - }, - { - "epoch": 0.12974207899957915, - "grad_norm": 6.510613799098794, - "learning_rate": 3.896648683582019e-06, - "loss": 0.9715, - "step": 1079 - }, - { - "epoch": 0.12986232189021824, - "grad_norm": 2.047935967986643, - "learning_rate": 3.896401369995766e-06, - "loss": 1.0096, - "step": 1080 - }, - { - "epoch": 0.12998256478085732, - "grad_norm": 1.6645375321460467, - "learning_rate": 3.896153768726932e-06, - "loss": 0.995, - "step": 1081 - }, - { - "epoch": 0.13010280767149643, - "grad_norm": 1.9443851696546706, - "learning_rate": 3.8959058798130806e-06, - "loss": 1.0817, - "step": 1082 - }, - { - "epoch": 0.1302230505621355, - "grad_norm": 1.7090405961734485, - "learning_rate": 3.895657703291814e-06, - "loss": 0.9545, - "step": 1083 - }, - { - "epoch": 0.1303432934527746, - "grad_norm": 2.335048136245586, - "learning_rate": 3.895409239200781e-06, - "loss": 1.0084, - "step": 1084 - }, - { - "epoch": 0.1304635363434137, - "grad_norm": 2.325873393423711, - "learning_rate": 3.895160487577673e-06, - "loss": 1.1242, - "step": 1085 - }, - { - "epoch": 0.1305837792340528, - "grad_norm": 1.032108065499155, - "learning_rate": 3.894911448460226e-06, - "loss": 0.8295, - "step": 1086 - }, - { - "epoch": 0.13070402212469187, - "grad_norm": 1.7073748000617035, - "learning_rate": 3.8946621218862195e-06, - "loss": 0.9301, - "step": 1087 - }, - { - "epoch": 0.13082426501533098, - "grad_norm": 1.9508273228103665, - "learning_rate": 3.894412507893475e-06, - "loss": 1.0938, - "step": 1088 - }, - { - "epoch": 0.13094450790597006, - "grad_norm": 1.8872761347309448, - "learning_rate": 3.894162606519859e-06, - "loss": 0.9289, - "step": 1089 - }, - { - "epoch": 0.13106475079660915, - "grad_norm": 1.789982854665127, - "learning_rate": 3.893912417803282e-06, - "loss": 0.9744, - "step": 1090 - }, - { - "epoch": 0.13118499368724823, - "grad_norm": 1.7516113764454195, - "learning_rate": 3.8936619417816975e-06, - "loss": 0.9682, - "step": 1091 - }, - { - "epoch": 0.13130523657788734, - "grad_norm": 1.8233784408615323, - "learning_rate": 3.8934111784931015e-06, - "loss": 0.928, - "step": 1092 - }, - { - "epoch": 0.13142547946852642, - "grad_norm": 1.104007361574093, - "learning_rate": 3.893160127975535e-06, - "loss": 0.8396, - "step": 1093 - }, - { - "epoch": 0.1315457223591655, - "grad_norm": 2.082375904063134, - "learning_rate": 3.8929087902670826e-06, - "loss": 1.0125, - "step": 1094 - }, - { - "epoch": 0.13166596524980462, - "grad_norm": 1.1631524541813472, - "learning_rate": 3.8926571654058715e-06, - "loss": 0.8237, - "step": 1095 - }, - { - "epoch": 0.1317862081404437, - "grad_norm": 2.2578972929241683, - "learning_rate": 3.892405253430074e-06, - "loss": 0.9742, - "step": 1096 - }, - { - "epoch": 0.13190645103108278, - "grad_norm": 1.8115329135533826, - "learning_rate": 3.892153054377904e-06, - "loss": 1.0281, - "step": 1097 - }, - { - "epoch": 0.13202669392172187, - "grad_norm": 1.1987866048623907, - "learning_rate": 3.891900568287619e-06, - "loss": 0.8341, - "step": 1098 - }, - { - "epoch": 0.13214693681236098, - "grad_norm": 2.2079443199075497, - "learning_rate": 3.891647795197523e-06, - "loss": 0.9306, - "step": 1099 - }, - { - "epoch": 0.13226717970300006, - "grad_norm": 1.9240901555571757, - "learning_rate": 3.8913947351459605e-06, - "loss": 0.8962, - "step": 1100 - }, - { - "epoch": 0.13238742259363914, - "grad_norm": 1.747370106204012, - "learning_rate": 3.89114138817132e-06, - "loss": 0.8843, - "step": 1101 - }, - { - "epoch": 0.13250766548427825, - "grad_norm": 1.8836661655171316, - "learning_rate": 3.890887754312035e-06, - "loss": 1.0497, - "step": 1102 - }, - { - "epoch": 0.13262790837491734, - "grad_norm": 1.6758331788738121, - "learning_rate": 3.890633833606581e-06, - "loss": 1.0734, - "step": 1103 - }, - { - "epoch": 0.13274815126555642, - "grad_norm": 1.8171521695901776, - "learning_rate": 3.890379626093477e-06, - "loss": 0.8953, - "step": 1104 - }, - { - "epoch": 0.1328683941561955, - "grad_norm": 1.9461146160218437, - "learning_rate": 3.890125131811287e-06, - "loss": 1.1168, - "step": 1105 - }, - { - "epoch": 0.1329886370468346, - "grad_norm": 1.7631169963926812, - "learning_rate": 3.889870350798618e-06, - "loss": 0.9557, - "step": 1106 - }, - { - "epoch": 0.1331088799374737, - "grad_norm": 1.5276429646669205, - "learning_rate": 3.889615283094119e-06, - "loss": 0.9935, - "step": 1107 - }, - { - "epoch": 0.13322912282811278, - "grad_norm": 2.076200813386047, - "learning_rate": 3.889359928736485e-06, - "loss": 1.0496, - "step": 1108 - }, - { - "epoch": 0.1333493657187519, - "grad_norm": 1.7624130065226773, - "learning_rate": 3.889104287764451e-06, - "loss": 1.1112, - "step": 1109 - }, - { - "epoch": 0.13346960860939097, - "grad_norm": 1.9667885106807619, - "learning_rate": 3.888848360216798e-06, - "loss": 1.1061, - "step": 1110 - }, - { - "epoch": 0.13358985150003005, - "grad_norm": 1.003486753458996, - "learning_rate": 3.888592146132351e-06, - "loss": 0.7796, - "step": 1111 - }, - { - "epoch": 0.13371009439066917, - "grad_norm": 2.062701128776361, - "learning_rate": 3.888335645549978e-06, - "loss": 0.9891, - "step": 1112 - }, - { - "epoch": 0.13383033728130825, - "grad_norm": 2.172306594229059, - "learning_rate": 3.888078858508588e-06, - "loss": 1.0313, - "step": 1113 - }, - { - "epoch": 0.13395058017194733, - "grad_norm": 1.7424601525412193, - "learning_rate": 3.8878217850471365e-06, - "loss": 1.0482, - "step": 1114 - }, - { - "epoch": 0.13407082306258641, - "grad_norm": 1.894991537593577, - "learning_rate": 3.887564425204621e-06, - "loss": 0.9452, - "step": 1115 - }, - { - "epoch": 0.13419106595322552, - "grad_norm": 1.0267281083716895, - "learning_rate": 3.887306779020083e-06, - "loss": 0.771, - "step": 1116 - }, - { - "epoch": 0.1343113088438646, - "grad_norm": 2.1314865294282948, - "learning_rate": 3.887048846532608e-06, - "loss": 0.9207, - "step": 1117 - }, - { - "epoch": 0.1344315517345037, - "grad_norm": 0.9452735325371739, - "learning_rate": 3.8867906277813224e-06, - "loss": 0.7937, - "step": 1118 - }, - { - "epoch": 0.1345517946251428, - "grad_norm": 1.9388687467333434, - "learning_rate": 3.886532122805399e-06, - "loss": 0.945, - "step": 1119 - }, - { - "epoch": 0.13467203751578188, - "grad_norm": 2.315186254864683, - "learning_rate": 3.886273331644053e-06, - "loss": 1.0969, - "step": 1120 - }, - { - "epoch": 0.13479228040642097, - "grad_norm": 2.4528793892334413, - "learning_rate": 3.886014254336542e-06, - "loss": 1.0246, - "step": 1121 - }, - { - "epoch": 0.13491252329706005, - "grad_norm": 1.6850767750893598, - "learning_rate": 3.885754890922168e-06, - "loss": 1.1254, - "step": 1122 - }, - { - "epoch": 0.13503276618769916, - "grad_norm": 1.8559633258410753, - "learning_rate": 3.885495241440277e-06, - "loss": 0.9876, - "step": 1123 - }, - { - "epoch": 0.13515300907833824, - "grad_norm": 1.784775106864484, - "learning_rate": 3.885235305930257e-06, - "loss": 0.9503, - "step": 1124 - }, - { - "epoch": 0.13527325196897733, - "grad_norm": 1.8990551607016188, - "learning_rate": 3.884975084431539e-06, - "loss": 1.0656, - "step": 1125 - }, - { - "epoch": 0.13539349485961644, - "grad_norm": 2.2426728577265673, - "learning_rate": 3.8847145769836e-06, - "loss": 1.1247, - "step": 1126 - }, - { - "epoch": 0.13551373775025552, - "grad_norm": 3.396619990374076, - "learning_rate": 3.884453783625959e-06, - "loss": 0.883, - "step": 1127 - }, - { - "epoch": 0.1356339806408946, - "grad_norm": 2.2956258270403427, - "learning_rate": 3.884192704398176e-06, - "loss": 1.0618, - "step": 1128 - }, - { - "epoch": 0.13575422353153369, - "grad_norm": 1.6578273819265323, - "learning_rate": 3.883931339339858e-06, - "loss": 0.9499, - "step": 1129 - }, - { - "epoch": 0.1358744664221728, - "grad_norm": 1.894854016682381, - "learning_rate": 3.883669688490654e-06, - "loss": 0.9891, - "step": 1130 - }, - { - "epoch": 0.13599470931281188, - "grad_norm": 1.7058896365406775, - "learning_rate": 3.883407751890256e-06, - "loss": 1.0541, - "step": 1131 - }, - { - "epoch": 0.13611495220345096, - "grad_norm": 1.6327781856161823, - "learning_rate": 3.8831455295783994e-06, - "loss": 1.0567, - "step": 1132 - }, - { - "epoch": 0.13623519509409007, - "grad_norm": 3.453429333838809, - "learning_rate": 3.882883021594864e-06, - "loss": 0.9338, - "step": 1133 - }, - { - "epoch": 0.13635543798472916, - "grad_norm": 3.880839961655646, - "learning_rate": 3.8826202279794705e-06, - "loss": 1.0693, - "step": 1134 - }, - { - "epoch": 0.13647568087536824, - "grad_norm": 1.8848331735790627, - "learning_rate": 3.882357148772085e-06, - "loss": 0.9033, - "step": 1135 - }, - { - "epoch": 0.13659592376600732, - "grad_norm": 2.376178047740318, - "learning_rate": 3.882093784012617e-06, - "loss": 1.0519, - "step": 1136 - }, - { - "epoch": 0.13671616665664643, - "grad_norm": 1.6445598101457815, - "learning_rate": 3.881830133741019e-06, - "loss": 1.0373, - "step": 1137 - }, - { - "epoch": 0.13683640954728551, - "grad_norm": 1.9264138442060055, - "learning_rate": 3.881566197997285e-06, - "loss": 0.9679, - "step": 1138 - }, - { - "epoch": 0.1369566524379246, - "grad_norm": 1.390093927022935, - "learning_rate": 3.881301976821456e-06, - "loss": 0.9474, - "step": 1139 - }, - { - "epoch": 0.1370768953285637, - "grad_norm": 1.882831051050499, - "learning_rate": 3.881037470253612e-06, - "loss": 1.1086, - "step": 1140 - }, - { - "epoch": 0.1371971382192028, - "grad_norm": 2.269017067735348, - "learning_rate": 3.88077267833388e-06, - "loss": 1.0051, - "step": 1141 - }, - { - "epoch": 0.13731738110984187, - "grad_norm": 2.3714729018507983, - "learning_rate": 3.880507601102427e-06, - "loss": 1.0336, - "step": 1142 - }, - { - "epoch": 0.13743762400048098, - "grad_norm": 1.6278600466179893, - "learning_rate": 3.880242238599467e-06, - "loss": 1.0226, - "step": 1143 - }, - { - "epoch": 0.13755786689112007, - "grad_norm": 1.7813589497759224, - "learning_rate": 3.879976590865254e-06, - "loss": 1.0404, - "step": 1144 - }, - { - "epoch": 0.13767810978175915, - "grad_norm": 1.793605118060913, - "learning_rate": 3.879710657940087e-06, - "loss": 1.0674, - "step": 1145 - }, - { - "epoch": 0.13779835267239823, - "grad_norm": 1.8921619819917561, - "learning_rate": 3.879444439864308e-06, - "loss": 0.9029, - "step": 1146 - }, - { - "epoch": 0.13791859556303734, - "grad_norm": 1.4650136620206744, - "learning_rate": 3.879177936678301e-06, - "loss": 1.0587, - "step": 1147 - }, - { - "epoch": 0.13803883845367643, - "grad_norm": 2.009601078396708, - "learning_rate": 3.878911148422496e-06, - "loss": 0.9853, - "step": 1148 - }, - { - "epoch": 0.1381590813443155, - "grad_norm": 4.870640884308898, - "learning_rate": 3.878644075137364e-06, - "loss": 0.9042, - "step": 1149 - }, - { - "epoch": 0.13827932423495462, - "grad_norm": 1.9883115874788306, - "learning_rate": 3.878376716863418e-06, - "loss": 0.9916, - "step": 1150 - }, - { - "epoch": 0.1383995671255937, - "grad_norm": 2.356146416044192, - "learning_rate": 3.878109073641219e-06, - "loss": 0.9233, - "step": 1151 - }, - { - "epoch": 0.13851981001623279, - "grad_norm": 1.4684410581933276, - "learning_rate": 3.877841145511366e-06, - "loss": 1.0122, - "step": 1152 - }, - { - "epoch": 0.13864005290687187, - "grad_norm": 1.6123548900752844, - "learning_rate": 3.8775729325145035e-06, - "loss": 1.0288, - "step": 1153 - }, - { - "epoch": 0.13876029579751098, - "grad_norm": 0.8553451727528157, - "learning_rate": 3.877304434691321e-06, - "loss": 0.8448, - "step": 1154 - }, - { - "epoch": 0.13888053868815006, - "grad_norm": 1.6245932436759243, - "learning_rate": 3.877035652082548e-06, - "loss": 0.9967, - "step": 1155 - }, - { - "epoch": 0.13900078157878915, - "grad_norm": 1.714166320605406, - "learning_rate": 3.87676658472896e-06, - "loss": 1.0442, - "step": 1156 - }, - { - "epoch": 0.13912102446942826, - "grad_norm": 2.223830196313314, - "learning_rate": 3.876497232671372e-06, - "loss": 1.0572, - "step": 1157 - }, - { - "epoch": 0.13924126736006734, - "grad_norm": 1.9929982050472739, - "learning_rate": 3.876227595950647e-06, - "loss": 1.0341, - "step": 1158 - }, - { - "epoch": 0.13936151025070642, - "grad_norm": 1.393666060447485, - "learning_rate": 3.875957674607686e-06, - "loss": 0.9844, - "step": 1159 - }, - { - "epoch": 0.1394817531413455, - "grad_norm": 1.9417070428254828, - "learning_rate": 3.8756874686834386e-06, - "loss": 1.0849, - "step": 1160 - }, - { - "epoch": 0.13960199603198462, - "grad_norm": 1.6187047797407927, - "learning_rate": 3.875416978218893e-06, - "loss": 1.0052, - "step": 1161 - }, - { - "epoch": 0.1397222389226237, - "grad_norm": 2.045993178183779, - "learning_rate": 3.8751462032550835e-06, - "loss": 1.0242, - "step": 1162 - }, - { - "epoch": 0.13984248181326278, - "grad_norm": 2.8038478433898364, - "learning_rate": 3.874875143833085e-06, - "loss": 1.0255, - "step": 1163 - }, - { - "epoch": 0.1399627247039019, - "grad_norm": 1.800895902413731, - "learning_rate": 3.874603799994019e-06, - "loss": 0.8936, - "step": 1164 - }, - { - "epoch": 0.14008296759454097, - "grad_norm": 2.5961135757887743, - "learning_rate": 3.874332171779046e-06, - "loss": 1.0759, - "step": 1165 - }, - { - "epoch": 0.14020321048518006, - "grad_norm": 1.6513559982850152, - "learning_rate": 3.874060259229373e-06, - "loss": 0.9577, - "step": 1166 - }, - { - "epoch": 0.14032345337581917, - "grad_norm": 1.9742892616158085, - "learning_rate": 3.873788062386249e-06, - "loss": 1.1262, - "step": 1167 - }, - { - "epoch": 0.14044369626645825, - "grad_norm": 2.1092580127064258, - "learning_rate": 3.873515581290965e-06, - "loss": 1.0252, - "step": 1168 - }, - { - "epoch": 0.14056393915709733, - "grad_norm": 1.9890469199122545, - "learning_rate": 3.8732428159848575e-06, - "loss": 0.9588, - "step": 1169 - }, - { - "epoch": 0.14068418204773642, - "grad_norm": 1.7748226184596165, - "learning_rate": 3.872969766509304e-06, - "loss": 0.9857, - "step": 1170 - }, - { - "epoch": 0.14080442493837553, - "grad_norm": 0.9937878115675994, - "learning_rate": 3.872696432905726e-06, - "loss": 0.7837, - "step": 1171 - }, - { - "epoch": 0.1409246678290146, - "grad_norm": 2.0537608562793177, - "learning_rate": 3.872422815215589e-06, - "loss": 0.9208, - "step": 1172 - }, - { - "epoch": 0.1410449107196537, - "grad_norm": 1.6917150625187107, - "learning_rate": 3.8721489134803994e-06, - "loss": 0.9502, - "step": 1173 - }, - { - "epoch": 0.1411651536102928, - "grad_norm": 2.2265526486898297, - "learning_rate": 3.871874727741707e-06, - "loss": 0.9351, - "step": 1174 - }, - { - "epoch": 0.1412853965009319, - "grad_norm": 1.6586414562090044, - "learning_rate": 3.871600258041108e-06, - "loss": 1.1646, - "step": 1175 - }, - { - "epoch": 0.14140563939157097, - "grad_norm": 2.285711863209939, - "learning_rate": 3.871325504420238e-06, - "loss": 1.0607, - "step": 1176 - }, - { - "epoch": 0.14152588228221005, - "grad_norm": 1.7526134322565332, - "learning_rate": 3.871050466920776e-06, - "loss": 1.0199, - "step": 1177 - }, - { - "epoch": 0.14164612517284916, - "grad_norm": 1.810800742382574, - "learning_rate": 3.870775145584447e-06, - "loss": 1.0053, - "step": 1178 - }, - { - "epoch": 0.14176636806348825, - "grad_norm": 2.6146548642610754, - "learning_rate": 3.8704995404530145e-06, - "loss": 0.8439, - "step": 1179 - }, - { - "epoch": 0.14188661095412733, - "grad_norm": 1.694292526348593, - "learning_rate": 3.87022365156829e-06, - "loss": 1.0472, - "step": 1180 - }, - { - "epoch": 0.14200685384476644, - "grad_norm": 2.059120993803026, - "learning_rate": 3.869947478972123e-06, - "loss": 1.0036, - "step": 1181 - }, - { - "epoch": 0.14212709673540552, - "grad_norm": 2.1767733655932697, - "learning_rate": 3.869671022706412e-06, - "loss": 1.0192, - "step": 1182 - }, - { - "epoch": 0.1422473396260446, - "grad_norm": 2.5134502986063705, - "learning_rate": 3.869394282813092e-06, - "loss": 0.855, - "step": 1183 - }, - { - "epoch": 0.1423675825166837, - "grad_norm": 2.4333833018707187, - "learning_rate": 3.869117259334147e-06, - "loss": 1.097, - "step": 1184 - }, - { - "epoch": 0.1424878254073228, - "grad_norm": 1.6940395656680596, - "learning_rate": 3.868839952311599e-06, - "loss": 1.031, - "step": 1185 - }, - { - "epoch": 0.14260806829796188, - "grad_norm": 1.96369199573657, - "learning_rate": 3.868562361787516e-06, - "loss": 1.01, - "step": 1186 - }, - { - "epoch": 0.14272831118860096, - "grad_norm": 1.894129507984493, - "learning_rate": 3.868284487804009e-06, - "loss": 0.8992, - "step": 1187 - }, - { - "epoch": 0.14284855407924008, - "grad_norm": 1.6899984725403514, - "learning_rate": 3.86800633040323e-06, - "loss": 0.9865, - "step": 1188 - }, - { - "epoch": 0.14296879696987916, - "grad_norm": 2.2454082471253516, - "learning_rate": 3.867727889627376e-06, - "loss": 0.9901, - "step": 1189 - }, - { - "epoch": 0.14308903986051824, - "grad_norm": 2.3356980421135574, - "learning_rate": 3.867449165518687e-06, - "loss": 0.9898, - "step": 1190 - }, - { - "epoch": 0.14320928275115732, - "grad_norm": 1.8866961933943747, - "learning_rate": 3.867170158119444e-06, - "loss": 0.9183, - "step": 1191 - }, - { - "epoch": 0.14332952564179643, - "grad_norm": 1.8277717314210784, - "learning_rate": 3.866890867471972e-06, - "loss": 0.958, - "step": 1192 - }, - { - "epoch": 0.14344976853243552, - "grad_norm": 2.708975925996197, - "learning_rate": 3.86661129361864e-06, - "loss": 1.1067, - "step": 1193 - }, - { - "epoch": 0.1435700114230746, - "grad_norm": 2.2719641037039486, - "learning_rate": 3.866331436601859e-06, - "loss": 1.0618, - "step": 1194 - }, - { - "epoch": 0.1436902543137137, - "grad_norm": 2.097916968710864, - "learning_rate": 3.866051296464083e-06, - "loss": 0.945, - "step": 1195 - }, - { - "epoch": 0.1438104972043528, - "grad_norm": 1.8113831334649144, - "learning_rate": 3.86577087324781e-06, - "loss": 1.0519, - "step": 1196 - }, - { - "epoch": 0.14393074009499188, - "grad_norm": 1.947021162935906, - "learning_rate": 3.865490166995578e-06, - "loss": 0.9722, - "step": 1197 - }, - { - "epoch": 0.144050982985631, - "grad_norm": 2.103623524681579, - "learning_rate": 3.86520917774997e-06, - "loss": 1.037, - "step": 1198 - }, - { - "epoch": 0.14417122587627007, - "grad_norm": 2.185271626748519, - "learning_rate": 3.864927905553614e-06, - "loss": 0.9531, - "step": 1199 - }, - { - "epoch": 0.14429146876690915, - "grad_norm": 1.4937522632789224, - "learning_rate": 3.8646463504491765e-06, - "loss": 1.0873, - "step": 1200 - }, - { - "epoch": 0.14441171165754824, - "grad_norm": 1.9940397739594982, - "learning_rate": 3.8643645124793705e-06, - "loss": 1.0393, - "step": 1201 - }, - { - "epoch": 0.14453195454818735, - "grad_norm": 1.515748323752011, - "learning_rate": 3.8640823916869515e-06, - "loss": 0.9507, - "step": 1202 - }, - { - "epoch": 0.14465219743882643, - "grad_norm": 1.4885723800462631, - "learning_rate": 3.863799988114714e-06, - "loss": 0.9849, - "step": 1203 - }, - { - "epoch": 0.1447724403294655, - "grad_norm": 2.722275476086172, - "learning_rate": 3.863517301805502e-06, - "loss": 0.9111, - "step": 1204 - }, - { - "epoch": 0.14489268322010462, - "grad_norm": 2.2389795835040345, - "learning_rate": 3.863234332802196e-06, - "loss": 1.1709, - "step": 1205 - }, - { - "epoch": 0.1450129261107437, - "grad_norm": 1.9678958674886562, - "learning_rate": 3.862951081147723e-06, - "loss": 0.9425, - "step": 1206 - }, - { - "epoch": 0.1451331690013828, - "grad_norm": 2.6811474973919274, - "learning_rate": 3.862667546885053e-06, - "loss": 0.9818, - "step": 1207 - }, - { - "epoch": 0.14525341189202187, - "grad_norm": 2.2902593510785274, - "learning_rate": 3.8623837300571965e-06, - "loss": 0.9338, - "step": 1208 - }, - { - "epoch": 0.14537365478266098, - "grad_norm": 1.7575231017199244, - "learning_rate": 3.8620996307072085e-06, - "loss": 1.0393, - "step": 1209 - }, - { - "epoch": 0.14549389767330007, - "grad_norm": 1.6439671942229115, - "learning_rate": 3.861815248878188e-06, - "loss": 0.8491, - "step": 1210 - }, - { - "epoch": 0.14561414056393915, - "grad_norm": 2.154240431958346, - "learning_rate": 3.861530584613274e-06, - "loss": 1.0092, - "step": 1211 - }, - { - "epoch": 0.14573438345457826, - "grad_norm": 2.2056526131764738, - "learning_rate": 3.86124563795565e-06, - "loss": 1.0228, - "step": 1212 - }, - { - "epoch": 0.14585462634521734, - "grad_norm": 1.6736881727290425, - "learning_rate": 3.860960408948543e-06, - "loss": 0.9078, - "step": 1213 - }, - { - "epoch": 0.14597486923585642, - "grad_norm": 2.5072076846549667, - "learning_rate": 3.860674897635222e-06, - "loss": 1.1023, - "step": 1214 - }, - { - "epoch": 0.1460951121264955, - "grad_norm": 2.302344835253286, - "learning_rate": 3.860389104058998e-06, - "loss": 1.0326, - "step": 1215 - }, - { - "epoch": 0.14621535501713462, - "grad_norm": 1.805089534219076, - "learning_rate": 3.860103028263227e-06, - "loss": 0.9301, - "step": 1216 - }, - { - "epoch": 0.1463355979077737, - "grad_norm": 2.009309796688859, - "learning_rate": 3.859816670291304e-06, - "loss": 0.8966, - "step": 1217 - }, - { - "epoch": 0.14645584079841278, - "grad_norm": 2.0258413050672677, - "learning_rate": 3.859530030186672e-06, - "loss": 1.0991, - "step": 1218 - }, - { - "epoch": 0.1465760836890519, - "grad_norm": 2.259505115946418, - "learning_rate": 3.859243107992813e-06, - "loss": 1.0301, - "step": 1219 - }, - { - "epoch": 0.14669632657969098, - "grad_norm": 2.383565314769396, - "learning_rate": 3.858955903753252e-06, - "loss": 0.992, - "step": 1220 - }, - { - "epoch": 0.14681656947033006, - "grad_norm": 1.4438137222033205, - "learning_rate": 3.858668417511559e-06, - "loss": 1.0363, - "step": 1221 - }, - { - "epoch": 0.14693681236096917, - "grad_norm": 2.0378714989233906, - "learning_rate": 3.8583806493113445e-06, - "loss": 0.9568, - "step": 1222 - }, - { - "epoch": 0.14705705525160825, - "grad_norm": 1.8345598611825358, - "learning_rate": 3.858092599196263e-06, - "loss": 1.0238, - "step": 1223 - }, - { - "epoch": 0.14717729814224734, - "grad_norm": 2.0237215645425826, - "learning_rate": 3.857804267210012e-06, - "loss": 1.0277, - "step": 1224 - }, - { - "epoch": 0.14729754103288642, - "grad_norm": 1.9005685156495207, - "learning_rate": 3.857515653396331e-06, - "loss": 1.0861, - "step": 1225 - }, - { - "epoch": 0.14741778392352553, - "grad_norm": 2.040396067934897, - "learning_rate": 3.857226757799002e-06, - "loss": 1.0689, - "step": 1226 - }, - { - "epoch": 0.1475380268141646, - "grad_norm": 1.9401175137941857, - "learning_rate": 3.85693758046185e-06, - "loss": 0.9419, - "step": 1227 - }, - { - "epoch": 0.1476582697048037, - "grad_norm": 1.595404644882061, - "learning_rate": 3.8566481214287435e-06, - "loss": 1.0278, - "step": 1228 - }, - { - "epoch": 0.1477785125954428, - "grad_norm": 2.0474512584879254, - "learning_rate": 3.8563583807435935e-06, - "loss": 1.1082, - "step": 1229 - }, - { - "epoch": 0.1478987554860819, - "grad_norm": 2.472907513090277, - "learning_rate": 3.856068358450353e-06, - "loss": 0.9798, - "step": 1230 - }, - { - "epoch": 0.14801899837672097, - "grad_norm": 1.7286025892892176, - "learning_rate": 3.8557780545930186e-06, - "loss": 1.0557, - "step": 1231 - }, - { - "epoch": 0.14813924126736006, - "grad_norm": 1.6178896204498472, - "learning_rate": 3.855487469215628e-06, - "loss": 0.9923, - "step": 1232 - }, - { - "epoch": 0.14825948415799917, - "grad_norm": 2.017466568793935, - "learning_rate": 3.855196602362264e-06, - "loss": 0.9265, - "step": 1233 - }, - { - "epoch": 0.14837972704863825, - "grad_norm": 1.8149372025862671, - "learning_rate": 3.854905454077051e-06, - "loss": 1.1484, - "step": 1234 - }, - { - "epoch": 0.14849996993927733, - "grad_norm": 1.7253565654713627, - "learning_rate": 3.854614024404155e-06, - "loss": 1.0779, - "step": 1235 - }, - { - "epoch": 0.14862021282991644, - "grad_norm": 1.6993489533448904, - "learning_rate": 3.8543223133877865e-06, - "loss": 1.1005, - "step": 1236 - }, - { - "epoch": 0.14874045572055553, - "grad_norm": 1.8533002850904408, - "learning_rate": 3.854030321072198e-06, - "loss": 1.0769, - "step": 1237 - }, - { - "epoch": 0.1488606986111946, - "grad_norm": 1.9325705547209937, - "learning_rate": 3.853738047501682e-06, - "loss": 0.9409, - "step": 1238 - }, - { - "epoch": 0.1489809415018337, - "grad_norm": 2.0294352714627606, - "learning_rate": 3.85344549272058e-06, - "loss": 0.9766, - "step": 1239 - }, - { - "epoch": 0.1491011843924728, - "grad_norm": 1.637583065110133, - "learning_rate": 3.853152656773269e-06, - "loss": 1.0289, - "step": 1240 - }, - { - "epoch": 0.14922142728311188, - "grad_norm": 2.1078454017192123, - "learning_rate": 3.852859539704174e-06, - "loss": 1.0414, - "step": 1241 - }, - { - "epoch": 0.14934167017375097, - "grad_norm": 1.7523858972209663, - "learning_rate": 3.85256614155776e-06, - "loss": 0.9609, - "step": 1242 - }, - { - "epoch": 0.14946191306439008, - "grad_norm": 2.3669155455632764, - "learning_rate": 3.852272462378535e-06, - "loss": 0.9523, - "step": 1243 - }, - { - "epoch": 0.14958215595502916, - "grad_norm": 1.8505032241574688, - "learning_rate": 3.85197850221105e-06, - "loss": 0.9875, - "step": 1244 - }, - { - "epoch": 0.14970239884566824, - "grad_norm": 1.700374447031176, - "learning_rate": 3.851684261099899e-06, - "loss": 0.9646, - "step": 1245 - }, - { - "epoch": 0.14982264173630733, - "grad_norm": 1.6898719224419976, - "learning_rate": 3.851389739089718e-06, - "loss": 1.0612, - "step": 1246 - }, - { - "epoch": 0.14994288462694644, - "grad_norm": 1.690798608465008, - "learning_rate": 3.851094936225186e-06, - "loss": 1.0047, - "step": 1247 - }, - { - "epoch": 0.15006312751758552, - "grad_norm": 1.3995589549303757, - "learning_rate": 3.850799852551024e-06, - "loss": 0.9719, - "step": 1248 - }, - { - "epoch": 0.1501833704082246, - "grad_norm": 3.8866647687834286, - "learning_rate": 3.850504488111995e-06, - "loss": 1.063, - "step": 1249 - }, - { - "epoch": 0.15030361329886371, - "grad_norm": 1.8939298009340497, - "learning_rate": 3.850208842952907e-06, - "loss": 1.0234, - "step": 1250 - }, - { - "epoch": 0.1504238561895028, - "grad_norm": 1.8113254819401947, - "learning_rate": 3.849912917118608e-06, - "loss": 0.9968, - "step": 1251 - }, - { - "epoch": 0.15054409908014188, - "grad_norm": 1.0224893852000028, - "learning_rate": 3.849616710653992e-06, - "loss": 0.8233, - "step": 1252 - }, - { - "epoch": 0.150664341970781, - "grad_norm": 1.520310150803322, - "learning_rate": 3.84932022360399e-06, - "loss": 0.952, - "step": 1253 - }, - { - "epoch": 0.15078458486142007, - "grad_norm": 2.4476122294831604, - "learning_rate": 3.849023456013581e-06, - "loss": 1.0461, - "step": 1254 - }, - { - "epoch": 0.15090482775205916, - "grad_norm": 2.1984687461340613, - "learning_rate": 3.848726407927784e-06, - "loss": 0.8222, - "step": 1255 - }, - { - "epoch": 0.15102507064269824, - "grad_norm": 2.60014373308645, - "learning_rate": 3.84842907939166e-06, - "loss": 1.0771, - "step": 1256 - }, - { - "epoch": 0.15114531353333735, - "grad_norm": 2.0879703288838463, - "learning_rate": 3.8481314704503146e-06, - "loss": 0.9139, - "step": 1257 - }, - { - "epoch": 0.15126555642397643, - "grad_norm": 2.442954684742499, - "learning_rate": 3.847833581148895e-06, - "loss": 1.0928, - "step": 1258 - }, - { - "epoch": 0.15138579931461552, - "grad_norm": 1.9353827937200276, - "learning_rate": 3.84753541153259e-06, - "loss": 1.0053, - "step": 1259 - }, - { - "epoch": 0.15150604220525463, - "grad_norm": 1.4545455879785736, - "learning_rate": 3.847236961646633e-06, - "loss": 1.0403, - "step": 1260 - }, - { - "epoch": 0.1516262850958937, - "grad_norm": 2.353799054186188, - "learning_rate": 3.846938231536296e-06, - "loss": 0.9839, - "step": 1261 - }, - { - "epoch": 0.1517465279865328, - "grad_norm": 1.5426655531813005, - "learning_rate": 3.8466392212468995e-06, - "loss": 1.0109, - "step": 1262 - }, - { - "epoch": 0.15186677087717187, - "grad_norm": 0.9406551673081742, - "learning_rate": 3.8463399308238e-06, - "loss": 0.8433, - "step": 1263 - }, - { - "epoch": 0.15198701376781099, - "grad_norm": 1.6542741037578292, - "learning_rate": 3.846040360312402e-06, - "loss": 0.8418, - "step": 1264 - }, - { - "epoch": 0.15210725665845007, - "grad_norm": 2.031377700312801, - "learning_rate": 3.8457405097581485e-06, - "loss": 1.0184, - "step": 1265 - }, - { - "epoch": 0.15222749954908915, - "grad_norm": 1.775721956564641, - "learning_rate": 3.8454403792065275e-06, - "loss": 0.9838, - "step": 1266 - }, - { - "epoch": 0.15234774243972826, - "grad_norm": 1.7127176717263775, - "learning_rate": 3.845139968703068e-06, - "loss": 1.0551, - "step": 1267 - }, - { - "epoch": 0.15246798533036734, - "grad_norm": 1.682929950323882, - "learning_rate": 3.844839278293342e-06, - "loss": 1.0337, - "step": 1268 - }, - { - "epoch": 0.15258822822100643, - "grad_norm": 2.1187661679531535, - "learning_rate": 3.8445383080229654e-06, - "loss": 0.9667, - "step": 1269 - }, - { - "epoch": 0.1527084711116455, - "grad_norm": 2.216141039928687, - "learning_rate": 3.844237057937593e-06, - "loss": 0.944, - "step": 1270 - }, - { - "epoch": 0.15282871400228462, - "grad_norm": 2.205675096686072, - "learning_rate": 3.843935528082926e-06, - "loss": 0.9871, - "step": 1271 - }, - { - "epoch": 0.1529489568929237, - "grad_norm": 1.8342553406765414, - "learning_rate": 3.843633718504704e-06, - "loss": 1.0533, - "step": 1272 - }, - { - "epoch": 0.1530691997835628, - "grad_norm": 2.1148019561752758, - "learning_rate": 3.843331629248715e-06, - "loss": 1.0969, - "step": 1273 - }, - { - "epoch": 0.1531894426742019, - "grad_norm": 2.3334040858123024, - "learning_rate": 3.843029260360782e-06, - "loss": 0.9719, - "step": 1274 - }, - { - "epoch": 0.15330968556484098, - "grad_norm": 2.4346163393788975, - "learning_rate": 3.8427266118867755e-06, - "loss": 0.9893, - "step": 1275 - }, - { - "epoch": 0.15342992845548006, - "grad_norm": 1.8041345318875668, - "learning_rate": 3.842423683872608e-06, - "loss": 1.0331, - "step": 1276 - }, - { - "epoch": 0.15355017134611917, - "grad_norm": 2.2574542860300904, - "learning_rate": 3.842120476364232e-06, - "loss": 0.9885, - "step": 1277 - }, - { - "epoch": 0.15367041423675826, - "grad_norm": 2.6194465566630885, - "learning_rate": 3.841816989407644e-06, - "loss": 1.0391, - "step": 1278 - }, - { - "epoch": 0.15379065712739734, - "grad_norm": 2.0537443716073067, - "learning_rate": 3.841513223048884e-06, - "loss": 0.9682, - "step": 1279 - }, - { - "epoch": 0.15391090001803642, - "grad_norm": 2.814609160588318, - "learning_rate": 3.841209177334031e-06, - "loss": 0.9886, - "step": 1280 - }, - { - "epoch": 0.15403114290867553, - "grad_norm": 2.048775409496695, - "learning_rate": 3.84090485230921e-06, - "loss": 0.952, - "step": 1281 - }, - { - "epoch": 0.15415138579931462, - "grad_norm": 3.3453391033541973, - "learning_rate": 3.840600248020588e-06, - "loss": 0.9634, - "step": 1282 - }, - { - "epoch": 0.1542716286899537, - "grad_norm": 2.644571458970077, - "learning_rate": 3.840295364514371e-06, - "loss": 1.0003, - "step": 1283 - }, - { - "epoch": 0.1543918715805928, - "grad_norm": 2.462844932912793, - "learning_rate": 3.83999020183681e-06, - "loss": 0.9797, - "step": 1284 - }, - { - "epoch": 0.1545121144712319, - "grad_norm": 1.9191187501138574, - "learning_rate": 3.839684760034199e-06, - "loss": 0.9817, - "step": 1285 - }, - { - "epoch": 0.15463235736187098, - "grad_norm": 2.5547074716703095, - "learning_rate": 3.8393790391528716e-06, - "loss": 0.8482, - "step": 1286 - }, - { - "epoch": 0.15475260025251006, - "grad_norm": 7.738603149065758, - "learning_rate": 3.8390730392392075e-06, - "loss": 1.0831, - "step": 1287 - }, - { - "epoch": 0.15487284314314917, - "grad_norm": 2.0071510977059543, - "learning_rate": 3.838766760339626e-06, - "loss": 0.9949, - "step": 1288 - }, - { - "epoch": 0.15499308603378825, - "grad_norm": 2.4054379765077467, - "learning_rate": 3.838460202500587e-06, - "loss": 0.9959, - "step": 1289 - }, - { - "epoch": 0.15511332892442733, - "grad_norm": 2.1490443257002547, - "learning_rate": 3.838153365768599e-06, - "loss": 0.948, - "step": 1290 - }, - { - "epoch": 0.15523357181506645, - "grad_norm": 2.234356966812882, - "learning_rate": 3.837846250190206e-06, - "loss": 0.9509, - "step": 1291 - }, - { - "epoch": 0.15535381470570553, - "grad_norm": 1.9767638092658644, - "learning_rate": 3.837538855811998e-06, - "loss": 0.9715, - "step": 1292 - }, - { - "epoch": 0.1554740575963446, - "grad_norm": 2.425224596614643, - "learning_rate": 3.837231182680606e-06, - "loss": 0.9044, - "step": 1293 - }, - { - "epoch": 0.1555943004869837, - "grad_norm": 2.0899533822368133, - "learning_rate": 3.836923230842706e-06, - "loss": 0.9632, - "step": 1294 - }, - { - "epoch": 0.1557145433776228, - "grad_norm": 1.783542162115314, - "learning_rate": 3.836615000345011e-06, - "loss": 1.0145, - "step": 1295 - }, - { - "epoch": 0.1558347862682619, - "grad_norm": 2.0069782587155824, - "learning_rate": 3.836306491234282e-06, - "loss": 0.9833, - "step": 1296 - }, - { - "epoch": 0.15595502915890097, - "grad_norm": 3.1797191421471505, - "learning_rate": 3.835997703557317e-06, - "loss": 0.9509, - "step": 1297 - }, - { - "epoch": 0.15607527204954008, - "grad_norm": 1.5168642306643, - "learning_rate": 3.83568863736096e-06, - "loss": 0.9996, - "step": 1298 - }, - { - "epoch": 0.15619551494017916, - "grad_norm": 3.0564493625264717, - "learning_rate": 3.8353792926920975e-06, - "loss": 1.0831, - "step": 1299 - }, - { - "epoch": 0.15631575783081825, - "grad_norm": 2.0271793364961312, - "learning_rate": 3.835069669597655e-06, - "loss": 1.0231, - "step": 1300 - }, - { - "epoch": 0.15643600072145733, - "grad_norm": 1.9117666995353142, - "learning_rate": 3.834759768124603e-06, - "loss": 0.9977, - "step": 1301 - }, - { - "epoch": 0.15655624361209644, - "grad_norm": 2.4668475196454005, - "learning_rate": 3.834449588319953e-06, - "loss": 0.9643, - "step": 1302 - }, - { - "epoch": 0.15667648650273552, - "grad_norm": 1.778948585432023, - "learning_rate": 3.834139130230758e-06, - "loss": 1.042, - "step": 1303 - }, - { - "epoch": 0.1567967293933746, - "grad_norm": 1.3969795256215456, - "learning_rate": 3.833828393904117e-06, - "loss": 1.0149, - "step": 1304 - }, - { - "epoch": 0.15691697228401372, - "grad_norm": 2.2673566843184023, - "learning_rate": 3.833517379387165e-06, - "loss": 0.9775, - "step": 1305 - }, - { - "epoch": 0.1570372151746528, - "grad_norm": 1.8169318638456606, - "learning_rate": 3.833206086727085e-06, - "loss": 1.0942, - "step": 1306 - }, - { - "epoch": 0.15715745806529188, - "grad_norm": 1.924192256644433, - "learning_rate": 3.8328945159710994e-06, - "loss": 0.9167, - "step": 1307 - }, - { - "epoch": 0.157277700955931, - "grad_norm": 1.7988217630180656, - "learning_rate": 3.832582667166473e-06, - "loss": 1.0824, - "step": 1308 - }, - { - "epoch": 0.15739794384657008, - "grad_norm": 2.192376940067302, - "learning_rate": 3.8322705403605125e-06, - "loss": 1.0201, - "step": 1309 - }, - { - "epoch": 0.15751818673720916, - "grad_norm": 2.0565422012818204, - "learning_rate": 3.831958135600568e-06, - "loss": 1.0158, - "step": 1310 - }, - { - "epoch": 0.15763842962784824, - "grad_norm": 1.7001172025333586, - "learning_rate": 3.831645452934032e-06, - "loss": 1.0045, - "step": 1311 - }, - { - "epoch": 0.15775867251848735, - "grad_norm": 2.81048155517172, - "learning_rate": 3.831332492408336e-06, - "loss": 1.0052, - "step": 1312 - }, - { - "epoch": 0.15787891540912644, - "grad_norm": 1.719866788607068, - "learning_rate": 3.831019254070957e-06, - "loss": 0.8878, - "step": 1313 - }, - { - "epoch": 0.15799915829976552, - "grad_norm": 2.398592170425063, - "learning_rate": 3.8307057379694135e-06, - "loss": 1.1655, - "step": 1314 - }, - { - "epoch": 0.15811940119040463, - "grad_norm": 1.978886320101878, - "learning_rate": 3.830391944151264e-06, - "loss": 1.025, - "step": 1315 - }, - { - "epoch": 0.1582396440810437, - "grad_norm": 1.781818968267568, - "learning_rate": 3.830077872664114e-06, - "loss": 0.8673, - "step": 1316 - }, - { - "epoch": 0.1583598869716828, - "grad_norm": 1.7939765441534905, - "learning_rate": 3.829763523555604e-06, - "loss": 0.9296, - "step": 1317 - }, - { - "epoch": 0.15848012986232188, - "grad_norm": 2.2350931147352373, - "learning_rate": 3.829448896873423e-06, - "loss": 1.0002, - "step": 1318 - }, - { - "epoch": 0.158600372752961, - "grad_norm": 1.7984749346829896, - "learning_rate": 3.829133992665299e-06, - "loss": 0.9891, - "step": 1319 - }, - { - "epoch": 0.15872061564360007, - "grad_norm": 2.9521306700176546, - "learning_rate": 3.828818810979002e-06, - "loss": 1.0917, - "step": 1320 - }, - { - "epoch": 0.15884085853423915, - "grad_norm": 1.8321556585890306, - "learning_rate": 3.8285033518623454e-06, - "loss": 1.0033, - "step": 1321 - }, - { - "epoch": 0.15896110142487826, - "grad_norm": 2.5439339723535634, - "learning_rate": 3.8281876153631845e-06, - "loss": 1.0183, - "step": 1322 - }, - { - "epoch": 0.15908134431551735, - "grad_norm": 2.057286696287179, - "learning_rate": 3.827871601529416e-06, - "loss": 0.8446, - "step": 1323 - }, - { - "epoch": 0.15920158720615643, - "grad_norm": 2.0738135335340293, - "learning_rate": 3.827555310408979e-06, - "loss": 1.001, - "step": 1324 - }, - { - "epoch": 0.1593218300967955, - "grad_norm": 1.594340944124581, - "learning_rate": 3.827238742049854e-06, - "loss": 1.0268, - "step": 1325 - }, - { - "epoch": 0.15944207298743462, - "grad_norm": 1.6477201084949837, - "learning_rate": 3.826921896500066e-06, - "loss": 0.7303, - "step": 1326 - }, - { - "epoch": 0.1595623158780737, - "grad_norm": 1.766753249775775, - "learning_rate": 3.826604773807678e-06, - "loss": 0.9887, - "step": 1327 - }, - { - "epoch": 0.1596825587687128, - "grad_norm": 2.3495171355219107, - "learning_rate": 3.826287374020798e-06, - "loss": 0.9326, - "step": 1328 - }, - { - "epoch": 0.1598028016593519, - "grad_norm": 2.006156743339168, - "learning_rate": 3.825969697187575e-06, - "loss": 1.0266, - "step": 1329 - }, - { - "epoch": 0.15992304454999098, - "grad_norm": 1.6483761744368826, - "learning_rate": 3.8256517433562015e-06, - "loss": 0.9082, - "step": 1330 - }, - { - "epoch": 0.16004328744063007, - "grad_norm": 2.0069631717193865, - "learning_rate": 3.82533351257491e-06, - "loss": 1.1245, - "step": 1331 - }, - { - "epoch": 0.16016353033126918, - "grad_norm": 1.793826959534097, - "learning_rate": 3.825015004891975e-06, - "loss": 1.09, - "step": 1332 - }, - { - "epoch": 0.16028377322190826, - "grad_norm": 1.606870090710067, - "learning_rate": 3.824696220355716e-06, - "loss": 0.9602, - "step": 1333 - }, - { - "epoch": 0.16040401611254734, - "grad_norm": 1.5602637500814134, - "learning_rate": 3.824377159014491e-06, - "loss": 0.9965, - "step": 1334 - }, - { - "epoch": 0.16052425900318643, - "grad_norm": 1.7809097902090456, - "learning_rate": 3.824057820916702e-06, - "loss": 1.0518, - "step": 1335 - }, - { - "epoch": 0.16064450189382554, - "grad_norm": 2.157992557247645, - "learning_rate": 3.8237382061107904e-06, - "loss": 0.9208, - "step": 1336 - }, - { - "epoch": 0.16076474478446462, - "grad_norm": 1.8696588098899676, - "learning_rate": 3.823418314645243e-06, - "loss": 0.9865, - "step": 1337 - }, - { - "epoch": 0.1608849876751037, - "grad_norm": 2.1264037937024263, - "learning_rate": 3.823098146568588e-06, - "loss": 0.9599, - "step": 1338 - }, - { - "epoch": 0.1610052305657428, - "grad_norm": 1.6895355380910626, - "learning_rate": 3.822777701929394e-06, - "loss": 0.921, - "step": 1339 - }, - { - "epoch": 0.1611254734563819, - "grad_norm": 1.836469337861716, - "learning_rate": 3.8224569807762714e-06, - "loss": 0.9496, - "step": 1340 - }, - { - "epoch": 0.16124571634702098, - "grad_norm": 1.838001535699346, - "learning_rate": 3.822135983157873e-06, - "loss": 0.9794, - "step": 1341 - }, - { - "epoch": 0.16136595923766006, - "grad_norm": 1.748174669080439, - "learning_rate": 3.821814709122896e-06, - "loss": 1.0469, - "step": 1342 - }, - { - "epoch": 0.16148620212829917, - "grad_norm": 1.953843190709645, - "learning_rate": 3.821493158720076e-06, - "loss": 1.0465, - "step": 1343 - }, - { - "epoch": 0.16160644501893826, - "grad_norm": 2.7411218729964615, - "learning_rate": 3.821171331998191e-06, - "loss": 0.94, - "step": 1344 - }, - { - "epoch": 0.16172668790957734, - "grad_norm": 0.9284202208912479, - "learning_rate": 3.820849229006064e-06, - "loss": 0.7883, - "step": 1345 - }, - { - "epoch": 0.16184693080021645, - "grad_norm": 1.641662197139226, - "learning_rate": 3.8205268497925564e-06, - "loss": 0.9136, - "step": 1346 - }, - { - "epoch": 0.16196717369085553, - "grad_norm": 2.0247986681659107, - "learning_rate": 3.8202041944065725e-06, - "loss": 0.977, - "step": 1347 - }, - { - "epoch": 0.16208741658149461, - "grad_norm": 1.8653320286936874, - "learning_rate": 3.819881262897061e-06, - "loss": 0.9392, - "step": 1348 - }, - { - "epoch": 0.1622076594721337, - "grad_norm": 1.8176233295432735, - "learning_rate": 3.819558055313008e-06, - "loss": 0.9362, - "step": 1349 - }, - { - "epoch": 0.1623279023627728, - "grad_norm": 2.2692966569737627, - "learning_rate": 3.819234571703444e-06, - "loss": 0.9762, - "step": 1350 - }, - { - "epoch": 0.1624481452534119, - "grad_norm": 2.1373815537143686, - "learning_rate": 3.8189108121174435e-06, - "loss": 1.0597, - "step": 1351 - }, - { - "epoch": 0.16256838814405097, - "grad_norm": 1.4887694511776381, - "learning_rate": 3.818586776604118e-06, - "loss": 1.0377, - "step": 1352 - }, - { - "epoch": 0.16268863103469008, - "grad_norm": 1.8433807132534372, - "learning_rate": 3.818262465212625e-06, - "loss": 0.8231, - "step": 1353 - }, - { - "epoch": 0.16280887392532917, - "grad_norm": 1.9579728044914342, - "learning_rate": 3.817937877992161e-06, - "loss": 0.9738, - "step": 1354 - }, - { - "epoch": 0.16292911681596825, - "grad_norm": 2.1328026628092736, - "learning_rate": 3.817613014991967e-06, - "loss": 1.0501, - "step": 1355 - }, - { - "epoch": 0.16304935970660733, - "grad_norm": 1.6522284573492427, - "learning_rate": 3.817287876261323e-06, - "loss": 0.9612, - "step": 1356 - }, - { - "epoch": 0.16316960259724644, - "grad_norm": 1.7418969429531084, - "learning_rate": 3.816962461849553e-06, - "loss": 1.0072, - "step": 1357 - }, - { - "epoch": 0.16328984548788553, - "grad_norm": 1.782075121990507, - "learning_rate": 3.8166367718060235e-06, - "loss": 1.0347, - "step": 1358 - }, - { - "epoch": 0.1634100883785246, - "grad_norm": 2.938590638623599, - "learning_rate": 3.816310806180139e-06, - "loss": 0.9602, - "step": 1359 - }, - { - "epoch": 0.16353033126916372, - "grad_norm": 1.4910458496484218, - "learning_rate": 3.81598456502135e-06, - "loss": 1.0077, - "step": 1360 - }, - { - "epoch": 0.1636505741598028, - "grad_norm": 1.902114321681526, - "learning_rate": 3.8156580483791455e-06, - "loss": 1.0749, - "step": 1361 - }, - { - "epoch": 0.16377081705044189, - "grad_norm": 1.8795253185851248, - "learning_rate": 3.815331256303059e-06, - "loss": 0.9656, - "step": 1362 - }, - { - "epoch": 0.163891059941081, - "grad_norm": 1.9031765659906754, - "learning_rate": 3.815004188842665e-06, - "loss": 0.9731, - "step": 1363 - }, - { - "epoch": 0.16401130283172008, - "grad_norm": 1.3895578897749996, - "learning_rate": 3.814676846047578e-06, - "loss": 0.9959, - "step": 1364 - }, - { - "epoch": 0.16413154572235916, - "grad_norm": 1.7495457195839583, - "learning_rate": 3.8143492279674565e-06, - "loss": 0.8949, - "step": 1365 - }, - { - "epoch": 0.16425178861299825, - "grad_norm": 0.9886000650492682, - "learning_rate": 3.8140213346519997e-06, - "loss": 0.8541, - "step": 1366 - }, - { - "epoch": 0.16437203150363736, - "grad_norm": 1.6135240422914343, - "learning_rate": 3.813693166150948e-06, - "loss": 0.9742, - "step": 1367 - }, - { - "epoch": 0.16449227439427644, - "grad_norm": 14.668767662996663, - "learning_rate": 3.813364722514086e-06, - "loss": 1.0678, - "step": 1368 - }, - { - "epoch": 0.16461251728491552, - "grad_norm": 2.0708915620391264, - "learning_rate": 3.8130360037912368e-06, - "loss": 1.0109, - "step": 1369 - }, - { - "epoch": 0.16473276017555463, - "grad_norm": 2.1492877179543832, - "learning_rate": 3.812707010032268e-06, - "loss": 1.0209, - "step": 1370 - }, - { - "epoch": 0.16485300306619372, - "grad_norm": 1.6411851199270218, - "learning_rate": 3.8123777412870863e-06, - "loss": 0.9931, - "step": 1371 - }, - { - "epoch": 0.1649732459568328, - "grad_norm": 1.745625068181054, - "learning_rate": 3.812048197605643e-06, - "loss": 0.9858, - "step": 1372 - }, - { - "epoch": 0.16509348884747188, - "grad_norm": 1.7738525980336646, - "learning_rate": 3.8117183790379277e-06, - "loss": 1.0078, - "step": 1373 - }, - { - "epoch": 0.165213731738111, - "grad_norm": 2.500313739163645, - "learning_rate": 3.811388285633976e-06, - "loss": 1.1495, - "step": 1374 - }, - { - "epoch": 0.16533397462875007, - "grad_norm": 1.7196425461270128, - "learning_rate": 3.811057917443861e-06, - "loss": 0.8181, - "step": 1375 - }, - { - "epoch": 0.16545421751938916, - "grad_norm": 1.0032180029992226, - "learning_rate": 3.8107272745177e-06, - "loss": 0.9013, - "step": 1376 - }, - { - "epoch": 0.16557446041002827, - "grad_norm": 1.6040105950203003, - "learning_rate": 3.8103963569056513e-06, - "loss": 0.9971, - "step": 1377 - }, - { - "epoch": 0.16569470330066735, - "grad_norm": 1.465489441025698, - "learning_rate": 3.8100651646579146e-06, - "loss": 1.0849, - "step": 1378 - }, - { - "epoch": 0.16581494619130643, - "grad_norm": 2.0130322954872235, - "learning_rate": 3.8097336978247317e-06, - "loss": 1.1299, - "step": 1379 - }, - { - "epoch": 0.16593518908194552, - "grad_norm": 2.112175784540625, - "learning_rate": 3.8094019564563854e-06, - "loss": 1.1001, - "step": 1380 - }, - { - "epoch": 0.16605543197258463, - "grad_norm": 2.0200963069128366, - "learning_rate": 3.809069940603201e-06, - "loss": 0.9643, - "step": 1381 - }, - { - "epoch": 0.1661756748632237, - "grad_norm": 1.8899714703904318, - "learning_rate": 3.8087376503155452e-06, - "loss": 0.9761, - "step": 1382 - }, - { - "epoch": 0.1662959177538628, - "grad_norm": 1.2506876007990486, - "learning_rate": 3.808405085643826e-06, - "loss": 0.8041, - "step": 1383 - }, - { - "epoch": 0.1664161606445019, - "grad_norm": 1.9473131302191806, - "learning_rate": 3.8080722466384925e-06, - "loss": 1.1017, - "step": 1384 - }, - { - "epoch": 0.166536403535141, - "grad_norm": 2.0781718083896594, - "learning_rate": 3.8077391333500376e-06, - "loss": 0.9025, - "step": 1385 - }, - { - "epoch": 0.16665664642578007, - "grad_norm": 2.0150616472152003, - "learning_rate": 3.8074057458289934e-06, - "loss": 0.9641, - "step": 1386 - }, - { - "epoch": 0.16677688931641918, - "grad_norm": 2.0801797904659205, - "learning_rate": 3.807072084125934e-06, - "loss": 1.0235, - "step": 1387 - }, - { - "epoch": 0.16689713220705826, - "grad_norm": 2.254233721607158, - "learning_rate": 3.806738148291477e-06, - "loss": 1.0097, - "step": 1388 - }, - { - "epoch": 0.16701737509769735, - "grad_norm": 1.7832745875133267, - "learning_rate": 3.8064039383762793e-06, - "loss": 0.9189, - "step": 1389 - }, - { - "epoch": 0.16713761798833643, - "grad_norm": 2.00788707075164, - "learning_rate": 3.8060694544310396e-06, - "loss": 0.9747, - "step": 1390 - }, - { - "epoch": 0.16725786087897554, - "grad_norm": 1.585338986967148, - "learning_rate": 3.8057346965065006e-06, - "loss": 0.9923, - "step": 1391 - }, - { - "epoch": 0.16737810376961462, - "grad_norm": 1.444476158861716, - "learning_rate": 3.805399664653443e-06, - "loss": 1.0536, - "step": 1392 - }, - { - "epoch": 0.1674983466602537, - "grad_norm": 2.1805023529732823, - "learning_rate": 3.805064358922692e-06, - "loss": 0.9616, - "step": 1393 - }, - { - "epoch": 0.16761858955089282, - "grad_norm": 1.8206012807833811, - "learning_rate": 3.8047287793651136e-06, - "loss": 1.009, - "step": 1394 - }, - { - "epoch": 0.1677388324415319, - "grad_norm": 1.7515117382687484, - "learning_rate": 3.8043929260316137e-06, - "loss": 1.085, - "step": 1395 - }, - { - "epoch": 0.16785907533217098, - "grad_norm": 1.7842949389987486, - "learning_rate": 3.8040567989731417e-06, - "loss": 1.0383, - "step": 1396 - }, - { - "epoch": 0.16797931822281006, - "grad_norm": 1.7296653243024322, - "learning_rate": 3.8037203982406876e-06, - "loss": 1.0029, - "step": 1397 - }, - { - "epoch": 0.16809956111344918, - "grad_norm": 1.9760523201737876, - "learning_rate": 3.8033837238852835e-06, - "loss": 0.9388, - "step": 1398 - }, - { - "epoch": 0.16821980400408826, - "grad_norm": 1.5305433394368986, - "learning_rate": 3.8030467759580017e-06, - "loss": 0.8994, - "step": 1399 - }, - { - "epoch": 0.16834004689472734, - "grad_norm": 1.699521232954931, - "learning_rate": 3.802709554509958e-06, - "loss": 1.077, - "step": 1400 - }, - { - "epoch": 0.16846028978536645, - "grad_norm": 1.8154822840056049, - "learning_rate": 3.8023720595923083e-06, - "loss": 0.9873, - "step": 1401 - }, - { - "epoch": 0.16858053267600553, - "grad_norm": 2.312806598855273, - "learning_rate": 3.80203429125625e-06, - "loss": 1.0787, - "step": 1402 - }, - { - "epoch": 0.16870077556664462, - "grad_norm": 1.9225932074127317, - "learning_rate": 3.8016962495530225e-06, - "loss": 0.909, - "step": 1403 - }, - { - "epoch": 0.1688210184572837, - "grad_norm": 2.040353066988645, - "learning_rate": 3.8013579345339063e-06, - "loss": 0.9702, - "step": 1404 - }, - { - "epoch": 0.1689412613479228, - "grad_norm": 1.8894957645850297, - "learning_rate": 3.801019346250224e-06, - "loss": 0.9001, - "step": 1405 - }, - { - "epoch": 0.1690615042385619, - "grad_norm": 2.0719542705394205, - "learning_rate": 3.8006804847533395e-06, - "loss": 1.0437, - "step": 1406 - }, - { - "epoch": 0.16918174712920098, - "grad_norm": 1.7131036432403968, - "learning_rate": 3.8003413500946556e-06, - "loss": 1.0488, - "step": 1407 - }, - { - "epoch": 0.1693019900198401, - "grad_norm": 2.3308390728203934, - "learning_rate": 3.8000019423256216e-06, - "loss": 1.0355, - "step": 1408 - }, - { - "epoch": 0.16942223291047917, - "grad_norm": 1.4899090052987916, - "learning_rate": 3.7996622614977234e-06, - "loss": 1.0817, - "step": 1409 - }, - { - "epoch": 0.16954247580111825, - "grad_norm": 1.5708736347050796, - "learning_rate": 3.799322307662492e-06, - "loss": 0.9965, - "step": 1410 - }, - { - "epoch": 0.16966271869175734, - "grad_norm": 1.987318604066752, - "learning_rate": 3.798982080871496e-06, - "loss": 1.0451, - "step": 1411 - }, - { - "epoch": 0.16978296158239645, - "grad_norm": 1.825410441399833, - "learning_rate": 3.798641581176349e-06, - "loss": 0.8847, - "step": 1412 - }, - { - "epoch": 0.16990320447303553, - "grad_norm": 1.7057813175372971, - "learning_rate": 3.7983008086287044e-06, - "loss": 0.9479, - "step": 1413 - }, - { - "epoch": 0.1700234473636746, - "grad_norm": 1.9112131514156245, - "learning_rate": 3.797959763280257e-06, - "loss": 0.9992, - "step": 1414 - }, - { - "epoch": 0.17014369025431372, - "grad_norm": 2.923655411200154, - "learning_rate": 3.797618445182743e-06, - "loss": 0.9993, - "step": 1415 - }, - { - "epoch": 0.1702639331449528, - "grad_norm": 1.9174739409146906, - "learning_rate": 3.79727685438794e-06, - "loss": 1.048, - "step": 1416 - }, - { - "epoch": 0.1703841760355919, - "grad_norm": 1.1121273016548328, - "learning_rate": 3.796934990947667e-06, - "loss": 0.8327, - "step": 1417 - }, - { - "epoch": 0.170504418926231, - "grad_norm": 1.0083885025400943, - "learning_rate": 3.7965928549137854e-06, - "loss": 0.8528, - "step": 1418 - }, - { - "epoch": 0.17062466181687008, - "grad_norm": 1.876070479941846, - "learning_rate": 3.7962504463381953e-06, - "loss": 0.9674, - "step": 1419 - }, - { - "epoch": 0.17074490470750917, - "grad_norm": 1.555094220045327, - "learning_rate": 3.7959077652728412e-06, - "loss": 0.9874, - "step": 1420 - }, - { - "epoch": 0.17086514759814825, - "grad_norm": 1.9293120478427148, - "learning_rate": 3.795564811769707e-06, - "loss": 0.9714, - "step": 1421 - }, - { - "epoch": 0.17098539048878736, - "grad_norm": 1.9761756246953084, - "learning_rate": 3.795221585880818e-06, - "loss": 0.9872, - "step": 1422 - }, - { - "epoch": 0.17110563337942644, - "grad_norm": 1.7066979683051977, - "learning_rate": 3.794878087658242e-06, - "loss": 1.1121, - "step": 1423 - }, - { - "epoch": 0.17122587627006552, - "grad_norm": 1.608531101038781, - "learning_rate": 3.7945343171540873e-06, - "loss": 0.9842, - "step": 1424 - }, - { - "epoch": 0.17134611916070464, - "grad_norm": 1.8655737762831168, - "learning_rate": 3.7941902744205033e-06, - "loss": 0.9911, - "step": 1425 - }, - { - "epoch": 0.17146636205134372, - "grad_norm": 1.9047668190166114, - "learning_rate": 3.7938459595096817e-06, - "loss": 1.039, - "step": 1426 - }, - { - "epoch": 0.1715866049419828, - "grad_norm": 1.5990469925579327, - "learning_rate": 3.7935013724738545e-06, - "loss": 1.0593, - "step": 1427 - }, - { - "epoch": 0.17170684783262188, - "grad_norm": 1.6072183621547698, - "learning_rate": 3.7931565133652945e-06, - "loss": 0.9802, - "step": 1428 - }, - { - "epoch": 0.171827090723261, - "grad_norm": 2.1464560509462567, - "learning_rate": 3.792811382236317e-06, - "loss": 0.8789, - "step": 1429 - }, - { - "epoch": 0.17194733361390008, - "grad_norm": 1.6330120932043075, - "learning_rate": 3.792465979139279e-06, - "loss": 0.9814, - "step": 1430 - }, - { - "epoch": 0.17206757650453916, - "grad_norm": 1.084519372832984, - "learning_rate": 3.792120304126576e-06, - "loss": 0.9334, - "step": 1431 - }, - { - "epoch": 0.17218781939517827, - "grad_norm": 1.581820777738468, - "learning_rate": 3.791774357250649e-06, - "loss": 1.0422, - "step": 1432 - }, - { - "epoch": 0.17230806228581735, - "grad_norm": 2.311524881508315, - "learning_rate": 3.7914281385639757e-06, - "loss": 0.9893, - "step": 1433 - }, - { - "epoch": 0.17242830517645644, - "grad_norm": 2.1372848402452784, - "learning_rate": 3.7910816481190784e-06, - "loss": 0.992, - "step": 1434 - }, - { - "epoch": 0.17254854806709552, - "grad_norm": 1.720419402443295, - "learning_rate": 3.7907348859685193e-06, - "loss": 0.9604, - "step": 1435 - }, - { - "epoch": 0.17266879095773463, - "grad_norm": 1.8001170226310332, - "learning_rate": 3.790387852164902e-06, - "loss": 1.0053, - "step": 1436 - }, - { - "epoch": 0.1727890338483737, - "grad_norm": 1.9876425566020546, - "learning_rate": 3.7900405467608707e-06, - "loss": 0.9728, - "step": 1437 - }, - { - "epoch": 0.1729092767390128, - "grad_norm": 2.8752190879604242, - "learning_rate": 3.7896929698091114e-06, - "loss": 0.9908, - "step": 1438 - }, - { - "epoch": 0.1730295196296519, - "grad_norm": 2.663842066261736, - "learning_rate": 3.7893451213623518e-06, - "loss": 0.8952, - "step": 1439 - }, - { - "epoch": 0.173149762520291, - "grad_norm": 1.711279569249174, - "learning_rate": 3.7889970014733606e-06, - "loss": 1.0198, - "step": 1440 - }, - { - "epoch": 0.17327000541093007, - "grad_norm": 1.6135210870358423, - "learning_rate": 3.7886486101949463e-06, - "loss": 0.9754, - "step": 1441 - }, - { - "epoch": 0.17339024830156918, - "grad_norm": 1.8612918109693775, - "learning_rate": 3.7882999475799594e-06, - "loss": 1.0705, - "step": 1442 - }, - { - "epoch": 0.17351049119220827, - "grad_norm": 1.6813080533013054, - "learning_rate": 3.787951013681293e-06, - "loss": 1.0129, - "step": 1443 - }, - { - "epoch": 0.17363073408284735, - "grad_norm": 1.6720744931997875, - "learning_rate": 3.787601808551879e-06, - "loss": 0.9837, - "step": 1444 - }, - { - "epoch": 0.17375097697348643, - "grad_norm": 2.0939044041423007, - "learning_rate": 3.7872523322446926e-06, - "loss": 1.0375, - "step": 1445 - }, - { - "epoch": 0.17387121986412554, - "grad_norm": 2.603536778884194, - "learning_rate": 3.7869025848127478e-06, - "loss": 0.8057, - "step": 1446 - }, - { - "epoch": 0.17399146275476463, - "grad_norm": 2.4416489137214623, - "learning_rate": 3.786552566309102e-06, - "loss": 1.0108, - "step": 1447 - }, - { - "epoch": 0.1741117056454037, - "grad_norm": 4.676569082737912, - "learning_rate": 3.7862022767868517e-06, - "loss": 1.0682, - "step": 1448 - }, - { - "epoch": 0.17423194853604282, - "grad_norm": 1.9682836434371798, - "learning_rate": 3.7858517162991367e-06, - "loss": 1.0462, - "step": 1449 - }, - { - "epoch": 0.1743521914266819, - "grad_norm": 2.2965687657913274, - "learning_rate": 3.7855008848991363e-06, - "loss": 0.817, - "step": 1450 - }, - { - "epoch": 0.17447243431732098, - "grad_norm": 1.8830326948766358, - "learning_rate": 3.7851497826400714e-06, - "loss": 0.98, - "step": 1451 - }, - { - "epoch": 0.17459267720796007, - "grad_norm": 1.6348970968778924, - "learning_rate": 3.7847984095752034e-06, - "loss": 0.9549, - "step": 1452 - }, - { - "epoch": 0.17471292009859918, - "grad_norm": 2.019286268121311, - "learning_rate": 3.784446765757836e-06, - "loss": 1.0121, - "step": 1453 - }, - { - "epoch": 0.17483316298923826, - "grad_norm": 2.1275321235428017, - "learning_rate": 3.7840948512413133e-06, - "loss": 0.9827, - "step": 1454 - }, - { - "epoch": 0.17495340587987734, - "grad_norm": 1.668003626867509, - "learning_rate": 3.7837426660790196e-06, - "loss": 0.985, - "step": 1455 - }, - { - "epoch": 0.17507364877051645, - "grad_norm": 1.9877941081925314, - "learning_rate": 3.783390210324382e-06, - "loss": 1.0163, - "step": 1456 - }, - { - "epoch": 0.17519389166115554, - "grad_norm": 2.263999829632648, - "learning_rate": 3.7830374840308676e-06, - "loss": 0.9299, - "step": 1457 - }, - { - "epoch": 0.17531413455179462, - "grad_norm": 2.263764136790006, - "learning_rate": 3.7826844872519842e-06, - "loss": 1.0389, - "step": 1458 - }, - { - "epoch": 0.1754343774424337, - "grad_norm": 1.674463781243612, - "learning_rate": 3.782331220041282e-06, - "loss": 0.9265, - "step": 1459 - }, - { - "epoch": 0.17555462033307281, - "grad_norm": 1.9176843125391088, - "learning_rate": 3.7819776824523504e-06, - "loss": 1.0321, - "step": 1460 - }, - { - "epoch": 0.1756748632237119, - "grad_norm": 2.144807057806381, - "learning_rate": 3.7816238745388213e-06, - "loss": 1.0443, - "step": 1461 - }, - { - "epoch": 0.17579510611435098, - "grad_norm": 2.4039875640357065, - "learning_rate": 3.781269796354367e-06, - "loss": 1.0745, - "step": 1462 - }, - { - "epoch": 0.1759153490049901, - "grad_norm": 1.623165415487262, - "learning_rate": 3.7809154479527006e-06, - "loss": 1.0729, - "step": 1463 - }, - { - "epoch": 0.17603559189562917, - "grad_norm": 2.019800400645387, - "learning_rate": 3.780560829387577e-06, - "loss": 1.0362, - "step": 1464 - }, - { - "epoch": 0.17615583478626826, - "grad_norm": 0.9322189359514795, - "learning_rate": 3.7802059407127915e-06, - "loss": 0.8112, - "step": 1465 - }, - { - "epoch": 0.17627607767690734, - "grad_norm": 1.9665545377717135, - "learning_rate": 3.7798507819821797e-06, - "loss": 1.0637, - "step": 1466 - }, - { - "epoch": 0.17639632056754645, - "grad_norm": 2.17049512321383, - "learning_rate": 3.7794953532496197e-06, - "loss": 0.9993, - "step": 1467 - }, - { - "epoch": 0.17651656345818553, - "grad_norm": 1.0112182085960073, - "learning_rate": 3.7791396545690295e-06, - "loss": 0.8349, - "step": 1468 - }, - { - "epoch": 0.17663680634882462, - "grad_norm": 2.092473794096235, - "learning_rate": 3.7787836859943685e-06, - "loss": 1.0077, - "step": 1469 - }, - { - "epoch": 0.17675704923946373, - "grad_norm": 2.379553695794279, - "learning_rate": 3.7784274475796363e-06, - "loss": 0.9963, - "step": 1470 - }, - { - "epoch": 0.1768772921301028, - "grad_norm": 2.1627291210711412, - "learning_rate": 3.7780709393788745e-06, - "loss": 0.9678, - "step": 1471 - }, - { - "epoch": 0.1769975350207419, - "grad_norm": 1.823194261032352, - "learning_rate": 3.777714161446165e-06, - "loss": 0.9593, - "step": 1472 - }, - { - "epoch": 0.177117777911381, - "grad_norm": 2.722613866577391, - "learning_rate": 3.7773571138356304e-06, - "loss": 0.8911, - "step": 1473 - }, - { - "epoch": 0.17723802080202009, - "grad_norm": 2.1544836798541342, - "learning_rate": 3.776999796601435e-06, - "loss": 1.0972, - "step": 1474 - }, - { - "epoch": 0.17735826369265917, - "grad_norm": 2.1885776590047312, - "learning_rate": 3.776642209797783e-06, - "loss": 0.9135, - "step": 1475 - }, - { - "epoch": 0.17747850658329825, - "grad_norm": 1.92136413483392, - "learning_rate": 3.7762843534789205e-06, - "loss": 0.9748, - "step": 1476 - }, - { - "epoch": 0.17759874947393736, - "grad_norm": 2.506379190287817, - "learning_rate": 3.7759262276991343e-06, - "loss": 1.0825, - "step": 1477 - }, - { - "epoch": 0.17771899236457644, - "grad_norm": 2.318486172455306, - "learning_rate": 3.7755678325127506e-06, - "loss": 1.007, - "step": 1478 - }, - { - "epoch": 0.17783923525521553, - "grad_norm": 1.7685912879935475, - "learning_rate": 3.7752091679741393e-06, - "loss": 0.9569, - "step": 1479 - }, - { - "epoch": 0.17795947814585464, - "grad_norm": 2.3603627745583413, - "learning_rate": 3.774850234137708e-06, - "loss": 0.9677, - "step": 1480 - }, - { - "epoch": 0.17807972103649372, - "grad_norm": 4.231198629182942, - "learning_rate": 3.7744910310579076e-06, - "loss": 1.0248, - "step": 1481 - }, - { - "epoch": 0.1781999639271328, - "grad_norm": 1.7752858751070184, - "learning_rate": 3.774131558789229e-06, - "loss": 1.0507, - "step": 1482 - }, - { - "epoch": 0.1783202068177719, - "grad_norm": 2.436646727842621, - "learning_rate": 3.773771817386203e-06, - "loss": 0.8958, - "step": 1483 - }, - { - "epoch": 0.178440449708411, - "grad_norm": 1.6189477159093633, - "learning_rate": 3.773411806903403e-06, - "loss": 1.002, - "step": 1484 - }, - { - "epoch": 0.17856069259905008, - "grad_norm": 1.7629825770079226, - "learning_rate": 3.7730515273954415e-06, - "loss": 1.1421, - "step": 1485 - }, - { - "epoch": 0.17868093548968916, - "grad_norm": 1.764192185479382, - "learning_rate": 3.772690978916973e-06, - "loss": 1.0376, - "step": 1486 - }, - { - "epoch": 0.17880117838032827, - "grad_norm": 2.1486723060608863, - "learning_rate": 3.772330161522693e-06, - "loss": 1.0725, - "step": 1487 - }, - { - "epoch": 0.17892142127096736, - "grad_norm": 1.8399684726045806, - "learning_rate": 3.7719690752673365e-06, - "loss": 1.0046, - "step": 1488 - }, - { - "epoch": 0.17904166416160644, - "grad_norm": 1.703537777216737, - "learning_rate": 3.7716077202056796e-06, - "loss": 0.9868, - "step": 1489 - }, - { - "epoch": 0.17916190705224552, - "grad_norm": 2.083800746617878, - "learning_rate": 3.7712460963925404e-06, - "loss": 1.1289, - "step": 1490 - }, - { - "epoch": 0.17928214994288463, - "grad_norm": 1.7531540920951252, - "learning_rate": 3.7708842038827775e-06, - "loss": 0.9513, - "step": 1491 - }, - { - "epoch": 0.17940239283352372, - "grad_norm": 1.5605240343926505, - "learning_rate": 3.770522042731288e-06, - "loss": 1.0536, - "step": 1492 - }, - { - "epoch": 0.1795226357241628, - "grad_norm": 2.6993550377982274, - "learning_rate": 3.7701596129930122e-06, - "loss": 1.0867, - "step": 1493 - }, - { - "epoch": 0.1796428786148019, - "grad_norm": 2.3256344370559523, - "learning_rate": 3.7697969147229315e-06, - "loss": 0.9319, - "step": 1494 - }, - { - "epoch": 0.179763121505441, - "grad_norm": 1.8371134137241647, - "learning_rate": 3.7694339479760647e-06, - "loss": 1.0519, - "step": 1495 - }, - { - "epoch": 0.17988336439608008, - "grad_norm": 0.9289894047534313, - "learning_rate": 3.769070712807476e-06, - "loss": 0.8041, - "step": 1496 - }, - { - "epoch": 0.18000360728671919, - "grad_norm": 1.6928679249186163, - "learning_rate": 3.768707209272266e-06, - "loss": 0.9909, - "step": 1497 - }, - { - "epoch": 0.18012385017735827, - "grad_norm": 2.194005176451148, - "learning_rate": 3.768343437425579e-06, - "loss": 0.9616, - "step": 1498 - }, - { - "epoch": 0.18024409306799735, - "grad_norm": 2.2168423685791656, - "learning_rate": 3.7679793973225987e-06, - "loss": 1.0684, - "step": 1499 - }, - { - "epoch": 0.18036433595863643, - "grad_norm": 1.0154704049632974, - "learning_rate": 3.767615089018549e-06, - "loss": 0.8366, - "step": 1500 - }, - { - "epoch": 0.18048457884927555, - "grad_norm": 1.7871840164356063, - "learning_rate": 3.7672505125686966e-06, - "loss": 1.056, - "step": 1501 - }, - { - "epoch": 0.18060482173991463, - "grad_norm": 2.6915281403821045, - "learning_rate": 3.7668856680283455e-06, - "loss": 1.0604, - "step": 1502 - }, - { - "epoch": 0.1807250646305537, - "grad_norm": 1.7063774061319474, - "learning_rate": 3.7665205554528437e-06, - "loss": 1.028, - "step": 1503 - }, - { - "epoch": 0.18084530752119282, - "grad_norm": 1.7647836081366954, - "learning_rate": 3.7661551748975782e-06, - "loss": 0.9559, - "step": 1504 - }, - { - "epoch": 0.1809655504118319, - "grad_norm": 1.2360293734001466, - "learning_rate": 3.7657895264179772e-06, - "loss": 0.8138, - "step": 1505 - }, - { - "epoch": 0.181085793302471, - "grad_norm": 1.8295609141277664, - "learning_rate": 3.765423610069509e-06, - "loss": 0.9527, - "step": 1506 - }, - { - "epoch": 0.18120603619311007, - "grad_norm": 1.7847413096250404, - "learning_rate": 3.765057425907683e-06, - "loss": 0.9299, - "step": 1507 - }, - { - "epoch": 0.18132627908374918, - "grad_norm": 1.797576037776295, - "learning_rate": 3.764690973988048e-06, - "loss": 0.9846, - "step": 1508 - }, - { - "epoch": 0.18144652197438826, - "grad_norm": 2.2313882565873073, - "learning_rate": 3.7643242543661967e-06, - "loss": 0.9515, - "step": 1509 - }, - { - "epoch": 0.18156676486502735, - "grad_norm": 1.255706540503408, - "learning_rate": 3.7639572670977573e-06, - "loss": 0.84, - "step": 1510 - }, - { - "epoch": 0.18168700775566646, - "grad_norm": 1.5501483230995667, - "learning_rate": 3.7635900122384042e-06, - "loss": 0.9706, - "step": 1511 - }, - { - "epoch": 0.18180725064630554, - "grad_norm": 1.9763961541060253, - "learning_rate": 3.7632224898438477e-06, - "loss": 1.0722, - "step": 1512 - }, - { - "epoch": 0.18192749353694462, - "grad_norm": 1.5165481115822381, - "learning_rate": 3.762854699969842e-06, - "loss": 0.9935, - "step": 1513 - }, - { - "epoch": 0.1820477364275837, - "grad_norm": 1.9132972900416094, - "learning_rate": 3.762486642672179e-06, - "loss": 0.9296, - "step": 1514 - }, - { - "epoch": 0.18216797931822282, - "grad_norm": 1.8339479745455822, - "learning_rate": 3.7621183180066946e-06, - "loss": 1.0723, - "step": 1515 - }, - { - "epoch": 0.1822882222088619, - "grad_norm": 1.7383974872258472, - "learning_rate": 3.7617497260292625e-06, - "loss": 0.9421, - "step": 1516 - }, - { - "epoch": 0.18240846509950098, - "grad_norm": 2.3177578924359676, - "learning_rate": 3.7613808667957967e-06, - "loss": 0.9982, - "step": 1517 - }, - { - "epoch": 0.1825287079901401, - "grad_norm": 2.012311237083513, - "learning_rate": 3.7610117403622547e-06, - "loss": 1.1164, - "step": 1518 - }, - { - "epoch": 0.18264895088077918, - "grad_norm": 1.7731734427933268, - "learning_rate": 3.7606423467846313e-06, - "loss": 1.0995, - "step": 1519 - }, - { - "epoch": 0.18276919377141826, - "grad_norm": 1.4885662136451996, - "learning_rate": 3.760272686118964e-06, - "loss": 0.9996, - "step": 1520 - }, - { - "epoch": 0.18288943666205737, - "grad_norm": 2.165607878381993, - "learning_rate": 3.7599027584213297e-06, - "loss": 1.1239, - "step": 1521 - }, - { - "epoch": 0.18300967955269645, - "grad_norm": 1.9367291547550405, - "learning_rate": 3.7595325637478465e-06, - "loss": 0.9837, - "step": 1522 - }, - { - "epoch": 0.18312992244333554, - "grad_norm": 1.6815621493966035, - "learning_rate": 3.7591621021546723e-06, - "loss": 1.0213, - "step": 1523 - }, - { - "epoch": 0.18325016533397462, - "grad_norm": 1.8569546501978005, - "learning_rate": 3.7587913736980062e-06, - "loss": 1.0204, - "step": 1524 - }, - { - "epoch": 0.18337040822461373, - "grad_norm": 1.6996770131200907, - "learning_rate": 3.7584203784340865e-06, - "loss": 1.0441, - "step": 1525 - }, - { - "epoch": 0.1834906511152528, - "grad_norm": 1.79785673440151, - "learning_rate": 3.7580491164191938e-06, - "loss": 1.0567, - "step": 1526 - }, - { - "epoch": 0.1836108940058919, - "grad_norm": 0.9986747065917819, - "learning_rate": 3.757677587709648e-06, - "loss": 0.8296, - "step": 1527 - }, - { - "epoch": 0.183731136896531, - "grad_norm": 1.9020647474914996, - "learning_rate": 3.7573057923618095e-06, - "loss": 0.9636, - "step": 1528 - }, - { - "epoch": 0.1838513797871701, - "grad_norm": 1.6114852551227477, - "learning_rate": 3.7569337304320793e-06, - "loss": 0.9391, - "step": 1529 - }, - { - "epoch": 0.18397162267780917, - "grad_norm": 1.1983333139503558, - "learning_rate": 3.756561401976899e-06, - "loss": 0.8569, - "step": 1530 - }, - { - "epoch": 0.18409186556844825, - "grad_norm": 1.7426380207890662, - "learning_rate": 3.7561888070527514e-06, - "loss": 1.0278, - "step": 1531 - }, - { - "epoch": 0.18421210845908736, - "grad_norm": 1.8686374796287903, - "learning_rate": 3.7558159457161577e-06, - "loss": 0.9959, - "step": 1532 - }, - { - "epoch": 0.18433235134972645, - "grad_norm": 2.7165724704848646, - "learning_rate": 3.755442818023681e-06, - "loss": 0.9827, - "step": 1533 - }, - { - "epoch": 0.18445259424036553, - "grad_norm": 1.754991904780745, - "learning_rate": 3.7550694240319246e-06, - "loss": 0.9663, - "step": 1534 - }, - { - "epoch": 0.18457283713100464, - "grad_norm": 1.9104280804952143, - "learning_rate": 3.7546957637975326e-06, - "loss": 0.9778, - "step": 1535 - }, - { - "epoch": 0.18469308002164372, - "grad_norm": 1.5202476558206153, - "learning_rate": 3.7543218373771873e-06, - "loss": 0.9427, - "step": 1536 - }, - { - "epoch": 0.1848133229122828, - "grad_norm": 1.3590046169772398, - "learning_rate": 3.753947644827615e-06, - "loss": 0.9853, - "step": 1537 - }, - { - "epoch": 0.1849335658029219, - "grad_norm": 0.9345257946799763, - "learning_rate": 3.753573186205579e-06, - "loss": 0.7973, - "step": 1538 - }, - { - "epoch": 0.185053808693561, - "grad_norm": 2.343819680767565, - "learning_rate": 3.753198461567885e-06, - "loss": 0.9824, - "step": 1539 - }, - { - "epoch": 0.18517405158420008, - "grad_norm": 1.8216594902285783, - "learning_rate": 3.7528234709713783e-06, - "loss": 1.122, - "step": 1540 - }, - { - "epoch": 0.18529429447483917, - "grad_norm": 1.8354134318929074, - "learning_rate": 3.7524482144729447e-06, - "loss": 1.0479, - "step": 1541 - }, - { - "epoch": 0.18541453736547828, - "grad_norm": 2.0141564275533668, - "learning_rate": 3.7520726921295106e-06, - "loss": 1.0417, - "step": 1542 - }, - { - "epoch": 0.18553478025611736, - "grad_norm": 1.7054807836411234, - "learning_rate": 3.751696903998042e-06, - "loss": 0.9272, - "step": 1543 - }, - { - "epoch": 0.18565502314675644, - "grad_norm": 1.571443368792532, - "learning_rate": 3.7513208501355456e-06, - "loss": 0.9013, - "step": 1544 - }, - { - "epoch": 0.18577526603739553, - "grad_norm": 1.6128532821090658, - "learning_rate": 3.750944530599069e-06, - "loss": 1.0447, - "step": 1545 - }, - { - "epoch": 0.18589550892803464, - "grad_norm": 1.844296002766121, - "learning_rate": 3.7505679454456992e-06, - "loss": 1.0145, - "step": 1546 - }, - { - "epoch": 0.18601575181867372, - "grad_norm": 2.1623314499630975, - "learning_rate": 3.750191094732564e-06, - "loss": 0.9071, - "step": 1547 - }, - { - "epoch": 0.1861359947093128, - "grad_norm": 1.9416231153239933, - "learning_rate": 3.7498139785168313e-06, - "loss": 0.956, - "step": 1548 - }, - { - "epoch": 0.1862562375999519, - "grad_norm": 2.134814318549184, - "learning_rate": 3.749436596855709e-06, - "loss": 0.9859, - "step": 1549 - }, - { - "epoch": 0.186376480490591, - "grad_norm": 1.6580077147172048, - "learning_rate": 3.749058949806446e-06, - "loss": 1.1112, - "step": 1550 - }, - { - "epoch": 0.18649672338123008, - "grad_norm": 1.562398907252143, - "learning_rate": 3.748681037426331e-06, - "loss": 1.0443, - "step": 1551 - }, - { - "epoch": 0.1866169662718692, - "grad_norm": 1.9491889660839814, - "learning_rate": 3.7483028597726936e-06, - "loss": 1.1153, - "step": 1552 - }, - { - "epoch": 0.18673720916250827, - "grad_norm": 1.773245914546488, - "learning_rate": 3.7479244169029017e-06, - "loss": 0.8254, - "step": 1553 - }, - { - "epoch": 0.18685745205314735, - "grad_norm": 2.240779634085037, - "learning_rate": 3.7475457088743658e-06, - "loss": 0.9383, - "step": 1554 - }, - { - "epoch": 0.18697769494378644, - "grad_norm": 1.960632482872529, - "learning_rate": 3.7471667357445348e-06, - "loss": 0.9491, - "step": 1555 - }, - { - "epoch": 0.18709793783442555, - "grad_norm": 2.2109690957120147, - "learning_rate": 3.7467874975709e-06, - "loss": 0.9304, - "step": 1556 - }, - { - "epoch": 0.18721818072506463, - "grad_norm": 2.2887917836998803, - "learning_rate": 3.7464079944109904e-06, - "loss": 0.9856, - "step": 1557 - }, - { - "epoch": 0.18733842361570371, - "grad_norm": 1.9638358653798804, - "learning_rate": 3.746028226322376e-06, - "loss": 0.9786, - "step": 1558 - }, - { - "epoch": 0.18745866650634282, - "grad_norm": 1.717150550840464, - "learning_rate": 3.745648193362669e-06, - "loss": 0.9609, - "step": 1559 - }, - { - "epoch": 0.1875789093969819, - "grad_norm": 1.9287406049933458, - "learning_rate": 3.745267895589518e-06, - "loss": 0.9332, - "step": 1560 - }, - { - "epoch": 0.187699152287621, - "grad_norm": 1.6942604421896381, - "learning_rate": 3.7448873330606154e-06, - "loss": 1.0159, - "step": 1561 - }, - { - "epoch": 0.18781939517826007, - "grad_norm": 1.9998498502635749, - "learning_rate": 3.7445065058336914e-06, - "loss": 1.0789, - "step": 1562 - }, - { - "epoch": 0.18793963806889918, - "grad_norm": 1.8613983173084452, - "learning_rate": 3.7441254139665176e-06, - "loss": 1.0641, - "step": 1563 - }, - { - "epoch": 0.18805988095953827, - "grad_norm": 1.676394192654657, - "learning_rate": 3.743744057516905e-06, - "loss": 1.0253, - "step": 1564 - }, - { - "epoch": 0.18818012385017735, - "grad_norm": 2.586407944220045, - "learning_rate": 3.743362436542706e-06, - "loss": 1.0899, - "step": 1565 - }, - { - "epoch": 0.18830036674081646, - "grad_norm": 2.0570537445658115, - "learning_rate": 3.7429805511018115e-06, - "loss": 0.9719, - "step": 1566 - }, - { - "epoch": 0.18842060963145554, - "grad_norm": 2.607337201633118, - "learning_rate": 3.7425984012521524e-06, - "loss": 0.9775, - "step": 1567 - }, - { - "epoch": 0.18854085252209463, - "grad_norm": 0.9113382024162872, - "learning_rate": 3.7422159870517025e-06, - "loss": 0.8235, - "step": 1568 - }, - { - "epoch": 0.1886610954127337, - "grad_norm": 1.4950569085864622, - "learning_rate": 3.7418333085584717e-06, - "loss": 0.9891, - "step": 1569 - }, - { - "epoch": 0.18878133830337282, - "grad_norm": 4.832122037091743, - "learning_rate": 3.7414503658305128e-06, - "loss": 1.1105, - "step": 1570 - }, - { - "epoch": 0.1889015811940119, - "grad_norm": 2.0901308807204537, - "learning_rate": 3.7410671589259185e-06, - "loss": 0.9808, - "step": 1571 - }, - { - "epoch": 0.18902182408465099, - "grad_norm": 1.6664242886095872, - "learning_rate": 3.7406836879028205e-06, - "loss": 0.991, - "step": 1572 - }, - { - "epoch": 0.1891420669752901, - "grad_norm": 1.9283195310915817, - "learning_rate": 3.7402999528193907e-06, - "loss": 0.9722, - "step": 1573 - }, - { - "epoch": 0.18926230986592918, - "grad_norm": 2.2341233758693715, - "learning_rate": 3.739915953733842e-06, - "loss": 1.0509, - "step": 1574 - }, - { - "epoch": 0.18938255275656826, - "grad_norm": 1.4955381950569318, - "learning_rate": 3.7395316907044264e-06, - "loss": 1.0126, - "step": 1575 - }, - { - "epoch": 0.18950279564720737, - "grad_norm": 1.4535033851114332, - "learning_rate": 3.7391471637894364e-06, - "loss": 0.9939, - "step": 1576 - }, - { - "epoch": 0.18962303853784646, - "grad_norm": 1.7645346051198332, - "learning_rate": 3.738762373047205e-06, - "loss": 1.0609, - "step": 1577 - }, - { - "epoch": 0.18974328142848554, - "grad_norm": 1.8467758694297365, - "learning_rate": 3.738377318536103e-06, - "loss": 1.0353, - "step": 1578 - }, - { - "epoch": 0.18986352431912462, - "grad_norm": 2.074796377095519, - "learning_rate": 3.7379920003145447e-06, - "loss": 0.925, - "step": 1579 - }, - { - "epoch": 0.18998376720976373, - "grad_norm": 1.8038565964636626, - "learning_rate": 3.7376064184409817e-06, - "loss": 1.0483, - "step": 1580 - }, - { - "epoch": 0.19010401010040281, - "grad_norm": 1.3290063962703544, - "learning_rate": 3.7372205729739063e-06, - "loss": 1.0697, - "step": 1581 - }, - { - "epoch": 0.1902242529910419, - "grad_norm": 1.7969173923965025, - "learning_rate": 3.7368344639718514e-06, - "loss": 0.9181, - "step": 1582 - }, - { - "epoch": 0.190344495881681, - "grad_norm": 1.494832116974218, - "learning_rate": 3.7364480914933895e-06, - "loss": 1.0148, - "step": 1583 - }, - { - "epoch": 0.1904647387723201, - "grad_norm": 1.879055152116141, - "learning_rate": 3.7360614555971325e-06, - "loss": 1.0154, - "step": 1584 - }, - { - "epoch": 0.19058498166295917, - "grad_norm": 1.7399816532921648, - "learning_rate": 3.735674556341733e-06, - "loss": 1.0527, - "step": 1585 - }, - { - "epoch": 0.19070522455359826, - "grad_norm": 1.6998651787646462, - "learning_rate": 3.7352873937858835e-06, - "loss": 1.0268, - "step": 1586 - }, - { - "epoch": 0.19082546744423737, - "grad_norm": 2.560593710092025, - "learning_rate": 3.734899967988316e-06, - "loss": 0.9248, - "step": 1587 - }, - { - "epoch": 0.19094571033487645, - "grad_norm": 4.189301516050476, - "learning_rate": 3.7345122790078026e-06, - "loss": 1.0399, - "step": 1588 - }, - { - "epoch": 0.19106595322551553, - "grad_norm": 2.1281120848858976, - "learning_rate": 3.7341243269031556e-06, - "loss": 1.1339, - "step": 1589 - }, - { - "epoch": 0.19118619611615464, - "grad_norm": 1.394488806839997, - "learning_rate": 3.7337361117332275e-06, - "loss": 0.9819, - "step": 1590 - }, - { - "epoch": 0.19130643900679373, - "grad_norm": 1.7106768265811725, - "learning_rate": 3.7333476335569087e-06, - "loss": 0.9694, - "step": 1591 - }, - { - "epoch": 0.1914266818974328, - "grad_norm": 2.158763581428149, - "learning_rate": 3.7329588924331325e-06, - "loss": 0.8659, - "step": 1592 - }, - { - "epoch": 0.1915469247880719, - "grad_norm": 1.5726243646443194, - "learning_rate": 3.732569888420871e-06, - "loss": 1.0235, - "step": 1593 - }, - { - "epoch": 0.191667167678711, - "grad_norm": 1.9388206656961517, - "learning_rate": 3.732180621579134e-06, - "loss": 1.0407, - "step": 1594 - }, - { - "epoch": 0.1917874105693501, - "grad_norm": 1.970949851282097, - "learning_rate": 3.7317910919669745e-06, - "loss": 1.0177, - "step": 1595 - }, - { - "epoch": 0.19190765345998917, - "grad_norm": 3.1425758489834053, - "learning_rate": 3.7314012996434826e-06, - "loss": 0.9654, - "step": 1596 - }, - { - "epoch": 0.19202789635062828, - "grad_norm": 1.7554694263574089, - "learning_rate": 3.7310112446677907e-06, - "loss": 1.0066, - "step": 1597 - }, - { - "epoch": 0.19214813924126736, - "grad_norm": 1.6626109208125415, - "learning_rate": 3.7306209270990695e-06, - "loss": 0.8895, - "step": 1598 - }, - { - "epoch": 0.19226838213190645, - "grad_norm": 1.8035906476368644, - "learning_rate": 3.7302303469965292e-06, - "loss": 1.0689, - "step": 1599 - }, - { - "epoch": 0.19238862502254553, - "grad_norm": 1.7788664279345476, - "learning_rate": 3.7298395044194206e-06, - "loss": 0.9138, - "step": 1600 - }, - { - "epoch": 0.19250886791318464, - "grad_norm": 1.7855445222540764, - "learning_rate": 3.7294483994270356e-06, - "loss": 1.1362, - "step": 1601 - }, - { - "epoch": 0.19262911080382372, - "grad_norm": 1.8335618252406927, - "learning_rate": 3.7290570320787033e-06, - "loss": 0.9728, - "step": 1602 - }, - { - "epoch": 0.1927493536944628, - "grad_norm": 1.9478709849612614, - "learning_rate": 3.728665402433793e-06, - "loss": 0.9104, - "step": 1603 - }, - { - "epoch": 0.19286959658510192, - "grad_norm": 2.2285648377289617, - "learning_rate": 3.7282735105517164e-06, - "loss": 1.0623, - "step": 1604 - }, - { - "epoch": 0.192989839475741, - "grad_norm": 1.9850693937034023, - "learning_rate": 3.727881356491922e-06, - "loss": 0.8806, - "step": 1605 - }, - { - "epoch": 0.19311008236638008, - "grad_norm": 1.6725955016691811, - "learning_rate": 3.7274889403139002e-06, - "loss": 0.956, - "step": 1606 - }, - { - "epoch": 0.1932303252570192, - "grad_norm": 2.507268543609593, - "learning_rate": 3.727096262077179e-06, - "loss": 0.9903, - "step": 1607 - }, - { - "epoch": 0.19335056814765827, - "grad_norm": 1.906198594697829, - "learning_rate": 3.7267033218413285e-06, - "loss": 1.0602, - "step": 1608 - }, - { - "epoch": 0.19347081103829736, - "grad_norm": 1.8865856178968914, - "learning_rate": 3.726310119665957e-06, - "loss": 1.0133, - "step": 1609 - }, - { - "epoch": 0.19359105392893644, - "grad_norm": 1.7752027422673626, - "learning_rate": 3.725916655610713e-06, - "loss": 1.0571, - "step": 1610 - }, - { - "epoch": 0.19371129681957555, - "grad_norm": 2.5646184212840613, - "learning_rate": 3.725522929735284e-06, - "loss": 0.9614, - "step": 1611 - }, - { - "epoch": 0.19383153971021463, - "grad_norm": 2.4985595367953777, - "learning_rate": 3.725128942099399e-06, - "loss": 0.9424, - "step": 1612 - }, - { - "epoch": 0.19395178260085372, - "grad_norm": 2.407588833985212, - "learning_rate": 3.7247346927628245e-06, - "loss": 1.0101, - "step": 1613 - }, - { - "epoch": 0.19407202549149283, - "grad_norm": 1.635945066709608, - "learning_rate": 3.7243401817853694e-06, - "loss": 0.984, - "step": 1614 - }, - { - "epoch": 0.1941922683821319, - "grad_norm": 1.896112344632569, - "learning_rate": 3.723945409226879e-06, - "loss": 0.9242, - "step": 1615 - }, - { - "epoch": 0.194312511272771, - "grad_norm": 2.0263055337006217, - "learning_rate": 3.723550375147241e-06, - "loss": 1.0048, - "step": 1616 - }, - { - "epoch": 0.19443275416341008, - "grad_norm": 1.6109350846399206, - "learning_rate": 3.7231550796063816e-06, - "loss": 1.0009, - "step": 1617 - }, - { - "epoch": 0.1945529970540492, - "grad_norm": 1.6250264825863778, - "learning_rate": 3.722759522664266e-06, - "loss": 0.8465, - "step": 1618 - }, - { - "epoch": 0.19467323994468827, - "grad_norm": 2.3359472982654035, - "learning_rate": 3.7223637043809016e-06, - "loss": 1.013, - "step": 1619 - }, - { - "epoch": 0.19479348283532735, - "grad_norm": 1.8055923637485565, - "learning_rate": 3.7219676248163322e-06, - "loss": 1.0657, - "step": 1620 - }, - { - "epoch": 0.19491372572596646, - "grad_norm": 1.969293549601779, - "learning_rate": 3.721571284030643e-06, - "loss": 1.1317, - "step": 1621 - }, - { - "epoch": 0.19503396861660555, - "grad_norm": 1.9182135586945588, - "learning_rate": 3.7211746820839587e-06, - "loss": 0.9903, - "step": 1622 - }, - { - "epoch": 0.19515421150724463, - "grad_norm": 1.7163010924000368, - "learning_rate": 3.7207778190364437e-06, - "loss": 1.0263, - "step": 1623 - }, - { - "epoch": 0.1952744543978837, - "grad_norm": 1.602574024365351, - "learning_rate": 3.720380694948302e-06, - "loss": 0.9398, - "step": 1624 - }, - { - "epoch": 0.19539469728852282, - "grad_norm": 1.0486712063094732, - "learning_rate": 3.719983309879777e-06, - "loss": 0.9387, - "step": 1625 - }, - { - "epoch": 0.1955149401791619, - "grad_norm": 1.7297190814077115, - "learning_rate": 3.719585663891151e-06, - "loss": 0.9806, - "step": 1626 - }, - { - "epoch": 0.195635183069801, - "grad_norm": 2.0351985407149282, - "learning_rate": 3.719187757042747e-06, - "loss": 0.9914, - "step": 1627 - }, - { - "epoch": 0.1957554259604401, - "grad_norm": 0.8572262690750702, - "learning_rate": 3.7187895893949275e-06, - "loss": 0.7955, - "step": 1628 - }, - { - "epoch": 0.19587566885107918, - "grad_norm": 2.039032099120835, - "learning_rate": 3.7183911610080937e-06, - "loss": 0.9507, - "step": 1629 - }, - { - "epoch": 0.19599591174171827, - "grad_norm": 2.1980384925477714, - "learning_rate": 3.7179924719426872e-06, - "loss": 0.9461, - "step": 1630 - }, - { - "epoch": 0.19611615463235738, - "grad_norm": 2.1289996485389575, - "learning_rate": 3.7175935222591885e-06, - "loss": 0.9666, - "step": 1631 - }, - { - "epoch": 0.19623639752299646, - "grad_norm": 1.5845728673529955, - "learning_rate": 3.717194312018118e-06, - "loss": 0.9534, - "step": 1632 - }, - { - "epoch": 0.19635664041363554, - "grad_norm": 1.8547035325859011, - "learning_rate": 3.716794841280036e-06, - "loss": 0.9703, - "step": 1633 - }, - { - "epoch": 0.19647688330427462, - "grad_norm": 1.9173919991487547, - "learning_rate": 3.7163951101055407e-06, - "loss": 0.9826, - "step": 1634 - }, - { - "epoch": 0.19659712619491373, - "grad_norm": 1.770005387874528, - "learning_rate": 3.715995118555273e-06, - "loss": 0.9902, - "step": 1635 - }, - { - "epoch": 0.19671736908555282, - "grad_norm": 2.230225237432349, - "learning_rate": 3.71559486668991e-06, - "loss": 1.0568, - "step": 1636 - }, - { - "epoch": 0.1968376119761919, - "grad_norm": 1.469683979215953, - "learning_rate": 3.715194354570169e-06, - "loss": 0.9768, - "step": 1637 - }, - { - "epoch": 0.196957854866831, - "grad_norm": 2.319880589832227, - "learning_rate": 3.714793582256809e-06, - "loss": 1.0262, - "step": 1638 - }, - { - "epoch": 0.1970780977574701, - "grad_norm": 2.039950005909784, - "learning_rate": 3.7143925498106253e-06, - "loss": 1.0454, - "step": 1639 - }, - { - "epoch": 0.19719834064810918, - "grad_norm": 2.359849288460093, - "learning_rate": 3.7139912572924558e-06, - "loss": 0.9947, - "step": 1640 - }, - { - "epoch": 0.19731858353874826, - "grad_norm": 2.346218182810912, - "learning_rate": 3.7135897047631744e-06, - "loss": 1.0172, - "step": 1641 - }, - { - "epoch": 0.19743882642938737, - "grad_norm": 1.723248230820753, - "learning_rate": 3.713187892283698e-06, - "loss": 0.9551, - "step": 1642 - }, - { - "epoch": 0.19755906932002645, - "grad_norm": 1.9275483541199954, - "learning_rate": 3.71278581991498e-06, - "loss": 1.0716, - "step": 1643 - }, - { - "epoch": 0.19767931221066554, - "grad_norm": 1.7490207111927383, - "learning_rate": 3.712383487718015e-06, - "loss": 0.9897, - "step": 1644 - }, - { - "epoch": 0.19779955510130465, - "grad_norm": 2.5314383731026133, - "learning_rate": 3.7119808957538365e-06, - "loss": 1.0691, - "step": 1645 - }, - { - "epoch": 0.19791979799194373, - "grad_norm": 2.2673927513927983, - "learning_rate": 3.711578044083517e-06, - "loss": 1.0001, - "step": 1646 - }, - { - "epoch": 0.1980400408825828, - "grad_norm": 1.7084198286287977, - "learning_rate": 3.7111749327681698e-06, - "loss": 0.9477, - "step": 1647 - }, - { - "epoch": 0.1981602837732219, - "grad_norm": 1.9199273389736593, - "learning_rate": 3.7107715618689455e-06, - "loss": 1.0628, - "step": 1648 - }, - { - "epoch": 0.198280526663861, - "grad_norm": 1.339971330535282, - "learning_rate": 3.710367931447035e-06, - "loss": 1.0308, - "step": 1649 - }, - { - "epoch": 0.1984007695545001, - "grad_norm": 2.1586942226181867, - "learning_rate": 3.70996404156367e-06, - "loss": 1.0636, - "step": 1650 - }, - { - "epoch": 0.19852101244513917, - "grad_norm": 1.549315098215844, - "learning_rate": 3.7095598922801187e-06, - "loss": 0.9292, - "step": 1651 - }, - { - "epoch": 0.19864125533577828, - "grad_norm": 2.083222983934619, - "learning_rate": 3.7091554836576914e-06, - "loss": 0.9568, - "step": 1652 - }, - { - "epoch": 0.19876149822641737, - "grad_norm": 1.8633404928262065, - "learning_rate": 3.708750815757736e-06, - "loss": 1.0297, - "step": 1653 - }, - { - "epoch": 0.19888174111705645, - "grad_norm": 1.975618219795407, - "learning_rate": 3.7083458886416407e-06, - "loss": 0.9349, - "step": 1654 - }, - { - "epoch": 0.19900198400769553, - "grad_norm": 1.8793156231405104, - "learning_rate": 3.707940702370832e-06, - "loss": 1.088, - "step": 1655 - }, - { - "epoch": 0.19912222689833464, - "grad_norm": 0.891803764487083, - "learning_rate": 3.707535257006777e-06, - "loss": 0.7937, - "step": 1656 - }, - { - "epoch": 0.19924246978897373, - "grad_norm": 1.9425323720269583, - "learning_rate": 3.707129552610981e-06, - "loss": 1.0848, - "step": 1657 - }, - { - "epoch": 0.1993627126796128, - "grad_norm": 1.6787728591691926, - "learning_rate": 3.70672358924499e-06, - "loss": 0.9362, - "step": 1658 - }, - { - "epoch": 0.19948295557025192, - "grad_norm": 2.0600929085603483, - "learning_rate": 3.706317366970386e-06, - "loss": 0.9835, - "step": 1659 - }, - { - "epoch": 0.199603198460891, - "grad_norm": 1.6133805583841263, - "learning_rate": 3.705910885848795e-06, - "loss": 1.043, - "step": 1660 - }, - { - "epoch": 0.19972344135153008, - "grad_norm": 1.890826884438468, - "learning_rate": 3.705504145941879e-06, - "loss": 1.042, - "step": 1661 - }, - { - "epoch": 0.1998436842421692, - "grad_norm": 1.8064369210539029, - "learning_rate": 3.7050971473113403e-06, - "loss": 0.9954, - "step": 1662 - }, - { - "epoch": 0.19996392713280828, - "grad_norm": 1.5518894799886251, - "learning_rate": 3.7046898900189196e-06, - "loss": 1.002, - "step": 1663 - }, - { - "epoch": 0.20008417002344736, - "grad_norm": 1.5218153704920465, - "learning_rate": 3.704282374126398e-06, - "loss": 1.0404, - "step": 1664 - }, - { - "epoch": 0.20020441291408644, - "grad_norm": 1.4818949007767865, - "learning_rate": 3.7038745996955954e-06, - "loss": 1.0813, - "step": 1665 - }, - { - "epoch": 0.20032465580472555, - "grad_norm": 2.2114155929778834, - "learning_rate": 3.703466566788371e-06, - "loss": 0.9219, - "step": 1666 - }, - { - "epoch": 0.20044489869536464, - "grad_norm": 1.7437612499024564, - "learning_rate": 3.703058275466622e-06, - "loss": 0.9448, - "step": 1667 - }, - { - "epoch": 0.20056514158600372, - "grad_norm": 1.8796902803192759, - "learning_rate": 3.7026497257922877e-06, - "loss": 0.9737, - "step": 1668 - }, - { - "epoch": 0.20068538447664283, - "grad_norm": 1.7488650319983199, - "learning_rate": 3.7022409178273436e-06, - "loss": 1.0533, - "step": 1669 - }, - { - "epoch": 0.2008056273672819, - "grad_norm": 2.2959169219135744, - "learning_rate": 3.7018318516338054e-06, - "loss": 0.98, - "step": 1670 - }, - { - "epoch": 0.200925870257921, - "grad_norm": 4.535214077944732, - "learning_rate": 3.7014225272737284e-06, - "loss": 1.0141, - "step": 1671 - }, - { - "epoch": 0.20104611314856008, - "grad_norm": 2.2144203386825296, - "learning_rate": 3.701012944809207e-06, - "loss": 0.9393, - "step": 1672 - }, - { - "epoch": 0.2011663560391992, - "grad_norm": 1.6712753789282944, - "learning_rate": 3.700603104302374e-06, - "loss": 0.9832, - "step": 1673 - }, - { - "epoch": 0.20128659892983827, - "grad_norm": 0.9856239751352075, - "learning_rate": 3.7001930058154027e-06, - "loss": 0.7813, - "step": 1674 - }, - { - "epoch": 0.20140684182047736, - "grad_norm": 2.341614220553409, - "learning_rate": 3.6997826494105037e-06, - "loss": 1.0032, - "step": 1675 - }, - { - "epoch": 0.20152708471111647, - "grad_norm": 2.136944356162661, - "learning_rate": 3.6993720351499286e-06, - "loss": 0.8959, - "step": 1676 - }, - { - "epoch": 0.20164732760175555, - "grad_norm": 1.6430774342590533, - "learning_rate": 3.6989611630959666e-06, - "loss": 0.972, - "step": 1677 - }, - { - "epoch": 0.20176757049239463, - "grad_norm": 0.9780080243934061, - "learning_rate": 3.6985500333109474e-06, - "loss": 0.8101, - "step": 1678 - }, - { - "epoch": 0.20188781338303372, - "grad_norm": 2.2247040069520674, - "learning_rate": 3.6981386458572385e-06, - "loss": 0.9632, - "step": 1679 - }, - { - "epoch": 0.20200805627367283, - "grad_norm": 2.427939519094355, - "learning_rate": 3.6977270007972468e-06, - "loss": 0.9683, - "step": 1680 - }, - { - "epoch": 0.2021282991643119, - "grad_norm": 2.094297878264998, - "learning_rate": 3.6973150981934196e-06, - "loss": 0.9157, - "step": 1681 - }, - { - "epoch": 0.202248542054951, - "grad_norm": 2.303546377322494, - "learning_rate": 3.6969029381082415e-06, - "loss": 1.0347, - "step": 1682 - }, - { - "epoch": 0.2023687849455901, - "grad_norm": 2.7907982134913505, - "learning_rate": 3.696490520604237e-06, - "loss": 1.0049, - "step": 1683 - }, - { - "epoch": 0.20248902783622919, - "grad_norm": 1.637500803707013, - "learning_rate": 3.696077845743968e-06, - "loss": 1.0087, - "step": 1684 - }, - { - "epoch": 0.20260927072686827, - "grad_norm": 2.0956180267263473, - "learning_rate": 3.69566491359004e-06, - "loss": 0.9308, - "step": 1685 - }, - { - "epoch": 0.20272951361750738, - "grad_norm": 1.5677353697142655, - "learning_rate": 3.695251724205092e-06, - "loss": 0.9074, - "step": 1686 - }, - { - "epoch": 0.20284975650814646, - "grad_norm": 1.5903351795140774, - "learning_rate": 3.6948382776518054e-06, - "loss": 1.0602, - "step": 1687 - }, - { - "epoch": 0.20296999939878554, - "grad_norm": 1.98342608494086, - "learning_rate": 3.6944245739929e-06, - "loss": 0.9934, - "step": 1688 - }, - { - "epoch": 0.20309024228942463, - "grad_norm": 1.9754646479002755, - "learning_rate": 3.6940106132911332e-06, - "loss": 0.9245, - "step": 1689 - }, - { - "epoch": 0.20321048518006374, - "grad_norm": 1.8010934819684359, - "learning_rate": 3.6935963956093037e-06, - "loss": 1.084, - "step": 1690 - }, - { - "epoch": 0.20333072807070282, - "grad_norm": 1.7969984592649788, - "learning_rate": 3.6931819210102474e-06, - "loss": 0.8875, - "step": 1691 - }, - { - "epoch": 0.2034509709613419, - "grad_norm": 1.7437660353275362, - "learning_rate": 3.6927671895568402e-06, - "loss": 1.048, - "step": 1692 - }, - { - "epoch": 0.20357121385198101, - "grad_norm": 1.7237082662087746, - "learning_rate": 3.692352201311996e-06, - "loss": 1.0732, - "step": 1693 - }, - { - "epoch": 0.2036914567426201, - "grad_norm": 1.5840348730727862, - "learning_rate": 3.6919369563386687e-06, - "loss": 0.9627, - "step": 1694 - }, - { - "epoch": 0.20381169963325918, - "grad_norm": 2.1786115718600416, - "learning_rate": 3.69152145469985e-06, - "loss": 0.9976, - "step": 1695 - }, - { - "epoch": 0.20393194252389826, - "grad_norm": 1.6921097699714438, - "learning_rate": 3.691105696458572e-06, - "loss": 1.0193, - "step": 1696 - }, - { - "epoch": 0.20405218541453737, - "grad_norm": 2.5967603013177785, - "learning_rate": 3.690689681677904e-06, - "loss": 0.8786, - "step": 1697 - }, - { - "epoch": 0.20417242830517646, - "grad_norm": 1.6563668839579173, - "learning_rate": 3.690273410420956e-06, - "loss": 1.0848, - "step": 1698 - }, - { - "epoch": 0.20429267119581554, - "grad_norm": 2.553391060570849, - "learning_rate": 3.689856882750875e-06, - "loss": 0.9695, - "step": 1699 - }, - { - "epoch": 0.20441291408645465, - "grad_norm": 1.6129790016032561, - "learning_rate": 3.6894400987308486e-06, - "loss": 0.9887, - "step": 1700 - }, - { - "epoch": 0.20453315697709373, - "grad_norm": 1.7255750222932227, - "learning_rate": 3.6890230584241024e-06, - "loss": 1.0552, - "step": 1701 - }, - { - "epoch": 0.20465339986773282, - "grad_norm": 0.9536435327062908, - "learning_rate": 3.6886057618939016e-06, - "loss": 0.8961, - "step": 1702 - }, - { - "epoch": 0.2047736427583719, - "grad_norm": 1.998945196472013, - "learning_rate": 3.6881882092035492e-06, - "loss": 0.8998, - "step": 1703 - }, - { - "epoch": 0.204893885649011, - "grad_norm": 1.0617336426533233, - "learning_rate": 3.6877704004163873e-06, - "loss": 0.8741, - "step": 1704 - }, - { - "epoch": 0.2050141285396501, - "grad_norm": 1.680008187841717, - "learning_rate": 3.6873523355957984e-06, - "loss": 0.9853, - "step": 1705 - }, - { - "epoch": 0.20513437143028918, - "grad_norm": 1.100504265422952, - "learning_rate": 3.686934014805201e-06, - "loss": 0.9166, - "step": 1706 - }, - { - "epoch": 0.20525461432092829, - "grad_norm": 1.6159418441014721, - "learning_rate": 3.6865154381080552e-06, - "loss": 1.0108, - "step": 1707 - }, - { - "epoch": 0.20537485721156737, - "grad_norm": 1.7415400101574956, - "learning_rate": 3.6860966055678585e-06, - "loss": 1.0215, - "step": 1708 - }, - { - "epoch": 0.20549510010220645, - "grad_norm": 1.8850621123667168, - "learning_rate": 3.685677517248147e-06, - "loss": 1.0582, - "step": 1709 - }, - { - "epoch": 0.20561534299284553, - "grad_norm": 1.8446684667143995, - "learning_rate": 3.6852581732124967e-06, - "loss": 1.0033, - "step": 1710 - }, - { - "epoch": 0.20573558588348465, - "grad_norm": 1.6921281573456421, - "learning_rate": 3.6848385735245213e-06, - "loss": 0.9585, - "step": 1711 - }, - { - "epoch": 0.20585582877412373, - "grad_norm": 1.8457111338285237, - "learning_rate": 3.6844187182478734e-06, - "loss": 1.0706, - "step": 1712 - }, - { - "epoch": 0.2059760716647628, - "grad_norm": 1.624942631809241, - "learning_rate": 3.683998607446246e-06, - "loss": 0.9509, - "step": 1713 - }, - { - "epoch": 0.20609631455540192, - "grad_norm": 1.9081467744126541, - "learning_rate": 3.6835782411833686e-06, - "loss": 0.9435, - "step": 1714 - }, - { - "epoch": 0.206216557446041, - "grad_norm": 1.8098479632809659, - "learning_rate": 3.68315761952301e-06, - "loss": 0.9479, - "step": 1715 - }, - { - "epoch": 0.2063368003366801, - "grad_norm": 1.7128510247658382, - "learning_rate": 3.6827367425289797e-06, - "loss": 1.0284, - "step": 1716 - }, - { - "epoch": 0.2064570432273192, - "grad_norm": 2.188882881108972, - "learning_rate": 3.6823156102651225e-06, - "loss": 0.9326, - "step": 1717 - }, - { - "epoch": 0.20657728611795828, - "grad_norm": 1.575744797942677, - "learning_rate": 3.6818942227953257e-06, - "loss": 0.9134, - "step": 1718 - }, - { - "epoch": 0.20669752900859736, - "grad_norm": 1.8814691206856162, - "learning_rate": 3.681472580183512e-06, - "loss": 0.8964, - "step": 1719 - }, - { - "epoch": 0.20681777189923645, - "grad_norm": 1.7997503080965231, - "learning_rate": 3.6810506824936455e-06, - "loss": 1.0625, - "step": 1720 - }, - { - "epoch": 0.20693801478987556, - "grad_norm": 1.219722262177167, - "learning_rate": 3.680628529789726e-06, - "loss": 0.8598, - "step": 1721 - }, - { - "epoch": 0.20705825768051464, - "grad_norm": 1.8771058019492204, - "learning_rate": 3.680206122135796e-06, - "loss": 1.0584, - "step": 1722 - }, - { - "epoch": 0.20717850057115372, - "grad_norm": 1.8011300831043346, - "learning_rate": 3.6797834595959323e-06, - "loss": 0.9821, - "step": 1723 - }, - { - "epoch": 0.20729874346179283, - "grad_norm": 2.1286791199079835, - "learning_rate": 3.679360542234254e-06, - "loss": 0.9785, - "step": 1724 - }, - { - "epoch": 0.20741898635243192, - "grad_norm": 1.6057858490402466, - "learning_rate": 3.678937370114916e-06, - "loss": 0.9246, - "step": 1725 - }, - { - "epoch": 0.207539229243071, - "grad_norm": 1.7669443248187753, - "learning_rate": 3.678513943302114e-06, - "loss": 0.9871, - "step": 1726 - }, - { - "epoch": 0.20765947213371008, - "grad_norm": 1.8393404974959802, - "learning_rate": 3.678090261860082e-06, - "loss": 1.049, - "step": 1727 - }, - { - "epoch": 0.2077797150243492, - "grad_norm": 1.8416959020903443, - "learning_rate": 3.6776663258530906e-06, - "loss": 0.9847, - "step": 1728 - }, - { - "epoch": 0.20789995791498828, - "grad_norm": 1.7546061115632845, - "learning_rate": 3.6772421353454516e-06, - "loss": 0.913, - "step": 1729 - }, - { - "epoch": 0.20802020080562736, - "grad_norm": 1.9330980069988914, - "learning_rate": 3.6768176904015153e-06, - "loss": 1.0844, - "step": 1730 - }, - { - "epoch": 0.20814044369626647, - "grad_norm": 2.036932991562432, - "learning_rate": 3.6763929910856674e-06, - "loss": 0.7968, - "step": 1731 - }, - { - "epoch": 0.20826068658690555, - "grad_norm": 2.011611135622865, - "learning_rate": 3.6759680374623365e-06, - "loss": 0.9727, - "step": 1732 - }, - { - "epoch": 0.20838092947754464, - "grad_norm": 1.9225492457716116, - "learning_rate": 3.675542829595986e-06, - "loss": 0.9513, - "step": 1733 - }, - { - "epoch": 0.20850117236818372, - "grad_norm": 1.3878792218749905, - "learning_rate": 3.6751173675511213e-06, - "loss": 0.9933, - "step": 1734 - }, - { - "epoch": 0.20862141525882283, - "grad_norm": 1.8691286985521085, - "learning_rate": 3.674691651392283e-06, - "loss": 1.0817, - "step": 1735 - }, - { - "epoch": 0.2087416581494619, - "grad_norm": 2.2174668161001576, - "learning_rate": 3.674265681184053e-06, - "loss": 0.9656, - "step": 1736 - }, - { - "epoch": 0.208861901040101, - "grad_norm": 1.7325235743384353, - "learning_rate": 3.6738394569910504e-06, - "loss": 1.0593, - "step": 1737 - }, - { - "epoch": 0.2089821439307401, - "grad_norm": 1.904027487456232, - "learning_rate": 3.6734129788779333e-06, - "loss": 1.0296, - "step": 1738 - }, - { - "epoch": 0.2091023868213792, - "grad_norm": 1.5984152783949548, - "learning_rate": 3.6729862469093976e-06, - "loss": 1.1015, - "step": 1739 - }, - { - "epoch": 0.20922262971201827, - "grad_norm": 2.0315717002239015, - "learning_rate": 3.6725592611501782e-06, - "loss": 1.021, - "step": 1740 - }, - { - "epoch": 0.20934287260265738, - "grad_norm": 1.6651400090825432, - "learning_rate": 3.6721320216650496e-06, - "loss": 0.9644, - "step": 1741 - }, - { - "epoch": 0.20946311549329646, - "grad_norm": 1.9660718958680758, - "learning_rate": 3.6717045285188215e-06, - "loss": 1.0412, - "step": 1742 - }, - { - "epoch": 0.20958335838393555, - "grad_norm": 1.887692154962116, - "learning_rate": 3.671276781776346e-06, - "loss": 1.0577, - "step": 1743 - }, - { - "epoch": 0.20970360127457463, - "grad_norm": 1.839739041274668, - "learning_rate": 3.6708487815025128e-06, - "loss": 0.8706, - "step": 1744 - }, - { - "epoch": 0.20982384416521374, - "grad_norm": 2.2309575121572855, - "learning_rate": 3.6704205277622463e-06, - "loss": 0.9444, - "step": 1745 - }, - { - "epoch": 0.20994408705585282, - "grad_norm": 2.9936776297787304, - "learning_rate": 3.6699920206205146e-06, - "loss": 0.9981, - "step": 1746 - }, - { - "epoch": 0.2100643299464919, - "grad_norm": 1.5735847957274125, - "learning_rate": 3.669563260142321e-06, - "loss": 1.0282, - "step": 1747 - }, - { - "epoch": 0.21018457283713102, - "grad_norm": 1.7579885437020337, - "learning_rate": 3.6691342463927083e-06, - "loss": 1.0366, - "step": 1748 - }, - { - "epoch": 0.2103048157277701, - "grad_norm": 1.6251123096095865, - "learning_rate": 3.668704979436758e-06, - "loss": 1.0181, - "step": 1749 - }, - { - "epoch": 0.21042505861840918, - "grad_norm": 1.7342942365107934, - "learning_rate": 3.668275459339588e-06, - "loss": 0.9928, - "step": 1750 - }, - { - "epoch": 0.21054530150904827, - "grad_norm": 1.8375202255692917, - "learning_rate": 3.667845686166358e-06, - "loss": 1.0106, - "step": 1751 - }, - { - "epoch": 0.21066554439968738, - "grad_norm": 1.5003826924744743, - "learning_rate": 3.6674156599822634e-06, - "loss": 1.0591, - "step": 1752 - }, - { - "epoch": 0.21078578729032646, - "grad_norm": 1.721093816493133, - "learning_rate": 3.666985380852539e-06, - "loss": 1.0069, - "step": 1753 - }, - { - "epoch": 0.21090603018096554, - "grad_norm": 2.016356934125781, - "learning_rate": 3.6665548488424576e-06, - "loss": 0.9553, - "step": 1754 - }, - { - "epoch": 0.21102627307160465, - "grad_norm": 1.5675289097847172, - "learning_rate": 3.6661240640173307e-06, - "loss": 1.0777, - "step": 1755 - }, - { - "epoch": 0.21114651596224374, - "grad_norm": 0.9895788728044602, - "learning_rate": 3.6656930264425085e-06, - "loss": 0.8336, - "step": 1756 - }, - { - "epoch": 0.21126675885288282, - "grad_norm": 1.7137752323532942, - "learning_rate": 3.665261736183378e-06, - "loss": 0.965, - "step": 1757 - }, - { - "epoch": 0.2113870017435219, - "grad_norm": 2.0636841813514266, - "learning_rate": 3.664830193305366e-06, - "loss": 1.0883, - "step": 1758 - }, - { - "epoch": 0.211507244634161, - "grad_norm": 2.3600030530489553, - "learning_rate": 3.6643983978739373e-06, - "loss": 0.962, - "step": 1759 - }, - { - "epoch": 0.2116274875248001, - "grad_norm": 3.326460014402246, - "learning_rate": 3.663966349954596e-06, - "loss": 1.015, - "step": 1760 - }, - { - "epoch": 0.21174773041543918, - "grad_norm": 0.9323344016625352, - "learning_rate": 3.6635340496128816e-06, - "loss": 0.8137, - "step": 1761 - }, - { - "epoch": 0.2118679733060783, - "grad_norm": 1.5203572751829288, - "learning_rate": 3.6631014969143747e-06, - "loss": 1.1217, - "step": 1762 - }, - { - "epoch": 0.21198821619671737, - "grad_norm": 1.8194122052753956, - "learning_rate": 3.662668691924693e-06, - "loss": 1.0887, - "step": 1763 - }, - { - "epoch": 0.21210845908735645, - "grad_norm": 1.7565908976771933, - "learning_rate": 3.6622356347094927e-06, - "loss": 0.915, - "step": 1764 - }, - { - "epoch": 0.21222870197799554, - "grad_norm": 1.6995412936961172, - "learning_rate": 3.6618023253344684e-06, - "loss": 0.9868, - "step": 1765 - }, - { - "epoch": 0.21234894486863465, - "grad_norm": 1.9122250203433497, - "learning_rate": 3.6613687638653527e-06, - "loss": 1.0385, - "step": 1766 - }, - { - "epoch": 0.21246918775927373, - "grad_norm": 1.8090724105682108, - "learning_rate": 3.660934950367916e-06, - "loss": 0.9807, - "step": 1767 - }, - { - "epoch": 0.21258943064991281, - "grad_norm": 1.483477029733795, - "learning_rate": 3.660500884907968e-06, - "loss": 1.0318, - "step": 1768 - }, - { - "epoch": 0.21270967354055192, - "grad_norm": 0.9000856623678769, - "learning_rate": 3.660066567551356e-06, - "loss": 0.8213, - "step": 1769 - }, - { - "epoch": 0.212829916431191, - "grad_norm": 2.015331749690312, - "learning_rate": 3.6596319983639657e-06, - "loss": 1.0398, - "step": 1770 - }, - { - "epoch": 0.2129501593218301, - "grad_norm": 1.530560863891061, - "learning_rate": 3.6591971774117214e-06, - "loss": 1.0657, - "step": 1771 - }, - { - "epoch": 0.2130704022124692, - "grad_norm": 2.0279782281794168, - "learning_rate": 3.6587621047605833e-06, - "loss": 1.0137, - "step": 1772 - }, - { - "epoch": 0.21319064510310828, - "grad_norm": 1.8664528905981703, - "learning_rate": 3.6583267804765542e-06, - "loss": 1.0676, - "step": 1773 - }, - { - "epoch": 0.21331088799374737, - "grad_norm": 1.7503899412317692, - "learning_rate": 3.6578912046256702e-06, - "loss": 1.0514, - "step": 1774 - }, - { - "epoch": 0.21343113088438645, - "grad_norm": 3.0785514419963924, - "learning_rate": 3.6574553772740083e-06, - "loss": 0.9638, - "step": 1775 - }, - { - "epoch": 0.21355137377502556, - "grad_norm": 0.9590937907464497, - "learning_rate": 3.657019298487684e-06, - "loss": 0.8572, - "step": 1776 - }, - { - "epoch": 0.21367161666566464, - "grad_norm": 1.9942789506153424, - "learning_rate": 3.6565829683328495e-06, - "loss": 1.0344, - "step": 1777 - }, - { - "epoch": 0.21379185955630373, - "grad_norm": 1.7229937095283723, - "learning_rate": 3.6561463868756965e-06, - "loss": 1.0578, - "step": 1778 - }, - { - "epoch": 0.21391210244694284, - "grad_norm": 1.4950028468586536, - "learning_rate": 3.655709554182452e-06, - "loss": 0.9806, - "step": 1779 - }, - { - "epoch": 0.21403234533758192, - "grad_norm": 1.8416943486177257, - "learning_rate": 3.6552724703193855e-06, - "loss": 1.0547, - "step": 1780 - }, - { - "epoch": 0.214152588228221, - "grad_norm": 1.02792510202506, - "learning_rate": 3.654835135352801e-06, - "loss": 0.7782, - "step": 1781 - }, - { - "epoch": 0.21427283111886009, - "grad_norm": 1.6616355139829388, - "learning_rate": 3.654397549349043e-06, - "loss": 1.0713, - "step": 1782 - }, - { - "epoch": 0.2143930740094992, - "grad_norm": 1.8352371503294114, - "learning_rate": 3.653959712374491e-06, - "loss": 0.9545, - "step": 1783 - }, - { - "epoch": 0.21451331690013828, - "grad_norm": 1.684911154943156, - "learning_rate": 3.6535216244955663e-06, - "loss": 1.0265, - "step": 1784 - }, - { - "epoch": 0.21463355979077736, - "grad_norm": 1.556286396995118, - "learning_rate": 3.653083285778726e-06, - "loss": 0.9093, - "step": 1785 - }, - { - "epoch": 0.21475380268141647, - "grad_norm": 2.027568473929645, - "learning_rate": 3.6526446962904653e-06, - "loss": 1.0052, - "step": 1786 - }, - { - "epoch": 0.21487404557205556, - "grad_norm": 1.5087222186345997, - "learning_rate": 3.652205856097318e-06, - "loss": 0.9468, - "step": 1787 - }, - { - "epoch": 0.21499428846269464, - "grad_norm": 1.8356434688087113, - "learning_rate": 3.651766765265856e-06, - "loss": 0.9894, - "step": 1788 - }, - { - "epoch": 0.21511453135333372, - "grad_norm": 3.228944504181511, - "learning_rate": 3.65132742386269e-06, - "loss": 1.0072, - "step": 1789 - }, - { - "epoch": 0.21523477424397283, - "grad_norm": 1.7247091849834026, - "learning_rate": 3.6508878319544656e-06, - "loss": 1.0495, - "step": 1790 - }, - { - "epoch": 0.21535501713461191, - "grad_norm": 2.2355535640079904, - "learning_rate": 3.65044798960787e-06, - "loss": 1.0042, - "step": 1791 - }, - { - "epoch": 0.215475260025251, - "grad_norm": 1.808988919352287, - "learning_rate": 3.650007896889627e-06, - "loss": 0.9835, - "step": 1792 - }, - { - "epoch": 0.2155955029158901, - "grad_norm": 1.7735548610757326, - "learning_rate": 3.6495675538664974e-06, - "loss": 0.9991, - "step": 1793 - }, - { - "epoch": 0.2157157458065292, - "grad_norm": 2.438507360982667, - "learning_rate": 3.649126960605282e-06, - "loss": 1.028, - "step": 1794 - }, - { - "epoch": 0.21583598869716827, - "grad_norm": 2.3585309672584005, - "learning_rate": 3.6486861171728174e-06, - "loss": 1.0329, - "step": 1795 - }, - { - "epoch": 0.21595623158780738, - "grad_norm": 1.5301923407785119, - "learning_rate": 3.6482450236359803e-06, - "loss": 0.9869, - "step": 1796 - }, - { - "epoch": 0.21607647447844647, - "grad_norm": 1.964468220493272, - "learning_rate": 3.647803680061683e-06, - "loss": 0.9739, - "step": 1797 - }, - { - "epoch": 0.21619671736908555, - "grad_norm": 2.470929017125364, - "learning_rate": 3.6473620865168776e-06, - "loss": 0.949, - "step": 1798 - }, - { - "epoch": 0.21631696025972463, - "grad_norm": 1.8822920463771329, - "learning_rate": 3.646920243068554e-06, - "loss": 1.0208, - "step": 1799 - }, - { - "epoch": 0.21643720315036374, - "grad_norm": 1.755607476586644, - "learning_rate": 3.6464781497837384e-06, - "loss": 0.9446, - "step": 1800 - }, - { - "epoch": 0.21655744604100283, - "grad_norm": 1.5490988726844697, - "learning_rate": 3.6460358067294965e-06, - "loss": 0.9394, - "step": 1801 - }, - { - "epoch": 0.2166776889316419, - "grad_norm": 1.8390722904900823, - "learning_rate": 3.645593213972932e-06, - "loss": 0.977, - "step": 1802 - }, - { - "epoch": 0.21679793182228102, - "grad_norm": 1.8578887898122585, - "learning_rate": 3.6451503715811852e-06, - "loss": 0.9897, - "step": 1803 - }, - { - "epoch": 0.2169181747129201, - "grad_norm": 1.735573157146472, - "learning_rate": 3.6447072796214345e-06, - "loss": 0.9972, - "step": 1804 - }, - { - "epoch": 0.21703841760355919, - "grad_norm": 1.3058779647558338, - "learning_rate": 3.644263938160898e-06, - "loss": 0.8647, - "step": 1805 - }, - { - "epoch": 0.21715866049419827, - "grad_norm": 1.727991815715411, - "learning_rate": 3.6438203472668293e-06, - "loss": 0.9162, - "step": 1806 - }, - { - "epoch": 0.21727890338483738, - "grad_norm": 2.0774242395572102, - "learning_rate": 3.6433765070065206e-06, - "loss": 1.0216, - "step": 1807 - }, - { - "epoch": 0.21739914627547646, - "grad_norm": 2.1006248679781017, - "learning_rate": 3.6429324174473025e-06, - "loss": 1.0822, - "step": 1808 - }, - { - "epoch": 0.21751938916611555, - "grad_norm": 2.08906029962896, - "learning_rate": 3.6424880786565425e-06, - "loss": 1.0506, - "step": 1809 - }, - { - "epoch": 0.21763963205675466, - "grad_norm": 2.1802996251044746, - "learning_rate": 3.6420434907016482e-06, - "loss": 0.9944, - "step": 1810 - }, - { - "epoch": 0.21775987494739374, - "grad_norm": 1.4830772750384087, - "learning_rate": 3.6415986536500606e-06, - "loss": 1.0121, - "step": 1811 - }, - { - "epoch": 0.21788011783803282, - "grad_norm": 1.6767295875017778, - "learning_rate": 3.641153567569263e-06, - "loss": 1.0102, - "step": 1812 - }, - { - "epoch": 0.2180003607286719, - "grad_norm": 1.844217402809903, - "learning_rate": 3.640708232526774e-06, - "loss": 1.1453, - "step": 1813 - }, - { - "epoch": 0.21812060361931102, - "grad_norm": 1.6725318545285892, - "learning_rate": 3.6402626485901504e-06, - "loss": 0.9826, - "step": 1814 - }, - { - "epoch": 0.2182408465099501, - "grad_norm": 1.7650554717427034, - "learning_rate": 3.639816815826988e-06, - "loss": 0.9731, - "step": 1815 - }, - { - "epoch": 0.21836108940058918, - "grad_norm": 1.7982418003539689, - "learning_rate": 3.6393707343049176e-06, - "loss": 0.9818, - "step": 1816 - }, - { - "epoch": 0.2184813322912283, - "grad_norm": 3.101370625362176, - "learning_rate": 3.6389244040916104e-06, - "loss": 0.9383, - "step": 1817 - }, - { - "epoch": 0.21860157518186737, - "grad_norm": 2.3907974280711306, - "learning_rate": 3.6384778252547747e-06, - "loss": 0.9882, - "step": 1818 - }, - { - "epoch": 0.21872181807250646, - "grad_norm": 2.197706769951782, - "learning_rate": 3.638030997862155e-06, - "loss": 0.9833, - "step": 1819 - }, - { - "epoch": 0.21884206096314554, - "grad_norm": 0.9160757797433192, - "learning_rate": 3.6375839219815356e-06, - "loss": 0.8205, - "step": 1820 - }, - { - "epoch": 0.21896230385378465, - "grad_norm": 2.639238885345716, - "learning_rate": 3.6371365976807375e-06, - "loss": 1.0265, - "step": 1821 - }, - { - "epoch": 0.21908254674442373, - "grad_norm": 1.6357392723854622, - "learning_rate": 3.6366890250276185e-06, - "loss": 1.0322, - "step": 1822 - }, - { - "epoch": 0.21920278963506282, - "grad_norm": 1.7590483665474554, - "learning_rate": 3.6362412040900764e-06, - "loss": 1.0995, - "step": 1823 - }, - { - "epoch": 0.21932303252570193, - "grad_norm": 1.8542108709088043, - "learning_rate": 3.635793134936044e-06, - "loss": 1.0051, - "step": 1824 - }, - { - "epoch": 0.219443275416341, - "grad_norm": 1.5359599021605095, - "learning_rate": 3.635344817633494e-06, - "loss": 0.9301, - "step": 1825 - }, - { - "epoch": 0.2195635183069801, - "grad_norm": 2.034518502560732, - "learning_rate": 3.634896252250436e-06, - "loss": 0.9524, - "step": 1826 - }, - { - "epoch": 0.2196837611976192, - "grad_norm": 2.307836004820638, - "learning_rate": 3.6344474388549157e-06, - "loss": 1.028, - "step": 1827 - }, - { - "epoch": 0.2198040040882583, - "grad_norm": 2.0589725500558633, - "learning_rate": 3.6339983775150183e-06, - "loss": 1.0122, - "step": 1828 - }, - { - "epoch": 0.21992424697889737, - "grad_norm": 2.2412880612070656, - "learning_rate": 3.6335490682988664e-06, - "loss": 1.0517, - "step": 1829 - }, - { - "epoch": 0.22004448986953645, - "grad_norm": 1.8186821574244045, - "learning_rate": 3.63309951127462e-06, - "loss": 1.0303, - "step": 1830 - }, - { - "epoch": 0.22016473276017556, - "grad_norm": 1.8137275879498567, - "learning_rate": 3.6326497065104757e-06, - "loss": 0.9518, - "step": 1831 - }, - { - "epoch": 0.22028497565081465, - "grad_norm": 1.87699878966547, - "learning_rate": 3.6321996540746697e-06, - "loss": 0.9822, - "step": 1832 - }, - { - "epoch": 0.22040521854145373, - "grad_norm": 1.7506840595060251, - "learning_rate": 3.6317493540354733e-06, - "loss": 1.0029, - "step": 1833 - }, - { - "epoch": 0.22052546143209284, - "grad_norm": 1.9119745816811669, - "learning_rate": 3.6312988064611976e-06, - "loss": 0.9801, - "step": 1834 - }, - { - "epoch": 0.22064570432273192, - "grad_norm": 1.811914415144611, - "learning_rate": 3.6308480114201896e-06, - "loss": 1.0041, - "step": 1835 - }, - { - "epoch": 0.220765947213371, - "grad_norm": 1.7112076776814689, - "learning_rate": 3.630396968980835e-06, - "loss": 0.9708, - "step": 1836 - }, - { - "epoch": 0.2208861901040101, - "grad_norm": 2.4268796341405063, - "learning_rate": 3.6299456792115575e-06, - "loss": 1.0404, - "step": 1837 - }, - { - "epoch": 0.2210064329946492, - "grad_norm": 1.706623438955761, - "learning_rate": 3.629494142180815e-06, - "loss": 1.0177, - "step": 1838 - }, - { - "epoch": 0.22112667588528828, - "grad_norm": 1.9708595478446256, - "learning_rate": 3.6290423579571075e-06, - "loss": 1.0471, - "step": 1839 - }, - { - "epoch": 0.22124691877592736, - "grad_norm": 1.5446040119481097, - "learning_rate": 3.6285903266089694e-06, - "loss": 1.0006, - "step": 1840 - }, - { - "epoch": 0.22136716166656648, - "grad_norm": 1.6490478831892739, - "learning_rate": 3.628138048204974e-06, - "loss": 0.9743, - "step": 1841 - }, - { - "epoch": 0.22148740455720556, - "grad_norm": 1.6617778442951643, - "learning_rate": 3.6276855228137304e-06, - "loss": 0.9765, - "step": 1842 - }, - { - "epoch": 0.22160764744784464, - "grad_norm": 2.1308950671205884, - "learning_rate": 3.6272327505038874e-06, - "loss": 1.0219, - "step": 1843 - }, - { - "epoch": 0.22172789033848372, - "grad_norm": 1.686172811156814, - "learning_rate": 3.626779731344131e-06, - "loss": 0.9797, - "step": 1844 - }, - { - "epoch": 0.22184813322912283, - "grad_norm": 2.0019284725997375, - "learning_rate": 3.6263264654031814e-06, - "loss": 1.0525, - "step": 1845 - }, - { - "epoch": 0.22196837611976192, - "grad_norm": 0.8896534958328172, - "learning_rate": 3.6258729527498008e-06, - "loss": 0.799, - "step": 1846 - }, - { - "epoch": 0.222088619010401, - "grad_norm": 2.1093461776459685, - "learning_rate": 3.6254191934527854e-06, - "loss": 0.8357, - "step": 1847 - }, - { - "epoch": 0.2222088619010401, - "grad_norm": 1.9109310146917304, - "learning_rate": 3.6249651875809715e-06, - "loss": 0.8454, - "step": 1848 - }, - { - "epoch": 0.2223291047916792, - "grad_norm": 1.7930061153599053, - "learning_rate": 3.62451093520323e-06, - "loss": 1.0874, - "step": 1849 - }, - { - "epoch": 0.22244934768231828, - "grad_norm": 3.1892212353029885, - "learning_rate": 3.6240564363884714e-06, - "loss": 1.102, - "step": 1850 - }, - { - "epoch": 0.2225695905729574, - "grad_norm": 1.8656312210771617, - "learning_rate": 3.623601691205643e-06, - "loss": 0.9038, - "step": 1851 - }, - { - "epoch": 0.22268983346359647, - "grad_norm": 1.9599679928715812, - "learning_rate": 3.623146699723729e-06, - "loss": 1.0147, - "step": 1852 - }, - { - "epoch": 0.22281007635423555, - "grad_norm": 1.632802643814638, - "learning_rate": 3.6226914620117507e-06, - "loss": 0.985, - "step": 1853 - }, - { - "epoch": 0.22293031924487464, - "grad_norm": 2.200431096581582, - "learning_rate": 3.622235978138768e-06, - "loss": 1.0051, - "step": 1854 - }, - { - "epoch": 0.22305056213551375, - "grad_norm": 1.7306647953901084, - "learning_rate": 3.621780248173877e-06, - "loss": 1.0202, - "step": 1855 - }, - { - "epoch": 0.22317080502615283, - "grad_norm": 0.9950334478152572, - "learning_rate": 3.6213242721862125e-06, - "loss": 0.8499, - "step": 1856 - }, - { - "epoch": 0.2232910479167919, - "grad_norm": 1.5497950726062413, - "learning_rate": 3.620868050244945e-06, - "loss": 0.9559, - "step": 1857 - }, - { - "epoch": 0.22341129080743102, - "grad_norm": 1.7295198960672102, - "learning_rate": 3.6204115824192817e-06, - "loss": 0.9749, - "step": 1858 - }, - { - "epoch": 0.2235315336980701, - "grad_norm": 2.1044261382109144, - "learning_rate": 3.619954868778471e-06, - "loss": 0.9726, - "step": 1859 - }, - { - "epoch": 0.2236517765887092, - "grad_norm": 1.6872846677934634, - "learning_rate": 3.6194979093917944e-06, - "loss": 1.021, - "step": 1860 - }, - { - "epoch": 0.22377201947934827, - "grad_norm": 1.7656934606732304, - "learning_rate": 3.6190407043285724e-06, - "loss": 1.0804, - "step": 1861 - }, - { - "epoch": 0.22389226236998738, - "grad_norm": 1.8148830795460311, - "learning_rate": 3.618583253658163e-06, - "loss": 0.9466, - "step": 1862 - }, - { - "epoch": 0.22401250526062647, - "grad_norm": 1.7374749874812019, - "learning_rate": 3.618125557449961e-06, - "loss": 1.0742, - "step": 1863 - }, - { - "epoch": 0.22413274815126555, - "grad_norm": 1.828298381180389, - "learning_rate": 3.6176676157733983e-06, - "loss": 1.0385, - "step": 1864 - }, - { - "epoch": 0.22425299104190466, - "grad_norm": 1.9339082096136464, - "learning_rate": 3.6172094286979443e-06, - "loss": 0.9526, - "step": 1865 - }, - { - "epoch": 0.22437323393254374, - "grad_norm": 1.2793910799940091, - "learning_rate": 3.6167509962931064e-06, - "loss": 1.0068, - "step": 1866 - }, - { - "epoch": 0.22449347682318282, - "grad_norm": 2.453913082197091, - "learning_rate": 3.6162923186284276e-06, - "loss": 0.9743, - "step": 1867 - }, - { - "epoch": 0.2246137197138219, - "grad_norm": 2.0870373287399424, - "learning_rate": 3.6158333957734888e-06, - "loss": 1.0635, - "step": 1868 - }, - { - "epoch": 0.22473396260446102, - "grad_norm": 1.7873375585411944, - "learning_rate": 3.6153742277979088e-06, - "loss": 1.0291, - "step": 1869 - }, - { - "epoch": 0.2248542054951001, - "grad_norm": 3.0245080279841106, - "learning_rate": 3.6149148147713434e-06, - "loss": 0.9932, - "step": 1870 - }, - { - "epoch": 0.22497444838573918, - "grad_norm": 1.5983662787984334, - "learning_rate": 3.614455156763484e-06, - "loss": 1.0693, - "step": 1871 - }, - { - "epoch": 0.2250946912763783, - "grad_norm": 1.934024154078317, - "learning_rate": 3.613995253844061e-06, - "loss": 0.917, - "step": 1872 - }, - { - "epoch": 0.22521493416701738, - "grad_norm": 1.7750431807732507, - "learning_rate": 3.6135351060828414e-06, - "loss": 1.009, - "step": 1873 - }, - { - "epoch": 0.22533517705765646, - "grad_norm": 1.955188180301015, - "learning_rate": 3.6130747135496285e-06, - "loss": 0.8879, - "step": 1874 - }, - { - "epoch": 0.22545541994829554, - "grad_norm": 1.9539882125655137, - "learning_rate": 3.6126140763142646e-06, - "loss": 0.8655, - "step": 1875 - }, - { - "epoch": 0.22557566283893465, - "grad_norm": 2.132765549363034, - "learning_rate": 3.6121531944466275e-06, - "loss": 1.0531, - "step": 1876 - }, - { - "epoch": 0.22569590572957374, - "grad_norm": 1.9216856824469453, - "learning_rate": 3.611692068016633e-06, - "loss": 0.9837, - "step": 1877 - }, - { - "epoch": 0.22581614862021282, - "grad_norm": 2.360472569108084, - "learning_rate": 3.611230697094233e-06, - "loss": 0.9548, - "step": 1878 - }, - { - "epoch": 0.22593639151085193, - "grad_norm": 1.6637034857228532, - "learning_rate": 3.6107690817494173e-06, - "loss": 1.0735, - "step": 1879 - }, - { - "epoch": 0.226056634401491, - "grad_norm": 2.016066746764821, - "learning_rate": 3.6103072220522117e-06, - "loss": 0.9034, - "step": 1880 - }, - { - "epoch": 0.2261768772921301, - "grad_norm": 1.6220459864455647, - "learning_rate": 3.609845118072682e-06, - "loss": 1.1112, - "step": 1881 - }, - { - "epoch": 0.2262971201827692, - "grad_norm": 1.6907286663319867, - "learning_rate": 3.6093827698809276e-06, - "loss": 0.9957, - "step": 1882 - }, - { - "epoch": 0.2264173630734083, - "grad_norm": 1.95288365208049, - "learning_rate": 3.6089201775470864e-06, - "loss": 1.0467, - "step": 1883 - }, - { - "epoch": 0.22653760596404737, - "grad_norm": 1.3779174890660886, - "learning_rate": 3.6084573411413334e-06, - "loss": 0.9806, - "step": 1884 - }, - { - "epoch": 0.22665784885468646, - "grad_norm": 3.671383309416641, - "learning_rate": 3.607994260733881e-06, - "loss": 1.0177, - "step": 1885 - }, - { - "epoch": 0.22677809174532557, - "grad_norm": 1.511449430068347, - "learning_rate": 3.6075309363949776e-06, - "loss": 0.946, - "step": 1886 - }, - { - "epoch": 0.22689833463596465, - "grad_norm": 3.1239483398874643, - "learning_rate": 3.6070673681949094e-06, - "loss": 1.0071, - "step": 1887 - }, - { - "epoch": 0.22701857752660373, - "grad_norm": 1.6014194331583518, - "learning_rate": 3.606603556203999e-06, - "loss": 1.0123, - "step": 1888 - }, - { - "epoch": 0.22713882041724284, - "grad_norm": 1.6258264787387047, - "learning_rate": 3.6061395004926066e-06, - "loss": 1.0378, - "step": 1889 - }, - { - "epoch": 0.22725906330788193, - "grad_norm": 2.0534799022299453, - "learning_rate": 3.605675201131129e-06, - "loss": 1.0496, - "step": 1890 - }, - { - "epoch": 0.227379306198521, - "grad_norm": 2.2876647222859816, - "learning_rate": 3.60521065819e-06, - "loss": 1.0035, - "step": 1891 - }, - { - "epoch": 0.2274995490891601, - "grad_norm": 1.6349024196985076, - "learning_rate": 3.60474587173969e-06, - "loss": 1.0714, - "step": 1892 - }, - { - "epoch": 0.2276197919797992, - "grad_norm": 2.0383749785682648, - "learning_rate": 3.6042808418507084e-06, - "loss": 1.0342, - "step": 1893 - }, - { - "epoch": 0.22774003487043828, - "grad_norm": 2.458188510659174, - "learning_rate": 3.6038155685935976e-06, - "loss": 0.9785, - "step": 1894 - }, - { - "epoch": 0.22786027776107737, - "grad_norm": 1.8419406871130595, - "learning_rate": 3.6033500520389404e-06, - "loss": 0.9044, - "step": 1895 - }, - { - "epoch": 0.22798052065171648, - "grad_norm": 1.1303073939260035, - "learning_rate": 3.6028842922573553e-06, - "loss": 0.8814, - "step": 1896 - }, - { - "epoch": 0.22810076354235556, - "grad_norm": 0.9378840931224447, - "learning_rate": 3.602418289319497e-06, - "loss": 0.8583, - "step": 1897 - }, - { - "epoch": 0.22822100643299464, - "grad_norm": 2.3296751455674056, - "learning_rate": 3.601952043296059e-06, - "loss": 0.9375, - "step": 1898 - }, - { - "epoch": 0.22834124932363373, - "grad_norm": 1.956914841165016, - "learning_rate": 3.6014855542577696e-06, - "loss": 1.0042, - "step": 1899 - }, - { - "epoch": 0.22846149221427284, - "grad_norm": 1.6549161902467213, - "learning_rate": 3.6010188222753943e-06, - "loss": 1.0443, - "step": 1900 - }, - { - "epoch": 0.22858173510491192, - "grad_norm": 1.1834954450718682, - "learning_rate": 3.6005518474197372e-06, - "loss": 0.8697, - "step": 1901 - }, - { - "epoch": 0.228701977995551, - "grad_norm": 1.7462152016407613, - "learning_rate": 3.6000846297616373e-06, - "loss": 0.9892, - "step": 1902 - }, - { - "epoch": 0.22882222088619011, - "grad_norm": 2.061864697262086, - "learning_rate": 3.5996171693719717e-06, - "loss": 0.9315, - "step": 1903 - }, - { - "epoch": 0.2289424637768292, - "grad_norm": 1.343012052662295, - "learning_rate": 3.5991494663216528e-06, - "loss": 0.8665, - "step": 1904 - }, - { - "epoch": 0.22906270666746828, - "grad_norm": 1.8805415597028892, - "learning_rate": 3.5986815206816314e-06, - "loss": 1.0832, - "step": 1905 - }, - { - "epoch": 0.2291829495581074, - "grad_norm": 1.6897063490005182, - "learning_rate": 3.598213332522895e-06, - "loss": 0.9472, - "step": 1906 - }, - { - "epoch": 0.22930319244874647, - "grad_norm": 1.726048872139747, - "learning_rate": 3.597744901916466e-06, - "loss": 0.9755, - "step": 1907 - }, - { - "epoch": 0.22942343533938556, - "grad_norm": 1.8854878519294573, - "learning_rate": 3.5972762289334058e-06, - "loss": 0.971, - "step": 1908 - }, - { - "epoch": 0.22954367823002464, - "grad_norm": 1.7859812359865432, - "learning_rate": 3.5968073136448116e-06, - "loss": 1.0514, - "step": 1909 - }, - { - "epoch": 0.22966392112066375, - "grad_norm": 1.5931868212599938, - "learning_rate": 3.596338156121818e-06, - "loss": 1.1086, - "step": 1910 - }, - { - "epoch": 0.22978416401130283, - "grad_norm": 1.0323563623155894, - "learning_rate": 3.595868756435595e-06, - "loss": 0.8174, - "step": 1911 - }, - { - "epoch": 0.22990440690194192, - "grad_norm": 2.2617297131103036, - "learning_rate": 3.5953991146573504e-06, - "loss": 0.9987, - "step": 1912 - }, - { - "epoch": 0.23002464979258103, - "grad_norm": 2.1113222529694013, - "learning_rate": 3.5949292308583294e-06, - "loss": 1.03, - "step": 1913 - }, - { - "epoch": 0.2301448926832201, - "grad_norm": 2.0543212564214395, - "learning_rate": 3.594459105109811e-06, - "loss": 1.0047, - "step": 1914 - }, - { - "epoch": 0.2302651355738592, - "grad_norm": 1.6826683386468415, - "learning_rate": 3.593988737483115e-06, - "loss": 1.0144, - "step": 1915 - }, - { - "epoch": 0.23038537846449827, - "grad_norm": 1.857900788412651, - "learning_rate": 3.5935181280495947e-06, - "loss": 0.9814, - "step": 1916 - }, - { - "epoch": 0.23050562135513739, - "grad_norm": 1.2411175324285177, - "learning_rate": 3.5930472768806412e-06, - "loss": 0.7844, - "step": 1917 - }, - { - "epoch": 0.23062586424577647, - "grad_norm": 1.879570984182153, - "learning_rate": 3.5925761840476826e-06, - "loss": 0.9732, - "step": 1918 - }, - { - "epoch": 0.23074610713641555, - "grad_norm": 3.5799918156535617, - "learning_rate": 3.592104849622183e-06, - "loss": 1.0232, - "step": 1919 - }, - { - "epoch": 0.23086635002705466, - "grad_norm": 1.4660593914163764, - "learning_rate": 3.591633273675644e-06, - "loss": 0.9335, - "step": 1920 - }, - { - "epoch": 0.23098659291769374, - "grad_norm": 1.7285856243550473, - "learning_rate": 3.591161456279602e-06, - "loss": 0.8249, - "step": 1921 - }, - { - "epoch": 0.23110683580833283, - "grad_norm": 1.3727047142544226, - "learning_rate": 3.590689397505633e-06, - "loss": 0.9972, - "step": 1922 - }, - { - "epoch": 0.2312270786989719, - "grad_norm": 1.6839513553788021, - "learning_rate": 3.590217097425347e-06, - "loss": 1.0662, - "step": 1923 - }, - { - "epoch": 0.23134732158961102, - "grad_norm": 1.9166501984372515, - "learning_rate": 3.589744556110391e-06, - "loss": 0.9141, - "step": 1924 - }, - { - "epoch": 0.2314675644802501, - "grad_norm": 1.6336904109486852, - "learning_rate": 3.58927177363245e-06, - "loss": 1.0483, - "step": 1925 - }, - { - "epoch": 0.2315878073708892, - "grad_norm": 2.1268603091835327, - "learning_rate": 3.5887987500632447e-06, - "loss": 0.934, - "step": 1926 - }, - { - "epoch": 0.2317080502615283, - "grad_norm": 1.8145871477346116, - "learning_rate": 3.5883254854745325e-06, - "loss": 1.0329, - "step": 1927 - }, - { - "epoch": 0.23182829315216738, - "grad_norm": 2.0994717433088135, - "learning_rate": 3.587851979938107e-06, - "loss": 0.9609, - "step": 1928 - }, - { - "epoch": 0.23194853604280646, - "grad_norm": 1.7807066239206428, - "learning_rate": 3.5873782335257985e-06, - "loss": 0.9704, - "step": 1929 - }, - { - "epoch": 0.23206877893344555, - "grad_norm": 2.1362864389989196, - "learning_rate": 3.5869042463094744e-06, - "loss": 0.9889, - "step": 1930 - }, - { - "epoch": 0.23218902182408466, - "grad_norm": 2.6101258619515404, - "learning_rate": 3.586430018361038e-06, - "loss": 0.9688, - "step": 1931 - }, - { - "epoch": 0.23230926471472374, - "grad_norm": 1.993531794550666, - "learning_rate": 3.5859555497524283e-06, - "loss": 0.9671, - "step": 1932 - }, - { - "epoch": 0.23242950760536282, - "grad_norm": 2.0706471298697875, - "learning_rate": 3.5854808405556237e-06, - "loss": 1.1122, - "step": 1933 - }, - { - "epoch": 0.23254975049600193, - "grad_norm": 2.370713230123463, - "learning_rate": 3.5850058908426355e-06, - "loss": 0.9629, - "step": 1934 - }, - { - "epoch": 0.23266999338664102, - "grad_norm": 1.8907320607035558, - "learning_rate": 3.584530700685514e-06, - "loss": 1.0479, - "step": 1935 - }, - { - "epoch": 0.2327902362772801, - "grad_norm": 2.2888341796328255, - "learning_rate": 3.5840552701563448e-06, - "loss": 1.0923, - "step": 1936 - }, - { - "epoch": 0.2329104791679192, - "grad_norm": 2.0601860706773523, - "learning_rate": 3.5835795993272513e-06, - "loss": 1.0169, - "step": 1937 - }, - { - "epoch": 0.2330307220585583, - "grad_norm": 1.6338064280588613, - "learning_rate": 3.583103688270391e-06, - "loss": 0.9076, - "step": 1938 - }, - { - "epoch": 0.23315096494919738, - "grad_norm": 2.129518977191504, - "learning_rate": 3.58262753705796e-06, - "loss": 1.0836, - "step": 1939 - }, - { - "epoch": 0.23327120783983646, - "grad_norm": 1.2489739975677403, - "learning_rate": 3.5821511457621902e-06, - "loss": 0.7702, - "step": 1940 - }, - { - "epoch": 0.23339145073047557, - "grad_norm": 2.960409555196218, - "learning_rate": 3.5816745144553497e-06, - "loss": 1.0165, - "step": 1941 - }, - { - "epoch": 0.23351169362111465, - "grad_norm": 2.047166759867476, - "learning_rate": 3.5811976432097424e-06, - "loss": 0.9574, - "step": 1942 - }, - { - "epoch": 0.23363193651175373, - "grad_norm": 1.7794898187100083, - "learning_rate": 3.58072053209771e-06, - "loss": 1.0453, - "step": 1943 - }, - { - "epoch": 0.23375217940239285, - "grad_norm": 2.0927928046088224, - "learning_rate": 3.5802431811916296e-06, - "loss": 0.9943, - "step": 1944 - }, - { - "epoch": 0.23387242229303193, - "grad_norm": 1.5833783477942958, - "learning_rate": 3.579765590563916e-06, - "loss": 1.0013, - "step": 1945 - }, - { - "epoch": 0.233992665183671, - "grad_norm": 2.5074936138159227, - "learning_rate": 3.579287760287017e-06, - "loss": 1.0144, - "step": 1946 - }, - { - "epoch": 0.2341129080743101, - "grad_norm": 1.58176086375746, - "learning_rate": 3.578809690433421e-06, - "loss": 0.9297, - "step": 1947 - }, - { - "epoch": 0.2342331509649492, - "grad_norm": 2.0004301800622675, - "learning_rate": 3.578331381075651e-06, - "loss": 1.0042, - "step": 1948 - }, - { - "epoch": 0.2343533938555883, - "grad_norm": 1.8660468923190263, - "learning_rate": 3.5778528322862646e-06, - "loss": 0.8955, - "step": 1949 - }, - { - "epoch": 0.23447363674622737, - "grad_norm": 1.4480114455271282, - "learning_rate": 3.5773740441378585e-06, - "loss": 1.0625, - "step": 1950 - }, - { - "epoch": 0.23459387963686648, - "grad_norm": 1.6630903099481937, - "learning_rate": 3.5768950167030633e-06, - "loss": 0.9342, - "step": 1951 - }, - { - "epoch": 0.23471412252750556, - "grad_norm": 1.7545576011832016, - "learning_rate": 3.576415750054548e-06, - "loss": 0.9863, - "step": 1952 - }, - { - "epoch": 0.23483436541814465, - "grad_norm": 1.7163677699168955, - "learning_rate": 3.5759362442650172e-06, - "loss": 1.0506, - "step": 1953 - }, - { - "epoch": 0.23495460830878373, - "grad_norm": 1.8177371819801937, - "learning_rate": 3.5754564994072113e-06, - "loss": 1.0433, - "step": 1954 - }, - { - "epoch": 0.23507485119942284, - "grad_norm": 2.0617762364984347, - "learning_rate": 3.5749765155539067e-06, - "loss": 0.8055, - "step": 1955 - }, - { - "epoch": 0.23519509409006192, - "grad_norm": 2.0988619445427457, - "learning_rate": 3.574496292777917e-06, - "loss": 1.1229, - "step": 1956 - }, - { - "epoch": 0.235315336980701, - "grad_norm": 1.6555280011434994, - "learning_rate": 3.574015831152092e-06, - "loss": 0.909, - "step": 1957 - }, - { - "epoch": 0.23543557987134012, - "grad_norm": 2.3162264311857945, - "learning_rate": 3.573535130749316e-06, - "loss": 1.0423, - "step": 1958 - }, - { - "epoch": 0.2355558227619792, - "grad_norm": 1.7304216985570413, - "learning_rate": 3.5730541916425127e-06, - "loss": 0.9326, - "step": 1959 - }, - { - "epoch": 0.23567606565261828, - "grad_norm": 1.7290536161794667, - "learning_rate": 3.572573013904639e-06, - "loss": 1.052, - "step": 1960 - }, - { - "epoch": 0.2357963085432574, - "grad_norm": 1.8345583016672395, - "learning_rate": 3.572091597608689e-06, - "loss": 1.1237, - "step": 1961 - }, - { - "epoch": 0.23591655143389648, - "grad_norm": 1.8180272242539885, - "learning_rate": 3.571609942827694e-06, - "loss": 0.9316, - "step": 1962 - }, - { - "epoch": 0.23603679432453556, - "grad_norm": 1.5490146827666798, - "learning_rate": 3.57112804963472e-06, - "loss": 1.0691, - "step": 1963 - }, - { - "epoch": 0.23615703721517464, - "grad_norm": 2.001433812217506, - "learning_rate": 3.57064591810287e-06, - "loss": 0.9668, - "step": 1964 - }, - { - "epoch": 0.23627728010581375, - "grad_norm": 1.9840130813844177, - "learning_rate": 3.570163548305284e-06, - "loss": 1.0048, - "step": 1965 - }, - { - "epoch": 0.23639752299645284, - "grad_norm": 2.184605345361979, - "learning_rate": 3.569680940315135e-06, - "loss": 0.9032, - "step": 1966 - }, - { - "epoch": 0.23651776588709192, - "grad_norm": 1.6799546140262207, - "learning_rate": 3.5691980942056356e-06, - "loss": 1.0118, - "step": 1967 - }, - { - "epoch": 0.23663800877773103, - "grad_norm": 1.6273935736154375, - "learning_rate": 3.5687150100500332e-06, - "loss": 0.9896, - "step": 1968 - }, - { - "epoch": 0.2367582516683701, - "grad_norm": 2.742154113044615, - "learning_rate": 3.568231687921611e-06, - "loss": 0.9436, - "step": 1969 - }, - { - "epoch": 0.2368784945590092, - "grad_norm": 1.4847284799619778, - "learning_rate": 3.5677481278936883e-06, - "loss": 1.0073, - "step": 1970 - }, - { - "epoch": 0.23699873744964828, - "grad_norm": 0.9732347170033783, - "learning_rate": 3.5672643300396214e-06, - "loss": 0.7889, - "step": 1971 - }, - { - "epoch": 0.2371189803402874, - "grad_norm": 2.2733903073790604, - "learning_rate": 3.566780294432802e-06, - "loss": 0.8823, - "step": 1972 - }, - { - "epoch": 0.23723922323092647, - "grad_norm": 2.1545143328951504, - "learning_rate": 3.566296021146657e-06, - "loss": 0.9449, - "step": 1973 - }, - { - "epoch": 0.23735946612156555, - "grad_norm": 1.7094730049226652, - "learning_rate": 3.565811510254652e-06, - "loss": 0.934, - "step": 1974 - }, - { - "epoch": 0.23747970901220466, - "grad_norm": 1.028169829252164, - "learning_rate": 3.5653267618302845e-06, - "loss": 0.7892, - "step": 1975 - }, - { - "epoch": 0.23759995190284375, - "grad_norm": 1.6526907337821983, - "learning_rate": 3.564841775947093e-06, - "loss": 1.0607, - "step": 1976 - }, - { - "epoch": 0.23772019479348283, - "grad_norm": 2.1785396712085694, - "learning_rate": 3.5643565526786475e-06, - "loss": 0.9634, - "step": 1977 - }, - { - "epoch": 0.2378404376841219, - "grad_norm": 1.7140182558954757, - "learning_rate": 3.5638710920985574e-06, - "loss": 0.9727, - "step": 1978 - }, - { - "epoch": 0.23796068057476102, - "grad_norm": 1.8311999291354557, - "learning_rate": 3.5633853942804655e-06, - "loss": 1.0213, - "step": 1979 - }, - { - "epoch": 0.2380809234654001, - "grad_norm": 2.008604850723531, - "learning_rate": 3.5628994592980527e-06, - "loss": 0.9706, - "step": 1980 - }, - { - "epoch": 0.2382011663560392, - "grad_norm": 1.7083707743317733, - "learning_rate": 3.562413287225034e-06, - "loss": 0.9066, - "step": 1981 - }, - { - "epoch": 0.2383214092466783, - "grad_norm": 2.2860855690801456, - "learning_rate": 3.5619268781351623e-06, - "loss": 1.0869, - "step": 1982 - }, - { - "epoch": 0.23844165213731738, - "grad_norm": 2.2859820055849953, - "learning_rate": 3.5614402321022256e-06, - "loss": 0.9719, - "step": 1983 - }, - { - "epoch": 0.23856189502795647, - "grad_norm": 1.7068146103324138, - "learning_rate": 3.5609533492000463e-06, - "loss": 1.0671, - "step": 1984 - }, - { - "epoch": 0.23868213791859555, - "grad_norm": 1.9263918142824423, - "learning_rate": 3.560466229502485e-06, - "loss": 0.9872, - "step": 1985 - }, - { - "epoch": 0.23880238080923466, - "grad_norm": 1.8982490477411984, - "learning_rate": 3.5599788730834384e-06, - "loss": 1.0991, - "step": 1986 - }, - { - "epoch": 0.23892262369987374, - "grad_norm": 2.4966341248629473, - "learning_rate": 3.559491280016836e-06, - "loss": 1.0103, - "step": 1987 - }, - { - "epoch": 0.23904286659051283, - "grad_norm": 1.8699706973875647, - "learning_rate": 3.5590034503766465e-06, - "loss": 0.9107, - "step": 1988 - }, - { - "epoch": 0.23916310948115194, - "grad_norm": 2.211680796198878, - "learning_rate": 3.558515384236874e-06, - "loss": 1.0274, - "step": 1989 - }, - { - "epoch": 0.23928335237179102, - "grad_norm": 1.7665410998843378, - "learning_rate": 3.558027081671556e-06, - "loss": 1.0366, - "step": 1990 - }, - { - "epoch": 0.2394035952624301, - "grad_norm": 1.8376215576654453, - "learning_rate": 3.557538542754769e-06, - "loss": 0.8932, - "step": 1991 - }, - { - "epoch": 0.2395238381530692, - "grad_norm": 1.7033738820623865, - "learning_rate": 3.557049767560623e-06, - "loss": 0.8701, - "step": 1992 - }, - { - "epoch": 0.2396440810437083, - "grad_norm": 1.8599570509351129, - "learning_rate": 3.5565607561632655e-06, - "loss": 1.0584, - "step": 1993 - }, - { - "epoch": 0.23976432393434738, - "grad_norm": 2.0597490402239815, - "learning_rate": 3.5560715086368787e-06, - "loss": 0.9942, - "step": 1994 - }, - { - "epoch": 0.23988456682498646, - "grad_norm": 2.8654720153240842, - "learning_rate": 3.5555820250556816e-06, - "loss": 1.031, - "step": 1995 - }, - { - "epoch": 0.24000480971562557, - "grad_norm": 2.0168870626821183, - "learning_rate": 3.5550923054939278e-06, - "loss": 0.8929, - "step": 1996 - }, - { - "epoch": 0.24012505260626466, - "grad_norm": 1.696589397751916, - "learning_rate": 3.5546023500259083e-06, - "loss": 0.9424, - "step": 1997 - }, - { - "epoch": 0.24024529549690374, - "grad_norm": 3.931472160721083, - "learning_rate": 3.5541121587259477e-06, - "loss": 1.0076, - "step": 1998 - }, - { - "epoch": 0.24036553838754285, - "grad_norm": 1.3331015305883245, - "learning_rate": 3.553621731668408e-06, - "loss": 0.7851, - "step": 1999 - }, - { - "epoch": 0.24048578127818193, - "grad_norm": 1.648975447250433, - "learning_rate": 3.553131068927688e-06, - "loss": 1.0382, - "step": 2000 - }, - { - "epoch": 0.24060602416882101, - "grad_norm": 1.5415571878412457, - "learning_rate": 3.552640170578219e-06, - "loss": 1.005, - "step": 2001 - }, - { - "epoch": 0.2407262670594601, - "grad_norm": 1.9455648358163216, - "learning_rate": 3.5521490366944703e-06, - "loss": 0.9809, - "step": 2002 - }, - { - "epoch": 0.2408465099500992, - "grad_norm": 2.3689069388948236, - "learning_rate": 3.5516576673509474e-06, - "loss": 1.0046, - "step": 2003 - }, - { - "epoch": 0.2409667528407383, - "grad_norm": 1.6064500617331015, - "learning_rate": 3.5511660626221896e-06, - "loss": 1.061, - "step": 2004 - }, - { - "epoch": 0.24108699573137737, - "grad_norm": 2.076533231392621, - "learning_rate": 3.5506742225827744e-06, - "loss": 1.0883, - "step": 2005 - }, - { - "epoch": 0.24120723862201648, - "grad_norm": 2.135590361687402, - "learning_rate": 3.5501821473073116e-06, - "loss": 1.1091, - "step": 2006 - }, - { - "epoch": 0.24132748151265557, - "grad_norm": 1.8042028527745877, - "learning_rate": 3.54968983687045e-06, - "loss": 1.0674, - "step": 2007 - }, - { - "epoch": 0.24144772440329465, - "grad_norm": 2.706415367800443, - "learning_rate": 3.549197291346872e-06, - "loss": 1.1019, - "step": 2008 - }, - { - "epoch": 0.24156796729393373, - "grad_norm": 1.9826280007524548, - "learning_rate": 3.548704510811297e-06, - "loss": 0.9978, - "step": 2009 - }, - { - "epoch": 0.24168821018457284, - "grad_norm": 2.1403499899972895, - "learning_rate": 3.5482114953384787e-06, - "loss": 0.9389, - "step": 2010 - }, - { - "epoch": 0.24180845307521193, - "grad_norm": 1.7167309779864166, - "learning_rate": 3.5477182450032077e-06, - "loss": 1.0391, - "step": 2011 - }, - { - "epoch": 0.241928695965851, - "grad_norm": 1.940364258675315, - "learning_rate": 3.5472247598803097e-06, - "loss": 1.035, - "step": 2012 - }, - { - "epoch": 0.24204893885649012, - "grad_norm": 1.9040184717364224, - "learning_rate": 3.546731040044645e-06, - "loss": 1.0614, - "step": 2013 - }, - { - "epoch": 0.2421691817471292, - "grad_norm": 1.6543566840483404, - "learning_rate": 3.546237085571112e-06, - "loss": 0.9542, - "step": 2014 - }, - { - "epoch": 0.24228942463776829, - "grad_norm": 2.0005574641556576, - "learning_rate": 3.5457428965346425e-06, - "loss": 0.9306, - "step": 2015 - }, - { - "epoch": 0.2424096675284074, - "grad_norm": 1.4832679234059436, - "learning_rate": 3.545248473010205e-06, - "loss": 0.9467, - "step": 2016 - }, - { - "epoch": 0.24252991041904648, - "grad_norm": 1.6616032297058014, - "learning_rate": 3.544753815072802e-06, - "loss": 1.0715, - "step": 2017 - }, - { - "epoch": 0.24265015330968556, - "grad_norm": 1.7310476604888472, - "learning_rate": 3.544258922797474e-06, - "loss": 1.0895, - "step": 2018 - }, - { - "epoch": 0.24277039620032465, - "grad_norm": 1.387302157756739, - "learning_rate": 3.543763796259295e-06, - "loss": 0.9806, - "step": 2019 - }, - { - "epoch": 0.24289063909096376, - "grad_norm": 1.6415102795439664, - "learning_rate": 3.5432684355333754e-06, - "loss": 1.109, - "step": 2020 - }, - { - "epoch": 0.24301088198160284, - "grad_norm": 1.8122189402529427, - "learning_rate": 3.5427728406948613e-06, - "loss": 0.9615, - "step": 2021 - }, - { - "epoch": 0.24313112487224192, - "grad_norm": 0.8837143671495057, - "learning_rate": 3.542277011818934e-06, - "loss": 0.7991, - "step": 2022 - }, - { - "epoch": 0.24325136776288103, - "grad_norm": 2.443846827000983, - "learning_rate": 3.5417809489808104e-06, - "loss": 0.9433, - "step": 2023 - }, - { - "epoch": 0.24337161065352012, - "grad_norm": 1.752128260607617, - "learning_rate": 3.5412846522557422e-06, - "loss": 0.9236, - "step": 2024 - }, - { - "epoch": 0.2434918535441592, - "grad_norm": 2.056739623611481, - "learning_rate": 3.540788121719018e-06, - "loss": 0.9397, - "step": 2025 - }, - { - "epoch": 0.24361209643479828, - "grad_norm": 1.807356804838247, - "learning_rate": 3.5402913574459604e-06, - "loss": 1.033, - "step": 2026 - }, - { - "epoch": 0.2437323393254374, - "grad_norm": 1.6636362022270963, - "learning_rate": 3.5397943595119297e-06, - "loss": 1.0523, - "step": 2027 - }, - { - "epoch": 0.24385258221607647, - "grad_norm": 2.2628255451839436, - "learning_rate": 3.5392971279923177e-06, - "loss": 0.9768, - "step": 2028 - }, - { - "epoch": 0.24397282510671556, - "grad_norm": 1.8941755767133839, - "learning_rate": 3.5387996629625557e-06, - "loss": 1.0283, - "step": 2029 - }, - { - "epoch": 0.24409306799735467, - "grad_norm": 1.04127931386435, - "learning_rate": 3.5383019644981083e-06, - "loss": 0.7947, - "step": 2030 - }, - { - "epoch": 0.24421331088799375, - "grad_norm": 2.002194154697658, - "learning_rate": 3.5378040326744763e-06, - "loss": 0.9302, - "step": 2031 - }, - { - "epoch": 0.24433355377863283, - "grad_norm": 1.9761785202080528, - "learning_rate": 3.5373058675671946e-06, - "loss": 1.0644, - "step": 2032 - }, - { - "epoch": 0.24445379666927192, - "grad_norm": 1.8382936333767324, - "learning_rate": 3.536807469251836e-06, - "loss": 0.9362, - "step": 2033 - }, - { - "epoch": 0.24457403955991103, - "grad_norm": 1.6113223543930761, - "learning_rate": 3.5363088378040055e-06, - "loss": 1.0202, - "step": 2034 - }, - { - "epoch": 0.2446942824505501, - "grad_norm": 0.918358967406565, - "learning_rate": 3.5358099732993463e-06, - "loss": 0.8775, - "step": 2035 - }, - { - "epoch": 0.2448145253411892, - "grad_norm": 1.895542024581773, - "learning_rate": 3.535310875813535e-06, - "loss": 1.1025, - "step": 2036 - }, - { - "epoch": 0.2449347682318283, - "grad_norm": 1.8871363460040251, - "learning_rate": 3.5348115454222843e-06, - "loss": 1.016, - "step": 2037 - }, - { - "epoch": 0.2450550111224674, - "grad_norm": 4.361211793119358, - "learning_rate": 3.5343119822013425e-06, - "loss": 1.0583, - "step": 2038 - }, - { - "epoch": 0.24517525401310647, - "grad_norm": 2.2226699801860628, - "learning_rate": 3.533812186226493e-06, - "loss": 0.9736, - "step": 2039 - }, - { - "epoch": 0.24529549690374555, - "grad_norm": 1.7750776327644653, - "learning_rate": 3.5333121575735545e-06, - "loss": 0.9644, - "step": 2040 - }, - { - "epoch": 0.24541573979438466, - "grad_norm": 2.2245009130440447, - "learning_rate": 3.532811896318381e-06, - "loss": 0.9579, - "step": 2041 - }, - { - "epoch": 0.24553598268502375, - "grad_norm": 2.1440196721256246, - "learning_rate": 3.5323114025368615e-06, - "loss": 1.0191, - "step": 2042 - }, - { - "epoch": 0.24565622557566283, - "grad_norm": 2.000450798728208, - "learning_rate": 3.53181067630492e-06, - "loss": 1.0156, - "step": 2043 - }, - { - "epoch": 0.24577646846630194, - "grad_norm": 1.5896706826677958, - "learning_rate": 3.5313097176985175e-06, - "loss": 0.9608, - "step": 2044 - }, - { - "epoch": 0.24589671135694102, - "grad_norm": 1.7211321881435258, - "learning_rate": 3.5308085267936482e-06, - "loss": 1.0032, - "step": 2045 - }, - { - "epoch": 0.2460169542475801, - "grad_norm": 1.6779870230499179, - "learning_rate": 3.530307103666342e-06, - "loss": 1.099, - "step": 2046 - }, - { - "epoch": 0.24613719713821922, - "grad_norm": 1.592400821250353, - "learning_rate": 3.5298054483926658e-06, - "loss": 1.0017, - "step": 2047 - }, - { - "epoch": 0.2462574400288583, - "grad_norm": 1.96842378608225, - "learning_rate": 3.5293035610487187e-06, - "loss": 1.0383, - "step": 2048 - }, - { - "epoch": 0.24637768291949738, - "grad_norm": 0.8830141495492233, - "learning_rate": 3.5288014417106374e-06, - "loss": 0.8405, - "step": 2049 - }, - { - "epoch": 0.24649792581013646, - "grad_norm": 1.8218414696148304, - "learning_rate": 3.528299090454593e-06, - "loss": 0.9545, - "step": 2050 - }, - { - "epoch": 0.24661816870077558, - "grad_norm": 2.060597900915905, - "learning_rate": 3.527796507356792e-06, - "loss": 1.0277, - "step": 2051 - }, - { - "epoch": 0.24673841159141466, - "grad_norm": 2.4208820891818914, - "learning_rate": 3.527293692493475e-06, - "loss": 1.1089, - "step": 2052 - }, - { - "epoch": 0.24685865448205374, - "grad_norm": 2.2416472631510467, - "learning_rate": 3.52679064594092e-06, - "loss": 0.9421, - "step": 2053 - }, - { - "epoch": 0.24697889737269285, - "grad_norm": 1.918167694402204, - "learning_rate": 3.5262873677754375e-06, - "loss": 0.9511, - "step": 2054 - }, - { - "epoch": 0.24709914026333193, - "grad_norm": 1.6087376341681665, - "learning_rate": 3.5257838580733745e-06, - "loss": 1.0068, - "step": 2055 - }, - { - "epoch": 0.24721938315397102, - "grad_norm": 1.7466805174086701, - "learning_rate": 3.5252801169111138e-06, - "loss": 1.074, - "step": 2056 - }, - { - "epoch": 0.2473396260446101, - "grad_norm": 1.8302597232639009, - "learning_rate": 3.524776144365072e-06, - "loss": 0.994, - "step": 2057 - }, - { - "epoch": 0.2474598689352492, - "grad_norm": 1.4113490166742493, - "learning_rate": 3.5242719405117016e-06, - "loss": 0.9941, - "step": 2058 - }, - { - "epoch": 0.2475801118258883, - "grad_norm": 4.345611392992504, - "learning_rate": 3.5237675054274893e-06, - "loss": 0.9492, - "step": 2059 - }, - { - "epoch": 0.24770035471652738, - "grad_norm": 1.925596278594557, - "learning_rate": 3.5232628391889584e-06, - "loss": 1.0017, - "step": 2060 - }, - { - "epoch": 0.2478205976071665, - "grad_norm": 2.0360318751210134, - "learning_rate": 3.522757941872666e-06, - "loss": 0.8402, - "step": 2061 - }, - { - "epoch": 0.24794084049780557, - "grad_norm": 1.4669422631268683, - "learning_rate": 3.5222528135552042e-06, - "loss": 1.0314, - "step": 2062 - }, - { - "epoch": 0.24806108338844465, - "grad_norm": 1.5905928662844633, - "learning_rate": 3.521747454313201e-06, - "loss": 1.0073, - "step": 2063 - }, - { - "epoch": 0.24818132627908374, - "grad_norm": 2.19265662439648, - "learning_rate": 3.521241864223319e-06, - "loss": 0.8727, - "step": 2064 - }, - { - "epoch": 0.24830156916972285, - "grad_norm": 1.0131227626693469, - "learning_rate": 3.5207360433622552e-06, - "loss": 0.8277, - "step": 2065 - }, - { - "epoch": 0.24842181206036193, - "grad_norm": 1.51075512831804, - "learning_rate": 3.5202299918067437e-06, - "loss": 0.9414, - "step": 2066 - }, - { - "epoch": 0.248542054951001, - "grad_norm": 2.202717844898851, - "learning_rate": 3.519723709633551e-06, - "loss": 0.8985, - "step": 2067 - }, - { - "epoch": 0.24866229784164012, - "grad_norm": 1.7998701260655239, - "learning_rate": 3.519217196919479e-06, - "loss": 1.0267, - "step": 2068 - }, - { - "epoch": 0.2487825407322792, - "grad_norm": 1.8536764061094628, - "learning_rate": 3.518710453741367e-06, - "loss": 0.9295, - "step": 2069 - }, - { - "epoch": 0.2489027836229183, - "grad_norm": 1.8468974775311666, - "learning_rate": 3.518203480176086e-06, - "loss": 0.8755, - "step": 2070 - }, - { - "epoch": 0.2490230265135574, - "grad_norm": 1.4919877642623267, - "learning_rate": 3.517696276300545e-06, - "loss": 1.0015, - "step": 2071 - }, - { - "epoch": 0.24914326940419648, - "grad_norm": 2.05052593276753, - "learning_rate": 3.517188842191685e-06, - "loss": 0.9008, - "step": 2072 - }, - { - "epoch": 0.24926351229483557, - "grad_norm": 1.5667174831562927, - "learning_rate": 3.5166811779264837e-06, - "loss": 0.9438, - "step": 2073 - }, - { - "epoch": 0.24938375518547465, - "grad_norm": 1.7236512094007597, - "learning_rate": 3.5161732835819545e-06, - "loss": 0.9809, - "step": 2074 - }, - { - "epoch": 0.24950399807611376, - "grad_norm": 1.6229426491664825, - "learning_rate": 3.515665159235143e-06, - "loss": 1.0346, - "step": 2075 - }, - { - "epoch": 0.24962424096675284, - "grad_norm": 1.575779900387142, - "learning_rate": 3.5151568049631318e-06, - "loss": 0.9527, - "step": 2076 - }, - { - "epoch": 0.24974448385739192, - "grad_norm": 1.449856599754291, - "learning_rate": 3.5146482208430385e-06, - "loss": 0.9928, - "step": 2077 - }, - { - "epoch": 0.24986472674803104, - "grad_norm": 1.6954207144096969, - "learning_rate": 3.514139406952014e-06, - "loss": 0.8821, - "step": 2078 - }, - { - "epoch": 0.24998496963867012, - "grad_norm": 1.8418810148629574, - "learning_rate": 3.5136303633672454e-06, - "loss": 1.0376, - "step": 2079 - }, - { - "epoch": 0.25010521252930923, - "grad_norm": 1.4908023816657994, - "learning_rate": 3.5131210901659544e-06, - "loss": 0.9507, - "step": 2080 - }, - { - "epoch": 0.2502254554199483, - "grad_norm": 2.0335171298564054, - "learning_rate": 3.5126115874253967e-06, - "loss": 1.0194, - "step": 2081 - }, - { - "epoch": 0.2503456983105874, - "grad_norm": 1.7939546820531382, - "learning_rate": 3.5121018552228644e-06, - "loss": 1.0095, - "step": 2082 - }, - { - "epoch": 0.2504659412012265, - "grad_norm": 1.7826243669907194, - "learning_rate": 3.5115918936356827e-06, - "loss": 0.9621, - "step": 2083 - }, - { - "epoch": 0.25058618409186556, - "grad_norm": 1.8623868767095733, - "learning_rate": 3.5110817027412123e-06, - "loss": 0.9947, - "step": 2084 - }, - { - "epoch": 0.25070642698250467, - "grad_norm": 2.0262584684690355, - "learning_rate": 3.5105712826168493e-06, - "loss": 0.8916, - "step": 2085 - }, - { - "epoch": 0.2508266698731437, - "grad_norm": 1.6468770383644882, - "learning_rate": 3.5100606333400235e-06, - "loss": 0.9004, - "step": 2086 - }, - { - "epoch": 0.25094691276378284, - "grad_norm": 1.8798160372677737, - "learning_rate": 3.5095497549882006e-06, - "loss": 0.9753, - "step": 2087 - }, - { - "epoch": 0.25106715565442195, - "grad_norm": 1.6926939117599797, - "learning_rate": 3.50903864763888e-06, - "loss": 0.9206, - "step": 2088 - }, - { - "epoch": 0.251187398545061, - "grad_norm": 2.2814799153724863, - "learning_rate": 3.5085273113695965e-06, - "loss": 0.9722, - "step": 2089 - }, - { - "epoch": 0.2513076414357001, - "grad_norm": 1.6437716870635941, - "learning_rate": 3.508015746257919e-06, - "loss": 0.9837, - "step": 2090 - }, - { - "epoch": 0.2514278843263392, - "grad_norm": 1.8000950364349528, - "learning_rate": 3.5075039523814518e-06, - "loss": 1.0334, - "step": 2091 - }, - { - "epoch": 0.2515481272169783, - "grad_norm": 1.7992172234578847, - "learning_rate": 3.506991929817834e-06, - "loss": 1.0214, - "step": 2092 - }, - { - "epoch": 0.2516683701076174, - "grad_norm": 1.7380726206876211, - "learning_rate": 3.506479678644738e-06, - "loss": 1.0207, - "step": 2093 - }, - { - "epoch": 0.2517886129982565, - "grad_norm": 2.55338984671751, - "learning_rate": 3.505967198939873e-06, - "loss": 0.9462, - "step": 2094 - }, - { - "epoch": 0.25190885588889556, - "grad_norm": 1.867781795908635, - "learning_rate": 3.5054544907809813e-06, - "loss": 0.9806, - "step": 2095 - }, - { - "epoch": 0.25202909877953467, - "grad_norm": 1.6802956123489552, - "learning_rate": 3.50494155424584e-06, - "loss": 0.9963, - "step": 2096 - }, - { - "epoch": 0.2521493416701738, - "grad_norm": 1.6639898579820254, - "learning_rate": 3.504428389412262e-06, - "loss": 1.0342, - "step": 2097 - }, - { - "epoch": 0.25226958456081283, - "grad_norm": 1.999143417030271, - "learning_rate": 3.5039149963580927e-06, - "loss": 0.9328, - "step": 2098 - }, - { - "epoch": 0.25238982745145194, - "grad_norm": 2.174347382776061, - "learning_rate": 3.503401375161215e-06, - "loss": 0.8944, - "step": 2099 - }, - { - "epoch": 0.252510070342091, - "grad_norm": 1.5600588994033346, - "learning_rate": 3.502887525899544e-06, - "loss": 1.0391, - "step": 2100 - }, - { - "epoch": 0.2526303132327301, - "grad_norm": 2.0825285183532634, - "learning_rate": 3.50237344865103e-06, - "loss": 1.0227, - "step": 2101 - }, - { - "epoch": 0.2527505561233692, - "grad_norm": 2.011836550328286, - "learning_rate": 3.501859143493658e-06, - "loss": 0.974, - "step": 2102 - }, - { - "epoch": 0.2528707990140083, - "grad_norm": 1.0814929270308018, - "learning_rate": 3.5013446105054488e-06, - "loss": 0.8435, - "step": 2103 - }, - { - "epoch": 0.2529910419046474, - "grad_norm": 1.7541428981931586, - "learning_rate": 3.5008298497644555e-06, - "loss": 0.9522, - "step": 2104 - }, - { - "epoch": 0.2531112847952865, - "grad_norm": 1.4978327989864164, - "learning_rate": 3.500314861348767e-06, - "loss": 1.0766, - "step": 2105 - }, - { - "epoch": 0.25323152768592555, - "grad_norm": 1.7798406814684038, - "learning_rate": 3.499799645336507e-06, - "loss": 0.9754, - "step": 2106 - }, - { - "epoch": 0.25335177057656466, - "grad_norm": 1.3620006153564632, - "learning_rate": 3.4992842018058336e-06, - "loss": 1.0684, - "step": 2107 - }, - { - "epoch": 0.25347201346720377, - "grad_norm": 2.276536435283722, - "learning_rate": 3.4987685308349384e-06, - "loss": 1.0772, - "step": 2108 - }, - { - "epoch": 0.2535922563578428, - "grad_norm": 2.222729619697428, - "learning_rate": 3.4982526325020497e-06, - "loss": 0.8214, - "step": 2109 - }, - { - "epoch": 0.25371249924848194, - "grad_norm": 1.9619665599310294, - "learning_rate": 3.4977365068854273e-06, - "loss": 1.0221, - "step": 2110 - }, - { - "epoch": 0.25383274213912105, - "grad_norm": 1.6132876047374087, - "learning_rate": 3.4972201540633676e-06, - "loss": 0.924, - "step": 2111 - }, - { - "epoch": 0.2539529850297601, - "grad_norm": 1.6705049266542724, - "learning_rate": 3.4967035741142008e-06, - "loss": 1.0516, - "step": 2112 - }, - { - "epoch": 0.2540732279203992, - "grad_norm": 1.7906648881074467, - "learning_rate": 3.4961867671162917e-06, - "loss": 1.0174, - "step": 2113 - }, - { - "epoch": 0.2541934708110383, - "grad_norm": 2.3307632756356877, - "learning_rate": 3.4956697331480402e-06, - "loss": 0.9852, - "step": 2114 - }, - { - "epoch": 0.2543137137016774, - "grad_norm": 1.5072248034969327, - "learning_rate": 3.495152472287879e-06, - "loss": 1.0033, - "step": 2115 - }, - { - "epoch": 0.2544339565923165, - "grad_norm": 1.854797306040078, - "learning_rate": 3.4946349846142766e-06, - "loss": 0.9368, - "step": 2116 - }, - { - "epoch": 0.25455419948295555, - "grad_norm": 1.8787828432665414, - "learning_rate": 3.4941172702057353e-06, - "loss": 0.956, - "step": 2117 - }, - { - "epoch": 0.25467444237359466, - "grad_norm": 2.0720898178840863, - "learning_rate": 3.4935993291407924e-06, - "loss": 1.0034, - "step": 2118 - }, - { - "epoch": 0.25479468526423377, - "grad_norm": 2.4748032154497044, - "learning_rate": 3.4930811614980183e-06, - "loss": 0.9086, - "step": 2119 - }, - { - "epoch": 0.2549149281548728, - "grad_norm": 1.4589896042409634, - "learning_rate": 3.4925627673560198e-06, - "loss": 0.9938, - "step": 2120 - }, - { - "epoch": 0.25503517104551193, - "grad_norm": 1.4720145934822395, - "learning_rate": 3.4920441467934357e-06, - "loss": 1.0851, - "step": 2121 - }, - { - "epoch": 0.25515541393615104, - "grad_norm": 1.9182799294686175, - "learning_rate": 3.491525299888941e-06, - "loss": 1.0304, - "step": 2122 - }, - { - "epoch": 0.2552756568267901, - "grad_norm": 1.0352389320605448, - "learning_rate": 3.491006226721244e-06, - "loss": 0.8805, - "step": 2123 - }, - { - "epoch": 0.2553958997174292, - "grad_norm": 1.983842733291311, - "learning_rate": 3.4904869273690882e-06, - "loss": 0.9721, - "step": 2124 - }, - { - "epoch": 0.2555161426080683, - "grad_norm": 1.7075581381242815, - "learning_rate": 3.489967401911251e-06, - "loss": 1.0876, - "step": 2125 - }, - { - "epoch": 0.2556363854987074, - "grad_norm": 1.481349310785054, - "learning_rate": 3.4894476504265428e-06, - "loss": 0.8958, - "step": 2126 - }, - { - "epoch": 0.2557566283893465, - "grad_norm": 0.8541187800188329, - "learning_rate": 3.4889276729938104e-06, - "loss": 0.7695, - "step": 2127 - }, - { - "epoch": 0.2558768712799856, - "grad_norm": 2.116168575804665, - "learning_rate": 3.488407469691934e-06, - "loss": 1.0076, - "step": 2128 - }, - { - "epoch": 0.25599711417062465, - "grad_norm": 2.5049237402236297, - "learning_rate": 3.487887040599828e-06, - "loss": 1.0125, - "step": 2129 - }, - { - "epoch": 0.25611735706126376, - "grad_norm": 1.9788050538704272, - "learning_rate": 3.4873663857964407e-06, - "loss": 0.9563, - "step": 2130 - }, - { - "epoch": 0.2562375999519028, - "grad_norm": 1.6907164684661216, - "learning_rate": 3.4868455053607556e-06, - "loss": 0.87, - "step": 2131 - }, - { - "epoch": 0.2563578428425419, - "grad_norm": 2.0770719914970033, - "learning_rate": 3.486324399371789e-06, - "loss": 0.9184, - "step": 2132 - }, - { - "epoch": 0.25647808573318104, - "grad_norm": 1.6789100444567986, - "learning_rate": 3.485803067908593e-06, - "loss": 0.9931, - "step": 2133 - }, - { - "epoch": 0.2565983286238201, - "grad_norm": 1.7805786875536251, - "learning_rate": 3.485281511050253e-06, - "loss": 0.9937, - "step": 2134 - }, - { - "epoch": 0.2567185715144592, - "grad_norm": 2.2486769707077334, - "learning_rate": 3.484759728875889e-06, - "loss": 1.1008, - "step": 2135 - }, - { - "epoch": 0.2568388144050983, - "grad_norm": 1.5940804980304633, - "learning_rate": 3.4842377214646543e-06, - "loss": 1.0112, - "step": 2136 - }, - { - "epoch": 0.25695905729573737, - "grad_norm": 1.5627650226901673, - "learning_rate": 3.483715488895737e-06, - "loss": 0.8708, - "step": 2137 - }, - { - "epoch": 0.2570793001863765, - "grad_norm": 1.809053630322781, - "learning_rate": 3.48319303124836e-06, - "loss": 0.9913, - "step": 2138 - }, - { - "epoch": 0.2571995430770156, - "grad_norm": 1.9373258081702631, - "learning_rate": 3.4826703486017798e-06, - "loss": 0.8756, - "step": 2139 - }, - { - "epoch": 0.25731978596765465, - "grad_norm": 1.3752953472250966, - "learning_rate": 3.4821474410352867e-06, - "loss": 0.967, - "step": 2140 - }, - { - "epoch": 0.25744002885829376, - "grad_norm": 1.0140201506860278, - "learning_rate": 3.481624308628205e-06, - "loss": 0.8666, - "step": 2141 - }, - { - "epoch": 0.25756027174893287, - "grad_norm": 2.2936636344595063, - "learning_rate": 3.481100951459893e-06, - "loss": 1.2042, - "step": 2142 - }, - { - "epoch": 0.2576805146395719, - "grad_norm": 1.5034985274474808, - "learning_rate": 3.4805773696097453e-06, - "loss": 0.9899, - "step": 2143 - }, - { - "epoch": 0.25780075753021103, - "grad_norm": 1.9536170644802158, - "learning_rate": 3.4800535631571874e-06, - "loss": 1.0716, - "step": 2144 - }, - { - "epoch": 0.25792100042085014, - "grad_norm": 2.1092389098171225, - "learning_rate": 3.4795295321816804e-06, - "loss": 0.9766, - "step": 2145 - }, - { - "epoch": 0.2580412433114892, - "grad_norm": 1.788009333571239, - "learning_rate": 3.47900527676272e-06, - "loss": 1.1188, - "step": 2146 - }, - { - "epoch": 0.2581614862021283, - "grad_norm": 1.6988530609553274, - "learning_rate": 3.478480796979835e-06, - "loss": 1.0864, - "step": 2147 - }, - { - "epoch": 0.25828172909276736, - "grad_norm": 2.1643402251915203, - "learning_rate": 3.4779560929125894e-06, - "loss": 0.9738, - "step": 2148 - }, - { - "epoch": 0.2584019719834065, - "grad_norm": 0.7591161231695472, - "learning_rate": 3.4774311646405783e-06, - "loss": 0.7799, - "step": 2149 - }, - { - "epoch": 0.2585222148740456, - "grad_norm": 2.448623122413426, - "learning_rate": 3.476906012243435e-06, - "loss": 1.0365, - "step": 2150 - }, - { - "epoch": 0.25864245776468464, - "grad_norm": 1.5131876903280301, - "learning_rate": 3.476380635800824e-06, - "loss": 1.0174, - "step": 2151 - }, - { - "epoch": 0.25876270065532375, - "grad_norm": 2.1322708280433695, - "learning_rate": 3.475855035392444e-06, - "loss": 1.065, - "step": 2152 - }, - { - "epoch": 0.25888294354596286, - "grad_norm": 1.8876935442794356, - "learning_rate": 3.475329211098029e-06, - "loss": 0.9048, - "step": 2153 - }, - { - "epoch": 0.2590031864366019, - "grad_norm": 1.4808510158487034, - "learning_rate": 3.4748031629973453e-06, - "loss": 1.0215, - "step": 2154 - }, - { - "epoch": 0.25912342932724103, - "grad_norm": 0.9199067132757325, - "learning_rate": 3.4742768911701944e-06, - "loss": 0.7711, - "step": 2155 - }, - { - "epoch": 0.25924367221788014, - "grad_norm": 2.387746536547929, - "learning_rate": 3.4737503956964113e-06, - "loss": 0.908, - "step": 2156 - }, - { - "epoch": 0.2593639151085192, - "grad_norm": 2.228037349052294, - "learning_rate": 3.473223676655865e-06, - "loss": 0.881, - "step": 2157 - }, - { - "epoch": 0.2594841579991583, - "grad_norm": 2.796223058319243, - "learning_rate": 3.472696734128459e-06, - "loss": 1.0049, - "step": 2158 - }, - { - "epoch": 0.2596044008897974, - "grad_norm": 1.8909538550513585, - "learning_rate": 3.4721695681941286e-06, - "loss": 0.9574, - "step": 2159 - }, - { - "epoch": 0.25972464378043647, - "grad_norm": 2.1182667138853843, - "learning_rate": 3.471642178932845e-06, - "loss": 1.032, - "step": 2160 - }, - { - "epoch": 0.2598448866710756, - "grad_norm": 1.8401012200005227, - "learning_rate": 3.471114566424613e-06, - "loss": 1.0938, - "step": 2161 - }, - { - "epoch": 0.25996512956171464, - "grad_norm": 1.885788966842743, - "learning_rate": 3.4705867307494715e-06, - "loss": 0.9617, - "step": 2162 - }, - { - "epoch": 0.26008537245235375, - "grad_norm": 2.2605516126816068, - "learning_rate": 3.470058671987492e-06, - "loss": 1.0399, - "step": 2163 - }, - { - "epoch": 0.26020561534299286, - "grad_norm": 1.585403578007023, - "learning_rate": 3.4695303902187805e-06, - "loss": 1.0378, - "step": 2164 - }, - { - "epoch": 0.2603258582336319, - "grad_norm": 2.0683299810898284, - "learning_rate": 3.469001885523478e-06, - "loss": 0.9863, - "step": 2165 - }, - { - "epoch": 0.260446101124271, - "grad_norm": 1.5471601127122157, - "learning_rate": 3.4684731579817568e-06, - "loss": 1.0104, - "step": 2166 - }, - { - "epoch": 0.26056634401491013, - "grad_norm": 1.4039827507459235, - "learning_rate": 3.4679442076738247e-06, - "loss": 0.9704, - "step": 2167 - }, - { - "epoch": 0.2606865869055492, - "grad_norm": 2.310900573229101, - "learning_rate": 3.4674150346799245e-06, - "loss": 1.0374, - "step": 2168 - }, - { - "epoch": 0.2608068297961883, - "grad_norm": 2.1184198938187633, - "learning_rate": 3.4668856390803295e-06, - "loss": 0.9981, - "step": 2169 - }, - { - "epoch": 0.2609270726868274, - "grad_norm": 2.033134173452589, - "learning_rate": 3.4663560209553495e-06, - "loss": 1.095, - "step": 2170 - }, - { - "epoch": 0.26104731557746647, - "grad_norm": 1.6449029280477907, - "learning_rate": 3.4658261803853267e-06, - "loss": 0.9867, - "step": 2171 - }, - { - "epoch": 0.2611675584681056, - "grad_norm": 1.9923265595614796, - "learning_rate": 3.4652961174506383e-06, - "loss": 1.0135, - "step": 2172 - }, - { - "epoch": 0.2612878013587447, - "grad_norm": 1.1425938792650518, - "learning_rate": 3.464765832231694e-06, - "loss": 0.8026, - "step": 2173 - }, - { - "epoch": 0.26140804424938374, - "grad_norm": 1.5937773758276126, - "learning_rate": 3.4642353248089373e-06, - "loss": 0.9063, - "step": 2174 - }, - { - "epoch": 0.26152828714002285, - "grad_norm": 2.446806624859093, - "learning_rate": 3.463704595262846e-06, - "loss": 1.0071, - "step": 2175 - }, - { - "epoch": 0.26164853003066196, - "grad_norm": 1.700104020246109, - "learning_rate": 3.463173643673931e-06, - "loss": 0.8985, - "step": 2176 - }, - { - "epoch": 0.261768772921301, - "grad_norm": 1.2248473111186162, - "learning_rate": 3.4626424701227387e-06, - "loss": 0.861, - "step": 2177 - }, - { - "epoch": 0.26188901581194013, - "grad_norm": 1.1022366564317851, - "learning_rate": 3.4621110746898452e-06, - "loss": 0.8149, - "step": 2178 - }, - { - "epoch": 0.2620092587025792, - "grad_norm": 1.4503793516165282, - "learning_rate": 3.4615794574558654e-06, - "loss": 0.9451, - "step": 2179 - }, - { - "epoch": 0.2621295015932183, - "grad_norm": 2.0942014520652674, - "learning_rate": 3.4610476185014436e-06, - "loss": 1.0486, - "step": 2180 - }, - { - "epoch": 0.2622497444838574, - "grad_norm": 1.5063067413659386, - "learning_rate": 3.4605155579072597e-06, - "loss": 0.997, - "step": 2181 - }, - { - "epoch": 0.26236998737449646, - "grad_norm": 1.7195148500165651, - "learning_rate": 3.459983275754027e-06, - "loss": 0.9139, - "step": 2182 - }, - { - "epoch": 0.26249023026513557, - "grad_norm": 3.006757912981661, - "learning_rate": 3.4594507721224918e-06, - "loss": 0.9991, - "step": 2183 - }, - { - "epoch": 0.2626104731557747, - "grad_norm": 3.237207029777011, - "learning_rate": 3.4589180470934353e-06, - "loss": 1.0227, - "step": 2184 - }, - { - "epoch": 0.26273071604641374, - "grad_norm": 1.8012875984921701, - "learning_rate": 3.4583851007476713e-06, - "loss": 0.9691, - "step": 2185 - }, - { - "epoch": 0.26285095893705285, - "grad_norm": 1.9486820804683676, - "learning_rate": 3.4578519331660464e-06, - "loss": 0.8933, - "step": 2186 - }, - { - "epoch": 0.26297120182769196, - "grad_norm": 1.9978959221371213, - "learning_rate": 3.4573185444294426e-06, - "loss": 1.0207, - "step": 2187 - }, - { - "epoch": 0.263091444718331, - "grad_norm": 1.8492646585945602, - "learning_rate": 3.456784934618774e-06, - "loss": 0.9869, - "step": 2188 - }, - { - "epoch": 0.2632116876089701, - "grad_norm": 1.8783797638758402, - "learning_rate": 3.4562511038149897e-06, - "loss": 0.9966, - "step": 2189 - }, - { - "epoch": 0.26333193049960923, - "grad_norm": 0.947811606457368, - "learning_rate": 3.4557170520990705e-06, - "loss": 0.7973, - "step": 2190 - }, - { - "epoch": 0.2634521733902483, - "grad_norm": 1.3714822506759128, - "learning_rate": 3.4551827795520324e-06, - "loss": 1.0648, - "step": 2191 - }, - { - "epoch": 0.2635724162808874, - "grad_norm": 1.580537670252564, - "learning_rate": 3.4546482862549226e-06, - "loss": 1.0408, - "step": 2192 - }, - { - "epoch": 0.2636926591715265, - "grad_norm": 2.414033068628743, - "learning_rate": 3.4541135722888253e-06, - "loss": 0.9867, - "step": 2193 - }, - { - "epoch": 0.26381290206216557, - "grad_norm": 1.7806225390967907, - "learning_rate": 3.453578637734854e-06, - "loss": 1.0013, - "step": 2194 - }, - { - "epoch": 0.2639331449528047, - "grad_norm": 1.7165920235167749, - "learning_rate": 3.4530434826741605e-06, - "loss": 0.9899, - "step": 2195 - }, - { - "epoch": 0.26405338784344373, - "grad_norm": 1.6096448209181635, - "learning_rate": 3.452508107187926e-06, - "loss": 0.8943, - "step": 2196 - }, - { - "epoch": 0.26417363073408284, - "grad_norm": 1.5794553531424023, - "learning_rate": 3.451972511357366e-06, - "loss": 0.9679, - "step": 2197 - }, - { - "epoch": 0.26429387362472195, - "grad_norm": 1.5948868978693251, - "learning_rate": 3.45143669526373e-06, - "loss": 1.055, - "step": 2198 - }, - { - "epoch": 0.264414116515361, - "grad_norm": 0.8925622984570276, - "learning_rate": 3.450900658988302e-06, - "loss": 0.8307, - "step": 2199 - }, - { - "epoch": 0.2645343594060001, - "grad_norm": 2.5658396845961247, - "learning_rate": 3.450364402612397e-06, - "loss": 0.9742, - "step": 2200 - }, - { - "epoch": 0.26465460229663923, - "grad_norm": 1.7044134603314876, - "learning_rate": 3.449827926217366e-06, - "loss": 1.0274, - "step": 2201 - }, - { - "epoch": 0.2647748451872783, - "grad_norm": 1.9153243284751806, - "learning_rate": 3.449291229884591e-06, - "loss": 1.0077, - "step": 2202 - }, - { - "epoch": 0.2648950880779174, - "grad_norm": 1.7374972857996098, - "learning_rate": 3.4487543136954887e-06, - "loss": 1.0663, - "step": 2203 - }, - { - "epoch": 0.2650153309685565, - "grad_norm": 1.7142702851282328, - "learning_rate": 3.448217177731509e-06, - "loss": 1.1125, - "step": 2204 - }, - { - "epoch": 0.26513557385919556, - "grad_norm": 1.7949385450251132, - "learning_rate": 3.4476798220741348e-06, - "loss": 0.9756, - "step": 2205 - }, - { - "epoch": 0.26525581674983467, - "grad_norm": 1.5200428236399997, - "learning_rate": 3.4471422468048826e-06, - "loss": 0.9807, - "step": 2206 - }, - { - "epoch": 0.2653760596404738, - "grad_norm": 2.2885008243278433, - "learning_rate": 3.4466044520053022e-06, - "loss": 0.9304, - "step": 2207 - }, - { - "epoch": 0.26549630253111284, - "grad_norm": 1.874021529156467, - "learning_rate": 3.446066437756977e-06, - "loss": 0.8073, - "step": 2208 - }, - { - "epoch": 0.26561654542175195, - "grad_norm": 1.997037183118393, - "learning_rate": 3.4455282041415224e-06, - "loss": 0.954, - "step": 2209 - }, - { - "epoch": 0.265736788312391, - "grad_norm": 2.1042738657419755, - "learning_rate": 3.4449897512405894e-06, - "loss": 1.0714, - "step": 2210 - }, - { - "epoch": 0.2658570312030301, - "grad_norm": 1.9117719373954607, - "learning_rate": 3.444451079135859e-06, - "loss": 0.9504, - "step": 2211 - }, - { - "epoch": 0.2659772740936692, - "grad_norm": 1.7226977700140915, - "learning_rate": 3.4439121879090493e-06, - "loss": 0.9359, - "step": 2212 - }, - { - "epoch": 0.2660975169843083, - "grad_norm": 2.1260566328370167, - "learning_rate": 3.4433730776419082e-06, - "loss": 1.0275, - "step": 2213 - }, - { - "epoch": 0.2662177598749474, - "grad_norm": 2.8244676915537736, - "learning_rate": 3.4428337484162183e-06, - "loss": 1.0047, - "step": 2214 - }, - { - "epoch": 0.2663380027655865, - "grad_norm": 1.790087943234337, - "learning_rate": 3.442294200313797e-06, - "loss": 1.047, - "step": 2215 - }, - { - "epoch": 0.26645824565622556, - "grad_norm": 0.9744762566728773, - "learning_rate": 3.4417544334164916e-06, - "loss": 0.777, - "step": 2216 - }, - { - "epoch": 0.26657848854686467, - "grad_norm": 1.738741421599518, - "learning_rate": 3.4412144478061854e-06, - "loss": 0.9796, - "step": 2217 - }, - { - "epoch": 0.2666987314375038, - "grad_norm": 1.7260031506099656, - "learning_rate": 3.4406742435647925e-06, - "loss": 0.9493, - "step": 2218 - }, - { - "epoch": 0.26681897432814283, - "grad_norm": 2.03473798147557, - "learning_rate": 3.440133820774263e-06, - "loss": 0.9955, - "step": 2219 - }, - { - "epoch": 0.26693921721878194, - "grad_norm": 2.0899268017959947, - "learning_rate": 3.439593179516578e-06, - "loss": 1.0211, - "step": 2220 - }, - { - "epoch": 0.26705946010942105, - "grad_norm": 1.682529405107353, - "learning_rate": 3.4390523198737524e-06, - "loss": 1.0106, - "step": 2221 - }, - { - "epoch": 0.2671797030000601, - "grad_norm": 1.5294802812724955, - "learning_rate": 3.4385112419278333e-06, - "loss": 0.9307, - "step": 2222 - }, - { - "epoch": 0.2672999458906992, - "grad_norm": 1.0196353084617866, - "learning_rate": 3.4379699457609033e-06, - "loss": 0.8915, - "step": 2223 - }, - { - "epoch": 0.26742018878133833, - "grad_norm": 3.3281452017157567, - "learning_rate": 3.4374284314550755e-06, - "loss": 1.0949, - "step": 2224 - }, - { - "epoch": 0.2675404316719774, - "grad_norm": 1.7264926257601563, - "learning_rate": 3.436886699092498e-06, - "loss": 1.0154, - "step": 2225 - }, - { - "epoch": 0.2676606745626165, - "grad_norm": 2.2220312221884666, - "learning_rate": 3.4363447487553502e-06, - "loss": 0.9089, - "step": 2226 - }, - { - "epoch": 0.26778091745325555, - "grad_norm": 1.8565592241731879, - "learning_rate": 3.4358025805258455e-06, - "loss": 0.9944, - "step": 2227 - }, - { - "epoch": 0.26790116034389466, - "grad_norm": 6.006758380377421, - "learning_rate": 3.435260194486232e-06, - "loss": 1.0379, - "step": 2228 - }, - { - "epoch": 0.2680214032345338, - "grad_norm": 2.0766894896566015, - "learning_rate": 3.4347175907187875e-06, - "loss": 1.0196, - "step": 2229 - }, - { - "epoch": 0.26814164612517283, - "grad_norm": 1.7118153867338226, - "learning_rate": 3.4341747693058254e-06, - "loss": 1.0757, - "step": 2230 - }, - { - "epoch": 0.26826188901581194, - "grad_norm": 1.6786703889309127, - "learning_rate": 3.4336317303296916e-06, - "loss": 0.9652, - "step": 2231 - }, - { - "epoch": 0.26838213190645105, - "grad_norm": 2.088516641087956, - "learning_rate": 3.4330884738727635e-06, - "loss": 0.9584, - "step": 2232 - }, - { - "epoch": 0.2685023747970901, - "grad_norm": 1.809067073044604, - "learning_rate": 3.4325450000174535e-06, - "loss": 0.9104, - "step": 2233 - }, - { - "epoch": 0.2686226176877292, - "grad_norm": 1.686306566741686, - "learning_rate": 3.4320013088462067e-06, - "loss": 0.9361, - "step": 2234 - }, - { - "epoch": 0.2687428605783683, - "grad_norm": 1.7912775178913247, - "learning_rate": 3.431457400441499e-06, - "loss": 1.0201, - "step": 2235 - }, - { - "epoch": 0.2688631034690074, - "grad_norm": 1.117270246522453, - "learning_rate": 3.4309132748858424e-06, - "loss": 0.8844, - "step": 2236 - }, - { - "epoch": 0.2689833463596465, - "grad_norm": 1.5464823831466872, - "learning_rate": 3.430368932261779e-06, - "loss": 1.0392, - "step": 2237 - }, - { - "epoch": 0.2691035892502856, - "grad_norm": 1.6949233210501005, - "learning_rate": 3.429824372651886e-06, - "loss": 0.947, - "step": 2238 - }, - { - "epoch": 0.26922383214092466, - "grad_norm": 2.425228823847857, - "learning_rate": 3.4292795961387732e-06, - "loss": 1.0373, - "step": 2239 - }, - { - "epoch": 0.26934407503156377, - "grad_norm": 1.931296477020895, - "learning_rate": 3.4287346028050818e-06, - "loss": 1.0751, - "step": 2240 - }, - { - "epoch": 0.2694643179222028, - "grad_norm": 1.5521664522516925, - "learning_rate": 3.4281893927334866e-06, - "loss": 0.9952, - "step": 2241 - }, - { - "epoch": 0.26958456081284193, - "grad_norm": 1.8005092854047193, - "learning_rate": 3.4276439660066963e-06, - "loss": 0.9499, - "step": 2242 - }, - { - "epoch": 0.26970480370348104, - "grad_norm": 1.9854574780374237, - "learning_rate": 3.427098322707452e-06, - "loss": 1.0404, - "step": 2243 - }, - { - "epoch": 0.2698250465941201, - "grad_norm": 1.8248877921947866, - "learning_rate": 3.426552462918526e-06, - "loss": 1.0955, - "step": 2244 - }, - { - "epoch": 0.2699452894847592, - "grad_norm": 2.1755832328611766, - "learning_rate": 3.426006386722726e-06, - "loss": 0.9402, - "step": 2245 - }, - { - "epoch": 0.2700655323753983, - "grad_norm": 1.7690891277746574, - "learning_rate": 3.4254600942028914e-06, - "loss": 1.1205, - "step": 2246 - }, - { - "epoch": 0.2701857752660374, - "grad_norm": 2.1641230936208924, - "learning_rate": 3.424913585441893e-06, - "loss": 1.0113, - "step": 2247 - }, - { - "epoch": 0.2703060181566765, - "grad_norm": 2.2639764510099347, - "learning_rate": 3.4243668605226374e-06, - "loss": 1.0786, - "step": 2248 - }, - { - "epoch": 0.2704262610473156, - "grad_norm": 1.9418045341541703, - "learning_rate": 3.423819919528061e-06, - "loss": 1.0289, - "step": 2249 - }, - { - "epoch": 0.27054650393795465, - "grad_norm": 2.475451102713659, - "learning_rate": 3.4232727625411355e-06, - "loss": 0.9861, - "step": 2250 - }, - { - "epoch": 0.27066674682859376, - "grad_norm": 1.5221155935997572, - "learning_rate": 3.4227253896448626e-06, - "loss": 1.0585, - "step": 2251 - }, - { - "epoch": 0.2707869897192329, - "grad_norm": 2.0139902272067816, - "learning_rate": 3.42217780092228e-06, - "loss": 1.0057, - "step": 2252 - }, - { - "epoch": 0.27090723260987193, - "grad_norm": 1.068874813621843, - "learning_rate": 3.421629996456456e-06, - "loss": 0.8322, - "step": 2253 - }, - { - "epoch": 0.27102747550051104, - "grad_norm": 1.8100477927837306, - "learning_rate": 3.421081976330491e-06, - "loss": 1.0155, - "step": 2254 - }, - { - "epoch": 0.27114771839115015, - "grad_norm": 1.7824322976286877, - "learning_rate": 3.4205337406275207e-06, - "loss": 1.0733, - "step": 2255 - }, - { - "epoch": 0.2712679612817892, - "grad_norm": 2.027534255409844, - "learning_rate": 3.4199852894307114e-06, - "loss": 0.9581, - "step": 2256 - }, - { - "epoch": 0.2713882041724283, - "grad_norm": 1.790605371177898, - "learning_rate": 3.419436622823262e-06, - "loss": 0.9877, - "step": 2257 - }, - { - "epoch": 0.27150844706306737, - "grad_norm": 1.680438703182356, - "learning_rate": 3.4188877408884063e-06, - "loss": 0.9405, - "step": 2258 - }, - { - "epoch": 0.2716286899537065, - "grad_norm": 2.1942076159561945, - "learning_rate": 3.4183386437094088e-06, - "loss": 0.8625, - "step": 2259 - }, - { - "epoch": 0.2717489328443456, - "grad_norm": 2.193357961897085, - "learning_rate": 3.417789331369565e-06, - "loss": 1.0156, - "step": 2260 - }, - { - "epoch": 0.27186917573498465, - "grad_norm": 1.8542856399713292, - "learning_rate": 3.4172398039522088e-06, - "loss": 1.1065, - "step": 2261 - }, - { - "epoch": 0.27198941862562376, - "grad_norm": 1.4976998813822424, - "learning_rate": 3.4166900615407e-06, - "loss": 0.9887, - "step": 2262 - }, - { - "epoch": 0.27210966151626287, - "grad_norm": 1.6498990923919208, - "learning_rate": 3.416140104218436e-06, - "loss": 0.9422, - "step": 2263 - }, - { - "epoch": 0.2722299044069019, - "grad_norm": 0.9778912469079135, - "learning_rate": 3.4155899320688437e-06, - "loss": 0.9401, - "step": 2264 - }, - { - "epoch": 0.27235014729754103, - "grad_norm": 1.9793145241636523, - "learning_rate": 3.415039545175384e-06, - "loss": 0.9452, - "step": 2265 - }, - { - "epoch": 0.27247039018818014, - "grad_norm": 2.0703197551096344, - "learning_rate": 3.414488943621551e-06, - "loss": 0.8609, - "step": 2266 - }, - { - "epoch": 0.2725906330788192, - "grad_norm": 1.7560692174864274, - "learning_rate": 3.41393812749087e-06, - "loss": 0.9408, - "step": 2267 - }, - { - "epoch": 0.2727108759694583, - "grad_norm": 2.1991461137122212, - "learning_rate": 3.4133870968668984e-06, - "loss": 0.928, - "step": 2268 - }, - { - "epoch": 0.2728311188600974, - "grad_norm": 1.5410095137364959, - "learning_rate": 3.412835851833229e-06, - "loss": 0.9856, - "step": 2269 - }, - { - "epoch": 0.2729513617507365, - "grad_norm": 1.7774253071947215, - "learning_rate": 3.4122843924734834e-06, - "loss": 0.9769, - "step": 2270 - }, - { - "epoch": 0.2730716046413756, - "grad_norm": 1.777888287977163, - "learning_rate": 3.411732718871319e-06, - "loss": 1.0854, - "step": 2271 - }, - { - "epoch": 0.27319184753201464, - "grad_norm": 1.5032131430940048, - "learning_rate": 3.4111808311104227e-06, - "loss": 0.9805, - "step": 2272 - }, - { - "epoch": 0.27331209042265375, - "grad_norm": 1.9819913349635834, - "learning_rate": 3.410628729274517e-06, - "loss": 0.8944, - "step": 2273 - }, - { - "epoch": 0.27343233331329286, - "grad_norm": 1.7884739736530146, - "learning_rate": 3.4100764134473546e-06, - "loss": 1.0292, - "step": 2274 - }, - { - "epoch": 0.2735525762039319, - "grad_norm": 2.2967615423920447, - "learning_rate": 3.4095238837127215e-06, - "loss": 1.0589, - "step": 2275 - }, - { - "epoch": 0.27367281909457103, - "grad_norm": 4.222981231351459, - "learning_rate": 3.4089711401544355e-06, - "loss": 0.9965, - "step": 2276 - }, - { - "epoch": 0.27379306198521014, - "grad_norm": 1.9448657154710192, - "learning_rate": 3.4084181828563486e-06, - "loss": 0.872, - "step": 2277 - }, - { - "epoch": 0.2739133048758492, - "grad_norm": 1.6321192095451422, - "learning_rate": 3.4078650119023428e-06, - "loss": 0.9038, - "step": 2278 - }, - { - "epoch": 0.2740335477664883, - "grad_norm": 1.8283691243250517, - "learning_rate": 3.4073116273763337e-06, - "loss": 0.9464, - "step": 2279 - }, - { - "epoch": 0.2741537906571274, - "grad_norm": 1.973327522505547, - "learning_rate": 3.40675802936227e-06, - "loss": 1.0118, - "step": 2280 - }, - { - "epoch": 0.27427403354776647, - "grad_norm": 1.820490357710551, - "learning_rate": 3.4062042179441318e-06, - "loss": 0.9148, - "step": 2281 - }, - { - "epoch": 0.2743942764384056, - "grad_norm": 2.0170434499967533, - "learning_rate": 3.4056501932059314e-06, - "loss": 1.01, - "step": 2282 - }, - { - "epoch": 0.2745145193290447, - "grad_norm": 1.0946931314936081, - "learning_rate": 3.405095955231715e-06, - "loss": 0.81, - "step": 2283 - }, - { - "epoch": 0.27463476221968375, - "grad_norm": 1.9474088717528577, - "learning_rate": 3.4045415041055585e-06, - "loss": 1.143, - "step": 2284 - }, - { - "epoch": 0.27475500511032286, - "grad_norm": 2.0584603253725375, - "learning_rate": 3.4039868399115728e-06, - "loss": 1.0015, - "step": 2285 - }, - { - "epoch": 0.27487524800096197, - "grad_norm": 1.7078127261595302, - "learning_rate": 3.4034319627339003e-06, - "loss": 0.9997, - "step": 2286 - }, - { - "epoch": 0.274995490891601, - "grad_norm": 1.9961946167067892, - "learning_rate": 3.402876872656715e-06, - "loss": 0.8921, - "step": 2287 - }, - { - "epoch": 0.27511573378224013, - "grad_norm": 1.708780889113318, - "learning_rate": 3.402321569764223e-06, - "loss": 1.0958, - "step": 2288 - }, - { - "epoch": 0.2752359766728792, - "grad_norm": 1.6274882852627857, - "learning_rate": 3.4017660541406635e-06, - "loss": 1.0306, - "step": 2289 - }, - { - "epoch": 0.2753562195635183, - "grad_norm": 1.5718326327793206, - "learning_rate": 3.4012103258703092e-06, - "loss": 0.9397, - "step": 2290 - }, - { - "epoch": 0.2754764624541574, - "grad_norm": 1.6843238893585508, - "learning_rate": 3.4006543850374616e-06, - "loss": 1.0244, - "step": 2291 - }, - { - "epoch": 0.27559670534479647, - "grad_norm": 2.0815256159661106, - "learning_rate": 3.400098231726458e-06, - "loss": 0.954, - "step": 2292 - }, - { - "epoch": 0.2757169482354356, - "grad_norm": 1.6730804375865393, - "learning_rate": 3.3995418660216657e-06, - "loss": 1.0793, - "step": 2293 - }, - { - "epoch": 0.2758371911260747, - "grad_norm": 1.887356920692969, - "learning_rate": 3.3989852880074848e-06, - "loss": 1.0174, - "step": 2294 - }, - { - "epoch": 0.27595743401671374, - "grad_norm": 1.127168895884123, - "learning_rate": 3.398428497768348e-06, - "loss": 0.8389, - "step": 2295 - }, - { - "epoch": 0.27607767690735285, - "grad_norm": 1.7799363227769889, - "learning_rate": 3.3978714953887205e-06, - "loss": 0.921, - "step": 2296 - }, - { - "epoch": 0.27619791979799196, - "grad_norm": 2.911428780084429, - "learning_rate": 3.397314280953098e-06, - "loss": 1.0662, - "step": 2297 - }, - { - "epoch": 0.276318162688631, - "grad_norm": 1.833344336678979, - "learning_rate": 3.3967568545460108e-06, - "loss": 0.9982, - "step": 2298 - }, - { - "epoch": 0.27643840557927013, - "grad_norm": 1.8302883813150472, - "learning_rate": 3.3961992162520185e-06, - "loss": 1.0026, - "step": 2299 - }, - { - "epoch": 0.27655864846990924, - "grad_norm": 1.841544173411801, - "learning_rate": 3.3956413661557156e-06, - "loss": 0.9284, - "step": 2300 - }, - { - "epoch": 0.2766788913605483, - "grad_norm": 2.2182618799698655, - "learning_rate": 3.3950833043417273e-06, - "loss": 0.8579, - "step": 2301 - }, - { - "epoch": 0.2767991342511874, - "grad_norm": 1.9850354634919751, - "learning_rate": 3.3945250308947105e-06, - "loss": 0.9221, - "step": 2302 - }, - { - "epoch": 0.2769193771418265, - "grad_norm": 1.3539470298863834, - "learning_rate": 3.3939665458993556e-06, - "loss": 0.8979, - "step": 2303 - }, - { - "epoch": 0.27703962003246557, - "grad_norm": 1.902304897655741, - "learning_rate": 3.3934078494403843e-06, - "loss": 0.9647, - "step": 2304 - }, - { - "epoch": 0.2771598629231047, - "grad_norm": 1.8800586489396438, - "learning_rate": 3.3928489416025495e-06, - "loss": 1.0116, - "step": 2305 - }, - { - "epoch": 0.27728010581374374, - "grad_norm": 1.8488190361168682, - "learning_rate": 3.392289822470638e-06, - "loss": 0.9901, - "step": 2306 - }, - { - "epoch": 0.27740034870438285, - "grad_norm": 2.424950007287119, - "learning_rate": 3.3917304921294674e-06, - "loss": 0.9567, - "step": 2307 - }, - { - "epoch": 0.27752059159502196, - "grad_norm": 1.5531413762478408, - "learning_rate": 3.3911709506638876e-06, - "loss": 1.0128, - "step": 2308 - }, - { - "epoch": 0.277640834485661, - "grad_norm": 2.0397961929837827, - "learning_rate": 3.390611198158781e-06, - "loss": 1.0081, - "step": 2309 - }, - { - "epoch": 0.2777610773763001, - "grad_norm": 1.901667104459685, - "learning_rate": 3.3900512346990612e-06, - "loss": 1.103, - "step": 2310 - }, - { - "epoch": 0.27788132026693924, - "grad_norm": 1.649999448024773, - "learning_rate": 3.389491060369674e-06, - "loss": 0.8568, - "step": 2311 - }, - { - "epoch": 0.2780015631575783, - "grad_norm": 2.0723623815845262, - "learning_rate": 3.388930675255598e-06, - "loss": 1.0911, - "step": 2312 - }, - { - "epoch": 0.2781218060482174, - "grad_norm": 2.3605358981404865, - "learning_rate": 3.388370079441843e-06, - "loss": 0.9868, - "step": 2313 - }, - { - "epoch": 0.2782420489388565, - "grad_norm": 1.9339408177706483, - "learning_rate": 3.3878092730134505e-06, - "loss": 1.1308, - "step": 2314 - }, - { - "epoch": 0.27836229182949557, - "grad_norm": 1.537935570526866, - "learning_rate": 3.3872482560554947e-06, - "loss": 1.0069, - "step": 2315 - }, - { - "epoch": 0.2784825347201347, - "grad_norm": 1.088308257611981, - "learning_rate": 3.386687028653082e-06, - "loss": 0.7905, - "step": 2316 - }, - { - "epoch": 0.2786027776107738, - "grad_norm": 2.2709098476386718, - "learning_rate": 3.386125590891349e-06, - "loss": 1.0462, - "step": 2317 - }, - { - "epoch": 0.27872302050141284, - "grad_norm": 2.1464372791125443, - "learning_rate": 3.3855639428554657e-06, - "loss": 1.0303, - "step": 2318 - }, - { - "epoch": 0.27884326339205195, - "grad_norm": 1.708460368883782, - "learning_rate": 3.385002084630635e-06, - "loss": 1.0132, - "step": 2319 - }, - { - "epoch": 0.278963506282691, - "grad_norm": 2.2171089658478382, - "learning_rate": 3.384440016302088e-06, - "loss": 1.0441, - "step": 2320 - }, - { - "epoch": 0.2790837491733301, - "grad_norm": 2.956010004669314, - "learning_rate": 3.3838777379550923e-06, - "loss": 0.8201, - "step": 2321 - }, - { - "epoch": 0.27920399206396923, - "grad_norm": 2.617726922313924, - "learning_rate": 3.383315249674944e-06, - "loss": 0.993, - "step": 2322 - }, - { - "epoch": 0.2793242349546083, - "grad_norm": 1.8592021445060087, - "learning_rate": 3.3827525515469715e-06, - "loss": 1.0633, - "step": 2323 - }, - { - "epoch": 0.2794444778452474, - "grad_norm": 1.8491530696419998, - "learning_rate": 3.3821896436565367e-06, - "loss": 0.9082, - "step": 2324 - }, - { - "epoch": 0.2795647207358865, - "grad_norm": 1.9787205912849959, - "learning_rate": 3.381626526089032e-06, - "loss": 0.9015, - "step": 2325 - }, - { - "epoch": 0.27968496362652556, - "grad_norm": 1.7509299940649286, - "learning_rate": 3.3810631989298815e-06, - "loss": 0.9805, - "step": 2326 - }, - { - "epoch": 0.2798052065171647, - "grad_norm": 3.440851554375819, - "learning_rate": 3.3804996622645423e-06, - "loss": 1.0498, - "step": 2327 - }, - { - "epoch": 0.2799254494078038, - "grad_norm": 1.6213820603231428, - "learning_rate": 3.3799359161785015e-06, - "loss": 1.0866, - "step": 2328 - }, - { - "epoch": 0.28004569229844284, - "grad_norm": 1.4713515920250797, - "learning_rate": 3.3793719607572798e-06, - "loss": 1.0549, - "step": 2329 - }, - { - "epoch": 0.28016593518908195, - "grad_norm": 1.8143944540896084, - "learning_rate": 3.378807796086428e-06, - "loss": 0.9743, - "step": 2330 - }, - { - "epoch": 0.28028617807972106, - "grad_norm": 2.0584814051857756, - "learning_rate": 3.37824342225153e-06, - "loss": 0.9655, - "step": 2331 - }, - { - "epoch": 0.2804064209703601, - "grad_norm": 1.7261901039599188, - "learning_rate": 3.3776788393382006e-06, - "loss": 0.972, - "step": 2332 - }, - { - "epoch": 0.2805266638609992, - "grad_norm": 2.5704041438592014, - "learning_rate": 3.3771140474320872e-06, - "loss": 0.9703, - "step": 2333 - }, - { - "epoch": 0.28064690675163834, - "grad_norm": 1.6993368173555796, - "learning_rate": 3.3765490466188664e-06, - "loss": 0.9918, - "step": 2334 - }, - { - "epoch": 0.2807671496422774, - "grad_norm": 2.3234762592584515, - "learning_rate": 3.3759838369842508e-06, - "loss": 0.9443, - "step": 2335 - }, - { - "epoch": 0.2808873925329165, - "grad_norm": 1.621363458832475, - "learning_rate": 3.375418418613981e-06, - "loss": 0.9306, - "step": 2336 - }, - { - "epoch": 0.28100763542355556, - "grad_norm": 2.102257709308941, - "learning_rate": 3.374852791593831e-06, - "loss": 1.03, - "step": 2337 - }, - { - "epoch": 0.28112787831419467, - "grad_norm": 2.3369284476743313, - "learning_rate": 3.374286956009605e-06, - "loss": 0.7351, - "step": 2338 - }, - { - "epoch": 0.2812481212048338, - "grad_norm": 1.8751673305868453, - "learning_rate": 3.3737209119471405e-06, - "loss": 0.9579, - "step": 2339 - }, - { - "epoch": 0.28136836409547283, - "grad_norm": 2.3126202113893375, - "learning_rate": 3.373154659492306e-06, - "loss": 0.843, - "step": 2340 - }, - { - "epoch": 0.28148860698611194, - "grad_norm": 1.5616230601869145, - "learning_rate": 3.3725881987310016e-06, - "loss": 1.0447, - "step": 2341 - }, - { - "epoch": 0.28160884987675106, - "grad_norm": 1.6884061005486426, - "learning_rate": 3.372021529749159e-06, - "loss": 1.0728, - "step": 2342 - }, - { - "epoch": 0.2817290927673901, - "grad_norm": 1.8514439025753777, - "learning_rate": 3.3714546526327405e-06, - "loss": 1.1207, - "step": 2343 - }, - { - "epoch": 0.2818493356580292, - "grad_norm": 1.9134472539373704, - "learning_rate": 3.3708875674677423e-06, - "loss": 1.0806, - "step": 2344 - }, - { - "epoch": 0.28196957854866833, - "grad_norm": 1.8960770740255106, - "learning_rate": 3.37032027434019e-06, - "loss": 1.0297, - "step": 2345 - }, - { - "epoch": 0.2820898214393074, - "grad_norm": 1.7343131389411224, - "learning_rate": 3.369752773336141e-06, - "loss": 1.0321, - "step": 2346 - }, - { - "epoch": 0.2822100643299465, - "grad_norm": 1.663078626167502, - "learning_rate": 3.3691850645416864e-06, - "loss": 0.9851, - "step": 2347 - }, - { - "epoch": 0.2823303072205856, - "grad_norm": 2.25339093589893, - "learning_rate": 3.368617148042945e-06, - "loss": 1.0376, - "step": 2348 - }, - { - "epoch": 0.28245055011122466, - "grad_norm": 1.620274789604944, - "learning_rate": 3.368049023926071e-06, - "loss": 1.0465, - "step": 2349 - }, - { - "epoch": 0.2825707930018638, - "grad_norm": 1.4412755726036857, - "learning_rate": 3.3674806922772476e-06, - "loss": 1.0319, - "step": 2350 - }, - { - "epoch": 0.28269103589250283, - "grad_norm": 1.5740593386980535, - "learning_rate": 3.3669121531826904e-06, - "loss": 0.9428, - "step": 2351 - }, - { - "epoch": 0.28281127878314194, - "grad_norm": 1.872176651379604, - "learning_rate": 3.366343406728647e-06, - "loss": 1.0258, - "step": 2352 - }, - { - "epoch": 0.28293152167378105, - "grad_norm": 1.5808094722788335, - "learning_rate": 3.3657744530013946e-06, - "loss": 0.8792, - "step": 2353 - }, - { - "epoch": 0.2830517645644201, - "grad_norm": 1.8418698180106035, - "learning_rate": 3.3652052920872437e-06, - "loss": 0.9236, - "step": 2354 - }, - { - "epoch": 0.2831720074550592, - "grad_norm": 4.764639330380464, - "learning_rate": 3.3646359240725355e-06, - "loss": 1.0542, - "step": 2355 - }, - { - "epoch": 0.2832922503456983, - "grad_norm": 1.7718234342307457, - "learning_rate": 3.364066349043643e-06, - "loss": 0.8813, - "step": 2356 - }, - { - "epoch": 0.2834124932363374, - "grad_norm": 1.5375906785726083, - "learning_rate": 3.363496567086969e-06, - "loss": 1.0276, - "step": 2357 - }, - { - "epoch": 0.2835327361269765, - "grad_norm": 2.1244375943206912, - "learning_rate": 3.3629265782889506e-06, - "loss": 0.9583, - "step": 2358 - }, - { - "epoch": 0.2836529790176156, - "grad_norm": 1.701848032580443, - "learning_rate": 3.362356382736054e-06, - "loss": 0.9151, - "step": 2359 - }, - { - "epoch": 0.28377322190825466, - "grad_norm": 1.669444804615582, - "learning_rate": 3.361785980514777e-06, - "loss": 1.11, - "step": 2360 - }, - { - "epoch": 0.28389346479889377, - "grad_norm": 1.723731503349358, - "learning_rate": 3.361215371711649e-06, - "loss": 0.9707, - "step": 2361 - }, - { - "epoch": 0.2840137076895329, - "grad_norm": 1.576150697851848, - "learning_rate": 3.3606445564132326e-06, - "loss": 1.0298, - "step": 2362 - }, - { - "epoch": 0.28413395058017193, - "grad_norm": 1.6719901499800318, - "learning_rate": 3.360073534706118e-06, - "loss": 1.0144, - "step": 2363 - }, - { - "epoch": 0.28425419347081105, - "grad_norm": 1.6845346463615642, - "learning_rate": 3.35950230667693e-06, - "loss": 0.9573, - "step": 2364 - }, - { - "epoch": 0.28437443636145016, - "grad_norm": 2.38274165814262, - "learning_rate": 3.358930872412323e-06, - "loss": 1.0588, - "step": 2365 - }, - { - "epoch": 0.2844946792520892, - "grad_norm": 1.5006019655975642, - "learning_rate": 3.3583592319989825e-06, - "loss": 1.0084, - "step": 2366 - }, - { - "epoch": 0.2846149221427283, - "grad_norm": 2.2277744140451627, - "learning_rate": 3.357787385523627e-06, - "loss": 0.8851, - "step": 2367 - }, - { - "epoch": 0.2847351650333674, - "grad_norm": 1.7495363847514616, - "learning_rate": 3.3572153330730048e-06, - "loss": 1.0379, - "step": 2368 - }, - { - "epoch": 0.2848554079240065, - "grad_norm": 0.8903653619671155, - "learning_rate": 3.3566430747338956e-06, - "loss": 0.8654, - "step": 2369 - }, - { - "epoch": 0.2849756508146456, - "grad_norm": 2.0402557272599893, - "learning_rate": 3.35607061059311e-06, - "loss": 1.0692, - "step": 2370 - }, - { - "epoch": 0.28509589370528465, - "grad_norm": 1.7493564239747732, - "learning_rate": 3.3554979407374917e-06, - "loss": 0.9489, - "step": 2371 - }, - { - "epoch": 0.28521613659592376, - "grad_norm": 1.5037046619407706, - "learning_rate": 3.3549250652539134e-06, - "loss": 0.9359, - "step": 2372 - }, - { - "epoch": 0.2853363794865629, - "grad_norm": 1.6310678314567388, - "learning_rate": 3.3543519842292794e-06, - "loss": 1.0185, - "step": 2373 - }, - { - "epoch": 0.28545662237720193, - "grad_norm": 1.6742579510291933, - "learning_rate": 3.353778697750527e-06, - "loss": 1.0431, - "step": 2374 - }, - { - "epoch": 0.28557686526784104, - "grad_norm": 1.703046604417039, - "learning_rate": 3.353205205904622e-06, - "loss": 1.0909, - "step": 2375 - }, - { - "epoch": 0.28569710815848015, - "grad_norm": 1.6347524991072508, - "learning_rate": 3.3526315087785637e-06, - "loss": 0.929, - "step": 2376 - }, - { - "epoch": 0.2858173510491192, - "grad_norm": 1.5280487696818597, - "learning_rate": 3.3520576064593805e-06, - "loss": 1.0116, - "step": 2377 - }, - { - "epoch": 0.2859375939397583, - "grad_norm": 1.4949337917198797, - "learning_rate": 3.3514834990341337e-06, - "loss": 1.0226, - "step": 2378 - }, - { - "epoch": 0.2860578368303974, - "grad_norm": 2.0446538157416296, - "learning_rate": 3.3509091865899144e-06, - "loss": 1.1285, - "step": 2379 - }, - { - "epoch": 0.2861780797210365, - "grad_norm": 1.7229845075923695, - "learning_rate": 3.350334669213846e-06, - "loss": 0.9069, - "step": 2380 - }, - { - "epoch": 0.2862983226116756, - "grad_norm": 1.7165410498920075, - "learning_rate": 3.3497599469930816e-06, - "loss": 0.968, - "step": 2381 - }, - { - "epoch": 0.28641856550231465, - "grad_norm": 1.9299226687064146, - "learning_rate": 3.349185020014807e-06, - "loss": 1.0365, - "step": 2382 - }, - { - "epoch": 0.28653880839295376, - "grad_norm": 1.7814980718970002, - "learning_rate": 3.348609888366237e-06, - "loss": 0.9489, - "step": 2383 - }, - { - "epoch": 0.28665905128359287, - "grad_norm": 1.9098567922940015, - "learning_rate": 3.348034552134619e-06, - "loss": 0.8318, - "step": 2384 - }, - { - "epoch": 0.2867792941742319, - "grad_norm": 1.7917466330610394, - "learning_rate": 3.3474590114072316e-06, - "loss": 1.0503, - "step": 2385 - }, - { - "epoch": 0.28689953706487104, - "grad_norm": 1.6484358367188365, - "learning_rate": 3.3468832662713836e-06, - "loss": 1.0293, - "step": 2386 - }, - { - "epoch": 0.28701977995551015, - "grad_norm": 1.9392095222650094, - "learning_rate": 3.346307316814415e-06, - "loss": 1.047, - "step": 2387 - }, - { - "epoch": 0.2871400228461492, - "grad_norm": 2.0487532988376835, - "learning_rate": 3.3457311631236965e-06, - "loss": 0.9653, - "step": 2388 - }, - { - "epoch": 0.2872602657367883, - "grad_norm": 1.724231717502105, - "learning_rate": 3.345154805286631e-06, - "loss": 1.0477, - "step": 2389 - }, - { - "epoch": 0.2873805086274274, - "grad_norm": 2.283075621139081, - "learning_rate": 3.344578243390651e-06, - "loss": 0.9579, - "step": 2390 - }, - { - "epoch": 0.2875007515180665, - "grad_norm": 1.9705825027737451, - "learning_rate": 3.3440014775232206e-06, - "loss": 0.9835, - "step": 2391 - }, - { - "epoch": 0.2876209944087056, - "grad_norm": 1.764067106012864, - "learning_rate": 3.343424507771834e-06, - "loss": 0.916, - "step": 2392 - }, - { - "epoch": 0.2877412372993447, - "grad_norm": 1.8395937610070026, - "learning_rate": 3.342847334224018e-06, - "loss": 1.07, - "step": 2393 - }, - { - "epoch": 0.28786148018998375, - "grad_norm": 0.9525740547628208, - "learning_rate": 3.342269956967329e-06, - "loss": 0.8639, - "step": 2394 - }, - { - "epoch": 0.28798172308062286, - "grad_norm": 2.1751850619455206, - "learning_rate": 3.341692376089355e-06, - "loss": 0.9199, - "step": 2395 - }, - { - "epoch": 0.288101965971262, - "grad_norm": 2.4344782561316167, - "learning_rate": 3.3411145916777146e-06, - "loss": 1.03, - "step": 2396 - }, - { - "epoch": 0.28822220886190103, - "grad_norm": 2.3436182620853865, - "learning_rate": 3.3405366038200566e-06, - "loss": 1.1112, - "step": 2397 - }, - { - "epoch": 0.28834245175254014, - "grad_norm": 2.4377348371012064, - "learning_rate": 3.3399584126040617e-06, - "loss": 1.0522, - "step": 2398 - }, - { - "epoch": 0.2884626946431792, - "grad_norm": 1.7700515218014257, - "learning_rate": 3.339380018117441e-06, - "loss": 1.1016, - "step": 2399 - }, - { - "epoch": 0.2885829375338183, - "grad_norm": 2.521063287088141, - "learning_rate": 3.3388014204479366e-06, - "loss": 0.9944, - "step": 2400 - }, - { - "epoch": 0.2887031804244574, - "grad_norm": 1.846654576090999, - "learning_rate": 3.338222619683321e-06, - "loss": 1.1095, - "step": 2401 - }, - { - "epoch": 0.2888234233150965, - "grad_norm": 1.964034048282542, - "learning_rate": 3.337643615911398e-06, - "loss": 0.9481, - "step": 2402 - }, - { - "epoch": 0.2889436662057356, - "grad_norm": 1.8405900806165103, - "learning_rate": 3.3370644092200026e-06, - "loss": 0.9958, - "step": 2403 - }, - { - "epoch": 0.2890639090963747, - "grad_norm": 1.6635460567078044, - "learning_rate": 3.3364849996969985e-06, - "loss": 0.9864, - "step": 2404 - }, - { - "epoch": 0.28918415198701375, - "grad_norm": 1.6983155404164287, - "learning_rate": 3.335905387430283e-06, - "loss": 1.056, - "step": 2405 - }, - { - "epoch": 0.28930439487765286, - "grad_norm": 1.7220785335704936, - "learning_rate": 3.335325572507782e-06, - "loss": 1.0237, - "step": 2406 - }, - { - "epoch": 0.28942463776829197, - "grad_norm": 1.5186454328794672, - "learning_rate": 3.3347455550174537e-06, - "loss": 0.9432, - "step": 2407 - }, - { - "epoch": 0.289544880658931, - "grad_norm": 1.9579541130027198, - "learning_rate": 3.3341653350472864e-06, - "loss": 0.8966, - "step": 2408 - }, - { - "epoch": 0.28966512354957014, - "grad_norm": 2.3399258759976935, - "learning_rate": 3.333584912685298e-06, - "loss": 0.9021, - "step": 2409 - }, - { - "epoch": 0.28978536644020925, - "grad_norm": 0.9074857113766829, - "learning_rate": 3.3330042880195385e-06, - "loss": 0.7847, - "step": 2410 - }, - { - "epoch": 0.2899056093308483, - "grad_norm": 1.601810491947102, - "learning_rate": 3.3324234611380888e-06, - "loss": 0.9864, - "step": 2411 - }, - { - "epoch": 0.2900258522214874, - "grad_norm": 1.4820936641309828, - "learning_rate": 3.3318424321290596e-06, - "loss": 1.0136, - "step": 2412 - }, - { - "epoch": 0.2901460951121265, - "grad_norm": 1.0059313109516501, - "learning_rate": 3.3312612010805917e-06, - "loss": 0.8417, - "step": 2413 - }, - { - "epoch": 0.2902663380027656, - "grad_norm": 1.6203706530414315, - "learning_rate": 3.330679768080858e-06, - "loss": 0.9006, - "step": 2414 - }, - { - "epoch": 0.2903865808934047, - "grad_norm": 2.222639838025688, - "learning_rate": 3.3300981332180627e-06, - "loss": 1.0402, - "step": 2415 - }, - { - "epoch": 0.29050682378404374, - "grad_norm": 1.7167709053514337, - "learning_rate": 3.3295162965804373e-06, - "loss": 1.0062, - "step": 2416 - }, - { - "epoch": 0.29062706667468285, - "grad_norm": 2.0777587567896365, - "learning_rate": 3.328934258256247e-06, - "loss": 0.9742, - "step": 2417 - }, - { - "epoch": 0.29074730956532197, - "grad_norm": 1.874802261098219, - "learning_rate": 3.3283520183337856e-06, - "loss": 0.8751, - "step": 2418 - }, - { - "epoch": 0.290867552455961, - "grad_norm": 2.1959108236442457, - "learning_rate": 3.3277695769013797e-06, - "loss": 0.8987, - "step": 2419 - }, - { - "epoch": 0.29098779534660013, - "grad_norm": 1.9152266718324207, - "learning_rate": 3.327186934047385e-06, - "loss": 0.9769, - "step": 2420 - }, - { - "epoch": 0.29110803823723924, - "grad_norm": 1.7735773779199373, - "learning_rate": 3.3266040898601877e-06, - "loss": 0.862, - "step": 2421 - }, - { - "epoch": 0.2912282811278783, - "grad_norm": 1.6867125757978128, - "learning_rate": 3.3260210444282045e-06, - "loss": 0.9769, - "step": 2422 - }, - { - "epoch": 0.2913485240185174, - "grad_norm": 2.1941018890118196, - "learning_rate": 3.325437797839883e-06, - "loss": 0.9246, - "step": 2423 - }, - { - "epoch": 0.2914687669091565, - "grad_norm": 2.2358197429792286, - "learning_rate": 3.3248543501837015e-06, - "loss": 0.9535, - "step": 2424 - }, - { - "epoch": 0.2915890097997956, - "grad_norm": 1.6679291552951943, - "learning_rate": 3.3242707015481684e-06, - "loss": 0.969, - "step": 2425 - }, - { - "epoch": 0.2917092526904347, - "grad_norm": 1.617065867397099, - "learning_rate": 3.323686852021823e-06, - "loss": 1.0154, - "step": 2426 - }, - { - "epoch": 0.2918294955810738, - "grad_norm": 1.8469449825550313, - "learning_rate": 3.323102801693235e-06, - "loss": 0.9998, - "step": 2427 - }, - { - "epoch": 0.29194973847171285, - "grad_norm": 1.905220629313651, - "learning_rate": 3.322518550651003e-06, - "loss": 0.9961, - "step": 2428 - }, - { - "epoch": 0.29206998136235196, - "grad_norm": 1.6345767478346704, - "learning_rate": 3.3219340989837586e-06, - "loss": 1.0073, - "step": 2429 - }, - { - "epoch": 0.292190224252991, - "grad_norm": 1.7589989621114352, - "learning_rate": 3.3213494467801625e-06, - "loss": 1.0077, - "step": 2430 - }, - { - "epoch": 0.2923104671436301, - "grad_norm": 2.2079123569592123, - "learning_rate": 3.3207645941289063e-06, - "loss": 0.9213, - "step": 2431 - }, - { - "epoch": 0.29243071003426924, - "grad_norm": 1.7327862585366487, - "learning_rate": 3.320179541118711e-06, - "loss": 1.0032, - "step": 2432 - }, - { - "epoch": 0.2925509529249083, - "grad_norm": 1.1917622018857226, - "learning_rate": 3.3195942878383293e-06, - "loss": 0.84, - "step": 2433 - }, - { - "epoch": 0.2926711958155474, - "grad_norm": 1.6473015220227953, - "learning_rate": 3.319008834376543e-06, - "loss": 0.9835, - "step": 2434 - }, - { - "epoch": 0.2927914387061865, - "grad_norm": 2.3030200163103496, - "learning_rate": 3.3184231808221654e-06, - "loss": 1.0846, - "step": 2435 - }, - { - "epoch": 0.29291168159682557, - "grad_norm": 3.253804987216724, - "learning_rate": 3.3178373272640394e-06, - "loss": 0.8281, - "step": 2436 - }, - { - "epoch": 0.2930319244874647, - "grad_norm": 2.0475417354368415, - "learning_rate": 3.3172512737910387e-06, - "loss": 1.0676, - "step": 2437 - }, - { - "epoch": 0.2931521673781038, - "grad_norm": 1.9234482443390888, - "learning_rate": 3.3166650204920674e-06, - "loss": 1.0803, - "step": 2438 - }, - { - "epoch": 0.29327241026874284, - "grad_norm": 1.6373798792220533, - "learning_rate": 3.316078567456059e-06, - "loss": 1.0205, - "step": 2439 - }, - { - "epoch": 0.29339265315938196, - "grad_norm": 1.456929548462361, - "learning_rate": 3.3154919147719786e-06, - "loss": 0.9735, - "step": 2440 - }, - { - "epoch": 0.29351289605002107, - "grad_norm": 1.7564405046089888, - "learning_rate": 3.31490506252882e-06, - "loss": 1.0729, - "step": 2441 - }, - { - "epoch": 0.2936331389406601, - "grad_norm": 1.6337417805471497, - "learning_rate": 3.31431801081561e-06, - "loss": 1.0421, - "step": 2442 - }, - { - "epoch": 0.29375338183129923, - "grad_norm": 1.2084705450266553, - "learning_rate": 3.313730759721402e-06, - "loss": 0.8841, - "step": 2443 - }, - { - "epoch": 0.29387362472193834, - "grad_norm": 2.075920829057116, - "learning_rate": 3.313143309335282e-06, - "loss": 1.0617, - "step": 2444 - }, - { - "epoch": 0.2939938676125774, - "grad_norm": 1.7466002546793573, - "learning_rate": 3.3125556597463665e-06, - "loss": 1.0428, - "step": 2445 - }, - { - "epoch": 0.2941141105032165, - "grad_norm": 1.3960071521920123, - "learning_rate": 3.311967811043801e-06, - "loss": 0.8572, - "step": 2446 - }, - { - "epoch": 0.29423435339385556, - "grad_norm": 1.9724837123841423, - "learning_rate": 3.3113797633167617e-06, - "loss": 1.0196, - "step": 2447 - }, - { - "epoch": 0.2943545962844947, - "grad_norm": 1.9533694915332533, - "learning_rate": 3.310791516654455e-06, - "loss": 0.8897, - "step": 2448 - }, - { - "epoch": 0.2944748391751338, - "grad_norm": 1.7610447306846837, - "learning_rate": 3.3102030711461177e-06, - "loss": 0.9956, - "step": 2449 - }, - { - "epoch": 0.29459508206577284, - "grad_norm": 1.6408140527700945, - "learning_rate": 3.3096144268810156e-06, - "loss": 0.8781, - "step": 2450 - }, - { - "epoch": 0.29471532495641195, - "grad_norm": 1.994520250253495, - "learning_rate": 3.3090255839484462e-06, - "loss": 0.9276, - "step": 2451 - }, - { - "epoch": 0.29483556784705106, - "grad_norm": 1.88279274577941, - "learning_rate": 3.3084365424377366e-06, - "loss": 1.0573, - "step": 2452 - }, - { - "epoch": 0.2949558107376901, - "grad_norm": 0.8528550164460243, - "learning_rate": 3.307847302438245e-06, - "loss": 0.7796, - "step": 2453 - }, - { - "epoch": 0.2950760536283292, - "grad_norm": 2.7256951906455624, - "learning_rate": 3.3072578640393562e-06, - "loss": 0.9813, - "step": 2454 - }, - { - "epoch": 0.29519629651896834, - "grad_norm": 1.7766749873989331, - "learning_rate": 3.3066682273304886e-06, - "loss": 0.9925, - "step": 2455 - }, - { - "epoch": 0.2953165394096074, - "grad_norm": 1.7404119912558782, - "learning_rate": 3.3060783924010904e-06, - "loss": 0.974, - "step": 2456 - }, - { - "epoch": 0.2954367823002465, - "grad_norm": 1.8565439421787793, - "learning_rate": 3.3054883593406387e-06, - "loss": 1.0471, - "step": 2457 - }, - { - "epoch": 0.2955570251908856, - "grad_norm": 2.2804662324813743, - "learning_rate": 3.3048981282386404e-06, - "loss": 0.8486, - "step": 2458 - }, - { - "epoch": 0.29567726808152467, - "grad_norm": 1.8514695928900928, - "learning_rate": 3.304307699184634e-06, - "loss": 1.0281, - "step": 2459 - }, - { - "epoch": 0.2957975109721638, - "grad_norm": 1.6106657019058606, - "learning_rate": 3.3037170722681866e-06, - "loss": 0.9887, - "step": 2460 - }, - { - "epoch": 0.29591775386280283, - "grad_norm": 1.73550034863538, - "learning_rate": 3.3031262475788956e-06, - "loss": 0.8877, - "step": 2461 - }, - { - "epoch": 0.29603799675344195, - "grad_norm": 1.8248175673554408, - "learning_rate": 3.3025352252063897e-06, - "loss": 0.9395, - "step": 2462 - }, - { - "epoch": 0.29615823964408106, - "grad_norm": 1.5305367969225727, - "learning_rate": 3.3019440052403252e-06, - "loss": 0.9526, - "step": 2463 - }, - { - "epoch": 0.2962784825347201, - "grad_norm": 1.5897060025313652, - "learning_rate": 3.30135258777039e-06, - "loss": 0.9087, - "step": 2464 - }, - { - "epoch": 0.2963987254253592, - "grad_norm": 1.9752653788682588, - "learning_rate": 3.3007609728863024e-06, - "loss": 0.9013, - "step": 2465 - }, - { - "epoch": 0.29651896831599833, - "grad_norm": 1.7756157492074738, - "learning_rate": 3.300169160677809e-06, - "loss": 0.936, - "step": 2466 - }, - { - "epoch": 0.2966392112066374, - "grad_norm": 3.1865792534090276, - "learning_rate": 3.2995771512346878e-06, - "loss": 0.9716, - "step": 2467 - }, - { - "epoch": 0.2967594540972765, - "grad_norm": 1.964515431087917, - "learning_rate": 3.298984944646746e-06, - "loss": 0.9308, - "step": 2468 - }, - { - "epoch": 0.2968796969879156, - "grad_norm": 1.8596977146171356, - "learning_rate": 3.298392541003822e-06, - "loss": 1.0097, - "step": 2469 - }, - { - "epoch": 0.29699993987855466, - "grad_norm": 1.6901879272708664, - "learning_rate": 3.2977999403957806e-06, - "loss": 1.0884, - "step": 2470 - }, - { - "epoch": 0.2971201827691938, - "grad_norm": 1.8020569757139984, - "learning_rate": 3.2972071429125207e-06, - "loss": 0.8746, - "step": 2471 - }, - { - "epoch": 0.2972404256598329, - "grad_norm": 1.8676000163349764, - "learning_rate": 3.2966141486439682e-06, - "loss": 1.087, - "step": 2472 - }, - { - "epoch": 0.29736066855047194, - "grad_norm": 2.279238506442555, - "learning_rate": 3.29602095768008e-06, - "loss": 0.8562, - "step": 2473 - }, - { - "epoch": 0.29748091144111105, - "grad_norm": 1.8109895397539282, - "learning_rate": 3.2954275701108437e-06, - "loss": 0.8396, - "step": 2474 - }, - { - "epoch": 0.29760115433175016, - "grad_norm": 1.649244210513369, - "learning_rate": 3.294833986026275e-06, - "loss": 0.8894, - "step": 2475 - }, - { - "epoch": 0.2977213972223892, - "grad_norm": 1.7728780796799157, - "learning_rate": 3.29424020551642e-06, - "loss": 1.0603, - "step": 2476 - }, - { - "epoch": 0.2978416401130283, - "grad_norm": 2.7459315636005095, - "learning_rate": 3.2936462286713546e-06, - "loss": 0.9125, - "step": 2477 - }, - { - "epoch": 0.2979618830036674, - "grad_norm": 1.7647836081366954, - "learning_rate": 3.2930520555811846e-06, - "loss": 0.9766, - "step": 2478 - }, - { - "epoch": 0.2980821258943065, - "grad_norm": 1.7523309986944806, - "learning_rate": 3.292457686336046e-06, - "loss": 0.9981, - "step": 2479 - }, - { - "epoch": 0.2982023687849456, - "grad_norm": 0.9931733647663857, - "learning_rate": 3.291863121026105e-06, - "loss": 0.8459, - "step": 2480 - }, - { - "epoch": 0.29832261167558466, - "grad_norm": 2.186380590395819, - "learning_rate": 3.2912683597415547e-06, - "loss": 0.9671, - "step": 2481 - }, - { - "epoch": 0.29844285456622377, - "grad_norm": 1.7288840036042619, - "learning_rate": 3.2906734025726213e-06, - "loss": 0.9793, - "step": 2482 - }, - { - "epoch": 0.2985630974568629, - "grad_norm": 1.7646714057546482, - "learning_rate": 3.290078249609559e-06, - "loss": 1.082, - "step": 2483 - }, - { - "epoch": 0.29868334034750194, - "grad_norm": 1.992295206187736, - "learning_rate": 3.2894829009426514e-06, - "loss": 1.0811, - "step": 2484 - }, - { - "epoch": 0.29880358323814105, - "grad_norm": 1.9177458530660167, - "learning_rate": 3.288887356662213e-06, - "loss": 0.9717, - "step": 2485 - }, - { - "epoch": 0.29892382612878016, - "grad_norm": 0.9163150257387758, - "learning_rate": 3.288291616858588e-06, - "loss": 0.8096, - "step": 2486 - }, - { - "epoch": 0.2990440690194192, - "grad_norm": 1.6976013905017058, - "learning_rate": 3.287695681622149e-06, - "loss": 0.9672, - "step": 2487 - }, - { - "epoch": 0.2991643119100583, - "grad_norm": 2.2890202124533587, - "learning_rate": 3.2870995510432982e-06, - "loss": 1.0166, - "step": 2488 - }, - { - "epoch": 0.29928455480069743, - "grad_norm": 2.3059056053328737, - "learning_rate": 3.2865032252124697e-06, - "loss": 0.97, - "step": 2489 - }, - { - "epoch": 0.2994047976913365, - "grad_norm": 1.4568003453618268, - "learning_rate": 3.2859067042201243e-06, - "loss": 0.9739, - "step": 2490 - }, - { - "epoch": 0.2995250405819756, - "grad_norm": 1.753304903044057, - "learning_rate": 3.2853099881567544e-06, - "loss": 0.9766, - "step": 2491 - }, - { - "epoch": 0.29964528347261465, - "grad_norm": 1.5855076399913521, - "learning_rate": 3.284713077112881e-06, - "loss": 0.9863, - "step": 2492 - }, - { - "epoch": 0.29976552636325376, - "grad_norm": 2.3156494139338, - "learning_rate": 3.284115971179056e-06, - "loss": 1.0599, - "step": 2493 - }, - { - "epoch": 0.2998857692538929, - "grad_norm": 1.7345438007312681, - "learning_rate": 3.283518670445859e-06, - "loss": 0.9975, - "step": 2494 - }, - { - "epoch": 0.30000601214453193, - "grad_norm": 1.053814222702393, - "learning_rate": 3.2829211750038995e-06, - "loss": 0.7714, - "step": 2495 - }, - { - "epoch": 0.30012625503517104, - "grad_norm": 1.7229546182581676, - "learning_rate": 3.2823234849438183e-06, - "loss": 1.0837, - "step": 2496 - }, - { - "epoch": 0.30024649792581015, - "grad_norm": 1.7395232489102137, - "learning_rate": 3.2817256003562836e-06, - "loss": 0.9484, - "step": 2497 - }, - { - "epoch": 0.3003667408164492, - "grad_norm": 1.7185867232015948, - "learning_rate": 3.281127521331995e-06, - "loss": 0.8664, - "step": 2498 - }, - { - "epoch": 0.3004869837070883, - "grad_norm": 1.0829604803255848, - "learning_rate": 3.2805292479616798e-06, - "loss": 0.8375, - "step": 2499 - }, - { - "epoch": 0.30060722659772743, - "grad_norm": 2.0443692776895253, - "learning_rate": 3.2799307803360955e-06, - "loss": 1.1142, - "step": 2500 - }, - { - "epoch": 0.3007274694883665, - "grad_norm": 2.5439015448790157, - "learning_rate": 3.27933211854603e-06, - "loss": 1.0142, - "step": 2501 - }, - { - "epoch": 0.3008477123790056, - "grad_norm": 1.6255830305725014, - "learning_rate": 3.278733262682299e-06, - "loss": 1.0694, - "step": 2502 - }, - { - "epoch": 0.3009679552696447, - "grad_norm": 2.0247934871910975, - "learning_rate": 3.2781342128357484e-06, - "loss": 1.023, - "step": 2503 - }, - { - "epoch": 0.30108819816028376, - "grad_norm": 2.742961024548096, - "learning_rate": 3.2775349690972547e-06, - "loss": 1.0083, - "step": 2504 - }, - { - "epoch": 0.30120844105092287, - "grad_norm": 1.011487897656271, - "learning_rate": 3.276935531557722e-06, - "loss": 0.7599, - "step": 2505 - }, - { - "epoch": 0.301328683941562, - "grad_norm": 2.0438694910238007, - "learning_rate": 3.2763359003080837e-06, - "loss": 0.9997, - "step": 2506 - }, - { - "epoch": 0.30144892683220104, - "grad_norm": 1.0695222039184986, - "learning_rate": 3.2757360754393047e-06, - "loss": 0.8717, - "step": 2507 - }, - { - "epoch": 0.30156916972284015, - "grad_norm": 2.841944362313219, - "learning_rate": 3.2751360570423767e-06, - "loss": 0.8477, - "step": 2508 - }, - { - "epoch": 0.3016894126134792, - "grad_norm": 1.7283822390713108, - "learning_rate": 3.2745358452083236e-06, - "loss": 0.9619, - "step": 2509 - }, - { - "epoch": 0.3018096555041183, - "grad_norm": 1.3845777720453414, - "learning_rate": 3.2739354400281955e-06, - "loss": 1.0196, - "step": 2510 - }, - { - "epoch": 0.3019298983947574, - "grad_norm": 1.0304795479371416, - "learning_rate": 3.2733348415930744e-06, - "loss": 0.8841, - "step": 2511 - }, - { - "epoch": 0.3020501412853965, - "grad_norm": 1.7832725820562183, - "learning_rate": 3.27273404999407e-06, - "loss": 1.0142, - "step": 2512 - }, - { - "epoch": 0.3021703841760356, - "grad_norm": 0.9262008359991365, - "learning_rate": 3.272133065322322e-06, - "loss": 0.8223, - "step": 2513 - }, - { - "epoch": 0.3022906270666747, - "grad_norm": 1.4868579376383406, - "learning_rate": 3.271531887669e-06, - "loss": 0.9826, - "step": 2514 - }, - { - "epoch": 0.30241086995731375, - "grad_norm": 2.096331243606879, - "learning_rate": 3.2709305171253015e-06, - "loss": 0.839, - "step": 2515 - }, - { - "epoch": 0.30253111284795287, - "grad_norm": 1.9151037381313225, - "learning_rate": 3.2703289537824536e-06, - "loss": 0.9751, - "step": 2516 - }, - { - "epoch": 0.302651355738592, - "grad_norm": 5.659409457290879, - "learning_rate": 3.269727197731714e-06, - "loss": 0.9912, - "step": 2517 - }, - { - "epoch": 0.30277159862923103, - "grad_norm": 1.7121712055743363, - "learning_rate": 3.269125249064367e-06, - "loss": 0.9796, - "step": 2518 - }, - { - "epoch": 0.30289184151987014, - "grad_norm": 1.5214530344172639, - "learning_rate": 3.2685231078717297e-06, - "loss": 1.032, - "step": 2519 - }, - { - "epoch": 0.30301208441050925, - "grad_norm": 1.7597539056690443, - "learning_rate": 3.267920774245145e-06, - "loss": 0.9465, - "step": 2520 - }, - { - "epoch": 0.3031323273011483, - "grad_norm": 1.5827878296747417, - "learning_rate": 3.2673182482759876e-06, - "loss": 1.0438, - "step": 2521 - }, - { - "epoch": 0.3032525701917874, - "grad_norm": 2.6448286556999254, - "learning_rate": 3.266715530055659e-06, - "loss": 0.871, - "step": 2522 - }, - { - "epoch": 0.30337281308242653, - "grad_norm": 1.4663015207278212, - "learning_rate": 3.2661126196755927e-06, - "loss": 1.0054, - "step": 2523 - }, - { - "epoch": 0.3034930559730656, - "grad_norm": 0.9457939435232149, - "learning_rate": 3.265509517227248e-06, - "loss": 0.8018, - "step": 2524 - }, - { - "epoch": 0.3036132988637047, - "grad_norm": 1.7424341548919176, - "learning_rate": 3.264906222802115e-06, - "loss": 1.0067, - "step": 2525 - }, - { - "epoch": 0.30373354175434375, - "grad_norm": 4.336522518247718, - "learning_rate": 3.264302736491715e-06, - "loss": 0.9747, - "step": 2526 - }, - { - "epoch": 0.30385378464498286, - "grad_norm": 1.783818785230683, - "learning_rate": 3.263699058387594e-06, - "loss": 1.0687, - "step": 2527 - }, - { - "epoch": 0.30397402753562197, - "grad_norm": 2.109781861803379, - "learning_rate": 3.2630951885813315e-06, - "loss": 1.1057, - "step": 2528 - }, - { - "epoch": 0.304094270426261, - "grad_norm": 1.969063325199789, - "learning_rate": 3.262491127164533e-06, - "loss": 0.9822, - "step": 2529 - }, - { - "epoch": 0.30421451331690014, - "grad_norm": 2.5690900220233166, - "learning_rate": 3.2618868742288337e-06, - "loss": 1.0022, - "step": 2530 - }, - { - "epoch": 0.30433475620753925, - "grad_norm": 1.9742265852850798, - "learning_rate": 3.261282429865899e-06, - "loss": 0.9248, - "step": 2531 - }, - { - "epoch": 0.3044549990981783, - "grad_norm": 1.5765590641222471, - "learning_rate": 3.2606777941674225e-06, - "loss": 0.9235, - "step": 2532 - }, - { - "epoch": 0.3045752419888174, - "grad_norm": 1.9485838320432838, - "learning_rate": 3.2600729672251276e-06, - "loss": 1.0439, - "step": 2533 - }, - { - "epoch": 0.3046954848794565, - "grad_norm": 1.909505034918289, - "learning_rate": 3.259467949130765e-06, - "loss": 0.8587, - "step": 2534 - }, - { - "epoch": 0.3048157277700956, - "grad_norm": 2.1441027381678444, - "learning_rate": 3.2588627399761164e-06, - "loss": 1.0404, - "step": 2535 - }, - { - "epoch": 0.3049359706607347, - "grad_norm": 1.838674120864161, - "learning_rate": 3.2582573398529903e-06, - "loss": 0.9116, - "step": 2536 - }, - { - "epoch": 0.3050562135513738, - "grad_norm": 1.9243589646754469, - "learning_rate": 3.2576517488532265e-06, - "loss": 0.9455, - "step": 2537 - }, - { - "epoch": 0.30517645644201286, - "grad_norm": 1.975628175910924, - "learning_rate": 3.257045967068692e-06, - "loss": 1.0654, - "step": 2538 - }, - { - "epoch": 0.30529669933265197, - "grad_norm": 2.1614354021319055, - "learning_rate": 3.2564399945912848e-06, - "loss": 1.0239, - "step": 2539 - }, - { - "epoch": 0.305416942223291, - "grad_norm": 2.108217953426056, - "learning_rate": 3.2558338315129287e-06, - "loss": 1.0215, - "step": 2540 - }, - { - "epoch": 0.30553718511393013, - "grad_norm": 1.8999027829646433, - "learning_rate": 3.2552274779255785e-06, - "loss": 0.9592, - "step": 2541 - }, - { - "epoch": 0.30565742800456924, - "grad_norm": 2.0341923457359394, - "learning_rate": 3.2546209339212184e-06, - "loss": 0.969, - "step": 2542 - }, - { - "epoch": 0.3057776708952083, - "grad_norm": 1.3157367305474272, - "learning_rate": 3.25401419959186e-06, - "loss": 0.9683, - "step": 2543 - }, - { - "epoch": 0.3058979137858474, - "grad_norm": 2.127355728367856, - "learning_rate": 3.253407275029545e-06, - "loss": 0.9634, - "step": 2544 - }, - { - "epoch": 0.3060181566764865, - "grad_norm": 1.5886619730768146, - "learning_rate": 3.2528001603263425e-06, - "loss": 1.0015, - "step": 2545 - }, - { - "epoch": 0.3061383995671256, - "grad_norm": 1.7401571031876444, - "learning_rate": 3.2521928555743514e-06, - "loss": 1.0189, - "step": 2546 - }, - { - "epoch": 0.3062586424577647, - "grad_norm": 1.7077345457722206, - "learning_rate": 3.2515853608657e-06, - "loss": 0.8798, - "step": 2547 - }, - { - "epoch": 0.3063788853484038, - "grad_norm": 2.9912189081268274, - "learning_rate": 3.250977676292545e-06, - "loss": 0.9527, - "step": 2548 - }, - { - "epoch": 0.30649912823904285, - "grad_norm": 2.099915607209862, - "learning_rate": 3.2503698019470712e-06, - "loss": 0.994, - "step": 2549 - }, - { - "epoch": 0.30661937112968196, - "grad_norm": 1.7144911974983827, - "learning_rate": 3.249761737921492e-06, - "loss": 0.9789, - "step": 2550 - }, - { - "epoch": 0.30673961402032107, - "grad_norm": 1.7377366488002421, - "learning_rate": 3.249153484308051e-06, - "loss": 0.9448, - "step": 2551 - }, - { - "epoch": 0.3068598569109601, - "grad_norm": 1.6661768988380061, - "learning_rate": 3.2485450411990194e-06, - "loss": 0.9722, - "step": 2552 - }, - { - "epoch": 0.30698009980159924, - "grad_norm": 1.491942300672847, - "learning_rate": 3.2479364086866983e-06, - "loss": 1.0229, - "step": 2553 - }, - { - "epoch": 0.30710034269223835, - "grad_norm": 1.538365627471971, - "learning_rate": 3.247327586863416e-06, - "loss": 1.0092, - "step": 2554 - }, - { - "epoch": 0.3072205855828774, - "grad_norm": 1.80453820870257, - "learning_rate": 3.2467185758215304e-06, - "loss": 0.9764, - "step": 2555 - }, - { - "epoch": 0.3073408284735165, - "grad_norm": 2.3530005433503063, - "learning_rate": 3.246109375653428e-06, - "loss": 1.0529, - "step": 2556 - }, - { - "epoch": 0.30746107136415557, - "grad_norm": 1.9287146459616247, - "learning_rate": 3.2454999864515243e-06, - "loss": 0.9878, - "step": 2557 - }, - { - "epoch": 0.3075813142547947, - "grad_norm": 1.7259078359739743, - "learning_rate": 3.244890408308263e-06, - "loss": 0.8885, - "step": 2558 - }, - { - "epoch": 0.3077015571454338, - "grad_norm": 1.8636320731542897, - "learning_rate": 3.2442806413161165e-06, - "loss": 0.8171, - "step": 2559 - }, - { - "epoch": 0.30782180003607285, - "grad_norm": 1.8339371192702147, - "learning_rate": 3.243670685567586e-06, - "loss": 0.9679, - "step": 2560 - }, - { - "epoch": 0.30794204292671196, - "grad_norm": 2.0506082516317337, - "learning_rate": 3.2430605411552012e-06, - "loss": 1.0063, - "step": 2561 - }, - { - "epoch": 0.30806228581735107, - "grad_norm": 1.0859972710935193, - "learning_rate": 3.2424502081715205e-06, - "loss": 0.9147, - "step": 2562 - }, - { - "epoch": 0.3081825287079901, - "grad_norm": 1.6307758241369283, - "learning_rate": 3.241839686709132e-06, - "loss": 0.981, - "step": 2563 - }, - { - "epoch": 0.30830277159862923, - "grad_norm": 1.930943439435614, - "learning_rate": 3.2412289768606495e-06, - "loss": 1.0186, - "step": 2564 - }, - { - "epoch": 0.30842301448926834, - "grad_norm": 3.697883598645428, - "learning_rate": 3.240618078718718e-06, - "loss": 1.0212, - "step": 2565 - }, - { - "epoch": 0.3085432573799074, - "grad_norm": 1.9023945701751028, - "learning_rate": 3.240006992376011e-06, - "loss": 0.9347, - "step": 2566 - }, - { - "epoch": 0.3086635002705465, - "grad_norm": 2.2998881395753417, - "learning_rate": 3.2393957179252284e-06, - "loss": 0.9595, - "step": 2567 - }, - { - "epoch": 0.3087837431611856, - "grad_norm": 1.7091607051889852, - "learning_rate": 3.2387842554591016e-06, - "loss": 1.0084, - "step": 2568 - }, - { - "epoch": 0.3089039860518247, - "grad_norm": 1.8300119426733663, - "learning_rate": 3.238172605070388e-06, - "loss": 1.0639, - "step": 2569 - }, - { - "epoch": 0.3090242289424638, - "grad_norm": 2.0566647375129707, - "learning_rate": 3.2375607668518745e-06, - "loss": 0.9854, - "step": 2570 - }, - { - "epoch": 0.30914447183310284, - "grad_norm": 1.8427342348522835, - "learning_rate": 3.236948740896377e-06, - "loss": 1.1015, - "step": 2571 - }, - { - "epoch": 0.30926471472374195, - "grad_norm": 1.3754667010254675, - "learning_rate": 3.2363365272967384e-06, - "loss": 1.0406, - "step": 2572 - }, - { - "epoch": 0.30938495761438106, - "grad_norm": 2.001779479892979, - "learning_rate": 3.235724126145832e-06, - "loss": 1.0087, - "step": 2573 - }, - { - "epoch": 0.3095052005050201, - "grad_norm": 1.643696625281723, - "learning_rate": 3.235111537536558e-06, - "loss": 0.9758, - "step": 2574 - }, - { - "epoch": 0.30962544339565923, - "grad_norm": 1.868985192543836, - "learning_rate": 3.2344987615618456e-06, - "loss": 1.0317, - "step": 2575 - }, - { - "epoch": 0.30974568628629834, - "grad_norm": 1.5214947956007476, - "learning_rate": 3.2338857983146533e-06, - "loss": 0.9832, - "step": 2576 - }, - { - "epoch": 0.3098659291769374, - "grad_norm": 1.8223482462806184, - "learning_rate": 3.233272647887966e-06, - "loss": 0.9696, - "step": 2577 - }, - { - "epoch": 0.3099861720675765, - "grad_norm": 1.5325872848819608, - "learning_rate": 3.2326593103747985e-06, - "loss": 1.0969, - "step": 2578 - }, - { - "epoch": 0.3101064149582156, - "grad_norm": 1.736015231557906, - "learning_rate": 3.2320457858681936e-06, - "loss": 1.0462, - "step": 2579 - }, - { - "epoch": 0.31022665784885467, - "grad_norm": 2.2501926339657263, - "learning_rate": 3.2314320744612228e-06, - "loss": 1.0543, - "step": 2580 - }, - { - "epoch": 0.3103469007394938, - "grad_norm": 1.4953939927873694, - "learning_rate": 3.2308181762469854e-06, - "loss": 0.9668, - "step": 2581 - }, - { - "epoch": 0.3104671436301329, - "grad_norm": 1.678724926884155, - "learning_rate": 3.230204091318609e-06, - "loss": 0.9873, - "step": 2582 - }, - { - "epoch": 0.31058738652077195, - "grad_norm": 2.321586901117621, - "learning_rate": 3.2295898197692503e-06, - "loss": 1.0477, - "step": 2583 - }, - { - "epoch": 0.31070762941141106, - "grad_norm": 1.5327822745451058, - "learning_rate": 3.228975361692094e-06, - "loss": 0.9885, - "step": 2584 - }, - { - "epoch": 0.31082787230205017, - "grad_norm": 2.0320632480329808, - "learning_rate": 3.228360717180352e-06, - "loss": 1.004, - "step": 2585 - }, - { - "epoch": 0.3109481151926892, - "grad_norm": 0.9216220638032284, - "learning_rate": 3.227745886327266e-06, - "loss": 0.8384, - "step": 2586 - }, - { - "epoch": 0.31106835808332833, - "grad_norm": 0.8390412155662645, - "learning_rate": 3.227130869226105e-06, - "loss": 0.7837, - "step": 2587 - }, - { - "epoch": 0.3111886009739674, - "grad_norm": 2.2269717191794114, - "learning_rate": 3.226515665970167e-06, - "loss": 1.0186, - "step": 2588 - }, - { - "epoch": 0.3113088438646065, - "grad_norm": 2.070124203738169, - "learning_rate": 3.225900276652777e-06, - "loss": 1.0599, - "step": 2589 - }, - { - "epoch": 0.3114290867552456, - "grad_norm": 1.4283158022510578, - "learning_rate": 3.2252847013672906e-06, - "loss": 0.955, - "step": 2590 - }, - { - "epoch": 0.31154932964588467, - "grad_norm": 2.285182333251247, - "learning_rate": 3.224668940207089e-06, - "loss": 0.9647, - "step": 2591 - }, - { - "epoch": 0.3116695725365238, - "grad_norm": 1.6708842398989525, - "learning_rate": 3.2240529932655828e-06, - "loss": 1.0681, - "step": 2592 - }, - { - "epoch": 0.3117898154271629, - "grad_norm": 2.405443427908747, - "learning_rate": 3.223436860636211e-06, - "loss": 1.0943, - "step": 2593 - }, - { - "epoch": 0.31191005831780194, - "grad_norm": 1.7228682683799217, - "learning_rate": 3.2228205424124403e-06, - "loss": 0.9376, - "step": 2594 - }, - { - "epoch": 0.31203030120844105, - "grad_norm": 2.0779987956567676, - "learning_rate": 3.222204038687765e-06, - "loss": 0.9421, - "step": 2595 - }, - { - "epoch": 0.31215054409908016, - "grad_norm": 1.5736362335558025, - "learning_rate": 3.221587349555709e-06, - "loss": 1.0808, - "step": 2596 - }, - { - "epoch": 0.3122707869897192, - "grad_norm": 1.4466661042916982, - "learning_rate": 3.2209704751098236e-06, - "loss": 0.887, - "step": 2597 - }, - { - "epoch": 0.31239102988035833, - "grad_norm": 2.1339707356156192, - "learning_rate": 3.2203534154436875e-06, - "loss": 1.0315, - "step": 2598 - }, - { - "epoch": 0.31251127277099744, - "grad_norm": 1.9257401467786455, - "learning_rate": 3.2197361706509084e-06, - "loss": 0.962, - "step": 2599 - }, - { - "epoch": 0.3126315156616365, - "grad_norm": 2.7982236711266606, - "learning_rate": 3.2191187408251228e-06, - "loss": 1.0507, - "step": 2600 - }, - { - "epoch": 0.3127517585522756, - "grad_norm": 1.9341925416349983, - "learning_rate": 3.218501126059993e-06, - "loss": 0.9793, - "step": 2601 - }, - { - "epoch": 0.31287200144291466, - "grad_norm": 2.119758199541619, - "learning_rate": 3.2178833264492116e-06, - "loss": 1.0114, - "step": 2602 - }, - { - "epoch": 0.31299224433355377, - "grad_norm": 1.6920735582365072, - "learning_rate": 3.217265342086498e-06, - "loss": 0.962, - "step": 2603 - }, - { - "epoch": 0.3131124872241929, - "grad_norm": 1.92062721994646, - "learning_rate": 3.216647173065599e-06, - "loss": 0.9336, - "step": 2604 - }, - { - "epoch": 0.31323273011483194, - "grad_norm": 1.6554404385473984, - "learning_rate": 3.216028819480292e-06, - "loss": 0.9425, - "step": 2605 - }, - { - "epoch": 0.31335297300547105, - "grad_norm": 2.9131856217754035, - "learning_rate": 3.2154102814243793e-06, - "loss": 0.9581, - "step": 2606 - }, - { - "epoch": 0.31347321589611016, - "grad_norm": 1.80809894161916, - "learning_rate": 3.2147915589916937e-06, - "loss": 0.8779, - "step": 2607 - }, - { - "epoch": 0.3135934587867492, - "grad_norm": 1.9420926206680817, - "learning_rate": 3.2141726522760938e-06, - "loss": 1.0272, - "step": 2608 - }, - { - "epoch": 0.3137137016773883, - "grad_norm": 0.7851372332191552, - "learning_rate": 3.213553561371469e-06, - "loss": 0.7536, - "step": 2609 - }, - { - "epoch": 0.31383394456802743, - "grad_norm": 1.9390958551063244, - "learning_rate": 3.212934286371733e-06, - "loss": 1.1676, - "step": 2610 - }, - { - "epoch": 0.3139541874586665, - "grad_norm": 4.958958028127627, - "learning_rate": 3.2123148273708304e-06, - "loss": 1.0351, - "step": 2611 - }, - { - "epoch": 0.3140744303493056, - "grad_norm": 1.5931862226644726, - "learning_rate": 3.211695184462733e-06, - "loss": 0.9642, - "step": 2612 - }, - { - "epoch": 0.3141946732399447, - "grad_norm": 0.9790114861477918, - "learning_rate": 3.2110753577414383e-06, - "loss": 0.8399, - "step": 2613 - }, - { - "epoch": 0.31431491613058377, - "grad_norm": 1.8495777937097242, - "learning_rate": 3.2104553473009757e-06, - "loss": 0.9912, - "step": 2614 - }, - { - "epoch": 0.3144351590212229, - "grad_norm": 1.6435642616142712, - "learning_rate": 3.209835153235399e-06, - "loss": 0.878, - "step": 2615 - }, - { - "epoch": 0.314555401911862, - "grad_norm": 1.603860090941444, - "learning_rate": 3.2092147756387916e-06, - "loss": 0.88, - "step": 2616 - }, - { - "epoch": 0.31467564480250104, - "grad_norm": 1.6943150411850834, - "learning_rate": 3.208594214605264e-06, - "loss": 1.0334, - "step": 2617 - }, - { - "epoch": 0.31479588769314015, - "grad_norm": 1.7873701728677185, - "learning_rate": 3.2079734702289553e-06, - "loss": 0.9783, - "step": 2618 - }, - { - "epoch": 0.3149161305837792, - "grad_norm": 1.040877914624367, - "learning_rate": 3.207352542604031e-06, - "loss": 0.8387, - "step": 2619 - }, - { - "epoch": 0.3150363734744183, - "grad_norm": 1.4191259281017985, - "learning_rate": 3.2067314318246864e-06, - "loss": 0.9825, - "step": 2620 - }, - { - "epoch": 0.31515661636505743, - "grad_norm": 1.7602165906022187, - "learning_rate": 3.206110137985143e-06, - "loss": 0.9685, - "step": 2621 - }, - { - "epoch": 0.3152768592556965, - "grad_norm": 1.5971609527704356, - "learning_rate": 3.2054886611796505e-06, - "loss": 1.1202, - "step": 2622 - }, - { - "epoch": 0.3153971021463356, - "grad_norm": 1.228172557579763, - "learning_rate": 3.204867001502487e-06, - "loss": 0.887, - "step": 2623 - }, - { - "epoch": 0.3155173450369747, - "grad_norm": 2.1116394321619354, - "learning_rate": 3.2042451590479567e-06, - "loss": 1.0088, - "step": 2624 - }, - { - "epoch": 0.31563758792761376, - "grad_norm": 1.520928375549097, - "learning_rate": 3.203623133910394e-06, - "loss": 1.0603, - "step": 2625 - }, - { - "epoch": 0.31575783081825287, - "grad_norm": 2.3423705046696264, - "learning_rate": 3.203000926184158e-06, - "loss": 0.9676, - "step": 2626 - }, - { - "epoch": 0.315878073708892, - "grad_norm": 4.404776685338892, - "learning_rate": 3.202378535963639e-06, - "loss": 0.9752, - "step": 2627 - }, - { - "epoch": 0.31599831659953104, - "grad_norm": 1.7682197881420867, - "learning_rate": 3.2017559633432516e-06, - "loss": 1.042, - "step": 2628 - }, - { - "epoch": 0.31611855949017015, - "grad_norm": 1.7001716134459004, - "learning_rate": 3.2011332084174398e-06, - "loss": 0.8658, - "step": 2629 - }, - { - "epoch": 0.31623880238080926, - "grad_norm": 1.5057361913583365, - "learning_rate": 3.2005102712806756e-06, - "loss": 1.0896, - "step": 2630 - }, - { - "epoch": 0.3163590452714483, - "grad_norm": 2.1630065765633604, - "learning_rate": 3.1998871520274575e-06, - "loss": 0.9234, - "step": 2631 - }, - { - "epoch": 0.3164792881620874, - "grad_norm": 1.5790694026467753, - "learning_rate": 3.199263850752312e-06, - "loss": 1.0425, - "step": 2632 - }, - { - "epoch": 0.31659953105272653, - "grad_norm": 2.16882879020637, - "learning_rate": 3.198640367549795e-06, - "loss": 1.06, - "step": 2633 - }, - { - "epoch": 0.3167197739433656, - "grad_norm": 1.6386717295049846, - "learning_rate": 3.198016702514487e-06, - "loss": 1.0625, - "step": 2634 - }, - { - "epoch": 0.3168400168340047, - "grad_norm": 1.6043835613050543, - "learning_rate": 3.1973928557409972e-06, - "loss": 1.0493, - "step": 2635 - }, - { - "epoch": 0.31696025972464376, - "grad_norm": 1.8977184425235785, - "learning_rate": 3.1967688273239636e-06, - "loss": 0.8996, - "step": 2636 - }, - { - "epoch": 0.31708050261528287, - "grad_norm": 1.6739914946801475, - "learning_rate": 3.1961446173580503e-06, - "loss": 1.0194, - "step": 2637 - }, - { - "epoch": 0.317200745505922, - "grad_norm": 1.659647354217922, - "learning_rate": 3.1955202259379502e-06, - "loss": 0.9743, - "step": 2638 - }, - { - "epoch": 0.31732098839656103, - "grad_norm": 1.6537039999356222, - "learning_rate": 3.194895653158381e-06, - "loss": 1.0194, - "step": 2639 - }, - { - "epoch": 0.31744123128720014, - "grad_norm": 1.0184930067688418, - "learning_rate": 3.194270899114093e-06, - "loss": 0.7907, - "step": 2640 - }, - { - "epoch": 0.31756147417783925, - "grad_norm": 2.0336818516244644, - "learning_rate": 3.193645963899858e-06, - "loss": 1.0263, - "step": 2641 - }, - { - "epoch": 0.3176817170684783, - "grad_norm": 1.5954757024097799, - "learning_rate": 3.193020847610479e-06, - "loss": 1.0349, - "step": 2642 - }, - { - "epoch": 0.3178019599591174, - "grad_norm": 2.0254014547384047, - "learning_rate": 3.192395550340787e-06, - "loss": 0.9163, - "step": 2643 - }, - { - "epoch": 0.31792220284975653, - "grad_norm": 2.1182134753558137, - "learning_rate": 3.191770072185638e-06, - "loss": 0.9675, - "step": 2644 - }, - { - "epoch": 0.3180424457403956, - "grad_norm": 2.3571033784111664, - "learning_rate": 3.191144413239916e-06, - "loss": 0.9303, - "step": 2645 - }, - { - "epoch": 0.3181626886310347, - "grad_norm": 2.3592363531541185, - "learning_rate": 3.190518573598534e-06, - "loss": 1.0788, - "step": 2646 - }, - { - "epoch": 0.3182829315216738, - "grad_norm": 1.8625367794629708, - "learning_rate": 3.1898925533564308e-06, - "loss": 0.9797, - "step": 2647 - }, - { - "epoch": 0.31840317441231286, - "grad_norm": 1.8394043999207335, - "learning_rate": 3.1892663526085733e-06, - "loss": 0.8397, - "step": 2648 - }, - { - "epoch": 0.31852341730295197, - "grad_norm": 0.8862925778318043, - "learning_rate": 3.188639971449956e-06, - "loss": 0.796, - "step": 2649 - }, - { - "epoch": 0.318643660193591, - "grad_norm": 2.08789816160064, - "learning_rate": 3.1880134099756e-06, - "loss": 0.923, - "step": 2650 - }, - { - "epoch": 0.31876390308423014, - "grad_norm": 1.7112250935516289, - "learning_rate": 3.1873866682805535e-06, - "loss": 0.9023, - "step": 2651 - }, - { - "epoch": 0.31888414597486925, - "grad_norm": 1.7386178023038281, - "learning_rate": 3.186759746459894e-06, - "loss": 1.086, - "step": 2652 - }, - { - "epoch": 0.3190043888655083, - "grad_norm": 1.7068567950668758, - "learning_rate": 3.1861326446087246e-06, - "loss": 0.9942, - "step": 2653 - }, - { - "epoch": 0.3191246317561474, - "grad_norm": 2.247360270466713, - "learning_rate": 3.1855053628221763e-06, - "loss": 0.9182, - "step": 2654 - }, - { - "epoch": 0.3192448746467865, - "grad_norm": 2.090607747689582, - "learning_rate": 3.184877901195407e-06, - "loss": 1.0991, - "step": 2655 - }, - { - "epoch": 0.3193651175374256, - "grad_norm": 0.9543605987721352, - "learning_rate": 3.184250259823602e-06, - "loss": 0.8634, - "step": 2656 - }, - { - "epoch": 0.3194853604280647, - "grad_norm": 2.1007532312944197, - "learning_rate": 3.183622438801974e-06, - "loss": 1.0096, - "step": 2657 - }, - { - "epoch": 0.3196056033187038, - "grad_norm": 1.7705859142315836, - "learning_rate": 3.1829944382257637e-06, - "loss": 0.963, - "step": 2658 - }, - { - "epoch": 0.31972584620934286, - "grad_norm": 2.265875545986293, - "learning_rate": 3.1823662581902373e-06, - "loss": 1.0209, - "step": 2659 - }, - { - "epoch": 0.31984608909998197, - "grad_norm": 2.3998988169957554, - "learning_rate": 3.1817378987906896e-06, - "loss": 0.948, - "step": 2660 - }, - { - "epoch": 0.3199663319906211, - "grad_norm": 1.784457481067986, - "learning_rate": 3.181109360122442e-06, - "loss": 0.9984, - "step": 2661 - }, - { - "epoch": 0.32008657488126013, - "grad_norm": 2.0773527388362076, - "learning_rate": 3.1804806422808445e-06, - "loss": 0.9872, - "step": 2662 - }, - { - "epoch": 0.32020681777189924, - "grad_norm": 1.5069321350126195, - "learning_rate": 3.1798517453612714e-06, - "loss": 0.9266, - "step": 2663 - }, - { - "epoch": 0.32032706066253835, - "grad_norm": 1.8747735204608689, - "learning_rate": 3.1792226694591265e-06, - "loss": 0.9532, - "step": 2664 - }, - { - "epoch": 0.3204473035531774, - "grad_norm": 1.624012647123842, - "learning_rate": 3.178593414669841e-06, - "loss": 0.9996, - "step": 2665 - }, - { - "epoch": 0.3205675464438165, - "grad_norm": 1.8958241878191118, - "learning_rate": 3.1779639810888707e-06, - "loss": 0.8999, - "step": 2666 - }, - { - "epoch": 0.3206877893344556, - "grad_norm": 1.7101936290937703, - "learning_rate": 3.1773343688117013e-06, - "loss": 0.9605, - "step": 2667 - }, - { - "epoch": 0.3208080322250947, - "grad_norm": 2.446725552843784, - "learning_rate": 3.1767045779338445e-06, - "loss": 1.0416, - "step": 2668 - }, - { - "epoch": 0.3209282751157338, - "grad_norm": 1.9835491714786977, - "learning_rate": 3.176074608550839e-06, - "loss": 1.1115, - "step": 2669 - }, - { - "epoch": 0.32104851800637285, - "grad_norm": 2.0638519249000926, - "learning_rate": 3.17544446075825e-06, - "loss": 1.0281, - "step": 2670 - }, - { - "epoch": 0.32116876089701196, - "grad_norm": 1.5271656293495321, - "learning_rate": 3.174814134651671e-06, - "loss": 0.9136, - "step": 2671 - }, - { - "epoch": 0.3212890037876511, - "grad_norm": 1.5471584176027144, - "learning_rate": 3.1741836303267215e-06, - "loss": 1.0082, - "step": 2672 - }, - { - "epoch": 0.32140924667829013, - "grad_norm": 1.6823583615273388, - "learning_rate": 3.1735529478790496e-06, - "loss": 0.9494, - "step": 2673 - }, - { - "epoch": 0.32152948956892924, - "grad_norm": 1.628390736044777, - "learning_rate": 3.172922087404328e-06, - "loss": 0.9951, - "step": 2674 - }, - { - "epoch": 0.32164973245956835, - "grad_norm": 0.9340417975910201, - "learning_rate": 3.1722910489982586e-06, - "loss": 0.7803, - "step": 2675 - }, - { - "epoch": 0.3217699753502074, - "grad_norm": 1.3543737204298762, - "learning_rate": 3.1716598327565694e-06, - "loss": 1.0022, - "step": 2676 - }, - { - "epoch": 0.3218902182408465, - "grad_norm": 1.3578231108748002, - "learning_rate": 3.171028438775015e-06, - "loss": 1.0411, - "step": 2677 - }, - { - "epoch": 0.3220104611314856, - "grad_norm": 1.7864241156530938, - "learning_rate": 3.170396867149377e-06, - "loss": 1.0481, - "step": 2678 - }, - { - "epoch": 0.3221307040221247, - "grad_norm": 1.8537579489580605, - "learning_rate": 3.1697651179754653e-06, - "loss": 1.0653, - "step": 2679 - }, - { - "epoch": 0.3222509469127638, - "grad_norm": 1.5257955445978977, - "learning_rate": 3.1691331913491153e-06, - "loss": 0.9313, - "step": 2680 - }, - { - "epoch": 0.32237118980340285, - "grad_norm": 1.8024237500489724, - "learning_rate": 3.1685010873661898e-06, - "loss": 1.0396, - "step": 2681 - }, - { - "epoch": 0.32249143269404196, - "grad_norm": 1.9381677030787527, - "learning_rate": 3.167868806122578e-06, - "loss": 1.0009, - "step": 2682 - }, - { - "epoch": 0.32261167558468107, - "grad_norm": 1.766134948006956, - "learning_rate": 3.1672363477141968e-06, - "loss": 0.8689, - "step": 2683 - }, - { - "epoch": 0.3227319184753201, - "grad_norm": 1.848111699365739, - "learning_rate": 3.1666037122369903e-06, - "loss": 1.0537, - "step": 2684 - }, - { - "epoch": 0.32285216136595923, - "grad_norm": 2.259029284945571, - "learning_rate": 3.165970899786928e-06, - "loss": 1.0563, - "step": 2685 - }, - { - "epoch": 0.32297240425659834, - "grad_norm": 1.596041285193575, - "learning_rate": 3.1653379104600067e-06, - "loss": 0.9391, - "step": 2686 - }, - { - "epoch": 0.3230926471472374, - "grad_norm": 1.7929212775833423, - "learning_rate": 3.164704744352251e-06, - "loss": 0.8964, - "step": 2687 - }, - { - "epoch": 0.3232128900378765, - "grad_norm": 1.5542233028097823, - "learning_rate": 3.164071401559713e-06, - "loss": 1.0125, - "step": 2688 - }, - { - "epoch": 0.3233331329285156, - "grad_norm": 1.6934801034448412, - "learning_rate": 3.1634378821784674e-06, - "loss": 0.9087, - "step": 2689 - }, - { - "epoch": 0.3234533758191547, - "grad_norm": 2.2742840740816836, - "learning_rate": 3.1628041863046208e-06, - "loss": 0.9392, - "step": 2690 - }, - { - "epoch": 0.3235736187097938, - "grad_norm": 2.302191569227752, - "learning_rate": 3.162170314034304e-06, - "loss": 1.115, - "step": 2691 - }, - { - "epoch": 0.3236938616004329, - "grad_norm": 1.7352181524495922, - "learning_rate": 3.1615362654636738e-06, - "loss": 1.0001, - "step": 2692 - }, - { - "epoch": 0.32381410449107195, - "grad_norm": 1.5706155968417614, - "learning_rate": 3.1609020406889163e-06, - "loss": 1.071, - "step": 2693 - }, - { - "epoch": 0.32393434738171106, - "grad_norm": 1.489244204599715, - "learning_rate": 3.1602676398062416e-06, - "loss": 1.0462, - "step": 2694 - }, - { - "epoch": 0.3240545902723502, - "grad_norm": 2.056182085672302, - "learning_rate": 3.1596330629118886e-06, - "loss": 0.8177, - "step": 2695 - }, - { - "epoch": 0.32417483316298923, - "grad_norm": 2.0929889719272023, - "learning_rate": 3.1589983101021223e-06, - "loss": 0.9388, - "step": 2696 - }, - { - "epoch": 0.32429507605362834, - "grad_norm": 1.9990790153943487, - "learning_rate": 3.1583633814732337e-06, - "loss": 1.0446, - "step": 2697 - }, - { - "epoch": 0.3244153189442674, - "grad_norm": 2.4241579186456628, - "learning_rate": 3.157728277121541e-06, - "loss": 0.9162, - "step": 2698 - }, - { - "epoch": 0.3245355618349065, - "grad_norm": 2.554950852192014, - "learning_rate": 3.1570929971433897e-06, - "loss": 0.9859, - "step": 2699 - }, - { - "epoch": 0.3246558047255456, - "grad_norm": 1.924948800632251, - "learning_rate": 3.1564575416351504e-06, - "loss": 1.0293, - "step": 2700 - }, - { - "epoch": 0.32477604761618467, - "grad_norm": 2.2834028771755124, - "learning_rate": 3.155821910693221e-06, - "loss": 0.9513, - "step": 2701 - }, - { - "epoch": 0.3248962905068238, - "grad_norm": 1.5072584962743, - "learning_rate": 3.1551861044140275e-06, - "loss": 1.0538, - "step": 2702 - }, - { - "epoch": 0.3250165333974629, - "grad_norm": 1.6425655695631547, - "learning_rate": 3.15455012289402e-06, - "loss": 0.9683, - "step": 2703 - }, - { - "epoch": 0.32513677628810195, - "grad_norm": 1.5666518934378506, - "learning_rate": 3.153913966229677e-06, - "loss": 1.037, - "step": 2704 - }, - { - "epoch": 0.32525701917874106, - "grad_norm": 0.7907221496964847, - "learning_rate": 3.1532776345175027e-06, - "loss": 0.7116, - "step": 2705 - }, - { - "epoch": 0.32537726206938017, - "grad_norm": 1.847936371100077, - "learning_rate": 3.1526411278540285e-06, - "loss": 0.9806, - "step": 2706 - }, - { - "epoch": 0.3254975049600192, - "grad_norm": 2.185045118237079, - "learning_rate": 3.1520044463358116e-06, - "loss": 1.0192, - "step": 2707 - }, - { - "epoch": 0.32561774785065833, - "grad_norm": 1.4056284484549593, - "learning_rate": 3.151367590059436e-06, - "loss": 0.9975, - "step": 2708 - }, - { - "epoch": 0.32573799074129745, - "grad_norm": 1.8751780743278925, - "learning_rate": 3.1507305591215117e-06, - "loss": 1.0654, - "step": 2709 - }, - { - "epoch": 0.3258582336319365, - "grad_norm": 0.8522106818531721, - "learning_rate": 3.150093353618677e-06, - "loss": 0.7788, - "step": 2710 - }, - { - "epoch": 0.3259784765225756, - "grad_norm": 2.0163570523680265, - "learning_rate": 3.149455973647596e-06, - "loss": 1.0806, - "step": 2711 - }, - { - "epoch": 0.32609871941321467, - "grad_norm": 1.947528358419498, - "learning_rate": 3.1488184193049563e-06, - "loss": 0.967, - "step": 2712 - }, - { - "epoch": 0.3262189623038538, - "grad_norm": 1.5937845562947162, - "learning_rate": 3.1481806906874767e-06, - "loss": 0.9268, - "step": 2713 - }, - { - "epoch": 0.3263392051944929, - "grad_norm": 3.1811545546927276, - "learning_rate": 3.147542787891899e-06, - "loss": 1.0756, - "step": 2714 - }, - { - "epoch": 0.32645944808513194, - "grad_norm": 1.8997219434358876, - "learning_rate": 3.1469047110149926e-06, - "loss": 0.947, - "step": 2715 - }, - { - "epoch": 0.32657969097577105, - "grad_norm": 1.6444175119099018, - "learning_rate": 3.146266460153554e-06, - "loss": 1.0522, - "step": 2716 - }, - { - "epoch": 0.32669993386641016, - "grad_norm": 1.5756199661047363, - "learning_rate": 3.145628035404404e-06, - "loss": 0.9978, - "step": 2717 - }, - { - "epoch": 0.3268201767570492, - "grad_norm": 1.0262972647298838, - "learning_rate": 3.1449894368643922e-06, - "loss": 0.7986, - "step": 2718 - }, - { - "epoch": 0.32694041964768833, - "grad_norm": 1.575289757126825, - "learning_rate": 3.1443506646303934e-06, - "loss": 0.9182, - "step": 2719 - }, - { - "epoch": 0.32706066253832744, - "grad_norm": 1.8934282690522994, - "learning_rate": 3.1437117187993086e-06, - "loss": 0.8708, - "step": 2720 - }, - { - "epoch": 0.3271809054289665, - "grad_norm": 1.7454520119392185, - "learning_rate": 3.143072599468065e-06, - "loss": 0.9994, - "step": 2721 - }, - { - "epoch": 0.3273011483196056, - "grad_norm": 1.5867930392553786, - "learning_rate": 3.1424333067336174e-06, - "loss": 0.9544, - "step": 2722 - }, - { - "epoch": 0.3274213912102447, - "grad_norm": 1.6734954966406725, - "learning_rate": 3.141793840692945e-06, - "loss": 0.9694, - "step": 2723 - }, - { - "epoch": 0.32754163410088377, - "grad_norm": 1.8225370902014346, - "learning_rate": 3.1411542014430553e-06, - "loss": 0.8148, - "step": 2724 - }, - { - "epoch": 0.3276618769915229, - "grad_norm": 1.5960416586465573, - "learning_rate": 3.1405143890809804e-06, - "loss": 1.0186, - "step": 2725 - }, - { - "epoch": 0.327782119882162, - "grad_norm": 1.6490786783430689, - "learning_rate": 3.1398744037037796e-06, - "loss": 0.9056, - "step": 2726 - }, - { - "epoch": 0.32790236277280105, - "grad_norm": 1.6719799543575389, - "learning_rate": 3.139234245408538e-06, - "loss": 1.0419, - "step": 2727 - }, - { - "epoch": 0.32802260566344016, - "grad_norm": 1.6522933194736553, - "learning_rate": 3.1385939142923666e-06, - "loss": 0.9684, - "step": 2728 - }, - { - "epoch": 0.3281428485540792, - "grad_norm": 1.8808406777408222, - "learning_rate": 3.137953410452405e-06, - "loss": 0.9819, - "step": 2729 - }, - { - "epoch": 0.3282630914447183, - "grad_norm": 1.5721269439760468, - "learning_rate": 3.1373127339858146e-06, - "loss": 0.945, - "step": 2730 - }, - { - "epoch": 0.32838333433535744, - "grad_norm": 1.815234916133648, - "learning_rate": 3.136671884989787e-06, - "loss": 0.9411, - "step": 2731 - }, - { - "epoch": 0.3285035772259965, - "grad_norm": 2.038729234843453, - "learning_rate": 3.1360308635615383e-06, - "loss": 1.0736, - "step": 2732 - }, - { - "epoch": 0.3286238201166356, - "grad_norm": 1.8270473727519116, - "learning_rate": 3.135389669798311e-06, - "loss": 0.9868, - "step": 2733 - }, - { - "epoch": 0.3287440630072747, - "grad_norm": 1.9060469972674432, - "learning_rate": 3.134748303797373e-06, - "loss": 0.9981, - "step": 2734 - }, - { - "epoch": 0.32886430589791377, - "grad_norm": 1.7365262548217641, - "learning_rate": 3.1341067656560203e-06, - "loss": 1.0114, - "step": 2735 - }, - { - "epoch": 0.3289845487885529, - "grad_norm": 1.782943188543656, - "learning_rate": 3.133465055471572e-06, - "loss": 1.0627, - "step": 2736 - }, - { - "epoch": 0.329104791679192, - "grad_norm": 2.1068916679091916, - "learning_rate": 3.1328231733413767e-06, - "loss": 0.8595, - "step": 2737 - }, - { - "epoch": 0.32922503456983104, - "grad_norm": 1.905766785458772, - "learning_rate": 3.1321811193628067e-06, - "loss": 1.1076, - "step": 2738 - }, - { - "epoch": 0.32934527746047015, - "grad_norm": 2.263940961378018, - "learning_rate": 3.131538893633261e-06, - "loss": 0.9141, - "step": 2739 - }, - { - "epoch": 0.32946552035110926, - "grad_norm": 2.0504886091427976, - "learning_rate": 3.130896496250165e-06, - "loss": 0.9835, - "step": 2740 - }, - { - "epoch": 0.3295857632417483, - "grad_norm": 2.187198836167367, - "learning_rate": 3.1302539273109693e-06, - "loss": 1.0667, - "step": 2741 - }, - { - "epoch": 0.32970600613238743, - "grad_norm": 1.4688429295786092, - "learning_rate": 3.1296111869131513e-06, - "loss": 1.0057, - "step": 2742 - }, - { - "epoch": 0.32982624902302654, - "grad_norm": 1.7792059564235179, - "learning_rate": 3.1289682751542153e-06, - "loss": 1.0476, - "step": 2743 - }, - { - "epoch": 0.3299464919136656, - "grad_norm": 1.7636335501831624, - "learning_rate": 3.1283251921316883e-06, - "loss": 0.9186, - "step": 2744 - }, - { - "epoch": 0.3300667348043047, - "grad_norm": 1.8448532166023686, - "learning_rate": 3.1276819379431277e-06, - "loss": 1.0135, - "step": 2745 - }, - { - "epoch": 0.33018697769494376, - "grad_norm": 1.8895433342459782, - "learning_rate": 3.1270385126861134e-06, - "loss": 0.9545, - "step": 2746 - }, - { - "epoch": 0.3303072205855829, - "grad_norm": 1.7154154248248943, - "learning_rate": 3.1263949164582533e-06, - "loss": 1.0242, - "step": 2747 - }, - { - "epoch": 0.330427463476222, - "grad_norm": 1.9979190132941453, - "learning_rate": 3.1257511493571797e-06, - "loss": 0.9797, - "step": 2748 - }, - { - "epoch": 0.33054770636686104, - "grad_norm": 1.7668619464814432, - "learning_rate": 3.125107211480552e-06, - "loss": 0.9867, - "step": 2749 - }, - { - "epoch": 0.33066794925750015, - "grad_norm": 1.5375298939101505, - "learning_rate": 3.124463102926054e-06, - "loss": 0.9911, - "step": 2750 - }, - { - "epoch": 0.33078819214813926, - "grad_norm": 0.8592831649262278, - "learning_rate": 3.1238188237913984e-06, - "loss": 0.8238, - "step": 2751 - }, - { - "epoch": 0.3309084350387783, - "grad_norm": 2.2099687051067645, - "learning_rate": 3.1231743741743202e-06, - "loss": 0.9623, - "step": 2752 - }, - { - "epoch": 0.3310286779294174, - "grad_norm": 2.2123131667741074, - "learning_rate": 3.122529754172582e-06, - "loss": 1.0344, - "step": 2753 - }, - { - "epoch": 0.33114892082005654, - "grad_norm": 2.204258404020349, - "learning_rate": 3.1218849638839736e-06, - "loss": 0.9271, - "step": 2754 - }, - { - "epoch": 0.3312691637106956, - "grad_norm": 3.0638653572618364, - "learning_rate": 3.121240003406307e-06, - "loss": 0.9784, - "step": 2755 - }, - { - "epoch": 0.3313894066013347, - "grad_norm": 1.7739221524013256, - "learning_rate": 3.120594872837425e-06, - "loss": 0.9167, - "step": 2756 - }, - { - "epoch": 0.3315096494919738, - "grad_norm": 0.8968803511935104, - "learning_rate": 3.1199495722751906e-06, - "loss": 0.8426, - "step": 2757 - }, - { - "epoch": 0.33162989238261287, - "grad_norm": 1.9116708566319855, - "learning_rate": 3.1193041018174972e-06, - "loss": 1.0434, - "step": 2758 - }, - { - "epoch": 0.331750135273252, - "grad_norm": 1.7801206923939557, - "learning_rate": 3.118658461562261e-06, - "loss": 1.1519, - "step": 2759 - }, - { - "epoch": 0.33187037816389103, - "grad_norm": 1.3779089674010228, - "learning_rate": 3.118012651607426e-06, - "loss": 1.0464, - "step": 2760 - }, - { - "epoch": 0.33199062105453014, - "grad_norm": 1.9443511425824351, - "learning_rate": 3.1173666720509603e-06, - "loss": 1.0277, - "step": 2761 - }, - { - "epoch": 0.33211086394516925, - "grad_norm": 1.719215607432671, - "learning_rate": 3.116720522990859e-06, - "loss": 0.8783, - "step": 2762 - }, - { - "epoch": 0.3322311068358083, - "grad_norm": 1.9672348610628403, - "learning_rate": 3.116074204525142e-06, - "loss": 0.8242, - "step": 2763 - }, - { - "epoch": 0.3323513497264474, - "grad_norm": 1.430796708867802, - "learning_rate": 3.1154277167518553e-06, - "loss": 1.0333, - "step": 2764 - }, - { - "epoch": 0.33247159261708653, - "grad_norm": 0.9048458599842616, - "learning_rate": 3.114781059769072e-06, - "loss": 0.82, - "step": 2765 - }, - { - "epoch": 0.3325918355077256, - "grad_norm": 3.085980456089476, - "learning_rate": 3.1141342336748874e-06, - "loss": 0.8946, - "step": 2766 - }, - { - "epoch": 0.3327120783983647, - "grad_norm": 1.429971218619778, - "learning_rate": 3.1134872385674253e-06, - "loss": 1.0137, - "step": 2767 - }, - { - "epoch": 0.3328323212890038, - "grad_norm": 2.6791593959901516, - "learning_rate": 3.1128400745448353e-06, - "loss": 1.0579, - "step": 2768 - }, - { - "epoch": 0.33295256417964286, - "grad_norm": 1.8319721009033296, - "learning_rate": 3.11219274170529e-06, - "loss": 0.8334, - "step": 2769 - }, - { - "epoch": 0.333072807070282, - "grad_norm": 1.6113321200234565, - "learning_rate": 3.1115452401469903e-06, - "loss": 1.0125, - "step": 2770 - }, - { - "epoch": 0.3331930499609211, - "grad_norm": 1.626897437709934, - "learning_rate": 3.1108975699681613e-06, - "loss": 1.0651, - "step": 2771 - }, - { - "epoch": 0.33331329285156014, - "grad_norm": 1.6088210698859764, - "learning_rate": 3.1102497312670542e-06, - "loss": 0.9152, - "step": 2772 - }, - { - "epoch": 0.33343353574219925, - "grad_norm": 1.7527617732729812, - "learning_rate": 3.109601724141946e-06, - "loss": 1.0035, - "step": 2773 - }, - { - "epoch": 0.33355377863283836, - "grad_norm": 1.6078946610673435, - "learning_rate": 3.108953548691138e-06, - "loss": 0.8853, - "step": 2774 - }, - { - "epoch": 0.3336740215234774, - "grad_norm": 2.0477110341998306, - "learning_rate": 3.108305205012959e-06, - "loss": 0.9282, - "step": 2775 - }, - { - "epoch": 0.3337942644141165, - "grad_norm": 1.841222485390453, - "learning_rate": 3.107656693205761e-06, - "loss": 1.0719, - "step": 2776 - }, - { - "epoch": 0.3339145073047556, - "grad_norm": 2.0205160724365965, - "learning_rate": 3.107008013367924e-06, - "loss": 0.8962, - "step": 2777 - }, - { - "epoch": 0.3340347501953947, - "grad_norm": 1.8152552741998174, - "learning_rate": 3.1063591655978507e-06, - "loss": 1.0636, - "step": 2778 - }, - { - "epoch": 0.3341549930860338, - "grad_norm": 1.5949340610566483, - "learning_rate": 3.105710149993972e-06, - "loss": 0.9906, - "step": 2779 - }, - { - "epoch": 0.33427523597667286, - "grad_norm": 1.6279515823878175, - "learning_rate": 3.1050609666547427e-06, - "loss": 1.0558, - "step": 2780 - }, - { - "epoch": 0.33439547886731197, - "grad_norm": 1.7127124515371488, - "learning_rate": 3.104411615678644e-06, - "loss": 0.9747, - "step": 2781 - }, - { - "epoch": 0.3345157217579511, - "grad_norm": 2.063152903472423, - "learning_rate": 3.1037620971641803e-06, - "loss": 0.932, - "step": 2782 - }, - { - "epoch": 0.33463596464859013, - "grad_norm": 2.1208618486906894, - "learning_rate": 3.1031124112098844e-06, - "loss": 0.8439, - "step": 2783 - }, - { - "epoch": 0.33475620753922924, - "grad_norm": 2.1690283033354283, - "learning_rate": 3.1024625579143127e-06, - "loss": 0.9174, - "step": 2784 - }, - { - "epoch": 0.33487645042986836, - "grad_norm": 2.3883657313844977, - "learning_rate": 3.101812537376048e-06, - "loss": 0.9281, - "step": 2785 - }, - { - "epoch": 0.3349966933205074, - "grad_norm": 1.735624121286941, - "learning_rate": 3.1011623496936973e-06, - "loss": 1.0408, - "step": 2786 - }, - { - "epoch": 0.3351169362111465, - "grad_norm": 1.5874521323368895, - "learning_rate": 3.100511994965893e-06, - "loss": 0.8984, - "step": 2787 - }, - { - "epoch": 0.33523717910178563, - "grad_norm": 1.579244386610762, - "learning_rate": 3.0998614732912947e-06, - "loss": 1.0573, - "step": 2788 - }, - { - "epoch": 0.3353574219924247, - "grad_norm": 1.7974038714090321, - "learning_rate": 3.0992107847685855e-06, - "loss": 0.8786, - "step": 2789 - }, - { - "epoch": 0.3354776648830638, - "grad_norm": 1.5368327142669873, - "learning_rate": 3.0985599294964736e-06, - "loss": 0.9981, - "step": 2790 - }, - { - "epoch": 0.33559790777370285, - "grad_norm": 1.9452583274805237, - "learning_rate": 3.097908907573695e-06, - "loss": 0.8996, - "step": 2791 - }, - { - "epoch": 0.33571815066434196, - "grad_norm": 1.843499894069982, - "learning_rate": 3.0972577190990067e-06, - "loss": 1.0971, - "step": 2792 - }, - { - "epoch": 0.3358383935549811, - "grad_norm": 1.6909067592530196, - "learning_rate": 3.096606364171196e-06, - "loss": 1.0015, - "step": 2793 - }, - { - "epoch": 0.33595863644562013, - "grad_norm": 1.7093188846729306, - "learning_rate": 3.0959548428890703e-06, - "loss": 1.0484, - "step": 2794 - }, - { - "epoch": 0.33607887933625924, - "grad_norm": 1.4042686066945758, - "learning_rate": 3.095303155351468e-06, - "loss": 1.0315, - "step": 2795 - }, - { - "epoch": 0.33619912222689835, - "grad_norm": 1.8849610538658523, - "learning_rate": 3.0946513016572464e-06, - "loss": 0.9882, - "step": 2796 - }, - { - "epoch": 0.3363193651175374, - "grad_norm": 1.7798163684677413, - "learning_rate": 3.0939992819052938e-06, - "loss": 0.9645, - "step": 2797 - }, - { - "epoch": 0.3364396080081765, - "grad_norm": 1.9808347465414875, - "learning_rate": 3.0933470961945193e-06, - "loss": 1.0129, - "step": 2798 - }, - { - "epoch": 0.3365598508988156, - "grad_norm": 1.5550046051055328, - "learning_rate": 3.0926947446238597e-06, - "loss": 0.8893, - "step": 2799 - }, - { - "epoch": 0.3366800937894547, - "grad_norm": 1.8086158965301729, - "learning_rate": 3.092042227292276e-06, - "loss": 1.0195, - "step": 2800 - }, - { - "epoch": 0.3368003366800938, - "grad_norm": 1.5109019672748347, - "learning_rate": 3.0913895442987557e-06, - "loss": 1.0816, - "step": 2801 - }, - { - "epoch": 0.3369205795707329, - "grad_norm": 1.497344845995601, - "learning_rate": 3.090736695742308e-06, - "loss": 1.0525, - "step": 2802 - }, - { - "epoch": 0.33704082246137196, - "grad_norm": 1.9012003444831034, - "learning_rate": 3.0900836817219713e-06, - "loss": 0.7088, - "step": 2803 - }, - { - "epoch": 0.33716106535201107, - "grad_norm": 1.550835827076072, - "learning_rate": 3.089430502336807e-06, - "loss": 1.0431, - "step": 2804 - }, - { - "epoch": 0.3372813082426502, - "grad_norm": 2.1970093166481943, - "learning_rate": 3.088777157685902e-06, - "loss": 1.1001, - "step": 2805 - }, - { - "epoch": 0.33740155113328923, - "grad_norm": 1.8004362557824323, - "learning_rate": 3.088123647868367e-06, - "loss": 1.0574, - "step": 2806 - }, - { - "epoch": 0.33752179402392835, - "grad_norm": 2.1784407355350583, - "learning_rate": 3.0874699729833405e-06, - "loss": 1.013, - "step": 2807 - }, - { - "epoch": 0.3376420369145674, - "grad_norm": 1.5512474762114499, - "learning_rate": 3.086816133129983e-06, - "loss": 1.0001, - "step": 2808 - }, - { - "epoch": 0.3377622798052065, - "grad_norm": 1.7302228007973914, - "learning_rate": 3.0861621284074826e-06, - "loss": 0.9663, - "step": 2809 - }, - { - "epoch": 0.3378825226958456, - "grad_norm": 1.4360100031533534, - "learning_rate": 3.085507958915051e-06, - "loss": 0.9344, - "step": 2810 - }, - { - "epoch": 0.3380027655864847, - "grad_norm": 1.8890943399906002, - "learning_rate": 3.084853624751925e-06, - "loss": 0.914, - "step": 2811 - }, - { - "epoch": 0.3381230084771238, - "grad_norm": 1.7196669473447581, - "learning_rate": 3.0841991260173668e-06, - "loss": 1.0576, - "step": 2812 - }, - { - "epoch": 0.3382432513677629, - "grad_norm": 1.6860797698313017, - "learning_rate": 3.0835444628106634e-06, - "loss": 1.0027, - "step": 2813 - }, - { - "epoch": 0.33836349425840195, - "grad_norm": 1.780656816192273, - "learning_rate": 3.082889635231126e-06, - "loss": 1.0326, - "step": 2814 - }, - { - "epoch": 0.33848373714904106, - "grad_norm": 2.263011714330166, - "learning_rate": 3.0822346433780925e-06, - "loss": 0.9676, - "step": 2815 - }, - { - "epoch": 0.3386039800396802, - "grad_norm": 2.01095311678237, - "learning_rate": 3.0815794873509237e-06, - "loss": 1.074, - "step": 2816 - }, - { - "epoch": 0.33872422293031923, - "grad_norm": 1.9836858318601525, - "learning_rate": 3.0809241672490066e-06, - "loss": 0.934, - "step": 2817 - }, - { - "epoch": 0.33884446582095834, - "grad_norm": 1.5519885689225956, - "learning_rate": 3.080268683171753e-06, - "loss": 1.0485, - "step": 2818 - }, - { - "epoch": 0.33896470871159745, - "grad_norm": 2.1493668089563864, - "learning_rate": 3.0796130352185985e-06, - "loss": 1.0837, - "step": 2819 - }, - { - "epoch": 0.3390849516022365, - "grad_norm": 1.683010557911079, - "learning_rate": 3.0789572234890057e-06, - "loss": 0.8741, - "step": 2820 - }, - { - "epoch": 0.3392051944928756, - "grad_norm": 1.5486576328027786, - "learning_rate": 3.0783012480824596e-06, - "loss": 0.9798, - "step": 2821 - }, - { - "epoch": 0.33932543738351467, - "grad_norm": 1.9523836483643247, - "learning_rate": 3.077645109098471e-06, - "loss": 0.9445, - "step": 2822 - }, - { - "epoch": 0.3394456802741538, - "grad_norm": 2.1376443423785854, - "learning_rate": 3.076988806636577e-06, - "loss": 0.9164, - "step": 2823 - }, - { - "epoch": 0.3395659231647929, - "grad_norm": 1.688431094402989, - "learning_rate": 3.0763323407963377e-06, - "loss": 1.082, - "step": 2824 - }, - { - "epoch": 0.33968616605543195, - "grad_norm": 1.5096138585488037, - "learning_rate": 3.075675711677337e-06, - "loss": 1.0013, - "step": 2825 - }, - { - "epoch": 0.33980640894607106, - "grad_norm": 1.7615701481638066, - "learning_rate": 3.0750189193791865e-06, - "loss": 0.9736, - "step": 2826 - }, - { - "epoch": 0.33992665183671017, - "grad_norm": 1.6173192951843751, - "learning_rate": 3.0743619640015203e-06, - "loss": 0.9064, - "step": 2827 - }, - { - "epoch": 0.3400468947273492, - "grad_norm": 1.7681342330265444, - "learning_rate": 3.073704845643999e-06, - "loss": 1.1222, - "step": 2828 - }, - { - "epoch": 0.34016713761798834, - "grad_norm": 2.4290319214533707, - "learning_rate": 3.0730475644063063e-06, - "loss": 0.99, - "step": 2829 - }, - { - "epoch": 0.34028738050862745, - "grad_norm": 1.6090153412641726, - "learning_rate": 3.072390120388151e-06, - "loss": 0.8569, - "step": 2830 - }, - { - "epoch": 0.3404076233992665, - "grad_norm": 1.828287687969226, - "learning_rate": 3.071732513689267e-06, - "loss": 0.9155, - "step": 2831 - }, - { - "epoch": 0.3405278662899056, - "grad_norm": 2.3074363737747037, - "learning_rate": 3.0710747444094134e-06, - "loss": 0.8766, - "step": 2832 - }, - { - "epoch": 0.3406481091805447, - "grad_norm": 1.7564883521758252, - "learning_rate": 3.070416812648372e-06, - "loss": 0.8552, - "step": 2833 - }, - { - "epoch": 0.3407683520711838, - "grad_norm": 1.8709425576433003, - "learning_rate": 3.069758718505951e-06, - "loss": 0.8608, - "step": 2834 - }, - { - "epoch": 0.3408885949618229, - "grad_norm": 1.4485386489067902, - "learning_rate": 3.0691004620819836e-06, - "loss": 1.002, - "step": 2835 - }, - { - "epoch": 0.341008837852462, - "grad_norm": 0.9074524761398426, - "learning_rate": 3.0684420434763254e-06, - "loss": 0.8315, - "step": 2836 - }, - { - "epoch": 0.34112908074310105, - "grad_norm": 1.7796607038106635, - "learning_rate": 3.06778346278886e-06, - "loss": 0.9657, - "step": 2837 - }, - { - "epoch": 0.34124932363374016, - "grad_norm": 1.4961761372244424, - "learning_rate": 3.0671247201194906e-06, - "loss": 0.9864, - "step": 2838 - }, - { - "epoch": 0.3413695665243792, - "grad_norm": 1.7714947381604955, - "learning_rate": 3.066465815568151e-06, - "loss": 0.9513, - "step": 2839 - }, - { - "epoch": 0.34148980941501833, - "grad_norm": 3.0957658153648415, - "learning_rate": 3.0658067492347947e-06, - "loss": 0.8841, - "step": 2840 - }, - { - "epoch": 0.34161005230565744, - "grad_norm": 1.7401945749648753, - "learning_rate": 3.065147521219402e-06, - "loss": 0.8717, - "step": 2841 - }, - { - "epoch": 0.3417302951962965, - "grad_norm": 1.5989128711048535, - "learning_rate": 3.064488131621977e-06, - "loss": 0.9491, - "step": 2842 - }, - { - "epoch": 0.3418505380869356, - "grad_norm": 1.5507515777132161, - "learning_rate": 3.063828580542549e-06, - "loss": 0.9326, - "step": 2843 - }, - { - "epoch": 0.3419707809775747, - "grad_norm": 2.249119692360539, - "learning_rate": 3.0631688680811706e-06, - "loss": 0.9231, - "step": 2844 - }, - { - "epoch": 0.3420910238682138, - "grad_norm": 1.9131838280332663, - "learning_rate": 3.062508994337921e-06, - "loss": 0.955, - "step": 2845 - }, - { - "epoch": 0.3422112667588529, - "grad_norm": 1.9011183283428745, - "learning_rate": 3.0618489594129013e-06, - "loss": 0.9851, - "step": 2846 - }, - { - "epoch": 0.342331509649492, - "grad_norm": 1.7572080802221108, - "learning_rate": 3.061188763406239e-06, - "loss": 0.9129, - "step": 2847 - }, - { - "epoch": 0.34245175254013105, - "grad_norm": 2.0544841941361045, - "learning_rate": 3.060528406418085e-06, - "loss": 1.0245, - "step": 2848 - }, - { - "epoch": 0.34257199543077016, - "grad_norm": 1.5114282925348232, - "learning_rate": 3.0598678885486145e-06, - "loss": 0.8239, - "step": 2849 - }, - { - "epoch": 0.34269223832140927, - "grad_norm": 1.6401827261474826, - "learning_rate": 3.0592072098980282e-06, - "loss": 0.9412, - "step": 2850 - }, - { - "epoch": 0.3428124812120483, - "grad_norm": 3.0408064440138336, - "learning_rate": 3.0585463705665514e-06, - "loss": 0.9326, - "step": 2851 - }, - { - "epoch": 0.34293272410268744, - "grad_norm": 1.9492311649271157, - "learning_rate": 3.0578853706544304e-06, - "loss": 0.9093, - "step": 2852 - }, - { - "epoch": 0.34305296699332655, - "grad_norm": 1.790864528326914, - "learning_rate": 3.0572242102619404e-06, - "loss": 0.8459, - "step": 2853 - }, - { - "epoch": 0.3431732098839656, - "grad_norm": 1.6652143190389996, - "learning_rate": 3.0565628894893784e-06, - "loss": 1.0069, - "step": 2854 - }, - { - "epoch": 0.3432934527746047, - "grad_norm": 1.4783553284761708, - "learning_rate": 3.0559014084370655e-06, - "loss": 0.9369, - "step": 2855 - }, - { - "epoch": 0.34341369566524377, - "grad_norm": 2.6451867782901486, - "learning_rate": 3.055239767205349e-06, - "loss": 0.9791, - "step": 2856 - }, - { - "epoch": 0.3435339385558829, - "grad_norm": 1.5795290149609134, - "learning_rate": 3.054577965894599e-06, - "loss": 0.9713, - "step": 2857 - }, - { - "epoch": 0.343654181446522, - "grad_norm": 1.521223288233379, - "learning_rate": 3.0539160046052094e-06, - "loss": 0.9053, - "step": 2858 - }, - { - "epoch": 0.34377442433716104, - "grad_norm": 2.964303471287185, - "learning_rate": 3.0532538834376003e-06, - "loss": 0.91, - "step": 2859 - }, - { - "epoch": 0.34389466722780015, - "grad_norm": 1.8509030354769185, - "learning_rate": 3.0525916024922143e-06, - "loss": 0.9832, - "step": 2860 - }, - { - "epoch": 0.34401491011843927, - "grad_norm": 2.752944756798504, - "learning_rate": 3.0519291618695193e-06, - "loss": 1.0394, - "step": 2861 - }, - { - "epoch": 0.3441351530090783, - "grad_norm": 1.5448762715742859, - "learning_rate": 3.0512665616700065e-06, - "loss": 0.9519, - "step": 2862 - }, - { - "epoch": 0.34425539589971743, - "grad_norm": 1.723137543929762, - "learning_rate": 3.0506038019941933e-06, - "loss": 1.1036, - "step": 2863 - }, - { - "epoch": 0.34437563879035654, - "grad_norm": 2.115172155421471, - "learning_rate": 3.049940882942617e-06, - "loss": 0.8775, - "step": 2864 - }, - { - "epoch": 0.3444958816809956, - "grad_norm": 1.6532852700897247, - "learning_rate": 3.0492778046158448e-06, - "loss": 0.9984, - "step": 2865 - }, - { - "epoch": 0.3446161245716347, - "grad_norm": 1.8603593801098868, - "learning_rate": 3.0486145671144633e-06, - "loss": 0.9613, - "step": 2866 - }, - { - "epoch": 0.3447363674622738, - "grad_norm": 1.9831390136069733, - "learning_rate": 3.047951170539086e-06, - "loss": 0.9641, - "step": 2867 - }, - { - "epoch": 0.3448566103529129, - "grad_norm": 2.3318433318471, - "learning_rate": 3.047287614990349e-06, - "loss": 1.0402, - "step": 2868 - }, - { - "epoch": 0.344976853243552, - "grad_norm": 2.192015102619853, - "learning_rate": 3.046623900568914e-06, - "loss": 0.8229, - "step": 2869 - }, - { - "epoch": 0.34509709613419104, - "grad_norm": 4.217870549971675, - "learning_rate": 3.045960027375465e-06, - "loss": 0.8988, - "step": 2870 - }, - { - "epoch": 0.34521733902483015, - "grad_norm": 2.4594335410919514, - "learning_rate": 3.045295995510711e-06, - "loss": 1.0225, - "step": 2871 - }, - { - "epoch": 0.34533758191546926, - "grad_norm": 2.009472468787199, - "learning_rate": 3.0446318050753865e-06, - "loss": 0.9356, - "step": 2872 - }, - { - "epoch": 0.3454578248061083, - "grad_norm": 1.8889318403646371, - "learning_rate": 3.0439674561702474e-06, - "loss": 0.9863, - "step": 2873 - }, - { - "epoch": 0.3455780676967474, - "grad_norm": 1.8511737785643503, - "learning_rate": 3.043302948896076e-06, - "loss": 1.0844, - "step": 2874 - }, - { - "epoch": 0.34569831058738654, - "grad_norm": 2.1137554884647014, - "learning_rate": 3.0426382833536756e-06, - "loss": 0.7999, - "step": 2875 - }, - { - "epoch": 0.3458185534780256, - "grad_norm": 1.844577797444167, - "learning_rate": 3.041973459643877e-06, - "loss": 0.9823, - "step": 2876 - }, - { - "epoch": 0.3459387963686647, - "grad_norm": 2.404138208107354, - "learning_rate": 3.0413084778675334e-06, - "loss": 0.8724, - "step": 2877 - }, - { - "epoch": 0.3460590392593038, - "grad_norm": 1.764279958804943, - "learning_rate": 3.0406433381255214e-06, - "loss": 1.0301, - "step": 2878 - }, - { - "epoch": 0.34617928214994287, - "grad_norm": 2.048593047613763, - "learning_rate": 3.0399780405187425e-06, - "loss": 1.025, - "step": 2879 - }, - { - "epoch": 0.346299525040582, - "grad_norm": 1.7575589145428352, - "learning_rate": 3.0393125851481216e-06, - "loss": 0.9824, - "step": 2880 - }, - { - "epoch": 0.3464197679312211, - "grad_norm": 1.87890625, - "learning_rate": 3.038646972114608e-06, - "loss": 1.0618, - "step": 2881 - }, - { - "epoch": 0.34654001082186014, - "grad_norm": 1.5832668424331209, - "learning_rate": 3.037981201519174e-06, - "loss": 0.8767, - "step": 2882 - }, - { - "epoch": 0.34666025371249926, - "grad_norm": 2.274175884507246, - "learning_rate": 3.0373152734628175e-06, - "loss": 0.9119, - "step": 2883 - }, - { - "epoch": 0.34678049660313837, - "grad_norm": 1.6995720857717382, - "learning_rate": 3.0366491880465584e-06, - "loss": 0.9569, - "step": 2884 - }, - { - "epoch": 0.3469007394937774, - "grad_norm": 1.4765490980397906, - "learning_rate": 3.035982945371443e-06, - "loss": 1.0202, - "step": 2885 - }, - { - "epoch": 0.34702098238441653, - "grad_norm": 2.198753619426895, - "learning_rate": 3.035316545538537e-06, - "loss": 1.0543, - "step": 2886 - }, - { - "epoch": 0.3471412252750556, - "grad_norm": 1.8855017612988225, - "learning_rate": 3.034649988648935e-06, - "loss": 0.9934, - "step": 2887 - }, - { - "epoch": 0.3472614681656947, - "grad_norm": 1.7243247051031747, - "learning_rate": 3.033983274803752e-06, - "loss": 1.0147, - "step": 2888 - }, - { - "epoch": 0.3473817110563338, - "grad_norm": 1.9748912877934053, - "learning_rate": 3.0333164041041283e-06, - "loss": 0.9178, - "step": 2889 - }, - { - "epoch": 0.34750195394697286, - "grad_norm": 1.773868995637525, - "learning_rate": 3.032649376651228e-06, - "loss": 0.9251, - "step": 2890 - }, - { - "epoch": 0.347622196837612, - "grad_norm": 1.7659003119062457, - "learning_rate": 3.031982192546238e-06, - "loss": 0.9573, - "step": 2891 - }, - { - "epoch": 0.3477424397282511, - "grad_norm": 2.2920045661348194, - "learning_rate": 3.0313148518903696e-06, - "loss": 1.1451, - "step": 2892 - }, - { - "epoch": 0.34786268261889014, - "grad_norm": 2.3267572468757183, - "learning_rate": 3.030647354784859e-06, - "loss": 1.0015, - "step": 2893 - }, - { - "epoch": 0.34798292550952925, - "grad_norm": 1.6756624477574422, - "learning_rate": 3.029979701330964e-06, - "loss": 0.9726, - "step": 2894 - }, - { - "epoch": 0.34810316840016836, - "grad_norm": 1.9759982061223524, - "learning_rate": 3.029311891629966e-06, - "loss": 1.0023, - "step": 2895 - }, - { - "epoch": 0.3482234112908074, - "grad_norm": 1.6701844839885294, - "learning_rate": 3.0286439257831744e-06, - "loss": 0.9362, - "step": 2896 - }, - { - "epoch": 0.3483436541814465, - "grad_norm": 2.02849250837583, - "learning_rate": 3.0279758038919156e-06, - "loss": 0.9094, - "step": 2897 - }, - { - "epoch": 0.34846389707208564, - "grad_norm": 2.3382376379042946, - "learning_rate": 3.0273075260575455e-06, - "loss": 0.9802, - "step": 2898 - }, - { - "epoch": 0.3485841399627247, - "grad_norm": 1.9442339745403479, - "learning_rate": 3.0266390923814396e-06, - "loss": 1.0003, - "step": 2899 - }, - { - "epoch": 0.3487043828533638, - "grad_norm": 1.8806797468660679, - "learning_rate": 3.0259705029650008e-06, - "loss": 1.0193, - "step": 2900 - }, - { - "epoch": 0.34882462574400286, - "grad_norm": 1.5746326139150464, - "learning_rate": 3.025301757909652e-06, - "loss": 0.9279, - "step": 2901 - }, - { - "epoch": 0.34894486863464197, - "grad_norm": 1.43123188923534, - "learning_rate": 3.024632857316842e-06, - "loss": 1.0076, - "step": 2902 - }, - { - "epoch": 0.3490651115252811, - "grad_norm": 1.7499694821557783, - "learning_rate": 3.0239638012880412e-06, - "loss": 0.977, - "step": 2903 - }, - { - "epoch": 0.34918535441592014, - "grad_norm": 2.3408610531759204, - "learning_rate": 3.0232945899247466e-06, - "loss": 1.0041, - "step": 2904 - }, - { - "epoch": 0.34930559730655925, - "grad_norm": 1.8744483772102911, - "learning_rate": 3.022625223328476e-06, - "loss": 0.9766, - "step": 2905 - }, - { - "epoch": 0.34942584019719836, - "grad_norm": 1.422025986923573, - "learning_rate": 3.0219557016007723e-06, - "loss": 0.8921, - "step": 2906 - }, - { - "epoch": 0.3495460830878374, - "grad_norm": 2.3868737802554816, - "learning_rate": 3.021286024843202e-06, - "loss": 0.8983, - "step": 2907 - }, - { - "epoch": 0.3496663259784765, - "grad_norm": 1.1913394096260825, - "learning_rate": 3.0206161931573526e-06, - "loss": 0.8993, - "step": 2908 - }, - { - "epoch": 0.34978656886911563, - "grad_norm": 1.5423465295054923, - "learning_rate": 3.0199462066448388e-06, - "loss": 1.1298, - "step": 2909 - }, - { - "epoch": 0.3499068117597547, - "grad_norm": 1.6815663320188985, - "learning_rate": 3.019276065407296e-06, - "loss": 0.8945, - "step": 2910 - }, - { - "epoch": 0.3500270546503938, - "grad_norm": 4.204629961893803, - "learning_rate": 3.018605769546385e-06, - "loss": 1.0105, - "step": 2911 - }, - { - "epoch": 0.3501472975410329, - "grad_norm": 1.8116016792613017, - "learning_rate": 3.017935319163788e-06, - "loss": 1.0048, - "step": 2912 - }, - { - "epoch": 0.35026754043167196, - "grad_norm": 1.732048192197317, - "learning_rate": 3.017264714361213e-06, - "loss": 0.9133, - "step": 2913 - }, - { - "epoch": 0.3503877833223111, - "grad_norm": 1.8500291615842595, - "learning_rate": 3.016593955240389e-06, - "loss": 1.0129, - "step": 2914 - }, - { - "epoch": 0.3505080262129502, - "grad_norm": 0.8379973230250899, - "learning_rate": 3.015923041903071e-06, - "loss": 0.8514, - "step": 2915 - }, - { - "epoch": 0.35062826910358924, - "grad_norm": 1.7838324849465508, - "learning_rate": 3.0152519744510347e-06, - "loss": 1.0306, - "step": 2916 - }, - { - "epoch": 0.35074851199422835, - "grad_norm": 1.7314541252243294, - "learning_rate": 3.014580752986081e-06, - "loss": 1.0304, - "step": 2917 - }, - { - "epoch": 0.3508687548848674, - "grad_norm": 1.779706252537664, - "learning_rate": 3.0139093776100345e-06, - "loss": 0.9859, - "step": 2918 - }, - { - "epoch": 0.3509889977755065, - "grad_norm": 1.7249038116616426, - "learning_rate": 3.013237848424741e-06, - "loss": 0.952, - "step": 2919 - }, - { - "epoch": 0.35110924066614563, - "grad_norm": 1.9327346280322655, - "learning_rate": 3.012566165532072e-06, - "loss": 0.9522, - "step": 2920 - }, - { - "epoch": 0.3512294835567847, - "grad_norm": 2.11084362755921, - "learning_rate": 3.0118943290339207e-06, - "loss": 0.9669, - "step": 2921 - }, - { - "epoch": 0.3513497264474238, - "grad_norm": 1.8804611782782101, - "learning_rate": 3.011222339032204e-06, - "loss": 0.8771, - "step": 2922 - }, - { - "epoch": 0.3514699693380629, - "grad_norm": 1.725435881898816, - "learning_rate": 3.0105501956288626e-06, - "loss": 0.8918, - "step": 2923 - }, - { - "epoch": 0.35159021222870196, - "grad_norm": 1.7894921598942186, - "learning_rate": 3.0098778989258602e-06, - "loss": 0.9424, - "step": 2924 - }, - { - "epoch": 0.35171045511934107, - "grad_norm": 1.828567190569837, - "learning_rate": 3.009205449025183e-06, - "loss": 1.0787, - "step": 2925 - }, - { - "epoch": 0.3518306980099802, - "grad_norm": 2.398164615577292, - "learning_rate": 3.008532846028842e-06, - "loss": 0.8328, - "step": 2926 - }, - { - "epoch": 0.35195094090061924, - "grad_norm": 2.3313606870008483, - "learning_rate": 3.0078600900388694e-06, - "loss": 0.9043, - "step": 2927 - }, - { - "epoch": 0.35207118379125835, - "grad_norm": 1.7459950941919227, - "learning_rate": 3.007187181157323e-06, - "loss": 0.9431, - "step": 2928 - }, - { - "epoch": 0.35219142668189746, - "grad_norm": 2.313162837529439, - "learning_rate": 3.006514119486282e-06, - "loss": 0.8886, - "step": 2929 - }, - { - "epoch": 0.3523116695725365, - "grad_norm": 1.615406474639775, - "learning_rate": 3.005840905127849e-06, - "loss": 0.8847, - "step": 2930 - }, - { - "epoch": 0.3524319124631756, - "grad_norm": 1.9186265365193802, - "learning_rate": 3.0051675381841516e-06, - "loss": 1.0657, - "step": 2931 - }, - { - "epoch": 0.3525521553538147, - "grad_norm": 1.5050606396647257, - "learning_rate": 3.0044940187573363e-06, - "loss": 0.9651, - "step": 2932 - }, - { - "epoch": 0.3526723982444538, - "grad_norm": 1.8265687888553588, - "learning_rate": 3.003820346949578e-06, - "loss": 0.8581, - "step": 2933 - }, - { - "epoch": 0.3527926411350929, - "grad_norm": 2.0512454134316225, - "learning_rate": 3.003146522863071e-06, - "loss": 0.9953, - "step": 2934 - }, - { - "epoch": 0.35291288402573195, - "grad_norm": 1.985586082165959, - "learning_rate": 3.0024725466000345e-06, - "loss": 1.0583, - "step": 2935 - }, - { - "epoch": 0.35303312691637107, - "grad_norm": 1.8707723002967238, - "learning_rate": 3.0017984182627087e-06, - "loss": 0.9933, - "step": 2936 - }, - { - "epoch": 0.3531533698070102, - "grad_norm": 1.9134991497977876, - "learning_rate": 3.00112413795336e-06, - "loss": 1.0286, - "step": 2937 - }, - { - "epoch": 0.35327361269764923, - "grad_norm": 1.9640764137327558, - "learning_rate": 3.000449705774275e-06, - "loss": 1.005, - "step": 2938 - }, - { - "epoch": 0.35339385558828834, - "grad_norm": 2.4119389647547362, - "learning_rate": 2.9997751218277654e-06, - "loss": 0.9146, - "step": 2939 - }, - { - "epoch": 0.35351409847892745, - "grad_norm": 1.7099805969119022, - "learning_rate": 2.999100386216166e-06, - "loss": 0.9747, - "step": 2940 - }, - { - "epoch": 0.3536343413695665, - "grad_norm": 1.7124054764326564, - "learning_rate": 2.998425499041831e-06, - "loss": 0.9494, - "step": 2941 - }, - { - "epoch": 0.3537545842602056, - "grad_norm": 1.8207762950049702, - "learning_rate": 2.997750460407142e-06, - "loss": 0.8253, - "step": 2942 - }, - { - "epoch": 0.35387482715084473, - "grad_norm": 1.9506137420026297, - "learning_rate": 2.997075270414501e-06, - "loss": 0.9102, - "step": 2943 - }, - { - "epoch": 0.3539950700414838, - "grad_norm": 0.7683325013744404, - "learning_rate": 2.9963999291663347e-06, - "loss": 0.7958, - "step": 2944 - }, - { - "epoch": 0.3541153129321229, - "grad_norm": 3.3589177729816275, - "learning_rate": 2.9957244367650915e-06, - "loss": 0.9469, - "step": 2945 - }, - { - "epoch": 0.354235555822762, - "grad_norm": 1.7297064693137856, - "learning_rate": 2.9950487933132425e-06, - "loss": 1.0349, - "step": 2946 - }, - { - "epoch": 0.35435579871340106, - "grad_norm": 1.8340358544092832, - "learning_rate": 2.994372998913283e-06, - "loss": 0.9175, - "step": 2947 - }, - { - "epoch": 0.35447604160404017, - "grad_norm": 2.2084114012924307, - "learning_rate": 2.99369705366773e-06, - "loss": 0.8295, - "step": 2948 - }, - { - "epoch": 0.3545962844946792, - "grad_norm": 2.1926668454652742, - "learning_rate": 2.9930209576791244e-06, - "loss": 1.0194, - "step": 2949 - }, - { - "epoch": 0.35471652738531834, - "grad_norm": 1.7478409800399344, - "learning_rate": 2.9923447110500285e-06, - "loss": 0.8403, - "step": 2950 - }, - { - "epoch": 0.35483677027595745, - "grad_norm": 1.4464381602808756, - "learning_rate": 2.9916683138830295e-06, - "loss": 0.9531, - "step": 2951 - }, - { - "epoch": 0.3549570131665965, - "grad_norm": 4.473790027559782, - "learning_rate": 2.9909917662807353e-06, - "loss": 1.0085, - "step": 2952 - }, - { - "epoch": 0.3550772560572356, - "grad_norm": 2.3418282323015265, - "learning_rate": 2.9903150683457783e-06, - "loss": 0.8989, - "step": 2953 - }, - { - "epoch": 0.3551974989478747, - "grad_norm": 3.1270455341396945, - "learning_rate": 2.9896382201808126e-06, - "loss": 0.8536, - "step": 2954 - }, - { - "epoch": 0.3553177418385138, - "grad_norm": 2.6238963213825715, - "learning_rate": 2.988961221888516e-06, - "loss": 1.01, - "step": 2955 - }, - { - "epoch": 0.3554379847291529, - "grad_norm": 2.500134464462475, - "learning_rate": 2.988284073571589e-06, - "loss": 0.9891, - "step": 2956 - }, - { - "epoch": 0.355558227619792, - "grad_norm": 2.784967060270989, - "learning_rate": 2.9876067753327528e-06, - "loss": 0.9241, - "step": 2957 - }, - { - "epoch": 0.35567847051043106, - "grad_norm": 1.9225385807367785, - "learning_rate": 2.986929327274754e-06, - "loss": 1.0059, - "step": 2958 - }, - { - "epoch": 0.35579871340107017, - "grad_norm": 1.552074364075144, - "learning_rate": 2.9862517295003617e-06, - "loss": 0.9824, - "step": 2959 - }, - { - "epoch": 0.3559189562917093, - "grad_norm": 1.59513689918525, - "learning_rate": 2.9855739821123654e-06, - "loss": 0.9267, - "step": 2960 - }, - { - "epoch": 0.35603919918234833, - "grad_norm": 1.5874430458485325, - "learning_rate": 2.98489608521358e-06, - "loss": 1.015, - "step": 2961 - }, - { - "epoch": 0.35615944207298744, - "grad_norm": 1.9074082529744425, - "learning_rate": 2.9842180389068425e-06, - "loss": 0.9929, - "step": 2962 - }, - { - "epoch": 0.35627968496362655, - "grad_norm": 0.899723724505178, - "learning_rate": 2.98353984329501e-06, - "loss": 0.8185, - "step": 2963 - }, - { - "epoch": 0.3563999278542656, - "grad_norm": 1.628892710970225, - "learning_rate": 2.982861498480965e-06, - "loss": 0.9107, - "step": 2964 - }, - { - "epoch": 0.3565201707449047, - "grad_norm": 1.5300305240054617, - "learning_rate": 2.9821830045676122e-06, - "loss": 1.022, - "step": 2965 - }, - { - "epoch": 0.3566404136355438, - "grad_norm": 2.102488033882045, - "learning_rate": 2.9815043616578793e-06, - "loss": 0.9207, - "step": 2966 - }, - { - "epoch": 0.3567606565261829, - "grad_norm": 1.9027740806426106, - "learning_rate": 2.9808255698547145e-06, - "loss": 0.9772, - "step": 2967 - }, - { - "epoch": 0.356880899416822, - "grad_norm": 2.166869178502284, - "learning_rate": 2.9801466292610913e-06, - "loss": 0.9894, - "step": 2968 - }, - { - "epoch": 0.35700114230746105, - "grad_norm": 1.9161213914646988, - "learning_rate": 2.979467539980003e-06, - "loss": 1.0082, - "step": 2969 - }, - { - "epoch": 0.35712138519810016, - "grad_norm": 1.9533262835734528, - "learning_rate": 2.978788302114468e-06, - "loss": 0.9713, - "step": 2970 - }, - { - "epoch": 0.35724162808873927, - "grad_norm": 1.6520249801021751, - "learning_rate": 2.9781089157675255e-06, - "loss": 1.0191, - "step": 2971 - }, - { - "epoch": 0.3573618709793783, - "grad_norm": 1.390846534827868, - "learning_rate": 2.977429381042238e-06, - "loss": 1.0824, - "step": 2972 - }, - { - "epoch": 0.35748211387001744, - "grad_norm": 2.064445271833455, - "learning_rate": 2.9767496980416913e-06, - "loss": 1.093, - "step": 2973 - }, - { - "epoch": 0.35760235676065655, - "grad_norm": 2.2908586869467897, - "learning_rate": 2.9760698668689914e-06, - "loss": 1.0047, - "step": 2974 - }, - { - "epoch": 0.3577225996512956, - "grad_norm": 1.7675741459079868, - "learning_rate": 2.975389887627269e-06, - "loss": 0.9144, - "step": 2975 - }, - { - "epoch": 0.3578428425419347, - "grad_norm": 1.968419365139986, - "learning_rate": 2.9747097604196764e-06, - "loss": 1.0969, - "step": 2976 - }, - { - "epoch": 0.3579630854325738, - "grad_norm": 0.7590428618917461, - "learning_rate": 2.9740294853493875e-06, - "loss": 0.7928, - "step": 2977 - }, - { - "epoch": 0.3580833283232129, - "grad_norm": 1.8755037266880599, - "learning_rate": 2.9733490625196008e-06, - "loss": 0.8766, - "step": 2978 - }, - { - "epoch": 0.358203571213852, - "grad_norm": 6.054258112294454, - "learning_rate": 2.9726684920335353e-06, - "loss": 0.9555, - "step": 2979 - }, - { - "epoch": 0.35832381410449105, - "grad_norm": 2.2021999716808183, - "learning_rate": 2.971987773994432e-06, - "loss": 1.0221, - "step": 2980 - }, - { - "epoch": 0.35844405699513016, - "grad_norm": 1.896875877317323, - "learning_rate": 2.9713069085055566e-06, - "loss": 1.0278, - "step": 2981 - }, - { - "epoch": 0.35856429988576927, - "grad_norm": 1.5497973801861835, - "learning_rate": 2.9706258956701958e-06, - "loss": 0.9892, - "step": 2982 - }, - { - "epoch": 0.3586845427764083, - "grad_norm": 1.9980013516815776, - "learning_rate": 2.9699447355916575e-06, - "loss": 0.9655, - "step": 2983 - }, - { - "epoch": 0.35880478566704743, - "grad_norm": 1.9202747040172534, - "learning_rate": 2.969263428373275e-06, - "loss": 0.9379, - "step": 2984 - }, - { - "epoch": 0.35892502855768654, - "grad_norm": 1.8284407774941984, - "learning_rate": 2.9685819741184007e-06, - "loss": 0.9894, - "step": 2985 - }, - { - "epoch": 0.3590452714483256, - "grad_norm": 2.9552772389337623, - "learning_rate": 2.967900372930411e-06, - "loss": 0.8887, - "step": 2986 - }, - { - "epoch": 0.3591655143389647, - "grad_norm": 2.585965712471196, - "learning_rate": 2.9672186249127046e-06, - "loss": 0.9962, - "step": 2987 - }, - { - "epoch": 0.3592857572296038, - "grad_norm": 1.868277195856453, - "learning_rate": 2.9665367301687014e-06, - "loss": 0.9793, - "step": 2988 - }, - { - "epoch": 0.3594060001202429, - "grad_norm": 1.8266192372747467, - "learning_rate": 2.965854688801845e-06, - "loss": 0.9596, - "step": 2989 - }, - { - "epoch": 0.359526243010882, - "grad_norm": 2.87839407227411, - "learning_rate": 2.9651725009156005e-06, - "loss": 0.9614, - "step": 2990 - }, - { - "epoch": 0.3596464859015211, - "grad_norm": 1.5646807139535024, - "learning_rate": 2.964490166613454e-06, - "loss": 0.9469, - "step": 2991 - }, - { - "epoch": 0.35976672879216015, - "grad_norm": 0.9145307075346369, - "learning_rate": 2.963807685998917e-06, - "loss": 0.7971, - "step": 2992 - }, - { - "epoch": 0.35988697168279926, - "grad_norm": 1.520445404045989, - "learning_rate": 2.9631250591755196e-06, - "loss": 0.9823, - "step": 2993 - }, - { - "epoch": 0.36000721457343837, - "grad_norm": 2.527636928150829, - "learning_rate": 2.962442286246817e-06, - "loss": 0.7859, - "step": 2994 - }, - { - "epoch": 0.3601274574640774, - "grad_norm": 1.5943233730162685, - "learning_rate": 2.9617593673163853e-06, - "loss": 0.9, - "step": 2995 - }, - { - "epoch": 0.36024770035471654, - "grad_norm": 2.0469310257793483, - "learning_rate": 2.9610763024878216e-06, - "loss": 0.9733, - "step": 2996 - }, - { - "epoch": 0.3603679432453556, - "grad_norm": 1.6741192452129516, - "learning_rate": 2.960393091864747e-06, - "loss": 1.1087, - "step": 2997 - }, - { - "epoch": 0.3604881861359947, - "grad_norm": 1.8390931625208409, - "learning_rate": 2.959709735550804e-06, - "loss": 0.9524, - "step": 2998 - }, - { - "epoch": 0.3606084290266338, - "grad_norm": 2.104699725120513, - "learning_rate": 2.9590262336496575e-06, - "loss": 0.9487, - "step": 2999 - }, - { - "epoch": 0.36072867191727287, - "grad_norm": 1.771712148674101, - "learning_rate": 2.9583425862649936e-06, - "loss": 1.0502, - "step": 3000 - }, - { - "epoch": 0.360848914807912, - "grad_norm": 2.0412110449583145, - "learning_rate": 2.9576587935005215e-06, - "loss": 0.9462, - "step": 3001 - }, - { - "epoch": 0.3609691576985511, - "grad_norm": 2.286190692180327, - "learning_rate": 2.9569748554599713e-06, - "loss": 0.9235, - "step": 3002 - }, - { - "epoch": 0.36108940058919015, - "grad_norm": 2.550581312303171, - "learning_rate": 2.956290772247097e-06, - "loss": 0.9239, - "step": 3003 - }, - { - "epoch": 0.36120964347982926, - "grad_norm": 1.7967653323997947, - "learning_rate": 2.9556065439656724e-06, - "loss": 0.9363, - "step": 3004 - }, - { - "epoch": 0.36132988637046837, - "grad_norm": 1.5662575363379734, - "learning_rate": 2.9549221707194952e-06, - "loss": 1.0196, - "step": 3005 - }, - { - "epoch": 0.3614501292611074, - "grad_norm": 1.9123318068403101, - "learning_rate": 2.954237652612384e-06, - "loss": 0.9388, - "step": 3006 - }, - { - "epoch": 0.36157037215174653, - "grad_norm": 1.8566157279340705, - "learning_rate": 2.9535529897481796e-06, - "loss": 1.0431, - "step": 3007 - }, - { - "epoch": 0.36169061504238564, - "grad_norm": 1.9970387948307569, - "learning_rate": 2.9528681822307446e-06, - "loss": 0.9722, - "step": 3008 - }, - { - "epoch": 0.3618108579330247, - "grad_norm": 2.3003146827043754, - "learning_rate": 2.952183230163964e-06, - "loss": 1.0294, - "step": 3009 - }, - { - "epoch": 0.3619311008236638, - "grad_norm": 1.8771746422352855, - "learning_rate": 2.9514981336517448e-06, - "loss": 0.9354, - "step": 3010 - }, - { - "epoch": 0.36205134371430286, - "grad_norm": 2.1426467814959906, - "learning_rate": 2.950812892798015e-06, - "loss": 1.0114, - "step": 3011 - }, - { - "epoch": 0.362171586604942, - "grad_norm": 1.7901434818494049, - "learning_rate": 2.9501275077067256e-06, - "loss": 1.068, - "step": 3012 - }, - { - "epoch": 0.3622918294955811, - "grad_norm": 3.6765203432194475, - "learning_rate": 2.949441978481848e-06, - "loss": 1.0844, - "step": 3013 - }, - { - "epoch": 0.36241207238622014, - "grad_norm": 1.8003086620171374, - "learning_rate": 2.9487563052273778e-06, - "loss": 0.9954, - "step": 3014 - }, - { - "epoch": 0.36253231527685925, - "grad_norm": 1.6276301592976588, - "learning_rate": 2.94807048804733e-06, - "loss": 1.056, - "step": 3015 - }, - { - "epoch": 0.36265255816749836, - "grad_norm": 2.3838408705579694, - "learning_rate": 2.9473845270457434e-06, - "loss": 1.096, - "step": 3016 - }, - { - "epoch": 0.3627728010581374, - "grad_norm": 2.1045406751954796, - "learning_rate": 2.946698422326677e-06, - "loss": 0.894, - "step": 3017 - }, - { - "epoch": 0.36289304394877653, - "grad_norm": 1.9162500453881783, - "learning_rate": 2.946012173994213e-06, - "loss": 0.9925, - "step": 3018 - }, - { - "epoch": 0.36301328683941564, - "grad_norm": 1.3217996652477761, - "learning_rate": 2.945325782152454e-06, - "loss": 0.8822, - "step": 3019 - }, - { - "epoch": 0.3631335297300547, - "grad_norm": 2.191865325549606, - "learning_rate": 2.9446392469055257e-06, - "loss": 0.9966, - "step": 3020 - }, - { - "epoch": 0.3632537726206938, - "grad_norm": 1.6165619160776283, - "learning_rate": 2.9439525683575745e-06, - "loss": 0.9965, - "step": 3021 - }, - { - "epoch": 0.3633740155113329, - "grad_norm": 2.18520518232105, - "learning_rate": 2.9432657466127694e-06, - "loss": 0.9439, - "step": 3022 - }, - { - "epoch": 0.36349425840197197, - "grad_norm": 1.7686443337733078, - "learning_rate": 2.9425787817753007e-06, - "loss": 0.968, - "step": 3023 - }, - { - "epoch": 0.3636145012926111, - "grad_norm": 1.5654995165580166, - "learning_rate": 2.94189167394938e-06, - "loss": 0.9122, - "step": 3024 - }, - { - "epoch": 0.3637347441832502, - "grad_norm": 1.813154957263127, - "learning_rate": 2.941204423239241e-06, - "loss": 1.0126, - "step": 3025 - }, - { - "epoch": 0.36385498707388925, - "grad_norm": 1.619506132057125, - "learning_rate": 2.9405170297491395e-06, - "loss": 0.9658, - "step": 3026 - }, - { - "epoch": 0.36397522996452836, - "grad_norm": 1.8343872667776537, - "learning_rate": 2.939829493583353e-06, - "loss": 0.9987, - "step": 3027 - }, - { - "epoch": 0.3640954728551674, - "grad_norm": 2.134608815289472, - "learning_rate": 2.939141814846179e-06, - "loss": 1.0284, - "step": 3028 - }, - { - "epoch": 0.3642157157458065, - "grad_norm": 1.7468567275727738, - "learning_rate": 2.938453993641938e-06, - "loss": 1.021, - "step": 3029 - }, - { - "epoch": 0.36433595863644563, - "grad_norm": 2.253360040814885, - "learning_rate": 2.937766030074973e-06, - "loss": 0.906, - "step": 3030 - }, - { - "epoch": 0.3644562015270847, - "grad_norm": 1.622172316272732, - "learning_rate": 2.937077924249646e-06, - "loss": 1.0177, - "step": 3031 - }, - { - "epoch": 0.3645764444177238, - "grad_norm": 2.169603106332485, - "learning_rate": 2.9363896762703443e-06, - "loss": 0.9582, - "step": 3032 - }, - { - "epoch": 0.3646966873083629, - "grad_norm": 1.5713818453368902, - "learning_rate": 2.9357012862414725e-06, - "loss": 1.0382, - "step": 3033 - }, - { - "epoch": 0.36481693019900197, - "grad_norm": 2.0797670230235292, - "learning_rate": 2.9350127542674593e-06, - "loss": 0.9158, - "step": 3034 - }, - { - "epoch": 0.3649371730896411, - "grad_norm": 1.7875749825709686, - "learning_rate": 2.934324080452755e-06, - "loss": 0.9603, - "step": 3035 - }, - { - "epoch": 0.3650574159802802, - "grad_norm": 1.44989220119836, - "learning_rate": 2.9336352649018307e-06, - "loss": 0.9767, - "step": 3036 - }, - { - "epoch": 0.36517765887091924, - "grad_norm": 1.5433713749810316, - "learning_rate": 2.9329463077191783e-06, - "loss": 0.8966, - "step": 3037 - }, - { - "epoch": 0.36529790176155835, - "grad_norm": 3.058376572499912, - "learning_rate": 2.9322572090093135e-06, - "loss": 0.8429, - "step": 3038 - }, - { - "epoch": 0.36541814465219746, - "grad_norm": 2.4968249185860563, - "learning_rate": 2.9315679688767713e-06, - "loss": 0.9584, - "step": 3039 - }, - { - "epoch": 0.3655383875428365, - "grad_norm": 1.5361351406033694, - "learning_rate": 2.9308785874261085e-06, - "loss": 0.8683, - "step": 3040 - }, - { - "epoch": 0.36565863043347563, - "grad_norm": 1.5171004516898603, - "learning_rate": 2.9301890647619045e-06, - "loss": 1.0075, - "step": 3041 - }, - { - "epoch": 0.36577887332411474, - "grad_norm": 2.0388935355267197, - "learning_rate": 2.929499400988759e-06, - "loss": 1.006, - "step": 3042 - }, - { - "epoch": 0.3658991162147538, - "grad_norm": 1.7632009018899166, - "learning_rate": 2.9288095962112927e-06, - "loss": 0.8539, - "step": 3043 - }, - { - "epoch": 0.3660193591053929, - "grad_norm": 1.6779854601024364, - "learning_rate": 2.9281196505341503e-06, - "loss": 1.0458, - "step": 3044 - }, - { - "epoch": 0.36613960199603196, - "grad_norm": 2.0693589803754797, - "learning_rate": 2.9274295640619946e-06, - "loss": 0.9873, - "step": 3045 - }, - { - "epoch": 0.36625984488667107, - "grad_norm": 1.755165990060252, - "learning_rate": 2.9267393368995103e-06, - "loss": 0.9812, - "step": 3046 - }, - { - "epoch": 0.3663800877773102, - "grad_norm": 2.179429343191215, - "learning_rate": 2.926048969151407e-06, - "loss": 0.9511, - "step": 3047 - }, - { - "epoch": 0.36650033066794924, - "grad_norm": 1.720404504850193, - "learning_rate": 2.92535846092241e-06, - "loss": 0.8833, - "step": 3048 - }, - { - "epoch": 0.36662057355858835, - "grad_norm": 1.4518188020337106, - "learning_rate": 2.9246678123172704e-06, - "loss": 1.0247, - "step": 3049 - }, - { - "epoch": 0.36674081644922746, - "grad_norm": 2.119360452667861, - "learning_rate": 2.9239770234407596e-06, - "loss": 0.9449, - "step": 3050 - }, - { - "epoch": 0.3668610593398665, - "grad_norm": 1.815316806708645, - "learning_rate": 2.9232860943976686e-06, - "loss": 0.8836, - "step": 3051 - }, - { - "epoch": 0.3669813022305056, - "grad_norm": 1.5158176103097694, - "learning_rate": 2.9225950252928115e-06, - "loss": 1.0417, - "step": 3052 - }, - { - "epoch": 0.36710154512114473, - "grad_norm": 1.9534350339867552, - "learning_rate": 2.9219038162310217e-06, - "loss": 1.0181, - "step": 3053 - }, - { - "epoch": 0.3672217880117838, - "grad_norm": 1.7733927985787998, - "learning_rate": 2.921212467317157e-06, - "loss": 1.0219, - "step": 3054 - }, - { - "epoch": 0.3673420309024229, - "grad_norm": 2.0891096019927207, - "learning_rate": 2.920520978656093e-06, - "loss": 1.0143, - "step": 3055 - }, - { - "epoch": 0.367462273793062, - "grad_norm": 1.9377705785027517, - "learning_rate": 2.919829350352729e-06, - "loss": 0.9669, - "step": 3056 - }, - { - "epoch": 0.36758251668370107, - "grad_norm": 0.8710148181136562, - "learning_rate": 2.919137582511983e-06, - "loss": 0.8156, - "step": 3057 - }, - { - "epoch": 0.3677027595743402, - "grad_norm": 2.28746951140217, - "learning_rate": 2.918445675238797e-06, - "loss": 0.8376, - "step": 3058 - }, - { - "epoch": 0.36782300246497923, - "grad_norm": 1.7100547011030878, - "learning_rate": 2.917753628638132e-06, - "loss": 0.9006, - "step": 3059 - }, - { - "epoch": 0.36794324535561834, - "grad_norm": 1.9562444571029576, - "learning_rate": 2.9170614428149716e-06, - "loss": 0.9025, - "step": 3060 - }, - { - "epoch": 0.36806348824625745, - "grad_norm": 2.3979943080322315, - "learning_rate": 2.9163691178743195e-06, - "loss": 1.0666, - "step": 3061 - }, - { - "epoch": 0.3681837311368965, - "grad_norm": 1.7438294067897644, - "learning_rate": 2.9156766539212006e-06, - "loss": 0.9822, - "step": 3062 - }, - { - "epoch": 0.3683039740275356, - "grad_norm": 1.9702000802962973, - "learning_rate": 2.9149840510606614e-06, - "loss": 0.9144, - "step": 3063 - }, - { - "epoch": 0.36842421691817473, - "grad_norm": 1.244585661288369, - "learning_rate": 2.914291309397769e-06, - "loss": 0.8809, - "step": 3064 - }, - { - "epoch": 0.3685444598088138, - "grad_norm": 1.9120013291222648, - "learning_rate": 2.9135984290376117e-06, - "loss": 0.9846, - "step": 3065 - }, - { - "epoch": 0.3686647026994529, - "grad_norm": 1.7845803965988893, - "learning_rate": 2.9129054100853e-06, - "loss": 1.0311, - "step": 3066 - }, - { - "epoch": 0.368784945590092, - "grad_norm": 1.7485260205534867, - "learning_rate": 2.912212252645963e-06, - "loss": 0.9558, - "step": 3067 - }, - { - "epoch": 0.36890518848073106, - "grad_norm": 1.9730375318017124, - "learning_rate": 2.9115189568247523e-06, - "loss": 0.9572, - "step": 3068 - }, - { - "epoch": 0.36902543137137017, - "grad_norm": 1.844497399865766, - "learning_rate": 2.910825522726841e-06, - "loss": 1.1121, - "step": 3069 - }, - { - "epoch": 0.3691456742620093, - "grad_norm": 1.9260123773812898, - "learning_rate": 2.9101319504574215e-06, - "loss": 0.967, - "step": 3070 - }, - { - "epoch": 0.36926591715264834, - "grad_norm": 1.6584488290155759, - "learning_rate": 2.909438240121709e-06, - "loss": 0.9548, - "step": 3071 - }, - { - "epoch": 0.36938616004328745, - "grad_norm": 1.7452035294463206, - "learning_rate": 2.908744391824939e-06, - "loss": 0.9013, - "step": 3072 - }, - { - "epoch": 0.36950640293392656, - "grad_norm": 1.6109740082001434, - "learning_rate": 2.908050405672367e-06, - "loss": 0.9831, - "step": 3073 - }, - { - "epoch": 0.3696266458245656, - "grad_norm": 1.6781861448603212, - "learning_rate": 2.9073562817692703e-06, - "loss": 0.9833, - "step": 3074 - }, - { - "epoch": 0.3697468887152047, - "grad_norm": 0.895822051812632, - "learning_rate": 2.9066620202209468e-06, - "loss": 0.8056, - "step": 3075 - }, - { - "epoch": 0.3698671316058438, - "grad_norm": 2.124876243129115, - "learning_rate": 2.905967621132716e-06, - "loss": 0.9853, - "step": 3076 - }, - { - "epoch": 0.3699873744964829, - "grad_norm": 1.810439023259192, - "learning_rate": 2.9052730846099172e-06, - "loss": 0.9436, - "step": 3077 - }, - { - "epoch": 0.370107617387122, - "grad_norm": 1.0317154036448433, - "learning_rate": 2.9045784107579123e-06, - "loss": 0.8443, - "step": 3078 - }, - { - "epoch": 0.37022786027776106, - "grad_norm": 1.961186245039449, - "learning_rate": 2.9038835996820807e-06, - "loss": 0.8721, - "step": 3079 - }, - { - "epoch": 0.37034810316840017, - "grad_norm": 1.694146735541169, - "learning_rate": 2.903188651487826e-06, - "loss": 0.991, - "step": 3080 - }, - { - "epoch": 0.3704683460590393, - "grad_norm": 2.4361401212446987, - "learning_rate": 2.902493566280571e-06, - "loss": 1.0679, - "step": 3081 - }, - { - "epoch": 0.37058858894967833, - "grad_norm": 1.705799487656695, - "learning_rate": 2.9017983441657595e-06, - "loss": 1.0123, - "step": 3082 - }, - { - "epoch": 0.37070883184031744, - "grad_norm": 2.321969927032005, - "learning_rate": 2.9011029852488564e-06, - "loss": 0.9552, - "step": 3083 - }, - { - "epoch": 0.37082907473095655, - "grad_norm": 1.2048293588041274, - "learning_rate": 2.9004074896353465e-06, - "loss": 0.8738, - "step": 3084 - }, - { - "epoch": 0.3709493176215956, - "grad_norm": 1.7446590392155297, - "learning_rate": 2.8997118574307362e-06, - "loss": 1.0135, - "step": 3085 - }, - { - "epoch": 0.3710695605122347, - "grad_norm": 2.026901520288842, - "learning_rate": 2.899016088740553e-06, - "loss": 0.9392, - "step": 3086 - }, - { - "epoch": 0.37118980340287383, - "grad_norm": 8.92429384488671, - "learning_rate": 2.898320183670344e-06, - "loss": 0.9971, - "step": 3087 - }, - { - "epoch": 0.3713100462935129, - "grad_norm": 1.7365712874147128, - "learning_rate": 2.8976241423256767e-06, - "loss": 1.0817, - "step": 3088 - }, - { - "epoch": 0.371430289184152, - "grad_norm": 2.8936323038969984, - "learning_rate": 2.896927964812142e-06, - "loss": 0.8766, - "step": 3089 - }, - { - "epoch": 0.37155053207479105, - "grad_norm": 3.03443693967984, - "learning_rate": 2.8962316512353465e-06, - "loss": 0.9519, - "step": 3090 - }, - { - "epoch": 0.37167077496543016, - "grad_norm": 1.528812180609015, - "learning_rate": 2.8955352017009233e-06, - "loss": 0.9493, - "step": 3091 - }, - { - "epoch": 0.3717910178560693, - "grad_norm": 1.9590806414454407, - "learning_rate": 2.8948386163145212e-06, - "loss": 0.9746, - "step": 3092 - }, - { - "epoch": 0.3719112607467083, - "grad_norm": 1.6290375361605303, - "learning_rate": 2.8941418951818135e-06, - "loss": 0.9916, - "step": 3093 - }, - { - "epoch": 0.37203150363734744, - "grad_norm": 2.0735764595617594, - "learning_rate": 2.8934450384084903e-06, - "loss": 0.9141, - "step": 3094 - }, - { - "epoch": 0.37215174652798655, - "grad_norm": 2.0647635321274724, - "learning_rate": 2.8927480461002653e-06, - "loss": 0.9116, - "step": 3095 - }, - { - "epoch": 0.3722719894186256, - "grad_norm": 2.424252038813801, - "learning_rate": 2.892050918362872e-06, - "loss": 1.0642, - "step": 3096 - }, - { - "epoch": 0.3723922323092647, - "grad_norm": 0.9887799599179976, - "learning_rate": 2.8913536553020626e-06, - "loss": 0.8085, - "step": 3097 - }, - { - "epoch": 0.3725124751999038, - "grad_norm": 3.938091536396233, - "learning_rate": 2.8906562570236137e-06, - "loss": 1.0504, - "step": 3098 - }, - { - "epoch": 0.3726327180905429, - "grad_norm": 1.5838245165365161, - "learning_rate": 2.889958723633318e-06, - "loss": 0.9663, - "step": 3099 - }, - { - "epoch": 0.372752960981182, - "grad_norm": 1.6548386084241056, - "learning_rate": 2.889261055236992e-06, - "loss": 0.9406, - "step": 3100 - }, - { - "epoch": 0.3728732038718211, - "grad_norm": 1.632097735769523, - "learning_rate": 2.8885632519404704e-06, - "loss": 1.02, - "step": 3101 - }, - { - "epoch": 0.37299344676246016, - "grad_norm": 1.9254671347445018, - "learning_rate": 2.8878653138496107e-06, - "loss": 0.9465, - "step": 3102 - }, - { - "epoch": 0.37311368965309927, - "grad_norm": 2.324323892218707, - "learning_rate": 2.8871672410702878e-06, - "loss": 0.9557, - "step": 3103 - }, - { - "epoch": 0.3732339325437384, - "grad_norm": 1.621934272991335, - "learning_rate": 2.8864690337084008e-06, - "loss": 1.014, - "step": 3104 - }, - { - "epoch": 0.37335417543437743, - "grad_norm": 1.5988656761667612, - "learning_rate": 2.885770691869866e-06, - "loss": 0.9749, - "step": 3105 - }, - { - "epoch": 0.37347441832501654, - "grad_norm": 2.377491798266395, - "learning_rate": 2.8850722156606207e-06, - "loss": 0.9456, - "step": 3106 - }, - { - "epoch": 0.3735946612156556, - "grad_norm": 1.9324186202824383, - "learning_rate": 2.8843736051866252e-06, - "loss": 0.8704, - "step": 3107 - }, - { - "epoch": 0.3737149041062947, - "grad_norm": 1.5100460606301416, - "learning_rate": 2.8836748605538557e-06, - "loss": 0.8936, - "step": 3108 - }, - { - "epoch": 0.3738351469969338, - "grad_norm": 1.93192158845566, - "learning_rate": 2.882975981868313e-06, - "loss": 0.8317, - "step": 3109 - }, - { - "epoch": 0.3739553898875729, - "grad_norm": 2.059062057590017, - "learning_rate": 2.882276969236016e-06, - "loss": 0.8864, - "step": 3110 - }, - { - "epoch": 0.374075632778212, - "grad_norm": 6.119207640572225, - "learning_rate": 2.881577822763005e-06, - "loss": 0.9666, - "step": 3111 - }, - { - "epoch": 0.3741958756688511, - "grad_norm": 1.7657293778346361, - "learning_rate": 2.880878542555338e-06, - "loss": 1.0765, - "step": 3112 - }, - { - "epoch": 0.37431611855949015, - "grad_norm": 2.2510636252821374, - "learning_rate": 2.8801791287190976e-06, - "loss": 1.0106, - "step": 3113 - }, - { - "epoch": 0.37443636145012926, - "grad_norm": 4.010646004332758, - "learning_rate": 2.8794795813603817e-06, - "loss": 1.0623, - "step": 3114 - }, - { - "epoch": 0.3745566043407684, - "grad_norm": 1.7456405700142184, - "learning_rate": 2.878779900585314e-06, - "loss": 1.0158, - "step": 3115 - }, - { - "epoch": 0.37467684723140743, - "grad_norm": 1.5578540394715192, - "learning_rate": 2.8780800865000336e-06, - "loss": 0.9589, - "step": 3116 - }, - { - "epoch": 0.37479709012204654, - "grad_norm": 1.2045184896339771, - "learning_rate": 2.877380139210702e-06, - "loss": 0.8527, - "step": 3117 - }, - { - "epoch": 0.37491733301268565, - "grad_norm": 1.5228611565043433, - "learning_rate": 2.876680058823501e-06, - "loss": 0.9511, - "step": 3118 - }, - { - "epoch": 0.3750375759033247, - "grad_norm": 1.9935922494501905, - "learning_rate": 2.8759798454446314e-06, - "loss": 0.8635, - "step": 3119 - }, - { - "epoch": 0.3751578187939638, - "grad_norm": 1.7550420333306334, - "learning_rate": 2.8752794991803173e-06, - "loss": 1.0163, - "step": 3120 - }, - { - "epoch": 0.37527806168460287, - "grad_norm": 1.9715828269531, - "learning_rate": 2.8745790201367976e-06, - "loss": 0.9536, - "step": 3121 - }, - { - "epoch": 0.375398304575242, - "grad_norm": 1.8346177572497395, - "learning_rate": 2.8738784084203373e-06, - "loss": 1.0505, - "step": 3122 - }, - { - "epoch": 0.3755185474658811, - "grad_norm": 1.6730808650947628, - "learning_rate": 2.873177664137216e-06, - "loss": 0.9885, - "step": 3123 - }, - { - "epoch": 0.37563879035652015, - "grad_norm": 1.5142868269481837, - "learning_rate": 2.8724767873937384e-06, - "loss": 0.8926, - "step": 3124 - }, - { - "epoch": 0.37575903324715926, - "grad_norm": 1.9004746797797352, - "learning_rate": 2.871775778296225e-06, - "loss": 1.0754, - "step": 3125 - }, - { - "epoch": 0.37587927613779837, - "grad_norm": 2.057106363828096, - "learning_rate": 2.8710746369510196e-06, - "loss": 0.982, - "step": 3126 - }, - { - "epoch": 0.3759995190284374, - "grad_norm": 2.4197831502811913, - "learning_rate": 2.8703733634644846e-06, - "loss": 1.0339, - "step": 3127 - }, - { - "epoch": 0.37611976191907653, - "grad_norm": 1.508674809842813, - "learning_rate": 2.869671957943002e-06, - "loss": 0.9919, - "step": 3128 - }, - { - "epoch": 0.37624000480971564, - "grad_norm": 1.7334144613035776, - "learning_rate": 2.8689704204929747e-06, - "loss": 0.9457, - "step": 3129 - }, - { - "epoch": 0.3763602477003547, - "grad_norm": 2.043148229822271, - "learning_rate": 2.8682687512208253e-06, - "loss": 1.0009, - "step": 3130 - }, - { - "epoch": 0.3764804905909938, - "grad_norm": 1.7878409800722308, - "learning_rate": 2.8675669502329972e-06, - "loss": 1.0031, - "step": 3131 - }, - { - "epoch": 0.3766007334816329, - "grad_norm": 1.9748304414998672, - "learning_rate": 2.866865017635952e-06, - "loss": 1.0436, - "step": 3132 - }, - { - "epoch": 0.376720976372272, - "grad_norm": 1.6405556800366567, - "learning_rate": 2.866162953536174e-06, - "loss": 0.9902, - "step": 3133 - }, - { - "epoch": 0.3768412192629111, - "grad_norm": 1.696108582799461, - "learning_rate": 2.8654607580401634e-06, - "loss": 0.9474, - "step": 3134 - }, - { - "epoch": 0.3769614621535502, - "grad_norm": 1.037813626420565, - "learning_rate": 2.8647584312544446e-06, - "loss": 0.8801, - "step": 3135 - }, - { - "epoch": 0.37708170504418925, - "grad_norm": 1.4459243716436845, - "learning_rate": 2.864055973285559e-06, - "loss": 1.0522, - "step": 3136 - }, - { - "epoch": 0.37720194793482836, - "grad_norm": 2.060480342712907, - "learning_rate": 2.8633533842400698e-06, - "loss": 1.0636, - "step": 3137 - }, - { - "epoch": 0.3773221908254674, - "grad_norm": 2.477272774200821, - "learning_rate": 2.862650664224558e-06, - "loss": 0.9832, - "step": 3138 - }, - { - "epoch": 0.37744243371610653, - "grad_norm": 1.2952327671520787, - "learning_rate": 2.861947813345627e-06, - "loss": 0.9009, - "step": 3139 - }, - { - "epoch": 0.37756267660674564, - "grad_norm": 2.1077887328033063, - "learning_rate": 2.8612448317098974e-06, - "loss": 0.9196, - "step": 3140 - }, - { - "epoch": 0.3776829194973847, - "grad_norm": 1.9797087345324271, - "learning_rate": 2.8605417194240114e-06, - "loss": 1.0346, - "step": 3141 - }, - { - "epoch": 0.3778031623880238, - "grad_norm": 1.5914076253129412, - "learning_rate": 2.8598384765946315e-06, - "loss": 0.9875, - "step": 3142 - }, - { - "epoch": 0.3779234052786629, - "grad_norm": 1.8169901903902748, - "learning_rate": 2.8591351033284377e-06, - "loss": 0.9163, - "step": 3143 - }, - { - "epoch": 0.37804364816930197, - "grad_norm": 1.9318729025694545, - "learning_rate": 2.8584315997321325e-06, - "loss": 1.0438, - "step": 3144 - }, - { - "epoch": 0.3781638910599411, - "grad_norm": 2.284757034773452, - "learning_rate": 2.8577279659124356e-06, - "loss": 0.9787, - "step": 3145 - }, - { - "epoch": 0.3782841339505802, - "grad_norm": 1.7146044588081517, - "learning_rate": 2.857024201976089e-06, - "loss": 1.0268, - "step": 3146 - }, - { - "epoch": 0.37840437684121925, - "grad_norm": 2.102189661770306, - "learning_rate": 2.8563203080298516e-06, - "loss": 0.9377, - "step": 3147 - }, - { - "epoch": 0.37852461973185836, - "grad_norm": 2.2681074551418563, - "learning_rate": 2.855616284180505e-06, - "loss": 1.0913, - "step": 3148 - }, - { - "epoch": 0.37864486262249747, - "grad_norm": 1.0300425772523962, - "learning_rate": 2.8549121305348477e-06, - "loss": 0.9242, - "step": 3149 - }, - { - "epoch": 0.3787651055131365, - "grad_norm": 2.0770977033795894, - "learning_rate": 2.8542078471997006e-06, - "loss": 1.0266, - "step": 3150 - }, - { - "epoch": 0.37888534840377563, - "grad_norm": 1.598275584562238, - "learning_rate": 2.8535034342819013e-06, - "loss": 0.9524, - "step": 3151 - }, - { - "epoch": 0.37900559129441475, - "grad_norm": 1.3621165374663815, - "learning_rate": 2.85279889188831e-06, - "loss": 0.9171, - "step": 3152 - }, - { - "epoch": 0.3791258341850538, - "grad_norm": 1.6723839528831907, - "learning_rate": 2.852094220125805e-06, - "loss": 1.0095, - "step": 3153 - }, - { - "epoch": 0.3792460770756929, - "grad_norm": 1.9213091629836712, - "learning_rate": 2.8513894191012846e-06, - "loss": 0.9073, - "step": 3154 - }, - { - "epoch": 0.37936631996633197, - "grad_norm": 1.4458452222410414, - "learning_rate": 2.8506844889216664e-06, - "loss": 0.9839, - "step": 3155 - }, - { - "epoch": 0.3794865628569711, - "grad_norm": 0.9837809389864626, - "learning_rate": 2.849979429693887e-06, - "loss": 0.8644, - "step": 3156 - }, - { - "epoch": 0.3796068057476102, - "grad_norm": 2.012290147537978, - "learning_rate": 2.8492742415249042e-06, - "loss": 0.9426, - "step": 3157 - }, - { - "epoch": 0.37972704863824924, - "grad_norm": 1.5578622272478697, - "learning_rate": 2.848568924521694e-06, - "loss": 0.9587, - "step": 3158 - }, - { - "epoch": 0.37984729152888835, - "grad_norm": 1.7321829475179216, - "learning_rate": 2.8478634787912526e-06, - "loss": 0.9279, - "step": 3159 - }, - { - "epoch": 0.37996753441952746, - "grad_norm": 2.1413247851101778, - "learning_rate": 2.847157904440596e-06, - "loss": 0.9693, - "step": 3160 - }, - { - "epoch": 0.3800877773101665, - "grad_norm": 1.5226355373643343, - "learning_rate": 2.846452201576759e-06, - "loss": 0.9482, - "step": 3161 - }, - { - "epoch": 0.38020802020080563, - "grad_norm": 1.0202220461885014, - "learning_rate": 2.845746370306795e-06, - "loss": 0.8505, - "step": 3162 - }, - { - "epoch": 0.38032826309144474, - "grad_norm": 2.1507003796688906, - "learning_rate": 2.84504041073778e-06, - "loss": 0.9837, - "step": 3163 - }, - { - "epoch": 0.3804485059820838, - "grad_norm": 1.9162376034130983, - "learning_rate": 2.844334322976806e-06, - "loss": 1.0028, - "step": 3164 - }, - { - "epoch": 0.3805687488727229, - "grad_norm": 1.8126775720363437, - "learning_rate": 2.8436281071309866e-06, - "loss": 1.0374, - "step": 3165 - }, - { - "epoch": 0.380688991763362, - "grad_norm": 1.0875473713697001, - "learning_rate": 2.842921763307455e-06, - "loss": 0.7504, - "step": 3166 - }, - { - "epoch": 0.38080923465400107, - "grad_norm": 1.7761937036257287, - "learning_rate": 2.842215291613361e-06, - "loss": 1.0291, - "step": 3167 - }, - { - "epoch": 0.3809294775446402, - "grad_norm": 0.9493406868937359, - "learning_rate": 2.8415086921558774e-06, - "loss": 0.8369, - "step": 3168 - }, - { - "epoch": 0.38104972043527924, - "grad_norm": 1.7075730779824303, - "learning_rate": 2.840801965042194e-06, - "loss": 0.9796, - "step": 3169 - }, - { - "epoch": 0.38116996332591835, - "grad_norm": 1.6663857302719058, - "learning_rate": 2.840095110379521e-06, - "loss": 1.0376, - "step": 3170 - }, - { - "epoch": 0.38129020621655746, - "grad_norm": 1.0931943299351308, - "learning_rate": 2.8393881282750884e-06, - "loss": 0.7499, - "step": 3171 - }, - { - "epoch": 0.3814104491071965, - "grad_norm": 1.9035917414448205, - "learning_rate": 2.838681018836144e-06, - "loss": 0.9771, - "step": 3172 - }, - { - "epoch": 0.3815306919978356, - "grad_norm": 2.6194241659710618, - "learning_rate": 2.837973782169955e-06, - "loss": 0.9772, - "step": 3173 - }, - { - "epoch": 0.38165093488847474, - "grad_norm": 1.1577377027477542, - "learning_rate": 2.8372664183838096e-06, - "loss": 0.8216, - "step": 3174 - }, - { - "epoch": 0.3817711777791138, - "grad_norm": 2.294223214794445, - "learning_rate": 2.836558927585015e-06, - "loss": 0.886, - "step": 3175 - }, - { - "epoch": 0.3818914206697529, - "grad_norm": 1.76521985292212, - "learning_rate": 2.8358513098808957e-06, - "loss": 1.0177, - "step": 3176 - }, - { - "epoch": 0.382011663560392, - "grad_norm": 1.84763773289892, - "learning_rate": 2.835143565378798e-06, - "loss": 0.9707, - "step": 3177 - }, - { - "epoch": 0.38213190645103107, - "grad_norm": 1.9435056085056635, - "learning_rate": 2.8344356941860847e-06, - "loss": 0.983, - "step": 3178 - }, - { - "epoch": 0.3822521493416702, - "grad_norm": 3.0760332930093828, - "learning_rate": 2.8337276964101403e-06, - "loss": 0.8642, - "step": 3179 - }, - { - "epoch": 0.3823723922323093, - "grad_norm": 1.723091399232215, - "learning_rate": 2.833019572158367e-06, - "loss": 0.955, - "step": 3180 - }, - { - "epoch": 0.38249263512294834, - "grad_norm": 1.710699613572528, - "learning_rate": 2.8323113215381872e-06, - "loss": 1.0077, - "step": 3181 - }, - { - "epoch": 0.38261287801358745, - "grad_norm": 1.9838437548225878, - "learning_rate": 2.831602944657042e-06, - "loss": 0.9567, - "step": 3182 - }, - { - "epoch": 0.38273312090422656, - "grad_norm": 2.066136708554761, - "learning_rate": 2.830894441622391e-06, - "loss": 0.9422, - "step": 3183 - }, - { - "epoch": 0.3828533637948656, - "grad_norm": 1.9581056489545325, - "learning_rate": 2.8301858125417134e-06, - "loss": 0.996, - "step": 3184 - }, - { - "epoch": 0.38297360668550473, - "grad_norm": 2.2169719610097283, - "learning_rate": 2.8294770575225082e-06, - "loss": 0.9392, - "step": 3185 - }, - { - "epoch": 0.3830938495761438, - "grad_norm": 1.5228209984297782, - "learning_rate": 2.828768176672293e-06, - "loss": 1.0398, - "step": 3186 - }, - { - "epoch": 0.3832140924667829, - "grad_norm": 1.8990986618558734, - "learning_rate": 2.8280591700986044e-06, - "loss": 0.9283, - "step": 3187 - }, - { - "epoch": 0.383334335357422, - "grad_norm": 1.7159185495408713, - "learning_rate": 2.827350037908999e-06, - "loss": 0.9591, - "step": 3188 - }, - { - "epoch": 0.38345457824806106, - "grad_norm": 1.99018340190567, - "learning_rate": 2.8266407802110496e-06, - "loss": 0.9868, - "step": 3189 - }, - { - "epoch": 0.3835748211387002, - "grad_norm": 1.8080308998218735, - "learning_rate": 2.8259313971123515e-06, - "loss": 0.953, - "step": 3190 - }, - { - "epoch": 0.3836950640293393, - "grad_norm": 1.4824577397567849, - "learning_rate": 2.8252218887205166e-06, - "loss": 0.9774, - "step": 3191 - }, - { - "epoch": 0.38381530691997834, - "grad_norm": 1.5064582870638843, - "learning_rate": 2.824512255143178e-06, - "loss": 1.0091, - "step": 3192 - }, - { - "epoch": 0.38393554981061745, - "grad_norm": 1.6938367476542293, - "learning_rate": 2.8238024964879855e-06, - "loss": 0.9896, - "step": 3193 - }, - { - "epoch": 0.38405579270125656, - "grad_norm": 1.9469598742456424, - "learning_rate": 2.8230926128626095e-06, - "loss": 0.9655, - "step": 3194 - }, - { - "epoch": 0.3841760355918956, - "grad_norm": 1.9795893836753633, - "learning_rate": 2.822382604374738e-06, - "loss": 0.9812, - "step": 3195 - }, - { - "epoch": 0.3842962784825347, - "grad_norm": 2.017055388363885, - "learning_rate": 2.8216724711320793e-06, - "loss": 0.855, - "step": 3196 - }, - { - "epoch": 0.38441652137317384, - "grad_norm": 1.5043106129984056, - "learning_rate": 2.820962213242361e-06, - "loss": 1.0015, - "step": 3197 - }, - { - "epoch": 0.3845367642638129, - "grad_norm": 2.45949034753601, - "learning_rate": 2.8202518308133264e-06, - "loss": 1.0489, - "step": 3198 - }, - { - "epoch": 0.384657007154452, - "grad_norm": 1.6952933736680311, - "learning_rate": 2.8195413239527426e-06, - "loss": 0.9369, - "step": 3199 - }, - { - "epoch": 0.38477725004509106, - "grad_norm": 1.7680127363005125, - "learning_rate": 2.8188306927683906e-06, - "loss": 1.0123, - "step": 3200 - }, - { - "epoch": 0.38489749293573017, - "grad_norm": 1.8602414076024423, - "learning_rate": 2.818119937368074e-06, - "loss": 0.9513, - "step": 3201 - }, - { - "epoch": 0.3850177358263693, - "grad_norm": 3.084216292142746, - "learning_rate": 2.817409057859613e-06, - "loss": 0.851, - "step": 3202 - }, - { - "epoch": 0.38513797871700833, - "grad_norm": 1.692089057517528, - "learning_rate": 2.8166980543508482e-06, - "loss": 0.9871, - "step": 3203 - }, - { - "epoch": 0.38525822160764744, - "grad_norm": 1.6557088723688393, - "learning_rate": 2.815986926949638e-06, - "loss": 0.9955, - "step": 3204 - }, - { - "epoch": 0.38537846449828655, - "grad_norm": 2.2572312184261274, - "learning_rate": 2.8152756757638597e-06, - "loss": 0.9993, - "step": 3205 - }, - { - "epoch": 0.3854987073889256, - "grad_norm": 1.9032698302133468, - "learning_rate": 2.8145643009014093e-06, - "loss": 1.0406, - "step": 3206 - }, - { - "epoch": 0.3856189502795647, - "grad_norm": 1.7149453795607337, - "learning_rate": 2.813852802470202e-06, - "loss": 0.9895, - "step": 3207 - }, - { - "epoch": 0.38573919317020383, - "grad_norm": 1.9932672902822224, - "learning_rate": 2.8131411805781717e-06, - "loss": 0.9156, - "step": 3208 - }, - { - "epoch": 0.3858594360608429, - "grad_norm": 2.882098515284764, - "learning_rate": 2.8124294353332707e-06, - "loss": 0.8382, - "step": 3209 - }, - { - "epoch": 0.385979678951482, - "grad_norm": 1.6378886336776888, - "learning_rate": 2.8117175668434713e-06, - "loss": 0.9756, - "step": 3210 - }, - { - "epoch": 0.3860999218421211, - "grad_norm": 2.054744124992076, - "learning_rate": 2.811005575216762e-06, - "loss": 0.8951, - "step": 3211 - }, - { - "epoch": 0.38622016473276016, - "grad_norm": 1.378742154220987, - "learning_rate": 2.8102934605611513e-06, - "loss": 0.9833, - "step": 3212 - }, - { - "epoch": 0.3863404076233993, - "grad_norm": 5.829797745037719, - "learning_rate": 2.8095812229846665e-06, - "loss": 0.8722, - "step": 3213 - }, - { - "epoch": 0.3864606505140384, - "grad_norm": 2.1087308677253147, - "learning_rate": 2.808868862595355e-06, - "loss": 0.8909, - "step": 3214 - }, - { - "epoch": 0.38658089340467744, - "grad_norm": 1.9112745260756967, - "learning_rate": 2.8081563795012795e-06, - "loss": 0.9979, - "step": 3215 - }, - { - "epoch": 0.38670113629531655, - "grad_norm": 1.6692941300010777, - "learning_rate": 2.807443773810524e-06, - "loss": 0.9397, - "step": 3216 - }, - { - "epoch": 0.3868213791859556, - "grad_norm": 4.20693830778835, - "learning_rate": 2.80673104563119e-06, - "loss": 1.0926, - "step": 3217 - }, - { - "epoch": 0.3869416220765947, - "grad_norm": 1.7005956924419812, - "learning_rate": 2.8060181950713976e-06, - "loss": 0.979, - "step": 3218 - }, - { - "epoch": 0.3870618649672338, - "grad_norm": 1.7633715402373566, - "learning_rate": 2.805305222239286e-06, - "loss": 1.0099, - "step": 3219 - }, - { - "epoch": 0.3871821078578729, - "grad_norm": 1.8614584525915392, - "learning_rate": 2.8045921272430118e-06, - "loss": 0.9403, - "step": 3220 - }, - { - "epoch": 0.387302350748512, - "grad_norm": 2.36201087288288, - "learning_rate": 2.803878910190753e-06, - "loss": 0.9601, - "step": 3221 - }, - { - "epoch": 0.3874225936391511, - "grad_norm": 2.106229796723046, - "learning_rate": 2.8031655711907017e-06, - "loss": 1.0159, - "step": 3222 - }, - { - "epoch": 0.38754283652979016, - "grad_norm": 2.350844264049556, - "learning_rate": 2.8024521103510723e-06, - "loss": 1.0125, - "step": 3223 - }, - { - "epoch": 0.38766307942042927, - "grad_norm": 1.5593817303651805, - "learning_rate": 2.8017385277800952e-06, - "loss": 0.9524, - "step": 3224 - }, - { - "epoch": 0.3877833223110684, - "grad_norm": 2.8573374341376647, - "learning_rate": 2.8010248235860213e-06, - "loss": 0.9443, - "step": 3225 - }, - { - "epoch": 0.38790356520170743, - "grad_norm": 0.9458714873786959, - "learning_rate": 2.8003109978771192e-06, - "loss": 0.8631, - "step": 3226 - }, - { - "epoch": 0.38802380809234654, - "grad_norm": 2.095802781152213, - "learning_rate": 2.799597050761674e-06, - "loss": 0.9877, - "step": 3227 - }, - { - "epoch": 0.38814405098298566, - "grad_norm": 1.9921336447224265, - "learning_rate": 2.7988829823479924e-06, - "loss": 0.9966, - "step": 3228 - }, - { - "epoch": 0.3882642938736247, - "grad_norm": 2.4484986824030543, - "learning_rate": 2.7981687927443976e-06, - "loss": 0.8418, - "step": 3229 - }, - { - "epoch": 0.3883845367642638, - "grad_norm": 1.5917303715018833, - "learning_rate": 2.797454482059231e-06, - "loss": 1.0547, - "step": 3230 - }, - { - "epoch": 0.3885047796549029, - "grad_norm": 1.70491668531174, - "learning_rate": 2.7967400504008537e-06, - "loss": 1.0426, - "step": 3231 - }, - { - "epoch": 0.388625022545542, - "grad_norm": 0.9739830435543885, - "learning_rate": 2.7960254978776456e-06, - "loss": 0.8186, - "step": 3232 - }, - { - "epoch": 0.3887452654361811, - "grad_norm": 2.1387813453004894, - "learning_rate": 2.7953108245980006e-06, - "loss": 1.004, - "step": 3233 - }, - { - "epoch": 0.38886550832682015, - "grad_norm": 1.86910184782138, - "learning_rate": 2.7945960306703365e-06, - "loss": 0.9422, - "step": 3234 - }, - { - "epoch": 0.38898575121745926, - "grad_norm": 1.6528834550484497, - "learning_rate": 2.7938811162030865e-06, - "loss": 0.8515, - "step": 3235 - }, - { - "epoch": 0.3891059941080984, - "grad_norm": 1.5643705234318752, - "learning_rate": 2.793166081304702e-06, - "loss": 1.024, - "step": 3236 - }, - { - "epoch": 0.38922623699873743, - "grad_norm": 1.7721876537986854, - "learning_rate": 2.7924509260836543e-06, - "loss": 1.0238, - "step": 3237 - }, - { - "epoch": 0.38934647988937654, - "grad_norm": 1.5401033086928075, - "learning_rate": 2.791735650648431e-06, - "loss": 0.8827, - "step": 3238 - }, - { - "epoch": 0.38946672278001565, - "grad_norm": 1.8229012479584157, - "learning_rate": 2.791020255107538e-06, - "loss": 0.9468, - "step": 3239 - }, - { - "epoch": 0.3895869656706547, - "grad_norm": 3.9127314392472465, - "learning_rate": 2.7903047395695023e-06, - "loss": 0.9981, - "step": 3240 - }, - { - "epoch": 0.3897072085612938, - "grad_norm": 1.9733339259859861, - "learning_rate": 2.789589104142865e-06, - "loss": 1.0988, - "step": 3241 - }, - { - "epoch": 0.3898274514519329, - "grad_norm": 1.5492563124819394, - "learning_rate": 2.7888733489361895e-06, - "loss": 0.97, - "step": 3242 - }, - { - "epoch": 0.389947694342572, - "grad_norm": 1.022168486560309, - "learning_rate": 2.788157474058054e-06, - "loss": 0.8446, - "step": 3243 - }, - { - "epoch": 0.3900679372332111, - "grad_norm": 1.571225636247067, - "learning_rate": 2.7874414796170555e-06, - "loss": 0.9003, - "step": 3244 - }, - { - "epoch": 0.3901881801238502, - "grad_norm": 2.115774210819631, - "learning_rate": 2.7867253657218113e-06, - "loss": 1.0393, - "step": 3245 - }, - { - "epoch": 0.39030842301448926, - "grad_norm": 3.938314686614159, - "learning_rate": 2.7860091324809544e-06, - "loss": 0.9283, - "step": 3246 - }, - { - "epoch": 0.39042866590512837, - "grad_norm": 1.7352471435593713, - "learning_rate": 2.7852927800031377e-06, - "loss": 1.0067, - "step": 3247 - }, - { - "epoch": 0.3905489087957674, - "grad_norm": 1.780333567664245, - "learning_rate": 2.7845763083970298e-06, - "loss": 1.0288, - "step": 3248 - }, - { - "epoch": 0.39066915168640653, - "grad_norm": 1.9036888051607939, - "learning_rate": 2.7838597177713205e-06, - "loss": 1.0279, - "step": 3249 - }, - { - "epoch": 0.39078939457704565, - "grad_norm": 2.202225846581546, - "learning_rate": 2.7831430082347143e-06, - "loss": 0.9366, - "step": 3250 - }, - { - "epoch": 0.3909096374676847, - "grad_norm": 2.0753259379844047, - "learning_rate": 2.7824261798959373e-06, - "loss": 1.023, - "step": 3251 - }, - { - "epoch": 0.3910298803583238, - "grad_norm": 2.045660226287243, - "learning_rate": 2.78170923286373e-06, - "loss": 0.9935, - "step": 3252 - }, - { - "epoch": 0.3911501232489629, - "grad_norm": 1.8044745910031001, - "learning_rate": 2.780992167246854e-06, - "loss": 1.0275, - "step": 3253 - }, - { - "epoch": 0.391270366139602, - "grad_norm": 1.1126006605920882, - "learning_rate": 2.7802749831540883e-06, - "loss": 0.9744, - "step": 3254 - }, - { - "epoch": 0.3913906090302411, - "grad_norm": 1.8569152975572811, - "learning_rate": 2.7795576806942268e-06, - "loss": 1.0174, - "step": 3255 - }, - { - "epoch": 0.3915108519208802, - "grad_norm": 1.023695410925374, - "learning_rate": 2.778840259976085e-06, - "loss": 0.7724, - "step": 3256 - }, - { - "epoch": 0.39163109481151925, - "grad_norm": 2.028562910534112, - "learning_rate": 2.778122721108495e-06, - "loss": 0.9755, - "step": 3257 - }, - { - "epoch": 0.39175133770215836, - "grad_norm": 1.7989178530569758, - "learning_rate": 2.7774050642003076e-06, - "loss": 1.0823, - "step": 3258 - }, - { - "epoch": 0.3918715805927975, - "grad_norm": 1.7270775626545922, - "learning_rate": 2.7766872893603896e-06, - "loss": 1.1435, - "step": 3259 - }, - { - "epoch": 0.39199182348343653, - "grad_norm": 1.519681476923879, - "learning_rate": 2.7759693966976275e-06, - "loss": 0.9274, - "step": 3260 - }, - { - "epoch": 0.39211206637407564, - "grad_norm": 2.0520865219272166, - "learning_rate": 2.7752513863209242e-06, - "loss": 1.0472, - "step": 3261 - }, - { - "epoch": 0.39223230926471475, - "grad_norm": 1.5551249593098952, - "learning_rate": 2.774533258339203e-06, - "loss": 1.0365, - "step": 3262 - }, - { - "epoch": 0.3923525521553538, - "grad_norm": 2.1681164390502716, - "learning_rate": 2.7738150128614014e-06, - "loss": 0.9921, - "step": 3263 - }, - { - "epoch": 0.3924727950459929, - "grad_norm": 1.8624230413730474, - "learning_rate": 2.7730966499964777e-06, - "loss": 1.0941, - "step": 3264 - }, - { - "epoch": 0.39259303793663197, - "grad_norm": 2.280278351674309, - "learning_rate": 2.772378169853408e-06, - "loss": 0.9972, - "step": 3265 - }, - { - "epoch": 0.3927132808272711, - "grad_norm": 1.9712247281096433, - "learning_rate": 2.771659572541183e-06, - "loss": 0.9411, - "step": 3266 - }, - { - "epoch": 0.3928335237179102, - "grad_norm": 2.533359637458499, - "learning_rate": 2.7709408581688143e-06, - "loss": 1.0695, - "step": 3267 - }, - { - "epoch": 0.39295376660854925, - "grad_norm": 1.4910535248398624, - "learning_rate": 2.7702220268453307e-06, - "loss": 1.0771, - "step": 3268 - }, - { - "epoch": 0.39307400949918836, - "grad_norm": 1.8534110454806083, - "learning_rate": 2.7695030786797785e-06, - "loss": 1.048, - "step": 3269 - }, - { - "epoch": 0.39319425238982747, - "grad_norm": 1.843739008466641, - "learning_rate": 2.7687840137812206e-06, - "loss": 0.9397, - "step": 3270 - }, - { - "epoch": 0.3933144952804665, - "grad_norm": 0.8921756295986136, - "learning_rate": 2.7680648322587395e-06, - "loss": 0.83, - "step": 3271 - }, - { - "epoch": 0.39343473817110564, - "grad_norm": 2.461595429164745, - "learning_rate": 2.7673455342214334e-06, - "loss": 1.0136, - "step": 3272 - }, - { - "epoch": 0.39355498106174475, - "grad_norm": 2.0331711120723646, - "learning_rate": 2.7666261197784198e-06, - "loss": 0.954, - "step": 3273 - }, - { - "epoch": 0.3936752239523838, - "grad_norm": 1.8597418158805321, - "learning_rate": 2.7659065890388336e-06, - "loss": 0.9713, - "step": 3274 - }, - { - "epoch": 0.3937954668430229, - "grad_norm": 1.8487503006362929, - "learning_rate": 2.7651869421118266e-06, - "loss": 1.0442, - "step": 3275 - }, - { - "epoch": 0.393915709733662, - "grad_norm": 1.7749684989175605, - "learning_rate": 2.76446717910657e-06, - "loss": 1.0324, - "step": 3276 - }, - { - "epoch": 0.3940359526243011, - "grad_norm": 2.0996174373028347, - "learning_rate": 2.763747300132249e-06, - "loss": 0.9648, - "step": 3277 - }, - { - "epoch": 0.3941561955149402, - "grad_norm": 1.6545675117135206, - "learning_rate": 2.7630273052980704e-06, - "loss": 1.0669, - "step": 3278 - }, - { - "epoch": 0.39427643840557924, - "grad_norm": 2.1707169992780244, - "learning_rate": 2.7623071947132554e-06, - "loss": 0.8809, - "step": 3279 - }, - { - "epoch": 0.39439668129621835, - "grad_norm": 1.8053260103036803, - "learning_rate": 2.7615869684870458e-06, - "loss": 0.9793, - "step": 3280 - }, - { - "epoch": 0.39451692418685746, - "grad_norm": 1.7153507256622562, - "learning_rate": 2.7608666267286986e-06, - "loss": 1.0413, - "step": 3281 - }, - { - "epoch": 0.3946371670774965, - "grad_norm": 2.544530995140262, - "learning_rate": 2.760146169547489e-06, - "loss": 1.0608, - "step": 3282 - }, - { - "epoch": 0.39475740996813563, - "grad_norm": 1.3719307282062463, - "learning_rate": 2.75942559705271e-06, - "loss": 0.9644, - "step": 3283 - }, - { - "epoch": 0.39487765285877474, - "grad_norm": 1.8241003353396519, - "learning_rate": 2.7587049093536713e-06, - "loss": 1.0829, - "step": 3284 - }, - { - "epoch": 0.3949978957494138, - "grad_norm": 1.643875607525018, - "learning_rate": 2.757984106559701e-06, - "loss": 1.0036, - "step": 3285 - }, - { - "epoch": 0.3951181386400529, - "grad_norm": 2.097014885001411, - "learning_rate": 2.7572631887801446e-06, - "loss": 0.923, - "step": 3286 - }, - { - "epoch": 0.395238381530692, - "grad_norm": 1.5870081101939237, - "learning_rate": 2.7565421561243654e-06, - "loss": 0.9534, - "step": 3287 - }, - { - "epoch": 0.3953586244213311, - "grad_norm": 2.206991711827813, - "learning_rate": 2.7558210087017413e-06, - "loss": 1.0216, - "step": 3288 - }, - { - "epoch": 0.3954788673119702, - "grad_norm": 2.193443507332897, - "learning_rate": 2.7550997466216724e-06, - "loss": 0.9422, - "step": 3289 - }, - { - "epoch": 0.3955991102026093, - "grad_norm": 1.6860065209204933, - "learning_rate": 2.7543783699935714e-06, - "loss": 1.0066, - "step": 3290 - }, - { - "epoch": 0.39571935309324835, - "grad_norm": 2.3372399006934557, - "learning_rate": 2.753656878926872e-06, - "loss": 1.0586, - "step": 3291 - }, - { - "epoch": 0.39583959598388746, - "grad_norm": 1.6563925771704002, - "learning_rate": 2.752935273531023e-06, - "loss": 0.9358, - "step": 3292 - }, - { - "epoch": 0.39595983887452657, - "grad_norm": 1.786999641759972, - "learning_rate": 2.752213553915492e-06, - "loss": 0.9894, - "step": 3293 - }, - { - "epoch": 0.3960800817651656, - "grad_norm": 0.8947013460047915, - "learning_rate": 2.751491720189762e-06, - "loss": 0.8781, - "step": 3294 - }, - { - "epoch": 0.39620032465580474, - "grad_norm": 2.133961015489998, - "learning_rate": 2.7507697724633364e-06, - "loss": 1.1096, - "step": 3295 - }, - { - "epoch": 0.3963205675464438, - "grad_norm": 0.8428515136198252, - "learning_rate": 2.7500477108457327e-06, - "loss": 0.7614, - "step": 3296 - }, - { - "epoch": 0.3964408104370829, - "grad_norm": 1.9110670668063399, - "learning_rate": 2.7493255354464877e-06, - "loss": 1.0115, - "step": 3297 - }, - { - "epoch": 0.396561053327722, - "grad_norm": 2.0627099132644164, - "learning_rate": 2.748603246375156e-06, - "loss": 0.9562, - "step": 3298 - }, - { - "epoch": 0.39668129621836107, - "grad_norm": 1.9556304121079722, - "learning_rate": 2.7478808437413055e-06, - "loss": 0.8941, - "step": 3299 - }, - { - "epoch": 0.3968015391090002, - "grad_norm": 1.963040140255578, - "learning_rate": 2.7471583276545263e-06, - "loss": 0.8632, - "step": 3300 - }, - { - "epoch": 0.3969217819996393, - "grad_norm": 2.4843181867521205, - "learning_rate": 2.7464356982244224e-06, - "loss": 0.9098, - "step": 3301 - }, - { - "epoch": 0.39704202489027834, - "grad_norm": 0.8928667933078754, - "learning_rate": 2.745712955560617e-06, - "loss": 0.8407, - "step": 3302 - }, - { - "epoch": 0.39716226778091746, - "grad_norm": 2.5456917477292467, - "learning_rate": 2.7449900997727496e-06, - "loss": 0.9707, - "step": 3303 - }, - { - "epoch": 0.39728251067155657, - "grad_norm": 1.5957445118275004, - "learning_rate": 2.744267130970476e-06, - "loss": 1.0358, - "step": 3304 - }, - { - "epoch": 0.3974027535621956, - "grad_norm": 1.7472631306484403, - "learning_rate": 2.7435440492634697e-06, - "loss": 0.9629, - "step": 3305 - }, - { - "epoch": 0.39752299645283473, - "grad_norm": 2.0896990859684705, - "learning_rate": 2.7428208547614228e-06, - "loss": 0.8642, - "step": 3306 - }, - { - "epoch": 0.39764323934347384, - "grad_norm": 4.22057997962127, - "learning_rate": 2.742097547574043e-06, - "loss": 0.9663, - "step": 3307 - }, - { - "epoch": 0.3977634822341129, - "grad_norm": 3.149308531419908, - "learning_rate": 2.7413741278110544e-06, - "loss": 0.969, - "step": 3308 - }, - { - "epoch": 0.397883725124752, - "grad_norm": 2.2121399753561817, - "learning_rate": 2.7406505955822016e-06, - "loss": 0.8932, - "step": 3309 - }, - { - "epoch": 0.39800396801539106, - "grad_norm": 2.122565164812551, - "learning_rate": 2.7399269509972415e-06, - "loss": 0.8672, - "step": 3310 - }, - { - "epoch": 0.3981242109060302, - "grad_norm": 2.2361490102943127, - "learning_rate": 2.7392031941659514e-06, - "loss": 1.0399, - "step": 3311 - }, - { - "epoch": 0.3982444537966693, - "grad_norm": 1.731739688296847, - "learning_rate": 2.7384793251981244e-06, - "loss": 1.0594, - "step": 3312 - }, - { - "epoch": 0.39836469668730834, - "grad_norm": 5.689680750359868, - "learning_rate": 2.737755344203571e-06, - "loss": 1.0036, - "step": 3313 - }, - { - "epoch": 0.39848493957794745, - "grad_norm": 1.5716079758128845, - "learning_rate": 2.7370312512921186e-06, - "loss": 0.9987, - "step": 3314 - }, - { - "epoch": 0.39860518246858656, - "grad_norm": 2.252609541046846, - "learning_rate": 2.736307046573611e-06, - "loss": 0.9645, - "step": 3315 - }, - { - "epoch": 0.3987254253592256, - "grad_norm": 1.5066818188155766, - "learning_rate": 2.73558273015791e-06, - "loss": 1.0217, - "step": 3316 - }, - { - "epoch": 0.3988456682498647, - "grad_norm": 2.032301175948876, - "learning_rate": 2.734858302154894e-06, - "loss": 0.9149, - "step": 3317 - }, - { - "epoch": 0.39896591114050384, - "grad_norm": 1.8864524763982866, - "learning_rate": 2.734133762674457e-06, - "loss": 0.9568, - "step": 3318 - }, - { - "epoch": 0.3990861540311429, - "grad_norm": 1.6510040407275748, - "learning_rate": 2.7334091118265124e-06, - "loss": 0.9048, - "step": 3319 - }, - { - "epoch": 0.399206396921782, - "grad_norm": 0.7147083258419703, - "learning_rate": 2.732684349720989e-06, - "loss": 0.7832, - "step": 3320 - }, - { - "epoch": 0.3993266398124211, - "grad_norm": 1.5796376145935247, - "learning_rate": 2.7319594764678318e-06, - "loss": 0.9505, - "step": 3321 - }, - { - "epoch": 0.39944688270306017, - "grad_norm": 1.5641864830364662, - "learning_rate": 2.7312344921770044e-06, - "loss": 1.0347, - "step": 3322 - }, - { - "epoch": 0.3995671255936993, - "grad_norm": 1.778597010495809, - "learning_rate": 2.7305093969584857e-06, - "loss": 0.9835, - "step": 3323 - }, - { - "epoch": 0.3996873684843384, - "grad_norm": 1.5685169335601283, - "learning_rate": 2.729784190922272e-06, - "loss": 0.9866, - "step": 3324 - }, - { - "epoch": 0.39980761137497745, - "grad_norm": 0.8834629869377327, - "learning_rate": 2.729058874178378e-06, - "loss": 0.7884, - "step": 3325 - }, - { - "epoch": 0.39992785426561656, - "grad_norm": 9.394713160920697, - "learning_rate": 2.7283334468368315e-06, - "loss": 0.8973, - "step": 3326 - }, - { - "epoch": 0.4000480971562556, - "grad_norm": 1.8392789909356309, - "learning_rate": 2.72760790900768e-06, - "loss": 0.9329, - "step": 3327 - }, - { - "epoch": 0.4001683400468947, - "grad_norm": 1.838779927521695, - "learning_rate": 2.7268822608009875e-06, - "loss": 0.9936, - "step": 3328 - }, - { - "epoch": 0.40028858293753383, - "grad_norm": 1.8077009399070603, - "learning_rate": 2.726156502326834e-06, - "loss": 0.9823, - "step": 3329 - }, - { - "epoch": 0.4004088258281729, - "grad_norm": 0.8552419283390371, - "learning_rate": 2.725430633695316e-06, - "loss": 0.8246, - "step": 3330 - }, - { - "epoch": 0.400529068718812, - "grad_norm": 0.9708480880227324, - "learning_rate": 2.7247046550165485e-06, - "loss": 0.8205, - "step": 3331 - }, - { - "epoch": 0.4006493116094511, - "grad_norm": 1.7428152552804, - "learning_rate": 2.7239785664006606e-06, - "loss": 0.9543, - "step": 3332 - }, - { - "epoch": 0.40076955450009016, - "grad_norm": 1.06118940416288, - "learning_rate": 2.7232523679578002e-06, - "loss": 0.8622, - "step": 3333 - }, - { - "epoch": 0.4008897973907293, - "grad_norm": 2.104010081995897, - "learning_rate": 2.7225260597981295e-06, - "loss": 0.9964, - "step": 3334 - }, - { - "epoch": 0.4010100402813684, - "grad_norm": 2.4627074643184015, - "learning_rate": 2.721799642031831e-06, - "loss": 0.9875, - "step": 3335 - }, - { - "epoch": 0.40113028317200744, - "grad_norm": 1.9647581411713122, - "learning_rate": 2.721073114769101e-06, - "loss": 0.9772, - "step": 3336 - }, - { - "epoch": 0.40125052606264655, - "grad_norm": 1.962751423469001, - "learning_rate": 2.7203464781201523e-06, - "loss": 0.9545, - "step": 3337 - }, - { - "epoch": 0.40137076895328566, - "grad_norm": 2.40135944329526, - "learning_rate": 2.719619732195215e-06, - "loss": 0.9777, - "step": 3338 - }, - { - "epoch": 0.4014910118439247, - "grad_norm": 2.57511772701432, - "learning_rate": 2.7188928771045377e-06, - "loss": 0.9276, - "step": 3339 - }, - { - "epoch": 0.4016112547345638, - "grad_norm": 1.6910914597279458, - "learning_rate": 2.7181659129583815e-06, - "loss": 0.9967, - "step": 3340 - }, - { - "epoch": 0.4017314976252029, - "grad_norm": 2.1300802650909616, - "learning_rate": 2.7174388398670276e-06, - "loss": 0.9689, - "step": 3341 - }, - { - "epoch": 0.401851740515842, - "grad_norm": 1.8966148638005966, - "learning_rate": 2.716711657940773e-06, - "loss": 1.1185, - "step": 3342 - }, - { - "epoch": 0.4019719834064811, - "grad_norm": 1.0720978485757615, - "learning_rate": 2.7159843672899284e-06, - "loss": 0.7978, - "step": 3343 - }, - { - "epoch": 0.40209222629712016, - "grad_norm": 1.9965462666268203, - "learning_rate": 2.715256968024825e-06, - "loss": 1.0113, - "step": 3344 - }, - { - "epoch": 0.40221246918775927, - "grad_norm": 1.4124175562471568, - "learning_rate": 2.7145294602558083e-06, - "loss": 1.0262, - "step": 3345 - }, - { - "epoch": 0.4023327120783984, - "grad_norm": 2.1409123360131224, - "learning_rate": 2.713801844093241e-06, - "loss": 0.9023, - "step": 3346 - }, - { - "epoch": 0.40245295496903744, - "grad_norm": 1.9068463752771145, - "learning_rate": 2.7130741196475014e-06, - "loss": 1.0784, - "step": 3347 - }, - { - "epoch": 0.40257319785967655, - "grad_norm": 1.8518493687648374, - "learning_rate": 2.7123462870289848e-06, - "loss": 1.0065, - "step": 3348 - }, - { - "epoch": 0.40269344075031566, - "grad_norm": 1.6169675963156436, - "learning_rate": 2.711618346348102e-06, - "loss": 1.0075, - "step": 3349 - }, - { - "epoch": 0.4028136836409547, - "grad_norm": 1.5662568513396153, - "learning_rate": 2.7108902977152825e-06, - "loss": 0.8264, - "step": 3350 - }, - { - "epoch": 0.4029339265315938, - "grad_norm": 2.3515860217365967, - "learning_rate": 2.7101621412409704e-06, - "loss": 0.954, - "step": 3351 - }, - { - "epoch": 0.40305416942223293, - "grad_norm": 1.7778055450469705, - "learning_rate": 2.7094338770356256e-06, - "loss": 1.0568, - "step": 3352 - }, - { - "epoch": 0.403174412312872, - "grad_norm": 1.8225943870854, - "learning_rate": 2.708705505209726e-06, - "loss": 0.8391, - "step": 3353 - }, - { - "epoch": 0.4032946552035111, - "grad_norm": 1.7938272918105127, - "learning_rate": 2.7079770258737646e-06, - "loss": 1.1106, - "step": 3354 - }, - { - "epoch": 0.4034148980941502, - "grad_norm": 2.0596564371217476, - "learning_rate": 2.707248439138251e-06, - "loss": 0.9505, - "step": 3355 - }, - { - "epoch": 0.40353514098478926, - "grad_norm": 1.7750862960254643, - "learning_rate": 2.7065197451137114e-06, - "loss": 0.854, - "step": 3356 - }, - { - "epoch": 0.4036553838754284, - "grad_norm": 2.41361684296585, - "learning_rate": 2.7057909439106894e-06, - "loss": 0.8766, - "step": 3357 - }, - { - "epoch": 0.40377562676606743, - "grad_norm": 1.6914375425620019, - "learning_rate": 2.7050620356397417e-06, - "loss": 0.9775, - "step": 3358 - }, - { - "epoch": 0.40389586965670654, - "grad_norm": 1.6632732973495443, - "learning_rate": 2.7043330204114437e-06, - "loss": 0.9264, - "step": 3359 - }, - { - "epoch": 0.40401611254734565, - "grad_norm": 1.8541072950251434, - "learning_rate": 2.7036038983363862e-06, - "loss": 1.0509, - "step": 3360 - }, - { - "epoch": 0.4041363554379847, - "grad_norm": 1.5801049832582883, - "learning_rate": 2.702874669525177e-06, - "loss": 1.0353, - "step": 3361 - }, - { - "epoch": 0.4042565983286238, - "grad_norm": 1.9224864948329135, - "learning_rate": 2.7021453340884394e-06, - "loss": 0.8952, - "step": 3362 - }, - { - "epoch": 0.40437684121926293, - "grad_norm": 2.4718059506037657, - "learning_rate": 2.7014158921368125e-06, - "loss": 0.9312, - "step": 3363 - }, - { - "epoch": 0.404497084109902, - "grad_norm": 1.7365441032452558, - "learning_rate": 2.700686343780953e-06, - "loss": 1.0512, - "step": 3364 - }, - { - "epoch": 0.4046173270005411, - "grad_norm": 3.771655560346617, - "learning_rate": 2.699956689131532e-06, - "loss": 1.083, - "step": 3365 - }, - { - "epoch": 0.4047375698911802, - "grad_norm": 2.0787982280410224, - "learning_rate": 2.699226928299238e-06, - "loss": 1.0475, - "step": 3366 - }, - { - "epoch": 0.40485781278181926, - "grad_norm": 2.093422935333814, - "learning_rate": 2.698497061394774e-06, - "loss": 0.9974, - "step": 3367 - }, - { - "epoch": 0.40497805567245837, - "grad_norm": 1.4362137886708193, - "learning_rate": 2.6977670885288627e-06, - "loss": 1.0044, - "step": 3368 - }, - { - "epoch": 0.4050982985630975, - "grad_norm": 1.6375747663595335, - "learning_rate": 2.6970370098122378e-06, - "loss": 0.952, - "step": 3369 - }, - { - "epoch": 0.40521854145373654, - "grad_norm": 1.4864047177364104, - "learning_rate": 2.6963068253556535e-06, - "loss": 1.0535, - "step": 3370 - }, - { - "epoch": 0.40533878434437565, - "grad_norm": 1.7355313269695538, - "learning_rate": 2.6955765352698763e-06, - "loss": 1.0529, - "step": 3371 - }, - { - "epoch": 0.40545902723501476, - "grad_norm": 2.048950657121494, - "learning_rate": 2.6948461396656923e-06, - "loss": 0.9296, - "step": 3372 - }, - { - "epoch": 0.4055792701256538, - "grad_norm": 2.122568983882293, - "learning_rate": 2.6941156386539013e-06, - "loss": 0.9381, - "step": 3373 - }, - { - "epoch": 0.4056995130162929, - "grad_norm": 2.8834646257559395, - "learning_rate": 2.6933850323453203e-06, - "loss": 1.0093, - "step": 3374 - }, - { - "epoch": 0.405819755906932, - "grad_norm": 1.7440486939264337, - "learning_rate": 2.6926543208507806e-06, - "loss": 0.9433, - "step": 3375 - }, - { - "epoch": 0.4059399987975711, - "grad_norm": 2.417152871079268, - "learning_rate": 2.6919235042811316e-06, - "loss": 0.9983, - "step": 3376 - }, - { - "epoch": 0.4060602416882102, - "grad_norm": 2.6716736834793453, - "learning_rate": 2.691192582747237e-06, - "loss": 0.9562, - "step": 3377 - }, - { - "epoch": 0.40618048457884925, - "grad_norm": 1.7494658608672087, - "learning_rate": 2.6904615563599765e-06, - "loss": 0.9305, - "step": 3378 - }, - { - "epoch": 0.40630072746948837, - "grad_norm": 1.687471884034077, - "learning_rate": 2.6897304252302477e-06, - "loss": 1.0279, - "step": 3379 - }, - { - "epoch": 0.4064209703601275, - "grad_norm": 0.8736946041463185, - "learning_rate": 2.688999189468962e-06, - "loss": 0.7652, - "step": 3380 - }, - { - "epoch": 0.40654121325076653, - "grad_norm": 3.5177902674525225, - "learning_rate": 2.6882678491870464e-06, - "loss": 0.9556, - "step": 3381 - }, - { - "epoch": 0.40666145614140564, - "grad_norm": 1.6991558194839307, - "learning_rate": 2.6875364044954453e-06, - "loss": 0.9053, - "step": 3382 - }, - { - "epoch": 0.40678169903204475, - "grad_norm": 1.6464830717577137, - "learning_rate": 2.6868048555051185e-06, - "loss": 1.0186, - "step": 3383 - }, - { - "epoch": 0.4069019419226838, - "grad_norm": 2.5376317145077105, - "learning_rate": 2.686073202327041e-06, - "loss": 1.0611, - "step": 3384 - }, - { - "epoch": 0.4070221848133229, - "grad_norm": 1.627793185638889, - "learning_rate": 2.6853414450722043e-06, - "loss": 0.9346, - "step": 3385 - }, - { - "epoch": 0.40714242770396203, - "grad_norm": 1.666099197420289, - "learning_rate": 2.684609583851616e-06, - "loss": 1.0426, - "step": 3386 - }, - { - "epoch": 0.4072626705946011, - "grad_norm": 2.1052123640859586, - "learning_rate": 2.683877618776297e-06, - "loss": 1.0031, - "step": 3387 - }, - { - "epoch": 0.4073829134852402, - "grad_norm": 2.3170920477748242, - "learning_rate": 2.6831455499572876e-06, - "loss": 0.9381, - "step": 3388 - }, - { - "epoch": 0.40750315637587925, - "grad_norm": 1.9218610437398178, - "learning_rate": 2.682413377505641e-06, - "loss": 0.9763, - "step": 3389 - }, - { - "epoch": 0.40762339926651836, - "grad_norm": 1.7360348019253802, - "learning_rate": 2.6816811015324284e-06, - "loss": 0.9639, - "step": 3390 - }, - { - "epoch": 0.40774364215715747, - "grad_norm": 0.8609517284955287, - "learning_rate": 2.6809487221487343e-06, - "loss": 0.8155, - "step": 3391 - }, - { - "epoch": 0.4078638850477965, - "grad_norm": 2.235792230755999, - "learning_rate": 2.6802162394656605e-06, - "loss": 1.0268, - "step": 3392 - }, - { - "epoch": 0.40798412793843564, - "grad_norm": 1.8735559624895324, - "learning_rate": 2.679483653594324e-06, - "loss": 0.9204, - "step": 3393 - }, - { - "epoch": 0.40810437082907475, - "grad_norm": 2.157246787726318, - "learning_rate": 2.678750964645857e-06, - "loss": 0.9603, - "step": 3394 - }, - { - "epoch": 0.4082246137197138, - "grad_norm": 2.6206866657435612, - "learning_rate": 2.6780181727314094e-06, - "loss": 1.0437, - "step": 3395 - }, - { - "epoch": 0.4083448566103529, - "grad_norm": 1.6583788883079973, - "learning_rate": 2.6772852779621435e-06, - "loss": 0.9771, - "step": 3396 - }, - { - "epoch": 0.408465099500992, - "grad_norm": 2.03459830516166, - "learning_rate": 2.676552280449239e-06, - "loss": 1.0465, - "step": 3397 - }, - { - "epoch": 0.4085853423916311, - "grad_norm": 2.2253671750344264, - "learning_rate": 2.6758191803038917e-06, - "loss": 0.9576, - "step": 3398 - }, - { - "epoch": 0.4087055852822702, - "grad_norm": 1.6399242539475116, - "learning_rate": 2.6750859776373125e-06, - "loss": 1.0316, - "step": 3399 - }, - { - "epoch": 0.4088258281729093, - "grad_norm": 0.8942919081950484, - "learning_rate": 2.674352672560727e-06, - "loss": 0.8126, - "step": 3400 - }, - { - "epoch": 0.40894607106354836, - "grad_norm": 1.8719897583697631, - "learning_rate": 2.673619265185377e-06, - "loss": 0.9691, - "step": 3401 - }, - { - "epoch": 0.40906631395418747, - "grad_norm": 2.028123415342397, - "learning_rate": 2.672885755622521e-06, - "loss": 0.972, - "step": 3402 - }, - { - "epoch": 0.4091865568448266, - "grad_norm": 2.2512102051330776, - "learning_rate": 2.67215214398343e-06, - "loss": 0.8977, - "step": 3403 - }, - { - "epoch": 0.40930679973546563, - "grad_norm": 2.572575833581033, - "learning_rate": 2.671418430379393e-06, - "loss": 0.9768, - "step": 3404 - }, - { - "epoch": 0.40942704262610474, - "grad_norm": 1.7875119615701451, - "learning_rate": 2.670684614921715e-06, - "loss": 1.0312, - "step": 3405 - }, - { - "epoch": 0.4095472855167438, - "grad_norm": 2.7328862088155943, - "learning_rate": 2.6699506977217128e-06, - "loss": 0.8974, - "step": 3406 - }, - { - "epoch": 0.4096675284073829, - "grad_norm": 1.9464332997975844, - "learning_rate": 2.6692166788907233e-06, - "loss": 0.8979, - "step": 3407 - }, - { - "epoch": 0.409787771298022, - "grad_norm": 1.7359102344861048, - "learning_rate": 2.6684825585400957e-06, - "loss": 0.9621, - "step": 3408 - }, - { - "epoch": 0.4099080141886611, - "grad_norm": 0.9891763486016093, - "learning_rate": 2.6677483367811947e-06, - "loss": 0.9148, - "step": 3409 - }, - { - "epoch": 0.4100282570793002, - "grad_norm": 1.557688667785797, - "learning_rate": 2.6670140137254028e-06, - "loss": 0.963, - "step": 3410 - }, - { - "epoch": 0.4101484999699393, - "grad_norm": 2.469579340060126, - "learning_rate": 2.666279589484115e-06, - "loss": 1.0774, - "step": 3411 - }, - { - "epoch": 0.41026874286057835, - "grad_norm": 1.7887580979095283, - "learning_rate": 2.6655450641687435e-06, - "loss": 1.0031, - "step": 3412 - }, - { - "epoch": 0.41038898575121746, - "grad_norm": 1.5909916812959781, - "learning_rate": 2.664810437890715e-06, - "loss": 0.8862, - "step": 3413 - }, - { - "epoch": 0.41050922864185657, - "grad_norm": 1.8718315055774162, - "learning_rate": 2.6640757107614714e-06, - "loss": 0.9952, - "step": 3414 - }, - { - "epoch": 0.4106294715324956, - "grad_norm": 2.071378613325244, - "learning_rate": 2.6633408828924697e-06, - "loss": 0.9005, - "step": 3415 - }, - { - "epoch": 0.41074971442313474, - "grad_norm": 1.5242874513254343, - "learning_rate": 2.662605954395185e-06, - "loss": 0.8976, - "step": 3416 - }, - { - "epoch": 0.41086995731377385, - "grad_norm": 1.6459327941328965, - "learning_rate": 2.6618709253811027e-06, - "loss": 1.0328, - "step": 3417 - }, - { - "epoch": 0.4109902002044129, - "grad_norm": 1.438077147349587, - "learning_rate": 2.6611357959617277e-06, - "loss": 1.076, - "step": 3418 - }, - { - "epoch": 0.411110443095052, - "grad_norm": 1.7380538963196281, - "learning_rate": 2.660400566248578e-06, - "loss": 1.1062, - "step": 3419 - }, - { - "epoch": 0.41123068598569107, - "grad_norm": 2.583770858466373, - "learning_rate": 2.6596652363531876e-06, - "loss": 0.8671, - "step": 3420 - }, - { - "epoch": 0.4113509288763302, - "grad_norm": 1.4586933055016675, - "learning_rate": 2.6589298063871055e-06, - "loss": 0.9787, - "step": 3421 - }, - { - "epoch": 0.4114711717669693, - "grad_norm": 1.7449204430575331, - "learning_rate": 2.658194276461895e-06, - "loss": 0.9012, - "step": 3422 - }, - { - "epoch": 0.41159141465760835, - "grad_norm": 1.7334082030974913, - "learning_rate": 2.6574586466891368e-06, - "loss": 0.8741, - "step": 3423 - }, - { - "epoch": 0.41171165754824746, - "grad_norm": 1.8856294065834271, - "learning_rate": 2.6567229171804247e-06, - "loss": 0.8443, - "step": 3424 - }, - { - "epoch": 0.41183190043888657, - "grad_norm": 2.1111155922602336, - "learning_rate": 2.655987088047368e-06, - "loss": 1.0747, - "step": 3425 - }, - { - "epoch": 0.4119521433295256, - "grad_norm": 2.0284177548286864, - "learning_rate": 2.6552511594015912e-06, - "loss": 0.982, - "step": 3426 - }, - { - "epoch": 0.41207238622016473, - "grad_norm": 1.8562249820962773, - "learning_rate": 2.654515131354735e-06, - "loss": 1.0545, - "step": 3427 - }, - { - "epoch": 0.41219262911080384, - "grad_norm": 2.1504456169171915, - "learning_rate": 2.653779004018453e-06, - "loss": 1.0489, - "step": 3428 - }, - { - "epoch": 0.4123128720014429, - "grad_norm": 1.8163104944728348, - "learning_rate": 2.653042777504417e-06, - "loss": 1.0139, - "step": 3429 - }, - { - "epoch": 0.412433114892082, - "grad_norm": 1.7294862597389238, - "learning_rate": 2.6523064519243105e-06, - "loss": 0.9932, - "step": 3430 - }, - { - "epoch": 0.4125533577827211, - "grad_norm": 2.340064648118305, - "learning_rate": 2.6515700273898333e-06, - "loss": 0.9846, - "step": 3431 - }, - { - "epoch": 0.4126736006733602, - "grad_norm": 1.8756744443330957, - "learning_rate": 2.6508335040127018e-06, - "loss": 0.8873, - "step": 3432 - }, - { - "epoch": 0.4127938435639993, - "grad_norm": 1.4237011930089465, - "learning_rate": 2.6500968819046446e-06, - "loss": 0.976, - "step": 3433 - }, - { - "epoch": 0.4129140864546384, - "grad_norm": 2.403077250826011, - "learning_rate": 2.649360161177408e-06, - "loss": 0.7959, - "step": 3434 - }, - { - "epoch": 0.41303432934527745, - "grad_norm": 1.8470272743718648, - "learning_rate": 2.6486233419427504e-06, - "loss": 0.9348, - "step": 3435 - }, - { - "epoch": 0.41315457223591656, - "grad_norm": 2.0380482690727204, - "learning_rate": 2.6478864243124484e-06, - "loss": 0.9582, - "step": 3436 - }, - { - "epoch": 0.4132748151265556, - "grad_norm": 1.8840718469911475, - "learning_rate": 2.6471494083982903e-06, - "loss": 1.0527, - "step": 3437 - }, - { - "epoch": 0.4133950580171947, - "grad_norm": 1.6961272782394714, - "learning_rate": 2.6464122943120818e-06, - "loss": 0.9494, - "step": 3438 - }, - { - "epoch": 0.41351530090783384, - "grad_norm": 2.499766529625661, - "learning_rate": 2.645675082165642e-06, - "loss": 1.028, - "step": 3439 - }, - { - "epoch": 0.4136355437984729, - "grad_norm": 2.0414400818361167, - "learning_rate": 2.644937772070806e-06, - "loss": 0.9542, - "step": 3440 - }, - { - "epoch": 0.413755786689112, - "grad_norm": 2.2229934731549377, - "learning_rate": 2.6442003641394225e-06, - "loss": 1.034, - "step": 3441 - }, - { - "epoch": 0.4138760295797511, - "grad_norm": 1.4121512463536352, - "learning_rate": 2.643462858483356e-06, - "loss": 1.033, - "step": 3442 - }, - { - "epoch": 0.41399627247039017, - "grad_norm": 1.6903827909755367, - "learning_rate": 2.6427252552144856e-06, - "loss": 0.9309, - "step": 3443 - }, - { - "epoch": 0.4141165153610293, - "grad_norm": 2.523614363480761, - "learning_rate": 2.6419875544447044e-06, - "loss": 0.9509, - "step": 3444 - }, - { - "epoch": 0.4142367582516684, - "grad_norm": 1.5502902774336875, - "learning_rate": 2.6412497562859218e-06, - "loss": 0.9157, - "step": 3445 - }, - { - "epoch": 0.41435700114230745, - "grad_norm": 2.005596674846606, - "learning_rate": 2.6405118608500617e-06, - "loss": 0.9564, - "step": 3446 - }, - { - "epoch": 0.41447724403294656, - "grad_norm": 1.5675616866389843, - "learning_rate": 2.6397738682490613e-06, - "loss": 0.9988, - "step": 3447 - }, - { - "epoch": 0.41459748692358567, - "grad_norm": 1.9075458687083244, - "learning_rate": 2.6390357785948734e-06, - "loss": 0.9541, - "step": 3448 - }, - { - "epoch": 0.4147177298142247, - "grad_norm": 1.7601232641577398, - "learning_rate": 2.6382975919994667e-06, - "loss": 1.0023, - "step": 3449 - }, - { - "epoch": 0.41483797270486383, - "grad_norm": 1.6384981447499785, - "learning_rate": 2.637559308574822e-06, - "loss": 0.9212, - "step": 3450 - }, - { - "epoch": 0.4149582155955029, - "grad_norm": 2.057199197614507, - "learning_rate": 2.6368209284329376e-06, - "loss": 0.9152, - "step": 3451 - }, - { - "epoch": 0.415078458486142, - "grad_norm": 1.942496471426372, - "learning_rate": 2.636082451685825e-06, - "loss": 0.9618, - "step": 3452 - }, - { - "epoch": 0.4151987013767811, - "grad_norm": 1.4768224416736622, - "learning_rate": 2.6353438784455094e-06, - "loss": 1.0597, - "step": 3453 - }, - { - "epoch": 0.41531894426742016, - "grad_norm": 1.913816350562833, - "learning_rate": 2.6346052088240326e-06, - "loss": 0.9095, - "step": 3454 - }, - { - "epoch": 0.4154391871580593, - "grad_norm": 1.8628571545503099, - "learning_rate": 2.63386644293345e-06, - "loss": 0.9711, - "step": 3455 - }, - { - "epoch": 0.4155594300486984, - "grad_norm": 2.3584627320428573, - "learning_rate": 2.633127580885833e-06, - "loss": 1.0329, - "step": 3456 - }, - { - "epoch": 0.41567967293933744, - "grad_norm": 2.2124869911975216, - "learning_rate": 2.632388622793265e-06, - "loss": 0.8467, - "step": 3457 - }, - { - "epoch": 0.41579991582997655, - "grad_norm": 1.7063442218182472, - "learning_rate": 2.6316495687678457e-06, - "loss": 0.8855, - "step": 3458 - }, - { - "epoch": 0.41592015872061566, - "grad_norm": 2.3217690773545008, - "learning_rate": 2.6309104189216887e-06, - "loss": 0.9594, - "step": 3459 - }, - { - "epoch": 0.4160404016112547, - "grad_norm": 1.9867014426296183, - "learning_rate": 2.630171173366923e-06, - "loss": 0.9463, - "step": 3460 - }, - { - "epoch": 0.41616064450189383, - "grad_norm": 2.3279213016159575, - "learning_rate": 2.629431832215691e-06, - "loss": 0.9419, - "step": 3461 - }, - { - "epoch": 0.41628088739253294, - "grad_norm": 1.688139123331454, - "learning_rate": 2.628692395580151e-06, - "loss": 1.074, - "step": 3462 - }, - { - "epoch": 0.416401130283172, - "grad_norm": 1.6717744600957591, - "learning_rate": 2.6279528635724747e-06, - "loss": 0.9918, - "step": 3463 - }, - { - "epoch": 0.4165213731738111, - "grad_norm": 3.17485770559968, - "learning_rate": 2.627213236304848e-06, - "loss": 0.9892, - "step": 3464 - }, - { - "epoch": 0.4166416160644502, - "grad_norm": 1.7672174075131448, - "learning_rate": 2.626473513889472e-06, - "loss": 0.9028, - "step": 3465 - }, - { - "epoch": 0.41676185895508927, - "grad_norm": 1.8000529175503273, - "learning_rate": 2.625733696438562e-06, - "loss": 1.0273, - "step": 3466 - }, - { - "epoch": 0.4168821018457284, - "grad_norm": 2.9143261010446273, - "learning_rate": 2.6249937840643476e-06, - "loss": 0.9553, - "step": 3467 - }, - { - "epoch": 0.41700234473636744, - "grad_norm": 1.8502501653664232, - "learning_rate": 2.6242537768790733e-06, - "loss": 0.874, - "step": 3468 - }, - { - "epoch": 0.41712258762700655, - "grad_norm": 1.7970761393877657, - "learning_rate": 2.6235136749949975e-06, - "loss": 0.8912, - "step": 3469 - }, - { - "epoch": 0.41724283051764566, - "grad_norm": 1.854517385623964, - "learning_rate": 2.6227734785243924e-06, - "loss": 0.815, - "step": 3470 - }, - { - "epoch": 0.4173630734082847, - "grad_norm": 1.6064839738176457, - "learning_rate": 2.6220331875795466e-06, - "loss": 0.9918, - "step": 3471 - }, - { - "epoch": 0.4174833162989238, - "grad_norm": 1.4555526471695386, - "learning_rate": 2.62129280227276e-06, - "loss": 0.9548, - "step": 3472 - }, - { - "epoch": 0.41760355918956293, - "grad_norm": 1.9793870368208497, - "learning_rate": 2.62055232271635e-06, - "loss": 0.8773, - "step": 3473 - }, - { - "epoch": 0.417723802080202, - "grad_norm": 2.0517887220274087, - "learning_rate": 2.619811749022646e-06, - "loss": 1.0808, - "step": 3474 - }, - { - "epoch": 0.4178440449708411, - "grad_norm": 2.552924711666025, - "learning_rate": 2.6190710813039917e-06, - "loss": 0.9166, - "step": 3475 - }, - { - "epoch": 0.4179642878614802, - "grad_norm": 2.6623816235513322, - "learning_rate": 2.618330319672747e-06, - "loss": 1.042, - "step": 3476 - }, - { - "epoch": 0.41808453075211927, - "grad_norm": 1.8821375733299055, - "learning_rate": 2.617589464241284e-06, - "loss": 1.12, - "step": 3477 - }, - { - "epoch": 0.4182047736427584, - "grad_norm": 1.8096213006763522, - "learning_rate": 2.6168485151219914e-06, - "loss": 0.9408, - "step": 3478 - }, - { - "epoch": 0.4183250165333975, - "grad_norm": 2.182427656235351, - "learning_rate": 2.616107472427269e-06, - "loss": 0.9048, - "step": 3479 - }, - { - "epoch": 0.41844525942403654, - "grad_norm": 2.1289333516766904, - "learning_rate": 2.615366336269533e-06, - "loss": 0.9727, - "step": 3480 - }, - { - "epoch": 0.41856550231467565, - "grad_norm": 2.3243927193294756, - "learning_rate": 2.6146251067612126e-06, - "loss": 0.992, - "step": 3481 - }, - { - "epoch": 0.41868574520531476, - "grad_norm": 1.6096901446176146, - "learning_rate": 2.6138837840147525e-06, - "loss": 1.0154, - "step": 3482 - }, - { - "epoch": 0.4188059880959538, - "grad_norm": 2.0022546457386072, - "learning_rate": 2.6131423681426103e-06, - "loss": 0.9774, - "step": 3483 - }, - { - "epoch": 0.41892623098659293, - "grad_norm": 1.43864817997283, - "learning_rate": 2.6124008592572587e-06, - "loss": 0.9255, - "step": 3484 - }, - { - "epoch": 0.419046473877232, - "grad_norm": 2.003241415702606, - "learning_rate": 2.6116592574711835e-06, - "loss": 1.009, - "step": 3485 - }, - { - "epoch": 0.4191667167678711, - "grad_norm": 1.7748742018702621, - "learning_rate": 2.6109175628968853e-06, - "loss": 1.0366, - "step": 3486 - }, - { - "epoch": 0.4192869596585102, - "grad_norm": 1.8294468200994587, - "learning_rate": 2.610175775646878e-06, - "loss": 1.0281, - "step": 3487 - }, - { - "epoch": 0.41940720254914926, - "grad_norm": 1.9306030572525392, - "learning_rate": 2.6094338958336907e-06, - "loss": 0.9482, - "step": 3488 - }, - { - "epoch": 0.41952744543978837, - "grad_norm": 1.9077507115261394, - "learning_rate": 2.608691923569867e-06, - "loss": 1.0194, - "step": 3489 - }, - { - "epoch": 0.4196476883304275, - "grad_norm": 1.7234867369050444, - "learning_rate": 2.6079498589679616e-06, - "loss": 0.9597, - "step": 3490 - }, - { - "epoch": 0.41976793122106654, - "grad_norm": 1.8129966976053635, - "learning_rate": 2.6072077021405465e-06, - "loss": 0.9616, - "step": 3491 - }, - { - "epoch": 0.41988817411170565, - "grad_norm": 1.6061057801570335, - "learning_rate": 2.6064654532002054e-06, - "loss": 0.8907, - "step": 3492 - }, - { - "epoch": 0.42000841700234476, - "grad_norm": 1.407997190361579, - "learning_rate": 2.6057231122595375e-06, - "loss": 0.9526, - "step": 3493 - }, - { - "epoch": 0.4201286598929838, - "grad_norm": 1.53135836470831, - "learning_rate": 2.604980679431154e-06, - "loss": 0.9298, - "step": 3494 - }, - { - "epoch": 0.4202489027836229, - "grad_norm": 1.9085314496521988, - "learning_rate": 2.604238154827684e-06, - "loss": 0.9455, - "step": 3495 - }, - { - "epoch": 0.42036914567426203, - "grad_norm": 1.7838130380014794, - "learning_rate": 2.6034955385617656e-06, - "loss": 0.932, - "step": 3496 - }, - { - "epoch": 0.4204893885649011, - "grad_norm": 0.869608480994341, - "learning_rate": 2.6027528307460544e-06, - "loss": 0.8592, - "step": 3497 - }, - { - "epoch": 0.4206096314555402, - "grad_norm": 2.134798571444929, - "learning_rate": 2.602010031493217e-06, - "loss": 1.056, - "step": 3498 - }, - { - "epoch": 0.42072987434617926, - "grad_norm": 1.7975213256759335, - "learning_rate": 2.6012671409159367e-06, - "loss": 1.0692, - "step": 3499 - }, - { - "epoch": 0.42085011723681837, - "grad_norm": 1.7599675510406272, - "learning_rate": 2.6005241591269097e-06, - "loss": 1.0223, - "step": 3500 - }, - { - "epoch": 0.4209703601274575, - "grad_norm": 1.6384062523329541, - "learning_rate": 2.5997810862388454e-06, - "loss": 1.004, - "step": 3501 - }, - { - "epoch": 0.42109060301809653, - "grad_norm": 2.1467499364770934, - "learning_rate": 2.599037922364467e-06, - "loss": 0.9656, - "step": 3502 - }, - { - "epoch": 0.42121084590873564, - "grad_norm": 2.2603037464632125, - "learning_rate": 2.5982946676165112e-06, - "loss": 0.9505, - "step": 3503 - }, - { - "epoch": 0.42133108879937475, - "grad_norm": 0.8358975695048416, - "learning_rate": 2.5975513221077313e-06, - "loss": 0.7987, - "step": 3504 - }, - { - "epoch": 0.4214513316900138, - "grad_norm": 2.1536583183200375, - "learning_rate": 2.5968078859508897e-06, - "loss": 1.0788, - "step": 3505 - }, - { - "epoch": 0.4215715745806529, - "grad_norm": 1.8465481877220176, - "learning_rate": 2.5960643592587673e-06, - "loss": 0.9963, - "step": 3506 - }, - { - "epoch": 0.42169181747129203, - "grad_norm": 1.8553341465362363, - "learning_rate": 2.5953207421441553e-06, - "loss": 1.0217, - "step": 3507 - }, - { - "epoch": 0.4218120603619311, - "grad_norm": 2.3296628647543676, - "learning_rate": 2.5945770347198603e-06, - "loss": 0.9456, - "step": 3508 - }, - { - "epoch": 0.4219323032525702, - "grad_norm": 1.6629884504237171, - "learning_rate": 2.593833237098701e-06, - "loss": 1.0279, - "step": 3509 - }, - { - "epoch": 0.4220525461432093, - "grad_norm": 1.7268171856176773, - "learning_rate": 2.593089349393512e-06, - "loss": 0.8299, - "step": 3510 - }, - { - "epoch": 0.42217278903384836, - "grad_norm": 1.9035269878008383, - "learning_rate": 2.592345371717141e-06, - "loss": 1.0423, - "step": 3511 - }, - { - "epoch": 0.42229303192448747, - "grad_norm": 1.9273649130263548, - "learning_rate": 2.591601304182448e-06, - "loss": 0.9091, - "step": 3512 - }, - { - "epoch": 0.4224132748151266, - "grad_norm": 1.5000602392180653, - "learning_rate": 2.5908571469023067e-06, - "loss": 0.9879, - "step": 3513 - }, - { - "epoch": 0.42253351770576564, - "grad_norm": 2.1164813113217695, - "learning_rate": 2.5901128999896067e-06, - "loss": 0.9554, - "step": 3514 - }, - { - "epoch": 0.42265376059640475, - "grad_norm": 1.700545711442454, - "learning_rate": 2.5893685635572487e-06, - "loss": 0.8828, - "step": 3515 - }, - { - "epoch": 0.4227740034870438, - "grad_norm": 1.7975450012982999, - "learning_rate": 2.5886241377181483e-06, - "loss": 0.8938, - "step": 3516 - }, - { - "epoch": 0.4228942463776829, - "grad_norm": 1.723194548566549, - "learning_rate": 2.587879622585234e-06, - "loss": 1.0107, - "step": 3517 - }, - { - "epoch": 0.423014489268322, - "grad_norm": 2.1114559505419717, - "learning_rate": 2.5871350182714486e-06, - "loss": 0.964, - "step": 3518 - }, - { - "epoch": 0.4231347321589611, - "grad_norm": 1.809919098263149, - "learning_rate": 2.586390324889748e-06, - "loss": 0.9996, - "step": 3519 - }, - { - "epoch": 0.4232549750496002, - "grad_norm": 1.7968125042205951, - "learning_rate": 2.5856455425531003e-06, - "loss": 0.8664, - "step": 3520 - }, - { - "epoch": 0.4233752179402393, - "grad_norm": 1.6927651811492794, - "learning_rate": 2.5849006713744902e-06, - "loss": 1.0095, - "step": 3521 - }, - { - "epoch": 0.42349546083087836, - "grad_norm": 3.392098383565344, - "learning_rate": 2.5841557114669135e-06, - "loss": 0.9336, - "step": 3522 - }, - { - "epoch": 0.42361570372151747, - "grad_norm": 2.5724053022714077, - "learning_rate": 2.58341066294338e-06, - "loss": 0.8686, - "step": 3523 - }, - { - "epoch": 0.4237359466121566, - "grad_norm": 1.99518828935192, - "learning_rate": 2.5826655259169124e-06, - "loss": 1.059, - "step": 3524 - }, - { - "epoch": 0.42385618950279563, - "grad_norm": 1.7187810721623151, - "learning_rate": 2.5819203005005475e-06, - "loss": 1.1034, - "step": 3525 - }, - { - "epoch": 0.42397643239343474, - "grad_norm": 1.8202783147524333, - "learning_rate": 2.581174986807336e-06, - "loss": 0.9856, - "step": 3526 - }, - { - "epoch": 0.42409667528407385, - "grad_norm": 2.0269186937749417, - "learning_rate": 2.580429584950341e-06, - "loss": 1.1134, - "step": 3527 - }, - { - "epoch": 0.4242169181747129, - "grad_norm": 1.8222149252099318, - "learning_rate": 2.5796840950426397e-06, - "loss": 0.8618, - "step": 3528 - }, - { - "epoch": 0.424337161065352, - "grad_norm": 1.7208138992037312, - "learning_rate": 2.578938517197322e-06, - "loss": 0.8546, - "step": 3529 - }, - { - "epoch": 0.4244574039559911, - "grad_norm": 2.211246967561582, - "learning_rate": 2.5781928515274916e-06, - "loss": 0.8243, - "step": 3530 - }, - { - "epoch": 0.4245776468466302, - "grad_norm": 1.8476495399917798, - "learning_rate": 2.577447098146265e-06, - "loss": 0.8828, - "step": 3531 - }, - { - "epoch": 0.4246978897372693, - "grad_norm": 1.551894396177919, - "learning_rate": 2.5767012571667724e-06, - "loss": 0.9885, - "step": 3532 - }, - { - "epoch": 0.42481813262790835, - "grad_norm": 1.8616780355454126, - "learning_rate": 2.5759553287021587e-06, - "loss": 0.8832, - "step": 3533 - }, - { - "epoch": 0.42493837551854746, - "grad_norm": 1.9969970092751044, - "learning_rate": 2.5752093128655786e-06, - "loss": 0.9704, - "step": 3534 - }, - { - "epoch": 0.4250586184091866, - "grad_norm": 1.5810717274888573, - "learning_rate": 2.574463209770204e-06, - "loss": 0.9327, - "step": 3535 - }, - { - "epoch": 0.42517886129982563, - "grad_norm": 1.6960203740902478, - "learning_rate": 2.5737170195292165e-06, - "loss": 0.9944, - "step": 3536 - }, - { - "epoch": 0.42529910419046474, - "grad_norm": 1.9020629299555545, - "learning_rate": 2.572970742255814e-06, - "loss": 0.9736, - "step": 3537 - }, - { - "epoch": 0.42541934708110385, - "grad_norm": 1.5049756496145104, - "learning_rate": 2.5722243780632046e-06, - "loss": 1.0157, - "step": 3538 - }, - { - "epoch": 0.4255395899717429, - "grad_norm": 0.9133472741819594, - "learning_rate": 2.5714779270646125e-06, - "loss": 0.853, - "step": 3539 - }, - { - "epoch": 0.425659832862382, - "grad_norm": 2.5841808979618888, - "learning_rate": 2.5707313893732735e-06, - "loss": 0.9642, - "step": 3540 - }, - { - "epoch": 0.4257800757530211, - "grad_norm": 1.505317956789028, - "learning_rate": 2.5699847651024364e-06, - "loss": 0.9669, - "step": 3541 - }, - { - "epoch": 0.4259003186436602, - "grad_norm": 2.036127894553413, - "learning_rate": 2.5692380543653627e-06, - "loss": 0.9726, - "step": 3542 - }, - { - "epoch": 0.4260205615342993, - "grad_norm": 1.8576817857140864, - "learning_rate": 2.5684912572753293e-06, - "loss": 0.9003, - "step": 3543 - }, - { - "epoch": 0.4261408044249384, - "grad_norm": 1.7849890298800137, - "learning_rate": 2.5677443739456245e-06, - "loss": 1.043, - "step": 3544 - }, - { - "epoch": 0.42626104731557746, - "grad_norm": 2.3744194927500217, - "learning_rate": 2.5669974044895495e-06, - "loss": 0.9963, - "step": 3545 - }, - { - "epoch": 0.42638129020621657, - "grad_norm": 1.6958680539087676, - "learning_rate": 2.5662503490204187e-06, - "loss": 0.989, - "step": 3546 - }, - { - "epoch": 0.4265015330968556, - "grad_norm": 1.7391828277719246, - "learning_rate": 2.5655032076515603e-06, - "loss": 0.9611, - "step": 3547 - }, - { - "epoch": 0.42662177598749473, - "grad_norm": 2.2428839794736684, - "learning_rate": 2.5647559804963155e-06, - "loss": 1.0144, - "step": 3548 - }, - { - "epoch": 0.42674201887813384, - "grad_norm": 2.141729251031255, - "learning_rate": 2.5640086676680364e-06, - "loss": 0.9898, - "step": 3549 - }, - { - "epoch": 0.4268622617687729, - "grad_norm": 2.7521940495419703, - "learning_rate": 2.5632612692800923e-06, - "loss": 1.0068, - "step": 3550 - }, - { - "epoch": 0.426982504659412, - "grad_norm": 1.8301160354514554, - "learning_rate": 2.5625137854458603e-06, - "loss": 0.9442, - "step": 3551 - }, - { - "epoch": 0.4271027475500511, - "grad_norm": 1.8783836986329925, - "learning_rate": 2.561766216278735e-06, - "loss": 0.9988, - "step": 3552 - }, - { - "epoch": 0.4272229904406902, - "grad_norm": 2.008532443273843, - "learning_rate": 2.561018561892121e-06, - "loss": 1.0048, - "step": 3553 - }, - { - "epoch": 0.4273432333313293, - "grad_norm": 1.4408124822398893, - "learning_rate": 2.5602708223994363e-06, - "loss": 0.9571, - "step": 3554 - }, - { - "epoch": 0.4274634762219684, - "grad_norm": 2.340982761722079, - "learning_rate": 2.559522997914115e-06, - "loss": 0.8783, - "step": 3555 - }, - { - "epoch": 0.42758371911260745, - "grad_norm": 1.8261299658877603, - "learning_rate": 2.558775088549599e-06, - "loss": 1.0495, - "step": 3556 - }, - { - "epoch": 0.42770396200324656, - "grad_norm": 2.6857828376450636, - "learning_rate": 2.5580270944193467e-06, - "loss": 0.8665, - "step": 3557 - }, - { - "epoch": 0.4278242048938857, - "grad_norm": 0.8156341844531227, - "learning_rate": 2.557279015636827e-06, - "loss": 0.7703, - "step": 3558 - }, - { - "epoch": 0.42794444778452473, - "grad_norm": 0.8411163624107257, - "learning_rate": 2.5565308523155245e-06, - "loss": 0.8541, - "step": 3559 - }, - { - "epoch": 0.42806469067516384, - "grad_norm": 2.2531217535368535, - "learning_rate": 2.5557826045689336e-06, - "loss": 1.0279, - "step": 3560 - }, - { - "epoch": 0.4281849335658029, - "grad_norm": 0.9346320790574639, - "learning_rate": 2.5550342725105643e-06, - "loss": 0.8052, - "step": 3561 - }, - { - "epoch": 0.428305176456442, - "grad_norm": 1.605976627702345, - "learning_rate": 2.554285856253937e-06, - "loss": 1.012, - "step": 3562 - }, - { - "epoch": 0.4284254193470811, - "grad_norm": 1.819623890570289, - "learning_rate": 2.5535373559125855e-06, - "loss": 0.9704, - "step": 3563 - }, - { - "epoch": 0.42854566223772017, - "grad_norm": 1.74169134764394, - "learning_rate": 2.552788771600057e-06, - "loss": 1.0182, - "step": 3564 - }, - { - "epoch": 0.4286659051283593, - "grad_norm": 5.879612512082436, - "learning_rate": 2.5520401034299118e-06, - "loss": 1.0159, - "step": 3565 - }, - { - "epoch": 0.4287861480189984, - "grad_norm": 2.0098841329172896, - "learning_rate": 2.551291351515722e-06, - "loss": 1.0715, - "step": 3566 - }, - { - "epoch": 0.42890639090963745, - "grad_norm": 1.5226527613780425, - "learning_rate": 2.5505425159710726e-06, - "loss": 1.0555, - "step": 3567 - }, - { - "epoch": 0.42902663380027656, - "grad_norm": 1.653347567194508, - "learning_rate": 2.549793596909561e-06, - "loss": 1.0362, - "step": 3568 - }, - { - "epoch": 0.42914687669091567, - "grad_norm": 2.536307192665673, - "learning_rate": 2.5490445944447976e-06, - "loss": 0.8665, - "step": 3569 - }, - { - "epoch": 0.4292671195815547, - "grad_norm": 1.8127184769069287, - "learning_rate": 2.548295508690406e-06, - "loss": 0.8604, - "step": 3570 - }, - { - "epoch": 0.42938736247219383, - "grad_norm": 1.7330096945252094, - "learning_rate": 2.5475463397600217e-06, - "loss": 0.9648, - "step": 3571 - }, - { - "epoch": 0.42950760536283294, - "grad_norm": 1.8845003405839142, - "learning_rate": 2.546797087767293e-06, - "loss": 0.9722, - "step": 3572 - }, - { - "epoch": 0.429627848253472, - "grad_norm": 1.8247491690302833, - "learning_rate": 2.546047752825881e-06, - "loss": 1.0731, - "step": 3573 - }, - { - "epoch": 0.4297480911441111, - "grad_norm": 1.9336795055754696, - "learning_rate": 2.5452983350494595e-06, - "loss": 1.138, - "step": 3574 - }, - { - "epoch": 0.4298683340347502, - "grad_norm": 4.25658736495369, - "learning_rate": 2.544548834551713e-06, - "loss": 0.86, - "step": 3575 - }, - { - "epoch": 0.4299885769253893, - "grad_norm": 2.0482541162967935, - "learning_rate": 2.5437992514463424e-06, - "loss": 1.1399, - "step": 3576 - }, - { - "epoch": 0.4301088198160284, - "grad_norm": 1.604247210903992, - "learning_rate": 2.5430495858470565e-06, - "loss": 1.0754, - "step": 3577 - }, - { - "epoch": 0.43022906270666744, - "grad_norm": 1.9939231342969617, - "learning_rate": 2.54229983786758e-06, - "loss": 0.9699, - "step": 3578 - }, - { - "epoch": 0.43034930559730655, - "grad_norm": 1.7786533770720065, - "learning_rate": 2.541550007621651e-06, - "loss": 1.0435, - "step": 3579 - }, - { - "epoch": 0.43046954848794566, - "grad_norm": 1.6327518287593545, - "learning_rate": 2.5408000952230156e-06, - "loss": 0.9961, - "step": 3580 - }, - { - "epoch": 0.4305897913785847, - "grad_norm": 1.8177980402659428, - "learning_rate": 2.5400501007854357e-06, - "loss": 1.0945, - "step": 3581 - }, - { - "epoch": 0.43071003426922383, - "grad_norm": 3.4002649764982125, - "learning_rate": 2.539300024422685e-06, - "loss": 0.9623, - "step": 3582 - }, - { - "epoch": 0.43083027715986294, - "grad_norm": 0.944857716521011, - "learning_rate": 2.538549866248549e-06, - "loss": 0.8349, - "step": 3583 - }, - { - "epoch": 0.430950520050502, - "grad_norm": 1.9053387574496166, - "learning_rate": 2.5377996263768274e-06, - "loss": 1.0178, - "step": 3584 - }, - { - "epoch": 0.4310707629411411, - "grad_norm": 1.63722064714831, - "learning_rate": 2.5370493049213293e-06, - "loss": 0.8776, - "step": 3585 - }, - { - "epoch": 0.4311910058317802, - "grad_norm": 1.7855903882232849, - "learning_rate": 2.536298901995878e-06, - "loss": 1.0008, - "step": 3586 - }, - { - "epoch": 0.43131124872241927, - "grad_norm": 1.653573807076424, - "learning_rate": 2.535548417714311e-06, - "loss": 0.9997, - "step": 3587 - }, - { - "epoch": 0.4314314916130584, - "grad_norm": 1.5019289171031496, - "learning_rate": 2.534797852190474e-06, - "loss": 1.063, - "step": 3588 - }, - { - "epoch": 0.4315517345036975, - "grad_norm": 1.8905145912404362, - "learning_rate": 2.5340472055382283e-06, - "loss": 1.0083, - "step": 3589 - }, - { - "epoch": 0.43167197739433655, - "grad_norm": 1.8501473342375279, - "learning_rate": 2.5332964778714468e-06, - "loss": 1.0074, - "step": 3590 - }, - { - "epoch": 0.43179222028497566, - "grad_norm": 1.7191798279567125, - "learning_rate": 2.5325456693040123e-06, - "loss": 0.8702, - "step": 3591 - }, - { - "epoch": 0.43191246317561477, - "grad_norm": 1.899200913891978, - "learning_rate": 2.531794779949824e-06, - "loss": 0.9585, - "step": 3592 - }, - { - "epoch": 0.4320327060662538, - "grad_norm": 1.7912295347909415, - "learning_rate": 2.5310438099227903e-06, - "loss": 1.0795, - "step": 3593 - }, - { - "epoch": 0.43215294895689293, - "grad_norm": 1.4766417789873507, - "learning_rate": 2.530292759336833e-06, - "loss": 0.7569, - "step": 3594 - }, - { - "epoch": 0.432273191847532, - "grad_norm": 2.7486995743397613, - "learning_rate": 2.5295416283058855e-06, - "loss": 0.9121, - "step": 3595 - }, - { - "epoch": 0.4323934347381711, - "grad_norm": 1.4703922424421012, - "learning_rate": 2.5287904169438943e-06, - "loss": 0.8576, - "step": 3596 - }, - { - "epoch": 0.4325136776288102, - "grad_norm": 2.577180446644245, - "learning_rate": 2.528039125364817e-06, - "loss": 0.8532, - "step": 3597 - }, - { - "epoch": 0.43263392051944927, - "grad_norm": 2.0177972015467325, - "learning_rate": 2.5272877536826246e-06, - "loss": 0.9517, - "step": 3598 - }, - { - "epoch": 0.4327541634100884, - "grad_norm": 2.0743359893597106, - "learning_rate": 2.5265363020112986e-06, - "loss": 0.8899, - "step": 3599 - }, - { - "epoch": 0.4328744063007275, - "grad_norm": 1.7376176231693654, - "learning_rate": 2.5257847704648344e-06, - "loss": 1.0412, - "step": 3600 - }, - { - "epoch": 0.43299464919136654, - "grad_norm": 1.765603462020356, - "learning_rate": 2.525033159157239e-06, - "loss": 0.9621, - "step": 3601 - }, - { - "epoch": 0.43311489208200565, - "grad_norm": 1.870315166780615, - "learning_rate": 2.52428146820253e-06, - "loss": 0.9767, - "step": 3602 - }, - { - "epoch": 0.43323513497264476, - "grad_norm": 1.4938545701203017, - "learning_rate": 2.52352969771474e-06, - "loss": 1.0169, - "step": 3603 - }, - { - "epoch": 0.4333553778632838, - "grad_norm": 2.038378253586317, - "learning_rate": 2.5227778478079106e-06, - "loss": 1.0795, - "step": 3604 - }, - { - "epoch": 0.43347562075392293, - "grad_norm": 1.5520184479875478, - "learning_rate": 2.522025918596098e-06, - "loss": 0.9677, - "step": 3605 - }, - { - "epoch": 0.43359586364456204, - "grad_norm": 1.2908527311899702, - "learning_rate": 2.521273910193368e-06, - "loss": 0.8522, - "step": 3606 - }, - { - "epoch": 0.4337161065352011, - "grad_norm": 2.3832276029631823, - "learning_rate": 2.5205218227138006e-06, - "loss": 1.0742, - "step": 3607 - }, - { - "epoch": 0.4338363494258402, - "grad_norm": 4.271388369834454, - "learning_rate": 2.519769656271486e-06, - "loss": 0.9895, - "step": 3608 - }, - { - "epoch": 0.43395659231647926, - "grad_norm": 2.0922874922500085, - "learning_rate": 2.5190174109805285e-06, - "loss": 0.8865, - "step": 3609 - }, - { - "epoch": 0.43407683520711837, - "grad_norm": 2.707634840660551, - "learning_rate": 2.518265086955042e-06, - "loss": 0.8406, - "step": 3610 - }, - { - "epoch": 0.4341970780977575, - "grad_norm": 1.8915633718153633, - "learning_rate": 2.5175126843091534e-06, - "loss": 1.0379, - "step": 3611 - }, - { - "epoch": 0.43431732098839654, - "grad_norm": 2.056563416686446, - "learning_rate": 2.5167602031570034e-06, - "loss": 0.9442, - "step": 3612 - }, - { - "epoch": 0.43443756387903565, - "grad_norm": 1.884585610129059, - "learning_rate": 2.51600764361274e-06, - "loss": 0.9363, - "step": 3613 - }, - { - "epoch": 0.43455780676967476, - "grad_norm": 2.2898202104246823, - "learning_rate": 2.5152550057905283e-06, - "loss": 0.9824, - "step": 3614 - }, - { - "epoch": 0.4346780496603138, - "grad_norm": 2.0790253029917016, - "learning_rate": 2.5145022898045415e-06, - "loss": 0.9573, - "step": 3615 - }, - { - "epoch": 0.4347982925509529, - "grad_norm": 2.259213023097674, - "learning_rate": 2.5137494957689664e-06, - "loss": 1.1023, - "step": 3616 - }, - { - "epoch": 0.43491853544159204, - "grad_norm": 0.8670662803812216, - "learning_rate": 2.5129966237980016e-06, - "loss": 0.7947, - "step": 3617 - }, - { - "epoch": 0.4350387783322311, - "grad_norm": 2.9387767533188307, - "learning_rate": 2.512243674005857e-06, - "loss": 0.9801, - "step": 3618 - }, - { - "epoch": 0.4351590212228702, - "grad_norm": 1.7990234136452, - "learning_rate": 2.5114906465067537e-06, - "loss": 1.0585, - "step": 3619 - }, - { - "epoch": 0.4352792641135093, - "grad_norm": 2.025353309044805, - "learning_rate": 2.5107375414149264e-06, - "loss": 0.9581, - "step": 3620 - }, - { - "epoch": 0.43539950700414837, - "grad_norm": 2.248590239638138, - "learning_rate": 2.5099843588446197e-06, - "loss": 0.9145, - "step": 3621 - }, - { - "epoch": 0.4355197498947875, - "grad_norm": 1.6617742574914705, - "learning_rate": 2.509231098910091e-06, - "loss": 0.8196, - "step": 3622 - }, - { - "epoch": 0.4356399927854266, - "grad_norm": 2.0142603549887594, - "learning_rate": 2.508477761725611e-06, - "loss": 0.9545, - "step": 3623 - }, - { - "epoch": 0.43576023567606564, - "grad_norm": 1.8569836666187964, - "learning_rate": 2.507724347405458e-06, - "loss": 1.002, - "step": 3624 - }, - { - "epoch": 0.43588047856670475, - "grad_norm": 1.718301471793448, - "learning_rate": 2.5069708560639243e-06, - "loss": 1.0181, - "step": 3625 - }, - { - "epoch": 0.4360007214573438, - "grad_norm": 1.8210172148618256, - "learning_rate": 2.5062172878153158e-06, - "loss": 0.8161, - "step": 3626 - }, - { - "epoch": 0.4361209643479829, - "grad_norm": 1.971321001533963, - "learning_rate": 2.505463642773947e-06, - "loss": 1.0731, - "step": 3627 - }, - { - "epoch": 0.43624120723862203, - "grad_norm": 2.1516412325812815, - "learning_rate": 2.504709921054146e-06, - "loss": 0.9547, - "step": 3628 - }, - { - "epoch": 0.4363614501292611, - "grad_norm": 2.4701190484272857, - "learning_rate": 2.50395612277025e-06, - "loss": 1.0381, - "step": 3629 - }, - { - "epoch": 0.4364816930199002, - "grad_norm": 2.566944826419595, - "learning_rate": 2.503202248036612e-06, - "loss": 0.9313, - "step": 3630 - }, - { - "epoch": 0.4366019359105393, - "grad_norm": 1.5647226165798678, - "learning_rate": 2.5024482969675927e-06, - "loss": 0.9353, - "step": 3631 - }, - { - "epoch": 0.43672217880117836, - "grad_norm": 1.865739843773063, - "learning_rate": 2.501694269677566e-06, - "loss": 1.0454, - "step": 3632 - }, - { - "epoch": 0.4368424216918175, - "grad_norm": 2.2052619256048613, - "learning_rate": 2.500940166280918e-06, - "loss": 1.0147, - "step": 3633 - }, - { - "epoch": 0.4369626645824566, - "grad_norm": 2.1252043008767623, - "learning_rate": 2.500185986892045e-06, - "loss": 0.9869, - "step": 3634 - }, - { - "epoch": 0.43708290747309564, - "grad_norm": 2.3616958218293465, - "learning_rate": 2.499431731625355e-06, - "loss": 0.9736, - "step": 3635 - }, - { - "epoch": 0.43720315036373475, - "grad_norm": 1.640574499897071, - "learning_rate": 2.4986774005952686e-06, - "loss": 0.9964, - "step": 3636 - }, - { - "epoch": 0.43732339325437386, - "grad_norm": 1.859924748036698, - "learning_rate": 2.4979229939162166e-06, - "loss": 1.0439, - "step": 3637 - }, - { - "epoch": 0.4374436361450129, - "grad_norm": 1.516580496774979, - "learning_rate": 2.4971685117026433e-06, - "loss": 1.0007, - "step": 3638 - }, - { - "epoch": 0.437563879035652, - "grad_norm": 1.3623893027877398, - "learning_rate": 2.4964139540690018e-06, - "loss": 0.9709, - "step": 3639 - }, - { - "epoch": 0.4376841219262911, - "grad_norm": 1.6844894257075154, - "learning_rate": 2.495659321129758e-06, - "loss": 0.9247, - "step": 3640 - }, - { - "epoch": 0.4378043648169302, - "grad_norm": 1.6389541110119372, - "learning_rate": 2.494904612999389e-06, - "loss": 0.9612, - "step": 3641 - }, - { - "epoch": 0.4379246077075693, - "grad_norm": 0.8599643073881246, - "learning_rate": 2.4941498297923843e-06, - "loss": 0.802, - "step": 3642 - }, - { - "epoch": 0.43804485059820836, - "grad_norm": 1.6966795441689029, - "learning_rate": 2.4933949716232424e-06, - "loss": 0.9008, - "step": 3643 - }, - { - "epoch": 0.43816509348884747, - "grad_norm": 2.0366838985573636, - "learning_rate": 2.492640038606476e-06, - "loss": 0.938, - "step": 3644 - }, - { - "epoch": 0.4382853363794866, - "grad_norm": 1.8677597126770125, - "learning_rate": 2.491885030856608e-06, - "loss": 0.9895, - "step": 3645 - }, - { - "epoch": 0.43840557927012563, - "grad_norm": 1.7903289976340346, - "learning_rate": 2.4911299484881713e-06, - "loss": 1.0243, - "step": 3646 - }, - { - "epoch": 0.43852582216076474, - "grad_norm": 1.4358424706296535, - "learning_rate": 2.490374791615712e-06, - "loss": 1.0099, - "step": 3647 - }, - { - "epoch": 0.43864606505140386, - "grad_norm": 3.168932221419416, - "learning_rate": 2.4896195603537867e-06, - "loss": 0.9852, - "step": 3648 - }, - { - "epoch": 0.4387663079420429, - "grad_norm": 2.00440434443015, - "learning_rate": 2.488864254816964e-06, - "loss": 0.941, - "step": 3649 - }, - { - "epoch": 0.438886550832682, - "grad_norm": 2.1619803537469515, - "learning_rate": 2.4881088751198218e-06, - "loss": 0.8816, - "step": 3650 - }, - { - "epoch": 0.43900679372332113, - "grad_norm": 2.2131136371931674, - "learning_rate": 2.4873534213769517e-06, - "loss": 0.852, - "step": 3651 - }, - { - "epoch": 0.4391270366139602, - "grad_norm": 1.5370338353519892, - "learning_rate": 2.4865978937029547e-06, - "loss": 0.9151, - "step": 3652 - }, - { - "epoch": 0.4392472795045993, - "grad_norm": 1.532909195053889, - "learning_rate": 2.485842292212445e-06, - "loss": 0.8592, - "step": 3653 - }, - { - "epoch": 0.4393675223952384, - "grad_norm": 1.7038676191495705, - "learning_rate": 2.485086617020045e-06, - "loss": 0.9995, - "step": 3654 - }, - { - "epoch": 0.43948776528587746, - "grad_norm": 1.977184032077199, - "learning_rate": 2.4843308682403903e-06, - "loss": 1.0159, - "step": 3655 - }, - { - "epoch": 0.4396080081765166, - "grad_norm": 1.5078921914659498, - "learning_rate": 2.4835750459881294e-06, - "loss": 1.0283, - "step": 3656 - }, - { - "epoch": 0.43972825106715563, - "grad_norm": 3.1083910741548038, - "learning_rate": 2.4828191503779177e-06, - "loss": 1.0092, - "step": 3657 - }, - { - "epoch": 0.43984849395779474, - "grad_norm": 1.9355263656006922, - "learning_rate": 2.482063181524425e-06, - "loss": 1.0911, - "step": 3658 - }, - { - "epoch": 0.43996873684843385, - "grad_norm": 1.926236360020155, - "learning_rate": 2.4813071395423307e-06, - "loss": 1.0158, - "step": 3659 - }, - { - "epoch": 0.4400889797390729, - "grad_norm": 2.026724483748623, - "learning_rate": 2.4805510245463263e-06, - "loss": 0.8462, - "step": 3660 - }, - { - "epoch": 0.440209222629712, - "grad_norm": 2.1445912837396297, - "learning_rate": 2.4797948366511137e-06, - "loss": 0.7931, - "step": 3661 - }, - { - "epoch": 0.4403294655203511, - "grad_norm": 1.9644602710398957, - "learning_rate": 2.4790385759714055e-06, - "loss": 0.9689, - "step": 3662 - }, - { - "epoch": 0.4404497084109902, - "grad_norm": 1.6146628165420969, - "learning_rate": 2.478282242621926e-06, - "loss": 0.9135, - "step": 3663 - }, - { - "epoch": 0.4405699513016293, - "grad_norm": 1.0523859979861592, - "learning_rate": 2.477525836717411e-06, - "loss": 0.8312, - "step": 3664 - }, - { - "epoch": 0.4406901941922684, - "grad_norm": 2.035529689976153, - "learning_rate": 2.476769358372606e-06, - "loss": 0.9917, - "step": 3665 - }, - { - "epoch": 0.44081043708290746, - "grad_norm": 2.1544468292311443, - "learning_rate": 2.4760128077022683e-06, - "loss": 0.9466, - "step": 3666 - }, - { - "epoch": 0.44093067997354657, - "grad_norm": 1.4269424517441478, - "learning_rate": 2.4752561848211672e-06, - "loss": 0.8796, - "step": 3667 - }, - { - "epoch": 0.4410509228641857, - "grad_norm": 2.067667412212819, - "learning_rate": 2.4744994898440797e-06, - "loss": 0.9117, - "step": 3668 - }, - { - "epoch": 0.44117116575482473, - "grad_norm": 2.0578061647542123, - "learning_rate": 2.473742722885797e-06, - "loss": 1.0316, - "step": 3669 - }, - { - "epoch": 0.44129140864546385, - "grad_norm": 1.899581689964276, - "learning_rate": 2.4729858840611197e-06, - "loss": 0.8577, - "step": 3670 - }, - { - "epoch": 0.4414116515361029, - "grad_norm": 1.8328820959277854, - "learning_rate": 2.4722289734848605e-06, - "loss": 0.9256, - "step": 3671 - }, - { - "epoch": 0.441531894426742, - "grad_norm": 1.8969477078676358, - "learning_rate": 2.471471991271841e-06, - "loss": 0.9834, - "step": 3672 - }, - { - "epoch": 0.4416521373173811, - "grad_norm": 1.6905323616637806, - "learning_rate": 2.470714937536896e-06, - "loss": 0.9991, - "step": 3673 - }, - { - "epoch": 0.4417723802080202, - "grad_norm": 1.9567998874982426, - "learning_rate": 2.469957812394868e-06, - "loss": 0.9156, - "step": 3674 - }, - { - "epoch": 0.4418926230986593, - "grad_norm": 6.871227824782727, - "learning_rate": 2.4692006159606148e-06, - "loss": 0.9682, - "step": 3675 - }, - { - "epoch": 0.4420128659892984, - "grad_norm": 1.8354949417771962, - "learning_rate": 2.468443348349e-06, - "loss": 0.986, - "step": 3676 - }, - { - "epoch": 0.44213310887993745, - "grad_norm": 2.4792335137937225, - "learning_rate": 2.467686009674902e-06, - "loss": 1.0276, - "step": 3677 - }, - { - "epoch": 0.44225335177057656, - "grad_norm": 2.043121157152746, - "learning_rate": 2.466928600053209e-06, - "loss": 1.0483, - "step": 3678 - }, - { - "epoch": 0.4423735946612157, - "grad_norm": 2.14317515147956, - "learning_rate": 2.466171119598818e-06, - "loss": 0.921, - "step": 3679 - }, - { - "epoch": 0.44249383755185473, - "grad_norm": 1.8159651682197817, - "learning_rate": 2.465413568426639e-06, - "loss": 0.9703, - "step": 3680 - }, - { - "epoch": 0.44261408044249384, - "grad_norm": 1.5731511057103527, - "learning_rate": 2.464655946651591e-06, - "loss": 1.0143, - "step": 3681 - }, - { - "epoch": 0.44273432333313295, - "grad_norm": 1.7080769075057323, - "learning_rate": 2.4638982543886065e-06, - "loss": 1.0005, - "step": 3682 - }, - { - "epoch": 0.442854566223772, - "grad_norm": 2.3238966261808645, - "learning_rate": 2.4631404917526254e-06, - "loss": 1.0737, - "step": 3683 - }, - { - "epoch": 0.4429748091144111, - "grad_norm": 1.4630980809280296, - "learning_rate": 2.4623826588586e-06, - "loss": 0.9891, - "step": 3684 - }, - { - "epoch": 0.4430950520050502, - "grad_norm": 1.487932940397392, - "learning_rate": 2.461624755821492e-06, - "loss": 1.032, - "step": 3685 - }, - { - "epoch": 0.4432152948956893, - "grad_norm": 1.6064523621378373, - "learning_rate": 2.4608667827562763e-06, - "loss": 0.968, - "step": 3686 - }, - { - "epoch": 0.4433355377863284, - "grad_norm": 1.7834662319587409, - "learning_rate": 2.460108739777936e-06, - "loss": 1.0895, - "step": 3687 - }, - { - "epoch": 0.44345578067696745, - "grad_norm": 1.483431064229328, - "learning_rate": 2.4593506270014656e-06, - "loss": 0.9619, - "step": 3688 - }, - { - "epoch": 0.44357602356760656, - "grad_norm": 3.9183378644749642, - "learning_rate": 2.45859244454187e-06, - "loss": 1.0155, - "step": 3689 - }, - { - "epoch": 0.44369626645824567, - "grad_norm": 1.5648398617845167, - "learning_rate": 2.4578341925141655e-06, - "loss": 0.8602, - "step": 3690 - }, - { - "epoch": 0.4438165093488847, - "grad_norm": 1.870877693460299, - "learning_rate": 2.457075871033378e-06, - "loss": 0.92, - "step": 3691 - }, - { - "epoch": 0.44393675223952384, - "grad_norm": 1.8802171603037008, - "learning_rate": 2.4563174802145445e-06, - "loss": 1.0817, - "step": 3692 - }, - { - "epoch": 0.44405699513016295, - "grad_norm": 0.6745173371068447, - "learning_rate": 2.455559020172712e-06, - "loss": 0.6945, - "step": 3693 - }, - { - "epoch": 0.444177238020802, - "grad_norm": 3.422216250330461, - "learning_rate": 2.4548004910229385e-06, - "loss": 1.1007, - "step": 3694 - }, - { - "epoch": 0.4442974809114411, - "grad_norm": 1.6204156767349311, - "learning_rate": 2.4540418928802913e-06, - "loss": 1.0663, - "step": 3695 - }, - { - "epoch": 0.4444177238020802, - "grad_norm": 2.008698264347735, - "learning_rate": 2.4532832258598506e-06, - "loss": 0.8678, - "step": 3696 - }, - { - "epoch": 0.4445379666927193, - "grad_norm": 1.6784819779143014, - "learning_rate": 2.4525244900767047e-06, - "loss": 1.0036, - "step": 3697 - }, - { - "epoch": 0.4446582095833584, - "grad_norm": 0.9291299382179521, - "learning_rate": 2.4517656856459536e-06, - "loss": 0.8276, - "step": 3698 - }, - { - "epoch": 0.4447784524739975, - "grad_norm": 1.7022486540662576, - "learning_rate": 2.4510068126827073e-06, - "loss": 0.8756, - "step": 3699 - }, - { - "epoch": 0.44489869536463655, - "grad_norm": 2.581583373776504, - "learning_rate": 2.450247871302086e-06, - "loss": 1.0232, - "step": 3700 - }, - { - "epoch": 0.44501893825527566, - "grad_norm": 2.1249556817313886, - "learning_rate": 2.44948886161922e-06, - "loss": 1.0292, - "step": 3701 - }, - { - "epoch": 0.4451391811459148, - "grad_norm": 2.8718667579175627, - "learning_rate": 2.4487297837492524e-06, - "loss": 1.0476, - "step": 3702 - }, - { - "epoch": 0.44525942403655383, - "grad_norm": 1.682810023455626, - "learning_rate": 2.4479706378073323e-06, - "loss": 0.8153, - "step": 3703 - }, - { - "epoch": 0.44537966692719294, - "grad_norm": 1.5145016469357724, - "learning_rate": 2.447211423908623e-06, - "loss": 1.0388, - "step": 3704 - }, - { - "epoch": 0.445499909817832, - "grad_norm": 2.4192312269524114, - "learning_rate": 2.4464521421682966e-06, - "loss": 0.9448, - "step": 3705 - }, - { - "epoch": 0.4456201527084711, - "grad_norm": 1.2826033167279995, - "learning_rate": 2.4456927927015345e-06, - "loss": 1.0728, - "step": 3706 - }, - { - "epoch": 0.4457403955991102, - "grad_norm": 1.8712670359476273, - "learning_rate": 2.4449333756235307e-06, - "loss": 0.958, - "step": 3707 - }, - { - "epoch": 0.4458606384897493, - "grad_norm": 2.1481718141828208, - "learning_rate": 2.4441738910494876e-06, - "loss": 0.9932, - "step": 3708 - }, - { - "epoch": 0.4459808813803884, - "grad_norm": 2.3027547717303305, - "learning_rate": 2.4434143390946176e-06, - "loss": 1.0243, - "step": 3709 - }, - { - "epoch": 0.4461011242710275, - "grad_norm": 1.9377421566161335, - "learning_rate": 2.4426547198741457e-06, - "loss": 1.0485, - "step": 3710 - }, - { - "epoch": 0.44622136716166655, - "grad_norm": 1.9688472421033678, - "learning_rate": 2.441895033503305e-06, - "loss": 0.9504, - "step": 3711 - }, - { - "epoch": 0.44634161005230566, - "grad_norm": 1.894628148697886, - "learning_rate": 2.4411352800973375e-06, - "loss": 1.0286, - "step": 3712 - }, - { - "epoch": 0.44646185294294477, - "grad_norm": 2.6995758394574096, - "learning_rate": 2.4403754597715005e-06, - "loss": 0.9549, - "step": 3713 - }, - { - "epoch": 0.4465820958335838, - "grad_norm": 1.926170387182394, - "learning_rate": 2.4396155726410553e-06, - "loss": 1.1187, - "step": 3714 - }, - { - "epoch": 0.44670233872422294, - "grad_norm": 2.4318851455245474, - "learning_rate": 2.438855618821278e-06, - "loss": 1.1103, - "step": 3715 - }, - { - "epoch": 0.44682258161486205, - "grad_norm": 1.6883508868244823, - "learning_rate": 2.4380955984274517e-06, - "loss": 0.8792, - "step": 3716 - }, - { - "epoch": 0.4469428245055011, - "grad_norm": 1.7294872247246695, - "learning_rate": 2.4373355115748716e-06, - "loss": 0.9739, - "step": 3717 - }, - { - "epoch": 0.4470630673961402, - "grad_norm": 1.6375621725611813, - "learning_rate": 2.436575358378842e-06, - "loss": 0.9212, - "step": 3718 - }, - { - "epoch": 0.44718331028677927, - "grad_norm": 2.5736666884794293, - "learning_rate": 2.4358151389546782e-06, - "loss": 1.0341, - "step": 3719 - }, - { - "epoch": 0.4473035531774184, - "grad_norm": 2.313489032291478, - "learning_rate": 2.4350548534177035e-06, - "loss": 0.9615, - "step": 3720 - }, - { - "epoch": 0.4474237960680575, - "grad_norm": 1.5055338189752248, - "learning_rate": 2.434294501883254e-06, - "loss": 0.8694, - "step": 3721 - }, - { - "epoch": 0.44754403895869654, - "grad_norm": 1.9272167125286042, - "learning_rate": 2.433534084466674e-06, - "loss": 0.8591, - "step": 3722 - }, - { - "epoch": 0.44766428184933565, - "grad_norm": 1.3957816536669103, - "learning_rate": 2.4327736012833178e-06, - "loss": 0.9112, - "step": 3723 - }, - { - "epoch": 0.44778452473997477, - "grad_norm": 1.9045120195974448, - "learning_rate": 2.4320130524485506e-06, - "loss": 0.9669, - "step": 3724 - }, - { - "epoch": 0.4479047676306138, - "grad_norm": 1.3107399720065374, - "learning_rate": 2.431252438077746e-06, - "loss": 0.9955, - "step": 3725 - }, - { - "epoch": 0.44802501052125293, - "grad_norm": 2.093929340026585, - "learning_rate": 2.4304917582862906e-06, - "loss": 0.9661, - "step": 3726 - }, - { - "epoch": 0.44814525341189204, - "grad_norm": 2.5167645071181273, - "learning_rate": 2.4297310131895774e-06, - "loss": 1.0859, - "step": 3727 - }, - { - "epoch": 0.4482654963025311, - "grad_norm": 2.2090135162616944, - "learning_rate": 2.4289702029030113e-06, - "loss": 0.9641, - "step": 3728 - }, - { - "epoch": 0.4483857391931702, - "grad_norm": 1.5676173524870025, - "learning_rate": 2.4282093275420057e-06, - "loss": 1.0198, - "step": 3729 - }, - { - "epoch": 0.4485059820838093, - "grad_norm": 2.1470277912347173, - "learning_rate": 2.4274483872219863e-06, - "loss": 0.8974, - "step": 3730 - }, - { - "epoch": 0.4486262249744484, - "grad_norm": 1.6257379763488702, - "learning_rate": 2.426687382058386e-06, - "loss": 1.126, - "step": 3731 - }, - { - "epoch": 0.4487464678650875, - "grad_norm": 1.0201709830268577, - "learning_rate": 2.425926312166649e-06, - "loss": 0.8141, - "step": 3732 - }, - { - "epoch": 0.4488667107557266, - "grad_norm": 4.458449222567736, - "learning_rate": 2.42516517766223e-06, - "loss": 0.9187, - "step": 3733 - }, - { - "epoch": 0.44898695364636565, - "grad_norm": 2.0298333730102773, - "learning_rate": 2.4244039786605907e-06, - "loss": 0.8775, - "step": 3734 - }, - { - "epoch": 0.44910719653700476, - "grad_norm": 2.3254420034639884, - "learning_rate": 2.4236427152772055e-06, - "loss": 1.0237, - "step": 3735 - }, - { - "epoch": 0.4492274394276438, - "grad_norm": 1.0020676932168506, - "learning_rate": 2.422881387627557e-06, - "loss": 0.8046, - "step": 3736 - }, - { - "epoch": 0.4493476823182829, - "grad_norm": 1.4600750830691767, - "learning_rate": 2.422119995827139e-06, - "loss": 0.9661, - "step": 3737 - }, - { - "epoch": 0.44946792520892204, - "grad_norm": 2.268754573428586, - "learning_rate": 2.4213585399914528e-06, - "loss": 0.9364, - "step": 3738 - }, - { - "epoch": 0.4495881680995611, - "grad_norm": 1.5791765241223001, - "learning_rate": 2.4205970202360113e-06, - "loss": 1.0394, - "step": 3739 - }, - { - "epoch": 0.4497084109902002, - "grad_norm": 1.9625624045393883, - "learning_rate": 2.4198354366763354e-06, - "loss": 0.99, - "step": 3740 - }, - { - "epoch": 0.4498286538808393, - "grad_norm": 2.138319124019486, - "learning_rate": 2.4190737894279587e-06, - "loss": 0.9815, - "step": 3741 - }, - { - "epoch": 0.44994889677147837, - "grad_norm": 2.1409226927539624, - "learning_rate": 2.4183120786064203e-06, - "loss": 1.0046, - "step": 3742 - }, - { - "epoch": 0.4500691396621175, - "grad_norm": 2.555148208559789, - "learning_rate": 2.417550304327273e-06, - "loss": 1.0498, - "step": 3743 - }, - { - "epoch": 0.4501893825527566, - "grad_norm": 1.4766786721360803, - "learning_rate": 2.4167884667060763e-06, - "loss": 0.9565, - "step": 3744 - }, - { - "epoch": 0.45030962544339564, - "grad_norm": 1.8653308783527076, - "learning_rate": 2.4160265658584e-06, - "loss": 1.0698, - "step": 3745 - }, - { - "epoch": 0.45042986833403476, - "grad_norm": 1.8622012417192306, - "learning_rate": 2.4152646018998253e-06, - "loss": 0.8767, - "step": 3746 - }, - { - "epoch": 0.45055011122467387, - "grad_norm": 1.7816094153007709, - "learning_rate": 2.4145025749459403e-06, - "loss": 0.9118, - "step": 3747 - }, - { - "epoch": 0.4506703541153129, - "grad_norm": 1.8393322016837075, - "learning_rate": 2.413740485112344e-06, - "loss": 0.8989, - "step": 3748 - }, - { - "epoch": 0.45079059700595203, - "grad_norm": 1.478084849161096, - "learning_rate": 2.412978332514646e-06, - "loss": 1.017, - "step": 3749 - }, - { - "epoch": 0.4509108398965911, - "grad_norm": 1.873403632563391, - "learning_rate": 2.4122161172684623e-06, - "loss": 0.9237, - "step": 3750 - }, - { - "epoch": 0.4510310827872302, - "grad_norm": 2.219971871068956, - "learning_rate": 2.4114538394894216e-06, - "loss": 1.0296, - "step": 3751 - }, - { - "epoch": 0.4511513256778693, - "grad_norm": 1.7245716337089918, - "learning_rate": 2.410691499293161e-06, - "loss": 1.032, - "step": 3752 - }, - { - "epoch": 0.45127156856850836, - "grad_norm": 1.478997377549349, - "learning_rate": 2.409929096795326e-06, - "loss": 0.9437, - "step": 3753 - }, - { - "epoch": 0.4513918114591475, - "grad_norm": 2.12868964816976, - "learning_rate": 2.409166632111573e-06, - "loss": 0.9869, - "step": 3754 - }, - { - "epoch": 0.4515120543497866, - "grad_norm": 2.625462082655344, - "learning_rate": 2.4084041053575674e-06, - "loss": 0.9942, - "step": 3755 - }, - { - "epoch": 0.45163229724042564, - "grad_norm": 1.6907627211795404, - "learning_rate": 2.4076415166489834e-06, - "loss": 0.9201, - "step": 3756 - }, - { - "epoch": 0.45175254013106475, - "grad_norm": 1.474471291651616, - "learning_rate": 2.406878866101506e-06, - "loss": 0.9975, - "step": 3757 - }, - { - "epoch": 0.45187278302170386, - "grad_norm": 2.008704080300121, - "learning_rate": 2.4061161538308273e-06, - "loss": 0.9811, - "step": 3758 - }, - { - "epoch": 0.4519930259123429, - "grad_norm": 2.422999164185988, - "learning_rate": 2.4053533799526523e-06, - "loss": 1.0875, - "step": 3759 - }, - { - "epoch": 0.452113268802982, - "grad_norm": 1.6996824837533635, - "learning_rate": 2.404590544582691e-06, - "loss": 1.0628, - "step": 3760 - }, - { - "epoch": 0.45223351169362114, - "grad_norm": 1.8761698570137362, - "learning_rate": 2.403827647836666e-06, - "loss": 1.0001, - "step": 3761 - }, - { - "epoch": 0.4523537545842602, - "grad_norm": 1.6277137251538052, - "learning_rate": 2.4030646898303075e-06, - "loss": 0.8995, - "step": 3762 - }, - { - "epoch": 0.4524739974748993, - "grad_norm": 2.148961339724409, - "learning_rate": 2.4023016706793566e-06, - "loss": 1.028, - "step": 3763 - }, - { - "epoch": 0.4525942403655384, - "grad_norm": 0.8774761857499197, - "learning_rate": 2.401538590499561e-06, - "loss": 0.7997, - "step": 3764 - }, - { - "epoch": 0.45271448325617747, - "grad_norm": 1.7428232581107703, - "learning_rate": 2.400775449406682e-06, - "loss": 0.9077, - "step": 3765 - }, - { - "epoch": 0.4528347261468166, - "grad_norm": 1.773533150534354, - "learning_rate": 2.400012247516485e-06, - "loss": 0.9238, - "step": 3766 - }, - { - "epoch": 0.45295496903745563, - "grad_norm": 1.7288480105030661, - "learning_rate": 2.3992489849447484e-06, - "loss": 1.0916, - "step": 3767 - }, - { - "epoch": 0.45307521192809475, - "grad_norm": 1.4872034530601612, - "learning_rate": 2.3984856618072584e-06, - "loss": 0.9882, - "step": 3768 - }, - { - "epoch": 0.45319545481873386, - "grad_norm": 3.534059605240975, - "learning_rate": 2.3977222782198098e-06, - "loss": 0.9324, - "step": 3769 - }, - { - "epoch": 0.4533156977093729, - "grad_norm": 1.681301317530802, - "learning_rate": 2.3969588342982077e-06, - "loss": 0.9511, - "step": 3770 - }, - { - "epoch": 0.453435940600012, - "grad_norm": 1.8437951939103192, - "learning_rate": 2.396195330158267e-06, - "loss": 0.9245, - "step": 3771 - }, - { - "epoch": 0.45355618349065113, - "grad_norm": 2.2275943975001753, - "learning_rate": 2.3954317659158094e-06, - "loss": 0.9877, - "step": 3772 - }, - { - "epoch": 0.4536764263812902, - "grad_norm": 1.0391992464645743, - "learning_rate": 2.394668141686667e-06, - "loss": 0.8187, - "step": 3773 - }, - { - "epoch": 0.4537966692719293, - "grad_norm": 1.7561227772745005, - "learning_rate": 2.3939044575866813e-06, - "loss": 0.8952, - "step": 3774 - }, - { - "epoch": 0.4539169121625684, - "grad_norm": 1.8663225924048872, - "learning_rate": 2.3931407137317024e-06, - "loss": 0.9569, - "step": 3775 - }, - { - "epoch": 0.45403715505320746, - "grad_norm": 1.6014215174704727, - "learning_rate": 2.3923769102375907e-06, - "loss": 1.0481, - "step": 3776 - }, - { - "epoch": 0.4541573979438466, - "grad_norm": 2.0269852690648396, - "learning_rate": 2.391613047220213e-06, - "loss": 0.9843, - "step": 3777 - }, - { - "epoch": 0.4542776408344857, - "grad_norm": 2.9774450560656276, - "learning_rate": 2.390849124795447e-06, - "loss": 0.9956, - "step": 3778 - }, - { - "epoch": 0.45439788372512474, - "grad_norm": 2.0800692992403724, - "learning_rate": 2.3900851430791804e-06, - "loss": 1.0401, - "step": 3779 - }, - { - "epoch": 0.45451812661576385, - "grad_norm": 2.0272119144800125, - "learning_rate": 2.389321102187307e-06, - "loss": 1.0522, - "step": 3780 - }, - { - "epoch": 0.4546383695064029, - "grad_norm": 1.6016809931394516, - "learning_rate": 2.3885570022357326e-06, - "loss": 1.0219, - "step": 3781 - }, - { - "epoch": 0.454758612397042, - "grad_norm": 0.8977534800063406, - "learning_rate": 2.38779284334037e-06, - "loss": 0.8233, - "step": 3782 - }, - { - "epoch": 0.4548788552876811, - "grad_norm": 1.7955325211777518, - "learning_rate": 2.387028625617141e-06, - "loss": 0.984, - "step": 3783 - }, - { - "epoch": 0.4549990981783202, - "grad_norm": 1.8459599069062345, - "learning_rate": 2.3862643491819766e-06, - "loss": 1.0439, - "step": 3784 - }, - { - "epoch": 0.4551193410689593, - "grad_norm": 1.658125481287329, - "learning_rate": 2.3855000141508186e-06, - "loss": 1.0405, - "step": 3785 - }, - { - "epoch": 0.4552395839595984, - "grad_norm": 1.9914796057712056, - "learning_rate": 2.3847356206396143e-06, - "loss": 1.0384, - "step": 3786 - }, - { - "epoch": 0.45535982685023746, - "grad_norm": 1.4540400751460103, - "learning_rate": 2.3839711687643227e-06, - "loss": 0.9841, - "step": 3787 - }, - { - "epoch": 0.45548006974087657, - "grad_norm": 1.782771748805046, - "learning_rate": 2.38320665864091e-06, - "loss": 0.9397, - "step": 3788 - }, - { - "epoch": 0.4556003126315157, - "grad_norm": 1.5446741650634932, - "learning_rate": 2.3824420903853516e-06, - "loss": 1.0152, - "step": 3789 - }, - { - "epoch": 0.45572055552215474, - "grad_norm": 1.9252577373813304, - "learning_rate": 2.3816774641136324e-06, - "loss": 1.0199, - "step": 3790 - }, - { - "epoch": 0.45584079841279385, - "grad_norm": 1.6255159292622758, - "learning_rate": 2.380912779941745e-06, - "loss": 0.9196, - "step": 3791 - }, - { - "epoch": 0.45596104130343296, - "grad_norm": 1.966557083740729, - "learning_rate": 2.3801480379856918e-06, - "loss": 1.0312, - "step": 3792 - }, - { - "epoch": 0.456081284194072, - "grad_norm": 1.5973215661966103, - "learning_rate": 2.379383238361484e-06, - "loss": 1.0378, - "step": 3793 - }, - { - "epoch": 0.4562015270847111, - "grad_norm": 1.9196430054217817, - "learning_rate": 2.3786183811851407e-06, - "loss": 0.9938, - "step": 3794 - }, - { - "epoch": 0.45632176997535023, - "grad_norm": 1.7383178707080023, - "learning_rate": 2.3778534665726892e-06, - "loss": 1.0027, - "step": 3795 - }, - { - "epoch": 0.4564420128659893, - "grad_norm": 1.6261963475237124, - "learning_rate": 2.377088494640168e-06, - "loss": 0.9257, - "step": 3796 - }, - { - "epoch": 0.4565622557566284, - "grad_norm": 2.014649147555013, - "learning_rate": 2.3763234655036216e-06, - "loss": 0.9823, - "step": 3797 - }, - { - "epoch": 0.45668249864726745, - "grad_norm": 1.8327636556141351, - "learning_rate": 2.3755583792791046e-06, - "loss": 1.0618, - "step": 3798 - }, - { - "epoch": 0.45680274153790656, - "grad_norm": 2.0327696764200924, - "learning_rate": 2.3747932360826803e-06, - "loss": 0.9452, - "step": 3799 - }, - { - "epoch": 0.4569229844285457, - "grad_norm": 1.7483460239620143, - "learning_rate": 2.3740280360304205e-06, - "loss": 1.0112, - "step": 3800 - }, - { - "epoch": 0.45704322731918473, - "grad_norm": 1.5178618294779767, - "learning_rate": 2.3732627792384038e-06, - "loss": 0.8795, - "step": 3801 - }, - { - "epoch": 0.45716347020982384, - "grad_norm": 1.8725232614618106, - "learning_rate": 2.3724974658227207e-06, - "loss": 0.9488, - "step": 3802 - }, - { - "epoch": 0.45728371310046295, - "grad_norm": 1.748005752506, - "learning_rate": 2.3717320958994687e-06, - "loss": 0.9069, - "step": 3803 - }, - { - "epoch": 0.457403955991102, - "grad_norm": 1.8620921565276665, - "learning_rate": 2.3709666695847534e-06, - "loss": 0.8952, - "step": 3804 - }, - { - "epoch": 0.4575241988817411, - "grad_norm": 1.9055635435729663, - "learning_rate": 2.370201186994689e-06, - "loss": 0.9087, - "step": 3805 - }, - { - "epoch": 0.45764444177238023, - "grad_norm": 1.9040769479373416, - "learning_rate": 2.369435648245399e-06, - "loss": 0.8934, - "step": 3806 - }, - { - "epoch": 0.4577646846630193, - "grad_norm": 1.616086870946169, - "learning_rate": 2.368670053453015e-06, - "loss": 1.0527, - "step": 3807 - }, - { - "epoch": 0.4578849275536584, - "grad_norm": 2.287894201446859, - "learning_rate": 2.3679044027336757e-06, - "loss": 0.9345, - "step": 3808 - }, - { - "epoch": 0.4580051704442975, - "grad_norm": 2.628711800943929, - "learning_rate": 2.3671386962035326e-06, - "loss": 0.8905, - "step": 3809 - }, - { - "epoch": 0.45812541333493656, - "grad_norm": 1.6695277769617596, - "learning_rate": 2.3663729339787405e-06, - "loss": 0.893, - "step": 3810 - }, - { - "epoch": 0.45824565622557567, - "grad_norm": 2.0264115441304864, - "learning_rate": 2.365607116175466e-06, - "loss": 0.9252, - "step": 3811 - }, - { - "epoch": 0.4583658991162148, - "grad_norm": 2.3893255554240085, - "learning_rate": 2.3648412429098825e-06, - "loss": 0.8709, - "step": 3812 - }, - { - "epoch": 0.45848614200685384, - "grad_norm": 2.1230225057177172, - "learning_rate": 2.364075314298172e-06, - "loss": 1.0245, - "step": 3813 - }, - { - "epoch": 0.45860638489749295, - "grad_norm": 1.7177220825360635, - "learning_rate": 2.3633093304565267e-06, - "loss": 0.9059, - "step": 3814 - }, - { - "epoch": 0.458726627788132, - "grad_norm": 1.5493695730565946, - "learning_rate": 2.3625432915011443e-06, - "loss": 0.8329, - "step": 3815 - }, - { - "epoch": 0.4588468706787711, - "grad_norm": 1.5130614791199193, - "learning_rate": 2.3617771975482334e-06, - "loss": 0.8551, - "step": 3816 - }, - { - "epoch": 0.4589671135694102, - "grad_norm": 1.4954375179888795, - "learning_rate": 2.3610110487140083e-06, - "loss": 0.9481, - "step": 3817 - }, - { - "epoch": 0.4590873564600493, - "grad_norm": 1.4820984901051442, - "learning_rate": 2.360244845114695e-06, - "loss": 1.0156, - "step": 3818 - }, - { - "epoch": 0.4592075993506884, - "grad_norm": 2.4490281415687734, - "learning_rate": 2.3594785868665245e-06, - "loss": 0.8857, - "step": 3819 - }, - { - "epoch": 0.4593278422413275, - "grad_norm": 1.7504154802024916, - "learning_rate": 2.3587122740857386e-06, - "loss": 1.0014, - "step": 3820 - }, - { - "epoch": 0.45944808513196655, - "grad_norm": 1.7361732988875682, - "learning_rate": 2.357945906888586e-06, - "loss": 0.9828, - "step": 3821 - }, - { - "epoch": 0.45956832802260567, - "grad_norm": 2.0620965274006626, - "learning_rate": 2.357179485391324e-06, - "loss": 1.0006, - "step": 3822 - }, - { - "epoch": 0.4596885709132448, - "grad_norm": 1.5946492668710182, - "learning_rate": 2.3564130097102173e-06, - "loss": 1.053, - "step": 3823 - }, - { - "epoch": 0.45980881380388383, - "grad_norm": 1.7032176001336856, - "learning_rate": 2.355646479961541e-06, - "loss": 0.9412, - "step": 3824 - }, - { - "epoch": 0.45992905669452294, - "grad_norm": 1.7155775442317427, - "learning_rate": 2.354879896261576e-06, - "loss": 0.9167, - "step": 3825 - }, - { - "epoch": 0.46004929958516205, - "grad_norm": 1.6740271006023246, - "learning_rate": 2.3541132587266133e-06, - "loss": 0.7704, - "step": 3826 - }, - { - "epoch": 0.4601695424758011, - "grad_norm": 2.184331288261898, - "learning_rate": 2.3533465674729515e-06, - "loss": 0.8944, - "step": 3827 - }, - { - "epoch": 0.4602897853664402, - "grad_norm": 2.0909352527082388, - "learning_rate": 2.352579822616895e-06, - "loss": 0.939, - "step": 3828 - }, - { - "epoch": 0.4604100282570793, - "grad_norm": 1.5001188866867639, - "learning_rate": 2.351813024274761e-06, - "loss": 0.9822, - "step": 3829 - }, - { - "epoch": 0.4605302711477184, - "grad_norm": 1.8005748307754177, - "learning_rate": 2.3510461725628693e-06, - "loss": 0.9254, - "step": 3830 - }, - { - "epoch": 0.4606505140383575, - "grad_norm": 3.2662714998141302, - "learning_rate": 2.350279267597554e-06, - "loss": 0.9027, - "step": 3831 - }, - { - "epoch": 0.46077075692899655, - "grad_norm": 1.9642549091252033, - "learning_rate": 2.3495123094951515e-06, - "loss": 1.0287, - "step": 3832 - }, - { - "epoch": 0.46089099981963566, - "grad_norm": 1.934406887894729, - "learning_rate": 2.34874529837201e-06, - "loss": 0.9579, - "step": 3833 - }, - { - "epoch": 0.46101124271027477, - "grad_norm": 1.7963670095454878, - "learning_rate": 2.347978234344483e-06, - "loss": 0.9923, - "step": 3834 - }, - { - "epoch": 0.4611314856009138, - "grad_norm": 2.502319404419308, - "learning_rate": 2.347211117528935e-06, - "loss": 0.8934, - "step": 3835 - }, - { - "epoch": 0.46125172849155294, - "grad_norm": 1.3909303994170013, - "learning_rate": 2.3464439480417374e-06, - "loss": 0.912, - "step": 3836 - }, - { - "epoch": 0.46137197138219205, - "grad_norm": 3.938412030876299, - "learning_rate": 2.3456767259992676e-06, - "loss": 0.9714, - "step": 3837 - }, - { - "epoch": 0.4614922142728311, - "grad_norm": 2.003462654970958, - "learning_rate": 2.3449094515179135e-06, - "loss": 1.0842, - "step": 3838 - }, - { - "epoch": 0.4616124571634702, - "grad_norm": 1.5016835619406097, - "learning_rate": 2.34414212471407e-06, - "loss": 1.0124, - "step": 3839 - }, - { - "epoch": 0.4617327000541093, - "grad_norm": 1.7361938287314849, - "learning_rate": 2.3433747457041394e-06, - "loss": 0.9303, - "step": 3840 - }, - { - "epoch": 0.4618529429447484, - "grad_norm": 1.8190804433666716, - "learning_rate": 2.342607314604533e-06, - "loss": 1.0406, - "step": 3841 - }, - { - "epoch": 0.4619731858353875, - "grad_norm": 1.7021396131171862, - "learning_rate": 2.3418398315316694e-06, - "loss": 1.043, - "step": 3842 - }, - { - "epoch": 0.4620934287260266, - "grad_norm": 2.0593177058074716, - "learning_rate": 2.3410722966019755e-06, - "loss": 0.9811, - "step": 3843 - }, - { - "epoch": 0.46221367161666566, - "grad_norm": 1.7230647634244856, - "learning_rate": 2.3403047099318848e-06, - "loss": 0.8598, - "step": 3844 - }, - { - "epoch": 0.46233391450730477, - "grad_norm": 2.1278234685513437, - "learning_rate": 2.3395370716378405e-06, - "loss": 0.9467, - "step": 3845 - }, - { - "epoch": 0.4624541573979438, - "grad_norm": 2.015036800228327, - "learning_rate": 2.338769381836292e-06, - "loss": 0.9279, - "step": 3846 - }, - { - "epoch": 0.46257440028858293, - "grad_norm": 2.066226251907633, - "learning_rate": 2.3380016406436984e-06, - "loss": 0.9302, - "step": 3847 - }, - { - "epoch": 0.46269464317922204, - "grad_norm": 1.7790445427295711, - "learning_rate": 2.337233848176524e-06, - "loss": 1.0165, - "step": 3848 - }, - { - "epoch": 0.4628148860698611, - "grad_norm": 1.7328106122569886, - "learning_rate": 2.3364660045512435e-06, - "loss": 1.0165, - "step": 3849 - }, - { - "epoch": 0.4629351289605002, - "grad_norm": 0.9331272507964249, - "learning_rate": 2.335698109884337e-06, - "loss": 0.8229, - "step": 3850 - }, - { - "epoch": 0.4630553718511393, - "grad_norm": 0.9129019648802931, - "learning_rate": 2.334930164292294e-06, - "loss": 0.8501, - "step": 3851 - }, - { - "epoch": 0.4631756147417784, - "grad_norm": 3.898180385512344, - "learning_rate": 2.334162167891612e-06, - "loss": 0.9961, - "step": 3852 - }, - { - "epoch": 0.4632958576324175, - "grad_norm": 2.6257012656689724, - "learning_rate": 2.333394120798795e-06, - "loss": 0.942, - "step": 3853 - }, - { - "epoch": 0.4634161005230566, - "grad_norm": 1.963172277484267, - "learning_rate": 2.3326260231303545e-06, - "loss": 0.9267, - "step": 3854 - }, - { - "epoch": 0.46353634341369565, - "grad_norm": 1.6017057773420875, - "learning_rate": 2.331857875002811e-06, - "loss": 1.0756, - "step": 3855 - }, - { - "epoch": 0.46365658630433476, - "grad_norm": 1.6424989443097722, - "learning_rate": 2.3310896765326916e-06, - "loss": 0.9594, - "step": 3856 - }, - { - "epoch": 0.46377682919497387, - "grad_norm": 1.5984920936128824, - "learning_rate": 2.330321427836531e-06, - "loss": 1.0349, - "step": 3857 - }, - { - "epoch": 0.4638970720856129, - "grad_norm": 1.4445509270193846, - "learning_rate": 2.3295531290308733e-06, - "loss": 1.0331, - "step": 3858 - }, - { - "epoch": 0.46401731497625204, - "grad_norm": 2.928187930022223, - "learning_rate": 2.3287847802322678e-06, - "loss": 0.9569, - "step": 3859 - }, - { - "epoch": 0.4641375578668911, - "grad_norm": 1.7335042056199632, - "learning_rate": 2.3280163815572723e-06, - "loss": 1.0371, - "step": 3860 - }, - { - "epoch": 0.4642578007575302, - "grad_norm": 1.7747416133593794, - "learning_rate": 2.3272479331224522e-06, - "loss": 0.9622, - "step": 3861 - }, - { - "epoch": 0.4643780436481693, - "grad_norm": 1.4977980982826071, - "learning_rate": 2.3264794350443817e-06, - "loss": 0.9778, - "step": 3862 - }, - { - "epoch": 0.46449828653880837, - "grad_norm": 1.759976017745222, - "learning_rate": 2.3257108874396396e-06, - "loss": 0.9888, - "step": 3863 - }, - { - "epoch": 0.4646185294294475, - "grad_norm": 2.0027618412272044, - "learning_rate": 2.3249422904248152e-06, - "loss": 0.9371, - "step": 3864 - }, - { - "epoch": 0.4647387723200866, - "grad_norm": 1.385119953744986, - "learning_rate": 2.324173644116504e-06, - "loss": 1.0676, - "step": 3865 - }, - { - "epoch": 0.46485901521072565, - "grad_norm": 1.612317478584369, - "learning_rate": 2.3234049486313087e-06, - "loss": 1.0184, - "step": 3866 - }, - { - "epoch": 0.46497925810136476, - "grad_norm": 1.7973075014478581, - "learning_rate": 2.322636204085839e-06, - "loss": 0.9675, - "step": 3867 - }, - { - "epoch": 0.46509950099200387, - "grad_norm": 2.3263413938415582, - "learning_rate": 2.3218674105967143e-06, - "loss": 0.976, - "step": 3868 - }, - { - "epoch": 0.4652197438826429, - "grad_norm": 1.5437028514750966, - "learning_rate": 2.3210985682805593e-06, - "loss": 1.0409, - "step": 3869 - }, - { - "epoch": 0.46533998677328203, - "grad_norm": 2.286915054992205, - "learning_rate": 2.320329677254007e-06, - "loss": 0.8812, - "step": 3870 - }, - { - "epoch": 0.46546022966392114, - "grad_norm": 6.711890689182531, - "learning_rate": 2.319560737633697e-06, - "loss": 0.9321, - "step": 3871 - }, - { - "epoch": 0.4655804725545602, - "grad_norm": 1.452335348202441, - "learning_rate": 2.3187917495362775e-06, - "loss": 0.8788, - "step": 3872 - }, - { - "epoch": 0.4657007154451993, - "grad_norm": 2.3239085270907625, - "learning_rate": 2.318022713078403e-06, - "loss": 0.9572, - "step": 3873 - }, - { - "epoch": 0.4658209583358384, - "grad_norm": 2.123422598207576, - "learning_rate": 2.3172536283767354e-06, - "loss": 1.0396, - "step": 3874 - }, - { - "epoch": 0.4659412012264775, - "grad_norm": 1.7375510064192208, - "learning_rate": 2.3164844955479447e-06, - "loss": 1.0132, - "step": 3875 - }, - { - "epoch": 0.4660614441171166, - "grad_norm": 1.6742852208928647, - "learning_rate": 2.3157153147087082e-06, - "loss": 0.9015, - "step": 3876 - }, - { - "epoch": 0.46618168700775564, - "grad_norm": 1.5656358151243142, - "learning_rate": 2.314946085975709e-06, - "loss": 1.0326, - "step": 3877 - }, - { - "epoch": 0.46630192989839475, - "grad_norm": 1.7731226166798895, - "learning_rate": 2.3141768094656393e-06, - "loss": 1.0234, - "step": 3878 - }, - { - "epoch": 0.46642217278903386, - "grad_norm": 2.3068249117680057, - "learning_rate": 2.3134074852951966e-06, - "loss": 1.0279, - "step": 3879 - }, - { - "epoch": 0.4665424156796729, - "grad_norm": 1.5624962615922076, - "learning_rate": 2.312638113581088e-06, - "loss": 0.9791, - "step": 3880 - }, - { - "epoch": 0.46666265857031203, - "grad_norm": 2.4769880729391223, - "learning_rate": 2.311868694440027e-06, - "loss": 0.9808, - "step": 3881 - }, - { - "epoch": 0.46678290146095114, - "grad_norm": 0.9040556023470071, - "learning_rate": 2.3110992279887323e-06, - "loss": 0.8366, - "step": 3882 - }, - { - "epoch": 0.4669031443515902, - "grad_norm": 2.1300476933902073, - "learning_rate": 2.310329714343932e-06, - "loss": 1.0518, - "step": 3883 - }, - { - "epoch": 0.4670233872422293, - "grad_norm": 1.8791625547963202, - "learning_rate": 2.309560153622361e-06, - "loss": 1.0173, - "step": 3884 - }, - { - "epoch": 0.4671436301328684, - "grad_norm": 2.0687460677463925, - "learning_rate": 2.3087905459407602e-06, - "loss": 0.9451, - "step": 3885 - }, - { - "epoch": 0.46726387302350747, - "grad_norm": 0.9723543058469432, - "learning_rate": 2.3080208914158795e-06, - "loss": 0.8763, - "step": 3886 - }, - { - "epoch": 0.4673841159141466, - "grad_norm": 1.9772833312656106, - "learning_rate": 2.3072511901644753e-06, - "loss": 0.9071, - "step": 3887 - }, - { - "epoch": 0.4675043588047857, - "grad_norm": 2.105182805221936, - "learning_rate": 2.306481442303309e-06, - "loss": 1.004, - "step": 3888 - }, - { - "epoch": 0.46762460169542475, - "grad_norm": 1.7966699234408632, - "learning_rate": 2.3057116479491515e-06, - "loss": 0.9304, - "step": 3889 - }, - { - "epoch": 0.46774484458606386, - "grad_norm": 1.8201382926129102, - "learning_rate": 2.30494180721878e-06, - "loss": 0.9707, - "step": 3890 - }, - { - "epoch": 0.4678650874767029, - "grad_norm": 1.7289811536557174, - "learning_rate": 2.3041719202289794e-06, - "loss": 1.0993, - "step": 3891 - }, - { - "epoch": 0.467985330367342, - "grad_norm": 1.6212802274146392, - "learning_rate": 2.30340198709654e-06, - "loss": 0.998, - "step": 3892 - }, - { - "epoch": 0.46810557325798113, - "grad_norm": 2.139668828883606, - "learning_rate": 2.3026320079382605e-06, - "loss": 0.9505, - "step": 3893 - }, - { - "epoch": 0.4682258161486202, - "grad_norm": 1.7998144901896638, - "learning_rate": 2.3018619828709454e-06, - "loss": 0.966, - "step": 3894 - }, - { - "epoch": 0.4683460590392593, - "grad_norm": 1.982441839108219, - "learning_rate": 2.3010919120114084e-06, - "loss": 1.0185, - "step": 3895 - }, - { - "epoch": 0.4684663019298984, - "grad_norm": 2.5821163838673935, - "learning_rate": 2.3003217954764672e-06, - "loss": 0.8591, - "step": 3896 - }, - { - "epoch": 0.46858654482053747, - "grad_norm": 1.5782484251800324, - "learning_rate": 2.299551633382949e-06, - "loss": 0.9971, - "step": 3897 - }, - { - "epoch": 0.4687067877111766, - "grad_norm": 2.2328705958288952, - "learning_rate": 2.2987814258476854e-06, - "loss": 1.0467, - "step": 3898 - }, - { - "epoch": 0.4688270306018157, - "grad_norm": 2.304633188415934, - "learning_rate": 2.2980111729875177e-06, - "loss": 0.877, - "step": 3899 - }, - { - "epoch": 0.46894727349245474, - "grad_norm": 2.639756370197872, - "learning_rate": 2.2972408749192917e-06, - "loss": 1.0226, - "step": 3900 - }, - { - "epoch": 0.46906751638309385, - "grad_norm": 1.8766160993838226, - "learning_rate": 2.296470531759861e-06, - "loss": 0.8757, - "step": 3901 - }, - { - "epoch": 0.46918775927373296, - "grad_norm": 1.816638300029691, - "learning_rate": 2.2957001436260866e-06, - "loss": 1.0002, - "step": 3902 - }, - { - "epoch": 0.469308002164372, - "grad_norm": 1.6468435961282548, - "learning_rate": 2.294929710634836e-06, - "loss": 0.9227, - "step": 3903 - }, - { - "epoch": 0.46942824505501113, - "grad_norm": 1.7654050841982036, - "learning_rate": 2.2941592329029823e-06, - "loss": 0.8116, - "step": 3904 - }, - { - "epoch": 0.46954848794565024, - "grad_norm": 1.7202275947316947, - "learning_rate": 2.2933887105474067e-06, - "loss": 0.9831, - "step": 3905 - }, - { - "epoch": 0.4696687308362893, - "grad_norm": 1.3791688231406203, - "learning_rate": 2.2926181436849974e-06, - "loss": 1.0061, - "step": 3906 - }, - { - "epoch": 0.4697889737269284, - "grad_norm": 1.5286674519645136, - "learning_rate": 2.2918475324326478e-06, - "loss": 0.9224, - "step": 3907 - }, - { - "epoch": 0.46990921661756746, - "grad_norm": 1.7967676545296594, - "learning_rate": 2.2910768769072603e-06, - "loss": 1.1011, - "step": 3908 - }, - { - "epoch": 0.47002945950820657, - "grad_norm": 1.7639670901180473, - "learning_rate": 2.2903061772257417e-06, - "loss": 0.9555, - "step": 3909 - }, - { - "epoch": 0.4701497023988457, - "grad_norm": 1.419226474827036, - "learning_rate": 2.289535433505007e-06, - "loss": 0.9865, - "step": 3910 - }, - { - "epoch": 0.47026994528948474, - "grad_norm": 3.831150649806618, - "learning_rate": 2.2887646458619767e-06, - "loss": 0.8335, - "step": 3911 - }, - { - "epoch": 0.47039018818012385, - "grad_norm": 1.8981429762455366, - "learning_rate": 2.2879938144135797e-06, - "loss": 0.9661, - "step": 3912 - }, - { - "epoch": 0.47051043107076296, - "grad_norm": 1.7009276467689174, - "learning_rate": 2.2872229392767496e-06, - "loss": 0.9599, - "step": 3913 - }, - { - "epoch": 0.470630673961402, - "grad_norm": 1.552938656594532, - "learning_rate": 2.286452020568428e-06, - "loss": 0.9544, - "step": 3914 - }, - { - "epoch": 0.4707509168520411, - "grad_norm": 1.6779890832966464, - "learning_rate": 2.2856810584055637e-06, - "loss": 0.9354, - "step": 3915 - }, - { - "epoch": 0.47087115974268023, - "grad_norm": 1.4137368011533054, - "learning_rate": 2.2849100529051085e-06, - "loss": 0.8795, - "step": 3916 - }, - { - "epoch": 0.4709914026333193, - "grad_norm": 2.1014842192524523, - "learning_rate": 2.284139004184026e-06, - "loss": 1.0013, - "step": 3917 - }, - { - "epoch": 0.4711116455239584, - "grad_norm": 1.886639832293452, - "learning_rate": 2.2833679123592814e-06, - "loss": 0.9358, - "step": 3918 - }, - { - "epoch": 0.4712318884145975, - "grad_norm": 1.9001326665237417, - "learning_rate": 2.2825967775478508e-06, - "loss": 0.8381, - "step": 3919 - }, - { - "epoch": 0.47135213130523657, - "grad_norm": 2.2862707827266355, - "learning_rate": 2.2818255998667135e-06, - "loss": 1.0327, - "step": 3920 - }, - { - "epoch": 0.4714723741958757, - "grad_norm": 1.5181761831086582, - "learning_rate": 2.2810543794328566e-06, - "loss": 0.9911, - "step": 3921 - }, - { - "epoch": 0.4715926170865148, - "grad_norm": 1.629828761297626, - "learning_rate": 2.2802831163632735e-06, - "loss": 1.0174, - "step": 3922 - }, - { - "epoch": 0.47171285997715384, - "grad_norm": 1.5047940890863827, - "learning_rate": 2.279511810774965e-06, - "loss": 0.9388, - "step": 3923 - }, - { - "epoch": 0.47183310286779295, - "grad_norm": 1.6659231752814838, - "learning_rate": 2.2787404627849364e-06, - "loss": 0.9176, - "step": 3924 - }, - { - "epoch": 0.471953345758432, - "grad_norm": 1.6996465034962194, - "learning_rate": 2.277969072510202e-06, - "loss": 0.9876, - "step": 3925 - }, - { - "epoch": 0.4720735886490711, - "grad_norm": 1.5416999332172365, - "learning_rate": 2.2771976400677803e-06, - "loss": 1.0132, - "step": 3926 - }, - { - "epoch": 0.47219383153971023, - "grad_norm": 1.6970969095427753, - "learning_rate": 2.2764261655746965e-06, - "loss": 0.9873, - "step": 3927 - }, - { - "epoch": 0.4723140744303493, - "grad_norm": 1.502935715651491, - "learning_rate": 2.2756546491479832e-06, - "loss": 0.9566, - "step": 3928 - }, - { - "epoch": 0.4724343173209884, - "grad_norm": 2.34265670708175, - "learning_rate": 2.274883090904679e-06, - "loss": 1.0088, - "step": 3929 - }, - { - "epoch": 0.4725545602116275, - "grad_norm": 1.9162660332075565, - "learning_rate": 2.2741114909618283e-06, - "loss": 0.8751, - "step": 3930 - }, - { - "epoch": 0.47267480310226656, - "grad_norm": 1.6598304339060297, - "learning_rate": 2.2733398494364828e-06, - "loss": 0.9158, - "step": 3931 - }, - { - "epoch": 0.47279504599290567, - "grad_norm": 1.8104731309145883, - "learning_rate": 2.272568166445699e-06, - "loss": 1.0419, - "step": 3932 - }, - { - "epoch": 0.4729152888835448, - "grad_norm": 1.9751376116168737, - "learning_rate": 2.271796442106541e-06, - "loss": 0.8456, - "step": 3933 - }, - { - "epoch": 0.47303553177418384, - "grad_norm": 0.9417537190419832, - "learning_rate": 2.271024676536079e-06, - "loss": 0.8063, - "step": 3934 - }, - { - "epoch": 0.47315577466482295, - "grad_norm": 1.9933296071318605, - "learning_rate": 2.2702528698513894e-06, - "loss": 0.9365, - "step": 3935 - }, - { - "epoch": 0.47327601755546206, - "grad_norm": 1.7977613833643533, - "learning_rate": 2.269481022169554e-06, - "loss": 0.9888, - "step": 3936 - }, - { - "epoch": 0.4733962604461011, - "grad_norm": 2.1069410056539675, - "learning_rate": 2.2687091336076614e-06, - "loss": 1.0097, - "step": 3937 - }, - { - "epoch": 0.4735165033367402, - "grad_norm": 1.8008402982286134, - "learning_rate": 2.267937204282807e-06, - "loss": 0.998, - "step": 3938 - }, - { - "epoch": 0.4736367462273793, - "grad_norm": 1.7902132689371981, - "learning_rate": 2.2671652343120926e-06, - "loss": 0.9836, - "step": 3939 - }, - { - "epoch": 0.4737569891180184, - "grad_norm": 1.9156483211622388, - "learning_rate": 2.2663932238126236e-06, - "loss": 1.0004, - "step": 3940 - }, - { - "epoch": 0.4738772320086575, - "grad_norm": 1.3616167635432608, - "learning_rate": 2.265621172901515e-06, - "loss": 1.0018, - "step": 3941 - }, - { - "epoch": 0.47399747489929656, - "grad_norm": 1.8217497951088135, - "learning_rate": 2.2648490816958854e-06, - "loss": 0.9112, - "step": 3942 - }, - { - "epoch": 0.47411771778993567, - "grad_norm": 2.4858756661292642, - "learning_rate": 2.264076950312861e-06, - "loss": 0.9221, - "step": 3943 - }, - { - "epoch": 0.4742379606805748, - "grad_norm": 1.808967502278896, - "learning_rate": 2.2633047788695727e-06, - "loss": 1.0227, - "step": 3944 - }, - { - "epoch": 0.47435820357121383, - "grad_norm": 2.1986924620750496, - "learning_rate": 2.262532567483159e-06, - "loss": 0.8454, - "step": 3945 - }, - { - "epoch": 0.47447844646185294, - "grad_norm": 1.8371545531784756, - "learning_rate": 2.2617603162707635e-06, - "loss": 1.0039, - "step": 3946 - }, - { - "epoch": 0.47459868935249205, - "grad_norm": 1.6195670786831047, - "learning_rate": 2.2609880253495363e-06, - "loss": 1.0145, - "step": 3947 - }, - { - "epoch": 0.4747189322431311, - "grad_norm": 1.8397223236121332, - "learning_rate": 2.260215694836633e-06, - "loss": 1.0582, - "step": 3948 - }, - { - "epoch": 0.4748391751337702, - "grad_norm": 1.8835144000939648, - "learning_rate": 2.2594433248492157e-06, - "loss": 0.8471, - "step": 3949 - }, - { - "epoch": 0.47495941802440933, - "grad_norm": 1.6142754620131257, - "learning_rate": 2.2586709155044527e-06, - "loss": 0.9952, - "step": 3950 - }, - { - "epoch": 0.4750796609150484, - "grad_norm": 1.512069306647102, - "learning_rate": 2.2578984669195167e-06, - "loss": 0.9588, - "step": 3951 - }, - { - "epoch": 0.4751999038056875, - "grad_norm": 1.6696348780243924, - "learning_rate": 2.2571259792115887e-06, - "loss": 0.8693, - "step": 3952 - }, - { - "epoch": 0.4753201466963266, - "grad_norm": 1.6394765558174949, - "learning_rate": 2.2563534524978544e-06, - "loss": 0.9902, - "step": 3953 - }, - { - "epoch": 0.47544038958696566, - "grad_norm": 1.619600789720527, - "learning_rate": 2.2555808868955052e-06, - "loss": 0.9173, - "step": 3954 - }, - { - "epoch": 0.47556063247760477, - "grad_norm": 1.9238827770235336, - "learning_rate": 2.254808282521738e-06, - "loss": 0.9318, - "step": 3955 - }, - { - "epoch": 0.4756808753682438, - "grad_norm": 1.7714754249489661, - "learning_rate": 2.2540356394937573e-06, - "loss": 1.0121, - "step": 3956 - }, - { - "epoch": 0.47580111825888294, - "grad_norm": 1.982381104299041, - "learning_rate": 2.253262957928772e-06, - "loss": 1.0372, - "step": 3957 - }, - { - "epoch": 0.47592136114952205, - "grad_norm": 1.6422446113041578, - "learning_rate": 2.2524902379439976e-06, - "loss": 0.9244, - "step": 3958 - }, - { - "epoch": 0.4760416040401611, - "grad_norm": 0.8480457167088186, - "learning_rate": 2.251717479656655e-06, - "loss": 0.8514, - "step": 3959 - }, - { - "epoch": 0.4761618469308002, - "grad_norm": 1.75686538612235, - "learning_rate": 2.2509446831839704e-06, - "loss": 0.9508, - "step": 3960 - }, - { - "epoch": 0.4762820898214393, - "grad_norm": 2.331313951039578, - "learning_rate": 2.250171848643177e-06, - "loss": 1.0172, - "step": 3961 - }, - { - "epoch": 0.4764023327120784, - "grad_norm": 1.596016711795285, - "learning_rate": 2.249398976151513e-06, - "loss": 1.0659, - "step": 3962 - }, - { - "epoch": 0.4765225756027175, - "grad_norm": 2.578653454806235, - "learning_rate": 2.248626065826223e-06, - "loss": 0.9903, - "step": 3963 - }, - { - "epoch": 0.4766428184933566, - "grad_norm": 0.9011015164705555, - "learning_rate": 2.2478531177845564e-06, - "loss": 0.8379, - "step": 3964 - }, - { - "epoch": 0.47676306138399566, - "grad_norm": 1.5485303864396884, - "learning_rate": 2.247080132143769e-06, - "loss": 1.0476, - "step": 3965 - }, - { - "epoch": 0.47688330427463477, - "grad_norm": 2.043382533457588, - "learning_rate": 2.246307109021121e-06, - "loss": 0.8852, - "step": 3966 - }, - { - "epoch": 0.4770035471652739, - "grad_norm": 1.6398362216349567, - "learning_rate": 2.2455340485338817e-06, - "loss": 1.0231, - "step": 3967 - }, - { - "epoch": 0.47712379005591293, - "grad_norm": 1.8895604943602424, - "learning_rate": 2.244760950799322e-06, - "loss": 0.878, - "step": 3968 - }, - { - "epoch": 0.47724403294655204, - "grad_norm": 1.9379905571879745, - "learning_rate": 2.2439878159347203e-06, - "loss": 0.9265, - "step": 3969 - }, - { - "epoch": 0.4773642758371911, - "grad_norm": 1.5487588527817442, - "learning_rate": 2.2432146440573616e-06, - "loss": 0.8219, - "step": 3970 - }, - { - "epoch": 0.4774845187278302, - "grad_norm": 1.8243830278095179, - "learning_rate": 2.242441435284534e-06, - "loss": 0.8641, - "step": 3971 - }, - { - "epoch": 0.4776047616184693, - "grad_norm": 2.9285329756388774, - "learning_rate": 2.2416681897335337e-06, - "loss": 1.0519, - "step": 3972 - }, - { - "epoch": 0.4777250045091084, - "grad_norm": 1.831349585627783, - "learning_rate": 2.240894907521661e-06, - "loss": 0.8639, - "step": 3973 - }, - { - "epoch": 0.4778452473997475, - "grad_norm": 1.7290707144101736, - "learning_rate": 2.240121588766223e-06, - "loss": 0.8414, - "step": 3974 - }, - { - "epoch": 0.4779654902903866, - "grad_norm": 1.807328970429771, - "learning_rate": 2.239348233584531e-06, - "loss": 0.9065, - "step": 3975 - }, - { - "epoch": 0.47808573318102565, - "grad_norm": 1.766358619800259, - "learning_rate": 2.2385748420939013e-06, - "loss": 1.0065, - "step": 3976 - }, - { - "epoch": 0.47820597607166476, - "grad_norm": 1.5798528296098717, - "learning_rate": 2.2378014144116583e-06, - "loss": 0.9265, - "step": 3977 - }, - { - "epoch": 0.4783262189623039, - "grad_norm": 1.6001682818846683, - "learning_rate": 2.23702795065513e-06, - "loss": 0.995, - "step": 3978 - }, - { - "epoch": 0.47844646185294293, - "grad_norm": 1.056479055634856, - "learning_rate": 2.2362544509416493e-06, - "loss": 0.9019, - "step": 3979 - }, - { - "epoch": 0.47856670474358204, - "grad_norm": 2.3441463898198673, - "learning_rate": 2.2354809153885572e-06, - "loss": 1.023, - "step": 3980 - }, - { - "epoch": 0.47868694763422115, - "grad_norm": 2.0253763814515433, - "learning_rate": 2.234707344113197e-06, - "loss": 1.0229, - "step": 3981 - }, - { - "epoch": 0.4788071905248602, - "grad_norm": 1.7229279803358568, - "learning_rate": 2.233933737232919e-06, - "loss": 0.971, - "step": 3982 - }, - { - "epoch": 0.4789274334154993, - "grad_norm": 1.7112379115188325, - "learning_rate": 2.2331600948650793e-06, - "loss": 0.9808, - "step": 3983 - }, - { - "epoch": 0.4790476763061384, - "grad_norm": 1.592481819180799, - "learning_rate": 2.2323864171270386e-06, - "loss": 1.0019, - "step": 3984 - }, - { - "epoch": 0.4791679191967775, - "grad_norm": 2.1805720021571857, - "learning_rate": 2.231612704136164e-06, - "loss": 0.9276, - "step": 3985 - }, - { - "epoch": 0.4792881620874166, - "grad_norm": 2.2372194338023506, - "learning_rate": 2.2308389560098253e-06, - "loss": 0.9469, - "step": 3986 - }, - { - "epoch": 0.47940840497805565, - "grad_norm": 1.9002723147348395, - "learning_rate": 2.2300651728654008e-06, - "loss": 0.9607, - "step": 3987 - }, - { - "epoch": 0.47952864786869476, - "grad_norm": 0.9277460679772989, - "learning_rate": 2.229291354820272e-06, - "loss": 0.8298, - "step": 3988 - }, - { - "epoch": 0.47964889075933387, - "grad_norm": 1.7830296376664627, - "learning_rate": 2.228517501991828e-06, - "loss": 0.9706, - "step": 3989 - }, - { - "epoch": 0.4797691336499729, - "grad_norm": 0.8785498680676941, - "learning_rate": 2.22774361449746e-06, - "loss": 0.8368, - "step": 3990 - }, - { - "epoch": 0.47988937654061203, - "grad_norm": 3.3684859814628902, - "learning_rate": 2.2269696924545668e-06, - "loss": 0.9029, - "step": 3991 - }, - { - "epoch": 0.48000961943125114, - "grad_norm": 2.1519305322347106, - "learning_rate": 2.2261957359805523e-06, - "loss": 0.9828, - "step": 3992 - }, - { - "epoch": 0.4801298623218902, - "grad_norm": 1.8850698274990025, - "learning_rate": 2.225421745192823e-06, - "loss": 0.9413, - "step": 3993 - }, - { - "epoch": 0.4802501052125293, - "grad_norm": 2.565723903296927, - "learning_rate": 2.2246477202087955e-06, - "loss": 0.9805, - "step": 3994 - }, - { - "epoch": 0.4803703481031684, - "grad_norm": 1.5697333942913498, - "learning_rate": 2.223873661145887e-06, - "loss": 1.0307, - "step": 3995 - }, - { - "epoch": 0.4804905909938075, - "grad_norm": 1.483448100561434, - "learning_rate": 2.2230995681215226e-06, - "loss": 0.9107, - "step": 3996 - }, - { - "epoch": 0.4806108338844466, - "grad_norm": 1.7332594437011088, - "learning_rate": 2.2223254412531305e-06, - "loss": 0.985, - "step": 3997 - }, - { - "epoch": 0.4807310767750857, - "grad_norm": 1.5606486987910533, - "learning_rate": 2.221551280658146e-06, - "loss": 1.0201, - "step": 3998 - }, - { - "epoch": 0.48085131966572475, - "grad_norm": 4.096305224430819, - "learning_rate": 2.2207770864540085e-06, - "loss": 0.938, - "step": 3999 - }, - { - "epoch": 0.48097156255636386, - "grad_norm": 1.8229856060252891, - "learning_rate": 2.220002858758162e-06, - "loss": 0.9259, - "step": 4000 - }, - { - "epoch": 0.481091805447003, - "grad_norm": 1.0162292443627488, - "learning_rate": 2.2192285976880573e-06, - "loss": 0.8224, - "step": 4001 - }, - { - "epoch": 0.48121204833764203, - "grad_norm": 1.9930475510256684, - "learning_rate": 2.2184543033611485e-06, - "loss": 1.0048, - "step": 4002 - }, - { - "epoch": 0.48133229122828114, - "grad_norm": 1.8161655717773908, - "learning_rate": 2.2176799758948957e-06, - "loss": 1.024, - "step": 4003 - }, - { - "epoch": 0.4814525341189202, - "grad_norm": 1.6616890327583425, - "learning_rate": 2.2169056154067635e-06, - "loss": 0.9237, - "step": 4004 - }, - { - "epoch": 0.4815727770095593, - "grad_norm": 1.6726765360129907, - "learning_rate": 2.216131222014222e-06, - "loss": 1.0209, - "step": 4005 - }, - { - "epoch": 0.4816930199001984, - "grad_norm": 1.957218975878109, - "learning_rate": 2.2153567958347455e-06, - "loss": 1.0054, - "step": 4006 - }, - { - "epoch": 0.48181326279083747, - "grad_norm": 1.7887325732263943, - "learning_rate": 2.214582336985815e-06, - "loss": 0.9941, - "step": 4007 - }, - { - "epoch": 0.4819335056814766, - "grad_norm": 2.064833621326073, - "learning_rate": 2.2138078455849142e-06, - "loss": 0.8556, - "step": 4008 - }, - { - "epoch": 0.4820537485721157, - "grad_norm": 1.7146318517643842, - "learning_rate": 2.2130333217495334e-06, - "loss": 0.9932, - "step": 4009 - }, - { - "epoch": 0.48217399146275475, - "grad_norm": 2.701328099298757, - "learning_rate": 2.2122587655971665e-06, - "loss": 0.8708, - "step": 4010 - }, - { - "epoch": 0.48229423435339386, - "grad_norm": 1.4335069344242741, - "learning_rate": 2.211484177245314e-06, - "loss": 0.8367, - "step": 4011 - }, - { - "epoch": 0.48241447724403297, - "grad_norm": 2.0575798766343114, - "learning_rate": 2.21070955681148e-06, - "loss": 0.9334, - "step": 4012 - }, - { - "epoch": 0.482534720134672, - "grad_norm": 1.5739959483403525, - "learning_rate": 2.209934904413174e-06, - "loss": 0.9711, - "step": 4013 - }, - { - "epoch": 0.48265496302531113, - "grad_norm": 2.246814592209953, - "learning_rate": 2.2091602201679095e-06, - "loss": 0.918, - "step": 4014 - }, - { - "epoch": 0.48277520591595025, - "grad_norm": 1.911246146777643, - "learning_rate": 2.208385504193206e-06, - "loss": 1.0302, - "step": 4015 - }, - { - "epoch": 0.4828954488065893, - "grad_norm": 1.9902108951939321, - "learning_rate": 2.2076107566065873e-06, - "loss": 1.0104, - "step": 4016 - }, - { - "epoch": 0.4830156916972284, - "grad_norm": 2.129464564476212, - "learning_rate": 2.2068359775255816e-06, - "loss": 0.9533, - "step": 4017 - }, - { - "epoch": 0.48313593458786747, - "grad_norm": 2.2103671898609227, - "learning_rate": 2.206061167067723e-06, - "loss": 0.9808, - "step": 4018 - }, - { - "epoch": 0.4832561774785066, - "grad_norm": 2.0916624907595023, - "learning_rate": 2.205286325350549e-06, - "loss": 0.9922, - "step": 4019 - }, - { - "epoch": 0.4833764203691457, - "grad_norm": 1.822160102430231, - "learning_rate": 2.204511452491603e-06, - "loss": 0.9253, - "step": 4020 - }, - { - "epoch": 0.48349666325978474, - "grad_norm": 1.4909172843181437, - "learning_rate": 2.2037365486084316e-06, - "loss": 0.9468, - "step": 4021 - }, - { - "epoch": 0.48361690615042385, - "grad_norm": 1.6574035801755513, - "learning_rate": 2.2029616138185886e-06, - "loss": 0.9784, - "step": 4022 - }, - { - "epoch": 0.48373714904106296, - "grad_norm": 1.5908216613426602, - "learning_rate": 2.202186648239629e-06, - "loss": 1.0235, - "step": 4023 - }, - { - "epoch": 0.483857391931702, - "grad_norm": 1.676481514118601, - "learning_rate": 2.201411651989117e-06, - "loss": 0.9116, - "step": 4024 - }, - { - "epoch": 0.48397763482234113, - "grad_norm": 1.69475395037186, - "learning_rate": 2.2006366251846167e-06, - "loss": 0.9817, - "step": 4025 - }, - { - "epoch": 0.48409787771298024, - "grad_norm": 1.7839936658977742, - "learning_rate": 2.1998615679436997e-06, - "loss": 0.9525, - "step": 4026 - }, - { - "epoch": 0.4842181206036193, - "grad_norm": 2.569534879817005, - "learning_rate": 2.199086480383942e-06, - "loss": 0.9748, - "step": 4027 - }, - { - "epoch": 0.4843383634942584, - "grad_norm": 2.4785318817495323, - "learning_rate": 2.1983113626229234e-06, - "loss": 0.8685, - "step": 4028 - }, - { - "epoch": 0.4844586063848975, - "grad_norm": 2.0142048408869058, - "learning_rate": 2.1975362147782293e-06, - "loss": 0.9829, - "step": 4029 - }, - { - "epoch": 0.48457884927553657, - "grad_norm": 0.9049543787590582, - "learning_rate": 2.196761036967448e-06, - "loss": 0.7648, - "step": 4030 - }, - { - "epoch": 0.4846990921661757, - "grad_norm": 1.9643152333803473, - "learning_rate": 2.1959858293081743e-06, - "loss": 0.9753, - "step": 4031 - }, - { - "epoch": 0.4848193350568148, - "grad_norm": 8.35652331616407, - "learning_rate": 2.1952105919180056e-06, - "loss": 0.9619, - "step": 4032 - }, - { - "epoch": 0.48493957794745385, - "grad_norm": 2.1691455843068645, - "learning_rate": 2.1944353249145456e-06, - "loss": 0.8829, - "step": 4033 - }, - { - "epoch": 0.48505982083809296, - "grad_norm": 1.4304487359289062, - "learning_rate": 2.193660028415401e-06, - "loss": 0.9436, - "step": 4034 - }, - { - "epoch": 0.485180063728732, - "grad_norm": 1.8621381856243187, - "learning_rate": 2.1928847025381852e-06, - "loss": 1.0194, - "step": 4035 - }, - { - "epoch": 0.4853003066193711, - "grad_norm": 1.6136045709230347, - "learning_rate": 2.192109347400512e-06, - "loss": 1.0421, - "step": 4036 - }, - { - "epoch": 0.48542054951001024, - "grad_norm": 1.6654398058340893, - "learning_rate": 2.191333963120004e-06, - "loss": 0.9918, - "step": 4037 - }, - { - "epoch": 0.4855407924006493, - "grad_norm": 2.135205055234548, - "learning_rate": 2.190558549814286e-06, - "loss": 0.9076, - "step": 4038 - }, - { - "epoch": 0.4856610352912884, - "grad_norm": 2.7759001637955842, - "learning_rate": 2.1897831076009872e-06, - "loss": 0.9943, - "step": 4039 - }, - { - "epoch": 0.4857812781819275, - "grad_norm": 2.021807157389434, - "learning_rate": 2.1890076365977426e-06, - "loss": 1.001, - "step": 4040 - }, - { - "epoch": 0.48590152107256657, - "grad_norm": 1.1993403668196503, - "learning_rate": 2.188232136922189e-06, - "loss": 0.7601, - "step": 4041 - }, - { - "epoch": 0.4860217639632057, - "grad_norm": 3.4385604609811997, - "learning_rate": 2.187456608691971e-06, - "loss": 0.9597, - "step": 4042 - }, - { - "epoch": 0.4861420068538448, - "grad_norm": 1.7542868288373004, - "learning_rate": 2.1866810520247334e-06, - "loss": 1.0731, - "step": 4043 - }, - { - "epoch": 0.48626224974448384, - "grad_norm": 1.738117476045468, - "learning_rate": 2.185905467038129e-06, - "loss": 0.8541, - "step": 4044 - }, - { - "epoch": 0.48638249263512295, - "grad_norm": 1.4943920846406804, - "learning_rate": 2.1851298538498127e-06, - "loss": 0.9759, - "step": 4045 - }, - { - "epoch": 0.48650273552576206, - "grad_norm": 1.752685121779836, - "learning_rate": 2.184354212577446e-06, - "loss": 0.9908, - "step": 4046 - }, - { - "epoch": 0.4866229784164011, - "grad_norm": 2.504833983899083, - "learning_rate": 2.1835785433386907e-06, - "loss": 0.8318, - "step": 4047 - }, - { - "epoch": 0.48674322130704023, - "grad_norm": 1.5929847824988628, - "learning_rate": 2.182802846251216e-06, - "loss": 0.8504, - "step": 4048 - }, - { - "epoch": 0.4868634641976793, - "grad_norm": 1.8518628226944238, - "learning_rate": 2.182027121432696e-06, - "loss": 0.9271, - "step": 4049 - }, - { - "epoch": 0.4869837070883184, - "grad_norm": 2.3246227778755255, - "learning_rate": 2.1812513690008054e-06, - "loss": 1.0163, - "step": 4050 - }, - { - "epoch": 0.4871039499789575, - "grad_norm": 1.8743443296339346, - "learning_rate": 2.180475589073227e-06, - "loss": 0.9984, - "step": 4051 - }, - { - "epoch": 0.48722419286959656, - "grad_norm": 1.6337615544932826, - "learning_rate": 2.1796997817676456e-06, - "loss": 0.94, - "step": 4052 - }, - { - "epoch": 0.4873444357602357, - "grad_norm": 1.5279243321881415, - "learning_rate": 2.1789239472017494e-06, - "loss": 0.8745, - "step": 4053 - }, - { - "epoch": 0.4874646786508748, - "grad_norm": 1.8813991702937944, - "learning_rate": 2.1781480854932326e-06, - "loss": 0.9245, - "step": 4054 - }, - { - "epoch": 0.48758492154151384, - "grad_norm": 1.8139896190897244, - "learning_rate": 2.1773721967597933e-06, - "loss": 0.9961, - "step": 4055 - }, - { - "epoch": 0.48770516443215295, - "grad_norm": 0.945149273612506, - "learning_rate": 2.1765962811191322e-06, - "loss": 0.8257, - "step": 4056 - }, - { - "epoch": 0.48782540732279206, - "grad_norm": 0.9566524456597622, - "learning_rate": 2.1758203386889566e-06, - "loss": 0.8792, - "step": 4057 - }, - { - "epoch": 0.4879456502134311, - "grad_norm": 1.8597545717195751, - "learning_rate": 2.1750443695869746e-06, - "loss": 1.0475, - "step": 4058 - }, - { - "epoch": 0.4880658931040702, - "grad_norm": 1.6331818815326429, - "learning_rate": 2.174268373930901e-06, - "loss": 1.0524, - "step": 4059 - }, - { - "epoch": 0.48818613599470934, - "grad_norm": 1.8212733536520938, - "learning_rate": 2.1734923518384537e-06, - "loss": 0.9995, - "step": 4060 - }, - { - "epoch": 0.4883063788853484, - "grad_norm": 1.8763821911482481, - "learning_rate": 2.1727163034273547e-06, - "loss": 1.0184, - "step": 4061 - }, - { - "epoch": 0.4884266217759875, - "grad_norm": 3.06054282416372, - "learning_rate": 2.17194022881533e-06, - "loss": 0.9758, - "step": 4062 - }, - { - "epoch": 0.4885468646666266, - "grad_norm": 1.5872980304708442, - "learning_rate": 2.1711641281201092e-06, - "loss": 0.8739, - "step": 4063 - }, - { - "epoch": 0.48866710755726567, - "grad_norm": 2.0499262308013173, - "learning_rate": 2.1703880014594264e-06, - "loss": 0.9923, - "step": 4064 - }, - { - "epoch": 0.4887873504479048, - "grad_norm": 1.6648473187340769, - "learning_rate": 2.1696118489510182e-06, - "loss": 0.9297, - "step": 4065 - }, - { - "epoch": 0.48890759333854383, - "grad_norm": 1.6630781244082107, - "learning_rate": 2.1688356707126286e-06, - "loss": 0.9181, - "step": 4066 - }, - { - "epoch": 0.48902783622918294, - "grad_norm": 2.6173532917403755, - "learning_rate": 2.168059466862001e-06, - "loss": 0.9017, - "step": 4067 - }, - { - "epoch": 0.48914807911982205, - "grad_norm": 1.8946399775501546, - "learning_rate": 2.167283237516887e-06, - "loss": 1.0134, - "step": 4068 - }, - { - "epoch": 0.4892683220104611, - "grad_norm": 1.7276191757542274, - "learning_rate": 2.1665069827950383e-06, - "loss": 0.9514, - "step": 4069 - }, - { - "epoch": 0.4893885649011002, - "grad_norm": 1.7309609567788704, - "learning_rate": 2.1657307028142126e-06, - "loss": 1.0671, - "step": 4070 - }, - { - "epoch": 0.48950880779173933, - "grad_norm": 1.7502436468267628, - "learning_rate": 2.164954397692171e-06, - "loss": 0.8658, - "step": 4071 - }, - { - "epoch": 0.4896290506823784, - "grad_norm": 1.1609467147933787, - "learning_rate": 2.164178067546678e-06, - "loss": 1.0231, - "step": 4072 - }, - { - "epoch": 0.4897492935730175, - "grad_norm": 1.7935867075744876, - "learning_rate": 2.163401712495504e-06, - "loss": 1.1062, - "step": 4073 - }, - { - "epoch": 0.4898695364636566, - "grad_norm": 1.5800026780117855, - "learning_rate": 2.1626253326564194e-06, - "loss": 0.9866, - "step": 4074 - }, - { - "epoch": 0.48998977935429566, - "grad_norm": 1.7243235989604402, - "learning_rate": 2.161848928147201e-06, - "loss": 0.9709, - "step": 4075 - }, - { - "epoch": 0.4901100222449348, - "grad_norm": 1.827868973499864, - "learning_rate": 2.161072499085629e-06, - "loss": 1.0054, - "step": 4076 - }, - { - "epoch": 0.4902302651355739, - "grad_norm": 1.5654777381456948, - "learning_rate": 2.160296045589487e-06, - "loss": 1.0253, - "step": 4077 - }, - { - "epoch": 0.49035050802621294, - "grad_norm": 1.6686457883605945, - "learning_rate": 2.159519567776562e-06, - "loss": 0.8877, - "step": 4078 - }, - { - "epoch": 0.49047075091685205, - "grad_norm": 3.1185102596141716, - "learning_rate": 2.1587430657646463e-06, - "loss": 0.9078, - "step": 4079 - }, - { - "epoch": 0.4905909938074911, - "grad_norm": 1.6329113505981743, - "learning_rate": 2.157966539671533e-06, - "loss": 0.9803, - "step": 4080 - }, - { - "epoch": 0.4907112366981302, - "grad_norm": 1.8272083951571823, - "learning_rate": 2.157189989615021e-06, - "loss": 0.8678, - "step": 4081 - }, - { - "epoch": 0.4908314795887693, - "grad_norm": 1.602384328491092, - "learning_rate": 2.156413415712913e-06, - "loss": 0.9438, - "step": 4082 - }, - { - "epoch": 0.4909517224794084, - "grad_norm": 1.7940826612627763, - "learning_rate": 2.155636818083014e-06, - "loss": 0.9778, - "step": 4083 - }, - { - "epoch": 0.4910719653700475, - "grad_norm": 1.7410347036883043, - "learning_rate": 2.154860196843134e-06, - "loss": 1.0372, - "step": 4084 - }, - { - "epoch": 0.4911922082606866, - "grad_norm": 1.6375487778947964, - "learning_rate": 2.154083552111085e-06, - "loss": 0.9689, - "step": 4085 - }, - { - "epoch": 0.49131245115132566, - "grad_norm": 1.737384007376295, - "learning_rate": 2.1533068840046834e-06, - "loss": 1.0158, - "step": 4086 - }, - { - "epoch": 0.49143269404196477, - "grad_norm": 2.176203524089435, - "learning_rate": 2.152530192641749e-06, - "loss": 0.8066, - "step": 4087 - }, - { - "epoch": 0.4915529369326039, - "grad_norm": 1.6067504963960855, - "learning_rate": 2.1517534781401068e-06, - "loss": 0.917, - "step": 4088 - }, - { - "epoch": 0.49167317982324293, - "grad_norm": 1.9113536116543637, - "learning_rate": 2.150976740617581e-06, - "loss": 0.893, - "step": 4089 - }, - { - "epoch": 0.49179342271388204, - "grad_norm": 1.8935261685376543, - "learning_rate": 2.150199980192006e-06, - "loss": 0.927, - "step": 4090 - }, - { - "epoch": 0.49191366560452116, - "grad_norm": 1.5805056904705732, - "learning_rate": 2.1494231969812114e-06, - "loss": 1.011, - "step": 4091 - }, - { - "epoch": 0.4920339084951602, - "grad_norm": 2.128532614861538, - "learning_rate": 2.1486463911030372e-06, - "loss": 1.008, - "step": 4092 - }, - { - "epoch": 0.4921541513857993, - "grad_norm": 1.8762734856809808, - "learning_rate": 2.147869562675324e-06, - "loss": 0.9394, - "step": 4093 - }, - { - "epoch": 0.49227439427643843, - "grad_norm": 1.566642610216431, - "learning_rate": 2.147092711815915e-06, - "loss": 0.9226, - "step": 4094 - }, - { - "epoch": 0.4923946371670775, - "grad_norm": 2.6158621549481893, - "learning_rate": 2.1463158386426593e-06, - "loss": 1.0658, - "step": 4095 - }, - { - "epoch": 0.4925148800577166, - "grad_norm": 1.8589184625318553, - "learning_rate": 2.145538943273407e-06, - "loss": 0.9834, - "step": 4096 - }, - { - "epoch": 0.49263512294835565, - "grad_norm": 1.7146207277740184, - "learning_rate": 2.144762025826013e-06, - "loss": 0.9195, - "step": 4097 - }, - { - "epoch": 0.49275536583899476, - "grad_norm": 1.7864700257685155, - "learning_rate": 2.143985086418334e-06, - "loss": 1.0682, - "step": 4098 - }, - { - "epoch": 0.4928756087296339, - "grad_norm": 1.2630949280493922, - "learning_rate": 2.1432081251682324e-06, - "loss": 0.9721, - "step": 4099 - }, - { - "epoch": 0.49299585162027293, - "grad_norm": 1.5876945947041756, - "learning_rate": 2.142431142193572e-06, - "loss": 1.0629, - "step": 4100 - }, - { - "epoch": 0.49311609451091204, - "grad_norm": 1.9630559291790963, - "learning_rate": 2.1416541376122207e-06, - "loss": 0.9208, - "step": 4101 - }, - { - "epoch": 0.49323633740155115, - "grad_norm": 1.5572590478991586, - "learning_rate": 2.1408771115420496e-06, - "loss": 0.925, - "step": 4102 - }, - { - "epoch": 0.4933565802921902, - "grad_norm": 1.5834501708573772, - "learning_rate": 2.140100064100932e-06, - "loss": 0.8459, - "step": 4103 - }, - { - "epoch": 0.4934768231828293, - "grad_norm": 1.6733709042474005, - "learning_rate": 2.139322995406746e-06, - "loss": 0.9546, - "step": 4104 - }, - { - "epoch": 0.4935970660734684, - "grad_norm": 1.9040915980104245, - "learning_rate": 2.1385459055773727e-06, - "loss": 1.0046, - "step": 4105 - }, - { - "epoch": 0.4937173089641075, - "grad_norm": 1.8848030047094815, - "learning_rate": 2.137768794730696e-06, - "loss": 0.9363, - "step": 4106 - }, - { - "epoch": 0.4938375518547466, - "grad_norm": 2.295892829839494, - "learning_rate": 2.1369916629846026e-06, - "loss": 1.0019, - "step": 4107 - }, - { - "epoch": 0.4939577947453857, - "grad_norm": 2.2364391049495858, - "learning_rate": 2.136214510456983e-06, - "loss": 0.9458, - "step": 4108 - }, - { - "epoch": 0.49407803763602476, - "grad_norm": 1.0278682167769275, - "learning_rate": 2.1354373372657296e-06, - "loss": 0.9012, - "step": 4109 - }, - { - "epoch": 0.49419828052666387, - "grad_norm": 1.407498123682645, - "learning_rate": 2.1346601435287404e-06, - "loss": 0.9051, - "step": 4110 - }, - { - "epoch": 0.494318523417303, - "grad_norm": 1.6541985816983615, - "learning_rate": 2.1338829293639144e-06, - "loss": 1.0046, - "step": 4111 - }, - { - "epoch": 0.49443876630794203, - "grad_norm": 1.989707809735223, - "learning_rate": 2.1331056948891547e-06, - "loss": 1.0264, - "step": 4112 - }, - { - "epoch": 0.49455900919858115, - "grad_norm": 1.9910743146407466, - "learning_rate": 2.1323284402223666e-06, - "loss": 0.9612, - "step": 4113 - }, - { - "epoch": 0.4946792520892202, - "grad_norm": 1.6424166387948018, - "learning_rate": 2.1315511654814597e-06, - "loss": 1.0757, - "step": 4114 - }, - { - "epoch": 0.4947994949798593, - "grad_norm": 1.7138623185917308, - "learning_rate": 2.1307738707843456e-06, - "loss": 0.9801, - "step": 4115 - }, - { - "epoch": 0.4949197378704984, - "grad_norm": 1.788582416737457, - "learning_rate": 2.1299965562489385e-06, - "loss": 0.8912, - "step": 4116 - }, - { - "epoch": 0.4950399807611375, - "grad_norm": 1.287737355991647, - "learning_rate": 2.129219221993158e-06, - "loss": 0.9888, - "step": 4117 - }, - { - "epoch": 0.4951602236517766, - "grad_norm": 0.9180302051507468, - "learning_rate": 2.128441868134924e-06, - "loss": 0.8401, - "step": 4118 - }, - { - "epoch": 0.4952804665424157, - "grad_norm": 2.189982395362641, - "learning_rate": 2.1276644947921606e-06, - "loss": 1.0349, - "step": 4119 - }, - { - "epoch": 0.49540070943305475, - "grad_norm": 1.6335079044679737, - "learning_rate": 2.126887102082795e-06, - "loss": 1.0294, - "step": 4120 - }, - { - "epoch": 0.49552095232369386, - "grad_norm": 1.5360212922160263, - "learning_rate": 2.126109690124757e-06, - "loss": 0.9003, - "step": 4121 - }, - { - "epoch": 0.495641195214333, - "grad_norm": 1.6588811308277729, - "learning_rate": 2.1253322590359786e-06, - "loss": 0.9168, - "step": 4122 - }, - { - "epoch": 0.49576143810497203, - "grad_norm": 1.5693490028430928, - "learning_rate": 2.124554808934397e-06, - "loss": 0.9406, - "step": 4123 - }, - { - "epoch": 0.49588168099561114, - "grad_norm": 1.7529426085439526, - "learning_rate": 2.1237773399379496e-06, - "loss": 0.9282, - "step": 4124 - }, - { - "epoch": 0.49600192388625025, - "grad_norm": 1.7104683149983104, - "learning_rate": 2.122999852164578e-06, - "loss": 1.0721, - "step": 4125 - }, - { - "epoch": 0.4961221667768893, - "grad_norm": 2.537433935348332, - "learning_rate": 2.122222345732227e-06, - "loss": 0.7784, - "step": 4126 - }, - { - "epoch": 0.4962424096675284, - "grad_norm": 1.6891334539961416, - "learning_rate": 2.121444820758843e-06, - "loss": 1.0292, - "step": 4127 - }, - { - "epoch": 0.49636265255816747, - "grad_norm": 1.8592184305387127, - "learning_rate": 2.120667277362376e-06, - "loss": 0.9742, - "step": 4128 - }, - { - "epoch": 0.4964828954488066, - "grad_norm": 1.8860138707788614, - "learning_rate": 2.1198897156607796e-06, - "loss": 1.046, - "step": 4129 - }, - { - "epoch": 0.4966031383394457, - "grad_norm": 2.0832724244432845, - "learning_rate": 2.1191121357720085e-06, - "loss": 0.9436, - "step": 4130 - }, - { - "epoch": 0.49672338123008475, - "grad_norm": 1.5245688907800707, - "learning_rate": 2.1183345378140206e-06, - "loss": 0.9435, - "step": 4131 - }, - { - "epoch": 0.49684362412072386, - "grad_norm": 1.0248593071002359, - "learning_rate": 2.1175569219047783e-06, - "loss": 0.8567, - "step": 4132 - }, - { - "epoch": 0.49696386701136297, - "grad_norm": 1.5055261384233594, - "learning_rate": 2.1167792881622437e-06, - "loss": 0.9273, - "step": 4133 - }, - { - "epoch": 0.497084109902002, - "grad_norm": 1.6934976312323988, - "learning_rate": 2.116001636704384e-06, - "loss": 1.0044, - "step": 4134 - }, - { - "epoch": 0.49720435279264114, - "grad_norm": 1.6511276495493015, - "learning_rate": 2.1152239676491685e-06, - "loss": 1.0006, - "step": 4135 - }, - { - "epoch": 0.49732459568328025, - "grad_norm": 1.6521698700159217, - "learning_rate": 2.114446281114569e-06, - "loss": 0.9407, - "step": 4136 - }, - { - "epoch": 0.4974448385739193, - "grad_norm": 1.6787666102970193, - "learning_rate": 2.1136685772185587e-06, - "loss": 0.955, - "step": 4137 - }, - { - "epoch": 0.4975650814645584, - "grad_norm": 1.61272784848568, - "learning_rate": 2.1128908560791163e-06, - "loss": 0.9766, - "step": 4138 - }, - { - "epoch": 0.4976853243551975, - "grad_norm": 1.6303995363599812, - "learning_rate": 2.1121131178142203e-06, - "loss": 0.9747, - "step": 4139 - }, - { - "epoch": 0.4978055672458366, - "grad_norm": 1.4408010644132505, - "learning_rate": 2.1113353625418544e-06, - "loss": 1.0187, - "step": 4140 - }, - { - "epoch": 0.4979258101364757, - "grad_norm": 1.6230479767172479, - "learning_rate": 2.1105575903800017e-06, - "loss": 0.9948, - "step": 4141 - }, - { - "epoch": 0.4980460530271148, - "grad_norm": 1.6713556213784675, - "learning_rate": 2.1097798014466502e-06, - "loss": 1.0456, - "step": 4142 - }, - { - "epoch": 0.49816629591775385, - "grad_norm": 4.7566211129573315, - "learning_rate": 2.109001995859791e-06, - "loss": 0.7852, - "step": 4143 - }, - { - "epoch": 0.49828653880839296, - "grad_norm": 0.854984930474509, - "learning_rate": 2.108224173737415e-06, - "loss": 0.8276, - "step": 4144 - }, - { - "epoch": 0.498406781699032, - "grad_norm": 1.6833951970179972, - "learning_rate": 2.1074463351975183e-06, - "loss": 0.9599, - "step": 4145 - }, - { - "epoch": 0.49852702458967113, - "grad_norm": 1.774071332282261, - "learning_rate": 2.106668480358098e-06, - "loss": 0.9154, - "step": 4146 - }, - { - "epoch": 0.49864726748031024, - "grad_norm": 1.6447286963010146, - "learning_rate": 2.105890609337154e-06, - "loss": 0.9068, - "step": 4147 - }, - { - "epoch": 0.4987675103709493, - "grad_norm": 0.7870874141535493, - "learning_rate": 2.1051127222526883e-06, - "loss": 0.8414, - "step": 4148 - }, - { - "epoch": 0.4988877532615884, - "grad_norm": 1.4473857134830959, - "learning_rate": 2.1043348192227067e-06, - "loss": 1.0011, - "step": 4149 - }, - { - "epoch": 0.4990079961522275, - "grad_norm": 1.8409664672789752, - "learning_rate": 2.1035569003652156e-06, - "loss": 0.8255, - "step": 4150 - }, - { - "epoch": 0.4991282390428666, - "grad_norm": 2.1489164061407915, - "learning_rate": 2.1027789657982255e-06, - "loss": 1.0145, - "step": 4151 - }, - { - "epoch": 0.4992484819335057, - "grad_norm": 2.3525673376453047, - "learning_rate": 2.1020010156397482e-06, - "loss": 0.973, - "step": 4152 - }, - { - "epoch": 0.4993687248241448, - "grad_norm": 1.4153009544839945, - "learning_rate": 2.101223050007797e-06, - "loss": 0.9755, - "step": 4153 - }, - { - "epoch": 0.49948896771478385, - "grad_norm": 1.001577682499066, - "learning_rate": 2.1004450690203904e-06, - "loss": 0.7781, - "step": 4154 - }, - { - "epoch": 0.49960921060542296, - "grad_norm": 1.0263689877742002, - "learning_rate": 2.099667072795546e-06, - "loss": 0.9092, - "step": 4155 - }, - { - "epoch": 0.49972945349606207, - "grad_norm": 1.7771732458136957, - "learning_rate": 2.0988890614512864e-06, - "loss": 0.9925, - "step": 4156 - }, - { - "epoch": 0.4998496963867011, - "grad_norm": 1.6924053527422804, - "learning_rate": 2.098111035105635e-06, - "loss": 1.037, - "step": 4157 - }, - { - "epoch": 0.49996993927734024, - "grad_norm": 1.7449621847913865, - "learning_rate": 2.0973329938766176e-06, - "loss": 0.9386, - "step": 4158 - }, - { - "epoch": 0.5000901821679793, - "grad_norm": 1.8613060292456547, - "learning_rate": 2.0965549378822618e-06, - "loss": 0.9907, - "step": 4159 - }, - { - "epoch": 0.5002104250586185, - "grad_norm": 1.9742179505286581, - "learning_rate": 2.095776867240599e-06, - "loss": 1.0375, - "step": 4160 - }, - { - "epoch": 0.5003306679492575, - "grad_norm": 2.094271694494028, - "learning_rate": 2.094998782069661e-06, - "loss": 1.023, - "step": 4161 - }, - { - "epoch": 0.5004509108398966, - "grad_norm": 1.536437531238837, - "learning_rate": 2.0942206824874845e-06, - "loss": 0.9438, - "step": 4162 - }, - { - "epoch": 0.5005711537305357, - "grad_norm": 1.8457915431244005, - "learning_rate": 2.093442568612105e-06, - "loss": 0.9897, - "step": 4163 - }, - { - "epoch": 0.5006913966211748, - "grad_norm": 1.3254350685697764, - "learning_rate": 2.0926644405615613e-06, - "loss": 1.0472, - "step": 4164 - }, - { - "epoch": 0.5008116395118138, - "grad_norm": 2.806978527904288, - "learning_rate": 2.091886298453897e-06, - "loss": 1.0184, - "step": 4165 - }, - { - "epoch": 0.500931882402453, - "grad_norm": 1.9350441781257135, - "learning_rate": 2.091108142407153e-06, - "loss": 0.9348, - "step": 4166 - }, - { - "epoch": 0.5010521252930921, - "grad_norm": 1.0545616922578356, - "learning_rate": 2.090329972539377e-06, - "loss": 0.8858, - "step": 4167 - }, - { - "epoch": 0.5011723681837311, - "grad_norm": 1.6517335743573114, - "learning_rate": 2.089551788968616e-06, - "loss": 0.8809, - "step": 4168 - }, - { - "epoch": 0.5012926110743702, - "grad_norm": 0.9412656813653942, - "learning_rate": 2.08877359181292e-06, - "loss": 0.84, - "step": 4169 - }, - { - "epoch": 0.5014128539650093, - "grad_norm": 2.418311469173993, - "learning_rate": 2.0879953811903396e-06, - "loss": 1.055, - "step": 4170 - }, - { - "epoch": 0.5015330968556484, - "grad_norm": 2.212514900976316, - "learning_rate": 2.08721715721893e-06, - "loss": 0.9853, - "step": 4171 - }, - { - "epoch": 0.5016533397462875, - "grad_norm": 1.79073844936405, - "learning_rate": 2.0864389200167477e-06, - "loss": 0.9685, - "step": 4172 - }, - { - "epoch": 0.5017735826369266, - "grad_norm": 1.5816226384446683, - "learning_rate": 2.0856606697018504e-06, - "loss": 0.9889, - "step": 4173 - }, - { - "epoch": 0.5018938255275657, - "grad_norm": 2.1489419240936787, - "learning_rate": 2.084882406392297e-06, - "loss": 0.9301, - "step": 4174 - }, - { - "epoch": 0.5020140684182047, - "grad_norm": 1.9500442010809482, - "learning_rate": 2.0841041302061496e-06, - "loss": 0.9028, - "step": 4175 - }, - { - "epoch": 0.5021343113088439, - "grad_norm": 1.8163352378172029, - "learning_rate": 2.083325841261473e-06, - "loss": 0.9479, - "step": 4176 - }, - { - "epoch": 0.502254554199483, - "grad_norm": 1.7775183288015524, - "learning_rate": 2.0825475396763322e-06, - "loss": 0.8704, - "step": 4177 - }, - { - "epoch": 0.502374797090122, - "grad_norm": 1.2620079252414353, - "learning_rate": 2.081769225568796e-06, - "loss": 0.8515, - "step": 4178 - }, - { - "epoch": 0.5024950399807612, - "grad_norm": 1.3954741551990022, - "learning_rate": 2.0809908990569327e-06, - "loss": 0.9562, - "step": 4179 - }, - { - "epoch": 0.5026152828714002, - "grad_norm": 1.9283927249362134, - "learning_rate": 2.0802125602588146e-06, - "loss": 0.9947, - "step": 4180 - }, - { - "epoch": 0.5027355257620393, - "grad_norm": 1.851867908125606, - "learning_rate": 2.0794342092925146e-06, - "loss": 0.8727, - "step": 4181 - }, - { - "epoch": 0.5028557686526784, - "grad_norm": 1.88183124979145, - "learning_rate": 2.078655846276108e-06, - "loss": 0.8848, - "step": 4182 - }, - { - "epoch": 0.5029760115433175, - "grad_norm": 1.7798950663722124, - "learning_rate": 2.0778774713276727e-06, - "loss": 0.8787, - "step": 4183 - }, - { - "epoch": 0.5030962544339566, - "grad_norm": 1.9888856340212386, - "learning_rate": 2.077099084565287e-06, - "loss": 0.8661, - "step": 4184 - }, - { - "epoch": 0.5032164973245957, - "grad_norm": 2.0500278796649485, - "learning_rate": 2.0763206861070313e-06, - "loss": 0.8487, - "step": 4185 - }, - { - "epoch": 0.5033367402152348, - "grad_norm": 1.7508984030764057, - "learning_rate": 2.0755422760709876e-06, - "loss": 0.9545, - "step": 4186 - }, - { - "epoch": 0.5034569831058738, - "grad_norm": 1.703040374613258, - "learning_rate": 2.0747638545752417e-06, - "loss": 0.9685, - "step": 4187 - }, - { - "epoch": 0.503577225996513, - "grad_norm": 2.210935558951776, - "learning_rate": 2.073985421737878e-06, - "loss": 1.0279, - "step": 4188 - }, - { - "epoch": 0.5036974688871521, - "grad_norm": 2.070232807281049, - "learning_rate": 2.0732069776769844e-06, - "loss": 0.9541, - "step": 4189 - }, - { - "epoch": 0.5038177117777911, - "grad_norm": 1.8657951750210515, - "learning_rate": 2.072428522510651e-06, - "loss": 0.9332, - "step": 4190 - }, - { - "epoch": 0.5039379546684303, - "grad_norm": 2.037279306640747, - "learning_rate": 2.071650056356968e-06, - "loss": 0.9625, - "step": 4191 - }, - { - "epoch": 0.5040581975590693, - "grad_norm": 1.8035246830862117, - "learning_rate": 2.070871579334028e-06, - "loss": 0.9937, - "step": 4192 - }, - { - "epoch": 0.5041784404497084, - "grad_norm": 1.4773942330549052, - "learning_rate": 2.0700930915599264e-06, - "loss": 0.9229, - "step": 4193 - }, - { - "epoch": 0.5042986833403476, - "grad_norm": 1.9716396015961317, - "learning_rate": 2.0693145931527583e-06, - "loss": 0.9809, - "step": 4194 - }, - { - "epoch": 0.5044189262309866, - "grad_norm": 1.3929707748047495, - "learning_rate": 2.068536084230622e-06, - "loss": 0.9806, - "step": 4195 - }, - { - "epoch": 0.5045391691216257, - "grad_norm": 2.600437321930545, - "learning_rate": 2.067757564911616e-06, - "loss": 1.0835, - "step": 4196 - }, - { - "epoch": 0.5046594120122648, - "grad_norm": 2.021926020784789, - "learning_rate": 2.0669790353138407e-06, - "loss": 1.1223, - "step": 4197 - }, - { - "epoch": 0.5047796549029039, - "grad_norm": 1.7750970410966433, - "learning_rate": 2.0662004955553995e-06, - "loss": 0.9277, - "step": 4198 - }, - { - "epoch": 0.5048998977935429, - "grad_norm": 1.9070212258056327, - "learning_rate": 2.065421945754395e-06, - "loss": 0.9583, - "step": 4199 - }, - { - "epoch": 0.505020140684182, - "grad_norm": 1.592755175496933, - "learning_rate": 2.0646433860289344e-06, - "loss": 0.9781, - "step": 4200 - }, - { - "epoch": 0.5051403835748212, - "grad_norm": 1.7779047824607555, - "learning_rate": 2.0638648164971233e-06, - "loss": 1.0189, - "step": 4201 - }, - { - "epoch": 0.5052606264654602, - "grad_norm": 1.7028886823577027, - "learning_rate": 2.06308623727707e-06, - "loss": 1.0845, - "step": 4202 - }, - { - "epoch": 0.5053808693560993, - "grad_norm": 1.9969034064575282, - "learning_rate": 2.0623076484868846e-06, - "loss": 0.9558, - "step": 4203 - }, - { - "epoch": 0.5055011122467384, - "grad_norm": 0.968808541528721, - "learning_rate": 2.061529050244679e-06, - "loss": 0.8806, - "step": 4204 - }, - { - "epoch": 0.5056213551373775, - "grad_norm": 1.8632841929926314, - "learning_rate": 2.060750442668565e-06, - "loss": 0.9502, - "step": 4205 - }, - { - "epoch": 0.5057415980280165, - "grad_norm": 2.8344742964707788, - "learning_rate": 2.059971825876657e-06, - "loss": 0.8401, - "step": 4206 - }, - { - "epoch": 0.5058618409186557, - "grad_norm": 1.6183944841978217, - "learning_rate": 2.0591931999870713e-06, - "loss": 0.961, - "step": 4207 - }, - { - "epoch": 0.5059820838092948, - "grad_norm": 0.9633926413823839, - "learning_rate": 2.0584145651179234e-06, - "loss": 0.831, - "step": 4208 - }, - { - "epoch": 0.5061023266999338, - "grad_norm": 2.0314768737858686, - "learning_rate": 2.0576359213873327e-06, - "loss": 0.9833, - "step": 4209 - }, - { - "epoch": 0.506222569590573, - "grad_norm": 2.000937480554159, - "learning_rate": 2.056857268913419e-06, - "loss": 0.9075, - "step": 4210 - }, - { - "epoch": 0.506342812481212, - "grad_norm": 1.8645968161438229, - "learning_rate": 2.056078607814303e-06, - "loss": 1.0489, - "step": 4211 - }, - { - "epoch": 0.5064630553718511, - "grad_norm": 1.7065890020838912, - "learning_rate": 2.055299938208106e-06, - "loss": 0.9931, - "step": 4212 - }, - { - "epoch": 0.5065832982624903, - "grad_norm": 1.4857738127940012, - "learning_rate": 2.0545212602129526e-06, - "loss": 1.0577, - "step": 4213 - }, - { - "epoch": 0.5067035411531293, - "grad_norm": 2.30228995061952, - "learning_rate": 2.0537425739469673e-06, - "loss": 0.8744, - "step": 4214 - }, - { - "epoch": 0.5068237840437684, - "grad_norm": 1.0019096974857034, - "learning_rate": 2.052963879528276e-06, - "loss": 0.8386, - "step": 4215 - }, - { - "epoch": 0.5069440269344075, - "grad_norm": 1.9042788460685016, - "learning_rate": 2.052185177075007e-06, - "loss": 0.9606, - "step": 4216 - }, - { - "epoch": 0.5070642698250466, - "grad_norm": 1.4405092172165592, - "learning_rate": 2.051406466705288e-06, - "loss": 1.0262, - "step": 4217 - }, - { - "epoch": 0.5071845127156857, - "grad_norm": 1.7049280823784807, - "learning_rate": 2.0506277485372486e-06, - "loss": 1.0057, - "step": 4218 - }, - { - "epoch": 0.5073047556063248, - "grad_norm": 2.2895158442465315, - "learning_rate": 2.04984902268902e-06, - "loss": 0.8671, - "step": 4219 - }, - { - "epoch": 0.5074249984969639, - "grad_norm": 1.9748527761706258, - "learning_rate": 2.0490702892787345e-06, - "loss": 0.9441, - "step": 4220 - }, - { - "epoch": 0.5075452413876029, - "grad_norm": 1.4853647245266612, - "learning_rate": 2.0482915484245246e-06, - "loss": 0.8221, - "step": 4221 - }, - { - "epoch": 0.5076654842782421, - "grad_norm": 2.031136142767526, - "learning_rate": 2.047512800244526e-06, - "loss": 1.0337, - "step": 4222 - }, - { - "epoch": 0.5077857271688812, - "grad_norm": 1.6296262179123844, - "learning_rate": 2.046734044856873e-06, - "loss": 0.9863, - "step": 4223 - }, - { - "epoch": 0.5079059700595202, - "grad_norm": 1.698908214036432, - "learning_rate": 2.045955282379702e-06, - "loss": 0.9975, - "step": 4224 - }, - { - "epoch": 0.5080262129501594, - "grad_norm": 2.440677137220955, - "learning_rate": 2.045176512931152e-06, - "loss": 0.9579, - "step": 4225 - }, - { - "epoch": 0.5081464558407984, - "grad_norm": 1.6718493308119973, - "learning_rate": 2.0443977366293604e-06, - "loss": 0.9586, - "step": 4226 - }, - { - "epoch": 0.5082666987314375, - "grad_norm": 1.4316362950778656, - "learning_rate": 2.043618953592468e-06, - "loss": 0.9764, - "step": 4227 - }, - { - "epoch": 0.5083869416220766, - "grad_norm": 1.4986073864048035, - "learning_rate": 2.0428401639386144e-06, - "loss": 1.0111, - "step": 4228 - }, - { - "epoch": 0.5085071845127157, - "grad_norm": 0.965727366728071, - "learning_rate": 2.042061367785943e-06, - "loss": 0.8455, - "step": 4229 - }, - { - "epoch": 0.5086274274033548, - "grad_norm": 1.9619590256638362, - "learning_rate": 2.041282565252594e-06, - "loss": 0.9452, - "step": 4230 - }, - { - "epoch": 0.5087476702939938, - "grad_norm": 1.8925256554541405, - "learning_rate": 2.040503756456714e-06, - "loss": 0.9663, - "step": 4231 - }, - { - "epoch": 0.508867913184633, - "grad_norm": 1.9067430327645662, - "learning_rate": 2.0397249415164456e-06, - "loss": 0.984, - "step": 4232 - }, - { - "epoch": 0.508988156075272, - "grad_norm": 1.5499347519215876, - "learning_rate": 2.0389461205499354e-06, - "loss": 1.0018, - "step": 4233 - }, - { - "epoch": 0.5091083989659111, - "grad_norm": 1.8168592318863344, - "learning_rate": 2.03816729367533e-06, - "loss": 0.9302, - "step": 4234 - }, - { - "epoch": 0.5092286418565503, - "grad_norm": 27.859017892411202, - "learning_rate": 2.0373884610107765e-06, - "loss": 0.9042, - "step": 4235 - }, - { - "epoch": 0.5093488847471893, - "grad_norm": 2.3210813744779055, - "learning_rate": 2.0366096226744225e-06, - "loss": 0.8881, - "step": 4236 - }, - { - "epoch": 0.5094691276378284, - "grad_norm": 1.6008190383286118, - "learning_rate": 2.035830778784418e-06, - "loss": 0.9726, - "step": 4237 - }, - { - "epoch": 0.5095893705284675, - "grad_norm": 1.802379172266004, - "learning_rate": 2.0350519294589134e-06, - "loss": 0.9991, - "step": 4238 - }, - { - "epoch": 0.5097096134191066, - "grad_norm": 1.6743035904201116, - "learning_rate": 2.0342730748160588e-06, - "loss": 1.0272, - "step": 4239 - }, - { - "epoch": 0.5098298563097456, - "grad_norm": 1.883420663963757, - "learning_rate": 2.033494214974006e-06, - "loss": 0.9046, - "step": 4240 - }, - { - "epoch": 0.5099500992003848, - "grad_norm": 1.6325791479672407, - "learning_rate": 2.0327153500509067e-06, - "loss": 1.0294, - "step": 4241 - }, - { - "epoch": 0.5100703420910239, - "grad_norm": 1.923536187498137, - "learning_rate": 2.031936480164916e-06, - "loss": 1.0472, - "step": 4242 - }, - { - "epoch": 0.5101905849816629, - "grad_norm": 1.8213993478882975, - "learning_rate": 2.0311576054341857e-06, - "loss": 1.005, - "step": 4243 - }, - { - "epoch": 0.5103108278723021, - "grad_norm": 1.7312307639034554, - "learning_rate": 2.0303787259768715e-06, - "loss": 0.8336, - "step": 4244 - }, - { - "epoch": 0.5104310707629411, - "grad_norm": 2.1324298221896356, - "learning_rate": 2.0295998419111294e-06, - "loss": 0.8902, - "step": 4245 - }, - { - "epoch": 0.5105513136535802, - "grad_norm": 1.9599515708467639, - "learning_rate": 2.028820953355115e-06, - "loss": 0.9312, - "step": 4246 - }, - { - "epoch": 0.5106715565442194, - "grad_norm": 1.5973755233098705, - "learning_rate": 2.0280420604269834e-06, - "loss": 0.9868, - "step": 4247 - }, - { - "epoch": 0.5107917994348584, - "grad_norm": 0.8698730126144569, - "learning_rate": 2.027263163244895e-06, - "loss": 0.8387, - "step": 4248 - }, - { - "epoch": 0.5109120423254975, - "grad_norm": 1.5564398163391948, - "learning_rate": 2.026484261927005e-06, - "loss": 0.9426, - "step": 4249 - }, - { - "epoch": 0.5110322852161366, - "grad_norm": 2.149460760403957, - "learning_rate": 2.025705356591475e-06, - "loss": 0.9326, - "step": 4250 - }, - { - "epoch": 0.5111525281067757, - "grad_norm": 0.912998950099028, - "learning_rate": 2.024926447356462e-06, - "loss": 0.8172, - "step": 4251 - }, - { - "epoch": 0.5112727709974147, - "grad_norm": 1.8103596776750968, - "learning_rate": 2.024147534340127e-06, - "loss": 0.9883, - "step": 4252 - }, - { - "epoch": 0.5113930138880539, - "grad_norm": 1.5980069025150798, - "learning_rate": 2.02336861766063e-06, - "loss": 0.9977, - "step": 4253 - }, - { - "epoch": 0.511513256778693, - "grad_norm": 1.6019021767265533, - "learning_rate": 2.0225896974361327e-06, - "loss": 0.9804, - "step": 4254 - }, - { - "epoch": 0.511633499669332, - "grad_norm": 0.9780378869603022, - "learning_rate": 2.0218107737847962e-06, - "loss": 0.8503, - "step": 4255 - }, - { - "epoch": 0.5117537425599712, - "grad_norm": 3.0689267047421627, - "learning_rate": 2.0210318468247826e-06, - "loss": 0.9589, - "step": 4256 - }, - { - "epoch": 0.5118739854506102, - "grad_norm": 1.7092920342488458, - "learning_rate": 2.020252916674255e-06, - "loss": 1.0181, - "step": 4257 - }, - { - "epoch": 0.5119942283412493, - "grad_norm": 1.6515308301301805, - "learning_rate": 2.019473983451375e-06, - "loss": 1.0045, - "step": 4258 - }, - { - "epoch": 0.5121144712318885, - "grad_norm": 1.7612627543088442, - "learning_rate": 2.0186950472743076e-06, - "loss": 0.909, - "step": 4259 - }, - { - "epoch": 0.5122347141225275, - "grad_norm": 1.6409125666412223, - "learning_rate": 2.0179161082612162e-06, - "loss": 0.9401, - "step": 4260 - }, - { - "epoch": 0.5123549570131666, - "grad_norm": 1.8642102370828333, - "learning_rate": 2.017137166530266e-06, - "loss": 0.924, - "step": 4261 - }, - { - "epoch": 0.5124751999038056, - "grad_norm": 2.5250929371565864, - "learning_rate": 2.0163582221996213e-06, - "loss": 1.0064, - "step": 4262 - }, - { - "epoch": 0.5125954427944448, - "grad_norm": 3.500842401761704, - "learning_rate": 2.015579275387446e-06, - "loss": 0.8793, - "step": 4263 - }, - { - "epoch": 0.5127156856850839, - "grad_norm": 2.025363785833503, - "learning_rate": 2.0148003262119085e-06, - "loss": 0.8824, - "step": 4264 - }, - { - "epoch": 0.5128359285757229, - "grad_norm": 1.9672784301006572, - "learning_rate": 2.0140213747911728e-06, - "loss": 0.9675, - "step": 4265 - }, - { - "epoch": 0.5129561714663621, - "grad_norm": 1.848920522670353, - "learning_rate": 2.013242421243406e-06, - "loss": 0.9979, - "step": 4266 - }, - { - "epoch": 0.5130764143570011, - "grad_norm": 1.4015519395853544, - "learning_rate": 2.012463465686774e-06, - "loss": 0.989, - "step": 4267 - }, - { - "epoch": 0.5131966572476402, - "grad_norm": 0.9068315547366427, - "learning_rate": 2.0116845082394446e-06, - "loss": 0.7945, - "step": 4268 - }, - { - "epoch": 0.5133169001382794, - "grad_norm": 1.739256510233405, - "learning_rate": 2.0109055490195836e-06, - "loss": 0.9824, - "step": 4269 - }, - { - "epoch": 0.5134371430289184, - "grad_norm": 1.8362197983597126, - "learning_rate": 2.0101265881453605e-06, - "loss": 0.8379, - "step": 4270 - }, - { - "epoch": 0.5135573859195575, - "grad_norm": 1.907313816088478, - "learning_rate": 2.009347625734941e-06, - "loss": 0.9865, - "step": 4271 - }, - { - "epoch": 0.5136776288101966, - "grad_norm": 1.9376144990621091, - "learning_rate": 2.0085686619064954e-06, - "loss": 0.9525, - "step": 4272 - }, - { - "epoch": 0.5137978717008357, - "grad_norm": 5.218520108030621, - "learning_rate": 2.00778969677819e-06, - "loss": 1.0321, - "step": 4273 - }, - { - "epoch": 0.5139181145914747, - "grad_norm": 1.7513790146807071, - "learning_rate": 2.0070107304681934e-06, - "loss": 0.8481, - "step": 4274 - }, - { - "epoch": 0.5140383574821139, - "grad_norm": 1.58973937195754, - "learning_rate": 2.006231763094675e-06, - "loss": 0.9807, - "step": 4275 - }, - { - "epoch": 0.514158600372753, - "grad_norm": 2.1693518823058455, - "learning_rate": 2.0054527947758027e-06, - "loss": 1.0646, - "step": 4276 - }, - { - "epoch": 0.514278843263392, - "grad_norm": 0.8292800118182345, - "learning_rate": 2.004673825629746e-06, - "loss": 0.7876, - "step": 4277 - }, - { - "epoch": 0.5143990861540312, - "grad_norm": 1.632667206501018, - "learning_rate": 2.0038948557746744e-06, - "loss": 0.9198, - "step": 4278 - }, - { - "epoch": 0.5145193290446702, - "grad_norm": 1.7244562618939283, - "learning_rate": 2.0031158853287558e-06, - "loss": 0.953, - "step": 4279 - }, - { - "epoch": 0.5146395719353093, - "grad_norm": 2.0212723035649334, - "learning_rate": 2.0023369144101593e-06, - "loss": 0.9186, - "step": 4280 - }, - { - "epoch": 0.5147598148259485, - "grad_norm": 1.5999221961654757, - "learning_rate": 2.0015579431370555e-06, - "loss": 0.9631, - "step": 4281 - }, - { - "epoch": 0.5148800577165875, - "grad_norm": 1.8229309372116274, - "learning_rate": 2.000778971627612e-06, - "loss": 0.8982, - "step": 4282 - }, - { - "epoch": 0.5150003006072266, - "grad_norm": 3.1162696959557477, - "learning_rate": 2e-06, - "loss": 1.1023, - "step": 4283 - }, - { - "epoch": 0.5151205434978657, - "grad_norm": 1.5608000090477345, - "learning_rate": 1.9992210283723878e-06, - "loss": 1.0575, - "step": 4284 - }, - { - "epoch": 0.5152407863885048, - "grad_norm": 1.511519150284857, - "learning_rate": 1.9984420568629448e-06, - "loss": 0.9876, - "step": 4285 - }, - { - "epoch": 0.5153610292791438, - "grad_norm": 2.1621689206739423, - "learning_rate": 1.9976630855898405e-06, - "loss": 0.9755, - "step": 4286 - }, - { - "epoch": 0.515481272169783, - "grad_norm": 1.7948983640241343, - "learning_rate": 1.9968841146712445e-06, - "loss": 0.9445, - "step": 4287 - }, - { - "epoch": 0.5156015150604221, - "grad_norm": 1.5133496538338096, - "learning_rate": 1.996105144225326e-06, - "loss": 0.9099, - "step": 4288 - }, - { - "epoch": 0.5157217579510611, - "grad_norm": 1.7702995468164064, - "learning_rate": 1.995326174370254e-06, - "loss": 0.993, - "step": 4289 - }, - { - "epoch": 0.5158420008417003, - "grad_norm": 1.4561652916104988, - "learning_rate": 1.994547205224197e-06, - "loss": 0.9229, - "step": 4290 - }, - { - "epoch": 0.5159622437323393, - "grad_norm": 1.9220440797941214, - "learning_rate": 1.993768236905325e-06, - "loss": 0.875, - "step": 4291 - }, - { - "epoch": 0.5160824866229784, - "grad_norm": 1.7491189918848336, - "learning_rate": 1.992989269531807e-06, - "loss": 0.854, - "step": 4292 - }, - { - "epoch": 0.5162027295136175, - "grad_norm": 2.7359708652172054, - "learning_rate": 1.99221030322181e-06, - "loss": 0.8761, - "step": 4293 - }, - { - "epoch": 0.5163229724042566, - "grad_norm": 1.6912949593736337, - "learning_rate": 1.991431338093505e-06, - "loss": 1.0053, - "step": 4294 - }, - { - "epoch": 0.5164432152948957, - "grad_norm": 5.149727504891282, - "learning_rate": 1.9906523742650587e-06, - "loss": 0.9899, - "step": 4295 - }, - { - "epoch": 0.5165634581855347, - "grad_norm": 1.7275198789300192, - "learning_rate": 1.9898734118546397e-06, - "loss": 0.9581, - "step": 4296 - }, - { - "epoch": 0.5166837010761739, - "grad_norm": 1.5361895396143288, - "learning_rate": 1.989094450980416e-06, - "loss": 1.0075, - "step": 4297 - }, - { - "epoch": 0.516803943966813, - "grad_norm": 1.769390243739493, - "learning_rate": 1.9883154917605556e-06, - "loss": 0.9734, - "step": 4298 - }, - { - "epoch": 0.516924186857452, - "grad_norm": 2.3748306414803797, - "learning_rate": 1.9875365343132262e-06, - "loss": 1.0231, - "step": 4299 - }, - { - "epoch": 0.5170444297480912, - "grad_norm": 1.85635470470708, - "learning_rate": 1.9867575787565946e-06, - "loss": 1.043, - "step": 4300 - }, - { - "epoch": 0.5171646726387302, - "grad_norm": 1.7964795548220287, - "learning_rate": 1.9859786252088275e-06, - "loss": 1.0621, - "step": 4301 - }, - { - "epoch": 0.5172849155293693, - "grad_norm": 2.7027892857936373, - "learning_rate": 1.9851996737880914e-06, - "loss": 0.8674, - "step": 4302 - }, - { - "epoch": 0.5174051584200084, - "grad_norm": 1.6836932301172622, - "learning_rate": 1.9844207246125537e-06, - "loss": 0.9461, - "step": 4303 - }, - { - "epoch": 0.5175254013106475, - "grad_norm": 1.967235285244494, - "learning_rate": 1.983641777800379e-06, - "loss": 0.8883, - "step": 4304 - }, - { - "epoch": 0.5176456442012866, - "grad_norm": 0.895698818191442, - "learning_rate": 1.9828628334697343e-06, - "loss": 0.8227, - "step": 4305 - }, - { - "epoch": 0.5177658870919257, - "grad_norm": 0.8200481533821712, - "learning_rate": 1.982083891738784e-06, - "loss": 0.7899, - "step": 4306 - }, - { - "epoch": 0.5178861299825648, - "grad_norm": 1.398215761133455, - "learning_rate": 1.9813049527256923e-06, - "loss": 1.0262, - "step": 4307 - }, - { - "epoch": 0.5180063728732038, - "grad_norm": 2.097041944063383, - "learning_rate": 1.9805260165486252e-06, - "loss": 1.0344, - "step": 4308 - }, - { - "epoch": 0.518126615763843, - "grad_norm": 1.7458736272393942, - "learning_rate": 1.9797470833257457e-06, - "loss": 1.0622, - "step": 4309 - }, - { - "epoch": 0.5182468586544821, - "grad_norm": 1.7760828932585286, - "learning_rate": 1.9789681531752177e-06, - "loss": 0.9783, - "step": 4310 - }, - { - "epoch": 0.5183671015451211, - "grad_norm": 1.5849451427413088, - "learning_rate": 1.978189226215204e-06, - "loss": 0.9252, - "step": 4311 - }, - { - "epoch": 0.5184873444357603, - "grad_norm": 1.7142858618781616, - "learning_rate": 1.9774103025638675e-06, - "loss": 0.9707, - "step": 4312 - }, - { - "epoch": 0.5186075873263993, - "grad_norm": 1.4261829385195104, - "learning_rate": 1.9766313823393696e-06, - "loss": 0.9619, - "step": 4313 - }, - { - "epoch": 0.5187278302170384, - "grad_norm": 1.9657698585490075, - "learning_rate": 1.975852465659873e-06, - "loss": 0.8858, - "step": 4314 - }, - { - "epoch": 0.5188480731076776, - "grad_norm": 2.9728630084243557, - "learning_rate": 1.9750735526435377e-06, - "loss": 0.9079, - "step": 4315 - }, - { - "epoch": 0.5189683159983166, - "grad_norm": 2.2615111787778233, - "learning_rate": 1.974294643408525e-06, - "loss": 1.0017, - "step": 4316 - }, - { - "epoch": 0.5190885588889557, - "grad_norm": 2.153497459408351, - "learning_rate": 1.9735157380729947e-06, - "loss": 0.8731, - "step": 4317 - }, - { - "epoch": 0.5192088017795948, - "grad_norm": 1.6338094195860784, - "learning_rate": 1.9727368367551053e-06, - "loss": 1.0334, - "step": 4318 - }, - { - "epoch": 0.5193290446702339, - "grad_norm": 1.7567017839578294, - "learning_rate": 1.9719579395730164e-06, - "loss": 0.894, - "step": 4319 - }, - { - "epoch": 0.5194492875608729, - "grad_norm": 1.9133592831893262, - "learning_rate": 1.9711790466448854e-06, - "loss": 1.1298, - "step": 4320 - }, - { - "epoch": 0.5195695304515121, - "grad_norm": 1.9116694223815869, - "learning_rate": 1.9704001580888704e-06, - "loss": 0.9072, - "step": 4321 - }, - { - "epoch": 0.5196897733421512, - "grad_norm": 1.8275777535655655, - "learning_rate": 1.9696212740231283e-06, - "loss": 1.0746, - "step": 4322 - }, - { - "epoch": 0.5198100162327902, - "grad_norm": 3.292936892824846, - "learning_rate": 1.9688423945658146e-06, - "loss": 1.0248, - "step": 4323 - }, - { - "epoch": 0.5199302591234293, - "grad_norm": 2.6420805942737977, - "learning_rate": 1.9680635198350845e-06, - "loss": 0.9244, - "step": 4324 - }, - { - "epoch": 0.5200505020140684, - "grad_norm": 1.974528355355966, - "learning_rate": 1.967284649949093e-06, - "loss": 0.9222, - "step": 4325 - }, - { - "epoch": 0.5201707449047075, - "grad_norm": 1.7379966245647, - "learning_rate": 1.966505785025994e-06, - "loss": 0.9213, - "step": 4326 - }, - { - "epoch": 0.5202909877953465, - "grad_norm": 1.7358144337099393, - "learning_rate": 1.965726925183941e-06, - "loss": 0.9697, - "step": 4327 - }, - { - "epoch": 0.5204112306859857, - "grad_norm": 1.707272440546259, - "learning_rate": 1.964948070541087e-06, - "loss": 1.0432, - "step": 4328 - }, - { - "epoch": 0.5205314735766248, - "grad_norm": 2.200315916613806, - "learning_rate": 1.9641692212155816e-06, - "loss": 0.888, - "step": 4329 - }, - { - "epoch": 0.5206517164672638, - "grad_norm": 1.7800304855993578, - "learning_rate": 1.9633903773255777e-06, - "loss": 0.9328, - "step": 4330 - }, - { - "epoch": 0.520771959357903, - "grad_norm": 1.6880310777045144, - "learning_rate": 1.9626115389892237e-06, - "loss": 0.95, - "step": 4331 - }, - { - "epoch": 0.520892202248542, - "grad_norm": 1.7600964437416413, - "learning_rate": 1.96183270632467e-06, - "loss": 1.047, - "step": 4332 - }, - { - "epoch": 0.5210124451391811, - "grad_norm": 1.424177380754598, - "learning_rate": 1.9610538794500644e-06, - "loss": 0.9816, - "step": 4333 - }, - { - "epoch": 0.5211326880298203, - "grad_norm": 0.8650858387774991, - "learning_rate": 1.9602750584835542e-06, - "loss": 0.8278, - "step": 4334 - }, - { - "epoch": 0.5212529309204593, - "grad_norm": 2.040656040767574, - "learning_rate": 1.959496243543286e-06, - "loss": 1.0283, - "step": 4335 - }, - { - "epoch": 0.5213731738110984, - "grad_norm": 1.9317257267456402, - "learning_rate": 1.9587174347474057e-06, - "loss": 0.997, - "step": 4336 - }, - { - "epoch": 0.5214934167017375, - "grad_norm": 2.5311201085801596, - "learning_rate": 1.9579386322140574e-06, - "loss": 1.0161, - "step": 4337 - }, - { - "epoch": 0.5216136595923766, - "grad_norm": 2.0424913829682394, - "learning_rate": 1.9571598360613854e-06, - "loss": 1.0046, - "step": 4338 - }, - { - "epoch": 0.5217339024830157, - "grad_norm": 1.939534134723986, - "learning_rate": 1.956381046407532e-06, - "loss": 0.8971, - "step": 4339 - }, - { - "epoch": 0.5218541453736548, - "grad_norm": 1.695173055653579, - "learning_rate": 1.9556022633706394e-06, - "loss": 1.0629, - "step": 4340 - }, - { - "epoch": 0.5219743882642939, - "grad_norm": 1.5472021816530201, - "learning_rate": 1.954823487068848e-06, - "loss": 0.9918, - "step": 4341 - }, - { - "epoch": 0.5220946311549329, - "grad_norm": 1.7070862086896244, - "learning_rate": 1.9540447176202976e-06, - "loss": 1.0086, - "step": 4342 - }, - { - "epoch": 0.5222148740455721, - "grad_norm": 0.8830376059558602, - "learning_rate": 1.9532659551431272e-06, - "loss": 0.8474, - "step": 4343 - }, - { - "epoch": 0.5223351169362112, - "grad_norm": 1.7134673344994016, - "learning_rate": 1.9524871997554744e-06, - "loss": 0.8651, - "step": 4344 - }, - { - "epoch": 0.5224553598268502, - "grad_norm": 2.0622035882827783, - "learning_rate": 1.951708451575475e-06, - "loss": 0.9994, - "step": 4345 - }, - { - "epoch": 0.5225756027174894, - "grad_norm": 1.7276160706585408, - "learning_rate": 1.9509297107212657e-06, - "loss": 1.018, - "step": 4346 - }, - { - "epoch": 0.5226958456081284, - "grad_norm": 1.5229842074020643, - "learning_rate": 1.95015097731098e-06, - "loss": 0.9917, - "step": 4347 - }, - { - "epoch": 0.5228160884987675, - "grad_norm": 1.9229170912566151, - "learning_rate": 1.949372251462751e-06, - "loss": 1.0196, - "step": 4348 - }, - { - "epoch": 0.5229363313894067, - "grad_norm": 1.6887059317937263, - "learning_rate": 1.9485935332947124e-06, - "loss": 1.0196, - "step": 4349 - }, - { - "epoch": 0.5230565742800457, - "grad_norm": 2.5694534118312573, - "learning_rate": 1.947814822924993e-06, - "loss": 1.0352, - "step": 4350 - }, - { - "epoch": 0.5231768171706848, - "grad_norm": 1.6824391382100952, - "learning_rate": 1.9470361204717236e-06, - "loss": 1.0217, - "step": 4351 - }, - { - "epoch": 0.5232970600613239, - "grad_norm": 1.4745240849972368, - "learning_rate": 1.9462574260530326e-06, - "loss": 1.0063, - "step": 4352 - }, - { - "epoch": 0.523417302951963, - "grad_norm": 1.64737598537083, - "learning_rate": 1.9454787397870472e-06, - "loss": 1.0108, - "step": 4353 - }, - { - "epoch": 0.523537545842602, - "grad_norm": 1.873085061361263, - "learning_rate": 1.944700061791894e-06, - "loss": 0.9097, - "step": 4354 - }, - { - "epoch": 0.5236577887332411, - "grad_norm": 2.115457538868337, - "learning_rate": 1.943921392185698e-06, - "loss": 0.8594, - "step": 4355 - }, - { - "epoch": 0.5237780316238803, - "grad_norm": 1.8853053135595024, - "learning_rate": 1.9431427310865814e-06, - "loss": 0.974, - "step": 4356 - }, - { - "epoch": 0.5238982745145193, - "grad_norm": 1.6438013482141773, - "learning_rate": 1.942364078612667e-06, - "loss": 0.9955, - "step": 4357 - }, - { - "epoch": 0.5240185174051584, - "grad_norm": 2.037183107411447, - "learning_rate": 1.9415854348820765e-06, - "loss": 0.9488, - "step": 4358 - }, - { - "epoch": 0.5241387602957975, - "grad_norm": 2.022240008228229, - "learning_rate": 1.940806800012929e-06, - "loss": 0.8704, - "step": 4359 - }, - { - "epoch": 0.5242590031864366, - "grad_norm": 1.455583523019745, - "learning_rate": 1.9400281741233432e-06, - "loss": 0.8369, - "step": 4360 - }, - { - "epoch": 0.5243792460770756, - "grad_norm": 0.7290515717996683, - "learning_rate": 1.939249557331435e-06, - "loss": 0.7536, - "step": 4361 - }, - { - "epoch": 0.5244994889677148, - "grad_norm": 1.671614154182312, - "learning_rate": 1.938470949755321e-06, - "loss": 0.9237, - "step": 4362 - }, - { - "epoch": 0.5246197318583539, - "grad_norm": 0.925280632802416, - "learning_rate": 1.937692351513115e-06, - "loss": 0.8178, - "step": 4363 - }, - { - "epoch": 0.5247399747489929, - "grad_norm": 1.691593362444585, - "learning_rate": 1.9369137627229297e-06, - "loss": 1.0017, - "step": 4364 - }, - { - "epoch": 0.5248602176396321, - "grad_norm": 1.9181129416903682, - "learning_rate": 1.936135183502877e-06, - "loss": 1.0797, - "step": 4365 - }, - { - "epoch": 0.5249804605302711, - "grad_norm": 1.9036928754665687, - "learning_rate": 1.935356613971066e-06, - "loss": 1.0088, - "step": 4366 - }, - { - "epoch": 0.5251007034209102, - "grad_norm": 1.64786494286822, - "learning_rate": 1.9345780542456047e-06, - "loss": 0.9705, - "step": 4367 - }, - { - "epoch": 0.5252209463115494, - "grad_norm": 3.4358758037001493, - "learning_rate": 1.9337995044446007e-06, - "loss": 0.9218, - "step": 4368 - }, - { - "epoch": 0.5253411892021884, - "grad_norm": 1.8066311232858245, - "learning_rate": 1.9330209646861596e-06, - "loss": 1.0013, - "step": 4369 - }, - { - "epoch": 0.5254614320928275, - "grad_norm": 1.44508335255912, - "learning_rate": 1.9322424350883843e-06, - "loss": 0.9767, - "step": 4370 - }, - { - "epoch": 0.5255816749834666, - "grad_norm": 1.6616868805620337, - "learning_rate": 1.931463915769379e-06, - "loss": 0.9769, - "step": 4371 - }, - { - "epoch": 0.5257019178741057, - "grad_norm": 2.2928150651888037, - "learning_rate": 1.930685406847242e-06, - "loss": 0.9384, - "step": 4372 - }, - { - "epoch": 0.5258221607647448, - "grad_norm": 1.409765723089192, - "learning_rate": 1.9299069084400734e-06, - "loss": 1.0163, - "step": 4373 - }, - { - "epoch": 0.5259424036553839, - "grad_norm": 1.9005453707350777, - "learning_rate": 1.9291284206659717e-06, - "loss": 0.8908, - "step": 4374 - }, - { - "epoch": 0.526062646546023, - "grad_norm": 1.9766529650585867, - "learning_rate": 1.928349943643032e-06, - "loss": 0.9146, - "step": 4375 - }, - { - "epoch": 0.526182889436662, - "grad_norm": 1.5643105507413695, - "learning_rate": 1.9275714774893493e-06, - "loss": 1.0291, - "step": 4376 - }, - { - "epoch": 0.5263031323273012, - "grad_norm": 2.4574480779679155, - "learning_rate": 1.9267930223230154e-06, - "loss": 0.9403, - "step": 4377 - }, - { - "epoch": 0.5264233752179402, - "grad_norm": 2.0520238980691894, - "learning_rate": 1.9260145782621224e-06, - "loss": 0.9863, - "step": 4378 - }, - { - "epoch": 0.5265436181085793, - "grad_norm": 1.709236309555193, - "learning_rate": 1.925236145424758e-06, - "loss": 1.0757, - "step": 4379 - }, - { - "epoch": 0.5266638609992185, - "grad_norm": 0.8063904602952972, - "learning_rate": 1.924457723929012e-06, - "loss": 0.7976, - "step": 4380 - }, - { - "epoch": 0.5267841038898575, - "grad_norm": 1.3643469375191812, - "learning_rate": 1.9236793138929685e-06, - "loss": 1.0249, - "step": 4381 - }, - { - "epoch": 0.5269043467804966, - "grad_norm": 1.7409281605333229, - "learning_rate": 1.9229009154347133e-06, - "loss": 1.0054, - "step": 4382 - }, - { - "epoch": 0.5270245896711357, - "grad_norm": 1.8901155904713918, - "learning_rate": 1.922122528672327e-06, - "loss": 1.0093, - "step": 4383 - }, - { - "epoch": 0.5271448325617748, - "grad_norm": 2.153371797322437, - "learning_rate": 1.9213441537238914e-06, - "loss": 0.9815, - "step": 4384 - }, - { - "epoch": 0.5272650754524139, - "grad_norm": 1.0043270550187762, - "learning_rate": 1.920565790707485e-06, - "loss": 0.8479, - "step": 4385 - }, - { - "epoch": 0.527385318343053, - "grad_norm": 1.803909067818352, - "learning_rate": 1.9197874397411853e-06, - "loss": 0.8649, - "step": 4386 - }, - { - "epoch": 0.5275055612336921, - "grad_norm": 2.9714962755077527, - "learning_rate": 1.919009100943067e-06, - "loss": 0.8641, - "step": 4387 - }, - { - "epoch": 0.5276258041243311, - "grad_norm": 2.1217161498351262, - "learning_rate": 1.9182307744312043e-06, - "loss": 0.8529, - "step": 4388 - }, - { - "epoch": 0.5277460470149702, - "grad_norm": 1.5942334208626139, - "learning_rate": 1.9174524603236676e-06, - "loss": 0.9632, - "step": 4389 - }, - { - "epoch": 0.5278662899056094, - "grad_norm": 1.7359548023642322, - "learning_rate": 1.916674158738527e-06, - "loss": 0.9605, - "step": 4390 - }, - { - "epoch": 0.5279865327962484, - "grad_norm": 1.9085839788535761, - "learning_rate": 1.9158958697938506e-06, - "loss": 0.8013, - "step": 4391 - }, - { - "epoch": 0.5281067756868875, - "grad_norm": 2.3061234478227526, - "learning_rate": 1.9151175936077032e-06, - "loss": 1.0549, - "step": 4392 - }, - { - "epoch": 0.5282270185775266, - "grad_norm": 1.490635329236724, - "learning_rate": 1.9143393302981507e-06, - "loss": 0.9917, - "step": 4393 - }, - { - "epoch": 0.5283472614681657, - "grad_norm": 1.570013672592983, - "learning_rate": 1.913561079983252e-06, - "loss": 1.032, - "step": 4394 - }, - { - "epoch": 0.5284675043588047, - "grad_norm": 4.751403601215185, - "learning_rate": 1.9127828427810693e-06, - "loss": 0.9496, - "step": 4395 - }, - { - "epoch": 0.5285877472494439, - "grad_norm": 1.7257611931066634, - "learning_rate": 1.9120046188096607e-06, - "loss": 1.0074, - "step": 4396 - }, - { - "epoch": 0.528707990140083, - "grad_norm": 1.799737789341014, - "learning_rate": 1.9112264081870804e-06, - "loss": 0.9456, - "step": 4397 - }, - { - "epoch": 0.528828233030722, - "grad_norm": 2.604899331342427, - "learning_rate": 1.9104482110313843e-06, - "loss": 0.9557, - "step": 4398 - }, - { - "epoch": 0.5289484759213612, - "grad_norm": 1.8559994229817316, - "learning_rate": 1.909670027460623e-06, - "loss": 0.9416, - "step": 4399 - }, - { - "epoch": 0.5290687188120002, - "grad_norm": 1.6388482782034268, - "learning_rate": 1.908891857592847e-06, - "loss": 0.9178, - "step": 4400 - }, - { - "epoch": 0.5291889617026393, - "grad_norm": 11.065455844623681, - "learning_rate": 1.9081137015461034e-06, - "loss": 1.0978, - "step": 4401 - }, - { - "epoch": 0.5293092045932785, - "grad_norm": 1.9049497453563897, - "learning_rate": 1.9073355594384383e-06, - "loss": 1.1015, - "step": 4402 - }, - { - "epoch": 0.5294294474839175, - "grad_norm": 1.832084345912357, - "learning_rate": 1.906557431387895e-06, - "loss": 1.0014, - "step": 4403 - }, - { - "epoch": 0.5295496903745566, - "grad_norm": 1.8825811346840309, - "learning_rate": 1.905779317512516e-06, - "loss": 0.9803, - "step": 4404 - }, - { - "epoch": 0.5296699332651957, - "grad_norm": 1.742577547621829, - "learning_rate": 1.9050012179303385e-06, - "loss": 1.0055, - "step": 4405 - }, - { - "epoch": 0.5297901761558348, - "grad_norm": 2.0826344144198656, - "learning_rate": 1.904223132759401e-06, - "loss": 0.8904, - "step": 4406 - }, - { - "epoch": 0.5299104190464738, - "grad_norm": 2.15250414131289, - "learning_rate": 1.9034450621177383e-06, - "loss": 0.8882, - "step": 4407 - }, - { - "epoch": 0.530030661937113, - "grad_norm": 1.809476303400785, - "learning_rate": 1.9026670061233824e-06, - "loss": 0.9076, - "step": 4408 - }, - { - "epoch": 0.5301509048277521, - "grad_norm": 1.5829674482053118, - "learning_rate": 1.901888964894365e-06, - "loss": 1.008, - "step": 4409 - }, - { - "epoch": 0.5302711477183911, - "grad_norm": 2.1152569178738743, - "learning_rate": 1.9011109385487134e-06, - "loss": 0.8746, - "step": 4410 - }, - { - "epoch": 0.5303913906090303, - "grad_norm": 2.0372234835535687, - "learning_rate": 1.900332927204454e-06, - "loss": 0.86, - "step": 4411 - }, - { - "epoch": 0.5305116334996693, - "grad_norm": 1.632559140639863, - "learning_rate": 1.8995549309796097e-06, - "loss": 0.9699, - "step": 4412 - }, - { - "epoch": 0.5306318763903084, - "grad_norm": 1.6764456047838088, - "learning_rate": 1.8987769499922028e-06, - "loss": 0.9569, - "step": 4413 - }, - { - "epoch": 0.5307521192809476, - "grad_norm": 1.8577438380303857, - "learning_rate": 1.897998984360252e-06, - "loss": 0.9091, - "step": 4414 - }, - { - "epoch": 0.5308723621715866, - "grad_norm": 1.385473761660786, - "learning_rate": 1.897221034201775e-06, - "loss": 0.977, - "step": 4415 - }, - { - "epoch": 0.5309926050622257, - "grad_norm": 1.4315822532172018, - "learning_rate": 1.8964430996347842e-06, - "loss": 0.8655, - "step": 4416 - }, - { - "epoch": 0.5311128479528648, - "grad_norm": 1.7328637214430642, - "learning_rate": 1.8956651807772931e-06, - "loss": 1.0263, - "step": 4417 - }, - { - "epoch": 0.5312330908435039, - "grad_norm": 1.498003823508462, - "learning_rate": 1.8948872777473115e-06, - "loss": 1.0307, - "step": 4418 - }, - { - "epoch": 0.531353333734143, - "grad_norm": 1.555950016385877, - "learning_rate": 1.8941093906628458e-06, - "loss": 0.8372, - "step": 4419 - }, - { - "epoch": 0.531473576624782, - "grad_norm": 1.8000234469899954, - "learning_rate": 1.893331519641902e-06, - "loss": 0.9125, - "step": 4420 - }, - { - "epoch": 0.5315938195154212, - "grad_norm": 15.915785893275453, - "learning_rate": 1.8925536648024815e-06, - "loss": 0.9462, - "step": 4421 - }, - { - "epoch": 0.5317140624060602, - "grad_norm": 1.683358372920671, - "learning_rate": 1.8917758262625849e-06, - "loss": 0.9562, - "step": 4422 - }, - { - "epoch": 0.5318343052966993, - "grad_norm": 1.581452591172502, - "learning_rate": 1.8909980041402089e-06, - "loss": 1.011, - "step": 4423 - }, - { - "epoch": 0.5319545481873384, - "grad_norm": 2.680325268179854, - "learning_rate": 1.8902201985533494e-06, - "loss": 0.8654, - "step": 4424 - }, - { - "epoch": 0.5320747910779775, - "grad_norm": 1.593892558584727, - "learning_rate": 1.8894424096199983e-06, - "loss": 0.9493, - "step": 4425 - }, - { - "epoch": 0.5321950339686166, - "grad_norm": 1.7282831930148392, - "learning_rate": 1.8886646374581463e-06, - "loss": 1.0583, - "step": 4426 - }, - { - "epoch": 0.5323152768592557, - "grad_norm": 1.5921532354888803, - "learning_rate": 1.8878868821857795e-06, - "loss": 0.9058, - "step": 4427 - }, - { - "epoch": 0.5324355197498948, - "grad_norm": 1.9887735473082762, - "learning_rate": 1.8871091439208838e-06, - "loss": 0.9469, - "step": 4428 - }, - { - "epoch": 0.5325557626405338, - "grad_norm": 2.569471691319337, - "learning_rate": 1.8863314227814414e-06, - "loss": 0.9697, - "step": 4429 - }, - { - "epoch": 0.532676005531173, - "grad_norm": 2.378613583957493, - "learning_rate": 1.8855537188854313e-06, - "loss": 0.6845, - "step": 4430 - }, - { - "epoch": 0.5327962484218121, - "grad_norm": 2.137094523915681, - "learning_rate": 1.8847760323508315e-06, - "loss": 0.9764, - "step": 4431 - }, - { - "epoch": 0.5329164913124511, - "grad_norm": 1.6226278009616042, - "learning_rate": 1.883998363295616e-06, - "loss": 0.9533, - "step": 4432 - }, - { - "epoch": 0.5330367342030903, - "grad_norm": 1.0270280244081058, - "learning_rate": 1.8832207118377565e-06, - "loss": 0.8823, - "step": 4433 - }, - { - "epoch": 0.5331569770937293, - "grad_norm": 1.8889375201989753, - "learning_rate": 1.882443078095222e-06, - "loss": 0.8922, - "step": 4434 - }, - { - "epoch": 0.5332772199843684, - "grad_norm": 0.895216500293343, - "learning_rate": 1.8816654621859794e-06, - "loss": 0.8845, - "step": 4435 - }, - { - "epoch": 0.5333974628750076, - "grad_norm": 1.916962586930789, - "learning_rate": 1.8808878642279915e-06, - "loss": 0.9173, - "step": 4436 - }, - { - "epoch": 0.5335177057656466, - "grad_norm": 2.085091637260268, - "learning_rate": 1.8801102843392209e-06, - "loss": 0.8582, - "step": 4437 - }, - { - "epoch": 0.5336379486562857, - "grad_norm": 1.4701209467483407, - "learning_rate": 1.8793327226376238e-06, - "loss": 1.0505, - "step": 4438 - }, - { - "epoch": 0.5337581915469248, - "grad_norm": 1.7188948136791744, - "learning_rate": 1.8785551792411569e-06, - "loss": 0.997, - "step": 4439 - }, - { - "epoch": 0.5338784344375639, - "grad_norm": 1.9993479380992663, - "learning_rate": 1.8777776542677733e-06, - "loss": 1.0263, - "step": 4440 - }, - { - "epoch": 0.5339986773282029, - "grad_norm": 1.8019702486340556, - "learning_rate": 1.8770001478354216e-06, - "loss": 0.921, - "step": 4441 - }, - { - "epoch": 0.5341189202188421, - "grad_norm": 1.8753078208016971, - "learning_rate": 1.8762226600620504e-06, - "loss": 1.0288, - "step": 4442 - }, - { - "epoch": 0.5342391631094812, - "grad_norm": 3.549727582553252, - "learning_rate": 1.8754451910656031e-06, - "loss": 0.8044, - "step": 4443 - }, - { - "epoch": 0.5343594060001202, - "grad_norm": 2.19184716016908, - "learning_rate": 1.8746677409640212e-06, - "loss": 1.0216, - "step": 4444 - }, - { - "epoch": 0.5344796488907594, - "grad_norm": 1.6171556552571178, - "learning_rate": 1.8738903098752432e-06, - "loss": 1.0487, - "step": 4445 - }, - { - "epoch": 0.5345998917813984, - "grad_norm": 1.9987298389250607, - "learning_rate": 1.8731128979172052e-06, - "loss": 0.9362, - "step": 4446 - }, - { - "epoch": 0.5347201346720375, - "grad_norm": 2.131914781273641, - "learning_rate": 1.8723355052078394e-06, - "loss": 0.8632, - "step": 4447 - }, - { - "epoch": 0.5348403775626767, - "grad_norm": 1.9605552027119408, - "learning_rate": 1.8715581318650765e-06, - "loss": 0.9747, - "step": 4448 - }, - { - "epoch": 0.5349606204533157, - "grad_norm": 2.147538985585732, - "learning_rate": 1.8707807780068422e-06, - "loss": 1.0229, - "step": 4449 - }, - { - "epoch": 0.5350808633439548, - "grad_norm": 1.9046554775534705, - "learning_rate": 1.8700034437510611e-06, - "loss": 0.862, - "step": 4450 - }, - { - "epoch": 0.5352011062345938, - "grad_norm": 2.123483004163143, - "learning_rate": 1.8692261292156549e-06, - "loss": 1.0106, - "step": 4451 - }, - { - "epoch": 0.535321349125233, - "grad_norm": 1.913389064107868, - "learning_rate": 1.8684488345185401e-06, - "loss": 1.0221, - "step": 4452 - }, - { - "epoch": 0.535441592015872, - "grad_norm": 2.1071343848956388, - "learning_rate": 1.8676715597776332e-06, - "loss": 0.9987, - "step": 4453 - }, - { - "epoch": 0.5355618349065111, - "grad_norm": 1.8396440466441522, - "learning_rate": 1.8668943051108455e-06, - "loss": 0.9674, - "step": 4454 - }, - { - "epoch": 0.5356820777971503, - "grad_norm": 1.6838574125583308, - "learning_rate": 1.8661170706360856e-06, - "loss": 0.965, - "step": 4455 - }, - { - "epoch": 0.5358023206877893, - "grad_norm": 1.5464430697273388, - "learning_rate": 1.8653398564712594e-06, - "loss": 1.0159, - "step": 4456 - }, - { - "epoch": 0.5359225635784284, - "grad_norm": 1.430936507495237, - "learning_rate": 1.8645626627342704e-06, - "loss": 1.017, - "step": 4457 - }, - { - "epoch": 0.5360428064690675, - "grad_norm": 2.067513354995197, - "learning_rate": 1.8637854895430172e-06, - "loss": 1.0056, - "step": 4458 - }, - { - "epoch": 0.5361630493597066, - "grad_norm": 2.0506352254559705, - "learning_rate": 1.8630083370153978e-06, - "loss": 0.8877, - "step": 4459 - }, - { - "epoch": 0.5362832922503457, - "grad_norm": 0.8504629473231579, - "learning_rate": 1.8622312052693041e-06, - "loss": 0.7849, - "step": 4460 - }, - { - "epoch": 0.5364035351409848, - "grad_norm": 2.1556094780084094, - "learning_rate": 1.8614540944226267e-06, - "loss": 0.923, - "step": 4461 - }, - { - "epoch": 0.5365237780316239, - "grad_norm": 1.7778862764443046, - "learning_rate": 1.8606770045932537e-06, - "loss": 0.8836, - "step": 4462 - }, - { - "epoch": 0.5366440209222629, - "grad_norm": 2.3846027590462375, - "learning_rate": 1.859899935899068e-06, - "loss": 1.0168, - "step": 4463 - }, - { - "epoch": 0.5367642638129021, - "grad_norm": 1.748981179259878, - "learning_rate": 1.8591228884579506e-06, - "loss": 0.9953, - "step": 4464 - }, - { - "epoch": 0.5368845067035412, - "grad_norm": 1.8324518757911363, - "learning_rate": 1.8583458623877795e-06, - "loss": 1.0165, - "step": 4465 - }, - { - "epoch": 0.5370047495941802, - "grad_norm": 1.5819097024825632, - "learning_rate": 1.8575688578064281e-06, - "loss": 0.9425, - "step": 4466 - }, - { - "epoch": 0.5371249924848194, - "grad_norm": 1.7452261388546693, - "learning_rate": 1.8567918748317674e-06, - "loss": 0.9587, - "step": 4467 - }, - { - "epoch": 0.5372452353754584, - "grad_norm": 1.8218663341388093, - "learning_rate": 1.8560149135816659e-06, - "loss": 1.0208, - "step": 4468 - }, - { - "epoch": 0.5373654782660975, - "grad_norm": 2.03714612450373, - "learning_rate": 1.8552379741739873e-06, - "loss": 1.0419, - "step": 4469 - }, - { - "epoch": 0.5374857211567367, - "grad_norm": 0.9610252340320308, - "learning_rate": 1.8544610567265935e-06, - "loss": 0.777, - "step": 4470 - }, - { - "epoch": 0.5376059640473757, - "grad_norm": 1.8174463063196613, - "learning_rate": 1.853684161357341e-06, - "loss": 1.037, - "step": 4471 - }, - { - "epoch": 0.5377262069380148, - "grad_norm": 1.677882728634455, - "learning_rate": 1.852907288184085e-06, - "loss": 0.9707, - "step": 4472 - }, - { - "epoch": 0.5378464498286539, - "grad_norm": 1.7490279358976908, - "learning_rate": 1.8521304373246762e-06, - "loss": 0.9019, - "step": 4473 - }, - { - "epoch": 0.537966692719293, - "grad_norm": 2.5445216252792333, - "learning_rate": 1.8513536088969626e-06, - "loss": 1.0916, - "step": 4474 - }, - { - "epoch": 0.538086935609932, - "grad_norm": 1.5478362362460862, - "learning_rate": 1.8505768030187884e-06, - "loss": 0.9981, - "step": 4475 - }, - { - "epoch": 0.5382071785005712, - "grad_norm": 1.5223147436473827, - "learning_rate": 1.849800019807995e-06, - "loss": 0.9968, - "step": 4476 - }, - { - "epoch": 0.5383274213912103, - "grad_norm": 2.028107780301152, - "learning_rate": 1.8490232593824186e-06, - "loss": 0.9144, - "step": 4477 - }, - { - "epoch": 0.5384476642818493, - "grad_norm": 1.8835202228462018, - "learning_rate": 1.8482465218598935e-06, - "loss": 1.0411, - "step": 4478 - }, - { - "epoch": 0.5385679071724885, - "grad_norm": 1.6168836225199368, - "learning_rate": 1.8474698073582508e-06, - "loss": 1.0335, - "step": 4479 - }, - { - "epoch": 0.5386881500631275, - "grad_norm": 1.8958396562184403, - "learning_rate": 1.8466931159953166e-06, - "loss": 1.0712, - "step": 4480 - }, - { - "epoch": 0.5388083929537666, - "grad_norm": 1.9401926587978766, - "learning_rate": 1.8459164478889158e-06, - "loss": 1.0408, - "step": 4481 - }, - { - "epoch": 0.5389286358444056, - "grad_norm": 1.6685287643253206, - "learning_rate": 1.8451398031568663e-06, - "loss": 0.962, - "step": 4482 - }, - { - "epoch": 0.5390488787350448, - "grad_norm": 1.6814484347973464, - "learning_rate": 1.844363181916986e-06, - "loss": 0.9441, - "step": 4483 - }, - { - "epoch": 0.5391691216256839, - "grad_norm": 1.7869631514632585, - "learning_rate": 1.8435865842870868e-06, - "loss": 1.0266, - "step": 4484 - }, - { - "epoch": 0.5392893645163229, - "grad_norm": 1.8114954696922922, - "learning_rate": 1.8428100103849787e-06, - "loss": 0.9171, - "step": 4485 - }, - { - "epoch": 0.5394096074069621, - "grad_norm": 1.9402697669576636, - "learning_rate": 1.842033460328467e-06, - "loss": 0.9238, - "step": 4486 - }, - { - "epoch": 0.5395298502976011, - "grad_norm": 1.4810657756862724, - "learning_rate": 1.8412569342353541e-06, - "loss": 0.9484, - "step": 4487 - }, - { - "epoch": 0.5396500931882402, - "grad_norm": 3.4783337142303163, - "learning_rate": 1.840480432223438e-06, - "loss": 1.0449, - "step": 4488 - }, - { - "epoch": 0.5397703360788794, - "grad_norm": 2.1654965713533727, - "learning_rate": 1.8397039544105131e-06, - "loss": 0.9702, - "step": 4489 - }, - { - "epoch": 0.5398905789695184, - "grad_norm": 1.6579701829536853, - "learning_rate": 1.8389275009143711e-06, - "loss": 0.8992, - "step": 4490 - }, - { - "epoch": 0.5400108218601575, - "grad_norm": 1.7596237684309115, - "learning_rate": 1.8381510718527988e-06, - "loss": 0.931, - "step": 4491 - }, - { - "epoch": 0.5401310647507966, - "grad_norm": 1.6413335814043277, - "learning_rate": 1.8373746673435812e-06, - "loss": 0.8292, - "step": 4492 - }, - { - "epoch": 0.5402513076414357, - "grad_norm": 1.5360345633214174, - "learning_rate": 1.8365982875044964e-06, - "loss": 0.9817, - "step": 4493 - }, - { - "epoch": 0.5403715505320748, - "grad_norm": 2.8047463243526547, - "learning_rate": 1.8358219324533217e-06, - "loss": 0.9614, - "step": 4494 - }, - { - "epoch": 0.5404917934227139, - "grad_norm": 1.4181020794861763, - "learning_rate": 1.8350456023078292e-06, - "loss": 0.8911, - "step": 4495 - }, - { - "epoch": 0.540612036313353, - "grad_norm": 2.22726836142174, - "learning_rate": 1.8342692971857874e-06, - "loss": 0.9709, - "step": 4496 - }, - { - "epoch": 0.540732279203992, - "grad_norm": 2.1794343753534235, - "learning_rate": 1.833493017204962e-06, - "loss": 0.9176, - "step": 4497 - }, - { - "epoch": 0.5408525220946312, - "grad_norm": 1.6483531767624975, - "learning_rate": 1.8327167624831134e-06, - "loss": 0.9709, - "step": 4498 - }, - { - "epoch": 0.5409727649852702, - "grad_norm": 1.5779628103778838, - "learning_rate": 1.831940533137999e-06, - "loss": 0.9045, - "step": 4499 - }, - { - "epoch": 0.5410930078759093, - "grad_norm": 1.7763455784402395, - "learning_rate": 1.8311643292873718e-06, - "loss": 0.9222, - "step": 4500 - }, - { - "epoch": 0.5412132507665485, - "grad_norm": 1.622638159748853, - "learning_rate": 1.8303881510489818e-06, - "loss": 1.0819, - "step": 4501 - }, - { - "epoch": 0.5413334936571875, - "grad_norm": 1.8647448791196293, - "learning_rate": 1.829611998540574e-06, - "loss": 0.8942, - "step": 4502 - }, - { - "epoch": 0.5414537365478266, - "grad_norm": 1.756535994840947, - "learning_rate": 1.8288358718798914e-06, - "loss": 1.0047, - "step": 4503 - }, - { - "epoch": 0.5415739794384657, - "grad_norm": 1.686345376497964, - "learning_rate": 1.8280597711846703e-06, - "loss": 0.9269, - "step": 4504 - }, - { - "epoch": 0.5416942223291048, - "grad_norm": 1.9973716750957322, - "learning_rate": 1.8272836965726455e-06, - "loss": 1.0332, - "step": 4505 - }, - { - "epoch": 0.5418144652197439, - "grad_norm": 1.6575958254172438, - "learning_rate": 1.8265076481615461e-06, - "loss": 0.9801, - "step": 4506 - }, - { - "epoch": 0.541934708110383, - "grad_norm": 2.011890589217256, - "learning_rate": 1.8257316260690987e-06, - "loss": 1.0696, - "step": 4507 - }, - { - "epoch": 0.5420549510010221, - "grad_norm": 1.4168729725382754, - "learning_rate": 1.8249556304130254e-06, - "loss": 0.9618, - "step": 4508 - }, - { - "epoch": 0.5421751938916611, - "grad_norm": 4.0802022386430234, - "learning_rate": 1.824179661311044e-06, - "loss": 0.8867, - "step": 4509 - }, - { - "epoch": 0.5422954367823003, - "grad_norm": 1.782532614988723, - "learning_rate": 1.823403718880868e-06, - "loss": 0.9903, - "step": 4510 - }, - { - "epoch": 0.5424156796729394, - "grad_norm": 1.828451013432, - "learning_rate": 1.822627803240207e-06, - "loss": 0.8662, - "step": 4511 - }, - { - "epoch": 0.5425359225635784, - "grad_norm": 2.477961677470458, - "learning_rate": 1.8218519145067675e-06, - "loss": 1.0512, - "step": 4512 - }, - { - "epoch": 0.5426561654542175, - "grad_norm": 1.7083424436124706, - "learning_rate": 1.8210760527982508e-06, - "loss": 1.0978, - "step": 4513 - }, - { - "epoch": 0.5427764083448566, - "grad_norm": 1.7248982827977235, - "learning_rate": 1.8203002182323552e-06, - "loss": 0.9403, - "step": 4514 - }, - { - "epoch": 0.5428966512354957, - "grad_norm": 1.6486930468297594, - "learning_rate": 1.819524410926773e-06, - "loss": 0.9527, - "step": 4515 - }, - { - "epoch": 0.5430168941261347, - "grad_norm": 1.4327330955209692, - "learning_rate": 1.8187486309991944e-06, - "loss": 0.9693, - "step": 4516 - }, - { - "epoch": 0.5431371370167739, - "grad_norm": 1.642404517616566, - "learning_rate": 1.817972878567304e-06, - "loss": 0.9729, - "step": 4517 - }, - { - "epoch": 0.543257379907413, - "grad_norm": 1.6369733589628457, - "learning_rate": 1.8171971537487834e-06, - "loss": 0.9583, - "step": 4518 - }, - { - "epoch": 0.543377622798052, - "grad_norm": 1.5178495775563172, - "learning_rate": 1.8164214566613093e-06, - "loss": 1.002, - "step": 4519 - }, - { - "epoch": 0.5434978656886912, - "grad_norm": 2.2262440236186496, - "learning_rate": 1.8156457874225547e-06, - "loss": 0.844, - "step": 4520 - }, - { - "epoch": 0.5436181085793302, - "grad_norm": 1.71053055056152, - "learning_rate": 1.814870146150187e-06, - "loss": 1.0042, - "step": 4521 - }, - { - "epoch": 0.5437383514699693, - "grad_norm": 2.11695832423298, - "learning_rate": 1.814094532961871e-06, - "loss": 0.9915, - "step": 4522 - }, - { - "epoch": 0.5438585943606085, - "grad_norm": 2.2581706571463713, - "learning_rate": 1.8133189479752666e-06, - "loss": 1.0336, - "step": 4523 - }, - { - "epoch": 0.5439788372512475, - "grad_norm": 1.6977753921742236, - "learning_rate": 1.8125433913080292e-06, - "loss": 1.0149, - "step": 4524 - }, - { - "epoch": 0.5440990801418866, - "grad_norm": 1.9108725610835875, - "learning_rate": 1.811767863077811e-06, - "loss": 1.0308, - "step": 4525 - }, - { - "epoch": 0.5442193230325257, - "grad_norm": 1.4183873591161507, - "learning_rate": 1.8109923634022577e-06, - "loss": 0.9793, - "step": 4526 - }, - { - "epoch": 0.5443395659231648, - "grad_norm": 1.9727360775347156, - "learning_rate": 1.8102168923990128e-06, - "loss": 1.0602, - "step": 4527 - }, - { - "epoch": 0.5444598088138038, - "grad_norm": 1.7000875422433086, - "learning_rate": 1.809441450185714e-06, - "loss": 0.9934, - "step": 4528 - }, - { - "epoch": 0.544580051704443, - "grad_norm": 1.9326860859884274, - "learning_rate": 1.8086660368799958e-06, - "loss": 0.9343, - "step": 4529 - }, - { - "epoch": 0.5447002945950821, - "grad_norm": 1.5826997827912626, - "learning_rate": 1.807890652599488e-06, - "loss": 0.9689, - "step": 4530 - }, - { - "epoch": 0.5448205374857211, - "grad_norm": 1.7025171997725135, - "learning_rate": 1.8071152974618156e-06, - "loss": 1.016, - "step": 4531 - }, - { - "epoch": 0.5449407803763603, - "grad_norm": 1.8095781518608862, - "learning_rate": 1.806339971584599e-06, - "loss": 0.9811, - "step": 4532 - }, - { - "epoch": 0.5450610232669993, - "grad_norm": 1.889487562798306, - "learning_rate": 1.8055646750854546e-06, - "loss": 1.0548, - "step": 4533 - }, - { - "epoch": 0.5451812661576384, - "grad_norm": 2.237141210676874, - "learning_rate": 1.8047894080819945e-06, - "loss": 1.0207, - "step": 4534 - }, - { - "epoch": 0.5453015090482776, - "grad_norm": 0.8394520045539688, - "learning_rate": 1.8040141706918258e-06, - "loss": 0.861, - "step": 4535 - }, - { - "epoch": 0.5454217519389166, - "grad_norm": 1.677030440102576, - "learning_rate": 1.8032389630325525e-06, - "loss": 0.9601, - "step": 4536 - }, - { - "epoch": 0.5455419948295557, - "grad_norm": 1.4896906389475857, - "learning_rate": 1.8024637852217707e-06, - "loss": 0.9585, - "step": 4537 - }, - { - "epoch": 0.5456622377201948, - "grad_norm": 1.577902523259769, - "learning_rate": 1.8016886373770766e-06, - "loss": 1.0397, - "step": 4538 - }, - { - "epoch": 0.5457824806108339, - "grad_norm": 1.6369914189639843, - "learning_rate": 1.8009135196160579e-06, - "loss": 0.9798, - "step": 4539 - }, - { - "epoch": 0.545902723501473, - "grad_norm": 1.7376884907085524, - "learning_rate": 1.8001384320563e-06, - "loss": 1.0382, - "step": 4540 - }, - { - "epoch": 0.5460229663921121, - "grad_norm": 0.9179250503852124, - "learning_rate": 1.7993633748153833e-06, - "loss": 0.8054, - "step": 4541 - }, - { - "epoch": 0.5461432092827512, - "grad_norm": 1.7456000737460338, - "learning_rate": 1.7985883480108834e-06, - "loss": 0.9247, - "step": 4542 - }, - { - "epoch": 0.5462634521733902, - "grad_norm": 1.507618155332715, - "learning_rate": 1.797813351760371e-06, - "loss": 0.9254, - "step": 4543 - }, - { - "epoch": 0.5463836950640293, - "grad_norm": 1.6823760760441548, - "learning_rate": 1.7970383861814116e-06, - "loss": 0.9815, - "step": 4544 - }, - { - "epoch": 0.5465039379546685, - "grad_norm": 1.726584870206466, - "learning_rate": 1.7962634513915684e-06, - "loss": 0.9423, - "step": 4545 - }, - { - "epoch": 0.5466241808453075, - "grad_norm": 1.5269858484867769, - "learning_rate": 1.7954885475083969e-06, - "loss": 0.9942, - "step": 4546 - }, - { - "epoch": 0.5467444237359466, - "grad_norm": 1.8057974172464055, - "learning_rate": 1.7947136746494513e-06, - "loss": 0.9305, - "step": 4547 - }, - { - "epoch": 0.5468646666265857, - "grad_norm": 1.8585841957542628, - "learning_rate": 1.793938832932277e-06, - "loss": 1.0777, - "step": 4548 - }, - { - "epoch": 0.5469849095172248, - "grad_norm": 1.7996252491049871, - "learning_rate": 1.7931640224744185e-06, - "loss": 0.8924, - "step": 4549 - }, - { - "epoch": 0.5471051524078638, - "grad_norm": 1.4827134319369801, - "learning_rate": 1.7923892433934127e-06, - "loss": 0.9375, - "step": 4550 - }, - { - "epoch": 0.547225395298503, - "grad_norm": 1.8019879119247444, - "learning_rate": 1.7916144958067939e-06, - "loss": 0.9867, - "step": 4551 - }, - { - "epoch": 0.5473456381891421, - "grad_norm": 1.6892028976090383, - "learning_rate": 1.7908397798320905e-06, - "loss": 0.9868, - "step": 4552 - }, - { - "epoch": 0.5474658810797811, - "grad_norm": 1.6973414080418503, - "learning_rate": 1.7900650955868265e-06, - "loss": 0.941, - "step": 4553 - }, - { - "epoch": 0.5475861239704203, - "grad_norm": 1.5996155604800661, - "learning_rate": 1.7892904431885202e-06, - "loss": 0.9619, - "step": 4554 - }, - { - "epoch": 0.5477063668610593, - "grad_norm": 1.574556754578466, - "learning_rate": 1.788515822754686e-06, - "loss": 0.9523, - "step": 4555 - }, - { - "epoch": 0.5478266097516984, - "grad_norm": 2.109606920844173, - "learning_rate": 1.7877412344028335e-06, - "loss": 0.9853, - "step": 4556 - }, - { - "epoch": 0.5479468526423376, - "grad_norm": 2.025005189865432, - "learning_rate": 1.7869666782504668e-06, - "loss": 0.9847, - "step": 4557 - }, - { - "epoch": 0.5480670955329766, - "grad_norm": 1.9052061130628237, - "learning_rate": 1.7861921544150867e-06, - "loss": 0.8889, - "step": 4558 - }, - { - "epoch": 0.5481873384236157, - "grad_norm": 1.8345728570917823, - "learning_rate": 1.7854176630141856e-06, - "loss": 0.962, - "step": 4559 - }, - { - "epoch": 0.5483075813142548, - "grad_norm": 2.1242346787916477, - "learning_rate": 1.784643204165255e-06, - "loss": 1.0496, - "step": 4560 - }, - { - "epoch": 0.5484278242048939, - "grad_norm": 1.8964706718979063, - "learning_rate": 1.7838687779857783e-06, - "loss": 0.9685, - "step": 4561 - }, - { - "epoch": 0.5485480670955329, - "grad_norm": 1.7997943548774211, - "learning_rate": 1.7830943845932366e-06, - "loss": 0.8372, - "step": 4562 - }, - { - "epoch": 0.5486683099861721, - "grad_norm": 1.4319209595875835, - "learning_rate": 1.7823200241051044e-06, - "loss": 0.948, - "step": 4563 - }, - { - "epoch": 0.5487885528768112, - "grad_norm": 1.8202012973594255, - "learning_rate": 1.7815456966388513e-06, - "loss": 1.0003, - "step": 4564 - }, - { - "epoch": 0.5489087957674502, - "grad_norm": 2.078156205710252, - "learning_rate": 1.780771402311943e-06, - "loss": 1.0227, - "step": 4565 - }, - { - "epoch": 0.5490290386580894, - "grad_norm": 1.6236561573800259, - "learning_rate": 1.7799971412418374e-06, - "loss": 0.9845, - "step": 4566 - }, - { - "epoch": 0.5491492815487284, - "grad_norm": 1.9461180462999355, - "learning_rate": 1.7792229135459918e-06, - "loss": 0.9341, - "step": 4567 - }, - { - "epoch": 0.5492695244393675, - "grad_norm": 0.851651948203194, - "learning_rate": 1.7784487193418538e-06, - "loss": 0.833, - "step": 4568 - }, - { - "epoch": 0.5493897673300067, - "grad_norm": 3.478435980234696, - "learning_rate": 1.7776745587468698e-06, - "loss": 0.8081, - "step": 4569 - }, - { - "epoch": 0.5495100102206457, - "grad_norm": 2.125337461715639, - "learning_rate": 1.7769004318784776e-06, - "loss": 1.0208, - "step": 4570 - }, - { - "epoch": 0.5496302531112848, - "grad_norm": 1.5754513139335895, - "learning_rate": 1.776126338854113e-06, - "loss": 1.0044, - "step": 4571 - }, - { - "epoch": 0.5497504960019239, - "grad_norm": 1.8136011922037305, - "learning_rate": 1.7753522797912044e-06, - "loss": 1.0375, - "step": 4572 - }, - { - "epoch": 0.549870738892563, - "grad_norm": 2.0392892061098165, - "learning_rate": 1.7745782548071765e-06, - "loss": 0.9061, - "step": 4573 - }, - { - "epoch": 0.549990981783202, - "grad_norm": 1.4774845211554355, - "learning_rate": 1.7738042640194482e-06, - "loss": 0.9345, - "step": 4574 - }, - { - "epoch": 0.5501112246738411, - "grad_norm": 1.8690359630400601, - "learning_rate": 1.7730303075454335e-06, - "loss": 0.9045, - "step": 4575 - }, - { - "epoch": 0.5502314675644803, - "grad_norm": 1.7768929716818906, - "learning_rate": 1.7722563855025402e-06, - "loss": 1.047, - "step": 4576 - }, - { - "epoch": 0.5503517104551193, - "grad_norm": 1.6316926026872545, - "learning_rate": 1.7714824980081721e-06, - "loss": 0.9078, - "step": 4577 - }, - { - "epoch": 0.5504719533457584, - "grad_norm": 1.718907366397614, - "learning_rate": 1.7707086451797276e-06, - "loss": 0.9422, - "step": 4578 - }, - { - "epoch": 0.5505921962363975, - "grad_norm": 0.7791562825971252, - "learning_rate": 1.7699348271345993e-06, - "loss": 0.7365, - "step": 4579 - }, - { - "epoch": 0.5507124391270366, - "grad_norm": 0.7974958805946459, - "learning_rate": 1.7691610439901753e-06, - "loss": 0.7596, - "step": 4580 - }, - { - "epoch": 0.5508326820176757, - "grad_norm": 1.589822455036284, - "learning_rate": 1.7683872958638367e-06, - "loss": 0.9542, - "step": 4581 - }, - { - "epoch": 0.5509529249083148, - "grad_norm": 1.716143834330466, - "learning_rate": 1.7676135828729614e-06, - "loss": 1.0382, - "step": 4582 - }, - { - "epoch": 0.5510731677989539, - "grad_norm": 1.852544210423791, - "learning_rate": 1.7668399051349205e-06, - "loss": 1.0267, - "step": 4583 - }, - { - "epoch": 0.5511934106895929, - "grad_norm": 1.7676655953193212, - "learning_rate": 1.766066262767081e-06, - "loss": 1.029, - "step": 4584 - }, - { - "epoch": 0.5513136535802321, - "grad_norm": 1.9387931814986379, - "learning_rate": 1.765292655886803e-06, - "loss": 0.9757, - "step": 4585 - }, - { - "epoch": 0.5514338964708712, - "grad_norm": 1.7723067791948657, - "learning_rate": 1.764519084611443e-06, - "loss": 0.9071, - "step": 4586 - }, - { - "epoch": 0.5515541393615102, - "grad_norm": 1.638181410672912, - "learning_rate": 1.7637455490583505e-06, - "loss": 0.9768, - "step": 4587 - }, - { - "epoch": 0.5516743822521494, - "grad_norm": 1.733297202149419, - "learning_rate": 1.7629720493448701e-06, - "loss": 0.9717, - "step": 4588 - }, - { - "epoch": 0.5517946251427884, - "grad_norm": 1.7453787276254085, - "learning_rate": 1.7621985855883418e-06, - "loss": 1.0499, - "step": 4589 - }, - { - "epoch": 0.5519148680334275, - "grad_norm": 1.8453332601052934, - "learning_rate": 1.7614251579060983e-06, - "loss": 0.9278, - "step": 4590 - }, - { - "epoch": 0.5520351109240667, - "grad_norm": 1.5560379680522, - "learning_rate": 1.76065176641547e-06, - "loss": 1.0472, - "step": 4591 - }, - { - "epoch": 0.5521553538147057, - "grad_norm": 1.6485855253027277, - "learning_rate": 1.759878411233777e-06, - "loss": 0.9741, - "step": 4592 - }, - { - "epoch": 0.5522755967053448, - "grad_norm": 1.9818617156181113, - "learning_rate": 1.7591050924783388e-06, - "loss": 0.9541, - "step": 4593 - }, - { - "epoch": 0.5523958395959839, - "grad_norm": 0.9147717910990133, - "learning_rate": 1.7583318102664661e-06, - "loss": 0.8269, - "step": 4594 - }, - { - "epoch": 0.552516082486623, - "grad_norm": 1.833683854608791, - "learning_rate": 1.757558564715466e-06, - "loss": 0.9964, - "step": 4595 - }, - { - "epoch": 0.552636325377262, - "grad_norm": 2.254736048439445, - "learning_rate": 1.7567853559426386e-06, - "loss": 0.9443, - "step": 4596 - }, - { - "epoch": 0.5527565682679012, - "grad_norm": 1.892803040988282, - "learning_rate": 1.7560121840652797e-06, - "loss": 0.9533, - "step": 4597 - }, - { - "epoch": 0.5528768111585403, - "grad_norm": 2.408798948462696, - "learning_rate": 1.7552390492006782e-06, - "loss": 0.8935, - "step": 4598 - }, - { - "epoch": 0.5529970540491793, - "grad_norm": 1.6438014932551634, - "learning_rate": 1.7544659514661184e-06, - "loss": 0.8522, - "step": 4599 - }, - { - "epoch": 0.5531172969398185, - "grad_norm": 2.5304287178568745, - "learning_rate": 1.7536928909788786e-06, - "loss": 0.9942, - "step": 4600 - }, - { - "epoch": 0.5532375398304575, - "grad_norm": 1.0021175376040397, - "learning_rate": 1.752919867856231e-06, - "loss": 0.8359, - "step": 4601 - }, - { - "epoch": 0.5533577827210966, - "grad_norm": 1.4467161219064482, - "learning_rate": 1.7521468822154436e-06, - "loss": 0.9791, - "step": 4602 - }, - { - "epoch": 0.5534780256117358, - "grad_norm": 1.685885540123523, - "learning_rate": 1.751373934173777e-06, - "loss": 0.9374, - "step": 4603 - }, - { - "epoch": 0.5535982685023748, - "grad_norm": 1.43598177803188, - "learning_rate": 1.750601023848487e-06, - "loss": 0.9281, - "step": 4604 - }, - { - "epoch": 0.5537185113930139, - "grad_norm": 1.7530794977549102, - "learning_rate": 1.749828151356823e-06, - "loss": 0.9386, - "step": 4605 - }, - { - "epoch": 0.553838754283653, - "grad_norm": 1.538786965119079, - "learning_rate": 1.7490553168160297e-06, - "loss": 0.9603, - "step": 4606 - }, - { - "epoch": 0.5539589971742921, - "grad_norm": 1.9931611318534004, - "learning_rate": 1.748282520343345e-06, - "loss": 0.9645, - "step": 4607 - }, - { - "epoch": 0.5540792400649311, - "grad_norm": 1.7093155371106046, - "learning_rate": 1.7475097620560023e-06, - "loss": 0.9887, - "step": 4608 - }, - { - "epoch": 0.5541994829555702, - "grad_norm": 1.6263424390108046, - "learning_rate": 1.746737042071228e-06, - "loss": 0.8982, - "step": 4609 - }, - { - "epoch": 0.5543197258462094, - "grad_norm": 1.6817886340570727, - "learning_rate": 1.7459643605062424e-06, - "loss": 0.9957, - "step": 4610 - }, - { - "epoch": 0.5544399687368484, - "grad_norm": 1.5153759623946674, - "learning_rate": 1.745191717478262e-06, - "loss": 1.006, - "step": 4611 - }, - { - "epoch": 0.5545602116274875, - "grad_norm": 1.596433661401863, - "learning_rate": 1.7444191131044948e-06, - "loss": 0.9992, - "step": 4612 - }, - { - "epoch": 0.5546804545181266, - "grad_norm": 2.897777554216877, - "learning_rate": 1.7436465475021456e-06, - "loss": 0.9276, - "step": 4613 - }, - { - "epoch": 0.5548006974087657, - "grad_norm": 1.724853636572227, - "learning_rate": 1.7428740207884111e-06, - "loss": 0.9007, - "step": 4614 - }, - { - "epoch": 0.5549209402994048, - "grad_norm": 1.643176175173396, - "learning_rate": 1.7421015330804833e-06, - "loss": 0.812, - "step": 4615 - }, - { - "epoch": 0.5550411831900439, - "grad_norm": 3.869986459476726, - "learning_rate": 1.7413290844955475e-06, - "loss": 0.9329, - "step": 4616 - }, - { - "epoch": 0.555161426080683, - "grad_norm": 1.8558282443683238, - "learning_rate": 1.7405566751507843e-06, - "loss": 0.9791, - "step": 4617 - }, - { - "epoch": 0.555281668971322, - "grad_norm": 1.4731253526935633, - "learning_rate": 1.7397843051633668e-06, - "loss": 0.8756, - "step": 4618 - }, - { - "epoch": 0.5554019118619612, - "grad_norm": 1.9498617147661563, - "learning_rate": 1.739011974650464e-06, - "loss": 0.9104, - "step": 4619 - }, - { - "epoch": 0.5555221547526003, - "grad_norm": 1.810271965480468, - "learning_rate": 1.7382396837292365e-06, - "loss": 0.9687, - "step": 4620 - }, - { - "epoch": 0.5556423976432393, - "grad_norm": 1.591328745149908, - "learning_rate": 1.737467432516841e-06, - "loss": 0.9395, - "step": 4621 - }, - { - "epoch": 0.5557626405338785, - "grad_norm": 2.25467693831309, - "learning_rate": 1.7366952211304274e-06, - "loss": 0.9394, - "step": 4622 - }, - { - "epoch": 0.5558828834245175, - "grad_norm": 1.8831168280980315, - "learning_rate": 1.735923049687139e-06, - "loss": 1.0292, - "step": 4623 - }, - { - "epoch": 0.5560031263151566, - "grad_norm": 1.5625716383723904, - "learning_rate": 1.7351509183041144e-06, - "loss": 0.9456, - "step": 4624 - }, - { - "epoch": 0.5561233692057957, - "grad_norm": 1.525137710607117, - "learning_rate": 1.7343788270984852e-06, - "loss": 0.9207, - "step": 4625 - }, - { - "epoch": 0.5562436120964348, - "grad_norm": 1.693865109857109, - "learning_rate": 1.7336067761873764e-06, - "loss": 0.9416, - "step": 4626 - }, - { - "epoch": 0.5563638549870739, - "grad_norm": 1.8280124466637175, - "learning_rate": 1.7328347656879076e-06, - "loss": 0.9596, - "step": 4627 - }, - { - "epoch": 0.556484097877713, - "grad_norm": 2.2789325177729034, - "learning_rate": 1.7320627957171927e-06, - "loss": 0.8834, - "step": 4628 - }, - { - "epoch": 0.5566043407683521, - "grad_norm": 2.0225820021632877, - "learning_rate": 1.7312908663923382e-06, - "loss": 0.9968, - "step": 4629 - }, - { - "epoch": 0.5567245836589911, - "grad_norm": 1.8475890197781104, - "learning_rate": 1.7305189778304463e-06, - "loss": 0.8721, - "step": 4630 - }, - { - "epoch": 0.5568448265496303, - "grad_norm": 1.9506881771105415, - "learning_rate": 1.729747130148611e-06, - "loss": 1.001, - "step": 4631 - }, - { - "epoch": 0.5569650694402694, - "grad_norm": 2.0615538536585514, - "learning_rate": 1.7289753234639208e-06, - "loss": 0.9643, - "step": 4632 - }, - { - "epoch": 0.5570853123309084, - "grad_norm": 1.6853027166913388, - "learning_rate": 1.7282035578934592e-06, - "loss": 0.9625, - "step": 4633 - }, - { - "epoch": 0.5572055552215476, - "grad_norm": 1.8889124656901257, - "learning_rate": 1.727431833554301e-06, - "loss": 0.9828, - "step": 4634 - }, - { - "epoch": 0.5573257981121866, - "grad_norm": 2.090217913350895, - "learning_rate": 1.7266601505635175e-06, - "loss": 0.9671, - "step": 4635 - }, - { - "epoch": 0.5574460410028257, - "grad_norm": 1.7688379676969246, - "learning_rate": 1.7258885090381717e-06, - "loss": 0.9653, - "step": 4636 - }, - { - "epoch": 0.5575662838934649, - "grad_norm": 1.8830457360886539, - "learning_rate": 1.7251169090953213e-06, - "loss": 0.986, - "step": 4637 - }, - { - "epoch": 0.5576865267841039, - "grad_norm": 2.211613959360181, - "learning_rate": 1.7243453508520168e-06, - "loss": 0.9606, - "step": 4638 - }, - { - "epoch": 0.557806769674743, - "grad_norm": 1.9005374048050168, - "learning_rate": 1.7235738344253038e-06, - "loss": 1.0424, - "step": 4639 - }, - { - "epoch": 0.557927012565382, - "grad_norm": 1.6114018833734123, - "learning_rate": 1.72280235993222e-06, - "loss": 1.0213, - "step": 4640 - }, - { - "epoch": 0.5580472554560212, - "grad_norm": 2.125043307592357, - "learning_rate": 1.722030927489798e-06, - "loss": 0.8921, - "step": 4641 - }, - { - "epoch": 0.5581674983466602, - "grad_norm": 1.7145218601385117, - "learning_rate": 1.7212595372150634e-06, - "loss": 0.9428, - "step": 4642 - }, - { - "epoch": 0.5582877412372993, - "grad_norm": 4.062455631894138, - "learning_rate": 1.720488189225035e-06, - "loss": 0.9428, - "step": 4643 - }, - { - "epoch": 0.5584079841279385, - "grad_norm": 2.3274627358048203, - "learning_rate": 1.7197168836367265e-06, - "loss": 0.9972, - "step": 4644 - }, - { - "epoch": 0.5585282270185775, - "grad_norm": 2.116282139290178, - "learning_rate": 1.7189456205671433e-06, - "loss": 1.0164, - "step": 4645 - }, - { - "epoch": 0.5586484699092166, - "grad_norm": 1.8658472462326072, - "learning_rate": 1.7181744001332866e-06, - "loss": 1.0152, - "step": 4646 - }, - { - "epoch": 0.5587687127998557, - "grad_norm": 1.8619209618352057, - "learning_rate": 1.7174032224521493e-06, - "loss": 0.8373, - "step": 4647 - }, - { - "epoch": 0.5588889556904948, - "grad_norm": 1.645640961554775, - "learning_rate": 1.7166320876407184e-06, - "loss": 0.8969, - "step": 4648 - }, - { - "epoch": 0.5590091985811338, - "grad_norm": 1.8574716779929374, - "learning_rate": 1.7158609958159742e-06, - "loss": 0.8826, - "step": 4649 - }, - { - "epoch": 0.559129441471773, - "grad_norm": 2.3460539619763403, - "learning_rate": 1.7150899470948911e-06, - "loss": 0.9822, - "step": 4650 - }, - { - "epoch": 0.5592496843624121, - "grad_norm": 0.8726957498351848, - "learning_rate": 1.7143189415944365e-06, - "loss": 0.7952, - "step": 4651 - }, - { - "epoch": 0.5593699272530511, - "grad_norm": 1.6602979173240657, - "learning_rate": 1.7135479794315714e-06, - "loss": 0.9572, - "step": 4652 - }, - { - "epoch": 0.5594901701436903, - "grad_norm": 1.7514165866862255, - "learning_rate": 1.7127770607232502e-06, - "loss": 0.9831, - "step": 4653 - }, - { - "epoch": 0.5596104130343293, - "grad_norm": 2.0542154096142595, - "learning_rate": 1.7120061855864204e-06, - "loss": 0.9969, - "step": 4654 - }, - { - "epoch": 0.5597306559249684, - "grad_norm": 1.9360902641313649, - "learning_rate": 1.7112353541380233e-06, - "loss": 0.9195, - "step": 4655 - }, - { - "epoch": 0.5598508988156076, - "grad_norm": 4.931519181673025, - "learning_rate": 1.7104645664949931e-06, - "loss": 0.9246, - "step": 4656 - }, - { - "epoch": 0.5599711417062466, - "grad_norm": 1.5182232166869456, - "learning_rate": 1.7096938227742584e-06, - "loss": 0.9216, - "step": 4657 - }, - { - "epoch": 0.5600913845968857, - "grad_norm": 1.6556046866196452, - "learning_rate": 1.70892312309274e-06, - "loss": 1.0341, - "step": 4658 - }, - { - "epoch": 0.5602116274875248, - "grad_norm": 2.1305749355442156, - "learning_rate": 1.7081524675673523e-06, - "loss": 0.8765, - "step": 4659 - }, - { - "epoch": 0.5603318703781639, - "grad_norm": 0.8975279474821581, - "learning_rate": 1.7073818563150026e-06, - "loss": 0.839, - "step": 4660 - }, - { - "epoch": 0.560452113268803, - "grad_norm": 2.0173064323130996, - "learning_rate": 1.7066112894525935e-06, - "loss": 1.0663, - "step": 4661 - }, - { - "epoch": 0.5605723561594421, - "grad_norm": 1.4442213733933627, - "learning_rate": 1.7058407670970177e-06, - "loss": 0.9271, - "step": 4662 - }, - { - "epoch": 0.5606925990500812, - "grad_norm": 1.6412293547169285, - "learning_rate": 1.7050702893651643e-06, - "loss": 0.8194, - "step": 4663 - }, - { - "epoch": 0.5608128419407202, - "grad_norm": 1.843487931069499, - "learning_rate": 1.7042998563739134e-06, - "loss": 0.9547, - "step": 4664 - }, - { - "epoch": 0.5609330848313594, - "grad_norm": 1.9711386102083985, - "learning_rate": 1.703529468240139e-06, - "loss": 0.9131, - "step": 4665 - }, - { - "epoch": 0.5610533277219985, - "grad_norm": 2.1573420537285894, - "learning_rate": 1.7027591250807088e-06, - "loss": 0.9393, - "step": 4666 - }, - { - "epoch": 0.5611735706126375, - "grad_norm": 2.3451321404810304, - "learning_rate": 1.7019888270124825e-06, - "loss": 1.0477, - "step": 4667 - }, - { - "epoch": 0.5612938135032767, - "grad_norm": 1.598770685606419, - "learning_rate": 1.7012185741523147e-06, - "loss": 1.0135, - "step": 4668 - }, - { - "epoch": 0.5614140563939157, - "grad_norm": 1.763987499227367, - "learning_rate": 1.7004483666170514e-06, - "loss": 0.827, - "step": 4669 - }, - { - "epoch": 0.5615342992845548, - "grad_norm": 1.781956315127, - "learning_rate": 1.699678204523533e-06, - "loss": 1.004, - "step": 4670 - }, - { - "epoch": 0.5616545421751938, - "grad_norm": 2.141580744205984, - "learning_rate": 1.6989080879885918e-06, - "loss": 0.89, - "step": 4671 - }, - { - "epoch": 0.561774785065833, - "grad_norm": 1.005684728182289, - "learning_rate": 1.6981380171290544e-06, - "loss": 0.8342, - "step": 4672 - }, - { - "epoch": 0.5618950279564721, - "grad_norm": 1.6582005369348178, - "learning_rate": 1.6973679920617396e-06, - "loss": 0.9442, - "step": 4673 - }, - { - "epoch": 0.5620152708471111, - "grad_norm": 1.7598860653932353, - "learning_rate": 1.6965980129034603e-06, - "loss": 1.0522, - "step": 4674 - }, - { - "epoch": 0.5621355137377503, - "grad_norm": 1.379225826243114, - "learning_rate": 1.6958280797710209e-06, - "loss": 0.9658, - "step": 4675 - }, - { - "epoch": 0.5622557566283893, - "grad_norm": 0.8040467831750521, - "learning_rate": 1.6950581927812198e-06, - "loss": 0.7537, - "step": 4676 - }, - { - "epoch": 0.5623759995190284, - "grad_norm": 1.8373998783367727, - "learning_rate": 1.6942883520508486e-06, - "loss": 0.9807, - "step": 4677 - }, - { - "epoch": 0.5624962424096676, - "grad_norm": 1.7648570998701385, - "learning_rate": 1.693518557696691e-06, - "loss": 0.9771, - "step": 4678 - }, - { - "epoch": 0.5626164853003066, - "grad_norm": 1.734322005828362, - "learning_rate": 1.6927488098355252e-06, - "loss": 1.0861, - "step": 4679 - }, - { - "epoch": 0.5627367281909457, - "grad_norm": 0.9365196188171625, - "learning_rate": 1.6919791085841201e-06, - "loss": 0.8707, - "step": 4680 - }, - { - "epoch": 0.5628569710815848, - "grad_norm": 2.0368040007735497, - "learning_rate": 1.6912094540592396e-06, - "loss": 0.9942, - "step": 4681 - }, - { - "epoch": 0.5629772139722239, - "grad_norm": 2.2770810638258205, - "learning_rate": 1.6904398463776393e-06, - "loss": 0.9979, - "step": 4682 - }, - { - "epoch": 0.5630974568628629, - "grad_norm": 1.4823657441098343, - "learning_rate": 1.6896702856560683e-06, - "loss": 0.9273, - "step": 4683 - }, - { - "epoch": 0.5632176997535021, - "grad_norm": 2.1317812484038186, - "learning_rate": 1.6889007720112677e-06, - "loss": 0.8959, - "step": 4684 - }, - { - "epoch": 0.5633379426441412, - "grad_norm": 1.4908134165429072, - "learning_rate": 1.6881313055599734e-06, - "loss": 0.9801, - "step": 4685 - }, - { - "epoch": 0.5634581855347802, - "grad_norm": 1.907043667025775, - "learning_rate": 1.6873618864189117e-06, - "loss": 1.0117, - "step": 4686 - }, - { - "epoch": 0.5635784284254194, - "grad_norm": 1.9600660355790482, - "learning_rate": 1.686592514704803e-06, - "loss": 0.9805, - "step": 4687 - }, - { - "epoch": 0.5636986713160584, - "grad_norm": 1.9421089481786182, - "learning_rate": 1.685823190534361e-06, - "loss": 0.8953, - "step": 4688 - }, - { - "epoch": 0.5638189142066975, - "grad_norm": 1.822488164039137, - "learning_rate": 1.6850539140242907e-06, - "loss": 1.0324, - "step": 4689 - }, - { - "epoch": 0.5639391570973367, - "grad_norm": 2.0519905521182755, - "learning_rate": 1.684284685291292e-06, - "loss": 1.0146, - "step": 4690 - }, - { - "epoch": 0.5640593999879757, - "grad_norm": 2.1614196283636105, - "learning_rate": 1.683515504452055e-06, - "loss": 1.0021, - "step": 4691 - }, - { - "epoch": 0.5641796428786148, - "grad_norm": 1.4424714905088372, - "learning_rate": 1.6827463716232648e-06, - "loss": 0.8574, - "step": 4692 - }, - { - "epoch": 0.5642998857692539, - "grad_norm": 1.6523003177931799, - "learning_rate": 1.6819772869215972e-06, - "loss": 0.9523, - "step": 4693 - }, - { - "epoch": 0.564420128659893, - "grad_norm": 1.5624643703213075, - "learning_rate": 1.6812082504637228e-06, - "loss": 1.0161, - "step": 4694 - }, - { - "epoch": 0.564540371550532, - "grad_norm": 1.4208262002417238, - "learning_rate": 1.6804392623663025e-06, - "loss": 0.9442, - "step": 4695 - }, - { - "epoch": 0.5646606144411712, - "grad_norm": 1.7727869663991958, - "learning_rate": 1.6796703227459935e-06, - "loss": 0.9766, - "step": 4696 - }, - { - "epoch": 0.5647808573318103, - "grad_norm": 1.8607240799525027, - "learning_rate": 1.6789014317194407e-06, - "loss": 0.956, - "step": 4697 - }, - { - "epoch": 0.5649011002224493, - "grad_norm": 2.272239448038523, - "learning_rate": 1.6781325894032853e-06, - "loss": 0.9236, - "step": 4698 - }, - { - "epoch": 0.5650213431130885, - "grad_norm": 1.8122604474895192, - "learning_rate": 1.6773637959141608e-06, - "loss": 1.1141, - "step": 4699 - }, - { - "epoch": 0.5651415860037275, - "grad_norm": 2.029943309988325, - "learning_rate": 1.6765950513686915e-06, - "loss": 0.8599, - "step": 4700 - }, - { - "epoch": 0.5652618288943666, - "grad_norm": 1.6586587053140593, - "learning_rate": 1.675826355883496e-06, - "loss": 0.966, - "step": 4701 - }, - { - "epoch": 0.5653820717850057, - "grad_norm": 1.8708442410134791, - "learning_rate": 1.6750577095751848e-06, - "loss": 0.996, - "step": 4702 - }, - { - "epoch": 0.5655023146756448, - "grad_norm": 1.6149996641176423, - "learning_rate": 1.6742891125603605e-06, - "loss": 0.9304, - "step": 4703 - }, - { - "epoch": 0.5656225575662839, - "grad_norm": 1.6878918086945847, - "learning_rate": 1.6735205649556185e-06, - "loss": 0.9136, - "step": 4704 - }, - { - "epoch": 0.5657428004569229, - "grad_norm": 1.4980808537694292, - "learning_rate": 1.6727520668775476e-06, - "loss": 1.0431, - "step": 4705 - }, - { - "epoch": 0.5658630433475621, - "grad_norm": 1.5505907532655252, - "learning_rate": 1.6719836184427275e-06, - "loss": 0.9538, - "step": 4706 - }, - { - "epoch": 0.5659832862382012, - "grad_norm": 1.6820682420574113, - "learning_rate": 1.671215219767733e-06, - "loss": 0.8451, - "step": 4707 - }, - { - "epoch": 0.5661035291288402, - "grad_norm": 1.7431360237153244, - "learning_rate": 1.670446870969127e-06, - "loss": 0.9617, - "step": 4708 - }, - { - "epoch": 0.5662237720194794, - "grad_norm": 1.9212056056436266, - "learning_rate": 1.6696785721634685e-06, - "loss": 1.0029, - "step": 4709 - }, - { - "epoch": 0.5663440149101184, - "grad_norm": 2.0741247238428437, - "learning_rate": 1.6689103234673086e-06, - "loss": 0.9294, - "step": 4710 - }, - { - "epoch": 0.5664642578007575, - "grad_norm": 1.7363059489804686, - "learning_rate": 1.668142124997189e-06, - "loss": 0.9638, - "step": 4711 - }, - { - "epoch": 0.5665845006913967, - "grad_norm": 0.828713405946197, - "learning_rate": 1.6673739768696453e-06, - "loss": 0.8233, - "step": 4712 - }, - { - "epoch": 0.5667047435820357, - "grad_norm": 1.6826861207450057, - "learning_rate": 1.6666058792012052e-06, - "loss": 0.9657, - "step": 4713 - }, - { - "epoch": 0.5668249864726748, - "grad_norm": 0.9161524558609587, - "learning_rate": 1.6658378321083878e-06, - "loss": 0.8955, - "step": 4714 - }, - { - "epoch": 0.5669452293633139, - "grad_norm": 1.5454903773116528, - "learning_rate": 1.6650698357077055e-06, - "loss": 1.0192, - "step": 4715 - }, - { - "epoch": 0.567065472253953, - "grad_norm": 2.9996817737917727, - "learning_rate": 1.6643018901156632e-06, - "loss": 1.009, - "step": 4716 - }, - { - "epoch": 0.567185715144592, - "grad_norm": 2.2103468035108813, - "learning_rate": 1.6635339954487566e-06, - "loss": 0.9902, - "step": 4717 - }, - { - "epoch": 0.5673059580352312, - "grad_norm": 1.7449733886219874, - "learning_rate": 1.6627661518234765e-06, - "loss": 1.0282, - "step": 4718 - }, - { - "epoch": 0.5674262009258703, - "grad_norm": 1.5342644583667437, - "learning_rate": 1.661998359356302e-06, - "loss": 1.0444, - "step": 4719 - }, - { - "epoch": 0.5675464438165093, - "grad_norm": 0.9259887977729937, - "learning_rate": 1.6612306181637077e-06, - "loss": 0.784, - "step": 4720 - }, - { - "epoch": 0.5676666867071485, - "grad_norm": 2.3489218044330604, - "learning_rate": 1.6604629283621598e-06, - "loss": 0.8598, - "step": 4721 - }, - { - "epoch": 0.5677869295977875, - "grad_norm": 2.458491002430801, - "learning_rate": 1.6596952900681152e-06, - "loss": 0.951, - "step": 4722 - }, - { - "epoch": 0.5679071724884266, - "grad_norm": 1.9272369392280044, - "learning_rate": 1.658927703398025e-06, - "loss": 1.0231, - "step": 4723 - }, - { - "epoch": 0.5680274153790658, - "grad_norm": 2.0378651812418123, - "learning_rate": 1.6581601684683309e-06, - "loss": 0.9746, - "step": 4724 - }, - { - "epoch": 0.5681476582697048, - "grad_norm": 2.2475495239843886, - "learning_rate": 1.6573926853954674e-06, - "loss": 0.8917, - "step": 4725 - }, - { - "epoch": 0.5682679011603439, - "grad_norm": 1.7680964094690907, - "learning_rate": 1.6566252542958608e-06, - "loss": 1.023, - "step": 4726 - }, - { - "epoch": 0.568388144050983, - "grad_norm": 1.6646602792664429, - "learning_rate": 1.6558578752859305e-06, - "loss": 0.9772, - "step": 4727 - }, - { - "epoch": 0.5685083869416221, - "grad_norm": 1.6764372139768957, - "learning_rate": 1.6550905484820865e-06, - "loss": 0.9824, - "step": 4728 - }, - { - "epoch": 0.5686286298322611, - "grad_norm": 2.105270234836429, - "learning_rate": 1.6543232740007328e-06, - "loss": 0.9931, - "step": 4729 - }, - { - "epoch": 0.5687488727229003, - "grad_norm": 2.3636898980046945, - "learning_rate": 1.653556051958263e-06, - "loss": 0.882, - "step": 4730 - }, - { - "epoch": 0.5688691156135394, - "grad_norm": 1.6220445900748728, - "learning_rate": 1.6527888824710642e-06, - "loss": 0.9354, - "step": 4731 - }, - { - "epoch": 0.5689893585041784, - "grad_norm": 2.1569754444434754, - "learning_rate": 1.6520217656555166e-06, - "loss": 0.9607, - "step": 4732 - }, - { - "epoch": 0.5691096013948175, - "grad_norm": 1.501290878514354, - "learning_rate": 1.65125470162799e-06, - "loss": 0.9036, - "step": 4733 - }, - { - "epoch": 0.5692298442854566, - "grad_norm": 2.114243152734973, - "learning_rate": 1.6504876905048485e-06, - "loss": 0.901, - "step": 4734 - }, - { - "epoch": 0.5693500871760957, - "grad_norm": 1.601849041868737, - "learning_rate": 1.6497207324024464e-06, - "loss": 0.9264, - "step": 4735 - }, - { - "epoch": 0.5694703300667348, - "grad_norm": 2.03903677620867, - "learning_rate": 1.6489538274371305e-06, - "loss": 1.0278, - "step": 4736 - }, - { - "epoch": 0.5695905729573739, - "grad_norm": 1.7460796859140673, - "learning_rate": 1.6481869757252396e-06, - "loss": 1.026, - "step": 4737 - }, - { - "epoch": 0.569710815848013, - "grad_norm": 1.425248388760895, - "learning_rate": 1.647420177383105e-06, - "loss": 0.9175, - "step": 4738 - }, - { - "epoch": 0.569831058738652, - "grad_norm": 1.609402035976832, - "learning_rate": 1.646653432527049e-06, - "loss": 0.9314, - "step": 4739 - }, - { - "epoch": 0.5699513016292912, - "grad_norm": 1.3707571458757237, - "learning_rate": 1.645886741273387e-06, - "loss": 0.9438, - "step": 4740 - }, - { - "epoch": 0.5700715445199303, - "grad_norm": 1.9027505239992946, - "learning_rate": 1.645120103738424e-06, - "loss": 0.939, - "step": 4741 - }, - { - "epoch": 0.5701917874105693, - "grad_norm": 3.1047549788999302, - "learning_rate": 1.6443535200384591e-06, - "loss": 1.0336, - "step": 4742 - }, - { - "epoch": 0.5703120303012085, - "grad_norm": 1.4988611189347631, - "learning_rate": 1.6435869902897827e-06, - "loss": 0.9, - "step": 4743 - }, - { - "epoch": 0.5704322731918475, - "grad_norm": 0.8709520643841702, - "learning_rate": 1.6428205146086764e-06, - "loss": 0.8392, - "step": 4744 - }, - { - "epoch": 0.5705525160824866, - "grad_norm": 1.3486104931596954, - "learning_rate": 1.6420540931114142e-06, - "loss": 0.9015, - "step": 4745 - }, - { - "epoch": 0.5706727589731257, - "grad_norm": 1.4369271007515119, - "learning_rate": 1.6412877259142616e-06, - "loss": 0.9884, - "step": 4746 - }, - { - "epoch": 0.5707930018637648, - "grad_norm": 2.3218228854136287, - "learning_rate": 1.6405214131334757e-06, - "loss": 0.944, - "step": 4747 - }, - { - "epoch": 0.5709132447544039, - "grad_norm": 2.4239602248891137, - "learning_rate": 1.6397551548853052e-06, - "loss": 0.996, - "step": 4748 - }, - { - "epoch": 0.571033487645043, - "grad_norm": 1.5312374659433998, - "learning_rate": 1.6389889512859917e-06, - "loss": 0.9057, - "step": 4749 - }, - { - "epoch": 0.5711537305356821, - "grad_norm": 0.9297973183276291, - "learning_rate": 1.638222802451767e-06, - "loss": 0.8505, - "step": 4750 - }, - { - "epoch": 0.5712739734263211, - "grad_norm": 1.8832231129264272, - "learning_rate": 1.6374567084988561e-06, - "loss": 0.95, - "step": 4751 - }, - { - "epoch": 0.5713942163169603, - "grad_norm": 1.6536329213837277, - "learning_rate": 1.6366906695434738e-06, - "loss": 0.9672, - "step": 4752 - }, - { - "epoch": 0.5715144592075994, - "grad_norm": 2.1309874834384956, - "learning_rate": 1.6359246857018275e-06, - "loss": 1.056, - "step": 4753 - }, - { - "epoch": 0.5716347020982384, - "grad_norm": 2.274103126220648, - "learning_rate": 1.6351587570901178e-06, - "loss": 0.9789, - "step": 4754 - }, - { - "epoch": 0.5717549449888776, - "grad_norm": 2.121240431629691, - "learning_rate": 1.634392883824534e-06, - "loss": 0.9489, - "step": 4755 - }, - { - "epoch": 0.5718751878795166, - "grad_norm": 1.5639116395492902, - "learning_rate": 1.6336270660212595e-06, - "loss": 0.8773, - "step": 4756 - }, - { - "epoch": 0.5719954307701557, - "grad_norm": 2.034331463935381, - "learning_rate": 1.6328613037964676e-06, - "loss": 0.8649, - "step": 4757 - }, - { - "epoch": 0.5721156736607949, - "grad_norm": 1.9617588101409535, - "learning_rate": 1.6320955972663241e-06, - "loss": 0.8817, - "step": 4758 - }, - { - "epoch": 0.5722359165514339, - "grad_norm": 1.8725317285393581, - "learning_rate": 1.6313299465469857e-06, - "loss": 0.8582, - "step": 4759 - }, - { - "epoch": 0.572356159442073, - "grad_norm": 2.5314032425823423, - "learning_rate": 1.6305643517546014e-06, - "loss": 0.9978, - "step": 4760 - }, - { - "epoch": 0.5724764023327121, - "grad_norm": 1.837953734657202, - "learning_rate": 1.629798813005311e-06, - "loss": 1.05, - "step": 4761 - }, - { - "epoch": 0.5725966452233512, - "grad_norm": 1.8234562393180056, - "learning_rate": 1.6290333304152473e-06, - "loss": 0.909, - "step": 4762 - }, - { - "epoch": 0.5727168881139902, - "grad_norm": 1.753989916464726, - "learning_rate": 1.6282679041005314e-06, - "loss": 0.768, - "step": 4763 - }, - { - "epoch": 0.5728371310046293, - "grad_norm": 1.9351192737260778, - "learning_rate": 1.6275025341772789e-06, - "loss": 1.0642, - "step": 4764 - }, - { - "epoch": 0.5729573738952685, - "grad_norm": 2.109400883268571, - "learning_rate": 1.626737220761596e-06, - "loss": 1.0238, - "step": 4765 - }, - { - "epoch": 0.5730776167859075, - "grad_norm": 1.8055970513229733, - "learning_rate": 1.62597196396958e-06, - "loss": 0.9905, - "step": 4766 - }, - { - "epoch": 0.5731978596765466, - "grad_norm": 2.2176249700896427, - "learning_rate": 1.6252067639173197e-06, - "loss": 1.0541, - "step": 4767 - }, - { - "epoch": 0.5733181025671857, - "grad_norm": 1.8896920921152551, - "learning_rate": 1.6244416207208956e-06, - "loss": 0.8995, - "step": 4768 - }, - { - "epoch": 0.5734383454578248, - "grad_norm": 1.5585261356535336, - "learning_rate": 1.6236765344963787e-06, - "loss": 0.9357, - "step": 4769 - }, - { - "epoch": 0.5735585883484638, - "grad_norm": 2.0625925910280296, - "learning_rate": 1.6229115053598322e-06, - "loss": 0.8975, - "step": 4770 - }, - { - "epoch": 0.573678831239103, - "grad_norm": 1.8480395187718537, - "learning_rate": 1.6221465334273108e-06, - "loss": 0.9178, - "step": 4771 - }, - { - "epoch": 0.5737990741297421, - "grad_norm": 1.9000409423030806, - "learning_rate": 1.6213816188148593e-06, - "loss": 0.8131, - "step": 4772 - }, - { - "epoch": 0.5739193170203811, - "grad_norm": 1.531184525938979, - "learning_rate": 1.6206167616385162e-06, - "loss": 0.9699, - "step": 4773 - }, - { - "epoch": 0.5740395599110203, - "grad_norm": 5.058153523617235, - "learning_rate": 1.6198519620143078e-06, - "loss": 0.9403, - "step": 4774 - }, - { - "epoch": 0.5741598028016593, - "grad_norm": 1.3586332994334442, - "learning_rate": 1.6190872200582546e-06, - "loss": 0.9756, - "step": 4775 - }, - { - "epoch": 0.5742800456922984, - "grad_norm": 2.25618614043003, - "learning_rate": 1.6183225358863676e-06, - "loss": 0.982, - "step": 4776 - }, - { - "epoch": 0.5744002885829376, - "grad_norm": 2.125682945459915, - "learning_rate": 1.617557909614648e-06, - "loss": 0.9133, - "step": 4777 - }, - { - "epoch": 0.5745205314735766, - "grad_norm": 1.6135380058008986, - "learning_rate": 1.6167933413590899e-06, - "loss": 1.0585, - "step": 4778 - }, - { - "epoch": 0.5746407743642157, - "grad_norm": 2.0565768645891596, - "learning_rate": 1.6160288312356773e-06, - "loss": 1.1129, - "step": 4779 - }, - { - "epoch": 0.5747610172548548, - "grad_norm": 1.8070150451329374, - "learning_rate": 1.6152643793603857e-06, - "loss": 1.02, - "step": 4780 - }, - { - "epoch": 0.5748812601454939, - "grad_norm": 1.5652076149602925, - "learning_rate": 1.6144999858491815e-06, - "loss": 1.0736, - "step": 4781 - }, - { - "epoch": 0.575001503036133, - "grad_norm": 1.5376253340584085, - "learning_rate": 1.6137356508180232e-06, - "loss": 1.0518, - "step": 4782 - }, - { - "epoch": 0.5751217459267721, - "grad_norm": 1.7178973770754882, - "learning_rate": 1.6129713743828593e-06, - "loss": 1.0126, - "step": 4783 - }, - { - "epoch": 0.5752419888174112, - "grad_norm": 1.3473557289190017, - "learning_rate": 1.6122071566596306e-06, - "loss": 0.9579, - "step": 4784 - }, - { - "epoch": 0.5753622317080502, - "grad_norm": 1.974565846975572, - "learning_rate": 1.6114429977642674e-06, - "loss": 1.0223, - "step": 4785 - }, - { - "epoch": 0.5754824745986894, - "grad_norm": 1.8261885861534852, - "learning_rate": 1.6106788978126926e-06, - "loss": 0.9319, - "step": 4786 - }, - { - "epoch": 0.5756027174893285, - "grad_norm": 1.9394276781516029, - "learning_rate": 1.6099148569208196e-06, - "loss": 0.9779, - "step": 4787 - }, - { - "epoch": 0.5757229603799675, - "grad_norm": 1.9323438515689724, - "learning_rate": 1.6091508752045523e-06, - "loss": 0.8267, - "step": 4788 - }, - { - "epoch": 0.5758432032706067, - "grad_norm": 1.4858749841802543, - "learning_rate": 1.608386952779787e-06, - "loss": 1.0598, - "step": 4789 - }, - { - "epoch": 0.5759634461612457, - "grad_norm": 1.7088150027677314, - "learning_rate": 1.6076230897624098e-06, - "loss": 0.9454, - "step": 4790 - }, - { - "epoch": 0.5760836890518848, - "grad_norm": 2.4409225106697843, - "learning_rate": 1.6068592862682974e-06, - "loss": 0.9755, - "step": 4791 - }, - { - "epoch": 0.576203931942524, - "grad_norm": 1.7151675950396499, - "learning_rate": 1.6060955424133187e-06, - "loss": 0.938, - "step": 4792 - }, - { - "epoch": 0.576324174833163, - "grad_norm": 2.0850198277324257, - "learning_rate": 1.6053318583133332e-06, - "loss": 1.0966, - "step": 4793 - }, - { - "epoch": 0.5764444177238021, - "grad_norm": 1.828042183328804, - "learning_rate": 1.6045682340841907e-06, - "loss": 0.9533, - "step": 4794 - }, - { - "epoch": 0.5765646606144411, - "grad_norm": 0.8576926323083116, - "learning_rate": 1.6038046698417336e-06, - "loss": 0.8071, - "step": 4795 - }, - { - "epoch": 0.5766849035050803, - "grad_norm": 1.8744092010501783, - "learning_rate": 1.6030411657017919e-06, - "loss": 0.8936, - "step": 4796 - }, - { - "epoch": 0.5768051463957193, - "grad_norm": 1.7740042700861274, - "learning_rate": 1.6022777217801903e-06, - "loss": 1.0453, - "step": 4797 - }, - { - "epoch": 0.5769253892863584, - "grad_norm": 1.7532652637312798, - "learning_rate": 1.601514338192742e-06, - "loss": 0.9288, - "step": 4798 - }, - { - "epoch": 0.5770456321769976, - "grad_norm": 1.9656596679541343, - "learning_rate": 1.6007510150552514e-06, - "loss": 0.914, - "step": 4799 - }, - { - "epoch": 0.5771658750676366, - "grad_norm": 1.9509031551771419, - "learning_rate": 1.599987752483515e-06, - "loss": 0.8246, - "step": 4800 - }, - { - "epoch": 0.5772861179582757, - "grad_norm": 1.5290920076499277, - "learning_rate": 1.5992245505933184e-06, - "loss": 0.8793, - "step": 4801 - }, - { - "epoch": 0.5774063608489148, - "grad_norm": 1.7443958877411787, - "learning_rate": 1.5984614095004388e-06, - "loss": 0.9116, - "step": 4802 - }, - { - "epoch": 0.5775266037395539, - "grad_norm": 2.0567857595098378, - "learning_rate": 1.5976983293206438e-06, - "loss": 1.0095, - "step": 4803 - }, - { - "epoch": 0.577646846630193, - "grad_norm": 1.6440318021970048, - "learning_rate": 1.5969353101696928e-06, - "loss": 0.9143, - "step": 4804 - }, - { - "epoch": 0.5777670895208321, - "grad_norm": 1.4438038968681635, - "learning_rate": 1.5961723521633341e-06, - "loss": 0.9999, - "step": 4805 - }, - { - "epoch": 0.5778873324114712, - "grad_norm": 2.2016240411174914, - "learning_rate": 1.5954094554173097e-06, - "loss": 1.1086, - "step": 4806 - }, - { - "epoch": 0.5780075753021102, - "grad_norm": 2.839194537101471, - "learning_rate": 1.5946466200473482e-06, - "loss": 0.9869, - "step": 4807 - }, - { - "epoch": 0.5781278181927494, - "grad_norm": 1.8932143835638844, - "learning_rate": 1.5938838461691723e-06, - "loss": 1.0301, - "step": 4808 - }, - { - "epoch": 0.5782480610833884, - "grad_norm": 2.5024356421427516, - "learning_rate": 1.593121133898494e-06, - "loss": 1.0334, - "step": 4809 - }, - { - "epoch": 0.5783683039740275, - "grad_norm": 1.9409930827643016, - "learning_rate": 1.592358483351016e-06, - "loss": 0.9872, - "step": 4810 - }, - { - "epoch": 0.5784885468646667, - "grad_norm": 1.804719470512948, - "learning_rate": 1.5915958946424326e-06, - "loss": 0.9224, - "step": 4811 - }, - { - "epoch": 0.5786087897553057, - "grad_norm": 1.4339469453794607, - "learning_rate": 1.5908333678884271e-06, - "loss": 0.944, - "step": 4812 - }, - { - "epoch": 0.5787290326459448, - "grad_norm": 1.6698559128968062, - "learning_rate": 1.5900709032046743e-06, - "loss": 0.9284, - "step": 4813 - }, - { - "epoch": 0.5788492755365839, - "grad_norm": 2.088516298617237, - "learning_rate": 1.5893085007068391e-06, - "loss": 0.9807, - "step": 4814 - }, - { - "epoch": 0.578969518427223, - "grad_norm": 1.9564693047689068, - "learning_rate": 1.5885461605105786e-06, - "loss": 0.915, - "step": 4815 - }, - { - "epoch": 0.579089761317862, - "grad_norm": 1.8374718929917786, - "learning_rate": 1.5877838827315375e-06, - "loss": 0.9664, - "step": 4816 - }, - { - "epoch": 0.5792100042085012, - "grad_norm": 2.180213345228602, - "learning_rate": 1.587021667485355e-06, - "loss": 0.8894, - "step": 4817 - }, - { - "epoch": 0.5793302470991403, - "grad_norm": 1.6415025816242648, - "learning_rate": 1.5862595148876559e-06, - "loss": 0.9765, - "step": 4818 - }, - { - "epoch": 0.5794504899897793, - "grad_norm": 1.8241398077472044, - "learning_rate": 1.58549742505406e-06, - "loss": 0.9588, - "step": 4819 - }, - { - "epoch": 0.5795707328804185, - "grad_norm": 2.3890015319270015, - "learning_rate": 1.5847353981001747e-06, - "loss": 0.9569, - "step": 4820 - }, - { - "epoch": 0.5796909757710575, - "grad_norm": 1.5034360472585493, - "learning_rate": 1.5839734341415993e-06, - "loss": 0.89, - "step": 4821 - }, - { - "epoch": 0.5798112186616966, - "grad_norm": 1.6154500132377279, - "learning_rate": 1.5832115332939238e-06, - "loss": 0.9674, - "step": 4822 - }, - { - "epoch": 0.5799314615523358, - "grad_norm": 8.80043795102737, - "learning_rate": 1.5824496956727272e-06, - "loss": 0.9531, - "step": 4823 - }, - { - "epoch": 0.5800517044429748, - "grad_norm": 1.7907010366862768, - "learning_rate": 1.5816879213935797e-06, - "loss": 0.9293, - "step": 4824 - }, - { - "epoch": 0.5801719473336139, - "grad_norm": 1.612878264187884, - "learning_rate": 1.5809262105720416e-06, - "loss": 0.988, - "step": 4825 - }, - { - "epoch": 0.580292190224253, - "grad_norm": 1.6432322538883894, - "learning_rate": 1.5801645633236644e-06, - "loss": 0.9927, - "step": 4826 - }, - { - "epoch": 0.5804124331148921, - "grad_norm": 1.6607463191937828, - "learning_rate": 1.579402979763989e-06, - "loss": 0.9727, - "step": 4827 - }, - { - "epoch": 0.5805326760055312, - "grad_norm": 4.293052276826186, - "learning_rate": 1.578641460008548e-06, - "loss": 1.0099, - "step": 4828 - }, - { - "epoch": 0.5806529188961702, - "grad_norm": 1.9479946658043803, - "learning_rate": 1.5778800041728613e-06, - "loss": 0.8701, - "step": 4829 - }, - { - "epoch": 0.5807731617868094, - "grad_norm": 1.5682779673425795, - "learning_rate": 1.577118612372443e-06, - "loss": 0.8642, - "step": 4830 - }, - { - "epoch": 0.5808934046774484, - "grad_norm": 1.6398546135821075, - "learning_rate": 1.5763572847227943e-06, - "loss": 0.9055, - "step": 4831 - }, - { - "epoch": 0.5810136475680875, - "grad_norm": 1.720990679784936, - "learning_rate": 1.5755960213394091e-06, - "loss": 1.0086, - "step": 4832 - }, - { - "epoch": 0.5811338904587267, - "grad_norm": 1.7713031444774268, - "learning_rate": 1.5748348223377703e-06, - "loss": 0.9834, - "step": 4833 - }, - { - "epoch": 0.5812541333493657, - "grad_norm": 1.4960332713838702, - "learning_rate": 1.5740736878333507e-06, - "loss": 0.9796, - "step": 4834 - }, - { - "epoch": 0.5813743762400048, - "grad_norm": 2.1293976102971603, - "learning_rate": 1.5733126179416143e-06, - "loss": 0.9842, - "step": 4835 - }, - { - "epoch": 0.5814946191306439, - "grad_norm": 1.7624629239991632, - "learning_rate": 1.5725516127780137e-06, - "loss": 0.9181, - "step": 4836 - }, - { - "epoch": 0.581614862021283, - "grad_norm": 2.520644398904768, - "learning_rate": 1.5717906724579943e-06, - "loss": 1.0858, - "step": 4837 - }, - { - "epoch": 0.581735104911922, - "grad_norm": 1.769036769198011, - "learning_rate": 1.571029797096989e-06, - "loss": 0.8827, - "step": 4838 - }, - { - "epoch": 0.5818553478025612, - "grad_norm": 1.6418378434117171, - "learning_rate": 1.570268986810423e-06, - "loss": 0.9833, - "step": 4839 - }, - { - "epoch": 0.5819755906932003, - "grad_norm": 1.8924969320133795, - "learning_rate": 1.5695082417137096e-06, - "loss": 0.9517, - "step": 4840 - }, - { - "epoch": 0.5820958335838393, - "grad_norm": 3.27937277728195, - "learning_rate": 1.5687475619222539e-06, - "loss": 0.9501, - "step": 4841 - }, - { - "epoch": 0.5822160764744785, - "grad_norm": 2.0073036112988363, - "learning_rate": 1.5679869475514496e-06, - "loss": 0.9364, - "step": 4842 - }, - { - "epoch": 0.5823363193651175, - "grad_norm": 1.9177987514759518, - "learning_rate": 1.567226398716682e-06, - "loss": 1.013, - "step": 4843 - }, - { - "epoch": 0.5824565622557566, - "grad_norm": 1.955530623014533, - "learning_rate": 1.566465915533326e-06, - "loss": 0.8253, - "step": 4844 - }, - { - "epoch": 0.5825768051463958, - "grad_norm": 2.0840514153491627, - "learning_rate": 1.5657054981167458e-06, - "loss": 1.0865, - "step": 4845 - }, - { - "epoch": 0.5826970480370348, - "grad_norm": 1.830238880763038, - "learning_rate": 1.5649451465822965e-06, - "loss": 0.8704, - "step": 4846 - }, - { - "epoch": 0.5828172909276739, - "grad_norm": 1.6111167445394894, - "learning_rate": 1.5641848610453218e-06, - "loss": 1.032, - "step": 4847 - }, - { - "epoch": 0.582937533818313, - "grad_norm": 1.8915338774916208, - "learning_rate": 1.563424641621158e-06, - "loss": 1.0601, - "step": 4848 - }, - { - "epoch": 0.5830577767089521, - "grad_norm": 1.8253340323989975, - "learning_rate": 1.5626644884251282e-06, - "loss": 0.8995, - "step": 4849 - }, - { - "epoch": 0.5831780195995911, - "grad_norm": 1.544265241007036, - "learning_rate": 1.5619044015725488e-06, - "loss": 1.0806, - "step": 4850 - }, - { - "epoch": 0.5832982624902303, - "grad_norm": 1.945827676355061, - "learning_rate": 1.5611443811787224e-06, - "loss": 1.0713, - "step": 4851 - }, - { - "epoch": 0.5834185053808694, - "grad_norm": 1.9548595813167797, - "learning_rate": 1.560384427358945e-06, - "loss": 0.8903, - "step": 4852 - }, - { - "epoch": 0.5835387482715084, - "grad_norm": 1.3302087929689255, - "learning_rate": 1.5596245402284998e-06, - "loss": 0.9221, - "step": 4853 - }, - { - "epoch": 0.5836589911621476, - "grad_norm": 1.549430200994516, - "learning_rate": 1.5588647199026619e-06, - "loss": 1.0181, - "step": 4854 - }, - { - "epoch": 0.5837792340527866, - "grad_norm": 3.5511741630623197, - "learning_rate": 1.5581049664966956e-06, - "loss": 1.0718, - "step": 4855 - }, - { - "epoch": 0.5838994769434257, - "grad_norm": 1.0576212452311355, - "learning_rate": 1.5573452801258545e-06, - "loss": 0.8861, - "step": 4856 - }, - { - "epoch": 0.5840197198340649, - "grad_norm": 3.0413511626465053, - "learning_rate": 1.5565856609053824e-06, - "loss": 0.8337, - "step": 4857 - }, - { - "epoch": 0.5841399627247039, - "grad_norm": 1.6025245568213469, - "learning_rate": 1.5558261089505127e-06, - "loss": 0.99, - "step": 4858 - }, - { - "epoch": 0.584260205615343, - "grad_norm": 1.774288964572419, - "learning_rate": 1.5550666243764697e-06, - "loss": 0.9933, - "step": 4859 - }, - { - "epoch": 0.584380448505982, - "grad_norm": 1.7794642950657764, - "learning_rate": 1.554307207298465e-06, - "loss": 0.9652, - "step": 4860 - }, - { - "epoch": 0.5845006913966212, - "grad_norm": 1.8309708314131714, - "learning_rate": 1.553547857831704e-06, - "loss": 0.9897, - "step": 4861 - }, - { - "epoch": 0.5846209342872603, - "grad_norm": 1.1044802070595268, - "learning_rate": 1.5527885760913771e-06, - "loss": 0.9148, - "step": 4862 - }, - { - "epoch": 0.5847411771778993, - "grad_norm": 1.477533253458508, - "learning_rate": 1.552029362192668e-06, - "loss": 0.9621, - "step": 4863 - }, - { - "epoch": 0.5848614200685385, - "grad_norm": 1.734451910839564, - "learning_rate": 1.5512702162507478e-06, - "loss": 0.9245, - "step": 4864 - }, - { - "epoch": 0.5849816629591775, - "grad_norm": 1.1146036960250985, - "learning_rate": 1.5505111383807792e-06, - "loss": 0.7663, - "step": 4865 - }, - { - "epoch": 0.5851019058498166, - "grad_norm": 1.576544999931909, - "learning_rate": 1.5497521286979138e-06, - "loss": 1.0051, - "step": 4866 - }, - { - "epoch": 0.5852221487404557, - "grad_norm": 1.9447018687584638, - "learning_rate": 1.5489931873172927e-06, - "loss": 0.9472, - "step": 4867 - }, - { - "epoch": 0.5853423916310948, - "grad_norm": 1.5833687861974064, - "learning_rate": 1.5482343143540467e-06, - "loss": 0.9913, - "step": 4868 - }, - { - "epoch": 0.5854626345217339, - "grad_norm": 1.893636590564471, - "learning_rate": 1.547475509923295e-06, - "loss": 1.0352, - "step": 4869 - }, - { - "epoch": 0.585582877412373, - "grad_norm": 0.7841279997364609, - "learning_rate": 1.5467167741401495e-06, - "loss": 0.7877, - "step": 4870 - }, - { - "epoch": 0.5857031203030121, - "grad_norm": 1.8810436122894223, - "learning_rate": 1.5459581071197083e-06, - "loss": 0.9131, - "step": 4871 - }, - { - "epoch": 0.5858233631936511, - "grad_norm": 1.7502720485127252, - "learning_rate": 1.5451995089770624e-06, - "loss": 1.0276, - "step": 4872 - }, - { - "epoch": 0.5859436060842903, - "grad_norm": 1.2348121158834622, - "learning_rate": 1.5444409798272885e-06, - "loss": 0.9144, - "step": 4873 - }, - { - "epoch": 0.5860638489749294, - "grad_norm": 2.13511494311489, - "learning_rate": 1.543682519785456e-06, - "loss": 1.0028, - "step": 4874 - }, - { - "epoch": 0.5861840918655684, - "grad_norm": 2.4198232512124007, - "learning_rate": 1.5429241289666219e-06, - "loss": 1.0048, - "step": 4875 - }, - { - "epoch": 0.5863043347562076, - "grad_norm": 1.952322161655508, - "learning_rate": 1.5421658074858342e-06, - "loss": 0.8984, - "step": 4876 - }, - { - "epoch": 0.5864245776468466, - "grad_norm": 2.414167482136527, - "learning_rate": 1.5414075554581298e-06, - "loss": 0.8609, - "step": 4877 - }, - { - "epoch": 0.5865448205374857, - "grad_norm": 2.2061304465259126, - "learning_rate": 1.5406493729985348e-06, - "loss": 0.9797, - "step": 4878 - }, - { - "epoch": 0.5866650634281249, - "grad_norm": 1.900246217735691, - "learning_rate": 1.5398912602220644e-06, - "loss": 0.9175, - "step": 4879 - }, - { - "epoch": 0.5867853063187639, - "grad_norm": 2.145135512973144, - "learning_rate": 1.539133217243724e-06, - "loss": 0.9775, - "step": 4880 - }, - { - "epoch": 0.586905549209403, - "grad_norm": 2.1114177843618798, - "learning_rate": 1.5383752441785081e-06, - "loss": 0.9543, - "step": 4881 - }, - { - "epoch": 0.5870257921000421, - "grad_norm": 2.059747882710899, - "learning_rate": 1.5376173411414003e-06, - "loss": 1.0583, - "step": 4882 - }, - { - "epoch": 0.5871460349906812, - "grad_norm": 1.847964626006723, - "learning_rate": 1.5368595082473753e-06, - "loss": 0.9828, - "step": 4883 - }, - { - "epoch": 0.5872662778813202, - "grad_norm": 1.7439541604903501, - "learning_rate": 1.5361017456113935e-06, - "loss": 0.9827, - "step": 4884 - }, - { - "epoch": 0.5873865207719594, - "grad_norm": 1.8276849200705307, - "learning_rate": 1.5353440533484085e-06, - "loss": 1.0525, - "step": 4885 - }, - { - "epoch": 0.5875067636625985, - "grad_norm": 1.6245227993357074, - "learning_rate": 1.534586431573361e-06, - "loss": 0.8636, - "step": 4886 - }, - { - "epoch": 0.5876270065532375, - "grad_norm": 1.8067227071725183, - "learning_rate": 1.5338288804011817e-06, - "loss": 0.9793, - "step": 4887 - }, - { - "epoch": 0.5877472494438767, - "grad_norm": 2.2924475524294223, - "learning_rate": 1.533071399946791e-06, - "loss": 0.9142, - "step": 4888 - }, - { - "epoch": 0.5878674923345157, - "grad_norm": 1.7662680476104191, - "learning_rate": 1.5323139903250977e-06, - "loss": 0.7737, - "step": 4889 - }, - { - "epoch": 0.5879877352251548, - "grad_norm": 1.412045130448809, - "learning_rate": 1.5315566516510002e-06, - "loss": 0.963, - "step": 4890 - }, - { - "epoch": 0.5881079781157939, - "grad_norm": 1.5837715278733993, - "learning_rate": 1.5307993840393857e-06, - "loss": 0.8746, - "step": 4891 - }, - { - "epoch": 0.588228221006433, - "grad_norm": 1.811104599070527, - "learning_rate": 1.530042187605132e-06, - "loss": 1.0053, - "step": 4892 - }, - { - "epoch": 0.5883484638970721, - "grad_norm": 1.2715947678981825, - "learning_rate": 1.5292850624631044e-06, - "loss": 1.0353, - "step": 4893 - }, - { - "epoch": 0.5884687067877111, - "grad_norm": 2.590280073185899, - "learning_rate": 1.5285280087281593e-06, - "loss": 0.9927, - "step": 4894 - }, - { - "epoch": 0.5885889496783503, - "grad_norm": 0.6813913233748476, - "learning_rate": 1.5277710265151398e-06, - "loss": 0.7656, - "step": 4895 - }, - { - "epoch": 0.5887091925689893, - "grad_norm": 3.084365946747732, - "learning_rate": 1.5270141159388803e-06, - "loss": 0.9775, - "step": 4896 - }, - { - "epoch": 0.5888294354596284, - "grad_norm": 1.5256588122517325, - "learning_rate": 1.526257277114203e-06, - "loss": 0.9916, - "step": 4897 - }, - { - "epoch": 0.5889496783502676, - "grad_norm": 1.896478717768017, - "learning_rate": 1.5255005101559201e-06, - "loss": 1.0008, - "step": 4898 - }, - { - "epoch": 0.5890699212409066, - "grad_norm": 1.7676529842141864, - "learning_rate": 1.524743815178833e-06, - "loss": 0.9628, - "step": 4899 - }, - { - "epoch": 0.5891901641315457, - "grad_norm": 2.5355330600106525, - "learning_rate": 1.5239871922977315e-06, - "loss": 1.0052, - "step": 4900 - }, - { - "epoch": 0.5893104070221848, - "grad_norm": 1.8540719968937365, - "learning_rate": 1.523230641627394e-06, - "loss": 1.0905, - "step": 4901 - }, - { - "epoch": 0.5894306499128239, - "grad_norm": 1.7578493241585589, - "learning_rate": 1.5224741632825888e-06, - "loss": 0.9314, - "step": 4902 - }, - { - "epoch": 0.589550892803463, - "grad_norm": 1.6500262518441646, - "learning_rate": 1.521717757378074e-06, - "loss": 0.891, - "step": 4903 - }, - { - "epoch": 0.5896711356941021, - "grad_norm": 1.7063493916274513, - "learning_rate": 1.5209614240285943e-06, - "loss": 0.8889, - "step": 4904 - }, - { - "epoch": 0.5897913785847412, - "grad_norm": 1.792535933236818, - "learning_rate": 1.520205163348887e-06, - "loss": 1.0552, - "step": 4905 - }, - { - "epoch": 0.5899116214753802, - "grad_norm": 0.8204023402835827, - "learning_rate": 1.519448975453674e-06, - "loss": 0.7746, - "step": 4906 - }, - { - "epoch": 0.5900318643660194, - "grad_norm": 1.8681650196637316, - "learning_rate": 1.5186928604576696e-06, - "loss": 0.9557, - "step": 4907 - }, - { - "epoch": 0.5901521072566585, - "grad_norm": 1.8844930659262962, - "learning_rate": 1.5179368184755752e-06, - "loss": 0.9722, - "step": 4908 - }, - { - "epoch": 0.5902723501472975, - "grad_norm": 2.9457934566174684, - "learning_rate": 1.5171808496220821e-06, - "loss": 1.0277, - "step": 4909 - }, - { - "epoch": 0.5903925930379367, - "grad_norm": 1.430615316869014, - "learning_rate": 1.5164249540118708e-06, - "loss": 1.0115, - "step": 4910 - }, - { - "epoch": 0.5905128359285757, - "grad_norm": 1.5119922794619243, - "learning_rate": 1.5156691317596093e-06, - "loss": 1.0296, - "step": 4911 - }, - { - "epoch": 0.5906330788192148, - "grad_norm": 1.9453294132830259, - "learning_rate": 1.5149133829799556e-06, - "loss": 0.8654, - "step": 4912 - }, - { - "epoch": 0.590753321709854, - "grad_norm": 1.8015431015148098, - "learning_rate": 1.5141577077875556e-06, - "loss": 0.9953, - "step": 4913 - }, - { - "epoch": 0.590873564600493, - "grad_norm": 1.693750813614203, - "learning_rate": 1.5134021062970451e-06, - "loss": 0.9201, - "step": 4914 - }, - { - "epoch": 0.5909938074911321, - "grad_norm": 1.748773962787874, - "learning_rate": 1.5126465786230483e-06, - "loss": 1.0075, - "step": 4915 - }, - { - "epoch": 0.5911140503817712, - "grad_norm": 1.6528297953963829, - "learning_rate": 1.5118911248801787e-06, - "loss": 1.0116, - "step": 4916 - }, - { - "epoch": 0.5912342932724103, - "grad_norm": 2.005537116769346, - "learning_rate": 1.5111357451830364e-06, - "loss": 0.9921, - "step": 4917 - }, - { - "epoch": 0.5913545361630493, - "grad_norm": 1.9634949318529216, - "learning_rate": 1.5103804396462131e-06, - "loss": 0.9142, - "step": 4918 - }, - { - "epoch": 0.5914747790536885, - "grad_norm": 1.7581110043855182, - "learning_rate": 1.5096252083842877e-06, - "loss": 1.0002, - "step": 4919 - }, - { - "epoch": 0.5915950219443276, - "grad_norm": 1.7164680591370525, - "learning_rate": 1.5088700515118285e-06, - "loss": 1.0404, - "step": 4920 - }, - { - "epoch": 0.5917152648349666, - "grad_norm": 1.8089833838868479, - "learning_rate": 1.508114969143392e-06, - "loss": 0.8703, - "step": 4921 - }, - { - "epoch": 0.5918355077256057, - "grad_norm": 1.4132728690283214, - "learning_rate": 1.5073599613935238e-06, - "loss": 0.9734, - "step": 4922 - }, - { - "epoch": 0.5919557506162448, - "grad_norm": 1.6893924415582775, - "learning_rate": 1.5066050283767574e-06, - "loss": 0.777, - "step": 4923 - }, - { - "epoch": 0.5920759935068839, - "grad_norm": 1.8161524441504564, - "learning_rate": 1.505850170207616e-06, - "loss": 1.0283, - "step": 4924 - }, - { - "epoch": 0.592196236397523, - "grad_norm": 1.9330875851660778, - "learning_rate": 1.505095387000611e-06, - "loss": 0.9766, - "step": 4925 - }, - { - "epoch": 0.5923164792881621, - "grad_norm": 1.8865928844856783, - "learning_rate": 1.504340678870242e-06, - "loss": 0.9467, - "step": 4926 - }, - { - "epoch": 0.5924367221788012, - "grad_norm": 1.9269705112182534, - "learning_rate": 1.5035860459309989e-06, - "loss": 1.0901, - "step": 4927 - }, - { - "epoch": 0.5925569650694402, - "grad_norm": 2.044290556275943, - "learning_rate": 1.5028314882973568e-06, - "loss": 0.8294, - "step": 4928 - }, - { - "epoch": 0.5926772079600794, - "grad_norm": 1.7331862628727799, - "learning_rate": 1.502077006083783e-06, - "loss": 1.0482, - "step": 4929 - }, - { - "epoch": 0.5927974508507184, - "grad_norm": 1.5548835515764556, - "learning_rate": 1.5013225994047315e-06, - "loss": 0.9728, - "step": 4930 - }, - { - "epoch": 0.5929176937413575, - "grad_norm": 1.6097747158209472, - "learning_rate": 1.5005682683746452e-06, - "loss": 1.0043, - "step": 4931 - }, - { - "epoch": 0.5930379366319967, - "grad_norm": 1.911480590801599, - "learning_rate": 1.4998140131079553e-06, - "loss": 0.9246, - "step": 4932 - }, - { - "epoch": 0.5931581795226357, - "grad_norm": 1.9888978013431515, - "learning_rate": 1.4990598337190821e-06, - "loss": 0.9377, - "step": 4933 - }, - { - "epoch": 0.5932784224132748, - "grad_norm": 1.5809887123597717, - "learning_rate": 1.4983057303224338e-06, - "loss": 0.8799, - "step": 4934 - }, - { - "epoch": 0.5933986653039139, - "grad_norm": 1.5745384327137046, - "learning_rate": 1.4975517030324072e-06, - "loss": 1.0666, - "step": 4935 - }, - { - "epoch": 0.593518908194553, - "grad_norm": 0.9166839699123548, - "learning_rate": 1.4967977519633882e-06, - "loss": 0.8461, - "step": 4936 - }, - { - "epoch": 0.593639151085192, - "grad_norm": 1.779179624678559, - "learning_rate": 1.4960438772297494e-06, - "loss": 0.9929, - "step": 4937 - }, - { - "epoch": 0.5937593939758312, - "grad_norm": 2.460256912338025, - "learning_rate": 1.495290078945855e-06, - "loss": 0.9407, - "step": 4938 - }, - { - "epoch": 0.5938796368664703, - "grad_norm": 1.6371651634735742, - "learning_rate": 1.4945363572260529e-06, - "loss": 0.9392, - "step": 4939 - }, - { - "epoch": 0.5939998797571093, - "grad_norm": 1.9758620396843811, - "learning_rate": 1.4937827121846845e-06, - "loss": 0.8775, - "step": 4940 - }, - { - "epoch": 0.5941201226477485, - "grad_norm": 1.4494998792876568, - "learning_rate": 1.4930291439360755e-06, - "loss": 0.9387, - "step": 4941 - }, - { - "epoch": 0.5942403655383875, - "grad_norm": 1.6235709141973855, - "learning_rate": 1.4922756525945427e-06, - "loss": 0.9947, - "step": 4942 - }, - { - "epoch": 0.5943606084290266, - "grad_norm": 0.8593719829159574, - "learning_rate": 1.4915222382743894e-06, - "loss": 0.8145, - "step": 4943 - }, - { - "epoch": 0.5944808513196658, - "grad_norm": 2.021934510759931, - "learning_rate": 1.4907689010899085e-06, - "loss": 0.9183, - "step": 4944 - }, - { - "epoch": 0.5946010942103048, - "grad_norm": 1.7338731056738206, - "learning_rate": 1.4900156411553804e-06, - "loss": 0.817, - "step": 4945 - }, - { - "epoch": 0.5947213371009439, - "grad_norm": 1.966390740170881, - "learning_rate": 1.4892624585850739e-06, - "loss": 1.0513, - "step": 4946 - }, - { - "epoch": 0.594841579991583, - "grad_norm": 1.7291174579037536, - "learning_rate": 1.4885093534932465e-06, - "loss": 0.986, - "step": 4947 - }, - { - "epoch": 0.5949618228822221, - "grad_norm": 2.5079809589788726, - "learning_rate": 1.4877563259941433e-06, - "loss": 0.9155, - "step": 4948 - }, - { - "epoch": 0.5950820657728612, - "grad_norm": 1.5837746139094997, - "learning_rate": 1.4870033762019988e-06, - "loss": 0.8778, - "step": 4949 - }, - { - "epoch": 0.5952023086635003, - "grad_norm": 1.4802744393169769, - "learning_rate": 1.4862505042310334e-06, - "loss": 0.9281, - "step": 4950 - }, - { - "epoch": 0.5953225515541394, - "grad_norm": 1.420254465056849, - "learning_rate": 1.4854977101954587e-06, - "loss": 0.8935, - "step": 4951 - }, - { - "epoch": 0.5954427944447784, - "grad_norm": 1.6917929967053216, - "learning_rate": 1.4847449942094716e-06, - "loss": 1.0564, - "step": 4952 - }, - { - "epoch": 0.5955630373354175, - "grad_norm": 1.8650462110587946, - "learning_rate": 1.4839923563872598e-06, - "loss": 1.0597, - "step": 4953 - }, - { - "epoch": 0.5956832802260567, - "grad_norm": 1.8739478019968339, - "learning_rate": 1.483239796842997e-06, - "loss": 0.957, - "step": 4954 - }, - { - "epoch": 0.5958035231166957, - "grad_norm": 3.7539222550224625, - "learning_rate": 1.4824873156908462e-06, - "loss": 1.0311, - "step": 4955 - }, - { - "epoch": 0.5959237660073348, - "grad_norm": 2.329517741577684, - "learning_rate": 1.4817349130449584e-06, - "loss": 0.9569, - "step": 4956 - }, - { - "epoch": 0.5960440088979739, - "grad_norm": 1.8237955063913556, - "learning_rate": 1.4809825890194717e-06, - "loss": 1.0242, - "step": 4957 - }, - { - "epoch": 0.596164251788613, - "grad_norm": 1.859369037522445, - "learning_rate": 1.4802303437285139e-06, - "loss": 0.9736, - "step": 4958 - }, - { - "epoch": 0.596284494679252, - "grad_norm": 2.13250461913276, - "learning_rate": 1.4794781772861994e-06, - "loss": 0.9994, - "step": 4959 - }, - { - "epoch": 0.5964047375698912, - "grad_norm": 9.755463072218456, - "learning_rate": 1.4787260898066324e-06, - "loss": 0.8769, - "step": 4960 - }, - { - "epoch": 0.5965249804605303, - "grad_norm": 2.104421493157161, - "learning_rate": 1.4779740814039023e-06, - "loss": 1.0517, - "step": 4961 - }, - { - "epoch": 0.5966452233511693, - "grad_norm": 2.0351582415492295, - "learning_rate": 1.4772221521920894e-06, - "loss": 0.8751, - "step": 4962 - }, - { - "epoch": 0.5967654662418085, - "grad_norm": 1.9668653641020146, - "learning_rate": 1.4764703022852598e-06, - "loss": 0.9421, - "step": 4963 - }, - { - "epoch": 0.5968857091324475, - "grad_norm": 1.9018697598548429, - "learning_rate": 1.4757185317974696e-06, - "loss": 0.9692, - "step": 4964 - }, - { - "epoch": 0.5970059520230866, - "grad_norm": 1.9653033407261424, - "learning_rate": 1.474966840842761e-06, - "loss": 0.9152, - "step": 4965 - }, - { - "epoch": 0.5971261949137258, - "grad_norm": 1.7657214113092579, - "learning_rate": 1.4742152295351655e-06, - "loss": 1.0658, - "step": 4966 - }, - { - "epoch": 0.5972464378043648, - "grad_norm": 2.8911054882014753, - "learning_rate": 1.4734636979887016e-06, - "loss": 0.836, - "step": 4967 - }, - { - "epoch": 0.5973666806950039, - "grad_norm": 13.239516231307793, - "learning_rate": 1.4727122463173755e-06, - "loss": 1.1047, - "step": 4968 - }, - { - "epoch": 0.597486923585643, - "grad_norm": 1.7122106822853975, - "learning_rate": 1.471960874635183e-06, - "loss": 0.8487, - "step": 4969 - }, - { - "epoch": 0.5976071664762821, - "grad_norm": 2.0688336542739236, - "learning_rate": 1.4712095830561055e-06, - "loss": 0.9024, - "step": 4970 - }, - { - "epoch": 0.5977274093669211, - "grad_norm": 2.193994961617178, - "learning_rate": 1.4704583716941147e-06, - "loss": 1.0081, - "step": 4971 - }, - { - "epoch": 0.5978476522575603, - "grad_norm": 1.503822145958968, - "learning_rate": 1.4697072406631672e-06, - "loss": 0.9175, - "step": 4972 - }, - { - "epoch": 0.5979678951481994, - "grad_norm": 1.6655664945422322, - "learning_rate": 1.4689561900772097e-06, - "loss": 0.93, - "step": 4973 - }, - { - "epoch": 0.5980881380388384, - "grad_norm": 2.104205203568988, - "learning_rate": 1.4682052200501758e-06, - "loss": 0.9303, - "step": 4974 - }, - { - "epoch": 0.5982083809294776, - "grad_norm": 1.5908233099265807, - "learning_rate": 1.4674543306959876e-06, - "loss": 0.9864, - "step": 4975 - }, - { - "epoch": 0.5983286238201166, - "grad_norm": 1.970875939465815, - "learning_rate": 1.4667035221285535e-06, - "loss": 1.0467, - "step": 4976 - }, - { - "epoch": 0.5984488667107557, - "grad_norm": 1.6243063106302253, - "learning_rate": 1.4659527944617715e-06, - "loss": 0.9426, - "step": 4977 - }, - { - "epoch": 0.5985691096013949, - "grad_norm": 1.8097019302090587, - "learning_rate": 1.465202147809526e-06, - "loss": 0.9598, - "step": 4978 - }, - { - "epoch": 0.5986893524920339, - "grad_norm": 1.8997846933141997, - "learning_rate": 1.4644515822856888e-06, - "loss": 0.9554, - "step": 4979 - }, - { - "epoch": 0.598809595382673, - "grad_norm": 0.8388114439947202, - "learning_rate": 1.4637010980041215e-06, - "loss": 0.7831, - "step": 4980 - }, - { - "epoch": 0.5989298382733121, - "grad_norm": 2.196708805314874, - "learning_rate": 1.4629506950786707e-06, - "loss": 1.1012, - "step": 4981 - }, - { - "epoch": 0.5990500811639512, - "grad_norm": 0.8377253585055571, - "learning_rate": 1.4622003736231733e-06, - "loss": 0.7811, - "step": 4982 - }, - { - "epoch": 0.5991703240545903, - "grad_norm": 1.7982751449891008, - "learning_rate": 1.461450133751451e-06, - "loss": 0.9979, - "step": 4983 - }, - { - "epoch": 0.5992905669452293, - "grad_norm": 1.7465021052747827, - "learning_rate": 1.4606999755773153e-06, - "loss": 0.963, - "step": 4984 - }, - { - "epoch": 0.5994108098358685, - "grad_norm": 1.5022833452464641, - "learning_rate": 1.4599498992145643e-06, - "loss": 1.0071, - "step": 4985 - }, - { - "epoch": 0.5995310527265075, - "grad_norm": 1.8446294335905726, - "learning_rate": 1.4591999047769846e-06, - "loss": 0.9075, - "step": 4986 - }, - { - "epoch": 0.5996512956171466, - "grad_norm": 2.0458295642121422, - "learning_rate": 1.4584499923783486e-06, - "loss": 0.9597, - "step": 4987 - }, - { - "epoch": 0.5997715385077858, - "grad_norm": 1.735300659051358, - "learning_rate": 1.457700162132419e-06, - "loss": 0.9619, - "step": 4988 - }, - { - "epoch": 0.5998917813984248, - "grad_norm": 1.806004757715424, - "learning_rate": 1.4569504141529433e-06, - "loss": 0.9226, - "step": 4989 - }, - { - "epoch": 0.6000120242890639, - "grad_norm": 1.7423935841010503, - "learning_rate": 1.456200748553658e-06, - "loss": 0.9162, - "step": 4990 - }, - { - "epoch": 0.600132267179703, - "grad_norm": 1.4145068740219233, - "learning_rate": 1.455451165448287e-06, - "loss": 0.9788, - "step": 4991 - }, - { - "epoch": 0.6002525100703421, - "grad_norm": 2.1659636701952016, - "learning_rate": 1.4547016649505407e-06, - "loss": 0.9326, - "step": 4992 - }, - { - "epoch": 0.6003727529609811, - "grad_norm": 1.982056171529707, - "learning_rate": 1.4539522471741193e-06, - "loss": 1.0486, - "step": 4993 - }, - { - "epoch": 0.6004929958516203, - "grad_norm": 1.924274714186927, - "learning_rate": 1.4532029122327067e-06, - "loss": 0.9067, - "step": 4994 - }, - { - "epoch": 0.6006132387422594, - "grad_norm": 1.9628440433877368, - "learning_rate": 1.4524536602399783e-06, - "loss": 0.9529, - "step": 4995 - }, - { - "epoch": 0.6007334816328984, - "grad_norm": 1.5010598729776883, - "learning_rate": 1.4517044913095938e-06, - "loss": 0.9694, - "step": 4996 - }, - { - "epoch": 0.6008537245235376, - "grad_norm": 1.7439294155343463, - "learning_rate": 1.4509554055552022e-06, - "loss": 1.0135, - "step": 4997 - }, - { - "epoch": 0.6009739674141766, - "grad_norm": 2.3595057571253593, - "learning_rate": 1.450206403090439e-06, - "loss": 1.0383, - "step": 4998 - }, - { - "epoch": 0.6010942103048157, - "grad_norm": 1.9381371342667564, - "learning_rate": 1.4494574840289274e-06, - "loss": 1.0579, - "step": 4999 - }, - { - "epoch": 0.6012144531954549, - "grad_norm": 1.8542962475476872, - "learning_rate": 1.4487086484842782e-06, - "loss": 0.9459, - "step": 5000 - }, - { - "epoch": 0.6013346960860939, - "grad_norm": 1.982765927079678, - "learning_rate": 1.4479598965700878e-06, - "loss": 0.8001, - "step": 5001 - }, - { - "epoch": 0.601454938976733, - "grad_norm": 2.1141469596011464, - "learning_rate": 1.4472112283999427e-06, - "loss": 0.8851, - "step": 5002 - }, - { - "epoch": 0.6015751818673721, - "grad_norm": 1.9308584266890247, - "learning_rate": 1.4464626440874143e-06, - "loss": 0.8968, - "step": 5003 - }, - { - "epoch": 0.6016954247580112, - "grad_norm": 2.3842827935344997, - "learning_rate": 1.4457141437460636e-06, - "loss": 0.9441, - "step": 5004 - }, - { - "epoch": 0.6018156676486502, - "grad_norm": 1.7654594411282982, - "learning_rate": 1.444965727489436e-06, - "loss": 0.9296, - "step": 5005 - }, - { - "epoch": 0.6019359105392894, - "grad_norm": 1.603728601992328, - "learning_rate": 1.444217395431066e-06, - "loss": 0.8317, - "step": 5006 - }, - { - "epoch": 0.6020561534299285, - "grad_norm": 0.8786109258610197, - "learning_rate": 1.4434691476844755e-06, - "loss": 0.78, - "step": 5007 - }, - { - "epoch": 0.6021763963205675, - "grad_norm": 1.9354802340555373, - "learning_rate": 1.4427209843631729e-06, - "loss": 0.8713, - "step": 5008 - }, - { - "epoch": 0.6022966392112067, - "grad_norm": 1.6617251892395513, - "learning_rate": 1.4419729055806534e-06, - "loss": 1.0136, - "step": 5009 - }, - { - "epoch": 0.6024168821018457, - "grad_norm": 1.6111291011083602, - "learning_rate": 1.441224911450401e-06, - "loss": 1.0278, - "step": 5010 - }, - { - "epoch": 0.6025371249924848, - "grad_norm": 1.5129661440745608, - "learning_rate": 1.4404770020858851e-06, - "loss": 1.0214, - "step": 5011 - }, - { - "epoch": 0.602657367883124, - "grad_norm": 1.4418293815179122, - "learning_rate": 1.439729177600563e-06, - "loss": 1.0605, - "step": 5012 - }, - { - "epoch": 0.602777610773763, - "grad_norm": 1.6522576780875295, - "learning_rate": 1.4389814381078793e-06, - "loss": 0.9259, - "step": 5013 - }, - { - "epoch": 0.6028978536644021, - "grad_norm": 2.3150764752603123, - "learning_rate": 1.438233783721265e-06, - "loss": 1.0017, - "step": 5014 - }, - { - "epoch": 0.6030180965550412, - "grad_norm": 1.871177910363431, - "learning_rate": 1.43748621455414e-06, - "loss": 0.9791, - "step": 5015 - }, - { - "epoch": 0.6031383394456803, - "grad_norm": 2.204721940688756, - "learning_rate": 1.4367387307199082e-06, - "loss": 1.0032, - "step": 5016 - }, - { - "epoch": 0.6032585823363193, - "grad_norm": 1.782430291223898, - "learning_rate": 1.4359913323319632e-06, - "loss": 1.0206, - "step": 5017 - }, - { - "epoch": 0.6033788252269584, - "grad_norm": 1.660905736151177, - "learning_rate": 1.4352440195036847e-06, - "loss": 0.9838, - "step": 5018 - }, - { - "epoch": 0.6034990681175976, - "grad_norm": 1.6679946218445063, - "learning_rate": 1.4344967923484395e-06, - "loss": 0.9935, - "step": 5019 - }, - { - "epoch": 0.6036193110082366, - "grad_norm": 2.143803040318184, - "learning_rate": 1.433749650979581e-06, - "loss": 0.9186, - "step": 5020 - }, - { - "epoch": 0.6037395538988757, - "grad_norm": 1.8564365151972975, - "learning_rate": 1.433002595510451e-06, - "loss": 0.8841, - "step": 5021 - }, - { - "epoch": 0.6038597967895148, - "grad_norm": 1.7246081999173162, - "learning_rate": 1.4322556260543757e-06, - "loss": 0.9172, - "step": 5022 - }, - { - "epoch": 0.6039800396801539, - "grad_norm": 0.9660093857208538, - "learning_rate": 1.4315087427246703e-06, - "loss": 0.8515, - "step": 5023 - }, - { - "epoch": 0.604100282570793, - "grad_norm": 0.9436544900526501, - "learning_rate": 1.4307619456346372e-06, - "loss": 0.8145, - "step": 5024 - }, - { - "epoch": 0.6042205254614321, - "grad_norm": 1.9409556182472631, - "learning_rate": 1.430015234897564e-06, - "loss": 0.9295, - "step": 5025 - }, - { - "epoch": 0.6043407683520712, - "grad_norm": 1.6600542104440925, - "learning_rate": 1.4292686106267274e-06, - "loss": 0.8636, - "step": 5026 - }, - { - "epoch": 0.6044610112427102, - "grad_norm": 1.4373902818023534, - "learning_rate": 1.4285220729353876e-06, - "loss": 0.961, - "step": 5027 - }, - { - "epoch": 0.6045812541333494, - "grad_norm": 2.0255275227586718, - "learning_rate": 1.4277756219367957e-06, - "loss": 0.9806, - "step": 5028 - }, - { - "epoch": 0.6047014970239885, - "grad_norm": 1.9891968063715464, - "learning_rate": 1.4270292577441864e-06, - "loss": 1.0033, - "step": 5029 - }, - { - "epoch": 0.6048217399146275, - "grad_norm": 1.6180279155911754, - "learning_rate": 1.4262829804707836e-06, - "loss": 0.918, - "step": 5030 - }, - { - "epoch": 0.6049419828052667, - "grad_norm": 1.3977110270411064, - "learning_rate": 1.4255367902297958e-06, - "loss": 0.8943, - "step": 5031 - }, - { - "epoch": 0.6050622256959057, - "grad_norm": 2.0679015894851394, - "learning_rate": 1.4247906871344215e-06, - "loss": 0.9868, - "step": 5032 - }, - { - "epoch": 0.6051824685865448, - "grad_norm": 2.1777452135030173, - "learning_rate": 1.4240446712978415e-06, - "loss": 0.9529, - "step": 5033 - }, - { - "epoch": 0.605302711477184, - "grad_norm": 1.9922468587970514, - "learning_rate": 1.423298742833227e-06, - "loss": 0.9421, - "step": 5034 - }, - { - "epoch": 0.605422954367823, - "grad_norm": 4.33305326192498, - "learning_rate": 1.4225529018537352e-06, - "loss": 0.9245, - "step": 5035 - }, - { - "epoch": 0.6055431972584621, - "grad_norm": 1.5982462719199995, - "learning_rate": 1.4218071484725082e-06, - "loss": 0.9782, - "step": 5036 - }, - { - "epoch": 0.6056634401491012, - "grad_norm": 1.7833616890242412, - "learning_rate": 1.4210614828026786e-06, - "loss": 0.9545, - "step": 5037 - }, - { - "epoch": 0.6057836830397403, - "grad_norm": 1.5198437680714763, - "learning_rate": 1.4203159049573605e-06, - "loss": 0.945, - "step": 5038 - }, - { - "epoch": 0.6059039259303793, - "grad_norm": 1.8266674654441442, - "learning_rate": 1.4195704150496593e-06, - "loss": 1.0775, - "step": 5039 - }, - { - "epoch": 0.6060241688210185, - "grad_norm": 2.0399600571106618, - "learning_rate": 1.4188250131926639e-06, - "loss": 0.9385, - "step": 5040 - }, - { - "epoch": 0.6061444117116576, - "grad_norm": 1.8491357279466136, - "learning_rate": 1.4180796994994525e-06, - "loss": 1.0131, - "step": 5041 - }, - { - "epoch": 0.6062646546022966, - "grad_norm": 1.8142547006176886, - "learning_rate": 1.4173344740830877e-06, - "loss": 0.9233, - "step": 5042 - }, - { - "epoch": 0.6063848974929358, - "grad_norm": 1.6143792136178314, - "learning_rate": 1.4165893370566206e-06, - "loss": 0.9065, - "step": 5043 - }, - { - "epoch": 0.6065051403835748, - "grad_norm": 1.8834823113429346, - "learning_rate": 1.4158442885330865e-06, - "loss": 0.9651, - "step": 5044 - }, - { - "epoch": 0.6066253832742139, - "grad_norm": 1.9039003242960846, - "learning_rate": 1.4150993286255094e-06, - "loss": 0.9951, - "step": 5045 - }, - { - "epoch": 0.6067456261648531, - "grad_norm": 1.8481335013500122, - "learning_rate": 1.4143544574468993e-06, - "loss": 0.9937, - "step": 5046 - }, - { - "epoch": 0.6068658690554921, - "grad_norm": 1.6229151042233716, - "learning_rate": 1.4136096751102523e-06, - "loss": 1.0196, - "step": 5047 - }, - { - "epoch": 0.6069861119461312, - "grad_norm": 1.7271205638865248, - "learning_rate": 1.4128649817285516e-06, - "loss": 1.027, - "step": 5048 - }, - { - "epoch": 0.6071063548367702, - "grad_norm": 1.708746565485351, - "learning_rate": 1.412120377414766e-06, - "loss": 0.8331, - "step": 5049 - }, - { - "epoch": 0.6072265977274094, - "grad_norm": 1.441620353366432, - "learning_rate": 1.4113758622818522e-06, - "loss": 0.9023, - "step": 5050 - }, - { - "epoch": 0.6073468406180484, - "grad_norm": 1.8493668942082073, - "learning_rate": 1.410631436442751e-06, - "loss": 1.0271, - "step": 5051 - }, - { - "epoch": 0.6074670835086875, - "grad_norm": 1.843204223544527, - "learning_rate": 1.4098871000103936e-06, - "loss": 1.0678, - "step": 5052 - }, - { - "epoch": 0.6075873263993267, - "grad_norm": 1.5054745906128595, - "learning_rate": 1.409142853097693e-06, - "loss": 1.0204, - "step": 5053 - }, - { - "epoch": 0.6077075692899657, - "grad_norm": 1.742776950626152, - "learning_rate": 1.408398695817553e-06, - "loss": 0.9964, - "step": 5054 - }, - { - "epoch": 0.6078278121806048, - "grad_norm": 1.5317484764414053, - "learning_rate": 1.4076546282828593e-06, - "loss": 0.8963, - "step": 5055 - }, - { - "epoch": 0.6079480550712439, - "grad_norm": 1.9640370223912498, - "learning_rate": 1.4069106506064874e-06, - "loss": 0.8616, - "step": 5056 - }, - { - "epoch": 0.608068297961883, - "grad_norm": 1.5632441465291707, - "learning_rate": 1.4061667629012989e-06, - "loss": 0.9801, - "step": 5057 - }, - { - "epoch": 0.608188540852522, - "grad_norm": 1.4796770137749637, - "learning_rate": 1.40542296528014e-06, - "loss": 1.0271, - "step": 5058 - }, - { - "epoch": 0.6083087837431612, - "grad_norm": 1.62922522625798, - "learning_rate": 1.4046792578558452e-06, - "loss": 0.9541, - "step": 5059 - }, - { - "epoch": 0.6084290266338003, - "grad_norm": 2.5233709840304646, - "learning_rate": 1.4039356407412325e-06, - "loss": 0.9655, - "step": 5060 - }, - { - "epoch": 0.6085492695244393, - "grad_norm": 0.8697256797993359, - "learning_rate": 1.40319211404911e-06, - "loss": 0.8141, - "step": 5061 - }, - { - "epoch": 0.6086695124150785, - "grad_norm": 1.6858673675063391, - "learning_rate": 1.4024486778922691e-06, - "loss": 1.0944, - "step": 5062 - }, - { - "epoch": 0.6087897553057176, - "grad_norm": 1.66603222533494, - "learning_rate": 1.4017053323834884e-06, - "loss": 0.9716, - "step": 5063 - }, - { - "epoch": 0.6089099981963566, - "grad_norm": 1.8285747529110323, - "learning_rate": 1.4009620776355333e-06, - "loss": 0.9629, - "step": 5064 - }, - { - "epoch": 0.6090302410869958, - "grad_norm": 1.5653785130827595, - "learning_rate": 1.4002189137611553e-06, - "loss": 0.9875, - "step": 5065 - }, - { - "epoch": 0.6091504839776348, - "grad_norm": 1.560489047128459, - "learning_rate": 1.3994758408730901e-06, - "loss": 0.8915, - "step": 5066 - }, - { - "epoch": 0.6092707268682739, - "grad_norm": 1.8924822551747011, - "learning_rate": 1.3987328590840629e-06, - "loss": 0.9742, - "step": 5067 - }, - { - "epoch": 0.609390969758913, - "grad_norm": 1.816471681134178, - "learning_rate": 1.397989968506783e-06, - "loss": 1.0611, - "step": 5068 - }, - { - "epoch": 0.6095112126495521, - "grad_norm": 1.985692225330757, - "learning_rate": 1.3972471692539458e-06, - "loss": 0.9285, - "step": 5069 - }, - { - "epoch": 0.6096314555401912, - "grad_norm": 1.9888033977559314, - "learning_rate": 1.3965044614382348e-06, - "loss": 0.9576, - "step": 5070 - }, - { - "epoch": 0.6097516984308303, - "grad_norm": 1.951069536093222, - "learning_rate": 1.3957618451723162e-06, - "loss": 0.948, - "step": 5071 - }, - { - "epoch": 0.6098719413214694, - "grad_norm": 1.9367055186950335, - "learning_rate": 1.3950193205688457e-06, - "loss": 0.9025, - "step": 5072 - }, - { - "epoch": 0.6099921842121084, - "grad_norm": 1.8006983673815355, - "learning_rate": 1.3942768877404627e-06, - "loss": 1.0339, - "step": 5073 - }, - { - "epoch": 0.6101124271027476, - "grad_norm": 1.4861144464916856, - "learning_rate": 1.393534546799795e-06, - "loss": 0.941, - "step": 5074 - }, - { - "epoch": 0.6102326699933867, - "grad_norm": 1.6888922140579505, - "learning_rate": 1.3927922978594536e-06, - "loss": 0.881, - "step": 5075 - }, - { - "epoch": 0.6103529128840257, - "grad_norm": 0.8805006019767857, - "learning_rate": 1.3920501410320387e-06, - "loss": 0.8191, - "step": 5076 - }, - { - "epoch": 0.6104731557746649, - "grad_norm": 1.9204421866369645, - "learning_rate": 1.3913080764301333e-06, - "loss": 0.9608, - "step": 5077 - }, - { - "epoch": 0.6105933986653039, - "grad_norm": 1.8163810482122653, - "learning_rate": 1.3905661041663085e-06, - "loss": 0.918, - "step": 5078 - }, - { - "epoch": 0.610713641555943, - "grad_norm": 2.0762171197592383, - "learning_rate": 1.389824224353122e-06, - "loss": 0.8445, - "step": 5079 - }, - { - "epoch": 0.610833884446582, - "grad_norm": 1.6362959650284716, - "learning_rate": 1.389082437103115e-06, - "loss": 0.9649, - "step": 5080 - }, - { - "epoch": 0.6109541273372212, - "grad_norm": 4.049462628918535, - "learning_rate": 1.3883407425288172e-06, - "loss": 0.9808, - "step": 5081 - }, - { - "epoch": 0.6110743702278603, - "grad_norm": 3.0779076514020174, - "learning_rate": 1.3875991407427417e-06, - "loss": 0.9863, - "step": 5082 - }, - { - "epoch": 0.6111946131184993, - "grad_norm": 0.8358461561832629, - "learning_rate": 1.38685763185739e-06, - "loss": 0.8049, - "step": 5083 - }, - { - "epoch": 0.6113148560091385, - "grad_norm": 2.211419258605003, - "learning_rate": 1.3861162159852476e-06, - "loss": 0.8702, - "step": 5084 - }, - { - "epoch": 0.6114350988997775, - "grad_norm": 1.548931411820475, - "learning_rate": 1.3853748932387875e-06, - "loss": 1.0011, - "step": 5085 - }, - { - "epoch": 0.6115553417904166, - "grad_norm": 2.039470646375068, - "learning_rate": 1.3846336637304671e-06, - "loss": 0.9479, - "step": 5086 - }, - { - "epoch": 0.6116755846810558, - "grad_norm": 1.7274372767366126, - "learning_rate": 1.3838925275727316e-06, - "loss": 1.0243, - "step": 5087 - }, - { - "epoch": 0.6117958275716948, - "grad_norm": 1.6849565057772513, - "learning_rate": 1.3831514848780089e-06, - "loss": 0.9899, - "step": 5088 - }, - { - "epoch": 0.6119160704623339, - "grad_norm": 2.249000751283214, - "learning_rate": 1.3824105357587152e-06, - "loss": 1.1251, - "step": 5089 - }, - { - "epoch": 0.612036313352973, - "grad_norm": 1.396137840117002, - "learning_rate": 1.381669680327253e-06, - "loss": 1.0172, - "step": 5090 - }, - { - "epoch": 0.6121565562436121, - "grad_norm": 1.6417186906320256, - "learning_rate": 1.380928918696008e-06, - "loss": 0.9091, - "step": 5091 - }, - { - "epoch": 0.6122767991342511, - "grad_norm": 2.2456885461604426, - "learning_rate": 1.3801882509773548e-06, - "loss": 0.9144, - "step": 5092 - }, - { - "epoch": 0.6123970420248903, - "grad_norm": 2.0090474290352223, - "learning_rate": 1.3794476772836503e-06, - "loss": 1.0116, - "step": 5093 - }, - { - "epoch": 0.6125172849155294, - "grad_norm": 1.5515136526664535, - "learning_rate": 1.3787071977272402e-06, - "loss": 1.039, - "step": 5094 - }, - { - "epoch": 0.6126375278061684, - "grad_norm": 2.222666976715109, - "learning_rate": 1.3779668124204535e-06, - "loss": 0.921, - "step": 5095 - }, - { - "epoch": 0.6127577706968076, - "grad_norm": 1.7566162107678425, - "learning_rate": 1.3772265214756074e-06, - "loss": 1.0106, - "step": 5096 - }, - { - "epoch": 0.6128780135874466, - "grad_norm": 1.8207915498433522, - "learning_rate": 1.3764863250050025e-06, - "loss": 0.9513, - "step": 5097 - }, - { - "epoch": 0.6129982564780857, - "grad_norm": 2.132190096519664, - "learning_rate": 1.3757462231209272e-06, - "loss": 1.0005, - "step": 5098 - }, - { - "epoch": 0.6131184993687249, - "grad_norm": 1.8551869389198015, - "learning_rate": 1.3750062159356525e-06, - "loss": 1.0854, - "step": 5099 - }, - { - "epoch": 0.6132387422593639, - "grad_norm": 1.7528560357307388, - "learning_rate": 1.3742663035614382e-06, - "loss": 1.022, - "step": 5100 - }, - { - "epoch": 0.613358985150003, - "grad_norm": 1.6318924789632216, - "learning_rate": 1.3735264861105283e-06, - "loss": 1.0027, - "step": 5101 - }, - { - "epoch": 0.6134792280406421, - "grad_norm": 2.0967673579641763, - "learning_rate": 1.372786763695152e-06, - "loss": 0.9814, - "step": 5102 - }, - { - "epoch": 0.6135994709312812, - "grad_norm": 1.6228780830834122, - "learning_rate": 1.3720471364275257e-06, - "loss": 0.97, - "step": 5103 - }, - { - "epoch": 0.6137197138219203, - "grad_norm": 1.829587628434741, - "learning_rate": 1.3713076044198486e-06, - "loss": 0.9651, - "step": 5104 - }, - { - "epoch": 0.6138399567125594, - "grad_norm": 1.8759255985643133, - "learning_rate": 1.3705681677843086e-06, - "loss": 1.0087, - "step": 5105 - }, - { - "epoch": 0.6139601996031985, - "grad_norm": 0.8581356475236193, - "learning_rate": 1.3698288266330768e-06, - "loss": 0.8269, - "step": 5106 - }, - { - "epoch": 0.6140804424938375, - "grad_norm": 2.1348516197369447, - "learning_rate": 1.3690895810783113e-06, - "loss": 0.9276, - "step": 5107 - }, - { - "epoch": 0.6142006853844767, - "grad_norm": 2.0290218163187435, - "learning_rate": 1.3683504312321543e-06, - "loss": 0.9187, - "step": 5108 - }, - { - "epoch": 0.6143209282751158, - "grad_norm": 3.9408251331158253, - "learning_rate": 1.3676113772067355e-06, - "loss": 0.9965, - "step": 5109 - }, - { - "epoch": 0.6144411711657548, - "grad_norm": 2.0033644748791377, - "learning_rate": 1.3668724191141671e-06, - "loss": 0.911, - "step": 5110 - }, - { - "epoch": 0.6145614140563939, - "grad_norm": 2.2736842490195297, - "learning_rate": 1.3661335570665493e-06, - "loss": 0.8689, - "step": 5111 - }, - { - "epoch": 0.614681656947033, - "grad_norm": 2.106923013353359, - "learning_rate": 1.3653947911759676e-06, - "loss": 0.9017, - "step": 5112 - }, - { - "epoch": 0.6148018998376721, - "grad_norm": 1.4207428837211493, - "learning_rate": 1.3646561215544904e-06, - "loss": 0.941, - "step": 5113 - }, - { - "epoch": 0.6149221427283111, - "grad_norm": 1.949114290240174, - "learning_rate": 1.363917548314176e-06, - "loss": 0.9969, - "step": 5114 - }, - { - "epoch": 0.6150423856189503, - "grad_norm": 1.633775490968363, - "learning_rate": 1.3631790715670626e-06, - "loss": 0.9346, - "step": 5115 - }, - { - "epoch": 0.6151626285095894, - "grad_norm": 1.662018142509363, - "learning_rate": 1.3624406914251783e-06, - "loss": 1.0601, - "step": 5116 - }, - { - "epoch": 0.6152828714002284, - "grad_norm": 1.7991986716272752, - "learning_rate": 1.3617024080005335e-06, - "loss": 1.0784, - "step": 5117 - }, - { - "epoch": 0.6154031142908676, - "grad_norm": 1.4259015594306956, - "learning_rate": 1.3609642214051266e-06, - "loss": 0.942, - "step": 5118 - }, - { - "epoch": 0.6155233571815066, - "grad_norm": 1.6978946130486583, - "learning_rate": 1.3602261317509385e-06, - "loss": 0.8611, - "step": 5119 - }, - { - "epoch": 0.6156436000721457, - "grad_norm": 2.4693072692032456, - "learning_rate": 1.3594881391499387e-06, - "loss": 1.0178, - "step": 5120 - }, - { - "epoch": 0.6157638429627849, - "grad_norm": 1.5743966203259814, - "learning_rate": 1.3587502437140778e-06, - "loss": 0.9913, - "step": 5121 - }, - { - "epoch": 0.6158840858534239, - "grad_norm": 2.1196149022839914, - "learning_rate": 1.3580124455552952e-06, - "loss": 1.0526, - "step": 5122 - }, - { - "epoch": 0.616004328744063, - "grad_norm": 1.7829284790722304, - "learning_rate": 1.3572747447855148e-06, - "loss": 1.0621, - "step": 5123 - }, - { - "epoch": 0.6161245716347021, - "grad_norm": 1.8597754039319017, - "learning_rate": 1.356537141516644e-06, - "loss": 0.89, - "step": 5124 - }, - { - "epoch": 0.6162448145253412, - "grad_norm": 1.7353835738905254, - "learning_rate": 1.3557996358605775e-06, - "loss": 0.8161, - "step": 5125 - }, - { - "epoch": 0.6163650574159802, - "grad_norm": 2.2289763633806263, - "learning_rate": 1.3550622279291941e-06, - "loss": 0.9005, - "step": 5126 - }, - { - "epoch": 0.6164853003066194, - "grad_norm": 1.2806456931784858, - "learning_rate": 1.354324917834358e-06, - "loss": 1.0263, - "step": 5127 - }, - { - "epoch": 0.6166055431972585, - "grad_norm": 1.5473377036679823, - "learning_rate": 1.353587705687918e-06, - "loss": 0.9583, - "step": 5128 - }, - { - "epoch": 0.6167257860878975, - "grad_norm": 2.311337049711042, - "learning_rate": 1.3528505916017096e-06, - "loss": 0.9264, - "step": 5129 - }, - { - "epoch": 0.6168460289785367, - "grad_norm": 2.2461986218723773, - "learning_rate": 1.3521135756875514e-06, - "loss": 1.077, - "step": 5130 - }, - { - "epoch": 0.6169662718691757, - "grad_norm": 1.7237332322907497, - "learning_rate": 1.3513766580572496e-06, - "loss": 1.0546, - "step": 5131 - }, - { - "epoch": 0.6170865147598148, - "grad_norm": 2.082465894037427, - "learning_rate": 1.3506398388225924e-06, - "loss": 0.9691, - "step": 5132 - }, - { - "epoch": 0.617206757650454, - "grad_norm": 1.6405730466336885, - "learning_rate": 1.349903118095355e-06, - "loss": 0.908, - "step": 5133 - }, - { - "epoch": 0.617327000541093, - "grad_norm": 1.7799941872919733, - "learning_rate": 1.349166495987298e-06, - "loss": 0.941, - "step": 5134 - }, - { - "epoch": 0.6174472434317321, - "grad_norm": 3.3044339707251416, - "learning_rate": 1.348429972610166e-06, - "loss": 0.8708, - "step": 5135 - }, - { - "epoch": 0.6175674863223712, - "grad_norm": 0.895391425470311, - "learning_rate": 1.3476935480756897e-06, - "loss": 0.8044, - "step": 5136 - }, - { - "epoch": 0.6176877292130103, - "grad_norm": 1.9554597251644208, - "learning_rate": 1.346957222495583e-06, - "loss": 0.9538, - "step": 5137 - }, - { - "epoch": 0.6178079721036493, - "grad_norm": 2.744886759800338, - "learning_rate": 1.3462209959815466e-06, - "loss": 0.9153, - "step": 5138 - }, - { - "epoch": 0.6179282149942885, - "grad_norm": 1.5794455413704225, - "learning_rate": 1.345484868645265e-06, - "loss": 0.9342, - "step": 5139 - }, - { - "epoch": 0.6180484578849276, - "grad_norm": 1.8437041584070415, - "learning_rate": 1.3447488405984088e-06, - "loss": 0.976, - "step": 5140 - }, - { - "epoch": 0.6181687007755666, - "grad_norm": 2.1906476943381903, - "learning_rate": 1.3440129119526322e-06, - "loss": 0.8901, - "step": 5141 - }, - { - "epoch": 0.6182889436662057, - "grad_norm": 1.016094979337218, - "learning_rate": 1.3432770828195762e-06, - "loss": 0.7623, - "step": 5142 - }, - { - "epoch": 0.6184091865568448, - "grad_norm": 2.370433935526171, - "learning_rate": 1.3425413533108635e-06, - "loss": 0.9142, - "step": 5143 - }, - { - "epoch": 0.6185294294474839, - "grad_norm": 6.315523839684948, - "learning_rate": 1.341805723538105e-06, - "loss": 0.9063, - "step": 5144 - }, - { - "epoch": 0.618649672338123, - "grad_norm": 1.3712589783839686, - "learning_rate": 1.3410701936128948e-06, - "loss": 0.972, - "step": 5145 - }, - { - "epoch": 0.6187699152287621, - "grad_norm": 2.7472730467538917, - "learning_rate": 1.340334763646812e-06, - "loss": 1.057, - "step": 5146 - }, - { - "epoch": 0.6188901581194012, - "grad_norm": 1.5065772179970869, - "learning_rate": 1.3395994337514218e-06, - "loss": 0.9474, - "step": 5147 - }, - { - "epoch": 0.6190104010100402, - "grad_norm": 1.5158124984699057, - "learning_rate": 1.3388642040382725e-06, - "loss": 0.9757, - "step": 5148 - }, - { - "epoch": 0.6191306439006794, - "grad_norm": 1.6118933963502393, - "learning_rate": 1.3381290746188975e-06, - "loss": 1.0401, - "step": 5149 - }, - { - "epoch": 0.6192508867913185, - "grad_norm": 1.6521192177158095, - "learning_rate": 1.3373940456048152e-06, - "loss": 0.8772, - "step": 5150 - }, - { - "epoch": 0.6193711296819575, - "grad_norm": 1.513698651476875, - "learning_rate": 1.3366591171075299e-06, - "loss": 0.7927, - "step": 5151 - }, - { - "epoch": 0.6194913725725967, - "grad_norm": 1.6529510319974752, - "learning_rate": 1.335924289238529e-06, - "loss": 1.1032, - "step": 5152 - }, - { - "epoch": 0.6196116154632357, - "grad_norm": 1.4737111166240249, - "learning_rate": 1.3351895621092859e-06, - "loss": 0.9608, - "step": 5153 - }, - { - "epoch": 0.6197318583538748, - "grad_norm": 1.7559845638943568, - "learning_rate": 1.3344549358312567e-06, - "loss": 0.9702, - "step": 5154 - }, - { - "epoch": 0.619852101244514, - "grad_norm": 1.8775832500871141, - "learning_rate": 1.3337204105158852e-06, - "loss": 0.9813, - "step": 5155 - }, - { - "epoch": 0.619972344135153, - "grad_norm": 1.781631629618372, - "learning_rate": 1.332985986274597e-06, - "loss": 0.9252, - "step": 5156 - }, - { - "epoch": 0.6200925870257921, - "grad_norm": 2.0023425211997545, - "learning_rate": 1.3322516632188047e-06, - "loss": 0.9479, - "step": 5157 - }, - { - "epoch": 0.6202128299164312, - "grad_norm": 1.576347029359403, - "learning_rate": 1.3315174414599045e-06, - "loss": 0.8729, - "step": 5158 - }, - { - "epoch": 0.6203330728070703, - "grad_norm": 1.975074660423146, - "learning_rate": 1.3307833211092768e-06, - "loss": 0.9526, - "step": 5159 - }, - { - "epoch": 0.6204533156977093, - "grad_norm": 1.6051370319800475, - "learning_rate": 1.3300493022782873e-06, - "loss": 0.9465, - "step": 5160 - }, - { - "epoch": 0.6205735585883485, - "grad_norm": 3.701696707924091, - "learning_rate": 1.3293153850782855e-06, - "loss": 0.9232, - "step": 5161 - }, - { - "epoch": 0.6206938014789876, - "grad_norm": 2.0301535361345313, - "learning_rate": 1.3285815696206069e-06, - "loss": 0.9144, - "step": 5162 - }, - { - "epoch": 0.6208140443696266, - "grad_norm": 1.7762310863027486, - "learning_rate": 1.32784785601657e-06, - "loss": 0.9655, - "step": 5163 - }, - { - "epoch": 0.6209342872602658, - "grad_norm": 1.6322703944582835, - "learning_rate": 1.3271142443774798e-06, - "loss": 0.9402, - "step": 5164 - }, - { - "epoch": 0.6210545301509048, - "grad_norm": 2.169545193312459, - "learning_rate": 1.3263807348146228e-06, - "loss": 1.0182, - "step": 5165 - }, - { - "epoch": 0.6211747730415439, - "grad_norm": 1.86116313703454, - "learning_rate": 1.3256473274392733e-06, - "loss": 0.9401, - "step": 5166 - }, - { - "epoch": 0.6212950159321831, - "grad_norm": 1.943006504552519, - "learning_rate": 1.3249140223626873e-06, - "loss": 0.8978, - "step": 5167 - }, - { - "epoch": 0.6214152588228221, - "grad_norm": 1.7980506036700286, - "learning_rate": 1.3241808196961077e-06, - "loss": 0.9611, - "step": 5168 - }, - { - "epoch": 0.6215355017134612, - "grad_norm": 1.6747279558315438, - "learning_rate": 1.3234477195507608e-06, - "loss": 0.9165, - "step": 5169 - }, - { - "epoch": 0.6216557446041003, - "grad_norm": 2.7778999640824393, - "learning_rate": 1.322714722037857e-06, - "loss": 0.8349, - "step": 5170 - }, - { - "epoch": 0.6217759874947394, - "grad_norm": 2.079694112513553, - "learning_rate": 1.321981827268591e-06, - "loss": 0.9718, - "step": 5171 - }, - { - "epoch": 0.6218962303853784, - "grad_norm": 1.5926814517573344, - "learning_rate": 1.3212490353541426e-06, - "loss": 1.0145, - "step": 5172 - }, - { - "epoch": 0.6220164732760175, - "grad_norm": 1.6940628579242598, - "learning_rate": 1.3205163464056762e-06, - "loss": 0.9988, - "step": 5173 - }, - { - "epoch": 0.6221367161666567, - "grad_norm": 1.7869909695448898, - "learning_rate": 1.319783760534339e-06, - "loss": 0.9231, - "step": 5174 - }, - { - "epoch": 0.6222569590572957, - "grad_norm": 1.9636108291816925, - "learning_rate": 1.319051277851266e-06, - "loss": 0.9559, - "step": 5175 - }, - { - "epoch": 0.6223772019479348, - "grad_norm": 1.7914954554643623, - "learning_rate": 1.3183188984675716e-06, - "loss": 1.0419, - "step": 5176 - }, - { - "epoch": 0.6224974448385739, - "grad_norm": 1.976557554457437, - "learning_rate": 1.3175866224943586e-06, - "loss": 0.9169, - "step": 5177 - }, - { - "epoch": 0.622617687729213, - "grad_norm": 1.9693356808411888, - "learning_rate": 1.316854450042712e-06, - "loss": 0.9363, - "step": 5178 - }, - { - "epoch": 0.622737930619852, - "grad_norm": 1.8886364023216753, - "learning_rate": 1.3161223812237024e-06, - "loss": 0.9356, - "step": 5179 - }, - { - "epoch": 0.6228581735104912, - "grad_norm": 2.2637061049841325, - "learning_rate": 1.3153904161483842e-06, - "loss": 1.0507, - "step": 5180 - }, - { - "epoch": 0.6229784164011303, - "grad_norm": 1.9312438248717707, - "learning_rate": 1.3146585549277953e-06, - "loss": 1.0598, - "step": 5181 - }, - { - "epoch": 0.6230986592917693, - "grad_norm": 2.5927156890932364, - "learning_rate": 1.3139267976729591e-06, - "loss": 0.981, - "step": 5182 - }, - { - "epoch": 0.6232189021824085, - "grad_norm": 1.546098379289155, - "learning_rate": 1.3131951444948815e-06, - "loss": 0.9122, - "step": 5183 - }, - { - "epoch": 0.6233391450730476, - "grad_norm": 1.8259085888706201, - "learning_rate": 1.3124635955045546e-06, - "loss": 0.9542, - "step": 5184 - }, - { - "epoch": 0.6234593879636866, - "grad_norm": 2.2386154804039045, - "learning_rate": 1.3117321508129537e-06, - "loss": 1.0388, - "step": 5185 - }, - { - "epoch": 0.6235796308543258, - "grad_norm": 1.6093846293040963, - "learning_rate": 1.3110008105310388e-06, - "loss": 0.9619, - "step": 5186 - }, - { - "epoch": 0.6236998737449648, - "grad_norm": 1.4914296407648773, - "learning_rate": 1.3102695747697526e-06, - "loss": 0.9759, - "step": 5187 - }, - { - "epoch": 0.6238201166356039, - "grad_norm": 2.4889585809751953, - "learning_rate": 1.3095384436400237e-06, - "loss": 1.0965, - "step": 5188 - }, - { - "epoch": 0.623940359526243, - "grad_norm": 1.870927456850641, - "learning_rate": 1.3088074172527633e-06, - "loss": 1.0204, - "step": 5189 - }, - { - "epoch": 0.6240606024168821, - "grad_norm": 1.772803575589778, - "learning_rate": 1.3080764957188684e-06, - "loss": 0.9162, - "step": 5190 - }, - { - "epoch": 0.6241808453075212, - "grad_norm": 1.8845697963356896, - "learning_rate": 1.3073456791492192e-06, - "loss": 0.9039, - "step": 5191 - }, - { - "epoch": 0.6243010881981603, - "grad_norm": 1.7436738110376913, - "learning_rate": 1.3066149676546801e-06, - "loss": 0.9834, - "step": 5192 - }, - { - "epoch": 0.6244213310887994, - "grad_norm": 1.470911018906365, - "learning_rate": 1.3058843613460985e-06, - "loss": 0.8527, - "step": 5193 - }, - { - "epoch": 0.6245415739794384, - "grad_norm": 1.7640719039301425, - "learning_rate": 1.3051538603343075e-06, - "loss": 0.9428, - "step": 5194 - }, - { - "epoch": 0.6246618168700776, - "grad_norm": 1.8243582629291715, - "learning_rate": 1.3044234647301235e-06, - "loss": 0.8701, - "step": 5195 - }, - { - "epoch": 0.6247820597607167, - "grad_norm": 1.600809357500155, - "learning_rate": 1.303693174644347e-06, - "loss": 0.9152, - "step": 5196 - }, - { - "epoch": 0.6249023026513557, - "grad_norm": 2.199952242072796, - "learning_rate": 1.3029629901877625e-06, - "loss": 1.006, - "step": 5197 - }, - { - "epoch": 0.6250225455419949, - "grad_norm": 2.151319644383603, - "learning_rate": 1.3022329114711376e-06, - "loss": 0.9817, - "step": 5198 - }, - { - "epoch": 0.6251427884326339, - "grad_norm": 2.0655926723518676, - "learning_rate": 1.3015029386052256e-06, - "loss": 0.899, - "step": 5199 - }, - { - "epoch": 0.625263031323273, - "grad_norm": 1.9055054257950501, - "learning_rate": 1.3007730717007622e-06, - "loss": 0.9275, - "step": 5200 - }, - { - "epoch": 0.6253832742139122, - "grad_norm": 1.6373639348698268, - "learning_rate": 1.3000433108684676e-06, - "loss": 0.9551, - "step": 5201 - }, - { - "epoch": 0.6255035171045512, - "grad_norm": 2.631915836855358, - "learning_rate": 1.2993136562190467e-06, - "loss": 0.9975, - "step": 5202 - }, - { - "epoch": 0.6256237599951903, - "grad_norm": 1.4326151072273154, - "learning_rate": 1.2985841078631871e-06, - "loss": 0.9018, - "step": 5203 - }, - { - "epoch": 0.6257440028858293, - "grad_norm": 1.6182200503289474, - "learning_rate": 1.2978546659115608e-06, - "loss": 0.9822, - "step": 5204 - }, - { - "epoch": 0.6258642457764685, - "grad_norm": 1.813356197308869, - "learning_rate": 1.2971253304748228e-06, - "loss": 1.0547, - "step": 5205 - }, - { - "epoch": 0.6259844886671075, - "grad_norm": 1.5677595499824835, - "learning_rate": 1.296396101663614e-06, - "loss": 0.955, - "step": 5206 - }, - { - "epoch": 0.6261047315577466, - "grad_norm": 2.0352301703151143, - "learning_rate": 1.2956669795885565e-06, - "loss": 1.0385, - "step": 5207 - }, - { - "epoch": 0.6262249744483858, - "grad_norm": 1.9513164233791427, - "learning_rate": 1.294937964360259e-06, - "loss": 0.8928, - "step": 5208 - }, - { - "epoch": 0.6263452173390248, - "grad_norm": 3.929821944923056, - "learning_rate": 1.2942090560893108e-06, - "loss": 0.8969, - "step": 5209 - }, - { - "epoch": 0.6264654602296639, - "grad_norm": 1.6263604704671322, - "learning_rate": 1.2934802548862882e-06, - "loss": 0.8033, - "step": 5210 - }, - { - "epoch": 0.626585703120303, - "grad_norm": 1.891590470895463, - "learning_rate": 1.292751560861749e-06, - "loss": 1.0263, - "step": 5211 - }, - { - "epoch": 0.6267059460109421, - "grad_norm": 1.8791305186085896, - "learning_rate": 1.2920229741262354e-06, - "loss": 1.0008, - "step": 5212 - }, - { - "epoch": 0.6268261889015811, - "grad_norm": 1.96018754820849, - "learning_rate": 1.2912944947902739e-06, - "loss": 0.944, - "step": 5213 - }, - { - "epoch": 0.6269464317922203, - "grad_norm": 2.3306715178508304, - "learning_rate": 1.2905661229643742e-06, - "loss": 0.9148, - "step": 5214 - }, - { - "epoch": 0.6270666746828594, - "grad_norm": 2.1565971647891864, - "learning_rate": 1.2898378587590299e-06, - "loss": 1.0396, - "step": 5215 - }, - { - "epoch": 0.6271869175734984, - "grad_norm": 1.9569958590355025, - "learning_rate": 1.2891097022847173e-06, - "loss": 1.0716, - "step": 5216 - }, - { - "epoch": 0.6273071604641376, - "grad_norm": 1.8604809333092875, - "learning_rate": 1.2883816536518978e-06, - "loss": 0.8731, - "step": 5217 - }, - { - "epoch": 0.6274274033547766, - "grad_norm": 1.6415789056288954, - "learning_rate": 1.2876537129710155e-06, - "loss": 1.0184, - "step": 5218 - }, - { - "epoch": 0.6275476462454157, - "grad_norm": 2.373161708737753, - "learning_rate": 1.286925880352499e-06, - "loss": 0.9437, - "step": 5219 - }, - { - "epoch": 0.6276678891360549, - "grad_norm": 1.5492486947955793, - "learning_rate": 1.2861981559067592e-06, - "loss": 0.8974, - "step": 5220 - }, - { - "epoch": 0.6277881320266939, - "grad_norm": 1.9790361930270899, - "learning_rate": 1.2854705397441917e-06, - "loss": 1.0023, - "step": 5221 - }, - { - "epoch": 0.627908374917333, - "grad_norm": 1.9790673348111611, - "learning_rate": 1.2847430319751747e-06, - "loss": 0.9733, - "step": 5222 - }, - { - "epoch": 0.6280286178079721, - "grad_norm": 2.069620153433545, - "learning_rate": 1.2840156327100712e-06, - "loss": 0.8755, - "step": 5223 - }, - { - "epoch": 0.6281488606986112, - "grad_norm": 1.640675425889867, - "learning_rate": 1.2832883420592272e-06, - "loss": 0.9189, - "step": 5224 - }, - { - "epoch": 0.6282691035892503, - "grad_norm": 1.825329852669079, - "learning_rate": 1.282561160132972e-06, - "loss": 0.8434, - "step": 5225 - }, - { - "epoch": 0.6283893464798894, - "grad_norm": 1.6182266066716864, - "learning_rate": 1.2818340870416186e-06, - "loss": 1.0087, - "step": 5226 - }, - { - "epoch": 0.6285095893705285, - "grad_norm": 1.5842728922368192, - "learning_rate": 1.2811071228954626e-06, - "loss": 0.9536, - "step": 5227 - }, - { - "epoch": 0.6286298322611675, - "grad_norm": 1.8476459269003371, - "learning_rate": 1.2803802678047846e-06, - "loss": 1.0164, - "step": 5228 - }, - { - "epoch": 0.6287500751518067, - "grad_norm": 1.9209106631100845, - "learning_rate": 1.279653521879848e-06, - "loss": 0.9358, - "step": 5229 - }, - { - "epoch": 0.6288703180424458, - "grad_norm": 1.8480814470739435, - "learning_rate": 1.2789268852308997e-06, - "loss": 1.0364, - "step": 5230 - }, - { - "epoch": 0.6289905609330848, - "grad_norm": 1.8149687955081246, - "learning_rate": 1.2782003579681688e-06, - "loss": 0.9005, - "step": 5231 - }, - { - "epoch": 0.629110803823724, - "grad_norm": 1.5815727417167091, - "learning_rate": 1.2774739402018701e-06, - "loss": 0.9424, - "step": 5232 - }, - { - "epoch": 0.629231046714363, - "grad_norm": 2.3926300416880038, - "learning_rate": 1.2767476320422002e-06, - "loss": 0.937, - "step": 5233 - }, - { - "epoch": 0.6293512896050021, - "grad_norm": 0.8460577734345774, - "learning_rate": 1.2760214335993392e-06, - "loss": 0.793, - "step": 5234 - }, - { - "epoch": 0.6294715324956413, - "grad_norm": 1.8051378092900732, - "learning_rate": 1.2752953449834514e-06, - "loss": 0.7859, - "step": 5235 - }, - { - "epoch": 0.6295917753862803, - "grad_norm": 1.6509999250877543, - "learning_rate": 1.2745693663046836e-06, - "loss": 0.9979, - "step": 5236 - }, - { - "epoch": 0.6297120182769194, - "grad_norm": 2.022265827782429, - "learning_rate": 1.2738434976731662e-06, - "loss": 0.9956, - "step": 5237 - }, - { - "epoch": 0.6298322611675584, - "grad_norm": 1.6632729389920133, - "learning_rate": 1.2731177391990125e-06, - "loss": 0.9516, - "step": 5238 - }, - { - "epoch": 0.6299525040581976, - "grad_norm": 1.88740991289002, - "learning_rate": 1.2723920909923203e-06, - "loss": 1.0191, - "step": 5239 - }, - { - "epoch": 0.6300727469488366, - "grad_norm": 0.9398528139053077, - "learning_rate": 1.2716665531631688e-06, - "loss": 0.8481, - "step": 5240 - }, - { - "epoch": 0.6301929898394757, - "grad_norm": 1.7616091269073904, - "learning_rate": 1.270941125821623e-06, - "loss": 0.9706, - "step": 5241 - }, - { - "epoch": 0.6303132327301149, - "grad_norm": 1.6802055092409653, - "learning_rate": 1.2702158090777278e-06, - "loss": 0.9613, - "step": 5242 - }, - { - "epoch": 0.6304334756207539, - "grad_norm": 1.619894369943729, - "learning_rate": 1.2694906030415148e-06, - "loss": 0.9491, - "step": 5243 - }, - { - "epoch": 0.630553718511393, - "grad_norm": 2.176729772839818, - "learning_rate": 1.2687655078229958e-06, - "loss": 1.017, - "step": 5244 - }, - { - "epoch": 0.6306739614020321, - "grad_norm": 1.9854010985312138, - "learning_rate": 1.2680405235321678e-06, - "loss": 0.8979, - "step": 5245 - }, - { - "epoch": 0.6307942042926712, - "grad_norm": 1.8707469387822109, - "learning_rate": 1.267315650279011e-06, - "loss": 0.986, - "step": 5246 - }, - { - "epoch": 0.6309144471833102, - "grad_norm": 1.9401495260444863, - "learning_rate": 1.2665908881734874e-06, - "loss": 0.9491, - "step": 5247 - }, - { - "epoch": 0.6310346900739494, - "grad_norm": 1.980056629527053, - "learning_rate": 1.2658662373255432e-06, - "loss": 1.0522, - "step": 5248 - }, - { - "epoch": 0.6311549329645885, - "grad_norm": 0.908598979695457, - "learning_rate": 1.2651416978451063e-06, - "loss": 0.7677, - "step": 5249 - }, - { - "epoch": 0.6312751758552275, - "grad_norm": 1.6441455669695861, - "learning_rate": 1.2644172698420903e-06, - "loss": 0.8483, - "step": 5250 - }, - { - "epoch": 0.6313954187458667, - "grad_norm": 1.8389403763689474, - "learning_rate": 1.2636929534263892e-06, - "loss": 1.043, - "step": 5251 - }, - { - "epoch": 0.6315156616365057, - "grad_norm": 1.869197577522646, - "learning_rate": 1.2629687487078821e-06, - "loss": 0.9688, - "step": 5252 - }, - { - "epoch": 0.6316359045271448, - "grad_norm": 2.00823447695146, - "learning_rate": 1.2622446557964293e-06, - "loss": 0.963, - "step": 5253 - }, - { - "epoch": 0.631756147417784, - "grad_norm": 1.698958383541079, - "learning_rate": 1.261520674801876e-06, - "loss": 0.8999, - "step": 5254 - }, - { - "epoch": 0.631876390308423, - "grad_norm": 2.059575058708673, - "learning_rate": 1.2607968058340488e-06, - "loss": 0.9257, - "step": 5255 - }, - { - "epoch": 0.6319966331990621, - "grad_norm": 1.8219970636061489, - "learning_rate": 1.2600730490027583e-06, - "loss": 0.9315, - "step": 5256 - }, - { - "epoch": 0.6321168760897012, - "grad_norm": 1.5132684379196764, - "learning_rate": 1.2593494044177984e-06, - "loss": 1.003, - "step": 5257 - }, - { - "epoch": 0.6322371189803403, - "grad_norm": 1.930680610149748, - "learning_rate": 1.2586258721889448e-06, - "loss": 0.9938, - "step": 5258 - }, - { - "epoch": 0.6323573618709794, - "grad_norm": 1.8241876440234732, - "learning_rate": 1.2579024524259573e-06, - "loss": 1.0117, - "step": 5259 - }, - { - "epoch": 0.6324776047616185, - "grad_norm": 1.7272383113790797, - "learning_rate": 1.2571791452385768e-06, - "loss": 1.1118, - "step": 5260 - }, - { - "epoch": 0.6325978476522576, - "grad_norm": 1.5089733023635952, - "learning_rate": 1.2564559507365301e-06, - "loss": 0.9655, - "step": 5261 - }, - { - "epoch": 0.6327180905428966, - "grad_norm": 1.865140678888506, - "learning_rate": 1.2557328690295244e-06, - "loss": 0.9876, - "step": 5262 - }, - { - "epoch": 0.6328383334335358, - "grad_norm": 1.6724078318655426, - "learning_rate": 1.255009900227251e-06, - "loss": 0.9552, - "step": 5263 - }, - { - "epoch": 0.6329585763241748, - "grad_norm": 1.7251222567149853, - "learning_rate": 1.254287044439383e-06, - "loss": 0.9907, - "step": 5264 - }, - { - "epoch": 0.6330788192148139, - "grad_norm": 0.9471700718458421, - "learning_rate": 1.2535643017755776e-06, - "loss": 0.7623, - "step": 5265 - }, - { - "epoch": 0.6331990621054531, - "grad_norm": 2.7783246890068782, - "learning_rate": 1.2528416723454737e-06, - "loss": 0.9225, - "step": 5266 - }, - { - "epoch": 0.6333193049960921, - "grad_norm": 1.594698380678536, - "learning_rate": 1.2521191562586945e-06, - "loss": 0.9075, - "step": 5267 - }, - { - "epoch": 0.6334395478867312, - "grad_norm": 1.8093860447996166, - "learning_rate": 1.2513967536248445e-06, - "loss": 0.9713, - "step": 5268 - }, - { - "epoch": 0.6335597907773702, - "grad_norm": 1.7407160134208803, - "learning_rate": 1.2506744645535117e-06, - "loss": 1.0099, - "step": 5269 - }, - { - "epoch": 0.6336800336680094, - "grad_norm": 1.7585996772601828, - "learning_rate": 1.249952289154267e-06, - "loss": 0.7981, - "step": 5270 - }, - { - "epoch": 0.6338002765586485, - "grad_norm": 1.5100720330292636, - "learning_rate": 1.2492302275366635e-06, - "loss": 0.9591, - "step": 5271 - }, - { - "epoch": 0.6339205194492875, - "grad_norm": 2.3385834778670787, - "learning_rate": 1.2485082798102377e-06, - "loss": 0.8592, - "step": 5272 - }, - { - "epoch": 0.6340407623399267, - "grad_norm": 2.886531591478097, - "learning_rate": 1.2477864460845084e-06, - "loss": 0.8874, - "step": 5273 - }, - { - "epoch": 0.6341610052305657, - "grad_norm": 3.1644571693958694, - "learning_rate": 1.2470647264689776e-06, - "loss": 0.9425, - "step": 5274 - }, - { - "epoch": 0.6342812481212048, - "grad_norm": 2.067718031802041, - "learning_rate": 1.2463431210731282e-06, - "loss": 0.914, - "step": 5275 - }, - { - "epoch": 0.634401491011844, - "grad_norm": 2.090039509669025, - "learning_rate": 1.2456216300064289e-06, - "loss": 0.9616, - "step": 5276 - }, - { - "epoch": 0.634521733902483, - "grad_norm": 1.50165276072531, - "learning_rate": 1.244900253378328e-06, - "loss": 0.9792, - "step": 5277 - }, - { - "epoch": 0.6346419767931221, - "grad_norm": 1.795891135835143, - "learning_rate": 1.2441789912982583e-06, - "loss": 0.898, - "step": 5278 - }, - { - "epoch": 0.6347622196837612, - "grad_norm": 1.7187764946022555, - "learning_rate": 1.2434578438756346e-06, - "loss": 0.8461, - "step": 5279 - }, - { - "epoch": 0.6348824625744003, - "grad_norm": 1.8068260963707174, - "learning_rate": 1.242736811219855e-06, - "loss": 0.9829, - "step": 5280 - }, - { - "epoch": 0.6350027054650393, - "grad_norm": 1.5816655242535265, - "learning_rate": 1.2420158934402988e-06, - "loss": 1.015, - "step": 5281 - }, - { - "epoch": 0.6351229483556785, - "grad_norm": 1.7393404701635278, - "learning_rate": 1.2412950906463286e-06, - "loss": 1.0409, - "step": 5282 - }, - { - "epoch": 0.6352431912463176, - "grad_norm": 1.6317016619370452, - "learning_rate": 1.2405744029472902e-06, - "loss": 1.0938, - "step": 5283 - }, - { - "epoch": 0.6353634341369566, - "grad_norm": 1.8245623834659799, - "learning_rate": 1.2398538304525108e-06, - "loss": 0.956, - "step": 5284 - }, - { - "epoch": 0.6354836770275958, - "grad_norm": 1.8953002627387303, - "learning_rate": 1.2391333732713016e-06, - "loss": 0.9581, - "step": 5285 - }, - { - "epoch": 0.6356039199182348, - "grad_norm": 1.8902105712653139, - "learning_rate": 1.2384130315129543e-06, - "loss": 0.9763, - "step": 5286 - }, - { - "epoch": 0.6357241628088739, - "grad_norm": 1.988848652098183, - "learning_rate": 1.2376928052867447e-06, - "loss": 0.9288, - "step": 5287 - }, - { - "epoch": 0.6358444056995131, - "grad_norm": 1.9580574923560294, - "learning_rate": 1.2369726947019299e-06, - "loss": 0.982, - "step": 5288 - }, - { - "epoch": 0.6359646485901521, - "grad_norm": 1.9326075649552008, - "learning_rate": 1.2362526998677511e-06, - "loss": 0.8607, - "step": 5289 - }, - { - "epoch": 0.6360848914807912, - "grad_norm": 1.8842328677497502, - "learning_rate": 1.2355328208934301e-06, - "loss": 1.0453, - "step": 5290 - }, - { - "epoch": 0.6362051343714303, - "grad_norm": 1.5928050212897833, - "learning_rate": 1.2348130578881728e-06, - "loss": 0.9292, - "step": 5291 - }, - { - "epoch": 0.6363253772620694, - "grad_norm": 1.8468682380978492, - "learning_rate": 1.2340934109611664e-06, - "loss": 0.9592, - "step": 5292 - }, - { - "epoch": 0.6364456201527084, - "grad_norm": 2.8447208633653864, - "learning_rate": 1.2333738802215798e-06, - "loss": 0.8818, - "step": 5293 - }, - { - "epoch": 0.6365658630433476, - "grad_norm": 1.6835282527636397, - "learning_rate": 1.2326544657785668e-06, - "loss": 1.0102, - "step": 5294 - }, - { - "epoch": 0.6366861059339867, - "grad_norm": 2.2411955124401106, - "learning_rate": 1.2319351677412608e-06, - "loss": 0.9455, - "step": 5295 - }, - { - "epoch": 0.6368063488246257, - "grad_norm": 1.6582425206046048, - "learning_rate": 1.2312159862187796e-06, - "loss": 0.9409, - "step": 5296 - }, - { - "epoch": 0.6369265917152649, - "grad_norm": 1.5539621932832492, - "learning_rate": 1.2304969213202217e-06, - "loss": 0.9586, - "step": 5297 - }, - { - "epoch": 0.6370468346059039, - "grad_norm": 2.7274974542565706, - "learning_rate": 1.2297779731546692e-06, - "loss": 0.9903, - "step": 5298 - }, - { - "epoch": 0.637167077496543, - "grad_norm": 1.856404279469097, - "learning_rate": 1.2290591418311853e-06, - "loss": 0.9831, - "step": 5299 - }, - { - "epoch": 0.637287320387182, - "grad_norm": 1.5511648630883368, - "learning_rate": 1.2283404274588172e-06, - "loss": 0.9112, - "step": 5300 - }, - { - "epoch": 0.6374075632778212, - "grad_norm": 0.7956706276850822, - "learning_rate": 1.227621830146592e-06, - "loss": 0.7482, - "step": 5301 - }, - { - "epoch": 0.6375278061684603, - "grad_norm": 1.8636336083402376, - "learning_rate": 1.2269033500035217e-06, - "loss": 0.9903, - "step": 5302 - }, - { - "epoch": 0.6376480490590993, - "grad_norm": 2.02987448261996, - "learning_rate": 1.2261849871385988e-06, - "loss": 0.9435, - "step": 5303 - }, - { - "epoch": 0.6377682919497385, - "grad_norm": 1.884659680170838, - "learning_rate": 1.2254667416607972e-06, - "loss": 0.8173, - "step": 5304 - }, - { - "epoch": 0.6378885348403776, - "grad_norm": 1.6143321016159784, - "learning_rate": 1.2247486136790756e-06, - "loss": 1.0347, - "step": 5305 - }, - { - "epoch": 0.6380087777310166, - "grad_norm": 1.843791314648175, - "learning_rate": 1.2240306033023726e-06, - "loss": 1.0078, - "step": 5306 - }, - { - "epoch": 0.6381290206216558, - "grad_norm": 1.6360099186211874, - "learning_rate": 1.223312710639611e-06, - "loss": 0.9241, - "step": 5307 - }, - { - "epoch": 0.6382492635122948, - "grad_norm": 2.3952636594328114, - "learning_rate": 1.2225949357996928e-06, - "loss": 1.0621, - "step": 5308 - }, - { - "epoch": 0.6383695064029339, - "grad_norm": 1.503343273660032, - "learning_rate": 1.221877278891505e-06, - "loss": 0.9993, - "step": 5309 - }, - { - "epoch": 0.638489749293573, - "grad_norm": 1.84193654506703, - "learning_rate": 1.221159740023915e-06, - "loss": 0.9074, - "step": 5310 - }, - { - "epoch": 0.6386099921842121, - "grad_norm": 1.907882866588851, - "learning_rate": 1.2204423193057735e-06, - "loss": 0.9382, - "step": 5311 - }, - { - "epoch": 0.6387302350748512, - "grad_norm": 0.9863732960243575, - "learning_rate": 1.2197250168459122e-06, - "loss": 0.8831, - "step": 5312 - }, - { - "epoch": 0.6388504779654903, - "grad_norm": 1.9215217203604587, - "learning_rate": 1.2190078327531454e-06, - "loss": 0.949, - "step": 5313 - }, - { - "epoch": 0.6389707208561294, - "grad_norm": 1.3494110182477828, - "learning_rate": 1.2182907671362697e-06, - "loss": 0.9256, - "step": 5314 - }, - { - "epoch": 0.6390909637467684, - "grad_norm": 1.9487302852316062, - "learning_rate": 1.2175738201040626e-06, - "loss": 0.9823, - "step": 5315 - }, - { - "epoch": 0.6392112066374076, - "grad_norm": 1.7067734023831398, - "learning_rate": 1.2168569917652855e-06, - "loss": 0.9855, - "step": 5316 - }, - { - "epoch": 0.6393314495280467, - "grad_norm": 1.4684981272201363, - "learning_rate": 1.2161402822286797e-06, - "loss": 0.8377, - "step": 5317 - }, - { - "epoch": 0.6394516924186857, - "grad_norm": 1.7829054117022098, - "learning_rate": 1.2154236916029703e-06, - "loss": 0.9927, - "step": 5318 - }, - { - "epoch": 0.6395719353093249, - "grad_norm": 2.2361056154656125, - "learning_rate": 1.2147072199968627e-06, - "loss": 0.9353, - "step": 5319 - }, - { - "epoch": 0.6396921781999639, - "grad_norm": 1.6667214225675377, - "learning_rate": 1.2139908675190454e-06, - "loss": 0.917, - "step": 5320 - }, - { - "epoch": 0.639812421090603, - "grad_norm": 1.7180184628179564, - "learning_rate": 1.2132746342781883e-06, - "loss": 0.9544, - "step": 5321 - }, - { - "epoch": 0.6399326639812422, - "grad_norm": 2.463846769788815, - "learning_rate": 1.2125585203829442e-06, - "loss": 0.9897, - "step": 5322 - }, - { - "epoch": 0.6400529068718812, - "grad_norm": 1.7783017437723234, - "learning_rate": 1.211842525941946e-06, - "loss": 0.9383, - "step": 5323 - }, - { - "epoch": 0.6401731497625203, - "grad_norm": 1.6515104749383749, - "learning_rate": 1.2111266510638105e-06, - "loss": 0.9869, - "step": 5324 - }, - { - "epoch": 0.6402933926531594, - "grad_norm": 1.847842830084535, - "learning_rate": 1.2104108958571346e-06, - "loss": 0.9993, - "step": 5325 - }, - { - "epoch": 0.6404136355437985, - "grad_norm": 1.4256985575067511, - "learning_rate": 1.2096952604304975e-06, - "loss": 0.9565, - "step": 5326 - }, - { - "epoch": 0.6405338784344375, - "grad_norm": 2.0848884373372543, - "learning_rate": 1.2089797448924616e-06, - "loss": 0.901, - "step": 5327 - }, - { - "epoch": 0.6406541213250767, - "grad_norm": 2.0582305182391347, - "learning_rate": 1.2082643493515692e-06, - "loss": 0.8506, - "step": 5328 - }, - { - "epoch": 0.6407743642157158, - "grad_norm": 1.6441913171857938, - "learning_rate": 1.207549073916346e-06, - "loss": 1.0163, - "step": 5329 - }, - { - "epoch": 0.6408946071063548, - "grad_norm": 2.0057502575754937, - "learning_rate": 1.2068339186952976e-06, - "loss": 0.986, - "step": 5330 - }, - { - "epoch": 0.6410148499969939, - "grad_norm": 1.6948359650315006, - "learning_rate": 1.2061188837969136e-06, - "loss": 0.9281, - "step": 5331 - }, - { - "epoch": 0.641135092887633, - "grad_norm": 2.710824222011911, - "learning_rate": 1.2054039693296631e-06, - "loss": 1.043, - "step": 5332 - }, - { - "epoch": 0.6412553357782721, - "grad_norm": 1.6146970729348462, - "learning_rate": 1.2046891754019992e-06, - "loss": 1.0076, - "step": 5333 - }, - { - "epoch": 0.6413755786689112, - "grad_norm": 1.9856431768472118, - "learning_rate": 1.2039745021223548e-06, - "loss": 1.02, - "step": 5334 - }, - { - "epoch": 0.6414958215595503, - "grad_norm": 0.946572716935823, - "learning_rate": 1.2032599495991456e-06, - "loss": 0.8151, - "step": 5335 - }, - { - "epoch": 0.6416160644501894, - "grad_norm": 2.3943040251462517, - "learning_rate": 1.2025455179407685e-06, - "loss": 0.8947, - "step": 5336 - }, - { - "epoch": 0.6417363073408284, - "grad_norm": 2.567028045810188, - "learning_rate": 1.2018312072556022e-06, - "loss": 0.9374, - "step": 5337 - }, - { - "epoch": 0.6418565502314676, - "grad_norm": 1.9496665547232233, - "learning_rate": 1.2011170176520077e-06, - "loss": 0.9491, - "step": 5338 - }, - { - "epoch": 0.6419767931221066, - "grad_norm": 1.6371427364677436, - "learning_rate": 1.2004029492383256e-06, - "loss": 1.0074, - "step": 5339 - }, - { - "epoch": 0.6420970360127457, - "grad_norm": 2.0342395790136028, - "learning_rate": 1.1996890021228814e-06, - "loss": 0.9383, - "step": 5340 - }, - { - "epoch": 0.6422172789033849, - "grad_norm": 1.4777344588971195, - "learning_rate": 1.1989751764139785e-06, - "loss": 0.8962, - "step": 5341 - }, - { - "epoch": 0.6423375217940239, - "grad_norm": 1.5508765664583524, - "learning_rate": 1.1982614722199044e-06, - "loss": 1.018, - "step": 5342 - }, - { - "epoch": 0.642457764684663, - "grad_norm": 2.080780970788318, - "learning_rate": 1.1975478896489276e-06, - "loss": 0.9842, - "step": 5343 - }, - { - "epoch": 0.6425780075753021, - "grad_norm": 1.782209305353094, - "learning_rate": 1.1968344288092981e-06, - "loss": 0.9672, - "step": 5344 - }, - { - "epoch": 0.6426982504659412, - "grad_norm": 1.535366209004208, - "learning_rate": 1.1961210898092468e-06, - "loss": 0.8437, - "step": 5345 - }, - { - "epoch": 0.6428184933565803, - "grad_norm": 2.0263625988276357, - "learning_rate": 1.1954078727569874e-06, - "loss": 1.0009, - "step": 5346 - }, - { - "epoch": 0.6429387362472194, - "grad_norm": 1.4981565273693092, - "learning_rate": 1.1946947777607141e-06, - "loss": 0.9771, - "step": 5347 - }, - { - "epoch": 0.6430589791378585, - "grad_norm": 1.638006755079004, - "learning_rate": 1.1939818049286024e-06, - "loss": 0.9941, - "step": 5348 - }, - { - "epoch": 0.6431792220284975, - "grad_norm": 1.4657142595712336, - "learning_rate": 1.1932689543688101e-06, - "loss": 0.9444, - "step": 5349 - }, - { - "epoch": 0.6432994649191367, - "grad_norm": 1.7567952243059073, - "learning_rate": 1.1925562261894756e-06, - "loss": 0.9271, - "step": 5350 - }, - { - "epoch": 0.6434197078097758, - "grad_norm": 1.6169632465948702, - "learning_rate": 1.1918436204987207e-06, - "loss": 0.9734, - "step": 5351 - }, - { - "epoch": 0.6435399507004148, - "grad_norm": 2.25350181613711, - "learning_rate": 1.191131137404645e-06, - "loss": 1.0174, - "step": 5352 - }, - { - "epoch": 0.643660193591054, - "grad_norm": 2.0246399359187763, - "learning_rate": 1.190418777015333e-06, - "loss": 0.9681, - "step": 5353 - }, - { - "epoch": 0.643780436481693, - "grad_norm": 1.8529693152145912, - "learning_rate": 1.1897065394388487e-06, - "loss": 0.9311, - "step": 5354 - }, - { - "epoch": 0.6439006793723321, - "grad_norm": 1.6644403449541263, - "learning_rate": 1.1889944247832385e-06, - "loss": 0.9637, - "step": 5355 - }, - { - "epoch": 0.6440209222629713, - "grad_norm": 1.9964236231371386, - "learning_rate": 1.1882824331565283e-06, - "loss": 0.9022, - "step": 5356 - }, - { - "epoch": 0.6441411651536103, - "grad_norm": 1.913691146015377, - "learning_rate": 1.1875705646667287e-06, - "loss": 1.0916, - "step": 5357 - }, - { - "epoch": 0.6442614080442494, - "grad_norm": 1.761690600523445, - "learning_rate": 1.1868588194218282e-06, - "loss": 0.948, - "step": 5358 - }, - { - "epoch": 0.6443816509348885, - "grad_norm": 1.483416277820222, - "learning_rate": 1.1861471975297979e-06, - "loss": 0.9389, - "step": 5359 - }, - { - "epoch": 0.6445018938255276, - "grad_norm": 1.4910388939720527, - "learning_rate": 1.185435699098591e-06, - "loss": 0.9105, - "step": 5360 - }, - { - "epoch": 0.6446221367161666, - "grad_norm": 2.0481329393975063, - "learning_rate": 1.1847243242361403e-06, - "loss": 0.977, - "step": 5361 - }, - { - "epoch": 0.6447423796068057, - "grad_norm": 1.7378826927437891, - "learning_rate": 1.1840130730503624e-06, - "loss": 0.9826, - "step": 5362 - }, - { - "epoch": 0.6448626224974449, - "grad_norm": 1.6140358109312525, - "learning_rate": 1.1833019456491518e-06, - "loss": 0.9547, - "step": 5363 - }, - { - "epoch": 0.6449828653880839, - "grad_norm": 2.100299668502037, - "learning_rate": 1.1825909421403871e-06, - "loss": 0.9775, - "step": 5364 - }, - { - "epoch": 0.645103108278723, - "grad_norm": 1.8987676388371395, - "learning_rate": 1.181880062631926e-06, - "loss": 0.9567, - "step": 5365 - }, - { - "epoch": 0.6452233511693621, - "grad_norm": 2.082616097647539, - "learning_rate": 1.1811693072316093e-06, - "loss": 1.0545, - "step": 5366 - }, - { - "epoch": 0.6453435940600012, - "grad_norm": 2.173520788794929, - "learning_rate": 1.1804586760472574e-06, - "loss": 1.0359, - "step": 5367 - }, - { - "epoch": 0.6454638369506402, - "grad_norm": 1.9694964492428826, - "learning_rate": 1.1797481691866736e-06, - "loss": 0.9868, - "step": 5368 - }, - { - "epoch": 0.6455840798412794, - "grad_norm": 1.9053388825814705, - "learning_rate": 1.1790377867576393e-06, - "loss": 1.0218, - "step": 5369 - }, - { - "epoch": 0.6457043227319185, - "grad_norm": 1.8471164035683563, - "learning_rate": 1.1783275288679203e-06, - "loss": 0.9613, - "step": 5370 - }, - { - "epoch": 0.6458245656225575, - "grad_norm": 0.9706679708152, - "learning_rate": 1.177617395625262e-06, - "loss": 0.8464, - "step": 5371 - }, - { - "epoch": 0.6459448085131967, - "grad_norm": 1.7426464348756645, - "learning_rate": 1.1769073871373908e-06, - "loss": 0.9558, - "step": 5372 - }, - { - "epoch": 0.6460650514038357, - "grad_norm": 1.6251715056073126, - "learning_rate": 1.176197503512015e-06, - "loss": 1.0392, - "step": 5373 - }, - { - "epoch": 0.6461852942944748, - "grad_norm": 2.59954178146886, - "learning_rate": 1.1754877448568223e-06, - "loss": 1.0219, - "step": 5374 - }, - { - "epoch": 0.646305537185114, - "grad_norm": 1.77029947947792, - "learning_rate": 1.1747781112794837e-06, - "loss": 1.1053, - "step": 5375 - }, - { - "epoch": 0.646425780075753, - "grad_norm": 1.4930614688174624, - "learning_rate": 1.1740686028876487e-06, - "loss": 1.022, - "step": 5376 - }, - { - "epoch": 0.6465460229663921, - "grad_norm": 1.982066275738601, - "learning_rate": 1.1733592197889507e-06, - "loss": 0.9455, - "step": 5377 - }, - { - "epoch": 0.6466662658570312, - "grad_norm": 1.7621384351258358, - "learning_rate": 1.1726499620910014e-06, - "loss": 0.9197, - "step": 5378 - }, - { - "epoch": 0.6467865087476703, - "grad_norm": 7.341364566448785, - "learning_rate": 1.1719408299013955e-06, - "loss": 0.9571, - "step": 5379 - }, - { - "epoch": 0.6469067516383094, - "grad_norm": 2.4584637515716494, - "learning_rate": 1.1712318233277067e-06, - "loss": 0.9629, - "step": 5380 - }, - { - "epoch": 0.6470269945289485, - "grad_norm": 0.7991009458528395, - "learning_rate": 1.1705229424774916e-06, - "loss": 0.7873, - "step": 5381 - }, - { - "epoch": 0.6471472374195876, - "grad_norm": 1.7779868502994185, - "learning_rate": 1.1698141874582867e-06, - "loss": 0.8357, - "step": 5382 - }, - { - "epoch": 0.6472674803102266, - "grad_norm": 1.582035620412559, - "learning_rate": 1.169105558377609e-06, - "loss": 0.9203, - "step": 5383 - }, - { - "epoch": 0.6473877232008658, - "grad_norm": 1.5753503712477905, - "learning_rate": 1.1683970553429587e-06, - "loss": 0.9792, - "step": 5384 - }, - { - "epoch": 0.6475079660915048, - "grad_norm": 1.8098349874317763, - "learning_rate": 1.1676886784618128e-06, - "loss": 1.0143, - "step": 5385 - }, - { - "epoch": 0.6476282089821439, - "grad_norm": 2.1742435411416747, - "learning_rate": 1.1669804278416332e-06, - "loss": 1.0324, - "step": 5386 - }, - { - "epoch": 0.6477484518727831, - "grad_norm": 1.6398637731310153, - "learning_rate": 1.1662723035898602e-06, - "loss": 0.913, - "step": 5387 - }, - { - "epoch": 0.6478686947634221, - "grad_norm": 1.8563116789052727, - "learning_rate": 1.165564305813915e-06, - "loss": 1.0175, - "step": 5388 - }, - { - "epoch": 0.6479889376540612, - "grad_norm": 1.5949419837373513, - "learning_rate": 1.1648564346212019e-06, - "loss": 1.0101, - "step": 5389 - }, - { - "epoch": 0.6481091805447003, - "grad_norm": 1.5906534218637756, - "learning_rate": 1.164148690119104e-06, - "loss": 0.9639, - "step": 5390 - }, - { - "epoch": 0.6482294234353394, - "grad_norm": 1.638293689831541, - "learning_rate": 1.163441072414985e-06, - "loss": 0.9389, - "step": 5391 - }, - { - "epoch": 0.6483496663259785, - "grad_norm": 1.9047321467686311, - "learning_rate": 1.16273358161619e-06, - "loss": 0.8939, - "step": 5392 - }, - { - "epoch": 0.6484699092166175, - "grad_norm": 1.6921726102869563, - "learning_rate": 1.1620262178300446e-06, - "loss": 1.0453, - "step": 5393 - }, - { - "epoch": 0.6485901521072567, - "grad_norm": 1.5745809815103142, - "learning_rate": 1.1613189811638563e-06, - "loss": 0.9531, - "step": 5394 - }, - { - "epoch": 0.6487103949978957, - "grad_norm": 1.568413720430438, - "learning_rate": 1.1606118717249117e-06, - "loss": 0.9775, - "step": 5395 - }, - { - "epoch": 0.6488306378885348, - "grad_norm": 1.9491052384283283, - "learning_rate": 1.1599048896204787e-06, - "loss": 0.8807, - "step": 5396 - }, - { - "epoch": 0.648950880779174, - "grad_norm": 1.7391709012009098, - "learning_rate": 1.1591980349578061e-06, - "loss": 1.0108, - "step": 5397 - }, - { - "epoch": 0.649071123669813, - "grad_norm": 0.8318946618809583, - "learning_rate": 1.158491307844123e-06, - "loss": 0.7695, - "step": 5398 - }, - { - "epoch": 0.6491913665604521, - "grad_norm": 1.829712203174232, - "learning_rate": 1.1577847083866387e-06, - "loss": 1.0446, - "step": 5399 - }, - { - "epoch": 0.6493116094510912, - "grad_norm": 1.7854159977962467, - "learning_rate": 1.1570782366925453e-06, - "loss": 0.9299, - "step": 5400 - }, - { - "epoch": 0.6494318523417303, - "grad_norm": 1.562848776991444, - "learning_rate": 1.1563718928690132e-06, - "loss": 0.9541, - "step": 5401 - }, - { - "epoch": 0.6495520952323693, - "grad_norm": 2.012909116684116, - "learning_rate": 1.1556656770231942e-06, - "loss": 0.9094, - "step": 5402 - }, - { - "epoch": 0.6496723381230085, - "grad_norm": 1.4334512166775968, - "learning_rate": 1.1549595892622207e-06, - "loss": 0.9626, - "step": 5403 - }, - { - "epoch": 0.6497925810136476, - "grad_norm": 0.9004139928205814, - "learning_rate": 1.1542536296932047e-06, - "loss": 0.8297, - "step": 5404 - }, - { - "epoch": 0.6499128239042866, - "grad_norm": 2.017903540809671, - "learning_rate": 1.1535477984232414e-06, - "loss": 0.8995, - "step": 5405 - }, - { - "epoch": 0.6500330667949258, - "grad_norm": 1.6385681337747957, - "learning_rate": 1.152842095559404e-06, - "loss": 0.9649, - "step": 5406 - }, - { - "epoch": 0.6501533096855648, - "grad_norm": 1.595387085489025, - "learning_rate": 1.1521365212087474e-06, - "loss": 0.9582, - "step": 5407 - }, - { - "epoch": 0.6502735525762039, - "grad_norm": 1.6912793118459473, - "learning_rate": 1.1514310754783062e-06, - "loss": 0.8976, - "step": 5408 - }, - { - "epoch": 0.6503937954668431, - "grad_norm": 2.0267847131975123, - "learning_rate": 1.1507257584750964e-06, - "loss": 0.9336, - "step": 5409 - }, - { - "epoch": 0.6505140383574821, - "grad_norm": 1.6923296304261142, - "learning_rate": 1.150020570306113e-06, - "loss": 0.9703, - "step": 5410 - }, - { - "epoch": 0.6506342812481212, - "grad_norm": 1.7416068850148567, - "learning_rate": 1.1493155110783338e-06, - "loss": 0.9479, - "step": 5411 - }, - { - "epoch": 0.6507545241387603, - "grad_norm": 1.9245203312319974, - "learning_rate": 1.1486105808987155e-06, - "loss": 0.9035, - "step": 5412 - }, - { - "epoch": 0.6508747670293994, - "grad_norm": 1.65557322080235, - "learning_rate": 1.1479057798741947e-06, - "loss": 1.0024, - "step": 5413 - }, - { - "epoch": 0.6509950099200384, - "grad_norm": 0.8946439180675141, - "learning_rate": 1.14720110811169e-06, - "loss": 0.775, - "step": 5414 - }, - { - "epoch": 0.6511152528106776, - "grad_norm": 1.9765439239955904, - "learning_rate": 1.146496565718098e-06, - "loss": 0.9625, - "step": 5415 - }, - { - "epoch": 0.6512354957013167, - "grad_norm": 2.3321208074040993, - "learning_rate": 1.1457921528002996e-06, - "loss": 0.9593, - "step": 5416 - }, - { - "epoch": 0.6513557385919557, - "grad_norm": 2.3719080825931065, - "learning_rate": 1.1450878694651522e-06, - "loss": 0.9172, - "step": 5417 - }, - { - "epoch": 0.6514759814825949, - "grad_norm": 2.0821739276716738, - "learning_rate": 1.1443837158194954e-06, - "loss": 0.8223, - "step": 5418 - }, - { - "epoch": 0.651596224373234, - "grad_norm": 1.4721442427866314, - "learning_rate": 1.1436796919701484e-06, - "loss": 0.935, - "step": 5419 - }, - { - "epoch": 0.651716467263873, - "grad_norm": 1.8684001480301748, - "learning_rate": 1.1429757980239115e-06, - "loss": 0.8195, - "step": 5420 - }, - { - "epoch": 0.6518367101545122, - "grad_norm": 4.974837694443867, - "learning_rate": 1.1422720340875636e-06, - "loss": 1.0097, - "step": 5421 - }, - { - "epoch": 0.6519569530451512, - "grad_norm": 2.5094350159191396, - "learning_rate": 1.1415684002678671e-06, - "loss": 0.991, - "step": 5422 - }, - { - "epoch": 0.6520771959357903, - "grad_norm": 2.225483629621137, - "learning_rate": 1.1408648966715617e-06, - "loss": 0.9861, - "step": 5423 - }, - { - "epoch": 0.6521974388264293, - "grad_norm": 1.7469429153042366, - "learning_rate": 1.1401615234053683e-06, - "loss": 0.9206, - "step": 5424 - }, - { - "epoch": 0.6523176817170685, - "grad_norm": 1.7303946244528874, - "learning_rate": 1.1394582805759885e-06, - "loss": 0.9572, - "step": 5425 - }, - { - "epoch": 0.6524379246077076, - "grad_norm": 1.598224492204108, - "learning_rate": 1.1387551682901022e-06, - "loss": 0.9596, - "step": 5426 - }, - { - "epoch": 0.6525581674983466, - "grad_norm": 1.8681462591652012, - "learning_rate": 1.138052186654373e-06, - "loss": 0.9098, - "step": 5427 - }, - { - "epoch": 0.6526784103889858, - "grad_norm": 1.8933255793296282, - "learning_rate": 1.1373493357754417e-06, - "loss": 1.0807, - "step": 5428 - }, - { - "epoch": 0.6527986532796248, - "grad_norm": 2.0008571695255504, - "learning_rate": 1.1366466157599303e-06, - "loss": 0.9709, - "step": 5429 - }, - { - "epoch": 0.6529188961702639, - "grad_norm": 2.1660054983137935, - "learning_rate": 1.1359440267144412e-06, - "loss": 0.9565, - "step": 5430 - }, - { - "epoch": 0.653039139060903, - "grad_norm": 1.7328855287592084, - "learning_rate": 1.1352415687455556e-06, - "loss": 0.9408, - "step": 5431 - }, - { - "epoch": 0.6531593819515421, - "grad_norm": 2.03105538242848, - "learning_rate": 1.1345392419598362e-06, - "loss": 0.8431, - "step": 5432 - }, - { - "epoch": 0.6532796248421812, - "grad_norm": 1.5695755021430127, - "learning_rate": 1.1338370464638263e-06, - "loss": 0.908, - "step": 5433 - }, - { - "epoch": 0.6533998677328203, - "grad_norm": 2.2901474396329964, - "learning_rate": 1.1331349823640474e-06, - "loss": 0.8368, - "step": 5434 - }, - { - "epoch": 0.6535201106234594, - "grad_norm": 2.066059624312866, - "learning_rate": 1.132433049767003e-06, - "loss": 0.9838, - "step": 5435 - }, - { - "epoch": 0.6536403535140984, - "grad_norm": 1.4604487289994164, - "learning_rate": 1.1317312487791748e-06, - "loss": 1.0033, - "step": 5436 - }, - { - "epoch": 0.6537605964047376, - "grad_norm": 2.3414328755954523, - "learning_rate": 1.1310295795070253e-06, - "loss": 0.9253, - "step": 5437 - }, - { - "epoch": 0.6538808392953767, - "grad_norm": 1.7286423803718134, - "learning_rate": 1.1303280420569982e-06, - "loss": 1.0027, - "step": 5438 - }, - { - "epoch": 0.6540010821860157, - "grad_norm": 1.735465729177519, - "learning_rate": 1.1296266365355158e-06, - "loss": 0.9657, - "step": 5439 - }, - { - "epoch": 0.6541213250766549, - "grad_norm": 2.5406450709527286, - "learning_rate": 1.1289253630489806e-06, - "loss": 0.9333, - "step": 5440 - }, - { - "epoch": 0.6542415679672939, - "grad_norm": 1.8802287628106555, - "learning_rate": 1.1282242217037753e-06, - "loss": 0.9431, - "step": 5441 - }, - { - "epoch": 0.654361810857933, - "grad_norm": 1.7072526102988728, - "learning_rate": 1.127523212606262e-06, - "loss": 0.8257, - "step": 5442 - }, - { - "epoch": 0.6544820537485722, - "grad_norm": 1.6254036475413718, - "learning_rate": 1.1268223358627835e-06, - "loss": 0.9256, - "step": 5443 - }, - { - "epoch": 0.6546022966392112, - "grad_norm": 1.6194015311125671, - "learning_rate": 1.126121591579663e-06, - "loss": 0.9046, - "step": 5444 - }, - { - "epoch": 0.6547225395298503, - "grad_norm": 1.596751061573694, - "learning_rate": 1.1254209798632018e-06, - "loss": 0.8895, - "step": 5445 - }, - { - "epoch": 0.6548427824204894, - "grad_norm": 1.6410200824055001, - "learning_rate": 1.124720500819683e-06, - "loss": 1.0469, - "step": 5446 - }, - { - "epoch": 0.6549630253111285, - "grad_norm": 1.736750101963939, - "learning_rate": 1.1240201545553682e-06, - "loss": 1.022, - "step": 5447 - }, - { - "epoch": 0.6550832682017675, - "grad_norm": 2.010400551317946, - "learning_rate": 1.1233199411764987e-06, - "loss": 0.9318, - "step": 5448 - }, - { - "epoch": 0.6552035110924067, - "grad_norm": 2.2293777603547267, - "learning_rate": 1.1226198607892978e-06, - "loss": 0.8908, - "step": 5449 - }, - { - "epoch": 0.6553237539830458, - "grad_norm": 1.9399882919902074, - "learning_rate": 1.1219199134999664e-06, - "loss": 0.9973, - "step": 5450 - }, - { - "epoch": 0.6554439968736848, - "grad_norm": 1.8856080381384759, - "learning_rate": 1.1212200994146863e-06, - "loss": 0.9799, - "step": 5451 - }, - { - "epoch": 0.655564239764324, - "grad_norm": 1.802987425000203, - "learning_rate": 1.120520418639618e-06, - "loss": 0.9533, - "step": 5452 - }, - { - "epoch": 0.655684482654963, - "grad_norm": 1.8234931107043912, - "learning_rate": 1.119820871280903e-06, - "loss": 1.029, - "step": 5453 - }, - { - "epoch": 0.6558047255456021, - "grad_norm": 1.791108739464554, - "learning_rate": 1.1191214574446614e-06, - "loss": 0.9272, - "step": 5454 - }, - { - "epoch": 0.6559249684362413, - "grad_norm": 1.4649920986079634, - "learning_rate": 1.118422177236995e-06, - "loss": 0.9938, - "step": 5455 - }, - { - "epoch": 0.6560452113268803, - "grad_norm": 1.7580086153619987, - "learning_rate": 1.1177230307639835e-06, - "loss": 1.0498, - "step": 5456 - }, - { - "epoch": 0.6561654542175194, - "grad_norm": 2.2685520600703146, - "learning_rate": 1.1170240181316865e-06, - "loss": 0.9887, - "step": 5457 - }, - { - "epoch": 0.6562856971081584, - "grad_norm": 1.90968763223784, - "learning_rate": 1.1163251394461442e-06, - "loss": 0.9986, - "step": 5458 - }, - { - "epoch": 0.6564059399987976, - "grad_norm": 1.8036739921813507, - "learning_rate": 1.1156263948133746e-06, - "loss": 1.0211, - "step": 5459 - }, - { - "epoch": 0.6565261828894366, - "grad_norm": 1.569311021893066, - "learning_rate": 1.1149277843393787e-06, - "loss": 0.9791, - "step": 5460 - }, - { - "epoch": 0.6566464257800757, - "grad_norm": 1.921317104838017, - "learning_rate": 1.1142293081301342e-06, - "loss": 0.8292, - "step": 5461 - }, - { - "epoch": 0.6567666686707149, - "grad_norm": 1.7129525637764436, - "learning_rate": 1.1135309662915995e-06, - "loss": 0.8685, - "step": 5462 - }, - { - "epoch": 0.6568869115613539, - "grad_norm": 2.036703564902217, - "learning_rate": 1.112832758929712e-06, - "loss": 0.7926, - "step": 5463 - }, - { - "epoch": 0.657007154451993, - "grad_norm": 2.0664892053809067, - "learning_rate": 1.11213468615039e-06, - "loss": 0.9406, - "step": 5464 - }, - { - "epoch": 0.6571273973426321, - "grad_norm": 1.453909221402206, - "learning_rate": 1.1114367480595292e-06, - "loss": 0.9601, - "step": 5465 - }, - { - "epoch": 0.6572476402332712, - "grad_norm": 1.7675777877891599, - "learning_rate": 1.1107389447630086e-06, - "loss": 1.0164, - "step": 5466 - }, - { - "epoch": 0.6573678831239103, - "grad_norm": 2.062669226899846, - "learning_rate": 1.1100412763666818e-06, - "loss": 0.9798, - "step": 5467 - }, - { - "epoch": 0.6574881260145494, - "grad_norm": 1.5853246246902917, - "learning_rate": 1.1093437429763865e-06, - "loss": 0.9958, - "step": 5468 - }, - { - "epoch": 0.6576083689051885, - "grad_norm": 1.9121677284571377, - "learning_rate": 1.1086463446979361e-06, - "loss": 0.9363, - "step": 5469 - }, - { - "epoch": 0.6577286117958275, - "grad_norm": 1.6228547240724709, - "learning_rate": 1.1079490816371277e-06, - "loss": 0.9651, - "step": 5470 - }, - { - "epoch": 0.6578488546864667, - "grad_norm": 1.939107289746042, - "learning_rate": 1.1072519538997352e-06, - "loss": 0.932, - "step": 5471 - }, - { - "epoch": 0.6579690975771058, - "grad_norm": 2.383302932047406, - "learning_rate": 1.1065549615915095e-06, - "loss": 1.0135, - "step": 5472 - }, - { - "epoch": 0.6580893404677448, - "grad_norm": 2.2579247469868036, - "learning_rate": 1.105858104818187e-06, - "loss": 0.9702, - "step": 5473 - }, - { - "epoch": 0.658209583358384, - "grad_norm": 2.3555594406255835, - "learning_rate": 1.105161383685478e-06, - "loss": 0.9456, - "step": 5474 - }, - { - "epoch": 0.658329826249023, - "grad_norm": 0.8122460261793959, - "learning_rate": 1.1044647982990771e-06, - "loss": 0.7944, - "step": 5475 - }, - { - "epoch": 0.6584500691396621, - "grad_norm": 1.974157808100657, - "learning_rate": 1.1037683487646536e-06, - "loss": 0.8351, - "step": 5476 - }, - { - "epoch": 0.6585703120303013, - "grad_norm": 1.6112969782737798, - "learning_rate": 1.1030720351878583e-06, - "loss": 0.9724, - "step": 5477 - }, - { - "epoch": 0.6586905549209403, - "grad_norm": 0.865759937497022, - "learning_rate": 1.102375857674323e-06, - "loss": 0.8104, - "step": 5478 - }, - { - "epoch": 0.6588107978115794, - "grad_norm": 1.5740881172293413, - "learning_rate": 1.1016798163296561e-06, - "loss": 1.1028, - "step": 5479 - }, - { - "epoch": 0.6589310407022185, - "grad_norm": 1.7974665455894938, - "learning_rate": 1.1009839112594471e-06, - "loss": 0.8556, - "step": 5480 - }, - { - "epoch": 0.6590512835928576, - "grad_norm": 1.952538974582998, - "learning_rate": 1.1002881425692638e-06, - "loss": 0.924, - "step": 5481 - }, - { - "epoch": 0.6591715264834966, - "grad_norm": 1.873915803888605, - "learning_rate": 1.0995925103646532e-06, - "loss": 0.9532, - "step": 5482 - }, - { - "epoch": 0.6592917693741358, - "grad_norm": 1.506623585065359, - "learning_rate": 1.0988970147511437e-06, - "loss": 0.8709, - "step": 5483 - }, - { - "epoch": 0.6594120122647749, - "grad_norm": 2.0737216734604216, - "learning_rate": 1.0982016558342405e-06, - "loss": 1.0063, - "step": 5484 - }, - { - "epoch": 0.6595322551554139, - "grad_norm": 1.9398119883523866, - "learning_rate": 1.0975064337194291e-06, - "loss": 0.9154, - "step": 5485 - }, - { - "epoch": 0.6596524980460531, - "grad_norm": 1.3209498746142059, - "learning_rate": 1.0968113485121743e-06, - "loss": 0.9016, - "step": 5486 - }, - { - "epoch": 0.6597727409366921, - "grad_norm": 1.925014939188271, - "learning_rate": 1.0961164003179185e-06, - "loss": 0.9985, - "step": 5487 - }, - { - "epoch": 0.6598929838273312, - "grad_norm": 1.710884267597261, - "learning_rate": 1.0954215892420884e-06, - "loss": 1.0304, - "step": 5488 - }, - { - "epoch": 0.6600132267179702, - "grad_norm": 2.128093151240722, - "learning_rate": 1.094726915390082e-06, - "loss": 0.9047, - "step": 5489 - }, - { - "epoch": 0.6601334696086094, - "grad_norm": 3.1592328168832338, - "learning_rate": 1.0940323788672836e-06, - "loss": 0.9017, - "step": 5490 - }, - { - "epoch": 0.6602537124992485, - "grad_norm": 1.5491986018037875, - "learning_rate": 1.0933379797790522e-06, - "loss": 0.9435, - "step": 5491 - }, - { - "epoch": 0.6603739553898875, - "grad_norm": 2.2683779072452026, - "learning_rate": 1.0926437182307293e-06, - "loss": 0.9205, - "step": 5492 - }, - { - "epoch": 0.6604941982805267, - "grad_norm": 1.618549897186935, - "learning_rate": 1.0919495943276338e-06, - "loss": 0.9811, - "step": 5493 - }, - { - "epoch": 0.6606144411711657, - "grad_norm": 2.200446699048628, - "learning_rate": 1.0912556081750611e-06, - "loss": 0.9571, - "step": 5494 - }, - { - "epoch": 0.6607346840618048, - "grad_norm": 2.899912990711744, - "learning_rate": 1.0905617598782909e-06, - "loss": 0.9661, - "step": 5495 - }, - { - "epoch": 0.660854926952444, - "grad_norm": 1.8005328555299327, - "learning_rate": 1.0898680495425775e-06, - "loss": 1.0198, - "step": 5496 - }, - { - "epoch": 0.660975169843083, - "grad_norm": 1.7436135789017353, - "learning_rate": 1.0891744772731594e-06, - "loss": 1.0013, - "step": 5497 - }, - { - "epoch": 0.6610954127337221, - "grad_norm": 1.6319265197779556, - "learning_rate": 1.088481043175248e-06, - "loss": 0.8461, - "step": 5498 - }, - { - "epoch": 0.6612156556243612, - "grad_norm": 2.0987340335568057, - "learning_rate": 1.0877877473540368e-06, - "loss": 0.9555, - "step": 5499 - }, - { - "epoch": 0.6613358985150003, - "grad_norm": 1.762576281472735, - "learning_rate": 1.0870945899147002e-06, - "loss": 0.9244, - "step": 5500 - }, - { - "epoch": 0.6614561414056394, - "grad_norm": 1.6523516138489365, - "learning_rate": 1.0864015709623879e-06, - "loss": 0.9468, - "step": 5501 - }, - { - "epoch": 0.6615763842962785, - "grad_norm": 3.414330792742271, - "learning_rate": 1.0857086906022313e-06, - "loss": 0.9919, - "step": 5502 - }, - { - "epoch": 0.6616966271869176, - "grad_norm": 2.0161367550363605, - "learning_rate": 1.0850159489393388e-06, - "loss": 0.9313, - "step": 5503 - }, - { - "epoch": 0.6618168700775566, - "grad_norm": 1.7092371464851812, - "learning_rate": 1.0843233460787992e-06, - "loss": 1.0172, - "step": 5504 - }, - { - "epoch": 0.6619371129681958, - "grad_norm": 1.946003496243354, - "learning_rate": 1.0836308821256805e-06, - "loss": 0.9712, - "step": 5505 - }, - { - "epoch": 0.6620573558588349, - "grad_norm": 2.4327918334161533, - "learning_rate": 1.0829385571850282e-06, - "loss": 0.983, - "step": 5506 - }, - { - "epoch": 0.6621775987494739, - "grad_norm": 2.424037238851614, - "learning_rate": 1.0822463713618679e-06, - "loss": 1.0322, - "step": 5507 - }, - { - "epoch": 0.6622978416401131, - "grad_norm": 1.9453588886168613, - "learning_rate": 1.0815543247612034e-06, - "loss": 1.0392, - "step": 5508 - }, - { - "epoch": 0.6624180845307521, - "grad_norm": 1.6253212831241906, - "learning_rate": 1.0808624174880168e-06, - "loss": 1.0273, - "step": 5509 - }, - { - "epoch": 0.6625383274213912, - "grad_norm": 1.6063118829940835, - "learning_rate": 1.080170649647272e-06, - "loss": 0.9941, - "step": 5510 - }, - { - "epoch": 0.6626585703120303, - "grad_norm": 1.6207463271349365, - "learning_rate": 1.0794790213439068e-06, - "loss": 0.8673, - "step": 5511 - }, - { - "epoch": 0.6627788132026694, - "grad_norm": 1.737964661303684, - "learning_rate": 1.078787532682843e-06, - "loss": 0.9783, - "step": 5512 - }, - { - "epoch": 0.6628990560933085, - "grad_norm": 2.2818276705988443, - "learning_rate": 1.0780961837689773e-06, - "loss": 0.9622, - "step": 5513 - }, - { - "epoch": 0.6630192989839476, - "grad_norm": 1.7612201805649745, - "learning_rate": 1.0774049747071883e-06, - "loss": 0.8966, - "step": 5514 - }, - { - "epoch": 0.6631395418745867, - "grad_norm": 1.5150866781435999, - "learning_rate": 1.076713905602332e-06, - "loss": 0.8859, - "step": 5515 - }, - { - "epoch": 0.6632597847652257, - "grad_norm": 1.6462160301324338, - "learning_rate": 1.07602297655924e-06, - "loss": 1.0077, - "step": 5516 - }, - { - "epoch": 0.6633800276558649, - "grad_norm": 1.6384275706687166, - "learning_rate": 1.0753321876827292e-06, - "loss": 1.0054, - "step": 5517 - }, - { - "epoch": 0.663500270546504, - "grad_norm": 1.9388271831444528, - "learning_rate": 1.0746415390775893e-06, - "loss": 0.9428, - "step": 5518 - }, - { - "epoch": 0.663620513437143, - "grad_norm": 1.775689061159638, - "learning_rate": 1.0739510308485939e-06, - "loss": 0.9729, - "step": 5519 - }, - { - "epoch": 0.6637407563277821, - "grad_norm": 0.8428833714080336, - "learning_rate": 1.07326066310049e-06, - "loss": 0.8394, - "step": 5520 - }, - { - "epoch": 0.6638609992184212, - "grad_norm": 1.8876744726523609, - "learning_rate": 1.0725704359380059e-06, - "loss": 0.999, - "step": 5521 - }, - { - "epoch": 0.6639812421090603, - "grad_norm": 2.173686088864062, - "learning_rate": 1.0718803494658497e-06, - "loss": 0.9226, - "step": 5522 - }, - { - "epoch": 0.6641014849996993, - "grad_norm": 2.140203281266535, - "learning_rate": 1.071190403788707e-06, - "loss": 1.0333, - "step": 5523 - }, - { - "epoch": 0.6642217278903385, - "grad_norm": 2.644798276596754, - "learning_rate": 1.0705005990112415e-06, - "loss": 0.9489, - "step": 5524 - }, - { - "epoch": 0.6643419707809776, - "grad_norm": 2.1194834067164656, - "learning_rate": 1.0698109352380957e-06, - "loss": 0.9447, - "step": 5525 - }, - { - "epoch": 0.6644622136716166, - "grad_norm": 1.7361987036754778, - "learning_rate": 1.0691214125738909e-06, - "loss": 0.9756, - "step": 5526 - }, - { - "epoch": 0.6645824565622558, - "grad_norm": 0.8448890485660835, - "learning_rate": 1.0684320311232287e-06, - "loss": 0.8107, - "step": 5527 - }, - { - "epoch": 0.6647026994528948, - "grad_norm": 1.7813699916110435, - "learning_rate": 1.0677427909906865e-06, - "loss": 1.0091, - "step": 5528 - }, - { - "epoch": 0.6648229423435339, - "grad_norm": 1.9060743282231425, - "learning_rate": 1.0670536922808216e-06, - "loss": 0.9175, - "step": 5529 - }, - { - "epoch": 0.6649431852341731, - "grad_norm": 1.914315218687937, - "learning_rate": 1.06636473509817e-06, - "loss": 0.9107, - "step": 5530 - }, - { - "epoch": 0.6650634281248121, - "grad_norm": 2.088605339113458, - "learning_rate": 1.0656759195472447e-06, - "loss": 1.0054, - "step": 5531 - }, - { - "epoch": 0.6651836710154512, - "grad_norm": 0.8732565131022362, - "learning_rate": 1.0649872457325414e-06, - "loss": 0.8383, - "step": 5532 - }, - { - "epoch": 0.6653039139060903, - "grad_norm": 0.921977182964489, - "learning_rate": 1.0642987137585278e-06, - "loss": 0.8095, - "step": 5533 - }, - { - "epoch": 0.6654241567967294, - "grad_norm": 1.5049634512078476, - "learning_rate": 1.0636103237296561e-06, - "loss": 1.0202, - "step": 5534 - }, - { - "epoch": 0.6655443996873684, - "grad_norm": 2.3105148867845933, - "learning_rate": 1.062922075750353e-06, - "loss": 1.0397, - "step": 5535 - }, - { - "epoch": 0.6656646425780076, - "grad_norm": 1.8621240377198958, - "learning_rate": 1.0622339699250267e-06, - "loss": 0.9138, - "step": 5536 - }, - { - "epoch": 0.6657848854686467, - "grad_norm": 1.7483849566135174, - "learning_rate": 1.0615460063580624e-06, - "loss": 0.9975, - "step": 5537 - }, - { - "epoch": 0.6659051283592857, - "grad_norm": 4.483423431134048, - "learning_rate": 1.060858185153821e-06, - "loss": 0.9269, - "step": 5538 - }, - { - "epoch": 0.6660253712499249, - "grad_norm": 2.0105624236666837, - "learning_rate": 1.0601705064166474e-06, - "loss": 0.9591, - "step": 5539 - }, - { - "epoch": 0.666145614140564, - "grad_norm": 1.914680599139818, - "learning_rate": 1.0594829702508596e-06, - "loss": 0.9313, - "step": 5540 - }, - { - "epoch": 0.666265857031203, - "grad_norm": 1.7114003570017744, - "learning_rate": 1.0587955767607592e-06, - "loss": 0.75, - "step": 5541 - }, - { - "epoch": 0.6663860999218422, - "grad_norm": 3.5517135084534015, - "learning_rate": 1.0581083260506206e-06, - "loss": 0.9797, - "step": 5542 - }, - { - "epoch": 0.6665063428124812, - "grad_norm": 2.279110885393135, - "learning_rate": 1.0574212182246993e-06, - "loss": 0.9632, - "step": 5543 - }, - { - "epoch": 0.6666265857031203, - "grad_norm": 2.5299750991018066, - "learning_rate": 1.0567342533872303e-06, - "loss": 0.9512, - "step": 5544 - }, - { - "epoch": 0.6667468285937594, - "grad_norm": 1.6431443262541623, - "learning_rate": 1.0560474316424255e-06, - "loss": 1.0131, - "step": 5545 - }, - { - "epoch": 0.6668670714843985, - "grad_norm": 2.3017965515010124, - "learning_rate": 1.0553607530944746e-06, - "loss": 0.9364, - "step": 5546 - }, - { - "epoch": 0.6669873143750376, - "grad_norm": 1.7922884579253289, - "learning_rate": 1.0546742178475463e-06, - "loss": 1.0921, - "step": 5547 - }, - { - "epoch": 0.6671075572656767, - "grad_norm": 1.7284912107650818, - "learning_rate": 1.0539878260057868e-06, - "loss": 1.0631, - "step": 5548 - }, - { - "epoch": 0.6672278001563158, - "grad_norm": 2.3386521911557328, - "learning_rate": 1.0533015776733226e-06, - "loss": 0.8792, - "step": 5549 - }, - { - "epoch": 0.6673480430469548, - "grad_norm": 2.1793246497028593, - "learning_rate": 1.0526154729542566e-06, - "loss": 0.9822, - "step": 5550 - }, - { - "epoch": 0.6674682859375939, - "grad_norm": 5.270731184903167, - "learning_rate": 1.0519295119526699e-06, - "loss": 0.9994, - "step": 5551 - }, - { - "epoch": 0.667588528828233, - "grad_norm": 1.5468135012337063, - "learning_rate": 1.0512436947726227e-06, - "loss": 1.0297, - "step": 5552 - }, - { - "epoch": 0.6677087717188721, - "grad_norm": 3.1307148367583912, - "learning_rate": 1.0505580215181517e-06, - "loss": 0.8519, - "step": 5553 - }, - { - "epoch": 0.6678290146095112, - "grad_norm": 0.8670939833246545, - "learning_rate": 1.0498724922932753e-06, - "loss": 0.7878, - "step": 5554 - }, - { - "epoch": 0.6679492575001503, - "grad_norm": 2.171384687446346, - "learning_rate": 1.0491871072019851e-06, - "loss": 1.0583, - "step": 5555 - }, - { - "epoch": 0.6680695003907894, - "grad_norm": 1.8377437066634796, - "learning_rate": 1.0485018663482555e-06, - "loss": 0.8371, - "step": 5556 - }, - { - "epoch": 0.6681897432814284, - "grad_norm": 2.8875184558096945, - "learning_rate": 1.0478167698360354e-06, - "loss": 0.9092, - "step": 5557 - }, - { - "epoch": 0.6683099861720676, - "grad_norm": 1.952982904987356, - "learning_rate": 1.0471318177692556e-06, - "loss": 0.8941, - "step": 5558 - }, - { - "epoch": 0.6684302290627067, - "grad_norm": 2.0824095648904577, - "learning_rate": 1.046447010251821e-06, - "loss": 0.9521, - "step": 5559 - }, - { - "epoch": 0.6685504719533457, - "grad_norm": 1.6845722231646774, - "learning_rate": 1.0457623473876157e-06, - "loss": 0.9605, - "step": 5560 - }, - { - "epoch": 0.6686707148439849, - "grad_norm": 1.6889262353521226, - "learning_rate": 1.0450778292805046e-06, - "loss": 0.902, - "step": 5561 - }, - { - "epoch": 0.6687909577346239, - "grad_norm": 1.8018542752379048, - "learning_rate": 1.0443934560343267e-06, - "loss": 0.9884, - "step": 5562 - }, - { - "epoch": 0.668911200625263, - "grad_norm": 1.7805534473207676, - "learning_rate": 1.0437092277529034e-06, - "loss": 0.9753, - "step": 5563 - }, - { - "epoch": 0.6690314435159022, - "grad_norm": 2.0291995923806443, - "learning_rate": 1.0430251445400292e-06, - "loss": 0.9389, - "step": 5564 - }, - { - "epoch": 0.6691516864065412, - "grad_norm": 2.04429767048384, - "learning_rate": 1.0423412064994787e-06, - "loss": 0.8235, - "step": 5565 - }, - { - "epoch": 0.6692719292971803, - "grad_norm": 1.952279480070526, - "learning_rate": 1.0416574137350064e-06, - "loss": 0.934, - "step": 5566 - }, - { - "epoch": 0.6693921721878194, - "grad_norm": 2.721613636454082, - "learning_rate": 1.0409737663503428e-06, - "loss": 1.0159, - "step": 5567 - }, - { - "epoch": 0.6695124150784585, - "grad_norm": 1.6981062831977405, - "learning_rate": 1.040290264449196e-06, - "loss": 1.035, - "step": 5568 - }, - { - "epoch": 0.6696326579690975, - "grad_norm": 1.7748388727690163, - "learning_rate": 1.0396069081352532e-06, - "loss": 0.8386, - "step": 5569 - }, - { - "epoch": 0.6697529008597367, - "grad_norm": 0.8737389469129782, - "learning_rate": 1.0389236975121782e-06, - "loss": 0.7901, - "step": 5570 - }, - { - "epoch": 0.6698731437503758, - "grad_norm": 2.038216484798366, - "learning_rate": 1.0382406326836147e-06, - "loss": 0.9143, - "step": 5571 - }, - { - "epoch": 0.6699933866410148, - "grad_norm": 1.9437195410595827, - "learning_rate": 1.0375577137531828e-06, - "loss": 0.948, - "step": 5572 - }, - { - "epoch": 0.670113629531654, - "grad_norm": 1.5424011731754685, - "learning_rate": 1.0368749408244802e-06, - "loss": 0.9159, - "step": 5573 - }, - { - "epoch": 0.670233872422293, - "grad_norm": 1.6580077866163194, - "learning_rate": 1.0361923140010836e-06, - "loss": 0.9834, - "step": 5574 - }, - { - "epoch": 0.6703541153129321, - "grad_norm": 2.343766682883334, - "learning_rate": 1.0355098333865455e-06, - "loss": 0.8367, - "step": 5575 - }, - { - "epoch": 0.6704743582035713, - "grad_norm": 1.6203991240518576, - "learning_rate": 1.0348274990844006e-06, - "loss": 0.8934, - "step": 5576 - }, - { - "epoch": 0.6705946010942103, - "grad_norm": 1.6385443436620355, - "learning_rate": 1.034145311198155e-06, - "loss": 0.9233, - "step": 5577 - }, - { - "epoch": 0.6707148439848494, - "grad_norm": 1.8802948893932812, - "learning_rate": 1.0334632698312989e-06, - "loss": 0.8393, - "step": 5578 - }, - { - "epoch": 0.6708350868754885, - "grad_norm": 1.7585322962038519, - "learning_rate": 1.032781375087295e-06, - "loss": 0.952, - "step": 5579 - }, - { - "epoch": 0.6709553297661276, - "grad_norm": 1.4654960299034316, - "learning_rate": 1.0320996270695891e-06, - "loss": 0.8693, - "step": 5580 - }, - { - "epoch": 0.6710755726567667, - "grad_norm": 1.586434948707738, - "learning_rate": 1.0314180258815998e-06, - "loss": 0.9324, - "step": 5581 - }, - { - "epoch": 0.6711958155474057, - "grad_norm": 1.5191891635743324, - "learning_rate": 1.0307365716267247e-06, - "loss": 0.943, - "step": 5582 - }, - { - "epoch": 0.6713160584380449, - "grad_norm": 2.052656555152528, - "learning_rate": 1.0300552644083423e-06, - "loss": 0.9818, - "step": 5583 - }, - { - "epoch": 0.6714363013286839, - "grad_norm": 5.089192604226631, - "learning_rate": 1.0293741043298036e-06, - "loss": 0.9337, - "step": 5584 - }, - { - "epoch": 0.671556544219323, - "grad_norm": 2.9816872841847357, - "learning_rate": 1.0286930914944436e-06, - "loss": 0.9194, - "step": 5585 - }, - { - "epoch": 0.6716767871099621, - "grad_norm": 2.144088727253822, - "learning_rate": 1.0280122260055684e-06, - "loss": 0.9777, - "step": 5586 - }, - { - "epoch": 0.6717970300006012, - "grad_norm": 1.8750998152549612, - "learning_rate": 1.0273315079664652e-06, - "loss": 1.0229, - "step": 5587 - }, - { - "epoch": 0.6719172728912403, - "grad_norm": 7.445002514369276, - "learning_rate": 1.0266509374803992e-06, - "loss": 0.9469, - "step": 5588 - }, - { - "epoch": 0.6720375157818794, - "grad_norm": 3.0909935587800765, - "learning_rate": 1.0259705146506123e-06, - "loss": 1.0454, - "step": 5589 - }, - { - "epoch": 0.6721577586725185, - "grad_norm": 2.1158358492791853, - "learning_rate": 1.025290239580324e-06, - "loss": 0.9702, - "step": 5590 - }, - { - "epoch": 0.6722780015631575, - "grad_norm": 1.4845440416956341, - "learning_rate": 1.0246101123727313e-06, - "loss": 0.9528, - "step": 5591 - }, - { - "epoch": 0.6723982444537967, - "grad_norm": 2.123785008169446, - "learning_rate": 1.0239301331310085e-06, - "loss": 0.9837, - "step": 5592 - }, - { - "epoch": 0.6725184873444358, - "grad_norm": 1.5208161322609455, - "learning_rate": 1.0232503019583088e-06, - "loss": 1.0821, - "step": 5593 - }, - { - "epoch": 0.6726387302350748, - "grad_norm": 2.424720126646768, - "learning_rate": 1.0225706189577619e-06, - "loss": 0.8927, - "step": 5594 - }, - { - "epoch": 0.672758973125714, - "grad_norm": 1.7685833343595492, - "learning_rate": 1.021891084232475e-06, - "loss": 0.9419, - "step": 5595 - }, - { - "epoch": 0.672879216016353, - "grad_norm": 1.9073375038737825, - "learning_rate": 1.0212116978855325e-06, - "loss": 0.9889, - "step": 5596 - }, - { - "epoch": 0.6729994589069921, - "grad_norm": 1.686967765798632, - "learning_rate": 1.020532460019997e-06, - "loss": 0.9907, - "step": 5597 - }, - { - "epoch": 0.6731197017976313, - "grad_norm": 1.622760109293562, - "learning_rate": 1.0198533707389096e-06, - "loss": 0.8938, - "step": 5598 - }, - { - "epoch": 0.6732399446882703, - "grad_norm": 1.6261114574299214, - "learning_rate": 1.0191744301452853e-06, - "loss": 0.9349, - "step": 5599 - }, - { - "epoch": 0.6733601875789094, - "grad_norm": 1.6892457338612066, - "learning_rate": 1.0184956383421208e-06, - "loss": 0.8957, - "step": 5600 - }, - { - "epoch": 0.6734804304695485, - "grad_norm": 1.967453666054189, - "learning_rate": 1.017816995432387e-06, - "loss": 0.8507, - "step": 5601 - }, - { - "epoch": 0.6736006733601876, - "grad_norm": 1.6891414994389762, - "learning_rate": 1.0171385015190353e-06, - "loss": 0.9336, - "step": 5602 - }, - { - "epoch": 0.6737209162508266, - "grad_norm": 1.8074057448677125, - "learning_rate": 1.0164601567049908e-06, - "loss": 0.9231, - "step": 5603 - }, - { - "epoch": 0.6738411591414658, - "grad_norm": 1.5525434268645806, - "learning_rate": 1.015781961093158e-06, - "loss": 1.0045, - "step": 5604 - }, - { - "epoch": 0.6739614020321049, - "grad_norm": 2.2756341239077855, - "learning_rate": 1.0151039147864197e-06, - "loss": 0.9704, - "step": 5605 - }, - { - "epoch": 0.6740816449227439, - "grad_norm": 1.8224941817663016, - "learning_rate": 1.0144260178876336e-06, - "loss": 0.8637, - "step": 5606 - }, - { - "epoch": 0.6742018878133831, - "grad_norm": 2.0651528608073972, - "learning_rate": 1.0137482704996388e-06, - "loss": 0.8702, - "step": 5607 - }, - { - "epoch": 0.6743221307040221, - "grad_norm": 1.9774323611260056, - "learning_rate": 1.0130706727252461e-06, - "loss": 0.9915, - "step": 5608 - }, - { - "epoch": 0.6744423735946612, - "grad_norm": 2.1042450868200913, - "learning_rate": 1.0123932246672468e-06, - "loss": 0.875, - "step": 5609 - }, - { - "epoch": 0.6745626164853004, - "grad_norm": 0.8307213299579951, - "learning_rate": 1.0117159264284114e-06, - "loss": 0.779, - "step": 5610 - }, - { - "epoch": 0.6746828593759394, - "grad_norm": 1.7203814306490282, - "learning_rate": 1.0110387781114837e-06, - "loss": 0.9713, - "step": 5611 - }, - { - "epoch": 0.6748031022665785, - "grad_norm": 2.1255951216363544, - "learning_rate": 1.0103617798191872e-06, - "loss": 0.9631, - "step": 5612 - }, - { - "epoch": 0.6749233451572175, - "grad_norm": 3.2123396659055827, - "learning_rate": 1.0096849316542217e-06, - "loss": 1.0179, - "step": 5613 - }, - { - "epoch": 0.6750435880478567, - "grad_norm": 2.3955908168839013, - "learning_rate": 1.0090082337192643e-06, - "loss": 0.9414, - "step": 5614 - }, - { - "epoch": 0.6751638309384957, - "grad_norm": 2.4199549785871373, - "learning_rate": 1.0083316861169705e-06, - "loss": 0.9746, - "step": 5615 - }, - { - "epoch": 0.6752840738291348, - "grad_norm": 2.127159479953376, - "learning_rate": 1.0076552889499713e-06, - "loss": 0.9222, - "step": 5616 - }, - { - "epoch": 0.675404316719774, - "grad_norm": 2.0251335194214666, - "learning_rate": 1.006979042320876e-06, - "loss": 0.9335, - "step": 5617 - }, - { - "epoch": 0.675524559610413, - "grad_norm": 2.3495636108710785, - "learning_rate": 1.0063029463322702e-06, - "loss": 0.8358, - "step": 5618 - }, - { - "epoch": 0.6756448025010521, - "grad_norm": 2.2092033567754994, - "learning_rate": 1.0056270010867164e-06, - "loss": 0.946, - "step": 5619 - }, - { - "epoch": 0.6757650453916912, - "grad_norm": 2.517080324378823, - "learning_rate": 1.004951206686758e-06, - "loss": 0.9799, - "step": 5620 - }, - { - "epoch": 0.6758852882823303, - "grad_norm": 1.8398691487432117, - "learning_rate": 1.0042755632349087e-06, - "loss": 0.9147, - "step": 5621 - }, - { - "epoch": 0.6760055311729694, - "grad_norm": 2.240639713936008, - "learning_rate": 1.0036000708336653e-06, - "loss": 0.8288, - "step": 5622 - }, - { - "epoch": 0.6761257740636085, - "grad_norm": 1.9634205572552992, - "learning_rate": 1.0029247295854984e-06, - "loss": 0.9922, - "step": 5623 - }, - { - "epoch": 0.6762460169542476, - "grad_norm": 1.7999230447849228, - "learning_rate": 1.0022495395928588e-06, - "loss": 0.9117, - "step": 5624 - }, - { - "epoch": 0.6763662598448866, - "grad_norm": 0.8839588992653237, - "learning_rate": 1.0015745009581697e-06, - "loss": 0.8349, - "step": 5625 - }, - { - "epoch": 0.6764865027355258, - "grad_norm": 1.7261960430400014, - "learning_rate": 1.0008996137838343e-06, - "loss": 0.8676, - "step": 5626 - }, - { - "epoch": 0.6766067456261649, - "grad_norm": 1.9651643707600173, - "learning_rate": 1.000224878172234e-06, - "loss": 0.999, - "step": 5627 - }, - { - "epoch": 0.6767269885168039, - "grad_norm": 1.8915004752448683, - "learning_rate": 9.99550294225724e-07, - "loss": 0.9315, - "step": 5628 - }, - { - "epoch": 0.6768472314074431, - "grad_norm": 1.8708952159318228, - "learning_rate": 9.988758620466402e-07, - "loss": 0.9176, - "step": 5629 - }, - { - "epoch": 0.6769674742980821, - "grad_norm": 1.5210116904547446, - "learning_rate": 9.982015817372917e-07, - "loss": 0.9609, - "step": 5630 - }, - { - "epoch": 0.6770877171887212, - "grad_norm": 1.806700273508558, - "learning_rate": 9.975274533999657e-07, - "loss": 1.0152, - "step": 5631 - }, - { - "epoch": 0.6772079600793603, - "grad_norm": 2.5391654125778493, - "learning_rate": 9.96853477136929e-07, - "loss": 1.0416, - "step": 5632 - }, - { - "epoch": 0.6773282029699994, - "grad_norm": 1.8113556899884922, - "learning_rate": 9.96179653050422e-07, - "loss": 0.9422, - "step": 5633 - }, - { - "epoch": 0.6774484458606385, - "grad_norm": 1.846254877847208, - "learning_rate": 9.955059812426635e-07, - "loss": 0.9312, - "step": 5634 - }, - { - "epoch": 0.6775686887512776, - "grad_norm": 2.066244021647644, - "learning_rate": 9.948324618158493e-07, - "loss": 1.0234, - "step": 5635 - }, - { - "epoch": 0.6776889316419167, - "grad_norm": 3.037117696180321, - "learning_rate": 9.941590948721502e-07, - "loss": 0.985, - "step": 5636 - }, - { - "epoch": 0.6778091745325557, - "grad_norm": 1.6161010336027162, - "learning_rate": 9.934858805137188e-07, - "loss": 0.9532, - "step": 5637 - }, - { - "epoch": 0.6779294174231949, - "grad_norm": 1.6943960217642617, - "learning_rate": 9.92812818842677e-07, - "loss": 1.0032, - "step": 5638 - }, - { - "epoch": 0.678049660313834, - "grad_norm": 1.5749874084211533, - "learning_rate": 9.921399099611306e-07, - "loss": 0.8418, - "step": 5639 - }, - { - "epoch": 0.678169903204473, - "grad_norm": 3.4174242032735167, - "learning_rate": 9.914671539711588e-07, - "loss": 0.8907, - "step": 5640 - }, - { - "epoch": 0.6782901460951122, - "grad_norm": 1.673847496606107, - "learning_rate": 9.90794550974817e-07, - "loss": 0.9812, - "step": 5641 - }, - { - "epoch": 0.6784103889857512, - "grad_norm": 2.2156345899882814, - "learning_rate": 9.901221010741407e-07, - "loss": 1.0098, - "step": 5642 - }, - { - "epoch": 0.6785306318763903, - "grad_norm": 1.768907314893599, - "learning_rate": 9.894498043711375e-07, - "loss": 0.949, - "step": 5643 - }, - { - "epoch": 0.6786508747670293, - "grad_norm": 2.047729663195084, - "learning_rate": 9.887776609677962e-07, - "loss": 0.8971, - "step": 5644 - }, - { - "epoch": 0.6787711176576685, - "grad_norm": 1.628776343857915, - "learning_rate": 9.88105670966079e-07, - "loss": 0.9226, - "step": 5645 - }, - { - "epoch": 0.6788913605483076, - "grad_norm": 6.379863006173335, - "learning_rate": 9.874338344679283e-07, - "loss": 0.9852, - "step": 5646 - }, - { - "epoch": 0.6790116034389466, - "grad_norm": 1.6112885441398124, - "learning_rate": 9.86762151575259e-07, - "loss": 0.9418, - "step": 5647 - }, - { - "epoch": 0.6791318463295858, - "grad_norm": 1.9153380834084834, - "learning_rate": 9.860906223899651e-07, - "loss": 0.9977, - "step": 5648 - }, - { - "epoch": 0.6792520892202248, - "grad_norm": 1.4760440138118642, - "learning_rate": 9.854192470139184e-07, - "loss": 0.9513, - "step": 5649 - }, - { - "epoch": 0.6793723321108639, - "grad_norm": 2.2028046503575207, - "learning_rate": 9.847480255489645e-07, - "loss": 0.9143, - "step": 5650 - }, - { - "epoch": 0.6794925750015031, - "grad_norm": 1.7967208132726598, - "learning_rate": 9.840769580969295e-07, - "loss": 0.8846, - "step": 5651 - }, - { - "epoch": 0.6796128178921421, - "grad_norm": 1.8920040933189712, - "learning_rate": 9.834060447596114e-07, - "loss": 0.9935, - "step": 5652 - }, - { - "epoch": 0.6797330607827812, - "grad_norm": 1.8396886934354606, - "learning_rate": 9.827352856387868e-07, - "loss": 0.975, - "step": 5653 - }, - { - "epoch": 0.6798533036734203, - "grad_norm": 0.8658818562752322, - "learning_rate": 9.820646808362118e-07, - "loss": 0.8509, - "step": 5654 - }, - { - "epoch": 0.6799735465640594, - "grad_norm": 2.2160910133786555, - "learning_rate": 9.813942304536154e-07, - "loss": 0.9187, - "step": 5655 - }, - { - "epoch": 0.6800937894546984, - "grad_norm": 5.894351337131752, - "learning_rate": 9.807239345927043e-07, - "loss": 0.8395, - "step": 5656 - }, - { - "epoch": 0.6802140323453376, - "grad_norm": 1.9208163315137108, - "learning_rate": 9.80053793355162e-07, - "loss": 0.9298, - "step": 5657 - }, - { - "epoch": 0.6803342752359767, - "grad_norm": 2.0757855318748177, - "learning_rate": 9.793838068426472e-07, - "loss": 0.9421, - "step": 5658 - }, - { - "epoch": 0.6804545181266157, - "grad_norm": 1.972097729468442, - "learning_rate": 9.78713975156799e-07, - "loss": 0.8085, - "step": 5659 - }, - { - "epoch": 0.6805747610172549, - "grad_norm": 1.782113652245289, - "learning_rate": 9.780442983992273e-07, - "loss": 0.9174, - "step": 5660 - }, - { - "epoch": 0.680695003907894, - "grad_norm": 1.6309748625838385, - "learning_rate": 9.773747766715238e-07, - "loss": 0.9119, - "step": 5661 - }, - { - "epoch": 0.680815246798533, - "grad_norm": 1.4979224918059446, - "learning_rate": 9.767054100752536e-07, - "loss": 0.9979, - "step": 5662 - }, - { - "epoch": 0.6809354896891722, - "grad_norm": 2.343930351947663, - "learning_rate": 9.760361987119584e-07, - "loss": 1.0083, - "step": 5663 - }, - { - "epoch": 0.6810557325798112, - "grad_norm": 1.8199917752216326, - "learning_rate": 9.753671426831592e-07, - "loss": 0.8797, - "step": 5664 - }, - { - "epoch": 0.6811759754704503, - "grad_norm": 1.9507568651171672, - "learning_rate": 9.746982420903483e-07, - "loss": 0.9888, - "step": 5665 - }, - { - "epoch": 0.6812962183610894, - "grad_norm": 1.4471624134239482, - "learning_rate": 9.740294970349993e-07, - "loss": 0.9455, - "step": 5666 - }, - { - "epoch": 0.6814164612517285, - "grad_norm": 0.9776146980467646, - "learning_rate": 9.733609076185594e-07, - "loss": 0.8737, - "step": 5667 - }, - { - "epoch": 0.6815367041423676, - "grad_norm": 1.6743816942605863, - "learning_rate": 9.72692473942455e-07, - "loss": 1.035, - "step": 5668 - }, - { - "epoch": 0.6816569470330067, - "grad_norm": 1.6656050718450142, - "learning_rate": 9.720241961080849e-07, - "loss": 0.9734, - "step": 5669 - }, - { - "epoch": 0.6817771899236458, - "grad_norm": 1.7842926006365323, - "learning_rate": 9.713560742168259e-07, - "loss": 0.9148, - "step": 5670 - }, - { - "epoch": 0.6818974328142848, - "grad_norm": 1.822651812976811, - "learning_rate": 9.706881083700333e-07, - "loss": 0.9117, - "step": 5671 - }, - { - "epoch": 0.682017675704924, - "grad_norm": 2.007927204818709, - "learning_rate": 9.700202986690357e-07, - "loss": 1.0246, - "step": 5672 - }, - { - "epoch": 0.682137918595563, - "grad_norm": 1.6317273782430566, - "learning_rate": 9.693526452151413e-07, - "loss": 0.8529, - "step": 5673 - }, - { - "epoch": 0.6822581614862021, - "grad_norm": 1.7393315603223551, - "learning_rate": 9.686851481096305e-07, - "loss": 0.9595, - "step": 5674 - }, - { - "epoch": 0.6823784043768413, - "grad_norm": 2.1165633178871563, - "learning_rate": 9.68017807453762e-07, - "loss": 0.9278, - "step": 5675 - }, - { - "epoch": 0.6824986472674803, - "grad_norm": 1.708889715570405, - "learning_rate": 9.673506233487721e-07, - "loss": 0.9301, - "step": 5676 - }, - { - "epoch": 0.6826188901581194, - "grad_norm": 1.7012687156258808, - "learning_rate": 9.666835958958717e-07, - "loss": 1.0623, - "step": 5677 - }, - { - "epoch": 0.6827391330487584, - "grad_norm": 2.256221646371143, - "learning_rate": 9.660167251962484e-07, - "loss": 0.9938, - "step": 5678 - }, - { - "epoch": 0.6828593759393976, - "grad_norm": 1.6175748941573023, - "learning_rate": 9.653500113510654e-07, - "loss": 0.9775, - "step": 5679 - }, - { - "epoch": 0.6829796188300367, - "grad_norm": 2.2000845632773505, - "learning_rate": 9.646834544614627e-07, - "loss": 0.8771, - "step": 5680 - }, - { - "epoch": 0.6830998617206757, - "grad_norm": 2.0801015073509728, - "learning_rate": 9.64017054628558e-07, - "loss": 0.9585, - "step": 5681 - }, - { - "epoch": 0.6832201046113149, - "grad_norm": 1.7640747421285867, - "learning_rate": 9.63350811953441e-07, - "loss": 0.9909, - "step": 5682 - }, - { - "epoch": 0.6833403475019539, - "grad_norm": 2.104360540038399, - "learning_rate": 9.626847265371826e-07, - "loss": 0.9026, - "step": 5683 - }, - { - "epoch": 0.683460590392593, - "grad_norm": 1.9832451675223446, - "learning_rate": 9.620187984808262e-07, - "loss": 0.9796, - "step": 5684 - }, - { - "epoch": 0.6835808332832322, - "grad_norm": 1.7481836020419492, - "learning_rate": 9.613530278853919e-07, - "loss": 1.0579, - "step": 5685 - }, - { - "epoch": 0.6837010761738712, - "grad_norm": 1.6721886313913474, - "learning_rate": 9.60687414851879e-07, - "loss": 0.9414, - "step": 5686 - }, - { - "epoch": 0.6838213190645103, - "grad_norm": 1.916188207975033, - "learning_rate": 9.600219594812575e-07, - "loss": 0.9637, - "step": 5687 - }, - { - "epoch": 0.6839415619551494, - "grad_norm": 1.5582597800183078, - "learning_rate": 9.593566618744786e-07, - "loss": 0.9272, - "step": 5688 - }, - { - "epoch": 0.6840618048457885, - "grad_norm": 1.632455303053436, - "learning_rate": 9.58691522132466e-07, - "loss": 0.9295, - "step": 5689 - }, - { - "epoch": 0.6841820477364275, - "grad_norm": 1.9028534568655184, - "learning_rate": 9.58026540356123e-07, - "loss": 1.046, - "step": 5690 - }, - { - "epoch": 0.6843022906270667, - "grad_norm": 1.9415692696544662, - "learning_rate": 9.573617166463246e-07, - "loss": 1.0638, - "step": 5691 - }, - { - "epoch": 0.6844225335177058, - "grad_norm": 1.796101411900184, - "learning_rate": 9.56697051103924e-07, - "loss": 0.7962, - "step": 5692 - }, - { - "epoch": 0.6845427764083448, - "grad_norm": 1.9029330803575548, - "learning_rate": 9.560325438297522e-07, - "loss": 1.0016, - "step": 5693 - }, - { - "epoch": 0.684663019298984, - "grad_norm": 1.6211439745372984, - "learning_rate": 9.553681949246127e-07, - "loss": 1.0714, - "step": 5694 - }, - { - "epoch": 0.684783262189623, - "grad_norm": 1.905204611375391, - "learning_rate": 9.547040044892886e-07, - "loss": 0.9591, - "step": 5695 - }, - { - "epoch": 0.6849035050802621, - "grad_norm": 0.891789260945376, - "learning_rate": 9.540399726245354e-07, - "loss": 0.8361, - "step": 5696 - }, - { - "epoch": 0.6850237479709013, - "grad_norm": 1.7018919514607915, - "learning_rate": 9.533760994310859e-07, - "loss": 0.8908, - "step": 5697 - }, - { - "epoch": 0.6851439908615403, - "grad_norm": 2.004209380246526, - "learning_rate": 9.527123850096508e-07, - "loss": 0.9532, - "step": 5698 - }, - { - "epoch": 0.6852642337521794, - "grad_norm": 1.654321519472203, - "learning_rate": 9.520488294609142e-07, - "loss": 0.9161, - "step": 5699 - }, - { - "epoch": 0.6853844766428185, - "grad_norm": 0.9693903191227301, - "learning_rate": 9.513854328855368e-07, - "loss": 0.7941, - "step": 5700 - }, - { - "epoch": 0.6855047195334576, - "grad_norm": 1.8672187196044898, - "learning_rate": 9.507221953841558e-07, - "loss": 1.012, - "step": 5701 - }, - { - "epoch": 0.6856249624240967, - "grad_norm": 1.60189309778606, - "learning_rate": 9.500591170573824e-07, - "loss": 0.9738, - "step": 5702 - }, - { - "epoch": 0.6857452053147358, - "grad_norm": 1.8756720292253286, - "learning_rate": 9.493961980058078e-07, - "loss": 0.9426, - "step": 5703 - }, - { - "epoch": 0.6858654482053749, - "grad_norm": 1.9984886896587861, - "learning_rate": 9.48733438329993e-07, - "loss": 0.8807, - "step": 5704 - }, - { - "epoch": 0.6859856910960139, - "grad_norm": 1.6011831439055149, - "learning_rate": 9.480708381304807e-07, - "loss": 0.9366, - "step": 5705 - }, - { - "epoch": 0.6861059339866531, - "grad_norm": 6.918845450165891, - "learning_rate": 9.474083975077858e-07, - "loss": 1.0365, - "step": 5706 - }, - { - "epoch": 0.6862261768772921, - "grad_norm": 2.188097082121768, - "learning_rate": 9.467461165623994e-07, - "loss": 0.9984, - "step": 5707 - }, - { - "epoch": 0.6863464197679312, - "grad_norm": 1.8454659445821007, - "learning_rate": 9.46083995394791e-07, - "loss": 0.9965, - "step": 5708 - }, - { - "epoch": 0.6864666626585703, - "grad_norm": 1.9256328657138817, - "learning_rate": 9.454220341054012e-07, - "loss": 0.8335, - "step": 5709 - }, - { - "epoch": 0.6865869055492094, - "grad_norm": 1.8094591743773616, - "learning_rate": 9.447602327946512e-07, - "loss": 1.0052, - "step": 5710 - }, - { - "epoch": 0.6867071484398485, - "grad_norm": 1.8202479273267298, - "learning_rate": 9.440985915629338e-07, - "loss": 0.9688, - "step": 5711 - }, - { - "epoch": 0.6868273913304875, - "grad_norm": 11.964728972270237, - "learning_rate": 9.434371105106223e-07, - "loss": 0.9174, - "step": 5712 - }, - { - "epoch": 0.6869476342211267, - "grad_norm": 1.8925104749180546, - "learning_rate": 9.427757897380602e-07, - "loss": 0.9101, - "step": 5713 - }, - { - "epoch": 0.6870678771117658, - "grad_norm": 1.9639275843638855, - "learning_rate": 9.421146293455695e-07, - "loss": 1.0509, - "step": 5714 - }, - { - "epoch": 0.6871881200024048, - "grad_norm": 1.64653440424344, - "learning_rate": 9.414536294334489e-07, - "loss": 0.8808, - "step": 5715 - }, - { - "epoch": 0.687308362893044, - "grad_norm": 1.6794321265262977, - "learning_rate": 9.407927901019708e-07, - "loss": 0.8988, - "step": 5716 - }, - { - "epoch": 0.687428605783683, - "grad_norm": 1.8747627744014366, - "learning_rate": 9.401321114513854e-07, - "loss": 0.9682, - "step": 5717 - }, - { - "epoch": 0.6875488486743221, - "grad_norm": 1.7102368457356902, - "learning_rate": 9.394715935819155e-07, - "loss": 0.9514, - "step": 5718 - }, - { - "epoch": 0.6876690915649613, - "grad_norm": 1.9340781481712788, - "learning_rate": 9.388112365937608e-07, - "loss": 0.8236, - "step": 5719 - }, - { - "epoch": 0.6877893344556003, - "grad_norm": 2.4267641002182123, - "learning_rate": 9.381510405870985e-07, - "loss": 1.0233, - "step": 5720 - }, - { - "epoch": 0.6879095773462394, - "grad_norm": 3.261546531288008, - "learning_rate": 9.374910056620791e-07, - "loss": 0.9772, - "step": 5721 - }, - { - "epoch": 0.6880298202368785, - "grad_norm": 1.6150522925116069, - "learning_rate": 9.368311319188293e-07, - "loss": 1.0161, - "step": 5722 - }, - { - "epoch": 0.6881500631275176, - "grad_norm": 1.7937749907450726, - "learning_rate": 9.361714194574515e-07, - "loss": 0.9974, - "step": 5723 - }, - { - "epoch": 0.6882703060181566, - "grad_norm": 0.7823497660904425, - "learning_rate": 9.355118683780228e-07, - "loss": 0.7929, - "step": 5724 - }, - { - "epoch": 0.6883905489087958, - "grad_norm": 2.101872077461182, - "learning_rate": 9.348524787805987e-07, - "loss": 0.9942, - "step": 5725 - }, - { - "epoch": 0.6885107917994349, - "grad_norm": 2.5131905664762657, - "learning_rate": 9.341932507652053e-07, - "loss": 1.0517, - "step": 5726 - }, - { - "epoch": 0.6886310346900739, - "grad_norm": 1.7492242183489084, - "learning_rate": 9.335341844318489e-07, - "loss": 0.9796, - "step": 5727 - }, - { - "epoch": 0.6887512775807131, - "grad_norm": 1.7897993677531827, - "learning_rate": 9.328752798805091e-07, - "loss": 0.9349, - "step": 5728 - }, - { - "epoch": 0.6888715204713521, - "grad_norm": 2.354350721427996, - "learning_rate": 9.322165372111399e-07, - "loss": 0.9549, - "step": 5729 - }, - { - "epoch": 0.6889917633619912, - "grad_norm": 1.8084112285355702, - "learning_rate": 9.315579565236747e-07, - "loss": 0.9597, - "step": 5730 - }, - { - "epoch": 0.6891120062526304, - "grad_norm": 1.7904463167896614, - "learning_rate": 9.308995379180162e-07, - "loss": 0.9461, - "step": 5731 - }, - { - "epoch": 0.6892322491432694, - "grad_norm": 0.8420752399199726, - "learning_rate": 9.302412814940488e-07, - "loss": 0.8316, - "step": 5732 - }, - { - "epoch": 0.6893524920339085, - "grad_norm": 1.983275762358199, - "learning_rate": 9.295831873516276e-07, - "loss": 0.8972, - "step": 5733 - }, - { - "epoch": 0.6894727349245476, - "grad_norm": 1.4364033537982486, - "learning_rate": 9.289252555905873e-07, - "loss": 0.96, - "step": 5734 - }, - { - "epoch": 0.6895929778151867, - "grad_norm": 1.7785748923263045, - "learning_rate": 9.282674863107334e-07, - "loss": 0.9589, - "step": 5735 - }, - { - "epoch": 0.6897132207058257, - "grad_norm": 2.1779501495891633, - "learning_rate": 9.276098796118488e-07, - "loss": 0.9562, - "step": 5736 - }, - { - "epoch": 0.6898334635964649, - "grad_norm": 1.6193310817709659, - "learning_rate": 9.269524355936938e-07, - "loss": 0.8671, - "step": 5737 - }, - { - "epoch": 0.689953706487104, - "grad_norm": 1.6361223467988137, - "learning_rate": 9.262951543560002e-07, - "loss": 1.0471, - "step": 5738 - }, - { - "epoch": 0.690073949377743, - "grad_norm": 2.0571548094477916, - "learning_rate": 9.256380359984795e-07, - "loss": 1.052, - "step": 5739 - }, - { - "epoch": 0.6901941922683821, - "grad_norm": 1.8730179801358964, - "learning_rate": 9.249810806208139e-07, - "loss": 0.9384, - "step": 5740 - }, - { - "epoch": 0.6903144351590212, - "grad_norm": 2.0087618828299187, - "learning_rate": 9.243242883226627e-07, - "loss": 1.0085, - "step": 5741 - }, - { - "epoch": 0.6904346780496603, - "grad_norm": 1.840327756031118, - "learning_rate": 9.236676592036628e-07, - "loss": 0.8934, - "step": 5742 - }, - { - "epoch": 0.6905549209402994, - "grad_norm": 1.546611339563335, - "learning_rate": 9.230111933634228e-07, - "loss": 0.9236, - "step": 5743 - }, - { - "epoch": 0.6906751638309385, - "grad_norm": 1.4526138226924585, - "learning_rate": 9.223548909015288e-07, - "loss": 1.0058, - "step": 5744 - }, - { - "epoch": 0.6907954067215776, - "grad_norm": 1.766894938758209, - "learning_rate": 9.216987519175407e-07, - "loss": 0.9144, - "step": 5745 - }, - { - "epoch": 0.6909156496122166, - "grad_norm": 1.6081523510133684, - "learning_rate": 9.210427765109942e-07, - "loss": 0.8848, - "step": 5746 - }, - { - "epoch": 0.6910358925028558, - "grad_norm": 1.9157085580944389, - "learning_rate": 9.20386964781402e-07, - "loss": 1.0126, - "step": 5747 - }, - { - "epoch": 0.6911561353934949, - "grad_norm": 2.0863331623084433, - "learning_rate": 9.197313168282472e-07, - "loss": 1.0414, - "step": 5748 - }, - { - "epoch": 0.6912763782841339, - "grad_norm": 1.948113565812197, - "learning_rate": 9.190758327509935e-07, - "loss": 0.9198, - "step": 5749 - }, - { - "epoch": 0.6913966211747731, - "grad_norm": 0.9305963321469217, - "learning_rate": 9.184205126490767e-07, - "loss": 0.8657, - "step": 5750 - }, - { - "epoch": 0.6915168640654121, - "grad_norm": 1.0122619113499232, - "learning_rate": 9.177653566219075e-07, - "loss": 0.8352, - "step": 5751 - }, - { - "epoch": 0.6916371069560512, - "grad_norm": 2.250119841880856, - "learning_rate": 9.171103647688744e-07, - "loss": 0.9613, - "step": 5752 - }, - { - "epoch": 0.6917573498466904, - "grad_norm": 1.8146247248813951, - "learning_rate": 9.164555371893367e-07, - "loss": 0.8906, - "step": 5753 - }, - { - "epoch": 0.6918775927373294, - "grad_norm": 1.7900330688576007, - "learning_rate": 9.158008739826333e-07, - "loss": 0.9546, - "step": 5754 - }, - { - "epoch": 0.6919978356279685, - "grad_norm": 1.5086729134601542, - "learning_rate": 9.151463752480744e-07, - "loss": 1.0616, - "step": 5755 - }, - { - "epoch": 0.6921180785186076, - "grad_norm": 1.3046693629300483, - "learning_rate": 9.144920410849493e-07, - "loss": 0.9959, - "step": 5756 - }, - { - "epoch": 0.6922383214092467, - "grad_norm": 2.317547933335862, - "learning_rate": 9.138378715925176e-07, - "loss": 1.002, - "step": 5757 - }, - { - "epoch": 0.6923585642998857, - "grad_norm": 1.6010707193227616, - "learning_rate": 9.131838668700167e-07, - "loss": 1.0092, - "step": 5758 - }, - { - "epoch": 0.6924788071905249, - "grad_norm": 1.5906939658332249, - "learning_rate": 9.125300270166598e-07, - "loss": 1.067, - "step": 5759 - }, - { - "epoch": 0.692599050081164, - "grad_norm": 1.7704575831867662, - "learning_rate": 9.118763521316324e-07, - "loss": 1.0645, - "step": 5760 - }, - { - "epoch": 0.692719292971803, - "grad_norm": 1.5357458326831819, - "learning_rate": 9.112228423140987e-07, - "loss": 0.9567, - "step": 5761 - }, - { - "epoch": 0.6928395358624422, - "grad_norm": 2.385913769317307, - "learning_rate": 9.105694976631932e-07, - "loss": 1.0603, - "step": 5762 - }, - { - "epoch": 0.6929597787530812, - "grad_norm": 1.9027047255415819, - "learning_rate": 9.099163182780283e-07, - "loss": 0.9246, - "step": 5763 - }, - { - "epoch": 0.6930800216437203, - "grad_norm": 2.422593041081001, - "learning_rate": 9.092633042576916e-07, - "loss": 0.6941, - "step": 5764 - }, - { - "epoch": 0.6932002645343595, - "grad_norm": 1.8737833526269219, - "learning_rate": 9.086104557012446e-07, - "loss": 0.7721, - "step": 5765 - }, - { - "epoch": 0.6933205074249985, - "grad_norm": 1.714409844979283, - "learning_rate": 9.079577727077239e-07, - "loss": 0.8583, - "step": 5766 - }, - { - "epoch": 0.6934407503156376, - "grad_norm": 2.1650630689777532, - "learning_rate": 9.073052553761404e-07, - "loss": 0.9253, - "step": 5767 - }, - { - "epoch": 0.6935609932062767, - "grad_norm": 1.6212614776981513, - "learning_rate": 9.066529038054805e-07, - "loss": 0.9684, - "step": 5768 - }, - { - "epoch": 0.6936812360969158, - "grad_norm": 1.6541719175522849, - "learning_rate": 9.060007180947071e-07, - "loss": 0.9352, - "step": 5769 - }, - { - "epoch": 0.6938014789875548, - "grad_norm": 1.9635257737168343, - "learning_rate": 9.053486983427534e-07, - "loss": 0.9341, - "step": 5770 - }, - { - "epoch": 0.6939217218781939, - "grad_norm": 2.000666149780417, - "learning_rate": 9.046968446485326e-07, - "loss": 0.908, - "step": 5771 - }, - { - "epoch": 0.6940419647688331, - "grad_norm": 2.494762083341421, - "learning_rate": 9.040451571109295e-07, - "loss": 0.902, - "step": 5772 - }, - { - "epoch": 0.6941622076594721, - "grad_norm": 0.9825517026386725, - "learning_rate": 9.033936358288042e-07, - "loss": 0.8454, - "step": 5773 - }, - { - "epoch": 0.6942824505501112, - "grad_norm": 1.6358084320774509, - "learning_rate": 9.027422809009937e-07, - "loss": 1.0195, - "step": 5774 - }, - { - "epoch": 0.6944026934407503, - "grad_norm": 1.5833698402345409, - "learning_rate": 9.020910924263054e-07, - "loss": 1.0285, - "step": 5775 - }, - { - "epoch": 0.6945229363313894, - "grad_norm": 0.9115132378889691, - "learning_rate": 9.014400705035261e-07, - "loss": 0.8182, - "step": 5776 - }, - { - "epoch": 0.6946431792220285, - "grad_norm": 1.8339430994298627, - "learning_rate": 9.00789215231414e-07, - "loss": 0.9672, - "step": 5777 - }, - { - "epoch": 0.6947634221126676, - "grad_norm": 1.6105779947831347, - "learning_rate": 9.001385267087056e-07, - "loss": 1.0189, - "step": 5778 - }, - { - "epoch": 0.6948836650033067, - "grad_norm": 1.614886355933809, - "learning_rate": 8.994880050341072e-07, - "loss": 0.9022, - "step": 5779 - }, - { - "epoch": 0.6950039078939457, - "grad_norm": 1.6659694007820314, - "learning_rate": 8.988376503063026e-07, - "loss": 0.9799, - "step": 5780 - }, - { - "epoch": 0.6951241507845849, - "grad_norm": 1.8461351645881607, - "learning_rate": 8.981874626239521e-07, - "loss": 1.0169, - "step": 5781 - }, - { - "epoch": 0.695244393675224, - "grad_norm": 1.9808042945493263, - "learning_rate": 8.975374420856872e-07, - "loss": 1.0846, - "step": 5782 - }, - { - "epoch": 0.695364636565863, - "grad_norm": 2.140613639411574, - "learning_rate": 8.968875887901157e-07, - "loss": 0.92, - "step": 5783 - }, - { - "epoch": 0.6954848794565022, - "grad_norm": 1.8277911019862199, - "learning_rate": 8.9623790283582e-07, - "loss": 0.8323, - "step": 5784 - }, - { - "epoch": 0.6956051223471412, - "grad_norm": 1.9190780011377035, - "learning_rate": 8.955883843213561e-07, - "loss": 0.9626, - "step": 5785 - }, - { - "epoch": 0.6957253652377803, - "grad_norm": 1.7439656442180536, - "learning_rate": 8.949390333452569e-07, - "loss": 1.0736, - "step": 5786 - }, - { - "epoch": 0.6958456081284194, - "grad_norm": 1.7585636822686053, - "learning_rate": 8.942898500060279e-07, - "loss": 0.8769, - "step": 5787 - }, - { - "epoch": 0.6959658510190585, - "grad_norm": 2.3986261647727685, - "learning_rate": 8.936408344021493e-07, - "loss": 0.918, - "step": 5788 - }, - { - "epoch": 0.6960860939096976, - "grad_norm": 1.887112973177588, - "learning_rate": 8.929919866320765e-07, - "loss": 0.9072, - "step": 5789 - }, - { - "epoch": 0.6962063368003367, - "grad_norm": 2.2077937876562377, - "learning_rate": 8.923433067942385e-07, - "loss": 1.0064, - "step": 5790 - }, - { - "epoch": 0.6963265796909758, - "grad_norm": 1.661440794402757, - "learning_rate": 8.916947949870417e-07, - "loss": 0.8866, - "step": 5791 - }, - { - "epoch": 0.6964468225816148, - "grad_norm": 0.9424413791305718, - "learning_rate": 8.910464513088615e-07, - "loss": 0.8275, - "step": 5792 - }, - { - "epoch": 0.696567065472254, - "grad_norm": 1.7025305034128668, - "learning_rate": 8.903982758580542e-07, - "loss": 0.9874, - "step": 5793 - }, - { - "epoch": 0.696687308362893, - "grad_norm": 1.7922819397046887, - "learning_rate": 8.897502687329457e-07, - "loss": 1.0104, - "step": 5794 - }, - { - "epoch": 0.6968075512535321, - "grad_norm": 1.6884537580019083, - "learning_rate": 8.891024300318382e-07, - "loss": 1.0007, - "step": 5795 - }, - { - "epoch": 0.6969277941441713, - "grad_norm": 1.836172275547227, - "learning_rate": 8.884547598530103e-07, - "loss": 0.9532, - "step": 5796 - }, - { - "epoch": 0.6970480370348103, - "grad_norm": 1.843794999947406, - "learning_rate": 8.8780725829471e-07, - "loss": 0.948, - "step": 5797 - }, - { - "epoch": 0.6971682799254494, - "grad_norm": 1.755941386954287, - "learning_rate": 8.87159925455165e-07, - "loss": 0.9693, - "step": 5798 - }, - { - "epoch": 0.6972885228160886, - "grad_norm": 2.4646305530073636, - "learning_rate": 8.865127614325738e-07, - "loss": 0.9317, - "step": 5799 - }, - { - "epoch": 0.6974087657067276, - "grad_norm": 1.8082260516055049, - "learning_rate": 8.85865766325113e-07, - "loss": 0.8661, - "step": 5800 - }, - { - "epoch": 0.6975290085973667, - "grad_norm": 2.8788257520335074, - "learning_rate": 8.852189402309287e-07, - "loss": 0.9248, - "step": 5801 - }, - { - "epoch": 0.6976492514880057, - "grad_norm": 2.245546808407698, - "learning_rate": 8.845722832481441e-07, - "loss": 0.9347, - "step": 5802 - }, - { - "epoch": 0.6977694943786449, - "grad_norm": 1.767950838531776, - "learning_rate": 8.83925795474858e-07, - "loss": 0.9745, - "step": 5803 - }, - { - "epoch": 0.6978897372692839, - "grad_norm": 2.170503251747098, - "learning_rate": 8.832794770091414e-07, - "loss": 0.8044, - "step": 5804 - }, - { - "epoch": 0.698009980159923, - "grad_norm": 2.142623080231432, - "learning_rate": 8.826333279490401e-07, - "loss": 1.0233, - "step": 5805 - }, - { - "epoch": 0.6981302230505622, - "grad_norm": 2.1061976485722025, - "learning_rate": 8.819873483925748e-07, - "loss": 0.8804, - "step": 5806 - }, - { - "epoch": 0.6982504659412012, - "grad_norm": 2.28546077948821, - "learning_rate": 8.81341538437739e-07, - "loss": 0.951, - "step": 5807 - }, - { - "epoch": 0.6983707088318403, - "grad_norm": 1.7397913855974716, - "learning_rate": 8.80695898182503e-07, - "loss": 0.8813, - "step": 5808 - }, - { - "epoch": 0.6984909517224794, - "grad_norm": 1.115633971448029, - "learning_rate": 8.800504277248093e-07, - "loss": 0.8838, - "step": 5809 - }, - { - "epoch": 0.6986111946131185, - "grad_norm": 1.6940622246043087, - "learning_rate": 8.794051271625753e-07, - "loss": 0.9462, - "step": 5810 - }, - { - "epoch": 0.6987314375037575, - "grad_norm": 1.4984443067728204, - "learning_rate": 8.787599965936925e-07, - "loss": 1.0262, - "step": 5811 - }, - { - "epoch": 0.6988516803943967, - "grad_norm": 1.5748877227969313, - "learning_rate": 8.781150361160261e-07, - "loss": 0.9198, - "step": 5812 - }, - { - "epoch": 0.6989719232850358, - "grad_norm": 1.6996822032084957, - "learning_rate": 8.774702458274181e-07, - "loss": 0.9314, - "step": 5813 - }, - { - "epoch": 0.6990921661756748, - "grad_norm": 4.008905748720469, - "learning_rate": 8.768256258256799e-07, - "loss": 0.9037, - "step": 5814 - }, - { - "epoch": 0.699212409066314, - "grad_norm": 1.588897273383141, - "learning_rate": 8.76181176208602e-07, - "loss": 0.9436, - "step": 5815 - }, - { - "epoch": 0.699332651956953, - "grad_norm": 1.6853595863864794, - "learning_rate": 8.755368970739461e-07, - "loss": 0.929, - "step": 5816 - }, - { - "epoch": 0.6994528948475921, - "grad_norm": 2.5402757768279542, - "learning_rate": 8.748927885194479e-07, - "loss": 0.8162, - "step": 5817 - }, - { - "epoch": 0.6995731377382313, - "grad_norm": 0.8048322602956948, - "learning_rate": 8.742488506428209e-07, - "loss": 0.781, - "step": 5818 - }, - { - "epoch": 0.6996933806288703, - "grad_norm": 1.6523118613482444, - "learning_rate": 8.736050835417466e-07, - "loss": 0.9862, - "step": 5819 - }, - { - "epoch": 0.6998136235195094, - "grad_norm": 1.8486350049924267, - "learning_rate": 8.729614873138862e-07, - "loss": 0.8202, - "step": 5820 - }, - { - "epoch": 0.6999338664101485, - "grad_norm": 1.7520702922515634, - "learning_rate": 8.723180620568716e-07, - "loss": 0.9792, - "step": 5821 - }, - { - "epoch": 0.7000541093007876, - "grad_norm": 1.7113598863919135, - "learning_rate": 8.716748078683116e-07, - "loss": 1.0622, - "step": 5822 - }, - { - "epoch": 0.7001743521914267, - "grad_norm": 2.2636025708626324, - "learning_rate": 8.710317248457855e-07, - "loss": 0.8837, - "step": 5823 - }, - { - "epoch": 0.7002945950820658, - "grad_norm": 2.8216722755654278, - "learning_rate": 8.703888130868482e-07, - "loss": 0.913, - "step": 5824 - }, - { - "epoch": 0.7004148379727049, - "grad_norm": 1.7955845718798513, - "learning_rate": 8.697460726890307e-07, - "loss": 1.0187, - "step": 5825 - }, - { - "epoch": 0.7005350808633439, - "grad_norm": 2.192160192796724, - "learning_rate": 8.691035037498354e-07, - "loss": 1.1008, - "step": 5826 - }, - { - "epoch": 0.7006553237539831, - "grad_norm": 1.5159730806502152, - "learning_rate": 8.684611063667391e-07, - "loss": 0.9283, - "step": 5827 - }, - { - "epoch": 0.7007755666446221, - "grad_norm": 1.7765252209243876, - "learning_rate": 8.678188806371935e-07, - "loss": 0.964, - "step": 5828 - }, - { - "epoch": 0.7008958095352612, - "grad_norm": 1.5564585044321892, - "learning_rate": 8.671768266586228e-07, - "loss": 1.0506, - "step": 5829 - }, - { - "epoch": 0.7010160524259004, - "grad_norm": 1.5824646909369364, - "learning_rate": 8.665349445284275e-07, - "loss": 0.9818, - "step": 5830 - }, - { - "epoch": 0.7011362953165394, - "grad_norm": 1.4623553082220995, - "learning_rate": 8.658932343439799e-07, - "loss": 1.0049, - "step": 5831 - }, - { - "epoch": 0.7012565382071785, - "grad_norm": 1.7799283528642258, - "learning_rate": 8.65251696202627e-07, - "loss": 0.9673, - "step": 5832 - }, - { - "epoch": 0.7013767810978175, - "grad_norm": 3.567540000044359, - "learning_rate": 8.646103302016896e-07, - "loss": 1.077, - "step": 5833 - }, - { - "epoch": 0.7014970239884567, - "grad_norm": 1.6254860811227225, - "learning_rate": 8.639691364384614e-07, - "loss": 1.0876, - "step": 5834 - }, - { - "epoch": 0.7016172668790958, - "grad_norm": 1.9357525112880225, - "learning_rate": 8.633281150102136e-07, - "loss": 0.9253, - "step": 5835 - }, - { - "epoch": 0.7017375097697348, - "grad_norm": 2.125808337682871, - "learning_rate": 8.626872660141855e-07, - "loss": 0.879, - "step": 5836 - }, - { - "epoch": 0.701857752660374, - "grad_norm": 1.8243270286429278, - "learning_rate": 8.620465895475957e-07, - "loss": 0.9431, - "step": 5837 - }, - { - "epoch": 0.701977995551013, - "grad_norm": 1.4222486235648308, - "learning_rate": 8.614060857076333e-07, - "loss": 0.9543, - "step": 5838 - }, - { - "epoch": 0.7020982384416521, - "grad_norm": 1.7445510090974399, - "learning_rate": 8.60765754591462e-07, - "loss": 0.9423, - "step": 5839 - }, - { - "epoch": 0.7022184813322913, - "grad_norm": 1.7459619800933228, - "learning_rate": 8.601255962962211e-07, - "loss": 0.9327, - "step": 5840 - }, - { - "epoch": 0.7023387242229303, - "grad_norm": 2.4927225046888153, - "learning_rate": 8.594856109190194e-07, - "loss": 0.9242, - "step": 5841 - }, - { - "epoch": 0.7024589671135694, - "grad_norm": 1.4428241651103546, - "learning_rate": 8.588457985569446e-07, - "loss": 0.8949, - "step": 5842 - }, - { - "epoch": 0.7025792100042085, - "grad_norm": 2.0206015018001815, - "learning_rate": 8.582061593070542e-07, - "loss": 0.9066, - "step": 5843 - }, - { - "epoch": 0.7026994528948476, - "grad_norm": 1.945343507573219, - "learning_rate": 8.57566693266383e-07, - "loss": 0.9695, - "step": 5844 - }, - { - "epoch": 0.7028196957854866, - "grad_norm": 2.002566598077659, - "learning_rate": 8.569274005319354e-07, - "loss": 0.8879, - "step": 5845 - }, - { - "epoch": 0.7029399386761258, - "grad_norm": 1.763288183709922, - "learning_rate": 8.562882812006913e-07, - "loss": 1.0009, - "step": 5846 - }, - { - "epoch": 0.7030601815667649, - "grad_norm": 1.6274644796828577, - "learning_rate": 8.556493353696066e-07, - "loss": 0.9637, - "step": 5847 - }, - { - "epoch": 0.7031804244574039, - "grad_norm": 2.011040493595871, - "learning_rate": 8.550105631356077e-07, - "loss": 0.8796, - "step": 5848 - }, - { - "epoch": 0.7033006673480431, - "grad_norm": 1.8743959089526778, - "learning_rate": 8.543719645955961e-07, - "loss": 0.9659, - "step": 5849 - }, - { - "epoch": 0.7034209102386821, - "grad_norm": 1.6856058581166702, - "learning_rate": 8.537335398464467e-07, - "loss": 0.9385, - "step": 5850 - }, - { - "epoch": 0.7035411531293212, - "grad_norm": 2.5223458116512667, - "learning_rate": 8.53095288985007e-07, - "loss": 1.055, - "step": 5851 - }, - { - "epoch": 0.7036613960199604, - "grad_norm": 1.5017413522069047, - "learning_rate": 8.524572121081009e-07, - "loss": 1.0218, - "step": 5852 - }, - { - "epoch": 0.7037816389105994, - "grad_norm": 1.9736137861102012, - "learning_rate": 8.518193093125232e-07, - "loss": 0.8278, - "step": 5853 - }, - { - "epoch": 0.7039018818012385, - "grad_norm": 1.5740548704052177, - "learning_rate": 8.511815806950436e-07, - "loss": 0.9963, - "step": 5854 - }, - { - "epoch": 0.7040221246918776, - "grad_norm": 1.5507614172909236, - "learning_rate": 8.505440263524044e-07, - "loss": 0.9774, - "step": 5855 - }, - { - "epoch": 0.7041423675825167, - "grad_norm": 2.570100259569319, - "learning_rate": 8.49906646381322e-07, - "loss": 1.0771, - "step": 5856 - }, - { - "epoch": 0.7042626104731557, - "grad_norm": 1.63791803745991, - "learning_rate": 8.492694408784884e-07, - "loss": 0.9132, - "step": 5857 - }, - { - "epoch": 0.7043828533637949, - "grad_norm": 2.2191809652080097, - "learning_rate": 8.486324099405642e-07, - "loss": 0.8228, - "step": 5858 - }, - { - "epoch": 0.704503096254434, - "grad_norm": 1.530781265450361, - "learning_rate": 8.479955536641887e-07, - "loss": 0.9497, - "step": 5859 - }, - { - "epoch": 0.704623339145073, - "grad_norm": 1.6193452896696532, - "learning_rate": 8.473588721459716e-07, - "loss": 0.8619, - "step": 5860 - }, - { - "epoch": 0.7047435820357122, - "grad_norm": 2.367890810588432, - "learning_rate": 8.467223654824967e-07, - "loss": 0.9039, - "step": 5861 - }, - { - "epoch": 0.7048638249263512, - "grad_norm": 1.9806033357715607, - "learning_rate": 8.460860337703233e-07, - "loss": 0.8321, - "step": 5862 - }, - { - "epoch": 0.7049840678169903, - "grad_norm": 1.6698441336756176, - "learning_rate": 8.454498771059797e-07, - "loss": 0.9082, - "step": 5863 - }, - { - "epoch": 0.7051043107076294, - "grad_norm": 1.8991299845807659, - "learning_rate": 8.448138955859725e-07, - "loss": 1.0293, - "step": 5864 - }, - { - "epoch": 0.7052245535982685, - "grad_norm": 1.8550410048690253, - "learning_rate": 8.44178089306778e-07, - "loss": 1.1, - "step": 5865 - }, - { - "epoch": 0.7053447964889076, - "grad_norm": 2.0073666801220837, - "learning_rate": 8.4354245836485e-07, - "loss": 0.9732, - "step": 5866 - }, - { - "epoch": 0.7054650393795466, - "grad_norm": 1.9395399122229882, - "learning_rate": 8.429070028566108e-07, - "loss": 0.9256, - "step": 5867 - }, - { - "epoch": 0.7055852822701858, - "grad_norm": 1.8000971555980763, - "learning_rate": 8.422717228784586e-07, - "loss": 0.9481, - "step": 5868 - }, - { - "epoch": 0.7057055251608249, - "grad_norm": 2.081398472344373, - "learning_rate": 8.416366185267663e-07, - "loss": 0.8908, - "step": 5869 - }, - { - "epoch": 0.7058257680514639, - "grad_norm": 1.7453378154725256, - "learning_rate": 8.410016898978778e-07, - "loss": 0.975, - "step": 5870 - }, - { - "epoch": 0.7059460109421031, - "grad_norm": 1.589227430398496, - "learning_rate": 8.403669370881115e-07, - "loss": 0.9899, - "step": 5871 - }, - { - "epoch": 0.7060662538327421, - "grad_norm": 1.6912865717529209, - "learning_rate": 8.397323601937587e-07, - "loss": 0.9718, - "step": 5872 - }, - { - "epoch": 0.7061864967233812, - "grad_norm": 1.7802354031181205, - "learning_rate": 8.390979593110838e-07, - "loss": 0.9707, - "step": 5873 - }, - { - "epoch": 0.7063067396140204, - "grad_norm": 1.4763916562663415, - "learning_rate": 8.384637345363262e-07, - "loss": 1.0065, - "step": 5874 - }, - { - "epoch": 0.7064269825046594, - "grad_norm": 1.6595725076666958, - "learning_rate": 8.378296859656964e-07, - "loss": 0.9637, - "step": 5875 - }, - { - "epoch": 0.7065472253952985, - "grad_norm": 2.1354534859313072, - "learning_rate": 8.371958136953792e-07, - "loss": 0.8739, - "step": 5876 - }, - { - "epoch": 0.7066674682859376, - "grad_norm": 2.686265617598572, - "learning_rate": 8.365621178215326e-07, - "loss": 0.8654, - "step": 5877 - }, - { - "epoch": 0.7067877111765767, - "grad_norm": 1.890914453464687, - "learning_rate": 8.359285984402871e-07, - "loss": 0.9464, - "step": 5878 - }, - { - "epoch": 0.7069079540672157, - "grad_norm": 1.9459930822674858, - "learning_rate": 8.352952556477489e-07, - "loss": 0.944, - "step": 5879 - }, - { - "epoch": 0.7070281969578549, - "grad_norm": 1.841770855587352, - "learning_rate": 8.34662089539993e-07, - "loss": 0.9751, - "step": 5880 - }, - { - "epoch": 0.707148439848494, - "grad_norm": 1.7959561196087672, - "learning_rate": 8.340291002130722e-07, - "loss": 0.988, - "step": 5881 - }, - { - "epoch": 0.707268682739133, - "grad_norm": 2.108457012343479, - "learning_rate": 8.3339628776301e-07, - "loss": 0.9955, - "step": 5882 - }, - { - "epoch": 0.7073889256297722, - "grad_norm": 1.7254798912704932, - "learning_rate": 8.327636522858033e-07, - "loss": 0.767, - "step": 5883 - }, - { - "epoch": 0.7075091685204112, - "grad_norm": 1.8673439758367036, - "learning_rate": 8.321311938774225e-07, - "loss": 0.9623, - "step": 5884 - }, - { - "epoch": 0.7076294114110503, - "grad_norm": 1.7772927748527942, - "learning_rate": 8.314989126338104e-07, - "loss": 0.9866, - "step": 5885 - }, - { - "epoch": 0.7077496543016895, - "grad_norm": 1.5780878345909908, - "learning_rate": 8.308668086508847e-07, - "loss": 1.0428, - "step": 5886 - }, - { - "epoch": 0.7078698971923285, - "grad_norm": 1.8409910087469492, - "learning_rate": 8.302348820245342e-07, - "loss": 0.9376, - "step": 5887 - }, - { - "epoch": 0.7079901400829676, - "grad_norm": 2.57114496635026, - "learning_rate": 8.296031328506232e-07, - "loss": 0.8997, - "step": 5888 - }, - { - "epoch": 0.7081103829736067, - "grad_norm": 2.038638366786382, - "learning_rate": 8.289715612249857e-07, - "loss": 0.9568, - "step": 5889 - }, - { - "epoch": 0.7082306258642458, - "grad_norm": 3.1028754772657234, - "learning_rate": 8.283401672434305e-07, - "loss": 0.968, - "step": 5890 - }, - { - "epoch": 0.7083508687548848, - "grad_norm": 2.5043762051660594, - "learning_rate": 8.277089510017412e-07, - "loss": 0.9102, - "step": 5891 - }, - { - "epoch": 0.708471111645524, - "grad_norm": 1.7374944041868705, - "learning_rate": 8.270779125956719e-07, - "loss": 1.0217, - "step": 5892 - }, - { - "epoch": 0.7085913545361631, - "grad_norm": 2.094458389010949, - "learning_rate": 8.264470521209505e-07, - "loss": 0.9943, - "step": 5893 - }, - { - "epoch": 0.7087115974268021, - "grad_norm": 1.9759637581671603, - "learning_rate": 8.258163696732785e-07, - "loss": 0.9607, - "step": 5894 - }, - { - "epoch": 0.7088318403174413, - "grad_norm": 1.6903696738200589, - "learning_rate": 8.251858653483288e-07, - "loss": 0.9726, - "step": 5895 - }, - { - "epoch": 0.7089520832080803, - "grad_norm": 1.9361770173830213, - "learning_rate": 8.245555392417501e-07, - "loss": 1.0569, - "step": 5896 - }, - { - "epoch": 0.7090723260987194, - "grad_norm": 1.7264051408821968, - "learning_rate": 8.239253914491613e-07, - "loss": 0.9864, - "step": 5897 - }, - { - "epoch": 0.7091925689893585, - "grad_norm": 1.723972293608998, - "learning_rate": 8.232954220661556e-07, - "loss": 0.9479, - "step": 5898 - }, - { - "epoch": 0.7093128118799976, - "grad_norm": 2.0495587874331713, - "learning_rate": 8.226656311882989e-07, - "loss": 0.8889, - "step": 5899 - }, - { - "epoch": 0.7094330547706367, - "grad_norm": 2.2961161910573455, - "learning_rate": 8.22036018911129e-07, - "loss": 0.9599, - "step": 5900 - }, - { - "epoch": 0.7095532976612757, - "grad_norm": 1.8449047158093692, - "learning_rate": 8.214065853301599e-07, - "loss": 0.9966, - "step": 5901 - }, - { - "epoch": 0.7096735405519149, - "grad_norm": 1.0362664944540454, - "learning_rate": 8.207773305408734e-07, - "loss": 0.7991, - "step": 5902 - }, - { - "epoch": 0.709793783442554, - "grad_norm": 2.071794778182741, - "learning_rate": 8.201482546387288e-07, - "loss": 0.9974, - "step": 5903 - }, - { - "epoch": 0.709914026333193, - "grad_norm": 1.6394199850619897, - "learning_rate": 8.195193577191553e-07, - "loss": 1.1149, - "step": 5904 - }, - { - "epoch": 0.7100342692238322, - "grad_norm": 1.5771748921446147, - "learning_rate": 8.188906398775579e-07, - "loss": 1.0379, - "step": 5905 - }, - { - "epoch": 0.7101545121144712, - "grad_norm": 2.3980223454825422, - "learning_rate": 8.18262101209311e-07, - "loss": 0.8876, - "step": 5906 - }, - { - "epoch": 0.7102747550051103, - "grad_norm": 1.7760253711597973, - "learning_rate": 8.176337418097626e-07, - "loss": 0.9003, - "step": 5907 - }, - { - "epoch": 0.7103949978957494, - "grad_norm": 1.7410850286825916, - "learning_rate": 8.170055617742364e-07, - "loss": 0.9961, - "step": 5908 - }, - { - "epoch": 0.7105152407863885, - "grad_norm": 1.669980553382445, - "learning_rate": 8.163775611980252e-07, - "loss": 0.9111, - "step": 5909 - }, - { - "epoch": 0.7106354836770276, - "grad_norm": 1.5393887091834297, - "learning_rate": 8.157497401763982e-07, - "loss": 0.9845, - "step": 5910 - }, - { - "epoch": 0.7107557265676667, - "grad_norm": 1.8851197221412836, - "learning_rate": 8.151220988045935e-07, - "loss": 0.9751, - "step": 5911 - }, - { - "epoch": 0.7108759694583058, - "grad_norm": 1.685537680803067, - "learning_rate": 8.144946371778234e-07, - "loss": 1.0263, - "step": 5912 - }, - { - "epoch": 0.7109962123489448, - "grad_norm": 1.7252994940788682, - "learning_rate": 8.138673553912751e-07, - "loss": 0.9724, - "step": 5913 - }, - { - "epoch": 0.711116455239584, - "grad_norm": 2.353384535055082, - "learning_rate": 8.132402535401059e-07, - "loss": 0.7695, - "step": 5914 - }, - { - "epoch": 0.711236698130223, - "grad_norm": 1.6759920854586454, - "learning_rate": 8.126133317194465e-07, - "loss": 0.9444, - "step": 5915 - }, - { - "epoch": 0.7113569410208621, - "grad_norm": 1.7327722240338748, - "learning_rate": 8.11986590024401e-07, - "loss": 0.9382, - "step": 5916 - }, - { - "epoch": 0.7114771839115013, - "grad_norm": 1.4838271886966916, - "learning_rate": 8.113600285500442e-07, - "loss": 0.8768, - "step": 5917 - }, - { - "epoch": 0.7115974268021403, - "grad_norm": 1.696824414814716, - "learning_rate": 8.107336473914268e-07, - "loss": 0.9388, - "step": 5918 - }, - { - "epoch": 0.7117176696927794, - "grad_norm": 0.855279840661868, - "learning_rate": 8.101074466435694e-07, - "loss": 0.8005, - "step": 5919 - }, - { - "epoch": 0.7118379125834186, - "grad_norm": 1.5427108355941244, - "learning_rate": 8.094814264014662e-07, - "loss": 0.8758, - "step": 5920 - }, - { - "epoch": 0.7119581554740576, - "grad_norm": 2.038913063624358, - "learning_rate": 8.088555867600844e-07, - "loss": 1.0223, - "step": 5921 - }, - { - "epoch": 0.7120783983646967, - "grad_norm": 2.7562531555572924, - "learning_rate": 8.08229927814362e-07, - "loss": 0.8085, - "step": 5922 - }, - { - "epoch": 0.7121986412553358, - "grad_norm": 1.9253940773404963, - "learning_rate": 8.076044496592134e-07, - "loss": 0.8419, - "step": 5923 - }, - { - "epoch": 0.7123188841459749, - "grad_norm": 1.8692695150709955, - "learning_rate": 8.069791523895204e-07, - "loss": 0.9735, - "step": 5924 - }, - { - "epoch": 0.7124391270366139, - "grad_norm": 1.9085207062842986, - "learning_rate": 8.063540361001422e-07, - "loss": 0.9811, - "step": 5925 - }, - { - "epoch": 0.7125593699272531, - "grad_norm": 1.8559423866085603, - "learning_rate": 8.057291008859069e-07, - "loss": 0.9952, - "step": 5926 - }, - { - "epoch": 0.7126796128178922, - "grad_norm": 2.061859840691173, - "learning_rate": 8.051043468416187e-07, - "loss": 0.8756, - "step": 5927 - }, - { - "epoch": 0.7127998557085312, - "grad_norm": 1.758103749200681, - "learning_rate": 8.044797740620506e-07, - "loss": 1.0261, - "step": 5928 - }, - { - "epoch": 0.7129200985991703, - "grad_norm": 2.07315731728372, - "learning_rate": 8.038553826419494e-07, - "loss": 0.9792, - "step": 5929 - }, - { - "epoch": 0.7130403414898094, - "grad_norm": 2.5462920100885658, - "learning_rate": 8.032311726760364e-07, - "loss": 1.0005, - "step": 5930 - }, - { - "epoch": 0.7131605843804485, - "grad_norm": 1.7607680502381629, - "learning_rate": 8.026071442590022e-07, - "loss": 0.8955, - "step": 5931 - }, - { - "epoch": 0.7132808272710875, - "grad_norm": 2.1847783187041845, - "learning_rate": 8.019832974855134e-07, - "loss": 1.0131, - "step": 5932 - }, - { - "epoch": 0.7134010701617267, - "grad_norm": 2.168686756454152, - "learning_rate": 8.013596324502052e-07, - "loss": 1.0203, - "step": 5933 - }, - { - "epoch": 0.7135213130523658, - "grad_norm": 2.0896728445742587, - "learning_rate": 8.007361492476872e-07, - "loss": 0.9832, - "step": 5934 - }, - { - "epoch": 0.7136415559430048, - "grad_norm": 1.5622541615685122, - "learning_rate": 8.001128479725426e-07, - "loss": 0.9841, - "step": 5935 - }, - { - "epoch": 0.713761798833644, - "grad_norm": 1.4713961524936927, - "learning_rate": 7.994897287193248e-07, - "loss": 0.9985, - "step": 5936 - }, - { - "epoch": 0.713882041724283, - "grad_norm": 2.083937353946159, - "learning_rate": 7.988667915825605e-07, - "loss": 1.0431, - "step": 5937 - }, - { - "epoch": 0.7140022846149221, - "grad_norm": 1.956579708309442, - "learning_rate": 7.982440366567491e-07, - "loss": 0.9536, - "step": 5938 - }, - { - "epoch": 0.7141225275055613, - "grad_norm": 1.6702917570533657, - "learning_rate": 7.97621464036361e-07, - "loss": 0.95, - "step": 5939 - }, - { - "epoch": 0.7142427703962003, - "grad_norm": 3.270980167535213, - "learning_rate": 7.969990738158417e-07, - "loss": 0.8759, - "step": 5940 - }, - { - "epoch": 0.7143630132868394, - "grad_norm": 1.7125800622198484, - "learning_rate": 7.963768660896062e-07, - "loss": 1.0436, - "step": 5941 - }, - { - "epoch": 0.7144832561774785, - "grad_norm": 1.725176362683201, - "learning_rate": 7.957548409520432e-07, - "loss": 1.0181, - "step": 5942 - }, - { - "epoch": 0.7146034990681176, - "grad_norm": 1.9717901462488638, - "learning_rate": 7.951329984975135e-07, - "loss": 1.0435, - "step": 5943 - }, - { - "epoch": 0.7147237419587567, - "grad_norm": 0.756366687631267, - "learning_rate": 7.94511338820349e-07, - "loss": 0.7578, - "step": 5944 - }, - { - "epoch": 0.7148439848493958, - "grad_norm": 2.048169141892649, - "learning_rate": 7.938898620148575e-07, - "loss": 0.9811, - "step": 5945 - }, - { - "epoch": 0.7149642277400349, - "grad_norm": 1.773530260256478, - "learning_rate": 7.932685681753135e-07, - "loss": 0.9091, - "step": 5946 - }, - { - "epoch": 0.7150844706306739, - "grad_norm": 1.6866982992822142, - "learning_rate": 7.92647457395969e-07, - "loss": 0.8248, - "step": 5947 - }, - { - "epoch": 0.7152047135213131, - "grad_norm": 2.0891995301595414, - "learning_rate": 7.920265297710444e-07, - "loss": 0.9395, - "step": 5948 - }, - { - "epoch": 0.7153249564119522, - "grad_norm": 1.7914096145165745, - "learning_rate": 7.914057853947363e-07, - "loss": 0.925, - "step": 5949 - }, - { - "epoch": 0.7154451993025912, - "grad_norm": 1.9271016575397448, - "learning_rate": 7.907852243612089e-07, - "loss": 0.8302, - "step": 5950 - }, - { - "epoch": 0.7155654421932304, - "grad_norm": 1.866683253430288, - "learning_rate": 7.901648467646009e-07, - "loss": 0.9244, - "step": 5951 - }, - { - "epoch": 0.7156856850838694, - "grad_norm": 3.2441525306919554, - "learning_rate": 7.895446526990244e-07, - "loss": 0.9191, - "step": 5952 - }, - { - "epoch": 0.7158059279745085, - "grad_norm": 1.4854721030456957, - "learning_rate": 7.889246422585609e-07, - "loss": 0.9521, - "step": 5953 - }, - { - "epoch": 0.7159261708651476, - "grad_norm": 1.7776088642632415, - "learning_rate": 7.883048155372675e-07, - "loss": 0.94, - "step": 5954 - }, - { - "epoch": 0.7160464137557867, - "grad_norm": 2.0699410735141823, - "learning_rate": 7.876851726291698e-07, - "loss": 0.9064, - "step": 5955 - }, - { - "epoch": 0.7161666566464258, - "grad_norm": 1.8232370876083601, - "learning_rate": 7.870657136282666e-07, - "loss": 0.9798, - "step": 5956 - }, - { - "epoch": 0.7162868995370649, - "grad_norm": 1.4269371050612347, - "learning_rate": 7.86446438628531e-07, - "loss": 1.0155, - "step": 5957 - }, - { - "epoch": 0.716407142427704, - "grad_norm": 0.8360224529130481, - "learning_rate": 7.858273477239059e-07, - "loss": 0.7939, - "step": 5958 - }, - { - "epoch": 0.716527385318343, - "grad_norm": 1.5881960716993526, - "learning_rate": 7.852084410083067e-07, - "loss": 0.9108, - "step": 5959 - }, - { - "epoch": 0.7166476282089821, - "grad_norm": 1.7420158130571848, - "learning_rate": 7.84589718575621e-07, - "loss": 0.8356, - "step": 5960 - }, - { - "epoch": 0.7167678710996213, - "grad_norm": 1.870621081958954, - "learning_rate": 7.83971180519708e-07, - "loss": 0.8843, - "step": 5961 - }, - { - "epoch": 0.7168881139902603, - "grad_norm": 1.9122429120170183, - "learning_rate": 7.833528269344008e-07, - "loss": 0.946, - "step": 5962 - }, - { - "epoch": 0.7170083568808994, - "grad_norm": 2.1142327780706087, - "learning_rate": 7.827346579135023e-07, - "loss": 0.9802, - "step": 5963 - }, - { - "epoch": 0.7171285997715385, - "grad_norm": 1.758279424747939, - "learning_rate": 7.821166735507885e-07, - "loss": 1.0296, - "step": 5964 - }, - { - "epoch": 0.7172488426621776, - "grad_norm": 2.263692518375458, - "learning_rate": 7.81498873940007e-07, - "loss": 0.8872, - "step": 5965 - }, - { - "epoch": 0.7173690855528166, - "grad_norm": 2.020776597108132, - "learning_rate": 7.808812591748768e-07, - "loss": 0.9669, - "step": 5966 - }, - { - "epoch": 0.7174893284434558, - "grad_norm": 1.8039136275995127, - "learning_rate": 7.802638293490915e-07, - "loss": 0.8463, - "step": 5967 - }, - { - "epoch": 0.7176095713340949, - "grad_norm": 1.5810229443761785, - "learning_rate": 7.796465845563123e-07, - "loss": 0.9791, - "step": 5968 - }, - { - "epoch": 0.7177298142247339, - "grad_norm": 1.7518062806439427, - "learning_rate": 7.790295248901766e-07, - "loss": 1.0017, - "step": 5969 - }, - { - "epoch": 0.7178500571153731, - "grad_norm": 1.5992724165043841, - "learning_rate": 7.784126504442902e-07, - "loss": 0.8271, - "step": 5970 - }, - { - "epoch": 0.7179703000060121, - "grad_norm": 1.3586830482727594, - "learning_rate": 7.777959613122351e-07, - "loss": 0.8711, - "step": 5971 - }, - { - "epoch": 0.7180905428966512, - "grad_norm": 1.534636431329628, - "learning_rate": 7.771794575875604e-07, - "loss": 0.9777, - "step": 5972 - }, - { - "epoch": 0.7182107857872904, - "grad_norm": 2.098251059537466, - "learning_rate": 7.765631393637888e-07, - "loss": 0.9793, - "step": 5973 - }, - { - "epoch": 0.7183310286779294, - "grad_norm": 11.438017619776273, - "learning_rate": 7.75947006734417e-07, - "loss": 0.6834, - "step": 5974 - }, - { - "epoch": 0.7184512715685685, - "grad_norm": 2.069117247838492, - "learning_rate": 7.753310597929101e-07, - "loss": 1.0199, - "step": 5975 - }, - { - "epoch": 0.7185715144592076, - "grad_norm": 0.8200822779617623, - "learning_rate": 7.747152986327095e-07, - "loss": 0.7776, - "step": 5976 - }, - { - "epoch": 0.7186917573498467, - "grad_norm": 1.7604079688344516, - "learning_rate": 7.740997233472228e-07, - "loss": 0.878, - "step": 5977 - }, - { - "epoch": 0.7188120002404857, - "grad_norm": 2.198010398951221, - "learning_rate": 7.734843340298329e-07, - "loss": 0.9036, - "step": 5978 - }, - { - "epoch": 0.7189322431311249, - "grad_norm": 2.16598040153957, - "learning_rate": 7.72869130773895e-07, - "loss": 0.9552, - "step": 5979 - }, - { - "epoch": 0.719052486021764, - "grad_norm": 0.8302070068833792, - "learning_rate": 7.722541136727343e-07, - "loss": 0.806, - "step": 5980 - }, - { - "epoch": 0.719172728912403, - "grad_norm": 1.8687974062619108, - "learning_rate": 7.716392828196483e-07, - "loss": 1.0106, - "step": 5981 - }, - { - "epoch": 0.7192929718030422, - "grad_norm": 2.6726934060750773, - "learning_rate": 7.710246383079064e-07, - "loss": 0.9657, - "step": 5982 - }, - { - "epoch": 0.7194132146936812, - "grad_norm": 2.258435331718066, - "learning_rate": 7.704101802307492e-07, - "loss": 1.1188, - "step": 5983 - }, - { - "epoch": 0.7195334575843203, - "grad_norm": 1.9451432365100447, - "learning_rate": 7.697959086813912e-07, - "loss": 1.0776, - "step": 5984 - }, - { - "epoch": 0.7196537004749595, - "grad_norm": 1.5957716293291473, - "learning_rate": 7.691818237530145e-07, - "loss": 0.9963, - "step": 5985 - }, - { - "epoch": 0.7197739433655985, - "grad_norm": 2.022028133955019, - "learning_rate": 7.685679255387774e-07, - "loss": 0.9748, - "step": 5986 - }, - { - "epoch": 0.7198941862562376, - "grad_norm": 1.9576430896673216, - "learning_rate": 7.679542141318065e-07, - "loss": 0.9672, - "step": 5987 - }, - { - "epoch": 0.7200144291468767, - "grad_norm": 1.6399595091636157, - "learning_rate": 7.673406896252013e-07, - "loss": 0.9623, - "step": 5988 - }, - { - "epoch": 0.7201346720375158, - "grad_norm": 1.4842180671051797, - "learning_rate": 7.667273521120347e-07, - "loss": 0.9794, - "step": 5989 - }, - { - "epoch": 0.7202549149281549, - "grad_norm": 1.8677699884328776, - "learning_rate": 7.661142016853468e-07, - "loss": 0.9936, - "step": 5990 - }, - { - "epoch": 0.7203751578187939, - "grad_norm": 1.6256255633020653, - "learning_rate": 7.655012384381543e-07, - "loss": 0.9488, - "step": 5991 - }, - { - "epoch": 0.7204954007094331, - "grad_norm": 1.6839207024196103, - "learning_rate": 7.648884624634415e-07, - "loss": 1.0124, - "step": 5992 - }, - { - "epoch": 0.7206156436000721, - "grad_norm": 1.823169545390226, - "learning_rate": 7.642758738541683e-07, - "loss": 1.0853, - "step": 5993 - }, - { - "epoch": 0.7207358864907112, - "grad_norm": 0.8386700612844996, - "learning_rate": 7.636634727032621e-07, - "loss": 0.8148, - "step": 5994 - }, - { - "epoch": 0.7208561293813504, - "grad_norm": 1.9274064763952217, - "learning_rate": 7.630512591036231e-07, - "loss": 0.9881, - "step": 5995 - }, - { - "epoch": 0.7209763722719894, - "grad_norm": 2.1582866393626605, - "learning_rate": 7.624392331481255e-07, - "loss": 0.8435, - "step": 5996 - }, - { - "epoch": 0.7210966151626285, - "grad_norm": 0.7600527419436373, - "learning_rate": 7.618273949296115e-07, - "loss": 0.7376, - "step": 5997 - }, - { - "epoch": 0.7212168580532676, - "grad_norm": 2.0588208531113867, - "learning_rate": 7.612157445408987e-07, - "loss": 0.8813, - "step": 5998 - }, - { - "epoch": 0.7213371009439067, - "grad_norm": 1.942214950895386, - "learning_rate": 7.606042820747716e-07, - "loss": 0.9414, - "step": 5999 - }, - { - "epoch": 0.7214573438345457, - "grad_norm": 2.128646974786524, - "learning_rate": 7.599930076239889e-07, - "loss": 1.0526, - "step": 6000 - }, - { - "epoch": 0.7215775867251849, - "grad_norm": 2.5059435764119673, - "learning_rate": 7.593819212812818e-07, - "loss": 0.9106, - "step": 6001 - }, - { - "epoch": 0.721697829615824, - "grad_norm": 1.7806461046708146, - "learning_rate": 7.587710231393508e-07, - "loss": 0.924, - "step": 6002 - }, - { - "epoch": 0.721818072506463, - "grad_norm": 1.79947931388122, - "learning_rate": 7.581603132908685e-07, - "loss": 1.0321, - "step": 6003 - }, - { - "epoch": 0.7219383153971022, - "grad_norm": 1.8298172902267573, - "learning_rate": 7.575497918284795e-07, - "loss": 0.9753, - "step": 6004 - }, - { - "epoch": 0.7220585582877412, - "grad_norm": 2.084840178733568, - "learning_rate": 7.569394588447984e-07, - "loss": 0.9488, - "step": 6005 - }, - { - "epoch": 0.7221788011783803, - "grad_norm": 2.1531599819846226, - "learning_rate": 7.563293144324146e-07, - "loss": 0.9762, - "step": 6006 - }, - { - "epoch": 0.7222990440690195, - "grad_norm": 2.4913317128823453, - "learning_rate": 7.557193586838834e-07, - "loss": 0.9992, - "step": 6007 - }, - { - "epoch": 0.7224192869596585, - "grad_norm": 1.8861628438078208, - "learning_rate": 7.551095916917371e-07, - "loss": 0.9049, - "step": 6008 - }, - { - "epoch": 0.7225395298502976, - "grad_norm": 2.1477283764020276, - "learning_rate": 7.545000135484758e-07, - "loss": 0.861, - "step": 6009 - }, - { - "epoch": 0.7226597727409367, - "grad_norm": 1.9144406314948903, - "learning_rate": 7.538906243465714e-07, - "loss": 0.8281, - "step": 6010 - }, - { - "epoch": 0.7227800156315758, - "grad_norm": 1.8585110107580112, - "learning_rate": 7.5328142417847e-07, - "loss": 0.9776, - "step": 6011 - }, - { - "epoch": 0.7229002585222148, - "grad_norm": 1.4796251293922569, - "learning_rate": 7.526724131365838e-07, - "loss": 0.8923, - "step": 6012 - }, - { - "epoch": 0.723020501412854, - "grad_norm": 1.6702425107932202, - "learning_rate": 7.520635913133017e-07, - "loss": 0.903, - "step": 6013 - }, - { - "epoch": 0.7231407443034931, - "grad_norm": 1.7026115833524302, - "learning_rate": 7.514549588009798e-07, - "loss": 1.0206, - "step": 6014 - }, - { - "epoch": 0.7232609871941321, - "grad_norm": 1.7714970261228247, - "learning_rate": 7.508465156919492e-07, - "loss": 0.9055, - "step": 6015 - }, - { - "epoch": 0.7233812300847713, - "grad_norm": 2.798430367295096, - "learning_rate": 7.502382620785083e-07, - "loss": 0.8237, - "step": 6016 - }, - { - "epoch": 0.7235014729754103, - "grad_norm": 0.9275026694616681, - "learning_rate": 7.496301980529289e-07, - "loss": 0.887, - "step": 6017 - }, - { - "epoch": 0.7236217158660494, - "grad_norm": 3.5801804148643845, - "learning_rate": 7.490223237074547e-07, - "loss": 0.948, - "step": 6018 - }, - { - "epoch": 0.7237419587566886, - "grad_norm": 1.9939292922765006, - "learning_rate": 7.484146391342989e-07, - "loss": 0.8534, - "step": 6019 - }, - { - "epoch": 0.7238622016473276, - "grad_norm": 3.1858348611010947, - "learning_rate": 7.478071444256484e-07, - "loss": 0.777, - "step": 6020 - }, - { - "epoch": 0.7239824445379667, - "grad_norm": 1.624260881032573, - "learning_rate": 7.471998396736579e-07, - "loss": 0.9828, - "step": 6021 - }, - { - "epoch": 0.7241026874286057, - "grad_norm": 1.611277446527857, - "learning_rate": 7.465927249704549e-07, - "loss": 0.9516, - "step": 6022 - }, - { - "epoch": 0.7242229303192449, - "grad_norm": 1.6382388973992108, - "learning_rate": 7.459858004081398e-07, - "loss": 0.9694, - "step": 6023 - }, - { - "epoch": 0.724343173209884, - "grad_norm": 0.7117516603199252, - "learning_rate": 7.453790660787815e-07, - "loss": 0.7957, - "step": 6024 - }, - { - "epoch": 0.724463416100523, - "grad_norm": 2.363099449037742, - "learning_rate": 7.447725220744214e-07, - "loss": 0.8389, - "step": 6025 - }, - { - "epoch": 0.7245836589911622, - "grad_norm": 1.9361975814589873, - "learning_rate": 7.441661684870717e-07, - "loss": 0.961, - "step": 6026 - }, - { - "epoch": 0.7247039018818012, - "grad_norm": 1.678096709909789, - "learning_rate": 7.435600054087152e-07, - "loss": 1.013, - "step": 6027 - }, - { - "epoch": 0.7248241447724403, - "grad_norm": 1.7724489658256168, - "learning_rate": 7.42954032931308e-07, - "loss": 0.9459, - "step": 6028 - }, - { - "epoch": 0.7249443876630794, - "grad_norm": 1.6953231178337935, - "learning_rate": 7.423482511467733e-07, - "loss": 0.9419, - "step": 6029 - }, - { - "epoch": 0.7250646305537185, - "grad_norm": 2.541774955902398, - "learning_rate": 7.417426601470099e-07, - "loss": 0.8554, - "step": 6030 - }, - { - "epoch": 0.7251848734443576, - "grad_norm": 1.9373501135162194, - "learning_rate": 7.411372600238841e-07, - "loss": 0.9802, - "step": 6031 - }, - { - "epoch": 0.7253051163349967, - "grad_norm": 2.2539145008379093, - "learning_rate": 7.405320508692346e-07, - "loss": 0.936, - "step": 6032 - }, - { - "epoch": 0.7254253592256358, - "grad_norm": 3.1412832604623446, - "learning_rate": 7.399270327748727e-07, - "loss": 0.9521, - "step": 6033 - }, - { - "epoch": 0.7255456021162748, - "grad_norm": 1.7208393229210215, - "learning_rate": 7.39322205832577e-07, - "loss": 0.9416, - "step": 6034 - }, - { - "epoch": 0.725665845006914, - "grad_norm": 2.361579925767615, - "learning_rate": 7.387175701341009e-07, - "loss": 1.0046, - "step": 6035 - }, - { - "epoch": 0.7257860878975531, - "grad_norm": 1.9650929105308055, - "learning_rate": 7.381131257711659e-07, - "loss": 0.9295, - "step": 6036 - }, - { - "epoch": 0.7259063307881921, - "grad_norm": 1.69879959022835, - "learning_rate": 7.375088728354677e-07, - "loss": 1.0365, - "step": 6037 - }, - { - "epoch": 0.7260265736788313, - "grad_norm": 1.386904507942418, - "learning_rate": 7.369048114186691e-07, - "loss": 0.8647, - "step": 6038 - }, - { - "epoch": 0.7261468165694703, - "grad_norm": 2.0324536791939067, - "learning_rate": 7.363009416124055e-07, - "loss": 1.0317, - "step": 6039 - }, - { - "epoch": 0.7262670594601094, - "grad_norm": 2.0195321944509397, - "learning_rate": 7.356972635082852e-07, - "loss": 0.8377, - "step": 6040 - }, - { - "epoch": 0.7263873023507486, - "grad_norm": 1.6573538790314446, - "learning_rate": 7.35093777197884e-07, - "loss": 0.9494, - "step": 6041 - }, - { - "epoch": 0.7265075452413876, - "grad_norm": 2.7673571490987072, - "learning_rate": 7.344904827727525e-07, - "loss": 1.0453, - "step": 6042 - }, - { - "epoch": 0.7266277881320267, - "grad_norm": 2.1208014804968744, - "learning_rate": 7.338873803244076e-07, - "loss": 0.929, - "step": 6043 - }, - { - "epoch": 0.7267480310226658, - "grad_norm": 1.7104343040136203, - "learning_rate": 7.332844699443401e-07, - "loss": 1.0023, - "step": 6044 - }, - { - "epoch": 0.7268682739133049, - "grad_norm": 1.7409837609880714, - "learning_rate": 7.326817517240121e-07, - "loss": 0.9432, - "step": 6045 - }, - { - "epoch": 0.7269885168039439, - "grad_norm": 1.8586188309441596, - "learning_rate": 7.320792257548545e-07, - "loss": 1.0346, - "step": 6046 - }, - { - "epoch": 0.7271087596945831, - "grad_norm": 2.052613114133659, - "learning_rate": 7.314768921282704e-07, - "loss": 0.9629, - "step": 6047 - }, - { - "epoch": 0.7272290025852222, - "grad_norm": 2.998621146902991, - "learning_rate": 7.30874750935633e-07, - "loss": 0.9188, - "step": 6048 - }, - { - "epoch": 0.7273492454758612, - "grad_norm": 2.7002767209134024, - "learning_rate": 7.30272802268286e-07, - "loss": 0.9873, - "step": 6049 - }, - { - "epoch": 0.7274694883665004, - "grad_norm": 1.984483610770322, - "learning_rate": 7.29671046217547e-07, - "loss": 0.9632, - "step": 6050 - }, - { - "epoch": 0.7275897312571394, - "grad_norm": 2.0144193133463824, - "learning_rate": 7.290694828746988e-07, - "loss": 1.0176, - "step": 6051 - }, - { - "epoch": 0.7277099741477785, - "grad_norm": 1.6078429105234922, - "learning_rate": 7.284681123310004e-07, - "loss": 1.0498, - "step": 6052 - }, - { - "epoch": 0.7278302170384175, - "grad_norm": 1.574408205052449, - "learning_rate": 7.27866934677678e-07, - "loss": 0.9881, - "step": 6053 - }, - { - "epoch": 0.7279504599290567, - "grad_norm": 1.6989966236370593, - "learning_rate": 7.272659500059297e-07, - "loss": 0.9741, - "step": 6054 - }, - { - "epoch": 0.7280707028196958, - "grad_norm": 2.4401221232260744, - "learning_rate": 7.266651584069264e-07, - "loss": 1.0041, - "step": 6055 - }, - { - "epoch": 0.7281909457103348, - "grad_norm": 1.6269953288343604, - "learning_rate": 7.260645599718045e-07, - "loss": 0.7763, - "step": 6056 - }, - { - "epoch": 0.728311188600974, - "grad_norm": 2.0682809903112322, - "learning_rate": 7.254641547916767e-07, - "loss": 0.8714, - "step": 6057 - }, - { - "epoch": 0.728431431491613, - "grad_norm": 1.61850548447654, - "learning_rate": 7.248639429576226e-07, - "loss": 0.8886, - "step": 6058 - }, - { - "epoch": 0.7285516743822521, - "grad_norm": 1.5565211538610686, - "learning_rate": 7.242639245606959e-07, - "loss": 0.9234, - "step": 6059 - }, - { - "epoch": 0.7286719172728913, - "grad_norm": 1.6238634096028135, - "learning_rate": 7.236640996919168e-07, - "loss": 1.0254, - "step": 6060 - }, - { - "epoch": 0.7287921601635303, - "grad_norm": 1.5027746129232766, - "learning_rate": 7.230644684422782e-07, - "loss": 0.905, - "step": 6061 - }, - { - "epoch": 0.7289124030541694, - "grad_norm": 1.6794021718957797, - "learning_rate": 7.224650309027451e-07, - "loss": 1.0157, - "step": 6062 - }, - { - "epoch": 0.7290326459448085, - "grad_norm": 1.594471487657014, - "learning_rate": 7.218657871642506e-07, - "loss": 0.879, - "step": 6063 - }, - { - "epoch": 0.7291528888354476, - "grad_norm": 1.7556383717327482, - "learning_rate": 7.212667373177012e-07, - "loss": 0.815, - "step": 6064 - }, - { - "epoch": 0.7292731317260867, - "grad_norm": 1.596220905991742, - "learning_rate": 7.206678814539704e-07, - "loss": 0.9545, - "step": 6065 - }, - { - "epoch": 0.7293933746167258, - "grad_norm": 1.4456245704053214, - "learning_rate": 7.20069219663904e-07, - "loss": 0.9257, - "step": 6066 - }, - { - "epoch": 0.7295136175073649, - "grad_norm": 1.7625973830137538, - "learning_rate": 7.1947075203832e-07, - "loss": 0.9954, - "step": 6067 - }, - { - "epoch": 0.7296338603980039, - "grad_norm": 0.9542619146812987, - "learning_rate": 7.188724786680049e-07, - "loss": 0.8098, - "step": 6068 - }, - { - "epoch": 0.7297541032886431, - "grad_norm": 1.7259347732525168, - "learning_rate": 7.182743996437162e-07, - "loss": 0.9483, - "step": 6069 - }, - { - "epoch": 0.7298743461792822, - "grad_norm": 1.7815715432652663, - "learning_rate": 7.176765150561819e-07, - "loss": 0.8822, - "step": 6070 - }, - { - "epoch": 0.7299945890699212, - "grad_norm": 1.990179268894857, - "learning_rate": 7.170788249961002e-07, - "loss": 0.9949, - "step": 6071 - }, - { - "epoch": 0.7301148319605604, - "grad_norm": 1.6762632019296835, - "learning_rate": 7.164813295541418e-07, - "loss": 1.0826, - "step": 6072 - }, - { - "epoch": 0.7302350748511994, - "grad_norm": 1.6654966380057619, - "learning_rate": 7.15884028820944e-07, - "loss": 0.8976, - "step": 6073 - }, - { - "epoch": 0.7303553177418385, - "grad_norm": 2.1013805211347356, - "learning_rate": 7.152869228871185e-07, - "loss": 0.7985, - "step": 6074 - }, - { - "epoch": 0.7304755606324776, - "grad_norm": 1.600690650889475, - "learning_rate": 7.146900118432457e-07, - "loss": 0.92, - "step": 6075 - }, - { - "epoch": 0.7305958035231167, - "grad_norm": 1.8198079076987361, - "learning_rate": 7.140932957798753e-07, - "loss": 1.054, - "step": 6076 - }, - { - "epoch": 0.7307160464137558, - "grad_norm": 3.5816033793154194, - "learning_rate": 7.134967747875309e-07, - "loss": 0.9155, - "step": 6077 - }, - { - "epoch": 0.7308362893043949, - "grad_norm": 1.683733940925847, - "learning_rate": 7.129004489567014e-07, - "loss": 1.0148, - "step": 6078 - }, - { - "epoch": 0.730956532195034, - "grad_norm": 2.039217537433912, - "learning_rate": 7.123043183778512e-07, - "loss": 0.9791, - "step": 6079 - }, - { - "epoch": 0.731076775085673, - "grad_norm": 1.5310989616796633, - "learning_rate": 7.117083831414114e-07, - "loss": 0.8446, - "step": 6080 - }, - { - "epoch": 0.7311970179763122, - "grad_norm": 2.246717495763057, - "learning_rate": 7.11112643337787e-07, - "loss": 0.9067, - "step": 6081 - }, - { - "epoch": 0.7313172608669513, - "grad_norm": 3.022875514277241, - "learning_rate": 7.10517099057349e-07, - "loss": 0.9683, - "step": 6082 - }, - { - "epoch": 0.7314375037575903, - "grad_norm": 2.109850455494313, - "learning_rate": 7.099217503904411e-07, - "loss": 0.8154, - "step": 6083 - }, - { - "epoch": 0.7315577466482295, - "grad_norm": 1.9824765954193502, - "learning_rate": 7.093265974273788e-07, - "loss": 1.109, - "step": 6084 - }, - { - "epoch": 0.7316779895388685, - "grad_norm": 1.652800873275371, - "learning_rate": 7.087316402584447e-07, - "loss": 0.9215, - "step": 6085 - }, - { - "epoch": 0.7317982324295076, - "grad_norm": 2.5506402950861164, - "learning_rate": 7.081368789738953e-07, - "loss": 1.0556, - "step": 6086 - }, - { - "epoch": 0.7319184753201466, - "grad_norm": 1.8935698596647514, - "learning_rate": 7.075423136639537e-07, - "loss": 0.9761, - "step": 6087 - }, - { - "epoch": 0.7320387182107858, - "grad_norm": 1.5914441051297394, - "learning_rate": 7.069479444188149e-07, - "loss": 0.9457, - "step": 6088 - }, - { - "epoch": 0.7321589611014249, - "grad_norm": 1.6399414091576476, - "learning_rate": 7.063537713286453e-07, - "loss": 1.0242, - "step": 6089 - }, - { - "epoch": 0.7322792039920639, - "grad_norm": 1.7150682028729882, - "learning_rate": 7.057597944835803e-07, - "loss": 1.0069, - "step": 6090 - }, - { - "epoch": 0.7323994468827031, - "grad_norm": 1.6519534684941044, - "learning_rate": 7.051660139737253e-07, - "loss": 0.939, - "step": 6091 - }, - { - "epoch": 0.7325196897733421, - "grad_norm": 1.8527021160369581, - "learning_rate": 7.045724298891565e-07, - "loss": 0.9617, - "step": 6092 - }, - { - "epoch": 0.7326399326639812, - "grad_norm": 1.8183869094484852, - "learning_rate": 7.039790423199192e-07, - "loss": 0.8913, - "step": 6093 - }, - { - "epoch": 0.7327601755546204, - "grad_norm": 2.473752422129181, - "learning_rate": 7.033858513560322e-07, - "loss": 0.9747, - "step": 6094 - }, - { - "epoch": 0.7328804184452594, - "grad_norm": 2.005303503643185, - "learning_rate": 7.027928570874794e-07, - "loss": 0.977, - "step": 6095 - }, - { - "epoch": 0.7330006613358985, - "grad_norm": 1.692067992550951, - "learning_rate": 7.022000596042194e-07, - "loss": 1.0484, - "step": 6096 - }, - { - "epoch": 0.7331209042265376, - "grad_norm": 2.037919348875704, - "learning_rate": 7.016074589961784e-07, - "loss": 1.0194, - "step": 6097 - }, - { - "epoch": 0.7332411471171767, - "grad_norm": 1.530767948801118, - "learning_rate": 7.01015055353253e-07, - "loss": 0.8642, - "step": 6098 - }, - { - "epoch": 0.7333613900078157, - "grad_norm": 1.6401909390226572, - "learning_rate": 7.004228487653123e-07, - "loss": 0.9763, - "step": 6099 - }, - { - "epoch": 0.7334816328984549, - "grad_norm": 1.6137317831959943, - "learning_rate": 6.998308393221906e-07, - "loss": 0.9817, - "step": 6100 - }, - { - "epoch": 0.733601875789094, - "grad_norm": 2.0558483485165895, - "learning_rate": 6.992390271136977e-07, - "loss": 0.9175, - "step": 6101 - }, - { - "epoch": 0.733722118679733, - "grad_norm": 1.5625407404356664, - "learning_rate": 6.986474122296094e-07, - "loss": 1.0607, - "step": 6102 - }, - { - "epoch": 0.7338423615703722, - "grad_norm": 2.2271309110331057, - "learning_rate": 6.980559947596751e-07, - "loss": 0.9248, - "step": 6103 - }, - { - "epoch": 0.7339626044610112, - "grad_norm": 2.1828661202771817, - "learning_rate": 6.974647747936109e-07, - "loss": 0.9588, - "step": 6104 - }, - { - "epoch": 0.7340828473516503, - "grad_norm": 1.7574372971963068, - "learning_rate": 6.968737524211039e-07, - "loss": 1.0212, - "step": 6105 - }, - { - "epoch": 0.7342030902422895, - "grad_norm": 1.8267519758774233, - "learning_rate": 6.962829277318132e-07, - "loss": 1.0048, - "step": 6106 - }, - { - "epoch": 0.7343233331329285, - "grad_norm": 1.6591293929067819, - "learning_rate": 6.956923008153652e-07, - "loss": 1.0238, - "step": 6107 - }, - { - "epoch": 0.7344435760235676, - "grad_norm": 1.8974159537739061, - "learning_rate": 6.951018717613593e-07, - "loss": 1.0414, - "step": 6108 - }, - { - "epoch": 0.7345638189142067, - "grad_norm": 1.783342704867163, - "learning_rate": 6.945116406593614e-07, - "loss": 0.9747, - "step": 6109 - }, - { - "epoch": 0.7346840618048458, - "grad_norm": 2.123440675262929, - "learning_rate": 6.939216075989089e-07, - "loss": 0.9432, - "step": 6110 - }, - { - "epoch": 0.7348043046954849, - "grad_norm": 1.5809593808133522, - "learning_rate": 6.933317726695109e-07, - "loss": 0.8624, - "step": 6111 - }, - { - "epoch": 0.734924547586124, - "grad_norm": 2.340288276032528, - "learning_rate": 6.92742135960644e-07, - "loss": 0.992, - "step": 6112 - }, - { - "epoch": 0.7350447904767631, - "grad_norm": 0.907931576402591, - "learning_rate": 6.921526975617556e-07, - "loss": 0.7936, - "step": 6113 - }, - { - "epoch": 0.7351650333674021, - "grad_norm": 1.7297071585016213, - "learning_rate": 6.915634575622631e-07, - "loss": 0.9416, - "step": 6114 - }, - { - "epoch": 0.7352852762580413, - "grad_norm": 1.8363128278460716, - "learning_rate": 6.909744160515532e-07, - "loss": 0.912, - "step": 6115 - }, - { - "epoch": 0.7354055191486804, - "grad_norm": 1.7196912095759298, - "learning_rate": 6.903855731189849e-07, - "loss": 0.8891, - "step": 6116 - }, - { - "epoch": 0.7355257620393194, - "grad_norm": 2.9437768307250756, - "learning_rate": 6.897969288538825e-07, - "loss": 1.0263, - "step": 6117 - }, - { - "epoch": 0.7356460049299585, - "grad_norm": 2.0925762245578334, - "learning_rate": 6.892084833455452e-07, - "loss": 1.0109, - "step": 6118 - }, - { - "epoch": 0.7357662478205976, - "grad_norm": 1.398728665350239, - "learning_rate": 6.886202366832384e-07, - "loss": 1.039, - "step": 6119 - }, - { - "epoch": 0.7358864907112367, - "grad_norm": 1.6494571370744675, - "learning_rate": 6.880321889561987e-07, - "loss": 0.93, - "step": 6120 - }, - { - "epoch": 0.7360067336018757, - "grad_norm": 1.888986113639103, - "learning_rate": 6.874443402536338e-07, - "loss": 0.8587, - "step": 6121 - }, - { - "epoch": 0.7361269764925149, - "grad_norm": 1.7978068713466908, - "learning_rate": 6.868566906647177e-07, - "loss": 1.005, - "step": 6122 - }, - { - "epoch": 0.736247219383154, - "grad_norm": 1.66155860462776, - "learning_rate": 6.862692402785984e-07, - "loss": 1.0276, - "step": 6123 - }, - { - "epoch": 0.736367462273793, - "grad_norm": 0.7312173118784212, - "learning_rate": 6.856819891843899e-07, - "loss": 0.7108, - "step": 6124 - }, - { - "epoch": 0.7364877051644322, - "grad_norm": 1.804096604088807, - "learning_rate": 6.8509493747118e-07, - "loss": 0.9176, - "step": 6125 - }, - { - "epoch": 0.7366079480550712, - "grad_norm": 3.360585549058965, - "learning_rate": 6.845080852280221e-07, - "loss": 1.0848, - "step": 6126 - }, - { - "epoch": 0.7367281909457103, - "grad_norm": 1.7744297830980136, - "learning_rate": 6.839214325439409e-07, - "loss": 0.9475, - "step": 6127 - }, - { - "epoch": 0.7368484338363495, - "grad_norm": 1.52264923830018, - "learning_rate": 6.833349795079327e-07, - "loss": 0.916, - "step": 6128 - }, - { - "epoch": 0.7369686767269885, - "grad_norm": 1.5141849558846638, - "learning_rate": 6.827487262089613e-07, - "loss": 0.8835, - "step": 6129 - }, - { - "epoch": 0.7370889196176276, - "grad_norm": 0.9017178895741629, - "learning_rate": 6.821626727359606e-07, - "loss": 0.7949, - "step": 6130 - }, - { - "epoch": 0.7372091625082667, - "grad_norm": 2.2172694034516667, - "learning_rate": 6.815768191778348e-07, - "loss": 0.9743, - "step": 6131 - }, - { - "epoch": 0.7373294053989058, - "grad_norm": 1.7043900996995187, - "learning_rate": 6.809911656234569e-07, - "loss": 0.9318, - "step": 6132 - }, - { - "epoch": 0.7374496482895448, - "grad_norm": 1.9796577915049016, - "learning_rate": 6.804057121616707e-07, - "loss": 0.9792, - "step": 6133 - }, - { - "epoch": 0.737569891180184, - "grad_norm": 1.7942321581241107, - "learning_rate": 6.798204588812888e-07, - "loss": 0.9177, - "step": 6134 - }, - { - "epoch": 0.7376901340708231, - "grad_norm": 1.6978449738077468, - "learning_rate": 6.792354058710937e-07, - "loss": 0.9479, - "step": 6135 - }, - { - "epoch": 0.7378103769614621, - "grad_norm": 1.9805996040890066, - "learning_rate": 6.786505532198374e-07, - "loss": 0.8508, - "step": 6136 - }, - { - "epoch": 0.7379306198521013, - "grad_norm": 1.6428313904870395, - "learning_rate": 6.780659010162411e-07, - "loss": 1.0503, - "step": 6137 - }, - { - "epoch": 0.7380508627427403, - "grad_norm": 1.8834684503524077, - "learning_rate": 6.774814493489975e-07, - "loss": 1.0302, - "step": 6138 - }, - { - "epoch": 0.7381711056333794, - "grad_norm": 1.69654470920984, - "learning_rate": 6.768971983067655e-07, - "loss": 0.863, - "step": 6139 - }, - { - "epoch": 0.7382913485240186, - "grad_norm": 1.0643047264317471, - "learning_rate": 6.763131479781772e-07, - "loss": 0.9004, - "step": 6140 - }, - { - "epoch": 0.7384115914146576, - "grad_norm": 1.7065572888124148, - "learning_rate": 6.757292984518316e-07, - "loss": 0.9736, - "step": 6141 - }, - { - "epoch": 0.7385318343052967, - "grad_norm": 0.890646482509725, - "learning_rate": 6.751456498162981e-07, - "loss": 0.8097, - "step": 6142 - }, - { - "epoch": 0.7386520771959358, - "grad_norm": 1.92129743627934, - "learning_rate": 6.745622021601174e-07, - "loss": 1.0497, - "step": 6143 - }, - { - "epoch": 0.7387723200865749, - "grad_norm": 1.8317376981342703, - "learning_rate": 6.739789555717954e-07, - "loss": 0.9029, - "step": 6144 - }, - { - "epoch": 0.738892562977214, - "grad_norm": 1.8458436619776037, - "learning_rate": 6.733959101398124e-07, - "loss": 0.9796, - "step": 6145 - }, - { - "epoch": 0.7390128058678531, - "grad_norm": 1.7739812210706125, - "learning_rate": 6.728130659526143e-07, - "loss": 1.0145, - "step": 6146 - }, - { - "epoch": 0.7391330487584922, - "grad_norm": 2.1469941440710296, - "learning_rate": 6.7223042309862e-07, - "loss": 0.9043, - "step": 6147 - }, - { - "epoch": 0.7392532916491312, - "grad_norm": 1.7512056421057445, - "learning_rate": 6.716479816662144e-07, - "loss": 0.9304, - "step": 6148 - }, - { - "epoch": 0.7393735345397703, - "grad_norm": 2.913394309336167, - "learning_rate": 6.710657417437531e-07, - "loss": 0.929, - "step": 6149 - }, - { - "epoch": 0.7394937774304094, - "grad_norm": 1.931816933881387, - "learning_rate": 6.704837034195628e-07, - "loss": 0.9895, - "step": 6150 - }, - { - "epoch": 0.7396140203210485, - "grad_norm": 1.6469811247883006, - "learning_rate": 6.699018667819376e-07, - "loss": 1.0471, - "step": 6151 - }, - { - "epoch": 0.7397342632116876, - "grad_norm": 1.8715162337809588, - "learning_rate": 6.693202319191415e-07, - "loss": 0.9208, - "step": 6152 - }, - { - "epoch": 0.7398545061023267, - "grad_norm": 1.6251929975487884, - "learning_rate": 6.687387989194084e-07, - "loss": 0.9339, - "step": 6153 - }, - { - "epoch": 0.7399747489929658, - "grad_norm": 2.019648594647548, - "learning_rate": 6.681575678709404e-07, - "loss": 0.9964, - "step": 6154 - }, - { - "epoch": 0.7400949918836048, - "grad_norm": 1.9054103940917084, - "learning_rate": 6.67576538861911e-07, - "loss": 0.9032, - "step": 6155 - }, - { - "epoch": 0.740215234774244, - "grad_norm": 1.3768827379782405, - "learning_rate": 6.669957119804612e-07, - "loss": 1.0217, - "step": 6156 - }, - { - "epoch": 0.7403354776648831, - "grad_norm": 2.7824779757084754, - "learning_rate": 6.66415087314702e-07, - "loss": 0.9141, - "step": 6157 - }, - { - "epoch": 0.7404557205555221, - "grad_norm": 2.009283691989516, - "learning_rate": 6.65834664952714e-07, - "loss": 0.9187, - "step": 6158 - }, - { - "epoch": 0.7405759634461613, - "grad_norm": 1.650045035932495, - "learning_rate": 6.652544449825457e-07, - "loss": 0.9557, - "step": 6159 - }, - { - "epoch": 0.7406962063368003, - "grad_norm": 1.9258536118226983, - "learning_rate": 6.646744274922182e-07, - "loss": 0.9656, - "step": 6160 - }, - { - "epoch": 0.7408164492274394, - "grad_norm": 3.0206948785732193, - "learning_rate": 6.640946125697171e-07, - "loss": 0.9622, - "step": 6161 - }, - { - "epoch": 0.7409366921180786, - "grad_norm": 2.250340118132775, - "learning_rate": 6.635150003030017e-07, - "loss": 0.96, - "step": 6162 - }, - { - "epoch": 0.7410569350087176, - "grad_norm": 2.111480340402267, - "learning_rate": 6.629355907799981e-07, - "loss": 1.0561, - "step": 6163 - }, - { - "epoch": 0.7411771778993567, - "grad_norm": 1.7251608842653259, - "learning_rate": 6.623563840886015e-07, - "loss": 0.9004, - "step": 6164 - }, - { - "epoch": 0.7412974207899958, - "grad_norm": 1.6317812935008589, - "learning_rate": 6.617773803166795e-07, - "loss": 0.895, - "step": 6165 - }, - { - "epoch": 0.7414176636806349, - "grad_norm": 2.1144239118141184, - "learning_rate": 6.611985795520634e-07, - "loss": 1.0173, - "step": 6166 - }, - { - "epoch": 0.7415379065712739, - "grad_norm": 1.9882491133515348, - "learning_rate": 6.606199818825588e-07, - "loss": 0.9617, - "step": 6167 - }, - { - "epoch": 0.7416581494619131, - "grad_norm": 2.389838993283514, - "learning_rate": 6.600415873959377e-07, - "loss": 1.0075, - "step": 6168 - }, - { - "epoch": 0.7417783923525522, - "grad_norm": 1.919599100418977, - "learning_rate": 6.594633961799437e-07, - "loss": 0.8443, - "step": 6169 - }, - { - "epoch": 0.7418986352431912, - "grad_norm": 1.991758173139946, - "learning_rate": 6.588854083222857e-07, - "loss": 1.0182, - "step": 6170 - }, - { - "epoch": 0.7420188781338304, - "grad_norm": 1.7789826267416946, - "learning_rate": 6.583076239106444e-07, - "loss": 1.0018, - "step": 6171 - }, - { - "epoch": 0.7421391210244694, - "grad_norm": 2.048015713422814, - "learning_rate": 6.577300430326707e-07, - "loss": 0.9522, - "step": 6172 - }, - { - "epoch": 0.7422593639151085, - "grad_norm": 1.8519860278193676, - "learning_rate": 6.571526657759821e-07, - "loss": 0.9191, - "step": 6173 - }, - { - "epoch": 0.7423796068057477, - "grad_norm": 2.9308687397287247, - "learning_rate": 6.565754922281663e-07, - "loss": 0.9102, - "step": 6174 - }, - { - "epoch": 0.7424998496963867, - "grad_norm": 2.0358918191464834, - "learning_rate": 6.559985224767801e-07, - "loss": 0.9803, - "step": 6175 - }, - { - "epoch": 0.7426200925870258, - "grad_norm": 2.456987001539166, - "learning_rate": 6.55421756609349e-07, - "loss": 0.9497, - "step": 6176 - }, - { - "epoch": 0.7427403354776649, - "grad_norm": 1.6111579573971269, - "learning_rate": 6.54845194713369e-07, - "loss": 0.9838, - "step": 6177 - }, - { - "epoch": 0.742860578368304, - "grad_norm": 2.1152510567497784, - "learning_rate": 6.542688368763034e-07, - "loss": 1.0017, - "step": 6178 - }, - { - "epoch": 0.742980821258943, - "grad_norm": 1.4922065933364104, - "learning_rate": 6.536926831855854e-07, - "loss": 0.975, - "step": 6179 - }, - { - "epoch": 0.7431010641495821, - "grad_norm": 1.952714434386647, - "learning_rate": 6.531167337286165e-07, - "loss": 0.9303, - "step": 6180 - }, - { - "epoch": 0.7432213070402213, - "grad_norm": 1.4359293110378406, - "learning_rate": 6.52540988592768e-07, - "loss": 0.9912, - "step": 6181 - }, - { - "epoch": 0.7433415499308603, - "grad_norm": 2.323598262763757, - "learning_rate": 6.519654478653814e-07, - "loss": 1.0323, - "step": 6182 - }, - { - "epoch": 0.7434617928214994, - "grad_norm": 0.7945121440259423, - "learning_rate": 6.51390111633763e-07, - "loss": 0.7746, - "step": 6183 - }, - { - "epoch": 0.7435820357121385, - "grad_norm": 1.69428851586195, - "learning_rate": 6.508149799851932e-07, - "loss": 0.9582, - "step": 6184 - }, - { - "epoch": 0.7437022786027776, - "grad_norm": 1.9909561839893795, - "learning_rate": 6.502400530069183e-07, - "loss": 0.8104, - "step": 6185 - }, - { - "epoch": 0.7438225214934167, - "grad_norm": 1.4610265204903194, - "learning_rate": 6.496653307861535e-07, - "loss": 0.8812, - "step": 6186 - }, - { - "epoch": 0.7439427643840558, - "grad_norm": 1.8758968751376295, - "learning_rate": 6.490908134100857e-07, - "loss": 0.8567, - "step": 6187 - }, - { - "epoch": 0.7440630072746949, - "grad_norm": 2.212269627705937, - "learning_rate": 6.48516500965866e-07, - "loss": 0.8995, - "step": 6188 - }, - { - "epoch": 0.7441832501653339, - "grad_norm": 1.4063822790026501, - "learning_rate": 6.479423935406192e-07, - "loss": 1.0128, - "step": 6189 - }, - { - "epoch": 0.7443034930559731, - "grad_norm": 0.9237301796734139, - "learning_rate": 6.473684912214357e-07, - "loss": 0.904, - "step": 6190 - }, - { - "epoch": 0.7444237359466122, - "grad_norm": 1.872681264825626, - "learning_rate": 6.467947940953778e-07, - "loss": 0.8973, - "step": 6191 - }, - { - "epoch": 0.7445439788372512, - "grad_norm": 1.5858264000975446, - "learning_rate": 6.462213022494732e-07, - "loss": 0.9234, - "step": 6192 - }, - { - "epoch": 0.7446642217278904, - "grad_norm": 0.8270318355590661, - "learning_rate": 6.456480157707201e-07, - "loss": 0.8491, - "step": 6193 - }, - { - "epoch": 0.7447844646185294, - "grad_norm": 1.7145014879693588, - "learning_rate": 6.450749347460866e-07, - "loss": 1.0538, - "step": 6194 - }, - { - "epoch": 0.7449047075091685, - "grad_norm": 1.4949805996506809, - "learning_rate": 6.445020592625083e-07, - "loss": 0.986, - "step": 6195 - }, - { - "epoch": 0.7450249503998077, - "grad_norm": 3.2268453478665275, - "learning_rate": 6.4392938940689e-07, - "loss": 1.0097, - "step": 6196 - }, - { - "epoch": 0.7451451932904467, - "grad_norm": 1.9901460846998256, - "learning_rate": 6.433569252661049e-07, - "loss": 0.9115, - "step": 6197 - }, - { - "epoch": 0.7452654361810858, - "grad_norm": 2.0287108763884536, - "learning_rate": 6.427846669269952e-07, - "loss": 0.909, - "step": 6198 - }, - { - "epoch": 0.7453856790717249, - "grad_norm": 2.058353533111586, - "learning_rate": 6.422126144763729e-07, - "loss": 1.0256, - "step": 6199 - }, - { - "epoch": 0.745505921962364, - "grad_norm": 2.1042047503458816, - "learning_rate": 6.416407680010174e-07, - "loss": 0.9739, - "step": 6200 - }, - { - "epoch": 0.745626164853003, - "grad_norm": 2.09306791212314, - "learning_rate": 6.410691275876774e-07, - "loss": 1.0086, - "step": 6201 - }, - { - "epoch": 0.7457464077436422, - "grad_norm": 1.9465334940237538, - "learning_rate": 6.404976933230704e-07, - "loss": 0.9595, - "step": 6202 - }, - { - "epoch": 0.7458666506342813, - "grad_norm": 1.7882623340677835, - "learning_rate": 6.399264652938813e-07, - "loss": 0.928, - "step": 6203 - }, - { - "epoch": 0.7459868935249203, - "grad_norm": 1.7960367652511429, - "learning_rate": 6.393554435867679e-07, - "loss": 0.9438, - "step": 6204 - }, - { - "epoch": 0.7461071364155595, - "grad_norm": 2.019919146062143, - "learning_rate": 6.387846282883502e-07, - "loss": 1.0301, - "step": 6205 - }, - { - "epoch": 0.7462273793061985, - "grad_norm": 1.7998747623108842, - "learning_rate": 6.38214019485223e-07, - "loss": 0.9635, - "step": 6206 - }, - { - "epoch": 0.7463476221968376, - "grad_norm": 1.5988823026624988, - "learning_rate": 6.376436172639461e-07, - "loss": 0.9122, - "step": 6207 - }, - { - "epoch": 0.7464678650874768, - "grad_norm": 2.1476048191055566, - "learning_rate": 6.370734217110487e-07, - "loss": 0.8464, - "step": 6208 - }, - { - "epoch": 0.7465881079781158, - "grad_norm": 1.5754625882398348, - "learning_rate": 6.36503432913031e-07, - "loss": 0.843, - "step": 6209 - }, - { - "epoch": 0.7467083508687549, - "grad_norm": 1.756069828443791, - "learning_rate": 6.359336509563569e-07, - "loss": 0.8805, - "step": 6210 - }, - { - "epoch": 0.7468285937593939, - "grad_norm": 1.7877215561575757, - "learning_rate": 6.353640759274641e-07, - "loss": 1.0109, - "step": 6211 - }, - { - "epoch": 0.7469488366500331, - "grad_norm": 2.955858691006099, - "learning_rate": 6.347947079127556e-07, - "loss": 0.9477, - "step": 6212 - }, - { - "epoch": 0.7470690795406721, - "grad_norm": 2.0177745151119955, - "learning_rate": 6.342255469986053e-07, - "loss": 0.9727, - "step": 6213 - }, - { - "epoch": 0.7471893224313112, - "grad_norm": 1.7528722897114, - "learning_rate": 6.336565932713533e-07, - "loss": 0.9747, - "step": 6214 - }, - { - "epoch": 0.7473095653219504, - "grad_norm": 1.6516274056001012, - "learning_rate": 6.330878468173088e-07, - "loss": 0.9812, - "step": 6215 - }, - { - "epoch": 0.7474298082125894, - "grad_norm": 1.6885641769173245, - "learning_rate": 6.32519307722752e-07, - "loss": 0.928, - "step": 6216 - }, - { - "epoch": 0.7475500511032285, - "grad_norm": 0.8298205166672471, - "learning_rate": 6.31950976073929e-07, - "loss": 0.7752, - "step": 6217 - }, - { - "epoch": 0.7476702939938676, - "grad_norm": 1.8912913511688525, - "learning_rate": 6.31382851957055e-07, - "loss": 0.9975, - "step": 6218 - }, - { - "epoch": 0.7477905368845067, - "grad_norm": 2.2014956028929253, - "learning_rate": 6.308149354583143e-07, - "loss": 0.9068, - "step": 6219 - }, - { - "epoch": 0.7479107797751458, - "grad_norm": 1.7276501574033247, - "learning_rate": 6.302472266638586e-07, - "loss": 1.017, - "step": 6220 - }, - { - "epoch": 0.7480310226657849, - "grad_norm": 1.7810461362660026, - "learning_rate": 6.296797256598101e-07, - "loss": 0.9029, - "step": 6221 - }, - { - "epoch": 0.748151265556424, - "grad_norm": 1.614151911368584, - "learning_rate": 6.291124325322576e-07, - "loss": 1.0017, - "step": 6222 - }, - { - "epoch": 0.748271508447063, - "grad_norm": 1.4416896469107503, - "learning_rate": 6.285453473672595e-07, - "loss": 0.8203, - "step": 6223 - }, - { - "epoch": 0.7483917513377022, - "grad_norm": 1.768580368588564, - "learning_rate": 6.279784702508415e-07, - "loss": 0.9517, - "step": 6224 - }, - { - "epoch": 0.7485119942283412, - "grad_norm": 0.8570860115853838, - "learning_rate": 6.274118012689979e-07, - "loss": 0.8413, - "step": 6225 - }, - { - "epoch": 0.7486322371189803, - "grad_norm": 1.5548722813890166, - "learning_rate": 6.268453405076943e-07, - "loss": 0.8876, - "step": 6226 - }, - { - "epoch": 0.7487524800096195, - "grad_norm": 2.833774887534332, - "learning_rate": 6.262790880528592e-07, - "loss": 1.0189, - "step": 6227 - }, - { - "epoch": 0.7488727229002585, - "grad_norm": 2.4185167224771997, - "learning_rate": 6.257130439903951e-07, - "loss": 0.992, - "step": 6228 - }, - { - "epoch": 0.7489929657908976, - "grad_norm": 1.7525565682721618, - "learning_rate": 6.251472084061695e-07, - "loss": 1.0066, - "step": 6229 - }, - { - "epoch": 0.7491132086815367, - "grad_norm": 2.2422180173704462, - "learning_rate": 6.245815813860191e-07, - "loss": 1.1004, - "step": 6230 - }, - { - "epoch": 0.7492334515721758, - "grad_norm": 1.8719239115494233, - "learning_rate": 6.240161630157495e-07, - "loss": 0.8929, - "step": 6231 - }, - { - "epoch": 0.7493536944628149, - "grad_norm": 2.1152897173262857, - "learning_rate": 6.23450953381133e-07, - "loss": 0.895, - "step": 6232 - }, - { - "epoch": 0.749473937353454, - "grad_norm": 1.9976173752160078, - "learning_rate": 6.228859525679131e-07, - "loss": 0.8869, - "step": 6233 - }, - { - "epoch": 0.7495941802440931, - "grad_norm": 2.292918943939751, - "learning_rate": 6.223211606617986e-07, - "loss": 0.9962, - "step": 6234 - }, - { - "epoch": 0.7497144231347321, - "grad_norm": 1.7025488482622149, - "learning_rate": 6.217565777484701e-07, - "loss": 1.0261, - "step": 6235 - }, - { - "epoch": 0.7498346660253713, - "grad_norm": 1.6192422979848105, - "learning_rate": 6.211922039135722e-07, - "loss": 1.0042, - "step": 6236 - }, - { - "epoch": 0.7499549089160104, - "grad_norm": 1.7171496483446573, - "learning_rate": 6.206280392427201e-07, - "loss": 1.0027, - "step": 6237 - }, - { - "epoch": 0.7500751518066494, - "grad_norm": 1.3570359813583548, - "learning_rate": 6.200640838214983e-07, - "loss": 0.9331, - "step": 6238 - }, - { - "epoch": 0.7501953946972886, - "grad_norm": 1.6960065273663132, - "learning_rate": 6.195003377354578e-07, - "loss": 0.8692, - "step": 6239 - }, - { - "epoch": 0.7503156375879276, - "grad_norm": 2.7244019788336944, - "learning_rate": 6.189368010701183e-07, - "loss": 0.9261, - "step": 6240 - }, - { - "epoch": 0.7504358804785667, - "grad_norm": 3.340479740021432, - "learning_rate": 6.183734739109683e-07, - "loss": 0.9533, - "step": 6241 - }, - { - "epoch": 0.7505561233692057, - "grad_norm": 1.9018484485188147, - "learning_rate": 6.178103563434629e-07, - "loss": 0.8919, - "step": 6242 - }, - { - "epoch": 0.7506763662598449, - "grad_norm": 1.5004228949459477, - "learning_rate": 6.172474484530283e-07, - "loss": 1.0344, - "step": 6243 - }, - { - "epoch": 0.750796609150484, - "grad_norm": 2.1793873351229105, - "learning_rate": 6.166847503250563e-07, - "loss": 0.9535, - "step": 6244 - }, - { - "epoch": 0.750916852041123, - "grad_norm": 2.369908547226411, - "learning_rate": 6.161222620449078e-07, - "loss": 0.9972, - "step": 6245 - }, - { - "epoch": 0.7510370949317622, - "grad_norm": 1.845750273248662, - "learning_rate": 6.155599836979117e-07, - "loss": 1.005, - "step": 6246 - }, - { - "epoch": 0.7511573378224012, - "grad_norm": 1.9073050034072523, - "learning_rate": 6.149979153693649e-07, - "loss": 1.0128, - "step": 6247 - }, - { - "epoch": 0.7512775807130403, - "grad_norm": 1.8756213748010866, - "learning_rate": 6.144360571445343e-07, - "loss": 0.963, - "step": 6248 - }, - { - "epoch": 0.7513978236036795, - "grad_norm": 1.593567332850995, - "learning_rate": 6.138744091086509e-07, - "loss": 0.9973, - "step": 6249 - }, - { - "epoch": 0.7515180664943185, - "grad_norm": 2.2208917515880255, - "learning_rate": 6.133129713469183e-07, - "loss": 0.9337, - "step": 6250 - }, - { - "epoch": 0.7516383093849576, - "grad_norm": 1.656885133278451, - "learning_rate": 6.127517439445053e-07, - "loss": 0.8385, - "step": 6251 - }, - { - "epoch": 0.7517585522755967, - "grad_norm": 1.93825725400648, - "learning_rate": 6.121907269865498e-07, - "loss": 1.0236, - "step": 6252 - }, - { - "epoch": 0.7518787951662358, - "grad_norm": 0.9785239091777571, - "learning_rate": 6.116299205581577e-07, - "loss": 0.924, - "step": 6253 - }, - { - "epoch": 0.7519990380568748, - "grad_norm": 1.7485217253960927, - "learning_rate": 6.110693247444018e-07, - "loss": 0.88, - "step": 6254 - }, - { - "epoch": 0.752119280947514, - "grad_norm": 1.7110808848041172, - "learning_rate": 6.105089396303258e-07, - "loss": 1.0195, - "step": 6255 - }, - { - "epoch": 0.7522395238381531, - "grad_norm": 1.9191483174268285, - "learning_rate": 6.099487653009383e-07, - "loss": 0.9602, - "step": 6256 - }, - { - "epoch": 0.7523597667287921, - "grad_norm": 1.857088237525171, - "learning_rate": 6.093888018412192e-07, - "loss": 1.0369, - "step": 6257 - }, - { - "epoch": 0.7524800096194313, - "grad_norm": 0.7681277493706239, - "learning_rate": 6.088290493361125e-07, - "loss": 0.7736, - "step": 6258 - }, - { - "epoch": 0.7526002525100703, - "grad_norm": 2.040703708597046, - "learning_rate": 6.082695078705322e-07, - "loss": 0.9124, - "step": 6259 - }, - { - "epoch": 0.7527204954007094, - "grad_norm": 1.9149576390613485, - "learning_rate": 6.077101775293618e-07, - "loss": 0.8796, - "step": 6260 - }, - { - "epoch": 0.7528407382913486, - "grad_norm": 2.3091279265468265, - "learning_rate": 6.071510583974504e-07, - "loss": 1.0326, - "step": 6261 - }, - { - "epoch": 0.7529609811819876, - "grad_norm": 1.7931069713495942, - "learning_rate": 6.065921505596161e-07, - "loss": 0.9121, - "step": 6262 - }, - { - "epoch": 0.7530812240726267, - "grad_norm": 1.669922588861324, - "learning_rate": 6.060334541006445e-07, - "loss": 0.9723, - "step": 6263 - }, - { - "epoch": 0.7532014669632658, - "grad_norm": 1.4490616294228063, - "learning_rate": 6.05474969105289e-07, - "loss": 0.8874, - "step": 6264 - }, - { - "epoch": 0.7533217098539049, - "grad_norm": 2.0413389397030683, - "learning_rate": 6.049166956582725e-07, - "loss": 0.9378, - "step": 6265 - }, - { - "epoch": 0.753441952744544, - "grad_norm": 1.8726078031954858, - "learning_rate": 6.043586338442841e-07, - "loss": 1.0663, - "step": 6266 - }, - { - "epoch": 0.7535621956351831, - "grad_norm": 1.2796348998795501, - "learning_rate": 6.038007837479815e-07, - "loss": 0.927, - "step": 6267 - }, - { - "epoch": 0.7536824385258222, - "grad_norm": 1.721585241400554, - "learning_rate": 6.032431454539897e-07, - "loss": 0.8338, - "step": 6268 - }, - { - "epoch": 0.7538026814164612, - "grad_norm": 2.0929543420994143, - "learning_rate": 6.026857190469014e-07, - "loss": 1.0055, - "step": 6269 - }, - { - "epoch": 0.7539229243071004, - "grad_norm": 2.2055980248556883, - "learning_rate": 6.0212850461128e-07, - "loss": 0.9442, - "step": 6270 - }, - { - "epoch": 0.7540431671977395, - "grad_norm": 1.761304108740852, - "learning_rate": 6.015715022316516e-07, - "loss": 0.9476, - "step": 6271 - }, - { - "epoch": 0.7541634100883785, - "grad_norm": 2.2032291036047957, - "learning_rate": 6.010147119925154e-07, - "loss": 0.9842, - "step": 6272 - }, - { - "epoch": 0.7542836529790176, - "grad_norm": 1.7212399566512517, - "learning_rate": 6.004581339783348e-07, - "loss": 0.8553, - "step": 6273 - }, - { - "epoch": 0.7544038958696567, - "grad_norm": 2.650963223139282, - "learning_rate": 5.999017682735425e-07, - "loss": 0.8758, - "step": 6274 - }, - { - "epoch": 0.7545241387602958, - "grad_norm": 1.8947003790486736, - "learning_rate": 5.993456149625387e-07, - "loss": 0.8636, - "step": 6275 - }, - { - "epoch": 0.7546443816509348, - "grad_norm": 1.5912900901952367, - "learning_rate": 5.987896741296909e-07, - "loss": 1.0186, - "step": 6276 - }, - { - "epoch": 0.754764624541574, - "grad_norm": 1.807915578951585, - "learning_rate": 5.982339458593361e-07, - "loss": 0.9835, - "step": 6277 - }, - { - "epoch": 0.7548848674322131, - "grad_norm": 1.5075140939436422, - "learning_rate": 5.976784302357767e-07, - "loss": 1.0406, - "step": 6278 - }, - { - "epoch": 0.7550051103228521, - "grad_norm": 1.6383353104810663, - "learning_rate": 5.971231273432855e-07, - "loss": 0.9235, - "step": 6279 - }, - { - "epoch": 0.7551253532134913, - "grad_norm": 0.8709546307376784, - "learning_rate": 5.965680372661e-07, - "loss": 0.7786, - "step": 6280 - }, - { - "epoch": 0.7552455961041303, - "grad_norm": 1.8605990187702135, - "learning_rate": 5.960131600884266e-07, - "loss": 0.7619, - "step": 6281 - }, - { - "epoch": 0.7553658389947694, - "grad_norm": 1.6233959350551963, - "learning_rate": 5.954584958944413e-07, - "loss": 0.9573, - "step": 6282 - }, - { - "epoch": 0.7554860818854086, - "grad_norm": 3.087325853386356, - "learning_rate": 5.949040447682854e-07, - "loss": 1.0036, - "step": 6283 - }, - { - "epoch": 0.7556063247760476, - "grad_norm": 1.962036615078654, - "learning_rate": 5.943498067940686e-07, - "loss": 0.8878, - "step": 6284 - }, - { - "epoch": 0.7557265676666867, - "grad_norm": 1.7927571755700384, - "learning_rate": 5.937957820558686e-07, - "loss": 1.0125, - "step": 6285 - }, - { - "epoch": 0.7558468105573258, - "grad_norm": 0.8601844443430693, - "learning_rate": 5.932419706377296e-07, - "loss": 0.8694, - "step": 6286 - }, - { - "epoch": 0.7559670534479649, - "grad_norm": 1.7991691208163485, - "learning_rate": 5.92688372623666e-07, - "loss": 0.9418, - "step": 6287 - }, - { - "epoch": 0.7560872963386039, - "grad_norm": 1.8724418355059538, - "learning_rate": 5.921349880976574e-07, - "loss": 0.9345, - "step": 6288 - }, - { - "epoch": 0.7562075392292431, - "grad_norm": 1.7669154489837429, - "learning_rate": 5.915818171436515e-07, - "loss": 1.0149, - "step": 6289 - }, - { - "epoch": 0.7563277821198822, - "grad_norm": 1.6377163489303734, - "learning_rate": 5.910288598455642e-07, - "loss": 0.9429, - "step": 6290 - }, - { - "epoch": 0.7564480250105212, - "grad_norm": 2.2888857411956542, - "learning_rate": 5.90476116287278e-07, - "loss": 0.9328, - "step": 6291 - }, - { - "epoch": 0.7565682679011604, - "grad_norm": 1.8400298913310678, - "learning_rate": 5.899235865526456e-07, - "loss": 0.8769, - "step": 6292 - }, - { - "epoch": 0.7566885107917994, - "grad_norm": 1.6577980436365303, - "learning_rate": 5.893712707254825e-07, - "loss": 1.0146, - "step": 6293 - }, - { - "epoch": 0.7568087536824385, - "grad_norm": 2.4322806010243023, - "learning_rate": 5.888191688895769e-07, - "loss": 0.8629, - "step": 6294 - }, - { - "epoch": 0.7569289965730777, - "grad_norm": 2.0511331310837537, - "learning_rate": 5.882672811286813e-07, - "loss": 0.8256, - "step": 6295 - }, - { - "epoch": 0.7570492394637167, - "grad_norm": 1.9353325318060957, - "learning_rate": 5.877156075265166e-07, - "loss": 0.8967, - "step": 6296 - }, - { - "epoch": 0.7571694823543558, - "grad_norm": 2.794525864840964, - "learning_rate": 5.871641481667715e-07, - "loss": 0.8944, - "step": 6297 - }, - { - "epoch": 0.7572897252449949, - "grad_norm": 1.624332804526286, - "learning_rate": 5.866129031331011e-07, - "loss": 1.0381, - "step": 6298 - }, - { - "epoch": 0.757409968135634, - "grad_norm": 2.1996763034583835, - "learning_rate": 5.8606187250913e-07, - "loss": 1.0314, - "step": 6299 - }, - { - "epoch": 0.757530211026273, - "grad_norm": 1.9642562442897316, - "learning_rate": 5.855110563784482e-07, - "loss": 1.0398, - "step": 6300 - }, - { - "epoch": 0.7576504539169122, - "grad_norm": 2.2079897799255357, - "learning_rate": 5.849604548246156e-07, - "loss": 0.8372, - "step": 6301 - }, - { - "epoch": 0.7577706968075513, - "grad_norm": 1.8430149746895526, - "learning_rate": 5.844100679311565e-07, - "loss": 0.9956, - "step": 6302 - }, - { - "epoch": 0.7578909396981903, - "grad_norm": 2.088059050115978, - "learning_rate": 5.838598957815637e-07, - "loss": 0.9626, - "step": 6303 - }, - { - "epoch": 0.7580111825888295, - "grad_norm": 1.3512535403590795, - "learning_rate": 5.833099384592996e-07, - "loss": 1.056, - "step": 6304 - }, - { - "epoch": 0.7581314254794685, - "grad_norm": 1.8538097795350932, - "learning_rate": 5.827601960477913e-07, - "loss": 0.8986, - "step": 6305 - }, - { - "epoch": 0.7582516683701076, - "grad_norm": 1.7653613948685372, - "learning_rate": 5.822106686304344e-07, - "loss": 0.9112, - "step": 6306 - }, - { - "epoch": 0.7583719112607467, - "grad_norm": 1.591633981432358, - "learning_rate": 5.816613562905919e-07, - "loss": 0.7747, - "step": 6307 - }, - { - "epoch": 0.7584921541513858, - "grad_norm": 1.403813349176728, - "learning_rate": 5.811122591115933e-07, - "loss": 0.9047, - "step": 6308 - }, - { - "epoch": 0.7586123970420249, - "grad_norm": 2.186355509406735, - "learning_rate": 5.805633771767376e-07, - "loss": 0.9176, - "step": 6309 - }, - { - "epoch": 0.7587326399326639, - "grad_norm": 3.87384360501596, - "learning_rate": 5.800147105692888e-07, - "loss": 0.9748, - "step": 6310 - }, - { - "epoch": 0.7588528828233031, - "grad_norm": 1.7654830740541605, - "learning_rate": 5.794662593724795e-07, - "loss": 0.9905, - "step": 6311 - }, - { - "epoch": 0.7589731257139422, - "grad_norm": 1.99181802352747, - "learning_rate": 5.789180236695091e-07, - "loss": 0.9506, - "step": 6312 - }, - { - "epoch": 0.7590933686045812, - "grad_norm": 2.417451424174771, - "learning_rate": 5.78370003543544e-07, - "loss": 1.0525, - "step": 6313 - }, - { - "epoch": 0.7592136114952204, - "grad_norm": 1.8402156251532333, - "learning_rate": 5.778221990777203e-07, - "loss": 1.0304, - "step": 6314 - }, - { - "epoch": 0.7593338543858594, - "grad_norm": 1.8976914309510655, - "learning_rate": 5.772746103551372e-07, - "loss": 1.0284, - "step": 6315 - }, - { - "epoch": 0.7594540972764985, - "grad_norm": 1.5645917242902863, - "learning_rate": 5.767272374588648e-07, - "loss": 0.916, - "step": 6316 - }, - { - "epoch": 0.7595743401671377, - "grad_norm": 1.5696992957924858, - "learning_rate": 5.76180080471939e-07, - "loss": 0.9785, - "step": 6317 - }, - { - "epoch": 0.7596945830577767, - "grad_norm": 2.1084221842542976, - "learning_rate": 5.756331394773631e-07, - "loss": 0.9156, - "step": 6318 - }, - { - "epoch": 0.7598148259484158, - "grad_norm": 1.6983656572379886, - "learning_rate": 5.750864145581071e-07, - "loss": 0.9698, - "step": 6319 - }, - { - "epoch": 0.7599350688390549, - "grad_norm": 1.7462220283512617, - "learning_rate": 5.745399057971085e-07, - "loss": 1.0522, - "step": 6320 - }, - { - "epoch": 0.760055311729694, - "grad_norm": 4.770820362234459, - "learning_rate": 5.739936132772738e-07, - "loss": 0.955, - "step": 6321 - }, - { - "epoch": 0.760175554620333, - "grad_norm": 1.8820837992944226, - "learning_rate": 5.734475370814733e-07, - "loss": 0.9423, - "step": 6322 - }, - { - "epoch": 0.7602957975109722, - "grad_norm": 1.4728107725473654, - "learning_rate": 5.729016772925483e-07, - "loss": 0.9769, - "step": 6323 - }, - { - "epoch": 0.7604160404016113, - "grad_norm": 1.8317294980496626, - "learning_rate": 5.723560339933038e-07, - "loss": 0.9012, - "step": 6324 - }, - { - "epoch": 0.7605362832922503, - "grad_norm": 1.7941093722826722, - "learning_rate": 5.71810607266513e-07, - "loss": 0.8447, - "step": 6325 - }, - { - "epoch": 0.7606565261828895, - "grad_norm": 1.9201050959076287, - "learning_rate": 5.712653971949184e-07, - "loss": 0.7999, - "step": 6326 - }, - { - "epoch": 0.7607767690735285, - "grad_norm": 2.361472100979305, - "learning_rate": 5.707204038612268e-07, - "loss": 0.9737, - "step": 6327 - }, - { - "epoch": 0.7608970119641676, - "grad_norm": 3.40615004646567, - "learning_rate": 5.701756273481138e-07, - "loss": 0.9285, - "step": 6328 - }, - { - "epoch": 0.7610172548548068, - "grad_norm": 1.436718686940294, - "learning_rate": 5.696310677382212e-07, - "loss": 0.9374, - "step": 6329 - }, - { - "epoch": 0.7611374977454458, - "grad_norm": 0.8522838372171084, - "learning_rate": 5.690867251141576e-07, - "loss": 0.845, - "step": 6330 - }, - { - "epoch": 0.7612577406360849, - "grad_norm": 2.210471060248702, - "learning_rate": 5.685425995585013e-07, - "loss": 1.1182, - "step": 6331 - }, - { - "epoch": 0.761377983526724, - "grad_norm": 0.8116849332342806, - "learning_rate": 5.679986911537935e-07, - "loss": 0.8169, - "step": 6332 - }, - { - "epoch": 0.7614982264173631, - "grad_norm": 2.0368003720579084, - "learning_rate": 5.674549999825462e-07, - "loss": 0.8774, - "step": 6333 - }, - { - "epoch": 0.7616184693080021, - "grad_norm": 1.0178155018121235, - "learning_rate": 5.669115261272363e-07, - "loss": 0.9692, - "step": 6334 - }, - { - "epoch": 0.7617387121986413, - "grad_norm": 2.484497451163689, - "learning_rate": 5.663682696703081e-07, - "loss": 0.931, - "step": 6335 - }, - { - "epoch": 0.7618589550892804, - "grad_norm": 1.8528727470713147, - "learning_rate": 5.658252306941746e-07, - "loss": 1.032, - "step": 6336 - }, - { - "epoch": 0.7619791979799194, - "grad_norm": 1.9369219255994599, - "learning_rate": 5.65282409281212e-07, - "loss": 0.9586, - "step": 6337 - }, - { - "epoch": 0.7620994408705585, - "grad_norm": 2.1906698965132794, - "learning_rate": 5.64739805513768e-07, - "loss": 0.8942, - "step": 6338 - }, - { - "epoch": 0.7622196837611976, - "grad_norm": 0.8503187703802535, - "learning_rate": 5.641974194741541e-07, - "loss": 0.7823, - "step": 6339 - }, - { - "epoch": 0.7623399266518367, - "grad_norm": 0.7906128788196066, - "learning_rate": 5.636552512446502e-07, - "loss": 0.8462, - "step": 6340 - }, - { - "epoch": 0.7624601695424758, - "grad_norm": 1.5810551398914579, - "learning_rate": 5.631133009075027e-07, - "loss": 0.9797, - "step": 6341 - }, - { - "epoch": 0.7625804124331149, - "grad_norm": 2.1597315006799565, - "learning_rate": 5.625715685449242e-07, - "loss": 0.8954, - "step": 6342 - }, - { - "epoch": 0.762700655323754, - "grad_norm": 1.942590854832423, - "learning_rate": 5.620300542390966e-07, - "loss": 0.9145, - "step": 6343 - }, - { - "epoch": 0.762820898214393, - "grad_norm": 1.7233858185504163, - "learning_rate": 5.614887580721659e-07, - "loss": 1.0503, - "step": 6344 - }, - { - "epoch": 0.7629411411050322, - "grad_norm": 1.783308679939109, - "learning_rate": 5.609476801262481e-07, - "loss": 0.9354, - "step": 6345 - }, - { - "epoch": 0.7630613839956712, - "grad_norm": 4.661300480334726, - "learning_rate": 5.604068204834223e-07, - "loss": 0.8397, - "step": 6346 - }, - { - "epoch": 0.7631816268863103, - "grad_norm": 2.0241312730232957, - "learning_rate": 5.598661792257367e-07, - "loss": 0.9494, - "step": 6347 - }, - { - "epoch": 0.7633018697769495, - "grad_norm": 1.8270375204341642, - "learning_rate": 5.593257564352071e-07, - "loss": 0.9652, - "step": 6348 - }, - { - "epoch": 0.7634221126675885, - "grad_norm": 1.4497147608416368, - "learning_rate": 5.58785552193815e-07, - "loss": 0.9525, - "step": 6349 - }, - { - "epoch": 0.7635423555582276, - "grad_norm": 2.339624665309358, - "learning_rate": 5.582455665835086e-07, - "loss": 0.955, - "step": 6350 - }, - { - "epoch": 0.7636625984488667, - "grad_norm": 3.219152184701594, - "learning_rate": 5.577057996862036e-07, - "loss": 0.9327, - "step": 6351 - }, - { - "epoch": 0.7637828413395058, - "grad_norm": 1.453775239890994, - "learning_rate": 5.571662515837814e-07, - "loss": 0.9567, - "step": 6352 - }, - { - "epoch": 0.7639030842301449, - "grad_norm": 1.5481509716333346, - "learning_rate": 5.566269223580926e-07, - "loss": 1.0336, - "step": 6353 - }, - { - "epoch": 0.764023327120784, - "grad_norm": 1.525040472775564, - "learning_rate": 5.560878120909511e-07, - "loss": 0.94, - "step": 6354 - }, - { - "epoch": 0.7641435700114231, - "grad_norm": 0.931248836388597, - "learning_rate": 5.55548920864141e-07, - "loss": 0.8444, - "step": 6355 - }, - { - "epoch": 0.7642638129020621, - "grad_norm": 1.5837022786526727, - "learning_rate": 5.550102487594113e-07, - "loss": 0.9669, - "step": 6356 - }, - { - "epoch": 0.7643840557927013, - "grad_norm": 1.6088876819752427, - "learning_rate": 5.54471795858477e-07, - "loss": 0.9157, - "step": 6357 - }, - { - "epoch": 0.7645042986833404, - "grad_norm": 2.117232769133769, - "learning_rate": 5.539335622430235e-07, - "loss": 1.035, - "step": 6358 - }, - { - "epoch": 0.7646245415739794, - "grad_norm": 1.9035466520996984, - "learning_rate": 5.533955479946975e-07, - "loss": 0.9474, - "step": 6359 - }, - { - "epoch": 0.7647447844646186, - "grad_norm": 0.894016934260053, - "learning_rate": 5.528577531951173e-07, - "loss": 0.8707, - "step": 6360 - }, - { - "epoch": 0.7648650273552576, - "grad_norm": 1.9506771770271496, - "learning_rate": 5.523201779258653e-07, - "loss": 0.9415, - "step": 6361 - }, - { - "epoch": 0.7649852702458967, - "grad_norm": 2.1603000586101926, - "learning_rate": 5.517828222684912e-07, - "loss": 1.0409, - "step": 6362 - }, - { - "epoch": 0.7651055131365359, - "grad_norm": 0.8109021247101126, - "learning_rate": 5.512456863045117e-07, - "loss": 0.8009, - "step": 6363 - }, - { - "epoch": 0.7652257560271749, - "grad_norm": 1.7277146719719643, - "learning_rate": 5.507087701154089e-07, - "loss": 0.9403, - "step": 6364 - }, - { - "epoch": 0.765345998917814, - "grad_norm": 2.5267324754940055, - "learning_rate": 5.50172073782634e-07, - "loss": 0.9435, - "step": 6365 - }, - { - "epoch": 0.7654662418084531, - "grad_norm": 1.6091721786544222, - "learning_rate": 5.496355973876023e-07, - "loss": 1.0672, - "step": 6366 - }, - { - "epoch": 0.7655864846990922, - "grad_norm": 1.6433341773123182, - "learning_rate": 5.490993410116984e-07, - "loss": 0.9131, - "step": 6367 - }, - { - "epoch": 0.7657067275897312, - "grad_norm": 1.625008436327823, - "learning_rate": 5.485633047362704e-07, - "loss": 0.8988, - "step": 6368 - }, - { - "epoch": 0.7658269704803703, - "grad_norm": 1.8118099345972316, - "learning_rate": 5.480274886426341e-07, - "loss": 0.9804, - "step": 6369 - }, - { - "epoch": 0.7659472133710095, - "grad_norm": 2.019335266874707, - "learning_rate": 5.474918928120744e-07, - "loss": 0.9749, - "step": 6370 - }, - { - "epoch": 0.7660674562616485, - "grad_norm": 1.452776631261026, - "learning_rate": 5.469565173258392e-07, - "loss": 1.0715, - "step": 6371 - }, - { - "epoch": 0.7661876991522876, - "grad_norm": 1.7211096085394832, - "learning_rate": 5.464213622651454e-07, - "loss": 0.8407, - "step": 6372 - }, - { - "epoch": 0.7663079420429267, - "grad_norm": 1.9808515972450405, - "learning_rate": 5.458864277111753e-07, - "loss": 1.0355, - "step": 6373 - }, - { - "epoch": 0.7664281849335658, - "grad_norm": 2.2566969072277536, - "learning_rate": 5.453517137450769e-07, - "loss": 0.8874, - "step": 6374 - }, - { - "epoch": 0.7665484278242048, - "grad_norm": 1.9447143125210495, - "learning_rate": 5.448172204479684e-07, - "loss": 0.9571, - "step": 6375 - }, - { - "epoch": 0.766668670714844, - "grad_norm": 1.5999031961482195, - "learning_rate": 5.442829479009294e-07, - "loss": 0.9464, - "step": 6376 - }, - { - "epoch": 0.7667889136054831, - "grad_norm": 1.7656123068024103, - "learning_rate": 5.437488961850103e-07, - "loss": 0.9194, - "step": 6377 - }, - { - "epoch": 0.7669091564961221, - "grad_norm": 1.7276443613241377, - "learning_rate": 5.432150653812258e-07, - "loss": 0.9577, - "step": 6378 - }, - { - "epoch": 0.7670293993867613, - "grad_norm": 1.9877608242073643, - "learning_rate": 5.42681455570557e-07, - "loss": 1.0246, - "step": 6379 - }, - { - "epoch": 0.7671496422774003, - "grad_norm": 3.831808256763582, - "learning_rate": 5.42148066833954e-07, - "loss": 0.8486, - "step": 6380 - }, - { - "epoch": 0.7672698851680394, - "grad_norm": 2.0093844305265067, - "learning_rate": 5.416148992523289e-07, - "loss": 0.9591, - "step": 6381 - }, - { - "epoch": 0.7673901280586786, - "grad_norm": 1.6355458429268148, - "learning_rate": 5.410819529065644e-07, - "loss": 0.9807, - "step": 6382 - }, - { - "epoch": 0.7675103709493176, - "grad_norm": 1.9937747391747367, - "learning_rate": 5.405492278775079e-07, - "loss": 0.8544, - "step": 6383 - }, - { - "epoch": 0.7676306138399567, - "grad_norm": 2.642603928354713, - "learning_rate": 5.400167242459732e-07, - "loss": 0.9985, - "step": 6384 - }, - { - "epoch": 0.7677508567305958, - "grad_norm": 1.629076612606421, - "learning_rate": 5.394844420927405e-07, - "loss": 1.0027, - "step": 6385 - }, - { - "epoch": 0.7678710996212349, - "grad_norm": 2.131143105250934, - "learning_rate": 5.389523814985562e-07, - "loss": 0.9224, - "step": 6386 - }, - { - "epoch": 0.767991342511874, - "grad_norm": 2.2839742619522343, - "learning_rate": 5.384205425441344e-07, - "loss": 0.955, - "step": 6387 - }, - { - "epoch": 0.7681115854025131, - "grad_norm": 1.643609085134768, - "learning_rate": 5.378889253101537e-07, - "loss": 1.0408, - "step": 6388 - }, - { - "epoch": 0.7682318282931522, - "grad_norm": 1.5237539109515477, - "learning_rate": 5.373575298772617e-07, - "loss": 1.0033, - "step": 6389 - }, - { - "epoch": 0.7683520711837912, - "grad_norm": 0.7833985348884258, - "learning_rate": 5.368263563260689e-07, - "loss": 0.8183, - "step": 6390 - }, - { - "epoch": 0.7684723140744304, - "grad_norm": 1.5995707711020215, - "learning_rate": 5.362954047371537e-07, - "loss": 0.8372, - "step": 6391 - }, - { - "epoch": 0.7685925569650695, - "grad_norm": 2.4977498418498474, - "learning_rate": 5.357646751910627e-07, - "loss": 0.9213, - "step": 6392 - }, - { - "epoch": 0.7687127998557085, - "grad_norm": 2.133657993507408, - "learning_rate": 5.352341677683061e-07, - "loss": 0.9979, - "step": 6393 - }, - { - "epoch": 0.7688330427463477, - "grad_norm": 2.375581971190062, - "learning_rate": 5.347038825493617e-07, - "loss": 0.9839, - "step": 6394 - }, - { - "epoch": 0.7689532856369867, - "grad_norm": 1.8181721952573842, - "learning_rate": 5.341738196146732e-07, - "loss": 0.8747, - "step": 6395 - }, - { - "epoch": 0.7690735285276258, - "grad_norm": 2.1070832412636227, - "learning_rate": 5.336439790446503e-07, - "loss": 0.9318, - "step": 6396 - }, - { - "epoch": 0.769193771418265, - "grad_norm": 1.5999063255783479, - "learning_rate": 5.331143609196711e-07, - "loss": 0.8268, - "step": 6397 - }, - { - "epoch": 0.769314014308904, - "grad_norm": 1.949062792209336, - "learning_rate": 5.325849653200758e-07, - "loss": 0.9698, - "step": 6398 - }, - { - "epoch": 0.7694342571995431, - "grad_norm": 2.003003487792004, - "learning_rate": 5.32055792326175e-07, - "loss": 0.9594, - "step": 6399 - }, - { - "epoch": 0.7695545000901821, - "grad_norm": 1.8364682992660082, - "learning_rate": 5.315268420182437e-07, - "loss": 0.9253, - "step": 6400 - }, - { - "epoch": 0.7696747429808213, - "grad_norm": 1.697935123772341, - "learning_rate": 5.309981144765221e-07, - "loss": 0.9613, - "step": 6401 - }, - { - "epoch": 0.7697949858714603, - "grad_norm": 2.6955070093708047, - "learning_rate": 5.304696097812196e-07, - "loss": 0.9547, - "step": 6402 - }, - { - "epoch": 0.7699152287620994, - "grad_norm": 2.5572808311424486, - "learning_rate": 5.299413280125078e-07, - "loss": 0.7914, - "step": 6403 - }, - { - "epoch": 0.7700354716527386, - "grad_norm": 2.0642955940074272, - "learning_rate": 5.294132692505284e-07, - "loss": 0.9246, - "step": 6404 - }, - { - "epoch": 0.7701557145433776, - "grad_norm": 2.3705144988499462, - "learning_rate": 5.288854335753861e-07, - "loss": 0.9862, - "step": 6405 - }, - { - "epoch": 0.7702759574340167, - "grad_norm": 1.511881265110819, - "learning_rate": 5.283578210671551e-07, - "loss": 0.9637, - "step": 6406 - }, - { - "epoch": 0.7703962003246558, - "grad_norm": 2.180912452966061, - "learning_rate": 5.278304318058719e-07, - "loss": 0.9642, - "step": 6407 - }, - { - "epoch": 0.7705164432152949, - "grad_norm": 2.2327418194015385, - "learning_rate": 5.273032658715411e-07, - "loss": 0.9948, - "step": 6408 - }, - { - "epoch": 0.7706366861059339, - "grad_norm": 1.7770224479927557, - "learning_rate": 5.267763233441347e-07, - "loss": 0.968, - "step": 6409 - }, - { - "epoch": 0.7707569289965731, - "grad_norm": 2.0516975027205344, - "learning_rate": 5.26249604303588e-07, - "loss": 0.8952, - "step": 6410 - }, - { - "epoch": 0.7708771718872122, - "grad_norm": 2.0685616629346675, - "learning_rate": 5.257231088298057e-07, - "loss": 0.974, - "step": 6411 - }, - { - "epoch": 0.7709974147778512, - "grad_norm": 0.8645939922058735, - "learning_rate": 5.25196837002655e-07, - "loss": 0.7657, - "step": 6412 - }, - { - "epoch": 0.7711176576684904, - "grad_norm": 1.8814368069691216, - "learning_rate": 5.24670788901971e-07, - "loss": 0.8943, - "step": 6413 - }, - { - "epoch": 0.7712379005591294, - "grad_norm": 2.0382027987804108, - "learning_rate": 5.241449646075557e-07, - "loss": 0.8902, - "step": 6414 - }, - { - "epoch": 0.7713581434497685, - "grad_norm": 2.1705289552871085, - "learning_rate": 5.236193641991762e-07, - "loss": 0.9268, - "step": 6415 - }, - { - "epoch": 0.7714783863404077, - "grad_norm": 1.8425093533862111, - "learning_rate": 5.23093987756565e-07, - "loss": 0.9041, - "step": 6416 - }, - { - "epoch": 0.7715986292310467, - "grad_norm": 1.7105515275479446, - "learning_rate": 5.225688353594217e-07, - "loss": 0.9592, - "step": 6417 - }, - { - "epoch": 0.7717188721216858, - "grad_norm": 2.064830388268759, - "learning_rate": 5.220439070874108e-07, - "loss": 0.9766, - "step": 6418 - }, - { - "epoch": 0.7718391150123249, - "grad_norm": 1.5989133184435735, - "learning_rate": 5.215192030201652e-07, - "loss": 0.9089, - "step": 6419 - }, - { - "epoch": 0.771959357902964, - "grad_norm": 1.697651598625991, - "learning_rate": 5.209947232372798e-07, - "loss": 1.0606, - "step": 6420 - }, - { - "epoch": 0.772079600793603, - "grad_norm": 1.5658649831716243, - "learning_rate": 5.204704678183196e-07, - "loss": 0.9985, - "step": 6421 - }, - { - "epoch": 0.7721998436842422, - "grad_norm": 1.8083846628431, - "learning_rate": 5.19946436842813e-07, - "loss": 1.0444, - "step": 6422 - }, - { - "epoch": 0.7723200865748813, - "grad_norm": 1.765752197005012, - "learning_rate": 5.194226303902546e-07, - "loss": 0.8813, - "step": 6423 - }, - { - "epoch": 0.7724403294655203, - "grad_norm": 2.0667123261401854, - "learning_rate": 5.188990485401072e-07, - "loss": 0.9113, - "step": 6424 - }, - { - "epoch": 0.7725605723561595, - "grad_norm": 1.9582621649397511, - "learning_rate": 5.183756913717954e-07, - "loss": 1.058, - "step": 6425 - }, - { - "epoch": 0.7726808152467985, - "grad_norm": 1.8891785818676916, - "learning_rate": 5.178525589647136e-07, - "loss": 0.9334, - "step": 6426 - }, - { - "epoch": 0.7728010581374376, - "grad_norm": 1.7518567725385708, - "learning_rate": 5.173296513982197e-07, - "loss": 0.9793, - "step": 6427 - }, - { - "epoch": 0.7729213010280768, - "grad_norm": 2.164591920729765, - "learning_rate": 5.168069687516398e-07, - "loss": 0.8505, - "step": 6428 - }, - { - "epoch": 0.7730415439187158, - "grad_norm": 1.9605311242424934, - "learning_rate": 5.16284511104263e-07, - "loss": 0.9174, - "step": 6429 - }, - { - "epoch": 0.7731617868093549, - "grad_norm": 7.490466352747379, - "learning_rate": 5.157622785353457e-07, - "loss": 1.0089, - "step": 6430 - }, - { - "epoch": 0.7732820296999939, - "grad_norm": 0.7211961171403773, - "learning_rate": 5.152402711241113e-07, - "loss": 0.8161, - "step": 6431 - }, - { - "epoch": 0.7734022725906331, - "grad_norm": 1.968825444789243, - "learning_rate": 5.147184889497465e-07, - "loss": 1.0336, - "step": 6432 - }, - { - "epoch": 0.7735225154812722, - "grad_norm": 2.0657931537831975, - "learning_rate": 5.141969320914072e-07, - "loss": 0.9944, - "step": 6433 - }, - { - "epoch": 0.7736427583719112, - "grad_norm": 2.5343623865380476, - "learning_rate": 5.136756006282113e-07, - "loss": 0.8288, - "step": 6434 - }, - { - "epoch": 0.7737630012625504, - "grad_norm": 2.444661014047202, - "learning_rate": 5.131544946392446e-07, - "loss": 1.053, - "step": 6435 - }, - { - "epoch": 0.7738832441531894, - "grad_norm": 1.8147084166390628, - "learning_rate": 5.126336142035592e-07, - "loss": 0.8429, - "step": 6436 - }, - { - "epoch": 0.7740034870438285, - "grad_norm": 2.8142748848400374, - "learning_rate": 5.121129594001721e-07, - "loss": 0.9185, - "step": 6437 - }, - { - "epoch": 0.7741237299344677, - "grad_norm": 1.5451447028385272, - "learning_rate": 5.115925303080661e-07, - "loss": 1.0164, - "step": 6438 - }, - { - "epoch": 0.7742439728251067, - "grad_norm": 2.0405901451697654, - "learning_rate": 5.110723270061899e-07, - "loss": 0.9922, - "step": 6439 - }, - { - "epoch": 0.7743642157157458, - "grad_norm": 1.7041082781028132, - "learning_rate": 5.105523495734572e-07, - "loss": 0.9952, - "step": 6440 - }, - { - "epoch": 0.7744844586063849, - "grad_norm": 1.5236378855841466, - "learning_rate": 5.100325980887499e-07, - "loss": 0.9386, - "step": 6441 - }, - { - "epoch": 0.774604701497024, - "grad_norm": 1.817588962544757, - "learning_rate": 5.095130726309116e-07, - "loss": 1.0323, - "step": 6442 - }, - { - "epoch": 0.774724944387663, - "grad_norm": 1.166991472443557, - "learning_rate": 5.089937732787559e-07, - "loss": 0.8613, - "step": 6443 - }, - { - "epoch": 0.7748451872783022, - "grad_norm": 1.9144359613539237, - "learning_rate": 5.084747001110592e-07, - "loss": 0.8622, - "step": 6444 - }, - { - "epoch": 0.7749654301689413, - "grad_norm": 1.6788857609211763, - "learning_rate": 5.07955853206564e-07, - "loss": 0.8963, - "step": 6445 - }, - { - "epoch": 0.7750856730595803, - "grad_norm": 1.464706455154451, - "learning_rate": 5.074372326439807e-07, - "loss": 0.9083, - "step": 6446 - }, - { - "epoch": 0.7752059159502195, - "grad_norm": 1.94915532867061, - "learning_rate": 5.069188385019814e-07, - "loss": 0.9364, - "step": 6447 - }, - { - "epoch": 0.7753261588408585, - "grad_norm": 2.683565142394371, - "learning_rate": 5.064006708592077e-07, - "loss": 0.8209, - "step": 6448 - }, - { - "epoch": 0.7754464017314976, - "grad_norm": 2.0959843342901787, - "learning_rate": 5.058827297942641e-07, - "loss": 0.9538, - "step": 6449 - }, - { - "epoch": 0.7755666446221368, - "grad_norm": 1.9523421282568794, - "learning_rate": 5.053650153857237e-07, - "loss": 0.9409, - "step": 6450 - }, - { - "epoch": 0.7756868875127758, - "grad_norm": 1.5622550772396562, - "learning_rate": 5.048475277121214e-07, - "loss": 0.8982, - "step": 6451 - }, - { - "epoch": 0.7758071304034149, - "grad_norm": 1.7293516389504904, - "learning_rate": 5.043302668519598e-07, - "loss": 0.9686, - "step": 6452 - }, - { - "epoch": 0.775927373294054, - "grad_norm": 1.7066126120222562, - "learning_rate": 5.038132328837079e-07, - "loss": 0.9232, - "step": 6453 - }, - { - "epoch": 0.7760476161846931, - "grad_norm": 2.0188175910523425, - "learning_rate": 5.032964258857993e-07, - "loss": 0.9357, - "step": 6454 - }, - { - "epoch": 0.7761678590753321, - "grad_norm": 1.6050421897595724, - "learning_rate": 5.027798459366329e-07, - "loss": 0.8877, - "step": 6455 - }, - { - "epoch": 0.7762881019659713, - "grad_norm": 1.46555052936114, - "learning_rate": 5.02263493114573e-07, - "loss": 0.8348, - "step": 6456 - }, - { - "epoch": 0.7764083448566104, - "grad_norm": 2.0968536603270977, - "learning_rate": 5.017473674979502e-07, - "loss": 0.9621, - "step": 6457 - }, - { - "epoch": 0.7765285877472494, - "grad_norm": 1.0608802117005862, - "learning_rate": 5.01231469165061e-07, - "loss": 0.7941, - "step": 6458 - }, - { - "epoch": 0.7766488306378886, - "grad_norm": 0.9834779557597273, - "learning_rate": 5.007157981941663e-07, - "loss": 0.8112, - "step": 6459 - }, - { - "epoch": 0.7767690735285276, - "grad_norm": 0.9461457229564355, - "learning_rate": 5.002003546634928e-07, - "loss": 0.8954, - "step": 6460 - }, - { - "epoch": 0.7768893164191667, - "grad_norm": 1.6133287621399195, - "learning_rate": 4.996851386512331e-07, - "loss": 0.9551, - "step": 6461 - }, - { - "epoch": 0.7770095593098058, - "grad_norm": 1.8321560489792563, - "learning_rate": 4.991701502355444e-07, - "loss": 1.0304, - "step": 6462 - }, - { - "epoch": 0.7771298022004449, - "grad_norm": 1.4818048814058575, - "learning_rate": 4.986553894945518e-07, - "loss": 0.9631, - "step": 6463 - }, - { - "epoch": 0.777250045091084, - "grad_norm": 1.8786195151573675, - "learning_rate": 4.981408565063416e-07, - "loss": 1.0652, - "step": 6464 - }, - { - "epoch": 0.777370287981723, - "grad_norm": 2.2659074278437803, - "learning_rate": 4.976265513489701e-07, - "loss": 0.9627, - "step": 6465 - }, - { - "epoch": 0.7774905308723622, - "grad_norm": 1.8924615310326915, - "learning_rate": 4.971124741004562e-07, - "loss": 1.0007, - "step": 6466 - }, - { - "epoch": 0.7776107737630013, - "grad_norm": 1.673883105591332, - "learning_rate": 4.965986248387846e-07, - "loss": 0.967, - "step": 6467 - }, - { - "epoch": 0.7777310166536403, - "grad_norm": 1.6254458182676466, - "learning_rate": 4.960850036419073e-07, - "loss": 0.9671, - "step": 6468 - }, - { - "epoch": 0.7778512595442795, - "grad_norm": 2.499908445589183, - "learning_rate": 4.955716105877378e-07, - "loss": 0.9963, - "step": 6469 - }, - { - "epoch": 0.7779715024349185, - "grad_norm": 1.6215951902366186, - "learning_rate": 4.950584457541598e-07, - "loss": 1.0352, - "step": 6470 - }, - { - "epoch": 0.7780917453255576, - "grad_norm": 1.2757179576266824, - "learning_rate": 4.945455092190183e-07, - "loss": 1.0197, - "step": 6471 - }, - { - "epoch": 0.7782119882161967, - "grad_norm": 0.742477520945993, - "learning_rate": 4.940328010601271e-07, - "loss": 0.7716, - "step": 6472 - }, - { - "epoch": 0.7783322311068358, - "grad_norm": 1.7107454653753276, - "learning_rate": 4.935203213552621e-07, - "loss": 0.9548, - "step": 6473 - }, - { - "epoch": 0.7784524739974749, - "grad_norm": 1.8093187761548095, - "learning_rate": 4.930080701821662e-07, - "loss": 0.8638, - "step": 6474 - }, - { - "epoch": 0.778572716888114, - "grad_norm": 1.7496295945672116, - "learning_rate": 4.92496047618548e-07, - "loss": 0.9722, - "step": 6475 - }, - { - "epoch": 0.7786929597787531, - "grad_norm": 2.01475044617232, - "learning_rate": 4.919842537420811e-07, - "loss": 0.9732, - "step": 6476 - }, - { - "epoch": 0.7788132026693921, - "grad_norm": 2.1756991742472276, - "learning_rate": 4.91472688630404e-07, - "loss": 0.9944, - "step": 6477 - }, - { - "epoch": 0.7789334455600313, - "grad_norm": 1.668779305761517, - "learning_rate": 4.909613523611202e-07, - "loss": 0.9444, - "step": 6478 - }, - { - "epoch": 0.7790536884506704, - "grad_norm": 1.7108339600609128, - "learning_rate": 4.904502450117991e-07, - "loss": 0.9439, - "step": 6479 - }, - { - "epoch": 0.7791739313413094, - "grad_norm": 2.2001319195469673, - "learning_rate": 4.899393666599762e-07, - "loss": 0.9219, - "step": 6480 - }, - { - "epoch": 0.7792941742319486, - "grad_norm": 1.9995499342921605, - "learning_rate": 4.894287173831506e-07, - "loss": 0.918, - "step": 6481 - }, - { - "epoch": 0.7794144171225876, - "grad_norm": 1.8015853178989518, - "learning_rate": 4.889182972587877e-07, - "loss": 1.0465, - "step": 6482 - }, - { - "epoch": 0.7795346600132267, - "grad_norm": 1.7042685353145661, - "learning_rate": 4.884081063643177e-07, - "loss": 0.8627, - "step": 6483 - }, - { - "epoch": 0.7796549029038659, - "grad_norm": 0.9172374653244381, - "learning_rate": 4.878981447771353e-07, - "loss": 0.7631, - "step": 6484 - }, - { - "epoch": 0.7797751457945049, - "grad_norm": 1.5303058438256625, - "learning_rate": 4.873884125746035e-07, - "loss": 0.9338, - "step": 6485 - }, - { - "epoch": 0.779895388685144, - "grad_norm": 2.323038266225168, - "learning_rate": 4.868789098340456e-07, - "loss": 0.9273, - "step": 6486 - }, - { - "epoch": 0.7800156315757831, - "grad_norm": 2.1383401970618148, - "learning_rate": 4.863696366327543e-07, - "loss": 0.9105, - "step": 6487 - }, - { - "epoch": 0.7801358744664222, - "grad_norm": 1.7062241940066443, - "learning_rate": 4.85860593047986e-07, - "loss": 0.9843, - "step": 6488 - }, - { - "epoch": 0.7802561173570612, - "grad_norm": 1.5589673352248192, - "learning_rate": 4.853517791569613e-07, - "loss": 0.9413, - "step": 6489 - }, - { - "epoch": 0.7803763602477004, - "grad_norm": 1.634225918040798, - "learning_rate": 4.848431950368684e-07, - "loss": 0.858, - "step": 6490 - }, - { - "epoch": 0.7804966031383395, - "grad_norm": 0.7684950072251044, - "learning_rate": 4.843348407648569e-07, - "loss": 0.7825, - "step": 6491 - }, - { - "epoch": 0.7806168460289785, - "grad_norm": 2.017295795491471, - "learning_rate": 4.838267164180457e-07, - "loss": 1.0279, - "step": 6492 - }, - { - "epoch": 0.7807370889196176, - "grad_norm": 1.8931111784789691, - "learning_rate": 4.833188220735156e-07, - "loss": 1.0448, - "step": 6493 - }, - { - "epoch": 0.7808573318102567, - "grad_norm": 1.837894581578665, - "learning_rate": 4.828111578083152e-07, - "loss": 0.9403, - "step": 6494 - }, - { - "epoch": 0.7809775747008958, - "grad_norm": 1.8895640273055987, - "learning_rate": 4.823037236994556e-07, - "loss": 1.0096, - "step": 6495 - }, - { - "epoch": 0.7810978175915348, - "grad_norm": 0.806328110274921, - "learning_rate": 4.817965198239136e-07, - "loss": 0.7868, - "step": 6496 - }, - { - "epoch": 0.781218060482174, - "grad_norm": 1.8494135623068682, - "learning_rate": 4.812895462586331e-07, - "loss": 0.937, - "step": 6497 - }, - { - "epoch": 0.7813383033728131, - "grad_norm": 1.5768823546828894, - "learning_rate": 4.807828030805207e-07, - "loss": 1.0209, - "step": 6498 - }, - { - "epoch": 0.7814585462634521, - "grad_norm": 1.9021025393109332, - "learning_rate": 4.802762903664495e-07, - "loss": 0.872, - "step": 6499 - }, - { - "epoch": 0.7815787891540913, - "grad_norm": 2.533426549936536, - "learning_rate": 4.797700081932565e-07, - "loss": 0.9267, - "step": 6500 - }, - { - "epoch": 0.7816990320447303, - "grad_norm": 3.8443661327752836, - "learning_rate": 4.792639566377442e-07, - "loss": 1.0159, - "step": 6501 - }, - { - "epoch": 0.7818192749353694, - "grad_norm": 1.7609725014458315, - "learning_rate": 4.78758135776681e-07, - "loss": 0.9756, - "step": 6502 - }, - { - "epoch": 0.7819395178260086, - "grad_norm": 1.7693122913563004, - "learning_rate": 4.782525456867989e-07, - "loss": 0.9836, - "step": 6503 - }, - { - "epoch": 0.7820597607166476, - "grad_norm": 1.4939670038013777, - "learning_rate": 4.777471864447959e-07, - "loss": 1.0301, - "step": 6504 - }, - { - "epoch": 0.7821800036072867, - "grad_norm": 2.1626526133758417, - "learning_rate": 4.772420581273344e-07, - "loss": 1.0001, - "step": 6505 - }, - { - "epoch": 0.7823002464979258, - "grad_norm": 1.6772541962140537, - "learning_rate": 4.7673716081104134e-07, - "loss": 0.9614, - "step": 6506 - }, - { - "epoch": 0.7824204893885649, - "grad_norm": 1.619931827217477, - "learning_rate": 4.762324945725109e-07, - "loss": 1.0362, - "step": 6507 - }, - { - "epoch": 0.782540732279204, - "grad_norm": 1.8656610610007118, - "learning_rate": 4.7572805948829844e-07, - "loss": 0.953, - "step": 6508 - }, - { - "epoch": 0.7826609751698431, - "grad_norm": 1.6756056046635082, - "learning_rate": 4.7522385563492795e-07, - "loss": 0.9065, - "step": 6509 - }, - { - "epoch": 0.7827812180604822, - "grad_norm": 2.659713820357591, - "learning_rate": 4.747198830888863e-07, - "loss": 0.9035, - "step": 6510 - }, - { - "epoch": 0.7829014609511212, - "grad_norm": 1.7651465113620133, - "learning_rate": 4.742161419266251e-07, - "loss": 0.8832, - "step": 6511 - }, - { - "epoch": 0.7830217038417604, - "grad_norm": 2.164912638674656, - "learning_rate": 4.7371263222456304e-07, - "loss": 0.8611, - "step": 6512 - }, - { - "epoch": 0.7831419467323995, - "grad_norm": 1.1356309227796844, - "learning_rate": 4.7320935405908004e-07, - "loss": 0.8387, - "step": 6513 - }, - { - "epoch": 0.7832621896230385, - "grad_norm": 1.9560494463312832, - "learning_rate": 4.7270630750652475e-07, - "loss": 1.0216, - "step": 6514 - }, - { - "epoch": 0.7833824325136777, - "grad_norm": 1.6518796444848158, - "learning_rate": 4.7220349264320746e-07, - "loss": 1.0029, - "step": 6515 - }, - { - "epoch": 0.7835026754043167, - "grad_norm": 0.8252452225799627, - "learning_rate": 4.71700909545407e-07, - "loss": 0.7885, - "step": 6516 - }, - { - "epoch": 0.7836229182949558, - "grad_norm": 1.6383008207171836, - "learning_rate": 4.711985582893627e-07, - "loss": 0.9654, - "step": 6517 - }, - { - "epoch": 0.783743161185595, - "grad_norm": 1.6593939253882661, - "learning_rate": 4.706964389512811e-07, - "loss": 0.916, - "step": 6518 - }, - { - "epoch": 0.783863404076234, - "grad_norm": 1.7587664241064556, - "learning_rate": 4.701945516073345e-07, - "loss": 1.0678, - "step": 6519 - }, - { - "epoch": 0.7839836469668731, - "grad_norm": 1.641877413872439, - "learning_rate": 4.696928963336577e-07, - "loss": 0.9488, - "step": 6520 - }, - { - "epoch": 0.7841038898575122, - "grad_norm": 0.9301566214603174, - "learning_rate": 4.6919147320635224e-07, - "loss": 0.8294, - "step": 6521 - }, - { - "epoch": 0.7842241327481513, - "grad_norm": 2.707850917008748, - "learning_rate": 4.6869028230148286e-07, - "loss": 0.9291, - "step": 6522 - }, - { - "epoch": 0.7843443756387903, - "grad_norm": 2.321765894014223, - "learning_rate": 4.6818932369507957e-07, - "loss": 0.8082, - "step": 6523 - }, - { - "epoch": 0.7844646185294295, - "grad_norm": 2.967008863136931, - "learning_rate": 4.676885974631386e-07, - "loss": 1.0806, - "step": 6524 - }, - { - "epoch": 0.7845848614200686, - "grad_norm": 1.8373526455709044, - "learning_rate": 4.67188103681619e-07, - "loss": 1.0018, - "step": 6525 - }, - { - "epoch": 0.7847051043107076, - "grad_norm": 1.9517104863178232, - "learning_rate": 4.666878424264453e-07, - "loss": 0.8927, - "step": 6526 - }, - { - "epoch": 0.7848253472013467, - "grad_norm": 1.6398168844245902, - "learning_rate": 4.661878137735069e-07, - "loss": 0.9327, - "step": 6527 - }, - { - "epoch": 0.7849455900919858, - "grad_norm": 1.6913328088757809, - "learning_rate": 4.656880177986571e-07, - "loss": 0.943, - "step": 6528 - }, - { - "epoch": 0.7850658329826249, - "grad_norm": 2.3378452433656705, - "learning_rate": 4.6518845457771607e-07, - "loss": 1.0089, - "step": 6529 - }, - { - "epoch": 0.7851860758732639, - "grad_norm": 1.680455019161891, - "learning_rate": 4.646891241864652e-07, - "loss": 0.9996, - "step": 6530 - }, - { - "epoch": 0.7853063187639031, - "grad_norm": 2.0006379063865363, - "learning_rate": 4.6419002670065397e-07, - "loss": 0.9368, - "step": 6531 - }, - { - "epoch": 0.7854265616545422, - "grad_norm": 1.8730426744527324, - "learning_rate": 4.6369116219599445e-07, - "loss": 1.0465, - "step": 6532 - }, - { - "epoch": 0.7855468045451812, - "grad_norm": 1.5922460004337322, - "learning_rate": 4.631925307481637e-07, - "loss": 0.9927, - "step": 6533 - }, - { - "epoch": 0.7856670474358204, - "grad_norm": 1.9229869572062752, - "learning_rate": 4.6269413243280533e-07, - "loss": 0.9488, - "step": 6534 - }, - { - "epoch": 0.7857872903264594, - "grad_norm": 2.52433425511541, - "learning_rate": 4.621959673255236e-07, - "loss": 0.9422, - "step": 6535 - }, - { - "epoch": 0.7859075332170985, - "grad_norm": 1.7839786977992926, - "learning_rate": 4.6169803550189135e-07, - "loss": 1.097, - "step": 6536 - }, - { - "epoch": 0.7860277761077377, - "grad_norm": 2.2159309208290385, - "learning_rate": 4.6120033703744355e-07, - "loss": 0.9624, - "step": 6537 - }, - { - "epoch": 0.7861480189983767, - "grad_norm": 1.800699956221587, - "learning_rate": 4.607028720076822e-07, - "loss": 0.9793, - "step": 6538 - }, - { - "epoch": 0.7862682618890158, - "grad_norm": 1.712062935848847, - "learning_rate": 4.6020564048807074e-07, - "loss": 0.9364, - "step": 6539 - }, - { - "epoch": 0.7863885047796549, - "grad_norm": 1.834031239521201, - "learning_rate": 4.5970864255403883e-07, - "loss": 0.9193, - "step": 6540 - }, - { - "epoch": 0.786508747670294, - "grad_norm": 2.501439729021274, - "learning_rate": 4.59211878280982e-07, - "loss": 1.0175, - "step": 6541 - }, - { - "epoch": 0.786628990560933, - "grad_norm": 1.8239574694527048, - "learning_rate": 4.587153477442578e-07, - "loss": 0.8983, - "step": 6542 - }, - { - "epoch": 0.7867492334515722, - "grad_norm": 2.2505974506195257, - "learning_rate": 4.582190510191899e-07, - "loss": 1.0175, - "step": 6543 - }, - { - "epoch": 0.7868694763422113, - "grad_norm": 1.9530106778065355, - "learning_rate": 4.5772298818106625e-07, - "loss": 1.0601, - "step": 6544 - }, - { - "epoch": 0.7869897192328503, - "grad_norm": 2.3926507681892994, - "learning_rate": 4.572271593051384e-07, - "loss": 0.9252, - "step": 6545 - }, - { - "epoch": 0.7871099621234895, - "grad_norm": 1.570527930214468, - "learning_rate": 4.567315644666245e-07, - "loss": 0.9721, - "step": 6546 - }, - { - "epoch": 0.7872302050141285, - "grad_norm": 1.9630128129724582, - "learning_rate": 4.5623620374070507e-07, - "loss": 1.0412, - "step": 6547 - }, - { - "epoch": 0.7873504479047676, - "grad_norm": 0.8385859452082195, - "learning_rate": 4.557410772025263e-07, - "loss": 0.8228, - "step": 6548 - }, - { - "epoch": 0.7874706907954068, - "grad_norm": 1.972336907552077, - "learning_rate": 4.5524618492719803e-07, - "loss": 0.8631, - "step": 6549 - }, - { - "epoch": 0.7875909336860458, - "grad_norm": 1.4839787004386933, - "learning_rate": 4.54751526989795e-07, - "loss": 0.9766, - "step": 6550 - }, - { - "epoch": 0.7877111765766849, - "grad_norm": 2.2870206612077775, - "learning_rate": 4.5425710346535775e-07, - "loss": 0.9892, - "step": 6551 - }, - { - "epoch": 0.787831419467324, - "grad_norm": 1.8040278166214403, - "learning_rate": 4.537629144288877e-07, - "loss": 1.0129, - "step": 6552 - }, - { - "epoch": 0.7879516623579631, - "grad_norm": 1.7821480344429026, - "learning_rate": 4.5326895995535477e-07, - "loss": 0.9424, - "step": 6553 - }, - { - "epoch": 0.7880719052486022, - "grad_norm": 3.9148645245532405, - "learning_rate": 4.527752401196907e-07, - "loss": 1.0386, - "step": 6554 - }, - { - "epoch": 0.7881921481392413, - "grad_norm": 1.878391504659077, - "learning_rate": 4.5228175499679254e-07, - "loss": 0.8702, - "step": 6555 - }, - { - "epoch": 0.7883123910298804, - "grad_norm": 0.8684234060136949, - "learning_rate": 4.5178850466152174e-07, - "loss": 0.7592, - "step": 6556 - }, - { - "epoch": 0.7884326339205194, - "grad_norm": 1.7089020627498817, - "learning_rate": 4.512954891887031e-07, - "loss": 1.0123, - "step": 6557 - }, - { - "epoch": 0.7885528768111585, - "grad_norm": 1.9939875231039716, - "learning_rate": 4.5080270865312806e-07, - "loss": 1.0297, - "step": 6558 - }, - { - "epoch": 0.7886731197017977, - "grad_norm": 1.8935642566803077, - "learning_rate": 4.5031016312954985e-07, - "loss": 0.9077, - "step": 6559 - }, - { - "epoch": 0.7887933625924367, - "grad_norm": 2.0534662018347505, - "learning_rate": 4.498178526926886e-07, - "loss": 0.9506, - "step": 6560 - }, - { - "epoch": 0.7889136054830758, - "grad_norm": 2.0085698343319875, - "learning_rate": 4.4932577741722635e-07, - "loss": 0.9273, - "step": 6561 - }, - { - "epoch": 0.7890338483737149, - "grad_norm": 1.7344192636701028, - "learning_rate": 4.4883393737780985e-07, - "loss": 0.9488, - "step": 6562 - }, - { - "epoch": 0.789154091264354, - "grad_norm": 2.0724261165099067, - "learning_rate": 4.4834233264905254e-07, - "loss": 0.9802, - "step": 6563 - }, - { - "epoch": 0.789274334154993, - "grad_norm": 2.6584369408039756, - "learning_rate": 4.478509633055294e-07, - "loss": 0.9181, - "step": 6564 - }, - { - "epoch": 0.7893945770456322, - "grad_norm": 2.1288822837670494, - "learning_rate": 4.473598294217813e-07, - "loss": 1.0032, - "step": 6565 - }, - { - "epoch": 0.7895148199362713, - "grad_norm": 1.772488781418694, - "learning_rate": 4.468689310723124e-07, - "loss": 0.9175, - "step": 6566 - }, - { - "epoch": 0.7896350628269103, - "grad_norm": 1.8280449222078388, - "learning_rate": 4.463782683315913e-07, - "loss": 0.9828, - "step": 6567 - }, - { - "epoch": 0.7897553057175495, - "grad_norm": 1.7429192208126918, - "learning_rate": 4.458878412740523e-07, - "loss": 0.9228, - "step": 6568 - }, - { - "epoch": 0.7898755486081885, - "grad_norm": 2.1569059175957626, - "learning_rate": 4.453976499740919e-07, - "loss": 0.9703, - "step": 6569 - }, - { - "epoch": 0.7899957914988276, - "grad_norm": 1.6376064324187878, - "learning_rate": 4.4490769450607215e-07, - "loss": 0.9802, - "step": 6570 - }, - { - "epoch": 0.7901160343894668, - "grad_norm": 1.72096962223584, - "learning_rate": 4.4441797494431845e-07, - "loss": 0.938, - "step": 6571 - }, - { - "epoch": 0.7902362772801058, - "grad_norm": 2.2897179612301555, - "learning_rate": 4.439284913631207e-07, - "loss": 0.9779, - "step": 6572 - }, - { - "epoch": 0.7903565201707449, - "grad_norm": 1.7904209493376206, - "learning_rate": 4.434392438367347e-07, - "loss": 1.0351, - "step": 6573 - }, - { - "epoch": 0.790476763061384, - "grad_norm": 1.7574360084007774, - "learning_rate": 4.4295023243937677e-07, - "loss": 0.9422, - "step": 6574 - }, - { - "epoch": 0.7905970059520231, - "grad_norm": 1.5805634647939497, - "learning_rate": 4.4246145724523123e-07, - "loss": 1.0087, - "step": 6575 - }, - { - "epoch": 0.7907172488426621, - "grad_norm": 2.0693913551820255, - "learning_rate": 4.41972918328444e-07, - "loss": 0.9635, - "step": 6576 - }, - { - "epoch": 0.7908374917333013, - "grad_norm": 2.5504353918235627, - "learning_rate": 4.4148461576312646e-07, - "loss": 0.9812, - "step": 6577 - }, - { - "epoch": 0.7909577346239404, - "grad_norm": 1.4444700517992901, - "learning_rate": 4.4099654962335343e-07, - "loss": 0.9445, - "step": 6578 - }, - { - "epoch": 0.7910779775145794, - "grad_norm": 1.8065516764421, - "learning_rate": 4.405087199831636e-07, - "loss": 0.9432, - "step": 6579 - }, - { - "epoch": 0.7911982204052186, - "grad_norm": 1.8541355844814205, - "learning_rate": 4.400211269165619e-07, - "loss": 0.8718, - "step": 6580 - }, - { - "epoch": 0.7913184632958576, - "grad_norm": 1.4072490534153212, - "learning_rate": 4.3953377049751416e-07, - "loss": 0.969, - "step": 6581 - }, - { - "epoch": 0.7914387061864967, - "grad_norm": 2.243579817669609, - "learning_rate": 4.390466507999537e-07, - "loss": 0.9798, - "step": 6582 - }, - { - "epoch": 0.7915589490771359, - "grad_norm": 2.2940554797407184, - "learning_rate": 4.385597678977748e-07, - "loss": 0.9577, - "step": 6583 - }, - { - "epoch": 0.7916791919677749, - "grad_norm": 1.4967541544321212, - "learning_rate": 4.3807312186483726e-07, - "loss": 0.9497, - "step": 6584 - }, - { - "epoch": 0.791799434858414, - "grad_norm": 2.0173047777000854, - "learning_rate": 4.375867127749655e-07, - "loss": 0.9798, - "step": 6585 - }, - { - "epoch": 0.7919196777490531, - "grad_norm": 1.8019975704299518, - "learning_rate": 4.3710054070194744e-07, - "loss": 0.8807, - "step": 6586 - }, - { - "epoch": 0.7920399206396922, - "grad_norm": 2.7071209076625324, - "learning_rate": 4.3661460571953455e-07, - "loss": 0.8667, - "step": 6587 - }, - { - "epoch": 0.7921601635303313, - "grad_norm": 1.5531120560718792, - "learning_rate": 4.36128907901443e-07, - "loss": 0.8782, - "step": 6588 - }, - { - "epoch": 0.7922804064209703, - "grad_norm": 2.2488690818991603, - "learning_rate": 4.356434473213519e-07, - "loss": 0.9181, - "step": 6589 - }, - { - "epoch": 0.7924006493116095, - "grad_norm": 1.6558315540234407, - "learning_rate": 4.351582240529068e-07, - "loss": 0.9901, - "step": 6590 - }, - { - "epoch": 0.7925208922022485, - "grad_norm": 0.730621077214553, - "learning_rate": 4.346732381697149e-07, - "loss": 0.8049, - "step": 6591 - }, - { - "epoch": 0.7926411350928876, - "grad_norm": 1.6012538705580994, - "learning_rate": 4.3418848974534825e-07, - "loss": 1.0162, - "step": 6592 - }, - { - "epoch": 0.7927613779835267, - "grad_norm": 1.4851344524872896, - "learning_rate": 4.3370397885334276e-07, - "loss": 0.8853, - "step": 6593 - }, - { - "epoch": 0.7928816208741658, - "grad_norm": 2.0983020775698935, - "learning_rate": 4.3321970556719777e-07, - "loss": 0.9492, - "step": 6594 - }, - { - "epoch": 0.7930018637648049, - "grad_norm": 2.074787299043734, - "learning_rate": 4.3273566996037856e-07, - "loss": 0.9148, - "step": 6595 - }, - { - "epoch": 0.793122106655444, - "grad_norm": 3.5217684495271633, - "learning_rate": 4.322518721063113e-07, - "loss": 1.0004, - "step": 6596 - }, - { - "epoch": 0.7932423495460831, - "grad_norm": 2.0871201495107203, - "learning_rate": 4.3176831207838906e-07, - "loss": 0.8959, - "step": 6597 - }, - { - "epoch": 0.7933625924367221, - "grad_norm": 2.561312237379475, - "learning_rate": 4.3128498994996685e-07, - "loss": 0.9518, - "step": 6598 - }, - { - "epoch": 0.7934828353273613, - "grad_norm": 1.8298023060810693, - "learning_rate": 4.308019057943646e-07, - "loss": 0.9121, - "step": 6599 - }, - { - "epoch": 0.7936030782180004, - "grad_norm": 1.5909027396731736, - "learning_rate": 4.3031905968486535e-07, - "loss": 0.9468, - "step": 6600 - }, - { - "epoch": 0.7937233211086394, - "grad_norm": 1.873334908241524, - "learning_rate": 4.298364516947162e-07, - "loss": 0.8868, - "step": 6601 - }, - { - "epoch": 0.7938435639992786, - "grad_norm": 2.2357160905128204, - "learning_rate": 4.293540818971295e-07, - "loss": 0.8618, - "step": 6602 - }, - { - "epoch": 0.7939638068899176, - "grad_norm": 2.468158313007666, - "learning_rate": 4.2887195036527934e-07, - "loss": 0.9654, - "step": 6603 - }, - { - "epoch": 0.7940840497805567, - "grad_norm": 2.1990093992066226, - "learning_rate": 4.28390057172306e-07, - "loss": 0.9383, - "step": 6604 - }, - { - "epoch": 0.7942042926711959, - "grad_norm": 1.989900480494371, - "learning_rate": 4.279084023913111e-07, - "loss": 0.9276, - "step": 6605 - }, - { - "epoch": 0.7943245355618349, - "grad_norm": 1.7639813494874943, - "learning_rate": 4.2742698609536096e-07, - "loss": 0.8862, - "step": 6606 - }, - { - "epoch": 0.794444778452474, - "grad_norm": 1.7556668219468803, - "learning_rate": 4.2694580835748706e-07, - "loss": 0.9776, - "step": 6607 - }, - { - "epoch": 0.7945650213431131, - "grad_norm": 1.7417140710880261, - "learning_rate": 4.264648692506836e-07, - "loss": 0.9405, - "step": 6608 - }, - { - "epoch": 0.7946852642337522, - "grad_norm": 1.7761328292524852, - "learning_rate": 4.2598416884790824e-07, - "loss": 0.9164, - "step": 6609 - }, - { - "epoch": 0.7948055071243912, - "grad_norm": 1.828249413585008, - "learning_rate": 4.255037072220828e-07, - "loss": 1.014, - "step": 6610 - }, - { - "epoch": 0.7949257500150304, - "grad_norm": 1.4954378368501502, - "learning_rate": 4.2502348444609293e-07, - "loss": 0.9066, - "step": 6611 - }, - { - "epoch": 0.7950459929056695, - "grad_norm": 1.690369462252201, - "learning_rate": 4.2454350059278844e-07, - "loss": 0.8904, - "step": 6612 - }, - { - "epoch": 0.7951662357963085, - "grad_norm": 1.921647097092851, - "learning_rate": 4.240637557349824e-07, - "loss": 1.0415, - "step": 6613 - }, - { - "epoch": 0.7952864786869477, - "grad_norm": 1.7601357937575244, - "learning_rate": 4.235842499454516e-07, - "loss": 0.8558, - "step": 6614 - }, - { - "epoch": 0.7954067215775867, - "grad_norm": 2.2435267897915665, - "learning_rate": 4.2310498329693687e-07, - "loss": 1.0209, - "step": 6615 - }, - { - "epoch": 0.7955269644682258, - "grad_norm": 1.481347057526743, - "learning_rate": 4.2262595586214164e-07, - "loss": 1.0089, - "step": 6616 - }, - { - "epoch": 0.795647207358865, - "grad_norm": 1.4806809084615413, - "learning_rate": 4.221471677137358e-07, - "loss": 0.9693, - "step": 6617 - }, - { - "epoch": 0.795767450249504, - "grad_norm": 1.5152456851073928, - "learning_rate": 4.216686189243492e-07, - "loss": 0.9004, - "step": 6618 - }, - { - "epoch": 0.7958876931401431, - "grad_norm": 1.5433005447008057, - "learning_rate": 4.211903095665785e-07, - "loss": 0.9252, - "step": 6619 - }, - { - "epoch": 0.7960079360307821, - "grad_norm": 1.8053973234566494, - "learning_rate": 4.2071223971298277e-07, - "loss": 0.9514, - "step": 6620 - }, - { - "epoch": 0.7961281789214213, - "grad_norm": 2.027534255409844, - "learning_rate": 4.2023440943608433e-07, - "loss": 0.8156, - "step": 6621 - }, - { - "epoch": 0.7962484218120603, - "grad_norm": 1.44561302566022, - "learning_rate": 4.1975681880837023e-07, - "loss": 0.9818, - "step": 6622 - }, - { - "epoch": 0.7963686647026994, - "grad_norm": 2.246309644855056, - "learning_rate": 4.192794679022895e-07, - "loss": 1.0266, - "step": 6623 - }, - { - "epoch": 0.7964889075933386, - "grad_norm": 1.8268635628153336, - "learning_rate": 4.1880235679025743e-07, - "loss": 0.9129, - "step": 6624 - }, - { - "epoch": 0.7966091504839776, - "grad_norm": 2.1621359502163635, - "learning_rate": 4.1832548554464986e-07, - "loss": 0.8368, - "step": 6625 - }, - { - "epoch": 0.7967293933746167, - "grad_norm": 0.8192669699630943, - "learning_rate": 4.178488542378098e-07, - "loss": 0.8035, - "step": 6626 - }, - { - "epoch": 0.7968496362652558, - "grad_norm": 3.5354436768080926, - "learning_rate": 4.173724629420401e-07, - "loss": 1.0847, - "step": 6627 - }, - { - "epoch": 0.7969698791558949, - "grad_norm": 5.419433043834157, - "learning_rate": 4.168963117296087e-07, - "loss": 0.8813, - "step": 6628 - }, - { - "epoch": 0.797090122046534, - "grad_norm": 2.048326167173501, - "learning_rate": 4.1642040067274876e-07, - "loss": 0.9471, - "step": 6629 - }, - { - "epoch": 0.7972103649371731, - "grad_norm": 1.5609076205917385, - "learning_rate": 4.1594472984365493e-07, - "loss": 0.9221, - "step": 6630 - }, - { - "epoch": 0.7973306078278122, - "grad_norm": 1.574453331670907, - "learning_rate": 4.154692993144862e-07, - "loss": 0.969, - "step": 6631 - }, - { - "epoch": 0.7974508507184512, - "grad_norm": 1.887344919869608, - "learning_rate": 4.1499410915736476e-07, - "loss": 0.9124, - "step": 6632 - }, - { - "epoch": 0.7975710936090904, - "grad_norm": 0.8348468546423211, - "learning_rate": 4.145191594443762e-07, - "loss": 0.9037, - "step": 6633 - }, - { - "epoch": 0.7976913364997295, - "grad_norm": 1.6536062481164657, - "learning_rate": 4.140444502475713e-07, - "loss": 0.9106, - "step": 6634 - }, - { - "epoch": 0.7978115793903685, - "grad_norm": 1.8255433331969977, - "learning_rate": 4.1356998163896216e-07, - "loss": 0.8978, - "step": 6635 - }, - { - "epoch": 0.7979318222810077, - "grad_norm": 1.9389805213142635, - "learning_rate": 4.130957536905255e-07, - "loss": 0.9428, - "step": 6636 - }, - { - "epoch": 0.7980520651716467, - "grad_norm": 2.354648327011166, - "learning_rate": 4.1262176647420134e-07, - "loss": 0.9115, - "step": 6637 - }, - { - "epoch": 0.7981723080622858, - "grad_norm": 1.6822395985894452, - "learning_rate": 4.121480200618923e-07, - "loss": 0.9948, - "step": 6638 - }, - { - "epoch": 0.798292550952925, - "grad_norm": 1.547647225643924, - "learning_rate": 4.116745145254674e-07, - "loss": 0.9967, - "step": 6639 - }, - { - "epoch": 0.798412793843564, - "grad_norm": 0.7944739201960462, - "learning_rate": 4.1120124993675476e-07, - "loss": 0.8036, - "step": 6640 - }, - { - "epoch": 0.7985330367342031, - "grad_norm": 1.8496006096347213, - "learning_rate": 4.107282263675498e-07, - "loss": 0.8206, - "step": 6641 - }, - { - "epoch": 0.7986532796248422, - "grad_norm": 0.7756494815476238, - "learning_rate": 4.1025544388960907e-07, - "loss": 0.7384, - "step": 6642 - }, - { - "epoch": 0.7987735225154813, - "grad_norm": 1.6540449326672557, - "learning_rate": 4.097829025746538e-07, - "loss": 0.9147, - "step": 6643 - }, - { - "epoch": 0.7988937654061203, - "grad_norm": 0.7117308916046581, - "learning_rate": 4.0931060249436757e-07, - "loss": 0.8189, - "step": 6644 - }, - { - "epoch": 0.7990140082967595, - "grad_norm": 1.9080921090043588, - "learning_rate": 4.088385437203978e-07, - "loss": 0.8928, - "step": 6645 - }, - { - "epoch": 0.7991342511873986, - "grad_norm": 2.0146186149634686, - "learning_rate": 4.083667263243564e-07, - "loss": 0.9703, - "step": 6646 - }, - { - "epoch": 0.7992544940780376, - "grad_norm": 1.7487306759838908, - "learning_rate": 4.0789515037781653e-07, - "loss": 0.9083, - "step": 6647 - }, - { - "epoch": 0.7993747369686768, - "grad_norm": 1.6919189099073355, - "learning_rate": 4.0742381595231755e-07, - "loss": 1.0237, - "step": 6648 - }, - { - "epoch": 0.7994949798593158, - "grad_norm": 1.3757509435102473, - "learning_rate": 4.06952723119359e-07, - "loss": 0.9828, - "step": 6649 - }, - { - "epoch": 0.7996152227499549, - "grad_norm": 2.0779772254202484, - "learning_rate": 4.0648187195040504e-07, - "loss": 0.8657, - "step": 6650 - }, - { - "epoch": 0.799735465640594, - "grad_norm": 0.980124850202699, - "learning_rate": 4.060112625168848e-07, - "loss": 0.9195, - "step": 6651 - }, - { - "epoch": 0.7998557085312331, - "grad_norm": 1.6593533358264256, - "learning_rate": 4.055408948901886e-07, - "loss": 0.9362, - "step": 6652 - }, - { - "epoch": 0.7999759514218722, - "grad_norm": 1.652121526686348, - "learning_rate": 4.050707691416708e-07, - "loss": 0.9131, - "step": 6653 - }, - { - "epoch": 0.8000961943125112, - "grad_norm": 0.7459166551289582, - "learning_rate": 4.046008853426495e-07, - "loss": 0.8177, - "step": 6654 - }, - { - "epoch": 0.8002164372031504, - "grad_norm": 1.6163757055834334, - "learning_rate": 4.0413124356440464e-07, - "loss": 0.8298, - "step": 6655 - }, - { - "epoch": 0.8003366800937894, - "grad_norm": 2.0248326974209907, - "learning_rate": 4.0366184387818223e-07, - "loss": 1.0289, - "step": 6656 - }, - { - "epoch": 0.8004569229844285, - "grad_norm": 1.684027383636044, - "learning_rate": 4.0319268635518797e-07, - "loss": 1.0512, - "step": 6657 - }, - { - "epoch": 0.8005771658750677, - "grad_norm": 1.4948859456421804, - "learning_rate": 4.027237710665943e-07, - "loss": 0.9475, - "step": 6658 - }, - { - "epoch": 0.8006974087657067, - "grad_norm": 2.5666743447655445, - "learning_rate": 4.022550980835344e-07, - "loss": 0.8947, - "step": 6659 - }, - { - "epoch": 0.8008176516563458, - "grad_norm": 1.954936659779544, - "learning_rate": 4.017866674771051e-07, - "loss": 1.0064, - "step": 6660 - }, - { - "epoch": 0.8009378945469849, - "grad_norm": 1.520011374907847, - "learning_rate": 4.013184793183688e-07, - "loss": 0.9451, - "step": 6661 - }, - { - "epoch": 0.801058137437624, - "grad_norm": 1.647106844889194, - "learning_rate": 4.008505336783472e-07, - "loss": 0.9234, - "step": 6662 - }, - { - "epoch": 0.801178380328263, - "grad_norm": 2.089922010435937, - "learning_rate": 4.003828306280284e-07, - "loss": 1.0172, - "step": 6663 - }, - { - "epoch": 0.8012986232189022, - "grad_norm": 2.333426564488446, - "learning_rate": 3.999153702383626e-07, - "loss": 0.9758, - "step": 6664 - }, - { - "epoch": 0.8014188661095413, - "grad_norm": 1.631099258415012, - "learning_rate": 3.9944815258026263e-07, - "loss": 0.9277, - "step": 6665 - }, - { - "epoch": 0.8015391090001803, - "grad_norm": 1.660772231771339, - "learning_rate": 3.989811777246057e-07, - "loss": 1.0304, - "step": 6666 - }, - { - "epoch": 0.8016593518908195, - "grad_norm": 0.9342982614702938, - "learning_rate": 3.985144457422305e-07, - "loss": 0.8963, - "step": 6667 - }, - { - "epoch": 0.8017795947814585, - "grad_norm": 1.687799427170424, - "learning_rate": 3.9804795670394096e-07, - "loss": 0.9548, - "step": 6668 - }, - { - "epoch": 0.8018998376720976, - "grad_norm": 1.8781470908390723, - "learning_rate": 3.975817106805022e-07, - "loss": 0.9114, - "step": 6669 - }, - { - "epoch": 0.8020200805627368, - "grad_norm": 1.802757254772536, - "learning_rate": 3.97115707742645e-07, - "loss": 0.8405, - "step": 6670 - }, - { - "epoch": 0.8021403234533758, - "grad_norm": 1.8531635942991427, - "learning_rate": 3.966499479610599e-07, - "loss": 0.8538, - "step": 6671 - }, - { - "epoch": 0.8022605663440149, - "grad_norm": 1.725263909870186, - "learning_rate": 3.9618443140640225e-07, - "loss": 0.8498, - "step": 6672 - }, - { - "epoch": 0.802380809234654, - "grad_norm": 0.7999116595488047, - "learning_rate": 3.957191581492918e-07, - "loss": 0.743, - "step": 6673 - }, - { - "epoch": 0.8025010521252931, - "grad_norm": 3.148706315825258, - "learning_rate": 3.952541282603097e-07, - "loss": 0.902, - "step": 6674 - }, - { - "epoch": 0.8026212950159322, - "grad_norm": 2.25318831144525, - "learning_rate": 3.9478934181000013e-07, - "loss": 1.03, - "step": 6675 - }, - { - "epoch": 0.8027415379065713, - "grad_norm": 2.033543040712784, - "learning_rate": 3.943247988688714e-07, - "loss": 1.0433, - "step": 6676 - }, - { - "epoch": 0.8028617807972104, - "grad_norm": 1.6875881595895128, - "learning_rate": 3.938604995073933e-07, - "loss": 0.9094, - "step": 6677 - }, - { - "epoch": 0.8029820236878494, - "grad_norm": 1.5504617433967265, - "learning_rate": 3.9339644379600157e-07, - "loss": 0.8626, - "step": 6678 - }, - { - "epoch": 0.8031022665784886, - "grad_norm": 2.30842168847754, - "learning_rate": 3.929326318050907e-07, - "loss": 0.9178, - "step": 6679 - }, - { - "epoch": 0.8032225094691277, - "grad_norm": 1.8019848026689023, - "learning_rate": 3.924690636050225e-07, - "loss": 0.9829, - "step": 6680 - }, - { - "epoch": 0.8033427523597667, - "grad_norm": 1.728864214386572, - "learning_rate": 3.9200573926611915e-07, - "loss": 0.9239, - "step": 6681 - }, - { - "epoch": 0.8034629952504058, - "grad_norm": 1.88712863932027, - "learning_rate": 3.9154265885866613e-07, - "loss": 0.9259, - "step": 6682 - }, - { - "epoch": 0.8035832381410449, - "grad_norm": 3.3066469700544556, - "learning_rate": 3.9107982245291394e-07, - "loss": 0.9508, - "step": 6683 - }, - { - "epoch": 0.803703481031684, - "grad_norm": 1.8912791862177136, - "learning_rate": 3.9061723011907245e-07, - "loss": 0.965, - "step": 6684 - }, - { - "epoch": 0.803823723922323, - "grad_norm": 1.7763634294215331, - "learning_rate": 3.901548819273179e-07, - "loss": 0.9841, - "step": 6685 - }, - { - "epoch": 0.8039439668129622, - "grad_norm": 1.9305403211009307, - "learning_rate": 3.896927779477881e-07, - "loss": 0.8902, - "step": 6686 - }, - { - "epoch": 0.8040642097036013, - "grad_norm": 2.117592185219532, - "learning_rate": 3.892309182505833e-07, - "loss": 0.8751, - "step": 6687 - }, - { - "epoch": 0.8041844525942403, - "grad_norm": 2.0096449746265503, - "learning_rate": 3.887693029057675e-07, - "loss": 1.0623, - "step": 6688 - }, - { - "epoch": 0.8043046954848795, - "grad_norm": 1.720667722859601, - "learning_rate": 3.8830793198336684e-07, - "loss": 1.0113, - "step": 6689 - }, - { - "epoch": 0.8044249383755185, - "grad_norm": 1.6560796974004137, - "learning_rate": 3.878468055533721e-07, - "loss": 0.9032, - "step": 6690 - }, - { - "epoch": 0.8045451812661576, - "grad_norm": 2.9321321180899353, - "learning_rate": 3.8738592368573464e-07, - "loss": 1.0569, - "step": 6691 - }, - { - "epoch": 0.8046654241567968, - "grad_norm": 1.7275623861218776, - "learning_rate": 3.8692528645037137e-07, - "loss": 1.0773, - "step": 6692 - }, - { - "epoch": 0.8047856670474358, - "grad_norm": 2.297117051845188, - "learning_rate": 3.8646489391715907e-07, - "loss": 0.974, - "step": 6693 - }, - { - "epoch": 0.8049059099380749, - "grad_norm": 2.1161320719614563, - "learning_rate": 3.8600474615593903e-07, - "loss": 1.0766, - "step": 6694 - }, - { - "epoch": 0.805026152828714, - "grad_norm": 0.8719230433527436, - "learning_rate": 3.8554484323651605e-07, - "loss": 0.8529, - "step": 6695 - }, - { - "epoch": 0.8051463957193531, - "grad_norm": 1.490035417507052, - "learning_rate": 3.85085185228657e-07, - "loss": 0.9823, - "step": 6696 - }, - { - "epoch": 0.8052666386099921, - "grad_norm": 2.008489353667369, - "learning_rate": 3.8462577220209114e-07, - "loss": 0.9367, - "step": 6697 - }, - { - "epoch": 0.8053868815006313, - "grad_norm": 0.9064616252185056, - "learning_rate": 3.8416660422651127e-07, - "loss": 0.8036, - "step": 6698 - }, - { - "epoch": 0.8055071243912704, - "grad_norm": 1.7411063906893631, - "learning_rate": 3.837076813715723e-07, - "loss": 0.8879, - "step": 6699 - }, - { - "epoch": 0.8056273672819094, - "grad_norm": 3.596323833112066, - "learning_rate": 3.832490037068941e-07, - "loss": 0.9493, - "step": 6700 - }, - { - "epoch": 0.8057476101725486, - "grad_norm": 2.0667149794479194, - "learning_rate": 3.827905713020554e-07, - "loss": 0.9552, - "step": 6701 - }, - { - "epoch": 0.8058678530631876, - "grad_norm": 1.8109557051315437, - "learning_rate": 3.823323842266017e-07, - "loss": 0.8775, - "step": 6702 - }, - { - "epoch": 0.8059880959538267, - "grad_norm": 2.3102401826515973, - "learning_rate": 3.818744425500393e-07, - "loss": 0.9413, - "step": 6703 - }, - { - "epoch": 0.8061083388444659, - "grad_norm": 1.6996954589028992, - "learning_rate": 3.8141674634183675e-07, - "loss": 1.01, - "step": 6704 - }, - { - "epoch": 0.8062285817351049, - "grad_norm": 1.7628253582352067, - "learning_rate": 3.809592956714278e-07, - "loss": 0.8514, - "step": 6705 - }, - { - "epoch": 0.806348824625744, - "grad_norm": 1.7587164694945605, - "learning_rate": 3.805020906082057e-07, - "loss": 0.9359, - "step": 6706 - }, - { - "epoch": 0.8064690675163831, - "grad_norm": 2.175934873461833, - "learning_rate": 3.8004513122152917e-07, - "loss": 1.0059, - "step": 6707 - }, - { - "epoch": 0.8065893104070222, - "grad_norm": 1.6294415735502055, - "learning_rate": 3.79588417580718e-07, - "loss": 0.8685, - "step": 6708 - }, - { - "epoch": 0.8067095532976613, - "grad_norm": 1.6978602097687234, - "learning_rate": 3.791319497550558e-07, - "loss": 0.964, - "step": 6709 - }, - { - "epoch": 0.8068297961883004, - "grad_norm": 1.7547508873704252, - "learning_rate": 3.78675727813788e-07, - "loss": 0.915, - "step": 6710 - }, - { - "epoch": 0.8069500390789395, - "grad_norm": 1.8443663262034347, - "learning_rate": 3.782197518261225e-07, - "loss": 0.9333, - "step": 6711 - }, - { - "epoch": 0.8070702819695785, - "grad_norm": 1.9907663100553066, - "learning_rate": 3.777640218612319e-07, - "loss": 1.1638, - "step": 6712 - }, - { - "epoch": 0.8071905248602176, - "grad_norm": 1.9374301651705332, - "learning_rate": 3.773085379882488e-07, - "loss": 0.9131, - "step": 6713 - }, - { - "epoch": 0.8073107677508568, - "grad_norm": 1.766437647328318, - "learning_rate": 3.768533002762715e-07, - "loss": 0.9642, - "step": 6714 - }, - { - "epoch": 0.8074310106414958, - "grad_norm": 1.7728495022176665, - "learning_rate": 3.763983087943572e-07, - "loss": 0.9611, - "step": 6715 - }, - { - "epoch": 0.8075512535321349, - "grad_norm": 1.8894991714705875, - "learning_rate": 3.759435636115282e-07, - "loss": 0.9994, - "step": 6716 - }, - { - "epoch": 0.807671496422774, - "grad_norm": 1.8416684572144977, - "learning_rate": 3.7548906479676967e-07, - "loss": 0.937, - "step": 6717 - }, - { - "epoch": 0.8077917393134131, - "grad_norm": 1.7567648923630004, - "learning_rate": 3.7503481241902855e-07, - "loss": 0.9102, - "step": 6718 - }, - { - "epoch": 0.8079119822040521, - "grad_norm": 1.6279931013162647, - "learning_rate": 3.745808065472145e-07, - "loss": 0.997, - "step": 6719 - }, - { - "epoch": 0.8080322250946913, - "grad_norm": 1.5980639695265182, - "learning_rate": 3.741270472501994e-07, - "loss": 0.9588, - "step": 6720 - }, - { - "epoch": 0.8081524679853304, - "grad_norm": 1.616976074551317, - "learning_rate": 3.736735345968183e-07, - "loss": 0.9217, - "step": 6721 - }, - { - "epoch": 0.8082727108759694, - "grad_norm": 1.5634157167755138, - "learning_rate": 3.7322026865586986e-07, - "loss": 0.9873, - "step": 6722 - }, - { - "epoch": 0.8083929537666086, - "grad_norm": 1.6595786851490133, - "learning_rate": 3.7276724949611206e-07, - "loss": 0.9245, - "step": 6723 - }, - { - "epoch": 0.8085131966572476, - "grad_norm": 1.7804032036239335, - "learning_rate": 3.723144771862694e-07, - "loss": 0.9444, - "step": 6724 - }, - { - "epoch": 0.8086334395478867, - "grad_norm": 1.57389991125025, - "learning_rate": 3.718619517950263e-07, - "loss": 0.9684, - "step": 6725 - }, - { - "epoch": 0.8087536824385259, - "grad_norm": 1.9688732774678646, - "learning_rate": 3.714096733910301e-07, - "loss": 0.9672, - "step": 6726 - }, - { - "epoch": 0.8088739253291649, - "grad_norm": 2.215923819681938, - "learning_rate": 3.709576420428926e-07, - "loss": 0.8992, - "step": 6727 - }, - { - "epoch": 0.808994168219804, - "grad_norm": 2.086150769250261, - "learning_rate": 3.7050585781918463e-07, - "loss": 0.9313, - "step": 6728 - }, - { - "epoch": 0.8091144111104431, - "grad_norm": 2.089897711226531, - "learning_rate": 3.700543207884428e-07, - "loss": 0.891, - "step": 6729 - }, - { - "epoch": 0.8092346540010822, - "grad_norm": 1.6656900246472117, - "learning_rate": 3.6960303101916466e-07, - "loss": 0.9062, - "step": 6730 - }, - { - "epoch": 0.8093548968917212, - "grad_norm": 0.8201636043021907, - "learning_rate": 3.6915198857981047e-07, - "loss": 0.7811, - "step": 6731 - }, - { - "epoch": 0.8094751397823604, - "grad_norm": 1.7522905890147358, - "learning_rate": 3.687011935388027e-07, - "loss": 0.8842, - "step": 6732 - }, - { - "epoch": 0.8095953826729995, - "grad_norm": 1.9697490760829137, - "learning_rate": 3.6825064596452646e-07, - "loss": 0.9225, - "step": 6733 - }, - { - "epoch": 0.8097156255636385, - "grad_norm": 1.720186985298121, - "learning_rate": 3.678003459253305e-07, - "loss": 0.9077, - "step": 6734 - }, - { - "epoch": 0.8098358684542777, - "grad_norm": 1.8946205354081613, - "learning_rate": 3.673502934895236e-07, - "loss": 0.9339, - "step": 6735 - }, - { - "epoch": 0.8099561113449167, - "grad_norm": 0.7157332509219984, - "learning_rate": 3.669004887253802e-07, - "loss": 0.7869, - "step": 6736 - }, - { - "epoch": 0.8100763542355558, - "grad_norm": 1.3865596196383498, - "learning_rate": 3.664509317011335e-07, - "loss": 0.9832, - "step": 6737 - }, - { - "epoch": 0.810196597126195, - "grad_norm": 1.8526809469132288, - "learning_rate": 3.6600162248498134e-07, - "loss": 0.9324, - "step": 6738 - }, - { - "epoch": 0.810316840016834, - "grad_norm": 1.8379272716731974, - "learning_rate": 3.6555256114508426e-07, - "loss": 0.9614, - "step": 6739 - }, - { - "epoch": 0.8104370829074731, - "grad_norm": 1.8291861559873055, - "learning_rate": 3.651037477495642e-07, - "loss": 0.9269, - "step": 6740 - }, - { - "epoch": 0.8105573257981122, - "grad_norm": 2.0844545271999473, - "learning_rate": 3.6465518236650584e-07, - "loss": 0.8756, - "step": 6741 - }, - { - "epoch": 0.8106775686887513, - "grad_norm": 1.6290723683936277, - "learning_rate": 3.642068650639558e-07, - "loss": 0.9793, - "step": 6742 - }, - { - "epoch": 0.8107978115793903, - "grad_norm": 1.6287873954315222, - "learning_rate": 3.6375879590992334e-07, - "loss": 0.8421, - "step": 6743 - }, - { - "epoch": 0.8109180544700295, - "grad_norm": 1.7976360533772155, - "learning_rate": 3.6331097497238173e-07, - "loss": 1.0012, - "step": 6744 - }, - { - "epoch": 0.8110382973606686, - "grad_norm": 1.747539425297395, - "learning_rate": 3.628634023192627e-07, - "loss": 0.9951, - "step": 6745 - }, - { - "epoch": 0.8111585402513076, - "grad_norm": 2.2152869909633703, - "learning_rate": 3.624160780184644e-07, - "loss": 0.9523, - "step": 6746 - }, - { - "epoch": 0.8112787831419467, - "grad_norm": 1.592905456572706, - "learning_rate": 3.6196900213784496e-07, - "loss": 0.9482, - "step": 6747 - }, - { - "epoch": 0.8113990260325858, - "grad_norm": 1.8928548100809288, - "learning_rate": 3.6152217474522527e-07, - "loss": 1.0639, - "step": 6748 - }, - { - "epoch": 0.8115192689232249, - "grad_norm": 1.6102464409101693, - "learning_rate": 3.6107559590838975e-07, - "loss": 0.9253, - "step": 6749 - }, - { - "epoch": 0.811639511813864, - "grad_norm": 2.3360848779020627, - "learning_rate": 3.606292656950822e-07, - "loss": 0.8753, - "step": 6750 - }, - { - "epoch": 0.8117597547045031, - "grad_norm": 1.786836063351731, - "learning_rate": 3.601831841730121e-07, - "loss": 1.0591, - "step": 6751 - }, - { - "epoch": 0.8118799975951422, - "grad_norm": 1.7471473467898044, - "learning_rate": 3.5973735140984916e-07, - "loss": 0.9374, - "step": 6752 - }, - { - "epoch": 0.8120002404857812, - "grad_norm": 2.3975964789524133, - "learning_rate": 3.5929176747322607e-07, - "loss": 0.9905, - "step": 6753 - }, - { - "epoch": 0.8121204833764204, - "grad_norm": 0.8417516638702816, - "learning_rate": 3.588464324307372e-07, - "loss": 0.7696, - "step": 6754 - }, - { - "epoch": 0.8122407262670595, - "grad_norm": 1.7268592268796055, - "learning_rate": 3.584013463499391e-07, - "loss": 0.9545, - "step": 6755 - }, - { - "epoch": 0.8123609691576985, - "grad_norm": 0.7318618464407718, - "learning_rate": 3.579565092983521e-07, - "loss": 0.848, - "step": 6756 - }, - { - "epoch": 0.8124812120483377, - "grad_norm": 1.8810187061359203, - "learning_rate": 3.575119213434565e-07, - "loss": 1.0364, - "step": 6757 - }, - { - "epoch": 0.8126014549389767, - "grad_norm": 1.6316449677398366, - "learning_rate": 3.5706758255269765e-07, - "loss": 1.0163, - "step": 6758 - }, - { - "epoch": 0.8127216978296158, - "grad_norm": 1.5911541161787237, - "learning_rate": 3.566234929934795e-07, - "loss": 0.8964, - "step": 6759 - }, - { - "epoch": 0.812841940720255, - "grad_norm": 1.4680052147234772, - "learning_rate": 3.561796527331706e-07, - "loss": 0.917, - "step": 6760 - }, - { - "epoch": 0.812962183610894, - "grad_norm": 1.762529951882682, - "learning_rate": 3.5573606183910163e-07, - "loss": 0.9675, - "step": 6761 - }, - { - "epoch": 0.8130824265015331, - "grad_norm": 1.4964458957505584, - "learning_rate": 3.5529272037856493e-07, - "loss": 0.9791, - "step": 6762 - }, - { - "epoch": 0.8132026693921722, - "grad_norm": 0.8646313565869714, - "learning_rate": 3.548496284188149e-07, - "loss": 0.7702, - "step": 6763 - }, - { - "epoch": 0.8133229122828113, - "grad_norm": 1.7646218209721383, - "learning_rate": 3.544067860270681e-07, - "loss": 0.9853, - "step": 6764 - }, - { - "epoch": 0.8134431551734503, - "grad_norm": 1.8874982720171365, - "learning_rate": 3.539641932705029e-07, - "loss": 0.9129, - "step": 6765 - }, - { - "epoch": 0.8135633980640895, - "grad_norm": 1.9787702330701318, - "learning_rate": 3.53521850216262e-07, - "loss": 0.9514, - "step": 6766 - }, - { - "epoch": 0.8136836409547286, - "grad_norm": 2.0073349677976307, - "learning_rate": 3.530797569314461e-07, - "loss": 0.9639, - "step": 6767 - }, - { - "epoch": 0.8138038838453676, - "grad_norm": 1.6297817301732744, - "learning_rate": 3.5263791348312235e-07, - "loss": 0.9662, - "step": 6768 - }, - { - "epoch": 0.8139241267360068, - "grad_norm": 1.7440603137348623, - "learning_rate": 3.521963199383171e-07, - "loss": 0.9094, - "step": 6769 - }, - { - "epoch": 0.8140443696266458, - "grad_norm": 2.057005064754528, - "learning_rate": 3.517549763640197e-07, - "loss": 0.9692, - "step": 6770 - }, - { - "epoch": 0.8141646125172849, - "grad_norm": 1.774918127186528, - "learning_rate": 3.513138828271829e-07, - "loss": 0.91, - "step": 6771 - }, - { - "epoch": 0.8142848554079241, - "grad_norm": 1.6516760520912193, - "learning_rate": 3.508730393947179e-07, - "loss": 0.8993, - "step": 6772 - }, - { - "epoch": 0.8144050982985631, - "grad_norm": 1.568591641036036, - "learning_rate": 3.504324461335024e-07, - "loss": 0.9141, - "step": 6773 - }, - { - "epoch": 0.8145253411892022, - "grad_norm": 1.8473447896055406, - "learning_rate": 3.499921031103732e-07, - "loss": 1.0714, - "step": 6774 - }, - { - "epoch": 0.8146455840798413, - "grad_norm": 1.5668272751127457, - "learning_rate": 3.4955201039212987e-07, - "loss": 0.9775, - "step": 6775 - }, - { - "epoch": 0.8147658269704804, - "grad_norm": 2.822688995602164, - "learning_rate": 3.4911216804553465e-07, - "loss": 0.8432, - "step": 6776 - }, - { - "epoch": 0.8148860698611194, - "grad_norm": 2.1750123122294296, - "learning_rate": 3.4867257613731017e-07, - "loss": 0.9046, - "step": 6777 - }, - { - "epoch": 0.8150063127517585, - "grad_norm": 1.4947905678916729, - "learning_rate": 3.4823323473414343e-07, - "loss": 1.052, - "step": 6778 - }, - { - "epoch": 0.8151265556423977, - "grad_norm": 1.688572789853944, - "learning_rate": 3.477941439026812e-07, - "loss": 0.9602, - "step": 6779 - }, - { - "epoch": 0.8152467985330367, - "grad_norm": 1.7006892714294761, - "learning_rate": 3.473553037095349e-07, - "loss": 0.9276, - "step": 6780 - }, - { - "epoch": 0.8153670414236758, - "grad_norm": 1.5959934077975242, - "learning_rate": 3.469167142212743e-07, - "loss": 1.0251, - "step": 6781 - }, - { - "epoch": 0.8154872843143149, - "grad_norm": 2.7577989269927587, - "learning_rate": 3.4647837550443337e-07, - "loss": 0.8282, - "step": 6782 - }, - { - "epoch": 0.815607527204954, - "grad_norm": 1.6319099377530408, - "learning_rate": 3.460402876255086e-07, - "loss": 0.9381, - "step": 6783 - }, - { - "epoch": 0.815727770095593, - "grad_norm": 1.8773553042268574, - "learning_rate": 3.456024506509574e-07, - "loss": 0.9144, - "step": 6784 - }, - { - "epoch": 0.8158480129862322, - "grad_norm": 1.5031270968924189, - "learning_rate": 3.4516486464719873e-07, - "loss": 0.9391, - "step": 6785 - }, - { - "epoch": 0.8159682558768713, - "grad_norm": 1.7481998994648094, - "learning_rate": 3.4472752968061445e-07, - "loss": 0.8279, - "step": 6786 - }, - { - "epoch": 0.8160884987675103, - "grad_norm": 1.829160608905792, - "learning_rate": 3.442904458175475e-07, - "loss": 0.9351, - "step": 6787 - }, - { - "epoch": 0.8162087416581495, - "grad_norm": 1.4971792401663397, - "learning_rate": 3.438536131243044e-07, - "loss": 0.9548, - "step": 6788 - }, - { - "epoch": 0.8163289845487885, - "grad_norm": 2.0358418135506073, - "learning_rate": 3.434170316671503e-07, - "loss": 0.8177, - "step": 6789 - }, - { - "epoch": 0.8164492274394276, - "grad_norm": 2.585188742854502, - "learning_rate": 3.4298070151231583e-07, - "loss": 1.0992, - "step": 6790 - }, - { - "epoch": 0.8165694703300668, - "grad_norm": 1.9856563246011911, - "learning_rate": 3.425446227259916e-07, - "loss": 0.7994, - "step": 6791 - }, - { - "epoch": 0.8166897132207058, - "grad_norm": 1.8310251299697693, - "learning_rate": 3.421087953743296e-07, - "loss": 1.0246, - "step": 6792 - }, - { - "epoch": 0.8168099561113449, - "grad_norm": 2.0543175425907014, - "learning_rate": 3.416732195234464e-07, - "loss": 0.9989, - "step": 6793 - }, - { - "epoch": 0.816930199001984, - "grad_norm": 1.4137066556726512, - "learning_rate": 3.4123789523941613e-07, - "loss": 0.9937, - "step": 6794 - }, - { - "epoch": 0.8170504418926231, - "grad_norm": 1.4328554003500764, - "learning_rate": 3.4080282258827884e-07, - "loss": 0.8346, - "step": 6795 - }, - { - "epoch": 0.8171706847832622, - "grad_norm": 1.9613494440351065, - "learning_rate": 3.403680016360342e-07, - "loss": 0.9186, - "step": 6796 - }, - { - "epoch": 0.8172909276739013, - "grad_norm": 1.517857117212119, - "learning_rate": 3.3993343244864403e-07, - "loss": 0.8733, - "step": 6797 - }, - { - "epoch": 0.8174111705645404, - "grad_norm": 1.7520835598262439, - "learning_rate": 3.394991150920323e-07, - "loss": 0.9264, - "step": 6798 - }, - { - "epoch": 0.8175314134551794, - "grad_norm": 1.8075432579383093, - "learning_rate": 3.3906504963208396e-07, - "loss": 0.9497, - "step": 6799 - }, - { - "epoch": 0.8176516563458186, - "grad_norm": 1.6512307460912778, - "learning_rate": 3.3863123613464774e-07, - "loss": 0.8466, - "step": 6800 - }, - { - "epoch": 0.8177718992364577, - "grad_norm": 1.6152202785481093, - "learning_rate": 3.381976746655317e-07, - "loss": 0.9376, - "step": 6801 - }, - { - "epoch": 0.8178921421270967, - "grad_norm": 2.352656417332879, - "learning_rate": 3.3776436529050756e-07, - "loss": 0.8771, - "step": 6802 - }, - { - "epoch": 0.8180123850177359, - "grad_norm": 1.8178264357301699, - "learning_rate": 3.373313080753073e-07, - "loss": 0.9179, - "step": 6803 - }, - { - "epoch": 0.8181326279083749, - "grad_norm": 1.511924788753976, - "learning_rate": 3.3689850308562527e-07, - "loss": 0.9737, - "step": 6804 - }, - { - "epoch": 0.818252870799014, - "grad_norm": 1.6830620512468244, - "learning_rate": 3.364659503871183e-07, - "loss": 0.9741, - "step": 6805 - }, - { - "epoch": 0.8183731136896532, - "grad_norm": 1.8207839551758416, - "learning_rate": 3.3603365004540417e-07, - "loss": 1.0348, - "step": 6806 - }, - { - "epoch": 0.8184933565802922, - "grad_norm": 1.9194088130992257, - "learning_rate": 3.356016021260624e-07, - "loss": 0.9642, - "step": 6807 - }, - { - "epoch": 0.8186135994709313, - "grad_norm": 2.0610505993446844, - "learning_rate": 3.35169806694634e-07, - "loss": 0.8383, - "step": 6808 - }, - { - "epoch": 0.8187338423615703, - "grad_norm": 0.7916552559548922, - "learning_rate": 3.3473826381662186e-07, - "loss": 0.8176, - "step": 6809 - }, - { - "epoch": 0.8188540852522095, - "grad_norm": 1.6881828339438694, - "learning_rate": 3.3430697355749216e-07, - "loss": 1.0176, - "step": 6810 - }, - { - "epoch": 0.8189743281428485, - "grad_norm": 1.8067692891262124, - "learning_rate": 3.3387593598266907e-07, - "loss": 0.9404, - "step": 6811 - }, - { - "epoch": 0.8190945710334876, - "grad_norm": 1.5674273047861533, - "learning_rate": 3.3344515115754225e-07, - "loss": 0.9834, - "step": 6812 - }, - { - "epoch": 0.8192148139241268, - "grad_norm": 1.9129607474631671, - "learning_rate": 3.33014619147461e-07, - "loss": 0.9919, - "step": 6813 - }, - { - "epoch": 0.8193350568147658, - "grad_norm": 2.405767218968875, - "learning_rate": 3.325843400177362e-07, - "loss": 0.9178, - "step": 6814 - }, - { - "epoch": 0.8194552997054049, - "grad_norm": 1.866247602947967, - "learning_rate": 3.32154313833642e-07, - "loss": 0.9337, - "step": 6815 - }, - { - "epoch": 0.819575542596044, - "grad_norm": 2.061375166757619, - "learning_rate": 3.3172454066041164e-07, - "loss": 0.7947, - "step": 6816 - }, - { - "epoch": 0.8196957854866831, - "grad_norm": 1.6471158193595992, - "learning_rate": 3.3129502056324234e-07, - "loss": 0.9596, - "step": 6817 - }, - { - "epoch": 0.8198160283773221, - "grad_norm": 0.8420190717050421, - "learning_rate": 3.3086575360729165e-07, - "loss": 0.8203, - "step": 6818 - }, - { - "epoch": 0.8199362712679613, - "grad_norm": 1.82831618132612, - "learning_rate": 3.3043673985767906e-07, - "loss": 0.9135, - "step": 6819 - }, - { - "epoch": 0.8200565141586004, - "grad_norm": 1.6203706530414315, - "learning_rate": 3.3000797937948564e-07, - "loss": 0.9682, - "step": 6820 - }, - { - "epoch": 0.8201767570492394, - "grad_norm": 0.9500020253009289, - "learning_rate": 3.295794722377534e-07, - "loss": 0.8728, - "step": 6821 - }, - { - "epoch": 0.8202969999398786, - "grad_norm": 2.645126839176301, - "learning_rate": 3.291512184974876e-07, - "loss": 0.9913, - "step": 6822 - }, - { - "epoch": 0.8204172428305176, - "grad_norm": 1.6556741685143748, - "learning_rate": 3.2872321822365346e-07, - "loss": 0.8724, - "step": 6823 - }, - { - "epoch": 0.8205374857211567, - "grad_norm": 1.8647632263467346, - "learning_rate": 3.282954714811783e-07, - "loss": 0.9338, - "step": 6824 - }, - { - "epoch": 0.8206577286117959, - "grad_norm": 1.9257619365474234, - "learning_rate": 3.2786797833495093e-07, - "loss": 0.9023, - "step": 6825 - }, - { - "epoch": 0.8207779715024349, - "grad_norm": 1.7067750088133582, - "learning_rate": 3.274407388498213e-07, - "loss": 0.9251, - "step": 6826 - }, - { - "epoch": 0.820898214393074, - "grad_norm": 1.65044949217777, - "learning_rate": 3.270137530906021e-07, - "loss": 0.9426, - "step": 6827 - }, - { - "epoch": 0.8210184572837131, - "grad_norm": 2.0233653876809865, - "learning_rate": 3.265870211220665e-07, - "loss": 1.0282, - "step": 6828 - }, - { - "epoch": 0.8211387001743522, - "grad_norm": 2.158206549594285, - "learning_rate": 3.2616054300894934e-07, - "loss": 1.0179, - "step": 6829 - }, - { - "epoch": 0.8212589430649913, - "grad_norm": 1.8575477276673495, - "learning_rate": 3.2573431881594693e-07, - "loss": 1.0448, - "step": 6830 - }, - { - "epoch": 0.8213791859556304, - "grad_norm": 2.538588071661688, - "learning_rate": 3.2530834860771663e-07, - "loss": 0.8465, - "step": 6831 - }, - { - "epoch": 0.8214994288462695, - "grad_norm": 1.7501215211364944, - "learning_rate": 3.248826324488794e-07, - "loss": 0.9374, - "step": 6832 - }, - { - "epoch": 0.8216196717369085, - "grad_norm": 1.6299025601241528, - "learning_rate": 3.244571704040138e-07, - "loss": 1.072, - "step": 6833 - }, - { - "epoch": 0.8217399146275477, - "grad_norm": 2.44525380414529, - "learning_rate": 3.2403196253766374e-07, - "loss": 0.9358, - "step": 6834 - }, - { - "epoch": 0.8218601575181868, - "grad_norm": 2.3352328357153542, - "learning_rate": 3.2360700891433254e-07, - "loss": 0.9897, - "step": 6835 - }, - { - "epoch": 0.8219804004088258, - "grad_norm": 0.8332412589387538, - "learning_rate": 3.231823095984847e-07, - "loss": 0.7894, - "step": 6836 - }, - { - "epoch": 0.822100643299465, - "grad_norm": 1.9133428972561266, - "learning_rate": 3.2275786465454814e-07, - "loss": 0.9496, - "step": 6837 - }, - { - "epoch": 0.822220886190104, - "grad_norm": 1.8483602138430155, - "learning_rate": 3.2233367414690917e-07, - "loss": 0.9619, - "step": 6838 - }, - { - "epoch": 0.8223411290807431, - "grad_norm": 1.906633432316514, - "learning_rate": 3.219097381399183e-07, - "loss": 1.0363, - "step": 6839 - }, - { - "epoch": 0.8224613719713821, - "grad_norm": 1.6348228672447163, - "learning_rate": 3.2148605669788584e-07, - "loss": 1.0018, - "step": 6840 - }, - { - "epoch": 0.8225816148620213, - "grad_norm": 2.3303603123735694, - "learning_rate": 3.2106262988508405e-07, - "loss": 0.9757, - "step": 6841 - }, - { - "epoch": 0.8227018577526604, - "grad_norm": 1.8048781917609056, - "learning_rate": 3.206394577657465e-07, - "loss": 0.9452, - "step": 6842 - }, - { - "epoch": 0.8228221006432994, - "grad_norm": 2.7291752667085296, - "learning_rate": 3.202165404040675e-07, - "loss": 0.9152, - "step": 6843 - }, - { - "epoch": 0.8229423435339386, - "grad_norm": 2.9288542109211524, - "learning_rate": 3.1979387786420396e-07, - "loss": 0.9483, - "step": 6844 - }, - { - "epoch": 0.8230625864245776, - "grad_norm": 3.0911455076753636, - "learning_rate": 3.1937147021027346e-07, - "loss": 1.0234, - "step": 6845 - }, - { - "epoch": 0.8231828293152167, - "grad_norm": 3.413463688412668, - "learning_rate": 3.189493175063547e-07, - "loss": 0.9662, - "step": 6846 - }, - { - "epoch": 0.8233030722058559, - "grad_norm": 1.775941870189138, - "learning_rate": 3.1852741981648776e-07, - "loss": 0.8715, - "step": 6847 - }, - { - "epoch": 0.8234233150964949, - "grad_norm": 1.8204938282514946, - "learning_rate": 3.1810577720467404e-07, - "loss": 0.8962, - "step": 6848 - }, - { - "epoch": 0.823543557987134, - "grad_norm": 1.5467802076483606, - "learning_rate": 3.176843897348769e-07, - "loss": 0.7649, - "step": 6849 - }, - { - "epoch": 0.8236638008777731, - "grad_norm": 2.39650924657275, - "learning_rate": 3.1726325747102034e-07, - "loss": 0.9591, - "step": 6850 - }, - { - "epoch": 0.8237840437684122, - "grad_norm": 1.596343828160556, - "learning_rate": 3.1684238047698974e-07, - "loss": 0.84, - "step": 6851 - }, - { - "epoch": 0.8239042866590512, - "grad_norm": 1.9623536851122514, - "learning_rate": 3.1642175881663155e-07, - "loss": 0.7282, - "step": 6852 - }, - { - "epoch": 0.8240245295496904, - "grad_norm": 1.6654836827317399, - "learning_rate": 3.160013925537537e-07, - "loss": 1.0411, - "step": 6853 - }, - { - "epoch": 0.8241447724403295, - "grad_norm": 1.9325065253420237, - "learning_rate": 3.155812817521266e-07, - "loss": 0.9513, - "step": 6854 - }, - { - "epoch": 0.8242650153309685, - "grad_norm": 1.9708782379110243, - "learning_rate": 3.151614264754787e-07, - "loss": 0.9736, - "step": 6855 - }, - { - "epoch": 0.8243852582216077, - "grad_norm": 1.9722542230341096, - "learning_rate": 3.147418267875035e-07, - "loss": 0.9953, - "step": 6856 - }, - { - "epoch": 0.8245055011122467, - "grad_norm": 2.000550551931578, - "learning_rate": 3.1432248275185315e-07, - "loss": 0.8581, - "step": 6857 - }, - { - "epoch": 0.8246257440028858, - "grad_norm": 2.0429902230587573, - "learning_rate": 3.139033944321412e-07, - "loss": 0.9752, - "step": 6858 - }, - { - "epoch": 0.824745986893525, - "grad_norm": 1.477091053815643, - "learning_rate": 3.1348456189194507e-07, - "loss": 0.9895, - "step": 6859 - }, - { - "epoch": 0.824866229784164, - "grad_norm": 1.581973152506728, - "learning_rate": 3.1306598519479876e-07, - "loss": 1.0246, - "step": 6860 - }, - { - "epoch": 0.8249864726748031, - "grad_norm": 1.5701823773726638, - "learning_rate": 3.1264766440420177e-07, - "loss": 0.9811, - "step": 6861 - }, - { - "epoch": 0.8251067155654422, - "grad_norm": 2.022422624388128, - "learning_rate": 3.122295995836124e-07, - "loss": 0.8797, - "step": 6862 - }, - { - "epoch": 0.8252269584560813, - "grad_norm": 2.0182238244426944, - "learning_rate": 3.118117907964508e-07, - "loss": 0.9797, - "step": 6863 - }, - { - "epoch": 0.8253472013467203, - "grad_norm": 2.091867882015814, - "learning_rate": 3.1139423810609856e-07, - "loss": 1.005, - "step": 6864 - }, - { - "epoch": 0.8254674442373595, - "grad_norm": 1.7955049018251181, - "learning_rate": 3.1097694157589714e-07, - "loss": 0.9549, - "step": 6865 - }, - { - "epoch": 0.8255876871279986, - "grad_norm": 3.000853099325335, - "learning_rate": 3.105599012691511e-07, - "loss": 0.9691, - "step": 6866 - }, - { - "epoch": 0.8257079300186376, - "grad_norm": 1.4288376747390812, - "learning_rate": 3.101431172491249e-07, - "loss": 1.0206, - "step": 6867 - }, - { - "epoch": 0.8258281729092768, - "grad_norm": 2.886551414667517, - "learning_rate": 3.097265895790444e-07, - "loss": 0.9151, - "step": 6868 - }, - { - "epoch": 0.8259484157999158, - "grad_norm": 2.6800934509097125, - "learning_rate": 3.093103183220962e-07, - "loss": 1.0304, - "step": 6869 - }, - { - "epoch": 0.8260686586905549, - "grad_norm": 0.9565765546322633, - "learning_rate": 3.0889430354142796e-07, - "loss": 0.8445, - "step": 6870 - }, - { - "epoch": 0.826188901581194, - "grad_norm": 1.9398342960417658, - "learning_rate": 3.084785453001497e-07, - "loss": 0.8929, - "step": 6871 - }, - { - "epoch": 0.8263091444718331, - "grad_norm": 2.1945234625173327, - "learning_rate": 3.080630436613314e-07, - "loss": 1.0149, - "step": 6872 - }, - { - "epoch": 0.8264293873624722, - "grad_norm": 1.840868233378243, - "learning_rate": 3.076477986880039e-07, - "loss": 1.0509, - "step": 6873 - }, - { - "epoch": 0.8265496302531112, - "grad_norm": 2.071538483000969, - "learning_rate": 3.0723281044315986e-07, - "loss": 0.8836, - "step": 6874 - }, - { - "epoch": 0.8266698731437504, - "grad_norm": 1.8389460809591256, - "learning_rate": 3.068180789897521e-07, - "loss": 0.9612, - "step": 6875 - }, - { - "epoch": 0.8267901160343895, - "grad_norm": 1.4166531936154434, - "learning_rate": 3.064036043906966e-07, - "loss": 1.0162, - "step": 6876 - }, - { - "epoch": 0.8269103589250285, - "grad_norm": 1.9080156997578561, - "learning_rate": 3.059893867088668e-07, - "loss": 0.8759, - "step": 6877 - }, - { - "epoch": 0.8270306018156677, - "grad_norm": 1.802515150898022, - "learning_rate": 3.055754260071004e-07, - "loss": 0.8656, - "step": 6878 - }, - { - "epoch": 0.8271508447063067, - "grad_norm": 1.8484100030190445, - "learning_rate": 3.051617223481948e-07, - "loss": 0.938, - "step": 6879 - }, - { - "epoch": 0.8272710875969458, - "grad_norm": 2.0205753069925474, - "learning_rate": 3.047482757949078e-07, - "loss": 0.9476, - "step": 6880 - }, - { - "epoch": 0.827391330487585, - "grad_norm": 1.6667321510165998, - "learning_rate": 3.043350864099605e-07, - "loss": 1.0432, - "step": 6881 - }, - { - "epoch": 0.827511573378224, - "grad_norm": 2.4154857402894656, - "learning_rate": 3.039221542560315e-07, - "loss": 1.0126, - "step": 6882 - }, - { - "epoch": 0.8276318162688631, - "grad_norm": 1.7768408430629172, - "learning_rate": 3.0350947939576356e-07, - "loss": 0.9493, - "step": 6883 - }, - { - "epoch": 0.8277520591595022, - "grad_norm": 1.4791758989216128, - "learning_rate": 3.0309706189175876e-07, - "loss": 0.923, - "step": 6884 - }, - { - "epoch": 0.8278723020501413, - "grad_norm": 0.8329134280542713, - "learning_rate": 3.0268490180658045e-07, - "loss": 0.7901, - "step": 6885 - }, - { - "epoch": 0.8279925449407803, - "grad_norm": 1.930790944770836, - "learning_rate": 3.0227299920275305e-07, - "loss": 0.9892, - "step": 6886 - }, - { - "epoch": 0.8281127878314195, - "grad_norm": 2.264470088856374, - "learning_rate": 3.018613541427613e-07, - "loss": 1.0559, - "step": 6887 - }, - { - "epoch": 0.8282330307220586, - "grad_norm": 1.5926215720969528, - "learning_rate": 3.0144996668905243e-07, - "loss": 0.9348, - "step": 6888 - }, - { - "epoch": 0.8283532736126976, - "grad_norm": 15.050469496998362, - "learning_rate": 3.010388369040331e-07, - "loss": 1.0149, - "step": 6889 - }, - { - "epoch": 0.8284735165033368, - "grad_norm": 1.3867727349747103, - "learning_rate": 3.0062796485007156e-07, - "loss": 1.0247, - "step": 6890 - }, - { - "epoch": 0.8285937593939758, - "grad_norm": 2.2399276350457615, - "learning_rate": 3.002173505894965e-07, - "loss": 0.8423, - "step": 6891 - }, - { - "epoch": 0.8287140022846149, - "grad_norm": 2.3397733394364812, - "learning_rate": 2.998069941845973e-07, - "loss": 0.8238, - "step": 6892 - }, - { - "epoch": 0.8288342451752541, - "grad_norm": 0.7923880644848246, - "learning_rate": 2.993968956976258e-07, - "loss": 0.8133, - "step": 6893 - }, - { - "epoch": 0.8289544880658931, - "grad_norm": 2.636351609277347, - "learning_rate": 2.9898705519079313e-07, - "loss": 0.8932, - "step": 6894 - }, - { - "epoch": 0.8290747309565322, - "grad_norm": 2.057856100050639, - "learning_rate": 2.985774727262715e-07, - "loss": 0.9389, - "step": 6895 - }, - { - "epoch": 0.8291949738471713, - "grad_norm": 1.6297585432563366, - "learning_rate": 2.981681483661949e-07, - "loss": 1.0066, - "step": 6896 - }, - { - "epoch": 0.8293152167378104, - "grad_norm": 1.557935226641389, - "learning_rate": 2.9775908217265633e-07, - "loss": 0.9052, - "step": 6897 - }, - { - "epoch": 0.8294354596284494, - "grad_norm": 0.817269557822716, - "learning_rate": 2.9735027420771253e-07, - "loss": 0.7186, - "step": 6898 - }, - { - "epoch": 0.8295557025190886, - "grad_norm": 1.7770753091837437, - "learning_rate": 2.969417245333774e-07, - "loss": 0.9166, - "step": 6899 - }, - { - "epoch": 0.8296759454097277, - "grad_norm": 1.925044725567323, - "learning_rate": 2.9653343321162915e-07, - "loss": 0.9786, - "step": 6900 - }, - { - "epoch": 0.8297961883003667, - "grad_norm": 1.909100824078496, - "learning_rate": 2.9612540030440446e-07, - "loss": 0.845, - "step": 6901 - }, - { - "epoch": 0.8299164311910058, - "grad_norm": 0.866212967415445, - "learning_rate": 2.9571762587360206e-07, - "loss": 0.8405, - "step": 6902 - }, - { - "epoch": 0.8300366740816449, - "grad_norm": 1.6259944513843265, - "learning_rate": 2.953101099810806e-07, - "loss": 0.9408, - "step": 6903 - }, - { - "epoch": 0.830156916972284, - "grad_norm": 1.886390167751686, - "learning_rate": 2.9490285268865965e-07, - "loss": 1.0354, - "step": 6904 - }, - { - "epoch": 0.830277159862923, - "grad_norm": 1.9997323572367964, - "learning_rate": 2.9449585405812085e-07, - "loss": 1.0038, - "step": 6905 - }, - { - "epoch": 0.8303974027535622, - "grad_norm": 1.708375100656424, - "learning_rate": 2.940891141512043e-07, - "loss": 0.9374, - "step": 6906 - }, - { - "epoch": 0.8305176456442013, - "grad_norm": 2.1501225192624154, - "learning_rate": 2.9368263302961385e-07, - "loss": 0.9154, - "step": 6907 - }, - { - "epoch": 0.8306378885348403, - "grad_norm": 2.5815069038436222, - "learning_rate": 2.9327641075501075e-07, - "loss": 0.9952, - "step": 6908 - }, - { - "epoch": 0.8307581314254795, - "grad_norm": 2.6712372671278155, - "learning_rate": 2.9287044738901866e-07, - "loss": 0.8638, - "step": 6909 - }, - { - "epoch": 0.8308783743161186, - "grad_norm": 1.9547872565144273, - "learning_rate": 2.9246474299322274e-07, - "loss": 1.1074, - "step": 6910 - }, - { - "epoch": 0.8309986172067576, - "grad_norm": 0.9453963880912585, - "learning_rate": 2.920592976291678e-07, - "loss": 0.8593, - "step": 6911 - }, - { - "epoch": 0.8311188600973968, - "grad_norm": 1.9838712758790105, - "learning_rate": 2.916541113583595e-07, - "loss": 1.0054, - "step": 6912 - }, - { - "epoch": 0.8312391029880358, - "grad_norm": 2.0037095715114384, - "learning_rate": 2.912491842422642e-07, - "loss": 0.8626, - "step": 6913 - }, - { - "epoch": 0.8313593458786749, - "grad_norm": 1.5847414514636324, - "learning_rate": 2.9084451634230857e-07, - "loss": 0.901, - "step": 6914 - }, - { - "epoch": 0.831479588769314, - "grad_norm": 2.4559952776114824, - "learning_rate": 2.9044010771988125e-07, - "loss": 0.9192, - "step": 6915 - }, - { - "epoch": 0.8315998316599531, - "grad_norm": 1.658160780870382, - "learning_rate": 2.900359584363303e-07, - "loss": 0.9286, - "step": 6916 - }, - { - "epoch": 0.8317200745505922, - "grad_norm": 2.03128849873372, - "learning_rate": 2.8963206855296494e-07, - "loss": 1.0423, - "step": 6917 - }, - { - "epoch": 0.8318403174412313, - "grad_norm": 1.5156369552927322, - "learning_rate": 2.892284381310548e-07, - "loss": 0.9754, - "step": 6918 - }, - { - "epoch": 0.8319605603318704, - "grad_norm": 2.605533627641702, - "learning_rate": 2.888250672318302e-07, - "loss": 0.9151, - "step": 6919 - }, - { - "epoch": 0.8320808032225094, - "grad_norm": 1.711415402629907, - "learning_rate": 2.884219559164831e-07, - "loss": 0.8894, - "step": 6920 - }, - { - "epoch": 0.8322010461131486, - "grad_norm": 2.710533793618906, - "learning_rate": 2.880191042461635e-07, - "loss": 1.0056, - "step": 6921 - }, - { - "epoch": 0.8323212890037877, - "grad_norm": 1.622576887663008, - "learning_rate": 2.876165122819849e-07, - "loss": 1.0, - "step": 6922 - }, - { - "epoch": 0.8324415318944267, - "grad_norm": 1.541257116544185, - "learning_rate": 2.872141800850201e-07, - "loss": 0.989, - "step": 6923 - }, - { - "epoch": 0.8325617747850659, - "grad_norm": 1.617061444220963, - "learning_rate": 2.868121077163024e-07, - "loss": 0.9254, - "step": 6924 - }, - { - "epoch": 0.8326820176757049, - "grad_norm": 2.0371595835510186, - "learning_rate": 2.864102952368257e-07, - "loss": 0.9203, - "step": 6925 - }, - { - "epoch": 0.832802260566344, - "grad_norm": 1.3569822628686465, - "learning_rate": 2.860087427075444e-07, - "loss": 0.791, - "step": 6926 - }, - { - "epoch": 0.8329225034569832, - "grad_norm": 2.2927350992607236, - "learning_rate": 2.856074501893744e-07, - "loss": 1.0629, - "step": 6927 - }, - { - "epoch": 0.8330427463476222, - "grad_norm": 1.4999252936515353, - "learning_rate": 2.8520641774319054e-07, - "loss": 1.0105, - "step": 6928 - }, - { - "epoch": 0.8331629892382613, - "grad_norm": 1.7238106870860586, - "learning_rate": 2.848056454298309e-07, - "loss": 0.946, - "step": 6929 - }, - { - "epoch": 0.8332832321289004, - "grad_norm": 1.8268890115133387, - "learning_rate": 2.844051333100905e-07, - "loss": 0.8452, - "step": 6930 - }, - { - "epoch": 0.8334034750195395, - "grad_norm": 1.9162707610974594, - "learning_rate": 2.840048814447269e-07, - "loss": 1.0397, - "step": 6931 - }, - { - "epoch": 0.8335237179101785, - "grad_norm": 2.467390434514864, - "learning_rate": 2.836048898944587e-07, - "loss": 0.9367, - "step": 6932 - }, - { - "epoch": 0.8336439608008177, - "grad_norm": 2.2534067006929814, - "learning_rate": 2.832051587199642e-07, - "loss": 0.9181, - "step": 6933 - }, - { - "epoch": 0.8337642036914568, - "grad_norm": 0.8444193551521968, - "learning_rate": 2.828056879818821e-07, - "loss": 0.7987, - "step": 6934 - }, - { - "epoch": 0.8338844465820958, - "grad_norm": 2.166201309211595, - "learning_rate": 2.824064777408117e-07, - "loss": 1.0268, - "step": 6935 - }, - { - "epoch": 0.8340046894727349, - "grad_norm": 1.5579238254940035, - "learning_rate": 2.8200752805731263e-07, - "loss": 0.9563, - "step": 6936 - }, - { - "epoch": 0.834124932363374, - "grad_norm": 1.3932066538290138, - "learning_rate": 2.8160883899190625e-07, - "loss": 1.003, - "step": 6937 - }, - { - "epoch": 0.8342451752540131, - "grad_norm": 2.153207041557373, - "learning_rate": 2.8121041060507234e-07, - "loss": 0.9375, - "step": 6938 - }, - { - "epoch": 0.8343654181446521, - "grad_norm": 1.4471040910981567, - "learning_rate": 2.808122429572528e-07, - "loss": 0.9135, - "step": 6939 - }, - { - "epoch": 0.8344856610352913, - "grad_norm": 2.5696850042598927, - "learning_rate": 2.804143361088489e-07, - "loss": 0.9588, - "step": 6940 - }, - { - "epoch": 0.8346059039259304, - "grad_norm": 1.940637324934443, - "learning_rate": 2.8001669012022277e-07, - "loss": 0.9654, - "step": 6941 - }, - { - "epoch": 0.8347261468165694, - "grad_norm": 1.4807361371384704, - "learning_rate": 2.7961930505169795e-07, - "loss": 0.8936, - "step": 6942 - }, - { - "epoch": 0.8348463897072086, - "grad_norm": 1.9456004347140365, - "learning_rate": 2.792221809635558e-07, - "loss": 0.9588, - "step": 6943 - }, - { - "epoch": 0.8349666325978476, - "grad_norm": 1.6154875735599263, - "learning_rate": 2.788253179160411e-07, - "loss": 0.9474, - "step": 6944 - }, - { - "epoch": 0.8350868754884867, - "grad_norm": 1.9644899447895683, - "learning_rate": 2.7842871596935725e-07, - "loss": 0.8485, - "step": 6945 - }, - { - "epoch": 0.8352071183791259, - "grad_norm": 1.5864876981318994, - "learning_rate": 2.780323751836682e-07, - "loss": 0.8877, - "step": 6946 - }, - { - "epoch": 0.8353273612697649, - "grad_norm": 1.3440141639984917, - "learning_rate": 2.7763629561909876e-07, - "loss": 0.9854, - "step": 6947 - }, - { - "epoch": 0.835447604160404, - "grad_norm": 2.342052404415165, - "learning_rate": 2.772404773357335e-07, - "loss": 0.9641, - "step": 6948 - }, - { - "epoch": 0.8355678470510431, - "grad_norm": 1.6816525342566102, - "learning_rate": 2.7684492039361853e-07, - "loss": 0.9735, - "step": 6949 - }, - { - "epoch": 0.8356880899416822, - "grad_norm": 1.6558781332333492, - "learning_rate": 2.764496248527586e-07, - "loss": 1.0381, - "step": 6950 - }, - { - "epoch": 0.8358083328323213, - "grad_norm": 1.9874017415259702, - "learning_rate": 2.760545907731211e-07, - "loss": 0.9793, - "step": 6951 - }, - { - "epoch": 0.8359285757229604, - "grad_norm": 1.6512602010638515, - "learning_rate": 2.75659818214631e-07, - "loss": 0.8749, - "step": 6952 - }, - { - "epoch": 0.8360488186135995, - "grad_norm": 1.8590681319697295, - "learning_rate": 2.752653072371749e-07, - "loss": 0.9866, - "step": 6953 - }, - { - "epoch": 0.8361690615042385, - "grad_norm": 1.69358407084056, - "learning_rate": 2.7487105790060105e-07, - "loss": 0.9453, - "step": 6954 - }, - { - "epoch": 0.8362893043948777, - "grad_norm": 1.8680664292231361, - "learning_rate": 2.7447707026471587e-07, - "loss": 0.8884, - "step": 6955 - }, - { - "epoch": 0.8364095472855168, - "grad_norm": 1.8859933915978164, - "learning_rate": 2.740833443892874e-07, - "loss": 1.0043, - "step": 6956 - }, - { - "epoch": 0.8365297901761558, - "grad_norm": 1.640066796663758, - "learning_rate": 2.7368988033404327e-07, - "loss": 0.9901, - "step": 6957 - }, - { - "epoch": 0.836650033066795, - "grad_norm": 1.5859190089458512, - "learning_rate": 2.732966781586712e-07, - "loss": 1.0469, - "step": 6958 - }, - { - "epoch": 0.836770275957434, - "grad_norm": 1.5861124778395757, - "learning_rate": 2.729037379228205e-07, - "loss": 0.873, - "step": 6959 - }, - { - "epoch": 0.8368905188480731, - "grad_norm": 1.4011941942575472, - "learning_rate": 2.725110596860998e-07, - "loss": 1.0022, - "step": 6960 - }, - { - "epoch": 0.8370107617387123, - "grad_norm": 1.7675171562344192, - "learning_rate": 2.7211864350807776e-07, - "loss": 0.907, - "step": 6961 - }, - { - "epoch": 0.8371310046293513, - "grad_norm": 1.570211378798691, - "learning_rate": 2.717264894482836e-07, - "loss": 0.9355, - "step": 6962 - }, - { - "epoch": 0.8372512475199904, - "grad_norm": 1.812655540895615, - "learning_rate": 2.7133459756620646e-07, - "loss": 1.01, - "step": 6963 - }, - { - "epoch": 0.8373714904106295, - "grad_norm": 1.6850583107704051, - "learning_rate": 2.7094296792129733e-07, - "loss": 0.9366, - "step": 6964 - }, - { - "epoch": 0.8374917333012686, - "grad_norm": 1.8163914177401812, - "learning_rate": 2.7055160057296424e-07, - "loss": 0.9466, - "step": 6965 - }, - { - "epoch": 0.8376119761919076, - "grad_norm": 1.6516652258447444, - "learning_rate": 2.7016049558057896e-07, - "loss": 0.9159, - "step": 6966 - }, - { - "epoch": 0.8377322190825467, - "grad_norm": 1.616717873906073, - "learning_rate": 2.6976965300347074e-07, - "loss": 0.9133, - "step": 6967 - }, - { - "epoch": 0.8378524619731859, - "grad_norm": 2.5455560369818127, - "learning_rate": 2.693790729009309e-07, - "loss": 0.8998, - "step": 6968 - }, - { - "epoch": 0.8379727048638249, - "grad_norm": 1.8383885680979504, - "learning_rate": 2.6898875533220946e-07, - "loss": 1.0764, - "step": 6969 - }, - { - "epoch": 0.838092947754464, - "grad_norm": 1.6387700084451216, - "learning_rate": 2.685987003565171e-07, - "loss": 1.0144, - "step": 6970 - }, - { - "epoch": 0.8382131906451031, - "grad_norm": 2.372740423564016, - "learning_rate": 2.6820890803302566e-07, - "loss": 0.9563, - "step": 6971 - }, - { - "epoch": 0.8383334335357422, - "grad_norm": 1.8135873229714092, - "learning_rate": 2.6781937842086557e-07, - "loss": 1.0173, - "step": 6972 - }, - { - "epoch": 0.8384536764263812, - "grad_norm": 1.7467458987768654, - "learning_rate": 2.6743011157912933e-07, - "loss": 0.8799, - "step": 6973 - }, - { - "epoch": 0.8385739193170204, - "grad_norm": 1.7014518008202661, - "learning_rate": 2.6704110756686725e-07, - "loss": 0.8511, - "step": 6974 - }, - { - "epoch": 0.8386941622076595, - "grad_norm": 1.7712855790037005, - "learning_rate": 2.6665236644309085e-07, - "loss": 1.0353, - "step": 6975 - }, - { - "epoch": 0.8388144050982985, - "grad_norm": 1.763260397325087, - "learning_rate": 2.662638882667727e-07, - "loss": 0.9914, - "step": 6976 - }, - { - "epoch": 0.8389346479889377, - "grad_norm": 2.6773599464409705, - "learning_rate": 2.658756730968443e-07, - "loss": 0.9234, - "step": 6977 - }, - { - "epoch": 0.8390548908795767, - "grad_norm": 1.9733586939694205, - "learning_rate": 2.654877209921975e-07, - "loss": 1.0867, - "step": 6978 - }, - { - "epoch": 0.8391751337702158, - "grad_norm": 3.178625130850309, - "learning_rate": 2.651000320116843e-07, - "loss": 0.824, - "step": 6979 - }, - { - "epoch": 0.839295376660855, - "grad_norm": 1.7681802809357117, - "learning_rate": 2.647126062141163e-07, - "loss": 0.9558, - "step": 6980 - }, - { - "epoch": 0.839415619551494, - "grad_norm": 1.8295916029690302, - "learning_rate": 2.643254436582669e-07, - "loss": 1.0355, - "step": 6981 - }, - { - "epoch": 0.8395358624421331, - "grad_norm": 1.7746861972928325, - "learning_rate": 2.6393854440286743e-07, - "loss": 1.0197, - "step": 6982 - }, - { - "epoch": 0.8396561053327722, - "grad_norm": 2.795199441089367, - "learning_rate": 2.6355190850661045e-07, - "loss": 0.9068, - "step": 6983 - }, - { - "epoch": 0.8397763482234113, - "grad_norm": 1.5081742401242035, - "learning_rate": 2.631655360281486e-07, - "loss": 1.0593, - "step": 6984 - }, - { - "epoch": 0.8398965911140504, - "grad_norm": 2.310411386394974, - "learning_rate": 2.6277942702609323e-07, - "loss": 0.8546, - "step": 6985 - }, - { - "epoch": 0.8400168340046895, - "grad_norm": 2.0172060893980297, - "learning_rate": 2.623935815590186e-07, - "loss": 1.0666, - "step": 6986 - }, - { - "epoch": 0.8401370768953286, - "grad_norm": 1.963142219475224, - "learning_rate": 2.6200799968545516e-07, - "loss": 1.0103, - "step": 6987 - }, - { - "epoch": 0.8402573197859676, - "grad_norm": 0.8444996082402118, - "learning_rate": 2.616226814638969e-07, - "loss": 0.7893, - "step": 6988 - }, - { - "epoch": 0.8403775626766068, - "grad_norm": 1.7862759006723328, - "learning_rate": 2.612376269527954e-07, - "loss": 0.9789, - "step": 6989 - }, - { - "epoch": 0.8404978055672458, - "grad_norm": 2.8465089933279115, - "learning_rate": 2.608528362105635e-07, - "loss": 0.8798, - "step": 6990 - }, - { - "epoch": 0.8406180484578849, - "grad_norm": 1.8726013099145142, - "learning_rate": 2.6046830929557374e-07, - "loss": 0.9321, - "step": 6991 - }, - { - "epoch": 0.8407382913485241, - "grad_norm": 1.8174207909913735, - "learning_rate": 2.6008404626615776e-07, - "loss": 1.0454, - "step": 6992 - }, - { - "epoch": 0.8408585342391631, - "grad_norm": 2.07186716115628, - "learning_rate": 2.597000471806092e-07, - "loss": 0.9285, - "step": 6993 - }, - { - "epoch": 0.8409787771298022, - "grad_norm": 1.938705069576535, - "learning_rate": 2.593163120971793e-07, - "loss": 0.9241, - "step": 6994 - }, - { - "epoch": 0.8410990200204413, - "grad_norm": 1.7795837372094854, - "learning_rate": 2.5893284107408165e-07, - "loss": 0.8916, - "step": 6995 - }, - { - "epoch": 0.8412192629110804, - "grad_norm": 1.725329550071727, - "learning_rate": 2.5854963416948726e-07, - "loss": 0.987, - "step": 6996 - }, - { - "epoch": 0.8413395058017195, - "grad_norm": 1.527690955305583, - "learning_rate": 2.5816669144152816e-07, - "loss": 0.8847, - "step": 6997 - }, - { - "epoch": 0.8414597486923585, - "grad_norm": 0.9306193898418097, - "learning_rate": 2.5778401294829777e-07, - "loss": 0.9266, - "step": 6998 - }, - { - "epoch": 0.8415799915829977, - "grad_norm": 1.7700804138236144, - "learning_rate": 2.574015987478473e-07, - "loss": 0.8483, - "step": 6999 - }, - { - "epoch": 0.8417002344736367, - "grad_norm": 1.93435204019309, - "learning_rate": 2.570194488981887e-07, - "loss": 1.0617, - "step": 7000 - }, - { - "epoch": 0.8418204773642758, - "grad_norm": 0.8952331454542155, - "learning_rate": 2.566375634572939e-07, - "loss": 0.8366, - "step": 7001 - }, - { - "epoch": 0.841940720254915, - "grad_norm": 2.5917962310721223, - "learning_rate": 2.562559424830943e-07, - "loss": 0.9575, - "step": 7002 - }, - { - "epoch": 0.842060963145554, - "grad_norm": 2.4206930906695066, - "learning_rate": 2.5587458603348256e-07, - "loss": 0.8964, - "step": 7003 - }, - { - "epoch": 0.8421812060361931, - "grad_norm": 2.0695703867822295, - "learning_rate": 2.554934941663085e-07, - "loss": 1.0388, - "step": 7004 - }, - { - "epoch": 0.8423014489268322, - "grad_norm": 1.7088975982433057, - "learning_rate": 2.5511266693938484e-07, - "loss": 0.9284, - "step": 7005 - }, - { - "epoch": 0.8424216918174713, - "grad_norm": 1.4450949840280947, - "learning_rate": 2.547321044104822e-07, - "loss": 0.9688, - "step": 7006 - }, - { - "epoch": 0.8425419347081103, - "grad_norm": 1.8340974718023515, - "learning_rate": 2.5435180663733113e-07, - "loss": 0.9688, - "step": 7007 - }, - { - "epoch": 0.8426621775987495, - "grad_norm": 2.1279030212160035, - "learning_rate": 2.539717736776241e-07, - "loss": 0.9112, - "step": 7008 - }, - { - "epoch": 0.8427824204893886, - "grad_norm": 1.5741307539282263, - "learning_rate": 2.535920055890097e-07, - "loss": 0.9564, - "step": 7009 - }, - { - "epoch": 0.8429026633800276, - "grad_norm": 1.9398590001256928, - "learning_rate": 2.5321250242910006e-07, - "loss": 0.8491, - "step": 7010 - }, - { - "epoch": 0.8430229062706668, - "grad_norm": 1.8282812328907518, - "learning_rate": 2.5283326425546493e-07, - "loss": 1.0584, - "step": 7011 - }, - { - "epoch": 0.8431431491613058, - "grad_norm": 2.0208165931663746, - "learning_rate": 2.5245429112563443e-07, - "loss": 0.887, - "step": 7012 - }, - { - "epoch": 0.8432633920519449, - "grad_norm": 4.951363716697583, - "learning_rate": 2.5207558309709865e-07, - "loss": 1.0176, - "step": 7013 - }, - { - "epoch": 0.8433836349425841, - "grad_norm": 0.7055918547056912, - "learning_rate": 2.516971402273065e-07, - "loss": 0.7782, - "step": 7014 - }, - { - "epoch": 0.8435038778332231, - "grad_norm": 1.7143551906062862, - "learning_rate": 2.513189625736687e-07, - "loss": 0.8672, - "step": 7015 - }, - { - "epoch": 0.8436241207238622, - "grad_norm": 2.265382740617315, - "learning_rate": 2.509410501935534e-07, - "loss": 0.901, - "step": 7016 - }, - { - "epoch": 0.8437443636145013, - "grad_norm": 2.265424101242412, - "learning_rate": 2.5056340314429116e-07, - "loss": 0.9607, - "step": 7017 - }, - { - "epoch": 0.8438646065051404, - "grad_norm": 1.977871186423481, - "learning_rate": 2.5018602148316904e-07, - "loss": 1.001, - "step": 7018 - }, - { - "epoch": 0.8439848493957794, - "grad_norm": 1.5171653549511062, - "learning_rate": 2.498089052674359e-07, - "loss": 0.9926, - "step": 7019 - }, - { - "epoch": 0.8441050922864186, - "grad_norm": 2.0830149089336456, - "learning_rate": 2.494320545543007e-07, - "loss": 0.9563, - "step": 7020 - }, - { - "epoch": 0.8442253351770577, - "grad_norm": 1.731422867390123, - "learning_rate": 2.490554694009308e-07, - "loss": 0.8716, - "step": 7021 - }, - { - "epoch": 0.8443455780676967, - "grad_norm": 1.483429778460474, - "learning_rate": 2.4867914986445426e-07, - "loss": 0.9879, - "step": 7022 - }, - { - "epoch": 0.8444658209583359, - "grad_norm": 1.7410664736499077, - "learning_rate": 2.483030960019581e-07, - "loss": 0.8973, - "step": 7023 - }, - { - "epoch": 0.8445860638489749, - "grad_norm": 0.7664005088785548, - "learning_rate": 2.479273078704891e-07, - "loss": 0.7638, - "step": 7024 - }, - { - "epoch": 0.844706306739614, - "grad_norm": 0.8232145293489278, - "learning_rate": 2.475517855270552e-07, - "loss": 0.8611, - "step": 7025 - }, - { - "epoch": 0.8448265496302532, - "grad_norm": 1.7812985865341127, - "learning_rate": 2.4717652902862143e-07, - "loss": 0.9284, - "step": 7026 - }, - { - "epoch": 0.8449467925208922, - "grad_norm": 1.5925214183331198, - "learning_rate": 2.4680153843211495e-07, - "loss": 1.0201, - "step": 7027 - }, - { - "epoch": 0.8450670354115313, - "grad_norm": 1.6018313298577864, - "learning_rate": 2.464268137944212e-07, - "loss": 0.9159, - "step": 7028 - }, - { - "epoch": 0.8451872783021703, - "grad_norm": 1.8292481974141508, - "learning_rate": 2.46052355172385e-07, - "loss": 0.9824, - "step": 7029 - }, - { - "epoch": 0.8453075211928095, - "grad_norm": 1.6458935384563156, - "learning_rate": 2.456781626228128e-07, - "loss": 0.9409, - "step": 7030 - }, - { - "epoch": 0.8454277640834486, - "grad_norm": 1.0685232200723582, - "learning_rate": 2.453042362024675e-07, - "loss": 0.9412, - "step": 7031 - }, - { - "epoch": 0.8455480069740876, - "grad_norm": 1.3812056003528828, - "learning_rate": 2.449305759680751e-07, - "loss": 0.9327, - "step": 7032 - }, - { - "epoch": 0.8456682498647268, - "grad_norm": 1.537608355293834, - "learning_rate": 2.445571819763188e-07, - "loss": 0.9437, - "step": 7033 - }, - { - "epoch": 0.8457884927553658, - "grad_norm": 1.7442112275087562, - "learning_rate": 2.4418405428384227e-07, - "loss": 0.7859, - "step": 7034 - }, - { - "epoch": 0.8459087356460049, - "grad_norm": 1.5245534086596353, - "learning_rate": 2.4381119294724864e-07, - "loss": 0.9132, - "step": 7035 - }, - { - "epoch": 0.846028978536644, - "grad_norm": 1.9539338535096298, - "learning_rate": 2.434385980231004e-07, - "loss": 0.7444, - "step": 7036 - }, - { - "epoch": 0.8461492214272831, - "grad_norm": 1.6123461657340674, - "learning_rate": 2.4306626956792043e-07, - "loss": 0.8485, - "step": 7037 - }, - { - "epoch": 0.8462694643179222, - "grad_norm": 1.6353579306128252, - "learning_rate": 2.4269420763819017e-07, - "loss": 0.9536, - "step": 7038 - }, - { - "epoch": 0.8463897072085613, - "grad_norm": 2.63934204716925, - "learning_rate": 2.4232241229035223e-07, - "loss": 1.0277, - "step": 7039 - }, - { - "epoch": 0.8465099500992004, - "grad_norm": 0.8623608131331038, - "learning_rate": 2.419508835808064e-07, - "loss": 0.7915, - "step": 7040 - }, - { - "epoch": 0.8466301929898394, - "grad_norm": 1.8935743924040802, - "learning_rate": 2.415796215659134e-07, - "loss": 0.8296, - "step": 7041 - }, - { - "epoch": 0.8467504358804786, - "grad_norm": 1.9838208002870035, - "learning_rate": 2.412086263019939e-07, - "loss": 0.9673, - "step": 7042 - }, - { - "epoch": 0.8468706787711177, - "grad_norm": 1.9593473879680092, - "learning_rate": 2.408378978453276e-07, - "loss": 1.0008, - "step": 7043 - }, - { - "epoch": 0.8469909216617567, - "grad_norm": 0.8214517946988408, - "learning_rate": 2.404674362521533e-07, - "loss": 0.8568, - "step": 7044 - }, - { - "epoch": 0.8471111645523959, - "grad_norm": 2.1008316527676043, - "learning_rate": 2.4009724157866997e-07, - "loss": 0.9357, - "step": 7045 - }, - { - "epoch": 0.8472314074430349, - "grad_norm": 1.7769314801299751, - "learning_rate": 2.3972731388103564e-07, - "loss": 0.963, - "step": 7046 - }, - { - "epoch": 0.847351650333674, - "grad_norm": 0.8192762096218117, - "learning_rate": 2.393576532153687e-07, - "loss": 0.8443, - "step": 7047 - }, - { - "epoch": 0.8474718932243132, - "grad_norm": 1.015766251351633, - "learning_rate": 2.389882596377453e-07, - "loss": 0.8205, - "step": 7048 - }, - { - "epoch": 0.8475921361149522, - "grad_norm": 1.8118457271004769, - "learning_rate": 2.386191332042031e-07, - "loss": 0.9619, - "step": 7049 - }, - { - "epoch": 0.8477123790055913, - "grad_norm": 2.3788022173604193, - "learning_rate": 2.3825027397073794e-07, - "loss": 0.9281, - "step": 7050 - }, - { - "epoch": 0.8478326218962304, - "grad_norm": 1.820157285957392, - "learning_rate": 2.3788168199330515e-07, - "loss": 0.8679, - "step": 7051 - }, - { - "epoch": 0.8479528647868695, - "grad_norm": 1.4824329722596, - "learning_rate": 2.3751335732782074e-07, - "loss": 0.9333, - "step": 7052 - }, - { - "epoch": 0.8480731076775085, - "grad_norm": 1.762825561107174, - "learning_rate": 2.371453000301582e-07, - "loss": 1.0002, - "step": 7053 - }, - { - "epoch": 0.8481933505681477, - "grad_norm": 1.781082078480673, - "learning_rate": 2.3677751015615222e-07, - "loss": 0.9398, - "step": 7054 - }, - { - "epoch": 0.8483135934587868, - "grad_norm": 1.8681200962506406, - "learning_rate": 2.3640998776159593e-07, - "loss": 1.0493, - "step": 7055 - }, - { - "epoch": 0.8484338363494258, - "grad_norm": 1.6353416749643244, - "learning_rate": 2.3604273290224253e-07, - "loss": 1.0085, - "step": 7056 - }, - { - "epoch": 0.848554079240065, - "grad_norm": 1.751426183754052, - "learning_rate": 2.356757456338039e-07, - "loss": 0.9452, - "step": 7057 - }, - { - "epoch": 0.848674322130704, - "grad_norm": 0.8933796362003341, - "learning_rate": 2.3530902601195147e-07, - "loss": 0.8472, - "step": 7058 - }, - { - "epoch": 0.8487945650213431, - "grad_norm": 2.670161004230491, - "learning_rate": 2.34942574092317e-07, - "loss": 0.9831, - "step": 7059 - }, - { - "epoch": 0.8489148079119821, - "grad_norm": 1.8939643562290225, - "learning_rate": 2.3457638993049045e-07, - "loss": 0.9609, - "step": 7060 - }, - { - "epoch": 0.8490350508026213, - "grad_norm": 1.8093974426669739, - "learning_rate": 2.3421047358202252e-07, - "loss": 0.8382, - "step": 7061 - }, - { - "epoch": 0.8491552936932604, - "grad_norm": 2.1860268128086533, - "learning_rate": 2.3384482510242144e-07, - "loss": 1.0253, - "step": 7062 - }, - { - "epoch": 0.8492755365838994, - "grad_norm": 1.915652366056886, - "learning_rate": 2.3347944454715575e-07, - "loss": 0.9676, - "step": 7063 - }, - { - "epoch": 0.8493957794745386, - "grad_norm": 1.643966614117367, - "learning_rate": 2.331143319716542e-07, - "loss": 0.8646, - "step": 7064 - }, - { - "epoch": 0.8495160223651776, - "grad_norm": 1.8357876980316277, - "learning_rate": 2.3274948743130363e-07, - "loss": 0.8506, - "step": 7065 - }, - { - "epoch": 0.8496362652558167, - "grad_norm": 2.0670956361272896, - "learning_rate": 2.3238491098145085e-07, - "loss": 0.991, - "step": 7066 - }, - { - "epoch": 0.8497565081464559, - "grad_norm": 2.337834331245814, - "learning_rate": 2.3202060267740141e-07, - "loss": 0.9318, - "step": 7067 - }, - { - "epoch": 0.8498767510370949, - "grad_norm": 2.087690780967343, - "learning_rate": 2.3165656257442044e-07, - "loss": 0.9753, - "step": 7068 - }, - { - "epoch": 0.849996993927734, - "grad_norm": 1.7480103217197738, - "learning_rate": 2.31292790727734e-07, - "loss": 1.1019, - "step": 7069 - }, - { - "epoch": 0.8501172368183731, - "grad_norm": 2.2126373119094676, - "learning_rate": 2.3092928719252392e-07, - "loss": 0.9987, - "step": 7070 - }, - { - "epoch": 0.8502374797090122, - "grad_norm": 2.033348642614713, - "learning_rate": 2.3056605202393475e-07, - "loss": 0.9834, - "step": 7071 - }, - { - "epoch": 0.8503577225996513, - "grad_norm": 1.6571138396447342, - "learning_rate": 2.3020308527706888e-07, - "loss": 0.8702, - "step": 7072 - }, - { - "epoch": 0.8504779654902904, - "grad_norm": 1.5039078353278983, - "learning_rate": 2.2984038700698715e-07, - "loss": 1.081, - "step": 7073 - }, - { - "epoch": 0.8505982083809295, - "grad_norm": 1.5052915063953807, - "learning_rate": 2.2947795726871222e-07, - "loss": 0.9884, - "step": 7074 - }, - { - "epoch": 0.8507184512715685, - "grad_norm": 3.83489901592484, - "learning_rate": 2.2911579611722253e-07, - "loss": 1.0507, - "step": 7075 - }, - { - "epoch": 0.8508386941622077, - "grad_norm": 1.8170057394454577, - "learning_rate": 2.2875390360745905e-07, - "loss": 1.0701, - "step": 7076 - }, - { - "epoch": 0.8509589370528468, - "grad_norm": 1.5793844051103691, - "learning_rate": 2.2839227979432008e-07, - "loss": 0.9756, - "step": 7077 - }, - { - "epoch": 0.8510791799434858, - "grad_norm": 1.787326953754662, - "learning_rate": 2.2803092473266373e-07, - "loss": 1.0418, - "step": 7078 - }, - { - "epoch": 0.851199422834125, - "grad_norm": 2.095927230925832, - "learning_rate": 2.2766983847730724e-07, - "loss": 1.0639, - "step": 7079 - }, - { - "epoch": 0.851319665724764, - "grad_norm": 1.6689644550723428, - "learning_rate": 2.2730902108302663e-07, - "loss": 0.8673, - "step": 7080 - }, - { - "epoch": 0.8514399086154031, - "grad_norm": 1.508736678019622, - "learning_rate": 2.269484726045583e-07, - "loss": 0.8886, - "step": 7081 - }, - { - "epoch": 0.8515601515060423, - "grad_norm": 1.539111993448916, - "learning_rate": 2.2658819309659672e-07, - "loss": 0.9843, - "step": 7082 - }, - { - "epoch": 0.8516803943966813, - "grad_norm": 2.0665952311239373, - "learning_rate": 2.2622818261379706e-07, - "loss": 1.0373, - "step": 7083 - }, - { - "epoch": 0.8518006372873204, - "grad_norm": 1.6335176104439066, - "learning_rate": 2.2586844121077142e-07, - "loss": 0.9471, - "step": 7084 - }, - { - "epoch": 0.8519208801779595, - "grad_norm": 1.735418813214877, - "learning_rate": 2.2550896894209215e-07, - "loss": 0.9212, - "step": 7085 - }, - { - "epoch": 0.8520411230685986, - "grad_norm": 0.7177910004627436, - "learning_rate": 2.2514976586229184e-07, - "loss": 0.7858, - "step": 7086 - }, - { - "epoch": 0.8521613659592376, - "grad_norm": 0.9846852131644195, - "learning_rate": 2.247908320258609e-07, - "loss": 0.822, - "step": 7087 - }, - { - "epoch": 0.8522816088498768, - "grad_norm": 2.027928145014101, - "learning_rate": 2.2443216748724914e-07, - "loss": 0.9908, - "step": 7088 - }, - { - "epoch": 0.8524018517405159, - "grad_norm": 1.834138418924976, - "learning_rate": 2.2407377230086588e-07, - "loss": 0.9457, - "step": 7089 - }, - { - "epoch": 0.8525220946311549, - "grad_norm": 2.1500914709460757, - "learning_rate": 2.23715646521079e-07, - "loss": 1.033, - "step": 7090 - }, - { - "epoch": 0.852642337521794, - "grad_norm": 1.6771102650377436, - "learning_rate": 2.2335779020221724e-07, - "loss": 1.0313, - "step": 7091 - }, - { - "epoch": 0.8527625804124331, - "grad_norm": 0.8671459497343665, - "learning_rate": 2.2300020339856497e-07, - "loss": 0.8187, - "step": 7092 - }, - { - "epoch": 0.8528828233030722, - "grad_norm": 1.9731918363274308, - "learning_rate": 2.2264288616436966e-07, - "loss": 0.9754, - "step": 7093 - }, - { - "epoch": 0.8530030661937112, - "grad_norm": 1.9535460971834784, - "learning_rate": 2.222858385538351e-07, - "loss": 0.9293, - "step": 7094 - }, - { - "epoch": 0.8531233090843504, - "grad_norm": 1.5736650198246611, - "learning_rate": 2.2192906062112527e-07, - "loss": 0.8764, - "step": 7095 - }, - { - "epoch": 0.8532435519749895, - "grad_norm": 1.4392977543989538, - "learning_rate": 2.2157255242036377e-07, - "loss": 0.902, - "step": 7096 - }, - { - "epoch": 0.8533637948656285, - "grad_norm": 1.6387031561283543, - "learning_rate": 2.2121631400563135e-07, - "loss": 0.9404, - "step": 7097 - }, - { - "epoch": 0.8534840377562677, - "grad_norm": 0.8406211271515871, - "learning_rate": 2.208603454309701e-07, - "loss": 0.8005, - "step": 7098 - }, - { - "epoch": 0.8536042806469067, - "grad_norm": 1.6515781080650698, - "learning_rate": 2.2050464675037994e-07, - "loss": 0.9072, - "step": 7099 - }, - { - "epoch": 0.8537245235375458, - "grad_norm": 1.781924003106555, - "learning_rate": 2.2014921801782016e-07, - "loss": 0.9292, - "step": 7100 - }, - { - "epoch": 0.853844766428185, - "grad_norm": 1.8899851575399833, - "learning_rate": 2.1979405928720872e-07, - "loss": 0.9326, - "step": 7101 - }, - { - "epoch": 0.853965009318824, - "grad_norm": 1.3226295319780492, - "learning_rate": 2.1943917061242257e-07, - "loss": 0.9874, - "step": 7102 - }, - { - "epoch": 0.8540852522094631, - "grad_norm": 1.487956254372469, - "learning_rate": 2.1908455204729903e-07, - "loss": 0.863, - "step": 7103 - }, - { - "epoch": 0.8542054951001022, - "grad_norm": 1.8618755676330132, - "learning_rate": 2.1873020364563265e-07, - "loss": 0.9764, - "step": 7104 - }, - { - "epoch": 0.8543257379907413, - "grad_norm": 2.0724908848321633, - "learning_rate": 2.183761254611789e-07, - "loss": 0.958, - "step": 7105 - }, - { - "epoch": 0.8544459808813804, - "grad_norm": 2.114378808063502, - "learning_rate": 2.1802231754764987e-07, - "loss": 0.9028, - "step": 7106 - }, - { - "epoch": 0.8545662237720195, - "grad_norm": 1.720927160012759, - "learning_rate": 2.17668779958718e-07, - "loss": 0.9596, - "step": 7107 - }, - { - "epoch": 0.8546864666626586, - "grad_norm": 2.0381634948333125, - "learning_rate": 2.1731551274801553e-07, - "loss": 0.9993, - "step": 7108 - }, - { - "epoch": 0.8548067095532976, - "grad_norm": 2.131539772000884, - "learning_rate": 2.169625159691324e-07, - "loss": 0.808, - "step": 7109 - }, - { - "epoch": 0.8549269524439368, - "grad_norm": 2.0102880038134754, - "learning_rate": 2.1660978967561784e-07, - "loss": 0.9453, - "step": 7110 - }, - { - "epoch": 0.8550471953345758, - "grad_norm": 2.6060321890657407, - "learning_rate": 2.1625733392098035e-07, - "loss": 0.9885, - "step": 7111 - }, - { - "epoch": 0.8551674382252149, - "grad_norm": 1.5491895217946952, - "learning_rate": 2.159051487586867e-07, - "loss": 0.99, - "step": 7112 - }, - { - "epoch": 0.8552876811158541, - "grad_norm": 2.199586122769625, - "learning_rate": 2.155532342421642e-07, - "loss": 0.9251, - "step": 7113 - }, - { - "epoch": 0.8554079240064931, - "grad_norm": 1.6880054423348436, - "learning_rate": 2.1520159042479636e-07, - "loss": 0.9828, - "step": 7114 - }, - { - "epoch": 0.8555281668971322, - "grad_norm": 2.018450981174257, - "learning_rate": 2.148502173599287e-07, - "loss": 0.9064, - "step": 7115 - }, - { - "epoch": 0.8556484097877713, - "grad_norm": 1.488479320732297, - "learning_rate": 2.1449911510086372e-07, - "loss": 0.8504, - "step": 7116 - }, - { - "epoch": 0.8557686526784104, - "grad_norm": 3.156359529011135, - "learning_rate": 2.141482837008628e-07, - "loss": 0.9702, - "step": 7117 - }, - { - "epoch": 0.8558888955690495, - "grad_norm": 1.8012305503922879, - "learning_rate": 2.1379772321314826e-07, - "loss": 0.915, - "step": 7118 - }, - { - "epoch": 0.8560091384596886, - "grad_norm": 1.9981355917765915, - "learning_rate": 2.1344743369089802e-07, - "loss": 1.0224, - "step": 7119 - }, - { - "epoch": 0.8561293813503277, - "grad_norm": 1.5723041411237282, - "learning_rate": 2.130974151872522e-07, - "loss": 1.0161, - "step": 7120 - }, - { - "epoch": 0.8562496242409667, - "grad_norm": 1.653942155696029, - "learning_rate": 2.1274766775530773e-07, - "loss": 0.986, - "step": 7121 - }, - { - "epoch": 0.8563698671316058, - "grad_norm": 1.99033667663658, - "learning_rate": 2.1239819144812077e-07, - "loss": 0.9924, - "step": 7122 - }, - { - "epoch": 0.856490110022245, - "grad_norm": 1.657182970524568, - "learning_rate": 2.1204898631870716e-07, - "loss": 0.8986, - "step": 7123 - }, - { - "epoch": 0.856610352912884, - "grad_norm": 1.67343565925618, - "learning_rate": 2.1170005242004006e-07, - "loss": 0.9633, - "step": 7124 - }, - { - "epoch": 0.8567305958035231, - "grad_norm": 1.7292924934781502, - "learning_rate": 2.1135138980505384e-07, - "loss": 0.9769, - "step": 7125 - }, - { - "epoch": 0.8568508386941622, - "grad_norm": 1.699937550014763, - "learning_rate": 2.110029985266395e-07, - "loss": 0.9301, - "step": 7126 - }, - { - "epoch": 0.8569710815848013, - "grad_norm": 1.6520152385415254, - "learning_rate": 2.1065487863764787e-07, - "loss": 0.9327, - "step": 7127 - }, - { - "epoch": 0.8570913244754403, - "grad_norm": 1.479025265377918, - "learning_rate": 2.1030703019088846e-07, - "loss": 1.0571, - "step": 7128 - }, - { - "epoch": 0.8572115673660795, - "grad_norm": 1.6354930722152008, - "learning_rate": 2.099594532391291e-07, - "loss": 0.9026, - "step": 7129 - }, - { - "epoch": 0.8573318102567186, - "grad_norm": 1.6177363546990033, - "learning_rate": 2.0961214783509806e-07, - "loss": 0.9817, - "step": 7130 - }, - { - "epoch": 0.8574520531473576, - "grad_norm": 1.7090651487607897, - "learning_rate": 2.0926511403148051e-07, - "loss": 0.9493, - "step": 7131 - }, - { - "epoch": 0.8575722960379968, - "grad_norm": 1.7684309951438792, - "learning_rate": 2.0891835188092143e-07, - "loss": 0.956, - "step": 7132 - }, - { - "epoch": 0.8576925389286358, - "grad_norm": 1.6413026409321527, - "learning_rate": 2.0857186143602434e-07, - "loss": 1.0091, - "step": 7133 - }, - { - "epoch": 0.8578127818192749, - "grad_norm": 1.7414254887007177, - "learning_rate": 2.0822564274935094e-07, - "loss": 0.8765, - "step": 7134 - }, - { - "epoch": 0.8579330247099141, - "grad_norm": 1.7030996618223389, - "learning_rate": 2.078796958734239e-07, - "loss": 0.8704, - "step": 7135 - }, - { - "epoch": 0.8580532676005531, - "grad_norm": 1.8714411339395378, - "learning_rate": 2.0753402086072124e-07, - "loss": 0.9443, - "step": 7136 - }, - { - "epoch": 0.8581735104911922, - "grad_norm": 2.079639313309381, - "learning_rate": 2.071886177636828e-07, - "loss": 0.9627, - "step": 7137 - }, - { - "epoch": 0.8582937533818313, - "grad_norm": 1.766384805231116, - "learning_rate": 2.0684348663470575e-07, - "loss": 1.0156, - "step": 7138 - }, - { - "epoch": 0.8584139962724704, - "grad_norm": 1.6187830623358008, - "learning_rate": 2.0649862752614555e-07, - "loss": 0.8166, - "step": 7139 - }, - { - "epoch": 0.8585342391631094, - "grad_norm": 0.8053874702772965, - "learning_rate": 2.0615404049031838e-07, - "loss": 0.7888, - "step": 7140 - }, - { - "epoch": 0.8586544820537486, - "grad_norm": 2.0043395170776925, - "learning_rate": 2.0580972557949616e-07, - "loss": 0.9841, - "step": 7141 - }, - { - "epoch": 0.8587747249443877, - "grad_norm": 0.8440736220642852, - "learning_rate": 2.054656828459125e-07, - "loss": 0.7456, - "step": 7142 - }, - { - "epoch": 0.8588949678350267, - "grad_norm": 1.5837000204749303, - "learning_rate": 2.051219123417578e-07, - "loss": 0.967, - "step": 7143 - }, - { - "epoch": 0.8590152107256659, - "grad_norm": 2.133494509131815, - "learning_rate": 2.0477841411918196e-07, - "loss": 0.8045, - "step": 7144 - }, - { - "epoch": 0.859135453616305, - "grad_norm": 1.8409612222262377, - "learning_rate": 2.0443518823029326e-07, - "loss": 0.9465, - "step": 7145 - }, - { - "epoch": 0.859255696506944, - "grad_norm": 1.8966049328876737, - "learning_rate": 2.0409223472715854e-07, - "loss": 0.9622, - "step": 7146 - }, - { - "epoch": 0.8593759393975832, - "grad_norm": 1.894605245816549, - "learning_rate": 2.0374955366180434e-07, - "loss": 0.9438, - "step": 7147 - }, - { - "epoch": 0.8594961822882222, - "grad_norm": 1.7061783604376572, - "learning_rate": 2.034071450862147e-07, - "loss": 0.9284, - "step": 7148 - }, - { - "epoch": 0.8596164251788613, - "grad_norm": 2.844358609646216, - "learning_rate": 2.030650090523327e-07, - "loss": 0.9654, - "step": 7149 - }, - { - "epoch": 0.8597366680695004, - "grad_norm": 1.5650776010840999, - "learning_rate": 2.0272314561205995e-07, - "loss": 0.7982, - "step": 7150 - }, - { - "epoch": 0.8598569109601395, - "grad_norm": 1.732841157117282, - "learning_rate": 2.023815548172567e-07, - "loss": 0.9299, - "step": 7151 - }, - { - "epoch": 0.8599771538507786, - "grad_norm": 1.5522073103099623, - "learning_rate": 2.0204023671974267e-07, - "loss": 0.8659, - "step": 7152 - }, - { - "epoch": 0.8600973967414177, - "grad_norm": 1.9585111280314562, - "learning_rate": 2.0169919137129532e-07, - "loss": 1.0069, - "step": 7153 - }, - { - "epoch": 0.8602176396320568, - "grad_norm": 2.0326026520842104, - "learning_rate": 2.013584188236508e-07, - "loss": 0.8973, - "step": 7154 - }, - { - "epoch": 0.8603378825226958, - "grad_norm": 1.60143878738083, - "learning_rate": 2.0101791912850396e-07, - "loss": 0.993, - "step": 7155 - }, - { - "epoch": 0.8604581254133349, - "grad_norm": 1.702043662509677, - "learning_rate": 2.006776923375082e-07, - "loss": 0.8335, - "step": 7156 - }, - { - "epoch": 0.860578368303974, - "grad_norm": 1.4750582990601493, - "learning_rate": 2.003377385022764e-07, - "loss": 0.9061, - "step": 7157 - }, - { - "epoch": 0.8606986111946131, - "grad_norm": 1.6987278722538721, - "learning_rate": 1.9999805767437826e-07, - "loss": 0.9668, - "step": 7158 - }, - { - "epoch": 0.8608188540852522, - "grad_norm": 1.599630912291098, - "learning_rate": 1.9965864990534386e-07, - "loss": 0.9153, - "step": 7159 - }, - { - "epoch": 0.8609390969758913, - "grad_norm": 1.497410685147701, - "learning_rate": 1.9931951524666092e-07, - "loss": 0.9721, - "step": 7160 - }, - { - "epoch": 0.8610593398665304, - "grad_norm": 1.6643229535940023, - "learning_rate": 1.9898065374977534e-07, - "loss": 1.0059, - "step": 7161 - }, - { - "epoch": 0.8611795827571694, - "grad_norm": 1.8768793224547895, - "learning_rate": 1.9864206546609342e-07, - "loss": 0.9214, - "step": 7162 - }, - { - "epoch": 0.8612998256478086, - "grad_norm": 1.759208091285455, - "learning_rate": 1.983037504469771e-07, - "loss": 1.0445, - "step": 7163 - }, - { - "epoch": 0.8614200685384477, - "grad_norm": 1.593646177014273, - "learning_rate": 1.9796570874374984e-07, - "loss": 0.868, - "step": 7164 - }, - { - "epoch": 0.8615403114290867, - "grad_norm": 1.6560790495548918, - "learning_rate": 1.976279404076917e-07, - "loss": 0.9695, - "step": 7165 - }, - { - "epoch": 0.8616605543197259, - "grad_norm": 1.723425038379981, - "learning_rate": 1.9729044549004193e-07, - "loss": 0.9559, - "step": 7166 - }, - { - "epoch": 0.8617807972103649, - "grad_norm": 3.3161086093799916, - "learning_rate": 1.9695322404199822e-07, - "loss": 0.9015, - "step": 7167 - }, - { - "epoch": 0.861901040101004, - "grad_norm": 1.8073395238104524, - "learning_rate": 1.9661627611471654e-07, - "loss": 1.023, - "step": 7168 - }, - { - "epoch": 0.8620212829916432, - "grad_norm": 2.1681652632994264, - "learning_rate": 1.9627960175931246e-07, - "loss": 0.9074, - "step": 7169 - }, - { - "epoch": 0.8621415258822822, - "grad_norm": 1.732293675785272, - "learning_rate": 1.9594320102685847e-07, - "loss": 0.9396, - "step": 7170 - }, - { - "epoch": 0.8622617687729213, - "grad_norm": 1.9784853180713402, - "learning_rate": 1.956070739683864e-07, - "loss": 0.8398, - "step": 7171 - }, - { - "epoch": 0.8623820116635604, - "grad_norm": 1.442522810519268, - "learning_rate": 1.9527122063488678e-07, - "loss": 0.9372, - "step": 7172 - }, - { - "epoch": 0.8625022545541995, - "grad_norm": 1.897344643587048, - "learning_rate": 1.9493564107730755e-07, - "loss": 0.9981, - "step": 7173 - }, - { - "epoch": 0.8626224974448385, - "grad_norm": 1.908285461336176, - "learning_rate": 1.9460033534655684e-07, - "loss": 0.8103, - "step": 7174 - }, - { - "epoch": 0.8627427403354777, - "grad_norm": 1.4477974639052518, - "learning_rate": 1.9426530349349978e-07, - "loss": 1.038, - "step": 7175 - }, - { - "epoch": 0.8628629832261168, - "grad_norm": 1.9942608982920322, - "learning_rate": 1.9393054556896038e-07, - "loss": 0.8524, - "step": 7176 - }, - { - "epoch": 0.8629832261167558, - "grad_norm": 2.0882032568938422, - "learning_rate": 1.9359606162372133e-07, - "loss": 0.8922, - "step": 7177 - }, - { - "epoch": 0.863103469007395, - "grad_norm": 2.0522807715329443, - "learning_rate": 1.9326185170852293e-07, - "loss": 0.9076, - "step": 7178 - }, - { - "epoch": 0.863223711898034, - "grad_norm": 1.8512305110959042, - "learning_rate": 1.9292791587406598e-07, - "loss": 0.9181, - "step": 7179 - }, - { - "epoch": 0.8633439547886731, - "grad_norm": 1.9235970450329627, - "learning_rate": 1.9259425417100661e-07, - "loss": 1.0597, - "step": 7180 - }, - { - "epoch": 0.8634641976793123, - "grad_norm": 2.3494618306880115, - "learning_rate": 1.9226086664996234e-07, - "loss": 0.9431, - "step": 7181 - }, - { - "epoch": 0.8635844405699513, - "grad_norm": 1.8811050207093671, - "learning_rate": 1.9192775336150712e-07, - "loss": 0.9462, - "step": 7182 - }, - { - "epoch": 0.8637046834605904, - "grad_norm": 0.8055374323424938, - "learning_rate": 1.915949143561739e-07, - "loss": 0.7702, - "step": 7183 - }, - { - "epoch": 0.8638249263512295, - "grad_norm": 1.6317872839750964, - "learning_rate": 1.9126234968445498e-07, - "loss": 0.9728, - "step": 7184 - }, - { - "epoch": 0.8639451692418686, - "grad_norm": 1.3866947655551287, - "learning_rate": 1.9093005939679884e-07, - "loss": 0.8671, - "step": 7185 - }, - { - "epoch": 0.8640654121325076, - "grad_norm": 1.9790045688570628, - "learning_rate": 1.9059804354361452e-07, - "loss": 0.9604, - "step": 7186 - }, - { - "epoch": 0.8641856550231467, - "grad_norm": 1.5934908787131454, - "learning_rate": 1.902663021752684e-07, - "loss": 0.9087, - "step": 7187 - }, - { - "epoch": 0.8643058979137859, - "grad_norm": 2.0848671670951013, - "learning_rate": 1.8993483534208556e-07, - "loss": 1.0187, - "step": 7188 - }, - { - "epoch": 0.8644261408044249, - "grad_norm": 2.202710268200238, - "learning_rate": 1.8960364309434884e-07, - "loss": 0.9453, - "step": 7189 - }, - { - "epoch": 0.864546383695064, - "grad_norm": 1.6441473796014212, - "learning_rate": 1.8927272548229967e-07, - "loss": 0.9857, - "step": 7190 - }, - { - "epoch": 0.8646666265857031, - "grad_norm": 1.45340758826628, - "learning_rate": 1.8894208255613876e-07, - "loss": 1.0248, - "step": 7191 - }, - { - "epoch": 0.8647868694763422, - "grad_norm": 1.8279323639408362, - "learning_rate": 1.8861171436602397e-07, - "loss": 0.9713, - "step": 7192 - }, - { - "epoch": 0.8649071123669813, - "grad_norm": 2.159846526804195, - "learning_rate": 1.882816209620719e-07, - "loss": 1.0102, - "step": 7193 - }, - { - "epoch": 0.8650273552576204, - "grad_norm": 1.8190666159101752, - "learning_rate": 1.8795180239435738e-07, - "loss": 0.9666, - "step": 7194 - }, - { - "epoch": 0.8651475981482595, - "grad_norm": 2.6523223561476104, - "learning_rate": 1.8762225871291348e-07, - "loss": 0.9582, - "step": 7195 - }, - { - "epoch": 0.8652678410388985, - "grad_norm": 3.0155187400320322, - "learning_rate": 1.8729298996773201e-07, - "loss": 1.0027, - "step": 7196 - }, - { - "epoch": 0.8653880839295377, - "grad_norm": 0.8624288916645797, - "learning_rate": 1.8696399620876301e-07, - "loss": 0.8307, - "step": 7197 - }, - { - "epoch": 0.8655083268201768, - "grad_norm": 2.0164008015230537, - "learning_rate": 1.866352774859141e-07, - "loss": 0.99, - "step": 7198 - }, - { - "epoch": 0.8656285697108158, - "grad_norm": 2.4320248464958962, - "learning_rate": 1.8630683384905188e-07, - "loss": 0.8985, - "step": 7199 - }, - { - "epoch": 0.865748812601455, - "grad_norm": 1.7821124481874366, - "learning_rate": 1.8597866534800045e-07, - "loss": 1.0926, - "step": 7200 - }, - { - "epoch": 0.865869055492094, - "grad_norm": 1.6869607699660052, - "learning_rate": 1.8565077203254398e-07, - "loss": 0.9417, - "step": 7201 - }, - { - "epoch": 0.8659892983827331, - "grad_norm": 2.581860973843641, - "learning_rate": 1.8532315395242203e-07, - "loss": 0.9322, - "step": 7202 - }, - { - "epoch": 0.8661095412733723, - "grad_norm": 1.8841756737398292, - "learning_rate": 1.849958111573353e-07, - "loss": 0.917, - "step": 7203 - }, - { - "epoch": 0.8662297841640113, - "grad_norm": 1.6346296939827674, - "learning_rate": 1.8466874369694074e-07, - "loss": 0.8334, - "step": 7204 - }, - { - "epoch": 0.8663500270546504, - "grad_norm": 6.5345532400741275, - "learning_rate": 1.843419516208542e-07, - "loss": 0.9063, - "step": 7205 - }, - { - "epoch": 0.8664702699452895, - "grad_norm": 2.068105305600687, - "learning_rate": 1.8401543497865047e-07, - "loss": 0.9864, - "step": 7206 - }, - { - "epoch": 0.8665905128359286, - "grad_norm": 2.1364575214158155, - "learning_rate": 1.836891938198608e-07, - "loss": 0.8417, - "step": 7207 - }, - { - "epoch": 0.8667107557265676, - "grad_norm": 2.8113925024868993, - "learning_rate": 1.8336322819397677e-07, - "loss": 0.9144, - "step": 7208 - }, - { - "epoch": 0.8668309986172068, - "grad_norm": 1.9104955952964708, - "learning_rate": 1.8303753815044654e-07, - "loss": 0.8289, - "step": 7209 - }, - { - "epoch": 0.8669512415078459, - "grad_norm": 3.3299357901216364, - "learning_rate": 1.827121237386773e-07, - "loss": 0.9015, - "step": 7210 - }, - { - "epoch": 0.8670714843984849, - "grad_norm": 2.383733252560016, - "learning_rate": 1.8238698500803374e-07, - "loss": 0.9516, - "step": 7211 - }, - { - "epoch": 0.8671917272891241, - "grad_norm": 1.336393401227894, - "learning_rate": 1.820621220078391e-07, - "loss": 0.8175, - "step": 7212 - }, - { - "epoch": 0.8673119701797631, - "grad_norm": 1.515458009309914, - "learning_rate": 1.8173753478737553e-07, - "loss": 0.8737, - "step": 7213 - }, - { - "epoch": 0.8674322130704022, - "grad_norm": 1.9761994522260746, - "learning_rate": 1.8141322339588205e-07, - "loss": 0.9913, - "step": 7214 - }, - { - "epoch": 0.8675524559610414, - "grad_norm": 4.998092669045777, - "learning_rate": 1.810891878825569e-07, - "loss": 0.8968, - "step": 7215 - }, - { - "epoch": 0.8676726988516804, - "grad_norm": 1.906636558486175, - "learning_rate": 1.8076542829655561e-07, - "loss": 0.9136, - "step": 7216 - }, - { - "epoch": 0.8677929417423195, - "grad_norm": 2.042278574377185, - "learning_rate": 1.8044194468699203e-07, - "loss": 0.9984, - "step": 7217 - }, - { - "epoch": 0.8679131846329585, - "grad_norm": 2.0629077855042723, - "learning_rate": 1.8011873710293912e-07, - "loss": 0.9476, - "step": 7218 - }, - { - "epoch": 0.8680334275235977, - "grad_norm": 1.9260937667355258, - "learning_rate": 1.7979580559342677e-07, - "loss": 0.8955, - "step": 7219 - }, - { - "epoch": 0.8681536704142367, - "grad_norm": 1.6192398685093015, - "learning_rate": 1.7947315020744358e-07, - "loss": 0.8649, - "step": 7220 - }, - { - "epoch": 0.8682739133048758, - "grad_norm": 1.6952122249920365, - "learning_rate": 1.7915077099393594e-07, - "loss": 0.99, - "step": 7221 - }, - { - "epoch": 0.868394156195515, - "grad_norm": 2.9716626786401177, - "learning_rate": 1.788286680018083e-07, - "loss": 0.9356, - "step": 7222 - }, - { - "epoch": 0.868514399086154, - "grad_norm": 2.4658303681725657, - "learning_rate": 1.7850684127992443e-07, - "loss": 0.9233, - "step": 7223 - }, - { - "epoch": 0.8686346419767931, - "grad_norm": 1.533764467925065, - "learning_rate": 1.7818529087710378e-07, - "loss": 0.9026, - "step": 7224 - }, - { - "epoch": 0.8687548848674322, - "grad_norm": 1.9678444369170225, - "learning_rate": 1.7786401684212637e-07, - "loss": 1.0411, - "step": 7225 - }, - { - "epoch": 0.8688751277580713, - "grad_norm": 0.758603970648353, - "learning_rate": 1.7754301922372883e-07, - "loss": 0.7679, - "step": 7226 - }, - { - "epoch": 0.8689953706487104, - "grad_norm": 1.6952888030052853, - "learning_rate": 1.7722229807060617e-07, - "loss": 1.0059, - "step": 7227 - }, - { - "epoch": 0.8691156135393495, - "grad_norm": 2.3272354167350593, - "learning_rate": 1.7690185343141172e-07, - "loss": 1.0084, - "step": 7228 - }, - { - "epoch": 0.8692358564299886, - "grad_norm": 2.057699222676487, - "learning_rate": 1.7658168535475615e-07, - "loss": 0.9109, - "step": 7229 - }, - { - "epoch": 0.8693560993206276, - "grad_norm": 1.557920535214479, - "learning_rate": 1.7626179388920948e-07, - "loss": 0.8424, - "step": 7230 - }, - { - "epoch": 0.8694763422112668, - "grad_norm": 1.5622327194491246, - "learning_rate": 1.7594217908329866e-07, - "loss": 1.0068, - "step": 7231 - }, - { - "epoch": 0.8695965851019059, - "grad_norm": 1.5558794522648143, - "learning_rate": 1.7562284098550895e-07, - "loss": 0.9342, - "step": 7232 - }, - { - "epoch": 0.8697168279925449, - "grad_norm": 0.8965492973007044, - "learning_rate": 1.753037796442838e-07, - "loss": 0.8582, - "step": 7233 - }, - { - "epoch": 0.8698370708831841, - "grad_norm": 2.10428440280785, - "learning_rate": 1.74984995108024e-07, - "loss": 0.9496, - "step": 7234 - }, - { - "epoch": 0.8699573137738231, - "grad_norm": 2.0204812624750916, - "learning_rate": 1.7466648742508981e-07, - "loss": 1.0305, - "step": 7235 - }, - { - "epoch": 0.8700775566644622, - "grad_norm": 1.845122521526904, - "learning_rate": 1.7434825664379837e-07, - "loss": 1.0354, - "step": 7236 - }, - { - "epoch": 0.8701977995551013, - "grad_norm": 2.5564616630987835, - "learning_rate": 1.740303028124246e-07, - "loss": 1.0638, - "step": 7237 - }, - { - "epoch": 0.8703180424457404, - "grad_norm": 1.8778865212147562, - "learning_rate": 1.7371262597920212e-07, - "loss": 0.9566, - "step": 7238 - }, - { - "epoch": 0.8704382853363795, - "grad_norm": 1.7856317800807713, - "learning_rate": 1.7339522619232195e-07, - "loss": 0.957, - "step": 7239 - }, - { - "epoch": 0.8705585282270186, - "grad_norm": 1.8468396436789285, - "learning_rate": 1.730781034999338e-07, - "loss": 0.9517, - "step": 7240 - }, - { - "epoch": 0.8706787711176577, - "grad_norm": 1.895294727767304, - "learning_rate": 1.7276125795014497e-07, - "loss": 0.9348, - "step": 7241 - }, - { - "epoch": 0.8707990140082967, - "grad_norm": 1.6879423056320184, - "learning_rate": 1.7244468959102054e-07, - "loss": 0.8727, - "step": 7242 - }, - { - "epoch": 0.8709192568989359, - "grad_norm": 2.001442031748963, - "learning_rate": 1.7212839847058348e-07, - "loss": 1.0495, - "step": 7243 - }, - { - "epoch": 0.871039499789575, - "grad_norm": 1.7645841247965162, - "learning_rate": 1.718123846368147e-07, - "loss": 0.9351, - "step": 7244 - }, - { - "epoch": 0.871159742680214, - "grad_norm": 1.8467392692455555, - "learning_rate": 1.714966481376543e-07, - "loss": 0.9137, - "step": 7245 - }, - { - "epoch": 0.8712799855708532, - "grad_norm": 2.5413953188107885, - "learning_rate": 1.7118118902099797e-07, - "loss": 1.0186, - "step": 7246 - }, - { - "epoch": 0.8714002284614922, - "grad_norm": 1.57010432898975, - "learning_rate": 1.7086600733470146e-07, - "loss": 1.003, - "step": 7247 - }, - { - "epoch": 0.8715204713521313, - "grad_norm": 1.680337540573143, - "learning_rate": 1.7055110312657738e-07, - "loss": 0.9629, - "step": 7248 - }, - { - "epoch": 0.8716407142427703, - "grad_norm": 1.991900195179965, - "learning_rate": 1.702364764443962e-07, - "loss": 0.9419, - "step": 7249 - }, - { - "epoch": 0.8717609571334095, - "grad_norm": 3.249126023374822, - "learning_rate": 1.6992212733588685e-07, - "loss": 0.9259, - "step": 7250 - }, - { - "epoch": 0.8718812000240486, - "grad_norm": 1.8284056358449163, - "learning_rate": 1.6960805584873538e-07, - "loss": 0.9481, - "step": 7251 - }, - { - "epoch": 0.8720014429146876, - "grad_norm": 1.4942756143065543, - "learning_rate": 1.6929426203058684e-07, - "loss": 0.9791, - "step": 7252 - }, - { - "epoch": 0.8721216858053268, - "grad_norm": 2.1596021165668184, - "learning_rate": 1.689807459290431e-07, - "loss": 1.009, - "step": 7253 - }, - { - "epoch": 0.8722419286959658, - "grad_norm": 1.8065280528694934, - "learning_rate": 1.6866750759166437e-07, - "loss": 0.9023, - "step": 7254 - }, - { - "epoch": 0.8723621715866049, - "grad_norm": 2.1253902974450103, - "learning_rate": 1.6835454706596865e-07, - "loss": 0.9698, - "step": 7255 - }, - { - "epoch": 0.8724824144772441, - "grad_norm": 1.5662299839454814, - "learning_rate": 1.680418643994317e-07, - "loss": 0.941, - "step": 7256 - }, - { - "epoch": 0.8726026573678831, - "grad_norm": 0.9860163129691228, - "learning_rate": 1.6772945963948738e-07, - "loss": 0.8849, - "step": 7257 - }, - { - "epoch": 0.8727229002585222, - "grad_norm": 2.1765040185086333, - "learning_rate": 1.6741733283352733e-07, - "loss": 0.9654, - "step": 7258 - }, - { - "epoch": 0.8728431431491613, - "grad_norm": 1.413479595278588, - "learning_rate": 1.6710548402890102e-07, - "loss": 1.0356, - "step": 7259 - }, - { - "epoch": 0.8729633860398004, - "grad_norm": 1.9300746760967717, - "learning_rate": 1.6679391327291527e-07, - "loss": 0.8688, - "step": 7260 - }, - { - "epoch": 0.8730836289304394, - "grad_norm": 2.4320673924256395, - "learning_rate": 1.6648262061283492e-07, - "loss": 0.8757, - "step": 7261 - }, - { - "epoch": 0.8732038718210786, - "grad_norm": 2.376672657677731, - "learning_rate": 1.6617160609588353e-07, - "loss": 0.9343, - "step": 7262 - }, - { - "epoch": 0.8733241147117177, - "grad_norm": 1.9187492445158325, - "learning_rate": 1.6586086976924163e-07, - "loss": 0.9068, - "step": 7263 - }, - { - "epoch": 0.8734443576023567, - "grad_norm": 1.802435324218859, - "learning_rate": 1.6555041168004747e-07, - "loss": 0.9818, - "step": 7264 - }, - { - "epoch": 0.8735646004929959, - "grad_norm": 1.6712620404764844, - "learning_rate": 1.6524023187539715e-07, - "loss": 0.8863, - "step": 7265 - }, - { - "epoch": 0.873684843383635, - "grad_norm": 1.7355389512641255, - "learning_rate": 1.649303304023446e-07, - "loss": 0.9484, - "step": 7266 - }, - { - "epoch": 0.873805086274274, - "grad_norm": 2.0013516150462767, - "learning_rate": 1.6462070730790246e-07, - "loss": 0.9845, - "step": 7267 - }, - { - "epoch": 0.8739253291649132, - "grad_norm": 3.25475022818212, - "learning_rate": 1.6431136263903912e-07, - "loss": 0.9902, - "step": 7268 - }, - { - "epoch": 0.8740455720555522, - "grad_norm": 1.8461449795595628, - "learning_rate": 1.6400229644268282e-07, - "loss": 0.9442, - "step": 7269 - }, - { - "epoch": 0.8741658149461913, - "grad_norm": 1.8968014044522765, - "learning_rate": 1.6369350876571852e-07, - "loss": 1.0002, - "step": 7270 - }, - { - "epoch": 0.8742860578368304, - "grad_norm": 2.169232524214522, - "learning_rate": 1.6338499965498874e-07, - "loss": 1.006, - "step": 7271 - }, - { - "epoch": 0.8744063007274695, - "grad_norm": 1.4500840064082043, - "learning_rate": 1.630767691572943e-07, - "loss": 0.9715, - "step": 7272 - }, - { - "epoch": 0.8745265436181086, - "grad_norm": 0.7696672116144542, - "learning_rate": 1.6276881731939306e-07, - "loss": 0.7587, - "step": 7273 - }, - { - "epoch": 0.8746467865087477, - "grad_norm": 1.564999755152479, - "learning_rate": 1.6246114418800193e-07, - "loss": 0.9529, - "step": 7274 - }, - { - "epoch": 0.8747670293993868, - "grad_norm": 1.5964284343359776, - "learning_rate": 1.6215374980979423e-07, - "loss": 0.9669, - "step": 7275 - }, - { - "epoch": 0.8748872722900258, - "grad_norm": 1.7768907577561963, - "learning_rate": 1.6184663423140133e-07, - "loss": 0.8866, - "step": 7276 - }, - { - "epoch": 0.875007515180665, - "grad_norm": 1.7647840134303825, - "learning_rate": 1.615397974994126e-07, - "loss": 0.8442, - "step": 7277 - }, - { - "epoch": 0.875127758071304, - "grad_norm": 1.3560185265667999, - "learning_rate": 1.6123323966037438e-07, - "loss": 0.9987, - "step": 7278 - }, - { - "epoch": 0.8752480009619431, - "grad_norm": 1.7338134268866183, - "learning_rate": 1.6092696076079216e-07, - "loss": 0.977, - "step": 7279 - }, - { - "epoch": 0.8753682438525822, - "grad_norm": 1.8644090993731606, - "learning_rate": 1.6062096084712785e-07, - "loss": 0.937, - "step": 7280 - }, - { - "epoch": 0.8754884867432213, - "grad_norm": 1.7144615772844667, - "learning_rate": 1.6031523996580098e-07, - "loss": 0.9061, - "step": 7281 - }, - { - "epoch": 0.8756087296338604, - "grad_norm": 2.3082486845211254, - "learning_rate": 1.6000979816318981e-07, - "loss": 0.8566, - "step": 7282 - }, - { - "epoch": 0.8757289725244994, - "grad_norm": 1.9657129143733092, - "learning_rate": 1.5970463548562886e-07, - "loss": 0.9519, - "step": 7283 - }, - { - "epoch": 0.8758492154151386, - "grad_norm": 1.621581369622155, - "learning_rate": 1.5939975197941192e-07, - "loss": 0.9111, - "step": 7284 - }, - { - "epoch": 0.8759694583057777, - "grad_norm": 0.8395009960443565, - "learning_rate": 1.5909514769078892e-07, - "loss": 0.7564, - "step": 7285 - }, - { - "epoch": 0.8760897011964167, - "grad_norm": 1.4094947256589723, - "learning_rate": 1.5879082266596867e-07, - "loss": 0.9762, - "step": 7286 - }, - { - "epoch": 0.8762099440870559, - "grad_norm": 1.6040490177493871, - "learning_rate": 1.5848677695111645e-07, - "loss": 0.9147, - "step": 7287 - }, - { - "epoch": 0.8763301869776949, - "grad_norm": 2.327681326353688, - "learning_rate": 1.5818301059235562e-07, - "loss": 0.9065, - "step": 7288 - }, - { - "epoch": 0.876450429868334, - "grad_norm": 1.9102992840906796, - "learning_rate": 1.578795236357684e-07, - "loss": 1.0182, - "step": 7289 - }, - { - "epoch": 0.8765706727589732, - "grad_norm": 2.0692131146448784, - "learning_rate": 1.5757631612739218e-07, - "loss": 1.0551, - "step": 7290 - }, - { - "epoch": 0.8766909156496122, - "grad_norm": 0.8882592217793119, - "learning_rate": 1.572733881132242e-07, - "loss": 0.8681, - "step": 7291 - }, - { - "epoch": 0.8768111585402513, - "grad_norm": 0.8002374430788204, - "learning_rate": 1.5697073963921814e-07, - "loss": 0.81, - "step": 7292 - }, - { - "epoch": 0.8769314014308904, - "grad_norm": 1.951226311001621, - "learning_rate": 1.566683707512857e-07, - "loss": 1.0477, - "step": 7293 - }, - { - "epoch": 0.8770516443215295, - "grad_norm": 1.767602538893897, - "learning_rate": 1.5636628149529553e-07, - "loss": 0.9943, - "step": 7294 - }, - { - "epoch": 0.8771718872121685, - "grad_norm": 2.0492829580715375, - "learning_rate": 1.560644719170743e-07, - "loss": 0.9897, - "step": 7295 - }, - { - "epoch": 0.8772921301028077, - "grad_norm": 1.746846968915551, - "learning_rate": 1.5576294206240692e-07, - "loss": 0.9163, - "step": 7296 - }, - { - "epoch": 0.8774123729934468, - "grad_norm": 1.7510660194399457, - "learning_rate": 1.5546169197703507e-07, - "loss": 0.8824, - "step": 7297 - }, - { - "epoch": 0.8775326158840858, - "grad_norm": 2.6159347952319596, - "learning_rate": 1.5516072170665774e-07, - "loss": 0.971, - "step": 7298 - }, - { - "epoch": 0.877652858774725, - "grad_norm": 4.623131864449032, - "learning_rate": 1.5486003129693214e-07, - "loss": 1.06, - "step": 7299 - }, - { - "epoch": 0.877773101665364, - "grad_norm": 2.8611354723966236, - "learning_rate": 1.545596207934725e-07, - "loss": 0.9723, - "step": 7300 - }, - { - "epoch": 0.8778933445560031, - "grad_norm": 1.8175751893286052, - "learning_rate": 1.5425949024185147e-07, - "loss": 0.9709, - "step": 7301 - }, - { - "epoch": 0.8780135874466423, - "grad_norm": 1.7260517728605984, - "learning_rate": 1.5395963968759818e-07, - "loss": 0.8759, - "step": 7302 - }, - { - "epoch": 0.8781338303372813, - "grad_norm": 1.5852803339177344, - "learning_rate": 1.536600691761998e-07, - "loss": 0.8433, - "step": 7303 - }, - { - "epoch": 0.8782540732279204, - "grad_norm": 1.6433626131824228, - "learning_rate": 1.5336077875310084e-07, - "loss": 0.9097, - "step": 7304 - }, - { - "epoch": 0.8783743161185595, - "grad_norm": 1.9026843007029515, - "learning_rate": 1.5306176846370321e-07, - "loss": 0.9393, - "step": 7305 - }, - { - "epoch": 0.8784945590091986, - "grad_norm": 2.1905857664940243, - "learning_rate": 1.5276303835336712e-07, - "loss": 0.9418, - "step": 7306 - }, - { - "epoch": 0.8786148018998376, - "grad_norm": 0.7886600412252783, - "learning_rate": 1.524645884674094e-07, - "loss": 0.7517, - "step": 7307 - }, - { - "epoch": 0.8787350447904768, - "grad_norm": 2.261276386332671, - "learning_rate": 1.521664188511047e-07, - "loss": 0.9894, - "step": 7308 - }, - { - "epoch": 0.8788552876811159, - "grad_norm": 1.9194285631174537, - "learning_rate": 1.518685295496851e-07, - "loss": 1.0057, - "step": 7309 - }, - { - "epoch": 0.8789755305717549, - "grad_norm": 1.5355485021892612, - "learning_rate": 1.5157092060833975e-07, - "loss": 1.0498, - "step": 7310 - }, - { - "epoch": 0.879095773462394, - "grad_norm": 2.7262935123180734, - "learning_rate": 1.5127359207221658e-07, - "loss": 0.8566, - "step": 7311 - }, - { - "epoch": 0.8792160163530331, - "grad_norm": 1.8081022381528322, - "learning_rate": 1.5097654398641923e-07, - "loss": 0.9289, - "step": 7312 - }, - { - "epoch": 0.8793362592436722, - "grad_norm": 1.3710117587274624, - "learning_rate": 1.5067977639601014e-07, - "loss": 0.9288, - "step": 7313 - }, - { - "epoch": 0.8794565021343113, - "grad_norm": 2.0218882871306185, - "learning_rate": 1.5038328934600864e-07, - "loss": 0.9079, - "step": 7314 - }, - { - "epoch": 0.8795767450249504, - "grad_norm": 3.380847245519114, - "learning_rate": 1.5008708288139161e-07, - "loss": 0.8974, - "step": 7315 - }, - { - "epoch": 0.8796969879155895, - "grad_norm": 1.890919686042673, - "learning_rate": 1.497911570470931e-07, - "loss": 0.939, - "step": 7316 - }, - { - "epoch": 0.8798172308062285, - "grad_norm": 1.604375536643259, - "learning_rate": 1.494955118880048e-07, - "loss": 1.0489, - "step": 7317 - }, - { - "epoch": 0.8799374736968677, - "grad_norm": 1.6340292453646694, - "learning_rate": 1.4920014744897634e-07, - "loss": 0.9276, - "step": 7318 - }, - { - "epoch": 0.8800577165875068, - "grad_norm": 1.9878589353998313, - "learning_rate": 1.4890506377481392e-07, - "loss": 1.0629, - "step": 7319 - }, - { - "epoch": 0.8801779594781458, - "grad_norm": 1.455139158876204, - "learning_rate": 1.486102609102815e-07, - "loss": 0.8364, - "step": 7320 - }, - { - "epoch": 0.880298202368785, - "grad_norm": 2.60994965398762, - "learning_rate": 1.483157389001004e-07, - "loss": 1.0451, - "step": 7321 - }, - { - "epoch": 0.880418445259424, - "grad_norm": 2.897795490407084, - "learning_rate": 1.4802149778894933e-07, - "loss": 0.989, - "step": 7322 - }, - { - "epoch": 0.8805386881500631, - "grad_norm": 1.6430766361819351, - "learning_rate": 1.4772753762146484e-07, - "loss": 1.0728, - "step": 7323 - }, - { - "epoch": 0.8806589310407023, - "grad_norm": 2.141829103279206, - "learning_rate": 1.474338584422401e-07, - "loss": 0.8973, - "step": 7324 - }, - { - "epoch": 0.8807791739313413, - "grad_norm": 1.6299562431968313, - "learning_rate": 1.4714046029582595e-07, - "loss": 0.9563, - "step": 7325 - }, - { - "epoch": 0.8808994168219804, - "grad_norm": 1.748610421664454, - "learning_rate": 1.46847343226731e-07, - "loss": 0.9594, - "step": 7326 - }, - { - "epoch": 0.8810196597126195, - "grad_norm": 1.6631930233341623, - "learning_rate": 1.465545072794203e-07, - "loss": 0.888, - "step": 7327 - }, - { - "epoch": 0.8811399026032586, - "grad_norm": 1.5359539260063535, - "learning_rate": 1.4626195249831774e-07, - "loss": 0.9543, - "step": 7328 - }, - { - "epoch": 0.8812601454938976, - "grad_norm": 1.6786833844633828, - "learning_rate": 1.4596967892780244e-07, - "loss": 0.9178, - "step": 7329 - }, - { - "epoch": 0.8813803883845368, - "grad_norm": 1.7256043135782302, - "learning_rate": 1.4567768661221314e-07, - "loss": 0.9481, - "step": 7330 - }, - { - "epoch": 0.8815006312751759, - "grad_norm": 1.7777225300363464, - "learning_rate": 1.4538597559584442e-07, - "loss": 0.9401, - "step": 7331 - }, - { - "epoch": 0.8816208741658149, - "grad_norm": 1.835246698806552, - "learning_rate": 1.4509454592294823e-07, - "loss": 0.9704, - "step": 7332 - }, - { - "epoch": 0.8817411170564541, - "grad_norm": 2.0289793968602297, - "learning_rate": 1.448033976377354e-07, - "loss": 0.9909, - "step": 7333 - }, - { - "epoch": 0.8818613599470931, - "grad_norm": 1.858242042363317, - "learning_rate": 1.445125307843713e-07, - "loss": 0.9343, - "step": 7334 - }, - { - "epoch": 0.8819816028377322, - "grad_norm": 1.8175414117138233, - "learning_rate": 1.442219454069813e-07, - "loss": 0.949, - "step": 7335 - }, - { - "epoch": 0.8821018457283714, - "grad_norm": 1.9776053711876873, - "learning_rate": 1.4393164154964676e-07, - "loss": 0.8681, - "step": 7336 - }, - { - "epoch": 0.8822220886190104, - "grad_norm": 2.084505997277424, - "learning_rate": 1.4364161925640649e-07, - "loss": 1.1301, - "step": 7337 - }, - { - "epoch": 0.8823423315096495, - "grad_norm": 1.8833120482713268, - "learning_rate": 1.4335187857125663e-07, - "loss": 1.0494, - "step": 7338 - }, - { - "epoch": 0.8824625744002886, - "grad_norm": 1.6445752498150024, - "learning_rate": 1.4306241953815023e-07, - "loss": 0.9546, - "step": 7339 - }, - { - "epoch": 0.8825828172909277, - "grad_norm": 2.4327552783780613, - "learning_rate": 1.4277324220099862e-07, - "loss": 0.9089, - "step": 7340 - }, - { - "epoch": 0.8827030601815667, - "grad_norm": 3.549618370067424, - "learning_rate": 1.4248434660366938e-07, - "loss": 0.9408, - "step": 7341 - }, - { - "epoch": 0.8828233030722058, - "grad_norm": 1.82355076978217, - "learning_rate": 1.4219573278998808e-07, - "loss": 0.9097, - "step": 7342 - }, - { - "epoch": 0.882943545962845, - "grad_norm": 1.9958138764242002, - "learning_rate": 1.4190740080373685e-07, - "loss": 0.8536, - "step": 7343 - }, - { - "epoch": 0.883063788853484, - "grad_norm": 1.7077961829296842, - "learning_rate": 1.4161935068865538e-07, - "loss": 1.0437, - "step": 7344 - }, - { - "epoch": 0.8831840317441231, - "grad_norm": 2.3722074052088358, - "learning_rate": 1.4133158248844113e-07, - "loss": 0.954, - "step": 7345 - }, - { - "epoch": 0.8833042746347622, - "grad_norm": 1.7567306241016971, - "learning_rate": 1.4104409624674785e-07, - "loss": 0.9282, - "step": 7346 - }, - { - "epoch": 0.8834245175254013, - "grad_norm": 1.6992876411591535, - "learning_rate": 1.407568920071873e-07, - "loss": 0.9816, - "step": 7347 - }, - { - "epoch": 0.8835447604160404, - "grad_norm": 1.8693019753454811, - "learning_rate": 1.4046996981332782e-07, - "loss": 0.8711, - "step": 7348 - }, - { - "epoch": 0.8836650033066795, - "grad_norm": 2.201400081580663, - "learning_rate": 1.4018332970869516e-07, - "loss": 0.9789, - "step": 7349 - }, - { - "epoch": 0.8837852461973186, - "grad_norm": 1.607328576755856, - "learning_rate": 1.398969717367733e-07, - "loss": 1.0419, - "step": 7350 - }, - { - "epoch": 0.8839054890879576, - "grad_norm": 1.5578406481557692, - "learning_rate": 1.396108959410014e-07, - "loss": 0.9633, - "step": 7351 - }, - { - "epoch": 0.8840257319785968, - "grad_norm": 1.5092361923427837, - "learning_rate": 1.3932510236477745e-07, - "loss": 1.0026, - "step": 7352 - }, - { - "epoch": 0.8841459748692359, - "grad_norm": 15.367461054218738, - "learning_rate": 1.3903959105145636e-07, - "loss": 0.7513, - "step": 7353 - }, - { - "epoch": 0.8842662177598749, - "grad_norm": 1.9701258983175562, - "learning_rate": 1.387543620443492e-07, - "loss": 1.0245, - "step": 7354 - }, - { - "epoch": 0.8843864606505141, - "grad_norm": 1.5090463758386494, - "learning_rate": 1.3846941538672606e-07, - "loss": 1.0347, - "step": 7355 - }, - { - "epoch": 0.8845067035411531, - "grad_norm": 2.2536521517162003, - "learning_rate": 1.3818475112181193e-07, - "loss": 1.0091, - "step": 7356 - }, - { - "epoch": 0.8846269464317922, - "grad_norm": 2.0436135436067144, - "learning_rate": 1.3790036929279091e-07, - "loss": 0.9844, - "step": 7357 - }, - { - "epoch": 0.8847471893224313, - "grad_norm": 2.3628169336619185, - "learning_rate": 1.3761626994280363e-07, - "loss": 0.7865, - "step": 7358 - }, - { - "epoch": 0.8848674322130704, - "grad_norm": 1.7681021403631392, - "learning_rate": 1.3733245311494735e-07, - "loss": 0.9309, - "step": 7359 - }, - { - "epoch": 0.8849876751037095, - "grad_norm": 1.9090664178755299, - "learning_rate": 1.3704891885227676e-07, - "loss": 0.9102, - "step": 7360 - }, - { - "epoch": 0.8851079179943486, - "grad_norm": 1.9833032311898613, - "learning_rate": 1.367656671978037e-07, - "loss": 0.9703, - "step": 7361 - }, - { - "epoch": 0.8852281608849877, - "grad_norm": 2.8841275159604702, - "learning_rate": 1.36482698194498e-07, - "loss": 0.9414, - "step": 7362 - }, - { - "epoch": 0.8853484037756267, - "grad_norm": 1.9800931736187435, - "learning_rate": 1.3620001188528506e-07, - "loss": 0.9153, - "step": 7363 - }, - { - "epoch": 0.8854686466662659, - "grad_norm": 2.68758667207284, - "learning_rate": 1.3591760831304865e-07, - "loss": 0.926, - "step": 7364 - }, - { - "epoch": 0.885588889556905, - "grad_norm": 1.572279879108523, - "learning_rate": 1.356354875206287e-07, - "loss": 0.9992, - "step": 7365 - }, - { - "epoch": 0.885709132447544, - "grad_norm": 1.8674241558438136, - "learning_rate": 1.3535364955082296e-07, - "loss": 0.8904, - "step": 7366 - }, - { - "epoch": 0.8858293753381832, - "grad_norm": 1.8111034142864373, - "learning_rate": 1.3507209444638613e-07, - "loss": 0.8352, - "step": 7367 - }, - { - "epoch": 0.8859496182288222, - "grad_norm": 1.6681334081490042, - "learning_rate": 1.347908222500298e-07, - "loss": 0.938, - "step": 7368 - }, - { - "epoch": 0.8860698611194613, - "grad_norm": 1.888032317729529, - "learning_rate": 1.3450983300442276e-07, - "loss": 0.9001, - "step": 7369 - }, - { - "epoch": 0.8861901040101005, - "grad_norm": 1.893041658276622, - "learning_rate": 1.3422912675219068e-07, - "loss": 0.9326, - "step": 7370 - }, - { - "epoch": 0.8863103469007395, - "grad_norm": 1.5515213360660918, - "learning_rate": 1.339487035359166e-07, - "loss": 0.98, - "step": 7371 - }, - { - "epoch": 0.8864305897913786, - "grad_norm": 1.4511212716193946, - "learning_rate": 1.336685633981409e-07, - "loss": 1.0462, - "step": 7372 - }, - { - "epoch": 0.8865508326820177, - "grad_norm": 1.8666194546496515, - "learning_rate": 1.333887063813597e-07, - "loss": 0.9415, - "step": 7373 - }, - { - "epoch": 0.8866710755726568, - "grad_norm": 2.73622112623153, - "learning_rate": 1.331091325280278e-07, - "loss": 0.8631, - "step": 7374 - }, - { - "epoch": 0.8867913184632958, - "grad_norm": 1.4400089593449668, - "learning_rate": 1.3282984188055625e-07, - "loss": 0.982, - "step": 7375 - }, - { - "epoch": 0.8869115613539349, - "grad_norm": 2.462370343353068, - "learning_rate": 1.3255083448131288e-07, - "loss": 0.9879, - "step": 7376 - }, - { - "epoch": 0.8870318042445741, - "grad_norm": 1.910031304826887, - "learning_rate": 1.3227211037262365e-07, - "loss": 0.9833, - "step": 7377 - }, - { - "epoch": 0.8871520471352131, - "grad_norm": 2.448165837585917, - "learning_rate": 1.319936695967696e-07, - "loss": 1.0563, - "step": 7378 - }, - { - "epoch": 0.8872722900258522, - "grad_norm": 1.92191885301134, - "learning_rate": 1.3171551219599097e-07, - "loss": 1.0202, - "step": 7379 - }, - { - "epoch": 0.8873925329164913, - "grad_norm": 3.300561805066668, - "learning_rate": 1.3143763821248377e-07, - "loss": 0.9777, - "step": 7380 - }, - { - "epoch": 0.8875127758071304, - "grad_norm": 1.6496623040464882, - "learning_rate": 1.3116004768840118e-07, - "loss": 0.919, - "step": 7381 - }, - { - "epoch": 0.8876330186977694, - "grad_norm": 1.5634405738362684, - "learning_rate": 1.3088274066585348e-07, - "loss": 0.9323, - "step": 7382 - }, - { - "epoch": 0.8877532615884086, - "grad_norm": 1.8691924116878982, - "learning_rate": 1.3060571718690749e-07, - "loss": 1.1003, - "step": 7383 - }, - { - "epoch": 0.8878735044790477, - "grad_norm": 0.8171000475797452, - "learning_rate": 1.3032897729358805e-07, - "loss": 0.7942, - "step": 7384 - }, - { - "epoch": 0.8879937473696867, - "grad_norm": 1.727581569219846, - "learning_rate": 1.3005252102787645e-07, - "loss": 0.9937, - "step": 7385 - }, - { - "epoch": 0.8881139902603259, - "grad_norm": 1.444487300057732, - "learning_rate": 1.297763484317105e-07, - "loss": 0.9394, - "step": 7386 - }, - { - "epoch": 0.888234233150965, - "grad_norm": 2.1219053735370967, - "learning_rate": 1.2950045954698551e-07, - "loss": 0.9091, - "step": 7387 - }, - { - "epoch": 0.888354476041604, - "grad_norm": 1.482275994602903, - "learning_rate": 1.2922485441555343e-07, - "loss": 0.9547, - "step": 7388 - }, - { - "epoch": 0.8884747189322432, - "grad_norm": 1.7905443875739804, - "learning_rate": 1.2894953307922363e-07, - "loss": 1.0196, - "step": 7389 - }, - { - "epoch": 0.8885949618228822, - "grad_norm": 1.863283873102353, - "learning_rate": 1.2867449557976208e-07, - "loss": 1.0374, - "step": 7390 - }, - { - "epoch": 0.8887152047135213, - "grad_norm": 1.6988024673050581, - "learning_rate": 1.283997419588916e-07, - "loss": 0.9592, - "step": 7391 - }, - { - "epoch": 0.8888354476041604, - "grad_norm": 1.7826515840597559, - "learning_rate": 1.2812527225829216e-07, - "loss": 0.8138, - "step": 7392 - }, - { - "epoch": 0.8889556904947995, - "grad_norm": 1.9505008005523408, - "learning_rate": 1.2785108651960052e-07, - "loss": 0.9569, - "step": 7393 - }, - { - "epoch": 0.8890759333854386, - "grad_norm": 1.8021889436973, - "learning_rate": 1.2757718478441094e-07, - "loss": 0.9948, - "step": 7394 - }, - { - "epoch": 0.8891961762760777, - "grad_norm": 1.8222335043935236, - "learning_rate": 1.2730356709427302e-07, - "loss": 0.9707, - "step": 7395 - }, - { - "epoch": 0.8893164191667168, - "grad_norm": 1.4516128547729976, - "learning_rate": 1.2703023349069542e-07, - "loss": 0.7934, - "step": 7396 - }, - { - "epoch": 0.8894366620573558, - "grad_norm": 1.6289126901476405, - "learning_rate": 1.2675718401514223e-07, - "loss": 0.8096, - "step": 7397 - }, - { - "epoch": 0.889556904947995, - "grad_norm": 1.9685534424526645, - "learning_rate": 1.264844187090346e-07, - "loss": 0.9418, - "step": 7398 - }, - { - "epoch": 0.889677147838634, - "grad_norm": 1.6301079212338625, - "learning_rate": 1.262119376137516e-07, - "loss": 0.9552, - "step": 7399 - }, - { - "epoch": 0.8897973907292731, - "grad_norm": 1.5675046499082963, - "learning_rate": 1.2593974077062707e-07, - "loss": 1.0451, - "step": 7400 - }, - { - "epoch": 0.8899176336199123, - "grad_norm": 1.4520269367462235, - "learning_rate": 1.2566782822095423e-07, - "loss": 0.8255, - "step": 7401 - }, - { - "epoch": 0.8900378765105513, - "grad_norm": 1.7030680235685278, - "learning_rate": 1.2539620000598162e-07, - "loss": 0.9182, - "step": 7402 - }, - { - "epoch": 0.8901581194011904, - "grad_norm": 1.9638663376816288, - "learning_rate": 1.2512485616691492e-07, - "loss": 0.9968, - "step": 7403 - }, - { - "epoch": 0.8902783622918296, - "grad_norm": 1.4256766502994662, - "learning_rate": 1.2485379674491681e-07, - "loss": 1.0037, - "step": 7404 - }, - { - "epoch": 0.8903986051824686, - "grad_norm": 2.328772128497085, - "learning_rate": 1.2458302178110657e-07, - "loss": 0.9878, - "step": 7405 - }, - { - "epoch": 0.8905188480731077, - "grad_norm": 1.822192551434613, - "learning_rate": 1.2431253131656118e-07, - "loss": 1.0229, - "step": 7406 - }, - { - "epoch": 0.8906390909637467, - "grad_norm": 1.6623314105146991, - "learning_rate": 1.240423253923133e-07, - "loss": 0.9639, - "step": 7407 - }, - { - "epoch": 0.8907593338543859, - "grad_norm": 1.8207225419592585, - "learning_rate": 1.237724040493533e-07, - "loss": 0.8909, - "step": 7408 - }, - { - "epoch": 0.8908795767450249, - "grad_norm": 2.141605013641485, - "learning_rate": 1.2350276732862773e-07, - "loss": 0.926, - "step": 7409 - }, - { - "epoch": 0.890999819635664, - "grad_norm": 0.8831346987268001, - "learning_rate": 1.2323341527103993e-07, - "loss": 0.8132, - "step": 7410 - }, - { - "epoch": 0.8911200625263032, - "grad_norm": 1.839451709616346, - "learning_rate": 1.2296434791745135e-07, - "loss": 1.0377, - "step": 7411 - }, - { - "epoch": 0.8912403054169422, - "grad_norm": 1.8059172960779344, - "learning_rate": 1.2269556530867875e-07, - "loss": 0.9687, - "step": 7412 - }, - { - "epoch": 0.8913605483075813, - "grad_norm": 2.002635887764524, - "learning_rate": 1.2242706748549614e-07, - "loss": 1.0224, - "step": 7413 - }, - { - "epoch": 0.8914807911982204, - "grad_norm": 1.7593487617338492, - "learning_rate": 1.2215885448863473e-07, - "loss": 1.0138, - "step": 7414 - }, - { - "epoch": 0.8916010340888595, - "grad_norm": 1.6129204667615786, - "learning_rate": 1.2189092635878152e-07, - "loss": 1.0035, - "step": 7415 - }, - { - "epoch": 0.8917212769794985, - "grad_norm": 1.5753713321426674, - "learning_rate": 1.216232831365822e-07, - "loss": 0.9738, - "step": 7416 - }, - { - "epoch": 0.8918415198701377, - "grad_norm": 1.729307934915467, - "learning_rate": 1.2135592486263678e-07, - "loss": 1.0059, - "step": 7417 - }, - { - "epoch": 0.8919617627607768, - "grad_norm": 1.544056183262226, - "learning_rate": 1.2108885157750415e-07, - "loss": 0.8052, - "step": 7418 - }, - { - "epoch": 0.8920820056514158, - "grad_norm": 1.6102230467214382, - "learning_rate": 1.2082206332169897e-07, - "loss": 0.9984, - "step": 7419 - }, - { - "epoch": 0.892202248542055, - "grad_norm": 2.387512374765971, - "learning_rate": 1.2055556013569225e-07, - "loss": 0.9283, - "step": 7420 - }, - { - "epoch": 0.892322491432694, - "grad_norm": 1.5032984706842716, - "learning_rate": 1.2028934205991315e-07, - "loss": 1.0144, - "step": 7421 - }, - { - "epoch": 0.8924427343233331, - "grad_norm": 1.358173453199154, - "learning_rate": 1.2002340913474607e-07, - "loss": 0.9608, - "step": 7422 - }, - { - "epoch": 0.8925629772139723, - "grad_norm": 1.9558540504159099, - "learning_rate": 1.1975776140053317e-07, - "loss": 0.9431, - "step": 7423 - }, - { - "epoch": 0.8926832201046113, - "grad_norm": 2.0921983805481057, - "learning_rate": 1.194923988975729e-07, - "loss": 0.9356, - "step": 7424 - }, - { - "epoch": 0.8928034629952504, - "grad_norm": 2.045828282284636, - "learning_rate": 1.192273216661206e-07, - "loss": 0.9327, - "step": 7425 - }, - { - "epoch": 0.8929237058858895, - "grad_norm": 0.7894022370726436, - "learning_rate": 1.189625297463881e-07, - "loss": 0.8035, - "step": 7426 - }, - { - "epoch": 0.8930439487765286, - "grad_norm": 1.8144340062443018, - "learning_rate": 1.1869802317854394e-07, - "loss": 0.9911, - "step": 7427 - }, - { - "epoch": 0.8931641916671677, - "grad_norm": 2.7910430458382263, - "learning_rate": 1.1843380200271425e-07, - "loss": 0.9216, - "step": 7428 - }, - { - "epoch": 0.8932844345578068, - "grad_norm": 1.6542286323964883, - "learning_rate": 1.181698662589805e-07, - "loss": 1.0024, - "step": 7429 - }, - { - "epoch": 0.8934046774484459, - "grad_norm": 1.8409171890954068, - "learning_rate": 1.1790621598738249e-07, - "loss": 0.9576, - "step": 7430 - }, - { - "epoch": 0.8935249203390849, - "grad_norm": 1.9630739648325017, - "learning_rate": 1.1764285122791461e-07, - "loss": 0.9525, - "step": 7431 - }, - { - "epoch": 0.8936451632297241, - "grad_norm": 1.8233326101330725, - "learning_rate": 1.173797720205294e-07, - "loss": 0.9641, - "step": 7432 - }, - { - "epoch": 0.8937654061203631, - "grad_norm": 2.4231624011798, - "learning_rate": 1.1711697840513602e-07, - "loss": 0.9209, - "step": 7433 - }, - { - "epoch": 0.8938856490110022, - "grad_norm": 1.9322216984367477, - "learning_rate": 1.1685447042160012e-07, - "loss": 0.9031, - "step": 7434 - }, - { - "epoch": 0.8940058919016414, - "grad_norm": 1.4574735650205684, - "learning_rate": 1.1659224810974367e-07, - "loss": 0.906, - "step": 7435 - }, - { - "epoch": 0.8941261347922804, - "grad_norm": 1.512061422775655, - "learning_rate": 1.1633031150934591e-07, - "loss": 0.8812, - "step": 7436 - }, - { - "epoch": 0.8942463776829195, - "grad_norm": 1.9036145362072956, - "learning_rate": 1.1606866066014176e-07, - "loss": 1.0006, - "step": 7437 - }, - { - "epoch": 0.8943666205735585, - "grad_norm": 2.0701215548000915, - "learning_rate": 1.1580729560182434e-07, - "loss": 0.9449, - "step": 7438 - }, - { - "epoch": 0.8944868634641977, - "grad_norm": 1.5604924847765034, - "learning_rate": 1.1554621637404171e-07, - "loss": 0.9101, - "step": 7439 - }, - { - "epoch": 0.8946071063548368, - "grad_norm": 2.4011761564927463, - "learning_rate": 1.1528542301639999e-07, - "loss": 0.8008, - "step": 7440 - }, - { - "epoch": 0.8947273492454758, - "grad_norm": 2.201214442141926, - "learning_rate": 1.1502491556846105e-07, - "loss": 1.0205, - "step": 7441 - }, - { - "epoch": 0.894847592136115, - "grad_norm": 2.030008024378359, - "learning_rate": 1.1476469406974331e-07, - "loss": 1.0118, - "step": 7442 - }, - { - "epoch": 0.894967835026754, - "grad_norm": 1.6669428437611336, - "learning_rate": 1.1450475855972341e-07, - "loss": 0.9729, - "step": 7443 - }, - { - "epoch": 0.8950880779173931, - "grad_norm": 1.9288019163668846, - "learning_rate": 1.1424510907783158e-07, - "loss": 0.9017, - "step": 7444 - }, - { - "epoch": 0.8952083208080323, - "grad_norm": 1.5789711827021522, - "learning_rate": 1.1398574566345787e-07, - "loss": 1.0211, - "step": 7445 - }, - { - "epoch": 0.8953285636986713, - "grad_norm": 1.9316335278757701, - "learning_rate": 1.1372666835594702e-07, - "loss": 1.0243, - "step": 7446 - }, - { - "epoch": 0.8954488065893104, - "grad_norm": 1.8496698291087217, - "learning_rate": 1.1346787719460071e-07, - "loss": 0.9134, - "step": 7447 - }, - { - "epoch": 0.8955690494799495, - "grad_norm": 1.8752605257236015, - "learning_rate": 1.1320937221867732e-07, - "loss": 0.9235, - "step": 7448 - }, - { - "epoch": 0.8956892923705886, - "grad_norm": 1.625512042434847, - "learning_rate": 1.1295115346739192e-07, - "loss": 0.9904, - "step": 7449 - }, - { - "epoch": 0.8958095352612276, - "grad_norm": 1.835433760991706, - "learning_rate": 1.1269322097991629e-07, - "loss": 0.9362, - "step": 7450 - }, - { - "epoch": 0.8959297781518668, - "grad_norm": 2.2470300994948618, - "learning_rate": 1.1243557479537846e-07, - "loss": 0.8803, - "step": 7451 - }, - { - "epoch": 0.8960500210425059, - "grad_norm": 1.8424826970316666, - "learning_rate": 1.121782149528634e-07, - "loss": 0.8844, - "step": 7452 - }, - { - "epoch": 0.8961702639331449, - "grad_norm": 2.148662097635283, - "learning_rate": 1.1192114149141208e-07, - "loss": 0.9904, - "step": 7453 - }, - { - "epoch": 0.8962905068237841, - "grad_norm": 1.9113326555420962, - "learning_rate": 1.1166435445002197e-07, - "loss": 0.8505, - "step": 7454 - }, - { - "epoch": 0.8964107497144231, - "grad_norm": 1.9691920919414134, - "learning_rate": 1.1140785386764818e-07, - "loss": 0.8826, - "step": 7455 - }, - { - "epoch": 0.8965309926050622, - "grad_norm": 1.8939211148044612, - "learning_rate": 1.1115163978320153e-07, - "loss": 0.8923, - "step": 7456 - }, - { - "epoch": 0.8966512354957014, - "grad_norm": 1.8259564440944314, - "learning_rate": 1.1089571223554917e-07, - "loss": 1.0257, - "step": 7457 - }, - { - "epoch": 0.8967714783863404, - "grad_norm": 1.557177825475102, - "learning_rate": 1.1064007126351537e-07, - "loss": 1.0424, - "step": 7458 - }, - { - "epoch": 0.8968917212769795, - "grad_norm": 2.048858613259213, - "learning_rate": 1.1038471690588003e-07, - "loss": 0.9638, - "step": 7459 - }, - { - "epoch": 0.8970119641676186, - "grad_norm": 1.7897432189974933, - "learning_rate": 1.1012964920138145e-07, - "loss": 0.9993, - "step": 7460 - }, - { - "epoch": 0.8971322070582577, - "grad_norm": 1.4528242230614183, - "learning_rate": 1.0987486818871205e-07, - "loss": 0.9524, - "step": 7461 - }, - { - "epoch": 0.8972524499488967, - "grad_norm": 2.055214469837347, - "learning_rate": 1.0962037390652245e-07, - "loss": 0.9188, - "step": 7462 - }, - { - "epoch": 0.8973726928395359, - "grad_norm": 1.687039913046781, - "learning_rate": 1.0936616639341911e-07, - "loss": 0.9177, - "step": 7463 - }, - { - "epoch": 0.897492935730175, - "grad_norm": 0.7724821181835607, - "learning_rate": 1.0911224568796473e-07, - "loss": 0.7603, - "step": 7464 - }, - { - "epoch": 0.897613178620814, - "grad_norm": 1.7815631122785451, - "learning_rate": 1.0885861182867984e-07, - "loss": 0.9028, - "step": 7465 - }, - { - "epoch": 0.8977334215114532, - "grad_norm": 1.7301668545296258, - "learning_rate": 1.0860526485403942e-07, - "loss": 0.9052, - "step": 7466 - }, - { - "epoch": 0.8978536644020922, - "grad_norm": 1.5040892650664748, - "learning_rate": 1.0835220480247675e-07, - "loss": 0.9745, - "step": 7467 - }, - { - "epoch": 0.8979739072927313, - "grad_norm": 1.9390469190046853, - "learning_rate": 1.0809943171238067e-07, - "loss": 1.0373, - "step": 7468 - }, - { - "epoch": 0.8980941501833704, - "grad_norm": 2.1770755335715233, - "learning_rate": 1.078469456220965e-07, - "loss": 0.8412, - "step": 7469 - }, - { - "epoch": 0.8982143930740095, - "grad_norm": 1.7569250282484634, - "learning_rate": 1.0759474656992606e-07, - "loss": 0.8918, - "step": 7470 - }, - { - "epoch": 0.8983346359646486, - "grad_norm": 2.2473231392629867, - "learning_rate": 1.0734283459412785e-07, - "loss": 0.9707, - "step": 7471 - }, - { - "epoch": 0.8984548788552876, - "grad_norm": 1.5771944682959562, - "learning_rate": 1.0709120973291707e-07, - "loss": 0.9973, - "step": 7472 - }, - { - "epoch": 0.8985751217459268, - "grad_norm": 2.1120563560393992, - "learning_rate": 1.0683987202446475e-07, - "loss": 0.9801, - "step": 7473 - }, - { - "epoch": 0.8986953646365659, - "grad_norm": 7.481405732359242, - "learning_rate": 1.0658882150689862e-07, - "loss": 0.8984, - "step": 7474 - }, - { - "epoch": 0.8988156075272049, - "grad_norm": 2.347843613344157, - "learning_rate": 1.0633805821830288e-07, - "loss": 0.9757, - "step": 7475 - }, - { - "epoch": 0.8989358504178441, - "grad_norm": 2.2436605791640205, - "learning_rate": 1.0608758219671753e-07, - "loss": 1.0298, - "step": 7476 - }, - { - "epoch": 0.8990560933084831, - "grad_norm": 1.5628010269106989, - "learning_rate": 1.0583739348014065e-07, - "loss": 0.905, - "step": 7477 - }, - { - "epoch": 0.8991763361991222, - "grad_norm": 1.6108146080406183, - "learning_rate": 1.0558749210652518e-07, - "loss": 1.0439, - "step": 7478 - }, - { - "epoch": 0.8992965790897613, - "grad_norm": 1.6093063339843576, - "learning_rate": 1.053378781137808e-07, - "loss": 1.0524, - "step": 7479 - }, - { - "epoch": 0.8994168219804004, - "grad_norm": 1.8589167310668224, - "learning_rate": 1.0508855153977392e-07, - "loss": 0.9764, - "step": 7480 - }, - { - "epoch": 0.8995370648710395, - "grad_norm": 2.0724540718244584, - "learning_rate": 1.0483951242232669e-07, - "loss": 0.8591, - "step": 7481 - }, - { - "epoch": 0.8996573077616786, - "grad_norm": 1.1149780192582073, - "learning_rate": 1.0459076079921936e-07, - "loss": 0.8038, - "step": 7482 - }, - { - "epoch": 0.8997775506523177, - "grad_norm": 1.947722080187777, - "learning_rate": 1.0434229670818618e-07, - "loss": 1.0392, - "step": 7483 - }, - { - "epoch": 0.8998977935429567, - "grad_norm": 1.3503291947282474, - "learning_rate": 1.0409412018691944e-07, - "loss": 0.9925, - "step": 7484 - }, - { - "epoch": 0.9000180364335959, - "grad_norm": 1.9761294165636776, - "learning_rate": 1.0384623127306724e-07, - "loss": 0.9534, - "step": 7485 - }, - { - "epoch": 0.900138279324235, - "grad_norm": 1.6200603086525096, - "learning_rate": 1.0359863000423397e-07, - "loss": 0.9866, - "step": 7486 - }, - { - "epoch": 0.900258522214874, - "grad_norm": 1.6005951728112149, - "learning_rate": 1.0335131641798112e-07, - "loss": 0.9139, - "step": 7487 - }, - { - "epoch": 0.9003787651055132, - "grad_norm": 0.8677132190018791, - "learning_rate": 1.0310429055182512e-07, - "loss": 0.8215, - "step": 7488 - }, - { - "epoch": 0.9004990079961522, - "grad_norm": 1.546901047612528, - "learning_rate": 1.0285755244324024e-07, - "loss": 0.9302, - "step": 7489 - }, - { - "epoch": 0.9006192508867913, - "grad_norm": 1.3736916733248847, - "learning_rate": 1.0261110212965629e-07, - "loss": 0.8828, - "step": 7490 - }, - { - "epoch": 0.9007394937774305, - "grad_norm": 1.8242302512351571, - "learning_rate": 1.023649396484596e-07, - "loss": 0.9853, - "step": 7491 - }, - { - "epoch": 0.9008597366680695, - "grad_norm": 1.9249655212683419, - "learning_rate": 1.0211906503699275e-07, - "loss": 0.8727, - "step": 7492 - }, - { - "epoch": 0.9009799795587086, - "grad_norm": 2.249091388770746, - "learning_rate": 1.0187347833255455e-07, - "loss": 1.0174, - "step": 7493 - }, - { - "epoch": 0.9011002224493477, - "grad_norm": 1.685578064171354, - "learning_rate": 1.0162817957240056e-07, - "loss": 0.9963, - "step": 7494 - }, - { - "epoch": 0.9012204653399868, - "grad_norm": 1.0039522628035977, - "learning_rate": 1.0138316879374253e-07, - "loss": 0.8788, - "step": 7495 - }, - { - "epoch": 0.9013407082306258, - "grad_norm": 2.227510163597613, - "learning_rate": 1.0113844603374833e-07, - "loss": 0.9358, - "step": 7496 - }, - { - "epoch": 0.901460951121265, - "grad_norm": 1.9890879135725832, - "learning_rate": 1.0089401132954178e-07, - "loss": 0.9137, - "step": 7497 - }, - { - "epoch": 0.9015811940119041, - "grad_norm": 1.5120601613524087, - "learning_rate": 1.006498647182037e-07, - "loss": 0.9247, - "step": 7498 - }, - { - "epoch": 0.9017014369025431, - "grad_norm": 1.861338692441966, - "learning_rate": 1.004060062367713e-07, - "loss": 0.9176, - "step": 7499 - }, - { - "epoch": 0.9018216797931822, - "grad_norm": 1.7324901339427548, - "learning_rate": 1.0016243592223728e-07, - "loss": 0.893, - "step": 7500 - }, - { - "epoch": 0.9019419226838213, - "grad_norm": 1.6530481014141691, - "learning_rate": 9.991915381155114e-08, - "loss": 0.8416, - "step": 7501 - }, - { - "epoch": 0.9020621655744604, - "grad_norm": 1.9100288083365489, - "learning_rate": 9.967615994161871e-08, - "loss": 0.952, - "step": 7502 - }, - { - "epoch": 0.9021824084650995, - "grad_norm": 1.632258490067246, - "learning_rate": 9.943345434930161e-08, - "loss": 0.9682, - "step": 7503 - }, - { - "epoch": 0.9023026513557386, - "grad_norm": 2.7896787673165693, - "learning_rate": 9.919103707141885e-08, - "loss": 0.8849, - "step": 7504 - }, - { - "epoch": 0.9024228942463777, - "grad_norm": 1.7969349726744448, - "learning_rate": 9.89489081447441e-08, - "loss": 0.966, - "step": 7505 - }, - { - "epoch": 0.9025431371370167, - "grad_norm": 1.70302511499761, - "learning_rate": 9.870706760600844e-08, - "loss": 1.0297, - "step": 7506 - }, - { - "epoch": 0.9026633800276559, - "grad_norm": 1.8648947841020078, - "learning_rate": 9.846551549189918e-08, - "loss": 0.9282, - "step": 7507 - }, - { - "epoch": 0.902783622918295, - "grad_norm": 3.559809957425171, - "learning_rate": 9.822425183905902e-08, - "loss": 0.8871, - "step": 7508 - }, - { - "epoch": 0.902903865808934, - "grad_norm": 0.9655995362513771, - "learning_rate": 9.798327668408823e-08, - "loss": 0.9669, - "step": 7509 - }, - { - "epoch": 0.9030241086995732, - "grad_norm": 12.869163597689695, - "learning_rate": 9.774259006354158e-08, - "loss": 0.8878, - "step": 7510 - }, - { - "epoch": 0.9031443515902122, - "grad_norm": 1.6514929346250486, - "learning_rate": 9.750219201393184e-08, - "loss": 0.9565, - "step": 7511 - }, - { - "epoch": 0.9032645944808513, - "grad_norm": 1.6389562203249126, - "learning_rate": 9.726208257172697e-08, - "loss": 0.9773, - "step": 7512 - }, - { - "epoch": 0.9033848373714904, - "grad_norm": 1.8349072101841994, - "learning_rate": 9.702226177335115e-08, - "loss": 0.9478, - "step": 7513 - }, - { - "epoch": 0.9035050802621295, - "grad_norm": 1.4561382758378358, - "learning_rate": 9.67827296551853e-08, - "loss": 0.9237, - "step": 7514 - }, - { - "epoch": 0.9036253231527686, - "grad_norm": 1.8183024691518543, - "learning_rate": 9.65434862535659e-08, - "loss": 0.8759, - "step": 7515 - }, - { - "epoch": 0.9037455660434077, - "grad_norm": 2.2969371761288473, - "learning_rate": 9.630453160478635e-08, - "loss": 0.8515, - "step": 7516 - }, - { - "epoch": 0.9038658089340468, - "grad_norm": 1.4912218894131637, - "learning_rate": 9.60658657450959e-08, - "loss": 1.0149, - "step": 7517 - }, - { - "epoch": 0.9039860518246858, - "grad_norm": 1.6424842109095472, - "learning_rate": 9.582748871069979e-08, - "loss": 0.9908, - "step": 7518 - }, - { - "epoch": 0.904106294715325, - "grad_norm": 1.8883589097681697, - "learning_rate": 9.558940053775954e-08, - "loss": 1.0225, - "step": 7519 - }, - { - "epoch": 0.904226537605964, - "grad_norm": 1.802814386809511, - "learning_rate": 9.535160126239294e-08, - "loss": 0.8832, - "step": 7520 - }, - { - "epoch": 0.9043467804966031, - "grad_norm": 1.4327247750852878, - "learning_rate": 9.511409092067424e-08, - "loss": 0.9065, - "step": 7521 - }, - { - "epoch": 0.9044670233872423, - "grad_norm": 1.8586548765119602, - "learning_rate": 9.487686954863327e-08, - "loss": 0.8713, - "step": 7522 - }, - { - "epoch": 0.9045872662778813, - "grad_norm": 3.1222372431033616, - "learning_rate": 9.46399371822566e-08, - "loss": 0.9679, - "step": 7523 - }, - { - "epoch": 0.9047075091685204, - "grad_norm": 1.850040115746306, - "learning_rate": 9.440329385748657e-08, - "loss": 0.9107, - "step": 7524 - }, - { - "epoch": 0.9048277520591596, - "grad_norm": 1.6729512538477787, - "learning_rate": 9.416693961022137e-08, - "loss": 0.9075, - "step": 7525 - }, - { - "epoch": 0.9049479949497986, - "grad_norm": 1.538832284197716, - "learning_rate": 9.393087447631654e-08, - "loss": 0.9727, - "step": 7526 - }, - { - "epoch": 0.9050682378404377, - "grad_norm": 1.4291540933098559, - "learning_rate": 9.36950984915823e-08, - "loss": 0.9186, - "step": 7527 - }, - { - "epoch": 0.9051884807310768, - "grad_norm": 3.273508002858003, - "learning_rate": 9.345961169178607e-08, - "loss": 0.8946, - "step": 7528 - }, - { - "epoch": 0.9053087236217159, - "grad_norm": 1.3367575643713152, - "learning_rate": 9.322441411265081e-08, - "loss": 0.9244, - "step": 7529 - }, - { - "epoch": 0.9054289665123549, - "grad_norm": 1.7034894220881978, - "learning_rate": 9.298950578985554e-08, - "loss": 0.9328, - "step": 7530 - }, - { - "epoch": 0.905549209402994, - "grad_norm": 1.6542008877664776, - "learning_rate": 9.275488675903665e-08, - "loss": 0.9112, - "step": 7531 - }, - { - "epoch": 0.9056694522936332, - "grad_norm": 1.7725732520802853, - "learning_rate": 9.252055705578454e-08, - "loss": 0.9318, - "step": 7532 - }, - { - "epoch": 0.9057896951842722, - "grad_norm": 1.5804273221841543, - "learning_rate": 9.228651671564747e-08, - "loss": 0.9128, - "step": 7533 - }, - { - "epoch": 0.9059099380749113, - "grad_norm": 1.4794459370349196, - "learning_rate": 9.205276577412901e-08, - "loss": 0.9797, - "step": 7534 - }, - { - "epoch": 0.9060301809655504, - "grad_norm": 2.16257742594328, - "learning_rate": 9.181930426668905e-08, - "loss": 0.9701, - "step": 7535 - }, - { - "epoch": 0.9061504238561895, - "grad_norm": 2.430099010883662, - "learning_rate": 9.158613222874346e-08, - "loss": 0.8795, - "step": 7536 - }, - { - "epoch": 0.9062706667468285, - "grad_norm": 1.6730354061085417, - "learning_rate": 9.135324969566394e-08, - "loss": 1.0178, - "step": 7537 - }, - { - "epoch": 0.9063909096374677, - "grad_norm": 2.101082218694827, - "learning_rate": 9.112065670277913e-08, - "loss": 0.9472, - "step": 7538 - }, - { - "epoch": 0.9065111525281068, - "grad_norm": 1.7351598939681316, - "learning_rate": 9.088835328537303e-08, - "loss": 0.9211, - "step": 7539 - }, - { - "epoch": 0.9066313954187458, - "grad_norm": 2.6300720078796473, - "learning_rate": 9.065633947868568e-08, - "loss": 0.9124, - "step": 7540 - }, - { - "epoch": 0.906751638309385, - "grad_norm": 1.9565907361354544, - "learning_rate": 9.042461531791379e-08, - "loss": 0.9925, - "step": 7541 - }, - { - "epoch": 0.906871881200024, - "grad_norm": 1.5472579636127697, - "learning_rate": 9.019318083820903e-08, - "loss": 0.9711, - "step": 7542 - }, - { - "epoch": 0.9069921240906631, - "grad_norm": 1.5277909894815107, - "learning_rate": 8.996203607468045e-08, - "loss": 1.0468, - "step": 7543 - }, - { - "epoch": 0.9071123669813023, - "grad_norm": 1.3662373320052439, - "learning_rate": 8.973118106239241e-08, - "loss": 0.9536, - "step": 7544 - }, - { - "epoch": 0.9072326098719413, - "grad_norm": 1.9730769851250982, - "learning_rate": 8.95006158363656e-08, - "loss": 1.1424, - "step": 7545 - }, - { - "epoch": 0.9073528527625804, - "grad_norm": 1.805517294877748, - "learning_rate": 8.9270340431576e-08, - "loss": 0.9738, - "step": 7546 - }, - { - "epoch": 0.9074730956532195, - "grad_norm": 1.870126239167063, - "learning_rate": 8.904035488295658e-08, - "loss": 0.9362, - "step": 7547 - }, - { - "epoch": 0.9075933385438586, - "grad_norm": 0.735470502538074, - "learning_rate": 8.881065922539632e-08, - "loss": 0.7593, - "step": 7548 - }, - { - "epoch": 0.9077135814344977, - "grad_norm": 1.6304186196839103, - "learning_rate": 8.85812534937389e-08, - "loss": 0.9378, - "step": 7549 - }, - { - "epoch": 0.9078338243251368, - "grad_norm": 3.7398606873131324, - "learning_rate": 8.835213772278583e-08, - "loss": 0.869, - "step": 7550 - }, - { - "epoch": 0.9079540672157759, - "grad_norm": 1.567843037475733, - "learning_rate": 8.812331194729373e-08, - "loss": 0.9796, - "step": 7551 - }, - { - "epoch": 0.9080743101064149, - "grad_norm": 1.7129440038317252, - "learning_rate": 8.789477620197461e-08, - "loss": 0.9226, - "step": 7552 - }, - { - "epoch": 0.9081945529970541, - "grad_norm": 2.061185475687607, - "learning_rate": 8.766653052149831e-08, - "loss": 0.9931, - "step": 7553 - }, - { - "epoch": 0.9083147958876931, - "grad_norm": 1.814330327833916, - "learning_rate": 8.743857494048823e-08, - "loss": 0.939, - "step": 7554 - }, - { - "epoch": 0.9084350387783322, - "grad_norm": 1.8079829657393585, - "learning_rate": 8.721090949352605e-08, - "loss": 0.8361, - "step": 7555 - }, - { - "epoch": 0.9085552816689714, - "grad_norm": 1.7018213444775239, - "learning_rate": 8.698353421514793e-08, - "loss": 0.9275, - "step": 7556 - }, - { - "epoch": 0.9086755245596104, - "grad_norm": 2.087881489690024, - "learning_rate": 8.67564491398467e-08, - "loss": 0.9954, - "step": 7557 - }, - { - "epoch": 0.9087957674502495, - "grad_norm": 1.9783091310254217, - "learning_rate": 8.652965430207104e-08, - "loss": 0.9354, - "step": 7558 - }, - { - "epoch": 0.9089160103408886, - "grad_norm": 2.3635600784635953, - "learning_rate": 8.630314973622521e-08, - "loss": 0.8556, - "step": 7559 - }, - { - "epoch": 0.9090362532315277, - "grad_norm": 1.7809963380325085, - "learning_rate": 8.607693547666995e-08, - "loss": 0.9066, - "step": 7560 - }, - { - "epoch": 0.9091564961221668, - "grad_norm": 0.9423014709470892, - "learning_rate": 8.585101155772201e-08, - "loss": 0.8213, - "step": 7561 - }, - { - "epoch": 0.9092767390128058, - "grad_norm": 1.598381940933256, - "learning_rate": 8.562537801365377e-08, - "loss": 0.8889, - "step": 7562 - }, - { - "epoch": 0.909396981903445, - "grad_norm": 1.5658171728063899, - "learning_rate": 8.540003487869362e-08, - "loss": 0.8961, - "step": 7563 - }, - { - "epoch": 0.909517224794084, - "grad_norm": 1.733546291009418, - "learning_rate": 8.517498218702557e-08, - "loss": 0.9957, - "step": 7564 - }, - { - "epoch": 0.9096374676847231, - "grad_norm": 1.608466549365151, - "learning_rate": 8.49502199727905e-08, - "loss": 0.897, - "step": 7565 - }, - { - "epoch": 0.9097577105753623, - "grad_norm": 2.4285254934919425, - "learning_rate": 8.472574827008428e-08, - "loss": 0.8551, - "step": 7566 - }, - { - "epoch": 0.9098779534660013, - "grad_norm": 1.5001061719830633, - "learning_rate": 8.450156711295942e-08, - "loss": 1.0337, - "step": 7567 - }, - { - "epoch": 0.9099981963566404, - "grad_norm": 2.0195504931007173, - "learning_rate": 8.427767653542383e-08, - "loss": 1.0609, - "step": 7568 - }, - { - "epoch": 0.9101184392472795, - "grad_norm": 1.6776925238295455, - "learning_rate": 8.405407657144125e-08, - "loss": 0.8985, - "step": 7569 - }, - { - "epoch": 0.9102386821379186, - "grad_norm": 1.6695364881048649, - "learning_rate": 8.383076725493232e-08, - "loss": 0.9177, - "step": 7570 - }, - { - "epoch": 0.9103589250285576, - "grad_norm": 1.9293174852226915, - "learning_rate": 8.360774861977216e-08, - "loss": 0.8803, - "step": 7571 - }, - { - "epoch": 0.9104791679191968, - "grad_norm": 1.7344602271782095, - "learning_rate": 8.338502069979281e-08, - "loss": 0.9514, - "step": 7572 - }, - { - "epoch": 0.9105994108098359, - "grad_norm": 2.4398588846392584, - "learning_rate": 8.316258352878214e-08, - "loss": 0.9918, - "step": 7573 - }, - { - "epoch": 0.9107196537004749, - "grad_norm": 1.8124746781257512, - "learning_rate": 8.294043714048338e-08, - "loss": 0.9091, - "step": 7574 - }, - { - "epoch": 0.9108398965911141, - "grad_norm": 0.7997322468911262, - "learning_rate": 8.271858156859624e-08, - "loss": 0.8395, - "step": 7575 - }, - { - "epoch": 0.9109601394817531, - "grad_norm": 1.511706606005405, - "learning_rate": 8.249701684677557e-08, - "loss": 0.9345, - "step": 7576 - }, - { - "epoch": 0.9110803823723922, - "grad_norm": 1.598686128821326, - "learning_rate": 8.227574300863294e-08, - "loss": 1.0053, - "step": 7577 - }, - { - "epoch": 0.9112006252630314, - "grad_norm": 1.587171854061094, - "learning_rate": 8.205476008773548e-08, - "loss": 0.898, - "step": 7578 - }, - { - "epoch": 0.9113208681536704, - "grad_norm": 2.524971416047213, - "learning_rate": 8.183406811760596e-08, - "loss": 1.0211, - "step": 7579 - }, - { - "epoch": 0.9114411110443095, - "grad_norm": 1.4575996824172057, - "learning_rate": 8.161366713172313e-08, - "loss": 0.9444, - "step": 7580 - }, - { - "epoch": 0.9115613539349486, - "grad_norm": 2.4263463245199004, - "learning_rate": 8.139355716352137e-08, - "loss": 1.0516, - "step": 7581 - }, - { - "epoch": 0.9116815968255877, - "grad_norm": 1.7717936286878027, - "learning_rate": 8.117373824639196e-08, - "loss": 0.9035, - "step": 7582 - }, - { - "epoch": 0.9118018397162267, - "grad_norm": 1.1783452260731517, - "learning_rate": 8.095421041368067e-08, - "loss": 0.8051, - "step": 7583 - }, - { - "epoch": 0.9119220826068659, - "grad_norm": 1.9581643971833633, - "learning_rate": 8.073497369868999e-08, - "loss": 0.9118, - "step": 7584 - }, - { - "epoch": 0.912042325497505, - "grad_norm": 12.015556424544235, - "learning_rate": 8.051602813467772e-08, - "loss": 0.9502, - "step": 7585 - }, - { - "epoch": 0.912162568388144, - "grad_norm": 1.496502613742125, - "learning_rate": 8.029737375485756e-08, - "loss": 0.9136, - "step": 7586 - }, - { - "epoch": 0.9122828112787832, - "grad_norm": 1.783458144137125, - "learning_rate": 8.007901059239986e-08, - "loss": 0.926, - "step": 7587 - }, - { - "epoch": 0.9124030541694222, - "grad_norm": 1.51135217919117, - "learning_rate": 7.986093868042964e-08, - "loss": 0.9998, - "step": 7588 - }, - { - "epoch": 0.9125232970600613, - "grad_norm": 2.059832958189404, - "learning_rate": 7.964315805202826e-08, - "loss": 0.8752, - "step": 7589 - }, - { - "epoch": 0.9126435399507005, - "grad_norm": 1.829506311477154, - "learning_rate": 7.942566874023304e-08, - "loss": 0.9319, - "step": 7590 - }, - { - "epoch": 0.9127637828413395, - "grad_norm": 2.124213690294004, - "learning_rate": 7.920847077803649e-08, - "loss": 0.8974, - "step": 7591 - }, - { - "epoch": 0.9128840257319786, - "grad_norm": 2.020999337945119, - "learning_rate": 7.899156419838826e-08, - "loss": 1.019, - "step": 7592 - }, - { - "epoch": 0.9130042686226177, - "grad_norm": 1.8263148940278144, - "learning_rate": 7.87749490341918e-08, - "loss": 0.859, - "step": 7593 - }, - { - "epoch": 0.9131245115132568, - "grad_norm": 2.351337612209135, - "learning_rate": 7.855862531830836e-08, - "loss": 1.0398, - "step": 7594 - }, - { - "epoch": 0.9132447544038959, - "grad_norm": 1.5178578240529303, - "learning_rate": 7.834259308355373e-08, - "loss": 0.9229, - "step": 7595 - }, - { - "epoch": 0.9133649972945349, - "grad_norm": 1.876568074976432, - "learning_rate": 7.812685236269989e-08, - "loss": 0.9411, - "step": 7596 - }, - { - "epoch": 0.9134852401851741, - "grad_norm": 0.8732847022088848, - "learning_rate": 7.791140318847445e-08, - "loss": 0.8115, - "step": 7597 - }, - { - "epoch": 0.9136054830758131, - "grad_norm": 1.3114559471506595, - "learning_rate": 7.769624559356081e-08, - "loss": 0.9923, - "step": 7598 - }, - { - "epoch": 0.9137257259664522, - "grad_norm": 2.570998080110074, - "learning_rate": 7.748137961059842e-08, - "loss": 0.9553, - "step": 7599 - }, - { - "epoch": 0.9138459688570914, - "grad_norm": 2.1769133384322936, - "learning_rate": 7.726680527218211e-08, - "loss": 0.865, - "step": 7600 - }, - { - "epoch": 0.9139662117477304, - "grad_norm": 2.4509862821919595, - "learning_rate": 7.70525226108627e-08, - "loss": 0.9514, - "step": 7601 - }, - { - "epoch": 0.9140864546383695, - "grad_norm": 2.9462880914400125, - "learning_rate": 7.683853165914666e-08, - "loss": 1.002, - "step": 7602 - }, - { - "epoch": 0.9142066975290086, - "grad_norm": 1.5984785207069563, - "learning_rate": 7.662483244949602e-08, - "loss": 0.9705, - "step": 7603 - }, - { - "epoch": 0.9143269404196477, - "grad_norm": 2.288879803862746, - "learning_rate": 7.641142501432951e-08, - "loss": 1.0038, - "step": 7604 - }, - { - "epoch": 0.9144471833102867, - "grad_norm": 1.4997114062843164, - "learning_rate": 7.619830938602013e-08, - "loss": 0.9379, - "step": 7605 - }, - { - "epoch": 0.9145674262009259, - "grad_norm": 1.902568639351668, - "learning_rate": 7.598548559689777e-08, - "loss": 1.0079, - "step": 7606 - }, - { - "epoch": 0.914687669091565, - "grad_norm": 2.1502633399261293, - "learning_rate": 7.577295367924751e-08, - "loss": 1.0042, - "step": 7607 - }, - { - "epoch": 0.914807911982204, - "grad_norm": 1.718930113631711, - "learning_rate": 7.556071366531002e-08, - "loss": 1.0227, - "step": 7608 - }, - { - "epoch": 0.9149281548728432, - "grad_norm": 2.372043677000262, - "learning_rate": 7.53487655872822e-08, - "loss": 0.9794, - "step": 7609 - }, - { - "epoch": 0.9150483977634822, - "grad_norm": 1.6464264521246794, - "learning_rate": 7.513710947731656e-08, - "loss": 0.9407, - "step": 7610 - }, - { - "epoch": 0.9151686406541213, - "grad_norm": 1.912901981967384, - "learning_rate": 7.492574536752095e-08, - "loss": 1.0468, - "step": 7611 - }, - { - "epoch": 0.9152888835447605, - "grad_norm": 1.6575495102336257, - "learning_rate": 7.471467328995907e-08, - "loss": 0.9804, - "step": 7612 - }, - { - "epoch": 0.9154091264353995, - "grad_norm": 2.0829363889829464, - "learning_rate": 7.450389327665018e-08, - "loss": 0.8102, - "step": 7613 - }, - { - "epoch": 0.9155293693260386, - "grad_norm": 2.1096390170421486, - "learning_rate": 7.429340535957029e-08, - "loss": 0.8761, - "step": 7614 - }, - { - "epoch": 0.9156496122166777, - "grad_norm": 3.2858060178331607, - "learning_rate": 7.40832095706494e-08, - "loss": 0.9144, - "step": 7615 - }, - { - "epoch": 0.9157698551073168, - "grad_norm": 1.6314706353535746, - "learning_rate": 7.387330594177443e-08, - "loss": 0.9955, - "step": 7616 - }, - { - "epoch": 0.9158900979979558, - "grad_norm": 1.6264559019192808, - "learning_rate": 7.366369450478749e-08, - "loss": 0.9856, - "step": 7617 - }, - { - "epoch": 0.916010340888595, - "grad_norm": 1.662066341430278, - "learning_rate": 7.345437529148646e-08, - "loss": 0.856, - "step": 7618 - }, - { - "epoch": 0.9161305837792341, - "grad_norm": 1.8576783846482106, - "learning_rate": 7.324534833362483e-08, - "loss": 0.9337, - "step": 7619 - }, - { - "epoch": 0.9162508266698731, - "grad_norm": 1.7493130834374668, - "learning_rate": 7.303661366291192e-08, - "loss": 0.8752, - "step": 7620 - }, - { - "epoch": 0.9163710695605123, - "grad_norm": 1.6121960700987248, - "learning_rate": 7.28281713110126e-08, - "loss": 1.013, - "step": 7621 - }, - { - "epoch": 0.9164913124511513, - "grad_norm": 1.795196478990002, - "learning_rate": 7.262002130954759e-08, - "loss": 0.9711, - "step": 7622 - }, - { - "epoch": 0.9166115553417904, - "grad_norm": 1.8731438668350642, - "learning_rate": 7.241216369009296e-08, - "loss": 0.9891, - "step": 7623 - }, - { - "epoch": 0.9167317982324296, - "grad_norm": 2.2824181479143366, - "learning_rate": 7.220459848418037e-08, - "loss": 0.8655, - "step": 7624 - }, - { - "epoch": 0.9168520411230686, - "grad_norm": 1.621197727040077, - "learning_rate": 7.199732572329708e-08, - "loss": 0.9912, - "step": 7625 - }, - { - "epoch": 0.9169722840137077, - "grad_norm": 1.856464704947043, - "learning_rate": 7.179034543888684e-08, - "loss": 0.9627, - "step": 7626 - }, - { - "epoch": 0.9170925269043467, - "grad_norm": 1.8952580581733252, - "learning_rate": 7.158365766234808e-08, - "loss": 0.9771, - "step": 7627 - }, - { - "epoch": 0.9172127697949859, - "grad_norm": 1.723464464822274, - "learning_rate": 7.137726242503527e-08, - "loss": 0.9187, - "step": 7628 - }, - { - "epoch": 0.917333012685625, - "grad_norm": 2.477887686566433, - "learning_rate": 7.11711597582585e-08, - "loss": 0.9822, - "step": 7629 - }, - { - "epoch": 0.917453255576264, - "grad_norm": 1.7287134089703833, - "learning_rate": 7.096534969328271e-08, - "loss": 0.9983, - "step": 7630 - }, - { - "epoch": 0.9175734984669032, - "grad_norm": 1.9407427323605901, - "learning_rate": 7.075983226132987e-08, - "loss": 1.0421, - "step": 7631 - }, - { - "epoch": 0.9176937413575422, - "grad_norm": 2.5444412304535415, - "learning_rate": 7.055460749357656e-08, - "loss": 0.9839, - "step": 7632 - }, - { - "epoch": 0.9178139842481813, - "grad_norm": 1.5650839992076482, - "learning_rate": 7.034967542115521e-08, - "loss": 0.8991, - "step": 7633 - }, - { - "epoch": 0.9179342271388204, - "grad_norm": 1.8741877385921444, - "learning_rate": 7.014503607515388e-08, - "loss": 0.9503, - "step": 7634 - }, - { - "epoch": 0.9180544700294595, - "grad_norm": 1.9215015575790744, - "learning_rate": 6.994068948661592e-08, - "loss": 0.8725, - "step": 7635 - }, - { - "epoch": 0.9181747129200986, - "grad_norm": 2.2740082434015823, - "learning_rate": 6.973663568654142e-08, - "loss": 0.9591, - "step": 7636 - }, - { - "epoch": 0.9182949558107377, - "grad_norm": 2.0624868219128007, - "learning_rate": 6.953287470588386e-08, - "loss": 0.8459, - "step": 7637 - }, - { - "epoch": 0.9184151987013768, - "grad_norm": 2.311830681517594, - "learning_rate": 6.932940657555452e-08, - "loss": 1.0545, - "step": 7638 - }, - { - "epoch": 0.9185354415920158, - "grad_norm": 1.3426083882938527, - "learning_rate": 6.912623132641938e-08, - "loss": 0.9522, - "step": 7639 - }, - { - "epoch": 0.918655684482655, - "grad_norm": 1.7047839702998253, - "learning_rate": 6.892334898929952e-08, - "loss": 0.959, - "step": 7640 - }, - { - "epoch": 0.918775927373294, - "grad_norm": 1.8033953247532608, - "learning_rate": 6.872075959497236e-08, - "loss": 1.0457, - "step": 7641 - }, - { - "epoch": 0.9188961702639331, - "grad_norm": 1.7258136212622939, - "learning_rate": 6.85184631741702e-08, - "loss": 1.0239, - "step": 7642 - }, - { - "epoch": 0.9190164131545723, - "grad_norm": 2.048035503780062, - "learning_rate": 6.831645975758161e-08, - "loss": 0.9739, - "step": 7643 - }, - { - "epoch": 0.9191366560452113, - "grad_norm": 1.7558578134769545, - "learning_rate": 6.811474937585026e-08, - "loss": 0.88, - "step": 7644 - }, - { - "epoch": 0.9192568989358504, - "grad_norm": 1.5451891411123504, - "learning_rate": 6.79133320595755e-08, - "loss": 0.9829, - "step": 7645 - }, - { - "epoch": 0.9193771418264896, - "grad_norm": 1.7560339173667063, - "learning_rate": 6.771220783931198e-08, - "loss": 0.9436, - "step": 7646 - }, - { - "epoch": 0.9194973847171286, - "grad_norm": 0.8788831919718428, - "learning_rate": 6.751137674556994e-08, - "loss": 0.8708, - "step": 7647 - }, - { - "epoch": 0.9196176276077677, - "grad_norm": 2.115571365916231, - "learning_rate": 6.731083880881572e-08, - "loss": 0.981, - "step": 7648 - }, - { - "epoch": 0.9197378704984068, - "grad_norm": 2.016646015702539, - "learning_rate": 6.711059405947072e-08, - "loss": 1.013, - "step": 7649 - }, - { - "epoch": 0.9198581133890459, - "grad_norm": 1.9922421915363475, - "learning_rate": 6.691064252791156e-08, - "loss": 0.9711, - "step": 7650 - }, - { - "epoch": 0.9199783562796849, - "grad_norm": 1.4913828013171861, - "learning_rate": 6.67109842444713e-08, - "loss": 0.9748, - "step": 7651 - }, - { - "epoch": 0.9200985991703241, - "grad_norm": 1.7749709838859578, - "learning_rate": 6.651161923943704e-08, - "loss": 0.9675, - "step": 7652 - }, - { - "epoch": 0.9202188420609632, - "grad_norm": 2.7319812869356825, - "learning_rate": 6.631254754305326e-08, - "loss": 0.9609, - "step": 7653 - }, - { - "epoch": 0.9203390849516022, - "grad_norm": 1.9317295528419283, - "learning_rate": 6.611376918551848e-08, - "loss": 0.9852, - "step": 7654 - }, - { - "epoch": 0.9204593278422414, - "grad_norm": 2.048820910154718, - "learning_rate": 6.591528419698744e-08, - "loss": 0.9966, - "step": 7655 - }, - { - "epoch": 0.9205795707328804, - "grad_norm": 2.665879868624614, - "learning_rate": 6.571709260756986e-08, - "loss": 1.0373, - "step": 7656 - }, - { - "epoch": 0.9206998136235195, - "grad_norm": 2.204926099511973, - "learning_rate": 6.551919444733122e-08, - "loss": 0.9602, - "step": 7657 - }, - { - "epoch": 0.9208200565141585, - "grad_norm": 1.7676123178452112, - "learning_rate": 6.53215897462931e-08, - "loss": 0.8493, - "step": 7658 - }, - { - "epoch": 0.9209402994047977, - "grad_norm": 1.8004570460218434, - "learning_rate": 6.512427853443103e-08, - "loss": 0.955, - "step": 7659 - }, - { - "epoch": 0.9210605422954368, - "grad_norm": 1.8247203586531622, - "learning_rate": 6.492726084167799e-08, - "loss": 0.9602, - "step": 7660 - }, - { - "epoch": 0.9211807851860758, - "grad_norm": 0.8136576328505382, - "learning_rate": 6.473053669792072e-08, - "loss": 0.7767, - "step": 7661 - }, - { - "epoch": 0.921301028076715, - "grad_norm": 2.4121683831661374, - "learning_rate": 6.453410613300248e-08, - "loss": 0.929, - "step": 7662 - }, - { - "epoch": 0.921421270967354, - "grad_norm": 1.5076943781148326, - "learning_rate": 6.43379691767214e-08, - "loss": 0.7809, - "step": 7663 - }, - { - "epoch": 0.9215415138579931, - "grad_norm": 0.8030331095215146, - "learning_rate": 6.414212585883105e-08, - "loss": 0.8072, - "step": 7664 - }, - { - "epoch": 0.9216617567486323, - "grad_norm": 1.5599437402704104, - "learning_rate": 6.394657620904143e-08, - "loss": 0.8964, - "step": 7665 - }, - { - "epoch": 0.9217819996392713, - "grad_norm": 1.708104056192601, - "learning_rate": 6.375132025701657e-08, - "loss": 0.9153, - "step": 7666 - }, - { - "epoch": 0.9219022425299104, - "grad_norm": 2.1608312838606327, - "learning_rate": 6.355635803237724e-08, - "loss": 0.8925, - "step": 7667 - }, - { - "epoch": 0.9220224854205495, - "grad_norm": 2.0698955763862954, - "learning_rate": 6.336168956469867e-08, - "loss": 1.0044, - "step": 7668 - }, - { - "epoch": 0.9221427283111886, - "grad_norm": 1.696594175702483, - "learning_rate": 6.316731488351168e-08, - "loss": 0.9202, - "step": 7669 - }, - { - "epoch": 0.9222629712018277, - "grad_norm": 1.8192009540471483, - "learning_rate": 6.297323401830334e-08, - "loss": 0.8427, - "step": 7670 - }, - { - "epoch": 0.9223832140924668, - "grad_norm": 1.874662814021504, - "learning_rate": 6.277944699851523e-08, - "loss": 0.8885, - "step": 7671 - }, - { - "epoch": 0.9225034569831059, - "grad_norm": 2.1587964929500956, - "learning_rate": 6.25859538535447e-08, - "loss": 0.9303, - "step": 7672 - }, - { - "epoch": 0.9226236998737449, - "grad_norm": 2.5588048017975877, - "learning_rate": 6.239275461274474e-08, - "loss": 0.9768, - "step": 7673 - }, - { - "epoch": 0.9227439427643841, - "grad_norm": 2.390598072573532, - "learning_rate": 6.219984930542299e-08, - "loss": 1.0552, - "step": 7674 - }, - { - "epoch": 0.9228641856550232, - "grad_norm": 2.3687734798988833, - "learning_rate": 6.200723796084383e-08, - "loss": 0.9537, - "step": 7675 - }, - { - "epoch": 0.9229844285456622, - "grad_norm": 0.7847115695046513, - "learning_rate": 6.181492060822546e-08, - "loss": 0.8389, - "step": 7676 - }, - { - "epoch": 0.9231046714363014, - "grad_norm": 2.0290011354746866, - "learning_rate": 6.162289727674274e-08, - "loss": 1.0185, - "step": 7677 - }, - { - "epoch": 0.9232249143269404, - "grad_norm": 2.072334770178004, - "learning_rate": 6.143116799552527e-08, - "loss": 1.0809, - "step": 7678 - }, - { - "epoch": 0.9233451572175795, - "grad_norm": 2.185088872433368, - "learning_rate": 6.123973279365802e-08, - "loss": 0.7569, - "step": 7679 - }, - { - "epoch": 0.9234654001082186, - "grad_norm": 1.7257463416268417, - "learning_rate": 6.10485917001824e-08, - "loss": 0.9836, - "step": 7680 - }, - { - "epoch": 0.9235856429988577, - "grad_norm": 1.3924558353097842, - "learning_rate": 6.085774474409322e-08, - "loss": 1.0067, - "step": 7681 - }, - { - "epoch": 0.9237058858894968, - "grad_norm": 1.82952722748218, - "learning_rate": 6.066719195434267e-08, - "loss": 0.9024, - "step": 7682 - }, - { - "epoch": 0.9238261287801359, - "grad_norm": 1.735190260071633, - "learning_rate": 6.047693335983717e-08, - "loss": 0.864, - "step": 7683 - }, - { - "epoch": 0.923946371670775, - "grad_norm": 2.5886094962634876, - "learning_rate": 6.028696898943853e-08, - "loss": 1.0183, - "step": 7684 - }, - { - "epoch": 0.924066614561414, - "grad_norm": 1.7024188896497188, - "learning_rate": 6.00972988719648e-08, - "loss": 0.9072, - "step": 7685 - }, - { - "epoch": 0.9241868574520532, - "grad_norm": 2.156254312262162, - "learning_rate": 5.990792303618807e-08, - "loss": 0.919, - "step": 7686 - }, - { - "epoch": 0.9243071003426923, - "grad_norm": 1.4669733055665992, - "learning_rate": 5.971884151083695e-08, - "loss": 0.8996, - "step": 7687 - }, - { - "epoch": 0.9244273432333313, - "grad_norm": 1.7223622596713035, - "learning_rate": 5.9530054324595124e-08, - "loss": 0.9454, - "step": 7688 - }, - { - "epoch": 0.9245475861239704, - "grad_norm": 0.7737093361074203, - "learning_rate": 5.934156150610103e-08, - "loss": 0.7921, - "step": 7689 - }, - { - "epoch": 0.9246678290146095, - "grad_norm": 1.9712912491937737, - "learning_rate": 5.915336308394914e-08, - "loss": 0.984, - "step": 7690 - }, - { - "epoch": 0.9247880719052486, - "grad_norm": 1.501968919450235, - "learning_rate": 5.89654590866886e-08, - "loss": 0.9744, - "step": 7691 - }, - { - "epoch": 0.9249083147958876, - "grad_norm": 1.7078698235024095, - "learning_rate": 5.877784954282483e-08, - "loss": 1.0817, - "step": 7692 - }, - { - "epoch": 0.9250285576865268, - "grad_norm": 1.8235467167092925, - "learning_rate": 5.8590534480817963e-08, - "loss": 0.9236, - "step": 7693 - }, - { - "epoch": 0.9251488005771659, - "grad_norm": 1.9700420318031093, - "learning_rate": 5.840351392908349e-08, - "loss": 0.9291, - "step": 7694 - }, - { - "epoch": 0.9252690434678049, - "grad_norm": 2.0423555055492577, - "learning_rate": 5.821678791599205e-08, - "loss": 0.9076, - "step": 7695 - }, - { - "epoch": 0.9253892863584441, - "grad_norm": 1.5672344196936792, - "learning_rate": 5.803035646986965e-08, - "loss": 1.0065, - "step": 7696 - }, - { - "epoch": 0.9255095292490831, - "grad_norm": 2.019345656819027, - "learning_rate": 5.7844219618998766e-08, - "loss": 0.8753, - "step": 7697 - }, - { - "epoch": 0.9256297721397222, - "grad_norm": 1.879054581146328, - "learning_rate": 5.765837739161505e-08, - "loss": 0.9112, - "step": 7698 - }, - { - "epoch": 0.9257500150303614, - "grad_norm": 1.4992987265035498, - "learning_rate": 5.7472829815911504e-08, - "loss": 0.95, - "step": 7699 - }, - { - "epoch": 0.9258702579210004, - "grad_norm": 1.5730033328535757, - "learning_rate": 5.7287576920035164e-08, - "loss": 1.0132, - "step": 7700 - }, - { - "epoch": 0.9259905008116395, - "grad_norm": 1.6320535456484409, - "learning_rate": 5.7102618732088435e-08, - "loss": 0.9565, - "step": 7701 - }, - { - "epoch": 0.9261107437022786, - "grad_norm": 1.6002418096923055, - "learning_rate": 5.6917955280130216e-08, - "loss": 0.9369, - "step": 7702 - }, - { - "epoch": 0.9262309865929177, - "grad_norm": 2.153013924666617, - "learning_rate": 5.6733586592172755e-08, - "loss": 0.9195, - "step": 7703 - }, - { - "epoch": 0.9263512294835567, - "grad_norm": 1.830013050074505, - "learning_rate": 5.6549512696185244e-08, - "loss": 1.0066, - "step": 7704 - }, - { - "epoch": 0.9264714723741959, - "grad_norm": 1.859449497301262, - "learning_rate": 5.636573362009156e-08, - "loss": 0.8836, - "step": 7705 - }, - { - "epoch": 0.926591715264835, - "grad_norm": 1.9172108195657973, - "learning_rate": 5.618224939177074e-08, - "loss": 0.962, - "step": 7706 - }, - { - "epoch": 0.926711958155474, - "grad_norm": 1.6455594651805916, - "learning_rate": 5.599906003905719e-08, - "loss": 0.8975, - "step": 7707 - }, - { - "epoch": 0.9268322010461132, - "grad_norm": 1.9056768337367995, - "learning_rate": 5.581616558974023e-08, - "loss": 1.017, - "step": 7708 - }, - { - "epoch": 0.9269524439367522, - "grad_norm": 1.6752666403498044, - "learning_rate": 5.5633566071565444e-08, - "loss": 0.986, - "step": 7709 - }, - { - "epoch": 0.9270726868273913, - "grad_norm": 1.8695731783611564, - "learning_rate": 5.5451261512232896e-08, - "loss": 0.8927, - "step": 7710 - }, - { - "epoch": 0.9271929297180305, - "grad_norm": 3.795702969224359, - "learning_rate": 5.5269251939397576e-08, - "loss": 0.8201, - "step": 7711 - }, - { - "epoch": 0.9273131726086695, - "grad_norm": 2.125289336309531, - "learning_rate": 5.508753738067073e-08, - "loss": 0.9644, - "step": 7712 - }, - { - "epoch": 0.9274334154993086, - "grad_norm": 1.978391200951747, - "learning_rate": 5.4906117863617875e-08, - "loss": 0.989, - "step": 7713 - }, - { - "epoch": 0.9275536583899477, - "grad_norm": 1.6915056935033281, - "learning_rate": 5.4724993415760533e-08, - "loss": 0.9842, - "step": 7714 - }, - { - "epoch": 0.9276739012805868, - "grad_norm": 2.156687042615682, - "learning_rate": 5.454416406457496e-08, - "loss": 0.9481, - "step": 7715 - }, - { - "epoch": 0.9277941441712259, - "grad_norm": 3.1603754861085416, - "learning_rate": 5.436362983749299e-08, - "loss": 0.9334, - "step": 7716 - }, - { - "epoch": 0.927914387061865, - "grad_norm": 1.8766326154303934, - "learning_rate": 5.418339076190137e-08, - "loss": 0.8393, - "step": 7717 - }, - { - "epoch": 0.9280346299525041, - "grad_norm": 1.626870472652952, - "learning_rate": 5.400344686514202e-08, - "loss": 1.0888, - "step": 7718 - }, - { - "epoch": 0.9281548728431431, - "grad_norm": 1.9286967834725899, - "learning_rate": 5.38237981745131e-08, - "loss": 0.8646, - "step": 7719 - }, - { - "epoch": 0.9282751157337822, - "grad_norm": 1.58664082683765, - "learning_rate": 5.364444471726592e-08, - "loss": 1.0102, - "step": 7720 - }, - { - "epoch": 0.9283953586244214, - "grad_norm": 1.8975005172708683, - "learning_rate": 5.346538652060939e-08, - "loss": 1.0042, - "step": 7721 - }, - { - "epoch": 0.9285156015150604, - "grad_norm": 1.7566360266593277, - "learning_rate": 5.3286623611705994e-08, - "loss": 0.9013, - "step": 7722 - }, - { - "epoch": 0.9286358444056995, - "grad_norm": 0.912109865110866, - "learning_rate": 5.3108156017673824e-08, - "loss": 0.8457, - "step": 7723 - }, - { - "epoch": 0.9287560872963386, - "grad_norm": 1.6969209415405861, - "learning_rate": 5.2929983765586775e-08, - "loss": 0.9128, - "step": 7724 - }, - { - "epoch": 0.9288763301869777, - "grad_norm": 1.671942165843817, - "learning_rate": 5.275210688247278e-08, - "loss": 0.8308, - "step": 7725 - }, - { - "epoch": 0.9289965730776167, - "grad_norm": 2.046489533681642, - "learning_rate": 5.257452539531604e-08, - "loss": 1.0478, - "step": 7726 - }, - { - "epoch": 0.9291168159682559, - "grad_norm": 4.767650536499461, - "learning_rate": 5.2397239331055445e-08, - "loss": 0.8833, - "step": 7727 - }, - { - "epoch": 0.929237058858895, - "grad_norm": 1.9524163753561352, - "learning_rate": 5.2220248716585036e-08, - "loss": 1.0026, - "step": 7728 - }, - { - "epoch": 0.929357301749534, - "grad_norm": 1.9162803412596545, - "learning_rate": 5.204355357875445e-08, - "loss": 0.9585, - "step": 7729 - }, - { - "epoch": 0.9294775446401732, - "grad_norm": 1.9790786589747826, - "learning_rate": 5.1867153944367584e-08, - "loss": 0.9081, - "step": 7730 - }, - { - "epoch": 0.9295977875308122, - "grad_norm": 1.4639754099474438, - "learning_rate": 5.16910498401848e-08, - "loss": 0.9362, - "step": 7731 - }, - { - "epoch": 0.9297180304214513, - "grad_norm": 1.9808084471212788, - "learning_rate": 5.151524129292073e-08, - "loss": 1.0325, - "step": 7732 - }, - { - "epoch": 0.9298382733120905, - "grad_norm": 1.8772538943461778, - "learning_rate": 5.1339728329245155e-08, - "loss": 0.8655, - "step": 7733 - }, - { - "epoch": 0.9299585162027295, - "grad_norm": 1.9617894362366, - "learning_rate": 5.116451097578367e-08, - "loss": 0.989, - "step": 7734 - }, - { - "epoch": 0.9300787590933686, - "grad_norm": 2.8499743075634862, - "learning_rate": 5.0989589259115895e-08, - "loss": 0.9377, - "step": 7735 - }, - { - "epoch": 0.9301990019840077, - "grad_norm": 1.7454935360747572, - "learning_rate": 5.081496320577816e-08, - "loss": 0.9037, - "step": 7736 - }, - { - "epoch": 0.9303192448746468, - "grad_norm": 0.9522289066056591, - "learning_rate": 5.0640632842260835e-08, - "loss": 0.8562, - "step": 7737 - }, - { - "epoch": 0.9304394877652858, - "grad_norm": 1.3645582366654514, - "learning_rate": 5.0466598195009426e-08, - "loss": 0.9199, - "step": 7738 - }, - { - "epoch": 0.930559730655925, - "grad_norm": 2.1538796042370474, - "learning_rate": 5.0292859290425036e-08, - "loss": 0.8998, - "step": 7739 - }, - { - "epoch": 0.9306799735465641, - "grad_norm": 1.9201276325480705, - "learning_rate": 5.011941615486348e-08, - "loss": 0.9748, - "step": 7740 - }, - { - "epoch": 0.9308002164372031, - "grad_norm": 3.373408613483245, - "learning_rate": 4.994626881463659e-08, - "loss": 1.0455, - "step": 7741 - }, - { - "epoch": 0.9309204593278423, - "grad_norm": 1.7709128268481795, - "learning_rate": 4.9773417296009814e-08, - "loss": 0.9132, - "step": 7742 - }, - { - "epoch": 0.9310407022184813, - "grad_norm": 1.8635730315018095, - "learning_rate": 4.960086162520527e-08, - "loss": 0.8509, - "step": 7743 - }, - { - "epoch": 0.9311609451091204, - "grad_norm": 1.8660971039409149, - "learning_rate": 4.942860182839936e-08, - "loss": 1.0217, - "step": 7744 - }, - { - "epoch": 0.9312811879997596, - "grad_norm": 1.7219294184620102, - "learning_rate": 4.925663793172341e-08, - "loss": 0.9851, - "step": 7745 - }, - { - "epoch": 0.9314014308903986, - "grad_norm": 0.8672953701803094, - "learning_rate": 4.908496996126477e-08, - "loss": 0.8071, - "step": 7746 - }, - { - "epoch": 0.9315216737810377, - "grad_norm": 1.6127257787860387, - "learning_rate": 4.89135979430646e-08, - "loss": 0.9586, - "step": 7747 - }, - { - "epoch": 0.9316419166716768, - "grad_norm": 1.5635840660749103, - "learning_rate": 4.874252190312078e-08, - "loss": 1.0394, - "step": 7748 - }, - { - "epoch": 0.9317621595623159, - "grad_norm": 2.710971095511843, - "learning_rate": 4.857174186738477e-08, - "loss": 0.845, - "step": 7749 - }, - { - "epoch": 0.931882402452955, - "grad_norm": 2.0574929698207516, - "learning_rate": 4.840125786176408e-08, - "loss": 0.9367, - "step": 7750 - }, - { - "epoch": 0.932002645343594, - "grad_norm": 1.6813973883367053, - "learning_rate": 4.823106991212067e-08, - "loss": 0.9696, - "step": 7751 - }, - { - "epoch": 0.9321228882342332, - "grad_norm": 1.9053358794147057, - "learning_rate": 4.806117804427212e-08, - "loss": 1.0406, - "step": 7752 - }, - { - "epoch": 0.9322431311248722, - "grad_norm": 2.668406147659304, - "learning_rate": 4.7891582283990926e-08, - "loss": 0.8421, - "step": 7753 - }, - { - "epoch": 0.9323633740155113, - "grad_norm": 1.632431204778989, - "learning_rate": 4.772228265700473e-08, - "loss": 0.9265, - "step": 7754 - }, - { - "epoch": 0.9324836169061504, - "grad_norm": 2.314726711150682, - "learning_rate": 4.75532791889961e-08, - "loss": 0.9468, - "step": 7755 - }, - { - "epoch": 0.9326038597967895, - "grad_norm": 1.8650570770202217, - "learning_rate": 4.738457190560252e-08, - "loss": 0.8531, - "step": 7756 - }, - { - "epoch": 0.9327241026874286, - "grad_norm": 2.7536591547501934, - "learning_rate": 4.721616083241664e-08, - "loss": 0.9926, - "step": 7757 - }, - { - "epoch": 0.9328443455780677, - "grad_norm": 1.9665138018191892, - "learning_rate": 4.7048045994986684e-08, - "loss": 0.9706, - "step": 7758 - }, - { - "epoch": 0.9329645884687068, - "grad_norm": 2.0090645177952395, - "learning_rate": 4.688022741881559e-08, - "loss": 1.1098, - "step": 7759 - }, - { - "epoch": 0.9330848313593458, - "grad_norm": 1.4277931545697815, - "learning_rate": 4.671270512936076e-08, - "loss": 0.9531, - "step": 7760 - }, - { - "epoch": 0.933205074249985, - "grad_norm": 1.5908589788694076, - "learning_rate": 4.6545479152035884e-08, - "loss": 1.0276, - "step": 7761 - }, - { - "epoch": 0.9333253171406241, - "grad_norm": 2.0248910992385825, - "learning_rate": 4.637854951220821e-08, - "loss": 0.9621, - "step": 7762 - }, - { - "epoch": 0.9334455600312631, - "grad_norm": 1.5999293490546864, - "learning_rate": 4.621191623520171e-08, - "loss": 0.9432, - "step": 7763 - }, - { - "epoch": 0.9335658029219023, - "grad_norm": 2.3353505497561993, - "learning_rate": 4.604557934629372e-08, - "loss": 1.0384, - "step": 7764 - }, - { - "epoch": 0.9336860458125413, - "grad_norm": 1.5586971545037245, - "learning_rate": 4.587953887071805e-08, - "loss": 1.0035, - "step": 7765 - }, - { - "epoch": 0.9338062887031804, - "grad_norm": 1.823283378409571, - "learning_rate": 4.5713794833662554e-08, - "loss": 1.0588, - "step": 7766 - }, - { - "epoch": 0.9339265315938196, - "grad_norm": 1.7847921388276005, - "learning_rate": 4.5548347260270236e-08, - "loss": 0.8325, - "step": 7767 - }, - { - "epoch": 0.9340467744844586, - "grad_norm": 1.5811157591760303, - "learning_rate": 4.538319617564012e-08, - "loss": 0.897, - "step": 7768 - }, - { - "epoch": 0.9341670173750977, - "grad_norm": 1.799641213157153, - "learning_rate": 4.521834160482485e-08, - "loss": 0.9464, - "step": 7769 - }, - { - "epoch": 0.9342872602657368, - "grad_norm": 1.5922168013971487, - "learning_rate": 4.5053783572832846e-08, - "loss": 1.0121, - "step": 7770 - }, - { - "epoch": 0.9344075031563759, - "grad_norm": 1.647454497500565, - "learning_rate": 4.488952210462771e-08, - "loss": 0.9583, - "step": 7771 - }, - { - "epoch": 0.9345277460470149, - "grad_norm": 1.8008613485863578, - "learning_rate": 4.4725557225127495e-08, - "loss": 1.0585, - "step": 7772 - }, - { - "epoch": 0.9346479889376541, - "grad_norm": 1.44442243233235, - "learning_rate": 4.456188895920565e-08, - "loss": 0.9924, - "step": 7773 - }, - { - "epoch": 0.9347682318282932, - "grad_norm": 2.2052675474995516, - "learning_rate": 4.439851733169031e-08, - "loss": 1.0435, - "step": 7774 - }, - { - "epoch": 0.9348884747189322, - "grad_norm": 2.0895156174929803, - "learning_rate": 4.4235442367365204e-08, - "loss": 0.8984, - "step": 7775 - }, - { - "epoch": 0.9350087176095714, - "grad_norm": 1.95005673301526, - "learning_rate": 4.4072664090968545e-08, - "loss": 0.9858, - "step": 7776 - }, - { - "epoch": 0.9351289605002104, - "grad_norm": 1.67110047269934, - "learning_rate": 4.391018252719347e-08, - "loss": 1.0367, - "step": 7777 - }, - { - "epoch": 0.9352492033908495, - "grad_norm": 1.7810145439997733, - "learning_rate": 4.374799770068849e-08, - "loss": 0.884, - "step": 7778 - }, - { - "epoch": 0.9353694462814887, - "grad_norm": 3.1614035654535013, - "learning_rate": 4.358610963605658e-08, - "loss": 0.9423, - "step": 7779 - }, - { - "epoch": 0.9354896891721277, - "grad_norm": 1.8060974954288556, - "learning_rate": 4.342451835785677e-08, - "loss": 0.879, - "step": 7780 - }, - { - "epoch": 0.9356099320627668, - "grad_norm": 1.4932988207666367, - "learning_rate": 4.3263223890601665e-08, - "loss": 0.9464, - "step": 7781 - }, - { - "epoch": 0.9357301749534058, - "grad_norm": 1.667615580635184, - "learning_rate": 4.31022262587597e-08, - "loss": 0.9968, - "step": 7782 - }, - { - "epoch": 0.935850417844045, - "grad_norm": 1.4525391412868072, - "learning_rate": 4.2941525486754225e-08, - "loss": 0.8582, - "step": 7783 - }, - { - "epoch": 0.935970660734684, - "grad_norm": 1.7631742635103453, - "learning_rate": 4.278112159896286e-08, - "loss": 0.9859, - "step": 7784 - }, - { - "epoch": 0.9360909036253231, - "grad_norm": 1.6821720643728733, - "learning_rate": 4.2621014619719896e-08, - "loss": 0.8695, - "step": 7785 - }, - { - "epoch": 0.9362111465159623, - "grad_norm": 0.7804683207987082, - "learning_rate": 4.246120457331215e-08, - "loss": 0.8242, - "step": 7786 - }, - { - "epoch": 0.9363313894066013, - "grad_norm": 2.6733031443685307, - "learning_rate": 4.2301691483983325e-08, - "loss": 0.9234, - "step": 7787 - }, - { - "epoch": 0.9364516322972404, - "grad_norm": 1.7317176600350153, - "learning_rate": 4.214247537593163e-08, - "loss": 0.9552, - "step": 7788 - }, - { - "epoch": 0.9365718751878795, - "grad_norm": 1.9891728348856765, - "learning_rate": 4.1983556273309293e-08, - "loss": 1.0062, - "step": 7789 - }, - { - "epoch": 0.9366921180785186, - "grad_norm": 2.1699392350650077, - "learning_rate": 4.182493420022526e-08, - "loss": 0.897, - "step": 7790 - }, - { - "epoch": 0.9368123609691577, - "grad_norm": 1.7761147745995074, - "learning_rate": 4.166660918074139e-08, - "loss": 0.983, - "step": 7791 - }, - { - "epoch": 0.9369326038597968, - "grad_norm": 1.3882416435607086, - "learning_rate": 4.15085812388758e-08, - "loss": 0.9339, - "step": 7792 - }, - { - "epoch": 0.9370528467504359, - "grad_norm": 2.037326351325236, - "learning_rate": 4.135085039860153e-08, - "loss": 0.9839, - "step": 7793 - }, - { - "epoch": 0.9371730896410749, - "grad_norm": 1.9979386435510915, - "learning_rate": 4.1193416683845906e-08, - "loss": 0.9852, - "step": 7794 - }, - { - "epoch": 0.9372933325317141, - "grad_norm": 2.2524734253326573, - "learning_rate": 4.103628011849136e-08, - "loss": 1.0308, - "step": 7795 - }, - { - "epoch": 0.9374135754223532, - "grad_norm": 2.0528058040542794, - "learning_rate": 4.0879440726375506e-08, - "loss": 0.9487, - "step": 7796 - }, - { - "epoch": 0.9375338183129922, - "grad_norm": 2.31449247560166, - "learning_rate": 4.0722898531291074e-08, - "loss": 0.7586, - "step": 7797 - }, - { - "epoch": 0.9376540612036314, - "grad_norm": 1.777022246741562, - "learning_rate": 4.0566653556985295e-08, - "loss": 0.9642, - "step": 7798 - }, - { - "epoch": 0.9377743040942704, - "grad_norm": 2.089643179992685, - "learning_rate": 4.0410705827159886e-08, - "loss": 1.0107, - "step": 7799 - }, - { - "epoch": 0.9378945469849095, - "grad_norm": 1.965155514200356, - "learning_rate": 4.0255055365472356e-08, - "loss": 0.9101, - "step": 7800 - }, - { - "epoch": 0.9380147898755486, - "grad_norm": 2.5983429653567476, - "learning_rate": 4.009970219553471e-08, - "loss": 0.9395, - "step": 7801 - }, - { - "epoch": 0.9381350327661877, - "grad_norm": 2.306002484197015, - "learning_rate": 3.99446463409141e-08, - "loss": 0.9679, - "step": 7802 - }, - { - "epoch": 0.9382552756568268, - "grad_norm": 2.158785007106817, - "learning_rate": 3.978988782513215e-08, - "loss": 0.8815, - "step": 7803 - }, - { - "epoch": 0.9383755185474659, - "grad_norm": 2.0133571434297273, - "learning_rate": 3.963542667166586e-08, - "loss": 0.9622, - "step": 7804 - }, - { - "epoch": 0.938495761438105, - "grad_norm": 1.7598134498998712, - "learning_rate": 3.9481262903946486e-08, - "loss": 0.8883, - "step": 7805 - }, - { - "epoch": 0.938616004328744, - "grad_norm": 0.8196537460840987, - "learning_rate": 3.932739654536066e-08, - "loss": 0.7671, - "step": 7806 - }, - { - "epoch": 0.9387362472193832, - "grad_norm": 2.2098024507838883, - "learning_rate": 3.917382761925014e-08, - "loss": 0.9368, - "step": 7807 - }, - { - "epoch": 0.9388564901100223, - "grad_norm": 1.5896754820159236, - "learning_rate": 3.9020556148910754e-08, - "loss": 0.9871, - "step": 7808 - }, - { - "epoch": 0.9389767330006613, - "grad_norm": 0.7604513334619891, - "learning_rate": 3.8867582157593895e-08, - "loss": 0.7842, - "step": 7809 - }, - { - "epoch": 0.9390969758913005, - "grad_norm": 1.98436022174769, - "learning_rate": 3.871490566850544e-08, - "loss": 0.9642, - "step": 7810 - }, - { - "epoch": 0.9392172187819395, - "grad_norm": 1.6390616097825126, - "learning_rate": 3.856252670480642e-08, - "loss": 0.903, - "step": 7811 - }, - { - "epoch": 0.9393374616725786, - "grad_norm": 1.655218126841389, - "learning_rate": 3.841044528961279e-08, - "loss": 1.0095, - "step": 7812 - }, - { - "epoch": 0.9394577045632178, - "grad_norm": 1.8679926584740736, - "learning_rate": 3.825866144599477e-08, - "loss": 0.9904, - "step": 7813 - }, - { - "epoch": 0.9395779474538568, - "grad_norm": 1.9816486523159411, - "learning_rate": 3.8107175196978145e-08, - "loss": 0.9498, - "step": 7814 - }, - { - "epoch": 0.9396981903444959, - "grad_norm": 1.874051807977996, - "learning_rate": 3.7955986565542996e-08, - "loss": 0.9643, - "step": 7815 - }, - { - "epoch": 0.9398184332351349, - "grad_norm": 1.776350947175281, - "learning_rate": 3.780509557462497e-08, - "loss": 0.8887, - "step": 7816 - }, - { - "epoch": 0.9399386761257741, - "grad_norm": 1.569956952737978, - "learning_rate": 3.765450224711375e-08, - "loss": 0.9541, - "step": 7817 - }, - { - "epoch": 0.9400589190164131, - "grad_norm": 1.5258535154137465, - "learning_rate": 3.750420660585396e-08, - "loss": 0.9898, - "step": 7818 - }, - { - "epoch": 0.9401791619070522, - "grad_norm": 1.6673245403165213, - "learning_rate": 3.735420867364603e-08, - "loss": 0.9945, - "step": 7819 - }, - { - "epoch": 0.9402994047976914, - "grad_norm": 1.5057498877238853, - "learning_rate": 3.7204508473244186e-08, - "loss": 0.8181, - "step": 7820 - }, - { - "epoch": 0.9404196476883304, - "grad_norm": 1.6556052626473396, - "learning_rate": 3.7055106027357395e-08, - "loss": 0.8904, - "step": 7821 - }, - { - "epoch": 0.9405398905789695, - "grad_norm": 1.8583782954685528, - "learning_rate": 3.690600135865063e-08, - "loss": 0.9169, - "step": 7822 - }, - { - "epoch": 0.9406601334696086, - "grad_norm": 0.7826849538482507, - "learning_rate": 3.675719448974246e-08, - "loss": 0.7972, - "step": 7823 - }, - { - "epoch": 0.9407803763602477, - "grad_norm": 1.9296904034920908, - "learning_rate": 3.6608685443207054e-08, - "loss": 0.7976, - "step": 7824 - }, - { - "epoch": 0.9409006192508867, - "grad_norm": 2.0947344159599286, - "learning_rate": 3.646047424157306e-08, - "loss": 0.8668, - "step": 7825 - }, - { - "epoch": 0.9410208621415259, - "grad_norm": 2.0050835376564633, - "learning_rate": 3.631256090732382e-08, - "loss": 0.8764, - "step": 7826 - }, - { - "epoch": 0.941141105032165, - "grad_norm": 1.8022708317524638, - "learning_rate": 3.6164945462897833e-08, - "loss": 1.0213, - "step": 7827 - }, - { - "epoch": 0.941261347922804, - "grad_norm": 1.6193703188252486, - "learning_rate": 3.6017627930687856e-08, - "loss": 0.9515, - "step": 7828 - }, - { - "epoch": 0.9413815908134432, - "grad_norm": 2.319612236831892, - "learning_rate": 3.587060833304267e-08, - "loss": 0.9719, - "step": 7829 - }, - { - "epoch": 0.9415018337040822, - "grad_norm": 1.9964128750525982, - "learning_rate": 3.5723886692264225e-08, - "loss": 0.8402, - "step": 7830 - }, - { - "epoch": 0.9416220765947213, - "grad_norm": 2.6662942705802566, - "learning_rate": 3.557746303061071e-08, - "loss": 0.817, - "step": 7831 - }, - { - "epoch": 0.9417423194853605, - "grad_norm": 1.7053019058879926, - "learning_rate": 3.543133737029391e-08, - "loss": 0.9186, - "step": 7832 - }, - { - "epoch": 0.9418625623759995, - "grad_norm": 1.7828354722865725, - "learning_rate": 3.5285509733481214e-08, - "loss": 0.8874, - "step": 7833 - }, - { - "epoch": 0.9419828052666386, - "grad_norm": 1.6515659819451676, - "learning_rate": 3.513998014229469e-08, - "loss": 0.9697, - "step": 7834 - }, - { - "epoch": 0.9421030481572777, - "grad_norm": 3.1840788489216, - "learning_rate": 3.499474861881069e-08, - "loss": 1.0593, - "step": 7835 - }, - { - "epoch": 0.9422232910479168, - "grad_norm": 1.9379292289519738, - "learning_rate": 3.4849815185061136e-08, - "loss": 0.8774, - "step": 7836 - }, - { - "epoch": 0.9423435339385559, - "grad_norm": 1.934967231374454, - "learning_rate": 3.470517986303223e-08, - "loss": 0.9567, - "step": 7837 - }, - { - "epoch": 0.942463776829195, - "grad_norm": 2.006716061466813, - "learning_rate": 3.4560842674664856e-08, - "loss": 0.9904, - "step": 7838 - }, - { - "epoch": 0.9425840197198341, - "grad_norm": 1.7136758985574345, - "learning_rate": 3.441680364185506e-08, - "loss": 0.9567, - "step": 7839 - }, - { - "epoch": 0.9427042626104731, - "grad_norm": 7.987011856660631, - "learning_rate": 3.427306278645314e-08, - "loss": 0.9451, - "step": 7840 - }, - { - "epoch": 0.9428245055011123, - "grad_norm": 1.8776673100436139, - "learning_rate": 3.4129620130264767e-08, - "loss": 0.9236, - "step": 7841 - }, - { - "epoch": 0.9429447483917514, - "grad_norm": 1.969694970425567, - "learning_rate": 3.398647569505009e-08, - "loss": 0.9816, - "step": 7842 - }, - { - "epoch": 0.9430649912823904, - "grad_norm": 1.953795051079702, - "learning_rate": 3.384362950252373e-08, - "loss": 0.9374, - "step": 7843 - }, - { - "epoch": 0.9431852341730296, - "grad_norm": 1.7674521385522024, - "learning_rate": 3.3701081574355473e-08, - "loss": 0.7669, - "step": 7844 - }, - { - "epoch": 0.9433054770636686, - "grad_norm": 0.719931543526431, - "learning_rate": 3.3558831932169796e-08, - "loss": 0.7315, - "step": 7845 - }, - { - "epoch": 0.9434257199543077, - "grad_norm": 2.1499331175581124, - "learning_rate": 3.341688059754588e-08, - "loss": 1.0787, - "step": 7846 - }, - { - "epoch": 0.9435459628449467, - "grad_norm": 1.8298304501143736, - "learning_rate": 3.327522759201762e-08, - "loss": 0.967, - "step": 7847 - }, - { - "epoch": 0.9436662057355859, - "grad_norm": 1.8085303408751445, - "learning_rate": 3.313387293707359e-08, - "loss": 0.8621, - "step": 7848 - }, - { - "epoch": 0.943786448626225, - "grad_norm": 4.120368901005236, - "learning_rate": 3.29928166541571e-08, - "loss": 0.8823, - "step": 7849 - }, - { - "epoch": 0.943906691516864, - "grad_norm": 1.753338422381095, - "learning_rate": 3.2852058764666346e-08, - "loss": 0.9923, - "step": 7850 - }, - { - "epoch": 0.9440269344075032, - "grad_norm": 1.564522997644746, - "learning_rate": 3.2711599289954264e-08, - "loss": 0.887, - "step": 7851 - }, - { - "epoch": 0.9441471772981422, - "grad_norm": 1.745068003468907, - "learning_rate": 3.257143825132847e-08, - "loss": 0.9693, - "step": 7852 - }, - { - "epoch": 0.9442674201887813, - "grad_norm": 1.526796989431154, - "learning_rate": 3.243157567005106e-08, - "loss": 0.9569, - "step": 7853 - }, - { - "epoch": 0.9443876630794205, - "grad_norm": 1.7788435759495953, - "learning_rate": 3.2292011567339296e-08, - "loss": 0.8396, - "step": 7854 - }, - { - "epoch": 0.9445079059700595, - "grad_norm": 2.215080988589524, - "learning_rate": 3.21527459643649e-08, - "loss": 0.7661, - "step": 7855 - }, - { - "epoch": 0.9446281488606986, - "grad_norm": 2.040064423048745, - "learning_rate": 3.2013778882254536e-08, - "loss": 0.9431, - "step": 7856 - }, - { - "epoch": 0.9447483917513377, - "grad_norm": 2.7003348178248068, - "learning_rate": 3.1875110342088676e-08, - "loss": 0.9572, - "step": 7857 - }, - { - "epoch": 0.9448686346419768, - "grad_norm": 1.737255007809217, - "learning_rate": 3.1736740364904035e-08, - "loss": 0.8498, - "step": 7858 - }, - { - "epoch": 0.9449888775326158, - "grad_norm": 1.7757324292595709, - "learning_rate": 3.159866897169094e-08, - "loss": 0.9631, - "step": 7859 - }, - { - "epoch": 0.945109120423255, - "grad_norm": 1.9275781631949014, - "learning_rate": 3.146089618339487e-08, - "loss": 0.9526, - "step": 7860 - }, - { - "epoch": 0.9452293633138941, - "grad_norm": 1.7749975795460706, - "learning_rate": 3.132342202091554e-08, - "loss": 0.8788, - "step": 7861 - }, - { - "epoch": 0.9453496062045331, - "grad_norm": 1.855401031864591, - "learning_rate": 3.1186246505107595e-08, - "loss": 0.8757, - "step": 7862 - }, - { - "epoch": 0.9454698490951723, - "grad_norm": 1.9294159115085892, - "learning_rate": 3.104936965678084e-08, - "loss": 1.0304, - "step": 7863 - }, - { - "epoch": 0.9455900919858113, - "grad_norm": 1.7388528976873037, - "learning_rate": 3.091279149669956e-08, - "loss": 1.0029, - "step": 7864 - }, - { - "epoch": 0.9457103348764504, - "grad_norm": 1.9626260607620403, - "learning_rate": 3.0776512045581624e-08, - "loss": 0.9331, - "step": 7865 - }, - { - "epoch": 0.9458305777670896, - "grad_norm": 1.822302193392292, - "learning_rate": 3.0640531324101384e-08, - "loss": 0.9722, - "step": 7866 - }, - { - "epoch": 0.9459508206577286, - "grad_norm": 1.746892281380272, - "learning_rate": 3.0504849352886554e-08, - "loss": 0.9551, - "step": 7867 - }, - { - "epoch": 0.9460710635483677, - "grad_norm": 2.0944142213232215, - "learning_rate": 3.036946615252023e-08, - "loss": 0.9073, - "step": 7868 - }, - { - "epoch": 0.9461913064390068, - "grad_norm": 2.603831358621488, - "learning_rate": 3.0234381743539984e-08, - "loss": 0.8636, - "step": 7869 - }, - { - "epoch": 0.9463115493296459, - "grad_norm": 1.8662797325305553, - "learning_rate": 3.0099596146437863e-08, - "loss": 0.9988, - "step": 7870 - }, - { - "epoch": 0.946431792220285, - "grad_norm": 0.8251336249594342, - "learning_rate": 2.996510938166086e-08, - "loss": 0.8245, - "step": 7871 - }, - { - "epoch": 0.9465520351109241, - "grad_norm": 1.8014047333585808, - "learning_rate": 2.983092146960997e-08, - "loss": 0.93, - "step": 7872 - }, - { - "epoch": 0.9466722780015632, - "grad_norm": 1.780185180418265, - "learning_rate": 2.9697032430642256e-08, - "loss": 0.9953, - "step": 7873 - }, - { - "epoch": 0.9467925208922022, - "grad_norm": 2.166411629363025, - "learning_rate": 2.9563442285067906e-08, - "loss": 0.9338, - "step": 7874 - }, - { - "epoch": 0.9469127637828414, - "grad_norm": 1.7221270589617514, - "learning_rate": 2.943015105315294e-08, - "loss": 0.9948, - "step": 7875 - }, - { - "epoch": 0.9470330066734804, - "grad_norm": 2.6629882615532976, - "learning_rate": 2.929715875511718e-08, - "loss": 0.8647, - "step": 7876 - }, - { - "epoch": 0.9471532495641195, - "grad_norm": 1.633682822121068, - "learning_rate": 2.9164465411135375e-08, - "loss": 0.8947, - "step": 7877 - }, - { - "epoch": 0.9472734924547586, - "grad_norm": 1.8053166337351383, - "learning_rate": 2.9032071041337426e-08, - "loss": 0.9964, - "step": 7878 - }, - { - "epoch": 0.9473937353453977, - "grad_norm": 1.525386326743504, - "learning_rate": 2.889997566580704e-08, - "loss": 0.928, - "step": 7879 - }, - { - "epoch": 0.9475139782360368, - "grad_norm": 1.571697402537598, - "learning_rate": 2.8768179304583086e-08, - "loss": 0.8975, - "step": 7880 - }, - { - "epoch": 0.9476342211266758, - "grad_norm": 1.4291249820572762, - "learning_rate": 2.8636681977659117e-08, - "loss": 0.9335, - "step": 7881 - }, - { - "epoch": 0.947754464017315, - "grad_norm": 1.8615399107075583, - "learning_rate": 2.850548370498318e-08, - "loss": 0.9739, - "step": 7882 - }, - { - "epoch": 0.9478747069079541, - "grad_norm": 1.507040033468118, - "learning_rate": 2.8374584506457798e-08, - "loss": 0.9068, - "step": 7883 - }, - { - "epoch": 0.9479949497985931, - "grad_norm": 2.232544903106042, - "learning_rate": 2.824398440193998e-08, - "loss": 0.8685, - "step": 7884 - }, - { - "epoch": 0.9481151926892323, - "grad_norm": 1.7129831843667047, - "learning_rate": 2.811368341124232e-08, - "loss": 0.902, - "step": 7885 - }, - { - "epoch": 0.9482354355798713, - "grad_norm": 2.2333368410490966, - "learning_rate": 2.7983681554131222e-08, - "loss": 0.8719, - "step": 7886 - }, - { - "epoch": 0.9483556784705104, - "grad_norm": 2.4182475827466385, - "learning_rate": 2.7853978850327365e-08, - "loss": 0.8969, - "step": 7887 - }, - { - "epoch": 0.9484759213611496, - "grad_norm": 1.6062000400477183, - "learning_rate": 2.7724575319507225e-08, - "loss": 1.0701, - "step": 7888 - }, - { - "epoch": 0.9485961642517886, - "grad_norm": 1.784751529015932, - "learning_rate": 2.759547098130044e-08, - "loss": 0.9763, - "step": 7889 - }, - { - "epoch": 0.9487164071424277, - "grad_norm": 1.628361233433501, - "learning_rate": 2.746666585529267e-08, - "loss": 0.9633, - "step": 7890 - }, - { - "epoch": 0.9488366500330668, - "grad_norm": 2.0402400683346036, - "learning_rate": 2.73381599610234e-08, - "loss": 0.9333, - "step": 7891 - }, - { - "epoch": 0.9489568929237059, - "grad_norm": 1.6711398495971608, - "learning_rate": 2.7209953317987033e-08, - "loss": 0.9052, - "step": 7892 - }, - { - "epoch": 0.9490771358143449, - "grad_norm": 1.7889163028335622, - "learning_rate": 2.7082045945631793e-08, - "loss": 0.9712, - "step": 7893 - }, - { - "epoch": 0.9491973787049841, - "grad_norm": 2.1715307168669926, - "learning_rate": 2.6954437863361712e-08, - "loss": 0.8928, - "step": 7894 - }, - { - "epoch": 0.9493176215956232, - "grad_norm": 1.7654200071990935, - "learning_rate": 2.6827129090534862e-08, - "loss": 0.9091, - "step": 7895 - }, - { - "epoch": 0.9494378644862622, - "grad_norm": 1.7589732766780817, - "learning_rate": 2.670011964646335e-08, - "loss": 0.9778, - "step": 7896 - }, - { - "epoch": 0.9495581073769014, - "grad_norm": 1.961190439150133, - "learning_rate": 2.657340955041487e-08, - "loss": 0.8877, - "step": 7897 - }, - { - "epoch": 0.9496783502675404, - "grad_norm": 1.7933504118100245, - "learning_rate": 2.6446998821611167e-08, - "loss": 0.9122, - "step": 7898 - }, - { - "epoch": 0.9497985931581795, - "grad_norm": 2.2321415121891874, - "learning_rate": 2.6320887479228228e-08, - "loss": 0.9231, - "step": 7899 - }, - { - "epoch": 0.9499188360488187, - "grad_norm": 2.159959339182992, - "learning_rate": 2.619507554239786e-08, - "loss": 0.9215, - "step": 7900 - }, - { - "epoch": 0.9500390789394577, - "grad_norm": 1.6218676087315775, - "learning_rate": 2.606956303020502e-08, - "loss": 0.8996, - "step": 7901 - }, - { - "epoch": 0.9501593218300968, - "grad_norm": 1.9054600063622293, - "learning_rate": 2.5944349961690036e-08, - "loss": 1.0367, - "step": 7902 - }, - { - "epoch": 0.9502795647207359, - "grad_norm": 1.6351024868542918, - "learning_rate": 2.581943635584749e-08, - "loss": 0.9343, - "step": 7903 - }, - { - "epoch": 0.950399807611375, - "grad_norm": 1.7386265786723492, - "learning_rate": 2.569482223162689e-08, - "loss": 0.8623, - "step": 7904 - }, - { - "epoch": 0.950520050502014, - "grad_norm": 1.8516711754433544, - "learning_rate": 2.5570507607932e-08, - "loss": 0.9218, - "step": 7905 - }, - { - "epoch": 0.9506402933926532, - "grad_norm": 4.180663035980078, - "learning_rate": 2.54464925036213e-08, - "loss": 0.8184, - "step": 7906 - }, - { - "epoch": 0.9507605362832923, - "grad_norm": 2.0238811934360283, - "learning_rate": 2.532277693750773e-08, - "loss": 0.8044, - "step": 7907 - }, - { - "epoch": 0.9508807791739313, - "grad_norm": 1.7461199662461446, - "learning_rate": 2.5199360928358948e-08, - "loss": 0.9552, - "step": 7908 - }, - { - "epoch": 0.9510010220645704, - "grad_norm": 1.6670383118481578, - "learning_rate": 2.507624449489665e-08, - "loss": 1.0689, - "step": 7909 - }, - { - "epoch": 0.9511212649552095, - "grad_norm": 1.70924628294423, - "learning_rate": 2.495342765579811e-08, - "loss": 0.8449, - "step": 7910 - }, - { - "epoch": 0.9512415078458486, - "grad_norm": 1.8444504781162179, - "learning_rate": 2.4830910429693984e-08, - "loss": 0.9057, - "step": 7911 - }, - { - "epoch": 0.9513617507364877, - "grad_norm": 1.8286757984348265, - "learning_rate": 2.470869283517052e-08, - "loss": 0.9902, - "step": 7912 - }, - { - "epoch": 0.9514819936271268, - "grad_norm": 1.528328268499659, - "learning_rate": 2.458677489076777e-08, - "loss": 0.9681, - "step": 7913 - }, - { - "epoch": 0.9516022365177659, - "grad_norm": 1.5864450929639835, - "learning_rate": 2.446515661498072e-08, - "loss": 1.0289, - "step": 7914 - }, - { - "epoch": 0.9517224794084049, - "grad_norm": 1.9234879710984227, - "learning_rate": 2.434383802625861e-08, - "loss": 0.9389, - "step": 7915 - }, - { - "epoch": 0.9518427222990441, - "grad_norm": 1.680012090503238, - "learning_rate": 2.4222819143005168e-08, - "loss": 0.9285, - "step": 7916 - }, - { - "epoch": 0.9519629651896832, - "grad_norm": 1.739752603240874, - "learning_rate": 2.4102099983579706e-08, - "loss": 1.01, - "step": 7917 - }, - { - "epoch": 0.9520832080803222, - "grad_norm": 1.6538956661183035, - "learning_rate": 2.3981680566294236e-08, - "loss": 0.9591, - "step": 7918 - }, - { - "epoch": 0.9522034509709614, - "grad_norm": 1.667640242734329, - "learning_rate": 2.3861560909416822e-08, - "loss": 0.9323, - "step": 7919 - }, - { - "epoch": 0.9523236938616004, - "grad_norm": 1.6851407973103147, - "learning_rate": 2.3741741031169325e-08, - "loss": 1.0232, - "step": 7920 - }, - { - "epoch": 0.9524439367522395, - "grad_norm": 1.748837289110908, - "learning_rate": 2.3622220949728544e-08, - "loss": 0.9055, - "step": 7921 - }, - { - "epoch": 0.9525641796428787, - "grad_norm": 2.438389249131389, - "learning_rate": 2.3503000683225526e-08, - "loss": 0.8107, - "step": 7922 - }, - { - "epoch": 0.9526844225335177, - "grad_norm": 8.193672008710632, - "learning_rate": 2.3384080249745585e-08, - "loss": 1.0358, - "step": 7923 - }, - { - "epoch": 0.9528046654241568, - "grad_norm": 3.041018604235455, - "learning_rate": 2.3265459667329178e-08, - "loss": 1.0371, - "step": 7924 - }, - { - "epoch": 0.9529249083147959, - "grad_norm": 2.662248010307039, - "learning_rate": 2.31471389539708e-08, - "loss": 1.0631, - "step": 7925 - }, - { - "epoch": 0.953045151205435, - "grad_norm": 1.8351322437208633, - "learning_rate": 2.3029118127619872e-08, - "loss": 0.9265, - "step": 7926 - }, - { - "epoch": 0.953165394096074, - "grad_norm": 2.2772463848048883, - "learning_rate": 2.2911397206179628e-08, - "loss": 1.0667, - "step": 7927 - }, - { - "epoch": 0.9532856369867132, - "grad_norm": 1.897270817442259, - "learning_rate": 2.279397620750845e-08, - "loss": 0.829, - "step": 7928 - }, - { - "epoch": 0.9534058798773523, - "grad_norm": 2.0044931961290455, - "learning_rate": 2.2676855149419195e-08, - "loss": 0.9834, - "step": 7929 - }, - { - "epoch": 0.9535261227679913, - "grad_norm": 2.040229083643579, - "learning_rate": 2.2560034049678988e-08, - "loss": 0.9478, - "step": 7930 - }, - { - "epoch": 0.9536463656586305, - "grad_norm": 1.9372281529849624, - "learning_rate": 2.2443512926008988e-08, - "loss": 0.9502, - "step": 7931 - }, - { - "epoch": 0.9537666085492695, - "grad_norm": 2.4311566097101807, - "learning_rate": 2.2327291796085946e-08, - "loss": 0.8986, - "step": 7932 - }, - { - "epoch": 0.9538868514399086, - "grad_norm": 2.09846148672476, - "learning_rate": 2.2211370677540197e-08, - "loss": 0.9732, - "step": 7933 - }, - { - "epoch": 0.9540070943305478, - "grad_norm": 2.164461505338744, - "learning_rate": 2.2095749587957012e-08, - "loss": 0.9811, - "step": 7934 - }, - { - "epoch": 0.9541273372211868, - "grad_norm": 1.8918420326629528, - "learning_rate": 2.1980428544876138e-08, - "loss": 0.8881, - "step": 7935 - }, - { - "epoch": 0.9542475801118259, - "grad_norm": 1.6089230986260594, - "learning_rate": 2.1865407565791584e-08, - "loss": 0.9369, - "step": 7936 - }, - { - "epoch": 0.954367823002465, - "grad_norm": 1.7545360632315814, - "learning_rate": 2.175068666815183e-08, - "loss": 0.9693, - "step": 7937 - }, - { - "epoch": 0.9544880658931041, - "grad_norm": 1.9230529152765437, - "learning_rate": 2.163626586935985e-08, - "loss": 0.9882, - "step": 7938 - }, - { - "epoch": 0.9546083087837431, - "grad_norm": 1.886334366273617, - "learning_rate": 2.1522145186773755e-08, - "loss": 0.8254, - "step": 7939 - }, - { - "epoch": 0.9547285516743822, - "grad_norm": 1.6542640151326193, - "learning_rate": 2.140832463770481e-08, - "loss": 1.0499, - "step": 7940 - }, - { - "epoch": 0.9548487945650214, - "grad_norm": 2.1158781049553155, - "learning_rate": 2.129480423941987e-08, - "loss": 0.9516, - "step": 7941 - }, - { - "epoch": 0.9549690374556604, - "grad_norm": 1.5697697702697782, - "learning_rate": 2.1181584009140052e-08, - "loss": 1.0038, - "step": 7942 - }, - { - "epoch": 0.9550892803462995, - "grad_norm": 4.944007930465313, - "learning_rate": 2.10686639640405e-08, - "loss": 1.0317, - "step": 7943 - }, - { - "epoch": 0.9552095232369386, - "grad_norm": 1.5073118020878689, - "learning_rate": 2.0956044121251294e-08, - "loss": 1.0095, - "step": 7944 - }, - { - "epoch": 0.9553297661275777, - "grad_norm": 1.7006902527548717, - "learning_rate": 2.084372449785654e-08, - "loss": 1.0109, - "step": 7945 - }, - { - "epoch": 0.9554500090182168, - "grad_norm": 1.874837041767034, - "learning_rate": 2.0731705110895282e-08, - "loss": 0.8785, - "step": 7946 - }, - { - "epoch": 0.9555702519088559, - "grad_norm": 1.6623823253316161, - "learning_rate": 2.0619985977360587e-08, - "loss": 1.0696, - "step": 7947 - }, - { - "epoch": 0.955690494799495, - "grad_norm": 1.7290827106485311, - "learning_rate": 2.0508567114200237e-08, - "loss": 0.9715, - "step": 7948 - }, - { - "epoch": 0.955810737690134, - "grad_norm": 1.6852272411349472, - "learning_rate": 2.0397448538316485e-08, - "loss": 0.974, - "step": 7949 - }, - { - "epoch": 0.9559309805807732, - "grad_norm": 1.875113165937139, - "learning_rate": 2.028663026656563e-08, - "loss": 0.8589, - "step": 7950 - }, - { - "epoch": 0.9560512234714122, - "grad_norm": 2.3861184122163595, - "learning_rate": 2.0176112315758885e-08, - "loss": 0.9143, - "step": 7951 - }, - { - "epoch": 0.9561714663620513, - "grad_norm": 2.0457467034276164, - "learning_rate": 2.0065894702661957e-08, - "loss": 0.9039, - "step": 7952 - }, - { - "epoch": 0.9562917092526905, - "grad_norm": 1.6369680428761666, - "learning_rate": 1.9955977443994577e-08, - "loss": 0.9796, - "step": 7953 - }, - { - "epoch": 0.9564119521433295, - "grad_norm": 2.3689648090484607, - "learning_rate": 1.9846360556430965e-08, - "loss": 0.8342, - "step": 7954 - }, - { - "epoch": 0.9565321950339686, - "grad_norm": 2.0720283739216567, - "learning_rate": 1.973704405660004e-08, - "loss": 0.8164, - "step": 7955 - }, - { - "epoch": 0.9566524379246077, - "grad_norm": 1.509662580242987, - "learning_rate": 1.9628027961085203e-08, - "loss": 0.9723, - "step": 7956 - }, - { - "epoch": 0.9567726808152468, - "grad_norm": 1.661079132271245, - "learning_rate": 1.9519312286423894e-08, - "loss": 1.0331, - "step": 7957 - }, - { - "epoch": 0.9568929237058859, - "grad_norm": 1.7916555478246317, - "learning_rate": 1.9410897049108255e-08, - "loss": 0.9684, - "step": 7958 - }, - { - "epoch": 0.957013166596525, - "grad_norm": 1.605905960583898, - "learning_rate": 1.9302782265584905e-08, - "loss": 1.1105, - "step": 7959 - }, - { - "epoch": 0.9571334094871641, - "grad_norm": 1.878028458384731, - "learning_rate": 1.9194967952254282e-08, - "loss": 1.0618, - "step": 7960 - }, - { - "epoch": 0.9572536523778031, - "grad_norm": 2.376513751631649, - "learning_rate": 1.9087454125472635e-08, - "loss": 1.003, - "step": 7961 - }, - { - "epoch": 0.9573738952684423, - "grad_norm": 2.01719497927484, - "learning_rate": 1.8980240801548696e-08, - "loss": 0.9869, - "step": 7962 - }, - { - "epoch": 0.9574941381590814, - "grad_norm": 1.4850203065093888, - "learning_rate": 1.8873327996747458e-08, - "loss": 0.9376, - "step": 7963 - }, - { - "epoch": 0.9576143810497204, - "grad_norm": 1.7888531291670406, - "learning_rate": 1.8766715727287053e-08, - "loss": 0.865, - "step": 7964 - }, - { - "epoch": 0.9577346239403596, - "grad_norm": 1.649369830390487, - "learning_rate": 1.8660404009340546e-08, - "loss": 0.9887, - "step": 7965 - }, - { - "epoch": 0.9578548668309986, - "grad_norm": 0.9489772159667218, - "learning_rate": 1.8554392859035485e-08, - "loss": 0.8611, - "step": 7966 - }, - { - "epoch": 0.9579751097216377, - "grad_norm": 1.841931691094986, - "learning_rate": 1.8448682292453444e-08, - "loss": 0.9893, - "step": 7967 - }, - { - "epoch": 0.9580953526122769, - "grad_norm": 1.713923178950952, - "learning_rate": 1.8343272325631154e-08, - "loss": 0.8653, - "step": 7968 - }, - { - "epoch": 0.9582155955029159, - "grad_norm": 2.499683455454295, - "learning_rate": 1.8238162974558492e-08, - "loss": 0.9751, - "step": 7969 - }, - { - "epoch": 0.958335838393555, - "grad_norm": 2.189108012323266, - "learning_rate": 1.8133354255181144e-08, - "loss": 0.9427, - "step": 7970 - }, - { - "epoch": 0.958456081284194, - "grad_norm": 1.675478607806159, - "learning_rate": 1.802884618339795e-08, - "loss": 0.946, - "step": 7971 - }, - { - "epoch": 0.9585763241748332, - "grad_norm": 1.8861131031673188, - "learning_rate": 1.7924638775062894e-08, - "loss": 1.0121, - "step": 7972 - }, - { - "epoch": 0.9586965670654722, - "grad_norm": 1.9753890342472151, - "learning_rate": 1.7820732045984444e-08, - "loss": 1.0139, - "step": 7973 - }, - { - "epoch": 0.9588168099561113, - "grad_norm": 2.032093518515613, - "learning_rate": 1.7717126011924655e-08, - "loss": 0.9401, - "step": 7974 - }, - { - "epoch": 0.9589370528467505, - "grad_norm": 3.2042856486187894, - "learning_rate": 1.7613820688600957e-08, - "loss": 0.9648, - "step": 7975 - }, - { - "epoch": 0.9590572957373895, - "grad_norm": 1.7206047454318034, - "learning_rate": 1.7510816091684588e-08, - "loss": 0.9872, - "step": 7976 - }, - { - "epoch": 0.9591775386280286, - "grad_norm": 2.327691978789315, - "learning_rate": 1.740811223680083e-08, - "loss": 0.987, - "step": 7977 - }, - { - "epoch": 0.9592977815186677, - "grad_norm": 2.193748595843178, - "learning_rate": 1.7305709139530334e-08, - "loss": 0.9362, - "step": 7978 - }, - { - "epoch": 0.9594180244093068, - "grad_norm": 2.2942177069607785, - "learning_rate": 1.7203606815407334e-08, - "loss": 0.9512, - "step": 7979 - }, - { - "epoch": 0.9595382672999458, - "grad_norm": 1.6208700370391933, - "learning_rate": 1.7101805279920557e-08, - "loss": 0.9923, - "step": 7980 - }, - { - "epoch": 0.959658510190585, - "grad_norm": 1.9742526100917226, - "learning_rate": 1.7000304548513643e-08, - "loss": 1.0171, - "step": 7981 - }, - { - "epoch": 0.9597787530812241, - "grad_norm": 1.8534823094098216, - "learning_rate": 1.6899104636583394e-08, - "loss": 1.0197, - "step": 7982 - }, - { - "epoch": 0.9598989959718631, - "grad_norm": 0.7763715635932092, - "learning_rate": 1.6798205559482638e-08, - "loss": 0.8497, - "step": 7983 - }, - { - "epoch": 0.9600192388625023, - "grad_norm": 1.6681223313963074, - "learning_rate": 1.669760733251713e-08, - "loss": 0.9635, - "step": 7984 - }, - { - "epoch": 0.9601394817531413, - "grad_norm": 1.5218168588298815, - "learning_rate": 1.659730997094755e-08, - "loss": 1.0255, - "step": 7985 - }, - { - "epoch": 0.9602597246437804, - "grad_norm": 1.7067808059185292, - "learning_rate": 1.6497313489989283e-08, - "loss": 0.8193, - "step": 7986 - }, - { - "epoch": 0.9603799675344196, - "grad_norm": 2.4299163219973603, - "learning_rate": 1.639761790481131e-08, - "loss": 0.8979, - "step": 7987 - }, - { - "epoch": 0.9605002104250586, - "grad_norm": 2.122594144640597, - "learning_rate": 1.6298223230537754e-08, - "loss": 0.9875, - "step": 7988 - }, - { - "epoch": 0.9606204533156977, - "grad_norm": 1.864728897067374, - "learning_rate": 1.619912948224611e-08, - "loss": 0.8964, - "step": 7989 - }, - { - "epoch": 0.9607406962063368, - "grad_norm": 2.190069814466271, - "learning_rate": 1.6100336674969682e-08, - "loss": 0.7973, - "step": 7990 - }, - { - "epoch": 0.9608609390969759, - "grad_norm": 1.8266361401204578, - "learning_rate": 1.600184482369449e-08, - "loss": 0.972, - "step": 7991 - }, - { - "epoch": 0.960981181987615, - "grad_norm": 2.0860277935016183, - "learning_rate": 1.5903653943362126e-08, - "loss": 1.0946, - "step": 7992 - }, - { - "epoch": 0.9611014248782541, - "grad_norm": 1.7374895328782767, - "learning_rate": 1.580576404886802e-08, - "loss": 0.9617, - "step": 7993 - }, - { - "epoch": 0.9612216677688932, - "grad_norm": 1.9544936000848985, - "learning_rate": 1.570817515506162e-08, - "loss": 0.9995, - "step": 7994 - }, - { - "epoch": 0.9613419106595322, - "grad_norm": 1.845040532418982, - "learning_rate": 1.561088727674753e-08, - "loss": 1.0074, - "step": 7995 - }, - { - "epoch": 0.9614621535501714, - "grad_norm": 2.2652303417713924, - "learning_rate": 1.551390042868417e-08, - "loss": 0.9048, - "step": 7996 - }, - { - "epoch": 0.9615823964408104, - "grad_norm": 1.8282473270538082, - "learning_rate": 1.5417214625584207e-08, - "loss": 0.9066, - "step": 7997 - }, - { - "epoch": 0.9617026393314495, - "grad_norm": 1.4945221058655018, - "learning_rate": 1.5320829882114806e-08, - "loss": 1.0496, - "step": 7998 - }, - { - "epoch": 0.9618228822220887, - "grad_norm": 1.6827877797300783, - "learning_rate": 1.5224746212897378e-08, - "loss": 0.9769, - "step": 7999 - }, - { - "epoch": 0.9619431251127277, - "grad_norm": 1.5029794190267358, - "learning_rate": 1.512896363250804e-08, - "loss": 0.9644, - "step": 8000 - }, - { - "epoch": 0.9620633680033668, - "grad_norm": 1.7180486461870872, - "learning_rate": 1.503348215547673e-08, - "loss": 0.9535, - "step": 8001 - }, - { - "epoch": 0.962183610894006, - "grad_norm": 1.8637197046612788, - "learning_rate": 1.4938301796288078e-08, - "loss": 1.0002, - "step": 8002 - }, - { - "epoch": 0.962303853784645, - "grad_norm": 2.336628279087225, - "learning_rate": 1.4843422569380537e-08, - "loss": 1.0297, - "step": 8003 - }, - { - "epoch": 0.9624240966752841, - "grad_norm": 1.6603044511056784, - "learning_rate": 1.4748844489147483e-08, - "loss": 1.0239, - "step": 8004 - }, - { - "epoch": 0.9625443395659231, - "grad_norm": 1.9477506624636125, - "learning_rate": 1.4654567569936326e-08, - "loss": 0.9027, - "step": 8005 - }, - { - "epoch": 0.9626645824565623, - "grad_norm": 2.1222844443705897, - "learning_rate": 1.456059182604874e-08, - "loss": 1.0242, - "step": 8006 - }, - { - "epoch": 0.9627848253472013, - "grad_norm": 1.6324057916678236, - "learning_rate": 1.4466917271740653e-08, - "loss": 0.9574, - "step": 8007 - }, - { - "epoch": 0.9629050682378404, - "grad_norm": 1.7905585684445033, - "learning_rate": 1.4373543921222697e-08, - "loss": 0.8717, - "step": 8008 - }, - { - "epoch": 0.9630253111284796, - "grad_norm": 1.700117483006445, - "learning_rate": 1.428047178865932e-08, - "loss": 0.9803, - "step": 8009 - }, - { - "epoch": 0.9631455540191186, - "grad_norm": 1.491120840971979, - "learning_rate": 1.4187700888169451e-08, - "loss": 0.9406, - "step": 8010 - }, - { - "epoch": 0.9632657969097577, - "grad_norm": 0.8380818537815073, - "learning_rate": 1.40952312338265e-08, - "loss": 0.8431, - "step": 8011 - }, - { - "epoch": 0.9633860398003968, - "grad_norm": 1.5493195609160682, - "learning_rate": 1.4003062839657909e-08, - "loss": 0.882, - "step": 8012 - }, - { - "epoch": 0.9635062826910359, - "grad_norm": 1.5106171924293026, - "learning_rate": 1.391119571964583e-08, - "loss": 0.9974, - "step": 8013 - }, - { - "epoch": 0.9636265255816749, - "grad_norm": 1.7009036074893293, - "learning_rate": 1.3819629887726225e-08, - "loss": 0.9278, - "step": 8014 - }, - { - "epoch": 0.9637467684723141, - "grad_norm": 1.8275224393622862, - "learning_rate": 1.3728365357789317e-08, - "loss": 0.9635, - "step": 8015 - }, - { - "epoch": 0.9638670113629532, - "grad_norm": 2.865975687747757, - "learning_rate": 1.3637402143680254e-08, - "loss": 0.9668, - "step": 8016 - }, - { - "epoch": 0.9639872542535922, - "grad_norm": 0.7799962091965136, - "learning_rate": 1.3546740259197998e-08, - "loss": 0.7691, - "step": 8017 - }, - { - "epoch": 0.9641074971442314, - "grad_norm": 1.8762215767574821, - "learning_rate": 1.3456379718095989e-08, - "loss": 0.8979, - "step": 8018 - }, - { - "epoch": 0.9642277400348704, - "grad_norm": 0.9204324006243534, - "learning_rate": 1.3366320534081487e-08, - "loss": 0.8469, - "step": 8019 - }, - { - "epoch": 0.9643479829255095, - "grad_norm": 2.3366808266784322, - "learning_rate": 1.3276562720816675e-08, - "loss": 0.9468, - "step": 8020 - }, - { - "epoch": 0.9644682258161487, - "grad_norm": 2.1285559129743814, - "learning_rate": 1.3187106291917549e-08, - "loss": 1.0276, - "step": 8021 - }, - { - "epoch": 0.9645884687067877, - "grad_norm": 1.768375583555018, - "learning_rate": 1.309795126095503e-08, - "loss": 0.8982, - "step": 8022 - }, - { - "epoch": 0.9647087115974268, - "grad_norm": 3.2189885023325906, - "learning_rate": 1.3009097641453192e-08, - "loss": 1.0054, - "step": 8023 - }, - { - "epoch": 0.9648289544880659, - "grad_norm": 1.5342278621693195, - "learning_rate": 1.2920545446891474e-08, - "loss": 0.9556, - "step": 8024 - }, - { - "epoch": 0.964949197378705, - "grad_norm": 1.5308301700454836, - "learning_rate": 1.2832294690703127e-08, - "loss": 0.8917, - "step": 8025 - }, - { - "epoch": 0.965069440269344, - "grad_norm": 2.004181662626994, - "learning_rate": 1.2744345386275668e-08, - "loss": 0.973, - "step": 8026 - }, - { - "epoch": 0.9651896831599832, - "grad_norm": 1.8623740748904927, - "learning_rate": 1.265669754695109e-08, - "loss": 0.9767, - "step": 8027 - }, - { - "epoch": 0.9653099260506223, - "grad_norm": 1.9767346816023603, - "learning_rate": 1.2569351186025201e-08, - "loss": 1.014, - "step": 8028 - }, - { - "epoch": 0.9654301689412613, - "grad_norm": 1.3926289724346905, - "learning_rate": 1.2482306316748737e-08, - "loss": 0.9469, - "step": 8029 - }, - { - "epoch": 0.9655504118319005, - "grad_norm": 1.7343799487893923, - "learning_rate": 1.2395562952326021e-08, - "loss": 0.9845, - "step": 8030 - }, - { - "epoch": 0.9656706547225395, - "grad_norm": 2.3994809344377432, - "learning_rate": 1.2309121105916309e-08, - "loss": 1.0149, - "step": 8031 - }, - { - "epoch": 0.9657908976131786, - "grad_norm": 1.678352641714655, - "learning_rate": 1.222298079063222e-08, - "loss": 0.8862, - "step": 8032 - }, - { - "epoch": 0.9659111405038178, - "grad_norm": 1.8660372459112524, - "learning_rate": 1.2137142019541524e-08, - "loss": 0.9249, - "step": 8033 - }, - { - "epoch": 0.9660313833944568, - "grad_norm": 1.7899801241447832, - "learning_rate": 1.2051604805666027e-08, - "loss": 0.9333, - "step": 8034 - }, - { - "epoch": 0.9661516262850959, - "grad_norm": 2.09196111068173, - "learning_rate": 1.196636916198135e-08, - "loss": 0.9769, - "step": 8035 - }, - { - "epoch": 0.9662718691757349, - "grad_norm": 1.8094998884830826, - "learning_rate": 1.1881435101418036e-08, - "loss": 0.9705, - "step": 8036 - }, - { - "epoch": 0.9663921120663741, - "grad_norm": 0.8027592119147531, - "learning_rate": 1.1796802636860003e-08, - "loss": 0.8978, - "step": 8037 - }, - { - "epoch": 0.9665123549570132, - "grad_norm": 6.204711096321758, - "learning_rate": 1.1712471781146316e-08, - "loss": 0.9264, - "step": 8038 - }, - { - "epoch": 0.9666325978476522, - "grad_norm": 1.975377869975011, - "learning_rate": 1.1628442547069628e-08, - "loss": 0.8731, - "step": 8039 - }, - { - "epoch": 0.9667528407382914, - "grad_norm": 1.7530432534155989, - "learning_rate": 1.1544714947377521e-08, - "loss": 0.9644, - "step": 8040 - }, - { - "epoch": 0.9668730836289304, - "grad_norm": 1.9209857527141467, - "learning_rate": 1.1461288994770945e-08, - "loss": 0.8959, - "step": 8041 - }, - { - "epoch": 0.9669933265195695, - "grad_norm": 1.7994423479287374, - "learning_rate": 1.1378164701906002e-08, - "loss": 0.9757, - "step": 8042 - }, - { - "epoch": 0.9671135694102087, - "grad_norm": 1.753742643682124, - "learning_rate": 1.1295342081392156e-08, - "loss": 0.8658, - "step": 8043 - }, - { - "epoch": 0.9672338123008477, - "grad_norm": 1.690213882184529, - "learning_rate": 1.1212821145793804e-08, - "loss": 0.8894, - "step": 8044 - }, - { - "epoch": 0.9673540551914868, - "grad_norm": 1.9244777144313456, - "learning_rate": 1.1130601907629156e-08, - "loss": 0.986, - "step": 8045 - }, - { - "epoch": 0.9674742980821259, - "grad_norm": 0.8490628644249576, - "learning_rate": 1.1048684379370899e-08, - "loss": 0.8704, - "step": 8046 - }, - { - "epoch": 0.967594540972765, - "grad_norm": 1.8299846482805546, - "learning_rate": 1.0967068573445759e-08, - "loss": 0.9426, - "step": 8047 - }, - { - "epoch": 0.967714783863404, - "grad_norm": 2.0724241607741996, - "learning_rate": 1.0885754502234945e-08, - "loss": 0.8544, - "step": 8048 - }, - { - "epoch": 0.9678350267540432, - "grad_norm": 1.7635922503741714, - "learning_rate": 1.08047421780737e-08, - "loss": 0.9768, - "step": 8049 - }, - { - "epoch": 0.9679552696446823, - "grad_norm": 2.3420257329268197, - "learning_rate": 1.0724031613251305e-08, - "loss": 0.9432, - "step": 8050 - }, - { - "epoch": 0.9680755125353213, - "grad_norm": 1.925039152267416, - "learning_rate": 1.0643622820011744e-08, - "loss": 0.8554, - "step": 8051 - }, - { - "epoch": 0.9681957554259605, - "grad_norm": 1.9593570008736434, - "learning_rate": 1.0563515810552814e-08, - "loss": 0.8797, - "step": 8052 - }, - { - "epoch": 0.9683159983165995, - "grad_norm": 1.4899118056853913, - "learning_rate": 1.0483710597026795e-08, - "loss": 0.9331, - "step": 8053 - }, - { - "epoch": 0.9684362412072386, - "grad_norm": 2.2814813783961205, - "learning_rate": 1.0404207191540227e-08, - "loss": 0.9338, - "step": 8054 - }, - { - "epoch": 0.9685564840978778, - "grad_norm": 1.8197802637358365, - "learning_rate": 1.0325005606153236e-08, - "loss": 0.9546, - "step": 8055 - }, - { - "epoch": 0.9686767269885168, - "grad_norm": 2.6353550980403635, - "learning_rate": 1.0246105852881104e-08, - "loss": 0.9823, - "step": 8056 - }, - { - "epoch": 0.9687969698791559, - "grad_norm": 1.6358761314096106, - "learning_rate": 1.0167507943692476e-08, - "loss": 0.9864, - "step": 8057 - }, - { - "epoch": 0.968917212769795, - "grad_norm": 2.065112684375824, - "learning_rate": 1.008921189051093e-08, - "loss": 0.916, - "step": 8058 - }, - { - "epoch": 0.9690374556604341, - "grad_norm": 2.0347085705654675, - "learning_rate": 1.0011217705213848e-08, - "loss": 0.9715, - "step": 8059 - }, - { - "epoch": 0.9691576985510731, - "grad_norm": 1.6942892194567847, - "learning_rate": 9.933525399632658e-09, - "loss": 0.9547, - "step": 8060 - }, - { - "epoch": 0.9692779414417123, - "grad_norm": 3.456353658877282, - "learning_rate": 9.856134985553488e-09, - "loss": 0.8514, - "step": 8061 - }, - { - "epoch": 0.9693981843323514, - "grad_norm": 1.452438028034505, - "learning_rate": 9.77904647471628e-09, - "loss": 0.9278, - "step": 8062 - }, - { - "epoch": 0.9695184272229904, - "grad_norm": 1.381778783256513, - "learning_rate": 9.702259878815454e-09, - "loss": 0.9343, - "step": 8063 - }, - { - "epoch": 0.9696386701136296, - "grad_norm": 1.8973704663519122, - "learning_rate": 9.625775209499254e-09, - "loss": 0.9393, - "step": 8064 - }, - { - "epoch": 0.9697589130042686, - "grad_norm": 2.2964852969120235, - "learning_rate": 9.549592478370172e-09, - "loss": 0.9353, - "step": 8065 - }, - { - "epoch": 0.9698791558949077, - "grad_norm": 1.5859786155144144, - "learning_rate": 9.473711696985632e-09, - "loss": 0.9968, - "step": 8066 - }, - { - "epoch": 0.9699993987855468, - "grad_norm": 2.080786126940223, - "learning_rate": 9.398132876856201e-09, - "loss": 0.9623, - "step": 8067 - }, - { - "epoch": 0.9701196416761859, - "grad_norm": 0.8007152995422409, - "learning_rate": 9.322856029447379e-09, - "loss": 0.84, - "step": 8068 - }, - { - "epoch": 0.970239884566825, - "grad_norm": 1.7616654956829374, - "learning_rate": 9.247881166178695e-09, - "loss": 0.9935, - "step": 8069 - }, - { - "epoch": 0.970360127457464, - "grad_norm": 2.0413222379218876, - "learning_rate": 9.173208298423274e-09, - "loss": 0.9686, - "step": 8070 - }, - { - "epoch": 0.9704803703481032, - "grad_norm": 1.6727265658113244, - "learning_rate": 9.09883743750961e-09, - "loss": 0.961, - "step": 8071 - }, - { - "epoch": 0.9706006132387422, - "grad_norm": 1.4892578925461044, - "learning_rate": 9.024768594719124e-09, - "loss": 1.0357, - "step": 8072 - }, - { - "epoch": 0.9707208561293813, - "grad_norm": 4.237215897930365, - "learning_rate": 8.95100178128816e-09, - "loss": 0.9211, - "step": 8073 - }, - { - "epoch": 0.9708410990200205, - "grad_norm": 1.731268635453276, - "learning_rate": 8.877537008407321e-09, - "loss": 0.8958, - "step": 8074 - }, - { - "epoch": 0.9709613419106595, - "grad_norm": 1.5001316807485838, - "learning_rate": 8.804374287221028e-09, - "loss": 0.8803, - "step": 8075 - }, - { - "epoch": 0.9710815848012986, - "grad_norm": 1.9025987771416772, - "learning_rate": 8.731513628827958e-09, - "loss": 1.0457, - "step": 8076 - }, - { - "epoch": 0.9712018276919377, - "grad_norm": 1.8459763097679138, - "learning_rate": 8.658955044280825e-09, - "loss": 1.0231, - "step": 8077 - }, - { - "epoch": 0.9713220705825768, - "grad_norm": 1.604785931771058, - "learning_rate": 8.586698544587268e-09, - "loss": 0.9694, - "step": 8078 - }, - { - "epoch": 0.9714423134732159, - "grad_norm": 1.7782032659431786, - "learning_rate": 8.514744140707853e-09, - "loss": 0.9381, - "step": 8079 - }, - { - "epoch": 0.971562556363855, - "grad_norm": 1.510006272258742, - "learning_rate": 8.443091843558515e-09, - "loss": 0.9592, - "step": 8080 - }, - { - "epoch": 0.9716827992544941, - "grad_norm": 1.8525548923200192, - "learning_rate": 8.37174166400878e-09, - "loss": 0.8501, - "step": 8081 - }, - { - "epoch": 0.9718030421451331, - "grad_norm": 1.9322010920461592, - "learning_rate": 8.300693612881992e-09, - "loss": 1.0549, - "step": 8082 - }, - { - "epoch": 0.9719232850357723, - "grad_norm": 2.2613423879295187, - "learning_rate": 8.22994770095664e-09, - "loss": 0.9991, - "step": 8083 - }, - { - "epoch": 0.9720435279264114, - "grad_norm": 1.9967277699431043, - "learning_rate": 8.159503938964585e-09, - "loss": 0.9575, - "step": 8084 - }, - { - "epoch": 0.9721637708170504, - "grad_norm": 1.7236492037331457, - "learning_rate": 8.089362337592164e-09, - "loss": 0.8995, - "step": 8085 - }, - { - "epoch": 0.9722840137076896, - "grad_norm": 1.5114589734101294, - "learning_rate": 8.019522907479536e-09, - "loss": 0.92, - "step": 8086 - }, - { - "epoch": 0.9724042565983286, - "grad_norm": 2.517349789181092, - "learning_rate": 7.949985659221558e-09, - "loss": 0.9679, - "step": 8087 - }, - { - "epoch": 0.9725244994889677, - "grad_norm": 2.0011674334752096, - "learning_rate": 7.880750603366904e-09, - "loss": 0.9869, - "step": 8088 - }, - { - "epoch": 0.9726447423796069, - "grad_norm": 1.6521236191881115, - "learning_rate": 7.811817750418282e-09, - "loss": 0.9932, - "step": 8089 - }, - { - "epoch": 0.9727649852702459, - "grad_norm": 2.668210288154812, - "learning_rate": 7.743187110833105e-09, - "loss": 0.9972, - "step": 8090 - }, - { - "epoch": 0.972885228160885, - "grad_norm": 1.4399626825053684, - "learning_rate": 7.674858695022602e-09, - "loss": 1.0118, - "step": 8091 - }, - { - "epoch": 0.9730054710515241, - "grad_norm": 2.319251951604386, - "learning_rate": 7.606832513351591e-09, - "loss": 0.961, - "step": 8092 - }, - { - "epoch": 0.9731257139421632, - "grad_norm": 0.8179203212213968, - "learning_rate": 7.539108576140264e-09, - "loss": 0.8721, - "step": 8093 - }, - { - "epoch": 0.9732459568328022, - "grad_norm": 5.285810318740922, - "learning_rate": 7.471686893661732e-09, - "loss": 0.8929, - "step": 8094 - }, - { - "epoch": 0.9733661997234414, - "grad_norm": 2.1408131092146285, - "learning_rate": 7.4045674761442636e-09, - "loss": 0.8381, - "step": 8095 - }, - { - "epoch": 0.9734864426140805, - "grad_norm": 1.8968349018917778, - "learning_rate": 7.337750333769488e-09, - "loss": 0.9303, - "step": 8096 - }, - { - "epoch": 0.9736066855047195, - "grad_norm": 2.2670998869884422, - "learning_rate": 7.2712354766737425e-09, - "loss": 0.9289, - "step": 8097 - }, - { - "epoch": 0.9737269283953586, - "grad_norm": 1.6155024055880998, - "learning_rate": 7.2050229149469565e-09, - "loss": 1.0044, - "step": 8098 - }, - { - "epoch": 0.9738471712859977, - "grad_norm": 1.6697089165426577, - "learning_rate": 7.139112658633984e-09, - "loss": 0.8304, - "step": 8099 - }, - { - "epoch": 0.9739674141766368, - "grad_norm": 1.961233534637442, - "learning_rate": 7.073504717733048e-09, - "loss": 0.9, - "step": 8100 - }, - { - "epoch": 0.9740876570672758, - "grad_norm": 0.7548120620952428, - "learning_rate": 7.008199102196855e-09, - "loss": 0.787, - "step": 8101 - }, - { - "epoch": 0.974207899957915, - "grad_norm": 0.8640013578280623, - "learning_rate": 6.9431958219321464e-09, - "loss": 0.8156, - "step": 8102 - }, - { - "epoch": 0.9743281428485541, - "grad_norm": 1.6731016703629633, - "learning_rate": 6.878494886800146e-09, - "loss": 0.9768, - "step": 8103 - }, - { - "epoch": 0.9744483857391931, - "grad_norm": 1.890771276947192, - "learning_rate": 6.814096306615669e-09, - "loss": 0.9604, - "step": 8104 - }, - { - "epoch": 0.9745686286298323, - "grad_norm": 1.9576429678787342, - "learning_rate": 6.750000091148011e-09, - "loss": 0.8524, - "step": 8105 - }, - { - "epoch": 0.9746888715204713, - "grad_norm": 1.6459816813509855, - "learning_rate": 6.686206250120729e-09, - "loss": 0.9331, - "step": 8106 - }, - { - "epoch": 0.9748091144111104, - "grad_norm": 1.7621080598549321, - "learning_rate": 6.622714793210749e-09, - "loss": 0.9435, - "step": 8107 - }, - { - "epoch": 0.9749293573017496, - "grad_norm": 1.5148317286391644, - "learning_rate": 6.559525730050364e-09, - "loss": 0.9782, - "step": 8108 - }, - { - "epoch": 0.9750496001923886, - "grad_norm": 1.920758302868513, - "learning_rate": 6.496639070224574e-09, - "loss": 0.9606, - "step": 8109 - }, - { - "epoch": 0.9751698430830277, - "grad_norm": 2.55490904614427, - "learning_rate": 6.4340548232739714e-09, - "loss": 1.0344, - "step": 8110 - }, - { - "epoch": 0.9752900859736668, - "grad_norm": 1.819876229720267, - "learning_rate": 6.371772998692071e-09, - "loss": 0.9929, - "step": 8111 - }, - { - "epoch": 0.9754103288643059, - "grad_norm": 2.4041177789840185, - "learning_rate": 6.309793605927094e-09, - "loss": 0.8507, - "step": 8112 - }, - { - "epoch": 0.975530571754945, - "grad_norm": 1.8325417138903533, - "learning_rate": 6.248116654381297e-09, - "loss": 0.9959, - "step": 8113 - }, - { - "epoch": 0.9756508146455841, - "grad_norm": 2.6397253908039335, - "learning_rate": 6.186742153410751e-09, - "loss": 0.9224, - "step": 8114 - }, - { - "epoch": 0.9757710575362232, - "grad_norm": 1.9898917340295625, - "learning_rate": 6.125670112326453e-09, - "loss": 1.0664, - "step": 8115 - }, - { - "epoch": 0.9758913004268622, - "grad_norm": 1.6034102039322236, - "learning_rate": 6.064900540392548e-09, - "loss": 0.9089, - "step": 8116 - }, - { - "epoch": 0.9760115433175014, - "grad_norm": 1.854963824184176, - "learning_rate": 6.0044334468278835e-09, - "loss": 0.9921, - "step": 8117 - }, - { - "epoch": 0.9761317862081405, - "grad_norm": 1.6410094037899436, - "learning_rate": 5.944268840805345e-09, - "loss": 0.9169, - "step": 8118 - }, - { - "epoch": 0.9762520290987795, - "grad_norm": 1.9180081551848815, - "learning_rate": 5.88440673145163e-09, - "loss": 0.8384, - "step": 8119 - }, - { - "epoch": 0.9763722719894187, - "grad_norm": 2.0403468738429225, - "learning_rate": 5.824847127848142e-09, - "loss": 1.03, - "step": 8120 - }, - { - "epoch": 0.9764925148800577, - "grad_norm": 1.7167774325193168, - "learning_rate": 5.765590039029433e-09, - "loss": 0.9755, - "step": 8121 - }, - { - "epoch": 0.9766127577706968, - "grad_norm": 1.8719981004998767, - "learning_rate": 5.706635473985422e-09, - "loss": 0.9102, - "step": 8122 - }, - { - "epoch": 0.976733000661336, - "grad_norm": 1.910938812529892, - "learning_rate": 5.6479834416591764e-09, - "loss": 1.0522, - "step": 8123 - }, - { - "epoch": 0.976853243551975, - "grad_norm": 1.6359052070119269, - "learning_rate": 5.589633950947803e-09, - "loss": 0.8754, - "step": 8124 - }, - { - "epoch": 0.9769734864426141, - "grad_norm": 1.9100139541515688, - "learning_rate": 5.5315870107035535e-09, - "loss": 0.8981, - "step": 8125 - }, - { - "epoch": 0.9770937293332532, - "grad_norm": 1.6719477272258494, - "learning_rate": 5.473842629731607e-09, - "loss": 0.9801, - "step": 8126 - }, - { - "epoch": 0.9772139722238923, - "grad_norm": 1.86109376854985, - "learning_rate": 5.416400816792066e-09, - "loss": 0.9864, - "step": 8127 - }, - { - "epoch": 0.9773342151145313, - "grad_norm": 2.305309767688499, - "learning_rate": 5.359261580598407e-09, - "loss": 0.9814, - "step": 8128 - }, - { - "epoch": 0.9774544580051704, - "grad_norm": 2.3016471852392635, - "learning_rate": 5.302424929819027e-09, - "loss": 0.9885, - "step": 8129 - }, - { - "epoch": 0.9775747008958096, - "grad_norm": 3.3032984057394574, - "learning_rate": 5.24589087307592e-09, - "loss": 0.9309, - "step": 8130 - }, - { - "epoch": 0.9776949437864486, - "grad_norm": 1.5415406390391018, - "learning_rate": 5.189659418944891e-09, - "loss": 0.8504, - "step": 8131 - }, - { - "epoch": 0.9778151866770877, - "grad_norm": 1.867096775037477, - "learning_rate": 5.133730575956674e-09, - "loss": 0.969, - "step": 8132 - }, - { - "epoch": 0.9779354295677268, - "grad_norm": 2.1272943116214162, - "learning_rate": 5.0781043525953696e-09, - "loss": 0.9227, - "step": 8133 - }, - { - "epoch": 0.9780556724583659, - "grad_norm": 1.5598108420253336, - "learning_rate": 5.0227807572995605e-09, - "loss": 0.9328, - "step": 8134 - }, - { - "epoch": 0.9781759153490049, - "grad_norm": 1.9412457344121303, - "learning_rate": 4.967759798461646e-09, - "loss": 0.8744, - "step": 8135 - }, - { - "epoch": 0.9782961582396441, - "grad_norm": 1.8923862545042363, - "learning_rate": 4.913041484428282e-09, - "loss": 0.936, - "step": 8136 - }, - { - "epoch": 0.9784164011302832, - "grad_norm": 1.651139417897409, - "learning_rate": 4.858625823500384e-09, - "loss": 0.9442, - "step": 8137 - }, - { - "epoch": 0.9785366440209222, - "grad_norm": 1.7601030811553977, - "learning_rate": 4.80451282393246e-09, - "loss": 0.9331, - "step": 8138 - }, - { - "epoch": 0.9786568869115614, - "grad_norm": 2.1358260219814476, - "learning_rate": 4.750702493933722e-09, - "loss": 0.875, - "step": 8139 - }, - { - "epoch": 0.9787771298022004, - "grad_norm": 1.7491281244842576, - "learning_rate": 4.697194841666974e-09, - "loss": 1.0454, - "step": 8140 - }, - { - "epoch": 0.9788973726928395, - "grad_norm": 1.6414827556837237, - "learning_rate": 4.6439898752492764e-09, - "loss": 1.0149, - "step": 8141 - }, - { - "epoch": 0.9790176155834787, - "grad_norm": 0.8030561929758361, - "learning_rate": 4.591087602751731e-09, - "loss": 0.8349, - "step": 8142 - }, - { - "epoch": 0.9791378584741177, - "grad_norm": 1.5904250523292243, - "learning_rate": 4.538488032199916e-09, - "loss": 0.9205, - "step": 8143 - }, - { - "epoch": 0.9792581013647568, - "grad_norm": 1.943771241982723, - "learning_rate": 4.486191171572784e-09, - "loss": 0.8831, - "step": 8144 - }, - { - "epoch": 0.9793783442553959, - "grad_norm": 1.391930042783507, - "learning_rate": 4.434197028803766e-09, - "loss": 0.9758, - "step": 8145 - }, - { - "epoch": 0.979498587146035, - "grad_norm": 1.987655211332122, - "learning_rate": 4.3825056117805514e-09, - "loss": 1.0174, - "step": 8146 - }, - { - "epoch": 0.979618830036674, - "grad_norm": 2.3800370859414235, - "learning_rate": 4.331116928344425e-09, - "loss": 1.0025, - "step": 8147 - }, - { - "epoch": 0.9797390729273132, - "grad_norm": 2.723831076533563, - "learning_rate": 4.28003098629115e-09, - "loss": 0.8344, - "step": 8148 - }, - { - "epoch": 0.9798593158179523, - "grad_norm": 2.0243085363535878, - "learning_rate": 4.229247793370305e-09, - "loss": 0.9931, - "step": 8149 - }, - { - "epoch": 0.9799795587085913, - "grad_norm": 1.54056827835373, - "learning_rate": 4.178767357285951e-09, - "loss": 0.9023, - "step": 8150 - }, - { - "epoch": 0.9800998015992305, - "grad_norm": 1.7964324406173455, - "learning_rate": 4.128589685695516e-09, - "loss": 0.9034, - "step": 8151 - }, - { - "epoch": 0.9802200444898695, - "grad_norm": 1.83508625181215, - "learning_rate": 4.078714786211135e-09, - "loss": 1.042, - "step": 8152 - }, - { - "epoch": 0.9803402873805086, - "grad_norm": 1.5935932157103967, - "learning_rate": 4.029142666398977e-09, - "loss": 0.9625, - "step": 8153 - }, - { - "epoch": 0.9804605302711478, - "grad_norm": 1.930199929589802, - "learning_rate": 3.979873333778805e-09, - "loss": 0.9991, - "step": 8154 - }, - { - "epoch": 0.9805807731617868, - "grad_norm": 1.9580125613642536, - "learning_rate": 3.930906795824862e-09, - "loss": 0.9441, - "step": 8155 - }, - { - "epoch": 0.9807010160524259, - "grad_norm": 1.937635601589819, - "learning_rate": 3.882243059965207e-09, - "loss": 0.9677, - "step": 8156 - }, - { - "epoch": 0.980821258943065, - "grad_norm": 2.4325241751023308, - "learning_rate": 3.833882133582156e-09, - "loss": 0.8699, - "step": 8157 - }, - { - "epoch": 0.9809415018337041, - "grad_norm": 1.5762068166724064, - "learning_rate": 3.785824024012285e-09, - "loss": 0.9733, - "step": 8158 - }, - { - "epoch": 0.9810617447243432, - "grad_norm": 1.7323370980525306, - "learning_rate": 3.738068738545541e-09, - "loss": 0.9806, - "step": 8159 - }, - { - "epoch": 0.9811819876149822, - "grad_norm": 2.2453953992476254, - "learning_rate": 3.6906162844265733e-09, - "loss": 0.9764, - "step": 8160 - }, - { - "epoch": 0.9813022305056214, - "grad_norm": 1.8032748155154323, - "learning_rate": 3.643466668853845e-09, - "loss": 0.9094, - "step": 8161 - }, - { - "epoch": 0.9814224733962604, - "grad_norm": 1.885656654160497, - "learning_rate": 3.59661989898008e-09, - "loss": 0.9516, - "step": 8162 - }, - { - "epoch": 0.9815427162868995, - "grad_norm": 2.7934551108983303, - "learning_rate": 3.5500759819115934e-09, - "loss": 0.9599, - "step": 8163 - }, - { - "epoch": 0.9816629591775387, - "grad_norm": 1.7759889909944326, - "learning_rate": 3.5038349247094034e-09, - "loss": 1.0173, - "step": 8164 - }, - { - "epoch": 0.9817832020681777, - "grad_norm": 1.8205140620282845, - "learning_rate": 3.4578967343878994e-09, - "loss": 0.9644, - "step": 8165 - }, - { - "epoch": 0.9819034449588168, - "grad_norm": 1.6202421962159865, - "learning_rate": 3.4122614179161733e-09, - "loss": 1.0003, - "step": 8166 - }, - { - "epoch": 0.9820236878494559, - "grad_norm": 1.6134330179350604, - "learning_rate": 3.36692898221691e-09, - "loss": 0.9724, - "step": 8167 - }, - { - "epoch": 0.982143930740095, - "grad_norm": 2.366956239992834, - "learning_rate": 3.3218994341668305e-09, - "loss": 0.9321, - "step": 8168 - }, - { - "epoch": 0.982264173630734, - "grad_norm": 1.3672378094817272, - "learning_rate": 3.2771727805971373e-09, - "loss": 0.952, - "step": 8169 - }, - { - "epoch": 0.9823844165213732, - "grad_norm": 2.3765539807449727, - "learning_rate": 3.232749028292847e-09, - "loss": 0.9662, - "step": 8170 - }, - { - "epoch": 0.9825046594120123, - "grad_norm": 1.6172904014099247, - "learning_rate": 3.188628183992792e-09, - "loss": 1.0826, - "step": 8171 - }, - { - "epoch": 0.9826249023026513, - "grad_norm": 0.7968709608985411, - "learning_rate": 3.1448102543902844e-09, - "loss": 0.8514, - "step": 8172 - }, - { - "epoch": 0.9827451451932905, - "grad_norm": 2.0168346944540145, - "learning_rate": 3.1012952461324515e-09, - "loss": 0.8704, - "step": 8173 - }, - { - "epoch": 0.9828653880839295, - "grad_norm": 1.9197897414475336, - "learning_rate": 3.0580831658204575e-09, - "loss": 0.9419, - "step": 8174 - }, - { - "epoch": 0.9829856309745686, - "grad_norm": 1.5903755816832101, - "learning_rate": 3.015174020009281e-09, - "loss": 0.9785, - "step": 8175 - }, - { - "epoch": 0.9831058738652078, - "grad_norm": 1.6572316697356113, - "learning_rate": 2.9725678152086043e-09, - "loss": 0.9468, - "step": 8176 - }, - { - "epoch": 0.9832261167558468, - "grad_norm": 2.6460913785119096, - "learning_rate": 2.930264557881257e-09, - "loss": 1.019, - "step": 8177 - }, - { - "epoch": 0.9833463596464859, - "grad_norm": 0.8311901228714399, - "learning_rate": 2.8882642544452163e-09, - "loss": 0.825, - "step": 8178 - }, - { - "epoch": 0.983466602537125, - "grad_norm": 2.1273321929628923, - "learning_rate": 2.8465669112716083e-09, - "loss": 0.9459, - "step": 8179 - }, - { - "epoch": 0.9835868454277641, - "grad_norm": 1.974838952850134, - "learning_rate": 2.8051725346858177e-09, - "loss": 0.9513, - "step": 8180 - }, - { - "epoch": 0.9837070883184031, - "grad_norm": 1.8313696343724322, - "learning_rate": 2.7640811309674883e-09, - "loss": 0.8987, - "step": 8181 - }, - { - "epoch": 0.9838273312090423, - "grad_norm": 1.5269101202942998, - "learning_rate": 2.7232927063498557e-09, - "loss": 1.0021, - "step": 8182 - }, - { - "epoch": 0.9839475740996814, - "grad_norm": 1.814496026491769, - "learning_rate": 2.682807267020859e-09, - "loss": 0.8814, - "step": 8183 - }, - { - "epoch": 0.9840678169903204, - "grad_norm": 1.4505922127584492, - "learning_rate": 2.642624819121808e-09, - "loss": 0.8272, - "step": 8184 - }, - { - "epoch": 0.9841880598809596, - "grad_norm": 2.0352229072652164, - "learning_rate": 2.6027453687487154e-09, - "loss": 0.8126, - "step": 8185 - }, - { - "epoch": 0.9843083027715986, - "grad_norm": 2.719643106327273, - "learning_rate": 2.5631689219509643e-09, - "loss": 0.7317, - "step": 8186 - }, - { - "epoch": 0.9844285456622377, - "grad_norm": 1.5766249221829562, - "learning_rate": 2.523895484732197e-09, - "loss": 1.0322, - "step": 8187 - }, - { - "epoch": 0.9845487885528769, - "grad_norm": 1.8019999519712249, - "learning_rate": 2.4849250630505357e-09, - "loss": 0.946, - "step": 8188 - }, - { - "epoch": 0.9846690314435159, - "grad_norm": 1.6511521247312486, - "learning_rate": 2.4462576628172528e-09, - "loss": 0.9328, - "step": 8189 - }, - { - "epoch": 0.984789274334155, - "grad_norm": 1.8186597401877664, - "learning_rate": 2.407893289898766e-09, - "loss": 0.9354, - "step": 8190 - }, - { - "epoch": 0.984909517224794, - "grad_norm": 1.8759536225456803, - "learning_rate": 2.3698319501144202e-09, - "loss": 1.0344, - "step": 8191 - }, - { - "epoch": 0.9850297601154332, - "grad_norm": 1.6239000412334026, - "learning_rate": 2.3320736492382644e-09, - "loss": 0.9275, - "step": 8192 - }, - { - "epoch": 0.9851500030060723, - "grad_norm": 1.5010432747861608, - "learning_rate": 2.29461839299816e-09, - "loss": 0.8829, - "step": 8193 - }, - { - "epoch": 0.9852702458967113, - "grad_norm": 2.206465116825457, - "learning_rate": 2.257466187076229e-09, - "loss": 0.999, - "step": 8194 - }, - { - "epoch": 0.9853904887873505, - "grad_norm": 1.675771504264894, - "learning_rate": 2.2206170371081854e-09, - "loss": 0.9027, - "step": 8195 - }, - { - "epoch": 0.9855107316779895, - "grad_norm": 1.582672215352111, - "learning_rate": 2.1840709486842247e-09, - "loss": 1.0448, - "step": 8196 - }, - { - "epoch": 0.9856309745686286, - "grad_norm": 2.125324224533096, - "learning_rate": 2.1478279273481335e-09, - "loss": 0.9953, - "step": 8197 - }, - { - "epoch": 0.9857512174592677, - "grad_norm": 3.8391226851108846, - "learning_rate": 2.1118879785981815e-09, - "loss": 1.0048, - "step": 8198 - }, - { - "epoch": 0.9858714603499068, - "grad_norm": 1.743877463655589, - "learning_rate": 2.0762511078862288e-09, - "loss": 0.9914, - "step": 8199 - }, - { - "epoch": 0.9859917032405459, - "grad_norm": 3.1210621246148826, - "learning_rate": 2.0409173206186183e-09, - "loss": 0.8505, - "step": 8200 - }, - { - "epoch": 0.986111946131185, - "grad_norm": 1.8455012135342783, - "learning_rate": 2.0058866221550617e-09, - "loss": 1.0637, - "step": 8201 - }, - { - "epoch": 0.9862321890218241, - "grad_norm": 1.9331430237377238, - "learning_rate": 1.971159017809976e-09, - "loss": 0.9495, - "step": 8202 - }, - { - "epoch": 0.9863524319124631, - "grad_norm": 6.0597996941123125, - "learning_rate": 1.93673451285159e-09, - "loss": 0.9715, - "step": 8203 - }, - { - "epoch": 0.9864726748031023, - "grad_norm": 0.8224375855322396, - "learning_rate": 1.9026131125019495e-09, - "loss": 0.7927, - "step": 8204 - }, - { - "epoch": 0.9865929176937414, - "grad_norm": 1.7268562584825429, - "learning_rate": 1.8687948219371363e-09, - "loss": 1.0585, - "step": 8205 - }, - { - "epoch": 0.9867131605843804, - "grad_norm": 1.9862513644560058, - "learning_rate": 1.835279646287491e-09, - "loss": 1.0842, - "step": 8206 - }, - { - "epoch": 0.9868334034750196, - "grad_norm": 1.697377858550077, - "learning_rate": 1.8020675906371685e-09, - "loss": 0.9721, - "step": 8207 - }, - { - "epoch": 0.9869536463656586, - "grad_norm": 2.1406704417507956, - "learning_rate": 1.7691586600243612e-09, - "loss": 0.9408, - "step": 8208 - }, - { - "epoch": 0.9870738892562977, - "grad_norm": 2.5259122730162504, - "learning_rate": 1.7365528594415202e-09, - "loss": 1.0546, - "step": 8209 - }, - { - "epoch": 0.9871941321469369, - "grad_norm": 1.5975740966567002, - "learning_rate": 1.7042501938346888e-09, - "loss": 0.8723, - "step": 8210 - }, - { - "epoch": 0.9873143750375759, - "grad_norm": 1.8419852135196284, - "learning_rate": 1.6722506681043913e-09, - "loss": 0.9677, - "step": 8211 - }, - { - "epoch": 0.987434617928215, - "grad_norm": 2.100756636050138, - "learning_rate": 1.640554287104745e-09, - "loss": 0.8875, - "step": 8212 - }, - { - "epoch": 0.9875548608188541, - "grad_norm": 2.1062584353314837, - "learning_rate": 1.609161055644348e-09, - "loss": 0.9912, - "step": 8213 - }, - { - "epoch": 0.9876751037094932, - "grad_norm": 1.876324249567265, - "learning_rate": 1.5780709784849467e-09, - "loss": 0.8707, - "step": 8214 - }, - { - "epoch": 0.9877953466001322, - "grad_norm": 1.844168922548197, - "learning_rate": 1.5472840603436565e-09, - "loss": 1.0139, - "step": 8215 - }, - { - "epoch": 0.9879155894907714, - "grad_norm": 1.8426546625573486, - "learning_rate": 1.5168003058900757e-09, - "loss": 0.9899, - "step": 8216 - }, - { - "epoch": 0.9880358323814105, - "grad_norm": 1.8244865923342106, - "learning_rate": 1.4866197197491715e-09, - "loss": 1.1203, - "step": 8217 - }, - { - "epoch": 0.9881560752720495, - "grad_norm": 2.70561914645571, - "learning_rate": 1.4567423064988371e-09, - "loss": 0.977, - "step": 8218 - }, - { - "epoch": 0.9882763181626887, - "grad_norm": 1.8773290791535582, - "learning_rate": 1.4271680706718913e-09, - "loss": 0.9735, - "step": 8219 - }, - { - "epoch": 0.9883965610533277, - "grad_norm": 1.6958974365315773, - "learning_rate": 1.3978970167543013e-09, - "loss": 1.0247, - "step": 8220 - }, - { - "epoch": 0.9885168039439668, - "grad_norm": 1.9554522267892107, - "learning_rate": 1.3689291491867372e-09, - "loss": 0.9767, - "step": 8221 - }, - { - "epoch": 0.988637046834606, - "grad_norm": 1.9180957884194016, - "learning_rate": 1.3402644723636836e-09, - "loss": 0.9356, - "step": 8222 - }, - { - "epoch": 0.988757289725245, - "grad_norm": 1.8304183790217858, - "learning_rate": 1.311902990633218e-09, - "loss": 1.0282, - "step": 8223 - }, - { - "epoch": 0.9888775326158841, - "grad_norm": 2.8167591482325234, - "learning_rate": 1.2838447082978987e-09, - "loss": 0.9055, - "step": 8224 - }, - { - "epoch": 0.9889977755065231, - "grad_norm": 2.2863864293591996, - "learning_rate": 1.2560896296143208e-09, - "loss": 1.032, - "step": 8225 - }, - { - "epoch": 0.9891180183971623, - "grad_norm": 2.011481943359043, - "learning_rate": 1.2286377587926722e-09, - "loss": 1.0149, - "step": 8226 - }, - { - "epoch": 0.9892382612878013, - "grad_norm": 1.8599349388926087, - "learning_rate": 1.2014890999973992e-09, - "loss": 0.9516, - "step": 8227 - }, - { - "epoch": 0.9893585041784404, - "grad_norm": 1.5409916439535352, - "learning_rate": 1.1746436573472073e-09, - "loss": 0.9803, - "step": 8228 - }, - { - "epoch": 0.9894787470690796, - "grad_norm": 2.0944432491610505, - "learning_rate": 1.1481014349141726e-09, - "loss": 0.8924, - "step": 8229 - }, - { - "epoch": 0.9895989899597186, - "grad_norm": 1.6069041423017076, - "learning_rate": 1.121862436724852e-09, - "loss": 1.0374, - "step": 8230 - }, - { - "epoch": 0.9897192328503577, - "grad_norm": 1.6451696755927239, - "learning_rate": 1.0959266667598388e-09, - "loss": 0.9104, - "step": 8231 - }, - { - "epoch": 0.9898394757409968, - "grad_norm": 3.6513912162388023, - "learning_rate": 1.0702941289533196e-09, - "loss": 0.9406, - "step": 8232 - }, - { - "epoch": 0.9899597186316359, - "grad_norm": 1.952201563926612, - "learning_rate": 1.0449648271939615e-09, - "loss": 1.0805, - "step": 8233 - }, - { - "epoch": 0.990079961522275, - "grad_norm": 1.4283458480524105, - "learning_rate": 1.0199387653240243e-09, - "loss": 0.9271, - "step": 8234 - }, - { - "epoch": 0.9902002044129141, - "grad_norm": 1.5114752206137212, - "learning_rate": 9.952159471400267e-10, - "loss": 0.8971, - "step": 8235 - }, - { - "epoch": 0.9903204473035532, - "grad_norm": 1.7453585790222177, - "learning_rate": 9.707963763923022e-10, - "loss": 1.0321, - "step": 8236 - }, - { - "epoch": 0.9904406901941922, - "grad_norm": 1.7235245019528405, - "learning_rate": 9.466800567854427e-10, - "loss": 0.9816, - "step": 8237 - }, - { - "epoch": 0.9905609330848314, - "grad_norm": 1.7319628463266492, - "learning_rate": 9.228669919778553e-10, - "loss": 0.8865, - "step": 8238 - }, - { - "epoch": 0.9906811759754705, - "grad_norm": 2.06755140911797, - "learning_rate": 8.993571855817617e-10, - "loss": 0.9855, - "step": 8239 - }, - { - "epoch": 0.9908014188661095, - "grad_norm": 1.9545786826553373, - "learning_rate": 8.761506411638642e-10, - "loss": 0.9395, - "step": 8240 - }, - { - "epoch": 0.9909216617567487, - "grad_norm": 1.6507494293771139, - "learning_rate": 8.53247362244236e-10, - "loss": 0.9445, - "step": 8241 - }, - { - "epoch": 0.9910419046473877, - "grad_norm": 1.6470920803314848, - "learning_rate": 8.306473522976532e-10, - "loss": 0.8886, - "step": 8242 - }, - { - "epoch": 0.9911621475380268, - "grad_norm": 1.8303498643471219, - "learning_rate": 8.083506147522623e-10, - "loss": 0.9181, - "step": 8243 - }, - { - "epoch": 0.991282390428666, - "grad_norm": 2.2431420610384345, - "learning_rate": 7.863571529906909e-10, - "loss": 1.0545, - "step": 8244 - }, - { - "epoch": 0.991402633319305, - "grad_norm": 0.8055173798171393, - "learning_rate": 7.646669703489372e-10, - "loss": 0.8459, - "step": 8245 - }, - { - "epoch": 0.9915228762099441, - "grad_norm": 1.68263043593794, - "learning_rate": 7.432800701177023e-10, - "loss": 0.7789, - "step": 8246 - }, - { - "epoch": 0.9916431191005832, - "grad_norm": 0.8359897632839117, - "learning_rate": 7.221964555415017e-10, - "loss": 0.7979, - "step": 8247 - }, - { - "epoch": 0.9917633619912223, - "grad_norm": 1.69437590010509, - "learning_rate": 7.01416129818222e-10, - "loss": 0.9455, - "step": 8248 - }, - { - "epoch": 0.9918836048818613, - "grad_norm": 1.7555799081510954, - "learning_rate": 6.809390961006745e-10, - "loss": 0.7836, - "step": 8249 - }, - { - "epoch": 0.9920038477725005, - "grad_norm": 1.924818994013137, - "learning_rate": 6.607653574948191e-10, - "loss": 0.8926, - "step": 8250 - }, - { - "epoch": 0.9921240906631396, - "grad_norm": 1.7129297371621364, - "learning_rate": 6.408949170613187e-10, - "loss": 1.0166, - "step": 8251 - }, - { - "epoch": 0.9922443335537786, - "grad_norm": 1.6008502399849853, - "learning_rate": 6.213277778144288e-10, - "loss": 1.0189, - "step": 8252 - }, - { - "epoch": 0.9923645764444178, - "grad_norm": 2.045034848128465, - "learning_rate": 6.020639427224416e-10, - "loss": 0.877, - "step": 8253 - }, - { - "epoch": 0.9924848193350568, - "grad_norm": 1.917667714907349, - "learning_rate": 5.831034147076864e-10, - "loss": 0.9228, - "step": 8254 - }, - { - "epoch": 0.9926050622256959, - "grad_norm": 0.7266952229351333, - "learning_rate": 5.644461966463065e-10, - "loss": 0.7706, - "step": 8255 - }, - { - "epoch": 0.9927253051163349, - "grad_norm": 1.8785205849724627, - "learning_rate": 5.460922913687049e-10, - "loss": 0.9556, - "step": 8256 - }, - { - "epoch": 0.9928455480069741, - "grad_norm": 2.1327027233224265, - "learning_rate": 5.280417016593208e-10, - "loss": 0.9463, - "step": 8257 - }, - { - "epoch": 0.9929657908976132, - "grad_norm": 1.6706768270174972, - "learning_rate": 5.102944302559642e-10, - "loss": 0.9486, - "step": 8258 - }, - { - "epoch": 0.9930860337882522, - "grad_norm": 1.9456797180422079, - "learning_rate": 4.9285047985137e-10, - "loss": 0.986, - "step": 8259 - }, - { - "epoch": 0.9932062766788914, - "grad_norm": 1.8034735886448616, - "learning_rate": 4.757098530916436e-10, - "loss": 0.9456, - "step": 8260 - }, - { - "epoch": 0.9933265195695304, - "grad_norm": 2.380261165330276, - "learning_rate": 4.5887255257670563e-10, - "loss": 0.9858, - "step": 8261 - }, - { - "epoch": 0.9934467624601695, - "grad_norm": 1.8945516998131169, - "learning_rate": 4.4233858086117906e-10, - "loss": 0.963, - "step": 8262 - }, - { - "epoch": 0.9935670053508087, - "grad_norm": 2.035574315452936, - "learning_rate": 4.261079404528356e-10, - "loss": 0.8744, - "step": 8263 - }, - { - "epoch": 0.9936872482414477, - "grad_norm": 1.7017758126271718, - "learning_rate": 4.1018063381437205e-10, - "loss": 0.8883, - "step": 8264 - }, - { - "epoch": 0.9938074911320868, - "grad_norm": 0.9407012959204225, - "learning_rate": 3.9455666336141167e-10, - "loss": 0.8571, - "step": 8265 - }, - { - "epoch": 0.9939277340227259, - "grad_norm": 2.317111494961826, - "learning_rate": 3.7923603146450267e-10, - "loss": 1.0235, - "step": 8266 - }, - { - "epoch": 0.994047976913365, - "grad_norm": 1.9878141382861632, - "learning_rate": 3.642187404473418e-10, - "loss": 1.005, - "step": 8267 - }, - { - "epoch": 0.994168219804004, - "grad_norm": 2.4020902670511806, - "learning_rate": 3.495047925885508e-10, - "loss": 1.0498, - "step": 8268 - }, - { - "epoch": 0.9942884626946432, - "grad_norm": 1.8662602504441055, - "learning_rate": 3.350941901199e-10, - "loss": 1.0359, - "step": 8269 - }, - { - "epoch": 0.9944087055852823, - "grad_norm": 2.1939087856460913, - "learning_rate": 3.2098693522764066e-10, - "loss": 1.0248, - "step": 8270 - }, - { - "epoch": 0.9945289484759213, - "grad_norm": 2.1985652623470315, - "learning_rate": 3.071830300516165e-10, - "loss": 1.0162, - "step": 8271 - }, - { - "epoch": 0.9946491913665605, - "grad_norm": 1.9864659144379078, - "learning_rate": 2.9368247668615234e-10, - "loss": 0.9094, - "step": 8272 - }, - { - "epoch": 0.9947694342571995, - "grad_norm": 2.2827441404370012, - "learning_rate": 2.804852771789434e-10, - "loss": 0.82, - "step": 8273 - }, - { - "epoch": 0.9948896771478386, - "grad_norm": 1.7988998945500505, - "learning_rate": 2.675914335321661e-10, - "loss": 0.7528, - "step": 8274 - }, - { - "epoch": 0.9950099200384778, - "grad_norm": 2.324368922406934, - "learning_rate": 2.550009477018111e-10, - "loss": 0.9905, - "step": 8275 - }, - { - "epoch": 0.9951301629291168, - "grad_norm": 2.1892562355718517, - "learning_rate": 2.4271382159790634e-10, - "loss": 0.8247, - "step": 8276 - }, - { - "epoch": 0.9952504058197559, - "grad_norm": 1.5691136580778822, - "learning_rate": 2.3073005708429406e-10, - "loss": 1.0562, - "step": 8277 - }, - { - "epoch": 0.995370648710395, - "grad_norm": 1.7890751600858694, - "learning_rate": 2.190496559788535e-10, - "loss": 0.9128, - "step": 8278 - }, - { - "epoch": 0.9954908916010341, - "grad_norm": 2.183974231197658, - "learning_rate": 2.0767262005372265e-10, - "loss": 0.9665, - "step": 8279 - }, - { - "epoch": 0.9956111344916732, - "grad_norm": 1.8664627903341728, - "learning_rate": 1.965989510346322e-10, - "loss": 0.946, - "step": 8280 - }, - { - "epoch": 0.9957313773823123, - "grad_norm": 1.885313660001469, - "learning_rate": 1.8582865060134955e-10, - "loss": 0.9015, - "step": 8281 - }, - { - "epoch": 0.9958516202729514, - "grad_norm": 0.814035065933674, - "learning_rate": 1.7536172038790098e-10, - "loss": 0.7841, - "step": 8282 - }, - { - "epoch": 0.9959718631635904, - "grad_norm": 2.0694995363598543, - "learning_rate": 1.651981619819054e-10, - "loss": 0.8976, - "step": 8283 - }, - { - "epoch": 0.9960921060542296, - "grad_norm": 5.0523021322895465, - "learning_rate": 1.5533797692546257e-10, - "loss": 0.9078, - "step": 8284 - }, - { - "epoch": 0.9962123489448687, - "grad_norm": 1.8520812905310688, - "learning_rate": 1.4578116671404296e-10, - "loss": 1.0289, - "step": 8285 - }, - { - "epoch": 0.9963325918355077, - "grad_norm": 2.2624484188037974, - "learning_rate": 1.3652773279759777e-10, - "loss": 0.9114, - "step": 8286 - }, - { - "epoch": 0.9964528347261468, - "grad_norm": 1.5460583621577804, - "learning_rate": 1.2757767657989305e-10, - "loss": 0.8164, - "step": 8287 - }, - { - "epoch": 0.9965730776167859, - "grad_norm": 1.6630542548270308, - "learning_rate": 1.1893099941850948e-10, - "loss": 1.0647, - "step": 8288 - }, - { - "epoch": 0.996693320507425, - "grad_norm": 2.102042331301977, - "learning_rate": 1.105877026252866e-10, - "loss": 0.9712, - "step": 8289 - }, - { - "epoch": 0.996813563398064, - "grad_norm": 1.909221584318901, - "learning_rate": 1.0254778746565663e-10, - "loss": 0.9216, - "step": 8290 - }, - { - "epoch": 0.9969338062887032, - "grad_norm": 1.7792242477116311, - "learning_rate": 9.481125515953259e-11, - "loss": 0.9317, - "step": 8291 - }, - { - "epoch": 0.9970540491793423, - "grad_norm": 1.5059768492399799, - "learning_rate": 8.737810688064228e-11, - "loss": 0.9976, - "step": 8292 - }, - { - "epoch": 0.9971742920699813, - "grad_norm": 2.3785385073098175, - "learning_rate": 8.024834375608414e-11, - "loss": 0.9889, - "step": 8293 - }, - { - "epoch": 0.9972945349606205, - "grad_norm": 0.8557539860629503, - "learning_rate": 7.342196686788149e-11, - "loss": 0.8585, - "step": 8294 - }, - { - "epoch": 0.9974147778512595, - "grad_norm": 2.544099664673199, - "learning_rate": 6.689897725142834e-11, - "loss": 0.8854, - "step": 8295 - }, - { - "epoch": 0.9975350207418986, - "grad_norm": 2.585153512731344, - "learning_rate": 6.067937589615545e-11, - "loss": 1.0754, - "step": 8296 - }, - { - "epoch": 0.9976552636325378, - "grad_norm": 0.803385375448975, - "learning_rate": 5.476316374575241e-11, - "loss": 0.7789, - "step": 8297 - }, - { - "epoch": 0.9977755065231768, - "grad_norm": 1.918257557554159, - "learning_rate": 4.9150341697723476e-11, - "loss": 0.9285, - "step": 8298 - }, - { - "epoch": 0.9978957494138159, - "grad_norm": 1.6826091108609844, - "learning_rate": 4.384091060338768e-11, - "loss": 0.8597, - "step": 8299 - }, - { - "epoch": 0.998015992304455, - "grad_norm": 2.138169822814041, - "learning_rate": 3.883487126810081e-11, - "loss": 0.9358, - "step": 8300 - }, - { - "epoch": 0.9981362351950941, - "grad_norm": 1.5452841853526413, - "learning_rate": 3.41322244516995e-11, - "loss": 0.9955, - "step": 8301 - }, - { - "epoch": 0.9982564780857331, - "grad_norm": 1.5634058805901905, - "learning_rate": 2.9732970866946925e-11, - "loss": 0.8242, - "step": 8302 - }, - { - "epoch": 0.9983767209763723, - "grad_norm": 2.0391649242414984, - "learning_rate": 2.563711118175327e-11, - "loss": 0.9867, - "step": 8303 - }, - { - "epoch": 0.9984969638670114, - "grad_norm": 1.717072379060254, - "learning_rate": 2.184464601717728e-11, - "loss": 1.0347, - "step": 8304 - }, - { - "epoch": 0.9986172067576504, - "grad_norm": 2.5703071199957943, - "learning_rate": 1.8355575948758585e-11, - "loss": 0.9813, - "step": 8305 - }, - { - "epoch": 0.9987374496482896, - "grad_norm": 2.075086509096224, - "learning_rate": 1.5169901505407424e-11, - "loss": 0.9433, - "step": 8306 - }, - { - "epoch": 0.9988576925389286, - "grad_norm": 1.6947144890742363, - "learning_rate": 1.228762317073695e-11, - "loss": 0.9293, - "step": 8307 - }, - { - "epoch": 0.9989779354295677, - "grad_norm": 2.0156782276311342, - "learning_rate": 9.70874138195299e-12, - "loss": 0.9847, - "step": 8308 - }, - { - "epoch": 0.9990981783202069, - "grad_norm": 1.525642872369294, - "learning_rate": 7.433256530076093e-12, - "loss": 0.9417, - "step": 8309 - }, - { - "epoch": 0.9992184212108459, - "grad_norm": 2.1736379370559145, - "learning_rate": 5.46116896038562e-12, - "loss": 0.95, - "step": 8310 - }, - { - "epoch": 0.999338664101485, - "grad_norm": 1.872626264361515, - "learning_rate": 3.792478972197699e-12, - "loss": 0.8273, - "step": 8311 - }, - { - "epoch": 0.9994589069921241, - "grad_norm": 2.4116919275985063, - "learning_rate": 2.4271868181990895e-12, - "loss": 0.8938, - "step": 8312 - }, - { - "epoch": 0.9995791498827632, - "grad_norm": 2.0829711853612882, - "learning_rate": 1.3652927060014973e-12, - "loss": 1.0015, - "step": 8313 - }, - { - "epoch": 0.9996993927734023, - "grad_norm": 2.0452206747659263, - "learning_rate": 6.067967965872612e-13, - "loss": 0.8414, - "step": 8314 - }, - { - "epoch": 0.9998196356640414, - "grad_norm": 1.6016204823488522, - "learning_rate": 1.5169920497548615e-13, - "loss": 0.9675, - "step": 8315 - }, - { - "epoch": 0.9999398785546805, - "grad_norm": 1.139140639483344, - "learning_rate": 0.0, - "loss": 0.7758, - "step": 8316 - }, - { - "epoch": 0.9999398785546805, - "step": 8316, - "total_flos": 6.686482292560364e+17, - "train_loss": 0.2849125224575508, - "train_runtime": 42592.135, - "train_samples_per_second": 7.81, - "train_steps_per_second": 0.195 - } - ], - "logging_steps": 1.0, - "max_steps": 8316, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 100, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 6.686482292560364e+17, - "train_batch_size": 5, - "trial_name": null, - "trial_params": null -} diff --git a/sft/smoe/training_args.bin b/sft/smoe/training_args.bin deleted file mode 100644 index 8ebd1eaad774550fa425611ff029fce3fdc9e481..0000000000000000000000000000000000000000 --- a/sft/smoe/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:61756c07bfb3f317144941ced65db861389324ed6960061a4c7838ceba32c266 -size 8120 diff --git a/sft/smoe_cosinegating/added_tokens.json b/sft/smoe_cosinegating/added_tokens.json deleted file mode 100644 index c9d3d3a1b74d87e381e471f7b33784015d2dc0ea..0000000000000000000000000000000000000000 --- a/sft/smoe_cosinegating/added_tokens.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "<|assistant|>": 32001, - "<|endoftext|>": 32000, - "<|end|>": 32007, - "<|placeholder1|>": 32002, - "<|placeholder2|>": 32003, - "<|placeholder3|>": 32004, - "<|placeholder4|>": 32005, - "<|placeholder5|>": 32008, - "<|placeholder6|>": 32009, - "<|system|>": 32006, - "<|user|>": 32010 -} diff --git a/sft/smoe_cosinegating/config.json b/sft/smoe_cosinegating/config.json deleted file mode 100644 index 430f5e5b45fac773baa297d919eb514c87f70e7b..0000000000000000000000000000000000000000 --- a/sft/smoe_cosinegating/config.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "_name_or_path": "/cm/archive/thongdt4/toolkitmoe/checkpoints/phi3mini-siglip224/pft", - "architectures": [ - "LlavaPhiForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" - }, - "balance_loss_coef": 0.1, - "bos_token_id": 1, - "clip_smoe": true, - "dropout": false, - "embd_pdrop": 0.0, - "eos_token_id": 32000, - "freeze_mm_mlp_adapter": false, - "hidden_act": "silu", - "hidden_size": 3072, - "image_aspect_ratio": "pad", - "initializer_range": 0.02, - "intermediate_size": 8192, - "local_rank": 0, - "max_position_embeddings": 4096, - "mlp_smoe": true, - "mm_hidden_size": 1152, - "mm_patch_merge_type": "flat", - "mm_projector_lr": null, - "mm_projector_type": "moe", - "mm_use_im_patch_token": false, - "mm_use_im_start_end": false, - "mm_vision_select_feature": "patch", - "mm_vision_select_layer": -2, - "mm_vision_tower": "google/siglip-so400m-patch14-224", - "model_type": "llava_phi", - "moe_name": "smoe_cosinegating", - "num_attention_heads": 32, - "num_experts": 4, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "num_layers": 3, - "num_selected": 2, - "original_max_position_embeddings": 4096, - "pad_token_id": 32000, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "router_z_loss_coef": 0.01, - "scales": [ - 1, - 3 - ], - "sliding_window": 2047, - "tie_word_embeddings": false, - "tokenizer_model_max_length": 2048, - "tokenizer_padding_side": "right", - "torch_dtype": "bfloat16", - "training": true, - "transformers_version": "4.43.2", - "tune_mm_mlp_adapter": false, - "use_cache": true, - "use_mm_proj": true, - "vocab_size": 32064 -} diff --git a/sft/smoe_cosinegating/generation_config.json b/sft/smoe_cosinegating/generation_config.json deleted file mode 100644 index 3a20824ea777f1ebd11da590160a7209fe3b62c6..0000000000000000000000000000000000000000 --- a/sft/smoe_cosinegating/generation_config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "_from_model_config": true, - "bos_token_id": 1, - "do_sample": true, - "eos_token_id": [ - 32000, - 32001, - 32007 - ], - "pad_token_id": 32000, - "transformers_version": "4.43.2" -} diff --git a/sft/smoe_cosinegating/model-00001-of-00003.safetensors b/sft/smoe_cosinegating/model-00001-of-00003.safetensors deleted file mode 100644 index de5f60a2f71a38d815afa40dcca2c54dd6e0c43b..0000000000000000000000000000000000000000 --- a/sft/smoe_cosinegating/model-00001-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:35ad0d676b7b860b1a5754ae2e4fc3170a859b67bdcc12b80359e39e7c9354bc -size 4972489328 diff --git a/sft/smoe_cosinegating/model-00002-of-00003.safetensors b/sft/smoe_cosinegating/model-00002-of-00003.safetensors deleted file mode 100644 index 5e965fd04865d3403d5c64be5b351afc1c3aea94..0000000000000000000000000000000000000000 --- a/sft/smoe_cosinegating/model-00002-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cd56c83d2cd92280052b2df45a7f84808db1a6f67adf8eaa18c9ae1d54c4b748 -size 4985533608 diff --git a/sft/smoe_cosinegating/model-00003-of-00003.safetensors b/sft/smoe_cosinegating/model-00003-of-00003.safetensors deleted file mode 100644 index 11d1d557011607a4c15c7c9c27851aad8c37207d..0000000000000000000000000000000000000000 --- a/sft/smoe_cosinegating/model-00003-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7911dd07a7b1f5f1a951c1de6fce45f76e9c8f60131be654a505124e2669bb07 -size 248943664 diff --git a/sft/smoe_cosinegating/model.safetensors.index.json b/sft/smoe_cosinegating/model.safetensors.index.json deleted file mode 100644 index f5e0d563e520320e7e1cb47747945b2591e60790..0000000000000000000000000000000000000000 --- a/sft/smoe_cosinegating/model.safetensors.index.json +++ /dev/null @@ -1,1033 +0,0 @@ -{ - "metadata": { - "total_size": 10206819680 - }, - "weight_map": { - "lm_head.weight": "model-00003-of-00003.safetensors", - "model.embed_tokens.weight": "model-00001-of-00003.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.21.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.21.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.30.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.2.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.2.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.2.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.2.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.2.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.0.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.0.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.2.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.gate.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.gate.weight": "model-00003-of-00003.safetensors", - "model.norm.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00002-of-00003.safetensors" - } -} diff --git a/sft/smoe_cosinegating/special_tokens_map.json b/sft/smoe_cosinegating/special_tokens_map.json deleted file mode 100644 index 3e4d5a5bc1cb51753cc9ae0305ece0da60052b10..0000000000000000000000000000000000000000 --- a/sft/smoe_cosinegating/special_tokens_map.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "bos_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "", - "unk_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - } -} diff --git a/sft/smoe_cosinegating/tokenizer.model b/sft/smoe_cosinegating/tokenizer.model deleted file mode 100644 index 6c00c742ce03c627d6cd5b795984876fa49fa899..0000000000000000000000000000000000000000 --- a/sft/smoe_cosinegating/tokenizer.model +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 -size 499723 diff --git a/sft/smoe_cosinegating/tokenizer_config.json b/sft/smoe_cosinegating/tokenizer_config.json deleted file mode 100644 index 3bd56c6314b14d6a33a69cd1802e04dbc1e47840..0000000000000000000000000000000000000000 --- a/sft/smoe_cosinegating/tokenizer_config.json +++ /dev/null @@ -1,132 +0,0 @@ -{ - "add_bos_token": true, - "add_eos_token": false, - "add_prefix_space": true, - "added_tokens_decoder": { - "0": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "1": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "2": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": false - }, - "32000": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "32001": { - "content": "<|assistant|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32002": { - "content": "<|placeholder1|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32003": { - "content": "<|placeholder2|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32004": { - "content": "<|placeholder3|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32005": { - "content": "<|placeholder4|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32006": { - "content": "<|system|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32007": { - "content": "<|end|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32008": { - "content": "<|placeholder5|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32009": { - "content": "<|placeholder6|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32010": { - "content": "<|user|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - } - }, - "bos_token": "", - "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", - "clean_up_tokenization_spaces": false, - "eos_token": "<|endoftext|>", - "legacy": false, - "model_max_length": 2048, - "pad_token": "", - "padding_side": "right", - "sp_model_kwargs": {}, - "spaces_between_special_tokens": false, - "tokenizer_class": "LlamaTokenizer", - "unk_token": "", - "use_default_system_prompt": false -} diff --git a/sft/smoe_cosinegating/trainer_state.json b/sft/smoe_cosinegating/trainer_state.json deleted file mode 100644 index 9543992bd62fe1de5ef735e50976c8b3fc8987c7..0000000000000000000000000000000000000000 --- a/sft/smoe_cosinegating/trainer_state.json +++ /dev/null @@ -1,58254 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.9999398785546805, - "eval_steps": 500, - "global_step": 8316, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.00012024289063909097, - "grad_norm": 16.35469205299927, - "learning_rate": 0.0, - "loss": 1.6395, - "step": 1 - }, - { - "epoch": 0.00024048578127818193, - "grad_norm": 16.559040212045524, - "learning_rate": 5.021476677069823e-07, - "loss": 1.6065, - "step": 2 - }, - { - "epoch": 0.0003607286719172729, - "grad_norm": 12.000080108375188, - "learning_rate": 7.958852231401551e-07, - "loss": 1.4714, - "step": 3 - }, - { - "epoch": 0.00048097156255636386, - "grad_norm": 12.391057857153921, - "learning_rate": 1.0042953354139647e-06, - "loss": 1.5427, - "step": 4 - }, - { - "epoch": 0.0006012144531954548, - "grad_norm": 14.677892261501118, - "learning_rate": 1.1659507774310057e-06, - "loss": 1.6131, - "step": 5 - }, - { - "epoch": 0.0007214573438345458, - "grad_norm": 14.72826633462448, - "learning_rate": 1.2980328908471373e-06, - "loss": 1.4687, - "step": 6 - }, - { - "epoch": 0.0008417002344736367, - "grad_norm": 2.9109527931487365, - "learning_rate": 1.4097067265369432e-06, - "loss": 1.0172, - "step": 7 - }, - { - "epoch": 0.0009619431251127277, - "grad_norm": 15.410754800638113, - "learning_rate": 1.506443003120947e-06, - "loss": 1.4296, - "step": 8 - }, - { - "epoch": 0.0010821860157518186, - "grad_norm": 7.79742344091268, - "learning_rate": 1.5917704462803102e-06, - "loss": 1.533, - "step": 9 - }, - { - "epoch": 0.0012024289063909096, - "grad_norm": 7.908220249194229, - "learning_rate": 1.6680984451379884e-06, - "loss": 1.5649, - "step": 10 - }, - { - "epoch": 0.0013226717970300007, - "grad_norm": 6.1753871533808, - "learning_rate": 1.7371455188905097e-06, - "loss": 1.3701, - "step": 11 - }, - { - "epoch": 0.0014429146876690916, - "grad_norm": 4.649391807241684, - "learning_rate": 1.8001805585541196e-06, - "loss": 1.3534, - "step": 12 - }, - { - "epoch": 0.0015631575783081825, - "grad_norm": 4.065131875464434, - "learning_rate": 1.8581671739548328e-06, - "loss": 1.4137, - "step": 13 - }, - { - "epoch": 0.0016834004689472734, - "grad_norm": 3.782377973303907, - "learning_rate": 1.9118543942439254e-06, - "loss": 1.2726, - "step": 14 - }, - { - "epoch": 0.0018036433595863645, - "grad_norm": 4.454863413077308, - "learning_rate": 1.961836000571161e-06, - "loss": 1.2808, - "step": 15 - }, - { - "epoch": 0.0019238862502254555, - "grad_norm": 3.0240676578037418, - "learning_rate": 2.0085906708279293e-06, - "loss": 0.8763, - "step": 16 - }, - { - "epoch": 0.0020441291408645466, - "grad_norm": 2.727585302849403, - "learning_rate": 2.0525099325728135e-06, - "loss": 1.3182, - "step": 17 - }, - { - "epoch": 0.0021643720315036373, - "grad_norm": 2.9435751569460296, - "learning_rate": 2.0939181139872922e-06, - "loss": 0.9564, - "step": 18 - }, - { - "epoch": 0.0022846149221427284, - "grad_norm": 2.9572984380712826, - "learning_rate": 2.1330868934640175e-06, - "loss": 1.1646, - "step": 19 - }, - { - "epoch": 0.002404857812781819, - "grad_norm": 2.4887137284215095, - "learning_rate": 2.170246112844971e-06, - "loss": 1.0068, - "step": 20 - }, - { - "epoch": 0.0025251007034209102, - "grad_norm": 3.0011597616856607, - "learning_rate": 2.2055919496770983e-06, - "loss": 1.2032, - "step": 21 - }, - { - "epoch": 0.0026453435940600014, - "grad_norm": 2.548921757290007, - "learning_rate": 2.2392931865974923e-06, - "loss": 1.0923, - "step": 22 - }, - { - "epoch": 0.002765586484699092, - "grad_norm": 2.515746309186898, - "learning_rate": 2.271496085962064e-06, - "loss": 1.2052, - "step": 23 - }, - { - "epoch": 0.002885829375338183, - "grad_norm": 2.541141351496633, - "learning_rate": 2.3023282262611022e-06, - "loss": 1.2336, - "step": 24 - }, - { - "epoch": 0.003006072265977274, - "grad_norm": 3.0065398459180237, - "learning_rate": 2.3319015548620114e-06, - "loss": 1.1282, - "step": 25 - }, - { - "epoch": 0.003126315156616365, - "grad_norm": 1.9688875664983307, - "learning_rate": 2.3603148416618152e-06, - "loss": 1.1284, - "step": 26 - }, - { - "epoch": 0.003246558047255456, - "grad_norm": 2.208332265697677, - "learning_rate": 2.3876556694204647e-06, - "loss": 1.2088, - "step": 27 - }, - { - "epoch": 0.003366800937894547, - "grad_norm": 1.9739484426071126, - "learning_rate": 2.414002061950908e-06, - "loss": 1.1032, - "step": 28 - }, - { - "epoch": 0.003487043828533638, - "grad_norm": 1.931330302028803, - "learning_rate": 2.4394238264681557e-06, - "loss": 1.2023, - "step": 29 - }, - { - "epoch": 0.003607286719172729, - "grad_norm": 1.725479407657124, - "learning_rate": 2.4639836682781433e-06, - "loss": 1.201, - "step": 30 - }, - { - "epoch": 0.00372752960981182, - "grad_norm": 2.2947305023266034, - "learning_rate": 2.487738122623307e-06, - "loss": 1.2017, - "step": 31 - }, - { - "epoch": 0.003847772500450911, - "grad_norm": 2.186478185419686, - "learning_rate": 2.510738338534912e-06, - "loss": 1.177, - "step": 32 - }, - { - "epoch": 0.003968015391090002, - "grad_norm": 2.1792267343133203, - "learning_rate": 2.5330307420306648e-06, - "loss": 1.227, - "step": 33 - }, - { - "epoch": 0.004088258281729093, - "grad_norm": 1.9457974116776031, - "learning_rate": 2.554657600279796e-06, - "loss": 1.0781, - "step": 34 - }, - { - "epoch": 0.004208501172368184, - "grad_norm": 1.9626024936225963, - "learning_rate": 2.5756575039679493e-06, - "loss": 1.2466, - "step": 35 - }, - { - "epoch": 0.0043287440630072746, - "grad_norm": 1.8178525355578037, - "learning_rate": 2.5960657816942747e-06, - "loss": 1.1575, - "step": 36 - }, - { - "epoch": 0.004448986953646365, - "grad_norm": 1.2609436205180076, - "learning_rate": 2.6159148575788668e-06, - "loss": 0.8758, - "step": 37 - }, - { - "epoch": 0.004569229844285457, - "grad_norm": 2.049215012960339, - "learning_rate": 2.635234561171e-06, - "loss": 1.1983, - "step": 38 - }, - { - "epoch": 0.0046894727349245475, - "grad_norm": 2.0710266033326143, - "learning_rate": 2.6540523970949877e-06, - "loss": 1.1483, - "step": 39 - }, - { - "epoch": 0.004809715625563638, - "grad_norm": 2.682514445611715, - "learning_rate": 2.6723937805519533e-06, - "loss": 1.1374, - "step": 40 - }, - { - "epoch": 0.00492995851620273, - "grad_norm": 2.1824734292701513, - "learning_rate": 2.690282243737839e-06, - "loss": 1.1473, - "step": 41 - }, - { - "epoch": 0.0050502014068418205, - "grad_norm": 2.371631240883965, - "learning_rate": 2.7077396173840807e-06, - "loss": 1.1977, - "step": 42 - }, - { - "epoch": 0.005170444297480911, - "grad_norm": 2.301859319728255, - "learning_rate": 2.7247861909342594e-06, - "loss": 1.138, - "step": 43 - }, - { - "epoch": 0.005290687188120003, - "grad_norm": 2.2200937634505356, - "learning_rate": 2.7414408543044743e-06, - "loss": 1.0324, - "step": 44 - }, - { - "epoch": 0.005410930078759093, - "grad_norm": 3.064059269274575, - "learning_rate": 2.7577212237113157e-06, - "loss": 1.0093, - "step": 45 - }, - { - "epoch": 0.005531172969398184, - "grad_norm": 2.9171407768377664, - "learning_rate": 2.7736437536690466e-06, - "loss": 1.2575, - "step": 46 - }, - { - "epoch": 0.005651415860037276, - "grad_norm": 2.2106309010925993, - "learning_rate": 2.789223836941131e-06, - "loss": 1.2889, - "step": 47 - }, - { - "epoch": 0.005771658750676366, - "grad_norm": 2.4814729835381617, - "learning_rate": 2.8044758939680847e-06, - "loss": 1.2874, - "step": 48 - }, - { - "epoch": 0.005891901641315457, - "grad_norm": 3.9015557731720145, - "learning_rate": 2.8194134530738863e-06, - "loss": 1.2205, - "step": 49 - }, - { - "epoch": 0.006012144531954548, - "grad_norm": 2.658484652127856, - "learning_rate": 2.834049222568994e-06, - "loss": 1.1109, - "step": 50 - }, - { - "epoch": 0.006132387422593639, - "grad_norm": 1.9478920375742508, - "learning_rate": 2.848395155712969e-06, - "loss": 1.1435, - "step": 51 - }, - { - "epoch": 0.00625263031323273, - "grad_norm": 2.0757816267318296, - "learning_rate": 2.8624625093687977e-06, - "loss": 1.203, - "step": 52 - }, - { - "epoch": 0.006372873203871821, - "grad_norm": 1.9044902370780508, - "learning_rate": 2.876261897070029e-06, - "loss": 1.0992, - "step": 53 - }, - { - "epoch": 0.006493116094510912, - "grad_norm": 2.0571819292248312, - "learning_rate": 2.889803337127447e-06, - "loss": 1.14, - "step": 54 - }, - { - "epoch": 0.006613358985150003, - "grad_norm": 2.4447507365348806, - "learning_rate": 2.903096296321516e-06, - "loss": 1.0676, - "step": 55 - }, - { - "epoch": 0.006733601875789094, - "grad_norm": 1.79674138111379, - "learning_rate": 2.9161497296578907e-06, - "loss": 1.1395, - "step": 56 - }, - { - "epoch": 0.006853844766428185, - "grad_norm": 2.2666842385153227, - "learning_rate": 2.928972116604173e-06, - "loss": 1.0782, - "step": 57 - }, - { - "epoch": 0.006974087657067276, - "grad_norm": 1.8756251246736073, - "learning_rate": 2.9415714941751377e-06, - "loss": 1.2262, - "step": 58 - }, - { - "epoch": 0.007094330547706367, - "grad_norm": 2.1359394332999777, - "learning_rate": 2.9539554871897396e-06, - "loss": 1.1534, - "step": 59 - }, - { - "epoch": 0.007214573438345458, - "grad_norm": 2.1135090195747863, - "learning_rate": 2.9661313359851253e-06, - "loss": 1.1939, - "step": 60 - }, - { - "epoch": 0.007334816328984549, - "grad_norm": 1.9740837748451334, - "learning_rate": 2.978105921839922e-06, - "loss": 1.1649, - "step": 61 - }, - { - "epoch": 0.00745505921962364, - "grad_norm": 2.220753477091044, - "learning_rate": 2.9898857903302893e-06, - "loss": 0.9584, - "step": 62 - }, - { - "epoch": 0.007575302110262731, - "grad_norm": 2.612320020398927, - "learning_rate": 3.001477172817253e-06, - "loss": 1.1013, - "step": 63 - }, - { - "epoch": 0.007695545000901822, - "grad_norm": 3.7148526068259957, - "learning_rate": 3.012886006241894e-06, - "loss": 1.1846, - "step": 64 - }, - { - "epoch": 0.007815787891540913, - "grad_norm": 2.616304436494145, - "learning_rate": 3.0241179513858383e-06, - "loss": 1.1178, - "step": 65 - }, - { - "epoch": 0.007936030782180003, - "grad_norm": 2.4029788286575817, - "learning_rate": 3.035178409737647e-06, - "loss": 1.1082, - "step": 66 - }, - { - "epoch": 0.008056273672819095, - "grad_norm": 2.0471376439840943, - "learning_rate": 3.046072539090907e-06, - "loss": 1.1102, - "step": 67 - }, - { - "epoch": 0.008176516563458186, - "grad_norm": 2.1794514408604857, - "learning_rate": 3.056805267986779e-06, - "loss": 1.2688, - "step": 68 - }, - { - "epoch": 0.008296759454097276, - "grad_norm": 2.0088504231102973, - "learning_rate": 3.0673813091022194e-06, - "loss": 1.1693, - "step": 69 - }, - { - "epoch": 0.008417002344736368, - "grad_norm": 1.1967671338271901, - "learning_rate": 3.0778051716749317e-06, - "loss": 0.8979, - "step": 70 - }, - { - "epoch": 0.008537245235375458, - "grad_norm": 2.013687266930141, - "learning_rate": 3.0880811730470094e-06, - "loss": 1.1355, - "step": 71 - }, - { - "epoch": 0.008657488126014549, - "grad_norm": 2.251717865326768, - "learning_rate": 3.098213449401257e-06, - "loss": 0.8501, - "step": 72 - }, - { - "epoch": 0.00877773101665364, - "grad_norm": 1.756554454329666, - "learning_rate": 3.1082059657570015e-06, - "loss": 1.2048, - "step": 73 - }, - { - "epoch": 0.00889797390729273, - "grad_norm": 2.128428554311684, - "learning_rate": 3.1180625252858496e-06, - "loss": 1.1849, - "step": 74 - }, - { - "epoch": 0.009018216797931822, - "grad_norm": 2.524605495491807, - "learning_rate": 3.1277867780021663e-06, - "loss": 1.0262, - "step": 75 - }, - { - "epoch": 0.009138459688570914, - "grad_norm": 2.1226791723176888, - "learning_rate": 3.1373822288779824e-06, - "loss": 1.1738, - "step": 76 - }, - { - "epoch": 0.009258702579210003, - "grad_norm": 2.181514616821758, - "learning_rate": 3.1468522454274533e-06, - "loss": 1.0117, - "step": 77 - }, - { - "epoch": 0.009378945469849095, - "grad_norm": 1.7845845381704262, - "learning_rate": 3.15620006480197e-06, - "loss": 1.1355, - "step": 78 - }, - { - "epoch": 0.009499188360488187, - "grad_norm": 2.746399863750522, - "learning_rate": 3.1654288004333087e-06, - "loss": 0.9698, - "step": 79 - }, - { - "epoch": 0.009619431251127276, - "grad_norm": 1.9351879599309831, - "learning_rate": 3.1745414482589353e-06, - "loss": 0.9717, - "step": 80 - }, - { - "epoch": 0.009739674141766368, - "grad_norm": 2.309126068037343, - "learning_rate": 3.1835408925606204e-06, - "loss": 1.0891, - "step": 81 - }, - { - "epoch": 0.00985991703240546, - "grad_norm": 2.309870075023435, - "learning_rate": 3.1924299114448214e-06, - "loss": 1.1038, - "step": 82 - }, - { - "epoch": 0.00998015992304455, - "grad_norm": 3.3043362766779834, - "learning_rate": 3.2012111819909055e-06, - "loss": 1.0667, - "step": 83 - }, - { - "epoch": 0.010100402813683641, - "grad_norm": 2.696032353345721, - "learning_rate": 3.2098872850910627e-06, - "loss": 1.166, - "step": 84 - }, - { - "epoch": 0.010220645704322733, - "grad_norm": 2.0835629781675618, - "learning_rate": 3.2184607100038194e-06, - "loss": 1.1143, - "step": 85 - }, - { - "epoch": 0.010340888594961822, - "grad_norm": 2.137316297701606, - "learning_rate": 3.2269338586412414e-06, - "loss": 1.1508, - "step": 86 - }, - { - "epoch": 0.010461131485600914, - "grad_norm": 2.0822347287485843, - "learning_rate": 3.2353090496083106e-06, - "loss": 1.1893, - "step": 87 - }, - { - "epoch": 0.010581374376240005, - "grad_norm": 1.8365301595984935, - "learning_rate": 3.2435885220114572e-06, - "loss": 1.0419, - "step": 88 - }, - { - "epoch": 0.010701617266879095, - "grad_norm": 1.7999670873388018, - "learning_rate": 3.2517744390519113e-06, - "loss": 1.1595, - "step": 89 - }, - { - "epoch": 0.010821860157518187, - "grad_norm": 2.10289078608546, - "learning_rate": 3.259868891418298e-06, - "loss": 0.9682, - "step": 90 - }, - { - "epoch": 0.010942103048157278, - "grad_norm": 1.8648984916205122, - "learning_rate": 3.2678739004917757e-06, - "loss": 1.0658, - "step": 91 - }, - { - "epoch": 0.011062345938796368, - "grad_norm": 1.5515841848465766, - "learning_rate": 3.275791421376029e-06, - "loss": 1.1411, - "step": 92 - }, - { - "epoch": 0.01118258882943546, - "grad_norm": 1.815513736119228, - "learning_rate": 3.2836233457634622e-06, - "loss": 1.1751, - "step": 93 - }, - { - "epoch": 0.011302831720074551, - "grad_norm": 1.8445037335642138, - "learning_rate": 3.2913715046481135e-06, - "loss": 1.0717, - "step": 94 - }, - { - "epoch": 0.011423074610713641, - "grad_norm": 3.9047776766313413, - "learning_rate": 3.299037670895023e-06, - "loss": 1.1116, - "step": 95 - }, - { - "epoch": 0.011543317501352733, - "grad_norm": 1.8573610313871471, - "learning_rate": 3.3066235616750667e-06, - "loss": 1.0213, - "step": 96 - }, - { - "epoch": 0.011663560391991824, - "grad_norm": 2.3023162539846296, - "learning_rate": 3.3141308407736276e-06, - "loss": 1.129, - "step": 97 - }, - { - "epoch": 0.011783803282630914, - "grad_norm": 1.8325218731250024, - "learning_rate": 3.321561120780869e-06, - "loss": 1.0805, - "step": 98 - }, - { - "epoch": 0.011904046173270006, - "grad_norm": 2.7872186116743545, - "learning_rate": 3.3289159651708192e-06, - "loss": 1.235, - "step": 99 - }, - { - "epoch": 0.012024289063909096, - "grad_norm": 1.9920903200868652, - "learning_rate": 3.3361968902759768e-06, - "loss": 1.1981, - "step": 100 - }, - { - "epoch": 0.012144531954548187, - "grad_norm": 3.3679733598430235, - "learning_rate": 3.343405367163663e-06, - "loss": 1.1637, - "step": 101 - }, - { - "epoch": 0.012264774845187279, - "grad_norm": 2.3688955659853743, - "learning_rate": 3.350542823419951e-06, - "loss": 1.0359, - "step": 102 - }, - { - "epoch": 0.012385017735826368, - "grad_norm": 3.067988712643148, - "learning_rate": 3.3576106448465615e-06, - "loss": 1.1074, - "step": 103 - }, - { - "epoch": 0.01250526062646546, - "grad_norm": 2.3258563771798264, - "learning_rate": 3.3646101770757797e-06, - "loss": 1.1019, - "step": 104 - }, - { - "epoch": 0.012625503517104552, - "grad_norm": 1.754946529736763, - "learning_rate": 3.371542727108104e-06, - "loss": 1.0741, - "step": 105 - }, - { - "epoch": 0.012745746407743641, - "grad_norm": 2.3477777078346396, - "learning_rate": 3.3784095647770114e-06, - "loss": 1.1263, - "step": 106 - }, - { - "epoch": 0.012865989298382733, - "grad_norm": 2.000977873161784, - "learning_rate": 3.3852119241449547e-06, - "loss": 1.1099, - "step": 107 - }, - { - "epoch": 0.012986232189021825, - "grad_norm": 2.3053575479233346, - "learning_rate": 3.3919510048344295e-06, - "loss": 1.1892, - "step": 108 - }, - { - "epoch": 0.013106475079660914, - "grad_norm": 2.194596577721412, - "learning_rate": 3.3986279732976907e-06, - "loss": 1.0901, - "step": 109 - }, - { - "epoch": 0.013226717970300006, - "grad_norm": 2.1276648025245555, - "learning_rate": 3.4052439640284983e-06, - "loss": 1.1777, - "step": 110 - }, - { - "epoch": 0.013346960860939098, - "grad_norm": 7.192738622499887, - "learning_rate": 3.4118000807190217e-06, - "loss": 1.0377, - "step": 111 - }, - { - "epoch": 0.013467203751578187, - "grad_norm": 1.6200551578173896, - "learning_rate": 3.4182973973648723e-06, - "loss": 0.9904, - "step": 112 - }, - { - "epoch": 0.013587446642217279, - "grad_norm": 2.9888854768521647, - "learning_rate": 3.424736959321014e-06, - "loss": 1.1649, - "step": 113 - }, - { - "epoch": 0.01370768953285637, - "grad_norm": 1.8074846265463549, - "learning_rate": 3.431119784311155e-06, - "loss": 1.1098, - "step": 114 - }, - { - "epoch": 0.01382793242349546, - "grad_norm": 1.9868496460570406, - "learning_rate": 3.43744686339307e-06, - "loss": 1.0052, - "step": 115 - }, - { - "epoch": 0.013948175314134552, - "grad_norm": 2.053354157036718, - "learning_rate": 3.44371916188212e-06, - "loss": 1.1411, - "step": 116 - }, - { - "epoch": 0.014068418204773643, - "grad_norm": 1.9899192912541872, - "learning_rate": 3.449937620235143e-06, - "loss": 1.0914, - "step": 117 - }, - { - "epoch": 0.014188661095412733, - "grad_norm": 3.2067710408482317, - "learning_rate": 3.456103154896722e-06, - "loss": 1.1028, - "step": 118 - }, - { - "epoch": 0.014308903986051825, - "grad_norm": 1.7002666180166919, - "learning_rate": 3.462216659109757e-06, - "loss": 1.1451, - "step": 119 - }, - { - "epoch": 0.014429146876690916, - "grad_norm": 2.398088361446809, - "learning_rate": 3.4682790036921077e-06, - "loss": 1.0767, - "step": 120 - }, - { - "epoch": 0.014549389767330006, - "grad_norm": 1.722721297577881, - "learning_rate": 3.4742910377810193e-06, - "loss": 1.0509, - "step": 121 - }, - { - "epoch": 0.014669632657969098, - "grad_norm": 2.2112241094183, - "learning_rate": 3.4802535895469042e-06, - "loss": 1.1164, - "step": 122 - }, - { - "epoch": 0.01478987554860819, - "grad_norm": 2.021885221240284, - "learning_rate": 3.4861674668779934e-06, - "loss": 1.1207, - "step": 123 - }, - { - "epoch": 0.01491011843924728, - "grad_norm": 1.83602742725655, - "learning_rate": 3.492033458037272e-06, - "loss": 1.067, - "step": 124 - }, - { - "epoch": 0.01503036132988637, - "grad_norm": 2.039733542449684, - "learning_rate": 3.497852332293018e-06, - "loss": 1.0938, - "step": 125 - }, - { - "epoch": 0.015150604220525462, - "grad_norm": 1.701493978390504, - "learning_rate": 3.5036248405242356e-06, - "loss": 1.1908, - "step": 126 - }, - { - "epoch": 0.015270847111164552, - "grad_norm": 2.6776114114899423, - "learning_rate": 3.509351715802146e-06, - "loss": 1.0538, - "step": 127 - }, - { - "epoch": 0.015391090001803644, - "grad_norm": 1.8480744160865923, - "learning_rate": 3.5150336739488763e-06, - "loss": 1.0093, - "step": 128 - }, - { - "epoch": 0.015511332892442733, - "grad_norm": 1.8988142228456315, - "learning_rate": 3.5206714140744143e-06, - "loss": 1.053, - "step": 129 - }, - { - "epoch": 0.015631575783081827, - "grad_norm": 3.148055363411825, - "learning_rate": 3.5262656190928208e-06, - "loss": 1.107, - "step": 130 - }, - { - "epoch": 0.015751818673720917, - "grad_norm": 0.9888913529314163, - "learning_rate": 3.5318169562186737e-06, - "loss": 0.951, - "step": 131 - }, - { - "epoch": 0.015872061564360006, - "grad_norm": 1.7080713939664807, - "learning_rate": 3.5373260774446292e-06, - "loss": 1.0535, - "step": 132 - }, - { - "epoch": 0.0159923044549991, - "grad_norm": 1.7903819320300551, - "learning_rate": 3.542793620000961e-06, - "loss": 1.1379, - "step": 133 - }, - { - "epoch": 0.01611254734563819, - "grad_norm": 2.5802293916494117, - "learning_rate": 3.5482202067978894e-06, - "loss": 1.0919, - "step": 134 - }, - { - "epoch": 0.01623279023627728, - "grad_norm": 1.9778955359649775, - "learning_rate": 3.553606446851471e-06, - "loss": 0.9815, - "step": 135 - }, - { - "epoch": 0.016353033126916373, - "grad_norm": 1.699367613136256, - "learning_rate": 3.5589529356937613e-06, - "loss": 1.0571, - "step": 136 - }, - { - "epoch": 0.016473276017555463, - "grad_norm": 1.630085031479685, - "learning_rate": 3.5642602557679627e-06, - "loss": 1.0038, - "step": 137 - }, - { - "epoch": 0.016593518908194552, - "grad_norm": 2.0261792787497876, - "learning_rate": 3.569528976809202e-06, - "loss": 1.0672, - "step": 138 - }, - { - "epoch": 0.016713761798833646, - "grad_norm": 1.6175082713273834, - "learning_rate": 3.5747596562115522e-06, - "loss": 1.1084, - "step": 139 - }, - { - "epoch": 0.016834004689472735, - "grad_norm": 11.275500071192797, - "learning_rate": 3.5799528393819138e-06, - "loss": 1.1301, - "step": 140 - }, - { - "epoch": 0.016954247580111825, - "grad_norm": 1.8017321411619691, - "learning_rate": 3.585109060081286e-06, - "loss": 1.0971, - "step": 141 - }, - { - "epoch": 0.017074490470750915, - "grad_norm": 1.707660550323162, - "learning_rate": 3.590228840753992e-06, - "loss": 1.0058, - "step": 142 - }, - { - "epoch": 0.01719473336139001, - "grad_norm": 3.5859474730508785, - "learning_rate": 3.5953126928453423e-06, - "loss": 1.097, - "step": 143 - }, - { - "epoch": 0.017314976252029098, - "grad_norm": 1.8953637879579721, - "learning_rate": 3.600361117108239e-06, - "loss": 1.0343, - "step": 144 - }, - { - "epoch": 0.017435219142668188, - "grad_norm": 2.454569397433865, - "learning_rate": 3.6053746038991616e-06, - "loss": 1.1952, - "step": 145 - }, - { - "epoch": 0.01755546203330728, - "grad_norm": 1.0505558608870722, - "learning_rate": 3.6103536334639843e-06, - "loss": 0.8577, - "step": 146 - }, - { - "epoch": 0.01767570492394637, - "grad_norm": 2.2446940696606363, - "learning_rate": 3.615298676214041e-06, - "loss": 1.0754, - "step": 147 - }, - { - "epoch": 0.01779594781458546, - "grad_norm": 1.7890601678691473, - "learning_rate": 3.6202101929928317e-06, - "loss": 1.1151, - "step": 148 - }, - { - "epoch": 0.017916190705224554, - "grad_norm": 1.658585682874944, - "learning_rate": 3.6250886353337413e-06, - "loss": 1.1103, - "step": 149 - }, - { - "epoch": 0.018036433595863644, - "grad_norm": 1.886975320307951, - "learning_rate": 3.6299344457091488e-06, - "loss": 1.0876, - "step": 150 - }, - { - "epoch": 0.018156676486502734, - "grad_norm": 2.042988355845216, - "learning_rate": 3.634748057771256e-06, - "loss": 1.1542, - "step": 151 - }, - { - "epoch": 0.018276919377141827, - "grad_norm": 3.059718834918477, - "learning_rate": 3.639529896584965e-06, - "loss": 1.0846, - "step": 152 - }, - { - "epoch": 0.018397162267780917, - "grad_norm": 2.237745503857165, - "learning_rate": 3.6442803788531233e-06, - "loss": 1.118, - "step": 153 - }, - { - "epoch": 0.018517405158420007, - "grad_norm": 2.752703031835405, - "learning_rate": 3.6489999131344357e-06, - "loss": 1.1736, - "step": 154 - }, - { - "epoch": 0.0186376480490591, - "grad_norm": 1.5726076879455813, - "learning_rate": 3.653688900054313e-06, - "loss": 1.1383, - "step": 155 - }, - { - "epoch": 0.01875789093969819, - "grad_norm": 1.9109434288350893, - "learning_rate": 3.6583477325089526e-06, - "loss": 0.9872, - "step": 156 - }, - { - "epoch": 0.01887813383033728, - "grad_norm": 2.652041343562326, - "learning_rate": 3.6629767958628916e-06, - "loss": 1.26, - "step": 157 - }, - { - "epoch": 0.018998376720976373, - "grad_norm": 2.114934081003989, - "learning_rate": 3.667576468140291e-06, - "loss": 1.0814, - "step": 158 - }, - { - "epoch": 0.019118619611615463, - "grad_norm": 2.2115498156559092, - "learning_rate": 3.672147120210184e-06, - "loss": 1.1073, - "step": 159 - }, - { - "epoch": 0.019238862502254553, - "grad_norm": 1.9248529947800357, - "learning_rate": 3.6766891159659177e-06, - "loss": 1.0951, - "step": 160 - }, - { - "epoch": 0.019359105392893646, - "grad_norm": 3.554615464371405, - "learning_rate": 3.6812028124990075e-06, - "loss": 1.0916, - "step": 161 - }, - { - "epoch": 0.019479348283532736, - "grad_norm": 2.814469643834894, - "learning_rate": 3.6856885602676016e-06, - "loss": 1.0418, - "step": 162 - }, - { - "epoch": 0.019599591174171826, - "grad_norm": 4.42268412705399, - "learning_rate": 3.6901467032597733e-06, - "loss": 1.1694, - "step": 163 - }, - { - "epoch": 0.01971983406481092, - "grad_norm": 2.3002011874566755, - "learning_rate": 3.694577579151804e-06, - "loss": 1.1011, - "step": 164 - }, - { - "epoch": 0.01984007695545001, - "grad_norm": 2.1564543254074175, - "learning_rate": 3.6989815194616703e-06, - "loss": 0.9661, - "step": 165 - }, - { - "epoch": 0.0199603198460891, - "grad_norm": 2.100605232579327, - "learning_rate": 3.703358849697888e-06, - "loss": 1.0347, - "step": 166 - }, - { - "epoch": 0.020080562736728192, - "grad_norm": 1.6084615837444909, - "learning_rate": 3.7077098895038803e-06, - "loss": 1.051, - "step": 167 - }, - { - "epoch": 0.020200805627367282, - "grad_norm": 6.577727462100708, - "learning_rate": 3.712034952798045e-06, - "loss": 1.1955, - "step": 168 - }, - { - "epoch": 0.02032104851800637, - "grad_norm": 1.9055674847574524, - "learning_rate": 3.7163343479096656e-06, - "loss": 1.0704, - "step": 169 - }, - { - "epoch": 0.020441291408645465, - "grad_norm": 2.124358473037579, - "learning_rate": 3.720608377710802e-06, - "loss": 1.0492, - "step": 170 - }, - { - "epoch": 0.020561534299284555, - "grad_norm": 5.611512013498387, - "learning_rate": 3.7248573397443277e-06, - "loss": 1.0917, - "step": 171 - }, - { - "epoch": 0.020681777189923645, - "grad_norm": 2.2294464173466526, - "learning_rate": 3.729081526348224e-06, - "loss": 1.2104, - "step": 172 - }, - { - "epoch": 0.020802020080562738, - "grad_norm": 1.727992850522863, - "learning_rate": 3.7332812247762777e-06, - "loss": 1.0686, - "step": 173 - }, - { - "epoch": 0.020922262971201828, - "grad_norm": 2.3262655525979, - "learning_rate": 3.737456717315293e-06, - "loss": 1.175, - "step": 174 - }, - { - "epoch": 0.021042505861840918, - "grad_norm": 2.577147234836542, - "learning_rate": 3.7416082813989552e-06, - "loss": 1.1342, - "step": 175 - }, - { - "epoch": 0.02116274875248001, - "grad_norm": 4.313463849702513, - "learning_rate": 3.745736189718439e-06, - "loss": 1.1231, - "step": 176 - }, - { - "epoch": 0.0212829916431191, - "grad_norm": 3.4157495585884323, - "learning_rate": 3.749840710329894e-06, - "loss": 0.9623, - "step": 177 - }, - { - "epoch": 0.02140323453375819, - "grad_norm": 2.4531035209735927, - "learning_rate": 3.7539221067588938e-06, - "loss": 1.202, - "step": 178 - }, - { - "epoch": 0.021523477424397284, - "grad_norm": 5.109577886537093, - "learning_rate": 3.757980638101964e-06, - "loss": 1.1614, - "step": 179 - }, - { - "epoch": 0.021643720315036374, - "grad_norm": 2.146183232294776, - "learning_rate": 3.7620165591252806e-06, - "loss": 1.1276, - "step": 180 - }, - { - "epoch": 0.021763963205675464, - "grad_norm": 2.018893881001818, - "learning_rate": 3.766030120360636e-06, - "loss": 1.168, - "step": 181 - }, - { - "epoch": 0.021884206096314557, - "grad_norm": 1.9707570820010578, - "learning_rate": 3.7700215681987578e-06, - "loss": 1.1085, - "step": 182 - }, - { - "epoch": 0.022004448986953647, - "grad_norm": 1.4902970887033418, - "learning_rate": 3.7739911449800767e-06, - "loss": 1.048, - "step": 183 - }, - { - "epoch": 0.022124691877592736, - "grad_norm": 1.638849732996262, - "learning_rate": 3.7779390890830114e-06, - "loss": 1.0318, - "step": 184 - }, - { - "epoch": 0.02224493476823183, - "grad_norm": 1.6540801031234333, - "learning_rate": 3.7818656350098723e-06, - "loss": 1.0846, - "step": 185 - }, - { - "epoch": 0.02236517765887092, - "grad_norm": 2.9851181942336757, - "learning_rate": 3.7857710134704447e-06, - "loss": 0.9958, - "step": 186 - }, - { - "epoch": 0.02248542054951001, - "grad_norm": 2.194253251912051, - "learning_rate": 3.7896554514633234e-06, - "loss": 1.0202, - "step": 187 - }, - { - "epoch": 0.022605663440149103, - "grad_norm": 2.304053060847807, - "learning_rate": 3.7935191723550955e-06, - "loss": 1.066, - "step": 188 - }, - { - "epoch": 0.022725906330788193, - "grad_norm": 1.8919880264943727, - "learning_rate": 3.797362395957408e-06, - "loss": 1.1161, - "step": 189 - }, - { - "epoch": 0.022846149221427282, - "grad_norm": 3.0560396523824087, - "learning_rate": 3.8011853386020055e-06, - "loss": 1.0008, - "step": 190 - }, - { - "epoch": 0.022966392112066376, - "grad_norm": 2.412305766770921, - "learning_rate": 3.804988213213804e-06, - "loss": 1.1192, - "step": 191 - }, - { - "epoch": 0.023086635002705466, - "grad_norm": 1.0467972228351934, - "learning_rate": 3.808771229382049e-06, - "loss": 0.887, - "step": 192 - }, - { - "epoch": 0.023206877893344555, - "grad_norm": 1.8299607409242349, - "learning_rate": 3.8125345934296324e-06, - "loss": 1.0765, - "step": 193 - }, - { - "epoch": 0.02332712078398365, - "grad_norm": 1.9450016003089734, - "learning_rate": 3.81627850848061e-06, - "loss": 1.103, - "step": 194 - }, - { - "epoch": 0.02344736367462274, - "grad_norm": 2.2391323848021853, - "learning_rate": 3.820003174525994e-06, - "loss": 1.0791, - "step": 195 - }, - { - "epoch": 0.02356760656526183, - "grad_norm": 3.2687456470117704, - "learning_rate": 3.823708788487851e-06, - "loss": 1.063, - "step": 196 - }, - { - "epoch": 0.02368784945590092, - "grad_norm": 1.8956789296192198, - "learning_rate": 3.827395544281781e-06, - "loss": 1.0714, - "step": 197 - }, - { - "epoch": 0.02380809234654001, - "grad_norm": 3.0277020642285604, - "learning_rate": 3.831063632877802e-06, - "loss": 1.0297, - "step": 198 - }, - { - "epoch": 0.0239283352371791, - "grad_norm": 2.1619171636064465, - "learning_rate": 3.834713242359712e-06, - "loss": 0.9824, - "step": 199 - }, - { - "epoch": 0.02404857812781819, - "grad_norm": 1.8305873101425487, - "learning_rate": 3.838344557982959e-06, - "loss": 1.1008, - "step": 200 - }, - { - "epoch": 0.024168821018457284, - "grad_norm": 3.7339450476944167, - "learning_rate": 3.841957762231063e-06, - "loss": 1.0784, - "step": 201 - }, - { - "epoch": 0.024289063909096374, - "grad_norm": 1.9782480885566223, - "learning_rate": 3.8455530348706454e-06, - "loss": 1.0953, - "step": 202 - }, - { - "epoch": 0.024409306799735464, - "grad_norm": 1.8970947540540444, - "learning_rate": 3.849130553005099e-06, - "loss": 1.0059, - "step": 203 - }, - { - "epoch": 0.024529549690374557, - "grad_norm": 2.232671128022612, - "learning_rate": 3.852690491126933e-06, - "loss": 1.061, - "step": 204 - }, - { - "epoch": 0.024649792581013647, - "grad_norm": 2.362443154271765, - "learning_rate": 3.856233021168845e-06, - "loss": 1.1312, - "step": 205 - }, - { - "epoch": 0.024770035471652737, - "grad_norm": 1.8020814517090369, - "learning_rate": 3.859758312553544e-06, - "loss": 1.1383, - "step": 206 - }, - { - "epoch": 0.02489027836229183, - "grad_norm": 2.00277826935575, - "learning_rate": 3.8632665322423735e-06, - "loss": 1.1505, - "step": 207 - }, - { - "epoch": 0.02501052125293092, - "grad_norm": 1.5825833334533188, - "learning_rate": 3.866757844782762e-06, - "loss": 1.0832, - "step": 208 - }, - { - "epoch": 0.02513076414357001, - "grad_norm": 2.0387927348267345, - "learning_rate": 3.870232412354527e-06, - "loss": 1.1374, - "step": 209 - }, - { - "epoch": 0.025251007034209103, - "grad_norm": 1.7611579763766585, - "learning_rate": 3.873690394815086e-06, - "loss": 1.1482, - "step": 210 - }, - { - "epoch": 0.025371249924848193, - "grad_norm": 3.063203380541606, - "learning_rate": 3.877131949743587e-06, - "loss": 1.1431, - "step": 211 - }, - { - "epoch": 0.025491492815487283, - "grad_norm": 1.9421729678824855, - "learning_rate": 3.880557232483993e-06, - "loss": 1.0192, - "step": 212 - }, - { - "epoch": 0.025611735706126376, - "grad_norm": 2.2225925322451907, - "learning_rate": 3.883966396187164e-06, - "loss": 1.1009, - "step": 213 - }, - { - "epoch": 0.025731978596765466, - "grad_norm": 1.8865597107003966, - "learning_rate": 3.887359591851937e-06, - "loss": 1.1296, - "step": 214 - }, - { - "epoch": 0.025852221487404556, - "grad_norm": 1.5089701423497186, - "learning_rate": 3.890736968365265e-06, - "loss": 1.1592, - "step": 215 - }, - { - "epoch": 0.02597246437804365, - "grad_norm": 1.6256659683644246, - "learning_rate": 3.894098672541412e-06, - "loss": 1.0749, - "step": 216 - }, - { - "epoch": 0.02609270726868274, - "grad_norm": 2.139676628815022, - "learning_rate": 3.89744484916025e-06, - "loss": 0.9802, - "step": 217 - }, - { - "epoch": 0.02621295015932183, - "grad_norm": 1.8236670629874758, - "learning_rate": 3.900775641004673e-06, - "loss": 1.0956, - "step": 218 - }, - { - "epoch": 0.026333193049960922, - "grad_norm": 6.851311116842921, - "learning_rate": 3.904091188897156e-06, - "loss": 0.9645, - "step": 219 - }, - { - "epoch": 0.026453435940600012, - "grad_norm": 1.7861732574272002, - "learning_rate": 3.90739163173548e-06, - "loss": 1.0507, - "step": 220 - }, - { - "epoch": 0.026573678831239102, - "grad_norm": 2.2894009805340807, - "learning_rate": 3.910677106527646e-06, - "loss": 1.1148, - "step": 221 - }, - { - "epoch": 0.026693921721878195, - "grad_norm": 2.150232293643172, - "learning_rate": 3.913947748426004e-06, - "loss": 1.0654, - "step": 222 - }, - { - "epoch": 0.026814164612517285, - "grad_norm": 2.4653637997897433, - "learning_rate": 3.9172036907606136e-06, - "loss": 0.9954, - "step": 223 - }, - { - "epoch": 0.026934407503156375, - "grad_norm": 2.0171609393675207, - "learning_rate": 3.920445065071855e-06, - "loss": 1.1699, - "step": 224 - }, - { - "epoch": 0.027054650393795468, - "grad_norm": 4.338391408650035, - "learning_rate": 3.923672001142322e-06, - "loss": 1.0204, - "step": 225 - }, - { - "epoch": 0.027174893284434558, - "grad_norm": 2.2313856922423665, - "learning_rate": 3.926884627027996e-06, - "loss": 1.0768, - "step": 226 - }, - { - "epoch": 0.027295136175073648, - "grad_norm": 1.9794932112764065, - "learning_rate": 3.930083069088744e-06, - "loss": 1.0023, - "step": 227 - }, - { - "epoch": 0.02741537906571274, - "grad_norm": 1.0628602595199355, - "learning_rate": 3.933267452018137e-06, - "loss": 0.8703, - "step": 228 - }, - { - "epoch": 0.02753562195635183, - "grad_norm": 2.090079321017686, - "learning_rate": 3.936437898872622e-06, - "loss": 1.0707, - "step": 229 - }, - { - "epoch": 0.02765586484699092, - "grad_norm": 2.038293803224987, - "learning_rate": 3.9395945311000525e-06, - "loss": 1.0265, - "step": 230 - }, - { - "epoch": 0.027776107737630014, - "grad_norm": 1.8323142152417733, - "learning_rate": 3.942737468567608e-06, - "loss": 1.145, - "step": 231 - }, - { - "epoch": 0.027896350628269104, - "grad_norm": 1.7928799875266224, - "learning_rate": 3.9458668295891026e-06, - "loss": 1.0846, - "step": 232 - }, - { - "epoch": 0.028016593518908194, - "grad_norm": 2.3020380858610663, - "learning_rate": 3.948982730951712e-06, - "loss": 1.0843, - "step": 233 - }, - { - "epoch": 0.028136836409547287, - "grad_norm": 2.1721824593033445, - "learning_rate": 3.9520852879421254e-06, - "loss": 1.0454, - "step": 234 - }, - { - "epoch": 0.028257079300186377, - "grad_norm": 1.9964163383306022, - "learning_rate": 3.955174614372137e-06, - "loss": 1.045, - "step": 235 - }, - { - "epoch": 0.028377322190825467, - "grad_norm": 2.02474968387419, - "learning_rate": 3.9582508226037045e-06, - "loss": 1.075, - "step": 236 - }, - { - "epoch": 0.02849756508146456, - "grad_norm": 2.6906720448633337, - "learning_rate": 3.9613140235734636e-06, - "loss": 1.1604, - "step": 237 - }, - { - "epoch": 0.02861780797210365, - "grad_norm": 1.9649178887214918, - "learning_rate": 3.96436432681674e-06, - "loss": 1.0424, - "step": 238 - }, - { - "epoch": 0.02873805086274274, - "grad_norm": 1.8911309314064666, - "learning_rate": 3.967401840491044e-06, - "loss": 1.1289, - "step": 239 - }, - { - "epoch": 0.028858293753381833, - "grad_norm": 1.8788592358455032, - "learning_rate": 3.97042667139909e-06, - "loss": 1.1057, - "step": 240 - }, - { - "epoch": 0.028978536644020923, - "grad_norm": 1.719308380977341, - "learning_rate": 3.973438925011327e-06, - "loss": 1.1023, - "step": 241 - }, - { - "epoch": 0.029098779534660012, - "grad_norm": 2.081785033591707, - "learning_rate": 3.976438705488002e-06, - "loss": 1.1437, - "step": 242 - }, - { - "epoch": 0.029219022425299106, - "grad_norm": 2.4343006950271806, - "learning_rate": 3.9794261157007744e-06, - "loss": 1.16, - "step": 243 - }, - { - "epoch": 0.029339265315938196, - "grad_norm": 2.889476486097178, - "learning_rate": 3.982401257253887e-06, - "loss": 1.0781, - "step": 244 - }, - { - "epoch": 0.029459508206577285, - "grad_norm": 2.0057823515518516, - "learning_rate": 3.985364230504893e-06, - "loss": 1.1232, - "step": 245 - }, - { - "epoch": 0.02957975109721638, - "grad_norm": 2.31705623982895, - "learning_rate": 3.988315134584976e-06, - "loss": 1.0723, - "step": 246 - }, - { - "epoch": 0.02969999398785547, - "grad_norm": 3.943222247569313, - "learning_rate": 3.991254067418851e-06, - "loss": 1.0289, - "step": 247 - }, - { - "epoch": 0.02982023687849456, - "grad_norm": 1.9002571333510532, - "learning_rate": 3.994181125744254e-06, - "loss": 1.0497, - "step": 248 - }, - { - "epoch": 0.02994047976913365, - "grad_norm": 2.2826231912583084, - "learning_rate": 3.99709640513106e-06, - "loss": 0.9787, - "step": 249 - }, - { - "epoch": 0.03006072265977274, - "grad_norm": 1.942971901149058, - "learning_rate": 4e-06, - "loss": 1.0802, - "step": 250 - }, - { - "epoch": 0.03018096555041183, - "grad_norm": 2.9474531332727505, - "learning_rate": 3.999999848300794e-06, - "loss": 1.1191, - "step": 251 - }, - { - "epoch": 0.030301208441050925, - "grad_norm": 1.4512586195473731, - "learning_rate": 3.999999393203203e-06, - "loss": 1.1182, - "step": 252 - }, - { - "epoch": 0.030421451331690014, - "grad_norm": 1.9844164506082336, - "learning_rate": 3.999998634707293e-06, - "loss": 1.0768, - "step": 253 - }, - { - "epoch": 0.030541694222329104, - "grad_norm": 2.4947198420772825, - "learning_rate": 3.999997572813182e-06, - "loss": 1.1907, - "step": 254 - }, - { - "epoch": 0.030661937112968194, - "grad_norm": 1.7989189795994047, - "learning_rate": 3.999996207521028e-06, - "loss": 1.0998, - "step": 255 - }, - { - "epoch": 0.030782180003607287, - "grad_norm": 2.465558270420488, - "learning_rate": 3.999994538831039e-06, - "loss": 1.0503, - "step": 256 - }, - { - "epoch": 0.030902422894246377, - "grad_norm": 2.43650680261105, - "learning_rate": 3.99999256674347e-06, - "loss": 1.072, - "step": 257 - }, - { - "epoch": 0.031022665784885467, - "grad_norm": 0.9298076712253524, - "learning_rate": 3.999990291258618e-06, - "loss": 0.7927, - "step": 258 - }, - { - "epoch": 0.03114290867552456, - "grad_norm": 2.166548750187375, - "learning_rate": 3.999987712376829e-06, - "loss": 1.0877, - "step": 259 - }, - { - "epoch": 0.031263151566163654, - "grad_norm": 1.8716483996352786, - "learning_rate": 3.999984830098494e-06, - "loss": 1.0442, - "step": 260 - }, - { - "epoch": 0.03138339445680274, - "grad_norm": 2.6305480636907137, - "learning_rate": 3.999981644424051e-06, - "loss": 1.1981, - "step": 261 - }, - { - "epoch": 0.03150363734744183, - "grad_norm": 2.0673702421603553, - "learning_rate": 3.999978155353982e-06, - "loss": 1.0901, - "step": 262 - }, - { - "epoch": 0.03162388023808092, - "grad_norm": 2.2334268332120315, - "learning_rate": 3.9999743628888186e-06, - "loss": 1.0244, - "step": 263 - }, - { - "epoch": 0.03174412312872001, - "grad_norm": 2.5575257380021053, - "learning_rate": 3.999970267029133e-06, - "loss": 1.1147, - "step": 264 - }, - { - "epoch": 0.0318643660193591, - "grad_norm": 2.1543938207731084, - "learning_rate": 3.999965867775548e-06, - "loss": 1.0234, - "step": 265 - }, - { - "epoch": 0.0319846089099982, - "grad_norm": 2.4641992660428484, - "learning_rate": 3.9999611651287315e-06, - "loss": 1.0877, - "step": 266 - }, - { - "epoch": 0.03210485180063729, - "grad_norm": 2.6926543023144394, - "learning_rate": 3.999956159089396e-06, - "loss": 1.026, - "step": 267 - }, - { - "epoch": 0.03222509469127638, - "grad_norm": 6.1020266933901315, - "learning_rate": 3.999950849658302e-06, - "loss": 1.0196, - "step": 268 - }, - { - "epoch": 0.03234533758191547, - "grad_norm": 3.156477967140505, - "learning_rate": 3.999945236836254e-06, - "loss": 1.0668, - "step": 269 - }, - { - "epoch": 0.03246558047255456, - "grad_norm": 3.2498777806702877, - "learning_rate": 3.999939320624103e-06, - "loss": 1.1771, - "step": 270 - }, - { - "epoch": 0.03258582336319365, - "grad_norm": 1.8939605167829372, - "learning_rate": 3.999933101022749e-06, - "loss": 1.1235, - "step": 271 - }, - { - "epoch": 0.032706066253832745, - "grad_norm": 3.3446819397114624, - "learning_rate": 3.999926578033132e-06, - "loss": 1.0863, - "step": 272 - }, - { - "epoch": 0.032826309144471835, - "grad_norm": 2.8774968790390365, - "learning_rate": 3.999919751656244e-06, - "loss": 0.8625, - "step": 273 - }, - { - "epoch": 0.032946552035110925, - "grad_norm": 2.188223256027363, - "learning_rate": 3.9999126218931195e-06, - "loss": 0.9922, - "step": 274 - }, - { - "epoch": 0.033066794925750015, - "grad_norm": 2.557634992271694, - "learning_rate": 3.99990518874484e-06, - "loss": 1.1289, - "step": 275 - }, - { - "epoch": 0.033187037816389105, - "grad_norm": 2.2321219655830373, - "learning_rate": 3.999897452212534e-06, - "loss": 1.1548, - "step": 276 - }, - { - "epoch": 0.033307280707028195, - "grad_norm": 1.8493633489315884, - "learning_rate": 3.999889412297374e-06, - "loss": 1.225, - "step": 277 - }, - { - "epoch": 0.03342752359766729, - "grad_norm": 2.218240706327263, - "learning_rate": 3.999881069000581e-06, - "loss": 1.0239, - "step": 278 - }, - { - "epoch": 0.03354776648830638, - "grad_norm": 2.5247472428823237, - "learning_rate": 3.99987242232342e-06, - "loss": 1.1001, - "step": 279 - }, - { - "epoch": 0.03366800937894547, - "grad_norm": 2.1216410849268335, - "learning_rate": 3.9998634722672026e-06, - "loss": 1.0214, - "step": 280 - }, - { - "epoch": 0.03378825226958456, - "grad_norm": 1.878537401567705, - "learning_rate": 3.999854218833286e-06, - "loss": 1.0227, - "step": 281 - }, - { - "epoch": 0.03390849516022365, - "grad_norm": 2.0441114100273508, - "learning_rate": 3.999844662023075e-06, - "loss": 1.0528, - "step": 282 - }, - { - "epoch": 0.03402873805086274, - "grad_norm": 1.7306591471688466, - "learning_rate": 3.999834801838018e-06, - "loss": 1.147, - "step": 283 - }, - { - "epoch": 0.03414898094150183, - "grad_norm": 1.8907413289245683, - "learning_rate": 3.9998246382796115e-06, - "loss": 0.9715, - "step": 284 - }, - { - "epoch": 0.03426922383214093, - "grad_norm": 2.952139069204454, - "learning_rate": 3.999814171349399e-06, - "loss": 1.1353, - "step": 285 - }, - { - "epoch": 0.03438946672278002, - "grad_norm": 1.669026452642852, - "learning_rate": 3.9998034010489655e-06, - "loss": 0.9692, - "step": 286 - }, - { - "epoch": 0.03450970961341911, - "grad_norm": 2.0476943843918987, - "learning_rate": 3.999792327379946e-06, - "loss": 0.9894, - "step": 287 - }, - { - "epoch": 0.034629952504058197, - "grad_norm": 2.4867866375016057, - "learning_rate": 3.999780950344021e-06, - "loss": 1.2056, - "step": 288 - }, - { - "epoch": 0.034750195394697286, - "grad_norm": 1.701133613725514, - "learning_rate": 3.999769269942916e-06, - "loss": 1.0483, - "step": 289 - }, - { - "epoch": 0.034870438285336376, - "grad_norm": 1.6661259568811462, - "learning_rate": 3.999757286178402e-06, - "loss": 1.0471, - "step": 290 - }, - { - "epoch": 0.03499068117597547, - "grad_norm": 1.6997097665205296, - "learning_rate": 3.999744999052299e-06, - "loss": 1.1333, - "step": 291 - }, - { - "epoch": 0.03511092406661456, - "grad_norm": 0.9899454692310513, - "learning_rate": 3.9997324085664675e-06, - "loss": 0.8971, - "step": 292 - }, - { - "epoch": 0.03523116695725365, - "grad_norm": 1.9854628216988266, - "learning_rate": 3.999719514722821e-06, - "loss": 1.1447, - "step": 293 - }, - { - "epoch": 0.03535140984789274, - "grad_norm": 2.1143789208240786, - "learning_rate": 3.999706317523314e-06, - "loss": 0.981, - "step": 294 - }, - { - "epoch": 0.03547165273853183, - "grad_norm": 1.8402538449952428, - "learning_rate": 3.999692816969948e-06, - "loss": 1.0828, - "step": 295 - }, - { - "epoch": 0.03559189562917092, - "grad_norm": 0.9903958281166478, - "learning_rate": 3.999679013064772e-06, - "loss": 0.9428, - "step": 296 - }, - { - "epoch": 0.03571213851981002, - "grad_norm": 2.224204759248949, - "learning_rate": 3.99966490580988e-06, - "loss": 1.0901, - "step": 297 - }, - { - "epoch": 0.03583238141044911, - "grad_norm": 2.2807982533100657, - "learning_rate": 3.999650495207411e-06, - "loss": 0.8889, - "step": 298 - }, - { - "epoch": 0.0359526243010882, - "grad_norm": 2.3700538126956614, - "learning_rate": 3.999635781259553e-06, - "loss": 1.127, - "step": 299 - }, - { - "epoch": 0.03607286719172729, - "grad_norm": 0.9705162252183205, - "learning_rate": 3.999620763968535e-06, - "loss": 0.7938, - "step": 300 - }, - { - "epoch": 0.03619311008236638, - "grad_norm": 1.5217869351264752, - "learning_rate": 3.999605443336638e-06, - "loss": 1.0984, - "step": 301 - }, - { - "epoch": 0.03631335297300547, - "grad_norm": 2.1197486391941567, - "learning_rate": 3.999589819366185e-06, - "loss": 1.1208, - "step": 302 - }, - { - "epoch": 0.036433595863644565, - "grad_norm": 2.0115461849530623, - "learning_rate": 3.999573892059547e-06, - "loss": 1.0657, - "step": 303 - }, - { - "epoch": 0.036553838754283655, - "grad_norm": 1.802358139635442, - "learning_rate": 3.999557661419138e-06, - "loss": 1.0384, - "step": 304 - }, - { - "epoch": 0.036674081644922744, - "grad_norm": 5.308085862903321, - "learning_rate": 3.9995411274474225e-06, - "loss": 1.0323, - "step": 305 - }, - { - "epoch": 0.036794324535561834, - "grad_norm": 1.6138448031093167, - "learning_rate": 3.999524290146908e-06, - "loss": 1.0473, - "step": 306 - }, - { - "epoch": 0.036914567426200924, - "grad_norm": 1.9917466816597404, - "learning_rate": 3.9995071495201485e-06, - "loss": 1.1361, - "step": 307 - }, - { - "epoch": 0.037034810316840014, - "grad_norm": 13.227431744731229, - "learning_rate": 3.999489705569744e-06, - "loss": 1.2016, - "step": 308 - }, - { - "epoch": 0.03715505320747911, - "grad_norm": 1.9304669616765038, - "learning_rate": 3.999471958298341e-06, - "loss": 1.0976, - "step": 309 - }, - { - "epoch": 0.0372752960981182, - "grad_norm": 2.549894543879414, - "learning_rate": 3.999453907708631e-06, - "loss": 0.9899, - "step": 310 - }, - { - "epoch": 0.03739553898875729, - "grad_norm": 1.72546807724797, - "learning_rate": 3.999435553803353e-06, - "loss": 1.0481, - "step": 311 - }, - { - "epoch": 0.03751578187939638, - "grad_norm": 2.3033873948167263, - "learning_rate": 3.999416896585292e-06, - "loss": 1.0657, - "step": 312 - }, - { - "epoch": 0.03763602477003547, - "grad_norm": 3.7540871281865082, - "learning_rate": 3.9993979360572775e-06, - "loss": 1.0957, - "step": 313 - }, - { - "epoch": 0.03775626766067456, - "grad_norm": 2.7701043530029477, - "learning_rate": 3.999378672222185e-06, - "loss": 1.063, - "step": 314 - }, - { - "epoch": 0.03787651055131366, - "grad_norm": 1.8991134131032978, - "learning_rate": 3.9993591050829385e-06, - "loss": 1.0624, - "step": 315 - }, - { - "epoch": 0.037996753441952746, - "grad_norm": 1.934649924614411, - "learning_rate": 3.999339234642506e-06, - "loss": 1.0266, - "step": 316 - }, - { - "epoch": 0.038116996332591836, - "grad_norm": 2.244036931895058, - "learning_rate": 3.9993190609038994e-06, - "loss": 1.0697, - "step": 317 - }, - { - "epoch": 0.038237239223230926, - "grad_norm": 1.760522609735603, - "learning_rate": 3.999298583870182e-06, - "loss": 1.0689, - "step": 318 - }, - { - "epoch": 0.038357482113870016, - "grad_norm": 1.6759411573254541, - "learning_rate": 3.999277803544458e-06, - "loss": 1.0064, - "step": 319 - }, - { - "epoch": 0.038477725004509106, - "grad_norm": 0.9417840982881327, - "learning_rate": 3.999256719929882e-06, - "loss": 0.9001, - "step": 320 - }, - { - "epoch": 0.0385979678951482, - "grad_norm": 8.71797554104046, - "learning_rate": 3.999235333029651e-06, - "loss": 0.9806, - "step": 321 - }, - { - "epoch": 0.03871821078578729, - "grad_norm": 1.7057174410429117, - "learning_rate": 3.999213642847009e-06, - "loss": 1.0477, - "step": 322 - }, - { - "epoch": 0.03883845367642638, - "grad_norm": 1.6826109529048743, - "learning_rate": 3.999191649385247e-06, - "loss": 1.1445, - "step": 323 - }, - { - "epoch": 0.03895869656706547, - "grad_norm": 0.8820028727910666, - "learning_rate": 3.999169352647702e-06, - "loss": 0.8688, - "step": 324 - }, - { - "epoch": 0.03907893945770456, - "grad_norm": 1.764801778712202, - "learning_rate": 3.999146752637755e-06, - "loss": 1.0544, - "step": 325 - }, - { - "epoch": 0.03919918234834365, - "grad_norm": 2.5423466966110353, - "learning_rate": 3.999123849358836e-06, - "loss": 1.1319, - "step": 326 - }, - { - "epoch": 0.03931942523898275, - "grad_norm": 1.7295783445242983, - "learning_rate": 3.999100642814418e-06, - "loss": 0.9796, - "step": 327 - }, - { - "epoch": 0.03943966812962184, - "grad_norm": 2.0466281654679648, - "learning_rate": 3.999077133008022e-06, - "loss": 1.1254, - "step": 328 - }, - { - "epoch": 0.03955991102026093, - "grad_norm": 2.0629288198716247, - "learning_rate": 3.9990533199432145e-06, - "loss": 1.1361, - "step": 329 - }, - { - "epoch": 0.03968015391090002, - "grad_norm": 3.1414071243159687, - "learning_rate": 3.999029203623608e-06, - "loss": 0.9887, - "step": 330 - }, - { - "epoch": 0.03980039680153911, - "grad_norm": 2.6781775666355987, - "learning_rate": 3.99900478405286e-06, - "loss": 1.0936, - "step": 331 - }, - { - "epoch": 0.0399206396921782, - "grad_norm": 2.229792557549004, - "learning_rate": 3.998980061234676e-06, - "loss": 1.0589, - "step": 332 - }, - { - "epoch": 0.040040882582817294, - "grad_norm": 2.7575726715499984, - "learning_rate": 3.9989550351728055e-06, - "loss": 0.9911, - "step": 333 - }, - { - "epoch": 0.040161125473456384, - "grad_norm": 2.129892775115684, - "learning_rate": 3.998929705871046e-06, - "loss": 1.0665, - "step": 334 - }, - { - "epoch": 0.040281368364095474, - "grad_norm": 2.286571305828752, - "learning_rate": 3.99890407333324e-06, - "loss": 1.1117, - "step": 335 - }, - { - "epoch": 0.040401611254734564, - "grad_norm": 1.6510696729657361, - "learning_rate": 3.998878137563275e-06, - "loss": 1.0929, - "step": 336 - }, - { - "epoch": 0.040521854145373654, - "grad_norm": 2.086617462544958, - "learning_rate": 3.998851898565085e-06, - "loss": 1.0813, - "step": 337 - }, - { - "epoch": 0.04064209703601274, - "grad_norm": 1.8742566860568703, - "learning_rate": 3.998825356342653e-06, - "loss": 1.06, - "step": 338 - }, - { - "epoch": 0.04076233992665183, - "grad_norm": 2.1572118631728143, - "learning_rate": 3.998798510900003e-06, - "loss": 0.9538, - "step": 339 - }, - { - "epoch": 0.04088258281729093, - "grad_norm": 2.5350737712591953, - "learning_rate": 3.998771362241207e-06, - "loss": 1.0802, - "step": 340 - }, - { - "epoch": 0.04100282570793002, - "grad_norm": 1.7904500453087469, - "learning_rate": 3.998743910370385e-06, - "loss": 1.1047, - "step": 341 - }, - { - "epoch": 0.04112306859856911, - "grad_norm": 2.32033017341228, - "learning_rate": 3.998716155291702e-06, - "loss": 0.9597, - "step": 342 - }, - { - "epoch": 0.0412433114892082, - "grad_norm": 1.547356963934722, - "learning_rate": 3.998688097009366e-06, - "loss": 1.1373, - "step": 343 - }, - { - "epoch": 0.04136355437984729, - "grad_norm": 2.08858433503082, - "learning_rate": 3.998659735527636e-06, - "loss": 1.034, - "step": 344 - }, - { - "epoch": 0.04148379727048638, - "grad_norm": 1.5937131241664866, - "learning_rate": 3.998631070850813e-06, - "loss": 1.0027, - "step": 345 - }, - { - "epoch": 0.041604040161125476, - "grad_norm": 2.528581130630316, - "learning_rate": 3.9986021029832455e-06, - "loss": 1.0649, - "step": 346 - }, - { - "epoch": 0.041724283051764566, - "grad_norm": 2.4050630887137574, - "learning_rate": 3.9985728319293285e-06, - "loss": 1.1343, - "step": 347 - }, - { - "epoch": 0.041844525942403656, - "grad_norm": 1.9754795530434623, - "learning_rate": 3.998543257693501e-06, - "loss": 1.0811, - "step": 348 - }, - { - "epoch": 0.041964768833042745, - "grad_norm": 1.6241954866021302, - "learning_rate": 3.998513380280251e-06, - "loss": 1.1065, - "step": 349 - }, - { - "epoch": 0.042085011723681835, - "grad_norm": 2.2480123005187695, - "learning_rate": 3.99848319969411e-06, - "loss": 1.1813, - "step": 350 - }, - { - "epoch": 0.042205254614320925, - "grad_norm": 1.9562956442093538, - "learning_rate": 3.9984527159396564e-06, - "loss": 1.015, - "step": 351 - }, - { - "epoch": 0.04232549750496002, - "grad_norm": 2.1127648136809896, - "learning_rate": 3.9984219290215154e-06, - "loss": 1.0598, - "step": 352 - }, - { - "epoch": 0.04244574039559911, - "grad_norm": 1.4232867432204672, - "learning_rate": 3.998390838944356e-06, - "loss": 1.1162, - "step": 353 - }, - { - "epoch": 0.0425659832862382, - "grad_norm": 4.227049850599484, - "learning_rate": 3.998359445712895e-06, - "loss": 1.1244, - "step": 354 - }, - { - "epoch": 0.04268622617687729, - "grad_norm": 2.8074094692182765, - "learning_rate": 3.9983277493318955e-06, - "loss": 1.0337, - "step": 355 - }, - { - "epoch": 0.04280646906751638, - "grad_norm": 1.8114548004390874, - "learning_rate": 3.998295749806165e-06, - "loss": 1.0419, - "step": 356 - }, - { - "epoch": 0.04292671195815547, - "grad_norm": 1.7981023162906646, - "learning_rate": 3.998263447140558e-06, - "loss": 1.07, - "step": 357 - }, - { - "epoch": 0.04304695484879457, - "grad_norm": 2.0248238663543803, - "learning_rate": 3.998230841339976e-06, - "loss": 1.0448, - "step": 358 - }, - { - "epoch": 0.04316719773943366, - "grad_norm": 2.471351411829312, - "learning_rate": 3.998197932409363e-06, - "loss": 1.0778, - "step": 359 - }, - { - "epoch": 0.04328744063007275, - "grad_norm": 2.197263671874132, - "learning_rate": 3.9981647203537125e-06, - "loss": 1.0915, - "step": 360 - }, - { - "epoch": 0.04340768352071184, - "grad_norm": 1.856704140027587, - "learning_rate": 3.998131205178063e-06, - "loss": 1.1844, - "step": 361 - }, - { - "epoch": 0.04352792641135093, - "grad_norm": 2.694485656405631, - "learning_rate": 3.998097386887498e-06, - "loss": 0.9969, - "step": 362 - }, - { - "epoch": 0.04364816930199002, - "grad_norm": 1.8385073592863714, - "learning_rate": 3.998063265487148e-06, - "loss": 1.0708, - "step": 363 - }, - { - "epoch": 0.043768412192629114, - "grad_norm": 1.8957882830398884, - "learning_rate": 3.99802884098219e-06, - "loss": 1.0408, - "step": 364 - }, - { - "epoch": 0.043888655083268203, - "grad_norm": 2.1068855571906684, - "learning_rate": 3.997994113377845e-06, - "loss": 1.0492, - "step": 365 - }, - { - "epoch": 0.04400889797390729, - "grad_norm": 2.0226007447301626, - "learning_rate": 3.9979590826793815e-06, - "loss": 1.0643, - "step": 366 - }, - { - "epoch": 0.04412914086454638, - "grad_norm": 1.6853755718010397, - "learning_rate": 3.997923748892113e-06, - "loss": 1.0444, - "step": 367 - }, - { - "epoch": 0.04424938375518547, - "grad_norm": 1.5136356472479198, - "learning_rate": 3.9978881120214015e-06, - "loss": 1.1136, - "step": 368 - }, - { - "epoch": 0.04436962664582456, - "grad_norm": 1.9152326471428698, - "learning_rate": 3.997852172072652e-06, - "loss": 1.0222, - "step": 369 - }, - { - "epoch": 0.04448986953646366, - "grad_norm": 2.283490896241492, - "learning_rate": 3.9978159290513155e-06, - "loss": 1.1286, - "step": 370 - }, - { - "epoch": 0.04461011242710275, - "grad_norm": 2.909940939497279, - "learning_rate": 3.997779382962892e-06, - "loss": 1.0337, - "step": 371 - }, - { - "epoch": 0.04473035531774184, - "grad_norm": 1.8640707011014452, - "learning_rate": 3.997742533812924e-06, - "loss": 0.969, - "step": 372 - }, - { - "epoch": 0.04485059820838093, - "grad_norm": 2.1403489874660564, - "learning_rate": 3.997705381607001e-06, - "loss": 1.1402, - "step": 373 - }, - { - "epoch": 0.04497084109902002, - "grad_norm": 1.0017940997362165, - "learning_rate": 3.997667926350761e-06, - "loss": 0.8667, - "step": 374 - }, - { - "epoch": 0.04509108398965911, - "grad_norm": 0.8916358899363718, - "learning_rate": 3.997630168049886e-06, - "loss": 0.8368, - "step": 375 - }, - { - "epoch": 0.045211326880298205, - "grad_norm": 1.630300825946259, - "learning_rate": 3.997592106710101e-06, - "loss": 1.0044, - "step": 376 - }, - { - "epoch": 0.045331569770937295, - "grad_norm": 2.102891466344869, - "learning_rate": 3.997553742337182e-06, - "loss": 0.8934, - "step": 377 - }, - { - "epoch": 0.045451812661576385, - "grad_norm": 1.677562060849708, - "learning_rate": 3.997515074936949e-06, - "loss": 1.1432, - "step": 378 - }, - { - "epoch": 0.045572055552215475, - "grad_norm": 2.0985403346742904, - "learning_rate": 3.997476104515268e-06, - "loss": 1.0861, - "step": 379 - }, - { - "epoch": 0.045692298442854565, - "grad_norm": 1.7931106278543902, - "learning_rate": 3.9974368310780485e-06, - "loss": 1.0047, - "step": 380 - }, - { - "epoch": 0.045812541333493655, - "grad_norm": 2.1864840736951883, - "learning_rate": 3.997397254631251e-06, - "loss": 0.9701, - "step": 381 - }, - { - "epoch": 0.04593278422413275, - "grad_norm": 2.960114941040696, - "learning_rate": 3.997357375180878e-06, - "loss": 0.8861, - "step": 382 - }, - { - "epoch": 0.04605302711477184, - "grad_norm": 1.697929436883314, - "learning_rate": 3.997317192732979e-06, - "loss": 0.9771, - "step": 383 - }, - { - "epoch": 0.04617327000541093, - "grad_norm": 3.353304608059577, - "learning_rate": 3.99727670729365e-06, - "loss": 1.0464, - "step": 384 - }, - { - "epoch": 0.04629351289605002, - "grad_norm": 1.8692864148913126, - "learning_rate": 3.997235918869033e-06, - "loss": 1.0077, - "step": 385 - }, - { - "epoch": 0.04641375578668911, - "grad_norm": 1.8482416689300696, - "learning_rate": 3.997194827465315e-06, - "loss": 1.0608, - "step": 386 - }, - { - "epoch": 0.0465339986773282, - "grad_norm": 2.34345192603272, - "learning_rate": 3.997153433088728e-06, - "loss": 1.1449, - "step": 387 - }, - { - "epoch": 0.0466542415679673, - "grad_norm": 1.800898814966584, - "learning_rate": 3.997111735745554e-06, - "loss": 1.0308, - "step": 388 - }, - { - "epoch": 0.04677448445860639, - "grad_norm": 2.0316059680953957, - "learning_rate": 3.997069735442118e-06, - "loss": 1.0588, - "step": 389 - }, - { - "epoch": 0.04689472734924548, - "grad_norm": 1.3796736011258661, - "learning_rate": 3.997027432184792e-06, - "loss": 1.0247, - "step": 390 - }, - { - "epoch": 0.04701497023988457, - "grad_norm": 1.8916793909639211, - "learning_rate": 3.99698482597999e-06, - "loss": 1.1112, - "step": 391 - }, - { - "epoch": 0.04713521313052366, - "grad_norm": 0.9179718991489367, - "learning_rate": 3.99694191683418e-06, - "loss": 0.893, - "step": 392 - }, - { - "epoch": 0.047255456021162746, - "grad_norm": 1.7440828012618754, - "learning_rate": 3.996898704753867e-06, - "loss": 1.0541, - "step": 393 - }, - { - "epoch": 0.04737569891180184, - "grad_norm": 2.5800672208915114, - "learning_rate": 3.996855189745609e-06, - "loss": 1.1183, - "step": 394 - }, - { - "epoch": 0.04749594180244093, - "grad_norm": 1.873917267034699, - "learning_rate": 3.996811371816007e-06, - "loss": 1.1546, - "step": 395 - }, - { - "epoch": 0.04761618469308002, - "grad_norm": 1.853671454259439, - "learning_rate": 3.996767250971707e-06, - "loss": 1.0123, - "step": 396 - }, - { - "epoch": 0.04773642758371911, - "grad_norm": 1.9927186145773153, - "learning_rate": 3.996722827219403e-06, - "loss": 1.0921, - "step": 397 - }, - { - "epoch": 0.0478566704743582, - "grad_norm": 2.428666860244133, - "learning_rate": 3.996678100565833e-06, - "loss": 1.0511, - "step": 398 - }, - { - "epoch": 0.04797691336499729, - "grad_norm": 4.018966294040276, - "learning_rate": 3.996633071017783e-06, - "loss": 1.1055, - "step": 399 - }, - { - "epoch": 0.04809715625563638, - "grad_norm": 3.269238093096094, - "learning_rate": 3.996587738582084e-06, - "loss": 1.0469, - "step": 400 - }, - { - "epoch": 0.04821739914627548, - "grad_norm": 2.33085042684227, - "learning_rate": 3.9965421032656115e-06, - "loss": 1.0825, - "step": 401 - }, - { - "epoch": 0.04833764203691457, - "grad_norm": 2.121777728046434, - "learning_rate": 3.99649616507529e-06, - "loss": 1.1697, - "step": 402 - }, - { - "epoch": 0.04845788492755366, - "grad_norm": 0.9662751919224724, - "learning_rate": 3.996449924018088e-06, - "loss": 0.9089, - "step": 403 - }, - { - "epoch": 0.04857812781819275, - "grad_norm": 3.2796724432699134, - "learning_rate": 3.99640338010102e-06, - "loss": 1.0291, - "step": 404 - }, - { - "epoch": 0.04869837070883184, - "grad_norm": 1.6174958897911336, - "learning_rate": 3.996356533331146e-06, - "loss": 1.0096, - "step": 405 - }, - { - "epoch": 0.04881861359947093, - "grad_norm": 3.259108178252712, - "learning_rate": 3.996309383715573e-06, - "loss": 0.8421, - "step": 406 - }, - { - "epoch": 0.048938856490110025, - "grad_norm": 1.9733152591784018, - "learning_rate": 3.996261931261454e-06, - "loss": 0.9575, - "step": 407 - }, - { - "epoch": 0.049059099380749115, - "grad_norm": 9.632872693445346, - "learning_rate": 3.996214175975987e-06, - "loss": 1.0906, - "step": 408 - }, - { - "epoch": 0.049179342271388204, - "grad_norm": 1.8238221090597062, - "learning_rate": 3.996166117866417e-06, - "loss": 1.0165, - "step": 409 - }, - { - "epoch": 0.049299585162027294, - "grad_norm": 1.9815658149156095, - "learning_rate": 3.996117756940035e-06, - "loss": 1.0949, - "step": 410 - }, - { - "epoch": 0.049419828052666384, - "grad_norm": 3.351967384762379, - "learning_rate": 3.996069093204175e-06, - "loss": 1.1938, - "step": 411 - }, - { - "epoch": 0.049540070943305474, - "grad_norm": 2.0584231456153588, - "learning_rate": 3.996020126666221e-06, - "loss": 1.1065, - "step": 412 - }, - { - "epoch": 0.04966031383394457, - "grad_norm": 2.187994764778527, - "learning_rate": 3.995970857333601e-06, - "loss": 1.0505, - "step": 413 - }, - { - "epoch": 0.04978055672458366, - "grad_norm": 1.8523484505505654, - "learning_rate": 3.995921285213789e-06, - "loss": 1.0261, - "step": 414 - }, - { - "epoch": 0.04990079961522275, - "grad_norm": 3.4969422743856873, - "learning_rate": 3.995871410314305e-06, - "loss": 1.0388, - "step": 415 - }, - { - "epoch": 0.05002104250586184, - "grad_norm": 1.0147660364433133, - "learning_rate": 3.995821232642714e-06, - "loss": 0.8916, - "step": 416 - }, - { - "epoch": 0.05014128539650093, - "grad_norm": 1.9476555497971189, - "learning_rate": 3.995770752206629e-06, - "loss": 1.0517, - "step": 417 - }, - { - "epoch": 0.05026152828714002, - "grad_norm": 1.8663736269476165, - "learning_rate": 3.995719969013709e-06, - "loss": 1.196, - "step": 418 - }, - { - "epoch": 0.05038177117777912, - "grad_norm": 2.8422899795016843, - "learning_rate": 3.995668883071655e-06, - "loss": 1.0878, - "step": 419 - }, - { - "epoch": 0.050502014068418206, - "grad_norm": 2.221631932007347, - "learning_rate": 3.995617494388219e-06, - "loss": 1.1393, - "step": 420 - }, - { - "epoch": 0.050622256959057296, - "grad_norm": 1.8024062894463384, - "learning_rate": 3.995565802971196e-06, - "loss": 1.0333, - "step": 421 - }, - { - "epoch": 0.050742499849696386, - "grad_norm": 1.7313375595480878, - "learning_rate": 3.995513808828427e-06, - "loss": 0.9053, - "step": 422 - }, - { - "epoch": 0.050862742740335476, - "grad_norm": 1.8185435856359522, - "learning_rate": 3.9954615119678e-06, - "loss": 0.9926, - "step": 423 - }, - { - "epoch": 0.050982985630974566, - "grad_norm": 1.9458287178436802, - "learning_rate": 3.995408912397248e-06, - "loss": 1.0249, - "step": 424 - }, - { - "epoch": 0.05110322852161366, - "grad_norm": 2.109684448149571, - "learning_rate": 3.99535601012475e-06, - "loss": 1.1547, - "step": 425 - }, - { - "epoch": 0.05122347141225275, - "grad_norm": 1.3677064619126582, - "learning_rate": 3.995302805158333e-06, - "loss": 0.9838, - "step": 426 - }, - { - "epoch": 0.05134371430289184, - "grad_norm": 2.467046125539766, - "learning_rate": 3.9952492975060665e-06, - "loss": 1.0656, - "step": 427 - }, - { - "epoch": 0.05146395719353093, - "grad_norm": 2.5796589305088196, - "learning_rate": 3.995195487176067e-06, - "loss": 1.079, - "step": 428 - }, - { - "epoch": 0.05158420008417002, - "grad_norm": 3.1355260954525113, - "learning_rate": 3.995141374176499e-06, - "loss": 1.0817, - "step": 429 - }, - { - "epoch": 0.05170444297480911, - "grad_norm": 1.2416317734306501, - "learning_rate": 3.995086958515572e-06, - "loss": 0.9105, - "step": 430 - }, - { - "epoch": 0.05182468586544821, - "grad_norm": 0.9262419250589327, - "learning_rate": 3.995032240201538e-06, - "loss": 0.8783, - "step": 431 - }, - { - "epoch": 0.0519449287560873, - "grad_norm": 1.0187265428522188, - "learning_rate": 3.9949772192427e-06, - "loss": 0.8875, - "step": 432 - }, - { - "epoch": 0.05206517164672639, - "grad_norm": 1.7664755064029842, - "learning_rate": 3.994921895647405e-06, - "loss": 1.0288, - "step": 433 - }, - { - "epoch": 0.05218541453736548, - "grad_norm": 0.8140612421007744, - "learning_rate": 3.994866269424043e-06, - "loss": 0.8043, - "step": 434 - }, - { - "epoch": 0.05230565742800457, - "grad_norm": 1.918028043929761, - "learning_rate": 3.9948103405810545e-06, - "loss": 1.0093, - "step": 435 - }, - { - "epoch": 0.05242590031864366, - "grad_norm": 1.7165104927700123, - "learning_rate": 3.994754109126923e-06, - "loss": 1.0818, - "step": 436 - }, - { - "epoch": 0.052546143209282754, - "grad_norm": 1.9621668150094467, - "learning_rate": 3.994697575070181e-06, - "loss": 1.1592, - "step": 437 - }, - { - "epoch": 0.052666386099921844, - "grad_norm": 2.41991665326187, - "learning_rate": 3.994640738419402e-06, - "loss": 1.1362, - "step": 438 - }, - { - "epoch": 0.052786628990560934, - "grad_norm": 1.6747536520786444, - "learning_rate": 3.9945835991832075e-06, - "loss": 1.0342, - "step": 439 - }, - { - "epoch": 0.052906871881200024, - "grad_norm": 2.0731192510580922, - "learning_rate": 3.994526157370268e-06, - "loss": 1.1528, - "step": 440 - }, - { - "epoch": 0.053027114771839114, - "grad_norm": 0.9044355117283573, - "learning_rate": 3.994468412989296e-06, - "loss": 0.846, - "step": 441 - }, - { - "epoch": 0.053147357662478203, - "grad_norm": 1.9924349762035936, - "learning_rate": 3.994410366049052e-06, - "loss": 1.1686, - "step": 442 - }, - { - "epoch": 0.0532676005531173, - "grad_norm": 2.034236766144404, - "learning_rate": 3.994352016558341e-06, - "loss": 1.0614, - "step": 443 - }, - { - "epoch": 0.05338784344375639, - "grad_norm": 2.1962753845033176, - "learning_rate": 3.994293364526014e-06, - "loss": 0.9653, - "step": 444 - }, - { - "epoch": 0.05350808633439548, - "grad_norm": 1.6958629224415622, - "learning_rate": 3.99423440996097e-06, - "loss": 1.0659, - "step": 445 - }, - { - "epoch": 0.05362832922503457, - "grad_norm": 2.310015400426482, - "learning_rate": 3.994175152872152e-06, - "loss": 1.0415, - "step": 446 - }, - { - "epoch": 0.05374857211567366, - "grad_norm": 1.7497769622360506, - "learning_rate": 3.994115593268548e-06, - "loss": 1.0229, - "step": 447 - }, - { - "epoch": 0.05386881500631275, - "grad_norm": 2.086108711445604, - "learning_rate": 3.994055731159195e-06, - "loss": 1.0486, - "step": 448 - }, - { - "epoch": 0.053989057896951846, - "grad_norm": 1.611429477162865, - "learning_rate": 3.993995566553172e-06, - "loss": 1.0983, - "step": 449 - }, - { - "epoch": 0.054109300787590936, - "grad_norm": 1.4756974204039641, - "learning_rate": 3.993935099459607e-06, - "loss": 0.9977, - "step": 450 - }, - { - "epoch": 0.054229543678230026, - "grad_norm": 1.8626955019348888, - "learning_rate": 3.993874329887673e-06, - "loss": 0.975, - "step": 451 - }, - { - "epoch": 0.054349786568869116, - "grad_norm": 2.1117104493830805, - "learning_rate": 3.993813257846589e-06, - "loss": 1.085, - "step": 452 - }, - { - "epoch": 0.054470029459508205, - "grad_norm": 2.0454485633069037, - "learning_rate": 3.993751883345619e-06, - "loss": 1.1588, - "step": 453 - }, - { - "epoch": 0.054590272350147295, - "grad_norm": 3.877447155433104, - "learning_rate": 3.993690206394073e-06, - "loss": 1.1003, - "step": 454 - }, - { - "epoch": 0.054710515240786385, - "grad_norm": 2.081711964701047, - "learning_rate": 3.993628227001307e-06, - "loss": 1.1053, - "step": 455 - }, - { - "epoch": 0.05483075813142548, - "grad_norm": 1.9853982765117262, - "learning_rate": 3.993565945176726e-06, - "loss": 0.941, - "step": 456 - }, - { - "epoch": 0.05495100102206457, - "grad_norm": 1.9080509371086176, - "learning_rate": 3.993503360929776e-06, - "loss": 1.07, - "step": 457 - }, - { - "epoch": 0.05507124391270366, - "grad_norm": 1.3940596009980881, - "learning_rate": 3.99344047426995e-06, - "loss": 1.0408, - "step": 458 - }, - { - "epoch": 0.05519148680334275, - "grad_norm": 1.851338304555334, - "learning_rate": 3.993377285206789e-06, - "loss": 1.157, - "step": 459 - }, - { - "epoch": 0.05531172969398184, - "grad_norm": 1.552937581902609, - "learning_rate": 3.99331379374988e-06, - "loss": 1.0943, - "step": 460 - }, - { - "epoch": 0.05543197258462093, - "grad_norm": 2.0068984270228323, - "learning_rate": 3.993249999908852e-06, - "loss": 1.034, - "step": 461 - }, - { - "epoch": 0.05555221547526003, - "grad_norm": 1.8336783936918464, - "learning_rate": 3.993185903693384e-06, - "loss": 1.0942, - "step": 462 - }, - { - "epoch": 0.05567245836589912, - "grad_norm": 1.955351818945916, - "learning_rate": 3.9931215051131995e-06, - "loss": 1.0557, - "step": 463 - }, - { - "epoch": 0.05579270125653821, - "grad_norm": 1.4722011682616383, - "learning_rate": 3.993056804178068e-06, - "loss": 1.0319, - "step": 464 - }, - { - "epoch": 0.0559129441471773, - "grad_norm": 2.120627448600257, - "learning_rate": 3.992991800897803e-06, - "loss": 1.0723, - "step": 465 - }, - { - "epoch": 0.05603318703781639, - "grad_norm": 2.6143242047742388, - "learning_rate": 3.9929264952822665e-06, - "loss": 1.1221, - "step": 466 - }, - { - "epoch": 0.05615342992845548, - "grad_norm": 1.7021642652466207, - "learning_rate": 3.992860887341366e-06, - "loss": 1.1108, - "step": 467 - }, - { - "epoch": 0.056273672819094574, - "grad_norm": 2.1130880948002897, - "learning_rate": 3.992794977085052e-06, - "loss": 1.0507, - "step": 468 - }, - { - "epoch": 0.056393915709733664, - "grad_norm": 1.8000435135138297, - "learning_rate": 3.992728764523326e-06, - "loss": 1.0795, - "step": 469 - }, - { - "epoch": 0.05651415860037275, - "grad_norm": 1.5097728891925826, - "learning_rate": 3.99266224966623e-06, - "loss": 1.0323, - "step": 470 - }, - { - "epoch": 0.05663440149101184, - "grad_norm": 1.8158783832696306, - "learning_rate": 3.992595432523855e-06, - "loss": 1.1022, - "step": 471 - }, - { - "epoch": 0.05675464438165093, - "grad_norm": 1.7712130270405655, - "learning_rate": 3.992528313106338e-06, - "loss": 1.091, - "step": 472 - }, - { - "epoch": 0.05687488727229002, - "grad_norm": 2.2051983539521878, - "learning_rate": 3.9924608914238595e-06, - "loss": 1.0475, - "step": 473 - }, - { - "epoch": 0.05699513016292912, - "grad_norm": 2.271812041405946, - "learning_rate": 3.992393167486648e-06, - "loss": 1.0647, - "step": 474 - }, - { - "epoch": 0.05711537305356821, - "grad_norm": 2.253452407391905, - "learning_rate": 3.992325141304977e-06, - "loss": 1.0294, - "step": 475 - }, - { - "epoch": 0.0572356159442073, - "grad_norm": 1.8904373454133818, - "learning_rate": 3.992256812889166e-06, - "loss": 1.0879, - "step": 476 - }, - { - "epoch": 0.05735585883484639, - "grad_norm": 2.1822164760797955, - "learning_rate": 3.992188182249582e-06, - "loss": 0.9946, - "step": 477 - }, - { - "epoch": 0.05747610172548548, - "grad_norm": 2.28515604133279, - "learning_rate": 3.992119249396633e-06, - "loss": 1.1406, - "step": 478 - }, - { - "epoch": 0.05759634461612457, - "grad_norm": 1.6580433762953857, - "learning_rate": 3.992050014340778e-06, - "loss": 1.0459, - "step": 479 - }, - { - "epoch": 0.057716587506763666, - "grad_norm": 1.4184122364108054, - "learning_rate": 3.99198047709252e-06, - "loss": 0.8089, - "step": 480 - }, - { - "epoch": 0.057836830397402755, - "grad_norm": 1.5786329150785683, - "learning_rate": 3.991910637662408e-06, - "loss": 1.016, - "step": 481 - }, - { - "epoch": 0.057957073288041845, - "grad_norm": 16.86333056441133, - "learning_rate": 3.9918404960610355e-06, - "loss": 1.0336, - "step": 482 - }, - { - "epoch": 0.058077316178680935, - "grad_norm": 2.005565291170047, - "learning_rate": 3.991770052299043e-06, - "loss": 1.0003, - "step": 483 - }, - { - "epoch": 0.058197559069320025, - "grad_norm": 2.765125639063203, - "learning_rate": 3.991699306387118e-06, - "loss": 1.1048, - "step": 484 - }, - { - "epoch": 0.058317801959959115, - "grad_norm": 1.759496805910646, - "learning_rate": 3.991628258335991e-06, - "loss": 1.017, - "step": 485 - }, - { - "epoch": 0.05843804485059821, - "grad_norm": 2.538543554239324, - "learning_rate": 3.991556908156442e-06, - "loss": 1.1062, - "step": 486 - }, - { - "epoch": 0.0585582877412373, - "grad_norm": 1.6450520685181933, - "learning_rate": 3.9914852558592914e-06, - "loss": 1.0998, - "step": 487 - }, - { - "epoch": 0.05867853063187639, - "grad_norm": 3.1413175663814132, - "learning_rate": 3.991413301455413e-06, - "loss": 1.0497, - "step": 488 - }, - { - "epoch": 0.05879877352251548, - "grad_norm": 2.5596626203965096, - "learning_rate": 3.991341044955719e-06, - "loss": 0.9986, - "step": 489 - }, - { - "epoch": 0.05891901641315457, - "grad_norm": 1.8944325214892064, - "learning_rate": 3.991268486371172e-06, - "loss": 1.045, - "step": 490 - }, - { - "epoch": 0.05903925930379366, - "grad_norm": 2.593669108772509, - "learning_rate": 3.991195625712779e-06, - "loss": 1.0967, - "step": 491 - }, - { - "epoch": 0.05915950219443276, - "grad_norm": 1.9476140513096754, - "learning_rate": 3.991122462991592e-06, - "loss": 1.0432, - "step": 492 - }, - { - "epoch": 0.05927974508507185, - "grad_norm": 3.007197963803823, - "learning_rate": 3.991048998218712e-06, - "loss": 1.0424, - "step": 493 - }, - { - "epoch": 0.05939998797571094, - "grad_norm": 2.226107152589374, - "learning_rate": 3.990975231405281e-06, - "loss": 0.9943, - "step": 494 - }, - { - "epoch": 0.05952023086635003, - "grad_norm": 11.47409190987461, - "learning_rate": 3.990901162562491e-06, - "loss": 1.0121, - "step": 495 - }, - { - "epoch": 0.05964047375698912, - "grad_norm": 1.7416259818671522, - "learning_rate": 3.9908267917015765e-06, - "loss": 1.1306, - "step": 496 - }, - { - "epoch": 0.059760716647628206, - "grad_norm": 2.4723008127853143, - "learning_rate": 3.990752118833821e-06, - "loss": 1.1447, - "step": 497 - }, - { - "epoch": 0.0598809595382673, - "grad_norm": 2.2242071174877474, - "learning_rate": 3.990677143970553e-06, - "loss": 1.0064, - "step": 498 - }, - { - "epoch": 0.06000120242890639, - "grad_norm": 1.870803460078764, - "learning_rate": 3.990601867123144e-06, - "loss": 1.0388, - "step": 499 - }, - { - "epoch": 0.06012144531954548, - "grad_norm": 1.9912688287585933, - "learning_rate": 3.990526288303014e-06, - "loss": 1.0817, - "step": 500 - }, - { - "epoch": 0.06024168821018457, - "grad_norm": 1.5476597808535726, - "learning_rate": 3.9904504075216295e-06, - "loss": 1.1291, - "step": 501 - }, - { - "epoch": 0.06036193110082366, - "grad_norm": 3.1693095839844587, - "learning_rate": 3.990374224790501e-06, - "loss": 1.1587, - "step": 502 - }, - { - "epoch": 0.06048217399146275, - "grad_norm": 2.0673710494321704, - "learning_rate": 3.990297740121185e-06, - "loss": 0.9369, - "step": 503 - }, - { - "epoch": 0.06060241688210185, - "grad_norm": 1.669015238967177, - "learning_rate": 3.990220953525284e-06, - "loss": 1.0046, - "step": 504 - }, - { - "epoch": 0.06072265977274094, - "grad_norm": 4.490900376065377, - "learning_rate": 3.9901438650144465e-06, - "loss": 0.9681, - "step": 505 - }, - { - "epoch": 0.06084290266338003, - "grad_norm": 3.3172475513803112, - "learning_rate": 3.990066474600367e-06, - "loss": 1.1395, - "step": 506 - }, - { - "epoch": 0.06096314555401912, - "grad_norm": 1.6876045477235093, - "learning_rate": 3.989988782294786e-06, - "loss": 0.9038, - "step": 507 - }, - { - "epoch": 0.06108338844465821, - "grad_norm": 1.7318041881896113, - "learning_rate": 3.989910788109489e-06, - "loss": 1.1742, - "step": 508 - }, - { - "epoch": 0.0612036313352973, - "grad_norm": 2.2389248493526046, - "learning_rate": 3.989832492056307e-06, - "loss": 0.9792, - "step": 509 - }, - { - "epoch": 0.06132387422593639, - "grad_norm": 2.028876928470859, - "learning_rate": 3.989753894147119e-06, - "loss": 1.0304, - "step": 510 - }, - { - "epoch": 0.061444117116575485, - "grad_norm": 2.318881123169343, - "learning_rate": 3.989674994393846e-06, - "loss": 1.0249, - "step": 511 - }, - { - "epoch": 0.061564360007214575, - "grad_norm": 2.5729038892009135, - "learning_rate": 3.98959579280846e-06, - "loss": 1.1696, - "step": 512 - }, - { - "epoch": 0.061684602897853665, - "grad_norm": 1.8877904782134003, - "learning_rate": 3.989516289402973e-06, - "loss": 1.0572, - "step": 513 - }, - { - "epoch": 0.061804845788492754, - "grad_norm": 2.566697938736816, - "learning_rate": 3.989436484189447e-06, - "loss": 1.0464, - "step": 514 - }, - { - "epoch": 0.061925088679131844, - "grad_norm": 2.508628451031014, - "learning_rate": 3.9893563771799885e-06, - "loss": 1.047, - "step": 515 - }, - { - "epoch": 0.062045331569770934, - "grad_norm": 2.99218526894907, - "learning_rate": 3.989275968386749e-06, - "loss": 1.0967, - "step": 516 - }, - { - "epoch": 0.06216557446041003, - "grad_norm": 1.8094603602380537, - "learning_rate": 3.989195257821926e-06, - "loss": 0.9918, - "step": 517 - }, - { - "epoch": 0.06228581735104912, - "grad_norm": 2.2192567058675494, - "learning_rate": 3.989114245497765e-06, - "loss": 1.0744, - "step": 518 - }, - { - "epoch": 0.06240606024168821, - "grad_norm": 2.6145594825011385, - "learning_rate": 3.989032931426554e-06, - "loss": 1.1771, - "step": 519 - }, - { - "epoch": 0.06252630313232731, - "grad_norm": 1.9063452399804113, - "learning_rate": 3.9889513156206295e-06, - "loss": 1.0998, - "step": 520 - }, - { - "epoch": 0.06264654602296639, - "grad_norm": 2.277075723936089, - "learning_rate": 3.988869398092371e-06, - "loss": 0.9524, - "step": 521 - }, - { - "epoch": 0.06276678891360549, - "grad_norm": 2.338554116083551, - "learning_rate": 3.988787178854206e-06, - "loss": 1.0132, - "step": 522 - }, - { - "epoch": 0.06288703180424457, - "grad_norm": 1.893273067533815, - "learning_rate": 3.988704657918608e-06, - "loss": 1.1029, - "step": 523 - }, - { - "epoch": 0.06300727469488367, - "grad_norm": 2.3753483165504305, - "learning_rate": 3.988621835298094e-06, - "loss": 1.041, - "step": 524 - }, - { - "epoch": 0.06312751758552275, - "grad_norm": 1.8813197126177104, - "learning_rate": 3.988538711005229e-06, - "loss": 1.1445, - "step": 525 - }, - { - "epoch": 0.06324776047616185, - "grad_norm": 6.604091543578632, - "learning_rate": 3.988455285052622e-06, - "loss": 1.1186, - "step": 526 - }, - { - "epoch": 0.06336800336680094, - "grad_norm": 1.7993523810218128, - "learning_rate": 3.98837155745293e-06, - "loss": 1.058, - "step": 527 - }, - { - "epoch": 0.06348824625744003, - "grad_norm": 1.9516624773253648, - "learning_rate": 3.988287528218854e-06, - "loss": 0.9887, - "step": 528 - }, - { - "epoch": 0.06360848914807912, - "grad_norm": 1.8215725839139094, - "learning_rate": 3.98820319736314e-06, - "loss": 1.1251, - "step": 529 - }, - { - "epoch": 0.0637287320387182, - "grad_norm": 7.662173848266215, - "learning_rate": 3.988118564898582e-06, - "loss": 1.0822, - "step": 530 - }, - { - "epoch": 0.0638489749293573, - "grad_norm": 3.0792358253018337, - "learning_rate": 3.988033630838019e-06, - "loss": 1.1155, - "step": 531 - }, - { - "epoch": 0.0639692178199964, - "grad_norm": 1.6534645121330245, - "learning_rate": 3.987948395194334e-06, - "loss": 1.1042, - "step": 532 - }, - { - "epoch": 0.06408946071063548, - "grad_norm": 1.8639099851697645, - "learning_rate": 3.987862857980458e-06, - "loss": 1.0076, - "step": 533 - }, - { - "epoch": 0.06420970360127458, - "grad_norm": 1.8193285988512753, - "learning_rate": 3.987777019209368e-06, - "loss": 0.9947, - "step": 534 - }, - { - "epoch": 0.06432994649191366, - "grad_norm": 1.6793994745331606, - "learning_rate": 3.987690878894084e-06, - "loss": 1.0408, - "step": 535 - }, - { - "epoch": 0.06445018938255276, - "grad_norm": 3.3332079545918956, - "learning_rate": 3.987604437047673e-06, - "loss": 1.0742, - "step": 536 - }, - { - "epoch": 0.06457043227319184, - "grad_norm": 1.8585480846745626, - "learning_rate": 3.987517693683251e-06, - "loss": 1.0086, - "step": 537 - }, - { - "epoch": 0.06469067516383094, - "grad_norm": 2.355926924597142, - "learning_rate": 3.9874306488139745e-06, - "loss": 1.1815, - "step": 538 - }, - { - "epoch": 0.06481091805447003, - "grad_norm": 2.5721397513929527, - "learning_rate": 3.987343302453049e-06, - "loss": 1.1, - "step": 539 - }, - { - "epoch": 0.06493116094510912, - "grad_norm": 1.7939855148470623, - "learning_rate": 3.987255654613724e-06, - "loss": 1.0552, - "step": 540 - }, - { - "epoch": 0.06505140383574821, - "grad_norm": 2.2825787998397837, - "learning_rate": 3.987167705309296e-06, - "loss": 0.9284, - "step": 541 - }, - { - "epoch": 0.0651716467263873, - "grad_norm": 1.7867475299660676, - "learning_rate": 3.987079454553108e-06, - "loss": 1.176, - "step": 542 - }, - { - "epoch": 0.0652918896170264, - "grad_norm": 1.8782351875670584, - "learning_rate": 3.986990902358546e-06, - "loss": 1.146, - "step": 543 - }, - { - "epoch": 0.06541213250766549, - "grad_norm": 1.8599180822894186, - "learning_rate": 3.986902048739045e-06, - "loss": 1.1558, - "step": 544 - }, - { - "epoch": 0.06553237539830457, - "grad_norm": 2.3305089636979965, - "learning_rate": 3.986812893708082e-06, - "loss": 1.028, - "step": 545 - }, - { - "epoch": 0.06565261828894367, - "grad_norm": 1.9166269367357258, - "learning_rate": 3.9867234372791826e-06, - "loss": 1.0463, - "step": 546 - }, - { - "epoch": 0.06577286117958275, - "grad_norm": 1.5000298814976127, - "learning_rate": 3.986633679465918e-06, - "loss": 1.1027, - "step": 547 - }, - { - "epoch": 0.06589310407022185, - "grad_norm": 2.3859218634372197, - "learning_rate": 3.986543620281904e-06, - "loss": 1.0383, - "step": 548 - }, - { - "epoch": 0.06601334696086093, - "grad_norm": 1.681950592644386, - "learning_rate": 3.986453259740802e-06, - "loss": 1.1337, - "step": 549 - }, - { - "epoch": 0.06613358985150003, - "grad_norm": 2.505365812199047, - "learning_rate": 3.986362597856319e-06, - "loss": 1.0131, - "step": 550 - }, - { - "epoch": 0.06625383274213913, - "grad_norm": 2.738224654762412, - "learning_rate": 3.986271634642211e-06, - "loss": 1.0397, - "step": 551 - }, - { - "epoch": 0.06637407563277821, - "grad_norm": 1.9204883069727965, - "learning_rate": 3.986180370112274e-06, - "loss": 1.0455, - "step": 552 - }, - { - "epoch": 0.0664943185234173, - "grad_norm": 1.9493301757277255, - "learning_rate": 3.986088804280354e-06, - "loss": 0.9752, - "step": 553 - }, - { - "epoch": 0.06661456141405639, - "grad_norm": 3.087536052539698, - "learning_rate": 3.985996937160342e-06, - "loss": 1.1659, - "step": 554 - }, - { - "epoch": 0.06673480430469549, - "grad_norm": 1.957105441029151, - "learning_rate": 3.985904768766173e-06, - "loss": 0.927, - "step": 555 - }, - { - "epoch": 0.06685504719533458, - "grad_norm": 2.4017665758955657, - "learning_rate": 3.98581229911183e-06, - "loss": 0.9886, - "step": 556 - }, - { - "epoch": 0.06697529008597367, - "grad_norm": 2.07636260827939, - "learning_rate": 3.985719528211341e-06, - "loss": 1.1351, - "step": 557 - }, - { - "epoch": 0.06709553297661276, - "grad_norm": 0.9284640685048166, - "learning_rate": 3.985626456078777e-06, - "loss": 0.9063, - "step": 558 - }, - { - "epoch": 0.06721577586725185, - "grad_norm": 2.2033852497896227, - "learning_rate": 3.985533082728259e-06, - "loss": 1.0895, - "step": 559 - }, - { - "epoch": 0.06733601875789094, - "grad_norm": 2.555392573406997, - "learning_rate": 3.985439408173951e-06, - "loss": 0.9761, - "step": 560 - }, - { - "epoch": 0.06745626164853002, - "grad_norm": 1.892433688515039, - "learning_rate": 3.9853454324300634e-06, - "loss": 0.9413, - "step": 561 - }, - { - "epoch": 0.06757650453916912, - "grad_norm": 1.9396196891199138, - "learning_rate": 3.985251155510852e-06, - "loss": 1.0066, - "step": 562 - }, - { - "epoch": 0.06769674742980822, - "grad_norm": 3.101529753005258, - "learning_rate": 3.98515657743062e-06, - "loss": 1.0354, - "step": 563 - }, - { - "epoch": 0.0678169903204473, - "grad_norm": 2.2051601884571994, - "learning_rate": 3.985061698203711e-06, - "loss": 1.003, - "step": 564 - }, - { - "epoch": 0.0679372332110864, - "grad_norm": 0.8850510160106406, - "learning_rate": 3.984966517844523e-06, - "loss": 0.9021, - "step": 565 - }, - { - "epoch": 0.06805747610172548, - "grad_norm": 2.311205836895907, - "learning_rate": 3.984871036367492e-06, - "loss": 1.034, - "step": 566 - }, - { - "epoch": 0.06817771899236458, - "grad_norm": 1.9645110013502773, - "learning_rate": 3.984775253787102e-06, - "loss": 1.0606, - "step": 567 - }, - { - "epoch": 0.06829796188300366, - "grad_norm": 3.2033884823987613, - "learning_rate": 3.984679170117885e-06, - "loss": 1.113, - "step": 568 - }, - { - "epoch": 0.06841820477364276, - "grad_norm": 5.47106535807641, - "learning_rate": 3.984582785374415e-06, - "loss": 1.0109, - "step": 569 - }, - { - "epoch": 0.06853844766428185, - "grad_norm": 2.318932427892856, - "learning_rate": 3.9844860995713155e-06, - "loss": 1.0389, - "step": 570 - }, - { - "epoch": 0.06865869055492094, - "grad_norm": 3.742213876268932, - "learning_rate": 3.9843891127232524e-06, - "loss": 1.0446, - "step": 571 - }, - { - "epoch": 0.06877893344556003, - "grad_norm": 2.6529621195585325, - "learning_rate": 3.984291824844938e-06, - "loss": 0.8975, - "step": 572 - }, - { - "epoch": 0.06889917633619912, - "grad_norm": 37.167909427501264, - "learning_rate": 3.984194235951132e-06, - "loss": 1.074, - "step": 573 - }, - { - "epoch": 0.06901941922683821, - "grad_norm": 3.7641142667802883, - "learning_rate": 3.9840963460566375e-06, - "loss": 1.0769, - "step": 574 - }, - { - "epoch": 0.06913966211747731, - "grad_norm": 2.72648283833864, - "learning_rate": 3.983998155176305e-06, - "loss": 1.1179, - "step": 575 - }, - { - "epoch": 0.06925990500811639, - "grad_norm": 0.8312682830979717, - "learning_rate": 3.9838996633250305e-06, - "loss": 0.8253, - "step": 576 - }, - { - "epoch": 0.06938014789875549, - "grad_norm": 2.3637163250409072, - "learning_rate": 3.983800870517753e-06, - "loss": 1.1104, - "step": 577 - }, - { - "epoch": 0.06950039078939457, - "grad_norm": 3.197020878451284, - "learning_rate": 3.983701776769463e-06, - "loss": 1.0193, - "step": 578 - }, - { - "epoch": 0.06962063368003367, - "grad_norm": 2.131714702519315, - "learning_rate": 3.9836023820951885e-06, - "loss": 1.0761, - "step": 579 - }, - { - "epoch": 0.06974087657067275, - "grad_norm": 1.8475594041537151, - "learning_rate": 3.983502686510011e-06, - "loss": 0.915, - "step": 580 - }, - { - "epoch": 0.06986111946131185, - "grad_norm": 1.9697024751021621, - "learning_rate": 3.9834026900290525e-06, - "loss": 0.9539, - "step": 581 - }, - { - "epoch": 0.06998136235195095, - "grad_norm": 1.7251626117742056, - "learning_rate": 3.983302392667482e-06, - "loss": 1.2235, - "step": 582 - }, - { - "epoch": 0.07010160524259003, - "grad_norm": 1.64358326461527, - "learning_rate": 3.983201794440517e-06, - "loss": 1.1671, - "step": 583 - }, - { - "epoch": 0.07022184813322913, - "grad_norm": 1.648117323601863, - "learning_rate": 3.9831008953634165e-06, - "loss": 0.911, - "step": 584 - }, - { - "epoch": 0.07034209102386821, - "grad_norm": 1.7631234195976653, - "learning_rate": 3.9829996954514864e-06, - "loss": 1.0366, - "step": 585 - }, - { - "epoch": 0.0704623339145073, - "grad_norm": 1.7125156039898415, - "learning_rate": 3.982898194720079e-06, - "loss": 1.0692, - "step": 586 - }, - { - "epoch": 0.0705825768051464, - "grad_norm": 4.967093713480358, - "learning_rate": 3.982796393184592e-06, - "loss": 1.0413, - "step": 587 - }, - { - "epoch": 0.07070281969578548, - "grad_norm": 0.8069747343705279, - "learning_rate": 3.98269429086047e-06, - "loss": 0.8836, - "step": 588 - }, - { - "epoch": 0.07082306258642458, - "grad_norm": 2.4764891412020567, - "learning_rate": 3.982591887763199e-06, - "loss": 1.0934, - "step": 589 - }, - { - "epoch": 0.07094330547706366, - "grad_norm": 2.1167212391624197, - "learning_rate": 3.982489183908316e-06, - "loss": 1.0449, - "step": 590 - }, - { - "epoch": 0.07106354836770276, - "grad_norm": 1.7927898907844106, - "learning_rate": 3.982386179311399e-06, - "loss": 1.0711, - "step": 591 - }, - { - "epoch": 0.07118379125834184, - "grad_norm": 2.0476805288541207, - "learning_rate": 3.982282873988075e-06, - "loss": 1.1056, - "step": 592 - }, - { - "epoch": 0.07130403414898094, - "grad_norm": 1.6563958877514273, - "learning_rate": 3.982179267954016e-06, - "loss": 1.0989, - "step": 593 - }, - { - "epoch": 0.07142427703962004, - "grad_norm": 2.589536530697557, - "learning_rate": 3.982075361224937e-06, - "loss": 1.1962, - "step": 594 - }, - { - "epoch": 0.07154451993025912, - "grad_norm": 1.7180598173685684, - "learning_rate": 3.981971153816602e-06, - "loss": 1.1136, - "step": 595 - }, - { - "epoch": 0.07166476282089822, - "grad_norm": 1.5976669199244484, - "learning_rate": 3.981866645744819e-06, - "loss": 1.1806, - "step": 596 - }, - { - "epoch": 0.0717850057115373, - "grad_norm": 2.2427803345736717, - "learning_rate": 3.9817618370254416e-06, - "loss": 1.0385, - "step": 597 - }, - { - "epoch": 0.0719052486021764, - "grad_norm": 2.1011684573007967, - "learning_rate": 3.9816567276743684e-06, - "loss": 1.097, - "step": 598 - }, - { - "epoch": 0.0720254914928155, - "grad_norm": 2.1022852679354247, - "learning_rate": 3.9815513177075466e-06, - "loss": 0.9997, - "step": 599 - }, - { - "epoch": 0.07214573438345458, - "grad_norm": 1.6380744363315767, - "learning_rate": 3.9814456071409646e-06, - "loss": 0.9385, - "step": 600 - }, - { - "epoch": 0.07226597727409367, - "grad_norm": 2.474041061356664, - "learning_rate": 3.981339595990659e-06, - "loss": 1.0872, - "step": 601 - }, - { - "epoch": 0.07238622016473276, - "grad_norm": 2.41604907815234, - "learning_rate": 3.981233284272713e-06, - "loss": 1.0346, - "step": 602 - }, - { - "epoch": 0.07250646305537185, - "grad_norm": 1.5045068111449327, - "learning_rate": 3.981126672003253e-06, - "loss": 1.1229, - "step": 603 - }, - { - "epoch": 0.07262670594601094, - "grad_norm": 5.086605373894885, - "learning_rate": 3.981019759198451e-06, - "loss": 1.0197, - "step": 604 - }, - { - "epoch": 0.07274694883665003, - "grad_norm": 2.1822633460732894, - "learning_rate": 3.980912545874528e-06, - "loss": 1.0605, - "step": 605 - }, - { - "epoch": 0.07286719172728913, - "grad_norm": 3.070971236287707, - "learning_rate": 3.980805032047746e-06, - "loss": 1.0866, - "step": 606 - }, - { - "epoch": 0.07298743461792821, - "grad_norm": 1.8450054484961471, - "learning_rate": 3.980697217734415e-06, - "loss": 1.0348, - "step": 607 - }, - { - "epoch": 0.07310767750856731, - "grad_norm": 2.0282266271018656, - "learning_rate": 3.980589102950891e-06, - "loss": 1.1484, - "step": 608 - }, - { - "epoch": 0.07322792039920639, - "grad_norm": 3.0257074080640822, - "learning_rate": 3.9804806877135755e-06, - "loss": 0.9967, - "step": 609 - }, - { - "epoch": 0.07334816328984549, - "grad_norm": 2.134857762085605, - "learning_rate": 3.980371972038915e-06, - "loss": 1.0918, - "step": 610 - }, - { - "epoch": 0.07346840618048459, - "grad_norm": 2.317993546556733, - "learning_rate": 3.980262955943399e-06, - "loss": 1.0647, - "step": 611 - }, - { - "epoch": 0.07358864907112367, - "grad_norm": 2.644370408005718, - "learning_rate": 3.980153639443569e-06, - "loss": 1.1076, - "step": 612 - }, - { - "epoch": 0.07370889196176277, - "grad_norm": 2.0358733160480744, - "learning_rate": 3.980044022556005e-06, - "loss": 1.0316, - "step": 613 - }, - { - "epoch": 0.07382913485240185, - "grad_norm": 2.7097499565957204, - "learning_rate": 3.9799341052973375e-06, - "loss": 0.9492, - "step": 614 - }, - { - "epoch": 0.07394937774304094, - "grad_norm": 2.4690921401502037, - "learning_rate": 3.979823887684241e-06, - "loss": 0.9822, - "step": 615 - }, - { - "epoch": 0.07406962063368003, - "grad_norm": 2.2129091563378966, - "learning_rate": 3.979713369733434e-06, - "loss": 1.0818, - "step": 616 - }, - { - "epoch": 0.07418986352431912, - "grad_norm": 1.929962138659444, - "learning_rate": 3.979602551461683e-06, - "loss": 1.0719, - "step": 617 - }, - { - "epoch": 0.07431010641495822, - "grad_norm": 6.054456585818462, - "learning_rate": 3.979491432885799e-06, - "loss": 1.1501, - "step": 618 - }, - { - "epoch": 0.0744303493055973, - "grad_norm": 2.340831312552377, - "learning_rate": 3.97938001402264e-06, - "loss": 1.0571, - "step": 619 - }, - { - "epoch": 0.0745505921962364, - "grad_norm": 2.6878193510296655, - "learning_rate": 3.979268294889105e-06, - "loss": 1.0367, - "step": 620 - }, - { - "epoch": 0.07467083508687548, - "grad_norm": 2.639908371693514, - "learning_rate": 3.979156275502143e-06, - "loss": 0.9816, - "step": 621 - }, - { - "epoch": 0.07479107797751458, - "grad_norm": 3.558761240998695, - "learning_rate": 3.979043955878749e-06, - "loss": 1.1488, - "step": 622 - }, - { - "epoch": 0.07491132086815366, - "grad_norm": 1.9286420206239494, - "learning_rate": 3.978931336035959e-06, - "loss": 1.0599, - "step": 623 - }, - { - "epoch": 0.07503156375879276, - "grad_norm": 2.4427670035945095, - "learning_rate": 3.9788184159908595e-06, - "loss": 1.0538, - "step": 624 - }, - { - "epoch": 0.07515180664943186, - "grad_norm": 3.002777244606249, - "learning_rate": 3.97870519576058e-06, - "loss": 1.0525, - "step": 625 - }, - { - "epoch": 0.07527204954007094, - "grad_norm": 2.6464786093110626, - "learning_rate": 3.978591675362295e-06, - "loss": 1.032, - "step": 626 - }, - { - "epoch": 0.07539229243071004, - "grad_norm": 3.485905059020594, - "learning_rate": 3.978477854813226e-06, - "loss": 1.1058, - "step": 627 - }, - { - "epoch": 0.07551253532134912, - "grad_norm": 1.8988582317552856, - "learning_rate": 3.97836373413064e-06, - "loss": 1.0572, - "step": 628 - }, - { - "epoch": 0.07563277821198822, - "grad_norm": 1.9739016388037782, - "learning_rate": 3.978249313331848e-06, - "loss": 0.9768, - "step": 629 - }, - { - "epoch": 0.07575302110262731, - "grad_norm": 6.356585066374452, - "learning_rate": 3.978134592434208e-06, - "loss": 0.8541, - "step": 630 - }, - { - "epoch": 0.0758732639932664, - "grad_norm": 1.017226145489239, - "learning_rate": 3.978019571455123e-06, - "loss": 0.8786, - "step": 631 - }, - { - "epoch": 0.07599350688390549, - "grad_norm": 1.8973746758690737, - "learning_rate": 3.977904250412042e-06, - "loss": 1.0674, - "step": 632 - }, - { - "epoch": 0.07611374977454458, - "grad_norm": 1.8181622292807185, - "learning_rate": 3.97778862932246e-06, - "loss": 1.0871, - "step": 633 - }, - { - "epoch": 0.07623399266518367, - "grad_norm": 1.899899645714963, - "learning_rate": 3.9776727082039144e-06, - "loss": 1.1624, - "step": 634 - }, - { - "epoch": 0.07635423555582276, - "grad_norm": 0.8286295739158571, - "learning_rate": 3.977556487073991e-06, - "loss": 0.815, - "step": 635 - }, - { - "epoch": 0.07647447844646185, - "grad_norm": 1.6745474303605437, - "learning_rate": 3.97743996595032e-06, - "loss": 1.0355, - "step": 636 - }, - { - "epoch": 0.07659472133710095, - "grad_norm": 1.4652361941752154, - "learning_rate": 3.9773231448505804e-06, - "loss": 1.0438, - "step": 637 - }, - { - "epoch": 0.07671496422774003, - "grad_norm": 2.972289214364599, - "learning_rate": 3.977206023792491e-06, - "loss": 1.0001, - "step": 638 - }, - { - "epoch": 0.07683520711837913, - "grad_norm": 2.547255879573493, - "learning_rate": 3.97708860279382e-06, - "loss": 1.0441, - "step": 639 - }, - { - "epoch": 0.07695545000901821, - "grad_norm": 1.7209307620690883, - "learning_rate": 3.97697088187238e-06, - "loss": 1.0174, - "step": 640 - }, - { - "epoch": 0.07707569289965731, - "grad_norm": 2.6496599914944037, - "learning_rate": 3.976852861046029e-06, - "loss": 1.1386, - "step": 641 - }, - { - "epoch": 0.0771959357902964, - "grad_norm": 1.4665296288250298, - "learning_rate": 3.97673454033267e-06, - "loss": 1.0248, - "step": 642 - }, - { - "epoch": 0.07731617868093549, - "grad_norm": 2.1524298178189194, - "learning_rate": 3.976615919750254e-06, - "loss": 1.0438, - "step": 643 - }, - { - "epoch": 0.07743642157157458, - "grad_norm": 1.9149565807828566, - "learning_rate": 3.976496999316775e-06, - "loss": 1.0917, - "step": 644 - }, - { - "epoch": 0.07755666446221367, - "grad_norm": 1.821341620701934, - "learning_rate": 3.976377779050271e-06, - "loss": 1.0676, - "step": 645 - }, - { - "epoch": 0.07767690735285276, - "grad_norm": 3.1299006373809903, - "learning_rate": 3.976258258968831e-06, - "loss": 1.0622, - "step": 646 - }, - { - "epoch": 0.07779715024349185, - "grad_norm": 4.04566118454895, - "learning_rate": 3.976138439090583e-06, - "loss": 0.9652, - "step": 647 - }, - { - "epoch": 0.07791739313413094, - "grad_norm": 1.8596939326287083, - "learning_rate": 3.976018319433706e-06, - "loss": 1.075, - "step": 648 - }, - { - "epoch": 0.07803763602477004, - "grad_norm": 17.293683013400262, - "learning_rate": 3.9758979000164205e-06, - "loss": 1.1473, - "step": 649 - }, - { - "epoch": 0.07815787891540912, - "grad_norm": 1.9965930172493669, - "learning_rate": 3.975777180856995e-06, - "loss": 0.9474, - "step": 650 - }, - { - "epoch": 0.07827812180604822, - "grad_norm": 2.5967333079035066, - "learning_rate": 3.975656161973742e-06, - "loss": 1.0934, - "step": 651 - }, - { - "epoch": 0.0783983646966873, - "grad_norm": 2.5687135865538013, - "learning_rate": 3.9755348433850194e-06, - "loss": 1.1221, - "step": 652 - }, - { - "epoch": 0.0785186075873264, - "grad_norm": 1.068915464743219, - "learning_rate": 3.975413225109232e-06, - "loss": 0.9473, - "step": 653 - }, - { - "epoch": 0.0786388504779655, - "grad_norm": 3.186079269629951, - "learning_rate": 3.975291307164829e-06, - "loss": 1.1627, - "step": 654 - }, - { - "epoch": 0.07875909336860458, - "grad_norm": 1.7947952176009299, - "learning_rate": 3.975169089570306e-06, - "loss": 1.083, - "step": 655 - }, - { - "epoch": 0.07887933625924368, - "grad_norm": 3.1792153797608935, - "learning_rate": 3.975046572344202e-06, - "loss": 1.1476, - "step": 656 - }, - { - "epoch": 0.07899957914988276, - "grad_norm": 1.6006086532907973, - "learning_rate": 3.974923755505103e-06, - "loss": 0.9483, - "step": 657 - }, - { - "epoch": 0.07911982204052186, - "grad_norm": 1.5995487858402697, - "learning_rate": 3.974800639071641e-06, - "loss": 1.133, - "step": 658 - }, - { - "epoch": 0.07924006493116094, - "grad_norm": 2.008321853330522, - "learning_rate": 3.974677223062492e-06, - "loss": 1.2331, - "step": 659 - }, - { - "epoch": 0.07936030782180004, - "grad_norm": 1.8483712423901277, - "learning_rate": 3.974553507496378e-06, - "loss": 0.9752, - "step": 660 - }, - { - "epoch": 0.07948055071243913, - "grad_norm": 1.9025189516825398, - "learning_rate": 3.974429492392068e-06, - "loss": 1.1072, - "step": 661 - }, - { - "epoch": 0.07960079360307822, - "grad_norm": 1.7672334619509973, - "learning_rate": 3.974305177768373e-06, - "loss": 1.1289, - "step": 662 - }, - { - "epoch": 0.07972103649371731, - "grad_norm": 2.026239171408088, - "learning_rate": 3.974180563644152e-06, - "loss": 1.0852, - "step": 663 - }, - { - "epoch": 0.0798412793843564, - "grad_norm": 2.1654476869333443, - "learning_rate": 3.97405565003831e-06, - "loss": 1.1206, - "step": 664 - }, - { - "epoch": 0.07996152227499549, - "grad_norm": 3.4273938132099846, - "learning_rate": 3.973930436969794e-06, - "loss": 1.0214, - "step": 665 - }, - { - "epoch": 0.08008176516563459, - "grad_norm": 1.9129767627730525, - "learning_rate": 3.973804924457602e-06, - "loss": 1.085, - "step": 666 - }, - { - "epoch": 0.08020200805627367, - "grad_norm": 1.6508412124135092, - "learning_rate": 3.973679112520771e-06, - "loss": 1.0809, - "step": 667 - }, - { - "epoch": 0.08032225094691277, - "grad_norm": 1.771552945695173, - "learning_rate": 3.973553001178389e-06, - "loss": 1.2215, - "step": 668 - }, - { - "epoch": 0.08044249383755185, - "grad_norm": 1.8370613716435318, - "learning_rate": 3.973426590449585e-06, - "loss": 0.9829, - "step": 669 - }, - { - "epoch": 0.08056273672819095, - "grad_norm": 1.7635320901657654, - "learning_rate": 3.9732998803535364e-06, - "loss": 0.9899, - "step": 670 - }, - { - "epoch": 0.08068297961883003, - "grad_norm": 1.922312864960711, - "learning_rate": 3.973172870909465e-06, - "loss": 1.0863, - "step": 671 - }, - { - "epoch": 0.08080322250946913, - "grad_norm": 2.366682042712199, - "learning_rate": 3.973045562136638e-06, - "loss": 1.0391, - "step": 672 - }, - { - "epoch": 0.08092346540010822, - "grad_norm": 2.170757417821901, - "learning_rate": 3.972917954054368e-06, - "loss": 1.1387, - "step": 673 - }, - { - "epoch": 0.08104370829074731, - "grad_norm": 2.1332395915456246, - "learning_rate": 3.972790046682013e-06, - "loss": 1.0376, - "step": 674 - }, - { - "epoch": 0.0811639511813864, - "grad_norm": 1.7283087827556785, - "learning_rate": 3.972661840038977e-06, - "loss": 1.0209, - "step": 675 - }, - { - "epoch": 0.08128419407202549, - "grad_norm": 2.072552890263762, - "learning_rate": 3.972533334144707e-06, - "loss": 1.0629, - "step": 676 - }, - { - "epoch": 0.08140443696266458, - "grad_norm": 1.7997118904751537, - "learning_rate": 3.972404529018699e-06, - "loss": 1.0171, - "step": 677 - }, - { - "epoch": 0.08152467985330367, - "grad_norm": 1.655854015877113, - "learning_rate": 3.972275424680493e-06, - "loss": 1.086, - "step": 678 - }, - { - "epoch": 0.08164492274394276, - "grad_norm": 2.081302250386666, - "learning_rate": 3.972146021149673e-06, - "loss": 1.1416, - "step": 679 - }, - { - "epoch": 0.08176516563458186, - "grad_norm": 2.127164075354556, - "learning_rate": 3.972016318445868e-06, - "loss": 1.023, - "step": 680 - }, - { - "epoch": 0.08188540852522094, - "grad_norm": 1.7138130722849032, - "learning_rate": 3.971886316588757e-06, - "loss": 1.1416, - "step": 681 - }, - { - "epoch": 0.08200565141586004, - "grad_norm": 2.521728503806788, - "learning_rate": 3.9717560155980595e-06, - "loss": 0.9735, - "step": 682 - }, - { - "epoch": 0.08212589430649912, - "grad_norm": 1.9941402065414902, - "learning_rate": 3.971625415493542e-06, - "loss": 1.1529, - "step": 683 - }, - { - "epoch": 0.08224613719713822, - "grad_norm": 1.8093009867708525, - "learning_rate": 3.971494516295017e-06, - "loss": 1.0998, - "step": 684 - }, - { - "epoch": 0.08236638008777732, - "grad_norm": 2.0817785055976556, - "learning_rate": 3.971363318022341e-06, - "loss": 1.0787, - "step": 685 - }, - { - "epoch": 0.0824866229784164, - "grad_norm": 3.1337210462683487, - "learning_rate": 3.971231820695417e-06, - "loss": 0.9125, - "step": 686 - }, - { - "epoch": 0.0826068658690555, - "grad_norm": 1.6118856309449203, - "learning_rate": 3.971100024334193e-06, - "loss": 1.0427, - "step": 687 - }, - { - "epoch": 0.08272710875969458, - "grad_norm": 1.7321829475179216, - "learning_rate": 3.970967928958663e-06, - "loss": 1.0888, - "step": 688 - }, - { - "epoch": 0.08284735165033368, - "grad_norm": 1.5247748349417787, - "learning_rate": 3.970835534588865e-06, - "loss": 1.0609, - "step": 689 - }, - { - "epoch": 0.08296759454097276, - "grad_norm": 1.6946307802494969, - "learning_rate": 3.970702841244883e-06, - "loss": 1.0864, - "step": 690 - }, - { - "epoch": 0.08308783743161186, - "grad_norm": 1.863557231305711, - "learning_rate": 3.970569848946847e-06, - "loss": 1.0539, - "step": 691 - }, - { - "epoch": 0.08320808032225095, - "grad_norm": 2.400818478574636, - "learning_rate": 3.970436557714932e-06, - "loss": 1.0543, - "step": 692 - }, - { - "epoch": 0.08332832321289003, - "grad_norm": 1.8701094106806317, - "learning_rate": 3.970302967569358e-06, - "loss": 1.0861, - "step": 693 - }, - { - "epoch": 0.08344856610352913, - "grad_norm": 2.6175726308282514, - "learning_rate": 3.9701690785303896e-06, - "loss": 0.9154, - "step": 694 - }, - { - "epoch": 0.08356880899416821, - "grad_norm": 1.9437888432880923, - "learning_rate": 3.970034890618339e-06, - "loss": 1.1103, - "step": 695 - }, - { - "epoch": 0.08368905188480731, - "grad_norm": 1.7515969483172342, - "learning_rate": 3.969900403853562e-06, - "loss": 1.1066, - "step": 696 - }, - { - "epoch": 0.08380929477544641, - "grad_norm": 1.452810848281916, - "learning_rate": 3.96976561825646e-06, - "loss": 1.0116, - "step": 697 - }, - { - "epoch": 0.08392953766608549, - "grad_norm": 1.6929179213445122, - "learning_rate": 3.969630533847479e-06, - "loss": 1.1014, - "step": 698 - }, - { - "epoch": 0.08404978055672459, - "grad_norm": 2.3231064130210397, - "learning_rate": 3.969495150647113e-06, - "loss": 1.0796, - "step": 699 - }, - { - "epoch": 0.08417002344736367, - "grad_norm": 1.688109958755389, - "learning_rate": 3.969359468675899e-06, - "loss": 0.9949, - "step": 700 - }, - { - "epoch": 0.08429026633800277, - "grad_norm": 1.8592248423234734, - "learning_rate": 3.969223487954418e-06, - "loss": 1.1267, - "step": 701 - }, - { - "epoch": 0.08441050922864185, - "grad_norm": 1.8003252159481384, - "learning_rate": 3.969087208503301e-06, - "loss": 1.0545, - "step": 702 - }, - { - "epoch": 0.08453075211928095, - "grad_norm": 2.428324522562304, - "learning_rate": 3.968950630343219e-06, - "loss": 1.07, - "step": 703 - }, - { - "epoch": 0.08465099500992004, - "grad_norm": 1.7703396127616136, - "learning_rate": 3.968813753494892e-06, - "loss": 1.1569, - "step": 704 - }, - { - "epoch": 0.08477123790055913, - "grad_norm": 2.267550367239667, - "learning_rate": 3.968676577979084e-06, - "loss": 0.9812, - "step": 705 - }, - { - "epoch": 0.08489148079119822, - "grad_norm": 1.7839811033950186, - "learning_rate": 3.968539103816605e-06, - "loss": 1.0111, - "step": 706 - }, - { - "epoch": 0.0850117236818373, - "grad_norm": 1.8831894366675437, - "learning_rate": 3.9684013310283085e-06, - "loss": 1.123, - "step": 707 - }, - { - "epoch": 0.0851319665724764, - "grad_norm": 1.8342261596810787, - "learning_rate": 3.9682632596350956e-06, - "loss": 0.875, - "step": 708 - }, - { - "epoch": 0.0852522094631155, - "grad_norm": 1.7542597832811275, - "learning_rate": 3.968124889657911e-06, - "loss": 1.0062, - "step": 709 - }, - { - "epoch": 0.08537245235375458, - "grad_norm": 2.161612766371327, - "learning_rate": 3.967986221117746e-06, - "loss": 1.1372, - "step": 710 - }, - { - "epoch": 0.08549269524439368, - "grad_norm": 1.7301141448759751, - "learning_rate": 3.967847254035635e-06, - "loss": 1.0961, - "step": 711 - }, - { - "epoch": 0.08561293813503276, - "grad_norm": 2.113570949701909, - "learning_rate": 3.967707988432661e-06, - "loss": 1.0925, - "step": 712 - }, - { - "epoch": 0.08573318102567186, - "grad_norm": 1.9322448957871599, - "learning_rate": 3.967568424329949e-06, - "loss": 1.102, - "step": 713 - }, - { - "epoch": 0.08585342391631094, - "grad_norm": 0.8083468073937771, - "learning_rate": 3.967428561748671e-06, - "loss": 0.827, - "step": 714 - }, - { - "epoch": 0.08597366680695004, - "grad_norm": 1.8931490230956651, - "learning_rate": 3.967288400710045e-06, - "loss": 1.0991, - "step": 715 - }, - { - "epoch": 0.08609390969758914, - "grad_norm": 1.9379265223451871, - "learning_rate": 3.9671479412353335e-06, - "loss": 1.1083, - "step": 716 - }, - { - "epoch": 0.08621415258822822, - "grad_norm": 2.2803935703432514, - "learning_rate": 3.967007183345843e-06, - "loss": 0.9774, - "step": 717 - }, - { - "epoch": 0.08633439547886732, - "grad_norm": 2.060965689812657, - "learning_rate": 3.966866127062927e-06, - "loss": 1.1271, - "step": 718 - }, - { - "epoch": 0.0864546383695064, - "grad_norm": 0.9203018731894554, - "learning_rate": 3.966724772407982e-06, - "loss": 0.9138, - "step": 719 - }, - { - "epoch": 0.0865748812601455, - "grad_norm": 1.899100293914417, - "learning_rate": 3.966583119402454e-06, - "loss": 1.1084, - "step": 720 - }, - { - "epoch": 0.08669512415078459, - "grad_norm": 1.4793753499142257, - "learning_rate": 3.9664411680678305e-06, - "loss": 1.0498, - "step": 721 - }, - { - "epoch": 0.08681536704142367, - "grad_norm": 0.8930119434766909, - "learning_rate": 3.966298918425644e-06, - "loss": 0.8733, - "step": 722 - }, - { - "epoch": 0.08693560993206277, - "grad_norm": 2.09881412083256, - "learning_rate": 3.966156370497476e-06, - "loss": 1.0625, - "step": 723 - }, - { - "epoch": 0.08705585282270185, - "grad_norm": 2.1250195221845676, - "learning_rate": 3.96601352430495e-06, - "loss": 1.1111, - "step": 724 - }, - { - "epoch": 0.08717609571334095, - "grad_norm": 1.568125402116781, - "learning_rate": 3.965870379869735e-06, - "loss": 1.0579, - "step": 725 - }, - { - "epoch": 0.08729633860398003, - "grad_norm": 1.8467418512929121, - "learning_rate": 3.965726937213547e-06, - "loss": 1.0943, - "step": 726 - }, - { - "epoch": 0.08741658149461913, - "grad_norm": 2.57438823506694, - "learning_rate": 3.965583196358144e-06, - "loss": 1.0339, - "step": 727 - }, - { - "epoch": 0.08753682438525823, - "grad_norm": 1.9336063269003851, - "learning_rate": 3.965439157325335e-06, - "loss": 0.9838, - "step": 728 - }, - { - "epoch": 0.08765706727589731, - "grad_norm": 2.3833552508469027, - "learning_rate": 3.965294820136968e-06, - "loss": 0.9873, - "step": 729 - }, - { - "epoch": 0.08777731016653641, - "grad_norm": 1.9766389734001883, - "learning_rate": 3.965150184814938e-06, - "loss": 1.0928, - "step": 730 - }, - { - "epoch": 0.08789755305717549, - "grad_norm": 2.0248169192215792, - "learning_rate": 3.965005251381189e-06, - "loss": 0.981, - "step": 731 - }, - { - "epoch": 0.08801779594781459, - "grad_norm": 0.853778650185678, - "learning_rate": 3.964860019857705e-06, - "loss": 0.8991, - "step": 732 - }, - { - "epoch": 0.08813803883845367, - "grad_norm": 2.5231379753423604, - "learning_rate": 3.964714490266518e-06, - "loss": 1.0652, - "step": 733 - }, - { - "epoch": 0.08825828172909277, - "grad_norm": 0.8813129618993634, - "learning_rate": 3.964568662629706e-06, - "loss": 0.8917, - "step": 734 - }, - { - "epoch": 0.08837852461973186, - "grad_norm": 2.1547481448108776, - "learning_rate": 3.9644225369693895e-06, - "loss": 1.0668, - "step": 735 - }, - { - "epoch": 0.08849876751037095, - "grad_norm": 2.215629962867161, - "learning_rate": 3.964276113307735e-06, - "loss": 1.1022, - "step": 736 - }, - { - "epoch": 0.08861901040101004, - "grad_norm": 2.1084870906848048, - "learning_rate": 3.9641293916669574e-06, - "loss": 1.0298, - "step": 737 - }, - { - "epoch": 0.08873925329164913, - "grad_norm": 3.071359237572698, - "learning_rate": 3.9639823720693115e-06, - "loss": 1.0616, - "step": 738 - }, - { - "epoch": 0.08885949618228822, - "grad_norm": 0.8409337267441941, - "learning_rate": 3.963835054537102e-06, - "loss": 0.8839, - "step": 739 - }, - { - "epoch": 0.08897973907292732, - "grad_norm": 2.1555421192475785, - "learning_rate": 3.963687439092676e-06, - "loss": 0.8387, - "step": 740 - }, - { - "epoch": 0.0890999819635664, - "grad_norm": 1.7530886097223404, - "learning_rate": 3.963539525758427e-06, - "loss": 1.0302, - "step": 741 - }, - { - "epoch": 0.0892202248542055, - "grad_norm": 1.9881813608795298, - "learning_rate": 3.9633913145567925e-06, - "loss": 0.9104, - "step": 742 - }, - { - "epoch": 0.08934046774484458, - "grad_norm": 1.9862759713646985, - "learning_rate": 3.9632428055102575e-06, - "loss": 1.0427, - "step": 743 - }, - { - "epoch": 0.08946071063548368, - "grad_norm": 2.077929494739276, - "learning_rate": 3.9630939986413495e-06, - "loss": 0.9079, - "step": 744 - }, - { - "epoch": 0.08958095352612276, - "grad_norm": 1.503287606752724, - "learning_rate": 3.962944893972643e-06, - "loss": 1.0094, - "step": 745 - }, - { - "epoch": 0.08970119641676186, - "grad_norm": 2.669923393064903, - "learning_rate": 3.962795491526756e-06, - "loss": 1.156, - "step": 746 - }, - { - "epoch": 0.08982143930740095, - "grad_norm": 2.2824749727746796, - "learning_rate": 3.962645791326354e-06, - "loss": 1.128, - "step": 747 - }, - { - "epoch": 0.08994168219804004, - "grad_norm": 1.7879846649408375, - "learning_rate": 3.962495793394146e-06, - "loss": 1.0585, - "step": 748 - }, - { - "epoch": 0.09006192508867913, - "grad_norm": 0.6929451578246467, - "learning_rate": 3.9623454977528864e-06, - "loss": 0.8438, - "step": 749 - }, - { - "epoch": 0.09018216797931822, - "grad_norm": 1.712839262832772, - "learning_rate": 3.962194904425375e-06, - "loss": 1.0788, - "step": 750 - }, - { - "epoch": 0.09030241086995731, - "grad_norm": 1.7988021467654545, - "learning_rate": 3.9620440134344566e-06, - "loss": 0.9057, - "step": 751 - }, - { - "epoch": 0.09042265376059641, - "grad_norm": 3.0319263450916063, - "learning_rate": 3.9618928248030215e-06, - "loss": 1.0462, - "step": 752 - }, - { - "epoch": 0.0905428966512355, - "grad_norm": 2.033683375678172, - "learning_rate": 3.961741338554005e-06, - "loss": 1.0648, - "step": 753 - }, - { - "epoch": 0.09066313954187459, - "grad_norm": 1.8925620630486188, - "learning_rate": 3.9615895547103865e-06, - "loss": 0.9818, - "step": 754 - }, - { - "epoch": 0.09078338243251367, - "grad_norm": 1.9661818208411452, - "learning_rate": 3.961437473295193e-06, - "loss": 1.0051, - "step": 755 - }, - { - "epoch": 0.09090362532315277, - "grad_norm": 2.50469177588409, - "learning_rate": 3.961285094331495e-06, - "loss": 0.9468, - "step": 756 - }, - { - "epoch": 0.09102386821379185, - "grad_norm": 1.6209550545431162, - "learning_rate": 3.961132417842406e-06, - "loss": 1.0816, - "step": 757 - }, - { - "epoch": 0.09114411110443095, - "grad_norm": 2.3905279601105, - "learning_rate": 3.960979443851089e-06, - "loss": 0.9809, - "step": 758 - }, - { - "epoch": 0.09126435399507005, - "grad_norm": 1.6832273575814978, - "learning_rate": 3.96082617238075e-06, - "loss": 1.0241, - "step": 759 - }, - { - "epoch": 0.09138459688570913, - "grad_norm": 7.994266362696103, - "learning_rate": 3.960672603454639e-06, - "loss": 1.026, - "step": 760 - }, - { - "epoch": 0.09150483977634823, - "grad_norm": 2.7191428415897443, - "learning_rate": 3.960518737096054e-06, - "loss": 1.0042, - "step": 761 - }, - { - "epoch": 0.09162508266698731, - "grad_norm": 2.142850512539733, - "learning_rate": 3.960364573328334e-06, - "loss": 0.9699, - "step": 762 - }, - { - "epoch": 0.0917453255576264, - "grad_norm": 1.7051539103826556, - "learning_rate": 3.9602101121748675e-06, - "loss": 1.1139, - "step": 763 - }, - { - "epoch": 0.0918655684482655, - "grad_norm": 2.11826108619709, - "learning_rate": 3.960055353659085e-06, - "loss": 0.9535, - "step": 764 - }, - { - "epoch": 0.09198581133890459, - "grad_norm": 2.191154914355353, - "learning_rate": 3.959900297804465e-06, - "loss": 1.071, - "step": 765 - }, - { - "epoch": 0.09210605422954368, - "grad_norm": 1.9312464173910249, - "learning_rate": 3.9597449446345276e-06, - "loss": 0.9933, - "step": 766 - }, - { - "epoch": 0.09222629712018277, - "grad_norm": 2.2186787553551124, - "learning_rate": 3.95958929417284e-06, - "loss": 1.0607, - "step": 767 - }, - { - "epoch": 0.09234654001082186, - "grad_norm": 0.749039830074452, - "learning_rate": 3.9594333464430145e-06, - "loss": 0.8432, - "step": 768 - }, - { - "epoch": 0.09246678290146094, - "grad_norm": 1.9722142092522592, - "learning_rate": 3.959277101468709e-06, - "loss": 1.109, - "step": 769 - }, - { - "epoch": 0.09258702579210004, - "grad_norm": 2.131626232420889, - "learning_rate": 3.959120559273624e-06, - "loss": 1.0168, - "step": 770 - }, - { - "epoch": 0.09270726868273914, - "grad_norm": 4.750462760468566, - "learning_rate": 3.958963719881509e-06, - "loss": 1.0741, - "step": 771 - }, - { - "epoch": 0.09282751157337822, - "grad_norm": 1.7522531037816782, - "learning_rate": 3.958806583316154e-06, - "loss": 1.167, - "step": 772 - }, - { - "epoch": 0.09294775446401732, - "grad_norm": 1.6537217330535192, - "learning_rate": 3.9586491496013985e-06, - "loss": 1.0216, - "step": 773 - }, - { - "epoch": 0.0930679973546564, - "grad_norm": 1.841881920963639, - "learning_rate": 3.958491418761124e-06, - "loss": 1.0506, - "step": 774 - }, - { - "epoch": 0.0931882402452955, - "grad_norm": 1.944709347285427, - "learning_rate": 3.958333390819258e-06, - "loss": 0.9633, - "step": 775 - }, - { - "epoch": 0.0933084831359346, - "grad_norm": 2.450001471382789, - "learning_rate": 3.9581750657997754e-06, - "loss": 1.0262, - "step": 776 - }, - { - "epoch": 0.09342872602657368, - "grad_norm": 1.640038376335899, - "learning_rate": 3.95801644372669e-06, - "loss": 1.1277, - "step": 777 - }, - { - "epoch": 0.09354896891721277, - "grad_norm": 14.682882938701159, - "learning_rate": 3.957857524624068e-06, - "loss": 1.0694, - "step": 778 - }, - { - "epoch": 0.09366921180785186, - "grad_norm": 2.2447225349089424, - "learning_rate": 3.957698308516016e-06, - "loss": 1.1281, - "step": 779 - }, - { - "epoch": 0.09378945469849095, - "grad_norm": 1.7842678138086314, - "learning_rate": 3.957538795426688e-06, - "loss": 1.0543, - "step": 780 - }, - { - "epoch": 0.09390969758913004, - "grad_norm": 2.0854196655780317, - "learning_rate": 3.9573789853802804e-06, - "loss": 0.9974, - "step": 781 - }, - { - "epoch": 0.09402994047976913, - "grad_norm": 1.7913943091093119, - "learning_rate": 3.957218878401037e-06, - "loss": 0.9817, - "step": 782 - }, - { - "epoch": 0.09415018337040823, - "grad_norm": 1.6784920630200602, - "learning_rate": 3.957058474513246e-06, - "loss": 1.1273, - "step": 783 - }, - { - "epoch": 0.09427042626104731, - "grad_norm": 2.500655946509091, - "learning_rate": 3.956897773741241e-06, - "loss": 1.0165, - "step": 784 - }, - { - "epoch": 0.09439066915168641, - "grad_norm": 1.508479786010083, - "learning_rate": 3.956736776109398e-06, - "loss": 0.9472, - "step": 785 - }, - { - "epoch": 0.09451091204232549, - "grad_norm": 2.0559539952901456, - "learning_rate": 3.956575481642143e-06, - "loss": 1.0637, - "step": 786 - }, - { - "epoch": 0.09463115493296459, - "grad_norm": 2.133083004967013, - "learning_rate": 3.956413890363943e-06, - "loss": 0.9746, - "step": 787 - }, - { - "epoch": 0.09475139782360369, - "grad_norm": 1.8058512184772468, - "learning_rate": 3.956252002299312e-06, - "loss": 1.0518, - "step": 788 - }, - { - "epoch": 0.09487164071424277, - "grad_norm": 1.8354917593874664, - "learning_rate": 3.956089817472807e-06, - "loss": 1.136, - "step": 789 - }, - { - "epoch": 0.09499188360488187, - "grad_norm": 1.849860781508967, - "learning_rate": 3.955927335909032e-06, - "loss": 1.0894, - "step": 790 - }, - { - "epoch": 0.09511212649552095, - "grad_norm": 2.0055176203278684, - "learning_rate": 3.955764557632634e-06, - "loss": 0.9811, - "step": 791 - }, - { - "epoch": 0.09523236938616005, - "grad_norm": 2.051647998570974, - "learning_rate": 3.955601482668309e-06, - "loss": 1.1747, - "step": 792 - }, - { - "epoch": 0.09535261227679913, - "grad_norm": 1.6328062942605979, - "learning_rate": 3.955438111040794e-06, - "loss": 1.1106, - "step": 793 - }, - { - "epoch": 0.09547285516743823, - "grad_norm": 2.6857197210001806, - "learning_rate": 3.955274442774873e-06, - "loss": 1.0364, - "step": 794 - }, - { - "epoch": 0.09559309805807732, - "grad_norm": 2.116584720176716, - "learning_rate": 3.9551104778953725e-06, - "loss": 0.9425, - "step": 795 - }, - { - "epoch": 0.0957133409487164, - "grad_norm": 2.0304164643434013, - "learning_rate": 3.954946216427167e-06, - "loss": 1.091, - "step": 796 - }, - { - "epoch": 0.0958335838393555, - "grad_norm": 0.829462913964909, - "learning_rate": 3.954781658395176e-06, - "loss": 0.8793, - "step": 797 - }, - { - "epoch": 0.09595382672999458, - "grad_norm": 1.9616056724903488, - "learning_rate": 3.95461680382436e-06, - "loss": 1.1484, - "step": 798 - }, - { - "epoch": 0.09607406962063368, - "grad_norm": 2.245973374831425, - "learning_rate": 3.9544516527397295e-06, - "loss": 1.0864, - "step": 799 - }, - { - "epoch": 0.09619431251127276, - "grad_norm": 1.7034831939038442, - "learning_rate": 3.954286205166338e-06, - "loss": 1.0323, - "step": 800 - }, - { - "epoch": 0.09631455540191186, - "grad_norm": 1.9271207101307366, - "learning_rate": 3.954120461129282e-06, - "loss": 1.0648, - "step": 801 - }, - { - "epoch": 0.09643479829255096, - "grad_norm": 1.8526189824039974, - "learning_rate": 3.953954420653706e-06, - "loss": 1.07, - "step": 802 - }, - { - "epoch": 0.09655504118319004, - "grad_norm": 1.7050633031106928, - "learning_rate": 3.953788083764798e-06, - "loss": 1.1085, - "step": 803 - }, - { - "epoch": 0.09667528407382914, - "grad_norm": 2.1296501889672315, - "learning_rate": 3.953621450487792e-06, - "loss": 1.148, - "step": 804 - }, - { - "epoch": 0.09679552696446822, - "grad_norm": 0.8354947709240097, - "learning_rate": 3.953454520847964e-06, - "loss": 0.8819, - "step": 805 - }, - { - "epoch": 0.09691576985510732, - "grad_norm": 1.9802682989580362, - "learning_rate": 3.9532872948706395e-06, - "loss": 0.9678, - "step": 806 - }, - { - "epoch": 0.09703601274574641, - "grad_norm": 2.7694111570676663, - "learning_rate": 3.9531197725811845e-06, - "loss": 1.0632, - "step": 807 - }, - { - "epoch": 0.0971562556363855, - "grad_norm": 1.6811055429787598, - "learning_rate": 3.952951954005013e-06, - "loss": 1.0983, - "step": 808 - }, - { - "epoch": 0.0972764985270246, - "grad_norm": 1.5975508153770233, - "learning_rate": 3.952783839167584e-06, - "loss": 1.0777, - "step": 809 - }, - { - "epoch": 0.09739674141766368, - "grad_norm": 2.452555888061212, - "learning_rate": 3.952615428094398e-06, - "loss": 0.9743, - "step": 810 - }, - { - "epoch": 0.09751698430830277, - "grad_norm": 1.719333202945329, - "learning_rate": 3.952446720811004e-06, - "loss": 0.9671, - "step": 811 - }, - { - "epoch": 0.09763722719894186, - "grad_norm": 0.8232096420135834, - "learning_rate": 3.952277717342995e-06, - "loss": 0.904, - "step": 812 - }, - { - "epoch": 0.09775747008958095, - "grad_norm": 6.1796770059566395, - "learning_rate": 3.952108417716009e-06, - "loss": 1.0901, - "step": 813 - }, - { - "epoch": 0.09787771298022005, - "grad_norm": 1.5222075363403658, - "learning_rate": 3.951938821955727e-06, - "loss": 1.0811, - "step": 814 - }, - { - "epoch": 0.09799795587085913, - "grad_norm": 1.427649207125461, - "learning_rate": 3.9517689300878786e-06, - "loss": 0.9897, - "step": 815 - }, - { - "epoch": 0.09811819876149823, - "grad_norm": 1.640082423999912, - "learning_rate": 3.951598742138236e-06, - "loss": 1.0121, - "step": 816 - }, - { - "epoch": 0.09823844165213731, - "grad_norm": 1.7565406097313132, - "learning_rate": 3.951428258132615e-06, - "loss": 1.0195, - "step": 817 - }, - { - "epoch": 0.09835868454277641, - "grad_norm": 1.7594934860633584, - "learning_rate": 3.951257478096879e-06, - "loss": 1.0701, - "step": 818 - }, - { - "epoch": 0.0984789274334155, - "grad_norm": 2.4055170701697235, - "learning_rate": 3.951086402056936e-06, - "loss": 0.9137, - "step": 819 - }, - { - "epoch": 0.09859917032405459, - "grad_norm": 1.496701030138469, - "learning_rate": 3.950915030038735e-06, - "loss": 1.0653, - "step": 820 - }, - { - "epoch": 0.09871941321469369, - "grad_norm": 2.4645305259208703, - "learning_rate": 3.9507433620682765e-06, - "loss": 1.0642, - "step": 821 - }, - { - "epoch": 0.09883965610533277, - "grad_norm": 1.3879205788005906, - "learning_rate": 3.9505713981716e-06, - "loss": 1.1069, - "step": 822 - }, - { - "epoch": 0.09895989899597187, - "grad_norm": 1.6640276838072157, - "learning_rate": 3.950399138374795e-06, - "loss": 1.038, - "step": 823 - }, - { - "epoch": 0.09908014188661095, - "grad_norm": 1.402472189969229, - "learning_rate": 3.95022658270399e-06, - "loss": 0.9713, - "step": 824 - }, - { - "epoch": 0.09920038477725004, - "grad_norm": 4.758226999852877, - "learning_rate": 3.9500537311853635e-06, - "loss": 1.0055, - "step": 825 - }, - { - "epoch": 0.09932062766788914, - "grad_norm": 2.160995899660809, - "learning_rate": 3.949880583845136e-06, - "loss": 1.0589, - "step": 826 - }, - { - "epoch": 0.09944087055852822, - "grad_norm": 1.7321420677981032, - "learning_rate": 3.949707140709575e-06, - "loss": 1.0431, - "step": 827 - }, - { - "epoch": 0.09956111344916732, - "grad_norm": 2.0566001663503233, - "learning_rate": 3.949533401804991e-06, - "loss": 1.0695, - "step": 828 - }, - { - "epoch": 0.0996813563398064, - "grad_norm": 1.768213855375612, - "learning_rate": 3.949359367157739e-06, - "loss": 1.1369, - "step": 829 - }, - { - "epoch": 0.0998015992304455, - "grad_norm": 1.9370388589721717, - "learning_rate": 3.949185036794222e-06, - "loss": 0.9882, - "step": 830 - }, - { - "epoch": 0.0999218421210846, - "grad_norm": 1.5163810653206737, - "learning_rate": 3.949010410740884e-06, - "loss": 1.0061, - "step": 831 - }, - { - "epoch": 0.10004208501172368, - "grad_norm": 1.611404694558566, - "learning_rate": 3.948835489024216e-06, - "loss": 1.0958, - "step": 832 - }, - { - "epoch": 0.10016232790236278, - "grad_norm": 1.8720403837021438, - "learning_rate": 3.948660271670755e-06, - "loss": 1.1274, - "step": 833 - }, - { - "epoch": 0.10028257079300186, - "grad_norm": 2.0544749102830195, - "learning_rate": 3.948484758707079e-06, - "loss": 1.0722, - "step": 834 - }, - { - "epoch": 0.10040281368364096, - "grad_norm": 1.9773831680450358, - "learning_rate": 3.948308950159815e-06, - "loss": 1.0638, - "step": 835 - }, - { - "epoch": 0.10052305657428004, - "grad_norm": 2.3224210566807564, - "learning_rate": 3.9481328460556326e-06, - "loss": 0.9899, - "step": 836 - }, - { - "epoch": 0.10064329946491914, - "grad_norm": 2.282631024948432, - "learning_rate": 3.9479564464212455e-06, - "loss": 1.1215, - "step": 837 - }, - { - "epoch": 0.10076354235555823, - "grad_norm": 2.637513262819327, - "learning_rate": 3.947779751283414e-06, - "loss": 0.9964, - "step": 838 - }, - { - "epoch": 0.10088378524619732, - "grad_norm": 2.201372897320361, - "learning_rate": 3.947602760668944e-06, - "loss": 0.9866, - "step": 839 - }, - { - "epoch": 0.10100402813683641, - "grad_norm": 1.5468507244632828, - "learning_rate": 3.947425474604684e-06, - "loss": 0.9412, - "step": 840 - }, - { - "epoch": 0.1011242710274755, - "grad_norm": 1.7993940525563383, - "learning_rate": 3.947247893117528e-06, - "loss": 1.1545, - "step": 841 - }, - { - "epoch": 0.10124451391811459, - "grad_norm": 4.080958993067755, - "learning_rate": 3.947070016234413e-06, - "loss": 0.9235, - "step": 842 - }, - { - "epoch": 0.10136475680875369, - "grad_norm": 2.248932055175066, - "learning_rate": 3.946891843982326e-06, - "loss": 0.9701, - "step": 843 - }, - { - "epoch": 0.10148499969939277, - "grad_norm": 1.9356908658048178, - "learning_rate": 3.9467133763882935e-06, - "loss": 0.972, - "step": 844 - }, - { - "epoch": 0.10160524259003187, - "grad_norm": 1.839741827536988, - "learning_rate": 3.9465346134793905e-06, - "loss": 1.1005, - "step": 845 - }, - { - "epoch": 0.10172548548067095, - "grad_norm": 1.8237190950950564, - "learning_rate": 3.9463555552827335e-06, - "loss": 1.0286, - "step": 846 - }, - { - "epoch": 0.10184572837131005, - "grad_norm": 2.3638586426635406, - "learning_rate": 3.946176201825487e-06, - "loss": 1.0976, - "step": 847 - }, - { - "epoch": 0.10196597126194913, - "grad_norm": 1.6838693769314486, - "learning_rate": 3.9459965531348575e-06, - "loss": 1.0657, - "step": 848 - }, - { - "epoch": 0.10208621415258823, - "grad_norm": 1.9745522631376615, - "learning_rate": 3.945816609238098e-06, - "loss": 1.0877, - "step": 849 - }, - { - "epoch": 0.10220645704322733, - "grad_norm": 1.703753014363836, - "learning_rate": 3.945636370162507e-06, - "loss": 1.0792, - "step": 850 - }, - { - "epoch": 0.10232669993386641, - "grad_norm": 1.5511592529213705, - "learning_rate": 3.945455835935425e-06, - "loss": 1.0227, - "step": 851 - }, - { - "epoch": 0.1024469428245055, - "grad_norm": 2.154886229094092, - "learning_rate": 3.94527500658424e-06, - "loss": 0.9721, - "step": 852 - }, - { - "epoch": 0.10256718571514459, - "grad_norm": 1.7003440901393536, - "learning_rate": 3.945093882136382e-06, - "loss": 1.0437, - "step": 853 - }, - { - "epoch": 0.10268742860578368, - "grad_norm": 1.681645161869606, - "learning_rate": 3.944912462619329e-06, - "loss": 1.0689, - "step": 854 - }, - { - "epoch": 0.10280767149642277, - "grad_norm": 1.7952318058852965, - "learning_rate": 3.9447307480606025e-06, - "loss": 1.0346, - "step": 855 - }, - { - "epoch": 0.10292791438706186, - "grad_norm": 2.065712478793684, - "learning_rate": 3.944548738487767e-06, - "loss": 1.1335, - "step": 856 - }, - { - "epoch": 0.10304815727770096, - "grad_norm": 2.099265960287425, - "learning_rate": 3.944366433928434e-06, - "loss": 1.1341, - "step": 857 - }, - { - "epoch": 0.10316840016834004, - "grad_norm": 1.4738411828743374, - "learning_rate": 3.9441838344102594e-06, - "loss": 1.0598, - "step": 858 - }, - { - "epoch": 0.10328864305897914, - "grad_norm": 2.4001828282972384, - "learning_rate": 3.944000939960943e-06, - "loss": 0.9066, - "step": 859 - }, - { - "epoch": 0.10340888594961822, - "grad_norm": 1.401820688271303, - "learning_rate": 3.943817750608229e-06, - "loss": 1.0343, - "step": 860 - }, - { - "epoch": 0.10352912884025732, - "grad_norm": 2.298863179758745, - "learning_rate": 3.943634266379908e-06, - "loss": 1.0481, - "step": 861 - }, - { - "epoch": 0.10364937173089642, - "grad_norm": 2.3161923597629617, - "learning_rate": 3.943450487303815e-06, - "loss": 1.0795, - "step": 862 - }, - { - "epoch": 0.1037696146215355, - "grad_norm": 1.8361553955637073, - "learning_rate": 3.943266413407827e-06, - "loss": 1.079, - "step": 863 - }, - { - "epoch": 0.1038898575121746, - "grad_norm": 1.7260612347018722, - "learning_rate": 3.94308204471987e-06, - "loss": 1.071, - "step": 864 - }, - { - "epoch": 0.10401010040281368, - "grad_norm": 2.464894047872307, - "learning_rate": 3.942897381267912e-06, - "loss": 0.977, - "step": 865 - }, - { - "epoch": 0.10413034329345278, - "grad_norm": 2.7716744850340507, - "learning_rate": 3.942712423079965e-06, - "loss": 0.9016, - "step": 866 - }, - { - "epoch": 0.10425058618409186, - "grad_norm": 1.9270270536985294, - "learning_rate": 3.942527170184088e-06, - "loss": 1.1327, - "step": 867 - }, - { - "epoch": 0.10437082907473096, - "grad_norm": 2.5765584174147556, - "learning_rate": 3.942341622608385e-06, - "loss": 1.0146, - "step": 868 - }, - { - "epoch": 0.10449107196537005, - "grad_norm": 1.4832064395733886, - "learning_rate": 3.942155780381001e-06, - "loss": 1.002, - "step": 869 - }, - { - "epoch": 0.10461131485600914, - "grad_norm": 1.908210371739694, - "learning_rate": 3.94196964353013e-06, - "loss": 0.9947, - "step": 870 - }, - { - "epoch": 0.10473155774664823, - "grad_norm": 1.966621034976437, - "learning_rate": 3.941783212084008e-06, - "loss": 1.0294, - "step": 871 - }, - { - "epoch": 0.10485180063728732, - "grad_norm": 2.4107224156479767, - "learning_rate": 3.941596486070916e-06, - "loss": 1.0016, - "step": 872 - }, - { - "epoch": 0.10497204352792641, - "grad_norm": 2.1932949148405867, - "learning_rate": 3.941409465519182e-06, - "loss": 0.8112, - "step": 873 - }, - { - "epoch": 0.10509228641856551, - "grad_norm": 1.4949526904986739, - "learning_rate": 3.941222150457176e-06, - "loss": 1.081, - "step": 874 - }, - { - "epoch": 0.10521252930920459, - "grad_norm": 2.3132675546680677, - "learning_rate": 3.941034540913311e-06, - "loss": 0.9562, - "step": 875 - }, - { - "epoch": 0.10533277219984369, - "grad_norm": 2.3726787265872322, - "learning_rate": 3.940846636916051e-06, - "loss": 1.0559, - "step": 876 - }, - { - "epoch": 0.10545301509048277, - "grad_norm": 1.7532499653005604, - "learning_rate": 3.940658438493899e-06, - "loss": 1.0923, - "step": 877 - }, - { - "epoch": 0.10557325798112187, - "grad_norm": 1.9171716467117186, - "learning_rate": 3.940469945675405e-06, - "loss": 0.9847, - "step": 878 - }, - { - "epoch": 0.10569350087176095, - "grad_norm": 1.702194729742892, - "learning_rate": 3.940281158489163e-06, - "loss": 1.1381, - "step": 879 - }, - { - "epoch": 0.10581374376240005, - "grad_norm": 1.4783067845509157, - "learning_rate": 3.940092076963812e-06, - "loss": 1.0518, - "step": 880 - }, - { - "epoch": 0.10593398665303914, - "grad_norm": 2.1425410696074962, - "learning_rate": 3.9399027011280355e-06, - "loss": 1.0174, - "step": 881 - }, - { - "epoch": 0.10605422954367823, - "grad_norm": 2.1259207133368196, - "learning_rate": 3.939713031010561e-06, - "loss": 0.9976, - "step": 882 - }, - { - "epoch": 0.10617447243431732, - "grad_norm": 1.8373833340308485, - "learning_rate": 3.939523066640163e-06, - "loss": 1.0075, - "step": 883 - }, - { - "epoch": 0.10629471532495641, - "grad_norm": 1.6518942219210828, - "learning_rate": 3.939332808045657e-06, - "loss": 1.0378, - "step": 884 - }, - { - "epoch": 0.1064149582155955, - "grad_norm": 1.4886540624293074, - "learning_rate": 3.939142255255906e-06, - "loss": 1.0698, - "step": 885 - }, - { - "epoch": 0.1065352011062346, - "grad_norm": 2.0877073401997643, - "learning_rate": 3.938951408299817e-06, - "loss": 1.1101, - "step": 886 - }, - { - "epoch": 0.10665544399687368, - "grad_norm": 0.8026291158677851, - "learning_rate": 3.938760267206342e-06, - "loss": 0.8096, - "step": 887 - }, - { - "epoch": 0.10677568688751278, - "grad_norm": 6.435770932177052, - "learning_rate": 3.938568832004475e-06, - "loss": 1.0254, - "step": 888 - }, - { - "epoch": 0.10689592977815186, - "grad_norm": 1.965396264963511, - "learning_rate": 3.938377102723257e-06, - "loss": 0.9848, - "step": 889 - }, - { - "epoch": 0.10701617266879096, - "grad_norm": 1.7997697153072687, - "learning_rate": 3.938185079391774e-06, - "loss": 1.0722, - "step": 890 - }, - { - "epoch": 0.10713641555943004, - "grad_norm": 2.592868977023895, - "learning_rate": 3.937992762039157e-06, - "loss": 1.2879, - "step": 891 - }, - { - "epoch": 0.10725665845006914, - "grad_norm": 1.5258556248223893, - "learning_rate": 3.937800150694577e-06, - "loss": 1.029, - "step": 892 - }, - { - "epoch": 0.10737690134070824, - "grad_norm": 2.0317898546423874, - "learning_rate": 3.937607245387255e-06, - "loss": 0.98, - "step": 893 - }, - { - "epoch": 0.10749714423134732, - "grad_norm": 1.7924972279845102, - "learning_rate": 3.937414046146455e-06, - "loss": 0.9558, - "step": 894 - }, - { - "epoch": 0.10761738712198642, - "grad_norm": 1.7429616260254737, - "learning_rate": 3.9372205530014845e-06, - "loss": 0.9862, - "step": 895 - }, - { - "epoch": 0.1077376300126255, - "grad_norm": 1.8315621692583626, - "learning_rate": 3.937026765981696e-06, - "loss": 0.9544, - "step": 896 - }, - { - "epoch": 0.1078578729032646, - "grad_norm": 1.59547749561875, - "learning_rate": 3.936832685116488e-06, - "loss": 1.0178, - "step": 897 - }, - { - "epoch": 0.10797811579390369, - "grad_norm": 2.271192563075473, - "learning_rate": 3.936638310435301e-06, - "loss": 1.1204, - "step": 898 - }, - { - "epoch": 0.10809835868454278, - "grad_norm": 2.004756516586465, - "learning_rate": 3.936443641967623e-06, - "loss": 1.0523, - "step": 899 - }, - { - "epoch": 0.10821860157518187, - "grad_norm": 1.6674369303658976, - "learning_rate": 3.936248679742983e-06, - "loss": 1.0572, - "step": 900 - }, - { - "epoch": 0.10833884446582095, - "grad_norm": 1.0215133750402132, - "learning_rate": 3.936053423790959e-06, - "loss": 0.9841, - "step": 901 - }, - { - "epoch": 0.10845908735646005, - "grad_norm": 1.7187370993390139, - "learning_rate": 3.935857874141168e-06, - "loss": 0.9975, - "step": 902 - }, - { - "epoch": 0.10857933024709913, - "grad_norm": 2.998157889333068, - "learning_rate": 3.935662030823279e-06, - "loss": 1.0623, - "step": 903 - }, - { - "epoch": 0.10869957313773823, - "grad_norm": 2.622041124575674, - "learning_rate": 3.935465893866998e-06, - "loss": 0.9449, - "step": 904 - }, - { - "epoch": 0.10881981602837733, - "grad_norm": 1.7344077167346357, - "learning_rate": 3.935269463302079e-06, - "loss": 1.0325, - "step": 905 - }, - { - "epoch": 0.10894005891901641, - "grad_norm": 2.443054033772559, - "learning_rate": 3.935072739158322e-06, - "loss": 1.0005, - "step": 906 - }, - { - "epoch": 0.10906030180965551, - "grad_norm": 1.4581734524233716, - "learning_rate": 3.934875721465569e-06, - "loss": 1.0253, - "step": 907 - }, - { - "epoch": 0.10918054470029459, - "grad_norm": 2.47044526816484, - "learning_rate": 3.9346784102537076e-06, - "loss": 0.9418, - "step": 908 - }, - { - "epoch": 0.10930078759093369, - "grad_norm": 1.6260438647388733, - "learning_rate": 3.934480805552669e-06, - "loss": 1.0095, - "step": 909 - }, - { - "epoch": 0.10942103048157277, - "grad_norm": 2.0781726114655767, - "learning_rate": 3.93428290739243e-06, - "loss": 1.1076, - "step": 910 - }, - { - "epoch": 0.10954127337221187, - "grad_norm": 1.9664897356888131, - "learning_rate": 3.9340847158030125e-06, - "loss": 1.0153, - "step": 911 - }, - { - "epoch": 0.10966151626285096, - "grad_norm": 1.519261745686815, - "learning_rate": 3.9338862308144814e-06, - "loss": 0.9768, - "step": 912 - }, - { - "epoch": 0.10978175915349005, - "grad_norm": 1.4932557920493366, - "learning_rate": 3.933687452456946e-06, - "loss": 1.0781, - "step": 913 - }, - { - "epoch": 0.10990200204412914, - "grad_norm": 1.931805826341417, - "learning_rate": 3.933488380760562e-06, - "loss": 1.0909, - "step": 914 - }, - { - "epoch": 0.11002224493476823, - "grad_norm": 1.7364901455270902, - "learning_rate": 3.9332890157555286e-06, - "loss": 1.1016, - "step": 915 - }, - { - "epoch": 0.11014248782540732, - "grad_norm": 1.9701737599302318, - "learning_rate": 3.933089357472088e-06, - "loss": 0.9941, - "step": 916 - }, - { - "epoch": 0.11026273071604642, - "grad_norm": 1.684468973401135, - "learning_rate": 3.932889405940529e-06, - "loss": 1.0917, - "step": 917 - }, - { - "epoch": 0.1103829736066855, - "grad_norm": 2.078231464613076, - "learning_rate": 3.932689161191184e-06, - "loss": 1.0287, - "step": 918 - }, - { - "epoch": 0.1105032164973246, - "grad_norm": 2.3614543316278147, - "learning_rate": 3.93248862325443e-06, - "loss": 1.1043, - "step": 919 - }, - { - "epoch": 0.11062345938796368, - "grad_norm": 0.9817882033750028, - "learning_rate": 3.932287792160688e-06, - "loss": 0.8871, - "step": 920 - }, - { - "epoch": 0.11074370227860278, - "grad_norm": 7.5448700510289815, - "learning_rate": 3.932086667940424e-06, - "loss": 1.0265, - "step": 921 - }, - { - "epoch": 0.11086394516924186, - "grad_norm": 1.6791708928032159, - "learning_rate": 3.93188525062415e-06, - "loss": 1.0374, - "step": 922 - }, - { - "epoch": 0.11098418805988096, - "grad_norm": 1.6091775865676523, - "learning_rate": 3.931683540242418e-06, - "loss": 1.0901, - "step": 923 - }, - { - "epoch": 0.11110443095052006, - "grad_norm": 2.5272603559222406, - "learning_rate": 3.9314815368258295e-06, - "loss": 1.141, - "step": 924 - }, - { - "epoch": 0.11122467384115914, - "grad_norm": 1.5018521635849011, - "learning_rate": 3.9312792404050275e-06, - "loss": 1.016, - "step": 925 - }, - { - "epoch": 0.11134491673179824, - "grad_norm": 1.6685780611648235, - "learning_rate": 3.9310766510107e-06, - "loss": 1.0075, - "step": 926 - }, - { - "epoch": 0.11146515962243732, - "grad_norm": 4.418446859836411, - "learning_rate": 3.9308737686735806e-06, - "loss": 1.1516, - "step": 927 - }, - { - "epoch": 0.11158540251307641, - "grad_norm": 1.8772196029780788, - "learning_rate": 3.9306705934244455e-06, - "loss": 1.0525, - "step": 928 - }, - { - "epoch": 0.11170564540371551, - "grad_norm": 1.538754969820477, - "learning_rate": 3.930467125294116e-06, - "loss": 1.1106, - "step": 929 - }, - { - "epoch": 0.1118258882943546, - "grad_norm": 1.0651399688797025, - "learning_rate": 3.930263364313458e-06, - "loss": 0.8682, - "step": 930 - }, - { - "epoch": 0.11194613118499369, - "grad_norm": 2.0569341292527965, - "learning_rate": 3.930059310513384e-06, - "loss": 1.0689, - "step": 931 - }, - { - "epoch": 0.11206637407563277, - "grad_norm": 1.6259571337756278, - "learning_rate": 3.929854963924846e-06, - "loss": 1.0591, - "step": 932 - }, - { - "epoch": 0.11218661696627187, - "grad_norm": 1.7712301894152194, - "learning_rate": 3.929650324578845e-06, - "loss": 1.0026, - "step": 933 - }, - { - "epoch": 0.11230685985691095, - "grad_norm": 2.5132636129218735, - "learning_rate": 3.929445392506423e-06, - "loss": 1.0591, - "step": 934 - }, - { - "epoch": 0.11242710274755005, - "grad_norm": 1.9250407004078975, - "learning_rate": 3.92924016773867e-06, - "loss": 0.9921, - "step": 935 - }, - { - "epoch": 0.11254734563818915, - "grad_norm": 2.419516713459319, - "learning_rate": 3.9290346503067175e-06, - "loss": 0.9738, - "step": 936 - }, - { - "epoch": 0.11266758852882823, - "grad_norm": 1.5774207479455804, - "learning_rate": 3.9288288402417415e-06, - "loss": 1.0236, - "step": 937 - }, - { - "epoch": 0.11278783141946733, - "grad_norm": 2.0545468590471496, - "learning_rate": 3.928622737574964e-06, - "loss": 0.9342, - "step": 938 - }, - { - "epoch": 0.11290807431010641, - "grad_norm": 1.7822175995065717, - "learning_rate": 3.928416342337652e-06, - "loss": 1.1358, - "step": 939 - }, - { - "epoch": 0.1130283172007455, - "grad_norm": 1.8328023560642466, - "learning_rate": 3.928209654561113e-06, - "loss": 1.0606, - "step": 940 - }, - { - "epoch": 0.1131485600913846, - "grad_norm": 2.0841407609946607, - "learning_rate": 3.928002674276703e-06, - "loss": 1.0304, - "step": 941 - }, - { - "epoch": 0.11326880298202369, - "grad_norm": 2.0701472378394317, - "learning_rate": 3.92779540151582e-06, - "loss": 0.9897, - "step": 942 - }, - { - "epoch": 0.11338904587266278, - "grad_norm": 1.9404712781974898, - "learning_rate": 3.927587836309907e-06, - "loss": 1.089, - "step": 943 - }, - { - "epoch": 0.11350928876330187, - "grad_norm": 1.9072615644189612, - "learning_rate": 3.927379978690452e-06, - "loss": 1.0105, - "step": 944 - }, - { - "epoch": 0.11362953165394096, - "grad_norm": 2.2975967207547856, - "learning_rate": 3.927171828688987e-06, - "loss": 1.0968, - "step": 945 - }, - { - "epoch": 0.11374977454458005, - "grad_norm": 1.8657535170054649, - "learning_rate": 3.926963386337088e-06, - "loss": 1.0478, - "step": 946 - }, - { - "epoch": 0.11387001743521914, - "grad_norm": 2.350016646630597, - "learning_rate": 3.926754651666375e-06, - "loss": 0.9297, - "step": 947 - }, - { - "epoch": 0.11399026032585824, - "grad_norm": 3.4805005347173803, - "learning_rate": 3.926545624708513e-06, - "loss": 1.0159, - "step": 948 - }, - { - "epoch": 0.11411050321649732, - "grad_norm": 2.416367994684602, - "learning_rate": 3.926336305495213e-06, - "loss": 1.0905, - "step": 949 - }, - { - "epoch": 0.11423074610713642, - "grad_norm": 1.9996753667578677, - "learning_rate": 3.926126694058226e-06, - "loss": 1.1217, - "step": 950 - }, - { - "epoch": 0.1143509889977755, - "grad_norm": 1.3365622950034868, - "learning_rate": 3.92591679042935e-06, - "loss": 1.0489, - "step": 951 - }, - { - "epoch": 0.1144712318884146, - "grad_norm": 1.5766992454845883, - "learning_rate": 3.92570659464043e-06, - "loss": 1.0524, - "step": 952 - }, - { - "epoch": 0.1145914747790537, - "grad_norm": 1.6895110544846128, - "learning_rate": 3.925496106723349e-06, - "loss": 1.0242, - "step": 953 - }, - { - "epoch": 0.11471171766969278, - "grad_norm": 1.8545595531471708, - "learning_rate": 3.9252853267100405e-06, - "loss": 1.069, - "step": 954 - }, - { - "epoch": 0.11483196056033187, - "grad_norm": 4.145680231433538, - "learning_rate": 3.9250742546324786e-06, - "loss": 1.0636, - "step": 955 - }, - { - "epoch": 0.11495220345097096, - "grad_norm": 1.5556390811808556, - "learning_rate": 3.924862890522683e-06, - "loss": 1.0886, - "step": 956 - }, - { - "epoch": 0.11507244634161005, - "grad_norm": 1.9824316766806642, - "learning_rate": 3.9246512344127174e-06, - "loss": 1.0895, - "step": 957 - }, - { - "epoch": 0.11519268923224914, - "grad_norm": 1.9936534798436942, - "learning_rate": 3.9244392863346895e-06, - "loss": 1.0452, - "step": 958 - }, - { - "epoch": 0.11531293212288823, - "grad_norm": 2.4051315879114967, - "learning_rate": 3.9242270463207524e-06, - "loss": 1.1474, - "step": 959 - }, - { - "epoch": 0.11543317501352733, - "grad_norm": 2.9734482380627507, - "learning_rate": 3.924014514403102e-06, - "loss": 1.0724, - "step": 960 - }, - { - "epoch": 0.11555341790416641, - "grad_norm": 2.101230864610746, - "learning_rate": 3.92380169061398e-06, - "loss": 1.149, - "step": 961 - }, - { - "epoch": 0.11567366079480551, - "grad_norm": 1.8451889371403454, - "learning_rate": 3.9235885749856705e-06, - "loss": 1.0628, - "step": 962 - }, - { - "epoch": 0.1157939036854446, - "grad_norm": 1.5349000518019427, - "learning_rate": 3.9233751675505035e-06, - "loss": 1.056, - "step": 963 - }, - { - "epoch": 0.11591414657608369, - "grad_norm": 3.112435373148552, - "learning_rate": 3.923161468340853e-06, - "loss": 1.0643, - "step": 964 - }, - { - "epoch": 0.11603438946672277, - "grad_norm": 1.611320578817557, - "learning_rate": 3.9229474773891374e-06, - "loss": 1.0388, - "step": 965 - }, - { - "epoch": 0.11615463235736187, - "grad_norm": 2.311493835130739, - "learning_rate": 3.922733194727818e-06, - "loss": 1.0713, - "step": 966 - }, - { - "epoch": 0.11627487524800097, - "grad_norm": 1.9713947152553677, - "learning_rate": 3.922518620389402e-06, - "loss": 1.0912, - "step": 967 - }, - { - "epoch": 0.11639511813864005, - "grad_norm": 1.537762552809517, - "learning_rate": 3.922303754406439e-06, - "loss": 1.1282, - "step": 968 - }, - { - "epoch": 0.11651536102927915, - "grad_norm": 1.9186382174159942, - "learning_rate": 3.922088596811526e-06, - "loss": 1.012, - "step": 969 - }, - { - "epoch": 0.11663560391991823, - "grad_norm": 1.933157515204573, - "learning_rate": 3.9218731476373e-06, - "loss": 1.0878, - "step": 970 - }, - { - "epoch": 0.11675584681055733, - "grad_norm": 2.204833322051203, - "learning_rate": 3.9216574069164455e-06, - "loss": 1.0798, - "step": 971 - }, - { - "epoch": 0.11687608970119642, - "grad_norm": 1.6599039043050352, - "learning_rate": 3.921441374681691e-06, - "loss": 1.0313, - "step": 972 - }, - { - "epoch": 0.1169963325918355, - "grad_norm": 1.5246794504436743, - "learning_rate": 3.921225050965808e-06, - "loss": 0.8764, - "step": 973 - }, - { - "epoch": 0.1171165754824746, - "grad_norm": 2.1417077660482455, - "learning_rate": 3.921008435801612e-06, - "loss": 0.9721, - "step": 974 - }, - { - "epoch": 0.11723681837311369, - "grad_norm": 4.019791041394999, - "learning_rate": 3.920791529221963e-06, - "loss": 0.9805, - "step": 975 - }, - { - "epoch": 0.11735706126375278, - "grad_norm": 1.7699537972707837, - "learning_rate": 3.920574331259768e-06, - "loss": 0.9908, - "step": 976 - }, - { - "epoch": 0.11747730415439187, - "grad_norm": 2.242767790558292, - "learning_rate": 3.9203568419479716e-06, - "loss": 1.0308, - "step": 977 - }, - { - "epoch": 0.11759754704503096, - "grad_norm": 1.7250243364911744, - "learning_rate": 3.92013906131957e-06, - "loss": 0.981, - "step": 978 - }, - { - "epoch": 0.11771778993567006, - "grad_norm": 1.422565250743846, - "learning_rate": 3.9199209894076e-06, - "loss": 1.0522, - "step": 979 - }, - { - "epoch": 0.11783803282630914, - "grad_norm": 1.7444532910842814, - "learning_rate": 3.919702626245142e-06, - "loss": 1.1316, - "step": 980 - }, - { - "epoch": 0.11795827571694824, - "grad_norm": 2.1244473580194567, - "learning_rate": 3.919483971865322e-06, - "loss": 0.8814, - "step": 981 - }, - { - "epoch": 0.11807851860758732, - "grad_norm": 1.690551259800104, - "learning_rate": 3.91926502630131e-06, - "loss": 1.0987, - "step": 982 - }, - { - "epoch": 0.11819876149822642, - "grad_norm": 1.6866187867743039, - "learning_rate": 3.91904578958632e-06, - "loss": 0.9533, - "step": 983 - }, - { - "epoch": 0.11831900438886551, - "grad_norm": 1.8162946769328456, - "learning_rate": 3.918826261753608e-06, - "loss": 1.076, - "step": 984 - }, - { - "epoch": 0.1184392472795046, - "grad_norm": 5.034973662079279, - "learning_rate": 3.918606442836478e-06, - "loss": 0.9379, - "step": 985 - }, - { - "epoch": 0.1185594901701437, - "grad_norm": 1.6194387053396535, - "learning_rate": 3.918386332868277e-06, - "loss": 1.0039, - "step": 986 - }, - { - "epoch": 0.11867973306078278, - "grad_norm": 1.5887686728784023, - "learning_rate": 3.918165931882394e-06, - "loss": 1.1716, - "step": 987 - }, - { - "epoch": 0.11879997595142187, - "grad_norm": 2.1071012321960634, - "learning_rate": 3.917945239912264e-06, - "loss": 0.9906, - "step": 988 - }, - { - "epoch": 0.11892021884206096, - "grad_norm": 1.7991094876571245, - "learning_rate": 3.917724256991367e-06, - "loss": 0.9934, - "step": 989 - }, - { - "epoch": 0.11904046173270005, - "grad_norm": 3.144107538902975, - "learning_rate": 3.9175029831532245e-06, - "loss": 1.0414, - "step": 990 - }, - { - "epoch": 0.11916070462333915, - "grad_norm": 3.882147933609714, - "learning_rate": 3.917281418431404e-06, - "loss": 1.1042, - "step": 991 - }, - { - "epoch": 0.11928094751397823, - "grad_norm": 2.120243583576833, - "learning_rate": 3.917059562859516e-06, - "loss": 1.0071, - "step": 992 - }, - { - "epoch": 0.11940119040461733, - "grad_norm": 1.8586437165710177, - "learning_rate": 3.916837416471218e-06, - "loss": 1.1154, - "step": 993 - }, - { - "epoch": 0.11952143329525641, - "grad_norm": 2.699126709888351, - "learning_rate": 3.916614979300207e-06, - "loss": 0.9654, - "step": 994 - }, - { - "epoch": 0.11964167618589551, - "grad_norm": 1.5398532752947713, - "learning_rate": 3.9163922513802274e-06, - "loss": 1.0162, - "step": 995 - }, - { - "epoch": 0.1197619190765346, - "grad_norm": 2.349426719053634, - "learning_rate": 3.916169232745067e-06, - "loss": 1.0532, - "step": 996 - }, - { - "epoch": 0.11988216196717369, - "grad_norm": 3.5498722536984517, - "learning_rate": 3.915945923428559e-06, - "loss": 1.1436, - "step": 997 - }, - { - "epoch": 0.12000240485781279, - "grad_norm": 1.9672781877164678, - "learning_rate": 3.915722323464577e-06, - "loss": 1.0658, - "step": 998 - }, - { - "epoch": 0.12012264774845187, - "grad_norm": 2.4474496490629765, - "learning_rate": 3.91549843288704e-06, - "loss": 0.9357, - "step": 999 - }, - { - "epoch": 0.12024289063909097, - "grad_norm": 1.8403194646692203, - "learning_rate": 3.915274251729916e-06, - "loss": 1.0241, - "step": 1000 - }, - { - "epoch": 0.12036313352973005, - "grad_norm": 1.7894286068458873, - "learning_rate": 3.91504978002721e-06, - "loss": 1.1322, - "step": 1001 - }, - { - "epoch": 0.12048337642036915, - "grad_norm": 2.0411151478427008, - "learning_rate": 3.914825017812974e-06, - "loss": 0.9988, - "step": 1002 - }, - { - "epoch": 0.12060361931100824, - "grad_norm": 1.896631834227643, - "learning_rate": 3.9145999651213065e-06, - "loss": 0.9584, - "step": 1003 - }, - { - "epoch": 0.12072386220164733, - "grad_norm": 2.084594522837954, - "learning_rate": 3.9143746219863465e-06, - "loss": 1.114, - "step": 1004 - }, - { - "epoch": 0.12084410509228642, - "grad_norm": 1.0061114837841925, - "learning_rate": 3.914148988442278e-06, - "loss": 0.9497, - "step": 1005 - }, - { - "epoch": 0.1209643479829255, - "grad_norm": 2.2775070629156913, - "learning_rate": 3.91392306452333e-06, - "loss": 1.1864, - "step": 1006 - }, - { - "epoch": 0.1210845908735646, - "grad_norm": 2.638769818569916, - "learning_rate": 3.913696850263774e-06, - "loss": 0.9012, - "step": 1007 - }, - { - "epoch": 0.1212048337642037, - "grad_norm": 1.7872515845911383, - "learning_rate": 3.913470345697929e-06, - "loss": 1.0182, - "step": 1008 - }, - { - "epoch": 0.12132507665484278, - "grad_norm": 2.0426163962173485, - "learning_rate": 3.913243550860153e-06, - "loss": 1.0848, - "step": 1009 - }, - { - "epoch": 0.12144531954548188, - "grad_norm": 1.8262889804113487, - "learning_rate": 3.913016465784852e-06, - "loss": 0.9908, - "step": 1010 - }, - { - "epoch": 0.12156556243612096, - "grad_norm": 2.5111967170193186, - "learning_rate": 3.912789090506474e-06, - "loss": 0.9524, - "step": 1011 - }, - { - "epoch": 0.12168580532676006, - "grad_norm": 1.9367958758263402, - "learning_rate": 3.9125614250595114e-06, - "loss": 0.9536, - "step": 1012 - }, - { - "epoch": 0.12180604821739914, - "grad_norm": 2.4633500157033557, - "learning_rate": 3.912333469478502e-06, - "loss": 1.1205, - "step": 1013 - }, - { - "epoch": 0.12192629110803824, - "grad_norm": 1.7831092634285504, - "learning_rate": 3.912105223798025e-06, - "loss": 1.0116, - "step": 1014 - }, - { - "epoch": 0.12204653399867733, - "grad_norm": 1.0281131314691214, - "learning_rate": 3.9118766880527065e-06, - "loss": 0.9516, - "step": 1015 - }, - { - "epoch": 0.12216677688931642, - "grad_norm": 1.5781082303506435, - "learning_rate": 3.9116478622772145e-06, - "loss": 0.9586, - "step": 1016 - }, - { - "epoch": 0.12228701977995551, - "grad_norm": 1.660689468022559, - "learning_rate": 3.911418746506261e-06, - "loss": 1.1032, - "step": 1017 - }, - { - "epoch": 0.1224072626705946, - "grad_norm": 2.410174946271943, - "learning_rate": 3.911189340774604e-06, - "loss": 1.0108, - "step": 1018 - }, - { - "epoch": 0.1225275055612337, - "grad_norm": 1.5542971633791987, - "learning_rate": 3.910959645117043e-06, - "loss": 1.0297, - "step": 1019 - }, - { - "epoch": 0.12264774845187278, - "grad_norm": 0.7833674156268601, - "learning_rate": 3.910729659568423e-06, - "loss": 0.8262, - "step": 1020 - }, - { - "epoch": 0.12276799134251187, - "grad_norm": 1.6389305447099645, - "learning_rate": 3.9104993841636344e-06, - "loss": 1.0503, - "step": 1021 - }, - { - "epoch": 0.12288823423315097, - "grad_norm": 1.6904415348371669, - "learning_rate": 3.910268818937608e-06, - "loss": 1.0348, - "step": 1022 - }, - { - "epoch": 0.12300847712379005, - "grad_norm": 2.9266805858179685, - "learning_rate": 3.9100379639253196e-06, - "loss": 1.1038, - "step": 1023 - }, - { - "epoch": 0.12312872001442915, - "grad_norm": 2.5346707436082143, - "learning_rate": 3.909806819161791e-06, - "loss": 1.0843, - "step": 1024 - }, - { - "epoch": 0.12324896290506823, - "grad_norm": 1.729174679044368, - "learning_rate": 3.909575384682086e-06, - "loss": 1.0989, - "step": 1025 - }, - { - "epoch": 0.12336920579570733, - "grad_norm": 1.6683174144100994, - "learning_rate": 3.9093436605213144e-06, - "loss": 0.9233, - "step": 1026 - }, - { - "epoch": 0.12348944868634643, - "grad_norm": 1.6696036052242702, - "learning_rate": 3.909111646714627e-06, - "loss": 1.0205, - "step": 1027 - }, - { - "epoch": 0.12360969157698551, - "grad_norm": 1.82250798321769, - "learning_rate": 3.9088793432972206e-06, - "loss": 0.95, - "step": 1028 - }, - { - "epoch": 0.1237299344676246, - "grad_norm": 1.8803797156790778, - "learning_rate": 3.908646750304336e-06, - "loss": 1.0499, - "step": 1029 - }, - { - "epoch": 0.12385017735826369, - "grad_norm": 1.4212146944544144, - "learning_rate": 3.908413867771257e-06, - "loss": 1.0922, - "step": 1030 - }, - { - "epoch": 0.12397042024890279, - "grad_norm": 2.547368101349786, - "learning_rate": 3.908180695733311e-06, - "loss": 1.0443, - "step": 1031 - }, - { - "epoch": 0.12409066313954187, - "grad_norm": 1.6814860805691938, - "learning_rate": 3.907947234225871e-06, - "loss": 1.056, - "step": 1032 - }, - { - "epoch": 0.12421090603018096, - "grad_norm": 1.971603263594406, - "learning_rate": 3.907713483284352e-06, - "loss": 1.105, - "step": 1033 - }, - { - "epoch": 0.12433114892082006, - "grad_norm": 2.1098766719337565, - "learning_rate": 3.907479442944216e-06, - "loss": 1.2134, - "step": 1034 - }, - { - "epoch": 0.12445139181145914, - "grad_norm": 1.885591284602517, - "learning_rate": 3.907245113240963e-06, - "loss": 1.1453, - "step": 1035 - }, - { - "epoch": 0.12457163470209824, - "grad_norm": 1.6686937244084448, - "learning_rate": 3.907010494210144e-06, - "loss": 0.9768, - "step": 1036 - }, - { - "epoch": 0.12469187759273732, - "grad_norm": 1.9365531084227832, - "learning_rate": 3.9067755858873495e-06, - "loss": 1.1554, - "step": 1037 - }, - { - "epoch": 0.12481212048337642, - "grad_norm": 0.8456099813499273, - "learning_rate": 3.906540388308214e-06, - "loss": 0.8936, - "step": 1038 - }, - { - "epoch": 0.12493236337401552, - "grad_norm": 1.8446017738571165, - "learning_rate": 3.906304901508417e-06, - "loss": 1.0446, - "step": 1039 - }, - { - "epoch": 0.12505260626465461, - "grad_norm": 1.7678276433864222, - "learning_rate": 3.9060691255236835e-06, - "loss": 0.9799, - "step": 1040 - }, - { - "epoch": 0.1251728491552937, - "grad_norm": 1.4379264364884314, - "learning_rate": 3.905833060389778e-06, - "loss": 1.0528, - "step": 1041 - }, - { - "epoch": 0.12529309204593278, - "grad_norm": 1.9877236414387431, - "learning_rate": 3.905596706142513e-06, - "loss": 1.0112, - "step": 1042 - }, - { - "epoch": 0.12541333493657186, - "grad_norm": 2.042821466742238, - "learning_rate": 3.9053600628177435e-06, - "loss": 1.0855, - "step": 1043 - }, - { - "epoch": 0.12553357782721097, - "grad_norm": 1.7999109246148408, - "learning_rate": 3.905123130451367e-06, - "loss": 1.0766, - "step": 1044 - }, - { - "epoch": 0.12565382071785006, - "grad_norm": 1.631086249189454, - "learning_rate": 3.904885909079326e-06, - "loss": 1.0243, - "step": 1045 - }, - { - "epoch": 0.12577406360848914, - "grad_norm": 2.077515017825862, - "learning_rate": 3.904648398737607e-06, - "loss": 0.9951, - "step": 1046 - }, - { - "epoch": 0.12589430649912825, - "grad_norm": 1.7003579015407049, - "learning_rate": 3.9044105994622406e-06, - "loss": 1.0077, - "step": 1047 - }, - { - "epoch": 0.12601454938976733, - "grad_norm": 1.6622852989897452, - "learning_rate": 3.9041725112893005e-06, - "loss": 1.0464, - "step": 1048 - }, - { - "epoch": 0.12613479228040642, - "grad_norm": 1.7298804116122057, - "learning_rate": 3.903934134254904e-06, - "loss": 0.982, - "step": 1049 - }, - { - "epoch": 0.1262550351710455, - "grad_norm": 3.204767317872944, - "learning_rate": 3.903695468395213e-06, - "loss": 1.0777, - "step": 1050 - }, - { - "epoch": 0.1263752780616846, - "grad_norm": 2.0792079771087724, - "learning_rate": 3.903456513746434e-06, - "loss": 0.7942, - "step": 1051 - }, - { - "epoch": 0.1264955209523237, - "grad_norm": 1.7309672927014246, - "learning_rate": 3.903217270344815e-06, - "loss": 1.1079, - "step": 1052 - }, - { - "epoch": 0.12661576384296278, - "grad_norm": 1.4374412856343024, - "learning_rate": 3.902977738226648e-06, - "loss": 1.0553, - "step": 1053 - }, - { - "epoch": 0.12673600673360189, - "grad_norm": 1.7089079223970627, - "learning_rate": 3.902737917428273e-06, - "loss": 1.1384, - "step": 1054 - }, - { - "epoch": 0.12685624962424097, - "grad_norm": 1.5931635506935322, - "learning_rate": 3.902497807986068e-06, - "loss": 1.0691, - "step": 1055 - }, - { - "epoch": 0.12697649251488005, - "grad_norm": 1.5564114008170011, - "learning_rate": 3.902257409936458e-06, - "loss": 1.0669, - "step": 1056 - }, - { - "epoch": 0.12709673540551916, - "grad_norm": 2.081926468436579, - "learning_rate": 3.902016723315912e-06, - "loss": 1.0763, - "step": 1057 - }, - { - "epoch": 0.12721697829615825, - "grad_norm": 2.022136490906092, - "learning_rate": 3.901775748160941e-06, - "loss": 0.9197, - "step": 1058 - }, - { - "epoch": 0.12733722118679733, - "grad_norm": 0.8291404903695351, - "learning_rate": 3.901534484508101e-06, - "loss": 0.8692, - "step": 1059 - }, - { - "epoch": 0.1274574640774364, - "grad_norm": 1.7166333429873666, - "learning_rate": 3.901292932393991e-06, - "loss": 0.9728, - "step": 1060 - }, - { - "epoch": 0.12757770696807552, - "grad_norm": 5.382545497788811, - "learning_rate": 3.9010510918552555e-06, - "loss": 1.0818, - "step": 1061 - }, - { - "epoch": 0.1276979498587146, - "grad_norm": 2.0663010222498515, - "learning_rate": 3.900808962928581e-06, - "loss": 0.9849, - "step": 1062 - }, - { - "epoch": 0.1278181927493537, - "grad_norm": 1.901680143254521, - "learning_rate": 3.900566545650698e-06, - "loss": 1.1187, - "step": 1063 - }, - { - "epoch": 0.1279384356399928, - "grad_norm": 2.3194079964135037, - "learning_rate": 3.900323840058381e-06, - "loss": 1.0471, - "step": 1064 - }, - { - "epoch": 0.12805867853063188, - "grad_norm": 1.6617048154461003, - "learning_rate": 3.900080846188449e-06, - "loss": 1.0451, - "step": 1065 - }, - { - "epoch": 0.12817892142127096, - "grad_norm": 1.7252791800768197, - "learning_rate": 3.8998375640777625e-06, - "loss": 1.0378, - "step": 1066 - }, - { - "epoch": 0.12829916431191005, - "grad_norm": 0.9116865726078406, - "learning_rate": 3.899593993763229e-06, - "loss": 0.7835, - "step": 1067 - }, - { - "epoch": 0.12841940720254916, - "grad_norm": 3.9817865079076835, - "learning_rate": 3.899350135281796e-06, - "loss": 1.04, - "step": 1068 - }, - { - "epoch": 0.12853965009318824, - "grad_norm": 1.8125697155403009, - "learning_rate": 3.8991059886704585e-06, - "loss": 1.0211, - "step": 1069 - }, - { - "epoch": 0.12865989298382732, - "grad_norm": 2.0792693235731616, - "learning_rate": 3.898861553966252e-06, - "loss": 1.0474, - "step": 1070 - }, - { - "epoch": 0.12878013587446643, - "grad_norm": 1.48281400819145, - "learning_rate": 3.898616831206257e-06, - "loss": 1.1084, - "step": 1071 - }, - { - "epoch": 0.12890037876510552, - "grad_norm": 1.7837031688036058, - "learning_rate": 3.8983718204276e-06, - "loss": 0.9998, - "step": 1072 - }, - { - "epoch": 0.1290206216557446, - "grad_norm": 1.6098653546297688, - "learning_rate": 3.898126521667446e-06, - "loss": 1.0631, - "step": 1073 - }, - { - "epoch": 0.12914086454638368, - "grad_norm": 1.5817999023070903, - "learning_rate": 3.897880934963007e-06, - "loss": 1.0659, - "step": 1074 - }, - { - "epoch": 0.1292611074370228, - "grad_norm": 1.732703288208065, - "learning_rate": 3.89763506035154e-06, - "loss": 1.007, - "step": 1075 - }, - { - "epoch": 0.12938135032766188, - "grad_norm": 1.7785159762235747, - "learning_rate": 3.897388897870343e-06, - "loss": 1.0432, - "step": 1076 - }, - { - "epoch": 0.12950159321830096, - "grad_norm": 1.9500242109507429, - "learning_rate": 3.89714244755676e-06, - "loss": 0.9669, - "step": 1077 - }, - { - "epoch": 0.12962183610894007, - "grad_norm": 2.294190791130589, - "learning_rate": 3.896895709448175e-06, - "loss": 1.0887, - "step": 1078 - }, - { - "epoch": 0.12974207899957915, - "grad_norm": 2.4785047550429, - "learning_rate": 3.896648683582019e-06, - "loss": 1.0033, - "step": 1079 - }, - { - "epoch": 0.12986232189021824, - "grad_norm": 1.9658996293145443, - "learning_rate": 3.896401369995766e-06, - "loss": 1.0351, - "step": 1080 - }, - { - "epoch": 0.12998256478085732, - "grad_norm": 1.5674064658213296, - "learning_rate": 3.896153768726932e-06, - "loss": 1.0168, - "step": 1081 - }, - { - "epoch": 0.13010280767149643, - "grad_norm": 5.302613876968854, - "learning_rate": 3.8959058798130806e-06, - "loss": 1.1045, - "step": 1082 - }, - { - "epoch": 0.1302230505621355, - "grad_norm": 1.5952822481550675, - "learning_rate": 3.895657703291814e-06, - "loss": 0.9762, - "step": 1083 - }, - { - "epoch": 0.1303432934527746, - "grad_norm": 2.2413028473801386, - "learning_rate": 3.895409239200781e-06, - "loss": 1.0314, - "step": 1084 - }, - { - "epoch": 0.1304635363434137, - "grad_norm": 2.000186673035731, - "learning_rate": 3.895160487577673e-06, - "loss": 1.1502, - "step": 1085 - }, - { - "epoch": 0.1305837792340528, - "grad_norm": 0.7826224669394317, - "learning_rate": 3.894911448460226e-06, - "loss": 0.8635, - "step": 1086 - }, - { - "epoch": 0.13070402212469187, - "grad_norm": 1.8268832040211667, - "learning_rate": 3.8946621218862195e-06, - "loss": 0.9595, - "step": 1087 - }, - { - "epoch": 0.13082426501533098, - "grad_norm": 1.7256219986416623, - "learning_rate": 3.894412507893475e-06, - "loss": 1.1153, - "step": 1088 - }, - { - "epoch": 0.13094450790597006, - "grad_norm": 3.693625454166476, - "learning_rate": 3.894162606519859e-06, - "loss": 0.951, - "step": 1089 - }, - { - "epoch": 0.13106475079660915, - "grad_norm": 1.675685710895641, - "learning_rate": 3.893912417803282e-06, - "loss": 0.9953, - "step": 1090 - }, - { - "epoch": 0.13118499368724823, - "grad_norm": 1.802038717728243, - "learning_rate": 3.8936619417816975e-06, - "loss": 0.9911, - "step": 1091 - }, - { - "epoch": 0.13130523657788734, - "grad_norm": 1.8373244869981653, - "learning_rate": 3.8934111784931015e-06, - "loss": 0.9548, - "step": 1092 - }, - { - "epoch": 0.13142547946852642, - "grad_norm": 1.0164077676678176, - "learning_rate": 3.893160127975535e-06, - "loss": 0.8748, - "step": 1093 - }, - { - "epoch": 0.1315457223591655, - "grad_norm": 2.323329210355248, - "learning_rate": 3.8929087902670826e-06, - "loss": 1.0407, - "step": 1094 - }, - { - "epoch": 0.13166596524980462, - "grad_norm": 0.904372210073221, - "learning_rate": 3.8926571654058715e-06, - "loss": 0.8583, - "step": 1095 - }, - { - "epoch": 0.1317862081404437, - "grad_norm": 2.695608194689303, - "learning_rate": 3.892405253430074e-06, - "loss": 0.9928, - "step": 1096 - }, - { - "epoch": 0.13190645103108278, - "grad_norm": 1.6214143367116085, - "learning_rate": 3.892153054377904e-06, - "loss": 1.0538, - "step": 1097 - }, - { - "epoch": 0.13202669392172187, - "grad_norm": 1.0110161304111513, - "learning_rate": 3.891900568287619e-06, - "loss": 0.8684, - "step": 1098 - }, - { - "epoch": 0.13214693681236098, - "grad_norm": 2.70438280774923, - "learning_rate": 3.891647795197523e-06, - "loss": 0.9608, - "step": 1099 - }, - { - "epoch": 0.13226717970300006, - "grad_norm": 1.901859981742078, - "learning_rate": 3.8913947351459605e-06, - "loss": 0.9223, - "step": 1100 - }, - { - "epoch": 0.13238742259363914, - "grad_norm": 2.0744951714132864, - "learning_rate": 3.89114138817132e-06, - "loss": 0.9056, - "step": 1101 - }, - { - "epoch": 0.13250766548427825, - "grad_norm": 1.6946753082315178, - "learning_rate": 3.890887754312035e-06, - "loss": 1.0767, - "step": 1102 - }, - { - "epoch": 0.13262790837491734, - "grad_norm": 1.7101986478566211, - "learning_rate": 3.890633833606581e-06, - "loss": 1.1033, - "step": 1103 - }, - { - "epoch": 0.13274815126555642, - "grad_norm": 1.5388849611075381, - "learning_rate": 3.890379626093477e-06, - "loss": 0.9233, - "step": 1104 - }, - { - "epoch": 0.1328683941561955, - "grad_norm": 2.0736007200734337, - "learning_rate": 3.890125131811287e-06, - "loss": 1.1471, - "step": 1105 - }, - { - "epoch": 0.1329886370468346, - "grad_norm": 1.852198624285455, - "learning_rate": 3.889870350798618e-06, - "loss": 0.9826, - "step": 1106 - }, - { - "epoch": 0.1331088799374737, - "grad_norm": 1.9480241620055554, - "learning_rate": 3.889615283094119e-06, - "loss": 1.0223, - "step": 1107 - }, - { - "epoch": 0.13322912282811278, - "grad_norm": 1.9671106931010145, - "learning_rate": 3.889359928736485e-06, - "loss": 1.0766, - "step": 1108 - }, - { - "epoch": 0.1333493657187519, - "grad_norm": 1.7296684946396164, - "learning_rate": 3.889104287764451e-06, - "loss": 1.1372, - "step": 1109 - }, - { - "epoch": 0.13346960860939097, - "grad_norm": 1.7553120100452948, - "learning_rate": 3.888848360216798e-06, - "loss": 1.136, - "step": 1110 - }, - { - "epoch": 0.13358985150003005, - "grad_norm": 0.8128462567152347, - "learning_rate": 3.888592146132351e-06, - "loss": 0.8136, - "step": 1111 - }, - { - "epoch": 0.13371009439066917, - "grad_norm": 2.8843979856292288, - "learning_rate": 3.888335645549978e-06, - "loss": 1.0154, - "step": 1112 - }, - { - "epoch": 0.13383033728130825, - "grad_norm": 1.939584656526119, - "learning_rate": 3.888078858508588e-06, - "loss": 1.0557, - "step": 1113 - }, - { - "epoch": 0.13395058017194733, - "grad_norm": 3.505417037728669, - "learning_rate": 3.8878217850471365e-06, - "loss": 1.073, - "step": 1114 - }, - { - "epoch": 0.13407082306258641, - "grad_norm": 1.6650662685643889, - "learning_rate": 3.887564425204621e-06, - "loss": 0.9708, - "step": 1115 - }, - { - "epoch": 0.13419106595322552, - "grad_norm": 0.8333298484411488, - "learning_rate": 3.887306779020083e-06, - "loss": 0.807, - "step": 1116 - }, - { - "epoch": 0.1343113088438646, - "grad_norm": 1.9611571899394304, - "learning_rate": 3.887048846532608e-06, - "loss": 0.9496, - "step": 1117 - }, - { - "epoch": 0.1344315517345037, - "grad_norm": 0.7544703769518561, - "learning_rate": 3.8867906277813224e-06, - "loss": 0.8287, - "step": 1118 - }, - { - "epoch": 0.1345517946251428, - "grad_norm": 1.861243711400729, - "learning_rate": 3.886532122805399e-06, - "loss": 0.97, - "step": 1119 - }, - { - "epoch": 0.13467203751578188, - "grad_norm": 1.6412731525214639, - "learning_rate": 3.886273331644053e-06, - "loss": 1.1218, - "step": 1120 - }, - { - "epoch": 0.13479228040642097, - "grad_norm": 1.8966788478028853, - "learning_rate": 3.886014254336542e-06, - "loss": 1.0579, - "step": 1121 - }, - { - "epoch": 0.13491252329706005, - "grad_norm": 1.5463360704969416, - "learning_rate": 3.885754890922168e-06, - "loss": 1.1543, - "step": 1122 - }, - { - "epoch": 0.13503276618769916, - "grad_norm": 3.5523306274842845, - "learning_rate": 3.885495241440277e-06, - "loss": 1.0113, - "step": 1123 - }, - { - "epoch": 0.13515300907833824, - "grad_norm": 1.8008736609457057, - "learning_rate": 3.885235305930257e-06, - "loss": 0.9772, - "step": 1124 - }, - { - "epoch": 0.13527325196897733, - "grad_norm": 1.986276331463538, - "learning_rate": 3.884975084431539e-06, - "loss": 1.0947, - "step": 1125 - }, - { - "epoch": 0.13539349485961644, - "grad_norm": 2.3585865648633413, - "learning_rate": 3.8847145769836e-06, - "loss": 1.1557, - "step": 1126 - }, - { - "epoch": 0.13551373775025552, - "grad_norm": 1.999712029229299, - "learning_rate": 3.884453783625959e-06, - "loss": 0.9018, - "step": 1127 - }, - { - "epoch": 0.1356339806408946, - "grad_norm": 1.9628704620388553, - "learning_rate": 3.884192704398176e-06, - "loss": 1.0859, - "step": 1128 - }, - { - "epoch": 0.13575422353153369, - "grad_norm": 1.7428961710948714, - "learning_rate": 3.883931339339858e-06, - "loss": 0.979, - "step": 1129 - }, - { - "epoch": 0.1358744664221728, - "grad_norm": 2.118715080040982, - "learning_rate": 3.883669688490654e-06, - "loss": 1.0133, - "step": 1130 - }, - { - "epoch": 0.13599470931281188, - "grad_norm": 2.471499011046289, - "learning_rate": 3.883407751890256e-06, - "loss": 1.0765, - "step": 1131 - }, - { - "epoch": 0.13611495220345096, - "grad_norm": 1.6435909527898884, - "learning_rate": 3.8831455295783994e-06, - "loss": 1.0776, - "step": 1132 - }, - { - "epoch": 0.13623519509409007, - "grad_norm": 1.9363863420844398, - "learning_rate": 3.882883021594864e-06, - "loss": 0.9695, - "step": 1133 - }, - { - "epoch": 0.13635543798472916, - "grad_norm": 2.050165807019811, - "learning_rate": 3.8826202279794705e-06, - "loss": 1.1043, - "step": 1134 - }, - { - "epoch": 0.13647568087536824, - "grad_norm": 1.8549210874527466, - "learning_rate": 3.882357148772085e-06, - "loss": 0.9288, - "step": 1135 - }, - { - "epoch": 0.13659592376600732, - "grad_norm": 2.3835250039378595, - "learning_rate": 3.882093784012617e-06, - "loss": 1.0828, - "step": 1136 - }, - { - "epoch": 0.13671616665664643, - "grad_norm": 3.4766362900350214, - "learning_rate": 3.881830133741019e-06, - "loss": 1.062, - "step": 1137 - }, - { - "epoch": 0.13683640954728551, - "grad_norm": 2.6190489568748196, - "learning_rate": 3.881566197997285e-06, - "loss": 0.994, - "step": 1138 - }, - { - "epoch": 0.1369566524379246, - "grad_norm": 1.3881445203951834, - "learning_rate": 3.881301976821456e-06, - "loss": 0.9786, - "step": 1139 - }, - { - "epoch": 0.1370768953285637, - "grad_norm": 1.806890159076238, - "learning_rate": 3.881037470253612e-06, - "loss": 1.1343, - "step": 1140 - }, - { - "epoch": 0.1371971382192028, - "grad_norm": 2.8043247508985467, - "learning_rate": 3.88077267833388e-06, - "loss": 1.0297, - "step": 1141 - }, - { - "epoch": 0.13731738110984187, - "grad_norm": 4.520214454492133, - "learning_rate": 3.880507601102427e-06, - "loss": 1.0657, - "step": 1142 - }, - { - "epoch": 0.13743762400048098, - "grad_norm": 1.8367904305110765, - "learning_rate": 3.880242238599467e-06, - "loss": 1.051, - "step": 1143 - }, - { - "epoch": 0.13755786689112007, - "grad_norm": 1.532257760522291, - "learning_rate": 3.879976590865254e-06, - "loss": 1.0645, - "step": 1144 - }, - { - "epoch": 0.13767810978175915, - "grad_norm": 2.1527726149411657, - "learning_rate": 3.879710657940087e-06, - "loss": 1.1035, - "step": 1145 - }, - { - "epoch": 0.13779835267239823, - "grad_norm": 2.026467312097161, - "learning_rate": 3.879444439864308e-06, - "loss": 0.932, - "step": 1146 - }, - { - "epoch": 0.13791859556303734, - "grad_norm": 1.582161377629561, - "learning_rate": 3.879177936678301e-06, - "loss": 1.0896, - "step": 1147 - }, - { - "epoch": 0.13803883845367643, - "grad_norm": 2.1426932930746894, - "learning_rate": 3.878911148422496e-06, - "loss": 1.0096, - "step": 1148 - }, - { - "epoch": 0.1381590813443155, - "grad_norm": 2.02694951158379, - "learning_rate": 3.878644075137364e-06, - "loss": 0.9276, - "step": 1149 - }, - { - "epoch": 0.13827932423495462, - "grad_norm": 4.759228425201618, - "learning_rate": 3.878376716863418e-06, - "loss": 1.0135, - "step": 1150 - }, - { - "epoch": 0.1383995671255937, - "grad_norm": 3.3677192144260415, - "learning_rate": 3.878109073641219e-06, - "loss": 0.9528, - "step": 1151 - }, - { - "epoch": 0.13851981001623279, - "grad_norm": 1.451865029401367, - "learning_rate": 3.877841145511366e-06, - "loss": 1.0394, - "step": 1152 - }, - { - "epoch": 0.13864005290687187, - "grad_norm": 2.263888094485195, - "learning_rate": 3.8775729325145035e-06, - "loss": 1.0517, - "step": 1153 - }, - { - "epoch": 0.13876029579751098, - "grad_norm": 0.7816177265677412, - "learning_rate": 3.877304434691321e-06, - "loss": 0.8795, - "step": 1154 - }, - { - "epoch": 0.13888053868815006, - "grad_norm": 1.7457871135823857, - "learning_rate": 3.877035652082548e-06, - "loss": 1.0253, - "step": 1155 - }, - { - "epoch": 0.13900078157878915, - "grad_norm": 2.340968605153632, - "learning_rate": 3.87676658472896e-06, - "loss": 1.0744, - "step": 1156 - }, - { - "epoch": 0.13912102446942826, - "grad_norm": 1.7788275592807579, - "learning_rate": 3.876497232671372e-06, - "loss": 1.0791, - "step": 1157 - }, - { - "epoch": 0.13924126736006734, - "grad_norm": 2.008425963970961, - "learning_rate": 3.876227595950647e-06, - "loss": 1.0655, - "step": 1158 - }, - { - "epoch": 0.13936151025070642, - "grad_norm": 1.4486250573853696, - "learning_rate": 3.875957674607686e-06, - "loss": 1.0153, - "step": 1159 - }, - { - "epoch": 0.1394817531413455, - "grad_norm": 1.872486273253056, - "learning_rate": 3.8756874686834386e-06, - "loss": 1.1118, - "step": 1160 - }, - { - "epoch": 0.13960199603198462, - "grad_norm": 1.8089885898607707, - "learning_rate": 3.875416978218893e-06, - "loss": 1.035, - "step": 1161 - }, - { - "epoch": 0.1397222389226237, - "grad_norm": 2.224510773440386, - "learning_rate": 3.8751462032550835e-06, - "loss": 1.0554, - "step": 1162 - }, - { - "epoch": 0.13984248181326278, - "grad_norm": 2.169459255014364, - "learning_rate": 3.874875143833085e-06, - "loss": 1.0484, - "step": 1163 - }, - { - "epoch": 0.1399627247039019, - "grad_norm": 2.0842792906513243, - "learning_rate": 3.874603799994019e-06, - "loss": 0.9211, - "step": 1164 - }, - { - "epoch": 0.14008296759454097, - "grad_norm": 1.734996306335739, - "learning_rate": 3.874332171779046e-06, - "loss": 1.1049, - "step": 1165 - }, - { - "epoch": 0.14020321048518006, - "grad_norm": 1.774181193227622, - "learning_rate": 3.874060259229373e-06, - "loss": 0.9897, - "step": 1166 - }, - { - "epoch": 0.14032345337581917, - "grad_norm": 1.972678125845597, - "learning_rate": 3.873788062386249e-06, - "loss": 1.1523, - "step": 1167 - }, - { - "epoch": 0.14044369626645825, - "grad_norm": 1.6407178580027377, - "learning_rate": 3.873515581290965e-06, - "loss": 1.0513, - "step": 1168 - }, - { - "epoch": 0.14056393915709733, - "grad_norm": 1.994949819273118, - "learning_rate": 3.8732428159848575e-06, - "loss": 0.9819, - "step": 1169 - }, - { - "epoch": 0.14068418204773642, - "grad_norm": 1.7509557974882435, - "learning_rate": 3.872969766509304e-06, - "loss": 1.0097, - "step": 1170 - }, - { - "epoch": 0.14080442493837553, - "grad_norm": 0.8386554206474187, - "learning_rate": 3.872696432905726e-06, - "loss": 0.8183, - "step": 1171 - }, - { - "epoch": 0.1409246678290146, - "grad_norm": 1.8660149503806087, - "learning_rate": 3.872422815215589e-06, - "loss": 0.9519, - "step": 1172 - }, - { - "epoch": 0.1410449107196537, - "grad_norm": 1.6862488806447244, - "learning_rate": 3.8721489134803994e-06, - "loss": 0.9831, - "step": 1173 - }, - { - "epoch": 0.1411651536102928, - "grad_norm": 1.9815617842493236, - "learning_rate": 3.871874727741707e-06, - "loss": 0.9537, - "step": 1174 - }, - { - "epoch": 0.1412853965009319, - "grad_norm": 1.6472256079144534, - "learning_rate": 3.871600258041108e-06, - "loss": 1.192, - "step": 1175 - }, - { - "epoch": 0.14140563939157097, - "grad_norm": 2.1545772975211754, - "learning_rate": 3.871325504420238e-06, - "loss": 1.0924, - "step": 1176 - }, - { - "epoch": 0.14152588228221005, - "grad_norm": 1.8054445338554412, - "learning_rate": 3.871050466920776e-06, - "loss": 1.0403, - "step": 1177 - }, - { - "epoch": 0.14164612517284916, - "grad_norm": 1.655189390496222, - "learning_rate": 3.870775145584447e-06, - "loss": 1.0295, - "step": 1178 - }, - { - "epoch": 0.14176636806348825, - "grad_norm": 2.3093252297950095, - "learning_rate": 3.8704995404530145e-06, - "loss": 0.8569, - "step": 1179 - }, - { - "epoch": 0.14188661095412733, - "grad_norm": 2.3040414712935244, - "learning_rate": 3.87022365156829e-06, - "loss": 1.0792, - "step": 1180 - }, - { - "epoch": 0.14200685384476644, - "grad_norm": 1.7514657285638622, - "learning_rate": 3.869947478972123e-06, - "loss": 1.0344, - "step": 1181 - }, - { - "epoch": 0.14212709673540552, - "grad_norm": 1.9083600482431162, - "learning_rate": 3.869671022706412e-06, - "loss": 1.047, - "step": 1182 - }, - { - "epoch": 0.1422473396260446, - "grad_norm": 2.8051104638419266, - "learning_rate": 3.869394282813092e-06, - "loss": 0.877, - "step": 1183 - }, - { - "epoch": 0.1423675825166837, - "grad_norm": 2.1435188724940777, - "learning_rate": 3.869117259334147e-06, - "loss": 1.119, - "step": 1184 - }, - { - "epoch": 0.1424878254073228, - "grad_norm": 1.6856908637260026, - "learning_rate": 3.868839952311599e-06, - "loss": 1.0558, - "step": 1185 - }, - { - "epoch": 0.14260806829796188, - "grad_norm": 2.0043812685119984, - "learning_rate": 3.868562361787516e-06, - "loss": 1.035, - "step": 1186 - }, - { - "epoch": 0.14272831118860096, - "grad_norm": 2.3164507136331234, - "learning_rate": 3.868284487804009e-06, - "loss": 0.9275, - "step": 1187 - }, - { - "epoch": 0.14284855407924008, - "grad_norm": 1.5055369862016972, - "learning_rate": 3.86800633040323e-06, - "loss": 1.0109, - "step": 1188 - }, - { - "epoch": 0.14296879696987916, - "grad_norm": 2.0226573250105777, - "learning_rate": 3.867727889627376e-06, - "loss": 1.0142, - "step": 1189 - }, - { - "epoch": 0.14308903986051824, - "grad_norm": 3.051589370351407, - "learning_rate": 3.867449165518687e-06, - "loss": 1.0196, - "step": 1190 - }, - { - "epoch": 0.14320928275115732, - "grad_norm": 1.9019019144481129, - "learning_rate": 3.867170158119444e-06, - "loss": 0.9424, - "step": 1191 - }, - { - "epoch": 0.14332952564179643, - "grad_norm": 2.1888476988611645, - "learning_rate": 3.866890867471972e-06, - "loss": 0.9846, - "step": 1192 - }, - { - "epoch": 0.14344976853243552, - "grad_norm": 2.281551915922592, - "learning_rate": 3.86661129361864e-06, - "loss": 1.1294, - "step": 1193 - }, - { - "epoch": 0.1435700114230746, - "grad_norm": 1.7901095195916277, - "learning_rate": 3.866331436601859e-06, - "loss": 1.088, - "step": 1194 - }, - { - "epoch": 0.1436902543137137, - "grad_norm": 1.8173082962625111, - "learning_rate": 3.866051296464083e-06, - "loss": 0.97, - "step": 1195 - }, - { - "epoch": 0.1438104972043528, - "grad_norm": 1.8250857137121643, - "learning_rate": 3.86577087324781e-06, - "loss": 1.0729, - "step": 1196 - }, - { - "epoch": 0.14393074009499188, - "grad_norm": 8.44782580331902, - "learning_rate": 3.865490166995578e-06, - "loss": 1.002, - "step": 1197 - }, - { - "epoch": 0.144050982985631, - "grad_norm": 1.975263266581267, - "learning_rate": 3.86520917774997e-06, - "loss": 1.0663, - "step": 1198 - }, - { - "epoch": 0.14417122587627007, - "grad_norm": 2.4994879198142983, - "learning_rate": 3.864927905553614e-06, - "loss": 0.9795, - "step": 1199 - }, - { - "epoch": 0.14429146876690915, - "grad_norm": 1.440623083547778, - "learning_rate": 3.8646463504491765e-06, - "loss": 1.1151, - "step": 1200 - }, - { - "epoch": 0.14441171165754824, - "grad_norm": 2.3994670236383904, - "learning_rate": 3.8643645124793705e-06, - "loss": 1.0676, - "step": 1201 - }, - { - "epoch": 0.14453195454818735, - "grad_norm": 4.553253209724901, - "learning_rate": 3.8640823916869515e-06, - "loss": 0.9786, - "step": 1202 - }, - { - "epoch": 0.14465219743882643, - "grad_norm": 1.5324541145412605, - "learning_rate": 3.863799988114714e-06, - "loss": 1.0105, - "step": 1203 - }, - { - "epoch": 0.1447724403294655, - "grad_norm": 4.947259649189541, - "learning_rate": 3.863517301805502e-06, - "loss": 0.9368, - "step": 1204 - }, - { - "epoch": 0.14489268322010462, - "grad_norm": 2.5914118695708495, - "learning_rate": 3.863234332802196e-06, - "loss": 1.1953, - "step": 1205 - }, - { - "epoch": 0.1450129261107437, - "grad_norm": 1.9713413199472702, - "learning_rate": 3.862951081147723e-06, - "loss": 0.9679, - "step": 1206 - }, - { - "epoch": 0.1451331690013828, - "grad_norm": 1.9828842450712565, - "learning_rate": 3.862667546885053e-06, - "loss": 1.0138, - "step": 1207 - }, - { - "epoch": 0.14525341189202187, - "grad_norm": 2.11750289988854, - "learning_rate": 3.8623837300571965e-06, - "loss": 0.9569, - "step": 1208 - }, - { - "epoch": 0.14537365478266098, - "grad_norm": 1.7333564860680533, - "learning_rate": 3.8620996307072085e-06, - "loss": 1.0677, - "step": 1209 - }, - { - "epoch": 0.14549389767330007, - "grad_norm": 1.6325431492055407, - "learning_rate": 3.861815248878188e-06, - "loss": 0.8712, - "step": 1210 - }, - { - "epoch": 0.14561414056393915, - "grad_norm": 2.2911722401245256, - "learning_rate": 3.861530584613274e-06, - "loss": 1.0264, - "step": 1211 - }, - { - "epoch": 0.14573438345457826, - "grad_norm": 2.249873475649932, - "learning_rate": 3.86124563795565e-06, - "loss": 1.0536, - "step": 1212 - }, - { - "epoch": 0.14585462634521734, - "grad_norm": 1.6616815717992208, - "learning_rate": 3.860960408948543e-06, - "loss": 0.9374, - "step": 1213 - }, - { - "epoch": 0.14597486923585642, - "grad_norm": 2.251740312404891, - "learning_rate": 3.860674897635222e-06, - "loss": 1.1287, - "step": 1214 - }, - { - "epoch": 0.1460951121264955, - "grad_norm": 1.8905729176307011, - "learning_rate": 3.860389104058998e-06, - "loss": 1.0601, - "step": 1215 - }, - { - "epoch": 0.14621535501713462, - "grad_norm": 1.8626357265916451, - "learning_rate": 3.860103028263227e-06, - "loss": 0.9565, - "step": 1216 - }, - { - "epoch": 0.1463355979077737, - "grad_norm": 1.9318838863112888, - "learning_rate": 3.859816670291304e-06, - "loss": 0.921, - "step": 1217 - }, - { - "epoch": 0.14645584079841278, - "grad_norm": 1.7776518502272753, - "learning_rate": 3.859530030186672e-06, - "loss": 1.1254, - "step": 1218 - }, - { - "epoch": 0.1465760836890519, - "grad_norm": 2.5544055468154068, - "learning_rate": 3.859243107992813e-06, - "loss": 1.0579, - "step": 1219 - }, - { - "epoch": 0.14669632657969098, - "grad_norm": 2.464574058517413, - "learning_rate": 3.858955903753252e-06, - "loss": 1.0161, - "step": 1220 - }, - { - "epoch": 0.14681656947033006, - "grad_norm": 1.5423901209344704, - "learning_rate": 3.858668417511559e-06, - "loss": 1.0596, - "step": 1221 - }, - { - "epoch": 0.14693681236096917, - "grad_norm": 2.0491646343943524, - "learning_rate": 3.8583806493113445e-06, - "loss": 0.9918, - "step": 1222 - }, - { - "epoch": 0.14705705525160825, - "grad_norm": 1.887474145775369, - "learning_rate": 3.858092599196263e-06, - "loss": 1.0498, - "step": 1223 - }, - { - "epoch": 0.14717729814224734, - "grad_norm": 1.958897536417299, - "learning_rate": 3.857804267210012e-06, - "loss": 1.0513, - "step": 1224 - }, - { - "epoch": 0.14729754103288642, - "grad_norm": 1.8827943919723755, - "learning_rate": 3.857515653396331e-06, - "loss": 1.1141, - "step": 1225 - }, - { - "epoch": 0.14741778392352553, - "grad_norm": 2.9804445918192783, - "learning_rate": 3.857226757799002e-06, - "loss": 1.0956, - "step": 1226 - }, - { - "epoch": 0.1475380268141646, - "grad_norm": 2.4203442073392645, - "learning_rate": 3.85693758046185e-06, - "loss": 0.9706, - "step": 1227 - }, - { - "epoch": 0.1476582697048037, - "grad_norm": 1.789451723362595, - "learning_rate": 3.8566481214287435e-06, - "loss": 1.0558, - "step": 1228 - }, - { - "epoch": 0.1477785125954428, - "grad_norm": 1.8649840821105896, - "learning_rate": 3.8563583807435935e-06, - "loss": 1.1321, - "step": 1229 - }, - { - "epoch": 0.1478987554860819, - "grad_norm": 2.1961483704659157, - "learning_rate": 3.856068358450353e-06, - "loss": 1.0065, - "step": 1230 - }, - { - "epoch": 0.14801899837672097, - "grad_norm": 1.749071487967997, - "learning_rate": 3.8557780545930186e-06, - "loss": 1.0808, - "step": 1231 - }, - { - "epoch": 0.14813924126736006, - "grad_norm": 1.599306406247359, - "learning_rate": 3.855487469215628e-06, - "loss": 1.0191, - "step": 1232 - }, - { - "epoch": 0.14825948415799917, - "grad_norm": 2.692216150592978, - "learning_rate": 3.855196602362264e-06, - "loss": 0.9528, - "step": 1233 - }, - { - "epoch": 0.14837972704863825, - "grad_norm": 2.237625851634785, - "learning_rate": 3.854905454077051e-06, - "loss": 1.1799, - "step": 1234 - }, - { - "epoch": 0.14849996993927733, - "grad_norm": 1.6876182514720015, - "learning_rate": 3.854614024404155e-06, - "loss": 1.1045, - "step": 1235 - }, - { - "epoch": 0.14862021282991644, - "grad_norm": 1.796413196431838, - "learning_rate": 3.8543223133877865e-06, - "loss": 1.1283, - "step": 1236 - }, - { - "epoch": 0.14874045572055553, - "grad_norm": 2.2549220396043856, - "learning_rate": 3.854030321072198e-06, - "loss": 1.1088, - "step": 1237 - }, - { - "epoch": 0.1488606986111946, - "grad_norm": 1.9355957764146088, - "learning_rate": 3.853738047501682e-06, - "loss": 0.9609, - "step": 1238 - }, - { - "epoch": 0.1489809415018337, - "grad_norm": 1.8766994404118988, - "learning_rate": 3.85344549272058e-06, - "loss": 1.0066, - "step": 1239 - }, - { - "epoch": 0.1491011843924728, - "grad_norm": 1.7338629301733377, - "learning_rate": 3.853152656773269e-06, - "loss": 1.0551, - "step": 1240 - }, - { - "epoch": 0.14922142728311188, - "grad_norm": 1.7048499794791057, - "learning_rate": 3.852859539704174e-06, - "loss": 1.0706, - "step": 1241 - }, - { - "epoch": 0.14934167017375097, - "grad_norm": 3.2878955211514573, - "learning_rate": 3.85256614155776e-06, - "loss": 0.9897, - "step": 1242 - }, - { - "epoch": 0.14946191306439008, - "grad_norm": 1.8351090530296097, - "learning_rate": 3.852272462378535e-06, - "loss": 0.9786, - "step": 1243 - }, - { - "epoch": 0.14958215595502916, - "grad_norm": 2.4784948469873744, - "learning_rate": 3.85197850221105e-06, - "loss": 1.0149, - "step": 1244 - }, - { - "epoch": 0.14970239884566824, - "grad_norm": 1.8846715715886724, - "learning_rate": 3.851684261099899e-06, - "loss": 0.9958, - "step": 1245 - }, - { - "epoch": 0.14982264173630733, - "grad_norm": 2.6147083895945196, - "learning_rate": 3.851389739089718e-06, - "loss": 1.0932, - "step": 1246 - }, - { - "epoch": 0.14994288462694644, - "grad_norm": 1.653862654120855, - "learning_rate": 3.851094936225186e-06, - "loss": 1.0329, - "step": 1247 - }, - { - "epoch": 0.15006312751758552, - "grad_norm": 1.337956069125306, - "learning_rate": 3.850799852551024e-06, - "loss": 1.0023, - "step": 1248 - }, - { - "epoch": 0.1501833704082246, - "grad_norm": 2.096196581365176, - "learning_rate": 3.850504488111995e-06, - "loss": 1.0881, - "step": 1249 - }, - { - "epoch": 0.15030361329886371, - "grad_norm": 2.0840424920141403, - "learning_rate": 3.850208842952907e-06, - "loss": 1.0549, - "step": 1250 - }, - { - "epoch": 0.1504238561895028, - "grad_norm": 1.5321125014807508, - "learning_rate": 3.849912917118608e-06, - "loss": 1.0182, - "step": 1251 - }, - { - "epoch": 0.15054409908014188, - "grad_norm": 0.965480208420749, - "learning_rate": 3.849616710653992e-06, - "loss": 0.8587, - "step": 1252 - }, - { - "epoch": 0.150664341970781, - "grad_norm": 1.4598810792854044, - "learning_rate": 3.84932022360399e-06, - "loss": 0.9813, - "step": 1253 - }, - { - "epoch": 0.15078458486142007, - "grad_norm": 2.1875864284335043, - "learning_rate": 3.849023456013581e-06, - "loss": 1.0631, - "step": 1254 - }, - { - "epoch": 0.15090482775205916, - "grad_norm": 2.014017217939433, - "learning_rate": 3.848726407927784e-06, - "loss": 0.8489, - "step": 1255 - }, - { - "epoch": 0.15102507064269824, - "grad_norm": 2.5050738345474817, - "learning_rate": 3.84842907939166e-06, - "loss": 1.1061, - "step": 1256 - }, - { - "epoch": 0.15114531353333735, - "grad_norm": 2.3568881235741075, - "learning_rate": 3.8481314704503146e-06, - "loss": 0.9432, - "step": 1257 - }, - { - "epoch": 0.15126555642397643, - "grad_norm": 2.184986741573516, - "learning_rate": 3.847833581148895e-06, - "loss": 1.1176, - "step": 1258 - }, - { - "epoch": 0.15138579931461552, - "grad_norm": 1.9791454581747263, - "learning_rate": 3.84753541153259e-06, - "loss": 1.0353, - "step": 1259 - }, - { - "epoch": 0.15150604220525463, - "grad_norm": 1.4233981347948048, - "learning_rate": 3.847236961646633e-06, - "loss": 1.0643, - "step": 1260 - }, - { - "epoch": 0.1516262850958937, - "grad_norm": 2.61902993101282, - "learning_rate": 3.846938231536296e-06, - "loss": 1.0116, - "step": 1261 - }, - { - "epoch": 0.1517465279865328, - "grad_norm": 1.565207081826438, - "learning_rate": 3.8466392212468995e-06, - "loss": 1.043, - "step": 1262 - }, - { - "epoch": 0.15186677087717187, - "grad_norm": 0.8032495557924497, - "learning_rate": 3.8463399308238e-06, - "loss": 0.8786, - "step": 1263 - }, - { - "epoch": 0.15198701376781099, - "grad_norm": 1.4771423008836815, - "learning_rate": 3.846040360312402e-06, - "loss": 0.872, - "step": 1264 - }, - { - "epoch": 0.15210725665845007, - "grad_norm": 2.188992781144913, - "learning_rate": 3.8457405097581485e-06, - "loss": 1.0426, - "step": 1265 - }, - { - "epoch": 0.15222749954908915, - "grad_norm": 1.7268778655352515, - "learning_rate": 3.8454403792065275e-06, - "loss": 1.0071, - "step": 1266 - }, - { - "epoch": 0.15234774243972826, - "grad_norm": 1.8514366911948907, - "learning_rate": 3.845139968703068e-06, - "loss": 1.0812, - "step": 1267 - }, - { - "epoch": 0.15246798533036734, - "grad_norm": 1.6987242231174708, - "learning_rate": 3.844839278293342e-06, - "loss": 1.0617, - "step": 1268 - }, - { - "epoch": 0.15258822822100643, - "grad_norm": 1.8502334138178107, - "learning_rate": 3.8445383080229654e-06, - "loss": 0.9888, - "step": 1269 - }, - { - "epoch": 0.1527084711116455, - "grad_norm": 2.043958376839519, - "learning_rate": 3.844237057937593e-06, - "loss": 0.9685, - "step": 1270 - }, - { - "epoch": 0.15282871400228462, - "grad_norm": 1.984695288574236, - "learning_rate": 3.843935528082926e-06, - "loss": 1.0086, - "step": 1271 - }, - { - "epoch": 0.1529489568929237, - "grad_norm": 1.5943273358788854, - "learning_rate": 3.843633718504704e-06, - "loss": 1.0765, - "step": 1272 - }, - { - "epoch": 0.1530691997835628, - "grad_norm": 2.107204875079564, - "learning_rate": 3.843331629248715e-06, - "loss": 1.1212, - "step": 1273 - }, - { - "epoch": 0.1531894426742019, - "grad_norm": 1.9481340650800993, - "learning_rate": 3.843029260360782e-06, - "loss": 0.9932, - "step": 1274 - }, - { - "epoch": 0.15330968556484098, - "grad_norm": 1.7448025223882984, - "learning_rate": 3.8427266118867755e-06, - "loss": 1.0119, - "step": 1275 - }, - { - "epoch": 0.15342992845548006, - "grad_norm": 2.316893347849408, - "learning_rate": 3.842423683872608e-06, - "loss": 1.0571, - "step": 1276 - }, - { - "epoch": 0.15355017134611917, - "grad_norm": 2.203588342303622, - "learning_rate": 3.842120476364232e-06, - "loss": 1.0112, - "step": 1277 - }, - { - "epoch": 0.15367041423675826, - "grad_norm": 2.208500789795057, - "learning_rate": 3.841816989407644e-06, - "loss": 1.0621, - "step": 1278 - }, - { - "epoch": 0.15379065712739734, - "grad_norm": 1.9730788580810192, - "learning_rate": 3.841513223048884e-06, - "loss": 0.9953, - "step": 1279 - }, - { - "epoch": 0.15391090001803642, - "grad_norm": 2.3861815601923304, - "learning_rate": 3.841209177334031e-06, - "loss": 1.0192, - "step": 1280 - }, - { - "epoch": 0.15403114290867553, - "grad_norm": 7.590880554344933, - "learning_rate": 3.84090485230921e-06, - "loss": 0.9763, - "step": 1281 - }, - { - "epoch": 0.15415138579931462, - "grad_norm": 3.499570411476296, - "learning_rate": 3.840600248020588e-06, - "loss": 0.9897, - "step": 1282 - }, - { - "epoch": 0.1542716286899537, - "grad_norm": 2.0254450085363906, - "learning_rate": 3.840295364514371e-06, - "loss": 1.025, - "step": 1283 - }, - { - "epoch": 0.1543918715805928, - "grad_norm": 2.857091273114619, - "learning_rate": 3.83999020183681e-06, - "loss": 1.0112, - "step": 1284 - }, - { - "epoch": 0.1545121144712319, - "grad_norm": 1.6864528939448244, - "learning_rate": 3.839684760034199e-06, - "loss": 1.0097, - "step": 1285 - }, - { - "epoch": 0.15463235736187098, - "grad_norm": 2.196542632788786, - "learning_rate": 3.8393790391528716e-06, - "loss": 0.8779, - "step": 1286 - }, - { - "epoch": 0.15475260025251006, - "grad_norm": 2.141143735975684, - "learning_rate": 3.8390730392392075e-06, - "loss": 1.1134, - "step": 1287 - }, - { - "epoch": 0.15487284314314917, - "grad_norm": 2.0124889491944753, - "learning_rate": 3.838766760339626e-06, - "loss": 1.0179, - "step": 1288 - }, - { - "epoch": 0.15499308603378825, - "grad_norm": 2.7001554020558642, - "learning_rate": 3.838460202500587e-06, - "loss": 1.0235, - "step": 1289 - }, - { - "epoch": 0.15511332892442733, - "grad_norm": 1.9100511518089915, - "learning_rate": 3.838153365768599e-06, - "loss": 0.9687, - "step": 1290 - }, - { - "epoch": 0.15523357181506645, - "grad_norm": 4.971228407289431, - "learning_rate": 3.837846250190206e-06, - "loss": 0.9791, - "step": 1291 - }, - { - "epoch": 0.15535381470570553, - "grad_norm": 2.813123930494626, - "learning_rate": 3.837538855811998e-06, - "loss": 0.9947, - "step": 1292 - }, - { - "epoch": 0.1554740575963446, - "grad_norm": 3.019980807969708, - "learning_rate": 3.837231182680606e-06, - "loss": 0.9282, - "step": 1293 - }, - { - "epoch": 0.1555943004869837, - "grad_norm": 1.4740954583571313, - "learning_rate": 3.836923230842706e-06, - "loss": 0.9898, - "step": 1294 - }, - { - "epoch": 0.1557145433776228, - "grad_norm": 1.8390689198293586, - "learning_rate": 3.836615000345011e-06, - "loss": 1.0395, - "step": 1295 - }, - { - "epoch": 0.1558347862682619, - "grad_norm": 3.1241395910240795, - "learning_rate": 3.836306491234282e-06, - "loss": 1.0076, - "step": 1296 - }, - { - "epoch": 0.15595502915890097, - "grad_norm": 2.181440516680141, - "learning_rate": 3.835997703557317e-06, - "loss": 0.976, - "step": 1297 - }, - { - "epoch": 0.15607527204954008, - "grad_norm": 1.4798354754630958, - "learning_rate": 3.83568863736096e-06, - "loss": 1.0317, - "step": 1298 - }, - { - "epoch": 0.15619551494017916, - "grad_norm": 2.4293712152520786, - "learning_rate": 3.8353792926920975e-06, - "loss": 1.1212, - "step": 1299 - }, - { - "epoch": 0.15631575783081825, - "grad_norm": 2.039779829223974, - "learning_rate": 3.835069669597655e-06, - "loss": 1.0455, - "step": 1300 - }, - { - "epoch": 0.15643600072145733, - "grad_norm": 1.900297658592901, - "learning_rate": 3.834759768124603e-06, - "loss": 1.0257, - "step": 1301 - }, - { - "epoch": 0.15655624361209644, - "grad_norm": 2.0185622467504807, - "learning_rate": 3.834449588319953e-06, - "loss": 0.9937, - "step": 1302 - }, - { - "epoch": 0.15667648650273552, - "grad_norm": 1.6528837435364947, - "learning_rate": 3.834139130230758e-06, - "loss": 1.0693, - "step": 1303 - }, - { - "epoch": 0.1567967293933746, - "grad_norm": 1.3657069009275116, - "learning_rate": 3.833828393904117e-06, - "loss": 1.0456, - "step": 1304 - }, - { - "epoch": 0.15691697228401372, - "grad_norm": 2.034184492950697, - "learning_rate": 3.833517379387165e-06, - "loss": 0.9981, - "step": 1305 - }, - { - "epoch": 0.1570372151746528, - "grad_norm": 1.8233161343257918, - "learning_rate": 3.833206086727085e-06, - "loss": 1.1196, - "step": 1306 - }, - { - "epoch": 0.15715745806529188, - "grad_norm": 2.214302763895238, - "learning_rate": 3.8328945159710994e-06, - "loss": 0.9452, - "step": 1307 - }, - { - "epoch": 0.157277700955931, - "grad_norm": 1.7306380694974886, - "learning_rate": 3.832582667166473e-06, - "loss": 1.1115, - "step": 1308 - }, - { - "epoch": 0.15739794384657008, - "grad_norm": 1.8027606933259674, - "learning_rate": 3.8322705403605125e-06, - "loss": 1.0514, - "step": 1309 - }, - { - "epoch": 0.15751818673720916, - "grad_norm": 1.9320177842105541, - "learning_rate": 3.831958135600568e-06, - "loss": 1.0443, - "step": 1310 - }, - { - "epoch": 0.15763842962784824, - "grad_norm": 1.75012519933529, - "learning_rate": 3.831645452934032e-06, - "loss": 1.0301, - "step": 1311 - }, - { - "epoch": 0.15775867251848735, - "grad_norm": 1.5945043835392791, - "learning_rate": 3.831332492408336e-06, - "loss": 1.0357, - "step": 1312 - }, - { - "epoch": 0.15787891540912644, - "grad_norm": 1.6154830722707103, - "learning_rate": 3.831019254070957e-06, - "loss": 0.9128, - "step": 1313 - }, - { - "epoch": 0.15799915829976552, - "grad_norm": 3.2294592612000814, - "learning_rate": 3.8307057379694135e-06, - "loss": 1.1813, - "step": 1314 - }, - { - "epoch": 0.15811940119040463, - "grad_norm": 1.745751810408605, - "learning_rate": 3.830391944151264e-06, - "loss": 1.0487, - "step": 1315 - }, - { - "epoch": 0.1582396440810437, - "grad_norm": 1.6545226968711158, - "learning_rate": 3.830077872664114e-06, - "loss": 0.898, - "step": 1316 - }, - { - "epoch": 0.1583598869716828, - "grad_norm": 1.5942635550255455, - "learning_rate": 3.829763523555604e-06, - "loss": 0.955, - "step": 1317 - }, - { - "epoch": 0.15848012986232188, - "grad_norm": 4.599151134949729, - "learning_rate": 3.829448896873423e-06, - "loss": 1.023, - "step": 1318 - }, - { - "epoch": 0.158600372752961, - "grad_norm": 1.9360593546525242, - "learning_rate": 3.829133992665299e-06, - "loss": 1.0166, - "step": 1319 - }, - { - "epoch": 0.15872061564360007, - "grad_norm": 1.9595566101112238, - "learning_rate": 3.828818810979002e-06, - "loss": 1.1204, - "step": 1320 - }, - { - "epoch": 0.15884085853423915, - "grad_norm": 1.669413813886368, - "learning_rate": 3.8285033518623454e-06, - "loss": 1.0287, - "step": 1321 - }, - { - "epoch": 0.15896110142487826, - "grad_norm": 1.9610890485707042, - "learning_rate": 3.8281876153631845e-06, - "loss": 1.0474, - "step": 1322 - }, - { - "epoch": 0.15908134431551735, - "grad_norm": 2.022430758617184, - "learning_rate": 3.827871601529416e-06, - "loss": 0.8727, - "step": 1323 - }, - { - "epoch": 0.15920158720615643, - "grad_norm": 1.6752416635660827, - "learning_rate": 3.827555310408979e-06, - "loss": 1.0275, - "step": 1324 - }, - { - "epoch": 0.1593218300967955, - "grad_norm": 1.5912067840772939, - "learning_rate": 3.827238742049854e-06, - "loss": 1.0525, - "step": 1325 - }, - { - "epoch": 0.15944207298743462, - "grad_norm": 1.9203336784368221, - "learning_rate": 3.826921896500066e-06, - "loss": 0.7563, - "step": 1326 - }, - { - "epoch": 0.1595623158780737, - "grad_norm": 1.890120762191269, - "learning_rate": 3.826604773807678e-06, - "loss": 1.0173, - "step": 1327 - }, - { - "epoch": 0.1596825587687128, - "grad_norm": 2.384067992597706, - "learning_rate": 3.826287374020798e-06, - "loss": 0.9591, - "step": 1328 - }, - { - "epoch": 0.1598028016593519, - "grad_norm": 1.878151216498531, - "learning_rate": 3.825969697187575e-06, - "loss": 1.0554, - "step": 1329 - }, - { - "epoch": 0.15992304454999098, - "grad_norm": 1.8713655214702203, - "learning_rate": 3.8256517433562015e-06, - "loss": 0.928, - "step": 1330 - }, - { - "epoch": 0.16004328744063007, - "grad_norm": 2.218295198490608, - "learning_rate": 3.82533351257491e-06, - "loss": 1.1502, - "step": 1331 - }, - { - "epoch": 0.16016353033126918, - "grad_norm": 1.6180521547048008, - "learning_rate": 3.825015004891975e-06, - "loss": 1.1183, - "step": 1332 - }, - { - "epoch": 0.16028377322190826, - "grad_norm": 4.7216226284362595, - "learning_rate": 3.824696220355716e-06, - "loss": 0.9929, - "step": 1333 - }, - { - "epoch": 0.16040401611254734, - "grad_norm": 1.5584846018574212, - "learning_rate": 3.824377159014491e-06, - "loss": 1.0256, - "step": 1334 - }, - { - "epoch": 0.16052425900318643, - "grad_norm": 1.8034855526735978, - "learning_rate": 3.824057820916702e-06, - "loss": 1.0819, - "step": 1335 - }, - { - "epoch": 0.16064450189382554, - "grad_norm": 2.0702955713119793, - "learning_rate": 3.8237382061107904e-06, - "loss": 0.9482, - "step": 1336 - }, - { - "epoch": 0.16076474478446462, - "grad_norm": 1.7398452409344758, - "learning_rate": 3.823418314645243e-06, - "loss": 1.0206, - "step": 1337 - }, - { - "epoch": 0.1608849876751037, - "grad_norm": 2.878829396023017, - "learning_rate": 3.823098146568588e-06, - "loss": 0.9895, - "step": 1338 - }, - { - "epoch": 0.1610052305657428, - "grad_norm": 1.6182200503289474, - "learning_rate": 3.822777701929394e-06, - "loss": 0.9529, - "step": 1339 - }, - { - "epoch": 0.1611254734563819, - "grad_norm": 1.608736670853083, - "learning_rate": 3.8224569807762714e-06, - "loss": 0.9717, - "step": 1340 - }, - { - "epoch": 0.16124571634702098, - "grad_norm": 2.7952882325348845, - "learning_rate": 3.822135983157873e-06, - "loss": 1.011, - "step": 1341 - }, - { - "epoch": 0.16136595923766006, - "grad_norm": 2.2632926774715822, - "learning_rate": 3.821814709122896e-06, - "loss": 1.0644, - "step": 1342 - }, - { - "epoch": 0.16148620212829917, - "grad_norm": 2.50863852518872, - "learning_rate": 3.821493158720076e-06, - "loss": 1.0829, - "step": 1343 - }, - { - "epoch": 0.16160644501893826, - "grad_norm": 2.656572569166252, - "learning_rate": 3.821171331998191e-06, - "loss": 0.9658, - "step": 1344 - }, - { - "epoch": 0.16172668790957734, - "grad_norm": 0.8001190052329358, - "learning_rate": 3.820849229006064e-06, - "loss": 0.8232, - "step": 1345 - }, - { - "epoch": 0.16184693080021645, - "grad_norm": 1.9410372408321386, - "learning_rate": 3.8205268497925564e-06, - "loss": 0.9432, - "step": 1346 - }, - { - "epoch": 0.16196717369085553, - "grad_norm": 1.9614931816237333, - "learning_rate": 3.8202041944065725e-06, - "loss": 1.0037, - "step": 1347 - }, - { - "epoch": 0.16208741658149461, - "grad_norm": 2.058735272575468, - "learning_rate": 3.819881262897061e-06, - "loss": 0.9689, - "step": 1348 - }, - { - "epoch": 0.1622076594721337, - "grad_norm": 6.659485223252673, - "learning_rate": 3.819558055313008e-06, - "loss": 0.9683, - "step": 1349 - }, - { - "epoch": 0.1623279023627728, - "grad_norm": 1.690931222650554, - "learning_rate": 3.819234571703444e-06, - "loss": 0.999, - "step": 1350 - }, - { - "epoch": 0.1624481452534119, - "grad_norm": 1.9174966328179788, - "learning_rate": 3.8189108121174435e-06, - "loss": 1.0846, - "step": 1351 - }, - { - "epoch": 0.16256838814405097, - "grad_norm": 1.5499459041876924, - "learning_rate": 3.818586776604118e-06, - "loss": 1.065, - "step": 1352 - }, - { - "epoch": 0.16268863103469008, - "grad_norm": 1.8909055013097358, - "learning_rate": 3.818262465212625e-06, - "loss": 0.8423, - "step": 1353 - }, - { - "epoch": 0.16280887392532917, - "grad_norm": 2.887215247707846, - "learning_rate": 3.817937877992161e-06, - "loss": 1.0038, - "step": 1354 - }, - { - "epoch": 0.16292911681596825, - "grad_norm": 2.4510373507968457, - "learning_rate": 3.817613014991967e-06, - "loss": 1.0782, - "step": 1355 - }, - { - "epoch": 0.16304935970660733, - "grad_norm": 1.7519614943492607, - "learning_rate": 3.817287876261323e-06, - "loss": 0.9889, - "step": 1356 - }, - { - "epoch": 0.16316960259724644, - "grad_norm": 1.7586323500421417, - "learning_rate": 3.816962461849553e-06, - "loss": 1.0323, - "step": 1357 - }, - { - "epoch": 0.16328984548788553, - "grad_norm": 1.7375032548393665, - "learning_rate": 3.8166367718060235e-06, - "loss": 1.0665, - "step": 1358 - }, - { - "epoch": 0.1634100883785246, - "grad_norm": 2.598926755500536, - "learning_rate": 3.816310806180139e-06, - "loss": 0.9886, - "step": 1359 - }, - { - "epoch": 0.16353033126916372, - "grad_norm": 1.563557838934057, - "learning_rate": 3.81598456502135e-06, - "loss": 1.037, - "step": 1360 - }, - { - "epoch": 0.1636505741598028, - "grad_norm": 1.9294851096577847, - "learning_rate": 3.8156580483791455e-06, - "loss": 1.0996, - "step": 1361 - }, - { - "epoch": 0.16377081705044189, - "grad_norm": 2.241058065149873, - "learning_rate": 3.815331256303059e-06, - "loss": 0.994, - "step": 1362 - }, - { - "epoch": 0.163891059941081, - "grad_norm": 2.095984106789822, - "learning_rate": 3.815004188842665e-06, - "loss": 1.0016, - "step": 1363 - }, - { - "epoch": 0.16401130283172008, - "grad_norm": 1.5798318527176645, - "learning_rate": 3.814676846047578e-06, - "loss": 1.0254, - "step": 1364 - }, - { - "epoch": 0.16413154572235916, - "grad_norm": 1.8746997592866657, - "learning_rate": 3.8143492279674565e-06, - "loss": 0.9283, - "step": 1365 - }, - { - "epoch": 0.16425178861299825, - "grad_norm": 0.9289134032517034, - "learning_rate": 3.8140213346519997e-06, - "loss": 0.8888, - "step": 1366 - }, - { - "epoch": 0.16437203150363736, - "grad_norm": 2.9038207042820647, - "learning_rate": 3.813693166150948e-06, - "loss": 0.9981, - "step": 1367 - }, - { - "epoch": 0.16449227439427644, - "grad_norm": 2.6765067126444517, - "learning_rate": 3.813364722514086e-06, - "loss": 1.0922, - "step": 1368 - }, - { - "epoch": 0.16461251728491552, - "grad_norm": 2.890998445049278, - "learning_rate": 3.8130360037912368e-06, - "loss": 1.0368, - "step": 1369 - }, - { - "epoch": 0.16473276017555463, - "grad_norm": 3.1538675210883813, - "learning_rate": 3.812707010032268e-06, - "loss": 1.045, - "step": 1370 - }, - { - "epoch": 0.16485300306619372, - "grad_norm": 3.4491127144746776, - "learning_rate": 3.8123777412870863e-06, - "loss": 1.0215, - "step": 1371 - }, - { - "epoch": 0.1649732459568328, - "grad_norm": 1.7221820896695674, - "learning_rate": 3.812048197605643e-06, - "loss": 1.011, - "step": 1372 - }, - { - "epoch": 0.16509348884747188, - "grad_norm": 1.7413161626269102, - "learning_rate": 3.8117183790379277e-06, - "loss": 1.0373, - "step": 1373 - }, - { - "epoch": 0.165213731738111, - "grad_norm": 2.798195468610833, - "learning_rate": 3.811388285633976e-06, - "loss": 1.1761, - "step": 1374 - }, - { - "epoch": 0.16533397462875007, - "grad_norm": 2.362785047662508, - "learning_rate": 3.811057917443861e-06, - "loss": 0.8489, - "step": 1375 - }, - { - "epoch": 0.16545421751938916, - "grad_norm": 0.8659698598122989, - "learning_rate": 3.8107272745177e-06, - "loss": 0.9368, - "step": 1376 - }, - { - "epoch": 0.16557446041002827, - "grad_norm": 1.6459801604371957, - "learning_rate": 3.8103963569056513e-06, - "loss": 1.0253, - "step": 1377 - }, - { - "epoch": 0.16569470330066735, - "grad_norm": 1.4007249590003747, - "learning_rate": 3.8100651646579146e-06, - "loss": 1.1122, - "step": 1378 - }, - { - "epoch": 0.16581494619130643, - "grad_norm": 2.055609318600043, - "learning_rate": 3.8097336978247317e-06, - "loss": 1.1569, - "step": 1379 - }, - { - "epoch": 0.16593518908194552, - "grad_norm": 2.116345452793925, - "learning_rate": 3.8094019564563854e-06, - "loss": 1.1236, - "step": 1380 - }, - { - "epoch": 0.16605543197258463, - "grad_norm": 2.2908395372706623, - "learning_rate": 3.809069940603201e-06, - "loss": 0.995, - "step": 1381 - }, - { - "epoch": 0.1661756748632237, - "grad_norm": 7.4567273788924995, - "learning_rate": 3.8087376503155452e-06, - "loss": 1.0014, - "step": 1382 - }, - { - "epoch": 0.1662959177538628, - "grad_norm": 0.9733270071152854, - "learning_rate": 3.808405085643826e-06, - "loss": 0.839, - "step": 1383 - }, - { - "epoch": 0.1664161606445019, - "grad_norm": 1.9940414478773156, - "learning_rate": 3.8080722466384925e-06, - "loss": 1.1259, - "step": 1384 - }, - { - "epoch": 0.166536403535141, - "grad_norm": 2.0142947990034825, - "learning_rate": 3.8077391333500376e-06, - "loss": 0.9281, - "step": 1385 - }, - { - "epoch": 0.16665664642578007, - "grad_norm": 1.4374012291188185, - "learning_rate": 3.8074057458289934e-06, - "loss": 0.9931, - "step": 1386 - }, - { - "epoch": 0.16677688931641918, - "grad_norm": 2.263465220223545, - "learning_rate": 3.807072084125934e-06, - "loss": 1.045, - "step": 1387 - }, - { - "epoch": 0.16689713220705826, - "grad_norm": 2.1046521473430992, - "learning_rate": 3.806738148291477e-06, - "loss": 1.0319, - "step": 1388 - }, - { - "epoch": 0.16701737509769735, - "grad_norm": 2.2194539417524632, - "learning_rate": 3.8064039383762793e-06, - "loss": 0.9451, - "step": 1389 - }, - { - "epoch": 0.16713761798833643, - "grad_norm": 2.038874241114822, - "learning_rate": 3.8060694544310396e-06, - "loss": 0.9969, - "step": 1390 - }, - { - "epoch": 0.16725786087897554, - "grad_norm": 1.9198190500504269, - "learning_rate": 3.8057346965065006e-06, - "loss": 1.0144, - "step": 1391 - }, - { - "epoch": 0.16737810376961462, - "grad_norm": 2.0913221032389653, - "learning_rate": 3.805399664653443e-06, - "loss": 1.081, - "step": 1392 - }, - { - "epoch": 0.1674983466602537, - "grad_norm": 2.739205850526542, - "learning_rate": 3.805064358922692e-06, - "loss": 0.9887, - "step": 1393 - }, - { - "epoch": 0.16761858955089282, - "grad_norm": 1.5718876923490426, - "learning_rate": 3.8047287793651136e-06, - "loss": 1.0367, - "step": 1394 - }, - { - "epoch": 0.1677388324415319, - "grad_norm": 1.9387805152719881, - "learning_rate": 3.8043929260316137e-06, - "loss": 1.1051, - "step": 1395 - }, - { - "epoch": 0.16785907533217098, - "grad_norm": 2.469210714495988, - "learning_rate": 3.8040567989731417e-06, - "loss": 1.066, - "step": 1396 - }, - { - "epoch": 0.16797931822281006, - "grad_norm": 1.7880566030499283, - "learning_rate": 3.8037203982406876e-06, - "loss": 1.0289, - "step": 1397 - }, - { - "epoch": 0.16809956111344918, - "grad_norm": 6.870191800079023, - "learning_rate": 3.8033837238852835e-06, - "loss": 0.9617, - "step": 1398 - }, - { - "epoch": 0.16821980400408826, - "grad_norm": 1.6129104151104088, - "learning_rate": 3.8030467759580017e-06, - "loss": 0.9244, - "step": 1399 - }, - { - "epoch": 0.16834004689472734, - "grad_norm": 1.8173595920987702, - "learning_rate": 3.802709554509958e-06, - "loss": 1.1034, - "step": 1400 - }, - { - "epoch": 0.16846028978536645, - "grad_norm": 5.3183755135405155, - "learning_rate": 3.8023720595923083e-06, - "loss": 1.015, - "step": 1401 - }, - { - "epoch": 0.16858053267600553, - "grad_norm": 1.886900014443003, - "learning_rate": 3.80203429125625e-06, - "loss": 1.1067, - "step": 1402 - }, - { - "epoch": 0.16870077556664462, - "grad_norm": 1.676321231657018, - "learning_rate": 3.8016962495530225e-06, - "loss": 0.9324, - "step": 1403 - }, - { - "epoch": 0.1688210184572837, - "grad_norm": 2.1379247187679216, - "learning_rate": 3.8013579345339063e-06, - "loss": 0.9901, - "step": 1404 - }, - { - "epoch": 0.1689412613479228, - "grad_norm": 8.959647527301314, - "learning_rate": 3.801019346250224e-06, - "loss": 0.9267, - "step": 1405 - }, - { - "epoch": 0.1690615042385619, - "grad_norm": 2.3998969294350165, - "learning_rate": 3.8006804847533395e-06, - "loss": 1.0661, - "step": 1406 - }, - { - "epoch": 0.16918174712920098, - "grad_norm": 1.6994980155078045, - "learning_rate": 3.8003413500946556e-06, - "loss": 1.0752, - "step": 1407 - }, - { - "epoch": 0.1693019900198401, - "grad_norm": 2.1666817909104106, - "learning_rate": 3.8000019423256216e-06, - "loss": 1.0572, - "step": 1408 - }, - { - "epoch": 0.16942223291047917, - "grad_norm": 1.5326408764556887, - "learning_rate": 3.7996622614977234e-06, - "loss": 1.1051, - "step": 1409 - }, - { - "epoch": 0.16954247580111825, - "grad_norm": 2.1767751180475168, - "learning_rate": 3.799322307662492e-06, - "loss": 1.0237, - "step": 1410 - }, - { - "epoch": 0.16966271869175734, - "grad_norm": 2.0135519323138293, - "learning_rate": 3.798982080871496e-06, - "loss": 1.0727, - "step": 1411 - }, - { - "epoch": 0.16978296158239645, - "grad_norm": 1.7345787134902402, - "learning_rate": 3.798641581176349e-06, - "loss": 0.9131, - "step": 1412 - }, - { - "epoch": 0.16990320447303553, - "grad_norm": 3.8316319186601437, - "learning_rate": 3.7983008086287044e-06, - "loss": 0.9751, - "step": 1413 - }, - { - "epoch": 0.1700234473636746, - "grad_norm": 2.5309957682718496, - "learning_rate": 3.797959763280257e-06, - "loss": 1.0267, - "step": 1414 - }, - { - "epoch": 0.17014369025431372, - "grad_norm": 1.9819158500183491, - "learning_rate": 3.797618445182743e-06, - "loss": 1.0221, - "step": 1415 - }, - { - "epoch": 0.1702639331449528, - "grad_norm": 1.8798167348370656, - "learning_rate": 3.79727685438794e-06, - "loss": 1.0761, - "step": 1416 - }, - { - "epoch": 0.1703841760355919, - "grad_norm": 0.8788149637923374, - "learning_rate": 3.796934990947667e-06, - "loss": 0.8674, - "step": 1417 - }, - { - "epoch": 0.170504418926231, - "grad_norm": 0.9640543561742929, - "learning_rate": 3.7965928549137854e-06, - "loss": 0.8873, - "step": 1418 - }, - { - "epoch": 0.17062466181687008, - "grad_norm": 2.429733423124086, - "learning_rate": 3.7962504463381953e-06, - "loss": 0.9936, - "step": 1419 - }, - { - "epoch": 0.17074490470750917, - "grad_norm": 1.6421972824371527, - "learning_rate": 3.7959077652728412e-06, - "loss": 1.0123, - "step": 1420 - }, - { - "epoch": 0.17086514759814825, - "grad_norm": 2.130561283285683, - "learning_rate": 3.795564811769707e-06, - "loss": 0.9945, - "step": 1421 - }, - { - "epoch": 0.17098539048878736, - "grad_norm": 2.4514634646658124, - "learning_rate": 3.795221585880818e-06, - "loss": 1.0128, - "step": 1422 - }, - { - "epoch": 0.17110563337942644, - "grad_norm": 1.6440645765397677, - "learning_rate": 3.794878087658242e-06, - "loss": 1.1396, - "step": 1423 - }, - { - "epoch": 0.17122587627006552, - "grad_norm": 1.830258160084569, - "learning_rate": 3.7945343171540873e-06, - "loss": 1.0126, - "step": 1424 - }, - { - "epoch": 0.17134611916070464, - "grad_norm": 1.8470949768117062, - "learning_rate": 3.7941902744205033e-06, - "loss": 1.0176, - "step": 1425 - }, - { - "epoch": 0.17146636205134372, - "grad_norm": 1.8483151315538717, - "learning_rate": 3.7938459595096817e-06, - "loss": 1.0681, - "step": 1426 - }, - { - "epoch": 0.1715866049419828, - "grad_norm": 1.8256773253519696, - "learning_rate": 3.7935013724738545e-06, - "loss": 1.0864, - "step": 1427 - }, - { - "epoch": 0.17170684783262188, - "grad_norm": 1.4974566991847302, - "learning_rate": 3.7931565133652945e-06, - "loss": 1.0095, - "step": 1428 - }, - { - "epoch": 0.171827090723261, - "grad_norm": 2.1500109650088306, - "learning_rate": 3.792811382236317e-06, - "loss": 0.9078, - "step": 1429 - }, - { - "epoch": 0.17194733361390008, - "grad_norm": 1.7288399429810313, - "learning_rate": 3.792465979139279e-06, - "loss": 1.0083, - "step": 1430 - }, - { - "epoch": 0.17206757650453916, - "grad_norm": 1.0341648752738526, - "learning_rate": 3.792120304126576e-06, - "loss": 0.9689, - "step": 1431 - }, - { - "epoch": 0.17218781939517827, - "grad_norm": 1.8553631883006352, - "learning_rate": 3.791774357250649e-06, - "loss": 1.0687, - "step": 1432 - }, - { - "epoch": 0.17230806228581735, - "grad_norm": 2.304268284833867, - "learning_rate": 3.7914281385639757e-06, - "loss": 1.02, - "step": 1433 - }, - { - "epoch": 0.17242830517645644, - "grad_norm": 1.9434750623150727, - "learning_rate": 3.7910816481190784e-06, - "loss": 1.0193, - "step": 1434 - }, - { - "epoch": 0.17254854806709552, - "grad_norm": 3.3463144896217014, - "learning_rate": 3.7907348859685193e-06, - "loss": 0.9805, - "step": 1435 - }, - { - "epoch": 0.17266879095773463, - "grad_norm": 2.015910047144036, - "learning_rate": 3.790387852164902e-06, - "loss": 1.0334, - "step": 1436 - }, - { - "epoch": 0.1727890338483737, - "grad_norm": 1.7521625508450236, - "learning_rate": 3.7900405467608707e-06, - "loss": 0.9935, - "step": 1437 - }, - { - "epoch": 0.1729092767390128, - "grad_norm": 3.121692738448756, - "learning_rate": 3.7896929698091114e-06, - "loss": 1.0122, - "step": 1438 - }, - { - "epoch": 0.1730295196296519, - "grad_norm": 2.3961625135646276, - "learning_rate": 3.7893451213623518e-06, - "loss": 0.924, - "step": 1439 - }, - { - "epoch": 0.173149762520291, - "grad_norm": 1.8008651217376843, - "learning_rate": 3.7889970014733606e-06, - "loss": 1.0492, - "step": 1440 - }, - { - "epoch": 0.17327000541093007, - "grad_norm": 1.5686642931136496, - "learning_rate": 3.7886486101949463e-06, - "loss": 0.9999, - "step": 1441 - }, - { - "epoch": 0.17339024830156918, - "grad_norm": 2.6630957854717985, - "learning_rate": 3.7882999475799594e-06, - "loss": 1.1004, - "step": 1442 - }, - { - "epoch": 0.17351049119220827, - "grad_norm": 1.6478286992967297, - "learning_rate": 3.787951013681293e-06, - "loss": 1.0378, - "step": 1443 - }, - { - "epoch": 0.17363073408284735, - "grad_norm": 2.869635179041234, - "learning_rate": 3.787601808551879e-06, - "loss": 1.0114, - "step": 1444 - }, - { - "epoch": 0.17375097697348643, - "grad_norm": 2.098463418194079, - "learning_rate": 3.7872523322446926e-06, - "loss": 1.0657, - "step": 1445 - }, - { - "epoch": 0.17387121986412554, - "grad_norm": 3.660153774720681, - "learning_rate": 3.7869025848127478e-06, - "loss": 0.8296, - "step": 1446 - }, - { - "epoch": 0.17399146275476463, - "grad_norm": 3.5854217889820474, - "learning_rate": 3.786552566309102e-06, - "loss": 1.0355, - "step": 1447 - }, - { - "epoch": 0.1741117056454037, - "grad_norm": 2.2389662727926756, - "learning_rate": 3.7862022767868517e-06, - "loss": 1.0955, - "step": 1448 - }, - { - "epoch": 0.17423194853604282, - "grad_norm": 2.1058453451321504, - "learning_rate": 3.7858517162991367e-06, - "loss": 1.0764, - "step": 1449 - }, - { - "epoch": 0.1743521914266819, - "grad_norm": 2.2116570801324733, - "learning_rate": 3.7855008848991363e-06, - "loss": 0.8386, - "step": 1450 - }, - { - "epoch": 0.17447243431732098, - "grad_norm": 10.451512963680448, - "learning_rate": 3.7851497826400714e-06, - "loss": 1.0066, - "step": 1451 - }, - { - "epoch": 0.17459267720796007, - "grad_norm": 2.143115856810852, - "learning_rate": 3.7847984095752034e-06, - "loss": 0.9826, - "step": 1452 - }, - { - "epoch": 0.17471292009859918, - "grad_norm": 2.7470702257098725, - "learning_rate": 3.784446765757836e-06, - "loss": 1.0339, - "step": 1453 - }, - { - "epoch": 0.17483316298923826, - "grad_norm": 1.9717887557283607, - "learning_rate": 3.7840948512413133e-06, - "loss": 1.0038, - "step": 1454 - }, - { - "epoch": 0.17495340587987734, - "grad_norm": 1.6717459370361354, - "learning_rate": 3.7837426660790196e-06, - "loss": 1.0156, - "step": 1455 - }, - { - "epoch": 0.17507364877051645, - "grad_norm": 3.0711973046209735, - "learning_rate": 3.783390210324382e-06, - "loss": 1.0457, - "step": 1456 - }, - { - "epoch": 0.17519389166115554, - "grad_norm": 7.93438323096757, - "learning_rate": 3.7830374840308676e-06, - "loss": 0.9585, - "step": 1457 - }, - { - "epoch": 0.17531413455179462, - "grad_norm": 7.374504202001773, - "learning_rate": 3.7826844872519842e-06, - "loss": 1.069, - "step": 1458 - }, - { - "epoch": 0.1754343774424337, - "grad_norm": 1.9708011175618179, - "learning_rate": 3.782331220041282e-06, - "loss": 0.9568, - "step": 1459 - }, - { - "epoch": 0.17555462033307281, - "grad_norm": 1.8495376395762673, - "learning_rate": 3.7819776824523504e-06, - "loss": 1.0546, - "step": 1460 - }, - { - "epoch": 0.1756748632237119, - "grad_norm": 3.4596123838776633, - "learning_rate": 3.7816238745388213e-06, - "loss": 1.0668, - "step": 1461 - }, - { - "epoch": 0.17579510611435098, - "grad_norm": 1.749751141428558, - "learning_rate": 3.781269796354367e-06, - "loss": 1.1024, - "step": 1462 - }, - { - "epoch": 0.1759153490049901, - "grad_norm": 1.8317106897804325, - "learning_rate": 3.7809154479527006e-06, - "loss": 1.1009, - "step": 1463 - }, - { - "epoch": 0.17603559189562917, - "grad_norm": 1.9989139469129444, - "learning_rate": 3.780560829387577e-06, - "loss": 1.0656, - "step": 1464 - }, - { - "epoch": 0.17615583478626826, - "grad_norm": 0.8746465241489333, - "learning_rate": 3.7802059407127915e-06, - "loss": 0.8465, - "step": 1465 - }, - { - "epoch": 0.17627607767690734, - "grad_norm": 2.599400627301575, - "learning_rate": 3.7798507819821797e-06, - "loss": 1.0934, - "step": 1466 - }, - { - "epoch": 0.17639632056754645, - "grad_norm": 3.030717154006677, - "learning_rate": 3.7794953532496197e-06, - "loss": 1.0196, - "step": 1467 - }, - { - "epoch": 0.17651656345818553, - "grad_norm": 1.7187435496816108, - "learning_rate": 3.7791396545690295e-06, - "loss": 0.8703, - "step": 1468 - }, - { - "epoch": 0.17663680634882462, - "grad_norm": 1.9710356750278941, - "learning_rate": 3.7787836859943685e-06, - "loss": 1.0386, - "step": 1469 - }, - { - "epoch": 0.17675704923946373, - "grad_norm": 2.517251572838327, - "learning_rate": 3.7784274475796363e-06, - "loss": 1.0265, - "step": 1470 - }, - { - "epoch": 0.1768772921301028, - "grad_norm": 2.1952327835977052, - "learning_rate": 3.7780709393788745e-06, - "loss": 1.0039, - "step": 1471 - }, - { - "epoch": 0.1769975350207419, - "grad_norm": 1.7498156586694928, - "learning_rate": 3.777714161446165e-06, - "loss": 0.9838, - "step": 1472 - }, - { - "epoch": 0.177117777911381, - "grad_norm": 2.263845547324263, - "learning_rate": 3.7773571138356304e-06, - "loss": 0.9161, - "step": 1473 - }, - { - "epoch": 0.17723802080202009, - "grad_norm": 2.0926690798759013, - "learning_rate": 3.776999796601435e-06, - "loss": 1.1191, - "step": 1474 - }, - { - "epoch": 0.17735826369265917, - "grad_norm": 1.7569408374828093, - "learning_rate": 3.776642209797783e-06, - "loss": 0.947, - "step": 1475 - }, - { - "epoch": 0.17747850658329825, - "grad_norm": 1.9412244869012243, - "learning_rate": 3.7762843534789205e-06, - "loss": 1.0011, - "step": 1476 - }, - { - "epoch": 0.17759874947393736, - "grad_norm": 2.1252575886501934, - "learning_rate": 3.7759262276991343e-06, - "loss": 1.1092, - "step": 1477 - }, - { - "epoch": 0.17771899236457644, - "grad_norm": 2.085978189707306, - "learning_rate": 3.7755678325127506e-06, - "loss": 1.0349, - "step": 1478 - }, - { - "epoch": 0.17783923525521553, - "grad_norm": 1.9057313807675231, - "learning_rate": 3.7752091679741393e-06, - "loss": 0.9842, - "step": 1479 - }, - { - "epoch": 0.17795947814585464, - "grad_norm": 3.060256447611292, - "learning_rate": 3.774850234137708e-06, - "loss": 0.998, - "step": 1480 - }, - { - "epoch": 0.17807972103649372, - "grad_norm": 2.2477119255927036, - "learning_rate": 3.7744910310579076e-06, - "loss": 1.0548, - "step": 1481 - }, - { - "epoch": 0.1781999639271328, - "grad_norm": 4.085984647343903, - "learning_rate": 3.774131558789229e-06, - "loss": 1.0775, - "step": 1482 - }, - { - "epoch": 0.1783202068177719, - "grad_norm": 2.3372009330819465, - "learning_rate": 3.773771817386203e-06, - "loss": 0.9221, - "step": 1483 - }, - { - "epoch": 0.178440449708411, - "grad_norm": 1.7963995262989685, - "learning_rate": 3.773411806903403e-06, - "loss": 1.0265, - "step": 1484 - }, - { - "epoch": 0.17856069259905008, - "grad_norm": 2.0752154181011324, - "learning_rate": 3.7730515273954415e-06, - "loss": 1.1694, - "step": 1485 - }, - { - "epoch": 0.17868093548968916, - "grad_norm": 1.762026605541606, - "learning_rate": 3.772690978916973e-06, - "loss": 1.0653, - "step": 1486 - }, - { - "epoch": 0.17880117838032827, - "grad_norm": 2.1295471906446726, - "learning_rate": 3.772330161522693e-06, - "loss": 1.1054, - "step": 1487 - }, - { - "epoch": 0.17892142127096736, - "grad_norm": 1.8573529444277845, - "learning_rate": 3.7719690752673365e-06, - "loss": 1.032, - "step": 1488 - }, - { - "epoch": 0.17904166416160644, - "grad_norm": 1.8773179032240717, - "learning_rate": 3.7716077202056796e-06, - "loss": 1.0175, - "step": 1489 - }, - { - "epoch": 0.17916190705224552, - "grad_norm": 1.8824663912545374, - "learning_rate": 3.7712460963925404e-06, - "loss": 1.1552, - "step": 1490 - }, - { - "epoch": 0.17928214994288463, - "grad_norm": 1.7118009713786217, - "learning_rate": 3.7708842038827775e-06, - "loss": 0.9782, - "step": 1491 - }, - { - "epoch": 0.17940239283352372, - "grad_norm": 1.4285036615919613, - "learning_rate": 3.770522042731288e-06, - "loss": 1.0796, - "step": 1492 - }, - { - "epoch": 0.1795226357241628, - "grad_norm": 2.042521148721563, - "learning_rate": 3.7701596129930122e-06, - "loss": 1.1119, - "step": 1493 - }, - { - "epoch": 0.1796428786148019, - "grad_norm": 1.835704837422762, - "learning_rate": 3.7697969147229315e-06, - "loss": 0.9616, - "step": 1494 - }, - { - "epoch": 0.179763121505441, - "grad_norm": 2.0381584648091864, - "learning_rate": 3.7694339479760647e-06, - "loss": 1.0777, - "step": 1495 - }, - { - "epoch": 0.17988336439608008, - "grad_norm": 0.7808729406249967, - "learning_rate": 3.769070712807476e-06, - "loss": 0.8379, - "step": 1496 - }, - { - "epoch": 0.18000360728671919, - "grad_norm": 1.5636497082402938, - "learning_rate": 3.768707209272266e-06, - "loss": 1.0177, - "step": 1497 - }, - { - "epoch": 0.18012385017735827, - "grad_norm": 2.184640268528152, - "learning_rate": 3.768343437425579e-06, - "loss": 0.9859, - "step": 1498 - }, - { - "epoch": 0.18024409306799735, - "grad_norm": 2.0565933265572567, - "learning_rate": 3.7679793973225987e-06, - "loss": 1.091, - "step": 1499 - }, - { - "epoch": 0.18036433595863643, - "grad_norm": 0.8705096488934978, - "learning_rate": 3.767615089018549e-06, - "loss": 0.8714, - "step": 1500 - }, - { - "epoch": 0.18048457884927555, - "grad_norm": 3.00967753663274, - "learning_rate": 3.7672505125686966e-06, - "loss": 1.0841, - "step": 1501 - }, - { - "epoch": 0.18060482173991463, - "grad_norm": 2.42142231309856, - "learning_rate": 3.7668856680283455e-06, - "loss": 1.083, - "step": 1502 - }, - { - "epoch": 0.1807250646305537, - "grad_norm": 2.0379784285626936, - "learning_rate": 3.7665205554528437e-06, - "loss": 1.0521, - "step": 1503 - }, - { - "epoch": 0.18084530752119282, - "grad_norm": 1.6920062756207828, - "learning_rate": 3.7661551748975782e-06, - "loss": 0.9827, - "step": 1504 - }, - { - "epoch": 0.1809655504118319, - "grad_norm": 0.8132444419190114, - "learning_rate": 3.7657895264179772e-06, - "loss": 0.8486, - "step": 1505 - }, - { - "epoch": 0.181085793302471, - "grad_norm": 1.9263195964461837, - "learning_rate": 3.765423610069509e-06, - "loss": 0.9774, - "step": 1506 - }, - { - "epoch": 0.18120603619311007, - "grad_norm": 1.693978976154138, - "learning_rate": 3.765057425907683e-06, - "loss": 0.9582, - "step": 1507 - }, - { - "epoch": 0.18132627908374918, - "grad_norm": 1.936065019374821, - "learning_rate": 3.764690973988048e-06, - "loss": 1.0137, - "step": 1508 - }, - { - "epoch": 0.18144652197438826, - "grad_norm": 1.7727250335919305, - "learning_rate": 3.7643242543661967e-06, - "loss": 0.983, - "step": 1509 - }, - { - "epoch": 0.18156676486502735, - "grad_norm": 0.8420654010200346, - "learning_rate": 3.7639572670977573e-06, - "loss": 0.8734, - "step": 1510 - }, - { - "epoch": 0.18168700775566646, - "grad_norm": 1.5120771116393432, - "learning_rate": 3.7635900122384042e-06, - "loss": 0.9965, - "step": 1511 - }, - { - "epoch": 0.18180725064630554, - "grad_norm": 2.2182270562068673, - "learning_rate": 3.7632224898438477e-06, - "loss": 1.0996, - "step": 1512 - }, - { - "epoch": 0.18192749353694462, - "grad_norm": 8.06819271228558, - "learning_rate": 3.762854699969842e-06, - "loss": 1.0189, - "step": 1513 - }, - { - "epoch": 0.1820477364275837, - "grad_norm": 1.7907704692023794, - "learning_rate": 3.762486642672179e-06, - "loss": 0.9556, - "step": 1514 - }, - { - "epoch": 0.18216797931822282, - "grad_norm": 2.036040657574316, - "learning_rate": 3.7621183180066946e-06, - "loss": 1.1011, - "step": 1515 - }, - { - "epoch": 0.1822882222088619, - "grad_norm": 1.633105019097887, - "learning_rate": 3.7617497260292625e-06, - "loss": 0.9686, - "step": 1516 - }, - { - "epoch": 0.18240846509950098, - "grad_norm": 2.333100398107927, - "learning_rate": 3.7613808667957967e-06, - "loss": 1.0244, - "step": 1517 - }, - { - "epoch": 0.1825287079901401, - "grad_norm": 3.25820678959526, - "learning_rate": 3.7610117403622547e-06, - "loss": 1.1443, - "step": 1518 - }, - { - "epoch": 0.18264895088077918, - "grad_norm": 1.5678237247350357, - "learning_rate": 3.7606423467846313e-06, - "loss": 1.131, - "step": 1519 - }, - { - "epoch": 0.18276919377141826, - "grad_norm": 1.3397626490834902, - "learning_rate": 3.760272686118964e-06, - "loss": 1.0226, - "step": 1520 - }, - { - "epoch": 0.18288943666205737, - "grad_norm": 2.4243976867511177, - "learning_rate": 3.7599027584213297e-06, - "loss": 1.1542, - "step": 1521 - }, - { - "epoch": 0.18300967955269645, - "grad_norm": 2.4428717281349117, - "learning_rate": 3.7595325637478465e-06, - "loss": 1.0098, - "step": 1522 - }, - { - "epoch": 0.18312992244333554, - "grad_norm": 1.6734383662316852, - "learning_rate": 3.7591621021546723e-06, - "loss": 1.048, - "step": 1523 - }, - { - "epoch": 0.18325016533397462, - "grad_norm": 1.5939992821905993, - "learning_rate": 3.7587913736980062e-06, - "loss": 1.0501, - "step": 1524 - }, - { - "epoch": 0.18337040822461373, - "grad_norm": 1.5673428063916977, - "learning_rate": 3.7584203784340865e-06, - "loss": 1.0735, - "step": 1525 - }, - { - "epoch": 0.1834906511152528, - "grad_norm": 2.1389324987460205, - "learning_rate": 3.7580491164191938e-06, - "loss": 1.0835, - "step": 1526 - }, - { - "epoch": 0.1836108940058919, - "grad_norm": 0.7592387198777448, - "learning_rate": 3.757677587709648e-06, - "loss": 0.865, - "step": 1527 - }, - { - "epoch": 0.183731136896531, - "grad_norm": 2.5456622460082863, - "learning_rate": 3.7573057923618095e-06, - "loss": 0.9904, - "step": 1528 - }, - { - "epoch": 0.1838513797871701, - "grad_norm": 1.9589714133536487, - "learning_rate": 3.7569337304320793e-06, - "loss": 0.9714, - "step": 1529 - }, - { - "epoch": 0.18397162267780917, - "grad_norm": 0.830539530252536, - "learning_rate": 3.756561401976899e-06, - "loss": 0.8906, - "step": 1530 - }, - { - "epoch": 0.18409186556844825, - "grad_norm": 1.8406808167885091, - "learning_rate": 3.7561888070527514e-06, - "loss": 1.0521, - "step": 1531 - }, - { - "epoch": 0.18421210845908736, - "grad_norm": 2.1941625223532353, - "learning_rate": 3.7558159457161577e-06, - "loss": 1.0247, - "step": 1532 - }, - { - "epoch": 0.18433235134972645, - "grad_norm": 1.8829097485258686, - "learning_rate": 3.755442818023681e-06, - "loss": 1.01, - "step": 1533 - }, - { - "epoch": 0.18445259424036553, - "grad_norm": 1.7078885297961781, - "learning_rate": 3.7550694240319246e-06, - "loss": 0.9922, - "step": 1534 - }, - { - "epoch": 0.18457283713100464, - "grad_norm": 1.9154706485165387, - "learning_rate": 3.7546957637975326e-06, - "loss": 1.0053, - "step": 1535 - }, - { - "epoch": 0.18469308002164372, - "grad_norm": 1.4444854019341133, - "learning_rate": 3.7543218373771873e-06, - "loss": 0.9708, - "step": 1536 - }, - { - "epoch": 0.1848133229122828, - "grad_norm": 1.293260754575761, - "learning_rate": 3.753947644827615e-06, - "loss": 1.0153, - "step": 1537 - }, - { - "epoch": 0.1849335658029219, - "grad_norm": 0.9700748244655928, - "learning_rate": 3.753573186205579e-06, - "loss": 0.8322, - "step": 1538 - }, - { - "epoch": 0.185053808693561, - "grad_norm": 5.622412022656704, - "learning_rate": 3.753198461567885e-06, - "loss": 1.0092, - "step": 1539 - }, - { - "epoch": 0.18517405158420008, - "grad_norm": 1.7136562815387817, - "learning_rate": 3.7528234709713783e-06, - "loss": 1.1496, - "step": 1540 - }, - { - "epoch": 0.18529429447483917, - "grad_norm": 1.7376264731942417, - "learning_rate": 3.7524482144729447e-06, - "loss": 1.078, - "step": 1541 - }, - { - "epoch": 0.18541453736547828, - "grad_norm": 1.7989672214602883, - "learning_rate": 3.7520726921295106e-06, - "loss": 1.0624, - "step": 1542 - }, - { - "epoch": 0.18553478025611736, - "grad_norm": 1.6252455892580764, - "learning_rate": 3.751696903998042e-06, - "loss": 0.9613, - "step": 1543 - }, - { - "epoch": 0.18565502314675644, - "grad_norm": 2.5077989999758326, - "learning_rate": 3.7513208501355456e-06, - "loss": 0.9256, - "step": 1544 - }, - { - "epoch": 0.18577526603739553, - "grad_norm": 1.588588884829638, - "learning_rate": 3.750944530599069e-06, - "loss": 1.0706, - "step": 1545 - }, - { - "epoch": 0.18589550892803464, - "grad_norm": 1.7970224071245644, - "learning_rate": 3.7505679454456992e-06, - "loss": 1.0423, - "step": 1546 - }, - { - "epoch": 0.18601575181867372, - "grad_norm": 2.1951737003636107, - "learning_rate": 3.750191094732564e-06, - "loss": 0.9336, - "step": 1547 - }, - { - "epoch": 0.1861359947093128, - "grad_norm": 1.538805945066585, - "learning_rate": 3.7498139785168313e-06, - "loss": 0.981, - "step": 1548 - }, - { - "epoch": 0.1862562375999519, - "grad_norm": 2.7475248815713234, - "learning_rate": 3.749436596855709e-06, - "loss": 1.0073, - "step": 1549 - }, - { - "epoch": 0.186376480490591, - "grad_norm": 1.818795157065319, - "learning_rate": 3.749058949806446e-06, - "loss": 1.1404, - "step": 1550 - }, - { - "epoch": 0.18649672338123008, - "grad_norm": 1.6262610750752198, - "learning_rate": 3.748681037426331e-06, - "loss": 1.0672, - "step": 1551 - }, - { - "epoch": 0.1866169662718692, - "grad_norm": 2.090656329320795, - "learning_rate": 3.7483028597726936e-06, - "loss": 1.1458, - "step": 1552 - }, - { - "epoch": 0.18673720916250827, - "grad_norm": 1.6484026430611918, - "learning_rate": 3.7479244169029017e-06, - "loss": 0.8573, - "step": 1553 - }, - { - "epoch": 0.18685745205314735, - "grad_norm": 2.4981277130623774, - "learning_rate": 3.7475457088743658e-06, - "loss": 0.9662, - "step": 1554 - }, - { - "epoch": 0.18697769494378644, - "grad_norm": 1.9667682664565314, - "learning_rate": 3.7471667357445348e-06, - "loss": 0.9706, - "step": 1555 - }, - { - "epoch": 0.18709793783442555, - "grad_norm": 1.7323377861938052, - "learning_rate": 3.7467874975709e-06, - "loss": 0.9619, - "step": 1556 - }, - { - "epoch": 0.18721818072506463, - "grad_norm": 2.2612388510189323, - "learning_rate": 3.7464079944109904e-06, - "loss": 1.0087, - "step": 1557 - }, - { - "epoch": 0.18733842361570371, - "grad_norm": 2.941563332303362, - "learning_rate": 3.746028226322376e-06, - "loss": 1.0029, - "step": 1558 - }, - { - "epoch": 0.18745866650634282, - "grad_norm": 1.537498802866896, - "learning_rate": 3.745648193362669e-06, - "loss": 0.9858, - "step": 1559 - }, - { - "epoch": 0.1875789093969819, - "grad_norm": 2.147628132347086, - "learning_rate": 3.745267895589518e-06, - "loss": 0.9614, - "step": 1560 - }, - { - "epoch": 0.187699152287621, - "grad_norm": 9.423289532860442, - "learning_rate": 3.7448873330606154e-06, - "loss": 1.046, - "step": 1561 - }, - { - "epoch": 0.18781939517826007, - "grad_norm": 2.0022215426000827, - "learning_rate": 3.7445065058336914e-06, - "loss": 1.1063, - "step": 1562 - }, - { - "epoch": 0.18793963806889918, - "grad_norm": 1.7704709149664637, - "learning_rate": 3.7441254139665176e-06, - "loss": 1.0926, - "step": 1563 - }, - { - "epoch": 0.18805988095953827, - "grad_norm": 1.9179630318301981, - "learning_rate": 3.743744057516905e-06, - "loss": 1.0508, - "step": 1564 - }, - { - "epoch": 0.18818012385017735, - "grad_norm": 3.027303899687313, - "learning_rate": 3.743362436542706e-06, - "loss": 1.1081, - "step": 1565 - }, - { - "epoch": 0.18830036674081646, - "grad_norm": 1.6753273373633268, - "learning_rate": 3.7429805511018115e-06, - "loss": 0.9987, - "step": 1566 - }, - { - "epoch": 0.18842060963145554, - "grad_norm": 1.684705256710115, - "learning_rate": 3.7425984012521524e-06, - "loss": 1.0026, - "step": 1567 - }, - { - "epoch": 0.18854085252209463, - "grad_norm": 0.8750944427203056, - "learning_rate": 3.7422159870517025e-06, - "loss": 0.8583, - "step": 1568 - }, - { - "epoch": 0.1886610954127337, - "grad_norm": 1.5154790906379298, - "learning_rate": 3.7418333085584717e-06, - "loss": 1.0202, - "step": 1569 - }, - { - "epoch": 0.18878133830337282, - "grad_norm": 2.7859188710358476, - "learning_rate": 3.7414503658305128e-06, - "loss": 1.1413, - "step": 1570 - }, - { - "epoch": 0.1889015811940119, - "grad_norm": 2.1728446497432174, - "learning_rate": 3.7410671589259185e-06, - "loss": 1.0061, - "step": 1571 - }, - { - "epoch": 0.18902182408465099, - "grad_norm": 1.8463034324688057, - "learning_rate": 3.7406836879028205e-06, - "loss": 1.0236, - "step": 1572 - }, - { - "epoch": 0.1891420669752901, - "grad_norm": 1.8956140314496086, - "learning_rate": 3.7402999528193907e-06, - "loss": 1.0024, - "step": 1573 - }, - { - "epoch": 0.18926230986592918, - "grad_norm": 2.493149813750656, - "learning_rate": 3.739915953733842e-06, - "loss": 1.0842, - "step": 1574 - }, - { - "epoch": 0.18938255275656826, - "grad_norm": 1.5935974796093308, - "learning_rate": 3.7395316907044264e-06, - "loss": 1.047, - "step": 1575 - }, - { - "epoch": 0.18950279564720737, - "grad_norm": 1.4414261813969613, - "learning_rate": 3.7391471637894364e-06, - "loss": 1.02, - "step": 1576 - }, - { - "epoch": 0.18962303853784646, - "grad_norm": 1.7357759058855728, - "learning_rate": 3.738762373047205e-06, - "loss": 1.0814, - "step": 1577 - }, - { - "epoch": 0.18974328142848554, - "grad_norm": 1.517163233459828, - "learning_rate": 3.738377318536103e-06, - "loss": 1.0585, - "step": 1578 - }, - { - "epoch": 0.18986352431912462, - "grad_norm": 2.1486198209216023, - "learning_rate": 3.7379920003145447e-06, - "loss": 0.9488, - "step": 1579 - }, - { - "epoch": 0.18998376720976373, - "grad_norm": 1.7209092881602366, - "learning_rate": 3.7376064184409817e-06, - "loss": 1.069, - "step": 1580 - }, - { - "epoch": 0.19010401010040281, - "grad_norm": 1.6525978271798794, - "learning_rate": 3.7372205729739063e-06, - "loss": 1.0962, - "step": 1581 - }, - { - "epoch": 0.1902242529910419, - "grad_norm": 3.205169323095033, - "learning_rate": 3.7368344639718514e-06, - "loss": 0.9476, - "step": 1582 - }, - { - "epoch": 0.190344495881681, - "grad_norm": 1.4164108251044336, - "learning_rate": 3.7364480914933895e-06, - "loss": 1.0373, - "step": 1583 - }, - { - "epoch": 0.1904647387723201, - "grad_norm": 1.74322615654462, - "learning_rate": 3.7360614555971325e-06, - "loss": 1.0438, - "step": 1584 - }, - { - "epoch": 0.19058498166295917, - "grad_norm": 1.707600444112556, - "learning_rate": 3.735674556341733e-06, - "loss": 1.0854, - "step": 1585 - }, - { - "epoch": 0.19070522455359826, - "grad_norm": 1.7042438436752043, - "learning_rate": 3.7352873937858835e-06, - "loss": 1.0592, - "step": 1586 - }, - { - "epoch": 0.19082546744423737, - "grad_norm": 2.0506960315255403, - "learning_rate": 3.734899967988316e-06, - "loss": 0.9498, - "step": 1587 - }, - { - "epoch": 0.19094571033487645, - "grad_norm": 1.5527853519540826, - "learning_rate": 3.7345122790078026e-06, - "loss": 1.0677, - "step": 1588 - }, - { - "epoch": 0.19106595322551553, - "grad_norm": 2.199274363924092, - "learning_rate": 3.7341243269031556e-06, - "loss": 1.1696, - "step": 1589 - }, - { - "epoch": 0.19118619611615464, - "grad_norm": 1.5478170589608624, - "learning_rate": 3.7337361117332275e-06, - "loss": 1.0061, - "step": 1590 - }, - { - "epoch": 0.19130643900679373, - "grad_norm": 1.84515004424044, - "learning_rate": 3.7333476335569087e-06, - "loss": 0.9995, - "step": 1591 - }, - { - "epoch": 0.1914266818974328, - "grad_norm": 2.1575312470508354, - "learning_rate": 3.7329588924331325e-06, - "loss": 0.8929, - "step": 1592 - }, - { - "epoch": 0.1915469247880719, - "grad_norm": 3.0539499938907886, - "learning_rate": 3.732569888420871e-06, - "loss": 1.0526, - "step": 1593 - }, - { - "epoch": 0.191667167678711, - "grad_norm": 1.8788111418764233, - "learning_rate": 3.732180621579134e-06, - "loss": 1.0616, - "step": 1594 - }, - { - "epoch": 0.1917874105693501, - "grad_norm": 1.6861110905467993, - "learning_rate": 3.7317910919669745e-06, - "loss": 1.0428, - "step": 1595 - }, - { - "epoch": 0.19190765345998917, - "grad_norm": 3.429043303101951, - "learning_rate": 3.7314012996434826e-06, - "loss": 0.994, - "step": 1596 - }, - { - "epoch": 0.19202789635062828, - "grad_norm": 1.7043352639042941, - "learning_rate": 3.7310112446677907e-06, - "loss": 1.0363, - "step": 1597 - }, - { - "epoch": 0.19214813924126736, - "grad_norm": 2.470391319412091, - "learning_rate": 3.7306209270990695e-06, - "loss": 0.9152, - "step": 1598 - }, - { - "epoch": 0.19226838213190645, - "grad_norm": 1.9715155901175156, - "learning_rate": 3.7302303469965292e-06, - "loss": 1.096, - "step": 1599 - }, - { - "epoch": 0.19238862502254553, - "grad_norm": 1.689911849527538, - "learning_rate": 3.7298395044194206e-06, - "loss": 0.942, - "step": 1600 - }, - { - "epoch": 0.19250886791318464, - "grad_norm": 1.7956491020329768, - "learning_rate": 3.7294483994270356e-06, - "loss": 1.1632, - "step": 1601 - }, - { - "epoch": 0.19262911080382372, - "grad_norm": 2.0210751916411556, - "learning_rate": 3.7290570320787033e-06, - "loss": 1.0054, - "step": 1602 - }, - { - "epoch": 0.1927493536944628, - "grad_norm": 1.7264005835373946, - "learning_rate": 3.728665402433793e-06, - "loss": 0.9391, - "step": 1603 - }, - { - "epoch": 0.19286959658510192, - "grad_norm": 2.452406565435006, - "learning_rate": 3.7282735105517164e-06, - "loss": 1.0922, - "step": 1604 - }, - { - "epoch": 0.192989839475741, - "grad_norm": 1.9143904424499718, - "learning_rate": 3.727881356491922e-06, - "loss": 0.9075, - "step": 1605 - }, - { - "epoch": 0.19311008236638008, - "grad_norm": 1.7464517316026433, - "learning_rate": 3.7274889403139002e-06, - "loss": 0.9792, - "step": 1606 - }, - { - "epoch": 0.1932303252570192, - "grad_norm": 5.887012294646939, - "learning_rate": 3.727096262077179e-06, - "loss": 1.0213, - "step": 1607 - }, - { - "epoch": 0.19335056814765827, - "grad_norm": 1.687753234512107, - "learning_rate": 3.7267033218413285e-06, - "loss": 1.0831, - "step": 1608 - }, - { - "epoch": 0.19347081103829736, - "grad_norm": 1.8412218379439254, - "learning_rate": 3.726310119665957e-06, - "loss": 1.0412, - "step": 1609 - }, - { - "epoch": 0.19359105392893644, - "grad_norm": 1.8511835024247754, - "learning_rate": 3.725916655610713e-06, - "loss": 1.0825, - "step": 1610 - }, - { - "epoch": 0.19371129681957555, - "grad_norm": 2.0443255439451087, - "learning_rate": 3.725522929735284e-06, - "loss": 0.9851, - "step": 1611 - }, - { - "epoch": 0.19383153971021463, - "grad_norm": 3.3956281603906064, - "learning_rate": 3.725128942099399e-06, - "loss": 0.9677, - "step": 1612 - }, - { - "epoch": 0.19395178260085372, - "grad_norm": 1.6268591882301484, - "learning_rate": 3.7247346927628245e-06, - "loss": 1.0342, - "step": 1613 - }, - { - "epoch": 0.19407202549149283, - "grad_norm": 1.6952348682542258, - "learning_rate": 3.7243401817853694e-06, - "loss": 1.0171, - "step": 1614 - }, - { - "epoch": 0.1941922683821319, - "grad_norm": 1.9848102482729097, - "learning_rate": 3.723945409226879e-06, - "loss": 0.9532, - "step": 1615 - }, - { - "epoch": 0.194312511272771, - "grad_norm": 2.083293253195189, - "learning_rate": 3.723550375147241e-06, - "loss": 1.0333, - "step": 1616 - }, - { - "epoch": 0.19443275416341008, - "grad_norm": 1.7279075113199147, - "learning_rate": 3.7231550796063816e-06, - "loss": 1.0296, - "step": 1617 - }, - { - "epoch": 0.1945529970540492, - "grad_norm": 1.5734360029105496, - "learning_rate": 3.722759522664266e-06, - "loss": 0.8739, - "step": 1618 - }, - { - "epoch": 0.19467323994468827, - "grad_norm": 1.7625694504710345, - "learning_rate": 3.7223637043809016e-06, - "loss": 1.0408, - "step": 1619 - }, - { - "epoch": 0.19479348283532735, - "grad_norm": 3.3246133393066923, - "learning_rate": 3.7219676248163322e-06, - "loss": 1.0933, - "step": 1620 - }, - { - "epoch": 0.19491372572596646, - "grad_norm": 1.7710186431265362, - "learning_rate": 3.721571284030643e-06, - "loss": 1.1561, - "step": 1621 - }, - { - "epoch": 0.19503396861660555, - "grad_norm": 2.3030728131079155, - "learning_rate": 3.7211746820839587e-06, - "loss": 1.0152, - "step": 1622 - }, - { - "epoch": 0.19515421150724463, - "grad_norm": 1.6430447853332293, - "learning_rate": 3.7207778190364437e-06, - "loss": 1.0487, - "step": 1623 - }, - { - "epoch": 0.1952744543978837, - "grad_norm": 1.9580862281679319, - "learning_rate": 3.720380694948302e-06, - "loss": 0.9697, - "step": 1624 - }, - { - "epoch": 0.19539469728852282, - "grad_norm": 0.9822919088439535, - "learning_rate": 3.719983309879777e-06, - "loss": 0.9744, - "step": 1625 - }, - { - "epoch": 0.1955149401791619, - "grad_norm": 1.4995562373825384, - "learning_rate": 3.719585663891151e-06, - "loss": 1.0114, - "step": 1626 - }, - { - "epoch": 0.195635183069801, - "grad_norm": 3.580149248730907, - "learning_rate": 3.719187757042747e-06, - "loss": 1.018, - "step": 1627 - }, - { - "epoch": 0.1957554259604401, - "grad_norm": 0.8034800015769605, - "learning_rate": 3.7187895893949275e-06, - "loss": 0.83, - "step": 1628 - }, - { - "epoch": 0.19587566885107918, - "grad_norm": 2.662159796815916, - "learning_rate": 3.7183911610080937e-06, - "loss": 0.9854, - "step": 1629 - }, - { - "epoch": 0.19599591174171827, - "grad_norm": 2.160038370568704, - "learning_rate": 3.7179924719426872e-06, - "loss": 0.9795, - "step": 1630 - }, - { - "epoch": 0.19611615463235738, - "grad_norm": 2.8699412413639824, - "learning_rate": 3.7175935222591885e-06, - "loss": 0.9934, - "step": 1631 - }, - { - "epoch": 0.19623639752299646, - "grad_norm": 1.5411192807145515, - "learning_rate": 3.717194312018118e-06, - "loss": 0.9821, - "step": 1632 - }, - { - "epoch": 0.19635664041363554, - "grad_norm": 1.8025439193813915, - "learning_rate": 3.716794841280036e-06, - "loss": 0.9903, - "step": 1633 - }, - { - "epoch": 0.19647688330427462, - "grad_norm": 1.8754301213604845, - "learning_rate": 3.7163951101055407e-06, - "loss": 1.006, - "step": 1634 - }, - { - "epoch": 0.19659712619491373, - "grad_norm": 5.496677088468321, - "learning_rate": 3.715995118555273e-06, - "loss": 1.0189, - "step": 1635 - }, - { - "epoch": 0.19671736908555282, - "grad_norm": 2.1000569562908353, - "learning_rate": 3.71559486668991e-06, - "loss": 1.0854, - "step": 1636 - }, - { - "epoch": 0.1968376119761919, - "grad_norm": 1.6049262473728272, - "learning_rate": 3.715194354570169e-06, - "loss": 1.0032, - "step": 1637 - }, - { - "epoch": 0.196957854866831, - "grad_norm": 1.64122753886232, - "learning_rate": 3.714793582256809e-06, - "loss": 1.0567, - "step": 1638 - }, - { - "epoch": 0.1970780977574701, - "grad_norm": 2.307989309223346, - "learning_rate": 3.7143925498106253e-06, - "loss": 1.0732, - "step": 1639 - }, - { - "epoch": 0.19719834064810918, - "grad_norm": 1.747268315839156, - "learning_rate": 3.7139912572924558e-06, - "loss": 1.0236, - "step": 1640 - }, - { - "epoch": 0.19731858353874826, - "grad_norm": 2.5478796352420483, - "learning_rate": 3.7135897047631744e-06, - "loss": 1.0364, - "step": 1641 - }, - { - "epoch": 0.19743882642938737, - "grad_norm": 1.5745063310812664, - "learning_rate": 3.713187892283698e-06, - "loss": 0.9852, - "step": 1642 - }, - { - "epoch": 0.19755906932002645, - "grad_norm": 2.6675546975502877, - "learning_rate": 3.71278581991498e-06, - "loss": 1.0936, - "step": 1643 - }, - { - "epoch": 0.19767931221066554, - "grad_norm": 1.6780146585626707, - "learning_rate": 3.712383487718015e-06, - "loss": 1.0136, - "step": 1644 - }, - { - "epoch": 0.19779955510130465, - "grad_norm": 1.713358103032549, - "learning_rate": 3.7119808957538365e-06, - "loss": 1.0942, - "step": 1645 - }, - { - "epoch": 0.19791979799194373, - "grad_norm": 2.028232974806285, - "learning_rate": 3.711578044083517e-06, - "loss": 1.031, - "step": 1646 - }, - { - "epoch": 0.1980400408825828, - "grad_norm": 2.079070141627885, - "learning_rate": 3.7111749327681698e-06, - "loss": 0.9795, - "step": 1647 - }, - { - "epoch": 0.1981602837732219, - "grad_norm": 3.47813547960538, - "learning_rate": 3.7107715618689455e-06, - "loss": 1.0921, - "step": 1648 - }, - { - "epoch": 0.198280526663861, - "grad_norm": 1.4616186012852905, - "learning_rate": 3.710367931447035e-06, - "loss": 1.0595, - "step": 1649 - }, - { - "epoch": 0.1984007695545001, - "grad_norm": 2.1873759643266553, - "learning_rate": 3.70996404156367e-06, - "loss": 1.0893, - "step": 1650 - }, - { - "epoch": 0.19852101244513917, - "grad_norm": 1.5679491772686889, - "learning_rate": 3.7095598922801187e-06, - "loss": 0.9556, - "step": 1651 - }, - { - "epoch": 0.19864125533577828, - "grad_norm": 1.946719292893898, - "learning_rate": 3.7091554836576914e-06, - "loss": 0.9842, - "step": 1652 - }, - { - "epoch": 0.19876149822641737, - "grad_norm": 5.982851317532077, - "learning_rate": 3.708750815757736e-06, - "loss": 1.0556, - "step": 1653 - }, - { - "epoch": 0.19888174111705645, - "grad_norm": 2.3975989649676537, - "learning_rate": 3.7083458886416407e-06, - "loss": 0.9678, - "step": 1654 - }, - { - "epoch": 0.19900198400769553, - "grad_norm": 1.9731812033564724, - "learning_rate": 3.707940702370832e-06, - "loss": 1.1129, - "step": 1655 - }, - { - "epoch": 0.19912222689833464, - "grad_norm": 0.7655506876478682, - "learning_rate": 3.707535257006777e-06, - "loss": 0.829, - "step": 1656 - }, - { - "epoch": 0.19924246978897373, - "grad_norm": 2.0227390099984675, - "learning_rate": 3.707129552610981e-06, - "loss": 1.1189, - "step": 1657 - }, - { - "epoch": 0.1993627126796128, - "grad_norm": 1.729016523468684, - "learning_rate": 3.70672358924499e-06, - "loss": 0.9664, - "step": 1658 - }, - { - "epoch": 0.19948295557025192, - "grad_norm": 2.4303147466138295, - "learning_rate": 3.706317366970386e-06, - "loss": 1.0127, - "step": 1659 - }, - { - "epoch": 0.199603198460891, - "grad_norm": 2.511521683195495, - "learning_rate": 3.705910885848795e-06, - "loss": 1.0712, - "step": 1660 - }, - { - "epoch": 0.19972344135153008, - "grad_norm": 1.8872895887707122, - "learning_rate": 3.705504145941879e-06, - "loss": 1.067, - "step": 1661 - }, - { - "epoch": 0.1998436842421692, - "grad_norm": 1.7984492827691503, - "learning_rate": 3.7050971473113403e-06, - "loss": 1.022, - "step": 1662 - }, - { - "epoch": 0.19996392713280828, - "grad_norm": 1.7833876247995235, - "learning_rate": 3.7046898900189196e-06, - "loss": 1.0269, - "step": 1663 - }, - { - "epoch": 0.20008417002344736, - "grad_norm": 1.7022153892634482, - "learning_rate": 3.704282374126398e-06, - "loss": 1.0659, - "step": 1664 - }, - { - "epoch": 0.20020441291408644, - "grad_norm": 1.7658936962823737, - "learning_rate": 3.7038745996955954e-06, - "loss": 1.1078, - "step": 1665 - }, - { - "epoch": 0.20032465580472555, - "grad_norm": 2.6141810215304675, - "learning_rate": 3.703466566788371e-06, - "loss": 0.9489, - "step": 1666 - }, - { - "epoch": 0.20044489869536464, - "grad_norm": 1.5931367629325188, - "learning_rate": 3.703058275466622e-06, - "loss": 0.9777, - "step": 1667 - }, - { - "epoch": 0.20056514158600372, - "grad_norm": 1.6719159272790616, - "learning_rate": 3.7026497257922877e-06, - "loss": 1.0057, - "step": 1668 - }, - { - "epoch": 0.20068538447664283, - "grad_norm": 1.766738135613635, - "learning_rate": 3.7022409178273436e-06, - "loss": 1.0804, - "step": 1669 - }, - { - "epoch": 0.2008056273672819, - "grad_norm": 1.6077617225747534, - "learning_rate": 3.7018318516338054e-06, - "loss": 1.0113, - "step": 1670 - }, - { - "epoch": 0.200925870257921, - "grad_norm": 2.45904390658428, - "learning_rate": 3.7014225272737284e-06, - "loss": 1.0434, - "step": 1671 - }, - { - "epoch": 0.20104611314856008, - "grad_norm": 2.253249682655088, - "learning_rate": 3.701012944809207e-06, - "loss": 0.9668, - "step": 1672 - }, - { - "epoch": 0.2011663560391992, - "grad_norm": 2.0284164618971885, - "learning_rate": 3.700603104302374e-06, - "loss": 1.0115, - "step": 1673 - }, - { - "epoch": 0.20128659892983827, - "grad_norm": 1.3683369627349584, - "learning_rate": 3.7001930058154027e-06, - "loss": 0.816, - "step": 1674 - }, - { - "epoch": 0.20140684182047736, - "grad_norm": 2.203000545368309, - "learning_rate": 3.6997826494105037e-06, - "loss": 1.0312, - "step": 1675 - }, - { - "epoch": 0.20152708471111647, - "grad_norm": 2.8931095477400453, - "learning_rate": 3.6993720351499286e-06, - "loss": 0.9222, - "step": 1676 - }, - { - "epoch": 0.20164732760175555, - "grad_norm": 1.9642698993293919, - "learning_rate": 3.6989611630959666e-06, - "loss": 1.0073, - "step": 1677 - }, - { - "epoch": 0.20176757049239463, - "grad_norm": 0.7203191959624292, - "learning_rate": 3.6985500333109474e-06, - "loss": 0.8465, - "step": 1678 - }, - { - "epoch": 0.20188781338303372, - "grad_norm": 2.099998828342656, - "learning_rate": 3.6981386458572385e-06, - "loss": 0.9991, - "step": 1679 - }, - { - "epoch": 0.20200805627367283, - "grad_norm": 3.8466371687403234, - "learning_rate": 3.6977270007972468e-06, - "loss": 0.995, - "step": 1680 - }, - { - "epoch": 0.2021282991643119, - "grad_norm": 2.013998513873706, - "learning_rate": 3.6973150981934196e-06, - "loss": 0.9491, - "step": 1681 - }, - { - "epoch": 0.202248542054951, - "grad_norm": 2.5367383443335862, - "learning_rate": 3.6969029381082415e-06, - "loss": 1.0622, - "step": 1682 - }, - { - "epoch": 0.2023687849455901, - "grad_norm": 1.8572521112080589, - "learning_rate": 3.696490520604237e-06, - "loss": 1.0309, - "step": 1683 - }, - { - "epoch": 0.20248902783622919, - "grad_norm": 2.0599043725789605, - "learning_rate": 3.696077845743968e-06, - "loss": 1.0379, - "step": 1684 - }, - { - "epoch": 0.20260927072686827, - "grad_norm": 2.0909081146177644, - "learning_rate": 3.69566491359004e-06, - "loss": 0.9635, - "step": 1685 - }, - { - "epoch": 0.20272951361750738, - "grad_norm": 1.4933095977094657, - "learning_rate": 3.695251724205092e-06, - "loss": 0.9314, - "step": 1686 - }, - { - "epoch": 0.20284975650814646, - "grad_norm": 1.6018624373772017, - "learning_rate": 3.6948382776518054e-06, - "loss": 1.0869, - "step": 1687 - }, - { - "epoch": 0.20296999939878554, - "grad_norm": 2.9690525904591367, - "learning_rate": 3.6944245739929e-06, - "loss": 1.0213, - "step": 1688 - }, - { - "epoch": 0.20309024228942463, - "grad_norm": 2.4741665294112254, - "learning_rate": 3.6940106132911332e-06, - "loss": 0.9529, - "step": 1689 - }, - { - "epoch": 0.20321048518006374, - "grad_norm": 2.334034632696348, - "learning_rate": 3.6935963956093037e-06, - "loss": 1.1086, - "step": 1690 - }, - { - "epoch": 0.20333072807070282, - "grad_norm": 1.671726255841247, - "learning_rate": 3.6931819210102474e-06, - "loss": 0.9136, - "step": 1691 - }, - { - "epoch": 0.2034509709613419, - "grad_norm": 1.8165249037186575, - "learning_rate": 3.6927671895568402e-06, - "loss": 1.0741, - "step": 1692 - }, - { - "epoch": 0.20357121385198101, - "grad_norm": 2.1502881766297537, - "learning_rate": 3.692352201311996e-06, - "loss": 1.1031, - "step": 1693 - }, - { - "epoch": 0.2036914567426201, - "grad_norm": 1.7698937185889745, - "learning_rate": 3.6919369563386687e-06, - "loss": 0.9891, - "step": 1694 - }, - { - "epoch": 0.20381169963325918, - "grad_norm": 1.8434630348466299, - "learning_rate": 3.69152145469985e-06, - "loss": 1.0266, - "step": 1695 - }, - { - "epoch": 0.20393194252389826, - "grad_norm": 1.5931022674147879, - "learning_rate": 3.691105696458572e-06, - "loss": 1.0455, - "step": 1696 - }, - { - "epoch": 0.20405218541453737, - "grad_norm": 3.3093828613648957, - "learning_rate": 3.690689681677904e-06, - "loss": 0.9092, - "step": 1697 - }, - { - "epoch": 0.20417242830517646, - "grad_norm": 1.601873004865948, - "learning_rate": 3.690273410420956e-06, - "loss": 1.1121, - "step": 1698 - }, - { - "epoch": 0.20429267119581554, - "grad_norm": 2.314520082430718, - "learning_rate": 3.689856882750875e-06, - "loss": 0.9981, - "step": 1699 - }, - { - "epoch": 0.20441291408645465, - "grad_norm": 1.6536738676162879, - "learning_rate": 3.6894400987308486e-06, - "loss": 1.0169, - "step": 1700 - }, - { - "epoch": 0.20453315697709373, - "grad_norm": 2.3453758195618355, - "learning_rate": 3.6890230584241024e-06, - "loss": 1.0808, - "step": 1701 - }, - { - "epoch": 0.20465339986773282, - "grad_norm": 0.8947598694083547, - "learning_rate": 3.6886057618939016e-06, - "loss": 0.9313, - "step": 1702 - }, - { - "epoch": 0.2047736427583719, - "grad_norm": 1.918635421463172, - "learning_rate": 3.6881882092035492e-06, - "loss": 0.9295, - "step": 1703 - }, - { - "epoch": 0.204893885649011, - "grad_norm": 1.01881839780023, - "learning_rate": 3.6877704004163873e-06, - "loss": 0.9101, - "step": 1704 - }, - { - "epoch": 0.2050141285396501, - "grad_norm": 1.610297077800234, - "learning_rate": 3.6873523355957984e-06, - "loss": 1.008, - "step": 1705 - }, - { - "epoch": 0.20513437143028918, - "grad_norm": 1.052769138420397, - "learning_rate": 3.686934014805201e-06, - "loss": 0.9519, - "step": 1706 - }, - { - "epoch": 0.20525461432092829, - "grad_norm": 1.5921853556819774, - "learning_rate": 3.6865154381080552e-06, - "loss": 1.0345, - "step": 1707 - }, - { - "epoch": 0.20537485721156737, - "grad_norm": 1.6917259143294658, - "learning_rate": 3.6860966055678585e-06, - "loss": 1.0552, - "step": 1708 - }, - { - "epoch": 0.20549510010220645, - "grad_norm": 4.91156184821268, - "learning_rate": 3.685677517248147e-06, - "loss": 1.0903, - "step": 1709 - }, - { - "epoch": 0.20561534299284553, - "grad_norm": 1.9623887972092466, - "learning_rate": 3.6852581732124967e-06, - "loss": 1.0339, - "step": 1710 - }, - { - "epoch": 0.20573558588348465, - "grad_norm": 1.7460587943928356, - "learning_rate": 3.6848385735245213e-06, - "loss": 0.9888, - "step": 1711 - }, - { - "epoch": 0.20585582877412373, - "grad_norm": 1.8571064036323366, - "learning_rate": 3.6844187182478734e-06, - "loss": 1.0929, - "step": 1712 - }, - { - "epoch": 0.2059760716647628, - "grad_norm": 2.109372739437446, - "learning_rate": 3.683998607446246e-06, - "loss": 0.9791, - "step": 1713 - }, - { - "epoch": 0.20609631455540192, - "grad_norm": 1.7811947194940991, - "learning_rate": 3.6835782411833686e-06, - "loss": 0.9755, - "step": 1714 - }, - { - "epoch": 0.206216557446041, - "grad_norm": 1.7405522630927377, - "learning_rate": 3.68315761952301e-06, - "loss": 0.9778, - "step": 1715 - }, - { - "epoch": 0.2063368003366801, - "grad_norm": 1.8554717053841594, - "learning_rate": 3.6827367425289797e-06, - "loss": 1.063, - "step": 1716 - }, - { - "epoch": 0.2064570432273192, - "grad_norm": 2.2342780032145044, - "learning_rate": 3.6823156102651225e-06, - "loss": 0.9495, - "step": 1717 - }, - { - "epoch": 0.20657728611795828, - "grad_norm": 1.5396593359881223, - "learning_rate": 3.6818942227953257e-06, - "loss": 0.9452, - "step": 1718 - }, - { - "epoch": 0.20669752900859736, - "grad_norm": 1.7525033756501125, - "learning_rate": 3.681472580183512e-06, - "loss": 0.9227, - "step": 1719 - }, - { - "epoch": 0.20681777189923645, - "grad_norm": 1.8593084499727022, - "learning_rate": 3.6810506824936455e-06, - "loss": 1.0895, - "step": 1720 - }, - { - "epoch": 0.20693801478987556, - "grad_norm": 1.1883495203354038, - "learning_rate": 3.680628529789726e-06, - "loss": 0.8953, - "step": 1721 - }, - { - "epoch": 0.20705825768051464, - "grad_norm": 1.8997267752501719, - "learning_rate": 3.680206122135796e-06, - "loss": 1.082, - "step": 1722 - }, - { - "epoch": 0.20717850057115372, - "grad_norm": 1.5686557817471896, - "learning_rate": 3.6797834595959323e-06, - "loss": 1.0085, - "step": 1723 - }, - { - "epoch": 0.20729874346179283, - "grad_norm": 2.2241186818215515, - "learning_rate": 3.679360542234254e-06, - "loss": 1.0157, - "step": 1724 - }, - { - "epoch": 0.20741898635243192, - "grad_norm": 1.570460450144135, - "learning_rate": 3.678937370114916e-06, - "loss": 0.9527, - "step": 1725 - }, - { - "epoch": 0.207539229243071, - "grad_norm": 1.7852288362866946, - "learning_rate": 3.678513943302114e-06, - "loss": 1.0184, - "step": 1726 - }, - { - "epoch": 0.20765947213371008, - "grad_norm": 2.058111434692569, - "learning_rate": 3.678090261860082e-06, - "loss": 1.0802, - "step": 1727 - }, - { - "epoch": 0.2077797150243492, - "grad_norm": 1.9908802605398574, - "learning_rate": 3.6776663258530906e-06, - "loss": 1.0122, - "step": 1728 - }, - { - "epoch": 0.20789995791498828, - "grad_norm": 1.6150271226241135, - "learning_rate": 3.6772421353454516e-06, - "loss": 0.9477, - "step": 1729 - }, - { - "epoch": 0.20802020080562736, - "grad_norm": 1.742233344189075, - "learning_rate": 3.6768176904015153e-06, - "loss": 1.1083, - "step": 1730 - }, - { - "epoch": 0.20814044369626647, - "grad_norm": 1.6841198305050524, - "learning_rate": 3.6763929910856674e-06, - "loss": 0.8274, - "step": 1731 - }, - { - "epoch": 0.20826068658690555, - "grad_norm": 1.9322667355912677, - "learning_rate": 3.6759680374623365e-06, - "loss": 1.0032, - "step": 1732 - }, - { - "epoch": 0.20838092947754464, - "grad_norm": 2.708833873173771, - "learning_rate": 3.675542829595986e-06, - "loss": 0.981, - "step": 1733 - }, - { - "epoch": 0.20850117236818372, - "grad_norm": 1.4481862958333693, - "learning_rate": 3.6751173675511213e-06, - "loss": 1.0224, - "step": 1734 - }, - { - "epoch": 0.20862141525882283, - "grad_norm": 1.822338433973004, - "learning_rate": 3.674691651392283e-06, - "loss": 1.1111, - "step": 1735 - }, - { - "epoch": 0.2087416581494619, - "grad_norm": 1.7995594704168958, - "learning_rate": 3.674265681184053e-06, - "loss": 0.9913, - "step": 1736 - }, - { - "epoch": 0.208861901040101, - "grad_norm": 1.5602326536309965, - "learning_rate": 3.6738394569910504e-06, - "loss": 1.0936, - "step": 1737 - }, - { - "epoch": 0.2089821439307401, - "grad_norm": 1.8740533346283115, - "learning_rate": 3.6734129788779333e-06, - "loss": 1.0601, - "step": 1738 - }, - { - "epoch": 0.2091023868213792, - "grad_norm": 1.7170051734770837, - "learning_rate": 3.6729862469093976e-06, - "loss": 1.1307, - "step": 1739 - }, - { - "epoch": 0.20922262971201827, - "grad_norm": 2.232362067221606, - "learning_rate": 3.6725592611501782e-06, - "loss": 1.0545, - "step": 1740 - }, - { - "epoch": 0.20934287260265738, - "grad_norm": 1.8483231290645081, - "learning_rate": 3.6721320216650496e-06, - "loss": 0.9943, - "step": 1741 - }, - { - "epoch": 0.20946311549329646, - "grad_norm": 1.5678385514666253, - "learning_rate": 3.6717045285188215e-06, - "loss": 1.0725, - "step": 1742 - }, - { - "epoch": 0.20958335838393555, - "grad_norm": 1.7622183960304771, - "learning_rate": 3.671276781776346e-06, - "loss": 1.0833, - "step": 1743 - }, - { - "epoch": 0.20970360127457463, - "grad_norm": 1.749296523797717, - "learning_rate": 3.6708487815025128e-06, - "loss": 0.8968, - "step": 1744 - }, - { - "epoch": 0.20982384416521374, - "grad_norm": 2.802694223362222, - "learning_rate": 3.6704205277622463e-06, - "loss": 0.9757, - "step": 1745 - }, - { - "epoch": 0.20994408705585282, - "grad_norm": 1.5869038461209601, - "learning_rate": 3.6699920206205146e-06, - "loss": 1.027, - "step": 1746 - }, - { - "epoch": 0.2100643299464919, - "grad_norm": 1.5752684924443718, - "learning_rate": 3.669563260142321e-06, - "loss": 1.0511, - "step": 1747 - }, - { - "epoch": 0.21018457283713102, - "grad_norm": 1.8888361010312245, - "learning_rate": 3.6691342463927083e-06, - "loss": 1.0675, - "step": 1748 - }, - { - "epoch": 0.2103048157277701, - "grad_norm": 1.4524024069167027, - "learning_rate": 3.668704979436758e-06, - "loss": 1.0473, - "step": 1749 - }, - { - "epoch": 0.21042505861840918, - "grad_norm": 1.8995729041672658, - "learning_rate": 3.668275459339588e-06, - "loss": 1.0156, - "step": 1750 - }, - { - "epoch": 0.21054530150904827, - "grad_norm": 1.6661504263862756, - "learning_rate": 3.667845686166358e-06, - "loss": 1.0363, - "step": 1751 - }, - { - "epoch": 0.21066554439968738, - "grad_norm": 1.8258494372972907, - "learning_rate": 3.6674156599822634e-06, - "loss": 1.0852, - "step": 1752 - }, - { - "epoch": 0.21078578729032646, - "grad_norm": 1.6049367204117049, - "learning_rate": 3.666985380852539e-06, - "loss": 1.0402, - "step": 1753 - }, - { - "epoch": 0.21090603018096554, - "grad_norm": 2.1381600102711453, - "learning_rate": 3.6665548488424576e-06, - "loss": 0.9821, - "step": 1754 - }, - { - "epoch": 0.21102627307160465, - "grad_norm": 1.8625673729916739, - "learning_rate": 3.6661240640173307e-06, - "loss": 1.1039, - "step": 1755 - }, - { - "epoch": 0.21114651596224374, - "grad_norm": 0.9491578698748396, - "learning_rate": 3.6656930264425085e-06, - "loss": 0.8684, - "step": 1756 - }, - { - "epoch": 0.21126675885288282, - "grad_norm": 1.5762741263688753, - "learning_rate": 3.665261736183378e-06, - "loss": 0.9944, - "step": 1757 - }, - { - "epoch": 0.2113870017435219, - "grad_norm": 2.25884342086965, - "learning_rate": 3.664830193305366e-06, - "loss": 1.1184, - "step": 1758 - }, - { - "epoch": 0.211507244634161, - "grad_norm": 2.4650331354912502, - "learning_rate": 3.6643983978739373e-06, - "loss": 0.9857, - "step": 1759 - }, - { - "epoch": 0.2116274875248001, - "grad_norm": 2.11087073524773, - "learning_rate": 3.663966349954596e-06, - "loss": 1.0467, - "step": 1760 - }, - { - "epoch": 0.21174773041543918, - "grad_norm": 1.0526318898326013, - "learning_rate": 3.6635340496128816e-06, - "loss": 0.8495, - "step": 1761 - }, - { - "epoch": 0.2118679733060783, - "grad_norm": 1.4742592098989247, - "learning_rate": 3.6631014969143747e-06, - "loss": 1.1492, - "step": 1762 - }, - { - "epoch": 0.21198821619671737, - "grad_norm": 1.6997574577095, - "learning_rate": 3.662668691924693e-06, - "loss": 1.1169, - "step": 1763 - }, - { - "epoch": 0.21210845908735645, - "grad_norm": 1.7015512177064407, - "learning_rate": 3.6622356347094927e-06, - "loss": 0.9435, - "step": 1764 - }, - { - "epoch": 0.21222870197799554, - "grad_norm": 1.7109807675260036, - "learning_rate": 3.6618023253344684e-06, - "loss": 1.0138, - "step": 1765 - }, - { - "epoch": 0.21234894486863465, - "grad_norm": 1.608043453137111, - "learning_rate": 3.6613687638653527e-06, - "loss": 1.0623, - "step": 1766 - }, - { - "epoch": 0.21246918775927373, - "grad_norm": 1.6059718770691889, - "learning_rate": 3.660934950367916e-06, - "loss": 1.0064, - "step": 1767 - }, - { - "epoch": 0.21258943064991281, - "grad_norm": 1.7136486990139497, - "learning_rate": 3.660500884907968e-06, - "loss": 1.0646, - "step": 1768 - }, - { - "epoch": 0.21270967354055192, - "grad_norm": 0.9750253710746963, - "learning_rate": 3.660066567551356e-06, - "loss": 0.8563, - "step": 1769 - }, - { - "epoch": 0.212829916431191, - "grad_norm": 2.330332995510427, - "learning_rate": 3.6596319983639657e-06, - "loss": 1.0647, - "step": 1770 - }, - { - "epoch": 0.2129501593218301, - "grad_norm": 1.4364258443225648, - "learning_rate": 3.6591971774117214e-06, - "loss": 1.0919, - "step": 1771 - }, - { - "epoch": 0.2130704022124692, - "grad_norm": 1.9955247041783175, - "learning_rate": 3.6587621047605833e-06, - "loss": 1.0436, - "step": 1772 - }, - { - "epoch": 0.21319064510310828, - "grad_norm": 1.821054921171979, - "learning_rate": 3.6583267804765542e-06, - "loss": 1.0996, - "step": 1773 - }, - { - "epoch": 0.21331088799374737, - "grad_norm": 1.5751064627859628, - "learning_rate": 3.6578912046256702e-06, - "loss": 1.077, - "step": 1774 - }, - { - "epoch": 0.21343113088438645, - "grad_norm": 1.9370213809588546, - "learning_rate": 3.6574553772740083e-06, - "loss": 0.9927, - "step": 1775 - }, - { - "epoch": 0.21355137377502556, - "grad_norm": 0.8827187479926484, - "learning_rate": 3.657019298487684e-06, - "loss": 0.8932, - "step": 1776 - }, - { - "epoch": 0.21367161666566464, - "grad_norm": 1.8383944040888918, - "learning_rate": 3.6565829683328495e-06, - "loss": 1.0622, - "step": 1777 - }, - { - "epoch": 0.21379185955630373, - "grad_norm": 1.630050586547353, - "learning_rate": 3.6561463868756965e-06, - "loss": 1.0887, - "step": 1778 - }, - { - "epoch": 0.21391210244694284, - "grad_norm": 1.5067753362429026, - "learning_rate": 3.655709554182452e-06, - "loss": 1.0103, - "step": 1779 - }, - { - "epoch": 0.21403234533758192, - "grad_norm": 1.6331638523973844, - "learning_rate": 3.6552724703193855e-06, - "loss": 1.0774, - "step": 1780 - }, - { - "epoch": 0.214152588228221, - "grad_norm": 0.7812643812763259, - "learning_rate": 3.654835135352801e-06, - "loss": 0.8133, - "step": 1781 - }, - { - "epoch": 0.21427283111886009, - "grad_norm": 1.5877536841319593, - "learning_rate": 3.654397549349043e-06, - "loss": 1.0994, - "step": 1782 - }, - { - "epoch": 0.2143930740094992, - "grad_norm": 1.9623086702418961, - "learning_rate": 3.653959712374491e-06, - "loss": 0.9882, - "step": 1783 - }, - { - "epoch": 0.21451331690013828, - "grad_norm": 1.5590973234158432, - "learning_rate": 3.6535216244955663e-06, - "loss": 1.0511, - "step": 1784 - }, - { - "epoch": 0.21463355979077736, - "grad_norm": 1.5682065136288068, - "learning_rate": 3.653083285778726e-06, - "loss": 0.9372, - "step": 1785 - }, - { - "epoch": 0.21475380268141647, - "grad_norm": 1.7149461441926037, - "learning_rate": 3.6526446962904653e-06, - "loss": 1.0281, - "step": 1786 - }, - { - "epoch": 0.21487404557205556, - "grad_norm": 1.4215451371429662, - "learning_rate": 3.652205856097318e-06, - "loss": 0.9733, - "step": 1787 - }, - { - "epoch": 0.21499428846269464, - "grad_norm": 1.8715590374313122, - "learning_rate": 3.651766765265856e-06, - "loss": 1.0175, - "step": 1788 - }, - { - "epoch": 0.21511453135333372, - "grad_norm": 2.041045178651165, - "learning_rate": 3.65132742386269e-06, - "loss": 1.0318, - "step": 1789 - }, - { - "epoch": 0.21523477424397283, - "grad_norm": 1.6288013744668839, - "learning_rate": 3.6508878319544656e-06, - "loss": 1.0772, - "step": 1790 - }, - { - "epoch": 0.21535501713461191, - "grad_norm": 3.536553105848037, - "learning_rate": 3.65044798960787e-06, - "loss": 1.0348, - "step": 1791 - }, - { - "epoch": 0.215475260025251, - "grad_norm": 2.177766123997827, - "learning_rate": 3.650007896889627e-06, - "loss": 1.0106, - "step": 1792 - }, - { - "epoch": 0.2155955029158901, - "grad_norm": 1.6852049585765194, - "learning_rate": 3.6495675538664974e-06, - "loss": 1.034, - "step": 1793 - }, - { - "epoch": 0.2157157458065292, - "grad_norm": 1.62165120662731, - "learning_rate": 3.649126960605282e-06, - "loss": 1.0588, - "step": 1794 - }, - { - "epoch": 0.21583598869716827, - "grad_norm": 2.128297603290336, - "learning_rate": 3.6486861171728174e-06, - "loss": 1.0624, - "step": 1795 - }, - { - "epoch": 0.21595623158780738, - "grad_norm": 2.3587079652599767, - "learning_rate": 3.6482450236359803e-06, - "loss": 1.0162, - "step": 1796 - }, - { - "epoch": 0.21607647447844647, - "grad_norm": 2.5872316377950044, - "learning_rate": 3.647803680061683e-06, - "loss": 1.0057, - "step": 1797 - }, - { - "epoch": 0.21619671736908555, - "grad_norm": 2.5437385577570124, - "learning_rate": 3.6473620865168776e-06, - "loss": 0.991, - "step": 1798 - }, - { - "epoch": 0.21631696025972463, - "grad_norm": 1.9502500813854227, - "learning_rate": 3.646920243068554e-06, - "loss": 1.0495, - "step": 1799 - }, - { - "epoch": 0.21643720315036374, - "grad_norm": 1.4520546036901567, - "learning_rate": 3.6464781497837384e-06, - "loss": 0.9709, - "step": 1800 - }, - { - "epoch": 0.21655744604100283, - "grad_norm": 1.5746582023679514, - "learning_rate": 3.6460358067294965e-06, - "loss": 0.9641, - "step": 1801 - }, - { - "epoch": 0.2166776889316419, - "grad_norm": 2.0691983661945295, - "learning_rate": 3.645593213972932e-06, - "loss": 1.0059, - "step": 1802 - }, - { - "epoch": 0.21679793182228102, - "grad_norm": 1.807678452437102, - "learning_rate": 3.6451503715811852e-06, - "loss": 1.0218, - "step": 1803 - }, - { - "epoch": 0.2169181747129201, - "grad_norm": 1.7103724134375868, - "learning_rate": 3.6447072796214345e-06, - "loss": 1.0332, - "step": 1804 - }, - { - "epoch": 0.21703841760355919, - "grad_norm": 0.9417306175009501, - "learning_rate": 3.644263938160898e-06, - "loss": 0.9015, - "step": 1805 - }, - { - "epoch": 0.21715866049419827, - "grad_norm": 1.812010074040112, - "learning_rate": 3.6438203472668293e-06, - "loss": 0.9428, - "step": 1806 - }, - { - "epoch": 0.21727890338483738, - "grad_norm": 1.8332120103862548, - "learning_rate": 3.6433765070065206e-06, - "loss": 1.0457, - "step": 1807 - }, - { - "epoch": 0.21739914627547646, - "grad_norm": 2.175293352360376, - "learning_rate": 3.6429324174473025e-06, - "loss": 1.1109, - "step": 1808 - }, - { - "epoch": 0.21751938916611555, - "grad_norm": 1.9935666565018626, - "learning_rate": 3.6424880786565425e-06, - "loss": 1.081, - "step": 1809 - }, - { - "epoch": 0.21763963205675466, - "grad_norm": 1.8211389720118147, - "learning_rate": 3.6420434907016482e-06, - "loss": 1.0226, - "step": 1810 - }, - { - "epoch": 0.21775987494739374, - "grad_norm": 1.541555177248597, - "learning_rate": 3.6415986536500606e-06, - "loss": 1.0402, - "step": 1811 - }, - { - "epoch": 0.21788011783803282, - "grad_norm": 1.6572349786404224, - "learning_rate": 3.641153567569263e-06, - "loss": 1.0472, - "step": 1812 - }, - { - "epoch": 0.2180003607286719, - "grad_norm": 1.8320990511257809, - "learning_rate": 3.640708232526774e-06, - "loss": 1.1763, - "step": 1813 - }, - { - "epoch": 0.21812060361931102, - "grad_norm": 1.7203078405887102, - "learning_rate": 3.6402626485901504e-06, - "loss": 1.0083, - "step": 1814 - }, - { - "epoch": 0.2182408465099501, - "grad_norm": 1.8624679741890224, - "learning_rate": 3.639816815826988e-06, - "loss": 1.0079, - "step": 1815 - }, - { - "epoch": 0.21836108940058918, - "grad_norm": 1.6472581016073824, - "learning_rate": 3.6393707343049176e-06, - "loss": 1.01, - "step": 1816 - }, - { - "epoch": 0.2184813322912283, - "grad_norm": 2.3196166565314313, - "learning_rate": 3.6389244040916104e-06, - "loss": 0.9691, - "step": 1817 - }, - { - "epoch": 0.21860157518186737, - "grad_norm": 1.9319707050392882, - "learning_rate": 3.6384778252547747e-06, - "loss": 1.0185, - "step": 1818 - }, - { - "epoch": 0.21872181807250646, - "grad_norm": 2.0403166089547504, - "learning_rate": 3.638030997862155e-06, - "loss": 1.0087, - "step": 1819 - }, - { - "epoch": 0.21884206096314554, - "grad_norm": 0.8608061923991966, - "learning_rate": 3.6375839219815356e-06, - "loss": 0.8567, - "step": 1820 - }, - { - "epoch": 0.21896230385378465, - "grad_norm": 1.9124821581039437, - "learning_rate": 3.6371365976807375e-06, - "loss": 1.0539, - "step": 1821 - }, - { - "epoch": 0.21908254674442373, - "grad_norm": 1.775573519612005, - "learning_rate": 3.6366890250276185e-06, - "loss": 1.0655, - "step": 1822 - }, - { - "epoch": 0.21920278963506282, - "grad_norm": 1.7041377285068062, - "learning_rate": 3.6362412040900764e-06, - "loss": 1.1321, - "step": 1823 - }, - { - "epoch": 0.21932303252570193, - "grad_norm": 1.8396220792475166, - "learning_rate": 3.635793134936044e-06, - "loss": 1.0315, - "step": 1824 - }, - { - "epoch": 0.219443275416341, - "grad_norm": 1.5054462425240027, - "learning_rate": 3.635344817633494e-06, - "loss": 0.9586, - "step": 1825 - }, - { - "epoch": 0.2195635183069801, - "grad_norm": 1.902763304767497, - "learning_rate": 3.634896252250436e-06, - "loss": 0.9837, - "step": 1826 - }, - { - "epoch": 0.2196837611976192, - "grad_norm": 1.548064342999912, - "learning_rate": 3.6344474388549157e-06, - "loss": 1.057, - "step": 1827 - }, - { - "epoch": 0.2198040040882583, - "grad_norm": 1.9266452050691172, - "learning_rate": 3.6339983775150183e-06, - "loss": 1.0384, - "step": 1828 - }, - { - "epoch": 0.21992424697889737, - "grad_norm": 2.085691630413987, - "learning_rate": 3.6335490682988664e-06, - "loss": 1.0757, - "step": 1829 - }, - { - "epoch": 0.22004448986953645, - "grad_norm": 1.7555873774744195, - "learning_rate": 3.63309951127462e-06, - "loss": 1.0618, - "step": 1830 - }, - { - "epoch": 0.22016473276017556, - "grad_norm": 2.063067733769566, - "learning_rate": 3.6326497065104757e-06, - "loss": 0.979, - "step": 1831 - }, - { - "epoch": 0.22028497565081465, - "grad_norm": 2.1153066240618754, - "learning_rate": 3.6321996540746697e-06, - "loss": 1.0081, - "step": 1832 - }, - { - "epoch": 0.22040521854145373, - "grad_norm": 1.6771907258646863, - "learning_rate": 3.6317493540354733e-06, - "loss": 1.0304, - "step": 1833 - }, - { - "epoch": 0.22052546143209284, - "grad_norm": 2.089659381470197, - "learning_rate": 3.6312988064611976e-06, - "loss": 1.0093, - "step": 1834 - }, - { - "epoch": 0.22064570432273192, - "grad_norm": 1.7240292706449765, - "learning_rate": 3.6308480114201896e-06, - "loss": 1.035, - "step": 1835 - }, - { - "epoch": 0.220765947213371, - "grad_norm": 1.575469625182033, - "learning_rate": 3.630396968980835e-06, - "loss": 0.9963, - "step": 1836 - }, - { - "epoch": 0.2208861901040101, - "grad_norm": 2.5727479289910873, - "learning_rate": 3.6299456792115575e-06, - "loss": 1.0761, - "step": 1837 - }, - { - "epoch": 0.2210064329946492, - "grad_norm": 1.6449859787827512, - "learning_rate": 3.629494142180815e-06, - "loss": 1.0421, - "step": 1838 - }, - { - "epoch": 0.22112667588528828, - "grad_norm": 1.9075731156961415, - "learning_rate": 3.6290423579571075e-06, - "loss": 1.0797, - "step": 1839 - }, - { - "epoch": 0.22124691877592736, - "grad_norm": 1.448948837603844, - "learning_rate": 3.6285903266089694e-06, - "loss": 1.0291, - "step": 1840 - }, - { - "epoch": 0.22136716166656648, - "grad_norm": 1.5959878805173617, - "learning_rate": 3.628138048204974e-06, - "loss": 1.0016, - "step": 1841 - }, - { - "epoch": 0.22148740455720556, - "grad_norm": 1.629308344476645, - "learning_rate": 3.6276855228137304e-06, - "loss": 1.0014, - "step": 1842 - }, - { - "epoch": 0.22160764744784464, - "grad_norm": 1.9309449211060725, - "learning_rate": 3.6272327505038874e-06, - "loss": 1.0458, - "step": 1843 - }, - { - "epoch": 0.22172789033848372, - "grad_norm": 1.6478555384252587, - "learning_rate": 3.626779731344131e-06, - "loss": 1.0118, - "step": 1844 - }, - { - "epoch": 0.22184813322912283, - "grad_norm": 2.0448352463817154, - "learning_rate": 3.6263264654031814e-06, - "loss": 1.0838, - "step": 1845 - }, - { - "epoch": 0.22196837611976192, - "grad_norm": 0.701520657725296, - "learning_rate": 3.6258729527498008e-06, - "loss": 0.8356, - "step": 1846 - }, - { - "epoch": 0.222088619010401, - "grad_norm": 5.334926347447585, - "learning_rate": 3.6254191934527854e-06, - "loss": 0.8641, - "step": 1847 - }, - { - "epoch": 0.2222088619010401, - "grad_norm": 2.632221280057492, - "learning_rate": 3.6249651875809715e-06, - "loss": 0.8706, - "step": 1848 - }, - { - "epoch": 0.2223291047916792, - "grad_norm": 2.1184314859712585, - "learning_rate": 3.62451093520323e-06, - "loss": 1.1204, - "step": 1849 - }, - { - "epoch": 0.22244934768231828, - "grad_norm": 2.350694261629469, - "learning_rate": 3.6240564363884714e-06, - "loss": 1.1254, - "step": 1850 - }, - { - "epoch": 0.2225695905729574, - "grad_norm": 1.6293000767451609, - "learning_rate": 3.623601691205643e-06, - "loss": 0.9339, - "step": 1851 - }, - { - "epoch": 0.22268983346359647, - "grad_norm": 1.8298304501143736, - "learning_rate": 3.623146699723729e-06, - "loss": 1.0485, - "step": 1852 - }, - { - "epoch": 0.22281007635423555, - "grad_norm": 1.692138302018292, - "learning_rate": 3.6226914620117507e-06, - "loss": 1.0155, - "step": 1853 - }, - { - "epoch": 0.22293031924487464, - "grad_norm": 2.691361602722928, - "learning_rate": 3.622235978138768e-06, - "loss": 1.0325, - "step": 1854 - }, - { - "epoch": 0.22305056213551375, - "grad_norm": 2.007626179759327, - "learning_rate": 3.621780248173877e-06, - "loss": 1.052, - "step": 1855 - }, - { - "epoch": 0.22317080502615283, - "grad_norm": 0.8697663872675295, - "learning_rate": 3.6213242721862125e-06, - "loss": 0.8858, - "step": 1856 - }, - { - "epoch": 0.2232910479167919, - "grad_norm": 1.4582413871480704, - "learning_rate": 3.620868050244945e-06, - "loss": 0.9841, - "step": 1857 - }, - { - "epoch": 0.22341129080743102, - "grad_norm": 1.6618761197043697, - "learning_rate": 3.6204115824192817e-06, - "loss": 1.0073, - "step": 1858 - }, - { - "epoch": 0.2235315336980701, - "grad_norm": 1.869659830048353, - "learning_rate": 3.619954868778471e-06, - "loss": 1.0024, - "step": 1859 - }, - { - "epoch": 0.2236517765887092, - "grad_norm": 1.6703681929379037, - "learning_rate": 3.6194979093917944e-06, - "loss": 1.0498, - "step": 1860 - }, - { - "epoch": 0.22377201947934827, - "grad_norm": 1.8369051716582752, - "learning_rate": 3.6190407043285724e-06, - "loss": 1.1127, - "step": 1861 - }, - { - "epoch": 0.22389226236998738, - "grad_norm": 2.93079406185556, - "learning_rate": 3.618583253658163e-06, - "loss": 0.972, - "step": 1862 - }, - { - "epoch": 0.22401250526062647, - "grad_norm": 1.7529577736477329, - "learning_rate": 3.618125557449961e-06, - "loss": 1.1025, - "step": 1863 - }, - { - "epoch": 0.22413274815126555, - "grad_norm": 1.7771808926932575, - "learning_rate": 3.6176676157733983e-06, - "loss": 1.0615, - "step": 1864 - }, - { - "epoch": 0.22425299104190466, - "grad_norm": 2.0975058730796823, - "learning_rate": 3.6172094286979443e-06, - "loss": 0.9801, - "step": 1865 - }, - { - "epoch": 0.22437323393254374, - "grad_norm": 1.5460666895074548, - "learning_rate": 3.6167509962931064e-06, - "loss": 1.0352, - "step": 1866 - }, - { - "epoch": 0.22449347682318282, - "grad_norm": 3.9232204606622587, - "learning_rate": 3.6162923186284276e-06, - "loss": 1.0002, - "step": 1867 - }, - { - "epoch": 0.2246137197138219, - "grad_norm": 2.096347165935192, - "learning_rate": 3.6158333957734888e-06, - "loss": 1.0927, - "step": 1868 - }, - { - "epoch": 0.22473396260446102, - "grad_norm": 1.8290021047522698, - "learning_rate": 3.6153742277979088e-06, - "loss": 1.06, - "step": 1869 - }, - { - "epoch": 0.2248542054951001, - "grad_norm": 2.0111697142172678, - "learning_rate": 3.6149148147713434e-06, - "loss": 1.0157, - "step": 1870 - }, - { - "epoch": 0.22497444838573918, - "grad_norm": 1.5029341292981797, - "learning_rate": 3.614455156763484e-06, - "loss": 1.093, - "step": 1871 - }, - { - "epoch": 0.2250946912763783, - "grad_norm": 2.7758908019066793, - "learning_rate": 3.613995253844061e-06, - "loss": 0.9446, - "step": 1872 - }, - { - "epoch": 0.22521493416701738, - "grad_norm": 1.8006272653540967, - "learning_rate": 3.6135351060828414e-06, - "loss": 1.0378, - "step": 1873 - }, - { - "epoch": 0.22533517705765646, - "grad_norm": 3.694238486480687, - "learning_rate": 3.6130747135496285e-06, - "loss": 0.9167, - "step": 1874 - }, - { - "epoch": 0.22545541994829554, - "grad_norm": 2.051379655997883, - "learning_rate": 3.6126140763142646e-06, - "loss": 0.894, - "step": 1875 - }, - { - "epoch": 0.22557566283893465, - "grad_norm": 2.312016204539603, - "learning_rate": 3.6121531944466275e-06, - "loss": 1.078, - "step": 1876 - }, - { - "epoch": 0.22569590572957374, - "grad_norm": 1.9807453752353212, - "learning_rate": 3.611692068016633e-06, - "loss": 1.0113, - "step": 1877 - }, - { - "epoch": 0.22581614862021282, - "grad_norm": 2.4030285363251145, - "learning_rate": 3.611230697094233e-06, - "loss": 0.9752, - "step": 1878 - }, - { - "epoch": 0.22593639151085193, - "grad_norm": 1.790793168990429, - "learning_rate": 3.6107690817494173e-06, - "loss": 1.1029, - "step": 1879 - }, - { - "epoch": 0.226056634401491, - "grad_norm": 2.3189713940486665, - "learning_rate": 3.6103072220522117e-06, - "loss": 0.9307, - "step": 1880 - }, - { - "epoch": 0.2261768772921301, - "grad_norm": 1.647899015439004, - "learning_rate": 3.609845118072682e-06, - "loss": 1.1385, - "step": 1881 - }, - { - "epoch": 0.2262971201827692, - "grad_norm": 1.644942642185904, - "learning_rate": 3.6093827698809276e-06, - "loss": 1.0263, - "step": 1882 - }, - { - "epoch": 0.2264173630734083, - "grad_norm": 2.0507696242229994, - "learning_rate": 3.6089201775470864e-06, - "loss": 1.0735, - "step": 1883 - }, - { - "epoch": 0.22653760596404737, - "grad_norm": 1.4111622104420483, - "learning_rate": 3.6084573411413334e-06, - "loss": 1.0106, - "step": 1884 - }, - { - "epoch": 0.22665784885468646, - "grad_norm": 1.922365389651498, - "learning_rate": 3.607994260733881e-06, - "loss": 1.0434, - "step": 1885 - }, - { - "epoch": 0.22677809174532557, - "grad_norm": 1.6887230855856856, - "learning_rate": 3.6075309363949776e-06, - "loss": 0.9756, - "step": 1886 - }, - { - "epoch": 0.22689833463596465, - "grad_norm": 1.836476088719501, - "learning_rate": 3.6070673681949094e-06, - "loss": 1.0372, - "step": 1887 - }, - { - "epoch": 0.22701857752660373, - "grad_norm": 1.567677122631294, - "learning_rate": 3.606603556203999e-06, - "loss": 1.0389, - "step": 1888 - }, - { - "epoch": 0.22713882041724284, - "grad_norm": 1.7055832506817437, - "learning_rate": 3.6061395004926066e-06, - "loss": 1.0671, - "step": 1889 - }, - { - "epoch": 0.22725906330788193, - "grad_norm": 1.9805705930267676, - "learning_rate": 3.605675201131129e-06, - "loss": 1.0819, - "step": 1890 - }, - { - "epoch": 0.227379306198521, - "grad_norm": 2.1381939079556838, - "learning_rate": 3.60521065819e-06, - "loss": 1.0317, - "step": 1891 - }, - { - "epoch": 0.2274995490891601, - "grad_norm": 2.2447045848503744, - "learning_rate": 3.60474587173969e-06, - "loss": 1.1017, - "step": 1892 - }, - { - "epoch": 0.2276197919797992, - "grad_norm": 1.815188157388212, - "learning_rate": 3.6042808418507084e-06, - "loss": 1.0607, - "step": 1893 - }, - { - "epoch": 0.22774003487043828, - "grad_norm": 3.0779499449548333, - "learning_rate": 3.6038155685935976e-06, - "loss": 1.0012, - "step": 1894 - }, - { - "epoch": 0.22786027776107737, - "grad_norm": 1.732295809075806, - "learning_rate": 3.6033500520389404e-06, - "loss": 0.932, - "step": 1895 - }, - { - "epoch": 0.22798052065171648, - "grad_norm": 0.7958585951492265, - "learning_rate": 3.6028842922573553e-06, - "loss": 0.9172, - "step": 1896 - }, - { - "epoch": 0.22810076354235556, - "grad_norm": 0.8206638492008542, - "learning_rate": 3.602418289319497e-06, - "loss": 0.8944, - "step": 1897 - }, - { - "epoch": 0.22822100643299464, - "grad_norm": 1.8243783231615445, - "learning_rate": 3.601952043296059e-06, - "loss": 0.9606, - "step": 1898 - }, - { - "epoch": 0.22834124932363373, - "grad_norm": 1.8392810649522067, - "learning_rate": 3.6014855542577696e-06, - "loss": 1.0383, - "step": 1899 - }, - { - "epoch": 0.22846149221427284, - "grad_norm": 1.633429816698505, - "learning_rate": 3.6010188222753943e-06, - "loss": 1.0728, - "step": 1900 - }, - { - "epoch": 0.22858173510491192, - "grad_norm": 0.9492010421043106, - "learning_rate": 3.6005518474197372e-06, - "loss": 0.9046, - "step": 1901 - }, - { - "epoch": 0.228701977995551, - "grad_norm": 1.6253740907106333, - "learning_rate": 3.6000846297616373e-06, - "loss": 1.0217, - "step": 1902 - }, - { - "epoch": 0.22882222088619011, - "grad_norm": 1.902396888695716, - "learning_rate": 3.5996171693719717e-06, - "loss": 0.9592, - "step": 1903 - }, - { - "epoch": 0.2289424637768292, - "grad_norm": 0.8718274024905555, - "learning_rate": 3.5991494663216528e-06, - "loss": 0.9013, - "step": 1904 - }, - { - "epoch": 0.22906270666746828, - "grad_norm": 1.7895117449698763, - "learning_rate": 3.5986815206816314e-06, - "loss": 1.1043, - "step": 1905 - }, - { - "epoch": 0.2291829495581074, - "grad_norm": 1.628227183820101, - "learning_rate": 3.598213332522895e-06, - "loss": 0.9766, - "step": 1906 - }, - { - "epoch": 0.22930319244874647, - "grad_norm": 1.8699289411415674, - "learning_rate": 3.597744901916466e-06, - "loss": 1.0045, - "step": 1907 - }, - { - "epoch": 0.22942343533938556, - "grad_norm": 1.7554451153551602, - "learning_rate": 3.5972762289334058e-06, - "loss": 0.9937, - "step": 1908 - }, - { - "epoch": 0.22954367823002464, - "grad_norm": 1.8279979042149723, - "learning_rate": 3.5968073136448116e-06, - "loss": 1.0745, - "step": 1909 - }, - { - "epoch": 0.22966392112066375, - "grad_norm": 1.467993683575338, - "learning_rate": 3.596338156121818e-06, - "loss": 1.1344, - "step": 1910 - }, - { - "epoch": 0.22978416401130283, - "grad_norm": 0.7823938388534096, - "learning_rate": 3.595868756435595e-06, - "loss": 0.8523, - "step": 1911 - }, - { - "epoch": 0.22990440690194192, - "grad_norm": 1.9398523632385298, - "learning_rate": 3.5953991146573504e-06, - "loss": 1.0241, - "step": 1912 - }, - { - "epoch": 0.23002464979258103, - "grad_norm": 2.2868757511376425, - "learning_rate": 3.5949292308583294e-06, - "loss": 1.0542, - "step": 1913 - }, - { - "epoch": 0.2301448926832201, - "grad_norm": 2.073808245079477, - "learning_rate": 3.594459105109811e-06, - "loss": 1.0351, - "step": 1914 - }, - { - "epoch": 0.2302651355738592, - "grad_norm": 1.5605518403051875, - "learning_rate": 3.593988737483115e-06, - "loss": 1.0372, - "step": 1915 - }, - { - "epoch": 0.23038537846449827, - "grad_norm": 1.7406046566553428, - "learning_rate": 3.5935181280495947e-06, - "loss": 1.0053, - "step": 1916 - }, - { - "epoch": 0.23050562135513739, - "grad_norm": 0.8925380238679697, - "learning_rate": 3.5930472768806412e-06, - "loss": 0.8185, - "step": 1917 - }, - { - "epoch": 0.23062586424577647, - "grad_norm": 1.7021563513917075, - "learning_rate": 3.5925761840476826e-06, - "loss": 1.0001, - "step": 1918 - }, - { - "epoch": 0.23074610713641555, - "grad_norm": 1.8224225564418108, - "learning_rate": 3.592104849622183e-06, - "loss": 1.0524, - "step": 1919 - }, - { - "epoch": 0.23086635002705466, - "grad_norm": 1.3277470219125678, - "learning_rate": 3.591633273675644e-06, - "loss": 0.9596, - "step": 1920 - }, - { - "epoch": 0.23098659291769374, - "grad_norm": 0.9725627930871598, - "learning_rate": 3.591161456279602e-06, - "loss": 0.8583, - "step": 1921 - }, - { - "epoch": 0.23110683580833283, - "grad_norm": 1.340863209793234, - "learning_rate": 3.590689397505633e-06, - "loss": 1.0258, - "step": 1922 - }, - { - "epoch": 0.2312270786989719, - "grad_norm": 1.5467547745656394, - "learning_rate": 3.590217097425347e-06, - "loss": 1.094, - "step": 1923 - }, - { - "epoch": 0.23134732158961102, - "grad_norm": 1.9032874929026482, - "learning_rate": 3.589744556110391e-06, - "loss": 0.9407, - "step": 1924 - }, - { - "epoch": 0.2314675644802501, - "grad_norm": 1.545089076045271, - "learning_rate": 3.58927177363245e-06, - "loss": 1.0756, - "step": 1925 - }, - { - "epoch": 0.2315878073708892, - "grad_norm": 1.9198312825341206, - "learning_rate": 3.5887987500632447e-06, - "loss": 0.9646, - "step": 1926 - }, - { - "epoch": 0.2317080502615283, - "grad_norm": 1.6682591777304963, - "learning_rate": 3.5883254854745325e-06, - "loss": 1.0593, - "step": 1927 - }, - { - "epoch": 0.23182829315216738, - "grad_norm": 1.7878246439384806, - "learning_rate": 3.587851979938107e-06, - "loss": 0.9864, - "step": 1928 - }, - { - "epoch": 0.23194853604280646, - "grad_norm": 1.7538426988808324, - "learning_rate": 3.5873782335257985e-06, - "loss": 0.9995, - "step": 1929 - }, - { - "epoch": 0.23206877893344555, - "grad_norm": 1.8098145683944131, - "learning_rate": 3.5869042463094744e-06, - "loss": 1.0186, - "step": 1930 - }, - { - "epoch": 0.23218902182408466, - "grad_norm": 1.7953497342464133, - "learning_rate": 3.586430018361038e-06, - "loss": 0.9968, - "step": 1931 - }, - { - "epoch": 0.23230926471472374, - "grad_norm": 2.0665737725986877, - "learning_rate": 3.5859555497524283e-06, - "loss": 0.9946, - "step": 1932 - }, - { - "epoch": 0.23242950760536282, - "grad_norm": 2.2001708224474794, - "learning_rate": 3.5854808405556237e-06, - "loss": 1.142, - "step": 1933 - }, - { - "epoch": 0.23254975049600193, - "grad_norm": 2.311634932916492, - "learning_rate": 3.5850058908426355e-06, - "loss": 0.9873, - "step": 1934 - }, - { - "epoch": 0.23266999338664102, - "grad_norm": 1.8917426596794422, - "learning_rate": 3.584530700685514e-06, - "loss": 1.0766, - "step": 1935 - }, - { - "epoch": 0.2327902362772801, - "grad_norm": 1.9700512899702702, - "learning_rate": 3.5840552701563448e-06, - "loss": 1.1233, - "step": 1936 - }, - { - "epoch": 0.2329104791679192, - "grad_norm": 2.6034546349171857, - "learning_rate": 3.5835795993272513e-06, - "loss": 1.0488, - "step": 1937 - }, - { - "epoch": 0.2330307220585583, - "grad_norm": 1.9829553046179091, - "learning_rate": 3.583103688270391e-06, - "loss": 0.9428, - "step": 1938 - }, - { - "epoch": 0.23315096494919738, - "grad_norm": 1.9100263742553274, - "learning_rate": 3.58262753705796e-06, - "loss": 1.1133, - "step": 1939 - }, - { - "epoch": 0.23327120783983646, - "grad_norm": 0.7617569742637793, - "learning_rate": 3.5821511457621902e-06, - "loss": 0.8048, - "step": 1940 - }, - { - "epoch": 0.23339145073047557, - "grad_norm": 3.5412546404354717, - "learning_rate": 3.5816745144553497e-06, - "loss": 1.0381, - "step": 1941 - }, - { - "epoch": 0.23351169362111465, - "grad_norm": 2.064035710890807, - "learning_rate": 3.5811976432097424e-06, - "loss": 0.9833, - "step": 1942 - }, - { - "epoch": 0.23363193651175373, - "grad_norm": 1.8581848182121263, - "learning_rate": 3.58072053209771e-06, - "loss": 1.0773, - "step": 1943 - }, - { - "epoch": 0.23375217940239285, - "grad_norm": 2.3634421553888085, - "learning_rate": 3.5802431811916296e-06, - "loss": 1.0227, - "step": 1944 - }, - { - "epoch": 0.23387242229303193, - "grad_norm": 1.6337500258010127, - "learning_rate": 3.579765590563916e-06, - "loss": 1.0277, - "step": 1945 - }, - { - "epoch": 0.233992665183671, - "grad_norm": 1.7586417043783142, - "learning_rate": 3.579287760287017e-06, - "loss": 1.039, - "step": 1946 - }, - { - "epoch": 0.2341129080743101, - "grad_norm": 1.559030266155399, - "learning_rate": 3.578809690433421e-06, - "loss": 0.9583, - "step": 1947 - }, - { - "epoch": 0.2342331509649492, - "grad_norm": 1.9408948136393958, - "learning_rate": 3.578331381075651e-06, - "loss": 1.0359, - "step": 1948 - }, - { - "epoch": 0.2343533938555883, - "grad_norm": 1.9592748029238247, - "learning_rate": 3.5778528322862646e-06, - "loss": 0.9231, - "step": 1949 - }, - { - "epoch": 0.23447363674622737, - "grad_norm": 1.6214266148045848, - "learning_rate": 3.5773740441378585e-06, - "loss": 1.0929, - "step": 1950 - }, - { - "epoch": 0.23459387963686648, - "grad_norm": 1.99074151912283, - "learning_rate": 3.5768950167030633e-06, - "loss": 0.9649, - "step": 1951 - }, - { - "epoch": 0.23471412252750556, - "grad_norm": 1.7697014800457032, - "learning_rate": 3.576415750054548e-06, - "loss": 1.0134, - "step": 1952 - }, - { - "epoch": 0.23483436541814465, - "grad_norm": 1.7367525043373397, - "learning_rate": 3.5759362442650172e-06, - "loss": 1.0791, - "step": 1953 - }, - { - "epoch": 0.23495460830878373, - "grad_norm": 2.0166741531275147, - "learning_rate": 3.5754564994072113e-06, - "loss": 1.0781, - "step": 1954 - }, - { - "epoch": 0.23507485119942284, - "grad_norm": 2.119798015113216, - "learning_rate": 3.5749765155539067e-06, - "loss": 0.8354, - "step": 1955 - }, - { - "epoch": 0.23519509409006192, - "grad_norm": 2.000190964165032, - "learning_rate": 3.574496292777917e-06, - "loss": 1.1535, - "step": 1956 - }, - { - "epoch": 0.235315336980701, - "grad_norm": 1.7705540006644245, - "learning_rate": 3.574015831152092e-06, - "loss": 0.9413, - "step": 1957 - }, - { - "epoch": 0.23543557987134012, - "grad_norm": 2.127888455457576, - "learning_rate": 3.573535130749316e-06, - "loss": 1.0705, - "step": 1958 - }, - { - "epoch": 0.2355558227619792, - "grad_norm": 1.5636487933858443, - "learning_rate": 3.5730541916425127e-06, - "loss": 0.9682, - "step": 1959 - }, - { - "epoch": 0.23567606565261828, - "grad_norm": 1.8239678612646637, - "learning_rate": 3.572573013904639e-06, - "loss": 1.0878, - "step": 1960 - }, - { - "epoch": 0.2357963085432574, - "grad_norm": 2.0616196574240946, - "learning_rate": 3.572091597608689e-06, - "loss": 1.1493, - "step": 1961 - }, - { - "epoch": 0.23591655143389648, - "grad_norm": 2.3543854558321136, - "learning_rate": 3.571609942827694e-06, - "loss": 0.9645, - "step": 1962 - }, - { - "epoch": 0.23603679432453556, - "grad_norm": 1.606724751294421, - "learning_rate": 3.57112804963472e-06, - "loss": 1.1019, - "step": 1963 - }, - { - "epoch": 0.23615703721517464, - "grad_norm": 1.9767795488805615, - "learning_rate": 3.57064591810287e-06, - "loss": 0.9949, - "step": 1964 - }, - { - "epoch": 0.23627728010581375, - "grad_norm": 2.1103906517521653, - "learning_rate": 3.570163548305284e-06, - "loss": 1.0339, - "step": 1965 - }, - { - "epoch": 0.23639752299645284, - "grad_norm": 2.0570283616650937, - "learning_rate": 3.569680940315135e-06, - "loss": 0.9372, - "step": 1966 - }, - { - "epoch": 0.23651776588709192, - "grad_norm": 1.6626429704269, - "learning_rate": 3.5691980942056356e-06, - "loss": 1.0452, - "step": 1967 - }, - { - "epoch": 0.23663800877773103, - "grad_norm": 1.5829497508485695, - "learning_rate": 3.5687150100500332e-06, - "loss": 1.0188, - "step": 1968 - }, - { - "epoch": 0.2367582516683701, - "grad_norm": 1.7218371323092596, - "learning_rate": 3.568231687921611e-06, - "loss": 0.973, - "step": 1969 - }, - { - "epoch": 0.2368784945590092, - "grad_norm": 2.0261816321293047, - "learning_rate": 3.5677481278936883e-06, - "loss": 1.0396, - "step": 1970 - }, - { - "epoch": 0.23699873744964828, - "grad_norm": 0.8183713561571676, - "learning_rate": 3.5672643300396214e-06, - "loss": 0.825, - "step": 1971 - }, - { - "epoch": 0.2371189803402874, - "grad_norm": 2.00576939515654, - "learning_rate": 3.566780294432802e-06, - "loss": 0.9175, - "step": 1972 - }, - { - "epoch": 0.23723922323092647, - "grad_norm": 1.855735615060548, - "learning_rate": 3.566296021146657e-06, - "loss": 0.9688, - "step": 1973 - }, - { - "epoch": 0.23735946612156555, - "grad_norm": 1.4576500608950922, - "learning_rate": 3.565811510254652e-06, - "loss": 0.9593, - "step": 1974 - }, - { - "epoch": 0.23747970901220466, - "grad_norm": 0.7840554030801536, - "learning_rate": 3.5653267618302845e-06, - "loss": 0.8244, - "step": 1975 - }, - { - "epoch": 0.23759995190284375, - "grad_norm": 1.7071978664484966, - "learning_rate": 3.564841775947093e-06, - "loss": 1.0856, - "step": 1976 - }, - { - "epoch": 0.23772019479348283, - "grad_norm": 1.9934929852463896, - "learning_rate": 3.5643565526786475e-06, - "loss": 0.9901, - "step": 1977 - }, - { - "epoch": 0.2378404376841219, - "grad_norm": 1.5629454931321032, - "learning_rate": 3.5638710920985574e-06, - "loss": 1.0028, - "step": 1978 - }, - { - "epoch": 0.23796068057476102, - "grad_norm": 1.7090790989373665, - "learning_rate": 3.5633853942804655e-06, - "loss": 1.0486, - "step": 1979 - }, - { - "epoch": 0.2380809234654001, - "grad_norm": 2.3501919525967008, - "learning_rate": 3.5628994592980527e-06, - "loss": 1.002, - "step": 1980 - }, - { - "epoch": 0.2382011663560392, - "grad_norm": 2.3871534485478856, - "learning_rate": 3.562413287225034e-06, - "loss": 0.9347, - "step": 1981 - }, - { - "epoch": 0.2383214092466783, - "grad_norm": 2.0428294030363054, - "learning_rate": 3.5619268781351623e-06, - "loss": 1.1176, - "step": 1982 - }, - { - "epoch": 0.23844165213731738, - "grad_norm": 1.72165877686004, - "learning_rate": 3.5614402321022256e-06, - "loss": 1.0007, - "step": 1983 - }, - { - "epoch": 0.23856189502795647, - "grad_norm": 1.8376789680570658, - "learning_rate": 3.5609533492000463e-06, - "loss": 1.1012, - "step": 1984 - }, - { - "epoch": 0.23868213791859555, - "grad_norm": 2.1584950797648106, - "learning_rate": 3.560466229502485e-06, - "loss": 1.011, - "step": 1985 - }, - { - "epoch": 0.23880238080923466, - "grad_norm": 1.93991584290513, - "learning_rate": 3.5599788730834384e-06, - "loss": 1.124, - "step": 1986 - }, - { - "epoch": 0.23892262369987374, - "grad_norm": 2.3010134620256126, - "learning_rate": 3.559491280016836e-06, - "loss": 1.0315, - "step": 1987 - }, - { - "epoch": 0.23904286659051283, - "grad_norm": 1.643638676691998, - "learning_rate": 3.5590034503766465e-06, - "loss": 0.9454, - "step": 1988 - }, - { - "epoch": 0.23916310948115194, - "grad_norm": 2.2078962673270253, - "learning_rate": 3.558515384236874e-06, - "loss": 1.0456, - "step": 1989 - }, - { - "epoch": 0.23928335237179102, - "grad_norm": 1.5910046437289829, - "learning_rate": 3.558027081671556e-06, - "loss": 1.064, - "step": 1990 - }, - { - "epoch": 0.2394035952624301, - "grad_norm": 1.831203314280479, - "learning_rate": 3.557538542754769e-06, - "loss": 0.9207, - "step": 1991 - }, - { - "epoch": 0.2395238381530692, - "grad_norm": 1.5437780647970059, - "learning_rate": 3.557049767560623e-06, - "loss": 0.8941, - "step": 1992 - }, - { - "epoch": 0.2396440810437083, - "grad_norm": 1.9768416618457108, - "learning_rate": 3.5565607561632655e-06, - "loss": 1.0916, - "step": 1993 - }, - { - "epoch": 0.23976432393434738, - "grad_norm": 1.833434326106709, - "learning_rate": 3.5560715086368787e-06, - "loss": 1.014, - "step": 1994 - }, - { - "epoch": 0.23988456682498646, - "grad_norm": 1.7793809554734303, - "learning_rate": 3.5555820250556816e-06, - "loss": 1.0541, - "step": 1995 - }, - { - "epoch": 0.24000480971562557, - "grad_norm": 2.031617351472756, - "learning_rate": 3.5550923054939278e-06, - "loss": 0.9216, - "step": 1996 - }, - { - "epoch": 0.24012505260626466, - "grad_norm": 1.6581420168271261, - "learning_rate": 3.5546023500259083e-06, - "loss": 0.9724, - "step": 1997 - }, - { - "epoch": 0.24024529549690374, - "grad_norm": 2.0560784219042483, - "learning_rate": 3.5541121587259477e-06, - "loss": 1.0363, - "step": 1998 - }, - { - "epoch": 0.24036553838754285, - "grad_norm": 0.8022829814656396, - "learning_rate": 3.553621731668408e-06, - "loss": 0.8212, - "step": 1999 - }, - { - "epoch": 0.24048578127818193, - "grad_norm": 1.5013541625872107, - "learning_rate": 3.553131068927688e-06, - "loss": 1.0652, - "step": 2000 - }, - { - "epoch": 0.24060602416882101, - "grad_norm": 1.5677978726625275, - "learning_rate": 3.552640170578219e-06, - "loss": 1.035, - "step": 2001 - }, - { - "epoch": 0.2407262670594601, - "grad_norm": 1.9226278055715085, - "learning_rate": 3.5521490366944703e-06, - "loss": 1.0053, - "step": 2002 - }, - { - "epoch": 0.2408465099500992, - "grad_norm": 3.1341580286357473, - "learning_rate": 3.5516576673509474e-06, - "loss": 1.033, - "step": 2003 - }, - { - "epoch": 0.2409667528407383, - "grad_norm": 1.5099426402635254, - "learning_rate": 3.5511660626221896e-06, - "loss": 1.0915, - "step": 2004 - }, - { - "epoch": 0.24108699573137737, - "grad_norm": 2.20250460499727, - "learning_rate": 3.5506742225827744e-06, - "loss": 1.1133, - "step": 2005 - }, - { - "epoch": 0.24120723862201648, - "grad_norm": 2.0081301189301564, - "learning_rate": 3.5501821473073116e-06, - "loss": 1.1311, - "step": 2006 - }, - { - "epoch": 0.24132748151265557, - "grad_norm": 1.999742968255399, - "learning_rate": 3.54968983687045e-06, - "loss": 1.0981, - "step": 2007 - }, - { - "epoch": 0.24144772440329465, - "grad_norm": 2.0000706898594394, - "learning_rate": 3.549197291346872e-06, - "loss": 1.1262, - "step": 2008 - }, - { - "epoch": 0.24156796729393373, - "grad_norm": 1.951533164788091, - "learning_rate": 3.548704510811297e-06, - "loss": 1.0264, - "step": 2009 - }, - { - "epoch": 0.24168821018457284, - "grad_norm": 2.0792376759035336, - "learning_rate": 3.5482114953384787e-06, - "loss": 0.9732, - "step": 2010 - }, - { - "epoch": 0.24180845307521193, - "grad_norm": 1.798303583563133, - "learning_rate": 3.5477182450032077e-06, - "loss": 1.0716, - "step": 2011 - }, - { - "epoch": 0.241928695965851, - "grad_norm": 1.854177632102974, - "learning_rate": 3.5472247598803097e-06, - "loss": 1.0653, - "step": 2012 - }, - { - "epoch": 0.24204893885649012, - "grad_norm": 2.001534707608264, - "learning_rate": 3.546731040044645e-06, - "loss": 1.0884, - "step": 2013 - }, - { - "epoch": 0.2421691817471292, - "grad_norm": 1.6118435491930336, - "learning_rate": 3.546237085571112e-06, - "loss": 0.9837, - "step": 2014 - }, - { - "epoch": 0.24228942463776829, - "grad_norm": 1.741998910318903, - "learning_rate": 3.5457428965346425e-06, - "loss": 0.9525, - "step": 2015 - }, - { - "epoch": 0.2424096675284074, - "grad_norm": 1.666272625436868, - "learning_rate": 3.545248473010205e-06, - "loss": 0.9749, - "step": 2016 - }, - { - "epoch": 0.24252991041904648, - "grad_norm": 1.5901429740634194, - "learning_rate": 3.544753815072802e-06, - "loss": 1.1012, - "step": 2017 - }, - { - "epoch": 0.24265015330968556, - "grad_norm": 1.7591627572270836, - "learning_rate": 3.544258922797474e-06, - "loss": 1.1152, - "step": 2018 - }, - { - "epoch": 0.24277039620032465, - "grad_norm": 1.3719850343966802, - "learning_rate": 3.543763796259295e-06, - "loss": 1.0092, - "step": 2019 - }, - { - "epoch": 0.24289063909096376, - "grad_norm": 1.7796634501639936, - "learning_rate": 3.5432684355333754e-06, - "loss": 1.1396, - "step": 2020 - }, - { - "epoch": 0.24301088198160284, - "grad_norm": 2.9153868092411956, - "learning_rate": 3.5427728406948613e-06, - "loss": 0.9886, - "step": 2021 - }, - { - "epoch": 0.24313112487224192, - "grad_norm": 0.77054312544089, - "learning_rate": 3.542277011818934e-06, - "loss": 0.835, - "step": 2022 - }, - { - "epoch": 0.24325136776288103, - "grad_norm": 2.0843085740241265, - "learning_rate": 3.5417809489808104e-06, - "loss": 0.9661, - "step": 2023 - }, - { - "epoch": 0.24337161065352012, - "grad_norm": 1.6143592761046008, - "learning_rate": 3.5412846522557422e-06, - "loss": 0.9529, - "step": 2024 - }, - { - "epoch": 0.2434918535441592, - "grad_norm": 1.9958581953418149, - "learning_rate": 3.540788121719018e-06, - "loss": 0.9721, - "step": 2025 - }, - { - "epoch": 0.24361209643479828, - "grad_norm": 1.939402353903369, - "learning_rate": 3.5402913574459604e-06, - "loss": 1.0551, - "step": 2026 - }, - { - "epoch": 0.2437323393254374, - "grad_norm": 1.772326957746174, - "learning_rate": 3.5397943595119297e-06, - "loss": 1.0838, - "step": 2027 - }, - { - "epoch": 0.24385258221607647, - "grad_norm": 2.381140851099972, - "learning_rate": 3.5392971279923177e-06, - "loss": 1.0014, - "step": 2028 - }, - { - "epoch": 0.24397282510671556, - "grad_norm": 2.3878596649084876, - "learning_rate": 3.5387996629625557e-06, - "loss": 1.0601, - "step": 2029 - }, - { - "epoch": 0.24409306799735467, - "grad_norm": 0.8668004796149595, - "learning_rate": 3.5383019644981083e-06, - "loss": 0.8305, - "step": 2030 - }, - { - "epoch": 0.24421331088799375, - "grad_norm": 2.11998606047456, - "learning_rate": 3.5378040326744763e-06, - "loss": 0.9553, - "step": 2031 - }, - { - "epoch": 0.24433355377863283, - "grad_norm": 3.143076229509758, - "learning_rate": 3.5373058675671946e-06, - "loss": 1.0912, - "step": 2032 - }, - { - "epoch": 0.24445379666927192, - "grad_norm": 2.247976346736562, - "learning_rate": 3.536807469251836e-06, - "loss": 0.9611, - "step": 2033 - }, - { - "epoch": 0.24457403955991103, - "grad_norm": 1.7796004839247639, - "learning_rate": 3.5363088378040055e-06, - "loss": 1.0482, - "step": 2034 - }, - { - "epoch": 0.2446942824505501, - "grad_norm": 0.7792484201458706, - "learning_rate": 3.5358099732993463e-06, - "loss": 0.9138, - "step": 2035 - }, - { - "epoch": 0.2448145253411892, - "grad_norm": 1.7713664056597596, - "learning_rate": 3.535310875813535e-06, - "loss": 1.123, - "step": 2036 - }, - { - "epoch": 0.2449347682318283, - "grad_norm": 1.8280990468326703, - "learning_rate": 3.5348115454222843e-06, - "loss": 1.0455, - "step": 2037 - }, - { - "epoch": 0.2450550111224674, - "grad_norm": 6.601060909198848, - "learning_rate": 3.5343119822013425e-06, - "loss": 1.093, - "step": 2038 - }, - { - "epoch": 0.24517525401310647, - "grad_norm": 1.6233018658752, - "learning_rate": 3.533812186226493e-06, - "loss": 0.9971, - "step": 2039 - }, - { - "epoch": 0.24529549690374555, - "grad_norm": 1.6663024583873598, - "learning_rate": 3.5333121575735545e-06, - "loss": 0.993, - "step": 2040 - }, - { - "epoch": 0.24541573979438466, - "grad_norm": 1.7429009588951692, - "learning_rate": 3.532811896318381e-06, - "loss": 0.9842, - "step": 2041 - }, - { - "epoch": 0.24553598268502375, - "grad_norm": 2.154456124943235, - "learning_rate": 3.5323114025368615e-06, - "loss": 1.0452, - "step": 2042 - }, - { - "epoch": 0.24565622557566283, - "grad_norm": 2.1487874803719476, - "learning_rate": 3.53181067630492e-06, - "loss": 1.0392, - "step": 2043 - }, - { - "epoch": 0.24577646846630194, - "grad_norm": 1.8034560060055622, - "learning_rate": 3.5313097176985175e-06, - "loss": 0.9937, - "step": 2044 - }, - { - "epoch": 0.24589671135694102, - "grad_norm": 1.7541863232717156, - "learning_rate": 3.5308085267936482e-06, - "loss": 1.0321, - "step": 2045 - }, - { - "epoch": 0.2460169542475801, - "grad_norm": 1.6629286650808883, - "learning_rate": 3.530307103666342e-06, - "loss": 1.1319, - "step": 2046 - }, - { - "epoch": 0.24613719713821922, - "grad_norm": 4.039956796245146, - "learning_rate": 3.5298054483926658e-06, - "loss": 1.0349, - "step": 2047 - }, - { - "epoch": 0.2462574400288583, - "grad_norm": 1.9597805303197253, - "learning_rate": 3.5293035610487187e-06, - "loss": 1.0616, - "step": 2048 - }, - { - "epoch": 0.24637768291949738, - "grad_norm": 0.7279718327994327, - "learning_rate": 3.5288014417106374e-06, - "loss": 0.8755, - "step": 2049 - }, - { - "epoch": 0.24649792581013646, - "grad_norm": 2.132419536007201, - "learning_rate": 3.528299090454593e-06, - "loss": 0.9813, - "step": 2050 - }, - { - "epoch": 0.24661816870077558, - "grad_norm": 3.1841666051416526, - "learning_rate": 3.527796507356792e-06, - "loss": 1.0503, - "step": 2051 - }, - { - "epoch": 0.24673841159141466, - "grad_norm": 2.3935979172325252, - "learning_rate": 3.527293692493475e-06, - "loss": 1.1325, - "step": 2052 - }, - { - "epoch": 0.24685865448205374, - "grad_norm": 2.645378754580899, - "learning_rate": 3.52679064594092e-06, - "loss": 0.9713, - "step": 2053 - }, - { - "epoch": 0.24697889737269285, - "grad_norm": 1.9281822851441335, - "learning_rate": 3.5262873677754375e-06, - "loss": 0.9749, - "step": 2054 - }, - { - "epoch": 0.24709914026333193, - "grad_norm": 1.5719473759576532, - "learning_rate": 3.5257838580733745e-06, - "loss": 1.0349, - "step": 2055 - }, - { - "epoch": 0.24721938315397102, - "grad_norm": 1.9413520912890312, - "learning_rate": 3.5252801169111138e-06, - "loss": 1.1029, - "step": 2056 - }, - { - "epoch": 0.2473396260446101, - "grad_norm": 2.752531273826169, - "learning_rate": 3.524776144365072e-06, - "loss": 1.0237, - "step": 2057 - }, - { - "epoch": 0.2474598689352492, - "grad_norm": 1.4249552803384236, - "learning_rate": 3.5242719405117016e-06, - "loss": 1.0256, - "step": 2058 - }, - { - "epoch": 0.2475801118258883, - "grad_norm": 2.4706618233949085, - "learning_rate": 3.5237675054274893e-06, - "loss": 0.9851, - "step": 2059 - }, - { - "epoch": 0.24770035471652738, - "grad_norm": 1.7014365970211354, - "learning_rate": 3.5232628391889584e-06, - "loss": 1.0285, - "step": 2060 - }, - { - "epoch": 0.2478205976071665, - "grad_norm": 3.2574235789190142, - "learning_rate": 3.522757941872666e-06, - "loss": 0.8703, - "step": 2061 - }, - { - "epoch": 0.24794084049780557, - "grad_norm": 1.4429754384066025, - "learning_rate": 3.5222528135552042e-06, - "loss": 1.0586, - "step": 2062 - }, - { - "epoch": 0.24806108338844465, - "grad_norm": 1.6199586870082598, - "learning_rate": 3.521747454313201e-06, - "loss": 1.0365, - "step": 2063 - }, - { - "epoch": 0.24818132627908374, - "grad_norm": 1.8881241833692324, - "learning_rate": 3.521241864223319e-06, - "loss": 0.8949, - "step": 2064 - }, - { - "epoch": 0.24830156916972285, - "grad_norm": 0.7822574223635302, - "learning_rate": 3.5207360433622552e-06, - "loss": 0.8632, - "step": 2065 - }, - { - "epoch": 0.24842181206036193, - "grad_norm": 1.4353748036754606, - "learning_rate": 3.5202299918067437e-06, - "loss": 0.9714, - "step": 2066 - }, - { - "epoch": 0.248542054951001, - "grad_norm": 2.058349015746304, - "learning_rate": 3.519723709633551e-06, - "loss": 0.9221, - "step": 2067 - }, - { - "epoch": 0.24866229784164012, - "grad_norm": 1.8160123008193791, - "learning_rate": 3.519217196919479e-06, - "loss": 1.0558, - "step": 2068 - }, - { - "epoch": 0.2487825407322792, - "grad_norm": 1.714611759004236, - "learning_rate": 3.518710453741367e-06, - "loss": 0.9612, - "step": 2069 - }, - { - "epoch": 0.2489027836229183, - "grad_norm": 1.7877421608530146, - "learning_rate": 3.518203480176086e-06, - "loss": 0.9048, - "step": 2070 - }, - { - "epoch": 0.2490230265135574, - "grad_norm": 1.503728999802222, - "learning_rate": 3.517696276300545e-06, - "loss": 1.0254, - "step": 2071 - }, - { - "epoch": 0.24914326940419648, - "grad_norm": 2.64209981510027, - "learning_rate": 3.517188842191685e-06, - "loss": 0.9276, - "step": 2072 - }, - { - "epoch": 0.24926351229483557, - "grad_norm": 1.6313340644076109, - "learning_rate": 3.5166811779264837e-06, - "loss": 0.9759, - "step": 2073 - }, - { - "epoch": 0.24938375518547465, - "grad_norm": 1.9340234144227417, - "learning_rate": 3.5161732835819545e-06, - "loss": 1.0103, - "step": 2074 - }, - { - "epoch": 0.24950399807611376, - "grad_norm": 1.6213361076364707, - "learning_rate": 3.515665159235143e-06, - "loss": 1.0585, - "step": 2075 - }, - { - "epoch": 0.24962424096675284, - "grad_norm": 1.60627574083654, - "learning_rate": 3.5151568049631318e-06, - "loss": 0.9825, - "step": 2076 - }, - { - "epoch": 0.24974448385739192, - "grad_norm": 1.6200355109102107, - "learning_rate": 3.5146482208430385e-06, - "loss": 1.0259, - "step": 2077 - }, - { - "epoch": 0.24986472674803104, - "grad_norm": 1.8781361736657076, - "learning_rate": 3.514139406952014e-06, - "loss": 0.9027, - "step": 2078 - }, - { - "epoch": 0.24998496963867012, - "grad_norm": 1.6020503347873503, - "learning_rate": 3.5136303633672454e-06, - "loss": 1.0636, - "step": 2079 - }, - { - "epoch": 0.25010521252930923, - "grad_norm": 1.4922963047803162, - "learning_rate": 3.5131210901659544e-06, - "loss": 0.9767, - "step": 2080 - }, - { - "epoch": 0.2502254554199483, - "grad_norm": 2.03724887915185, - "learning_rate": 3.5126115874253967e-06, - "loss": 1.0457, - "step": 2081 - }, - { - "epoch": 0.2503456983105874, - "grad_norm": 2.2274874723448224, - "learning_rate": 3.5121018552228644e-06, - "loss": 1.034, - "step": 2082 - }, - { - "epoch": 0.2504659412012265, - "grad_norm": 1.9148693641745507, - "learning_rate": 3.5115918936356827e-06, - "loss": 0.9966, - "step": 2083 - }, - { - "epoch": 0.25058618409186556, - "grad_norm": 1.7923793779805617, - "learning_rate": 3.5110817027412123e-06, - "loss": 1.0186, - "step": 2084 - }, - { - "epoch": 0.25070642698250467, - "grad_norm": 1.8579159309207234, - "learning_rate": 3.5105712826168493e-06, - "loss": 0.9211, - "step": 2085 - }, - { - "epoch": 0.2508266698731437, - "grad_norm": 1.6369889430078717, - "learning_rate": 3.5100606333400235e-06, - "loss": 0.9328, - "step": 2086 - }, - { - "epoch": 0.25094691276378284, - "grad_norm": 2.474784333051506, - "learning_rate": 3.5095497549882006e-06, - "loss": 1.0034, - "step": 2087 - }, - { - "epoch": 0.25106715565442195, - "grad_norm": 1.7027194038882876, - "learning_rate": 3.50903864763888e-06, - "loss": 0.9521, - "step": 2088 - }, - { - "epoch": 0.251187398545061, - "grad_norm": 2.060638743879159, - "learning_rate": 3.5085273113695965e-06, - "loss": 0.9981, - "step": 2089 - }, - { - "epoch": 0.2513076414357001, - "grad_norm": 1.8231039623136045, - "learning_rate": 3.508015746257919e-06, - "loss": 1.0154, - "step": 2090 - }, - { - "epoch": 0.2514278843263392, - "grad_norm": 1.6747487406380692, - "learning_rate": 3.5075039523814518e-06, - "loss": 1.0616, - "step": 2091 - }, - { - "epoch": 0.2515481272169783, - "grad_norm": 2.9867349451071514, - "learning_rate": 3.506991929817834e-06, - "loss": 1.0536, - "step": 2092 - }, - { - "epoch": 0.2516683701076174, - "grad_norm": 1.8378699339006983, - "learning_rate": 3.506479678644738e-06, - "loss": 1.0483, - "step": 2093 - }, - { - "epoch": 0.2517886129982565, - "grad_norm": 2.3329763366440965, - "learning_rate": 3.505967198939873e-06, - "loss": 0.9686, - "step": 2094 - }, - { - "epoch": 0.25190885588889556, - "grad_norm": 2.4384967037739473, - "learning_rate": 3.5054544907809813e-06, - "loss": 1.0121, - "step": 2095 - }, - { - "epoch": 0.25202909877953467, - "grad_norm": 1.8252138613456883, - "learning_rate": 3.50494155424584e-06, - "loss": 1.0282, - "step": 2096 - }, - { - "epoch": 0.2521493416701738, - "grad_norm": 1.6764506534694263, - "learning_rate": 3.504428389412262e-06, - "loss": 1.065, - "step": 2097 - }, - { - "epoch": 0.25226958456081283, - "grad_norm": 1.985170820558799, - "learning_rate": 3.5039149963580927e-06, - "loss": 0.9652, - "step": 2098 - }, - { - "epoch": 0.25238982745145194, - "grad_norm": 2.640007172488093, - "learning_rate": 3.503401375161215e-06, - "loss": 0.9235, - "step": 2099 - }, - { - "epoch": 0.252510070342091, - "grad_norm": 1.9688146671392106, - "learning_rate": 3.502887525899544e-06, - "loss": 1.0733, - "step": 2100 - }, - { - "epoch": 0.2526303132327301, - "grad_norm": 2.3831382653852375, - "learning_rate": 3.50237344865103e-06, - "loss": 1.0549, - "step": 2101 - }, - { - "epoch": 0.2527505561233692, - "grad_norm": 2.257149041186341, - "learning_rate": 3.501859143493658e-06, - "loss": 1.0038, - "step": 2102 - }, - { - "epoch": 0.2528707990140083, - "grad_norm": 0.8647898615009015, - "learning_rate": 3.5013446105054488e-06, - "loss": 0.8798, - "step": 2103 - }, - { - "epoch": 0.2529910419046474, - "grad_norm": 2.694324787886952, - "learning_rate": 3.5008298497644555e-06, - "loss": 0.9836, - "step": 2104 - }, - { - "epoch": 0.2531112847952865, - "grad_norm": 1.5450869928948314, - "learning_rate": 3.500314861348767e-06, - "loss": 1.1063, - "step": 2105 - }, - { - "epoch": 0.25323152768592555, - "grad_norm": 1.5943310744194943, - "learning_rate": 3.499799645336507e-06, - "loss": 0.9977, - "step": 2106 - }, - { - "epoch": 0.25335177057656466, - "grad_norm": 1.3788006015321435, - "learning_rate": 3.4992842018058336e-06, - "loss": 1.0965, - "step": 2107 - }, - { - "epoch": 0.25347201346720377, - "grad_norm": 2.136731247106964, - "learning_rate": 3.4987685308349384e-06, - "loss": 1.1018, - "step": 2108 - }, - { - "epoch": 0.2535922563578428, - "grad_norm": 4.219481002206138, - "learning_rate": 3.4982526325020497e-06, - "loss": 0.8432, - "step": 2109 - }, - { - "epoch": 0.25371249924848194, - "grad_norm": 2.1407357068915007, - "learning_rate": 3.4977365068854273e-06, - "loss": 1.047, - "step": 2110 - }, - { - "epoch": 0.25383274213912105, - "grad_norm": 2.962923777727343, - "learning_rate": 3.4972201540633676e-06, - "loss": 0.95, - "step": 2111 - }, - { - "epoch": 0.2539529850297601, - "grad_norm": 1.6774386943638206, - "learning_rate": 3.4967035741142008e-06, - "loss": 1.0809, - "step": 2112 - }, - { - "epoch": 0.2540732279203992, - "grad_norm": 1.7200393695539506, - "learning_rate": 3.4961867671162917e-06, - "loss": 1.0507, - "step": 2113 - }, - { - "epoch": 0.2541934708110383, - "grad_norm": 2.1176311408173922, - "learning_rate": 3.4956697331480402e-06, - "loss": 1.0053, - "step": 2114 - }, - { - "epoch": 0.2543137137016774, - "grad_norm": 1.4336009012402529, - "learning_rate": 3.495152472287879e-06, - "loss": 1.0299, - "step": 2115 - }, - { - "epoch": 0.2544339565923165, - "grad_norm": 1.5663947583012459, - "learning_rate": 3.4946349846142766e-06, - "loss": 0.9654, - "step": 2116 - }, - { - "epoch": 0.25455419948295555, - "grad_norm": 2.2518905538317724, - "learning_rate": 3.4941172702057353e-06, - "loss": 0.9844, - "step": 2117 - }, - { - "epoch": 0.25467444237359466, - "grad_norm": 1.8244629396295406, - "learning_rate": 3.4935993291407924e-06, - "loss": 1.0333, - "step": 2118 - }, - { - "epoch": 0.25479468526423377, - "grad_norm": 2.389306795791722, - "learning_rate": 3.4930811614980183e-06, - "loss": 0.9355, - "step": 2119 - }, - { - "epoch": 0.2549149281548728, - "grad_norm": 1.7473299911381674, - "learning_rate": 3.4925627673560198e-06, - "loss": 1.0234, - "step": 2120 - }, - { - "epoch": 0.25503517104551193, - "grad_norm": 1.6571133360793584, - "learning_rate": 3.4920441467934357e-06, - "loss": 1.1183, - "step": 2121 - }, - { - "epoch": 0.25515541393615104, - "grad_norm": 2.6741749640938655, - "learning_rate": 3.491525299888941e-06, - "loss": 1.0641, - "step": 2122 - }, - { - "epoch": 0.2552756568267901, - "grad_norm": 1.0089697883665123, - "learning_rate": 3.491006226721244e-06, - "loss": 0.916, - "step": 2123 - }, - { - "epoch": 0.2553958997174292, - "grad_norm": 1.7177113949586669, - "learning_rate": 3.4904869273690882e-06, - "loss": 0.9993, - "step": 2124 - }, - { - "epoch": 0.2555161426080683, - "grad_norm": 1.6434566221486582, - "learning_rate": 3.489967401911251e-06, - "loss": 1.1215, - "step": 2125 - }, - { - "epoch": 0.2556363854987074, - "grad_norm": 1.422286508559302, - "learning_rate": 3.4894476504265428e-06, - "loss": 0.9243, - "step": 2126 - }, - { - "epoch": 0.2557566283893465, - "grad_norm": 0.7698461982156991, - "learning_rate": 3.4889276729938104e-06, - "loss": 0.8048, - "step": 2127 - }, - { - "epoch": 0.2558768712799856, - "grad_norm": 1.9313552383341146, - "learning_rate": 3.488407469691934e-06, - "loss": 1.0316, - "step": 2128 - }, - { - "epoch": 0.25599711417062465, - "grad_norm": 1.9224396163128612, - "learning_rate": 3.487887040599828e-06, - "loss": 1.0398, - "step": 2129 - }, - { - "epoch": 0.25611735706126376, - "grad_norm": 2.096682871446115, - "learning_rate": 3.4873663857964407e-06, - "loss": 0.9869, - "step": 2130 - }, - { - "epoch": 0.2562375999519028, - "grad_norm": 1.5646391911531654, - "learning_rate": 3.4868455053607556e-06, - "loss": 0.9007, - "step": 2131 - }, - { - "epoch": 0.2563578428425419, - "grad_norm": 1.8453209213852808, - "learning_rate": 3.486324399371789e-06, - "loss": 0.9432, - "step": 2132 - }, - { - "epoch": 0.25647808573318104, - "grad_norm": 1.7267374493783627, - "learning_rate": 3.485803067908593e-06, - "loss": 1.0127, - "step": 2133 - }, - { - "epoch": 0.2565983286238201, - "grad_norm": 1.5305773756376124, - "learning_rate": 3.485281511050253e-06, - "loss": 1.0201, - "step": 2134 - }, - { - "epoch": 0.2567185715144592, - "grad_norm": 4.287641446196566, - "learning_rate": 3.484759728875889e-06, - "loss": 1.1256, - "step": 2135 - }, - { - "epoch": 0.2568388144050983, - "grad_norm": 1.6505004123986535, - "learning_rate": 3.4842377214646543e-06, - "loss": 1.0411, - "step": 2136 - }, - { - "epoch": 0.25695905729573737, - "grad_norm": 2.079002023216096, - "learning_rate": 3.483715488895737e-06, - "loss": 0.9006, - "step": 2137 - }, - { - "epoch": 0.2570793001863765, - "grad_norm": 1.7482316755929779, - "learning_rate": 3.48319303124836e-06, - "loss": 1.019, - "step": 2138 - }, - { - "epoch": 0.2571995430770156, - "grad_norm": 2.21343744967594, - "learning_rate": 3.4826703486017798e-06, - "loss": 0.9072, - "step": 2139 - }, - { - "epoch": 0.25731978596765465, - "grad_norm": 1.4848997292879504, - "learning_rate": 3.4821474410352867e-06, - "loss": 0.998, - "step": 2140 - }, - { - "epoch": 0.25744002885829376, - "grad_norm": 0.9129635652352129, - "learning_rate": 3.481624308628205e-06, - "loss": 0.9022, - "step": 2141 - }, - { - "epoch": 0.25756027174893287, - "grad_norm": 2.5095064615704943, - "learning_rate": 3.481100951459893e-06, - "loss": 1.2234, - "step": 2142 - }, - { - "epoch": 0.2576805146395719, - "grad_norm": 1.6464641746286408, - "learning_rate": 3.4805773696097453e-06, - "loss": 1.0175, - "step": 2143 - }, - { - "epoch": 0.25780075753021103, - "grad_norm": 1.7664427762344341, - "learning_rate": 3.4800535631571874e-06, - "loss": 1.1029, - "step": 2144 - }, - { - "epoch": 0.25792100042085014, - "grad_norm": 1.9021251639048773, - "learning_rate": 3.4795295321816804e-06, - "loss": 1.0027, - "step": 2145 - }, - { - "epoch": 0.2580412433114892, - "grad_norm": 2.21127295223937, - "learning_rate": 3.47900527676272e-06, - "loss": 1.1443, - "step": 2146 - }, - { - "epoch": 0.2581614862021283, - "grad_norm": 2.4507152603210374, - "learning_rate": 3.478480796979835e-06, - "loss": 1.1156, - "step": 2147 - }, - { - "epoch": 0.25828172909276736, - "grad_norm": 1.5184567925891348, - "learning_rate": 3.4779560929125894e-06, - "loss": 1.0004, - "step": 2148 - }, - { - "epoch": 0.2584019719834065, - "grad_norm": 0.8361012307368668, - "learning_rate": 3.4774311646405783e-06, - "loss": 0.8157, - "step": 2149 - }, - { - "epoch": 0.2585222148740456, - "grad_norm": 1.67630914229615, - "learning_rate": 3.476906012243435e-06, - "loss": 1.0627, - "step": 2150 - }, - { - "epoch": 0.25864245776468464, - "grad_norm": 1.513679671728912, - "learning_rate": 3.476380635800824e-06, - "loss": 1.0449, - "step": 2151 - }, - { - "epoch": 0.25876270065532375, - "grad_norm": 2.4127270606518385, - "learning_rate": 3.475855035392444e-06, - "loss": 1.0833, - "step": 2152 - }, - { - "epoch": 0.25888294354596286, - "grad_norm": 1.843583309591155, - "learning_rate": 3.475329211098029e-06, - "loss": 0.9336, - "step": 2153 - }, - { - "epoch": 0.2590031864366019, - "grad_norm": 1.5791238324370818, - "learning_rate": 3.4748031629973453e-06, - "loss": 1.0518, - "step": 2154 - }, - { - "epoch": 0.25912342932724103, - "grad_norm": 0.8399407619069802, - "learning_rate": 3.4742768911701944e-06, - "loss": 0.8065, - "step": 2155 - }, - { - "epoch": 0.25924367221788014, - "grad_norm": 3.3737426110971502, - "learning_rate": 3.4737503956964113e-06, - "loss": 0.9317, - "step": 2156 - }, - { - "epoch": 0.2593639151085192, - "grad_norm": 3.26693684634892, - "learning_rate": 3.473223676655865e-06, - "loss": 0.9037, - "step": 2157 - }, - { - "epoch": 0.2594841579991583, - "grad_norm": 3.087245229487711, - "learning_rate": 3.472696734128459e-06, - "loss": 1.0306, - "step": 2158 - }, - { - "epoch": 0.2596044008897974, - "grad_norm": 1.5931125937001611, - "learning_rate": 3.4721695681941286e-06, - "loss": 0.9823, - "step": 2159 - }, - { - "epoch": 0.25972464378043647, - "grad_norm": 1.9237407528938202, - "learning_rate": 3.471642178932845e-06, - "loss": 1.0585, - "step": 2160 - }, - { - "epoch": 0.2598448866710756, - "grad_norm": 1.8585918925189069, - "learning_rate": 3.471114566424613e-06, - "loss": 1.1277, - "step": 2161 - }, - { - "epoch": 0.25996512956171464, - "grad_norm": 2.5192210398299655, - "learning_rate": 3.4705867307494715e-06, - "loss": 0.9946, - "step": 2162 - }, - { - "epoch": 0.26008537245235375, - "grad_norm": 2.7556682604212215, - "learning_rate": 3.470058671987492e-06, - "loss": 1.072, - "step": 2163 - }, - { - "epoch": 0.26020561534299286, - "grad_norm": 1.7478537340925442, - "learning_rate": 3.4695303902187805e-06, - "loss": 1.0674, - "step": 2164 - }, - { - "epoch": 0.2603258582336319, - "grad_norm": 1.7651791980322484, - "learning_rate": 3.469001885523478e-06, - "loss": 1.0176, - "step": 2165 - }, - { - "epoch": 0.260446101124271, - "grad_norm": 2.0135886381393293, - "learning_rate": 3.4684731579817568e-06, - "loss": 1.039, - "step": 2166 - }, - { - "epoch": 0.26056634401491013, - "grad_norm": 1.4740246150162581, - "learning_rate": 3.4679442076738247e-06, - "loss": 0.9995, - "step": 2167 - }, - { - "epoch": 0.2606865869055492, - "grad_norm": 2.006409626252294, - "learning_rate": 3.4674150346799245e-06, - "loss": 1.0636, - "step": 2168 - }, - { - "epoch": 0.2608068297961883, - "grad_norm": 2.5704649906085737, - "learning_rate": 3.4668856390803295e-06, - "loss": 1.0317, - "step": 2169 - }, - { - "epoch": 0.2609270726868274, - "grad_norm": 1.8300212578502906, - "learning_rate": 3.4663560209553495e-06, - "loss": 1.1253, - "step": 2170 - }, - { - "epoch": 0.26104731557746647, - "grad_norm": 1.6545515168447644, - "learning_rate": 3.4658261803853267e-06, - "loss": 1.0171, - "step": 2171 - }, - { - "epoch": 0.2611675584681056, - "grad_norm": 1.6673375527838075, - "learning_rate": 3.4652961174506383e-06, - "loss": 1.0395, - "step": 2172 - }, - { - "epoch": 0.2612878013587447, - "grad_norm": 1.0234907878951276, - "learning_rate": 3.464765832231694e-06, - "loss": 0.8382, - "step": 2173 - }, - { - "epoch": 0.26140804424938374, - "grad_norm": 1.9465725046838638, - "learning_rate": 3.4642353248089373e-06, - "loss": 0.9338, - "step": 2174 - }, - { - "epoch": 0.26152828714002285, - "grad_norm": 2.02548608950375, - "learning_rate": 3.463704595262846e-06, - "loss": 1.0312, - "step": 2175 - }, - { - "epoch": 0.26164853003066196, - "grad_norm": 2.4626661254305295, - "learning_rate": 3.463173643673931e-06, - "loss": 0.9341, - "step": 2176 - }, - { - "epoch": 0.261768772921301, - "grad_norm": 0.9601173001886866, - "learning_rate": 3.4626424701227387e-06, - "loss": 0.8955, - "step": 2177 - }, - { - "epoch": 0.26188901581194013, - "grad_norm": 0.9169464587427317, - "learning_rate": 3.4621110746898452e-06, - "loss": 0.8513, - "step": 2178 - }, - { - "epoch": 0.2620092587025792, - "grad_norm": 1.385917410549025, - "learning_rate": 3.4615794574558654e-06, - "loss": 0.9735, - "step": 2179 - }, - { - "epoch": 0.2621295015932183, - "grad_norm": 3.3200828562589093, - "learning_rate": 3.4610476185014436e-06, - "loss": 1.078, - "step": 2180 - }, - { - "epoch": 0.2622497444838574, - "grad_norm": 1.5812037434804556, - "learning_rate": 3.4605155579072597e-06, - "loss": 1.0259, - "step": 2181 - }, - { - "epoch": 0.26236998737449646, - "grad_norm": 1.6694619420989936, - "learning_rate": 3.459983275754027e-06, - "loss": 0.9382, - "step": 2182 - }, - { - "epoch": 0.26249023026513557, - "grad_norm": 2.4443072608061147, - "learning_rate": 3.4594507721224918e-06, - "loss": 1.0238, - "step": 2183 - }, - { - "epoch": 0.2626104731557747, - "grad_norm": 2.2114686361663733, - "learning_rate": 3.4589180470934353e-06, - "loss": 1.0504, - "step": 2184 - }, - { - "epoch": 0.26273071604641374, - "grad_norm": 1.6490139066481058, - "learning_rate": 3.4583851007476713e-06, - "loss": 0.9992, - "step": 2185 - }, - { - "epoch": 0.26285095893705285, - "grad_norm": 1.8775655360663501, - "learning_rate": 3.4578519331660464e-06, - "loss": 0.9192, - "step": 2186 - }, - { - "epoch": 0.26297120182769196, - "grad_norm": 1.7596305431211723, - "learning_rate": 3.4573185444294426e-06, - "loss": 1.0509, - "step": 2187 - }, - { - "epoch": 0.263091444718331, - "grad_norm": 1.5585337844824048, - "learning_rate": 3.456784934618774e-06, - "loss": 1.0154, - "step": 2188 - }, - { - "epoch": 0.2632116876089701, - "grad_norm": 2.873715818781791, - "learning_rate": 3.4562511038149897e-06, - "loss": 1.0267, - "step": 2189 - }, - { - "epoch": 0.26333193049960923, - "grad_norm": 0.8801033514194156, - "learning_rate": 3.4557170520990705e-06, - "loss": 0.833, - "step": 2190 - }, - { - "epoch": 0.2634521733902483, - "grad_norm": 1.4207450652834743, - "learning_rate": 3.4551827795520324e-06, - "loss": 1.0926, - "step": 2191 - }, - { - "epoch": 0.2635724162808874, - "grad_norm": 1.7656438370771637, - "learning_rate": 3.4546482862549226e-06, - "loss": 1.0677, - "step": 2192 - }, - { - "epoch": 0.2636926591715265, - "grad_norm": 1.8284407122969588, - "learning_rate": 3.4541135722888253e-06, - "loss": 1.0162, - "step": 2193 - }, - { - "epoch": 0.26381290206216557, - "grad_norm": 1.8274402477134548, - "learning_rate": 3.453578637734854e-06, - "loss": 1.0346, - "step": 2194 - }, - { - "epoch": 0.2639331449528047, - "grad_norm": 1.559518563612917, - "learning_rate": 3.4530434826741605e-06, - "loss": 1.0164, - "step": 2195 - }, - { - "epoch": 0.26405338784344373, - "grad_norm": 1.4800688271370166, - "learning_rate": 3.452508107187926e-06, - "loss": 0.92, - "step": 2196 - }, - { - "epoch": 0.26417363073408284, - "grad_norm": 1.9501148679108302, - "learning_rate": 3.451972511357366e-06, - "loss": 0.9941, - "step": 2197 - }, - { - "epoch": 0.26429387362472195, - "grad_norm": 1.4897456936505935, - "learning_rate": 3.45143669526373e-06, - "loss": 1.0832, - "step": 2198 - }, - { - "epoch": 0.264414116515361, - "grad_norm": 0.8144274005440807, - "learning_rate": 3.450900658988302e-06, - "loss": 0.8669, - "step": 2199 - }, - { - "epoch": 0.2645343594060001, - "grad_norm": 1.6705088515174191, - "learning_rate": 3.450364402612397e-06, - "loss": 1.0086, - "step": 2200 - }, - { - "epoch": 0.26465460229663923, - "grad_norm": 1.6395800941496685, - "learning_rate": 3.449827926217366e-06, - "loss": 1.0617, - "step": 2201 - }, - { - "epoch": 0.2647748451872783, - "grad_norm": 1.7611063296630538, - "learning_rate": 3.449291229884591e-06, - "loss": 1.036, - "step": 2202 - }, - { - "epoch": 0.2648950880779174, - "grad_norm": 2.3018816921479686, - "learning_rate": 3.4487543136954887e-06, - "loss": 1.0942, - "step": 2203 - }, - { - "epoch": 0.2650153309685565, - "grad_norm": 1.6863014768713327, - "learning_rate": 3.448217177731509e-06, - "loss": 1.1413, - "step": 2204 - }, - { - "epoch": 0.26513557385919556, - "grad_norm": 1.9735292221584917, - "learning_rate": 3.4476798220741348e-06, - "loss": 1.0046, - "step": 2205 - }, - { - "epoch": 0.26525581674983467, - "grad_norm": 1.4550263466039106, - "learning_rate": 3.4471422468048826e-06, - "loss": 1.013, - "step": 2206 - }, - { - "epoch": 0.2653760596404738, - "grad_norm": 2.019074320059645, - "learning_rate": 3.4466044520053022e-06, - "loss": 0.9577, - "step": 2207 - }, - { - "epoch": 0.26549630253111284, - "grad_norm": 1.8135632651889075, - "learning_rate": 3.446066437756977e-06, - "loss": 0.8385, - "step": 2208 - }, - { - "epoch": 0.26561654542175195, - "grad_norm": 2.2960306289371086, - "learning_rate": 3.4455282041415224e-06, - "loss": 0.9856, - "step": 2209 - }, - { - "epoch": 0.265736788312391, - "grad_norm": 2.1020340514541864, - "learning_rate": 3.4449897512405894e-06, - "loss": 1.1076, - "step": 2210 - }, - { - "epoch": 0.2658570312030301, - "grad_norm": 2.6653841828913194, - "learning_rate": 3.444451079135859e-06, - "loss": 0.9771, - "step": 2211 - }, - { - "epoch": 0.2659772740936692, - "grad_norm": 1.7251374590741475, - "learning_rate": 3.4439121879090493e-06, - "loss": 0.9668, - "step": 2212 - }, - { - "epoch": 0.2660975169843083, - "grad_norm": 2.5989053806167477, - "learning_rate": 3.4433730776419082e-06, - "loss": 1.0551, - "step": 2213 - }, - { - "epoch": 0.2662177598749474, - "grad_norm": 2.2630611250743438, - "learning_rate": 3.4428337484162183e-06, - "loss": 1.0334, - "step": 2214 - }, - { - "epoch": 0.2663380027655865, - "grad_norm": 2.011986931267342, - "learning_rate": 3.442294200313797e-06, - "loss": 1.0792, - "step": 2215 - }, - { - "epoch": 0.26645824565622556, - "grad_norm": 0.8631392487296596, - "learning_rate": 3.4417544334164916e-06, - "loss": 0.8118, - "step": 2216 - }, - { - "epoch": 0.26657848854686467, - "grad_norm": 1.49519851234162, - "learning_rate": 3.4412144478061854e-06, - "loss": 1.0112, - "step": 2217 - }, - { - "epoch": 0.2666987314375038, - "grad_norm": 1.8873234445215592, - "learning_rate": 3.4406742435647925e-06, - "loss": 0.9794, - "step": 2218 - }, - { - "epoch": 0.26681897432814283, - "grad_norm": 2.0324285756003806, - "learning_rate": 3.440133820774263e-06, - "loss": 1.0185, - "step": 2219 - }, - { - "epoch": 0.26693921721878194, - "grad_norm": 2.1914419351483976, - "learning_rate": 3.439593179516578e-06, - "loss": 1.044, - "step": 2220 - }, - { - "epoch": 0.26705946010942105, - "grad_norm": 2.2001090542900466, - "learning_rate": 3.4390523198737524e-06, - "loss": 1.0387, - "step": 2221 - }, - { - "epoch": 0.2671797030000601, - "grad_norm": 2.073462971787424, - "learning_rate": 3.4385112419278333e-06, - "loss": 0.9623, - "step": 2222 - }, - { - "epoch": 0.2672999458906992, - "grad_norm": 0.809858061634384, - "learning_rate": 3.4379699457609033e-06, - "loss": 0.9263, - "step": 2223 - }, - { - "epoch": 0.26742018878133833, - "grad_norm": 1.650876956589909, - "learning_rate": 3.4374284314550755e-06, - "loss": 1.1247, - "step": 2224 - }, - { - "epoch": 0.2675404316719774, - "grad_norm": 3.754008757177106, - "learning_rate": 3.436886699092498e-06, - "loss": 1.0352, - "step": 2225 - }, - { - "epoch": 0.2676606745626165, - "grad_norm": 2.6072671565754124, - "learning_rate": 3.4363447487553502e-06, - "loss": 0.9473, - "step": 2226 - }, - { - "epoch": 0.26778091745325555, - "grad_norm": 2.123433713930641, - "learning_rate": 3.4358025805258455e-06, - "loss": 1.0172, - "step": 2227 - }, - { - "epoch": 0.26790116034389466, - "grad_norm": 1.7936137582962461, - "learning_rate": 3.435260194486232e-06, - "loss": 1.0603, - "step": 2228 - }, - { - "epoch": 0.2680214032345338, - "grad_norm": 5.890021417187027, - "learning_rate": 3.4347175907187875e-06, - "loss": 1.0411, - "step": 2229 - }, - { - "epoch": 0.26814164612517283, - "grad_norm": 1.6984642019690859, - "learning_rate": 3.4341747693058254e-06, - "loss": 1.1022, - "step": 2230 - }, - { - "epoch": 0.26826188901581194, - "grad_norm": 1.8358051658174517, - "learning_rate": 3.4336317303296916e-06, - "loss": 0.9885, - "step": 2231 - }, - { - "epoch": 0.26838213190645105, - "grad_norm": 2.630683649505511, - "learning_rate": 3.4330884738727635e-06, - "loss": 0.9771, - "step": 2232 - }, - { - "epoch": 0.2685023747970901, - "grad_norm": 1.9034535893242646, - "learning_rate": 3.4325450000174535e-06, - "loss": 0.9433, - "step": 2233 - }, - { - "epoch": 0.2686226176877292, - "grad_norm": 4.824721633708357, - "learning_rate": 3.4320013088462067e-06, - "loss": 0.9684, - "step": 2234 - }, - { - "epoch": 0.2687428605783683, - "grad_norm": 1.4819311000545328, - "learning_rate": 3.431457400441499e-06, - "loss": 1.0464, - "step": 2235 - }, - { - "epoch": 0.2688631034690074, - "grad_norm": 1.0085210157860998, - "learning_rate": 3.4309132748858424e-06, - "loss": 0.9196, - "step": 2236 - }, - { - "epoch": 0.2689833463596465, - "grad_norm": 1.550436524480403, - "learning_rate": 3.430368932261779e-06, - "loss": 1.0697, - "step": 2237 - }, - { - "epoch": 0.2691035892502856, - "grad_norm": 1.8071612953266403, - "learning_rate": 3.429824372651886e-06, - "loss": 0.9809, - "step": 2238 - }, - { - "epoch": 0.26922383214092466, - "grad_norm": 2.691648165124505, - "learning_rate": 3.4292795961387732e-06, - "loss": 1.0652, - "step": 2239 - }, - { - "epoch": 0.26934407503156377, - "grad_norm": 2.1776234688337146, - "learning_rate": 3.4287346028050818e-06, - "loss": 1.1016, - "step": 2240 - }, - { - "epoch": 0.2694643179222028, - "grad_norm": 2.258068452998654, - "learning_rate": 3.4281893927334866e-06, - "loss": 1.0232, - "step": 2241 - }, - { - "epoch": 0.26958456081284193, - "grad_norm": 2.0385238696065264, - "learning_rate": 3.4276439660066963e-06, - "loss": 0.9779, - "step": 2242 - }, - { - "epoch": 0.26970480370348104, - "grad_norm": 2.1794074640900103, - "learning_rate": 3.427098322707452e-06, - "loss": 1.0714, - "step": 2243 - }, - { - "epoch": 0.2698250465941201, - "grad_norm": 2.1028549587789294, - "learning_rate": 3.426552462918526e-06, - "loss": 1.1208, - "step": 2244 - }, - { - "epoch": 0.2699452894847592, - "grad_norm": 3.8680670460489037, - "learning_rate": 3.426006386722726e-06, - "loss": 0.9725, - "step": 2245 - }, - { - "epoch": 0.2700655323753983, - "grad_norm": 1.8735176584690787, - "learning_rate": 3.4254600942028914e-06, - "loss": 1.1452, - "step": 2246 - }, - { - "epoch": 0.2701857752660374, - "grad_norm": 1.9521350640682802, - "learning_rate": 3.424913585441893e-06, - "loss": 1.0387, - "step": 2247 - }, - { - "epoch": 0.2703060181566765, - "grad_norm": 1.8183058783081145, - "learning_rate": 3.4243668605226374e-06, - "loss": 1.1029, - "step": 2248 - }, - { - "epoch": 0.2704262610473156, - "grad_norm": 2.684404764243548, - "learning_rate": 3.423819919528061e-06, - "loss": 1.0597, - "step": 2249 - }, - { - "epoch": 0.27054650393795465, - "grad_norm": 1.6305783697839624, - "learning_rate": 3.4232727625411355e-06, - "loss": 1.0161, - "step": 2250 - }, - { - "epoch": 0.27066674682859376, - "grad_norm": 1.8052879094368472, - "learning_rate": 3.4227253896448626e-06, - "loss": 1.0881, - "step": 2251 - }, - { - "epoch": 0.2707869897192329, - "grad_norm": 1.9552456139038665, - "learning_rate": 3.42217780092228e-06, - "loss": 1.0357, - "step": 2252 - }, - { - "epoch": 0.27090723260987193, - "grad_norm": 0.8922906324968588, - "learning_rate": 3.421629996456456e-06, - "loss": 0.8672, - "step": 2253 - }, - { - "epoch": 0.27102747550051104, - "grad_norm": 1.7228120832825033, - "learning_rate": 3.421081976330491e-06, - "loss": 1.0497, - "step": 2254 - }, - { - "epoch": 0.27114771839115015, - "grad_norm": 1.8973651259062525, - "learning_rate": 3.4205337406275207e-06, - "loss": 1.0968, - "step": 2255 - }, - { - "epoch": 0.2712679612817892, - "grad_norm": 5.426009039494837, - "learning_rate": 3.4199852894307114e-06, - "loss": 0.9797, - "step": 2256 - }, - { - "epoch": 0.2713882041724283, - "grad_norm": 2.4894849898787386, - "learning_rate": 3.419436622823262e-06, - "loss": 1.0148, - "step": 2257 - }, - { - "epoch": 0.27150844706306737, - "grad_norm": 1.5757211184813604, - "learning_rate": 3.4188877408884063e-06, - "loss": 0.9626, - "step": 2258 - }, - { - "epoch": 0.2716286899537065, - "grad_norm": 3.326761315578573, - "learning_rate": 3.4183386437094088e-06, - "loss": 0.8845, - "step": 2259 - }, - { - "epoch": 0.2717489328443456, - "grad_norm": 2.167632975651683, - "learning_rate": 3.417789331369565e-06, - "loss": 1.0523, - "step": 2260 - }, - { - "epoch": 0.27186917573498465, - "grad_norm": 2.115732967264426, - "learning_rate": 3.4172398039522088e-06, - "loss": 1.1336, - "step": 2261 - }, - { - "epoch": 0.27198941862562376, - "grad_norm": 1.5545589067961036, - "learning_rate": 3.4166900615407e-06, - "loss": 1.0183, - "step": 2262 - }, - { - "epoch": 0.27210966151626287, - "grad_norm": 2.1680727822027155, - "learning_rate": 3.416140104218436e-06, - "loss": 0.9722, - "step": 2263 - }, - { - "epoch": 0.2722299044069019, - "grad_norm": 0.8534691141056598, - "learning_rate": 3.4155899320688437e-06, - "loss": 0.975, - "step": 2264 - }, - { - "epoch": 0.27235014729754103, - "grad_norm": 2.1199244302200375, - "learning_rate": 3.415039545175384e-06, - "loss": 0.9665, - "step": 2265 - }, - { - "epoch": 0.27247039018818014, - "grad_norm": 2.233364597025817, - "learning_rate": 3.414488943621551e-06, - "loss": 0.8839, - "step": 2266 - }, - { - "epoch": 0.2725906330788192, - "grad_norm": 1.8352095439085698, - "learning_rate": 3.41393812749087e-06, - "loss": 0.9701, - "step": 2267 - }, - { - "epoch": 0.2727108759694583, - "grad_norm": 2.1310390602946407, - "learning_rate": 3.4133870968668984e-06, - "loss": 0.9514, - "step": 2268 - }, - { - "epoch": 0.2728311188600974, - "grad_norm": 1.573657444541804, - "learning_rate": 3.412835851833229e-06, - "loss": 1.0092, - "step": 2269 - }, - { - "epoch": 0.2729513617507365, - "grad_norm": 1.64463526743396, - "learning_rate": 3.4122843924734834e-06, - "loss": 1.003, - "step": 2270 - }, - { - "epoch": 0.2730716046413756, - "grad_norm": 1.9753876462600515, - "learning_rate": 3.411732718871319e-06, - "loss": 1.1147, - "step": 2271 - }, - { - "epoch": 0.27319184753201464, - "grad_norm": 1.441355800004222, - "learning_rate": 3.4111808311104227e-06, - "loss": 1.0107, - "step": 2272 - }, - { - "epoch": 0.27331209042265375, - "grad_norm": 6.724546123631517, - "learning_rate": 3.410628729274517e-06, - "loss": 0.9193, - "step": 2273 - }, - { - "epoch": 0.27343233331329286, - "grad_norm": 1.7432143944064145, - "learning_rate": 3.4100764134473546e-06, - "loss": 1.0587, - "step": 2274 - }, - { - "epoch": 0.2735525762039319, - "grad_norm": 2.3154196994393916, - "learning_rate": 3.4095238837127215e-06, - "loss": 1.0862, - "step": 2275 - }, - { - "epoch": 0.27367281909457103, - "grad_norm": 1.7287216839571071, - "learning_rate": 3.4089711401544355e-06, - "loss": 1.0281, - "step": 2276 - }, - { - "epoch": 0.27379306198521014, - "grad_norm": 3.086000543264266, - "learning_rate": 3.4084181828563486e-06, - "loss": 0.9076, - "step": 2277 - }, - { - "epoch": 0.2739133048758492, - "grad_norm": 1.561821370572208, - "learning_rate": 3.4078650119023428e-06, - "loss": 0.9326, - "step": 2278 - }, - { - "epoch": 0.2740335477664883, - "grad_norm": 1.9102672084417156, - "learning_rate": 3.4073116273763337e-06, - "loss": 0.9719, - "step": 2279 - }, - { - "epoch": 0.2741537906571274, - "grad_norm": 1.652784212172919, - "learning_rate": 3.40675802936227e-06, - "loss": 1.0446, - "step": 2280 - }, - { - "epoch": 0.27427403354776647, - "grad_norm": 1.578346539045689, - "learning_rate": 3.4062042179441318e-06, - "loss": 0.9386, - "step": 2281 - }, - { - "epoch": 0.2743942764384056, - "grad_norm": 1.6676032851915399, - "learning_rate": 3.4056501932059314e-06, - "loss": 1.0409, - "step": 2282 - }, - { - "epoch": 0.2745145193290447, - "grad_norm": 0.8819327908511114, - "learning_rate": 3.405095955231715e-06, - "loss": 0.8441, - "step": 2283 - }, - { - "epoch": 0.27463476221968375, - "grad_norm": 2.059531184801987, - "learning_rate": 3.4045415041055585e-06, - "loss": 1.1714, - "step": 2284 - }, - { - "epoch": 0.27475500511032286, - "grad_norm": 2.1529573372504407, - "learning_rate": 3.4039868399115728e-06, - "loss": 1.0257, - "step": 2285 - }, - { - "epoch": 0.27487524800096197, - "grad_norm": 1.6678506063389813, - "learning_rate": 3.4034319627339003e-06, - "loss": 1.0293, - "step": 2286 - }, - { - "epoch": 0.274995490891601, - "grad_norm": 2.138417017134956, - "learning_rate": 3.402876872656715e-06, - "loss": 0.9222, - "step": 2287 - }, - { - "epoch": 0.27511573378224013, - "grad_norm": 2.1167694467336307, - "learning_rate": 3.402321569764223e-06, - "loss": 1.1268, - "step": 2288 - }, - { - "epoch": 0.2752359766728792, - "grad_norm": 1.6802449565505835, - "learning_rate": 3.4017660541406635e-06, - "loss": 1.0592, - "step": 2289 - }, - { - "epoch": 0.2753562195635183, - "grad_norm": 1.5422775845174819, - "learning_rate": 3.4012103258703092e-06, - "loss": 0.9718, - "step": 2290 - }, - { - "epoch": 0.2754764624541574, - "grad_norm": 1.7498172937112175, - "learning_rate": 3.4006543850374616e-06, - "loss": 1.0528, - "step": 2291 - }, - { - "epoch": 0.27559670534479647, - "grad_norm": 1.8479530144535412, - "learning_rate": 3.400098231726458e-06, - "loss": 0.9832, - "step": 2292 - }, - { - "epoch": 0.2757169482354356, - "grad_norm": 1.9552958516806318, - "learning_rate": 3.3995418660216657e-06, - "loss": 1.1038, - "step": 2293 - }, - { - "epoch": 0.2758371911260747, - "grad_norm": 2.1417580828718434, - "learning_rate": 3.3989852880074848e-06, - "loss": 1.0476, - "step": 2294 - }, - { - "epoch": 0.27595743401671374, - "grad_norm": 0.7662422261302702, - "learning_rate": 3.398428497768348e-06, - "loss": 0.8746, - "step": 2295 - }, - { - "epoch": 0.27607767690735285, - "grad_norm": 1.6447918248272129, - "learning_rate": 3.3978714953887205e-06, - "loss": 0.9474, - "step": 2296 - }, - { - "epoch": 0.27619791979799196, - "grad_norm": 1.7295856504425553, - "learning_rate": 3.397314280953098e-06, - "loss": 1.0952, - "step": 2297 - }, - { - "epoch": 0.276318162688631, - "grad_norm": 1.9637571936657108, - "learning_rate": 3.3967568545460108e-06, - "loss": 1.0271, - "step": 2298 - }, - { - "epoch": 0.27643840557927013, - "grad_norm": 1.728513280144949, - "learning_rate": 3.3961992162520185e-06, - "loss": 1.0268, - "step": 2299 - }, - { - "epoch": 0.27655864846990924, - "grad_norm": 2.2439000836599696, - "learning_rate": 3.3956413661557156e-06, - "loss": 0.9534, - "step": 2300 - }, - { - "epoch": 0.2766788913605483, - "grad_norm": 2.2387186791355234, - "learning_rate": 3.3950833043417273e-06, - "loss": 0.8868, - "step": 2301 - }, - { - "epoch": 0.2767991342511874, - "grad_norm": 2.31152880095491, - "learning_rate": 3.3945250308947105e-06, - "loss": 0.947, - "step": 2302 - }, - { - "epoch": 0.2769193771418265, - "grad_norm": 1.241513243206306, - "learning_rate": 3.3939665458993556e-06, - "loss": 0.934, - "step": 2303 - }, - { - "epoch": 0.27703962003246557, - "grad_norm": 1.749765448519359, - "learning_rate": 3.3934078494403843e-06, - "loss": 0.9932, - "step": 2304 - }, - { - "epoch": 0.2771598629231047, - "grad_norm": 1.8913171935515034, - "learning_rate": 3.3928489416025495e-06, - "loss": 1.0429, - "step": 2305 - }, - { - "epoch": 0.27728010581374374, - "grad_norm": 1.9574109468628915, - "learning_rate": 3.392289822470638e-06, - "loss": 1.0166, - "step": 2306 - }, - { - "epoch": 0.27740034870438285, - "grad_norm": 2.876638526056885, - "learning_rate": 3.3917304921294674e-06, - "loss": 0.9883, - "step": 2307 - }, - { - "epoch": 0.27752059159502196, - "grad_norm": 1.5002189317514376, - "learning_rate": 3.3911709506638876e-06, - "loss": 1.0417, - "step": 2308 - }, - { - "epoch": 0.277640834485661, - "grad_norm": 1.8059640308232723, - "learning_rate": 3.390611198158781e-06, - "loss": 1.0409, - "step": 2309 - }, - { - "epoch": 0.2777610773763001, - "grad_norm": 1.7532990557918313, - "learning_rate": 3.3900512346990612e-06, - "loss": 1.1297, - "step": 2310 - }, - { - "epoch": 0.27788132026693924, - "grad_norm": 2.891307853255153, - "learning_rate": 3.389491060369674e-06, - "loss": 0.8868, - "step": 2311 - }, - { - "epoch": 0.2780015631575783, - "grad_norm": 1.7174203064043752, - "learning_rate": 3.388930675255598e-06, - "loss": 1.1224, - "step": 2312 - }, - { - "epoch": 0.2781218060482174, - "grad_norm": 2.6594643386196952, - "learning_rate": 3.388370079441843e-06, - "loss": 1.019, - "step": 2313 - }, - { - "epoch": 0.2782420489388565, - "grad_norm": 1.8570731524601438, - "learning_rate": 3.3878092730134505e-06, - "loss": 1.1567, - "step": 2314 - }, - { - "epoch": 0.27836229182949557, - "grad_norm": 1.5269407243046997, - "learning_rate": 3.3872482560554947e-06, - "loss": 1.0331, - "step": 2315 - }, - { - "epoch": 0.2784825347201347, - "grad_norm": 0.8274969922613242, - "learning_rate": 3.386687028653082e-06, - "loss": 0.826, - "step": 2316 - }, - { - "epoch": 0.2786027776107738, - "grad_norm": 1.6608359706849427, - "learning_rate": 3.386125590891349e-06, - "loss": 1.0752, - "step": 2317 - }, - { - "epoch": 0.27872302050141284, - "grad_norm": 2.0165640839056644, - "learning_rate": 3.3855639428554657e-06, - "loss": 1.0622, - "step": 2318 - }, - { - "epoch": 0.27884326339205195, - "grad_norm": 1.7932096830563977, - "learning_rate": 3.385002084630635e-06, - "loss": 1.0395, - "step": 2319 - }, - { - "epoch": 0.278963506282691, - "grad_norm": 1.8958397819772672, - "learning_rate": 3.384440016302088e-06, - "loss": 1.067, - "step": 2320 - }, - { - "epoch": 0.2790837491733301, - "grad_norm": 1.9636661345058573, - "learning_rate": 3.3838777379550923e-06, - "loss": 0.8502, - "step": 2321 - }, - { - "epoch": 0.27920399206396923, - "grad_norm": 2.068759782197534, - "learning_rate": 3.383315249674944e-06, - "loss": 1.0196, - "step": 2322 - }, - { - "epoch": 0.2793242349546083, - "grad_norm": 2.071971415812273, - "learning_rate": 3.3827525515469715e-06, - "loss": 1.0876, - "step": 2323 - }, - { - "epoch": 0.2794444778452474, - "grad_norm": 2.8684732739987346, - "learning_rate": 3.3821896436565367e-06, - "loss": 0.9332, - "step": 2324 - }, - { - "epoch": 0.2795647207358865, - "grad_norm": 1.6956701648516848, - "learning_rate": 3.381626526089032e-06, - "loss": 0.9288, - "step": 2325 - }, - { - "epoch": 0.27968496362652556, - "grad_norm": 2.0343320499228392, - "learning_rate": 3.3810631989298815e-06, - "loss": 1.0166, - "step": 2326 - }, - { - "epoch": 0.2798052065171647, - "grad_norm": 3.51727446114314, - "learning_rate": 3.3804996622645423e-06, - "loss": 1.071, - "step": 2327 - }, - { - "epoch": 0.2799254494078038, - "grad_norm": 1.7570465453598647, - "learning_rate": 3.3799359161785015e-06, - "loss": 1.1175, - "step": 2328 - }, - { - "epoch": 0.28004569229844284, - "grad_norm": 1.5208105669102991, - "learning_rate": 3.3793719607572798e-06, - "loss": 1.0833, - "step": 2329 - }, - { - "epoch": 0.28016593518908195, - "grad_norm": 1.83270251374186, - "learning_rate": 3.378807796086428e-06, - "loss": 1.0067, - "step": 2330 - }, - { - "epoch": 0.28028617807972106, - "grad_norm": 2.9908402798935927, - "learning_rate": 3.37824342225153e-06, - "loss": 0.997, - "step": 2331 - }, - { - "epoch": 0.2804064209703601, - "grad_norm": 1.662671147765418, - "learning_rate": 3.3776788393382006e-06, - "loss": 1.0004, - "step": 2332 - }, - { - "epoch": 0.2805266638609992, - "grad_norm": 2.195438150631507, - "learning_rate": 3.3771140474320872e-06, - "loss": 0.9939, - "step": 2333 - }, - { - "epoch": 0.28064690675163834, - "grad_norm": 1.6749752213652296, - "learning_rate": 3.3765490466188664e-06, - "loss": 1.0227, - "step": 2334 - }, - { - "epoch": 0.2807671496422774, - "grad_norm": 6.714067965747862, - "learning_rate": 3.3759838369842508e-06, - "loss": 0.9642, - "step": 2335 - }, - { - "epoch": 0.2808873925329165, - "grad_norm": 2.5637204125090602, - "learning_rate": 3.375418418613981e-06, - "loss": 0.9596, - "step": 2336 - }, - { - "epoch": 0.28100763542355556, - "grad_norm": 2.1022410378656917, - "learning_rate": 3.374852791593831e-06, - "loss": 1.0565, - "step": 2337 - }, - { - "epoch": 0.28112787831419467, - "grad_norm": 2.8378477782488982, - "learning_rate": 3.374286956009605e-06, - "loss": 0.7715, - "step": 2338 - }, - { - "epoch": 0.2812481212048338, - "grad_norm": 1.9207389389090987, - "learning_rate": 3.3737209119471405e-06, - "loss": 0.9936, - "step": 2339 - }, - { - "epoch": 0.28136836409547283, - "grad_norm": 2.0192846151305854, - "learning_rate": 3.373154659492306e-06, - "loss": 0.8707, - "step": 2340 - }, - { - "epoch": 0.28148860698611194, - "grad_norm": 2.3045480459662784, - "learning_rate": 3.3725881987310016e-06, - "loss": 1.0749, - "step": 2341 - }, - { - "epoch": 0.28160884987675106, - "grad_norm": 1.6503710502953433, - "learning_rate": 3.372021529749159e-06, - "loss": 1.1011, - "step": 2342 - }, - { - "epoch": 0.2817290927673901, - "grad_norm": 1.6508711798057512, - "learning_rate": 3.3714546526327405e-06, - "loss": 1.1494, - "step": 2343 - }, - { - "epoch": 0.2818493356580292, - "grad_norm": 2.026702132521221, - "learning_rate": 3.3708875674677423e-06, - "loss": 1.1086, - "step": 2344 - }, - { - "epoch": 0.28196957854866833, - "grad_norm": 2.216397179688217, - "learning_rate": 3.37032027434019e-06, - "loss": 1.049, - "step": 2345 - }, - { - "epoch": 0.2820898214393074, - "grad_norm": 1.7475141853771838, - "learning_rate": 3.369752773336141e-06, - "loss": 1.0607, - "step": 2346 - }, - { - "epoch": 0.2822100643299465, - "grad_norm": 1.579664782201337, - "learning_rate": 3.3691850645416864e-06, - "loss": 1.0131, - "step": 2347 - }, - { - "epoch": 0.2823303072205856, - "grad_norm": 2.0862089401967827, - "learning_rate": 3.368617148042945e-06, - "loss": 1.06, - "step": 2348 - }, - { - "epoch": 0.28245055011122466, - "grad_norm": 1.83321383115681, - "learning_rate": 3.368049023926071e-06, - "loss": 1.0742, - "step": 2349 - }, - { - "epoch": 0.2825707930018638, - "grad_norm": 1.500345746565661, - "learning_rate": 3.3674806922772476e-06, - "loss": 1.0629, - "step": 2350 - }, - { - "epoch": 0.28269103589250283, - "grad_norm": 1.66234223900773, - "learning_rate": 3.3669121531826904e-06, - "loss": 0.9713, - "step": 2351 - }, - { - "epoch": 0.28281127878314194, - "grad_norm": 1.6740627769767642, - "learning_rate": 3.366343406728647e-06, - "loss": 1.0599, - "step": 2352 - }, - { - "epoch": 0.28293152167378105, - "grad_norm": 1.7821581349235385, - "learning_rate": 3.3657744530013946e-06, - "loss": 0.9122, - "step": 2353 - }, - { - "epoch": 0.2830517645644201, - "grad_norm": 1.8814532806978574, - "learning_rate": 3.3652052920872437e-06, - "loss": 0.9452, - "step": 2354 - }, - { - "epoch": 0.2831720074550592, - "grad_norm": 2.2969286646380187, - "learning_rate": 3.3646359240725355e-06, - "loss": 1.0824, - "step": 2355 - }, - { - "epoch": 0.2832922503456983, - "grad_norm": 6.62281561773714, - "learning_rate": 3.364066349043643e-06, - "loss": 0.9111, - "step": 2356 - }, - { - "epoch": 0.2834124932363374, - "grad_norm": 1.9249467569889904, - "learning_rate": 3.363496567086969e-06, - "loss": 1.0559, - "step": 2357 - }, - { - "epoch": 0.2835327361269765, - "grad_norm": 1.9647436400860405, - "learning_rate": 3.3629265782889506e-06, - "loss": 0.9832, - "step": 2358 - }, - { - "epoch": 0.2836529790176156, - "grad_norm": 1.716686744325624, - "learning_rate": 3.362356382736054e-06, - "loss": 0.943, - "step": 2359 - }, - { - "epoch": 0.28377322190825466, - "grad_norm": 1.9594870746689497, - "learning_rate": 3.361785980514777e-06, - "loss": 1.1421, - "step": 2360 - }, - { - "epoch": 0.28389346479889377, - "grad_norm": 1.8128135672995047, - "learning_rate": 3.361215371711649e-06, - "loss": 0.9976, - "step": 2361 - }, - { - "epoch": 0.2840137076895329, - "grad_norm": 1.8730554669991608, - "learning_rate": 3.3606445564132326e-06, - "loss": 1.0591, - "step": 2362 - }, - { - "epoch": 0.28413395058017193, - "grad_norm": 2.0404751732869215, - "learning_rate": 3.360073534706118e-06, - "loss": 1.0389, - "step": 2363 - }, - { - "epoch": 0.28425419347081105, - "grad_norm": 1.923051365535808, - "learning_rate": 3.35950230667693e-06, - "loss": 0.9911, - "step": 2364 - }, - { - "epoch": 0.28437443636145016, - "grad_norm": 1.888332648124245, - "learning_rate": 3.358930872412323e-06, - "loss": 1.0882, - "step": 2365 - }, - { - "epoch": 0.2844946792520892, - "grad_norm": 1.4812447720849857, - "learning_rate": 3.3583592319989825e-06, - "loss": 1.0357, - "step": 2366 - }, - { - "epoch": 0.2846149221427283, - "grad_norm": 1.8673243771744221, - "learning_rate": 3.357787385523627e-06, - "loss": 0.9135, - "step": 2367 - }, - { - "epoch": 0.2847351650333674, - "grad_norm": 1.770632504313651, - "learning_rate": 3.3572153330730048e-06, - "loss": 1.062, - "step": 2368 - }, - { - "epoch": 0.2848554079240065, - "grad_norm": 0.8268833208422328, - "learning_rate": 3.3566430747338956e-06, - "loss": 0.9008, - "step": 2369 - }, - { - "epoch": 0.2849756508146456, - "grad_norm": 4.263727345894312, - "learning_rate": 3.35607061059311e-06, - "loss": 1.1004, - "step": 2370 - }, - { - "epoch": 0.28509589370528465, - "grad_norm": 1.5898737424228702, - "learning_rate": 3.3554979407374917e-06, - "loss": 0.9792, - "step": 2371 - }, - { - "epoch": 0.28521613659592376, - "grad_norm": 1.5188712522981462, - "learning_rate": 3.3549250652539134e-06, - "loss": 0.966, - "step": 2372 - }, - { - "epoch": 0.2853363794865629, - "grad_norm": 1.8077798087049968, - "learning_rate": 3.3543519842292794e-06, - "loss": 1.0448, - "step": 2373 - }, - { - "epoch": 0.28545662237720193, - "grad_norm": 1.8761058089668634, - "learning_rate": 3.353778697750527e-06, - "loss": 1.07, - "step": 2374 - }, - { - "epoch": 0.28557686526784104, - "grad_norm": 1.6930839552186847, - "learning_rate": 3.353205205904622e-06, - "loss": 1.1212, - "step": 2375 - }, - { - "epoch": 0.28569710815848015, - "grad_norm": 1.7008277028109131, - "learning_rate": 3.3526315087785637e-06, - "loss": 0.9577, - "step": 2376 - }, - { - "epoch": 0.2858173510491192, - "grad_norm": 1.6096443765618518, - "learning_rate": 3.3520576064593805e-06, - "loss": 1.0387, - "step": 2377 - }, - { - "epoch": 0.2859375939397583, - "grad_norm": 1.6465449022274703, - "learning_rate": 3.3514834990341337e-06, - "loss": 1.0486, - "step": 2378 - }, - { - "epoch": 0.2860578368303974, - "grad_norm": 2.983322517190716, - "learning_rate": 3.3509091865899144e-06, - "loss": 1.1557, - "step": 2379 - }, - { - "epoch": 0.2861780797210365, - "grad_norm": 2.159680940234191, - "learning_rate": 3.350334669213846e-06, - "loss": 0.9328, - "step": 2380 - }, - { - "epoch": 0.2862983226116756, - "grad_norm": 11.540346439655337, - "learning_rate": 3.3497599469930816e-06, - "loss": 0.9958, - "step": 2381 - }, - { - "epoch": 0.28641856550231465, - "grad_norm": 2.115667268902721, - "learning_rate": 3.349185020014807e-06, - "loss": 1.0607, - "step": 2382 - }, - { - "epoch": 0.28653880839295376, - "grad_norm": 1.7309507641591413, - "learning_rate": 3.348609888366237e-06, - "loss": 0.9759, - "step": 2383 - }, - { - "epoch": 0.28665905128359287, - "grad_norm": 4.383590547583931, - "learning_rate": 3.348034552134619e-06, - "loss": 0.8563, - "step": 2384 - }, - { - "epoch": 0.2867792941742319, - "grad_norm": 1.725369416638318, - "learning_rate": 3.3474590114072316e-06, - "loss": 1.0726, - "step": 2385 - }, - { - "epoch": 0.28689953706487104, - "grad_norm": 1.9296419084236518, - "learning_rate": 3.3468832662713836e-06, - "loss": 1.0644, - "step": 2386 - }, - { - "epoch": 0.28701977995551015, - "grad_norm": 2.073885156275163, - "learning_rate": 3.346307316814415e-06, - "loss": 1.0754, - "step": 2387 - }, - { - "epoch": 0.2871400228461492, - "grad_norm": 1.7696841007599475, - "learning_rate": 3.3457311631236965e-06, - "loss": 0.9934, - "step": 2388 - }, - { - "epoch": 0.2872602657367883, - "grad_norm": 1.7092051336211804, - "learning_rate": 3.345154805286631e-06, - "loss": 1.0744, - "step": 2389 - }, - { - "epoch": 0.2873805086274274, - "grad_norm": 2.504946773671693, - "learning_rate": 3.344578243390651e-06, - "loss": 0.9854, - "step": 2390 - }, - { - "epoch": 0.2875007515180665, - "grad_norm": 2.3381195593645456, - "learning_rate": 3.3440014775232206e-06, - "loss": 1.0209, - "step": 2391 - }, - { - "epoch": 0.2876209944087056, - "grad_norm": 2.734965407799585, - "learning_rate": 3.343424507771834e-06, - "loss": 0.9467, - "step": 2392 - }, - { - "epoch": 0.2877412372993447, - "grad_norm": 1.7094188900740344, - "learning_rate": 3.342847334224018e-06, - "loss": 1.1013, - "step": 2393 - }, - { - "epoch": 0.28786148018998375, - "grad_norm": 0.8773407281295353, - "learning_rate": 3.342269956967329e-06, - "loss": 0.9005, - "step": 2394 - }, - { - "epoch": 0.28798172308062286, - "grad_norm": 3.863396283202702, - "learning_rate": 3.341692376089355e-06, - "loss": 0.9505, - "step": 2395 - }, - { - "epoch": 0.288101965971262, - "grad_norm": 8.62822231968725, - "learning_rate": 3.3411145916777146e-06, - "loss": 1.0595, - "step": 2396 - }, - { - "epoch": 0.28822220886190103, - "grad_norm": 1.9671031179350933, - "learning_rate": 3.3405366038200566e-06, - "loss": 1.138, - "step": 2397 - }, - { - "epoch": 0.28834245175254014, - "grad_norm": 2.2398589799973254, - "learning_rate": 3.3399584126040617e-06, - "loss": 1.0689, - "step": 2398 - }, - { - "epoch": 0.2884626946431792, - "grad_norm": 1.804906790485001, - "learning_rate": 3.339380018117441e-06, - "loss": 1.1258, - "step": 2399 - }, - { - "epoch": 0.2885829375338183, - "grad_norm": 2.3154409111455623, - "learning_rate": 3.3388014204479366e-06, - "loss": 1.0136, - "step": 2400 - }, - { - "epoch": 0.2887031804244574, - "grad_norm": 2.0281901861921052, - "learning_rate": 3.338222619683321e-06, - "loss": 1.1418, - "step": 2401 - }, - { - "epoch": 0.2888234233150965, - "grad_norm": 2.4801050114702647, - "learning_rate": 3.337643615911398e-06, - "loss": 0.9689, - "step": 2402 - }, - { - "epoch": 0.2889436662057356, - "grad_norm": 1.9151542197097906, - "learning_rate": 3.3370644092200026e-06, - "loss": 1.0257, - "step": 2403 - }, - { - "epoch": 0.2890639090963747, - "grad_norm": 1.9748578467093454, - "learning_rate": 3.3364849996969985e-06, - "loss": 1.0138, - "step": 2404 - }, - { - "epoch": 0.28918415198701375, - "grad_norm": 1.7140167953531866, - "learning_rate": 3.335905387430283e-06, - "loss": 1.08, - "step": 2405 - }, - { - "epoch": 0.28930439487765286, - "grad_norm": 2.9620372169315, - "learning_rate": 3.335325572507782e-06, - "loss": 1.0517, - "step": 2406 - }, - { - "epoch": 0.28942463776829197, - "grad_norm": 1.5767889126787022, - "learning_rate": 3.3347455550174537e-06, - "loss": 0.9737, - "step": 2407 - }, - { - "epoch": 0.289544880658931, - "grad_norm": 2.1714921792293764, - "learning_rate": 3.3341653350472864e-06, - "loss": 0.9113, - "step": 2408 - }, - { - "epoch": 0.28966512354957014, - "grad_norm": 2.34191609159509, - "learning_rate": 3.333584912685298e-06, - "loss": 0.9232, - "step": 2409 - }, - { - "epoch": 0.28978536644020925, - "grad_norm": 0.954602144362085, - "learning_rate": 3.3330042880195385e-06, - "loss": 0.8206, - "step": 2410 - }, - { - "epoch": 0.2899056093308483, - "grad_norm": 2.373839847899205, - "learning_rate": 3.3324234611380888e-06, - "loss": 1.0141, - "step": 2411 - }, - { - "epoch": 0.2900258522214874, - "grad_norm": 1.4779535432051993, - "learning_rate": 3.3318424321290596e-06, - "loss": 1.0445, - "step": 2412 - }, - { - "epoch": 0.2901460951121265, - "grad_norm": 0.8441581445347276, - "learning_rate": 3.3312612010805917e-06, - "loss": 0.8773, - "step": 2413 - }, - { - "epoch": 0.2902663380027656, - "grad_norm": 1.7570810109950021, - "learning_rate": 3.330679768080858e-06, - "loss": 0.9228, - "step": 2414 - }, - { - "epoch": 0.2903865808934047, - "grad_norm": 2.8633526927158837, - "learning_rate": 3.3300981332180627e-06, - "loss": 1.0645, - "step": 2415 - }, - { - "epoch": 0.29050682378404374, - "grad_norm": 1.786282574281198, - "learning_rate": 3.3295162965804373e-06, - "loss": 1.031, - "step": 2416 - }, - { - "epoch": 0.29062706667468285, - "grad_norm": 2.180783232231357, - "learning_rate": 3.328934258256247e-06, - "loss": 1.0013, - "step": 2417 - }, - { - "epoch": 0.29074730956532197, - "grad_norm": 2.198095329459707, - "learning_rate": 3.3283520183337856e-06, - "loss": 0.9036, - "step": 2418 - }, - { - "epoch": 0.290867552455961, - "grad_norm": 1.7976591969797049, - "learning_rate": 3.3277695769013797e-06, - "loss": 0.9265, - "step": 2419 - }, - { - "epoch": 0.29098779534660013, - "grad_norm": 1.7023188231605282, - "learning_rate": 3.327186934047385e-06, - "loss": 1.0022, - "step": 2420 - }, - { - "epoch": 0.29110803823723924, - "grad_norm": 2.0196866062403735, - "learning_rate": 3.3266040898601877e-06, - "loss": 0.8922, - "step": 2421 - }, - { - "epoch": 0.2912282811278783, - "grad_norm": 1.7331421740589283, - "learning_rate": 3.3260210444282045e-06, - "loss": 1.0046, - "step": 2422 - }, - { - "epoch": 0.2913485240185174, - "grad_norm": 2.0516612462773476, - "learning_rate": 3.325437797839883e-06, - "loss": 0.955, - "step": 2423 - }, - { - "epoch": 0.2914687669091565, - "grad_norm": 2.1913352043616596, - "learning_rate": 3.3248543501837015e-06, - "loss": 0.9782, - "step": 2424 - }, - { - "epoch": 0.2915890097997956, - "grad_norm": 2.5364872005838284, - "learning_rate": 3.3242707015481684e-06, - "loss": 0.9993, - "step": 2425 - }, - { - "epoch": 0.2917092526904347, - "grad_norm": 1.5705106240161844, - "learning_rate": 3.323686852021823e-06, - "loss": 1.0416, - "step": 2426 - }, - { - "epoch": 0.2918294955810738, - "grad_norm": 1.9894848611106597, - "learning_rate": 3.323102801693235e-06, - "loss": 1.0278, - "step": 2427 - }, - { - "epoch": 0.29194973847171285, - "grad_norm": 2.0647709222046013, - "learning_rate": 3.322518550651003e-06, - "loss": 1.0233, - "step": 2428 - }, - { - "epoch": 0.29206998136235196, - "grad_norm": 1.6423626371685283, - "learning_rate": 3.3219340989837586e-06, - "loss": 1.0443, - "step": 2429 - }, - { - "epoch": 0.292190224252991, - "grad_norm": 1.8581807123756748, - "learning_rate": 3.3213494467801625e-06, - "loss": 1.0325, - "step": 2430 - }, - { - "epoch": 0.2923104671436301, - "grad_norm": 2.2138949721872483, - "learning_rate": 3.3207645941289063e-06, - "loss": 0.9484, - "step": 2431 - }, - { - "epoch": 0.29243071003426924, - "grad_norm": 2.2908511936143254, - "learning_rate": 3.320179541118711e-06, - "loss": 1.0318, - "step": 2432 - }, - { - "epoch": 0.2925509529249083, - "grad_norm": 1.0638301601307805, - "learning_rate": 3.3195942878383293e-06, - "loss": 0.8757, - "step": 2433 - }, - { - "epoch": 0.2926711958155474, - "grad_norm": 1.6485539255112656, - "learning_rate": 3.319008834376543e-06, - "loss": 1.0089, - "step": 2434 - }, - { - "epoch": 0.2927914387061865, - "grad_norm": 2.117348752796005, - "learning_rate": 3.3184231808221654e-06, - "loss": 1.112, - "step": 2435 - }, - { - "epoch": 0.29291168159682557, - "grad_norm": 1.9422658939759565, - "learning_rate": 3.3178373272640394e-06, - "loss": 0.8552, - "step": 2436 - }, - { - "epoch": 0.2930319244874647, - "grad_norm": 1.993123571348112, - "learning_rate": 3.3172512737910387e-06, - "loss": 1.0919, - "step": 2437 - }, - { - "epoch": 0.2931521673781038, - "grad_norm": 3.128360010059434, - "learning_rate": 3.3166650204920674e-06, - "loss": 1.1095, - "step": 2438 - }, - { - "epoch": 0.29327241026874284, - "grad_norm": 1.526055614767897, - "learning_rate": 3.316078567456059e-06, - "loss": 1.0505, - "step": 2439 - }, - { - "epoch": 0.29339265315938196, - "grad_norm": 1.4679645304799405, - "learning_rate": 3.3154919147719786e-06, - "loss": 1.0003, - "step": 2440 - }, - { - "epoch": 0.29351289605002107, - "grad_norm": 2.8064812579797502, - "learning_rate": 3.31490506252882e-06, - "loss": 1.1007, - "step": 2441 - }, - { - "epoch": 0.2936331389406601, - "grad_norm": 1.8953778764310063, - "learning_rate": 3.31431801081561e-06, - "loss": 1.0738, - "step": 2442 - }, - { - "epoch": 0.29375338183129923, - "grad_norm": 1.1493064551271501, - "learning_rate": 3.313730759721402e-06, - "loss": 0.9189, - "step": 2443 - }, - { - "epoch": 0.29387362472193834, - "grad_norm": 2.440101995368787, - "learning_rate": 3.313143309335282e-06, - "loss": 1.0827, - "step": 2444 - }, - { - "epoch": 0.2939938676125774, - "grad_norm": 1.773674902801423, - "learning_rate": 3.3125556597463665e-06, - "loss": 1.0654, - "step": 2445 - }, - { - "epoch": 0.2941141105032165, - "grad_norm": 1.4574141012172066, - "learning_rate": 3.311967811043801e-06, - "loss": 0.8873, - "step": 2446 - }, - { - "epoch": 0.29423435339385556, - "grad_norm": 1.9991768096543552, - "learning_rate": 3.3113797633167617e-06, - "loss": 1.0511, - "step": 2447 - }, - { - "epoch": 0.2943545962844947, - "grad_norm": 2.1418360048233427, - "learning_rate": 3.310791516654455e-06, - "loss": 0.9164, - "step": 2448 - }, - { - "epoch": 0.2944748391751338, - "grad_norm": 1.698829694033644, - "learning_rate": 3.3102030711461177e-06, - "loss": 1.0287, - "step": 2449 - }, - { - "epoch": 0.29459508206577284, - "grad_norm": 1.7444990073955902, - "learning_rate": 3.3096144268810156e-06, - "loss": 0.9091, - "step": 2450 - }, - { - "epoch": 0.29471532495641195, - "grad_norm": 2.0944767160512456, - "learning_rate": 3.3090255839484462e-06, - "loss": 0.9534, - "step": 2451 - }, - { - "epoch": 0.29483556784705106, - "grad_norm": 1.7042348902445077, - "learning_rate": 3.3084365424377366e-06, - "loss": 1.0832, - "step": 2452 - }, - { - "epoch": 0.2949558107376901, - "grad_norm": 0.7800657833920595, - "learning_rate": 3.307847302438245e-06, - "loss": 0.8151, - "step": 2453 - }, - { - "epoch": 0.2950760536283292, - "grad_norm": 1.9988969502873433, - "learning_rate": 3.3072578640393562e-06, - "loss": 1.0049, - "step": 2454 - }, - { - "epoch": 0.29519629651896834, - "grad_norm": 1.8278137985488367, - "learning_rate": 3.3066682273304886e-06, - "loss": 1.0244, - "step": 2455 - }, - { - "epoch": 0.2953165394096074, - "grad_norm": 1.8010421861773203, - "learning_rate": 3.3060783924010904e-06, - "loss": 1.007, - "step": 2456 - }, - { - "epoch": 0.2954367823002465, - "grad_norm": 1.9157166476208212, - "learning_rate": 3.3054883593406387e-06, - "loss": 1.076, - "step": 2457 - }, - { - "epoch": 0.2955570251908856, - "grad_norm": 2.485106834248905, - "learning_rate": 3.3048981282386404e-06, - "loss": 0.8801, - "step": 2458 - }, - { - "epoch": 0.29567726808152467, - "grad_norm": 2.1107857966601737, - "learning_rate": 3.304307699184634e-06, - "loss": 1.0529, - "step": 2459 - }, - { - "epoch": 0.2957975109721638, - "grad_norm": 1.659410448288389, - "learning_rate": 3.3037170722681866e-06, - "loss": 1.0221, - "step": 2460 - }, - { - "epoch": 0.29591775386280283, - "grad_norm": 1.7838762565043287, - "learning_rate": 3.3031262475788956e-06, - "loss": 0.918, - "step": 2461 - }, - { - "epoch": 0.29603799675344195, - "grad_norm": 1.6545778146326884, - "learning_rate": 3.3025352252063897e-06, - "loss": 0.967, - "step": 2462 - }, - { - "epoch": 0.29615823964408106, - "grad_norm": 1.9126914583001988, - "learning_rate": 3.3019440052403252e-06, - "loss": 0.9732, - "step": 2463 - }, - { - "epoch": 0.2962784825347201, - "grad_norm": 1.7889837389271463, - "learning_rate": 3.30135258777039e-06, - "loss": 0.9412, - "step": 2464 - }, - { - "epoch": 0.2963987254253592, - "grad_norm": 2.112501187295975, - "learning_rate": 3.3007609728863024e-06, - "loss": 0.9283, - "step": 2465 - }, - { - "epoch": 0.29651896831599833, - "grad_norm": 2.2386493480043526, - "learning_rate": 3.300169160677809e-06, - "loss": 0.9642, - "step": 2466 - }, - { - "epoch": 0.2966392112066374, - "grad_norm": 2.7830578361200926, - "learning_rate": 3.2995771512346878e-06, - "loss": 1.0011, - "step": 2467 - }, - { - "epoch": 0.2967594540972765, - "grad_norm": 1.9637975618756967, - "learning_rate": 3.298984944646746e-06, - "loss": 0.9587, - "step": 2468 - }, - { - "epoch": 0.2968796969879156, - "grad_norm": 1.9907430760503801, - "learning_rate": 3.298392541003822e-06, - "loss": 1.0408, - "step": 2469 - }, - { - "epoch": 0.29699993987855466, - "grad_norm": 1.85043906079406, - "learning_rate": 3.2977999403957806e-06, - "loss": 1.1194, - "step": 2470 - }, - { - "epoch": 0.2971201827691938, - "grad_norm": 1.860193729463645, - "learning_rate": 3.2972071429125207e-06, - "loss": 0.9007, - "step": 2471 - }, - { - "epoch": 0.2972404256598329, - "grad_norm": 2.5417403434539216, - "learning_rate": 3.2966141486439682e-06, - "loss": 1.1137, - "step": 2472 - }, - { - "epoch": 0.29736066855047194, - "grad_norm": 2.1381111698991266, - "learning_rate": 3.29602095768008e-06, - "loss": 0.8758, - "step": 2473 - }, - { - "epoch": 0.29748091144111105, - "grad_norm": 1.610121026317857, - "learning_rate": 3.2954275701108437e-06, - "loss": 0.8687, - "step": 2474 - }, - { - "epoch": 0.29760115433175016, - "grad_norm": 1.9411574267576917, - "learning_rate": 3.294833986026275e-06, - "loss": 0.9207, - "step": 2475 - }, - { - "epoch": 0.2977213972223892, - "grad_norm": 1.7914003647426018, - "learning_rate": 3.29424020551642e-06, - "loss": 1.0836, - "step": 2476 - }, - { - "epoch": 0.2978416401130283, - "grad_norm": 1.698207861458545, - "learning_rate": 3.2936462286713546e-06, - "loss": 0.9433, - "step": 2477 - }, - { - "epoch": 0.2979618830036674, - "grad_norm": 1.9689056092784871, - "learning_rate": 3.2930520555811846e-06, - "loss": 1.0039, - "step": 2478 - }, - { - "epoch": 0.2980821258943065, - "grad_norm": 1.8374622263237153, - "learning_rate": 3.292457686336046e-06, - "loss": 1.03, - "step": 2479 - }, - { - "epoch": 0.2982023687849456, - "grad_norm": 0.8407388220097006, - "learning_rate": 3.291863121026105e-06, - "loss": 0.8817, - "step": 2480 - }, - { - "epoch": 0.29832261167558466, - "grad_norm": 1.9577352814543074, - "learning_rate": 3.2912683597415547e-06, - "loss": 0.9979, - "step": 2481 - }, - { - "epoch": 0.29844285456622377, - "grad_norm": 2.082390788169531, - "learning_rate": 3.2906734025726213e-06, - "loss": 1.0142, - "step": 2482 - }, - { - "epoch": 0.2985630974568629, - "grad_norm": 2.75254608545346, - "learning_rate": 3.290078249609559e-06, - "loss": 1.105, - "step": 2483 - }, - { - "epoch": 0.29868334034750194, - "grad_norm": 1.760044427224274, - "learning_rate": 3.2894829009426514e-06, - "loss": 1.1081, - "step": 2484 - }, - { - "epoch": 0.29880358323814105, - "grad_norm": 1.7225767363797395, - "learning_rate": 3.288887356662213e-06, - "loss": 0.9987, - "step": 2485 - }, - { - "epoch": 0.29892382612878016, - "grad_norm": 0.8025281506648587, - "learning_rate": 3.288291616858588e-06, - "loss": 0.8455, - "step": 2486 - }, - { - "epoch": 0.2990440690194192, - "grad_norm": 2.1134144852076227, - "learning_rate": 3.287695681622149e-06, - "loss": 0.9974, - "step": 2487 - }, - { - "epoch": 0.2991643119100583, - "grad_norm": 1.7053451765920247, - "learning_rate": 3.2870995510432982e-06, - "loss": 1.0424, - "step": 2488 - }, - { - "epoch": 0.29928455480069743, - "grad_norm": 1.7520340950753204, - "learning_rate": 3.2865032252124697e-06, - "loss": 0.9986, - "step": 2489 - }, - { - "epoch": 0.2994047976913365, - "grad_norm": 1.3844368225484511, - "learning_rate": 3.2859067042201243e-06, - "loss": 1.0011, - "step": 2490 - }, - { - "epoch": 0.2995250405819756, - "grad_norm": 1.702530223337299, - "learning_rate": 3.2853099881567544e-06, - "loss": 1.0051, - "step": 2491 - }, - { - "epoch": 0.29964528347261465, - "grad_norm": 1.5496839970285434, - "learning_rate": 3.284713077112881e-06, - "loss": 1.0179, - "step": 2492 - }, - { - "epoch": 0.29976552636325376, - "grad_norm": 2.3589270968683715, - "learning_rate": 3.284115971179056e-06, - "loss": 1.0966, - "step": 2493 - }, - { - "epoch": 0.2998857692538929, - "grad_norm": 1.708675962684538, - "learning_rate": 3.283518670445859e-06, - "loss": 1.0191, - "step": 2494 - }, - { - "epoch": 0.30000601214453193, - "grad_norm": 1.1776755145849356, - "learning_rate": 3.2829211750038995e-06, - "loss": 0.8083, - "step": 2495 - }, - { - "epoch": 0.30012625503517104, - "grad_norm": 1.5891315636494399, - "learning_rate": 3.2823234849438183e-06, - "loss": 1.1086, - "step": 2496 - }, - { - "epoch": 0.30024649792581015, - "grad_norm": 2.2295920658338595, - "learning_rate": 3.2817256003562836e-06, - "loss": 0.9747, - "step": 2497 - }, - { - "epoch": 0.3003667408164492, - "grad_norm": 1.8112759896698223, - "learning_rate": 3.281127521331995e-06, - "loss": 0.8938, - "step": 2498 - }, - { - "epoch": 0.3004869837070883, - "grad_norm": 0.8990515559289137, - "learning_rate": 3.2805292479616798e-06, - "loss": 0.8725, - "step": 2499 - }, - { - "epoch": 0.30060722659772743, - "grad_norm": 2.0115813865800223, - "learning_rate": 3.2799307803360955e-06, - "loss": 1.1348, - "step": 2500 - }, - { - "epoch": 0.3007274694883665, - "grad_norm": 1.3946726751340925, - "learning_rate": 3.27933211854603e-06, - "loss": 1.0438, - "step": 2501 - }, - { - "epoch": 0.3008477123790056, - "grad_norm": 1.6503684499481037, - "learning_rate": 3.278733262682299e-06, - "loss": 1.0988, - "step": 2502 - }, - { - "epoch": 0.3009679552696447, - "grad_norm": 2.1235831530022184, - "learning_rate": 3.2781342128357484e-06, - "loss": 1.0548, - "step": 2503 - }, - { - "epoch": 0.30108819816028376, - "grad_norm": 4.075204322232731, - "learning_rate": 3.2775349690972547e-06, - "loss": 1.0347, - "step": 2504 - }, - { - "epoch": 0.30120844105092287, - "grad_norm": 0.7768174119567384, - "learning_rate": 3.276935531557722e-06, - "loss": 0.7958, - "step": 2505 - }, - { - "epoch": 0.301328683941562, - "grad_norm": 2.6919152120470096, - "learning_rate": 3.2763359003080837e-06, - "loss": 1.0275, - "step": 2506 - }, - { - "epoch": 0.30144892683220104, - "grad_norm": 0.9871881342611422, - "learning_rate": 3.2757360754393047e-06, - "loss": 0.9075, - "step": 2507 - }, - { - "epoch": 0.30156916972284015, - "grad_norm": 13.158443603311277, - "learning_rate": 3.2751360570423767e-06, - "loss": 0.8715, - "step": 2508 - }, - { - "epoch": 0.3016894126134792, - "grad_norm": 1.8738047286802233, - "learning_rate": 3.2745358452083236e-06, - "loss": 0.9929, - "step": 2509 - }, - { - "epoch": 0.3018096555041183, - "grad_norm": 1.260764549523916, - "learning_rate": 3.2739354400281955e-06, - "loss": 1.0457, - "step": 2510 - }, - { - "epoch": 0.3019298983947574, - "grad_norm": 0.887597946349936, - "learning_rate": 3.2733348415930744e-06, - "loss": 0.9195, - "step": 2511 - }, - { - "epoch": 0.3020501412853965, - "grad_norm": 1.7960813013336467, - "learning_rate": 3.27273404999407e-06, - "loss": 1.0403, - "step": 2512 - }, - { - "epoch": 0.3021703841760356, - "grad_norm": 0.7936399278388907, - "learning_rate": 3.272133065322322e-06, - "loss": 0.8579, - "step": 2513 - }, - { - "epoch": 0.3022906270666747, - "grad_norm": 1.4551708627116813, - "learning_rate": 3.271531887669e-06, - "loss": 1.0145, - "step": 2514 - }, - { - "epoch": 0.30241086995731375, - "grad_norm": 2.3967931621560927, - "learning_rate": 3.2709305171253015e-06, - "loss": 0.8637, - "step": 2515 - }, - { - "epoch": 0.30253111284795287, - "grad_norm": 1.8306289869729357, - "learning_rate": 3.2703289537824536e-06, - "loss": 1.0074, - "step": 2516 - }, - { - "epoch": 0.302651355738592, - "grad_norm": 2.739704453337656, - "learning_rate": 3.269727197731714e-06, - "loss": 1.0158, - "step": 2517 - }, - { - "epoch": 0.30277159862923103, - "grad_norm": 1.5053023558645655, - "learning_rate": 3.269125249064367e-06, - "loss": 1.0058, - "step": 2518 - }, - { - "epoch": 0.30289184151987014, - "grad_norm": 1.457891869709777, - "learning_rate": 3.2685231078717297e-06, - "loss": 1.0602, - "step": 2519 - }, - { - "epoch": 0.30301208441050925, - "grad_norm": 2.0242654292246614, - "learning_rate": 3.267920774245145e-06, - "loss": 0.9789, - "step": 2520 - }, - { - "epoch": 0.3031323273011483, - "grad_norm": 1.8541821325564347, - "learning_rate": 3.2673182482759876e-06, - "loss": 1.0726, - "step": 2521 - }, - { - "epoch": 0.3032525701917874, - "grad_norm": 2.316002332906915, - "learning_rate": 3.266715530055659e-06, - "loss": 0.8941, - "step": 2522 - }, - { - "epoch": 0.30337281308242653, - "grad_norm": 2.9607226613393163, - "learning_rate": 3.2661126196755927e-06, - "loss": 1.0318, - "step": 2523 - }, - { - "epoch": 0.3034930559730656, - "grad_norm": 0.8304083672118491, - "learning_rate": 3.265509517227248e-06, - "loss": 0.8379, - "step": 2524 - }, - { - "epoch": 0.3036132988637047, - "grad_norm": 1.6262766151683972, - "learning_rate": 3.264906222802115e-06, - "loss": 1.0328, - "step": 2525 - }, - { - "epoch": 0.30373354175434375, - "grad_norm": 1.9217676272287905, - "learning_rate": 3.264302736491715e-06, - "loss": 1.0065, - "step": 2526 - }, - { - "epoch": 0.30385378464498286, - "grad_norm": 1.672961586063716, - "learning_rate": 3.263699058387594e-06, - "loss": 1.0938, - "step": 2527 - }, - { - "epoch": 0.30397402753562197, - "grad_norm": 1.9583719804345703, - "learning_rate": 3.2630951885813315e-06, - "loss": 1.1345, - "step": 2528 - }, - { - "epoch": 0.304094270426261, - "grad_norm": 1.7576960546283116, - "learning_rate": 3.262491127164533e-06, - "loss": 1.0108, - "step": 2529 - }, - { - "epoch": 0.30421451331690014, - "grad_norm": 2.2221890513275615, - "learning_rate": 3.2618868742288337e-06, - "loss": 1.0241, - "step": 2530 - }, - { - "epoch": 0.30433475620753925, - "grad_norm": 1.9400829203080208, - "learning_rate": 3.261282429865899e-06, - "loss": 0.9558, - "step": 2531 - }, - { - "epoch": 0.3044549990981783, - "grad_norm": 1.7963635587534292, - "learning_rate": 3.2606777941674225e-06, - "loss": 0.9565, - "step": 2532 - }, - { - "epoch": 0.3045752419888174, - "grad_norm": 1.9537011479318678, - "learning_rate": 3.2600729672251276e-06, - "loss": 1.0707, - "step": 2533 - }, - { - "epoch": 0.3046954848794565, - "grad_norm": 1.9555563479961369, - "learning_rate": 3.259467949130765e-06, - "loss": 0.8835, - "step": 2534 - }, - { - "epoch": 0.3048157277700956, - "grad_norm": 3.453654391069567, - "learning_rate": 3.2588627399761164e-06, - "loss": 1.0646, - "step": 2535 - }, - { - "epoch": 0.3049359706607347, - "grad_norm": 2.508865943286053, - "learning_rate": 3.2582573398529903e-06, - "loss": 0.9398, - "step": 2536 - }, - { - "epoch": 0.3050562135513738, - "grad_norm": 2.1984503099709523, - "learning_rate": 3.2576517488532265e-06, - "loss": 0.9679, - "step": 2537 - }, - { - "epoch": 0.30517645644201286, - "grad_norm": 1.5718180712483183, - "learning_rate": 3.257045967068692e-06, - "loss": 1.0923, - "step": 2538 - }, - { - "epoch": 0.30529669933265197, - "grad_norm": 1.6214369812657872, - "learning_rate": 3.2564399945912848e-06, - "loss": 1.0533, - "step": 2539 - }, - { - "epoch": 0.305416942223291, - "grad_norm": 3.095674474926573, - "learning_rate": 3.2558338315129287e-06, - "loss": 1.0555, - "step": 2540 - }, - { - "epoch": 0.30553718511393013, - "grad_norm": 1.9251373019113018, - "learning_rate": 3.2552274779255785e-06, - "loss": 0.991, - "step": 2541 - }, - { - "epoch": 0.30565742800456924, - "grad_norm": 3.0362633216663033, - "learning_rate": 3.2546209339212184e-06, - "loss": 1.0021, - "step": 2542 - }, - { - "epoch": 0.3057776708952083, - "grad_norm": 1.4788224621235644, - "learning_rate": 3.25401419959186e-06, - "loss": 0.9942, - "step": 2543 - }, - { - "epoch": 0.3058979137858474, - "grad_norm": 1.7523504548746003, - "learning_rate": 3.253407275029545e-06, - "loss": 0.9981, - "step": 2544 - }, - { - "epoch": 0.3060181566764865, - "grad_norm": 2.686356589579908, - "learning_rate": 3.2528001603263425e-06, - "loss": 1.0307, - "step": 2545 - }, - { - "epoch": 0.3061383995671256, - "grad_norm": 1.8853218167158727, - "learning_rate": 3.2521928555743514e-06, - "loss": 1.0469, - "step": 2546 - }, - { - "epoch": 0.3062586424577647, - "grad_norm": 1.5659288549327322, - "learning_rate": 3.2515853608657e-06, - "loss": 0.9101, - "step": 2547 - }, - { - "epoch": 0.3063788853484038, - "grad_norm": 2.431047555688201, - "learning_rate": 3.250977676292545e-06, - "loss": 0.9808, - "step": 2548 - }, - { - "epoch": 0.30649912823904285, - "grad_norm": 1.931074686333651, - "learning_rate": 3.2503698019470712e-06, - "loss": 1.0211, - "step": 2549 - }, - { - "epoch": 0.30661937112968196, - "grad_norm": 1.9308755283193235, - "learning_rate": 3.249761737921492e-06, - "loss": 1.0051, - "step": 2550 - }, - { - "epoch": 0.30673961402032107, - "grad_norm": 2.011210019868277, - "learning_rate": 3.249153484308051e-06, - "loss": 0.9755, - "step": 2551 - }, - { - "epoch": 0.3068598569109601, - "grad_norm": 1.9168554987007158, - "learning_rate": 3.2485450411990194e-06, - "loss": 1.0031, - "step": 2552 - }, - { - "epoch": 0.30698009980159924, - "grad_norm": 2.6247553938477695, - "learning_rate": 3.2479364086866983e-06, - "loss": 1.0529, - "step": 2553 - }, - { - "epoch": 0.30710034269223835, - "grad_norm": 1.5541732167505637, - "learning_rate": 3.247327586863416e-06, - "loss": 1.0376, - "step": 2554 - }, - { - "epoch": 0.3072205855828774, - "grad_norm": 2.2531896870251633, - "learning_rate": 3.2467185758215304e-06, - "loss": 1.0055, - "step": 2555 - }, - { - "epoch": 0.3073408284735165, - "grad_norm": 2.6262959505588617, - "learning_rate": 3.246109375653428e-06, - "loss": 1.0875, - "step": 2556 - }, - { - "epoch": 0.30746107136415557, - "grad_norm": 1.768748196442564, - "learning_rate": 3.2454999864515243e-06, - "loss": 1.0131, - "step": 2557 - }, - { - "epoch": 0.3075813142547947, - "grad_norm": 5.047472466690825, - "learning_rate": 3.244890408308263e-06, - "loss": 0.9183, - "step": 2558 - }, - { - "epoch": 0.3077015571454338, - "grad_norm": 1.9031790088324967, - "learning_rate": 3.2442806413161165e-06, - "loss": 0.8432, - "step": 2559 - }, - { - "epoch": 0.30782180003607285, - "grad_norm": 1.8605985702774868, - "learning_rate": 3.243670685567586e-06, - "loss": 0.9979, - "step": 2560 - }, - { - "epoch": 0.30794204292671196, - "grad_norm": 1.9788873441917296, - "learning_rate": 3.2430605411552012e-06, - "loss": 1.0323, - "step": 2561 - }, - { - "epoch": 0.30806228581735107, - "grad_norm": 0.8934150628404431, - "learning_rate": 3.2424502081715205e-06, - "loss": 0.9497, - "step": 2562 - }, - { - "epoch": 0.3081825287079901, - "grad_norm": 1.6972510158318326, - "learning_rate": 3.241839686709132e-06, - "loss": 1.0121, - "step": 2563 - }, - { - "epoch": 0.30830277159862923, - "grad_norm": 2.5252363566449354, - "learning_rate": 3.2412289768606495e-06, - "loss": 1.0494, - "step": 2564 - }, - { - "epoch": 0.30842301448926834, - "grad_norm": 1.5363089623554633, - "learning_rate": 3.240618078718718e-06, - "loss": 1.0495, - "step": 2565 - }, - { - "epoch": 0.3085432573799074, - "grad_norm": 1.7909263663477528, - "learning_rate": 3.240006992376011e-06, - "loss": 0.9695, - "step": 2566 - }, - { - "epoch": 0.3086635002705465, - "grad_norm": 2.904240067009146, - "learning_rate": 3.2393957179252284e-06, - "loss": 0.9866, - "step": 2567 - }, - { - "epoch": 0.3087837431611856, - "grad_norm": 1.9321608040668399, - "learning_rate": 3.2387842554591016e-06, - "loss": 1.04, - "step": 2568 - }, - { - "epoch": 0.3089039860518247, - "grad_norm": 2.0210915888777343, - "learning_rate": 3.238172605070388e-06, - "loss": 1.0941, - "step": 2569 - }, - { - "epoch": 0.3090242289424638, - "grad_norm": 2.1174336533133884, - "learning_rate": 3.2375607668518745e-06, - "loss": 1.0144, - "step": 2570 - }, - { - "epoch": 0.30914447183310284, - "grad_norm": 2.2927564168483996, - "learning_rate": 3.236948740896377e-06, - "loss": 1.1289, - "step": 2571 - }, - { - "epoch": 0.30926471472374195, - "grad_norm": 1.3398295143194174, - "learning_rate": 3.2363365272967384e-06, - "loss": 1.0674, - "step": 2572 - }, - { - "epoch": 0.30938495761438106, - "grad_norm": 2.1665316930935847, - "learning_rate": 3.235724126145832e-06, - "loss": 1.0361, - "step": 2573 - }, - { - "epoch": 0.3095052005050201, - "grad_norm": 1.4073392675986345, - "learning_rate": 3.235111537536558e-06, - "loss": 1.0025, - "step": 2574 - }, - { - "epoch": 0.30962544339565923, - "grad_norm": 1.867818430520507, - "learning_rate": 3.2344987615618456e-06, - "loss": 1.0596, - "step": 2575 - }, - { - "epoch": 0.30974568628629834, - "grad_norm": 1.476335780585207, - "learning_rate": 3.2338857983146533e-06, - "loss": 1.0124, - "step": 2576 - }, - { - "epoch": 0.3098659291769374, - "grad_norm": 1.7930772536613295, - "learning_rate": 3.233272647887966e-06, - "loss": 0.9946, - "step": 2577 - }, - { - "epoch": 0.3099861720675765, - "grad_norm": 1.5397889412503265, - "learning_rate": 3.2326593103747985e-06, - "loss": 1.1235, - "step": 2578 - }, - { - "epoch": 0.3101064149582156, - "grad_norm": 1.7988177867679305, - "learning_rate": 3.2320457858681936e-06, - "loss": 1.0732, - "step": 2579 - }, - { - "epoch": 0.31022665784885467, - "grad_norm": 2.2395825289939206, - "learning_rate": 3.2314320744612228e-06, - "loss": 1.0856, - "step": 2580 - }, - { - "epoch": 0.3103469007394938, - "grad_norm": 2.8664543191492746, - "learning_rate": 3.2308181762469854e-06, - "loss": 0.9907, - "step": 2581 - }, - { - "epoch": 0.3104671436301329, - "grad_norm": 2.590370550417538, - "learning_rate": 3.230204091318609e-06, - "loss": 1.0203, - "step": 2582 - }, - { - "epoch": 0.31058738652077195, - "grad_norm": 1.7262204206778986, - "learning_rate": 3.2295898197692503e-06, - "loss": 1.0735, - "step": 2583 - }, - { - "epoch": 0.31070762941141106, - "grad_norm": 1.5296366230157636, - "learning_rate": 3.228975361692094e-06, - "loss": 1.0209, - "step": 2584 - }, - { - "epoch": 0.31082787230205017, - "grad_norm": 3.1396748638079144, - "learning_rate": 3.228360717180352e-06, - "loss": 1.0328, - "step": 2585 - }, - { - "epoch": 0.3109481151926892, - "grad_norm": 0.8920542976634653, - "learning_rate": 3.227745886327266e-06, - "loss": 0.8746, - "step": 2586 - }, - { - "epoch": 0.31106835808332833, - "grad_norm": 0.8079717335289036, - "learning_rate": 3.227130869226105e-06, - "loss": 0.8194, - "step": 2587 - }, - { - "epoch": 0.3111886009739674, - "grad_norm": 2.1221788416362726, - "learning_rate": 3.226515665970167e-06, - "loss": 1.0533, - "step": 2588 - }, - { - "epoch": 0.3113088438646065, - "grad_norm": 3.492987829065401, - "learning_rate": 3.225900276652777e-06, - "loss": 1.0892, - "step": 2589 - }, - { - "epoch": 0.3114290867552456, - "grad_norm": 1.8606237497732352, - "learning_rate": 3.2252847013672906e-06, - "loss": 0.9829, - "step": 2590 - }, - { - "epoch": 0.31154932964588467, - "grad_norm": 1.9954114968543617, - "learning_rate": 3.224668940207089e-06, - "loss": 0.9943, - "step": 2591 - }, - { - "epoch": 0.3116695725365238, - "grad_norm": 4.210383504403179, - "learning_rate": 3.2240529932655828e-06, - "loss": 1.0997, - "step": 2592 - }, - { - "epoch": 0.3117898154271629, - "grad_norm": 2.934216551316472, - "learning_rate": 3.223436860636211e-06, - "loss": 1.1177, - "step": 2593 - }, - { - "epoch": 0.31191005831780194, - "grad_norm": 1.5533071553121411, - "learning_rate": 3.2228205424124403e-06, - "loss": 0.9677, - "step": 2594 - }, - { - "epoch": 0.31203030120844105, - "grad_norm": 3.408294344230566, - "learning_rate": 3.222204038687765e-06, - "loss": 0.9715, - "step": 2595 - }, - { - "epoch": 0.31215054409908016, - "grad_norm": 1.6874954788712584, - "learning_rate": 3.221587349555709e-06, - "loss": 1.1116, - "step": 2596 - }, - { - "epoch": 0.3122707869897192, - "grad_norm": 1.9254157471986528, - "learning_rate": 3.2209704751098236e-06, - "loss": 0.9189, - "step": 2597 - }, - { - "epoch": 0.31239102988035833, - "grad_norm": 2.2201170672230988, - "learning_rate": 3.2203534154436875e-06, - "loss": 1.062, - "step": 2598 - }, - { - "epoch": 0.31251127277099744, - "grad_norm": 2.010713374194055, - "learning_rate": 3.2197361706509084e-06, - "loss": 0.9949, - "step": 2599 - }, - { - "epoch": 0.3126315156616365, - "grad_norm": 3.374975416305865, - "learning_rate": 3.2191187408251228e-06, - "loss": 1.0791, - "step": 2600 - }, - { - "epoch": 0.3127517585522756, - "grad_norm": 3.224378239037931, - "learning_rate": 3.218501126059993e-06, - "loss": 1.0047, - "step": 2601 - }, - { - "epoch": 0.31287200144291466, - "grad_norm": 1.6800645981585338, - "learning_rate": 3.2178833264492116e-06, - "loss": 1.0422, - "step": 2602 - }, - { - "epoch": 0.31299224433355377, - "grad_norm": 2.0627959068034336, - "learning_rate": 3.217265342086498e-06, - "loss": 0.99, - "step": 2603 - }, - { - "epoch": 0.3131124872241929, - "grad_norm": 2.004690392388794, - "learning_rate": 3.216647173065599e-06, - "loss": 0.9643, - "step": 2604 - }, - { - "epoch": 0.31323273011483194, - "grad_norm": 1.7283464424398844, - "learning_rate": 3.216028819480292e-06, - "loss": 0.9695, - "step": 2605 - }, - { - "epoch": 0.31335297300547105, - "grad_norm": 1.8139914591545072, - "learning_rate": 3.2154102814243793e-06, - "loss": 0.9896, - "step": 2606 - }, - { - "epoch": 0.31347321589611016, - "grad_norm": 6.6221177651162595, - "learning_rate": 3.2147915589916937e-06, - "loss": 0.9084, - "step": 2607 - }, - { - "epoch": 0.3135934587867492, - "grad_norm": 1.7907060295265493, - "learning_rate": 3.2141726522760938e-06, - "loss": 1.0544, - "step": 2608 - }, - { - "epoch": 0.3137137016773883, - "grad_norm": 0.7228070076067064, - "learning_rate": 3.213553561371469e-06, - "loss": 0.7899, - "step": 2609 - }, - { - "epoch": 0.31383394456802743, - "grad_norm": 1.9952137420040337, - "learning_rate": 3.212934286371733e-06, - "loss": 1.1921, - "step": 2610 - }, - { - "epoch": 0.3139541874586665, - "grad_norm": 2.1162101488591634, - "learning_rate": 3.2123148273708304e-06, - "loss": 1.063, - "step": 2611 - }, - { - "epoch": 0.3140744303493056, - "grad_norm": 2.1937235990929964, - "learning_rate": 3.211695184462733e-06, - "loss": 0.9934, - "step": 2612 - }, - { - "epoch": 0.3141946732399447, - "grad_norm": 0.8816968569487209, - "learning_rate": 3.2110753577414383e-06, - "loss": 0.8755, - "step": 2613 - }, - { - "epoch": 0.31431491613058377, - "grad_norm": 1.8309252558421025, - "learning_rate": 3.2104553473009757e-06, - "loss": 1.018, - "step": 2614 - }, - { - "epoch": 0.3144351590212229, - "grad_norm": 1.6676704086321712, - "learning_rate": 3.209835153235399e-06, - "loss": 0.9036, - "step": 2615 - }, - { - "epoch": 0.314555401911862, - "grad_norm": 2.5728042722135154, - "learning_rate": 3.2092147756387916e-06, - "loss": 0.9088, - "step": 2616 - }, - { - "epoch": 0.31467564480250104, - "grad_norm": 2.31156087828122, - "learning_rate": 3.208594214605264e-06, - "loss": 1.0631, - "step": 2617 - }, - { - "epoch": 0.31479588769314015, - "grad_norm": 1.8196337175159787, - "learning_rate": 3.2079734702289553e-06, - "loss": 1.0052, - "step": 2618 - }, - { - "epoch": 0.3149161305837792, - "grad_norm": 0.8803904568811403, - "learning_rate": 3.207352542604031e-06, - "loss": 0.8739, - "step": 2619 - }, - { - "epoch": 0.3150363734744183, - "grad_norm": 1.5302291892932696, - "learning_rate": 3.2067314318246864e-06, - "loss": 1.0148, - "step": 2620 - }, - { - "epoch": 0.31515661636505743, - "grad_norm": 1.624043256371478, - "learning_rate": 3.206110137985143e-06, - "loss": 1.0002, - "step": 2621 - }, - { - "epoch": 0.3152768592556965, - "grad_norm": 2.3973046028458054, - "learning_rate": 3.2054886611796505e-06, - "loss": 1.1494, - "step": 2622 - }, - { - "epoch": 0.3153971021463356, - "grad_norm": 0.9543509806450233, - "learning_rate": 3.204867001502487e-06, - "loss": 0.9216, - "step": 2623 - }, - { - "epoch": 0.3155173450369747, - "grad_norm": 1.8443342673219298, - "learning_rate": 3.2042451590479567e-06, - "loss": 1.0398, - "step": 2624 - }, - { - "epoch": 0.31563758792761376, - "grad_norm": 1.597527011410154, - "learning_rate": 3.203623133910394e-06, - "loss": 1.0881, - "step": 2625 - }, - { - "epoch": 0.31575783081825287, - "grad_norm": 2.0342817715848494, - "learning_rate": 3.203000926184158e-06, - "loss": 1.003, - "step": 2626 - }, - { - "epoch": 0.315878073708892, - "grad_norm": 1.7614513118099384, - "learning_rate": 3.202378535963639e-06, - "loss": 0.9979, - "step": 2627 - }, - { - "epoch": 0.31599831659953104, - "grad_norm": 1.5203616660657062, - "learning_rate": 3.2017559633432516e-06, - "loss": 1.0679, - "step": 2628 - }, - { - "epoch": 0.31611855949017015, - "grad_norm": 3.445600225150918, - "learning_rate": 3.2011332084174398e-06, - "loss": 0.8919, - "step": 2629 - }, - { - "epoch": 0.31623880238080926, - "grad_norm": 1.5564549046940195, - "learning_rate": 3.2005102712806756e-06, - "loss": 1.1179, - "step": 2630 - }, - { - "epoch": 0.3163590452714483, - "grad_norm": 1.9797691900414558, - "learning_rate": 3.1998871520274575e-06, - "loss": 0.9591, - "step": 2631 - }, - { - "epoch": 0.3164792881620874, - "grad_norm": 1.6855913600598813, - "learning_rate": 3.199263850752312e-06, - "loss": 1.0734, - "step": 2632 - }, - { - "epoch": 0.31659953105272653, - "grad_norm": 1.992370836386293, - "learning_rate": 3.198640367549795e-06, - "loss": 1.089, - "step": 2633 - }, - { - "epoch": 0.3167197739433656, - "grad_norm": 1.5759339938936976, - "learning_rate": 3.198016702514487e-06, - "loss": 1.0894, - "step": 2634 - }, - { - "epoch": 0.3168400168340047, - "grad_norm": 1.9950663989114934, - "learning_rate": 3.1973928557409972e-06, - "loss": 1.0772, - "step": 2635 - }, - { - "epoch": 0.31696025972464376, - "grad_norm": 1.6511543628566254, - "learning_rate": 3.1967688273239636e-06, - "loss": 0.9258, - "step": 2636 - }, - { - "epoch": 0.31708050261528287, - "grad_norm": 2.0001691508288717, - "learning_rate": 3.1961446173580503e-06, - "loss": 1.0521, - "step": 2637 - }, - { - "epoch": 0.317200745505922, - "grad_norm": 1.968760172499761, - "learning_rate": 3.1955202259379502e-06, - "loss": 1.0021, - "step": 2638 - }, - { - "epoch": 0.31732098839656103, - "grad_norm": 1.558754742943248, - "learning_rate": 3.194895653158381e-06, - "loss": 1.0486, - "step": 2639 - }, - { - "epoch": 0.31744123128720014, - "grad_norm": 0.7770394347799376, - "learning_rate": 3.194270899114093e-06, - "loss": 0.8269, - "step": 2640 - }, - { - "epoch": 0.31756147417783925, - "grad_norm": 1.8341961333169507, - "learning_rate": 3.193645963899858e-06, - "loss": 1.0515, - "step": 2641 - }, - { - "epoch": 0.3176817170684783, - "grad_norm": 1.615122559446899, - "learning_rate": 3.193020847610479e-06, - "loss": 1.0621, - "step": 2642 - }, - { - "epoch": 0.3178019599591174, - "grad_norm": 2.021327387661774, - "learning_rate": 3.192395550340787e-06, - "loss": 0.9435, - "step": 2643 - }, - { - "epoch": 0.31792220284975653, - "grad_norm": 2.0719905170901187, - "learning_rate": 3.191770072185638e-06, - "loss": 0.9974, - "step": 2644 - }, - { - "epoch": 0.3180424457403956, - "grad_norm": 2.1028365913787748, - "learning_rate": 3.191144413239916e-06, - "loss": 0.9585, - "step": 2645 - }, - { - "epoch": 0.3181626886310347, - "grad_norm": 2.1079603184361804, - "learning_rate": 3.190518573598534e-06, - "loss": 1.1063, - "step": 2646 - }, - { - "epoch": 0.3182829315216738, - "grad_norm": 1.6800397636612385, - "learning_rate": 3.1898925533564308e-06, - "loss": 1.0066, - "step": 2647 - }, - { - "epoch": 0.31840317441231286, - "grad_norm": 1.8904817385029884, - "learning_rate": 3.1892663526085733e-06, - "loss": 0.8724, - "step": 2648 - }, - { - "epoch": 0.31852341730295197, - "grad_norm": 0.7694614591756219, - "learning_rate": 3.188639971449956e-06, - "loss": 0.8318, - "step": 2649 - }, - { - "epoch": 0.318643660193591, - "grad_norm": 1.674447406884869, - "learning_rate": 3.1880134099756e-06, - "loss": 0.9541, - "step": 2650 - }, - { - "epoch": 0.31876390308423014, - "grad_norm": 1.6649385393959202, - "learning_rate": 3.1873866682805535e-06, - "loss": 0.9279, - "step": 2651 - }, - { - "epoch": 0.31888414597486925, - "grad_norm": 1.6013893592100765, - "learning_rate": 3.186759746459894e-06, - "loss": 1.1133, - "step": 2652 - }, - { - "epoch": 0.3190043888655083, - "grad_norm": 1.620084149447458, - "learning_rate": 3.1861326446087246e-06, - "loss": 1.0231, - "step": 2653 - }, - { - "epoch": 0.3191246317561474, - "grad_norm": 1.8322079701380525, - "learning_rate": 3.1855053628221763e-06, - "loss": 0.9384, - "step": 2654 - }, - { - "epoch": 0.3192448746467865, - "grad_norm": 2.7956685282480156, - "learning_rate": 3.184877901195407e-06, - "loss": 1.1341, - "step": 2655 - }, - { - "epoch": 0.3193651175374256, - "grad_norm": 0.8447878776744409, - "learning_rate": 3.184250259823602e-06, - "loss": 0.8993, - "step": 2656 - }, - { - "epoch": 0.3194853604280647, - "grad_norm": 1.8886357711291952, - "learning_rate": 3.183622438801974e-06, - "loss": 1.0384, - "step": 2657 - }, - { - "epoch": 0.3196056033187038, - "grad_norm": 3.3144932633747413, - "learning_rate": 3.1829944382257637e-06, - "loss": 0.9925, - "step": 2658 - }, - { - "epoch": 0.31972584620934286, - "grad_norm": 2.3982926613878357, - "learning_rate": 3.1823662581902373e-06, - "loss": 1.0533, - "step": 2659 - }, - { - "epoch": 0.31984608909998197, - "grad_norm": 3.2548669905668555, - "learning_rate": 3.1817378987906896e-06, - "loss": 0.9789, - "step": 2660 - }, - { - "epoch": 0.3199663319906211, - "grad_norm": 2.096614756687018, - "learning_rate": 3.181109360122442e-06, - "loss": 1.026, - "step": 2661 - }, - { - "epoch": 0.32008657488126013, - "grad_norm": 2.2958074670939537, - "learning_rate": 3.1804806422808445e-06, - "loss": 1.0147, - "step": 2662 - }, - { - "epoch": 0.32020681777189924, - "grad_norm": 1.7208314949499968, - "learning_rate": 3.1798517453612714e-06, - "loss": 0.9567, - "step": 2663 - }, - { - "epoch": 0.32032706066253835, - "grad_norm": 1.6844155414635227, - "learning_rate": 3.1792226694591265e-06, - "loss": 0.9844, - "step": 2664 - }, - { - "epoch": 0.3204473035531774, - "grad_norm": 2.5072403965266954, - "learning_rate": 3.178593414669841e-06, - "loss": 1.0324, - "step": 2665 - }, - { - "epoch": 0.3205675464438165, - "grad_norm": 2.367109643644113, - "learning_rate": 3.1779639810888707e-06, - "loss": 0.9374, - "step": 2666 - }, - { - "epoch": 0.3206877893344556, - "grad_norm": 1.989772394860681, - "learning_rate": 3.1773343688117013e-06, - "loss": 0.9908, - "step": 2667 - }, - { - "epoch": 0.3208080322250947, - "grad_norm": 2.954364334903881, - "learning_rate": 3.1767045779338445e-06, - "loss": 1.0676, - "step": 2668 - }, - { - "epoch": 0.3209282751157338, - "grad_norm": 1.9294654625784207, - "learning_rate": 3.176074608550839e-06, - "loss": 1.1437, - "step": 2669 - }, - { - "epoch": 0.32104851800637285, - "grad_norm": 2.213022495794955, - "learning_rate": 3.17544446075825e-06, - "loss": 1.0503, - "step": 2670 - }, - { - "epoch": 0.32116876089701196, - "grad_norm": 1.4897509749541058, - "learning_rate": 3.174814134651671e-06, - "loss": 0.9403, - "step": 2671 - }, - { - "epoch": 0.3212890037876511, - "grad_norm": 1.777204034365158, - "learning_rate": 3.1741836303267215e-06, - "loss": 1.0376, - "step": 2672 - }, - { - "epoch": 0.32140924667829013, - "grad_norm": 1.7251847237350877, - "learning_rate": 3.1735529478790496e-06, - "loss": 0.9803, - "step": 2673 - }, - { - "epoch": 0.32152948956892924, - "grad_norm": 1.8395064058848123, - "learning_rate": 3.172922087404328e-06, - "loss": 1.0265, - "step": 2674 - }, - { - "epoch": 0.32164973245956835, - "grad_norm": 0.7879527758254297, - "learning_rate": 3.1722910489982586e-06, - "loss": 0.8152, - "step": 2675 - }, - { - "epoch": 0.3217699753502074, - "grad_norm": 1.8657401632422532, - "learning_rate": 3.1716598327565694e-06, - "loss": 1.0328, - "step": 2676 - }, - { - "epoch": 0.3218902182408465, - "grad_norm": 1.4169052801947712, - "learning_rate": 3.171028438775015e-06, - "loss": 1.0689, - "step": 2677 - }, - { - "epoch": 0.3220104611314856, - "grad_norm": 1.9467524212843839, - "learning_rate": 3.170396867149377e-06, - "loss": 1.072, - "step": 2678 - }, - { - "epoch": 0.3221307040221247, - "grad_norm": 1.7314199756676927, - "learning_rate": 3.1697651179754653e-06, - "loss": 1.0887, - "step": 2679 - }, - { - "epoch": 0.3222509469127638, - "grad_norm": 1.4941460503373722, - "learning_rate": 3.1691331913491153e-06, - "loss": 0.9642, - "step": 2680 - }, - { - "epoch": 0.32237118980340285, - "grad_norm": 1.8495228796515972, - "learning_rate": 3.1685010873661898e-06, - "loss": 1.0709, - "step": 2681 - }, - { - "epoch": 0.32249143269404196, - "grad_norm": 2.097536790454649, - "learning_rate": 3.167868806122578e-06, - "loss": 1.0261, - "step": 2682 - }, - { - "epoch": 0.32261167558468107, - "grad_norm": 2.5533746268151254, - "learning_rate": 3.1672363477141968e-06, - "loss": 0.8917, - "step": 2683 - }, - { - "epoch": 0.3227319184753201, - "grad_norm": 1.9309270792487159, - "learning_rate": 3.1666037122369903e-06, - "loss": 1.085, - "step": 2684 - }, - { - "epoch": 0.32285216136595923, - "grad_norm": 1.7540742585525282, - "learning_rate": 3.165970899786928e-06, - "loss": 1.0847, - "step": 2685 - }, - { - "epoch": 0.32297240425659834, - "grad_norm": 1.6256125102723826, - "learning_rate": 3.1653379104600067e-06, - "loss": 0.9734, - "step": 2686 - }, - { - "epoch": 0.3230926471472374, - "grad_norm": 2.2791844252801696, - "learning_rate": 3.164704744352251e-06, - "loss": 0.927, - "step": 2687 - }, - { - "epoch": 0.3232128900378765, - "grad_norm": 1.6589716016880818, - "learning_rate": 3.164071401559713e-06, - "loss": 1.043, - "step": 2688 - }, - { - "epoch": 0.3233331329285156, - "grad_norm": 1.5565375433933226, - "learning_rate": 3.1634378821784674e-06, - "loss": 0.9398, - "step": 2689 - }, - { - "epoch": 0.3234533758191547, - "grad_norm": 2.0975813470463565, - "learning_rate": 3.1628041863046208e-06, - "loss": 0.9724, - "step": 2690 - }, - { - "epoch": 0.3235736187097938, - "grad_norm": 1.9639089495497448, - "learning_rate": 3.162170314034304e-06, - "loss": 1.1405, - "step": 2691 - }, - { - "epoch": 0.3236938616004329, - "grad_norm": 2.6690145428441054, - "learning_rate": 3.1615362654636738e-06, - "loss": 1.0309, - "step": 2692 - }, - { - "epoch": 0.32381410449107195, - "grad_norm": 1.5592470257981341, - "learning_rate": 3.1609020406889163e-06, - "loss": 1.1037, - "step": 2693 - }, - { - "epoch": 0.32393434738171106, - "grad_norm": 1.7066304938883747, - "learning_rate": 3.1602676398062416e-06, - "loss": 1.0728, - "step": 2694 - }, - { - "epoch": 0.3240545902723502, - "grad_norm": 2.0708809701927096, - "learning_rate": 3.1596330629118886e-06, - "loss": 0.8434, - "step": 2695 - }, - { - "epoch": 0.32417483316298923, - "grad_norm": 1.9933092138283472, - "learning_rate": 3.1589983101021223e-06, - "loss": 0.9611, - "step": 2696 - }, - { - "epoch": 0.32429507605362834, - "grad_norm": 3.702900553794485, - "learning_rate": 3.1583633814732337e-06, - "loss": 1.0786, - "step": 2697 - }, - { - "epoch": 0.3244153189442674, - "grad_norm": 9.281030517449036, - "learning_rate": 3.157728277121541e-06, - "loss": 0.9431, - "step": 2698 - }, - { - "epoch": 0.3245355618349065, - "grad_norm": 2.112263827693691, - "learning_rate": 3.1570929971433897e-06, - "loss": 1.0127, - "step": 2699 - }, - { - "epoch": 0.3246558047255456, - "grad_norm": 2.3064036041631204, - "learning_rate": 3.1564575416351504e-06, - "loss": 1.0577, - "step": 2700 - }, - { - "epoch": 0.32477604761618467, - "grad_norm": 2.6459818120030008, - "learning_rate": 3.155821910693221e-06, - "loss": 0.98, - "step": 2701 - }, - { - "epoch": 0.3248962905068238, - "grad_norm": 1.5517594257632032, - "learning_rate": 3.1551861044140275e-06, - "loss": 1.0846, - "step": 2702 - }, - { - "epoch": 0.3250165333974629, - "grad_norm": 1.4732174398545357, - "learning_rate": 3.15455012289402e-06, - "loss": 0.999, - "step": 2703 - }, - { - "epoch": 0.32513677628810195, - "grad_norm": 1.5260046823162272, - "learning_rate": 3.153913966229677e-06, - "loss": 1.0653, - "step": 2704 - }, - { - "epoch": 0.32525701917874106, - "grad_norm": 0.6568393331729754, - "learning_rate": 3.1532776345175027e-06, - "loss": 0.7476, - "step": 2705 - }, - { - "epoch": 0.32537726206938017, - "grad_norm": 1.7047907531405162, - "learning_rate": 3.1526411278540285e-06, - "loss": 1.0078, - "step": 2706 - }, - { - "epoch": 0.3254975049600192, - "grad_norm": 2.0524436389094, - "learning_rate": 3.1520044463358116e-06, - "loss": 1.0451, - "step": 2707 - }, - { - "epoch": 0.32561774785065833, - "grad_norm": 1.4054874047848491, - "learning_rate": 3.151367590059436e-06, - "loss": 1.0278, - "step": 2708 - }, - { - "epoch": 0.32573799074129745, - "grad_norm": 1.8590954482274753, - "learning_rate": 3.1507305591215117e-06, - "loss": 1.0922, - "step": 2709 - }, - { - "epoch": 0.3258582336319365, - "grad_norm": 0.7409499039209007, - "learning_rate": 3.150093353618677e-06, - "loss": 0.8145, - "step": 2710 - }, - { - "epoch": 0.3259784765225756, - "grad_norm": 2.1640295583923863, - "learning_rate": 3.149455973647596e-06, - "loss": 1.1102, - "step": 2711 - }, - { - "epoch": 0.32609871941321467, - "grad_norm": 1.8567511373351797, - "learning_rate": 3.1488184193049563e-06, - "loss": 1.0024, - "step": 2712 - }, - { - "epoch": 0.3262189623038538, - "grad_norm": 1.6658227055286774, - "learning_rate": 3.1481806906874767e-06, - "loss": 0.958, - "step": 2713 - }, - { - "epoch": 0.3263392051944929, - "grad_norm": 1.411894047149747, - "learning_rate": 3.147542787891899e-06, - "loss": 1.1056, - "step": 2714 - }, - { - "epoch": 0.32645944808513194, - "grad_norm": 1.6527903429011437, - "learning_rate": 3.1469047110149926e-06, - "loss": 0.9761, - "step": 2715 - }, - { - "epoch": 0.32657969097577105, - "grad_norm": 1.7814222553686163, - "learning_rate": 3.146266460153554e-06, - "loss": 1.0827, - "step": 2716 - }, - { - "epoch": 0.32669993386641016, - "grad_norm": 1.6047162522337417, - "learning_rate": 3.145628035404404e-06, - "loss": 1.0279, - "step": 2717 - }, - { - "epoch": 0.3268201767570492, - "grad_norm": 1.1384351428643913, - "learning_rate": 3.1449894368643922e-06, - "loss": 0.8343, - "step": 2718 - }, - { - "epoch": 0.32694041964768833, - "grad_norm": 1.3333318630846183, - "learning_rate": 3.1443506646303934e-06, - "loss": 0.9489, - "step": 2719 - }, - { - "epoch": 0.32706066253832744, - "grad_norm": 2.127374108217329, - "learning_rate": 3.1437117187993086e-06, - "loss": 0.8966, - "step": 2720 - }, - { - "epoch": 0.3271809054289665, - "grad_norm": 1.6712250203653212, - "learning_rate": 3.143072599468065e-06, - "loss": 1.0283, - "step": 2721 - }, - { - "epoch": 0.3273011483196056, - "grad_norm": 1.6215274092868746, - "learning_rate": 3.1424333067336174e-06, - "loss": 0.9849, - "step": 2722 - }, - { - "epoch": 0.3274213912102447, - "grad_norm": 1.6025384673978176, - "learning_rate": 3.141793840692945e-06, - "loss": 0.9986, - "step": 2723 - }, - { - "epoch": 0.32754163410088377, - "grad_norm": 2.0856277292957914, - "learning_rate": 3.1411542014430553e-06, - "loss": 0.8425, - "step": 2724 - }, - { - "epoch": 0.3276618769915229, - "grad_norm": 1.7452464939022652, - "learning_rate": 3.1405143890809804e-06, - "loss": 1.0528, - "step": 2725 - }, - { - "epoch": 0.327782119882162, - "grad_norm": 1.508534234017255, - "learning_rate": 3.1398744037037796e-06, - "loss": 0.9288, - "step": 2726 - }, - { - "epoch": 0.32790236277280105, - "grad_norm": 1.597398732517208, - "learning_rate": 3.139234245408538e-06, - "loss": 1.0705, - "step": 2727 - }, - { - "epoch": 0.32802260566344016, - "grad_norm": 1.4988909436051487, - "learning_rate": 3.1385939142923666e-06, - "loss": 0.9982, - "step": 2728 - }, - { - "epoch": 0.3281428485540792, - "grad_norm": 3.4550858535809548, - "learning_rate": 3.137953410452405e-06, - "loss": 1.0085, - "step": 2729 - }, - { - "epoch": 0.3282630914447183, - "grad_norm": 1.5511175218645208, - "learning_rate": 3.1373127339858146e-06, - "loss": 0.9805, - "step": 2730 - }, - { - "epoch": 0.32838333433535744, - "grad_norm": 1.7705495569575505, - "learning_rate": 3.136671884989787e-06, - "loss": 0.9722, - "step": 2731 - }, - { - "epoch": 0.3285035772259965, - "grad_norm": 2.384884193215945, - "learning_rate": 3.1360308635615383e-06, - "loss": 1.0996, - "step": 2732 - }, - { - "epoch": 0.3286238201166356, - "grad_norm": 1.8035082907193978, - "learning_rate": 3.135389669798311e-06, - "loss": 1.0172, - "step": 2733 - }, - { - "epoch": 0.3287440630072747, - "grad_norm": 2.2285413013420032, - "learning_rate": 3.134748303797373e-06, - "loss": 1.0317, - "step": 2734 - }, - { - "epoch": 0.32886430589791377, - "grad_norm": 1.9815069182451073, - "learning_rate": 3.1341067656560203e-06, - "loss": 1.0371, - "step": 2735 - }, - { - "epoch": 0.3289845487885529, - "grad_norm": 2.071319910779242, - "learning_rate": 3.133465055471572e-06, - "loss": 1.0948, - "step": 2736 - }, - { - "epoch": 0.329104791679192, - "grad_norm": 2.507812024648271, - "learning_rate": 3.1328231733413767e-06, - "loss": 0.8875, - "step": 2737 - }, - { - "epoch": 0.32922503456983104, - "grad_norm": 2.074693758323482, - "learning_rate": 3.1321811193628067e-06, - "loss": 1.1307, - "step": 2738 - }, - { - "epoch": 0.32934527746047015, - "grad_norm": 2.0119212817118197, - "learning_rate": 3.131538893633261e-06, - "loss": 0.9376, - "step": 2739 - }, - { - "epoch": 0.32946552035110926, - "grad_norm": 2.109503622018942, - "learning_rate": 3.130896496250165e-06, - "loss": 1.013, - "step": 2740 - }, - { - "epoch": 0.3295857632417483, - "grad_norm": 1.8139115460499475, - "learning_rate": 3.1302539273109693e-06, - "loss": 1.0954, - "step": 2741 - }, - { - "epoch": 0.32970600613238743, - "grad_norm": 1.5901651643344068, - "learning_rate": 3.1296111869131513e-06, - "loss": 1.0358, - "step": 2742 - }, - { - "epoch": 0.32982624902302654, - "grad_norm": 1.7254172967564012, - "learning_rate": 3.1289682751542153e-06, - "loss": 1.0815, - "step": 2743 - }, - { - "epoch": 0.3299464919136656, - "grad_norm": 1.9573602152445753, - "learning_rate": 3.1283251921316883e-06, - "loss": 0.9467, - "step": 2744 - }, - { - "epoch": 0.3300667348043047, - "grad_norm": 1.9071829342088171, - "learning_rate": 3.1276819379431277e-06, - "loss": 1.0492, - "step": 2745 - }, - { - "epoch": 0.33018697769494376, - "grad_norm": 1.6876128123686502, - "learning_rate": 3.1270385126861134e-06, - "loss": 0.9786, - "step": 2746 - }, - { - "epoch": 0.3303072205855829, - "grad_norm": 1.6035854310466022, - "learning_rate": 3.1263949164582533e-06, - "loss": 1.0499, - "step": 2747 - }, - { - "epoch": 0.330427463476222, - "grad_norm": 1.7770407617559765, - "learning_rate": 3.1257511493571797e-06, - "loss": 1.0056, - "step": 2748 - }, - { - "epoch": 0.33054770636686104, - "grad_norm": 4.7526408433587655, - "learning_rate": 3.125107211480552e-06, - "loss": 1.0149, - "step": 2749 - }, - { - "epoch": 0.33066794925750015, - "grad_norm": 1.610994727582491, - "learning_rate": 3.124463102926054e-06, - "loss": 1.0228, - "step": 2750 - }, - { - "epoch": 0.33078819214813926, - "grad_norm": 0.7600598782868297, - "learning_rate": 3.1238188237913984e-06, - "loss": 0.8592, - "step": 2751 - }, - { - "epoch": 0.3309084350387783, - "grad_norm": 1.8569931032774778, - "learning_rate": 3.1231743741743202e-06, - "loss": 0.9917, - "step": 2752 - }, - { - "epoch": 0.3310286779294174, - "grad_norm": 3.6724741345475667, - "learning_rate": 3.122529754172582e-06, - "loss": 1.0671, - "step": 2753 - }, - { - "epoch": 0.33114892082005654, - "grad_norm": 1.825451975207193, - "learning_rate": 3.1218849638839736e-06, - "loss": 0.9529, - "step": 2754 - }, - { - "epoch": 0.3312691637106956, - "grad_norm": 1.9626693069416061, - "learning_rate": 3.121240003406307e-06, - "loss": 1.0087, - "step": 2755 - }, - { - "epoch": 0.3313894066013347, - "grad_norm": 1.7159090317709476, - "learning_rate": 3.120594872837425e-06, - "loss": 0.947, - "step": 2756 - }, - { - "epoch": 0.3315096494919738, - "grad_norm": 0.8290633517425222, - "learning_rate": 3.1199495722751906e-06, - "loss": 0.878, - "step": 2757 - }, - { - "epoch": 0.33162989238261287, - "grad_norm": 1.4932293674915482, - "learning_rate": 3.1193041018174972e-06, - "loss": 1.0703, - "step": 2758 - }, - { - "epoch": 0.331750135273252, - "grad_norm": 2.1552464457983604, - "learning_rate": 3.118658461562261e-06, - "loss": 1.1784, - "step": 2759 - }, - { - "epoch": 0.33187037816389103, - "grad_norm": 1.4644203489659573, - "learning_rate": 3.118012651607426e-06, - "loss": 1.0792, - "step": 2760 - }, - { - "epoch": 0.33199062105453014, - "grad_norm": 2.2718468833558245, - "learning_rate": 3.1173666720509603e-06, - "loss": 1.0604, - "step": 2761 - }, - { - "epoch": 0.33211086394516925, - "grad_norm": 1.587566422402014, - "learning_rate": 3.116720522990859e-06, - "loss": 0.9142, - "step": 2762 - }, - { - "epoch": 0.3322311068358083, - "grad_norm": 1.646750287250675, - "learning_rate": 3.116074204525142e-06, - "loss": 0.8507, - "step": 2763 - }, - { - "epoch": 0.3323513497264474, - "grad_norm": 1.986021485192289, - "learning_rate": 3.1154277167518553e-06, - "loss": 1.0574, - "step": 2764 - }, - { - "epoch": 0.33247159261708653, - "grad_norm": 0.8459804159607643, - "learning_rate": 3.114781059769072e-06, - "loss": 0.8562, - "step": 2765 - }, - { - "epoch": 0.3325918355077256, - "grad_norm": 2.4648952085803213, - "learning_rate": 3.1141342336748874e-06, - "loss": 0.9165, - "step": 2766 - }, - { - "epoch": 0.3327120783983647, - "grad_norm": 1.5437717328105547, - "learning_rate": 3.1134872385674253e-06, - "loss": 1.0447, - "step": 2767 - }, - { - "epoch": 0.3328323212890038, - "grad_norm": 2.410633998119205, - "learning_rate": 3.1128400745448353e-06, - "loss": 1.0946, - "step": 2768 - }, - { - "epoch": 0.33295256417964286, - "grad_norm": 2.544550109549774, - "learning_rate": 3.11219274170529e-06, - "loss": 0.8586, - "step": 2769 - }, - { - "epoch": 0.333072807070282, - "grad_norm": 1.6326671334859593, - "learning_rate": 3.1115452401469903e-06, - "loss": 1.0387, - "step": 2770 - }, - { - "epoch": 0.3331930499609211, - "grad_norm": 2.5177992434352108, - "learning_rate": 3.1108975699681613e-06, - "loss": 1.0923, - "step": 2771 - }, - { - "epoch": 0.33331329285156014, - "grad_norm": 1.494511257901357, - "learning_rate": 3.1102497312670542e-06, - "loss": 0.9417, - "step": 2772 - }, - { - "epoch": 0.33343353574219925, - "grad_norm": 1.9094940473089657, - "learning_rate": 3.109601724141946e-06, - "loss": 1.0324, - "step": 2773 - }, - { - "epoch": 0.33355377863283836, - "grad_norm": 2.2192677713077753, - "learning_rate": 3.108953548691138e-06, - "loss": 0.9116, - "step": 2774 - }, - { - "epoch": 0.3336740215234774, - "grad_norm": 2.145433357763074, - "learning_rate": 3.108305205012959e-06, - "loss": 0.951, - "step": 2775 - }, - { - "epoch": 0.3337942644141165, - "grad_norm": 2.1508203227868865, - "learning_rate": 3.107656693205761e-06, - "loss": 1.1035, - "step": 2776 - }, - { - "epoch": 0.3339145073047556, - "grad_norm": 3.905476119631037, - "learning_rate": 3.107008013367924e-06, - "loss": 0.9247, - "step": 2777 - }, - { - "epoch": 0.3340347501953947, - "grad_norm": 1.84478394564006, - "learning_rate": 3.1063591655978507e-06, - "loss": 1.0982, - "step": 2778 - }, - { - "epoch": 0.3341549930860338, - "grad_norm": 1.5820398401097806, - "learning_rate": 3.105710149993972e-06, - "loss": 1.0184, - "step": 2779 - }, - { - "epoch": 0.33427523597667286, - "grad_norm": 2.5071493918023595, - "learning_rate": 3.1050609666547427e-06, - "loss": 1.0794, - "step": 2780 - }, - { - "epoch": 0.33439547886731197, - "grad_norm": 1.7277964329640885, - "learning_rate": 3.104411615678644e-06, - "loss": 0.9968, - "step": 2781 - }, - { - "epoch": 0.3345157217579511, - "grad_norm": 2.40075750317484, - "learning_rate": 3.1037620971641803e-06, - "loss": 0.9635, - "step": 2782 - }, - { - "epoch": 0.33463596464859013, - "grad_norm": 2.634156560382646, - "learning_rate": 3.1031124112098844e-06, - "loss": 0.8722, - "step": 2783 - }, - { - "epoch": 0.33475620753922924, - "grad_norm": 2.382040930728627, - "learning_rate": 3.1024625579143127e-06, - "loss": 0.948, - "step": 2784 - }, - { - "epoch": 0.33487645042986836, - "grad_norm": 1.6349842285422165, - "learning_rate": 3.101812537376048e-06, - "loss": 0.958, - "step": 2785 - }, - { - "epoch": 0.3349966933205074, - "grad_norm": 1.944532858639853, - "learning_rate": 3.1011623496936973e-06, - "loss": 1.0743, - "step": 2786 - }, - { - "epoch": 0.3351169362111465, - "grad_norm": 1.65761164707783, - "learning_rate": 3.100511994965893e-06, - "loss": 0.9266, - "step": 2787 - }, - { - "epoch": 0.33523717910178563, - "grad_norm": 1.5610042279007208, - "learning_rate": 3.0998614732912947e-06, - "loss": 1.0897, - "step": 2788 - }, - { - "epoch": 0.3353574219924247, - "grad_norm": 1.8825202176856208, - "learning_rate": 3.0992107847685855e-06, - "loss": 0.9101, - "step": 2789 - }, - { - "epoch": 0.3354776648830638, - "grad_norm": 2.5660161341682493, - "learning_rate": 3.0985599294964736e-06, - "loss": 1.0275, - "step": 2790 - }, - { - "epoch": 0.33559790777370285, - "grad_norm": 2.9818350483783145, - "learning_rate": 3.097908907573695e-06, - "loss": 0.9298, - "step": 2791 - }, - { - "epoch": 0.33571815066434196, - "grad_norm": 6.42335986863208, - "learning_rate": 3.0972577190990067e-06, - "loss": 1.12, - "step": 2792 - }, - { - "epoch": 0.3358383935549811, - "grad_norm": 1.7157045606498849, - "learning_rate": 3.096606364171196e-06, - "loss": 1.0299, - "step": 2793 - }, - { - "epoch": 0.33595863644562013, - "grad_norm": 2.2492882344522918, - "learning_rate": 3.0959548428890703e-06, - "loss": 1.0812, - "step": 2794 - }, - { - "epoch": 0.33607887933625924, - "grad_norm": 1.5518827202029166, - "learning_rate": 3.095303155351468e-06, - "loss": 1.0608, - "step": 2795 - }, - { - "epoch": 0.33619912222689835, - "grad_norm": 2.4385371813650702, - "learning_rate": 3.0946513016572464e-06, - "loss": 1.0216, - "step": 2796 - }, - { - "epoch": 0.3363193651175374, - "grad_norm": 2.003425882627405, - "learning_rate": 3.0939992819052938e-06, - "loss": 0.9963, - "step": 2797 - }, - { - "epoch": 0.3364396080081765, - "grad_norm": 2.1393066578992324, - "learning_rate": 3.0933470961945193e-06, - "loss": 1.0415, - "step": 2798 - }, - { - "epoch": 0.3365598508988156, - "grad_norm": 2.327241461107622, - "learning_rate": 3.0926947446238597e-06, - "loss": 0.9225, - "step": 2799 - }, - { - "epoch": 0.3366800937894547, - "grad_norm": 6.253891305709034, - "learning_rate": 3.092042227292276e-06, - "loss": 1.0513, - "step": 2800 - }, - { - "epoch": 0.3368003366800938, - "grad_norm": 1.5535628501768002, - "learning_rate": 3.0913895442987557e-06, - "loss": 1.111, - "step": 2801 - }, - { - "epoch": 0.3369205795707329, - "grad_norm": 1.880857093310532, - "learning_rate": 3.090736695742308e-06, - "loss": 1.0782, - "step": 2802 - }, - { - "epoch": 0.33704082246137196, - "grad_norm": 2.0801784150925475, - "learning_rate": 3.0900836817219713e-06, - "loss": 0.741, - "step": 2803 - }, - { - "epoch": 0.33716106535201107, - "grad_norm": 2.2069178189186043, - "learning_rate": 3.089430502336807e-06, - "loss": 1.0709, - "step": 2804 - }, - { - "epoch": 0.3372813082426502, - "grad_norm": 2.0594731864619202, - "learning_rate": 3.088777157685902e-06, - "loss": 1.1257, - "step": 2805 - }, - { - "epoch": 0.33740155113328923, - "grad_norm": 2.343062134573769, - "learning_rate": 3.088123647868367e-06, - "loss": 1.0863, - "step": 2806 - }, - { - "epoch": 0.33752179402392835, - "grad_norm": 1.922480232028005, - "learning_rate": 3.0874699729833405e-06, - "loss": 1.0441, - "step": 2807 - }, - { - "epoch": 0.3376420369145674, - "grad_norm": 1.481216362679902, - "learning_rate": 3.086816133129983e-06, - "loss": 1.0302, - "step": 2808 - }, - { - "epoch": 0.3377622798052065, - "grad_norm": 1.627947262015214, - "learning_rate": 3.0861621284074826e-06, - "loss": 0.9949, - "step": 2809 - }, - { - "epoch": 0.3378825226958456, - "grad_norm": 1.6768640252378733, - "learning_rate": 3.085507958915051e-06, - "loss": 0.9605, - "step": 2810 - }, - { - "epoch": 0.3380027655864847, - "grad_norm": 1.992975116691155, - "learning_rate": 3.084853624751925e-06, - "loss": 0.9388, - "step": 2811 - }, - { - "epoch": 0.3381230084771238, - "grad_norm": 3.5082526323787993, - "learning_rate": 3.0841991260173668e-06, - "loss": 1.0876, - "step": 2812 - }, - { - "epoch": 0.3382432513677629, - "grad_norm": 2.3313025992751792, - "learning_rate": 3.0835444628106634e-06, - "loss": 1.0266, - "step": 2813 - }, - { - "epoch": 0.33836349425840195, - "grad_norm": 1.6867182298299916, - "learning_rate": 3.082889635231126e-06, - "loss": 1.0611, - "step": 2814 - }, - { - "epoch": 0.33848373714904106, - "grad_norm": 2.2048806844595097, - "learning_rate": 3.0822346433780925e-06, - "loss": 0.9873, - "step": 2815 - }, - { - "epoch": 0.3386039800396802, - "grad_norm": 1.960403916915097, - "learning_rate": 3.0815794873509237e-06, - "loss": 1.0953, - "step": 2816 - }, - { - "epoch": 0.33872422293031923, - "grad_norm": 1.8510502617728934, - "learning_rate": 3.0809241672490066e-06, - "loss": 0.9566, - "step": 2817 - }, - { - "epoch": 0.33884446582095834, - "grad_norm": 1.5013166053601184, - "learning_rate": 3.080268683171753e-06, - "loss": 1.0771, - "step": 2818 - }, - { - "epoch": 0.33896470871159745, - "grad_norm": 1.9410139028964173, - "learning_rate": 3.0796130352185985e-06, - "loss": 1.1119, - "step": 2819 - }, - { - "epoch": 0.3390849516022365, - "grad_norm": 1.7792049514022374, - "learning_rate": 3.0789572234890057e-06, - "loss": 0.905, - "step": 2820 - }, - { - "epoch": 0.3392051944928756, - "grad_norm": 1.6210815429227894, - "learning_rate": 3.0783012480824596e-06, - "loss": 1.0079, - "step": 2821 - }, - { - "epoch": 0.33932543738351467, - "grad_norm": 2.0955108525935633, - "learning_rate": 3.077645109098471e-06, - "loss": 0.9717, - "step": 2822 - }, - { - "epoch": 0.3394456802741538, - "grad_norm": 1.5505056448608896, - "learning_rate": 3.076988806636577e-06, - "loss": 0.9443, - "step": 2823 - }, - { - "epoch": 0.3395659231647929, - "grad_norm": 1.697140178719591, - "learning_rate": 3.0763323407963377e-06, - "loss": 1.115, - "step": 2824 - }, - { - "epoch": 0.33968616605543195, - "grad_norm": 1.6181444663022095, - "learning_rate": 3.075675711677337e-06, - "loss": 1.0328, - "step": 2825 - }, - { - "epoch": 0.33980640894607106, - "grad_norm": 4.8823179437044, - "learning_rate": 3.0750189193791865e-06, - "loss": 1.002, - "step": 2826 - }, - { - "epoch": 0.33992665183671017, - "grad_norm": 1.800728290300422, - "learning_rate": 3.0743619640015203e-06, - "loss": 0.9324, - "step": 2827 - }, - { - "epoch": 0.3400468947273492, - "grad_norm": 2.1820100831761238, - "learning_rate": 3.073704845643999e-06, - "loss": 1.1525, - "step": 2828 - }, - { - "epoch": 0.34016713761798834, - "grad_norm": 3.6279787124463487, - "learning_rate": 3.0730475644063063e-06, - "loss": 1.01, - "step": 2829 - }, - { - "epoch": 0.34028738050862745, - "grad_norm": 1.696722753730657, - "learning_rate": 3.072390120388151e-06, - "loss": 0.8849, - "step": 2830 - }, - { - "epoch": 0.3404076233992665, - "grad_norm": 3.6094895381836647, - "learning_rate": 3.071732513689267e-06, - "loss": 0.9393, - "step": 2831 - }, - { - "epoch": 0.3405278662899056, - "grad_norm": 5.504750541003357, - "learning_rate": 3.0710747444094134e-06, - "loss": 0.9096, - "step": 2832 - }, - { - "epoch": 0.3406481091805447, - "grad_norm": 1.787905789711673, - "learning_rate": 3.070416812648372e-06, - "loss": 0.8824, - "step": 2833 - }, - { - "epoch": 0.3407683520711838, - "grad_norm": 1.7688740231870685, - "learning_rate": 3.069758718505951e-06, - "loss": 0.8895, - "step": 2834 - }, - { - "epoch": 0.3408885949618229, - "grad_norm": 1.687152473551042, - "learning_rate": 3.0691004620819836e-06, - "loss": 1.0345, - "step": 2835 - }, - { - "epoch": 0.341008837852462, - "grad_norm": 0.8176416422543041, - "learning_rate": 3.0684420434763254e-06, - "loss": 0.8667, - "step": 2836 - }, - { - "epoch": 0.34112908074310105, - "grad_norm": 1.6826465889724844, - "learning_rate": 3.06778346278886e-06, - "loss": 0.9951, - "step": 2837 - }, - { - "epoch": 0.34124932363374016, - "grad_norm": 1.907582552062198, - "learning_rate": 3.0671247201194906e-06, - "loss": 1.0115, - "step": 2838 - }, - { - "epoch": 0.3413695665243792, - "grad_norm": 1.659500387450906, - "learning_rate": 3.066465815568151e-06, - "loss": 0.9808, - "step": 2839 - }, - { - "epoch": 0.34148980941501833, - "grad_norm": 1.612851434306802, - "learning_rate": 3.0658067492347947e-06, - "loss": 0.9169, - "step": 2840 - }, - { - "epoch": 0.34161005230565744, - "grad_norm": 3.7930024417594774, - "learning_rate": 3.065147521219402e-06, - "loss": 0.9002, - "step": 2841 - }, - { - "epoch": 0.3417302951962965, - "grad_norm": 1.4425684267736045, - "learning_rate": 3.064488131621977e-06, - "loss": 0.9773, - "step": 2842 - }, - { - "epoch": 0.3418505380869356, - "grad_norm": 1.679977817729554, - "learning_rate": 3.063828580542549e-06, - "loss": 0.9651, - "step": 2843 - }, - { - "epoch": 0.3419707809775747, - "grad_norm": 1.681965689083505, - "learning_rate": 3.0631688680811706e-06, - "loss": 0.9507, - "step": 2844 - }, - { - "epoch": 0.3420910238682138, - "grad_norm": 1.8472641897891708, - "learning_rate": 3.062508994337921e-06, - "loss": 0.9835, - "step": 2845 - }, - { - "epoch": 0.3422112667588529, - "grad_norm": 1.8004024876844962, - "learning_rate": 3.0618489594129013e-06, - "loss": 1.0158, - "step": 2846 - }, - { - "epoch": 0.342331509649492, - "grad_norm": 2.07271301444867, - "learning_rate": 3.061188763406239e-06, - "loss": 0.9326, - "step": 2847 - }, - { - "epoch": 0.34245175254013105, - "grad_norm": 2.9633569821879098, - "learning_rate": 3.060528406418085e-06, - "loss": 1.0515, - "step": 2848 - }, - { - "epoch": 0.34257199543077016, - "grad_norm": 1.408580459642047, - "learning_rate": 3.0598678885486145e-06, - "loss": 0.8508, - "step": 2849 - }, - { - "epoch": 0.34269223832140927, - "grad_norm": 2.0925758827515053, - "learning_rate": 3.0592072098980282e-06, - "loss": 0.9738, - "step": 2850 - }, - { - "epoch": 0.3428124812120483, - "grad_norm": 2.5043285091189, - "learning_rate": 3.0585463705665514e-06, - "loss": 0.958, - "step": 2851 - }, - { - "epoch": 0.34293272410268744, - "grad_norm": 2.073700748569258, - "learning_rate": 3.0578853706544304e-06, - "loss": 0.9365, - "step": 2852 - }, - { - "epoch": 0.34305296699332655, - "grad_norm": 1.7831257096164495, - "learning_rate": 3.0572242102619404e-06, - "loss": 0.8776, - "step": 2853 - }, - { - "epoch": 0.3431732098839656, - "grad_norm": 1.6853150244746442, - "learning_rate": 3.0565628894893784e-06, - "loss": 1.0351, - "step": 2854 - }, - { - "epoch": 0.3432934527746047, - "grad_norm": 2.18006002326016, - "learning_rate": 3.0559014084370655e-06, - "loss": 0.9693, - "step": 2855 - }, - { - "epoch": 0.34341369566524377, - "grad_norm": 2.351313479599371, - "learning_rate": 3.055239767205349e-06, - "loss": 1.0068, - "step": 2856 - }, - { - "epoch": 0.3435339385558829, - "grad_norm": 1.9809114762640034, - "learning_rate": 3.054577965894599e-06, - "loss": 1.0018, - "step": 2857 - }, - { - "epoch": 0.343654181446522, - "grad_norm": 1.5545182639172088, - "learning_rate": 3.0539160046052094e-06, - "loss": 0.9335, - "step": 2858 - }, - { - "epoch": 0.34377442433716104, - "grad_norm": 2.839666599798338, - "learning_rate": 3.0532538834376003e-06, - "loss": 0.9331, - "step": 2859 - }, - { - "epoch": 0.34389466722780015, - "grad_norm": 1.9095524806871407, - "learning_rate": 3.0525916024922143e-06, - "loss": 1.0134, - "step": 2860 - }, - { - "epoch": 0.34401491011843927, - "grad_norm": 2.5834170502254388, - "learning_rate": 3.0519291618695193e-06, - "loss": 1.0675, - "step": 2861 - }, - { - "epoch": 0.3441351530090783, - "grad_norm": 1.8534522090952728, - "learning_rate": 3.0512665616700065e-06, - "loss": 0.9844, - "step": 2862 - }, - { - "epoch": 0.34425539589971743, - "grad_norm": 1.7578476965877345, - "learning_rate": 3.0506038019941933e-06, - "loss": 1.1278, - "step": 2863 - }, - { - "epoch": 0.34437563879035654, - "grad_norm": 2.144038020323514, - "learning_rate": 3.049940882942617e-06, - "loss": 0.9092, - "step": 2864 - }, - { - "epoch": 0.3444958816809956, - "grad_norm": 1.6971531030649296, - "learning_rate": 3.0492778046158448e-06, - "loss": 1.0246, - "step": 2865 - }, - { - "epoch": 0.3446161245716347, - "grad_norm": 36.09864178160395, - "learning_rate": 3.0486145671144633e-06, - "loss": 0.9901, - "step": 2866 - }, - { - "epoch": 0.3447363674622738, - "grad_norm": 6.538299406639266, - "learning_rate": 3.047951170539086e-06, - "loss": 0.9959, - "step": 2867 - }, - { - "epoch": 0.3448566103529129, - "grad_norm": 1.9814604133041813, - "learning_rate": 3.047287614990349e-06, - "loss": 1.0715, - "step": 2868 - }, - { - "epoch": 0.344976853243552, - "grad_norm": 2.6535706414278577, - "learning_rate": 3.046623900568914e-06, - "loss": 0.8473, - "step": 2869 - }, - { - "epoch": 0.34509709613419104, - "grad_norm": 5.002478176147501, - "learning_rate": 3.045960027375465e-06, - "loss": 0.9281, - "step": 2870 - }, - { - "epoch": 0.34521733902483015, - "grad_norm": 2.331567459377265, - "learning_rate": 3.045295995510711e-06, - "loss": 1.0471, - "step": 2871 - }, - { - "epoch": 0.34533758191546926, - "grad_norm": 1.8091401496231279, - "learning_rate": 3.0446318050753865e-06, - "loss": 0.9616, - "step": 2872 - }, - { - "epoch": 0.3454578248061083, - "grad_norm": 1.848871520963935, - "learning_rate": 3.0439674561702474e-06, - "loss": 1.0123, - "step": 2873 - }, - { - "epoch": 0.3455780676967474, - "grad_norm": 2.142890789131137, - "learning_rate": 3.043302948896076e-06, - "loss": 1.1081, - "step": 2874 - }, - { - "epoch": 0.34569831058738654, - "grad_norm": 1.7244181716011042, - "learning_rate": 3.0426382833536756e-06, - "loss": 0.8308, - "step": 2875 - }, - { - "epoch": 0.3458185534780256, - "grad_norm": 2.1845386623908647, - "learning_rate": 3.041973459643877e-06, - "loss": 1.0099, - "step": 2876 - }, - { - "epoch": 0.3459387963686647, - "grad_norm": 2.0952560929252027, - "learning_rate": 3.0413084778675334e-06, - "loss": 0.9015, - "step": 2877 - }, - { - "epoch": 0.3460590392593038, - "grad_norm": 1.7872063615673777, - "learning_rate": 3.0406433381255214e-06, - "loss": 1.0629, - "step": 2878 - }, - { - "epoch": 0.34617928214994287, - "grad_norm": 3.7897299267204345, - "learning_rate": 3.0399780405187425e-06, - "loss": 1.0506, - "step": 2879 - }, - { - "epoch": 0.346299525040582, - "grad_norm": 2.0833534494064634, - "learning_rate": 3.0393125851481216e-06, - "loss": 1.0108, - "step": 2880 - }, - { - "epoch": 0.3464197679312211, - "grad_norm": 1.8755438969437086, - "learning_rate": 3.038646972114608e-06, - "loss": 1.0932, - "step": 2881 - }, - { - "epoch": 0.34654001082186014, - "grad_norm": 1.5006476434653995, - "learning_rate": 3.037981201519174e-06, - "loss": 0.9042, - "step": 2882 - }, - { - "epoch": 0.34666025371249926, - "grad_norm": 2.676862556665514, - "learning_rate": 3.0373152734628175e-06, - "loss": 0.937, - "step": 2883 - }, - { - "epoch": 0.34678049660313837, - "grad_norm": 1.9317607167310662, - "learning_rate": 3.0366491880465584e-06, - "loss": 0.9875, - "step": 2884 - }, - { - "epoch": 0.3469007394937774, - "grad_norm": 1.7245518641316875, - "learning_rate": 3.035982945371443e-06, - "loss": 1.051, - "step": 2885 - }, - { - "epoch": 0.34702098238441653, - "grad_norm": 2.2548100660905046, - "learning_rate": 3.035316545538537e-06, - "loss": 1.0817, - "step": 2886 - }, - { - "epoch": 0.3471412252750556, - "grad_norm": 2.2576940168819926, - "learning_rate": 3.034649988648935e-06, - "loss": 1.028, - "step": 2887 - }, - { - "epoch": 0.3472614681656947, - "grad_norm": 1.5662441407601742, - "learning_rate": 3.033983274803752e-06, - "loss": 1.0388, - "step": 2888 - }, - { - "epoch": 0.3473817110563338, - "grad_norm": 2.715168929350601, - "learning_rate": 3.0333164041041283e-06, - "loss": 0.9426, - "step": 2889 - }, - { - "epoch": 0.34750195394697286, - "grad_norm": 2.4637749677735163, - "learning_rate": 3.032649376651228e-06, - "loss": 0.9522, - "step": 2890 - }, - { - "epoch": 0.347622196837612, - "grad_norm": 1.4465786723141234, - "learning_rate": 3.031982192546238e-06, - "loss": 0.9903, - "step": 2891 - }, - { - "epoch": 0.3477424397282511, - "grad_norm": 2.1215175816411813, - "learning_rate": 3.0313148518903696e-06, - "loss": 1.1775, - "step": 2892 - }, - { - "epoch": 0.34786268261889014, - "grad_norm": 38.04678195573838, - "learning_rate": 3.030647354784859e-06, - "loss": 1.032, - "step": 2893 - }, - { - "epoch": 0.34798292550952925, - "grad_norm": 1.8098326820681268, - "learning_rate": 3.029979701330964e-06, - "loss": 1.0014, - "step": 2894 - }, - { - "epoch": 0.34810316840016836, - "grad_norm": 1.914519274957544, - "learning_rate": 3.029311891629966e-06, - "loss": 1.0338, - "step": 2895 - }, - { - "epoch": 0.3482234112908074, - "grad_norm": 1.9146825914800978, - "learning_rate": 3.0286439257831744e-06, - "loss": 0.9738, - "step": 2896 - }, - { - "epoch": 0.3483436541814465, - "grad_norm": 1.9056144029942599, - "learning_rate": 3.0279758038919156e-06, - "loss": 0.9396, - "step": 2897 - }, - { - "epoch": 0.34846389707208564, - "grad_norm": 2.0398290369912346, - "learning_rate": 3.0273075260575455e-06, - "loss": 1.0108, - "step": 2898 - }, - { - "epoch": 0.3485841399627247, - "grad_norm": 12.17414445005563, - "learning_rate": 3.0266390923814396e-06, - "loss": 1.0256, - "step": 2899 - }, - { - "epoch": 0.3487043828533638, - "grad_norm": 1.5641407553562845, - "learning_rate": 3.0259705029650008e-06, - "loss": 1.0477, - "step": 2900 - }, - { - "epoch": 0.34882462574400286, - "grad_norm": 2.3240653876989024, - "learning_rate": 3.025301757909652e-06, - "loss": 0.9603, - "step": 2901 - }, - { - "epoch": 0.34894486863464197, - "grad_norm": 1.4593168847481632, - "learning_rate": 3.024632857316842e-06, - "loss": 1.0357, - "step": 2902 - }, - { - "epoch": 0.3490651115252811, - "grad_norm": 2.218243285868138, - "learning_rate": 3.0239638012880412e-06, - "loss": 1.0098, - "step": 2903 - }, - { - "epoch": 0.34918535441592014, - "grad_norm": 2.2245311371640546, - "learning_rate": 3.0232945899247466e-06, - "loss": 1.0346, - "step": 2904 - }, - { - "epoch": 0.34930559730655925, - "grad_norm": 2.610683273092465, - "learning_rate": 3.022625223328476e-06, - "loss": 1.0104, - "step": 2905 - }, - { - "epoch": 0.34942584019719836, - "grad_norm": 1.7156025591496107, - "learning_rate": 3.0219557016007723e-06, - "loss": 0.9205, - "step": 2906 - }, - { - "epoch": 0.3495460830878374, - "grad_norm": 1.7098939404306228, - "learning_rate": 3.021286024843202e-06, - "loss": 0.9295, - "step": 2907 - }, - { - "epoch": 0.3496663259784765, - "grad_norm": 1.148004047831932, - "learning_rate": 3.0206161931573526e-06, - "loss": 0.9348, - "step": 2908 - }, - { - "epoch": 0.34978656886911563, - "grad_norm": 1.528175771294985, - "learning_rate": 3.0199462066448388e-06, - "loss": 1.16, - "step": 2909 - }, - { - "epoch": 0.3499068117597547, - "grad_norm": 1.5640511252249731, - "learning_rate": 3.019276065407296e-06, - "loss": 0.9217, - "step": 2910 - }, - { - "epoch": 0.3500270546503938, - "grad_norm": 2.8976640106204212, - "learning_rate": 3.018605769546385e-06, - "loss": 1.0417, - "step": 2911 - }, - { - "epoch": 0.3501472975410329, - "grad_norm": 1.76108852710276, - "learning_rate": 3.017935319163788e-06, - "loss": 1.0308, - "step": 2912 - }, - { - "epoch": 0.35026754043167196, - "grad_norm": 2.1082059658413783, - "learning_rate": 3.017264714361213e-06, - "loss": 0.9404, - "step": 2913 - }, - { - "epoch": 0.3503877833223111, - "grad_norm": 1.6991095146057835, - "learning_rate": 3.016593955240389e-06, - "loss": 1.0466, - "step": 2914 - }, - { - "epoch": 0.3505080262129502, - "grad_norm": 2.2224065174399947, - "learning_rate": 3.015923041903071e-06, - "loss": 0.8875, - "step": 2915 - }, - { - "epoch": 0.35062826910358924, - "grad_norm": 2.155140909425075, - "learning_rate": 3.0152519744510347e-06, - "loss": 1.0597, - "step": 2916 - }, - { - "epoch": 0.35074851199422835, - "grad_norm": 1.6854894458097431, - "learning_rate": 3.014580752986081e-06, - "loss": 1.0558, - "step": 2917 - }, - { - "epoch": 0.3508687548848674, - "grad_norm": 1.6635860423660762, - "learning_rate": 3.0139093776100345e-06, - "loss": 1.0142, - "step": 2918 - }, - { - "epoch": 0.3509889977755065, - "grad_norm": 1.6646039198396954, - "learning_rate": 3.013237848424741e-06, - "loss": 0.9819, - "step": 2919 - }, - { - "epoch": 0.35110924066614563, - "grad_norm": 2.304885390239587, - "learning_rate": 3.012566165532072e-06, - "loss": 0.9814, - "step": 2920 - }, - { - "epoch": 0.3512294835567847, - "grad_norm": 2.1342756122555033, - "learning_rate": 3.0118943290339207e-06, - "loss": 0.9907, - "step": 2921 - }, - { - "epoch": 0.3513497264474238, - "grad_norm": 1.7032452462120353, - "learning_rate": 3.011222339032204e-06, - "loss": 0.9062, - "step": 2922 - }, - { - "epoch": 0.3514699693380629, - "grad_norm": 1.6724617187041997, - "learning_rate": 3.0105501956288626e-06, - "loss": 0.922, - "step": 2923 - }, - { - "epoch": 0.35159021222870196, - "grad_norm": 1.8260825721814458, - "learning_rate": 3.0098778989258602e-06, - "loss": 0.9721, - "step": 2924 - }, - { - "epoch": 0.35171045511934107, - "grad_norm": 2.995618959130687, - "learning_rate": 3.009205449025183e-06, - "loss": 1.1098, - "step": 2925 - }, - { - "epoch": 0.3518306980099802, - "grad_norm": 1.7842511777121617, - "learning_rate": 3.008532846028842e-06, - "loss": 0.8581, - "step": 2926 - }, - { - "epoch": 0.35195094090061924, - "grad_norm": 2.306949139408502, - "learning_rate": 3.0078600900388694e-06, - "loss": 0.9379, - "step": 2927 - }, - { - "epoch": 0.35207118379125835, - "grad_norm": 1.8515702259530267, - "learning_rate": 3.007187181157323e-06, - "loss": 0.9699, - "step": 2928 - }, - { - "epoch": 0.35219142668189746, - "grad_norm": 2.038410184736574, - "learning_rate": 3.006514119486282e-06, - "loss": 0.9105, - "step": 2929 - }, - { - "epoch": 0.3523116695725365, - "grad_norm": 1.9652839304090448, - "learning_rate": 3.005840905127849e-06, - "loss": 0.9187, - "step": 2930 - }, - { - "epoch": 0.3524319124631756, - "grad_norm": 2.4902543848767147, - "learning_rate": 3.0051675381841516e-06, - "loss": 1.0976, - "step": 2931 - }, - { - "epoch": 0.3525521553538147, - "grad_norm": 1.5606959800482194, - "learning_rate": 3.0044940187573363e-06, - "loss": 0.9959, - "step": 2932 - }, - { - "epoch": 0.3526723982444538, - "grad_norm": 1.8817003693221794, - "learning_rate": 3.003820346949578e-06, - "loss": 0.8861, - "step": 2933 - }, - { - "epoch": 0.3527926411350929, - "grad_norm": 1.82211646545886, - "learning_rate": 3.003146522863071e-06, - "loss": 1.0236, - "step": 2934 - }, - { - "epoch": 0.35291288402573195, - "grad_norm": 1.939855190063616, - "learning_rate": 3.0024725466000345e-06, - "loss": 1.095, - "step": 2935 - }, - { - "epoch": 0.35303312691637107, - "grad_norm": 1.6538896836436345, - "learning_rate": 3.0017984182627087e-06, - "loss": 1.0234, - "step": 2936 - }, - { - "epoch": 0.3531533698070102, - "grad_norm": 1.6876939026374522, - "learning_rate": 3.00112413795336e-06, - "loss": 1.0506, - "step": 2937 - }, - { - "epoch": 0.35327361269764923, - "grad_norm": 1.8643692007303918, - "learning_rate": 3.000449705774275e-06, - "loss": 1.032, - "step": 2938 - }, - { - "epoch": 0.35339385558828834, - "grad_norm": 1.804674288878863, - "learning_rate": 2.9997751218277654e-06, - "loss": 0.9427, - "step": 2939 - }, - { - "epoch": 0.35351409847892745, - "grad_norm": 1.9043229164886064, - "learning_rate": 2.999100386216166e-06, - "loss": 1.0045, - "step": 2940 - }, - { - "epoch": 0.3536343413695665, - "grad_norm": 1.6841116902877225, - "learning_rate": 2.998425499041831e-06, - "loss": 0.9704, - "step": 2941 - }, - { - "epoch": 0.3537545842602056, - "grad_norm": 0.8791149654523576, - "learning_rate": 2.997750460407142e-06, - "loss": 0.8608, - "step": 2942 - }, - { - "epoch": 0.35387482715084473, - "grad_norm": 1.729568074832305, - "learning_rate": 2.997075270414501e-06, - "loss": 0.9354, - "step": 2943 - }, - { - "epoch": 0.3539950700414838, - "grad_norm": 0.7743560653847024, - "learning_rate": 2.9963999291663347e-06, - "loss": 0.8321, - "step": 2944 - }, - { - "epoch": 0.3541153129321229, - "grad_norm": 2.117155518237458, - "learning_rate": 2.9957244367650915e-06, - "loss": 0.9695, - "step": 2945 - }, - { - "epoch": 0.354235555822762, - "grad_norm": 2.6872924014795645, - "learning_rate": 2.9950487933132425e-06, - "loss": 1.0681, - "step": 2946 - }, - { - "epoch": 0.35435579871340106, - "grad_norm": 1.7932281638910919, - "learning_rate": 2.994372998913283e-06, - "loss": 0.9497, - "step": 2947 - }, - { - "epoch": 0.35447604160404017, - "grad_norm": 2.576597836485202, - "learning_rate": 2.99369705366773e-06, - "loss": 0.8615, - "step": 2948 - }, - { - "epoch": 0.3545962844946792, - "grad_norm": 2.04468098472223, - "learning_rate": 2.9930209576791244e-06, - "loss": 1.0442, - "step": 2949 - }, - { - "epoch": 0.35471652738531834, - "grad_norm": 1.7182683096323224, - "learning_rate": 2.9923447110500285e-06, - "loss": 0.8673, - "step": 2950 - }, - { - "epoch": 0.35483677027595745, - "grad_norm": 1.3337248386344276, - "learning_rate": 2.9916683138830295e-06, - "loss": 0.9794, - "step": 2951 - }, - { - "epoch": 0.3549570131665965, - "grad_norm": 1.6731270353399643, - "learning_rate": 2.9909917662807353e-06, - "loss": 1.0431, - "step": 2952 - }, - { - "epoch": 0.3550772560572356, - "grad_norm": 2.3861910522277827, - "learning_rate": 2.9903150683457783e-06, - "loss": 0.9244, - "step": 2953 - }, - { - "epoch": 0.3551974989478747, - "grad_norm": 1.8913007427097335, - "learning_rate": 2.9896382201808126e-06, - "loss": 0.8812, - "step": 2954 - }, - { - "epoch": 0.3553177418385138, - "grad_norm": 1.8558707675090365, - "learning_rate": 2.988961221888516e-06, - "loss": 1.0355, - "step": 2955 - }, - { - "epoch": 0.3554379847291529, - "grad_norm": 2.640725128261828, - "learning_rate": 2.988284073571589e-06, - "loss": 1.0186, - "step": 2956 - }, - { - "epoch": 0.355558227619792, - "grad_norm": 27.673475170772168, - "learning_rate": 2.9876067753327528e-06, - "loss": 0.9546, - "step": 2957 - }, - { - "epoch": 0.35567847051043106, - "grad_norm": 1.8750652301885815, - "learning_rate": 2.986929327274754e-06, - "loss": 1.0343, - "step": 2958 - }, - { - "epoch": 0.35579871340107017, - "grad_norm": 1.8854045200132261, - "learning_rate": 2.9862517295003617e-06, - "loss": 1.0113, - "step": 2959 - }, - { - "epoch": 0.3559189562917093, - "grad_norm": 1.7379066320785215, - "learning_rate": 2.9855739821123654e-06, - "loss": 0.9572, - "step": 2960 - }, - { - "epoch": 0.35603919918234833, - "grad_norm": 1.727409051689944, - "learning_rate": 2.98489608521358e-06, - "loss": 1.045, - "step": 2961 - }, - { - "epoch": 0.35615944207298744, - "grad_norm": 2.6254540913508686, - "learning_rate": 2.9842180389068425e-06, - "loss": 1.0273, - "step": 2962 - }, - { - "epoch": 0.35627968496362655, - "grad_norm": 0.8059028410364824, - "learning_rate": 2.98353984329501e-06, - "loss": 0.8549, - "step": 2963 - }, - { - "epoch": 0.3563999278542656, - "grad_norm": 1.7912075060577684, - "learning_rate": 2.982861498480965e-06, - "loss": 0.9388, - "step": 2964 - }, - { - "epoch": 0.3565201707449047, - "grad_norm": 1.8360891725913073, - "learning_rate": 2.9821830045676122e-06, - "loss": 1.0521, - "step": 2965 - }, - { - "epoch": 0.3566404136355438, - "grad_norm": 1.6758714487125388, - "learning_rate": 2.9815043616578793e-06, - "loss": 0.9496, - "step": 2966 - }, - { - "epoch": 0.3567606565261829, - "grad_norm": 1.775779689883794, - "learning_rate": 2.9808255698547145e-06, - "loss": 1.0081, - "step": 2967 - }, - { - "epoch": 0.356880899416822, - "grad_norm": 1.9526560716368722, - "learning_rate": 2.9801466292610913e-06, - "loss": 1.0181, - "step": 2968 - }, - { - "epoch": 0.35700114230746105, - "grad_norm": 2.0048202601511926, - "learning_rate": 2.979467539980003e-06, - "loss": 1.0382, - "step": 2969 - }, - { - "epoch": 0.35712138519810016, - "grad_norm": 2.5243315161189672, - "learning_rate": 2.978788302114468e-06, - "loss": 0.9969, - "step": 2970 - }, - { - "epoch": 0.35724162808873927, - "grad_norm": 2.3600399267730907, - "learning_rate": 2.9781089157675255e-06, - "loss": 1.0447, - "step": 2971 - }, - { - "epoch": 0.3573618709793783, - "grad_norm": 1.2848853199619787, - "learning_rate": 2.977429381042238e-06, - "loss": 1.1074, - "step": 2972 - }, - { - "epoch": 0.35748211387001744, - "grad_norm": 2.079988031719694, - "learning_rate": 2.9767496980416913e-06, - "loss": 1.125, - "step": 2973 - }, - { - "epoch": 0.35760235676065655, - "grad_norm": 2.0298774189887934, - "learning_rate": 2.9760698668689914e-06, - "loss": 1.0377, - "step": 2974 - }, - { - "epoch": 0.3577225996512956, - "grad_norm": 1.893109037496346, - "learning_rate": 2.975389887627269e-06, - "loss": 0.9428, - "step": 2975 - }, - { - "epoch": 0.3578428425419347, - "grad_norm": 1.946776057832062, - "learning_rate": 2.9747097604196764e-06, - "loss": 1.1265, - "step": 2976 - }, - { - "epoch": 0.3579630854325738, - "grad_norm": 0.6847404888825379, - "learning_rate": 2.9740294853493875e-06, - "loss": 0.8289, - "step": 2977 - }, - { - "epoch": 0.3580833283232129, - "grad_norm": 1.87580714336987, - "learning_rate": 2.9733490625196008e-06, - "loss": 0.907, - "step": 2978 - }, - { - "epoch": 0.358203571213852, - "grad_norm": 4.814621494900732, - "learning_rate": 2.9726684920335353e-06, - "loss": 0.9906, - "step": 2979 - }, - { - "epoch": 0.35832381410449105, - "grad_norm": 2.220125014075784, - "learning_rate": 2.971987773994432e-06, - "loss": 1.0529, - "step": 2980 - }, - { - "epoch": 0.35844405699513016, - "grad_norm": 2.0087466905396885, - "learning_rate": 2.9713069085055566e-06, - "loss": 1.0602, - "step": 2981 - }, - { - "epoch": 0.35856429988576927, - "grad_norm": 1.6002739165201991, - "learning_rate": 2.9706258956701958e-06, - "loss": 1.0212, - "step": 2982 - }, - { - "epoch": 0.3586845427764083, - "grad_norm": 2.7374604174395545, - "learning_rate": 2.9699447355916575e-06, - "loss": 1.0014, - "step": 2983 - }, - { - "epoch": 0.35880478566704743, - "grad_norm": 1.9386709428085924, - "learning_rate": 2.969263428373275e-06, - "loss": 0.9632, - "step": 2984 - }, - { - "epoch": 0.35892502855768654, - "grad_norm": 1.733016642042092, - "learning_rate": 2.9685819741184007e-06, - "loss": 1.0137, - "step": 2985 - }, - { - "epoch": 0.3590452714483256, - "grad_norm": 1.8883122572209485, - "learning_rate": 2.967900372930411e-06, - "loss": 0.9157, - "step": 2986 - }, - { - "epoch": 0.3591655143389647, - "grad_norm": 2.246123471270749, - "learning_rate": 2.9672186249127046e-06, - "loss": 1.0214, - "step": 2987 - }, - { - "epoch": 0.3592857572296038, - "grad_norm": 1.7826074480626826, - "learning_rate": 2.9665367301687014e-06, - "loss": 1.0153, - "step": 2988 - }, - { - "epoch": 0.3594060001202429, - "grad_norm": 1.837494924110097, - "learning_rate": 2.965854688801845e-06, - "loss": 0.9879, - "step": 2989 - }, - { - "epoch": 0.359526243010882, - "grad_norm": 1.8247450532899727, - "learning_rate": 2.9651725009156005e-06, - "loss": 0.9874, - "step": 2990 - }, - { - "epoch": 0.3596464859015211, - "grad_norm": 1.644845674226827, - "learning_rate": 2.964490166613454e-06, - "loss": 0.9744, - "step": 2991 - }, - { - "epoch": 0.35976672879216015, - "grad_norm": 0.8439389123300949, - "learning_rate": 2.963807685998917e-06, - "loss": 0.8331, - "step": 2992 - }, - { - "epoch": 0.35988697168279926, - "grad_norm": 1.4709920612019844, - "learning_rate": 2.9631250591755196e-06, - "loss": 1.0106, - "step": 2993 - }, - { - "epoch": 0.36000721457343837, - "grad_norm": 1.8049158389476792, - "learning_rate": 2.962442286246817e-06, - "loss": 0.8142, - "step": 2994 - }, - { - "epoch": 0.3601274574640774, - "grad_norm": 1.5248210395492245, - "learning_rate": 2.9617593673163853e-06, - "loss": 0.93, - "step": 2995 - }, - { - "epoch": 0.36024770035471654, - "grad_norm": 1.9993191990841932, - "learning_rate": 2.9610763024878216e-06, - "loss": 0.9998, - "step": 2996 - }, - { - "epoch": 0.3603679432453556, - "grad_norm": 1.548584427007317, - "learning_rate": 2.960393091864747e-06, - "loss": 1.1399, - "step": 2997 - }, - { - "epoch": 0.3604881861359947, - "grad_norm": 1.706463822042513, - "learning_rate": 2.959709735550804e-06, - "loss": 0.9732, - "step": 2998 - }, - { - "epoch": 0.3606084290266338, - "grad_norm": 2.113644722020542, - "learning_rate": 2.9590262336496575e-06, - "loss": 0.9805, - "step": 2999 - }, - { - "epoch": 0.36072867191727287, - "grad_norm": 1.8839644710811025, - "learning_rate": 2.9583425862649936e-06, - "loss": 1.0772, - "step": 3000 - }, - { - "epoch": 0.360848914807912, - "grad_norm": 2.7194627726370943, - "learning_rate": 2.9576587935005215e-06, - "loss": 0.968, - "step": 3001 - }, - { - "epoch": 0.3609691576985511, - "grad_norm": 2.538806233311537, - "learning_rate": 2.9569748554599713e-06, - "loss": 0.953, - "step": 3002 - }, - { - "epoch": 0.36108940058919015, - "grad_norm": 1.7899868505410488, - "learning_rate": 2.956290772247097e-06, - "loss": 0.9493, - "step": 3003 - }, - { - "epoch": 0.36120964347982926, - "grad_norm": 1.565361606880861, - "learning_rate": 2.9556065439656724e-06, - "loss": 0.9699, - "step": 3004 - }, - { - "epoch": 0.36132988637046837, - "grad_norm": 1.7847284184145042, - "learning_rate": 2.9549221707194952e-06, - "loss": 1.0542, - "step": 3005 - }, - { - "epoch": 0.3614501292611074, - "grad_norm": 2.4209769275862647, - "learning_rate": 2.954237652612384e-06, - "loss": 0.9586, - "step": 3006 - }, - { - "epoch": 0.36157037215174653, - "grad_norm": 1.7568488298147718, - "learning_rate": 2.9535529897481796e-06, - "loss": 1.074, - "step": 3007 - }, - { - "epoch": 0.36169061504238564, - "grad_norm": 1.9959241343358756, - "learning_rate": 2.9528681822307446e-06, - "loss": 1.0074, - "step": 3008 - }, - { - "epoch": 0.3618108579330247, - "grad_norm": 1.7551997455269865, - "learning_rate": 2.952183230163964e-06, - "loss": 1.0545, - "step": 3009 - }, - { - "epoch": 0.3619311008236638, - "grad_norm": 2.518680399271475, - "learning_rate": 2.9514981336517448e-06, - "loss": 0.9717, - "step": 3010 - }, - { - "epoch": 0.36205134371430286, - "grad_norm": 1.8072701342030133, - "learning_rate": 2.950812892798015e-06, - "loss": 1.0402, - "step": 3011 - }, - { - "epoch": 0.362171586604942, - "grad_norm": 1.798857151257059, - "learning_rate": 2.9501275077067256e-06, - "loss": 1.1008, - "step": 3012 - }, - { - "epoch": 0.3622918294955811, - "grad_norm": 1.568861105582524, - "learning_rate": 2.949441978481848e-06, - "loss": 1.1109, - "step": 3013 - }, - { - "epoch": 0.36241207238622014, - "grad_norm": 1.7603856221307694, - "learning_rate": 2.9487563052273778e-06, - "loss": 1.0282, - "step": 3014 - }, - { - "epoch": 0.36253231527685925, - "grad_norm": 1.6453214726651706, - "learning_rate": 2.94807048804733e-06, - "loss": 1.086, - "step": 3015 - }, - { - "epoch": 0.36265255816749836, - "grad_norm": 1.952895738497551, - "learning_rate": 2.9473845270457434e-06, - "loss": 1.1307, - "step": 3016 - }, - { - "epoch": 0.3627728010581374, - "grad_norm": 2.9038113442820546, - "learning_rate": 2.946698422326677e-06, - "loss": 0.9251, - "step": 3017 - }, - { - "epoch": 0.36289304394877653, - "grad_norm": 2.0353811655947425, - "learning_rate": 2.946012173994213e-06, - "loss": 1.0257, - "step": 3018 - }, - { - "epoch": 0.36301328683941564, - "grad_norm": 1.3902550537423617, - "learning_rate": 2.945325782152454e-06, - "loss": 0.912, - "step": 3019 - }, - { - "epoch": 0.3631335297300547, - "grad_norm": 1.8757334228417624, - "learning_rate": 2.9446392469055257e-06, - "loss": 1.027, - "step": 3020 - }, - { - "epoch": 0.3632537726206938, - "grad_norm": 1.5317658314584857, - "learning_rate": 2.9439525683575745e-06, - "loss": 1.0275, - "step": 3021 - }, - { - "epoch": 0.3633740155113329, - "grad_norm": 1.9890010108043774, - "learning_rate": 2.9432657466127694e-06, - "loss": 0.9735, - "step": 3022 - }, - { - "epoch": 0.36349425840197197, - "grad_norm": 2.1729446082606083, - "learning_rate": 2.9425787817753007e-06, - "loss": 0.9996, - "step": 3023 - }, - { - "epoch": 0.3636145012926111, - "grad_norm": 1.489175442778449, - "learning_rate": 2.94189167394938e-06, - "loss": 0.9406, - "step": 3024 - }, - { - "epoch": 0.3637347441832502, - "grad_norm": 1.69918647823162, - "learning_rate": 2.941204423239241e-06, - "loss": 1.0436, - "step": 3025 - }, - { - "epoch": 0.36385498707388925, - "grad_norm": 1.7536609367654528, - "learning_rate": 2.9405170297491395e-06, - "loss": 0.9968, - "step": 3026 - }, - { - "epoch": 0.36397522996452836, - "grad_norm": 3.14836980022786, - "learning_rate": 2.939829493583353e-06, - "loss": 1.0322, - "step": 3027 - }, - { - "epoch": 0.3640954728551674, - "grad_norm": 2.607873541755819, - "learning_rate": 2.939141814846179e-06, - "loss": 1.0637, - "step": 3028 - }, - { - "epoch": 0.3642157157458065, - "grad_norm": 3.0475821432507506, - "learning_rate": 2.938453993641938e-06, - "loss": 1.0496, - "step": 3029 - }, - { - "epoch": 0.36433595863644563, - "grad_norm": 2.248664671639806, - "learning_rate": 2.937766030074973e-06, - "loss": 0.9314, - "step": 3030 - }, - { - "epoch": 0.3644562015270847, - "grad_norm": 2.11176927105996, - "learning_rate": 2.937077924249646e-06, - "loss": 1.0471, - "step": 3031 - }, - { - "epoch": 0.3645764444177238, - "grad_norm": 3.4451071057328106, - "learning_rate": 2.9363896762703443e-06, - "loss": 0.9848, - "step": 3032 - }, - { - "epoch": 0.3646966873083629, - "grad_norm": 1.5546501576908638, - "learning_rate": 2.9357012862414725e-06, - "loss": 1.0704, - "step": 3033 - }, - { - "epoch": 0.36481693019900197, - "grad_norm": 2.4610012470042246, - "learning_rate": 2.9350127542674593e-06, - "loss": 0.9449, - "step": 3034 - }, - { - "epoch": 0.3649371730896411, - "grad_norm": 2.1958017041741167, - "learning_rate": 2.934324080452755e-06, - "loss": 0.9921, - "step": 3035 - }, - { - "epoch": 0.3650574159802802, - "grad_norm": 1.3951460821306918, - "learning_rate": 2.9336352649018307e-06, - "loss": 1.0074, - "step": 3036 - }, - { - "epoch": 0.36517765887091924, - "grad_norm": 1.823670919689606, - "learning_rate": 2.9329463077191783e-06, - "loss": 0.9235, - "step": 3037 - }, - { - "epoch": 0.36529790176155835, - "grad_norm": 3.7755751468503655, - "learning_rate": 2.9322572090093135e-06, - "loss": 0.8748, - "step": 3038 - }, - { - "epoch": 0.36541814465219746, - "grad_norm": 2.786280336963584, - "learning_rate": 2.9315679688767713e-06, - "loss": 0.9899, - "step": 3039 - }, - { - "epoch": 0.3655383875428365, - "grad_norm": 1.4511574170713162, - "learning_rate": 2.9308785874261085e-06, - "loss": 0.8968, - "step": 3040 - }, - { - "epoch": 0.36565863043347563, - "grad_norm": 1.6412287736436724, - "learning_rate": 2.9301890647619045e-06, - "loss": 1.0425, - "step": 3041 - }, - { - "epoch": 0.36577887332411474, - "grad_norm": 2.02240894937859, - "learning_rate": 2.929499400988759e-06, - "loss": 1.0383, - "step": 3042 - }, - { - "epoch": 0.3658991162147538, - "grad_norm": 1.5590495348995514, - "learning_rate": 2.9288095962112927e-06, - "loss": 0.878, - "step": 3043 - }, - { - "epoch": 0.3660193591053929, - "grad_norm": 1.671312183943721, - "learning_rate": 2.9281196505341503e-06, - "loss": 1.081, - "step": 3044 - }, - { - "epoch": 0.36613960199603196, - "grad_norm": 1.8071855042981035, - "learning_rate": 2.9274295640619946e-06, - "loss": 1.0167, - "step": 3045 - }, - { - "epoch": 0.36625984488667107, - "grad_norm": 1.6719719689314705, - "learning_rate": 2.9267393368995103e-06, - "loss": 1.0119, - "step": 3046 - }, - { - "epoch": 0.3663800877773102, - "grad_norm": 2.4970041444535513, - "learning_rate": 2.926048969151407e-06, - "loss": 0.9797, - "step": 3047 - }, - { - "epoch": 0.36650033066794924, - "grad_norm": 1.7619646327943186, - "learning_rate": 2.92535846092241e-06, - "loss": 0.9122, - "step": 3048 - }, - { - "epoch": 0.36662057355858835, - "grad_norm": 5.636359379786342, - "learning_rate": 2.9246678123172704e-06, - "loss": 1.0556, - "step": 3049 - }, - { - "epoch": 0.36674081644922746, - "grad_norm": 4.020702669593387, - "learning_rate": 2.9239770234407596e-06, - "loss": 0.9694, - "step": 3050 - }, - { - "epoch": 0.3668610593398665, - "grad_norm": 1.62045650596359, - "learning_rate": 2.9232860943976686e-06, - "loss": 0.9129, - "step": 3051 - }, - { - "epoch": 0.3669813022305056, - "grad_norm": 1.6292615178327268, - "learning_rate": 2.9225950252928115e-06, - "loss": 1.0706, - "step": 3052 - }, - { - "epoch": 0.36710154512114473, - "grad_norm": 2.0277604867322805, - "learning_rate": 2.9219038162310217e-06, - "loss": 1.0508, - "step": 3053 - }, - { - "epoch": 0.3672217880117838, - "grad_norm": 3.2304150209029476, - "learning_rate": 2.921212467317157e-06, - "loss": 1.0547, - "step": 3054 - }, - { - "epoch": 0.3673420309024229, - "grad_norm": 2.086731834470572, - "learning_rate": 2.920520978656093e-06, - "loss": 1.0388, - "step": 3055 - }, - { - "epoch": 0.367462273793062, - "grad_norm": 2.119760561503173, - "learning_rate": 2.919829350352729e-06, - "loss": 0.9984, - "step": 3056 - }, - { - "epoch": 0.36758251668370107, - "grad_norm": 0.7521789211771331, - "learning_rate": 2.919137582511983e-06, - "loss": 0.8521, - "step": 3057 - }, - { - "epoch": 0.3677027595743402, - "grad_norm": 2.6761236486848916, - "learning_rate": 2.918445675238797e-06, - "loss": 0.8673, - "step": 3058 - }, - { - "epoch": 0.36782300246497923, - "grad_norm": 1.8567025349072757, - "learning_rate": 2.917753628638132e-06, - "loss": 0.9278, - "step": 3059 - }, - { - "epoch": 0.36794324535561834, - "grad_norm": 3.166457186010865, - "learning_rate": 2.9170614428149716e-06, - "loss": 0.9324, - "step": 3060 - }, - { - "epoch": 0.36806348824625745, - "grad_norm": 2.3160108772444064, - "learning_rate": 2.9163691178743195e-06, - "loss": 1.096, - "step": 3061 - }, - { - "epoch": 0.3681837311368965, - "grad_norm": 2.093935260832196, - "learning_rate": 2.9156766539212006e-06, - "loss": 1.0106, - "step": 3062 - }, - { - "epoch": 0.3683039740275356, - "grad_norm": 2.022218314831956, - "learning_rate": 2.9149840510606614e-06, - "loss": 0.9453, - "step": 3063 - }, - { - "epoch": 0.36842421691817473, - "grad_norm": 1.0556494952669184, - "learning_rate": 2.914291309397769e-06, - "loss": 0.9164, - "step": 3064 - }, - { - "epoch": 0.3685444598088138, - "grad_norm": 1.7284054133737652, - "learning_rate": 2.9135984290376117e-06, - "loss": 1.0213, - "step": 3065 - }, - { - "epoch": 0.3686647026994529, - "grad_norm": 1.5680393448291956, - "learning_rate": 2.9129054100853e-06, - "loss": 1.0599, - "step": 3066 - }, - { - "epoch": 0.368784945590092, - "grad_norm": 1.413210406618108, - "learning_rate": 2.912212252645963e-06, - "loss": 0.9831, - "step": 3067 - }, - { - "epoch": 0.36890518848073106, - "grad_norm": 2.0337885326259153, - "learning_rate": 2.9115189568247523e-06, - "loss": 0.9853, - "step": 3068 - }, - { - "epoch": 0.36902543137137017, - "grad_norm": 2.0436135436067144, - "learning_rate": 2.910825522726841e-06, - "loss": 1.1412, - "step": 3069 - }, - { - "epoch": 0.3691456742620093, - "grad_norm": 1.7834092822197922, - "learning_rate": 2.9101319504574215e-06, - "loss": 0.9966, - "step": 3070 - }, - { - "epoch": 0.36926591715264834, - "grad_norm": 1.7685093908128016, - "learning_rate": 2.909438240121709e-06, - "loss": 0.9865, - "step": 3071 - }, - { - "epoch": 0.36938616004328745, - "grad_norm": 1.864832905808535, - "learning_rate": 2.908744391824939e-06, - "loss": 0.9276, - "step": 3072 - }, - { - "epoch": 0.36950640293392656, - "grad_norm": 2.0891214709065236, - "learning_rate": 2.908050405672367e-06, - "loss": 1.0172, - "step": 3073 - }, - { - "epoch": 0.3696266458245656, - "grad_norm": 1.7429852904215033, - "learning_rate": 2.9073562817692703e-06, - "loss": 1.0136, - "step": 3074 - }, - { - "epoch": 0.3697468887152047, - "grad_norm": 0.7831382058652092, - "learning_rate": 2.9066620202209468e-06, - "loss": 0.8414, - "step": 3075 - }, - { - "epoch": 0.3698671316058438, - "grad_norm": 2.6423105131148543, - "learning_rate": 2.905967621132716e-06, - "loss": 1.014, - "step": 3076 - }, - { - "epoch": 0.3699873744964829, - "grad_norm": 2.2959902350321078, - "learning_rate": 2.9052730846099172e-06, - "loss": 0.9788, - "step": 3077 - }, - { - "epoch": 0.370107617387122, - "grad_norm": 0.885449008724953, - "learning_rate": 2.9045784107579123e-06, - "loss": 0.8794, - "step": 3078 - }, - { - "epoch": 0.37022786027776106, - "grad_norm": 1.8608081327452515, - "learning_rate": 2.9038835996820807e-06, - "loss": 0.896, - "step": 3079 - }, - { - "epoch": 0.37034810316840017, - "grad_norm": 1.7678340494758569, - "learning_rate": 2.903188651487826e-06, - "loss": 1.0191, - "step": 3080 - }, - { - "epoch": 0.3704683460590393, - "grad_norm": 3.971957134895254, - "learning_rate": 2.902493566280571e-06, - "loss": 1.0981, - "step": 3081 - }, - { - "epoch": 0.37058858894967833, - "grad_norm": 1.8554746607636117, - "learning_rate": 2.9017983441657595e-06, - "loss": 1.0387, - "step": 3082 - }, - { - "epoch": 0.37070883184031744, - "grad_norm": 2.937280200283178, - "learning_rate": 2.9011029852488564e-06, - "loss": 0.9874, - "step": 3083 - }, - { - "epoch": 0.37082907473095655, - "grad_norm": 1.0103678281851152, - "learning_rate": 2.9004074896353465e-06, - "loss": 0.9094, - "step": 3084 - }, - { - "epoch": 0.3709493176215956, - "grad_norm": 1.6861161102913504, - "learning_rate": 2.8997118574307362e-06, - "loss": 1.046, - "step": 3085 - }, - { - "epoch": 0.3710695605122347, - "grad_norm": 1.7893640522250533, - "learning_rate": 2.899016088740553e-06, - "loss": 0.9744, - "step": 3086 - }, - { - "epoch": 0.37118980340287383, - "grad_norm": 2.531972240500725, - "learning_rate": 2.898320183670344e-06, - "loss": 1.0208, - "step": 3087 - }, - { - "epoch": 0.3713100462935129, - "grad_norm": 1.8890950972376883, - "learning_rate": 2.8976241423256767e-06, - "loss": 1.1124, - "step": 3088 - }, - { - "epoch": 0.371430289184152, - "grad_norm": 1.82681096771603, - "learning_rate": 2.896927964812142e-06, - "loss": 0.9106, - "step": 3089 - }, - { - "epoch": 0.37155053207479105, - "grad_norm": 2.2875390304821037, - "learning_rate": 2.8962316512353465e-06, - "loss": 0.9819, - "step": 3090 - }, - { - "epoch": 0.37167077496543016, - "grad_norm": 1.6001297659703528, - "learning_rate": 2.8955352017009233e-06, - "loss": 0.9801, - "step": 3091 - }, - { - "epoch": 0.3717910178560693, - "grad_norm": 1.9539107306244858, - "learning_rate": 2.8948386163145212e-06, - "loss": 1.0011, - "step": 3092 - }, - { - "epoch": 0.3719112607467083, - "grad_norm": 1.6830831581011498, - "learning_rate": 2.8941418951818135e-06, - "loss": 1.0238, - "step": 3093 - }, - { - "epoch": 0.37203150363734744, - "grad_norm": 1.9837288593014273, - "learning_rate": 2.8934450384084903e-06, - "loss": 0.9415, - "step": 3094 - }, - { - "epoch": 0.37215174652798655, - "grad_norm": 3.0836202298968343, - "learning_rate": 2.8927480461002653e-06, - "loss": 0.9407, - "step": 3095 - }, - { - "epoch": 0.3722719894186256, - "grad_norm": 2.429530687576809, - "learning_rate": 2.892050918362872e-06, - "loss": 1.0863, - "step": 3096 - }, - { - "epoch": 0.3723922323092647, - "grad_norm": 0.9040822377903397, - "learning_rate": 2.8913536553020626e-06, - "loss": 0.8436, - "step": 3097 - }, - { - "epoch": 0.3725124751999038, - "grad_norm": 1.7386213677088829, - "learning_rate": 2.8906562570236137e-06, - "loss": 1.0807, - "step": 3098 - }, - { - "epoch": 0.3726327180905429, - "grad_norm": 1.5037009359141829, - "learning_rate": 2.889958723633318e-06, - "loss": 0.9922, - "step": 3099 - }, - { - "epoch": 0.372752960981182, - "grad_norm": 1.5776450305413978, - "learning_rate": 2.889261055236992e-06, - "loss": 0.9692, - "step": 3100 - }, - { - "epoch": 0.3728732038718211, - "grad_norm": 2.024457284233188, - "learning_rate": 2.8885632519404704e-06, - "loss": 1.0487, - "step": 3101 - }, - { - "epoch": 0.37299344676246016, - "grad_norm": 1.9998488369082796, - "learning_rate": 2.8878653138496107e-06, - "loss": 0.9793, - "step": 3102 - }, - { - "epoch": 0.37311368965309927, - "grad_norm": 3.5489627558564285, - "learning_rate": 2.8871672410702878e-06, - "loss": 0.9863, - "step": 3103 - }, - { - "epoch": 0.3732339325437384, - "grad_norm": 1.5644185304090954, - "learning_rate": 2.8864690337084008e-06, - "loss": 1.0476, - "step": 3104 - }, - { - "epoch": 0.37335417543437743, - "grad_norm": 1.6498840638091001, - "learning_rate": 2.885770691869866e-06, - "loss": 1.0034, - "step": 3105 - }, - { - "epoch": 0.37347441832501654, - "grad_norm": 1.893060109086444, - "learning_rate": 2.8850722156606207e-06, - "loss": 0.9728, - "step": 3106 - }, - { - "epoch": 0.3735946612156556, - "grad_norm": 1.8771131052358092, - "learning_rate": 2.8843736051866252e-06, - "loss": 0.8982, - "step": 3107 - }, - { - "epoch": 0.3737149041062947, - "grad_norm": 1.4196028947989323, - "learning_rate": 2.8836748605538557e-06, - "loss": 0.9206, - "step": 3108 - }, - { - "epoch": 0.3738351469969338, - "grad_norm": 3.0545482554980232, - "learning_rate": 2.882975981868313e-06, - "loss": 0.8648, - "step": 3109 - }, - { - "epoch": 0.3739553898875729, - "grad_norm": 2.2588942948965887, - "learning_rate": 2.882276969236016e-06, - "loss": 0.918, - "step": 3110 - }, - { - "epoch": 0.374075632778212, - "grad_norm": 1.9968665730234392, - "learning_rate": 2.881577822763005e-06, - "loss": 0.9945, - "step": 3111 - }, - { - "epoch": 0.3741958756688511, - "grad_norm": 1.8407905232171886, - "learning_rate": 2.880878542555338e-06, - "loss": 1.1058, - "step": 3112 - }, - { - "epoch": 0.37431611855949015, - "grad_norm": 2.253860658286993, - "learning_rate": 2.8801791287190976e-06, - "loss": 1.032, - "step": 3113 - }, - { - "epoch": 0.37443636145012926, - "grad_norm": 2.576245264218349, - "learning_rate": 2.8794795813603817e-06, - "loss": 1.0875, - "step": 3114 - }, - { - "epoch": 0.3745566043407684, - "grad_norm": 1.9428909120510012, - "learning_rate": 2.878779900585314e-06, - "loss": 1.0502, - "step": 3115 - }, - { - "epoch": 0.37467684723140743, - "grad_norm": 1.7702343619627894, - "learning_rate": 2.8780800865000336e-06, - "loss": 0.986, - "step": 3116 - }, - { - "epoch": 0.37479709012204654, - "grad_norm": 1.0469878406346893, - "learning_rate": 2.877380139210702e-06, - "loss": 0.8889, - "step": 3117 - }, - { - "epoch": 0.37491733301268565, - "grad_norm": 1.5560221095389826, - "learning_rate": 2.876680058823501e-06, - "loss": 0.9823, - "step": 3118 - }, - { - "epoch": 0.3750375759033247, - "grad_norm": 1.8713926582260088, - "learning_rate": 2.8759798454446314e-06, - "loss": 0.8942, - "step": 3119 - }, - { - "epoch": 0.3751578187939638, - "grad_norm": 3.947238323677851, - "learning_rate": 2.8752794991803173e-06, - "loss": 1.0431, - "step": 3120 - }, - { - "epoch": 0.37527806168460287, - "grad_norm": 1.9066795115382786, - "learning_rate": 2.8745790201367976e-06, - "loss": 0.9798, - "step": 3121 - }, - { - "epoch": 0.375398304575242, - "grad_norm": 1.9222620132434063, - "learning_rate": 2.8738784084203373e-06, - "loss": 1.0808, - "step": 3122 - }, - { - "epoch": 0.3755185474658811, - "grad_norm": 1.544557241434341, - "learning_rate": 2.873177664137216e-06, - "loss": 1.0192, - "step": 3123 - }, - { - "epoch": 0.37563879035652015, - "grad_norm": 1.6647804395004058, - "learning_rate": 2.8724767873937384e-06, - "loss": 0.9181, - "step": 3124 - }, - { - "epoch": 0.37575903324715926, - "grad_norm": 2.0047018096722327, - "learning_rate": 2.871775778296225e-06, - "loss": 1.1044, - "step": 3125 - }, - { - "epoch": 0.37587927613779837, - "grad_norm": 2.032987467737032, - "learning_rate": 2.8710746369510196e-06, - "loss": 1.0149, - "step": 3126 - }, - { - "epoch": 0.3759995190284374, - "grad_norm": 2.402711619922275, - "learning_rate": 2.8703733634644846e-06, - "loss": 1.0661, - "step": 3127 - }, - { - "epoch": 0.37611976191907653, - "grad_norm": 1.5813634145721038, - "learning_rate": 2.869671957943002e-06, - "loss": 1.0242, - "step": 3128 - }, - { - "epoch": 0.37624000480971564, - "grad_norm": 1.9864395695483432, - "learning_rate": 2.8689704204929747e-06, - "loss": 0.9707, - "step": 3129 - }, - { - "epoch": 0.3763602477003547, - "grad_norm": 1.7201865694969147, - "learning_rate": 2.8682687512208253e-06, - "loss": 1.0299, - "step": 3130 - }, - { - "epoch": 0.3764804905909938, - "grad_norm": 1.9177886816208154, - "learning_rate": 2.8675669502329972e-06, - "loss": 1.0329, - "step": 3131 - }, - { - "epoch": 0.3766007334816329, - "grad_norm": 2.3553114501659254, - "learning_rate": 2.866865017635952e-06, - "loss": 1.0805, - "step": 3132 - }, - { - "epoch": 0.376720976372272, - "grad_norm": 1.6200472107825081, - "learning_rate": 2.866162953536174e-06, - "loss": 1.0189, - "step": 3133 - }, - { - "epoch": 0.3768412192629111, - "grad_norm": 1.6263793812919172, - "learning_rate": 2.8654607580401634e-06, - "loss": 0.9801, - "step": 3134 - }, - { - "epoch": 0.3769614621535502, - "grad_norm": 0.9337405165225185, - "learning_rate": 2.8647584312544446e-06, - "loss": 0.9147, - "step": 3135 - }, - { - "epoch": 0.37708170504418925, - "grad_norm": 1.2793444908540805, - "learning_rate": 2.864055973285559e-06, - "loss": 1.084, - "step": 3136 - }, - { - "epoch": 0.37720194793482836, - "grad_norm": 2.2078362271275127, - "learning_rate": 2.8633533842400698e-06, - "loss": 1.0992, - "step": 3137 - }, - { - "epoch": 0.3773221908254674, - "grad_norm": 1.9643351387642674, - "learning_rate": 2.862650664224558e-06, - "loss": 1.0109, - "step": 3138 - }, - { - "epoch": 0.37744243371610653, - "grad_norm": 2.1861720822966078, - "learning_rate": 2.861947813345627e-06, - "loss": 0.9333, - "step": 3139 - }, - { - "epoch": 0.37756267660674564, - "grad_norm": 1.755215094880646, - "learning_rate": 2.8612448317098974e-06, - "loss": 0.9463, - "step": 3140 - }, - { - "epoch": 0.3776829194973847, - "grad_norm": 2.125672626629177, - "learning_rate": 2.8605417194240114e-06, - "loss": 1.0646, - "step": 3141 - }, - { - "epoch": 0.3778031623880238, - "grad_norm": 1.5886244538627645, - "learning_rate": 2.8598384765946315e-06, - "loss": 1.0232, - "step": 3142 - }, - { - "epoch": 0.3779234052786629, - "grad_norm": 2.24017594651219, - "learning_rate": 2.8591351033284377e-06, - "loss": 0.9509, - "step": 3143 - }, - { - "epoch": 0.37804364816930197, - "grad_norm": 2.1354599614910477, - "learning_rate": 2.8584315997321325e-06, - "loss": 1.0653, - "step": 3144 - }, - { - "epoch": 0.3781638910599411, - "grad_norm": 2.9100866117402053, - "learning_rate": 2.8577279659124356e-06, - "loss": 1.0088, - "step": 3145 - }, - { - "epoch": 0.3782841339505802, - "grad_norm": 1.7713546284831372, - "learning_rate": 2.857024201976089e-06, - "loss": 1.0589, - "step": 3146 - }, - { - "epoch": 0.37840437684121925, - "grad_norm": 2.1793967432502384, - "learning_rate": 2.8563203080298516e-06, - "loss": 0.9745, - "step": 3147 - }, - { - "epoch": 0.37852461973185836, - "grad_norm": 65.50923569190454, - "learning_rate": 2.855616284180505e-06, - "loss": 1.116, - "step": 3148 - }, - { - "epoch": 0.37864486262249747, - "grad_norm": 0.9311291392195048, - "learning_rate": 2.8549121305348477e-06, - "loss": 0.959, - "step": 3149 - }, - { - "epoch": 0.3787651055131365, - "grad_norm": 4.742294285183099, - "learning_rate": 2.8542078471997006e-06, - "loss": 1.0528, - "step": 3150 - }, - { - "epoch": 0.37888534840377563, - "grad_norm": 1.602983690841628, - "learning_rate": 2.8535034342819013e-06, - "loss": 0.9828, - "step": 3151 - }, - { - "epoch": 0.37900559129441475, - "grad_norm": 1.3827234875322425, - "learning_rate": 2.85279889188831e-06, - "loss": 0.9468, - "step": 3152 - }, - { - "epoch": 0.3791258341850538, - "grad_norm": 1.5368009109913372, - "learning_rate": 2.852094220125805e-06, - "loss": 1.0353, - "step": 3153 - }, - { - "epoch": 0.3792460770756929, - "grad_norm": 2.0379370144665563, - "learning_rate": 2.8513894191012846e-06, - "loss": 0.9367, - "step": 3154 - }, - { - "epoch": 0.37936631996633197, - "grad_norm": 1.7499690053102395, - "learning_rate": 2.8506844889216664e-06, - "loss": 1.0102, - "step": 3155 - }, - { - "epoch": 0.3794865628569711, - "grad_norm": 0.9110127949554162, - "learning_rate": 2.849979429693887e-06, - "loss": 0.901, - "step": 3156 - }, - { - "epoch": 0.3796068057476102, - "grad_norm": 1.983879568174875, - "learning_rate": 2.8492742415249042e-06, - "loss": 0.9711, - "step": 3157 - }, - { - "epoch": 0.37972704863824924, - "grad_norm": 1.5027896054864613, - "learning_rate": 2.848568924521694e-06, - "loss": 0.9907, - "step": 3158 - }, - { - "epoch": 0.37984729152888835, - "grad_norm": 2.0104267601062875, - "learning_rate": 2.8478634787912526e-06, - "loss": 0.9605, - "step": 3159 - }, - { - "epoch": 0.37996753441952746, - "grad_norm": 1.8618627622982673, - "learning_rate": 2.847157904440596e-06, - "loss": 0.9963, - "step": 3160 - }, - { - "epoch": 0.3800877773101665, - "grad_norm": 1.753364734271777, - "learning_rate": 2.846452201576759e-06, - "loss": 0.9809, - "step": 3161 - }, - { - "epoch": 0.38020802020080563, - "grad_norm": 0.9417613139453929, - "learning_rate": 2.845746370306795e-06, - "loss": 0.8863, - "step": 3162 - }, - { - "epoch": 0.38032826309144474, - "grad_norm": 2.2760827055722253, - "learning_rate": 2.84504041073778e-06, - "loss": 1.0188, - "step": 3163 - }, - { - "epoch": 0.3804485059820838, - "grad_norm": 1.7783936470240875, - "learning_rate": 2.844334322976806e-06, - "loss": 1.0307, - "step": 3164 - }, - { - "epoch": 0.3805687488727229, - "grad_norm": 1.731238475995321, - "learning_rate": 2.8436281071309866e-06, - "loss": 1.0621, - "step": 3165 - }, - { - "epoch": 0.380688991763362, - "grad_norm": 0.7531075075896027, - "learning_rate": 2.842921763307455e-06, - "loss": 0.7856, - "step": 3166 - }, - { - "epoch": 0.38080923465400107, - "grad_norm": 1.8274497716970395, - "learning_rate": 2.842215291613361e-06, - "loss": 1.0623, - "step": 3167 - }, - { - "epoch": 0.3809294775446402, - "grad_norm": 0.8364789403623248, - "learning_rate": 2.8415086921558774e-06, - "loss": 0.8724, - "step": 3168 - }, - { - "epoch": 0.38104972043527924, - "grad_norm": 1.3786469126506058, - "learning_rate": 2.840801965042194e-06, - "loss": 1.0107, - "step": 3169 - }, - { - "epoch": 0.38116996332591835, - "grad_norm": 1.9713386592145867, - "learning_rate": 2.840095110379521e-06, - "loss": 1.0708, - "step": 3170 - }, - { - "epoch": 0.38129020621655746, - "grad_norm": 0.7389382562382598, - "learning_rate": 2.8393881282750884e-06, - "loss": 0.7854, - "step": 3171 - }, - { - "epoch": 0.3814104491071965, - "grad_norm": 2.1752035856881706, - "learning_rate": 2.838681018836144e-06, - "loss": 1.0136, - "step": 3172 - }, - { - "epoch": 0.3815306919978356, - "grad_norm": 1.814444649700167, - "learning_rate": 2.837973782169955e-06, - "loss": 1.0026, - "step": 3173 - }, - { - "epoch": 0.38165093488847474, - "grad_norm": 0.8874348509073496, - "learning_rate": 2.8372664183838096e-06, - "loss": 0.8578, - "step": 3174 - }, - { - "epoch": 0.3817711777791138, - "grad_norm": 2.0830914802040454, - "learning_rate": 2.836558927585015e-06, - "loss": 0.9175, - "step": 3175 - }, - { - "epoch": 0.3818914206697529, - "grad_norm": 1.5999124354004806, - "learning_rate": 2.8358513098808957e-06, - "loss": 1.0471, - "step": 3176 - }, - { - "epoch": 0.382011663560392, - "grad_norm": 1.7077885045829, - "learning_rate": 2.835143565378798e-06, - "loss": 1.0, - "step": 3177 - }, - { - "epoch": 0.38213190645103107, - "grad_norm": 1.9929945563870288, - "learning_rate": 2.8344356941860847e-06, - "loss": 1.0049, - "step": 3178 - }, - { - "epoch": 0.3822521493416702, - "grad_norm": 2.0290177036676824, - "learning_rate": 2.8337276964101403e-06, - "loss": 0.8906, - "step": 3179 - }, - { - "epoch": 0.3823723922323093, - "grad_norm": 2.525812029805882, - "learning_rate": 2.833019572158367e-06, - "loss": 0.9839, - "step": 3180 - }, - { - "epoch": 0.38249263512294834, - "grad_norm": 2.0606669748026927, - "learning_rate": 2.8323113215381872e-06, - "loss": 1.0363, - "step": 3181 - }, - { - "epoch": 0.38261287801358745, - "grad_norm": 1.8033452843195446, - "learning_rate": 2.831602944657042e-06, - "loss": 0.9843, - "step": 3182 - }, - { - "epoch": 0.38273312090422656, - "grad_norm": 2.589190508590786, - "learning_rate": 2.830894441622391e-06, - "loss": 0.9716, - "step": 3183 - }, - { - "epoch": 0.3828533637948656, - "grad_norm": 1.6652078045222527, - "learning_rate": 2.8301858125417134e-06, - "loss": 1.0242, - "step": 3184 - }, - { - "epoch": 0.38297360668550473, - "grad_norm": 1.9901912486127276, - "learning_rate": 2.8294770575225082e-06, - "loss": 0.9702, - "step": 3185 - }, - { - "epoch": 0.3830938495761438, - "grad_norm": 2.3794942796177088, - "learning_rate": 2.828768176672293e-06, - "loss": 1.0725, - "step": 3186 - }, - { - "epoch": 0.3832140924667829, - "grad_norm": 2.9494195528701135, - "learning_rate": 2.8280591700986044e-06, - "loss": 0.9537, - "step": 3187 - }, - { - "epoch": 0.383334335357422, - "grad_norm": 2.051736547404377, - "learning_rate": 2.827350037908999e-06, - "loss": 0.9847, - "step": 3188 - }, - { - "epoch": 0.38345457824806106, - "grad_norm": 2.0342561045415954, - "learning_rate": 2.8266407802110496e-06, - "loss": 1.0221, - "step": 3189 - }, - { - "epoch": 0.3835748211387002, - "grad_norm": 2.4288868458863266, - "learning_rate": 2.8259313971123515e-06, - "loss": 0.9813, - "step": 3190 - }, - { - "epoch": 0.3836950640293393, - "grad_norm": 1.3937515943030574, - "learning_rate": 2.8252218887205166e-06, - "loss": 1.0052, - "step": 3191 - }, - { - "epoch": 0.38381530691997834, - "grad_norm": 1.5881737788552177, - "learning_rate": 2.824512255143178e-06, - "loss": 1.041, - "step": 3192 - }, - { - "epoch": 0.38393554981061745, - "grad_norm": 1.7505682295201876, - "learning_rate": 2.8238024964879855e-06, - "loss": 1.0173, - "step": 3193 - }, - { - "epoch": 0.38405579270125656, - "grad_norm": 1.9874850554474193, - "learning_rate": 2.8230926128626095e-06, - "loss": 0.9929, - "step": 3194 - }, - { - "epoch": 0.3841760355918956, - "grad_norm": 1.7339853759817734, - "learning_rate": 2.822382604374738e-06, - "loss": 1.0166, - "step": 3195 - }, - { - "epoch": 0.3842962784825347, - "grad_norm": 2.0471681574194625, - "learning_rate": 2.8216724711320793e-06, - "loss": 0.8845, - "step": 3196 - }, - { - "epoch": 0.38441652137317384, - "grad_norm": 2.1485367075816235, - "learning_rate": 2.820962213242361e-06, - "loss": 1.0332, - "step": 3197 - }, - { - "epoch": 0.3845367642638129, - "grad_norm": 2.030626171859633, - "learning_rate": 2.8202518308133264e-06, - "loss": 1.0738, - "step": 3198 - }, - { - "epoch": 0.384657007154452, - "grad_norm": 1.8558645368378608, - "learning_rate": 2.8195413239527426e-06, - "loss": 0.9651, - "step": 3199 - }, - { - "epoch": 0.38477725004509106, - "grad_norm": 1.8016113221698664, - "learning_rate": 2.8188306927683906e-06, - "loss": 1.0389, - "step": 3200 - }, - { - "epoch": 0.38489749293573017, - "grad_norm": 2.1907224625307777, - "learning_rate": 2.818119937368074e-06, - "loss": 0.9845, - "step": 3201 - }, - { - "epoch": 0.3850177358263693, - "grad_norm": 2.504707481985172, - "learning_rate": 2.817409057859613e-06, - "loss": 0.8825, - "step": 3202 - }, - { - "epoch": 0.38513797871700833, - "grad_norm": 1.6094004064239675, - "learning_rate": 2.8166980543508482e-06, - "loss": 1.0134, - "step": 3203 - }, - { - "epoch": 0.38525822160764744, - "grad_norm": 1.8487568776877752, - "learning_rate": 2.815986926949638e-06, - "loss": 1.0286, - "step": 3204 - }, - { - "epoch": 0.38537846449828655, - "grad_norm": 1.5254722895723316, - "learning_rate": 2.8152756757638597e-06, - "loss": 1.0269, - "step": 3205 - }, - { - "epoch": 0.3854987073889256, - "grad_norm": 1.7610401952901273, - "learning_rate": 2.8145643009014093e-06, - "loss": 1.0673, - "step": 3206 - }, - { - "epoch": 0.3856189502795647, - "grad_norm": 2.338017077041796, - "learning_rate": 2.813852802470202e-06, - "loss": 1.0149, - "step": 3207 - }, - { - "epoch": 0.38573919317020383, - "grad_norm": 2.415855359225634, - "learning_rate": 2.8131411805781717e-06, - "loss": 0.9545, - "step": 3208 - }, - { - "epoch": 0.3858594360608429, - "grad_norm": 2.238221811833511, - "learning_rate": 2.8124294353332707e-06, - "loss": 0.8666, - "step": 3209 - }, - { - "epoch": 0.385979678951482, - "grad_norm": 1.7717352272086344, - "learning_rate": 2.8117175668434713e-06, - "loss": 1.0071, - "step": 3210 - }, - { - "epoch": 0.3860999218421211, - "grad_norm": 3.9372071656826657, - "learning_rate": 2.811005575216762e-06, - "loss": 0.9236, - "step": 3211 - }, - { - "epoch": 0.38622016473276016, - "grad_norm": 1.303382540714573, - "learning_rate": 2.8102934605611513e-06, - "loss": 1.0132, - "step": 3212 - }, - { - "epoch": 0.3863404076233993, - "grad_norm": 2.4081429306142894, - "learning_rate": 2.8095812229846665e-06, - "loss": 0.9007, - "step": 3213 - }, - { - "epoch": 0.3864606505140384, - "grad_norm": 1.9778654003497393, - "learning_rate": 2.808868862595355e-06, - "loss": 0.9174, - "step": 3214 - }, - { - "epoch": 0.38658089340467744, - "grad_norm": 1.653534516520175, - "learning_rate": 2.8081563795012795e-06, - "loss": 1.0259, - "step": 3215 - }, - { - "epoch": 0.38670113629531655, - "grad_norm": 1.6396551262706036, - "learning_rate": 2.807443773810524e-06, - "loss": 0.9651, - "step": 3216 - }, - { - "epoch": 0.3868213791859556, - "grad_norm": 2.9035876805707463, - "learning_rate": 2.80673104563119e-06, - "loss": 1.1196, - "step": 3217 - }, - { - "epoch": 0.3869416220765947, - "grad_norm": 1.62333388378805, - "learning_rate": 2.8060181950713976e-06, - "loss": 1.0059, - "step": 3218 - }, - { - "epoch": 0.3870618649672338, - "grad_norm": 1.972052997468894, - "learning_rate": 2.805305222239286e-06, - "loss": 1.0424, - "step": 3219 - }, - { - "epoch": 0.3871821078578729, - "grad_norm": 1.7312830952810287, - "learning_rate": 2.8045921272430118e-06, - "loss": 0.9664, - "step": 3220 - }, - { - "epoch": 0.387302350748512, - "grad_norm": 6.9906166400612175, - "learning_rate": 2.803878910190753e-06, - "loss": 0.9883, - "step": 3221 - }, - { - "epoch": 0.3874225936391511, - "grad_norm": 2.136420917849832, - "learning_rate": 2.8031655711907017e-06, - "loss": 1.0474, - "step": 3222 - }, - { - "epoch": 0.38754283652979016, - "grad_norm": 2.4096119171704036, - "learning_rate": 2.8024521103510723e-06, - "loss": 1.0401, - "step": 3223 - }, - { - "epoch": 0.38766307942042927, - "grad_norm": 1.535005440717865, - "learning_rate": 2.8017385277800952e-06, - "loss": 0.9816, - "step": 3224 - }, - { - "epoch": 0.3877833223110684, - "grad_norm": 3.774039210646993, - "learning_rate": 2.8010248235860213e-06, - "loss": 0.9717, - "step": 3225 - }, - { - "epoch": 0.38790356520170743, - "grad_norm": 0.8489750039427787, - "learning_rate": 2.8003109978771192e-06, - "loss": 0.8988, - "step": 3226 - }, - { - "epoch": 0.38802380809234654, - "grad_norm": 2.60418266800097, - "learning_rate": 2.799597050761674e-06, - "loss": 1.0174, - "step": 3227 - }, - { - "epoch": 0.38814405098298566, - "grad_norm": 2.0866529973509635, - "learning_rate": 2.7988829823479924e-06, - "loss": 1.0223, - "step": 3228 - }, - { - "epoch": 0.3882642938736247, - "grad_norm": 1.7585653091767774, - "learning_rate": 2.7981687927443976e-06, - "loss": 0.8734, - "step": 3229 - }, - { - "epoch": 0.3883845367642638, - "grad_norm": 1.5955764178580132, - "learning_rate": 2.797454482059231e-06, - "loss": 1.0846, - "step": 3230 - }, - { - "epoch": 0.3885047796549029, - "grad_norm": 3.3750511448128515, - "learning_rate": 2.7967400504008537e-06, - "loss": 1.0749, - "step": 3231 - }, - { - "epoch": 0.388625022545542, - "grad_norm": 0.8578112506683646, - "learning_rate": 2.7960254978776456e-06, - "loss": 0.855, - "step": 3232 - }, - { - "epoch": 0.3887452654361811, - "grad_norm": 1.8577221488317708, - "learning_rate": 2.7953108245980006e-06, - "loss": 1.0389, - "step": 3233 - }, - { - "epoch": 0.38886550832682015, - "grad_norm": 1.4493482177154904, - "learning_rate": 2.7945960306703365e-06, - "loss": 0.9725, - "step": 3234 - }, - { - "epoch": 0.38898575121745926, - "grad_norm": 1.5954659891594987, - "learning_rate": 2.7938811162030865e-06, - "loss": 0.8869, - "step": 3235 - }, - { - "epoch": 0.3891059941080984, - "grad_norm": 1.6830077955005367, - "learning_rate": 2.793166081304702e-06, - "loss": 1.0537, - "step": 3236 - }, - { - "epoch": 0.38922623699873743, - "grad_norm": 1.6093638891929118, - "learning_rate": 2.7924509260836543e-06, - "loss": 1.0523, - "step": 3237 - }, - { - "epoch": 0.38934647988937654, - "grad_norm": 1.5364247291467747, - "learning_rate": 2.791735650648431e-06, - "loss": 0.9138, - "step": 3238 - }, - { - "epoch": 0.38946672278001565, - "grad_norm": 1.8613732123342979, - "learning_rate": 2.791020255107538e-06, - "loss": 0.9772, - "step": 3239 - }, - { - "epoch": 0.3895869656706547, - "grad_norm": 1.4065253094220955, - "learning_rate": 2.7903047395695023e-06, - "loss": 1.0261, - "step": 3240 - }, - { - "epoch": 0.3897072085612938, - "grad_norm": 1.9831107610413143, - "learning_rate": 2.789589104142865e-06, - "loss": 1.127, - "step": 3241 - }, - { - "epoch": 0.3898274514519329, - "grad_norm": 2.2585651766679393, - "learning_rate": 2.7888733489361895e-06, - "loss": 0.9951, - "step": 3242 - }, - { - "epoch": 0.389947694342572, - "grad_norm": 0.7864556638828135, - "learning_rate": 2.788157474058054e-06, - "loss": 0.8802, - "step": 3243 - }, - { - "epoch": 0.3900679372332111, - "grad_norm": 1.7142540823765602, - "learning_rate": 2.7874414796170555e-06, - "loss": 0.9316, - "step": 3244 - }, - { - "epoch": 0.3901881801238502, - "grad_norm": 2.283481186119575, - "learning_rate": 2.7867253657218113e-06, - "loss": 1.0658, - "step": 3245 - }, - { - "epoch": 0.39030842301448926, - "grad_norm": 1.530798242063288, - "learning_rate": 2.7860091324809544e-06, - "loss": 0.9581, - "step": 3246 - }, - { - "epoch": 0.39042866590512837, - "grad_norm": 1.6486726566015124, - "learning_rate": 2.7852927800031377e-06, - "loss": 1.034, - "step": 3247 - }, - { - "epoch": 0.3905489087957674, - "grad_norm": 1.5098551616298637, - "learning_rate": 2.7845763083970298e-06, - "loss": 1.0531, - "step": 3248 - }, - { - "epoch": 0.39066915168640653, - "grad_norm": 1.628917959340408, - "learning_rate": 2.7838597177713205e-06, - "loss": 1.0533, - "step": 3249 - }, - { - "epoch": 0.39078939457704565, - "grad_norm": 1.6654874046992345, - "learning_rate": 2.7831430082347143e-06, - "loss": 0.9663, - "step": 3250 - }, - { - "epoch": 0.3909096374676847, - "grad_norm": 2.2907146441520423, - "learning_rate": 2.7824261798959373e-06, - "loss": 1.0538, - "step": 3251 - }, - { - "epoch": 0.3910298803583238, - "grad_norm": 1.792186225551428, - "learning_rate": 2.78170923286373e-06, - "loss": 1.0215, - "step": 3252 - }, - { - "epoch": 0.3911501232489629, - "grad_norm": 1.9958492360712572, - "learning_rate": 2.780992167246854e-06, - "loss": 1.061, - "step": 3253 - }, - { - "epoch": 0.391270366139602, - "grad_norm": 1.0072106035206145, - "learning_rate": 2.7802749831540883e-06, - "loss": 1.0095, - "step": 3254 - }, - { - "epoch": 0.3913906090302411, - "grad_norm": 1.751715364344871, - "learning_rate": 2.7795576806942268e-06, - "loss": 1.0469, - "step": 3255 - }, - { - "epoch": 0.3915108519208802, - "grad_norm": 0.7932651886470221, - "learning_rate": 2.778840259976085e-06, - "loss": 0.8079, - "step": 3256 - }, - { - "epoch": 0.39163109481151925, - "grad_norm": 3.5607820350552033, - "learning_rate": 2.778122721108495e-06, - "loss": 1.0067, - "step": 3257 - }, - { - "epoch": 0.39175133770215836, - "grad_norm": 1.7025336542598297, - "learning_rate": 2.7774050642003076e-06, - "loss": 1.1122, - "step": 3258 - }, - { - "epoch": 0.3918715805927975, - "grad_norm": 1.6473292381228515, - "learning_rate": 2.7766872893603896e-06, - "loss": 1.1632, - "step": 3259 - }, - { - "epoch": 0.39199182348343653, - "grad_norm": 1.5823426493537958, - "learning_rate": 2.7759693966976275e-06, - "loss": 0.9634, - "step": 3260 - }, - { - "epoch": 0.39211206637407564, - "grad_norm": 1.9050167659712582, - "learning_rate": 2.7752513863209242e-06, - "loss": 1.0794, - "step": 3261 - }, - { - "epoch": 0.39223230926471475, - "grad_norm": 1.794632216898508, - "learning_rate": 2.774533258339203e-06, - "loss": 1.064, - "step": 3262 - }, - { - "epoch": 0.3923525521553538, - "grad_norm": 2.099768571362971, - "learning_rate": 2.7738150128614014e-06, - "loss": 1.0166, - "step": 3263 - }, - { - "epoch": 0.3924727950459929, - "grad_norm": 1.752587313143528, - "learning_rate": 2.7730966499964777e-06, - "loss": 1.1243, - "step": 3264 - }, - { - "epoch": 0.39259303793663197, - "grad_norm": 2.3084311904087698, - "learning_rate": 2.772378169853408e-06, - "loss": 1.0289, - "step": 3265 - }, - { - "epoch": 0.3927132808272711, - "grad_norm": 1.6853645376372857, - "learning_rate": 2.771659572541183e-06, - "loss": 0.9685, - "step": 3266 - }, - { - "epoch": 0.3928335237179102, - "grad_norm": 3.8174610066499017, - "learning_rate": 2.7709408581688143e-06, - "loss": 1.1048, - "step": 3267 - }, - { - "epoch": 0.39295376660854925, - "grad_norm": 1.6549843324686047, - "learning_rate": 2.7702220268453307e-06, - "loss": 1.1124, - "step": 3268 - }, - { - "epoch": 0.39307400949918836, - "grad_norm": 1.7832416308821775, - "learning_rate": 2.7695030786797785e-06, - "loss": 1.0735, - "step": 3269 - }, - { - "epoch": 0.39319425238982747, - "grad_norm": 5.349206469858659, - "learning_rate": 2.7687840137812206e-06, - "loss": 0.9674, - "step": 3270 - }, - { - "epoch": 0.3933144952804665, - "grad_norm": 0.7996954651197767, - "learning_rate": 2.7680648322587395e-06, - "loss": 0.8657, - "step": 3271 - }, - { - "epoch": 0.39343473817110564, - "grad_norm": 2.627452159123021, - "learning_rate": 2.7673455342214334e-06, - "loss": 1.0425, - "step": 3272 - }, - { - "epoch": 0.39355498106174475, - "grad_norm": 1.9509710412821377, - "learning_rate": 2.7666261197784198e-06, - "loss": 0.9898, - "step": 3273 - }, - { - "epoch": 0.3936752239523838, - "grad_norm": 3.4008220856815594, - "learning_rate": 2.7659065890388336e-06, - "loss": 0.9997, - "step": 3274 - }, - { - "epoch": 0.3937954668430229, - "grad_norm": 2.5242365938888316, - "learning_rate": 2.7651869421118266e-06, - "loss": 1.076, - "step": 3275 - }, - { - "epoch": 0.393915709733662, - "grad_norm": 1.5941043534624022, - "learning_rate": 2.76446717910657e-06, - "loss": 1.0576, - "step": 3276 - }, - { - "epoch": 0.3940359526243011, - "grad_norm": 1.905084409971275, - "learning_rate": 2.763747300132249e-06, - "loss": 0.9887, - "step": 3277 - }, - { - "epoch": 0.3941561955149402, - "grad_norm": 1.672018027203857, - "learning_rate": 2.7630273052980704e-06, - "loss": 1.0954, - "step": 3278 - }, - { - "epoch": 0.39427643840557924, - "grad_norm": 2.0878379822277777, - "learning_rate": 2.7623071947132554e-06, - "loss": 0.9057, - "step": 3279 - }, - { - "epoch": 0.39439668129621835, - "grad_norm": 1.7597175955701123, - "learning_rate": 2.7615869684870458e-06, - "loss": 1.0073, - "step": 3280 - }, - { - "epoch": 0.39451692418685746, - "grad_norm": 1.6878428640673158, - "learning_rate": 2.7608666267286986e-06, - "loss": 1.0677, - "step": 3281 - }, - { - "epoch": 0.3946371670774965, - "grad_norm": 1.9425768633036196, - "learning_rate": 2.760146169547489e-06, - "loss": 1.0904, - "step": 3282 - }, - { - "epoch": 0.39475740996813563, - "grad_norm": 1.5354834443283316, - "learning_rate": 2.75942559705271e-06, - "loss": 0.9952, - "step": 3283 - }, - { - "epoch": 0.39487765285877474, - "grad_norm": 1.6647273781541678, - "learning_rate": 2.7587049093536713e-06, - "loss": 1.1121, - "step": 3284 - }, - { - "epoch": 0.3949978957494138, - "grad_norm": 1.6547309819344813, - "learning_rate": 2.757984106559701e-06, - "loss": 1.0332, - "step": 3285 - }, - { - "epoch": 0.3951181386400529, - "grad_norm": 2.017811262281894, - "learning_rate": 2.7572631887801446e-06, - "loss": 0.9513, - "step": 3286 - }, - { - "epoch": 0.395238381530692, - "grad_norm": 1.8172080618563422, - "learning_rate": 2.7565421561243654e-06, - "loss": 0.9789, - "step": 3287 - }, - { - "epoch": 0.3953586244213311, - "grad_norm": 1.9918817622127123, - "learning_rate": 2.7558210087017413e-06, - "loss": 1.0526, - "step": 3288 - }, - { - "epoch": 0.3954788673119702, - "grad_norm": 9.147854963172847, - "learning_rate": 2.7550997466216724e-06, - "loss": 0.977, - "step": 3289 - }, - { - "epoch": 0.3955991102026093, - "grad_norm": 1.6014628309545098, - "learning_rate": 2.7543783699935714e-06, - "loss": 1.0345, - "step": 3290 - }, - { - "epoch": 0.39571935309324835, - "grad_norm": 2.4228612060846975, - "learning_rate": 2.753656878926872e-06, - "loss": 1.0848, - "step": 3291 - }, - { - "epoch": 0.39583959598388746, - "grad_norm": 1.7461254279145486, - "learning_rate": 2.752935273531023e-06, - "loss": 0.9674, - "step": 3292 - }, - { - "epoch": 0.39595983887452657, - "grad_norm": 3.7026580644928195, - "learning_rate": 2.752213553915492e-06, - "loss": 1.0258, - "step": 3293 - }, - { - "epoch": 0.3960800817651656, - "grad_norm": 0.8035339308875612, - "learning_rate": 2.751491720189762e-06, - "loss": 0.9141, - "step": 3294 - }, - { - "epoch": 0.39620032465580474, - "grad_norm": 2.142974900234407, - "learning_rate": 2.7507697724633364e-06, - "loss": 1.141, - "step": 3295 - }, - { - "epoch": 0.3963205675464438, - "grad_norm": 0.7868033446029958, - "learning_rate": 2.7500477108457327e-06, - "loss": 0.7976, - "step": 3296 - }, - { - "epoch": 0.3964408104370829, - "grad_norm": 2.1890018212392315, - "learning_rate": 2.7493255354464877e-06, - "loss": 1.0381, - "step": 3297 - }, - { - "epoch": 0.396561053327722, - "grad_norm": 1.8118671759455887, - "learning_rate": 2.748603246375156e-06, - "loss": 0.9907, - "step": 3298 - }, - { - "epoch": 0.39668129621836107, - "grad_norm": 1.9145163484574224, - "learning_rate": 2.7478808437413055e-06, - "loss": 0.9209, - "step": 3299 - }, - { - "epoch": 0.3968015391090002, - "grad_norm": 1.86785097983416, - "learning_rate": 2.7471583276545263e-06, - "loss": 0.8934, - "step": 3300 - }, - { - "epoch": 0.3969217819996393, - "grad_norm": 1.8525823046152456, - "learning_rate": 2.7464356982244224e-06, - "loss": 0.9321, - "step": 3301 - }, - { - "epoch": 0.39704202489027834, - "grad_norm": 0.8212717895868674, - "learning_rate": 2.745712955560617e-06, - "loss": 0.8774, - "step": 3302 - }, - { - "epoch": 0.39716226778091746, - "grad_norm": 2.1974224119409187, - "learning_rate": 2.7449900997727496e-06, - "loss": 1.0058, - "step": 3303 - }, - { - "epoch": 0.39728251067155657, - "grad_norm": 1.5779818479366619, - "learning_rate": 2.744267130970476e-06, - "loss": 1.0674, - "step": 3304 - }, - { - "epoch": 0.3974027535621956, - "grad_norm": 1.7217756516168803, - "learning_rate": 2.7435440492634697e-06, - "loss": 0.9942, - "step": 3305 - }, - { - "epoch": 0.39752299645283473, - "grad_norm": 1.9951882296035284, - "learning_rate": 2.7428208547614228e-06, - "loss": 0.8921, - "step": 3306 - }, - { - "epoch": 0.39764323934347384, - "grad_norm": 1.8155306766978645, - "learning_rate": 2.742097547574043e-06, - "loss": 0.9986, - "step": 3307 - }, - { - "epoch": 0.3977634822341129, - "grad_norm": 2.7243598851106023, - "learning_rate": 2.7413741278110544e-06, - "loss": 1.0027, - "step": 3308 - }, - { - "epoch": 0.397883725124752, - "grad_norm": 1.9235656869102549, - "learning_rate": 2.7406505955822016e-06, - "loss": 0.9143, - "step": 3309 - }, - { - "epoch": 0.39800396801539106, - "grad_norm": 2.242488827544654, - "learning_rate": 2.7399269509972415e-06, - "loss": 0.8933, - "step": 3310 - }, - { - "epoch": 0.3981242109060302, - "grad_norm": 2.133071157120721, - "learning_rate": 2.7392031941659514e-06, - "loss": 1.064, - "step": 3311 - }, - { - "epoch": 0.3982444537966693, - "grad_norm": 1.8151190678237055, - "learning_rate": 2.7384793251981244e-06, - "loss": 1.0866, - "step": 3312 - }, - { - "epoch": 0.39836469668730834, - "grad_norm": 1.6435826843742167, - "learning_rate": 2.737755344203571e-06, - "loss": 1.0358, - "step": 3313 - }, - { - "epoch": 0.39848493957794745, - "grad_norm": 1.5141129177269321, - "learning_rate": 2.7370312512921186e-06, - "loss": 1.0262, - "step": 3314 - }, - { - "epoch": 0.39860518246858656, - "grad_norm": 2.41344239007834, - "learning_rate": 2.736307046573611e-06, - "loss": 0.9978, - "step": 3315 - }, - { - "epoch": 0.3987254253592256, - "grad_norm": 1.6322701023270272, - "learning_rate": 2.73558273015791e-06, - "loss": 1.053, - "step": 3316 - }, - { - "epoch": 0.3988456682498647, - "grad_norm": 2.1214281243621516, - "learning_rate": 2.734858302154894e-06, - "loss": 0.9416, - "step": 3317 - }, - { - "epoch": 0.39896591114050384, - "grad_norm": 2.1891795658718496, - "learning_rate": 2.734133762674457e-06, - "loss": 0.988, - "step": 3318 - }, - { - "epoch": 0.3990861540311429, - "grad_norm": 1.9803170591870451, - "learning_rate": 2.7334091118265124e-06, - "loss": 0.9389, - "step": 3319 - }, - { - "epoch": 0.399206396921782, - "grad_norm": 0.6637372286155173, - "learning_rate": 2.732684349720989e-06, - "loss": 0.8188, - "step": 3320 - }, - { - "epoch": 0.3993266398124211, - "grad_norm": 1.7707082442439581, - "learning_rate": 2.7319594764678318e-06, - "loss": 0.9793, - "step": 3321 - }, - { - "epoch": 0.39944688270306017, - "grad_norm": 1.4924802642744248, - "learning_rate": 2.7312344921770044e-06, - "loss": 1.0671, - "step": 3322 - }, - { - "epoch": 0.3995671255936993, - "grad_norm": 1.8212128733226258, - "learning_rate": 2.7305093969584857e-06, - "loss": 1.0132, - "step": 3323 - }, - { - "epoch": 0.3996873684843384, - "grad_norm": 1.638404942665058, - "learning_rate": 2.729784190922272e-06, - "loss": 1.0219, - "step": 3324 - }, - { - "epoch": 0.39980761137497745, - "grad_norm": 0.7747655313885466, - "learning_rate": 2.729058874178378e-06, - "loss": 0.8238, - "step": 3325 - }, - { - "epoch": 0.39992785426561656, - "grad_norm": 2.063029134673152, - "learning_rate": 2.7283334468368315e-06, - "loss": 0.9245, - "step": 3326 - }, - { - "epoch": 0.4000480971562556, - "grad_norm": 2.0378806244288716, - "learning_rate": 2.72760790900768e-06, - "loss": 0.9628, - "step": 3327 - }, - { - "epoch": 0.4001683400468947, - "grad_norm": 2.9010292660204464, - "learning_rate": 2.7268822608009875e-06, - "loss": 1.0223, - "step": 3328 - }, - { - "epoch": 0.40028858293753383, - "grad_norm": 1.8566735782985464, - "learning_rate": 2.726156502326834e-06, - "loss": 1.0124, - "step": 3329 - }, - { - "epoch": 0.4004088258281729, - "grad_norm": 0.7085331092803011, - "learning_rate": 2.725430633695316e-06, - "loss": 0.8606, - "step": 3330 - }, - { - "epoch": 0.400529068718812, - "grad_norm": 0.9345460129212626, - "learning_rate": 2.7247046550165485e-06, - "loss": 0.856, - "step": 3331 - }, - { - "epoch": 0.4006493116094511, - "grad_norm": 1.453520854232145, - "learning_rate": 2.7239785664006606e-06, - "loss": 0.982, - "step": 3332 - }, - { - "epoch": 0.40076955450009016, - "grad_norm": 0.8282543657131953, - "learning_rate": 2.7232523679578002e-06, - "loss": 0.8987, - "step": 3333 - }, - { - "epoch": 0.4008897973907293, - "grad_norm": 2.0148608511309574, - "learning_rate": 2.7225260597981295e-06, - "loss": 1.0319, - "step": 3334 - }, - { - "epoch": 0.4010100402813684, - "grad_norm": 3.0590260323211464, - "learning_rate": 2.721799642031831e-06, - "loss": 1.0134, - "step": 3335 - }, - { - "epoch": 0.40113028317200744, - "grad_norm": 3.3255906462405, - "learning_rate": 2.721073114769101e-06, - "loss": 1.0058, - "step": 3336 - }, - { - "epoch": 0.40125052606264655, - "grad_norm": 3.374386767024017, - "learning_rate": 2.7203464781201523e-06, - "loss": 0.9797, - "step": 3337 - }, - { - "epoch": 0.40137076895328566, - "grad_norm": 2.2596013463609723, - "learning_rate": 2.719619732195215e-06, - "loss": 1.0044, - "step": 3338 - }, - { - "epoch": 0.4014910118439247, - "grad_norm": 1.2563783513636357, - "learning_rate": 2.7188928771045377e-06, - "loss": 0.9578, - "step": 3339 - }, - { - "epoch": 0.4016112547345638, - "grad_norm": 1.6715105319043646, - "learning_rate": 2.7181659129583815e-06, - "loss": 1.0295, - "step": 3340 - }, - { - "epoch": 0.4017314976252029, - "grad_norm": 1.847714316360961, - "learning_rate": 2.7174388398670276e-06, - "loss": 0.9994, - "step": 3341 - }, - { - "epoch": 0.401851740515842, - "grad_norm": 1.78526723170301, - "learning_rate": 2.716711657940773e-06, - "loss": 1.15, - "step": 3342 - }, - { - "epoch": 0.4019719834064811, - "grad_norm": 0.8458258207266278, - "learning_rate": 2.7159843672899284e-06, - "loss": 0.8344, - "step": 3343 - }, - { - "epoch": 0.40209222629712016, - "grad_norm": 1.8684626099946768, - "learning_rate": 2.715256968024825e-06, - "loss": 1.0436, - "step": 3344 - }, - { - "epoch": 0.40221246918775927, - "grad_norm": 1.5698966619171182, - "learning_rate": 2.7145294602558083e-06, - "loss": 1.0535, - "step": 3345 - }, - { - "epoch": 0.4023327120783984, - "grad_norm": 1.9810409772583353, - "learning_rate": 2.713801844093241e-06, - "loss": 0.9329, - "step": 3346 - }, - { - "epoch": 0.40245295496903744, - "grad_norm": 2.049619392790758, - "learning_rate": 2.7130741196475014e-06, - "loss": 1.1152, - "step": 3347 - }, - { - "epoch": 0.40257319785967655, - "grad_norm": 1.7879739973195758, - "learning_rate": 2.7123462870289848e-06, - "loss": 1.0283, - "step": 3348 - }, - { - "epoch": 0.40269344075031566, - "grad_norm": 1.5417761720823893, - "learning_rate": 2.711618346348102e-06, - "loss": 1.0377, - "step": 3349 - }, - { - "epoch": 0.4028136836409547, - "grad_norm": 1.5280219326092914, - "learning_rate": 2.7108902977152825e-06, - "loss": 0.8579, - "step": 3350 - }, - { - "epoch": 0.4029339265315938, - "grad_norm": 2.0792614117009136, - "learning_rate": 2.7101621412409704e-06, - "loss": 0.9755, - "step": 3351 - }, - { - "epoch": 0.40305416942223293, - "grad_norm": 1.9132195309722948, - "learning_rate": 2.7094338770356256e-06, - "loss": 1.0875, - "step": 3352 - }, - { - "epoch": 0.403174412312872, - "grad_norm": 1.770595138087934, - "learning_rate": 2.708705505209726e-06, - "loss": 0.8702, - "step": 3353 - }, - { - "epoch": 0.4032946552035111, - "grad_norm": 2.0236673703209984, - "learning_rate": 2.7079770258737646e-06, - "loss": 1.1433, - "step": 3354 - }, - { - "epoch": 0.4034148980941502, - "grad_norm": 2.1753755530234073, - "learning_rate": 2.707248439138251e-06, - "loss": 0.9767, - "step": 3355 - }, - { - "epoch": 0.40353514098478926, - "grad_norm": 1.8166098203718473, - "learning_rate": 2.7065197451137114e-06, - "loss": 0.8835, - "step": 3356 - }, - { - "epoch": 0.4036553838754284, - "grad_norm": 1.8193863899086724, - "learning_rate": 2.7057909439106894e-06, - "loss": 0.9056, - "step": 3357 - }, - { - "epoch": 0.40377562676606743, - "grad_norm": 1.6359419333498981, - "learning_rate": 2.7050620356397417e-06, - "loss": 1.0135, - "step": 3358 - }, - { - "epoch": 0.40389586965670654, - "grad_norm": 1.523777224490742, - "learning_rate": 2.7043330204114437e-06, - "loss": 0.9535, - "step": 3359 - }, - { - "epoch": 0.40401611254734565, - "grad_norm": 2.2853955787467037, - "learning_rate": 2.7036038983363862e-06, - "loss": 1.0851, - "step": 3360 - }, - { - "epoch": 0.4041363554379847, - "grad_norm": 1.4680938878503995, - "learning_rate": 2.702874669525177e-06, - "loss": 1.0647, - "step": 3361 - }, - { - "epoch": 0.4042565983286238, - "grad_norm": 1.782828117121666, - "learning_rate": 2.7021453340884394e-06, - "loss": 0.9205, - "step": 3362 - }, - { - "epoch": 0.40437684121926293, - "grad_norm": 3.620708292801968, - "learning_rate": 2.7014158921368125e-06, - "loss": 0.9545, - "step": 3363 - }, - { - "epoch": 0.404497084109902, - "grad_norm": 2.217558419921369, - "learning_rate": 2.700686343780953e-06, - "loss": 1.0835, - "step": 3364 - }, - { - "epoch": 0.4046173270005411, - "grad_norm": 1.6755871782655098, - "learning_rate": 2.699956689131532e-06, - "loss": 1.1151, - "step": 3365 - }, - { - "epoch": 0.4047375698911802, - "grad_norm": 1.84262302676352, - "learning_rate": 2.699226928299238e-06, - "loss": 1.0758, - "step": 3366 - }, - { - "epoch": 0.40485781278181926, - "grad_norm": 2.014897059799973, - "learning_rate": 2.698497061394774e-06, - "loss": 1.0172, - "step": 3367 - }, - { - "epoch": 0.40497805567245837, - "grad_norm": 1.457885328233857, - "learning_rate": 2.6977670885288627e-06, - "loss": 1.0307, - "step": 3368 - }, - { - "epoch": 0.4050982985630975, - "grad_norm": 1.8427947204363515, - "learning_rate": 2.6970370098122378e-06, - "loss": 0.9823, - "step": 3369 - }, - { - "epoch": 0.40521854145373654, - "grad_norm": 1.3877668261640983, - "learning_rate": 2.6963068253556535e-06, - "loss": 1.0839, - "step": 3370 - }, - { - "epoch": 0.40533878434437565, - "grad_norm": 2.733959493402593, - "learning_rate": 2.6955765352698763e-06, - "loss": 1.076, - "step": 3371 - }, - { - "epoch": 0.40545902723501476, - "grad_norm": 1.9105391479410805, - "learning_rate": 2.6948461396656923e-06, - "loss": 0.9636, - "step": 3372 - }, - { - "epoch": 0.4055792701256538, - "grad_norm": 2.512878340138584, - "learning_rate": 2.6941156386539013e-06, - "loss": 0.9736, - "step": 3373 - }, - { - "epoch": 0.4056995130162929, - "grad_norm": 1.844745819653656, - "learning_rate": 2.6933850323453203e-06, - "loss": 1.0371, - "step": 3374 - }, - { - "epoch": 0.405819755906932, - "grad_norm": 1.7307385649776523, - "learning_rate": 2.6926543208507806e-06, - "loss": 0.9735, - "step": 3375 - }, - { - "epoch": 0.4059399987975711, - "grad_norm": 3.908237287456886, - "learning_rate": 2.6919235042811316e-06, - "loss": 1.0324, - "step": 3376 - }, - { - "epoch": 0.4060602416882102, - "grad_norm": 2.1285661058185625, - "learning_rate": 2.691192582747237e-06, - "loss": 0.9846, - "step": 3377 - }, - { - "epoch": 0.40618048457884925, - "grad_norm": 1.7098447192667867, - "learning_rate": 2.6904615563599765e-06, - "loss": 0.9591, - "step": 3378 - }, - { - "epoch": 0.40630072746948837, - "grad_norm": 1.8401255138752288, - "learning_rate": 2.6897304252302477e-06, - "loss": 1.0605, - "step": 3379 - }, - { - "epoch": 0.4064209703601275, - "grad_norm": 0.827122603444246, - "learning_rate": 2.688999189468962e-06, - "loss": 0.8006, - "step": 3380 - }, - { - "epoch": 0.40654121325076653, - "grad_norm": 2.2302251305289826, - "learning_rate": 2.6882678491870464e-06, - "loss": 0.9809, - "step": 3381 - }, - { - "epoch": 0.40666145614140564, - "grad_norm": 1.7228500706980066, - "learning_rate": 2.6875364044954453e-06, - "loss": 0.9382, - "step": 3382 - }, - { - "epoch": 0.40678169903204475, - "grad_norm": 1.7132324434452664, - "learning_rate": 2.6868048555051185e-06, - "loss": 1.0475, - "step": 3383 - }, - { - "epoch": 0.4069019419226838, - "grad_norm": 21.885217340412897, - "learning_rate": 2.686073202327041e-06, - "loss": 1.0898, - "step": 3384 - }, - { - "epoch": 0.4070221848133229, - "grad_norm": 1.4857981234386761, - "learning_rate": 2.6853414450722043e-06, - "loss": 0.9633, - "step": 3385 - }, - { - "epoch": 0.40714242770396203, - "grad_norm": 2.0592129262958636, - "learning_rate": 2.684609583851616e-06, - "loss": 1.0759, - "step": 3386 - }, - { - "epoch": 0.4072626705946011, - "grad_norm": 1.693433784161194, - "learning_rate": 2.683877618776297e-06, - "loss": 1.0333, - "step": 3387 - }, - { - "epoch": 0.4073829134852402, - "grad_norm": 2.20431659479428, - "learning_rate": 2.6831455499572876e-06, - "loss": 0.9682, - "step": 3388 - }, - { - "epoch": 0.40750315637587925, - "grad_norm": 1.7943317493420154, - "learning_rate": 2.682413377505641e-06, - "loss": 1.007, - "step": 3389 - }, - { - "epoch": 0.40762339926651836, - "grad_norm": 2.425932899440269, - "learning_rate": 2.6816811015324284e-06, - "loss": 0.9928, - "step": 3390 - }, - { - "epoch": 0.40774364215715747, - "grad_norm": 0.7936959150616058, - "learning_rate": 2.6809487221487343e-06, - "loss": 0.8513, - "step": 3391 - }, - { - "epoch": 0.4078638850477965, - "grad_norm": 4.911914252772224, - "learning_rate": 2.6802162394656605e-06, - "loss": 1.0515, - "step": 3392 - }, - { - "epoch": 0.40798412793843564, - "grad_norm": 1.7268538423416322, - "learning_rate": 2.679483653594324e-06, - "loss": 0.9508, - "step": 3393 - }, - { - "epoch": 0.40810437082907475, - "grad_norm": 2.4072874730820706, - "learning_rate": 2.678750964645857e-06, - "loss": 0.9825, - "step": 3394 - }, - { - "epoch": 0.4082246137197138, - "grad_norm": 2.2909875268017994, - "learning_rate": 2.6780181727314094e-06, - "loss": 1.0736, - "step": 3395 - }, - { - "epoch": 0.4083448566103529, - "grad_norm": 1.6339988961373644, - "learning_rate": 2.6772852779621435e-06, - "loss": 1.0034, - "step": 3396 - }, - { - "epoch": 0.408465099500992, - "grad_norm": 10.505075953489067, - "learning_rate": 2.676552280449239e-06, - "loss": 1.0765, - "step": 3397 - }, - { - "epoch": 0.4085853423916311, - "grad_norm": 2.2767473490071177, - "learning_rate": 2.6758191803038917e-06, - "loss": 0.9753, - "step": 3398 - }, - { - "epoch": 0.4087055852822702, - "grad_norm": 1.5320082363674183, - "learning_rate": 2.6750859776373125e-06, - "loss": 1.061, - "step": 3399 - }, - { - "epoch": 0.4088258281729093, - "grad_norm": 0.7719282587495997, - "learning_rate": 2.674352672560727e-06, - "loss": 0.8485, - "step": 3400 - }, - { - "epoch": 0.40894607106354836, - "grad_norm": 1.8899676859076253, - "learning_rate": 2.673619265185377e-06, - "loss": 1.0039, - "step": 3401 - }, - { - "epoch": 0.40906631395418747, - "grad_norm": 1.5010836978121922, - "learning_rate": 2.672885755622521e-06, - "loss": 1.0, - "step": 3402 - }, - { - "epoch": 0.4091865568448266, - "grad_norm": 2.0286377762588383, - "learning_rate": 2.67215214398343e-06, - "loss": 0.9312, - "step": 3403 - }, - { - "epoch": 0.40930679973546563, - "grad_norm": 2.020916756812083, - "learning_rate": 2.671418430379393e-06, - "loss": 0.9997, - "step": 3404 - }, - { - "epoch": 0.40942704262610474, - "grad_norm": 1.7326548526141476, - "learning_rate": 2.670684614921715e-06, - "loss": 1.0577, - "step": 3405 - }, - { - "epoch": 0.4095472855167438, - "grad_norm": 2.341212717585302, - "learning_rate": 2.6699506977217128e-06, - "loss": 0.9214, - "step": 3406 - }, - { - "epoch": 0.4096675284073829, - "grad_norm": 1.970051410991775, - "learning_rate": 2.6692166788907233e-06, - "loss": 0.9254, - "step": 3407 - }, - { - "epoch": 0.409787771298022, - "grad_norm": 1.8949198845306556, - "learning_rate": 2.6684825585400957e-06, - "loss": 0.9952, - "step": 3408 - }, - { - "epoch": 0.4099080141886611, - "grad_norm": 0.882094268304591, - "learning_rate": 2.6677483367811947e-06, - "loss": 0.951, - "step": 3409 - }, - { - "epoch": 0.4100282570793002, - "grad_norm": 3.0103870817180476, - "learning_rate": 2.6670140137254028e-06, - "loss": 0.9862, - "step": 3410 - }, - { - "epoch": 0.4101484999699393, - "grad_norm": 2.2197768293362525, - "learning_rate": 2.666279589484115e-06, - "loss": 1.1055, - "step": 3411 - }, - { - "epoch": 0.41026874286057835, - "grad_norm": 1.9765712451364408, - "learning_rate": 2.6655450641687435e-06, - "loss": 1.034, - "step": 3412 - }, - { - "epoch": 0.41038898575121746, - "grad_norm": 1.517767424296012, - "learning_rate": 2.664810437890715e-06, - "loss": 0.9171, - "step": 3413 - }, - { - "epoch": 0.41050922864185657, - "grad_norm": 1.7522021470700169, - "learning_rate": 2.6640757107614714e-06, - "loss": 1.0285, - "step": 3414 - }, - { - "epoch": 0.4106294715324956, - "grad_norm": 3.7991036361772927, - "learning_rate": 2.6633408828924697e-06, - "loss": 0.9338, - "step": 3415 - }, - { - "epoch": 0.41074971442313474, - "grad_norm": 1.6293421466238363, - "learning_rate": 2.662605954395185e-06, - "loss": 0.9291, - "step": 3416 - }, - { - "epoch": 0.41086995731377385, - "grad_norm": 2.088979267753396, - "learning_rate": 2.6618709253811027e-06, - "loss": 1.0663, - "step": 3417 - }, - { - "epoch": 0.4109902002044129, - "grad_norm": 1.5265446204053057, - "learning_rate": 2.6611357959617277e-06, - "loss": 1.1071, - "step": 3418 - }, - { - "epoch": 0.411110443095052, - "grad_norm": 21.731044884394723, - "learning_rate": 2.660400566248578e-06, - "loss": 1.137, - "step": 3419 - }, - { - "epoch": 0.41123068598569107, - "grad_norm": 4.0222699591635624, - "learning_rate": 2.6596652363531876e-06, - "loss": 0.9024, - "step": 3420 - }, - { - "epoch": 0.4113509288763302, - "grad_norm": 1.8971369177594832, - "learning_rate": 2.6589298063871055e-06, - "loss": 1.0101, - "step": 3421 - }, - { - "epoch": 0.4114711717669693, - "grad_norm": 1.8137241701880393, - "learning_rate": 2.658194276461895e-06, - "loss": 0.9352, - "step": 3422 - }, - { - "epoch": 0.41159141465760835, - "grad_norm": 1.705301206836912, - "learning_rate": 2.6574586466891368e-06, - "loss": 0.9004, - "step": 3423 - }, - { - "epoch": 0.41171165754824746, - "grad_norm": 1.7661344080287038, - "learning_rate": 2.6567229171804247e-06, - "loss": 0.8788, - "step": 3424 - }, - { - "epoch": 0.41183190043888657, - "grad_norm": 2.5333669781542105, - "learning_rate": 2.655987088047368e-06, - "loss": 1.0989, - "step": 3425 - }, - { - "epoch": 0.4119521433295256, - "grad_norm": 10.556268964607003, - "learning_rate": 2.6552511594015912e-06, - "loss": 1.0104, - "step": 3426 - }, - { - "epoch": 0.41207238622016473, - "grad_norm": 2.4279001414143226, - "learning_rate": 2.654515131354735e-06, - "loss": 1.0801, - "step": 3427 - }, - { - "epoch": 0.41219262911080384, - "grad_norm": 2.152472462760417, - "learning_rate": 2.653779004018453e-06, - "loss": 1.0868, - "step": 3428 - }, - { - "epoch": 0.4123128720014429, - "grad_norm": 1.8956485560431724, - "learning_rate": 2.653042777504417e-06, - "loss": 1.0434, - "step": 3429 - }, - { - "epoch": 0.412433114892082, - "grad_norm": 1.7082738478313628, - "learning_rate": 2.6523064519243105e-06, - "loss": 1.0244, - "step": 3430 - }, - { - "epoch": 0.4125533577827211, - "grad_norm": 2.2932266012387235, - "learning_rate": 2.6515700273898333e-06, - "loss": 1.0208, - "step": 3431 - }, - { - "epoch": 0.4126736006733602, - "grad_norm": 1.8451650975697076, - "learning_rate": 2.6508335040127018e-06, - "loss": 0.9163, - "step": 3432 - }, - { - "epoch": 0.4127938435639993, - "grad_norm": 2.1040575609782324, - "learning_rate": 2.6500968819046446e-06, - "loss": 1.0051, - "step": 3433 - }, - { - "epoch": 0.4129140864546384, - "grad_norm": 3.1253195027098517, - "learning_rate": 2.649360161177408e-06, - "loss": 0.8255, - "step": 3434 - }, - { - "epoch": 0.41303432934527745, - "grad_norm": 1.527126130896046, - "learning_rate": 2.6486233419427504e-06, - "loss": 0.9631, - "step": 3435 - }, - { - "epoch": 0.41315457223591656, - "grad_norm": 3.4370157074068524, - "learning_rate": 2.6478864243124484e-06, - "loss": 0.9856, - "step": 3436 - }, - { - "epoch": 0.4132748151265556, - "grad_norm": 2.1276531486211288, - "learning_rate": 2.6471494083982903e-06, - "loss": 1.0804, - "step": 3437 - }, - { - "epoch": 0.4133950580171947, - "grad_norm": 1.7377186068225179, - "learning_rate": 2.6464122943120818e-06, - "loss": 0.9784, - "step": 3438 - }, - { - "epoch": 0.41351530090783384, - "grad_norm": 2.6918738502941064, - "learning_rate": 2.645675082165642e-06, - "loss": 1.0552, - "step": 3439 - }, - { - "epoch": 0.4136355437984729, - "grad_norm": 2.477212814484672, - "learning_rate": 2.644937772070806e-06, - "loss": 0.9841, - "step": 3440 - }, - { - "epoch": 0.413755786689112, - "grad_norm": 2.7283084383857776, - "learning_rate": 2.6442003641394225e-06, - "loss": 1.0702, - "step": 3441 - }, - { - "epoch": 0.4138760295797511, - "grad_norm": 1.4113118094465433, - "learning_rate": 2.643462858483356e-06, - "loss": 1.0647, - "step": 3442 - }, - { - "epoch": 0.41399627247039017, - "grad_norm": 2.1427156583308142, - "learning_rate": 2.6427252552144856e-06, - "loss": 0.9637, - "step": 3443 - }, - { - "epoch": 0.4141165153610293, - "grad_norm": 1.8302506698317462, - "learning_rate": 2.6419875544447044e-06, - "loss": 0.9792, - "step": 3444 - }, - { - "epoch": 0.4142367582516684, - "grad_norm": 1.489784022685798, - "learning_rate": 2.6412497562859218e-06, - "loss": 0.9459, - "step": 3445 - }, - { - "epoch": 0.41435700114230745, - "grad_norm": 2.6740310625370616, - "learning_rate": 2.6405118608500617e-06, - "loss": 0.9851, - "step": 3446 - }, - { - "epoch": 0.41447724403294656, - "grad_norm": 1.8252736866198709, - "learning_rate": 2.6397738682490613e-06, - "loss": 1.0308, - "step": 3447 - }, - { - "epoch": 0.41459748692358567, - "grad_norm": 1.7463476350614562, - "learning_rate": 2.6390357785948734e-06, - "loss": 0.9836, - "step": 3448 - }, - { - "epoch": 0.4147177298142247, - "grad_norm": 1.6757959040643515, - "learning_rate": 2.6382975919994667e-06, - "loss": 1.0316, - "step": 3449 - }, - { - "epoch": 0.41483797270486383, - "grad_norm": 1.4723196277344666, - "learning_rate": 2.637559308574822e-06, - "loss": 0.9512, - "step": 3450 - }, - { - "epoch": 0.4149582155955029, - "grad_norm": 1.9194712920212051, - "learning_rate": 2.6368209284329376e-06, - "loss": 0.951, - "step": 3451 - }, - { - "epoch": 0.415078458486142, - "grad_norm": 2.009522300055455, - "learning_rate": 2.636082451685825e-06, - "loss": 0.9885, - "step": 3452 - }, - { - "epoch": 0.4151987013767811, - "grad_norm": 1.4309709968320161, - "learning_rate": 2.6353438784455094e-06, - "loss": 1.0901, - "step": 3453 - }, - { - "epoch": 0.41531894426742016, - "grad_norm": 2.333031316920633, - "learning_rate": 2.6346052088240326e-06, - "loss": 0.9336, - "step": 3454 - }, - { - "epoch": 0.4154391871580593, - "grad_norm": 1.9453724311880776, - "learning_rate": 2.63386644293345e-06, - "loss": 1.0048, - "step": 3455 - }, - { - "epoch": 0.4155594300486984, - "grad_norm": 2.069463476595597, - "learning_rate": 2.633127580885833e-06, - "loss": 1.0647, - "step": 3456 - }, - { - "epoch": 0.41567967293933744, - "grad_norm": 2.340289396665302, - "learning_rate": 2.632388622793265e-06, - "loss": 0.8783, - "step": 3457 - }, - { - "epoch": 0.41579991582997655, - "grad_norm": 1.5803338634879245, - "learning_rate": 2.6316495687678457e-06, - "loss": 0.914, - "step": 3458 - }, - { - "epoch": 0.41592015872061566, - "grad_norm": 2.832485483507767, - "learning_rate": 2.6309104189216887e-06, - "loss": 0.9925, - "step": 3459 - }, - { - "epoch": 0.4160404016112547, - "grad_norm": 2.429449726012441, - "learning_rate": 2.630171173366923e-06, - "loss": 0.9744, - "step": 3460 - }, - { - "epoch": 0.41616064450189383, - "grad_norm": 2.6361050718144745, - "learning_rate": 2.629431832215691e-06, - "loss": 0.9731, - "step": 3461 - }, - { - "epoch": 0.41628088739253294, - "grad_norm": 4.719265334934419, - "learning_rate": 2.628692395580151e-06, - "loss": 1.1022, - "step": 3462 - }, - { - "epoch": 0.416401130283172, - "grad_norm": 2.9033853502427807, - "learning_rate": 2.6279528635724747e-06, - "loss": 1.0212, - "step": 3463 - }, - { - "epoch": 0.4165213731738111, - "grad_norm": 2.2087963506544805, - "learning_rate": 2.627213236304848e-06, - "loss": 1.0106, - "step": 3464 - }, - { - "epoch": 0.4166416160644502, - "grad_norm": 2.1553165792720024, - "learning_rate": 2.626473513889472e-06, - "loss": 0.934, - "step": 3465 - }, - { - "epoch": 0.41676185895508927, - "grad_norm": 1.8327973478167363, - "learning_rate": 2.625733696438562e-06, - "loss": 1.0511, - "step": 3466 - }, - { - "epoch": 0.4168821018457284, - "grad_norm": 1.9559190220286837, - "learning_rate": 2.6249937840643476e-06, - "loss": 0.9832, - "step": 3467 - }, - { - "epoch": 0.41700234473636744, - "grad_norm": 1.860372131714535, - "learning_rate": 2.6242537768790733e-06, - "loss": 0.9004, - "step": 3468 - }, - { - "epoch": 0.41712258762700655, - "grad_norm": 2.1289755713699465, - "learning_rate": 2.6235136749949975e-06, - "loss": 0.9153, - "step": 3469 - }, - { - "epoch": 0.41724283051764566, - "grad_norm": 2.0438250466663272, - "learning_rate": 2.6227734785243924e-06, - "loss": 0.842, - "step": 3470 - }, - { - "epoch": 0.4173630734082847, - "grad_norm": 1.988509249402146, - "learning_rate": 2.6220331875795466e-06, - "loss": 1.016, - "step": 3471 - }, - { - "epoch": 0.4174833162989238, - "grad_norm": 1.8248101854246621, - "learning_rate": 2.62129280227276e-06, - "loss": 0.9841, - "step": 3472 - }, - { - "epoch": 0.41760355918956293, - "grad_norm": 1.8303153455261756, - "learning_rate": 2.62055232271635e-06, - "loss": 0.9142, - "step": 3473 - }, - { - "epoch": 0.417723802080202, - "grad_norm": 1.9941641183154855, - "learning_rate": 2.619811749022646e-06, - "loss": 1.1131, - "step": 3474 - }, - { - "epoch": 0.4178440449708411, - "grad_norm": 2.5116042708772355, - "learning_rate": 2.6190710813039917e-06, - "loss": 0.9424, - "step": 3475 - }, - { - "epoch": 0.4179642878614802, - "grad_norm": 2.4845057039247207, - "learning_rate": 2.618330319672747e-06, - "loss": 1.064, - "step": 3476 - }, - { - "epoch": 0.41808453075211927, - "grad_norm": 1.9373165166789652, - "learning_rate": 2.617589464241284e-06, - "loss": 1.1443, - "step": 3477 - }, - { - "epoch": 0.4182047736427584, - "grad_norm": 1.8734635733857283, - "learning_rate": 2.6168485151219914e-06, - "loss": 0.9667, - "step": 3478 - }, - { - "epoch": 0.4183250165333975, - "grad_norm": 2.8567380993108555, - "learning_rate": 2.616107472427269e-06, - "loss": 0.9361, - "step": 3479 - }, - { - "epoch": 0.41844525942403654, - "grad_norm": 3.4131448939829343, - "learning_rate": 2.615366336269533e-06, - "loss": 0.9975, - "step": 3480 - }, - { - "epoch": 0.41856550231467565, - "grad_norm": 2.557233282744836, - "learning_rate": 2.6146251067612126e-06, - "loss": 1.0227, - "step": 3481 - }, - { - "epoch": 0.41868574520531476, - "grad_norm": 1.6401074274280447, - "learning_rate": 2.6138837840147525e-06, - "loss": 1.0431, - "step": 3482 - }, - { - "epoch": 0.4188059880959538, - "grad_norm": 2.7379542004451705, - "learning_rate": 2.6131423681426103e-06, - "loss": 1.0118, - "step": 3483 - }, - { - "epoch": 0.41892623098659293, - "grad_norm": 1.6954719714562232, - "learning_rate": 2.6124008592572587e-06, - "loss": 0.9588, - "step": 3484 - }, - { - "epoch": 0.419046473877232, - "grad_norm": 2.5443256935808884, - "learning_rate": 2.6116592574711835e-06, - "loss": 1.0422, - "step": 3485 - }, - { - "epoch": 0.4191667167678711, - "grad_norm": 1.9884882071655896, - "learning_rate": 2.6109175628968853e-06, - "loss": 1.0661, - "step": 3486 - }, - { - "epoch": 0.4192869596585102, - "grad_norm": 1.827770492219689, - "learning_rate": 2.610175775646878e-06, - "loss": 1.0603, - "step": 3487 - }, - { - "epoch": 0.41940720254914926, - "grad_norm": 1.8445858757852296, - "learning_rate": 2.6094338958336907e-06, - "loss": 0.9734, - "step": 3488 - }, - { - "epoch": 0.41952744543978837, - "grad_norm": 1.795921470739155, - "learning_rate": 2.608691923569867e-06, - "loss": 1.0462, - "step": 3489 - }, - { - "epoch": 0.4196476883304275, - "grad_norm": 1.5536919094529364, - "learning_rate": 2.6079498589679616e-06, - "loss": 0.9894, - "step": 3490 - }, - { - "epoch": 0.41976793122106654, - "grad_norm": 1.944960106487127, - "learning_rate": 2.6072077021405465e-06, - "loss": 0.9909, - "step": 3491 - }, - { - "epoch": 0.41988817411170565, - "grad_norm": 1.5925420035148161, - "learning_rate": 2.6064654532002054e-06, - "loss": 0.9237, - "step": 3492 - }, - { - "epoch": 0.42000841700234476, - "grad_norm": 1.4625527331634698, - "learning_rate": 2.6057231122595375e-06, - "loss": 0.9838, - "step": 3493 - }, - { - "epoch": 0.4201286598929838, - "grad_norm": 1.5672957256520987, - "learning_rate": 2.604980679431154e-06, - "loss": 0.9589, - "step": 3494 - }, - { - "epoch": 0.4202489027836229, - "grad_norm": 2.643035691169327, - "learning_rate": 2.604238154827684e-06, - "loss": 0.9796, - "step": 3495 - }, - { - "epoch": 0.42036914567426203, - "grad_norm": 1.743053409259314, - "learning_rate": 2.6034955385617656e-06, - "loss": 0.9612, - "step": 3496 - }, - { - "epoch": 0.4204893885649011, - "grad_norm": 0.7522899715418924, - "learning_rate": 2.6027528307460544e-06, - "loss": 0.8949, - "step": 3497 - }, - { - "epoch": 0.4206096314555402, - "grad_norm": 1.7494441239471212, - "learning_rate": 2.602010031493217e-06, - "loss": 1.0867, - "step": 3498 - }, - { - "epoch": 0.42072987434617926, - "grad_norm": 2.45601372201174, - "learning_rate": 2.6012671409159367e-06, - "loss": 1.0941, - "step": 3499 - }, - { - "epoch": 0.42085011723681837, - "grad_norm": 1.9305350724131538, - "learning_rate": 2.6005241591269097e-06, - "loss": 1.0464, - "step": 3500 - }, - { - "epoch": 0.4209703601274575, - "grad_norm": 2.0757926529990005, - "learning_rate": 2.5997810862388454e-06, - "loss": 1.034, - "step": 3501 - }, - { - "epoch": 0.42109060301809653, - "grad_norm": 2.7024635872871245, - "learning_rate": 2.599037922364467e-06, - "loss": 0.9859, - "step": 3502 - }, - { - "epoch": 0.42121084590873564, - "grad_norm": 1.9539793663575369, - "learning_rate": 2.5982946676165112e-06, - "loss": 0.9815, - "step": 3503 - }, - { - "epoch": 0.42133108879937475, - "grad_norm": 0.7654332290571376, - "learning_rate": 2.5975513221077313e-06, - "loss": 0.8352, - "step": 3504 - }, - { - "epoch": 0.4214513316900138, - "grad_norm": 2.1569000591103507, - "learning_rate": 2.5968078859508897e-06, - "loss": 1.1123, - "step": 3505 - }, - { - "epoch": 0.4215715745806529, - "grad_norm": 1.8043464899932407, - "learning_rate": 2.5960643592587673e-06, - "loss": 1.0224, - "step": 3506 - }, - { - "epoch": 0.42169181747129203, - "grad_norm": 2.0381832638776327, - "learning_rate": 2.5953207421441553e-06, - "loss": 1.0504, - "step": 3507 - }, - { - "epoch": 0.4218120603619311, - "grad_norm": 2.666835650414555, - "learning_rate": 2.5945770347198603e-06, - "loss": 0.9785, - "step": 3508 - }, - { - "epoch": 0.4219323032525702, - "grad_norm": 1.9312047514815138, - "learning_rate": 2.593833237098701e-06, - "loss": 1.0613, - "step": 3509 - }, - { - "epoch": 0.4220525461432093, - "grad_norm": 1.8677830723880295, - "learning_rate": 2.593089349393512e-06, - "loss": 0.8558, - "step": 3510 - }, - { - "epoch": 0.42217278903384836, - "grad_norm": 1.7240583116135813, - "learning_rate": 2.592345371717141e-06, - "loss": 1.0753, - "step": 3511 - }, - { - "epoch": 0.42229303192448747, - "grad_norm": 2.0163686400744734, - "learning_rate": 2.591601304182448e-06, - "loss": 0.9324, - "step": 3512 - }, - { - "epoch": 0.4224132748151266, - "grad_norm": 1.667003915362231, - "learning_rate": 2.5908571469023067e-06, - "loss": 1.0166, - "step": 3513 - }, - { - "epoch": 0.42253351770576564, - "grad_norm": 5.03659569738053, - "learning_rate": 2.5901128999896067e-06, - "loss": 0.9879, - "step": 3514 - }, - { - "epoch": 0.42265376059640475, - "grad_norm": 1.5562837926420607, - "learning_rate": 2.5893685635572487e-06, - "loss": 0.9165, - "step": 3515 - }, - { - "epoch": 0.4227740034870438, - "grad_norm": 2.0874091397001946, - "learning_rate": 2.5886241377181483e-06, - "loss": 0.9253, - "step": 3516 - }, - { - "epoch": 0.4228942463776829, - "grad_norm": 1.723821475156418, - "learning_rate": 2.587879622585234e-06, - "loss": 1.0369, - "step": 3517 - }, - { - "epoch": 0.423014489268322, - "grad_norm": 2.010912687419765, - "learning_rate": 2.5871350182714486e-06, - "loss": 0.9939, - "step": 3518 - }, - { - "epoch": 0.4231347321589611, - "grad_norm": 2.4168137363100297, - "learning_rate": 2.586390324889748e-06, - "loss": 1.0275, - "step": 3519 - }, - { - "epoch": 0.4232549750496002, - "grad_norm": 3.576736380640413, - "learning_rate": 2.5856455425531003e-06, - "loss": 0.8964, - "step": 3520 - }, - { - "epoch": 0.4233752179402393, - "grad_norm": 2.2243257765483198, - "learning_rate": 2.5849006713744902e-06, - "loss": 1.0384, - "step": 3521 - }, - { - "epoch": 0.42349546083087836, - "grad_norm": 2.559342742083181, - "learning_rate": 2.5841557114669135e-06, - "loss": 0.9553, - "step": 3522 - }, - { - "epoch": 0.42361570372151747, - "grad_norm": 2.731298492489996, - "learning_rate": 2.58341066294338e-06, - "loss": 0.8977, - "step": 3523 - }, - { - "epoch": 0.4237359466121566, - "grad_norm": 2.0131349782095396, - "learning_rate": 2.5826655259169124e-06, - "loss": 1.0848, - "step": 3524 - }, - { - "epoch": 0.42385618950279563, - "grad_norm": 1.6894378837824076, - "learning_rate": 2.5819203005005475e-06, - "loss": 1.1278, - "step": 3525 - }, - { - "epoch": 0.42397643239343474, - "grad_norm": 1.862537611511194, - "learning_rate": 2.581174986807336e-06, - "loss": 1.0153, - "step": 3526 - }, - { - "epoch": 0.42409667528407385, - "grad_norm": 2.397460440268533, - "learning_rate": 2.580429584950341e-06, - "loss": 1.1391, - "step": 3527 - }, - { - "epoch": 0.4242169181747129, - "grad_norm": 2.0803769179105385, - "learning_rate": 2.5796840950426397e-06, - "loss": 0.8889, - "step": 3528 - }, - { - "epoch": 0.424337161065352, - "grad_norm": 1.870680283514335, - "learning_rate": 2.578938517197322e-06, - "loss": 0.8848, - "step": 3529 - }, - { - "epoch": 0.4244574039559911, - "grad_norm": 2.0757286768620578, - "learning_rate": 2.5781928515274916e-06, - "loss": 0.8529, - "step": 3530 - }, - { - "epoch": 0.4245776468466302, - "grad_norm": 2.330685020895354, - "learning_rate": 2.577447098146265e-06, - "loss": 0.9128, - "step": 3531 - }, - { - "epoch": 0.4246978897372693, - "grad_norm": 1.7220751415875921, - "learning_rate": 2.5767012571667724e-06, - "loss": 1.0151, - "step": 3532 - }, - { - "epoch": 0.42481813262790835, - "grad_norm": 1.7640160850993905, - "learning_rate": 2.5759553287021587e-06, - "loss": 0.9154, - "step": 3533 - }, - { - "epoch": 0.42493837551854746, - "grad_norm": 1.7022076157105608, - "learning_rate": 2.5752093128655786e-06, - "loss": 1.0009, - "step": 3534 - }, - { - "epoch": 0.4250586184091866, - "grad_norm": 1.7281536520709075, - "learning_rate": 2.574463209770204e-06, - "loss": 0.967, - "step": 3535 - }, - { - "epoch": 0.42517886129982563, - "grad_norm": 1.68161631000587, - "learning_rate": 2.5737170195292165e-06, - "loss": 1.0207, - "step": 3536 - }, - { - "epoch": 0.42529910419046474, - "grad_norm": 1.9169517042536675, - "learning_rate": 2.572970742255814e-06, - "loss": 1.0081, - "step": 3537 - }, - { - "epoch": 0.42541934708110385, - "grad_norm": 3.422296785461594, - "learning_rate": 2.5722243780632046e-06, - "loss": 1.0447, - "step": 3538 - }, - { - "epoch": 0.4255395899717429, - "grad_norm": 1.138195324170032, - "learning_rate": 2.5714779270646125e-06, - "loss": 0.8896, - "step": 3539 - }, - { - "epoch": 0.425659832862382, - "grad_norm": 2.3099216831026697, - "learning_rate": 2.5707313893732735e-06, - "loss": 0.9934, - "step": 3540 - }, - { - "epoch": 0.4257800757530211, - "grad_norm": 2.0729641038332645, - "learning_rate": 2.5699847651024364e-06, - "loss": 0.9955, - "step": 3541 - }, - { - "epoch": 0.4259003186436602, - "grad_norm": 1.904914012580281, - "learning_rate": 2.5692380543653627e-06, - "loss": 0.9999, - "step": 3542 - }, - { - "epoch": 0.4260205615342993, - "grad_norm": 1.791408150526753, - "learning_rate": 2.5684912572753293e-06, - "loss": 0.93, - "step": 3543 - }, - { - "epoch": 0.4261408044249384, - "grad_norm": 1.7212387792682797, - "learning_rate": 2.5677443739456245e-06, - "loss": 1.0757, - "step": 3544 - }, - { - "epoch": 0.42626104731557746, - "grad_norm": 2.1432563591518683, - "learning_rate": 2.5669974044895495e-06, - "loss": 1.0236, - "step": 3545 - }, - { - "epoch": 0.42638129020621657, - "grad_norm": 2.152402236614614, - "learning_rate": 2.5662503490204187e-06, - "loss": 1.0146, - "step": 3546 - }, - { - "epoch": 0.4265015330968556, - "grad_norm": 2.044020198027967, - "learning_rate": 2.5655032076515603e-06, - "loss": 0.9929, - "step": 3547 - }, - { - "epoch": 0.42662177598749473, - "grad_norm": 2.0914333679470296, - "learning_rate": 2.5647559804963155e-06, - "loss": 1.0456, - "step": 3548 - }, - { - "epoch": 0.42674201887813384, - "grad_norm": 1.962722634485855, - "learning_rate": 2.5640086676680364e-06, - "loss": 1.0147, - "step": 3549 - }, - { - "epoch": 0.4268622617687729, - "grad_norm": 2.1896221221575645, - "learning_rate": 2.5632612692800923e-06, - "loss": 1.0412, - "step": 3550 - }, - { - "epoch": 0.426982504659412, - "grad_norm": 3.5151194569242956, - "learning_rate": 2.5625137854458603e-06, - "loss": 0.9756, - "step": 3551 - }, - { - "epoch": 0.4271027475500511, - "grad_norm": 2.305876034239118, - "learning_rate": 2.561766216278735e-06, - "loss": 1.0291, - "step": 3552 - }, - { - "epoch": 0.4272229904406902, - "grad_norm": 1.9902073013216204, - "learning_rate": 2.561018561892121e-06, - "loss": 1.0345, - "step": 3553 - }, - { - "epoch": 0.4273432333313293, - "grad_norm": 1.460568141654896, - "learning_rate": 2.5602708223994363e-06, - "loss": 0.9847, - "step": 3554 - }, - { - "epoch": 0.4274634762219684, - "grad_norm": 2.406329364210918, - "learning_rate": 2.559522997914115e-06, - "loss": 0.9077, - "step": 3555 - }, - { - "epoch": 0.42758371911260745, - "grad_norm": 1.9954613208162004, - "learning_rate": 2.558775088549599e-06, - "loss": 1.0767, - "step": 3556 - }, - { - "epoch": 0.42770396200324656, - "grad_norm": 2.5307465982841038, - "learning_rate": 2.5580270944193467e-06, - "loss": 0.8984, - "step": 3557 - }, - { - "epoch": 0.4278242048938857, - "grad_norm": 1.0571050557536092, - "learning_rate": 2.557279015636827e-06, - "loss": 0.8061, - "step": 3558 - }, - { - "epoch": 0.42794444778452473, - "grad_norm": 0.8525054330736579, - "learning_rate": 2.5565308523155245e-06, - "loss": 0.8899, - "step": 3559 - }, - { - "epoch": 0.42806469067516384, - "grad_norm": 2.2357935104017956, - "learning_rate": 2.5557826045689336e-06, - "loss": 1.0563, - "step": 3560 - }, - { - "epoch": 0.4281849335658029, - "grad_norm": 0.944409972072853, - "learning_rate": 2.5550342725105643e-06, - "loss": 0.8404, - "step": 3561 - }, - { - "epoch": 0.428305176456442, - "grad_norm": 1.86131326643466, - "learning_rate": 2.554285856253937e-06, - "loss": 1.0469, - "step": 3562 - }, - { - "epoch": 0.4284254193470811, - "grad_norm": 1.8971202031984982, - "learning_rate": 2.5535373559125855e-06, - "loss": 1.0001, - "step": 3563 - }, - { - "epoch": 0.42854566223772017, - "grad_norm": 1.538998830030169, - "learning_rate": 2.552788771600057e-06, - "loss": 1.0482, - "step": 3564 - }, - { - "epoch": 0.4286659051283593, - "grad_norm": 1.735826177315224, - "learning_rate": 2.5520401034299118e-06, - "loss": 1.0457, - "step": 3565 - }, - { - "epoch": 0.4287861480189984, - "grad_norm": 1.7367714486511614, - "learning_rate": 2.551291351515722e-06, - "loss": 1.097, - "step": 3566 - }, - { - "epoch": 0.42890639090963745, - "grad_norm": 1.5047961487963655, - "learning_rate": 2.5505425159710726e-06, - "loss": 1.0836, - "step": 3567 - }, - { - "epoch": 0.42902663380027656, - "grad_norm": 2.607367377122416, - "learning_rate": 2.549793596909561e-06, - "loss": 1.0606, - "step": 3568 - }, - { - "epoch": 0.42914687669091567, - "grad_norm": 2.260102585683876, - "learning_rate": 2.5490445944447976e-06, - "loss": 0.8985, - "step": 3569 - }, - { - "epoch": 0.4292671195815547, - "grad_norm": 1.8540744401375941, - "learning_rate": 2.548295508690406e-06, - "loss": 0.8888, - "step": 3570 - }, - { - "epoch": 0.42938736247219383, - "grad_norm": 3.8412816255349416, - "learning_rate": 2.5475463397600217e-06, - "loss": 0.9914, - "step": 3571 - }, - { - "epoch": 0.42950760536283294, - "grad_norm": 10.57989406020662, - "learning_rate": 2.546797087767293e-06, - "loss": 1.0059, - "step": 3572 - }, - { - "epoch": 0.429627848253472, - "grad_norm": 1.7660349817191854, - "learning_rate": 2.546047752825881e-06, - "loss": 1.1044, - "step": 3573 - }, - { - "epoch": 0.4297480911441111, - "grad_norm": 1.8767438408661758, - "learning_rate": 2.5452983350494595e-06, - "loss": 1.1661, - "step": 3574 - }, - { - "epoch": 0.4298683340347502, - "grad_norm": 2.5948607123300556, - "learning_rate": 2.544548834551713e-06, - "loss": 0.8873, - "step": 3575 - }, - { - "epoch": 0.4299885769253893, - "grad_norm": 2.1322919608598765, - "learning_rate": 2.5437992514463424e-06, - "loss": 1.1687, - "step": 3576 - }, - { - "epoch": 0.4301088198160284, - "grad_norm": 2.2208333509873417, - "learning_rate": 2.5430495858470565e-06, - "loss": 1.1086, - "step": 3577 - }, - { - "epoch": 0.43022906270666744, - "grad_norm": 1.934557680245702, - "learning_rate": 2.54229983786758e-06, - "loss": 1.0076, - "step": 3578 - }, - { - "epoch": 0.43034930559730655, - "grad_norm": 2.2816992539427967, - "learning_rate": 2.541550007621651e-06, - "loss": 1.0782, - "step": 3579 - }, - { - "epoch": 0.43046954848794566, - "grad_norm": 1.688154305659526, - "learning_rate": 2.5408000952230156e-06, - "loss": 1.0286, - "step": 3580 - }, - { - "epoch": 0.4305897913785847, - "grad_norm": 1.9875501326470124, - "learning_rate": 2.5400501007854357e-06, - "loss": 1.1242, - "step": 3581 - }, - { - "epoch": 0.43071003426922383, - "grad_norm": 1.9677261839084388, - "learning_rate": 2.539300024422685e-06, - "loss": 0.9906, - "step": 3582 - }, - { - "epoch": 0.43083027715986294, - "grad_norm": 0.799502547395428, - "learning_rate": 2.538549866248549e-06, - "loss": 0.8709, - "step": 3583 - }, - { - "epoch": 0.430950520050502, - "grad_norm": 5.711996791721159, - "learning_rate": 2.5377996263768274e-06, - "loss": 1.0449, - "step": 3584 - }, - { - "epoch": 0.4310707629411411, - "grad_norm": 1.6622442780199849, - "learning_rate": 2.5370493049213293e-06, - "loss": 0.9087, - "step": 3585 - }, - { - "epoch": 0.4311910058317802, - "grad_norm": 5.142789344492081, - "learning_rate": 2.536298901995878e-06, - "loss": 1.031, - "step": 3586 - }, - { - "epoch": 0.43131124872241927, - "grad_norm": 1.5322220500152641, - "learning_rate": 2.535548417714311e-06, - "loss": 1.0296, - "step": 3587 - }, - { - "epoch": 0.4314314916130584, - "grad_norm": 1.5196613167843396, - "learning_rate": 2.534797852190474e-06, - "loss": 1.0914, - "step": 3588 - }, - { - "epoch": 0.4315517345036975, - "grad_norm": 1.7995868288138552, - "learning_rate": 2.5340472055382283e-06, - "loss": 1.035, - "step": 3589 - }, - { - "epoch": 0.43167197739433655, - "grad_norm": 1.9920599803128882, - "learning_rate": 2.5332964778714468e-06, - "loss": 1.0389, - "step": 3590 - }, - { - "epoch": 0.43179222028497566, - "grad_norm": 2.0072303254501556, - "learning_rate": 2.5325456693040123e-06, - "loss": 0.8961, - "step": 3591 - }, - { - "epoch": 0.43191246317561477, - "grad_norm": 2.0947445457415714, - "learning_rate": 2.531794779949824e-06, - "loss": 0.986, - "step": 3592 - }, - { - "epoch": 0.4320327060662538, - "grad_norm": 2.3201946395584554, - "learning_rate": 2.5310438099227903e-06, - "loss": 1.1078, - "step": 3593 - }, - { - "epoch": 0.43215294895689293, - "grad_norm": 1.3444247658570578, - "learning_rate": 2.530292759336833e-06, - "loss": 0.7922, - "step": 3594 - }, - { - "epoch": 0.432273191847532, - "grad_norm": 2.263792572885006, - "learning_rate": 2.5295416283058855e-06, - "loss": 0.9287, - "step": 3595 - }, - { - "epoch": 0.4323934347381711, - "grad_norm": 2.749351078110616, - "learning_rate": 2.5287904169438943e-06, - "loss": 0.886, - "step": 3596 - }, - { - "epoch": 0.4325136776288102, - "grad_norm": 3.512320359818785, - "learning_rate": 2.528039125364817e-06, - "loss": 0.8711, - "step": 3597 - }, - { - "epoch": 0.43263392051944927, - "grad_norm": 1.861984089309125, - "learning_rate": 2.5272877536826246e-06, - "loss": 0.984, - "step": 3598 - }, - { - "epoch": 0.4327541634100884, - "grad_norm": 2.3497046548831286, - "learning_rate": 2.5265363020112986e-06, - "loss": 0.9298, - "step": 3599 - }, - { - "epoch": 0.4328744063007275, - "grad_norm": 1.7110571971810722, - "learning_rate": 2.5257847704648344e-06, - "loss": 1.0715, - "step": 3600 - }, - { - "epoch": 0.43299464919136654, - "grad_norm": 2.047249330260353, - "learning_rate": 2.525033159157239e-06, - "loss": 0.9927, - "step": 3601 - }, - { - "epoch": 0.43311489208200565, - "grad_norm": 2.246421935942183, - "learning_rate": 2.52428146820253e-06, - "loss": 1.0084, - "step": 3602 - }, - { - "epoch": 0.43323513497264476, - "grad_norm": 1.8577529499850352, - "learning_rate": 2.52352969771474e-06, - "loss": 1.0503, - "step": 3603 - }, - { - "epoch": 0.4333553778632838, - "grad_norm": 3.4955931576840933, - "learning_rate": 2.5227778478079106e-06, - "loss": 1.11, - "step": 3604 - }, - { - "epoch": 0.43347562075392293, - "grad_norm": 1.5159919530509018, - "learning_rate": 2.522025918596098e-06, - "loss": 0.9962, - "step": 3605 - }, - { - "epoch": 0.43359586364456204, - "grad_norm": 1.4091524054093074, - "learning_rate": 2.521273910193368e-06, - "loss": 0.8873, - "step": 3606 - }, - { - "epoch": 0.4337161065352011, - "grad_norm": 5.938042545627443, - "learning_rate": 2.5205218227138006e-06, - "loss": 1.106, - "step": 3607 - }, - { - "epoch": 0.4338363494258402, - "grad_norm": 2.074411042059224, - "learning_rate": 2.519769656271486e-06, - "loss": 1.0169, - "step": 3608 - }, - { - "epoch": 0.43395659231647926, - "grad_norm": 2.4514605469957216, - "learning_rate": 2.5190174109805285e-06, - "loss": 0.9141, - "step": 3609 - }, - { - "epoch": 0.43407683520711837, - "grad_norm": 1.8540651815122742, - "learning_rate": 2.518265086955042e-06, - "loss": 0.863, - "step": 3610 - }, - { - "epoch": 0.4341970780977575, - "grad_norm": 2.6351114531078745, - "learning_rate": 2.5175126843091534e-06, - "loss": 1.064, - "step": 3611 - }, - { - "epoch": 0.43431732098839654, - "grad_norm": 5.645069685337758, - "learning_rate": 2.5167602031570034e-06, - "loss": 0.9722, - "step": 3612 - }, - { - "epoch": 0.43443756387903565, - "grad_norm": 1.7056319657991104, - "learning_rate": 2.51600764361274e-06, - "loss": 0.9626, - "step": 3613 - }, - { - "epoch": 0.43455780676967476, - "grad_norm": 2.406679684805422, - "learning_rate": 2.5152550057905283e-06, - "loss": 1.0139, - "step": 3614 - }, - { - "epoch": 0.4346780496603138, - "grad_norm": 2.311770556076634, - "learning_rate": 2.5145022898045415e-06, - "loss": 0.989, - "step": 3615 - }, - { - "epoch": 0.4347982925509529, - "grad_norm": 3.6236731633135792, - "learning_rate": 2.5137494957689664e-06, - "loss": 1.1279, - "step": 3616 - }, - { - "epoch": 0.43491853544159204, - "grad_norm": 0.7500181593286007, - "learning_rate": 2.5129966237980016e-06, - "loss": 0.8309, - "step": 3617 - }, - { - "epoch": 0.4350387783322311, - "grad_norm": 1.9506273703177246, - "learning_rate": 2.512243674005857e-06, - "loss": 1.0097, - "step": 3618 - }, - { - "epoch": 0.4351590212228702, - "grad_norm": 1.7297688397008568, - "learning_rate": 2.5114906465067537e-06, - "loss": 1.0855, - "step": 3619 - }, - { - "epoch": 0.4352792641135093, - "grad_norm": 2.8366266259289747, - "learning_rate": 2.5107375414149264e-06, - "loss": 0.9921, - "step": 3620 - }, - { - "epoch": 0.43539950700414837, - "grad_norm": 2.2525448712315637, - "learning_rate": 2.5099843588446197e-06, - "loss": 0.9458, - "step": 3621 - }, - { - "epoch": 0.4355197498947875, - "grad_norm": 1.6525163132039797, - "learning_rate": 2.509231098910091e-06, - "loss": 0.8507, - "step": 3622 - }, - { - "epoch": 0.4356399927854266, - "grad_norm": 1.9674851729380838, - "learning_rate": 2.508477761725611e-06, - "loss": 0.9773, - "step": 3623 - }, - { - "epoch": 0.43576023567606564, - "grad_norm": 1.8989858581032464, - "learning_rate": 2.507724347405458e-06, - "loss": 1.038, - "step": 3624 - }, - { - "epoch": 0.43588047856670475, - "grad_norm": 1.7232121200039665, - "learning_rate": 2.5069708560639243e-06, - "loss": 1.0516, - "step": 3625 - }, - { - "epoch": 0.4360007214573438, - "grad_norm": 2.3355340003133915, - "learning_rate": 2.5062172878153158e-06, - "loss": 0.8395, - "step": 3626 - }, - { - "epoch": 0.4361209643479829, - "grad_norm": 2.479120708221243, - "learning_rate": 2.505463642773947e-06, - "loss": 1.1021, - "step": 3627 - }, - { - "epoch": 0.43624120723862203, - "grad_norm": 5.2142835680514885, - "learning_rate": 2.504709921054146e-06, - "loss": 0.9812, - "step": 3628 - }, - { - "epoch": 0.4363614501292611, - "grad_norm": 4.508300120195513, - "learning_rate": 2.50395612277025e-06, - "loss": 1.0719, - "step": 3629 - }, - { - "epoch": 0.4364816930199002, - "grad_norm": 1.9669059109508948, - "learning_rate": 2.503202248036612e-06, - "loss": 0.9599, - "step": 3630 - }, - { - "epoch": 0.4366019359105393, - "grad_norm": 1.915582668216319, - "learning_rate": 2.5024482969675927e-06, - "loss": 0.9633, - "step": 3631 - }, - { - "epoch": 0.43672217880117836, - "grad_norm": 2.0855642834937305, - "learning_rate": 2.501694269677566e-06, - "loss": 1.07, - "step": 3632 - }, - { - "epoch": 0.4368424216918175, - "grad_norm": 2.071829646608091, - "learning_rate": 2.500940166280918e-06, - "loss": 1.0382, - "step": 3633 - }, - { - "epoch": 0.4369626645824566, - "grad_norm": 1.8464434072631595, - "learning_rate": 2.500185986892045e-06, - "loss": 1.0188, - "step": 3634 - }, - { - "epoch": 0.43708290747309564, - "grad_norm": 2.500912785787517, - "learning_rate": 2.499431731625355e-06, - "loss": 0.9994, - "step": 3635 - }, - { - "epoch": 0.43720315036373475, - "grad_norm": 2.059682134899988, - "learning_rate": 2.4986774005952686e-06, - "loss": 1.0304, - "step": 3636 - }, - { - "epoch": 0.43732339325437386, - "grad_norm": 1.8639944698841102, - "learning_rate": 2.4979229939162166e-06, - "loss": 1.0715, - "step": 3637 - }, - { - "epoch": 0.4374436361450129, - "grad_norm": 1.540037514365899, - "learning_rate": 2.4971685117026433e-06, - "loss": 1.027, - "step": 3638 - }, - { - "epoch": 0.437563879035652, - "grad_norm": 1.7182114190785298, - "learning_rate": 2.4964139540690018e-06, - "loss": 1.0001, - "step": 3639 - }, - { - "epoch": 0.4376841219262911, - "grad_norm": 1.8485572344065935, - "learning_rate": 2.495659321129758e-06, - "loss": 0.9577, - "step": 3640 - }, - { - "epoch": 0.4378043648169302, - "grad_norm": 2.605865128090074, - "learning_rate": 2.494904612999389e-06, - "loss": 0.9858, - "step": 3641 - }, - { - "epoch": 0.4379246077075693, - "grad_norm": 0.841805513342061, - "learning_rate": 2.4941498297923843e-06, - "loss": 0.8384, - "step": 3642 - }, - { - "epoch": 0.43804485059820836, - "grad_norm": 1.7789811525252421, - "learning_rate": 2.4933949716232424e-06, - "loss": 0.931, - "step": 3643 - }, - { - "epoch": 0.43816509348884747, - "grad_norm": 2.39996597345391, - "learning_rate": 2.492640038606476e-06, - "loss": 0.9639, - "step": 3644 - }, - { - "epoch": 0.4382853363794866, - "grad_norm": 1.7905953183468601, - "learning_rate": 2.491885030856608e-06, - "loss": 1.0158, - "step": 3645 - }, - { - "epoch": 0.43840557927012563, - "grad_norm": 2.1594462271176145, - "learning_rate": 2.4911299484881713e-06, - "loss": 1.0563, - "step": 3646 - }, - { - "epoch": 0.43852582216076474, - "grad_norm": 1.8142842685406206, - "learning_rate": 2.490374791615712e-06, - "loss": 1.0401, - "step": 3647 - }, - { - "epoch": 0.43864606505140386, - "grad_norm": 5.674722256354718, - "learning_rate": 2.4896195603537867e-06, - "loss": 1.0137, - "step": 3648 - }, - { - "epoch": 0.4387663079420429, - "grad_norm": 1.9981441231756034, - "learning_rate": 2.488864254816964e-06, - "loss": 0.9763, - "step": 3649 - }, - { - "epoch": 0.438886550832682, - "grad_norm": 4.023522118724857, - "learning_rate": 2.4881088751198218e-06, - "loss": 0.9049, - "step": 3650 - }, - { - "epoch": 0.43900679372332113, - "grad_norm": 2.3954245066474376, - "learning_rate": 2.4873534213769517e-06, - "loss": 0.8734, - "step": 3651 - }, - { - "epoch": 0.4391270366139602, - "grad_norm": 1.6147705297752268, - "learning_rate": 2.4865978937029547e-06, - "loss": 0.9429, - "step": 3652 - }, - { - "epoch": 0.4392472795045993, - "grad_norm": 1.7407892885870826, - "learning_rate": 2.485842292212445e-06, - "loss": 0.8927, - "step": 3653 - }, - { - "epoch": 0.4393675223952384, - "grad_norm": 1.9467887945056077, - "learning_rate": 2.485086617020045e-06, - "loss": 1.0286, - "step": 3654 - }, - { - "epoch": 0.43948776528587746, - "grad_norm": 2.2535300643333986, - "learning_rate": 2.4843308682403903e-06, - "loss": 1.045, - "step": 3655 - }, - { - "epoch": 0.4396080081765166, - "grad_norm": 1.7254938469121852, - "learning_rate": 2.4835750459881294e-06, - "loss": 1.0562, - "step": 3656 - }, - { - "epoch": 0.43972825106715563, - "grad_norm": 3.217036411467094, - "learning_rate": 2.4828191503779177e-06, - "loss": 1.0392, - "step": 3657 - }, - { - "epoch": 0.43984849395779474, - "grad_norm": 1.983898255758049, - "learning_rate": 2.482063181524425e-06, - "loss": 1.125, - "step": 3658 - }, - { - "epoch": 0.43996873684843385, - "grad_norm": 3.3328171012406704, - "learning_rate": 2.4813071395423307e-06, - "loss": 1.0391, - "step": 3659 - }, - { - "epoch": 0.4400889797390729, - "grad_norm": 1.7412563281687086, - "learning_rate": 2.4805510245463263e-06, - "loss": 0.8769, - "step": 3660 - }, - { - "epoch": 0.440209222629712, - "grad_norm": 1.9848405787447052, - "learning_rate": 2.4797948366511137e-06, - "loss": 0.8238, - "step": 3661 - }, - { - "epoch": 0.4403294655203511, - "grad_norm": 1.6891441812447388, - "learning_rate": 2.4790385759714055e-06, - "loss": 0.9976, - "step": 3662 - }, - { - "epoch": 0.4404497084109902, - "grad_norm": 2.0932058509755094, - "learning_rate": 2.478282242621926e-06, - "loss": 0.9441, - "step": 3663 - }, - { - "epoch": 0.4405699513016293, - "grad_norm": 0.8997220020628267, - "learning_rate": 2.477525836717411e-06, - "loss": 0.8668, - "step": 3664 - }, - { - "epoch": 0.4406901941922684, - "grad_norm": 2.463311978386528, - "learning_rate": 2.476769358372606e-06, - "loss": 1.0218, - "step": 3665 - }, - { - "epoch": 0.44081043708290746, - "grad_norm": 2.0457194320556136, - "learning_rate": 2.4760128077022683e-06, - "loss": 0.9795, - "step": 3666 - }, - { - "epoch": 0.44093067997354657, - "grad_norm": 1.5873566840653361, - "learning_rate": 2.4752561848211672e-06, - "loss": 0.9117, - "step": 3667 - }, - { - "epoch": 0.4410509228641857, - "grad_norm": 1.8191985950239757, - "learning_rate": 2.4744994898440797e-06, - "loss": 0.9423, - "step": 3668 - }, - { - "epoch": 0.44117116575482473, - "grad_norm": 1.858000816509285, - "learning_rate": 2.473742722885797e-06, - "loss": 1.0646, - "step": 3669 - }, - { - "epoch": 0.44129140864546385, - "grad_norm": 2.7066427285055057, - "learning_rate": 2.4729858840611197e-06, - "loss": 0.8868, - "step": 3670 - }, - { - "epoch": 0.4414116515361029, - "grad_norm": 2.0287670512255533, - "learning_rate": 2.4722289734848605e-06, - "loss": 0.9534, - "step": 3671 - }, - { - "epoch": 0.441531894426742, - "grad_norm": 2.130726111674133, - "learning_rate": 2.471471991271841e-06, - "loss": 1.0138, - "step": 3672 - }, - { - "epoch": 0.4416521373173811, - "grad_norm": 2.2454467903174575, - "learning_rate": 2.470714937536896e-06, - "loss": 1.0305, - "step": 3673 - }, - { - "epoch": 0.4417723802080202, - "grad_norm": 1.7823307707119929, - "learning_rate": 2.469957812394868e-06, - "loss": 0.9444, - "step": 3674 - }, - { - "epoch": 0.4418926230986593, - "grad_norm": 2.16613813213775, - "learning_rate": 2.4692006159606148e-06, - "loss": 0.9993, - "step": 3675 - }, - { - "epoch": 0.4420128659892984, - "grad_norm": 1.7138240624025902, - "learning_rate": 2.468443348349e-06, - "loss": 1.0155, - "step": 3676 - }, - { - "epoch": 0.44213310887993745, - "grad_norm": 2.337601697208851, - "learning_rate": 2.467686009674902e-06, - "loss": 1.0563, - "step": 3677 - }, - { - "epoch": 0.44225335177057656, - "grad_norm": 1.90239413153575, - "learning_rate": 2.466928600053209e-06, - "loss": 1.0783, - "step": 3678 - }, - { - "epoch": 0.4423735946612157, - "grad_norm": 2.0994793518969765, - "learning_rate": 2.466171119598818e-06, - "loss": 0.9448, - "step": 3679 - }, - { - "epoch": 0.44249383755185473, - "grad_norm": 2.748915978833348, - "learning_rate": 2.465413568426639e-06, - "loss": 1.0002, - "step": 3680 - }, - { - "epoch": 0.44261408044249384, - "grad_norm": 1.929266570973755, - "learning_rate": 2.464655946651591e-06, - "loss": 1.0488, - "step": 3681 - }, - { - "epoch": 0.44273432333313295, - "grad_norm": 2.015417753698679, - "learning_rate": 2.4638982543886065e-06, - "loss": 1.0283, - "step": 3682 - }, - { - "epoch": 0.442854566223772, - "grad_norm": 3.1476927224226445, - "learning_rate": 2.4631404917526254e-06, - "loss": 1.0955, - "step": 3683 - }, - { - "epoch": 0.4429748091144111, - "grad_norm": 1.5781004497771338, - "learning_rate": 2.4623826588586e-06, - "loss": 1.0167, - "step": 3684 - }, - { - "epoch": 0.4430950520050502, - "grad_norm": 1.40551289207601, - "learning_rate": 2.461624755821492e-06, - "loss": 1.0574, - "step": 3685 - }, - { - "epoch": 0.4432152948956893, - "grad_norm": 1.5748256526300428, - "learning_rate": 2.4608667827562763e-06, - "loss": 1.0004, - "step": 3686 - }, - { - "epoch": 0.4433355377863284, - "grad_norm": 1.6797024038673818, - "learning_rate": 2.460108739777936e-06, - "loss": 1.1239, - "step": 3687 - }, - { - "epoch": 0.44345578067696745, - "grad_norm": 1.7238228582374788, - "learning_rate": 2.4593506270014656e-06, - "loss": 0.9899, - "step": 3688 - }, - { - "epoch": 0.44357602356760656, - "grad_norm": 1.4668846459674991, - "learning_rate": 2.45859244454187e-06, - "loss": 1.0471, - "step": 3689 - }, - { - "epoch": 0.44369626645824567, - "grad_norm": 1.69762519565764, - "learning_rate": 2.4578341925141655e-06, - "loss": 0.8909, - "step": 3690 - }, - { - "epoch": 0.4438165093488847, - "grad_norm": 2.3866526193400732, - "learning_rate": 2.457075871033378e-06, - "loss": 0.9506, - "step": 3691 - }, - { - "epoch": 0.44393675223952384, - "grad_norm": 2.2227675907827784, - "learning_rate": 2.4563174802145445e-06, - "loss": 1.1121, - "step": 3692 - }, - { - "epoch": 0.44405699513016295, - "grad_norm": 0.6051231136060159, - "learning_rate": 2.455559020172712e-06, - "loss": 0.7299, - "step": 3693 - }, - { - "epoch": 0.444177238020802, - "grad_norm": 1.8019226164253912, - "learning_rate": 2.4548004910229385e-06, - "loss": 1.1275, - "step": 3694 - }, - { - "epoch": 0.4442974809114411, - "grad_norm": 1.9148218011240143, - "learning_rate": 2.4540418928802913e-06, - "loss": 1.1, - "step": 3695 - }, - { - "epoch": 0.4444177238020802, - "grad_norm": 2.015208356511507, - "learning_rate": 2.4532832258598506e-06, - "loss": 0.892, - "step": 3696 - }, - { - "epoch": 0.4445379666927193, - "grad_norm": 1.8913081172427046, - "learning_rate": 2.4525244900767047e-06, - "loss": 1.0356, - "step": 3697 - }, - { - "epoch": 0.4446582095833584, - "grad_norm": 0.8557555880485315, - "learning_rate": 2.4517656856459536e-06, - "loss": 0.8638, - "step": 3698 - }, - { - "epoch": 0.4447784524739975, - "grad_norm": 3.2472202444238207, - "learning_rate": 2.4510068126827073e-06, - "loss": 0.9055, - "step": 3699 - }, - { - "epoch": 0.44489869536463655, - "grad_norm": 2.14750601255843, - "learning_rate": 2.450247871302086e-06, - "loss": 1.0477, - "step": 3700 - }, - { - "epoch": 0.44501893825527566, - "grad_norm": 2.3497956694381643, - "learning_rate": 2.44948886161922e-06, - "loss": 1.0658, - "step": 3701 - }, - { - "epoch": 0.4451391811459148, - "grad_norm": 1.8307114261583388, - "learning_rate": 2.4487297837492524e-06, - "loss": 1.0794, - "step": 3702 - }, - { - "epoch": 0.44525942403655383, - "grad_norm": 2.015802656520876, - "learning_rate": 2.4479706378073323e-06, - "loss": 0.8446, - "step": 3703 - }, - { - "epoch": 0.44537966692719294, - "grad_norm": 1.598785970967701, - "learning_rate": 2.447211423908623e-06, - "loss": 1.0673, - "step": 3704 - }, - { - "epoch": 0.445499909817832, - "grad_norm": 2.015940678442757, - "learning_rate": 2.4464521421682966e-06, - "loss": 0.9756, - "step": 3705 - }, - { - "epoch": 0.4456201527084711, - "grad_norm": 1.2284917055842877, - "learning_rate": 2.4456927927015345e-06, - "loss": 1.1032, - "step": 3706 - }, - { - "epoch": 0.4457403955991102, - "grad_norm": 2.0673327613361496, - "learning_rate": 2.4449333756235307e-06, - "loss": 0.9915, - "step": 3707 - }, - { - "epoch": 0.4458606384897493, - "grad_norm": 2.61338114868264, - "learning_rate": 2.4441738910494876e-06, - "loss": 1.0231, - "step": 3708 - }, - { - "epoch": 0.4459808813803884, - "grad_norm": 1.7541679068203828, - "learning_rate": 2.4434143390946176e-06, - "loss": 1.0529, - "step": 3709 - }, - { - "epoch": 0.4461011242710275, - "grad_norm": 1.7974107026692103, - "learning_rate": 2.4426547198741457e-06, - "loss": 1.0767, - "step": 3710 - }, - { - "epoch": 0.44622136716166655, - "grad_norm": 1.8564061417076736, - "learning_rate": 2.441895033503305e-06, - "loss": 0.9778, - "step": 3711 - }, - { - "epoch": 0.44634161005230566, - "grad_norm": 1.6098193694013032, - "learning_rate": 2.4411352800973375e-06, - "loss": 1.0587, - "step": 3712 - }, - { - "epoch": 0.44646185294294477, - "grad_norm": 2.4662062662058792, - "learning_rate": 2.4403754597715005e-06, - "loss": 0.9807, - "step": 3713 - }, - { - "epoch": 0.4465820958335838, - "grad_norm": 2.053548170633289, - "learning_rate": 2.4396155726410553e-06, - "loss": 1.1528, - "step": 3714 - }, - { - "epoch": 0.44670233872422294, - "grad_norm": 2.407916495164807, - "learning_rate": 2.438855618821278e-06, - "loss": 1.1459, - "step": 3715 - }, - { - "epoch": 0.44682258161486205, - "grad_norm": 1.4697186948242489, - "learning_rate": 2.4380955984274517e-06, - "loss": 0.9034, - "step": 3716 - }, - { - "epoch": 0.4469428245055011, - "grad_norm": 2.322496099561161, - "learning_rate": 2.4373355115748716e-06, - "loss": 1.0016, - "step": 3717 - }, - { - "epoch": 0.4470630673961402, - "grad_norm": 2.0542772703074923, - "learning_rate": 2.436575358378842e-06, - "loss": 0.9507, - "step": 3718 - }, - { - "epoch": 0.44718331028677927, - "grad_norm": 4.585021153258737, - "learning_rate": 2.4358151389546782e-06, - "loss": 1.0598, - "step": 3719 - }, - { - "epoch": 0.4473035531774184, - "grad_norm": 2.2841736335560006, - "learning_rate": 2.4350548534177035e-06, - "loss": 0.9893, - "step": 3720 - }, - { - "epoch": 0.4474237960680575, - "grad_norm": 2.425435261155137, - "learning_rate": 2.434294501883254e-06, - "loss": 0.896, - "step": 3721 - }, - { - "epoch": 0.44754403895869654, - "grad_norm": 1.7500574919929808, - "learning_rate": 2.433534084466674e-06, - "loss": 0.8917, - "step": 3722 - }, - { - "epoch": 0.44766428184933565, - "grad_norm": 1.6672993730586765, - "learning_rate": 2.4327736012833178e-06, - "loss": 0.9428, - "step": 3723 - }, - { - "epoch": 0.44778452473997477, - "grad_norm": 1.8620744871980883, - "learning_rate": 2.4320130524485506e-06, - "loss": 0.9903, - "step": 3724 - }, - { - "epoch": 0.4479047676306138, - "grad_norm": 1.4130768264866957, - "learning_rate": 2.431252438077746e-06, - "loss": 1.0254, - "step": 3725 - }, - { - "epoch": 0.44802501052125293, - "grad_norm": 2.4296331368893136, - "learning_rate": 2.4304917582862906e-06, - "loss": 0.9944, - "step": 3726 - }, - { - "epoch": 0.44814525341189204, - "grad_norm": 2.2390786127121642, - "learning_rate": 2.4297310131895774e-06, - "loss": 1.1114, - "step": 3727 - }, - { - "epoch": 0.4482654963025311, - "grad_norm": 2.1531698368979395, - "learning_rate": 2.4289702029030113e-06, - "loss": 0.9908, - "step": 3728 - }, - { - "epoch": 0.4483857391931702, - "grad_norm": 1.8457945785874368, - "learning_rate": 2.4282093275420057e-06, - "loss": 1.05, - "step": 3729 - }, - { - "epoch": 0.4485059820838093, - "grad_norm": 2.0403718800141415, - "learning_rate": 2.4274483872219863e-06, - "loss": 0.9283, - "step": 3730 - }, - { - "epoch": 0.4486262249744484, - "grad_norm": 1.6551253621083915, - "learning_rate": 2.426687382058386e-06, - "loss": 1.1549, - "step": 3731 - }, - { - "epoch": 0.4487464678650875, - "grad_norm": 0.9630434793161917, - "learning_rate": 2.425926312166649e-06, - "loss": 0.8499, - "step": 3732 - }, - { - "epoch": 0.4488667107557266, - "grad_norm": 2.297589664473704, - "learning_rate": 2.42516517766223e-06, - "loss": 0.9486, - "step": 3733 - }, - { - "epoch": 0.44898695364636565, - "grad_norm": 1.778769589796783, - "learning_rate": 2.4244039786605907e-06, - "loss": 0.9012, - "step": 3734 - }, - { - "epoch": 0.44910719653700476, - "grad_norm": 2.1368875663501634, - "learning_rate": 2.4236427152772055e-06, - "loss": 1.056, - "step": 3735 - }, - { - "epoch": 0.4492274394276438, - "grad_norm": 0.9130717393719819, - "learning_rate": 2.422881387627557e-06, - "loss": 0.8413, - "step": 3736 - }, - { - "epoch": 0.4493476823182829, - "grad_norm": 1.4918603987952117, - "learning_rate": 2.422119995827139e-06, - "loss": 1.0009, - "step": 3737 - }, - { - "epoch": 0.44946792520892204, - "grad_norm": 2.255282452945954, - "learning_rate": 2.4213585399914528e-06, - "loss": 0.9698, - "step": 3738 - }, - { - "epoch": 0.4495881680995611, - "grad_norm": 2.1790987265543524, - "learning_rate": 2.4205970202360113e-06, - "loss": 1.0768, - "step": 3739 - }, - { - "epoch": 0.4497084109902002, - "grad_norm": 2.1198887783241362, - "learning_rate": 2.4198354366763354e-06, - "loss": 1.0092, - "step": 3740 - }, - { - "epoch": 0.4498286538808393, - "grad_norm": 2.1317677157115407, - "learning_rate": 2.4190737894279587e-06, - "loss": 1.0085, - "step": 3741 - }, - { - "epoch": 0.44994889677147837, - "grad_norm": 2.0004840503486805, - "learning_rate": 2.4183120786064203e-06, - "loss": 1.0344, - "step": 3742 - }, - { - "epoch": 0.4500691396621175, - "grad_norm": 2.24551315101267, - "learning_rate": 2.417550304327273e-06, - "loss": 1.0784, - "step": 3743 - }, - { - "epoch": 0.4501893825527566, - "grad_norm": 1.613444396260441, - "learning_rate": 2.4167884667060763e-06, - "loss": 0.9871, - "step": 3744 - }, - { - "epoch": 0.45030962544339564, - "grad_norm": 2.2268859628326685, - "learning_rate": 2.4160265658584e-06, - "loss": 1.0975, - "step": 3745 - }, - { - "epoch": 0.45042986833403476, - "grad_norm": 1.9415299128670083, - "learning_rate": 2.4152646018998253e-06, - "loss": 0.9049, - "step": 3746 - }, - { - "epoch": 0.45055011122467387, - "grad_norm": 1.6043255302075108, - "learning_rate": 2.4145025749459403e-06, - "loss": 0.9445, - "step": 3747 - }, - { - "epoch": 0.4506703541153129, - "grad_norm": 1.8942330981161688, - "learning_rate": 2.413740485112344e-06, - "loss": 0.9309, - "step": 3748 - }, - { - "epoch": 0.45079059700595203, - "grad_norm": 1.785771637404302, - "learning_rate": 2.412978332514646e-06, - "loss": 1.0472, - "step": 3749 - }, - { - "epoch": 0.4509108398965911, - "grad_norm": 2.200853407668466, - "learning_rate": 2.4122161172684623e-06, - "loss": 0.9467, - "step": 3750 - }, - { - "epoch": 0.4510310827872302, - "grad_norm": 2.090220194629296, - "learning_rate": 2.4114538394894216e-06, - "loss": 1.0595, - "step": 3751 - }, - { - "epoch": 0.4511513256778693, - "grad_norm": 1.723644777423946, - "learning_rate": 2.410691499293161e-06, - "loss": 1.0616, - "step": 3752 - }, - { - "epoch": 0.45127156856850836, - "grad_norm": 1.6775065612664652, - "learning_rate": 2.409929096795326e-06, - "loss": 0.9735, - "step": 3753 - }, - { - "epoch": 0.4513918114591475, - "grad_norm": 1.9700126837307568, - "learning_rate": 2.409166632111573e-06, - "loss": 1.0177, - "step": 3754 - }, - { - "epoch": 0.4515120543497866, - "grad_norm": 1.7774931792438193, - "learning_rate": 2.4084041053575674e-06, - "loss": 1.0203, - "step": 3755 - }, - { - "epoch": 0.45163229724042564, - "grad_norm": 2.041823348696031, - "learning_rate": 2.4076415166489834e-06, - "loss": 0.9482, - "step": 3756 - }, - { - "epoch": 0.45175254013106475, - "grad_norm": 1.5089859423529266, - "learning_rate": 2.406878866101506e-06, - "loss": 1.0249, - "step": 3757 - }, - { - "epoch": 0.45187278302170386, - "grad_norm": 1.9697974915185503, - "learning_rate": 2.4061161538308273e-06, - "loss": 1.01, - "step": 3758 - }, - { - "epoch": 0.4519930259123429, - "grad_norm": 2.0207857998111183, - "learning_rate": 2.4053533799526523e-06, - "loss": 1.1174, - "step": 3759 - }, - { - "epoch": 0.452113268802982, - "grad_norm": 2.2281742085163168, - "learning_rate": 2.404590544582691e-06, - "loss": 1.0872, - "step": 3760 - }, - { - "epoch": 0.45223351169362114, - "grad_norm": 1.740348358360264, - "learning_rate": 2.403827647836666e-06, - "loss": 1.0329, - "step": 3761 - }, - { - "epoch": 0.4523537545842602, - "grad_norm": 2.4948958744804384, - "learning_rate": 2.4030646898303075e-06, - "loss": 0.9288, - "step": 3762 - }, - { - "epoch": 0.4524739974748993, - "grad_norm": 2.3877500313259694, - "learning_rate": 2.4023016706793566e-06, - "loss": 1.0568, - "step": 3763 - }, - { - "epoch": 0.4525942403655384, - "grad_norm": 0.8318965605844317, - "learning_rate": 2.401538590499561e-06, - "loss": 0.8354, - "step": 3764 - }, - { - "epoch": 0.45271448325617747, - "grad_norm": 2.0831590070904387, - "learning_rate": 2.400775449406682e-06, - "loss": 0.9358, - "step": 3765 - }, - { - "epoch": 0.4528347261468166, - "grad_norm": 1.6873088304530832, - "learning_rate": 2.400012247516485e-06, - "loss": 0.9554, - "step": 3766 - }, - { - "epoch": 0.45295496903745563, - "grad_norm": 2.4547331574465643, - "learning_rate": 2.3992489849447484e-06, - "loss": 1.1251, - "step": 3767 - }, - { - "epoch": 0.45307521192809475, - "grad_norm": 2.0322214004619688, - "learning_rate": 2.3984856618072584e-06, - "loss": 1.0176, - "step": 3768 - }, - { - "epoch": 0.45319545481873386, - "grad_norm": 1.872173849714141, - "learning_rate": 2.3977222782198098e-06, - "loss": 0.9625, - "step": 3769 - }, - { - "epoch": 0.4533156977093729, - "grad_norm": 2.113271998810042, - "learning_rate": 2.3969588342982077e-06, - "loss": 0.9803, - "step": 3770 - }, - { - "epoch": 0.453435940600012, - "grad_norm": 1.471077312878041, - "learning_rate": 2.396195330158267e-06, - "loss": 0.955, - "step": 3771 - }, - { - "epoch": 0.45355618349065113, - "grad_norm": 1.7800215115479605, - "learning_rate": 2.3954317659158094e-06, - "loss": 1.0178, - "step": 3772 - }, - { - "epoch": 0.4536764263812902, - "grad_norm": 0.8963462041619042, - "learning_rate": 2.394668141686667e-06, - "loss": 0.8546, - "step": 3773 - }, - { - "epoch": 0.4537966692719293, - "grad_norm": 2.0594573263520033, - "learning_rate": 2.3939044575866813e-06, - "loss": 0.9206, - "step": 3774 - }, - { - "epoch": 0.4539169121625684, - "grad_norm": 2.162003291421338, - "learning_rate": 2.3931407137317024e-06, - "loss": 0.9875, - "step": 3775 - }, - { - "epoch": 0.45403715505320746, - "grad_norm": 2.5461076387844184, - "learning_rate": 2.3923769102375907e-06, - "loss": 1.085, - "step": 3776 - }, - { - "epoch": 0.4541573979438466, - "grad_norm": 2.4351952244635338, - "learning_rate": 2.391613047220213e-06, - "loss": 1.0216, - "step": 3777 - }, - { - "epoch": 0.4542776408344857, - "grad_norm": 1.7819541074920773, - "learning_rate": 2.390849124795447e-06, - "loss": 1.023, - "step": 3778 - }, - { - "epoch": 0.45439788372512474, - "grad_norm": 1.8022373626092354, - "learning_rate": 2.3900851430791804e-06, - "loss": 1.0737, - "step": 3779 - }, - { - "epoch": 0.45451812661576385, - "grad_norm": 2.1249074354767745, - "learning_rate": 2.389321102187307e-06, - "loss": 1.0829, - "step": 3780 - }, - { - "epoch": 0.4546383695064029, - "grad_norm": 2.0952463069874576, - "learning_rate": 2.3885570022357326e-06, - "loss": 1.0522, - "step": 3781 - }, - { - "epoch": 0.454758612397042, - "grad_norm": 0.8390504505848003, - "learning_rate": 2.38779284334037e-06, - "loss": 0.8592, - "step": 3782 - }, - { - "epoch": 0.4548788552876811, - "grad_norm": 4.602587652081465, - "learning_rate": 2.387028625617141e-06, - "loss": 1.0122, - "step": 3783 - }, - { - "epoch": 0.4549990981783202, - "grad_norm": 1.7712091234184109, - "learning_rate": 2.3862643491819766e-06, - "loss": 1.084, - "step": 3784 - }, - { - "epoch": 0.4551193410689593, - "grad_norm": 1.8439345186651168, - "learning_rate": 2.3855000141508186e-06, - "loss": 1.0717, - "step": 3785 - }, - { - "epoch": 0.4552395839595984, - "grad_norm": 1.9650242989860123, - "learning_rate": 2.3847356206396143e-06, - "loss": 1.0665, - "step": 3786 - }, - { - "epoch": 0.45535982685023746, - "grad_norm": 1.5061759170738382, - "learning_rate": 2.3839711687643227e-06, - "loss": 1.0163, - "step": 3787 - }, - { - "epoch": 0.45548006974087657, - "grad_norm": 2.0971654108140685, - "learning_rate": 2.38320665864091e-06, - "loss": 0.9669, - "step": 3788 - }, - { - "epoch": 0.4556003126315157, - "grad_norm": 1.934796569934113, - "learning_rate": 2.3824420903853516e-06, - "loss": 1.0454, - "step": 3789 - }, - { - "epoch": 0.45572055552215474, - "grad_norm": 2.3234396261732666, - "learning_rate": 2.3816774641136324e-06, - "loss": 1.0493, - "step": 3790 - }, - { - "epoch": 0.45584079841279385, - "grad_norm": 2.6734064186202353, - "learning_rate": 2.380912779941745e-06, - "loss": 0.952, - "step": 3791 - }, - { - "epoch": 0.45596104130343296, - "grad_norm": 1.8720128742574123, - "learning_rate": 2.3801480379856918e-06, - "loss": 1.0587, - "step": 3792 - }, - { - "epoch": 0.456081284194072, - "grad_norm": 1.5559079541191645, - "learning_rate": 2.379383238361484e-06, - "loss": 1.0651, - "step": 3793 - }, - { - "epoch": 0.4562015270847111, - "grad_norm": 1.732315146202425, - "learning_rate": 2.3786183811851407e-06, - "loss": 1.0299, - "step": 3794 - }, - { - "epoch": 0.45632176997535023, - "grad_norm": 1.5591611665879817, - "learning_rate": 2.3778534665726892e-06, - "loss": 1.033, - "step": 3795 - }, - { - "epoch": 0.4564420128659893, - "grad_norm": 2.6508182413998114, - "learning_rate": 2.377088494640168e-06, - "loss": 0.9532, - "step": 3796 - }, - { - "epoch": 0.4565622557566284, - "grad_norm": 1.6382044783737673, - "learning_rate": 2.3763234655036216e-06, - "loss": 1.0064, - "step": 3797 - }, - { - "epoch": 0.45668249864726745, - "grad_norm": 1.7190345528610103, - "learning_rate": 2.3755583792791046e-06, - "loss": 1.0955, - "step": 3798 - }, - { - "epoch": 0.45680274153790656, - "grad_norm": 2.1184122407124497, - "learning_rate": 2.3747932360826803e-06, - "loss": 0.9735, - "step": 3799 - }, - { - "epoch": 0.4569229844285457, - "grad_norm": 2.1057606567272575, - "learning_rate": 2.3740280360304205e-06, - "loss": 1.0457, - "step": 3800 - }, - { - "epoch": 0.45704322731918473, - "grad_norm": 1.570136976158885, - "learning_rate": 2.3732627792384038e-06, - "loss": 0.911, - "step": 3801 - }, - { - "epoch": 0.45716347020982384, - "grad_norm": 1.8879360276391803, - "learning_rate": 2.3724974658227207e-06, - "loss": 0.9793, - "step": 3802 - }, - { - "epoch": 0.45728371310046295, - "grad_norm": 2.0450109482451144, - "learning_rate": 2.3717320958994687e-06, - "loss": 0.9357, - "step": 3803 - }, - { - "epoch": 0.457403955991102, - "grad_norm": 6.2816666042658165, - "learning_rate": 2.3709666695847534e-06, - "loss": 0.9263, - "step": 3804 - }, - { - "epoch": 0.4575241988817411, - "grad_norm": 1.6176188902518824, - "learning_rate": 2.370201186994689e-06, - "loss": 0.9346, - "step": 3805 - }, - { - "epoch": 0.45764444177238023, - "grad_norm": 1.9584279815311039, - "learning_rate": 2.369435648245399e-06, - "loss": 0.9259, - "step": 3806 - }, - { - "epoch": 0.4577646846630193, - "grad_norm": 1.690452888457372, - "learning_rate": 2.368670053453015e-06, - "loss": 1.0801, - "step": 3807 - }, - { - "epoch": 0.4578849275536584, - "grad_norm": 2.2230016242264594, - "learning_rate": 2.3679044027336757e-06, - "loss": 0.9601, - "step": 3808 - }, - { - "epoch": 0.4580051704442975, - "grad_norm": 2.3135938377171676, - "learning_rate": 2.3671386962035326e-06, - "loss": 0.9163, - "step": 3809 - }, - { - "epoch": 0.45812541333493656, - "grad_norm": 1.8077556076922965, - "learning_rate": 2.3663729339787405e-06, - "loss": 0.9198, - "step": 3810 - }, - { - "epoch": 0.45824565622557567, - "grad_norm": 3.8726707348948017, - "learning_rate": 2.365607116175466e-06, - "loss": 0.954, - "step": 3811 - }, - { - "epoch": 0.4583658991162148, - "grad_norm": 3.695298176711472, - "learning_rate": 2.3648412429098825e-06, - "loss": 0.8927, - "step": 3812 - }, - { - "epoch": 0.45848614200685384, - "grad_norm": 1.9045278555830183, - "learning_rate": 2.364075314298172e-06, - "loss": 1.0454, - "step": 3813 - }, - { - "epoch": 0.45860638489749295, - "grad_norm": 1.8823339716835532, - "learning_rate": 2.3633093304565267e-06, - "loss": 0.9342, - "step": 3814 - }, - { - "epoch": 0.458726627788132, - "grad_norm": 1.6555655162662402, - "learning_rate": 2.3625432915011443e-06, - "loss": 0.8624, - "step": 3815 - }, - { - "epoch": 0.4588468706787711, - "grad_norm": 1.5146034966988966, - "learning_rate": 2.3617771975482334e-06, - "loss": 0.8823, - "step": 3816 - }, - { - "epoch": 0.4589671135694102, - "grad_norm": 1.4957409634490124, - "learning_rate": 2.3610110487140083e-06, - "loss": 0.9751, - "step": 3817 - }, - { - "epoch": 0.4590873564600493, - "grad_norm": 1.6297310403656409, - "learning_rate": 2.360244845114695e-06, - "loss": 1.0418, - "step": 3818 - }, - { - "epoch": 0.4592075993506884, - "grad_norm": 2.1163660687039645, - "learning_rate": 2.3594785868665245e-06, - "loss": 0.92, - "step": 3819 - }, - { - "epoch": 0.4593278422413275, - "grad_norm": 2.0717282618692616, - "learning_rate": 2.3587122740857386e-06, - "loss": 1.036, - "step": 3820 - }, - { - "epoch": 0.45944808513196655, - "grad_norm": 1.503125590048692, - "learning_rate": 2.357945906888586e-06, - "loss": 1.0168, - "step": 3821 - }, - { - "epoch": 0.45956832802260567, - "grad_norm": 2.1952529844857414, - "learning_rate": 2.357179485391324e-06, - "loss": 1.0226, - "step": 3822 - }, - { - "epoch": 0.4596885709132448, - "grad_norm": 2.260689140340111, - "learning_rate": 2.3564130097102173e-06, - "loss": 1.0839, - "step": 3823 - }, - { - "epoch": 0.45980881380388383, - "grad_norm": 1.5899644660577845, - "learning_rate": 2.355646479961541e-06, - "loss": 0.9692, - "step": 3824 - }, - { - "epoch": 0.45992905669452294, - "grad_norm": 2.5907374892528603, - "learning_rate": 2.354879896261576e-06, - "loss": 0.9452, - "step": 3825 - }, - { - "epoch": 0.46004929958516205, - "grad_norm": 1.919887228077831, - "learning_rate": 2.3541132587266133e-06, - "loss": 0.7996, - "step": 3826 - }, - { - "epoch": 0.4601695424758011, - "grad_norm": 1.9788380667968999, - "learning_rate": 2.3533465674729515e-06, - "loss": 0.9261, - "step": 3827 - }, - { - "epoch": 0.4602897853664402, - "grad_norm": 1.8132694189761398, - "learning_rate": 2.352579822616895e-06, - "loss": 0.9608, - "step": 3828 - }, - { - "epoch": 0.4604100282570793, - "grad_norm": 1.4947499746784234, - "learning_rate": 2.351813024274761e-06, - "loss": 1.0075, - "step": 3829 - }, - { - "epoch": 0.4605302711477184, - "grad_norm": 1.9433588842650134, - "learning_rate": 2.3510461725628693e-06, - "loss": 0.965, - "step": 3830 - }, - { - "epoch": 0.4606505140383575, - "grad_norm": 1.7094599645165822, - "learning_rate": 2.350279267597554e-06, - "loss": 0.9322, - "step": 3831 - }, - { - "epoch": 0.46077075692899655, - "grad_norm": 2.2127311628886934, - "learning_rate": 2.3495123094951515e-06, - "loss": 1.0538, - "step": 3832 - }, - { - "epoch": 0.46089099981963566, - "grad_norm": 1.806675596156464, - "learning_rate": 2.34874529837201e-06, - "loss": 0.984, - "step": 3833 - }, - { - "epoch": 0.46101124271027477, - "grad_norm": 1.8631101005851656, - "learning_rate": 2.347978234344483e-06, - "loss": 1.0216, - "step": 3834 - }, - { - "epoch": 0.4611314856009138, - "grad_norm": 1.6725396234582797, - "learning_rate": 2.347211117528935e-06, - "loss": 0.9244, - "step": 3835 - }, - { - "epoch": 0.46125172849155294, - "grad_norm": 1.6714617494753228, - "learning_rate": 2.3464439480417374e-06, - "loss": 0.9422, - "step": 3836 - }, - { - "epoch": 0.46137197138219205, - "grad_norm": 2.7539786254895517, - "learning_rate": 2.3456767259992676e-06, - "loss": 1.0043, - "step": 3837 - }, - { - "epoch": 0.4614922142728311, - "grad_norm": 3.8636532441448743, - "learning_rate": 2.3449094515179135e-06, - "loss": 1.1073, - "step": 3838 - }, - { - "epoch": 0.4616124571634702, - "grad_norm": 1.6844278557292431, - "learning_rate": 2.34414212471407e-06, - "loss": 1.0413, - "step": 3839 - }, - { - "epoch": 0.4617327000541093, - "grad_norm": 1.7616823450628112, - "learning_rate": 2.3433747457041394e-06, - "loss": 0.9569, - "step": 3840 - }, - { - "epoch": 0.4618529429447484, - "grad_norm": 1.7239774105556962, - "learning_rate": 2.342607314604533e-06, - "loss": 1.0677, - "step": 3841 - }, - { - "epoch": 0.4619731858353875, - "grad_norm": 1.7914958547149746, - "learning_rate": 2.3418398315316694e-06, - "loss": 1.074, - "step": 3842 - }, - { - "epoch": 0.4620934287260266, - "grad_norm": 3.6566039346085497, - "learning_rate": 2.3410722966019755e-06, - "loss": 1.0125, - "step": 3843 - }, - { - "epoch": 0.46221367161666566, - "grad_norm": 1.7348274809184354, - "learning_rate": 2.3403047099318848e-06, - "loss": 0.8884, - "step": 3844 - }, - { - "epoch": 0.46233391450730477, - "grad_norm": 2.564601036855629, - "learning_rate": 2.3395370716378405e-06, - "loss": 0.9745, - "step": 3845 - }, - { - "epoch": 0.4624541573979438, - "grad_norm": 2.2192307072374877, - "learning_rate": 2.338769381836292e-06, - "loss": 0.9499, - "step": 3846 - }, - { - "epoch": 0.46257440028858293, - "grad_norm": 1.9800597601840573, - "learning_rate": 2.3380016406436984e-06, - "loss": 0.967, - "step": 3847 - }, - { - "epoch": 0.46269464317922204, - "grad_norm": 2.1569083494152226, - "learning_rate": 2.337233848176524e-06, - "loss": 1.0443, - "step": 3848 - }, - { - "epoch": 0.4628148860698611, - "grad_norm": 2.558730166926296, - "learning_rate": 2.3364660045512435e-06, - "loss": 1.0488, - "step": 3849 - }, - { - "epoch": 0.4629351289605002, - "grad_norm": 0.7708771452610234, - "learning_rate": 2.335698109884337e-06, - "loss": 0.8588, - "step": 3850 - }, - { - "epoch": 0.4630553718511393, - "grad_norm": 0.8287836370647002, - "learning_rate": 2.334930164292294e-06, - "loss": 0.886, - "step": 3851 - }, - { - "epoch": 0.4631756147417784, - "grad_norm": 1.9715237529851357, - "learning_rate": 2.334162167891612e-06, - "loss": 1.0263, - "step": 3852 - }, - { - "epoch": 0.4632958576324175, - "grad_norm": 2.75608222433492, - "learning_rate": 2.333394120798795e-06, - "loss": 0.9683, - "step": 3853 - }, - { - "epoch": 0.4634161005230566, - "grad_norm": 2.0830759143740214, - "learning_rate": 2.3326260231303545e-06, - "loss": 0.9559, - "step": 3854 - }, - { - "epoch": 0.46353634341369565, - "grad_norm": 1.680811022772906, - "learning_rate": 2.331857875002811e-06, - "loss": 1.1015, - "step": 3855 - }, - { - "epoch": 0.46365658630433476, - "grad_norm": 1.74649889723871, - "learning_rate": 2.3310896765326916e-06, - "loss": 0.9841, - "step": 3856 - }, - { - "epoch": 0.46377682919497387, - "grad_norm": 2.595391271269645, - "learning_rate": 2.330321427836531e-06, - "loss": 1.0655, - "step": 3857 - }, - { - "epoch": 0.4638970720856129, - "grad_norm": 1.6279468958814156, - "learning_rate": 2.3295531290308733e-06, - "loss": 1.0588, - "step": 3858 - }, - { - "epoch": 0.46401731497625204, - "grad_norm": 2.6461075067514503, - "learning_rate": 2.3287847802322678e-06, - "loss": 0.9877, - "step": 3859 - }, - { - "epoch": 0.4641375578668911, - "grad_norm": 1.851193419421871, - "learning_rate": 2.3280163815572723e-06, - "loss": 1.0652, - "step": 3860 - }, - { - "epoch": 0.4642578007575302, - "grad_norm": 1.9556761292954217, - "learning_rate": 2.3272479331224522e-06, - "loss": 0.9905, - "step": 3861 - }, - { - "epoch": 0.4643780436481693, - "grad_norm": 1.7629317275668992, - "learning_rate": 2.3264794350443817e-06, - "loss": 1.0116, - "step": 3862 - }, - { - "epoch": 0.46449828653880837, - "grad_norm": 2.238023886272741, - "learning_rate": 2.3257108874396396e-06, - "loss": 1.0186, - "step": 3863 - }, - { - "epoch": 0.4646185294294475, - "grad_norm": 1.954970319673847, - "learning_rate": 2.3249422904248152e-06, - "loss": 0.972, - "step": 3864 - }, - { - "epoch": 0.4647387723200866, - "grad_norm": 1.4018570844480849, - "learning_rate": 2.324173644116504e-06, - "loss": 1.1004, - "step": 3865 - }, - { - "epoch": 0.46485901521072565, - "grad_norm": 1.7206749973314688, - "learning_rate": 2.3234049486313087e-06, - "loss": 1.0468, - "step": 3866 - }, - { - "epoch": 0.46497925810136476, - "grad_norm": 1.9998174226394299, - "learning_rate": 2.322636204085839e-06, - "loss": 1.0008, - "step": 3867 - }, - { - "epoch": 0.46509950099200387, - "grad_norm": 2.2125647928696877, - "learning_rate": 2.3218674105967143e-06, - "loss": 1.0057, - "step": 3868 - }, - { - "epoch": 0.4652197438826429, - "grad_norm": 1.5822928506705713, - "learning_rate": 2.3210985682805593e-06, - "loss": 1.0654, - "step": 3869 - }, - { - "epoch": 0.46533998677328203, - "grad_norm": 2.7569426550700555, - "learning_rate": 2.320329677254007e-06, - "loss": 0.916, - "step": 3870 - }, - { - "epoch": 0.46546022966392114, - "grad_norm": 1.880187170978623, - "learning_rate": 2.319560737633697e-06, - "loss": 0.9577, - "step": 3871 - }, - { - "epoch": 0.4655804725545602, - "grad_norm": 1.4291925459465853, - "learning_rate": 2.3187917495362775e-06, - "loss": 0.912, - "step": 3872 - }, - { - "epoch": 0.4657007154451993, - "grad_norm": 5.616533986834635, - "learning_rate": 2.318022713078403e-06, - "loss": 0.984, - "step": 3873 - }, - { - "epoch": 0.4658209583358384, - "grad_norm": 1.94963806167926, - "learning_rate": 2.3172536283767354e-06, - "loss": 1.0727, - "step": 3874 - }, - { - "epoch": 0.4659412012264775, - "grad_norm": 2.119197102862027, - "learning_rate": 2.3164844955479447e-06, - "loss": 1.0425, - "step": 3875 - }, - { - "epoch": 0.4660614441171166, - "grad_norm": 1.7321004300546583, - "learning_rate": 2.3157153147087082e-06, - "loss": 0.9345, - "step": 3876 - }, - { - "epoch": 0.46618168700775564, - "grad_norm": 1.8750481281461497, - "learning_rate": 2.314946085975709e-06, - "loss": 1.0648, - "step": 3877 - }, - { - "epoch": 0.46630192989839475, - "grad_norm": 2.0910302332512676, - "learning_rate": 2.3141768094656393e-06, - "loss": 1.0537, - "step": 3878 - }, - { - "epoch": 0.46642217278903386, - "grad_norm": 2.138804420300538, - "learning_rate": 2.3134074852951966e-06, - "loss": 1.0526, - "step": 3879 - }, - { - "epoch": 0.4665424156796729, - "grad_norm": 1.6722351114206524, - "learning_rate": 2.312638113581088e-06, - "loss": 1.0106, - "step": 3880 - }, - { - "epoch": 0.46666265857031203, - "grad_norm": 2.893787036106727, - "learning_rate": 2.311868694440027e-06, - "loss": 1.0061, - "step": 3881 - }, - { - "epoch": 0.46678290146095114, - "grad_norm": 0.7380055311326544, - "learning_rate": 2.3110992279887323e-06, - "loss": 0.873, - "step": 3882 - }, - { - "epoch": 0.4669031443515902, - "grad_norm": 2.2143793174191213, - "learning_rate": 2.310329714343932e-06, - "loss": 1.0784, - "step": 3883 - }, - { - "epoch": 0.4670233872422293, - "grad_norm": 2.0302440793482237, - "learning_rate": 2.309560153622361e-06, - "loss": 1.0481, - "step": 3884 - }, - { - "epoch": 0.4671436301328684, - "grad_norm": 4.309041059693674, - "learning_rate": 2.3087905459407602e-06, - "loss": 0.9763, - "step": 3885 - }, - { - "epoch": 0.46726387302350747, - "grad_norm": 0.8540174811425563, - "learning_rate": 2.3080208914158795e-06, - "loss": 0.9129, - "step": 3886 - }, - { - "epoch": 0.4673841159141466, - "grad_norm": 2.081505685857612, - "learning_rate": 2.3072511901644753e-06, - "loss": 0.9358, - "step": 3887 - }, - { - "epoch": 0.4675043588047857, - "grad_norm": 2.2583402129158814, - "learning_rate": 2.306481442303309e-06, - "loss": 1.04, - "step": 3888 - }, - { - "epoch": 0.46762460169542475, - "grad_norm": 1.6989748724953544, - "learning_rate": 2.3057116479491515e-06, - "loss": 0.9584, - "step": 3889 - }, - { - "epoch": 0.46774484458606386, - "grad_norm": 2.662841696147684, - "learning_rate": 2.30494180721878e-06, - "loss": 0.9993, - "step": 3890 - }, - { - "epoch": 0.4678650874767029, - "grad_norm": 1.8047970164703842, - "learning_rate": 2.3041719202289794e-06, - "loss": 1.1271, - "step": 3891 - }, - { - "epoch": 0.467985330367342, - "grad_norm": 1.5890035822775752, - "learning_rate": 2.30340198709654e-06, - "loss": 1.0265, - "step": 3892 - }, - { - "epoch": 0.46810557325798113, - "grad_norm": 1.9527474000225113, - "learning_rate": 2.3026320079382605e-06, - "loss": 0.9801, - "step": 3893 - }, - { - "epoch": 0.4682258161486202, - "grad_norm": 2.691491113218957, - "learning_rate": 2.3018619828709454e-06, - "loss": 0.9968, - "step": 3894 - }, - { - "epoch": 0.4683460590392593, - "grad_norm": 1.792814692774684, - "learning_rate": 2.3010919120114084e-06, - "loss": 1.0482, - "step": 3895 - }, - { - "epoch": 0.4684663019298984, - "grad_norm": 2.4099345545766444, - "learning_rate": 2.3003217954764672e-06, - "loss": 0.8785, - "step": 3896 - }, - { - "epoch": 0.46858654482053747, - "grad_norm": 1.7322020106264413, - "learning_rate": 2.299551633382949e-06, - "loss": 1.0248, - "step": 3897 - }, - { - "epoch": 0.4687067877111766, - "grad_norm": 2.202553316438758, - "learning_rate": 2.2987814258476854e-06, - "loss": 1.0812, - "step": 3898 - }, - { - "epoch": 0.4688270306018157, - "grad_norm": 2.346848029782977, - "learning_rate": 2.2980111729875177e-06, - "loss": 0.9085, - "step": 3899 - }, - { - "epoch": 0.46894727349245474, - "grad_norm": 1.796826768310733, - "learning_rate": 2.2972408749192917e-06, - "loss": 1.0472, - "step": 3900 - }, - { - "epoch": 0.46906751638309385, - "grad_norm": 2.246625276782344, - "learning_rate": 2.296470531759861e-06, - "loss": 0.9007, - "step": 3901 - }, - { - "epoch": 0.46918775927373296, - "grad_norm": 1.9383218929360975, - "learning_rate": 2.2957001436260866e-06, - "loss": 1.0325, - "step": 3902 - }, - { - "epoch": 0.469308002164372, - "grad_norm": 15.984888570073181, - "learning_rate": 2.294929710634836e-06, - "loss": 0.9497, - "step": 3903 - }, - { - "epoch": 0.46942824505501113, - "grad_norm": 1.9270874298327096, - "learning_rate": 2.2941592329029823e-06, - "loss": 0.8359, - "step": 3904 - }, - { - "epoch": 0.46954848794565024, - "grad_norm": 1.8371001113375771, - "learning_rate": 2.2933887105474067e-06, - "loss": 1.0128, - "step": 3905 - }, - { - "epoch": 0.4696687308362893, - "grad_norm": 1.5206602947436643, - "learning_rate": 2.2926181436849974e-06, - "loss": 1.0376, - "step": 3906 - }, - { - "epoch": 0.4697889737269284, - "grad_norm": 1.657679894237619, - "learning_rate": 2.2918475324326478e-06, - "loss": 0.9517, - "step": 3907 - }, - { - "epoch": 0.46990921661756746, - "grad_norm": 2.2531300072475458, - "learning_rate": 2.2910768769072603e-06, - "loss": 1.1275, - "step": 3908 - }, - { - "epoch": 0.47002945950820657, - "grad_norm": 1.8604110267727392, - "learning_rate": 2.2903061772257417e-06, - "loss": 0.9858, - "step": 3909 - }, - { - "epoch": 0.4701497023988457, - "grad_norm": 1.4650328653965052, - "learning_rate": 2.289535433505007e-06, - "loss": 1.0165, - "step": 3910 - }, - { - "epoch": 0.47026994528948474, - "grad_norm": 1.5947003990218291, - "learning_rate": 2.2887646458619767e-06, - "loss": 0.8641, - "step": 3911 - }, - { - "epoch": 0.47039018818012385, - "grad_norm": 3.4031457317130656, - "learning_rate": 2.2879938144135797e-06, - "loss": 0.9976, - "step": 3912 - }, - { - "epoch": 0.47051043107076296, - "grad_norm": 1.5810404371028188, - "learning_rate": 2.2872229392767496e-06, - "loss": 0.9904, - "step": 3913 - }, - { - "epoch": 0.470630673961402, - "grad_norm": 1.552665353836437, - "learning_rate": 2.286452020568428e-06, - "loss": 0.9808, - "step": 3914 - }, - { - "epoch": 0.4707509168520411, - "grad_norm": 2.258747263718316, - "learning_rate": 2.2856810584055637e-06, - "loss": 0.9668, - "step": 3915 - }, - { - "epoch": 0.47087115974268023, - "grad_norm": 1.4112121770907542, - "learning_rate": 2.2849100529051085e-06, - "loss": 0.9059, - "step": 3916 - }, - { - "epoch": 0.4709914026333193, - "grad_norm": 3.1474032158549097, - "learning_rate": 2.284139004184026e-06, - "loss": 1.0293, - "step": 3917 - }, - { - "epoch": 0.4711116455239584, - "grad_norm": 2.3499522224601628, - "learning_rate": 2.2833679123592814e-06, - "loss": 0.9698, - "step": 3918 - }, - { - "epoch": 0.4712318884145975, - "grad_norm": 1.9483950906281713, - "learning_rate": 2.2825967775478508e-06, - "loss": 0.8664, - "step": 3919 - }, - { - "epoch": 0.47135213130523657, - "grad_norm": 2.1829873541318467, - "learning_rate": 2.2818255998667135e-06, - "loss": 1.0607, - "step": 3920 - }, - { - "epoch": 0.4714723741958757, - "grad_norm": 1.5958841284228402, - "learning_rate": 2.2810543794328566e-06, - "loss": 1.0193, - "step": 3921 - }, - { - "epoch": 0.4715926170865148, - "grad_norm": 1.6912286325981258, - "learning_rate": 2.2802831163632735e-06, - "loss": 1.0474, - "step": 3922 - }, - { - "epoch": 0.47171285997715384, - "grad_norm": 4.338851691713551, - "learning_rate": 2.279511810774965e-06, - "loss": 0.9676, - "step": 3923 - }, - { - "epoch": 0.47183310286779295, - "grad_norm": 2.341745969395627, - "learning_rate": 2.2787404627849364e-06, - "loss": 0.9481, - "step": 3924 - }, - { - "epoch": 0.471953345758432, - "grad_norm": 1.6985742508652704, - "learning_rate": 2.277969072510202e-06, - "loss": 1.0202, - "step": 3925 - }, - { - "epoch": 0.4720735886490711, - "grad_norm": 1.7688005635613036, - "learning_rate": 2.2771976400677803e-06, - "loss": 1.0382, - "step": 3926 - }, - { - "epoch": 0.47219383153971023, - "grad_norm": 1.8620196856022528, - "learning_rate": 2.2764261655746965e-06, - "loss": 1.0159, - "step": 3927 - }, - { - "epoch": 0.4723140744303493, - "grad_norm": 1.474957759704941, - "learning_rate": 2.2756546491479832e-06, - "loss": 0.9856, - "step": 3928 - }, - { - "epoch": 0.4724343173209884, - "grad_norm": 3.5546701954850093, - "learning_rate": 2.274883090904679e-06, - "loss": 1.0415, - "step": 3929 - }, - { - "epoch": 0.4725545602116275, - "grad_norm": 2.4514263127405798, - "learning_rate": 2.2741114909618283e-06, - "loss": 0.9054, - "step": 3930 - }, - { - "epoch": 0.47267480310226656, - "grad_norm": 1.67452963302604, - "learning_rate": 2.2733398494364828e-06, - "loss": 0.9465, - "step": 3931 - }, - { - "epoch": 0.47279504599290567, - "grad_norm": 2.1351326978482286, - "learning_rate": 2.272568166445699e-06, - "loss": 1.0725, - "step": 3932 - }, - { - "epoch": 0.4729152888835448, - "grad_norm": 1.9212939616865334, - "learning_rate": 2.271796442106541e-06, - "loss": 0.8758, - "step": 3933 - }, - { - "epoch": 0.47303553177418384, - "grad_norm": 0.7987842128035049, - "learning_rate": 2.271024676536079e-06, - "loss": 0.8423, - "step": 3934 - }, - { - "epoch": 0.47315577466482295, - "grad_norm": 3.634335828128861, - "learning_rate": 2.2702528698513894e-06, - "loss": 0.9627, - "step": 3935 - }, - { - "epoch": 0.47327601755546206, - "grad_norm": 1.8443359478416552, - "learning_rate": 2.269481022169554e-06, - "loss": 1.0171, - "step": 3936 - }, - { - "epoch": 0.4733962604461011, - "grad_norm": 2.3500263861940156, - "learning_rate": 2.2687091336076614e-06, - "loss": 1.0382, - "step": 3937 - }, - { - "epoch": 0.4735165033367402, - "grad_norm": 2.409440538675238, - "learning_rate": 2.267937204282807e-06, - "loss": 1.0286, - "step": 3938 - }, - { - "epoch": 0.4736367462273793, - "grad_norm": 2.2724428917955604, - "learning_rate": 2.2671652343120926e-06, - "loss": 1.0163, - "step": 3939 - }, - { - "epoch": 0.4737569891180184, - "grad_norm": 1.5850400593505811, - "learning_rate": 2.2663932238126236e-06, - "loss": 1.0329, - "step": 3940 - }, - { - "epoch": 0.4738772320086575, - "grad_norm": 1.8407720018113563, - "learning_rate": 2.265621172901515e-06, - "loss": 1.033, - "step": 3941 - }, - { - "epoch": 0.47399747489929656, - "grad_norm": 2.2753954449385647, - "learning_rate": 2.2648490816958854e-06, - "loss": 0.9462, - "step": 3942 - }, - { - "epoch": 0.47411771778993567, - "grad_norm": 2.048149585640339, - "learning_rate": 2.264076950312861e-06, - "loss": 0.9532, - "step": 3943 - }, - { - "epoch": 0.4742379606805748, - "grad_norm": 1.82269798783685, - "learning_rate": 2.2633047788695727e-06, - "loss": 1.0497, - "step": 3944 - }, - { - "epoch": 0.47435820357121383, - "grad_norm": 2.034593852235548, - "learning_rate": 2.262532567483159e-06, - "loss": 0.8718, - "step": 3945 - }, - { - "epoch": 0.47447844646185294, - "grad_norm": 1.7922087743771902, - "learning_rate": 2.2617603162707635e-06, - "loss": 1.0351, - "step": 3946 - }, - { - "epoch": 0.47459868935249205, - "grad_norm": 1.546262986075439, - "learning_rate": 2.2609880253495363e-06, - "loss": 1.0444, - "step": 3947 - }, - { - "epoch": 0.4747189322431311, - "grad_norm": 1.9970467936802787, - "learning_rate": 2.260215694836633e-06, - "loss": 1.0884, - "step": 3948 - }, - { - "epoch": 0.4748391751337702, - "grad_norm": 1.7470622608901245, - "learning_rate": 2.2594433248492157e-06, - "loss": 0.8742, - "step": 3949 - }, - { - "epoch": 0.47495941802440933, - "grad_norm": 1.5889517416374883, - "learning_rate": 2.2586709155044527e-06, - "loss": 1.0227, - "step": 3950 - }, - { - "epoch": 0.4750796609150484, - "grad_norm": 1.6463846015792114, - "learning_rate": 2.2578984669195167e-06, - "loss": 0.9845, - "step": 3951 - }, - { - "epoch": 0.4751999038056875, - "grad_norm": 1.9187704923945779, - "learning_rate": 2.2571259792115887e-06, - "loss": 0.898, - "step": 3952 - }, - { - "epoch": 0.4753201466963266, - "grad_norm": 1.7286180368893258, - "learning_rate": 2.2563534524978544e-06, - "loss": 1.0209, - "step": 3953 - }, - { - "epoch": 0.47544038958696566, - "grad_norm": 1.6572331083906888, - "learning_rate": 2.2555808868955052e-06, - "loss": 0.945, - "step": 3954 - }, - { - "epoch": 0.47556063247760477, - "grad_norm": 2.4080483788291076, - "learning_rate": 2.254808282521738e-06, - "loss": 0.9629, - "step": 3955 - }, - { - "epoch": 0.4756808753682438, - "grad_norm": 1.692459729841094, - "learning_rate": 2.2540356394937573e-06, - "loss": 1.0362, - "step": 3956 - }, - { - "epoch": 0.47580111825888294, - "grad_norm": 2.2186510305763036, - "learning_rate": 2.253262957928772e-06, - "loss": 1.0653, - "step": 3957 - }, - { - "epoch": 0.47592136114952205, - "grad_norm": 1.781974645081032, - "learning_rate": 2.2524902379439976e-06, - "loss": 0.9494, - "step": 3958 - }, - { - "epoch": 0.4760416040401611, - "grad_norm": 0.7570777162320058, - "learning_rate": 2.251717479656655e-06, - "loss": 0.8877, - "step": 3959 - }, - { - "epoch": 0.4761618469308002, - "grad_norm": 2.1330876993784726, - "learning_rate": 2.2509446831839704e-06, - "loss": 0.9859, - "step": 3960 - }, - { - "epoch": 0.4762820898214393, - "grad_norm": 2.4280938812971837, - "learning_rate": 2.250171848643177e-06, - "loss": 1.0439, - "step": 3961 - }, - { - "epoch": 0.4764023327120784, - "grad_norm": 2.8886078265803494, - "learning_rate": 2.249398976151513e-06, - "loss": 1.0962, - "step": 3962 - }, - { - "epoch": 0.4765225756027175, - "grad_norm": 2.1047408450473877, - "learning_rate": 2.248626065826223e-06, - "loss": 1.0217, - "step": 3963 - }, - { - "epoch": 0.4766428184933566, - "grad_norm": 0.7609363939719808, - "learning_rate": 2.2478531177845564e-06, - "loss": 0.8735, - "step": 3964 - }, - { - "epoch": 0.47676306138399566, - "grad_norm": 1.7393209369905365, - "learning_rate": 2.247080132143769e-06, - "loss": 1.0854, - "step": 3965 - }, - { - "epoch": 0.47688330427463477, - "grad_norm": 2.306583361839486, - "learning_rate": 2.246307109021121e-06, - "loss": 0.9164, - "step": 3966 - }, - { - "epoch": 0.4770035471652739, - "grad_norm": 1.614575105051383, - "learning_rate": 2.2455340485338817e-06, - "loss": 1.051, - "step": 3967 - }, - { - "epoch": 0.47712379005591293, - "grad_norm": 2.4113895963974827, - "learning_rate": 2.244760950799322e-06, - "loss": 0.913, - "step": 3968 - }, - { - "epoch": 0.47724403294655204, - "grad_norm": 2.1987383302484096, - "learning_rate": 2.2439878159347203e-06, - "loss": 0.964, - "step": 3969 - }, - { - "epoch": 0.4773642758371911, - "grad_norm": 0.8262709970735137, - "learning_rate": 2.2432146440573616e-06, - "loss": 0.8569, - "step": 3970 - }, - { - "epoch": 0.4774845187278302, - "grad_norm": 1.7085553311885062, - "learning_rate": 2.242441435284534e-06, - "loss": 0.8932, - "step": 3971 - }, - { - "epoch": 0.4776047616184693, - "grad_norm": 2.016191742936943, - "learning_rate": 2.2416681897335337e-06, - "loss": 1.0797, - "step": 3972 - }, - { - "epoch": 0.4777250045091084, - "grad_norm": 3.608790230548263, - "learning_rate": 2.240894907521661e-06, - "loss": 0.893, - "step": 3973 - }, - { - "epoch": 0.4778452473997475, - "grad_norm": 1.5826855472064119, - "learning_rate": 2.240121588766223e-06, - "loss": 0.8696, - "step": 3974 - }, - { - "epoch": 0.4779654902903866, - "grad_norm": 1.774082016305851, - "learning_rate": 2.239348233584531e-06, - "loss": 0.9319, - "step": 3975 - }, - { - "epoch": 0.47808573318102565, - "grad_norm": 1.7603202733241197, - "learning_rate": 2.2385748420939013e-06, - "loss": 1.0296, - "step": 3976 - }, - { - "epoch": 0.47820597607166476, - "grad_norm": 1.7469873381987233, - "learning_rate": 2.2378014144116583e-06, - "loss": 0.9536, - "step": 3977 - }, - { - "epoch": 0.4783262189623039, - "grad_norm": 1.6398274254133998, - "learning_rate": 2.23702795065513e-06, - "loss": 1.0231, - "step": 3978 - }, - { - "epoch": 0.47844646185294293, - "grad_norm": 0.9907837797649087, - "learning_rate": 2.2362544509416493e-06, - "loss": 0.9381, - "step": 3979 - }, - { - "epoch": 0.47856670474358204, - "grad_norm": 2.4153919694826875, - "learning_rate": 2.2354809153885572e-06, - "loss": 1.0555, - "step": 3980 - }, - { - "epoch": 0.47868694763422115, - "grad_norm": 1.9835551813679746, - "learning_rate": 2.234707344113197e-06, - "loss": 1.0523, - "step": 3981 - }, - { - "epoch": 0.4788071905248602, - "grad_norm": 1.743935499271549, - "learning_rate": 2.233933737232919e-06, - "loss": 1.0103, - "step": 3982 - }, - { - "epoch": 0.4789274334154993, - "grad_norm": 1.683226578540875, - "learning_rate": 2.2331600948650793e-06, - "loss": 1.0112, - "step": 3983 - }, - { - "epoch": 0.4790476763061384, - "grad_norm": 2.0364460143930536, - "learning_rate": 2.2323864171270386e-06, - "loss": 1.0316, - "step": 3984 - }, - { - "epoch": 0.4791679191967775, - "grad_norm": 3.8252405627030606, - "learning_rate": 2.231612704136164e-06, - "loss": 0.9564, - "step": 3985 - }, - { - "epoch": 0.4792881620874166, - "grad_norm": 2.1007236096868276, - "learning_rate": 2.2308389560098253e-06, - "loss": 0.9802, - "step": 3986 - }, - { - "epoch": 0.47940840497805565, - "grad_norm": 5.566618519801327, - "learning_rate": 2.2300651728654008e-06, - "loss": 0.9918, - "step": 3987 - }, - { - "epoch": 0.47952864786869476, - "grad_norm": 0.7689238731567766, - "learning_rate": 2.229291354820272e-06, - "loss": 0.8662, - "step": 3988 - }, - { - "epoch": 0.47964889075933387, - "grad_norm": 1.8919602400168687, - "learning_rate": 2.228517501991828e-06, - "loss": 1.002, - "step": 3989 - }, - { - "epoch": 0.4797691336499729, - "grad_norm": 0.8535519729661175, - "learning_rate": 2.22774361449746e-06, - "loss": 0.872, - "step": 3990 - }, - { - "epoch": 0.47988937654061203, - "grad_norm": 2.0859579592497948, - "learning_rate": 2.2269696924545668e-06, - "loss": 0.9307, - "step": 3991 - }, - { - "epoch": 0.48000961943125114, - "grad_norm": 2.613219119304163, - "learning_rate": 2.2261957359805523e-06, - "loss": 1.0059, - "step": 3992 - }, - { - "epoch": 0.4801298623218902, - "grad_norm": 1.9058586094020675, - "learning_rate": 2.225421745192823e-06, - "loss": 0.974, - "step": 3993 - }, - { - "epoch": 0.4802501052125293, - "grad_norm": 2.2353811066267903, - "learning_rate": 2.2246477202087955e-06, - "loss": 1.0098, - "step": 3994 - }, - { - "epoch": 0.4803703481031684, - "grad_norm": 1.5884801467072713, - "learning_rate": 2.223873661145887e-06, - "loss": 1.0572, - "step": 3995 - }, - { - "epoch": 0.4804905909938075, - "grad_norm": 1.7300216062662899, - "learning_rate": 2.2230995681215226e-06, - "loss": 0.9371, - "step": 3996 - }, - { - "epoch": 0.4806108338844466, - "grad_norm": 1.7807806634797392, - "learning_rate": 2.2223254412531305e-06, - "loss": 1.014, - "step": 3997 - }, - { - "epoch": 0.4807310767750857, - "grad_norm": 2.464606368880806, - "learning_rate": 2.221551280658146e-06, - "loss": 1.0501, - "step": 3998 - }, - { - "epoch": 0.48085131966572475, - "grad_norm": 3.34066373340501, - "learning_rate": 2.2207770864540085e-06, - "loss": 0.9645, - "step": 3999 - }, - { - "epoch": 0.48097156255636386, - "grad_norm": 2.0595945064738865, - "learning_rate": 2.220002858758162e-06, - "loss": 0.9517, - "step": 4000 - }, - { - "epoch": 0.481091805447003, - "grad_norm": 0.8399119504694054, - "learning_rate": 2.2192285976880573e-06, - "loss": 0.8583, - "step": 4001 - }, - { - "epoch": 0.48121204833764203, - "grad_norm": 1.4163822935827415, - "learning_rate": 2.2184543033611485e-06, - "loss": 1.033, - "step": 4002 - }, - { - "epoch": 0.48133229122828114, - "grad_norm": 2.2026396954126692, - "learning_rate": 2.2176799758948957e-06, - "loss": 1.0498, - "step": 4003 - }, - { - "epoch": 0.4814525341189202, - "grad_norm": 2.356567329064887, - "learning_rate": 2.2169056154067635e-06, - "loss": 0.959, - "step": 4004 - }, - { - "epoch": 0.4815727770095593, - "grad_norm": 1.5911018960716694, - "learning_rate": 2.216131222014222e-06, - "loss": 1.053, - "step": 4005 - }, - { - "epoch": 0.4816930199001984, - "grad_norm": 1.8109678830353728, - "learning_rate": 2.2153567958347455e-06, - "loss": 1.0305, - "step": 4006 - }, - { - "epoch": 0.48181326279083747, - "grad_norm": 2.1519841553200467, - "learning_rate": 2.214582336985815e-06, - "loss": 1.0261, - "step": 4007 - }, - { - "epoch": 0.4819335056814766, - "grad_norm": 2.7592909526877505, - "learning_rate": 2.2138078455849142e-06, - "loss": 0.8789, - "step": 4008 - }, - { - "epoch": 0.4820537485721157, - "grad_norm": 1.8973033641307844, - "learning_rate": 2.2130333217495334e-06, - "loss": 1.022, - "step": 4009 - }, - { - "epoch": 0.48217399146275475, - "grad_norm": 2.2506412016315362, - "learning_rate": 2.2122587655971665e-06, - "loss": 0.9023, - "step": 4010 - }, - { - "epoch": 0.48229423435339386, - "grad_norm": 1.628711569882357, - "learning_rate": 2.211484177245314e-06, - "loss": 0.8702, - "step": 4011 - }, - { - "epoch": 0.48241447724403297, - "grad_norm": 2.345868793262453, - "learning_rate": 2.21070955681148e-06, - "loss": 0.9602, - "step": 4012 - }, - { - "epoch": 0.482534720134672, - "grad_norm": 1.522444807533621, - "learning_rate": 2.209934904413174e-06, - "loss": 1.0016, - "step": 4013 - }, - { - "epoch": 0.48265496302531113, - "grad_norm": 4.533283908424969, - "learning_rate": 2.2091602201679095e-06, - "loss": 0.945, - "step": 4014 - }, - { - "epoch": 0.48277520591595025, - "grad_norm": 2.562293439773983, - "learning_rate": 2.208385504193206e-06, - "loss": 1.0616, - "step": 4015 - }, - { - "epoch": 0.4828954488065893, - "grad_norm": 2.194441109935089, - "learning_rate": 2.2076107566065873e-06, - "loss": 1.0369, - "step": 4016 - }, - { - "epoch": 0.4830156916972284, - "grad_norm": 1.979178224496389, - "learning_rate": 2.2068359775255816e-06, - "loss": 0.9752, - "step": 4017 - }, - { - "epoch": 0.48313593458786747, - "grad_norm": 2.329330234778093, - "learning_rate": 2.206061167067723e-06, - "loss": 1.0027, - "step": 4018 - }, - { - "epoch": 0.4832561774785066, - "grad_norm": 2.041400489842799, - "learning_rate": 2.205286325350549e-06, - "loss": 1.0308, - "step": 4019 - }, - { - "epoch": 0.4833764203691457, - "grad_norm": 1.9120213426970942, - "learning_rate": 2.204511452491603e-06, - "loss": 0.9536, - "step": 4020 - }, - { - "epoch": 0.48349666325978474, - "grad_norm": 1.5628667019646114, - "learning_rate": 2.2037365486084316e-06, - "loss": 0.9783, - "step": 4021 - }, - { - "epoch": 0.48361690615042385, - "grad_norm": 2.216268521837702, - "learning_rate": 2.2029616138185886e-06, - "loss": 1.0095, - "step": 4022 - }, - { - "epoch": 0.48373714904106296, - "grad_norm": 1.6684806806424959, - "learning_rate": 2.202186648239629e-06, - "loss": 1.0537, - "step": 4023 - }, - { - "epoch": 0.483857391931702, - "grad_norm": 1.9913832293886287, - "learning_rate": 2.201411651989117e-06, - "loss": 0.9439, - "step": 4024 - }, - { - "epoch": 0.48397763482234113, - "grad_norm": 2.449058515305251, - "learning_rate": 2.2006366251846167e-06, - "loss": 1.0125, - "step": 4025 - }, - { - "epoch": 0.48409787771298024, - "grad_norm": 1.6817410003797846, - "learning_rate": 2.1998615679436997e-06, - "loss": 0.9799, - "step": 4026 - }, - { - "epoch": 0.4842181206036193, - "grad_norm": 3.636627097772659, - "learning_rate": 2.199086480383942e-06, - "loss": 0.9948, - "step": 4027 - }, - { - "epoch": 0.4843383634942584, - "grad_norm": 2.6955644130465166, - "learning_rate": 2.1983113626229234e-06, - "loss": 0.9005, - "step": 4028 - }, - { - "epoch": 0.4844586063848975, - "grad_norm": 1.879185582451304, - "learning_rate": 2.1975362147782293e-06, - "loss": 1.0085, - "step": 4029 - }, - { - "epoch": 0.48457884927553657, - "grad_norm": 0.8078705506707635, - "learning_rate": 2.196761036967448e-06, - "loss": 0.8008, - "step": 4030 - }, - { - "epoch": 0.4846990921661757, - "grad_norm": 1.6351239940744324, - "learning_rate": 2.1959858293081743e-06, - "loss": 1.0127, - "step": 4031 - }, - { - "epoch": 0.4848193350568148, - "grad_norm": 1.6243336118127334, - "learning_rate": 2.1952105919180056e-06, - "loss": 0.9934, - "step": 4032 - }, - { - "epoch": 0.48493957794745385, - "grad_norm": 2.694330893633146, - "learning_rate": 2.1944353249145456e-06, - "loss": 0.909, - "step": 4033 - }, - { - "epoch": 0.48505982083809296, - "grad_norm": 2.0243416316411267, - "learning_rate": 2.193660028415401e-06, - "loss": 0.9754, - "step": 4034 - }, - { - "epoch": 0.485180063728732, - "grad_norm": 1.7029661751981506, - "learning_rate": 2.1928847025381852e-06, - "loss": 1.0462, - "step": 4035 - }, - { - "epoch": 0.4853003066193711, - "grad_norm": 6.91934688374614, - "learning_rate": 2.192109347400512e-06, - "loss": 1.0705, - "step": 4036 - }, - { - "epoch": 0.48542054951001024, - "grad_norm": 2.0101710615861474, - "learning_rate": 2.191333963120004e-06, - "loss": 1.0191, - "step": 4037 - }, - { - "epoch": 0.4855407924006493, - "grad_norm": 2.1586827362307512, - "learning_rate": 2.190558549814286e-06, - "loss": 0.9363, - "step": 4038 - }, - { - "epoch": 0.4856610352912884, - "grad_norm": 6.27053724162324, - "learning_rate": 2.1897831076009872e-06, - "loss": 1.0174, - "step": 4039 - }, - { - "epoch": 0.4857812781819275, - "grad_norm": 2.643431486958782, - "learning_rate": 2.1890076365977426e-06, - "loss": 1.0266, - "step": 4040 - }, - { - "epoch": 0.48590152107256657, - "grad_norm": 0.8897702649781232, - "learning_rate": 2.188232136922189e-06, - "loss": 0.796, - "step": 4041 - }, - { - "epoch": 0.4860217639632057, - "grad_norm": 1.7467500618082497, - "learning_rate": 2.187456608691971e-06, - "loss": 0.9879, - "step": 4042 - }, - { - "epoch": 0.4861420068538448, - "grad_norm": 1.9532477378426867, - "learning_rate": 2.1866810520247334e-06, - "loss": 1.1045, - "step": 4043 - }, - { - "epoch": 0.48626224974448384, - "grad_norm": 1.961858160913393, - "learning_rate": 2.185905467038129e-06, - "loss": 0.8829, - "step": 4044 - }, - { - "epoch": 0.48638249263512295, - "grad_norm": 1.720966574411336, - "learning_rate": 2.1851298538498127e-06, - "loss": 1.0045, - "step": 4045 - }, - { - "epoch": 0.48650273552576206, - "grad_norm": 1.7822622802499655, - "learning_rate": 2.184354212577446e-06, - "loss": 1.022, - "step": 4046 - }, - { - "epoch": 0.4866229784164011, - "grad_norm": 3.7223739276798486, - "learning_rate": 2.1835785433386907e-06, - "loss": 0.8635, - "step": 4047 - }, - { - "epoch": 0.48674322130704023, - "grad_norm": 2.5660875839663824, - "learning_rate": 2.182802846251216e-06, - "loss": 0.8757, - "step": 4048 - }, - { - "epoch": 0.4868634641976793, - "grad_norm": 1.9057249377936056, - "learning_rate": 2.182027121432696e-06, - "loss": 0.9582, - "step": 4049 - }, - { - "epoch": 0.4869837070883184, - "grad_norm": 1.9767609749158024, - "learning_rate": 2.1812513690008054e-06, - "loss": 1.0499, - "step": 4050 - }, - { - "epoch": 0.4871039499789575, - "grad_norm": 1.9601482611653416, - "learning_rate": 2.180475589073227e-06, - "loss": 1.0295, - "step": 4051 - }, - { - "epoch": 0.48722419286959656, - "grad_norm": 1.6272570267826096, - "learning_rate": 2.1796997817676456e-06, - "loss": 0.9653, - "step": 4052 - }, - { - "epoch": 0.4873444357602357, - "grad_norm": 1.5581195463658146, - "learning_rate": 2.1789239472017494e-06, - "loss": 0.903, - "step": 4053 - }, - { - "epoch": 0.4874646786508748, - "grad_norm": 1.9641213880889432, - "learning_rate": 2.1781480854932326e-06, - "loss": 0.9551, - "step": 4054 - }, - { - "epoch": 0.48758492154151384, - "grad_norm": 1.7061525086437803, - "learning_rate": 2.1773721967597933e-06, - "loss": 1.021, - "step": 4055 - }, - { - "epoch": 0.48770516443215295, - "grad_norm": 0.8955133036504388, - "learning_rate": 2.1765962811191322e-06, - "loss": 0.8624, - "step": 4056 - }, - { - "epoch": 0.48782540732279206, - "grad_norm": 0.9095265252417082, - "learning_rate": 2.1758203386889566e-06, - "loss": 0.9161, - "step": 4057 - }, - { - "epoch": 0.4879456502134311, - "grad_norm": 2.123152209970274, - "learning_rate": 2.1750443695869746e-06, - "loss": 1.0678, - "step": 4058 - }, - { - "epoch": 0.4880658931040702, - "grad_norm": 1.986806026196312, - "learning_rate": 2.174268373930901e-06, - "loss": 1.0862, - "step": 4059 - }, - { - "epoch": 0.48818613599470934, - "grad_norm": 1.8436582510151536, - "learning_rate": 2.1734923518384537e-06, - "loss": 1.0261, - "step": 4060 - }, - { - "epoch": 0.4883063788853484, - "grad_norm": 1.9167916284819362, - "learning_rate": 2.1727163034273547e-06, - "loss": 1.0454, - "step": 4061 - }, - { - "epoch": 0.4884266217759875, - "grad_norm": 1.9521836720846306, - "learning_rate": 2.17194022881533e-06, - "loss": 1.008, - "step": 4062 - }, - { - "epoch": 0.4885468646666266, - "grad_norm": 1.598391860205933, - "learning_rate": 2.1711641281201092e-06, - "loss": 0.9017, - "step": 4063 - }, - { - "epoch": 0.48866710755726567, - "grad_norm": 1.9714706030406755, - "learning_rate": 2.1703880014594264e-06, - "loss": 1.0152, - "step": 4064 - }, - { - "epoch": 0.4887873504479048, - "grad_norm": 1.6544570575074193, - "learning_rate": 2.1696118489510182e-06, - "loss": 0.962, - "step": 4065 - }, - { - "epoch": 0.48890759333854383, - "grad_norm": 2.0575163770837337, - "learning_rate": 2.1688356707126286e-06, - "loss": 0.9557, - "step": 4066 - }, - { - "epoch": 0.48902783622918294, - "grad_norm": 1.9015254898669887, - "learning_rate": 2.168059466862001e-06, - "loss": 0.9283, - "step": 4067 - }, - { - "epoch": 0.48914807911982205, - "grad_norm": 1.7959936219422865, - "learning_rate": 2.167283237516887e-06, - "loss": 1.044, - "step": 4068 - }, - { - "epoch": 0.4892683220104611, - "grad_norm": 1.9199390119561994, - "learning_rate": 2.1665069827950383e-06, - "loss": 0.9799, - "step": 4069 - }, - { - "epoch": 0.4893885649011002, - "grad_norm": 1.9249634776428328, - "learning_rate": 2.1657307028142126e-06, - "loss": 1.095, - "step": 4070 - }, - { - "epoch": 0.48950880779173933, - "grad_norm": 1.83877785293987, - "learning_rate": 2.164954397692171e-06, - "loss": 0.9003, - "step": 4071 - }, - { - "epoch": 0.4896290506823784, - "grad_norm": 1.1354047908803506, - "learning_rate": 2.164178067546678e-06, - "loss": 1.0594, - "step": 4072 - }, - { - "epoch": 0.4897492935730175, - "grad_norm": 1.9498343250613315, - "learning_rate": 2.163401712495504e-06, - "loss": 1.1373, - "step": 4073 - }, - { - "epoch": 0.4898695364636566, - "grad_norm": 2.4570691904798734, - "learning_rate": 2.1626253326564194e-06, - "loss": 1.019, - "step": 4074 - }, - { - "epoch": 0.48998977935429566, - "grad_norm": 1.7556628837585735, - "learning_rate": 2.161848928147201e-06, - "loss": 1.0051, - "step": 4075 - }, - { - "epoch": 0.4901100222449348, - "grad_norm": 3.6070216049253765, - "learning_rate": 2.161072499085629e-06, - "loss": 1.0403, - "step": 4076 - }, - { - "epoch": 0.4902302651355739, - "grad_norm": 1.7879869984745043, - "learning_rate": 2.160296045589487e-06, - "loss": 1.0498, - "step": 4077 - }, - { - "epoch": 0.49035050802621294, - "grad_norm": 1.7008434727597204, - "learning_rate": 2.159519567776562e-06, - "loss": 0.9226, - "step": 4078 - }, - { - "epoch": 0.49047075091685205, - "grad_norm": 2.685731882831933, - "learning_rate": 2.1587430657646463e-06, - "loss": 0.9368, - "step": 4079 - }, - { - "epoch": 0.4905909938074911, - "grad_norm": 1.6415458637979843, - "learning_rate": 2.157966539671533e-06, - "loss": 1.0076, - "step": 4080 - }, - { - "epoch": 0.4907112366981302, - "grad_norm": 1.973811591203882, - "learning_rate": 2.157189989615021e-06, - "loss": 0.8953, - "step": 4081 - }, - { - "epoch": 0.4908314795887693, - "grad_norm": 1.6734303877649983, - "learning_rate": 2.156413415712913e-06, - "loss": 0.9745, - "step": 4082 - }, - { - "epoch": 0.4909517224794084, - "grad_norm": 1.7939495653528794, - "learning_rate": 2.155636818083014e-06, - "loss": 1.0054, - "step": 4083 - }, - { - "epoch": 0.4910719653700475, - "grad_norm": 2.145038148767515, - "learning_rate": 2.154860196843134e-06, - "loss": 1.0722, - "step": 4084 - }, - { - "epoch": 0.4911922082606866, - "grad_norm": 1.8072694745932016, - "learning_rate": 2.154083552111085e-06, - "loss": 0.9933, - "step": 4085 - }, - { - "epoch": 0.49131245115132566, - "grad_norm": 1.7138121680315759, - "learning_rate": 2.1533068840046834e-06, - "loss": 1.0507, - "step": 4086 - }, - { - "epoch": 0.49143269404196477, - "grad_norm": 2.8985520003495657, - "learning_rate": 2.152530192641749e-06, - "loss": 0.8404, - "step": 4087 - }, - { - "epoch": 0.4915529369326039, - "grad_norm": 3.2080614709648656, - "learning_rate": 2.1517534781401068e-06, - "loss": 0.9461, - "step": 4088 - }, - { - "epoch": 0.49167317982324293, - "grad_norm": 2.0423447657114133, - "learning_rate": 2.150976740617581e-06, - "loss": 0.9197, - "step": 4089 - }, - { - "epoch": 0.49179342271388204, - "grad_norm": 1.8003277983476498, - "learning_rate": 2.150199980192006e-06, - "loss": 0.9551, - "step": 4090 - }, - { - "epoch": 0.49191366560452116, - "grad_norm": 1.8165476754208865, - "learning_rate": 2.1494231969812114e-06, - "loss": 1.0395, - "step": 4091 - }, - { - "epoch": 0.4920339084951602, - "grad_norm": 3.328793167596437, - "learning_rate": 2.1486463911030372e-06, - "loss": 1.0372, - "step": 4092 - }, - { - "epoch": 0.4921541513857993, - "grad_norm": 2.0003088474225437, - "learning_rate": 2.147869562675324e-06, - "loss": 0.9728, - "step": 4093 - }, - { - "epoch": 0.49227439427643843, - "grad_norm": 1.617659126838507, - "learning_rate": 2.147092711815915e-06, - "loss": 0.9493, - "step": 4094 - }, - { - "epoch": 0.4923946371670775, - "grad_norm": 2.456417425530711, - "learning_rate": 2.1463158386426593e-06, - "loss": 1.0919, - "step": 4095 - }, - { - "epoch": 0.4925148800577166, - "grad_norm": 2.011199943531251, - "learning_rate": 2.145538943273407e-06, - "loss": 1.0119, - "step": 4096 - }, - { - "epoch": 0.49263512294835565, - "grad_norm": 1.649079256650315, - "learning_rate": 2.144762025826013e-06, - "loss": 0.9488, - "step": 4097 - }, - { - "epoch": 0.49275536583899476, - "grad_norm": 2.5820612595469106, - "learning_rate": 2.143985086418334e-06, - "loss": 1.0987, - "step": 4098 - }, - { - "epoch": 0.4928756087296339, - "grad_norm": 1.3799224221496758, - "learning_rate": 2.1432081251682324e-06, - "loss": 0.9993, - "step": 4099 - }, - { - "epoch": 0.49299585162027293, - "grad_norm": 1.583424348055702, - "learning_rate": 2.142431142193572e-06, - "loss": 1.0948, - "step": 4100 - }, - { - "epoch": 0.49311609451091204, - "grad_norm": 2.123179385095368, - "learning_rate": 2.1416541376122207e-06, - "loss": 0.946, - "step": 4101 - }, - { - "epoch": 0.49323633740155115, - "grad_norm": 1.6182931262651676, - "learning_rate": 2.1408771115420496e-06, - "loss": 0.955, - "step": 4102 - }, - { - "epoch": 0.4933565802921902, - "grad_norm": 2.2395842323005706, - "learning_rate": 2.140100064100932e-06, - "loss": 0.8786, - "step": 4103 - }, - { - "epoch": 0.4934768231828293, - "grad_norm": 1.9272924223870078, - "learning_rate": 2.139322995406746e-06, - "loss": 0.9824, - "step": 4104 - }, - { - "epoch": 0.4935970660734684, - "grad_norm": 1.6130675389055478, - "learning_rate": 2.1385459055773727e-06, - "loss": 1.0351, - "step": 4105 - }, - { - "epoch": 0.4937173089641075, - "grad_norm": 1.8754865015205693, - "learning_rate": 2.137768794730696e-06, - "loss": 0.9695, - "step": 4106 - }, - { - "epoch": 0.4938375518547466, - "grad_norm": 2.0379378333975176, - "learning_rate": 2.1369916629846026e-06, - "loss": 1.0311, - "step": 4107 - }, - { - "epoch": 0.4939577947453857, - "grad_norm": 1.7258553416078215, - "learning_rate": 2.136214510456983e-06, - "loss": 0.9796, - "step": 4108 - }, - { - "epoch": 0.49407803763602476, - "grad_norm": 0.9753601912767962, - "learning_rate": 2.1354373372657296e-06, - "loss": 0.937, - "step": 4109 - }, - { - "epoch": 0.49419828052666387, - "grad_norm": 1.4417490151026422, - "learning_rate": 2.1346601435287404e-06, - "loss": 0.9359, - "step": 4110 - }, - { - "epoch": 0.494318523417303, - "grad_norm": 1.6378277865509663, - "learning_rate": 2.1338829293639144e-06, - "loss": 1.0313, - "step": 4111 - }, - { - "epoch": 0.49443876630794203, - "grad_norm": 5.475168178535884, - "learning_rate": 2.1331056948891547e-06, - "loss": 1.0605, - "step": 4112 - }, - { - "epoch": 0.49455900919858115, - "grad_norm": 1.9868495260587373, - "learning_rate": 2.1323284402223666e-06, - "loss": 0.9928, - "step": 4113 - }, - { - "epoch": 0.4946792520892202, - "grad_norm": 2.1403603494592, - "learning_rate": 2.1315511654814597e-06, - "loss": 1.1054, - "step": 4114 - }, - { - "epoch": 0.4947994949798593, - "grad_norm": 1.818089187494913, - "learning_rate": 2.1307738707843456e-06, - "loss": 1.0084, - "step": 4115 - }, - { - "epoch": 0.4949197378704984, - "grad_norm": 1.957058538981397, - "learning_rate": 2.1299965562489385e-06, - "loss": 0.9228, - "step": 4116 - }, - { - "epoch": 0.4950399807611375, - "grad_norm": 1.3155551955309224, - "learning_rate": 2.129219221993158e-06, - "loss": 1.0221, - "step": 4117 - }, - { - "epoch": 0.4951602236517766, - "grad_norm": 0.8278402882711294, - "learning_rate": 2.128441868134924e-06, - "loss": 0.8762, - "step": 4118 - }, - { - "epoch": 0.4952804665424157, - "grad_norm": 2.1915473552568403, - "learning_rate": 2.1276644947921606e-06, - "loss": 1.0652, - "step": 4119 - }, - { - "epoch": 0.49540070943305475, - "grad_norm": 1.7713605507303118, - "learning_rate": 2.126887102082795e-06, - "loss": 1.0526, - "step": 4120 - }, - { - "epoch": 0.49552095232369386, - "grad_norm": 2.0284571300750347, - "learning_rate": 2.126109690124757e-06, - "loss": 0.9286, - "step": 4121 - }, - { - "epoch": 0.495641195214333, - "grad_norm": 2.0490880752184473, - "learning_rate": 2.1253322590359786e-06, - "loss": 0.9437, - "step": 4122 - }, - { - "epoch": 0.49576143810497203, - "grad_norm": 2.0861698549851777, - "learning_rate": 2.124554808934397e-06, - "loss": 0.97, - "step": 4123 - }, - { - "epoch": 0.49588168099561114, - "grad_norm": 1.7445206009317273, - "learning_rate": 2.1237773399379496e-06, - "loss": 0.9652, - "step": 4124 - }, - { - "epoch": 0.49600192388625025, - "grad_norm": 1.9949327291175067, - "learning_rate": 2.122999852164578e-06, - "loss": 1.0964, - "step": 4125 - }, - { - "epoch": 0.4961221667768893, - "grad_norm": 2.1346368497798944, - "learning_rate": 2.122222345732227e-06, - "loss": 0.804, - "step": 4126 - }, - { - "epoch": 0.4962424096675284, - "grad_norm": 1.7183821544624538, - "learning_rate": 2.121444820758843e-06, - "loss": 1.0595, - "step": 4127 - }, - { - "epoch": 0.49636265255816747, - "grad_norm": 1.8314871885042694, - "learning_rate": 2.120667277362376e-06, - "loss": 1.0057, - "step": 4128 - }, - { - "epoch": 0.4964828954488066, - "grad_norm": 2.1595702109311383, - "learning_rate": 2.1198897156607796e-06, - "loss": 1.0782, - "step": 4129 - }, - { - "epoch": 0.4966031383394457, - "grad_norm": 3.1240916648633648, - "learning_rate": 2.1191121357720085e-06, - "loss": 0.9742, - "step": 4130 - }, - { - "epoch": 0.49672338123008475, - "grad_norm": 1.6453259647756704, - "learning_rate": 2.1183345378140206e-06, - "loss": 0.9706, - "step": 4131 - }, - { - "epoch": 0.49684362412072386, - "grad_norm": 0.9719905183588681, - "learning_rate": 2.1175569219047783e-06, - "loss": 0.8928, - "step": 4132 - }, - { - "epoch": 0.49696386701136297, - "grad_norm": 1.466289732281704, - "learning_rate": 2.1167792881622437e-06, - "loss": 0.958, - "step": 4133 - }, - { - "epoch": 0.497084109902002, - "grad_norm": 1.8622984143428036, - "learning_rate": 2.116001636704384e-06, - "loss": 1.0363, - "step": 4134 - }, - { - "epoch": 0.49720435279264114, - "grad_norm": 2.7032722036158403, - "learning_rate": 2.1152239676491685e-06, - "loss": 1.0354, - "step": 4135 - }, - { - "epoch": 0.49732459568328025, - "grad_norm": 2.27165692527662, - "learning_rate": 2.114446281114569e-06, - "loss": 0.9703, - "step": 4136 - }, - { - "epoch": 0.4974448385739193, - "grad_norm": 1.6788726959496727, - "learning_rate": 2.1136685772185587e-06, - "loss": 0.9847, - "step": 4137 - }, - { - "epoch": 0.4975650814645584, - "grad_norm": 1.6093521857264919, - "learning_rate": 2.1128908560791163e-06, - "loss": 1.0115, - "step": 4138 - }, - { - "epoch": 0.4976853243551975, - "grad_norm": 1.658811855125884, - "learning_rate": 2.1121131178142203e-06, - "loss": 1.0136, - "step": 4139 - }, - { - "epoch": 0.4978055672458366, - "grad_norm": 1.7592543728334855, - "learning_rate": 2.1113353625418544e-06, - "loss": 1.0519, - "step": 4140 - }, - { - "epoch": 0.4979258101364757, - "grad_norm": 2.787614633572294, - "learning_rate": 2.1105575903800017e-06, - "loss": 1.0221, - "step": 4141 - }, - { - "epoch": 0.4980460530271148, - "grad_norm": 1.8285073426630947, - "learning_rate": 2.1097798014466502e-06, - "loss": 1.0781, - "step": 4142 - }, - { - "epoch": 0.49816629591775385, - "grad_norm": 6.124553119160926, - "learning_rate": 2.109001995859791e-06, - "loss": 0.8145, - "step": 4143 - }, - { - "epoch": 0.49828653880839296, - "grad_norm": 0.7622805748697529, - "learning_rate": 2.108224173737415e-06, - "loss": 0.8634, - "step": 4144 - }, - { - "epoch": 0.498406781699032, - "grad_norm": 1.9971518502226866, - "learning_rate": 2.1074463351975183e-06, - "loss": 0.9896, - "step": 4145 - }, - { - "epoch": 0.49852702458967113, - "grad_norm": 1.9551193431125855, - "learning_rate": 2.106668480358098e-06, - "loss": 0.9431, - "step": 4146 - }, - { - "epoch": 0.49864726748031024, - "grad_norm": 1.8310486328024895, - "learning_rate": 2.105890609337154e-06, - "loss": 0.9401, - "step": 4147 - }, - { - "epoch": 0.4987675103709493, - "grad_norm": 0.6884800038353155, - "learning_rate": 2.1051127222526883e-06, - "loss": 0.8778, - "step": 4148 - }, - { - "epoch": 0.4988877532615884, - "grad_norm": 12.740449207956045, - "learning_rate": 2.1043348192227067e-06, - "loss": 1.0314, - "step": 4149 - }, - { - "epoch": 0.4990079961522275, - "grad_norm": 1.62797794373495, - "learning_rate": 2.1035569003652156e-06, - "loss": 0.8561, - "step": 4150 - }, - { - "epoch": 0.4991282390428666, - "grad_norm": 3.439863883363959, - "learning_rate": 2.1027789657982255e-06, - "loss": 1.0506, - "step": 4151 - }, - { - "epoch": 0.4992484819335057, - "grad_norm": 1.7353650952843538, - "learning_rate": 2.1020010156397482e-06, - "loss": 1.0003, - "step": 4152 - }, - { - "epoch": 0.4993687248241448, - "grad_norm": 1.8092193510214492, - "learning_rate": 2.101223050007797e-06, - "loss": 1.0092, - "step": 4153 - }, - { - "epoch": 0.49948896771478385, - "grad_norm": 4.121630361612811, - "learning_rate": 2.1004450690203904e-06, - "loss": 0.8141, - "step": 4154 - }, - { - "epoch": 0.49960921060542296, - "grad_norm": 2.5350409483000673, - "learning_rate": 2.099667072795546e-06, - "loss": 0.9459, - "step": 4155 - }, - { - "epoch": 0.49972945349606207, - "grad_norm": 2.162401463904187, - "learning_rate": 2.0988890614512864e-06, - "loss": 1.0235, - "step": 4156 - }, - { - "epoch": 0.4998496963867011, - "grad_norm": 1.6661914942801659, - "learning_rate": 2.098111035105635e-06, - "loss": 1.0658, - "step": 4157 - }, - { - "epoch": 0.49996993927734024, - "grad_norm": 1.671020289336143, - "learning_rate": 2.0973329938766176e-06, - "loss": 0.9638, - "step": 4158 - }, - { - "epoch": 0.5000901821679793, - "grad_norm": 1.903278223142886, - "learning_rate": 2.0965549378822618e-06, - "loss": 1.0206, - "step": 4159 - }, - { - "epoch": 0.5002104250586185, - "grad_norm": 1.9821198031288847, - "learning_rate": 2.095776867240599e-06, - "loss": 1.0654, - "step": 4160 - }, - { - "epoch": 0.5003306679492575, - "grad_norm": 1.8010874589254366, - "learning_rate": 2.094998782069661e-06, - "loss": 1.0514, - "step": 4161 - }, - { - "epoch": 0.5004509108398966, - "grad_norm": 1.6024206328116377, - "learning_rate": 2.0942206824874845e-06, - "loss": 0.9728, - "step": 4162 - }, - { - "epoch": 0.5005711537305357, - "grad_norm": 1.8744371205078587, - "learning_rate": 2.093442568612105e-06, - "loss": 1.0231, - "step": 4163 - }, - { - "epoch": 0.5006913966211748, - "grad_norm": 1.6487237040032918, - "learning_rate": 2.0926644405615613e-06, - "loss": 1.0769, - "step": 4164 - }, - { - "epoch": 0.5008116395118138, - "grad_norm": 1.8616778434456616, - "learning_rate": 2.091886298453897e-06, - "loss": 1.0455, - "step": 4165 - }, - { - "epoch": 0.500931882402453, - "grad_norm": 2.535887249855942, - "learning_rate": 2.091108142407153e-06, - "loss": 0.963, - "step": 4166 - }, - { - "epoch": 0.5010521252930921, - "grad_norm": 0.8939821648664157, - "learning_rate": 2.090329972539377e-06, - "loss": 0.9221, - "step": 4167 - }, - { - "epoch": 0.5011723681837311, - "grad_norm": 1.8496703447002985, - "learning_rate": 2.089551788968616e-06, - "loss": 0.9139, - "step": 4168 - }, - { - "epoch": 0.5012926110743702, - "grad_norm": 0.8862179589767905, - "learning_rate": 2.08877359181292e-06, - "loss": 0.8765, - "step": 4169 - }, - { - "epoch": 0.5014128539650093, - "grad_norm": 2.55916396914056, - "learning_rate": 2.0879953811903396e-06, - "loss": 1.0841, - "step": 4170 - }, - { - "epoch": 0.5015330968556484, - "grad_norm": 1.6943182776682335, - "learning_rate": 2.08721715721893e-06, - "loss": 1.0142, - "step": 4171 - }, - { - "epoch": 0.5016533397462875, - "grad_norm": 1.8068107236225146, - "learning_rate": 2.0864389200167477e-06, - "loss": 0.9942, - "step": 4172 - }, - { - "epoch": 0.5017735826369266, - "grad_norm": 3.100833836054554, - "learning_rate": 2.0856606697018504e-06, - "loss": 1.0186, - "step": 4173 - }, - { - "epoch": 0.5018938255275657, - "grad_norm": 1.9476628333637296, - "learning_rate": 2.084882406392297e-06, - "loss": 0.9548, - "step": 4174 - }, - { - "epoch": 0.5020140684182047, - "grad_norm": 2.1641222122712933, - "learning_rate": 2.0841041302061496e-06, - "loss": 0.9305, - "step": 4175 - }, - { - "epoch": 0.5021343113088439, - "grad_norm": 1.8557264932004172, - "learning_rate": 2.083325841261473e-06, - "loss": 0.9772, - "step": 4176 - }, - { - "epoch": 0.502254554199483, - "grad_norm": 1.8489896386590186, - "learning_rate": 2.0825475396763322e-06, - "loss": 0.903, - "step": 4177 - }, - { - "epoch": 0.502374797090122, - "grad_norm": 1.440367782038386, - "learning_rate": 2.081769225568796e-06, - "loss": 0.8803, - "step": 4178 - }, - { - "epoch": 0.5024950399807612, - "grad_norm": 1.3485064050213276, - "learning_rate": 2.0809908990569327e-06, - "loss": 0.9882, - "step": 4179 - }, - { - "epoch": 0.5026152828714002, - "grad_norm": 1.820469206912339, - "learning_rate": 2.0802125602588146e-06, - "loss": 1.019, - "step": 4180 - }, - { - "epoch": 0.5027355257620393, - "grad_norm": 1.7271659797554941, - "learning_rate": 2.0794342092925146e-06, - "loss": 0.9006, - "step": 4181 - }, - { - "epoch": 0.5028557686526784, - "grad_norm": 1.938303319458667, - "learning_rate": 2.078655846276108e-06, - "loss": 0.9134, - "step": 4182 - }, - { - "epoch": 0.5029760115433175, - "grad_norm": 1.8884812488629927, - "learning_rate": 2.0778774713276727e-06, - "loss": 0.9065, - "step": 4183 - }, - { - "epoch": 0.5030962544339566, - "grad_norm": 6.1554044346137085, - "learning_rate": 2.077099084565287e-06, - "loss": 0.9002, - "step": 4184 - }, - { - "epoch": 0.5032164973245957, - "grad_norm": 2.346157007828611, - "learning_rate": 2.0763206861070313e-06, - "loss": 0.8793, - "step": 4185 - }, - { - "epoch": 0.5033367402152348, - "grad_norm": 2.077350558197698, - "learning_rate": 2.0755422760709876e-06, - "loss": 0.9812, - "step": 4186 - }, - { - "epoch": 0.5034569831058738, - "grad_norm": 2.4559201395178656, - "learning_rate": 2.0747638545752417e-06, - "loss": 0.9971, - "step": 4187 - }, - { - "epoch": 0.503577225996513, - "grad_norm": 2.1438932320926787, - "learning_rate": 2.073985421737878e-06, - "loss": 1.0606, - "step": 4188 - }, - { - "epoch": 0.5036974688871521, - "grad_norm": 2.113785604203011, - "learning_rate": 2.0732069776769844e-06, - "loss": 0.9746, - "step": 4189 - }, - { - "epoch": 0.5038177117777911, - "grad_norm": 2.198761643492213, - "learning_rate": 2.072428522510651e-06, - "loss": 0.962, - "step": 4190 - }, - { - "epoch": 0.5039379546684303, - "grad_norm": 2.887102279685335, - "learning_rate": 2.071650056356968e-06, - "loss": 0.9903, - "step": 4191 - }, - { - "epoch": 0.5040581975590693, - "grad_norm": 1.9252979840593756, - "learning_rate": 2.070871579334028e-06, - "loss": 1.0251, - "step": 4192 - }, - { - "epoch": 0.5041784404497084, - "grad_norm": 2.232972244944369, - "learning_rate": 2.0700930915599264e-06, - "loss": 0.954, - "step": 4193 - }, - { - "epoch": 0.5042986833403476, - "grad_norm": 2.302318842882274, - "learning_rate": 2.0693145931527583e-06, - "loss": 1.0125, - "step": 4194 - }, - { - "epoch": 0.5044189262309866, - "grad_norm": 1.5461904378904945, - "learning_rate": 2.068536084230622e-06, - "loss": 1.0122, - "step": 4195 - }, - { - "epoch": 0.5045391691216257, - "grad_norm": 1.9586525176718559, - "learning_rate": 2.067757564911616e-06, - "loss": 1.112, - "step": 4196 - }, - { - "epoch": 0.5046594120122648, - "grad_norm": 2.2899316172821873, - "learning_rate": 2.0669790353138407e-06, - "loss": 1.1523, - "step": 4197 - }, - { - "epoch": 0.5047796549029039, - "grad_norm": 2.07293972085628, - "learning_rate": 2.0662004955553995e-06, - "loss": 0.9595, - "step": 4198 - }, - { - "epoch": 0.5048998977935429, - "grad_norm": 1.9257709123747955, - "learning_rate": 2.065421945754395e-06, - "loss": 0.9923, - "step": 4199 - }, - { - "epoch": 0.505020140684182, - "grad_norm": 1.796246891536691, - "learning_rate": 2.0646433860289344e-06, - "loss": 1.009, - "step": 4200 - }, - { - "epoch": 0.5051403835748212, - "grad_norm": 1.8097494894091015, - "learning_rate": 2.0638648164971233e-06, - "loss": 1.0497, - "step": 4201 - }, - { - "epoch": 0.5052606264654602, - "grad_norm": 3.0647798732350613, - "learning_rate": 2.06308623727707e-06, - "loss": 1.1151, - "step": 4202 - }, - { - "epoch": 0.5053808693560993, - "grad_norm": 2.9611758334983302, - "learning_rate": 2.0623076484868846e-06, - "loss": 0.9853, - "step": 4203 - }, - { - "epoch": 0.5055011122467384, - "grad_norm": 0.9478093425372388, - "learning_rate": 2.061529050244679e-06, - "loss": 0.917, - "step": 4204 - }, - { - "epoch": 0.5056213551373775, - "grad_norm": 2.208035454962971, - "learning_rate": 2.060750442668565e-06, - "loss": 0.9785, - "step": 4205 - }, - { - "epoch": 0.5057415980280165, - "grad_norm": 2.1495521566909503, - "learning_rate": 2.059971825876657e-06, - "loss": 0.8704, - "step": 4206 - }, - { - "epoch": 0.5058618409186557, - "grad_norm": 1.6719964954759292, - "learning_rate": 2.0591931999870713e-06, - "loss": 0.9929, - "step": 4207 - }, - { - "epoch": 0.5059820838092948, - "grad_norm": 1.044029525057989, - "learning_rate": 2.0584145651179234e-06, - "loss": 0.8688, - "step": 4208 - }, - { - "epoch": 0.5061023266999338, - "grad_norm": 2.7626468981872656, - "learning_rate": 2.0576359213873327e-06, - "loss": 1.0116, - "step": 4209 - }, - { - "epoch": 0.506222569590573, - "grad_norm": 2.1111842555486398, - "learning_rate": 2.056857268913419e-06, - "loss": 0.9282, - "step": 4210 - }, - { - "epoch": 0.506342812481212, - "grad_norm": 2.003050504301982, - "learning_rate": 2.056078607814303e-06, - "loss": 1.0758, - "step": 4211 - }, - { - "epoch": 0.5064630553718511, - "grad_norm": 2.064227334543843, - "learning_rate": 2.055299938208106e-06, - "loss": 1.021, - "step": 4212 - }, - { - "epoch": 0.5065832982624903, - "grad_norm": 1.6721849956290522, - "learning_rate": 2.0545212602129526e-06, - "loss": 1.0898, - "step": 4213 - }, - { - "epoch": 0.5067035411531293, - "grad_norm": 16.831602907783694, - "learning_rate": 2.0537425739469673e-06, - "loss": 0.9026, - "step": 4214 - }, - { - "epoch": 0.5068237840437684, - "grad_norm": 0.9138369811859134, - "learning_rate": 2.052963879528276e-06, - "loss": 0.8741, - "step": 4215 - }, - { - "epoch": 0.5069440269344075, - "grad_norm": 2.2400357757844924, - "learning_rate": 2.052185177075007e-06, - "loss": 0.9928, - "step": 4216 - }, - { - "epoch": 0.5070642698250466, - "grad_norm": 1.6192273529716488, - "learning_rate": 2.051406466705288e-06, - "loss": 1.0583, - "step": 4217 - }, - { - "epoch": 0.5071845127156857, - "grad_norm": 1.9475126884546592, - "learning_rate": 2.0506277485372486e-06, - "loss": 1.0381, - "step": 4218 - }, - { - "epoch": 0.5073047556063248, - "grad_norm": 2.292400647156011, - "learning_rate": 2.04984902268902e-06, - "loss": 0.9019, - "step": 4219 - }, - { - "epoch": 0.5074249984969639, - "grad_norm": 2.052536683585309, - "learning_rate": 2.0490702892787345e-06, - "loss": 0.9757, - "step": 4220 - }, - { - "epoch": 0.5075452413876029, - "grad_norm": 1.87871920156203, - "learning_rate": 2.0482915484245246e-06, - "loss": 0.8539, - "step": 4221 - }, - { - "epoch": 0.5076654842782421, - "grad_norm": 2.9878746404331222, - "learning_rate": 2.047512800244526e-06, - "loss": 1.0605, - "step": 4222 - }, - { - "epoch": 0.5077857271688812, - "grad_norm": 1.7101702777118193, - "learning_rate": 2.046734044856873e-06, - "loss": 1.0172, - "step": 4223 - }, - { - "epoch": 0.5079059700595202, - "grad_norm": 1.7926366161813023, - "learning_rate": 2.045955282379702e-06, - "loss": 1.0336, - "step": 4224 - }, - { - "epoch": 0.5080262129501594, - "grad_norm": 3.680196998133165, - "learning_rate": 2.045176512931152e-06, - "loss": 0.9841, - "step": 4225 - }, - { - "epoch": 0.5081464558407984, - "grad_norm": 1.6852400446304743, - "learning_rate": 2.0443977366293604e-06, - "loss": 0.9894, - "step": 4226 - }, - { - "epoch": 0.5082666987314375, - "grad_norm": 1.4561068386713971, - "learning_rate": 2.043618953592468e-06, - "loss": 1.0068, - "step": 4227 - }, - { - "epoch": 0.5083869416220766, - "grad_norm": 1.623506299673064, - "learning_rate": 2.0428401639386144e-06, - "loss": 1.0411, - "step": 4228 - }, - { - "epoch": 0.5085071845127157, - "grad_norm": 0.9076341056519329, - "learning_rate": 2.042061367785943e-06, - "loss": 0.8827, - "step": 4229 - }, - { - "epoch": 0.5086274274033548, - "grad_norm": 2.5756693849578465, - "learning_rate": 2.041282565252594e-06, - "loss": 0.9769, - "step": 4230 - }, - { - "epoch": 0.5087476702939938, - "grad_norm": 1.8841017112125262, - "learning_rate": 2.040503756456714e-06, - "loss": 0.9926, - "step": 4231 - }, - { - "epoch": 0.508867913184633, - "grad_norm": 2.726323682934071, - "learning_rate": 2.0397249415164456e-06, - "loss": 1.0167, - "step": 4232 - }, - { - "epoch": 0.508988156075272, - "grad_norm": 1.4407562196098556, - "learning_rate": 2.0389461205499354e-06, - "loss": 1.0349, - "step": 4233 - }, - { - "epoch": 0.5091083989659111, - "grad_norm": 2.029422935998191, - "learning_rate": 2.03816729367533e-06, - "loss": 0.9593, - "step": 4234 - }, - { - "epoch": 0.5092286418565503, - "grad_norm": 2.1732465406456356, - "learning_rate": 2.0373884610107765e-06, - "loss": 0.9344, - "step": 4235 - }, - { - "epoch": 0.5093488847471893, - "grad_norm": 2.337813118754931, - "learning_rate": 2.0366096226744225e-06, - "loss": 0.9191, - "step": 4236 - }, - { - "epoch": 0.5094691276378284, - "grad_norm": 1.669888465948698, - "learning_rate": 2.035830778784418e-06, - "loss": 1.0013, - "step": 4237 - }, - { - "epoch": 0.5095893705284675, - "grad_norm": 3.6121972349860574, - "learning_rate": 2.0350519294589134e-06, - "loss": 1.0336, - "step": 4238 - }, - { - "epoch": 0.5097096134191066, - "grad_norm": 1.740341234618958, - "learning_rate": 2.0342730748160588e-06, - "loss": 1.0593, - "step": 4239 - }, - { - "epoch": 0.5098298563097456, - "grad_norm": 2.0573641092285766, - "learning_rate": 2.033494214974006e-06, - "loss": 0.931, - "step": 4240 - }, - { - "epoch": 0.5099500992003848, - "grad_norm": 1.7666648570837193, - "learning_rate": 2.0327153500509067e-06, - "loss": 1.0631, - "step": 4241 - }, - { - "epoch": 0.5100703420910239, - "grad_norm": 2.1395049123188348, - "learning_rate": 2.031936480164916e-06, - "loss": 1.0809, - "step": 4242 - }, - { - "epoch": 0.5101905849816629, - "grad_norm": 2.2595665266183493, - "learning_rate": 2.0311576054341857e-06, - "loss": 1.0333, - "step": 4243 - }, - { - "epoch": 0.5103108278723021, - "grad_norm": 1.8354019357852926, - "learning_rate": 2.0303787259768715e-06, - "loss": 0.8625, - "step": 4244 - }, - { - "epoch": 0.5104310707629411, - "grad_norm": 2.3526963450173315, - "learning_rate": 2.0295998419111294e-06, - "loss": 0.915, - "step": 4245 - }, - { - "epoch": 0.5105513136535802, - "grad_norm": 2.3775857602284742, - "learning_rate": 2.028820953355115e-06, - "loss": 0.9626, - "step": 4246 - }, - { - "epoch": 0.5106715565442194, - "grad_norm": 1.789613197560049, - "learning_rate": 2.0280420604269834e-06, - "loss": 1.0153, - "step": 4247 - }, - { - "epoch": 0.5107917994348584, - "grad_norm": 0.7946208036923108, - "learning_rate": 2.027263163244895e-06, - "loss": 0.8754, - "step": 4248 - }, - { - "epoch": 0.5109120423254975, - "grad_norm": 1.5818940279753313, - "learning_rate": 2.026484261927005e-06, - "loss": 0.9694, - "step": 4249 - }, - { - "epoch": 0.5110322852161366, - "grad_norm": 2.284110796844843, - "learning_rate": 2.025705356591475e-06, - "loss": 0.9604, - "step": 4250 - }, - { - "epoch": 0.5111525281067757, - "grad_norm": 0.8675770958383803, - "learning_rate": 2.024926447356462e-06, - "loss": 0.853, - "step": 4251 - }, - { - "epoch": 0.5112727709974147, - "grad_norm": 1.9157560369034803, - "learning_rate": 2.024147534340127e-06, - "loss": 1.018, - "step": 4252 - }, - { - "epoch": 0.5113930138880539, - "grad_norm": 1.744083211366059, - "learning_rate": 2.02336861766063e-06, - "loss": 1.024, - "step": 4253 - }, - { - "epoch": 0.511513256778693, - "grad_norm": 3.833390332226681, - "learning_rate": 2.0225896974361327e-06, - "loss": 1.009, - "step": 4254 - }, - { - "epoch": 0.511633499669332, - "grad_norm": 0.9717450757285878, - "learning_rate": 2.0218107737847962e-06, - "loss": 0.8866, - "step": 4255 - }, - { - "epoch": 0.5117537425599712, - "grad_norm": 1.9859634409725155, - "learning_rate": 2.0210318468247826e-06, - "loss": 0.9916, - "step": 4256 - }, - { - "epoch": 0.5118739854506102, - "grad_norm": 1.6740297354104712, - "learning_rate": 2.020252916674255e-06, - "loss": 1.0461, - "step": 4257 - }, - { - "epoch": 0.5119942283412493, - "grad_norm": 1.7097501074697072, - "learning_rate": 2.019473983451375e-06, - "loss": 1.0379, - "step": 4258 - }, - { - "epoch": 0.5121144712318885, - "grad_norm": 2.0931600622075006, - "learning_rate": 2.0186950472743076e-06, - "loss": 0.9395, - "step": 4259 - }, - { - "epoch": 0.5122347141225275, - "grad_norm": 3.7917975238953936, - "learning_rate": 2.0179161082612162e-06, - "loss": 0.9711, - "step": 4260 - }, - { - "epoch": 0.5123549570131666, - "grad_norm": 4.185746466309756, - "learning_rate": 2.017137166530266e-06, - "loss": 0.9497, - "step": 4261 - }, - { - "epoch": 0.5124751999038056, - "grad_norm": 1.7900182844538766, - "learning_rate": 2.0163582221996213e-06, - "loss": 1.0301, - "step": 4262 - }, - { - "epoch": 0.5125954427944448, - "grad_norm": 2.8679173343614623, - "learning_rate": 2.015579275387446e-06, - "loss": 0.9116, - "step": 4263 - }, - { - "epoch": 0.5127156856850839, - "grad_norm": 1.7724092838547842, - "learning_rate": 2.0148003262119085e-06, - "loss": 0.9201, - "step": 4264 - }, - { - "epoch": 0.5128359285757229, - "grad_norm": 1.7534658987940548, - "learning_rate": 2.0140213747911728e-06, - "loss": 0.9915, - "step": 4265 - }, - { - "epoch": 0.5129561714663621, - "grad_norm": 1.9275672167624478, - "learning_rate": 2.013242421243406e-06, - "loss": 1.0304, - "step": 4266 - }, - { - "epoch": 0.5130764143570011, - "grad_norm": 1.6831778523744896, - "learning_rate": 2.012463465686774e-06, - "loss": 1.0163, - "step": 4267 - }, - { - "epoch": 0.5131966572476402, - "grad_norm": 0.9492959513097405, - "learning_rate": 2.0116845082394446e-06, - "loss": 0.8321, - "step": 4268 - }, - { - "epoch": 0.5133169001382794, - "grad_norm": 1.746175333118381, - "learning_rate": 2.0109055490195836e-06, - "loss": 1.0199, - "step": 4269 - }, - { - "epoch": 0.5134371430289184, - "grad_norm": 1.9765614747056073, - "learning_rate": 2.0101265881453605e-06, - "loss": 0.8669, - "step": 4270 - }, - { - "epoch": 0.5135573859195575, - "grad_norm": 1.940040583923413, - "learning_rate": 2.009347625734941e-06, - "loss": 1.0205, - "step": 4271 - }, - { - "epoch": 0.5136776288101966, - "grad_norm": 2.1319189190932875, - "learning_rate": 2.0085686619064954e-06, - "loss": 0.9813, - "step": 4272 - }, - { - "epoch": 0.5137978717008357, - "grad_norm": 2.618959197289849, - "learning_rate": 2.00778969677819e-06, - "loss": 1.0644, - "step": 4273 - }, - { - "epoch": 0.5139181145914747, - "grad_norm": 1.9044486117431754, - "learning_rate": 2.0070107304681934e-06, - "loss": 0.881, - "step": 4274 - }, - { - "epoch": 0.5140383574821139, - "grad_norm": 1.7510671767676707, - "learning_rate": 2.006231763094675e-06, - "loss": 1.0113, - "step": 4275 - }, - { - "epoch": 0.514158600372753, - "grad_norm": 1.8510707411435283, - "learning_rate": 2.0054527947758027e-06, - "loss": 1.0974, - "step": 4276 - }, - { - "epoch": 0.514278843263392, - "grad_norm": 0.7980340494478064, - "learning_rate": 2.004673825629746e-06, - "loss": 0.8233, - "step": 4277 - }, - { - "epoch": 0.5143990861540312, - "grad_norm": 1.5788697855276412, - "learning_rate": 2.0038948557746744e-06, - "loss": 0.9487, - "step": 4278 - }, - { - "epoch": 0.5145193290446702, - "grad_norm": 1.6848663689121783, - "learning_rate": 2.0031158853287558e-06, - "loss": 0.9812, - "step": 4279 - }, - { - "epoch": 0.5146395719353093, - "grad_norm": 2.501694581775933, - "learning_rate": 2.0023369144101593e-06, - "loss": 0.9507, - "step": 4280 - }, - { - "epoch": 0.5147598148259485, - "grad_norm": 2.6560137475358836, - "learning_rate": 2.0015579431370555e-06, - "loss": 0.9885, - "step": 4281 - }, - { - "epoch": 0.5148800577165875, - "grad_norm": 2.9215449998514873, - "learning_rate": 2.000778971627612e-06, - "loss": 0.929, - "step": 4282 - }, - { - "epoch": 0.5150003006072266, - "grad_norm": 1.7491878259744176, - "learning_rate": 2e-06, - "loss": 1.1281, - "step": 4283 - }, - { - "epoch": 0.5151205434978657, - "grad_norm": 4.929220089766674, - "learning_rate": 1.9992210283723878e-06, - "loss": 1.0938, - "step": 4284 - }, - { - "epoch": 0.5152407863885048, - "grad_norm": 2.803941547791283, - "learning_rate": 1.9984420568629448e-06, - "loss": 1.0144, - "step": 4285 - }, - { - "epoch": 0.5153610292791438, - "grad_norm": 1.908036504868107, - "learning_rate": 1.9976630855898405e-06, - "loss": 1.0137, - "step": 4286 - }, - { - "epoch": 0.515481272169783, - "grad_norm": 1.9344557565030527, - "learning_rate": 1.9968841146712445e-06, - "loss": 0.9714, - "step": 4287 - }, - { - "epoch": 0.5156015150604221, - "grad_norm": 1.4507964972052767, - "learning_rate": 1.996105144225326e-06, - "loss": 0.9412, - "step": 4288 - }, - { - "epoch": 0.5157217579510611, - "grad_norm": 1.812685529487022, - "learning_rate": 1.995326174370254e-06, - "loss": 1.0232, - "step": 4289 - }, - { - "epoch": 0.5158420008417003, - "grad_norm": 3.9567834865054254, - "learning_rate": 1.994547205224197e-06, - "loss": 0.9537, - "step": 4290 - }, - { - "epoch": 0.5159622437323393, - "grad_norm": 1.8993885185661883, - "learning_rate": 1.993768236905325e-06, - "loss": 0.9057, - "step": 4291 - }, - { - "epoch": 0.5160824866229784, - "grad_norm": 1.9260615208704353, - "learning_rate": 1.992989269531807e-06, - "loss": 0.888, - "step": 4292 - }, - { - "epoch": 0.5162027295136175, - "grad_norm": 3.609883590469804, - "learning_rate": 1.99221030322181e-06, - "loss": 0.9086, - "step": 4293 - }, - { - "epoch": 0.5163229724042566, - "grad_norm": 1.5031169454949627, - "learning_rate": 1.991431338093505e-06, - "loss": 1.0356, - "step": 4294 - }, - { - "epoch": 0.5164432152948957, - "grad_norm": 1.6951878233740068, - "learning_rate": 1.9906523742650587e-06, - "loss": 1.0172, - "step": 4295 - }, - { - "epoch": 0.5165634581855347, - "grad_norm": 1.8193364616880618, - "learning_rate": 1.9898734118546397e-06, - "loss": 0.9892, - "step": 4296 - }, - { - "epoch": 0.5166837010761739, - "grad_norm": 1.439624460445302, - "learning_rate": 1.989094450980416e-06, - "loss": 1.0342, - "step": 4297 - }, - { - "epoch": 0.516803943966813, - "grad_norm": 2.346364000464242, - "learning_rate": 1.9883154917605556e-06, - "loss": 0.9964, - "step": 4298 - }, - { - "epoch": 0.516924186857452, - "grad_norm": 2.2235258040649706, - "learning_rate": 1.9875365343132262e-06, - "loss": 1.0553, - "step": 4299 - }, - { - "epoch": 0.5170444297480912, - "grad_norm": 2.1547575498706033, - "learning_rate": 1.9867575787565946e-06, - "loss": 1.071, - "step": 4300 - }, - { - "epoch": 0.5171646726387302, - "grad_norm": 1.8846162252559016, - "learning_rate": 1.9859786252088275e-06, - "loss": 1.0931, - "step": 4301 - }, - { - "epoch": 0.5172849155293693, - "grad_norm": 3.044741152330093, - "learning_rate": 1.9851996737880914e-06, - "loss": 0.8971, - "step": 4302 - }, - { - "epoch": 0.5174051584200084, - "grad_norm": 2.3923170297588117, - "learning_rate": 1.9844207246125537e-06, - "loss": 0.9767, - "step": 4303 - }, - { - "epoch": 0.5175254013106475, - "grad_norm": 1.8578474678361292, - "learning_rate": 1.983641777800379e-06, - "loss": 0.9114, - "step": 4304 - }, - { - "epoch": 0.5176456442012866, - "grad_norm": 1.0244769576177681, - "learning_rate": 1.9828628334697343e-06, - "loss": 0.8583, - "step": 4305 - }, - { - "epoch": 0.5177658870919257, - "grad_norm": 0.8038837529807572, - "learning_rate": 1.982083891738784e-06, - "loss": 0.8257, - "step": 4306 - }, - { - "epoch": 0.5178861299825648, - "grad_norm": 1.4245017803659021, - "learning_rate": 1.9813049527256923e-06, - "loss": 1.0572, - "step": 4307 - }, - { - "epoch": 0.5180063728732038, - "grad_norm": 3.881686686399621, - "learning_rate": 1.9805260165486252e-06, - "loss": 1.0659, - "step": 4308 - }, - { - "epoch": 0.518126615763843, - "grad_norm": 1.980634272352948, - "learning_rate": 1.9797470833257457e-06, - "loss": 1.0967, - "step": 4309 - }, - { - "epoch": 0.5182468586544821, - "grad_norm": 3.219280014103169, - "learning_rate": 1.9789681531752177e-06, - "loss": 1.0069, - "step": 4310 - }, - { - "epoch": 0.5183671015451211, - "grad_norm": 1.4133130189647285, - "learning_rate": 1.978189226215204e-06, - "loss": 0.9546, - "step": 4311 - }, - { - "epoch": 0.5184873444357603, - "grad_norm": 2.051588615257075, - "learning_rate": 1.9774103025638675e-06, - "loss": 1.0014, - "step": 4312 - }, - { - "epoch": 0.5186075873263993, - "grad_norm": 1.4387998094727743, - "learning_rate": 1.9766313823393696e-06, - "loss": 0.9924, - "step": 4313 - }, - { - "epoch": 0.5187278302170384, - "grad_norm": 2.355762267289582, - "learning_rate": 1.975852465659873e-06, - "loss": 0.9191, - "step": 4314 - }, - { - "epoch": 0.5188480731076776, - "grad_norm": 2.4549342001742245, - "learning_rate": 1.9750735526435377e-06, - "loss": 0.9327, - "step": 4315 - }, - { - "epoch": 0.5189683159983166, - "grad_norm": 2.4054152787235004, - "learning_rate": 1.974294643408525e-06, - "loss": 1.0296, - "step": 4316 - }, - { - "epoch": 0.5190885588889557, - "grad_norm": 1.9517740689290344, - "learning_rate": 1.9735157380729947e-06, - "loss": 0.9002, - "step": 4317 - }, - { - "epoch": 0.5192088017795948, - "grad_norm": 1.9071648075479513, - "learning_rate": 1.9727368367551053e-06, - "loss": 1.0661, - "step": 4318 - }, - { - "epoch": 0.5193290446702339, - "grad_norm": 1.71845964233318, - "learning_rate": 1.9719579395730164e-06, - "loss": 0.9157, - "step": 4319 - }, - { - "epoch": 0.5194492875608729, - "grad_norm": 2.05256595519541, - "learning_rate": 1.9711790466448854e-06, - "loss": 1.158, - "step": 4320 - }, - { - "epoch": 0.5195695304515121, - "grad_norm": 2.0450226067596913, - "learning_rate": 1.9704001580888704e-06, - "loss": 0.9336, - "step": 4321 - }, - { - "epoch": 0.5196897733421512, - "grad_norm": 3.445604653633116, - "learning_rate": 1.9696212740231283e-06, - "loss": 1.1048, - "step": 4322 - }, - { - "epoch": 0.5198100162327902, - "grad_norm": 2.050726840796093, - "learning_rate": 1.9688423945658146e-06, - "loss": 1.0534, - "step": 4323 - }, - { - "epoch": 0.5199302591234293, - "grad_norm": 2.4507285883714425, - "learning_rate": 1.9680635198350845e-06, - "loss": 0.9459, - "step": 4324 - }, - { - "epoch": 0.5200505020140684, - "grad_norm": 1.8103110150529174, - "learning_rate": 1.967284649949093e-06, - "loss": 0.9538, - "step": 4325 - }, - { - "epoch": 0.5201707449047075, - "grad_norm": 1.8029242815398046, - "learning_rate": 1.966505785025994e-06, - "loss": 0.9541, - "step": 4326 - }, - { - "epoch": 0.5202909877953465, - "grad_norm": 1.9418255911472395, - "learning_rate": 1.965726925183941e-06, - "loss": 0.999, - "step": 4327 - }, - { - "epoch": 0.5204112306859857, - "grad_norm": 2.139434260517982, - "learning_rate": 1.964948070541087e-06, - "loss": 1.0735, - "step": 4328 - }, - { - "epoch": 0.5205314735766248, - "grad_norm": 2.8204284813193374, - "learning_rate": 1.9641692212155816e-06, - "loss": 0.9144, - "step": 4329 - }, - { - "epoch": 0.5206517164672638, - "grad_norm": 1.6845607591441067, - "learning_rate": 1.9633903773255777e-06, - "loss": 0.9636, - "step": 4330 - }, - { - "epoch": 0.520771959357903, - "grad_norm": 1.5715446382858878, - "learning_rate": 1.9626115389892237e-06, - "loss": 0.9823, - "step": 4331 - }, - { - "epoch": 0.520892202248542, - "grad_norm": 2.027739205131444, - "learning_rate": 1.96183270632467e-06, - "loss": 1.0819, - "step": 4332 - }, - { - "epoch": 0.5210124451391811, - "grad_norm": 1.5100322453422201, - "learning_rate": 1.9610538794500644e-06, - "loss": 1.0173, - "step": 4333 - }, - { - "epoch": 0.5211326880298203, - "grad_norm": 0.8091955479405604, - "learning_rate": 1.9602750584835542e-06, - "loss": 0.8639, - "step": 4334 - }, - { - "epoch": 0.5212529309204593, - "grad_norm": 2.0255744872699704, - "learning_rate": 1.959496243543286e-06, - "loss": 1.059, - "step": 4335 - }, - { - "epoch": 0.5213731738110984, - "grad_norm": 1.983104990254996, - "learning_rate": 1.9587174347474057e-06, - "loss": 1.0247, - "step": 4336 - }, - { - "epoch": 0.5214934167017375, - "grad_norm": 3.364731319861721, - "learning_rate": 1.9579386322140574e-06, - "loss": 1.039, - "step": 4337 - }, - { - "epoch": 0.5216136595923766, - "grad_norm": 1.7459839651938007, - "learning_rate": 1.9571598360613854e-06, - "loss": 1.0377, - "step": 4338 - }, - { - "epoch": 0.5217339024830157, - "grad_norm": 2.016033988681259, - "learning_rate": 1.956381046407532e-06, - "loss": 0.9306, - "step": 4339 - }, - { - "epoch": 0.5218541453736548, - "grad_norm": 1.9759010142888793, - "learning_rate": 1.9556022633706394e-06, - "loss": 1.0857, - "step": 4340 - }, - { - "epoch": 0.5219743882642939, - "grad_norm": 1.593324641891673, - "learning_rate": 1.954823487068848e-06, - "loss": 1.0253, - "step": 4341 - }, - { - "epoch": 0.5220946311549329, - "grad_norm": 1.5897302235553372, - "learning_rate": 1.9540447176202976e-06, - "loss": 1.0385, - "step": 4342 - }, - { - "epoch": 0.5222148740455721, - "grad_norm": 1.0554470576747372, - "learning_rate": 1.9532659551431272e-06, - "loss": 0.8842, - "step": 4343 - }, - { - "epoch": 0.5223351169362112, - "grad_norm": 1.50460220681439, - "learning_rate": 1.9524871997554744e-06, - "loss": 0.8959, - "step": 4344 - }, - { - "epoch": 0.5224553598268502, - "grad_norm": 2.0727660413550835, - "learning_rate": 1.951708451575475e-06, - "loss": 1.0262, - "step": 4345 - }, - { - "epoch": 0.5225756027174894, - "grad_norm": 1.6864365653065936, - "learning_rate": 1.9509297107212657e-06, - "loss": 1.0461, - "step": 4346 - }, - { - "epoch": 0.5226958456081284, - "grad_norm": 1.5229107068210919, - "learning_rate": 1.95015097731098e-06, - "loss": 1.0242, - "step": 4347 - }, - { - "epoch": 0.5228160884987675, - "grad_norm": 2.1843809507013345, - "learning_rate": 1.949372251462751e-06, - "loss": 1.0455, - "step": 4348 - }, - { - "epoch": 0.5229363313894067, - "grad_norm": 3.0186782164953674, - "learning_rate": 1.9485935332947124e-06, - "loss": 1.0543, - "step": 4349 - }, - { - "epoch": 0.5230565742800457, - "grad_norm": 2.3620934393906508, - "learning_rate": 1.947814822924993e-06, - "loss": 1.0619, - "step": 4350 - }, - { - "epoch": 0.5231768171706848, - "grad_norm": 2.1826133642653875, - "learning_rate": 1.9470361204717236e-06, - "loss": 1.0555, - "step": 4351 - }, - { - "epoch": 0.5232970600613239, - "grad_norm": 1.6980171953954897, - "learning_rate": 1.9462574260530326e-06, - "loss": 1.041, - "step": 4352 - }, - { - "epoch": 0.523417302951963, - "grad_norm": 1.733695782155415, - "learning_rate": 1.9454787397870472e-06, - "loss": 1.038, - "step": 4353 - }, - { - "epoch": 0.523537545842602, - "grad_norm": 1.7896510326412893, - "learning_rate": 1.944700061791894e-06, - "loss": 0.9447, - "step": 4354 - }, - { - "epoch": 0.5236577887332411, - "grad_norm": 4.788096394256122, - "learning_rate": 1.943921392185698e-06, - "loss": 0.8851, - "step": 4355 - }, - { - "epoch": 0.5237780316238803, - "grad_norm": 2.13415842591164, - "learning_rate": 1.9431427310865814e-06, - "loss": 1.0002, - "step": 4356 - }, - { - "epoch": 0.5238982745145193, - "grad_norm": 1.5538192701665212, - "learning_rate": 1.942364078612667e-06, - "loss": 1.0227, - "step": 4357 - }, - { - "epoch": 0.5240185174051584, - "grad_norm": 1.8730307728509836, - "learning_rate": 1.9415854348820765e-06, - "loss": 0.9809, - "step": 4358 - }, - { - "epoch": 0.5241387602957975, - "grad_norm": 7.729175029520478, - "learning_rate": 1.940806800012929e-06, - "loss": 0.8998, - "step": 4359 - }, - { - "epoch": 0.5242590031864366, - "grad_norm": 5.37034618377111, - "learning_rate": 1.9400281741233432e-06, - "loss": 0.8676, - "step": 4360 - }, - { - "epoch": 0.5243792460770756, - "grad_norm": 0.6963056693430707, - "learning_rate": 1.939249557331435e-06, - "loss": 0.7898, - "step": 4361 - }, - { - "epoch": 0.5244994889677148, - "grad_norm": 1.8007041931215377, - "learning_rate": 1.938470949755321e-06, - "loss": 0.9539, - "step": 4362 - }, - { - "epoch": 0.5246197318583539, - "grad_norm": 0.9205062856485487, - "learning_rate": 1.937692351513115e-06, - "loss": 0.8538, - "step": 4363 - }, - { - "epoch": 0.5247399747489929, - "grad_norm": 1.7494501885068492, - "learning_rate": 1.9369137627229297e-06, - "loss": 1.0328, - "step": 4364 - }, - { - "epoch": 0.5248602176396321, - "grad_norm": 1.939086141757253, - "learning_rate": 1.936135183502877e-06, - "loss": 1.1073, - "step": 4365 - }, - { - "epoch": 0.5249804605302711, - "grad_norm": 1.972327780995503, - "learning_rate": 1.935356613971066e-06, - "loss": 1.0372, - "step": 4366 - }, - { - "epoch": 0.5251007034209102, - "grad_norm": 1.5866794447958668, - "learning_rate": 1.9345780542456047e-06, - "loss": 0.9954, - "step": 4367 - }, - { - "epoch": 0.5252209463115494, - "grad_norm": 1.9913558121493644, - "learning_rate": 1.9337995044446007e-06, - "loss": 0.9508, - "step": 4368 - }, - { - "epoch": 0.5253411892021884, - "grad_norm": 1.803777027416701, - "learning_rate": 1.9330209646861596e-06, - "loss": 1.0281, - "step": 4369 - }, - { - "epoch": 0.5254614320928275, - "grad_norm": 1.5152874600227098, - "learning_rate": 1.9322424350883843e-06, - "loss": 1.0055, - "step": 4370 - }, - { - "epoch": 0.5255816749834666, - "grad_norm": 1.9935482389432297, - "learning_rate": 1.931463915769379e-06, - "loss": 1.0092, - "step": 4371 - }, - { - "epoch": 0.5257019178741057, - "grad_norm": 2.0786937422947545, - "learning_rate": 1.930685406847242e-06, - "loss": 0.9672, - "step": 4372 - }, - { - "epoch": 0.5258221607647448, - "grad_norm": 1.523185983093666, - "learning_rate": 1.9299069084400734e-06, - "loss": 1.0513, - "step": 4373 - }, - { - "epoch": 0.5259424036553839, - "grad_norm": 2.104524135125836, - "learning_rate": 1.9291284206659717e-06, - "loss": 0.918, - "step": 4374 - }, - { - "epoch": 0.526062646546023, - "grad_norm": 1.8336828144353887, - "learning_rate": 1.928349943643032e-06, - "loss": 0.9451, - "step": 4375 - }, - { - "epoch": 0.526182889436662, - "grad_norm": 1.6973902894692623, - "learning_rate": 1.9275714774893493e-06, - "loss": 1.0604, - "step": 4376 - }, - { - "epoch": 0.5263031323273012, - "grad_norm": 2.2125915163585255, - "learning_rate": 1.9267930223230154e-06, - "loss": 0.962, - "step": 4377 - }, - { - "epoch": 0.5264233752179402, - "grad_norm": 1.9827139202409438, - "learning_rate": 1.9260145782621224e-06, - "loss": 1.0161, - "step": 4378 - }, - { - "epoch": 0.5265436181085793, - "grad_norm": 1.794093757680026, - "learning_rate": 1.925236145424758e-06, - "loss": 1.1033, - "step": 4379 - }, - { - "epoch": 0.5266638609992185, - "grad_norm": 0.7115227941425859, - "learning_rate": 1.924457723929012e-06, - "loss": 0.8336, - "step": 4380 - }, - { - "epoch": 0.5267841038898575, - "grad_norm": 1.3904109318589895, - "learning_rate": 1.9236793138929685e-06, - "loss": 1.056, - "step": 4381 - }, - { - "epoch": 0.5269043467804966, - "grad_norm": 2.201155844412567, - "learning_rate": 1.9229009154347133e-06, - "loss": 1.0403, - "step": 4382 - }, - { - "epoch": 0.5270245896711357, - "grad_norm": 1.8590445345037623, - "learning_rate": 1.922122528672327e-06, - "loss": 1.0329, - "step": 4383 - }, - { - "epoch": 0.5271448325617748, - "grad_norm": 2.787469916758693, - "learning_rate": 1.9213441537238914e-06, - "loss": 1.009, - "step": 4384 - }, - { - "epoch": 0.5272650754524139, - "grad_norm": 1.1821470074093028, - "learning_rate": 1.920565790707485e-06, - "loss": 0.885, - "step": 4385 - }, - { - "epoch": 0.527385318343053, - "grad_norm": 2.144204814920925, - "learning_rate": 1.9197874397411853e-06, - "loss": 0.8894, - "step": 4386 - }, - { - "epoch": 0.5275055612336921, - "grad_norm": 2.9388872482721484, - "learning_rate": 1.919009100943067e-06, - "loss": 0.8875, - "step": 4387 - }, - { - "epoch": 0.5276258041243311, - "grad_norm": 3.1170120381088253, - "learning_rate": 1.9182307744312043e-06, - "loss": 0.8859, - "step": 4388 - }, - { - "epoch": 0.5277460470149702, - "grad_norm": 1.5604383218414506, - "learning_rate": 1.9174524603236676e-06, - "loss": 0.9912, - "step": 4389 - }, - { - "epoch": 0.5278662899056094, - "grad_norm": 1.9734925565190222, - "learning_rate": 1.916674158738527e-06, - "loss": 0.994, - "step": 4390 - }, - { - "epoch": 0.5279865327962484, - "grad_norm": 1.908997853393307, - "learning_rate": 1.9158958697938506e-06, - "loss": 0.8303, - "step": 4391 - }, - { - "epoch": 0.5281067756868875, - "grad_norm": 7.047992220195586, - "learning_rate": 1.9151175936077032e-06, - "loss": 1.0899, - "step": 4392 - }, - { - "epoch": 0.5282270185775266, - "grad_norm": 1.7144365457188155, - "learning_rate": 1.9143393302981507e-06, - "loss": 1.02, - "step": 4393 - }, - { - "epoch": 0.5283472614681657, - "grad_norm": 1.6047987085008226, - "learning_rate": 1.913561079983252e-06, - "loss": 1.0617, - "step": 4394 - }, - { - "epoch": 0.5284675043588047, - "grad_norm": 3.2319002128153187, - "learning_rate": 1.9127828427810693e-06, - "loss": 0.9831, - "step": 4395 - }, - { - "epoch": 0.5285877472494439, - "grad_norm": 2.191131955420128, - "learning_rate": 1.9120046188096607e-06, - "loss": 1.0401, - "step": 4396 - }, - { - "epoch": 0.528707990140083, - "grad_norm": 1.8545746586577534, - "learning_rate": 1.9112264081870804e-06, - "loss": 0.9771, - "step": 4397 - }, - { - "epoch": 0.528828233030722, - "grad_norm": 1.8792434358105359, - "learning_rate": 1.9104482110313843e-06, - "loss": 0.9834, - "step": 4398 - }, - { - "epoch": 0.5289484759213612, - "grad_norm": 2.8349394079659582, - "learning_rate": 1.909670027460623e-06, - "loss": 0.9753, - "step": 4399 - }, - { - "epoch": 0.5290687188120002, - "grad_norm": 3.214823874577838, - "learning_rate": 1.908891857592847e-06, - "loss": 0.9409, - "step": 4400 - }, - { - "epoch": 0.5291889617026393, - "grad_norm": 1.9056496847473785, - "learning_rate": 1.9081137015461034e-06, - "loss": 1.1209, - "step": 4401 - }, - { - "epoch": 0.5293092045932785, - "grad_norm": 3.0737360147512516, - "learning_rate": 1.9073355594384383e-06, - "loss": 1.1309, - "step": 4402 - }, - { - "epoch": 0.5294294474839175, - "grad_norm": 1.836222070594527, - "learning_rate": 1.906557431387895e-06, - "loss": 1.0349, - "step": 4403 - }, - { - "epoch": 0.5295496903745566, - "grad_norm": 1.8963906513473694, - "learning_rate": 1.905779317512516e-06, - "loss": 1.0159, - "step": 4404 - }, - { - "epoch": 0.5296699332651957, - "grad_norm": 1.766509788073608, - "learning_rate": 1.9050012179303385e-06, - "loss": 1.0329, - "step": 4405 - }, - { - "epoch": 0.5297901761558348, - "grad_norm": 8.971334796030128, - "learning_rate": 1.904223132759401e-06, - "loss": 0.913, - "step": 4406 - }, - { - "epoch": 0.5299104190464738, - "grad_norm": 2.3420207447139862, - "learning_rate": 1.9034450621177383e-06, - "loss": 0.9135, - "step": 4407 - }, - { - "epoch": 0.530030661937113, - "grad_norm": 2.2450149200895213, - "learning_rate": 1.9026670061233824e-06, - "loss": 0.9357, - "step": 4408 - }, - { - "epoch": 0.5301509048277521, - "grad_norm": 3.310452386188435, - "learning_rate": 1.901888964894365e-06, - "loss": 1.0371, - "step": 4409 - }, - { - "epoch": 0.5302711477183911, - "grad_norm": 1.7506799739226326, - "learning_rate": 1.9011109385487134e-06, - "loss": 0.9117, - "step": 4410 - }, - { - "epoch": 0.5303913906090303, - "grad_norm": 2.0077290202201175, - "learning_rate": 1.900332927204454e-06, - "loss": 0.8844, - "step": 4411 - }, - { - "epoch": 0.5305116334996693, - "grad_norm": 1.8330719356874758, - "learning_rate": 1.8995549309796097e-06, - "loss": 0.999, - "step": 4412 - }, - { - "epoch": 0.5306318763903084, - "grad_norm": 1.6821194098969972, - "learning_rate": 1.8987769499922028e-06, - "loss": 0.9917, - "step": 4413 - }, - { - "epoch": 0.5307521192809476, - "grad_norm": 1.9999163133278706, - "learning_rate": 1.897998984360252e-06, - "loss": 0.9387, - "step": 4414 - }, - { - "epoch": 0.5308723621715866, - "grad_norm": 1.3792027054809932, - "learning_rate": 1.897221034201775e-06, - "loss": 1.0082, - "step": 4415 - }, - { - "epoch": 0.5309926050622257, - "grad_norm": 1.3956026294656148, - "learning_rate": 1.8964430996347842e-06, - "loss": 0.8952, - "step": 4416 - }, - { - "epoch": 0.5311128479528648, - "grad_norm": 1.6245331460395558, - "learning_rate": 1.8956651807772931e-06, - "loss": 1.0518, - "step": 4417 - }, - { - "epoch": 0.5312330908435039, - "grad_norm": 1.7865900004126813, - "learning_rate": 1.8948872777473115e-06, - "loss": 1.0636, - "step": 4418 - }, - { - "epoch": 0.531353333734143, - "grad_norm": 1.6308696815027999, - "learning_rate": 1.8941093906628458e-06, - "loss": 0.8631, - "step": 4419 - }, - { - "epoch": 0.531473576624782, - "grad_norm": 1.868137070280857, - "learning_rate": 1.893331519641902e-06, - "loss": 0.9434, - "step": 4420 - }, - { - "epoch": 0.5315938195154212, - "grad_norm": 2.154816302902726, - "learning_rate": 1.8925536648024815e-06, - "loss": 0.9758, - "step": 4421 - }, - { - "epoch": 0.5317140624060602, - "grad_norm": 1.744645715179311, - "learning_rate": 1.8917758262625849e-06, - "loss": 0.9872, - "step": 4422 - }, - { - "epoch": 0.5318343052966993, - "grad_norm": 1.764909042089525, - "learning_rate": 1.8909980041402089e-06, - "loss": 1.0431, - "step": 4423 - }, - { - "epoch": 0.5319545481873384, - "grad_norm": 2.2664976346811256, - "learning_rate": 1.8902201985533494e-06, - "loss": 0.8864, - "step": 4424 - }, - { - "epoch": 0.5320747910779775, - "grad_norm": 3.360136575647906, - "learning_rate": 1.8894424096199983e-06, - "loss": 0.9818, - "step": 4425 - }, - { - "epoch": 0.5321950339686166, - "grad_norm": 1.7022206416439511, - "learning_rate": 1.8886646374581463e-06, - "loss": 1.0893, - "step": 4426 - }, - { - "epoch": 0.5323152768592557, - "grad_norm": 2.8724433477673346, - "learning_rate": 1.8878868821857795e-06, - "loss": 0.9329, - "step": 4427 - }, - { - "epoch": 0.5324355197498948, - "grad_norm": 3.4469120525122054, - "learning_rate": 1.8871091439208838e-06, - "loss": 0.9753, - "step": 4428 - }, - { - "epoch": 0.5325557626405338, - "grad_norm": 2.350323017727285, - "learning_rate": 1.8863314227814414e-06, - "loss": 1.0056, - "step": 4429 - }, - { - "epoch": 0.532676005531173, - "grad_norm": 2.2269591931774126, - "learning_rate": 1.8855537188854313e-06, - "loss": 0.7125, - "step": 4430 - }, - { - "epoch": 0.5327962484218121, - "grad_norm": 1.8972563660309125, - "learning_rate": 1.8847760323508315e-06, - "loss": 1.0111, - "step": 4431 - }, - { - "epoch": 0.5329164913124511, - "grad_norm": 1.6523353810921337, - "learning_rate": 1.883998363295616e-06, - "loss": 0.9853, - "step": 4432 - }, - { - "epoch": 0.5330367342030903, - "grad_norm": 0.993406223117958, - "learning_rate": 1.8832207118377565e-06, - "loss": 0.9184, - "step": 4433 - }, - { - "epoch": 0.5331569770937293, - "grad_norm": 2.728757308272161, - "learning_rate": 1.882443078095222e-06, - "loss": 0.9243, - "step": 4434 - }, - { - "epoch": 0.5332772199843684, - "grad_norm": 0.8568584702648245, - "learning_rate": 1.8816654621859794e-06, - "loss": 0.9208, - "step": 4435 - }, - { - "epoch": 0.5333974628750076, - "grad_norm": 2.237559683232841, - "learning_rate": 1.8808878642279915e-06, - "loss": 0.9521, - "step": 4436 - }, - { - "epoch": 0.5335177057656466, - "grad_norm": 2.1612029755894695, - "learning_rate": 1.8801102843392209e-06, - "loss": 0.8883, - "step": 4437 - }, - { - "epoch": 0.5336379486562857, - "grad_norm": 1.4126998489920939, - "learning_rate": 1.8793327226376238e-06, - "loss": 1.0852, - "step": 4438 - }, - { - "epoch": 0.5337581915469248, - "grad_norm": 1.8468970902569293, - "learning_rate": 1.8785551792411569e-06, - "loss": 1.0307, - "step": 4439 - }, - { - "epoch": 0.5338784344375639, - "grad_norm": 2.088656478605354, - "learning_rate": 1.8777776542677733e-06, - "loss": 1.0583, - "step": 4440 - }, - { - "epoch": 0.5339986773282029, - "grad_norm": 2.996749070179888, - "learning_rate": 1.8770001478354216e-06, - "loss": 0.9477, - "step": 4441 - }, - { - "epoch": 0.5341189202188421, - "grad_norm": 3.884424868790655, - "learning_rate": 1.8762226600620504e-06, - "loss": 1.0592, - "step": 4442 - }, - { - "epoch": 0.5342391631094812, - "grad_norm": 2.5909765651501417, - "learning_rate": 1.8754451910656031e-06, - "loss": 0.8314, - "step": 4443 - }, - { - "epoch": 0.5343594060001202, - "grad_norm": 1.8478176054552558, - "learning_rate": 1.8746677409640212e-06, - "loss": 1.054, - "step": 4444 - }, - { - "epoch": 0.5344796488907594, - "grad_norm": 1.7739825650447536, - "learning_rate": 1.8738903098752432e-06, - "loss": 1.0707, - "step": 4445 - }, - { - "epoch": 0.5345998917813984, - "grad_norm": 1.9532195411688948, - "learning_rate": 1.8731128979172052e-06, - "loss": 0.9659, - "step": 4446 - }, - { - "epoch": 0.5347201346720375, - "grad_norm": 2.153341903059277, - "learning_rate": 1.8723355052078394e-06, - "loss": 0.8912, - "step": 4447 - }, - { - "epoch": 0.5348403775626767, - "grad_norm": 2.065776072646335, - "learning_rate": 1.8715581318650765e-06, - "loss": 1.0029, - "step": 4448 - }, - { - "epoch": 0.5349606204533157, - "grad_norm": 2.5863815802192285, - "learning_rate": 1.8707807780068422e-06, - "loss": 1.0528, - "step": 4449 - }, - { - "epoch": 0.5350808633439548, - "grad_norm": 1.9805742043854728, - "learning_rate": 1.8700034437510611e-06, - "loss": 0.8922, - "step": 4450 - }, - { - "epoch": 0.5352011062345938, - "grad_norm": 2.0701289257498128, - "learning_rate": 1.8692261292156549e-06, - "loss": 1.0316, - "step": 4451 - }, - { - "epoch": 0.535321349125233, - "grad_norm": 2.2393621527696954, - "learning_rate": 1.8684488345185401e-06, - "loss": 1.0486, - "step": 4452 - }, - { - "epoch": 0.535441592015872, - "grad_norm": 2.569677581749412, - "learning_rate": 1.8676715597776332e-06, - "loss": 1.0235, - "step": 4453 - }, - { - "epoch": 0.5355618349065111, - "grad_norm": 1.5714975317153699, - "learning_rate": 1.8668943051108455e-06, - "loss": 0.9911, - "step": 4454 - }, - { - "epoch": 0.5356820777971503, - "grad_norm": 1.6379802640208017, - "learning_rate": 1.8661170706360856e-06, - "loss": 0.9921, - "step": 4455 - }, - { - "epoch": 0.5358023206877893, - "grad_norm": 1.6478005575307595, - "learning_rate": 1.8653398564712594e-06, - "loss": 1.0462, - "step": 4456 - }, - { - "epoch": 0.5359225635784284, - "grad_norm": 1.506635770030825, - "learning_rate": 1.8645626627342704e-06, - "loss": 1.0498, - "step": 4457 - }, - { - "epoch": 0.5360428064690675, - "grad_norm": 1.8146780673268494, - "learning_rate": 1.8637854895430172e-06, - "loss": 1.0339, - "step": 4458 - }, - { - "epoch": 0.5361630493597066, - "grad_norm": 1.8837121104357857, - "learning_rate": 1.8630083370153978e-06, - "loss": 0.9153, - "step": 4459 - }, - { - "epoch": 0.5362832922503457, - "grad_norm": 0.792890535452899, - "learning_rate": 1.8622312052693041e-06, - "loss": 0.8212, - "step": 4460 - }, - { - "epoch": 0.5364035351409848, - "grad_norm": 2.5353482824635973, - "learning_rate": 1.8614540944226267e-06, - "loss": 0.9515, - "step": 4461 - }, - { - "epoch": 0.5365237780316239, - "grad_norm": 1.9794656895735097, - "learning_rate": 1.8606770045932537e-06, - "loss": 0.9114, - "step": 4462 - }, - { - "epoch": 0.5366440209222629, - "grad_norm": 1.7953491366560028, - "learning_rate": 1.859899935899068e-06, - "loss": 1.049, - "step": 4463 - }, - { - "epoch": 0.5367642638129021, - "grad_norm": 1.4012447290380947, - "learning_rate": 1.8591228884579506e-06, - "loss": 1.0223, - "step": 4464 - }, - { - "epoch": 0.5368845067035412, - "grad_norm": 2.1883176093058334, - "learning_rate": 1.8583458623877795e-06, - "loss": 1.0477, - "step": 4465 - }, - { - "epoch": 0.5370047495941802, - "grad_norm": 2.32707528636865, - "learning_rate": 1.8575688578064281e-06, - "loss": 0.9718, - "step": 4466 - }, - { - "epoch": 0.5371249924848194, - "grad_norm": 1.9144877058796579, - "learning_rate": 1.8567918748317674e-06, - "loss": 0.9871, - "step": 4467 - }, - { - "epoch": 0.5372452353754584, - "grad_norm": 1.9373001487779529, - "learning_rate": 1.8560149135816659e-06, - "loss": 1.048, - "step": 4468 - }, - { - "epoch": 0.5373654782660975, - "grad_norm": 2.0411860490675413, - "learning_rate": 1.8552379741739873e-06, - "loss": 1.0731, - "step": 4469 - }, - { - "epoch": 0.5374857211567367, - "grad_norm": 0.8946107054492204, - "learning_rate": 1.8544610567265935e-06, - "loss": 0.8135, - "step": 4470 - }, - { - "epoch": 0.5376059640473757, - "grad_norm": 1.8830067387975364, - "learning_rate": 1.853684161357341e-06, - "loss": 1.0659, - "step": 4471 - }, - { - "epoch": 0.5377262069380148, - "grad_norm": 1.9246299041092934, - "learning_rate": 1.852907288184085e-06, - "loss": 1.0067, - "step": 4472 - }, - { - "epoch": 0.5378464498286539, - "grad_norm": 2.190423156755757, - "learning_rate": 1.8521304373246762e-06, - "loss": 0.9356, - "step": 4473 - }, - { - "epoch": 0.537966692719293, - "grad_norm": 2.16672789658324, - "learning_rate": 1.8513536088969626e-06, - "loss": 1.1174, - "step": 4474 - }, - { - "epoch": 0.538086935609932, - "grad_norm": 2.8256147002817356, - "learning_rate": 1.8505768030187884e-06, - "loss": 1.0267, - "step": 4475 - }, - { - "epoch": 0.5382071785005712, - "grad_norm": 1.4901361395578943, - "learning_rate": 1.849800019807995e-06, - "loss": 1.0301, - "step": 4476 - }, - { - "epoch": 0.5383274213912103, - "grad_norm": 2.1803323208953755, - "learning_rate": 1.8490232593824186e-06, - "loss": 0.9459, - "step": 4477 - }, - { - "epoch": 0.5384476642818493, - "grad_norm": 1.637984703486263, - "learning_rate": 1.8482465218598935e-06, - "loss": 1.0716, - "step": 4478 - }, - { - "epoch": 0.5385679071724885, - "grad_norm": 1.8067390043948879, - "learning_rate": 1.8474698073582508e-06, - "loss": 1.0674, - "step": 4479 - }, - { - "epoch": 0.5386881500631275, - "grad_norm": 1.8819987964484393, - "learning_rate": 1.8466931159953166e-06, - "loss": 1.0945, - "step": 4480 - }, - { - "epoch": 0.5388083929537666, - "grad_norm": 2.466642905026333, - "learning_rate": 1.8459164478889158e-06, - "loss": 1.0664, - "step": 4481 - }, - { - "epoch": 0.5389286358444056, - "grad_norm": 1.7978145630783373, - "learning_rate": 1.8451398031568663e-06, - "loss": 0.99, - "step": 4482 - }, - { - "epoch": 0.5390488787350448, - "grad_norm": 1.7020978717657749, - "learning_rate": 1.844363181916986e-06, - "loss": 0.9767, - "step": 4483 - }, - { - "epoch": 0.5391691216256839, - "grad_norm": 2.3467894110584266, - "learning_rate": 1.8435865842870868e-06, - "loss": 1.0612, - "step": 4484 - }, - { - "epoch": 0.5392893645163229, - "grad_norm": 1.7690228875136809, - "learning_rate": 1.8428100103849787e-06, - "loss": 0.9474, - "step": 4485 - }, - { - "epoch": 0.5394096074069621, - "grad_norm": 2.784542406660338, - "learning_rate": 1.842033460328467e-06, - "loss": 0.9534, - "step": 4486 - }, - { - "epoch": 0.5395298502976011, - "grad_norm": 2.4347544147950053, - "learning_rate": 1.8412569342353541e-06, - "loss": 0.9833, - "step": 4487 - }, - { - "epoch": 0.5396500931882402, - "grad_norm": 4.850700264941417, - "learning_rate": 1.840480432223438e-06, - "loss": 1.0722, - "step": 4488 - }, - { - "epoch": 0.5397703360788794, - "grad_norm": 2.204708315009598, - "learning_rate": 1.8397039544105131e-06, - "loss": 1.004, - "step": 4489 - }, - { - "epoch": 0.5398905789695184, - "grad_norm": 2.7962927185533477, - "learning_rate": 1.8389275009143711e-06, - "loss": 0.9316, - "step": 4490 - }, - { - "epoch": 0.5400108218601575, - "grad_norm": 2.027529081425317, - "learning_rate": 1.8381510718527988e-06, - "loss": 0.9624, - "step": 4491 - }, - { - "epoch": 0.5401310647507966, - "grad_norm": 1.6812645893811111, - "learning_rate": 1.8373746673435812e-06, - "loss": 0.8611, - "step": 4492 - }, - { - "epoch": 0.5402513076414357, - "grad_norm": 1.7147611628568247, - "learning_rate": 1.8365982875044964e-06, - "loss": 1.0154, - "step": 4493 - }, - { - "epoch": 0.5403715505320748, - "grad_norm": 2.3112495753286333, - "learning_rate": 1.8358219324533217e-06, - "loss": 0.9892, - "step": 4494 - }, - { - "epoch": 0.5404917934227139, - "grad_norm": 1.546001534509329, - "learning_rate": 1.8350456023078292e-06, - "loss": 0.9244, - "step": 4495 - }, - { - "epoch": 0.540612036313353, - "grad_norm": 3.322600888800785, - "learning_rate": 1.8342692971857874e-06, - "loss": 1.0042, - "step": 4496 - }, - { - "epoch": 0.540732279203992, - "grad_norm": 2.3172716965301134, - "learning_rate": 1.833493017204962e-06, - "loss": 0.944, - "step": 4497 - }, - { - "epoch": 0.5408525220946312, - "grad_norm": 2.1098394942140066, - "learning_rate": 1.8327167624831134e-06, - "loss": 0.9986, - "step": 4498 - }, - { - "epoch": 0.5409727649852702, - "grad_norm": 1.7457580926503533, - "learning_rate": 1.831940533137999e-06, - "loss": 0.9333, - "step": 4499 - }, - { - "epoch": 0.5410930078759093, - "grad_norm": 1.5776021865659344, - "learning_rate": 1.8311643292873718e-06, - "loss": 0.9505, - "step": 4500 - }, - { - "epoch": 0.5412132507665485, - "grad_norm": 1.7881326049514965, - "learning_rate": 1.8303881510489818e-06, - "loss": 1.112, - "step": 4501 - }, - { - "epoch": 0.5413334936571875, - "grad_norm": 2.7165262182084433, - "learning_rate": 1.829611998540574e-06, - "loss": 0.9237, - "step": 4502 - }, - { - "epoch": 0.5414537365478266, - "grad_norm": 1.6519513757768138, - "learning_rate": 1.8288358718798914e-06, - "loss": 1.0318, - "step": 4503 - }, - { - "epoch": 0.5415739794384657, - "grad_norm": 1.8989046252075867, - "learning_rate": 1.8280597711846703e-06, - "loss": 0.954, - "step": 4504 - }, - { - "epoch": 0.5416942223291048, - "grad_norm": 1.9623914700767107, - "learning_rate": 1.8272836965726455e-06, - "loss": 1.0609, - "step": 4505 - }, - { - "epoch": 0.5418144652197439, - "grad_norm": 2.05261845719626, - "learning_rate": 1.8265076481615461e-06, - "loss": 1.0074, - "step": 4506 - }, - { - "epoch": 0.541934708110383, - "grad_norm": 2.196912515029638, - "learning_rate": 1.8257316260690987e-06, - "loss": 1.1014, - "step": 4507 - }, - { - "epoch": 0.5420549510010221, - "grad_norm": 1.499788746262869, - "learning_rate": 1.8249556304130254e-06, - "loss": 0.9901, - "step": 4508 - }, - { - "epoch": 0.5421751938916611, - "grad_norm": 3.682068655696973, - "learning_rate": 1.824179661311044e-06, - "loss": 0.9134, - "step": 4509 - }, - { - "epoch": 0.5422954367823003, - "grad_norm": 1.9052141220424748, - "learning_rate": 1.823403718880868e-06, - "loss": 1.0202, - "step": 4510 - }, - { - "epoch": 0.5424156796729394, - "grad_norm": 1.9881649320562178, - "learning_rate": 1.822627803240207e-06, - "loss": 0.8971, - "step": 4511 - }, - { - "epoch": 0.5425359225635784, - "grad_norm": 7.023355757223842, - "learning_rate": 1.8218519145067675e-06, - "loss": 1.0773, - "step": 4512 - }, - { - "epoch": 0.5426561654542175, - "grad_norm": 1.981058668651415, - "learning_rate": 1.8210760527982508e-06, - "loss": 1.1237, - "step": 4513 - }, - { - "epoch": 0.5427764083448566, - "grad_norm": 1.6523025543632726, - "learning_rate": 1.8203002182323552e-06, - "loss": 0.9755, - "step": 4514 - }, - { - "epoch": 0.5428966512354957, - "grad_norm": 1.679652368932213, - "learning_rate": 1.819524410926773e-06, - "loss": 0.9834, - "step": 4515 - }, - { - "epoch": 0.5430168941261347, - "grad_norm": 1.9485220418911344, - "learning_rate": 1.8187486309991944e-06, - "loss": 0.9998, - "step": 4516 - }, - { - "epoch": 0.5431371370167739, - "grad_norm": 1.6023469073981949, - "learning_rate": 1.817972878567304e-06, - "loss": 1.0023, - "step": 4517 - }, - { - "epoch": 0.543257379907413, - "grad_norm": 1.7594760059477883, - "learning_rate": 1.8171971537487834e-06, - "loss": 0.9909, - "step": 4518 - }, - { - "epoch": 0.543377622798052, - "grad_norm": 1.8138125041179434, - "learning_rate": 1.8164214566613093e-06, - "loss": 1.0301, - "step": 4519 - }, - { - "epoch": 0.5434978656886912, - "grad_norm": 3.623948964304855, - "learning_rate": 1.8156457874225547e-06, - "loss": 0.8794, - "step": 4520 - }, - { - "epoch": 0.5436181085793302, - "grad_norm": 1.740167310387912, - "learning_rate": 1.814870146150187e-06, - "loss": 1.0332, - "step": 4521 - }, - { - "epoch": 0.5437383514699693, - "grad_norm": 2.2016382273421393, - "learning_rate": 1.814094532961871e-06, - "loss": 1.0289, - "step": 4522 - }, - { - "epoch": 0.5438585943606085, - "grad_norm": 2.586214079572853, - "learning_rate": 1.8133189479752666e-06, - "loss": 1.0624, - "step": 4523 - }, - { - "epoch": 0.5439788372512475, - "grad_norm": 1.8836757216462334, - "learning_rate": 1.8125433913080292e-06, - "loss": 1.0465, - "step": 4524 - }, - { - "epoch": 0.5440990801418866, - "grad_norm": 1.8808252761300912, - "learning_rate": 1.811767863077811e-06, - "loss": 1.0613, - "step": 4525 - }, - { - "epoch": 0.5442193230325257, - "grad_norm": 1.5171014731912649, - "learning_rate": 1.8109923634022577e-06, - "loss": 1.0105, - "step": 4526 - }, - { - "epoch": 0.5443395659231648, - "grad_norm": 2.0400527361990886, - "learning_rate": 1.8102168923990128e-06, - "loss": 1.0889, - "step": 4527 - }, - { - "epoch": 0.5444598088138038, - "grad_norm": 1.8082503122471578, - "learning_rate": 1.809441450185714e-06, - "loss": 1.0266, - "step": 4528 - }, - { - "epoch": 0.544580051704443, - "grad_norm": 2.56048746862103, - "learning_rate": 1.8086660368799958e-06, - "loss": 0.9641, - "step": 4529 - }, - { - "epoch": 0.5447002945950821, - "grad_norm": 1.5945423624865411, - "learning_rate": 1.807890652599488e-06, - "loss": 0.998, - "step": 4530 - }, - { - "epoch": 0.5448205374857211, - "grad_norm": 2.2312309713113225, - "learning_rate": 1.8071152974618156e-06, - "loss": 1.0523, - "step": 4531 - }, - { - "epoch": 0.5449407803763603, - "grad_norm": 2.1789150168104716, - "learning_rate": 1.806339971584599e-06, - "loss": 1.0098, - "step": 4532 - }, - { - "epoch": 0.5450610232669993, - "grad_norm": 16.09633626064096, - "learning_rate": 1.8055646750854546e-06, - "loss": 1.0859, - "step": 4533 - }, - { - "epoch": 0.5451812661576384, - "grad_norm": 2.2510137393554146, - "learning_rate": 1.8047894080819945e-06, - "loss": 1.0493, - "step": 4534 - }, - { - "epoch": 0.5453015090482776, - "grad_norm": 0.7524703191689115, - "learning_rate": 1.8040141706918258e-06, - "loss": 0.8967, - "step": 4535 - }, - { - "epoch": 0.5454217519389166, - "grad_norm": 3.5733933793427046, - "learning_rate": 1.8032389630325525e-06, - "loss": 0.9942, - "step": 4536 - }, - { - "epoch": 0.5455419948295557, - "grad_norm": 1.8078533329551856, - "learning_rate": 1.8024637852217707e-06, - "loss": 0.9848, - "step": 4537 - }, - { - "epoch": 0.5456622377201948, - "grad_norm": 1.7168417307404504, - "learning_rate": 1.8016886373770766e-06, - "loss": 1.0716, - "step": 4538 - }, - { - "epoch": 0.5457824806108339, - "grad_norm": 1.6835455301198998, - "learning_rate": 1.8009135196160579e-06, - "loss": 1.0108, - "step": 4539 - }, - { - "epoch": 0.545902723501473, - "grad_norm": 1.6979668576675897, - "learning_rate": 1.8001384320563e-06, - "loss": 1.0694, - "step": 4540 - }, - { - "epoch": 0.5460229663921121, - "grad_norm": 0.8124791656170437, - "learning_rate": 1.7993633748153833e-06, - "loss": 0.8421, - "step": 4541 - }, - { - "epoch": 0.5461432092827512, - "grad_norm": 1.665133923823603, - "learning_rate": 1.7985883480108834e-06, - "loss": 0.958, - "step": 4542 - }, - { - "epoch": 0.5462634521733902, - "grad_norm": 1.8219283631036025, - "learning_rate": 1.797813351760371e-06, - "loss": 0.9496, - "step": 4543 - }, - { - "epoch": 0.5463836950640293, - "grad_norm": 1.7245179926507923, - "learning_rate": 1.7970383861814116e-06, - "loss": 1.0123, - "step": 4544 - }, - { - "epoch": 0.5465039379546685, - "grad_norm": 2.367022316586136, - "learning_rate": 1.7962634513915684e-06, - "loss": 0.9733, - "step": 4545 - }, - { - "epoch": 0.5466241808453075, - "grad_norm": 1.8720147209689062, - "learning_rate": 1.7954885475083969e-06, - "loss": 1.0229, - "step": 4546 - }, - { - "epoch": 0.5467444237359466, - "grad_norm": 1.948352384199467, - "learning_rate": 1.7947136746494513e-06, - "loss": 0.9643, - "step": 4547 - }, - { - "epoch": 0.5468646666265857, - "grad_norm": 8.255763497121883, - "learning_rate": 1.793938832932277e-06, - "loss": 1.1066, - "step": 4548 - }, - { - "epoch": 0.5469849095172248, - "grad_norm": 1.8510243724329234, - "learning_rate": 1.7931640224744185e-06, - "loss": 0.9207, - "step": 4549 - }, - { - "epoch": 0.5471051524078638, - "grad_norm": 1.7153286954226743, - "learning_rate": 1.7923892433934127e-06, - "loss": 0.9676, - "step": 4550 - }, - { - "epoch": 0.547225395298503, - "grad_norm": 1.7749025452415634, - "learning_rate": 1.7916144958067939e-06, - "loss": 1.0164, - "step": 4551 - }, - { - "epoch": 0.5473456381891421, - "grad_norm": 1.6336179508001973, - "learning_rate": 1.7908397798320905e-06, - "loss": 1.0197, - "step": 4552 - }, - { - "epoch": 0.5474658810797811, - "grad_norm": 1.6893390242132744, - "learning_rate": 1.7900650955868265e-06, - "loss": 0.9733, - "step": 4553 - }, - { - "epoch": 0.5475861239704203, - "grad_norm": 1.42553851007836, - "learning_rate": 1.7892904431885202e-06, - "loss": 0.9945, - "step": 4554 - }, - { - "epoch": 0.5477063668610593, - "grad_norm": 1.7998098537890448, - "learning_rate": 1.788515822754686e-06, - "loss": 0.9808, - "step": 4555 - }, - { - "epoch": 0.5478266097516984, - "grad_norm": 3.0172239817977466, - "learning_rate": 1.7877412344028335e-06, - "loss": 1.0167, - "step": 4556 - }, - { - "epoch": 0.5479468526423376, - "grad_norm": 8.388375149365006, - "learning_rate": 1.7869666782504668e-06, - "loss": 1.0097, - "step": 4557 - }, - { - "epoch": 0.5480670955329766, - "grad_norm": 2.8275469616026467, - "learning_rate": 1.7861921544150867e-06, - "loss": 0.9183, - "step": 4558 - }, - { - "epoch": 0.5481873384236157, - "grad_norm": 1.9782087383655387, - "learning_rate": 1.7854176630141856e-06, - "loss": 0.9997, - "step": 4559 - }, - { - "epoch": 0.5483075813142548, - "grad_norm": 2.376513751631649, - "learning_rate": 1.784643204165255e-06, - "loss": 1.0787, - "step": 4560 - }, - { - "epoch": 0.5484278242048939, - "grad_norm": 6.388331517034256, - "learning_rate": 1.7838687779857783e-06, - "loss": 0.9966, - "step": 4561 - }, - { - "epoch": 0.5485480670955329, - "grad_norm": 2.249031282213305, - "learning_rate": 1.7830943845932366e-06, - "loss": 0.8669, - "step": 4562 - }, - { - "epoch": 0.5486683099861721, - "grad_norm": 1.6537615237479675, - "learning_rate": 1.7823200241051044e-06, - "loss": 0.9806, - "step": 4563 - }, - { - "epoch": 0.5487885528768112, - "grad_norm": 2.1444863347698555, - "learning_rate": 1.7815456966388513e-06, - "loss": 1.0317, - "step": 4564 - }, - { - "epoch": 0.5489087957674502, - "grad_norm": 2.2072389758093225, - "learning_rate": 1.780771402311943e-06, - "loss": 1.0554, - "step": 4565 - }, - { - "epoch": 0.5490290386580894, - "grad_norm": 1.7308353354390882, - "learning_rate": 1.7799971412418374e-06, - "loss": 1.0142, - "step": 4566 - }, - { - "epoch": 0.5491492815487284, - "grad_norm": 2.177328165940933, - "learning_rate": 1.7792229135459918e-06, - "loss": 0.9685, - "step": 4567 - }, - { - "epoch": 0.5492695244393675, - "grad_norm": 0.7879856050478744, - "learning_rate": 1.7784487193418538e-06, - "loss": 0.8692, - "step": 4568 - }, - { - "epoch": 0.5493897673300067, - "grad_norm": 2.62117625076826, - "learning_rate": 1.7776745587468698e-06, - "loss": 0.8463, - "step": 4569 - }, - { - "epoch": 0.5495100102206457, - "grad_norm": 3.351963828362835, - "learning_rate": 1.7769004318784776e-06, - "loss": 1.0475, - "step": 4570 - }, - { - "epoch": 0.5496302531112848, - "grad_norm": 1.577784359870458, - "learning_rate": 1.776126338854113e-06, - "loss": 1.0306, - "step": 4571 - }, - { - "epoch": 0.5497504960019239, - "grad_norm": 1.6311792117138875, - "learning_rate": 1.7753522797912044e-06, - "loss": 1.0699, - "step": 4572 - }, - { - "epoch": 0.549870738892563, - "grad_norm": 17.98471267873687, - "learning_rate": 1.7745782548071765e-06, - "loss": 0.9377, - "step": 4573 - }, - { - "epoch": 0.549990981783202, - "grad_norm": 1.5112940465220859, - "learning_rate": 1.7738042640194482e-06, - "loss": 0.9672, - "step": 4574 - }, - { - "epoch": 0.5501112246738411, - "grad_norm": 1.592556450410707, - "learning_rate": 1.7730303075454335e-06, - "loss": 0.939, - "step": 4575 - }, - { - "epoch": 0.5502314675644803, - "grad_norm": 2.001358762760398, - "learning_rate": 1.7722563855025402e-06, - "loss": 1.0774, - "step": 4576 - }, - { - "epoch": 0.5503517104551193, - "grad_norm": 1.7759732170748899, - "learning_rate": 1.7714824980081721e-06, - "loss": 0.9372, - "step": 4577 - }, - { - "epoch": 0.5504719533457584, - "grad_norm": 1.558334673236672, - "learning_rate": 1.7707086451797276e-06, - "loss": 0.9691, - "step": 4578 - }, - { - "epoch": 0.5505921962363975, - "grad_norm": 0.7067142640097331, - "learning_rate": 1.7699348271345993e-06, - "loss": 0.7729, - "step": 4579 - }, - { - "epoch": 0.5507124391270366, - "grad_norm": 0.717805656696095, - "learning_rate": 1.7691610439901753e-06, - "loss": 0.796, - "step": 4580 - }, - { - "epoch": 0.5508326820176757, - "grad_norm": 4.6393601463066725, - "learning_rate": 1.7683872958638367e-06, - "loss": 0.9867, - "step": 4581 - }, - { - "epoch": 0.5509529249083148, - "grad_norm": 2.8826945624482403, - "learning_rate": 1.7676135828729614e-06, - "loss": 1.0713, - "step": 4582 - }, - { - "epoch": 0.5510731677989539, - "grad_norm": 2.2374577097385893, - "learning_rate": 1.7668399051349205e-06, - "loss": 1.0568, - "step": 4583 - }, - { - "epoch": 0.5511934106895929, - "grad_norm": 2.075443574304262, - "learning_rate": 1.766066262767081e-06, - "loss": 1.0527, - "step": 4584 - }, - { - "epoch": 0.5513136535802321, - "grad_norm": 3.792900988428253, - "learning_rate": 1.765292655886803e-06, - "loss": 1.0091, - "step": 4585 - }, - { - "epoch": 0.5514338964708712, - "grad_norm": 2.094000616304877, - "learning_rate": 1.764519084611443e-06, - "loss": 0.9354, - "step": 4586 - }, - { - "epoch": 0.5515541393615102, - "grad_norm": 1.7029243141016912, - "learning_rate": 1.7637455490583505e-06, - "loss": 1.0038, - "step": 4587 - }, - { - "epoch": 0.5516743822521494, - "grad_norm": 1.9430974275799007, - "learning_rate": 1.7629720493448701e-06, - "loss": 0.9954, - "step": 4588 - }, - { - "epoch": 0.5517946251427884, - "grad_norm": 1.795943574405591, - "learning_rate": 1.7621985855883418e-06, - "loss": 1.0753, - "step": 4589 - }, - { - "epoch": 0.5519148680334275, - "grad_norm": 1.736149266989292, - "learning_rate": 1.7614251579060983e-06, - "loss": 0.9581, - "step": 4590 - }, - { - "epoch": 0.5520351109240667, - "grad_norm": 1.8808981632974713, - "learning_rate": 1.76065176641547e-06, - "loss": 1.0722, - "step": 4591 - }, - { - "epoch": 0.5521553538147057, - "grad_norm": 1.5982177046292476, - "learning_rate": 1.759878411233777e-06, - "loss": 1.0054, - "step": 4592 - }, - { - "epoch": 0.5522755967053448, - "grad_norm": 2.215160098350009, - "learning_rate": 1.7591050924783388e-06, - "loss": 0.9852, - "step": 4593 - }, - { - "epoch": 0.5523958395959839, - "grad_norm": 0.8562974791345686, - "learning_rate": 1.7583318102664661e-06, - "loss": 0.8639, - "step": 4594 - }, - { - "epoch": 0.552516082486623, - "grad_norm": 1.9798881085887474, - "learning_rate": 1.757558564715466e-06, - "loss": 1.0224, - "step": 4595 - }, - { - "epoch": 0.552636325377262, - "grad_norm": 2.418675826165052, - "learning_rate": 1.7567853559426386e-06, - "loss": 0.9657, - "step": 4596 - }, - { - "epoch": 0.5527565682679012, - "grad_norm": 1.8639887140448104, - "learning_rate": 1.7560121840652797e-06, - "loss": 0.9795, - "step": 4597 - }, - { - "epoch": 0.5528768111585403, - "grad_norm": 2.520843968224489, - "learning_rate": 1.7552390492006782e-06, - "loss": 0.9261, - "step": 4598 - }, - { - "epoch": 0.5529970540491793, - "grad_norm": 2.2132074679738376, - "learning_rate": 1.7544659514661184e-06, - "loss": 0.8834, - "step": 4599 - }, - { - "epoch": 0.5531172969398185, - "grad_norm": 1.6759323372122081, - "learning_rate": 1.7536928909788786e-06, - "loss": 1.0196, - "step": 4600 - }, - { - "epoch": 0.5532375398304575, - "grad_norm": 0.906490721466249, - "learning_rate": 1.752919867856231e-06, - "loss": 0.8717, - "step": 4601 - }, - { - "epoch": 0.5533577827210966, - "grad_norm": 1.5449494987554881, - "learning_rate": 1.7521468822154436e-06, - "loss": 1.006, - "step": 4602 - }, - { - "epoch": 0.5534780256117358, - "grad_norm": 1.7741205857675624, - "learning_rate": 1.751373934173777e-06, - "loss": 0.9711, - "step": 4603 - }, - { - "epoch": 0.5535982685023748, - "grad_norm": 1.589454697003247, - "learning_rate": 1.750601023848487e-06, - "loss": 0.9626, - "step": 4604 - }, - { - "epoch": 0.5537185113930139, - "grad_norm": 3.0261473820528426, - "learning_rate": 1.749828151356823e-06, - "loss": 0.9716, - "step": 4605 - }, - { - "epoch": 0.553838754283653, - "grad_norm": 1.7511773236351842, - "learning_rate": 1.7490553168160297e-06, - "loss": 0.9836, - "step": 4606 - }, - { - "epoch": 0.5539589971742921, - "grad_norm": 2.4463720006836165, - "learning_rate": 1.748282520343345e-06, - "loss": 0.9916, - "step": 4607 - }, - { - "epoch": 0.5540792400649311, - "grad_norm": 1.9149091444051056, - "learning_rate": 1.7475097620560023e-06, - "loss": 1.0198, - "step": 4608 - }, - { - "epoch": 0.5541994829555702, - "grad_norm": 1.8551229375526135, - "learning_rate": 1.746737042071228e-06, - "loss": 0.9313, - "step": 4609 - }, - { - "epoch": 0.5543197258462094, - "grad_norm": 3.7986479964122726, - "learning_rate": 1.7459643605062424e-06, - "loss": 1.0241, - "step": 4610 - }, - { - "epoch": 0.5544399687368484, - "grad_norm": 1.5046012560568276, - "learning_rate": 1.745191717478262e-06, - "loss": 1.0356, - "step": 4611 - }, - { - "epoch": 0.5545602116274875, - "grad_norm": 1.630425785012228, - "learning_rate": 1.7444191131044948e-06, - "loss": 1.0275, - "step": 4612 - }, - { - "epoch": 0.5546804545181266, - "grad_norm": 1.6165454714202856, - "learning_rate": 1.7436465475021456e-06, - "loss": 0.9478, - "step": 4613 - }, - { - "epoch": 0.5548006974087657, - "grad_norm": 2.008939078167279, - "learning_rate": 1.7428740207884111e-06, - "loss": 0.9292, - "step": 4614 - }, - { - "epoch": 0.5549209402994048, - "grad_norm": 1.8724956317845567, - "learning_rate": 1.7421015330804833e-06, - "loss": 0.8443, - "step": 4615 - }, - { - "epoch": 0.5550411831900439, - "grad_norm": 1.9448498401682872, - "learning_rate": 1.7413290844955475e-06, - "loss": 0.9615, - "step": 4616 - }, - { - "epoch": 0.555161426080683, - "grad_norm": 1.8380866275583791, - "learning_rate": 1.7405566751507843e-06, - "loss": 1.0055, - "step": 4617 - }, - { - "epoch": 0.555281668971322, - "grad_norm": 1.5468714550247415, - "learning_rate": 1.7397843051633668e-06, - "loss": 0.9033, - "step": 4618 - }, - { - "epoch": 0.5554019118619612, - "grad_norm": 1.635498101541507, - "learning_rate": 1.739011974650464e-06, - "loss": 0.9426, - "step": 4619 - }, - { - "epoch": 0.5555221547526003, - "grad_norm": 2.0101716546166326, - "learning_rate": 1.7382396837292365e-06, - "loss": 0.9947, - "step": 4620 - }, - { - "epoch": 0.5556423976432393, - "grad_norm": 2.270740180481827, - "learning_rate": 1.737467432516841e-06, - "loss": 0.9732, - "step": 4621 - }, - { - "epoch": 0.5557626405338785, - "grad_norm": 2.2457882349962905, - "learning_rate": 1.7366952211304274e-06, - "loss": 0.9621, - "step": 4622 - }, - { - "epoch": 0.5558828834245175, - "grad_norm": 1.7757010109894844, - "learning_rate": 1.735923049687139e-06, - "loss": 1.0587, - "step": 4623 - }, - { - "epoch": 0.5560031263151566, - "grad_norm": 1.4174375773953427, - "learning_rate": 1.7351509183041144e-06, - "loss": 0.9741, - "step": 4624 - }, - { - "epoch": 0.5561233692057957, - "grad_norm": 1.578671426879197, - "learning_rate": 1.7343788270984852e-06, - "loss": 0.9543, - "step": 4625 - }, - { - "epoch": 0.5562436120964348, - "grad_norm": 1.775785194590627, - "learning_rate": 1.7336067761873764e-06, - "loss": 0.9718, - "step": 4626 - }, - { - "epoch": 0.5563638549870739, - "grad_norm": 1.9472639720913558, - "learning_rate": 1.7328347656879076e-06, - "loss": 0.9884, - "step": 4627 - }, - { - "epoch": 0.556484097877713, - "grad_norm": 3.1149472095308552, - "learning_rate": 1.7320627957171927e-06, - "loss": 0.9127, - "step": 4628 - }, - { - "epoch": 0.5566043407683521, - "grad_norm": 1.7985684583295949, - "learning_rate": 1.7312908663923382e-06, - "loss": 1.0316, - "step": 4629 - }, - { - "epoch": 0.5567245836589911, - "grad_norm": 1.836806525855318, - "learning_rate": 1.7305189778304463e-06, - "loss": 0.902, - "step": 4630 - }, - { - "epoch": 0.5568448265496303, - "grad_norm": 1.6977736367984577, - "learning_rate": 1.729747130148611e-06, - "loss": 1.0314, - "step": 4631 - }, - { - "epoch": 0.5569650694402694, - "grad_norm": 1.7534360531474813, - "learning_rate": 1.7289753234639208e-06, - "loss": 0.9898, - "step": 4632 - }, - { - "epoch": 0.5570853123309084, - "grad_norm": 1.8324278705135342, - "learning_rate": 1.7282035578934592e-06, - "loss": 0.9881, - "step": 4633 - }, - { - "epoch": 0.5572055552215476, - "grad_norm": 1.6314031185620268, - "learning_rate": 1.727431833554301e-06, - "loss": 1.015, - "step": 4634 - }, - { - "epoch": 0.5573257981121866, - "grad_norm": 1.7491312595447075, - "learning_rate": 1.7266601505635175e-06, - "loss": 1.0008, - "step": 4635 - }, - { - "epoch": 0.5574460410028257, - "grad_norm": 1.739873058687173, - "learning_rate": 1.7258885090381717e-06, - "loss": 0.9868, - "step": 4636 - }, - { - "epoch": 0.5575662838934649, - "grad_norm": 2.359516164838675, - "learning_rate": 1.7251169090953213e-06, - "loss": 1.0177, - "step": 4637 - }, - { - "epoch": 0.5576865267841039, - "grad_norm": 3.0791757407117086, - "learning_rate": 1.7243453508520168e-06, - "loss": 0.9927, - "step": 4638 - }, - { - "epoch": 0.557806769674743, - "grad_norm": 1.916402017594153, - "learning_rate": 1.7235738344253038e-06, - "loss": 1.0691, - "step": 4639 - }, - { - "epoch": 0.557927012565382, - "grad_norm": 2.2042478040485447, - "learning_rate": 1.72280235993222e-06, - "loss": 1.0514, - "step": 4640 - }, - { - "epoch": 0.5580472554560212, - "grad_norm": 2.2860241407453055, - "learning_rate": 1.722030927489798e-06, - "loss": 0.9289, - "step": 4641 - }, - { - "epoch": 0.5581674983466602, - "grad_norm": 1.6670565467035683, - "learning_rate": 1.7212595372150634e-06, - "loss": 0.9709, - "step": 4642 - }, - { - "epoch": 0.5582877412372993, - "grad_norm": 2.151617519586611, - "learning_rate": 1.720488189225035e-06, - "loss": 0.9663, - "step": 4643 - }, - { - "epoch": 0.5584079841279385, - "grad_norm": 2.4209789956720713, - "learning_rate": 1.7197168836367265e-06, - "loss": 1.0199, - "step": 4644 - }, - { - "epoch": 0.5585282270185775, - "grad_norm": 2.165549638325907, - "learning_rate": 1.7189456205671433e-06, - "loss": 1.0473, - "step": 4645 - }, - { - "epoch": 0.5586484699092166, - "grad_norm": 6.610424201961838, - "learning_rate": 1.7181744001332866e-06, - "loss": 1.045, - "step": 4646 - }, - { - "epoch": 0.5587687127998557, - "grad_norm": 2.0146387333897406, - "learning_rate": 1.7174032224521493e-06, - "loss": 0.8676, - "step": 4647 - }, - { - "epoch": 0.5588889556904948, - "grad_norm": 1.8856428723725807, - "learning_rate": 1.7166320876407184e-06, - "loss": 0.9288, - "step": 4648 - }, - { - "epoch": 0.5590091985811338, - "grad_norm": 2.594314146162151, - "learning_rate": 1.7158609958159742e-06, - "loss": 0.9084, - "step": 4649 - }, - { - "epoch": 0.559129441471773, - "grad_norm": 2.1322774250826484, - "learning_rate": 1.7150899470948911e-06, - "loss": 1.0092, - "step": 4650 - }, - { - "epoch": 0.5592496843624121, - "grad_norm": 0.8036460062267012, - "learning_rate": 1.7143189415944365e-06, - "loss": 0.8312, - "step": 4651 - }, - { - "epoch": 0.5593699272530511, - "grad_norm": 1.5747624445192014, - "learning_rate": 1.7135479794315714e-06, - "loss": 0.991, - "step": 4652 - }, - { - "epoch": 0.5594901701436903, - "grad_norm": 1.82148403729741, - "learning_rate": 1.7127770607232502e-06, - "loss": 1.0154, - "step": 4653 - }, - { - "epoch": 0.5596104130343293, - "grad_norm": 4.997106286971104, - "learning_rate": 1.7120061855864204e-06, - "loss": 1.0295, - "step": 4654 - }, - { - "epoch": 0.5597306559249684, - "grad_norm": 1.8514166665892864, - "learning_rate": 1.7112353541380233e-06, - "loss": 0.9497, - "step": 4655 - }, - { - "epoch": 0.5598508988156076, - "grad_norm": 1.6461336207981334, - "learning_rate": 1.7104645664949931e-06, - "loss": 0.9528, - "step": 4656 - }, - { - "epoch": 0.5599711417062466, - "grad_norm": 1.7352274956089573, - "learning_rate": 1.7096938227742584e-06, - "loss": 0.957, - "step": 4657 - }, - { - "epoch": 0.5600913845968857, - "grad_norm": 1.9996791820707829, - "learning_rate": 1.70892312309274e-06, - "loss": 1.068, - "step": 4658 - }, - { - "epoch": 0.5602116274875248, - "grad_norm": 2.942962923542895, - "learning_rate": 1.7081524675673523e-06, - "loss": 0.9087, - "step": 4659 - }, - { - "epoch": 0.5603318703781639, - "grad_norm": 0.8414215171512709, - "learning_rate": 1.7073818563150026e-06, - "loss": 0.8753, - "step": 4660 - }, - { - "epoch": 0.560452113268803, - "grad_norm": 4.322714387994629, - "learning_rate": 1.7066112894525935e-06, - "loss": 1.0987, - "step": 4661 - }, - { - "epoch": 0.5605723561594421, - "grad_norm": 1.5169751946892507, - "learning_rate": 1.7058407670970177e-06, - "loss": 0.9565, - "step": 4662 - }, - { - "epoch": 0.5606925990500812, - "grad_norm": 1.8874017651996642, - "learning_rate": 1.7050702893651643e-06, - "loss": 0.8503, - "step": 4663 - }, - { - "epoch": 0.5608128419407202, - "grad_norm": 8.447282559592852, - "learning_rate": 1.7042998563739134e-06, - "loss": 0.983, - "step": 4664 - }, - { - "epoch": 0.5609330848313594, - "grad_norm": 2.1714517743620574, - "learning_rate": 1.703529468240139e-06, - "loss": 0.9472, - "step": 4665 - }, - { - "epoch": 0.5610533277219985, - "grad_norm": 2.4895922502387275, - "learning_rate": 1.7027591250807088e-06, - "loss": 0.9733, - "step": 4666 - }, - { - "epoch": 0.5611735706126375, - "grad_norm": 2.215211437458394, - "learning_rate": 1.7019888270124825e-06, - "loss": 1.0736, - "step": 4667 - }, - { - "epoch": 0.5612938135032767, - "grad_norm": 1.8254215432962273, - "learning_rate": 1.7012185741523147e-06, - "loss": 1.0459, - "step": 4668 - }, - { - "epoch": 0.5614140563939157, - "grad_norm": 2.533411304210927, - "learning_rate": 1.7004483666170514e-06, - "loss": 0.8545, - "step": 4669 - }, - { - "epoch": 0.5615342992845548, - "grad_norm": 1.88921915545391, - "learning_rate": 1.699678204523533e-06, - "loss": 1.0323, - "step": 4670 - }, - { - "epoch": 0.5616545421751938, - "grad_norm": 2.2336864344129133, - "learning_rate": 1.6989080879885918e-06, - "loss": 0.9222, - "step": 4671 - }, - { - "epoch": 0.561774785065833, - "grad_norm": 0.9796717166389743, - "learning_rate": 1.6981380171290544e-06, - "loss": 0.8704, - "step": 4672 - }, - { - "epoch": 0.5618950279564721, - "grad_norm": 1.5791868659801873, - "learning_rate": 1.6973679920617396e-06, - "loss": 0.9695, - "step": 4673 - }, - { - "epoch": 0.5620152708471111, - "grad_norm": 2.0101167392517088, - "learning_rate": 1.6965980129034603e-06, - "loss": 1.0789, - "step": 4674 - }, - { - "epoch": 0.5621355137377503, - "grad_norm": 1.6022658129652954, - "learning_rate": 1.6958280797710209e-06, - "loss": 0.9933, - "step": 4675 - }, - { - "epoch": 0.5622557566283893, - "grad_norm": 0.7273059855653958, - "learning_rate": 1.6950581927812198e-06, - "loss": 0.7899, - "step": 4676 - }, - { - "epoch": 0.5623759995190284, - "grad_norm": 1.9756143580158834, - "learning_rate": 1.6942883520508486e-06, - "loss": 1.0119, - "step": 4677 - }, - { - "epoch": 0.5624962424096676, - "grad_norm": 1.8733007996789732, - "learning_rate": 1.693518557696691e-06, - "loss": 1.0078, - "step": 4678 - }, - { - "epoch": 0.5626164853003066, - "grad_norm": 1.8751482587055524, - "learning_rate": 1.6927488098355252e-06, - "loss": 1.1109, - "step": 4679 - }, - { - "epoch": 0.5627367281909457, - "grad_norm": 0.8804763671762137, - "learning_rate": 1.6919791085841201e-06, - "loss": 0.9065, - "step": 4680 - }, - { - "epoch": 0.5628569710815848, - "grad_norm": 3.1545098672767824, - "learning_rate": 1.6912094540592396e-06, - "loss": 1.0222, - "step": 4681 - }, - { - "epoch": 0.5629772139722239, - "grad_norm": 3.480013593888314, - "learning_rate": 1.6904398463776393e-06, - "loss": 1.0288, - "step": 4682 - }, - { - "epoch": 0.5630974568628629, - "grad_norm": 1.6864439874344794, - "learning_rate": 1.6896702856560683e-06, - "loss": 0.9595, - "step": 4683 - }, - { - "epoch": 0.5632176997535021, - "grad_norm": 4.676045983133288, - "learning_rate": 1.6889007720112677e-06, - "loss": 0.9141, - "step": 4684 - }, - { - "epoch": 0.5633379426441412, - "grad_norm": 1.7606950648852375, - "learning_rate": 1.6881313055599734e-06, - "loss": 1.0105, - "step": 4685 - }, - { - "epoch": 0.5634581855347802, - "grad_norm": 2.401447606611438, - "learning_rate": 1.6873618864189117e-06, - "loss": 1.0465, - "step": 4686 - }, - { - "epoch": 0.5635784284254194, - "grad_norm": 2.030787136453549, - "learning_rate": 1.686592514704803e-06, - "loss": 1.0133, - "step": 4687 - }, - { - "epoch": 0.5636986713160584, - "grad_norm": 2.229664351858423, - "learning_rate": 1.685823190534361e-06, - "loss": 0.9235, - "step": 4688 - }, - { - "epoch": 0.5638189142066975, - "grad_norm": 5.043984923156422, - "learning_rate": 1.6850539140242907e-06, - "loss": 1.0587, - "step": 4689 - }, - { - "epoch": 0.5639391570973367, - "grad_norm": 2.0401926233941143, - "learning_rate": 1.684284685291292e-06, - "loss": 1.0427, - "step": 4690 - }, - { - "epoch": 0.5640593999879757, - "grad_norm": 2.2241738875286057, - "learning_rate": 1.683515504452055e-06, - "loss": 1.0403, - "step": 4691 - }, - { - "epoch": 0.5641796428786148, - "grad_norm": 2.2964817670666857, - "learning_rate": 1.6827463716232648e-06, - "loss": 0.8888, - "step": 4692 - }, - { - "epoch": 0.5642998857692539, - "grad_norm": 1.6783262902837799, - "learning_rate": 1.6819772869215972e-06, - "loss": 0.9802, - "step": 4693 - }, - { - "epoch": 0.564420128659893, - "grad_norm": 4.768284390218454, - "learning_rate": 1.6812082504637228e-06, - "loss": 1.048, - "step": 4694 - }, - { - "epoch": 0.564540371550532, - "grad_norm": 1.3871026165990137, - "learning_rate": 1.6804392623663025e-06, - "loss": 0.9761, - "step": 4695 - }, - { - "epoch": 0.5646606144411712, - "grad_norm": 2.1527810318898317, - "learning_rate": 1.6796703227459935e-06, - "loss": 1.0083, - "step": 4696 - }, - { - "epoch": 0.5647808573318103, - "grad_norm": 1.7555189980283195, - "learning_rate": 1.6789014317194407e-06, - "loss": 0.9846, - "step": 4697 - }, - { - "epoch": 0.5649011002224493, - "grad_norm": 2.9218893611779424, - "learning_rate": 1.6781325894032853e-06, - "loss": 0.9529, - "step": 4698 - }, - { - "epoch": 0.5650213431130885, - "grad_norm": 1.7660778443923737, - "learning_rate": 1.6773637959141608e-06, - "loss": 1.1442, - "step": 4699 - }, - { - "epoch": 0.5651415860037275, - "grad_norm": 2.1238213804754276, - "learning_rate": 1.6765950513686915e-06, - "loss": 0.8926, - "step": 4700 - }, - { - "epoch": 0.5652618288943666, - "grad_norm": 2.0350501093213236, - "learning_rate": 1.675826355883496e-06, - "loss": 0.9964, - "step": 4701 - }, - { - "epoch": 0.5653820717850057, - "grad_norm": 1.9151731421780809, - "learning_rate": 1.6750577095751848e-06, - "loss": 1.0249, - "step": 4702 - }, - { - "epoch": 0.5655023146756448, - "grad_norm": 1.8811271373892022, - "learning_rate": 1.6742891125603605e-06, - "loss": 0.9585, - "step": 4703 - }, - { - "epoch": 0.5656225575662839, - "grad_norm": 1.8395485286797073, - "learning_rate": 1.6735205649556185e-06, - "loss": 0.9473, - "step": 4704 - }, - { - "epoch": 0.5657428004569229, - "grad_norm": 1.461318675206682, - "learning_rate": 1.6727520668775476e-06, - "loss": 1.0721, - "step": 4705 - }, - { - "epoch": 0.5658630433475621, - "grad_norm": 3.380148457534181, - "learning_rate": 1.6719836184427275e-06, - "loss": 0.9832, - "step": 4706 - }, - { - "epoch": 0.5659832862382012, - "grad_norm": 2.6883727142640694, - "learning_rate": 1.671215219767733e-06, - "loss": 0.875, - "step": 4707 - }, - { - "epoch": 0.5661035291288402, - "grad_norm": 2.7653250666335074, - "learning_rate": 1.670446870969127e-06, - "loss": 0.9896, - "step": 4708 - }, - { - "epoch": 0.5662237720194794, - "grad_norm": 2.063451258607571, - "learning_rate": 1.6696785721634685e-06, - "loss": 1.0311, - "step": 4709 - }, - { - "epoch": 0.5663440149101184, - "grad_norm": 1.716429861025266, - "learning_rate": 1.6689103234673086e-06, - "loss": 0.962, - "step": 4710 - }, - { - "epoch": 0.5664642578007575, - "grad_norm": 1.823730730212498, - "learning_rate": 1.668142124997189e-06, - "loss": 0.9998, - "step": 4711 - }, - { - "epoch": 0.5665845006913967, - "grad_norm": 0.7598431702097227, - "learning_rate": 1.6673739768696453e-06, - "loss": 0.859, - "step": 4712 - }, - { - "epoch": 0.5667047435820357, - "grad_norm": 3.7641722856712896, - "learning_rate": 1.6666058792012052e-06, - "loss": 0.9982, - "step": 4713 - }, - { - "epoch": 0.5668249864726748, - "grad_norm": 0.8648469974832462, - "learning_rate": 1.6658378321083878e-06, - "loss": 0.9317, - "step": 4714 - }, - { - "epoch": 0.5669452293633139, - "grad_norm": 1.9783698100728577, - "learning_rate": 1.6650698357077055e-06, - "loss": 1.0501, - "step": 4715 - }, - { - "epoch": 0.567065472253953, - "grad_norm": 2.2104060204837084, - "learning_rate": 1.6643018901156632e-06, - "loss": 1.0311, - "step": 4716 - }, - { - "epoch": 0.567185715144592, - "grad_norm": 2.367197168915897, - "learning_rate": 1.6635339954487566e-06, - "loss": 1.02, - "step": 4717 - }, - { - "epoch": 0.5673059580352312, - "grad_norm": 1.7089627509792655, - "learning_rate": 1.6627661518234765e-06, - "loss": 1.0526, - "step": 4718 - }, - { - "epoch": 0.5674262009258703, - "grad_norm": 1.6776913158855251, - "learning_rate": 1.661998359356302e-06, - "loss": 1.0723, - "step": 4719 - }, - { - "epoch": 0.5675464438165093, - "grad_norm": 1.0357466255738879, - "learning_rate": 1.6612306181637077e-06, - "loss": 0.8206, - "step": 4720 - }, - { - "epoch": 0.5676666867071485, - "grad_norm": 2.140536675406087, - "learning_rate": 1.6604629283621598e-06, - "loss": 0.8855, - "step": 4721 - }, - { - "epoch": 0.5677869295977875, - "grad_norm": 2.3460619903660644, - "learning_rate": 1.6596952900681152e-06, - "loss": 0.9791, - "step": 4722 - }, - { - "epoch": 0.5679071724884266, - "grad_norm": 2.235743177114964, - "learning_rate": 1.658927703398025e-06, - "loss": 1.0538, - "step": 4723 - }, - { - "epoch": 0.5680274153790658, - "grad_norm": 2.341408131763821, - "learning_rate": 1.6581601684683309e-06, - "loss": 1.0073, - "step": 4724 - }, - { - "epoch": 0.5681476582697048, - "grad_norm": 5.9792720063220015, - "learning_rate": 1.6573926853954674e-06, - "loss": 0.9172, - "step": 4725 - }, - { - "epoch": 0.5682679011603439, - "grad_norm": 1.9407910114728022, - "learning_rate": 1.6566252542958608e-06, - "loss": 1.0565, - "step": 4726 - }, - { - "epoch": 0.568388144050983, - "grad_norm": 1.7541726638558532, - "learning_rate": 1.6558578752859305e-06, - "loss": 1.0058, - "step": 4727 - }, - { - "epoch": 0.5685083869416221, - "grad_norm": 1.720977865188708, - "learning_rate": 1.6550905484820865e-06, - "loss": 1.0101, - "step": 4728 - }, - { - "epoch": 0.5686286298322611, - "grad_norm": 2.100569479792281, - "learning_rate": 1.6543232740007328e-06, - "loss": 1.0226, - "step": 4729 - }, - { - "epoch": 0.5687488727229003, - "grad_norm": 3.0105016007811165, - "learning_rate": 1.653556051958263e-06, - "loss": 0.9005, - "step": 4730 - }, - { - "epoch": 0.5688691156135394, - "grad_norm": 2.9214315638924906, - "learning_rate": 1.6527888824710642e-06, - "loss": 0.9667, - "step": 4731 - }, - { - "epoch": 0.5689893585041784, - "grad_norm": 2.4717528032078806, - "learning_rate": 1.6520217656555166e-06, - "loss": 0.9903, - "step": 4732 - }, - { - "epoch": 0.5691096013948175, - "grad_norm": 1.6158679240896536, - "learning_rate": 1.65125470162799e-06, - "loss": 0.9381, - "step": 4733 - }, - { - "epoch": 0.5692298442854566, - "grad_norm": 2.1260188409108607, - "learning_rate": 1.6504876905048485e-06, - "loss": 0.9328, - "step": 4734 - }, - { - "epoch": 0.5693500871760957, - "grad_norm": 1.5854587678002008, - "learning_rate": 1.6497207324024464e-06, - "loss": 0.9593, - "step": 4735 - }, - { - "epoch": 0.5694703300667348, - "grad_norm": 1.848085252826581, - "learning_rate": 1.6489538274371305e-06, - "loss": 1.054, - "step": 4736 - }, - { - "epoch": 0.5695905729573739, - "grad_norm": 1.9509008943031165, - "learning_rate": 1.6481869757252396e-06, - "loss": 1.0546, - "step": 4737 - }, - { - "epoch": 0.569710815848013, - "grad_norm": 1.7456934937377664, - "learning_rate": 1.647420177383105e-06, - "loss": 0.9469, - "step": 4738 - }, - { - "epoch": 0.569831058738652, - "grad_norm": 2.3496390045149673, - "learning_rate": 1.646653432527049e-06, - "loss": 0.9602, - "step": 4739 - }, - { - "epoch": 0.5699513016292912, - "grad_norm": 1.7116559754025058, - "learning_rate": 1.645886741273387e-06, - "loss": 0.9769, - "step": 4740 - }, - { - "epoch": 0.5700715445199303, - "grad_norm": 1.8456205157811003, - "learning_rate": 1.645120103738424e-06, - "loss": 0.9685, - "step": 4741 - }, - { - "epoch": 0.5701917874105693, - "grad_norm": 2.2237164427619946, - "learning_rate": 1.6443535200384591e-06, - "loss": 1.0622, - "step": 4742 - }, - { - "epoch": 0.5703120303012085, - "grad_norm": 1.7645368345485213, - "learning_rate": 1.6435869902897827e-06, - "loss": 0.9357, - "step": 4743 - }, - { - "epoch": 0.5704322731918475, - "grad_norm": 0.7968342714560159, - "learning_rate": 1.6428205146086764e-06, - "loss": 0.8762, - "step": 4744 - }, - { - "epoch": 0.5705525160824866, - "grad_norm": 1.4826598849610306, - "learning_rate": 1.6420540931114142e-06, - "loss": 0.9332, - "step": 4745 - }, - { - "epoch": 0.5706727589731257, - "grad_norm": 4.201965852884646, - "learning_rate": 1.6412877259142616e-06, - "loss": 1.024, - "step": 4746 - }, - { - "epoch": 0.5707930018637648, - "grad_norm": 2.5572772883511483, - "learning_rate": 1.6405214131334757e-06, - "loss": 0.9731, - "step": 4747 - }, - { - "epoch": 0.5709132447544039, - "grad_norm": 1.5545375885883579, - "learning_rate": 1.6397551548853052e-06, - "loss": 1.0241, - "step": 4748 - }, - { - "epoch": 0.571033487645043, - "grad_norm": 1.5573929293401336, - "learning_rate": 1.6389889512859917e-06, - "loss": 0.935, - "step": 4749 - }, - { - "epoch": 0.5711537305356821, - "grad_norm": 0.8815126217129436, - "learning_rate": 1.638222802451767e-06, - "loss": 0.8867, - "step": 4750 - }, - { - "epoch": 0.5712739734263211, - "grad_norm": 1.5672410372016694, - "learning_rate": 1.6374567084988561e-06, - "loss": 0.9827, - "step": 4751 - }, - { - "epoch": 0.5713942163169603, - "grad_norm": 1.8373285745596935, - "learning_rate": 1.6366906695434738e-06, - "loss": 0.9907, - "step": 4752 - }, - { - "epoch": 0.5715144592075994, - "grad_norm": 2.197513874865096, - "learning_rate": 1.6359246857018275e-06, - "loss": 1.0849, - "step": 4753 - }, - { - "epoch": 0.5716347020982384, - "grad_norm": 2.1064600265335653, - "learning_rate": 1.6351587570901178e-06, - "loss": 1.0088, - "step": 4754 - }, - { - "epoch": 0.5717549449888776, - "grad_norm": 2.2827860220353338, - "learning_rate": 1.634392883824534e-06, - "loss": 0.9821, - "step": 4755 - }, - { - "epoch": 0.5718751878795166, - "grad_norm": 1.5219952921550188, - "learning_rate": 1.6336270660212595e-06, - "loss": 0.9057, - "step": 4756 - }, - { - "epoch": 0.5719954307701557, - "grad_norm": 2.027963179846586, - "learning_rate": 1.6328613037964676e-06, - "loss": 0.8846, - "step": 4757 - }, - { - "epoch": 0.5721156736607949, - "grad_norm": 1.8647535093708927, - "learning_rate": 1.6320955972663241e-06, - "loss": 0.9138, - "step": 4758 - }, - { - "epoch": 0.5722359165514339, - "grad_norm": 1.8144100253896238, - "learning_rate": 1.6313299465469857e-06, - "loss": 0.8839, - "step": 4759 - }, - { - "epoch": 0.572356159442073, - "grad_norm": 3.583855450465538, - "learning_rate": 1.6305643517546014e-06, - "loss": 1.0246, - "step": 4760 - }, - { - "epoch": 0.5724764023327121, - "grad_norm": 1.9032304956921708, - "learning_rate": 1.629798813005311e-06, - "loss": 1.0758, - "step": 4761 - }, - { - "epoch": 0.5725966452233512, - "grad_norm": 2.039920436392141, - "learning_rate": 1.6290333304152473e-06, - "loss": 0.9406, - "step": 4762 - }, - { - "epoch": 0.5727168881139902, - "grad_norm": 1.8518336616611724, - "learning_rate": 1.6282679041005314e-06, - "loss": 0.8032, - "step": 4763 - }, - { - "epoch": 0.5728371310046293, - "grad_norm": 2.1514425450658075, - "learning_rate": 1.6275025341772789e-06, - "loss": 1.0938, - "step": 4764 - }, - { - "epoch": 0.5729573738952685, - "grad_norm": 2.988536868662732, - "learning_rate": 1.626737220761596e-06, - "loss": 1.0574, - "step": 4765 - }, - { - "epoch": 0.5730776167859075, - "grad_norm": 1.850134705459234, - "learning_rate": 1.62597196396958e-06, - "loss": 1.0223, - "step": 4766 - }, - { - "epoch": 0.5731978596765466, - "grad_norm": 2.0059706258705456, - "learning_rate": 1.6252067639173197e-06, - "loss": 1.0832, - "step": 4767 - }, - { - "epoch": 0.5733181025671857, - "grad_norm": 2.1174473902124187, - "learning_rate": 1.6244416207208956e-06, - "loss": 0.9256, - "step": 4768 - }, - { - "epoch": 0.5734383454578248, - "grad_norm": 1.6265625410418914, - "learning_rate": 1.6236765344963787e-06, - "loss": 0.9674, - "step": 4769 - }, - { - "epoch": 0.5735585883484638, - "grad_norm": 4.951437292565365, - "learning_rate": 1.6229115053598322e-06, - "loss": 0.9264, - "step": 4770 - }, - { - "epoch": 0.573678831239103, - "grad_norm": 1.9038849213868672, - "learning_rate": 1.6221465334273108e-06, - "loss": 0.9444, - "step": 4771 - }, - { - "epoch": 0.5737990741297421, - "grad_norm": 2.318174462468246, - "learning_rate": 1.6213816188148593e-06, - "loss": 0.8401, - "step": 4772 - }, - { - "epoch": 0.5739193170203811, - "grad_norm": 1.7304427099744337, - "learning_rate": 1.6206167616385162e-06, - "loss": 0.9979, - "step": 4773 - }, - { - "epoch": 0.5740395599110203, - "grad_norm": 2.330334530174149, - "learning_rate": 1.6198519620143078e-06, - "loss": 0.9723, - "step": 4774 - }, - { - "epoch": 0.5741598028016593, - "grad_norm": 1.5934040217633616, - "learning_rate": 1.6190872200582546e-06, - "loss": 1.009, - "step": 4775 - }, - { - "epoch": 0.5742800456922984, - "grad_norm": 2.0906867777958373, - "learning_rate": 1.6183225358863676e-06, - "loss": 1.0109, - "step": 4776 - }, - { - "epoch": 0.5744002885829376, - "grad_norm": 2.049186740615066, - "learning_rate": 1.617557909614648e-06, - "loss": 0.9388, - "step": 4777 - }, - { - "epoch": 0.5745205314735766, - "grad_norm": 1.8957144591682216, - "learning_rate": 1.6167933413590899e-06, - "loss": 1.0855, - "step": 4778 - }, - { - "epoch": 0.5746407743642157, - "grad_norm": 1.9823775563665258, - "learning_rate": 1.6160288312356773e-06, - "loss": 1.1413, - "step": 4779 - }, - { - "epoch": 0.5747610172548548, - "grad_norm": 2.2659718215146536, - "learning_rate": 1.6152643793603857e-06, - "loss": 1.0527, - "step": 4780 - }, - { - "epoch": 0.5748812601454939, - "grad_norm": 1.8440880546357112, - "learning_rate": 1.6144999858491815e-06, - "loss": 1.1083, - "step": 4781 - }, - { - "epoch": 0.575001503036133, - "grad_norm": 1.6012756090699667, - "learning_rate": 1.6137356508180232e-06, - "loss": 1.0837, - "step": 4782 - }, - { - "epoch": 0.5751217459267721, - "grad_norm": 2.2215886828888785, - "learning_rate": 1.6129713743828593e-06, - "loss": 1.0436, - "step": 4783 - }, - { - "epoch": 0.5752419888174112, - "grad_norm": 1.4217966917503075, - "learning_rate": 1.6122071566596306e-06, - "loss": 0.9904, - "step": 4784 - }, - { - "epoch": 0.5753622317080502, - "grad_norm": 2.3316012038702416, - "learning_rate": 1.6114429977642674e-06, - "loss": 1.0548, - "step": 4785 - }, - { - "epoch": 0.5754824745986894, - "grad_norm": 2.4134611596806588, - "learning_rate": 1.6106788978126926e-06, - "loss": 0.9605, - "step": 4786 - }, - { - "epoch": 0.5756027174893285, - "grad_norm": 2.470544669758269, - "learning_rate": 1.6099148569208196e-06, - "loss": 1.0084, - "step": 4787 - }, - { - "epoch": 0.5757229603799675, - "grad_norm": 2.149443678628029, - "learning_rate": 1.6091508752045523e-06, - "loss": 0.8554, - "step": 4788 - }, - { - "epoch": 0.5758432032706067, - "grad_norm": 1.6033322116718347, - "learning_rate": 1.608386952779787e-06, - "loss": 1.0955, - "step": 4789 - }, - { - "epoch": 0.5759634461612457, - "grad_norm": 5.7628716404825235, - "learning_rate": 1.6076230897624098e-06, - "loss": 0.9754, - "step": 4790 - }, - { - "epoch": 0.5760836890518848, - "grad_norm": 2.5535500703693956, - "learning_rate": 1.6068592862682974e-06, - "loss": 1.0024, - "step": 4791 - }, - { - "epoch": 0.576203931942524, - "grad_norm": 1.8960971299413634, - "learning_rate": 1.6060955424133187e-06, - "loss": 0.9703, - "step": 4792 - }, - { - "epoch": 0.576324174833163, - "grad_norm": 2.0404460787651066, - "learning_rate": 1.6053318583133332e-06, - "loss": 1.1245, - "step": 4793 - }, - { - "epoch": 0.5764444177238021, - "grad_norm": 1.988785355671453, - "learning_rate": 1.6045682340841907e-06, - "loss": 0.9881, - "step": 4794 - }, - { - "epoch": 0.5765646606144411, - "grad_norm": 0.8085505156084813, - "learning_rate": 1.6038046698417336e-06, - "loss": 0.8432, - "step": 4795 - }, - { - "epoch": 0.5766849035050803, - "grad_norm": 2.0202607067812455, - "learning_rate": 1.6030411657017919e-06, - "loss": 0.9203, - "step": 4796 - }, - { - "epoch": 0.5768051463957193, - "grad_norm": 1.759447075297437, - "learning_rate": 1.6022777217801903e-06, - "loss": 1.0754, - "step": 4797 - }, - { - "epoch": 0.5769253892863584, - "grad_norm": 1.6926629241330056, - "learning_rate": 1.601514338192742e-06, - "loss": 0.9611, - "step": 4798 - }, - { - "epoch": 0.5770456321769976, - "grad_norm": 2.454724707464121, - "learning_rate": 1.6007510150552514e-06, - "loss": 0.9461, - "step": 4799 - }, - { - "epoch": 0.5771658750676366, - "grad_norm": 1.5321016862503312, - "learning_rate": 1.599987752483515e-06, - "loss": 0.8574, - "step": 4800 - }, - { - "epoch": 0.5772861179582757, - "grad_norm": 1.8137382355511558, - "learning_rate": 1.5992245505933184e-06, - "loss": 0.9057, - "step": 4801 - }, - { - "epoch": 0.5774063608489148, - "grad_norm": 1.9334779646914804, - "learning_rate": 1.5984614095004388e-06, - "loss": 0.9369, - "step": 4802 - }, - { - "epoch": 0.5775266037395539, - "grad_norm": 2.482872754680811, - "learning_rate": 1.5976983293206438e-06, - "loss": 1.0381, - "step": 4803 - }, - { - "epoch": 0.577646846630193, - "grad_norm": 2.302716670077452, - "learning_rate": 1.5969353101696928e-06, - "loss": 0.9445, - "step": 4804 - }, - { - "epoch": 0.5777670895208321, - "grad_norm": 1.886935709292237, - "learning_rate": 1.5961723521633341e-06, - "loss": 1.032, - "step": 4805 - }, - { - "epoch": 0.5778873324114712, - "grad_norm": 2.2844771460201736, - "learning_rate": 1.5954094554173097e-06, - "loss": 1.1382, - "step": 4806 - }, - { - "epoch": 0.5780075753021102, - "grad_norm": 1.7879664632737113, - "learning_rate": 1.5946466200473482e-06, - "loss": 1.0211, - "step": 4807 - }, - { - "epoch": 0.5781278181927494, - "grad_norm": 1.71170639806667, - "learning_rate": 1.5938838461691723e-06, - "loss": 1.0601, - "step": 4808 - }, - { - "epoch": 0.5782480610833884, - "grad_norm": 2.8666406260645765, - "learning_rate": 1.593121133898494e-06, - "loss": 1.0627, - "step": 4809 - }, - { - "epoch": 0.5783683039740275, - "grad_norm": 1.83354011005338, - "learning_rate": 1.592358483351016e-06, - "loss": 1.0193, - "step": 4810 - }, - { - "epoch": 0.5784885468646667, - "grad_norm": 2.151962218764821, - "learning_rate": 1.5915958946424326e-06, - "loss": 0.9521, - "step": 4811 - }, - { - "epoch": 0.5786087897553057, - "grad_norm": 2.206067116049264, - "learning_rate": 1.5908333678884271e-06, - "loss": 0.9703, - "step": 4812 - }, - { - "epoch": 0.5787290326459448, - "grad_norm": 1.9664792483408702, - "learning_rate": 1.5900709032046743e-06, - "loss": 0.9671, - "step": 4813 - }, - { - "epoch": 0.5788492755365839, - "grad_norm": 2.1483483868237507, - "learning_rate": 1.5893085007068391e-06, - "loss": 1.0152, - "step": 4814 - }, - { - "epoch": 0.578969518427223, - "grad_norm": 1.994140027202102, - "learning_rate": 1.5885461605105786e-06, - "loss": 0.9393, - "step": 4815 - }, - { - "epoch": 0.579089761317862, - "grad_norm": 1.9473977919313046, - "learning_rate": 1.5877838827315375e-06, - "loss": 0.9985, - "step": 4816 - }, - { - "epoch": 0.5792100042085012, - "grad_norm": 1.81777285777413, - "learning_rate": 1.587021667485355e-06, - "loss": 0.9212, - "step": 4817 - }, - { - "epoch": 0.5793302470991403, - "grad_norm": 1.758725010003661, - "learning_rate": 1.5862595148876559e-06, - "loss": 1.0107, - "step": 4818 - }, - { - "epoch": 0.5794504899897793, - "grad_norm": 2.435149893876299, - "learning_rate": 1.58549742505406e-06, - "loss": 0.9924, - "step": 4819 - }, - { - "epoch": 0.5795707328804185, - "grad_norm": 4.568142680704001, - "learning_rate": 1.5847353981001747e-06, - "loss": 0.9851, - "step": 4820 - }, - { - "epoch": 0.5796909757710575, - "grad_norm": 8.300722885824227, - "learning_rate": 1.5839734341415993e-06, - "loss": 0.9183, - "step": 4821 - }, - { - "epoch": 0.5798112186616966, - "grad_norm": 1.9111091717567263, - "learning_rate": 1.5832115332939238e-06, - "loss": 0.9972, - "step": 4822 - }, - { - "epoch": 0.5799314615523358, - "grad_norm": 1.6120271033666045, - "learning_rate": 1.5824496956727272e-06, - "loss": 0.9807, - "step": 4823 - }, - { - "epoch": 0.5800517044429748, - "grad_norm": 2.8907571298454737, - "learning_rate": 1.5816879213935797e-06, - "loss": 0.9582, - "step": 4824 - }, - { - "epoch": 0.5801719473336139, - "grad_norm": 2.090751892709337, - "learning_rate": 1.5809262105720416e-06, - "loss": 1.018, - "step": 4825 - }, - { - "epoch": 0.580292190224253, - "grad_norm": 4.356281892284845, - "learning_rate": 1.5801645633236644e-06, - "loss": 1.0218, - "step": 4826 - }, - { - "epoch": 0.5804124331148921, - "grad_norm": 1.869450239569625, - "learning_rate": 1.579402979763989e-06, - "loss": 1.0015, - "step": 4827 - }, - { - "epoch": 0.5805326760055312, - "grad_norm": 2.9685493802998812, - "learning_rate": 1.578641460008548e-06, - "loss": 1.0354, - "step": 4828 - }, - { - "epoch": 0.5806529188961702, - "grad_norm": 1.9512156194264263, - "learning_rate": 1.5778800041728613e-06, - "loss": 0.9069, - "step": 4829 - }, - { - "epoch": 0.5807731617868094, - "grad_norm": 1.493292993202189, - "learning_rate": 1.577118612372443e-06, - "loss": 0.8943, - "step": 4830 - }, - { - "epoch": 0.5808934046774484, - "grad_norm": 1.9073074409614523, - "learning_rate": 1.5763572847227943e-06, - "loss": 0.9362, - "step": 4831 - }, - { - "epoch": 0.5810136475680875, - "grad_norm": 1.8170947011014578, - "learning_rate": 1.5755960213394091e-06, - "loss": 1.0358, - "step": 4832 - }, - { - "epoch": 0.5811338904587267, - "grad_norm": 2.8129470469904625, - "learning_rate": 1.5748348223377703e-06, - "loss": 1.0148, - "step": 4833 - }, - { - "epoch": 0.5812541333493657, - "grad_norm": 1.5732292302626336, - "learning_rate": 1.5740736878333507e-06, - "loss": 1.0088, - "step": 4834 - }, - { - "epoch": 0.5813743762400048, - "grad_norm": 2.4922499693691553, - "learning_rate": 1.5733126179416143e-06, - "loss": 1.0119, - "step": 4835 - }, - { - "epoch": 0.5814946191306439, - "grad_norm": 2.500733554031308, - "learning_rate": 1.5725516127780137e-06, - "loss": 0.9512, - "step": 4836 - }, - { - "epoch": 0.581614862021283, - "grad_norm": 1.8539763863756202, - "learning_rate": 1.5717906724579943e-06, - "loss": 1.1132, - "step": 4837 - }, - { - "epoch": 0.581735104911922, - "grad_norm": 2.073625325231373, - "learning_rate": 1.571029797096989e-06, - "loss": 0.9078, - "step": 4838 - }, - { - "epoch": 0.5818553478025612, - "grad_norm": 1.6129442652741786, - "learning_rate": 1.570268986810423e-06, - "loss": 1.0158, - "step": 4839 - }, - { - "epoch": 0.5819755906932003, - "grad_norm": 1.925237489888277, - "learning_rate": 1.5695082417137096e-06, - "loss": 0.984, - "step": 4840 - }, - { - "epoch": 0.5820958335838393, - "grad_norm": 1.4991171145912843, - "learning_rate": 1.5687475619222539e-06, - "loss": 0.9835, - "step": 4841 - }, - { - "epoch": 0.5822160764744785, - "grad_norm": 2.0253834443806538, - "learning_rate": 1.5679869475514496e-06, - "loss": 0.9652, - "step": 4842 - }, - { - "epoch": 0.5823363193651175, - "grad_norm": 1.9976044255390575, - "learning_rate": 1.567226398716682e-06, - "loss": 1.042, - "step": 4843 - }, - { - "epoch": 0.5824565622557566, - "grad_norm": 5.9736707286120945, - "learning_rate": 1.566465915533326e-06, - "loss": 0.8491, - "step": 4844 - }, - { - "epoch": 0.5825768051463958, - "grad_norm": 1.758209048152931, - "learning_rate": 1.5657054981167458e-06, - "loss": 1.1127, - "step": 4845 - }, - { - "epoch": 0.5826970480370348, - "grad_norm": 1.7607331151405898, - "learning_rate": 1.5649451465822965e-06, - "loss": 0.9074, - "step": 4846 - }, - { - "epoch": 0.5828172909276739, - "grad_norm": 1.749065285783205, - "learning_rate": 1.5641848610453218e-06, - "loss": 1.0598, - "step": 4847 - }, - { - "epoch": 0.582937533818313, - "grad_norm": 2.018164284596366, - "learning_rate": 1.563424641621158e-06, - "loss": 1.0899, - "step": 4848 - }, - { - "epoch": 0.5830577767089521, - "grad_norm": 1.9600536892797118, - "learning_rate": 1.5626644884251282e-06, - "loss": 0.929, - "step": 4849 - }, - { - "epoch": 0.5831780195995911, - "grad_norm": 1.5741683156758939, - "learning_rate": 1.5619044015725488e-06, - "loss": 1.1095, - "step": 4850 - }, - { - "epoch": 0.5832982624902303, - "grad_norm": 2.1142516103041245, - "learning_rate": 1.5611443811787224e-06, - "loss": 1.1, - "step": 4851 - }, - { - "epoch": 0.5834185053808694, - "grad_norm": 1.9317488066308093, - "learning_rate": 1.560384427358945e-06, - "loss": 0.922, - "step": 4852 - }, - { - "epoch": 0.5835387482715084, - "grad_norm": 1.4162229609791206, - "learning_rate": 1.5596245402284998e-06, - "loss": 0.9536, - "step": 4853 - }, - { - "epoch": 0.5836589911621476, - "grad_norm": 1.560612033825268, - "learning_rate": 1.5588647199026619e-06, - "loss": 1.0461, - "step": 4854 - }, - { - "epoch": 0.5837792340527866, - "grad_norm": 2.5344033085644924, - "learning_rate": 1.5581049664966956e-06, - "loss": 1.1049, - "step": 4855 - }, - { - "epoch": 0.5838994769434257, - "grad_norm": 1.0516272900419377, - "learning_rate": 1.5573452801258545e-06, - "loss": 0.9222, - "step": 4856 - }, - { - "epoch": 0.5840197198340649, - "grad_norm": 1.7828435629329225, - "learning_rate": 1.5565856609053824e-06, - "loss": 0.8645, - "step": 4857 - }, - { - "epoch": 0.5841399627247039, - "grad_norm": 1.7567326598586683, - "learning_rate": 1.5558261089505127e-06, - "loss": 1.0221, - "step": 4858 - }, - { - "epoch": 0.584260205615343, - "grad_norm": 1.8847378585430428, - "learning_rate": 1.5550666243764697e-06, - "loss": 1.0232, - "step": 4859 - }, - { - "epoch": 0.584380448505982, - "grad_norm": 1.8667828109348725, - "learning_rate": 1.554307207298465e-06, - "loss": 0.9941, - "step": 4860 - }, - { - "epoch": 0.5845006913966212, - "grad_norm": 1.8588129043452963, - "learning_rate": 1.553547857831704e-06, - "loss": 1.017, - "step": 4861 - }, - { - "epoch": 0.5846209342872603, - "grad_norm": 1.033988553144302, - "learning_rate": 1.5527885760913771e-06, - "loss": 0.9518, - "step": 4862 - }, - { - "epoch": 0.5847411771778993, - "grad_norm": 1.6702252385725398, - "learning_rate": 1.552029362192668e-06, - "loss": 0.995, - "step": 4863 - }, - { - "epoch": 0.5848614200685385, - "grad_norm": 1.9943194183092847, - "learning_rate": 1.5512702162507478e-06, - "loss": 0.9537, - "step": 4864 - }, - { - "epoch": 0.5849816629591775, - "grad_norm": 1.078742748691415, - "learning_rate": 1.5505111383807792e-06, - "loss": 0.8019, - "step": 4865 - }, - { - "epoch": 0.5851019058498166, - "grad_norm": 1.6400688318601557, - "learning_rate": 1.5497521286979138e-06, - "loss": 1.0341, - "step": 4866 - }, - { - "epoch": 0.5852221487404557, - "grad_norm": 2.5327976346388645, - "learning_rate": 1.5489931873172927e-06, - "loss": 0.9747, - "step": 4867 - }, - { - "epoch": 0.5853423916310948, - "grad_norm": 1.8961078808308618, - "learning_rate": 1.5482343143540467e-06, - "loss": 1.0226, - "step": 4868 - }, - { - "epoch": 0.5854626345217339, - "grad_norm": 1.8828702419180114, - "learning_rate": 1.547475509923295e-06, - "loss": 1.0674, - "step": 4869 - }, - { - "epoch": 0.585582877412373, - "grad_norm": 0.7882503068671096, - "learning_rate": 1.5467167741401495e-06, - "loss": 0.8234, - "step": 4870 - }, - { - "epoch": 0.5857031203030121, - "grad_norm": 2.0421347438357738, - "learning_rate": 1.5459581071197083e-06, - "loss": 0.9433, - "step": 4871 - }, - { - "epoch": 0.5858233631936511, - "grad_norm": 3.4774101681216667, - "learning_rate": 1.5451995089770624e-06, - "loss": 1.0554, - "step": 4872 - }, - { - "epoch": 0.5859436060842903, - "grad_norm": 1.3055495652045581, - "learning_rate": 1.5444409798272885e-06, - "loss": 0.9466, - "step": 4873 - }, - { - "epoch": 0.5860638489749294, - "grad_norm": 9.168540439408146, - "learning_rate": 1.543682519785456e-06, - "loss": 1.0293, - "step": 4874 - }, - { - "epoch": 0.5861840918655684, - "grad_norm": 2.561578817637854, - "learning_rate": 1.5429241289666219e-06, - "loss": 1.0304, - "step": 4875 - }, - { - "epoch": 0.5863043347562076, - "grad_norm": 1.9032838601643374, - "learning_rate": 1.5421658074858342e-06, - "loss": 0.9242, - "step": 4876 - }, - { - "epoch": 0.5864245776468466, - "grad_norm": 2.300998230629663, - "learning_rate": 1.5414075554581298e-06, - "loss": 0.8905, - "step": 4877 - }, - { - "epoch": 0.5865448205374857, - "grad_norm": 2.620679205732577, - "learning_rate": 1.5406493729985348e-06, - "loss": 1.0103, - "step": 4878 - }, - { - "epoch": 0.5866650634281249, - "grad_norm": 34.479411740694985, - "learning_rate": 1.5398912602220644e-06, - "loss": 0.9487, - "step": 4879 - }, - { - "epoch": 0.5867853063187639, - "grad_norm": 2.1414145245787095, - "learning_rate": 1.539133217243724e-06, - "loss": 1.0108, - "step": 4880 - }, - { - "epoch": 0.586905549209403, - "grad_norm": 2.3398638232457523, - "learning_rate": 1.5383752441785081e-06, - "loss": 0.9856, - "step": 4881 - }, - { - "epoch": 0.5870257921000421, - "grad_norm": 2.125441673603829, - "learning_rate": 1.5376173411414003e-06, - "loss": 1.0946, - "step": 4882 - }, - { - "epoch": 0.5871460349906812, - "grad_norm": 2.0335207644308277, - "learning_rate": 1.5368595082473753e-06, - "loss": 1.0129, - "step": 4883 - }, - { - "epoch": 0.5872662778813202, - "grad_norm": 1.643864222282007, - "learning_rate": 1.5361017456113935e-06, - "loss": 1.0109, - "step": 4884 - }, - { - "epoch": 0.5873865207719594, - "grad_norm": 1.9265647052835804, - "learning_rate": 1.5353440533484085e-06, - "loss": 1.0857, - "step": 4885 - }, - { - "epoch": 0.5875067636625985, - "grad_norm": 1.987881303594094, - "learning_rate": 1.534586431573361e-06, - "loss": 0.8897, - "step": 4886 - }, - { - "epoch": 0.5876270065532375, - "grad_norm": 1.8392631764823357, - "learning_rate": 1.5338288804011817e-06, - "loss": 1.0097, - "step": 4887 - }, - { - "epoch": 0.5877472494438767, - "grad_norm": 2.426048964318426, - "learning_rate": 1.533071399946791e-06, - "loss": 0.9389, - "step": 4888 - }, - { - "epoch": 0.5878674923345157, - "grad_norm": 1.9684652697827982, - "learning_rate": 1.5323139903250977e-06, - "loss": 0.8059, - "step": 4889 - }, - { - "epoch": 0.5879877352251548, - "grad_norm": 1.5852915383217716, - "learning_rate": 1.5315566516510002e-06, - "loss": 0.9977, - "step": 4890 - }, - { - "epoch": 0.5881079781157939, - "grad_norm": 1.7330509664940095, - "learning_rate": 1.5307993840393857e-06, - "loss": 0.9068, - "step": 4891 - }, - { - "epoch": 0.588228221006433, - "grad_norm": 3.067324205753791, - "learning_rate": 1.530042187605132e-06, - "loss": 1.04, - "step": 4892 - }, - { - "epoch": 0.5883484638970721, - "grad_norm": 1.3604565844830347, - "learning_rate": 1.5292850624631044e-06, - "loss": 1.0682, - "step": 4893 - }, - { - "epoch": 0.5884687067877111, - "grad_norm": 2.49791392076186, - "learning_rate": 1.5285280087281593e-06, - "loss": 1.0258, - "step": 4894 - }, - { - "epoch": 0.5885889496783503, - "grad_norm": 0.6502623789590527, - "learning_rate": 1.5277710265151398e-06, - "loss": 0.8012, - "step": 4895 - }, - { - "epoch": 0.5887091925689893, - "grad_norm": 3.207756453004497, - "learning_rate": 1.5270141159388803e-06, - "loss": 1.0042, - "step": 4896 - }, - { - "epoch": 0.5888294354596284, - "grad_norm": 1.6324561793476218, - "learning_rate": 1.526257277114203e-06, - "loss": 1.0259, - "step": 4897 - }, - { - "epoch": 0.5889496783502676, - "grad_norm": 1.7354459463201029, - "learning_rate": 1.5255005101559201e-06, - "loss": 1.037, - "step": 4898 - }, - { - "epoch": 0.5890699212409066, - "grad_norm": 1.8631355020712717, - "learning_rate": 1.524743815178833e-06, - "loss": 0.9975, - "step": 4899 - }, - { - "epoch": 0.5891901641315457, - "grad_norm": 2.02135522402197, - "learning_rate": 1.5239871922977315e-06, - "loss": 1.0355, - "step": 4900 - }, - { - "epoch": 0.5893104070221848, - "grad_norm": 1.60731485596907, - "learning_rate": 1.523230641627394e-06, - "loss": 1.1178, - "step": 4901 - }, - { - "epoch": 0.5894306499128239, - "grad_norm": 2.7553350543258217, - "learning_rate": 1.5224741632825888e-06, - "loss": 0.9588, - "step": 4902 - }, - { - "epoch": 0.589550892803463, - "grad_norm": 1.5016464098807072, - "learning_rate": 1.521717757378074e-06, - "loss": 0.9174, - "step": 4903 - }, - { - "epoch": 0.5896711356941021, - "grad_norm": 1.6338055524888835, - "learning_rate": 1.5209614240285943e-06, - "loss": 0.917, - "step": 4904 - }, - { - "epoch": 0.5897913785847412, - "grad_norm": 1.9520596459687594, - "learning_rate": 1.520205163348887e-06, - "loss": 1.0848, - "step": 4905 - }, - { - "epoch": 0.5899116214753802, - "grad_norm": 0.7445480152656242, - "learning_rate": 1.519448975453674e-06, - "loss": 0.8107, - "step": 4906 - }, - { - "epoch": 0.5900318643660194, - "grad_norm": 2.0813838102380924, - "learning_rate": 1.5186928604576696e-06, - "loss": 0.9886, - "step": 4907 - }, - { - "epoch": 0.5901521072566585, - "grad_norm": 2.0411443496339206, - "learning_rate": 1.5179368184755752e-06, - "loss": 1.0011, - "step": 4908 - }, - { - "epoch": 0.5902723501472975, - "grad_norm": 1.5585122146886563, - "learning_rate": 1.5171808496220821e-06, - "loss": 1.0583, - "step": 4909 - }, - { - "epoch": 0.5903925930379367, - "grad_norm": 1.8122515672573534, - "learning_rate": 1.5164249540118708e-06, - "loss": 1.0419, - "step": 4910 - }, - { - "epoch": 0.5905128359285757, - "grad_norm": 3.568919772036547, - "learning_rate": 1.5156691317596093e-06, - "loss": 1.0627, - "step": 4911 - }, - { - "epoch": 0.5906330788192148, - "grad_norm": 1.9932315857966203, - "learning_rate": 1.5149133829799556e-06, - "loss": 0.9015, - "step": 4912 - }, - { - "epoch": 0.590753321709854, - "grad_norm": 2.6253029784692976, - "learning_rate": 1.5141577077875556e-06, - "loss": 1.0264, - "step": 4913 - }, - { - "epoch": 0.590873564600493, - "grad_norm": 1.9921426207031272, - "learning_rate": 1.5134021062970451e-06, - "loss": 0.9528, - "step": 4914 - }, - { - "epoch": 0.5909938074911321, - "grad_norm": 1.7003400238170712, - "learning_rate": 1.5126465786230483e-06, - "loss": 1.0362, - "step": 4915 - }, - { - "epoch": 0.5911140503817712, - "grad_norm": 1.7025491283347187, - "learning_rate": 1.5118911248801787e-06, - "loss": 1.0422, - "step": 4916 - }, - { - "epoch": 0.5912342932724103, - "grad_norm": 2.8851118971149994, - "learning_rate": 1.5111357451830364e-06, - "loss": 1.0245, - "step": 4917 - }, - { - "epoch": 0.5913545361630493, - "grad_norm": 2.2503089692647364, - "learning_rate": 1.5103804396462131e-06, - "loss": 0.9417, - "step": 4918 - }, - { - "epoch": 0.5914747790536885, - "grad_norm": 2.1930240092774484, - "learning_rate": 1.5096252083842877e-06, - "loss": 1.0326, - "step": 4919 - }, - { - "epoch": 0.5915950219443276, - "grad_norm": 2.313519433567531, - "learning_rate": 1.5088700515118285e-06, - "loss": 1.0677, - "step": 4920 - }, - { - "epoch": 0.5917152648349666, - "grad_norm": 1.8401443009184129, - "learning_rate": 1.508114969143392e-06, - "loss": 0.9026, - "step": 4921 - }, - { - "epoch": 0.5918355077256057, - "grad_norm": 2.277407401554726, - "learning_rate": 1.5073599613935238e-06, - "loss": 1.0041, - "step": 4922 - }, - { - "epoch": 0.5919557506162448, - "grad_norm": 2.3600542720114723, - "learning_rate": 1.5066050283767574e-06, - "loss": 0.8077, - "step": 4923 - }, - { - "epoch": 0.5920759935068839, - "grad_norm": 3.3615770796370725, - "learning_rate": 1.505850170207616e-06, - "loss": 1.0626, - "step": 4924 - }, - { - "epoch": 0.592196236397523, - "grad_norm": 2.2029596361366375, - "learning_rate": 1.505095387000611e-06, - "loss": 1.0034, - "step": 4925 - }, - { - "epoch": 0.5923164792881621, - "grad_norm": 1.831518691208899, - "learning_rate": 1.504340678870242e-06, - "loss": 0.9789, - "step": 4926 - }, - { - "epoch": 0.5924367221788012, - "grad_norm": 1.8683404275905235, - "learning_rate": 1.5035860459309989e-06, - "loss": 1.1223, - "step": 4927 - }, - { - "epoch": 0.5925569650694402, - "grad_norm": 1.670511777318493, - "learning_rate": 1.5028314882973568e-06, - "loss": 0.8608, - "step": 4928 - }, - { - "epoch": 0.5926772079600794, - "grad_norm": 1.988024861830788, - "learning_rate": 1.502077006083783e-06, - "loss": 1.0706, - "step": 4929 - }, - { - "epoch": 0.5927974508507184, - "grad_norm": 1.6909854356622431, - "learning_rate": 1.5013225994047315e-06, - "loss": 1.0017, - "step": 4930 - }, - { - "epoch": 0.5929176937413575, - "grad_norm": 1.6650611137652702, - "learning_rate": 1.5005682683746452e-06, - "loss": 1.0346, - "step": 4931 - }, - { - "epoch": 0.5930379366319967, - "grad_norm": 2.689733508640524, - "learning_rate": 1.4998140131079553e-06, - "loss": 0.9539, - "step": 4932 - }, - { - "epoch": 0.5931581795226357, - "grad_norm": 1.7445418525370195, - "learning_rate": 1.4990598337190821e-06, - "loss": 0.9667, - "step": 4933 - }, - { - "epoch": 0.5932784224132748, - "grad_norm": 4.721383073833643, - "learning_rate": 1.4983057303224338e-06, - "loss": 0.9073, - "step": 4934 - }, - { - "epoch": 0.5933986653039139, - "grad_norm": 1.7017968274787099, - "learning_rate": 1.4975517030324072e-06, - "loss": 1.0939, - "step": 4935 - }, - { - "epoch": 0.593518908194553, - "grad_norm": 0.8283461509367865, - "learning_rate": 1.4967977519633882e-06, - "loss": 0.882, - "step": 4936 - }, - { - "epoch": 0.593639151085192, - "grad_norm": 2.6800344704076555, - "learning_rate": 1.4960438772297494e-06, - "loss": 1.0241, - "step": 4937 - }, - { - "epoch": 0.5937593939758312, - "grad_norm": 2.2388940741155805, - "learning_rate": 1.495290078945855e-06, - "loss": 0.9625, - "step": 4938 - }, - { - "epoch": 0.5938796368664703, - "grad_norm": 2.1606594830873904, - "learning_rate": 1.4945363572260529e-06, - "loss": 0.9688, - "step": 4939 - }, - { - "epoch": 0.5939998797571093, - "grad_norm": 2.825671738961762, - "learning_rate": 1.4937827121846845e-06, - "loss": 0.8989, - "step": 4940 - }, - { - "epoch": 0.5941201226477485, - "grad_norm": 1.5816197743246219, - "learning_rate": 1.4930291439360755e-06, - "loss": 0.9694, - "step": 4941 - }, - { - "epoch": 0.5942403655383875, - "grad_norm": 3.2400117122768117, - "learning_rate": 1.4922756525945427e-06, - "loss": 1.0236, - "step": 4942 - }, - { - "epoch": 0.5943606084290266, - "grad_norm": 0.77556002901907, - "learning_rate": 1.4915222382743894e-06, - "loss": 0.8499, - "step": 4943 - }, - { - "epoch": 0.5944808513196658, - "grad_norm": 2.67893513163041, - "learning_rate": 1.4907689010899085e-06, - "loss": 0.9508, - "step": 4944 - }, - { - "epoch": 0.5946010942103048, - "grad_norm": 2.1320618366914394, - "learning_rate": 1.4900156411553804e-06, - "loss": 0.8505, - "step": 4945 - }, - { - "epoch": 0.5947213371009439, - "grad_norm": 2.141345717233182, - "learning_rate": 1.4892624585850739e-06, - "loss": 1.0945, - "step": 4946 - }, - { - "epoch": 0.594841579991583, - "grad_norm": 1.8344514067299695, - "learning_rate": 1.4885093534932465e-06, - "loss": 1.0155, - "step": 4947 - }, - { - "epoch": 0.5949618228822221, - "grad_norm": 2.2099687051067645, - "learning_rate": 1.4877563259941433e-06, - "loss": 0.95, - "step": 4948 - }, - { - "epoch": 0.5950820657728612, - "grad_norm": 1.895809725380372, - "learning_rate": 1.4870033762019988e-06, - "loss": 0.9117, - "step": 4949 - }, - { - "epoch": 0.5952023086635003, - "grad_norm": 1.5960667544867388, - "learning_rate": 1.4862505042310334e-06, - "loss": 0.9587, - "step": 4950 - }, - { - "epoch": 0.5953225515541394, - "grad_norm": 1.4138785395381919, - "learning_rate": 1.4854977101954587e-06, - "loss": 0.9228, - "step": 4951 - }, - { - "epoch": 0.5954427944447784, - "grad_norm": 1.8415718790692934, - "learning_rate": 1.4847449942094716e-06, - "loss": 1.0857, - "step": 4952 - }, - { - "epoch": 0.5955630373354175, - "grad_norm": 1.8482565680827923, - "learning_rate": 1.4839923563872598e-06, - "loss": 1.0961, - "step": 4953 - }, - { - "epoch": 0.5956832802260567, - "grad_norm": 1.8286430081445535, - "learning_rate": 1.483239796842997e-06, - "loss": 0.9877, - "step": 4954 - }, - { - "epoch": 0.5958035231166957, - "grad_norm": 1.6329724539204762, - "learning_rate": 1.4824873156908462e-06, - "loss": 1.0687, - "step": 4955 - }, - { - "epoch": 0.5959237660073348, - "grad_norm": 2.474315406060475, - "learning_rate": 1.4817349130449584e-06, - "loss": 0.9875, - "step": 4956 - }, - { - "epoch": 0.5960440088979739, - "grad_norm": 9.588711984514534, - "learning_rate": 1.4809825890194717e-06, - "loss": 1.055, - "step": 4957 - }, - { - "epoch": 0.596164251788613, - "grad_norm": 1.704592501251151, - "learning_rate": 1.4802303437285139e-06, - "loss": 1.0011, - "step": 4958 - }, - { - "epoch": 0.596284494679252, - "grad_norm": 2.205196624085686, - "learning_rate": 1.4794781772861994e-06, - "loss": 1.0337, - "step": 4959 - }, - { - "epoch": 0.5964047375698912, - "grad_norm": 1.9710591413352765, - "learning_rate": 1.4787260898066324e-06, - "loss": 0.9024, - "step": 4960 - }, - { - "epoch": 0.5965249804605303, - "grad_norm": 2.246114024195183, - "learning_rate": 1.4779740814039023e-06, - "loss": 1.0821, - "step": 4961 - }, - { - "epoch": 0.5966452233511693, - "grad_norm": 1.786654188209817, - "learning_rate": 1.4772221521920894e-06, - "loss": 0.9068, - "step": 4962 - }, - { - "epoch": 0.5967654662418085, - "grad_norm": 2.4983072271970164, - "learning_rate": 1.4764703022852598e-06, - "loss": 0.9696, - "step": 4963 - }, - { - "epoch": 0.5968857091324475, - "grad_norm": 1.7130161008907683, - "learning_rate": 1.4757185317974696e-06, - "loss": 1.001, - "step": 4964 - }, - { - "epoch": 0.5970059520230866, - "grad_norm": 2.006851263057358, - "learning_rate": 1.474966840842761e-06, - "loss": 0.9442, - "step": 4965 - }, - { - "epoch": 0.5971261949137258, - "grad_norm": 1.8054465146843492, - "learning_rate": 1.4742152295351655e-06, - "loss": 1.0939, - "step": 4966 - }, - { - "epoch": 0.5972464378043648, - "grad_norm": 2.187674706158253, - "learning_rate": 1.4734636979887016e-06, - "loss": 0.8707, - "step": 4967 - }, - { - "epoch": 0.5973666806950039, - "grad_norm": 1.8464944102008747, - "learning_rate": 1.4727122463173755e-06, - "loss": 1.1321, - "step": 4968 - }, - { - "epoch": 0.597486923585643, - "grad_norm": 1.994563761176075, - "learning_rate": 1.471960874635183e-06, - "loss": 0.8756, - "step": 4969 - }, - { - "epoch": 0.5976071664762821, - "grad_norm": 1.9090155879763577, - "learning_rate": 1.4712095830561055e-06, - "loss": 0.9343, - "step": 4970 - }, - { - "epoch": 0.5977274093669211, - "grad_norm": 2.6869872735208005, - "learning_rate": 1.4704583716941147e-06, - "loss": 1.0389, - "step": 4971 - }, - { - "epoch": 0.5978476522575603, - "grad_norm": 1.460352653083634, - "learning_rate": 1.4697072406631672e-06, - "loss": 0.9506, - "step": 4972 - }, - { - "epoch": 0.5979678951481994, - "grad_norm": 1.6998963857377491, - "learning_rate": 1.4689561900772097e-06, - "loss": 0.9586, - "step": 4973 - }, - { - "epoch": 0.5980881380388384, - "grad_norm": 2.3532565785535615, - "learning_rate": 1.4682052200501758e-06, - "loss": 0.9608, - "step": 4974 - }, - { - "epoch": 0.5982083809294776, - "grad_norm": 1.8392252601286003, - "learning_rate": 1.4674543306959876e-06, - "loss": 1.026, - "step": 4975 - }, - { - "epoch": 0.5983286238201166, - "grad_norm": 2.1100641325835863, - "learning_rate": 1.4667035221285535e-06, - "loss": 1.0726, - "step": 4976 - }, - { - "epoch": 0.5984488667107557, - "grad_norm": 1.6797993467485675, - "learning_rate": 1.4659527944617715e-06, - "loss": 0.97, - "step": 4977 - }, - { - "epoch": 0.5985691096013949, - "grad_norm": 1.9466794278540887, - "learning_rate": 1.465202147809526e-06, - "loss": 0.9904, - "step": 4978 - }, - { - "epoch": 0.5986893524920339, - "grad_norm": 1.7765788350155325, - "learning_rate": 1.4644515822856888e-06, - "loss": 0.988, - "step": 4979 - }, - { - "epoch": 0.598809595382673, - "grad_norm": 0.7785520124951694, - "learning_rate": 1.4637010980041215e-06, - "loss": 0.819, - "step": 4980 - }, - { - "epoch": 0.5989298382733121, - "grad_norm": 2.0956713842030736, - "learning_rate": 1.4629506950786707e-06, - "loss": 1.1327, - "step": 4981 - }, - { - "epoch": 0.5990500811639512, - "grad_norm": 0.7997067942347044, - "learning_rate": 1.4622003736231733e-06, - "loss": 0.818, - "step": 4982 - }, - { - "epoch": 0.5991703240545903, - "grad_norm": 2.7153841427661103, - "learning_rate": 1.461450133751451e-06, - "loss": 1.0355, - "step": 4983 - }, - { - "epoch": 0.5992905669452293, - "grad_norm": 1.9013806045286115, - "learning_rate": 1.4606999755773153e-06, - "loss": 0.9934, - "step": 4984 - }, - { - "epoch": 0.5994108098358685, - "grad_norm": 1.5484604080307147, - "learning_rate": 1.4599498992145643e-06, - "loss": 1.0449, - "step": 4985 - }, - { - "epoch": 0.5995310527265075, - "grad_norm": 1.8922597579956448, - "learning_rate": 1.4591999047769846e-06, - "loss": 0.9385, - "step": 4986 - }, - { - "epoch": 0.5996512956171466, - "grad_norm": 1.9148824375809235, - "learning_rate": 1.4584499923783486e-06, - "loss": 0.9862, - "step": 4987 - }, - { - "epoch": 0.5997715385077858, - "grad_norm": 1.6289293026836615, - "learning_rate": 1.457700162132419e-06, - "loss": 0.9899, - "step": 4988 - }, - { - "epoch": 0.5998917813984248, - "grad_norm": 2.300846843723568, - "learning_rate": 1.4569504141529433e-06, - "loss": 0.9585, - "step": 4989 - }, - { - "epoch": 0.6000120242890639, - "grad_norm": 1.8837383732123654, - "learning_rate": 1.456200748553658e-06, - "loss": 0.9472, - "step": 4990 - }, - { - "epoch": 0.600132267179703, - "grad_norm": 1.7792178156317529, - "learning_rate": 1.455451165448287e-06, - "loss": 1.0127, - "step": 4991 - }, - { - "epoch": 0.6002525100703421, - "grad_norm": 2.5623634115893394, - "learning_rate": 1.4547016649505407e-06, - "loss": 0.9644, - "step": 4992 - }, - { - "epoch": 0.6003727529609811, - "grad_norm": 2.028926988215875, - "learning_rate": 1.4539522471741193e-06, - "loss": 1.0788, - "step": 4993 - }, - { - "epoch": 0.6004929958516203, - "grad_norm": 1.8765847184948716, - "learning_rate": 1.4532029122327067e-06, - "loss": 0.9364, - "step": 4994 - }, - { - "epoch": 0.6006132387422594, - "grad_norm": 1.851145443845353, - "learning_rate": 1.4524536602399783e-06, - "loss": 0.9887, - "step": 4995 - }, - { - "epoch": 0.6007334816328984, - "grad_norm": 1.7715411024673753, - "learning_rate": 1.4517044913095938e-06, - "loss": 0.9961, - "step": 4996 - }, - { - "epoch": 0.6008537245235376, - "grad_norm": 1.7457783049269495, - "learning_rate": 1.4509554055552022e-06, - "loss": 1.0423, - "step": 4997 - }, - { - "epoch": 0.6009739674141766, - "grad_norm": 2.28588104491228, - "learning_rate": 1.450206403090439e-06, - "loss": 1.0692, - "step": 4998 - }, - { - "epoch": 0.6010942103048157, - "grad_norm": 2.1954587840149378, - "learning_rate": 1.4494574840289274e-06, - "loss": 1.0885, - "step": 4999 - }, - { - "epoch": 0.6012144531954549, - "grad_norm": 1.7668419079267395, - "learning_rate": 1.4487086484842782e-06, - "loss": 0.9737, - "step": 5000 - }, - { - "epoch": 0.6013346960860939, - "grad_norm": 2.151548816911658, - "learning_rate": 1.4479598965700878e-06, - "loss": 0.8284, - "step": 5001 - }, - { - "epoch": 0.601454938976733, - "grad_norm": 2.216112745481058, - "learning_rate": 1.4472112283999427e-06, - "loss": 0.9107, - "step": 5002 - }, - { - "epoch": 0.6015751818673721, - "grad_norm": 2.253954485239931, - "learning_rate": 1.4464626440874143e-06, - "loss": 0.9287, - "step": 5003 - }, - { - "epoch": 0.6016954247580112, - "grad_norm": 3.230632588707322, - "learning_rate": 1.4457141437460636e-06, - "loss": 0.9844, - "step": 5004 - }, - { - "epoch": 0.6018156676486502, - "grad_norm": 2.187545666899159, - "learning_rate": 1.444965727489436e-06, - "loss": 0.9586, - "step": 5005 - }, - { - "epoch": 0.6019359105392894, - "grad_norm": 2.0282782310120724, - "learning_rate": 1.444217395431066e-06, - "loss": 0.8599, - "step": 5006 - }, - { - "epoch": 0.6020561534299285, - "grad_norm": 0.9543465775090328, - "learning_rate": 1.4434691476844755e-06, - "loss": 0.816, - "step": 5007 - }, - { - "epoch": 0.6021763963205675, - "grad_norm": 2.2673569997763505, - "learning_rate": 1.4427209843631729e-06, - "loss": 0.9021, - "step": 5008 - }, - { - "epoch": 0.6022966392112067, - "grad_norm": 1.868111864428573, - "learning_rate": 1.4419729055806534e-06, - "loss": 1.049, - "step": 5009 - }, - { - "epoch": 0.6024168821018457, - "grad_norm": 1.6439363033178795, - "learning_rate": 1.441224911450401e-06, - "loss": 1.0584, - "step": 5010 - }, - { - "epoch": 0.6025371249924848, - "grad_norm": 1.5286888190165953, - "learning_rate": 1.4404770020858851e-06, - "loss": 1.0538, - "step": 5011 - }, - { - "epoch": 0.602657367883124, - "grad_norm": 1.4977151635272878, - "learning_rate": 1.439729177600563e-06, - "loss": 1.0918, - "step": 5012 - }, - { - "epoch": 0.602777610773763, - "grad_norm": 2.670053318362836, - "learning_rate": 1.4389814381078793e-06, - "loss": 0.9554, - "step": 5013 - }, - { - "epoch": 0.6028978536644021, - "grad_norm": 4.0879902396934735, - "learning_rate": 1.438233783721265e-06, - "loss": 1.0295, - "step": 5014 - }, - { - "epoch": 0.6030180965550412, - "grad_norm": 2.247279748021848, - "learning_rate": 1.43748621455414e-06, - "loss": 1.0163, - "step": 5015 - }, - { - "epoch": 0.6031383394456803, - "grad_norm": 2.223231772410376, - "learning_rate": 1.4367387307199082e-06, - "loss": 1.0275, - "step": 5016 - }, - { - "epoch": 0.6032585823363193, - "grad_norm": 2.838142315598157, - "learning_rate": 1.4359913323319632e-06, - "loss": 1.0491, - "step": 5017 - }, - { - "epoch": 0.6033788252269584, - "grad_norm": 1.7546118865191964, - "learning_rate": 1.4352440195036847e-06, - "loss": 1.005, - "step": 5018 - }, - { - "epoch": 0.6034990681175976, - "grad_norm": 1.5442833044911253, - "learning_rate": 1.4344967923484395e-06, - "loss": 1.0266, - "step": 5019 - }, - { - "epoch": 0.6036193110082366, - "grad_norm": 4.493070777632752, - "learning_rate": 1.433749650979581e-06, - "loss": 0.9493, - "step": 5020 - }, - { - "epoch": 0.6037395538988757, - "grad_norm": 1.9239894741190753, - "learning_rate": 1.433002595510451e-06, - "loss": 0.9182, - "step": 5021 - }, - { - "epoch": 0.6038597967895148, - "grad_norm": 1.9511559899096866, - "learning_rate": 1.4322556260543757e-06, - "loss": 0.9472, - "step": 5022 - }, - { - "epoch": 0.6039800396801539, - "grad_norm": 0.9134379152185447, - "learning_rate": 1.4315087427246703e-06, - "loss": 0.8878, - "step": 5023 - }, - { - "epoch": 0.604100282570793, - "grad_norm": 0.9413238742607971, - "learning_rate": 1.4307619456346372e-06, - "loss": 0.8506, - "step": 5024 - }, - { - "epoch": 0.6042205254614321, - "grad_norm": 3.5763660088623066, - "learning_rate": 1.430015234897564e-06, - "loss": 0.9588, - "step": 5025 - }, - { - "epoch": 0.6043407683520712, - "grad_norm": 1.5874471760769606, - "learning_rate": 1.4292686106267274e-06, - "loss": 0.8944, - "step": 5026 - }, - { - "epoch": 0.6044610112427102, - "grad_norm": 1.6614542117012903, - "learning_rate": 1.4285220729353876e-06, - "loss": 0.9911, - "step": 5027 - }, - { - "epoch": 0.6045812541333494, - "grad_norm": 3.0606412891268406, - "learning_rate": 1.4277756219367957e-06, - "loss": 1.0133, - "step": 5028 - }, - { - "epoch": 0.6047014970239885, - "grad_norm": 1.9724176545837775, - "learning_rate": 1.4270292577441864e-06, - "loss": 1.0285, - "step": 5029 - }, - { - "epoch": 0.6048217399146275, - "grad_norm": 1.892101499460067, - "learning_rate": 1.4262829804707836e-06, - "loss": 0.9456, - "step": 5030 - }, - { - "epoch": 0.6049419828052667, - "grad_norm": 1.3663988290959284, - "learning_rate": 1.4255367902297958e-06, - "loss": 0.9238, - "step": 5031 - }, - { - "epoch": 0.6050622256959057, - "grad_norm": 2.078398722354417, - "learning_rate": 1.4247906871344215e-06, - "loss": 1.0231, - "step": 5032 - }, - { - "epoch": 0.6051824685865448, - "grad_norm": 1.9725381645890905, - "learning_rate": 1.4240446712978415e-06, - "loss": 0.9872, - "step": 5033 - }, - { - "epoch": 0.605302711477184, - "grad_norm": 1.8675464620261897, - "learning_rate": 1.423298742833227e-06, - "loss": 0.9701, - "step": 5034 - }, - { - "epoch": 0.605422954367823, - "grad_norm": 1.6643758129614958, - "learning_rate": 1.4225529018537352e-06, - "loss": 0.9521, - "step": 5035 - }, - { - "epoch": 0.6055431972584621, - "grad_norm": 1.7063540025251482, - "learning_rate": 1.4218071484725082e-06, - "loss": 1.0092, - "step": 5036 - }, - { - "epoch": 0.6056634401491012, - "grad_norm": 2.1025500618398425, - "learning_rate": 1.4210614828026786e-06, - "loss": 0.9899, - "step": 5037 - }, - { - "epoch": 0.6057836830397403, - "grad_norm": 1.937416259432609, - "learning_rate": 1.4203159049573605e-06, - "loss": 0.9762, - "step": 5038 - }, - { - "epoch": 0.6059039259303793, - "grad_norm": 1.9735788133152785, - "learning_rate": 1.4195704150496593e-06, - "loss": 1.1028, - "step": 5039 - }, - { - "epoch": 0.6060241688210185, - "grad_norm": 1.8292975944405216, - "learning_rate": 1.4188250131926639e-06, - "loss": 0.9652, - "step": 5040 - }, - { - "epoch": 0.6061444117116576, - "grad_norm": 1.988439887112976, - "learning_rate": 1.4180796994994525e-06, - "loss": 1.0466, - "step": 5041 - }, - { - "epoch": 0.6062646546022966, - "grad_norm": 1.8942808005366412, - "learning_rate": 1.4173344740830877e-06, - "loss": 0.9536, - "step": 5042 - }, - { - "epoch": 0.6063848974929358, - "grad_norm": 1.519722737698607, - "learning_rate": 1.4165893370566206e-06, - "loss": 0.9336, - "step": 5043 - }, - { - "epoch": 0.6065051403835748, - "grad_norm": 1.8270973512408295, - "learning_rate": 1.4158442885330865e-06, - "loss": 0.9959, - "step": 5044 - }, - { - "epoch": 0.6066253832742139, - "grad_norm": 2.0253930970106055, - "learning_rate": 1.4150993286255094e-06, - "loss": 1.0185, - "step": 5045 - }, - { - "epoch": 0.6067456261648531, - "grad_norm": 1.8868831460226636, - "learning_rate": 1.4143544574468993e-06, - "loss": 1.0175, - "step": 5046 - }, - { - "epoch": 0.6068658690554921, - "grad_norm": 2.1080667465399268, - "learning_rate": 1.4136096751102523e-06, - "loss": 1.0508, - "step": 5047 - }, - { - "epoch": 0.6069861119461312, - "grad_norm": 1.937053382835094, - "learning_rate": 1.4128649817285516e-06, - "loss": 1.0562, - "step": 5048 - }, - { - "epoch": 0.6071063548367702, - "grad_norm": 1.9098197156892969, - "learning_rate": 1.412120377414766e-06, - "loss": 0.8612, - "step": 5049 - }, - { - "epoch": 0.6072265977274094, - "grad_norm": 1.9976038884538685, - "learning_rate": 1.4113758622818522e-06, - "loss": 0.9337, - "step": 5050 - }, - { - "epoch": 0.6073468406180484, - "grad_norm": 2.155064906760317, - "learning_rate": 1.410631436442751e-06, - "loss": 1.0548, - "step": 5051 - }, - { - "epoch": 0.6074670835086875, - "grad_norm": 2.191957999279518, - "learning_rate": 1.4098871000103936e-06, - "loss": 1.0949, - "step": 5052 - }, - { - "epoch": 0.6075873263993267, - "grad_norm": 1.8197921860651747, - "learning_rate": 1.409142853097693e-06, - "loss": 1.05, - "step": 5053 - }, - { - "epoch": 0.6077075692899657, - "grad_norm": 2.4816818514893724, - "learning_rate": 1.408398695817553e-06, - "loss": 1.0252, - "step": 5054 - }, - { - "epoch": 0.6078278121806048, - "grad_norm": 1.5953772969775217, - "learning_rate": 1.4076546282828593e-06, - "loss": 0.9255, - "step": 5055 - }, - { - "epoch": 0.6079480550712439, - "grad_norm": 2.5496962198060884, - "learning_rate": 1.4069106506064874e-06, - "loss": 0.8904, - "step": 5056 - }, - { - "epoch": 0.608068297961883, - "grad_norm": 1.8689236410349372, - "learning_rate": 1.4061667629012989e-06, - "loss": 1.011, - "step": 5057 - }, - { - "epoch": 0.608188540852522, - "grad_norm": 2.9881282193761582, - "learning_rate": 1.40542296528014e-06, - "loss": 1.0605, - "step": 5058 - }, - { - "epoch": 0.6083087837431612, - "grad_norm": 3.0083941008822537, - "learning_rate": 1.4046792578558452e-06, - "loss": 0.9856, - "step": 5059 - }, - { - "epoch": 0.6084290266338003, - "grad_norm": 2.0802824824423705, - "learning_rate": 1.4039356407412325e-06, - "loss": 0.9915, - "step": 5060 - }, - { - "epoch": 0.6085492695244393, - "grad_norm": 0.8621013051577026, - "learning_rate": 1.40319211404911e-06, - "loss": 0.8499, - "step": 5061 - }, - { - "epoch": 0.6086695124150785, - "grad_norm": 1.771989878541616, - "learning_rate": 1.4024486778922691e-06, - "loss": 1.1264, - "step": 5062 - }, - { - "epoch": 0.6087897553057176, - "grad_norm": 1.8801673891746762, - "learning_rate": 1.4017053323834884e-06, - "loss": 1.0053, - "step": 5063 - }, - { - "epoch": 0.6089099981963566, - "grad_norm": 5.587267822630463, - "learning_rate": 1.4009620776355333e-06, - "loss": 0.9954, - "step": 5064 - }, - { - "epoch": 0.6090302410869958, - "grad_norm": 1.6843362042943444, - "learning_rate": 1.4002189137611553e-06, - "loss": 1.0214, - "step": 5065 - }, - { - "epoch": 0.6091504839776348, - "grad_norm": 1.5601266765513648, - "learning_rate": 1.3994758408730901e-06, - "loss": 0.9252, - "step": 5066 - }, - { - "epoch": 0.6092707268682739, - "grad_norm": 1.6766006138318665, - "learning_rate": 1.3987328590840629e-06, - "loss": 0.9982, - "step": 5067 - }, - { - "epoch": 0.609390969758913, - "grad_norm": 3.015710700996449, - "learning_rate": 1.397989968506783e-06, - "loss": 1.0858, - "step": 5068 - }, - { - "epoch": 0.6095112126495521, - "grad_norm": 1.949498219608845, - "learning_rate": 1.3972471692539458e-06, - "loss": 0.9533, - "step": 5069 - }, - { - "epoch": 0.6096314555401912, - "grad_norm": 2.5755750588062702, - "learning_rate": 1.3965044614382348e-06, - "loss": 0.9827, - "step": 5070 - }, - { - "epoch": 0.6097516984308303, - "grad_norm": 2.9316054919613665, - "learning_rate": 1.3957618451723162e-06, - "loss": 0.98, - "step": 5071 - }, - { - "epoch": 0.6098719413214694, - "grad_norm": 2.0571597930233767, - "learning_rate": 1.3950193205688457e-06, - "loss": 0.9303, - "step": 5072 - }, - { - "epoch": 0.6099921842121084, - "grad_norm": 2.095920860732954, - "learning_rate": 1.3942768877404627e-06, - "loss": 1.0627, - "step": 5073 - }, - { - "epoch": 0.6101124271027476, - "grad_norm": 1.6630863675773813, - "learning_rate": 1.393534546799795e-06, - "loss": 0.974, - "step": 5074 - }, - { - "epoch": 0.6102326699933867, - "grad_norm": 5.132179813496554, - "learning_rate": 1.3927922978594536e-06, - "loss": 0.9045, - "step": 5075 - }, - { - "epoch": 0.6103529128840257, - "grad_norm": 0.8542975154921403, - "learning_rate": 1.3920501410320387e-06, - "loss": 0.8557, - "step": 5076 - }, - { - "epoch": 0.6104731557746649, - "grad_norm": 2.4275976681139775, - "learning_rate": 1.3913080764301333e-06, - "loss": 0.9903, - "step": 5077 - }, - { - "epoch": 0.6105933986653039, - "grad_norm": 1.9778051880152687, - "learning_rate": 1.3905661041663085e-06, - "loss": 0.9448, - "step": 5078 - }, - { - "epoch": 0.610713641555943, - "grad_norm": 2.370249062018022, - "learning_rate": 1.389824224353122e-06, - "loss": 0.8778, - "step": 5079 - }, - { - "epoch": 0.610833884446582, - "grad_norm": 1.4485536267456947, - "learning_rate": 1.389082437103115e-06, - "loss": 0.9964, - "step": 5080 - }, - { - "epoch": 0.6109541273372212, - "grad_norm": 1.9572825622617536, - "learning_rate": 1.3883407425288172e-06, - "loss": 1.0127, - "step": 5081 - }, - { - "epoch": 0.6110743702278603, - "grad_norm": 1.7949558790304612, - "learning_rate": 1.3875991407427417e-06, - "loss": 1.0126, - "step": 5082 - }, - { - "epoch": 0.6111946131184993, - "grad_norm": 0.7608124647868305, - "learning_rate": 1.38685763185739e-06, - "loss": 0.842, - "step": 5083 - }, - { - "epoch": 0.6113148560091385, - "grad_norm": 2.481768890625606, - "learning_rate": 1.3861162159852476e-06, - "loss": 0.9031, - "step": 5084 - }, - { - "epoch": 0.6114350988997775, - "grad_norm": 1.9784738097521646, - "learning_rate": 1.3853748932387875e-06, - "loss": 1.0322, - "step": 5085 - }, - { - "epoch": 0.6115553417904166, - "grad_norm": 7.097108020929524, - "learning_rate": 1.3846336637304671e-06, - "loss": 0.9834, - "step": 5086 - }, - { - "epoch": 0.6116755846810558, - "grad_norm": 2.879088438731452, - "learning_rate": 1.3838925275727316e-06, - "loss": 1.0532, - "step": 5087 - }, - { - "epoch": 0.6117958275716948, - "grad_norm": 1.7167967361259187, - "learning_rate": 1.3831514848780089e-06, - "loss": 1.0203, - "step": 5088 - }, - { - "epoch": 0.6119160704623339, - "grad_norm": 3.172475372771982, - "learning_rate": 1.3824105357587152e-06, - "loss": 1.1499, - "step": 5089 - }, - { - "epoch": 0.612036313352973, - "grad_norm": 2.411977021451087, - "learning_rate": 1.381669680327253e-06, - "loss": 1.0509, - "step": 5090 - }, - { - "epoch": 0.6121565562436121, - "grad_norm": 1.7378994983284515, - "learning_rate": 1.380928918696008e-06, - "loss": 0.939, - "step": 5091 - }, - { - "epoch": 0.6122767991342511, - "grad_norm": 2.076767327372871, - "learning_rate": 1.3801882509773548e-06, - "loss": 0.9336, - "step": 5092 - }, - { - "epoch": 0.6123970420248903, - "grad_norm": 4.178051437609118, - "learning_rate": 1.3794476772836503e-06, - "loss": 1.0448, - "step": 5093 - }, - { - "epoch": 0.6125172849155294, - "grad_norm": 1.6333484408919041, - "learning_rate": 1.3787071977272402e-06, - "loss": 1.0687, - "step": 5094 - }, - { - "epoch": 0.6126375278061684, - "grad_norm": 2.3387499562869536, - "learning_rate": 1.3779668124204535e-06, - "loss": 0.9464, - "step": 5095 - }, - { - "epoch": 0.6127577706968076, - "grad_norm": 3.239977568325051, - "learning_rate": 1.3772265214756074e-06, - "loss": 1.038, - "step": 5096 - }, - { - "epoch": 0.6128780135874466, - "grad_norm": 2.2025210587602166, - "learning_rate": 1.3764863250050025e-06, - "loss": 0.9842, - "step": 5097 - }, - { - "epoch": 0.6129982564780857, - "grad_norm": 1.7094634512659197, - "learning_rate": 1.3757462231209272e-06, - "loss": 1.0339, - "step": 5098 - }, - { - "epoch": 0.6131184993687249, - "grad_norm": 1.869273213912654, - "learning_rate": 1.3750062159356525e-06, - "loss": 1.1162, - "step": 5099 - }, - { - "epoch": 0.6132387422593639, - "grad_norm": 1.843037483773854, - "learning_rate": 1.3742663035614382e-06, - "loss": 1.0539, - "step": 5100 - }, - { - "epoch": 0.613358985150003, - "grad_norm": 1.80078277256833, - "learning_rate": 1.3735264861105283e-06, - "loss": 1.0294, - "step": 5101 - }, - { - "epoch": 0.6134792280406421, - "grad_norm": 1.8672529392666193, - "learning_rate": 1.372786763695152e-06, - "loss": 1.0109, - "step": 5102 - }, - { - "epoch": 0.6135994709312812, - "grad_norm": 1.823475132727925, - "learning_rate": 1.3720471364275257e-06, - "loss": 1.0006, - "step": 5103 - }, - { - "epoch": 0.6137197138219203, - "grad_norm": 1.9283281240939658, - "learning_rate": 1.3713076044198486e-06, - "loss": 0.9967, - "step": 5104 - }, - { - "epoch": 0.6138399567125594, - "grad_norm": 2.702347571973257, - "learning_rate": 1.3705681677843086e-06, - "loss": 1.0386, - "step": 5105 - }, - { - "epoch": 0.6139601996031985, - "grad_norm": 0.8065809176979292, - "learning_rate": 1.3698288266330768e-06, - "loss": 0.8621, - "step": 5106 - }, - { - "epoch": 0.6140804424938375, - "grad_norm": 2.1353005230340374, - "learning_rate": 1.3690895810783113e-06, - "loss": 0.9555, - "step": 5107 - }, - { - "epoch": 0.6142006853844767, - "grad_norm": 2.4348570359890878, - "learning_rate": 1.3683504312321543e-06, - "loss": 0.9516, - "step": 5108 - }, - { - "epoch": 0.6143209282751158, - "grad_norm": 2.0399710432503215, - "learning_rate": 1.3676113772067355e-06, - "loss": 1.028, - "step": 5109 - }, - { - "epoch": 0.6144411711657548, - "grad_norm": 1.7408221586875239, - "learning_rate": 1.3668724191141671e-06, - "loss": 0.942, - "step": 5110 - }, - { - "epoch": 0.6145614140563939, - "grad_norm": 1.8856184695101985, - "learning_rate": 1.3661335570665493e-06, - "loss": 0.8945, - "step": 5111 - }, - { - "epoch": 0.614681656947033, - "grad_norm": 10.57070225168366, - "learning_rate": 1.3653947911759676e-06, - "loss": 0.9308, - "step": 5112 - }, - { - "epoch": 0.6148018998376721, - "grad_norm": 1.8128204062502669, - "learning_rate": 1.3646561215544904e-06, - "loss": 0.969, - "step": 5113 - }, - { - "epoch": 0.6149221427283111, - "grad_norm": 2.0380784506630576, - "learning_rate": 1.363917548314176e-06, - "loss": 1.0285, - "step": 5114 - }, - { - "epoch": 0.6150423856189503, - "grad_norm": 2.1795660825977827, - "learning_rate": 1.3631790715670626e-06, - "loss": 0.9652, - "step": 5115 - }, - { - "epoch": 0.6151626285095894, - "grad_norm": 1.8047816924908255, - "learning_rate": 1.3624406914251783e-06, - "loss": 1.0909, - "step": 5116 - }, - { - "epoch": 0.6152828714002284, - "grad_norm": 1.8393221559234463, - "learning_rate": 1.3617024080005335e-06, - "loss": 1.1085, - "step": 5117 - }, - { - "epoch": 0.6154031142908676, - "grad_norm": 3.8480547858321383, - "learning_rate": 1.3609642214051266e-06, - "loss": 0.9704, - "step": 5118 - }, - { - "epoch": 0.6155233571815066, - "grad_norm": 1.942271049586763, - "learning_rate": 1.3602261317509385e-06, - "loss": 0.8971, - "step": 5119 - }, - { - "epoch": 0.6156436000721457, - "grad_norm": 3.2399498996864176, - "learning_rate": 1.3594881391499387e-06, - "loss": 1.0388, - "step": 5120 - }, - { - "epoch": 0.6157638429627849, - "grad_norm": 2.225139926410956, - "learning_rate": 1.3587502437140778e-06, - "loss": 1.0187, - "step": 5121 - }, - { - "epoch": 0.6158840858534239, - "grad_norm": 2.282643767693599, - "learning_rate": 1.3580124455552952e-06, - "loss": 1.0809, - "step": 5122 - }, - { - "epoch": 0.616004328744063, - "grad_norm": 1.7673451644221336, - "learning_rate": 1.3572747447855148e-06, - "loss": 1.0936, - "step": 5123 - }, - { - "epoch": 0.6161245716347021, - "grad_norm": 2.1002935386043595, - "learning_rate": 1.356537141516644e-06, - "loss": 0.9203, - "step": 5124 - }, - { - "epoch": 0.6162448145253412, - "grad_norm": 2.5186750036390757, - "learning_rate": 1.3557996358605775e-06, - "loss": 0.8511, - "step": 5125 - }, - { - "epoch": 0.6163650574159802, - "grad_norm": 2.4199680819652793, - "learning_rate": 1.3550622279291941e-06, - "loss": 0.9266, - "step": 5126 - }, - { - "epoch": 0.6164853003066194, - "grad_norm": 1.8218060697991516, - "learning_rate": 1.354324917834358e-06, - "loss": 1.0595, - "step": 5127 - }, - { - "epoch": 0.6166055431972585, - "grad_norm": 1.7951962797762153, - "learning_rate": 1.353587705687918e-06, - "loss": 0.9947, - "step": 5128 - }, - { - "epoch": 0.6167257860878975, - "grad_norm": 2.4003685747690438, - "learning_rate": 1.3528505916017096e-06, - "loss": 0.9539, - "step": 5129 - }, - { - "epoch": 0.6168460289785367, - "grad_norm": 1.9053372558667272, - "learning_rate": 1.3521135756875514e-06, - "loss": 1.1106, - "step": 5130 - }, - { - "epoch": 0.6169662718691757, - "grad_norm": 1.9943480500516637, - "learning_rate": 1.3513766580572496e-06, - "loss": 1.0851, - "step": 5131 - }, - { - "epoch": 0.6170865147598148, - "grad_norm": 1.9547849391493672, - "learning_rate": 1.3506398388225924e-06, - "loss": 1.0007, - "step": 5132 - }, - { - "epoch": 0.617206757650454, - "grad_norm": 1.7564267948531476, - "learning_rate": 1.349903118095355e-06, - "loss": 0.9392, - "step": 5133 - }, - { - "epoch": 0.617327000541093, - "grad_norm": 1.706681065026781, - "learning_rate": 1.349166495987298e-06, - "loss": 0.9661, - "step": 5134 - }, - { - "epoch": 0.6174472434317321, - "grad_norm": 0.9246776457297257, - "learning_rate": 1.348429972610166e-06, - "loss": 0.907, - "step": 5135 - }, - { - "epoch": 0.6175674863223712, - "grad_norm": 0.8656335603035946, - "learning_rate": 1.3476935480756897e-06, - "loss": 0.8405, - "step": 5136 - }, - { - "epoch": 0.6176877292130103, - "grad_norm": 2.0781922293665973, - "learning_rate": 1.346957222495583e-06, - "loss": 0.9828, - "step": 5137 - }, - { - "epoch": 0.6178079721036493, - "grad_norm": 4.465377495368948, - "learning_rate": 1.3462209959815466e-06, - "loss": 0.9415, - "step": 5138 - }, - { - "epoch": 0.6179282149942885, - "grad_norm": 1.935750232718689, - "learning_rate": 1.345484868645265e-06, - "loss": 0.9616, - "step": 5139 - }, - { - "epoch": 0.6180484578849276, - "grad_norm": 2.338026152772065, - "learning_rate": 1.3447488405984088e-06, - "loss": 1.0098, - "step": 5140 - }, - { - "epoch": 0.6181687007755666, - "grad_norm": 2.072449930320171, - "learning_rate": 1.3440129119526322e-06, - "loss": 0.918, - "step": 5141 - }, - { - "epoch": 0.6182889436662057, - "grad_norm": 0.9468239924726574, - "learning_rate": 1.3432770828195762e-06, - "loss": 0.7995, - "step": 5142 - }, - { - "epoch": 0.6184091865568448, - "grad_norm": 3.3484409974863616, - "learning_rate": 1.3425413533108635e-06, - "loss": 0.9458, - "step": 5143 - }, - { - "epoch": 0.6185294294474839, - "grad_norm": 3.4500909655091827, - "learning_rate": 1.341805723538105e-06, - "loss": 0.939, - "step": 5144 - }, - { - "epoch": 0.618649672338123, - "grad_norm": 1.5116386293771389, - "learning_rate": 1.3410701936128948e-06, - "loss": 1.0024, - "step": 5145 - }, - { - "epoch": 0.6187699152287621, - "grad_norm": 3.5160396077135214, - "learning_rate": 1.340334763646812e-06, - "loss": 1.0822, - "step": 5146 - }, - { - "epoch": 0.6188901581194012, - "grad_norm": 2.4859265934424974, - "learning_rate": 1.3395994337514218e-06, - "loss": 0.9768, - "step": 5147 - }, - { - "epoch": 0.6190104010100402, - "grad_norm": 1.968380121272019, - "learning_rate": 1.3388642040382725e-06, - "loss": 1.0106, - "step": 5148 - }, - { - "epoch": 0.6191306439006794, - "grad_norm": 1.8374148005241442, - "learning_rate": 1.3381290746188975e-06, - "loss": 1.0697, - "step": 5149 - }, - { - "epoch": 0.6192508867913185, - "grad_norm": 1.6098779429496135, - "learning_rate": 1.3373940456048152e-06, - "loss": 0.9093, - "step": 5150 - }, - { - "epoch": 0.6193711296819575, - "grad_norm": 1.482339849200524, - "learning_rate": 1.3366591171075299e-06, - "loss": 0.825, - "step": 5151 - }, - { - "epoch": 0.6194913725725967, - "grad_norm": 1.9613797726314486, - "learning_rate": 1.335924289238529e-06, - "loss": 1.1319, - "step": 5152 - }, - { - "epoch": 0.6196116154632357, - "grad_norm": 1.7202930806213348, - "learning_rate": 1.3351895621092859e-06, - "loss": 0.9913, - "step": 5153 - }, - { - "epoch": 0.6197318583538748, - "grad_norm": 1.9096088522195696, - "learning_rate": 1.3344549358312567e-06, - "loss": 0.9966, - "step": 5154 - }, - { - "epoch": 0.619852101244514, - "grad_norm": 1.943590681409587, - "learning_rate": 1.3337204105158852e-06, - "loss": 1.0091, - "step": 5155 - }, - { - "epoch": 0.619972344135153, - "grad_norm": 2.034911391744962, - "learning_rate": 1.332985986274597e-06, - "loss": 0.9539, - "step": 5156 - }, - { - "epoch": 0.6200925870257921, - "grad_norm": 3.682328687568976, - "learning_rate": 1.3322516632188047e-06, - "loss": 0.9789, - "step": 5157 - }, - { - "epoch": 0.6202128299164312, - "grad_norm": 1.6022671521727463, - "learning_rate": 1.3315174414599045e-06, - "loss": 0.9066, - "step": 5158 - }, - { - "epoch": 0.6203330728070703, - "grad_norm": 2.27398224167277, - "learning_rate": 1.3307833211092768e-06, - "loss": 0.9822, - "step": 5159 - }, - { - "epoch": 0.6204533156977093, - "grad_norm": 2.0847445730826495, - "learning_rate": 1.3300493022782873e-06, - "loss": 0.9741, - "step": 5160 - }, - { - "epoch": 0.6205735585883485, - "grad_norm": 1.763634766856814, - "learning_rate": 1.3293153850782855e-06, - "loss": 0.9551, - "step": 5161 - }, - { - "epoch": 0.6206938014789876, - "grad_norm": 1.7831048510108671, - "learning_rate": 1.3285815696206069e-06, - "loss": 0.9362, - "step": 5162 - }, - { - "epoch": 0.6208140443696266, - "grad_norm": 1.9789798715347504, - "learning_rate": 1.32784785601657e-06, - "loss": 0.9984, - "step": 5163 - }, - { - "epoch": 0.6209342872602658, - "grad_norm": 1.6497920107520994, - "learning_rate": 1.3271142443774798e-06, - "loss": 0.9712, - "step": 5164 - }, - { - "epoch": 0.6210545301509048, - "grad_norm": 1.718555855623289, - "learning_rate": 1.3263807348146228e-06, - "loss": 1.0428, - "step": 5165 - }, - { - "epoch": 0.6211747730415439, - "grad_norm": 1.7287011343337875, - "learning_rate": 1.3256473274392733e-06, - "loss": 0.9702, - "step": 5166 - }, - { - "epoch": 0.6212950159321831, - "grad_norm": 1.590401516486506, - "learning_rate": 1.3249140223626873e-06, - "loss": 0.9295, - "step": 5167 - }, - { - "epoch": 0.6214152588228221, - "grad_norm": 1.776728261496621, - "learning_rate": 1.3241808196961077e-06, - "loss": 0.99, - "step": 5168 - }, - { - "epoch": 0.6215355017134612, - "grad_norm": 1.7704258019736918, - "learning_rate": 1.3234477195507608e-06, - "loss": 0.9405, - "step": 5169 - }, - { - "epoch": 0.6216557446041003, - "grad_norm": 1.817139770720689, - "learning_rate": 1.322714722037857e-06, - "loss": 0.8605, - "step": 5170 - }, - { - "epoch": 0.6217759874947394, - "grad_norm": 1.905112505616825, - "learning_rate": 1.321981827268591e-06, - "loss": 1.0017, - "step": 5171 - }, - { - "epoch": 0.6218962303853784, - "grad_norm": 1.7132346700519367, - "learning_rate": 1.3212490353541426e-06, - "loss": 1.0433, - "step": 5172 - }, - { - "epoch": 0.6220164732760175, - "grad_norm": 1.7988464155727268, - "learning_rate": 1.3205163464056762e-06, - "loss": 1.0334, - "step": 5173 - }, - { - "epoch": 0.6221367161666567, - "grad_norm": 1.8895468672234192, - "learning_rate": 1.319783760534339e-06, - "loss": 0.95, - "step": 5174 - }, - { - "epoch": 0.6222569590572957, - "grad_norm": 3.3241187684637605, - "learning_rate": 1.319051277851266e-06, - "loss": 0.9841, - "step": 5175 - }, - { - "epoch": 0.6223772019479348, - "grad_norm": 2.0436474928974, - "learning_rate": 1.3183188984675716e-06, - "loss": 1.0761, - "step": 5176 - }, - { - "epoch": 0.6224974448385739, - "grad_norm": 2.4132703944511937, - "learning_rate": 1.3175866224943586e-06, - "loss": 0.9456, - "step": 5177 - }, - { - "epoch": 0.622617687729213, - "grad_norm": 2.3767855106331974, - "learning_rate": 1.316854450042712e-06, - "loss": 0.9635, - "step": 5178 - }, - { - "epoch": 0.622737930619852, - "grad_norm": 2.7539050379540773, - "learning_rate": 1.3161223812237024e-06, - "loss": 0.9697, - "step": 5179 - }, - { - "epoch": 0.6228581735104912, - "grad_norm": 2.488829931013084, - "learning_rate": 1.3153904161483842e-06, - "loss": 1.0864, - "step": 5180 - }, - { - "epoch": 0.6229784164011303, - "grad_norm": 2.410343898596873, - "learning_rate": 1.3146585549277953e-06, - "loss": 1.0896, - "step": 5181 - }, - { - "epoch": 0.6230986592917693, - "grad_norm": 5.163075131945615, - "learning_rate": 1.3139267976729591e-06, - "loss": 1.0081, - "step": 5182 - }, - { - "epoch": 0.6232189021824085, - "grad_norm": 1.4836955071254951, - "learning_rate": 1.3131951444948815e-06, - "loss": 0.938, - "step": 5183 - }, - { - "epoch": 0.6233391450730476, - "grad_norm": 1.9241379231844382, - "learning_rate": 1.3124635955045546e-06, - "loss": 0.9826, - "step": 5184 - }, - { - "epoch": 0.6234593879636866, - "grad_norm": 1.5823990759222961, - "learning_rate": 1.3117321508129537e-06, - "loss": 1.0663, - "step": 5185 - }, - { - "epoch": 0.6235796308543258, - "grad_norm": 1.3702821856530778, - "learning_rate": 1.3110008105310388e-06, - "loss": 0.9929, - "step": 5186 - }, - { - "epoch": 0.6236998737449648, - "grad_norm": 1.792520038913034, - "learning_rate": 1.3102695747697526e-06, - "loss": 1.0101, - "step": 5187 - }, - { - "epoch": 0.6238201166356039, - "grad_norm": 2.3404383341736086, - "learning_rate": 1.3095384436400237e-06, - "loss": 1.1319, - "step": 5188 - }, - { - "epoch": 0.623940359526243, - "grad_norm": 1.966018477266707, - "learning_rate": 1.3088074172527633e-06, - "loss": 1.0533, - "step": 5189 - }, - { - "epoch": 0.6240606024168821, - "grad_norm": 2.8961020877822046, - "learning_rate": 1.3080764957188684e-06, - "loss": 0.9515, - "step": 5190 - }, - { - "epoch": 0.6241808453075212, - "grad_norm": 2.0251087960110077, - "learning_rate": 1.3073456791492192e-06, - "loss": 0.9376, - "step": 5191 - }, - { - "epoch": 0.6243010881981603, - "grad_norm": 1.9352950809039169, - "learning_rate": 1.3066149676546801e-06, - "loss": 1.0144, - "step": 5192 - }, - { - "epoch": 0.6244213310887994, - "grad_norm": 1.8089235470602085, - "learning_rate": 1.3058843613460985e-06, - "loss": 0.8832, - "step": 5193 - }, - { - "epoch": 0.6245415739794384, - "grad_norm": 2.285681196381976, - "learning_rate": 1.3051538603343075e-06, - "loss": 0.9733, - "step": 5194 - }, - { - "epoch": 0.6246618168700776, - "grad_norm": 1.87313527524139, - "learning_rate": 1.3044234647301235e-06, - "loss": 0.9055, - "step": 5195 - }, - { - "epoch": 0.6247820597607167, - "grad_norm": 1.6356197482160684, - "learning_rate": 1.303693174644347e-06, - "loss": 0.9478, - "step": 5196 - }, - { - "epoch": 0.6249023026513557, - "grad_norm": 2.2943096756578836, - "learning_rate": 1.3029629901877625e-06, - "loss": 1.0349, - "step": 5197 - }, - { - "epoch": 0.6250225455419949, - "grad_norm": 2.8492161124499376, - "learning_rate": 1.3022329114711376e-06, - "loss": 1.0088, - "step": 5198 - }, - { - "epoch": 0.6251427884326339, - "grad_norm": 1.835431357883136, - "learning_rate": 1.3015029386052256e-06, - "loss": 0.9271, - "step": 5199 - }, - { - "epoch": 0.625263031323273, - "grad_norm": 1.8782839944221408, - "learning_rate": 1.3007730717007622e-06, - "loss": 0.9563, - "step": 5200 - }, - { - "epoch": 0.6253832742139122, - "grad_norm": 1.9787055298175291, - "learning_rate": 1.3000433108684676e-06, - "loss": 0.9887, - "step": 5201 - }, - { - "epoch": 0.6255035171045512, - "grad_norm": 2.561153523316226, - "learning_rate": 1.2993136562190467e-06, - "loss": 1.0296, - "step": 5202 - }, - { - "epoch": 0.6256237599951903, - "grad_norm": 1.3888756539455944, - "learning_rate": 1.2985841078631871e-06, - "loss": 0.9338, - "step": 5203 - }, - { - "epoch": 0.6257440028858293, - "grad_norm": 1.9537590523557058, - "learning_rate": 1.2978546659115608e-06, - "loss": 1.0095, - "step": 5204 - }, - { - "epoch": 0.6258642457764685, - "grad_norm": 2.1872922526211407, - "learning_rate": 1.2971253304748228e-06, - "loss": 1.0862, - "step": 5205 - }, - { - "epoch": 0.6259844886671075, - "grad_norm": 1.6418524373990608, - "learning_rate": 1.296396101663614e-06, - "loss": 0.9848, - "step": 5206 - }, - { - "epoch": 0.6261047315577466, - "grad_norm": 2.0173865612333866, - "learning_rate": 1.2956669795885565e-06, - "loss": 1.0721, - "step": 5207 - }, - { - "epoch": 0.6262249744483858, - "grad_norm": 2.1773816015418404, - "learning_rate": 1.294937964360259e-06, - "loss": 0.9232, - "step": 5208 - }, - { - "epoch": 0.6263452173390248, - "grad_norm": 2.304466935287847, - "learning_rate": 1.2942090560893108e-06, - "loss": 0.9263, - "step": 5209 - }, - { - "epoch": 0.6264654602296639, - "grad_norm": 2.1402867182168697, - "learning_rate": 1.2934802548862882e-06, - "loss": 0.8266, - "step": 5210 - }, - { - "epoch": 0.626585703120303, - "grad_norm": 2.1764830958881833, - "learning_rate": 1.292751560861749e-06, - "loss": 1.0594, - "step": 5211 - }, - { - "epoch": 0.6267059460109421, - "grad_norm": 1.7364106475134549, - "learning_rate": 1.2920229741262354e-06, - "loss": 1.0254, - "step": 5212 - }, - { - "epoch": 0.6268261889015811, - "grad_norm": 2.068663663877733, - "learning_rate": 1.2912944947902739e-06, - "loss": 0.9743, - "step": 5213 - }, - { - "epoch": 0.6269464317922203, - "grad_norm": 2.5269393007681935, - "learning_rate": 1.2905661229643742e-06, - "loss": 0.948, - "step": 5214 - }, - { - "epoch": 0.6270666746828594, - "grad_norm": 2.613897003024303, - "learning_rate": 1.2898378587590299e-06, - "loss": 1.0711, - "step": 5215 - }, - { - "epoch": 0.6271869175734984, - "grad_norm": 1.9139092831693094, - "learning_rate": 1.2891097022847173e-06, - "loss": 1.1036, - "step": 5216 - }, - { - "epoch": 0.6273071604641376, - "grad_norm": 1.9006355025939885, - "learning_rate": 1.2883816536518978e-06, - "loss": 0.9034, - "step": 5217 - }, - { - "epoch": 0.6274274033547766, - "grad_norm": 1.7518673879000082, - "learning_rate": 1.2876537129710155e-06, - "loss": 1.0494, - "step": 5218 - }, - { - "epoch": 0.6275476462454157, - "grad_norm": 2.1939009611784477, - "learning_rate": 1.286925880352499e-06, - "loss": 0.978, - "step": 5219 - }, - { - "epoch": 0.6276678891360549, - "grad_norm": 1.8954490719340507, - "learning_rate": 1.2861981559067592e-06, - "loss": 0.9322, - "step": 5220 - }, - { - "epoch": 0.6277881320266939, - "grad_norm": 1.7606465868562589, - "learning_rate": 1.2854705397441917e-06, - "loss": 1.0308, - "step": 5221 - }, - { - "epoch": 0.627908374917333, - "grad_norm": 1.9507288768797495, - "learning_rate": 1.2847430319751747e-06, - "loss": 1.0079, - "step": 5222 - }, - { - "epoch": 0.6280286178079721, - "grad_norm": 6.052694354397379, - "learning_rate": 1.2840156327100712e-06, - "loss": 0.9086, - "step": 5223 - }, - { - "epoch": 0.6281488606986112, - "grad_norm": 1.822351059132391, - "learning_rate": 1.2832883420592272e-06, - "loss": 0.952, - "step": 5224 - }, - { - "epoch": 0.6282691035892503, - "grad_norm": 12.236878510417228, - "learning_rate": 1.282561160132972e-06, - "loss": 0.8753, - "step": 5225 - }, - { - "epoch": 0.6283893464798894, - "grad_norm": 1.5186206275876368, - "learning_rate": 1.2818340870416186e-06, - "loss": 1.0412, - "step": 5226 - }, - { - "epoch": 0.6285095893705285, - "grad_norm": 1.6601767146027275, - "learning_rate": 1.2811071228954626e-06, - "loss": 0.9862, - "step": 5227 - }, - { - "epoch": 0.6286298322611675, - "grad_norm": 1.8129657278551068, - "learning_rate": 1.2803802678047846e-06, - "loss": 1.0392, - "step": 5228 - }, - { - "epoch": 0.6287500751518067, - "grad_norm": 1.82405184323268, - "learning_rate": 1.279653521879848e-06, - "loss": 0.9625, - "step": 5229 - }, - { - "epoch": 0.6288703180424458, - "grad_norm": 1.8609399060442766, - "learning_rate": 1.2789268852308997e-06, - "loss": 1.0734, - "step": 5230 - }, - { - "epoch": 0.6289905609330848, - "grad_norm": 2.049900992258886, - "learning_rate": 1.2782003579681688e-06, - "loss": 0.9334, - "step": 5231 - }, - { - "epoch": 0.629110803823724, - "grad_norm": 1.596026421693926, - "learning_rate": 1.2774739402018701e-06, - "loss": 0.9678, - "step": 5232 - }, - { - "epoch": 0.629231046714363, - "grad_norm": 1.6877763309992961, - "learning_rate": 1.2767476320422002e-06, - "loss": 0.962, - "step": 5233 - }, - { - "epoch": 0.6293512896050021, - "grad_norm": 0.7272973804874103, - "learning_rate": 1.2760214335993392e-06, - "loss": 0.8287, - "step": 5234 - }, - { - "epoch": 0.6294715324956413, - "grad_norm": 2.097749903344634, - "learning_rate": 1.2752953449834514e-06, - "loss": 0.8193, - "step": 5235 - }, - { - "epoch": 0.6295917753862803, - "grad_norm": 1.6625466763246834, - "learning_rate": 1.2745693663046836e-06, - "loss": 1.0326, - "step": 5236 - }, - { - "epoch": 0.6297120182769194, - "grad_norm": 1.9024527829861375, - "learning_rate": 1.2738434976731662e-06, - "loss": 1.0342, - "step": 5237 - }, - { - "epoch": 0.6298322611675584, - "grad_norm": 2.4641377303903886, - "learning_rate": 1.2731177391990125e-06, - "loss": 0.9812, - "step": 5238 - }, - { - "epoch": 0.6299525040581976, - "grad_norm": 2.156141748337089, - "learning_rate": 1.2723920909923203e-06, - "loss": 1.051, - "step": 5239 - }, - { - "epoch": 0.6300727469488366, - "grad_norm": 0.8922835183125973, - "learning_rate": 1.2716665531631688e-06, - "loss": 0.8854, - "step": 5240 - }, - { - "epoch": 0.6301929898394757, - "grad_norm": 1.6845066224373106, - "learning_rate": 1.270941125821623e-06, - "loss": 1.0007, - "step": 5241 - }, - { - "epoch": 0.6303132327301149, - "grad_norm": 5.485303838168082, - "learning_rate": 1.2702158090777278e-06, - "loss": 0.9912, - "step": 5242 - }, - { - "epoch": 0.6304334756207539, - "grad_norm": 1.9982283016703355, - "learning_rate": 1.2694906030415148e-06, - "loss": 0.9802, - "step": 5243 - }, - { - "epoch": 0.630553718511393, - "grad_norm": 2.619630225928722, - "learning_rate": 1.2687655078229958e-06, - "loss": 1.0451, - "step": 5244 - }, - { - "epoch": 0.6306739614020321, - "grad_norm": 2.405964425715821, - "learning_rate": 1.2680405235321678e-06, - "loss": 0.926, - "step": 5245 - }, - { - "epoch": 0.6307942042926712, - "grad_norm": 1.9864794768188545, - "learning_rate": 1.267315650279011e-06, - "loss": 1.0161, - "step": 5246 - }, - { - "epoch": 0.6309144471833102, - "grad_norm": 1.8457113921772523, - "learning_rate": 1.2665908881734874e-06, - "loss": 0.9646, - "step": 5247 - }, - { - "epoch": 0.6310346900739494, - "grad_norm": 2.2236175871651573, - "learning_rate": 1.2658662373255432e-06, - "loss": 1.0835, - "step": 5248 - }, - { - "epoch": 0.6311549329645885, - "grad_norm": 0.8058413038941151, - "learning_rate": 1.2651416978451063e-06, - "loss": 0.8038, - "step": 5249 - }, - { - "epoch": 0.6312751758552275, - "grad_norm": 2.8070986273833385, - "learning_rate": 1.2644172698420903e-06, - "loss": 0.8752, - "step": 5250 - }, - { - "epoch": 0.6313954187458667, - "grad_norm": 1.6918004657968848, - "learning_rate": 1.2636929534263892e-06, - "loss": 1.0751, - "step": 5251 - }, - { - "epoch": 0.6315156616365057, - "grad_norm": 2.1086985315758895, - "learning_rate": 1.2629687487078821e-06, - "loss": 1.0014, - "step": 5252 - }, - { - "epoch": 0.6316359045271448, - "grad_norm": 1.6868503874332603, - "learning_rate": 1.2622446557964293e-06, - "loss": 0.9921, - "step": 5253 - }, - { - "epoch": 0.631756147417784, - "grad_norm": 1.547302264150776, - "learning_rate": 1.261520674801876e-06, - "loss": 0.9326, - "step": 5254 - }, - { - "epoch": 0.631876390308423, - "grad_norm": 4.227829720279278, - "learning_rate": 1.2607968058340488e-06, - "loss": 0.9565, - "step": 5255 - }, - { - "epoch": 0.6319966331990621, - "grad_norm": 2.0646524468436143, - "learning_rate": 1.2600730490027583e-06, - "loss": 0.9631, - "step": 5256 - }, - { - "epoch": 0.6321168760897012, - "grad_norm": 1.5914253784288541, - "learning_rate": 1.2593494044177984e-06, - "loss": 1.0295, - "step": 5257 - }, - { - "epoch": 0.6322371189803403, - "grad_norm": 2.109770448139217, - "learning_rate": 1.2586258721889448e-06, - "loss": 1.0285, - "step": 5258 - }, - { - "epoch": 0.6323573618709794, - "grad_norm": 1.7868181835640446, - "learning_rate": 1.2579024524259573e-06, - "loss": 1.0437, - "step": 5259 - }, - { - "epoch": 0.6324776047616185, - "grad_norm": 1.7913819315973676, - "learning_rate": 1.2571791452385768e-06, - "loss": 1.1426, - "step": 5260 - }, - { - "epoch": 0.6325978476522576, - "grad_norm": 3.2741980932262673, - "learning_rate": 1.2564559507365301e-06, - "loss": 1.0005, - "step": 5261 - }, - { - "epoch": 0.6327180905428966, - "grad_norm": 2.176069641176944, - "learning_rate": 1.2557328690295244e-06, - "loss": 1.026, - "step": 5262 - }, - { - "epoch": 0.6328383334335358, - "grad_norm": 2.043216843429718, - "learning_rate": 1.255009900227251e-06, - "loss": 0.9889, - "step": 5263 - }, - { - "epoch": 0.6329585763241748, - "grad_norm": 1.9567161199741954, - "learning_rate": 1.254287044439383e-06, - "loss": 1.0274, - "step": 5264 - }, - { - "epoch": 0.6330788192148139, - "grad_norm": 0.8191424062815933, - "learning_rate": 1.2535643017755776e-06, - "loss": 0.7979, - "step": 5265 - }, - { - "epoch": 0.6331990621054531, - "grad_norm": 2.3583475869582635, - "learning_rate": 1.2528416723454737e-06, - "loss": 0.9432, - "step": 5266 - }, - { - "epoch": 0.6333193049960921, - "grad_norm": 1.5220979719122036, - "learning_rate": 1.2521191562586945e-06, - "loss": 0.94, - "step": 5267 - }, - { - "epoch": 0.6334395478867312, - "grad_norm": 2.06804800886877, - "learning_rate": 1.2513967536248445e-06, - "loss": 1.0006, - "step": 5268 - }, - { - "epoch": 0.6335597907773702, - "grad_norm": 1.529748764603651, - "learning_rate": 1.2506744645535117e-06, - "loss": 1.0421, - "step": 5269 - }, - { - "epoch": 0.6336800336680094, - "grad_norm": 1.8585214659339175, - "learning_rate": 1.249952289154267e-06, - "loss": 0.8255, - "step": 5270 - }, - { - "epoch": 0.6338002765586485, - "grad_norm": 1.6807541410255287, - "learning_rate": 1.2492302275366635e-06, - "loss": 0.993, - "step": 5271 - }, - { - "epoch": 0.6339205194492875, - "grad_norm": 2.7522641311464304, - "learning_rate": 1.2485082798102377e-06, - "loss": 0.889, - "step": 5272 - }, - { - "epoch": 0.6340407623399267, - "grad_norm": 2.5020322169265876, - "learning_rate": 1.2477864460845084e-06, - "loss": 0.9157, - "step": 5273 - }, - { - "epoch": 0.6341610052305657, - "grad_norm": 5.212318706332789, - "learning_rate": 1.2470647264689776e-06, - "loss": 0.9718, - "step": 5274 - }, - { - "epoch": 0.6342812481212048, - "grad_norm": 2.0132089965313087, - "learning_rate": 1.2463431210731282e-06, - "loss": 0.9434, - "step": 5275 - }, - { - "epoch": 0.634401491011844, - "grad_norm": 2.078448736458383, - "learning_rate": 1.2456216300064289e-06, - "loss": 0.9894, - "step": 5276 - }, - { - "epoch": 0.634521733902483, - "grad_norm": 1.6967303416376707, - "learning_rate": 1.244900253378328e-06, - "loss": 1.0086, - "step": 5277 - }, - { - "epoch": 0.6346419767931221, - "grad_norm": 1.9934548927908238, - "learning_rate": 1.2441789912982583e-06, - "loss": 0.9215, - "step": 5278 - }, - { - "epoch": 0.6347622196837612, - "grad_norm": 1.9263436074753848, - "learning_rate": 1.2434578438756346e-06, - "loss": 0.8742, - "step": 5279 - }, - { - "epoch": 0.6348824625744003, - "grad_norm": 1.8824683543631837, - "learning_rate": 1.242736811219855e-06, - "loss": 1.0178, - "step": 5280 - }, - { - "epoch": 0.6350027054650393, - "grad_norm": 2.460556921216989, - "learning_rate": 1.2420158934402988e-06, - "loss": 1.0494, - "step": 5281 - }, - { - "epoch": 0.6351229483556785, - "grad_norm": 2.061042733213536, - "learning_rate": 1.2412950906463286e-06, - "loss": 1.0701, - "step": 5282 - }, - { - "epoch": 0.6352431912463176, - "grad_norm": 3.7295171192951018, - "learning_rate": 1.2405744029472902e-06, - "loss": 1.1224, - "step": 5283 - }, - { - "epoch": 0.6353634341369566, - "grad_norm": 2.2159138134814893, - "learning_rate": 1.2398538304525108e-06, - "loss": 0.989, - "step": 5284 - }, - { - "epoch": 0.6354836770275958, - "grad_norm": 1.909676520831496, - "learning_rate": 1.2391333732713016e-06, - "loss": 0.9907, - "step": 5285 - }, - { - "epoch": 0.6356039199182348, - "grad_norm": 1.9217730239237298, - "learning_rate": 1.2384130315129543e-06, - "loss": 1.0108, - "step": 5286 - }, - { - "epoch": 0.6357241628088739, - "grad_norm": 2.794015968173359, - "learning_rate": 1.2376928052867447e-06, - "loss": 0.961, - "step": 5287 - }, - { - "epoch": 0.6358444056995131, - "grad_norm": 2.5146099435767293, - "learning_rate": 1.2369726947019299e-06, - "loss": 1.0167, - "step": 5288 - }, - { - "epoch": 0.6359646485901521, - "grad_norm": 1.9097948727130525, - "learning_rate": 1.2362526998677511e-06, - "loss": 0.8958, - "step": 5289 - }, - { - "epoch": 0.6360848914807912, - "grad_norm": 1.7406023280860492, - "learning_rate": 1.2355328208934301e-06, - "loss": 1.074, - "step": 5290 - }, - { - "epoch": 0.6362051343714303, - "grad_norm": 1.5732606760048018, - "learning_rate": 1.2348130578881728e-06, - "loss": 0.9593, - "step": 5291 - }, - { - "epoch": 0.6363253772620694, - "grad_norm": 2.755369839084089, - "learning_rate": 1.2340934109611664e-06, - "loss": 0.9834, - "step": 5292 - }, - { - "epoch": 0.6364456201527084, - "grad_norm": 9.247658923026986, - "learning_rate": 1.2333738802215798e-06, - "loss": 0.9161, - "step": 5293 - }, - { - "epoch": 0.6365658630433476, - "grad_norm": 2.1315049855126733, - "learning_rate": 1.2326544657785668e-06, - "loss": 1.04, - "step": 5294 - }, - { - "epoch": 0.6366861059339867, - "grad_norm": 2.1861210427657127, - "learning_rate": 1.2319351677412608e-06, - "loss": 0.9744, - "step": 5295 - }, - { - "epoch": 0.6368063488246257, - "grad_norm": 1.8074248720123196, - "learning_rate": 1.2312159862187796e-06, - "loss": 0.9689, - "step": 5296 - }, - { - "epoch": 0.6369265917152649, - "grad_norm": 4.445573366686658, - "learning_rate": 1.2304969213202217e-06, - "loss": 0.9897, - "step": 5297 - }, - { - "epoch": 0.6370468346059039, - "grad_norm": 2.3098681138936836, - "learning_rate": 1.2297779731546692e-06, - "loss": 1.0166, - "step": 5298 - }, - { - "epoch": 0.637167077496543, - "grad_norm": 2.1417274699009603, - "learning_rate": 1.2290591418311853e-06, - "loss": 1.011, - "step": 5299 - }, - { - "epoch": 0.637287320387182, - "grad_norm": 1.4501594718845525, - "learning_rate": 1.2283404274588172e-06, - "loss": 0.9398, - "step": 5300 - }, - { - "epoch": 0.6374075632778212, - "grad_norm": 0.7686401474781991, - "learning_rate": 1.227621830146592e-06, - "loss": 0.7857, - "step": 5301 - }, - { - "epoch": 0.6375278061684603, - "grad_norm": 1.6631378327010433, - "learning_rate": 1.2269033500035217e-06, - "loss": 1.0218, - "step": 5302 - }, - { - "epoch": 0.6376480490590993, - "grad_norm": 1.7313226181940395, - "learning_rate": 1.2261849871385988e-06, - "loss": 0.9719, - "step": 5303 - }, - { - "epoch": 0.6377682919497385, - "grad_norm": 1.9287512357340704, - "learning_rate": 1.2254667416607972e-06, - "loss": 0.8444, - "step": 5304 - }, - { - "epoch": 0.6378885348403776, - "grad_norm": 1.6274312245359195, - "learning_rate": 1.2247486136790756e-06, - "loss": 1.0651, - "step": 5305 - }, - { - "epoch": 0.6380087777310166, - "grad_norm": 2.002894452853175, - "learning_rate": 1.2240306033023726e-06, - "loss": 1.0296, - "step": 5306 - }, - { - "epoch": 0.6381290206216558, - "grad_norm": 1.6723713360891788, - "learning_rate": 1.223312710639611e-06, - "loss": 0.9502, - "step": 5307 - }, - { - "epoch": 0.6382492635122948, - "grad_norm": 2.6301713594664267, - "learning_rate": 1.2225949357996928e-06, - "loss": 1.0916, - "step": 5308 - }, - { - "epoch": 0.6383695064029339, - "grad_norm": 1.4938038963905476, - "learning_rate": 1.221877278891505e-06, - "loss": 1.0344, - "step": 5309 - }, - { - "epoch": 0.638489749293573, - "grad_norm": 2.7704368154621424, - "learning_rate": 1.221159740023915e-06, - "loss": 0.9385, - "step": 5310 - }, - { - "epoch": 0.6386099921842121, - "grad_norm": 1.9706627167716768, - "learning_rate": 1.2204423193057735e-06, - "loss": 0.9619, - "step": 5311 - }, - { - "epoch": 0.6387302350748512, - "grad_norm": 0.9683323544405937, - "learning_rate": 1.2197250168459122e-06, - "loss": 0.9196, - "step": 5312 - }, - { - "epoch": 0.6388504779654903, - "grad_norm": 1.7508125462388202, - "learning_rate": 1.2190078327531454e-06, - "loss": 0.9745, - "step": 5313 - }, - { - "epoch": 0.6389707208561294, - "grad_norm": 1.5401189441077308, - "learning_rate": 1.2182907671362697e-06, - "loss": 0.9572, - "step": 5314 - }, - { - "epoch": 0.6390909637467684, - "grad_norm": 2.5232968128774873, - "learning_rate": 1.2175738201040626e-06, - "loss": 1.0086, - "step": 5315 - }, - { - "epoch": 0.6392112066374076, - "grad_norm": 2.9997092741922997, - "learning_rate": 1.2168569917652855e-06, - "loss": 1.0229, - "step": 5316 - }, - { - "epoch": 0.6393314495280467, - "grad_norm": 2.1825814673041823, - "learning_rate": 1.2161402822286797e-06, - "loss": 0.8673, - "step": 5317 - }, - { - "epoch": 0.6394516924186857, - "grad_norm": 1.9839325659428286, - "learning_rate": 1.2154236916029703e-06, - "loss": 1.0207, - "step": 5318 - }, - { - "epoch": 0.6395719353093249, - "grad_norm": 2.65038927565943, - "learning_rate": 1.2147072199968627e-06, - "loss": 0.9577, - "step": 5319 - }, - { - "epoch": 0.6396921781999639, - "grad_norm": 1.7635525719053293, - "learning_rate": 1.2139908675190454e-06, - "loss": 0.9434, - "step": 5320 - }, - { - "epoch": 0.639812421090603, - "grad_norm": 1.9095381222235968, - "learning_rate": 1.2132746342781883e-06, - "loss": 0.9805, - "step": 5321 - }, - { - "epoch": 0.6399326639812422, - "grad_norm": 2.339178892003334, - "learning_rate": 1.2125585203829442e-06, - "loss": 1.0204, - "step": 5322 - }, - { - "epoch": 0.6400529068718812, - "grad_norm": 5.376448502644372, - "learning_rate": 1.211842525941946e-06, - "loss": 0.9706, - "step": 5323 - }, - { - "epoch": 0.6401731497625203, - "grad_norm": 1.80390966257307, - "learning_rate": 1.2111266510638105e-06, - "loss": 1.0243, - "step": 5324 - }, - { - "epoch": 0.6402933926531594, - "grad_norm": 1.7041221289500343, - "learning_rate": 1.2104108958571346e-06, - "loss": 1.0303, - "step": 5325 - }, - { - "epoch": 0.6404136355437985, - "grad_norm": 1.4453976167926537, - "learning_rate": 1.2096952604304975e-06, - "loss": 0.9866, - "step": 5326 - }, - { - "epoch": 0.6405338784344375, - "grad_norm": 3.3751760366236074, - "learning_rate": 1.2089797448924616e-06, - "loss": 0.9309, - "step": 5327 - }, - { - "epoch": 0.6406541213250767, - "grad_norm": 2.022604752552111, - "learning_rate": 1.2082643493515692e-06, - "loss": 0.8867, - "step": 5328 - }, - { - "epoch": 0.6407743642157158, - "grad_norm": 8.146573110356453, - "learning_rate": 1.207549073916346e-06, - "loss": 1.0449, - "step": 5329 - }, - { - "epoch": 0.6408946071063548, - "grad_norm": 2.058341023460365, - "learning_rate": 1.2068339186952976e-06, - "loss": 1.014, - "step": 5330 - }, - { - "epoch": 0.6410148499969939, - "grad_norm": 1.9504607683278357, - "learning_rate": 1.2061188837969136e-06, - "loss": 0.9538, - "step": 5331 - }, - { - "epoch": 0.641135092887633, - "grad_norm": 2.2863034229909163, - "learning_rate": 1.2054039693296631e-06, - "loss": 1.0736, - "step": 5332 - }, - { - "epoch": 0.6412553357782721, - "grad_norm": 2.0372013645487375, - "learning_rate": 1.2046891754019992e-06, - "loss": 1.0433, - "step": 5333 - }, - { - "epoch": 0.6413755786689112, - "grad_norm": 3.1447826593014603, - "learning_rate": 1.2039745021223548e-06, - "loss": 1.0522, - "step": 5334 - }, - { - "epoch": 0.6414958215595503, - "grad_norm": 0.9284259346800494, - "learning_rate": 1.2032599495991456e-06, - "loss": 0.8514, - "step": 5335 - }, - { - "epoch": 0.6416160644501894, - "grad_norm": 1.8853658244266267, - "learning_rate": 1.2025455179407685e-06, - "loss": 0.9245, - "step": 5336 - }, - { - "epoch": 0.6417363073408284, - "grad_norm": 2.224435854768296, - "learning_rate": 1.2018312072556022e-06, - "loss": 0.9671, - "step": 5337 - }, - { - "epoch": 0.6418565502314676, - "grad_norm": 3.349231255990472, - "learning_rate": 1.2011170176520077e-06, - "loss": 0.9754, - "step": 5338 - }, - { - "epoch": 0.6419767931221066, - "grad_norm": 1.6548656220062683, - "learning_rate": 1.2004029492383256e-06, - "loss": 1.0416, - "step": 5339 - }, - { - "epoch": 0.6420970360127457, - "grad_norm": 1.7486453262643171, - "learning_rate": 1.1996890021228814e-06, - "loss": 0.9699, - "step": 5340 - }, - { - "epoch": 0.6422172789033849, - "grad_norm": 1.5622997918608443, - "learning_rate": 1.1989751764139785e-06, - "loss": 0.9294, - "step": 5341 - }, - { - "epoch": 0.6423375217940239, - "grad_norm": 1.7895590413775941, - "learning_rate": 1.1982614722199044e-06, - "loss": 1.0535, - "step": 5342 - }, - { - "epoch": 0.642457764684663, - "grad_norm": 2.5538593781546925, - "learning_rate": 1.1975478896489276e-06, - "loss": 1.0064, - "step": 5343 - }, - { - "epoch": 0.6425780075753021, - "grad_norm": 2.2745892206262455, - "learning_rate": 1.1968344288092981e-06, - "loss": 0.9936, - "step": 5344 - }, - { - "epoch": 0.6426982504659412, - "grad_norm": 1.90070399242204, - "learning_rate": 1.1961210898092468e-06, - "loss": 0.8742, - "step": 5345 - }, - { - "epoch": 0.6428184933565803, - "grad_norm": 2.694808780206388, - "learning_rate": 1.1954078727569874e-06, - "loss": 1.0272, - "step": 5346 - }, - { - "epoch": 0.6429387362472194, - "grad_norm": 1.5676921028319357, - "learning_rate": 1.1946947777607141e-06, - "loss": 1.0142, - "step": 5347 - }, - { - "epoch": 0.6430589791378585, - "grad_norm": 1.7724443923556303, - "learning_rate": 1.1939818049286024e-06, - "loss": 1.0273, - "step": 5348 - }, - { - "epoch": 0.6431792220284975, - "grad_norm": 1.714186766300607, - "learning_rate": 1.1932689543688101e-06, - "loss": 0.9714, - "step": 5349 - }, - { - "epoch": 0.6432994649191367, - "grad_norm": 4.136762321610122, - "learning_rate": 1.1925562261894756e-06, - "loss": 0.9533, - "step": 5350 - }, - { - "epoch": 0.6434197078097758, - "grad_norm": 2.672811600564435, - "learning_rate": 1.1918436204987207e-06, - "loss": 1.0073, - "step": 5351 - }, - { - "epoch": 0.6435399507004148, - "grad_norm": 8.381353445076103, - "learning_rate": 1.191131137404645e-06, - "loss": 1.0475, - "step": 5352 - }, - { - "epoch": 0.643660193591054, - "grad_norm": 1.8948014214173539, - "learning_rate": 1.190418777015333e-06, - "loss": 0.9977, - "step": 5353 - }, - { - "epoch": 0.643780436481693, - "grad_norm": 1.9476887846737656, - "learning_rate": 1.1897065394388487e-06, - "loss": 0.9666, - "step": 5354 - }, - { - "epoch": 0.6439006793723321, - "grad_norm": 1.5204980907484646, - "learning_rate": 1.1889944247832385e-06, - "loss": 0.994, - "step": 5355 - }, - { - "epoch": 0.6440209222629713, - "grad_norm": 3.7166352229363446, - "learning_rate": 1.1882824331565283e-06, - "loss": 0.9313, - "step": 5356 - }, - { - "epoch": 0.6441411651536103, - "grad_norm": 2.379206696377882, - "learning_rate": 1.1875705646667287e-06, - "loss": 1.1189, - "step": 5357 - }, - { - "epoch": 0.6442614080442494, - "grad_norm": 2.11550081640817, - "learning_rate": 1.1868588194218282e-06, - "loss": 0.981, - "step": 5358 - }, - { - "epoch": 0.6443816509348885, - "grad_norm": 1.6476586838799665, - "learning_rate": 1.1861471975297979e-06, - "loss": 0.9639, - "step": 5359 - }, - { - "epoch": 0.6445018938255276, - "grad_norm": 1.6402477012214662, - "learning_rate": 1.185435699098591e-06, - "loss": 0.9372, - "step": 5360 - }, - { - "epoch": 0.6446221367161666, - "grad_norm": 2.974154081210201, - "learning_rate": 1.1847243242361403e-06, - "loss": 1.0068, - "step": 5361 - }, - { - "epoch": 0.6447423796068057, - "grad_norm": 2.8533416632355406, - "learning_rate": 1.1840130730503624e-06, - "loss": 1.0151, - "step": 5362 - }, - { - "epoch": 0.6448626224974449, - "grad_norm": 2.084322872396516, - "learning_rate": 1.1833019456491518e-06, - "loss": 0.9847, - "step": 5363 - }, - { - "epoch": 0.6449828653880839, - "grad_norm": 2.0462490609830404, - "learning_rate": 1.1825909421403871e-06, - "loss": 1.0094, - "step": 5364 - }, - { - "epoch": 0.645103108278723, - "grad_norm": 2.045943536010993, - "learning_rate": 1.181880062631926e-06, - "loss": 0.985, - "step": 5365 - }, - { - "epoch": 0.6452233511693621, - "grad_norm": 2.402081631882907, - "learning_rate": 1.1811693072316093e-06, - "loss": 1.0808, - "step": 5366 - }, - { - "epoch": 0.6453435940600012, - "grad_norm": 2.2827076892434675, - "learning_rate": 1.1804586760472574e-06, - "loss": 1.0586, - "step": 5367 - }, - { - "epoch": 0.6454638369506402, - "grad_norm": 2.082277780791548, - "learning_rate": 1.1797481691866736e-06, - "loss": 1.0213, - "step": 5368 - }, - { - "epoch": 0.6455840798412794, - "grad_norm": 2.751812597603016, - "learning_rate": 1.1790377867576393e-06, - "loss": 1.0541, - "step": 5369 - }, - { - "epoch": 0.6457043227319185, - "grad_norm": 1.7790488312030481, - "learning_rate": 1.1783275288679203e-06, - "loss": 0.9937, - "step": 5370 - }, - { - "epoch": 0.6458245656225575, - "grad_norm": 0.9411004547229918, - "learning_rate": 1.177617395625262e-06, - "loss": 0.8829, - "step": 5371 - }, - { - "epoch": 0.6459448085131967, - "grad_norm": 1.8962947856766812, - "learning_rate": 1.1769073871373908e-06, - "loss": 0.987, - "step": 5372 - }, - { - "epoch": 0.6460650514038357, - "grad_norm": 14.958379350483419, - "learning_rate": 1.176197503512015e-06, - "loss": 1.0681, - "step": 5373 - }, - { - "epoch": 0.6461852942944748, - "grad_norm": 2.2815870271228325, - "learning_rate": 1.1754877448568223e-06, - "loss": 1.054, - "step": 5374 - }, - { - "epoch": 0.646305537185114, - "grad_norm": 2.7948541426432083, - "learning_rate": 1.1747781112794837e-06, - "loss": 1.1344, - "step": 5375 - }, - { - "epoch": 0.646425780075753, - "grad_norm": 1.5666468713740764, - "learning_rate": 1.1740686028876487e-06, - "loss": 1.0494, - "step": 5376 - }, - { - "epoch": 0.6465460229663921, - "grad_norm": 2.424340353057082, - "learning_rate": 1.1733592197889507e-06, - "loss": 0.9749, - "step": 5377 - }, - { - "epoch": 0.6466662658570312, - "grad_norm": 3.3956271774029436, - "learning_rate": 1.1726499620910014e-06, - "loss": 0.9497, - "step": 5378 - }, - { - "epoch": 0.6467865087476703, - "grad_norm": 2.0443144645813263, - "learning_rate": 1.1719408299013955e-06, - "loss": 0.9894, - "step": 5379 - }, - { - "epoch": 0.6469067516383094, - "grad_norm": 2.858246961202468, - "learning_rate": 1.1712318233277067e-06, - "loss": 0.9914, - "step": 5380 - }, - { - "epoch": 0.6470269945289485, - "grad_norm": 0.7542636794890837, - "learning_rate": 1.1705229424774916e-06, - "loss": 0.8237, - "step": 5381 - }, - { - "epoch": 0.6471472374195876, - "grad_norm": 1.6150225462387884, - "learning_rate": 1.1698141874582867e-06, - "loss": 0.8696, - "step": 5382 - }, - { - "epoch": 0.6472674803102266, - "grad_norm": 1.9076087986627006, - "learning_rate": 1.169105558377609e-06, - "loss": 0.954, - "step": 5383 - }, - { - "epoch": 0.6473877232008658, - "grad_norm": 1.9239214414630195, - "learning_rate": 1.1683970553429587e-06, - "loss": 1.0084, - "step": 5384 - }, - { - "epoch": 0.6475079660915048, - "grad_norm": 1.7266211866528471, - "learning_rate": 1.1676886784618128e-06, - "loss": 1.0441, - "step": 5385 - }, - { - "epoch": 0.6476282089821439, - "grad_norm": 2.1522140325376116, - "learning_rate": 1.1669804278416332e-06, - "loss": 1.0627, - "step": 5386 - }, - { - "epoch": 0.6477484518727831, - "grad_norm": 6.057653003607038, - "learning_rate": 1.1662723035898602e-06, - "loss": 0.946, - "step": 5387 - }, - { - "epoch": 0.6478686947634221, - "grad_norm": 1.6223727776209234, - "learning_rate": 1.165564305813915e-06, - "loss": 1.0447, - "step": 5388 - }, - { - "epoch": 0.6479889376540612, - "grad_norm": 2.026557196475628, - "learning_rate": 1.1648564346212019e-06, - "loss": 1.0416, - "step": 5389 - }, - { - "epoch": 0.6481091805447003, - "grad_norm": 1.5438903374146762, - "learning_rate": 1.164148690119104e-06, - "loss": 0.9948, - "step": 5390 - }, - { - "epoch": 0.6482294234353394, - "grad_norm": 1.7316825519135448, - "learning_rate": 1.163441072414985e-06, - "loss": 0.9685, - "step": 5391 - }, - { - "epoch": 0.6483496663259785, - "grad_norm": 1.9303368468167272, - "learning_rate": 1.16273358161619e-06, - "loss": 0.9229, - "step": 5392 - }, - { - "epoch": 0.6484699092166175, - "grad_norm": 1.942770710643413, - "learning_rate": 1.1620262178300446e-06, - "loss": 1.0742, - "step": 5393 - }, - { - "epoch": 0.6485901521072567, - "grad_norm": 1.625440097781093, - "learning_rate": 1.1613189811638563e-06, - "loss": 0.986, - "step": 5394 - }, - { - "epoch": 0.6487103949978957, - "grad_norm": 1.5810309367666737, - "learning_rate": 1.1606118717249117e-06, - "loss": 1.0114, - "step": 5395 - }, - { - "epoch": 0.6488306378885348, - "grad_norm": 1.925617822363185, - "learning_rate": 1.1599048896204787e-06, - "loss": 0.9178, - "step": 5396 - }, - { - "epoch": 0.648950880779174, - "grad_norm": 2.3369018197286913, - "learning_rate": 1.1591980349578061e-06, - "loss": 1.0418, - "step": 5397 - }, - { - "epoch": 0.649071123669813, - "grad_norm": 0.7846014995230981, - "learning_rate": 1.158491307844123e-06, - "loss": 0.8068, - "step": 5398 - }, - { - "epoch": 0.6491913665604521, - "grad_norm": 1.4975994292351866, - "learning_rate": 1.1577847083866387e-06, - "loss": 1.0734, - "step": 5399 - }, - { - "epoch": 0.6493116094510912, - "grad_norm": 2.628387989579752, - "learning_rate": 1.1570782366925453e-06, - "loss": 0.9554, - "step": 5400 - }, - { - "epoch": 0.6494318523417303, - "grad_norm": 1.7372790930300945, - "learning_rate": 1.1563718928690132e-06, - "loss": 0.9869, - "step": 5401 - }, - { - "epoch": 0.6495520952323693, - "grad_norm": 2.3334450581459993, - "learning_rate": 1.1556656770231942e-06, - "loss": 0.9412, - "step": 5402 - }, - { - "epoch": 0.6496723381230085, - "grad_norm": 1.686096667535637, - "learning_rate": 1.1549595892622207e-06, - "loss": 0.9914, - "step": 5403 - }, - { - "epoch": 0.6497925810136476, - "grad_norm": 0.8277921907576845, - "learning_rate": 1.1542536296932047e-06, - "loss": 0.8652, - "step": 5404 - }, - { - "epoch": 0.6499128239042866, - "grad_norm": 1.6811814872083648, - "learning_rate": 1.1535477984232414e-06, - "loss": 0.927, - "step": 5405 - }, - { - "epoch": 0.6500330667949258, - "grad_norm": 2.153068517378707, - "learning_rate": 1.152842095559404e-06, - "loss": 0.9986, - "step": 5406 - }, - { - "epoch": 0.6501533096855648, - "grad_norm": 1.712603728779613, - "learning_rate": 1.1521365212087474e-06, - "loss": 0.9966, - "step": 5407 - }, - { - "epoch": 0.6502735525762039, - "grad_norm": 1.7656714509238183, - "learning_rate": 1.1514310754783062e-06, - "loss": 0.933, - "step": 5408 - }, - { - "epoch": 0.6503937954668431, - "grad_norm": 3.0411498445010867, - "learning_rate": 1.1507257584750964e-06, - "loss": 0.964, - "step": 5409 - }, - { - "epoch": 0.6505140383574821, - "grad_norm": 1.90379149944637, - "learning_rate": 1.150020570306113e-06, - "loss": 1.0009, - "step": 5410 - }, - { - "epoch": 0.6506342812481212, - "grad_norm": 1.7234995328472624, - "learning_rate": 1.1493155110783338e-06, - "loss": 0.9811, - "step": 5411 - }, - { - "epoch": 0.6507545241387603, - "grad_norm": 2.9137645088400803, - "learning_rate": 1.1486105808987155e-06, - "loss": 0.9397, - "step": 5412 - }, - { - "epoch": 0.6508747670293994, - "grad_norm": 1.765259156261037, - "learning_rate": 1.1479057798741947e-06, - "loss": 1.037, - "step": 5413 - }, - { - "epoch": 0.6509950099200384, - "grad_norm": 0.8293054552393541, - "learning_rate": 1.14720110811169e-06, - "loss": 0.8107, - "step": 5414 - }, - { - "epoch": 0.6511152528106776, - "grad_norm": 2.4290722623074585, - "learning_rate": 1.146496565718098e-06, - "loss": 0.9959, - "step": 5415 - }, - { - "epoch": 0.6512354957013167, - "grad_norm": 2.195766849947, - "learning_rate": 1.1457921528002996e-06, - "loss": 0.9827, - "step": 5416 - }, - { - "epoch": 0.6513557385919557, - "grad_norm": 2.89808377063638, - "learning_rate": 1.1450878694651522e-06, - "loss": 0.946, - "step": 5417 - }, - { - "epoch": 0.6514759814825949, - "grad_norm": 2.1196648437153023, - "learning_rate": 1.1443837158194954e-06, - "loss": 0.8487, - "step": 5418 - }, - { - "epoch": 0.651596224373234, - "grad_norm": 7.018290737544932, - "learning_rate": 1.1436796919701484e-06, - "loss": 0.9696, - "step": 5419 - }, - { - "epoch": 0.651716467263873, - "grad_norm": 1.973023635343733, - "learning_rate": 1.1429757980239115e-06, - "loss": 0.8487, - "step": 5420 - }, - { - "epoch": 0.6518367101545122, - "grad_norm": 2.463529257444026, - "learning_rate": 1.1422720340875636e-06, - "loss": 1.0381, - "step": 5421 - }, - { - "epoch": 0.6519569530451512, - "grad_norm": 2.6549030254635437, - "learning_rate": 1.1415684002678671e-06, - "loss": 1.0207, - "step": 5422 - }, - { - "epoch": 0.6520771959357903, - "grad_norm": 2.2557157396439367, - "learning_rate": 1.1408648966715617e-06, - "loss": 1.0171, - "step": 5423 - }, - { - "epoch": 0.6521974388264293, - "grad_norm": 1.8932718082402535, - "learning_rate": 1.1401615234053683e-06, - "loss": 0.9512, - "step": 5424 - }, - { - "epoch": 0.6523176817170685, - "grad_norm": 2.7306823210354456, - "learning_rate": 1.1394582805759885e-06, - "loss": 0.9845, - "step": 5425 - }, - { - "epoch": 0.6524379246077076, - "grad_norm": 1.8386941545772473, - "learning_rate": 1.1387551682901022e-06, - "loss": 0.9862, - "step": 5426 - }, - { - "epoch": 0.6525581674983466, - "grad_norm": 2.157664733291028, - "learning_rate": 1.138052186654373e-06, - "loss": 0.9345, - "step": 5427 - }, - { - "epoch": 0.6526784103889858, - "grad_norm": 2.320497447648029, - "learning_rate": 1.1373493357754417e-06, - "loss": 1.1109, - "step": 5428 - }, - { - "epoch": 0.6527986532796248, - "grad_norm": 1.9751298861709028, - "learning_rate": 1.1366466157599303e-06, - "loss": 0.9991, - "step": 5429 - }, - { - "epoch": 0.6529188961702639, - "grad_norm": 2.195024247437458, - "learning_rate": 1.1359440267144412e-06, - "loss": 0.9875, - "step": 5430 - }, - { - "epoch": 0.653039139060903, - "grad_norm": 2.132511550854149, - "learning_rate": 1.1352415687455556e-06, - "loss": 0.9725, - "step": 5431 - }, - { - "epoch": 0.6531593819515421, - "grad_norm": 3.3650093556879135, - "learning_rate": 1.1345392419598362e-06, - "loss": 0.8747, - "step": 5432 - }, - { - "epoch": 0.6532796248421812, - "grad_norm": 2.19172576370145, - "learning_rate": 1.1338370464638263e-06, - "loss": 0.942, - "step": 5433 - }, - { - "epoch": 0.6533998677328203, - "grad_norm": 2.5097942187391102, - "learning_rate": 1.1331349823640474e-06, - "loss": 0.8744, - "step": 5434 - }, - { - "epoch": 0.6535201106234594, - "grad_norm": 2.310107565848351, - "learning_rate": 1.132433049767003e-06, - "loss": 1.012, - "step": 5435 - }, - { - "epoch": 0.6536403535140984, - "grad_norm": 3.5026425875951928, - "learning_rate": 1.1317312487791748e-06, - "loss": 1.0307, - "step": 5436 - }, - { - "epoch": 0.6537605964047376, - "grad_norm": 2.0346458805534025, - "learning_rate": 1.1310295795070253e-06, - "loss": 0.9523, - "step": 5437 - }, - { - "epoch": 0.6538808392953767, - "grad_norm": 3.9299789532978746, - "learning_rate": 1.1303280420569982e-06, - "loss": 1.0372, - "step": 5438 - }, - { - "epoch": 0.6540010821860157, - "grad_norm": 1.8109755188684347, - "learning_rate": 1.1296266365355158e-06, - "loss": 0.9956, - "step": 5439 - }, - { - "epoch": 0.6541213250766549, - "grad_norm": 2.3803038351876085, - "learning_rate": 1.1289253630489806e-06, - "loss": 0.9571, - "step": 5440 - }, - { - "epoch": 0.6542415679672939, - "grad_norm": 4.132507889884899, - "learning_rate": 1.1282242217037753e-06, - "loss": 0.9739, - "step": 5441 - }, - { - "epoch": 0.654361810857933, - "grad_norm": 2.420356619071385, - "learning_rate": 1.127523212606262e-06, - "loss": 0.8566, - "step": 5442 - }, - { - "epoch": 0.6544820537485722, - "grad_norm": 1.697187380199818, - "learning_rate": 1.1268223358627835e-06, - "loss": 0.9558, - "step": 5443 - }, - { - "epoch": 0.6546022966392112, - "grad_norm": 1.5951930226449835, - "learning_rate": 1.126121591579663e-06, - "loss": 0.9406, - "step": 5444 - }, - { - "epoch": 0.6547225395298503, - "grad_norm": 2.8032439615954647, - "learning_rate": 1.1254209798632018e-06, - "loss": 0.9167, - "step": 5445 - }, - { - "epoch": 0.6548427824204894, - "grad_norm": 1.6031814951915637, - "learning_rate": 1.124720500819683e-06, - "loss": 1.0779, - "step": 5446 - }, - { - "epoch": 0.6549630253111285, - "grad_norm": 1.8530432337304543, - "learning_rate": 1.1240201545553682e-06, - "loss": 1.0527, - "step": 5447 - }, - { - "epoch": 0.6550832682017675, - "grad_norm": 2.779579775493504, - "learning_rate": 1.1233199411764987e-06, - "loss": 0.9603, - "step": 5448 - }, - { - "epoch": 0.6552035110924067, - "grad_norm": 5.02383767724677, - "learning_rate": 1.1226198607892978e-06, - "loss": 0.9237, - "step": 5449 - }, - { - "epoch": 0.6553237539830458, - "grad_norm": 1.682725014001932, - "learning_rate": 1.1219199134999664e-06, - "loss": 1.0262, - "step": 5450 - }, - { - "epoch": 0.6554439968736848, - "grad_norm": 2.641417976811443, - "learning_rate": 1.1212200994146863e-06, - "loss": 1.0102, - "step": 5451 - }, - { - "epoch": 0.655564239764324, - "grad_norm": 1.709348872959679, - "learning_rate": 1.120520418639618e-06, - "loss": 0.9817, - "step": 5452 - }, - { - "epoch": 0.655684482654963, - "grad_norm": 1.8370485231223894, - "learning_rate": 1.119820871280903e-06, - "loss": 1.0581, - "step": 5453 - }, - { - "epoch": 0.6558047255456021, - "grad_norm": 1.7253089600457465, - "learning_rate": 1.1191214574446614e-06, - "loss": 0.9632, - "step": 5454 - }, - { - "epoch": 0.6559249684362413, - "grad_norm": 1.7934936552932514, - "learning_rate": 1.118422177236995e-06, - "loss": 1.0271, - "step": 5455 - }, - { - "epoch": 0.6560452113268803, - "grad_norm": 2.3423526921972178, - "learning_rate": 1.1177230307639835e-06, - "loss": 1.0828, - "step": 5456 - }, - { - "epoch": 0.6561654542175194, - "grad_norm": 1.7069771275789722, - "learning_rate": 1.1170240181316865e-06, - "loss": 1.0199, - "step": 5457 - }, - { - "epoch": 0.6562856971081584, - "grad_norm": 2.53822683814362, - "learning_rate": 1.1163251394461442e-06, - "loss": 1.0285, - "step": 5458 - }, - { - "epoch": 0.6564059399987976, - "grad_norm": 1.93464684371117, - "learning_rate": 1.1156263948133746e-06, - "loss": 1.0542, - "step": 5459 - }, - { - "epoch": 0.6565261828894366, - "grad_norm": 3.0160778278862743, - "learning_rate": 1.1149277843393787e-06, - "loss": 1.0152, - "step": 5460 - }, - { - "epoch": 0.6566464257800757, - "grad_norm": 2.1891112796595484, - "learning_rate": 1.1142293081301342e-06, - "loss": 0.8624, - "step": 5461 - }, - { - "epoch": 0.6567666686707149, - "grad_norm": 4.215203723260463, - "learning_rate": 1.1135309662915995e-06, - "loss": 0.906, - "step": 5462 - }, - { - "epoch": 0.6568869115613539, - "grad_norm": 1.8620689174833571, - "learning_rate": 1.112832758929712e-06, - "loss": 0.828, - "step": 5463 - }, - { - "epoch": 0.657007154451993, - "grad_norm": 1.730098572827875, - "learning_rate": 1.11213468615039e-06, - "loss": 0.9714, - "step": 5464 - }, - { - "epoch": 0.6571273973426321, - "grad_norm": 4.681226270383782, - "learning_rate": 1.1114367480595292e-06, - "loss": 0.9873, - "step": 5465 - }, - { - "epoch": 0.6572476402332712, - "grad_norm": 2.830278114429232, - "learning_rate": 1.1107389447630086e-06, - "loss": 1.0469, - "step": 5466 - }, - { - "epoch": 0.6573678831239103, - "grad_norm": 2.7038733708791747, - "learning_rate": 1.1100412763666818e-06, - "loss": 1.0116, - "step": 5467 - }, - { - "epoch": 0.6574881260145494, - "grad_norm": 5.877662217835647, - "learning_rate": 1.1093437429763865e-06, - "loss": 1.0238, - "step": 5468 - }, - { - "epoch": 0.6576083689051885, - "grad_norm": 2.155873583680981, - "learning_rate": 1.1086463446979361e-06, - "loss": 0.9723, - "step": 5469 - }, - { - "epoch": 0.6577286117958275, - "grad_norm": 1.7937815035402827, - "learning_rate": 1.1079490816371277e-06, - "loss": 0.9958, - "step": 5470 - }, - { - "epoch": 0.6578488546864667, - "grad_norm": 3.2221877618752015, - "learning_rate": 1.1072519538997352e-06, - "loss": 0.9654, - "step": 5471 - }, - { - "epoch": 0.6579690975771058, - "grad_norm": 1.9574600328674499, - "learning_rate": 1.1065549615915095e-06, - "loss": 1.0457, - "step": 5472 - }, - { - "epoch": 0.6580893404677448, - "grad_norm": 2.05107768504876, - "learning_rate": 1.105858104818187e-06, - "loss": 1.0035, - "step": 5473 - }, - { - "epoch": 0.658209583358384, - "grad_norm": 5.389323290240937, - "learning_rate": 1.105161383685478e-06, - "loss": 0.9758, - "step": 5474 - }, - { - "epoch": 0.658329826249023, - "grad_norm": 0.7690321954850563, - "learning_rate": 1.1044647982990771e-06, - "loss": 0.8302, - "step": 5475 - }, - { - "epoch": 0.6584500691396621, - "grad_norm": 2.196492594052791, - "learning_rate": 1.1037683487646536e-06, - "loss": 0.8656, - "step": 5476 - }, - { - "epoch": 0.6585703120303013, - "grad_norm": 1.7905566377222575, - "learning_rate": 1.1030720351878583e-06, - "loss": 1.0001, - "step": 5477 - }, - { - "epoch": 0.6586905549209403, - "grad_norm": 1.4167503818385572, - "learning_rate": 1.102375857674323e-06, - "loss": 0.8472, - "step": 5478 - }, - { - "epoch": 0.6588107978115794, - "grad_norm": 1.6970577134595335, - "learning_rate": 1.1016798163296561e-06, - "loss": 1.1331, - "step": 5479 - }, - { - "epoch": 0.6589310407022185, - "grad_norm": 1.8181173817096978, - "learning_rate": 1.1009839112594471e-06, - "loss": 0.8892, - "step": 5480 - }, - { - "epoch": 0.6590512835928576, - "grad_norm": 2.1524686967476474, - "learning_rate": 1.1002881425692638e-06, - "loss": 0.9475, - "step": 5481 - }, - { - "epoch": 0.6591715264834966, - "grad_norm": 1.8216385493270972, - "learning_rate": 1.0995925103646532e-06, - "loss": 0.984, - "step": 5482 - }, - { - "epoch": 0.6592917693741358, - "grad_norm": 2.226758018074063, - "learning_rate": 1.0988970147511437e-06, - "loss": 0.9, - "step": 5483 - }, - { - "epoch": 0.6594120122647749, - "grad_norm": 2.002475398718582, - "learning_rate": 1.0982016558342405e-06, - "loss": 1.0373, - "step": 5484 - }, - { - "epoch": 0.6595322551554139, - "grad_norm": 2.1080556628860463, - "learning_rate": 1.0975064337194291e-06, - "loss": 0.946, - "step": 5485 - }, - { - "epoch": 0.6596524980460531, - "grad_norm": 1.3619932632251672, - "learning_rate": 1.0968113485121743e-06, - "loss": 0.9329, - "step": 5486 - }, - { - "epoch": 0.6597727409366921, - "grad_norm": 1.8129032607194049, - "learning_rate": 1.0961164003179185e-06, - "loss": 1.0287, - "step": 5487 - }, - { - "epoch": 0.6598929838273312, - "grad_norm": 2.0485267090158406, - "learning_rate": 1.0954215892420884e-06, - "loss": 1.0647, - "step": 5488 - }, - { - "epoch": 0.6600132267179702, - "grad_norm": 1.656666973096076, - "learning_rate": 1.094726915390082e-06, - "loss": 0.9371, - "step": 5489 - }, - { - "epoch": 0.6601334696086094, - "grad_norm": 2.186766583245594, - "learning_rate": 1.0940323788672836e-06, - "loss": 0.9254, - "step": 5490 - }, - { - "epoch": 0.6602537124992485, - "grad_norm": 1.5793816124098723, - "learning_rate": 1.0933379797790522e-06, - "loss": 0.9713, - "step": 5491 - }, - { - "epoch": 0.6603739553898875, - "grad_norm": 2.921942399007495, - "learning_rate": 1.0926437182307293e-06, - "loss": 0.9418, - "step": 5492 - }, - { - "epoch": 0.6604941982805267, - "grad_norm": 1.5741780088846389, - "learning_rate": 1.0919495943276338e-06, - "loss": 1.0116, - "step": 5493 - }, - { - "epoch": 0.6606144411711657, - "grad_norm": 2.4790715645466057, - "learning_rate": 1.0912556081750611e-06, - "loss": 0.9916, - "step": 5494 - }, - { - "epoch": 0.6607346840618048, - "grad_norm": 2.0125139461057815, - "learning_rate": 1.0905617598782909e-06, - "loss": 0.9934, - "step": 5495 - }, - { - "epoch": 0.660854926952444, - "grad_norm": 1.954798721332729, - "learning_rate": 1.0898680495425775e-06, - "loss": 1.0449, - "step": 5496 - }, - { - "epoch": 0.660975169843083, - "grad_norm": 1.924487811229598, - "learning_rate": 1.0891744772731594e-06, - "loss": 1.0343, - "step": 5497 - }, - { - "epoch": 0.6610954127337221, - "grad_norm": 1.8575959229377736, - "learning_rate": 1.088481043175248e-06, - "loss": 0.8775, - "step": 5498 - }, - { - "epoch": 0.6612156556243612, - "grad_norm": 1.592624416431751, - "learning_rate": 1.0877877473540368e-06, - "loss": 0.9832, - "step": 5499 - }, - { - "epoch": 0.6613358985150003, - "grad_norm": 2.4759617025590406, - "learning_rate": 1.0870945899147002e-06, - "loss": 0.9551, - "step": 5500 - }, - { - "epoch": 0.6614561414056394, - "grad_norm": 1.9791146187881963, - "learning_rate": 1.0864015709623879e-06, - "loss": 0.9814, - "step": 5501 - }, - { - "epoch": 0.6615763842962785, - "grad_norm": 2.503028751575455, - "learning_rate": 1.0857086906022313e-06, - "loss": 1.0223, - "step": 5502 - }, - { - "epoch": 0.6616966271869176, - "grad_norm": 2.790946516458736, - "learning_rate": 1.0850159489393388e-06, - "loss": 0.9609, - "step": 5503 - }, - { - "epoch": 0.6618168700775566, - "grad_norm": 1.6292660542227304, - "learning_rate": 1.0843233460787992e-06, - "loss": 1.0481, - "step": 5504 - }, - { - "epoch": 0.6619371129681958, - "grad_norm": 1.553803849334407, - "learning_rate": 1.0836308821256805e-06, - "loss": 1.0021, - "step": 5505 - }, - { - "epoch": 0.6620573558588349, - "grad_norm": 2.0717835004806293, - "learning_rate": 1.0829385571850282e-06, - "loss": 1.0103, - "step": 5506 - }, - { - "epoch": 0.6621775987494739, - "grad_norm": 2.473180248307611, - "learning_rate": 1.0822463713618679e-06, - "loss": 1.0669, - "step": 5507 - }, - { - "epoch": 0.6622978416401131, - "grad_norm": 1.8712570979226448, - "learning_rate": 1.0815543247612034e-06, - "loss": 1.0711, - "step": 5508 - }, - { - "epoch": 0.6624180845307521, - "grad_norm": 1.4887129190152546, - "learning_rate": 1.0808624174880168e-06, - "loss": 1.0562, - "step": 5509 - }, - { - "epoch": 0.6625383274213912, - "grad_norm": 1.8099502518703672, - "learning_rate": 1.080170649647272e-06, - "loss": 1.0261, - "step": 5510 - }, - { - "epoch": 0.6626585703120303, - "grad_norm": 1.8107109448146015, - "learning_rate": 1.0794790213439068e-06, - "loss": 0.8976, - "step": 5511 - }, - { - "epoch": 0.6627788132026694, - "grad_norm": 1.9692634034203707, - "learning_rate": 1.078787532682843e-06, - "loss": 1.0102, - "step": 5512 - }, - { - "epoch": 0.6628990560933085, - "grad_norm": 2.2224707769096828, - "learning_rate": 1.0780961837689773e-06, - "loss": 0.989, - "step": 5513 - }, - { - "epoch": 0.6630192989839476, - "grad_norm": 1.4679926279023208, - "learning_rate": 1.0774049747071883e-06, - "loss": 0.9309, - "step": 5514 - }, - { - "epoch": 0.6631395418745867, - "grad_norm": 2.390332671599145, - "learning_rate": 1.076713905602332e-06, - "loss": 0.9124, - "step": 5515 - }, - { - "epoch": 0.6632597847652257, - "grad_norm": 1.8024586707468124, - "learning_rate": 1.07602297655924e-06, - "loss": 1.04, - "step": 5516 - }, - { - "epoch": 0.6633800276558649, - "grad_norm": 1.971332853967176, - "learning_rate": 1.0753321876827292e-06, - "loss": 1.0322, - "step": 5517 - }, - { - "epoch": 0.663500270546504, - "grad_norm": 2.154679873911507, - "learning_rate": 1.0746415390775893e-06, - "loss": 0.9719, - "step": 5518 - }, - { - "epoch": 0.663620513437143, - "grad_norm": 1.6484875422832252, - "learning_rate": 1.0739510308485939e-06, - "loss": 1.0045, - "step": 5519 - }, - { - "epoch": 0.6637407563277821, - "grad_norm": 0.8066309819788131, - "learning_rate": 1.07326066310049e-06, - "loss": 0.8753, - "step": 5520 - }, - { - "epoch": 0.6638609992184212, - "grad_norm": 1.8902612761892845, - "learning_rate": 1.0725704359380059e-06, - "loss": 1.0278, - "step": 5521 - }, - { - "epoch": 0.6639812421090603, - "grad_norm": 1.9727489487424406, - "learning_rate": 1.0718803494658497e-06, - "loss": 0.9471, - "step": 5522 - }, - { - "epoch": 0.6641014849996993, - "grad_norm": 2.046006462504602, - "learning_rate": 1.071190403788707e-06, - "loss": 1.0711, - "step": 5523 - }, - { - "epoch": 0.6642217278903385, - "grad_norm": 1.7814102100803086, - "learning_rate": 1.0705005990112415e-06, - "loss": 0.9773, - "step": 5524 - }, - { - "epoch": 0.6643419707809776, - "grad_norm": 2.807565386750397, - "learning_rate": 1.0698109352380957e-06, - "loss": 0.9722, - "step": 5525 - }, - { - "epoch": 0.6644622136716166, - "grad_norm": 1.6795721679836237, - "learning_rate": 1.0691214125738909e-06, - "loss": 1.0084, - "step": 5526 - }, - { - "epoch": 0.6645824565622558, - "grad_norm": 0.874003080844905, - "learning_rate": 1.0684320311232287e-06, - "loss": 0.8467, - "step": 5527 - }, - { - "epoch": 0.6647026994528948, - "grad_norm": 2.279599258775351, - "learning_rate": 1.0677427909906865e-06, - "loss": 1.0339, - "step": 5528 - }, - { - "epoch": 0.6648229423435339, - "grad_norm": 1.6720220198116733, - "learning_rate": 1.0670536922808216e-06, - "loss": 0.9435, - "step": 5529 - }, - { - "epoch": 0.6649431852341731, - "grad_norm": 2.1165441682867367, - "learning_rate": 1.06636473509817e-06, - "loss": 0.9467, - "step": 5530 - }, - { - "epoch": 0.6650634281248121, - "grad_norm": 2.3001619033220555, - "learning_rate": 1.0656759195472447e-06, - "loss": 1.0379, - "step": 5531 - }, - { - "epoch": 0.6651836710154512, - "grad_norm": 0.8508596757204027, - "learning_rate": 1.0649872457325414e-06, - "loss": 0.8747, - "step": 5532 - }, - { - "epoch": 0.6653039139060903, - "grad_norm": 0.9008353700947093, - "learning_rate": 1.0642987137585278e-06, - "loss": 0.8464, - "step": 5533 - }, - { - "epoch": 0.6654241567967294, - "grad_norm": 2.0108657361721427, - "learning_rate": 1.0636103237296561e-06, - "loss": 1.0518, - "step": 5534 - }, - { - "epoch": 0.6655443996873684, - "grad_norm": 1.5934207800619942, - "learning_rate": 1.062922075750353e-06, - "loss": 1.0674, - "step": 5535 - }, - { - "epoch": 0.6656646425780076, - "grad_norm": 2.234449692124605, - "learning_rate": 1.0622339699250267e-06, - "loss": 0.9502, - "step": 5536 - }, - { - "epoch": 0.6657848854686467, - "grad_norm": 1.5710091634977255, - "learning_rate": 1.0615460063580624e-06, - "loss": 1.0231, - "step": 5537 - }, - { - "epoch": 0.6659051283592857, - "grad_norm": 2.1190389160500036, - "learning_rate": 1.060858185153821e-06, - "loss": 0.9614, - "step": 5538 - }, - { - "epoch": 0.6660253712499249, - "grad_norm": 2.619253225843215, - "learning_rate": 1.0601705064166474e-06, - "loss": 0.9935, - "step": 5539 - }, - { - "epoch": 0.666145614140564, - "grad_norm": 4.693773674428998, - "learning_rate": 1.0594829702508596e-06, - "loss": 0.9624, - "step": 5540 - }, - { - "epoch": 0.666265857031203, - "grad_norm": 1.678087616971156, - "learning_rate": 1.0587955767607592e-06, - "loss": 0.7819, - "step": 5541 - }, - { - "epoch": 0.6663860999218422, - "grad_norm": 3.3328099475677346, - "learning_rate": 1.0581083260506206e-06, - "loss": 1.0054, - "step": 5542 - }, - { - "epoch": 0.6665063428124812, - "grad_norm": 2.1300075097466555, - "learning_rate": 1.0574212182246993e-06, - "loss": 0.9942, - "step": 5543 - }, - { - "epoch": 0.6666265857031203, - "grad_norm": 2.2980643586059912, - "learning_rate": 1.0567342533872303e-06, - "loss": 0.9807, - "step": 5544 - }, - { - "epoch": 0.6667468285937594, - "grad_norm": 1.5332627594642532, - "learning_rate": 1.0560474316424255e-06, - "loss": 1.043, - "step": 5545 - }, - { - "epoch": 0.6668670714843985, - "grad_norm": 2.6771056969644897, - "learning_rate": 1.0553607530944746e-06, - "loss": 0.9702, - "step": 5546 - }, - { - "epoch": 0.6669873143750376, - "grad_norm": 1.8259260205912284, - "learning_rate": 1.0546742178475463e-06, - "loss": 1.1196, - "step": 5547 - }, - { - "epoch": 0.6671075572656767, - "grad_norm": 1.7984449079876343, - "learning_rate": 1.0539878260057868e-06, - "loss": 1.0972, - "step": 5548 - }, - { - "epoch": 0.6672278001563158, - "grad_norm": 2.7031478439865206, - "learning_rate": 1.0533015776733226e-06, - "loss": 0.9112, - "step": 5549 - }, - { - "epoch": 0.6673480430469548, - "grad_norm": 2.53315399522279, - "learning_rate": 1.0526154729542566e-06, - "loss": 1.0117, - "step": 5550 - }, - { - "epoch": 0.6674682859375939, - "grad_norm": 6.578982045935541, - "learning_rate": 1.0519295119526699e-06, - "loss": 1.0317, - "step": 5551 - }, - { - "epoch": 0.667588528828233, - "grad_norm": 1.613795311703114, - "learning_rate": 1.0512436947726227e-06, - "loss": 1.0588, - "step": 5552 - }, - { - "epoch": 0.6677087717188721, - "grad_norm": 2.604858235402493, - "learning_rate": 1.0505580215181517e-06, - "loss": 0.8846, - "step": 5553 - }, - { - "epoch": 0.6678290146095112, - "grad_norm": 0.8184552557593385, - "learning_rate": 1.0498724922932753e-06, - "loss": 0.8237, - "step": 5554 - }, - { - "epoch": 0.6679492575001503, - "grad_norm": 1.9988078498222044, - "learning_rate": 1.0491871072019851e-06, - "loss": 1.0882, - "step": 5555 - }, - { - "epoch": 0.6680695003907894, - "grad_norm": 1.8983860087377875, - "learning_rate": 1.0485018663482555e-06, - "loss": 0.8661, - "step": 5556 - }, - { - "epoch": 0.6681897432814284, - "grad_norm": 3.665187118145941, - "learning_rate": 1.0478167698360354e-06, - "loss": 0.9429, - "step": 5557 - }, - { - "epoch": 0.6683099861720676, - "grad_norm": 2.3943024319072586, - "learning_rate": 1.0471318177692556e-06, - "loss": 0.926, - "step": 5558 - }, - { - "epoch": 0.6684302290627067, - "grad_norm": 2.174841960558459, - "learning_rate": 1.046447010251821e-06, - "loss": 0.9803, - "step": 5559 - }, - { - "epoch": 0.6685504719533457, - "grad_norm": 2.0988413838887685, - "learning_rate": 1.0457623473876157e-06, - "loss": 0.9882, - "step": 5560 - }, - { - "epoch": 0.6686707148439849, - "grad_norm": 1.8777892664771718, - "learning_rate": 1.0450778292805046e-06, - "loss": 0.932, - "step": 5561 - }, - { - "epoch": 0.6687909577346239, - "grad_norm": 1.7526963442575985, - "learning_rate": 1.0443934560343267e-06, - "loss": 1.0216, - "step": 5562 - }, - { - "epoch": 0.668911200625263, - "grad_norm": 2.064839741027843, - "learning_rate": 1.0437092277529034e-06, - "loss": 1.0028, - "step": 5563 - }, - { - "epoch": 0.6690314435159022, - "grad_norm": 2.141070577093378, - "learning_rate": 1.0430251445400292e-06, - "loss": 0.9713, - "step": 5564 - }, - { - "epoch": 0.6691516864065412, - "grad_norm": 2.1913489131940374, - "learning_rate": 1.0423412064994787e-06, - "loss": 0.8587, - "step": 5565 - }, - { - "epoch": 0.6692719292971803, - "grad_norm": 1.8671466391972604, - "learning_rate": 1.0416574137350064e-06, - "loss": 0.9714, - "step": 5566 - }, - { - "epoch": 0.6693921721878194, - "grad_norm": 2.378055614607142, - "learning_rate": 1.0409737663503428e-06, - "loss": 1.0484, - "step": 5567 - }, - { - "epoch": 0.6695124150784585, - "grad_norm": 1.7608671646490848, - "learning_rate": 1.040290264449196e-06, - "loss": 1.0685, - "step": 5568 - }, - { - "epoch": 0.6696326579690975, - "grad_norm": 2.563891614464987, - "learning_rate": 1.0396069081352532e-06, - "loss": 0.8638, - "step": 5569 - }, - { - "epoch": 0.6697529008597367, - "grad_norm": 0.8255716943967443, - "learning_rate": 1.0389236975121782e-06, - "loss": 0.8262, - "step": 5570 - }, - { - "epoch": 0.6698731437503758, - "grad_norm": 2.0055688575216615, - "learning_rate": 1.0382406326836147e-06, - "loss": 0.9431, - "step": 5571 - }, - { - "epoch": 0.6699933866410148, - "grad_norm": 2.781484872565996, - "learning_rate": 1.0375577137531828e-06, - "loss": 0.9755, - "step": 5572 - }, - { - "epoch": 0.670113629531654, - "grad_norm": 1.6864076540398671, - "learning_rate": 1.0368749408244802e-06, - "loss": 0.9462, - "step": 5573 - }, - { - "epoch": 0.670233872422293, - "grad_norm": 1.7125170658124411, - "learning_rate": 1.0361923140010836e-06, - "loss": 1.0132, - "step": 5574 - }, - { - "epoch": 0.6703541153129321, - "grad_norm": 2.3858564101993043, - "learning_rate": 1.0355098333865455e-06, - "loss": 0.8728, - "step": 5575 - }, - { - "epoch": 0.6704743582035713, - "grad_norm": 1.565885157489697, - "learning_rate": 1.0348274990844006e-06, - "loss": 0.9227, - "step": 5576 - }, - { - "epoch": 0.6705946010942103, - "grad_norm": 1.5965167694501359, - "learning_rate": 1.034145311198155e-06, - "loss": 0.9517, - "step": 5577 - }, - { - "epoch": 0.6707148439848494, - "grad_norm": 1.6073303567412345, - "learning_rate": 1.0334632698312989e-06, - "loss": 0.8673, - "step": 5578 - }, - { - "epoch": 0.6708350868754885, - "grad_norm": 1.7953837965710133, - "learning_rate": 1.032781375087295e-06, - "loss": 0.9853, - "step": 5579 - }, - { - "epoch": 0.6709553297661276, - "grad_norm": 1.3647313753538846, - "learning_rate": 1.0320996270695891e-06, - "loss": 0.901, - "step": 5580 - }, - { - "epoch": 0.6710755726567667, - "grad_norm": 1.7960805712428372, - "learning_rate": 1.0314180258815998e-06, - "loss": 0.9632, - "step": 5581 - }, - { - "epoch": 0.6711958155474057, - "grad_norm": 2.2270466596000285, - "learning_rate": 1.0307365716267247e-06, - "loss": 0.9762, - "step": 5582 - }, - { - "epoch": 0.6713160584380449, - "grad_norm": 2.145547150228244, - "learning_rate": 1.0300552644083423e-06, - "loss": 1.0131, - "step": 5583 - }, - { - "epoch": 0.6714363013286839, - "grad_norm": 3.4262452806275356, - "learning_rate": 1.0293741043298036e-06, - "loss": 0.9619, - "step": 5584 - }, - { - "epoch": 0.671556544219323, - "grad_norm": 2.892395796815258, - "learning_rate": 1.0286930914944436e-06, - "loss": 0.9478, - "step": 5585 - }, - { - "epoch": 0.6716767871099621, - "grad_norm": 2.2414849539734054, - "learning_rate": 1.0280122260055684e-06, - "loss": 1.007, - "step": 5586 - }, - { - "epoch": 0.6717970300006012, - "grad_norm": 40.15483642940007, - "learning_rate": 1.0273315079664652e-06, - "loss": 1.0491, - "step": 5587 - }, - { - "epoch": 0.6719172728912403, - "grad_norm": 2.358158328389277, - "learning_rate": 1.0266509374803992e-06, - "loss": 0.9807, - "step": 5588 - }, - { - "epoch": 0.6720375157818794, - "grad_norm": 2.4373861432993853, - "learning_rate": 1.0259705146506123e-06, - "loss": 1.0706, - "step": 5589 - }, - { - "epoch": 0.6721577586725185, - "grad_norm": 2.1246119874242764, - "learning_rate": 1.025290239580324e-06, - "loss": 1.0018, - "step": 5590 - }, - { - "epoch": 0.6722780015631575, - "grad_norm": 2.68648492140011, - "learning_rate": 1.0246101123727313e-06, - "loss": 0.9861, - "step": 5591 - }, - { - "epoch": 0.6723982444537967, - "grad_norm": 1.8873166860526505, - "learning_rate": 1.0239301331310085e-06, - "loss": 1.0161, - "step": 5592 - }, - { - "epoch": 0.6725184873444358, - "grad_norm": 1.907575677892866, - "learning_rate": 1.0232503019583088e-06, - "loss": 1.112, - "step": 5593 - }, - { - "epoch": 0.6726387302350748, - "grad_norm": 1.8996028384661598, - "learning_rate": 1.0225706189577619e-06, - "loss": 0.9297, - "step": 5594 - }, - { - "epoch": 0.672758973125714, - "grad_norm": 6.36472584699623, - "learning_rate": 1.021891084232475e-06, - "loss": 0.9752, - "step": 5595 - }, - { - "epoch": 0.672879216016353, - "grad_norm": 2.380829232755002, - "learning_rate": 1.0212116978855325e-06, - "loss": 1.0295, - "step": 5596 - }, - { - "epoch": 0.6729994589069921, - "grad_norm": 3.241833918031522, - "learning_rate": 1.020532460019997e-06, - "loss": 1.0225, - "step": 5597 - }, - { - "epoch": 0.6731197017976313, - "grad_norm": 1.7042795170335692, - "learning_rate": 1.0198533707389096e-06, - "loss": 0.9301, - "step": 5598 - }, - { - "epoch": 0.6732399446882703, - "grad_norm": 2.032604646135459, - "learning_rate": 1.0191744301452853e-06, - "loss": 0.9639, - "step": 5599 - }, - { - "epoch": 0.6733601875789094, - "grad_norm": 1.6245665338897843, - "learning_rate": 1.0184956383421208e-06, - "loss": 0.9319, - "step": 5600 - }, - { - "epoch": 0.6734804304695485, - "grad_norm": 2.3152393920905863, - "learning_rate": 1.017816995432387e-06, - "loss": 0.8804, - "step": 5601 - }, - { - "epoch": 0.6736006733601876, - "grad_norm": 2.0921631678447623, - "learning_rate": 1.0171385015190353e-06, - "loss": 0.9706, - "step": 5602 - }, - { - "epoch": 0.6737209162508266, - "grad_norm": 2.1310483462337486, - "learning_rate": 1.0164601567049908e-06, - "loss": 0.9584, - "step": 5603 - }, - { - "epoch": 0.6738411591414658, - "grad_norm": 2.790519953306596, - "learning_rate": 1.015781961093158e-06, - "loss": 1.0303, - "step": 5604 - }, - { - "epoch": 0.6739614020321049, - "grad_norm": 1.748185170419794, - "learning_rate": 1.0151039147864197e-06, - "loss": 1.003, - "step": 5605 - }, - { - "epoch": 0.6740816449227439, - "grad_norm": 2.213270163247415, - "learning_rate": 1.0144260178876336e-06, - "loss": 0.8995, - "step": 5606 - }, - { - "epoch": 0.6742018878133831, - "grad_norm": 2.1201222479859574, - "learning_rate": 1.0137482704996388e-06, - "loss": 0.9006, - "step": 5607 - }, - { - "epoch": 0.6743221307040221, - "grad_norm": 2.356564293903578, - "learning_rate": 1.0130706727252461e-06, - "loss": 1.0248, - "step": 5608 - }, - { - "epoch": 0.6744423735946612, - "grad_norm": 2.2892720513885094, - "learning_rate": 1.0123932246672468e-06, - "loss": 0.908, - "step": 5609 - }, - { - "epoch": 0.6745626164853004, - "grad_norm": 0.7927959235010515, - "learning_rate": 1.0117159264284114e-06, - "loss": 0.8149, - "step": 5610 - }, - { - "epoch": 0.6746828593759394, - "grad_norm": 1.951748294095173, - "learning_rate": 1.0110387781114837e-06, - "loss": 1.0, - "step": 5611 - }, - { - "epoch": 0.6748031022665785, - "grad_norm": 1.9459801566081603, - "learning_rate": 1.0103617798191872e-06, - "loss": 0.998, - "step": 5612 - }, - { - "epoch": 0.6749233451572175, - "grad_norm": 5.576728368137737, - "learning_rate": 1.0096849316542217e-06, - "loss": 1.0509, - "step": 5613 - }, - { - "epoch": 0.6750435880478567, - "grad_norm": 5.730479400978513, - "learning_rate": 1.0090082337192643e-06, - "loss": 0.9742, - "step": 5614 - }, - { - "epoch": 0.6751638309384957, - "grad_norm": 3.2388905134973074, - "learning_rate": 1.0083316861169705e-06, - "loss": 1.0091, - "step": 5615 - }, - { - "epoch": 0.6752840738291348, - "grad_norm": 2.1525048058929137, - "learning_rate": 1.0076552889499713e-06, - "loss": 0.9524, - "step": 5616 - }, - { - "epoch": 0.675404316719774, - "grad_norm": 1.8115924009784086, - "learning_rate": 1.006979042320876e-06, - "loss": 0.9609, - "step": 5617 - }, - { - "epoch": 0.675524559610413, - "grad_norm": 2.3246503669612943, - "learning_rate": 1.0063029463322702e-06, - "loss": 0.8643, - "step": 5618 - }, - { - "epoch": 0.6756448025010521, - "grad_norm": 2.278492031174722, - "learning_rate": 1.0056270010867164e-06, - "loss": 0.9781, - "step": 5619 - }, - { - "epoch": 0.6757650453916912, - "grad_norm": 2.906271001268206, - "learning_rate": 1.004951206686758e-06, - "loss": 1.0075, - "step": 5620 - }, - { - "epoch": 0.6758852882823303, - "grad_norm": 2.0236924647822856, - "learning_rate": 1.0042755632349087e-06, - "loss": 0.9462, - "step": 5621 - }, - { - "epoch": 0.6760055311729694, - "grad_norm": 6.2929426840319875, - "learning_rate": 1.0036000708336653e-06, - "loss": 0.8582, - "step": 5622 - }, - { - "epoch": 0.6761257740636085, - "grad_norm": 3.945187768758506, - "learning_rate": 1.0029247295854984e-06, - "loss": 1.0261, - "step": 5623 - }, - { - "epoch": 0.6762460169542476, - "grad_norm": 2.9248910932562375, - "learning_rate": 1.0022495395928588e-06, - "loss": 0.9456, - "step": 5624 - }, - { - "epoch": 0.6763662598448866, - "grad_norm": 0.8096263760249789, - "learning_rate": 1.0015745009581697e-06, - "loss": 0.8709, - "step": 5625 - }, - { - "epoch": 0.6764865027355258, - "grad_norm": 2.1079065933886674, - "learning_rate": 1.0008996137838343e-06, - "loss": 0.8957, - "step": 5626 - }, - { - "epoch": 0.6766067456261649, - "grad_norm": 1.8967641983476229, - "learning_rate": 1.000224878172234e-06, - "loss": 1.0285, - "step": 5627 - }, - { - "epoch": 0.6767269885168039, - "grad_norm": 2.015004262044105, - "learning_rate": 9.99550294225724e-07, - "loss": 0.9631, - "step": 5628 - }, - { - "epoch": 0.6768472314074431, - "grad_norm": 1.9033684134827518, - "learning_rate": 9.988758620466402e-07, - "loss": 0.9428, - "step": 5629 - }, - { - "epoch": 0.6769674742980821, - "grad_norm": 1.5040636649048997, - "learning_rate": 9.982015817372917e-07, - "loss": 0.9903, - "step": 5630 - }, - { - "epoch": 0.6770877171887212, - "grad_norm": 2.0514356749730775, - "learning_rate": 9.975274533999657e-07, - "loss": 1.0496, - "step": 5631 - }, - { - "epoch": 0.6772079600793603, - "grad_norm": 2.8795627378613053, - "learning_rate": 9.96853477136929e-07, - "loss": 1.0678, - "step": 5632 - }, - { - "epoch": 0.6773282029699994, - "grad_norm": 9.127040687106723, - "learning_rate": 9.96179653050422e-07, - "loss": 0.9702, - "step": 5633 - }, - { - "epoch": 0.6774484458606385, - "grad_norm": 2.05714472636259, - "learning_rate": 9.955059812426635e-07, - "loss": 0.9659, - "step": 5634 - }, - { - "epoch": 0.6775686887512776, - "grad_norm": 1.893672913848845, - "learning_rate": 9.948324618158493e-07, - "loss": 1.0527, - "step": 5635 - }, - { - "epoch": 0.6776889316419167, - "grad_norm": 6.340297717977279, - "learning_rate": 9.941590948721502e-07, - "loss": 1.0059, - "step": 5636 - }, - { - "epoch": 0.6778091745325557, - "grad_norm": 3.3809987198681397, - "learning_rate": 9.934858805137188e-07, - "loss": 0.9892, - "step": 5637 - }, - { - "epoch": 0.6779294174231949, - "grad_norm": 3.9067217732688504, - "learning_rate": 9.92812818842677e-07, - "loss": 1.0415, - "step": 5638 - }, - { - "epoch": 0.678049660313834, - "grad_norm": 2.1086938959334107, - "learning_rate": 9.921399099611306e-07, - "loss": 0.8699, - "step": 5639 - }, - { - "epoch": 0.678169903204473, - "grad_norm": 1.756308628610554, - "learning_rate": 9.914671539711588e-07, - "loss": 0.9238, - "step": 5640 - }, - { - "epoch": 0.6782901460951122, - "grad_norm": 2.79384316581342, - "learning_rate": 9.90794550974817e-07, - "loss": 1.0149, - "step": 5641 - }, - { - "epoch": 0.6784103889857512, - "grad_norm": 2.2339772757430585, - "learning_rate": 9.901221010741407e-07, - "loss": 1.0461, - "step": 5642 - }, - { - "epoch": 0.6785306318763903, - "grad_norm": 1.7966917525022263, - "learning_rate": 9.894498043711375e-07, - "loss": 0.9782, - "step": 5643 - }, - { - "epoch": 0.6786508747670293, - "grad_norm": 1.9698826392791955, - "learning_rate": 9.887776609677962e-07, - "loss": 0.9239, - "step": 5644 - }, - { - "epoch": 0.6787711176576685, - "grad_norm": 2.44370575301365, - "learning_rate": 9.88105670966079e-07, - "loss": 0.9531, - "step": 5645 - }, - { - "epoch": 0.6788913605483076, - "grad_norm": 1.911079417687933, - "learning_rate": 9.874338344679283e-07, - "loss": 1.0161, - "step": 5646 - }, - { - "epoch": 0.6790116034389466, - "grad_norm": 1.8517822264015613, - "learning_rate": 9.86762151575259e-07, - "loss": 0.9732, - "step": 5647 - }, - { - "epoch": 0.6791318463295858, - "grad_norm": 1.621780948447701, - "learning_rate": 9.860906223899651e-07, - "loss": 1.0283, - "step": 5648 - }, - { - "epoch": 0.6792520892202248, - "grad_norm": 2.1288924750486116, - "learning_rate": 9.854192470139184e-07, - "loss": 0.9827, - "step": 5649 - }, - { - "epoch": 0.6793723321108639, - "grad_norm": 2.1017891573724374, - "learning_rate": 9.847480255489645e-07, - "loss": 0.9464, - "step": 5650 - }, - { - "epoch": 0.6794925750015031, - "grad_norm": 1.6620961781570691, - "learning_rate": 9.840769580969295e-07, - "loss": 0.9212, - "step": 5651 - }, - { - "epoch": 0.6796128178921421, - "grad_norm": 4.193678667397021, - "learning_rate": 9.834060447596114e-07, - "loss": 1.0253, - "step": 5652 - }, - { - "epoch": 0.6797330607827812, - "grad_norm": 1.9489923318914646, - "learning_rate": 9.827352856387868e-07, - "loss": 1.0075, - "step": 5653 - }, - { - "epoch": 0.6798533036734203, - "grad_norm": 0.8075538988696794, - "learning_rate": 9.820646808362118e-07, - "loss": 0.8867, - "step": 5654 - }, - { - "epoch": 0.6799735465640594, - "grad_norm": 2.0372612842598854, - "learning_rate": 9.813942304536154e-07, - "loss": 0.9481, - "step": 5655 - }, - { - "epoch": 0.6800937894546984, - "grad_norm": 1.8719996288250287, - "learning_rate": 9.807239345927043e-07, - "loss": 0.8685, - "step": 5656 - }, - { - "epoch": 0.6802140323453376, - "grad_norm": 2.451372820759801, - "learning_rate": 9.80053793355162e-07, - "loss": 0.9588, - "step": 5657 - }, - { - "epoch": 0.6803342752359767, - "grad_norm": 1.9075213085899223, - "learning_rate": 9.793838068426472e-07, - "loss": 0.9724, - "step": 5658 - }, - { - "epoch": 0.6804545181266157, - "grad_norm": 2.0261961053532387, - "learning_rate": 9.78713975156799e-07, - "loss": 0.8359, - "step": 5659 - }, - { - "epoch": 0.6805747610172549, - "grad_norm": 2.3453369871280585, - "learning_rate": 9.780442983992273e-07, - "loss": 0.9496, - "step": 5660 - }, - { - "epoch": 0.680695003907894, - "grad_norm": 1.6012530516357883, - "learning_rate": 9.773747766715238e-07, - "loss": 0.9429, - "step": 5661 - }, - { - "epoch": 0.680815246798533, - "grad_norm": 1.5488158871334379, - "learning_rate": 9.767054100752536e-07, - "loss": 1.032, - "step": 5662 - }, - { - "epoch": 0.6809354896891722, - "grad_norm": 1.7734115531468484, - "learning_rate": 9.760361987119584e-07, - "loss": 1.0381, - "step": 5663 - }, - { - "epoch": 0.6810557325798112, - "grad_norm": 2.1759614989647234, - "learning_rate": 9.753671426831592e-07, - "loss": 0.9101, - "step": 5664 - }, - { - "epoch": 0.6811759754704503, - "grad_norm": 1.904599960856551, - "learning_rate": 9.746982420903483e-07, - "loss": 1.0215, - "step": 5665 - }, - { - "epoch": 0.6812962183610894, - "grad_norm": 1.4354128405220554, - "learning_rate": 9.740294970349993e-07, - "loss": 0.9723, - "step": 5666 - }, - { - "epoch": 0.6814164612517285, - "grad_norm": 0.9785527509700853, - "learning_rate": 9.733609076185594e-07, - "loss": 0.9118, - "step": 5667 - }, - { - "epoch": 0.6815367041423676, - "grad_norm": 1.7828118019202936, - "learning_rate": 9.72692473942455e-07, - "loss": 1.0691, - "step": 5668 - }, - { - "epoch": 0.6816569470330067, - "grad_norm": 2.549081420005142, - "learning_rate": 9.720241961080849e-07, - "loss": 1.008, - "step": 5669 - }, - { - "epoch": 0.6817771899236458, - "grad_norm": 1.8332382813290724, - "learning_rate": 9.713560742168259e-07, - "loss": 0.9472, - "step": 5670 - }, - { - "epoch": 0.6818974328142848, - "grad_norm": 1.881788236352975, - "learning_rate": 9.706881083700333e-07, - "loss": 0.9445, - "step": 5671 - }, - { - "epoch": 0.682017675704924, - "grad_norm": 1.8348393179160951, - "learning_rate": 9.700202986690357e-07, - "loss": 1.0539, - "step": 5672 - }, - { - "epoch": 0.682137918595563, - "grad_norm": 1.9141643886306265, - "learning_rate": 9.693526452151413e-07, - "loss": 0.8879, - "step": 5673 - }, - { - "epoch": 0.6822581614862021, - "grad_norm": 1.6284058165775954, - "learning_rate": 9.686851481096305e-07, - "loss": 0.9892, - "step": 5674 - }, - { - "epoch": 0.6823784043768413, - "grad_norm": 1.8623658816800994, - "learning_rate": 9.68017807453762e-07, - "loss": 0.9588, - "step": 5675 - }, - { - "epoch": 0.6824986472674803, - "grad_norm": 2.0979476527401, - "learning_rate": 9.673506233487721e-07, - "loss": 0.9599, - "step": 5676 - }, - { - "epoch": 0.6826188901581194, - "grad_norm": 1.944640261765827, - "learning_rate": 9.666835958958717e-07, - "loss": 1.0933, - "step": 5677 - }, - { - "epoch": 0.6827391330487584, - "grad_norm": 1.8135657630063093, - "learning_rate": 9.660167251962484e-07, - "loss": 1.0247, - "step": 5678 - }, - { - "epoch": 0.6828593759393976, - "grad_norm": 1.5500470400409536, - "learning_rate": 9.653500113510654e-07, - "loss": 1.0047, - "step": 5679 - }, - { - "epoch": 0.6829796188300367, - "grad_norm": 2.765098306105144, - "learning_rate": 9.646834544614627e-07, - "loss": 0.8977, - "step": 5680 - }, - { - "epoch": 0.6830998617206757, - "grad_norm": 2.135058997989573, - "learning_rate": 9.64017054628558e-07, - "loss": 0.9928, - "step": 5681 - }, - { - "epoch": 0.6832201046113149, - "grad_norm": 1.706183460880455, - "learning_rate": 9.63350811953441e-07, - "loss": 1.0187, - "step": 5682 - }, - { - "epoch": 0.6833403475019539, - "grad_norm": 1.9347469089403018, - "learning_rate": 9.626847265371826e-07, - "loss": 0.9309, - "step": 5683 - }, - { - "epoch": 0.683460590392593, - "grad_norm": 2.355090058663941, - "learning_rate": 9.620187984808262e-07, - "loss": 1.0136, - "step": 5684 - }, - { - "epoch": 0.6835808332832322, - "grad_norm": 1.8752432347521328, - "learning_rate": 9.613530278853919e-07, - "loss": 1.0868, - "step": 5685 - }, - { - "epoch": 0.6837010761738712, - "grad_norm": 1.844028322581579, - "learning_rate": 9.60687414851879e-07, - "loss": 0.9681, - "step": 5686 - }, - { - "epoch": 0.6838213190645103, - "grad_norm": 2.1215130863905407, - "learning_rate": 9.600219594812575e-07, - "loss": 0.9965, - "step": 5687 - }, - { - "epoch": 0.6839415619551494, - "grad_norm": 1.5127196633217983, - "learning_rate": 9.593566618744786e-07, - "loss": 0.9556, - "step": 5688 - }, - { - "epoch": 0.6840618048457885, - "grad_norm": 1.5890359161763965, - "learning_rate": 9.58691522132466e-07, - "loss": 0.9645, - "step": 5689 - }, - { - "epoch": 0.6841820477364275, - "grad_norm": 2.682166018212775, - "learning_rate": 9.58026540356123e-07, - "loss": 1.0707, - "step": 5690 - }, - { - "epoch": 0.6843022906270667, - "grad_norm": 1.6620373648656257, - "learning_rate": 9.573617166463246e-07, - "loss": 1.0953, - "step": 5691 - }, - { - "epoch": 0.6844225335177058, - "grad_norm": 1.831572908437977, - "learning_rate": 9.56697051103924e-07, - "loss": 0.8276, - "step": 5692 - }, - { - "epoch": 0.6845427764083448, - "grad_norm": 9.496573382323719, - "learning_rate": 9.560325438297522e-07, - "loss": 1.0338, - "step": 5693 - }, - { - "epoch": 0.684663019298984, - "grad_norm": 2.279734800709539, - "learning_rate": 9.553681949246127e-07, - "loss": 1.103, - "step": 5694 - }, - { - "epoch": 0.684783262189623, - "grad_norm": 3.086776735171005, - "learning_rate": 9.547040044892886e-07, - "loss": 0.9837, - "step": 5695 - }, - { - "epoch": 0.6849035050802621, - "grad_norm": 0.8955626892115202, - "learning_rate": 9.540399726245354e-07, - "loss": 0.8721, - "step": 5696 - }, - { - "epoch": 0.6850237479709013, - "grad_norm": 2.8501897982926514, - "learning_rate": 9.533760994310859e-07, - "loss": 0.9191, - "step": 5697 - }, - { - "epoch": 0.6851439908615403, - "grad_norm": 2.147246873567956, - "learning_rate": 9.527123850096508e-07, - "loss": 0.9763, - "step": 5698 - }, - { - "epoch": 0.6852642337521794, - "grad_norm": 1.7968204655871933, - "learning_rate": 9.520488294609142e-07, - "loss": 0.9405, - "step": 5699 - }, - { - "epoch": 0.6853844766428185, - "grad_norm": 0.9176958693063133, - "learning_rate": 9.513854328855368e-07, - "loss": 0.8291, - "step": 5700 - }, - { - "epoch": 0.6855047195334576, - "grad_norm": 4.043768555776953, - "learning_rate": 9.507221953841558e-07, - "loss": 1.0411, - "step": 5701 - }, - { - "epoch": 0.6856249624240967, - "grad_norm": 1.6797016941624099, - "learning_rate": 9.500591170573824e-07, - "loss": 1.0058, - "step": 5702 - }, - { - "epoch": 0.6857452053147358, - "grad_norm": 2.203379839503976, - "learning_rate": 9.493961980058078e-07, - "loss": 0.9693, - "step": 5703 - }, - { - "epoch": 0.6858654482053749, - "grad_norm": 2.0388862855266936, - "learning_rate": 9.48733438329993e-07, - "loss": 0.9076, - "step": 5704 - }, - { - "epoch": 0.6859856910960139, - "grad_norm": 1.7316674758546986, - "learning_rate": 9.480708381304807e-07, - "loss": 0.9691, - "step": 5705 - }, - { - "epoch": 0.6861059339866531, - "grad_norm": 2.494350248293798, - "learning_rate": 9.474083975077858e-07, - "loss": 1.0695, - "step": 5706 - }, - { - "epoch": 0.6862261768772921, - "grad_norm": 2.31578181632843, - "learning_rate": 9.467461165623994e-07, - "loss": 1.0306, - "step": 5707 - }, - { - "epoch": 0.6863464197679312, - "grad_norm": 1.8248888373812877, - "learning_rate": 9.46083995394791e-07, - "loss": 1.0218, - "step": 5708 - }, - { - "epoch": 0.6864666626585703, - "grad_norm": 5.295477308565972, - "learning_rate": 9.454220341054012e-07, - "loss": 0.8612, - "step": 5709 - }, - { - "epoch": 0.6865869055492094, - "grad_norm": 2.536298168433759, - "learning_rate": 9.447602327946512e-07, - "loss": 1.035, - "step": 5710 - }, - { - "epoch": 0.6867071484398485, - "grad_norm": 1.7467376409311524, - "learning_rate": 9.440985915629338e-07, - "loss": 0.9987, - "step": 5711 - }, - { - "epoch": 0.6868273913304875, - "grad_norm": 1.7476804892743787, - "learning_rate": 9.434371105106223e-07, - "loss": 0.95, - "step": 5712 - }, - { - "epoch": 0.6869476342211267, - "grad_norm": 3.975944426042634, - "learning_rate": 9.427757897380602e-07, - "loss": 0.9399, - "step": 5713 - }, - { - "epoch": 0.6870678771117658, - "grad_norm": 2.316746910224057, - "learning_rate": 9.421146293455695e-07, - "loss": 1.0809, - "step": 5714 - }, - { - "epoch": 0.6871881200024048, - "grad_norm": 1.6586065262218412, - "learning_rate": 9.414536294334489e-07, - "loss": 0.9117, - "step": 5715 - }, - { - "epoch": 0.687308362893044, - "grad_norm": 1.803677825541191, - "learning_rate": 9.407927901019708e-07, - "loss": 0.9345, - "step": 5716 - }, - { - "epoch": 0.687428605783683, - "grad_norm": 2.2247403368360126, - "learning_rate": 9.401321114513854e-07, - "loss": 1.0008, - "step": 5717 - }, - { - "epoch": 0.6875488486743221, - "grad_norm": 1.8195805858640526, - "learning_rate": 9.394715935819155e-07, - "loss": 0.9855, - "step": 5718 - }, - { - "epoch": 0.6876690915649613, - "grad_norm": 2.020398306149241, - "learning_rate": 9.388112365937608e-07, - "loss": 0.8512, - "step": 5719 - }, - { - "epoch": 0.6877893344556003, - "grad_norm": 2.067420869025612, - "learning_rate": 9.381510405870985e-07, - "loss": 1.0461, - "step": 5720 - }, - { - "epoch": 0.6879095773462394, - "grad_norm": 2.289762943108374, - "learning_rate": 9.374910056620791e-07, - "loss": 1.0044, - "step": 5721 - }, - { - "epoch": 0.6880298202368785, - "grad_norm": 1.6674207729751869, - "learning_rate": 9.368311319188293e-07, - "loss": 1.0449, - "step": 5722 - }, - { - "epoch": 0.6881500631275176, - "grad_norm": 2.2082008705957668, - "learning_rate": 9.361714194574515e-07, - "loss": 1.0289, - "step": 5723 - }, - { - "epoch": 0.6882703060181566, - "grad_norm": 0.7514603622729228, - "learning_rate": 9.355118683780228e-07, - "loss": 0.8294, - "step": 5724 - }, - { - "epoch": 0.6883905489087958, - "grad_norm": 1.9001951518976703, - "learning_rate": 9.348524787805987e-07, - "loss": 1.0246, - "step": 5725 - }, - { - "epoch": 0.6885107917994349, - "grad_norm": 2.473259971161287, - "learning_rate": 9.341932507652053e-07, - "loss": 1.0813, - "step": 5726 - }, - { - "epoch": 0.6886310346900739, - "grad_norm": 2.0851792232409876, - "learning_rate": 9.335341844318489e-07, - "loss": 1.0141, - "step": 5727 - }, - { - "epoch": 0.6887512775807131, - "grad_norm": 2.096024146472367, - "learning_rate": 9.328752798805091e-07, - "loss": 0.9684, - "step": 5728 - }, - { - "epoch": 0.6888715204713521, - "grad_norm": 2.0492124533402, - "learning_rate": 9.322165372111399e-07, - "loss": 0.9819, - "step": 5729 - }, - { - "epoch": 0.6889917633619912, - "grad_norm": 2.126076930300479, - "learning_rate": 9.315579565236747e-07, - "loss": 0.9861, - "step": 5730 - }, - { - "epoch": 0.6891120062526304, - "grad_norm": 1.718232302293309, - "learning_rate": 9.308995379180162e-07, - "loss": 0.9735, - "step": 5731 - }, - { - "epoch": 0.6892322491432694, - "grad_norm": 0.8531925104838173, - "learning_rate": 9.302412814940488e-07, - "loss": 0.8679, - "step": 5732 - }, - { - "epoch": 0.6893524920339085, - "grad_norm": 2.049727454327508, - "learning_rate": 9.295831873516276e-07, - "loss": 0.9298, - "step": 5733 - }, - { - "epoch": 0.6894727349245476, - "grad_norm": 1.8331043866283232, - "learning_rate": 9.289252555905873e-07, - "loss": 0.9958, - "step": 5734 - }, - { - "epoch": 0.6895929778151867, - "grad_norm": 2.8802268572045504, - "learning_rate": 9.282674863107334e-07, - "loss": 0.9855, - "step": 5735 - }, - { - "epoch": 0.6897132207058257, - "grad_norm": 2.7603816959877028, - "learning_rate": 9.276098796118488e-07, - "loss": 0.9958, - "step": 5736 - }, - { - "epoch": 0.6898334635964649, - "grad_norm": 1.682056548357563, - "learning_rate": 9.269524355936938e-07, - "loss": 0.8969, - "step": 5737 - }, - { - "epoch": 0.689953706487104, - "grad_norm": 1.6273840508710724, - "learning_rate": 9.262951543560002e-07, - "loss": 1.0758, - "step": 5738 - }, - { - "epoch": 0.690073949377743, - "grad_norm": 2.5509270572320095, - "learning_rate": 9.256380359984795e-07, - "loss": 1.0859, - "step": 5739 - }, - { - "epoch": 0.6901941922683821, - "grad_norm": 2.010749420404973, - "learning_rate": 9.249810806208139e-07, - "loss": 0.9697, - "step": 5740 - }, - { - "epoch": 0.6903144351590212, - "grad_norm": 2.026996560769911, - "learning_rate": 9.243242883226627e-07, - "loss": 1.0396, - "step": 5741 - }, - { - "epoch": 0.6904346780496603, - "grad_norm": 1.823300508323393, - "learning_rate": 9.236676592036628e-07, - "loss": 0.927, - "step": 5742 - }, - { - "epoch": 0.6905549209402994, - "grad_norm": 1.9964661969256878, - "learning_rate": 9.230111933634228e-07, - "loss": 0.9596, - "step": 5743 - }, - { - "epoch": 0.6906751638309385, - "grad_norm": 1.357759063336447, - "learning_rate": 9.223548909015288e-07, - "loss": 1.0358, - "step": 5744 - }, - { - "epoch": 0.6907954067215776, - "grad_norm": 1.9816104524925822, - "learning_rate": 9.216987519175407e-07, - "loss": 0.9369, - "step": 5745 - }, - { - "epoch": 0.6909156496122166, - "grad_norm": 1.615544318245051, - "learning_rate": 9.210427765109942e-07, - "loss": 0.919, - "step": 5746 - }, - { - "epoch": 0.6910358925028558, - "grad_norm": 1.9205938891973728, - "learning_rate": 9.20386964781402e-07, - "loss": 1.0401, - "step": 5747 - }, - { - "epoch": 0.6911561353934949, - "grad_norm": 2.0495566935500182, - "learning_rate": 9.197313168282472e-07, - "loss": 1.0687, - "step": 5748 - }, - { - "epoch": 0.6912763782841339, - "grad_norm": 1.9870341348773364, - "learning_rate": 9.190758327509935e-07, - "loss": 0.9478, - "step": 5749 - }, - { - "epoch": 0.6913966211747731, - "grad_norm": 0.8934597611164097, - "learning_rate": 9.184205126490767e-07, - "loss": 0.9027, - "step": 5750 - }, - { - "epoch": 0.6915168640654121, - "grad_norm": 0.9354395476320302, - "learning_rate": 9.177653566219075e-07, - "loss": 0.8702, - "step": 5751 - }, - { - "epoch": 0.6916371069560512, - "grad_norm": 2.1947555106911074, - "learning_rate": 9.171103647688744e-07, - "loss": 0.9888, - "step": 5752 - }, - { - "epoch": 0.6917573498466904, - "grad_norm": 1.8421823981993406, - "learning_rate": 9.164555371893367e-07, - "loss": 0.9183, - "step": 5753 - }, - { - "epoch": 0.6918775927373294, - "grad_norm": 1.7436561039627965, - "learning_rate": 9.158008739826333e-07, - "loss": 0.9832, - "step": 5754 - }, - { - "epoch": 0.6919978356279685, - "grad_norm": 1.6345550875525041, - "learning_rate": 9.151463752480744e-07, - "loss": 1.0893, - "step": 5755 - }, - { - "epoch": 0.6921180785186076, - "grad_norm": 1.4343089468713055, - "learning_rate": 9.144920410849493e-07, - "loss": 1.0284, - "step": 5756 - }, - { - "epoch": 0.6922383214092467, - "grad_norm": 1.8174766094061638, - "learning_rate": 9.138378715925176e-07, - "loss": 1.0316, - "step": 5757 - }, - { - "epoch": 0.6923585642998857, - "grad_norm": 2.0092348040947927, - "learning_rate": 9.131838668700167e-07, - "loss": 1.0406, - "step": 5758 - }, - { - "epoch": 0.6924788071905249, - "grad_norm": 2.132183163753228, - "learning_rate": 9.125300270166598e-07, - "loss": 1.0984, - "step": 5759 - }, - { - "epoch": 0.692599050081164, - "grad_norm": 1.8774008956130044, - "learning_rate": 9.118763521316324e-07, - "loss": 1.0966, - "step": 5760 - }, - { - "epoch": 0.692719292971803, - "grad_norm": 1.972390638542201, - "learning_rate": 9.112228423140987e-07, - "loss": 0.9914, - "step": 5761 - }, - { - "epoch": 0.6928395358624422, - "grad_norm": 2.2835836101650093, - "learning_rate": 9.105694976631932e-07, - "loss": 1.0936, - "step": 5762 - }, - { - "epoch": 0.6929597787530812, - "grad_norm": 2.187362012598215, - "learning_rate": 9.099163182780283e-07, - "loss": 0.955, - "step": 5763 - }, - { - "epoch": 0.6930800216437203, - "grad_norm": 2.3785171566421206, - "learning_rate": 9.092633042576916e-07, - "loss": 0.7231, - "step": 5764 - }, - { - "epoch": 0.6932002645343595, - "grad_norm": 2.0237721052370365, - "learning_rate": 9.086104557012446e-07, - "loss": 0.8014, - "step": 5765 - }, - { - "epoch": 0.6933205074249985, - "grad_norm": 1.6862651404060927, - "learning_rate": 9.079577727077239e-07, - "loss": 0.8872, - "step": 5766 - }, - { - "epoch": 0.6934407503156376, - "grad_norm": 2.7816334149396185, - "learning_rate": 9.073052553761404e-07, - "loss": 0.9537, - "step": 5767 - }, - { - "epoch": 0.6935609932062767, - "grad_norm": 1.6100905225906645, - "learning_rate": 9.066529038054805e-07, - "loss": 1.0005, - "step": 5768 - }, - { - "epoch": 0.6936812360969158, - "grad_norm": 1.9184436715103357, - "learning_rate": 9.060007180947071e-07, - "loss": 0.9668, - "step": 5769 - }, - { - "epoch": 0.6938014789875548, - "grad_norm": 2.2729697254210546, - "learning_rate": 9.053486983427534e-07, - "loss": 0.9654, - "step": 5770 - }, - { - "epoch": 0.6939217218781939, - "grad_norm": 1.8387442703209664, - "learning_rate": 9.046968446485326e-07, - "loss": 0.9356, - "step": 5771 - }, - { - "epoch": 0.6940419647688331, - "grad_norm": 1.8768696682066994, - "learning_rate": 9.040451571109295e-07, - "loss": 0.9268, - "step": 5772 - }, - { - "epoch": 0.6941622076594721, - "grad_norm": 0.9467981817225607, - "learning_rate": 9.033936358288042e-07, - "loss": 0.8812, - "step": 5773 - }, - { - "epoch": 0.6942824505501112, - "grad_norm": 1.772989493763321, - "learning_rate": 9.027422809009937e-07, - "loss": 1.0511, - "step": 5774 - }, - { - "epoch": 0.6944026934407503, - "grad_norm": 1.551406234755042, - "learning_rate": 9.020910924263054e-07, - "loss": 1.0628, - "step": 5775 - }, - { - "epoch": 0.6945229363313894, - "grad_norm": 0.9605920061986472, - "learning_rate": 9.014400705035261e-07, - "loss": 0.8549, - "step": 5776 - }, - { - "epoch": 0.6946431792220285, - "grad_norm": 2.795902360119934, - "learning_rate": 9.00789215231414e-07, - "loss": 1.0004, - "step": 5777 - }, - { - "epoch": 0.6947634221126676, - "grad_norm": 2.5798695528335247, - "learning_rate": 9.001385267087056e-07, - "loss": 1.0522, - "step": 5778 - }, - { - "epoch": 0.6948836650033067, - "grad_norm": 1.486901632537859, - "learning_rate": 8.994880050341072e-07, - "loss": 0.933, - "step": 5779 - }, - { - "epoch": 0.6950039078939457, - "grad_norm": 1.9029963507805223, - "learning_rate": 8.988376503063026e-07, - "loss": 1.0074, - "step": 5780 - }, - { - "epoch": 0.6951241507845849, - "grad_norm": 2.270415404705561, - "learning_rate": 8.981874626239521e-07, - "loss": 1.05, - "step": 5781 - }, - { - "epoch": 0.695244393675224, - "grad_norm": 2.260125054975861, - "learning_rate": 8.975374420856872e-07, - "loss": 1.1101, - "step": 5782 - }, - { - "epoch": 0.695364636565863, - "grad_norm": 1.946559460390014, - "learning_rate": 8.968875887901157e-07, - "loss": 0.9509, - "step": 5783 - }, - { - "epoch": 0.6954848794565022, - "grad_norm": 2.8228393393948505, - "learning_rate": 8.9623790283582e-07, - "loss": 0.8588, - "step": 5784 - }, - { - "epoch": 0.6956051223471412, - "grad_norm": 2.0733713261638513, - "learning_rate": 8.955883843213561e-07, - "loss": 0.9922, - "step": 5785 - }, - { - "epoch": 0.6957253652377803, - "grad_norm": 2.1862852811065854, - "learning_rate": 8.949390333452569e-07, - "loss": 1.1082, - "step": 5786 - }, - { - "epoch": 0.6958456081284194, - "grad_norm": 1.687344579250479, - "learning_rate": 8.942898500060279e-07, - "loss": 0.9071, - "step": 5787 - }, - { - "epoch": 0.6959658510190585, - "grad_norm": 3.495854647680936, - "learning_rate": 8.936408344021493e-07, - "loss": 0.9468, - "step": 5788 - }, - { - "epoch": 0.6960860939096976, - "grad_norm": 2.3888430467785726, - "learning_rate": 8.929919866320765e-07, - "loss": 0.9349, - "step": 5789 - }, - { - "epoch": 0.6962063368003367, - "grad_norm": 1.7322011847921526, - "learning_rate": 8.923433067942385e-07, - "loss": 1.0357, - "step": 5790 - }, - { - "epoch": 0.6963265796909758, - "grad_norm": 2.6623987277117314, - "learning_rate": 8.916947949870417e-07, - "loss": 0.9147, - "step": 5791 - }, - { - "epoch": 0.6964468225816148, - "grad_norm": 0.9715327012727538, - "learning_rate": 8.910464513088615e-07, - "loss": 0.8653, - "step": 5792 - }, - { - "epoch": 0.696567065472254, - "grad_norm": 1.855239886173161, - "learning_rate": 8.903982758580542e-07, - "loss": 1.0192, - "step": 5793 - }, - { - "epoch": 0.696687308362893, - "grad_norm": 1.8111905596702742, - "learning_rate": 8.897502687329457e-07, - "loss": 1.0389, - "step": 5794 - }, - { - "epoch": 0.6968075512535321, - "grad_norm": 1.9248160212399388, - "learning_rate": 8.891024300318382e-07, - "loss": 1.0279, - "step": 5795 - }, - { - "epoch": 0.6969277941441713, - "grad_norm": 1.6445547360444381, - "learning_rate": 8.884547598530103e-07, - "loss": 0.9843, - "step": 5796 - }, - { - "epoch": 0.6970480370348103, - "grad_norm": 2.3515367474840927, - "learning_rate": 8.8780725829471e-07, - "loss": 0.9787, - "step": 5797 - }, - { - "epoch": 0.6971682799254494, - "grad_norm": 1.8937761512688087, - "learning_rate": 8.87159925455165e-07, - "loss": 0.9992, - "step": 5798 - }, - { - "epoch": 0.6972885228160886, - "grad_norm": 5.7749827463131735, - "learning_rate": 8.865127614325738e-07, - "loss": 0.9631, - "step": 5799 - }, - { - "epoch": 0.6974087657067276, - "grad_norm": 1.700822165771983, - "learning_rate": 8.85865766325113e-07, - "loss": 0.9014, - "step": 5800 - }, - { - "epoch": 0.6975290085973667, - "grad_norm": 2.5336031866236053, - "learning_rate": 8.852189402309287e-07, - "loss": 0.9486, - "step": 5801 - }, - { - "epoch": 0.6976492514880057, - "grad_norm": 6.724435219571737, - "learning_rate": 8.845722832481441e-07, - "loss": 0.9588, - "step": 5802 - }, - { - "epoch": 0.6977694943786449, - "grad_norm": 1.9874804969646418, - "learning_rate": 8.83925795474858e-07, - "loss": 1.0086, - "step": 5803 - }, - { - "epoch": 0.6978897372692839, - "grad_norm": 2.3433949519485338, - "learning_rate": 8.832794770091414e-07, - "loss": 0.8351, - "step": 5804 - }, - { - "epoch": 0.698009980159923, - "grad_norm": 1.9533160306969244, - "learning_rate": 8.826333279490401e-07, - "loss": 1.0538, - "step": 5805 - }, - { - "epoch": 0.6981302230505622, - "grad_norm": 2.278707158307968, - "learning_rate": 8.819873483925748e-07, - "loss": 0.9175, - "step": 5806 - }, - { - "epoch": 0.6982504659412012, - "grad_norm": 2.030831631344205, - "learning_rate": 8.81341538437739e-07, - "loss": 0.9823, - "step": 5807 - }, - { - "epoch": 0.6983707088318403, - "grad_norm": 1.740371784303951, - "learning_rate": 8.80695898182503e-07, - "loss": 0.9157, - "step": 5808 - }, - { - "epoch": 0.6984909517224794, - "grad_norm": 0.9121759949886796, - "learning_rate": 8.800504277248093e-07, - "loss": 0.9206, - "step": 5809 - }, - { - "epoch": 0.6986111946131185, - "grad_norm": 1.8428640661711562, - "learning_rate": 8.794051271625753e-07, - "loss": 0.982, - "step": 5810 - }, - { - "epoch": 0.6987314375037575, - "grad_norm": 1.7690562438045594, - "learning_rate": 8.787599965936925e-07, - "loss": 1.0615, - "step": 5811 - }, - { - "epoch": 0.6988516803943967, - "grad_norm": 1.809765561764784, - "learning_rate": 8.781150361160261e-07, - "loss": 0.9519, - "step": 5812 - }, - { - "epoch": 0.6989719232850358, - "grad_norm": 2.2657633772060217, - "learning_rate": 8.774702458274181e-07, - "loss": 0.9614, - "step": 5813 - }, - { - "epoch": 0.6990921661756748, - "grad_norm": 2.82524299251699, - "learning_rate": 8.768256258256799e-07, - "loss": 0.9321, - "step": 5814 - }, - { - "epoch": 0.699212409066314, - "grad_norm": 1.6398665355246127, - "learning_rate": 8.76181176208602e-07, - "loss": 0.9675, - "step": 5815 - }, - { - "epoch": 0.699332651956953, - "grad_norm": 1.7265103017243915, - "learning_rate": 8.755368970739461e-07, - "loss": 0.9609, - "step": 5816 - }, - { - "epoch": 0.6994528948475921, - "grad_norm": 2.8626575076817384, - "learning_rate": 8.748927885194479e-07, - "loss": 0.8489, - "step": 5817 - }, - { - "epoch": 0.6995731377382313, - "grad_norm": 0.8983582834896812, - "learning_rate": 8.742488506428209e-07, - "loss": 0.8166, - "step": 5818 - }, - { - "epoch": 0.6996933806288703, - "grad_norm": 1.7198244377847867, - "learning_rate": 8.736050835417466e-07, - "loss": 1.0161, - "step": 5819 - }, - { - "epoch": 0.6998136235195094, - "grad_norm": 1.7673590592841042, - "learning_rate": 8.729614873138862e-07, - "loss": 0.8551, - "step": 5820 - }, - { - "epoch": 0.6999338664101485, - "grad_norm": 2.2108058283657757, - "learning_rate": 8.723180620568716e-07, - "loss": 1.0072, - "step": 5821 - }, - { - "epoch": 0.7000541093007876, - "grad_norm": 2.1729503137672235, - "learning_rate": 8.716748078683116e-07, - "loss": 1.0894, - "step": 5822 - }, - { - "epoch": 0.7001743521914267, - "grad_norm": 2.2634876561351773, - "learning_rate": 8.710317248457855e-07, - "loss": 0.9167, - "step": 5823 - }, - { - "epoch": 0.7002945950820658, - "grad_norm": 1.8339308140807713, - "learning_rate": 8.703888130868482e-07, - "loss": 0.9448, - "step": 5824 - }, - { - "epoch": 0.7004148379727049, - "grad_norm": 3.3769347685121387, - "learning_rate": 8.697460726890307e-07, - "loss": 1.049, - "step": 5825 - }, - { - "epoch": 0.7005350808633439, - "grad_norm": 1.9298322368504173, - "learning_rate": 8.691035037498354e-07, - "loss": 1.1308, - "step": 5826 - }, - { - "epoch": 0.7006553237539831, - "grad_norm": 2.0494838716161317, - "learning_rate": 8.684611063667391e-07, - "loss": 0.9589, - "step": 5827 - }, - { - "epoch": 0.7007755666446221, - "grad_norm": 2.048204063307348, - "learning_rate": 8.678188806371935e-07, - "loss": 0.9949, - "step": 5828 - }, - { - "epoch": 0.7008958095352612, - "grad_norm": 1.5318823306757674, - "learning_rate": 8.671768266586228e-07, - "loss": 1.0845, - "step": 5829 - }, - { - "epoch": 0.7010160524259004, - "grad_norm": 2.1415571424728124, - "learning_rate": 8.665349445284275e-07, - "loss": 1.0161, - "step": 5830 - }, - { - "epoch": 0.7011362953165394, - "grad_norm": 1.3837568000688398, - "learning_rate": 8.658932343439799e-07, - "loss": 1.039, - "step": 5831 - }, - { - "epoch": 0.7012565382071785, - "grad_norm": 1.8752003880549215, - "learning_rate": 8.65251696202627e-07, - "loss": 0.998, - "step": 5832 - }, - { - "epoch": 0.7013767810978175, - "grad_norm": 2.233913560716468, - "learning_rate": 8.646103302016896e-07, - "loss": 1.1053, - "step": 5833 - }, - { - "epoch": 0.7014970239884567, - "grad_norm": 1.6547987715903065, - "learning_rate": 8.639691364384614e-07, - "loss": 1.1168, - "step": 5834 - }, - { - "epoch": 0.7016172668790958, - "grad_norm": 1.7780585895063428, - "learning_rate": 8.633281150102136e-07, - "loss": 0.9546, - "step": 5835 - }, - { - "epoch": 0.7017375097697348, - "grad_norm": 2.2952299909518317, - "learning_rate": 8.626872660141855e-07, - "loss": 0.9093, - "step": 5836 - }, - { - "epoch": 0.701857752660374, - "grad_norm": 1.5634480461269675, - "learning_rate": 8.620465895475957e-07, - "loss": 0.9759, - "step": 5837 - }, - { - "epoch": 0.701977995551013, - "grad_norm": 1.3785750123633187, - "learning_rate": 8.614060857076333e-07, - "loss": 0.9845, - "step": 5838 - }, - { - "epoch": 0.7020982384416521, - "grad_norm": 1.9281852527274057, - "learning_rate": 8.60765754591462e-07, - "loss": 0.9781, - "step": 5839 - }, - { - "epoch": 0.7022184813322913, - "grad_norm": 1.8035575995663717, - "learning_rate": 8.601255962962211e-07, - "loss": 0.957, - "step": 5840 - }, - { - "epoch": 0.7023387242229303, - "grad_norm": 3.118873388873026, - "learning_rate": 8.594856109190194e-07, - "loss": 0.9517, - "step": 5841 - }, - { - "epoch": 0.7024589671135694, - "grad_norm": 1.5030550839348527, - "learning_rate": 8.588457985569446e-07, - "loss": 0.9253, - "step": 5842 - }, - { - "epoch": 0.7025792100042085, - "grad_norm": 4.108791295358463, - "learning_rate": 8.582061593070542e-07, - "loss": 0.9436, - "step": 5843 - }, - { - "epoch": 0.7026994528948476, - "grad_norm": 2.140587242517429, - "learning_rate": 8.57566693266383e-07, - "loss": 0.9985, - "step": 5844 - }, - { - "epoch": 0.7028196957854866, - "grad_norm": 2.1331535318222943, - "learning_rate": 8.569274005319354e-07, - "loss": 0.9245, - "step": 5845 - }, - { - "epoch": 0.7029399386761258, - "grad_norm": 2.1420697536704645, - "learning_rate": 8.562882812006913e-07, - "loss": 1.0324, - "step": 5846 - }, - { - "epoch": 0.7030601815667649, - "grad_norm": 1.9605327051611743, - "learning_rate": 8.556493353696066e-07, - "loss": 0.9997, - "step": 5847 - }, - { - "epoch": 0.7031804244574039, - "grad_norm": 2.3704061752447463, - "learning_rate": 8.550105631356077e-07, - "loss": 0.9102, - "step": 5848 - }, - { - "epoch": 0.7033006673480431, - "grad_norm": 2.8841132974155337, - "learning_rate": 8.543719645955961e-07, - "loss": 0.9948, - "step": 5849 - }, - { - "epoch": 0.7034209102386821, - "grad_norm": 2.146425949287309, - "learning_rate": 8.537335398464467e-07, - "loss": 0.9718, - "step": 5850 - }, - { - "epoch": 0.7035411531293212, - "grad_norm": 3.130190696391574, - "learning_rate": 8.53095288985007e-07, - "loss": 1.082, - "step": 5851 - }, - { - "epoch": 0.7036613960199604, - "grad_norm": 1.7267645117874937, - "learning_rate": 8.524572121081009e-07, - "loss": 1.0507, - "step": 5852 - }, - { - "epoch": 0.7037816389105994, - "grad_norm": 2.84749884360058, - "learning_rate": 8.518193093125232e-07, - "loss": 0.8571, - "step": 5853 - }, - { - "epoch": 0.7039018818012385, - "grad_norm": 2.5487168094107187, - "learning_rate": 8.511815806950436e-07, - "loss": 1.0305, - "step": 5854 - }, - { - "epoch": 0.7040221246918776, - "grad_norm": 1.6246184854877996, - "learning_rate": 8.505440263524044e-07, - "loss": 1.0041, - "step": 5855 - }, - { - "epoch": 0.7041423675825167, - "grad_norm": 2.2509625813014824, - "learning_rate": 8.49906646381322e-07, - "loss": 1.1023, - "step": 5856 - }, - { - "epoch": 0.7042626104731557, - "grad_norm": 1.7148964424119089, - "learning_rate": 8.492694408784884e-07, - "loss": 0.9459, - "step": 5857 - }, - { - "epoch": 0.7043828533637949, - "grad_norm": 2.2806145815142207, - "learning_rate": 8.486324099405642e-07, - "loss": 0.8481, - "step": 5858 - }, - { - "epoch": 0.704503096254434, - "grad_norm": 1.5160435364141027, - "learning_rate": 8.479955536641887e-07, - "loss": 0.9851, - "step": 5859 - }, - { - "epoch": 0.704623339145073, - "grad_norm": 1.976572752914675, - "learning_rate": 8.473588721459716e-07, - "loss": 0.8851, - "step": 5860 - }, - { - "epoch": 0.7047435820357122, - "grad_norm": 1.8762869550812924, - "learning_rate": 8.467223654824967e-07, - "loss": 0.9409, - "step": 5861 - }, - { - "epoch": 0.7048638249263512, - "grad_norm": 2.4273962274875824, - "learning_rate": 8.460860337703233e-07, - "loss": 0.8623, - "step": 5862 - }, - { - "epoch": 0.7049840678169903, - "grad_norm": 1.717490272105735, - "learning_rate": 8.454498771059797e-07, - "loss": 0.9454, - "step": 5863 - }, - { - "epoch": 0.7051043107076294, - "grad_norm": 2.0586752830153756, - "learning_rate": 8.448138955859725e-07, - "loss": 1.0622, - "step": 5864 - }, - { - "epoch": 0.7052245535982685, - "grad_norm": 1.6627292216896425, - "learning_rate": 8.44178089306778e-07, - "loss": 1.1277, - "step": 5865 - }, - { - "epoch": 0.7053447964889076, - "grad_norm": 1.9258692723212147, - "learning_rate": 8.4354245836485e-07, - "loss": 1.0057, - "step": 5866 - }, - { - "epoch": 0.7054650393795466, - "grad_norm": 1.4332057833289515, - "learning_rate": 8.429070028566108e-07, - "loss": 0.9575, - "step": 5867 - }, - { - "epoch": 0.7055852822701858, - "grad_norm": 2.7323703065686424, - "learning_rate": 8.422717228784586e-07, - "loss": 0.979, - "step": 5868 - }, - { - "epoch": 0.7057055251608249, - "grad_norm": 1.8958253825373785, - "learning_rate": 8.416366185267663e-07, - "loss": 0.918, - "step": 5869 - }, - { - "epoch": 0.7058257680514639, - "grad_norm": 1.5814821397060508, - "learning_rate": 8.410016898978778e-07, - "loss": 1.0053, - "step": 5870 - }, - { - "epoch": 0.7059460109421031, - "grad_norm": 4.248959582266967, - "learning_rate": 8.403669370881115e-07, - "loss": 1.0213, - "step": 5871 - }, - { - "epoch": 0.7060662538327421, - "grad_norm": 1.5485261524124156, - "learning_rate": 8.397323601937587e-07, - "loss": 1.0047, - "step": 5872 - }, - { - "epoch": 0.7061864967233812, - "grad_norm": 1.66518904829875, - "learning_rate": 8.390979593110838e-07, - "loss": 0.9995, - "step": 5873 - }, - { - "epoch": 0.7063067396140204, - "grad_norm": 1.492423233665028, - "learning_rate": 8.384637345363262e-07, - "loss": 1.0393, - "step": 5874 - }, - { - "epoch": 0.7064269825046594, - "grad_norm": 3.2319992111790516, - "learning_rate": 8.378296859656964e-07, - "loss": 0.9974, - "step": 5875 - }, - { - "epoch": 0.7065472253952985, - "grad_norm": 2.2828197565296304, - "learning_rate": 8.371958136953792e-07, - "loss": 0.9046, - "step": 5876 - }, - { - "epoch": 0.7066674682859376, - "grad_norm": 2.2008400830308954, - "learning_rate": 8.365621178215326e-07, - "loss": 0.888, - "step": 5877 - }, - { - "epoch": 0.7067877111765767, - "grad_norm": 2.498436534277829, - "learning_rate": 8.359285984402871e-07, - "loss": 0.9804, - "step": 5878 - }, - { - "epoch": 0.7069079540672157, - "grad_norm": 1.8394913710536693, - "learning_rate": 8.352952556477489e-07, - "loss": 0.9698, - "step": 5879 - }, - { - "epoch": 0.7070281969578549, - "grad_norm": 2.2932831582783164, - "learning_rate": 8.34662089539993e-07, - "loss": 1.0047, - "step": 5880 - }, - { - "epoch": 0.707148439848494, - "grad_norm": 2.1086623506810396, - "learning_rate": 8.340291002130722e-07, - "loss": 1.0214, - "step": 5881 - }, - { - "epoch": 0.707268682739133, - "grad_norm": 2.7089445940437087, - "learning_rate": 8.3339628776301e-07, - "loss": 1.0265, - "step": 5882 - }, - { - "epoch": 0.7073889256297722, - "grad_norm": 1.628264302977358, - "learning_rate": 8.327636522858033e-07, - "loss": 0.7955, - "step": 5883 - }, - { - "epoch": 0.7075091685204112, - "grad_norm": 1.7873313557491628, - "learning_rate": 8.321311938774225e-07, - "loss": 0.9935, - "step": 5884 - }, - { - "epoch": 0.7076294114110503, - "grad_norm": 2.6792705422806615, - "learning_rate": 8.314989126338104e-07, - "loss": 1.0186, - "step": 5885 - }, - { - "epoch": 0.7077496543016895, - "grad_norm": 1.5849792893076489, - "learning_rate": 8.308668086508847e-07, - "loss": 1.0751, - "step": 5886 - }, - { - "epoch": 0.7078698971923285, - "grad_norm": 1.8254507344310442, - "learning_rate": 8.302348820245342e-07, - "loss": 0.9682, - "step": 5887 - }, - { - "epoch": 0.7079901400829676, - "grad_norm": 2.861415447686304, - "learning_rate": 8.296031328506232e-07, - "loss": 0.9342, - "step": 5888 - }, - { - "epoch": 0.7081103829736067, - "grad_norm": 2.0891773908431412, - "learning_rate": 8.289715612249857e-07, - "loss": 0.9872, - "step": 5889 - }, - { - "epoch": 0.7082306258642458, - "grad_norm": 2.369142838970008, - "learning_rate": 8.283401672434305e-07, - "loss": 0.9999, - "step": 5890 - }, - { - "epoch": 0.7083508687548848, - "grad_norm": 1.732109858854475, - "learning_rate": 8.277089510017412e-07, - "loss": 0.9311, - "step": 5891 - }, - { - "epoch": 0.708471111645524, - "grad_norm": 1.8139849532028007, - "learning_rate": 8.270779125956719e-07, - "loss": 1.0524, - "step": 5892 - }, - { - "epoch": 0.7085913545361631, - "grad_norm": 2.0292906481121378, - "learning_rate": 8.264470521209505e-07, - "loss": 1.0251, - "step": 5893 - }, - { - "epoch": 0.7087115974268021, - "grad_norm": 2.8965819966796675, - "learning_rate": 8.258163696732785e-07, - "loss": 0.9916, - "step": 5894 - }, - { - "epoch": 0.7088318403174413, - "grad_norm": 1.71812566341583, - "learning_rate": 8.251858653483288e-07, - "loss": 0.9986, - "step": 5895 - }, - { - "epoch": 0.7089520832080803, - "grad_norm": 10.237128014146709, - "learning_rate": 8.245555392417501e-07, - "loss": 1.0854, - "step": 5896 - }, - { - "epoch": 0.7090723260987194, - "grad_norm": 1.9314390565252233, - "learning_rate": 8.239253914491613e-07, - "loss": 1.0188, - "step": 5897 - }, - { - "epoch": 0.7091925689893585, - "grad_norm": 1.7590649021530083, - "learning_rate": 8.232954220661556e-07, - "loss": 0.9776, - "step": 5898 - }, - { - "epoch": 0.7093128118799976, - "grad_norm": 2.0854365858205814, - "learning_rate": 8.226656311882989e-07, - "loss": 0.9166, - "step": 5899 - }, - { - "epoch": 0.7094330547706367, - "grad_norm": 2.2734702196421246, - "learning_rate": 8.22036018911129e-07, - "loss": 0.9937, - "step": 5900 - }, - { - "epoch": 0.7095532976612757, - "grad_norm": 2.244823010101172, - "learning_rate": 8.214065853301599e-07, - "loss": 1.0333, - "step": 5901 - }, - { - "epoch": 0.7096735405519149, - "grad_norm": 0.7881933278558098, - "learning_rate": 8.207773305408734e-07, - "loss": 0.8349, - "step": 5902 - }, - { - "epoch": 0.709793783442554, - "grad_norm": 2.0844847231328227, - "learning_rate": 8.201482546387288e-07, - "loss": 1.0237, - "step": 5903 - }, - { - "epoch": 0.709914026333193, - "grad_norm": 1.7841001763293638, - "learning_rate": 8.195193577191553e-07, - "loss": 1.1405, - "step": 5904 - }, - { - "epoch": 0.7100342692238322, - "grad_norm": 1.6692403551514057, - "learning_rate": 8.188906398775579e-07, - "loss": 1.0709, - "step": 5905 - }, - { - "epoch": 0.7101545121144712, - "grad_norm": 1.911308830159162, - "learning_rate": 8.18262101209311e-07, - "loss": 0.9156, - "step": 5906 - }, - { - "epoch": 0.7102747550051103, - "grad_norm": 2.0876706813087624, - "learning_rate": 8.176337418097626e-07, - "loss": 0.932, - "step": 5907 - }, - { - "epoch": 0.7103949978957494, - "grad_norm": 1.9026275986980197, - "learning_rate": 8.170055617742364e-07, - "loss": 1.0253, - "step": 5908 - }, - { - "epoch": 0.7105152407863885, - "grad_norm": 1.813616638854221, - "learning_rate": 8.163775611980252e-07, - "loss": 0.942, - "step": 5909 - }, - { - "epoch": 0.7106354836770276, - "grad_norm": 1.697255230027863, - "learning_rate": 8.157497401763982e-07, - "loss": 1.0142, - "step": 5910 - }, - { - "epoch": 0.7107557265676667, - "grad_norm": 3.015131934776063, - "learning_rate": 8.151220988045935e-07, - "loss": 1.0054, - "step": 5911 - }, - { - "epoch": 0.7108759694583058, - "grad_norm": 1.573587522959346, - "learning_rate": 8.144946371778234e-07, - "loss": 1.0601, - "step": 5912 - }, - { - "epoch": 0.7109962123489448, - "grad_norm": 1.917750452985674, - "learning_rate": 8.138673553912751e-07, - "loss": 1.0083, - "step": 5913 - }, - { - "epoch": 0.711116455239584, - "grad_norm": 3.4825185440347477, - "learning_rate": 8.132402535401059e-07, - "loss": 0.8024, - "step": 5914 - }, - { - "epoch": 0.711236698130223, - "grad_norm": 1.5994853622563308, - "learning_rate": 8.126133317194465e-07, - "loss": 0.9738, - "step": 5915 - }, - { - "epoch": 0.7113569410208621, - "grad_norm": 2.1168996467428216, - "learning_rate": 8.11986590024401e-07, - "loss": 0.9674, - "step": 5916 - }, - { - "epoch": 0.7114771839115013, - "grad_norm": 3.0218839375029414, - "learning_rate": 8.113600285500442e-07, - "loss": 0.9095, - "step": 5917 - }, - { - "epoch": 0.7115974268021403, - "grad_norm": 1.6519373039882548, - "learning_rate": 8.107336473914268e-07, - "loss": 0.9672, - "step": 5918 - }, - { - "epoch": 0.7117176696927794, - "grad_norm": 0.8346022878530021, - "learning_rate": 8.101074466435694e-07, - "loss": 0.8368, - "step": 5919 - }, - { - "epoch": 0.7118379125834186, - "grad_norm": 1.6534590327768297, - "learning_rate": 8.094814264014662e-07, - "loss": 0.9064, - "step": 5920 - }, - { - "epoch": 0.7119581554740576, - "grad_norm": 2.3664436808384175, - "learning_rate": 8.088555867600844e-07, - "loss": 1.0562, - "step": 5921 - }, - { - "epoch": 0.7120783983646967, - "grad_norm": 1.8469931317807369, - "learning_rate": 8.08229927814362e-07, - "loss": 0.8368, - "step": 5922 - }, - { - "epoch": 0.7121986412553358, - "grad_norm": 3.2257522415405235, - "learning_rate": 8.076044496592134e-07, - "loss": 0.8754, - "step": 5923 - }, - { - "epoch": 0.7123188841459749, - "grad_norm": 2.178636741999195, - "learning_rate": 8.069791523895204e-07, - "loss": 1.003, - "step": 5924 - }, - { - "epoch": 0.7124391270366139, - "grad_norm": 2.1256887778203484, - "learning_rate": 8.063540361001422e-07, - "loss": 1.0108, - "step": 5925 - }, - { - "epoch": 0.7125593699272531, - "grad_norm": 3.8358755941892433, - "learning_rate": 8.057291008859069e-07, - "loss": 1.0273, - "step": 5926 - }, - { - "epoch": 0.7126796128178922, - "grad_norm": 1.863347594161672, - "learning_rate": 8.051043468416187e-07, - "loss": 0.906, - "step": 5927 - }, - { - "epoch": 0.7127998557085312, - "grad_norm": 1.735668490465609, - "learning_rate": 8.044797740620506e-07, - "loss": 1.0528, - "step": 5928 - }, - { - "epoch": 0.7129200985991703, - "grad_norm": 1.8346963786133055, - "learning_rate": 8.038553826419494e-07, - "loss": 1.0129, - "step": 5929 - }, - { - "epoch": 0.7130403414898094, - "grad_norm": 2.0219652866209876, - "learning_rate": 8.032311726760364e-07, - "loss": 1.0341, - "step": 5930 - }, - { - "epoch": 0.7131605843804485, - "grad_norm": 1.7681474474833456, - "learning_rate": 8.026071442590022e-07, - "loss": 0.9204, - "step": 5931 - }, - { - "epoch": 0.7132808272710875, - "grad_norm": 5.022220446343422, - "learning_rate": 8.019832974855134e-07, - "loss": 1.0457, - "step": 5932 - }, - { - "epoch": 0.7134010701617267, - "grad_norm": 2.783224712087336, - "learning_rate": 8.013596324502052e-07, - "loss": 1.0553, - "step": 5933 - }, - { - "epoch": 0.7135213130523658, - "grad_norm": 1.6883831538876668, - "learning_rate": 8.007361492476872e-07, - "loss": 1.0185, - "step": 5934 - }, - { - "epoch": 0.7136415559430048, - "grad_norm": 1.5917611521825548, - "learning_rate": 8.001128479725426e-07, - "loss": 1.0108, - "step": 5935 - }, - { - "epoch": 0.713761798833644, - "grad_norm": 1.5896932544757827, - "learning_rate": 7.994897287193248e-07, - "loss": 1.0307, - "step": 5936 - }, - { - "epoch": 0.713882041724283, - "grad_norm": 3.4915101762055447, - "learning_rate": 7.988667915825605e-07, - "loss": 1.0726, - "step": 5937 - }, - { - "epoch": 0.7140022846149221, - "grad_norm": 1.9706350717429335, - "learning_rate": 7.982440366567491e-07, - "loss": 0.9824, - "step": 5938 - }, - { - "epoch": 0.7141225275055613, - "grad_norm": 1.5049563222231124, - "learning_rate": 7.97621464036361e-07, - "loss": 0.9829, - "step": 5939 - }, - { - "epoch": 0.7142427703962003, - "grad_norm": 3.914929322521902, - "learning_rate": 7.969990738158417e-07, - "loss": 0.9118, - "step": 5940 - }, - { - "epoch": 0.7143630132868394, - "grad_norm": 2.040813994600949, - "learning_rate": 7.963768660896062e-07, - "loss": 1.0726, - "step": 5941 - }, - { - "epoch": 0.7144832561774785, - "grad_norm": 1.712998633638584, - "learning_rate": 7.957548409520432e-07, - "loss": 1.0519, - "step": 5942 - }, - { - "epoch": 0.7146034990681176, - "grad_norm": 2.334849024268055, - "learning_rate": 7.951329984975135e-07, - "loss": 1.0756, - "step": 5943 - }, - { - "epoch": 0.7147237419587567, - "grad_norm": 2.2349839881135667, - "learning_rate": 7.94511338820349e-07, - "loss": 0.7939, - "step": 5944 - }, - { - "epoch": 0.7148439848493958, - "grad_norm": 2.3488761284111757, - "learning_rate": 7.938898620148575e-07, - "loss": 1.0142, - "step": 5945 - }, - { - "epoch": 0.7149642277400349, - "grad_norm": 2.7365164082762035, - "learning_rate": 7.932685681753135e-07, - "loss": 0.9414, - "step": 5946 - }, - { - "epoch": 0.7150844706306739, - "grad_norm": 1.6476671488922812, - "learning_rate": 7.92647457395969e-07, - "loss": 0.8549, - "step": 5947 - }, - { - "epoch": 0.7152047135213131, - "grad_norm": 2.851983193496471, - "learning_rate": 7.920265297710444e-07, - "loss": 0.9733, - "step": 5948 - }, - { - "epoch": 0.7153249564119522, - "grad_norm": 2.1619441823044143, - "learning_rate": 7.914057853947363e-07, - "loss": 0.9541, - "step": 5949 - }, - { - "epoch": 0.7154451993025912, - "grad_norm": 7.684909290977147, - "learning_rate": 7.907852243612089e-07, - "loss": 0.8627, - "step": 5950 - }, - { - "epoch": 0.7155654421932304, - "grad_norm": 1.8751674577320698, - "learning_rate": 7.901648467646009e-07, - "loss": 0.9454, - "step": 5951 - }, - { - "epoch": 0.7156856850838694, - "grad_norm": 3.402677149593066, - "learning_rate": 7.895446526990244e-07, - "loss": 0.9534, - "step": 5952 - }, - { - "epoch": 0.7158059279745085, - "grad_norm": 1.647073913953877, - "learning_rate": 7.889246422585609e-07, - "loss": 0.9796, - "step": 5953 - }, - { - "epoch": 0.7159261708651476, - "grad_norm": 2.696425056274779, - "learning_rate": 7.883048155372675e-07, - "loss": 0.9653, - "step": 5954 - }, - { - "epoch": 0.7160464137557867, - "grad_norm": 2.7325055516527743, - "learning_rate": 7.876851726291698e-07, - "loss": 0.9349, - "step": 5955 - }, - { - "epoch": 0.7161666566464258, - "grad_norm": 1.726426960730014, - "learning_rate": 7.870657136282666e-07, - "loss": 1.01, - "step": 5956 - }, - { - "epoch": 0.7162868995370649, - "grad_norm": 1.5428027099892174, - "learning_rate": 7.86446438628531e-07, - "loss": 1.0462, - "step": 5957 - }, - { - "epoch": 0.716407142427704, - "grad_norm": 0.8026477553512372, - "learning_rate": 7.858273477239059e-07, - "loss": 0.8303, - "step": 5958 - }, - { - "epoch": 0.716527385318343, - "grad_norm": 1.8648226777936951, - "learning_rate": 7.852084410083067e-07, - "loss": 0.9391, - "step": 5959 - }, - { - "epoch": 0.7166476282089821, - "grad_norm": 2.2214489492130944, - "learning_rate": 7.84589718575621e-07, - "loss": 0.8665, - "step": 5960 - }, - { - "epoch": 0.7167678710996213, - "grad_norm": 2.419644811747098, - "learning_rate": 7.83971180519708e-07, - "loss": 0.9149, - "step": 5961 - }, - { - "epoch": 0.7168881139902603, - "grad_norm": 1.8968719809192343, - "learning_rate": 7.833528269344008e-07, - "loss": 0.9774, - "step": 5962 - }, - { - "epoch": 0.7170083568808994, - "grad_norm": 2.1057239724572527, - "learning_rate": 7.827346579135023e-07, - "loss": 1.0084, - "step": 5963 - }, - { - "epoch": 0.7171285997715385, - "grad_norm": 2.086691387972576, - "learning_rate": 7.821166735507885e-07, - "loss": 1.0605, - "step": 5964 - }, - { - "epoch": 0.7172488426621776, - "grad_norm": 1.757235555274936, - "learning_rate": 7.81498873940007e-07, - "loss": 0.914, - "step": 5965 - }, - { - "epoch": 0.7173690855528166, - "grad_norm": 2.4741811765727335, - "learning_rate": 7.808812591748768e-07, - "loss": 1.0021, - "step": 5966 - }, - { - "epoch": 0.7174893284434558, - "grad_norm": 2.281292745111356, - "learning_rate": 7.802638293490915e-07, - "loss": 0.8799, - "step": 5967 - }, - { - "epoch": 0.7176095713340949, - "grad_norm": 1.6772077131252763, - "learning_rate": 7.796465845563123e-07, - "loss": 1.0087, - "step": 5968 - }, - { - "epoch": 0.7177298142247339, - "grad_norm": 2.1347306677075504, - "learning_rate": 7.790295248901766e-07, - "loss": 1.0285, - "step": 5969 - }, - { - "epoch": 0.7178500571153731, - "grad_norm": 1.628114869421195, - "learning_rate": 7.784126504442902e-07, - "loss": 0.8578, - "step": 5970 - }, - { - "epoch": 0.7179703000060121, - "grad_norm": 1.3795307247477568, - "learning_rate": 7.777959613122351e-07, - "loss": 0.9045, - "step": 5971 - }, - { - "epoch": 0.7180905428966512, - "grad_norm": 1.8197162617646434, - "learning_rate": 7.771794575875604e-07, - "loss": 1.0074, - "step": 5972 - }, - { - "epoch": 0.7182107857872904, - "grad_norm": 2.1814714466913134, - "learning_rate": 7.765631393637888e-07, - "loss": 1.0123, - "step": 5973 - }, - { - "epoch": 0.7183310286779294, - "grad_norm": 5.879839587975361, - "learning_rate": 7.75947006734417e-07, - "loss": 0.7114, - "step": 5974 - }, - { - "epoch": 0.7184512715685685, - "grad_norm": 1.9060994698546303, - "learning_rate": 7.753310597929101e-07, - "loss": 1.0544, - "step": 5975 - }, - { - "epoch": 0.7185715144592076, - "grad_norm": 0.7800480943304307, - "learning_rate": 7.747152986327095e-07, - "loss": 0.8126, - "step": 5976 - }, - { - "epoch": 0.7186917573498467, - "grad_norm": 2.3504710151526034, - "learning_rate": 7.740997233472228e-07, - "loss": 0.9106, - "step": 5977 - }, - { - "epoch": 0.7188120002404857, - "grad_norm": 2.700326871512539, - "learning_rate": 7.734843340298329e-07, - "loss": 0.9354, - "step": 5978 - }, - { - "epoch": 0.7189322431311249, - "grad_norm": 2.50205727806363, - "learning_rate": 7.72869130773895e-07, - "loss": 0.9866, - "step": 5979 - }, - { - "epoch": 0.719052486021764, - "grad_norm": 0.8249794220525444, - "learning_rate": 7.722541136727343e-07, - "loss": 0.8423, - "step": 5980 - }, - { - "epoch": 0.719172728912403, - "grad_norm": 1.8218459845141364, - "learning_rate": 7.716392828196483e-07, - "loss": 1.0404, - "step": 5981 - }, - { - "epoch": 0.7192929718030422, - "grad_norm": 2.2175933616965433, - "learning_rate": 7.710246383079064e-07, - "loss": 0.996, - "step": 5982 - }, - { - "epoch": 0.7194132146936812, - "grad_norm": 2.6595094317336927, - "learning_rate": 7.704101802307492e-07, - "loss": 1.1461, - "step": 5983 - }, - { - "epoch": 0.7195334575843203, - "grad_norm": 2.0595214606433667, - "learning_rate": 7.697959086813912e-07, - "loss": 1.1021, - "step": 5984 - }, - { - "epoch": 0.7196537004749595, - "grad_norm": 1.6644982139140085, - "learning_rate": 7.691818237530145e-07, - "loss": 1.0298, - "step": 5985 - }, - { - "epoch": 0.7197739433655985, - "grad_norm": 1.9895076903764757, - "learning_rate": 7.685679255387774e-07, - "loss": 1.0074, - "step": 5986 - }, - { - "epoch": 0.7198941862562376, - "grad_norm": 1.847767735801432, - "learning_rate": 7.679542141318065e-07, - "loss": 0.9991, - "step": 5987 - }, - { - "epoch": 0.7200144291468767, - "grad_norm": 1.7824311606662508, - "learning_rate": 7.673406896252013e-07, - "loss": 0.9985, - "step": 5988 - }, - { - "epoch": 0.7201346720375158, - "grad_norm": 1.513425587952432, - "learning_rate": 7.667273521120347e-07, - "loss": 1.0103, - "step": 5989 - }, - { - "epoch": 0.7202549149281549, - "grad_norm": 2.2843004498926227, - "learning_rate": 7.661142016853468e-07, - "loss": 1.0268, - "step": 5990 - }, - { - "epoch": 0.7203751578187939, - "grad_norm": 1.743888537738483, - "learning_rate": 7.655012384381543e-07, - "loss": 0.9773, - "step": 5991 - }, - { - "epoch": 0.7204954007094331, - "grad_norm": 1.7708959979769097, - "learning_rate": 7.648884624634415e-07, - "loss": 1.0438, - "step": 5992 - }, - { - "epoch": 0.7206156436000721, - "grad_norm": 2.5620925393287863, - "learning_rate": 7.642758738541683e-07, - "loss": 1.1141, - "step": 5993 - }, - { - "epoch": 0.7207358864907112, - "grad_norm": 0.7751384488387633, - "learning_rate": 7.636634727032621e-07, - "loss": 0.8508, - "step": 5994 - }, - { - "epoch": 0.7208561293813504, - "grad_norm": 2.1981283028723944, - "learning_rate": 7.630512591036231e-07, - "loss": 1.0256, - "step": 5995 - }, - { - "epoch": 0.7209763722719894, - "grad_norm": 2.3063117042288512, - "learning_rate": 7.624392331481255e-07, - "loss": 0.8764, - "step": 5996 - }, - { - "epoch": 0.7210966151626285, - "grad_norm": 0.990251258841593, - "learning_rate": 7.618273949296115e-07, - "loss": 0.7739, - "step": 5997 - }, - { - "epoch": 0.7212168580532676, - "grad_norm": 1.8646760913927565, - "learning_rate": 7.612157445408987e-07, - "loss": 0.914, - "step": 5998 - }, - { - "epoch": 0.7213371009439067, - "grad_norm": 2.6136432391291113, - "learning_rate": 7.606042820747716e-07, - "loss": 0.975, - "step": 5999 - }, - { - "epoch": 0.7214573438345457, - "grad_norm": 1.8700752432877832, - "learning_rate": 7.599930076239889e-07, - "loss": 1.0805, - "step": 6000 - }, - { - "epoch": 0.7215775867251849, - "grad_norm": 2.297597862209983, - "learning_rate": 7.593819212812818e-07, - "loss": 0.9421, - "step": 6001 - }, - { - "epoch": 0.721697829615824, - "grad_norm": 2.3133318667814824, - "learning_rate": 7.587710231393508e-07, - "loss": 0.9535, - "step": 6002 - }, - { - "epoch": 0.721818072506463, - "grad_norm": 2.016406477019553, - "learning_rate": 7.581603132908685e-07, - "loss": 1.0656, - "step": 6003 - }, - { - "epoch": 0.7219383153971022, - "grad_norm": 1.7352046872196583, - "learning_rate": 7.575497918284795e-07, - "loss": 1.0048, - "step": 6004 - }, - { - "epoch": 0.7220585582877412, - "grad_norm": 2.2106596971010077, - "learning_rate": 7.569394588447984e-07, - "loss": 0.9772, - "step": 6005 - }, - { - "epoch": 0.7221788011783803, - "grad_norm": 6.277928627174762, - "learning_rate": 7.563293144324146e-07, - "loss": 1.0018, - "step": 6006 - }, - { - "epoch": 0.7222990440690195, - "grad_norm": 1.7786764325646371, - "learning_rate": 7.557193586838834e-07, - "loss": 1.0302, - "step": 6007 - }, - { - "epoch": 0.7224192869596585, - "grad_norm": 2.348324393405868, - "learning_rate": 7.551095916917371e-07, - "loss": 0.9369, - "step": 6008 - }, - { - "epoch": 0.7225395298502976, - "grad_norm": 2.270071677702164, - "learning_rate": 7.545000135484758e-07, - "loss": 0.8885, - "step": 6009 - }, - { - "epoch": 0.7226597727409367, - "grad_norm": 2.123952270718675, - "learning_rate": 7.538906243465714e-07, - "loss": 0.8624, - "step": 6010 - }, - { - "epoch": 0.7227800156315758, - "grad_norm": 1.9130775252853909, - "learning_rate": 7.5328142417847e-07, - "loss": 1.012, - "step": 6011 - }, - { - "epoch": 0.7229002585222148, - "grad_norm": 3.4543296128244108, - "learning_rate": 7.526724131365838e-07, - "loss": 0.9215, - "step": 6012 - }, - { - "epoch": 0.723020501412854, - "grad_norm": 1.9205018387050512, - "learning_rate": 7.520635913133017e-07, - "loss": 0.931, - "step": 6013 - }, - { - "epoch": 0.7231407443034931, - "grad_norm": 3.0228971249776664, - "learning_rate": 7.514549588009798e-07, - "loss": 1.0498, - "step": 6014 - }, - { - "epoch": 0.7232609871941321, - "grad_norm": 9.021949704861516, - "learning_rate": 7.508465156919492e-07, - "loss": 0.9442, - "step": 6015 - }, - { - "epoch": 0.7233812300847713, - "grad_norm": 3.6487882719159637, - "learning_rate": 7.502382620785083e-07, - "loss": 0.8527, - "step": 6016 - }, - { - "epoch": 0.7235014729754103, - "grad_norm": 0.925170181702969, - "learning_rate": 7.496301980529289e-07, - "loss": 0.9234, - "step": 6017 - }, - { - "epoch": 0.7236217158660494, - "grad_norm": 1.8495325477378335, - "learning_rate": 7.490223237074547e-07, - "loss": 0.9783, - "step": 6018 - }, - { - "epoch": 0.7237419587566886, - "grad_norm": 1.9019073675123555, - "learning_rate": 7.484146391342989e-07, - "loss": 0.8829, - "step": 6019 - }, - { - "epoch": 0.7238622016473276, - "grad_norm": 2.190326934867031, - "learning_rate": 7.478071444256484e-07, - "loss": 0.7965, - "step": 6020 - }, - { - "epoch": 0.7239824445379667, - "grad_norm": 2.269423674300716, - "learning_rate": 7.471998396736579e-07, - "loss": 1.0132, - "step": 6021 - }, - { - "epoch": 0.7241026874286057, - "grad_norm": 1.7763409478932335, - "learning_rate": 7.465927249704549e-07, - "loss": 0.9848, - "step": 6022 - }, - { - "epoch": 0.7242229303192449, - "grad_norm": 1.7100057634178434, - "learning_rate": 7.459858004081398e-07, - "loss": 0.9987, - "step": 6023 - }, - { - "epoch": 0.724343173209884, - "grad_norm": 0.6647437472054748, - "learning_rate": 7.453790660787815e-07, - "loss": 0.8318, - "step": 6024 - }, - { - "epoch": 0.724463416100523, - "grad_norm": 2.0505737199681873, - "learning_rate": 7.447725220744214e-07, - "loss": 0.8699, - "step": 6025 - }, - { - "epoch": 0.7245836589911622, - "grad_norm": 2.3048656330126223, - "learning_rate": 7.441661684870717e-07, - "loss": 0.9873, - "step": 6026 - }, - { - "epoch": 0.7247039018818012, - "grad_norm": 1.8103307699963713, - "learning_rate": 7.435600054087152e-07, - "loss": 1.0458, - "step": 6027 - }, - { - "epoch": 0.7248241447724403, - "grad_norm": 2.004106240210833, - "learning_rate": 7.42954032931308e-07, - "loss": 0.9753, - "step": 6028 - }, - { - "epoch": 0.7249443876630794, - "grad_norm": 1.6776962897670848, - "learning_rate": 7.423482511467733e-07, - "loss": 0.9706, - "step": 6029 - }, - { - "epoch": 0.7250646305537185, - "grad_norm": 2.6748887404361623, - "learning_rate": 7.417426601470099e-07, - "loss": 0.881, - "step": 6030 - }, - { - "epoch": 0.7251848734443576, - "grad_norm": 3.970392563097828, - "learning_rate": 7.411372600238841e-07, - "loss": 1.0071, - "step": 6031 - }, - { - "epoch": 0.7253051163349967, - "grad_norm": 4.114952103068379, - "learning_rate": 7.405320508692346e-07, - "loss": 0.9663, - "step": 6032 - }, - { - "epoch": 0.7254253592256358, - "grad_norm": 1.8825811980062883, - "learning_rate": 7.399270327748727e-07, - "loss": 0.9827, - "step": 6033 - }, - { - "epoch": 0.7255456021162748, - "grad_norm": 1.7428596465854393, - "learning_rate": 7.39322205832577e-07, - "loss": 0.9733, - "step": 6034 - }, - { - "epoch": 0.725665845006914, - "grad_norm": 1.8892649653297475, - "learning_rate": 7.387175701341009e-07, - "loss": 1.0325, - "step": 6035 - }, - { - "epoch": 0.7257860878975531, - "grad_norm": 2.5959521217986, - "learning_rate": 7.381131257711659e-07, - "loss": 0.9572, - "step": 6036 - }, - { - "epoch": 0.7259063307881921, - "grad_norm": 1.8064758555564406, - "learning_rate": 7.375088728354677e-07, - "loss": 1.0638, - "step": 6037 - }, - { - "epoch": 0.7260265736788313, - "grad_norm": 1.925850207349648, - "learning_rate": 7.369048114186691e-07, - "loss": 0.9014, - "step": 6038 - }, - { - "epoch": 0.7261468165694703, - "grad_norm": 18.452609837485134, - "learning_rate": 7.363009416124055e-07, - "loss": 1.0621, - "step": 6039 - }, - { - "epoch": 0.7262670594601094, - "grad_norm": 2.5805340233451854, - "learning_rate": 7.356972635082852e-07, - "loss": 0.8663, - "step": 6040 - }, - { - "epoch": 0.7263873023507486, - "grad_norm": 1.940805384411619, - "learning_rate": 7.35093777197884e-07, - "loss": 0.9797, - "step": 6041 - }, - { - "epoch": 0.7265075452413876, - "grad_norm": 2.5171098769359195, - "learning_rate": 7.344904827727525e-07, - "loss": 1.0763, - "step": 6042 - }, - { - "epoch": 0.7266277881320267, - "grad_norm": 3.217657107390747, - "learning_rate": 7.338873803244076e-07, - "loss": 0.966, - "step": 6043 - }, - { - "epoch": 0.7267480310226658, - "grad_norm": 1.7276341491373053, - "learning_rate": 7.332844699443401e-07, - "loss": 1.0287, - "step": 6044 - }, - { - "epoch": 0.7268682739133049, - "grad_norm": 1.8090882912551949, - "learning_rate": 7.326817517240121e-07, - "loss": 0.9785, - "step": 6045 - }, - { - "epoch": 0.7269885168039439, - "grad_norm": 1.7153942293422078, - "learning_rate": 7.320792257548545e-07, - "loss": 1.0677, - "step": 6046 - }, - { - "epoch": 0.7271087596945831, - "grad_norm": 1.9298823331630186, - "learning_rate": 7.314768921282704e-07, - "loss": 0.9919, - "step": 6047 - }, - { - "epoch": 0.7272290025852222, - "grad_norm": 2.329871629772392, - "learning_rate": 7.30874750935633e-07, - "loss": 0.9414, - "step": 6048 - }, - { - "epoch": 0.7273492454758612, - "grad_norm": 1.7994385055460216, - "learning_rate": 7.30272802268286e-07, - "loss": 1.0174, - "step": 6049 - }, - { - "epoch": 0.7274694883665004, - "grad_norm": 1.6915368432777744, - "learning_rate": 7.29671046217547e-07, - "loss": 0.9937, - "step": 6050 - }, - { - "epoch": 0.7275897312571394, - "grad_norm": 2.9847261711541413, - "learning_rate": 7.290694828746988e-07, - "loss": 1.05, - "step": 6051 - }, - { - "epoch": 0.7277099741477785, - "grad_norm": 1.7223217005695206, - "learning_rate": 7.284681123310004e-07, - "loss": 1.0869, - "step": 6052 - }, - { - "epoch": 0.7278302170384175, - "grad_norm": 1.8335840602857654, - "learning_rate": 7.27866934677678e-07, - "loss": 1.022, - "step": 6053 - }, - { - "epoch": 0.7279504599290567, - "grad_norm": 1.5530234781469137, - "learning_rate": 7.272659500059297e-07, - "loss": 1.0071, - "step": 6054 - }, - { - "epoch": 0.7280707028196958, - "grad_norm": 2.0868503130617624, - "learning_rate": 7.266651584069264e-07, - "loss": 1.0348, - "step": 6055 - }, - { - "epoch": 0.7281909457103348, - "grad_norm": 4.448672561678951, - "learning_rate": 7.260645599718045e-07, - "loss": 0.8044, - "step": 6056 - }, - { - "epoch": 0.728311188600974, - "grad_norm": 10.644364247543157, - "learning_rate": 7.254641547916767e-07, - "loss": 0.9037, - "step": 6057 - }, - { - "epoch": 0.728431431491613, - "grad_norm": 4.322655923479, - "learning_rate": 7.248639429576226e-07, - "loss": 0.9221, - "step": 6058 - }, - { - "epoch": 0.7285516743822521, - "grad_norm": 1.6475330058888544, - "learning_rate": 7.242639245606959e-07, - "loss": 0.9569, - "step": 6059 - }, - { - "epoch": 0.7286719172728913, - "grad_norm": 1.9760208895617721, - "learning_rate": 7.236640996919168e-07, - "loss": 1.0572, - "step": 6060 - }, - { - "epoch": 0.7287921601635303, - "grad_norm": 1.9931754261909647, - "learning_rate": 7.230644684422782e-07, - "loss": 0.9361, - "step": 6061 - }, - { - "epoch": 0.7289124030541694, - "grad_norm": 1.7948045162981519, - "learning_rate": 7.224650309027451e-07, - "loss": 1.0479, - "step": 6062 - }, - { - "epoch": 0.7290326459448085, - "grad_norm": 1.6480065094208165, - "learning_rate": 7.218657871642506e-07, - "loss": 0.9115, - "step": 6063 - }, - { - "epoch": 0.7291528888354476, - "grad_norm": 2.100908709511359, - "learning_rate": 7.212667373177012e-07, - "loss": 0.8459, - "step": 6064 - }, - { - "epoch": 0.7292731317260867, - "grad_norm": 2.411799385445391, - "learning_rate": 7.206678814539704e-07, - "loss": 0.9884, - "step": 6065 - }, - { - "epoch": 0.7293933746167258, - "grad_norm": 1.5180310686623626, - "learning_rate": 7.20069219663904e-07, - "loss": 0.957, - "step": 6066 - }, - { - "epoch": 0.7295136175073649, - "grad_norm": 2.6159488308886387, - "learning_rate": 7.1947075203832e-07, - "loss": 1.023, - "step": 6067 - }, - { - "epoch": 0.7296338603980039, - "grad_norm": 0.9590026791567964, - "learning_rate": 7.188724786680049e-07, - "loss": 0.8461, - "step": 6068 - }, - { - "epoch": 0.7297541032886431, - "grad_norm": 1.6166459065838041, - "learning_rate": 7.182743996437162e-07, - "loss": 0.9768, - "step": 6069 - }, - { - "epoch": 0.7298743461792822, - "grad_norm": 2.0309031263695245, - "learning_rate": 7.176765150561819e-07, - "loss": 0.9147, - "step": 6070 - }, - { - "epoch": 0.7299945890699212, - "grad_norm": 2.857903106904961, - "learning_rate": 7.170788249961002e-07, - "loss": 1.0227, - "step": 6071 - }, - { - "epoch": 0.7301148319605604, - "grad_norm": 2.670688477236506, - "learning_rate": 7.164813295541418e-07, - "loss": 1.1122, - "step": 6072 - }, - { - "epoch": 0.7302350748511994, - "grad_norm": 1.873185360495696, - "learning_rate": 7.15884028820944e-07, - "loss": 0.9248, - "step": 6073 - }, - { - "epoch": 0.7303553177418385, - "grad_norm": 2.17843252717445, - "learning_rate": 7.152869228871185e-07, - "loss": 0.8312, - "step": 6074 - }, - { - "epoch": 0.7304755606324776, - "grad_norm": 1.5453114940613037, - "learning_rate": 7.146900118432457e-07, - "loss": 0.9511, - "step": 6075 - }, - { - "epoch": 0.7305958035231167, - "grad_norm": 1.565339064994296, - "learning_rate": 7.140932957798753e-07, - "loss": 1.0834, - "step": 6076 - }, - { - "epoch": 0.7307160464137558, - "grad_norm": 3.3968443194480384, - "learning_rate": 7.134967747875309e-07, - "loss": 0.943, - "step": 6077 - }, - { - "epoch": 0.7308362893043949, - "grad_norm": 1.8170585527532983, - "learning_rate": 7.129004489567014e-07, - "loss": 1.0476, - "step": 6078 - }, - { - "epoch": 0.730956532195034, - "grad_norm": 2.0139949624491993, - "learning_rate": 7.123043183778512e-07, - "loss": 1.0123, - "step": 6079 - }, - { - "epoch": 0.731076775085673, - "grad_norm": 1.4682886738359613, - "learning_rate": 7.117083831414114e-07, - "loss": 0.8753, - "step": 6080 - }, - { - "epoch": 0.7311970179763122, - "grad_norm": 1.938800190928304, - "learning_rate": 7.11112643337787e-07, - "loss": 0.9339, - "step": 6081 - }, - { - "epoch": 0.7313172608669513, - "grad_norm": 2.826968130314605, - "learning_rate": 7.10517099057349e-07, - "loss": 0.9956, - "step": 6082 - }, - { - "epoch": 0.7314375037575903, - "grad_norm": 3.4511779543310914, - "learning_rate": 7.099217503904411e-07, - "loss": 0.8451, - "step": 6083 - }, - { - "epoch": 0.7315577466482295, - "grad_norm": 1.7768422519646192, - "learning_rate": 7.093265974273788e-07, - "loss": 1.1341, - "step": 6084 - }, - { - "epoch": 0.7316779895388685, - "grad_norm": 1.647391977545871, - "learning_rate": 7.087316402584447e-07, - "loss": 0.9503, - "step": 6085 - }, - { - "epoch": 0.7317982324295076, - "grad_norm": 1.83828870493767, - "learning_rate": 7.081368789738953e-07, - "loss": 1.086, - "step": 6086 - }, - { - "epoch": 0.7319184753201466, - "grad_norm": 1.9714680634170167, - "learning_rate": 7.075423136639537e-07, - "loss": 1.0, - "step": 6087 - }, - { - "epoch": 0.7320387182107858, - "grad_norm": 1.6025923232503692, - "learning_rate": 7.069479444188149e-07, - "loss": 0.975, - "step": 6088 - }, - { - "epoch": 0.7321589611014249, - "grad_norm": 1.6704653920639523, - "learning_rate": 7.063537713286453e-07, - "loss": 1.0589, - "step": 6089 - }, - { - "epoch": 0.7322792039920639, - "grad_norm": 2.8421285848608786, - "learning_rate": 7.057597944835803e-07, - "loss": 1.0365, - "step": 6090 - }, - { - "epoch": 0.7323994468827031, - "grad_norm": 1.9199703051337056, - "learning_rate": 7.051660139737253e-07, - "loss": 0.9756, - "step": 6091 - }, - { - "epoch": 0.7325196897733421, - "grad_norm": 2.1985675396447104, - "learning_rate": 7.045724298891565e-07, - "loss": 0.9873, - "step": 6092 - }, - { - "epoch": 0.7326399326639812, - "grad_norm": 2.4758407551917307, - "learning_rate": 7.039790423199192e-07, - "loss": 0.9265, - "step": 6093 - }, - { - "epoch": 0.7327601755546204, - "grad_norm": 2.077624956359918, - "learning_rate": 7.033858513560322e-07, - "loss": 1.0107, - "step": 6094 - }, - { - "epoch": 0.7328804184452594, - "grad_norm": 4.277946144082453, - "learning_rate": 7.027928570874794e-07, - "loss": 0.999, - "step": 6095 - }, - { - "epoch": 0.7330006613358985, - "grad_norm": 1.8843422527852296, - "learning_rate": 7.022000596042194e-07, - "loss": 1.0756, - "step": 6096 - }, - { - "epoch": 0.7331209042265376, - "grad_norm": 2.558122384717524, - "learning_rate": 7.016074589961784e-07, - "loss": 1.0532, - "step": 6097 - }, - { - "epoch": 0.7332411471171767, - "grad_norm": 1.6482898230189145, - "learning_rate": 7.01015055353253e-07, - "loss": 0.8937, - "step": 6098 - }, - { - "epoch": 0.7333613900078157, - "grad_norm": 2.539895727915873, - "learning_rate": 7.004228487653123e-07, - "loss": 1.003, - "step": 6099 - }, - { - "epoch": 0.7334816328984549, - "grad_norm": 1.7316111632047415, - "learning_rate": 6.998308393221906e-07, - "loss": 1.0089, - "step": 6100 - }, - { - "epoch": 0.733601875789094, - "grad_norm": 2.5868568284474134, - "learning_rate": 6.992390271136977e-07, - "loss": 0.9443, - "step": 6101 - }, - { - "epoch": 0.733722118679733, - "grad_norm": 1.7899899140398814, - "learning_rate": 6.986474122296094e-07, - "loss": 1.0911, - "step": 6102 - }, - { - "epoch": 0.7338423615703722, - "grad_norm": 2.158862977111533, - "learning_rate": 6.980559947596751e-07, - "loss": 0.9575, - "step": 6103 - }, - { - "epoch": 0.7339626044610112, - "grad_norm": 9.221594006488672, - "learning_rate": 6.974647747936109e-07, - "loss": 0.989, - "step": 6104 - }, - { - "epoch": 0.7340828473516503, - "grad_norm": 2.789160633230698, - "learning_rate": 6.968737524211039e-07, - "loss": 1.0496, - "step": 6105 - }, - { - "epoch": 0.7342030902422895, - "grad_norm": 2.347842597864977, - "learning_rate": 6.962829277318132e-07, - "loss": 1.0317, - "step": 6106 - }, - { - "epoch": 0.7343233331329285, - "grad_norm": 1.832641044590697, - "learning_rate": 6.956923008153652e-07, - "loss": 1.0591, - "step": 6107 - }, - { - "epoch": 0.7344435760235676, - "grad_norm": 2.0041311037499683, - "learning_rate": 6.951018717613593e-07, - "loss": 1.07, - "step": 6108 - }, - { - "epoch": 0.7345638189142067, - "grad_norm": 2.1771514249188932, - "learning_rate": 6.945116406593614e-07, - "loss": 1.0079, - "step": 6109 - }, - { - "epoch": 0.7346840618048458, - "grad_norm": 2.143293179699009, - "learning_rate": 6.939216075989089e-07, - "loss": 0.9714, - "step": 6110 - }, - { - "epoch": 0.7348043046954849, - "grad_norm": 2.4882429230760974, - "learning_rate": 6.933317726695109e-07, - "loss": 0.8905, - "step": 6111 - }, - { - "epoch": 0.734924547586124, - "grad_norm": 3.1455867914933426, - "learning_rate": 6.92742135960644e-07, - "loss": 1.023, - "step": 6112 - }, - { - "epoch": 0.7350447904767631, - "grad_norm": 0.8712473849307949, - "learning_rate": 6.921526975617556e-07, - "loss": 0.8299, - "step": 6113 - }, - { - "epoch": 0.7351650333674021, - "grad_norm": 1.6921963509198714, - "learning_rate": 6.915634575622631e-07, - "loss": 0.9724, - "step": 6114 - }, - { - "epoch": 0.7352852762580413, - "grad_norm": 1.9249489244893487, - "learning_rate": 6.909744160515532e-07, - "loss": 0.9447, - "step": 6115 - }, - { - "epoch": 0.7354055191486804, - "grad_norm": 2.2719274795536664, - "learning_rate": 6.903855731189849e-07, - "loss": 0.9182, - "step": 6116 - }, - { - "epoch": 0.7355257620393194, - "grad_norm": 2.2194911095152614, - "learning_rate": 6.897969288538825e-07, - "loss": 1.0485, - "step": 6117 - }, - { - "epoch": 0.7356460049299585, - "grad_norm": 1.7014870422838413, - "learning_rate": 6.892084833455452e-07, - "loss": 1.0436, - "step": 6118 - }, - { - "epoch": 0.7357662478205976, - "grad_norm": 1.4379692970149658, - "learning_rate": 6.886202366832384e-07, - "loss": 1.0733, - "step": 6119 - }, - { - "epoch": 0.7358864907112367, - "grad_norm": 1.6086767959834403, - "learning_rate": 6.880321889561987e-07, - "loss": 0.9677, - "step": 6120 - }, - { - "epoch": 0.7360067336018757, - "grad_norm": 2.136345588416006, - "learning_rate": 6.874443402536338e-07, - "loss": 0.8908, - "step": 6121 - }, - { - "epoch": 0.7361269764925149, - "grad_norm": 1.502998137325064, - "learning_rate": 6.868566906647177e-07, - "loss": 1.0322, - "step": 6122 - }, - { - "epoch": 0.736247219383154, - "grad_norm": 2.9581246817351072, - "learning_rate": 6.862692402785984e-07, - "loss": 1.0553, - "step": 6123 - }, - { - "epoch": 0.736367462273793, - "grad_norm": 0.6908826623529334, - "learning_rate": 6.856819891843899e-07, - "loss": 0.7464, - "step": 6124 - }, - { - "epoch": 0.7364877051644322, - "grad_norm": 1.9003589466744024, - "learning_rate": 6.8509493747118e-07, - "loss": 0.9517, - "step": 6125 - }, - { - "epoch": 0.7366079480550712, - "grad_norm": 2.1270933499101954, - "learning_rate": 6.845080852280221e-07, - "loss": 1.1115, - "step": 6126 - }, - { - "epoch": 0.7367281909457103, - "grad_norm": 1.5946732633038554, - "learning_rate": 6.839214325439409e-07, - "loss": 0.9774, - "step": 6127 - }, - { - "epoch": 0.7368484338363495, - "grad_norm": 6.304629414230566, - "learning_rate": 6.833349795079327e-07, - "loss": 0.9484, - "step": 6128 - }, - { - "epoch": 0.7369686767269885, - "grad_norm": 1.6941865618817107, - "learning_rate": 6.827487262089613e-07, - "loss": 0.9117, - "step": 6129 - }, - { - "epoch": 0.7370889196176276, - "grad_norm": 7.210651826004316, - "learning_rate": 6.821626727359606e-07, - "loss": 0.831, - "step": 6130 - }, - { - "epoch": 0.7372091625082667, - "grad_norm": 2.620072508383734, - "learning_rate": 6.815768191778348e-07, - "loss": 1.0082, - "step": 6131 - }, - { - "epoch": 0.7373294053989058, - "grad_norm": 1.9568683609778263, - "learning_rate": 6.809911656234569e-07, - "loss": 0.9622, - "step": 6132 - }, - { - "epoch": 0.7374496482895448, - "grad_norm": 2.3743659729169697, - "learning_rate": 6.804057121616707e-07, - "loss": 1.0174, - "step": 6133 - }, - { - "epoch": 0.737569891180184, - "grad_norm": 2.1608247739968314, - "learning_rate": 6.798204588812888e-07, - "loss": 0.9548, - "step": 6134 - }, - { - "epoch": 0.7376901340708231, - "grad_norm": 2.038125476901441, - "learning_rate": 6.792354058710937e-07, - "loss": 0.9784, - "step": 6135 - }, - { - "epoch": 0.7378103769614621, - "grad_norm": 1.987508747393474, - "learning_rate": 6.786505532198374e-07, - "loss": 0.8874, - "step": 6136 - }, - { - "epoch": 0.7379306198521013, - "grad_norm": 2.485379957475319, - "learning_rate": 6.780659010162411e-07, - "loss": 1.0836, - "step": 6137 - }, - { - "epoch": 0.7380508627427403, - "grad_norm": 1.892356584020521, - "learning_rate": 6.774814493489975e-07, - "loss": 1.0617, - "step": 6138 - }, - { - "epoch": 0.7381711056333794, - "grad_norm": 1.6722790951666375, - "learning_rate": 6.768971983067655e-07, - "loss": 0.8967, - "step": 6139 - }, - { - "epoch": 0.7382913485240186, - "grad_norm": 1.034165682171013, - "learning_rate": 6.763131479781772e-07, - "loss": 0.9352, - "step": 6140 - }, - { - "epoch": 0.7384115914146576, - "grad_norm": 2.1248716427801035, - "learning_rate": 6.757292984518316e-07, - "loss": 1.0044, - "step": 6141 - }, - { - "epoch": 0.7385318343052967, - "grad_norm": 0.8485649968520967, - "learning_rate": 6.751456498162981e-07, - "loss": 0.8455, - "step": 6142 - }, - { - "epoch": 0.7386520771959358, - "grad_norm": 1.8864141814697781, - "learning_rate": 6.745622021601174e-07, - "loss": 1.0866, - "step": 6143 - }, - { - "epoch": 0.7387723200865749, - "grad_norm": 2.460207585671076, - "learning_rate": 6.739789555717954e-07, - "loss": 0.9351, - "step": 6144 - }, - { - "epoch": 0.738892562977214, - "grad_norm": 2.0765698572721667, - "learning_rate": 6.733959101398124e-07, - "loss": 1.0135, - "step": 6145 - }, - { - "epoch": 0.7390128058678531, - "grad_norm": 1.7362899518617376, - "learning_rate": 6.728130659526143e-07, - "loss": 1.0435, - "step": 6146 - }, - { - "epoch": 0.7391330487584922, - "grad_norm": 2.640728559099829, - "learning_rate": 6.7223042309862e-07, - "loss": 0.9312, - "step": 6147 - }, - { - "epoch": 0.7392532916491312, - "grad_norm": 1.9715986683917448, - "learning_rate": 6.716479816662144e-07, - "loss": 0.9563, - "step": 6148 - }, - { - "epoch": 0.7393735345397703, - "grad_norm": 2.697934321243867, - "learning_rate": 6.710657417437531e-07, - "loss": 0.9601, - "step": 6149 - }, - { - "epoch": 0.7394937774304094, - "grad_norm": 2.1088314910320265, - "learning_rate": 6.704837034195628e-07, - "loss": 1.0195, - "step": 6150 - }, - { - "epoch": 0.7396140203210485, - "grad_norm": 1.620295978634807, - "learning_rate": 6.699018667819376e-07, - "loss": 1.0807, - "step": 6151 - }, - { - "epoch": 0.7397342632116876, - "grad_norm": 1.5452385926195111, - "learning_rate": 6.693202319191415e-07, - "loss": 0.9557, - "step": 6152 - }, - { - "epoch": 0.7398545061023267, - "grad_norm": 2.22358456284115, - "learning_rate": 6.687387989194084e-07, - "loss": 0.9664, - "step": 6153 - }, - { - "epoch": 0.7399747489929658, - "grad_norm": 2.4235600669851185, - "learning_rate": 6.681575678709404e-07, - "loss": 1.0235, - "step": 6154 - }, - { - "epoch": 0.7400949918836048, - "grad_norm": 2.4544567210923343, - "learning_rate": 6.67576538861911e-07, - "loss": 0.9369, - "step": 6155 - }, - { - "epoch": 0.740215234774244, - "grad_norm": 1.4261239254031537, - "learning_rate": 6.669957119804612e-07, - "loss": 1.0549, - "step": 6156 - }, - { - "epoch": 0.7403354776648831, - "grad_norm": 2.776370708343925, - "learning_rate": 6.66415087314702e-07, - "loss": 0.942, - "step": 6157 - }, - { - "epoch": 0.7404557205555221, - "grad_norm": 1.9391069823641358, - "learning_rate": 6.65834664952714e-07, - "loss": 0.9544, - "step": 6158 - }, - { - "epoch": 0.7405759634461613, - "grad_norm": 1.4774973498480406, - "learning_rate": 6.652544449825457e-07, - "loss": 0.9847, - "step": 6159 - }, - { - "epoch": 0.7406962063368003, - "grad_norm": 1.5264221689153847, - "learning_rate": 6.646744274922182e-07, - "loss": 0.9923, - "step": 6160 - }, - { - "epoch": 0.7408164492274394, - "grad_norm": 3.552301767443873, - "learning_rate": 6.640946125697171e-07, - "loss": 0.9925, - "step": 6161 - }, - { - "epoch": 0.7409366921180786, - "grad_norm": 2.2864385674968424, - "learning_rate": 6.635150003030017e-07, - "loss": 0.9889, - "step": 6162 - }, - { - "epoch": 0.7410569350087176, - "grad_norm": 3.6948537400635257, - "learning_rate": 6.629355907799981e-07, - "loss": 1.0837, - "step": 6163 - }, - { - "epoch": 0.7411771778993567, - "grad_norm": 2.068153493574846, - "learning_rate": 6.623563840886015e-07, - "loss": 0.9314, - "step": 6164 - }, - { - "epoch": 0.7412974207899958, - "grad_norm": 1.8211379246731927, - "learning_rate": 6.617773803166795e-07, - "loss": 0.9287, - "step": 6165 - }, - { - "epoch": 0.7414176636806349, - "grad_norm": 2.0453168456612065, - "learning_rate": 6.611985795520634e-07, - "loss": 1.0462, - "step": 6166 - }, - { - "epoch": 0.7415379065712739, - "grad_norm": 2.899906906738391, - "learning_rate": 6.606199818825588e-07, - "loss": 0.9938, - "step": 6167 - }, - { - "epoch": 0.7416581494619131, - "grad_norm": 2.1037732376565055, - "learning_rate": 6.600415873959377e-07, - "loss": 1.0417, - "step": 6168 - }, - { - "epoch": 0.7417783923525522, - "grad_norm": 1.9297665722665787, - "learning_rate": 6.594633961799437e-07, - "loss": 0.8787, - "step": 6169 - }, - { - "epoch": 0.7418986352431912, - "grad_norm": 1.7710348650377612, - "learning_rate": 6.588854083222857e-07, - "loss": 1.0508, - "step": 6170 - }, - { - "epoch": 0.7420188781338304, - "grad_norm": 2.028603223190748, - "learning_rate": 6.583076239106444e-07, - "loss": 1.0335, - "step": 6171 - }, - { - "epoch": 0.7421391210244694, - "grad_norm": 3.5144461456701013, - "learning_rate": 6.577300430326707e-07, - "loss": 0.9849, - "step": 6172 - }, - { - "epoch": 0.7422593639151085, - "grad_norm": 1.923854521749426, - "learning_rate": 6.571526657759821e-07, - "loss": 0.9477, - "step": 6173 - }, - { - "epoch": 0.7423796068057477, - "grad_norm": 1.5622171527439042, - "learning_rate": 6.565754922281663e-07, - "loss": 0.943, - "step": 6174 - }, - { - "epoch": 0.7424998496963867, - "grad_norm": 2.0318304552999673, - "learning_rate": 6.559985224767801e-07, - "loss": 1.0155, - "step": 6175 - }, - { - "epoch": 0.7426200925870258, - "grad_norm": 2.1696370622040573, - "learning_rate": 6.55421756609349e-07, - "loss": 0.9807, - "step": 6176 - }, - { - "epoch": 0.7427403354776649, - "grad_norm": 1.7070718930641438, - "learning_rate": 6.54845194713369e-07, - "loss": 1.0152, - "step": 6177 - }, - { - "epoch": 0.742860578368304, - "grad_norm": 2.1899606216931504, - "learning_rate": 6.542688368763034e-07, - "loss": 1.0355, - "step": 6178 - }, - { - "epoch": 0.742980821258943, - "grad_norm": 2.1394949944623254, - "learning_rate": 6.536926831855854e-07, - "loss": 1.0014, - "step": 6179 - }, - { - "epoch": 0.7431010641495821, - "grad_norm": 2.6861077184260114, - "learning_rate": 6.531167337286165e-07, - "loss": 0.9588, - "step": 6180 - }, - { - "epoch": 0.7432213070402213, - "grad_norm": 1.4552949679815421, - "learning_rate": 6.52540988592768e-07, - "loss": 1.0224, - "step": 6181 - }, - { - "epoch": 0.7433415499308603, - "grad_norm": 2.1796381681595456, - "learning_rate": 6.519654478653814e-07, - "loss": 1.0635, - "step": 6182 - }, - { - "epoch": 0.7434617928214994, - "grad_norm": 0.9312245781520031, - "learning_rate": 6.51390111633763e-07, - "loss": 0.8105, - "step": 6183 - }, - { - "epoch": 0.7435820357121385, - "grad_norm": 1.6952009735691502, - "learning_rate": 6.508149799851932e-07, - "loss": 0.9921, - "step": 6184 - }, - { - "epoch": 0.7437022786027776, - "grad_norm": 2.063923084711073, - "learning_rate": 6.502400530069183e-07, - "loss": 0.8468, - "step": 6185 - }, - { - "epoch": 0.7438225214934167, - "grad_norm": 2.193930846147668, - "learning_rate": 6.496653307861535e-07, - "loss": 0.9162, - "step": 6186 - }, - { - "epoch": 0.7439427643840558, - "grad_norm": 1.8643832037166885, - "learning_rate": 6.490908134100857e-07, - "loss": 0.8854, - "step": 6187 - }, - { - "epoch": 0.7440630072746949, - "grad_norm": 2.0247611057981576, - "learning_rate": 6.48516500965866e-07, - "loss": 0.9314, - "step": 6188 - }, - { - "epoch": 0.7441832501653339, - "grad_norm": 1.8796225469184704, - "learning_rate": 6.479423935406192e-07, - "loss": 1.0449, - "step": 6189 - }, - { - "epoch": 0.7443034930559731, - "grad_norm": 0.9122163762915737, - "learning_rate": 6.473684912214357e-07, - "loss": 0.94, - "step": 6190 - }, - { - "epoch": 0.7444237359466122, - "grad_norm": 1.8862464582183733, - "learning_rate": 6.467947940953778e-07, - "loss": 0.9295, - "step": 6191 - }, - { - "epoch": 0.7445439788372512, - "grad_norm": 1.6720220198116733, - "learning_rate": 6.462213022494732e-07, - "loss": 0.9564, - "step": 6192 - }, - { - "epoch": 0.7446642217278904, - "grad_norm": 0.7978050563896835, - "learning_rate": 6.456480157707201e-07, - "loss": 0.8852, - "step": 6193 - }, - { - "epoch": 0.7447844646185294, - "grad_norm": 2.036643979975851, - "learning_rate": 6.450749347460866e-07, - "loss": 1.0827, - "step": 6194 - }, - { - "epoch": 0.7449047075091685, - "grad_norm": 1.6662310587327964, - "learning_rate": 6.445020592625083e-07, - "loss": 1.0194, - "step": 6195 - }, - { - "epoch": 0.7450249503998077, - "grad_norm": 2.736474326650532, - "learning_rate": 6.4392938940689e-07, - "loss": 1.039, - "step": 6196 - }, - { - "epoch": 0.7451451932904467, - "grad_norm": 2.155020100440082, - "learning_rate": 6.433569252661049e-07, - "loss": 0.9389, - "step": 6197 - }, - { - "epoch": 0.7452654361810858, - "grad_norm": 3.2642162149846854, - "learning_rate": 6.427846669269952e-07, - "loss": 0.9369, - "step": 6198 - }, - { - "epoch": 0.7453856790717249, - "grad_norm": 2.032037083647852, - "learning_rate": 6.422126144763729e-07, - "loss": 1.0515, - "step": 6199 - }, - { - "epoch": 0.745505921962364, - "grad_norm": 2.2713261923976766, - "learning_rate": 6.416407680010174e-07, - "loss": 1.0055, - "step": 6200 - }, - { - "epoch": 0.745626164853003, - "grad_norm": 2.756694015757805, - "learning_rate": 6.410691275876774e-07, - "loss": 1.043, - "step": 6201 - }, - { - "epoch": 0.7457464077436422, - "grad_norm": 2.4521588409215296, - "learning_rate": 6.404976933230704e-07, - "loss": 0.9903, - "step": 6202 - }, - { - "epoch": 0.7458666506342813, - "grad_norm": 2.3212393505404734, - "learning_rate": 6.399264652938813e-07, - "loss": 0.9612, - "step": 6203 - }, - { - "epoch": 0.7459868935249203, - "grad_norm": 1.9155820459027488, - "learning_rate": 6.393554435867679e-07, - "loss": 0.9736, - "step": 6204 - }, - { - "epoch": 0.7461071364155595, - "grad_norm": 2.1670496186480404, - "learning_rate": 6.387846282883502e-07, - "loss": 1.0623, - "step": 6205 - }, - { - "epoch": 0.7462273793061985, - "grad_norm": 3.3008399010136835, - "learning_rate": 6.38214019485223e-07, - "loss": 0.9998, - "step": 6206 - }, - { - "epoch": 0.7463476221968376, - "grad_norm": 3.956407232308905, - "learning_rate": 6.376436172639461e-07, - "loss": 0.9417, - "step": 6207 - }, - { - "epoch": 0.7464678650874768, - "grad_norm": 2.380774054434104, - "learning_rate": 6.370734217110487e-07, - "loss": 0.8715, - "step": 6208 - }, - { - "epoch": 0.7465881079781158, - "grad_norm": 1.350310434747276, - "learning_rate": 6.36503432913031e-07, - "loss": 0.8738, - "step": 6209 - }, - { - "epoch": 0.7467083508687549, - "grad_norm": 2.6803384329488735, - "learning_rate": 6.359336509563569e-07, - "loss": 0.9115, - "step": 6210 - }, - { - "epoch": 0.7468285937593939, - "grad_norm": 1.765960863965006, - "learning_rate": 6.353640759274641e-07, - "loss": 1.0383, - "step": 6211 - }, - { - "epoch": 0.7469488366500331, - "grad_norm": 2.593684092224334, - "learning_rate": 6.347947079127556e-07, - "loss": 0.9741, - "step": 6212 - }, - { - "epoch": 0.7470690795406721, - "grad_norm": 1.7668029096838958, - "learning_rate": 6.342255469986053e-07, - "loss": 0.9939, - "step": 6213 - }, - { - "epoch": 0.7471893224313112, - "grad_norm": 2.295246195500915, - "learning_rate": 6.336565932713533e-07, - "loss": 1.0056, - "step": 6214 - }, - { - "epoch": 0.7473095653219504, - "grad_norm": 1.7016898591830163, - "learning_rate": 6.330878468173088e-07, - "loss": 1.0105, - "step": 6215 - }, - { - "epoch": 0.7474298082125894, - "grad_norm": 1.6765572411844876, - "learning_rate": 6.32519307722752e-07, - "loss": 0.9623, - "step": 6216 - }, - { - "epoch": 0.7475500511032285, - "grad_norm": 0.7909504804748044, - "learning_rate": 6.31950976073929e-07, - "loss": 0.8123, - "step": 6217 - }, - { - "epoch": 0.7476702939938676, - "grad_norm": 2.539997480346151, - "learning_rate": 6.31382851957055e-07, - "loss": 1.0271, - "step": 6218 - }, - { - "epoch": 0.7477905368845067, - "grad_norm": 2.1066123673326165, - "learning_rate": 6.308149354583143e-07, - "loss": 0.9388, - "step": 6219 - }, - { - "epoch": 0.7479107797751458, - "grad_norm": 1.7828926409429195, - "learning_rate": 6.302472266638586e-07, - "loss": 1.0452, - "step": 6220 - }, - { - "epoch": 0.7480310226657849, - "grad_norm": 2.0841136488284198, - "learning_rate": 6.296797256598101e-07, - "loss": 0.9314, - "step": 6221 - }, - { - "epoch": 0.748151265556424, - "grad_norm": 1.6187720161030732, - "learning_rate": 6.291124325322576e-07, - "loss": 1.0353, - "step": 6222 - }, - { - "epoch": 0.748271508447063, - "grad_norm": 1.5796803279000762, - "learning_rate": 6.285453473672595e-07, - "loss": 0.8536, - "step": 6223 - }, - { - "epoch": 0.7483917513377022, - "grad_norm": 2.1775796741226485, - "learning_rate": 6.279784702508415e-07, - "loss": 0.9909, - "step": 6224 - }, - { - "epoch": 0.7485119942283412, - "grad_norm": 0.8731295825944488, - "learning_rate": 6.274118012689979e-07, - "loss": 0.8773, - "step": 6225 - }, - { - "epoch": 0.7486322371189803, - "grad_norm": 1.4680243789665042, - "learning_rate": 6.268453405076943e-07, - "loss": 0.9124, - "step": 6226 - }, - { - "epoch": 0.7487524800096195, - "grad_norm": 1.9940079692539758, - "learning_rate": 6.262790880528592e-07, - "loss": 1.0498, - "step": 6227 - }, - { - "epoch": 0.7488727229002585, - "grad_norm": 4.184857345938972, - "learning_rate": 6.257130439903951e-07, - "loss": 1.0236, - "step": 6228 - }, - { - "epoch": 0.7489929657908976, - "grad_norm": 1.912309115987876, - "learning_rate": 6.251472084061695e-07, - "loss": 1.0344, - "step": 6229 - }, - { - "epoch": 0.7491132086815367, - "grad_norm": 2.260931269934348, - "learning_rate": 6.245815813860191e-07, - "loss": 1.1287, - "step": 6230 - }, - { - "epoch": 0.7492334515721758, - "grad_norm": 2.2820959743474445, - "learning_rate": 6.240161630157495e-07, - "loss": 0.9215, - "step": 6231 - }, - { - "epoch": 0.7493536944628149, - "grad_norm": 3.167429999320923, - "learning_rate": 6.23450953381133e-07, - "loss": 0.9222, - "step": 6232 - }, - { - "epoch": 0.749473937353454, - "grad_norm": 4.508769285849794, - "learning_rate": 6.228859525679131e-07, - "loss": 0.9117, - "step": 6233 - }, - { - "epoch": 0.7495941802440931, - "grad_norm": 2.2380116351918615, - "learning_rate": 6.223211606617986e-07, - "loss": 1.0287, - "step": 6234 - }, - { - "epoch": 0.7497144231347321, - "grad_norm": 1.6660240682944754, - "learning_rate": 6.217565777484701e-07, - "loss": 1.0583, - "step": 6235 - }, - { - "epoch": 0.7498346660253713, - "grad_norm": 1.8386020236243716, - "learning_rate": 6.211922039135722e-07, - "loss": 1.0291, - "step": 6236 - }, - { - "epoch": 0.7499549089160104, - "grad_norm": 2.1466947388244972, - "learning_rate": 6.206280392427201e-07, - "loss": 1.0304, - "step": 6237 - }, - { - "epoch": 0.7500751518066494, - "grad_norm": 1.8354354496607108, - "learning_rate": 6.200640838214983e-07, - "loss": 0.9641, - "step": 6238 - }, - { - "epoch": 0.7501953946972886, - "grad_norm": 1.8638996881335423, - "learning_rate": 6.195003377354578e-07, - "loss": 0.9015, - "step": 6239 - }, - { - "epoch": 0.7503156375879276, - "grad_norm": 3.0339503893785014, - "learning_rate": 6.189368010701183e-07, - "loss": 0.9575, - "step": 6240 - }, - { - "epoch": 0.7504358804785667, - "grad_norm": 1.9992161049029675, - "learning_rate": 6.183734739109683e-07, - "loss": 0.9886, - "step": 6241 - }, - { - "epoch": 0.7505561233692057, - "grad_norm": 2.1250177270486463, - "learning_rate": 6.178103563434629e-07, - "loss": 0.9178, - "step": 6242 - }, - { - "epoch": 0.7506763662598449, - "grad_norm": 3.3875534278625827, - "learning_rate": 6.172474484530283e-07, - "loss": 1.0694, - "step": 6243 - }, - { - "epoch": 0.750796609150484, - "grad_norm": 1.8582121474548408, - "learning_rate": 6.166847503250563e-07, - "loss": 0.9847, - "step": 6244 - }, - { - "epoch": 0.750916852041123, - "grad_norm": 3.0520924816493564, - "learning_rate": 6.161222620449078e-07, - "loss": 1.0238, - "step": 6245 - }, - { - "epoch": 0.7510370949317622, - "grad_norm": 1.778532934073195, - "learning_rate": 6.155599836979117e-07, - "loss": 1.0348, - "step": 6246 - }, - { - "epoch": 0.7511573378224012, - "grad_norm": 1.8461797835441471, - "learning_rate": 6.149979153693649e-07, - "loss": 1.0421, - "step": 6247 - }, - { - "epoch": 0.7512775807130403, - "grad_norm": 2.1386113406458667, - "learning_rate": 6.144360571445343e-07, - "loss": 0.9888, - "step": 6248 - }, - { - "epoch": 0.7513978236036795, - "grad_norm": 1.810744652341651, - "learning_rate": 6.138744091086509e-07, - "loss": 1.0277, - "step": 6249 - }, - { - "epoch": 0.7515180664943185, - "grad_norm": 3.2208103141274624, - "learning_rate": 6.133129713469183e-07, - "loss": 0.9625, - "step": 6250 - }, - { - "epoch": 0.7516383093849576, - "grad_norm": 1.6548612278269341, - "learning_rate": 6.127517439445053e-07, - "loss": 0.8673, - "step": 6251 - }, - { - "epoch": 0.7517585522755967, - "grad_norm": 3.798672223263023, - "learning_rate": 6.121907269865498e-07, - "loss": 1.0505, - "step": 6252 - }, - { - "epoch": 0.7518787951662358, - "grad_norm": 0.9767951688161968, - "learning_rate": 6.116299205581577e-07, - "loss": 0.9605, - "step": 6253 - }, - { - "epoch": 0.7519990380568748, - "grad_norm": 2.2793202010843157, - "learning_rate": 6.110693247444018e-07, - "loss": 0.9136, - "step": 6254 - }, - { - "epoch": 0.752119280947514, - "grad_norm": 1.7071443079470838, - "learning_rate": 6.105089396303258e-07, - "loss": 1.0525, - "step": 6255 - }, - { - "epoch": 0.7522395238381531, - "grad_norm": 1.9250657801102447, - "learning_rate": 6.099487653009383e-07, - "loss": 0.9895, - "step": 6256 - }, - { - "epoch": 0.7523597667287921, - "grad_norm": 2.5345789365172693, - "learning_rate": 6.093888018412192e-07, - "loss": 1.0674, - "step": 6257 - }, - { - "epoch": 0.7524800096194313, - "grad_norm": 2.5602966698467142, - "learning_rate": 6.088290493361125e-07, - "loss": 0.8093, - "step": 6258 - }, - { - "epoch": 0.7526002525100703, - "grad_norm": 2.344909279538626, - "learning_rate": 6.082695078705322e-07, - "loss": 0.9482, - "step": 6259 - }, - { - "epoch": 0.7527204954007094, - "grad_norm": 2.2975575996293327, - "learning_rate": 6.077101775293618e-07, - "loss": 0.9122, - "step": 6260 - }, - { - "epoch": 0.7528407382913486, - "grad_norm": 2.63400160201016, - "learning_rate": 6.071510583974504e-07, - "loss": 1.0627, - "step": 6261 - }, - { - "epoch": 0.7529609811819876, - "grad_norm": 1.935903753028068, - "learning_rate": 6.065921505596161e-07, - "loss": 0.9415, - "step": 6262 - }, - { - "epoch": 0.7530812240726267, - "grad_norm": 1.8281641246814961, - "learning_rate": 6.060334541006445e-07, - "loss": 1.0018, - "step": 6263 - }, - { - "epoch": 0.7532014669632658, - "grad_norm": 3.9797109794148735, - "learning_rate": 6.05474969105289e-07, - "loss": 0.9164, - "step": 6264 - }, - { - "epoch": 0.7533217098539049, - "grad_norm": 3.1869250508290983, - "learning_rate": 6.049166956582725e-07, - "loss": 0.9636, - "step": 6265 - }, - { - "epoch": 0.753441952744544, - "grad_norm": 1.9807161857533373, - "learning_rate": 6.043586338442841e-07, - "loss": 1.0989, - "step": 6266 - }, - { - "epoch": 0.7535621956351831, - "grad_norm": 1.2758601727809307, - "learning_rate": 6.038007837479815e-07, - "loss": 0.9602, - "step": 6267 - }, - { - "epoch": 0.7536824385258222, - "grad_norm": 5.013171204792876, - "learning_rate": 6.032431454539897e-07, - "loss": 0.8637, - "step": 6268 - }, - { - "epoch": 0.7538026814164612, - "grad_norm": 1.6583272036082912, - "learning_rate": 6.026857190469014e-07, - "loss": 1.0367, - "step": 6269 - }, - { - "epoch": 0.7539229243071004, - "grad_norm": 1.7449340382654408, - "learning_rate": 6.0212850461128e-07, - "loss": 0.9739, - "step": 6270 - }, - { - "epoch": 0.7540431671977395, - "grad_norm": 5.137892718747198, - "learning_rate": 6.015715022316516e-07, - "loss": 0.9825, - "step": 6271 - }, - { - "epoch": 0.7541634100883785, - "grad_norm": 2.7707702706027533, - "learning_rate": 6.010147119925154e-07, - "loss": 1.0058, - "step": 6272 - }, - { - "epoch": 0.7542836529790176, - "grad_norm": 2.059169970944186, - "learning_rate": 6.004581339783348e-07, - "loss": 0.8867, - "step": 6273 - }, - { - "epoch": 0.7544038958696567, - "grad_norm": 7.353108647851559, - "learning_rate": 5.999017682735425e-07, - "loss": 0.909, - "step": 6274 - }, - { - "epoch": 0.7545241387602958, - "grad_norm": 2.2268615521826027, - "learning_rate": 5.993456149625387e-07, - "loss": 0.8924, - "step": 6275 - }, - { - "epoch": 0.7546443816509348, - "grad_norm": 1.5534400727813418, - "learning_rate": 5.987896741296909e-07, - "loss": 1.0416, - "step": 6276 - }, - { - "epoch": 0.754764624541574, - "grad_norm": 2.1444464216843593, - "learning_rate": 5.982339458593361e-07, - "loss": 1.0119, - "step": 6277 - }, - { - "epoch": 0.7548848674322131, - "grad_norm": 1.4739030573784146, - "learning_rate": 5.976784302357767e-07, - "loss": 1.0726, - "step": 6278 - }, - { - "epoch": 0.7550051103228521, - "grad_norm": 3.7039808097243365, - "learning_rate": 5.971231273432855e-07, - "loss": 0.9552, - "step": 6279 - }, - { - "epoch": 0.7551253532134913, - "grad_norm": 0.8354210728999225, - "learning_rate": 5.965680372661e-07, - "loss": 0.8146, - "step": 6280 - }, - { - "epoch": 0.7552455961041303, - "grad_norm": 2.686104700588645, - "learning_rate": 5.960131600884266e-07, - "loss": 0.7955, - "step": 6281 - }, - { - "epoch": 0.7553658389947694, - "grad_norm": 5.320349067192616, - "learning_rate": 5.954584958944413e-07, - "loss": 0.9886, - "step": 6282 - }, - { - "epoch": 0.7554860818854086, - "grad_norm": 2.092666003753437, - "learning_rate": 5.949040447682854e-07, - "loss": 1.0344, - "step": 6283 - }, - { - "epoch": 0.7556063247760476, - "grad_norm": 2.921074742359678, - "learning_rate": 5.943498067940686e-07, - "loss": 0.9186, - "step": 6284 - }, - { - "epoch": 0.7557265676666867, - "grad_norm": 1.620170606177429, - "learning_rate": 5.937957820558686e-07, - "loss": 1.0461, - "step": 6285 - }, - { - "epoch": 0.7558468105573258, - "grad_norm": 0.8495416022268625, - "learning_rate": 5.932419706377296e-07, - "loss": 0.9052, - "step": 6286 - }, - { - "epoch": 0.7559670534479649, - "grad_norm": 2.0691090666913867, - "learning_rate": 5.92688372623666e-07, - "loss": 0.9701, - "step": 6287 - }, - { - "epoch": 0.7560872963386039, - "grad_norm": 2.817567964020133, - "learning_rate": 5.921349880976574e-07, - "loss": 0.9623, - "step": 6288 - }, - { - "epoch": 0.7562075392292431, - "grad_norm": 1.985688383143224, - "learning_rate": 5.915818171436515e-07, - "loss": 1.0448, - "step": 6289 - }, - { - "epoch": 0.7563277821198822, - "grad_norm": 1.5822866728123943, - "learning_rate": 5.910288598455642e-07, - "loss": 0.9747, - "step": 6290 - }, - { - "epoch": 0.7564480250105212, - "grad_norm": 2.1559901426440296, - "learning_rate": 5.90476116287278e-07, - "loss": 0.9609, - "step": 6291 - }, - { - "epoch": 0.7565682679011604, - "grad_norm": 18.65909686324483, - "learning_rate": 5.899235865526456e-07, - "loss": 0.9091, - "step": 6292 - }, - { - "epoch": 0.7566885107917994, - "grad_norm": 1.662073226876308, - "learning_rate": 5.893712707254825e-07, - "loss": 1.0438, - "step": 6293 - }, - { - "epoch": 0.7568087536824385, - "grad_norm": 4.7701878451101605, - "learning_rate": 5.888191688895769e-07, - "loss": 0.8888, - "step": 6294 - }, - { - "epoch": 0.7569289965730777, - "grad_norm": 2.3845062739681064, - "learning_rate": 5.882672811286813e-07, - "loss": 0.8463, - "step": 6295 - }, - { - "epoch": 0.7570492394637167, - "grad_norm": 2.0425066744486386, - "learning_rate": 5.877156075265166e-07, - "loss": 0.9305, - "step": 6296 - }, - { - "epoch": 0.7571694823543558, - "grad_norm": 3.2276639005263146, - "learning_rate": 5.871641481667715e-07, - "loss": 0.9168, - "step": 6297 - }, - { - "epoch": 0.7572897252449949, - "grad_norm": 1.5914066515076046, - "learning_rate": 5.866129031331011e-07, - "loss": 1.071, - "step": 6298 - }, - { - "epoch": 0.757409968135634, - "grad_norm": 2.2928082021626444, - "learning_rate": 5.8606187250913e-07, - "loss": 1.062, - "step": 6299 - }, - { - "epoch": 0.757530211026273, - "grad_norm": 1.9254137659646084, - "learning_rate": 5.855110563784482e-07, - "loss": 1.0677, - "step": 6300 - }, - { - "epoch": 0.7576504539169122, - "grad_norm": 1.8226282018669087, - "learning_rate": 5.849604548246156e-07, - "loss": 0.8736, - "step": 6301 - }, - { - "epoch": 0.7577706968075513, - "grad_norm": 5.643265124812133, - "learning_rate": 5.844100679311565e-07, - "loss": 1.0285, - "step": 6302 - }, - { - "epoch": 0.7578909396981903, - "grad_norm": 2.352959100921461, - "learning_rate": 5.838598957815637e-07, - "loss": 0.9882, - "step": 6303 - }, - { - "epoch": 0.7580111825888295, - "grad_norm": 1.4299281183596388, - "learning_rate": 5.833099384592996e-07, - "loss": 1.0865, - "step": 6304 - }, - { - "epoch": 0.7581314254794685, - "grad_norm": 2.0429399243977993, - "learning_rate": 5.827601960477913e-07, - "loss": 0.9354, - "step": 6305 - }, - { - "epoch": 0.7582516683701076, - "grad_norm": 2.3336333581495947, - "learning_rate": 5.822106686304344e-07, - "loss": 0.9407, - "step": 6306 - }, - { - "epoch": 0.7583719112607467, - "grad_norm": 1.858953668637743, - "learning_rate": 5.816613562905919e-07, - "loss": 0.8054, - "step": 6307 - }, - { - "epoch": 0.7584921541513858, - "grad_norm": 1.8920959551312593, - "learning_rate": 5.811122591115933e-07, - "loss": 0.9398, - "step": 6308 - }, - { - "epoch": 0.7586123970420249, - "grad_norm": 2.3745927963518985, - "learning_rate": 5.805633771767376e-07, - "loss": 0.947, - "step": 6309 - }, - { - "epoch": 0.7587326399326639, - "grad_norm": 1.661153408597882, - "learning_rate": 5.800147105692888e-07, - "loss": 1.0084, - "step": 6310 - }, - { - "epoch": 0.7588528828233031, - "grad_norm": 1.6525603168231184, - "learning_rate": 5.794662593724795e-07, - "loss": 1.0218, - "step": 6311 - }, - { - "epoch": 0.7589731257139422, - "grad_norm": 2.2903451288006784, - "learning_rate": 5.789180236695091e-07, - "loss": 0.9812, - "step": 6312 - }, - { - "epoch": 0.7590933686045812, - "grad_norm": 2.482727464164345, - "learning_rate": 5.78370003543544e-07, - "loss": 1.0853, - "step": 6313 - }, - { - "epoch": 0.7592136114952204, - "grad_norm": 2.7698828900366568, - "learning_rate": 5.778221990777203e-07, - "loss": 1.0653, - "step": 6314 - }, - { - "epoch": 0.7593338543858594, - "grad_norm": 2.1122012948574667, - "learning_rate": 5.772746103551372e-07, - "loss": 1.0547, - "step": 6315 - }, - { - "epoch": 0.7594540972764985, - "grad_norm": 3.6913519961290837, - "learning_rate": 5.767272374588648e-07, - "loss": 0.9474, - "step": 6316 - }, - { - "epoch": 0.7595743401671377, - "grad_norm": 2.131193895244733, - "learning_rate": 5.76180080471939e-07, - "loss": 1.0152, - "step": 6317 - }, - { - "epoch": 0.7596945830577767, - "grad_norm": 2.125050263651868, - "learning_rate": 5.756331394773631e-07, - "loss": 0.9486, - "step": 6318 - }, - { - "epoch": 0.7598148259484158, - "grad_norm": 1.7950715676069326, - "learning_rate": 5.750864145581071e-07, - "loss": 0.9968, - "step": 6319 - }, - { - "epoch": 0.7599350688390549, - "grad_norm": 3.292147315621509, - "learning_rate": 5.745399057971085e-07, - "loss": 1.0809, - "step": 6320 - }, - { - "epoch": 0.760055311729694, - "grad_norm": 3.134405097550691, - "learning_rate": 5.739936132772738e-07, - "loss": 0.9818, - "step": 6321 - }, - { - "epoch": 0.760175554620333, - "grad_norm": 2.538714012292869, - "learning_rate": 5.734475370814733e-07, - "loss": 0.9747, - "step": 6322 - }, - { - "epoch": 0.7602957975109722, - "grad_norm": 1.5050235709692303, - "learning_rate": 5.729016772925483e-07, - "loss": 1.0072, - "step": 6323 - }, - { - "epoch": 0.7604160404016113, - "grad_norm": 1.7701815658592166, - "learning_rate": 5.723560339933038e-07, - "loss": 0.9354, - "step": 6324 - }, - { - "epoch": 0.7605362832922503, - "grad_norm": 2.1041648663302235, - "learning_rate": 5.71810607266513e-07, - "loss": 0.8776, - "step": 6325 - }, - { - "epoch": 0.7606565261828895, - "grad_norm": 1.9756719822231505, - "learning_rate": 5.712653971949184e-07, - "loss": 0.8324, - "step": 6326 - }, - { - "epoch": 0.7607767690735285, - "grad_norm": 2.7375514299970076, - "learning_rate": 5.707204038612268e-07, - "loss": 0.993, - "step": 6327 - }, - { - "epoch": 0.7608970119641676, - "grad_norm": 2.1389009535918495, - "learning_rate": 5.701756273481138e-07, - "loss": 0.96, - "step": 6328 - }, - { - "epoch": 0.7610172548548068, - "grad_norm": 1.4014908260871293, - "learning_rate": 5.696310677382212e-07, - "loss": 0.9664, - "step": 6329 - }, - { - "epoch": 0.7611374977454458, - "grad_norm": 0.839611535904133, - "learning_rate": 5.690867251141576e-07, - "loss": 0.8802, - "step": 6330 - }, - { - "epoch": 0.7612577406360849, - "grad_norm": 3.3001287955379364, - "learning_rate": 5.685425995585013e-07, - "loss": 1.1507, - "step": 6331 - }, - { - "epoch": 0.761377983526724, - "grad_norm": 0.7903464418410437, - "learning_rate": 5.679986911537935e-07, - "loss": 0.8528, - "step": 6332 - }, - { - "epoch": 0.7614982264173631, - "grad_norm": 1.9869532618869261, - "learning_rate": 5.674549999825462e-07, - "loss": 0.9101, - "step": 6333 - }, - { - "epoch": 0.7616184693080021, - "grad_norm": 1.121414351394855, - "learning_rate": 5.669115261272363e-07, - "loss": 1.0055, - "step": 6334 - }, - { - "epoch": 0.7617387121986413, - "grad_norm": 3.0378740433579705, - "learning_rate": 5.663682696703081e-07, - "loss": 0.9607, - "step": 6335 - }, - { - "epoch": 0.7618589550892804, - "grad_norm": 1.8705725849938544, - "learning_rate": 5.658252306941746e-07, - "loss": 1.06, - "step": 6336 - }, - { - "epoch": 0.7619791979799194, - "grad_norm": 2.0018649942491615, - "learning_rate": 5.65282409281212e-07, - "loss": 0.989, - "step": 6337 - }, - { - "epoch": 0.7620994408705585, - "grad_norm": 2.069121396013397, - "learning_rate": 5.64739805513768e-07, - "loss": 0.921, - "step": 6338 - }, - { - "epoch": 0.7622196837611976, - "grad_norm": 0.8163152032487161, - "learning_rate": 5.641974194741541e-07, - "loss": 0.8187, - "step": 6339 - }, - { - "epoch": 0.7623399266518367, - "grad_norm": 0.8896263278931439, - "learning_rate": 5.636552512446502e-07, - "loss": 0.8825, - "step": 6340 - }, - { - "epoch": 0.7624601695424758, - "grad_norm": 1.5673794660205709, - "learning_rate": 5.631133009075027e-07, - "loss": 1.0091, - "step": 6341 - }, - { - "epoch": 0.7625804124331149, - "grad_norm": 1.797999486271803, - "learning_rate": 5.625715685449242e-07, - "loss": 0.924, - "step": 6342 - }, - { - "epoch": 0.762700655323754, - "grad_norm": 1.4660389817794075, - "learning_rate": 5.620300542390966e-07, - "loss": 0.9469, - "step": 6343 - }, - { - "epoch": 0.762820898214393, - "grad_norm": 3.2340419431949607, - "learning_rate": 5.614887580721659e-07, - "loss": 1.0809, - "step": 6344 - }, - { - "epoch": 0.7629411411050322, - "grad_norm": 3.4902513158864186, - "learning_rate": 5.609476801262481e-07, - "loss": 0.9703, - "step": 6345 - }, - { - "epoch": 0.7630613839956712, - "grad_norm": 2.3549156235402875, - "learning_rate": 5.604068204834223e-07, - "loss": 0.8797, - "step": 6346 - }, - { - "epoch": 0.7631816268863103, - "grad_norm": 2.4359564173014645, - "learning_rate": 5.598661792257367e-07, - "loss": 0.9819, - "step": 6347 - }, - { - "epoch": 0.7633018697769495, - "grad_norm": 1.8351434816815153, - "learning_rate": 5.593257564352071e-07, - "loss": 1.0011, - "step": 6348 - }, - { - "epoch": 0.7634221126675885, - "grad_norm": 1.628973431196678, - "learning_rate": 5.58785552193815e-07, - "loss": 0.9854, - "step": 6349 - }, - { - "epoch": 0.7635423555582276, - "grad_norm": 1.9847509673789296, - "learning_rate": 5.582455665835086e-07, - "loss": 0.9835, - "step": 6350 - }, - { - "epoch": 0.7636625984488667, - "grad_norm": 2.6995833463963703, - "learning_rate": 5.577057996862036e-07, - "loss": 0.9655, - "step": 6351 - }, - { - "epoch": 0.7637828413395058, - "grad_norm": 3.3897788402693028, - "learning_rate": 5.571662515837814e-07, - "loss": 0.9846, - "step": 6352 - }, - { - "epoch": 0.7639030842301449, - "grad_norm": 1.652843643215303, - "learning_rate": 5.566269223580926e-07, - "loss": 1.0679, - "step": 6353 - }, - { - "epoch": 0.764023327120784, - "grad_norm": 1.9928444770160476, - "learning_rate": 5.560878120909511e-07, - "loss": 0.9761, - "step": 6354 - }, - { - "epoch": 0.7641435700114231, - "grad_norm": 1.0763655417313278, - "learning_rate": 5.55548920864141e-07, - "loss": 0.8803, - "step": 6355 - }, - { - "epoch": 0.7642638129020621, - "grad_norm": 1.862436162891482, - "learning_rate": 5.550102487594113e-07, - "loss": 0.9999, - "step": 6356 - }, - { - "epoch": 0.7643840557927013, - "grad_norm": 1.4989533746605088, - "learning_rate": 5.54471795858477e-07, - "loss": 0.9467, - "step": 6357 - }, - { - "epoch": 0.7645042986833404, - "grad_norm": 1.901059447601634, - "learning_rate": 5.539335622430235e-07, - "loss": 1.0644, - "step": 6358 - }, - { - "epoch": 0.7646245415739794, - "grad_norm": 1.847025402677171, - "learning_rate": 5.533955479946975e-07, - "loss": 0.9767, - "step": 6359 - }, - { - "epoch": 0.7647447844646186, - "grad_norm": 0.9054538913195553, - "learning_rate": 5.528577531951173e-07, - "loss": 0.9056, - "step": 6360 - }, - { - "epoch": 0.7648650273552576, - "grad_norm": 2.7618889870842995, - "learning_rate": 5.523201779258653e-07, - "loss": 0.9706, - "step": 6361 - }, - { - "epoch": 0.7649852702458967, - "grad_norm": 1.6826366704608762, - "learning_rate": 5.517828222684912e-07, - "loss": 1.0709, - "step": 6362 - }, - { - "epoch": 0.7651055131365359, - "grad_norm": 0.7738895828975436, - "learning_rate": 5.512456863045117e-07, - "loss": 0.8374, - "step": 6363 - }, - { - "epoch": 0.7652257560271749, - "grad_norm": 2.775284359083442, - "learning_rate": 5.507087701154089e-07, - "loss": 0.9662, - "step": 6364 - }, - { - "epoch": 0.765345998917814, - "grad_norm": 1.7324824274226058, - "learning_rate": 5.50172073782634e-07, - "loss": 0.975, - "step": 6365 - }, - { - "epoch": 0.7654662418084531, - "grad_norm": 2.0122895551318223, - "learning_rate": 5.496355973876023e-07, - "loss": 1.0986, - "step": 6366 - }, - { - "epoch": 0.7655864846990922, - "grad_norm": 3.480270362962683, - "learning_rate": 5.490993410116984e-07, - "loss": 0.9437, - "step": 6367 - }, - { - "epoch": 0.7657067275897312, - "grad_norm": 1.9116165414417379, - "learning_rate": 5.485633047362704e-07, - "loss": 0.9366, - "step": 6368 - }, - { - "epoch": 0.7658269704803703, - "grad_norm": 2.0271808654384915, - "learning_rate": 5.480274886426341e-07, - "loss": 1.0108, - "step": 6369 - }, - { - "epoch": 0.7659472133710095, - "grad_norm": 2.6323765243742505, - "learning_rate": 5.474918928120744e-07, - "loss": 1.0084, - "step": 6370 - }, - { - "epoch": 0.7660674562616485, - "grad_norm": 1.880239667833889, - "learning_rate": 5.469565173258392e-07, - "loss": 1.1051, - "step": 6371 - }, - { - "epoch": 0.7661876991522876, - "grad_norm": 1.5121121941877458, - "learning_rate": 5.464213622651454e-07, - "loss": 0.8723, - "step": 6372 - }, - { - "epoch": 0.7663079420429267, - "grad_norm": 2.688176801759343, - "learning_rate": 5.458864277111753e-07, - "loss": 1.0685, - "step": 6373 - }, - { - "epoch": 0.7664281849335658, - "grad_norm": 2.322037899828456, - "learning_rate": 5.453517137450769e-07, - "loss": 0.9155, - "step": 6374 - }, - { - "epoch": 0.7665484278242048, - "grad_norm": 1.7468378243900744, - "learning_rate": 5.448172204479684e-07, - "loss": 0.9811, - "step": 6375 - }, - { - "epoch": 0.766668670714844, - "grad_norm": 1.7852723065136424, - "learning_rate": 5.442829479009294e-07, - "loss": 0.9716, - "step": 6376 - }, - { - "epoch": 0.7667889136054831, - "grad_norm": 3.0344914674260246, - "learning_rate": 5.437488961850103e-07, - "loss": 0.9483, - "step": 6377 - }, - { - "epoch": 0.7669091564961221, - "grad_norm": 1.7491832598365185, - "learning_rate": 5.432150653812258e-07, - "loss": 0.9841, - "step": 6378 - }, - { - "epoch": 0.7670293993867613, - "grad_norm": 2.7874205642124465, - "learning_rate": 5.42681455570557e-07, - "loss": 1.0583, - "step": 6379 - }, - { - "epoch": 0.7671496422774003, - "grad_norm": 2.0256153302078115, - "learning_rate": 5.42148066833954e-07, - "loss": 0.8804, - "step": 6380 - }, - { - "epoch": 0.7672698851680394, - "grad_norm": 1.9667070476244135, - "learning_rate": 5.416148992523289e-07, - "loss": 0.9921, - "step": 6381 - }, - { - "epoch": 0.7673901280586786, - "grad_norm": 1.6538630865966968, - "learning_rate": 5.410819529065644e-07, - "loss": 1.0129, - "step": 6382 - }, - { - "epoch": 0.7675103709493176, - "grad_norm": 1.979948377995954, - "learning_rate": 5.405492278775079e-07, - "loss": 0.8841, - "step": 6383 - }, - { - "epoch": 0.7676306138399567, - "grad_norm": 2.5588250208573706, - "learning_rate": 5.400167242459732e-07, - "loss": 1.0226, - "step": 6384 - }, - { - "epoch": 0.7677508567305958, - "grad_norm": 1.575277724832867, - "learning_rate": 5.394844420927405e-07, - "loss": 1.0319, - "step": 6385 - }, - { - "epoch": 0.7678710996212349, - "grad_norm": 10.170938897018063, - "learning_rate": 5.389523814985562e-07, - "loss": 0.9516, - "step": 6386 - }, - { - "epoch": 0.767991342511874, - "grad_norm": 1.9931363108981786, - "learning_rate": 5.384205425441344e-07, - "loss": 0.9897, - "step": 6387 - }, - { - "epoch": 0.7681115854025131, - "grad_norm": 1.7133979002714288, - "learning_rate": 5.378889253101537e-07, - "loss": 1.07, - "step": 6388 - }, - { - "epoch": 0.7682318282931522, - "grad_norm": 1.7245540761236249, - "learning_rate": 5.373575298772617e-07, - "loss": 1.0337, - "step": 6389 - }, - { - "epoch": 0.7683520711837912, - "grad_norm": 0.7860941812252105, - "learning_rate": 5.368263563260689e-07, - "loss": 0.8545, - "step": 6390 - }, - { - "epoch": 0.7684723140744304, - "grad_norm": 2.550979115872076, - "learning_rate": 5.362954047371537e-07, - "loss": 0.8697, - "step": 6391 - }, - { - "epoch": 0.7685925569650695, - "grad_norm": 8.149184634371597, - "learning_rate": 5.357646751910627e-07, - "loss": 0.9525, - "step": 6392 - }, - { - "epoch": 0.7687127998557085, - "grad_norm": 2.1428892314869783, - "learning_rate": 5.352341677683061e-07, - "loss": 1.027, - "step": 6393 - }, - { - "epoch": 0.7688330427463477, - "grad_norm": 1.8138844694298257, - "learning_rate": 5.347038825493617e-07, - "loss": 1.0182, - "step": 6394 - }, - { - "epoch": 0.7689532856369867, - "grad_norm": 2.062440119943377, - "learning_rate": 5.341738196146732e-07, - "loss": 0.9049, - "step": 6395 - }, - { - "epoch": 0.7690735285276258, - "grad_norm": 2.52250961392615, - "learning_rate": 5.336439790446503e-07, - "loss": 0.9612, - "step": 6396 - }, - { - "epoch": 0.769193771418265, - "grad_norm": 1.763527628765201, - "learning_rate": 5.331143609196711e-07, - "loss": 0.8606, - "step": 6397 - }, - { - "epoch": 0.769314014308904, - "grad_norm": 1.8473773124747976, - "learning_rate": 5.325849653200758e-07, - "loss": 1.0017, - "step": 6398 - }, - { - "epoch": 0.7694342571995431, - "grad_norm": 2.0242405774298966, - "learning_rate": 5.32055792326175e-07, - "loss": 0.9906, - "step": 6399 - }, - { - "epoch": 0.7695545000901821, - "grad_norm": 1.823507362210699, - "learning_rate": 5.315268420182437e-07, - "loss": 0.9572, - "step": 6400 - }, - { - "epoch": 0.7696747429808213, - "grad_norm": 1.6807327212152365, - "learning_rate": 5.309981144765221e-07, - "loss": 0.9942, - "step": 6401 - }, - { - "epoch": 0.7697949858714603, - "grad_norm": 2.5643911129033623, - "learning_rate": 5.304696097812196e-07, - "loss": 0.9818, - "step": 6402 - }, - { - "epoch": 0.7699152287620994, - "grad_norm": 3.051444202636148, - "learning_rate": 5.299413280125078e-07, - "loss": 0.8346, - "step": 6403 - }, - { - "epoch": 0.7700354716527386, - "grad_norm": 2.0017726909923086, - "learning_rate": 5.294132692505284e-07, - "loss": 0.9527, - "step": 6404 - }, - { - "epoch": 0.7701557145433776, - "grad_norm": 2.15218046645556, - "learning_rate": 5.288854335753861e-07, - "loss": 1.0172, - "step": 6405 - }, - { - "epoch": 0.7702759574340167, - "grad_norm": 1.6778002405171684, - "learning_rate": 5.283578210671551e-07, - "loss": 0.9914, - "step": 6406 - }, - { - "epoch": 0.7703962003246558, - "grad_norm": 2.2645247319935153, - "learning_rate": 5.278304318058719e-07, - "loss": 0.9917, - "step": 6407 - }, - { - "epoch": 0.7705164432152949, - "grad_norm": 1.6445634344942988, - "learning_rate": 5.273032658715411e-07, - "loss": 1.0211, - "step": 6408 - }, - { - "epoch": 0.7706366861059339, - "grad_norm": 2.3319914796846968, - "learning_rate": 5.267763233441347e-07, - "loss": 1.0004, - "step": 6409 - }, - { - "epoch": 0.7707569289965731, - "grad_norm": 2.0640675916471785, - "learning_rate": 5.26249604303588e-07, - "loss": 0.9261, - "step": 6410 - }, - { - "epoch": 0.7708771718872122, - "grad_norm": 3.28878427810233, - "learning_rate": 5.257231088298057e-07, - "loss": 1.0045, - "step": 6411 - }, - { - "epoch": 0.7709974147778512, - "grad_norm": 0.8448982196658622, - "learning_rate": 5.25196837002655e-07, - "loss": 0.8008, - "step": 6412 - }, - { - "epoch": 0.7711176576684904, - "grad_norm": 2.734637787316599, - "learning_rate": 5.24670788901971e-07, - "loss": 0.9233, - "step": 6413 - }, - { - "epoch": 0.7712379005591294, - "grad_norm": 2.5032990622513243, - "learning_rate": 5.241449646075557e-07, - "loss": 0.9105, - "step": 6414 - }, - { - "epoch": 0.7713581434497685, - "grad_norm": 1.974994203100268, - "learning_rate": 5.236193641991762e-07, - "loss": 0.9543, - "step": 6415 - }, - { - "epoch": 0.7714783863404077, - "grad_norm": 2.0438910712687597, - "learning_rate": 5.23093987756565e-07, - "loss": 0.9366, - "step": 6416 - }, - { - "epoch": 0.7715986292310467, - "grad_norm": 2.1517767462235433, - "learning_rate": 5.225688353594217e-07, - "loss": 0.9878, - "step": 6417 - }, - { - "epoch": 0.7717188721216858, - "grad_norm": 2.0513485078561353, - "learning_rate": 5.220439070874108e-07, - "loss": 1.0084, - "step": 6418 - }, - { - "epoch": 0.7718391150123249, - "grad_norm": 1.5388868202596473, - "learning_rate": 5.215192030201652e-07, - "loss": 0.9447, - "step": 6419 - }, - { - "epoch": 0.771959357902964, - "grad_norm": 1.8946338743749613, - "learning_rate": 5.209947232372798e-07, - "loss": 1.0883, - "step": 6420 - }, - { - "epoch": 0.772079600793603, - "grad_norm": 1.643477077091002, - "learning_rate": 5.204704678183196e-07, - "loss": 1.0309, - "step": 6421 - }, - { - "epoch": 0.7721998436842422, - "grad_norm": 1.8904759371925377, - "learning_rate": 5.19946436842813e-07, - "loss": 1.0725, - "step": 6422 - }, - { - "epoch": 0.7723200865748813, - "grad_norm": 2.101692280785947, - "learning_rate": 5.194226303902546e-07, - "loss": 0.9158, - "step": 6423 - }, - { - "epoch": 0.7724403294655203, - "grad_norm": 2.0397030346832463, - "learning_rate": 5.188990485401072e-07, - "loss": 0.9412, - "step": 6424 - }, - { - "epoch": 0.7725605723561595, - "grad_norm": 1.9121765187274378, - "learning_rate": 5.183756913717954e-07, - "loss": 1.0911, - "step": 6425 - }, - { - "epoch": 0.7726808152467985, - "grad_norm": 1.8672584296764152, - "learning_rate": 5.178525589647136e-07, - "loss": 0.9659, - "step": 6426 - }, - { - "epoch": 0.7728010581374376, - "grad_norm": 2.183692233845182, - "learning_rate": 5.173296513982197e-07, - "loss": 1.0168, - "step": 6427 - }, - { - "epoch": 0.7729213010280768, - "grad_norm": 3.773804577650439, - "learning_rate": 5.168069687516398e-07, - "loss": 0.8836, - "step": 6428 - }, - { - "epoch": 0.7730415439187158, - "grad_norm": 2.042314997387393, - "learning_rate": 5.16284511104263e-07, - "loss": 0.9471, - "step": 6429 - }, - { - "epoch": 0.7731617868093549, - "grad_norm": 2.7405117381233732, - "learning_rate": 5.157622785353457e-07, - "loss": 1.0348, - "step": 6430 - }, - { - "epoch": 0.7732820296999939, - "grad_norm": 0.6957676929838158, - "learning_rate": 5.152402711241113e-07, - "loss": 0.8523, - "step": 6431 - }, - { - "epoch": 0.7734022725906331, - "grad_norm": 2.2260140797665886, - "learning_rate": 5.147184889497465e-07, - "loss": 1.0607, - "step": 6432 - }, - { - "epoch": 0.7735225154812722, - "grad_norm": 2.507380368087821, - "learning_rate": 5.141969320914072e-07, - "loss": 1.0317, - "step": 6433 - }, - { - "epoch": 0.7736427583719112, - "grad_norm": 2.7230128926263686, - "learning_rate": 5.136756006282113e-07, - "loss": 0.8629, - "step": 6434 - }, - { - "epoch": 0.7737630012625504, - "grad_norm": 2.058278242395842, - "learning_rate": 5.131544946392446e-07, - "loss": 1.0766, - "step": 6435 - }, - { - "epoch": 0.7738832441531894, - "grad_norm": 2.7386005122983335, - "learning_rate": 5.126336142035592e-07, - "loss": 0.8722, - "step": 6436 - }, - { - "epoch": 0.7740034870438285, - "grad_norm": 2.206015996437489, - "learning_rate": 5.121129594001721e-07, - "loss": 0.949, - "step": 6437 - }, - { - "epoch": 0.7741237299344677, - "grad_norm": 1.5135571245566732, - "learning_rate": 5.115925303080661e-07, - "loss": 1.0453, - "step": 6438 - }, - { - "epoch": 0.7742439728251067, - "grad_norm": 2.4338293558515027, - "learning_rate": 5.110723270061899e-07, - "loss": 1.0211, - "step": 6439 - }, - { - "epoch": 0.7743642157157458, - "grad_norm": 26.27266506632892, - "learning_rate": 5.105523495734572e-07, - "loss": 1.027, - "step": 6440 - }, - { - "epoch": 0.7744844586063849, - "grad_norm": 1.4870533120219362, - "learning_rate": 5.100325980887499e-07, - "loss": 0.973, - "step": 6441 - }, - { - "epoch": 0.774604701497024, - "grad_norm": 2.0360841008861463, - "learning_rate": 5.095130726309116e-07, - "loss": 1.0596, - "step": 6442 - }, - { - "epoch": 0.774724944387663, - "grad_norm": 0.8946602074567126, - "learning_rate": 5.089937732787559e-07, - "loss": 0.897, - "step": 6443 - }, - { - "epoch": 0.7748451872783022, - "grad_norm": 2.2129296268214587, - "learning_rate": 5.084747001110592e-07, - "loss": 0.896, - "step": 6444 - }, - { - "epoch": 0.7749654301689413, - "grad_norm": 1.9491567353377084, - "learning_rate": 5.07955853206564e-07, - "loss": 0.9288, - "step": 6445 - }, - { - "epoch": 0.7750856730595803, - "grad_norm": 1.6582861566143003, - "learning_rate": 5.074372326439807e-07, - "loss": 0.9416, - "step": 6446 - }, - { - "epoch": 0.7752059159502195, - "grad_norm": 5.420092726755326, - "learning_rate": 5.069188385019814e-07, - "loss": 0.9677, - "step": 6447 - }, - { - "epoch": 0.7753261588408585, - "grad_norm": 2.531057374006262, - "learning_rate": 5.064006708592077e-07, - "loss": 0.8507, - "step": 6448 - }, - { - "epoch": 0.7754464017314976, - "grad_norm": 2.205076178793982, - "learning_rate": 5.058827297942641e-07, - "loss": 0.9816, - "step": 6449 - }, - { - "epoch": 0.7755666446221368, - "grad_norm": 2.5324247483486224, - "learning_rate": 5.053650153857237e-07, - "loss": 0.9698, - "step": 6450 - }, - { - "epoch": 0.7756868875127758, - "grad_norm": 1.7165307716480496, - "learning_rate": 5.048475277121214e-07, - "loss": 0.9292, - "step": 6451 - }, - { - "epoch": 0.7758071304034149, - "grad_norm": 1.7579520615170485, - "learning_rate": 5.043302668519598e-07, - "loss": 0.9968, - "step": 6452 - }, - { - "epoch": 0.775927373294054, - "grad_norm": 1.7257590517383887, - "learning_rate": 5.038132328837079e-07, - "loss": 0.9534, - "step": 6453 - }, - { - "epoch": 0.7760476161846931, - "grad_norm": 1.9153281250956, - "learning_rate": 5.032964258857993e-07, - "loss": 0.9663, - "step": 6454 - }, - { - "epoch": 0.7761678590753321, - "grad_norm": 1.5464737496970964, - "learning_rate": 5.027798459366329e-07, - "loss": 0.9159, - "step": 6455 - }, - { - "epoch": 0.7762881019659713, - "grad_norm": 5.6335932895489345, - "learning_rate": 5.02263493114573e-07, - "loss": 0.8648, - "step": 6456 - }, - { - "epoch": 0.7764083448566104, - "grad_norm": 3.1337919535375414, - "learning_rate": 5.017473674979502e-07, - "loss": 0.9957, - "step": 6457 - }, - { - "epoch": 0.7765285877472494, - "grad_norm": 0.7903144648758675, - "learning_rate": 5.01231469165061e-07, - "loss": 0.8301, - "step": 6458 - }, - { - "epoch": 0.7766488306378886, - "grad_norm": 0.961846859515971, - "learning_rate": 5.007157981941663e-07, - "loss": 0.8472, - "step": 6459 - }, - { - "epoch": 0.7767690735285276, - "grad_norm": 1.0305959766157646, - "learning_rate": 5.002003546634928e-07, - "loss": 0.9311, - "step": 6460 - }, - { - "epoch": 0.7768893164191667, - "grad_norm": 2.06773428976709, - "learning_rate": 4.996851386512331e-07, - "loss": 0.9903, - "step": 6461 - }, - { - "epoch": 0.7770095593098058, - "grad_norm": 1.5859489252590304, - "learning_rate": 4.991701502355444e-07, - "loss": 1.0571, - "step": 6462 - }, - { - "epoch": 0.7771298022004449, - "grad_norm": 1.6202885477912994, - "learning_rate": 4.986553894945518e-07, - "loss": 0.9953, - "step": 6463 - }, - { - "epoch": 0.777250045091084, - "grad_norm": 2.214397513296407, - "learning_rate": 4.981408565063416e-07, - "loss": 1.094, - "step": 6464 - }, - { - "epoch": 0.777370287981723, - "grad_norm": 2.4894515658400205, - "learning_rate": 4.976265513489701e-07, - "loss": 0.9924, - "step": 6465 - }, - { - "epoch": 0.7774905308723622, - "grad_norm": 1.8772742148613246, - "learning_rate": 4.971124741004562e-07, - "loss": 1.0332, - "step": 6466 - }, - { - "epoch": 0.7776107737630013, - "grad_norm": 1.8754162008571265, - "learning_rate": 4.965986248387846e-07, - "loss": 0.9899, - "step": 6467 - }, - { - "epoch": 0.7777310166536403, - "grad_norm": 2.01728834968296, - "learning_rate": 4.960850036419073e-07, - "loss": 0.997, - "step": 6468 - }, - { - "epoch": 0.7778512595442795, - "grad_norm": 1.706662904293165, - "learning_rate": 4.955716105877378e-07, - "loss": 1.0241, - "step": 6469 - }, - { - "epoch": 0.7779715024349185, - "grad_norm": 1.5372512926365183, - "learning_rate": 4.950584457541598e-07, - "loss": 1.0633, - "step": 6470 - }, - { - "epoch": 0.7780917453255576, - "grad_norm": 1.517136282404995, - "learning_rate": 4.945455092190183e-07, - "loss": 1.0537, - "step": 6471 - }, - { - "epoch": 0.7782119882161967, - "grad_norm": 0.7030968130607792, - "learning_rate": 4.940328010601271e-07, - "loss": 0.8079, - "step": 6472 - }, - { - "epoch": 0.7783322311068358, - "grad_norm": 2.2059071606755745, - "learning_rate": 4.935203213552621e-07, - "loss": 0.9894, - "step": 6473 - }, - { - "epoch": 0.7784524739974749, - "grad_norm": 2.695607752454042, - "learning_rate": 4.930080701821662e-07, - "loss": 0.8898, - "step": 6474 - }, - { - "epoch": 0.778572716888114, - "grad_norm": 1.91244812440017, - "learning_rate": 4.92496047618548e-07, - "loss": 1.0015, - "step": 6475 - }, - { - "epoch": 0.7786929597787531, - "grad_norm": 5.888733739828432, - "learning_rate": 4.919842537420811e-07, - "loss": 1.0031, - "step": 6476 - }, - { - "epoch": 0.7788132026693921, - "grad_norm": 1.6102619874931776, - "learning_rate": 4.91472688630404e-07, - "loss": 1.0273, - "step": 6477 - }, - { - "epoch": 0.7789334455600313, - "grad_norm": 2.377617247184434, - "learning_rate": 4.909613523611202e-07, - "loss": 0.9705, - "step": 6478 - }, - { - "epoch": 0.7790536884506704, - "grad_norm": 1.752013682804066, - "learning_rate": 4.904502450117991e-07, - "loss": 0.9755, - "step": 6479 - }, - { - "epoch": 0.7791739313413094, - "grad_norm": 2.7280039652294974, - "learning_rate": 4.899393666599762e-07, - "loss": 0.9552, - "step": 6480 - }, - { - "epoch": 0.7792941742319486, - "grad_norm": 3.450558083771021, - "learning_rate": 4.894287173831506e-07, - "loss": 0.9506, - "step": 6481 - }, - { - "epoch": 0.7794144171225876, - "grad_norm": 2.2616473830695756, - "learning_rate": 4.889182972587877e-07, - "loss": 1.0711, - "step": 6482 - }, - { - "epoch": 0.7795346600132267, - "grad_norm": 5.516530943216121, - "learning_rate": 4.884081063643177e-07, - "loss": 0.9014, - "step": 6483 - }, - { - "epoch": 0.7796549029038659, - "grad_norm": 0.894506429136317, - "learning_rate": 4.878981447771353e-07, - "loss": 0.7996, - "step": 6484 - }, - { - "epoch": 0.7797751457945049, - "grad_norm": 1.476837213382346, - "learning_rate": 4.873884125746035e-07, - "loss": 0.9617, - "step": 6485 - }, - { - "epoch": 0.779895388685144, - "grad_norm": 3.332544392363768, - "learning_rate": 4.868789098340456e-07, - "loss": 0.9555, - "step": 6486 - }, - { - "epoch": 0.7800156315757831, - "grad_norm": 2.628164472975251, - "learning_rate": 4.863696366327543e-07, - "loss": 0.943, - "step": 6487 - }, - { - "epoch": 0.7801358744664222, - "grad_norm": 3.370924148625076, - "learning_rate": 4.85860593047986e-07, - "loss": 1.013, - "step": 6488 - }, - { - "epoch": 0.7802561173570612, - "grad_norm": 1.6404520579421815, - "learning_rate": 4.853517791569613e-07, - "loss": 0.9743, - "step": 6489 - }, - { - "epoch": 0.7803763602477004, - "grad_norm": 2.0046606595746788, - "learning_rate": 4.848431950368684e-07, - "loss": 0.8859, - "step": 6490 - }, - { - "epoch": 0.7804966031383395, - "grad_norm": 0.7449657041221064, - "learning_rate": 4.843348407648569e-07, - "loss": 0.8196, - "step": 6491 - }, - { - "epoch": 0.7806168460289785, - "grad_norm": 2.1070996480949717, - "learning_rate": 4.838267164180457e-07, - "loss": 1.0556, - "step": 6492 - }, - { - "epoch": 0.7807370889196176, - "grad_norm": 2.0487932142223517, - "learning_rate": 4.833188220735156e-07, - "loss": 1.0743, - "step": 6493 - }, - { - "epoch": 0.7808573318102567, - "grad_norm": 2.4892639423472978, - "learning_rate": 4.828111578083152e-07, - "loss": 0.9691, - "step": 6494 - }, - { - "epoch": 0.7809775747008958, - "grad_norm": 1.921351788020314, - "learning_rate": 4.823037236994556e-07, - "loss": 1.0356, - "step": 6495 - }, - { - "epoch": 0.7810978175915348, - "grad_norm": 0.7780482104914633, - "learning_rate": 4.817965198239136e-07, - "loss": 0.8228, - "step": 6496 - }, - { - "epoch": 0.781218060482174, - "grad_norm": 1.8846646771055573, - "learning_rate": 4.812895462586331e-07, - "loss": 0.9639, - "step": 6497 - }, - { - "epoch": 0.7813383033728131, - "grad_norm": 1.5173996436228647, - "learning_rate": 4.807828030805207e-07, - "loss": 1.0513, - "step": 6498 - }, - { - "epoch": 0.7814585462634521, - "grad_norm": 1.7500346044116337, - "learning_rate": 4.802762903664495e-07, - "loss": 0.9062, - "step": 6499 - }, - { - "epoch": 0.7815787891540913, - "grad_norm": 2.253573970003014, - "learning_rate": 4.797700081932565e-07, - "loss": 0.9592, - "step": 6500 - }, - { - "epoch": 0.7816990320447303, - "grad_norm": 2.036948324769974, - "learning_rate": 4.792639566377442e-07, - "loss": 1.0452, - "step": 6501 - }, - { - "epoch": 0.7818192749353694, - "grad_norm": 9.451886056465877, - "learning_rate": 4.78758135776681e-07, - "loss": 1.0057, - "step": 6502 - }, - { - "epoch": 0.7819395178260086, - "grad_norm": 1.843584796810387, - "learning_rate": 4.782525456867989e-07, - "loss": 1.0114, - "step": 6503 - }, - { - "epoch": 0.7820597607166476, - "grad_norm": 1.6471261688897592, - "learning_rate": 4.777471864447959e-07, - "loss": 1.0625, - "step": 6504 - }, - { - "epoch": 0.7821800036072867, - "grad_norm": 2.2212291352897298, - "learning_rate": 4.772420581273344e-07, - "loss": 1.0314, - "step": 6505 - }, - { - "epoch": 0.7823002464979258, - "grad_norm": 1.8275748835301602, - "learning_rate": 4.7673716081104134e-07, - "loss": 0.9987, - "step": 6506 - }, - { - "epoch": 0.7824204893885649, - "grad_norm": 1.6207641266444446, - "learning_rate": 4.762324945725109e-07, - "loss": 1.0717, - "step": 6507 - }, - { - "epoch": 0.782540732279204, - "grad_norm": 1.658418135974718, - "learning_rate": 4.7572805948829844e-07, - "loss": 0.9819, - "step": 6508 - }, - { - "epoch": 0.7826609751698431, - "grad_norm": 1.5615200784633978, - "learning_rate": 4.7522385563492795e-07, - "loss": 0.936, - "step": 6509 - }, - { - "epoch": 0.7827812180604822, - "grad_norm": 1.890084055287877, - "learning_rate": 4.747198830888863e-07, - "loss": 0.927, - "step": 6510 - }, - { - "epoch": 0.7829014609511212, - "grad_norm": 2.093559711975907, - "learning_rate": 4.742161419266251e-07, - "loss": 0.9134, - "step": 6511 - }, - { - "epoch": 0.7830217038417604, - "grad_norm": 2.193244693302251, - "learning_rate": 4.7371263222456304e-07, - "loss": 0.8869, - "step": 6512 - }, - { - "epoch": 0.7831419467323995, - "grad_norm": 0.8124169160471634, - "learning_rate": 4.7320935405908004e-07, - "loss": 0.8757, - "step": 6513 - }, - { - "epoch": 0.7832621896230385, - "grad_norm": 2.0284607737159104, - "learning_rate": 4.7270630750652475e-07, - "loss": 1.051, - "step": 6514 - }, - { - "epoch": 0.7833824325136777, - "grad_norm": 2.789980526244316, - "learning_rate": 4.7220349264320746e-07, - "loss": 1.0324, - "step": 6515 - }, - { - "epoch": 0.7835026754043167, - "grad_norm": 0.7920546877432183, - "learning_rate": 4.71700909545407e-07, - "loss": 0.8243, - "step": 6516 - }, - { - "epoch": 0.7836229182949558, - "grad_norm": 2.06911114078808, - "learning_rate": 4.711985582893627e-07, - "loss": 0.9987, - "step": 6517 - }, - { - "epoch": 0.783743161185595, - "grad_norm": 1.7539827801632293, - "learning_rate": 4.706964389512811e-07, - "loss": 0.9461, - "step": 6518 - }, - { - "epoch": 0.783863404076234, - "grad_norm": 2.6990568385716847, - "learning_rate": 4.701945516073345e-07, - "loss": 1.1022, - "step": 6519 - }, - { - "epoch": 0.7839836469668731, - "grad_norm": 2.0640470309259, - "learning_rate": 4.696928963336577e-07, - "loss": 0.9805, - "step": 6520 - }, - { - "epoch": 0.7841038898575122, - "grad_norm": 0.8785306679068294, - "learning_rate": 4.6919147320635224e-07, - "loss": 0.8666, - "step": 6521 - }, - { - "epoch": 0.7842241327481513, - "grad_norm": 2.874644547755416, - "learning_rate": 4.6869028230148286e-07, - "loss": 0.9517, - "step": 6522 - }, - { - "epoch": 0.7843443756387903, - "grad_norm": 2.1741094278696753, - "learning_rate": 4.6818932369507957e-07, - "loss": 0.8296, - "step": 6523 - }, - { - "epoch": 0.7844646185294295, - "grad_norm": 2.190451021128835, - "learning_rate": 4.676885974631386e-07, - "loss": 1.1105, - "step": 6524 - }, - { - "epoch": 0.7845848614200686, - "grad_norm": 4.402790199259589, - "learning_rate": 4.67188103681619e-07, - "loss": 1.0315, - "step": 6525 - }, - { - "epoch": 0.7847051043107076, - "grad_norm": 2.257747768015345, - "learning_rate": 4.666878424264453e-07, - "loss": 0.9195, - "step": 6526 - }, - { - "epoch": 0.7848253472013467, - "grad_norm": 2.8652643307483157, - "learning_rate": 4.661878137735069e-07, - "loss": 0.9671, - "step": 6527 - }, - { - "epoch": 0.7849455900919858, - "grad_norm": 8.91543523063022, - "learning_rate": 4.656880177986571e-07, - "loss": 0.9783, - "step": 6528 - }, - { - "epoch": 0.7850658329826249, - "grad_norm": 2.0008093865087093, - "learning_rate": 4.6518845457771607e-07, - "loss": 1.0411, - "step": 6529 - }, - { - "epoch": 0.7851860758732639, - "grad_norm": 2.029435741383738, - "learning_rate": 4.646891241864652e-07, - "loss": 1.0292, - "step": 6530 - }, - { - "epoch": 0.7853063187639031, - "grad_norm": 1.8335886112851405, - "learning_rate": 4.6419002670065397e-07, - "loss": 0.9636, - "step": 6531 - }, - { - "epoch": 0.7854265616545422, - "grad_norm": 1.9258232189503255, - "learning_rate": 4.6369116219599445e-07, - "loss": 1.0756, - "step": 6532 - }, - { - "epoch": 0.7855468045451812, - "grad_norm": 2.042804076813618, - "learning_rate": 4.631925307481637e-07, - "loss": 1.0236, - "step": 6533 - }, - { - "epoch": 0.7856670474358204, - "grad_norm": 2.514021178594102, - "learning_rate": 4.6269413243280533e-07, - "loss": 0.9789, - "step": 6534 - }, - { - "epoch": 0.7857872903264594, - "grad_norm": 2.7098160328657586, - "learning_rate": 4.621959673255236e-07, - "loss": 0.9782, - "step": 6535 - }, - { - "epoch": 0.7859075332170985, - "grad_norm": 1.896855578252418, - "learning_rate": 4.6169803550189135e-07, - "loss": 1.1282, - "step": 6536 - }, - { - "epoch": 0.7860277761077377, - "grad_norm": 2.111543120413418, - "learning_rate": 4.6120033703744355e-07, - "loss": 0.9948, - "step": 6537 - }, - { - "epoch": 0.7861480189983767, - "grad_norm": 1.9329305108570887, - "learning_rate": 4.607028720076822e-07, - "loss": 1.0129, - "step": 6538 - }, - { - "epoch": 0.7862682618890158, - "grad_norm": 2.177162266323994, - "learning_rate": 4.6020564048807074e-07, - "loss": 0.9625, - "step": 6539 - }, - { - "epoch": 0.7863885047796549, - "grad_norm": 2.6301439837650893, - "learning_rate": 4.5970864255403883e-07, - "loss": 0.9487, - "step": 6540 - }, - { - "epoch": 0.786508747670294, - "grad_norm": 1.77934806071752, - "learning_rate": 4.59211878280982e-07, - "loss": 1.053, - "step": 6541 - }, - { - "epoch": 0.786628990560933, - "grad_norm": 3.720926881805286, - "learning_rate": 4.587153477442578e-07, - "loss": 0.9317, - "step": 6542 - }, - { - "epoch": 0.7867492334515722, - "grad_norm": 2.6161196223915506, - "learning_rate": 4.582190510191899e-07, - "loss": 1.0487, - "step": 6543 - }, - { - "epoch": 0.7868694763422113, - "grad_norm": 1.7500510889497845, - "learning_rate": 4.5772298818106625e-07, - "loss": 1.092, - "step": 6544 - }, - { - "epoch": 0.7869897192328503, - "grad_norm": 3.352943971552043, - "learning_rate": 4.572271593051384e-07, - "loss": 0.9589, - "step": 6545 - }, - { - "epoch": 0.7871099621234895, - "grad_norm": 1.6323989271388961, - "learning_rate": 4.567315644666245e-07, - "loss": 1.0038, - "step": 6546 - }, - { - "epoch": 0.7872302050141285, - "grad_norm": 2.344001858212512, - "learning_rate": 4.5623620374070507e-07, - "loss": 1.0706, - "step": 6547 - }, - { - "epoch": 0.7873504479047676, - "grad_norm": 0.8188498399736087, - "learning_rate": 4.557410772025263e-07, - "loss": 0.8588, - "step": 6548 - }, - { - "epoch": 0.7874706907954068, - "grad_norm": 2.0341867198630244, - "learning_rate": 4.5524618492719803e-07, - "loss": 0.894, - "step": 6549 - }, - { - "epoch": 0.7875909336860458, - "grad_norm": 1.424901738041515, - "learning_rate": 4.54751526989795e-07, - "loss": 1.0075, - "step": 6550 - }, - { - "epoch": 0.7877111765766849, - "grad_norm": 1.846027519339187, - "learning_rate": 4.5425710346535775e-07, - "loss": 1.0222, - "step": 6551 - }, - { - "epoch": 0.787831419467324, - "grad_norm": 2.0844247883536653, - "learning_rate": 4.537629144288877e-07, - "loss": 1.0489, - "step": 6552 - }, - { - "epoch": 0.7879516623579631, - "grad_norm": 4.389689824436431, - "learning_rate": 4.5326895995535477e-07, - "loss": 0.9736, - "step": 6553 - }, - { - "epoch": 0.7880719052486022, - "grad_norm": 2.349185794438054, - "learning_rate": 4.527752401196907e-07, - "loss": 1.0757, - "step": 6554 - }, - { - "epoch": 0.7881921481392413, - "grad_norm": 1.9608833943839827, - "learning_rate": 4.5228175499679254e-07, - "loss": 0.9022, - "step": 6555 - }, - { - "epoch": 0.7883123910298804, - "grad_norm": 0.8667132825113352, - "learning_rate": 4.5178850466152174e-07, - "loss": 0.796, - "step": 6556 - }, - { - "epoch": 0.7884326339205194, - "grad_norm": 2.2403778322909953, - "learning_rate": 4.512954891887031e-07, - "loss": 1.0452, - "step": 6557 - }, - { - "epoch": 0.7885528768111585, - "grad_norm": 2.5261759823880325, - "learning_rate": 4.5080270865312806e-07, - "loss": 1.0675, - "step": 6558 - }, - { - "epoch": 0.7886731197017977, - "grad_norm": 2.398409566809805, - "learning_rate": 4.5031016312954985e-07, - "loss": 0.9392, - "step": 6559 - }, - { - "epoch": 0.7887933625924367, - "grad_norm": 1.8134997011515221, - "learning_rate": 4.498178526926886e-07, - "loss": 0.9759, - "step": 6560 - }, - { - "epoch": 0.7889136054830758, - "grad_norm": 2.3169648653213946, - "learning_rate": 4.4932577741722635e-07, - "loss": 0.96, - "step": 6561 - }, - { - "epoch": 0.7890338483737149, - "grad_norm": 1.6811834017244414, - "learning_rate": 4.4883393737780985e-07, - "loss": 0.9811, - "step": 6562 - }, - { - "epoch": 0.789154091264354, - "grad_norm": 2.324883579087571, - "learning_rate": 4.4834233264905254e-07, - "loss": 1.0102, - "step": 6563 - }, - { - "epoch": 0.789274334154993, - "grad_norm": 2.655230517249198, - "learning_rate": 4.478509633055294e-07, - "loss": 0.9532, - "step": 6564 - }, - { - "epoch": 0.7893945770456322, - "grad_norm": 2.4843005283158144, - "learning_rate": 4.473598294217813e-07, - "loss": 1.0388, - "step": 6565 - }, - { - "epoch": 0.7895148199362713, - "grad_norm": 2.096884132513179, - "learning_rate": 4.468689310723124e-07, - "loss": 0.9526, - "step": 6566 - }, - { - "epoch": 0.7896350628269103, - "grad_norm": 3.0243781152004807, - "learning_rate": 4.463782683315913e-07, - "loss": 1.0125, - "step": 6567 - }, - { - "epoch": 0.7897553057175495, - "grad_norm": 2.0938588583816347, - "learning_rate": 4.458878412740523e-07, - "loss": 0.9537, - "step": 6568 - }, - { - "epoch": 0.7898755486081885, - "grad_norm": 5.781384605696491, - "learning_rate": 4.453976499740919e-07, - "loss": 1.0047, - "step": 6569 - }, - { - "epoch": 0.7899957914988276, - "grad_norm": 6.624199009103442, - "learning_rate": 4.4490769450607215e-07, - "loss": 1.0122, - "step": 6570 - }, - { - "epoch": 0.7901160343894668, - "grad_norm": 1.8974919731362383, - "learning_rate": 4.4441797494431845e-07, - "loss": 0.9681, - "step": 6571 - }, - { - "epoch": 0.7902362772801058, - "grad_norm": 2.117514046686595, - "learning_rate": 4.439284913631207e-07, - "loss": 1.0093, - "step": 6572 - }, - { - "epoch": 0.7903565201707449, - "grad_norm": 2.699768717008124, - "learning_rate": 4.434392438367347e-07, - "loss": 1.0664, - "step": 6573 - }, - { - "epoch": 0.790476763061384, - "grad_norm": 1.8796978272418339, - "learning_rate": 4.4295023243937677e-07, - "loss": 0.9686, - "step": 6574 - }, - { - "epoch": 0.7905970059520231, - "grad_norm": 1.6906619647880259, - "learning_rate": 4.4246145724523123e-07, - "loss": 1.0381, - "step": 6575 - }, - { - "epoch": 0.7907172488426621, - "grad_norm": 3.4520140366782734, - "learning_rate": 4.41972918328444e-07, - "loss": 0.995, - "step": 6576 - }, - { - "epoch": 0.7908374917333013, - "grad_norm": 1.919520727182314, - "learning_rate": 4.4148461576312646e-07, - "loss": 1.0127, - "step": 6577 - }, - { - "epoch": 0.7909577346239404, - "grad_norm": 1.5254706485091383, - "learning_rate": 4.4099654962335343e-07, - "loss": 0.9784, - "step": 6578 - }, - { - "epoch": 0.7910779775145794, - "grad_norm": 4.281268934221931, - "learning_rate": 4.405087199831636e-07, - "loss": 0.9724, - "step": 6579 - }, - { - "epoch": 0.7911982204052186, - "grad_norm": 1.9581911224782707, - "learning_rate": 4.400211269165619e-07, - "loss": 0.9054, - "step": 6580 - }, - { - "epoch": 0.7913184632958576, - "grad_norm": 1.458182526919924, - "learning_rate": 4.3953377049751416e-07, - "loss": 0.9996, - "step": 6581 - }, - { - "epoch": 0.7914387061864967, - "grad_norm": 2.5581373899725053, - "learning_rate": 4.390466507999537e-07, - "loss": 1.0072, - "step": 6582 - }, - { - "epoch": 0.7915589490771359, - "grad_norm": 2.3880421766840865, - "learning_rate": 4.385597678977748e-07, - "loss": 0.9899, - "step": 6583 - }, - { - "epoch": 0.7916791919677749, - "grad_norm": 1.6457898178464276, - "learning_rate": 4.3807312186483726e-07, - "loss": 0.9762, - "step": 6584 - }, - { - "epoch": 0.791799434858414, - "grad_norm": 1.7622192077977066, - "learning_rate": 4.375867127749655e-07, - "loss": 1.0084, - "step": 6585 - }, - { - "epoch": 0.7919196777490531, - "grad_norm": 2.1050058963720235, - "learning_rate": 4.3710054070194744e-07, - "loss": 0.9129, - "step": 6586 - }, - { - "epoch": 0.7920399206396922, - "grad_norm": 3.124555327249302, - "learning_rate": 4.3661460571953455e-07, - "loss": 0.8949, - "step": 6587 - }, - { - "epoch": 0.7921601635303313, - "grad_norm": 1.611355794033273, - "learning_rate": 4.36128907901443e-07, - "loss": 0.9111, - "step": 6588 - }, - { - "epoch": 0.7922804064209703, - "grad_norm": 3.6799159535886243, - "learning_rate": 4.356434473213519e-07, - "loss": 0.9477, - "step": 6589 - }, - { - "epoch": 0.7924006493116095, - "grad_norm": 2.1364620968174686, - "learning_rate": 4.351582240529068e-07, - "loss": 1.0212, - "step": 6590 - }, - { - "epoch": 0.7925208922022485, - "grad_norm": 0.7128032055909017, - "learning_rate": 4.346732381697149e-07, - "loss": 0.8412, - "step": 6591 - }, - { - "epoch": 0.7926411350928876, - "grad_norm": 1.6719602046182278, - "learning_rate": 4.3418848974534825e-07, - "loss": 1.0427, - "step": 6592 - }, - { - "epoch": 0.7927613779835267, - "grad_norm": 6.33221047469599, - "learning_rate": 4.3370397885334276e-07, - "loss": 0.9157, - "step": 6593 - }, - { - "epoch": 0.7928816208741658, - "grad_norm": 2.096257885603611, - "learning_rate": 4.3321970556719777e-07, - "loss": 0.9819, - "step": 6594 - }, - { - "epoch": 0.7930018637648049, - "grad_norm": 2.2002153594639866, - "learning_rate": 4.3273566996037856e-07, - "loss": 0.9367, - "step": 6595 - }, - { - "epoch": 0.793122106655444, - "grad_norm": 2.0774756542849615, - "learning_rate": 4.322518721063113e-07, - "loss": 1.0286, - "step": 6596 - }, - { - "epoch": 0.7932423495460831, - "grad_norm": 2.1649667110923296, - "learning_rate": 4.3176831207838906e-07, - "loss": 0.9272, - "step": 6597 - }, - { - "epoch": 0.7933625924367221, - "grad_norm": 2.124100438618627, - "learning_rate": 4.3128498994996685e-07, - "loss": 0.9782, - "step": 6598 - }, - { - "epoch": 0.7934828353273613, - "grad_norm": 1.8630408685801496, - "learning_rate": 4.308019057943646e-07, - "loss": 0.9433, - "step": 6599 - }, - { - "epoch": 0.7936030782180004, - "grad_norm": 1.9377027836169092, - "learning_rate": 4.3031905968486535e-07, - "loss": 0.977, - "step": 6600 - }, - { - "epoch": 0.7937233211086394, - "grad_norm": 1.800446981986884, - "learning_rate": 4.298364516947162e-07, - "loss": 0.9123, - "step": 6601 - }, - { - "epoch": 0.7938435639992786, - "grad_norm": 2.5167092776533244, - "learning_rate": 4.293540818971295e-07, - "loss": 0.8936, - "step": 6602 - }, - { - "epoch": 0.7939638068899176, - "grad_norm": 3.9940809324695947, - "learning_rate": 4.2887195036527934e-07, - "loss": 1.0013, - "step": 6603 - }, - { - "epoch": 0.7940840497805567, - "grad_norm": 2.298646619918808, - "learning_rate": 4.28390057172306e-07, - "loss": 0.9667, - "step": 6604 - }, - { - "epoch": 0.7942042926711959, - "grad_norm": 5.76521378371382, - "learning_rate": 4.279084023913111e-07, - "loss": 0.9544, - "step": 6605 - }, - { - "epoch": 0.7943245355618349, - "grad_norm": 1.9727288261130984, - "learning_rate": 4.2742698609536096e-07, - "loss": 0.9206, - "step": 6606 - }, - { - "epoch": 0.794444778452474, - "grad_norm": 2.2866001881685016, - "learning_rate": 4.2694580835748706e-07, - "loss": 1.0078, - "step": 6607 - }, - { - "epoch": 0.7945650213431131, - "grad_norm": 2.0304260930464153, - "learning_rate": 4.264648692506836e-07, - "loss": 0.968, - "step": 6608 - }, - { - "epoch": 0.7946852642337522, - "grad_norm": 1.7205912351222519, - "learning_rate": 4.2598416884790824e-07, - "loss": 0.9499, - "step": 6609 - }, - { - "epoch": 0.7948055071243912, - "grad_norm": 2.026951628820632, - "learning_rate": 4.255037072220828e-07, - "loss": 1.0428, - "step": 6610 - }, - { - "epoch": 0.7949257500150304, - "grad_norm": 1.6245983801229147, - "learning_rate": 4.2502348444609293e-07, - "loss": 0.9437, - "step": 6611 - }, - { - "epoch": 0.7950459929056695, - "grad_norm": 2.577474338614653, - "learning_rate": 4.2454350059278844e-07, - "loss": 0.9163, - "step": 6612 - }, - { - "epoch": 0.7951662357963085, - "grad_norm": 1.8729587888241932, - "learning_rate": 4.240637557349824e-07, - "loss": 1.0767, - "step": 6613 - }, - { - "epoch": 0.7952864786869477, - "grad_norm": 1.8900277952138451, - "learning_rate": 4.235842499454516e-07, - "loss": 0.8857, - "step": 6614 - }, - { - "epoch": 0.7954067215775867, - "grad_norm": 1.825018305229459, - "learning_rate": 4.2310498329693687e-07, - "loss": 1.0537, - "step": 6615 - }, - { - "epoch": 0.7955269644682258, - "grad_norm": 1.513695816342921, - "learning_rate": 4.2262595586214164e-07, - "loss": 1.0397, - "step": 6616 - }, - { - "epoch": 0.795647207358865, - "grad_norm": 1.7893667836855045, - "learning_rate": 4.221471677137358e-07, - "loss": 0.9993, - "step": 6617 - }, - { - "epoch": 0.795767450249504, - "grad_norm": 1.573346296165428, - "learning_rate": 4.216686189243492e-07, - "loss": 0.9332, - "step": 6618 - }, - { - "epoch": 0.7958876931401431, - "grad_norm": 1.6802696461220739, - "learning_rate": 4.211903095665785e-07, - "loss": 0.9562, - "step": 6619 - }, - { - "epoch": 0.7960079360307821, - "grad_norm": 1.8964964437051623, - "learning_rate": 4.2071223971298277e-07, - "loss": 0.9829, - "step": 6620 - }, - { - "epoch": 0.7961281789214213, - "grad_norm": 2.052339437804758, - "learning_rate": 4.2023440943608433e-07, - "loss": 0.8418, - "step": 6621 - }, - { - "epoch": 0.7962484218120603, - "grad_norm": 6.259739116071015, - "learning_rate": 4.1975681880837023e-07, - "loss": 1.0172, - "step": 6622 - }, - { - "epoch": 0.7963686647026994, - "grad_norm": 1.7529990519876304, - "learning_rate": 4.192794679022895e-07, - "loss": 1.0547, - "step": 6623 - }, - { - "epoch": 0.7964889075933386, - "grad_norm": 1.7412630373979472, - "learning_rate": 4.1880235679025743e-07, - "loss": 0.9415, - "step": 6624 - }, - { - "epoch": 0.7966091504839776, - "grad_norm": 1.6935586603061104, - "learning_rate": 4.1832548554464986e-07, - "loss": 0.8657, - "step": 6625 - }, - { - "epoch": 0.7967293933746167, - "grad_norm": 0.7730281016039752, - "learning_rate": 4.178488542378098e-07, - "loss": 0.8392, - "step": 6626 - }, - { - "epoch": 0.7968496362652558, - "grad_norm": 1.8143018776524413, - "learning_rate": 4.173724629420401e-07, - "loss": 1.1178, - "step": 6627 - }, - { - "epoch": 0.7969698791558949, - "grad_norm": 2.295929175543917, - "learning_rate": 4.168963117296087e-07, - "loss": 0.9028, - "step": 6628 - }, - { - "epoch": 0.797090122046534, - "grad_norm": 1.9551714742441697, - "learning_rate": 4.1642040067274876e-07, - "loss": 0.9837, - "step": 6629 - }, - { - "epoch": 0.7972103649371731, - "grad_norm": 1.7960099501472235, - "learning_rate": 4.1594472984365493e-07, - "loss": 0.9522, - "step": 6630 - }, - { - "epoch": 0.7973306078278122, - "grad_norm": 1.805185884958707, - "learning_rate": 4.154692993144862e-07, - "loss": 1.0017, - "step": 6631 - }, - { - "epoch": 0.7974508507184512, - "grad_norm": 2.132218610080091, - "learning_rate": 4.1499410915736476e-07, - "loss": 0.9431, - "step": 6632 - }, - { - "epoch": 0.7975710936090904, - "grad_norm": 0.8240132369418005, - "learning_rate": 4.145191594443762e-07, - "loss": 0.9397, - "step": 6633 - }, - { - "epoch": 0.7976913364997295, - "grad_norm": 3.212064744726732, - "learning_rate": 4.140444502475713e-07, - "loss": 0.9409, - "step": 6634 - }, - { - "epoch": 0.7978115793903685, - "grad_norm": 2.261033555569311, - "learning_rate": 4.1356998163896216e-07, - "loss": 0.9295, - "step": 6635 - }, - { - "epoch": 0.7979318222810077, - "grad_norm": 2.390586703124622, - "learning_rate": 4.130957536905255e-07, - "loss": 0.9761, - "step": 6636 - }, - { - "epoch": 0.7980520651716467, - "grad_norm": 2.661114535487694, - "learning_rate": 4.1262176647420134e-07, - "loss": 0.939, - "step": 6637 - }, - { - "epoch": 0.7981723080622858, - "grad_norm": 1.733206690538929, - "learning_rate": 4.121480200618923e-07, - "loss": 1.0291, - "step": 6638 - }, - { - "epoch": 0.798292550952925, - "grad_norm": 1.6920485477336558, - "learning_rate": 4.116745145254674e-07, - "loss": 1.0274, - "step": 6639 - }, - { - "epoch": 0.798412793843564, - "grad_norm": 0.7787082141056474, - "learning_rate": 4.1120124993675476e-07, - "loss": 0.8391, - "step": 6640 - }, - { - "epoch": 0.7985330367342031, - "grad_norm": 2.914775689525948, - "learning_rate": 4.107282263675498e-07, - "loss": 0.855, - "step": 6641 - }, - { - "epoch": 0.7986532796248422, - "grad_norm": 0.7434889375290791, - "learning_rate": 4.1025544388960907e-07, - "loss": 0.7744, - "step": 6642 - }, - { - "epoch": 0.7987735225154813, - "grad_norm": 1.902744822746574, - "learning_rate": 4.097829025746538e-07, - "loss": 0.9421, - "step": 6643 - }, - { - "epoch": 0.7988937654061203, - "grad_norm": 0.6971181505183791, - "learning_rate": 4.0931060249436757e-07, - "loss": 0.8548, - "step": 6644 - }, - { - "epoch": 0.7990140082967595, - "grad_norm": 1.9789558967660736, - "learning_rate": 4.088385437203978e-07, - "loss": 0.9229, - "step": 6645 - }, - { - "epoch": 0.7991342511873986, - "grad_norm": 4.266760147580647, - "learning_rate": 4.083667263243564e-07, - "loss": 0.9991, - "step": 6646 - }, - { - "epoch": 0.7992544940780376, - "grad_norm": 1.8156791954684433, - "learning_rate": 4.0789515037781653e-07, - "loss": 0.9403, - "step": 6647 - }, - { - "epoch": 0.7993747369686768, - "grad_norm": 1.707398399134039, - "learning_rate": 4.0742381595231755e-07, - "loss": 1.062, - "step": 6648 - }, - { - "epoch": 0.7994949798593158, - "grad_norm": 1.8162466984067078, - "learning_rate": 4.06952723119359e-07, - "loss": 1.0135, - "step": 6649 - }, - { - "epoch": 0.7996152227499549, - "grad_norm": 2.646451402296902, - "learning_rate": 4.0648187195040504e-07, - "loss": 0.8955, - "step": 6650 - }, - { - "epoch": 0.799735465640594, - "grad_norm": 0.9583363567525744, - "learning_rate": 4.060112625168848e-07, - "loss": 0.9557, - "step": 6651 - }, - { - "epoch": 0.7998557085312331, - "grad_norm": 1.632748689271596, - "learning_rate": 4.055408948901886e-07, - "loss": 0.9695, - "step": 6652 - }, - { - "epoch": 0.7999759514218722, - "grad_norm": 1.959042184115566, - "learning_rate": 4.050707691416708e-07, - "loss": 0.9428, - "step": 6653 - }, - { - "epoch": 0.8000961943125112, - "grad_norm": 0.7259021752737549, - "learning_rate": 4.046008853426495e-07, - "loss": 0.8536, - "step": 6654 - }, - { - "epoch": 0.8002164372031504, - "grad_norm": 1.6359822293554553, - "learning_rate": 4.0413124356440464e-07, - "loss": 0.8612, - "step": 6655 - }, - { - "epoch": 0.8003366800937894, - "grad_norm": 2.7128214597295903, - "learning_rate": 4.0366184387818223e-07, - "loss": 1.0568, - "step": 6656 - }, - { - "epoch": 0.8004569229844285, - "grad_norm": 1.9061226724093465, - "learning_rate": 4.0319268635518797e-07, - "loss": 1.0805, - "step": 6657 - }, - { - "epoch": 0.8005771658750677, - "grad_norm": 1.9919937937191718, - "learning_rate": 4.027237710665943e-07, - "loss": 0.9816, - "step": 6658 - }, - { - "epoch": 0.8006974087657067, - "grad_norm": 2.134734465013038, - "learning_rate": 4.022550980835344e-07, - "loss": 0.9274, - "step": 6659 - }, - { - "epoch": 0.8008176516563458, - "grad_norm": 2.0534457671771595, - "learning_rate": 4.017866674771051e-07, - "loss": 1.0346, - "step": 6660 - }, - { - "epoch": 0.8009378945469849, - "grad_norm": 1.5412032057935974, - "learning_rate": 4.013184793183688e-07, - "loss": 0.9777, - "step": 6661 - }, - { - "epoch": 0.801058137437624, - "grad_norm": 1.993246537501592, - "learning_rate": 4.008505336783472e-07, - "loss": 0.9516, - "step": 6662 - }, - { - "epoch": 0.801178380328263, - "grad_norm": 1.8534675165812786, - "learning_rate": 4.003828306280284e-07, - "loss": 1.0512, - "step": 6663 - }, - { - "epoch": 0.8012986232189022, - "grad_norm": 1.8941784087708158, - "learning_rate": 3.999153702383626e-07, - "loss": 1.0094, - "step": 6664 - }, - { - "epoch": 0.8014188661095413, - "grad_norm": 1.939436652199052, - "learning_rate": 3.9944815258026263e-07, - "loss": 0.9597, - "step": 6665 - }, - { - "epoch": 0.8015391090001803, - "grad_norm": 2.5749409752164008, - "learning_rate": 3.989811777246057e-07, - "loss": 1.0622, - "step": 6666 - }, - { - "epoch": 0.8016593518908195, - "grad_norm": 0.8861540285882981, - "learning_rate": 3.985144457422305e-07, - "loss": 0.9323, - "step": 6667 - }, - { - "epoch": 0.8017795947814585, - "grad_norm": 2.0894641567130843, - "learning_rate": 3.9804795670394096e-07, - "loss": 0.9918, - "step": 6668 - }, - { - "epoch": 0.8018998376720976, - "grad_norm": 1.99832733065372, - "learning_rate": 3.975817106805022e-07, - "loss": 0.9413, - "step": 6669 - }, - { - "epoch": 0.8020200805627368, - "grad_norm": 1.8430247415961014, - "learning_rate": 3.97115707742645e-07, - "loss": 0.871, - "step": 6670 - }, - { - "epoch": 0.8021403234533758, - "grad_norm": 2.001763043567538, - "learning_rate": 3.966499479610599e-07, - "loss": 0.8858, - "step": 6671 - }, - { - "epoch": 0.8022605663440149, - "grad_norm": 1.6793404154025602, - "learning_rate": 3.9618443140640225e-07, - "loss": 0.8862, - "step": 6672 - }, - { - "epoch": 0.802380809234654, - "grad_norm": 0.7684042951895725, - "learning_rate": 3.957191581492918e-07, - "loss": 0.7806, - "step": 6673 - }, - { - "epoch": 0.8025010521252931, - "grad_norm": 2.335342892908816, - "learning_rate": 3.952541282603097e-07, - "loss": 0.9372, - "step": 6674 - }, - { - "epoch": 0.8026212950159322, - "grad_norm": 3.3729783113598297, - "learning_rate": 3.9478934181000013e-07, - "loss": 1.0609, - "step": 6675 - }, - { - "epoch": 0.8027415379065713, - "grad_norm": 2.2349966824910164, - "learning_rate": 3.943247988688714e-07, - "loss": 1.0787, - "step": 6676 - }, - { - "epoch": 0.8028617807972104, - "grad_norm": 1.8016967432274151, - "learning_rate": 3.938604995073933e-07, - "loss": 0.9449, - "step": 6677 - }, - { - "epoch": 0.8029820236878494, - "grad_norm": 2.0223889082492805, - "learning_rate": 3.9339644379600157e-07, - "loss": 0.8945, - "step": 6678 - }, - { - "epoch": 0.8031022665784886, - "grad_norm": 5.347276026247281, - "learning_rate": 3.929326318050907e-07, - "loss": 0.9464, - "step": 6679 - }, - { - "epoch": 0.8032225094691277, - "grad_norm": 1.9563407974378095, - "learning_rate": 3.924690636050225e-07, - "loss": 1.0132, - "step": 6680 - }, - { - "epoch": 0.8033427523597667, - "grad_norm": 2.2071461875491822, - "learning_rate": 3.9200573926611915e-07, - "loss": 0.9534, - "step": 6681 - }, - { - "epoch": 0.8034629952504058, - "grad_norm": 2.541883761613845, - "learning_rate": 3.9154265885866613e-07, - "loss": 0.9549, - "step": 6682 - }, - { - "epoch": 0.8035832381410449, - "grad_norm": 2.585127135937411, - "learning_rate": 3.9107982245291394e-07, - "loss": 0.9798, - "step": 6683 - }, - { - "epoch": 0.803703481031684, - "grad_norm": 2.606660355936722, - "learning_rate": 3.9061723011907245e-07, - "loss": 0.9998, - "step": 6684 - }, - { - "epoch": 0.803823723922323, - "grad_norm": 1.628610561254437, - "learning_rate": 3.901548819273179e-07, - "loss": 1.0162, - "step": 6685 - }, - { - "epoch": 0.8039439668129622, - "grad_norm": 2.15239138126459, - "learning_rate": 3.896927779477881e-07, - "loss": 0.9216, - "step": 6686 - }, - { - "epoch": 0.8040642097036013, - "grad_norm": 1.9241722457538322, - "learning_rate": 3.892309182505833e-07, - "loss": 0.9076, - "step": 6687 - }, - { - "epoch": 0.8041844525942403, - "grad_norm": 2.965723844434568, - "learning_rate": 3.887693029057675e-07, - "loss": 1.0947, - "step": 6688 - }, - { - "epoch": 0.8043046954848795, - "grad_norm": 1.5869474155148857, - "learning_rate": 3.8830793198336684e-07, - "loss": 1.0411, - "step": 6689 - }, - { - "epoch": 0.8044249383755185, - "grad_norm": 1.7732774435404657, - "learning_rate": 3.878468055533721e-07, - "loss": 0.9359, - "step": 6690 - }, - { - "epoch": 0.8045451812661576, - "grad_norm": 2.793932256364767, - "learning_rate": 3.8738592368573464e-07, - "loss": 1.082, - "step": 6691 - }, - { - "epoch": 0.8046654241567968, - "grad_norm": 2.5065609670058175, - "learning_rate": 3.8692528645037137e-07, - "loss": 1.107, - "step": 6692 - }, - { - "epoch": 0.8047856670474358, - "grad_norm": 2.4874384963516833, - "learning_rate": 3.8646489391715907e-07, - "loss": 1.0063, - "step": 6693 - }, - { - "epoch": 0.8049059099380749, - "grad_norm": 2.1372379878562517, - "learning_rate": 3.8600474615593903e-07, - "loss": 1.1077, - "step": 6694 - }, - { - "epoch": 0.805026152828714, - "grad_norm": 0.8266133601039581, - "learning_rate": 3.8554484323651605e-07, - "loss": 0.8887, - "step": 6695 - }, - { - "epoch": 0.8051463957193531, - "grad_norm": 2.2862902835156445, - "learning_rate": 3.85085185228657e-07, - "loss": 1.0153, - "step": 6696 - }, - { - "epoch": 0.8052666386099921, - "grad_norm": 1.7851229274319151, - "learning_rate": 3.8462577220209114e-07, - "loss": 0.9721, - "step": 6697 - }, - { - "epoch": 0.8053868815006313, - "grad_norm": 0.6925431511101101, - "learning_rate": 3.8416660422651127e-07, - "loss": 0.8395, - "step": 6698 - }, - { - "epoch": 0.8055071243912704, - "grad_norm": 1.738537304482112, - "learning_rate": 3.837076813715723e-07, - "loss": 0.9222, - "step": 6699 - }, - { - "epoch": 0.8056273672819094, - "grad_norm": 1.7652569277446037, - "learning_rate": 3.832490037068941e-07, - "loss": 0.9808, - "step": 6700 - }, - { - "epoch": 0.8057476101725486, - "grad_norm": 2.489446298400435, - "learning_rate": 3.827905713020554e-07, - "loss": 0.9883, - "step": 6701 - }, - { - "epoch": 0.8058678530631876, - "grad_norm": 2.1291192464406414, - "learning_rate": 3.823323842266017e-07, - "loss": 0.9065, - "step": 6702 - }, - { - "epoch": 0.8059880959538267, - "grad_norm": 2.2352629277766685, - "learning_rate": 3.818744425500393e-07, - "loss": 0.9652, - "step": 6703 - }, - { - "epoch": 0.8061083388444659, - "grad_norm": 1.8784660093244316, - "learning_rate": 3.8141674634183675e-07, - "loss": 1.0399, - "step": 6704 - }, - { - "epoch": 0.8062285817351049, - "grad_norm": 1.5654516950310884, - "learning_rate": 3.809592956714278e-07, - "loss": 0.8815, - "step": 6705 - }, - { - "epoch": 0.806348824625744, - "grad_norm": 1.9416534450664786, - "learning_rate": 3.805020906082057e-07, - "loss": 0.9684, - "step": 6706 - }, - { - "epoch": 0.8064690675163831, - "grad_norm": 3.9803059463225394, - "learning_rate": 3.8004513122152917e-07, - "loss": 1.0356, - "step": 6707 - }, - { - "epoch": 0.8065893104070222, - "grad_norm": 4.113311854408938, - "learning_rate": 3.79588417580718e-07, - "loss": 0.8972, - "step": 6708 - }, - { - "epoch": 0.8067095532976613, - "grad_norm": 2.8393010141623654, - "learning_rate": 3.791319497550558e-07, - "loss": 0.9922, - "step": 6709 - }, - { - "epoch": 0.8068297961883004, - "grad_norm": 1.6855518257006525, - "learning_rate": 3.78675727813788e-07, - "loss": 0.9441, - "step": 6710 - }, - { - "epoch": 0.8069500390789395, - "grad_norm": 2.3081234938706734, - "learning_rate": 3.782197518261225e-07, - "loss": 0.9705, - "step": 6711 - }, - { - "epoch": 0.8070702819695785, - "grad_norm": 2.1789661157152165, - "learning_rate": 3.777640218612319e-07, - "loss": 1.1939, - "step": 6712 - }, - { - "epoch": 0.8071905248602176, - "grad_norm": 2.9460720226343464, - "learning_rate": 3.773085379882488e-07, - "loss": 0.9466, - "step": 6713 - }, - { - "epoch": 0.8073107677508568, - "grad_norm": 1.9259588999180601, - "learning_rate": 3.768533002762715e-07, - "loss": 0.9956, - "step": 6714 - }, - { - "epoch": 0.8074310106414958, - "grad_norm": 1.8948974255502415, - "learning_rate": 3.763983087943572e-07, - "loss": 0.9915, - "step": 6715 - }, - { - "epoch": 0.8075512535321349, - "grad_norm": 1.609155880723809, - "learning_rate": 3.759435636115282e-07, - "loss": 1.03, - "step": 6716 - }, - { - "epoch": 0.807671496422774, - "grad_norm": 4.302055426530943, - "learning_rate": 3.7548906479676967e-07, - "loss": 0.9708, - "step": 6717 - }, - { - "epoch": 0.8077917393134131, - "grad_norm": 1.732484354055857, - "learning_rate": 3.7503481241902855e-07, - "loss": 0.9371, - "step": 6718 - }, - { - "epoch": 0.8079119822040521, - "grad_norm": 1.9824667338289754, - "learning_rate": 3.745808065472145e-07, - "loss": 1.0267, - "step": 6719 - }, - { - "epoch": 0.8080322250946913, - "grad_norm": 1.6972184959338805, - "learning_rate": 3.741270472501994e-07, - "loss": 0.989, - "step": 6720 - }, - { - "epoch": 0.8081524679853304, - "grad_norm": 2.9328157127171735, - "learning_rate": 3.736735345968183e-07, - "loss": 0.9576, - "step": 6721 - }, - { - "epoch": 0.8082727108759694, - "grad_norm": 1.5290418000479729, - "learning_rate": 3.7322026865586986e-07, - "loss": 1.0258, - "step": 6722 - }, - { - "epoch": 0.8083929537666086, - "grad_norm": 1.8253352079463125, - "learning_rate": 3.7276724949611206e-07, - "loss": 0.9585, - "step": 6723 - }, - { - "epoch": 0.8085131966572476, - "grad_norm": 7.857808072552938, - "learning_rate": 3.723144771862694e-07, - "loss": 0.9743, - "step": 6724 - }, - { - "epoch": 0.8086334395478867, - "grad_norm": 1.5223170145751477, - "learning_rate": 3.718619517950263e-07, - "loss": 0.9978, - "step": 6725 - }, - { - "epoch": 0.8087536824385259, - "grad_norm": 2.3752591844980793, - "learning_rate": 3.714096733910301e-07, - "loss": 0.9991, - "step": 6726 - }, - { - "epoch": 0.8088739253291649, - "grad_norm": 3.799379378383879, - "learning_rate": 3.709576420428926e-07, - "loss": 0.9286, - "step": 6727 - }, - { - "epoch": 0.808994168219804, - "grad_norm": 2.033511736604969, - "learning_rate": 3.7050585781918463e-07, - "loss": 0.9622, - "step": 6728 - }, - { - "epoch": 0.8091144111104431, - "grad_norm": 1.889983265313231, - "learning_rate": 3.700543207884428e-07, - "loss": 0.916, - "step": 6729 - }, - { - "epoch": 0.8092346540010822, - "grad_norm": 1.9301132164988193, - "learning_rate": 3.6960303101916466e-07, - "loss": 0.9343, - "step": 6730 - }, - { - "epoch": 0.8093548968917212, - "grad_norm": 0.7965453344854858, - "learning_rate": 3.6915198857981047e-07, - "loss": 0.8173, - "step": 6731 - }, - { - "epoch": 0.8094751397823604, - "grad_norm": 1.778001734746681, - "learning_rate": 3.687011935388027e-07, - "loss": 0.9148, - "step": 6732 - }, - { - "epoch": 0.8095953826729995, - "grad_norm": 2.144935667055799, - "learning_rate": 3.6825064596452646e-07, - "loss": 0.958, - "step": 6733 - }, - { - "epoch": 0.8097156255636385, - "grad_norm": 1.704260980969967, - "learning_rate": 3.678003459253305e-07, - "loss": 0.9364, - "step": 6734 - }, - { - "epoch": 0.8098358684542777, - "grad_norm": 1.9849073642091153, - "learning_rate": 3.673502934895236e-07, - "loss": 0.9705, - "step": 6735 - }, - { - "epoch": 0.8099561113449167, - "grad_norm": 0.6906416809000774, - "learning_rate": 3.669004887253802e-07, - "loss": 0.823, - "step": 6736 - }, - { - "epoch": 0.8100763542355558, - "grad_norm": 1.4836861869375222, - "learning_rate": 3.664509317011335e-07, - "loss": 1.0186, - "step": 6737 - }, - { - "epoch": 0.810196597126195, - "grad_norm": 5.187346858326409, - "learning_rate": 3.6600162248498134e-07, - "loss": 0.959, - "step": 6738 - }, - { - "epoch": 0.810316840016834, - "grad_norm": 2.2714752433245615, - "learning_rate": 3.6555256114508426e-07, - "loss": 0.9954, - "step": 6739 - }, - { - "epoch": 0.8104370829074731, - "grad_norm": 1.7480342587489903, - "learning_rate": 3.651037477495642e-07, - "loss": 0.9607, - "step": 6740 - }, - { - "epoch": 0.8105573257981122, - "grad_norm": 3.6520685265620467, - "learning_rate": 3.6465518236650584e-07, - "loss": 0.9082, - "step": 6741 - }, - { - "epoch": 0.8106775686887513, - "grad_norm": 1.7284896934848637, - "learning_rate": 3.642068650639558e-07, - "loss": 1.0098, - "step": 6742 - }, - { - "epoch": 0.8107978115793903, - "grad_norm": 2.273643772690972, - "learning_rate": 3.6375879590992334e-07, - "loss": 0.8786, - "step": 6743 - }, - { - "epoch": 0.8109180544700295, - "grad_norm": 1.7417720419051754, - "learning_rate": 3.6331097497238173e-07, - "loss": 1.0338, - "step": 6744 - }, - { - "epoch": 0.8110382973606686, - "grad_norm": 1.8721230369629405, - "learning_rate": 3.628634023192627e-07, - "loss": 1.0239, - "step": 6745 - }, - { - "epoch": 0.8111585402513076, - "grad_norm": 2.894260244514317, - "learning_rate": 3.624160780184644e-07, - "loss": 0.9893, - "step": 6746 - }, - { - "epoch": 0.8112787831419467, - "grad_norm": 1.5788245586268341, - "learning_rate": 3.6196900213784496e-07, - "loss": 0.9804, - "step": 6747 - }, - { - "epoch": 0.8113990260325858, - "grad_norm": 1.8772174438709774, - "learning_rate": 3.6152217474522527e-07, - "loss": 1.091, - "step": 6748 - }, - { - "epoch": 0.8115192689232249, - "grad_norm": 2.145032369018179, - "learning_rate": 3.6107559590838975e-07, - "loss": 0.9535, - "step": 6749 - }, - { - "epoch": 0.811639511813864, - "grad_norm": 2.314159004419445, - "learning_rate": 3.606292656950822e-07, - "loss": 0.8934, - "step": 6750 - }, - { - "epoch": 0.8117597547045031, - "grad_norm": 1.8615659099532897, - "learning_rate": 3.601831841730121e-07, - "loss": 1.0916, - "step": 6751 - }, - { - "epoch": 0.8118799975951422, - "grad_norm": 1.6306711419486775, - "learning_rate": 3.5973735140984916e-07, - "loss": 0.9684, - "step": 6752 - }, - { - "epoch": 0.8120002404857812, - "grad_norm": 2.9710685911475054, - "learning_rate": 3.5929176747322607e-07, - "loss": 1.0122, - "step": 6753 - }, - { - "epoch": 0.8121204833764204, - "grad_norm": 0.8080568973200389, - "learning_rate": 3.588464324307372e-07, - "loss": 0.8061, - "step": 6754 - }, - { - "epoch": 0.8122407262670595, - "grad_norm": 2.4323469614500404, - "learning_rate": 3.584013463499391e-07, - "loss": 0.9852, - "step": 6755 - }, - { - "epoch": 0.8123609691576985, - "grad_norm": 0.7241590390902949, - "learning_rate": 3.579565092983521e-07, - "loss": 0.8844, - "step": 6756 - }, - { - "epoch": 0.8124812120483377, - "grad_norm": 2.1456731594513507, - "learning_rate": 3.575119213434565e-07, - "loss": 1.0657, - "step": 6757 - }, - { - "epoch": 0.8126014549389767, - "grad_norm": 1.6997136940807718, - "learning_rate": 3.5706758255269765e-07, - "loss": 1.0461, - "step": 6758 - }, - { - "epoch": 0.8127216978296158, - "grad_norm": 1.59477440317861, - "learning_rate": 3.566234929934795e-07, - "loss": 0.9296, - "step": 6759 - }, - { - "epoch": 0.812841940720255, - "grad_norm": 1.328499146661854, - "learning_rate": 3.561796527331706e-07, - "loss": 0.9515, - "step": 6760 - }, - { - "epoch": 0.812962183610894, - "grad_norm": 1.8390188777549996, - "learning_rate": 3.5573606183910163e-07, - "loss": 0.996, - "step": 6761 - }, - { - "epoch": 0.8130824265015331, - "grad_norm": 1.951545381738186, - "learning_rate": 3.5529272037856493e-07, - "loss": 1.0122, - "step": 6762 - }, - { - "epoch": 0.8132026693921722, - "grad_norm": 1.5557836760796733, - "learning_rate": 3.548496284188149e-07, - "loss": 0.8064, - "step": 6763 - }, - { - "epoch": 0.8133229122828113, - "grad_norm": 2.9874893635197357, - "learning_rate": 3.544067860270681e-07, - "loss": 1.0172, - "step": 6764 - }, - { - "epoch": 0.8134431551734503, - "grad_norm": 4.575119959191366, - "learning_rate": 3.539641932705029e-07, - "loss": 0.9441, - "step": 6765 - }, - { - "epoch": 0.8135633980640895, - "grad_norm": 1.9517999655757088, - "learning_rate": 3.53521850216262e-07, - "loss": 0.9849, - "step": 6766 - }, - { - "epoch": 0.8136836409547286, - "grad_norm": 2.0705225424111693, - "learning_rate": 3.530797569314461e-07, - "loss": 0.9989, - "step": 6767 - }, - { - "epoch": 0.8138038838453676, - "grad_norm": 2.2081129815870213, - "learning_rate": 3.5263791348312235e-07, - "loss": 1.0007, - "step": 6768 - }, - { - "epoch": 0.8139241267360068, - "grad_norm": 1.6522082550551882, - "learning_rate": 3.521963199383171e-07, - "loss": 0.9418, - "step": 6769 - }, - { - "epoch": 0.8140443696266458, - "grad_norm": 1.9480275889211314, - "learning_rate": 3.517549763640197e-07, - "loss": 1.0047, - "step": 6770 - }, - { - "epoch": 0.8141646125172849, - "grad_norm": 1.8965420778023239, - "learning_rate": 3.513138828271829e-07, - "loss": 0.9419, - "step": 6771 - }, - { - "epoch": 0.8142848554079241, - "grad_norm": 1.8566171405060599, - "learning_rate": 3.508730393947179e-07, - "loss": 0.9336, - "step": 6772 - }, - { - "epoch": 0.8144050982985631, - "grad_norm": 1.7508345385182207, - "learning_rate": 3.504324461335024e-07, - "loss": 0.9416, - "step": 6773 - }, - { - "epoch": 0.8145253411892022, - "grad_norm": 1.6162423583142864, - "learning_rate": 3.499921031103732e-07, - "loss": 1.1036, - "step": 6774 - }, - { - "epoch": 0.8146455840798413, - "grad_norm": 1.988335569347948, - "learning_rate": 3.4955201039212987e-07, - "loss": 1.0123, - "step": 6775 - }, - { - "epoch": 0.8147658269704804, - "grad_norm": 1.751790765322026, - "learning_rate": 3.4911216804553465e-07, - "loss": 0.8759, - "step": 6776 - }, - { - "epoch": 0.8148860698611194, - "grad_norm": 2.1163082760551015, - "learning_rate": 3.4867257613731017e-07, - "loss": 0.9355, - "step": 6777 - }, - { - "epoch": 0.8150063127517585, - "grad_norm": 1.6850355307571234, - "learning_rate": 3.4823323473414343e-07, - "loss": 1.0799, - "step": 6778 - }, - { - "epoch": 0.8151265556423977, - "grad_norm": 2.9163014501160918, - "learning_rate": 3.477941439026812e-07, - "loss": 0.9888, - "step": 6779 - }, - { - "epoch": 0.8152467985330367, - "grad_norm": 1.8438928193203734, - "learning_rate": 3.473553037095349e-07, - "loss": 0.9583, - "step": 6780 - }, - { - "epoch": 0.8153670414236758, - "grad_norm": 5.152792216255316, - "learning_rate": 3.469167142212743e-07, - "loss": 1.0539, - "step": 6781 - }, - { - "epoch": 0.8154872843143149, - "grad_norm": 2.1078211960213733, - "learning_rate": 3.4647837550443337e-07, - "loss": 0.8553, - "step": 6782 - }, - { - "epoch": 0.815607527204954, - "grad_norm": 1.8008753820214893, - "learning_rate": 3.460402876255086e-07, - "loss": 0.9708, - "step": 6783 - }, - { - "epoch": 0.815727770095593, - "grad_norm": 9.092902599585397, - "learning_rate": 3.456024506509574e-07, - "loss": 0.9501, - "step": 6784 - }, - { - "epoch": 0.8158480129862322, - "grad_norm": 2.0487812280515114, - "learning_rate": 3.4516486464719873e-07, - "loss": 0.9671, - "step": 6785 - }, - { - "epoch": 0.8159682558768713, - "grad_norm": 2.369730373609964, - "learning_rate": 3.4472752968061445e-07, - "loss": 0.8583, - "step": 6786 - }, - { - "epoch": 0.8160884987675103, - "grad_norm": 1.9987602563801246, - "learning_rate": 3.442904458175475e-07, - "loss": 0.9643, - "step": 6787 - }, - { - "epoch": 0.8162087416581495, - "grad_norm": 1.4271343342747367, - "learning_rate": 3.438536131243044e-07, - "loss": 0.9838, - "step": 6788 - }, - { - "epoch": 0.8163289845487885, - "grad_norm": 2.0582253055821704, - "learning_rate": 3.434170316671503e-07, - "loss": 0.8449, - "step": 6789 - }, - { - "epoch": 0.8164492274394276, - "grad_norm": 2.609663347770415, - "learning_rate": 3.4298070151231583e-07, - "loss": 1.1283, - "step": 6790 - }, - { - "epoch": 0.8165694703300668, - "grad_norm": 3.45634455353188, - "learning_rate": 3.425446227259916e-07, - "loss": 0.8282, - "step": 6791 - }, - { - "epoch": 0.8166897132207058, - "grad_norm": 1.8219761265845709, - "learning_rate": 3.421087953743296e-07, - "loss": 1.0562, - "step": 6792 - }, - { - "epoch": 0.8168099561113449, - "grad_norm": 2.990750518046239, - "learning_rate": 3.416732195234464e-07, - "loss": 1.0271, - "step": 6793 - }, - { - "epoch": 0.816930199001984, - "grad_norm": 1.4472834988827536, - "learning_rate": 3.4123789523941613e-07, - "loss": 1.0196, - "step": 6794 - }, - { - "epoch": 0.8170504418926231, - "grad_norm": 1.5169623855172205, - "learning_rate": 3.4080282258827884e-07, - "loss": 0.866, - "step": 6795 - }, - { - "epoch": 0.8171706847832622, - "grad_norm": 2.1392568406627253, - "learning_rate": 3.403680016360342e-07, - "loss": 0.9467, - "step": 6796 - }, - { - "epoch": 0.8172909276739013, - "grad_norm": 1.5964295544229652, - "learning_rate": 3.3993343244864403e-07, - "loss": 0.9066, - "step": 6797 - }, - { - "epoch": 0.8174111705645404, - "grad_norm": 2.9820286979394535, - "learning_rate": 3.394991150920323e-07, - "loss": 0.955, - "step": 6798 - }, - { - "epoch": 0.8175314134551794, - "grad_norm": 1.8189681168163743, - "learning_rate": 3.3906504963208396e-07, - "loss": 0.9755, - "step": 6799 - }, - { - "epoch": 0.8176516563458186, - "grad_norm": 1.9172554010306952, - "learning_rate": 3.3863123613464774e-07, - "loss": 0.8811, - "step": 6800 - }, - { - "epoch": 0.8177718992364577, - "grad_norm": 1.7013942778971263, - "learning_rate": 3.381976746655317e-07, - "loss": 0.9643, - "step": 6801 - }, - { - "epoch": 0.8178921421270967, - "grad_norm": 2.0447986350683274, - "learning_rate": 3.3776436529050756e-07, - "loss": 0.9089, - "step": 6802 - }, - { - "epoch": 0.8180123850177359, - "grad_norm": 1.7186886169569795, - "learning_rate": 3.373313080753073e-07, - "loss": 0.9489, - "step": 6803 - }, - { - "epoch": 0.8181326279083749, - "grad_norm": 2.1410132285809205, - "learning_rate": 3.3689850308562527e-07, - "loss": 1.0069, - "step": 6804 - }, - { - "epoch": 0.818252870799014, - "grad_norm": 2.1268890062936463, - "learning_rate": 3.364659503871183e-07, - "loss": 1.0051, - "step": 6805 - }, - { - "epoch": 0.8183731136896532, - "grad_norm": 1.9429608573799284, - "learning_rate": 3.3603365004540417e-07, - "loss": 1.0642, - "step": 6806 - }, - { - "epoch": 0.8184933565802922, - "grad_norm": 1.9721009332078512, - "learning_rate": 3.356016021260624e-07, - "loss": 0.9923, - "step": 6807 - }, - { - "epoch": 0.8186135994709313, - "grad_norm": 2.466449583226939, - "learning_rate": 3.35169806694634e-07, - "loss": 0.876, - "step": 6808 - }, - { - "epoch": 0.8187338423615703, - "grad_norm": 0.7439243272629303, - "learning_rate": 3.3473826381662186e-07, - "loss": 0.854, - "step": 6809 - }, - { - "epoch": 0.8188540852522095, - "grad_norm": 2.4050488136714767, - "learning_rate": 3.3430697355749216e-07, - "loss": 1.0439, - "step": 6810 - }, - { - "epoch": 0.8189743281428485, - "grad_norm": 3.305339054600774, - "learning_rate": 3.3387593598266907e-07, - "loss": 0.9735, - "step": 6811 - }, - { - "epoch": 0.8190945710334876, - "grad_norm": 1.6934722194009237, - "learning_rate": 3.3344515115754225e-07, - "loss": 1.0131, - "step": 6812 - }, - { - "epoch": 0.8192148139241268, - "grad_norm": 2.041437979625686, - "learning_rate": 3.33014619147461e-07, - "loss": 1.0246, - "step": 6813 - }, - { - "epoch": 0.8193350568147658, - "grad_norm": 1.8685866983055597, - "learning_rate": 3.325843400177362e-07, - "loss": 0.946, - "step": 6814 - }, - { - "epoch": 0.8194552997054049, - "grad_norm": 2.001835814969958, - "learning_rate": 3.32154313833642e-07, - "loss": 0.9645, - "step": 6815 - }, - { - "epoch": 0.819575542596044, - "grad_norm": 2.0318861918755733, - "learning_rate": 3.3172454066041164e-07, - "loss": 0.8337, - "step": 6816 - }, - { - "epoch": 0.8196957854866831, - "grad_norm": 2.208547209933131, - "learning_rate": 3.3129502056324234e-07, - "loss": 0.9902, - "step": 6817 - }, - { - "epoch": 0.8198160283773221, - "grad_norm": 0.7891647725819378, - "learning_rate": 3.3086575360729165e-07, - "loss": 0.8562, - "step": 6818 - }, - { - "epoch": 0.8199362712679613, - "grad_norm": 1.7551032316923967, - "learning_rate": 3.3043673985767906e-07, - "loss": 0.946, - "step": 6819 - }, - { - "epoch": 0.8200565141586004, - "grad_norm": 1.9645988053824703, - "learning_rate": 3.3000797937948564e-07, - "loss": 1.0046, - "step": 6820 - }, - { - "epoch": 0.8201767570492394, - "grad_norm": 0.9371120285880716, - "learning_rate": 3.295794722377534e-07, - "loss": 0.9083, - "step": 6821 - }, - { - "epoch": 0.8202969999398786, - "grad_norm": 3.2353884081436486, - "learning_rate": 3.291512184974876e-07, - "loss": 1.0234, - "step": 6822 - }, - { - "epoch": 0.8204172428305176, - "grad_norm": 2.3883683268327607, - "learning_rate": 3.2872321822365346e-07, - "loss": 0.9019, - "step": 6823 - }, - { - "epoch": 0.8205374857211567, - "grad_norm": 2.027988691407525, - "learning_rate": 3.282954714811783e-07, - "loss": 0.9707, - "step": 6824 - }, - { - "epoch": 0.8206577286117959, - "grad_norm": 2.265029408009249, - "learning_rate": 3.2786797833495093e-07, - "loss": 0.9384, - "step": 6825 - }, - { - "epoch": 0.8207779715024349, - "grad_norm": 2.0637876941327704, - "learning_rate": 3.274407388498213e-07, - "loss": 0.9566, - "step": 6826 - }, - { - "epoch": 0.820898214393074, - "grad_norm": 1.8072274569520848, - "learning_rate": 3.270137530906021e-07, - "loss": 0.9747, - "step": 6827 - }, - { - "epoch": 0.8210184572837131, - "grad_norm": 3.8702638046287534, - "learning_rate": 3.265870211220665e-07, - "loss": 1.0565, - "step": 6828 - }, - { - "epoch": 0.8211387001743522, - "grad_norm": 1.9973220181575426, - "learning_rate": 3.2616054300894934e-07, - "loss": 1.0439, - "step": 6829 - }, - { - "epoch": 0.8212589430649913, - "grad_norm": 4.391758673981106, - "learning_rate": 3.2573431881594693e-07, - "loss": 1.0781, - "step": 6830 - }, - { - "epoch": 0.8213791859556304, - "grad_norm": 2.652107059665298, - "learning_rate": 3.2530834860771663e-07, - "loss": 0.881, - "step": 6831 - }, - { - "epoch": 0.8214994288462695, - "grad_norm": 1.9362970124577688, - "learning_rate": 3.248826324488794e-07, - "loss": 0.9631, - "step": 6832 - }, - { - "epoch": 0.8216196717369085, - "grad_norm": 1.6305947460264438, - "learning_rate": 3.244571704040138e-07, - "loss": 1.1056, - "step": 6833 - }, - { - "epoch": 0.8217399146275477, - "grad_norm": 1.7688233431110416, - "learning_rate": 3.2403196253766374e-07, - "loss": 0.9656, - "step": 6834 - }, - { - "epoch": 0.8218601575181868, - "grad_norm": 2.1547649632416905, - "learning_rate": 3.2360700891433254e-07, - "loss": 1.014, - "step": 6835 - }, - { - "epoch": 0.8219804004088258, - "grad_norm": 0.8203960194531593, - "learning_rate": 3.231823095984847e-07, - "loss": 0.8256, - "step": 6836 - }, - { - "epoch": 0.822100643299465, - "grad_norm": 2.2215932975992128, - "learning_rate": 3.2275786465454814e-07, - "loss": 0.983, - "step": 6837 - }, - { - "epoch": 0.822220886190104, - "grad_norm": 1.792206712401959, - "learning_rate": 3.2233367414690917e-07, - "loss": 0.9944, - "step": 6838 - }, - { - "epoch": 0.8223411290807431, - "grad_norm": 1.9076874738086531, - "learning_rate": 3.219097381399183e-07, - "loss": 1.0691, - "step": 6839 - }, - { - "epoch": 0.8224613719713821, - "grad_norm": 1.6744909053725234, - "learning_rate": 3.2148605669788584e-07, - "loss": 1.031, - "step": 6840 - }, - { - "epoch": 0.8225816148620213, - "grad_norm": 2.7804737507967285, - "learning_rate": 3.2106262988508405e-07, - "loss": 0.9984, - "step": 6841 - }, - { - "epoch": 0.8227018577526604, - "grad_norm": 2.023819463903128, - "learning_rate": 3.206394577657465e-07, - "loss": 0.9679, - "step": 6842 - }, - { - "epoch": 0.8228221006432994, - "grad_norm": 2.914865664336092, - "learning_rate": 3.202165404040675e-07, - "loss": 0.9441, - "step": 6843 - }, - { - "epoch": 0.8229423435339386, - "grad_norm": 1.919591648267895, - "learning_rate": 3.1979387786420396e-07, - "loss": 0.9742, - "step": 6844 - }, - { - "epoch": 0.8230625864245776, - "grad_norm": 9.748941999888373, - "learning_rate": 3.1937147021027346e-07, - "loss": 1.0502, - "step": 6845 - }, - { - "epoch": 0.8231828293152167, - "grad_norm": 2.4062208694081866, - "learning_rate": 3.189493175063547e-07, - "loss": 0.9931, - "step": 6846 - }, - { - "epoch": 0.8233030722058559, - "grad_norm": 1.7468145534041029, - "learning_rate": 3.1852741981648776e-07, - "loss": 0.9002, - "step": 6847 - }, - { - "epoch": 0.8234233150964949, - "grad_norm": 2.339924857082379, - "learning_rate": 3.1810577720467404e-07, - "loss": 0.9306, - "step": 6848 - }, - { - "epoch": 0.823543557987134, - "grad_norm": 1.6291478844590064, - "learning_rate": 3.176843897348769e-07, - "loss": 0.7968, - "step": 6849 - }, - { - "epoch": 0.8236638008777731, - "grad_norm": 2.3434217095927226, - "learning_rate": 3.1726325747102034e-07, - "loss": 0.9896, - "step": 6850 - }, - { - "epoch": 0.8237840437684122, - "grad_norm": 1.4182702786915475, - "learning_rate": 3.1684238047698974e-07, - "loss": 0.8724, - "step": 6851 - }, - { - "epoch": 0.8239042866590512, - "grad_norm": 2.0648190725282958, - "learning_rate": 3.1642175881663155e-07, - "loss": 0.7538, - "step": 6852 - }, - { - "epoch": 0.8240245295496904, - "grad_norm": 1.9011565151969678, - "learning_rate": 3.160013925537537e-07, - "loss": 1.0724, - "step": 6853 - }, - { - "epoch": 0.8241447724403295, - "grad_norm": 2.471834790545373, - "learning_rate": 3.155812817521266e-07, - "loss": 0.983, - "step": 6854 - }, - { - "epoch": 0.8242650153309685, - "grad_norm": 2.2475855906719637, - "learning_rate": 3.151614264754787e-07, - "loss": 1.0035, - "step": 6855 - }, - { - "epoch": 0.8243852582216077, - "grad_norm": 1.8280248369996235, - "learning_rate": 3.147418267875035e-07, - "loss": 1.0237, - "step": 6856 - }, - { - "epoch": 0.8245055011122467, - "grad_norm": 2.910532595474383, - "learning_rate": 3.1432248275185315e-07, - "loss": 0.8815, - "step": 6857 - }, - { - "epoch": 0.8246257440028858, - "grad_norm": 2.4588209950873954, - "learning_rate": 3.139033944321412e-07, - "loss": 1.0082, - "step": 6858 - }, - { - "epoch": 0.824745986893525, - "grad_norm": 1.4478595457084644, - "learning_rate": 3.1348456189194507e-07, - "loss": 1.0187, - "step": 6859 - }, - { - "epoch": 0.824866229784164, - "grad_norm": 1.6497883978900143, - "learning_rate": 3.1306598519479876e-07, - "loss": 1.0575, - "step": 6860 - }, - { - "epoch": 0.8249864726748031, - "grad_norm": 1.6536888617576961, - "learning_rate": 3.1264766440420177e-07, - "loss": 1.0143, - "step": 6861 - }, - { - "epoch": 0.8251067155654422, - "grad_norm": 2.1167785700055672, - "learning_rate": 3.122295995836124e-07, - "loss": 0.9072, - "step": 6862 - }, - { - "epoch": 0.8252269584560813, - "grad_norm": 1.7528291041172144, - "learning_rate": 3.118117907964508e-07, - "loss": 1.0081, - "step": 6863 - }, - { - "epoch": 0.8253472013467203, - "grad_norm": 14.38605956183961, - "learning_rate": 3.1139423810609856e-07, - "loss": 1.0332, - "step": 6864 - }, - { - "epoch": 0.8254674442373595, - "grad_norm": 1.9503458009898451, - "learning_rate": 3.1097694157589714e-07, - "loss": 0.9842, - "step": 6865 - }, - { - "epoch": 0.8255876871279986, - "grad_norm": 3.7687885086344313, - "learning_rate": 3.105599012691511e-07, - "loss": 0.9932, - "step": 6866 - }, - { - "epoch": 0.8257079300186376, - "grad_norm": 1.3367645202327523, - "learning_rate": 3.101431172491249e-07, - "loss": 1.0496, - "step": 6867 - }, - { - "epoch": 0.8258281729092768, - "grad_norm": 2.2784886827319655, - "learning_rate": 3.097265895790444e-07, - "loss": 0.9468, - "step": 6868 - }, - { - "epoch": 0.8259484157999158, - "grad_norm": 2.7354657422728654, - "learning_rate": 3.093103183220962e-07, - "loss": 1.0599, - "step": 6869 - }, - { - "epoch": 0.8260686586905549, - "grad_norm": 0.9318398316653199, - "learning_rate": 3.0889430354142796e-07, - "loss": 0.88, - "step": 6870 - }, - { - "epoch": 0.826188901581194, - "grad_norm": 2.2019970759373235, - "learning_rate": 3.084785453001497e-07, - "loss": 0.9291, - "step": 6871 - }, - { - "epoch": 0.8263091444718331, - "grad_norm": 2.294971949254232, - "learning_rate": 3.080630436613314e-07, - "loss": 1.0471, - "step": 6872 - }, - { - "epoch": 0.8264293873624722, - "grad_norm": 1.9872626972683056, - "learning_rate": 3.076477986880039e-07, - "loss": 1.0844, - "step": 6873 - }, - { - "epoch": 0.8265496302531112, - "grad_norm": 1.9029235582899982, - "learning_rate": 3.0723281044315986e-07, - "loss": 0.9178, - "step": 6874 - }, - { - "epoch": 0.8266698731437504, - "grad_norm": 2.035981638760016, - "learning_rate": 3.068180789897521e-07, - "loss": 0.9919, - "step": 6875 - }, - { - "epoch": 0.8267901160343895, - "grad_norm": 1.4767242827416258, - "learning_rate": 3.064036043906966e-07, - "loss": 1.0446, - "step": 6876 - }, - { - "epoch": 0.8269103589250285, - "grad_norm": 2.1025624218540977, - "learning_rate": 3.059893867088668e-07, - "loss": 0.9061, - "step": 6877 - }, - { - "epoch": 0.8270306018156677, - "grad_norm": 3.034231076819803, - "learning_rate": 3.055754260071004e-07, - "loss": 0.8966, - "step": 6878 - }, - { - "epoch": 0.8271508447063067, - "grad_norm": 2.009412195030515, - "learning_rate": 3.051617223481948e-07, - "loss": 0.9664, - "step": 6879 - }, - { - "epoch": 0.8272710875969458, - "grad_norm": 2.208306246417446, - "learning_rate": 3.047482757949078e-07, - "loss": 0.9792, - "step": 6880 - }, - { - "epoch": 0.827391330487585, - "grad_norm": 2.1775422289462023, - "learning_rate": 3.043350864099605e-07, - "loss": 1.076, - "step": 6881 - }, - { - "epoch": 0.827511573378224, - "grad_norm": 2.265846609918724, - "learning_rate": 3.039221542560315e-07, - "loss": 1.0406, - "step": 6882 - }, - { - "epoch": 0.8276318162688631, - "grad_norm": 1.9064092100602097, - "learning_rate": 3.0350947939576356e-07, - "loss": 0.9727, - "step": 6883 - }, - { - "epoch": 0.8277520591595022, - "grad_norm": 1.5787691369184278, - "learning_rate": 3.0309706189175876e-07, - "loss": 0.9554, - "step": 6884 - }, - { - "epoch": 0.8278723020501413, - "grad_norm": 0.8101119379710139, - "learning_rate": 3.0268490180658045e-07, - "loss": 0.8259, - "step": 6885 - }, - { - "epoch": 0.8279925449407803, - "grad_norm": 3.113405615262246, - "learning_rate": 3.0227299920275305e-07, - "loss": 1.0131, - "step": 6886 - }, - { - "epoch": 0.8281127878314195, - "grad_norm": 2.5916921886238207, - "learning_rate": 3.018613541427613e-07, - "loss": 1.0852, - "step": 6887 - }, - { - "epoch": 0.8282330307220586, - "grad_norm": 1.9829664262285023, - "learning_rate": 3.0144996668905243e-07, - "loss": 0.9654, - "step": 6888 - }, - { - "epoch": 0.8283532736126976, - "grad_norm": 2.282475912879958, - "learning_rate": 3.010388369040331e-07, - "loss": 1.0471, - "step": 6889 - }, - { - "epoch": 0.8284735165033368, - "grad_norm": 1.7204186402439885, - "learning_rate": 3.0062796485007156e-07, - "loss": 1.0571, - "step": 6890 - }, - { - "epoch": 0.8285937593939758, - "grad_norm": 6.713435000294099, - "learning_rate": 3.002173505894965e-07, - "loss": 0.8771, - "step": 6891 - }, - { - "epoch": 0.8287140022846149, - "grad_norm": 2.740215233472881, - "learning_rate": 2.998069941845973e-07, - "loss": 0.8548, - "step": 6892 - }, - { - "epoch": 0.8288342451752541, - "grad_norm": 0.7626311486444489, - "learning_rate": 2.993968956976258e-07, - "loss": 0.8501, - "step": 6893 - }, - { - "epoch": 0.8289544880658931, - "grad_norm": 2.040955465059849, - "learning_rate": 2.9898705519079313e-07, - "loss": 0.923, - "step": 6894 - }, - { - "epoch": 0.8290747309565322, - "grad_norm": 1.627762134261288, - "learning_rate": 2.985774727262715e-07, - "loss": 0.973, - "step": 6895 - }, - { - "epoch": 0.8291949738471713, - "grad_norm": 1.7873480965686273, - "learning_rate": 2.981681483661949e-07, - "loss": 1.04, - "step": 6896 - }, - { - "epoch": 0.8293152167378104, - "grad_norm": 1.5824560278015745, - "learning_rate": 2.9775908217265633e-07, - "loss": 0.9353, - "step": 6897 - }, - { - "epoch": 0.8294354596284494, - "grad_norm": 0.8016923512952682, - "learning_rate": 2.9735027420771253e-07, - "loss": 0.755, - "step": 6898 - }, - { - "epoch": 0.8295557025190886, - "grad_norm": 2.165602924184865, - "learning_rate": 2.969417245333774e-07, - "loss": 0.9429, - "step": 6899 - }, - { - "epoch": 0.8296759454097277, - "grad_norm": 1.8920207270642213, - "learning_rate": 2.9653343321162915e-07, - "loss": 1.0117, - "step": 6900 - }, - { - "epoch": 0.8297961883003667, - "grad_norm": 2.402660714925718, - "learning_rate": 2.9612540030440446e-07, - "loss": 0.8712, - "step": 6901 - }, - { - "epoch": 0.8299164311910058, - "grad_norm": 0.843590756683211, - "learning_rate": 2.9571762587360206e-07, - "loss": 0.877, - "step": 6902 - }, - { - "epoch": 0.8300366740816449, - "grad_norm": 1.7645780446927724, - "learning_rate": 2.953101099810806e-07, - "loss": 0.9682, - "step": 6903 - }, - { - "epoch": 0.830156916972284, - "grad_norm": 2.1655349954945144, - "learning_rate": 2.9490285268865965e-07, - "loss": 1.0673, - "step": 6904 - }, - { - "epoch": 0.830277159862923, - "grad_norm": 1.9918266418351442, - "learning_rate": 2.9449585405812085e-07, - "loss": 1.0338, - "step": 6905 - }, - { - "epoch": 0.8303974027535622, - "grad_norm": 2.1816217187729916, - "learning_rate": 2.940891141512043e-07, - "loss": 0.9719, - "step": 6906 - }, - { - "epoch": 0.8305176456442013, - "grad_norm": 1.9840273815465144, - "learning_rate": 2.9368263302961385e-07, - "loss": 0.9456, - "step": 6907 - }, - { - "epoch": 0.8306378885348403, - "grad_norm": 1.9590789985053292, - "learning_rate": 2.9327641075501075e-07, - "loss": 1.0291, - "step": 6908 - }, - { - "epoch": 0.8307581314254795, - "grad_norm": 2.173379171371735, - "learning_rate": 2.9287044738901866e-07, - "loss": 0.8831, - "step": 6909 - }, - { - "epoch": 0.8308783743161186, - "grad_norm": 1.9110829108520853, - "learning_rate": 2.9246474299322274e-07, - "loss": 1.1374, - "step": 6910 - }, - { - "epoch": 0.8309986172067576, - "grad_norm": 0.9141761513504588, - "learning_rate": 2.920592976291678e-07, - "loss": 0.8957, - "step": 6911 - }, - { - "epoch": 0.8311188600973968, - "grad_norm": 2.0232678198643, - "learning_rate": 2.916541113583595e-07, - "loss": 1.0391, - "step": 6912 - }, - { - "epoch": 0.8312391029880358, - "grad_norm": 13.358651410411262, - "learning_rate": 2.912491842422642e-07, - "loss": 0.89, - "step": 6913 - }, - { - "epoch": 0.8313593458786749, - "grad_norm": 1.635242169378789, - "learning_rate": 2.9084451634230857e-07, - "loss": 0.931, - "step": 6914 - }, - { - "epoch": 0.831479588769314, - "grad_norm": 2.3157129392000564, - "learning_rate": 2.9044010771988125e-07, - "loss": 0.9472, - "step": 6915 - }, - { - "epoch": 0.8315998316599531, - "grad_norm": 1.7691088039562732, - "learning_rate": 2.900359584363303e-07, - "loss": 0.9629, - "step": 6916 - }, - { - "epoch": 0.8317200745505922, - "grad_norm": 8.198698582535256, - "learning_rate": 2.8963206855296494e-07, - "loss": 1.0658, - "step": 6917 - }, - { - "epoch": 0.8318403174412313, - "grad_norm": 1.6233675166035642, - "learning_rate": 2.892284381310548e-07, - "loss": 1.0034, - "step": 6918 - }, - { - "epoch": 0.8319605603318704, - "grad_norm": 2.4040547054496146, - "learning_rate": 2.888250672318302e-07, - "loss": 0.9412, - "step": 6919 - }, - { - "epoch": 0.8320808032225094, - "grad_norm": 1.4363981253233287, - "learning_rate": 2.884219559164831e-07, - "loss": 0.9179, - "step": 6920 - }, - { - "epoch": 0.8322010461131486, - "grad_norm": 2.178766418325135, - "learning_rate": 2.880191042461635e-07, - "loss": 1.0406, - "step": 6921 - }, - { - "epoch": 0.8323212890037877, - "grad_norm": 2.448997378041432, - "learning_rate": 2.876165122819849e-07, - "loss": 1.031, - "step": 6922 - }, - { - "epoch": 0.8324415318944267, - "grad_norm": 1.7593714603910193, - "learning_rate": 2.872141800850201e-07, - "loss": 1.0215, - "step": 6923 - }, - { - "epoch": 0.8325617747850659, - "grad_norm": 2.3713994088406953, - "learning_rate": 2.868121077163024e-07, - "loss": 0.9616, - "step": 6924 - }, - { - "epoch": 0.8326820176757049, - "grad_norm": 1.7955186451618657, - "learning_rate": 2.864102952368257e-07, - "loss": 0.9506, - "step": 6925 - }, - { - "epoch": 0.832802260566344, - "grad_norm": 1.3527830929731164, - "learning_rate": 2.860087427075444e-07, - "loss": 0.8254, - "step": 6926 - }, - { - "epoch": 0.8329225034569832, - "grad_norm": 3.5637542123010335, - "learning_rate": 2.856074501893744e-07, - "loss": 1.0909, - "step": 6927 - }, - { - "epoch": 0.8330427463476222, - "grad_norm": 1.8046480645168814, - "learning_rate": 2.8520641774319054e-07, - "loss": 1.0436, - "step": 6928 - }, - { - "epoch": 0.8331629892382613, - "grad_norm": 2.0050840132846277, - "learning_rate": 2.848056454298309e-07, - "loss": 0.9848, - "step": 6929 - }, - { - "epoch": 0.8332832321289004, - "grad_norm": 7.707594497601254, - "learning_rate": 2.844051333100905e-07, - "loss": 0.8839, - "step": 6930 - }, - { - "epoch": 0.8334034750195395, - "grad_norm": 9.409335317942935, - "learning_rate": 2.840048814447269e-07, - "loss": 1.07, - "step": 6931 - }, - { - "epoch": 0.8335237179101785, - "grad_norm": 2.6634407996059104, - "learning_rate": 2.836048898944587e-07, - "loss": 0.9624, - "step": 6932 - }, - { - "epoch": 0.8336439608008177, - "grad_norm": 2.435841216019013, - "learning_rate": 2.832051587199642e-07, - "loss": 0.954, - "step": 6933 - }, - { - "epoch": 0.8337642036914568, - "grad_norm": 0.8016624626305864, - "learning_rate": 2.828056879818821e-07, - "loss": 0.8352, - "step": 6934 - }, - { - "epoch": 0.8338844465820958, - "grad_norm": 1.9093756767808678, - "learning_rate": 2.824064777408117e-07, - "loss": 1.06, - "step": 6935 - }, - { - "epoch": 0.8340046894727349, - "grad_norm": 3.5013084690560183, - "learning_rate": 2.8200752805731263e-07, - "loss": 0.9874, - "step": 6936 - }, - { - "epoch": 0.834124932363374, - "grad_norm": 2.6186408263690644, - "learning_rate": 2.8160883899190625e-07, - "loss": 1.0371, - "step": 6937 - }, - { - "epoch": 0.8342451752540131, - "grad_norm": 2.7357805399855937, - "learning_rate": 2.8121041060507234e-07, - "loss": 0.9654, - "step": 6938 - }, - { - "epoch": 0.8343654181446521, - "grad_norm": 2.0000956035652093, - "learning_rate": 2.808122429572528e-07, - "loss": 0.9445, - "step": 6939 - }, - { - "epoch": 0.8344856610352913, - "grad_norm": 3.26848480724336, - "learning_rate": 2.804143361088489e-07, - "loss": 0.9878, - "step": 6940 - }, - { - "epoch": 0.8346059039259304, - "grad_norm": 2.112229062374267, - "learning_rate": 2.8001669012022277e-07, - "loss": 0.9913, - "step": 6941 - }, - { - "epoch": 0.8347261468165694, - "grad_norm": 1.568850163761418, - "learning_rate": 2.7961930505169795e-07, - "loss": 0.9264, - "step": 6942 - }, - { - "epoch": 0.8348463897072086, - "grad_norm": 2.0795097613315368, - "learning_rate": 2.792221809635558e-07, - "loss": 0.9912, - "step": 6943 - }, - { - "epoch": 0.8349666325978476, - "grad_norm": 1.8395700433351097, - "learning_rate": 2.788253179160411e-07, - "loss": 0.982, - "step": 6944 - }, - { - "epoch": 0.8350868754884867, - "grad_norm": 1.6895663008515365, - "learning_rate": 2.7842871596935725e-07, - "loss": 0.8749, - "step": 6945 - }, - { - "epoch": 0.8352071183791259, - "grad_norm": 2.2026641580153274, - "learning_rate": 2.780323751836682e-07, - "loss": 0.919, - "step": 6946 - }, - { - "epoch": 0.8353273612697649, - "grad_norm": 1.4351663305668867, - "learning_rate": 2.7763629561909876e-07, - "loss": 1.0139, - "step": 6947 - }, - { - "epoch": 0.835447604160404, - "grad_norm": 1.8803422480381278, - "learning_rate": 2.772404773357335e-07, - "loss": 0.9939, - "step": 6948 - }, - { - "epoch": 0.8355678470510431, - "grad_norm": 1.788459109706192, - "learning_rate": 2.7684492039361853e-07, - "loss": 1.0068, - "step": 6949 - }, - { - "epoch": 0.8356880899416822, - "grad_norm": 2.6547093130633406, - "learning_rate": 2.764496248527586e-07, - "loss": 1.0662, - "step": 6950 - }, - { - "epoch": 0.8358083328323213, - "grad_norm": 2.147194464623242, - "learning_rate": 2.760545907731211e-07, - "loss": 1.0093, - "step": 6951 - }, - { - "epoch": 0.8359285757229604, - "grad_norm": 6.557249457415613, - "learning_rate": 2.75659818214631e-07, - "loss": 0.9053, - "step": 6952 - }, - { - "epoch": 0.8360488186135995, - "grad_norm": 2.174321724375233, - "learning_rate": 2.752653072371749e-07, - "loss": 1.0158, - "step": 6953 - }, - { - "epoch": 0.8361690615042385, - "grad_norm": 1.6711860734672483, - "learning_rate": 2.7487105790060105e-07, - "loss": 0.9721, - "step": 6954 - }, - { - "epoch": 0.8362893043948777, - "grad_norm": 1.9410777130606773, - "learning_rate": 2.7447707026471587e-07, - "loss": 0.9225, - "step": 6955 - }, - { - "epoch": 0.8364095472855168, - "grad_norm": 2.5527433411280946, - "learning_rate": 2.740833443892874e-07, - "loss": 1.0307, - "step": 6956 - }, - { - "epoch": 0.8365297901761558, - "grad_norm": 2.554351597905793, - "learning_rate": 2.7368988033404327e-07, - "loss": 1.0189, - "step": 6957 - }, - { - "epoch": 0.836650033066795, - "grad_norm": 1.6879432943681614, - "learning_rate": 2.732966781586712e-07, - "loss": 1.0772, - "step": 6958 - }, - { - "epoch": 0.836770275957434, - "grad_norm": 2.5405721548667874, - "learning_rate": 2.729037379228205e-07, - "loss": 0.9002, - "step": 6959 - }, - { - "epoch": 0.8368905188480731, - "grad_norm": 2.159211046809813, - "learning_rate": 2.725110596860998e-07, - "loss": 1.0347, - "step": 6960 - }, - { - "epoch": 0.8370107617387123, - "grad_norm": 2.0612972104673237, - "learning_rate": 2.7211864350807776e-07, - "loss": 0.932, - "step": 6961 - }, - { - "epoch": 0.8371310046293513, - "grad_norm": 1.6688894549330897, - "learning_rate": 2.717264894482836e-07, - "loss": 0.9663, - "step": 6962 - }, - { - "epoch": 0.8372512475199904, - "grad_norm": 2.091257462109137, - "learning_rate": 2.7133459756620646e-07, - "loss": 1.0397, - "step": 6963 - }, - { - "epoch": 0.8373714904106295, - "grad_norm": 1.854071096750451, - "learning_rate": 2.7094296792129733e-07, - "loss": 0.9647, - "step": 6964 - }, - { - "epoch": 0.8374917333012686, - "grad_norm": 1.6861945860448122, - "learning_rate": 2.7055160057296424e-07, - "loss": 0.9837, - "step": 6965 - }, - { - "epoch": 0.8376119761919076, - "grad_norm": 2.170110520919188, - "learning_rate": 2.7016049558057896e-07, - "loss": 0.9475, - "step": 6966 - }, - { - "epoch": 0.8377322190825467, - "grad_norm": 2.058718827734842, - "learning_rate": 2.6976965300347074e-07, - "loss": 0.9427, - "step": 6967 - }, - { - "epoch": 0.8378524619731859, - "grad_norm": 2.3087482432971624, - "learning_rate": 2.693790729009309e-07, - "loss": 0.9253, - "step": 6968 - }, - { - "epoch": 0.8379727048638249, - "grad_norm": 2.4185272705673144, - "learning_rate": 2.6898875533220946e-07, - "loss": 1.1094, - "step": 6969 - }, - { - "epoch": 0.838092947754464, - "grad_norm": 1.681945843971203, - "learning_rate": 2.685987003565171e-07, - "loss": 1.0441, - "step": 6970 - }, - { - "epoch": 0.8382131906451031, - "grad_norm": 2.6431510625615524, - "learning_rate": 2.6820890803302566e-07, - "loss": 0.9821, - "step": 6971 - }, - { - "epoch": 0.8383334335357422, - "grad_norm": 3.7812013228885166, - "learning_rate": 2.6781937842086557e-07, - "loss": 1.0451, - "step": 6972 - }, - { - "epoch": 0.8384536764263812, - "grad_norm": 1.8597445721747705, - "learning_rate": 2.6743011157912933e-07, - "loss": 0.9125, - "step": 6973 - }, - { - "epoch": 0.8385739193170204, - "grad_norm": 2.075105466805063, - "learning_rate": 2.6704110756686725e-07, - "loss": 0.8841, - "step": 6974 - }, - { - "epoch": 0.8386941622076595, - "grad_norm": 2.6272059662824345, - "learning_rate": 2.6665236644309085e-07, - "loss": 1.0704, - "step": 6975 - }, - { - "epoch": 0.8388144050982985, - "grad_norm": 1.813136811032201, - "learning_rate": 2.662638882667727e-07, - "loss": 1.0216, - "step": 6976 - }, - { - "epoch": 0.8389346479889377, - "grad_norm": 2.4839084595046925, - "learning_rate": 2.658756730968443e-07, - "loss": 0.9619, - "step": 6977 - }, - { - "epoch": 0.8390548908795767, - "grad_norm": 2.0183014362480733, - "learning_rate": 2.654877209921975e-07, - "loss": 1.1125, - "step": 6978 - }, - { - "epoch": 0.8391751337702158, - "grad_norm": 2.6294198746022146, - "learning_rate": 2.651000320116843e-07, - "loss": 0.8626, - "step": 6979 - }, - { - "epoch": 0.839295376660855, - "grad_norm": 1.7597799184136154, - "learning_rate": 2.647126062141163e-07, - "loss": 0.9845, - "step": 6980 - }, - { - "epoch": 0.839415619551494, - "grad_norm": 2.549118832200396, - "learning_rate": 2.643254436582669e-07, - "loss": 1.0626, - "step": 6981 - }, - { - "epoch": 0.8395358624421331, - "grad_norm": 2.4598258277828013, - "learning_rate": 2.6393854440286743e-07, - "loss": 1.0521, - "step": 6982 - }, - { - "epoch": 0.8396561053327722, - "grad_norm": 1.9731448937453269, - "learning_rate": 2.6355190850661045e-07, - "loss": 0.9347, - "step": 6983 - }, - { - "epoch": 0.8397763482234113, - "grad_norm": 1.5058342007562573, - "learning_rate": 2.631655360281486e-07, - "loss": 1.0939, - "step": 6984 - }, - { - "epoch": 0.8398965911140504, - "grad_norm": 1.827838712267235, - "learning_rate": 2.6277942702609323e-07, - "loss": 0.8882, - "step": 6985 - }, - { - "epoch": 0.8400168340046895, - "grad_norm": 1.9118985146519596, - "learning_rate": 2.623935815590186e-07, - "loss": 1.099, - "step": 6986 - }, - { - "epoch": 0.8401370768953286, - "grad_norm": 1.8045538650510622, - "learning_rate": 2.6200799968545516e-07, - "loss": 1.0411, - "step": 6987 - }, - { - "epoch": 0.8402573197859676, - "grad_norm": 0.8160070511078523, - "learning_rate": 2.616226814638969e-07, - "loss": 0.8256, - "step": 6988 - }, - { - "epoch": 0.8403775626766068, - "grad_norm": 3.6277381815622864, - "learning_rate": 2.612376269527954e-07, - "loss": 1.0042, - "step": 6989 - }, - { - "epoch": 0.8404978055672458, - "grad_norm": 1.83057200669322, - "learning_rate": 2.608528362105635e-07, - "loss": 0.9087, - "step": 6990 - }, - { - "epoch": 0.8406180484578849, - "grad_norm": 2.122862694690312, - "learning_rate": 2.6046830929557374e-07, - "loss": 0.955, - "step": 6991 - }, - { - "epoch": 0.8407382913485241, - "grad_norm": 2.364859162990221, - "learning_rate": 2.6008404626615776e-07, - "loss": 1.0803, - "step": 6992 - }, - { - "epoch": 0.8408585342391631, - "grad_norm": 3.1062560421061707, - "learning_rate": 2.597000471806092e-07, - "loss": 0.9634, - "step": 6993 - }, - { - "epoch": 0.8409787771298022, - "grad_norm": 2.0372539114217627, - "learning_rate": 2.593163120971793e-07, - "loss": 0.9561, - "step": 6994 - }, - { - "epoch": 0.8410990200204413, - "grad_norm": 2.084065715485868, - "learning_rate": 2.5893284107408165e-07, - "loss": 0.9168, - "step": 6995 - }, - { - "epoch": 0.8412192629110804, - "grad_norm": 1.7539751680762963, - "learning_rate": 2.5854963416948726e-07, - "loss": 1.0179, - "step": 6996 - }, - { - "epoch": 0.8413395058017195, - "grad_norm": 1.8438384471303908, - "learning_rate": 2.5816669144152816e-07, - "loss": 0.917, - "step": 6997 - }, - { - "epoch": 0.8414597486923585, - "grad_norm": 0.9498313528244309, - "learning_rate": 2.5778401294829777e-07, - "loss": 0.9626, - "step": 6998 - }, - { - "epoch": 0.8415799915829977, - "grad_norm": 1.6475227312710838, - "learning_rate": 2.574015987478473e-07, - "loss": 0.8766, - "step": 6999 - }, - { - "epoch": 0.8417002344736367, - "grad_norm": 2.598452797280964, - "learning_rate": 2.570194488981887e-07, - "loss": 1.092, - "step": 7000 - }, - { - "epoch": 0.8418204773642758, - "grad_norm": 0.8619474230373445, - "learning_rate": 2.566375634572939e-07, - "loss": 0.8699, - "step": 7001 - }, - { - "epoch": 0.841940720254915, - "grad_norm": 2.316125862430649, - "learning_rate": 2.562559424830943e-07, - "loss": 0.9855, - "step": 7002 - }, - { - "epoch": 0.842060963145554, - "grad_norm": 2.345089135661845, - "learning_rate": 2.5587458603348256e-07, - "loss": 0.9311, - "step": 7003 - }, - { - "epoch": 0.8421812060361931, - "grad_norm": 2.5790181780230323, - "learning_rate": 2.554934941663085e-07, - "loss": 1.0678, - "step": 7004 - }, - { - "epoch": 0.8423014489268322, - "grad_norm": 2.4044637614387168, - "learning_rate": 2.5511266693938484e-07, - "loss": 0.9597, - "step": 7005 - }, - { - "epoch": 0.8424216918174713, - "grad_norm": 1.4192530173020494, - "learning_rate": 2.547321044104822e-07, - "loss": 0.9991, - "step": 7006 - }, - { - "epoch": 0.8425419347081103, - "grad_norm": 2.611239652197729, - "learning_rate": 2.5435180663733113e-07, - "loss": 0.9977, - "step": 7007 - }, - { - "epoch": 0.8426621775987495, - "grad_norm": 2.9054953302868767, - "learning_rate": 2.539717736776241e-07, - "loss": 0.9445, - "step": 7008 - }, - { - "epoch": 0.8427824204893886, - "grad_norm": 1.436050015463118, - "learning_rate": 2.535920055890097e-07, - "loss": 0.9874, - "step": 7009 - }, - { - "epoch": 0.8429026633800276, - "grad_norm": 1.9464391793078217, - "learning_rate": 2.5321250242910006e-07, - "loss": 0.8753, - "step": 7010 - }, - { - "epoch": 0.8430229062706668, - "grad_norm": 1.6689677407156678, - "learning_rate": 2.5283326425546493e-07, - "loss": 1.088, - "step": 7011 - }, - { - "epoch": 0.8431431491613058, - "grad_norm": 1.9716815013225313, - "learning_rate": 2.5245429112563443e-07, - "loss": 0.9212, - "step": 7012 - }, - { - "epoch": 0.8432633920519449, - "grad_norm": 1.8097751787618133, - "learning_rate": 2.5207558309709865e-07, - "loss": 1.0503, - "step": 7013 - }, - { - "epoch": 0.8433836349425841, - "grad_norm": 0.7103781123807174, - "learning_rate": 2.516971402273065e-07, - "loss": 0.8149, - "step": 7014 - }, - { - "epoch": 0.8435038778332231, - "grad_norm": 1.962772620150815, - "learning_rate": 2.513189625736687e-07, - "loss": 0.8996, - "step": 7015 - }, - { - "epoch": 0.8436241207238622, - "grad_norm": 2.1152967054600467, - "learning_rate": 2.509410501935534e-07, - "loss": 0.931, - "step": 7016 - }, - { - "epoch": 0.8437443636145013, - "grad_norm": 3.7763579698701184, - "learning_rate": 2.5056340314429116e-07, - "loss": 0.991, - "step": 7017 - }, - { - "epoch": 0.8438646065051404, - "grad_norm": 2.5591640623032403, - "learning_rate": 2.5018602148316904e-07, - "loss": 1.0295, - "step": 7018 - }, - { - "epoch": 0.8439848493957794, - "grad_norm": 1.7567352384807784, - "learning_rate": 2.498089052674359e-07, - "loss": 1.0221, - "step": 7019 - }, - { - "epoch": 0.8441050922864186, - "grad_norm": 2.063770365345943, - "learning_rate": 2.494320545543007e-07, - "loss": 0.9843, - "step": 7020 - }, - { - "epoch": 0.8442253351770577, - "grad_norm": 2.306970118957058, - "learning_rate": 2.490554694009308e-07, - "loss": 0.9059, - "step": 7021 - }, - { - "epoch": 0.8443455780676967, - "grad_norm": 3.4295380370781885, - "learning_rate": 2.4867914986445426e-07, - "loss": 1.0196, - "step": 7022 - }, - { - "epoch": 0.8444658209583359, - "grad_norm": 2.070729569944676, - "learning_rate": 2.483030960019581e-07, - "loss": 0.9335, - "step": 7023 - }, - { - "epoch": 0.8445860638489749, - "grad_norm": 0.7617230537488594, - "learning_rate": 2.479273078704891e-07, - "loss": 0.8004, - "step": 7024 - }, - { - "epoch": 0.844706306739614, - "grad_norm": 0.7903446318586905, - "learning_rate": 2.475517855270552e-07, - "loss": 0.8975, - "step": 7025 - }, - { - "epoch": 0.8448265496302532, - "grad_norm": 3.1573244286757416, - "learning_rate": 2.4717652902862143e-07, - "loss": 0.958, - "step": 7026 - }, - { - "epoch": 0.8449467925208922, - "grad_norm": 1.6235305304210415, - "learning_rate": 2.4680153843211495e-07, - "loss": 1.0445, - "step": 7027 - }, - { - "epoch": 0.8450670354115313, - "grad_norm": 1.9639683132606656, - "learning_rate": 2.464268137944212e-07, - "loss": 0.954, - "step": 7028 - }, - { - "epoch": 0.8451872783021703, - "grad_norm": 2.049490502474375, - "learning_rate": 2.46052355172385e-07, - "loss": 1.0133, - "step": 7029 - }, - { - "epoch": 0.8453075211928095, - "grad_norm": 1.8077402428485165, - "learning_rate": 2.456781626228128e-07, - "loss": 0.9761, - "step": 7030 - }, - { - "epoch": 0.8454277640834486, - "grad_norm": 1.0709090971707524, - "learning_rate": 2.453042362024675e-07, - "loss": 0.9775, - "step": 7031 - }, - { - "epoch": 0.8455480069740876, - "grad_norm": 2.0484227742263057, - "learning_rate": 2.449305759680751e-07, - "loss": 0.9615, - "step": 7032 - }, - { - "epoch": 0.8456682498647268, - "grad_norm": 1.7089136425094544, - "learning_rate": 2.445571819763188e-07, - "loss": 0.9794, - "step": 7033 - }, - { - "epoch": 0.8457884927553658, - "grad_norm": 1.5209746970053986, - "learning_rate": 2.4418405428384227e-07, - "loss": 0.8143, - "step": 7034 - }, - { - "epoch": 0.8459087356460049, - "grad_norm": 2.0191448144468893, - "learning_rate": 2.4381119294724864e-07, - "loss": 0.9494, - "step": 7035 - }, - { - "epoch": 0.846028978536644, - "grad_norm": 2.555835617540365, - "learning_rate": 2.434385980231004e-07, - "loss": 0.7721, - "step": 7036 - }, - { - "epoch": 0.8461492214272831, - "grad_norm": 1.7395536759031802, - "learning_rate": 2.4306626956792043e-07, - "loss": 0.8804, - "step": 7037 - }, - { - "epoch": 0.8462694643179222, - "grad_norm": 8.336077988360435, - "learning_rate": 2.4269420763819017e-07, - "loss": 0.9864, - "step": 7038 - }, - { - "epoch": 0.8463897072085613, - "grad_norm": 2.407311539765326, - "learning_rate": 2.4232241229035223e-07, - "loss": 1.0605, - "step": 7039 - }, - { - "epoch": 0.8465099500992004, - "grad_norm": 0.8402727317833696, - "learning_rate": 2.419508835808064e-07, - "loss": 0.8277, - "step": 7040 - }, - { - "epoch": 0.8466301929898394, - "grad_norm": 3.7039083304215312, - "learning_rate": 2.415796215659134e-07, - "loss": 0.8595, - "step": 7041 - }, - { - "epoch": 0.8467504358804786, - "grad_norm": 2.078292954620356, - "learning_rate": 2.412086263019939e-07, - "loss": 0.9993, - "step": 7042 - }, - { - "epoch": 0.8468706787711177, - "grad_norm": 1.9991672689615316, - "learning_rate": 2.408378978453276e-07, - "loss": 1.0299, - "step": 7043 - }, - { - "epoch": 0.8469909216617567, - "grad_norm": 0.7963793465970667, - "learning_rate": 2.404674362521533e-07, - "loss": 0.8927, - "step": 7044 - }, - { - "epoch": 0.8471111645523959, - "grad_norm": 2.1964281172978977, - "learning_rate": 2.4009724157866997e-07, - "loss": 0.9713, - "step": 7045 - }, - { - "epoch": 0.8472314074430349, - "grad_norm": 6.252409508685963, - "learning_rate": 2.3972731388103564e-07, - "loss": 0.9941, - "step": 7046 - }, - { - "epoch": 0.847351650333674, - "grad_norm": 0.8079123459284512, - "learning_rate": 2.393576532153687e-07, - "loss": 0.8804, - "step": 7047 - }, - { - "epoch": 0.8474718932243132, - "grad_norm": 0.9618566505610309, - "learning_rate": 2.389882596377453e-07, - "loss": 0.8571, - "step": 7048 - }, - { - "epoch": 0.8475921361149522, - "grad_norm": 1.8011611239911398, - "learning_rate": 2.386191332042031e-07, - "loss": 0.9921, - "step": 7049 - }, - { - "epoch": 0.8477123790055913, - "grad_norm": 1.9229585027913738, - "learning_rate": 2.3825027397073794e-07, - "loss": 0.9578, - "step": 7050 - }, - { - "epoch": 0.8478326218962304, - "grad_norm": 2.165530371421925, - "learning_rate": 2.3788168199330515e-07, - "loss": 0.9015, - "step": 7051 - }, - { - "epoch": 0.8479528647868695, - "grad_norm": 1.6888473218409228, - "learning_rate": 2.3751335732782074e-07, - "loss": 0.9583, - "step": 7052 - }, - { - "epoch": 0.8480731076775085, - "grad_norm": 2.6341399064153417, - "learning_rate": 2.371453000301582e-07, - "loss": 1.0266, - "step": 7053 - }, - { - "epoch": 0.8481933505681477, - "grad_norm": 1.8692467479016823, - "learning_rate": 2.3677751015615222e-07, - "loss": 0.9748, - "step": 7054 - }, - { - "epoch": 0.8483135934587868, - "grad_norm": 1.7291582712482878, - "learning_rate": 2.3640998776159593e-07, - "loss": 1.0808, - "step": 7055 - }, - { - "epoch": 0.8484338363494258, - "grad_norm": 1.9219572468348218, - "learning_rate": 2.3604273290224253e-07, - "loss": 1.0393, - "step": 7056 - }, - { - "epoch": 0.848554079240065, - "grad_norm": 1.9414255920306294, - "learning_rate": 2.356757456338039e-07, - "loss": 0.9746, - "step": 7057 - }, - { - "epoch": 0.848674322130704, - "grad_norm": 0.8329857737229691, - "learning_rate": 2.3530902601195147e-07, - "loss": 0.884, - "step": 7058 - }, - { - "epoch": 0.8487945650213431, - "grad_norm": 2.082441851295224, - "learning_rate": 2.34942574092317e-07, - "loss": 1.0112, - "step": 7059 - }, - { - "epoch": 0.8489148079119821, - "grad_norm": 1.7536373484532475, - "learning_rate": 2.3457638993049045e-07, - "loss": 0.9945, - "step": 7060 - }, - { - "epoch": 0.8490350508026213, - "grad_norm": 2.349533067131621, - "learning_rate": 2.3421047358202252e-07, - "loss": 0.873, - "step": 7061 - }, - { - "epoch": 0.8491552936932604, - "grad_norm": 3.9055137245563167, - "learning_rate": 2.3384482510242144e-07, - "loss": 1.0523, - "step": 7062 - }, - { - "epoch": 0.8492755365838994, - "grad_norm": 2.0756238068643533, - "learning_rate": 2.3347944454715575e-07, - "loss": 1.0029, - "step": 7063 - }, - { - "epoch": 0.8493957794745386, - "grad_norm": 1.7691334662584242, - "learning_rate": 2.331143319716542e-07, - "loss": 0.8952, - "step": 7064 - }, - { - "epoch": 0.8495160223651776, - "grad_norm": 2.1655853091699537, - "learning_rate": 2.3274948743130363e-07, - "loss": 0.8854, - "step": 7065 - }, - { - "epoch": 0.8496362652558167, - "grad_norm": 2.2285860202646965, - "learning_rate": 2.3238491098145085e-07, - "loss": 1.0238, - "step": 7066 - }, - { - "epoch": 0.8497565081464559, - "grad_norm": 2.607520718076183, - "learning_rate": 2.3202060267740141e-07, - "loss": 0.9648, - "step": 7067 - }, - { - "epoch": 0.8498767510370949, - "grad_norm": 4.426976364937786, - "learning_rate": 2.3165656257442044e-07, - "loss": 1.0046, - "step": 7068 - }, - { - "epoch": 0.849996993927734, - "grad_norm": 2.142684502686235, - "learning_rate": 2.31292790727734e-07, - "loss": 1.1307, - "step": 7069 - }, - { - "epoch": 0.8501172368183731, - "grad_norm": 2.3074258344805836, - "learning_rate": 2.3092928719252392e-07, - "loss": 1.0357, - "step": 7070 - }, - { - "epoch": 0.8502374797090122, - "grad_norm": 2.2508255185901724, - "learning_rate": 2.3056605202393475e-07, - "loss": 1.0122, - "step": 7071 - }, - { - "epoch": 0.8503577225996513, - "grad_norm": 1.8940181705968788, - "learning_rate": 2.3020308527706888e-07, - "loss": 0.9014, - "step": 7072 - }, - { - "epoch": 0.8504779654902904, - "grad_norm": 2.2068905945915356, - "learning_rate": 2.2984038700698715e-07, - "loss": 1.114, - "step": 7073 - }, - { - "epoch": 0.8505982083809295, - "grad_norm": 1.871002895881047, - "learning_rate": 2.2947795726871222e-07, - "loss": 1.0193, - "step": 7074 - }, - { - "epoch": 0.8507184512715685, - "grad_norm": 3.575767706757248, - "learning_rate": 2.2911579611722253e-07, - "loss": 1.0839, - "step": 7075 - }, - { - "epoch": 0.8508386941622077, - "grad_norm": 1.6751977576048598, - "learning_rate": 2.2875390360745905e-07, - "loss": 1.1012, - "step": 7076 - }, - { - "epoch": 0.8509589370528468, - "grad_norm": 2.1775666450261113, - "learning_rate": 2.2839227979432008e-07, - "loss": 1.0095, - "step": 7077 - }, - { - "epoch": 0.8510791799434858, - "grad_norm": 8.521844506987295, - "learning_rate": 2.2803092473266373e-07, - "loss": 1.0765, - "step": 7078 - }, - { - "epoch": 0.851199422834125, - "grad_norm": 1.9946625537438922, - "learning_rate": 2.2766983847730724e-07, - "loss": 1.0955, - "step": 7079 - }, - { - "epoch": 0.851319665724764, - "grad_norm": 3.831897604942066, - "learning_rate": 2.2730902108302663e-07, - "loss": 0.8985, - "step": 7080 - }, - { - "epoch": 0.8514399086154031, - "grad_norm": 5.564152332877999, - "learning_rate": 2.269484726045583e-07, - "loss": 0.9201, - "step": 7081 - }, - { - "epoch": 0.8515601515060423, - "grad_norm": 1.971124216540518, - "learning_rate": 2.2658819309659672e-07, - "loss": 1.0176, - "step": 7082 - }, - { - "epoch": 0.8516803943966813, - "grad_norm": 1.8805115121592, - "learning_rate": 2.2622818261379706e-07, - "loss": 1.0671, - "step": 7083 - }, - { - "epoch": 0.8518006372873204, - "grad_norm": 1.8736587813697312, - "learning_rate": 2.2586844121077142e-07, - "loss": 0.9805, - "step": 7084 - }, - { - "epoch": 0.8519208801779595, - "grad_norm": 1.7166005652703966, - "learning_rate": 2.2550896894209215e-07, - "loss": 0.9505, - "step": 7085 - }, - { - "epoch": 0.8520411230685986, - "grad_norm": 0.7153189491924504, - "learning_rate": 2.2514976586229184e-07, - "loss": 0.8214, - "step": 7086 - }, - { - "epoch": 0.8521613659592376, - "grad_norm": 0.8742660441487694, - "learning_rate": 2.247908320258609e-07, - "loss": 0.8587, - "step": 7087 - }, - { - "epoch": 0.8522816088498768, - "grad_norm": 2.4160408876003405, - "learning_rate": 2.2443216748724914e-07, - "loss": 1.023, - "step": 7088 - }, - { - "epoch": 0.8524018517405159, - "grad_norm": 79.1709205956881, - "learning_rate": 2.2407377230086588e-07, - "loss": 0.9771, - "step": 7089 - }, - { - "epoch": 0.8525220946311549, - "grad_norm": 1.8064473476639729, - "learning_rate": 2.23715646521079e-07, - "loss": 1.0619, - "step": 7090 - }, - { - "epoch": 0.852642337521794, - "grad_norm": 2.1745144839283572, - "learning_rate": 2.2335779020221724e-07, - "loss": 1.0636, - "step": 7091 - }, - { - "epoch": 0.8527625804124331, - "grad_norm": 2.2055406245898777, - "learning_rate": 2.2300020339856497e-07, - "loss": 0.8541, - "step": 7092 - }, - { - "epoch": 0.8528828233030722, - "grad_norm": 2.214636952805844, - "learning_rate": 2.2264288616436966e-07, - "loss": 1.0078, - "step": 7093 - }, - { - "epoch": 0.8530030661937112, - "grad_norm": 3.194172177713485, - "learning_rate": 2.222858385538351e-07, - "loss": 0.9634, - "step": 7094 - }, - { - "epoch": 0.8531233090843504, - "grad_norm": 1.9342708134364004, - "learning_rate": 2.2192906062112527e-07, - "loss": 0.9036, - "step": 7095 - }, - { - "epoch": 0.8532435519749895, - "grad_norm": 2.0917018152857794, - "learning_rate": 2.2157255242036377e-07, - "loss": 0.9346, - "step": 7096 - }, - { - "epoch": 0.8533637948656285, - "grad_norm": 1.9806830235724655, - "learning_rate": 2.2121631400563135e-07, - "loss": 0.9741, - "step": 7097 - }, - { - "epoch": 0.8534840377562677, - "grad_norm": 0.8512324822369666, - "learning_rate": 2.208603454309701e-07, - "loss": 0.8366, - "step": 7098 - }, - { - "epoch": 0.8536042806469067, - "grad_norm": 2.1813552656159247, - "learning_rate": 2.2050464675037994e-07, - "loss": 0.9356, - "step": 7099 - }, - { - "epoch": 0.8537245235375458, - "grad_norm": 1.8746486334595838, - "learning_rate": 2.2014921801782016e-07, - "loss": 0.9598, - "step": 7100 - }, - { - "epoch": 0.853844766428185, - "grad_norm": 2.2107879264569505, - "learning_rate": 2.1979405928720872e-07, - "loss": 0.967, - "step": 7101 - }, - { - "epoch": 0.853965009318824, - "grad_norm": 5.091042764568381, - "learning_rate": 2.1943917061242257e-07, - "loss": 1.0183, - "step": 7102 - }, - { - "epoch": 0.8540852522094631, - "grad_norm": 2.815446878063847, - "learning_rate": 2.1908455204729903e-07, - "loss": 0.8951, - "step": 7103 - }, - { - "epoch": 0.8542054951001022, - "grad_norm": 2.227393493846164, - "learning_rate": 2.1873020364563265e-07, - "loss": 1.008, - "step": 7104 - }, - { - "epoch": 0.8543257379907413, - "grad_norm": 2.4894850856489783, - "learning_rate": 2.183761254611789e-07, - "loss": 0.9935, - "step": 7105 - }, - { - "epoch": 0.8544459808813804, - "grad_norm": 1.9470363470484842, - "learning_rate": 2.1802231754764987e-07, - "loss": 0.9387, - "step": 7106 - }, - { - "epoch": 0.8545662237720195, - "grad_norm": 1.8869695713923411, - "learning_rate": 2.17668779958718e-07, - "loss": 0.9901, - "step": 7107 - }, - { - "epoch": 0.8546864666626586, - "grad_norm": 2.0328803928540387, - "learning_rate": 2.1731551274801553e-07, - "loss": 1.0289, - "step": 7108 - }, - { - "epoch": 0.8548067095532976, - "grad_norm": 1.9614534952999123, - "learning_rate": 2.169625159691324e-07, - "loss": 0.8424, - "step": 7109 - }, - { - "epoch": 0.8549269524439368, - "grad_norm": 2.2234359473131216, - "learning_rate": 2.1660978967561784e-07, - "loss": 0.9796, - "step": 7110 - }, - { - "epoch": 0.8550471953345758, - "grad_norm": 2.001681693680022, - "learning_rate": 2.1625733392098035e-07, - "loss": 1.019, - "step": 7111 - }, - { - "epoch": 0.8551674382252149, - "grad_norm": 1.9102686437449117, - "learning_rate": 2.159051487586867e-07, - "loss": 1.0268, - "step": 7112 - }, - { - "epoch": 0.8552876811158541, - "grad_norm": 3.1934892820480334, - "learning_rate": 2.155532342421642e-07, - "loss": 0.9568, - "step": 7113 - }, - { - "epoch": 0.8554079240064931, - "grad_norm": 1.7266598497106438, - "learning_rate": 2.1520159042479636e-07, - "loss": 1.0154, - "step": 7114 - }, - { - "epoch": 0.8555281668971322, - "grad_norm": 2.3289662319829363, - "learning_rate": 2.148502173599287e-07, - "loss": 0.9365, - "step": 7115 - }, - { - "epoch": 0.8556484097877713, - "grad_norm": 1.7592560668641213, - "learning_rate": 2.1449911510086372e-07, - "loss": 0.8822, - "step": 7116 - }, - { - "epoch": 0.8557686526784104, - "grad_norm": 1.9613738163509375, - "learning_rate": 2.141482837008628e-07, - "loss": 1.001, - "step": 7117 - }, - { - "epoch": 0.8558888955690495, - "grad_norm": 1.7341127626961654, - "learning_rate": 2.1379772321314826e-07, - "loss": 0.9445, - "step": 7118 - }, - { - "epoch": 0.8560091384596886, - "grad_norm": 1.818818293596884, - "learning_rate": 2.1344743369089802e-07, - "loss": 1.0526, - "step": 7119 - }, - { - "epoch": 0.8561293813503277, - "grad_norm": 1.605875896423617, - "learning_rate": 2.130974151872522e-07, - "loss": 1.0451, - "step": 7120 - }, - { - "epoch": 0.8562496242409667, - "grad_norm": 1.8873338664121604, - "learning_rate": 2.1274766775530773e-07, - "loss": 1.0175, - "step": 7121 - }, - { - "epoch": 0.8563698671316058, - "grad_norm": 2.359847267833756, - "learning_rate": 2.1239819144812077e-07, - "loss": 1.0242, - "step": 7122 - }, - { - "epoch": 0.856490110022245, - "grad_norm": 1.6693963902744393, - "learning_rate": 2.1204898631870716e-07, - "loss": 0.9245, - "step": 7123 - }, - { - "epoch": 0.856610352912884, - "grad_norm": 1.9300617055960794, - "learning_rate": 2.1170005242004006e-07, - "loss": 0.9974, - "step": 7124 - }, - { - "epoch": 0.8567305958035231, - "grad_norm": 1.8701027174905271, - "learning_rate": 2.1135138980505384e-07, - "loss": 1.0068, - "step": 7125 - }, - { - "epoch": 0.8568508386941622, - "grad_norm": 1.7444834270832623, - "learning_rate": 2.110029985266395e-07, - "loss": 0.9618, - "step": 7126 - }, - { - "epoch": 0.8569710815848013, - "grad_norm": 1.7440058366695343, - "learning_rate": 2.1065487863764787e-07, - "loss": 0.9631, - "step": 7127 - }, - { - "epoch": 0.8570913244754403, - "grad_norm": 1.630903232007605, - "learning_rate": 2.1030703019088846e-07, - "loss": 1.0857, - "step": 7128 - }, - { - "epoch": 0.8572115673660795, - "grad_norm": 1.8032617923674128, - "learning_rate": 2.099594532391291e-07, - "loss": 0.9351, - "step": 7129 - }, - { - "epoch": 0.8573318102567186, - "grad_norm": 1.6775071297734248, - "learning_rate": 2.0961214783509806e-07, - "loss": 1.0147, - "step": 7130 - }, - { - "epoch": 0.8574520531473576, - "grad_norm": 1.973476488660036, - "learning_rate": 2.0926511403148051e-07, - "loss": 0.9764, - "step": 7131 - }, - { - "epoch": 0.8575722960379968, - "grad_norm": 2.0183317950006865, - "learning_rate": 2.0891835188092143e-07, - "loss": 0.9875, - "step": 7132 - }, - { - "epoch": 0.8576925389286358, - "grad_norm": 1.8974334196489564, - "learning_rate": 2.0857186143602434e-07, - "loss": 1.0405, - "step": 7133 - }, - { - "epoch": 0.8578127818192749, - "grad_norm": 1.8169759533749539, - "learning_rate": 2.0822564274935094e-07, - "loss": 0.9073, - "step": 7134 - }, - { - "epoch": 0.8579330247099141, - "grad_norm": 1.7060599979346371, - "learning_rate": 2.078796958734239e-07, - "loss": 0.899, - "step": 7135 - }, - { - "epoch": 0.8580532676005531, - "grad_norm": 3.022406508243757, - "learning_rate": 2.0753402086072124e-07, - "loss": 0.9711, - "step": 7136 - }, - { - "epoch": 0.8581735104911922, - "grad_norm": 2.8066871761318692, - "learning_rate": 2.071886177636828e-07, - "loss": 0.9863, - "step": 7137 - }, - { - "epoch": 0.8582937533818313, - "grad_norm": 19.967476148971997, - "learning_rate": 2.0684348663470575e-07, - "loss": 1.0496, - "step": 7138 - }, - { - "epoch": 0.8584139962724704, - "grad_norm": 1.973896082665692, - "learning_rate": 2.0649862752614555e-07, - "loss": 0.846, - "step": 7139 - }, - { - "epoch": 0.8585342391631094, - "grad_norm": 0.7732698278071066, - "learning_rate": 2.0615404049031838e-07, - "loss": 0.8252, - "step": 7140 - }, - { - "epoch": 0.8586544820537486, - "grad_norm": 2.1556824752741535, - "learning_rate": 2.0580972557949616e-07, - "loss": 1.0134, - "step": 7141 - }, - { - "epoch": 0.8587747249443877, - "grad_norm": 0.8087113889176939, - "learning_rate": 2.054656828459125e-07, - "loss": 0.7818, - "step": 7142 - }, - { - "epoch": 0.8588949678350267, - "grad_norm": 2.1763010277372197, - "learning_rate": 2.051219123417578e-07, - "loss": 0.9996, - "step": 7143 - }, - { - "epoch": 0.8590152107256659, - "grad_norm": 4.337193212735583, - "learning_rate": 2.0477841411918196e-07, - "loss": 0.8327, - "step": 7144 - }, - { - "epoch": 0.859135453616305, - "grad_norm": 2.016469024662441, - "learning_rate": 2.0443518823029326e-07, - "loss": 0.977, - "step": 7145 - }, - { - "epoch": 0.859255696506944, - "grad_norm": 2.0773669703155266, - "learning_rate": 2.0409223472715854e-07, - "loss": 0.998, - "step": 7146 - }, - { - "epoch": 0.8593759393975832, - "grad_norm": 5.702583851680677, - "learning_rate": 2.0374955366180434e-07, - "loss": 0.9734, - "step": 7147 - }, - { - "epoch": 0.8594961822882222, - "grad_norm": 1.8257630568884566, - "learning_rate": 2.034071450862147e-07, - "loss": 0.9563, - "step": 7148 - }, - { - "epoch": 0.8596164251788613, - "grad_norm": 1.6442327160465522, - "learning_rate": 2.030650090523327e-07, - "loss": 0.9995, - "step": 7149 - }, - { - "epoch": 0.8597366680695004, - "grad_norm": 1.727994713174715, - "learning_rate": 2.0272314561205995e-07, - "loss": 0.8295, - "step": 7150 - }, - { - "epoch": 0.8598569109601395, - "grad_norm": 2.109406421568632, - "learning_rate": 2.023815548172567e-07, - "loss": 0.961, - "step": 7151 - }, - { - "epoch": 0.8599771538507786, - "grad_norm": 1.614338156840551, - "learning_rate": 2.0204023671974267e-07, - "loss": 0.896, - "step": 7152 - }, - { - "epoch": 0.8600973967414177, - "grad_norm": 2.792537287750873, - "learning_rate": 2.0169919137129532e-07, - "loss": 1.0361, - "step": 7153 - }, - { - "epoch": 0.8602176396320568, - "grad_norm": 2.0228929412843857, - "learning_rate": 2.013584188236508e-07, - "loss": 0.9236, - "step": 7154 - }, - { - "epoch": 0.8603378825226958, - "grad_norm": 1.7831034470574962, - "learning_rate": 2.0101791912850396e-07, - "loss": 1.0253, - "step": 7155 - }, - { - "epoch": 0.8604581254133349, - "grad_norm": 2.0462492940129, - "learning_rate": 2.006776923375082e-07, - "loss": 0.8638, - "step": 7156 - }, - { - "epoch": 0.860578368303974, - "grad_norm": 1.6758151817887896, - "learning_rate": 2.003377385022764e-07, - "loss": 0.933, - "step": 7157 - }, - { - "epoch": 0.8606986111946131, - "grad_norm": 2.2210244354800035, - "learning_rate": 1.9999805767437826e-07, - "loss": 0.9987, - "step": 7158 - }, - { - "epoch": 0.8608188540852522, - "grad_norm": 1.7409403489623128, - "learning_rate": 1.9965864990534386e-07, - "loss": 0.9446, - "step": 7159 - }, - { - "epoch": 0.8609390969758913, - "grad_norm": 5.50291642980324, - "learning_rate": 1.9931951524666092e-07, - "loss": 1.003, - "step": 7160 - }, - { - "epoch": 0.8610593398665304, - "grad_norm": 1.6521664066602464, - "learning_rate": 1.9898065374977534e-07, - "loss": 1.0386, - "step": 7161 - }, - { - "epoch": 0.8611795827571694, - "grad_norm": 1.7996742669062982, - "learning_rate": 1.9864206546609342e-07, - "loss": 0.9556, - "step": 7162 - }, - { - "epoch": 0.8612998256478086, - "grad_norm": 1.8188554556250562, - "learning_rate": 1.983037504469771e-07, - "loss": 1.071, - "step": 7163 - }, - { - "epoch": 0.8614200685384477, - "grad_norm": 2.753476979231657, - "learning_rate": 1.9796570874374984e-07, - "loss": 0.8974, - "step": 7164 - }, - { - "epoch": 0.8615403114290867, - "grad_norm": 1.596782044095889, - "learning_rate": 1.976279404076917e-07, - "loss": 1.0015, - "step": 7165 - }, - { - "epoch": 0.8616605543197259, - "grad_norm": 1.9526282327233393, - "learning_rate": 1.9729044549004193e-07, - "loss": 0.9912, - "step": 7166 - }, - { - "epoch": 0.8617807972103649, - "grad_norm": 1.5881580160493005, - "learning_rate": 1.9695322404199822e-07, - "loss": 0.9336, - "step": 7167 - }, - { - "epoch": 0.861901040101004, - "grad_norm": 1.8063059233881695, - "learning_rate": 1.9661627611471654e-07, - "loss": 1.0521, - "step": 7168 - }, - { - "epoch": 0.8620212829916432, - "grad_norm": 1.9513073817826505, - "learning_rate": 1.9627960175931246e-07, - "loss": 0.9369, - "step": 7169 - }, - { - "epoch": 0.8621415258822822, - "grad_norm": 1.8640606607586339, - "learning_rate": 1.9594320102685847e-07, - "loss": 0.9738, - "step": 7170 - }, - { - "epoch": 0.8622617687729213, - "grad_norm": 2.041251808628232, - "learning_rate": 1.956070739683864e-07, - "loss": 0.8697, - "step": 7171 - }, - { - "epoch": 0.8623820116635604, - "grad_norm": 1.6353520990086305, - "learning_rate": 1.9527122063488678e-07, - "loss": 0.9734, - "step": 7172 - }, - { - "epoch": 0.8625022545541995, - "grad_norm": 1.5765018235960389, - "learning_rate": 1.9493564107730755e-07, - "loss": 1.031, - "step": 7173 - }, - { - "epoch": 0.8626224974448385, - "grad_norm": 1.9175548845018502, - "learning_rate": 1.9460033534655684e-07, - "loss": 0.844, - "step": 7174 - }, - { - "epoch": 0.8627427403354777, - "grad_norm": 7.029958106142674, - "learning_rate": 1.9426530349349978e-07, - "loss": 1.0669, - "step": 7175 - }, - { - "epoch": 0.8628629832261168, - "grad_norm": 1.9224544364976628, - "learning_rate": 1.9393054556896038e-07, - "loss": 0.8828, - "step": 7176 - }, - { - "epoch": 0.8629832261167558, - "grad_norm": 2.3036066139212354, - "learning_rate": 1.9359606162372133e-07, - "loss": 0.9217, - "step": 7177 - }, - { - "epoch": 0.863103469007395, - "grad_norm": 1.67961127529625, - "learning_rate": 1.9326185170852293e-07, - "loss": 0.9404, - "step": 7178 - }, - { - "epoch": 0.863223711898034, - "grad_norm": 1.9687501816522424, - "learning_rate": 1.9292791587406598e-07, - "loss": 0.9485, - "step": 7179 - }, - { - "epoch": 0.8633439547886731, - "grad_norm": 2.11491559304343, - "learning_rate": 1.9259425417100661e-07, - "loss": 1.0934, - "step": 7180 - }, - { - "epoch": 0.8634641976793123, - "grad_norm": 3.2422310332167736, - "learning_rate": 1.9226086664996234e-07, - "loss": 0.9754, - "step": 7181 - }, - { - "epoch": 0.8635844405699513, - "grad_norm": 2.336974153172524, - "learning_rate": 1.9192775336150712e-07, - "loss": 0.9745, - "step": 7182 - }, - { - "epoch": 0.8637046834605904, - "grad_norm": 0.7918245300371546, - "learning_rate": 1.915949143561739e-07, - "loss": 0.8055, - "step": 7183 - }, - { - "epoch": 0.8638249263512295, - "grad_norm": 2.2271881830592557, - "learning_rate": 1.9126234968445498e-07, - "loss": 1.0053, - "step": 7184 - }, - { - "epoch": 0.8639451692418686, - "grad_norm": 1.4922109072781657, - "learning_rate": 1.9093005939679884e-07, - "loss": 0.8957, - "step": 7185 - }, - { - "epoch": 0.8640654121325076, - "grad_norm": 2.234790896676393, - "learning_rate": 1.9059804354361452e-07, - "loss": 0.9907, - "step": 7186 - }, - { - "epoch": 0.8641856550231467, - "grad_norm": 1.554114001134321, - "learning_rate": 1.902663021752684e-07, - "loss": 0.9429, - "step": 7187 - }, - { - "epoch": 0.8643058979137859, - "grad_norm": 3.0152265058114054, - "learning_rate": 1.8993483534208556e-07, - "loss": 1.049, - "step": 7188 - }, - { - "epoch": 0.8644261408044249, - "grad_norm": 2.912330092029646, - "learning_rate": 1.8960364309434884e-07, - "loss": 0.9715, - "step": 7189 - }, - { - "epoch": 0.864546383695064, - "grad_norm": 1.8575225064748333, - "learning_rate": 1.8927272548229967e-07, - "loss": 1.0178, - "step": 7190 - }, - { - "epoch": 0.8646666265857031, - "grad_norm": 1.4325699229592532, - "learning_rate": 1.8894208255613876e-07, - "loss": 1.059, - "step": 7191 - }, - { - "epoch": 0.8647868694763422, - "grad_norm": 1.819375251185545, - "learning_rate": 1.8861171436602397e-07, - "loss": 0.9975, - "step": 7192 - }, - { - "epoch": 0.8649071123669813, - "grad_norm": 2.0568691028938155, - "learning_rate": 1.882816209620719e-07, - "loss": 1.0363, - "step": 7193 - }, - { - "epoch": 0.8650273552576204, - "grad_norm": 2.2538772660479354, - "learning_rate": 1.8795180239435738e-07, - "loss": 0.997, - "step": 7194 - }, - { - "epoch": 0.8651475981482595, - "grad_norm": 2.4815118954135316, - "learning_rate": 1.8762225871291348e-07, - "loss": 0.987, - "step": 7195 - }, - { - "epoch": 0.8652678410388985, - "grad_norm": 1.746199773176714, - "learning_rate": 1.8729298996773201e-07, - "loss": 1.0345, - "step": 7196 - }, - { - "epoch": 0.8653880839295377, - "grad_norm": 0.8543329929832865, - "learning_rate": 1.8696399620876301e-07, - "loss": 0.8664, - "step": 7197 - }, - { - "epoch": 0.8655083268201768, - "grad_norm": 2.2975094496373885, - "learning_rate": 1.866352774859141e-07, - "loss": 1.0186, - "step": 7198 - }, - { - "epoch": 0.8656285697108158, - "grad_norm": 2.3611549585297653, - "learning_rate": 1.8630683384905188e-07, - "loss": 0.9197, - "step": 7199 - }, - { - "epoch": 0.865748812601455, - "grad_norm": 2.2044751514497447, - "learning_rate": 1.8597866534800045e-07, - "loss": 1.1231, - "step": 7200 - }, - { - "epoch": 0.865869055492094, - "grad_norm": 2.203877530186354, - "learning_rate": 1.8565077203254398e-07, - "loss": 0.976, - "step": 7201 - }, - { - "epoch": 0.8659892983827331, - "grad_norm": 3.121942473972097, - "learning_rate": 1.8532315395242203e-07, - "loss": 0.9637, - "step": 7202 - }, - { - "epoch": 0.8661095412733723, - "grad_norm": 2.2955613304591567, - "learning_rate": 1.849958111573353e-07, - "loss": 0.9463, - "step": 7203 - }, - { - "epoch": 0.8662297841640113, - "grad_norm": 1.9026876839735813, - "learning_rate": 1.8466874369694074e-07, - "loss": 0.8687, - "step": 7204 - }, - { - "epoch": 0.8663500270546504, - "grad_norm": 4.038257038961206, - "learning_rate": 1.843419516208542e-07, - "loss": 0.9386, - "step": 7205 - }, - { - "epoch": 0.8664702699452895, - "grad_norm": 2.2686206875353965, - "learning_rate": 1.8401543497865047e-07, - "loss": 1.0236, - "step": 7206 - }, - { - "epoch": 0.8665905128359286, - "grad_norm": 2.4082423297447484, - "learning_rate": 1.836891938198608e-07, - "loss": 0.8792, - "step": 7207 - }, - { - "epoch": 0.8667107557265676, - "grad_norm": 2.4644952155833164, - "learning_rate": 1.8336322819397677e-07, - "loss": 0.9466, - "step": 7208 - }, - { - "epoch": 0.8668309986172068, - "grad_norm": 2.9078220504612102, - "learning_rate": 1.8303753815044654e-07, - "loss": 0.8568, - "step": 7209 - }, - { - "epoch": 0.8669512415078459, - "grad_norm": 2.5337630618511024, - "learning_rate": 1.827121237386773e-07, - "loss": 0.9297, - "step": 7210 - }, - { - "epoch": 0.8670714843984849, - "grad_norm": 2.7120768766394225, - "learning_rate": 1.8238698500803374e-07, - "loss": 0.986, - "step": 7211 - }, - { - "epoch": 0.8671917272891241, - "grad_norm": 0.8473780654579421, - "learning_rate": 1.820621220078391e-07, - "loss": 0.854, - "step": 7212 - }, - { - "epoch": 0.8673119701797631, - "grad_norm": 2.0095391474904467, - "learning_rate": 1.8173753478737553e-07, - "loss": 0.9033, - "step": 7213 - }, - { - "epoch": 0.8674322130704022, - "grad_norm": 2.029270557456873, - "learning_rate": 1.8141322339588205e-07, - "loss": 1.024, - "step": 7214 - }, - { - "epoch": 0.8675524559610414, - "grad_norm": 1.914897503014718, - "learning_rate": 1.810891878825569e-07, - "loss": 0.9286, - "step": 7215 - }, - { - "epoch": 0.8676726988516804, - "grad_norm": 1.9566845006092108, - "learning_rate": 1.8076542829655561e-07, - "loss": 0.9436, - "step": 7216 - }, - { - "epoch": 0.8677929417423195, - "grad_norm": 2.0935181446660303, - "learning_rate": 1.8044194468699203e-07, - "loss": 1.0286, - "step": 7217 - }, - { - "epoch": 0.8679131846329585, - "grad_norm": 2.707155959654445, - "learning_rate": 1.8011873710293912e-07, - "loss": 0.9875, - "step": 7218 - }, - { - "epoch": 0.8680334275235977, - "grad_norm": 2.1634239604327354, - "learning_rate": 1.7979580559342677e-07, - "loss": 0.927, - "step": 7219 - }, - { - "epoch": 0.8681536704142367, - "grad_norm": 1.9400971756026166, - "learning_rate": 1.7947315020744358e-07, - "loss": 0.8987, - "step": 7220 - }, - { - "epoch": 0.8682739133048758, - "grad_norm": 2.0318633107295305, - "learning_rate": 1.7915077099393594e-07, - "loss": 1.0212, - "step": 7221 - }, - { - "epoch": 0.868394156195515, - "grad_norm": 2.0785647048991263, - "learning_rate": 1.788286680018083e-07, - "loss": 0.9677, - "step": 7222 - }, - { - "epoch": 0.868514399086154, - "grad_norm": 1.5247577130819523, - "learning_rate": 1.7850684127992443e-07, - "loss": 0.952, - "step": 7223 - }, - { - "epoch": 0.8686346419767931, - "grad_norm": 2.1723440199015447, - "learning_rate": 1.7818529087710378e-07, - "loss": 0.9359, - "step": 7224 - }, - { - "epoch": 0.8687548848674322, - "grad_norm": 3.9952081349645616, - "learning_rate": 1.7786401684212637e-07, - "loss": 1.0767, - "step": 7225 - }, - { - "epoch": 0.8688751277580713, - "grad_norm": 0.7381881574440967, - "learning_rate": 1.7754301922372883e-07, - "loss": 0.8037, - "step": 7226 - }, - { - "epoch": 0.8689953706487104, - "grad_norm": 1.8946824475547726, - "learning_rate": 1.7722229807060617e-07, - "loss": 1.0376, - "step": 7227 - }, - { - "epoch": 0.8691156135393495, - "grad_norm": 2.735482302302191, - "learning_rate": 1.7690185343141172e-07, - "loss": 1.045, - "step": 7228 - }, - { - "epoch": 0.8692358564299886, - "grad_norm": 2.517736271640204, - "learning_rate": 1.7658168535475615e-07, - "loss": 0.9397, - "step": 7229 - }, - { - "epoch": 0.8693560993206276, - "grad_norm": 1.7144953693183849, - "learning_rate": 1.7626179388920948e-07, - "loss": 0.8758, - "step": 7230 - }, - { - "epoch": 0.8694763422112668, - "grad_norm": 1.653824739965838, - "learning_rate": 1.7594217908329866e-07, - "loss": 1.0336, - "step": 7231 - }, - { - "epoch": 0.8695965851019059, - "grad_norm": 3.157970903548849, - "learning_rate": 1.7562284098550895e-07, - "loss": 0.9649, - "step": 7232 - }, - { - "epoch": 0.8697168279925449, - "grad_norm": 0.881511438426445, - "learning_rate": 1.753037796442838e-07, - "loss": 0.8947, - "step": 7233 - }, - { - "epoch": 0.8698370708831841, - "grad_norm": 2.355653366521298, - "learning_rate": 1.74984995108024e-07, - "loss": 0.9797, - "step": 7234 - }, - { - "epoch": 0.8699573137738231, - "grad_norm": 1.8883957764597508, - "learning_rate": 1.7466648742508981e-07, - "loss": 1.0604, - "step": 7235 - }, - { - "epoch": 0.8700775566644622, - "grad_norm": 1.9453887924492705, - "learning_rate": 1.7434825664379837e-07, - "loss": 1.0657, - "step": 7236 - }, - { - "epoch": 0.8701977995551013, - "grad_norm": 2.521038793172308, - "learning_rate": 1.740303028124246e-07, - "loss": 1.093, - "step": 7237 - }, - { - "epoch": 0.8703180424457404, - "grad_norm": 2.4857847424561195, - "learning_rate": 1.7371262597920212e-07, - "loss": 0.9877, - "step": 7238 - }, - { - "epoch": 0.8704382853363795, - "grad_norm": 1.4270313374126595, - "learning_rate": 1.7339522619232195e-07, - "loss": 0.9923, - "step": 7239 - }, - { - "epoch": 0.8705585282270186, - "grad_norm": 1.8423680444305681, - "learning_rate": 1.730781034999338e-07, - "loss": 0.9832, - "step": 7240 - }, - { - "epoch": 0.8706787711176577, - "grad_norm": 2.343371856701996, - "learning_rate": 1.7276125795014497e-07, - "loss": 0.9707, - "step": 7241 - }, - { - "epoch": 0.8707990140082967, - "grad_norm": 2.355202731013788, - "learning_rate": 1.7244468959102054e-07, - "loss": 0.9056, - "step": 7242 - }, - { - "epoch": 0.8709192568989359, - "grad_norm": 2.868774307580161, - "learning_rate": 1.7212839847058348e-07, - "loss": 1.0806, - "step": 7243 - }, - { - "epoch": 0.871039499789575, - "grad_norm": 2.1295994741113873, - "learning_rate": 1.718123846368147e-07, - "loss": 0.9694, - "step": 7244 - }, - { - "epoch": 0.871159742680214, - "grad_norm": 2.7744477487175296, - "learning_rate": 1.714966481376543e-07, - "loss": 0.9428, - "step": 7245 - }, - { - "epoch": 0.8712799855708532, - "grad_norm": 4.3340462073805766, - "learning_rate": 1.7118118902099797e-07, - "loss": 1.0529, - "step": 7246 - }, - { - "epoch": 0.8714002284614922, - "grad_norm": 1.6320029264031146, - "learning_rate": 1.7086600733470146e-07, - "loss": 1.0345, - "step": 7247 - }, - { - "epoch": 0.8715204713521313, - "grad_norm": 2.0195334930702606, - "learning_rate": 1.7055110312657738e-07, - "loss": 0.9909, - "step": 7248 - }, - { - "epoch": 0.8716407142427703, - "grad_norm": 2.100890665549383, - "learning_rate": 1.702364764443962e-07, - "loss": 0.9747, - "step": 7249 - }, - { - "epoch": 0.8717609571334095, - "grad_norm": 1.900491302113048, - "learning_rate": 1.6992212733588685e-07, - "loss": 0.9577, - "step": 7250 - }, - { - "epoch": 0.8718812000240486, - "grad_norm": 1.9306962314949279, - "learning_rate": 1.6960805584873538e-07, - "loss": 0.9836, - "step": 7251 - }, - { - "epoch": 0.8720014429146876, - "grad_norm": 2.2534546292216966, - "learning_rate": 1.6929426203058684e-07, - "loss": 1.0121, - "step": 7252 - }, - { - "epoch": 0.8721216858053268, - "grad_norm": 3.3876449216128304, - "learning_rate": 1.689807459290431e-07, - "loss": 1.0352, - "step": 7253 - }, - { - "epoch": 0.8722419286959658, - "grad_norm": 9.669564262285087, - "learning_rate": 1.6866750759166437e-07, - "loss": 0.9338, - "step": 7254 - }, - { - "epoch": 0.8723621715866049, - "grad_norm": 2.392532086610632, - "learning_rate": 1.6835454706596865e-07, - "loss": 0.9988, - "step": 7255 - }, - { - "epoch": 0.8724824144772441, - "grad_norm": 1.9650812024526279, - "learning_rate": 1.680418643994317e-07, - "loss": 0.9714, - "step": 7256 - }, - { - "epoch": 0.8726026573678831, - "grad_norm": 0.948813991826139, - "learning_rate": 1.6772945963948738e-07, - "loss": 0.9214, - "step": 7257 - }, - { - "epoch": 0.8727229002585222, - "grad_norm": 2.6730147936661712, - "learning_rate": 1.6741733283352733e-07, - "loss": 0.9984, - "step": 7258 - }, - { - "epoch": 0.8728431431491613, - "grad_norm": 1.6693183388300656, - "learning_rate": 1.6710548402890102e-07, - "loss": 1.0655, - "step": 7259 - }, - { - "epoch": 0.8729633860398004, - "grad_norm": 1.7062350234048602, - "learning_rate": 1.6679391327291527e-07, - "loss": 0.8969, - "step": 7260 - }, - { - "epoch": 0.8730836289304394, - "grad_norm": 4.160748334847064, - "learning_rate": 1.6648262061283492e-07, - "loss": 0.9066, - "step": 7261 - }, - { - "epoch": 0.8732038718210786, - "grad_norm": 2.1815271851801836, - "learning_rate": 1.6617160609588353e-07, - "loss": 0.9637, - "step": 7262 - }, - { - "epoch": 0.8733241147117177, - "grad_norm": 2.1646471025792713, - "learning_rate": 1.6586086976924163e-07, - "loss": 0.9394, - "step": 7263 - }, - { - "epoch": 0.8734443576023567, - "grad_norm": 2.255117001934341, - "learning_rate": 1.6555041168004747e-07, - "loss": 1.0163, - "step": 7264 - }, - { - "epoch": 0.8735646004929959, - "grad_norm": 1.7611541858436748, - "learning_rate": 1.6524023187539715e-07, - "loss": 0.9171, - "step": 7265 - }, - { - "epoch": 0.873684843383635, - "grad_norm": 2.1898755934404344, - "learning_rate": 1.649303304023446e-07, - "loss": 0.9791, - "step": 7266 - }, - { - "epoch": 0.873805086274274, - "grad_norm": 1.5473768402786339, - "learning_rate": 1.6462070730790246e-07, - "loss": 1.0109, - "step": 7267 - }, - { - "epoch": 0.8739253291649132, - "grad_norm": 2.1787463928648023, - "learning_rate": 1.6431136263903912e-07, - "loss": 1.0111, - "step": 7268 - }, - { - "epoch": 0.8740455720555522, - "grad_norm": 2.2012398953831376, - "learning_rate": 1.6400229644268282e-07, - "loss": 0.9715, - "step": 7269 - }, - { - "epoch": 0.8741658149461913, - "grad_norm": 2.150225973452083, - "learning_rate": 1.6369350876571852e-07, - "loss": 1.0374, - "step": 7270 - }, - { - "epoch": 0.8742860578368304, - "grad_norm": 2.0724069042026745, - "learning_rate": 1.6338499965498874e-07, - "loss": 1.0379, - "step": 7271 - }, - { - "epoch": 0.8744063007274695, - "grad_norm": 1.6243126222347029, - "learning_rate": 1.630767691572943e-07, - "loss": 1.0034, - "step": 7272 - }, - { - "epoch": 0.8745265436181086, - "grad_norm": 0.7575402311462506, - "learning_rate": 1.6276881731939306e-07, - "loss": 0.7946, - "step": 7273 - }, - { - "epoch": 0.8746467865087477, - "grad_norm": 1.8823439145572995, - "learning_rate": 1.6246114418800193e-07, - "loss": 0.9854, - "step": 7274 - }, - { - "epoch": 0.8747670293993868, - "grad_norm": 1.8841287910718378, - "learning_rate": 1.6215374980979423e-07, - "loss": 0.9942, - "step": 7275 - }, - { - "epoch": 0.8748872722900258, - "grad_norm": 1.9535724585095309, - "learning_rate": 1.6184663423140133e-07, - "loss": 0.919, - "step": 7276 - }, - { - "epoch": 0.875007515180665, - "grad_norm": 2.343993619337283, - "learning_rate": 1.615397974994126e-07, - "loss": 0.8774, - "step": 7277 - }, - { - "epoch": 0.875127758071304, - "grad_norm": 1.5194096455297876, - "learning_rate": 1.6123323966037438e-07, - "loss": 1.0318, - "step": 7278 - }, - { - "epoch": 0.8752480009619431, - "grad_norm": 2.0184283020877767, - "learning_rate": 1.6092696076079216e-07, - "loss": 1.0096, - "step": 7279 - }, - { - "epoch": 0.8753682438525822, - "grad_norm": 1.5391445234874854, - "learning_rate": 1.6062096084712785e-07, - "loss": 0.9642, - "step": 7280 - }, - { - "epoch": 0.8754884867432213, - "grad_norm": 1.8496352196955244, - "learning_rate": 1.6031523996580098e-07, - "loss": 0.9378, - "step": 7281 - }, - { - "epoch": 0.8756087296338604, - "grad_norm": 2.1447022305615944, - "learning_rate": 1.6000979816318981e-07, - "loss": 0.8924, - "step": 7282 - }, - { - "epoch": 0.8757289725244994, - "grad_norm": 2.0405300895268974, - "learning_rate": 1.5970463548562886e-07, - "loss": 0.9808, - "step": 7283 - }, - { - "epoch": 0.8758492154151386, - "grad_norm": 2.1022959283771616, - "learning_rate": 1.5939975197941192e-07, - "loss": 0.9452, - "step": 7284 - }, - { - "epoch": 0.8759694583057777, - "grad_norm": 0.9309345824883246, - "learning_rate": 1.5909514769078892e-07, - "loss": 0.7932, - "step": 7285 - }, - { - "epoch": 0.8760897011964167, - "grad_norm": 1.4833760966162464, - "learning_rate": 1.5879082266596867e-07, - "loss": 1.0084, - "step": 7286 - }, - { - "epoch": 0.8762099440870559, - "grad_norm": 1.6044918903111411, - "learning_rate": 1.5848677695111645e-07, - "loss": 0.9483, - "step": 7287 - }, - { - "epoch": 0.8763301869776949, - "grad_norm": 2.784150400746966, - "learning_rate": 1.5818301059235562e-07, - "loss": 0.933, - "step": 7288 - }, - { - "epoch": 0.876450429868334, - "grad_norm": 1.8528404493406578, - "learning_rate": 1.578795236357684e-07, - "loss": 1.0438, - "step": 7289 - }, - { - "epoch": 0.8765706727589732, - "grad_norm": 2.4362253622156818, - "learning_rate": 1.5757631612739218e-07, - "loss": 1.083, - "step": 7290 - }, - { - "epoch": 0.8766909156496122, - "grad_norm": 0.8798911108034222, - "learning_rate": 1.572733881132242e-07, - "loss": 0.9042, - "step": 7291 - }, - { - "epoch": 0.8768111585402513, - "grad_norm": 0.7671413586447882, - "learning_rate": 1.5697073963921814e-07, - "loss": 0.8466, - "step": 7292 - }, - { - "epoch": 0.8769314014308904, - "grad_norm": 2.390643150908933, - "learning_rate": 1.566683707512857e-07, - "loss": 1.0778, - "step": 7293 - }, - { - "epoch": 0.8770516443215295, - "grad_norm": 2.28505660923925, - "learning_rate": 1.5636628149529553e-07, - "loss": 1.0216, - "step": 7294 - }, - { - "epoch": 0.8771718872121685, - "grad_norm": 2.5766606651654085, - "learning_rate": 1.560644719170743e-07, - "loss": 1.017, - "step": 7295 - }, - { - "epoch": 0.8772921301028077, - "grad_norm": 1.9475033231420407, - "learning_rate": 1.5576294206240692e-07, - "loss": 0.9438, - "step": 7296 - }, - { - "epoch": 0.8774123729934468, - "grad_norm": 1.7632868991908357, - "learning_rate": 1.5546169197703507e-07, - "loss": 0.9167, - "step": 7297 - }, - { - "epoch": 0.8775326158840858, - "grad_norm": 2.54022659612641, - "learning_rate": 1.5516072170665774e-07, - "loss": 0.9935, - "step": 7298 - }, - { - "epoch": 0.877652858774725, - "grad_norm": 2.1724916310033935, - "learning_rate": 1.5486003129693214e-07, - "loss": 1.0887, - "step": 7299 - }, - { - "epoch": 0.877773101665364, - "grad_norm": 1.7534195324458706, - "learning_rate": 1.545596207934725e-07, - "loss": 1.0059, - "step": 7300 - }, - { - "epoch": 0.8778933445560031, - "grad_norm": 2.170151390209218, - "learning_rate": 1.5425949024185147e-07, - "loss": 1.0034, - "step": 7301 - }, - { - "epoch": 0.8780135874466423, - "grad_norm": 3.515982647823701, - "learning_rate": 1.5395963968759818e-07, - "loss": 0.9084, - "step": 7302 - }, - { - "epoch": 0.8781338303372813, - "grad_norm": 2.003877933294693, - "learning_rate": 1.536600691761998e-07, - "loss": 0.8714, - "step": 7303 - }, - { - "epoch": 0.8782540732279204, - "grad_norm": 1.7515116702079474, - "learning_rate": 1.5336077875310084e-07, - "loss": 0.9426, - "step": 7304 - }, - { - "epoch": 0.8783743161185595, - "grad_norm": 2.009737746746939, - "learning_rate": 1.5306176846370321e-07, - "loss": 0.9671, - "step": 7305 - }, - { - "epoch": 0.8784945590091986, - "grad_norm": 6.418126852510931, - "learning_rate": 1.5276303835336712e-07, - "loss": 0.9714, - "step": 7306 - }, - { - "epoch": 0.8786148018998376, - "grad_norm": 0.779993878487261, - "learning_rate": 1.524645884674094e-07, - "loss": 0.7884, - "step": 7307 - }, - { - "epoch": 0.8787350447904768, - "grad_norm": 2.212906139619047, - "learning_rate": 1.521664188511047e-07, - "loss": 1.0199, - "step": 7308 - }, - { - "epoch": 0.8788552876811159, - "grad_norm": 2.1104902921777358, - "learning_rate": 1.518685295496851e-07, - "loss": 1.0368, - "step": 7309 - }, - { - "epoch": 0.8789755305717549, - "grad_norm": 1.534508333026314, - "learning_rate": 1.5157092060833975e-07, - "loss": 1.0763, - "step": 7310 - }, - { - "epoch": 0.879095773462394, - "grad_norm": 1.8906737392227793, - "learning_rate": 1.5127359207221658e-07, - "loss": 0.8899, - "step": 7311 - }, - { - "epoch": 0.8792160163530331, - "grad_norm": 2.3484658164057692, - "learning_rate": 1.5097654398641923e-07, - "loss": 0.9545, - "step": 7312 - }, - { - "epoch": 0.8793362592436722, - "grad_norm": 2.223421900165705, - "learning_rate": 1.5067977639601014e-07, - "loss": 0.9564, - "step": 7313 - }, - { - "epoch": 0.8794565021343113, - "grad_norm": 2.8175444399587697, - "learning_rate": 1.5038328934600864e-07, - "loss": 0.9444, - "step": 7314 - }, - { - "epoch": 0.8795767450249504, - "grad_norm": 1.8541878545457762, - "learning_rate": 1.5008708288139161e-07, - "loss": 0.9283, - "step": 7315 - }, - { - "epoch": 0.8796969879155895, - "grad_norm": 1.9724357859477961, - "learning_rate": 1.497911570470931e-07, - "loss": 0.9664, - "step": 7316 - }, - { - "epoch": 0.8798172308062285, - "grad_norm": 1.6834821553430255, - "learning_rate": 1.494955118880048e-07, - "loss": 1.0789, - "step": 7317 - }, - { - "epoch": 0.8799374736968677, - "grad_norm": 1.5204163942191307, - "learning_rate": 1.4920014744897634e-07, - "loss": 0.9569, - "step": 7318 - }, - { - "epoch": 0.8800577165875068, - "grad_norm": 1.752750211150636, - "learning_rate": 1.4890506377481392e-07, - "loss": 1.0919, - "step": 7319 - }, - { - "epoch": 0.8801779594781458, - "grad_norm": 7.1142593540878565, - "learning_rate": 1.486102609102815e-07, - "loss": 0.868, - "step": 7320 - }, - { - "epoch": 0.880298202368785, - "grad_norm": 2.34590314509404, - "learning_rate": 1.483157389001004e-07, - "loss": 1.0792, - "step": 7321 - }, - { - "epoch": 0.880418445259424, - "grad_norm": 1.9407436537289349, - "learning_rate": 1.4802149778894933e-07, - "loss": 1.0162, - "step": 7322 - }, - { - "epoch": 0.8805386881500631, - "grad_norm": 3.993175167927514, - "learning_rate": 1.4772753762146484e-07, - "loss": 1.104, - "step": 7323 - }, - { - "epoch": 0.8806589310407023, - "grad_norm": 1.7091353867432197, - "learning_rate": 1.474338584422401e-07, - "loss": 0.93, - "step": 7324 - }, - { - "epoch": 0.8807791739313413, - "grad_norm": 1.8532429083484308, - "learning_rate": 1.4714046029582595e-07, - "loss": 0.9841, - "step": 7325 - }, - { - "epoch": 0.8808994168219804, - "grad_norm": 3.570956716701925, - "learning_rate": 1.46847343226731e-07, - "loss": 0.9895, - "step": 7326 - }, - { - "epoch": 0.8810196597126195, - "grad_norm": 1.934406333262833, - "learning_rate": 1.465545072794203e-07, - "loss": 0.9222, - "step": 7327 - }, - { - "epoch": 0.8811399026032586, - "grad_norm": 2.1184383511967124, - "learning_rate": 1.4626195249831774e-07, - "loss": 0.9865, - "step": 7328 - }, - { - "epoch": 0.8812601454938976, - "grad_norm": 1.7911411520129636, - "learning_rate": 1.4596967892780244e-07, - "loss": 0.9487, - "step": 7329 - }, - { - "epoch": 0.8813803883845368, - "grad_norm": 1.7306096900050585, - "learning_rate": 1.4567768661221314e-07, - "loss": 0.9773, - "step": 7330 - }, - { - "epoch": 0.8815006312751759, - "grad_norm": 1.9955361141644543, - "learning_rate": 1.4538597559584442e-07, - "loss": 0.9726, - "step": 7331 - }, - { - "epoch": 0.8816208741658149, - "grad_norm": 1.876877289985634, - "learning_rate": 1.4509454592294823e-07, - "loss": 1.0021, - "step": 7332 - }, - { - "epoch": 0.8817411170564541, - "grad_norm": 2.446360695526808, - "learning_rate": 1.448033976377354e-07, - "loss": 1.0196, - "step": 7333 - }, - { - "epoch": 0.8818613599470931, - "grad_norm": 2.0190371235367324, - "learning_rate": 1.445125307843713e-07, - "loss": 0.9677, - "step": 7334 - }, - { - "epoch": 0.8819816028377322, - "grad_norm": 1.6560760982554186, - "learning_rate": 1.442219454069813e-07, - "loss": 0.9774, - "step": 7335 - }, - { - "epoch": 0.8821018457283714, - "grad_norm": 3.340111151804486, - "learning_rate": 1.4393164154964676e-07, - "loss": 0.8944, - "step": 7336 - }, - { - "epoch": 0.8822220886190104, - "grad_norm": 1.6626417515490644, - "learning_rate": 1.4364161925640649e-07, - "loss": 1.1627, - "step": 7337 - }, - { - "epoch": 0.8823423315096495, - "grad_norm": 1.6865343227156684, - "learning_rate": 1.4335187857125663e-07, - "loss": 1.0818, - "step": 7338 - }, - { - "epoch": 0.8824625744002886, - "grad_norm": 1.9863647337534132, - "learning_rate": 1.4306241953815023e-07, - "loss": 0.9839, - "step": 7339 - }, - { - "epoch": 0.8825828172909277, - "grad_norm": 1.972496947853038, - "learning_rate": 1.4277324220099862e-07, - "loss": 0.9394, - "step": 7340 - }, - { - "epoch": 0.8827030601815667, - "grad_norm": 1.7806675945957002, - "learning_rate": 1.4248434660366938e-07, - "loss": 0.9718, - "step": 7341 - }, - { - "epoch": 0.8828233030722058, - "grad_norm": 1.918751046245675, - "learning_rate": 1.4219573278998808e-07, - "loss": 0.9374, - "step": 7342 - }, - { - "epoch": 0.882943545962845, - "grad_norm": 2.217436388172071, - "learning_rate": 1.4190740080373685e-07, - "loss": 0.881, - "step": 7343 - }, - { - "epoch": 0.883063788853484, - "grad_norm": 1.7324335727920315, - "learning_rate": 1.4161935068865538e-07, - "loss": 1.0777, - "step": 7344 - }, - { - "epoch": 0.8831840317441231, - "grad_norm": 2.572389268039673, - "learning_rate": 1.4133158248844113e-07, - "loss": 0.9826, - "step": 7345 - }, - { - "epoch": 0.8833042746347622, - "grad_norm": 1.7482346758842509, - "learning_rate": 1.4104409624674785e-07, - "loss": 0.9593, - "step": 7346 - }, - { - "epoch": 0.8834245175254013, - "grad_norm": 1.7881750046085907, - "learning_rate": 1.407568920071873e-07, - "loss": 1.0126, - "step": 7347 - }, - { - "epoch": 0.8835447604160404, - "grad_norm": 1.9383281660571423, - "learning_rate": 1.4046996981332782e-07, - "loss": 0.903, - "step": 7348 - }, - { - "epoch": 0.8836650033066795, - "grad_norm": 1.8879105178350086, - "learning_rate": 1.4018332970869516e-07, - "loss": 1.0063, - "step": 7349 - }, - { - "epoch": 0.8837852461973186, - "grad_norm": 1.663886763915055, - "learning_rate": 1.398969717367733e-07, - "loss": 1.0748, - "step": 7350 - }, - { - "epoch": 0.8839054890879576, - "grad_norm": 1.7938869012035548, - "learning_rate": 1.396108959410014e-07, - "loss": 0.994, - "step": 7351 - }, - { - "epoch": 0.8840257319785968, - "grad_norm": 1.5792592570891217, - "learning_rate": 1.3932510236477745e-07, - "loss": 1.0365, - "step": 7352 - }, - { - "epoch": 0.8841459748692359, - "grad_norm": 1.7405991776638112, - "learning_rate": 1.3903959105145636e-07, - "loss": 0.7861, - "step": 7353 - }, - { - "epoch": 0.8842662177598749, - "grad_norm": 1.8611626246267727, - "learning_rate": 1.387543620443492e-07, - "loss": 1.0547, - "step": 7354 - }, - { - "epoch": 0.8843864606505141, - "grad_norm": 1.643177335942392, - "learning_rate": 1.3846941538672606e-07, - "loss": 1.0717, - "step": 7355 - }, - { - "epoch": 0.8845067035411531, - "grad_norm": 2.2402769448876008, - "learning_rate": 1.3818475112181193e-07, - "loss": 1.0401, - "step": 7356 - }, - { - "epoch": 0.8846269464317922, - "grad_norm": 2.0153976430488685, - "learning_rate": 1.3790036929279091e-07, - "loss": 1.0157, - "step": 7357 - }, - { - "epoch": 0.8847471893224313, - "grad_norm": 2.0623960757804927, - "learning_rate": 1.3761626994280363e-07, - "loss": 0.8174, - "step": 7358 - }, - { - "epoch": 0.8848674322130704, - "grad_norm": 1.7894735738588896, - "learning_rate": 1.3733245311494735e-07, - "loss": 0.9637, - "step": 7359 - }, - { - "epoch": 0.8849876751037095, - "grad_norm": 1.8826797248591896, - "learning_rate": 1.3704891885227676e-07, - "loss": 0.9428, - "step": 7360 - }, - { - "epoch": 0.8851079179943486, - "grad_norm": 2.184858853038941, - "learning_rate": 1.367656671978037e-07, - "loss": 0.9985, - "step": 7361 - }, - { - "epoch": 0.8852281608849877, - "grad_norm": 2.2534420388239114, - "learning_rate": 1.36482698194498e-07, - "loss": 0.9743, - "step": 7362 - }, - { - "epoch": 0.8853484037756267, - "grad_norm": 1.9672122581081402, - "learning_rate": 1.3620001188528506e-07, - "loss": 0.946, - "step": 7363 - }, - { - "epoch": 0.8854686466662659, - "grad_norm": 4.982437761820517, - "learning_rate": 1.3591760831304865e-07, - "loss": 0.9544, - "step": 7364 - }, - { - "epoch": 0.885588889556905, - "grad_norm": 1.908163329913055, - "learning_rate": 1.356354875206287e-07, - "loss": 1.032, - "step": 7365 - }, - { - "epoch": 0.885709132447544, - "grad_norm": 2.4313756836714737, - "learning_rate": 1.3535364955082296e-07, - "loss": 0.922, - "step": 7366 - }, - { - "epoch": 0.8858293753381832, - "grad_norm": 1.8104913696867166, - "learning_rate": 1.3507209444638613e-07, - "loss": 0.8701, - "step": 7367 - }, - { - "epoch": 0.8859496182288222, - "grad_norm": 1.8910035314017597, - "learning_rate": 1.347908222500298e-07, - "loss": 0.9709, - "step": 7368 - }, - { - "epoch": 0.8860698611194613, - "grad_norm": 2.3545659044608853, - "learning_rate": 1.3450983300442276e-07, - "loss": 0.9244, - "step": 7369 - }, - { - "epoch": 0.8861901040101005, - "grad_norm": 1.839453589017481, - "learning_rate": 1.3422912675219068e-07, - "loss": 0.9614, - "step": 7370 - }, - { - "epoch": 0.8863103469007395, - "grad_norm": 1.8042846494278248, - "learning_rate": 1.339487035359166e-07, - "loss": 1.0118, - "step": 7371 - }, - { - "epoch": 0.8864305897913786, - "grad_norm": 1.7057137369715019, - "learning_rate": 1.336685633981409e-07, - "loss": 1.0796, - "step": 7372 - }, - { - "epoch": 0.8865508326820177, - "grad_norm": 1.7586139801502232, - "learning_rate": 1.333887063813597e-07, - "loss": 0.9704, - "step": 7373 - }, - { - "epoch": 0.8866710755726568, - "grad_norm": 1.9926138627317957, - "learning_rate": 1.331091325280278e-07, - "loss": 0.8932, - "step": 7374 - }, - { - "epoch": 0.8867913184632958, - "grad_norm": 1.8019148099155502, - "learning_rate": 1.3282984188055625e-07, - "loss": 1.0137, - "step": 7375 - }, - { - "epoch": 0.8869115613539349, - "grad_norm": 2.165867462456833, - "learning_rate": 1.3255083448131288e-07, - "loss": 1.0203, - "step": 7376 - }, - { - "epoch": 0.8870318042445741, - "grad_norm": 2.9658339785555246, - "learning_rate": 1.3227211037262365e-07, - "loss": 1.0172, - "step": 7377 - }, - { - "epoch": 0.8871520471352131, - "grad_norm": 2.948457612098949, - "learning_rate": 1.319936695967696e-07, - "loss": 1.0857, - "step": 7378 - }, - { - "epoch": 0.8872722900258522, - "grad_norm": 2.0285116664672285, - "learning_rate": 1.3171551219599097e-07, - "loss": 1.0511, - "step": 7379 - }, - { - "epoch": 0.8873925329164913, - "grad_norm": 3.2561026278075054, - "learning_rate": 1.3143763821248377e-07, - "loss": 0.9979, - "step": 7380 - }, - { - "epoch": 0.8875127758071304, - "grad_norm": 1.6823735251652292, - "learning_rate": 1.3116004768840118e-07, - "loss": 0.9484, - "step": 7381 - }, - { - "epoch": 0.8876330186977694, - "grad_norm": 1.8568396713814752, - "learning_rate": 1.3088274066585348e-07, - "loss": 0.967, - "step": 7382 - }, - { - "epoch": 0.8877532615884086, - "grad_norm": 1.953025510161366, - "learning_rate": 1.3060571718690749e-07, - "loss": 1.1354, - "step": 7383 - }, - { - "epoch": 0.8878735044790477, - "grad_norm": 0.7659007373891434, - "learning_rate": 1.3032897729358805e-07, - "loss": 0.8301, - "step": 7384 - }, - { - "epoch": 0.8879937473696867, - "grad_norm": 2.1071830380836754, - "learning_rate": 1.3005252102787645e-07, - "loss": 1.0255, - "step": 7385 - }, - { - "epoch": 0.8881139902603259, - "grad_norm": 1.506723198223975, - "learning_rate": 1.297763484317105e-07, - "loss": 0.9643, - "step": 7386 - }, - { - "epoch": 0.888234233150965, - "grad_norm": 2.2330793345426785, - "learning_rate": 1.2950045954698551e-07, - "loss": 0.9305, - "step": 7387 - }, - { - "epoch": 0.888354476041604, - "grad_norm": 1.7681771796500638, - "learning_rate": 1.2922485441555343e-07, - "loss": 0.984, - "step": 7388 - }, - { - "epoch": 0.8884747189322432, - "grad_norm": 2.0471890041233407, - "learning_rate": 1.2894953307922363e-07, - "loss": 1.0466, - "step": 7389 - }, - { - "epoch": 0.8885949618228822, - "grad_norm": 2.0382480675659975, - "learning_rate": 1.2867449557976208e-07, - "loss": 1.071, - "step": 7390 - }, - { - "epoch": 0.8887152047135213, - "grad_norm": 1.9469755486597302, - "learning_rate": 1.283997419588916e-07, - "loss": 0.9888, - "step": 7391 - }, - { - "epoch": 0.8888354476041604, - "grad_norm": 1.8420841644269128, - "learning_rate": 1.2812527225829216e-07, - "loss": 0.8456, - "step": 7392 - }, - { - "epoch": 0.8889556904947995, - "grad_norm": 1.944048122100814, - "learning_rate": 1.2785108651960052e-07, - "loss": 0.9884, - "step": 7393 - }, - { - "epoch": 0.8890759333854386, - "grad_norm": 1.9018762158889218, - "learning_rate": 1.2757718478441094e-07, - "loss": 1.0225, - "step": 7394 - }, - { - "epoch": 0.8891961762760777, - "grad_norm": 1.8727476579098716, - "learning_rate": 1.2730356709427302e-07, - "loss": 1.0017, - "step": 7395 - }, - { - "epoch": 0.8893164191667168, - "grad_norm": 1.4402862580868308, - "learning_rate": 1.2703023349069542e-07, - "loss": 0.8255, - "step": 7396 - }, - { - "epoch": 0.8894366620573558, - "grad_norm": 2.01759419614848, - "learning_rate": 1.2675718401514223e-07, - "loss": 0.845, - "step": 7397 - }, - { - "epoch": 0.889556904947995, - "grad_norm": 1.837182714355797, - "learning_rate": 1.264844187090346e-07, - "loss": 0.9724, - "step": 7398 - }, - { - "epoch": 0.889677147838634, - "grad_norm": 1.8194110259011442, - "learning_rate": 1.262119376137516e-07, - "loss": 0.9818, - "step": 7399 - }, - { - "epoch": 0.8897973907292731, - "grad_norm": 1.7720391226081418, - "learning_rate": 1.2593974077062707e-07, - "loss": 1.0759, - "step": 7400 - }, - { - "epoch": 0.8899176336199123, - "grad_norm": 1.5232390445657729, - "learning_rate": 1.2566782822095423e-07, - "loss": 0.8568, - "step": 7401 - }, - { - "epoch": 0.8900378765105513, - "grad_norm": 1.8994017612872875, - "learning_rate": 1.2539620000598162e-07, - "loss": 0.9475, - "step": 7402 - }, - { - "epoch": 0.8901581194011904, - "grad_norm": 1.7264887591188567, - "learning_rate": 1.2512485616691492e-07, - "loss": 1.0245, - "step": 7403 - }, - { - "epoch": 0.8902783622918296, - "grad_norm": 1.4325422957802045, - "learning_rate": 1.2485379674491681e-07, - "loss": 1.036, - "step": 7404 - }, - { - "epoch": 0.8903986051824686, - "grad_norm": 3.816278680018273, - "learning_rate": 1.2458302178110657e-07, - "loss": 1.0172, - "step": 7405 - }, - { - "epoch": 0.8905188480731077, - "grad_norm": 1.7883097968399875, - "learning_rate": 1.2431253131656118e-07, - "loss": 1.0513, - "step": 7406 - }, - { - "epoch": 0.8906390909637467, - "grad_norm": 1.7301851819545158, - "learning_rate": 1.240423253923133e-07, - "loss": 0.9896, - "step": 7407 - }, - { - "epoch": 0.8907593338543859, - "grad_norm": 2.04485961469511, - "learning_rate": 1.237724040493533e-07, - "loss": 0.9159, - "step": 7408 - }, - { - "epoch": 0.8908795767450249, - "grad_norm": 3.5223353114727387, - "learning_rate": 1.2350276732862773e-07, - "loss": 0.9594, - "step": 7409 - }, - { - "epoch": 0.890999819635664, - "grad_norm": 0.8656740126522732, - "learning_rate": 1.2323341527103993e-07, - "loss": 0.8503, - "step": 7410 - }, - { - "epoch": 0.8911200625263032, - "grad_norm": 2.2530640825351074, - "learning_rate": 1.2296434791745135e-07, - "loss": 1.072, - "step": 7411 - }, - { - "epoch": 0.8912403054169422, - "grad_norm": 1.807457915326628, - "learning_rate": 1.2269556530867875e-07, - "loss": 1.0004, - "step": 7412 - }, - { - "epoch": 0.8913605483075813, - "grad_norm": 2.7532789449269175, - "learning_rate": 1.2242706748549614e-07, - "loss": 1.0498, - "step": 7413 - }, - { - "epoch": 0.8914807911982204, - "grad_norm": 1.7358592100712924, - "learning_rate": 1.2215885448863473e-07, - "loss": 1.0504, - "step": 7414 - }, - { - "epoch": 0.8916010340888595, - "grad_norm": 1.8889815067821834, - "learning_rate": 1.2189092635878152e-07, - "loss": 1.0307, - "step": 7415 - }, - { - "epoch": 0.8917212769794985, - "grad_norm": 1.7060050061857912, - "learning_rate": 1.216232831365822e-07, - "loss": 1.0004, - "step": 7416 - }, - { - "epoch": 0.8918415198701377, - "grad_norm": 1.841837909844109, - "learning_rate": 1.2135592486263678e-07, - "loss": 1.039, - "step": 7417 - }, - { - "epoch": 0.8919617627607768, - "grad_norm": 1.5984215430752506, - "learning_rate": 1.2108885157750415e-07, - "loss": 0.8421, - "step": 7418 - }, - { - "epoch": 0.8920820056514158, - "grad_norm": 1.775636225835755, - "learning_rate": 1.2082206332169897e-07, - "loss": 1.03, - "step": 7419 - }, - { - "epoch": 0.892202248542055, - "grad_norm": 2.4717389133189682, - "learning_rate": 1.2055556013569225e-07, - "loss": 0.9629, - "step": 7420 - }, - { - "epoch": 0.892322491432694, - "grad_norm": 1.4590480233930825, - "learning_rate": 1.2028934205991315e-07, - "loss": 1.0427, - "step": 7421 - }, - { - "epoch": 0.8924427343233331, - "grad_norm": 1.70773224218867, - "learning_rate": 1.2002340913474607e-07, - "loss": 0.9953, - "step": 7422 - }, - { - "epoch": 0.8925629772139723, - "grad_norm": 1.960414497571577, - "learning_rate": 1.1975776140053317e-07, - "loss": 0.9715, - "step": 7423 - }, - { - "epoch": 0.8926832201046113, - "grad_norm": 1.8445134279584927, - "learning_rate": 1.194923988975729e-07, - "loss": 0.9659, - "step": 7424 - }, - { - "epoch": 0.8928034629952504, - "grad_norm": 2.5355381376767983, - "learning_rate": 1.192273216661206e-07, - "loss": 0.9659, - "step": 7425 - }, - { - "epoch": 0.8929237058858895, - "grad_norm": 0.7610084939224483, - "learning_rate": 1.189625297463881e-07, - "loss": 0.8409, - "step": 7426 - }, - { - "epoch": 0.8930439487765286, - "grad_norm": 1.5672367776595006, - "learning_rate": 1.1869802317854394e-07, - "loss": 1.0231, - "step": 7427 - }, - { - "epoch": 0.8931641916671677, - "grad_norm": 1.7173284026799787, - "learning_rate": 1.1843380200271425e-07, - "loss": 0.954, - "step": 7428 - }, - { - "epoch": 0.8932844345578068, - "grad_norm": 1.9087440559579445, - "learning_rate": 1.181698662589805e-07, - "loss": 1.0344, - "step": 7429 - }, - { - "epoch": 0.8934046774484459, - "grad_norm": 1.76511720089918, - "learning_rate": 1.1790621598738249e-07, - "loss": 0.9864, - "step": 7430 - }, - { - "epoch": 0.8935249203390849, - "grad_norm": 1.8810645255987213, - "learning_rate": 1.1764285122791461e-07, - "loss": 0.9777, - "step": 7431 - }, - { - "epoch": 0.8936451632297241, - "grad_norm": 3.7093754422091934, - "learning_rate": 1.173797720205294e-07, - "loss": 1.001, - "step": 7432 - }, - { - "epoch": 0.8937654061203631, - "grad_norm": 3.619751122702722, - "learning_rate": 1.1711697840513602e-07, - "loss": 0.9501, - "step": 7433 - }, - { - "epoch": 0.8938856490110022, - "grad_norm": 2.098897044862287, - "learning_rate": 1.1685447042160012e-07, - "loss": 0.933, - "step": 7434 - }, - { - "epoch": 0.8940058919016414, - "grad_norm": 1.435356615394775, - "learning_rate": 1.1659224810974367e-07, - "loss": 0.9374, - "step": 7435 - }, - { - "epoch": 0.8941261347922804, - "grad_norm": 2.901585599266057, - "learning_rate": 1.1633031150934591e-07, - "loss": 0.9138, - "step": 7436 - }, - { - "epoch": 0.8942463776829195, - "grad_norm": 2.4038456385685296, - "learning_rate": 1.1606866066014176e-07, - "loss": 1.0269, - "step": 7437 - }, - { - "epoch": 0.8943666205735585, - "grad_norm": 2.646606352412717, - "learning_rate": 1.1580729560182434e-07, - "loss": 0.9767, - "step": 7438 - }, - { - "epoch": 0.8944868634641977, - "grad_norm": 1.838775065216857, - "learning_rate": 1.1554621637404171e-07, - "loss": 0.9467, - "step": 7439 - }, - { - "epoch": 0.8946071063548368, - "grad_norm": 5.148483623477482, - "learning_rate": 1.1528542301639999e-07, - "loss": 0.8346, - "step": 7440 - }, - { - "epoch": 0.8947273492454758, - "grad_norm": 2.175873513037642, - "learning_rate": 1.1502491556846105e-07, - "loss": 1.049, - "step": 7441 - }, - { - "epoch": 0.894847592136115, - "grad_norm": 2.754466158174451, - "learning_rate": 1.1476469406974331e-07, - "loss": 1.0414, - "step": 7442 - }, - { - "epoch": 0.894967835026754, - "grad_norm": 2.4733505841019467, - "learning_rate": 1.1450475855972341e-07, - "loss": 0.9995, - "step": 7443 - }, - { - "epoch": 0.8950880779173931, - "grad_norm": 2.059719291931295, - "learning_rate": 1.1424510907783158e-07, - "loss": 0.9348, - "step": 7444 - }, - { - "epoch": 0.8952083208080323, - "grad_norm": 1.6843749886762234, - "learning_rate": 1.1398574566345787e-07, - "loss": 1.0522, - "step": 7445 - }, - { - "epoch": 0.8953285636986713, - "grad_norm": 2.3621692405748926, - "learning_rate": 1.1372666835594702e-07, - "loss": 1.0609, - "step": 7446 - }, - { - "epoch": 0.8954488065893104, - "grad_norm": 2.0110076536377233, - "learning_rate": 1.1346787719460071e-07, - "loss": 0.9451, - "step": 7447 - }, - { - "epoch": 0.8955690494799495, - "grad_norm": 2.113686908703004, - "learning_rate": 1.1320937221867732e-07, - "loss": 0.9566, - "step": 7448 - }, - { - "epoch": 0.8956892923705886, - "grad_norm": 1.911833668237408, - "learning_rate": 1.1295115346739192e-07, - "loss": 1.0211, - "step": 7449 - }, - { - "epoch": 0.8958095352612276, - "grad_norm": 4.39053898520061, - "learning_rate": 1.1269322097991629e-07, - "loss": 0.9636, - "step": 7450 - }, - { - "epoch": 0.8959297781518668, - "grad_norm": 2.0280959069938205, - "learning_rate": 1.1243557479537846e-07, - "loss": 0.9114, - "step": 7451 - }, - { - "epoch": 0.8960500210425059, - "grad_norm": 2.6726365816562905, - "learning_rate": 1.121782149528634e-07, - "loss": 0.9207, - "step": 7452 - }, - { - "epoch": 0.8961702639331449, - "grad_norm": 2.7248978656912053, - "learning_rate": 1.1192114149141208e-07, - "loss": 1.0224, - "step": 7453 - }, - { - "epoch": 0.8962905068237841, - "grad_norm": 2.31804024252136, - "learning_rate": 1.1166435445002197e-07, - "loss": 0.88, - "step": 7454 - }, - { - "epoch": 0.8964107497144231, - "grad_norm": 2.0437604198131862, - "learning_rate": 1.1140785386764818e-07, - "loss": 0.9141, - "step": 7455 - }, - { - "epoch": 0.8965309926050622, - "grad_norm": 2.126712445670219, - "learning_rate": 1.1115163978320153e-07, - "loss": 0.9242, - "step": 7456 - }, - { - "epoch": 0.8966512354957014, - "grad_norm": 1.7745832195701423, - "learning_rate": 1.1089571223554917e-07, - "loss": 1.0552, - "step": 7457 - }, - { - "epoch": 0.8967714783863404, - "grad_norm": 3.1268806901842248, - "learning_rate": 1.1064007126351537e-07, - "loss": 1.0678, - "step": 7458 - }, - { - "epoch": 0.8968917212769795, - "grad_norm": 2.081862680793257, - "learning_rate": 1.1038471690588003e-07, - "loss": 0.9925, - "step": 7459 - }, - { - "epoch": 0.8970119641676186, - "grad_norm": 1.742571117093725, - "learning_rate": 1.1012964920138145e-07, - "loss": 1.0247, - "step": 7460 - }, - { - "epoch": 0.8971322070582577, - "grad_norm": 1.594016856871418, - "learning_rate": 1.0987486818871205e-07, - "loss": 0.9845, - "step": 7461 - }, - { - "epoch": 0.8972524499488967, - "grad_norm": 2.5367004675833424, - "learning_rate": 1.0962037390652245e-07, - "loss": 0.9539, - "step": 7462 - }, - { - "epoch": 0.8973726928395359, - "grad_norm": 1.6832415927148396, - "learning_rate": 1.0936616639341911e-07, - "loss": 0.9509, - "step": 7463 - }, - { - "epoch": 0.897492935730175, - "grad_norm": 0.8338549332592355, - "learning_rate": 1.0911224568796473e-07, - "loss": 0.7966, - "step": 7464 - }, - { - "epoch": 0.897613178620814, - "grad_norm": 1.7755359216707125, - "learning_rate": 1.0885861182867984e-07, - "loss": 0.9344, - "step": 7465 - }, - { - "epoch": 0.8977334215114532, - "grad_norm": 1.8321782360203804, - "learning_rate": 1.0860526485403942e-07, - "loss": 0.9369, - "step": 7466 - }, - { - "epoch": 0.8978536644020922, - "grad_norm": 1.7966511462549182, - "learning_rate": 1.0835220480247675e-07, - "loss": 1.0062, - "step": 7467 - }, - { - "epoch": 0.8979739072927313, - "grad_norm": 2.180737642378485, - "learning_rate": 1.0809943171238067e-07, - "loss": 1.069, - "step": 7468 - }, - { - "epoch": 0.8980941501833704, - "grad_norm": 1.9767195446967762, - "learning_rate": 1.078469456220965e-07, - "loss": 0.8672, - "step": 7469 - }, - { - "epoch": 0.8982143930740095, - "grad_norm": 2.2390500757121603, - "learning_rate": 1.0759474656992606e-07, - "loss": 0.9201, - "step": 7470 - }, - { - "epoch": 0.8983346359646486, - "grad_norm": 3.625081554679311, - "learning_rate": 1.0734283459412785e-07, - "loss": 1.0071, - "step": 7471 - }, - { - "epoch": 0.8984548788552876, - "grad_norm": 1.6027427232428662, - "learning_rate": 1.0709120973291707e-07, - "loss": 1.0325, - "step": 7472 - }, - { - "epoch": 0.8985751217459268, - "grad_norm": 2.1200671442663106, - "learning_rate": 1.0683987202446475e-07, - "loss": 1.0081, - "step": 7473 - }, - { - "epoch": 0.8986953646365659, - "grad_norm": 2.74252760644613, - "learning_rate": 1.0658882150689862e-07, - "loss": 0.9352, - "step": 7474 - }, - { - "epoch": 0.8988156075272049, - "grad_norm": 2.5030940936177184, - "learning_rate": 1.0633805821830288e-07, - "loss": 1.0111, - "step": 7475 - }, - { - "epoch": 0.8989358504178441, - "grad_norm": 2.2966980119881377, - "learning_rate": 1.0608758219671753e-07, - "loss": 1.058, - "step": 7476 - }, - { - "epoch": 0.8990560933084831, - "grad_norm": 2.1623283626820147, - "learning_rate": 1.0583739348014065e-07, - "loss": 0.9351, - "step": 7477 - }, - { - "epoch": 0.8991763361991222, - "grad_norm": 1.7168600615416354, - "learning_rate": 1.0558749210652518e-07, - "loss": 1.0751, - "step": 7478 - }, - { - "epoch": 0.8992965790897613, - "grad_norm": 1.6241067485348974, - "learning_rate": 1.053378781137808e-07, - "loss": 1.0844, - "step": 7479 - }, - { - "epoch": 0.8994168219804004, - "grad_norm": 1.77498233410278, - "learning_rate": 1.0508855153977392e-07, - "loss": 1.0018, - "step": 7480 - }, - { - "epoch": 0.8995370648710395, - "grad_norm": 2.298223190637579, - "learning_rate": 1.0483951242232669e-07, - "loss": 0.8955, - "step": 7481 - }, - { - "epoch": 0.8996573077616786, - "grad_norm": 1.2215765917949135, - "learning_rate": 1.0459076079921936e-07, - "loss": 0.8393, - "step": 7482 - }, - { - "epoch": 0.8997775506523177, - "grad_norm": 2.396560978617268, - "learning_rate": 1.0434229670818618e-07, - "loss": 1.069, - "step": 7483 - }, - { - "epoch": 0.8998977935429567, - "grad_norm": 1.7125636346507744, - "learning_rate": 1.0409412018691944e-07, - "loss": 1.0281, - "step": 7484 - }, - { - "epoch": 0.9000180364335959, - "grad_norm": 1.7375028431821107, - "learning_rate": 1.0384623127306724e-07, - "loss": 0.9858, - "step": 7485 - }, - { - "epoch": 0.900138279324235, - "grad_norm": 20.756051893015368, - "learning_rate": 1.0359863000423397e-07, - "loss": 1.0158, - "step": 7486 - }, - { - "epoch": 0.900258522214874, - "grad_norm": 1.5692127229262771, - "learning_rate": 1.0335131641798112e-07, - "loss": 0.9426, - "step": 7487 - }, - { - "epoch": 0.9003787651055132, - "grad_norm": 0.8578409548071874, - "learning_rate": 1.0310429055182512e-07, - "loss": 0.8585, - "step": 7488 - }, - { - "epoch": 0.9004990079961522, - "grad_norm": 2.044389453208258, - "learning_rate": 1.0285755244324024e-07, - "loss": 0.9611, - "step": 7489 - }, - { - "epoch": 0.9006192508867913, - "grad_norm": 1.8747265934289894, - "learning_rate": 1.0261110212965629e-07, - "loss": 0.9178, - "step": 7490 - }, - { - "epoch": 0.9007394937774305, - "grad_norm": 2.03665568638582, - "learning_rate": 1.023649396484596e-07, - "loss": 1.0243, - "step": 7491 - }, - { - "epoch": 0.9008597366680695, - "grad_norm": 2.2680672997677194, - "learning_rate": 1.0211906503699275e-07, - "loss": 0.9042, - "step": 7492 - }, - { - "epoch": 0.9009799795587086, - "grad_norm": 2.066203520264182, - "learning_rate": 1.0187347833255455e-07, - "loss": 1.0499, - "step": 7493 - }, - { - "epoch": 0.9011002224493477, - "grad_norm": 1.7271864786243405, - "learning_rate": 1.0162817957240056e-07, - "loss": 1.0264, - "step": 7494 - }, - { - "epoch": 0.9012204653399868, - "grad_norm": 0.9716100619880681, - "learning_rate": 1.0138316879374253e-07, - "loss": 0.9146, - "step": 7495 - }, - { - "epoch": 0.9013407082306258, - "grad_norm": 3.8109049978273952, - "learning_rate": 1.0113844603374833e-07, - "loss": 0.9707, - "step": 7496 - }, - { - "epoch": 0.901460951121265, - "grad_norm": 2.3945000850273623, - "learning_rate": 1.0089401132954178e-07, - "loss": 0.9427, - "step": 7497 - }, - { - "epoch": 0.9015811940119041, - "grad_norm": 1.6486364307867756, - "learning_rate": 1.006498647182037e-07, - "loss": 0.9545, - "step": 7498 - }, - { - "epoch": 0.9017014369025431, - "grad_norm": 2.956555669680089, - "learning_rate": 1.004060062367713e-07, - "loss": 0.9509, - "step": 7499 - }, - { - "epoch": 0.9018216797931822, - "grad_norm": 1.7431063431445433, - "learning_rate": 1.0016243592223728e-07, - "loss": 0.9229, - "step": 7500 - }, - { - "epoch": 0.9019419226838213, - "grad_norm": 1.6364351085987312, - "learning_rate": 9.991915381155114e-08, - "loss": 0.877, - "step": 7501 - }, - { - "epoch": 0.9020621655744604, - "grad_norm": 2.076637481414975, - "learning_rate": 9.967615994161871e-08, - "loss": 0.9855, - "step": 7502 - }, - { - "epoch": 0.9021824084650995, - "grad_norm": 2.5550999672998134, - "learning_rate": 9.943345434930161e-08, - "loss": 1.0052, - "step": 7503 - }, - { - "epoch": 0.9023026513557386, - "grad_norm": 2.169774089076034, - "learning_rate": 9.919103707141885e-08, - "loss": 0.9205, - "step": 7504 - }, - { - "epoch": 0.9024228942463777, - "grad_norm": 2.034881514677962, - "learning_rate": 9.89489081447441e-08, - "loss": 1.0035, - "step": 7505 - }, - { - "epoch": 0.9025431371370167, - "grad_norm": 1.843739590372991, - "learning_rate": 9.870706760600844e-08, - "loss": 1.0598, - "step": 7506 - }, - { - "epoch": 0.9026633800276559, - "grad_norm": 1.8234933722009228, - "learning_rate": 9.846551549189918e-08, - "loss": 0.9594, - "step": 7507 - }, - { - "epoch": 0.902783622918295, - "grad_norm": 2.627650602980772, - "learning_rate": 9.822425183905902e-08, - "loss": 0.9175, - "step": 7508 - }, - { - "epoch": 0.902903865808934, - "grad_norm": 0.926979994997294, - "learning_rate": 9.798327668408823e-08, - "loss": 1.0035, - "step": 7509 - }, - { - "epoch": 0.9030241086995732, - "grad_norm": 1.8239677959075409, - "learning_rate": 9.774259006354158e-08, - "loss": 0.9207, - "step": 7510 - }, - { - "epoch": 0.9031443515902122, - "grad_norm": 2.340150128463723, - "learning_rate": 9.750219201393184e-08, - "loss": 0.9839, - "step": 7511 - }, - { - "epoch": 0.9032645944808513, - "grad_norm": 2.2253815313109575, - "learning_rate": 9.726208257172697e-08, - "loss": 1.0087, - "step": 7512 - }, - { - "epoch": 0.9033848373714904, - "grad_norm": 2.118538063274581, - "learning_rate": 9.702226177335115e-08, - "loss": 0.9774, - "step": 7513 - }, - { - "epoch": 0.9035050802621295, - "grad_norm": 1.495745904788405, - "learning_rate": 9.67827296551853e-08, - "loss": 0.9541, - "step": 7514 - }, - { - "epoch": 0.9036253231527686, - "grad_norm": 2.9773532888299212, - "learning_rate": 9.65434862535659e-08, - "loss": 0.905, - "step": 7515 - }, - { - "epoch": 0.9037455660434077, - "grad_norm": 2.49510734541234, - "learning_rate": 9.630453160478635e-08, - "loss": 0.8816, - "step": 7516 - }, - { - "epoch": 0.9038658089340468, - "grad_norm": 1.5913139874582571, - "learning_rate": 9.60658657450959e-08, - "loss": 1.0447, - "step": 7517 - }, - { - "epoch": 0.9039860518246858, - "grad_norm": 1.6792647428559682, - "learning_rate": 9.582748871069979e-08, - "loss": 1.0219, - "step": 7518 - }, - { - "epoch": 0.904106294715325, - "grad_norm": 1.8254646441364422, - "learning_rate": 9.558940053775954e-08, - "loss": 1.0509, - "step": 7519 - }, - { - "epoch": 0.904226537605964, - "grad_norm": 2.165060205833836, - "learning_rate": 9.535160126239294e-08, - "loss": 0.9156, - "step": 7520 - }, - { - "epoch": 0.9043467804966031, - "grad_norm": 1.4921173558550276, - "learning_rate": 9.511409092067424e-08, - "loss": 0.9385, - "step": 7521 - }, - { - "epoch": 0.9044670233872423, - "grad_norm": 2.157873454741887, - "learning_rate": 9.487686954863327e-08, - "loss": 0.9033, - "step": 7522 - }, - { - "epoch": 0.9045872662778813, - "grad_norm": 11.348569240380522, - "learning_rate": 9.46399371822566e-08, - "loss": 0.9959, - "step": 7523 - }, - { - "epoch": 0.9047075091685204, - "grad_norm": 2.0095288254871755, - "learning_rate": 9.440329385748657e-08, - "loss": 0.9451, - "step": 7524 - }, - { - "epoch": 0.9048277520591596, - "grad_norm": 1.7189820566444765, - "learning_rate": 9.416693961022137e-08, - "loss": 0.9382, - "step": 7525 - }, - { - "epoch": 0.9049479949497986, - "grad_norm": 1.8287980886461044, - "learning_rate": 9.393087447631654e-08, - "loss": 1.0015, - "step": 7526 - }, - { - "epoch": 0.9050682378404377, - "grad_norm": 2.1784516799677576, - "learning_rate": 9.36950984915823e-08, - "loss": 0.9516, - "step": 7527 - }, - { - "epoch": 0.9051884807310768, - "grad_norm": 4.700965563023351, - "learning_rate": 9.345961169178607e-08, - "loss": 0.9238, - "step": 7528 - }, - { - "epoch": 0.9053087236217159, - "grad_norm": 1.2833380083931984, - "learning_rate": 9.322441411265081e-08, - "loss": 0.9559, - "step": 7529 - }, - { - "epoch": 0.9054289665123549, - "grad_norm": 1.7461061754573972, - "learning_rate": 9.298950578985554e-08, - "loss": 0.9629, - "step": 7530 - }, - { - "epoch": 0.905549209402994, - "grad_norm": 16.940646509221263, - "learning_rate": 9.275488675903665e-08, - "loss": 0.9409, - "step": 7531 - }, - { - "epoch": 0.9056694522936332, - "grad_norm": 1.785449381661748, - "learning_rate": 9.252055705578454e-08, - "loss": 0.9619, - "step": 7532 - }, - { - "epoch": 0.9057896951842722, - "grad_norm": 1.721513433970724, - "learning_rate": 9.228651671564747e-08, - "loss": 0.9434, - "step": 7533 - }, - { - "epoch": 0.9059099380749113, - "grad_norm": 1.4958907903547274, - "learning_rate": 9.205276577412901e-08, - "loss": 1.0111, - "step": 7534 - }, - { - "epoch": 0.9060301809655504, - "grad_norm": 3.0487869914846133, - "learning_rate": 9.181930426668905e-08, - "loss": 0.9959, - "step": 7535 - }, - { - "epoch": 0.9061504238561895, - "grad_norm": 1.540229238936442, - "learning_rate": 9.158613222874346e-08, - "loss": 0.9122, - "step": 7536 - }, - { - "epoch": 0.9062706667468285, - "grad_norm": 1.8305218625798858, - "learning_rate": 9.135324969566394e-08, - "loss": 1.0506, - "step": 7537 - }, - { - "epoch": 0.9063909096374677, - "grad_norm": 1.816071969723584, - "learning_rate": 9.112065670277913e-08, - "loss": 0.9836, - "step": 7538 - }, - { - "epoch": 0.9065111525281068, - "grad_norm": 1.9621194263939739, - "learning_rate": 9.088835328537303e-08, - "loss": 0.9514, - "step": 7539 - }, - { - "epoch": 0.9066313954187458, - "grad_norm": 2.1421973029982895, - "learning_rate": 9.065633947868568e-08, - "loss": 0.9432, - "step": 7540 - }, - { - "epoch": 0.906751638309385, - "grad_norm": 2.3220424175873076, - "learning_rate": 9.042461531791379e-08, - "loss": 1.0249, - "step": 7541 - }, - { - "epoch": 0.906871881200024, - "grad_norm": 4.939401960428466, - "learning_rate": 9.019318083820903e-08, - "loss": 1.004, - "step": 7542 - }, - { - "epoch": 0.9069921240906631, - "grad_norm": 1.850249070077494, - "learning_rate": 8.996203607468045e-08, - "loss": 1.0778, - "step": 7543 - }, - { - "epoch": 0.9071123669813023, - "grad_norm": 1.4983861506696312, - "learning_rate": 8.973118106239241e-08, - "loss": 0.9858, - "step": 7544 - }, - { - "epoch": 0.9072326098719413, - "grad_norm": 2.9770278366678755, - "learning_rate": 8.95006158363656e-08, - "loss": 1.1761, - "step": 7545 - }, - { - "epoch": 0.9073528527625804, - "grad_norm": 1.8922467802859229, - "learning_rate": 8.9270340431576e-08, - "loss": 1.0049, - "step": 7546 - }, - { - "epoch": 0.9074730956532195, - "grad_norm": 3.3331194014245624, - "learning_rate": 8.904035488295658e-08, - "loss": 0.9622, - "step": 7547 - }, - { - "epoch": 0.9075933385438586, - "grad_norm": 0.7021606401807668, - "learning_rate": 8.881065922539632e-08, - "loss": 0.7957, - "step": 7548 - }, - { - "epoch": 0.9077135814344977, - "grad_norm": 1.7537915164658366, - "learning_rate": 8.85812534937389e-08, - "loss": 0.9711, - "step": 7549 - }, - { - "epoch": 0.9078338243251368, - "grad_norm": 4.5299390277590765, - "learning_rate": 8.835213772278583e-08, - "loss": 0.8889, - "step": 7550 - }, - { - "epoch": 0.9079540672157759, - "grad_norm": 1.850021944689116, - "learning_rate": 8.812331194729373e-08, - "loss": 1.0149, - "step": 7551 - }, - { - "epoch": 0.9080743101064149, - "grad_norm": 3.8590307661353895, - "learning_rate": 8.789477620197461e-08, - "loss": 0.9531, - "step": 7552 - }, - { - "epoch": 0.9081945529970541, - "grad_norm": 2.2460438598399426, - "learning_rate": 8.766653052149831e-08, - "loss": 1.0249, - "step": 7553 - }, - { - "epoch": 0.9083147958876931, - "grad_norm": 1.9616767128314705, - "learning_rate": 8.743857494048823e-08, - "loss": 0.9722, - "step": 7554 - }, - { - "epoch": 0.9084350387783322, - "grad_norm": 2.1659735769278687, - "learning_rate": 8.721090949352605e-08, - "loss": 0.8656, - "step": 7555 - }, - { - "epoch": 0.9085552816689714, - "grad_norm": 1.8910209304240284, - "learning_rate": 8.698353421514793e-08, - "loss": 0.9555, - "step": 7556 - }, - { - "epoch": 0.9086755245596104, - "grad_norm": 2.729171684978812, - "learning_rate": 8.67564491398467e-08, - "loss": 1.0265, - "step": 7557 - }, - { - "epoch": 0.9087957674502495, - "grad_norm": 1.7566498026439499, - "learning_rate": 8.652965430207104e-08, - "loss": 0.9682, - "step": 7558 - }, - { - "epoch": 0.9089160103408886, - "grad_norm": 2.013160559249285, - "learning_rate": 8.630314973622521e-08, - "loss": 0.8862, - "step": 7559 - }, - { - "epoch": 0.9090362532315277, - "grad_norm": 1.9054519984160772, - "learning_rate": 8.607693547666995e-08, - "loss": 0.9402, - "step": 7560 - }, - { - "epoch": 0.9091564961221668, - "grad_norm": 0.934379598038501, - "learning_rate": 8.585101155772201e-08, - "loss": 0.8576, - "step": 7561 - }, - { - "epoch": 0.9092767390128058, - "grad_norm": 1.7618306667870771, - "learning_rate": 8.562537801365377e-08, - "loss": 0.9195, - "step": 7562 - }, - { - "epoch": 0.909396981903445, - "grad_norm": 1.6835428393957454, - "learning_rate": 8.540003487869362e-08, - "loss": 0.9239, - "step": 7563 - }, - { - "epoch": 0.909517224794084, - "grad_norm": 2.0841033529766877, - "learning_rate": 8.517498218702557e-08, - "loss": 1.0277, - "step": 7564 - }, - { - "epoch": 0.9096374676847231, - "grad_norm": 1.778970832975869, - "learning_rate": 8.49502199727905e-08, - "loss": 0.9295, - "step": 7565 - }, - { - "epoch": 0.9097577105753623, - "grad_norm": 2.3112037737418447, - "learning_rate": 8.472574827008428e-08, - "loss": 0.8831, - "step": 7566 - }, - { - "epoch": 0.9098779534660013, - "grad_norm": 1.6498255377348716, - "learning_rate": 8.450156711295942e-08, - "loss": 1.064, - "step": 7567 - }, - { - "epoch": 0.9099981963566404, - "grad_norm": 2.1031954069099754, - "learning_rate": 8.427767653542383e-08, - "loss": 1.0972, - "step": 7568 - }, - { - "epoch": 0.9101184392472795, - "grad_norm": 2.036038901086686, - "learning_rate": 8.405407657144125e-08, - "loss": 0.9309, - "step": 7569 - }, - { - "epoch": 0.9102386821379186, - "grad_norm": 2.474651188786927, - "learning_rate": 8.383076725493232e-08, - "loss": 0.951, - "step": 7570 - }, - { - "epoch": 0.9103589250285576, - "grad_norm": 1.9539175027906863, - "learning_rate": 8.360774861977216e-08, - "loss": 0.9104, - "step": 7571 - }, - { - "epoch": 0.9104791679191968, - "grad_norm": 1.6961826605140053, - "learning_rate": 8.338502069979281e-08, - "loss": 0.98, - "step": 7572 - }, - { - "epoch": 0.9105994108098359, - "grad_norm": 6.5398575858846, - "learning_rate": 8.316258352878214e-08, - "loss": 1.0264, - "step": 7573 - }, - { - "epoch": 0.9107196537004749, - "grad_norm": 1.8654473787287023, - "learning_rate": 8.294043714048338e-08, - "loss": 0.9385, - "step": 7574 - }, - { - "epoch": 0.9108398965911141, - "grad_norm": 0.7869120840434533, - "learning_rate": 8.271858156859624e-08, - "loss": 0.8761, - "step": 7575 - }, - { - "epoch": 0.9109601394817531, - "grad_norm": 1.5993093132313172, - "learning_rate": 8.249701684677557e-08, - "loss": 0.9635, - "step": 7576 - }, - { - "epoch": 0.9110803823723922, - "grad_norm": 1.893365686279003, - "learning_rate": 8.227574300863294e-08, - "loss": 1.0401, - "step": 7577 - }, - { - "epoch": 0.9112006252630314, - "grad_norm": 2.0027793407507573, - "learning_rate": 8.205476008773548e-08, - "loss": 0.9316, - "step": 7578 - }, - { - "epoch": 0.9113208681536704, - "grad_norm": 2.068955577766177, - "learning_rate": 8.183406811760596e-08, - "loss": 1.0551, - "step": 7579 - }, - { - "epoch": 0.9114411110443095, - "grad_norm": 4.343697746566328, - "learning_rate": 8.161366713172313e-08, - "loss": 0.9721, - "step": 7580 - }, - { - "epoch": 0.9115613539349486, - "grad_norm": 5.53233025126241, - "learning_rate": 8.139355716352137e-08, - "loss": 1.0797, - "step": 7581 - }, - { - "epoch": 0.9116815968255877, - "grad_norm": 1.8180882039679007, - "learning_rate": 8.117373824639196e-08, - "loss": 0.9316, - "step": 7582 - }, - { - "epoch": 0.9118018397162267, - "grad_norm": 0.7358764963317868, - "learning_rate": 8.095421041368067e-08, - "loss": 0.8418, - "step": 7583 - }, - { - "epoch": 0.9119220826068659, - "grad_norm": 3.666508295511506, - "learning_rate": 8.073497369868999e-08, - "loss": 0.9447, - "step": 7584 - }, - { - "epoch": 0.912042325497505, - "grad_norm": 1.538954135545984, - "learning_rate": 8.051602813467772e-08, - "loss": 0.9796, - "step": 7585 - }, - { - "epoch": 0.912162568388144, - "grad_norm": 1.5880421170912054, - "learning_rate": 8.029737375485756e-08, - "loss": 0.9416, - "step": 7586 - }, - { - "epoch": 0.9122828112787832, - "grad_norm": 1.7955586794998095, - "learning_rate": 8.007901059239986e-08, - "loss": 0.9575, - "step": 7587 - }, - { - "epoch": 0.9124030541694222, - "grad_norm": 1.600793421239646, - "learning_rate": 7.986093868042964e-08, - "loss": 1.0325, - "step": 7588 - }, - { - "epoch": 0.9125232970600613, - "grad_norm": 2.8950553965168555, - "learning_rate": 7.964315805202826e-08, - "loss": 0.9071, - "step": 7589 - }, - { - "epoch": 0.9126435399507005, - "grad_norm": 1.8222419434671693, - "learning_rate": 7.942566874023304e-08, - "loss": 0.9639, - "step": 7590 - }, - { - "epoch": 0.9127637828413395, - "grad_norm": 2.885648330220709, - "learning_rate": 7.920847077803649e-08, - "loss": 0.9311, - "step": 7591 - }, - { - "epoch": 0.9128840257319786, - "grad_norm": 2.539649778085713, - "learning_rate": 7.899156419838826e-08, - "loss": 1.0413, - "step": 7592 - }, - { - "epoch": 0.9130042686226177, - "grad_norm": 1.6828731401970571, - "learning_rate": 7.87749490341918e-08, - "loss": 0.8876, - "step": 7593 - }, - { - "epoch": 0.9131245115132568, - "grad_norm": 1.9042087945375028, - "learning_rate": 7.855862531830836e-08, - "loss": 1.0664, - "step": 7594 - }, - { - "epoch": 0.9132447544038959, - "grad_norm": 3.2334081706664137, - "learning_rate": 7.834259308355373e-08, - "loss": 0.9536, - "step": 7595 - }, - { - "epoch": 0.9133649972945349, - "grad_norm": 3.0294952032514324, - "learning_rate": 7.812685236269989e-08, - "loss": 0.9722, - "step": 7596 - }, - { - "epoch": 0.9134852401851741, - "grad_norm": 0.8621669499435958, - "learning_rate": 7.791140318847445e-08, - "loss": 0.8479, - "step": 7597 - }, - { - "epoch": 0.9136054830758131, - "grad_norm": 2.092398591677761, - "learning_rate": 7.769624559356081e-08, - "loss": 1.0235, - "step": 7598 - }, - { - "epoch": 0.9137257259664522, - "grad_norm": 3.518534359329288, - "learning_rate": 7.748137961059842e-08, - "loss": 0.9836, - "step": 7599 - }, - { - "epoch": 0.9138459688570914, - "grad_norm": 2.2531675718309296, - "learning_rate": 7.726680527218211e-08, - "loss": 0.889, - "step": 7600 - }, - { - "epoch": 0.9139662117477304, - "grad_norm": 3.1726608430387215, - "learning_rate": 7.70525226108627e-08, - "loss": 0.9858, - "step": 7601 - }, - { - "epoch": 0.9140864546383695, - "grad_norm": 1.9120777037883965, - "learning_rate": 7.683853165914666e-08, - "loss": 1.0283, - "step": 7602 - }, - { - "epoch": 0.9142066975290086, - "grad_norm": 1.759990648114761, - "learning_rate": 7.662483244949602e-08, - "loss": 1.0047, - "step": 7603 - }, - { - "epoch": 0.9143269404196477, - "grad_norm": 2.5737031874769483, - "learning_rate": 7.641142501432951e-08, - "loss": 1.0334, - "step": 7604 - }, - { - "epoch": 0.9144471833102867, - "grad_norm": 1.758139957021264, - "learning_rate": 7.619830938602013e-08, - "loss": 0.9694, - "step": 7605 - }, - { - "epoch": 0.9145674262009259, - "grad_norm": 2.026602372458867, - "learning_rate": 7.598548559689777e-08, - "loss": 1.0374, - "step": 7606 - }, - { - "epoch": 0.914687669091565, - "grad_norm": 2.4329053171394746, - "learning_rate": 7.577295367924751e-08, - "loss": 1.0379, - "step": 7607 - }, - { - "epoch": 0.914807911982204, - "grad_norm": 1.6456605200840424, - "learning_rate": 7.556071366531002e-08, - "loss": 1.0516, - "step": 7608 - }, - { - "epoch": 0.9149281548728432, - "grad_norm": 2.119718045792517, - "learning_rate": 7.53487655872822e-08, - "loss": 1.0147, - "step": 7609 - }, - { - "epoch": 0.9150483977634822, - "grad_norm": 1.625064481776348, - "learning_rate": 7.513710947731656e-08, - "loss": 0.9747, - "step": 7610 - }, - { - "epoch": 0.9151686406541213, - "grad_norm": 1.911323986130846, - "learning_rate": 7.492574536752095e-08, - "loss": 1.0802, - "step": 7611 - }, - { - "epoch": 0.9152888835447605, - "grad_norm": 1.7473933697913855, - "learning_rate": 7.471467328995907e-08, - "loss": 1.0141, - "step": 7612 - }, - { - "epoch": 0.9154091264353995, - "grad_norm": 2.7605631567514135, - "learning_rate": 7.450389327665018e-08, - "loss": 0.8382, - "step": 7613 - }, - { - "epoch": 0.9155293693260386, - "grad_norm": 2.450272766810851, - "learning_rate": 7.429340535957029e-08, - "loss": 0.9096, - "step": 7614 - }, - { - "epoch": 0.9156496122166777, - "grad_norm": 2.154232242060817, - "learning_rate": 7.40832095706494e-08, - "loss": 0.944, - "step": 7615 - }, - { - "epoch": 0.9157698551073168, - "grad_norm": 1.7775441486439705, - "learning_rate": 7.387330594177443e-08, - "loss": 1.0285, - "step": 7616 - }, - { - "epoch": 0.9158900979979558, - "grad_norm": 1.9280456477178745, - "learning_rate": 7.366369450478749e-08, - "loss": 1.0137, - "step": 7617 - }, - { - "epoch": 0.916010340888595, - "grad_norm": 2.2945394254129017, - "learning_rate": 7.345437529148646e-08, - "loss": 0.8867, - "step": 7618 - }, - { - "epoch": 0.9161305837792341, - "grad_norm": 1.9936509086833556, - "learning_rate": 7.324534833362483e-08, - "loss": 0.966, - "step": 7619 - }, - { - "epoch": 0.9162508266698731, - "grad_norm": 1.8293772915798754, - "learning_rate": 7.303661366291192e-08, - "loss": 0.9116, - "step": 7620 - }, - { - "epoch": 0.9163710695605123, - "grad_norm": 1.6471361564865763, - "learning_rate": 7.28281713110126e-08, - "loss": 1.0473, - "step": 7621 - }, - { - "epoch": 0.9164913124511513, - "grad_norm": 1.7078593534724358, - "learning_rate": 7.262002130954759e-08, - "loss": 1.0088, - "step": 7622 - }, - { - "epoch": 0.9166115553417904, - "grad_norm": 1.6725447552085593, - "learning_rate": 7.241216369009296e-08, - "loss": 1.0169, - "step": 7623 - }, - { - "epoch": 0.9167317982324296, - "grad_norm": 2.0922235646746925, - "learning_rate": 7.220459848418037e-08, - "loss": 0.8976, - "step": 7624 - }, - { - "epoch": 0.9168520411230686, - "grad_norm": 1.7041819381340506, - "learning_rate": 7.199732572329708e-08, - "loss": 1.0226, - "step": 7625 - }, - { - "epoch": 0.9169722840137077, - "grad_norm": 3.6224377225441624, - "learning_rate": 7.179034543888684e-08, - "loss": 0.9962, - "step": 7626 - }, - { - "epoch": 0.9170925269043467, - "grad_norm": 1.856736306346087, - "learning_rate": 7.158365766234808e-08, - "loss": 1.0096, - "step": 7627 - }, - { - "epoch": 0.9172127697949859, - "grad_norm": 2.1735055415063336, - "learning_rate": 7.137726242503527e-08, - "loss": 0.952, - "step": 7628 - }, - { - "epoch": 0.917333012685625, - "grad_norm": 2.465994830612618, - "learning_rate": 7.11711597582585e-08, - "loss": 1.014, - "step": 7629 - }, - { - "epoch": 0.917453255576264, - "grad_norm": 1.9510193727940217, - "learning_rate": 7.096534969328271e-08, - "loss": 1.0324, - "step": 7630 - }, - { - "epoch": 0.9175734984669032, - "grad_norm": 2.1263620835711916, - "learning_rate": 7.075983226132987e-08, - "loss": 1.0714, - "step": 7631 - }, - { - "epoch": 0.9176937413575422, - "grad_norm": 3.900568989059869, - "learning_rate": 7.055460749357656e-08, - "loss": 1.0133, - "step": 7632 - }, - { - "epoch": 0.9178139842481813, - "grad_norm": 2.2046931752673236, - "learning_rate": 7.034967542115521e-08, - "loss": 0.9345, - "step": 7633 - }, - { - "epoch": 0.9179342271388204, - "grad_norm": 1.8047962238539428, - "learning_rate": 7.014503607515388e-08, - "loss": 0.9826, - "step": 7634 - }, - { - "epoch": 0.9180544700294595, - "grad_norm": 1.8066910360365505, - "learning_rate": 6.994068948661592e-08, - "loss": 0.9044, - "step": 7635 - }, - { - "epoch": 0.9181747129200986, - "grad_norm": 2.0530011469210225, - "learning_rate": 6.973663568654142e-08, - "loss": 0.9864, - "step": 7636 - }, - { - "epoch": 0.9182949558107377, - "grad_norm": 2.3171333085435046, - "learning_rate": 6.953287470588386e-08, - "loss": 0.8775, - "step": 7637 - }, - { - "epoch": 0.9184151987013768, - "grad_norm": 2.6491962797516346, - "learning_rate": 6.932940657555452e-08, - "loss": 1.0852, - "step": 7638 - }, - { - "epoch": 0.9185354415920158, - "grad_norm": 1.4253441545600833, - "learning_rate": 6.912623132641938e-08, - "loss": 0.9853, - "step": 7639 - }, - { - "epoch": 0.918655684482655, - "grad_norm": 1.9698126210981461, - "learning_rate": 6.892334898929952e-08, - "loss": 0.985, - "step": 7640 - }, - { - "epoch": 0.918775927373294, - "grad_norm": 2.075429329635975, - "learning_rate": 6.872075959497236e-08, - "loss": 1.0694, - "step": 7641 - }, - { - "epoch": 0.9188961702639331, - "grad_norm": 3.099396302291733, - "learning_rate": 6.85184631741702e-08, - "loss": 1.0519, - "step": 7642 - }, - { - "epoch": 0.9190164131545723, - "grad_norm": 5.505468251117474, - "learning_rate": 6.831645975758161e-08, - "loss": 1.0069, - "step": 7643 - }, - { - "epoch": 0.9191366560452113, - "grad_norm": 2.0736027896774036, - "learning_rate": 6.811474937585026e-08, - "loss": 0.9109, - "step": 7644 - }, - { - "epoch": 0.9192568989358504, - "grad_norm": 1.5358859359142, - "learning_rate": 6.79133320595755e-08, - "loss": 1.0123, - "step": 7645 - }, - { - "epoch": 0.9193771418264896, - "grad_norm": 1.760714360936002, - "learning_rate": 6.771220783931198e-08, - "loss": 0.9792, - "step": 7646 - }, - { - "epoch": 0.9194973847171286, - "grad_norm": 0.8733083858154201, - "learning_rate": 6.751137674556994e-08, - "loss": 0.9079, - "step": 7647 - }, - { - "epoch": 0.9196176276077677, - "grad_norm": 4.313031813407121, - "learning_rate": 6.731083880881572e-08, - "loss": 1.0068, - "step": 7648 - }, - { - "epoch": 0.9197378704984068, - "grad_norm": 2.036704501390093, - "learning_rate": 6.711059405947072e-08, - "loss": 1.041, - "step": 7649 - }, - { - "epoch": 0.9198581133890459, - "grad_norm": 2.4359766772865323, - "learning_rate": 6.691064252791156e-08, - "loss": 1.0045, - "step": 7650 - }, - { - "epoch": 0.9199783562796849, - "grad_norm": 1.6520106924602302, - "learning_rate": 6.67109842444713e-08, - "loss": 1.0016, - "step": 7651 - }, - { - "epoch": 0.9200985991703241, - "grad_norm": 1.7276857614631629, - "learning_rate": 6.651161923943704e-08, - "loss": 0.9962, - "step": 7652 - }, - { - "epoch": 0.9202188420609632, - "grad_norm": 2.07895293987933, - "learning_rate": 6.631254754305326e-08, - "loss": 0.997, - "step": 7653 - }, - { - "epoch": 0.9203390849516022, - "grad_norm": 1.9270906465408812, - "learning_rate": 6.611376918551848e-08, - "loss": 1.012, - "step": 7654 - }, - { - "epoch": 0.9204593278422414, - "grad_norm": 2.448473657317929, - "learning_rate": 6.591528419698744e-08, - "loss": 1.0239, - "step": 7655 - }, - { - "epoch": 0.9205795707328804, - "grad_norm": 3.4151056646976246, - "learning_rate": 6.571709260756986e-08, - "loss": 1.0736, - "step": 7656 - }, - { - "epoch": 0.9206998136235195, - "grad_norm": 2.336083755252357, - "learning_rate": 6.551919444733122e-08, - "loss": 0.9872, - "step": 7657 - }, - { - "epoch": 0.9208200565141585, - "grad_norm": 2.037982757106085, - "learning_rate": 6.53215897462931e-08, - "loss": 0.8821, - "step": 7658 - }, - { - "epoch": 0.9209402994047977, - "grad_norm": 2.2090618683218746, - "learning_rate": 6.512427853443103e-08, - "loss": 0.9894, - "step": 7659 - }, - { - "epoch": 0.9210605422954368, - "grad_norm": 2.123401826245882, - "learning_rate": 6.492726084167799e-08, - "loss": 0.9898, - "step": 7660 - }, - { - "epoch": 0.9211807851860758, - "grad_norm": 0.7983280845965963, - "learning_rate": 6.473053669792072e-08, - "loss": 0.8131, - "step": 7661 - }, - { - "epoch": 0.921301028076715, - "grad_norm": 2.075425423822753, - "learning_rate": 6.453410613300248e-08, - "loss": 0.9561, - "step": 7662 - }, - { - "epoch": 0.921421270967354, - "grad_norm": 1.8423796911828882, - "learning_rate": 6.43379691767214e-08, - "loss": 0.811, - "step": 7663 - }, - { - "epoch": 0.9215415138579931, - "grad_norm": 0.7754588829764036, - "learning_rate": 6.414212585883105e-08, - "loss": 0.843, - "step": 7664 - }, - { - "epoch": 0.9216617567486323, - "grad_norm": 1.5267007161942392, - "learning_rate": 6.394657620904143e-08, - "loss": 0.9261, - "step": 7665 - }, - { - "epoch": 0.9217819996392713, - "grad_norm": 2.1366825972432837, - "learning_rate": 6.375132025701657e-08, - "loss": 0.9458, - "step": 7666 - }, - { - "epoch": 0.9219022425299104, - "grad_norm": 5.144994484589849, - "learning_rate": 6.355635803237724e-08, - "loss": 0.9229, - "step": 7667 - }, - { - "epoch": 0.9220224854205495, - "grad_norm": 2.072801698598751, - "learning_rate": 6.336168956469867e-08, - "loss": 1.0314, - "step": 7668 - }, - { - "epoch": 0.9221427283111886, - "grad_norm": 1.6288589726820928, - "learning_rate": 6.316731488351168e-08, - "loss": 0.9506, - "step": 7669 - }, - { - "epoch": 0.9222629712018277, - "grad_norm": 5.410611019920645, - "learning_rate": 6.297323401830334e-08, - "loss": 0.8675, - "step": 7670 - }, - { - "epoch": 0.9223832140924668, - "grad_norm": 2.127071044567552, - "learning_rate": 6.277944699851523e-08, - "loss": 0.9204, - "step": 7671 - }, - { - "epoch": 0.9225034569831059, - "grad_norm": 2.36602301329564, - "learning_rate": 6.25859538535447e-08, - "loss": 0.9635, - "step": 7672 - }, - { - "epoch": 0.9226236998737449, - "grad_norm": 2.607119379105832, - "learning_rate": 6.239275461274474e-08, - "loss": 1.0053, - "step": 7673 - }, - { - "epoch": 0.9227439427643841, - "grad_norm": 1.7646210103101674, - "learning_rate": 6.219984930542299e-08, - "loss": 1.0884, - "step": 7674 - }, - { - "epoch": 0.9228641856550232, - "grad_norm": 5.355991018081211, - "learning_rate": 6.200723796084383e-08, - "loss": 0.9828, - "step": 7675 - }, - { - "epoch": 0.9229844285456622, - "grad_norm": 0.765527524387257, - "learning_rate": 6.181492060822546e-08, - "loss": 0.8754, - "step": 7676 - }, - { - "epoch": 0.9231046714363014, - "grad_norm": 3.039160248819626, - "learning_rate": 6.162289727674274e-08, - "loss": 1.0481, - "step": 7677 - }, - { - "epoch": 0.9232249143269404, - "grad_norm": 5.312031265224229, - "learning_rate": 6.143116799552527e-08, - "loss": 1.1091, - "step": 7678 - }, - { - "epoch": 0.9233451572175795, - "grad_norm": 2.924166836228089, - "learning_rate": 6.123973279365802e-08, - "loss": 0.7893, - "step": 7679 - }, - { - "epoch": 0.9234654001082186, - "grad_norm": 1.795703605633535, - "learning_rate": 6.10485917001824e-08, - "loss": 1.0113, - "step": 7680 - }, - { - "epoch": 0.9235856429988577, - "grad_norm": 1.7034237100969933, - "learning_rate": 6.085774474409322e-08, - "loss": 1.0374, - "step": 7681 - }, - { - "epoch": 0.9237058858894968, - "grad_norm": 2.208212963158586, - "learning_rate": 6.066719195434267e-08, - "loss": 0.9337, - "step": 7682 - }, - { - "epoch": 0.9238261287801359, - "grad_norm": 1.9565404706786211, - "learning_rate": 6.047693335983717e-08, - "loss": 0.8953, - "step": 7683 - }, - { - "epoch": 0.923946371670775, - "grad_norm": 15.58620868341775, - "learning_rate": 6.028696898943853e-08, - "loss": 1.0484, - "step": 7684 - }, - { - "epoch": 0.924066614561414, - "grad_norm": 1.9647795588964614, - "learning_rate": 6.00972988719648e-08, - "loss": 0.9406, - "step": 7685 - }, - { - "epoch": 0.9241868574520532, - "grad_norm": 2.0346567782082174, - "learning_rate": 5.990792303618807e-08, - "loss": 0.9428, - "step": 7686 - }, - { - "epoch": 0.9243071003426923, - "grad_norm": 1.5922407596204413, - "learning_rate": 5.971884151083695e-08, - "loss": 0.9268, - "step": 7687 - }, - { - "epoch": 0.9244273432333313, - "grad_norm": 1.7925973810960119, - "learning_rate": 5.9530054324595124e-08, - "loss": 0.9705, - "step": 7688 - }, - { - "epoch": 0.9245475861239704, - "grad_norm": 0.7587878449379911, - "learning_rate": 5.934156150610103e-08, - "loss": 0.8288, - "step": 7689 - }, - { - "epoch": 0.9246678290146095, - "grad_norm": 2.3243060440211383, - "learning_rate": 5.915336308394914e-08, - "loss": 1.0102, - "step": 7690 - }, - { - "epoch": 0.9247880719052486, - "grad_norm": 1.4822364258899818, - "learning_rate": 5.89654590866886e-08, - "loss": 1.0059, - "step": 7691 - }, - { - "epoch": 0.9249083147958876, - "grad_norm": 2.115644505015042, - "learning_rate": 5.877784954282483e-08, - "loss": 1.1156, - "step": 7692 - }, - { - "epoch": 0.9250285576865268, - "grad_norm": 1.9043829482751178, - "learning_rate": 5.8590534480817963e-08, - "loss": 0.9496, - "step": 7693 - }, - { - "epoch": 0.9251488005771659, - "grad_norm": 2.310906866668849, - "learning_rate": 5.840351392908349e-08, - "loss": 0.9611, - "step": 7694 - }, - { - "epoch": 0.9252690434678049, - "grad_norm": 2.8664007536699483, - "learning_rate": 5.821678791599205e-08, - "loss": 0.9386, - "step": 7695 - }, - { - "epoch": 0.9253892863584441, - "grad_norm": 2.1284999074651383, - "learning_rate": 5.803035646986965e-08, - "loss": 1.0398, - "step": 7696 - }, - { - "epoch": 0.9255095292490831, - "grad_norm": 2.226884035685813, - "learning_rate": 5.7844219618998766e-08, - "loss": 0.9042, - "step": 7697 - }, - { - "epoch": 0.9256297721397222, - "grad_norm": 3.4340196503758507, - "learning_rate": 5.765837739161505e-08, - "loss": 0.9435, - "step": 7698 - }, - { - "epoch": 0.9257500150303614, - "grad_norm": 1.7952307434324193, - "learning_rate": 5.7472829815911504e-08, - "loss": 0.9828, - "step": 7699 - }, - { - "epoch": 0.9258702579210004, - "grad_norm": 1.7608234304260058, - "learning_rate": 5.7287576920035164e-08, - "loss": 1.0416, - "step": 7700 - }, - { - "epoch": 0.9259905008116395, - "grad_norm": 1.7753162935011961, - "learning_rate": 5.7102618732088435e-08, - "loss": 0.9894, - "step": 7701 - }, - { - "epoch": 0.9261107437022786, - "grad_norm": 1.835397843933003, - "learning_rate": 5.6917955280130216e-08, - "loss": 0.9714, - "step": 7702 - }, - { - "epoch": 0.9262309865929177, - "grad_norm": 2.8482342252533055, - "learning_rate": 5.6733586592172755e-08, - "loss": 0.9504, - "step": 7703 - }, - { - "epoch": 0.9263512294835567, - "grad_norm": 2.1564581950199955, - "learning_rate": 5.6549512696185244e-08, - "loss": 1.0403, - "step": 7704 - }, - { - "epoch": 0.9264714723741959, - "grad_norm": 2.063986502669803, - "learning_rate": 5.636573362009156e-08, - "loss": 0.9127, - "step": 7705 - }, - { - "epoch": 0.926591715264835, - "grad_norm": 2.9825401857263643, - "learning_rate": 5.618224939177074e-08, - "loss": 0.993, - "step": 7706 - }, - { - "epoch": 0.926711958155474, - "grad_norm": 1.6352030944675495, - "learning_rate": 5.599906003905719e-08, - "loss": 0.9317, - "step": 7707 - }, - { - "epoch": 0.9268322010461132, - "grad_norm": 2.8158078556614767, - "learning_rate": 5.581616558974023e-08, - "loss": 1.0465, - "step": 7708 - }, - { - "epoch": 0.9269524439367522, - "grad_norm": 1.73593935138044, - "learning_rate": 5.5633566071565444e-08, - "loss": 1.0148, - "step": 7709 - }, - { - "epoch": 0.9270726868273913, - "grad_norm": 1.9791470242245026, - "learning_rate": 5.5451261512232896e-08, - "loss": 0.9223, - "step": 7710 - }, - { - "epoch": 0.9271929297180305, - "grad_norm": 1.671150264346139, - "learning_rate": 5.5269251939397576e-08, - "loss": 0.8503, - "step": 7711 - }, - { - "epoch": 0.9273131726086695, - "grad_norm": 2.065203657475598, - "learning_rate": 5.508753738067073e-08, - "loss": 0.9958, - "step": 7712 - }, - { - "epoch": 0.9274334154993086, - "grad_norm": 1.767576708714039, - "learning_rate": 5.4906117863617875e-08, - "loss": 1.0162, - "step": 7713 - }, - { - "epoch": 0.9275536583899477, - "grad_norm": 1.992641023362171, - "learning_rate": 5.4724993415760533e-08, - "loss": 1.0164, - "step": 7714 - }, - { - "epoch": 0.9276739012805868, - "grad_norm": 2.3664278630768023, - "learning_rate": 5.454416406457496e-08, - "loss": 0.9772, - "step": 7715 - }, - { - "epoch": 0.9277941441712259, - "grad_norm": 3.255431698065786, - "learning_rate": 5.436362983749299e-08, - "loss": 0.9639, - "step": 7716 - }, - { - "epoch": 0.927914387061865, - "grad_norm": 2.0687059610986545, - "learning_rate": 5.418339076190137e-08, - "loss": 0.8662, - "step": 7717 - }, - { - "epoch": 0.9280346299525041, - "grad_norm": 1.7322361447659267, - "learning_rate": 5.400344686514202e-08, - "loss": 1.1179, - "step": 7718 - }, - { - "epoch": 0.9281548728431431, - "grad_norm": 2.074363344256879, - "learning_rate": 5.38237981745131e-08, - "loss": 0.8978, - "step": 7719 - }, - { - "epoch": 0.9282751157337822, - "grad_norm": 1.5092567286939325, - "learning_rate": 5.364444471726592e-08, - "loss": 1.0381, - "step": 7720 - }, - { - "epoch": 0.9283953586244214, - "grad_norm": 2.8117828302482324, - "learning_rate": 5.346538652060939e-08, - "loss": 1.0294, - "step": 7721 - }, - { - "epoch": 0.9285156015150604, - "grad_norm": 2.2081656722235152, - "learning_rate": 5.3286623611705994e-08, - "loss": 0.9306, - "step": 7722 - }, - { - "epoch": 0.9286358444056995, - "grad_norm": 0.909131697259889, - "learning_rate": 5.3108156017673824e-08, - "loss": 0.8827, - "step": 7723 - }, - { - "epoch": 0.9287560872963386, - "grad_norm": 1.786402161125656, - "learning_rate": 5.2929983765586775e-08, - "loss": 0.9431, - "step": 7724 - }, - { - "epoch": 0.9288763301869777, - "grad_norm": 2.0677474344113325, - "learning_rate": 5.275210688247278e-08, - "loss": 0.857, - "step": 7725 - }, - { - "epoch": 0.9289965730776167, - "grad_norm": 2.2710917847804963, - "learning_rate": 5.257452539531604e-08, - "loss": 1.0782, - "step": 7726 - }, - { - "epoch": 0.9291168159682559, - "grad_norm": 1.5094741599166832, - "learning_rate": 5.2397239331055445e-08, - "loss": 0.9168, - "step": 7727 - }, - { - "epoch": 0.929237058858895, - "grad_norm": 2.21758798607466, - "learning_rate": 5.2220248716585036e-08, - "loss": 1.033, - "step": 7728 - }, - { - "epoch": 0.929357301749534, - "grad_norm": 2.2965191416242714, - "learning_rate": 5.204355357875445e-08, - "loss": 0.9863, - "step": 7729 - }, - { - "epoch": 0.9294775446401732, - "grad_norm": 8.33504829562085, - "learning_rate": 5.1867153944367584e-08, - "loss": 0.9343, - "step": 7730 - }, - { - "epoch": 0.9295977875308122, - "grad_norm": 1.9818006021182848, - "learning_rate": 5.16910498401848e-08, - "loss": 0.9664, - "step": 7731 - }, - { - "epoch": 0.9297180304214513, - "grad_norm": 1.861496812635834, - "learning_rate": 5.151524129292073e-08, - "loss": 1.0631, - "step": 7732 - }, - { - "epoch": 0.9298382733120905, - "grad_norm": 1.8042424301320603, - "learning_rate": 5.1339728329245155e-08, - "loss": 0.8998, - "step": 7733 - }, - { - "epoch": 0.9299585162027295, - "grad_norm": 3.0356132330223637, - "learning_rate": 5.116451097578367e-08, - "loss": 1.0208, - "step": 7734 - }, - { - "epoch": 0.9300787590933686, - "grad_norm": 1.5930961314743854, - "learning_rate": 5.0989589259115895e-08, - "loss": 0.9736, - "step": 7735 - }, - { - "epoch": 0.9301990019840077, - "grad_norm": 1.8053472724829078, - "learning_rate": 5.081496320577816e-08, - "loss": 0.934, - "step": 7736 - }, - { - "epoch": 0.9303192448746468, - "grad_norm": 0.9420925820082393, - "learning_rate": 5.0640632842260835e-08, - "loss": 0.8913, - "step": 7737 - }, - { - "epoch": 0.9304394877652858, - "grad_norm": 1.3693491386503074, - "learning_rate": 5.0466598195009426e-08, - "loss": 0.9495, - "step": 7738 - }, - { - "epoch": 0.930559730655925, - "grad_norm": 1.93710618477505, - "learning_rate": 5.0292859290425036e-08, - "loss": 0.9361, - "step": 7739 - }, - { - "epoch": 0.9306799735465641, - "grad_norm": 2.083617788604536, - "learning_rate": 5.011941615486348e-08, - "loss": 1.0048, - "step": 7740 - }, - { - "epoch": 0.9308002164372031, - "grad_norm": 1.9071550565832538, - "learning_rate": 4.994626881463659e-08, - "loss": 1.0778, - "step": 7741 - }, - { - "epoch": 0.9309204593278423, - "grad_norm": 1.880510878239605, - "learning_rate": 4.9773417296009814e-08, - "loss": 0.9415, - "step": 7742 - }, - { - "epoch": 0.9310407022184813, - "grad_norm": 3.407785472036832, - "learning_rate": 4.960086162520527e-08, - "loss": 0.8854, - "step": 7743 - }, - { - "epoch": 0.9311609451091204, - "grad_norm": 1.8973303812280575, - "learning_rate": 4.942860182839936e-08, - "loss": 1.053, - "step": 7744 - }, - { - "epoch": 0.9312811879997596, - "grad_norm": 1.96258147732655, - "learning_rate": 4.925663793172341e-08, - "loss": 1.0162, - "step": 7745 - }, - { - "epoch": 0.9314014308903986, - "grad_norm": 0.8525308824913626, - "learning_rate": 4.908496996126477e-08, - "loss": 0.8429, - "step": 7746 - }, - { - "epoch": 0.9315216737810377, - "grad_norm": 1.6846039964929442, - "learning_rate": 4.89135979430646e-08, - "loss": 0.9916, - "step": 7747 - }, - { - "epoch": 0.9316419166716768, - "grad_norm": 1.6866568825680464, - "learning_rate": 4.874252190312078e-08, - "loss": 1.0721, - "step": 7748 - }, - { - "epoch": 0.9317621595623159, - "grad_norm": 1.6928860928047316, - "learning_rate": 4.857174186738477e-08, - "loss": 0.874, - "step": 7749 - }, - { - "epoch": 0.931882402452955, - "grad_norm": 3.2136965590150686, - "learning_rate": 4.840125786176408e-08, - "loss": 0.9662, - "step": 7750 - }, - { - "epoch": 0.932002645343594, - "grad_norm": 2.7035096176030007, - "learning_rate": 4.823106991212067e-08, - "loss": 0.9982, - "step": 7751 - }, - { - "epoch": 0.9321228882342332, - "grad_norm": 2.596351697506714, - "learning_rate": 4.806117804427212e-08, - "loss": 1.0742, - "step": 7752 - }, - { - "epoch": 0.9322431311248722, - "grad_norm": 7.415534658159007, - "learning_rate": 4.7891582283990926e-08, - "loss": 0.8732, - "step": 7753 - }, - { - "epoch": 0.9323633740155113, - "grad_norm": 1.9773116068071892, - "learning_rate": 4.772228265700473e-08, - "loss": 0.9541, - "step": 7754 - }, - { - "epoch": 0.9324836169061504, - "grad_norm": 2.1182691900634976, - "learning_rate": 4.75532791889961e-08, - "loss": 0.9791, - "step": 7755 - }, - { - "epoch": 0.9326038597967895, - "grad_norm": 1.8783031613968182, - "learning_rate": 4.738457190560252e-08, - "loss": 0.8852, - "step": 7756 - }, - { - "epoch": 0.9327241026874286, - "grad_norm": 2.411525244776647, - "learning_rate": 4.721616083241664e-08, - "loss": 1.0201, - "step": 7757 - }, - { - "epoch": 0.9328443455780677, - "grad_norm": 1.9015545157623053, - "learning_rate": 4.7048045994986684e-08, - "loss": 1.0027, - "step": 7758 - }, - { - "epoch": 0.9329645884687068, - "grad_norm": 2.02045011000103, - "learning_rate": 4.688022741881559e-08, - "loss": 1.1355, - "step": 7759 - }, - { - "epoch": 0.9330848313593458, - "grad_norm": 1.8409011296914661, - "learning_rate": 4.671270512936076e-08, - "loss": 0.985, - "step": 7760 - }, - { - "epoch": 0.933205074249985, - "grad_norm": 1.7901275662833376, - "learning_rate": 4.6545479152035884e-08, - "loss": 1.0574, - "step": 7761 - }, - { - "epoch": 0.9333253171406241, - "grad_norm": 2.189477517147802, - "learning_rate": 4.637854951220821e-08, - "loss": 1.0009, - "step": 7762 - }, - { - "epoch": 0.9334455600312631, - "grad_norm": 1.9328114786353532, - "learning_rate": 4.621191623520171e-08, - "loss": 0.9746, - "step": 7763 - }, - { - "epoch": 0.9335658029219023, - "grad_norm": 2.2156654730840897, - "learning_rate": 4.604557934629372e-08, - "loss": 1.0756, - "step": 7764 - }, - { - "epoch": 0.9336860458125413, - "grad_norm": 2.0879561696768683, - "learning_rate": 4.587953887071805e-08, - "loss": 1.0345, - "step": 7765 - }, - { - "epoch": 0.9338062887031804, - "grad_norm": 1.7051941088263955, - "learning_rate": 4.5713794833662554e-08, - "loss": 1.0916, - "step": 7766 - }, - { - "epoch": 0.9339265315938196, - "grad_norm": 1.8891185717468602, - "learning_rate": 4.5548347260270236e-08, - "loss": 0.8612, - "step": 7767 - }, - { - "epoch": 0.9340467744844586, - "grad_norm": 1.6302034258133833, - "learning_rate": 4.538319617564012e-08, - "loss": 0.9284, - "step": 7768 - }, - { - "epoch": 0.9341670173750977, - "grad_norm": 1.9879840861092262, - "learning_rate": 4.521834160482485e-08, - "loss": 0.9752, - "step": 7769 - }, - { - "epoch": 0.9342872602657368, - "grad_norm": 1.4515222714272982, - "learning_rate": 4.5053783572832846e-08, - "loss": 1.0474, - "step": 7770 - }, - { - "epoch": 0.9344075031563759, - "grad_norm": 1.6771129660821826, - "learning_rate": 4.488952210462771e-08, - "loss": 0.9886, - "step": 7771 - }, - { - "epoch": 0.9345277460470149, - "grad_norm": 4.604318101328649, - "learning_rate": 4.4725557225127495e-08, - "loss": 1.0871, - "step": 7772 - }, - { - "epoch": 0.9346479889376541, - "grad_norm": 1.4803797712737956, - "learning_rate": 4.456188895920565e-08, - "loss": 1.0226, - "step": 7773 - }, - { - "epoch": 0.9347682318282932, - "grad_norm": 2.2384660521372766, - "learning_rate": 4.439851733169031e-08, - "loss": 1.0735, - "step": 7774 - }, - { - "epoch": 0.9348884747189322, - "grad_norm": 2.082493600091339, - "learning_rate": 4.4235442367365204e-08, - "loss": 0.9272, - "step": 7775 - }, - { - "epoch": 0.9350087176095714, - "grad_norm": 1.890001619832793, - "learning_rate": 4.4072664090968545e-08, - "loss": 1.0131, - "step": 7776 - }, - { - "epoch": 0.9351289605002104, - "grad_norm": 1.8379870722858784, - "learning_rate": 4.391018252719347e-08, - "loss": 1.0679, - "step": 7777 - }, - { - "epoch": 0.9352492033908495, - "grad_norm": 2.1230294683983413, - "learning_rate": 4.374799770068849e-08, - "loss": 0.9184, - "step": 7778 - }, - { - "epoch": 0.9353694462814887, - "grad_norm": 1.8046238875634377, - "learning_rate": 4.358610963605658e-08, - "loss": 0.9727, - "step": 7779 - }, - { - "epoch": 0.9354896891721277, - "grad_norm": 1.9330568126809533, - "learning_rate": 4.342451835785677e-08, - "loss": 0.9139, - "step": 7780 - }, - { - "epoch": 0.9356099320627668, - "grad_norm": 1.5248210395492245, - "learning_rate": 4.3263223890601665e-08, - "loss": 0.9773, - "step": 7781 - }, - { - "epoch": 0.9357301749534058, - "grad_norm": 2.1763128593433354, - "learning_rate": 4.31022262587597e-08, - "loss": 1.0299, - "step": 7782 - }, - { - "epoch": 0.935850417844045, - "grad_norm": 2.613145582386736, - "learning_rate": 4.2941525486754225e-08, - "loss": 0.8895, - "step": 7783 - }, - { - "epoch": 0.935970660734684, - "grad_norm": 1.7266960265021265, - "learning_rate": 4.278112159896286e-08, - "loss": 1.0174, - "step": 7784 - }, - { - "epoch": 0.9360909036253231, - "grad_norm": 1.689691251168903, - "learning_rate": 4.2621014619719896e-08, - "loss": 0.8998, - "step": 7785 - }, - { - "epoch": 0.9362111465159623, - "grad_norm": 0.8462463114558062, - "learning_rate": 4.246120457331215e-08, - "loss": 0.8602, - "step": 7786 - }, - { - "epoch": 0.9363313894066013, - "grad_norm": 2.8298129108175143, - "learning_rate": 4.2301691483983325e-08, - "loss": 0.951, - "step": 7787 - }, - { - "epoch": 0.9364516322972404, - "grad_norm": 8.287783021538955, - "learning_rate": 4.214247537593163e-08, - "loss": 0.9863, - "step": 7788 - }, - { - "epoch": 0.9365718751878795, - "grad_norm": 2.0561112377339983, - "learning_rate": 4.1983556273309293e-08, - "loss": 1.0339, - "step": 7789 - }, - { - "epoch": 0.9366921180785186, - "grad_norm": 2.6239270333460993, - "learning_rate": 4.182493420022526e-08, - "loss": 0.9187, - "step": 7790 - }, - { - "epoch": 0.9368123609691577, - "grad_norm": 2.484871196977837, - "learning_rate": 4.166660918074139e-08, - "loss": 1.016, - "step": 7791 - }, - { - "epoch": 0.9369326038597968, - "grad_norm": 1.5122254776684092, - "learning_rate": 4.15085812388758e-08, - "loss": 0.9623, - "step": 7792 - }, - { - "epoch": 0.9370528467504359, - "grad_norm": 2.121884586722694, - "learning_rate": 4.135085039860153e-08, - "loss": 1.0178, - "step": 7793 - }, - { - "epoch": 0.9371730896410749, - "grad_norm": 2.8690475542256535, - "learning_rate": 4.1193416683845906e-08, - "loss": 1.012, - "step": 7794 - }, - { - "epoch": 0.9372933325317141, - "grad_norm": 2.5398363078679225, - "learning_rate": 4.103628011849136e-08, - "loss": 1.0601, - "step": 7795 - }, - { - "epoch": 0.9374135754223532, - "grad_norm": 1.8775306154642633, - "learning_rate": 4.0879440726375506e-08, - "loss": 0.9805, - "step": 7796 - }, - { - "epoch": 0.9375338183129922, - "grad_norm": 2.6482734741910634, - "learning_rate": 4.0722898531291074e-08, - "loss": 0.7893, - "step": 7797 - }, - { - "epoch": 0.9376540612036314, - "grad_norm": 1.8896024476598314, - "learning_rate": 4.0566653556985295e-08, - "loss": 0.9929, - "step": 7798 - }, - { - "epoch": 0.9377743040942704, - "grad_norm": 2.693040326802075, - "learning_rate": 4.0410705827159886e-08, - "loss": 1.0401, - "step": 7799 - }, - { - "epoch": 0.9378945469849095, - "grad_norm": 2.6459540592394943, - "learning_rate": 4.0255055365472356e-08, - "loss": 0.9414, - "step": 7800 - }, - { - "epoch": 0.9380147898755486, - "grad_norm": 2.0945289643106935, - "learning_rate": 4.009970219553471e-08, - "loss": 0.9641, - "step": 7801 - }, - { - "epoch": 0.9381350327661877, - "grad_norm": 2.767327253537938, - "learning_rate": 3.99446463409141e-08, - "loss": 0.9959, - "step": 7802 - }, - { - "epoch": 0.9382552756568268, - "grad_norm": 2.1374402266504817, - "learning_rate": 3.978988782513215e-08, - "loss": 0.9151, - "step": 7803 - }, - { - "epoch": 0.9383755185474659, - "grad_norm": 1.6276896299176795, - "learning_rate": 3.963542667166586e-08, - "loss": 0.9898, - "step": 7804 - }, - { - "epoch": 0.938495761438105, - "grad_norm": 1.9743147421755922, - "learning_rate": 3.9481262903946486e-08, - "loss": 0.9171, - "step": 7805 - }, - { - "epoch": 0.938616004328744, - "grad_norm": 0.7993724805659482, - "learning_rate": 3.932739654536066e-08, - "loss": 0.8031, - "step": 7806 - }, - { - "epoch": 0.9387362472193832, - "grad_norm": 2.253461612101066, - "learning_rate": 3.917382761925014e-08, - "loss": 0.9687, - "step": 7807 - }, - { - "epoch": 0.9388564901100223, - "grad_norm": 1.668586991595868, - "learning_rate": 3.9020556148910754e-08, - "loss": 1.0206, - "step": 7808 - }, - { - "epoch": 0.9389767330006613, - "grad_norm": 0.7286618165305273, - "learning_rate": 3.8867582157593895e-08, - "loss": 0.8199, - "step": 7809 - }, - { - "epoch": 0.9390969758913005, - "grad_norm": 1.752545548961305, - "learning_rate": 3.871490566850544e-08, - "loss": 0.9922, - "step": 7810 - }, - { - "epoch": 0.9392172187819395, - "grad_norm": 1.7996933437437344, - "learning_rate": 3.856252670480642e-08, - "loss": 0.9363, - "step": 7811 - }, - { - "epoch": 0.9393374616725786, - "grad_norm": 1.7176183963485177, - "learning_rate": 3.841044528961279e-08, - "loss": 1.0413, - "step": 7812 - }, - { - "epoch": 0.9394577045632178, - "grad_norm": 1.8936102762074802, - "learning_rate": 3.825866144599477e-08, - "loss": 1.02, - "step": 7813 - }, - { - "epoch": 0.9395779474538568, - "grad_norm": 2.117118806182059, - "learning_rate": 3.8107175196978145e-08, - "loss": 0.9801, - "step": 7814 - }, - { - "epoch": 0.9396981903444959, - "grad_norm": 2.0466189624723183, - "learning_rate": 3.7955986565542996e-08, - "loss": 0.9973, - "step": 7815 - }, - { - "epoch": 0.9398184332351349, - "grad_norm": 2.1800534614517133, - "learning_rate": 3.780509557462497e-08, - "loss": 0.9184, - "step": 7816 - }, - { - "epoch": 0.9399386761257741, - "grad_norm": 1.4810982123399372, - "learning_rate": 3.765450224711375e-08, - "loss": 0.9807, - "step": 7817 - }, - { - "epoch": 0.9400589190164131, - "grad_norm": 2.2732216644081737, - "learning_rate": 3.750420660585396e-08, - "loss": 1.0213, - "step": 7818 - }, - { - "epoch": 0.9401791619070522, - "grad_norm": 4.146171798819623, - "learning_rate": 3.735420867364603e-08, - "loss": 1.0267, - "step": 7819 - }, - { - "epoch": 0.9402994047976914, - "grad_norm": 1.724768487625798, - "learning_rate": 3.7204508473244186e-08, - "loss": 0.8494, - "step": 7820 - }, - { - "epoch": 0.9404196476883304, - "grad_norm": 1.4410509121204713, - "learning_rate": 3.7055106027357395e-08, - "loss": 0.9196, - "step": 7821 - }, - { - "epoch": 0.9405398905789695, - "grad_norm": 2.259755707424758, - "learning_rate": 3.690600135865063e-08, - "loss": 0.9461, - "step": 7822 - }, - { - "epoch": 0.9406601334696086, - "grad_norm": 0.765783332490298, - "learning_rate": 3.675719448974246e-08, - "loss": 0.8333, - "step": 7823 - }, - { - "epoch": 0.9407803763602477, - "grad_norm": 1.7639899320855739, - "learning_rate": 3.6608685443207054e-08, - "loss": 0.8319, - "step": 7824 - }, - { - "epoch": 0.9409006192508867, - "grad_norm": 2.5408176400698914, - "learning_rate": 3.646047424157306e-08, - "loss": 0.9021, - "step": 7825 - }, - { - "epoch": 0.9410208621415259, - "grad_norm": 7.067235658638838, - "learning_rate": 3.631256090732382e-08, - "loss": 0.9018, - "step": 7826 - }, - { - "epoch": 0.941141105032165, - "grad_norm": 1.6300680650646997, - "learning_rate": 3.6164945462897833e-08, - "loss": 1.0513, - "step": 7827 - }, - { - "epoch": 0.941261347922804, - "grad_norm": 2.163207508399449, - "learning_rate": 3.6017627930687856e-08, - "loss": 0.9832, - "step": 7828 - }, - { - "epoch": 0.9413815908134432, - "grad_norm": 2.444306090321667, - "learning_rate": 3.587060833304267e-08, - "loss": 0.9967, - "step": 7829 - }, - { - "epoch": 0.9415018337040822, - "grad_norm": 2.0679127730635813, - "learning_rate": 3.5723886692264225e-08, - "loss": 0.8716, - "step": 7830 - }, - { - "epoch": 0.9416220765947213, - "grad_norm": 11.783263178665548, - "learning_rate": 3.557746303061071e-08, - "loss": 0.8406, - "step": 7831 - }, - { - "epoch": 0.9417423194853605, - "grad_norm": 1.7320662244157174, - "learning_rate": 3.543133737029391e-08, - "loss": 0.9511, - "step": 7832 - }, - { - "epoch": 0.9418625623759995, - "grad_norm": 1.6389561475900276, - "learning_rate": 3.5285509733481214e-08, - "loss": 0.9149, - "step": 7833 - }, - { - "epoch": 0.9419828052666386, - "grad_norm": 1.7866961559073036, - "learning_rate": 3.513998014229469e-08, - "loss": 0.9953, - "step": 7834 - }, - { - "epoch": 0.9421030481572777, - "grad_norm": 2.0975228094922667, - "learning_rate": 3.499474861881069e-08, - "loss": 1.0916, - "step": 7835 - }, - { - "epoch": 0.9422232910479168, - "grad_norm": 2.1156768476931314, - "learning_rate": 3.4849815185061136e-08, - "loss": 0.9083, - "step": 7836 - }, - { - "epoch": 0.9423435339385559, - "grad_norm": 1.8819949325914904, - "learning_rate": 3.470517986303223e-08, - "loss": 0.9891, - "step": 7837 - }, - { - "epoch": 0.942463776829195, - "grad_norm": 1.7547193651711352, - "learning_rate": 3.4560842674664856e-08, - "loss": 1.0257, - "step": 7838 - }, - { - "epoch": 0.9425840197198341, - "grad_norm": 1.959697376847413, - "learning_rate": 3.441680364185506e-08, - "loss": 0.9846, - "step": 7839 - }, - { - "epoch": 0.9427042626104731, - "grad_norm": 2.0457370302902644, - "learning_rate": 3.427306278645314e-08, - "loss": 0.9778, - "step": 7840 - }, - { - "epoch": 0.9428245055011123, - "grad_norm": 2.849668276036224, - "learning_rate": 3.4129620130264767e-08, - "loss": 0.959, - "step": 7841 - }, - { - "epoch": 0.9429447483917514, - "grad_norm": 2.0605707103923554, - "learning_rate": 3.398647569505009e-08, - "loss": 1.0151, - "step": 7842 - }, - { - "epoch": 0.9430649912823904, - "grad_norm": 3.300659177430199, - "learning_rate": 3.384362950252373e-08, - "loss": 0.9677, - "step": 7843 - }, - { - "epoch": 0.9431852341730296, - "grad_norm": 2.3380779551423236, - "learning_rate": 3.3701081574355473e-08, - "loss": 0.7998, - "step": 7844 - }, - { - "epoch": 0.9433054770636686, - "grad_norm": 0.6544104861790937, - "learning_rate": 3.3558831932169796e-08, - "loss": 0.7676, - "step": 7845 - }, - { - "epoch": 0.9434257199543077, - "grad_norm": 2.782451755760075, - "learning_rate": 3.341688059754588e-08, - "loss": 1.1088, - "step": 7846 - }, - { - "epoch": 0.9435459628449467, - "grad_norm": 2.54167599493931, - "learning_rate": 3.327522759201762e-08, - "loss": 1.0, - "step": 7847 - }, - { - "epoch": 0.9436662057355859, - "grad_norm": 2.1628901752556198, - "learning_rate": 3.313387293707359e-08, - "loss": 0.8957, - "step": 7848 - }, - { - "epoch": 0.943786448626225, - "grad_norm": 3.054490339155557, - "learning_rate": 3.29928166541571e-08, - "loss": 0.9136, - "step": 7849 - }, - { - "epoch": 0.943906691516864, - "grad_norm": 3.8604628539406094, - "learning_rate": 3.2852058764666346e-08, - "loss": 1.0274, - "step": 7850 - }, - { - "epoch": 0.9440269344075032, - "grad_norm": 1.9424962259498961, - "learning_rate": 3.2711599289954264e-08, - "loss": 0.9227, - "step": 7851 - }, - { - "epoch": 0.9441471772981422, - "grad_norm": 1.6898030706933307, - "learning_rate": 3.257143825132847e-08, - "loss": 1.0019, - "step": 7852 - }, - { - "epoch": 0.9442674201887813, - "grad_norm": 1.7446483116661466, - "learning_rate": 3.243157567005106e-08, - "loss": 0.9832, - "step": 7853 - }, - { - "epoch": 0.9443876630794205, - "grad_norm": 2.565347252702258, - "learning_rate": 3.2292011567339296e-08, - "loss": 0.8703, - "step": 7854 - }, - { - "epoch": 0.9445079059700595, - "grad_norm": 2.7447020909980115, - "learning_rate": 3.21527459643649e-08, - "loss": 0.8017, - "step": 7855 - }, - { - "epoch": 0.9446281488606986, - "grad_norm": 2.2837149484467796, - "learning_rate": 3.2013778882254536e-08, - "loss": 0.9689, - "step": 7856 - }, - { - "epoch": 0.9447483917513377, - "grad_norm": 2.005494200571368, - "learning_rate": 3.1875110342088676e-08, - "loss": 0.9907, - "step": 7857 - }, - { - "epoch": 0.9448686346419768, - "grad_norm": 1.8884404700453306, - "learning_rate": 3.1736740364904035e-08, - "loss": 0.8837, - "step": 7858 - }, - { - "epoch": 0.9449888775326158, - "grad_norm": 2.5369053524407312, - "learning_rate": 3.159866897169094e-08, - "loss": 0.9962, - "step": 7859 - }, - { - "epoch": 0.945109120423255, - "grad_norm": 1.703424689847107, - "learning_rate": 3.146089618339487e-08, - "loss": 0.9835, - "step": 7860 - }, - { - "epoch": 0.9452293633138941, - "grad_norm": 1.913416601699574, - "learning_rate": 3.132342202091554e-08, - "loss": 0.9102, - "step": 7861 - }, - { - "epoch": 0.9453496062045331, - "grad_norm": 2.4257921596410776, - "learning_rate": 3.1186246505107595e-08, - "loss": 0.9064, - "step": 7862 - }, - { - "epoch": 0.9454698490951723, - "grad_norm": 1.9191342171054036, - "learning_rate": 3.104936965678084e-08, - "loss": 1.0687, - "step": 7863 - }, - { - "epoch": 0.9455900919858113, - "grad_norm": 1.9457379837145419, - "learning_rate": 3.091279149669956e-08, - "loss": 1.0337, - "step": 7864 - }, - { - "epoch": 0.9457103348764504, - "grad_norm": 2.609458602205591, - "learning_rate": 3.0776512045581624e-08, - "loss": 0.9688, - "step": 7865 - }, - { - "epoch": 0.9458305777670896, - "grad_norm": 2.173129371285396, - "learning_rate": 3.0640531324101384e-08, - "loss": 1.0008, - "step": 7866 - }, - { - "epoch": 0.9459508206577286, - "grad_norm": 6.807460495816308, - "learning_rate": 3.0504849352886554e-08, - "loss": 0.9883, - "step": 7867 - }, - { - "epoch": 0.9460710635483677, - "grad_norm": 2.9891073842724207, - "learning_rate": 3.036946615252023e-08, - "loss": 0.9405, - "step": 7868 - }, - { - "epoch": 0.9461913064390068, - "grad_norm": 2.1967551490356074, - "learning_rate": 3.0234381743539984e-08, - "loss": 0.8908, - "step": 7869 - }, - { - "epoch": 0.9463115493296459, - "grad_norm": 3.0832947049857187, - "learning_rate": 3.0099596146437863e-08, - "loss": 1.0256, - "step": 7870 - }, - { - "epoch": 0.946431792220285, - "grad_norm": 0.804579903529275, - "learning_rate": 2.996510938166086e-08, - "loss": 0.8602, - "step": 7871 - }, - { - "epoch": 0.9465520351109241, - "grad_norm": 1.956759131238108, - "learning_rate": 2.983092146960997e-08, - "loss": 0.9601, - "step": 7872 - }, - { - "epoch": 0.9466722780015632, - "grad_norm": 2.2490065818747977, - "learning_rate": 2.9697032430642256e-08, - "loss": 1.0235, - "step": 7873 - }, - { - "epoch": 0.9467925208922022, - "grad_norm": 2.7853070789388403, - "learning_rate": 2.9563442285067906e-08, - "loss": 0.9628, - "step": 7874 - }, - { - "epoch": 0.9469127637828414, - "grad_norm": 2.119047129453926, - "learning_rate": 2.943015105315294e-08, - "loss": 1.0265, - "step": 7875 - }, - { - "epoch": 0.9470330066734804, - "grad_norm": 2.7831808524328117, - "learning_rate": 2.929715875511718e-08, - "loss": 0.8898, - "step": 7876 - }, - { - "epoch": 0.9471532495641195, - "grad_norm": 1.7534457751337018, - "learning_rate": 2.9164465411135375e-08, - "loss": 0.9264, - "step": 7877 - }, - { - "epoch": 0.9472734924547586, - "grad_norm": 2.004488438440419, - "learning_rate": 2.9032071041337426e-08, - "loss": 1.0293, - "step": 7878 - }, - { - "epoch": 0.9473937353453977, - "grad_norm": 2.0669096998653473, - "learning_rate": 2.889997566580704e-08, - "loss": 0.9603, - "step": 7879 - }, - { - "epoch": 0.9475139782360368, - "grad_norm": 1.637707978064843, - "learning_rate": 2.8768179304583086e-08, - "loss": 0.9266, - "step": 7880 - }, - { - "epoch": 0.9476342211266758, - "grad_norm": 1.5959246888999432, - "learning_rate": 2.8636681977659117e-08, - "loss": 0.9641, - "step": 7881 - }, - { - "epoch": 0.947754464017315, - "grad_norm": 1.7834544010006559, - "learning_rate": 2.850548370498318e-08, - "loss": 1.0075, - "step": 7882 - }, - { - "epoch": 0.9478747069079541, - "grad_norm": 1.8310263018633115, - "learning_rate": 2.8374584506457798e-08, - "loss": 0.9408, - "step": 7883 - }, - { - "epoch": 0.9479949497985931, - "grad_norm": 2.4005326554171518, - "learning_rate": 2.824398440193998e-08, - "loss": 0.9024, - "step": 7884 - }, - { - "epoch": 0.9481151926892323, - "grad_norm": 2.1034723275633334, - "learning_rate": 2.811368341124232e-08, - "loss": 0.94, - "step": 7885 - }, - { - "epoch": 0.9482354355798713, - "grad_norm": 3.9039546478795315, - "learning_rate": 2.7983681554131222e-08, - "loss": 0.9041, - "step": 7886 - }, - { - "epoch": 0.9483556784705104, - "grad_norm": 2.021242106934791, - "learning_rate": 2.7853978850327365e-08, - "loss": 0.9276, - "step": 7887 - }, - { - "epoch": 0.9484759213611496, - "grad_norm": 1.8907547583070605, - "learning_rate": 2.7724575319507225e-08, - "loss": 1.1025, - "step": 7888 - }, - { - "epoch": 0.9485961642517886, - "grad_norm": 2.0248589548982037, - "learning_rate": 2.759547098130044e-08, - "loss": 1.0105, - "step": 7889 - }, - { - "epoch": 0.9487164071424277, - "grad_norm": 2.3967580475803887, - "learning_rate": 2.746666585529267e-08, - "loss": 0.9963, - "step": 7890 - }, - { - "epoch": 0.9488366500330668, - "grad_norm": 2.390400296290889, - "learning_rate": 2.73381599610234e-08, - "loss": 0.9643, - "step": 7891 - }, - { - "epoch": 0.9489568929237059, - "grad_norm": 1.8196687010118795, - "learning_rate": 2.7209953317987033e-08, - "loss": 0.9408, - "step": 7892 - }, - { - "epoch": 0.9490771358143449, - "grad_norm": 2.706634800712407, - "learning_rate": 2.7082045945631793e-08, - "loss": 1.0007, - "step": 7893 - }, - { - "epoch": 0.9491973787049841, - "grad_norm": 4.65030592865701, - "learning_rate": 2.6954437863361712e-08, - "loss": 0.9221, - "step": 7894 - }, - { - "epoch": 0.9493176215956232, - "grad_norm": 1.8715711394763623, - "learning_rate": 2.6827129090534862e-08, - "loss": 0.9399, - "step": 7895 - }, - { - "epoch": 0.9494378644862622, - "grad_norm": 1.731921204274476, - "learning_rate": 2.670011964646335e-08, - "loss": 1.0139, - "step": 7896 - }, - { - "epoch": 0.9495581073769014, - "grad_norm": 2.797659566792618, - "learning_rate": 2.657340955041487e-08, - "loss": 0.9148, - "step": 7897 - }, - { - "epoch": 0.9496783502675404, - "grad_norm": 2.2449682981975734, - "learning_rate": 2.6446998821611167e-08, - "loss": 0.9427, - "step": 7898 - }, - { - "epoch": 0.9497985931581795, - "grad_norm": 3.392207325825153, - "learning_rate": 2.6320887479228228e-08, - "loss": 0.9496, - "step": 7899 - }, - { - "epoch": 0.9499188360488187, - "grad_norm": 2.431321358307717, - "learning_rate": 2.619507554239786e-08, - "loss": 0.9538, - "step": 7900 - }, - { - "epoch": 0.9500390789394577, - "grad_norm": 1.8876844505475132, - "learning_rate": 2.606956303020502e-08, - "loss": 0.9345, - "step": 7901 - }, - { - "epoch": 0.9501593218300968, - "grad_norm": 2.1947801697992206, - "learning_rate": 2.5944349961690036e-08, - "loss": 1.0659, - "step": 7902 - }, - { - "epoch": 0.9502795647207359, - "grad_norm": 3.139064571974257, - "learning_rate": 2.581943635584749e-08, - "loss": 0.9606, - "step": 7903 - }, - { - "epoch": 0.950399807611375, - "grad_norm": 2.214447793434334, - "learning_rate": 2.569482223162689e-08, - "loss": 0.8912, - "step": 7904 - }, - { - "epoch": 0.950520050502014, - "grad_norm": 1.763750955323218, - "learning_rate": 2.5570507607932e-08, - "loss": 0.9568, - "step": 7905 - }, - { - "epoch": 0.9506402933926532, - "grad_norm": 2.71013275426608, - "learning_rate": 2.54464925036213e-08, - "loss": 0.8528, - "step": 7906 - }, - { - "epoch": 0.9507605362832923, - "grad_norm": 2.9350481555114047, - "learning_rate": 2.532277693750773e-08, - "loss": 0.8288, - "step": 7907 - }, - { - "epoch": 0.9508807791739313, - "grad_norm": 1.8961010279317343, - "learning_rate": 2.5199360928358948e-08, - "loss": 0.9818, - "step": 7908 - }, - { - "epoch": 0.9510010220645704, - "grad_norm": 1.8844122837104356, - "learning_rate": 2.507624449489665e-08, - "loss": 1.0952, - "step": 7909 - }, - { - "epoch": 0.9511212649552095, - "grad_norm": 1.9372780580456643, - "learning_rate": 2.495342765579811e-08, - "loss": 0.8813, - "step": 7910 - }, - { - "epoch": 0.9512415078458486, - "grad_norm": 1.873822987199316, - "learning_rate": 2.4830910429693984e-08, - "loss": 0.9419, - "step": 7911 - }, - { - "epoch": 0.9513617507364877, - "grad_norm": 1.993779283266656, - "learning_rate": 2.470869283517052e-08, - "loss": 1.0249, - "step": 7912 - }, - { - "epoch": 0.9514819936271268, - "grad_norm": 1.67042835429743, - "learning_rate": 2.458677489076777e-08, - "loss": 0.9978, - "step": 7913 - }, - { - "epoch": 0.9516022365177659, - "grad_norm": 2.0256039131006225, - "learning_rate": 2.446515661498072e-08, - "loss": 1.0604, - "step": 7914 - }, - { - "epoch": 0.9517224794084049, - "grad_norm": 2.350482375776136, - "learning_rate": 2.434383802625861e-08, - "loss": 0.9674, - "step": 7915 - }, - { - "epoch": 0.9518427222990441, - "grad_norm": 1.806600572296731, - "learning_rate": 2.4222819143005168e-08, - "loss": 0.9654, - "step": 7916 - }, - { - "epoch": 0.9519629651896832, - "grad_norm": 1.795095607580957, - "learning_rate": 2.4102099983579706e-08, - "loss": 1.0411, - "step": 7917 - }, - { - "epoch": 0.9520832080803222, - "grad_norm": 2.2996311389534063, - "learning_rate": 2.3981680566294236e-08, - "loss": 0.9896, - "step": 7918 - }, - { - "epoch": 0.9522034509709614, - "grad_norm": 8.628089240973074, - "learning_rate": 2.3861560909416822e-08, - "loss": 0.9646, - "step": 7919 - }, - { - "epoch": 0.9523236938616004, - "grad_norm": 2.9451633891196662, - "learning_rate": 2.3741741031169325e-08, - "loss": 1.0529, - "step": 7920 - }, - { - "epoch": 0.9524439367522395, - "grad_norm": 1.6576854315551652, - "learning_rate": 2.3622220949728544e-08, - "loss": 0.9372, - "step": 7921 - }, - { - "epoch": 0.9525641796428787, - "grad_norm": 2.6713134889461374, - "learning_rate": 2.3503000683225526e-08, - "loss": 0.8411, - "step": 7922 - }, - { - "epoch": 0.9526844225335177, - "grad_norm": 2.769776240651726, - "learning_rate": 2.3384080249745585e-08, - "loss": 1.0632, - "step": 7923 - }, - { - "epoch": 0.9528046654241568, - "grad_norm": 2.1797432841063955, - "learning_rate": 2.3265459667329178e-08, - "loss": 1.0663, - "step": 7924 - }, - { - "epoch": 0.9529249083147959, - "grad_norm": 2.2511102268476257, - "learning_rate": 2.31471389539708e-08, - "loss": 1.0943, - "step": 7925 - }, - { - "epoch": 0.953045151205435, - "grad_norm": 2.509899186317266, - "learning_rate": 2.3029118127619872e-08, - "loss": 0.9586, - "step": 7926 - }, - { - "epoch": 0.953165394096074, - "grad_norm": 2.1097243408277313, - "learning_rate": 2.2911397206179628e-08, - "loss": 1.0963, - "step": 7927 - }, - { - "epoch": 0.9532856369867132, - "grad_norm": 19.349230752484466, - "learning_rate": 2.279397620750845e-08, - "loss": 0.8545, - "step": 7928 - }, - { - "epoch": 0.9534058798773523, - "grad_norm": 3.0562610524607634, - "learning_rate": 2.2676855149419195e-08, - "loss": 1.0172, - "step": 7929 - }, - { - "epoch": 0.9535261227679913, - "grad_norm": 2.412285703322292, - "learning_rate": 2.2560034049678988e-08, - "loss": 0.9753, - "step": 7930 - }, - { - "epoch": 0.9536463656586305, - "grad_norm": 4.803939537817667, - "learning_rate": 2.2443512926008988e-08, - "loss": 0.9838, - "step": 7931 - }, - { - "epoch": 0.9537666085492695, - "grad_norm": 2.4383137640577366, - "learning_rate": 2.2327291796085946e-08, - "loss": 0.934, - "step": 7932 - }, - { - "epoch": 0.9538868514399086, - "grad_norm": 3.3007555356801634, - "learning_rate": 2.2211370677540197e-08, - "loss": 1.0014, - "step": 7933 - }, - { - "epoch": 0.9540070943305478, - "grad_norm": 6.216287249866399, - "learning_rate": 2.2095749587957012e-08, - "loss": 1.0101, - "step": 7934 - }, - { - "epoch": 0.9541273372211868, - "grad_norm": 2.442974301128467, - "learning_rate": 2.1980428544876138e-08, - "loss": 0.9264, - "step": 7935 - }, - { - "epoch": 0.9542475801118259, - "grad_norm": 2.051210660028356, - "learning_rate": 2.1865407565791584e-08, - "loss": 0.9656, - "step": 7936 - }, - { - "epoch": 0.954367823002465, - "grad_norm": 1.8926766353199984, - "learning_rate": 2.175068666815183e-08, - "loss": 1.0034, - "step": 7937 - }, - { - "epoch": 0.9544880658931041, - "grad_norm": 1.9566785909602382, - "learning_rate": 2.163626586935985e-08, - "loss": 1.0193, - "step": 7938 - }, - { - "epoch": 0.9546083087837431, - "grad_norm": 1.8327610538738879, - "learning_rate": 2.1522145186773755e-08, - "loss": 0.8611, - "step": 7939 - }, - { - "epoch": 0.9547285516743822, - "grad_norm": 1.5963214250684503, - "learning_rate": 2.140832463770481e-08, - "loss": 1.0855, - "step": 7940 - }, - { - "epoch": 0.9548487945650214, - "grad_norm": 2.021872721793116, - "learning_rate": 2.129480423941987e-08, - "loss": 0.9821, - "step": 7941 - }, - { - "epoch": 0.9549690374556604, - "grad_norm": 1.5652456954801819, - "learning_rate": 2.1181584009140052e-08, - "loss": 1.0335, - "step": 7942 - }, - { - "epoch": 0.9550892803462995, - "grad_norm": 1.9816680226536574, - "learning_rate": 2.10686639640405e-08, - "loss": 1.0613, - "step": 7943 - }, - { - "epoch": 0.9552095232369386, - "grad_norm": 1.620350347829161, - "learning_rate": 2.0956044121251294e-08, - "loss": 1.0393, - "step": 7944 - }, - { - "epoch": 0.9553297661275777, - "grad_norm": 2.8761528025902336, - "learning_rate": 2.084372449785654e-08, - "loss": 1.0443, - "step": 7945 - }, - { - "epoch": 0.9554500090182168, - "grad_norm": 2.2786625859689233, - "learning_rate": 2.0731705110895282e-08, - "loss": 0.9088, - "step": 7946 - }, - { - "epoch": 0.9555702519088559, - "grad_norm": 1.9832746804270636, - "learning_rate": 2.0619985977360587e-08, - "loss": 1.1, - "step": 7947 - }, - { - "epoch": 0.955690494799495, - "grad_norm": 1.778435340441658, - "learning_rate": 2.0508567114200237e-08, - "loss": 1.0013, - "step": 7948 - }, - { - "epoch": 0.955810737690134, - "grad_norm": 2.7765632315974433, - "learning_rate": 2.0397448538316485e-08, - "loss": 1.007, - "step": 7949 - }, - { - "epoch": 0.9559309805807732, - "grad_norm": 2.4874634169759076, - "learning_rate": 2.028663026656563e-08, - "loss": 0.8881, - "step": 7950 - }, - { - "epoch": 0.9560512234714122, - "grad_norm": 2.178166450095141, - "learning_rate": 2.0176112315758885e-08, - "loss": 0.942, - "step": 7951 - }, - { - "epoch": 0.9561714663620513, - "grad_norm": 2.450470770156593, - "learning_rate": 2.0065894702661957e-08, - "loss": 0.9296, - "step": 7952 - }, - { - "epoch": 0.9562917092526905, - "grad_norm": 1.7227431640843665, - "learning_rate": 1.9955977443994577e-08, - "loss": 1.01, - "step": 7953 - }, - { - "epoch": 0.9564119521433295, - "grad_norm": 4.828428783550843, - "learning_rate": 1.9846360556430965e-08, - "loss": 0.8643, - "step": 7954 - }, - { - "epoch": 0.9565321950339686, - "grad_norm": 3.0249525901925467, - "learning_rate": 1.973704405660004e-08, - "loss": 0.8471, - "step": 7955 - }, - { - "epoch": 0.9566524379246077, - "grad_norm": 3.416534715911263, - "learning_rate": 1.9628027961085203e-08, - "loss": 1.0048, - "step": 7956 - }, - { - "epoch": 0.9567726808152468, - "grad_norm": 2.4126224111226198, - "learning_rate": 1.9519312286423894e-08, - "loss": 1.0659, - "step": 7957 - }, - { - "epoch": 0.9568929237058859, - "grad_norm": 3.163486908727756, - "learning_rate": 1.9410897049108255e-08, - "loss": 1.0029, - "step": 7958 - }, - { - "epoch": 0.957013166596525, - "grad_norm": 1.9657286818288466, - "learning_rate": 1.9302782265584905e-08, - "loss": 1.143, - "step": 7959 - }, - { - "epoch": 0.9571334094871641, - "grad_norm": 2.068737193641324, - "learning_rate": 1.9194967952254282e-08, - "loss": 1.0902, - "step": 7960 - }, - { - "epoch": 0.9572536523778031, - "grad_norm": 2.372448805808387, - "learning_rate": 1.9087454125472635e-08, - "loss": 1.0364, - "step": 7961 - }, - { - "epoch": 0.9573738952684423, - "grad_norm": 2.304166676784007, - "learning_rate": 1.8980240801548696e-08, - "loss": 1.0132, - "step": 7962 - }, - { - "epoch": 0.9574941381590814, - "grad_norm": 1.8210749523316956, - "learning_rate": 1.8873327996747458e-08, - "loss": 0.971, - "step": 7963 - }, - { - "epoch": 0.9576143810497204, - "grad_norm": 1.818208845978034, - "learning_rate": 1.8766715727287053e-08, - "loss": 0.8952, - "step": 7964 - }, - { - "epoch": 0.9577346239403596, - "grad_norm": 1.7764075863414002, - "learning_rate": 1.8660404009340546e-08, - "loss": 1.0156, - "step": 7965 - }, - { - "epoch": 0.9578548668309986, - "grad_norm": 0.917126727974605, - "learning_rate": 1.8554392859035485e-08, - "loss": 0.8973, - "step": 7966 - }, - { - "epoch": 0.9579751097216377, - "grad_norm": 2.4507399706636757, - "learning_rate": 1.8448682292453444e-08, - "loss": 1.0196, - "step": 7967 - }, - { - "epoch": 0.9580953526122769, - "grad_norm": 1.9130790831057018, - "learning_rate": 1.8343272325631154e-08, - "loss": 0.8942, - "step": 7968 - }, - { - "epoch": 0.9582155955029159, - "grad_norm": 2.341546001470386, - "learning_rate": 1.8238162974558492e-08, - "loss": 1.003, - "step": 7969 - }, - { - "epoch": 0.958335838393555, - "grad_norm": 2.247419997373051, - "learning_rate": 1.8133354255181144e-08, - "loss": 0.9719, - "step": 7970 - }, - { - "epoch": 0.958456081284194, - "grad_norm": 1.9958277933870756, - "learning_rate": 1.802884618339795e-08, - "loss": 0.9855, - "step": 7971 - }, - { - "epoch": 0.9585763241748332, - "grad_norm": 2.0514979681367085, - "learning_rate": 1.7924638775062894e-08, - "loss": 1.0375, - "step": 7972 - }, - { - "epoch": 0.9586965670654722, - "grad_norm": 1.9934728328134812, - "learning_rate": 1.7820732045984444e-08, - "loss": 1.0499, - "step": 7973 - }, - { - "epoch": 0.9588168099561113, - "grad_norm": 1.8478974070041725, - "learning_rate": 1.7717126011924655e-08, - "loss": 0.9706, - "step": 7974 - }, - { - "epoch": 0.9589370528467505, - "grad_norm": 2.6642617070063537, - "learning_rate": 1.7613820688600957e-08, - "loss": 0.9958, - "step": 7975 - }, - { - "epoch": 0.9590572957373895, - "grad_norm": 1.807334181169184, - "learning_rate": 1.7510816091684588e-08, - "loss": 1.0174, - "step": 7976 - }, - { - "epoch": 0.9591775386280286, - "grad_norm": 2.6898220587661013, - "learning_rate": 1.740811223680083e-08, - "loss": 1.0301, - "step": 7977 - }, - { - "epoch": 0.9592977815186677, - "grad_norm": 3.780963855351711, - "learning_rate": 1.7305709139530334e-08, - "loss": 0.9659, - "step": 7978 - }, - { - "epoch": 0.9594180244093068, - "grad_norm": 2.297535392646356, - "learning_rate": 1.7203606815407334e-08, - "loss": 0.9832, - "step": 7979 - }, - { - "epoch": 0.9595382672999458, - "grad_norm": 1.578855892928023, - "learning_rate": 1.7101805279920557e-08, - "loss": 1.026, - "step": 7980 - }, - { - "epoch": 0.959658510190585, - "grad_norm": 1.8867322711973447, - "learning_rate": 1.7000304548513643e-08, - "loss": 1.0446, - "step": 7981 - }, - { - "epoch": 0.9597787530812241, - "grad_norm": 2.357649620704652, - "learning_rate": 1.6899104636583394e-08, - "loss": 1.0544, - "step": 7982 - }, - { - "epoch": 0.9598989959718631, - "grad_norm": 0.7577929936680871, - "learning_rate": 1.6798205559482638e-08, - "loss": 0.8853, - "step": 7983 - }, - { - "epoch": 0.9600192388625023, - "grad_norm": 2.2736994536718, - "learning_rate": 1.669760733251713e-08, - "loss": 0.9962, - "step": 7984 - }, - { - "epoch": 0.9601394817531413, - "grad_norm": 2.108116622261254, - "learning_rate": 1.659730997094755e-08, - "loss": 1.0577, - "step": 7985 - }, - { - "epoch": 0.9602597246437804, - "grad_norm": 1.8000869571026572, - "learning_rate": 1.6497313489989283e-08, - "loss": 0.8497, - "step": 7986 - }, - { - "epoch": 0.9603799675344196, - "grad_norm": 2.485653146608871, - "learning_rate": 1.639761790481131e-08, - "loss": 0.922, - "step": 7987 - }, - { - "epoch": 0.9605002104250586, - "grad_norm": 1.8924191371720123, - "learning_rate": 1.6298223230537754e-08, - "loss": 1.0198, - "step": 7988 - }, - { - "epoch": 0.9606204533156977, - "grad_norm": 1.8175630556945697, - "learning_rate": 1.619912948224611e-08, - "loss": 0.9227, - "step": 7989 - }, - { - "epoch": 0.9607406962063368, - "grad_norm": 2.3313800151661175, - "learning_rate": 1.6100336674969682e-08, - "loss": 0.832, - "step": 7990 - }, - { - "epoch": 0.9608609390969759, - "grad_norm": 1.911353362178188, - "learning_rate": 1.600184482369449e-08, - "loss": 0.9975, - "step": 7991 - }, - { - "epoch": 0.960981181987615, - "grad_norm": 2.2492145650916804, - "learning_rate": 1.5903653943362126e-08, - "loss": 1.1245, - "step": 7992 - }, - { - "epoch": 0.9611014248782541, - "grad_norm": 2.2223749770538848, - "learning_rate": 1.580576404886802e-08, - "loss": 0.9929, - "step": 7993 - }, - { - "epoch": 0.9612216677688932, - "grad_norm": 2.2178901833125946, - "learning_rate": 1.570817515506162e-08, - "loss": 1.0302, - "step": 7994 - }, - { - "epoch": 0.9613419106595322, - "grad_norm": 2.217865028656919, - "learning_rate": 1.561088727674753e-08, - "loss": 1.038, - "step": 7995 - }, - { - "epoch": 0.9614621535501714, - "grad_norm": 6.797875902866386, - "learning_rate": 1.551390042868417e-08, - "loss": 0.9409, - "step": 7996 - }, - { - "epoch": 0.9615823964408104, - "grad_norm": 1.8641428364967263, - "learning_rate": 1.5417214625584207e-08, - "loss": 0.9362, - "step": 7997 - }, - { - "epoch": 0.9617026393314495, - "grad_norm": 1.504582082317758, - "learning_rate": 1.5320829882114806e-08, - "loss": 1.079, - "step": 7998 - }, - { - "epoch": 0.9618228822220887, - "grad_norm": 2.4367122844525877, - "learning_rate": 1.5224746212897378e-08, - "loss": 1.0143, - "step": 7999 - }, - { - "epoch": 0.9619431251127277, - "grad_norm": 5.12187899949556, - "learning_rate": 1.512896363250804e-08, - "loss": 0.9963, - "step": 8000 - }, - { - "epoch": 0.9620633680033668, - "grad_norm": 1.9945419460656821, - "learning_rate": 1.503348215547673e-08, - "loss": 0.9861, - "step": 8001 - }, - { - "epoch": 0.962183610894006, - "grad_norm": 1.740160048895881, - "learning_rate": 1.4938301796288078e-08, - "loss": 1.0326, - "step": 8002 - }, - { - "epoch": 0.962303853784645, - "grad_norm": 2.7961576596779874, - "learning_rate": 1.4843422569380537e-08, - "loss": 1.0528, - "step": 8003 - }, - { - "epoch": 0.9624240966752841, - "grad_norm": 2.1671999006184737, - "learning_rate": 1.4748844489147483e-08, - "loss": 1.0527, - "step": 8004 - }, - { - "epoch": 0.9625443395659231, - "grad_norm": 1.9084919737241577, - "learning_rate": 1.4654567569936326e-08, - "loss": 0.9383, - "step": 8005 - }, - { - "epoch": 0.9626645824565623, - "grad_norm": 1.8756324337256172, - "learning_rate": 1.456059182604874e-08, - "loss": 1.0535, - "step": 8006 - }, - { - "epoch": 0.9627848253472013, - "grad_norm": 1.799449370193191, - "learning_rate": 1.4466917271740653e-08, - "loss": 0.989, - "step": 8007 - }, - { - "epoch": 0.9629050682378404, - "grad_norm": 1.9806826022707702, - "learning_rate": 1.4373543921222697e-08, - "loss": 0.905, - "step": 8008 - }, - { - "epoch": 0.9630253111284796, - "grad_norm": 1.8287710368784051, - "learning_rate": 1.428047178865932e-08, - "loss": 1.0136, - "step": 8009 - }, - { - "epoch": 0.9631455540191186, - "grad_norm": 1.8469666046816342, - "learning_rate": 1.4187700888169451e-08, - "loss": 0.9715, - "step": 8010 - }, - { - "epoch": 0.9632657969097577, - "grad_norm": 0.8415750445802488, - "learning_rate": 1.40952312338265e-08, - "loss": 0.8787, - "step": 8011 - }, - { - "epoch": 0.9633860398003968, - "grad_norm": 3.629687599842269, - "learning_rate": 1.4003062839657909e-08, - "loss": 0.9123, - "step": 8012 - }, - { - "epoch": 0.9635062826910359, - "grad_norm": 1.6811251852792886, - "learning_rate": 1.391119571964583e-08, - "loss": 1.0296, - "step": 8013 - }, - { - "epoch": 0.9636265255816749, - "grad_norm": 1.8505273170547285, - "learning_rate": 1.3819629887726225e-08, - "loss": 0.9599, - "step": 8014 - }, - { - "epoch": 0.9637467684723141, - "grad_norm": 1.859396926365926, - "learning_rate": 1.3728365357789317e-08, - "loss": 0.997, - "step": 8015 - }, - { - "epoch": 0.9638670113629532, - "grad_norm": 2.8132603783059684, - "learning_rate": 1.3637402143680254e-08, - "loss": 1.0006, - "step": 8016 - }, - { - "epoch": 0.9639872542535922, - "grad_norm": 0.7683682245812502, - "learning_rate": 1.3546740259197998e-08, - "loss": 0.8046, - "step": 8017 - }, - { - "epoch": 0.9641074971442314, - "grad_norm": 3.6622274720541146, - "learning_rate": 1.3456379718095989e-08, - "loss": 0.9285, - "step": 8018 - }, - { - "epoch": 0.9642277400348704, - "grad_norm": 0.8747120792251347, - "learning_rate": 1.3366320534081487e-08, - "loss": 0.8836, - "step": 8019 - }, - { - "epoch": 0.9643479829255095, - "grad_norm": 2.1373859040443812, - "learning_rate": 1.3276562720816675e-08, - "loss": 0.9753, - "step": 8020 - }, - { - "epoch": 0.9644682258161487, - "grad_norm": 2.5141705401471572, - "learning_rate": 1.3187106291917549e-08, - "loss": 1.0597, - "step": 8021 - }, - { - "epoch": 0.9645884687067877, - "grad_norm": 1.7056660026522659, - "learning_rate": 1.309795126095503e-08, - "loss": 0.9303, - "step": 8022 - }, - { - "epoch": 0.9647087115974268, - "grad_norm": 2.458955287382566, - "learning_rate": 1.3009097641453192e-08, - "loss": 1.0336, - "step": 8023 - }, - { - "epoch": 0.9648289544880659, - "grad_norm": 1.5977637667491862, - "learning_rate": 1.2920545446891474e-08, - "loss": 0.9885, - "step": 8024 - }, - { - "epoch": 0.964949197378705, - "grad_norm": 1.6656967519796826, - "learning_rate": 1.2832294690703127e-08, - "loss": 0.9277, - "step": 8025 - }, - { - "epoch": 0.965069440269344, - "grad_norm": 1.9582544946695652, - "learning_rate": 1.2744345386275668e-08, - "loss": 1.0023, - "step": 8026 - }, - { - "epoch": 0.9651896831599832, - "grad_norm": 2.5075639738249365, - "learning_rate": 1.265669754695109e-08, - "loss": 1.0115, - "step": 8027 - }, - { - "epoch": 0.9653099260506223, - "grad_norm": 3.90018975701004, - "learning_rate": 1.2569351186025201e-08, - "loss": 1.0451, - "step": 8028 - }, - { - "epoch": 0.9654301689412613, - "grad_norm": 1.4348947717716931, - "learning_rate": 1.2482306316748737e-08, - "loss": 0.9775, - "step": 8029 - }, - { - "epoch": 0.9655504118319005, - "grad_norm": 1.703148937932045, - "learning_rate": 1.2395562952326021e-08, - "loss": 1.0132, - "step": 8030 - }, - { - "epoch": 0.9656706547225395, - "grad_norm": 1.9807574120098437, - "learning_rate": 1.2309121105916309e-08, - "loss": 1.0424, - "step": 8031 - }, - { - "epoch": 0.9657908976131786, - "grad_norm": 3.439219790184651, - "learning_rate": 1.222298079063222e-08, - "loss": 0.9136, - "step": 8032 - }, - { - "epoch": 0.9659111405038178, - "grad_norm": 1.9499227899769103, - "learning_rate": 1.2137142019541524e-08, - "loss": 0.9599, - "step": 8033 - }, - { - "epoch": 0.9660313833944568, - "grad_norm": 4.657880049724036, - "learning_rate": 1.2051604805666027e-08, - "loss": 0.969, - "step": 8034 - }, - { - "epoch": 0.9661516262850959, - "grad_norm": 1.9113784343710332, - "learning_rate": 1.196636916198135e-08, - "loss": 1.0146, - "step": 8035 - }, - { - "epoch": 0.9662718691757349, - "grad_norm": 3.258010089837568, - "learning_rate": 1.1881435101418036e-08, - "loss": 1.0043, - "step": 8036 - }, - { - "epoch": 0.9663921120663741, - "grad_norm": 0.8643616694748493, - "learning_rate": 1.1796802636860003e-08, - "loss": 0.9346, - "step": 8037 - }, - { - "epoch": 0.9665123549570132, - "grad_norm": 2.3910478205539474, - "learning_rate": 1.1712471781146316e-08, - "loss": 0.9663, - "step": 8038 - }, - { - "epoch": 0.9666325978476522, - "grad_norm": 1.9152003428987086, - "learning_rate": 1.1628442547069628e-08, - "loss": 0.9054, - "step": 8039 - }, - { - "epoch": 0.9667528407382914, - "grad_norm": 3.1329762708518403, - "learning_rate": 1.1544714947377521e-08, - "loss": 0.9933, - "step": 8040 - }, - { - "epoch": 0.9668730836289304, - "grad_norm": 4.077330530177721, - "learning_rate": 1.1461288994770945e-08, - "loss": 0.9281, - "step": 8041 - }, - { - "epoch": 0.9669933265195695, - "grad_norm": 1.7978503026755839, - "learning_rate": 1.1378164701906002e-08, - "loss": 1.0067, - "step": 8042 - }, - { - "epoch": 0.9671135694102087, - "grad_norm": 3.337910053337256, - "learning_rate": 1.1295342081392156e-08, - "loss": 0.8955, - "step": 8043 - }, - { - "epoch": 0.9672338123008477, - "grad_norm": 4.97707298451249, - "learning_rate": 1.1212821145793804e-08, - "loss": 0.9207, - "step": 8044 - }, - { - "epoch": 0.9673540551914868, - "grad_norm": 2.0394465643823443, - "learning_rate": 1.1130601907629156e-08, - "loss": 1.0197, - "step": 8045 - }, - { - "epoch": 0.9674742980821259, - "grad_norm": 0.834747465629649, - "learning_rate": 1.1048684379370899e-08, - "loss": 0.9078, - "step": 8046 - }, - { - "epoch": 0.967594540972765, - "grad_norm": 1.9623751290800788, - "learning_rate": 1.0967068573445759e-08, - "loss": 0.9769, - "step": 8047 - }, - { - "epoch": 0.967714783863404, - "grad_norm": 2.0708746380847387, - "learning_rate": 1.0885754502234945e-08, - "loss": 0.8783, - "step": 8048 - }, - { - "epoch": 0.9678350267540432, - "grad_norm": 1.9372789810618054, - "learning_rate": 1.08047421780737e-08, - "loss": 1.0093, - "step": 8049 - }, - { - "epoch": 0.9679552696446823, - "grad_norm": 2.1622185408162404, - "learning_rate": 1.0724031613251305e-08, - "loss": 0.9692, - "step": 8050 - }, - { - "epoch": 0.9680755125353213, - "grad_norm": 2.0648867351143787, - "learning_rate": 1.0643622820011744e-08, - "loss": 0.8894, - "step": 8051 - }, - { - "epoch": 0.9681957554259605, - "grad_norm": 2.6490305008822554, - "learning_rate": 1.0563515810552814e-08, - "loss": 0.9099, - "step": 8052 - }, - { - "epoch": 0.9683159983165995, - "grad_norm": 1.4835583498298452, - "learning_rate": 1.0483710597026795e-08, - "loss": 0.9628, - "step": 8053 - }, - { - "epoch": 0.9684362412072386, - "grad_norm": 2.550590285999959, - "learning_rate": 1.0404207191540227e-08, - "loss": 0.9639, - "step": 8054 - }, - { - "epoch": 0.9685564840978778, - "grad_norm": 2.4099639370701205, - "learning_rate": 1.0325005606153236e-08, - "loss": 0.9821, - "step": 8055 - }, - { - "epoch": 0.9686767269885168, - "grad_norm": 2.9155060411657905, - "learning_rate": 1.0246105852881104e-08, - "loss": 1.013, - "step": 8056 - }, - { - "epoch": 0.9687969698791559, - "grad_norm": 1.8202532975552448, - "learning_rate": 1.0167507943692476e-08, - "loss": 1.021, - "step": 8057 - }, - { - "epoch": 0.968917212769795, - "grad_norm": 3.081874983661244, - "learning_rate": 1.008921189051093e-08, - "loss": 0.9419, - "step": 8058 - }, - { - "epoch": 0.9690374556604341, - "grad_norm": 3.8672952656228343, - "learning_rate": 1.0011217705213848e-08, - "loss": 0.9979, - "step": 8059 - }, - { - "epoch": 0.9691576985510731, - "grad_norm": 1.64793887438883, - "learning_rate": 9.933525399632658e-09, - "loss": 0.98, - "step": 8060 - }, - { - "epoch": 0.9692779414417123, - "grad_norm": 1.7011654281056086, - "learning_rate": 9.856134985553488e-09, - "loss": 0.8821, - "step": 8061 - }, - { - "epoch": 0.9693981843323514, - "grad_norm": 1.6133944492481516, - "learning_rate": 9.77904647471628e-09, - "loss": 0.9617, - "step": 8062 - }, - { - "epoch": 0.9695184272229904, - "grad_norm": 1.6460477311084143, - "learning_rate": 9.702259878815454e-09, - "loss": 0.9675, - "step": 8063 - }, - { - "epoch": 0.9696386701136296, - "grad_norm": 2.9898631178739685, - "learning_rate": 9.625775209499254e-09, - "loss": 0.9757, - "step": 8064 - }, - { - "epoch": 0.9697589130042686, - "grad_norm": 1.9900971459873307, - "learning_rate": 9.549592478370172e-09, - "loss": 0.9699, - "step": 8065 - }, - { - "epoch": 0.9698791558949077, - "grad_norm": 1.8904311025515483, - "learning_rate": 9.473711696985632e-09, - "loss": 1.0287, - "step": 8066 - }, - { - "epoch": 0.9699993987855468, - "grad_norm": 8.636587568966448, - "learning_rate": 9.398132876856201e-09, - "loss": 0.9878, - "step": 8067 - }, - { - "epoch": 0.9701196416761859, - "grad_norm": 0.777191185476586, - "learning_rate": 9.322856029447379e-09, - "loss": 0.8762, - "step": 8068 - }, - { - "epoch": 0.970239884566825, - "grad_norm": 2.201492245638167, - "learning_rate": 9.247881166178695e-09, - "loss": 1.0278, - "step": 8069 - }, - { - "epoch": 0.970360127457464, - "grad_norm": 2.0419665006155783, - "learning_rate": 9.173208298423274e-09, - "loss": 0.9996, - "step": 8070 - }, - { - "epoch": 0.9704803703481032, - "grad_norm": 1.4994079693075035, - "learning_rate": 9.09883743750961e-09, - "loss": 0.9921, - "step": 8071 - }, - { - "epoch": 0.9706006132387422, - "grad_norm": 1.5618558700385268, - "learning_rate": 9.024768594719124e-09, - "loss": 1.0663, - "step": 8072 - }, - { - "epoch": 0.9707208561293813, - "grad_norm": 1.9844090015772382, - "learning_rate": 8.95100178128816e-09, - "loss": 0.948, - "step": 8073 - }, - { - "epoch": 0.9708410990200205, - "grad_norm": 1.842014336313794, - "learning_rate": 8.877537008407321e-09, - "loss": 0.933, - "step": 8074 - }, - { - "epoch": 0.9709613419106595, - "grad_norm": 1.4842287493484474, - "learning_rate": 8.804374287221028e-09, - "loss": 0.9104, - "step": 8075 - }, - { - "epoch": 0.9710815848012986, - "grad_norm": 1.6995614243417252, - "learning_rate": 8.731513628827958e-09, - "loss": 1.0782, - "step": 8076 - }, - { - "epoch": 0.9712018276919377, - "grad_norm": 1.826966725727618, - "learning_rate": 8.658955044280825e-09, - "loss": 1.0551, - "step": 8077 - }, - { - "epoch": 0.9713220705825768, - "grad_norm": 1.4049022574042285, - "learning_rate": 8.586698544587268e-09, - "loss": 1.0003, - "step": 8078 - }, - { - "epoch": 0.9714423134732159, - "grad_norm": 1.9353596955760901, - "learning_rate": 8.514744140707853e-09, - "loss": 0.9713, - "step": 8079 - }, - { - "epoch": 0.971562556363855, - "grad_norm": 1.737351552535928, - "learning_rate": 8.443091843558515e-09, - "loss": 0.9892, - "step": 8080 - }, - { - "epoch": 0.9716827992544941, - "grad_norm": 2.3014089249535723, - "learning_rate": 8.37174166400878e-09, - "loss": 0.8792, - "step": 8081 - }, - { - "epoch": 0.9718030421451331, - "grad_norm": 2.344349492014885, - "learning_rate": 8.300693612881992e-09, - "loss": 1.0869, - "step": 8082 - }, - { - "epoch": 0.9719232850357723, - "grad_norm": 1.8664254265395261, - "learning_rate": 8.22994770095664e-09, - "loss": 1.0332, - "step": 8083 - }, - { - "epoch": 0.9720435279264114, - "grad_norm": 2.302509999041223, - "learning_rate": 8.159503938964585e-09, - "loss": 0.9848, - "step": 8084 - }, - { - "epoch": 0.9721637708170504, - "grad_norm": 2.0491652161400578, - "learning_rate": 8.089362337592164e-09, - "loss": 0.9341, - "step": 8085 - }, - { - "epoch": 0.9722840137076896, - "grad_norm": 1.510980154577437, - "learning_rate": 8.019522907479536e-09, - "loss": 0.9513, - "step": 8086 - }, - { - "epoch": 0.9724042565983286, - "grad_norm": 3.735015558825747, - "learning_rate": 7.949985659221558e-09, - "loss": 0.9961, - "step": 8087 - }, - { - "epoch": 0.9725244994889677, - "grad_norm": 2.012879742164145, - "learning_rate": 7.880750603366904e-09, - "loss": 1.0138, - "step": 8088 - }, - { - "epoch": 0.9726447423796069, - "grad_norm": 1.7661127412651234, - "learning_rate": 7.811817750418282e-09, - "loss": 1.0254, - "step": 8089 - }, - { - "epoch": 0.9727649852702459, - "grad_norm": 1.5383457896815598, - "learning_rate": 7.743187110833105e-09, - "loss": 1.0255, - "step": 8090 - }, - { - "epoch": 0.972885228160885, - "grad_norm": 1.5049225779038, - "learning_rate": 7.674858695022602e-09, - "loss": 1.0389, - "step": 8091 - }, - { - "epoch": 0.9730054710515241, - "grad_norm": 5.111796420680735, - "learning_rate": 7.606832513351591e-09, - "loss": 0.986, - "step": 8092 - }, - { - "epoch": 0.9731257139421632, - "grad_norm": 0.7990908761891938, - "learning_rate": 7.539108576140264e-09, - "loss": 0.9079, - "step": 8093 - }, - { - "epoch": 0.9732459568328022, - "grad_norm": 2.0456690839521423, - "learning_rate": 7.471686893661732e-09, - "loss": 0.9227, - "step": 8094 - }, - { - "epoch": 0.9733661997234414, - "grad_norm": 2.3550502727998293, - "learning_rate": 7.4045674761442636e-09, - "loss": 0.8694, - "step": 8095 - }, - { - "epoch": 0.9734864426140805, - "grad_norm": 1.8783810331532416, - "learning_rate": 7.337750333769488e-09, - "loss": 0.9592, - "step": 8096 - }, - { - "epoch": 0.9736066855047195, - "grad_norm": 1.7980244816014015, - "learning_rate": 7.2712354766737425e-09, - "loss": 0.9589, - "step": 8097 - }, - { - "epoch": 0.9737269283953586, - "grad_norm": 1.5870198282061976, - "learning_rate": 7.2050229149469565e-09, - "loss": 1.0358, - "step": 8098 - }, - { - "epoch": 0.9738471712859977, - "grad_norm": 2.405546011060301, - "learning_rate": 7.139112658633984e-09, - "loss": 0.8614, - "step": 8099 - }, - { - "epoch": 0.9739674141766368, - "grad_norm": 3.0541460967227736, - "learning_rate": 7.073504717733048e-09, - "loss": 0.9339, - "step": 8100 - }, - { - "epoch": 0.9740876570672758, - "grad_norm": 0.7323858633725049, - "learning_rate": 7.008199102196855e-09, - "loss": 0.8234, - "step": 8101 - }, - { - "epoch": 0.974207899957915, - "grad_norm": 0.8219376587280557, - "learning_rate": 6.9431958219321464e-09, - "loss": 0.8506, - "step": 8102 - }, - { - "epoch": 0.9743281428485541, - "grad_norm": 1.767164656190425, - "learning_rate": 6.878494886800146e-09, - "loss": 1.0094, - "step": 8103 - }, - { - "epoch": 0.9744483857391931, - "grad_norm": 1.688209172748673, - "learning_rate": 6.814096306615669e-09, - "loss": 0.9888, - "step": 8104 - }, - { - "epoch": 0.9745686286298323, - "grad_norm": 3.004877734497659, - "learning_rate": 6.750000091148011e-09, - "loss": 0.8779, - "step": 8105 - }, - { - "epoch": 0.9746888715204713, - "grad_norm": 1.8045520814232798, - "learning_rate": 6.686206250120729e-09, - "loss": 0.9652, - "step": 8106 - }, - { - "epoch": 0.9748091144111104, - "grad_norm": 3.1489875256220747, - "learning_rate": 6.622714793210749e-09, - "loss": 0.9761, - "step": 8107 - }, - { - "epoch": 0.9749293573017496, - "grad_norm": 1.6261412207824937, - "learning_rate": 6.559525730050364e-09, - "loss": 1.0091, - "step": 8108 - }, - { - "epoch": 0.9750496001923886, - "grad_norm": 1.9530814204122473, - "learning_rate": 6.496639070224574e-09, - "loss": 0.9922, - "step": 8109 - }, - { - "epoch": 0.9751698430830277, - "grad_norm": 4.795291999780533, - "learning_rate": 6.4340548232739714e-09, - "loss": 1.0602, - "step": 8110 - }, - { - "epoch": 0.9752900859736668, - "grad_norm": 1.6760135658552733, - "learning_rate": 6.371772998692071e-09, - "loss": 1.0251, - "step": 8111 - }, - { - "epoch": 0.9754103288643059, - "grad_norm": 2.6992623840426693, - "learning_rate": 6.309793605927094e-09, - "loss": 0.881, - "step": 8112 - }, - { - "epoch": 0.975530571754945, - "grad_norm": 2.2237983544824664, - "learning_rate": 6.248116654381297e-09, - "loss": 1.0235, - "step": 8113 - }, - { - "epoch": 0.9756508146455841, - "grad_norm": 1.768081306788738, - "learning_rate": 6.186742153410751e-09, - "loss": 0.961, - "step": 8114 - }, - { - "epoch": 0.9757710575362232, - "grad_norm": 1.9689499889704594, - "learning_rate": 6.125670112326453e-09, - "loss": 1.0961, - "step": 8115 - }, - { - "epoch": 0.9758913004268622, - "grad_norm": 1.8542860899909528, - "learning_rate": 6.064900540392548e-09, - "loss": 0.9339, - "step": 8116 - }, - { - "epoch": 0.9760115433175014, - "grad_norm": 2.0692401916042846, - "learning_rate": 6.0044334468278835e-09, - "loss": 1.019, - "step": 8117 - }, - { - "epoch": 0.9761317862081405, - "grad_norm": 1.7909195769169808, - "learning_rate": 5.944268840805345e-09, - "loss": 0.9517, - "step": 8118 - }, - { - "epoch": 0.9762520290987795, - "grad_norm": 2.2650758275019176, - "learning_rate": 5.88440673145163e-09, - "loss": 0.8664, - "step": 8119 - }, - { - "epoch": 0.9763722719894187, - "grad_norm": 2.592417822982012, - "learning_rate": 5.824847127848142e-09, - "loss": 1.0535, - "step": 8120 - }, - { - "epoch": 0.9764925148800577, - "grad_norm": 1.7642646883168904, - "learning_rate": 5.765590039029433e-09, - "loss": 1.0118, - "step": 8121 - }, - { - "epoch": 0.9766127577706968, - "grad_norm": 2.2123453894436063, - "learning_rate": 5.706635473985422e-09, - "loss": 0.9423, - "step": 8122 - }, - { - "epoch": 0.976733000661336, - "grad_norm": 1.9776312912520888, - "learning_rate": 5.6479834416591764e-09, - "loss": 1.083, - "step": 8123 - }, - { - "epoch": 0.976853243551975, - "grad_norm": 1.7057631472243475, - "learning_rate": 5.589633950947803e-09, - "loss": 0.9058, - "step": 8124 - }, - { - "epoch": 0.9769734864426141, - "grad_norm": 2.3674890307259946, - "learning_rate": 5.5315870107035535e-09, - "loss": 0.9288, - "step": 8125 - }, - { - "epoch": 0.9770937293332532, - "grad_norm": 1.6539355247043792, - "learning_rate": 5.473842629731607e-09, - "loss": 1.0115, - "step": 8126 - }, - { - "epoch": 0.9772139722238923, - "grad_norm": 3.3302420746389894, - "learning_rate": 5.416400816792066e-09, - "loss": 1.0084, - "step": 8127 - }, - { - "epoch": 0.9773342151145313, - "grad_norm": 2.52711186236961, - "learning_rate": 5.359261580598407e-09, - "loss": 1.0159, - "step": 8128 - }, - { - "epoch": 0.9774544580051704, - "grad_norm": 2.2828771980198614, - "learning_rate": 5.302424929819027e-09, - "loss": 1.0161, - "step": 8129 - }, - { - "epoch": 0.9775747008958096, - "grad_norm": 2.4545089802456306, - "learning_rate": 5.24589087307592e-09, - "loss": 0.9588, - "step": 8130 - }, - { - "epoch": 0.9776949437864486, - "grad_norm": 1.485286312438239, - "learning_rate": 5.189659418944891e-09, - "loss": 0.8815, - "step": 8131 - }, - { - "epoch": 0.9778151866770877, - "grad_norm": 1.8576280096414863, - "learning_rate": 5.133730575956674e-09, - "loss": 1.0, - "step": 8132 - }, - { - "epoch": 0.9779354295677268, - "grad_norm": 2.872407490674063, - "learning_rate": 5.0781043525953696e-09, - "loss": 0.9506, - "step": 8133 - }, - { - "epoch": 0.9780556724583659, - "grad_norm": 2.1441397665713233, - "learning_rate": 5.0227807572995605e-09, - "loss": 0.9594, - "step": 8134 - }, - { - "epoch": 0.9781759153490049, - "grad_norm": 2.156541003064952, - "learning_rate": 4.967759798461646e-09, - "loss": 0.9034, - "step": 8135 - }, - { - "epoch": 0.9782961582396441, - "grad_norm": 2.127827838422799, - "learning_rate": 4.913041484428282e-09, - "loss": 0.9732, - "step": 8136 - }, - { - "epoch": 0.9784164011302832, - "grad_norm": 1.7035856411441859, - "learning_rate": 4.858625823500384e-09, - "loss": 0.977, - "step": 8137 - }, - { - "epoch": 0.9785366440209222, - "grad_norm": 1.735241029360883, - "learning_rate": 4.80451282393246e-09, - "loss": 0.9683, - "step": 8138 - }, - { - "epoch": 0.9786568869115614, - "grad_norm": 11.272526546268503, - "learning_rate": 4.750702493933722e-09, - "loss": 0.904, - "step": 8139 - }, - { - "epoch": 0.9787771298022004, - "grad_norm": 1.9952414647110863, - "learning_rate": 4.697194841666974e-09, - "loss": 1.0825, - "step": 8140 - }, - { - "epoch": 0.9788973726928395, - "grad_norm": 1.735293239800563, - "learning_rate": 4.6439898752492764e-09, - "loss": 1.048, - "step": 8141 - }, - { - "epoch": 0.9790176155834787, - "grad_norm": 0.7498156400428622, - "learning_rate": 4.591087602751731e-09, - "loss": 0.871, - "step": 8142 - }, - { - "epoch": 0.9791378584741177, - "grad_norm": 1.57392233052745, - "learning_rate": 4.538488032199916e-09, - "loss": 0.9549, - "step": 8143 - }, - { - "epoch": 0.9792581013647568, - "grad_norm": 2.1296351873475805, - "learning_rate": 4.486191171572784e-09, - "loss": 0.9154, - "step": 8144 - }, - { - "epoch": 0.9793783442553959, - "grad_norm": 1.5400553178141005, - "learning_rate": 4.434197028803766e-09, - "loss": 1.0121, - "step": 8145 - }, - { - "epoch": 0.979498587146035, - "grad_norm": 2.3824833877160443, - "learning_rate": 4.3825056117805514e-09, - "loss": 1.0481, - "step": 8146 - }, - { - "epoch": 0.979618830036674, - "grad_norm": 2.1501826186580297, - "learning_rate": 4.331116928344425e-09, - "loss": 1.0328, - "step": 8147 - }, - { - "epoch": 0.9797390729273132, - "grad_norm": 2.1086270737108372, - "learning_rate": 4.28003098629115e-09, - "loss": 0.8615, - "step": 8148 - }, - { - "epoch": 0.9798593158179523, - "grad_norm": 11.413380022161421, - "learning_rate": 4.229247793370305e-09, - "loss": 1.0254, - "step": 8149 - }, - { - "epoch": 0.9799795587085913, - "grad_norm": 2.3060217147360578, - "learning_rate": 4.178767357285951e-09, - "loss": 0.9375, - "step": 8150 - }, - { - "epoch": 0.9800998015992305, - "grad_norm": 1.908116286926668, - "learning_rate": 4.128589685695516e-09, - "loss": 0.9371, - "step": 8151 - }, - { - "epoch": 0.9802200444898695, - "grad_norm": 1.7299014296040802, - "learning_rate": 4.078714786211135e-09, - "loss": 1.0763, - "step": 8152 - }, - { - "epoch": 0.9803402873805086, - "grad_norm": 1.7560744445592231, - "learning_rate": 4.029142666398977e-09, - "loss": 0.9942, - "step": 8153 - }, - { - "epoch": 0.9804605302711478, - "grad_norm": 3.8539681185881958, - "learning_rate": 3.979873333778805e-09, - "loss": 1.0268, - "step": 8154 - }, - { - "epoch": 0.9805807731617868, - "grad_norm": 2.018236464620542, - "learning_rate": 3.930906795824862e-09, - "loss": 0.9733, - "step": 8155 - }, - { - "epoch": 0.9807010160524259, - "grad_norm": 2.836067638091015, - "learning_rate": 3.882243059965207e-09, - "loss": 0.998, - "step": 8156 - }, - { - "epoch": 0.980821258943065, - "grad_norm": 2.2415292019393753, - "learning_rate": 3.833882133582156e-09, - "loss": 0.9004, - "step": 8157 - }, - { - "epoch": 0.9809415018337041, - "grad_norm": 1.616751423151025, - "learning_rate": 3.785824024012285e-09, - "loss": 1.0021, - "step": 8158 - }, - { - "epoch": 0.9810617447243432, - "grad_norm": 1.457033786311256, - "learning_rate": 3.738068738545541e-09, - "loss": 1.0115, - "step": 8159 - }, - { - "epoch": 0.9811819876149822, - "grad_norm": 2.249530743302584, - "learning_rate": 3.6906162844265733e-09, - "loss": 1.0135, - "step": 8160 - }, - { - "epoch": 0.9813022305056214, - "grad_norm": 1.644555025993508, - "learning_rate": 3.643466668853845e-09, - "loss": 0.9392, - "step": 8161 - }, - { - "epoch": 0.9814224733962604, - "grad_norm": 1.9181000767515244, - "learning_rate": 3.59661989898008e-09, - "loss": 0.9797, - "step": 8162 - }, - { - "epoch": 0.9815427162868995, - "grad_norm": 1.8332163672270696, - "learning_rate": 3.5500759819115934e-09, - "loss": 0.997, - "step": 8163 - }, - { - "epoch": 0.9816629591775387, - "grad_norm": 16.004376766484654, - "learning_rate": 3.5038349247094034e-09, - "loss": 1.0465, - "step": 8164 - }, - { - "epoch": 0.9817832020681777, - "grad_norm": 2.080232627037845, - "learning_rate": 3.4578967343878994e-09, - "loss": 0.998, - "step": 8165 - }, - { - "epoch": 0.9819034449588168, - "grad_norm": 1.9032537958570033, - "learning_rate": 3.4122614179161733e-09, - "loss": 1.0312, - "step": 8166 - }, - { - "epoch": 0.9820236878494559, - "grad_norm": 1.6285820142024297, - "learning_rate": 3.36692898221691e-09, - "loss": 1.0016, - "step": 8167 - }, - { - "epoch": 0.982143930740095, - "grad_norm": 1.5618428183388933, - "learning_rate": 3.3218994341668305e-09, - "loss": 0.9638, - "step": 8168 - }, - { - "epoch": 0.982264173630734, - "grad_norm": 1.9336145265021722, - "learning_rate": 3.2771727805971373e-09, - "loss": 0.9852, - "step": 8169 - }, - { - "epoch": 0.9823844165213732, - "grad_norm": 1.7964474376672614, - "learning_rate": 3.232749028292847e-09, - "loss": 0.9988, - "step": 8170 - }, - { - "epoch": 0.9825046594120123, - "grad_norm": 1.6388861751354329, - "learning_rate": 3.188628183992792e-09, - "loss": 1.1083, - "step": 8171 - }, - { - "epoch": 0.9826249023026513, - "grad_norm": 0.799556744981419, - "learning_rate": 3.1448102543902844e-09, - "loss": 0.8883, - "step": 8172 - }, - { - "epoch": 0.9827451451932905, - "grad_norm": 3.217208196750374, - "learning_rate": 3.1012952461324515e-09, - "loss": 0.8978, - "step": 8173 - }, - { - "epoch": 0.9828653880839295, - "grad_norm": 2.3772087868682306, - "learning_rate": 3.0580831658204575e-09, - "loss": 0.9729, - "step": 8174 - }, - { - "epoch": 0.9829856309745686, - "grad_norm": 3.9528981019292466, - "learning_rate": 3.015174020009281e-09, - "loss": 1.0101, - "step": 8175 - }, - { - "epoch": 0.9831058738652078, - "grad_norm": 4.510783309071869, - "learning_rate": 2.9725678152086043e-09, - "loss": 0.9809, - "step": 8176 - }, - { - "epoch": 0.9832261167558468, - "grad_norm": 2.5805005775218923, - "learning_rate": 2.930264557881257e-09, - "loss": 1.0528, - "step": 8177 - }, - { - "epoch": 0.9833463596464859, - "grad_norm": 0.8120343267673545, - "learning_rate": 2.8882642544452163e-09, - "loss": 0.8614, - "step": 8178 - }, - { - "epoch": 0.983466602537125, - "grad_norm": 2.832220831477806, - "learning_rate": 2.8465669112716083e-09, - "loss": 0.9764, - "step": 8179 - }, - { - "epoch": 0.9835868454277641, - "grad_norm": 2.236399873473768, - "learning_rate": 2.8051725346858177e-09, - "loss": 0.9837, - "step": 8180 - }, - { - "epoch": 0.9837070883184031, - "grad_norm": 2.0839744217267584, - "learning_rate": 2.7640811309674883e-09, - "loss": 0.9289, - "step": 8181 - }, - { - "epoch": 0.9838273312090423, - "grad_norm": 1.884711672944609, - "learning_rate": 2.7232927063498557e-09, - "loss": 1.0325, - "step": 8182 - }, - { - "epoch": 0.9839475740996814, - "grad_norm": 2.0726300781069997, - "learning_rate": 2.682807267020859e-09, - "loss": 0.9118, - "step": 8183 - }, - { - "epoch": 0.9840678169903204, - "grad_norm": 2.5195643696345473, - "learning_rate": 2.642624819121808e-09, - "loss": 0.8578, - "step": 8184 - }, - { - "epoch": 0.9841880598809596, - "grad_norm": 2.058163679392544, - "learning_rate": 2.6027453687487154e-09, - "loss": 0.8423, - "step": 8185 - }, - { - "epoch": 0.9843083027715986, - "grad_norm": 2.412135469241447, - "learning_rate": 2.5631689219509643e-09, - "loss": 0.7625, - "step": 8186 - }, - { - "epoch": 0.9844285456622377, - "grad_norm": 2.0310228660945393, - "learning_rate": 2.523895484732197e-09, - "loss": 1.0655, - "step": 8187 - }, - { - "epoch": 0.9845487885528769, - "grad_norm": 1.822737948421641, - "learning_rate": 2.4849250630505357e-09, - "loss": 0.9763, - "step": 8188 - }, - { - "epoch": 0.9846690314435159, - "grad_norm": 1.9442659190133098, - "learning_rate": 2.4462576628172528e-09, - "loss": 0.9636, - "step": 8189 - }, - { - "epoch": 0.984789274334155, - "grad_norm": 2.5220170407668654, - "learning_rate": 2.407893289898766e-09, - "loss": 0.9655, - "step": 8190 - }, - { - "epoch": 0.984909517224794, - "grad_norm": 2.8121729342705173, - "learning_rate": 2.3698319501144202e-09, - "loss": 1.0673, - "step": 8191 - }, - { - "epoch": 0.9850297601154332, - "grad_norm": 2.755758152541745, - "learning_rate": 2.3320736492382644e-09, - "loss": 0.9587, - "step": 8192 - }, - { - "epoch": 0.9851500030060723, - "grad_norm": 1.524981845685118, - "learning_rate": 2.29461839299816e-09, - "loss": 0.9106, - "step": 8193 - }, - { - "epoch": 0.9852702458967113, - "grad_norm": 1.5842306037427003, - "learning_rate": 2.257466187076229e-09, - "loss": 1.0321, - "step": 8194 - }, - { - "epoch": 0.9853904887873505, - "grad_norm": 1.7523484820549893, - "learning_rate": 2.2206170371081854e-09, - "loss": 0.9364, - "step": 8195 - }, - { - "epoch": 0.9855107316779895, - "grad_norm": 1.8274400520146414, - "learning_rate": 2.1840709486842247e-09, - "loss": 1.0756, - "step": 8196 - }, - { - "epoch": 0.9856309745686286, - "grad_norm": 3.941735789772946, - "learning_rate": 2.1478279273481335e-09, - "loss": 1.0273, - "step": 8197 - }, - { - "epoch": 0.9857512174592677, - "grad_norm": 2.6712544930919657, - "learning_rate": 2.1118879785981815e-09, - "loss": 1.0368, - "step": 8198 - }, - { - "epoch": 0.9858714603499068, - "grad_norm": 1.7851724769973305, - "learning_rate": 2.0762511078862288e-09, - "loss": 1.0207, - "step": 8199 - }, - { - "epoch": 0.9859917032405459, - "grad_norm": 2.1946592614770104, - "learning_rate": 2.0409173206186183e-09, - "loss": 0.8846, - "step": 8200 - }, - { - "epoch": 0.986111946131185, - "grad_norm": 2.1993276999336766, - "learning_rate": 2.0058866221550617e-09, - "loss": 1.0998, - "step": 8201 - }, - { - "epoch": 0.9862321890218241, - "grad_norm": 2.051618597653876, - "learning_rate": 1.971159017809976e-09, - "loss": 0.984, - "step": 8202 - }, - { - "epoch": 0.9863524319124631, - "grad_norm": 2.218410412227656, - "learning_rate": 1.93673451285159e-09, - "loss": 1.0114, - "step": 8203 - }, - { - "epoch": 0.9864726748031023, - "grad_norm": 0.7546503614682215, - "learning_rate": 1.9026131125019495e-09, - "loss": 0.8294, - "step": 8204 - }, - { - "epoch": 0.9865929176937414, - "grad_norm": 1.9994466731445426, - "learning_rate": 1.8687948219371363e-09, - "loss": 1.0885, - "step": 8205 - }, - { - "epoch": 0.9867131605843804, - "grad_norm": 2.5551405572021526, - "learning_rate": 1.835279646287491e-09, - "loss": 1.1131, - "step": 8206 - }, - { - "epoch": 0.9868334034750196, - "grad_norm": 1.7172534322533577, - "learning_rate": 1.8020675906371685e-09, - "loss": 1.0021, - "step": 8207 - }, - { - "epoch": 0.9869536463656586, - "grad_norm": 1.8785584062355973, - "learning_rate": 1.7691586600243612e-09, - "loss": 0.9779, - "step": 8208 - }, - { - "epoch": 0.9870738892562977, - "grad_norm": 2.1811863930945696, - "learning_rate": 1.7365528594415202e-09, - "loss": 1.0851, - "step": 8209 - }, - { - "epoch": 0.9871941321469369, - "grad_norm": 1.6141694881878668, - "learning_rate": 1.7042501938346888e-09, - "loss": 0.9016, - "step": 8210 - }, - { - "epoch": 0.9873143750375759, - "grad_norm": 1.8965273065549748, - "learning_rate": 1.6722506681043913e-09, - "loss": 0.9988, - "step": 8211 - }, - { - "epoch": 0.987434617928215, - "grad_norm": 2.663969424235518, - "learning_rate": 1.640554287104745e-09, - "loss": 0.916, - "step": 8212 - }, - { - "epoch": 0.9875548608188541, - "grad_norm": 1.9745206275262386, - "learning_rate": 1.609161055644348e-09, - "loss": 1.0224, - "step": 8213 - }, - { - "epoch": 0.9876751037094932, - "grad_norm": 1.9836986921149675, - "learning_rate": 1.5780709784849467e-09, - "loss": 0.9065, - "step": 8214 - }, - { - "epoch": 0.9877953466001322, - "grad_norm": 2.1517682145452373, - "learning_rate": 1.5472840603436565e-09, - "loss": 1.0465, - "step": 8215 - }, - { - "epoch": 0.9879155894907714, - "grad_norm": 2.0051456061786483, - "learning_rate": 1.5168003058900757e-09, - "loss": 1.0257, - "step": 8216 - }, - { - "epoch": 0.9880358323814105, - "grad_norm": 2.1204369864749197, - "learning_rate": 1.4866197197491715e-09, - "loss": 1.1482, - "step": 8217 - }, - { - "epoch": 0.9881560752720495, - "grad_norm": 3.374884709402354, - "learning_rate": 1.4567423064988371e-09, - "loss": 1.0012, - "step": 8218 - }, - { - "epoch": 0.9882763181626887, - "grad_norm": 2.243725398786833, - "learning_rate": 1.4271680706718913e-09, - "loss": 1.0085, - "step": 8219 - }, - { - "epoch": 0.9883965610533277, - "grad_norm": 3.87563146552814, - "learning_rate": 1.3978970167543013e-09, - "loss": 1.0547, - "step": 8220 - }, - { - "epoch": 0.9885168039439668, - "grad_norm": 4.109991339235856, - "learning_rate": 1.3689291491867372e-09, - "loss": 1.0055, - "step": 8221 - }, - { - "epoch": 0.988637046834606, - "grad_norm": 1.8650200685745753, - "learning_rate": 1.3402644723636836e-09, - "loss": 0.9637, - "step": 8222 - }, - { - "epoch": 0.988757289725245, - "grad_norm": 1.801889736235687, - "learning_rate": 1.311902990633218e-09, - "loss": 1.0593, - "step": 8223 - }, - { - "epoch": 0.9888775326158841, - "grad_norm": 1.5963965488634535, - "learning_rate": 1.2838447082978987e-09, - "loss": 0.9415, - "step": 8224 - }, - { - "epoch": 0.9889977755065231, - "grad_norm": 6.543725177823474, - "learning_rate": 1.2560896296143208e-09, - "loss": 1.0569, - "step": 8225 - }, - { - "epoch": 0.9891180183971623, - "grad_norm": 2.128499795452643, - "learning_rate": 1.2286377587926722e-09, - "loss": 1.0482, - "step": 8226 - }, - { - "epoch": 0.9892382612878013, - "grad_norm": 2.040818667605089, - "learning_rate": 1.2014890999973992e-09, - "loss": 0.9786, - "step": 8227 - }, - { - "epoch": 0.9893585041784404, - "grad_norm": 1.7747573982253297, - "learning_rate": 1.1746436573472073e-09, - "loss": 1.0083, - "step": 8228 - }, - { - "epoch": 0.9894787470690796, - "grad_norm": 1.9765928364110648, - "learning_rate": 1.1481014349141726e-09, - "loss": 0.9256, - "step": 8229 - }, - { - "epoch": 0.9895989899597186, - "grad_norm": 2.0384015294558835, - "learning_rate": 1.121862436724852e-09, - "loss": 1.0676, - "step": 8230 - }, - { - "epoch": 0.9897192328503577, - "grad_norm": 2.234213869914005, - "learning_rate": 1.0959266667598388e-09, - "loss": 0.9412, - "step": 8231 - }, - { - "epoch": 0.9898394757409968, - "grad_norm": 2.263809634370555, - "learning_rate": 1.0702941289533196e-09, - "loss": 0.9722, - "step": 8232 - }, - { - "epoch": 0.9899597186316359, - "grad_norm": 2.6536351516603487, - "learning_rate": 1.0449648271939615e-09, - "loss": 1.109, - "step": 8233 - }, - { - "epoch": 0.990079961522275, - "grad_norm": 5.603435123539472, - "learning_rate": 1.0199387653240243e-09, - "loss": 0.9588, - "step": 8234 - }, - { - "epoch": 0.9902002044129141, - "grad_norm": 1.5571524856614667, - "learning_rate": 9.952159471400267e-10, - "loss": 0.9329, - "step": 8235 - }, - { - "epoch": 0.9903204473035532, - "grad_norm": 2.507765915102271, - "learning_rate": 9.707963763923022e-10, - "loss": 1.0627, - "step": 8236 - }, - { - "epoch": 0.9904406901941922, - "grad_norm": 2.449415183821807, - "learning_rate": 9.466800567854427e-10, - "loss": 1.0118, - "step": 8237 - }, - { - "epoch": 0.9905609330848314, - "grad_norm": 2.001094280334293, - "learning_rate": 9.228669919778553e-10, - "loss": 0.9133, - "step": 8238 - }, - { - "epoch": 0.9906811759754705, - "grad_norm": 2.1405218614666155, - "learning_rate": 8.993571855817617e-10, - "loss": 1.0202, - "step": 8239 - }, - { - "epoch": 0.9908014188661095, - "grad_norm": 1.5624372088213336, - "learning_rate": 8.761506411638642e-10, - "loss": 0.9686, - "step": 8240 - }, - { - "epoch": 0.9909216617567487, - "grad_norm": 1.9219769706656329, - "learning_rate": 8.53247362244236e-10, - "loss": 0.9718, - "step": 8241 - }, - { - "epoch": 0.9910419046473877, - "grad_norm": 1.593101294645325, - "learning_rate": 8.306473522976532e-10, - "loss": 0.9156, - "step": 8242 - }, - { - "epoch": 0.9911621475380268, - "grad_norm": 1.7501629344838756, - "learning_rate": 8.083506147522623e-10, - "loss": 0.9524, - "step": 8243 - }, - { - "epoch": 0.991282390428666, - "grad_norm": 2.8113037956386124, - "learning_rate": 7.863571529906909e-10, - "loss": 1.0824, - "step": 8244 - }, - { - "epoch": 0.991402633319305, - "grad_norm": 0.7872007709761077, - "learning_rate": 7.646669703489372e-10, - "loss": 0.8824, - "step": 8245 - }, - { - "epoch": 0.9915228762099441, - "grad_norm": 1.7414767607442485, - "learning_rate": 7.432800701177023e-10, - "loss": 0.8064, - "step": 8246 - }, - { - "epoch": 0.9916431191005832, - "grad_norm": 0.8529802821452956, - "learning_rate": 7.221964555415017e-10, - "loss": 0.8347, - "step": 8247 - }, - { - "epoch": 0.9917633619912223, - "grad_norm": 2.355535756133898, - "learning_rate": 7.01416129818222e-10, - "loss": 0.978, - "step": 8248 - }, - { - "epoch": 0.9918836048818613, - "grad_norm": 12.59640255389628, - "learning_rate": 6.809390961006745e-10, - "loss": 0.8149, - "step": 8249 - }, - { - "epoch": 0.9920038477725005, - "grad_norm": 1.6911179647057555, - "learning_rate": 6.607653574948191e-10, - "loss": 0.9196, - "step": 8250 - }, - { - "epoch": 0.9921240906631396, - "grad_norm": 1.7758680987822437, - "learning_rate": 6.408949170613187e-10, - "loss": 1.0456, - "step": 8251 - }, - { - "epoch": 0.9922443335537786, - "grad_norm": 1.6265234041966363, - "learning_rate": 6.213277778144288e-10, - "loss": 1.0503, - "step": 8252 - }, - { - "epoch": 0.9923645764444178, - "grad_norm": 2.0837877032387895, - "learning_rate": 6.020639427224416e-10, - "loss": 0.8997, - "step": 8253 - }, - { - "epoch": 0.9924848193350568, - "grad_norm": 1.9293171144927255, - "learning_rate": 5.831034147076864e-10, - "loss": 0.9594, - "step": 8254 - }, - { - "epoch": 0.9926050622256959, - "grad_norm": 0.7174260758654267, - "learning_rate": 5.644461966463065e-10, - "loss": 0.8069, - "step": 8255 - }, - { - "epoch": 0.9927253051163349, - "grad_norm": 1.9937009560224115, - "learning_rate": 5.460922913687049e-10, - "loss": 0.9864, - "step": 8256 - }, - { - "epoch": 0.9928455480069741, - "grad_norm": 1.8561892104550564, - "learning_rate": 5.280417016593208e-10, - "loss": 0.9789, - "step": 8257 - }, - { - "epoch": 0.9929657908976132, - "grad_norm": 1.6386522330560742, - "learning_rate": 5.102944302559642e-10, - "loss": 0.9821, - "step": 8258 - }, - { - "epoch": 0.9930860337882522, - "grad_norm": 2.36673825473597, - "learning_rate": 4.9285047985137e-10, - "loss": 1.0167, - "step": 8259 - }, - { - "epoch": 0.9932062766788914, - "grad_norm": 1.5117840420129651, - "learning_rate": 4.757098530916436e-10, - "loss": 0.9764, - "step": 8260 - }, - { - "epoch": 0.9933265195695304, - "grad_norm": 2.686328455159615, - "learning_rate": 4.5887255257670563e-10, - "loss": 1.015, - "step": 8261 - }, - { - "epoch": 0.9934467624601695, - "grad_norm": 2.2532127543170533, - "learning_rate": 4.4233858086117906e-10, - "loss": 0.998, - "step": 8262 - }, - { - "epoch": 0.9935670053508087, - "grad_norm": 3.519223147845424, - "learning_rate": 4.261079404528356e-10, - "loss": 0.9117, - "step": 8263 - }, - { - "epoch": 0.9936872482414477, - "grad_norm": 1.7826010281902556, - "learning_rate": 4.1018063381437205e-10, - "loss": 0.9141, - "step": 8264 - }, - { - "epoch": 0.9938074911320868, - "grad_norm": 0.9619597293450765, - "learning_rate": 3.9455666336141167e-10, - "loss": 0.8938, - "step": 8265 - }, - { - "epoch": 0.9939277340227259, - "grad_norm": 2.536294690335803, - "learning_rate": 3.7923603146450267e-10, - "loss": 1.0468, - "step": 8266 - }, - { - "epoch": 0.994047976913365, - "grad_norm": 1.9139056705919633, - "learning_rate": 3.642187404473418e-10, - "loss": 1.0299, - "step": 8267 - }, - { - "epoch": 0.994168219804004, - "grad_norm": 4.246661726401274, - "learning_rate": 3.495047925885508e-10, - "loss": 1.0834, - "step": 8268 - }, - { - "epoch": 0.9942884626946432, - "grad_norm": 2.1810015474057733, - "learning_rate": 3.350941901199e-10, - "loss": 1.0682, - "step": 8269 - }, - { - "epoch": 0.9944087055852823, - "grad_norm": 2.5561275797958745, - "learning_rate": 3.2098693522764066e-10, - "loss": 1.0591, - "step": 8270 - }, - { - "epoch": 0.9945289484759213, - "grad_norm": 1.9479656587311351, - "learning_rate": 3.071830300516165e-10, - "loss": 1.0486, - "step": 8271 - }, - { - "epoch": 0.9946491913665605, - "grad_norm": 1.851549237038798, - "learning_rate": 2.9368247668615234e-10, - "loss": 0.9365, - "step": 8272 - }, - { - "epoch": 0.9947694342571995, - "grad_norm": 4.4081434145269265, - "learning_rate": 2.804852771789434e-10, - "loss": 0.8514, - "step": 8273 - }, - { - "epoch": 0.9948896771478386, - "grad_norm": 1.694908902693825, - "learning_rate": 2.675914335321661e-10, - "loss": 0.7876, - "step": 8274 - }, - { - "epoch": 0.9950099200384778, - "grad_norm": 2.6200252805465687, - "learning_rate": 2.550009477018111e-10, - "loss": 1.0189, - "step": 8275 - }, - { - "epoch": 0.9951301629291168, - "grad_norm": 1.7580319415978263, - "learning_rate": 2.4271382159790634e-10, - "loss": 0.8583, - "step": 8276 - }, - { - "epoch": 0.9952504058197559, - "grad_norm": 2.0470030067544758, - "learning_rate": 2.3073005708429406e-10, - "loss": 1.089, - "step": 8277 - }, - { - "epoch": 0.995370648710395, - "grad_norm": 1.747273228110903, - "learning_rate": 2.190496559788535e-10, - "loss": 0.9454, - "step": 8278 - }, - { - "epoch": 0.9954908916010341, - "grad_norm": 3.801223713446175, - "learning_rate": 2.0767262005372265e-10, - "loss": 0.9926, - "step": 8279 - }, - { - "epoch": 0.9956111344916732, - "grad_norm": 2.110331678671945, - "learning_rate": 1.965989510346322e-10, - "loss": 0.9794, - "step": 8280 - }, - { - "epoch": 0.9957313773823123, - "grad_norm": 1.876971289381084, - "learning_rate": 1.8582865060134955e-10, - "loss": 0.9343, - "step": 8281 - }, - { - "epoch": 0.9958516202729514, - "grad_norm": 0.7968798244554671, - "learning_rate": 1.7536172038790098e-10, - "loss": 0.8208, - "step": 8282 - }, - { - "epoch": 0.9959718631635904, - "grad_norm": 2.205470358577389, - "learning_rate": 1.651981619819054e-10, - "loss": 0.9272, - "step": 8283 - }, - { - "epoch": 0.9960921060542296, - "grad_norm": 2.316884086432595, - "learning_rate": 1.5533797692546257e-10, - "loss": 0.9275, - "step": 8284 - }, - { - "epoch": 0.9962123489448687, - "grad_norm": 1.917476116862263, - "learning_rate": 1.4578116671404296e-10, - "loss": 1.0618, - "step": 8285 - }, - { - "epoch": 0.9963325918355077, - "grad_norm": 3.9094479097417802, - "learning_rate": 1.3652773279759777e-10, - "loss": 0.9466, - "step": 8286 - }, - { - "epoch": 0.9964528347261468, - "grad_norm": 2.9405096341568644, - "learning_rate": 1.2757767657989305e-10, - "loss": 0.8532, - "step": 8287 - }, - { - "epoch": 0.9965730776167859, - "grad_norm": 3.2907029362765847, - "learning_rate": 1.1893099941850948e-10, - "loss": 1.0934, - "step": 8288 - }, - { - "epoch": 0.996693320507425, - "grad_norm": 2.171549820741491, - "learning_rate": 1.105877026252866e-10, - "loss": 1.0041, - "step": 8289 - }, - { - "epoch": 0.996813563398064, - "grad_norm": 2.7772890667689394, - "learning_rate": 1.0254778746565663e-10, - "loss": 0.95, - "step": 8290 - }, - { - "epoch": 0.9969338062887032, - "grad_norm": 2.3063233857509804, - "learning_rate": 9.481125515953259e-11, - "loss": 0.9613, - "step": 8291 - }, - { - "epoch": 0.9970540491793423, - "grad_norm": 1.5420823267174382, - "learning_rate": 8.737810688064228e-11, - "loss": 1.0282, - "step": 8292 - }, - { - "epoch": 0.9971742920699813, - "grad_norm": 3.421265012117899, - "learning_rate": 8.024834375608414e-11, - "loss": 1.0182, - "step": 8293 - }, - { - "epoch": 0.9972945349606205, - "grad_norm": 0.837426081284825, - "learning_rate": 7.342196686788149e-11, - "loss": 0.8943, - "step": 8294 - }, - { - "epoch": 0.9974147778512595, - "grad_norm": 2.2817012392837763, - "learning_rate": 6.689897725142834e-11, - "loss": 0.9134, - "step": 8295 - }, - { - "epoch": 0.9975350207418986, - "grad_norm": 2.0232953938546534, - "learning_rate": 6.067937589615545e-11, - "loss": 1.1079, - "step": 8296 - }, - { - "epoch": 0.9976552636325378, - "grad_norm": 0.7604255458017275, - "learning_rate": 5.476316374575241e-11, - "loss": 0.8148, - "step": 8297 - }, - { - "epoch": 0.9977755065231768, - "grad_norm": 2.5271450713421433, - "learning_rate": 4.9150341697723476e-11, - "loss": 0.9572, - "step": 8298 - }, - { - "epoch": 0.9978957494138159, - "grad_norm": 1.7370835882442754, - "learning_rate": 4.384091060338768e-11, - "loss": 0.892, - "step": 8299 - }, - { - "epoch": 0.998015992304455, - "grad_norm": 3.2484766984843376, - "learning_rate": 3.883487126810081e-11, - "loss": 0.9679, - "step": 8300 - }, - { - "epoch": 0.9981362351950941, - "grad_norm": 1.771263302232842, - "learning_rate": 3.41322244516995e-11, - "loss": 1.0279, - "step": 8301 - }, - { - "epoch": 0.9982564780857331, - "grad_norm": 1.8056203569700466, - "learning_rate": 2.9732970866946925e-11, - "loss": 0.8547, - "step": 8302 - }, - { - "epoch": 0.9983767209763723, - "grad_norm": 2.386346216674903, - "learning_rate": 2.563711118175327e-11, - "loss": 1.0128, - "step": 8303 - }, - { - "epoch": 0.9984969638670114, - "grad_norm": 1.7715106192498509, - "learning_rate": 2.184464601717728e-11, - "loss": 1.0617, - "step": 8304 - }, - { - "epoch": 0.9986172067576504, - "grad_norm": 2.41651520309228, - "learning_rate": 1.8355575948758585e-11, - "loss": 1.0031, - "step": 8305 - }, - { - "epoch": 0.9987374496482896, - "grad_norm": 2.021252722988996, - "learning_rate": 1.5169901505407424e-11, - "loss": 0.9638, - "step": 8306 - }, - { - "epoch": 0.9988576925389286, - "grad_norm": 1.6996147308230154, - "learning_rate": 1.228762317073695e-11, - "loss": 0.9639, - "step": 8307 - }, - { - "epoch": 0.9989779354295677, - "grad_norm": 2.424255382618805, - "learning_rate": 9.70874138195299e-12, - "loss": 1.0135, - "step": 8308 - }, - { - "epoch": 0.9990981783202069, - "grad_norm": 1.6701719933302341, - "learning_rate": 7.433256530076093e-12, - "loss": 0.9728, - "step": 8309 - }, - { - "epoch": 0.9992184212108459, - "grad_norm": 2.280360113616316, - "learning_rate": 5.46116896038562e-12, - "loss": 0.9816, - "step": 8310 - }, - { - "epoch": 0.999338664101485, - "grad_norm": 2.3534284013595843, - "learning_rate": 3.792478972197699e-12, - "loss": 0.8507, - "step": 8311 - }, - { - "epoch": 0.9994589069921241, - "grad_norm": 2.7930689653677176, - "learning_rate": 2.4271868181990895e-12, - "loss": 0.92, - "step": 8312 - }, - { - "epoch": 0.9995791498827632, - "grad_norm": 2.5842803531830008, - "learning_rate": 1.3652927060014973e-12, - "loss": 1.0358, - "step": 8313 - }, - { - "epoch": 0.9996993927734023, - "grad_norm": 2.085862290530262, - "learning_rate": 6.067967965872612e-13, - "loss": 0.876, - "step": 8314 - }, - { - "epoch": 0.9998196356640414, - "grad_norm": 1.4265698068187223, - "learning_rate": 1.5169920497548615e-13, - "loss": 0.9972, - "step": 8315 - }, - { - "epoch": 0.9999398785546805, - "grad_norm": 1.1359162529344868, - "learning_rate": 0.0, - "loss": 0.8094, - "step": 8316 - }, - { - "epoch": 0.9999398785546805, - "step": 8316, - "total_flos": 6.68648365344424e+17, - "train_loss": 0.997480898106425, - "train_runtime": 144423.7188, - "train_samples_per_second": 2.303, - "train_steps_per_second": 0.058 - } - ], - "logging_steps": 1.0, - "max_steps": 8316, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 100, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 6.68648365344424e+17, - "train_batch_size": 5, - "trial_name": null, - "trial_params": null -} diff --git a/sft/smoe_cosinegating/training_args.bin b/sft/smoe_cosinegating/training_args.bin deleted file mode 100644 index 434886ec6eaf8e5a0b21a427801047926150b44c..0000000000000000000000000000000000000000 --- a/sft/smoe_cosinegating/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:60cec9748462c83f1ad285ec43d62e04acb5bd54d2e44a1d755485e7a9d09ea8 -size 8184 diff --git a/sft/smoe_perturbed/added_tokens.json b/sft/smoe_perturbed/added_tokens.json deleted file mode 100644 index c9d3d3a1b74d87e381e471f7b33784015d2dc0ea..0000000000000000000000000000000000000000 --- a/sft/smoe_perturbed/added_tokens.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "<|assistant|>": 32001, - "<|endoftext|>": 32000, - "<|end|>": 32007, - "<|placeholder1|>": 32002, - "<|placeholder2|>": 32003, - "<|placeholder3|>": 32004, - "<|placeholder4|>": 32005, - "<|placeholder5|>": 32008, - "<|placeholder6|>": 32009, - "<|system|>": 32006, - "<|user|>": 32010 -} diff --git a/sft/smoe_perturbed/config.json b/sft/smoe_perturbed/config.json deleted file mode 100644 index 71967cad9154bb69ffd649bf93c02f375e46ade7..0000000000000000000000000000000000000000 --- a/sft/smoe_perturbed/config.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "_name_or_path": "/cm/archive/thongdt4/toolkitmoe/checkpoints/phi3mini-siglip224/pft", - "architectures": [ - "LlavaPhiForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" - }, - "balance_loss_coef": 0.1, - "bos_token_id": 1, - "clip_smoe": true, - "dropout": false, - "embd_pdrop": 0.0, - "eos_token_id": 32000, - "freeze_mm_mlp_adapter": false, - "hidden_act": "silu", - "hidden_size": 3072, - "image_aspect_ratio": "pad", - "initializer_range": 0.02, - "intermediate_size": 8192, - "local_rank": 0, - "max_position_embeddings": 4096, - "mlp_smoe": true, - "mm_hidden_size": 1152, - "mm_patch_merge_type": "flat", - "mm_projector_lr": null, - "mm_projector_type": "moe", - "mm_use_im_patch_token": false, - "mm_use_im_start_end": false, - "mm_vision_select_feature": "patch", - "mm_vision_select_layer": -2, - "mm_vision_tower": "google/siglip-so400m-patch14-224", - "model_type": "llava_phi", - "moe_name": "smoe_perturbed", - "num_attention_heads": 32, - "num_experts": 2, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "num_layers": 3, - "num_selected": 2, - "original_max_position_embeddings": 4096, - "pad_token_id": 32000, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "router_z_loss_coef": 0.01, - "scales": [ - 1, - 3 - ], - "sliding_window": 2047, - "tie_word_embeddings": false, - "tokenizer_model_max_length": 2048, - "tokenizer_padding_side": "right", - "torch_dtype": "bfloat16", - "training": true, - "transformers_version": "4.43.2", - "tune_mm_mlp_adapter": false, - "use_cache": true, - "use_mm_proj": true, - "vocab_size": 32064 -} diff --git a/sft/smoe_perturbed/generation_config.json b/sft/smoe_perturbed/generation_config.json deleted file mode 100644 index 3a20824ea777f1ebd11da590160a7209fe3b62c6..0000000000000000000000000000000000000000 --- a/sft/smoe_perturbed/generation_config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "_from_model_config": true, - "bos_token_id": 1, - "do_sample": true, - "eos_token_id": [ - 32000, - 32001, - 32007 - ], - "pad_token_id": 32000, - "transformers_version": "4.43.2" -} diff --git a/sft/smoe_perturbed/model-00001-of-00003.safetensors b/sft/smoe_perturbed/model-00001-of-00003.safetensors deleted file mode 100644 index 3a48a088e35cc6c23b3b2a92f9dc28d56e0779b1..0000000000000000000000000000000000000000 --- a/sft/smoe_perturbed/model-00001-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:915e651ce990462da7bd2d15c47fbf7141493b9056075cdd87e9933faf29ce88 -size 4972489328 diff --git a/sft/smoe_perturbed/model-00002-of-00003.safetensors b/sft/smoe_perturbed/model-00002-of-00003.safetensors deleted file mode 100644 index 28e738a6a3e1b2a33e30fc125db2e02dd08cc494..0000000000000000000000000000000000000000 --- a/sft/smoe_perturbed/model-00002-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9a9941e4a220b4474c69c45c86b21a19feb2b12065416612d3f25c4a8ccd8044 -size 4985533608 diff --git a/sft/smoe_perturbed/model-00003-of-00003.safetensors b/sft/smoe_perturbed/model-00003-of-00003.safetensors deleted file mode 100644 index 49990a2573c26a6e753f88f3d6c8129891fc6f21..0000000000000000000000000000000000000000 --- a/sft/smoe_perturbed/model-00003-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e1b22a19161e149a6276cb8cd7c5b2fd1fc3cbe86decb5523a7703a9ac029e8 -size 248943664 diff --git a/sft/smoe_perturbed/model.safetensors.index.json b/sft/smoe_perturbed/model.safetensors.index.json deleted file mode 100644 index f5e0d563e520320e7e1cb47747945b2591e60790..0000000000000000000000000000000000000000 --- a/sft/smoe_perturbed/model.safetensors.index.json +++ /dev/null @@ -1,1033 +0,0 @@ -{ - "metadata": { - "total_size": 10206819680 - }, - "weight_map": { - "lm_head.weight": "model-00003-of-00003.safetensors", - "model.embed_tokens.weight": "model-00001-of-00003.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.21.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.21.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.30.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.2.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.2.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.2.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.2.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.2.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.0.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.0.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.2.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.gate.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.gate.weight": "model-00003-of-00003.safetensors", - "model.norm.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00002-of-00003.safetensors" - } -} diff --git a/sft/smoe_perturbed/special_tokens_map.json b/sft/smoe_perturbed/special_tokens_map.json deleted file mode 100644 index 3e4d5a5bc1cb51753cc9ae0305ece0da60052b10..0000000000000000000000000000000000000000 --- a/sft/smoe_perturbed/special_tokens_map.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "bos_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "", - "unk_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - } -} diff --git a/sft/smoe_perturbed/tokenizer.model b/sft/smoe_perturbed/tokenizer.model deleted file mode 100644 index 6c00c742ce03c627d6cd5b795984876fa49fa899..0000000000000000000000000000000000000000 --- a/sft/smoe_perturbed/tokenizer.model +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 -size 499723 diff --git a/sft/smoe_perturbed/tokenizer_config.json b/sft/smoe_perturbed/tokenizer_config.json deleted file mode 100644 index 3bd56c6314b14d6a33a69cd1802e04dbc1e47840..0000000000000000000000000000000000000000 --- a/sft/smoe_perturbed/tokenizer_config.json +++ /dev/null @@ -1,132 +0,0 @@ -{ - "add_bos_token": true, - "add_eos_token": false, - "add_prefix_space": true, - "added_tokens_decoder": { - "0": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "1": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "2": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": false - }, - "32000": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "32001": { - "content": "<|assistant|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32002": { - "content": "<|placeholder1|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32003": { - "content": "<|placeholder2|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32004": { - "content": "<|placeholder3|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32005": { - "content": "<|placeholder4|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32006": { - "content": "<|system|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32007": { - "content": "<|end|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32008": { - "content": "<|placeholder5|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32009": { - "content": "<|placeholder6|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32010": { - "content": "<|user|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - } - }, - "bos_token": "", - "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", - "clean_up_tokenization_spaces": false, - "eos_token": "<|endoftext|>", - "legacy": false, - "model_max_length": 2048, - "pad_token": "", - "padding_side": "right", - "sp_model_kwargs": {}, - "spaces_between_special_tokens": false, - "tokenizer_class": "LlamaTokenizer", - "unk_token": "", - "use_default_system_prompt": false -} diff --git a/sft/smoe_perturbed/trainer_state.json b/sft/smoe_perturbed/trainer_state.json deleted file mode 100644 index c63a19bd2b9f588379ccd875a604d5c00a794d0c..0000000000000000000000000000000000000000 --- a/sft/smoe_perturbed/trainer_state.json +++ /dev/null @@ -1,58254 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.9999398785546805, - "eval_steps": 500, - "global_step": 8316, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.00012024289063909097, - "grad_norm": 16.23420017651297, - "learning_rate": 0.0, - "loss": 1.6383, - "step": 1 - }, - { - "epoch": 0.00024048578127818193, - "grad_norm": 16.35627712878673, - "learning_rate": 5.021476677069823e-07, - "loss": 1.6062, - "step": 2 - }, - { - "epoch": 0.0003607286719172729, - "grad_norm": 11.857923577190908, - "learning_rate": 7.958852231401551e-07, - "loss": 1.4722, - "step": 3 - }, - { - "epoch": 0.00048097156255636386, - "grad_norm": 12.431550869913616, - "learning_rate": 1.0042953354139647e-06, - "loss": 1.5413, - "step": 4 - }, - { - "epoch": 0.0006012144531954548, - "grad_norm": 14.744304527213583, - "learning_rate": 1.1659507774310057e-06, - "loss": 1.6139, - "step": 5 - }, - { - "epoch": 0.0007214573438345458, - "grad_norm": 14.21573199426651, - "learning_rate": 1.2980328908471373e-06, - "loss": 1.4683, - "step": 6 - }, - { - "epoch": 0.0008417002344736367, - "grad_norm": 2.9108263306779683, - "learning_rate": 1.4097067265369432e-06, - "loss": 1.017, - "step": 7 - }, - { - "epoch": 0.0009619431251127277, - "grad_norm": 15.176765572968304, - "learning_rate": 1.506443003120947e-06, - "loss": 1.4277, - "step": 8 - }, - { - "epoch": 0.0010821860157518186, - "grad_norm": 7.9315249564485155, - "learning_rate": 1.5917704462803102e-06, - "loss": 1.5337, - "step": 9 - }, - { - "epoch": 0.0012024289063909096, - "grad_norm": 8.138798660744515, - "learning_rate": 1.6680984451379884e-06, - "loss": 1.5649, - "step": 10 - }, - { - "epoch": 0.0013226717970300007, - "grad_norm": 6.076279230006292, - "learning_rate": 1.7371455188905097e-06, - "loss": 1.3691, - "step": 11 - }, - { - "epoch": 0.0014429146876690916, - "grad_norm": 4.642536835573124, - "learning_rate": 1.8001805585541196e-06, - "loss": 1.353, - "step": 12 - }, - { - "epoch": 0.0015631575783081825, - "grad_norm": 4.333268507448113, - "learning_rate": 1.8581671739548328e-06, - "loss": 1.4142, - "step": 13 - }, - { - "epoch": 0.0016834004689472734, - "grad_norm": 3.753469261115725, - "learning_rate": 1.9118543942439254e-06, - "loss": 1.2723, - "step": 14 - }, - { - "epoch": 0.0018036433595863645, - "grad_norm": 3.716785200364284, - "learning_rate": 1.961836000571161e-06, - "loss": 1.2803, - "step": 15 - }, - { - "epoch": 0.0019238862502254555, - "grad_norm": 3.032969508167793, - "learning_rate": 2.0085906708279293e-06, - "loss": 0.8763, - "step": 16 - }, - { - "epoch": 0.0020441291408645466, - "grad_norm": 2.8130106144558034, - "learning_rate": 2.0525099325728135e-06, - "loss": 1.3184, - "step": 17 - }, - { - "epoch": 0.0021643720315036373, - "grad_norm": 2.958181422098686, - "learning_rate": 2.0939181139872922e-06, - "loss": 0.9562, - "step": 18 - }, - { - "epoch": 0.0022846149221427284, - "grad_norm": 3.1782715290090264, - "learning_rate": 2.1330868934640175e-06, - "loss": 1.1641, - "step": 19 - }, - { - "epoch": 0.002404857812781819, - "grad_norm": 2.7238758043049307, - "learning_rate": 2.170246112844971e-06, - "loss": 1.0068, - "step": 20 - }, - { - "epoch": 0.0025251007034209102, - "grad_norm": 3.114962823685756, - "learning_rate": 2.2055919496770983e-06, - "loss": 1.2016, - "step": 21 - }, - { - "epoch": 0.0026453435940600014, - "grad_norm": 2.447726292343117, - "learning_rate": 2.2392931865974923e-06, - "loss": 1.0927, - "step": 22 - }, - { - "epoch": 0.002765586484699092, - "grad_norm": 2.5329930461753456, - "learning_rate": 2.271496085962064e-06, - "loss": 1.207, - "step": 23 - }, - { - "epoch": 0.002885829375338183, - "grad_norm": 2.668299552549569, - "learning_rate": 2.3023282262611022e-06, - "loss": 1.2334, - "step": 24 - }, - { - "epoch": 0.003006072265977274, - "grad_norm": 3.3582563090109305, - "learning_rate": 2.3319015548620114e-06, - "loss": 1.1286, - "step": 25 - }, - { - "epoch": 0.003126315156616365, - "grad_norm": 1.887484882622615, - "learning_rate": 2.3603148416618152e-06, - "loss": 1.129, - "step": 26 - }, - { - "epoch": 0.003246558047255456, - "grad_norm": 2.294381273594122, - "learning_rate": 2.3876556694204647e-06, - "loss": 1.2086, - "step": 27 - }, - { - "epoch": 0.003366800937894547, - "grad_norm": 2.600894143337755, - "learning_rate": 2.414002061950908e-06, - "loss": 1.1034, - "step": 28 - }, - { - "epoch": 0.003487043828533638, - "grad_norm": 2.574845326046589, - "learning_rate": 2.4394238264681557e-06, - "loss": 1.2016, - "step": 29 - }, - { - "epoch": 0.003607286719172729, - "grad_norm": 1.6451475026284612, - "learning_rate": 2.4639836682781433e-06, - "loss": 1.1989, - "step": 30 - }, - { - "epoch": 0.00372752960981182, - "grad_norm": 2.194235866897927, - "learning_rate": 2.487738122623307e-06, - "loss": 1.2023, - "step": 31 - }, - { - "epoch": 0.003847772500450911, - "grad_norm": 1.9573032700250694, - "learning_rate": 2.510738338534912e-06, - "loss": 1.1768, - "step": 32 - }, - { - "epoch": 0.003968015391090002, - "grad_norm": 3.248651371420489, - "learning_rate": 2.5330307420306648e-06, - "loss": 1.2268, - "step": 33 - }, - { - "epoch": 0.004088258281729093, - "grad_norm": 1.7479530351907118, - "learning_rate": 2.554657600279796e-06, - "loss": 1.0766, - "step": 34 - }, - { - "epoch": 0.004208501172368184, - "grad_norm": 1.9467609329239959, - "learning_rate": 2.5756575039679493e-06, - "loss": 1.2461, - "step": 35 - }, - { - "epoch": 0.0043287440630072746, - "grad_norm": 1.921539029164015, - "learning_rate": 2.5960657816942747e-06, - "loss": 1.1579, - "step": 36 - }, - { - "epoch": 0.004448986953646365, - "grad_norm": 1.269991137030606, - "learning_rate": 2.6159148575788668e-06, - "loss": 0.8765, - "step": 37 - }, - { - "epoch": 0.004569229844285457, - "grad_norm": 2.772660525381275, - "learning_rate": 2.635234561171e-06, - "loss": 1.1991, - "step": 38 - }, - { - "epoch": 0.0046894727349245475, - "grad_norm": 2.193696754439741, - "learning_rate": 2.6540523970949877e-06, - "loss": 1.1498, - "step": 39 - }, - { - "epoch": 0.004809715625563638, - "grad_norm": 2.3956352041391935, - "learning_rate": 2.6723937805519533e-06, - "loss": 1.1376, - "step": 40 - }, - { - "epoch": 0.00492995851620273, - "grad_norm": 2.2089686169724194, - "learning_rate": 2.690282243737839e-06, - "loss": 1.149, - "step": 41 - }, - { - "epoch": 0.0050502014068418205, - "grad_norm": 2.234425257404354, - "learning_rate": 2.7077396173840807e-06, - "loss": 1.199, - "step": 42 - }, - { - "epoch": 0.005170444297480911, - "grad_norm": 5.658552681103554, - "learning_rate": 2.7247861909342594e-06, - "loss": 1.1389, - "step": 43 - }, - { - "epoch": 0.005290687188120003, - "grad_norm": 2.1108036430832025, - "learning_rate": 2.7414408543044743e-06, - "loss": 1.0341, - "step": 44 - }, - { - "epoch": 0.005410930078759093, - "grad_norm": 5.341932199497054, - "learning_rate": 2.7577212237113157e-06, - "loss": 1.0082, - "step": 45 - }, - { - "epoch": 0.005531172969398184, - "grad_norm": 2.578824497465232, - "learning_rate": 2.7736437536690466e-06, - "loss": 1.2563, - "step": 46 - }, - { - "epoch": 0.005651415860037276, - "grad_norm": 2.635389204724034, - "learning_rate": 2.789223836941131e-06, - "loss": 1.2916, - "step": 47 - }, - { - "epoch": 0.005771658750676366, - "grad_norm": 2.575319740665729, - "learning_rate": 2.8044758939680847e-06, - "loss": 1.2873, - "step": 48 - }, - { - "epoch": 0.005891901641315457, - "grad_norm": 2.5191016962134496, - "learning_rate": 2.8194134530738863e-06, - "loss": 1.2214, - "step": 49 - }, - { - "epoch": 0.006012144531954548, - "grad_norm": 2.616384810151687, - "learning_rate": 2.834049222568994e-06, - "loss": 1.1128, - "step": 50 - }, - { - "epoch": 0.006132387422593639, - "grad_norm": 1.9570972789353902, - "learning_rate": 2.848395155712969e-06, - "loss": 1.1442, - "step": 51 - }, - { - "epoch": 0.00625263031323273, - "grad_norm": 2.085880121592042, - "learning_rate": 2.8624625093687977e-06, - "loss": 1.2037, - "step": 52 - }, - { - "epoch": 0.006372873203871821, - "grad_norm": 2.072900270432335, - "learning_rate": 2.876261897070029e-06, - "loss": 1.0992, - "step": 53 - }, - { - "epoch": 0.006493116094510912, - "grad_norm": 2.3332205699739177, - "learning_rate": 2.889803337127447e-06, - "loss": 1.141, - "step": 54 - }, - { - "epoch": 0.006613358985150003, - "grad_norm": 3.1837700356196383, - "learning_rate": 2.903096296321516e-06, - "loss": 1.0678, - "step": 55 - }, - { - "epoch": 0.006733601875789094, - "grad_norm": 1.9299168624167349, - "learning_rate": 2.9161497296578907e-06, - "loss": 1.1409, - "step": 56 - }, - { - "epoch": 0.006853844766428185, - "grad_norm": 2.1702987110202487, - "learning_rate": 2.928972116604173e-06, - "loss": 1.077, - "step": 57 - }, - { - "epoch": 0.006974087657067276, - "grad_norm": 2.5953367044972593, - "learning_rate": 2.9415714941751377e-06, - "loss": 1.2287, - "step": 58 - }, - { - "epoch": 0.007094330547706367, - "grad_norm": 2.0033091348888714, - "learning_rate": 2.9539554871897396e-06, - "loss": 1.1544, - "step": 59 - }, - { - "epoch": 0.007214573438345458, - "grad_norm": 1.9408429746714553, - "learning_rate": 2.9661313359851253e-06, - "loss": 1.195, - "step": 60 - }, - { - "epoch": 0.007334816328984549, - "grad_norm": 1.7834758570870815, - "learning_rate": 2.978105921839922e-06, - "loss": 1.1673, - "step": 61 - }, - { - "epoch": 0.00745505921962364, - "grad_norm": 3.423559462282592, - "learning_rate": 2.9898857903302893e-06, - "loss": 0.9608, - "step": 62 - }, - { - "epoch": 0.007575302110262731, - "grad_norm": 6.102519606230986, - "learning_rate": 3.001477172817253e-06, - "loss": 1.104, - "step": 63 - }, - { - "epoch": 0.007695545000901822, - "grad_norm": 2.0816926090323125, - "learning_rate": 3.012886006241894e-06, - "loss": 1.1855, - "step": 64 - }, - { - "epoch": 0.007815787891540913, - "grad_norm": 1.9338780666127529, - "learning_rate": 3.0241179513858383e-06, - "loss": 1.1185, - "step": 65 - }, - { - "epoch": 0.007936030782180003, - "grad_norm": 3.2487539690440386, - "learning_rate": 3.035178409737647e-06, - "loss": 1.1101, - "step": 66 - }, - { - "epoch": 0.008056273672819095, - "grad_norm": 1.9352271992964363, - "learning_rate": 3.046072539090907e-06, - "loss": 1.1099, - "step": 67 - }, - { - "epoch": 0.008176516563458186, - "grad_norm": 2.785647655221189, - "learning_rate": 3.056805267986779e-06, - "loss": 1.2684, - "step": 68 - }, - { - "epoch": 0.008296759454097276, - "grad_norm": 2.1091249670967946, - "learning_rate": 3.0673813091022194e-06, - "loss": 1.1716, - "step": 69 - }, - { - "epoch": 0.008417002344736368, - "grad_norm": 1.2322693744017832, - "learning_rate": 3.0778051716749317e-06, - "loss": 0.8978, - "step": 70 - }, - { - "epoch": 0.008537245235375458, - "grad_norm": 3.136628146068479, - "learning_rate": 3.0880811730470094e-06, - "loss": 1.1375, - "step": 71 - }, - { - "epoch": 0.008657488126014549, - "grad_norm": 1.1295932672435045, - "learning_rate": 3.098213449401257e-06, - "loss": 0.8498, - "step": 72 - }, - { - "epoch": 0.00877773101665364, - "grad_norm": 1.9376840196404812, - "learning_rate": 3.1082059657570015e-06, - "loss": 1.2053, - "step": 73 - }, - { - "epoch": 0.00889797390729273, - "grad_norm": 1.9721046809718752, - "learning_rate": 3.1180625252858496e-06, - "loss": 1.185, - "step": 74 - }, - { - "epoch": 0.009018216797931822, - "grad_norm": 2.8989490972644334, - "learning_rate": 3.1277867780021663e-06, - "loss": 1.0268, - "step": 75 - }, - { - "epoch": 0.009138459688570914, - "grad_norm": 1.6610343495766826, - "learning_rate": 3.1373822288779824e-06, - "loss": 1.1744, - "step": 76 - }, - { - "epoch": 0.009258702579210003, - "grad_norm": 1.8075309250588067, - "learning_rate": 3.1468522454274533e-06, - "loss": 1.012, - "step": 77 - }, - { - "epoch": 0.009378945469849095, - "grad_norm": 1.9822682889627958, - "learning_rate": 3.15620006480197e-06, - "loss": 1.1351, - "step": 78 - }, - { - "epoch": 0.009499188360488187, - "grad_norm": 2.6144773200884877, - "learning_rate": 3.1654288004333087e-06, - "loss": 0.9699, - "step": 79 - }, - { - "epoch": 0.009619431251127276, - "grad_norm": 2.362012386964623, - "learning_rate": 3.1745414482589353e-06, - "loss": 0.9717, - "step": 80 - }, - { - "epoch": 0.009739674141766368, - "grad_norm": 2.0960247152123483, - "learning_rate": 3.1835408925606204e-06, - "loss": 1.088, - "step": 81 - }, - { - "epoch": 0.00985991703240546, - "grad_norm": 2.165254780884642, - "learning_rate": 3.1924299114448214e-06, - "loss": 1.1042, - "step": 82 - }, - { - "epoch": 0.00998015992304455, - "grad_norm": 2.485164109151358, - "learning_rate": 3.2012111819909055e-06, - "loss": 1.0689, - "step": 83 - }, - { - "epoch": 0.010100402813683641, - "grad_norm": 2.614081883059349, - "learning_rate": 3.2098872850910627e-06, - "loss": 1.1664, - "step": 84 - }, - { - "epoch": 0.010220645704322733, - "grad_norm": 1.9012034795864503, - "learning_rate": 3.2184607100038194e-06, - "loss": 1.1148, - "step": 85 - }, - { - "epoch": 0.010340888594961822, - "grad_norm": 1.790724802481399, - "learning_rate": 3.2269338586412414e-06, - "loss": 1.1543, - "step": 86 - }, - { - "epoch": 0.010461131485600914, - "grad_norm": 2.14653635699682, - "learning_rate": 3.2353090496083106e-06, - "loss": 1.1896, - "step": 87 - }, - { - "epoch": 0.010581374376240005, - "grad_norm": 1.9960832390256527, - "learning_rate": 3.2435885220114572e-06, - "loss": 1.0422, - "step": 88 - }, - { - "epoch": 0.010701617266879095, - "grad_norm": 1.7982985455268528, - "learning_rate": 3.2517744390519113e-06, - "loss": 1.1575, - "step": 89 - }, - { - "epoch": 0.010821860157518187, - "grad_norm": 1.8610055009411124, - "learning_rate": 3.259868891418298e-06, - "loss": 0.9695, - "step": 90 - }, - { - "epoch": 0.010942103048157278, - "grad_norm": 1.9277077840190475, - "learning_rate": 3.2678739004917757e-06, - "loss": 1.0662, - "step": 91 - }, - { - "epoch": 0.011062345938796368, - "grad_norm": 1.6650535963212825, - "learning_rate": 3.275791421376029e-06, - "loss": 1.1408, - "step": 92 - }, - { - "epoch": 0.01118258882943546, - "grad_norm": 1.8276325442862424, - "learning_rate": 3.2836233457634622e-06, - "loss": 1.1746, - "step": 93 - }, - { - "epoch": 0.011302831720074551, - "grad_norm": 1.797601834782885, - "learning_rate": 3.2913715046481135e-06, - "loss": 1.0717, - "step": 94 - }, - { - "epoch": 0.011423074610713641, - "grad_norm": 1.9379303361991151, - "learning_rate": 3.299037670895023e-06, - "loss": 1.1099, - "step": 95 - }, - { - "epoch": 0.011543317501352733, - "grad_norm": 1.8717343820475936, - "learning_rate": 3.3066235616750667e-06, - "loss": 1.0224, - "step": 96 - }, - { - "epoch": 0.011663560391991824, - "grad_norm": 1.9641903953214097, - "learning_rate": 3.3141308407736276e-06, - "loss": 1.1286, - "step": 97 - }, - { - "epoch": 0.011783803282630914, - "grad_norm": 2.0002598593694976, - "learning_rate": 3.321561120780869e-06, - "loss": 1.0819, - "step": 98 - }, - { - "epoch": 0.011904046173270006, - "grad_norm": 1.9886088223222216, - "learning_rate": 3.3289159651708192e-06, - "loss": 1.2376, - "step": 99 - }, - { - "epoch": 0.012024289063909096, - "grad_norm": 2.1394935457828073, - "learning_rate": 3.3361968902759768e-06, - "loss": 1.1983, - "step": 100 - }, - { - "epoch": 0.012144531954548187, - "grad_norm": 2.336017824150697, - "learning_rate": 3.343405367163663e-06, - "loss": 1.1627, - "step": 101 - }, - { - "epoch": 0.012264774845187279, - "grad_norm": 6.645475882201946, - "learning_rate": 3.350542823419951e-06, - "loss": 1.0357, - "step": 102 - }, - { - "epoch": 0.012385017735826368, - "grad_norm": 2.9435979978027667, - "learning_rate": 3.3576106448465615e-06, - "loss": 1.111, - "step": 103 - }, - { - "epoch": 0.01250526062646546, - "grad_norm": 2.3020706061332175, - "learning_rate": 3.3646101770757797e-06, - "loss": 1.1031, - "step": 104 - }, - { - "epoch": 0.012625503517104552, - "grad_norm": 1.925020141000781, - "learning_rate": 3.371542727108104e-06, - "loss": 1.0719, - "step": 105 - }, - { - "epoch": 0.012745746407743641, - "grad_norm": 2.627886502107072, - "learning_rate": 3.3784095647770114e-06, - "loss": 1.1262, - "step": 106 - }, - { - "epoch": 0.012865989298382733, - "grad_norm": 1.9051973532027802, - "learning_rate": 3.3852119241449547e-06, - "loss": 1.1089, - "step": 107 - }, - { - "epoch": 0.012986232189021825, - "grad_norm": 2.2921135784521365, - "learning_rate": 3.3919510048344295e-06, - "loss": 1.1908, - "step": 108 - }, - { - "epoch": 0.013106475079660914, - "grad_norm": 1.8583369202286981, - "learning_rate": 3.3986279732976907e-06, - "loss": 1.0907, - "step": 109 - }, - { - "epoch": 0.013226717970300006, - "grad_norm": 2.370464008797517, - "learning_rate": 3.4052439640284983e-06, - "loss": 1.1778, - "step": 110 - }, - { - "epoch": 0.013346960860939098, - "grad_norm": 1.6922927894136164, - "learning_rate": 3.4118000807190217e-06, - "loss": 1.0368, - "step": 111 - }, - { - "epoch": 0.013467203751578187, - "grad_norm": 1.5816883610375498, - "learning_rate": 3.4182973973648723e-06, - "loss": 0.9903, - "step": 112 - }, - { - "epoch": 0.013587446642217279, - "grad_norm": 2.3423606314873666, - "learning_rate": 3.424736959321014e-06, - "loss": 1.1644, - "step": 113 - }, - { - "epoch": 0.01370768953285637, - "grad_norm": 1.7502270278807106, - "learning_rate": 3.431119784311155e-06, - "loss": 1.1098, - "step": 114 - }, - { - "epoch": 0.01382793242349546, - "grad_norm": 1.6056413024098486, - "learning_rate": 3.43744686339307e-06, - "loss": 1.0057, - "step": 115 - }, - { - "epoch": 0.013948175314134552, - "grad_norm": 1.9538841908280036, - "learning_rate": 3.44371916188212e-06, - "loss": 1.1405, - "step": 116 - }, - { - "epoch": 0.014068418204773643, - "grad_norm": 1.8499573779995686, - "learning_rate": 3.449937620235143e-06, - "loss": 1.0901, - "step": 117 - }, - { - "epoch": 0.014188661095412733, - "grad_norm": 1.7885368274526239, - "learning_rate": 3.456103154896722e-06, - "loss": 1.1035, - "step": 118 - }, - { - "epoch": 0.014308903986051825, - "grad_norm": 1.6566783423111477, - "learning_rate": 3.462216659109757e-06, - "loss": 1.1453, - "step": 119 - }, - { - "epoch": 0.014429146876690916, - "grad_norm": 2.1722204359046677, - "learning_rate": 3.4682790036921077e-06, - "loss": 1.0756, - "step": 120 - }, - { - "epoch": 0.014549389767330006, - "grad_norm": 1.8817021431744827, - "learning_rate": 3.4742910377810193e-06, - "loss": 1.05, - "step": 121 - }, - { - "epoch": 0.014669632657969098, - "grad_norm": 1.875778672336903, - "learning_rate": 3.4802535895469042e-06, - "loss": 1.1162, - "step": 122 - }, - { - "epoch": 0.01478987554860819, - "grad_norm": 1.8280459655892487, - "learning_rate": 3.4861674668779934e-06, - "loss": 1.121, - "step": 123 - }, - { - "epoch": 0.01491011843924728, - "grad_norm": 1.772044369846027, - "learning_rate": 3.492033458037272e-06, - "loss": 1.0674, - "step": 124 - }, - { - "epoch": 0.01503036132988637, - "grad_norm": 2.515016091145483, - "learning_rate": 3.497852332293018e-06, - "loss": 1.091, - "step": 125 - }, - { - "epoch": 0.015150604220525462, - "grad_norm": 3.1575172826153537, - "learning_rate": 3.5036248405242356e-06, - "loss": 1.1917, - "step": 126 - }, - { - "epoch": 0.015270847111164552, - "grad_norm": 1.8684711592737309, - "learning_rate": 3.509351715802146e-06, - "loss": 1.0544, - "step": 127 - }, - { - "epoch": 0.015391090001803644, - "grad_norm": 1.9303435781755192, - "learning_rate": 3.5150336739488763e-06, - "loss": 1.0098, - "step": 128 - }, - { - "epoch": 0.015511332892442733, - "grad_norm": 1.6476957993804993, - "learning_rate": 3.5206714140744143e-06, - "loss": 1.0531, - "step": 129 - }, - { - "epoch": 0.015631575783081827, - "grad_norm": 2.155939052109575, - "learning_rate": 3.5262656190928208e-06, - "loss": 1.1054, - "step": 130 - }, - { - "epoch": 0.015751818673720917, - "grad_norm": 0.9817443088554019, - "learning_rate": 3.5318169562186737e-06, - "loss": 0.9501, - "step": 131 - }, - { - "epoch": 0.015872061564360006, - "grad_norm": 1.9120986517351255, - "learning_rate": 3.5373260774446292e-06, - "loss": 1.052, - "step": 132 - }, - { - "epoch": 0.0159923044549991, - "grad_norm": 1.7935516141383065, - "learning_rate": 3.542793620000961e-06, - "loss": 1.1374, - "step": 133 - }, - { - "epoch": 0.01611254734563819, - "grad_norm": 4.315559076224176, - "learning_rate": 3.5482202067978894e-06, - "loss": 1.0912, - "step": 134 - }, - { - "epoch": 0.01623279023627728, - "grad_norm": 1.866132366257103, - "learning_rate": 3.553606446851471e-06, - "loss": 0.9804, - "step": 135 - }, - { - "epoch": 0.016353033126916373, - "grad_norm": 1.6225348627844418, - "learning_rate": 3.5589529356937613e-06, - "loss": 1.0573, - "step": 136 - }, - { - "epoch": 0.016473276017555463, - "grad_norm": 1.6398752588422658, - "learning_rate": 3.5642602557679627e-06, - "loss": 1.0036, - "step": 137 - }, - { - "epoch": 0.016593518908194552, - "grad_norm": 2.9786014331902035, - "learning_rate": 3.569528976809202e-06, - "loss": 1.0681, - "step": 138 - }, - { - "epoch": 0.016713761798833646, - "grad_norm": 1.9530936276777264, - "learning_rate": 3.5747596562115522e-06, - "loss": 1.1079, - "step": 139 - }, - { - "epoch": 0.016834004689472735, - "grad_norm": 2.4834973206448985, - "learning_rate": 3.5799528393819138e-06, - "loss": 1.127, - "step": 140 - }, - { - "epoch": 0.016954247580111825, - "grad_norm": 1.8908064573868089, - "learning_rate": 3.585109060081286e-06, - "loss": 1.0951, - "step": 141 - }, - { - "epoch": 0.017074490470750915, - "grad_norm": 1.6013368773287504, - "learning_rate": 3.590228840753992e-06, - "loss": 1.0048, - "step": 142 - }, - { - "epoch": 0.01719473336139001, - "grad_norm": 1.9362493600961257, - "learning_rate": 3.5953126928453423e-06, - "loss": 1.0974, - "step": 143 - }, - { - "epoch": 0.017314976252029098, - "grad_norm": 1.951409769304755, - "learning_rate": 3.600361117108239e-06, - "loss": 1.0336, - "step": 144 - }, - { - "epoch": 0.017435219142668188, - "grad_norm": 1.7865244757712968, - "learning_rate": 3.6053746038991616e-06, - "loss": 1.1953, - "step": 145 - }, - { - "epoch": 0.01755546203330728, - "grad_norm": 1.0413147459015661, - "learning_rate": 3.6103536334639843e-06, - "loss": 0.8568, - "step": 146 - }, - { - "epoch": 0.01767570492394637, - "grad_norm": 1.9252486972425487, - "learning_rate": 3.615298676214041e-06, - "loss": 1.0773, - "step": 147 - }, - { - "epoch": 0.01779594781458546, - "grad_norm": 1.7205117646413486, - "learning_rate": 3.6202101929928317e-06, - "loss": 1.1151, - "step": 148 - }, - { - "epoch": 0.017916190705224554, - "grad_norm": 1.5938250206138813, - "learning_rate": 3.6250886353337413e-06, - "loss": 1.1116, - "step": 149 - }, - { - "epoch": 0.018036433595863644, - "grad_norm": 1.6679246525887363, - "learning_rate": 3.6299344457091488e-06, - "loss": 1.0875, - "step": 150 - }, - { - "epoch": 0.018156676486502734, - "grad_norm": 2.01689096364695, - "learning_rate": 3.634748057771256e-06, - "loss": 1.1543, - "step": 151 - }, - { - "epoch": 0.018276919377141827, - "grad_norm": 1.4228098384732581, - "learning_rate": 3.639529896584965e-06, - "loss": 1.0852, - "step": 152 - }, - { - "epoch": 0.018397162267780917, - "grad_norm": 2.66794667716551, - "learning_rate": 3.6442803788531233e-06, - "loss": 1.1166, - "step": 153 - }, - { - "epoch": 0.018517405158420007, - "grad_norm": 1.76680574349525, - "learning_rate": 3.6489999131344357e-06, - "loss": 1.1749, - "step": 154 - }, - { - "epoch": 0.0186376480490591, - "grad_norm": 1.5943440097023616, - "learning_rate": 3.653688900054313e-06, - "loss": 1.1368, - "step": 155 - }, - { - "epoch": 0.01875789093969819, - "grad_norm": 1.982007093210933, - "learning_rate": 3.6583477325089526e-06, - "loss": 0.9874, - "step": 156 - }, - { - "epoch": 0.01887813383033728, - "grad_norm": 1.9870301752971704, - "learning_rate": 3.6629767958628916e-06, - "loss": 1.2611, - "step": 157 - }, - { - "epoch": 0.018998376720976373, - "grad_norm": 2.9983245621602927, - "learning_rate": 3.667576468140291e-06, - "loss": 1.0809, - "step": 158 - }, - { - "epoch": 0.019118619611615463, - "grad_norm": 2.094567552057717, - "learning_rate": 3.672147120210184e-06, - "loss": 1.1071, - "step": 159 - }, - { - "epoch": 0.019238862502254553, - "grad_norm": 1.864739892734027, - "learning_rate": 3.6766891159659177e-06, - "loss": 1.0943, - "step": 160 - }, - { - "epoch": 0.019359105392893646, - "grad_norm": 3.5145432906177123, - "learning_rate": 3.6812028124990075e-06, - "loss": 1.0931, - "step": 161 - }, - { - "epoch": 0.019479348283532736, - "grad_norm": 2.6034017940055842, - "learning_rate": 3.6856885602676016e-06, - "loss": 1.043, - "step": 162 - }, - { - "epoch": 0.019599591174171826, - "grad_norm": 4.502954360937336, - "learning_rate": 3.6901467032597733e-06, - "loss": 1.1694, - "step": 163 - }, - { - "epoch": 0.01971983406481092, - "grad_norm": 1.9473557982232361, - "learning_rate": 3.694577579151804e-06, - "loss": 1.0993, - "step": 164 - }, - { - "epoch": 0.01984007695545001, - "grad_norm": 2.2448560406039157, - "learning_rate": 3.6989815194616703e-06, - "loss": 0.9698, - "step": 165 - }, - { - "epoch": 0.0199603198460891, - "grad_norm": 1.948184302672351, - "learning_rate": 3.703358849697888e-06, - "loss": 1.0339, - "step": 166 - }, - { - "epoch": 0.020080562736728192, - "grad_norm": 1.823633267469085, - "learning_rate": 3.7077098895038803e-06, - "loss": 1.049, - "step": 167 - }, - { - "epoch": 0.020200805627367282, - "grad_norm": 2.0417323847982707, - "learning_rate": 3.712034952798045e-06, - "loss": 1.1976, - "step": 168 - }, - { - "epoch": 0.02032104851800637, - "grad_norm": 2.085712092075575, - "learning_rate": 3.7163343479096656e-06, - "loss": 1.0697, - "step": 169 - }, - { - "epoch": 0.020441291408645465, - "grad_norm": 1.9307374144354732, - "learning_rate": 3.720608377710802e-06, - "loss": 1.048, - "step": 170 - }, - { - "epoch": 0.020561534299284555, - "grad_norm": 2.620403352366856, - "learning_rate": 3.7248573397443277e-06, - "loss": 1.0909, - "step": 171 - }, - { - "epoch": 0.020681777189923645, - "grad_norm": 1.6965789986365214, - "learning_rate": 3.729081526348224e-06, - "loss": 1.2087, - "step": 172 - }, - { - "epoch": 0.020802020080562738, - "grad_norm": 1.5738679480798776, - "learning_rate": 3.7332812247762777e-06, - "loss": 1.0684, - "step": 173 - }, - { - "epoch": 0.020922262971201828, - "grad_norm": 1.977786322312108, - "learning_rate": 3.737456717315293e-06, - "loss": 1.1762, - "step": 174 - }, - { - "epoch": 0.021042505861840918, - "grad_norm": 1.4661669642098432, - "learning_rate": 3.7416082813989552e-06, - "loss": 1.1338, - "step": 175 - }, - { - "epoch": 0.02116274875248001, - "grad_norm": 3.1805072449444354, - "learning_rate": 3.745736189718439e-06, - "loss": 1.1258, - "step": 176 - }, - { - "epoch": 0.0212829916431191, - "grad_norm": 2.588853649678155, - "learning_rate": 3.749840710329894e-06, - "loss": 0.9618, - "step": 177 - }, - { - "epoch": 0.02140323453375819, - "grad_norm": 4.211475118596873, - "learning_rate": 3.7539221067588938e-06, - "loss": 1.2007, - "step": 178 - }, - { - "epoch": 0.021523477424397284, - "grad_norm": 3.8928777461675375, - "learning_rate": 3.757980638101964e-06, - "loss": 1.1624, - "step": 179 - }, - { - "epoch": 0.021643720315036374, - "grad_norm": 2.0845319605898247, - "learning_rate": 3.7620165591252806e-06, - "loss": 1.1248, - "step": 180 - }, - { - "epoch": 0.021763963205675464, - "grad_norm": 1.839908347645998, - "learning_rate": 3.766030120360636e-06, - "loss": 1.1676, - "step": 181 - }, - { - "epoch": 0.021884206096314557, - "grad_norm": 2.3954337629963454, - "learning_rate": 3.7700215681987578e-06, - "loss": 1.1088, - "step": 182 - }, - { - "epoch": 0.022004448986953647, - "grad_norm": 1.5392569791813107, - "learning_rate": 3.7739911449800767e-06, - "loss": 1.0488, - "step": 183 - }, - { - "epoch": 0.022124691877592736, - "grad_norm": 1.6174473945282308, - "learning_rate": 3.7779390890830114e-06, - "loss": 1.0289, - "step": 184 - }, - { - "epoch": 0.02224493476823183, - "grad_norm": 1.6413490514216973, - "learning_rate": 3.7818656350098723e-06, - "loss": 1.0844, - "step": 185 - }, - { - "epoch": 0.02236517765887092, - "grad_norm": 3.060588317863157, - "learning_rate": 3.7857710134704447e-06, - "loss": 0.992, - "step": 186 - }, - { - "epoch": 0.02248542054951001, - "grad_norm": 1.8220242159476794, - "learning_rate": 3.7896554514633234e-06, - "loss": 1.0207, - "step": 187 - }, - { - "epoch": 0.022605663440149103, - "grad_norm": 1.8789419063745738, - "learning_rate": 3.7935191723550955e-06, - "loss": 1.0663, - "step": 188 - }, - { - "epoch": 0.022725906330788193, - "grad_norm": 1.791855809481856, - "learning_rate": 3.797362395957408e-06, - "loss": 1.1174, - "step": 189 - }, - { - "epoch": 0.022846149221427282, - "grad_norm": 1.9294968483833144, - "learning_rate": 3.8011853386020055e-06, - "loss": 1.0011, - "step": 190 - }, - { - "epoch": 0.022966392112066376, - "grad_norm": 2.5442069653108854, - "learning_rate": 3.804988213213804e-06, - "loss": 1.1157, - "step": 191 - }, - { - "epoch": 0.023086635002705466, - "grad_norm": 1.1039726458733516, - "learning_rate": 3.808771229382049e-06, - "loss": 0.886, - "step": 192 - }, - { - "epoch": 0.023206877893344555, - "grad_norm": 1.9364221097182337, - "learning_rate": 3.8125345934296324e-06, - "loss": 1.0761, - "step": 193 - }, - { - "epoch": 0.02332712078398365, - "grad_norm": 1.767508118649928, - "learning_rate": 3.81627850848061e-06, - "loss": 1.1032, - "step": 194 - }, - { - "epoch": 0.02344736367462274, - "grad_norm": 2.353007737438419, - "learning_rate": 3.820003174525994e-06, - "loss": 1.0776, - "step": 195 - }, - { - "epoch": 0.02356760656526183, - "grad_norm": 2.1409019792221806, - "learning_rate": 3.823708788487851e-06, - "loss": 1.0624, - "step": 196 - }, - { - "epoch": 0.02368784945590092, - "grad_norm": 3.5303022074813173, - "learning_rate": 3.827395544281781e-06, - "loss": 1.0715, - "step": 197 - }, - { - "epoch": 0.02380809234654001, - "grad_norm": 2.160771149626811, - "learning_rate": 3.831063632877802e-06, - "loss": 1.0303, - "step": 198 - }, - { - "epoch": 0.0239283352371791, - "grad_norm": 3.5115665322179033, - "learning_rate": 3.834713242359712e-06, - "loss": 0.9806, - "step": 199 - }, - { - "epoch": 0.02404857812781819, - "grad_norm": 1.7293556370562808, - "learning_rate": 3.838344557982959e-06, - "loss": 1.0989, - "step": 200 - }, - { - "epoch": 0.024168821018457284, - "grad_norm": 2.3367407192879717, - "learning_rate": 3.841957762231063e-06, - "loss": 1.0778, - "step": 201 - }, - { - "epoch": 0.024289063909096374, - "grad_norm": 1.7063094299921473, - "learning_rate": 3.8455530348706454e-06, - "loss": 1.0959, - "step": 202 - }, - { - "epoch": 0.024409306799735464, - "grad_norm": 2.033208167283313, - "learning_rate": 3.849130553005099e-06, - "loss": 1.0045, - "step": 203 - }, - { - "epoch": 0.024529549690374557, - "grad_norm": 1.644125917912252, - "learning_rate": 3.852690491126933e-06, - "loss": 1.06, - "step": 204 - }, - { - "epoch": 0.024649792581013647, - "grad_norm": 2.2027809469620605, - "learning_rate": 3.856233021168845e-06, - "loss": 1.1297, - "step": 205 - }, - { - "epoch": 0.024770035471652737, - "grad_norm": 1.8688027645559309, - "learning_rate": 3.859758312553544e-06, - "loss": 1.1378, - "step": 206 - }, - { - "epoch": 0.02489027836229183, - "grad_norm": 2.3147199130914333, - "learning_rate": 3.8632665322423735e-06, - "loss": 1.1495, - "step": 207 - }, - { - "epoch": 0.02501052125293092, - "grad_norm": 1.530077582742197, - "learning_rate": 3.866757844782762e-06, - "loss": 1.0816, - "step": 208 - }, - { - "epoch": 0.02513076414357001, - "grad_norm": 2.088173228908331, - "learning_rate": 3.870232412354527e-06, - "loss": 1.1332, - "step": 209 - }, - { - "epoch": 0.025251007034209103, - "grad_norm": 1.9395792479289373, - "learning_rate": 3.873690394815086e-06, - "loss": 1.1466, - "step": 210 - }, - { - "epoch": 0.025371249924848193, - "grad_norm": 2.1075189407239625, - "learning_rate": 3.877131949743587e-06, - "loss": 1.1409, - "step": 211 - }, - { - "epoch": 0.025491492815487283, - "grad_norm": 2.4827017277469157, - "learning_rate": 3.880557232483993e-06, - "loss": 1.0209, - "step": 212 - }, - { - "epoch": 0.025611735706126376, - "grad_norm": 1.7299714417939411, - "learning_rate": 3.883966396187164e-06, - "loss": 1.0984, - "step": 213 - }, - { - "epoch": 0.025731978596765466, - "grad_norm": 2.293396372132007, - "learning_rate": 3.887359591851937e-06, - "loss": 1.1275, - "step": 214 - }, - { - "epoch": 0.025852221487404556, - "grad_norm": 2.1539266480872965, - "learning_rate": 3.890736968365265e-06, - "loss": 1.1575, - "step": 215 - }, - { - "epoch": 0.02597246437804365, - "grad_norm": 1.585909161995708, - "learning_rate": 3.894098672541412e-06, - "loss": 1.0754, - "step": 216 - }, - { - "epoch": 0.02609270726868274, - "grad_norm": 2.066961376137526, - "learning_rate": 3.89744484916025e-06, - "loss": 0.9786, - "step": 217 - }, - { - "epoch": 0.02621295015932183, - "grad_norm": 1.806926840715067, - "learning_rate": 3.900775641004673e-06, - "loss": 1.0945, - "step": 218 - }, - { - "epoch": 0.026333193049960922, - "grad_norm": 2.691059859841562, - "learning_rate": 3.904091188897156e-06, - "loss": 0.964, - "step": 219 - }, - { - "epoch": 0.026453435940600012, - "grad_norm": 1.8804423502712018, - "learning_rate": 3.90739163173548e-06, - "loss": 1.0506, - "step": 220 - }, - { - "epoch": 0.026573678831239102, - "grad_norm": 2.1633688575911787, - "learning_rate": 3.910677106527646e-06, - "loss": 1.1137, - "step": 221 - }, - { - "epoch": 0.026693921721878195, - "grad_norm": 2.534594269285283, - "learning_rate": 3.913947748426004e-06, - "loss": 1.0648, - "step": 222 - }, - { - "epoch": 0.026814164612517285, - "grad_norm": 2.7601100439441533, - "learning_rate": 3.9172036907606136e-06, - "loss": 0.994, - "step": 223 - }, - { - "epoch": 0.026934407503156375, - "grad_norm": 1.6833547612831332, - "learning_rate": 3.920445065071855e-06, - "loss": 1.1682, - "step": 224 - }, - { - "epoch": 0.027054650393795468, - "grad_norm": 3.400241837595994, - "learning_rate": 3.923672001142322e-06, - "loss": 1.0221, - "step": 225 - }, - { - "epoch": 0.027174893284434558, - "grad_norm": 1.592896101839889, - "learning_rate": 3.926884627027996e-06, - "loss": 1.076, - "step": 226 - }, - { - "epoch": 0.027295136175073648, - "grad_norm": 1.7412021741577006, - "learning_rate": 3.930083069088744e-06, - "loss": 1.0015, - "step": 227 - }, - { - "epoch": 0.02741537906571274, - "grad_norm": 1.0336373179415999, - "learning_rate": 3.933267452018137e-06, - "loss": 0.8695, - "step": 228 - }, - { - "epoch": 0.02753562195635183, - "grad_norm": 9.7676583820563, - "learning_rate": 3.936437898872622e-06, - "loss": 1.0704, - "step": 229 - }, - { - "epoch": 0.02765586484699092, - "grad_norm": 2.392535474747408, - "learning_rate": 3.9395945311000525e-06, - "loss": 1.0268, - "step": 230 - }, - { - "epoch": 0.027776107737630014, - "grad_norm": 2.4690503287827106, - "learning_rate": 3.942737468567608e-06, - "loss": 1.1412, - "step": 231 - }, - { - "epoch": 0.027896350628269104, - "grad_norm": 1.8198907715644406, - "learning_rate": 3.9458668295891026e-06, - "loss": 1.0845, - "step": 232 - }, - { - "epoch": 0.028016593518908194, - "grad_norm": 2.2898350996923247, - "learning_rate": 3.948982730951712e-06, - "loss": 1.087, - "step": 233 - }, - { - "epoch": 0.028136836409547287, - "grad_norm": 1.8197029632220558, - "learning_rate": 3.9520852879421254e-06, - "loss": 1.0468, - "step": 234 - }, - { - "epoch": 0.028257079300186377, - "grad_norm": 2.1211652374866223, - "learning_rate": 3.955174614372137e-06, - "loss": 1.0456, - "step": 235 - }, - { - "epoch": 0.028377322190825467, - "grad_norm": 1.9732379320104996, - "learning_rate": 3.9582508226037045e-06, - "loss": 1.0774, - "step": 236 - }, - { - "epoch": 0.02849756508146456, - "grad_norm": 1.9491714747014734, - "learning_rate": 3.9613140235734636e-06, - "loss": 1.1566, - "step": 237 - }, - { - "epoch": 0.02861780797210365, - "grad_norm": 2.1443520281725057, - "learning_rate": 3.96436432681674e-06, - "loss": 1.0396, - "step": 238 - }, - { - "epoch": 0.02873805086274274, - "grad_norm": 2.3516806132414234, - "learning_rate": 3.967401840491044e-06, - "loss": 1.1273, - "step": 239 - }, - { - "epoch": 0.028858293753381833, - "grad_norm": 1.8534608919284823, - "learning_rate": 3.97042667139909e-06, - "loss": 1.103, - "step": 240 - }, - { - "epoch": 0.028978536644020923, - "grad_norm": 2.138764847080192, - "learning_rate": 3.973438925011327e-06, - "loss": 1.1023, - "step": 241 - }, - { - "epoch": 0.029098779534660012, - "grad_norm": 2.0502509312454897, - "learning_rate": 3.976438705488002e-06, - "loss": 1.1449, - "step": 242 - }, - { - "epoch": 0.029219022425299106, - "grad_norm": 3.1059216822804725, - "learning_rate": 3.9794261157007744e-06, - "loss": 1.1585, - "step": 243 - }, - { - "epoch": 0.029339265315938196, - "grad_norm": 2.6007266606394057, - "learning_rate": 3.982401257253887e-06, - "loss": 1.0789, - "step": 244 - }, - { - "epoch": 0.029459508206577285, - "grad_norm": 1.7771159602483553, - "learning_rate": 3.985364230504893e-06, - "loss": 1.1206, - "step": 245 - }, - { - "epoch": 0.02957975109721638, - "grad_norm": 1.7512592144981696, - "learning_rate": 3.988315134584976e-06, - "loss": 1.0744, - "step": 246 - }, - { - "epoch": 0.02969999398785547, - "grad_norm": 1.5401576449471213, - "learning_rate": 3.991254067418851e-06, - "loss": 1.0263, - "step": 247 - }, - { - "epoch": 0.02982023687849456, - "grad_norm": 1.8291669957096206, - "learning_rate": 3.994181125744254e-06, - "loss": 1.0513, - "step": 248 - }, - { - "epoch": 0.02994047976913365, - "grad_norm": 2.4012949072845027, - "learning_rate": 3.99709640513106e-06, - "loss": 0.9771, - "step": 249 - }, - { - "epoch": 0.03006072265977274, - "grad_norm": 2.14285173642521, - "learning_rate": 4e-06, - "loss": 1.0786, - "step": 250 - }, - { - "epoch": 0.03018096555041183, - "grad_norm": 3.013272964272297, - "learning_rate": 3.999999848300794e-06, - "loss": 1.1171, - "step": 251 - }, - { - "epoch": 0.030301208441050925, - "grad_norm": 1.5229271449586579, - "learning_rate": 3.999999393203203e-06, - "loss": 1.116, - "step": 252 - }, - { - "epoch": 0.030421451331690014, - "grad_norm": 1.5799653304195502, - "learning_rate": 3.999998634707293e-06, - "loss": 1.0745, - "step": 253 - }, - { - "epoch": 0.030541694222329104, - "grad_norm": 2.1761694515600287, - "learning_rate": 3.999997572813182e-06, - "loss": 1.1903, - "step": 254 - }, - { - "epoch": 0.030661937112968194, - "grad_norm": 1.7392150428135462, - "learning_rate": 3.999996207521028e-06, - "loss": 1.0962, - "step": 255 - }, - { - "epoch": 0.030782180003607287, - "grad_norm": 2.3864115565719355, - "learning_rate": 3.999994538831039e-06, - "loss": 1.0508, - "step": 256 - }, - { - "epoch": 0.030902422894246377, - "grad_norm": 2.813994455653343, - "learning_rate": 3.99999256674347e-06, - "loss": 1.0733, - "step": 257 - }, - { - "epoch": 0.031022665784885467, - "grad_norm": 0.9263807842262964, - "learning_rate": 3.999990291258618e-06, - "loss": 0.7925, - "step": 258 - }, - { - "epoch": 0.03114290867552456, - "grad_norm": 2.3287794998110565, - "learning_rate": 3.999987712376829e-06, - "loss": 1.0883, - "step": 259 - }, - { - "epoch": 0.031263151566163654, - "grad_norm": 1.7710489328819667, - "learning_rate": 3.999984830098494e-06, - "loss": 1.0417, - "step": 260 - }, - { - "epoch": 0.03138339445680274, - "grad_norm": 2.5387916772634167, - "learning_rate": 3.999981644424051e-06, - "loss": 1.1975, - "step": 261 - }, - { - "epoch": 0.03150363734744183, - "grad_norm": 2.1345548674054062, - "learning_rate": 3.999978155353982e-06, - "loss": 1.087, - "step": 262 - }, - { - "epoch": 0.03162388023808092, - "grad_norm": 1.8793000821657315, - "learning_rate": 3.9999743628888186e-06, - "loss": 1.025, - "step": 263 - }, - { - "epoch": 0.03174412312872001, - "grad_norm": 2.5617702654620245, - "learning_rate": 3.999970267029133e-06, - "loss": 1.1128, - "step": 264 - }, - { - "epoch": 0.0318643660193591, - "grad_norm": 1.9865825118838703, - "learning_rate": 3.999965867775548e-06, - "loss": 1.0212, - "step": 265 - }, - { - "epoch": 0.0319846089099982, - "grad_norm": 2.13993768726075, - "learning_rate": 3.9999611651287315e-06, - "loss": 1.0898, - "step": 266 - }, - { - "epoch": 0.03210485180063729, - "grad_norm": 2.4740020319776894, - "learning_rate": 3.999956159089396e-06, - "loss": 1.0267, - "step": 267 - }, - { - "epoch": 0.03222509469127638, - "grad_norm": 2.124374409759969, - "learning_rate": 3.999950849658302e-06, - "loss": 1.0197, - "step": 268 - }, - { - "epoch": 0.03234533758191547, - "grad_norm": 2.137044320417618, - "learning_rate": 3.999945236836254e-06, - "loss": 1.07, - "step": 269 - }, - { - "epoch": 0.03246558047255456, - "grad_norm": 2.5766451200675786, - "learning_rate": 3.999939320624103e-06, - "loss": 1.1748, - "step": 270 - }, - { - "epoch": 0.03258582336319365, - "grad_norm": 1.6984923465299169, - "learning_rate": 3.999933101022749e-06, - "loss": 1.1207, - "step": 271 - }, - { - "epoch": 0.032706066253832745, - "grad_norm": 1.645096199330197, - "learning_rate": 3.999926578033132e-06, - "loss": 1.0839, - "step": 272 - }, - { - "epoch": 0.032826309144471835, - "grad_norm": 1.934951706118524, - "learning_rate": 3.999919751656244e-06, - "loss": 0.8591, - "step": 273 - }, - { - "epoch": 0.032946552035110925, - "grad_norm": 3.1169252978500994, - "learning_rate": 3.9999126218931195e-06, - "loss": 0.9944, - "step": 274 - }, - { - "epoch": 0.033066794925750015, - "grad_norm": 2.2041495896406813, - "learning_rate": 3.99990518874484e-06, - "loss": 1.1268, - "step": 275 - }, - { - "epoch": 0.033187037816389105, - "grad_norm": 2.1273224425045547, - "learning_rate": 3.999897452212534e-06, - "loss": 1.1565, - "step": 276 - }, - { - "epoch": 0.033307280707028195, - "grad_norm": 2.206095539337887, - "learning_rate": 3.999889412297374e-06, - "loss": 1.2241, - "step": 277 - }, - { - "epoch": 0.03342752359766729, - "grad_norm": 2.1220799747392776, - "learning_rate": 3.999881069000581e-06, - "loss": 1.0225, - "step": 278 - }, - { - "epoch": 0.03354776648830638, - "grad_norm": 2.796887616177027, - "learning_rate": 3.99987242232342e-06, - "loss": 1.0953, - "step": 279 - }, - { - "epoch": 0.03366800937894547, - "grad_norm": 1.763750549792144, - "learning_rate": 3.9998634722672026e-06, - "loss": 1.0236, - "step": 280 - }, - { - "epoch": 0.03378825226958456, - "grad_norm": 1.862795016586358, - "learning_rate": 3.999854218833286e-06, - "loss": 1.0199, - "step": 281 - }, - { - "epoch": 0.03390849516022365, - "grad_norm": 3.032323745423319, - "learning_rate": 3.999844662023075e-06, - "loss": 1.0507, - "step": 282 - }, - { - "epoch": 0.03402873805086274, - "grad_norm": 1.8408442083371113, - "learning_rate": 3.999834801838018e-06, - "loss": 1.1443, - "step": 283 - }, - { - "epoch": 0.03414898094150183, - "grad_norm": 2.431596993051067, - "learning_rate": 3.9998246382796115e-06, - "loss": 0.9707, - "step": 284 - }, - { - "epoch": 0.03426922383214093, - "grad_norm": 3.253642608476513, - "learning_rate": 3.999814171349399e-06, - "loss": 1.1321, - "step": 285 - }, - { - "epoch": 0.03438946672278002, - "grad_norm": 1.6394433989019337, - "learning_rate": 3.9998034010489655e-06, - "loss": 0.966, - "step": 286 - }, - { - "epoch": 0.03450970961341911, - "grad_norm": 2.047123202352299, - "learning_rate": 3.999792327379946e-06, - "loss": 0.9906, - "step": 287 - }, - { - "epoch": 0.034629952504058197, - "grad_norm": 2.089587044324334, - "learning_rate": 3.999780950344021e-06, - "loss": 1.2053, - "step": 288 - }, - { - "epoch": 0.034750195394697286, - "grad_norm": 1.7854469780442854, - "learning_rate": 3.999769269942916e-06, - "loss": 1.0472, - "step": 289 - }, - { - "epoch": 0.034870438285336376, - "grad_norm": 2.036927724403436, - "learning_rate": 3.999757286178402e-06, - "loss": 1.0467, - "step": 290 - }, - { - "epoch": 0.03499068117597547, - "grad_norm": 1.6953160158463687, - "learning_rate": 3.999744999052299e-06, - "loss": 1.1364, - "step": 291 - }, - { - "epoch": 0.03511092406661456, - "grad_norm": 0.990739140718884, - "learning_rate": 3.9997324085664675e-06, - "loss": 0.8969, - "step": 292 - }, - { - "epoch": 0.03523116695725365, - "grad_norm": 1.8992264603490072, - "learning_rate": 3.999719514722821e-06, - "loss": 1.1466, - "step": 293 - }, - { - "epoch": 0.03535140984789274, - "grad_norm": 2.5263773796663367, - "learning_rate": 3.999706317523314e-06, - "loss": 0.9776, - "step": 294 - }, - { - "epoch": 0.03547165273853183, - "grad_norm": 1.9136038094364638, - "learning_rate": 3.999692816969948e-06, - "loss": 1.0817, - "step": 295 - }, - { - "epoch": 0.03559189562917092, - "grad_norm": 1.1433174545271654, - "learning_rate": 3.999679013064772e-06, - "loss": 0.9419, - "step": 296 - }, - { - "epoch": 0.03571213851981002, - "grad_norm": 2.331872062425647, - "learning_rate": 3.99966490580988e-06, - "loss": 1.0894, - "step": 297 - }, - { - "epoch": 0.03583238141044911, - "grad_norm": 2.3665073538157584, - "learning_rate": 3.999650495207411e-06, - "loss": 0.8884, - "step": 298 - }, - { - "epoch": 0.0359526243010882, - "grad_norm": 2.2239340811366715, - "learning_rate": 3.999635781259553e-06, - "loss": 1.1277, - "step": 299 - }, - { - "epoch": 0.03607286719172729, - "grad_norm": 0.9882158099435567, - "learning_rate": 3.999620763968535e-06, - "loss": 0.7936, - "step": 300 - }, - { - "epoch": 0.03619311008236638, - "grad_norm": 1.6897415532334028, - "learning_rate": 3.999605443336638e-06, - "loss": 1.0975, - "step": 301 - }, - { - "epoch": 0.03631335297300547, - "grad_norm": 2.5484688107305447, - "learning_rate": 3.999589819366185e-06, - "loss": 1.1197, - "step": 302 - }, - { - "epoch": 0.036433595863644565, - "grad_norm": 1.6897776738482886, - "learning_rate": 3.999573892059547e-06, - "loss": 1.0618, - "step": 303 - }, - { - "epoch": 0.036553838754283655, - "grad_norm": 2.1539685992659074, - "learning_rate": 3.999557661419138e-06, - "loss": 1.039, - "step": 304 - }, - { - "epoch": 0.036674081644922744, - "grad_norm": 1.9299312545708422, - "learning_rate": 3.9995411274474225e-06, - "loss": 1.0321, - "step": 305 - }, - { - "epoch": 0.036794324535561834, - "grad_norm": 1.8736739237726858, - "learning_rate": 3.999524290146908e-06, - "loss": 1.0457, - "step": 306 - }, - { - "epoch": 0.036914567426200924, - "grad_norm": 2.0886224618506266, - "learning_rate": 3.9995071495201485e-06, - "loss": 1.1337, - "step": 307 - }, - { - "epoch": 0.037034810316840014, - "grad_norm": 2.0448959917326834, - "learning_rate": 3.999489705569744e-06, - "loss": 1.2004, - "step": 308 - }, - { - "epoch": 0.03715505320747911, - "grad_norm": 1.8921704241913757, - "learning_rate": 3.999471958298341e-06, - "loss": 1.096, - "step": 309 - }, - { - "epoch": 0.0372752960981182, - "grad_norm": 1.8923557020881956, - "learning_rate": 3.999453907708631e-06, - "loss": 0.9881, - "step": 310 - }, - { - "epoch": 0.03739553898875729, - "grad_norm": 1.7685852216657685, - "learning_rate": 3.999435553803353e-06, - "loss": 1.0486, - "step": 311 - }, - { - "epoch": 0.03751578187939638, - "grad_norm": 2.3290936803784343, - "learning_rate": 3.999416896585292e-06, - "loss": 1.0636, - "step": 312 - }, - { - "epoch": 0.03763602477003547, - "grad_norm": 3.415659934490835, - "learning_rate": 3.9993979360572775e-06, - "loss": 1.0944, - "step": 313 - }, - { - "epoch": 0.03775626766067456, - "grad_norm": 2.887821301836021, - "learning_rate": 3.999378672222185e-06, - "loss": 1.0636, - "step": 314 - }, - { - "epoch": 0.03787651055131366, - "grad_norm": 1.8529717599123945, - "learning_rate": 3.9993591050829385e-06, - "loss": 1.0604, - "step": 315 - }, - { - "epoch": 0.037996753441952746, - "grad_norm": 1.9119599920101267, - "learning_rate": 3.999339234642506e-06, - "loss": 1.0257, - "step": 316 - }, - { - "epoch": 0.038116996332591836, - "grad_norm": 1.7961490657430492, - "learning_rate": 3.9993190609038994e-06, - "loss": 1.0696, - "step": 317 - }, - { - "epoch": 0.038237239223230926, - "grad_norm": 1.6413584205162532, - "learning_rate": 3.999298583870182e-06, - "loss": 1.0682, - "step": 318 - }, - { - "epoch": 0.038357482113870016, - "grad_norm": 1.7236800492597768, - "learning_rate": 3.999277803544458e-06, - "loss": 1.0041, - "step": 319 - }, - { - "epoch": 0.038477725004509106, - "grad_norm": 0.9767922703325456, - "learning_rate": 3.999256719929882e-06, - "loss": 0.9004, - "step": 320 - }, - { - "epoch": 0.0385979678951482, - "grad_norm": 1.2794375276776586, - "learning_rate": 3.999235333029651e-06, - "loss": 0.9805, - "step": 321 - }, - { - "epoch": 0.03871821078578729, - "grad_norm": 1.6750255383494679, - "learning_rate": 3.999213642847009e-06, - "loss": 1.0434, - "step": 322 - }, - { - "epoch": 0.03883845367642638, - "grad_norm": 1.7220774259849747, - "learning_rate": 3.999191649385247e-06, - "loss": 1.1425, - "step": 323 - }, - { - "epoch": 0.03895869656706547, - "grad_norm": 0.893285191653865, - "learning_rate": 3.999169352647702e-06, - "loss": 0.869, - "step": 324 - }, - { - "epoch": 0.03907893945770456, - "grad_norm": 1.7780951955003632, - "learning_rate": 3.999146752637755e-06, - "loss": 1.0528, - "step": 325 - }, - { - "epoch": 0.03919918234834365, - "grad_norm": 2.376182863911484, - "learning_rate": 3.999123849358836e-06, - "loss": 1.1337, - "step": 326 - }, - { - "epoch": 0.03931942523898275, - "grad_norm": 1.8443968979644998, - "learning_rate": 3.999100642814418e-06, - "loss": 0.9792, - "step": 327 - }, - { - "epoch": 0.03943966812962184, - "grad_norm": 5.443519680261973, - "learning_rate": 3.999077133008022e-06, - "loss": 1.1235, - "step": 328 - }, - { - "epoch": 0.03955991102026093, - "grad_norm": 1.745107419115253, - "learning_rate": 3.9990533199432145e-06, - "loss": 1.1353, - "step": 329 - }, - { - "epoch": 0.03968015391090002, - "grad_norm": 1.8890848112721348, - "learning_rate": 3.999029203623608e-06, - "loss": 0.9869, - "step": 330 - }, - { - "epoch": 0.03980039680153911, - "grad_norm": 1.722440745050099, - "learning_rate": 3.99900478405286e-06, - "loss": 1.0937, - "step": 331 - }, - { - "epoch": 0.0399206396921782, - "grad_norm": 2.8858048124021143, - "learning_rate": 3.998980061234676e-06, - "loss": 1.0598, - "step": 332 - }, - { - "epoch": 0.040040882582817294, - "grad_norm": 2.5959890421972607, - "learning_rate": 3.9989550351728055e-06, - "loss": 0.9914, - "step": 333 - }, - { - "epoch": 0.040161125473456384, - "grad_norm": 2.1375621407382708, - "learning_rate": 3.998929705871046e-06, - "loss": 1.0664, - "step": 334 - }, - { - "epoch": 0.040281368364095474, - "grad_norm": 2.2826775042901986, - "learning_rate": 3.99890407333324e-06, - "loss": 1.1116, - "step": 335 - }, - { - "epoch": 0.040401611254734564, - "grad_norm": 1.5742959129031833, - "learning_rate": 3.998878137563275e-06, - "loss": 1.0935, - "step": 336 - }, - { - "epoch": 0.040521854145373654, - "grad_norm": 2.233479567148013, - "learning_rate": 3.998851898565085e-06, - "loss": 1.0821, - "step": 337 - }, - { - "epoch": 0.04064209703601274, - "grad_norm": 2.176292920858856, - "learning_rate": 3.998825356342653e-06, - "loss": 1.0603, - "step": 338 - }, - { - "epoch": 0.04076233992665183, - "grad_norm": 2.1703371599964654, - "learning_rate": 3.998798510900003e-06, - "loss": 0.9567, - "step": 339 - }, - { - "epoch": 0.04088258281729093, - "grad_norm": 2.0694801816763246, - "learning_rate": 3.998771362241207e-06, - "loss": 1.0793, - "step": 340 - }, - { - "epoch": 0.04100282570793002, - "grad_norm": 1.8520979610018518, - "learning_rate": 3.998743910370385e-06, - "loss": 1.1036, - "step": 341 - }, - { - "epoch": 0.04112306859856911, - "grad_norm": 1.6738432946958817, - "learning_rate": 3.998716155291702e-06, - "loss": 0.9601, - "step": 342 - }, - { - "epoch": 0.0412433114892082, - "grad_norm": 1.6380245853577193, - "learning_rate": 3.998688097009366e-06, - "loss": 1.1353, - "step": 343 - }, - { - "epoch": 0.04136355437984729, - "grad_norm": 2.025917212840567, - "learning_rate": 3.998659735527636e-06, - "loss": 1.0325, - "step": 344 - }, - { - "epoch": 0.04148379727048638, - "grad_norm": 1.5955218021836262, - "learning_rate": 3.998631070850813e-06, - "loss": 1.0, - "step": 345 - }, - { - "epoch": 0.041604040161125476, - "grad_norm": 2.3911739541867103, - "learning_rate": 3.9986021029832455e-06, - "loss": 1.0633, - "step": 346 - }, - { - "epoch": 0.041724283051764566, - "grad_norm": 2.48126121537679, - "learning_rate": 3.9985728319293285e-06, - "loss": 1.1337, - "step": 347 - }, - { - "epoch": 0.041844525942403656, - "grad_norm": 2.0851155351056136, - "learning_rate": 3.998543257693501e-06, - "loss": 1.0768, - "step": 348 - }, - { - "epoch": 0.041964768833042745, - "grad_norm": 1.6535766907502, - "learning_rate": 3.998513380280251e-06, - "loss": 1.1084, - "step": 349 - }, - { - "epoch": 0.042085011723681835, - "grad_norm": 14.717351438000296, - "learning_rate": 3.99848319969411e-06, - "loss": 1.1802, - "step": 350 - }, - { - "epoch": 0.042205254614320925, - "grad_norm": 3.65947209725373, - "learning_rate": 3.9984527159396564e-06, - "loss": 1.014, - "step": 351 - }, - { - "epoch": 0.04232549750496002, - "grad_norm": 11.954186406102087, - "learning_rate": 3.9984219290215154e-06, - "loss": 1.0605, - "step": 352 - }, - { - "epoch": 0.04244574039559911, - "grad_norm": 1.5726086733917868, - "learning_rate": 3.998390838944356e-06, - "loss": 1.115, - "step": 353 - }, - { - "epoch": 0.0425659832862382, - "grad_norm": 1.9558828186007606, - "learning_rate": 3.998359445712895e-06, - "loss": 1.1237, - "step": 354 - }, - { - "epoch": 0.04268622617687729, - "grad_norm": 3.0690522458777476, - "learning_rate": 3.9983277493318955e-06, - "loss": 1.0381, - "step": 355 - }, - { - "epoch": 0.04280646906751638, - "grad_norm": 1.7003885386143298, - "learning_rate": 3.998295749806165e-06, - "loss": 1.0396, - "step": 356 - }, - { - "epoch": 0.04292671195815547, - "grad_norm": 1.815315296330517, - "learning_rate": 3.998263447140558e-06, - "loss": 1.0716, - "step": 357 - }, - { - "epoch": 0.04304695484879457, - "grad_norm": 3.062226108058881, - "learning_rate": 3.998230841339976e-06, - "loss": 1.046, - "step": 358 - }, - { - "epoch": 0.04316719773943366, - "grad_norm": 1.954367463695414, - "learning_rate": 3.998197932409363e-06, - "loss": 1.0799, - "step": 359 - }, - { - "epoch": 0.04328744063007275, - "grad_norm": 1.879689963221026, - "learning_rate": 3.9981647203537125e-06, - "loss": 1.0938, - "step": 360 - }, - { - "epoch": 0.04340768352071184, - "grad_norm": 1.8671435107551988, - "learning_rate": 3.998131205178063e-06, - "loss": 1.1834, - "step": 361 - }, - { - "epoch": 0.04352792641135093, - "grad_norm": 2.1263558045558506, - "learning_rate": 3.998097386887498e-06, - "loss": 0.9957, - "step": 362 - }, - { - "epoch": 0.04364816930199002, - "grad_norm": 1.5023151650885798, - "learning_rate": 3.998063265487148e-06, - "loss": 1.069, - "step": 363 - }, - { - "epoch": 0.043768412192629114, - "grad_norm": 2.0030960914085014, - "learning_rate": 3.99802884098219e-06, - "loss": 1.0382, - "step": 364 - }, - { - "epoch": 0.043888655083268203, - "grad_norm": 2.090952812461446, - "learning_rate": 3.997994113377845e-06, - "loss": 1.0473, - "step": 365 - }, - { - "epoch": 0.04400889797390729, - "grad_norm": 1.9152828141182285, - "learning_rate": 3.9979590826793815e-06, - "loss": 1.0614, - "step": 366 - }, - { - "epoch": 0.04412914086454638, - "grad_norm": 1.5529507852089608, - "learning_rate": 3.997923748892113e-06, - "loss": 1.045, - "step": 367 - }, - { - "epoch": 0.04424938375518547, - "grad_norm": 1.7201469293203757, - "learning_rate": 3.9978881120214015e-06, - "loss": 1.1145, - "step": 368 - }, - { - "epoch": 0.04436962664582456, - "grad_norm": 1.7538144229902708, - "learning_rate": 3.997852172072652e-06, - "loss": 1.0206, - "step": 369 - }, - { - "epoch": 0.04448986953646366, - "grad_norm": 5.748530324449309, - "learning_rate": 3.9978159290513155e-06, - "loss": 1.1248, - "step": 370 - }, - { - "epoch": 0.04461011242710275, - "grad_norm": 1.805445920435905, - "learning_rate": 3.997779382962892e-06, - "loss": 1.032, - "step": 371 - }, - { - "epoch": 0.04473035531774184, - "grad_norm": 1.7731067500286244, - "learning_rate": 3.997742533812924e-06, - "loss": 0.966, - "step": 372 - }, - { - "epoch": 0.04485059820838093, - "grad_norm": 2.2476303549910073, - "learning_rate": 3.997705381607001e-06, - "loss": 1.1443, - "step": 373 - }, - { - "epoch": 0.04497084109902002, - "grad_norm": 0.9870079492617956, - "learning_rate": 3.997667926350761e-06, - "loss": 0.866, - "step": 374 - }, - { - "epoch": 0.04509108398965911, - "grad_norm": 0.8820623400794861, - "learning_rate": 3.997630168049886e-06, - "loss": 0.837, - "step": 375 - }, - { - "epoch": 0.045211326880298205, - "grad_norm": 1.7628130506255304, - "learning_rate": 3.997592106710101e-06, - "loss": 1.0049, - "step": 376 - }, - { - "epoch": 0.045331569770937295, - "grad_norm": 3.5038681453474343, - "learning_rate": 3.997553742337182e-06, - "loss": 0.888, - "step": 377 - }, - { - "epoch": 0.045451812661576385, - "grad_norm": 2.06270979767929, - "learning_rate": 3.997515074936949e-06, - "loss": 1.1436, - "step": 378 - }, - { - "epoch": 0.045572055552215475, - "grad_norm": 2.4873810821172055, - "learning_rate": 3.997476104515268e-06, - "loss": 1.0862, - "step": 379 - }, - { - "epoch": 0.045692298442854565, - "grad_norm": 1.6870356733330047, - "learning_rate": 3.9974368310780485e-06, - "loss": 1.0032, - "step": 380 - }, - { - "epoch": 0.045812541333493655, - "grad_norm": 2.9186352398875033, - "learning_rate": 3.997397254631251e-06, - "loss": 0.9663, - "step": 381 - }, - { - "epoch": 0.04593278422413275, - "grad_norm": 0.913600968132909, - "learning_rate": 3.997357375180878e-06, - "loss": 0.8856, - "step": 382 - }, - { - "epoch": 0.04605302711477184, - "grad_norm": 1.680169111891006, - "learning_rate": 3.997317192732979e-06, - "loss": 0.9786, - "step": 383 - }, - { - "epoch": 0.04617327000541093, - "grad_norm": 2.3841170945181225, - "learning_rate": 3.99727670729365e-06, - "loss": 1.0466, - "step": 384 - }, - { - "epoch": 0.04629351289605002, - "grad_norm": 1.7209327016347584, - "learning_rate": 3.997235918869033e-06, - "loss": 1.0036, - "step": 385 - }, - { - "epoch": 0.04641375578668911, - "grad_norm": 1.9538761983129178, - "learning_rate": 3.997194827465315e-06, - "loss": 1.0561, - "step": 386 - }, - { - "epoch": 0.0465339986773282, - "grad_norm": 2.469999359377882, - "learning_rate": 3.997153433088728e-06, - "loss": 1.1482, - "step": 387 - }, - { - "epoch": 0.0466542415679673, - "grad_norm": 1.6800948247942118, - "learning_rate": 3.997111735745554e-06, - "loss": 1.034, - "step": 388 - }, - { - "epoch": 0.04677448445860639, - "grad_norm": 1.6568988033097192, - "learning_rate": 3.997069735442118e-06, - "loss": 1.0554, - "step": 389 - }, - { - "epoch": 0.04689472734924548, - "grad_norm": 1.3210775652976303, - "learning_rate": 3.997027432184792e-06, - "loss": 1.0228, - "step": 390 - }, - { - "epoch": 0.04701497023988457, - "grad_norm": 1.5961365128743572, - "learning_rate": 3.99698482597999e-06, - "loss": 1.1085, - "step": 391 - }, - { - "epoch": 0.04713521313052366, - "grad_norm": 0.913288571317776, - "learning_rate": 3.99694191683418e-06, - "loss": 0.8918, - "step": 392 - }, - { - "epoch": 0.047255456021162746, - "grad_norm": 1.8888330716257171, - "learning_rate": 3.996898704753867e-06, - "loss": 1.0507, - "step": 393 - }, - { - "epoch": 0.04737569891180184, - "grad_norm": 2.391677425329309, - "learning_rate": 3.996855189745609e-06, - "loss": 1.1119, - "step": 394 - }, - { - "epoch": 0.04749594180244093, - "grad_norm": 1.6638850444324018, - "learning_rate": 3.996811371816007e-06, - "loss": 1.1509, - "step": 395 - }, - { - "epoch": 0.04761618469308002, - "grad_norm": 1.9329781216933917, - "learning_rate": 3.996767250971707e-06, - "loss": 1.0106, - "step": 396 - }, - { - "epoch": 0.04773642758371911, - "grad_norm": 1.7499204345054118, - "learning_rate": 3.996722827219403e-06, - "loss": 1.0924, - "step": 397 - }, - { - "epoch": 0.0478566704743582, - "grad_norm": 2.10918894759635, - "learning_rate": 3.996678100565833e-06, - "loss": 1.05, - "step": 398 - }, - { - "epoch": 0.04797691336499729, - "grad_norm": 2.417591466110285, - "learning_rate": 3.996633071017783e-06, - "loss": 1.1055, - "step": 399 - }, - { - "epoch": 0.04809715625563638, - "grad_norm": 2.0260152414620367, - "learning_rate": 3.996587738582084e-06, - "loss": 1.0432, - "step": 400 - }, - { - "epoch": 0.04821739914627548, - "grad_norm": 2.752032049386124, - "learning_rate": 3.9965421032656115e-06, - "loss": 1.0825, - "step": 401 - }, - { - "epoch": 0.04833764203691457, - "grad_norm": 2.3131556225913923, - "learning_rate": 3.99649616507529e-06, - "loss": 1.1664, - "step": 402 - }, - { - "epoch": 0.04845788492755366, - "grad_norm": 0.9423926792715338, - "learning_rate": 3.996449924018088e-06, - "loss": 0.9075, - "step": 403 - }, - { - "epoch": 0.04857812781819275, - "grad_norm": 2.2902177102492582, - "learning_rate": 3.99640338010102e-06, - "loss": 1.0288, - "step": 404 - }, - { - "epoch": 0.04869837070883184, - "grad_norm": 1.6735291186131023, - "learning_rate": 3.996356533331146e-06, - "loss": 1.008, - "step": 405 - }, - { - "epoch": 0.04881861359947093, - "grad_norm": 2.341167705859456, - "learning_rate": 3.996309383715573e-06, - "loss": 0.8435, - "step": 406 - }, - { - "epoch": 0.048938856490110025, - "grad_norm": 2.088074464617485, - "learning_rate": 3.996261931261454e-06, - "loss": 0.9583, - "step": 407 - }, - { - "epoch": 0.049059099380749115, - "grad_norm": 1.6239852671401251, - "learning_rate": 3.996214175975987e-06, - "loss": 1.0881, - "step": 408 - }, - { - "epoch": 0.049179342271388204, - "grad_norm": 2.437640259448646, - "learning_rate": 3.996166117866417e-06, - "loss": 1.0192, - "step": 409 - }, - { - "epoch": 0.049299585162027294, - "grad_norm": 1.9346440708940578, - "learning_rate": 3.996117756940035e-06, - "loss": 1.0935, - "step": 410 - }, - { - "epoch": 0.049419828052666384, - "grad_norm": 2.0085341051134304, - "learning_rate": 3.996069093204175e-06, - "loss": 1.1961, - "step": 411 - }, - { - "epoch": 0.049540070943305474, - "grad_norm": 1.9508899565242117, - "learning_rate": 3.996020126666221e-06, - "loss": 1.1039, - "step": 412 - }, - { - "epoch": 0.04966031383394457, - "grad_norm": 1.9583761197008607, - "learning_rate": 3.995970857333601e-06, - "loss": 1.0505, - "step": 413 - }, - { - "epoch": 0.04978055672458366, - "grad_norm": 1.5568510557069233, - "learning_rate": 3.995921285213789e-06, - "loss": 1.0256, - "step": 414 - }, - { - "epoch": 0.04990079961522275, - "grad_norm": 3.2475421121053167, - "learning_rate": 3.995871410314305e-06, - "loss": 1.0331, - "step": 415 - }, - { - "epoch": 0.05002104250586184, - "grad_norm": 1.0074060492808978, - "learning_rate": 3.995821232642714e-06, - "loss": 0.8914, - "step": 416 - }, - { - "epoch": 0.05014128539650093, - "grad_norm": 2.2778405617639033, - "learning_rate": 3.995770752206629e-06, - "loss": 1.0547, - "step": 417 - }, - { - "epoch": 0.05026152828714002, - "grad_norm": 1.9038024573509722, - "learning_rate": 3.995719969013709e-06, - "loss": 1.1949, - "step": 418 - }, - { - "epoch": 0.05038177117777912, - "grad_norm": 2.7039512297673207, - "learning_rate": 3.995668883071655e-06, - "loss": 1.084, - "step": 419 - }, - { - "epoch": 0.050502014068418206, - "grad_norm": 2.43925530247739, - "learning_rate": 3.995617494388219e-06, - "loss": 1.1378, - "step": 420 - }, - { - "epoch": 0.050622256959057296, - "grad_norm": 1.7489481898595476, - "learning_rate": 3.995565802971196e-06, - "loss": 1.0346, - "step": 421 - }, - { - "epoch": 0.050742499849696386, - "grad_norm": 1.865354588015086, - "learning_rate": 3.995513808828427e-06, - "loss": 0.9062, - "step": 422 - }, - { - "epoch": 0.050862742740335476, - "grad_norm": 2.1232653999340565, - "learning_rate": 3.9954615119678e-06, - "loss": 0.9907, - "step": 423 - }, - { - "epoch": 0.050982985630974566, - "grad_norm": 2.020643389187788, - "learning_rate": 3.995408912397248e-06, - "loss": 1.0221, - "step": 424 - }, - { - "epoch": 0.05110322852161366, - "grad_norm": 2.1171254504355854, - "learning_rate": 3.99535601012475e-06, - "loss": 1.1528, - "step": 425 - }, - { - "epoch": 0.05122347141225275, - "grad_norm": 1.5352825860219548, - "learning_rate": 3.995302805158333e-06, - "loss": 0.982, - "step": 426 - }, - { - "epoch": 0.05134371430289184, - "grad_norm": 1.6014984862397537, - "learning_rate": 3.9952492975060665e-06, - "loss": 1.0634, - "step": 427 - }, - { - "epoch": 0.05146395719353093, - "grad_norm": 2.810183015748393, - "learning_rate": 3.995195487176067e-06, - "loss": 1.0799, - "step": 428 - }, - { - "epoch": 0.05158420008417002, - "grad_norm": 1.6734622301683928, - "learning_rate": 3.995141374176499e-06, - "loss": 1.0834, - "step": 429 - }, - { - "epoch": 0.05170444297480911, - "grad_norm": 0.9403742598602088, - "learning_rate": 3.995086958515572e-06, - "loss": 0.9093, - "step": 430 - }, - { - "epoch": 0.05182468586544821, - "grad_norm": 0.923907802766927, - "learning_rate": 3.995032240201538e-06, - "loss": 0.8777, - "step": 431 - }, - { - "epoch": 0.0519449287560873, - "grad_norm": 0.9974692627227163, - "learning_rate": 3.9949772192427e-06, - "loss": 0.8862, - "step": 432 - }, - { - "epoch": 0.05206517164672639, - "grad_norm": 1.6228827842273614, - "learning_rate": 3.994921895647405e-06, - "loss": 1.025, - "step": 433 - }, - { - "epoch": 0.05218541453736548, - "grad_norm": 0.8148135112882874, - "learning_rate": 3.994866269424043e-06, - "loss": 0.8043, - "step": 434 - }, - { - "epoch": 0.05230565742800457, - "grad_norm": 2.403152552968824, - "learning_rate": 3.9948103405810545e-06, - "loss": 1.0096, - "step": 435 - }, - { - "epoch": 0.05242590031864366, - "grad_norm": 1.6556429920248554, - "learning_rate": 3.994754109126923e-06, - "loss": 1.0793, - "step": 436 - }, - { - "epoch": 0.052546143209282754, - "grad_norm": 1.5847740979869624, - "learning_rate": 3.994697575070181e-06, - "loss": 1.1605, - "step": 437 - }, - { - "epoch": 0.052666386099921844, - "grad_norm": 1.8738449353578746, - "learning_rate": 3.994640738419402e-06, - "loss": 1.1344, - "step": 438 - }, - { - "epoch": 0.052786628990560934, - "grad_norm": 1.6506957725712497, - "learning_rate": 3.9945835991832075e-06, - "loss": 1.0323, - "step": 439 - }, - { - "epoch": 0.052906871881200024, - "grad_norm": 1.9902412032589563, - "learning_rate": 3.994526157370268e-06, - "loss": 1.1506, - "step": 440 - }, - { - "epoch": 0.053027114771839114, - "grad_norm": 0.9018755271841347, - "learning_rate": 3.994468412989296e-06, - "loss": 0.8452, - "step": 441 - }, - { - "epoch": 0.053147357662478203, - "grad_norm": 1.9399189768910081, - "learning_rate": 3.994410366049052e-06, - "loss": 1.1662, - "step": 442 - }, - { - "epoch": 0.0532676005531173, - "grad_norm": 2.2116797182013763, - "learning_rate": 3.994352016558341e-06, - "loss": 1.0626, - "step": 443 - }, - { - "epoch": 0.05338784344375639, - "grad_norm": 1.8381044626239795, - "learning_rate": 3.994293364526014e-06, - "loss": 0.9654, - "step": 444 - }, - { - "epoch": 0.05350808633439548, - "grad_norm": 1.6740847805891128, - "learning_rate": 3.99423440996097e-06, - "loss": 1.0654, - "step": 445 - }, - { - "epoch": 0.05362832922503457, - "grad_norm": 2.1753918831852954, - "learning_rate": 3.994175152872152e-06, - "loss": 1.0392, - "step": 446 - }, - { - "epoch": 0.05374857211567366, - "grad_norm": 1.9089234163040603, - "learning_rate": 3.994115593268548e-06, - "loss": 1.0206, - "step": 447 - }, - { - "epoch": 0.05386881500631275, - "grad_norm": 2.6177487810569406, - "learning_rate": 3.994055731159195e-06, - "loss": 1.049, - "step": 448 - }, - { - "epoch": 0.053989057896951846, - "grad_norm": 1.7103602162693603, - "learning_rate": 3.993995566553172e-06, - "loss": 1.0944, - "step": 449 - }, - { - "epoch": 0.054109300787590936, - "grad_norm": 1.5673796181334272, - "learning_rate": 3.993935099459607e-06, - "loss": 0.9981, - "step": 450 - }, - { - "epoch": 0.054229543678230026, - "grad_norm": 1.9050233990691496, - "learning_rate": 3.993874329887673e-06, - "loss": 0.9731, - "step": 451 - }, - { - "epoch": 0.054349786568869116, - "grad_norm": 2.99727650042216, - "learning_rate": 3.993813257846589e-06, - "loss": 1.0893, - "step": 452 - }, - { - "epoch": 0.054470029459508205, - "grad_norm": 2.209503571520593, - "learning_rate": 3.993751883345619e-06, - "loss": 1.1589, - "step": 453 - }, - { - "epoch": 0.054590272350147295, - "grad_norm": 2.8208893162305797, - "learning_rate": 3.993690206394073e-06, - "loss": 1.1038, - "step": 454 - }, - { - "epoch": 0.054710515240786385, - "grad_norm": 2.25088706014443, - "learning_rate": 3.993628227001307e-06, - "loss": 1.1023, - "step": 455 - }, - { - "epoch": 0.05483075813142548, - "grad_norm": 1.7674646161974183, - "learning_rate": 3.993565945176726e-06, - "loss": 0.9383, - "step": 456 - }, - { - "epoch": 0.05495100102206457, - "grad_norm": 1.7592244221024722, - "learning_rate": 3.993503360929776e-06, - "loss": 1.0668, - "step": 457 - }, - { - "epoch": 0.05507124391270366, - "grad_norm": 1.5507135256337239, - "learning_rate": 3.99344047426995e-06, - "loss": 1.0411, - "step": 458 - }, - { - "epoch": 0.05519148680334275, - "grad_norm": 1.8798790077071885, - "learning_rate": 3.993377285206789e-06, - "loss": 1.1593, - "step": 459 - }, - { - "epoch": 0.05531172969398184, - "grad_norm": 1.5795191281746261, - "learning_rate": 3.99331379374988e-06, - "loss": 1.0946, - "step": 460 - }, - { - "epoch": 0.05543197258462093, - "grad_norm": 1.7137270965269966, - "learning_rate": 3.993249999908852e-06, - "loss": 1.0332, - "step": 461 - }, - { - "epoch": 0.05555221547526003, - "grad_norm": 1.6580440233733886, - "learning_rate": 3.993185903693384e-06, - "loss": 1.0941, - "step": 462 - }, - { - "epoch": 0.05567245836589912, - "grad_norm": 2.4772443825418113, - "learning_rate": 3.9931215051131995e-06, - "loss": 1.0531, - "step": 463 - }, - { - "epoch": 0.05579270125653821, - "grad_norm": 1.4567091025769832, - "learning_rate": 3.993056804178068e-06, - "loss": 1.0312, - "step": 464 - }, - { - "epoch": 0.0559129441471773, - "grad_norm": 1.7957506725761962, - "learning_rate": 3.992991800897803e-06, - "loss": 1.0724, - "step": 465 - }, - { - "epoch": 0.05603318703781639, - "grad_norm": 2.1888017323478737, - "learning_rate": 3.9929264952822665e-06, - "loss": 1.1207, - "step": 466 - }, - { - "epoch": 0.05615342992845548, - "grad_norm": 1.7320910012035151, - "learning_rate": 3.992860887341366e-06, - "loss": 1.1117, - "step": 467 - }, - { - "epoch": 0.056273672819094574, - "grad_norm": 2.5504840017511334, - "learning_rate": 3.992794977085052e-06, - "loss": 1.0457, - "step": 468 - }, - { - "epoch": 0.056393915709733664, - "grad_norm": 1.806300511693362, - "learning_rate": 3.992728764523326e-06, - "loss": 1.0766, - "step": 469 - }, - { - "epoch": 0.05651415860037275, - "grad_norm": 1.475348321552106, - "learning_rate": 3.99266224966623e-06, - "loss": 1.0316, - "step": 470 - }, - { - "epoch": 0.05663440149101184, - "grad_norm": 2.0218667078806245, - "learning_rate": 3.992595432523855e-06, - "loss": 1.1021, - "step": 471 - }, - { - "epoch": 0.05675464438165093, - "grad_norm": 1.712309126414701, - "learning_rate": 3.992528313106338e-06, - "loss": 1.0909, - "step": 472 - }, - { - "epoch": 0.05687488727229002, - "grad_norm": 1.9924549596427101, - "learning_rate": 3.9924608914238595e-06, - "loss": 1.0446, - "step": 473 - }, - { - "epoch": 0.05699513016292912, - "grad_norm": 1.8765583556015917, - "learning_rate": 3.992393167486648e-06, - "loss": 1.0609, - "step": 474 - }, - { - "epoch": 0.05711537305356821, - "grad_norm": 2.397146268810025, - "learning_rate": 3.992325141304977e-06, - "loss": 1.0304, - "step": 475 - }, - { - "epoch": 0.0572356159442073, - "grad_norm": 3.2292453058983464, - "learning_rate": 3.992256812889166e-06, - "loss": 1.0867, - "step": 476 - }, - { - "epoch": 0.05735585883484639, - "grad_norm": 2.1060676925952446, - "learning_rate": 3.992188182249582e-06, - "loss": 0.9955, - "step": 477 - }, - { - "epoch": 0.05747610172548548, - "grad_norm": 4.152338926899245, - "learning_rate": 3.992119249396633e-06, - "loss": 1.141, - "step": 478 - }, - { - "epoch": 0.05759634461612457, - "grad_norm": 1.7710523656882333, - "learning_rate": 3.992050014340778e-06, - "loss": 1.0467, - "step": 479 - }, - { - "epoch": 0.057716587506763666, - "grad_norm": 1.13158123583428, - "learning_rate": 3.99198047709252e-06, - "loss": 0.8054, - "step": 480 - }, - { - "epoch": 0.057836830397402755, - "grad_norm": 1.6137225491931804, - "learning_rate": 3.991910637662408e-06, - "loss": 1.0156, - "step": 481 - }, - { - "epoch": 0.057957073288041845, - "grad_norm": 1.9557133727741787, - "learning_rate": 3.9918404960610355e-06, - "loss": 1.0319, - "step": 482 - }, - { - "epoch": 0.058077316178680935, - "grad_norm": 2.016690822593261, - "learning_rate": 3.991770052299043e-06, - "loss": 1.0007, - "step": 483 - }, - { - "epoch": 0.058197559069320025, - "grad_norm": 2.4018206763495753, - "learning_rate": 3.991699306387118e-06, - "loss": 1.1008, - "step": 484 - }, - { - "epoch": 0.058317801959959115, - "grad_norm": 1.8844125367533235, - "learning_rate": 3.991628258335991e-06, - "loss": 1.0136, - "step": 485 - }, - { - "epoch": 0.05843804485059821, - "grad_norm": 2.7673798936317886, - "learning_rate": 3.991556908156442e-06, - "loss": 1.0992, - "step": 486 - }, - { - "epoch": 0.0585582877412373, - "grad_norm": 1.5561593148033772, - "learning_rate": 3.9914852558592914e-06, - "loss": 1.095, - "step": 487 - }, - { - "epoch": 0.05867853063187639, - "grad_norm": 2.8595120756718417, - "learning_rate": 3.991413301455413e-06, - "loss": 1.0438, - "step": 488 - }, - { - "epoch": 0.05879877352251548, - "grad_norm": 2.086827806074524, - "learning_rate": 3.991341044955719e-06, - "loss": 0.9951, - "step": 489 - }, - { - "epoch": 0.05891901641315457, - "grad_norm": 2.022100529776092, - "learning_rate": 3.991268486371172e-06, - "loss": 1.0438, - "step": 490 - }, - { - "epoch": 0.05903925930379366, - "grad_norm": 2.106874467322894, - "learning_rate": 3.991195625712779e-06, - "loss": 1.0942, - "step": 491 - }, - { - "epoch": 0.05915950219443276, - "grad_norm": 2.9427927910941873, - "learning_rate": 3.991122462991592e-06, - "loss": 1.0432, - "step": 492 - }, - { - "epoch": 0.05927974508507185, - "grad_norm": 2.656143455612519, - "learning_rate": 3.991048998218712e-06, - "loss": 1.0371, - "step": 493 - }, - { - "epoch": 0.05939998797571094, - "grad_norm": 2.0307074188948997, - "learning_rate": 3.990975231405281e-06, - "loss": 0.9868, - "step": 494 - }, - { - "epoch": 0.05952023086635003, - "grad_norm": 1.766275606718946, - "learning_rate": 3.990901162562491e-06, - "loss": 1.009, - "step": 495 - }, - { - "epoch": 0.05964047375698912, - "grad_norm": 1.849779195753867, - "learning_rate": 3.9908267917015765e-06, - "loss": 1.1319, - "step": 496 - }, - { - "epoch": 0.059760716647628206, - "grad_norm": 1.7949285164628217, - "learning_rate": 3.990752118833821e-06, - "loss": 1.1445, - "step": 497 - }, - { - "epoch": 0.0598809595382673, - "grad_norm": 1.8364399973068808, - "learning_rate": 3.990677143970553e-06, - "loss": 1.0044, - "step": 498 - }, - { - "epoch": 0.06000120242890639, - "grad_norm": 1.7780688473059438, - "learning_rate": 3.990601867123144e-06, - "loss": 1.0355, - "step": 499 - }, - { - "epoch": 0.06012144531954548, - "grad_norm": 2.5003521671207127, - "learning_rate": 3.990526288303014e-06, - "loss": 1.0786, - "step": 500 - }, - { - "epoch": 0.06024168821018457, - "grad_norm": 1.5970434678527254, - "learning_rate": 3.9904504075216295e-06, - "loss": 1.1288, - "step": 501 - }, - { - "epoch": 0.06036193110082366, - "grad_norm": 2.0279694108104445, - "learning_rate": 3.990374224790501e-06, - "loss": 1.1597, - "step": 502 - }, - { - "epoch": 0.06048217399146275, - "grad_norm": 1.9481357172505074, - "learning_rate": 3.990297740121185e-06, - "loss": 0.9379, - "step": 503 - }, - { - "epoch": 0.06060241688210185, - "grad_norm": 1.9213281489246494, - "learning_rate": 3.990220953525284e-06, - "loss": 1.0063, - "step": 504 - }, - { - "epoch": 0.06072265977274094, - "grad_norm": 2.561135091368238, - "learning_rate": 3.9901438650144465e-06, - "loss": 0.9745, - "step": 505 - }, - { - "epoch": 0.06084290266338003, - "grad_norm": 2.273739824151688, - "learning_rate": 3.990066474600367e-06, - "loss": 1.1412, - "step": 506 - }, - { - "epoch": 0.06096314555401912, - "grad_norm": 1.6676765561178808, - "learning_rate": 3.989988782294786e-06, - "loss": 0.9032, - "step": 507 - }, - { - "epoch": 0.06108338844465821, - "grad_norm": 2.2620798721984965, - "learning_rate": 3.989910788109489e-06, - "loss": 1.1731, - "step": 508 - }, - { - "epoch": 0.0612036313352973, - "grad_norm": 2.093599342505556, - "learning_rate": 3.989832492056307e-06, - "loss": 0.9767, - "step": 509 - }, - { - "epoch": 0.06132387422593639, - "grad_norm": 1.8364906289442904, - "learning_rate": 3.989753894147119e-06, - "loss": 1.0316, - "step": 510 - }, - { - "epoch": 0.061444117116575485, - "grad_norm": 1.5190069475749628, - "learning_rate": 3.989674994393846e-06, - "loss": 1.0222, - "step": 511 - }, - { - "epoch": 0.061564360007214575, - "grad_norm": 1.8796918023901301, - "learning_rate": 3.98959579280846e-06, - "loss": 1.1692, - "step": 512 - }, - { - "epoch": 0.061684602897853665, - "grad_norm": 2.4163550691213325, - "learning_rate": 3.989516289402973e-06, - "loss": 1.0591, - "step": 513 - }, - { - "epoch": 0.061804845788492754, - "grad_norm": 3.2660779136993185, - "learning_rate": 3.989436484189447e-06, - "loss": 1.0464, - "step": 514 - }, - { - "epoch": 0.061925088679131844, - "grad_norm": 2.4594186122182884, - "learning_rate": 3.9893563771799885e-06, - "loss": 1.0426, - "step": 515 - }, - { - "epoch": 0.062045331569770934, - "grad_norm": 2.2968302612099603, - "learning_rate": 3.989275968386749e-06, - "loss": 1.0943, - "step": 516 - }, - { - "epoch": 0.06216557446041003, - "grad_norm": 1.844337822265696, - "learning_rate": 3.989195257821926e-06, - "loss": 0.9967, - "step": 517 - }, - { - "epoch": 0.06228581735104912, - "grad_norm": 1.9220979142588681, - "learning_rate": 3.989114245497765e-06, - "loss": 1.0737, - "step": 518 - }, - { - "epoch": 0.06240606024168821, - "grad_norm": 1.987644295882527, - "learning_rate": 3.989032931426554e-06, - "loss": 1.1737, - "step": 519 - }, - { - "epoch": 0.06252630313232731, - "grad_norm": 1.9764766146730928, - "learning_rate": 3.9889513156206295e-06, - "loss": 1.0972, - "step": 520 - }, - { - "epoch": 0.06264654602296639, - "grad_norm": 2.154092012903565, - "learning_rate": 3.988869398092371e-06, - "loss": 0.9467, - "step": 521 - }, - { - "epoch": 0.06276678891360549, - "grad_norm": 2.73179164377922, - "learning_rate": 3.988787178854206e-06, - "loss": 1.0115, - "step": 522 - }, - { - "epoch": 0.06288703180424457, - "grad_norm": 1.905856732935246, - "learning_rate": 3.988704657918608e-06, - "loss": 1.1007, - "step": 523 - }, - { - "epoch": 0.06300727469488367, - "grad_norm": 2.2131269956642767, - "learning_rate": 3.988621835298094e-06, - "loss": 1.0397, - "step": 524 - }, - { - "epoch": 0.06312751758552275, - "grad_norm": 1.7725031739974557, - "learning_rate": 3.988538711005229e-06, - "loss": 1.1439, - "step": 525 - }, - { - "epoch": 0.06324776047616185, - "grad_norm": 2.070723352511561, - "learning_rate": 3.988455285052622e-06, - "loss": 1.122, - "step": 526 - }, - { - "epoch": 0.06336800336680094, - "grad_norm": 1.749825741402457, - "learning_rate": 3.98837155745293e-06, - "loss": 1.0575, - "step": 527 - }, - { - "epoch": 0.06348824625744003, - "grad_norm": 9.631173272046942, - "learning_rate": 3.988287528218854e-06, - "loss": 0.9881, - "step": 528 - }, - { - "epoch": 0.06360848914807912, - "grad_norm": 1.771399650524424, - "learning_rate": 3.98820319736314e-06, - "loss": 1.1243, - "step": 529 - }, - { - "epoch": 0.0637287320387182, - "grad_norm": 1.6591194775070632, - "learning_rate": 3.988118564898582e-06, - "loss": 1.0827, - "step": 530 - }, - { - "epoch": 0.0638489749293573, - "grad_norm": 7.273425437187902, - "learning_rate": 3.988033630838019e-06, - "loss": 1.1162, - "step": 531 - }, - { - "epoch": 0.0639692178199964, - "grad_norm": 1.5806622645524495, - "learning_rate": 3.987948395194334e-06, - "loss": 1.101, - "step": 532 - }, - { - "epoch": 0.06408946071063548, - "grad_norm": 1.7550055578203583, - "learning_rate": 3.987862857980458e-06, - "loss": 1.0035, - "step": 533 - }, - { - "epoch": 0.06420970360127458, - "grad_norm": 1.831714073981532, - "learning_rate": 3.987777019209368e-06, - "loss": 0.9987, - "step": 534 - }, - { - "epoch": 0.06432994649191366, - "grad_norm": 1.478592703056106, - "learning_rate": 3.987690878894084e-06, - "loss": 1.0384, - "step": 535 - }, - { - "epoch": 0.06445018938255276, - "grad_norm": 2.59686762949865, - "learning_rate": 3.987604437047673e-06, - "loss": 1.0753, - "step": 536 - }, - { - "epoch": 0.06457043227319184, - "grad_norm": 2.0097800978129277, - "learning_rate": 3.987517693683251e-06, - "loss": 1.0089, - "step": 537 - }, - { - "epoch": 0.06469067516383094, - "grad_norm": 2.2085702037332013, - "learning_rate": 3.9874306488139745e-06, - "loss": 1.1826, - "step": 538 - }, - { - "epoch": 0.06481091805447003, - "grad_norm": 1.7928376326479134, - "learning_rate": 3.987343302453049e-06, - "loss": 1.0982, - "step": 539 - }, - { - "epoch": 0.06493116094510912, - "grad_norm": 1.5689250833144401, - "learning_rate": 3.987255654613724e-06, - "loss": 1.0566, - "step": 540 - }, - { - "epoch": 0.06505140383574821, - "grad_norm": 2.116937826714926, - "learning_rate": 3.987167705309296e-06, - "loss": 0.9297, - "step": 541 - }, - { - "epoch": 0.0651716467263873, - "grad_norm": 2.6474906556682285, - "learning_rate": 3.987079454553108e-06, - "loss": 1.1759, - "step": 542 - }, - { - "epoch": 0.0652918896170264, - "grad_norm": 1.755522529107635, - "learning_rate": 3.986990902358546e-06, - "loss": 1.1467, - "step": 543 - }, - { - "epoch": 0.06541213250766549, - "grad_norm": 1.9773240865018316, - "learning_rate": 3.986902048739045e-06, - "loss": 1.1546, - "step": 544 - }, - { - "epoch": 0.06553237539830457, - "grad_norm": 3.678006251775294, - "learning_rate": 3.986812893708082e-06, - "loss": 1.0246, - "step": 545 - }, - { - "epoch": 0.06565261828894367, - "grad_norm": 1.8457524691651368, - "learning_rate": 3.9867234372791826e-06, - "loss": 1.0466, - "step": 546 - }, - { - "epoch": 0.06577286117958275, - "grad_norm": 1.6292224458217364, - "learning_rate": 3.986633679465918e-06, - "loss": 1.1005, - "step": 547 - }, - { - "epoch": 0.06589310407022185, - "grad_norm": 2.6258544439518223, - "learning_rate": 3.986543620281904e-06, - "loss": 1.0397, - "step": 548 - }, - { - "epoch": 0.06601334696086093, - "grad_norm": 1.7468960346190674, - "learning_rate": 3.986453259740802e-06, - "loss": 1.1363, - "step": 549 - }, - { - "epoch": 0.06613358985150003, - "grad_norm": 3.0208263704066214, - "learning_rate": 3.986362597856319e-06, - "loss": 1.0146, - "step": 550 - }, - { - "epoch": 0.06625383274213913, - "grad_norm": 2.7355993527035283, - "learning_rate": 3.986271634642211e-06, - "loss": 1.0438, - "step": 551 - }, - { - "epoch": 0.06637407563277821, - "grad_norm": 1.8238079907385836, - "learning_rate": 3.986180370112274e-06, - "loss": 1.0426, - "step": 552 - }, - { - "epoch": 0.0664943185234173, - "grad_norm": 2.0155579311833978, - "learning_rate": 3.986088804280354e-06, - "loss": 0.9745, - "step": 553 - }, - { - "epoch": 0.06661456141405639, - "grad_norm": 2.0398919183485438, - "learning_rate": 3.985996937160342e-06, - "loss": 1.163, - "step": 554 - }, - { - "epoch": 0.06673480430469549, - "grad_norm": 2.285930169928813, - "learning_rate": 3.985904768766173e-06, - "loss": 0.9284, - "step": 555 - }, - { - "epoch": 0.06685504719533458, - "grad_norm": 3.6550079583578445, - "learning_rate": 3.98581229911183e-06, - "loss": 0.9882, - "step": 556 - }, - { - "epoch": 0.06697529008597367, - "grad_norm": 1.5250681439793166, - "learning_rate": 3.985719528211341e-06, - "loss": 1.1383, - "step": 557 - }, - { - "epoch": 0.06709553297661276, - "grad_norm": 0.9315103563766026, - "learning_rate": 3.985626456078777e-06, - "loss": 0.9036, - "step": 558 - }, - { - "epoch": 0.06721577586725185, - "grad_norm": 1.964535273762706, - "learning_rate": 3.985533082728259e-06, - "loss": 1.0916, - "step": 559 - }, - { - "epoch": 0.06733601875789094, - "grad_norm": 1.740932953745915, - "learning_rate": 3.985439408173951e-06, - "loss": 0.9736, - "step": 560 - }, - { - "epoch": 0.06745626164853002, - "grad_norm": 2.016208889394908, - "learning_rate": 3.9853454324300634e-06, - "loss": 0.9435, - "step": 561 - }, - { - "epoch": 0.06757650453916912, - "grad_norm": 1.955165316136114, - "learning_rate": 3.985251155510852e-06, - "loss": 1.0047, - "step": 562 - }, - { - "epoch": 0.06769674742980822, - "grad_norm": 1.7660816243577313, - "learning_rate": 3.98515657743062e-06, - "loss": 1.0362, - "step": 563 - }, - { - "epoch": 0.0678169903204473, - "grad_norm": 1.6922091721320756, - "learning_rate": 3.985061698203711e-06, - "loss": 1.0017, - "step": 564 - }, - { - "epoch": 0.0679372332110864, - "grad_norm": 0.8984786314463864, - "learning_rate": 3.984966517844523e-06, - "loss": 0.9018, - "step": 565 - }, - { - "epoch": 0.06805747610172548, - "grad_norm": 2.1345600053578697, - "learning_rate": 3.984871036367492e-06, - "loss": 1.0333, - "step": 566 - }, - { - "epoch": 0.06817771899236458, - "grad_norm": 2.4166529315251237, - "learning_rate": 3.984775253787102e-06, - "loss": 1.0612, - "step": 567 - }, - { - "epoch": 0.06829796188300366, - "grad_norm": 3.243578509022011, - "learning_rate": 3.984679170117885e-06, - "loss": 1.1048, - "step": 568 - }, - { - "epoch": 0.06841820477364276, - "grad_norm": 3.7332768441649518, - "learning_rate": 3.984582785374415e-06, - "loss": 1.0061, - "step": 569 - }, - { - "epoch": 0.06853844766428185, - "grad_norm": 1.982985242649248, - "learning_rate": 3.9844860995713155e-06, - "loss": 1.0374, - "step": 570 - }, - { - "epoch": 0.06865869055492094, - "grad_norm": 2.063541727260208, - "learning_rate": 3.9843891127232524e-06, - "loss": 1.0435, - "step": 571 - }, - { - "epoch": 0.06877893344556003, - "grad_norm": 2.1612062851129052, - "learning_rate": 3.984291824844938e-06, - "loss": 0.8966, - "step": 572 - }, - { - "epoch": 0.06889917633619912, - "grad_norm": 2.3517943613307395, - "learning_rate": 3.984194235951132e-06, - "loss": 1.0721, - "step": 573 - }, - { - "epoch": 0.06901941922683821, - "grad_norm": 3.0655937916511635, - "learning_rate": 3.9840963460566375e-06, - "loss": 1.077, - "step": 574 - }, - { - "epoch": 0.06913966211747731, - "grad_norm": 1.4238832146514209, - "learning_rate": 3.983998155176305e-06, - "loss": 1.1172, - "step": 575 - }, - { - "epoch": 0.06925990500811639, - "grad_norm": 0.8745326088136031, - "learning_rate": 3.9838996633250305e-06, - "loss": 0.8248, - "step": 576 - }, - { - "epoch": 0.06938014789875549, - "grad_norm": 2.7514939151774964, - "learning_rate": 3.983800870517753e-06, - "loss": 1.1103, - "step": 577 - }, - { - "epoch": 0.06950039078939457, - "grad_norm": 2.7916759566726594, - "learning_rate": 3.983701776769463e-06, - "loss": 1.0162, - "step": 578 - }, - { - "epoch": 0.06962063368003367, - "grad_norm": 1.7653272934812234, - "learning_rate": 3.9836023820951885e-06, - "loss": 1.0762, - "step": 579 - }, - { - "epoch": 0.06974087657067275, - "grad_norm": 2.019694987582281, - "learning_rate": 3.983502686510011e-06, - "loss": 0.9182, - "step": 580 - }, - { - "epoch": 0.06986111946131185, - "grad_norm": 1.6791039451199725, - "learning_rate": 3.9834026900290525e-06, - "loss": 0.9537, - "step": 581 - }, - { - "epoch": 0.06998136235195095, - "grad_norm": 1.8492753594343343, - "learning_rate": 3.983302392667482e-06, - "loss": 1.2197, - "step": 582 - }, - { - "epoch": 0.07010160524259003, - "grad_norm": 1.9524161921842003, - "learning_rate": 3.983201794440517e-06, - "loss": 1.1669, - "step": 583 - }, - { - "epoch": 0.07022184813322913, - "grad_norm": 1.6090657205525358, - "learning_rate": 3.9831008953634165e-06, - "loss": 0.9129, - "step": 584 - }, - { - "epoch": 0.07034209102386821, - "grad_norm": 1.6780726277225737, - "learning_rate": 3.9829996954514864e-06, - "loss": 1.0367, - "step": 585 - }, - { - "epoch": 0.0704623339145073, - "grad_norm": 1.6480237975022658, - "learning_rate": 3.982898194720079e-06, - "loss": 1.0681, - "step": 586 - }, - { - "epoch": 0.0705825768051464, - "grad_norm": 1.8469520823857088, - "learning_rate": 3.982796393184592e-06, - "loss": 1.041, - "step": 587 - }, - { - "epoch": 0.07070281969578548, - "grad_norm": 0.8248168077240037, - "learning_rate": 3.98269429086047e-06, - "loss": 0.8833, - "step": 588 - }, - { - "epoch": 0.07082306258642458, - "grad_norm": 2.1384231492424655, - "learning_rate": 3.982591887763199e-06, - "loss": 1.0883, - "step": 589 - }, - { - "epoch": 0.07094330547706366, - "grad_norm": 2.0461016642798686, - "learning_rate": 3.982489183908316e-06, - "loss": 1.0429, - "step": 590 - }, - { - "epoch": 0.07106354836770276, - "grad_norm": 1.71750512556841, - "learning_rate": 3.982386179311399e-06, - "loss": 1.0711, - "step": 591 - }, - { - "epoch": 0.07118379125834184, - "grad_norm": 2.0812048784010013, - "learning_rate": 3.982282873988075e-06, - "loss": 1.101, - "step": 592 - }, - { - "epoch": 0.07130403414898094, - "grad_norm": 1.7249179101844239, - "learning_rate": 3.982179267954016e-06, - "loss": 1.099, - "step": 593 - }, - { - "epoch": 0.07142427703962004, - "grad_norm": 2.2950264894706174, - "learning_rate": 3.982075361224937e-06, - "loss": 1.198, - "step": 594 - }, - { - "epoch": 0.07154451993025912, - "grad_norm": 1.7375971787535172, - "learning_rate": 3.981971153816602e-06, - "loss": 1.112, - "step": 595 - }, - { - "epoch": 0.07166476282089822, - "grad_norm": 1.401737985501304, - "learning_rate": 3.981866645744819e-06, - "loss": 1.1794, - "step": 596 - }, - { - "epoch": 0.0717850057115373, - "grad_norm": 1.8390648361359752, - "learning_rate": 3.9817618370254416e-06, - "loss": 1.0376, - "step": 597 - }, - { - "epoch": 0.0719052486021764, - "grad_norm": 3.193359375, - "learning_rate": 3.9816567276743684e-06, - "loss": 1.0924, - "step": 598 - }, - { - "epoch": 0.0720254914928155, - "grad_norm": 1.7639247843910504, - "learning_rate": 3.9815513177075466e-06, - "loss": 0.9958, - "step": 599 - }, - { - "epoch": 0.07214573438345458, - "grad_norm": 1.4947942363792244, - "learning_rate": 3.9814456071409646e-06, - "loss": 0.9349, - "step": 600 - }, - { - "epoch": 0.07226597727409367, - "grad_norm": 2.317518716549318, - "learning_rate": 3.981339595990659e-06, - "loss": 1.0876, - "step": 601 - }, - { - "epoch": 0.07238622016473276, - "grad_norm": 1.8150369712910315, - "learning_rate": 3.981233284272713e-06, - "loss": 1.0352, - "step": 602 - }, - { - "epoch": 0.07250646305537185, - "grad_norm": 1.7358996588894249, - "learning_rate": 3.981126672003253e-06, - "loss": 1.1235, - "step": 603 - }, - { - "epoch": 0.07262670594601094, - "grad_norm": 2.0116393434722752, - "learning_rate": 3.981019759198451e-06, - "loss": 1.0159, - "step": 604 - }, - { - "epoch": 0.07274694883665003, - "grad_norm": 4.088059292014295, - "learning_rate": 3.980912545874528e-06, - "loss": 1.0586, - "step": 605 - }, - { - "epoch": 0.07286719172728913, - "grad_norm": 2.480179513002543, - "learning_rate": 3.980805032047746e-06, - "loss": 1.0862, - "step": 606 - }, - { - "epoch": 0.07298743461792821, - "grad_norm": 1.916979128481691, - "learning_rate": 3.980697217734415e-06, - "loss": 1.0362, - "step": 607 - }, - { - "epoch": 0.07310767750856731, - "grad_norm": 2.1049974016549573, - "learning_rate": 3.980589102950891e-06, - "loss": 1.1475, - "step": 608 - }, - { - "epoch": 0.07322792039920639, - "grad_norm": 2.0946533759432233, - "learning_rate": 3.9804806877135755e-06, - "loss": 0.9965, - "step": 609 - }, - { - "epoch": 0.07334816328984549, - "grad_norm": 2.128025033538252, - "learning_rate": 3.980371972038915e-06, - "loss": 1.0954, - "step": 610 - }, - { - "epoch": 0.07346840618048459, - "grad_norm": 1.628953818671812, - "learning_rate": 3.980262955943399e-06, - "loss": 1.0652, - "step": 611 - }, - { - "epoch": 0.07358864907112367, - "grad_norm": 2.21186976104228, - "learning_rate": 3.980153639443569e-06, - "loss": 1.103, - "step": 612 - }, - { - "epoch": 0.07370889196176277, - "grad_norm": 1.8503185874247392, - "learning_rate": 3.980044022556005e-06, - "loss": 1.0314, - "step": 613 - }, - { - "epoch": 0.07382913485240185, - "grad_norm": 1.9721303106510788, - "learning_rate": 3.9799341052973375e-06, - "loss": 0.9502, - "step": 614 - }, - { - "epoch": 0.07394937774304094, - "grad_norm": 2.336335725553491, - "learning_rate": 3.979823887684241e-06, - "loss": 0.9804, - "step": 615 - }, - { - "epoch": 0.07406962063368003, - "grad_norm": 2.473237124608604, - "learning_rate": 3.979713369733434e-06, - "loss": 1.0797, - "step": 616 - }, - { - "epoch": 0.07418986352431912, - "grad_norm": 2.013348617285174, - "learning_rate": 3.979602551461683e-06, - "loss": 1.0743, - "step": 617 - }, - { - "epoch": 0.07431010641495822, - "grad_norm": 2.7041400921832945, - "learning_rate": 3.979491432885799e-06, - "loss": 1.1528, - "step": 618 - }, - { - "epoch": 0.0744303493055973, - "grad_norm": 1.8888766188628152, - "learning_rate": 3.97938001402264e-06, - "loss": 1.0603, - "step": 619 - }, - { - "epoch": 0.0745505921962364, - "grad_norm": 4.652479040066889, - "learning_rate": 3.979268294889105e-06, - "loss": 1.0387, - "step": 620 - }, - { - "epoch": 0.07467083508687548, - "grad_norm": 1.7174850664225292, - "learning_rate": 3.979156275502143e-06, - "loss": 0.9807, - "step": 621 - }, - { - "epoch": 0.07479107797751458, - "grad_norm": 2.9857604321129303, - "learning_rate": 3.979043955878749e-06, - "loss": 1.1489, - "step": 622 - }, - { - "epoch": 0.07491132086815366, - "grad_norm": 1.7311411083143258, - "learning_rate": 3.978931336035959e-06, - "loss": 1.0585, - "step": 623 - }, - { - "epoch": 0.07503156375879276, - "grad_norm": 2.1446627661673796, - "learning_rate": 3.9788184159908595e-06, - "loss": 1.0573, - "step": 624 - }, - { - "epoch": 0.07515180664943186, - "grad_norm": 2.5717497636378313, - "learning_rate": 3.97870519576058e-06, - "loss": 1.0537, - "step": 625 - }, - { - "epoch": 0.07527204954007094, - "grad_norm": 2.2018370414534996, - "learning_rate": 3.978591675362295e-06, - "loss": 1.0351, - "step": 626 - }, - { - "epoch": 0.07539229243071004, - "grad_norm": 7.636405794019051, - "learning_rate": 3.978477854813226e-06, - "loss": 1.1065, - "step": 627 - }, - { - "epoch": 0.07551253532134912, - "grad_norm": 1.7124885948525321, - "learning_rate": 3.97836373413064e-06, - "loss": 1.0548, - "step": 628 - }, - { - "epoch": 0.07563277821198822, - "grad_norm": 1.6740181279813127, - "learning_rate": 3.978249313331848e-06, - "loss": 0.98, - "step": 629 - }, - { - "epoch": 0.07575302110262731, - "grad_norm": 3.1338744230642255, - "learning_rate": 3.978134592434208e-06, - "loss": 0.8521, - "step": 630 - }, - { - "epoch": 0.0758732639932664, - "grad_norm": 1.0000079273863334, - "learning_rate": 3.978019571455123e-06, - "loss": 0.878, - "step": 631 - }, - { - "epoch": 0.07599350688390549, - "grad_norm": 2.860789465082216, - "learning_rate": 3.977904250412042e-06, - "loss": 1.0699, - "step": 632 - }, - { - "epoch": 0.07611374977454458, - "grad_norm": 2.0203122781397913, - "learning_rate": 3.97778862932246e-06, - "loss": 1.09, - "step": 633 - }, - { - "epoch": 0.07623399266518367, - "grad_norm": 2.6204529389681244, - "learning_rate": 3.9776727082039144e-06, - "loss": 1.1637, - "step": 634 - }, - { - "epoch": 0.07635423555582276, - "grad_norm": 0.836851315722309, - "learning_rate": 3.977556487073991e-06, - "loss": 0.8143, - "step": 635 - }, - { - "epoch": 0.07647447844646185, - "grad_norm": 1.5201120712976428, - "learning_rate": 3.97743996595032e-06, - "loss": 1.0352, - "step": 636 - }, - { - "epoch": 0.07659472133710095, - "grad_norm": 1.6255939571902833, - "learning_rate": 3.9773231448505804e-06, - "loss": 1.0448, - "step": 637 - }, - { - "epoch": 0.07671496422774003, - "grad_norm": 2.2885865637835114, - "learning_rate": 3.977206023792491e-06, - "loss": 1.0005, - "step": 638 - }, - { - "epoch": 0.07683520711837913, - "grad_norm": 2.2009466778727536, - "learning_rate": 3.97708860279382e-06, - "loss": 1.0435, - "step": 639 - }, - { - "epoch": 0.07695545000901821, - "grad_norm": 1.7272053898229425, - "learning_rate": 3.97697088187238e-06, - "loss": 1.0159, - "step": 640 - }, - { - "epoch": 0.07707569289965731, - "grad_norm": 1.9382696776407866, - "learning_rate": 3.976852861046029e-06, - "loss": 1.1414, - "step": 641 - }, - { - "epoch": 0.0771959357902964, - "grad_norm": 1.814268367628624, - "learning_rate": 3.97673454033267e-06, - "loss": 1.0232, - "step": 642 - }, - { - "epoch": 0.07731617868093549, - "grad_norm": 1.6952142643044403, - "learning_rate": 3.976615919750254e-06, - "loss": 1.0447, - "step": 643 - }, - { - "epoch": 0.07743642157157458, - "grad_norm": 1.826590456393768, - "learning_rate": 3.976496999316775e-06, - "loss": 1.0939, - "step": 644 - }, - { - "epoch": 0.07755666446221367, - "grad_norm": 1.9217243290587656, - "learning_rate": 3.976377779050271e-06, - "loss": 1.0695, - "step": 645 - }, - { - "epoch": 0.07767690735285276, - "grad_norm": 1.86821243056015, - "learning_rate": 3.976258258968831e-06, - "loss": 1.0612, - "step": 646 - }, - { - "epoch": 0.07779715024349185, - "grad_norm": 2.1547813389560853, - "learning_rate": 3.976138439090583e-06, - "loss": 0.9695, - "step": 647 - }, - { - "epoch": 0.07791739313413094, - "grad_norm": 1.950830744801786, - "learning_rate": 3.976018319433706e-06, - "loss": 1.0708, - "step": 648 - }, - { - "epoch": 0.07803763602477004, - "grad_norm": 2.0718404637560974, - "learning_rate": 3.9758979000164205e-06, - "loss": 1.1468, - "step": 649 - }, - { - "epoch": 0.07815787891540912, - "grad_norm": 1.8773085052318699, - "learning_rate": 3.975777180856995e-06, - "loss": 0.9467, - "step": 650 - }, - { - "epoch": 0.07827812180604822, - "grad_norm": 1.9526815902922847, - "learning_rate": 3.975656161973742e-06, - "loss": 1.0892, - "step": 651 - }, - { - "epoch": 0.0783983646966873, - "grad_norm": 2.1812778809959803, - "learning_rate": 3.9755348433850194e-06, - "loss": 1.1182, - "step": 652 - }, - { - "epoch": 0.0785186075873264, - "grad_norm": 1.0517047666811088, - "learning_rate": 3.975413225109232e-06, - "loss": 0.9465, - "step": 653 - }, - { - "epoch": 0.0786388504779655, - "grad_norm": 2.8759515265782105, - "learning_rate": 3.975291307164829e-06, - "loss": 1.159, - "step": 654 - }, - { - "epoch": 0.07875909336860458, - "grad_norm": 1.834667269608672, - "learning_rate": 3.975169089570306e-06, - "loss": 1.0786, - "step": 655 - }, - { - "epoch": 0.07887933625924368, - "grad_norm": 1.7499615801272286, - "learning_rate": 3.975046572344202e-06, - "loss": 1.1484, - "step": 656 - }, - { - "epoch": 0.07899957914988276, - "grad_norm": 1.5956235606646145, - "learning_rate": 3.974923755505103e-06, - "loss": 0.9478, - "step": 657 - }, - { - "epoch": 0.07911982204052186, - "grad_norm": 1.585504106206518, - "learning_rate": 3.974800639071641e-06, - "loss": 1.1348, - "step": 658 - }, - { - "epoch": 0.07924006493116094, - "grad_norm": 2.971966416550415, - "learning_rate": 3.974677223062492e-06, - "loss": 1.2318, - "step": 659 - }, - { - "epoch": 0.07936030782180004, - "grad_norm": 2.1205323321093212, - "learning_rate": 3.974553507496378e-06, - "loss": 0.9737, - "step": 660 - }, - { - "epoch": 0.07948055071243913, - "grad_norm": 1.9154304442934673, - "learning_rate": 3.974429492392068e-06, - "loss": 1.1076, - "step": 661 - }, - { - "epoch": 0.07960079360307822, - "grad_norm": 2.0006044189767374, - "learning_rate": 3.974305177768373e-06, - "loss": 1.1324, - "step": 662 - }, - { - "epoch": 0.07972103649371731, - "grad_norm": 4.238335928146722, - "learning_rate": 3.974180563644152e-06, - "loss": 1.0816, - "step": 663 - }, - { - "epoch": 0.0798412793843564, - "grad_norm": 1.851097853420897, - "learning_rate": 3.97405565003831e-06, - "loss": 1.1192, - "step": 664 - }, - { - "epoch": 0.07996152227499549, - "grad_norm": 2.2006607883948432, - "learning_rate": 3.973930436969794e-06, - "loss": 1.0177, - "step": 665 - }, - { - "epoch": 0.08008176516563459, - "grad_norm": 1.7118586320710858, - "learning_rate": 3.973804924457602e-06, - "loss": 1.087, - "step": 666 - }, - { - "epoch": 0.08020200805627367, - "grad_norm": 1.5793881035439676, - "learning_rate": 3.973679112520771e-06, - "loss": 1.0794, - "step": 667 - }, - { - "epoch": 0.08032225094691277, - "grad_norm": 1.681246154100068, - "learning_rate": 3.973553001178389e-06, - "loss": 1.216, - "step": 668 - }, - { - "epoch": 0.08044249383755185, - "grad_norm": 1.7755098040607369, - "learning_rate": 3.973426590449585e-06, - "loss": 0.9822, - "step": 669 - }, - { - "epoch": 0.08056273672819095, - "grad_norm": 1.8250531201927436, - "learning_rate": 3.9732998803535364e-06, - "loss": 0.9855, - "step": 670 - }, - { - "epoch": 0.08068297961883003, - "grad_norm": 1.8814031620980818, - "learning_rate": 3.973172870909465e-06, - "loss": 1.0871, - "step": 671 - }, - { - "epoch": 0.08080322250946913, - "grad_norm": 2.3739964221969165, - "learning_rate": 3.973045562136638e-06, - "loss": 1.0436, - "step": 672 - }, - { - "epoch": 0.08092346540010822, - "grad_norm": 1.8614207962199132, - "learning_rate": 3.972917954054368e-06, - "loss": 1.1392, - "step": 673 - }, - { - "epoch": 0.08104370829074731, - "grad_norm": 2.6051479982885923, - "learning_rate": 3.972790046682013e-06, - "loss": 1.038, - "step": 674 - }, - { - "epoch": 0.0811639511813864, - "grad_norm": 1.9866095749547776, - "learning_rate": 3.972661840038977e-06, - "loss": 1.02, - "step": 675 - }, - { - "epoch": 0.08128419407202549, - "grad_norm": 2.083602798837037, - "learning_rate": 3.972533334144707e-06, - "loss": 1.0637, - "step": 676 - }, - { - "epoch": 0.08140443696266458, - "grad_norm": 2.113867715347169, - "learning_rate": 3.972404529018699e-06, - "loss": 1.0187, - "step": 677 - }, - { - "epoch": 0.08152467985330367, - "grad_norm": 2.6084914853185723, - "learning_rate": 3.972275424680493e-06, - "loss": 1.0822, - "step": 678 - }, - { - "epoch": 0.08164492274394276, - "grad_norm": 1.7807628567783242, - "learning_rate": 3.972146021149673e-06, - "loss": 1.141, - "step": 679 - }, - { - "epoch": 0.08176516563458186, - "grad_norm": 2.0976210152563355, - "learning_rate": 3.972016318445868e-06, - "loss": 1.0182, - "step": 680 - }, - { - "epoch": 0.08188540852522094, - "grad_norm": 2.2286666832293527, - "learning_rate": 3.971886316588757e-06, - "loss": 1.1382, - "step": 681 - }, - { - "epoch": 0.08200565141586004, - "grad_norm": 2.5873895815080847, - "learning_rate": 3.9717560155980595e-06, - "loss": 0.9732, - "step": 682 - }, - { - "epoch": 0.08212589430649912, - "grad_norm": 1.7059065476611042, - "learning_rate": 3.971625415493542e-06, - "loss": 1.1526, - "step": 683 - }, - { - "epoch": 0.08224613719713822, - "grad_norm": 1.967200623242007, - "learning_rate": 3.971494516295017e-06, - "loss": 1.0996, - "step": 684 - }, - { - "epoch": 0.08236638008777732, - "grad_norm": 3.2635399399667686, - "learning_rate": 3.971363318022341e-06, - "loss": 1.0794, - "step": 685 - }, - { - "epoch": 0.0824866229784164, - "grad_norm": 1.8342667139900233, - "learning_rate": 3.971231820695417e-06, - "loss": 0.9143, - "step": 686 - }, - { - "epoch": 0.0826068658690555, - "grad_norm": 1.5825963647558412, - "learning_rate": 3.971100024334193e-06, - "loss": 1.0413, - "step": 687 - }, - { - "epoch": 0.08272710875969458, - "grad_norm": 1.9539070699843342, - "learning_rate": 3.970967928958663e-06, - "loss": 1.0887, - "step": 688 - }, - { - "epoch": 0.08284735165033368, - "grad_norm": 1.5307863273041897, - "learning_rate": 3.970835534588865e-06, - "loss": 1.0582, - "step": 689 - }, - { - "epoch": 0.08296759454097276, - "grad_norm": 1.646227037042283, - "learning_rate": 3.970702841244883e-06, - "loss": 1.0845, - "step": 690 - }, - { - "epoch": 0.08308783743161186, - "grad_norm": 1.6144348896657217, - "learning_rate": 3.970569848946847e-06, - "loss": 1.0555, - "step": 691 - }, - { - "epoch": 0.08320808032225095, - "grad_norm": 1.8911263928105373, - "learning_rate": 3.970436557714932e-06, - "loss": 1.0535, - "step": 692 - }, - { - "epoch": 0.08332832321289003, - "grad_norm": 1.8358648407538707, - "learning_rate": 3.970302967569358e-06, - "loss": 1.0822, - "step": 693 - }, - { - "epoch": 0.08344856610352913, - "grad_norm": 2.0924386999706264, - "learning_rate": 3.9701690785303896e-06, - "loss": 0.9153, - "step": 694 - }, - { - "epoch": 0.08356880899416821, - "grad_norm": 3.26357559071205, - "learning_rate": 3.970034890618339e-06, - "loss": 1.1084, - "step": 695 - }, - { - "epoch": 0.08368905188480731, - "grad_norm": 1.7893139525035229, - "learning_rate": 3.969900403853562e-06, - "loss": 1.1041, - "step": 696 - }, - { - "epoch": 0.08380929477544641, - "grad_norm": 1.4532090029483027, - "learning_rate": 3.96976561825646e-06, - "loss": 1.0103, - "step": 697 - }, - { - "epoch": 0.08392953766608549, - "grad_norm": 1.8805047926006189, - "learning_rate": 3.969630533847479e-06, - "loss": 1.0972, - "step": 698 - }, - { - "epoch": 0.08404978055672459, - "grad_norm": 1.7908830999267231, - "learning_rate": 3.969495150647113e-06, - "loss": 1.0791, - "step": 699 - }, - { - "epoch": 0.08417002344736367, - "grad_norm": 2.802749346649908, - "learning_rate": 3.969359468675899e-06, - "loss": 0.9908, - "step": 700 - }, - { - "epoch": 0.08429026633800277, - "grad_norm": 1.7699542687321954, - "learning_rate": 3.969223487954418e-06, - "loss": 1.1215, - "step": 701 - }, - { - "epoch": 0.08441050922864185, - "grad_norm": 1.7296911003537083, - "learning_rate": 3.969087208503301e-06, - "loss": 1.051, - "step": 702 - }, - { - "epoch": 0.08453075211928095, - "grad_norm": 2.2019390404186323, - "learning_rate": 3.968950630343219e-06, - "loss": 1.0651, - "step": 703 - }, - { - "epoch": 0.08465099500992004, - "grad_norm": 4.357863516250771, - "learning_rate": 3.968813753494892e-06, - "loss": 1.1579, - "step": 704 - }, - { - "epoch": 0.08477123790055913, - "grad_norm": 1.8217423353100697, - "learning_rate": 3.968676577979084e-06, - "loss": 0.9806, - "step": 705 - }, - { - "epoch": 0.08489148079119822, - "grad_norm": 1.9133392836093865, - "learning_rate": 3.968539103816605e-06, - "loss": 1.009, - "step": 706 - }, - { - "epoch": 0.0850117236818373, - "grad_norm": 1.6440869816326107, - "learning_rate": 3.9684013310283085e-06, - "loss": 1.119, - "step": 707 - }, - { - "epoch": 0.0851319665724764, - "grad_norm": 3.2820904336635963, - "learning_rate": 3.9682632596350956e-06, - "loss": 0.8708, - "step": 708 - }, - { - "epoch": 0.0852522094631155, - "grad_norm": 1.7633900633750155, - "learning_rate": 3.968124889657911e-06, - "loss": 1.0081, - "step": 709 - }, - { - "epoch": 0.08537245235375458, - "grad_norm": 2.11760738474326, - "learning_rate": 3.967986221117746e-06, - "loss": 1.1354, - "step": 710 - }, - { - "epoch": 0.08549269524439368, - "grad_norm": 2.134143344265631, - "learning_rate": 3.967847254035635e-06, - "loss": 1.0966, - "step": 711 - }, - { - "epoch": 0.08561293813503276, - "grad_norm": 2.2246115184636293, - "learning_rate": 3.967707988432661e-06, - "loss": 1.0921, - "step": 712 - }, - { - "epoch": 0.08573318102567186, - "grad_norm": 2.2604284212946517, - "learning_rate": 3.967568424329949e-06, - "loss": 1.1037, - "step": 713 - }, - { - "epoch": 0.08585342391631094, - "grad_norm": 0.8087897685088646, - "learning_rate": 3.967428561748671e-06, - "loss": 0.8276, - "step": 714 - }, - { - "epoch": 0.08597366680695004, - "grad_norm": 1.7841593757396088, - "learning_rate": 3.967288400710045e-06, - "loss": 1.0979, - "step": 715 - }, - { - "epoch": 0.08609390969758914, - "grad_norm": 1.64294908396605, - "learning_rate": 3.9671479412353335e-06, - "loss": 1.1077, - "step": 716 - }, - { - "epoch": 0.08621415258822822, - "grad_norm": 1.8153399219040813, - "learning_rate": 3.967007183345843e-06, - "loss": 0.9738, - "step": 717 - }, - { - "epoch": 0.08633439547886732, - "grad_norm": 1.8897505700556771, - "learning_rate": 3.966866127062927e-06, - "loss": 1.1266, - "step": 718 - }, - { - "epoch": 0.0864546383695064, - "grad_norm": 1.0377665303750996, - "learning_rate": 3.966724772407982e-06, - "loss": 0.9133, - "step": 719 - }, - { - "epoch": 0.0865748812601455, - "grad_norm": 2.214839013580694, - "learning_rate": 3.966583119402454e-06, - "loss": 1.1114, - "step": 720 - }, - { - "epoch": 0.08669512415078459, - "grad_norm": 1.834622955460438, - "learning_rate": 3.9664411680678305e-06, - "loss": 1.0483, - "step": 721 - }, - { - "epoch": 0.08681536704142367, - "grad_norm": 0.8884135111744479, - "learning_rate": 3.966298918425644e-06, - "loss": 0.8735, - "step": 722 - }, - { - "epoch": 0.08693560993206277, - "grad_norm": 1.5512935839485231, - "learning_rate": 3.966156370497476e-06, - "loss": 1.0616, - "step": 723 - }, - { - "epoch": 0.08705585282270185, - "grad_norm": 1.6333145026983418, - "learning_rate": 3.96601352430495e-06, - "loss": 1.1099, - "step": 724 - }, - { - "epoch": 0.08717609571334095, - "grad_norm": 1.3743078483771025, - "learning_rate": 3.965870379869735e-06, - "loss": 1.0549, - "step": 725 - }, - { - "epoch": 0.08729633860398003, - "grad_norm": 1.9399238929370872, - "learning_rate": 3.965726937213547e-06, - "loss": 1.0915, - "step": 726 - }, - { - "epoch": 0.08741658149461913, - "grad_norm": 1.9702840610905141, - "learning_rate": 3.965583196358144e-06, - "loss": 1.0315, - "step": 727 - }, - { - "epoch": 0.08753682438525823, - "grad_norm": 1.9620948810714176, - "learning_rate": 3.965439157325335e-06, - "loss": 0.9792, - "step": 728 - }, - { - "epoch": 0.08765706727589731, - "grad_norm": 2.088845615355826, - "learning_rate": 3.965294820136968e-06, - "loss": 0.9797, - "step": 729 - }, - { - "epoch": 0.08777731016653641, - "grad_norm": 1.8295247514566801, - "learning_rate": 3.965150184814938e-06, - "loss": 1.0908, - "step": 730 - }, - { - "epoch": 0.08789755305717549, - "grad_norm": 1.8963883254835423, - "learning_rate": 3.965005251381189e-06, - "loss": 0.9809, - "step": 731 - }, - { - "epoch": 0.08801779594781459, - "grad_norm": 0.8612576064395387, - "learning_rate": 3.964860019857705e-06, - "loss": 0.8985, - "step": 732 - }, - { - "epoch": 0.08813803883845367, - "grad_norm": 1.751612397299044, - "learning_rate": 3.964714490266518e-06, - "loss": 1.0623, - "step": 733 - }, - { - "epoch": 0.08825828172909277, - "grad_norm": 0.8722170784232317, - "learning_rate": 3.964568662629706e-06, - "loss": 0.892, - "step": 734 - }, - { - "epoch": 0.08837852461973186, - "grad_norm": 2.0438271464215028, - "learning_rate": 3.9644225369693895e-06, - "loss": 1.0636, - "step": 735 - }, - { - "epoch": 0.08849876751037095, - "grad_norm": 1.84381484871308, - "learning_rate": 3.964276113307735e-06, - "loss": 1.1049, - "step": 736 - }, - { - "epoch": 0.08861901040101004, - "grad_norm": 1.7002382223727894, - "learning_rate": 3.9641293916669574e-06, - "loss": 1.0297, - "step": 737 - }, - { - "epoch": 0.08873925329164913, - "grad_norm": 2.0181869666497048, - "learning_rate": 3.9639823720693115e-06, - "loss": 1.0612, - "step": 738 - }, - { - "epoch": 0.08885949618228822, - "grad_norm": 0.8509411426008126, - "learning_rate": 3.963835054537102e-06, - "loss": 0.8836, - "step": 739 - }, - { - "epoch": 0.08897973907292732, - "grad_norm": 2.14770317706325, - "learning_rate": 3.963687439092676e-06, - "loss": 0.8374, - "step": 740 - }, - { - "epoch": 0.0890999819635664, - "grad_norm": 1.7544880944775116, - "learning_rate": 3.963539525758427e-06, - "loss": 1.0282, - "step": 741 - }, - { - "epoch": 0.0892202248542055, - "grad_norm": 4.683754403585905, - "learning_rate": 3.9633913145567925e-06, - "loss": 0.9079, - "step": 742 - }, - { - "epoch": 0.08934046774484458, - "grad_norm": 1.694839200519889, - "learning_rate": 3.9632428055102575e-06, - "loss": 1.0418, - "step": 743 - }, - { - "epoch": 0.08946071063548368, - "grad_norm": 2.2593024066749297, - "learning_rate": 3.9630939986413495e-06, - "loss": 0.9037, - "step": 744 - }, - { - "epoch": 0.08958095352612276, - "grad_norm": 1.486372076078989, - "learning_rate": 3.962944893972643e-06, - "loss": 1.0091, - "step": 745 - }, - { - "epoch": 0.08970119641676186, - "grad_norm": 2.2436110599609074, - "learning_rate": 3.962795491526756e-06, - "loss": 1.1465, - "step": 746 - }, - { - "epoch": 0.08982143930740095, - "grad_norm": 2.0464845241219645, - "learning_rate": 3.962645791326354e-06, - "loss": 1.1235, - "step": 747 - }, - { - "epoch": 0.08994168219804004, - "grad_norm": 2.034499752596852, - "learning_rate": 3.962495793394146e-06, - "loss": 1.0603, - "step": 748 - }, - { - "epoch": 0.09006192508867913, - "grad_norm": 0.6881418699713764, - "learning_rate": 3.9623454977528864e-06, - "loss": 0.8437, - "step": 749 - }, - { - "epoch": 0.09018216797931822, - "grad_norm": 1.5265615660431269, - "learning_rate": 3.962194904425375e-06, - "loss": 1.0758, - "step": 750 - }, - { - "epoch": 0.09030241086995731, - "grad_norm": 1.695182338235723, - "learning_rate": 3.9620440134344566e-06, - "loss": 0.9054, - "step": 751 - }, - { - "epoch": 0.09042265376059641, - "grad_norm": 2.499238184250839, - "learning_rate": 3.9618928248030215e-06, - "loss": 1.0434, - "step": 752 - }, - { - "epoch": 0.0905428966512355, - "grad_norm": 2.387503686872102, - "learning_rate": 3.961741338554005e-06, - "loss": 1.0671, - "step": 753 - }, - { - "epoch": 0.09066313954187459, - "grad_norm": 1.7884279816684328, - "learning_rate": 3.9615895547103865e-06, - "loss": 0.9775, - "step": 754 - }, - { - "epoch": 0.09078338243251367, - "grad_norm": 1.7801202905920883, - "learning_rate": 3.961437473295193e-06, - "loss": 1.0046, - "step": 755 - }, - { - "epoch": 0.09090362532315277, - "grad_norm": 2.0435542768266335, - "learning_rate": 3.961285094331495e-06, - "loss": 0.9421, - "step": 756 - }, - { - "epoch": 0.09102386821379185, - "grad_norm": 1.791184944627264, - "learning_rate": 3.961132417842406e-06, - "loss": 1.0828, - "step": 757 - }, - { - "epoch": 0.09114411110443095, - "grad_norm": 2.2955117883763854, - "learning_rate": 3.960979443851089e-06, - "loss": 0.9895, - "step": 758 - }, - { - "epoch": 0.09126435399507005, - "grad_norm": 1.6104664481078013, - "learning_rate": 3.96082617238075e-06, - "loss": 1.0215, - "step": 759 - }, - { - "epoch": 0.09138459688570913, - "grad_norm": 1.9004920548190403, - "learning_rate": 3.960672603454639e-06, - "loss": 1.0233, - "step": 760 - }, - { - "epoch": 0.09150483977634823, - "grad_norm": 2.1255781845684667, - "learning_rate": 3.960518737096054e-06, - "loss": 0.9964, - "step": 761 - }, - { - "epoch": 0.09162508266698731, - "grad_norm": 2.2322527002828156, - "learning_rate": 3.960364573328334e-06, - "loss": 0.9672, - "step": 762 - }, - { - "epoch": 0.0917453255576264, - "grad_norm": 1.733653631633057, - "learning_rate": 3.9602101121748675e-06, - "loss": 1.1135, - "step": 763 - }, - { - "epoch": 0.0918655684482655, - "grad_norm": 1.640174585912039, - "learning_rate": 3.960055353659085e-06, - "loss": 0.9515, - "step": 764 - }, - { - "epoch": 0.09198581133890459, - "grad_norm": 1.7101181367589062, - "learning_rate": 3.959900297804465e-06, - "loss": 1.0718, - "step": 765 - }, - { - "epoch": 0.09210605422954368, - "grad_norm": 1.7970609485774882, - "learning_rate": 3.9597449446345276e-06, - "loss": 0.9918, - "step": 766 - }, - { - "epoch": 0.09222629712018277, - "grad_norm": 2.465815478027439, - "learning_rate": 3.95958929417284e-06, - "loss": 1.0611, - "step": 767 - }, - { - "epoch": 0.09234654001082186, - "grad_norm": 0.7450453497377818, - "learning_rate": 3.9594333464430145e-06, - "loss": 0.8432, - "step": 768 - }, - { - "epoch": 0.09246678290146094, - "grad_norm": 1.84874024157112, - "learning_rate": 3.959277101468709e-06, - "loss": 1.1051, - "step": 769 - }, - { - "epoch": 0.09258702579210004, - "grad_norm": 4.2242698444813085, - "learning_rate": 3.959120559273624e-06, - "loss": 1.0159, - "step": 770 - }, - { - "epoch": 0.09270726868273914, - "grad_norm": 1.662422123856138, - "learning_rate": 3.958963719881509e-06, - "loss": 1.0727, - "step": 771 - }, - { - "epoch": 0.09282751157337822, - "grad_norm": 1.7712375254383574, - "learning_rate": 3.958806583316154e-06, - "loss": 1.1637, - "step": 772 - }, - { - "epoch": 0.09294775446401732, - "grad_norm": 1.5757214967503124, - "learning_rate": 3.9586491496013985e-06, - "loss": 1.0195, - "step": 773 - }, - { - "epoch": 0.0930679973546564, - "grad_norm": 1.8660830499335412, - "learning_rate": 3.958491418761124e-06, - "loss": 1.0462, - "step": 774 - }, - { - "epoch": 0.0931882402452955, - "grad_norm": 1.8409142751010634, - "learning_rate": 3.958333390819258e-06, - "loss": 0.9651, - "step": 775 - }, - { - "epoch": 0.0933084831359346, - "grad_norm": 1.7583009846440107, - "learning_rate": 3.9581750657997754e-06, - "loss": 1.0249, - "step": 776 - }, - { - "epoch": 0.09342872602657368, - "grad_norm": 1.6651915539133524, - "learning_rate": 3.95801644372669e-06, - "loss": 1.127, - "step": 777 - }, - { - "epoch": 0.09354896891721277, - "grad_norm": 2.343129699002263, - "learning_rate": 3.957857524624068e-06, - "loss": 1.0675, - "step": 778 - }, - { - "epoch": 0.09366921180785186, - "grad_norm": 1.4388671676161429, - "learning_rate": 3.957698308516016e-06, - "loss": 1.1292, - "step": 779 - }, - { - "epoch": 0.09378945469849095, - "grad_norm": 1.8435819516942866, - "learning_rate": 3.957538795426688e-06, - "loss": 1.0525, - "step": 780 - }, - { - "epoch": 0.09390969758913004, - "grad_norm": 2.137575859820661, - "learning_rate": 3.9573789853802804e-06, - "loss": 0.9961, - "step": 781 - }, - { - "epoch": 0.09402994047976913, - "grad_norm": 1.8587213858048273, - "learning_rate": 3.957218878401037e-06, - "loss": 0.9813, - "step": 782 - }, - { - "epoch": 0.09415018337040823, - "grad_norm": 1.778228070266931, - "learning_rate": 3.957058474513246e-06, - "loss": 1.1296, - "step": 783 - }, - { - "epoch": 0.09427042626104731, - "grad_norm": 1.5867554007222384, - "learning_rate": 3.956897773741241e-06, - "loss": 1.0135, - "step": 784 - }, - { - "epoch": 0.09439066915168641, - "grad_norm": 1.649784568247566, - "learning_rate": 3.956736776109398e-06, - "loss": 0.947, - "step": 785 - }, - { - "epoch": 0.09451091204232549, - "grad_norm": 1.5924572657146694, - "learning_rate": 3.956575481642143e-06, - "loss": 1.0661, - "step": 786 - }, - { - "epoch": 0.09463115493296459, - "grad_norm": 2.3430339482347597, - "learning_rate": 3.956413890363943e-06, - "loss": 0.9747, - "step": 787 - }, - { - "epoch": 0.09475139782360369, - "grad_norm": 1.952201197542414, - "learning_rate": 3.956252002299312e-06, - "loss": 1.0517, - "step": 788 - }, - { - "epoch": 0.09487164071424277, - "grad_norm": 1.842802289080911, - "learning_rate": 3.956089817472807e-06, - "loss": 1.1373, - "step": 789 - }, - { - "epoch": 0.09499188360488187, - "grad_norm": 1.8171630595327486, - "learning_rate": 3.955927335909032e-06, - "loss": 1.0903, - "step": 790 - }, - { - "epoch": 0.09511212649552095, - "grad_norm": 2.153668945877309, - "learning_rate": 3.955764557632634e-06, - "loss": 0.9844, - "step": 791 - }, - { - "epoch": 0.09523236938616005, - "grad_norm": 2.121311015036661, - "learning_rate": 3.955601482668309e-06, - "loss": 1.1729, - "step": 792 - }, - { - "epoch": 0.09535261227679913, - "grad_norm": 1.6236988140023165, - "learning_rate": 3.955438111040794e-06, - "loss": 1.1099, - "step": 793 - }, - { - "epoch": 0.09547285516743823, - "grad_norm": 1.7612673568141082, - "learning_rate": 3.955274442774873e-06, - "loss": 1.038, - "step": 794 - }, - { - "epoch": 0.09559309805807732, - "grad_norm": 2.1596763036275126, - "learning_rate": 3.9551104778953725e-06, - "loss": 0.9441, - "step": 795 - }, - { - "epoch": 0.0957133409487164, - "grad_norm": 1.9624258525472003, - "learning_rate": 3.954946216427167e-06, - "loss": 1.0889, - "step": 796 - }, - { - "epoch": 0.0958335838393555, - "grad_norm": 0.8374387149308024, - "learning_rate": 3.954781658395176e-06, - "loss": 0.879, - "step": 797 - }, - { - "epoch": 0.09595382672999458, - "grad_norm": 1.8518806538284722, - "learning_rate": 3.95461680382436e-06, - "loss": 1.1473, - "step": 798 - }, - { - "epoch": 0.09607406962063368, - "grad_norm": 2.3658480741877614, - "learning_rate": 3.9544516527397295e-06, - "loss": 1.092, - "step": 799 - }, - { - "epoch": 0.09619431251127276, - "grad_norm": 2.0793133543122218, - "learning_rate": 3.954286205166338e-06, - "loss": 1.0338, - "step": 800 - }, - { - "epoch": 0.09631455540191186, - "grad_norm": 2.349416571084682, - "learning_rate": 3.954120461129282e-06, - "loss": 1.0675, - "step": 801 - }, - { - "epoch": 0.09643479829255096, - "grad_norm": 1.8582811744566117, - "learning_rate": 3.953954420653706e-06, - "loss": 1.0727, - "step": 802 - }, - { - "epoch": 0.09655504118319004, - "grad_norm": 1.92552353564083, - "learning_rate": 3.953788083764798e-06, - "loss": 1.111, - "step": 803 - }, - { - "epoch": 0.09667528407382914, - "grad_norm": 2.4962902200712733, - "learning_rate": 3.953621450487792e-06, - "loss": 1.1495, - "step": 804 - }, - { - "epoch": 0.09679552696446822, - "grad_norm": 0.8402072208686298, - "learning_rate": 3.953454520847964e-06, - "loss": 0.8815, - "step": 805 - }, - { - "epoch": 0.09691576985510732, - "grad_norm": 2.5180885146795835, - "learning_rate": 3.9532872948706395e-06, - "loss": 0.9656, - "step": 806 - }, - { - "epoch": 0.09703601274574641, - "grad_norm": 2.1280080037719387, - "learning_rate": 3.9531197725811845e-06, - "loss": 1.0593, - "step": 807 - }, - { - "epoch": 0.0971562556363855, - "grad_norm": 1.6728510636720286, - "learning_rate": 3.952951954005013e-06, - "loss": 1.0981, - "step": 808 - }, - { - "epoch": 0.0972764985270246, - "grad_norm": 1.6638724348386489, - "learning_rate": 3.952783839167584e-06, - "loss": 1.0748, - "step": 809 - }, - { - "epoch": 0.09739674141766368, - "grad_norm": 2.2936102052752285, - "learning_rate": 3.952615428094398e-06, - "loss": 0.9783, - "step": 810 - }, - { - "epoch": 0.09751698430830277, - "grad_norm": 1.6456658080909246, - "learning_rate": 3.952446720811004e-06, - "loss": 0.9622, - "step": 811 - }, - { - "epoch": 0.09763722719894186, - "grad_norm": 5.7248679199974575, - "learning_rate": 3.952277717342995e-06, - "loss": 0.9039, - "step": 812 - }, - { - "epoch": 0.09775747008958095, - "grad_norm": 1.9343598052431032, - "learning_rate": 3.952108417716009e-06, - "loss": 1.088, - "step": 813 - }, - { - "epoch": 0.09787771298022005, - "grad_norm": 1.7071871129858955, - "learning_rate": 3.951938821955727e-06, - "loss": 1.0851, - "step": 814 - }, - { - "epoch": 0.09799795587085913, - "grad_norm": 1.4262028319092526, - "learning_rate": 3.9517689300878786e-06, - "loss": 0.9879, - "step": 815 - }, - { - "epoch": 0.09811819876149823, - "grad_norm": 1.5334157614044563, - "learning_rate": 3.951598742138236e-06, - "loss": 1.0092, - "step": 816 - }, - { - "epoch": 0.09823844165213731, - "grad_norm": 1.850884808484792, - "learning_rate": 3.951428258132615e-06, - "loss": 1.0188, - "step": 817 - }, - { - "epoch": 0.09835868454277641, - "grad_norm": 1.737793243142364, - "learning_rate": 3.951257478096879e-06, - "loss": 1.0715, - "step": 818 - }, - { - "epoch": 0.0984789274334155, - "grad_norm": 2.367655188560814, - "learning_rate": 3.951086402056936e-06, - "loss": 0.9044, - "step": 819 - }, - { - "epoch": 0.09859917032405459, - "grad_norm": 1.6317578427743022, - "learning_rate": 3.950915030038735e-06, - "loss": 1.0634, - "step": 820 - }, - { - "epoch": 0.09871941321469369, - "grad_norm": 2.6449443094487735, - "learning_rate": 3.9507433620682765e-06, - "loss": 1.06, - "step": 821 - }, - { - "epoch": 0.09883965610533277, - "grad_norm": 1.3967495326418118, - "learning_rate": 3.9505713981716e-06, - "loss": 1.1047, - "step": 822 - }, - { - "epoch": 0.09895989899597187, - "grad_norm": 1.9120972801503968, - "learning_rate": 3.950399138374795e-06, - "loss": 1.0364, - "step": 823 - }, - { - "epoch": 0.09908014188661095, - "grad_norm": 1.8716959770582104, - "learning_rate": 3.95022658270399e-06, - "loss": 0.9693, - "step": 824 - }, - { - "epoch": 0.09920038477725004, - "grad_norm": 1.7224378382479926, - "learning_rate": 3.9500537311853635e-06, - "loss": 1.0063, - "step": 825 - }, - { - "epoch": 0.09932062766788914, - "grad_norm": 2.091500055534258, - "learning_rate": 3.949880583845136e-06, - "loss": 1.056, - "step": 826 - }, - { - "epoch": 0.09944087055852822, - "grad_norm": 1.6386974819214983, - "learning_rate": 3.949707140709575e-06, - "loss": 1.0407, - "step": 827 - }, - { - "epoch": 0.09956111344916732, - "grad_norm": 2.1192194910799804, - "learning_rate": 3.949533401804991e-06, - "loss": 1.0711, - "step": 828 - }, - { - "epoch": 0.0996813563398064, - "grad_norm": 1.991424234817124, - "learning_rate": 3.949359367157739e-06, - "loss": 1.138, - "step": 829 - }, - { - "epoch": 0.0998015992304455, - "grad_norm": 2.089109830241699, - "learning_rate": 3.949185036794222e-06, - "loss": 0.9901, - "step": 830 - }, - { - "epoch": 0.0999218421210846, - "grad_norm": 1.4919710651435114, - "learning_rate": 3.949010410740884e-06, - "loss": 1.0053, - "step": 831 - }, - { - "epoch": 0.10004208501172368, - "grad_norm": 1.633894128607645, - "learning_rate": 3.948835489024216e-06, - "loss": 1.0941, - "step": 832 - }, - { - "epoch": 0.10016232790236278, - "grad_norm": 1.7096249497018896, - "learning_rate": 3.948660271670755e-06, - "loss": 1.1251, - "step": 833 - }, - { - "epoch": 0.10028257079300186, - "grad_norm": 2.0333717415522576, - "learning_rate": 3.948484758707079e-06, - "loss": 1.0682, - "step": 834 - }, - { - "epoch": 0.10040281368364096, - "grad_norm": 1.7979322557300255, - "learning_rate": 3.948308950159815e-06, - "loss": 1.0621, - "step": 835 - }, - { - "epoch": 0.10052305657428004, - "grad_norm": 4.518743685431705, - "learning_rate": 3.9481328460556326e-06, - "loss": 0.9901, - "step": 836 - }, - { - "epoch": 0.10064329946491914, - "grad_norm": 1.9563754690475734, - "learning_rate": 3.9479564464212455e-06, - "loss": 1.1194, - "step": 837 - }, - { - "epoch": 0.10076354235555823, - "grad_norm": 2.4991269494547423, - "learning_rate": 3.947779751283414e-06, - "loss": 0.997, - "step": 838 - }, - { - "epoch": 0.10088378524619732, - "grad_norm": 1.7223096572393484, - "learning_rate": 3.947602760668944e-06, - "loss": 0.9857, - "step": 839 - }, - { - "epoch": 0.10100402813683641, - "grad_norm": 1.7251501736716606, - "learning_rate": 3.947425474604684e-06, - "loss": 0.9421, - "step": 840 - }, - { - "epoch": 0.1011242710274755, - "grad_norm": 1.7764890523069115, - "learning_rate": 3.947247893117528e-06, - "loss": 1.1545, - "step": 841 - }, - { - "epoch": 0.10124451391811459, - "grad_norm": 5.752004191199462, - "learning_rate": 3.947070016234413e-06, - "loss": 0.9258, - "step": 842 - }, - { - "epoch": 0.10136475680875369, - "grad_norm": 2.3490403549411076, - "learning_rate": 3.946891843982326e-06, - "loss": 0.9677, - "step": 843 - }, - { - "epoch": 0.10148499969939277, - "grad_norm": 2.0487813444224394, - "learning_rate": 3.9467133763882935e-06, - "loss": 0.9699, - "step": 844 - }, - { - "epoch": 0.10160524259003187, - "grad_norm": 1.804801441905785, - "learning_rate": 3.9465346134793905e-06, - "loss": 1.0997, - "step": 845 - }, - { - "epoch": 0.10172548548067095, - "grad_norm": 3.258313037649753, - "learning_rate": 3.9463555552827335e-06, - "loss": 1.0249, - "step": 846 - }, - { - "epoch": 0.10184572837131005, - "grad_norm": 3.2515593969136893, - "learning_rate": 3.946176201825487e-06, - "loss": 1.0931, - "step": 847 - }, - { - "epoch": 0.10196597126194913, - "grad_norm": 1.8155497839050088, - "learning_rate": 3.9459965531348575e-06, - "loss": 1.0653, - "step": 848 - }, - { - "epoch": 0.10208621415258823, - "grad_norm": 2.15059218124718, - "learning_rate": 3.945816609238098e-06, - "loss": 1.0837, - "step": 849 - }, - { - "epoch": 0.10220645704322733, - "grad_norm": 1.76770963232307, - "learning_rate": 3.945636370162507e-06, - "loss": 1.0816, - "step": 850 - }, - { - "epoch": 0.10232669993386641, - "grad_norm": 1.6339064588397965, - "learning_rate": 3.945455835935425e-06, - "loss": 1.0259, - "step": 851 - }, - { - "epoch": 0.1024469428245055, - "grad_norm": 1.7793375423067654, - "learning_rate": 3.94527500658424e-06, - "loss": 0.9717, - "step": 852 - }, - { - "epoch": 0.10256718571514459, - "grad_norm": 1.6804659436857305, - "learning_rate": 3.945093882136382e-06, - "loss": 1.041, - "step": 853 - }, - { - "epoch": 0.10268742860578368, - "grad_norm": 7.936829786465511, - "learning_rate": 3.944912462619329e-06, - "loss": 1.0715, - "step": 854 - }, - { - "epoch": 0.10280767149642277, - "grad_norm": 2.214384700830342, - "learning_rate": 3.9447307480606025e-06, - "loss": 1.0351, - "step": 855 - }, - { - "epoch": 0.10292791438706186, - "grad_norm": 1.9900202912106026, - "learning_rate": 3.944548738487767e-06, - "loss": 1.1316, - "step": 856 - }, - { - "epoch": 0.10304815727770096, - "grad_norm": 1.9438120252519686, - "learning_rate": 3.944366433928434e-06, - "loss": 1.1306, - "step": 857 - }, - { - "epoch": 0.10316840016834004, - "grad_norm": 1.3742244006849118, - "learning_rate": 3.9441838344102594e-06, - "loss": 1.0585, - "step": 858 - }, - { - "epoch": 0.10328864305897914, - "grad_norm": 1.8933126089255776, - "learning_rate": 3.944000939960943e-06, - "loss": 0.9051, - "step": 859 - }, - { - "epoch": 0.10340888594961822, - "grad_norm": 1.3913888869147337, - "learning_rate": 3.943817750608229e-06, - "loss": 1.032, - "step": 860 - }, - { - "epoch": 0.10352912884025732, - "grad_norm": 4.525947783461829, - "learning_rate": 3.943634266379908e-06, - "loss": 1.0501, - "step": 861 - }, - { - "epoch": 0.10364937173089642, - "grad_norm": 1.7740489560998356, - "learning_rate": 3.943450487303815e-06, - "loss": 1.0786, - "step": 862 - }, - { - "epoch": 0.1037696146215355, - "grad_norm": 1.601565402888947, - "learning_rate": 3.943266413407827e-06, - "loss": 1.081, - "step": 863 - }, - { - "epoch": 0.1038898575121746, - "grad_norm": 1.998286765157552, - "learning_rate": 3.94308204471987e-06, - "loss": 1.0705, - "step": 864 - }, - { - "epoch": 0.10401010040281368, - "grad_norm": 2.791832240494353, - "learning_rate": 3.942897381267912e-06, - "loss": 0.9771, - "step": 865 - }, - { - "epoch": 0.10413034329345278, - "grad_norm": 2.4353698812993962, - "learning_rate": 3.942712423079965e-06, - "loss": 0.9081, - "step": 866 - }, - { - "epoch": 0.10425058618409186, - "grad_norm": 1.9734433860419198, - "learning_rate": 3.942527170184088e-06, - "loss": 1.1281, - "step": 867 - }, - { - "epoch": 0.10437082907473096, - "grad_norm": 3.055760968172166, - "learning_rate": 3.942341622608385e-06, - "loss": 1.0164, - "step": 868 - }, - { - "epoch": 0.10449107196537005, - "grad_norm": 1.7023915802747416, - "learning_rate": 3.942155780381001e-06, - "loss": 0.9987, - "step": 869 - }, - { - "epoch": 0.10461131485600914, - "grad_norm": 1.6911940937148422, - "learning_rate": 3.94196964353013e-06, - "loss": 0.9914, - "step": 870 - }, - { - "epoch": 0.10473155774664823, - "grad_norm": 2.344201921762265, - "learning_rate": 3.941783212084008e-06, - "loss": 1.0332, - "step": 871 - }, - { - "epoch": 0.10485180063728732, - "grad_norm": 2.348201542394437, - "learning_rate": 3.941596486070916e-06, - "loss": 1.0006, - "step": 872 - }, - { - "epoch": 0.10497204352792641, - "grad_norm": 2.449654231681966, - "learning_rate": 3.941409465519182e-06, - "loss": 0.8091, - "step": 873 - }, - { - "epoch": 0.10509228641856551, - "grad_norm": 1.7950380306412352, - "learning_rate": 3.941222150457176e-06, - "loss": 1.0793, - "step": 874 - }, - { - "epoch": 0.10521252930920459, - "grad_norm": 4.7685941858196745, - "learning_rate": 3.941034540913311e-06, - "loss": 0.9491, - "step": 875 - }, - { - "epoch": 0.10533277219984369, - "grad_norm": 1.4850972075066395, - "learning_rate": 3.940846636916051e-06, - "loss": 1.0533, - "step": 876 - }, - { - "epoch": 0.10545301509048277, - "grad_norm": 1.7559165393659786, - "learning_rate": 3.940658438493899e-06, - "loss": 1.0932, - "step": 877 - }, - { - "epoch": 0.10557325798112187, - "grad_norm": 2.086851569789935, - "learning_rate": 3.940469945675405e-06, - "loss": 0.9837, - "step": 878 - }, - { - "epoch": 0.10569350087176095, - "grad_norm": 1.922661225065625, - "learning_rate": 3.940281158489163e-06, - "loss": 1.1371, - "step": 879 - }, - { - "epoch": 0.10581374376240005, - "grad_norm": 1.5323650426387856, - "learning_rate": 3.940092076963812e-06, - "loss": 1.0499, - "step": 880 - }, - { - "epoch": 0.10593398665303914, - "grad_norm": 1.8658171537219415, - "learning_rate": 3.9399027011280355e-06, - "loss": 1.0112, - "step": 881 - }, - { - "epoch": 0.10605422954367823, - "grad_norm": 1.9004637026874636, - "learning_rate": 3.939713031010561e-06, - "loss": 1.0005, - "step": 882 - }, - { - "epoch": 0.10617447243431732, - "grad_norm": 1.972589351917169, - "learning_rate": 3.939523066640163e-06, - "loss": 1.0107, - "step": 883 - }, - { - "epoch": 0.10629471532495641, - "grad_norm": 1.62095468682995, - "learning_rate": 3.939332808045657e-06, - "loss": 1.0332, - "step": 884 - }, - { - "epoch": 0.1064149582155955, - "grad_norm": 1.6752366823965859, - "learning_rate": 3.939142255255906e-06, - "loss": 1.0662, - "step": 885 - }, - { - "epoch": 0.1065352011062346, - "grad_norm": 1.8008950418858503, - "learning_rate": 3.938951408299817e-06, - "loss": 1.1086, - "step": 886 - }, - { - "epoch": 0.10665544399687368, - "grad_norm": 0.8181590925417689, - "learning_rate": 3.938760267206342e-06, - "loss": 0.8096, - "step": 887 - }, - { - "epoch": 0.10677568688751278, - "grad_norm": 2.0931272576981423, - "learning_rate": 3.938568832004475e-06, - "loss": 1.0204, - "step": 888 - }, - { - "epoch": 0.10689592977815186, - "grad_norm": 1.8851703742866028, - "learning_rate": 3.938377102723257e-06, - "loss": 0.983, - "step": 889 - }, - { - "epoch": 0.10701617266879096, - "grad_norm": 1.8381973318780174, - "learning_rate": 3.938185079391774e-06, - "loss": 1.0682, - "step": 890 - }, - { - "epoch": 0.10713641555943004, - "grad_norm": 2.362021572373062, - "learning_rate": 3.937992762039157e-06, - "loss": 1.2839, - "step": 891 - }, - { - "epoch": 0.10725665845006914, - "grad_norm": 1.5545827552360143, - "learning_rate": 3.937800150694577e-06, - "loss": 1.0292, - "step": 892 - }, - { - "epoch": 0.10737690134070824, - "grad_norm": 1.8712011636949748, - "learning_rate": 3.937607245387255e-06, - "loss": 0.981, - "step": 893 - }, - { - "epoch": 0.10749714423134732, - "grad_norm": 1.755755836226171, - "learning_rate": 3.937414046146455e-06, - "loss": 0.9549, - "step": 894 - }, - { - "epoch": 0.10761738712198642, - "grad_norm": 1.9113459402470685, - "learning_rate": 3.9372205530014845e-06, - "loss": 0.988, - "step": 895 - }, - { - "epoch": 0.1077376300126255, - "grad_norm": 2.158210526535668, - "learning_rate": 3.937026765981696e-06, - "loss": 0.9515, - "step": 896 - }, - { - "epoch": 0.1078578729032646, - "grad_norm": 1.6264905695625709, - "learning_rate": 3.936832685116488e-06, - "loss": 1.0159, - "step": 897 - }, - { - "epoch": 0.10797811579390369, - "grad_norm": 1.9972034214121115, - "learning_rate": 3.936638310435301e-06, - "loss": 1.1241, - "step": 898 - }, - { - "epoch": 0.10809835868454278, - "grad_norm": 1.8563095596982206, - "learning_rate": 3.936443641967623e-06, - "loss": 1.051, - "step": 899 - }, - { - "epoch": 0.10821860157518187, - "grad_norm": 1.7977512379285157, - "learning_rate": 3.936248679742983e-06, - "loss": 1.0545, - "step": 900 - }, - { - "epoch": 0.10833884446582095, - "grad_norm": 1.0236186675251886, - "learning_rate": 3.936053423790959e-06, - "loss": 0.9837, - "step": 901 - }, - { - "epoch": 0.10845908735646005, - "grad_norm": 1.7945077312491071, - "learning_rate": 3.935857874141168e-06, - "loss": 0.998, - "step": 902 - }, - { - "epoch": 0.10857933024709913, - "grad_norm": 2.002017910537098, - "learning_rate": 3.935662030823279e-06, - "loss": 1.0624, - "step": 903 - }, - { - "epoch": 0.10869957313773823, - "grad_norm": 2.136988091099589, - "learning_rate": 3.935465893866998e-06, - "loss": 0.9437, - "step": 904 - }, - { - "epoch": 0.10881981602837733, - "grad_norm": 1.6952926704899491, - "learning_rate": 3.935269463302079e-06, - "loss": 1.0334, - "step": 905 - }, - { - "epoch": 0.10894005891901641, - "grad_norm": 2.385004854817118, - "learning_rate": 3.935072739158322e-06, - "loss": 0.999, - "step": 906 - }, - { - "epoch": 0.10906030180965551, - "grad_norm": 1.5294343733087408, - "learning_rate": 3.934875721465569e-06, - "loss": 1.0244, - "step": 907 - }, - { - "epoch": 0.10918054470029459, - "grad_norm": 2.117811610725602, - "learning_rate": 3.9346784102537076e-06, - "loss": 0.9392, - "step": 908 - }, - { - "epoch": 0.10930078759093369, - "grad_norm": 2.017574816218245, - "learning_rate": 3.934480805552669e-06, - "loss": 1.0066, - "step": 909 - }, - { - "epoch": 0.10942103048157277, - "grad_norm": 2.016058586858768, - "learning_rate": 3.93428290739243e-06, - "loss": 1.1138, - "step": 910 - }, - { - "epoch": 0.10954127337221187, - "grad_norm": 2.2064248121129277, - "learning_rate": 3.9340847158030125e-06, - "loss": 1.0148, - "step": 911 - }, - { - "epoch": 0.10966151626285096, - "grad_norm": 1.6061883135279604, - "learning_rate": 3.9338862308144814e-06, - "loss": 0.9779, - "step": 912 - }, - { - "epoch": 0.10978175915349005, - "grad_norm": 2.078328517040473, - "learning_rate": 3.933687452456946e-06, - "loss": 1.0774, - "step": 913 - }, - { - "epoch": 0.10990200204412914, - "grad_norm": 2.0629883390347623, - "learning_rate": 3.933488380760562e-06, - "loss": 1.0918, - "step": 914 - }, - { - "epoch": 0.11002224493476823, - "grad_norm": 1.8670383537451685, - "learning_rate": 3.9332890157555286e-06, - "loss": 1.1023, - "step": 915 - }, - { - "epoch": 0.11014248782540732, - "grad_norm": 1.6544208142400834, - "learning_rate": 3.933089357472088e-06, - "loss": 0.9921, - "step": 916 - }, - { - "epoch": 0.11026273071604642, - "grad_norm": 1.6147232815377195, - "learning_rate": 3.932889405940529e-06, - "loss": 1.0898, - "step": 917 - }, - { - "epoch": 0.1103829736066855, - "grad_norm": 2.459660177407435, - "learning_rate": 3.932689161191184e-06, - "loss": 1.0258, - "step": 918 - }, - { - "epoch": 0.1105032164973246, - "grad_norm": 2.243663342005824, - "learning_rate": 3.93248862325443e-06, - "loss": 1.1014, - "step": 919 - }, - { - "epoch": 0.11062345938796368, - "grad_norm": 1.000247149920342, - "learning_rate": 3.932287792160688e-06, - "loss": 0.8868, - "step": 920 - }, - { - "epoch": 0.11074370227860278, - "grad_norm": 2.229478071571607, - "learning_rate": 3.932086667940424e-06, - "loss": 1.0256, - "step": 921 - }, - { - "epoch": 0.11086394516924186, - "grad_norm": 1.641309250330832, - "learning_rate": 3.93188525062415e-06, - "loss": 1.042, - "step": 922 - }, - { - "epoch": 0.11098418805988096, - "grad_norm": 2.4198911355493156, - "learning_rate": 3.931683540242418e-06, - "loss": 1.0901, - "step": 923 - }, - { - "epoch": 0.11110443095052006, - "grad_norm": 2.736915323884227, - "learning_rate": 3.9314815368258295e-06, - "loss": 1.1323, - "step": 924 - }, - { - "epoch": 0.11122467384115914, - "grad_norm": 1.5518283335828098, - "learning_rate": 3.9312792404050275e-06, - "loss": 1.0149, - "step": 925 - }, - { - "epoch": 0.11134491673179824, - "grad_norm": 1.620698958892874, - "learning_rate": 3.9310766510107e-06, - "loss": 1.0068, - "step": 926 - }, - { - "epoch": 0.11146515962243732, - "grad_norm": 1.8125683344094055, - "learning_rate": 3.9308737686735806e-06, - "loss": 1.1503, - "step": 927 - }, - { - "epoch": 0.11158540251307641, - "grad_norm": 1.76616046179118, - "learning_rate": 3.9306705934244455e-06, - "loss": 1.0487, - "step": 928 - }, - { - "epoch": 0.11170564540371551, - "grad_norm": 1.6440786432033083, - "learning_rate": 3.930467125294116e-06, - "loss": 1.1133, - "step": 929 - }, - { - "epoch": 0.1118258882943546, - "grad_norm": 1.0521618023808326, - "learning_rate": 3.930263364313458e-06, - "loss": 0.8672, - "step": 930 - }, - { - "epoch": 0.11194613118499369, - "grad_norm": 2.0319844018682907, - "learning_rate": 3.930059310513384e-06, - "loss": 1.0633, - "step": 931 - }, - { - "epoch": 0.11206637407563277, - "grad_norm": 2.1472688582952735, - "learning_rate": 3.929854963924846e-06, - "loss": 1.0584, - "step": 932 - }, - { - "epoch": 0.11218661696627187, - "grad_norm": 1.6088652312677412, - "learning_rate": 3.929650324578845e-06, - "loss": 1.0027, - "step": 933 - }, - { - "epoch": 0.11230685985691095, - "grad_norm": 2.5736455669956966, - "learning_rate": 3.929445392506423e-06, - "loss": 1.0578, - "step": 934 - }, - { - "epoch": 0.11242710274755005, - "grad_norm": 2.000955472642336, - "learning_rate": 3.92924016773867e-06, - "loss": 0.9929, - "step": 935 - }, - { - "epoch": 0.11254734563818915, - "grad_norm": 2.3176339355415005, - "learning_rate": 3.9290346503067175e-06, - "loss": 0.9727, - "step": 936 - }, - { - "epoch": 0.11266758852882823, - "grad_norm": 2.1932233868476505, - "learning_rate": 3.9288288402417415e-06, - "loss": 1.0233, - "step": 937 - }, - { - "epoch": 0.11278783141946733, - "grad_norm": 2.1202016398066297, - "learning_rate": 3.928622737574964e-06, - "loss": 0.9299, - "step": 938 - }, - { - "epoch": 0.11290807431010641, - "grad_norm": 1.7729976965754264, - "learning_rate": 3.928416342337652e-06, - "loss": 1.135, - "step": 939 - }, - { - "epoch": 0.1130283172007455, - "grad_norm": 1.6016293395446661, - "learning_rate": 3.928209654561113e-06, - "loss": 1.0575, - "step": 940 - }, - { - "epoch": 0.1131485600913846, - "grad_norm": 1.9619245134894614, - "learning_rate": 3.928002674276703e-06, - "loss": 1.027, - "step": 941 - }, - { - "epoch": 0.11326880298202369, - "grad_norm": 2.1357937611657114, - "learning_rate": 3.92779540151582e-06, - "loss": 0.9888, - "step": 942 - }, - { - "epoch": 0.11338904587266278, - "grad_norm": 1.6463105278502952, - "learning_rate": 3.927587836309907e-06, - "loss": 1.0881, - "step": 943 - }, - { - "epoch": 0.11350928876330187, - "grad_norm": 2.0548527292167513, - "learning_rate": 3.927379978690452e-06, - "loss": 1.0074, - "step": 944 - }, - { - "epoch": 0.11362953165394096, - "grad_norm": 2.3246726226022907, - "learning_rate": 3.927171828688987e-06, - "loss": 1.0947, - "step": 945 - }, - { - "epoch": 0.11374977454458005, - "grad_norm": 1.8609165244918184, - "learning_rate": 3.926963386337088e-06, - "loss": 1.0463, - "step": 946 - }, - { - "epoch": 0.11387001743521914, - "grad_norm": 1.9955296624525598, - "learning_rate": 3.926754651666375e-06, - "loss": 0.9296, - "step": 947 - }, - { - "epoch": 0.11399026032585824, - "grad_norm": 2.5058723146100053, - "learning_rate": 3.926545624708513e-06, - "loss": 1.0171, - "step": 948 - }, - { - "epoch": 0.11411050321649732, - "grad_norm": 1.8755813333219873, - "learning_rate": 3.926336305495213e-06, - "loss": 1.0901, - "step": 949 - }, - { - "epoch": 0.11423074610713642, - "grad_norm": 1.9257463370789387, - "learning_rate": 3.926126694058226e-06, - "loss": 1.1215, - "step": 950 - }, - { - "epoch": 0.1143509889977755, - "grad_norm": 1.349938094521118, - "learning_rate": 3.92591679042935e-06, - "loss": 1.0459, - "step": 951 - }, - { - "epoch": 0.1144712318884146, - "grad_norm": 1.5163290217375964, - "learning_rate": 3.92570659464043e-06, - "loss": 1.0514, - "step": 952 - }, - { - "epoch": 0.1145914747790537, - "grad_norm": 1.6943563410605054, - "learning_rate": 3.925496106723349e-06, - "loss": 1.0251, - "step": 953 - }, - { - "epoch": 0.11471171766969278, - "grad_norm": 1.8342067920825684, - "learning_rate": 3.9252853267100405e-06, - "loss": 1.0667, - "step": 954 - }, - { - "epoch": 0.11483196056033187, - "grad_norm": 13.955972378844208, - "learning_rate": 3.9250742546324786e-06, - "loss": 1.0613, - "step": 955 - }, - { - "epoch": 0.11495220345097096, - "grad_norm": 1.7889034417088963, - "learning_rate": 3.924862890522683e-06, - "loss": 1.0908, - "step": 956 - }, - { - "epoch": 0.11507244634161005, - "grad_norm": 1.8708385699679597, - "learning_rate": 3.9246512344127174e-06, - "loss": 1.0871, - "step": 957 - }, - { - "epoch": 0.11519268923224914, - "grad_norm": 3.2368269762070025, - "learning_rate": 3.9244392863346895e-06, - "loss": 1.0446, - "step": 958 - }, - { - "epoch": 0.11531293212288823, - "grad_norm": 1.7903759395354804, - "learning_rate": 3.9242270463207524e-06, - "loss": 1.1452, - "step": 959 - }, - { - "epoch": 0.11543317501352733, - "grad_norm": 8.13824510107648, - "learning_rate": 3.924014514403102e-06, - "loss": 1.0701, - "step": 960 - }, - { - "epoch": 0.11555341790416641, - "grad_norm": 2.647501642300508, - "learning_rate": 3.92380169061398e-06, - "loss": 1.1476, - "step": 961 - }, - { - "epoch": 0.11567366079480551, - "grad_norm": 1.7870292603926186, - "learning_rate": 3.9235885749856705e-06, - "loss": 1.0696, - "step": 962 - }, - { - "epoch": 0.1157939036854446, - "grad_norm": 1.6248369868750638, - "learning_rate": 3.9233751675505035e-06, - "loss": 1.0581, - "step": 963 - }, - { - "epoch": 0.11591414657608369, - "grad_norm": 1.8667555433073255, - "learning_rate": 3.923161468340853e-06, - "loss": 1.0648, - "step": 964 - }, - { - "epoch": 0.11603438946672277, - "grad_norm": 1.772673656635388, - "learning_rate": 3.9229474773891374e-06, - "loss": 1.0377, - "step": 965 - }, - { - "epoch": 0.11615463235736187, - "grad_norm": 1.8947561228897696, - "learning_rate": 3.922733194727818e-06, - "loss": 1.071, - "step": 966 - }, - { - "epoch": 0.11627487524800097, - "grad_norm": 2.155770289921381, - "learning_rate": 3.922518620389402e-06, - "loss": 1.0987, - "step": 967 - }, - { - "epoch": 0.11639511813864005, - "grad_norm": 1.5607200402349837, - "learning_rate": 3.922303754406439e-06, - "loss": 1.1298, - "step": 968 - }, - { - "epoch": 0.11651536102927915, - "grad_norm": 1.5486747983296705, - "learning_rate": 3.922088596811526e-06, - "loss": 1.0095, - "step": 969 - }, - { - "epoch": 0.11663560391991823, - "grad_norm": 1.9426301287607968, - "learning_rate": 3.9218731476373e-06, - "loss": 1.0853, - "step": 970 - }, - { - "epoch": 0.11675584681055733, - "grad_norm": 1.8312678912333609, - "learning_rate": 3.9216574069164455e-06, - "loss": 1.0801, - "step": 971 - }, - { - "epoch": 0.11687608970119642, - "grad_norm": 1.3798907604380093, - "learning_rate": 3.921441374681691e-06, - "loss": 1.0295, - "step": 972 - }, - { - "epoch": 0.1169963325918355, - "grad_norm": 1.811443876179906, - "learning_rate": 3.921225050965808e-06, - "loss": 0.8762, - "step": 973 - }, - { - "epoch": 0.1171165754824746, - "grad_norm": 2.0392526121413916, - "learning_rate": 3.921008435801612e-06, - "loss": 0.9696, - "step": 974 - }, - { - "epoch": 0.11723681837311369, - "grad_norm": 2.2234350894746027, - "learning_rate": 3.920791529221963e-06, - "loss": 0.986, - "step": 975 - }, - { - "epoch": 0.11735706126375278, - "grad_norm": 1.7759224710897525, - "learning_rate": 3.920574331259768e-06, - "loss": 0.991, - "step": 976 - }, - { - "epoch": 0.11747730415439187, - "grad_norm": 2.221563140830413, - "learning_rate": 3.9203568419479716e-06, - "loss": 1.0299, - "step": 977 - }, - { - "epoch": 0.11759754704503096, - "grad_norm": 2.03572317709451, - "learning_rate": 3.92013906131957e-06, - "loss": 0.9793, - "step": 978 - }, - { - "epoch": 0.11771778993567006, - "grad_norm": 1.397452662967947, - "learning_rate": 3.9199209894076e-06, - "loss": 1.0506, - "step": 979 - }, - { - "epoch": 0.11783803282630914, - "grad_norm": 1.6515947813345644, - "learning_rate": 3.919702626245142e-06, - "loss": 1.1317, - "step": 980 - }, - { - "epoch": 0.11795827571694824, - "grad_norm": 2.7008204697025326, - "learning_rate": 3.919483971865322e-06, - "loss": 0.8806, - "step": 981 - }, - { - "epoch": 0.11807851860758732, - "grad_norm": 1.8917306236624816, - "learning_rate": 3.91926502630131e-06, - "loss": 1.098, - "step": 982 - }, - { - "epoch": 0.11819876149822642, - "grad_norm": 1.735518551049509, - "learning_rate": 3.91904578958632e-06, - "loss": 0.9546, - "step": 983 - }, - { - "epoch": 0.11831900438886551, - "grad_norm": 1.8672600257227459, - "learning_rate": 3.918826261753608e-06, - "loss": 1.0777, - "step": 984 - }, - { - "epoch": 0.1184392472795046, - "grad_norm": 3.1798268130664273, - "learning_rate": 3.918606442836478e-06, - "loss": 0.9331, - "step": 985 - }, - { - "epoch": 0.1185594901701437, - "grad_norm": 1.6507025610118349, - "learning_rate": 3.918386332868277e-06, - "loss": 1.0042, - "step": 986 - }, - { - "epoch": 0.11867973306078278, - "grad_norm": 1.587558012375444, - "learning_rate": 3.918165931882394e-06, - "loss": 1.1704, - "step": 987 - }, - { - "epoch": 0.11879997595142187, - "grad_norm": 2.6487070132156765, - "learning_rate": 3.917945239912264e-06, - "loss": 0.9922, - "step": 988 - }, - { - "epoch": 0.11892021884206096, - "grad_norm": 1.9085203939761877, - "learning_rate": 3.917724256991367e-06, - "loss": 0.9914, - "step": 989 - }, - { - "epoch": 0.11904046173270005, - "grad_norm": 1.9136628648491705, - "learning_rate": 3.9175029831532245e-06, - "loss": 1.0445, - "step": 990 - }, - { - "epoch": 0.11916070462333915, - "grad_norm": 2.016937656428233, - "learning_rate": 3.917281418431404e-06, - "loss": 1.1051, - "step": 991 - }, - { - "epoch": 0.11928094751397823, - "grad_norm": 1.8201107191635453, - "learning_rate": 3.917059562859516e-06, - "loss": 1.0016, - "step": 992 - }, - { - "epoch": 0.11940119040461733, - "grad_norm": 1.7951538467357937, - "learning_rate": 3.916837416471218e-06, - "loss": 1.1134, - "step": 993 - }, - { - "epoch": 0.11952143329525641, - "grad_norm": 2.313392363884381, - "learning_rate": 3.916614979300207e-06, - "loss": 0.9619, - "step": 994 - }, - { - "epoch": 0.11964167618589551, - "grad_norm": 1.4311677534301324, - "learning_rate": 3.9163922513802274e-06, - "loss": 1.016, - "step": 995 - }, - { - "epoch": 0.1197619190765346, - "grad_norm": 2.19809717338071, - "learning_rate": 3.916169232745067e-06, - "loss": 1.0553, - "step": 996 - }, - { - "epoch": 0.11988216196717369, - "grad_norm": 2.509634336607428, - "learning_rate": 3.915945923428559e-06, - "loss": 1.1446, - "step": 997 - }, - { - "epoch": 0.12000240485781279, - "grad_norm": 2.147539540682869, - "learning_rate": 3.915722323464577e-06, - "loss": 1.0589, - "step": 998 - }, - { - "epoch": 0.12012264774845187, - "grad_norm": 2.413672554595106, - "learning_rate": 3.91549843288704e-06, - "loss": 0.9368, - "step": 999 - }, - { - "epoch": 0.12024289063909097, - "grad_norm": 1.8210797964352745, - "learning_rate": 3.915274251729916e-06, - "loss": 1.0245, - "step": 1000 - }, - { - "epoch": 0.12036313352973005, - "grad_norm": 1.7159491866753604, - "learning_rate": 3.91504978002721e-06, - "loss": 1.1317, - "step": 1001 - }, - { - "epoch": 0.12048337642036915, - "grad_norm": 3.850827903410773, - "learning_rate": 3.914825017812974e-06, - "loss": 1.0053, - "step": 1002 - }, - { - "epoch": 0.12060361931100824, - "grad_norm": 1.860492594822348, - "learning_rate": 3.9145999651213065e-06, - "loss": 0.9584, - "step": 1003 - }, - { - "epoch": 0.12072386220164733, - "grad_norm": 2.4050042036182364, - "learning_rate": 3.9143746219863465e-06, - "loss": 1.1163, - "step": 1004 - }, - { - "epoch": 0.12084410509228642, - "grad_norm": 1.0495158237211233, - "learning_rate": 3.914148988442278e-06, - "loss": 0.9494, - "step": 1005 - }, - { - "epoch": 0.1209643479829255, - "grad_norm": 2.4103285667609, - "learning_rate": 3.91392306452333e-06, - "loss": 1.1846, - "step": 1006 - }, - { - "epoch": 0.1210845908735646, - "grad_norm": 3.8978512003933288, - "learning_rate": 3.913696850263774e-06, - "loss": 0.897, - "step": 1007 - }, - { - "epoch": 0.1212048337642037, - "grad_norm": 2.514068785589029, - "learning_rate": 3.913470345697929e-06, - "loss": 1.0179, - "step": 1008 - }, - { - "epoch": 0.12132507665484278, - "grad_norm": 1.9159047505907443, - "learning_rate": 3.913243550860153e-06, - "loss": 1.0843, - "step": 1009 - }, - { - "epoch": 0.12144531954548188, - "grad_norm": 1.678199925517068, - "learning_rate": 3.913016465784852e-06, - "loss": 0.9918, - "step": 1010 - }, - { - "epoch": 0.12156556243612096, - "grad_norm": 2.7041339204174046, - "learning_rate": 3.912789090506474e-06, - "loss": 0.9419, - "step": 1011 - }, - { - "epoch": 0.12168580532676006, - "grad_norm": 2.5496215988340722, - "learning_rate": 3.9125614250595114e-06, - "loss": 0.9602, - "step": 1012 - }, - { - "epoch": 0.12180604821739914, - "grad_norm": 2.3258477665029536, - "learning_rate": 3.912333469478502e-06, - "loss": 1.1206, - "step": 1013 - }, - { - "epoch": 0.12192629110803824, - "grad_norm": 2.025728320788544, - "learning_rate": 3.912105223798025e-06, - "loss": 1.0124, - "step": 1014 - }, - { - "epoch": 0.12204653399867733, - "grad_norm": 1.096125883255406, - "learning_rate": 3.9118766880527065e-06, - "loss": 0.9512, - "step": 1015 - }, - { - "epoch": 0.12216677688931642, - "grad_norm": 1.5178820135178688, - "learning_rate": 3.9116478622772145e-06, - "loss": 0.9568, - "step": 1016 - }, - { - "epoch": 0.12228701977995551, - "grad_norm": 1.5542185473880576, - "learning_rate": 3.911418746506261e-06, - "loss": 1.1028, - "step": 1017 - }, - { - "epoch": 0.1224072626705946, - "grad_norm": 1.5666829385656422, - "learning_rate": 3.911189340774604e-06, - "loss": 1.0095, - "step": 1018 - }, - { - "epoch": 0.1225275055612337, - "grad_norm": 22.7884786306761, - "learning_rate": 3.910959645117043e-06, - "loss": 1.029, - "step": 1019 - }, - { - "epoch": 0.12264774845187278, - "grad_norm": 0.9135337017296417, - "learning_rate": 3.910729659568423e-06, - "loss": 0.8258, - "step": 1020 - }, - { - "epoch": 0.12276799134251187, - "grad_norm": 1.665961815880741, - "learning_rate": 3.9104993841636344e-06, - "loss": 1.0516, - "step": 1021 - }, - { - "epoch": 0.12288823423315097, - "grad_norm": 1.6259182023122662, - "learning_rate": 3.910268818937608e-06, - "loss": 1.0373, - "step": 1022 - }, - { - "epoch": 0.12300847712379005, - "grad_norm": 2.0726374401356242, - "learning_rate": 3.9100379639253196e-06, - "loss": 1.0984, - "step": 1023 - }, - { - "epoch": 0.12312872001442915, - "grad_norm": 2.5124899716912545, - "learning_rate": 3.909806819161791e-06, - "loss": 1.0846, - "step": 1024 - }, - { - "epoch": 0.12324896290506823, - "grad_norm": 2.0253311781219874, - "learning_rate": 3.909575384682086e-06, - "loss": 1.0929, - "step": 1025 - }, - { - "epoch": 0.12336920579570733, - "grad_norm": 1.612056461221017, - "learning_rate": 3.9093436605213144e-06, - "loss": 0.9223, - "step": 1026 - }, - { - "epoch": 0.12348944868634643, - "grad_norm": 1.9468101037538625, - "learning_rate": 3.909111646714627e-06, - "loss": 1.0183, - "step": 1027 - }, - { - "epoch": 0.12360969157698551, - "grad_norm": 2.65623671584454, - "learning_rate": 3.9088793432972206e-06, - "loss": 0.9526, - "step": 1028 - }, - { - "epoch": 0.1237299344676246, - "grad_norm": 1.9441443923260666, - "learning_rate": 3.908646750304336e-06, - "loss": 1.0492, - "step": 1029 - }, - { - "epoch": 0.12385017735826369, - "grad_norm": 1.355765758152004, - "learning_rate": 3.908413867771257e-06, - "loss": 1.0902, - "step": 1030 - }, - { - "epoch": 0.12397042024890279, - "grad_norm": 1.6921261847633418, - "learning_rate": 3.908180695733311e-06, - "loss": 1.0404, - "step": 1031 - }, - { - "epoch": 0.12409066313954187, - "grad_norm": 1.6966366145552103, - "learning_rate": 3.907947234225871e-06, - "loss": 1.0539, - "step": 1032 - }, - { - "epoch": 0.12421090603018096, - "grad_norm": 1.7922012581334585, - "learning_rate": 3.907713483284352e-06, - "loss": 1.1007, - "step": 1033 - }, - { - "epoch": 0.12433114892082006, - "grad_norm": 3.264855691194662, - "learning_rate": 3.907479442944216e-06, - "loss": 1.2106, - "step": 1034 - }, - { - "epoch": 0.12445139181145914, - "grad_norm": 2.2626927836358335, - "learning_rate": 3.907245113240963e-06, - "loss": 1.1437, - "step": 1035 - }, - { - "epoch": 0.12457163470209824, - "grad_norm": 3.5869227034591673, - "learning_rate": 3.907010494210144e-06, - "loss": 0.974, - "step": 1036 - }, - { - "epoch": 0.12469187759273732, - "grad_norm": 1.9237422401118038, - "learning_rate": 3.9067755858873495e-06, - "loss": 1.1537, - "step": 1037 - }, - { - "epoch": 0.12481212048337642, - "grad_norm": 0.9535884980918576, - "learning_rate": 3.906540388308214e-06, - "loss": 0.8934, - "step": 1038 - }, - { - "epoch": 0.12493236337401552, - "grad_norm": 1.7811761138047726, - "learning_rate": 3.906304901508417e-06, - "loss": 1.0471, - "step": 1039 - }, - { - "epoch": 0.12505260626465461, - "grad_norm": 1.8471382173002202, - "learning_rate": 3.9060691255236835e-06, - "loss": 0.9812, - "step": 1040 - }, - { - "epoch": 0.1251728491552937, - "grad_norm": 1.497535827284561, - "learning_rate": 3.905833060389778e-06, - "loss": 1.0475, - "step": 1041 - }, - { - "epoch": 0.12529309204593278, - "grad_norm": 2.031929254667022, - "learning_rate": 3.905596706142513e-06, - "loss": 1.0119, - "step": 1042 - }, - { - "epoch": 0.12541333493657186, - "grad_norm": 1.9891190777783176, - "learning_rate": 3.9053600628177435e-06, - "loss": 1.0821, - "step": 1043 - }, - { - "epoch": 0.12553357782721097, - "grad_norm": 1.958206280854728, - "learning_rate": 3.905123130451367e-06, - "loss": 1.075, - "step": 1044 - }, - { - "epoch": 0.12565382071785006, - "grad_norm": 2.105280766915412, - "learning_rate": 3.904885909079326e-06, - "loss": 1.0249, - "step": 1045 - }, - { - "epoch": 0.12577406360848914, - "grad_norm": 2.057139395056188, - "learning_rate": 3.904648398737607e-06, - "loss": 1.0005, - "step": 1046 - }, - { - "epoch": 0.12589430649912825, - "grad_norm": 1.9576872984239961, - "learning_rate": 3.9044105994622406e-06, - "loss": 1.0075, - "step": 1047 - }, - { - "epoch": 0.12601454938976733, - "grad_norm": 1.678754964609538, - "learning_rate": 3.9041725112893005e-06, - "loss": 1.0452, - "step": 1048 - }, - { - "epoch": 0.12613479228040642, - "grad_norm": 2.676621621142012, - "learning_rate": 3.903934134254904e-06, - "loss": 0.9814, - "step": 1049 - }, - { - "epoch": 0.1262550351710455, - "grad_norm": 1.9035130222677734, - "learning_rate": 3.903695468395213e-06, - "loss": 1.0777, - "step": 1050 - }, - { - "epoch": 0.1263752780616846, - "grad_norm": 2.049888431025181, - "learning_rate": 3.903456513746434e-06, - "loss": 0.7925, - "step": 1051 - }, - { - "epoch": 0.1264955209523237, - "grad_norm": 1.642702512986784, - "learning_rate": 3.903217270344815e-06, - "loss": 1.1065, - "step": 1052 - }, - { - "epoch": 0.12661576384296278, - "grad_norm": 1.5581444879163162, - "learning_rate": 3.902977738226648e-06, - "loss": 1.0553, - "step": 1053 - }, - { - "epoch": 0.12673600673360189, - "grad_norm": 1.7524764386169975, - "learning_rate": 3.902737917428273e-06, - "loss": 1.1377, - "step": 1054 - }, - { - "epoch": 0.12685624962424097, - "grad_norm": 2.090061982070796, - "learning_rate": 3.902497807986068e-06, - "loss": 1.0697, - "step": 1055 - }, - { - "epoch": 0.12697649251488005, - "grad_norm": 1.6036323384081308, - "learning_rate": 3.902257409936458e-06, - "loss": 1.0641, - "step": 1056 - }, - { - "epoch": 0.12709673540551916, - "grad_norm": 1.772332607699371, - "learning_rate": 3.902016723315912e-06, - "loss": 1.0734, - "step": 1057 - }, - { - "epoch": 0.12721697829615825, - "grad_norm": 2.173247857119413, - "learning_rate": 3.901775748160941e-06, - "loss": 0.9191, - "step": 1058 - }, - { - "epoch": 0.12733722118679733, - "grad_norm": 0.8261966566332596, - "learning_rate": 3.901534484508101e-06, - "loss": 0.869, - "step": 1059 - }, - { - "epoch": 0.1274574640774364, - "grad_norm": 2.2185247602441596, - "learning_rate": 3.901292932393991e-06, - "loss": 0.9757, - "step": 1060 - }, - { - "epoch": 0.12757770696807552, - "grad_norm": 1.9922791104664161, - "learning_rate": 3.9010510918552555e-06, - "loss": 1.0774, - "step": 1061 - }, - { - "epoch": 0.1276979498587146, - "grad_norm": 3.7404185914572246, - "learning_rate": 3.900808962928581e-06, - "loss": 0.9784, - "step": 1062 - }, - { - "epoch": 0.1278181927493537, - "grad_norm": 1.8210363954261861, - "learning_rate": 3.900566545650698e-06, - "loss": 1.1157, - "step": 1063 - }, - { - "epoch": 0.1279384356399928, - "grad_norm": 2.092586592656589, - "learning_rate": 3.900323840058381e-06, - "loss": 1.0432, - "step": 1064 - }, - { - "epoch": 0.12805867853063188, - "grad_norm": 1.7102486255664207, - "learning_rate": 3.900080846188449e-06, - "loss": 1.0437, - "step": 1065 - }, - { - "epoch": 0.12817892142127096, - "grad_norm": 1.6394165674859567, - "learning_rate": 3.8998375640777625e-06, - "loss": 1.0353, - "step": 1066 - }, - { - "epoch": 0.12829916431191005, - "grad_norm": 0.7334871403555662, - "learning_rate": 3.899593993763229e-06, - "loss": 0.7838, - "step": 1067 - }, - { - "epoch": 0.12841940720254916, - "grad_norm": 2.81999471041677, - "learning_rate": 3.899350135281796e-06, - "loss": 1.0412, - "step": 1068 - }, - { - "epoch": 0.12853965009318824, - "grad_norm": 1.9511685757995756, - "learning_rate": 3.8991059886704585e-06, - "loss": 1.0234, - "step": 1069 - }, - { - "epoch": 0.12865989298382732, - "grad_norm": 1.9032568649434758, - "learning_rate": 3.898861553966252e-06, - "loss": 1.0485, - "step": 1070 - }, - { - "epoch": 0.12878013587446643, - "grad_norm": 1.583850633875896, - "learning_rate": 3.898616831206257e-06, - "loss": 1.1068, - "step": 1071 - }, - { - "epoch": 0.12890037876510552, - "grad_norm": 2.1048305584631004, - "learning_rate": 3.8983718204276e-06, - "loss": 1.0035, - "step": 1072 - }, - { - "epoch": 0.1290206216557446, - "grad_norm": 2.1979881624522624, - "learning_rate": 3.898126521667446e-06, - "loss": 1.0626, - "step": 1073 - }, - { - "epoch": 0.12914086454638368, - "grad_norm": 1.8285140577218701, - "learning_rate": 3.897880934963007e-06, - "loss": 1.066, - "step": 1074 - }, - { - "epoch": 0.1292611074370228, - "grad_norm": 1.8197566808763181, - "learning_rate": 3.89763506035154e-06, - "loss": 1.0042, - "step": 1075 - }, - { - "epoch": 0.12938135032766188, - "grad_norm": 1.588049398557139, - "learning_rate": 3.897388897870343e-06, - "loss": 1.047, - "step": 1076 - }, - { - "epoch": 0.12950159321830096, - "grad_norm": 1.694483048706273, - "learning_rate": 3.89714244755676e-06, - "loss": 0.9722, - "step": 1077 - }, - { - "epoch": 0.12962183610894007, - "grad_norm": 2.108762298891826, - "learning_rate": 3.896895709448175e-06, - "loss": 1.0867, - "step": 1078 - }, - { - "epoch": 0.12974207899957915, - "grad_norm": 3.520194783890278, - "learning_rate": 3.896648683582019e-06, - "loss": 0.9971, - "step": 1079 - }, - { - "epoch": 0.12986232189021824, - "grad_norm": 3.3811845981104054, - "learning_rate": 3.896401369995766e-06, - "loss": 1.036, - "step": 1080 - }, - { - "epoch": 0.12998256478085732, - "grad_norm": 1.6079191270769717, - "learning_rate": 3.896153768726932e-06, - "loss": 1.0165, - "step": 1081 - }, - { - "epoch": 0.13010280767149643, - "grad_norm": 2.0324519196063346, - "learning_rate": 3.8959058798130806e-06, - "loss": 1.1096, - "step": 1082 - }, - { - "epoch": 0.1302230505621355, - "grad_norm": 1.726735102108602, - "learning_rate": 3.895657703291814e-06, - "loss": 0.9798, - "step": 1083 - }, - { - "epoch": 0.1303432934527746, - "grad_norm": 3.221466547682045, - "learning_rate": 3.895409239200781e-06, - "loss": 1.0281, - "step": 1084 - }, - { - "epoch": 0.1304635363434137, - "grad_norm": 2.0639214674698882, - "learning_rate": 3.895160487577673e-06, - "loss": 1.1468, - "step": 1085 - }, - { - "epoch": 0.1305837792340528, - "grad_norm": 0.7841811316679962, - "learning_rate": 3.894911448460226e-06, - "loss": 0.8643, - "step": 1086 - }, - { - "epoch": 0.13070402212469187, - "grad_norm": 1.736617485797298, - "learning_rate": 3.8946621218862195e-06, - "loss": 0.9569, - "step": 1087 - }, - { - "epoch": 0.13082426501533098, - "grad_norm": 1.7971400189880895, - "learning_rate": 3.894412507893475e-06, - "loss": 1.1116, - "step": 1088 - }, - { - "epoch": 0.13094450790597006, - "grad_norm": 1.7200557950307307, - "learning_rate": 3.894162606519859e-06, - "loss": 0.9493, - "step": 1089 - }, - { - "epoch": 0.13106475079660915, - "grad_norm": 1.8546621396420642, - "learning_rate": 3.893912417803282e-06, - "loss": 0.9974, - "step": 1090 - }, - { - "epoch": 0.13118499368724823, - "grad_norm": 2.6073651825531514, - "learning_rate": 3.8936619417816975e-06, - "loss": 0.9973, - "step": 1091 - }, - { - "epoch": 0.13130523657788734, - "grad_norm": 1.8501126693233236, - "learning_rate": 3.8934111784931015e-06, - "loss": 0.9567, - "step": 1092 - }, - { - "epoch": 0.13142547946852642, - "grad_norm": 0.980682623284336, - "learning_rate": 3.893160127975535e-06, - "loss": 0.8749, - "step": 1093 - }, - { - "epoch": 0.1315457223591655, - "grad_norm": 1.9610265582405135, - "learning_rate": 3.8929087902670826e-06, - "loss": 1.0383, - "step": 1094 - }, - { - "epoch": 0.13166596524980462, - "grad_norm": 0.9385677614864104, - "learning_rate": 3.8926571654058715e-06, - "loss": 0.858, - "step": 1095 - }, - { - "epoch": 0.1317862081404437, - "grad_norm": 2.265059722887741, - "learning_rate": 3.892405253430074e-06, - "loss": 0.9939, - "step": 1096 - }, - { - "epoch": 0.13190645103108278, - "grad_norm": 1.817061701821297, - "learning_rate": 3.892153054377904e-06, - "loss": 1.0536, - "step": 1097 - }, - { - "epoch": 0.13202669392172187, - "grad_norm": 1.0063654839252105, - "learning_rate": 3.891900568287619e-06, - "loss": 0.8692, - "step": 1098 - }, - { - "epoch": 0.13214693681236098, - "grad_norm": 2.327285922619952, - "learning_rate": 3.891647795197523e-06, - "loss": 0.9621, - "step": 1099 - }, - { - "epoch": 0.13226717970300006, - "grad_norm": 2.0342611442149083, - "learning_rate": 3.8913947351459605e-06, - "loss": 0.9194, - "step": 1100 - }, - { - "epoch": 0.13238742259363914, - "grad_norm": 1.620907618855978, - "learning_rate": 3.89114138817132e-06, - "loss": 0.9057, - "step": 1101 - }, - { - "epoch": 0.13250766548427825, - "grad_norm": 1.767048624722186, - "learning_rate": 3.890887754312035e-06, - "loss": 1.0709, - "step": 1102 - }, - { - "epoch": 0.13262790837491734, - "grad_norm": 2.1990766191374864, - "learning_rate": 3.890633833606581e-06, - "loss": 1.1015, - "step": 1103 - }, - { - "epoch": 0.13274815126555642, - "grad_norm": 2.3189778712089906, - "learning_rate": 3.890379626093477e-06, - "loss": 0.9238, - "step": 1104 - }, - { - "epoch": 0.1328683941561955, - "grad_norm": 2.1333425233563523, - "learning_rate": 3.890125131811287e-06, - "loss": 1.1469, - "step": 1105 - }, - { - "epoch": 0.1329886370468346, - "grad_norm": 1.6695445565821128, - "learning_rate": 3.889870350798618e-06, - "loss": 0.9833, - "step": 1106 - }, - { - "epoch": 0.1331088799374737, - "grad_norm": 1.4206720650324358, - "learning_rate": 3.889615283094119e-06, - "loss": 1.0222, - "step": 1107 - }, - { - "epoch": 0.13322912282811278, - "grad_norm": 2.0250415703545, - "learning_rate": 3.889359928736485e-06, - "loss": 1.0687, - "step": 1108 - }, - { - "epoch": 0.1333493657187519, - "grad_norm": 1.7338819748113432, - "learning_rate": 3.889104287764451e-06, - "loss": 1.1351, - "step": 1109 - }, - { - "epoch": 0.13346960860939097, - "grad_norm": 1.7839080653900161, - "learning_rate": 3.888848360216798e-06, - "loss": 1.1295, - "step": 1110 - }, - { - "epoch": 0.13358985150003005, - "grad_norm": 0.8147444901398578, - "learning_rate": 3.888592146132351e-06, - "loss": 0.814, - "step": 1111 - }, - { - "epoch": 0.13371009439066917, - "grad_norm": 1.6571554911653323, - "learning_rate": 3.888335645549978e-06, - "loss": 1.0124, - "step": 1112 - }, - { - "epoch": 0.13383033728130825, - "grad_norm": 2.093605719762094, - "learning_rate": 3.888078858508588e-06, - "loss": 1.0539, - "step": 1113 - }, - { - "epoch": 0.13395058017194733, - "grad_norm": 1.908714077654591, - "learning_rate": 3.8878217850471365e-06, - "loss": 1.077, - "step": 1114 - }, - { - "epoch": 0.13407082306258641, - "grad_norm": 1.8550410048690253, - "learning_rate": 3.887564425204621e-06, - "loss": 0.9689, - "step": 1115 - }, - { - "epoch": 0.13419106595322552, - "grad_norm": 0.8359264836521297, - "learning_rate": 3.887306779020083e-06, - "loss": 0.8054, - "step": 1116 - }, - { - "epoch": 0.1343113088438646, - "grad_norm": 3.7824977360912135, - "learning_rate": 3.887048846532608e-06, - "loss": 0.9485, - "step": 1117 - }, - { - "epoch": 0.1344315517345037, - "grad_norm": 0.811657909356512, - "learning_rate": 3.8867906277813224e-06, - "loss": 0.8289, - "step": 1118 - }, - { - "epoch": 0.1345517946251428, - "grad_norm": 1.7342891500034538, - "learning_rate": 3.886532122805399e-06, - "loss": 0.9738, - "step": 1119 - }, - { - "epoch": 0.13467203751578188, - "grad_norm": 2.231902455054471, - "learning_rate": 3.886273331644053e-06, - "loss": 1.1222, - "step": 1120 - }, - { - "epoch": 0.13479228040642097, - "grad_norm": 1.9701044176961728, - "learning_rate": 3.886014254336542e-06, - "loss": 1.0598, - "step": 1121 - }, - { - "epoch": 0.13491252329706005, - "grad_norm": 1.6834286212704705, - "learning_rate": 3.885754890922168e-06, - "loss": 1.1516, - "step": 1122 - }, - { - "epoch": 0.13503276618769916, - "grad_norm": 1.7815538113818288, - "learning_rate": 3.885495241440277e-06, - "loss": 1.0115, - "step": 1123 - }, - { - "epoch": 0.13515300907833824, - "grad_norm": 1.953782970225934, - "learning_rate": 3.885235305930257e-06, - "loss": 0.9754, - "step": 1124 - }, - { - "epoch": 0.13527325196897733, - "grad_norm": 1.9233792008738173, - "learning_rate": 3.884975084431539e-06, - "loss": 1.0932, - "step": 1125 - }, - { - "epoch": 0.13539349485961644, - "grad_norm": 2.161705634129381, - "learning_rate": 3.8847145769836e-06, - "loss": 1.1531, - "step": 1126 - }, - { - "epoch": 0.13551373775025552, - "grad_norm": 2.809238662212075, - "learning_rate": 3.884453783625959e-06, - "loss": 0.9031, - "step": 1127 - }, - { - "epoch": 0.1356339806408946, - "grad_norm": 2.0178933797444802, - "learning_rate": 3.884192704398176e-06, - "loss": 1.0839, - "step": 1128 - }, - { - "epoch": 0.13575422353153369, - "grad_norm": 1.873168304930497, - "learning_rate": 3.883931339339858e-06, - "loss": 0.9746, - "step": 1129 - }, - { - "epoch": 0.1358744664221728, - "grad_norm": 1.8426129989445408, - "learning_rate": 3.883669688490654e-06, - "loss": 1.0187, - "step": 1130 - }, - { - "epoch": 0.13599470931281188, - "grad_norm": 1.8684376637404019, - "learning_rate": 3.883407751890256e-06, - "loss": 1.0771, - "step": 1131 - }, - { - "epoch": 0.13611495220345096, - "grad_norm": 1.6830057413974506, - "learning_rate": 3.8831455295783994e-06, - "loss": 1.0776, - "step": 1132 - }, - { - "epoch": 0.13623519509409007, - "grad_norm": 1.6910784890555022, - "learning_rate": 3.882883021594864e-06, - "loss": 0.9642, - "step": 1133 - }, - { - "epoch": 0.13635543798472916, - "grad_norm": 1.8360834591249033, - "learning_rate": 3.8826202279794705e-06, - "loss": 1.0977, - "step": 1134 - }, - { - "epoch": 0.13647568087536824, - "grad_norm": 1.8529386275493853, - "learning_rate": 3.882357148772085e-06, - "loss": 0.9242, - "step": 1135 - }, - { - "epoch": 0.13659592376600732, - "grad_norm": 30.78098032876796, - "learning_rate": 3.882093784012617e-06, - "loss": 1.0724, - "step": 1136 - }, - { - "epoch": 0.13671616665664643, - "grad_norm": 2.0060966076545674, - "learning_rate": 3.881830133741019e-06, - "loss": 1.0626, - "step": 1137 - }, - { - "epoch": 0.13683640954728551, - "grad_norm": 7.891922706175229, - "learning_rate": 3.881566197997285e-06, - "loss": 0.9932, - "step": 1138 - }, - { - "epoch": 0.1369566524379246, - "grad_norm": 1.5529556980384824, - "learning_rate": 3.881301976821456e-06, - "loss": 0.9763, - "step": 1139 - }, - { - "epoch": 0.1370768953285637, - "grad_norm": 1.9423440245624404, - "learning_rate": 3.881037470253612e-06, - "loss": 1.1334, - "step": 1140 - }, - { - "epoch": 0.1371971382192028, - "grad_norm": 3.1968529306136597, - "learning_rate": 3.88077267833388e-06, - "loss": 1.0291, - "step": 1141 - }, - { - "epoch": 0.13731738110984187, - "grad_norm": 1.8604103860040684, - "learning_rate": 3.880507601102427e-06, - "loss": 1.0652, - "step": 1142 - }, - { - "epoch": 0.13743762400048098, - "grad_norm": 1.7018684161251842, - "learning_rate": 3.880242238599467e-06, - "loss": 1.0471, - "step": 1143 - }, - { - "epoch": 0.13755786689112007, - "grad_norm": 1.663724193301201, - "learning_rate": 3.879976590865254e-06, - "loss": 1.0622, - "step": 1144 - }, - { - "epoch": 0.13767810978175915, - "grad_norm": 1.757240575361628, - "learning_rate": 3.879710657940087e-06, - "loss": 1.1034, - "step": 1145 - }, - { - "epoch": 0.13779835267239823, - "grad_norm": 1.7247447115198735, - "learning_rate": 3.879444439864308e-06, - "loss": 0.9283, - "step": 1146 - }, - { - "epoch": 0.13791859556303734, - "grad_norm": 1.9105841970532313, - "learning_rate": 3.879177936678301e-06, - "loss": 1.0853, - "step": 1147 - }, - { - "epoch": 0.13803883845367643, - "grad_norm": 3.5344442588435414, - "learning_rate": 3.878911148422496e-06, - "loss": 1.0077, - "step": 1148 - }, - { - "epoch": 0.1381590813443155, - "grad_norm": 2.0912885858655037, - "learning_rate": 3.878644075137364e-06, - "loss": 0.9277, - "step": 1149 - }, - { - "epoch": 0.13827932423495462, - "grad_norm": 2.0047926699018013, - "learning_rate": 3.878376716863418e-06, - "loss": 1.0153, - "step": 1150 - }, - { - "epoch": 0.1383995671255937, - "grad_norm": 2.0356360398936895, - "learning_rate": 3.878109073641219e-06, - "loss": 0.9458, - "step": 1151 - }, - { - "epoch": 0.13851981001623279, - "grad_norm": 1.4274934703724331, - "learning_rate": 3.877841145511366e-06, - "loss": 1.0366, - "step": 1152 - }, - { - "epoch": 0.13864005290687187, - "grad_norm": 1.5336783480520984, - "learning_rate": 3.8775729325145035e-06, - "loss": 1.0513, - "step": 1153 - }, - { - "epoch": 0.13876029579751098, - "grad_norm": 0.7815118350904118, - "learning_rate": 3.877304434691321e-06, - "loss": 0.8799, - "step": 1154 - }, - { - "epoch": 0.13888053868815006, - "grad_norm": 1.584218037371198, - "learning_rate": 3.877035652082548e-06, - "loss": 1.024, - "step": 1155 - }, - { - "epoch": 0.13900078157878915, - "grad_norm": 1.7576760472211874, - "learning_rate": 3.87676658472896e-06, - "loss": 1.0746, - "step": 1156 - }, - { - "epoch": 0.13912102446942826, - "grad_norm": 1.6651370022514933, - "learning_rate": 3.876497232671372e-06, - "loss": 1.0782, - "step": 1157 - }, - { - "epoch": 0.13924126736006734, - "grad_norm": 2.2352280488894176, - "learning_rate": 3.876227595950647e-06, - "loss": 1.0617, - "step": 1158 - }, - { - "epoch": 0.13936151025070642, - "grad_norm": 1.4991020057789493, - "learning_rate": 3.875957674607686e-06, - "loss": 1.0135, - "step": 1159 - }, - { - "epoch": 0.1394817531413455, - "grad_norm": 2.1551149117263706, - "learning_rate": 3.8756874686834386e-06, - "loss": 1.1155, - "step": 1160 - }, - { - "epoch": 0.13960199603198462, - "grad_norm": 1.5799184749189235, - "learning_rate": 3.875416978218893e-06, - "loss": 1.0309, - "step": 1161 - }, - { - "epoch": 0.1397222389226237, - "grad_norm": 3.41222508479392, - "learning_rate": 3.8751462032550835e-06, - "loss": 1.054, - "step": 1162 - }, - { - "epoch": 0.13984248181326278, - "grad_norm": 3.0773416674593106, - "learning_rate": 3.874875143833085e-06, - "loss": 1.0484, - "step": 1163 - }, - { - "epoch": 0.1399627247039019, - "grad_norm": 1.6931578836384402, - "learning_rate": 3.874603799994019e-06, - "loss": 0.9206, - "step": 1164 - }, - { - "epoch": 0.14008296759454097, - "grad_norm": 1.8755073496736314, - "learning_rate": 3.874332171779046e-06, - "loss": 1.1067, - "step": 1165 - }, - { - "epoch": 0.14020321048518006, - "grad_norm": 1.7107702025397296, - "learning_rate": 3.874060259229373e-06, - "loss": 0.9915, - "step": 1166 - }, - { - "epoch": 0.14032345337581917, - "grad_norm": 2.0108810310171363, - "learning_rate": 3.873788062386249e-06, - "loss": 1.1511, - "step": 1167 - }, - { - "epoch": 0.14044369626645825, - "grad_norm": 1.7585783921752767, - "learning_rate": 3.873515581290965e-06, - "loss": 1.0514, - "step": 1168 - }, - { - "epoch": 0.14056393915709733, - "grad_norm": 1.9879266388940906, - "learning_rate": 3.8732428159848575e-06, - "loss": 0.9836, - "step": 1169 - }, - { - "epoch": 0.14068418204773642, - "grad_norm": 2.0013336265194392, - "learning_rate": 3.872969766509304e-06, - "loss": 1.0114, - "step": 1170 - }, - { - "epoch": 0.14080442493837553, - "grad_norm": 0.846170837891973, - "learning_rate": 3.872696432905726e-06, - "loss": 0.8187, - "step": 1171 - }, - { - "epoch": 0.1409246678290146, - "grad_norm": 2.014035448317347, - "learning_rate": 3.872422815215589e-06, - "loss": 0.9478, - "step": 1172 - }, - { - "epoch": 0.1410449107196537, - "grad_norm": 1.7477675230697205, - "learning_rate": 3.8721489134803994e-06, - "loss": 0.9771, - "step": 1173 - }, - { - "epoch": 0.1411651536102928, - "grad_norm": 2.146738275119441, - "learning_rate": 3.871874727741707e-06, - "loss": 0.9561, - "step": 1174 - }, - { - "epoch": 0.1412853965009319, - "grad_norm": 1.751268948568882, - "learning_rate": 3.871600258041108e-06, - "loss": 1.1917, - "step": 1175 - }, - { - "epoch": 0.14140563939157097, - "grad_norm": 2.353872488990746, - "learning_rate": 3.871325504420238e-06, - "loss": 1.0847, - "step": 1176 - }, - { - "epoch": 0.14152588228221005, - "grad_norm": 1.6454344237853737, - "learning_rate": 3.871050466920776e-06, - "loss": 1.0411, - "step": 1177 - }, - { - "epoch": 0.14164612517284916, - "grad_norm": 1.8907511645379944, - "learning_rate": 3.870775145584447e-06, - "loss": 1.0331, - "step": 1178 - }, - { - "epoch": 0.14176636806348825, - "grad_norm": 3.5981827970828753, - "learning_rate": 3.8704995404530145e-06, - "loss": 0.8614, - "step": 1179 - }, - { - "epoch": 0.14188661095412733, - "grad_norm": 1.8621676974228705, - "learning_rate": 3.87022365156829e-06, - "loss": 1.0756, - "step": 1180 - }, - { - "epoch": 0.14200685384476644, - "grad_norm": 2.0231498602187323, - "learning_rate": 3.869947478972123e-06, - "loss": 1.0311, - "step": 1181 - }, - { - "epoch": 0.14212709673540552, - "grad_norm": 1.8385284322491058, - "learning_rate": 3.869671022706412e-06, - "loss": 1.0452, - "step": 1182 - }, - { - "epoch": 0.1422473396260446, - "grad_norm": 2.010675548692373, - "learning_rate": 3.869394282813092e-06, - "loss": 0.8823, - "step": 1183 - }, - { - "epoch": 0.1423675825166837, - "grad_norm": 2.2537412268985686, - "learning_rate": 3.869117259334147e-06, - "loss": 1.1229, - "step": 1184 - }, - { - "epoch": 0.1424878254073228, - "grad_norm": 1.7866051468055388, - "learning_rate": 3.868839952311599e-06, - "loss": 1.0546, - "step": 1185 - }, - { - "epoch": 0.14260806829796188, - "grad_norm": 2.0857977088063375, - "learning_rate": 3.868562361787516e-06, - "loss": 1.0339, - "step": 1186 - }, - { - "epoch": 0.14272831118860096, - "grad_norm": 1.7227420569261518, - "learning_rate": 3.868284487804009e-06, - "loss": 0.927, - "step": 1187 - }, - { - "epoch": 0.14284855407924008, - "grad_norm": 1.5713772177045984, - "learning_rate": 3.86800633040323e-06, - "loss": 1.0104, - "step": 1188 - }, - { - "epoch": 0.14296879696987916, - "grad_norm": 2.0746029715276695, - "learning_rate": 3.867727889627376e-06, - "loss": 1.0159, - "step": 1189 - }, - { - "epoch": 0.14308903986051824, - "grad_norm": 2.0487109388033993, - "learning_rate": 3.867449165518687e-06, - "loss": 1.0159, - "step": 1190 - }, - { - "epoch": 0.14320928275115732, - "grad_norm": 2.0015069529488616, - "learning_rate": 3.867170158119444e-06, - "loss": 0.9382, - "step": 1191 - }, - { - "epoch": 0.14332952564179643, - "grad_norm": 1.9269596231978536, - "learning_rate": 3.866890867471972e-06, - "loss": 0.9812, - "step": 1192 - }, - { - "epoch": 0.14344976853243552, - "grad_norm": 2.5573973675939308, - "learning_rate": 3.86661129361864e-06, - "loss": 1.1263, - "step": 1193 - }, - { - "epoch": 0.1435700114230746, - "grad_norm": 14.34406700698171, - "learning_rate": 3.866331436601859e-06, - "loss": 1.0933, - "step": 1194 - }, - { - "epoch": 0.1436902543137137, - "grad_norm": 1.9008911979036833, - "learning_rate": 3.866051296464083e-06, - "loss": 0.9657, - "step": 1195 - }, - { - "epoch": 0.1438104972043528, - "grad_norm": 1.9775176624034987, - "learning_rate": 3.86577087324781e-06, - "loss": 1.0744, - "step": 1196 - }, - { - "epoch": 0.14393074009499188, - "grad_norm": 2.16513596775124, - "learning_rate": 3.865490166995578e-06, - "loss": 1.0007, - "step": 1197 - }, - { - "epoch": 0.144050982985631, - "grad_norm": 2.332696714293666, - "learning_rate": 3.86520917774997e-06, - "loss": 1.0626, - "step": 1198 - }, - { - "epoch": 0.14417122587627007, - "grad_norm": 2.0418098036227375, - "learning_rate": 3.864927905553614e-06, - "loss": 0.9799, - "step": 1199 - }, - { - "epoch": 0.14429146876690915, - "grad_norm": 1.5024636223272183, - "learning_rate": 3.8646463504491765e-06, - "loss": 1.1179, - "step": 1200 - }, - { - "epoch": 0.14441171165754824, - "grad_norm": 1.7875430388704516, - "learning_rate": 3.8643645124793705e-06, - "loss": 1.0678, - "step": 1201 - }, - { - "epoch": 0.14453195454818735, - "grad_norm": 1.559201382621438, - "learning_rate": 3.8640823916869515e-06, - "loss": 0.9798, - "step": 1202 - }, - { - "epoch": 0.14465219743882643, - "grad_norm": 1.5705107758257704, - "learning_rate": 3.863799988114714e-06, - "loss": 1.0119, - "step": 1203 - }, - { - "epoch": 0.1447724403294655, - "grad_norm": 5.828907441089377, - "learning_rate": 3.863517301805502e-06, - "loss": 0.9324, - "step": 1204 - }, - { - "epoch": 0.14489268322010462, - "grad_norm": 2.283756290253828, - "learning_rate": 3.863234332802196e-06, - "loss": 1.1978, - "step": 1205 - }, - { - "epoch": 0.1450129261107437, - "grad_norm": 2.2979782465145004, - "learning_rate": 3.862951081147723e-06, - "loss": 0.9705, - "step": 1206 - }, - { - "epoch": 0.1451331690013828, - "grad_norm": 2.4718251451174025, - "learning_rate": 3.862667546885053e-06, - "loss": 1.0115, - "step": 1207 - }, - { - "epoch": 0.14525341189202187, - "grad_norm": 2.520647993183894, - "learning_rate": 3.8623837300571965e-06, - "loss": 0.9592, - "step": 1208 - }, - { - "epoch": 0.14537365478266098, - "grad_norm": 2.3739607695605667, - "learning_rate": 3.8620996307072085e-06, - "loss": 1.064, - "step": 1209 - }, - { - "epoch": 0.14549389767330007, - "grad_norm": 2.05139860034533, - "learning_rate": 3.861815248878188e-06, - "loss": 0.8722, - "step": 1210 - }, - { - "epoch": 0.14561414056393915, - "grad_norm": 2.0367595192970183, - "learning_rate": 3.861530584613274e-06, - "loss": 1.0302, - "step": 1211 - }, - { - "epoch": 0.14573438345457826, - "grad_norm": 2.632406050553416, - "learning_rate": 3.86124563795565e-06, - "loss": 1.0475, - "step": 1212 - }, - { - "epoch": 0.14585462634521734, - "grad_norm": 1.8110681336939236, - "learning_rate": 3.860960408948543e-06, - "loss": 0.937, - "step": 1213 - }, - { - "epoch": 0.14597486923585642, - "grad_norm": 2.1298037815604562, - "learning_rate": 3.860674897635222e-06, - "loss": 1.1281, - "step": 1214 - }, - { - "epoch": 0.1460951121264955, - "grad_norm": 1.8314731943604217, - "learning_rate": 3.860389104058998e-06, - "loss": 1.0607, - "step": 1215 - }, - { - "epoch": 0.14621535501713462, - "grad_norm": 2.061368227148053, - "learning_rate": 3.860103028263227e-06, - "loss": 0.9542, - "step": 1216 - }, - { - "epoch": 0.1463355979077737, - "grad_norm": 2.2426487252239338, - "learning_rate": 3.859816670291304e-06, - "loss": 0.9184, - "step": 1217 - }, - { - "epoch": 0.14645584079841278, - "grad_norm": 1.961096525393174, - "learning_rate": 3.859530030186672e-06, - "loss": 1.1263, - "step": 1218 - }, - { - "epoch": 0.1465760836890519, - "grad_norm": 2.1927457853065166, - "learning_rate": 3.859243107992813e-06, - "loss": 1.0531, - "step": 1219 - }, - { - "epoch": 0.14669632657969098, - "grad_norm": 2.8135619807625147, - "learning_rate": 3.858955903753252e-06, - "loss": 1.0148, - "step": 1220 - }, - { - "epoch": 0.14681656947033006, - "grad_norm": 1.35014537982165, - "learning_rate": 3.858668417511559e-06, - "loss": 1.0602, - "step": 1221 - }, - { - "epoch": 0.14693681236096917, - "grad_norm": 2.361189289918004, - "learning_rate": 3.8583806493113445e-06, - "loss": 0.9876, - "step": 1222 - }, - { - "epoch": 0.14705705525160825, - "grad_norm": 1.7583628153325577, - "learning_rate": 3.858092599196263e-06, - "loss": 1.051, - "step": 1223 - }, - { - "epoch": 0.14717729814224734, - "grad_norm": 2.090121527063507, - "learning_rate": 3.857804267210012e-06, - "loss": 1.0545, - "step": 1224 - }, - { - "epoch": 0.14729754103288642, - "grad_norm": 1.8782049761864148, - "learning_rate": 3.857515653396331e-06, - "loss": 1.1079, - "step": 1225 - }, - { - "epoch": 0.14741778392352553, - "grad_norm": 2.003834029712549, - "learning_rate": 3.857226757799002e-06, - "loss": 1.0901, - "step": 1226 - }, - { - "epoch": 0.1475380268141646, - "grad_norm": 2.10690966047748, - "learning_rate": 3.85693758046185e-06, - "loss": 0.9634, - "step": 1227 - }, - { - "epoch": 0.1476582697048037, - "grad_norm": 1.6564707339044975, - "learning_rate": 3.8566481214287435e-06, - "loss": 1.0584, - "step": 1228 - }, - { - "epoch": 0.1477785125954428, - "grad_norm": 1.7858648250340974, - "learning_rate": 3.8563583807435935e-06, - "loss": 1.1294, - "step": 1229 - }, - { - "epoch": 0.1478987554860819, - "grad_norm": 1.6846978269220534, - "learning_rate": 3.856068358450353e-06, - "loss": 1.0008, - "step": 1230 - }, - { - "epoch": 0.14801899837672097, - "grad_norm": 1.8012440514979198, - "learning_rate": 3.8557780545930186e-06, - "loss": 1.0796, - "step": 1231 - }, - { - "epoch": 0.14813924126736006, - "grad_norm": 1.6538573923223865, - "learning_rate": 3.855487469215628e-06, - "loss": 1.0187, - "step": 1232 - }, - { - "epoch": 0.14825948415799917, - "grad_norm": 1.9950515205977708, - "learning_rate": 3.855196602362264e-06, - "loss": 0.9507, - "step": 1233 - }, - { - "epoch": 0.14837972704863825, - "grad_norm": 2.369420877031134, - "learning_rate": 3.854905454077051e-06, - "loss": 1.1738, - "step": 1234 - }, - { - "epoch": 0.14849996993927733, - "grad_norm": 1.6619993502894486, - "learning_rate": 3.854614024404155e-06, - "loss": 1.1026, - "step": 1235 - }, - { - "epoch": 0.14862021282991644, - "grad_norm": 1.7673571706685038, - "learning_rate": 3.8543223133877865e-06, - "loss": 1.1266, - "step": 1236 - }, - { - "epoch": 0.14874045572055553, - "grad_norm": 2.2399018763440095, - "learning_rate": 3.854030321072198e-06, - "loss": 1.1078, - "step": 1237 - }, - { - "epoch": 0.1488606986111946, - "grad_norm": 1.874993387846414, - "learning_rate": 3.853738047501682e-06, - "loss": 0.9596, - "step": 1238 - }, - { - "epoch": 0.1489809415018337, - "grad_norm": 2.026485548124588, - "learning_rate": 3.85344549272058e-06, - "loss": 1.0075, - "step": 1239 - }, - { - "epoch": 0.1491011843924728, - "grad_norm": 1.6764827940410902, - "learning_rate": 3.853152656773269e-06, - "loss": 1.0559, - "step": 1240 - }, - { - "epoch": 0.14922142728311188, - "grad_norm": 1.76208566705936, - "learning_rate": 3.852859539704174e-06, - "loss": 1.0663, - "step": 1241 - }, - { - "epoch": 0.14934167017375097, - "grad_norm": 2.23080094464169, - "learning_rate": 3.85256614155776e-06, - "loss": 0.9879, - "step": 1242 - }, - { - "epoch": 0.14946191306439008, - "grad_norm": 2.1871340854323296, - "learning_rate": 3.852272462378535e-06, - "loss": 0.9735, - "step": 1243 - }, - { - "epoch": 0.14958215595502916, - "grad_norm": 1.808367853084234, - "learning_rate": 3.85197850221105e-06, - "loss": 1.0147, - "step": 1244 - }, - { - "epoch": 0.14970239884566824, - "grad_norm": 1.7749456639099586, - "learning_rate": 3.851684261099899e-06, - "loss": 0.9879, - "step": 1245 - }, - { - "epoch": 0.14982264173630733, - "grad_norm": 1.9375110748958928, - "learning_rate": 3.851389739089718e-06, - "loss": 1.0946, - "step": 1246 - }, - { - "epoch": 0.14994288462694644, - "grad_norm": 1.8532687666964414, - "learning_rate": 3.851094936225186e-06, - "loss": 1.0346, - "step": 1247 - }, - { - "epoch": 0.15006312751758552, - "grad_norm": 1.5164640011764254, - "learning_rate": 3.850799852551024e-06, - "loss": 1.0032, - "step": 1248 - }, - { - "epoch": 0.1501833704082246, - "grad_norm": 2.0890495716456727, - "learning_rate": 3.850504488111995e-06, - "loss": 1.0864, - "step": 1249 - }, - { - "epoch": 0.15030361329886371, - "grad_norm": 1.6190958603582732, - "learning_rate": 3.850208842952907e-06, - "loss": 1.0523, - "step": 1250 - }, - { - "epoch": 0.1504238561895028, - "grad_norm": 1.800660896830593, - "learning_rate": 3.849912917118608e-06, - "loss": 1.0189, - "step": 1251 - }, - { - "epoch": 0.15054409908014188, - "grad_norm": 0.9688153706306211, - "learning_rate": 3.849616710653992e-06, - "loss": 0.8593, - "step": 1252 - }, - { - "epoch": 0.150664341970781, - "grad_norm": 1.8039602820894032, - "learning_rate": 3.84932022360399e-06, - "loss": 0.9828, - "step": 1253 - }, - { - "epoch": 0.15078458486142007, - "grad_norm": 2.173319055221712, - "learning_rate": 3.849023456013581e-06, - "loss": 1.0657, - "step": 1254 - }, - { - "epoch": 0.15090482775205916, - "grad_norm": 1.8122972177655159, - "learning_rate": 3.848726407927784e-06, - "loss": 0.8449, - "step": 1255 - }, - { - "epoch": 0.15102507064269824, - "grad_norm": 2.335364434108753, - "learning_rate": 3.84842907939166e-06, - "loss": 1.1037, - "step": 1256 - }, - { - "epoch": 0.15114531353333735, - "grad_norm": 2.7749217185586366, - "learning_rate": 3.8481314704503146e-06, - "loss": 0.9391, - "step": 1257 - }, - { - "epoch": 0.15126555642397643, - "grad_norm": 2.1300372837725705, - "learning_rate": 3.847833581148895e-06, - "loss": 1.1153, - "step": 1258 - }, - { - "epoch": 0.15138579931461552, - "grad_norm": 2.4581647481064013, - "learning_rate": 3.84753541153259e-06, - "loss": 1.0324, - "step": 1259 - }, - { - "epoch": 0.15150604220525463, - "grad_norm": 1.4290993736743522, - "learning_rate": 3.847236961646633e-06, - "loss": 1.0651, - "step": 1260 - }, - { - "epoch": 0.1516262850958937, - "grad_norm": 1.9861256243954275, - "learning_rate": 3.846938231536296e-06, - "loss": 1.0131, - "step": 1261 - }, - { - "epoch": 0.1517465279865328, - "grad_norm": 1.748581038541236, - "learning_rate": 3.8466392212468995e-06, - "loss": 1.0377, - "step": 1262 - }, - { - "epoch": 0.15186677087717187, - "grad_norm": 0.8023333510752941, - "learning_rate": 3.8463399308238e-06, - "loss": 0.8783, - "step": 1263 - }, - { - "epoch": 0.15198701376781099, - "grad_norm": 1.601176666676968, - "learning_rate": 3.846040360312402e-06, - "loss": 0.8691, - "step": 1264 - }, - { - "epoch": 0.15210725665845007, - "grad_norm": 1.7624704994275902, - "learning_rate": 3.8457405097581485e-06, - "loss": 1.0412, - "step": 1265 - }, - { - "epoch": 0.15222749954908915, - "grad_norm": 1.674013855287737, - "learning_rate": 3.8454403792065275e-06, - "loss": 1.0095, - "step": 1266 - }, - { - "epoch": 0.15234774243972826, - "grad_norm": 1.7233132560539337, - "learning_rate": 3.845139968703068e-06, - "loss": 1.0827, - "step": 1267 - }, - { - "epoch": 0.15246798533036734, - "grad_norm": 1.6919349742641003, - "learning_rate": 3.844839278293342e-06, - "loss": 1.059, - "step": 1268 - }, - { - "epoch": 0.15258822822100643, - "grad_norm": 1.8699704423904187, - "learning_rate": 3.8445383080229654e-06, - "loss": 0.9875, - "step": 1269 - }, - { - "epoch": 0.1527084711116455, - "grad_norm": 2.0883505361974395, - "learning_rate": 3.844237057937593e-06, - "loss": 0.9674, - "step": 1270 - }, - { - "epoch": 0.15282871400228462, - "grad_norm": 2.371811684227015, - "learning_rate": 3.843935528082926e-06, - "loss": 1.0132, - "step": 1271 - }, - { - "epoch": 0.1529489568929237, - "grad_norm": 1.5754572915959424, - "learning_rate": 3.843633718504704e-06, - "loss": 1.0787, - "step": 1272 - }, - { - "epoch": 0.1530691997835628, - "grad_norm": 2.2361841946723535, - "learning_rate": 3.843331629248715e-06, - "loss": 1.1215, - "step": 1273 - }, - { - "epoch": 0.1531894426742019, - "grad_norm": 2.4645113713347877, - "learning_rate": 3.843029260360782e-06, - "loss": 0.9908, - "step": 1274 - }, - { - "epoch": 0.15330968556484098, - "grad_norm": 1.9542828599212632, - "learning_rate": 3.8427266118867755e-06, - "loss": 1.0138, - "step": 1275 - }, - { - "epoch": 0.15342992845548006, - "grad_norm": 1.8008250729737543, - "learning_rate": 3.842423683872608e-06, - "loss": 1.0545, - "step": 1276 - }, - { - "epoch": 0.15355017134611917, - "grad_norm": 2.751234731046881, - "learning_rate": 3.842120476364232e-06, - "loss": 1.0167, - "step": 1277 - }, - { - "epoch": 0.15367041423675826, - "grad_norm": 2.8065193165981372, - "learning_rate": 3.841816989407644e-06, - "loss": 1.0593, - "step": 1278 - }, - { - "epoch": 0.15379065712739734, - "grad_norm": 2.0618402986592566, - "learning_rate": 3.841513223048884e-06, - "loss": 0.9952, - "step": 1279 - }, - { - "epoch": 0.15391090001803642, - "grad_norm": 2.136555834597024, - "learning_rate": 3.841209177334031e-06, - "loss": 1.0065, - "step": 1280 - }, - { - "epoch": 0.15403114290867553, - "grad_norm": 1.7633198231478187, - "learning_rate": 3.84090485230921e-06, - "loss": 0.9761, - "step": 1281 - }, - { - "epoch": 0.15415138579931462, - "grad_norm": 2.596237644245131, - "learning_rate": 3.840600248020588e-06, - "loss": 0.9927, - "step": 1282 - }, - { - "epoch": 0.1542716286899537, - "grad_norm": 2.0739741351759684, - "learning_rate": 3.840295364514371e-06, - "loss": 1.0256, - "step": 1283 - }, - { - "epoch": 0.1543918715805928, - "grad_norm": 2.438795870100757, - "learning_rate": 3.83999020183681e-06, - "loss": 1.0091, - "step": 1284 - }, - { - "epoch": 0.1545121144712319, - "grad_norm": 2.344493900498591, - "learning_rate": 3.839684760034199e-06, - "loss": 1.0073, - "step": 1285 - }, - { - "epoch": 0.15463235736187098, - "grad_norm": 2.1817137348216034, - "learning_rate": 3.8393790391528716e-06, - "loss": 0.8787, - "step": 1286 - }, - { - "epoch": 0.15475260025251006, - "grad_norm": 2.402805587911419, - "learning_rate": 3.8390730392392075e-06, - "loss": 1.1118, - "step": 1287 - }, - { - "epoch": 0.15487284314314917, - "grad_norm": 1.8759727497846472, - "learning_rate": 3.838766760339626e-06, - "loss": 1.0203, - "step": 1288 - }, - { - "epoch": 0.15499308603378825, - "grad_norm": 2.4501931717681957, - "learning_rate": 3.838460202500587e-06, - "loss": 1.0219, - "step": 1289 - }, - { - "epoch": 0.15511332892442733, - "grad_norm": 2.072282882748002, - "learning_rate": 3.838153365768599e-06, - "loss": 0.9691, - "step": 1290 - }, - { - "epoch": 0.15523357181506645, - "grad_norm": 2.336650828780819, - "learning_rate": 3.837846250190206e-06, - "loss": 0.9762, - "step": 1291 - }, - { - "epoch": 0.15535381470570553, - "grad_norm": 1.964595953481787, - "learning_rate": 3.837538855811998e-06, - "loss": 0.9938, - "step": 1292 - }, - { - "epoch": 0.1554740575963446, - "grad_norm": 2.2424313083704503, - "learning_rate": 3.837231182680606e-06, - "loss": 0.9328, - "step": 1293 - }, - { - "epoch": 0.1555943004869837, - "grad_norm": 3.9640145472963084, - "learning_rate": 3.836923230842706e-06, - "loss": 0.991, - "step": 1294 - }, - { - "epoch": 0.1557145433776228, - "grad_norm": 1.9309622689131096, - "learning_rate": 3.836615000345011e-06, - "loss": 1.0408, - "step": 1295 - }, - { - "epoch": 0.1558347862682619, - "grad_norm": 1.9607429559871241, - "learning_rate": 3.836306491234282e-06, - "loss": 1.0063, - "step": 1296 - }, - { - "epoch": 0.15595502915890097, - "grad_norm": 2.795264776855583, - "learning_rate": 3.835997703557317e-06, - "loss": 0.97, - "step": 1297 - }, - { - "epoch": 0.15607527204954008, - "grad_norm": 1.4491340221715479, - "learning_rate": 3.83568863736096e-06, - "loss": 1.033, - "step": 1298 - }, - { - "epoch": 0.15619551494017916, - "grad_norm": 2.3495378364460016, - "learning_rate": 3.8353792926920975e-06, - "loss": 1.1169, - "step": 1299 - }, - { - "epoch": 0.15631575783081825, - "grad_norm": 2.1108671209093743, - "learning_rate": 3.835069669597655e-06, - "loss": 1.045, - "step": 1300 - }, - { - "epoch": 0.15643600072145733, - "grad_norm": 2.303756474088947, - "learning_rate": 3.834759768124603e-06, - "loss": 1.0241, - "step": 1301 - }, - { - "epoch": 0.15655624361209644, - "grad_norm": 2.5640156613197433, - "learning_rate": 3.834449588319953e-06, - "loss": 0.9906, - "step": 1302 - }, - { - "epoch": 0.15667648650273552, - "grad_norm": 1.7035027881148272, - "learning_rate": 3.834139130230758e-06, - "loss": 1.0663, - "step": 1303 - }, - { - "epoch": 0.1567967293933746, - "grad_norm": 1.4414741479445634, - "learning_rate": 3.833828393904117e-06, - "loss": 1.0455, - "step": 1304 - }, - { - "epoch": 0.15691697228401372, - "grad_norm": 2.116360097996579, - "learning_rate": 3.833517379387165e-06, - "loss": 1.0032, - "step": 1305 - }, - { - "epoch": 0.1570372151746528, - "grad_norm": 1.6173192214764234, - "learning_rate": 3.833206086727085e-06, - "loss": 1.1205, - "step": 1306 - }, - { - "epoch": 0.15715745806529188, - "grad_norm": 2.0599313404159334, - "learning_rate": 3.8328945159710994e-06, - "loss": 0.9423, - "step": 1307 - }, - { - "epoch": 0.157277700955931, - "grad_norm": 1.9517052945626117, - "learning_rate": 3.832582667166473e-06, - "loss": 1.1105, - "step": 1308 - }, - { - "epoch": 0.15739794384657008, - "grad_norm": 1.9311259232943, - "learning_rate": 3.8322705403605125e-06, - "loss": 1.0528, - "step": 1309 - }, - { - "epoch": 0.15751818673720916, - "grad_norm": 2.220736621614127, - "learning_rate": 3.831958135600568e-06, - "loss": 1.0501, - "step": 1310 - }, - { - "epoch": 0.15763842962784824, - "grad_norm": 1.7989120215319476, - "learning_rate": 3.831645452934032e-06, - "loss": 1.0328, - "step": 1311 - }, - { - "epoch": 0.15775867251848735, - "grad_norm": 1.601004452129133, - "learning_rate": 3.831332492408336e-06, - "loss": 1.0374, - "step": 1312 - }, - { - "epoch": 0.15787891540912644, - "grad_norm": 1.7296314151151744, - "learning_rate": 3.831019254070957e-06, - "loss": 0.9143, - "step": 1313 - }, - { - "epoch": 0.15799915829976552, - "grad_norm": 2.5649393148103266, - "learning_rate": 3.8307057379694135e-06, - "loss": 1.1782, - "step": 1314 - }, - { - "epoch": 0.15811940119040463, - "grad_norm": 2.18753672977674, - "learning_rate": 3.830391944151264e-06, - "loss": 1.0497, - "step": 1315 - }, - { - "epoch": 0.1582396440810437, - "grad_norm": 1.6430818599524002, - "learning_rate": 3.830077872664114e-06, - "loss": 0.8923, - "step": 1316 - }, - { - "epoch": 0.1583598869716828, - "grad_norm": 1.7821228833284641, - "learning_rate": 3.829763523555604e-06, - "loss": 0.9551, - "step": 1317 - }, - { - "epoch": 0.15848012986232188, - "grad_norm": 1.9997309861460153, - "learning_rate": 3.829448896873423e-06, - "loss": 1.0248, - "step": 1318 - }, - { - "epoch": 0.158600372752961, - "grad_norm": 1.8535104797742075, - "learning_rate": 3.829133992665299e-06, - "loss": 1.0168, - "step": 1319 - }, - { - "epoch": 0.15872061564360007, - "grad_norm": 2.105941691034293, - "learning_rate": 3.828818810979002e-06, - "loss": 1.1166, - "step": 1320 - }, - { - "epoch": 0.15884085853423915, - "grad_norm": 4.051978939382004, - "learning_rate": 3.8285033518623454e-06, - "loss": 1.025, - "step": 1321 - }, - { - "epoch": 0.15896110142487826, - "grad_norm": 2.150700157956379, - "learning_rate": 3.8281876153631845e-06, - "loss": 1.0475, - "step": 1322 - }, - { - "epoch": 0.15908134431551735, - "grad_norm": 3.316579214377624, - "learning_rate": 3.827871601529416e-06, - "loss": 0.8742, - "step": 1323 - }, - { - "epoch": 0.15920158720615643, - "grad_norm": 1.7795961967806606, - "learning_rate": 3.827555310408979e-06, - "loss": 1.03, - "step": 1324 - }, - { - "epoch": 0.1593218300967955, - "grad_norm": 1.4361041382243822, - "learning_rate": 3.827238742049854e-06, - "loss": 1.0527, - "step": 1325 - }, - { - "epoch": 0.15944207298743462, - "grad_norm": 1.9295103169383123, - "learning_rate": 3.826921896500066e-06, - "loss": 0.7554, - "step": 1326 - }, - { - "epoch": 0.1595623158780737, - "grad_norm": 1.7087026832576018, - "learning_rate": 3.826604773807678e-06, - "loss": 1.0154, - "step": 1327 - }, - { - "epoch": 0.1596825587687128, - "grad_norm": 2.687479773156987, - "learning_rate": 3.826287374020798e-06, - "loss": 0.9578, - "step": 1328 - }, - { - "epoch": 0.1598028016593519, - "grad_norm": 1.9169679349666195, - "learning_rate": 3.825969697187575e-06, - "loss": 1.0565, - "step": 1329 - }, - { - "epoch": 0.15992304454999098, - "grad_norm": 1.673777344688987, - "learning_rate": 3.8256517433562015e-06, - "loss": 0.932, - "step": 1330 - }, - { - "epoch": 0.16004328744063007, - "grad_norm": 5.8610908945868925, - "learning_rate": 3.82533351257491e-06, - "loss": 1.1579, - "step": 1331 - }, - { - "epoch": 0.16016353033126918, - "grad_norm": 1.6836499693792855, - "learning_rate": 3.825015004891975e-06, - "loss": 1.1155, - "step": 1332 - }, - { - "epoch": 0.16028377322190826, - "grad_norm": 2.7409553911544835, - "learning_rate": 3.824696220355716e-06, - "loss": 0.9846, - "step": 1333 - }, - { - "epoch": 0.16040401611254734, - "grad_norm": 1.6423521124512834, - "learning_rate": 3.824377159014491e-06, - "loss": 1.0243, - "step": 1334 - }, - { - "epoch": 0.16052425900318643, - "grad_norm": 1.538456056658442, - "learning_rate": 3.824057820916702e-06, - "loss": 1.0806, - "step": 1335 - }, - { - "epoch": 0.16064450189382554, - "grad_norm": 2.0404939851866173, - "learning_rate": 3.8237382061107904e-06, - "loss": 0.9474, - "step": 1336 - }, - { - "epoch": 0.16076474478446462, - "grad_norm": 2.0039832265840998, - "learning_rate": 3.823418314645243e-06, - "loss": 1.0164, - "step": 1337 - }, - { - "epoch": 0.1608849876751037, - "grad_norm": 2.7013202441683872, - "learning_rate": 3.823098146568588e-06, - "loss": 0.9879, - "step": 1338 - }, - { - "epoch": 0.1610052305657428, - "grad_norm": 1.6043468555794949, - "learning_rate": 3.822777701929394e-06, - "loss": 0.948, - "step": 1339 - }, - { - "epoch": 0.1611254734563819, - "grad_norm": 1.9982121344245152, - "learning_rate": 3.8224569807762714e-06, - "loss": 0.9735, - "step": 1340 - }, - { - "epoch": 0.16124571634702098, - "grad_norm": 1.9226211712057968, - "learning_rate": 3.822135983157873e-06, - "loss": 1.0076, - "step": 1341 - }, - { - "epoch": 0.16136595923766006, - "grad_norm": 1.6881473853594788, - "learning_rate": 3.821814709122896e-06, - "loss": 1.0674, - "step": 1342 - }, - { - "epoch": 0.16148620212829917, - "grad_norm": 2.0235372982673803, - "learning_rate": 3.821493158720076e-06, - "loss": 1.0802, - "step": 1343 - }, - { - "epoch": 0.16160644501893826, - "grad_norm": 3.753698051466643, - "learning_rate": 3.821171331998191e-06, - "loss": 0.9697, - "step": 1344 - }, - { - "epoch": 0.16172668790957734, - "grad_norm": 0.7976830817033507, - "learning_rate": 3.820849229006064e-06, - "loss": 0.8233, - "step": 1345 - }, - { - "epoch": 0.16184693080021645, - "grad_norm": 2.430311803554, - "learning_rate": 3.8205268497925564e-06, - "loss": 0.9415, - "step": 1346 - }, - { - "epoch": 0.16196717369085553, - "grad_norm": 1.9927618657319859, - "learning_rate": 3.8202041944065725e-06, - "loss": 1.0041, - "step": 1347 - }, - { - "epoch": 0.16208741658149461, - "grad_norm": 1.9668702734062198, - "learning_rate": 3.819881262897061e-06, - "loss": 0.9705, - "step": 1348 - }, - { - "epoch": 0.1622076594721337, - "grad_norm": 1.8922653648427, - "learning_rate": 3.819558055313008e-06, - "loss": 0.965, - "step": 1349 - }, - { - "epoch": 0.1623279023627728, - "grad_norm": 2.1805199568232556, - "learning_rate": 3.819234571703444e-06, - "loss": 0.9982, - "step": 1350 - }, - { - "epoch": 0.1624481452534119, - "grad_norm": 3.037066669715834, - "learning_rate": 3.8189108121174435e-06, - "loss": 1.086, - "step": 1351 - }, - { - "epoch": 0.16256838814405097, - "grad_norm": 1.6064280964123079, - "learning_rate": 3.818586776604118e-06, - "loss": 1.0669, - "step": 1352 - }, - { - "epoch": 0.16268863103469008, - "grad_norm": 1.7342170437203714, - "learning_rate": 3.818262465212625e-06, - "loss": 0.8466, - "step": 1353 - }, - { - "epoch": 0.16280887392532917, - "grad_norm": 1.9134455095142504, - "learning_rate": 3.817937877992161e-06, - "loss": 0.9977, - "step": 1354 - }, - { - "epoch": 0.16292911681596825, - "grad_norm": 2.26515466477788, - "learning_rate": 3.817613014991967e-06, - "loss": 1.0752, - "step": 1355 - }, - { - "epoch": 0.16304935970660733, - "grad_norm": 1.9149644244801354, - "learning_rate": 3.817287876261323e-06, - "loss": 0.9912, - "step": 1356 - }, - { - "epoch": 0.16316960259724644, - "grad_norm": 1.7292618169994323, - "learning_rate": 3.816962461849553e-06, - "loss": 1.0322, - "step": 1357 - }, - { - "epoch": 0.16328984548788553, - "grad_norm": 1.7798934589604638, - "learning_rate": 3.8166367718060235e-06, - "loss": 1.0642, - "step": 1358 - }, - { - "epoch": 0.1634100883785246, - "grad_norm": 3.0118150114318007, - "learning_rate": 3.816310806180139e-06, - "loss": 0.9825, - "step": 1359 - }, - { - "epoch": 0.16353033126916372, - "grad_norm": 1.4574997381403116, - "learning_rate": 3.81598456502135e-06, - "loss": 1.0339, - "step": 1360 - }, - { - "epoch": 0.1636505741598028, - "grad_norm": 1.8250286256818289, - "learning_rate": 3.8156580483791455e-06, - "loss": 1.0986, - "step": 1361 - }, - { - "epoch": 0.16377081705044189, - "grad_norm": 1.9356640146109336, - "learning_rate": 3.815331256303059e-06, - "loss": 0.99, - "step": 1362 - }, - { - "epoch": 0.163891059941081, - "grad_norm": 2.0663425601597947, - "learning_rate": 3.815004188842665e-06, - "loss": 1.0006, - "step": 1363 - }, - { - "epoch": 0.16401130283172008, - "grad_norm": 1.746796878166545, - "learning_rate": 3.814676846047578e-06, - "loss": 1.0233, - "step": 1364 - }, - { - "epoch": 0.16413154572235916, - "grad_norm": 1.6972819899284772, - "learning_rate": 3.8143492279674565e-06, - "loss": 0.9309, - "step": 1365 - }, - { - "epoch": 0.16425178861299825, - "grad_norm": 0.9327321565874697, - "learning_rate": 3.8140213346519997e-06, - "loss": 0.8889, - "step": 1366 - }, - { - "epoch": 0.16437203150363736, - "grad_norm": 1.998576790352431, - "learning_rate": 3.813693166150948e-06, - "loss": 0.9977, - "step": 1367 - }, - { - "epoch": 0.16449227439427644, - "grad_norm": 2.244675588298697, - "learning_rate": 3.813364722514086e-06, - "loss": 1.0832, - "step": 1368 - }, - { - "epoch": 0.16461251728491552, - "grad_norm": 2.0445407049465536, - "learning_rate": 3.8130360037912368e-06, - "loss": 1.0389, - "step": 1369 - }, - { - "epoch": 0.16473276017555463, - "grad_norm": 4.295285350482017, - "learning_rate": 3.812707010032268e-06, - "loss": 1.0441, - "step": 1370 - }, - { - "epoch": 0.16485300306619372, - "grad_norm": 1.599108495298655, - "learning_rate": 3.8123777412870863e-06, - "loss": 1.0192, - "step": 1371 - }, - { - "epoch": 0.1649732459568328, - "grad_norm": 1.977045173665331, - "learning_rate": 3.812048197605643e-06, - "loss": 1.0098, - "step": 1372 - }, - { - "epoch": 0.16509348884747188, - "grad_norm": 1.866496129704082, - "learning_rate": 3.8117183790379277e-06, - "loss": 1.0344, - "step": 1373 - }, - { - "epoch": 0.165213731738111, - "grad_norm": 3.2590805257170175, - "learning_rate": 3.811388285633976e-06, - "loss": 1.1645, - "step": 1374 - }, - { - "epoch": 0.16533397462875007, - "grad_norm": 2.289368280291036, - "learning_rate": 3.811057917443861e-06, - "loss": 0.8476, - "step": 1375 - }, - { - "epoch": 0.16545421751938916, - "grad_norm": 0.8766892679791821, - "learning_rate": 3.8107272745177e-06, - "loss": 0.9361, - "step": 1376 - }, - { - "epoch": 0.16557446041002827, - "grad_norm": 1.5989297207768836, - "learning_rate": 3.8103963569056513e-06, - "loss": 1.0254, - "step": 1377 - }, - { - "epoch": 0.16569470330066735, - "grad_norm": 1.4648991688735544, - "learning_rate": 3.8100651646579146e-06, - "loss": 1.1099, - "step": 1378 - }, - { - "epoch": 0.16581494619130643, - "grad_norm": 2.0102903757963753, - "learning_rate": 3.8097336978247317e-06, - "loss": 1.1565, - "step": 1379 - }, - { - "epoch": 0.16593518908194552, - "grad_norm": 4.642574427454354, - "learning_rate": 3.8094019564563854e-06, - "loss": 1.1224, - "step": 1380 - }, - { - "epoch": 0.16605543197258463, - "grad_norm": 2.0705006639875183, - "learning_rate": 3.809069940603201e-06, - "loss": 0.9965, - "step": 1381 - }, - { - "epoch": 0.1661756748632237, - "grad_norm": 2.7634870811120402, - "learning_rate": 3.8087376503155452e-06, - "loss": 1.0006, - "step": 1382 - }, - { - "epoch": 0.1662959177538628, - "grad_norm": 0.9606799889376924, - "learning_rate": 3.808405085643826e-06, - "loss": 0.8386, - "step": 1383 - }, - { - "epoch": 0.1664161606445019, - "grad_norm": 2.0942060059408205, - "learning_rate": 3.8080722466384925e-06, - "loss": 1.1273, - "step": 1384 - }, - { - "epoch": 0.166536403535141, - "grad_norm": 2.4598682805594767, - "learning_rate": 3.8077391333500376e-06, - "loss": 0.9308, - "step": 1385 - }, - { - "epoch": 0.16665664642578007, - "grad_norm": 1.5084181443848141, - "learning_rate": 3.8074057458289934e-06, - "loss": 0.9898, - "step": 1386 - }, - { - "epoch": 0.16677688931641918, - "grad_norm": 1.786711768439847, - "learning_rate": 3.807072084125934e-06, - "loss": 1.0432, - "step": 1387 - }, - { - "epoch": 0.16689713220705826, - "grad_norm": 2.3566915649796623, - "learning_rate": 3.806738148291477e-06, - "loss": 1.0299, - "step": 1388 - }, - { - "epoch": 0.16701737509769735, - "grad_norm": 1.862897725490701, - "learning_rate": 3.8064039383762793e-06, - "loss": 0.9446, - "step": 1389 - }, - { - "epoch": 0.16713761798833643, - "grad_norm": 2.175723939753806, - "learning_rate": 3.8060694544310396e-06, - "loss": 0.9932, - "step": 1390 - }, - { - "epoch": 0.16725786087897554, - "grad_norm": 1.6414150697161607, - "learning_rate": 3.8057346965065006e-06, - "loss": 1.0146, - "step": 1391 - }, - { - "epoch": 0.16737810376961462, - "grad_norm": 1.4550465010720388, - "learning_rate": 3.805399664653443e-06, - "loss": 1.0785, - "step": 1392 - }, - { - "epoch": 0.1674983466602537, - "grad_norm": 4.348609895129653, - "learning_rate": 3.805064358922692e-06, - "loss": 0.9829, - "step": 1393 - }, - { - "epoch": 0.16761858955089282, - "grad_norm": 1.5897879625884739, - "learning_rate": 3.8047287793651136e-06, - "loss": 1.0302, - "step": 1394 - }, - { - "epoch": 0.1677388324415319, - "grad_norm": 2.952668977273861, - "learning_rate": 3.8043929260316137e-06, - "loss": 1.1077, - "step": 1395 - }, - { - "epoch": 0.16785907533217098, - "grad_norm": 1.919472285705324, - "learning_rate": 3.8040567989731417e-06, - "loss": 1.0629, - "step": 1396 - }, - { - "epoch": 0.16797931822281006, - "grad_norm": 1.8687662768213267, - "learning_rate": 3.8037203982406876e-06, - "loss": 1.0256, - "step": 1397 - }, - { - "epoch": 0.16809956111344918, - "grad_norm": 5.4305926769101145, - "learning_rate": 3.8033837238852835e-06, - "loss": 0.9599, - "step": 1398 - }, - { - "epoch": 0.16821980400408826, - "grad_norm": 1.642729581003246, - "learning_rate": 3.8030467759580017e-06, - "loss": 0.9228, - "step": 1399 - }, - { - "epoch": 0.16834004689472734, - "grad_norm": 1.83939053082042, - "learning_rate": 3.802709554509958e-06, - "loss": 1.1026, - "step": 1400 - }, - { - "epoch": 0.16846028978536645, - "grad_norm": 1.7300167828225415, - "learning_rate": 3.8023720595923083e-06, - "loss": 1.0129, - "step": 1401 - }, - { - "epoch": 0.16858053267600553, - "grad_norm": 2.12203301134957, - "learning_rate": 3.80203429125625e-06, - "loss": 1.0999, - "step": 1402 - }, - { - "epoch": 0.16870077556664462, - "grad_norm": 2.2486406034415647, - "learning_rate": 3.8016962495530225e-06, - "loss": 0.9326, - "step": 1403 - }, - { - "epoch": 0.1688210184572837, - "grad_norm": 2.167245335209436, - "learning_rate": 3.8013579345339063e-06, - "loss": 0.9844, - "step": 1404 - }, - { - "epoch": 0.1689412613479228, - "grad_norm": 1.7666272719807308, - "learning_rate": 3.801019346250224e-06, - "loss": 0.9293, - "step": 1405 - }, - { - "epoch": 0.1690615042385619, - "grad_norm": 3.6238025134342062, - "learning_rate": 3.8006804847533395e-06, - "loss": 1.0638, - "step": 1406 - }, - { - "epoch": 0.16918174712920098, - "grad_norm": 2.304885804001692, - "learning_rate": 3.8003413500946556e-06, - "loss": 1.0756, - "step": 1407 - }, - { - "epoch": 0.1693019900198401, - "grad_norm": 4.356074570745266, - "learning_rate": 3.8000019423256216e-06, - "loss": 1.0544, - "step": 1408 - }, - { - "epoch": 0.16942223291047917, - "grad_norm": 1.5752482112698192, - "learning_rate": 3.7996622614977234e-06, - "loss": 1.1046, - "step": 1409 - }, - { - "epoch": 0.16954247580111825, - "grad_norm": 1.8043755596121702, - "learning_rate": 3.799322307662492e-06, - "loss": 1.0237, - "step": 1410 - }, - { - "epoch": 0.16966271869175734, - "grad_norm": 2.2539318486550006, - "learning_rate": 3.798982080871496e-06, - "loss": 1.0696, - "step": 1411 - }, - { - "epoch": 0.16978296158239645, - "grad_norm": 2.033972221852496, - "learning_rate": 3.798641581176349e-06, - "loss": 0.9095, - "step": 1412 - }, - { - "epoch": 0.16990320447303553, - "grad_norm": 2.025372614545947, - "learning_rate": 3.7983008086287044e-06, - "loss": 0.9743, - "step": 1413 - }, - { - "epoch": 0.1700234473636746, - "grad_norm": 1.9548126863663144, - "learning_rate": 3.797959763280257e-06, - "loss": 1.0247, - "step": 1414 - }, - { - "epoch": 0.17014369025431372, - "grad_norm": 1.8663500579767751, - "learning_rate": 3.797618445182743e-06, - "loss": 1.02, - "step": 1415 - }, - { - "epoch": 0.1702639331449528, - "grad_norm": 2.461404326227536, - "learning_rate": 3.79727685438794e-06, - "loss": 1.0712, - "step": 1416 - }, - { - "epoch": 0.1703841760355919, - "grad_norm": 0.922389064931278, - "learning_rate": 3.796934990947667e-06, - "loss": 0.8671, - "step": 1417 - }, - { - "epoch": 0.170504418926231, - "grad_norm": 0.9554263301238112, - "learning_rate": 3.7965928549137854e-06, - "loss": 0.8875, - "step": 1418 - }, - { - "epoch": 0.17062466181687008, - "grad_norm": 1.9104440546377899, - "learning_rate": 3.7962504463381953e-06, - "loss": 0.9976, - "step": 1419 - }, - { - "epoch": 0.17074490470750917, - "grad_norm": 1.5435948127861083, - "learning_rate": 3.7959077652728412e-06, - "loss": 1.0127, - "step": 1420 - }, - { - "epoch": 0.17086514759814825, - "grad_norm": 1.9497930563643118, - "learning_rate": 3.795564811769707e-06, - "loss": 0.9921, - "step": 1421 - }, - { - "epoch": 0.17098539048878736, - "grad_norm": 1.9704841363554333, - "learning_rate": 3.795221585880818e-06, - "loss": 1.0148, - "step": 1422 - }, - { - "epoch": 0.17110563337942644, - "grad_norm": 1.5439612178477857, - "learning_rate": 3.794878087658242e-06, - "loss": 1.1389, - "step": 1423 - }, - { - "epoch": 0.17122587627006552, - "grad_norm": 1.5369352559567393, - "learning_rate": 3.7945343171540873e-06, - "loss": 1.0121, - "step": 1424 - }, - { - "epoch": 0.17134611916070464, - "grad_norm": 1.8941539270677739, - "learning_rate": 3.7941902744205033e-06, - "loss": 1.0171, - "step": 1425 - }, - { - "epoch": 0.17146636205134372, - "grad_norm": 1.9604289698712996, - "learning_rate": 3.7938459595096817e-06, - "loss": 1.0641, - "step": 1426 - }, - { - "epoch": 0.1715866049419828, - "grad_norm": 1.8048279282400912, - "learning_rate": 3.7935013724738545e-06, - "loss": 1.0841, - "step": 1427 - }, - { - "epoch": 0.17170684783262188, - "grad_norm": 1.6993854309412881, - "learning_rate": 3.7931565133652945e-06, - "loss": 1.0089, - "step": 1428 - }, - { - "epoch": 0.171827090723261, - "grad_norm": 2.0048265630428186, - "learning_rate": 3.792811382236317e-06, - "loss": 0.9056, - "step": 1429 - }, - { - "epoch": 0.17194733361390008, - "grad_norm": 1.8190133366234804, - "learning_rate": 3.792465979139279e-06, - "loss": 1.0085, - "step": 1430 - }, - { - "epoch": 0.17206757650453916, - "grad_norm": 1.2428837390459295, - "learning_rate": 3.792120304126576e-06, - "loss": 0.9689, - "step": 1431 - }, - { - "epoch": 0.17218781939517827, - "grad_norm": 1.6264052550486616, - "learning_rate": 3.791774357250649e-06, - "loss": 1.0682, - "step": 1432 - }, - { - "epoch": 0.17230806228581735, - "grad_norm": 3.3835793698555108, - "learning_rate": 3.7914281385639757e-06, - "loss": 1.0227, - "step": 1433 - }, - { - "epoch": 0.17242830517645644, - "grad_norm": 1.7450389022644337, - "learning_rate": 3.7910816481190784e-06, - "loss": 1.017, - "step": 1434 - }, - { - "epoch": 0.17254854806709552, - "grad_norm": 1.8620575859433404, - "learning_rate": 3.7907348859685193e-06, - "loss": 0.9834, - "step": 1435 - }, - { - "epoch": 0.17266879095773463, - "grad_norm": 2.4528250541273393, - "learning_rate": 3.790387852164902e-06, - "loss": 1.0296, - "step": 1436 - }, - { - "epoch": 0.1727890338483737, - "grad_norm": 1.7314477910832513, - "learning_rate": 3.7900405467608707e-06, - "loss": 0.9953, - "step": 1437 - }, - { - "epoch": 0.1729092767390128, - "grad_norm": 4.411966930317346, - "learning_rate": 3.7896929698091114e-06, - "loss": 1.0083, - "step": 1438 - }, - { - "epoch": 0.1730295196296519, - "grad_norm": 2.9388650198121202, - "learning_rate": 3.7893451213623518e-06, - "loss": 0.9267, - "step": 1439 - }, - { - "epoch": 0.173149762520291, - "grad_norm": 1.7506583882654476, - "learning_rate": 3.7889970014733606e-06, - "loss": 1.0482, - "step": 1440 - }, - { - "epoch": 0.17327000541093007, - "grad_norm": 1.9894098402610423, - "learning_rate": 3.7886486101949463e-06, - "loss": 0.9998, - "step": 1441 - }, - { - "epoch": 0.17339024830156918, - "grad_norm": 1.9430189591728013, - "learning_rate": 3.7882999475799594e-06, - "loss": 1.105, - "step": 1442 - }, - { - "epoch": 0.17351049119220827, - "grad_norm": 1.8459036581909891, - "learning_rate": 3.787951013681293e-06, - "loss": 1.0379, - "step": 1443 - }, - { - "epoch": 0.17363073408284735, - "grad_norm": 1.8415559548404576, - "learning_rate": 3.787601808551879e-06, - "loss": 1.0069, - "step": 1444 - }, - { - "epoch": 0.17375097697348643, - "grad_norm": 2.2698441780485394, - "learning_rate": 3.7872523322446926e-06, - "loss": 1.0616, - "step": 1445 - }, - { - "epoch": 0.17387121986412554, - "grad_norm": 1.7173600558774615, - "learning_rate": 3.7869025848127478e-06, - "loss": 0.8334, - "step": 1446 - }, - { - "epoch": 0.17399146275476463, - "grad_norm": 2.4576890607660644, - "learning_rate": 3.786552566309102e-06, - "loss": 1.0313, - "step": 1447 - }, - { - "epoch": 0.1741117056454037, - "grad_norm": 2.099446986816209, - "learning_rate": 3.7862022767868517e-06, - "loss": 1.0952, - "step": 1448 - }, - { - "epoch": 0.17423194853604282, - "grad_norm": 1.757517743299189, - "learning_rate": 3.7858517162991367e-06, - "loss": 1.0738, - "step": 1449 - }, - { - "epoch": 0.1743521914266819, - "grad_norm": 2.0046555454862025, - "learning_rate": 3.7855008848991363e-06, - "loss": 0.8369, - "step": 1450 - }, - { - "epoch": 0.17447243431732098, - "grad_norm": 1.9999940991314622, - "learning_rate": 3.7851497826400714e-06, - "loss": 1.0067, - "step": 1451 - }, - { - "epoch": 0.17459267720796007, - "grad_norm": 1.6991666237739282, - "learning_rate": 3.7847984095752034e-06, - "loss": 0.983, - "step": 1452 - }, - { - "epoch": 0.17471292009859918, - "grad_norm": 1.7124894998047515, - "learning_rate": 3.784446765757836e-06, - "loss": 1.0321, - "step": 1453 - }, - { - "epoch": 0.17483316298923826, - "grad_norm": 5.026784870231846, - "learning_rate": 3.7840948512413133e-06, - "loss": 1.006, - "step": 1454 - }, - { - "epoch": 0.17495340587987734, - "grad_norm": 2.309451284426225, - "learning_rate": 3.7837426660790196e-06, - "loss": 1.012, - "step": 1455 - }, - { - "epoch": 0.17507364877051645, - "grad_norm": 3.2833251204283056, - "learning_rate": 3.783390210324382e-06, - "loss": 1.0452, - "step": 1456 - }, - { - "epoch": 0.17519389166115554, - "grad_norm": 1.844851213467404, - "learning_rate": 3.7830374840308676e-06, - "loss": 0.96, - "step": 1457 - }, - { - "epoch": 0.17531413455179462, - "grad_norm": 2.403297297101296, - "learning_rate": 3.7826844872519842e-06, - "loss": 1.0617, - "step": 1458 - }, - { - "epoch": 0.1754343774424337, - "grad_norm": 1.8951394903936007, - "learning_rate": 3.782331220041282e-06, - "loss": 0.9582, - "step": 1459 - }, - { - "epoch": 0.17555462033307281, - "grad_norm": 2.371704224078895, - "learning_rate": 3.7819776824523504e-06, - "loss": 1.0531, - "step": 1460 - }, - { - "epoch": 0.1756748632237119, - "grad_norm": 1.8610865305072852, - "learning_rate": 3.7816238745388213e-06, - "loss": 1.0656, - "step": 1461 - }, - { - "epoch": 0.17579510611435098, - "grad_norm": 1.9814123430136583, - "learning_rate": 3.781269796354367e-06, - "loss": 1.1008, - "step": 1462 - }, - { - "epoch": 0.1759153490049901, - "grad_norm": 1.6384642404676582, - "learning_rate": 3.7809154479527006e-06, - "loss": 1.0987, - "step": 1463 - }, - { - "epoch": 0.17603559189562917, - "grad_norm": 2.1646984281466453, - "learning_rate": 3.780560829387577e-06, - "loss": 1.0654, - "step": 1464 - }, - { - "epoch": 0.17615583478626826, - "grad_norm": 0.873295827441227, - "learning_rate": 3.7802059407127915e-06, - "loss": 0.8459, - "step": 1465 - }, - { - "epoch": 0.17627607767690734, - "grad_norm": 2.40087250108778, - "learning_rate": 3.7798507819821797e-06, - "loss": 1.0915, - "step": 1466 - }, - { - "epoch": 0.17639632056754645, - "grad_norm": 20.351871398807376, - "learning_rate": 3.7794953532496197e-06, - "loss": 1.023, - "step": 1467 - }, - { - "epoch": 0.17651656345818553, - "grad_norm": 0.9361543853075975, - "learning_rate": 3.7791396545690295e-06, - "loss": 0.8704, - "step": 1468 - }, - { - "epoch": 0.17663680634882462, - "grad_norm": 1.9323813596680406, - "learning_rate": 3.7787836859943685e-06, - "loss": 1.0335, - "step": 1469 - }, - { - "epoch": 0.17675704923946373, - "grad_norm": 2.0533101502055193, - "learning_rate": 3.7784274475796363e-06, - "loss": 1.019, - "step": 1470 - }, - { - "epoch": 0.1768772921301028, - "grad_norm": 1.782116127250542, - "learning_rate": 3.7780709393788745e-06, - "loss": 0.997, - "step": 1471 - }, - { - "epoch": 0.1769975350207419, - "grad_norm": 2.083188764006554, - "learning_rate": 3.777714161446165e-06, - "loss": 0.981, - "step": 1472 - }, - { - "epoch": 0.177117777911381, - "grad_norm": 2.364191759487513, - "learning_rate": 3.7773571138356304e-06, - "loss": 0.918, - "step": 1473 - }, - { - "epoch": 0.17723802080202009, - "grad_norm": 2.986538567123324, - "learning_rate": 3.776999796601435e-06, - "loss": 1.118, - "step": 1474 - }, - { - "epoch": 0.17735826369265917, - "grad_norm": 3.0009371565302474, - "learning_rate": 3.776642209797783e-06, - "loss": 0.9471, - "step": 1475 - }, - { - "epoch": 0.17747850658329825, - "grad_norm": 1.6778134559405526, - "learning_rate": 3.7762843534789205e-06, - "loss": 1.0011, - "step": 1476 - }, - { - "epoch": 0.17759874947393736, - "grad_norm": 22.673221014489208, - "learning_rate": 3.7759262276991343e-06, - "loss": 1.104, - "step": 1477 - }, - { - "epoch": 0.17771899236457644, - "grad_norm": 2.1995641189875776, - "learning_rate": 3.7755678325127506e-06, - "loss": 1.0311, - "step": 1478 - }, - { - "epoch": 0.17783923525521553, - "grad_norm": 1.7431524367171842, - "learning_rate": 3.7752091679741393e-06, - "loss": 0.9867, - "step": 1479 - }, - { - "epoch": 0.17795947814585464, - "grad_norm": 3.7791272975521104, - "learning_rate": 3.774850234137708e-06, - "loss": 1.0001, - "step": 1480 - }, - { - "epoch": 0.17807972103649372, - "grad_norm": 2.4442456145292577, - "learning_rate": 3.7744910310579076e-06, - "loss": 1.0538, - "step": 1481 - }, - { - "epoch": 0.1781999639271328, - "grad_norm": 1.848089639108035, - "learning_rate": 3.774131558789229e-06, - "loss": 1.0784, - "step": 1482 - }, - { - "epoch": 0.1783202068177719, - "grad_norm": 4.268665836783555, - "learning_rate": 3.773771817386203e-06, - "loss": 0.9249, - "step": 1483 - }, - { - "epoch": 0.178440449708411, - "grad_norm": 1.6597190370976926, - "learning_rate": 3.773411806903403e-06, - "loss": 1.0229, - "step": 1484 - }, - { - "epoch": 0.17856069259905008, - "grad_norm": 1.6836232052237723, - "learning_rate": 3.7730515273954415e-06, - "loss": 1.1709, - "step": 1485 - }, - { - "epoch": 0.17868093548968916, - "grad_norm": 9.342628437207736, - "learning_rate": 3.772690978916973e-06, - "loss": 1.0676, - "step": 1486 - }, - { - "epoch": 0.17880117838032827, - "grad_norm": 2.1215482614724643, - "learning_rate": 3.772330161522693e-06, - "loss": 1.1036, - "step": 1487 - }, - { - "epoch": 0.17892142127096736, - "grad_norm": 2.0935525374032715, - "learning_rate": 3.7719690752673365e-06, - "loss": 1.0308, - "step": 1488 - }, - { - "epoch": 0.17904166416160644, - "grad_norm": 1.9043625414359113, - "learning_rate": 3.7716077202056796e-06, - "loss": 1.015, - "step": 1489 - }, - { - "epoch": 0.17916190705224552, - "grad_norm": 2.1237161909603914, - "learning_rate": 3.7712460963925404e-06, - "loss": 1.155, - "step": 1490 - }, - { - "epoch": 0.17928214994288463, - "grad_norm": 1.5551338513529243, - "learning_rate": 3.7708842038827775e-06, - "loss": 0.9807, - "step": 1491 - }, - { - "epoch": 0.17940239283352372, - "grad_norm": 1.5901647145346913, - "learning_rate": 3.770522042731288e-06, - "loss": 1.0798, - "step": 1492 - }, - { - "epoch": 0.1795226357241628, - "grad_norm": 1.9789581858268812, - "learning_rate": 3.7701596129930122e-06, - "loss": 1.106, - "step": 1493 - }, - { - "epoch": 0.1796428786148019, - "grad_norm": 5.109729999493575, - "learning_rate": 3.7697969147229315e-06, - "loss": 0.9587, - "step": 1494 - }, - { - "epoch": 0.179763121505441, - "grad_norm": 2.2395596406851483, - "learning_rate": 3.7694339479760647e-06, - "loss": 1.0804, - "step": 1495 - }, - { - "epoch": 0.17988336439608008, - "grad_norm": 0.884410458450384, - "learning_rate": 3.769070712807476e-06, - "loss": 0.8383, - "step": 1496 - }, - { - "epoch": 0.18000360728671919, - "grad_norm": 1.994302322751458, - "learning_rate": 3.768707209272266e-06, - "loss": 1.0131, - "step": 1497 - }, - { - "epoch": 0.18012385017735827, - "grad_norm": 2.1919696376048177, - "learning_rate": 3.768343437425579e-06, - "loss": 0.9873, - "step": 1498 - }, - { - "epoch": 0.18024409306799735, - "grad_norm": 2.2352713540928812, - "learning_rate": 3.7679793973225987e-06, - "loss": 1.0917, - "step": 1499 - }, - { - "epoch": 0.18036433595863643, - "grad_norm": 0.8826856606436001, - "learning_rate": 3.767615089018549e-06, - "loss": 0.8717, - "step": 1500 - }, - { - "epoch": 0.18048457884927555, - "grad_norm": 1.7849707308808507, - "learning_rate": 3.7672505125686966e-06, - "loss": 1.0817, - "step": 1501 - }, - { - "epoch": 0.18060482173991463, - "grad_norm": 3.3667352996729583, - "learning_rate": 3.7668856680283455e-06, - "loss": 1.078, - "step": 1502 - }, - { - "epoch": 0.1807250646305537, - "grad_norm": 1.7229504669207418, - "learning_rate": 3.7665205554528437e-06, - "loss": 1.0545, - "step": 1503 - }, - { - "epoch": 0.18084530752119282, - "grad_norm": 1.739754247739978, - "learning_rate": 3.7661551748975782e-06, - "loss": 0.9835, - "step": 1504 - }, - { - "epoch": 0.1809655504118319, - "grad_norm": 0.8142566398160144, - "learning_rate": 3.7657895264179772e-06, - "loss": 0.8492, - "step": 1505 - }, - { - "epoch": 0.181085793302471, - "grad_norm": 3.056996909147777, - "learning_rate": 3.765423610069509e-06, - "loss": 0.9759, - "step": 1506 - }, - { - "epoch": 0.18120603619311007, - "grad_norm": 3.438859150692389, - "learning_rate": 3.765057425907683e-06, - "loss": 0.9576, - "step": 1507 - }, - { - "epoch": 0.18132627908374918, - "grad_norm": 2.197206596429334, - "learning_rate": 3.764690973988048e-06, - "loss": 1.0081, - "step": 1508 - }, - { - "epoch": 0.18144652197438826, - "grad_norm": 2.331716442744798, - "learning_rate": 3.7643242543661967e-06, - "loss": 0.9831, - "step": 1509 - }, - { - "epoch": 0.18156676486502735, - "grad_norm": 0.8416558167416064, - "learning_rate": 3.7639572670977573e-06, - "loss": 0.8737, - "step": 1510 - }, - { - "epoch": 0.18168700775566646, - "grad_norm": 1.4454332455897279, - "learning_rate": 3.7635900122384042e-06, - "loss": 0.996, - "step": 1511 - }, - { - "epoch": 0.18180725064630554, - "grad_norm": 2.0247072926652088, - "learning_rate": 3.7632224898438477e-06, - "loss": 1.1, - "step": 1512 - }, - { - "epoch": 0.18192749353694462, - "grad_norm": 1.4333459291717892, - "learning_rate": 3.762854699969842e-06, - "loss": 1.0175, - "step": 1513 - }, - { - "epoch": 0.1820477364275837, - "grad_norm": 1.8831712689620184, - "learning_rate": 3.762486642672179e-06, - "loss": 0.9577, - "step": 1514 - }, - { - "epoch": 0.18216797931822282, - "grad_norm": 2.024533008369471, - "learning_rate": 3.7621183180066946e-06, - "loss": 1.0948, - "step": 1515 - }, - { - "epoch": 0.1822882222088619, - "grad_norm": 1.5593656765156443, - "learning_rate": 3.7617497260292625e-06, - "loss": 0.9668, - "step": 1516 - }, - { - "epoch": 0.18240846509950098, - "grad_norm": 2.3680934871893475, - "learning_rate": 3.7613808667957967e-06, - "loss": 1.0257, - "step": 1517 - }, - { - "epoch": 0.1825287079901401, - "grad_norm": 1.8156155085356604, - "learning_rate": 3.7610117403622547e-06, - "loss": 1.1405, - "step": 1518 - }, - { - "epoch": 0.18264895088077918, - "grad_norm": 1.5636600003159686, - "learning_rate": 3.7606423467846313e-06, - "loss": 1.1284, - "step": 1519 - }, - { - "epoch": 0.18276919377141826, - "grad_norm": 1.4547947959787946, - "learning_rate": 3.760272686118964e-06, - "loss": 1.0239, - "step": 1520 - }, - { - "epoch": 0.18288943666205737, - "grad_norm": 1.9267657318957876, - "learning_rate": 3.7599027584213297e-06, - "loss": 1.1499, - "step": 1521 - }, - { - "epoch": 0.18300967955269645, - "grad_norm": 2.1857911110669934, - "learning_rate": 3.7595325637478465e-06, - "loss": 1.0108, - "step": 1522 - }, - { - "epoch": 0.18312992244333554, - "grad_norm": 2.127815401072772, - "learning_rate": 3.7591621021546723e-06, - "loss": 1.0466, - "step": 1523 - }, - { - "epoch": 0.18325016533397462, - "grad_norm": 1.797638308067922, - "learning_rate": 3.7587913736980062e-06, - "loss": 1.0443, - "step": 1524 - }, - { - "epoch": 0.18337040822461373, - "grad_norm": 1.7089299656517354, - "learning_rate": 3.7584203784340865e-06, - "loss": 1.0708, - "step": 1525 - }, - { - "epoch": 0.1834906511152528, - "grad_norm": 1.9758482837580642, - "learning_rate": 3.7580491164191938e-06, - "loss": 1.0849, - "step": 1526 - }, - { - "epoch": 0.1836108940058919, - "grad_norm": 1.3344598372423313, - "learning_rate": 3.757677587709648e-06, - "loss": 0.864, - "step": 1527 - }, - { - "epoch": 0.183731136896531, - "grad_norm": 2.067206705455416, - "learning_rate": 3.7573057923618095e-06, - "loss": 0.9914, - "step": 1528 - }, - { - "epoch": 0.1838513797871701, - "grad_norm": 1.7159646092435565, - "learning_rate": 3.7569337304320793e-06, - "loss": 0.9715, - "step": 1529 - }, - { - "epoch": 0.18397162267780917, - "grad_norm": 0.8328393226870282, - "learning_rate": 3.756561401976899e-06, - "loss": 0.8913, - "step": 1530 - }, - { - "epoch": 0.18409186556844825, - "grad_norm": 1.661822822156255, - "learning_rate": 3.7561888070527514e-06, - "loss": 1.0517, - "step": 1531 - }, - { - "epoch": 0.18421210845908736, - "grad_norm": 2.8899190504623307, - "learning_rate": 3.7558159457161577e-06, - "loss": 1.0257, - "step": 1532 - }, - { - "epoch": 0.18433235134972645, - "grad_norm": 1.9720178760933156, - "learning_rate": 3.755442818023681e-06, - "loss": 1.0053, - "step": 1533 - }, - { - "epoch": 0.18445259424036553, - "grad_norm": 2.2757461204659175, - "learning_rate": 3.7550694240319246e-06, - "loss": 0.9895, - "step": 1534 - }, - { - "epoch": 0.18457283713100464, - "grad_norm": 1.9409967063431701, - "learning_rate": 3.7546957637975326e-06, - "loss": 1.0027, - "step": 1535 - }, - { - "epoch": 0.18469308002164372, - "grad_norm": 1.44190007049499, - "learning_rate": 3.7543218373771873e-06, - "loss": 0.9715, - "step": 1536 - }, - { - "epoch": 0.1848133229122828, - "grad_norm": 1.5356290055355857, - "learning_rate": 3.753947644827615e-06, - "loss": 1.0151, - "step": 1537 - }, - { - "epoch": 0.1849335658029219, - "grad_norm": 0.799494495721207, - "learning_rate": 3.753573186205579e-06, - "loss": 0.8324, - "step": 1538 - }, - { - "epoch": 0.185053808693561, - "grad_norm": 2.0099651508251926, - "learning_rate": 3.753198461567885e-06, - "loss": 1.009, - "step": 1539 - }, - { - "epoch": 0.18517405158420008, - "grad_norm": 1.781594226436227, - "learning_rate": 3.7528234709713783e-06, - "loss": 1.1477, - "step": 1540 - }, - { - "epoch": 0.18529429447483917, - "grad_norm": 1.76759943659459, - "learning_rate": 3.7524482144729447e-06, - "loss": 1.0734, - "step": 1541 - }, - { - "epoch": 0.18541453736547828, - "grad_norm": 3.1950843188602183, - "learning_rate": 3.7520726921295106e-06, - "loss": 1.0657, - "step": 1542 - }, - { - "epoch": 0.18553478025611736, - "grad_norm": 2.0223096848997693, - "learning_rate": 3.751696903998042e-06, - "loss": 0.9573, - "step": 1543 - }, - { - "epoch": 0.18565502314675644, - "grad_norm": 1.5050350560259198, - "learning_rate": 3.7513208501355456e-06, - "loss": 0.9272, - "step": 1544 - }, - { - "epoch": 0.18577526603739553, - "grad_norm": 1.738091276267576, - "learning_rate": 3.750944530599069e-06, - "loss": 1.0735, - "step": 1545 - }, - { - "epoch": 0.18589550892803464, - "grad_norm": 3.019818330567882, - "learning_rate": 3.7505679454456992e-06, - "loss": 1.042, - "step": 1546 - }, - { - "epoch": 0.18601575181867372, - "grad_norm": 2.9390666923107727, - "learning_rate": 3.750191094732564e-06, - "loss": 0.931, - "step": 1547 - }, - { - "epoch": 0.1861359947093128, - "grad_norm": 1.7167484072634385, - "learning_rate": 3.7498139785168313e-06, - "loss": 0.9795, - "step": 1548 - }, - { - "epoch": 0.1862562375999519, - "grad_norm": 1.6001360239382714, - "learning_rate": 3.749436596855709e-06, - "loss": 1.0071, - "step": 1549 - }, - { - "epoch": 0.186376480490591, - "grad_norm": 1.7941805332827414, - "learning_rate": 3.749058949806446e-06, - "loss": 1.1399, - "step": 1550 - }, - { - "epoch": 0.18649672338123008, - "grad_norm": 1.5883060301401737, - "learning_rate": 3.748681037426331e-06, - "loss": 1.0679, - "step": 1551 - }, - { - "epoch": 0.1866169662718692, - "grad_norm": 2.0331650143147137, - "learning_rate": 3.7483028597726936e-06, - "loss": 1.1492, - "step": 1552 - }, - { - "epoch": 0.18673720916250827, - "grad_norm": 1.8924370271190871, - "learning_rate": 3.7479244169029017e-06, - "loss": 0.8546, - "step": 1553 - }, - { - "epoch": 0.18685745205314735, - "grad_norm": 2.2613012689500436, - "learning_rate": 3.7475457088743658e-06, - "loss": 0.9651, - "step": 1554 - }, - { - "epoch": 0.18697769494378644, - "grad_norm": 1.8183486888993532, - "learning_rate": 3.7471667357445348e-06, - "loss": 0.9722, - "step": 1555 - }, - { - "epoch": 0.18709793783442555, - "grad_norm": 1.8111550174973996, - "learning_rate": 3.7467874975709e-06, - "loss": 0.958, - "step": 1556 - }, - { - "epoch": 0.18721818072506463, - "grad_norm": 2.5891277997951114, - "learning_rate": 3.7464079944109904e-06, - "loss": 1.01, - "step": 1557 - }, - { - "epoch": 0.18733842361570371, - "grad_norm": 1.9469048903422514, - "learning_rate": 3.746028226322376e-06, - "loss": 1.0025, - "step": 1558 - }, - { - "epoch": 0.18745866650634282, - "grad_norm": 1.8601183647517814, - "learning_rate": 3.745648193362669e-06, - "loss": 0.9899, - "step": 1559 - }, - { - "epoch": 0.1875789093969819, - "grad_norm": 1.8360746291873085, - "learning_rate": 3.745267895589518e-06, - "loss": 0.9599, - "step": 1560 - }, - { - "epoch": 0.187699152287621, - "grad_norm": 2.1109007791957857, - "learning_rate": 3.7448873330606154e-06, - "loss": 1.0471, - "step": 1561 - }, - { - "epoch": 0.18781939517826007, - "grad_norm": 2.0328964603100337, - "learning_rate": 3.7445065058336914e-06, - "loss": 1.1046, - "step": 1562 - }, - { - "epoch": 0.18793963806889918, - "grad_norm": 1.8686642094442973, - "learning_rate": 3.7441254139665176e-06, - "loss": 1.0926, - "step": 1563 - }, - { - "epoch": 0.18805988095953827, - "grad_norm": 1.5642781630083087, - "learning_rate": 3.743744057516905e-06, - "loss": 1.0516, - "step": 1564 - }, - { - "epoch": 0.18818012385017735, - "grad_norm": 2.839944158193128, - "learning_rate": 3.743362436542706e-06, - "loss": 1.1114, - "step": 1565 - }, - { - "epoch": 0.18830036674081646, - "grad_norm": 1.7497397638014152, - "learning_rate": 3.7429805511018115e-06, - "loss": 0.9977, - "step": 1566 - }, - { - "epoch": 0.18842060963145554, - "grad_norm": 1.9992825294091159, - "learning_rate": 3.7425984012521524e-06, - "loss": 1.002, - "step": 1567 - }, - { - "epoch": 0.18854085252209463, - "grad_norm": 0.730142734826179, - "learning_rate": 3.7422159870517025e-06, - "loss": 0.8578, - "step": 1568 - }, - { - "epoch": 0.1886610954127337, - "grad_norm": 1.4558360743519585, - "learning_rate": 3.7418333085584717e-06, - "loss": 1.0197, - "step": 1569 - }, - { - "epoch": 0.18878133830337282, - "grad_norm": 1.8589102540907325, - "learning_rate": 3.7414503658305128e-06, - "loss": 1.1402, - "step": 1570 - }, - { - "epoch": 0.1889015811940119, - "grad_norm": 2.2946356412255664, - "learning_rate": 3.7410671589259185e-06, - "loss": 1.0085, - "step": 1571 - }, - { - "epoch": 0.18902182408465099, - "grad_norm": 2.2381140095542906, - "learning_rate": 3.7406836879028205e-06, - "loss": 1.0233, - "step": 1572 - }, - { - "epoch": 0.1891420669752901, - "grad_norm": 1.9039644391321326, - "learning_rate": 3.7402999528193907e-06, - "loss": 1.004, - "step": 1573 - }, - { - "epoch": 0.18926230986592918, - "grad_norm": 2.372387000750372, - "learning_rate": 3.739915953733842e-06, - "loss": 1.0786, - "step": 1574 - }, - { - "epoch": 0.18938255275656826, - "grad_norm": 3.114843725506333, - "learning_rate": 3.7395316907044264e-06, - "loss": 1.0451, - "step": 1575 - }, - { - "epoch": 0.18950279564720737, - "grad_norm": 1.6921816275398738, - "learning_rate": 3.7391471637894364e-06, - "loss": 1.0184, - "step": 1576 - }, - { - "epoch": 0.18962303853784646, - "grad_norm": 1.8637912779956718, - "learning_rate": 3.738762373047205e-06, - "loss": 1.0836, - "step": 1577 - }, - { - "epoch": 0.18974328142848554, - "grad_norm": 1.5867261006472861, - "learning_rate": 3.738377318536103e-06, - "loss": 1.061, - "step": 1578 - }, - { - "epoch": 0.18986352431912462, - "grad_norm": 1.9805525361344676, - "learning_rate": 3.7379920003145447e-06, - "loss": 0.9546, - "step": 1579 - }, - { - "epoch": 0.18998376720976373, - "grad_norm": 1.52588085937375, - "learning_rate": 3.7376064184409817e-06, - "loss": 1.0691, - "step": 1580 - }, - { - "epoch": 0.19010401010040281, - "grad_norm": 1.2843742426292246, - "learning_rate": 3.7372205729739063e-06, - "loss": 1.0962, - "step": 1581 - }, - { - "epoch": 0.1902242529910419, - "grad_norm": 1.8070451932996698, - "learning_rate": 3.7368344639718514e-06, - "loss": 0.9464, - "step": 1582 - }, - { - "epoch": 0.190344495881681, - "grad_norm": 1.701011185880074, - "learning_rate": 3.7364480914933895e-06, - "loss": 1.0356, - "step": 1583 - }, - { - "epoch": 0.1904647387723201, - "grad_norm": 1.7346719023742556, - "learning_rate": 3.7360614555971325e-06, - "loss": 1.04, - "step": 1584 - }, - { - "epoch": 0.19058498166295917, - "grad_norm": 1.9659154559106065, - "learning_rate": 3.735674556341733e-06, - "loss": 1.0821, - "step": 1585 - }, - { - "epoch": 0.19070522455359826, - "grad_norm": 2.207033518562164, - "learning_rate": 3.7352873937858835e-06, - "loss": 1.0617, - "step": 1586 - }, - { - "epoch": 0.19082546744423737, - "grad_norm": 1.9227410202838429, - "learning_rate": 3.734899967988316e-06, - "loss": 0.9517, - "step": 1587 - }, - { - "epoch": 0.19094571033487645, - "grad_norm": 1.6981359780932164, - "learning_rate": 3.7345122790078026e-06, - "loss": 1.0661, - "step": 1588 - }, - { - "epoch": 0.19106595322551553, - "grad_norm": 2.326762062875168, - "learning_rate": 3.7341243269031556e-06, - "loss": 1.1613, - "step": 1589 - }, - { - "epoch": 0.19118619611615464, - "grad_norm": 1.4775549565662396, - "learning_rate": 3.7337361117332275e-06, - "loss": 1.0094, - "step": 1590 - }, - { - "epoch": 0.19130643900679373, - "grad_norm": 2.040055073574376, - "learning_rate": 3.7333476335569087e-06, - "loss": 0.9977, - "step": 1591 - }, - { - "epoch": 0.1914266818974328, - "grad_norm": 2.2805549921532493, - "learning_rate": 3.7329588924331325e-06, - "loss": 0.8946, - "step": 1592 - }, - { - "epoch": 0.1915469247880719, - "grad_norm": 4.3491835606287035, - "learning_rate": 3.732569888420871e-06, - "loss": 1.0494, - "step": 1593 - }, - { - "epoch": 0.191667167678711, - "grad_norm": 1.9943040562237881, - "learning_rate": 3.732180621579134e-06, - "loss": 1.0628, - "step": 1594 - }, - { - "epoch": 0.1917874105693501, - "grad_norm": 1.8194743834241376, - "learning_rate": 3.7317910919669745e-06, - "loss": 1.0409, - "step": 1595 - }, - { - "epoch": 0.19190765345998917, - "grad_norm": 2.567992673738671, - "learning_rate": 3.7314012996434826e-06, - "loss": 0.9886, - "step": 1596 - }, - { - "epoch": 0.19202789635062828, - "grad_norm": 2.136551928938318, - "learning_rate": 3.7310112446677907e-06, - "loss": 1.0388, - "step": 1597 - }, - { - "epoch": 0.19214813924126736, - "grad_norm": 2.217124558483535, - "learning_rate": 3.7306209270990695e-06, - "loss": 0.917, - "step": 1598 - }, - { - "epoch": 0.19226838213190645, - "grad_norm": 1.8547784746027902, - "learning_rate": 3.7302303469965292e-06, - "loss": 1.0967, - "step": 1599 - }, - { - "epoch": 0.19238862502254553, - "grad_norm": 1.7204893154709384, - "learning_rate": 3.7298395044194206e-06, - "loss": 0.9405, - "step": 1600 - }, - { - "epoch": 0.19250886791318464, - "grad_norm": 1.651004329544019, - "learning_rate": 3.7294483994270356e-06, - "loss": 1.1675, - "step": 1601 - }, - { - "epoch": 0.19262911080382372, - "grad_norm": 1.8613181339087834, - "learning_rate": 3.7290570320787033e-06, - "loss": 1.0063, - "step": 1602 - }, - { - "epoch": 0.1927493536944628, - "grad_norm": 1.9422009566585874, - "learning_rate": 3.728665402433793e-06, - "loss": 0.9408, - "step": 1603 - }, - { - "epoch": 0.19286959658510192, - "grad_norm": 5.918671317619827, - "learning_rate": 3.7282735105517164e-06, - "loss": 1.0931, - "step": 1604 - }, - { - "epoch": 0.192989839475741, - "grad_norm": 2.7077048428327, - "learning_rate": 3.727881356491922e-06, - "loss": 0.9107, - "step": 1605 - }, - { - "epoch": 0.19311008236638008, - "grad_norm": 1.9060598810162106, - "learning_rate": 3.7274889403139002e-06, - "loss": 0.9771, - "step": 1606 - }, - { - "epoch": 0.1932303252570192, - "grad_norm": 2.2986636301464607, - "learning_rate": 3.727096262077179e-06, - "loss": 1.0194, - "step": 1607 - }, - { - "epoch": 0.19335056814765827, - "grad_norm": 1.7270322825115447, - "learning_rate": 3.7267033218413285e-06, - "loss": 1.0859, - "step": 1608 - }, - { - "epoch": 0.19347081103829736, - "grad_norm": 2.187218566229155, - "learning_rate": 3.726310119665957e-06, - "loss": 1.0403, - "step": 1609 - }, - { - "epoch": 0.19359105392893644, - "grad_norm": 1.79989396948587, - "learning_rate": 3.725916655610713e-06, - "loss": 1.0802, - "step": 1610 - }, - { - "epoch": 0.19371129681957555, - "grad_norm": 2.7956791031239696, - "learning_rate": 3.725522929735284e-06, - "loss": 0.9827, - "step": 1611 - }, - { - "epoch": 0.19383153971021463, - "grad_norm": 1.9764892805809042, - "learning_rate": 3.725128942099399e-06, - "loss": 0.9631, - "step": 1612 - }, - { - "epoch": 0.19395178260085372, - "grad_norm": 1.6455598273956282, - "learning_rate": 3.7247346927628245e-06, - "loss": 1.0325, - "step": 1613 - }, - { - "epoch": 0.19407202549149283, - "grad_norm": 1.6963126752458424, - "learning_rate": 3.7243401817853694e-06, - "loss": 1.0099, - "step": 1614 - }, - { - "epoch": 0.1941922683821319, - "grad_norm": 1.993249468022621, - "learning_rate": 3.723945409226879e-06, - "loss": 0.9523, - "step": 1615 - }, - { - "epoch": 0.194312511272771, - "grad_norm": 2.2043603991032232, - "learning_rate": 3.723550375147241e-06, - "loss": 1.035, - "step": 1616 - }, - { - "epoch": 0.19443275416341008, - "grad_norm": 1.727864253703596, - "learning_rate": 3.7231550796063816e-06, - "loss": 1.0291, - "step": 1617 - }, - { - "epoch": 0.1945529970540492, - "grad_norm": 1.658754147119807, - "learning_rate": 3.722759522664266e-06, - "loss": 0.8721, - "step": 1618 - }, - { - "epoch": 0.19467323994468827, - "grad_norm": 3.6948724529051478, - "learning_rate": 3.7223637043809016e-06, - "loss": 1.037, - "step": 1619 - }, - { - "epoch": 0.19479348283532735, - "grad_norm": 1.6881941321410314, - "learning_rate": 3.7219676248163322e-06, - "loss": 1.0921, - "step": 1620 - }, - { - "epoch": 0.19491372572596646, - "grad_norm": 2.0084311871676475, - "learning_rate": 3.721571284030643e-06, - "loss": 1.1556, - "step": 1621 - }, - { - "epoch": 0.19503396861660555, - "grad_norm": 2.1414895643736602, - "learning_rate": 3.7211746820839587e-06, - "loss": 1.019, - "step": 1622 - }, - { - "epoch": 0.19515421150724463, - "grad_norm": 1.8582943893708388, - "learning_rate": 3.7207778190364437e-06, - "loss": 1.0506, - "step": 1623 - }, - { - "epoch": 0.1952744543978837, - "grad_norm": 1.9131577825370176, - "learning_rate": 3.720380694948302e-06, - "loss": 0.9676, - "step": 1624 - }, - { - "epoch": 0.19539469728852282, - "grad_norm": 0.9825552817558093, - "learning_rate": 3.719983309879777e-06, - "loss": 0.9736, - "step": 1625 - }, - { - "epoch": 0.1955149401791619, - "grad_norm": 1.6037570711189593, - "learning_rate": 3.719585663891151e-06, - "loss": 1.0132, - "step": 1626 - }, - { - "epoch": 0.195635183069801, - "grad_norm": 2.276051070996256, - "learning_rate": 3.719187757042747e-06, - "loss": 1.0133, - "step": 1627 - }, - { - "epoch": 0.1957554259604401, - "grad_norm": 0.7838946402779056, - "learning_rate": 3.7187895893949275e-06, - "loss": 0.8296, - "step": 1628 - }, - { - "epoch": 0.19587566885107918, - "grad_norm": 2.5234634823243938, - "learning_rate": 3.7183911610080937e-06, - "loss": 0.994, - "step": 1629 - }, - { - "epoch": 0.19599591174171827, - "grad_norm": 2.5861784024696814, - "learning_rate": 3.7179924719426872e-06, - "loss": 0.9771, - "step": 1630 - }, - { - "epoch": 0.19611615463235738, - "grad_norm": 2.4545368578146975, - "learning_rate": 3.7175935222591885e-06, - "loss": 0.9916, - "step": 1631 - }, - { - "epoch": 0.19623639752299646, - "grad_norm": 1.6204495172496771, - "learning_rate": 3.717194312018118e-06, - "loss": 0.9827, - "step": 1632 - }, - { - "epoch": 0.19635664041363554, - "grad_norm": 3.1280702386077546, - "learning_rate": 3.716794841280036e-06, - "loss": 0.9899, - "step": 1633 - }, - { - "epoch": 0.19647688330427462, - "grad_norm": 2.0288759883699354, - "learning_rate": 3.7163951101055407e-06, - "loss": 1.0067, - "step": 1634 - }, - { - "epoch": 0.19659712619491373, - "grad_norm": 1.7629591810377025, - "learning_rate": 3.715995118555273e-06, - "loss": 1.0192, - "step": 1635 - }, - { - "epoch": 0.19671736908555282, - "grad_norm": 1.9860995751011068, - "learning_rate": 3.71559486668991e-06, - "loss": 1.0825, - "step": 1636 - }, - { - "epoch": 0.1968376119761919, - "grad_norm": 1.5763618515466933, - "learning_rate": 3.715194354570169e-06, - "loss": 1.0019, - "step": 1637 - }, - { - "epoch": 0.196957854866831, - "grad_norm": 3.4713663767289638, - "learning_rate": 3.714793582256809e-06, - "loss": 1.0543, - "step": 1638 - }, - { - "epoch": 0.1970780977574701, - "grad_norm": 5.3878676286847025, - "learning_rate": 3.7143925498106253e-06, - "loss": 1.0686, - "step": 1639 - }, - { - "epoch": 0.19719834064810918, - "grad_norm": 1.7953487382622855, - "learning_rate": 3.7139912572924558e-06, - "loss": 1.0269, - "step": 1640 - }, - { - "epoch": 0.19731858353874826, - "grad_norm": 2.8002060814359258, - "learning_rate": 3.7135897047631744e-06, - "loss": 1.0395, - "step": 1641 - }, - { - "epoch": 0.19743882642938737, - "grad_norm": 1.6353308134768438, - "learning_rate": 3.713187892283698e-06, - "loss": 0.9863, - "step": 1642 - }, - { - "epoch": 0.19755906932002645, - "grad_norm": 2.0651988087566635, - "learning_rate": 3.71278581991498e-06, - "loss": 1.0916, - "step": 1643 - }, - { - "epoch": 0.19767931221066554, - "grad_norm": 1.7342647483080895, - "learning_rate": 3.712383487718015e-06, - "loss": 1.0169, - "step": 1644 - }, - { - "epoch": 0.19779955510130465, - "grad_norm": 1.6638237150558062, - "learning_rate": 3.7119808957538365e-06, - "loss": 1.0975, - "step": 1645 - }, - { - "epoch": 0.19791979799194373, - "grad_norm": 2.522590046117578, - "learning_rate": 3.711578044083517e-06, - "loss": 1.0321, - "step": 1646 - }, - { - "epoch": 0.1980400408825828, - "grad_norm": 1.8150256745175024, - "learning_rate": 3.7111749327681698e-06, - "loss": 0.9778, - "step": 1647 - }, - { - "epoch": 0.1981602837732219, - "grad_norm": 2.1231631025208606, - "learning_rate": 3.7107715618689455e-06, - "loss": 1.0905, - "step": 1648 - }, - { - "epoch": 0.198280526663861, - "grad_norm": 1.4656508193354754, - "learning_rate": 3.710367931447035e-06, - "loss": 1.0597, - "step": 1649 - }, - { - "epoch": 0.1984007695545001, - "grad_norm": 10.003913113771562, - "learning_rate": 3.70996404156367e-06, - "loss": 1.0951, - "step": 1650 - }, - { - "epoch": 0.19852101244513917, - "grad_norm": 1.7502222601211563, - "learning_rate": 3.7095598922801187e-06, - "loss": 0.953, - "step": 1651 - }, - { - "epoch": 0.19864125533577828, - "grad_norm": 2.0472334919099846, - "learning_rate": 3.7091554836576914e-06, - "loss": 0.9864, - "step": 1652 - }, - { - "epoch": 0.19876149822641737, - "grad_norm": 1.7082774765683366, - "learning_rate": 3.708750815757736e-06, - "loss": 1.0596, - "step": 1653 - }, - { - "epoch": 0.19888174111705645, - "grad_norm": 1.982540935079308, - "learning_rate": 3.7083458886416407e-06, - "loss": 0.9674, - "step": 1654 - }, - { - "epoch": 0.19900198400769553, - "grad_norm": 2.1961303490758093, - "learning_rate": 3.707940702370832e-06, - "loss": 1.109, - "step": 1655 - }, - { - "epoch": 0.19912222689833464, - "grad_norm": 0.7648079073712643, - "learning_rate": 3.707535257006777e-06, - "loss": 0.8296, - "step": 1656 - }, - { - "epoch": 0.19924246978897373, - "grad_norm": 1.8838158937306395, - "learning_rate": 3.707129552610981e-06, - "loss": 1.1199, - "step": 1657 - }, - { - "epoch": 0.1993627126796128, - "grad_norm": 1.7375231514904577, - "learning_rate": 3.70672358924499e-06, - "loss": 0.9652, - "step": 1658 - }, - { - "epoch": 0.19948295557025192, - "grad_norm": 1.8657491722508905, - "learning_rate": 3.706317366970386e-06, - "loss": 1.0141, - "step": 1659 - }, - { - "epoch": 0.199603198460891, - "grad_norm": 1.7482244475974034, - "learning_rate": 3.705910885848795e-06, - "loss": 1.0689, - "step": 1660 - }, - { - "epoch": 0.19972344135153008, - "grad_norm": 3.657290318828448, - "learning_rate": 3.705504145941879e-06, - "loss": 1.0673, - "step": 1661 - }, - { - "epoch": 0.1998436842421692, - "grad_norm": 2.056653956472402, - "learning_rate": 3.7050971473113403e-06, - "loss": 1.0248, - "step": 1662 - }, - { - "epoch": 0.19996392713280828, - "grad_norm": 2.2897950129819593, - "learning_rate": 3.7046898900189196e-06, - "loss": 1.0295, - "step": 1663 - }, - { - "epoch": 0.20008417002344736, - "grad_norm": 1.5076282764219247, - "learning_rate": 3.704282374126398e-06, - "loss": 1.0652, - "step": 1664 - }, - { - "epoch": 0.20020441291408644, - "grad_norm": 2.3859519413459176, - "learning_rate": 3.7038745996955954e-06, - "loss": 1.1084, - "step": 1665 - }, - { - "epoch": 0.20032465580472555, - "grad_norm": 3.28348661216631, - "learning_rate": 3.703466566788371e-06, - "loss": 0.9512, - "step": 1666 - }, - { - "epoch": 0.20044489869536464, - "grad_norm": 1.9654420582555319, - "learning_rate": 3.703058275466622e-06, - "loss": 0.9741, - "step": 1667 - }, - { - "epoch": 0.20056514158600372, - "grad_norm": 1.6772099875571485, - "learning_rate": 3.7026497257922877e-06, - "loss": 1.0053, - "step": 1668 - }, - { - "epoch": 0.20068538447664283, - "grad_norm": 1.5113721346665125, - "learning_rate": 3.7022409178273436e-06, - "loss": 1.0778, - "step": 1669 - }, - { - "epoch": 0.2008056273672819, - "grad_norm": 1.673273517684461, - "learning_rate": 3.7018318516338054e-06, - "loss": 1.0103, - "step": 1670 - }, - { - "epoch": 0.200925870257921, - "grad_norm": 4.536593318760161, - "learning_rate": 3.7014225272737284e-06, - "loss": 1.043, - "step": 1671 - }, - { - "epoch": 0.20104611314856008, - "grad_norm": 1.700574101952002, - "learning_rate": 3.701012944809207e-06, - "loss": 0.9691, - "step": 1672 - }, - { - "epoch": 0.2011663560391992, - "grad_norm": 1.9033745512811187, - "learning_rate": 3.700603104302374e-06, - "loss": 1.0119, - "step": 1673 - }, - { - "epoch": 0.20128659892983827, - "grad_norm": 0.8699005576555005, - "learning_rate": 3.7001930058154027e-06, - "loss": 0.8165, - "step": 1674 - }, - { - "epoch": 0.20140684182047736, - "grad_norm": 3.6836047531400484, - "learning_rate": 3.6997826494105037e-06, - "loss": 1.0286, - "step": 1675 - }, - { - "epoch": 0.20152708471111647, - "grad_norm": 5.3160352724625906, - "learning_rate": 3.6993720351499286e-06, - "loss": 0.9267, - "step": 1676 - }, - { - "epoch": 0.20164732760175555, - "grad_norm": 1.7141130493833492, - "learning_rate": 3.6989611630959666e-06, - "loss": 1.0058, - "step": 1677 - }, - { - "epoch": 0.20176757049239463, - "grad_norm": 0.7095111749829524, - "learning_rate": 3.6985500333109474e-06, - "loss": 0.8453, - "step": 1678 - }, - { - "epoch": 0.20188781338303372, - "grad_norm": 3.803570781275325, - "learning_rate": 3.6981386458572385e-06, - "loss": 0.9897, - "step": 1679 - }, - { - "epoch": 0.20200805627367283, - "grad_norm": 3.078335672517751, - "learning_rate": 3.6977270007972468e-06, - "loss": 0.9953, - "step": 1680 - }, - { - "epoch": 0.2021282991643119, - "grad_norm": 2.0717771711311204, - "learning_rate": 3.6973150981934196e-06, - "loss": 0.9428, - "step": 1681 - }, - { - "epoch": 0.202248542054951, - "grad_norm": 2.465502667504722, - "learning_rate": 3.6969029381082415e-06, - "loss": 1.0649, - "step": 1682 - }, - { - "epoch": 0.2023687849455901, - "grad_norm": 1.9019568204501094, - "learning_rate": 3.696490520604237e-06, - "loss": 1.0269, - "step": 1683 - }, - { - "epoch": 0.20248902783622919, - "grad_norm": 1.5399463265218418, - "learning_rate": 3.696077845743968e-06, - "loss": 1.0367, - "step": 1684 - }, - { - "epoch": 0.20260927072686827, - "grad_norm": 11.547435948189497, - "learning_rate": 3.69566491359004e-06, - "loss": 0.9641, - "step": 1685 - }, - { - "epoch": 0.20272951361750738, - "grad_norm": 1.8018390585509911, - "learning_rate": 3.695251724205092e-06, - "loss": 0.9336, - "step": 1686 - }, - { - "epoch": 0.20284975650814646, - "grad_norm": 1.4579698745482816, - "learning_rate": 3.6948382776518054e-06, - "loss": 1.0877, - "step": 1687 - }, - { - "epoch": 0.20296999939878554, - "grad_norm": 2.077032275195691, - "learning_rate": 3.6944245739929e-06, - "loss": 1.0195, - "step": 1688 - }, - { - "epoch": 0.20309024228942463, - "grad_norm": 1.938338928717521, - "learning_rate": 3.6940106132911332e-06, - "loss": 0.951, - "step": 1689 - }, - { - "epoch": 0.20321048518006374, - "grad_norm": 2.157873896692835, - "learning_rate": 3.6935963956093037e-06, - "loss": 1.1075, - "step": 1690 - }, - { - "epoch": 0.20333072807070282, - "grad_norm": 1.626416982394908, - "learning_rate": 3.6931819210102474e-06, - "loss": 0.915, - "step": 1691 - }, - { - "epoch": 0.2034509709613419, - "grad_norm": 1.72112643937674, - "learning_rate": 3.6927671895568402e-06, - "loss": 1.0756, - "step": 1692 - }, - { - "epoch": 0.20357121385198101, - "grad_norm": 1.7492045229443551, - "learning_rate": 3.692352201311996e-06, - "loss": 1.1004, - "step": 1693 - }, - { - "epoch": 0.2036914567426201, - "grad_norm": 1.9370871073171254, - "learning_rate": 3.6919369563386687e-06, - "loss": 0.9885, - "step": 1694 - }, - { - "epoch": 0.20381169963325918, - "grad_norm": 1.9324152273754291, - "learning_rate": 3.69152145469985e-06, - "loss": 1.0239, - "step": 1695 - }, - { - "epoch": 0.20393194252389826, - "grad_norm": 1.8592511304098152, - "learning_rate": 3.691105696458572e-06, - "loss": 1.0453, - "step": 1696 - }, - { - "epoch": 0.20405218541453737, - "grad_norm": 2.372964187248949, - "learning_rate": 3.690689681677904e-06, - "loss": 0.8995, - "step": 1697 - }, - { - "epoch": 0.20417242830517646, - "grad_norm": 3.479388719123612, - "learning_rate": 3.690273410420956e-06, - "loss": 1.1095, - "step": 1698 - }, - { - "epoch": 0.20429267119581554, - "grad_norm": 2.590665246814228, - "learning_rate": 3.689856882750875e-06, - "loss": 1.0003, - "step": 1699 - }, - { - "epoch": 0.20441291408645465, - "grad_norm": 1.7457931225621433, - "learning_rate": 3.6894400987308486e-06, - "loss": 1.0207, - "step": 1700 - }, - { - "epoch": 0.20453315697709373, - "grad_norm": 1.7370883920653388, - "learning_rate": 3.6890230584241024e-06, - "loss": 1.0783, - "step": 1701 - }, - { - "epoch": 0.20465339986773282, - "grad_norm": 0.9083121285602815, - "learning_rate": 3.6886057618939016e-06, - "loss": 0.9316, - "step": 1702 - }, - { - "epoch": 0.2047736427583719, - "grad_norm": 2.686360139643552, - "learning_rate": 3.6881882092035492e-06, - "loss": 0.9258, - "step": 1703 - }, - { - "epoch": 0.204893885649011, - "grad_norm": 1.0148913741592118, - "learning_rate": 3.6877704004163873e-06, - "loss": 0.9101, - "step": 1704 - }, - { - "epoch": 0.2050141285396501, - "grad_norm": 1.754504673062067, - "learning_rate": 3.6873523355957984e-06, - "loss": 1.0108, - "step": 1705 - }, - { - "epoch": 0.20513437143028918, - "grad_norm": 1.0644658919102497, - "learning_rate": 3.686934014805201e-06, - "loss": 0.9521, - "step": 1706 - }, - { - "epoch": 0.20525461432092829, - "grad_norm": 1.7303949000184222, - "learning_rate": 3.6865154381080552e-06, - "loss": 1.0353, - "step": 1707 - }, - { - "epoch": 0.20537485721156737, - "grad_norm": 1.8130234587256857, - "learning_rate": 3.6860966055678585e-06, - "loss": 1.0543, - "step": 1708 - }, - { - "epoch": 0.20549510010220645, - "grad_norm": 1.893294790215906, - "learning_rate": 3.685677517248147e-06, - "loss": 1.0885, - "step": 1709 - }, - { - "epoch": 0.20561534299284553, - "grad_norm": 1.6516764851396022, - "learning_rate": 3.6852581732124967e-06, - "loss": 1.0277, - "step": 1710 - }, - { - "epoch": 0.20573558588348465, - "grad_norm": 3.601762679157617, - "learning_rate": 3.6848385735245213e-06, - "loss": 0.9867, - "step": 1711 - }, - { - "epoch": 0.20585582877412373, - "grad_norm": 1.7913816654133756, - "learning_rate": 3.6844187182478734e-06, - "loss": 1.0986, - "step": 1712 - }, - { - "epoch": 0.2059760716647628, - "grad_norm": 1.61599090092524, - "learning_rate": 3.683998607446246e-06, - "loss": 0.9817, - "step": 1713 - }, - { - "epoch": 0.20609631455540192, - "grad_norm": 1.9031916614502131, - "learning_rate": 3.6835782411833686e-06, - "loss": 0.9757, - "step": 1714 - }, - { - "epoch": 0.206216557446041, - "grad_norm": 2.1595672301037285, - "learning_rate": 3.68315761952301e-06, - "loss": 0.9774, - "step": 1715 - }, - { - "epoch": 0.2063368003366801, - "grad_norm": 1.9591639427881142, - "learning_rate": 3.6827367425289797e-06, - "loss": 1.0607, - "step": 1716 - }, - { - "epoch": 0.2064570432273192, - "grad_norm": 2.74646419018037, - "learning_rate": 3.6823156102651225e-06, - "loss": 0.9539, - "step": 1717 - }, - { - "epoch": 0.20657728611795828, - "grad_norm": 1.5449471839346034, - "learning_rate": 3.6818942227953257e-06, - "loss": 0.9387, - "step": 1718 - }, - { - "epoch": 0.20669752900859736, - "grad_norm": 2.036677577191903, - "learning_rate": 3.681472580183512e-06, - "loss": 0.9203, - "step": 1719 - }, - { - "epoch": 0.20681777189923645, - "grad_norm": 1.9211285389794328, - "learning_rate": 3.6810506824936455e-06, - "loss": 1.0885, - "step": 1720 - }, - { - "epoch": 0.20693801478987556, - "grad_norm": 1.1316737798561816, - "learning_rate": 3.680628529789726e-06, - "loss": 0.8947, - "step": 1721 - }, - { - "epoch": 0.20705825768051464, - "grad_norm": 2.0911092473903614, - "learning_rate": 3.680206122135796e-06, - "loss": 1.0835, - "step": 1722 - }, - { - "epoch": 0.20717850057115372, - "grad_norm": 1.6764894780648807, - "learning_rate": 3.6797834595959323e-06, - "loss": 1.0111, - "step": 1723 - }, - { - "epoch": 0.20729874346179283, - "grad_norm": 2.3458170613006275, - "learning_rate": 3.679360542234254e-06, - "loss": 1.0037, - "step": 1724 - }, - { - "epoch": 0.20741898635243192, - "grad_norm": 1.8189196845008169, - "learning_rate": 3.678937370114916e-06, - "loss": 0.9543, - "step": 1725 - }, - { - "epoch": 0.207539229243071, - "grad_norm": 1.8593358268137494, - "learning_rate": 3.678513943302114e-06, - "loss": 1.0208, - "step": 1726 - }, - { - "epoch": 0.20765947213371008, - "grad_norm": 1.5865182799720736, - "learning_rate": 3.678090261860082e-06, - "loss": 1.08, - "step": 1727 - }, - { - "epoch": 0.2077797150243492, - "grad_norm": 2.5423978993968315, - "learning_rate": 3.6776663258530906e-06, - "loss": 1.0098, - "step": 1728 - }, - { - "epoch": 0.20789995791498828, - "grad_norm": 1.7122440313902736, - "learning_rate": 3.6772421353454516e-06, - "loss": 0.9441, - "step": 1729 - }, - { - "epoch": 0.20802020080562736, - "grad_norm": 1.8031697021443782, - "learning_rate": 3.6768176904015153e-06, - "loss": 1.1086, - "step": 1730 - }, - { - "epoch": 0.20814044369626647, - "grad_norm": 1.6593534076672352, - "learning_rate": 3.6763929910856674e-06, - "loss": 0.8268, - "step": 1731 - }, - { - "epoch": 0.20826068658690555, - "grad_norm": 2.178246024886556, - "learning_rate": 3.6759680374623365e-06, - "loss": 1.0002, - "step": 1732 - }, - { - "epoch": 0.20838092947754464, - "grad_norm": 27.50774260677005, - "learning_rate": 3.675542829595986e-06, - "loss": 0.9848, - "step": 1733 - }, - { - "epoch": 0.20850117236818372, - "grad_norm": 1.5785540007089707, - "learning_rate": 3.6751173675511213e-06, - "loss": 1.0214, - "step": 1734 - }, - { - "epoch": 0.20862141525882283, - "grad_norm": 1.9214082476756051, - "learning_rate": 3.674691651392283e-06, - "loss": 1.1081, - "step": 1735 - }, - { - "epoch": 0.2087416581494619, - "grad_norm": 2.7732972015570314, - "learning_rate": 3.674265681184053e-06, - "loss": 0.995, - "step": 1736 - }, - { - "epoch": 0.208861901040101, - "grad_norm": 1.7325590782496405, - "learning_rate": 3.6738394569910504e-06, - "loss": 1.0913, - "step": 1737 - }, - { - "epoch": 0.2089821439307401, - "grad_norm": 2.189056496676614, - "learning_rate": 3.6734129788779333e-06, - "loss": 1.0582, - "step": 1738 - }, - { - "epoch": 0.2091023868213792, - "grad_norm": 1.7430092280674874, - "learning_rate": 3.6729862469093976e-06, - "loss": 1.1288, - "step": 1739 - }, - { - "epoch": 0.20922262971201827, - "grad_norm": 2.422600323752104, - "learning_rate": 3.6725592611501782e-06, - "loss": 1.0514, - "step": 1740 - }, - { - "epoch": 0.20934287260265738, - "grad_norm": 1.648269789422021, - "learning_rate": 3.6721320216650496e-06, - "loss": 0.9956, - "step": 1741 - }, - { - "epoch": 0.20946311549329646, - "grad_norm": 2.019416968176068, - "learning_rate": 3.6717045285188215e-06, - "loss": 1.0687, - "step": 1742 - }, - { - "epoch": 0.20958335838393555, - "grad_norm": 2.546472061204696, - "learning_rate": 3.671276781776346e-06, - "loss": 1.0822, - "step": 1743 - }, - { - "epoch": 0.20970360127457463, - "grad_norm": 2.1276511315929016, - "learning_rate": 3.6708487815025128e-06, - "loss": 0.8975, - "step": 1744 - }, - { - "epoch": 0.20982384416521374, - "grad_norm": 1.884835450455283, - "learning_rate": 3.6704205277622463e-06, - "loss": 0.9786, - "step": 1745 - }, - { - "epoch": 0.20994408705585282, - "grad_norm": 3.8964860882671672, - "learning_rate": 3.6699920206205146e-06, - "loss": 1.0279, - "step": 1746 - }, - { - "epoch": 0.2100643299464919, - "grad_norm": 1.6182026648420627, - "learning_rate": 3.669563260142321e-06, - "loss": 1.0515, - "step": 1747 - }, - { - "epoch": 0.21018457283713102, - "grad_norm": 2.0322170596471114, - "learning_rate": 3.6691342463927083e-06, - "loss": 1.0652, - "step": 1748 - }, - { - "epoch": 0.2103048157277701, - "grad_norm": 1.5953927642925982, - "learning_rate": 3.668704979436758e-06, - "loss": 1.0486, - "step": 1749 - }, - { - "epoch": 0.21042505861840918, - "grad_norm": 2.469731871997802, - "learning_rate": 3.668275459339588e-06, - "loss": 1.0169, - "step": 1750 - }, - { - "epoch": 0.21054530150904827, - "grad_norm": 1.7201600966132038, - "learning_rate": 3.667845686166358e-06, - "loss": 1.0389, - "step": 1751 - }, - { - "epoch": 0.21066554439968738, - "grad_norm": 1.831430300016121, - "learning_rate": 3.6674156599822634e-06, - "loss": 1.0868, - "step": 1752 - }, - { - "epoch": 0.21078578729032646, - "grad_norm": 2.332488406562409, - "learning_rate": 3.666985380852539e-06, - "loss": 1.0383, - "step": 1753 - }, - { - "epoch": 0.21090603018096554, - "grad_norm": 2.0891817274287483, - "learning_rate": 3.6665548488424576e-06, - "loss": 0.9838, - "step": 1754 - }, - { - "epoch": 0.21102627307160465, - "grad_norm": 1.5725939674381082, - "learning_rate": 3.6661240640173307e-06, - "loss": 1.1081, - "step": 1755 - }, - { - "epoch": 0.21114651596224374, - "grad_norm": 0.9135674334161433, - "learning_rate": 3.6656930264425085e-06, - "loss": 0.8679, - "step": 1756 - }, - { - "epoch": 0.21126675885288282, - "grad_norm": 1.8493543890203858, - "learning_rate": 3.665261736183378e-06, - "loss": 0.9963, - "step": 1757 - }, - { - "epoch": 0.2113870017435219, - "grad_norm": 2.40747792012992, - "learning_rate": 3.664830193305366e-06, - "loss": 1.1175, - "step": 1758 - }, - { - "epoch": 0.211507244634161, - "grad_norm": 2.5960134718241226, - "learning_rate": 3.6643983978739373e-06, - "loss": 0.9851, - "step": 1759 - }, - { - "epoch": 0.2116274875248001, - "grad_norm": 2.598196240279577, - "learning_rate": 3.663966349954596e-06, - "loss": 1.0442, - "step": 1760 - }, - { - "epoch": 0.21174773041543918, - "grad_norm": 0.7665851955731933, - "learning_rate": 3.6635340496128816e-06, - "loss": 0.849, - "step": 1761 - }, - { - "epoch": 0.2118679733060783, - "grad_norm": 1.42505206966636, - "learning_rate": 3.6631014969143747e-06, - "loss": 1.1465, - "step": 1762 - }, - { - "epoch": 0.21198821619671737, - "grad_norm": 2.118036416667358, - "learning_rate": 3.662668691924693e-06, - "loss": 1.1166, - "step": 1763 - }, - { - "epoch": 0.21210845908735645, - "grad_norm": 1.896564203029942, - "learning_rate": 3.6622356347094927e-06, - "loss": 0.9437, - "step": 1764 - }, - { - "epoch": 0.21222870197799554, - "grad_norm": 1.961876389866653, - "learning_rate": 3.6618023253344684e-06, - "loss": 1.0163, - "step": 1765 - }, - { - "epoch": 0.21234894486863465, - "grad_norm": 1.5363652950070659, - "learning_rate": 3.6613687638653527e-06, - "loss": 1.0642, - "step": 1766 - }, - { - "epoch": 0.21246918775927373, - "grad_norm": 1.828746396606817, - "learning_rate": 3.660934950367916e-06, - "loss": 1.0072, - "step": 1767 - }, - { - "epoch": 0.21258943064991281, - "grad_norm": 1.561734736915578, - "learning_rate": 3.660500884907968e-06, - "loss": 1.0621, - "step": 1768 - }, - { - "epoch": 0.21270967354055192, - "grad_norm": 0.8449810725129415, - "learning_rate": 3.660066567551356e-06, - "loss": 0.8565, - "step": 1769 - }, - { - "epoch": 0.212829916431191, - "grad_norm": 2.318452237076332, - "learning_rate": 3.6596319983639657e-06, - "loss": 1.0656, - "step": 1770 - }, - { - "epoch": 0.2129501593218301, - "grad_norm": 1.6572073562758964, - "learning_rate": 3.6591971774117214e-06, - "loss": 1.0939, - "step": 1771 - }, - { - "epoch": 0.2130704022124692, - "grad_norm": 1.897122276817449, - "learning_rate": 3.6587621047605833e-06, - "loss": 1.0393, - "step": 1772 - }, - { - "epoch": 0.21319064510310828, - "grad_norm": 1.8083849265243472, - "learning_rate": 3.6583267804765542e-06, - "loss": 1.0974, - "step": 1773 - }, - { - "epoch": 0.21331088799374737, - "grad_norm": 2.0221267048258555, - "learning_rate": 3.6578912046256702e-06, - "loss": 1.0796, - "step": 1774 - }, - { - "epoch": 0.21343113088438645, - "grad_norm": 2.0783830066649887, - "learning_rate": 3.6574553772740083e-06, - "loss": 0.9919, - "step": 1775 - }, - { - "epoch": 0.21355137377502556, - "grad_norm": 0.8726050092907026, - "learning_rate": 3.657019298487684e-06, - "loss": 0.8935, - "step": 1776 - }, - { - "epoch": 0.21367161666566464, - "grad_norm": 1.6403377463137925, - "learning_rate": 3.6565829683328495e-06, - "loss": 1.0597, - "step": 1777 - }, - { - "epoch": 0.21379185955630373, - "grad_norm": 1.8257316507532353, - "learning_rate": 3.6561463868756965e-06, - "loss": 1.0851, - "step": 1778 - }, - { - "epoch": 0.21391210244694284, - "grad_norm": 1.7394275785909339, - "learning_rate": 3.655709554182452e-06, - "loss": 1.0078, - "step": 1779 - }, - { - "epoch": 0.21403234533758192, - "grad_norm": 1.6361413633740831, - "learning_rate": 3.6552724703193855e-06, - "loss": 1.0791, - "step": 1780 - }, - { - "epoch": 0.214152588228221, - "grad_norm": 0.7734592319094157, - "learning_rate": 3.654835135352801e-06, - "loss": 0.8134, - "step": 1781 - }, - { - "epoch": 0.21427283111886009, - "grad_norm": 1.678300577707128, - "learning_rate": 3.654397549349043e-06, - "loss": 1.0999, - "step": 1782 - }, - { - "epoch": 0.2143930740094992, - "grad_norm": 1.944679433005016, - "learning_rate": 3.653959712374491e-06, - "loss": 0.9863, - "step": 1783 - }, - { - "epoch": 0.21451331690013828, - "grad_norm": 1.5609758190992262, - "learning_rate": 3.6535216244955663e-06, - "loss": 1.0532, - "step": 1784 - }, - { - "epoch": 0.21463355979077736, - "grad_norm": 1.6887273916542493, - "learning_rate": 3.653083285778726e-06, - "loss": 0.9341, - "step": 1785 - }, - { - "epoch": 0.21475380268141647, - "grad_norm": 2.2511673124506406, - "learning_rate": 3.6526446962904653e-06, - "loss": 1.0283, - "step": 1786 - }, - { - "epoch": 0.21487404557205556, - "grad_norm": 1.4054212883425525, - "learning_rate": 3.652205856097318e-06, - "loss": 0.9742, - "step": 1787 - }, - { - "epoch": 0.21499428846269464, - "grad_norm": 1.7914413561835212, - "learning_rate": 3.651766765265856e-06, - "loss": 1.0146, - "step": 1788 - }, - { - "epoch": 0.21511453135333372, - "grad_norm": 2.19611091620631, - "learning_rate": 3.65132742386269e-06, - "loss": 1.0285, - "step": 1789 - }, - { - "epoch": 0.21523477424397283, - "grad_norm": 1.6719580656432904, - "learning_rate": 3.6508878319544656e-06, - "loss": 1.0813, - "step": 1790 - }, - { - "epoch": 0.21535501713461191, - "grad_norm": 2.1538811539331864, - "learning_rate": 3.65044798960787e-06, - "loss": 1.0376, - "step": 1791 - }, - { - "epoch": 0.215475260025251, - "grad_norm": 1.9772776640508523, - "learning_rate": 3.650007896889627e-06, - "loss": 1.007, - "step": 1792 - }, - { - "epoch": 0.2155955029158901, - "grad_norm": 1.7214026355315923, - "learning_rate": 3.6495675538664974e-06, - "loss": 1.0312, - "step": 1793 - }, - { - "epoch": 0.2157157458065292, - "grad_norm": 1.8430081184195979, - "learning_rate": 3.649126960605282e-06, - "loss": 1.0513, - "step": 1794 - }, - { - "epoch": 0.21583598869716827, - "grad_norm": 2.258339157190825, - "learning_rate": 3.6486861171728174e-06, - "loss": 1.0644, - "step": 1795 - }, - { - "epoch": 0.21595623158780738, - "grad_norm": 1.5783699525387027, - "learning_rate": 3.6482450236359803e-06, - "loss": 1.0163, - "step": 1796 - }, - { - "epoch": 0.21607647447844647, - "grad_norm": 2.0328433316683308, - "learning_rate": 3.647803680061683e-06, - "loss": 0.9992, - "step": 1797 - }, - { - "epoch": 0.21619671736908555, - "grad_norm": 2.6060134341229606, - "learning_rate": 3.6473620865168776e-06, - "loss": 0.9898, - "step": 1798 - }, - { - "epoch": 0.21631696025972463, - "grad_norm": 1.8621346006451305, - "learning_rate": 3.646920243068554e-06, - "loss": 1.0495, - "step": 1799 - }, - { - "epoch": 0.21643720315036374, - "grad_norm": 1.6842684710331899, - "learning_rate": 3.6464781497837384e-06, - "loss": 0.9725, - "step": 1800 - }, - { - "epoch": 0.21655744604100283, - "grad_norm": 1.606928475023183, - "learning_rate": 3.6460358067294965e-06, - "loss": 0.9601, - "step": 1801 - }, - { - "epoch": 0.2166776889316419, - "grad_norm": 1.87783929108356, - "learning_rate": 3.645593213972932e-06, - "loss": 1.007, - "step": 1802 - }, - { - "epoch": 0.21679793182228102, - "grad_norm": 2.7330525715388583, - "learning_rate": 3.6451503715811852e-06, - "loss": 1.0233, - "step": 1803 - }, - { - "epoch": 0.2169181747129201, - "grad_norm": 2.152380304320375, - "learning_rate": 3.6447072796214345e-06, - "loss": 1.0294, - "step": 1804 - }, - { - "epoch": 0.21703841760355919, - "grad_norm": 0.986934211264071, - "learning_rate": 3.644263938160898e-06, - "loss": 0.9004, - "step": 1805 - }, - { - "epoch": 0.21715866049419827, - "grad_norm": 1.7122258600378837, - "learning_rate": 3.6438203472668293e-06, - "loss": 0.9445, - "step": 1806 - }, - { - "epoch": 0.21727890338483738, - "grad_norm": 2.6061641102641455, - "learning_rate": 3.6433765070065206e-06, - "loss": 1.0491, - "step": 1807 - }, - { - "epoch": 0.21739914627547646, - "grad_norm": 2.434790156446036, - "learning_rate": 3.6429324174473025e-06, - "loss": 1.1096, - "step": 1808 - }, - { - "epoch": 0.21751938916611555, - "grad_norm": 1.8946640754622033, - "learning_rate": 3.6424880786565425e-06, - "loss": 1.0839, - "step": 1809 - }, - { - "epoch": 0.21763963205675466, - "grad_norm": 2.4188878495710564, - "learning_rate": 3.6420434907016482e-06, - "loss": 1.0207, - "step": 1810 - }, - { - "epoch": 0.21775987494739374, - "grad_norm": 1.6325524227961228, - "learning_rate": 3.6415986536500606e-06, - "loss": 1.0424, - "step": 1811 - }, - { - "epoch": 0.21788011783803282, - "grad_norm": 2.6275015219585103, - "learning_rate": 3.641153567569263e-06, - "loss": 1.0428, - "step": 1812 - }, - { - "epoch": 0.2180003607286719, - "grad_norm": 4.511736928399525, - "learning_rate": 3.640708232526774e-06, - "loss": 1.1782, - "step": 1813 - }, - { - "epoch": 0.21812060361931102, - "grad_norm": 2.313043478946742, - "learning_rate": 3.6402626485901504e-06, - "loss": 1.014, - "step": 1814 - }, - { - "epoch": 0.2182408465099501, - "grad_norm": 2.2616920799040123, - "learning_rate": 3.639816815826988e-06, - "loss": 1.0049, - "step": 1815 - }, - { - "epoch": 0.21836108940058918, - "grad_norm": 2.3832989305623062, - "learning_rate": 3.6393707343049176e-06, - "loss": 1.0096, - "step": 1816 - }, - { - "epoch": 0.2184813322912283, - "grad_norm": 2.6447696099420175, - "learning_rate": 3.6389244040916104e-06, - "loss": 0.9664, - "step": 1817 - }, - { - "epoch": 0.21860157518186737, - "grad_norm": 2.301320451593574, - "learning_rate": 3.6384778252547747e-06, - "loss": 1.0141, - "step": 1818 - }, - { - "epoch": 0.21872181807250646, - "grad_norm": 2.0930278158877473, - "learning_rate": 3.638030997862155e-06, - "loss": 1.0075, - "step": 1819 - }, - { - "epoch": 0.21884206096314554, - "grad_norm": 0.837944366936819, - "learning_rate": 3.6375839219815356e-06, - "loss": 0.8563, - "step": 1820 - }, - { - "epoch": 0.21896230385378465, - "grad_norm": 2.0183110046201627, - "learning_rate": 3.6371365976807375e-06, - "loss": 1.0593, - "step": 1821 - }, - { - "epoch": 0.21908254674442373, - "grad_norm": 2.955041656975077, - "learning_rate": 3.6366890250276185e-06, - "loss": 1.0629, - "step": 1822 - }, - { - "epoch": 0.21920278963506282, - "grad_norm": 1.6552735815357607, - "learning_rate": 3.6362412040900764e-06, - "loss": 1.1269, - "step": 1823 - }, - { - "epoch": 0.21932303252570193, - "grad_norm": 1.9683581976594637, - "learning_rate": 3.635793134936044e-06, - "loss": 1.0281, - "step": 1824 - }, - { - "epoch": 0.219443275416341, - "grad_norm": 2.409175926919947, - "learning_rate": 3.635344817633494e-06, - "loss": 0.9598, - "step": 1825 - }, - { - "epoch": 0.2195635183069801, - "grad_norm": 2.236119263070084, - "learning_rate": 3.634896252250436e-06, - "loss": 0.9867, - "step": 1826 - }, - { - "epoch": 0.2196837611976192, - "grad_norm": 1.7914436186708034, - "learning_rate": 3.6344474388549157e-06, - "loss": 1.0559, - "step": 1827 - }, - { - "epoch": 0.2198040040882583, - "grad_norm": 2.048747712949229, - "learning_rate": 3.6339983775150183e-06, - "loss": 1.0392, - "step": 1828 - }, - { - "epoch": 0.21992424697889737, - "grad_norm": 3.2949911945786856, - "learning_rate": 3.6335490682988664e-06, - "loss": 1.0744, - "step": 1829 - }, - { - "epoch": 0.22004448986953645, - "grad_norm": 1.8387899114139905, - "learning_rate": 3.63309951127462e-06, - "loss": 1.0602, - "step": 1830 - }, - { - "epoch": 0.22016473276017556, - "grad_norm": 2.4634249271748008, - "learning_rate": 3.6326497065104757e-06, - "loss": 0.9817, - "step": 1831 - }, - { - "epoch": 0.22028497565081465, - "grad_norm": 1.9540558084739073, - "learning_rate": 3.6321996540746697e-06, - "loss": 1.0083, - "step": 1832 - }, - { - "epoch": 0.22040521854145373, - "grad_norm": 1.7862076949601668, - "learning_rate": 3.6317493540354733e-06, - "loss": 1.0268, - "step": 1833 - }, - { - "epoch": 0.22052546143209284, - "grad_norm": 2.2536539501810715, - "learning_rate": 3.6312988064611976e-06, - "loss": 1.0043, - "step": 1834 - }, - { - "epoch": 0.22064570432273192, - "grad_norm": 2.17176818584997, - "learning_rate": 3.6308480114201896e-06, - "loss": 1.0355, - "step": 1835 - }, - { - "epoch": 0.220765947213371, - "grad_norm": 1.8129540894045542, - "learning_rate": 3.630396968980835e-06, - "loss": 0.9934, - "step": 1836 - }, - { - "epoch": 0.2208861901040101, - "grad_norm": 2.6474479694675015, - "learning_rate": 3.6299456792115575e-06, - "loss": 1.0692, - "step": 1837 - }, - { - "epoch": 0.2210064329946492, - "grad_norm": 1.5962069403591772, - "learning_rate": 3.629494142180815e-06, - "loss": 1.0438, - "step": 1838 - }, - { - "epoch": 0.22112667588528828, - "grad_norm": 2.224450645778996, - "learning_rate": 3.6290423579571075e-06, - "loss": 1.078, - "step": 1839 - }, - { - "epoch": 0.22124691877592736, - "grad_norm": 2.6094603381792876, - "learning_rate": 3.6285903266089694e-06, - "loss": 1.0262, - "step": 1840 - }, - { - "epoch": 0.22136716166656648, - "grad_norm": 1.7825413088940796, - "learning_rate": 3.628138048204974e-06, - "loss": 0.9979, - "step": 1841 - }, - { - "epoch": 0.22148740455720556, - "grad_norm": 1.751151931924835, - "learning_rate": 3.6276855228137304e-06, - "loss": 1.0053, - "step": 1842 - }, - { - "epoch": 0.22160764744784464, - "grad_norm": 2.0420669111447562, - "learning_rate": 3.6272327505038874e-06, - "loss": 1.0455, - "step": 1843 - }, - { - "epoch": 0.22172789033848372, - "grad_norm": 1.7518981448661197, - "learning_rate": 3.626779731344131e-06, - "loss": 1.01, - "step": 1844 - }, - { - "epoch": 0.22184813322912283, - "grad_norm": 1.8918538159235732, - "learning_rate": 3.6263264654031814e-06, - "loss": 1.0839, - "step": 1845 - }, - { - "epoch": 0.22196837611976192, - "grad_norm": 0.6940841110829087, - "learning_rate": 3.6258729527498008e-06, - "loss": 0.8339, - "step": 1846 - }, - { - "epoch": 0.222088619010401, - "grad_norm": 2.4917495964649325, - "learning_rate": 3.6254191934527854e-06, - "loss": 0.8627, - "step": 1847 - }, - { - "epoch": 0.2222088619010401, - "grad_norm": 11.454384603100342, - "learning_rate": 3.6249651875809715e-06, - "loss": 0.8702, - "step": 1848 - }, - { - "epoch": 0.2223291047916792, - "grad_norm": 3.173950869020741, - "learning_rate": 3.62451093520323e-06, - "loss": 1.1214, - "step": 1849 - }, - { - "epoch": 0.22244934768231828, - "grad_norm": 1.9738149129522382, - "learning_rate": 3.6240564363884714e-06, - "loss": 1.1212, - "step": 1850 - }, - { - "epoch": 0.2225695905729574, - "grad_norm": 2.086611178191533, - "learning_rate": 3.623601691205643e-06, - "loss": 0.9299, - "step": 1851 - }, - { - "epoch": 0.22268983346359647, - "grad_norm": 2.40711553290473, - "learning_rate": 3.623146699723729e-06, - "loss": 1.0442, - "step": 1852 - }, - { - "epoch": 0.22281007635423555, - "grad_norm": 2.057797475193702, - "learning_rate": 3.6226914620117507e-06, - "loss": 1.0143, - "step": 1853 - }, - { - "epoch": 0.22293031924487464, - "grad_norm": 2.1532369376928706, - "learning_rate": 3.622235978138768e-06, - "loss": 1.0293, - "step": 1854 - }, - { - "epoch": 0.22305056213551375, - "grad_norm": 2.0968752637882893, - "learning_rate": 3.621780248173877e-06, - "loss": 1.048, - "step": 1855 - }, - { - "epoch": 0.22317080502615283, - "grad_norm": 0.873006218085151, - "learning_rate": 3.6213242721862125e-06, - "loss": 0.8853, - "step": 1856 - }, - { - "epoch": 0.2232910479167919, - "grad_norm": 2.3052960125888298, - "learning_rate": 3.620868050244945e-06, - "loss": 0.9749, - "step": 1857 - }, - { - "epoch": 0.22341129080743102, - "grad_norm": 1.71694622757191, - "learning_rate": 3.6204115824192817e-06, - "loss": 1.0033, - "step": 1858 - }, - { - "epoch": 0.2235315336980701, - "grad_norm": 2.5747546736506925, - "learning_rate": 3.619954868778471e-06, - "loss": 1.001, - "step": 1859 - }, - { - "epoch": 0.2236517765887092, - "grad_norm": 1.6478640747680413, - "learning_rate": 3.6194979093917944e-06, - "loss": 1.0451, - "step": 1860 - }, - { - "epoch": 0.22377201947934827, - "grad_norm": 2.958258471190955, - "learning_rate": 3.6190407043285724e-06, - "loss": 1.1096, - "step": 1861 - }, - { - "epoch": 0.22389226236998738, - "grad_norm": 1.8315844285788332, - "learning_rate": 3.618583253658163e-06, - "loss": 0.9742, - "step": 1862 - }, - { - "epoch": 0.22401250526062647, - "grad_norm": 1.8561887608968308, - "learning_rate": 3.618125557449961e-06, - "loss": 1.0973, - "step": 1863 - }, - { - "epoch": 0.22413274815126555, - "grad_norm": 1.7128845005948095, - "learning_rate": 3.6176676157733983e-06, - "loss": 1.0635, - "step": 1864 - }, - { - "epoch": 0.22425299104190466, - "grad_norm": 2.209754762290728, - "learning_rate": 3.6172094286979443e-06, - "loss": 0.9758, - "step": 1865 - }, - { - "epoch": 0.22437323393254374, - "grad_norm": 1.9284871186391686, - "learning_rate": 3.6167509962931064e-06, - "loss": 1.0341, - "step": 1866 - }, - { - "epoch": 0.22449347682318282, - "grad_norm": 2.875863650425702, - "learning_rate": 3.6162923186284276e-06, - "loss": 0.9965, - "step": 1867 - }, - { - "epoch": 0.2246137197138219, - "grad_norm": 2.494462938624836, - "learning_rate": 3.6158333957734888e-06, - "loss": 1.0923, - "step": 1868 - }, - { - "epoch": 0.22473396260446102, - "grad_norm": 2.1876784115650953, - "learning_rate": 3.6153742277979088e-06, - "loss": 1.0597, - "step": 1869 - }, - { - "epoch": 0.2248542054951001, - "grad_norm": 2.287528503740669, - "learning_rate": 3.6149148147713434e-06, - "loss": 1.0125, - "step": 1870 - }, - { - "epoch": 0.22497444838573918, - "grad_norm": 1.7228290358756086, - "learning_rate": 3.614455156763484e-06, - "loss": 1.0949, - "step": 1871 - }, - { - "epoch": 0.2250946912763783, - "grad_norm": 2.3134888261797686, - "learning_rate": 3.613995253844061e-06, - "loss": 0.9435, - "step": 1872 - }, - { - "epoch": 0.22521493416701738, - "grad_norm": 2.054883824241149, - "learning_rate": 3.6135351060828414e-06, - "loss": 1.0395, - "step": 1873 - }, - { - "epoch": 0.22533517705765646, - "grad_norm": 2.833475689023226, - "learning_rate": 3.6130747135496285e-06, - "loss": 0.9122, - "step": 1874 - }, - { - "epoch": 0.22545541994829554, - "grad_norm": 1.8824513195780268, - "learning_rate": 3.6126140763142646e-06, - "loss": 0.8929, - "step": 1875 - }, - { - "epoch": 0.22557566283893465, - "grad_norm": 3.9157706243496353, - "learning_rate": 3.6121531944466275e-06, - "loss": 1.0785, - "step": 1876 - }, - { - "epoch": 0.22569590572957374, - "grad_norm": 2.015179843686087, - "learning_rate": 3.611692068016633e-06, - "loss": 1.0145, - "step": 1877 - }, - { - "epoch": 0.22581614862021282, - "grad_norm": 1.958959181862244, - "learning_rate": 3.611230697094233e-06, - "loss": 0.9697, - "step": 1878 - }, - { - "epoch": 0.22593639151085193, - "grad_norm": 1.8122677489811467, - "learning_rate": 3.6107690817494173e-06, - "loss": 1.1001, - "step": 1879 - }, - { - "epoch": 0.226056634401491, - "grad_norm": 2.007794213118936, - "learning_rate": 3.6103072220522117e-06, - "loss": 0.9321, - "step": 1880 - }, - { - "epoch": 0.2261768772921301, - "grad_norm": 1.6939850985379428, - "learning_rate": 3.609845118072682e-06, - "loss": 1.1402, - "step": 1881 - }, - { - "epoch": 0.2262971201827692, - "grad_norm": 2.488608921100826, - "learning_rate": 3.6093827698809276e-06, - "loss": 1.026, - "step": 1882 - }, - { - "epoch": 0.2264173630734083, - "grad_norm": 2.225596007340646, - "learning_rate": 3.6089201775470864e-06, - "loss": 1.0751, - "step": 1883 - }, - { - "epoch": 0.22653760596404737, - "grad_norm": 1.4899882942098317, - "learning_rate": 3.6084573411413334e-06, - "loss": 1.0073, - "step": 1884 - }, - { - "epoch": 0.22665784885468646, - "grad_norm": 2.4723249216453076, - "learning_rate": 3.607994260733881e-06, - "loss": 1.044, - "step": 1885 - }, - { - "epoch": 0.22677809174532557, - "grad_norm": 15.9734247759291, - "learning_rate": 3.6075309363949776e-06, - "loss": 0.9748, - "step": 1886 - }, - { - "epoch": 0.22689833463596465, - "grad_norm": 1.9321284743610523, - "learning_rate": 3.6070673681949094e-06, - "loss": 1.0401, - "step": 1887 - }, - { - "epoch": 0.22701857752660373, - "grad_norm": 1.7230702289874396, - "learning_rate": 3.606603556203999e-06, - "loss": 1.0409, - "step": 1888 - }, - { - "epoch": 0.22713882041724284, - "grad_norm": 1.6248037513191873, - "learning_rate": 3.6061395004926066e-06, - "loss": 1.0695, - "step": 1889 - }, - { - "epoch": 0.22725906330788193, - "grad_norm": 2.162669701064145, - "learning_rate": 3.605675201131129e-06, - "loss": 1.0796, - "step": 1890 - }, - { - "epoch": 0.227379306198521, - "grad_norm": 1.979487851499938, - "learning_rate": 3.60521065819e-06, - "loss": 1.03, - "step": 1891 - }, - { - "epoch": 0.2274995490891601, - "grad_norm": 1.7702695809369469, - "learning_rate": 3.60474587173969e-06, - "loss": 1.0992, - "step": 1892 - }, - { - "epoch": 0.2276197919797992, - "grad_norm": 1.698656291608601, - "learning_rate": 3.6042808418507084e-06, - "loss": 1.0573, - "step": 1893 - }, - { - "epoch": 0.22774003487043828, - "grad_norm": 1.8004512194821445, - "learning_rate": 3.6038155685935976e-06, - "loss": 1.0005, - "step": 1894 - }, - { - "epoch": 0.22786027776107737, - "grad_norm": 1.8019818918713368, - "learning_rate": 3.6033500520389404e-06, - "loss": 0.9333, - "step": 1895 - }, - { - "epoch": 0.22798052065171648, - "grad_norm": 0.808654377456598, - "learning_rate": 3.6028842922573553e-06, - "loss": 0.917, - "step": 1896 - }, - { - "epoch": 0.22810076354235556, - "grad_norm": 0.832967705784109, - "learning_rate": 3.602418289319497e-06, - "loss": 0.8939, - "step": 1897 - }, - { - "epoch": 0.22822100643299464, - "grad_norm": 2.183544288034112, - "learning_rate": 3.601952043296059e-06, - "loss": 0.9611, - "step": 1898 - }, - { - "epoch": 0.22834124932363373, - "grad_norm": 1.9469339744489662, - "learning_rate": 3.6014855542577696e-06, - "loss": 1.0389, - "step": 1899 - }, - { - "epoch": 0.22846149221427284, - "grad_norm": 1.707971239870738, - "learning_rate": 3.6010188222753943e-06, - "loss": 1.0728, - "step": 1900 - }, - { - "epoch": 0.22858173510491192, - "grad_norm": 0.9575855129293381, - "learning_rate": 3.6005518474197372e-06, - "loss": 0.9047, - "step": 1901 - }, - { - "epoch": 0.228701977995551, - "grad_norm": 1.6688310239173887, - "learning_rate": 3.6000846297616373e-06, - "loss": 1.0179, - "step": 1902 - }, - { - "epoch": 0.22882222088619011, - "grad_norm": 1.993227877756431, - "learning_rate": 3.5996171693719717e-06, - "loss": 0.9587, - "step": 1903 - }, - { - "epoch": 0.2289424637768292, - "grad_norm": 0.8693807548809656, - "learning_rate": 3.5991494663216528e-06, - "loss": 0.9004, - "step": 1904 - }, - { - "epoch": 0.22906270666746828, - "grad_norm": 1.9584752159936507, - "learning_rate": 3.5986815206816314e-06, - "loss": 1.1036, - "step": 1905 - }, - { - "epoch": 0.2291829495581074, - "grad_norm": 1.8102463490302214, - "learning_rate": 3.598213332522895e-06, - "loss": 0.9738, - "step": 1906 - }, - { - "epoch": 0.22930319244874647, - "grad_norm": 1.8783414947594217, - "learning_rate": 3.597744901916466e-06, - "loss": 1.0014, - "step": 1907 - }, - { - "epoch": 0.22942343533938556, - "grad_norm": 2.198809353546184, - "learning_rate": 3.5972762289334058e-06, - "loss": 0.9988, - "step": 1908 - }, - { - "epoch": 0.22954367823002464, - "grad_norm": 2.308271718034899, - "learning_rate": 3.5968073136448116e-06, - "loss": 1.0758, - "step": 1909 - }, - { - "epoch": 0.22966392112066375, - "grad_norm": 2.0908829146469325, - "learning_rate": 3.596338156121818e-06, - "loss": 1.1386, - "step": 1910 - }, - { - "epoch": 0.22978416401130283, - "grad_norm": 1.0435445960312117, - "learning_rate": 3.595868756435595e-06, - "loss": 0.8523, - "step": 1911 - }, - { - "epoch": 0.22990440690194192, - "grad_norm": 1.918831873833735, - "learning_rate": 3.5953991146573504e-06, - "loss": 1.0247, - "step": 1912 - }, - { - "epoch": 0.23002464979258103, - "grad_norm": 2.4628589697670793, - "learning_rate": 3.5949292308583294e-06, - "loss": 1.0538, - "step": 1913 - }, - { - "epoch": 0.2301448926832201, - "grad_norm": 2.0554033200134625, - "learning_rate": 3.594459105109811e-06, - "loss": 1.0332, - "step": 1914 - }, - { - "epoch": 0.2302651355738592, - "grad_norm": 1.7415396679049475, - "learning_rate": 3.593988737483115e-06, - "loss": 1.039, - "step": 1915 - }, - { - "epoch": 0.23038537846449827, - "grad_norm": 2.237117871094669, - "learning_rate": 3.5935181280495947e-06, - "loss": 1.0126, - "step": 1916 - }, - { - "epoch": 0.23050562135513739, - "grad_norm": 0.903618807576331, - "learning_rate": 3.5930472768806412e-06, - "loss": 0.8182, - "step": 1917 - }, - { - "epoch": 0.23062586424577647, - "grad_norm": 2.0180194442235693, - "learning_rate": 3.5925761840476826e-06, - "loss": 0.9971, - "step": 1918 - }, - { - "epoch": 0.23074610713641555, - "grad_norm": 1.6704639648049258, - "learning_rate": 3.592104849622183e-06, - "loss": 1.0514, - "step": 1919 - }, - { - "epoch": 0.23086635002705466, - "grad_norm": 1.5763930835565372, - "learning_rate": 3.591633273675644e-06, - "loss": 0.9609, - "step": 1920 - }, - { - "epoch": 0.23098659291769374, - "grad_norm": 0.9917199242072383, - "learning_rate": 3.591161456279602e-06, - "loss": 0.8584, - "step": 1921 - }, - { - "epoch": 0.23110683580833283, - "grad_norm": 1.3802473338632895, - "learning_rate": 3.590689397505633e-06, - "loss": 1.0251, - "step": 1922 - }, - { - "epoch": 0.2312270786989719, - "grad_norm": 1.6543722484581593, - "learning_rate": 3.590217097425347e-06, - "loss": 1.0943, - "step": 1923 - }, - { - "epoch": 0.23134732158961102, - "grad_norm": 2.169903196293932, - "learning_rate": 3.589744556110391e-06, - "loss": 0.9421, - "step": 1924 - }, - { - "epoch": 0.2314675644802501, - "grad_norm": 1.4591386297303914, - "learning_rate": 3.58927177363245e-06, - "loss": 1.0755, - "step": 1925 - }, - { - "epoch": 0.2315878073708892, - "grad_norm": 2.6350363556237673, - "learning_rate": 3.5887987500632447e-06, - "loss": 0.9621, - "step": 1926 - }, - { - "epoch": 0.2317080502615283, - "grad_norm": 1.6985496869976542, - "learning_rate": 3.5883254854745325e-06, - "loss": 1.0592, - "step": 1927 - }, - { - "epoch": 0.23182829315216738, - "grad_norm": 2.1001930738430246, - "learning_rate": 3.587851979938107e-06, - "loss": 0.9862, - "step": 1928 - }, - { - "epoch": 0.23194853604280646, - "grad_norm": 2.1421819440573464, - "learning_rate": 3.5873782335257985e-06, - "loss": 0.9986, - "step": 1929 - }, - { - "epoch": 0.23206877893344555, - "grad_norm": 2.2856263288536307, - "learning_rate": 3.5869042463094744e-06, - "loss": 1.0151, - "step": 1930 - }, - { - "epoch": 0.23218902182408466, - "grad_norm": 1.7249786569131893, - "learning_rate": 3.586430018361038e-06, - "loss": 0.997, - "step": 1931 - }, - { - "epoch": 0.23230926471472374, - "grad_norm": 2.5971684736469007, - "learning_rate": 3.5859555497524283e-06, - "loss": 0.9976, - "step": 1932 - }, - { - "epoch": 0.23242950760536282, - "grad_norm": 1.8252752540679427, - "learning_rate": 3.5854808405556237e-06, - "loss": 1.1419, - "step": 1933 - }, - { - "epoch": 0.23254975049600193, - "grad_norm": 2.3017128578687056, - "learning_rate": 3.5850058908426355e-06, - "loss": 0.986, - "step": 1934 - }, - { - "epoch": 0.23266999338664102, - "grad_norm": 1.7022180504715951, - "learning_rate": 3.584530700685514e-06, - "loss": 1.0749, - "step": 1935 - }, - { - "epoch": 0.2327902362772801, - "grad_norm": 1.938570772663459, - "learning_rate": 3.5840552701563448e-06, - "loss": 1.1261, - "step": 1936 - }, - { - "epoch": 0.2329104791679192, - "grad_norm": 3.3872182581103587, - "learning_rate": 3.5835795993272513e-06, - "loss": 1.0479, - "step": 1937 - }, - { - "epoch": 0.2330307220585583, - "grad_norm": 2.8139779340289617, - "learning_rate": 3.583103688270391e-06, - "loss": 0.9399, - "step": 1938 - }, - { - "epoch": 0.23315096494919738, - "grad_norm": 2.2972722131757575, - "learning_rate": 3.58262753705796e-06, - "loss": 1.1143, - "step": 1939 - }, - { - "epoch": 0.23327120783983646, - "grad_norm": 0.7615040794508751, - "learning_rate": 3.5821511457621902e-06, - "loss": 0.8044, - "step": 1940 - }, - { - "epoch": 0.23339145073047557, - "grad_norm": 2.921371987847805, - "learning_rate": 3.5816745144553497e-06, - "loss": 1.0435, - "step": 1941 - }, - { - "epoch": 0.23351169362111465, - "grad_norm": 2.7332500650546665, - "learning_rate": 3.5811976432097424e-06, - "loss": 0.9879, - "step": 1942 - }, - { - "epoch": 0.23363193651175373, - "grad_norm": 2.4212268577340184, - "learning_rate": 3.58072053209771e-06, - "loss": 1.0775, - "step": 1943 - }, - { - "epoch": 0.23375217940239285, - "grad_norm": 1.882416996234145, - "learning_rate": 3.5802431811916296e-06, - "loss": 1.0228, - "step": 1944 - }, - { - "epoch": 0.23387242229303193, - "grad_norm": 1.8344329512896076, - "learning_rate": 3.579765590563916e-06, - "loss": 1.0299, - "step": 1945 - }, - { - "epoch": 0.233992665183671, - "grad_norm": 1.9205484541719815, - "learning_rate": 3.579287760287017e-06, - "loss": 1.0364, - "step": 1946 - }, - { - "epoch": 0.2341129080743101, - "grad_norm": 1.7006523311259523, - "learning_rate": 3.578809690433421e-06, - "loss": 0.9582, - "step": 1947 - }, - { - "epoch": 0.2342331509649492, - "grad_norm": 3.8844364078544316, - "learning_rate": 3.578331381075651e-06, - "loss": 1.0381, - "step": 1948 - }, - { - "epoch": 0.2343533938555883, - "grad_norm": 2.3293969692773526, - "learning_rate": 3.5778528322862646e-06, - "loss": 0.922, - "step": 1949 - }, - { - "epoch": 0.23447363674622737, - "grad_norm": 1.544285774694749, - "learning_rate": 3.5773740441378585e-06, - "loss": 1.0925, - "step": 1950 - }, - { - "epoch": 0.23459387963686648, - "grad_norm": 2.2643300532176807, - "learning_rate": 3.5768950167030633e-06, - "loss": 0.9659, - "step": 1951 - }, - { - "epoch": 0.23471412252750556, - "grad_norm": 1.5967329943796416, - "learning_rate": 3.576415750054548e-06, - "loss": 1.0126, - "step": 1952 - }, - { - "epoch": 0.23483436541814465, - "grad_norm": 1.8231761493379877, - "learning_rate": 3.5759362442650172e-06, - "loss": 1.0793, - "step": 1953 - }, - { - "epoch": 0.23495460830878373, - "grad_norm": 1.8344271026958199, - "learning_rate": 3.5754564994072113e-06, - "loss": 1.0741, - "step": 1954 - }, - { - "epoch": 0.23507485119942284, - "grad_norm": 2.0452963295829933, - "learning_rate": 3.5749765155539067e-06, - "loss": 0.8373, - "step": 1955 - }, - { - "epoch": 0.23519509409006192, - "grad_norm": 2.2373216312854143, - "learning_rate": 3.574496292777917e-06, - "loss": 1.1488, - "step": 1956 - }, - { - "epoch": 0.235315336980701, - "grad_norm": 2.174762370841676, - "learning_rate": 3.574015831152092e-06, - "loss": 0.9434, - "step": 1957 - }, - { - "epoch": 0.23543557987134012, - "grad_norm": 2.260057857414244, - "learning_rate": 3.573535130749316e-06, - "loss": 1.0674, - "step": 1958 - }, - { - "epoch": 0.2355558227619792, - "grad_norm": 2.771579689714952, - "learning_rate": 3.5730541916425127e-06, - "loss": 0.9661, - "step": 1959 - }, - { - "epoch": 0.23567606565261828, - "grad_norm": 2.0101376143835936, - "learning_rate": 3.572573013904639e-06, - "loss": 1.0829, - "step": 1960 - }, - { - "epoch": 0.2357963085432574, - "grad_norm": 1.9963261717211758, - "learning_rate": 3.572091597608689e-06, - "loss": 1.1509, - "step": 1961 - }, - { - "epoch": 0.23591655143389648, - "grad_norm": 1.9414534687770375, - "learning_rate": 3.571609942827694e-06, - "loss": 0.9597, - "step": 1962 - }, - { - "epoch": 0.23603679432453556, - "grad_norm": 1.645159893439446, - "learning_rate": 3.57112804963472e-06, - "loss": 1.1019, - "step": 1963 - }, - { - "epoch": 0.23615703721517464, - "grad_norm": 1.7623161436459207, - "learning_rate": 3.57064591810287e-06, - "loss": 0.9935, - "step": 1964 - }, - { - "epoch": 0.23627728010581375, - "grad_norm": 10.88926200054457, - "learning_rate": 3.570163548305284e-06, - "loss": 1.0362, - "step": 1965 - }, - { - "epoch": 0.23639752299645284, - "grad_norm": 2.928625784180124, - "learning_rate": 3.569680940315135e-06, - "loss": 0.9295, - "step": 1966 - }, - { - "epoch": 0.23651776588709192, - "grad_norm": 2.119218928564838, - "learning_rate": 3.5691980942056356e-06, - "loss": 1.0442, - "step": 1967 - }, - { - "epoch": 0.23663800877773103, - "grad_norm": 1.582972719720139, - "learning_rate": 3.5687150100500332e-06, - "loss": 1.0164, - "step": 1968 - }, - { - "epoch": 0.2367582516683701, - "grad_norm": 1.5161856961117903, - "learning_rate": 3.568231687921611e-06, - "loss": 0.9707, - "step": 1969 - }, - { - "epoch": 0.2368784945590092, - "grad_norm": 1.5878072156031755, - "learning_rate": 3.5677481278936883e-06, - "loss": 1.0394, - "step": 1970 - }, - { - "epoch": 0.23699873744964828, - "grad_norm": 0.8169163109885175, - "learning_rate": 3.5672643300396214e-06, - "loss": 0.8241, - "step": 1971 - }, - { - "epoch": 0.2371189803402874, - "grad_norm": 2.0346863070440393, - "learning_rate": 3.566780294432802e-06, - "loss": 0.9206, - "step": 1972 - }, - { - "epoch": 0.23723922323092647, - "grad_norm": 3.88752813298389, - "learning_rate": 3.566296021146657e-06, - "loss": 0.9727, - "step": 1973 - }, - { - "epoch": 0.23735946612156555, - "grad_norm": 1.5711038597688385, - "learning_rate": 3.565811510254652e-06, - "loss": 0.9599, - "step": 1974 - }, - { - "epoch": 0.23747970901220466, - "grad_norm": 0.7859402059277163, - "learning_rate": 3.5653267618302845e-06, - "loss": 0.8249, - "step": 1975 - }, - { - "epoch": 0.23759995190284375, - "grad_norm": 1.652457808217088, - "learning_rate": 3.564841775947093e-06, - "loss": 1.0879, - "step": 1976 - }, - { - "epoch": 0.23772019479348283, - "grad_norm": 2.025660644895588, - "learning_rate": 3.5643565526786475e-06, - "loss": 0.9909, - "step": 1977 - }, - { - "epoch": 0.2378404376841219, - "grad_norm": 1.8486716966121535, - "learning_rate": 3.5638710920985574e-06, - "loss": 1.0023, - "step": 1978 - }, - { - "epoch": 0.23796068057476102, - "grad_norm": 2.188433856538918, - "learning_rate": 3.5633853942804655e-06, - "loss": 1.0464, - "step": 1979 - }, - { - "epoch": 0.2380809234654001, - "grad_norm": 3.9475633904342287, - "learning_rate": 3.5628994592980527e-06, - "loss": 1.0029, - "step": 1980 - }, - { - "epoch": 0.2382011663560392, - "grad_norm": 2.5728087203101033, - "learning_rate": 3.562413287225034e-06, - "loss": 0.9282, - "step": 1981 - }, - { - "epoch": 0.2383214092466783, - "grad_norm": 2.035703969752434, - "learning_rate": 3.5619268781351623e-06, - "loss": 1.1159, - "step": 1982 - }, - { - "epoch": 0.23844165213731738, - "grad_norm": 1.9848092872998917, - "learning_rate": 3.5614402321022256e-06, - "loss": 1.0016, - "step": 1983 - }, - { - "epoch": 0.23856189502795647, - "grad_norm": 2.010317890593484, - "learning_rate": 3.5609533492000463e-06, - "loss": 1.0975, - "step": 1984 - }, - { - "epoch": 0.23868213791859555, - "grad_norm": 1.8784300900676298, - "learning_rate": 3.560466229502485e-06, - "loss": 1.0127, - "step": 1985 - }, - { - "epoch": 0.23880238080923466, - "grad_norm": 14.116030385443828, - "learning_rate": 3.5599788730834384e-06, - "loss": 1.1256, - "step": 1986 - }, - { - "epoch": 0.23892262369987374, - "grad_norm": 2.594068576597452, - "learning_rate": 3.559491280016836e-06, - "loss": 1.0341, - "step": 1987 - }, - { - "epoch": 0.23904286659051283, - "grad_norm": 1.9807207598036782, - "learning_rate": 3.5590034503766465e-06, - "loss": 0.945, - "step": 1988 - }, - { - "epoch": 0.23916310948115194, - "grad_norm": 3.064119961524163, - "learning_rate": 3.558515384236874e-06, - "loss": 1.0435, - "step": 1989 - }, - { - "epoch": 0.23928335237179102, - "grad_norm": 1.6100281806517163, - "learning_rate": 3.558027081671556e-06, - "loss": 1.0655, - "step": 1990 - }, - { - "epoch": 0.2394035952624301, - "grad_norm": 1.7333813819583457, - "learning_rate": 3.557538542754769e-06, - "loss": 0.9222, - "step": 1991 - }, - { - "epoch": 0.2395238381530692, - "grad_norm": 1.7195800857507118, - "learning_rate": 3.557049767560623e-06, - "loss": 0.8991, - "step": 1992 - }, - { - "epoch": 0.2396440810437083, - "grad_norm": 1.8214931343026708, - "learning_rate": 3.5565607561632655e-06, - "loss": 1.0887, - "step": 1993 - }, - { - "epoch": 0.23976432393434738, - "grad_norm": 2.6463068040346953, - "learning_rate": 3.5560715086368787e-06, - "loss": 1.0183, - "step": 1994 - }, - { - "epoch": 0.23988456682498646, - "grad_norm": 1.8146772133345317, - "learning_rate": 3.5555820250556816e-06, - "loss": 1.0532, - "step": 1995 - }, - { - "epoch": 0.24000480971562557, - "grad_norm": 2.1765848589951604, - "learning_rate": 3.5550923054939278e-06, - "loss": 0.9193, - "step": 1996 - }, - { - "epoch": 0.24012505260626466, - "grad_norm": 1.779868945751426, - "learning_rate": 3.5546023500259083e-06, - "loss": 0.976, - "step": 1997 - }, - { - "epoch": 0.24024529549690374, - "grad_norm": 1.7265120278830746, - "learning_rate": 3.5541121587259477e-06, - "loss": 1.0368, - "step": 1998 - }, - { - "epoch": 0.24036553838754285, - "grad_norm": 0.8099745234851006, - "learning_rate": 3.553621731668408e-06, - "loss": 0.8213, - "step": 1999 - }, - { - "epoch": 0.24048578127818193, - "grad_norm": 1.9021981749557035, - "learning_rate": 3.553131068927688e-06, - "loss": 1.0657, - "step": 2000 - }, - { - "epoch": 0.24060602416882101, - "grad_norm": 1.7679591321493944, - "learning_rate": 3.552640170578219e-06, - "loss": 1.0334, - "step": 2001 - }, - { - "epoch": 0.2407262670594601, - "grad_norm": 1.6858432548986517, - "learning_rate": 3.5521490366944703e-06, - "loss": 1.0051, - "step": 2002 - }, - { - "epoch": 0.2408465099500992, - "grad_norm": 2.383877375559683, - "learning_rate": 3.5516576673509474e-06, - "loss": 1.03, - "step": 2003 - }, - { - "epoch": 0.2409667528407383, - "grad_norm": 1.9054566905761083, - "learning_rate": 3.5511660626221896e-06, - "loss": 1.0916, - "step": 2004 - }, - { - "epoch": 0.24108699573137737, - "grad_norm": 2.499984168956223, - "learning_rate": 3.5506742225827744e-06, - "loss": 1.114, - "step": 2005 - }, - { - "epoch": 0.24120723862201648, - "grad_norm": 2.2383174660300873, - "learning_rate": 3.5501821473073116e-06, - "loss": 1.1316, - "step": 2006 - }, - { - "epoch": 0.24132748151265557, - "grad_norm": 2.1454520272532585, - "learning_rate": 3.54968983687045e-06, - "loss": 1.0976, - "step": 2007 - }, - { - "epoch": 0.24144772440329465, - "grad_norm": 2.1755600000066995, - "learning_rate": 3.549197291346872e-06, - "loss": 1.1268, - "step": 2008 - }, - { - "epoch": 0.24156796729393373, - "grad_norm": 2.245795560199474, - "learning_rate": 3.548704510811297e-06, - "loss": 1.0242, - "step": 2009 - }, - { - "epoch": 0.24168821018457284, - "grad_norm": 2.293340545613385, - "learning_rate": 3.5482114953384787e-06, - "loss": 0.9751, - "step": 2010 - }, - { - "epoch": 0.24180845307521193, - "grad_norm": 1.8187130953790438, - "learning_rate": 3.5477182450032077e-06, - "loss": 1.0709, - "step": 2011 - }, - { - "epoch": 0.241928695965851, - "grad_norm": 2.9793332637986882, - "learning_rate": 3.5472247598803097e-06, - "loss": 1.0607, - "step": 2012 - }, - { - "epoch": 0.24204893885649012, - "grad_norm": 2.1007019323070177, - "learning_rate": 3.546731040044645e-06, - "loss": 1.0834, - "step": 2013 - }, - { - "epoch": 0.2421691817471292, - "grad_norm": 1.6142470306941055, - "learning_rate": 3.546237085571112e-06, - "loss": 0.9778, - "step": 2014 - }, - { - "epoch": 0.24228942463776829, - "grad_norm": 2.2122124005481845, - "learning_rate": 3.5457428965346425e-06, - "loss": 0.9565, - "step": 2015 - }, - { - "epoch": 0.2424096675284074, - "grad_norm": 1.6296515280684367, - "learning_rate": 3.545248473010205e-06, - "loss": 0.9761, - "step": 2016 - }, - { - "epoch": 0.24252991041904648, - "grad_norm": 1.9507094437625443, - "learning_rate": 3.544753815072802e-06, - "loss": 1.1, - "step": 2017 - }, - { - "epoch": 0.24265015330968556, - "grad_norm": 1.7292230053038686, - "learning_rate": 3.544258922797474e-06, - "loss": 1.1154, - "step": 2018 - }, - { - "epoch": 0.24277039620032465, - "grad_norm": 3.6588983318156676, - "learning_rate": 3.543763796259295e-06, - "loss": 1.0061, - "step": 2019 - }, - { - "epoch": 0.24289063909096376, - "grad_norm": 2.549271000646088, - "learning_rate": 3.5432684355333754e-06, - "loss": 1.1372, - "step": 2020 - }, - { - "epoch": 0.24301088198160284, - "grad_norm": 1.8442811362549634, - "learning_rate": 3.5427728406948613e-06, - "loss": 0.9892, - "step": 2021 - }, - { - "epoch": 0.24313112487224192, - "grad_norm": 0.7631917551571971, - "learning_rate": 3.542277011818934e-06, - "loss": 0.8343, - "step": 2022 - }, - { - "epoch": 0.24325136776288103, - "grad_norm": 2.630003293546638, - "learning_rate": 3.5417809489808104e-06, - "loss": 0.9723, - "step": 2023 - }, - { - "epoch": 0.24337161065352012, - "grad_norm": 2.0896161392187436, - "learning_rate": 3.5412846522557422e-06, - "loss": 0.9553, - "step": 2024 - }, - { - "epoch": 0.2434918535441592, - "grad_norm": 3.4105625675213083, - "learning_rate": 3.540788121719018e-06, - "loss": 0.9696, - "step": 2025 - }, - { - "epoch": 0.24361209643479828, - "grad_norm": 1.8738906121506005, - "learning_rate": 3.5402913574459604e-06, - "loss": 1.0561, - "step": 2026 - }, - { - "epoch": 0.2437323393254374, - "grad_norm": 1.621240374816699, - "learning_rate": 3.5397943595119297e-06, - "loss": 1.082, - "step": 2027 - }, - { - "epoch": 0.24385258221607647, - "grad_norm": 2.19413503109961, - "learning_rate": 3.5392971279923177e-06, - "loss": 1.0021, - "step": 2028 - }, - { - "epoch": 0.24397282510671556, - "grad_norm": 3.054765861267126, - "learning_rate": 3.5387996629625557e-06, - "loss": 1.0586, - "step": 2029 - }, - { - "epoch": 0.24409306799735467, - "grad_norm": 0.8717855605871342, - "learning_rate": 3.5383019644981083e-06, - "loss": 0.8302, - "step": 2030 - }, - { - "epoch": 0.24421331088799375, - "grad_norm": 2.159008086591792, - "learning_rate": 3.5378040326744763e-06, - "loss": 0.9593, - "step": 2031 - }, - { - "epoch": 0.24433355377863283, - "grad_norm": 6.907827067796644, - "learning_rate": 3.5373058675671946e-06, - "loss": 1.0861, - "step": 2032 - }, - { - "epoch": 0.24445379666927192, - "grad_norm": 6.293423977754044, - "learning_rate": 3.536807469251836e-06, - "loss": 0.9629, - "step": 2033 - }, - { - "epoch": 0.24457403955991103, - "grad_norm": 2.025010488035569, - "learning_rate": 3.5363088378040055e-06, - "loss": 1.0424, - "step": 2034 - }, - { - "epoch": 0.2446942824505501, - "grad_norm": 0.7716832158213915, - "learning_rate": 3.5358099732993463e-06, - "loss": 0.9133, - "step": 2035 - }, - { - "epoch": 0.2448145253411892, - "grad_norm": 1.9061470003911607, - "learning_rate": 3.535310875813535e-06, - "loss": 1.1243, - "step": 2036 - }, - { - "epoch": 0.2449347682318283, - "grad_norm": 2.0117800212529087, - "learning_rate": 3.5348115454222843e-06, - "loss": 1.0422, - "step": 2037 - }, - { - "epoch": 0.2450550111224674, - "grad_norm": 2.0898904100004305, - "learning_rate": 3.5343119822013425e-06, - "loss": 1.0882, - "step": 2038 - }, - { - "epoch": 0.24517525401310647, - "grad_norm": 5.228419682417267, - "learning_rate": 3.533812186226493e-06, - "loss": 0.9986, - "step": 2039 - }, - { - "epoch": 0.24529549690374555, - "grad_norm": 1.656454613480365, - "learning_rate": 3.5333121575735545e-06, - "loss": 0.9894, - "step": 2040 - }, - { - "epoch": 0.24541573979438466, - "grad_norm": 2.2979147497487884, - "learning_rate": 3.532811896318381e-06, - "loss": 0.984, - "step": 2041 - }, - { - "epoch": 0.24553598268502375, - "grad_norm": 2.0677018890152317, - "learning_rate": 3.5323114025368615e-06, - "loss": 1.0436, - "step": 2042 - }, - { - "epoch": 0.24565622557566283, - "grad_norm": 2.5058415829271423, - "learning_rate": 3.53181067630492e-06, - "loss": 1.0437, - "step": 2043 - }, - { - "epoch": 0.24577646846630194, - "grad_norm": 1.7192047904603303, - "learning_rate": 3.5313097176985175e-06, - "loss": 0.9943, - "step": 2044 - }, - { - "epoch": 0.24589671135694102, - "grad_norm": 1.6669417710549366, - "learning_rate": 3.5308085267936482e-06, - "loss": 1.0313, - "step": 2045 - }, - { - "epoch": 0.2460169542475801, - "grad_norm": 1.6293649736512228, - "learning_rate": 3.530307103666342e-06, - "loss": 1.136, - "step": 2046 - }, - { - "epoch": 0.24613719713821922, - "grad_norm": 2.4147337933833857, - "learning_rate": 3.5298054483926658e-06, - "loss": 1.0322, - "step": 2047 - }, - { - "epoch": 0.2462574400288583, - "grad_norm": 2.344917718545896, - "learning_rate": 3.5293035610487187e-06, - "loss": 1.0611, - "step": 2048 - }, - { - "epoch": 0.24637768291949738, - "grad_norm": 0.7103714418605747, - "learning_rate": 3.5288014417106374e-06, - "loss": 0.8754, - "step": 2049 - }, - { - "epoch": 0.24649792581013646, - "grad_norm": 1.7704503785973673, - "learning_rate": 3.528299090454593e-06, - "loss": 0.9839, - "step": 2050 - }, - { - "epoch": 0.24661816870077558, - "grad_norm": 3.790380252695232, - "learning_rate": 3.527796507356792e-06, - "loss": 1.0522, - "step": 2051 - }, - { - "epoch": 0.24673841159141466, - "grad_norm": 2.4763090082153925, - "learning_rate": 3.527293692493475e-06, - "loss": 1.1321, - "step": 2052 - }, - { - "epoch": 0.24685865448205374, - "grad_norm": 2.407660133051212, - "learning_rate": 3.52679064594092e-06, - "loss": 0.9705, - "step": 2053 - }, - { - "epoch": 0.24697889737269285, - "grad_norm": 2.0073311670359857, - "learning_rate": 3.5262873677754375e-06, - "loss": 0.9733, - "step": 2054 - }, - { - "epoch": 0.24709914026333193, - "grad_norm": 1.7953000671646784, - "learning_rate": 3.5257838580733745e-06, - "loss": 1.0355, - "step": 2055 - }, - { - "epoch": 0.24721938315397102, - "grad_norm": 1.8345777305340152, - "learning_rate": 3.5252801169111138e-06, - "loss": 1.1015, - "step": 2056 - }, - { - "epoch": 0.2473396260446101, - "grad_norm": 1.6201450743295767, - "learning_rate": 3.524776144365072e-06, - "loss": 1.0257, - "step": 2057 - }, - { - "epoch": 0.2474598689352492, - "grad_norm": 1.5215394545098886, - "learning_rate": 3.5242719405117016e-06, - "loss": 1.0245, - "step": 2058 - }, - { - "epoch": 0.2475801118258883, - "grad_norm": 2.4311196378065927, - "learning_rate": 3.5237675054274893e-06, - "loss": 0.9772, - "step": 2059 - }, - { - "epoch": 0.24770035471652738, - "grad_norm": 1.8696966191381363, - "learning_rate": 3.5232628391889584e-06, - "loss": 1.0296, - "step": 2060 - }, - { - "epoch": 0.2478205976071665, - "grad_norm": 2.540652672123093, - "learning_rate": 3.522757941872666e-06, - "loss": 0.8689, - "step": 2061 - }, - { - "epoch": 0.24794084049780557, - "grad_norm": 1.48028756595588, - "learning_rate": 3.5222528135552042e-06, - "loss": 1.0589, - "step": 2062 - }, - { - "epoch": 0.24806108338844465, - "grad_norm": 1.8212150333689774, - "learning_rate": 3.521747454313201e-06, - "loss": 1.0347, - "step": 2063 - }, - { - "epoch": 0.24818132627908374, - "grad_norm": 1.9063050309039236, - "learning_rate": 3.521241864223319e-06, - "loss": 0.9029, - "step": 2064 - }, - { - "epoch": 0.24830156916972285, - "grad_norm": 0.801400405823805, - "learning_rate": 3.5207360433622552e-06, - "loss": 0.8629, - "step": 2065 - }, - { - "epoch": 0.24842181206036193, - "grad_norm": 1.4947935983820362, - "learning_rate": 3.5202299918067437e-06, - "loss": 0.9705, - "step": 2066 - }, - { - "epoch": 0.248542054951001, - "grad_norm": 2.2104327700714763, - "learning_rate": 3.519723709633551e-06, - "loss": 0.9255, - "step": 2067 - }, - { - "epoch": 0.24866229784164012, - "grad_norm": 1.837940503212827, - "learning_rate": 3.519217196919479e-06, - "loss": 1.0538, - "step": 2068 - }, - { - "epoch": 0.2487825407322792, - "grad_norm": 1.6314917520448444, - "learning_rate": 3.518710453741367e-06, - "loss": 0.9557, - "step": 2069 - }, - { - "epoch": 0.2489027836229183, - "grad_norm": 2.049338336324614, - "learning_rate": 3.518203480176086e-06, - "loss": 0.9074, - "step": 2070 - }, - { - "epoch": 0.2490230265135574, - "grad_norm": 1.8002635683370276, - "learning_rate": 3.517696276300545e-06, - "loss": 1.0286, - "step": 2071 - }, - { - "epoch": 0.24914326940419648, - "grad_norm": 2.1354947951996293, - "learning_rate": 3.517188842191685e-06, - "loss": 0.9291, - "step": 2072 - }, - { - "epoch": 0.24926351229483557, - "grad_norm": 2.2918303228954082, - "learning_rate": 3.5166811779264837e-06, - "loss": 0.9745, - "step": 2073 - }, - { - "epoch": 0.24938375518547465, - "grad_norm": 1.8160547059847605, - "learning_rate": 3.5161732835819545e-06, - "loss": 1.0127, - "step": 2074 - }, - { - "epoch": 0.24950399807611376, - "grad_norm": 2.1752107101625713, - "learning_rate": 3.515665159235143e-06, - "loss": 1.056, - "step": 2075 - }, - { - "epoch": 0.24962424096675284, - "grad_norm": 1.608478333388645, - "learning_rate": 3.5151568049631318e-06, - "loss": 0.9831, - "step": 2076 - }, - { - "epoch": 0.24974448385739192, - "grad_norm": 1.4515008361004542, - "learning_rate": 3.5146482208430385e-06, - "loss": 1.024, - "step": 2077 - }, - { - "epoch": 0.24986472674803104, - "grad_norm": 1.7951000569331073, - "learning_rate": 3.514139406952014e-06, - "loss": 0.9079, - "step": 2078 - }, - { - "epoch": 0.24998496963867012, - "grad_norm": 1.679617379077776, - "learning_rate": 3.5136303633672454e-06, - "loss": 1.0674, - "step": 2079 - }, - { - "epoch": 0.25010521252930923, - "grad_norm": 1.8594768079566395, - "learning_rate": 3.5131210901659544e-06, - "loss": 0.9735, - "step": 2080 - }, - { - "epoch": 0.2502254554199483, - "grad_norm": 2.069315544341172, - "learning_rate": 3.5126115874253967e-06, - "loss": 1.0474, - "step": 2081 - }, - { - "epoch": 0.2503456983105874, - "grad_norm": 1.7188067340156965, - "learning_rate": 3.5121018552228644e-06, - "loss": 1.0329, - "step": 2082 - }, - { - "epoch": 0.2504659412012265, - "grad_norm": 2.0542486033885785, - "learning_rate": 3.5115918936356827e-06, - "loss": 0.989, - "step": 2083 - }, - { - "epoch": 0.25058618409186556, - "grad_norm": 2.2747951792644936, - "learning_rate": 3.5110817027412123e-06, - "loss": 1.016, - "step": 2084 - }, - { - "epoch": 0.25070642698250467, - "grad_norm": 1.9014324535577132, - "learning_rate": 3.5105712826168493e-06, - "loss": 0.9206, - "step": 2085 - }, - { - "epoch": 0.2508266698731437, - "grad_norm": 1.7140918377962302, - "learning_rate": 3.5100606333400235e-06, - "loss": 0.9349, - "step": 2086 - }, - { - "epoch": 0.25094691276378284, - "grad_norm": 2.022827645676356, - "learning_rate": 3.5095497549882006e-06, - "loss": 1.0047, - "step": 2087 - }, - { - "epoch": 0.25106715565442195, - "grad_norm": 1.820993648021498, - "learning_rate": 3.50903864763888e-06, - "loss": 0.9506, - "step": 2088 - }, - { - "epoch": 0.251187398545061, - "grad_norm": 2.264869721899636, - "learning_rate": 3.5085273113695965e-06, - "loss": 0.9955, - "step": 2089 - }, - { - "epoch": 0.2513076414357001, - "grad_norm": 2.3489805729413056, - "learning_rate": 3.508015746257919e-06, - "loss": 1.0134, - "step": 2090 - }, - { - "epoch": 0.2514278843263392, - "grad_norm": 1.9683584399106604, - "learning_rate": 3.5075039523814518e-06, - "loss": 1.0597, - "step": 2091 - }, - { - "epoch": 0.2515481272169783, - "grad_norm": 2.0152488180028576, - "learning_rate": 3.506991929817834e-06, - "loss": 1.0499, - "step": 2092 - }, - { - "epoch": 0.2516683701076174, - "grad_norm": 1.8075207025547964, - "learning_rate": 3.506479678644738e-06, - "loss": 1.0546, - "step": 2093 - }, - { - "epoch": 0.2517886129982565, - "grad_norm": 2.16220508835684, - "learning_rate": 3.505967198939873e-06, - "loss": 0.9715, - "step": 2094 - }, - { - "epoch": 0.25190885588889556, - "grad_norm": 1.9224763255156014, - "learning_rate": 3.5054544907809813e-06, - "loss": 1.0113, - "step": 2095 - }, - { - "epoch": 0.25202909877953467, - "grad_norm": 2.115446268530369, - "learning_rate": 3.50494155424584e-06, - "loss": 1.0343, - "step": 2096 - }, - { - "epoch": 0.2521493416701738, - "grad_norm": 1.5464369028257787, - "learning_rate": 3.504428389412262e-06, - "loss": 1.0644, - "step": 2097 - }, - { - "epoch": 0.25226958456081283, - "grad_norm": 2.6385774517883105, - "learning_rate": 3.5039149963580927e-06, - "loss": 0.9645, - "step": 2098 - }, - { - "epoch": 0.25238982745145194, - "grad_norm": 2.141391255028047, - "learning_rate": 3.503401375161215e-06, - "loss": 0.9256, - "step": 2099 - }, - { - "epoch": 0.252510070342091, - "grad_norm": 1.4728643538462505, - "learning_rate": 3.502887525899544e-06, - "loss": 1.0681, - "step": 2100 - }, - { - "epoch": 0.2526303132327301, - "grad_norm": 1.778643524779073, - "learning_rate": 3.50237344865103e-06, - "loss": 1.0549, - "step": 2101 - }, - { - "epoch": 0.2527505561233692, - "grad_norm": 2.3326213863143046, - "learning_rate": 3.501859143493658e-06, - "loss": 1.0007, - "step": 2102 - }, - { - "epoch": 0.2528707990140083, - "grad_norm": 0.8626659150800816, - "learning_rate": 3.5013446105054488e-06, - "loss": 0.8797, - "step": 2103 - }, - { - "epoch": 0.2529910419046474, - "grad_norm": 2.642020043244379, - "learning_rate": 3.5008298497644555e-06, - "loss": 0.9843, - "step": 2104 - }, - { - "epoch": 0.2531112847952865, - "grad_norm": 1.6211023537281097, - "learning_rate": 3.500314861348767e-06, - "loss": 1.1065, - "step": 2105 - }, - { - "epoch": 0.25323152768592555, - "grad_norm": 2.2417790376465723, - "learning_rate": 3.499799645336507e-06, - "loss": 1.0028, - "step": 2106 - }, - { - "epoch": 0.25335177057656466, - "grad_norm": 1.3634345288171072, - "learning_rate": 3.4992842018058336e-06, - "loss": 1.0964, - "step": 2107 - }, - { - "epoch": 0.25347201346720377, - "grad_norm": 5.842625800538418, - "learning_rate": 3.4987685308349384e-06, - "loss": 1.1039, - "step": 2108 - }, - { - "epoch": 0.2535922563578428, - "grad_norm": 2.030849241189974, - "learning_rate": 3.4982526325020497e-06, - "loss": 0.8433, - "step": 2109 - }, - { - "epoch": 0.25371249924848194, - "grad_norm": 2.0962396878720737, - "learning_rate": 3.4977365068854273e-06, - "loss": 1.0524, - "step": 2110 - }, - { - "epoch": 0.25383274213912105, - "grad_norm": 1.563078201124176, - "learning_rate": 3.4972201540633676e-06, - "loss": 0.9555, - "step": 2111 - }, - { - "epoch": 0.2539529850297601, - "grad_norm": 2.5209203867947947, - "learning_rate": 3.4967035741142008e-06, - "loss": 1.0796, - "step": 2112 - }, - { - "epoch": 0.2540732279203992, - "grad_norm": 1.653630830791833, - "learning_rate": 3.4961867671162917e-06, - "loss": 1.0476, - "step": 2113 - }, - { - "epoch": 0.2541934708110383, - "grad_norm": 2.958993721623027, - "learning_rate": 3.4956697331480402e-06, - "loss": 1.0058, - "step": 2114 - }, - { - "epoch": 0.2543137137016774, - "grad_norm": 1.699365719106626, - "learning_rate": 3.495152472287879e-06, - "loss": 1.0331, - "step": 2115 - }, - { - "epoch": 0.2544339565923165, - "grad_norm": 2.2864596309672667, - "learning_rate": 3.4946349846142766e-06, - "loss": 0.9691, - "step": 2116 - }, - { - "epoch": 0.25455419948295555, - "grad_norm": 2.0153584858549527, - "learning_rate": 3.4941172702057353e-06, - "loss": 0.9837, - "step": 2117 - }, - { - "epoch": 0.25467444237359466, - "grad_norm": 2.140242605094312, - "learning_rate": 3.4935993291407924e-06, - "loss": 1.03, - "step": 2118 - }, - { - "epoch": 0.25479468526423377, - "grad_norm": 3.438177146107032, - "learning_rate": 3.4930811614980183e-06, - "loss": 0.9378, - "step": 2119 - }, - { - "epoch": 0.2549149281548728, - "grad_norm": 1.5675671620559524, - "learning_rate": 3.4925627673560198e-06, - "loss": 1.0258, - "step": 2120 - }, - { - "epoch": 0.25503517104551193, - "grad_norm": 1.684865024605326, - "learning_rate": 3.4920441467934357e-06, - "loss": 1.1155, - "step": 2121 - }, - { - "epoch": 0.25515541393615104, - "grad_norm": 2.2147063897275854, - "learning_rate": 3.491525299888941e-06, - "loss": 1.0623, - "step": 2122 - }, - { - "epoch": 0.2552756568267901, - "grad_norm": 1.0066828939780466, - "learning_rate": 3.491006226721244e-06, - "loss": 0.9156, - "step": 2123 - }, - { - "epoch": 0.2553958997174292, - "grad_norm": 2.721935817244904, - "learning_rate": 3.4904869273690882e-06, - "loss": 0.9964, - "step": 2124 - }, - { - "epoch": 0.2555161426080683, - "grad_norm": 1.7248342849080567, - "learning_rate": 3.489967401911251e-06, - "loss": 1.1164, - "step": 2125 - }, - { - "epoch": 0.2556363854987074, - "grad_norm": 2.0676735235274064, - "learning_rate": 3.4894476504265428e-06, - "loss": 0.9232, - "step": 2126 - }, - { - "epoch": 0.2557566283893465, - "grad_norm": 0.7703022845908245, - "learning_rate": 3.4889276729938104e-06, - "loss": 0.8046, - "step": 2127 - }, - { - "epoch": 0.2558768712799856, - "grad_norm": 1.7576855422902504, - "learning_rate": 3.488407469691934e-06, - "loss": 1.0367, - "step": 2128 - }, - { - "epoch": 0.25599711417062465, - "grad_norm": 2.0307038966919917, - "learning_rate": 3.487887040599828e-06, - "loss": 1.0372, - "step": 2129 - }, - { - "epoch": 0.25611735706126376, - "grad_norm": 2.3650421392061807, - "learning_rate": 3.4873663857964407e-06, - "loss": 0.9866, - "step": 2130 - }, - { - "epoch": 0.2562375999519028, - "grad_norm": 2.9726949080210563, - "learning_rate": 3.4868455053607556e-06, - "loss": 0.8994, - "step": 2131 - }, - { - "epoch": 0.2563578428425419, - "grad_norm": 3.1075449403074242, - "learning_rate": 3.486324399371789e-06, - "loss": 0.9503, - "step": 2132 - }, - { - "epoch": 0.25647808573318104, - "grad_norm": 6.437533776648343, - "learning_rate": 3.485803067908593e-06, - "loss": 1.0138, - "step": 2133 - }, - { - "epoch": 0.2565983286238201, - "grad_norm": 3.03199444310829, - "learning_rate": 3.485281511050253e-06, - "loss": 1.0172, - "step": 2134 - }, - { - "epoch": 0.2567185715144592, - "grad_norm": 2.8047279631264694, - "learning_rate": 3.484759728875889e-06, - "loss": 1.1241, - "step": 2135 - }, - { - "epoch": 0.2568388144050983, - "grad_norm": 1.902382726877077, - "learning_rate": 3.4842377214646543e-06, - "loss": 1.0403, - "step": 2136 - }, - { - "epoch": 0.25695905729573737, - "grad_norm": 1.6398388386835026, - "learning_rate": 3.483715488895737e-06, - "loss": 0.8977, - "step": 2137 - }, - { - "epoch": 0.2570793001863765, - "grad_norm": 2.025037685090724, - "learning_rate": 3.48319303124836e-06, - "loss": 1.0155, - "step": 2138 - }, - { - "epoch": 0.2571995430770156, - "grad_norm": 2.370148673082224, - "learning_rate": 3.4826703486017798e-06, - "loss": 0.91, - "step": 2139 - }, - { - "epoch": 0.25731978596765465, - "grad_norm": 1.5850936824658008, - "learning_rate": 3.4821474410352867e-06, - "loss": 0.9942, - "step": 2140 - }, - { - "epoch": 0.25744002885829376, - "grad_norm": 1.2192870692894375, - "learning_rate": 3.481624308628205e-06, - "loss": 0.902, - "step": 2141 - }, - { - "epoch": 0.25756027174893287, - "grad_norm": 2.724634839010712, - "learning_rate": 3.481100951459893e-06, - "loss": 1.227, - "step": 2142 - }, - { - "epoch": 0.2576805146395719, - "grad_norm": 1.5384681444759858, - "learning_rate": 3.4805773696097453e-06, - "loss": 1.0142, - "step": 2143 - }, - { - "epoch": 0.25780075753021103, - "grad_norm": 2.1178035051082476, - "learning_rate": 3.4800535631571874e-06, - "loss": 1.1004, - "step": 2144 - }, - { - "epoch": 0.25792100042085014, - "grad_norm": 2.9607340961735535, - "learning_rate": 3.4795295321816804e-06, - "loss": 0.9998, - "step": 2145 - }, - { - "epoch": 0.2580412433114892, - "grad_norm": 3.7392110116798922, - "learning_rate": 3.47900527676272e-06, - "loss": 1.1425, - "step": 2146 - }, - { - "epoch": 0.2581614862021283, - "grad_norm": 2.18358839986347, - "learning_rate": 3.478480796979835e-06, - "loss": 1.1162, - "step": 2147 - }, - { - "epoch": 0.25828172909276736, - "grad_norm": 1.4523708068067458, - "learning_rate": 3.4779560929125894e-06, - "loss": 0.9996, - "step": 2148 - }, - { - "epoch": 0.2584019719834065, - "grad_norm": 0.6975589637376193, - "learning_rate": 3.4774311646405783e-06, - "loss": 0.8156, - "step": 2149 - }, - { - "epoch": 0.2585222148740456, - "grad_norm": 1.8651883583983382, - "learning_rate": 3.476906012243435e-06, - "loss": 1.0572, - "step": 2150 - }, - { - "epoch": 0.25864245776468464, - "grad_norm": 5.729673199080531, - "learning_rate": 3.476380635800824e-06, - "loss": 1.0461, - "step": 2151 - }, - { - "epoch": 0.25876270065532375, - "grad_norm": 7.505890631033672, - "learning_rate": 3.475855035392444e-06, - "loss": 1.0845, - "step": 2152 - }, - { - "epoch": 0.25888294354596286, - "grad_norm": 1.6261796337623429, - "learning_rate": 3.475329211098029e-06, - "loss": 0.9302, - "step": 2153 - }, - { - "epoch": 0.2590031864366019, - "grad_norm": 1.5773852285719607, - "learning_rate": 3.4748031629973453e-06, - "loss": 1.0513, - "step": 2154 - }, - { - "epoch": 0.25912342932724103, - "grad_norm": 0.8514035627799105, - "learning_rate": 3.4742768911701944e-06, - "loss": 0.8072, - "step": 2155 - }, - { - "epoch": 0.25924367221788014, - "grad_norm": 3.2252914474446293, - "learning_rate": 3.4737503956964113e-06, - "loss": 0.9316, - "step": 2156 - }, - { - "epoch": 0.2593639151085192, - "grad_norm": 2.3458293591788073, - "learning_rate": 3.473223676655865e-06, - "loss": 0.9046, - "step": 2157 - }, - { - "epoch": 0.2594841579991583, - "grad_norm": 1.8320279965239634, - "learning_rate": 3.472696734128459e-06, - "loss": 1.0276, - "step": 2158 - }, - { - "epoch": 0.2596044008897974, - "grad_norm": 1.691682154329307, - "learning_rate": 3.4721695681941286e-06, - "loss": 0.9852, - "step": 2159 - }, - { - "epoch": 0.25972464378043647, - "grad_norm": 2.3937368646565096, - "learning_rate": 3.471642178932845e-06, - "loss": 1.0599, - "step": 2160 - }, - { - "epoch": 0.2598448866710756, - "grad_norm": 2.1988106547149724, - "learning_rate": 3.471114566424613e-06, - "loss": 1.129, - "step": 2161 - }, - { - "epoch": 0.25996512956171464, - "grad_norm": 1.8164568494086364, - "learning_rate": 3.4705867307494715e-06, - "loss": 0.9928, - "step": 2162 - }, - { - "epoch": 0.26008537245235375, - "grad_norm": 2.350916472755112, - "learning_rate": 3.470058671987492e-06, - "loss": 1.0739, - "step": 2163 - }, - { - "epoch": 0.26020561534299286, - "grad_norm": 1.6751201188017983, - "learning_rate": 3.4695303902187805e-06, - "loss": 1.0673, - "step": 2164 - }, - { - "epoch": 0.2603258582336319, - "grad_norm": 1.9155290240441585, - "learning_rate": 3.469001885523478e-06, - "loss": 1.019, - "step": 2165 - }, - { - "epoch": 0.260446101124271, - "grad_norm": 1.9546342435392448, - "learning_rate": 3.4684731579817568e-06, - "loss": 1.0386, - "step": 2166 - }, - { - "epoch": 0.26056634401491013, - "grad_norm": 1.5460253607383492, - "learning_rate": 3.4679442076738247e-06, - "loss": 0.9976, - "step": 2167 - }, - { - "epoch": 0.2606865869055492, - "grad_norm": 1.8469544059607301, - "learning_rate": 3.4674150346799245e-06, - "loss": 1.0638, - "step": 2168 - }, - { - "epoch": 0.2608068297961883, - "grad_norm": 2.31557239879422, - "learning_rate": 3.4668856390803295e-06, - "loss": 1.0249, - "step": 2169 - }, - { - "epoch": 0.2609270726868274, - "grad_norm": 2.030840084089231, - "learning_rate": 3.4663560209553495e-06, - "loss": 1.1263, - "step": 2170 - }, - { - "epoch": 0.26104731557746647, - "grad_norm": 1.6220251877471814, - "learning_rate": 3.4658261803853267e-06, - "loss": 1.013, - "step": 2171 - }, - { - "epoch": 0.2611675584681056, - "grad_norm": 1.899158984318397, - "learning_rate": 3.4652961174506383e-06, - "loss": 1.0376, - "step": 2172 - }, - { - "epoch": 0.2612878013587447, - "grad_norm": 1.0422583743221276, - "learning_rate": 3.464765832231694e-06, - "loss": 0.8379, - "step": 2173 - }, - { - "epoch": 0.26140804424938374, - "grad_norm": 1.8641143152113233, - "learning_rate": 3.4642353248089373e-06, - "loss": 0.9305, - "step": 2174 - }, - { - "epoch": 0.26152828714002285, - "grad_norm": 2.335894325903535, - "learning_rate": 3.463704595262846e-06, - "loss": 1.0345, - "step": 2175 - }, - { - "epoch": 0.26164853003066196, - "grad_norm": 1.8959314579424917, - "learning_rate": 3.463173643673931e-06, - "loss": 0.9316, - "step": 2176 - }, - { - "epoch": 0.261768772921301, - "grad_norm": 0.9438843460964098, - "learning_rate": 3.4626424701227387e-06, - "loss": 0.8961, - "step": 2177 - }, - { - "epoch": 0.26188901581194013, - "grad_norm": 0.8649447195216404, - "learning_rate": 3.4621110746898452e-06, - "loss": 0.8511, - "step": 2178 - }, - { - "epoch": 0.2620092587025792, - "grad_norm": 1.3308025073639824, - "learning_rate": 3.4615794574558654e-06, - "loss": 0.9718, - "step": 2179 - }, - { - "epoch": 0.2621295015932183, - "grad_norm": 2.1027243426695006, - "learning_rate": 3.4610476185014436e-06, - "loss": 1.0759, - "step": 2180 - }, - { - "epoch": 0.2622497444838574, - "grad_norm": 7.201271728949003, - "learning_rate": 3.4605155579072597e-06, - "loss": 1.029, - "step": 2181 - }, - { - "epoch": 0.26236998737449646, - "grad_norm": 1.7260031506099656, - "learning_rate": 3.459983275754027e-06, - "loss": 0.9409, - "step": 2182 - }, - { - "epoch": 0.26249023026513557, - "grad_norm": 2.2481563219991023, - "learning_rate": 3.4594507721224918e-06, - "loss": 1.021, - "step": 2183 - }, - { - "epoch": 0.2626104731557747, - "grad_norm": 1.5339591525233347, - "learning_rate": 3.4589180470934353e-06, - "loss": 1.0493, - "step": 2184 - }, - { - "epoch": 0.26273071604641374, - "grad_norm": 1.6786916220165673, - "learning_rate": 3.4583851007476713e-06, - "loss": 0.9939, - "step": 2185 - }, - { - "epoch": 0.26285095893705285, - "grad_norm": 2.0968099979155, - "learning_rate": 3.4578519331660464e-06, - "loss": 0.9206, - "step": 2186 - }, - { - "epoch": 0.26297120182769196, - "grad_norm": 1.8005655618779477, - "learning_rate": 3.4573185444294426e-06, - "loss": 1.0444, - "step": 2187 - }, - { - "epoch": 0.263091444718331, - "grad_norm": 1.5850026800331538, - "learning_rate": 3.456784934618774e-06, - "loss": 1.0139, - "step": 2188 - }, - { - "epoch": 0.2632116876089701, - "grad_norm": 2.2363302572315162, - "learning_rate": 3.4562511038149897e-06, - "loss": 1.0223, - "step": 2189 - }, - { - "epoch": 0.26333193049960923, - "grad_norm": 0.884278860359429, - "learning_rate": 3.4557170520990705e-06, - "loss": 0.8329, - "step": 2190 - }, - { - "epoch": 0.2634521733902483, - "grad_norm": 1.442300162662512, - "learning_rate": 3.4551827795520324e-06, - "loss": 1.0902, - "step": 2191 - }, - { - "epoch": 0.2635724162808874, - "grad_norm": 2.006964597267186, - "learning_rate": 3.4546482862549226e-06, - "loss": 1.0714, - "step": 2192 - }, - { - "epoch": 0.2636926591715265, - "grad_norm": 2.2141769993370697, - "learning_rate": 3.4541135722888253e-06, - "loss": 1.0144, - "step": 2193 - }, - { - "epoch": 0.26381290206216557, - "grad_norm": 1.827166575061968, - "learning_rate": 3.453578637734854e-06, - "loss": 1.0279, - "step": 2194 - }, - { - "epoch": 0.2639331449528047, - "grad_norm": 2.0077403014742874, - "learning_rate": 3.4530434826741605e-06, - "loss": 1.0165, - "step": 2195 - }, - { - "epoch": 0.26405338784344373, - "grad_norm": 1.7178864130166882, - "learning_rate": 3.452508107187926e-06, - "loss": 0.9249, - "step": 2196 - }, - { - "epoch": 0.26417363073408284, - "grad_norm": 1.6793659700889856, - "learning_rate": 3.451972511357366e-06, - "loss": 0.9951, - "step": 2197 - }, - { - "epoch": 0.26429387362472195, - "grad_norm": 4.667480443209606, - "learning_rate": 3.45143669526373e-06, - "loss": 1.0824, - "step": 2198 - }, - { - "epoch": 0.264414116515361, - "grad_norm": 0.8122691780284866, - "learning_rate": 3.450900658988302e-06, - "loss": 0.8668, - "step": 2199 - }, - { - "epoch": 0.2645343594060001, - "grad_norm": 1.810725033583872, - "learning_rate": 3.450364402612397e-06, - "loss": 1.0098, - "step": 2200 - }, - { - "epoch": 0.26465460229663923, - "grad_norm": 3.645816098127044, - "learning_rate": 3.449827926217366e-06, - "loss": 1.0569, - "step": 2201 - }, - { - "epoch": 0.2647748451872783, - "grad_norm": 1.7823274265122702, - "learning_rate": 3.449291229884591e-06, - "loss": 1.0326, - "step": 2202 - }, - { - "epoch": 0.2648950880779174, - "grad_norm": 1.785153311762489, - "learning_rate": 3.4487543136954887e-06, - "loss": 1.0961, - "step": 2203 - }, - { - "epoch": 0.2650153309685565, - "grad_norm": 1.7343804986540088, - "learning_rate": 3.448217177731509e-06, - "loss": 1.14, - "step": 2204 - }, - { - "epoch": 0.26513557385919556, - "grad_norm": 1.9237846253410193, - "learning_rate": 3.4476798220741348e-06, - "loss": 1.0005, - "step": 2205 - }, - { - "epoch": 0.26525581674983467, - "grad_norm": 2.004641154609084, - "learning_rate": 3.4471422468048826e-06, - "loss": 1.0139, - "step": 2206 - }, - { - "epoch": 0.2653760596404738, - "grad_norm": 2.247564693279389, - "learning_rate": 3.4466044520053022e-06, - "loss": 0.9566, - "step": 2207 - }, - { - "epoch": 0.26549630253111284, - "grad_norm": 1.9213264116568025, - "learning_rate": 3.446066437756977e-06, - "loss": 0.8387, - "step": 2208 - }, - { - "epoch": 0.26561654542175195, - "grad_norm": 1.9850280167838923, - "learning_rate": 3.4455282041415224e-06, - "loss": 0.9869, - "step": 2209 - }, - { - "epoch": 0.265736788312391, - "grad_norm": 2.1460839159184384, - "learning_rate": 3.4449897512405894e-06, - "loss": 1.102, - "step": 2210 - }, - { - "epoch": 0.2658570312030301, - "grad_norm": 1.9260816978089277, - "learning_rate": 3.444451079135859e-06, - "loss": 0.9727, - "step": 2211 - }, - { - "epoch": 0.2659772740936692, - "grad_norm": 1.9206246130930806, - "learning_rate": 3.4439121879090493e-06, - "loss": 0.9651, - "step": 2212 - }, - { - "epoch": 0.2660975169843083, - "grad_norm": 3.0481020290689513, - "learning_rate": 3.4433730776419082e-06, - "loss": 1.0592, - "step": 2213 - }, - { - "epoch": 0.2662177598749474, - "grad_norm": 2.021463145268957, - "learning_rate": 3.4428337484162183e-06, - "loss": 1.0323, - "step": 2214 - }, - { - "epoch": 0.2663380027655865, - "grad_norm": 1.9928559023476766, - "learning_rate": 3.442294200313797e-06, - "loss": 1.0755, - "step": 2215 - }, - { - "epoch": 0.26645824565622556, - "grad_norm": 1.3564564297291228, - "learning_rate": 3.4417544334164916e-06, - "loss": 0.8119, - "step": 2216 - }, - { - "epoch": 0.26657848854686467, - "grad_norm": 1.735049898271043, - "learning_rate": 3.4412144478061854e-06, - "loss": 1.011, - "step": 2217 - }, - { - "epoch": 0.2666987314375038, - "grad_norm": 1.7075588362514054, - "learning_rate": 3.4406742435647925e-06, - "loss": 0.9752, - "step": 2218 - }, - { - "epoch": 0.26681897432814283, - "grad_norm": 2.0465511619124492, - "learning_rate": 3.440133820774263e-06, - "loss": 1.0224, - "step": 2219 - }, - { - "epoch": 0.26693921721878194, - "grad_norm": 2.0168337487398786, - "learning_rate": 3.439593179516578e-06, - "loss": 1.0421, - "step": 2220 - }, - { - "epoch": 0.26705946010942105, - "grad_norm": 2.1352853377981873, - "learning_rate": 3.4390523198737524e-06, - "loss": 1.0406, - "step": 2221 - }, - { - "epoch": 0.2671797030000601, - "grad_norm": 2.197330836705916, - "learning_rate": 3.4385112419278333e-06, - "loss": 0.9653, - "step": 2222 - }, - { - "epoch": 0.2672999458906992, - "grad_norm": 0.8172631398308976, - "learning_rate": 3.4379699457609033e-06, - "loss": 0.9265, - "step": 2223 - }, - { - "epoch": 0.26742018878133833, - "grad_norm": 2.4099181318821326, - "learning_rate": 3.4374284314550755e-06, - "loss": 1.1263, - "step": 2224 - }, - { - "epoch": 0.2675404316719774, - "grad_norm": 1.8995510022535202, - "learning_rate": 3.436886699092498e-06, - "loss": 1.0379, - "step": 2225 - }, - { - "epoch": 0.2676606745626165, - "grad_norm": 2.528061070712533, - "learning_rate": 3.4363447487553502e-06, - "loss": 0.9484, - "step": 2226 - }, - { - "epoch": 0.26778091745325555, - "grad_norm": 2.702690486493726, - "learning_rate": 3.4358025805258455e-06, - "loss": 1.0166, - "step": 2227 - }, - { - "epoch": 0.26790116034389466, - "grad_norm": 3.8116693060737505, - "learning_rate": 3.435260194486232e-06, - "loss": 1.0648, - "step": 2228 - }, - { - "epoch": 0.2680214032345338, - "grad_norm": 2.2756839939633227, - "learning_rate": 3.4347175907187875e-06, - "loss": 1.0457, - "step": 2229 - }, - { - "epoch": 0.26814164612517283, - "grad_norm": 2.5045024382247525, - "learning_rate": 3.4341747693058254e-06, - "loss": 1.1035, - "step": 2230 - }, - { - "epoch": 0.26826188901581194, - "grad_norm": 1.9448427912471526, - "learning_rate": 3.4336317303296916e-06, - "loss": 0.9916, - "step": 2231 - }, - { - "epoch": 0.26838213190645105, - "grad_norm": 3.1152083536334856, - "learning_rate": 3.4330884738727635e-06, - "loss": 0.9797, - "step": 2232 - }, - { - "epoch": 0.2685023747970901, - "grad_norm": 1.9237228442203211, - "learning_rate": 3.4325450000174535e-06, - "loss": 0.9418, - "step": 2233 - }, - { - "epoch": 0.2686226176877292, - "grad_norm": 1.7460614570515975, - "learning_rate": 3.4320013088462067e-06, - "loss": 0.9651, - "step": 2234 - }, - { - "epoch": 0.2687428605783683, - "grad_norm": 1.6039687525875757, - "learning_rate": 3.431457400441499e-06, - "loss": 1.0464, - "step": 2235 - }, - { - "epoch": 0.2688631034690074, - "grad_norm": 1.0061828685663148, - "learning_rate": 3.4309132748858424e-06, - "loss": 0.9191, - "step": 2236 - }, - { - "epoch": 0.2689833463596465, - "grad_norm": 1.648769688694768, - "learning_rate": 3.430368932261779e-06, - "loss": 1.0704, - "step": 2237 - }, - { - "epoch": 0.2691035892502856, - "grad_norm": 1.7567328634342356, - "learning_rate": 3.429824372651886e-06, - "loss": 0.9784, - "step": 2238 - }, - { - "epoch": 0.26922383214092466, - "grad_norm": 1.9367234919752734, - "learning_rate": 3.4292795961387732e-06, - "loss": 1.0685, - "step": 2239 - }, - { - "epoch": 0.26934407503156377, - "grad_norm": 1.8538975538201738, - "learning_rate": 3.4287346028050818e-06, - "loss": 1.1029, - "step": 2240 - }, - { - "epoch": 0.2694643179222028, - "grad_norm": 1.460077859030522, - "learning_rate": 3.4281893927334866e-06, - "loss": 1.0232, - "step": 2241 - }, - { - "epoch": 0.26958456081284193, - "grad_norm": 1.9357067546386288, - "learning_rate": 3.4276439660066963e-06, - "loss": 0.982, - "step": 2242 - }, - { - "epoch": 0.26970480370348104, - "grad_norm": 1.9843230954087734, - "learning_rate": 3.427098322707452e-06, - "loss": 1.07, - "step": 2243 - }, - { - "epoch": 0.2698250465941201, - "grad_norm": 2.020872987443341, - "learning_rate": 3.426552462918526e-06, - "loss": 1.1195, - "step": 2244 - }, - { - "epoch": 0.2699452894847592, - "grad_norm": 3.3109511587123146, - "learning_rate": 3.426006386722726e-06, - "loss": 0.9714, - "step": 2245 - }, - { - "epoch": 0.2700655323753983, - "grad_norm": 1.960972515872625, - "learning_rate": 3.4254600942028914e-06, - "loss": 1.15, - "step": 2246 - }, - { - "epoch": 0.2701857752660374, - "grad_norm": 2.059817679586819, - "learning_rate": 3.424913585441893e-06, - "loss": 1.0386, - "step": 2247 - }, - { - "epoch": 0.2703060181566765, - "grad_norm": 2.121129831249574, - "learning_rate": 3.4243668605226374e-06, - "loss": 1.1014, - "step": 2248 - }, - { - "epoch": 0.2704262610473156, - "grad_norm": 2.5995872720169952, - "learning_rate": 3.423819919528061e-06, - "loss": 1.0619, - "step": 2249 - }, - { - "epoch": 0.27054650393795465, - "grad_norm": 2.18871077680491, - "learning_rate": 3.4232727625411355e-06, - "loss": 1.0166, - "step": 2250 - }, - { - "epoch": 0.27066674682859376, - "grad_norm": 1.6759195337394144, - "learning_rate": 3.4227253896448626e-06, - "loss": 1.0941, - "step": 2251 - }, - { - "epoch": 0.2707869897192329, - "grad_norm": 2.005669782643209, - "learning_rate": 3.42217780092228e-06, - "loss": 1.0367, - "step": 2252 - }, - { - "epoch": 0.27090723260987193, - "grad_norm": 0.8379913838587717, - "learning_rate": 3.421629996456456e-06, - "loss": 0.8666, - "step": 2253 - }, - { - "epoch": 0.27102747550051104, - "grad_norm": 2.1954183858402705, - "learning_rate": 3.421081976330491e-06, - "loss": 1.05, - "step": 2254 - }, - { - "epoch": 0.27114771839115015, - "grad_norm": 1.6727190115502253, - "learning_rate": 3.4205337406275207e-06, - "loss": 1.1009, - "step": 2255 - }, - { - "epoch": 0.2712679612817892, - "grad_norm": 2.791327232867416, - "learning_rate": 3.4199852894307114e-06, - "loss": 0.9825, - "step": 2256 - }, - { - "epoch": 0.2713882041724283, - "grad_norm": 1.7427930934045088, - "learning_rate": 3.419436622823262e-06, - "loss": 1.0144, - "step": 2257 - }, - { - "epoch": 0.27150844706306737, - "grad_norm": 1.7632509998841712, - "learning_rate": 3.4188877408884063e-06, - "loss": 0.9642, - "step": 2258 - }, - { - "epoch": 0.2716286899537065, - "grad_norm": 2.333640509776437, - "learning_rate": 3.4183386437094088e-06, - "loss": 0.8891, - "step": 2259 - }, - { - "epoch": 0.2717489328443456, - "grad_norm": 2.2019206333114805, - "learning_rate": 3.417789331369565e-06, - "loss": 1.0459, - "step": 2260 - }, - { - "epoch": 0.27186917573498465, - "grad_norm": 1.9335557105435976, - "learning_rate": 3.4172398039522088e-06, - "loss": 1.1348, - "step": 2261 - }, - { - "epoch": 0.27198941862562376, - "grad_norm": 1.5778212302153982, - "learning_rate": 3.4166900615407e-06, - "loss": 1.0212, - "step": 2262 - }, - { - "epoch": 0.27210966151626287, - "grad_norm": 1.7466424340224536, - "learning_rate": 3.416140104218436e-06, - "loss": 0.9692, - "step": 2263 - }, - { - "epoch": 0.2722299044069019, - "grad_norm": 0.8489070752043482, - "learning_rate": 3.4155899320688437e-06, - "loss": 0.9746, - "step": 2264 - }, - { - "epoch": 0.27235014729754103, - "grad_norm": 2.3792108049546266, - "learning_rate": 3.415039545175384e-06, - "loss": 0.967, - "step": 2265 - }, - { - "epoch": 0.27247039018818014, - "grad_norm": 2.4307645023693687, - "learning_rate": 3.414488943621551e-06, - "loss": 0.8884, - "step": 2266 - }, - { - "epoch": 0.2725906330788192, - "grad_norm": 1.8159456058623913, - "learning_rate": 3.41393812749087e-06, - "loss": 0.9635, - "step": 2267 - }, - { - "epoch": 0.2727108759694583, - "grad_norm": 4.289030481437865, - "learning_rate": 3.4133870968668984e-06, - "loss": 0.9492, - "step": 2268 - }, - { - "epoch": 0.2728311188600974, - "grad_norm": 2.1594269057575937, - "learning_rate": 3.412835851833229e-06, - "loss": 1.0133, - "step": 2269 - }, - { - "epoch": 0.2729513617507365, - "grad_norm": 2.286454625803752, - "learning_rate": 3.4122843924734834e-06, - "loss": 1.0036, - "step": 2270 - }, - { - "epoch": 0.2730716046413756, - "grad_norm": 5.580432403928869, - "learning_rate": 3.411732718871319e-06, - "loss": 1.113, - "step": 2271 - }, - { - "epoch": 0.27319184753201464, - "grad_norm": 1.57728221786518, - "learning_rate": 3.4111808311104227e-06, - "loss": 1.0095, - "step": 2272 - }, - { - "epoch": 0.27331209042265375, - "grad_norm": 1.8685537790220692, - "learning_rate": 3.410628729274517e-06, - "loss": 0.9216, - "step": 2273 - }, - { - "epoch": 0.27343233331329286, - "grad_norm": 1.892503734972958, - "learning_rate": 3.4100764134473546e-06, - "loss": 1.0544, - "step": 2274 - }, - { - "epoch": 0.2735525762039319, - "grad_norm": 2.934108318199041, - "learning_rate": 3.4095238837127215e-06, - "loss": 1.0842, - "step": 2275 - }, - { - "epoch": 0.27367281909457103, - "grad_norm": 1.968785240221248, - "learning_rate": 3.4089711401544355e-06, - "loss": 1.0253, - "step": 2276 - }, - { - "epoch": 0.27379306198521014, - "grad_norm": 2.121908070189989, - "learning_rate": 3.4084181828563486e-06, - "loss": 0.8986, - "step": 2277 - }, - { - "epoch": 0.2739133048758492, - "grad_norm": 1.860781033822064, - "learning_rate": 3.4078650119023428e-06, - "loss": 0.9284, - "step": 2278 - }, - { - "epoch": 0.2740335477664883, - "grad_norm": 2.0119983071458494, - "learning_rate": 3.4073116273763337e-06, - "loss": 0.9729, - "step": 2279 - }, - { - "epoch": 0.2741537906571274, - "grad_norm": 1.792643731604781, - "learning_rate": 3.40675802936227e-06, - "loss": 1.0424, - "step": 2280 - }, - { - "epoch": 0.27427403354776647, - "grad_norm": 5.896149571792528, - "learning_rate": 3.4062042179441318e-06, - "loss": 0.9407, - "step": 2281 - }, - { - "epoch": 0.2743942764384056, - "grad_norm": 1.8265834732091577, - "learning_rate": 3.4056501932059314e-06, - "loss": 1.0419, - "step": 2282 - }, - { - "epoch": 0.2745145193290447, - "grad_norm": 0.8429028532134912, - "learning_rate": 3.405095955231715e-06, - "loss": 0.8441, - "step": 2283 - }, - { - "epoch": 0.27463476221968375, - "grad_norm": 2.3131138785795002, - "learning_rate": 3.4045415041055585e-06, - "loss": 1.1712, - "step": 2284 - }, - { - "epoch": 0.27475500511032286, - "grad_norm": 2.2168414006403934, - "learning_rate": 3.4039868399115728e-06, - "loss": 1.0287, - "step": 2285 - }, - { - "epoch": 0.27487524800096197, - "grad_norm": 1.69749738825062, - "learning_rate": 3.4034319627339003e-06, - "loss": 1.0256, - "step": 2286 - }, - { - "epoch": 0.274995490891601, - "grad_norm": 2.081223551264235, - "learning_rate": 3.402876872656715e-06, - "loss": 0.9269, - "step": 2287 - }, - { - "epoch": 0.27511573378224013, - "grad_norm": 2.1100582570354534, - "learning_rate": 3.402321569764223e-06, - "loss": 1.1334, - "step": 2288 - }, - { - "epoch": 0.2752359766728792, - "grad_norm": 1.6830372609792341, - "learning_rate": 3.4017660541406635e-06, - "loss": 1.0603, - "step": 2289 - }, - { - "epoch": 0.2753562195635183, - "grad_norm": 1.9528813324565388, - "learning_rate": 3.4012103258703092e-06, - "loss": 0.9714, - "step": 2290 - }, - { - "epoch": 0.2754764624541574, - "grad_norm": 2.294579325332406, - "learning_rate": 3.4006543850374616e-06, - "loss": 1.0553, - "step": 2291 - }, - { - "epoch": 0.27559670534479647, - "grad_norm": 2.4445120869534924, - "learning_rate": 3.400098231726458e-06, - "loss": 0.9823, - "step": 2292 - }, - { - "epoch": 0.2757169482354356, - "grad_norm": 1.7293627371179194, - "learning_rate": 3.3995418660216657e-06, - "loss": 1.1056, - "step": 2293 - }, - { - "epoch": 0.2758371911260747, - "grad_norm": 2.0985076142709898, - "learning_rate": 3.3989852880074848e-06, - "loss": 1.0414, - "step": 2294 - }, - { - "epoch": 0.27595743401671374, - "grad_norm": 0.7580850248812386, - "learning_rate": 3.398428497768348e-06, - "loss": 0.8737, - "step": 2295 - }, - { - "epoch": 0.27607767690735285, - "grad_norm": 1.6738021296615808, - "learning_rate": 3.3978714953887205e-06, - "loss": 0.9453, - "step": 2296 - }, - { - "epoch": 0.27619791979799196, - "grad_norm": 1.6671735310706672, - "learning_rate": 3.397314280953098e-06, - "loss": 1.0969, - "step": 2297 - }, - { - "epoch": 0.276318162688631, - "grad_norm": 1.897778306336122, - "learning_rate": 3.3967568545460108e-06, - "loss": 1.0293, - "step": 2298 - }, - { - "epoch": 0.27643840557927013, - "grad_norm": 1.7950697081480138, - "learning_rate": 3.3961992162520185e-06, - "loss": 1.0299, - "step": 2299 - }, - { - "epoch": 0.27655864846990924, - "grad_norm": 2.9059891122192654, - "learning_rate": 3.3956413661557156e-06, - "loss": 0.9473, - "step": 2300 - }, - { - "epoch": 0.2766788913605483, - "grad_norm": 7.4592357352416645, - "learning_rate": 3.3950833043417273e-06, - "loss": 0.8879, - "step": 2301 - }, - { - "epoch": 0.2767991342511874, - "grad_norm": 2.047059960780882, - "learning_rate": 3.3945250308947105e-06, - "loss": 0.9479, - "step": 2302 - }, - { - "epoch": 0.2769193771418265, - "grad_norm": 1.2953776712438834, - "learning_rate": 3.3939665458993556e-06, - "loss": 0.9331, - "step": 2303 - }, - { - "epoch": 0.27703962003246557, - "grad_norm": 1.9676562707441012, - "learning_rate": 3.3934078494403843e-06, - "loss": 0.9937, - "step": 2304 - }, - { - "epoch": 0.2771598629231047, - "grad_norm": 1.9051559310333404, - "learning_rate": 3.3928489416025495e-06, - "loss": 1.0373, - "step": 2305 - }, - { - "epoch": 0.27728010581374374, - "grad_norm": 1.7472717953664034, - "learning_rate": 3.392289822470638e-06, - "loss": 1.0108, - "step": 2306 - }, - { - "epoch": 0.27740034870438285, - "grad_norm": 2.0991939541793614, - "learning_rate": 3.3917304921294674e-06, - "loss": 0.9903, - "step": 2307 - }, - { - "epoch": 0.27752059159502196, - "grad_norm": 1.4996382754006077, - "learning_rate": 3.3911709506638876e-06, - "loss": 1.0406, - "step": 2308 - }, - { - "epoch": 0.277640834485661, - "grad_norm": 2.05659749999302, - "learning_rate": 3.390611198158781e-06, - "loss": 1.037, - "step": 2309 - }, - { - "epoch": 0.2777610773763001, - "grad_norm": 1.9140758903190742, - "learning_rate": 3.3900512346990612e-06, - "loss": 1.1265, - "step": 2310 - }, - { - "epoch": 0.27788132026693924, - "grad_norm": 1.7780154122370098, - "learning_rate": 3.389491060369674e-06, - "loss": 0.8869, - "step": 2311 - }, - { - "epoch": 0.2780015631575783, - "grad_norm": 2.295248065249217, - "learning_rate": 3.388930675255598e-06, - "loss": 1.1254, - "step": 2312 - }, - { - "epoch": 0.2781218060482174, - "grad_norm": 2.3401600109721397, - "learning_rate": 3.388370079441843e-06, - "loss": 1.0134, - "step": 2313 - }, - { - "epoch": 0.2782420489388565, - "grad_norm": 1.8185989107382716, - "learning_rate": 3.3878092730134505e-06, - "loss": 1.152, - "step": 2314 - }, - { - "epoch": 0.27836229182949557, - "grad_norm": 1.6802364428211227, - "learning_rate": 3.3872482560554947e-06, - "loss": 1.0356, - "step": 2315 - }, - { - "epoch": 0.2784825347201347, - "grad_norm": 0.8229078743058508, - "learning_rate": 3.386687028653082e-06, - "loss": 0.8256, - "step": 2316 - }, - { - "epoch": 0.2786027776107738, - "grad_norm": 2.276753632135113, - "learning_rate": 3.386125590891349e-06, - "loss": 1.0757, - "step": 2317 - }, - { - "epoch": 0.27872302050141284, - "grad_norm": 2.309000924957341, - "learning_rate": 3.3855639428554657e-06, - "loss": 1.0649, - "step": 2318 - }, - { - "epoch": 0.27884326339205195, - "grad_norm": 1.697353488065788, - "learning_rate": 3.385002084630635e-06, - "loss": 1.0364, - "step": 2319 - }, - { - "epoch": 0.278963506282691, - "grad_norm": 2.0255197540879326, - "learning_rate": 3.384440016302088e-06, - "loss": 1.0714, - "step": 2320 - }, - { - "epoch": 0.2790837491733301, - "grad_norm": 2.083417763588647, - "learning_rate": 3.3838777379550923e-06, - "loss": 0.8519, - "step": 2321 - }, - { - "epoch": 0.27920399206396923, - "grad_norm": 1.934840869422388, - "learning_rate": 3.383315249674944e-06, - "loss": 1.0241, - "step": 2322 - }, - { - "epoch": 0.2793242349546083, - "grad_norm": 2.4062132399091953, - "learning_rate": 3.3827525515469715e-06, - "loss": 1.0858, - "step": 2323 - }, - { - "epoch": 0.2794444778452474, - "grad_norm": 3.2105871598811717, - "learning_rate": 3.3821896436565367e-06, - "loss": 0.9346, - "step": 2324 - }, - { - "epoch": 0.2795647207358865, - "grad_norm": 2.1180796415144196, - "learning_rate": 3.381626526089032e-06, - "loss": 0.9278, - "step": 2325 - }, - { - "epoch": 0.27968496362652556, - "grad_norm": 1.9009101996404691, - "learning_rate": 3.3810631989298815e-06, - "loss": 1.0174, - "step": 2326 - }, - { - "epoch": 0.2798052065171647, - "grad_norm": 1.998282171659759, - "learning_rate": 3.3804996622645423e-06, - "loss": 1.0753, - "step": 2327 - }, - { - "epoch": 0.2799254494078038, - "grad_norm": 1.7071504529452255, - "learning_rate": 3.3799359161785015e-06, - "loss": 1.1188, - "step": 2328 - }, - { - "epoch": 0.28004569229844284, - "grad_norm": 1.5636228722873404, - "learning_rate": 3.3793719607572798e-06, - "loss": 1.0808, - "step": 2329 - }, - { - "epoch": 0.28016593518908195, - "grad_norm": 2.62088753220191, - "learning_rate": 3.378807796086428e-06, - "loss": 1.0091, - "step": 2330 - }, - { - "epoch": 0.28028617807972106, - "grad_norm": 1.8876240140077332, - "learning_rate": 3.37824342225153e-06, - "loss": 0.9941, - "step": 2331 - }, - { - "epoch": 0.2804064209703601, - "grad_norm": 2.61345112110216, - "learning_rate": 3.3776788393382006e-06, - "loss": 0.9987, - "step": 2332 - }, - { - "epoch": 0.2805266638609992, - "grad_norm": 2.651918896903598, - "learning_rate": 3.3771140474320872e-06, - "loss": 0.9926, - "step": 2333 - }, - { - "epoch": 0.28064690675163834, - "grad_norm": 1.7692622976141184, - "learning_rate": 3.3765490466188664e-06, - "loss": 1.0161, - "step": 2334 - }, - { - "epoch": 0.2807671496422774, - "grad_norm": 3.2260291002714685, - "learning_rate": 3.3759838369842508e-06, - "loss": 0.963, - "step": 2335 - }, - { - "epoch": 0.2808873925329165, - "grad_norm": 17.31680850184159, - "learning_rate": 3.375418418613981e-06, - "loss": 0.9604, - "step": 2336 - }, - { - "epoch": 0.28100763542355556, - "grad_norm": 2.194791901788637, - "learning_rate": 3.374852791593831e-06, - "loss": 1.0581, - "step": 2337 - }, - { - "epoch": 0.28112787831419467, - "grad_norm": 2.7876228442301128, - "learning_rate": 3.374286956009605e-06, - "loss": 0.7691, - "step": 2338 - }, - { - "epoch": 0.2812481212048338, - "grad_norm": 1.80483684499835, - "learning_rate": 3.3737209119471405e-06, - "loss": 0.989, - "step": 2339 - }, - { - "epoch": 0.28136836409547283, - "grad_norm": 3.5143082929233684, - "learning_rate": 3.373154659492306e-06, - "loss": 0.8744, - "step": 2340 - }, - { - "epoch": 0.28148860698611194, - "grad_norm": 1.7422512025690766, - "learning_rate": 3.3725881987310016e-06, - "loss": 1.0735, - "step": 2341 - }, - { - "epoch": 0.28160884987675106, - "grad_norm": 3.981433695293635, - "learning_rate": 3.372021529749159e-06, - "loss": 1.1036, - "step": 2342 - }, - { - "epoch": 0.2817290927673901, - "grad_norm": 1.889684522023038, - "learning_rate": 3.3714546526327405e-06, - "loss": 1.1487, - "step": 2343 - }, - { - "epoch": 0.2818493356580292, - "grad_norm": 2.3178136451598013, - "learning_rate": 3.3708875674677423e-06, - "loss": 1.107, - "step": 2344 - }, - { - "epoch": 0.28196957854866833, - "grad_norm": 1.6574966489317837, - "learning_rate": 3.37032027434019e-06, - "loss": 1.0544, - "step": 2345 - }, - { - "epoch": 0.2820898214393074, - "grad_norm": 1.877039880563628, - "learning_rate": 3.369752773336141e-06, - "loss": 1.0599, - "step": 2346 - }, - { - "epoch": 0.2822100643299465, - "grad_norm": 1.7455339665207286, - "learning_rate": 3.3691850645416864e-06, - "loss": 1.0133, - "step": 2347 - }, - { - "epoch": 0.2823303072205856, - "grad_norm": 1.6287445061240415, - "learning_rate": 3.368617148042945e-06, - "loss": 1.0596, - "step": 2348 - }, - { - "epoch": 0.28245055011122466, - "grad_norm": 1.6914980116834464, - "learning_rate": 3.368049023926071e-06, - "loss": 1.0725, - "step": 2349 - }, - { - "epoch": 0.2825707930018638, - "grad_norm": 1.7881518049207696, - "learning_rate": 3.3674806922772476e-06, - "loss": 1.0636, - "step": 2350 - }, - { - "epoch": 0.28269103589250283, - "grad_norm": 1.6152056653424574, - "learning_rate": 3.3669121531826904e-06, - "loss": 0.9749, - "step": 2351 - }, - { - "epoch": 0.28281127878314194, - "grad_norm": 1.8546038408701828, - "learning_rate": 3.366343406728647e-06, - "loss": 1.0585, - "step": 2352 - }, - { - "epoch": 0.28293152167378105, - "grad_norm": 1.7285216940218313, - "learning_rate": 3.3657744530013946e-06, - "loss": 0.9112, - "step": 2353 - }, - { - "epoch": 0.2830517645644201, - "grad_norm": 1.9529081910914377, - "learning_rate": 3.3652052920872437e-06, - "loss": 0.9423, - "step": 2354 - }, - { - "epoch": 0.2831720074550592, - "grad_norm": 6.057887574193852, - "learning_rate": 3.3646359240725355e-06, - "loss": 1.0806, - "step": 2355 - }, - { - "epoch": 0.2832922503456983, - "grad_norm": 2.3199288921311094, - "learning_rate": 3.364066349043643e-06, - "loss": 0.9085, - "step": 2356 - }, - { - "epoch": 0.2834124932363374, - "grad_norm": 1.6172517772929564, - "learning_rate": 3.363496567086969e-06, - "loss": 1.0562, - "step": 2357 - }, - { - "epoch": 0.2835327361269765, - "grad_norm": 1.7273731658791098, - "learning_rate": 3.3629265782889506e-06, - "loss": 0.983, - "step": 2358 - }, - { - "epoch": 0.2836529790176156, - "grad_norm": 1.8341429035542922, - "learning_rate": 3.362356382736054e-06, - "loss": 0.9449, - "step": 2359 - }, - { - "epoch": 0.28377322190825466, - "grad_norm": 1.8708855945223208, - "learning_rate": 3.361785980514777e-06, - "loss": 1.1412, - "step": 2360 - }, - { - "epoch": 0.28389346479889377, - "grad_norm": 1.7602992799283825, - "learning_rate": 3.361215371711649e-06, - "loss": 0.998, - "step": 2361 - }, - { - "epoch": 0.2840137076895329, - "grad_norm": 1.8063792436552655, - "learning_rate": 3.3606445564132326e-06, - "loss": 1.0603, - "step": 2362 - }, - { - "epoch": 0.28413395058017193, - "grad_norm": 1.9001182996161083, - "learning_rate": 3.360073534706118e-06, - "loss": 1.0349, - "step": 2363 - }, - { - "epoch": 0.28425419347081105, - "grad_norm": 1.8806007659050488, - "learning_rate": 3.35950230667693e-06, - "loss": 0.9867, - "step": 2364 - }, - { - "epoch": 0.28437443636145016, - "grad_norm": 1.9643971597342211, - "learning_rate": 3.358930872412323e-06, - "loss": 1.0884, - "step": 2365 - }, - { - "epoch": 0.2844946792520892, - "grad_norm": 1.4716997759080195, - "learning_rate": 3.3583592319989825e-06, - "loss": 1.0367, - "step": 2366 - }, - { - "epoch": 0.2846149221427283, - "grad_norm": 1.850101973326482, - "learning_rate": 3.357787385523627e-06, - "loss": 0.9137, - "step": 2367 - }, - { - "epoch": 0.2847351650333674, - "grad_norm": 2.6472989225889796, - "learning_rate": 3.3572153330730048e-06, - "loss": 1.0603, - "step": 2368 - }, - { - "epoch": 0.2848554079240065, - "grad_norm": 0.8120718341350543, - "learning_rate": 3.3566430747338956e-06, - "loss": 0.9012, - "step": 2369 - }, - { - "epoch": 0.2849756508146456, - "grad_norm": 5.1203694707956435, - "learning_rate": 3.35607061059311e-06, - "loss": 1.0988, - "step": 2370 - }, - { - "epoch": 0.28509589370528465, - "grad_norm": 1.7378434562155354, - "learning_rate": 3.3554979407374917e-06, - "loss": 0.9808, - "step": 2371 - }, - { - "epoch": 0.28521613659592376, - "grad_norm": 1.570353189576997, - "learning_rate": 3.3549250652539134e-06, - "loss": 0.9671, - "step": 2372 - }, - { - "epoch": 0.2853363794865629, - "grad_norm": 2.0410694754035377, - "learning_rate": 3.3543519842292794e-06, - "loss": 1.0439, - "step": 2373 - }, - { - "epoch": 0.28545662237720193, - "grad_norm": 2.6306637108533715, - "learning_rate": 3.353778697750527e-06, - "loss": 1.0702, - "step": 2374 - }, - { - "epoch": 0.28557686526784104, - "grad_norm": 1.5038586893859414, - "learning_rate": 3.353205205904622e-06, - "loss": 1.1192, - "step": 2375 - }, - { - "epoch": 0.28569710815848015, - "grad_norm": 1.6719557840670076, - "learning_rate": 3.3526315087785637e-06, - "loss": 0.9558, - "step": 2376 - }, - { - "epoch": 0.2858173510491192, - "grad_norm": 1.7693315608042788, - "learning_rate": 3.3520576064593805e-06, - "loss": 1.0392, - "step": 2377 - }, - { - "epoch": 0.2859375939397583, - "grad_norm": 1.4132127263383398, - "learning_rate": 3.3514834990341337e-06, - "loss": 1.0501, - "step": 2378 - }, - { - "epoch": 0.2860578368303974, - "grad_norm": 2.1746639211291567, - "learning_rate": 3.3509091865899144e-06, - "loss": 1.1582, - "step": 2379 - }, - { - "epoch": 0.2861780797210365, - "grad_norm": 1.8523837171705078, - "learning_rate": 3.350334669213846e-06, - "loss": 0.935, - "step": 2380 - }, - { - "epoch": 0.2862983226116756, - "grad_norm": 1.9239549004469563, - "learning_rate": 3.3497599469930816e-06, - "loss": 0.9988, - "step": 2381 - }, - { - "epoch": 0.28641856550231465, - "grad_norm": 1.9922365070373884, - "learning_rate": 3.349185020014807e-06, - "loss": 1.0603, - "step": 2382 - }, - { - "epoch": 0.28653880839295376, - "grad_norm": 1.8538849505886068, - "learning_rate": 3.348609888366237e-06, - "loss": 0.9786, - "step": 2383 - }, - { - "epoch": 0.28665905128359287, - "grad_norm": 2.0678836033970787, - "learning_rate": 3.348034552134619e-06, - "loss": 0.8628, - "step": 2384 - }, - { - "epoch": 0.2867792941742319, - "grad_norm": 1.7218779797536887, - "learning_rate": 3.3474590114072316e-06, - "loss": 1.0769, - "step": 2385 - }, - { - "epoch": 0.28689953706487104, - "grad_norm": 7.600191123968503, - "learning_rate": 3.3468832662713836e-06, - "loss": 1.0619, - "step": 2386 - }, - { - "epoch": 0.28701977995551015, - "grad_norm": 1.9228871479296687, - "learning_rate": 3.346307316814415e-06, - "loss": 1.0789, - "step": 2387 - }, - { - "epoch": 0.2871400228461492, - "grad_norm": 1.7862762343533682, - "learning_rate": 3.3457311631236965e-06, - "loss": 0.9891, - "step": 2388 - }, - { - "epoch": 0.2872602657367883, - "grad_norm": 1.9381923053941807, - "learning_rate": 3.345154805286631e-06, - "loss": 1.0746, - "step": 2389 - }, - { - "epoch": 0.2873805086274274, - "grad_norm": 2.360851406948802, - "learning_rate": 3.344578243390651e-06, - "loss": 0.9883, - "step": 2390 - }, - { - "epoch": 0.2875007515180665, - "grad_norm": 2.179675358564135, - "learning_rate": 3.3440014775232206e-06, - "loss": 1.0188, - "step": 2391 - }, - { - "epoch": 0.2876209944087056, - "grad_norm": 2.763669718279856, - "learning_rate": 3.343424507771834e-06, - "loss": 0.9478, - "step": 2392 - }, - { - "epoch": 0.2877412372993447, - "grad_norm": 1.7396100055530515, - "learning_rate": 3.342847334224018e-06, - "loss": 1.1003, - "step": 2393 - }, - { - "epoch": 0.28786148018998375, - "grad_norm": 0.8589369611157599, - "learning_rate": 3.342269956967329e-06, - "loss": 0.8981, - "step": 2394 - }, - { - "epoch": 0.28798172308062286, - "grad_norm": 2.493153160779632, - "learning_rate": 3.341692376089355e-06, - "loss": 0.9493, - "step": 2395 - }, - { - "epoch": 0.288101965971262, - "grad_norm": 2.4369365334306, - "learning_rate": 3.3411145916777146e-06, - "loss": 1.0645, - "step": 2396 - }, - { - "epoch": 0.28822220886190103, - "grad_norm": 1.880954252803163, - "learning_rate": 3.3405366038200566e-06, - "loss": 1.133, - "step": 2397 - }, - { - "epoch": 0.28834245175254014, - "grad_norm": 2.1795234208236773, - "learning_rate": 3.3399584126040617e-06, - "loss": 1.077, - "step": 2398 - }, - { - "epoch": 0.2884626946431792, - "grad_norm": 1.790010559221376, - "learning_rate": 3.339380018117441e-06, - "loss": 1.129, - "step": 2399 - }, - { - "epoch": 0.2885829375338183, - "grad_norm": 2.195232566382845, - "learning_rate": 3.3388014204479366e-06, - "loss": 1.0152, - "step": 2400 - }, - { - "epoch": 0.2887031804244574, - "grad_norm": 2.0501584805893, - "learning_rate": 3.338222619683321e-06, - "loss": 1.1423, - "step": 2401 - }, - { - "epoch": 0.2888234233150965, - "grad_norm": 2.2058463097413896, - "learning_rate": 3.337643615911398e-06, - "loss": 0.9675, - "step": 2402 - }, - { - "epoch": 0.2889436662057356, - "grad_norm": 2.1552811809585637, - "learning_rate": 3.3370644092200026e-06, - "loss": 1.0207, - "step": 2403 - }, - { - "epoch": 0.2890639090963747, - "grad_norm": 1.7712698304998493, - "learning_rate": 3.3364849996969985e-06, - "loss": 1.0179, - "step": 2404 - }, - { - "epoch": 0.28918415198701375, - "grad_norm": 2.252151202632777, - "learning_rate": 3.335905387430283e-06, - "loss": 1.08, - "step": 2405 - }, - { - "epoch": 0.28930439487765286, - "grad_norm": 1.7544968594044374, - "learning_rate": 3.335325572507782e-06, - "loss": 1.052, - "step": 2406 - }, - { - "epoch": 0.28942463776829197, - "grad_norm": 1.715517506940653, - "learning_rate": 3.3347455550174537e-06, - "loss": 0.9736, - "step": 2407 - }, - { - "epoch": 0.289544880658931, - "grad_norm": 2.048429408519369, - "learning_rate": 3.3341653350472864e-06, - "loss": 0.9148, - "step": 2408 - }, - { - "epoch": 0.28966512354957014, - "grad_norm": 2.154286582555592, - "learning_rate": 3.333584912685298e-06, - "loss": 0.9306, - "step": 2409 - }, - { - "epoch": 0.28978536644020925, - "grad_norm": 0.8509294449126915, - "learning_rate": 3.3330042880195385e-06, - "loss": 0.8206, - "step": 2410 - }, - { - "epoch": 0.2899056093308483, - "grad_norm": 1.8776824836076056, - "learning_rate": 3.3324234611380888e-06, - "loss": 1.0138, - "step": 2411 - }, - { - "epoch": 0.2900258522214874, - "grad_norm": 2.132918246934915, - "learning_rate": 3.3318424321290596e-06, - "loss": 1.0451, - "step": 2412 - }, - { - "epoch": 0.2901460951121265, - "grad_norm": 0.8469175996658584, - "learning_rate": 3.3312612010805917e-06, - "loss": 0.8775, - "step": 2413 - }, - { - "epoch": 0.2902663380027656, - "grad_norm": 1.5910603884799719, - "learning_rate": 3.330679768080858e-06, - "loss": 0.9246, - "step": 2414 - }, - { - "epoch": 0.2903865808934047, - "grad_norm": 2.873599333196363, - "learning_rate": 3.3300981332180627e-06, - "loss": 1.0698, - "step": 2415 - }, - { - "epoch": 0.29050682378404374, - "grad_norm": 1.9961368683041687, - "learning_rate": 3.3295162965804373e-06, - "loss": 1.0344, - "step": 2416 - }, - { - "epoch": 0.29062706667468285, - "grad_norm": 1.87932114174145, - "learning_rate": 3.328934258256247e-06, - "loss": 1.0084, - "step": 2417 - }, - { - "epoch": 0.29074730956532197, - "grad_norm": 2.0450919735472373, - "learning_rate": 3.3283520183337856e-06, - "loss": 0.9016, - "step": 2418 - }, - { - "epoch": 0.290867552455961, - "grad_norm": 1.750544463337432, - "learning_rate": 3.3277695769013797e-06, - "loss": 0.9256, - "step": 2419 - }, - { - "epoch": 0.29098779534660013, - "grad_norm": 1.9290088278655955, - "learning_rate": 3.327186934047385e-06, - "loss": 1.0, - "step": 2420 - }, - { - "epoch": 0.29110803823723924, - "grad_norm": 3.2267405758687078, - "learning_rate": 3.3266040898601877e-06, - "loss": 0.8942, - "step": 2421 - }, - { - "epoch": 0.2912282811278783, - "grad_norm": 1.8927476805014072, - "learning_rate": 3.3260210444282045e-06, - "loss": 1.0096, - "step": 2422 - }, - { - "epoch": 0.2913485240185174, - "grad_norm": 1.826498628646624, - "learning_rate": 3.325437797839883e-06, - "loss": 0.9543, - "step": 2423 - }, - { - "epoch": 0.2914687669091565, - "grad_norm": 4.455098889213448, - "learning_rate": 3.3248543501837015e-06, - "loss": 0.9845, - "step": 2424 - }, - { - "epoch": 0.2915890097997956, - "grad_norm": 1.8607596362847323, - "learning_rate": 3.3242707015481684e-06, - "loss": 0.9989, - "step": 2425 - }, - { - "epoch": 0.2917092526904347, - "grad_norm": 1.95633860378278, - "learning_rate": 3.323686852021823e-06, - "loss": 1.0392, - "step": 2426 - }, - { - "epoch": 0.2918294955810738, - "grad_norm": 2.669230976582517, - "learning_rate": 3.323102801693235e-06, - "loss": 1.026, - "step": 2427 - }, - { - "epoch": 0.29194973847171285, - "grad_norm": 1.9554179045909468, - "learning_rate": 3.322518550651003e-06, - "loss": 1.0229, - "step": 2428 - }, - { - "epoch": 0.29206998136235196, - "grad_norm": 1.7442411626564858, - "learning_rate": 3.3219340989837586e-06, - "loss": 1.0437, - "step": 2429 - }, - { - "epoch": 0.292190224252991, - "grad_norm": 5.0561057316553155, - "learning_rate": 3.3213494467801625e-06, - "loss": 1.0373, - "step": 2430 - }, - { - "epoch": 0.2923104671436301, - "grad_norm": 3.8655965595848993, - "learning_rate": 3.3207645941289063e-06, - "loss": 0.9492, - "step": 2431 - }, - { - "epoch": 0.29243071003426924, - "grad_norm": 1.612454107651699, - "learning_rate": 3.320179541118711e-06, - "loss": 1.0321, - "step": 2432 - }, - { - "epoch": 0.2925509529249083, - "grad_norm": 1.0524450693724237, - "learning_rate": 3.3195942878383293e-06, - "loss": 0.8754, - "step": 2433 - }, - { - "epoch": 0.2926711958155474, - "grad_norm": 1.8080265482244646, - "learning_rate": 3.319008834376543e-06, - "loss": 1.0062, - "step": 2434 - }, - { - "epoch": 0.2927914387061865, - "grad_norm": 2.3241430446572795, - "learning_rate": 3.3184231808221654e-06, - "loss": 1.109, - "step": 2435 - }, - { - "epoch": 0.29291168159682557, - "grad_norm": 2.6236965031142803, - "learning_rate": 3.3178373272640394e-06, - "loss": 0.8567, - "step": 2436 - }, - { - "epoch": 0.2930319244874647, - "grad_norm": 2.259095352193602, - "learning_rate": 3.3172512737910387e-06, - "loss": 1.0883, - "step": 2437 - }, - { - "epoch": 0.2931521673781038, - "grad_norm": 6.239656969080662, - "learning_rate": 3.3166650204920674e-06, - "loss": 1.1117, - "step": 2438 - }, - { - "epoch": 0.29327241026874284, - "grad_norm": 1.687747866475934, - "learning_rate": 3.316078567456059e-06, - "loss": 1.0475, - "step": 2439 - }, - { - "epoch": 0.29339265315938196, - "grad_norm": 1.4465345010872341, - "learning_rate": 3.3154919147719786e-06, - "loss": 0.9979, - "step": 2440 - }, - { - "epoch": 0.29351289605002107, - "grad_norm": 1.916333715572287, - "learning_rate": 3.31490506252882e-06, - "loss": 1.0993, - "step": 2441 - }, - { - "epoch": 0.2936331389406601, - "grad_norm": 1.7823237478853275, - "learning_rate": 3.31431801081561e-06, - "loss": 1.0728, - "step": 2442 - }, - { - "epoch": 0.29375338183129923, - "grad_norm": 1.0791244297280158, - "learning_rate": 3.313730759721402e-06, - "loss": 0.9189, - "step": 2443 - }, - { - "epoch": 0.29387362472193834, - "grad_norm": 2.220258065938285, - "learning_rate": 3.313143309335282e-06, - "loss": 1.0855, - "step": 2444 - }, - { - "epoch": 0.2939938676125774, - "grad_norm": 1.7068269026805594, - "learning_rate": 3.3125556597463665e-06, - "loss": 1.0683, - "step": 2445 - }, - { - "epoch": 0.2941141105032165, - "grad_norm": 1.6602630939883491, - "learning_rate": 3.311967811043801e-06, - "loss": 0.8882, - "step": 2446 - }, - { - "epoch": 0.29423435339385556, - "grad_norm": 2.328641181405908, - "learning_rate": 3.3113797633167617e-06, - "loss": 1.0459, - "step": 2447 - }, - { - "epoch": 0.2943545962844947, - "grad_norm": 2.014879073875782, - "learning_rate": 3.310791516654455e-06, - "loss": 0.913, - "step": 2448 - }, - { - "epoch": 0.2944748391751338, - "grad_norm": 1.7918236094497673, - "learning_rate": 3.3102030711461177e-06, - "loss": 1.0257, - "step": 2449 - }, - { - "epoch": 0.29459508206577284, - "grad_norm": 2.423518847361337, - "learning_rate": 3.3096144268810156e-06, - "loss": 0.9116, - "step": 2450 - }, - { - "epoch": 0.29471532495641195, - "grad_norm": 1.9119260737121433, - "learning_rate": 3.3090255839484462e-06, - "loss": 0.9523, - "step": 2451 - }, - { - "epoch": 0.29483556784705106, - "grad_norm": 1.7000358633858568, - "learning_rate": 3.3084365424377366e-06, - "loss": 1.0833, - "step": 2452 - }, - { - "epoch": 0.2949558107376901, - "grad_norm": 0.7822742995270073, - "learning_rate": 3.307847302438245e-06, - "loss": 0.8146, - "step": 2453 - }, - { - "epoch": 0.2950760536283292, - "grad_norm": 3.9778137037287538, - "learning_rate": 3.3072578640393562e-06, - "loss": 1.0076, - "step": 2454 - }, - { - "epoch": 0.29519629651896834, - "grad_norm": 1.7745243052959458, - "learning_rate": 3.3066682273304886e-06, - "loss": 1.025, - "step": 2455 - }, - { - "epoch": 0.2953165394096074, - "grad_norm": 2.259694829440136, - "learning_rate": 3.3060783924010904e-06, - "loss": 1.0091, - "step": 2456 - }, - { - "epoch": 0.2954367823002465, - "grad_norm": 2.095004488774045, - "learning_rate": 3.3054883593406387e-06, - "loss": 1.0753, - "step": 2457 - }, - { - "epoch": 0.2955570251908856, - "grad_norm": 2.613301138809313, - "learning_rate": 3.3048981282386404e-06, - "loss": 0.8869, - "step": 2458 - }, - { - "epoch": 0.29567726808152467, - "grad_norm": 1.8687077163454504, - "learning_rate": 3.304307699184634e-06, - "loss": 1.058, - "step": 2459 - }, - { - "epoch": 0.2957975109721638, - "grad_norm": 2.285782270186826, - "learning_rate": 3.3037170722681866e-06, - "loss": 1.0218, - "step": 2460 - }, - { - "epoch": 0.29591775386280283, - "grad_norm": 1.811538441228298, - "learning_rate": 3.3031262475788956e-06, - "loss": 0.9172, - "step": 2461 - }, - { - "epoch": 0.29603799675344195, - "grad_norm": 1.724189750451714, - "learning_rate": 3.3025352252063897e-06, - "loss": 0.9696, - "step": 2462 - }, - { - "epoch": 0.29615823964408106, - "grad_norm": 1.969130766847131, - "learning_rate": 3.3019440052403252e-06, - "loss": 0.9773, - "step": 2463 - }, - { - "epoch": 0.2962784825347201, - "grad_norm": 1.7631526955934689, - "learning_rate": 3.30135258777039e-06, - "loss": 0.9376, - "step": 2464 - }, - { - "epoch": 0.2963987254253592, - "grad_norm": 2.0425960864621326, - "learning_rate": 3.3007609728863024e-06, - "loss": 0.9265, - "step": 2465 - }, - { - "epoch": 0.29651896831599833, - "grad_norm": 1.7526123439679295, - "learning_rate": 3.300169160677809e-06, - "loss": 0.9634, - "step": 2466 - }, - { - "epoch": 0.2966392112066374, - "grad_norm": 8.372381455728972, - "learning_rate": 3.2995771512346878e-06, - "loss": 0.999, - "step": 2467 - }, - { - "epoch": 0.2967594540972765, - "grad_norm": 1.9135972060808306, - "learning_rate": 3.298984944646746e-06, - "loss": 0.9609, - "step": 2468 - }, - { - "epoch": 0.2968796969879156, - "grad_norm": 1.73776841045797, - "learning_rate": 3.298392541003822e-06, - "loss": 1.0397, - "step": 2469 - }, - { - "epoch": 0.29699993987855466, - "grad_norm": 1.6666817982304645, - "learning_rate": 3.2977999403957806e-06, - "loss": 1.1177, - "step": 2470 - }, - { - "epoch": 0.2971201827691938, - "grad_norm": 1.682550589493836, - "learning_rate": 3.2972071429125207e-06, - "loss": 0.9033, - "step": 2471 - }, - { - "epoch": 0.2972404256598329, - "grad_norm": 2.860360397893063, - "learning_rate": 3.2966141486439682e-06, - "loss": 1.1114, - "step": 2472 - }, - { - "epoch": 0.29736066855047194, - "grad_norm": 2.3148072563330553, - "learning_rate": 3.29602095768008e-06, - "loss": 0.8729, - "step": 2473 - }, - { - "epoch": 0.29748091144111105, - "grad_norm": 1.7126647730725666, - "learning_rate": 3.2954275701108437e-06, - "loss": 0.873, - "step": 2474 - }, - { - "epoch": 0.29760115433175016, - "grad_norm": 1.8532979694715745, - "learning_rate": 3.294833986026275e-06, - "loss": 0.9213, - "step": 2475 - }, - { - "epoch": 0.2977213972223892, - "grad_norm": 2.580746883840914, - "learning_rate": 3.29424020551642e-06, - "loss": 1.0857, - "step": 2476 - }, - { - "epoch": 0.2978416401130283, - "grad_norm": 1.8146927822082801, - "learning_rate": 3.2936462286713546e-06, - "loss": 0.9413, - "step": 2477 - }, - { - "epoch": 0.2979618830036674, - "grad_norm": 1.8080717779528932, - "learning_rate": 3.2930520555811846e-06, - "loss": 1.0012, - "step": 2478 - }, - { - "epoch": 0.2980821258943065, - "grad_norm": 1.933447013450537, - "learning_rate": 3.292457686336046e-06, - "loss": 1.0288, - "step": 2479 - }, - { - "epoch": 0.2982023687849456, - "grad_norm": 1.1309619389275911, - "learning_rate": 3.291863121026105e-06, - "loss": 0.8821, - "step": 2480 - }, - { - "epoch": 0.29832261167558466, - "grad_norm": 2.1194634960690957, - "learning_rate": 3.2912683597415547e-06, - "loss": 0.9973, - "step": 2481 - }, - { - "epoch": 0.29844285456622377, - "grad_norm": 2.207858580414153, - "learning_rate": 3.2906734025726213e-06, - "loss": 1.0141, - "step": 2482 - }, - { - "epoch": 0.2985630974568629, - "grad_norm": 1.8882641514930052, - "learning_rate": 3.290078249609559e-06, - "loss": 1.105, - "step": 2483 - }, - { - "epoch": 0.29868334034750194, - "grad_norm": 2.5829957208188152, - "learning_rate": 3.2894829009426514e-06, - "loss": 1.1066, - "step": 2484 - }, - { - "epoch": 0.29880358323814105, - "grad_norm": 2.0260631360744568, - "learning_rate": 3.288887356662213e-06, - "loss": 0.9997, - "step": 2485 - }, - { - "epoch": 0.29892382612878016, - "grad_norm": 0.803984696218871, - "learning_rate": 3.288291616858588e-06, - "loss": 0.8453, - "step": 2486 - }, - { - "epoch": 0.2990440690194192, - "grad_norm": 1.9458382137436148, - "learning_rate": 3.287695681622149e-06, - "loss": 0.9955, - "step": 2487 - }, - { - "epoch": 0.2991643119100583, - "grad_norm": 2.6317755131340483, - "learning_rate": 3.2870995510432982e-06, - "loss": 1.0374, - "step": 2488 - }, - { - "epoch": 0.29928455480069743, - "grad_norm": 1.7836825174506483, - "learning_rate": 3.2865032252124697e-06, - "loss": 0.9948, - "step": 2489 - }, - { - "epoch": 0.2994047976913365, - "grad_norm": 1.365275588389017, - "learning_rate": 3.2859067042201243e-06, - "loss": 1.0024, - "step": 2490 - }, - { - "epoch": 0.2995250405819756, - "grad_norm": 1.7600441563008884, - "learning_rate": 3.2853099881567544e-06, - "loss": 1.0009, - "step": 2491 - }, - { - "epoch": 0.29964528347261465, - "grad_norm": 4.798349160188943, - "learning_rate": 3.284713077112881e-06, - "loss": 1.0156, - "step": 2492 - }, - { - "epoch": 0.29976552636325376, - "grad_norm": 2.617876924266435, - "learning_rate": 3.284115971179056e-06, - "loss": 1.0908, - "step": 2493 - }, - { - "epoch": 0.2998857692538929, - "grad_norm": 1.7237156661655146, - "learning_rate": 3.283518670445859e-06, - "loss": 1.0229, - "step": 2494 - }, - { - "epoch": 0.30000601214453193, - "grad_norm": 1.102856572590108, - "learning_rate": 3.2829211750038995e-06, - "loss": 0.8078, - "step": 2495 - }, - { - "epoch": 0.30012625503517104, - "grad_norm": 3.9904248551115447, - "learning_rate": 3.2823234849438183e-06, - "loss": 1.1097, - "step": 2496 - }, - { - "epoch": 0.30024649792581015, - "grad_norm": 2.113203064887738, - "learning_rate": 3.2817256003562836e-06, - "loss": 0.9744, - "step": 2497 - }, - { - "epoch": 0.3003667408164492, - "grad_norm": 2.1637082687928673, - "learning_rate": 3.281127521331995e-06, - "loss": 0.8955, - "step": 2498 - }, - { - "epoch": 0.3004869837070883, - "grad_norm": 0.8830795896805438, - "learning_rate": 3.2805292479616798e-06, - "loss": 0.8733, - "step": 2499 - }, - { - "epoch": 0.30060722659772743, - "grad_norm": 2.1081563184206074, - "learning_rate": 3.2799307803360955e-06, - "loss": 1.1346, - "step": 2500 - }, - { - "epoch": 0.3007274694883665, - "grad_norm": 1.7636956670598982, - "learning_rate": 3.27933211854603e-06, - "loss": 1.047, - "step": 2501 - }, - { - "epoch": 0.3008477123790056, - "grad_norm": 2.1425165882158366, - "learning_rate": 3.278733262682299e-06, - "loss": 1.0974, - "step": 2502 - }, - { - "epoch": 0.3009679552696447, - "grad_norm": 2.130914870954694, - "learning_rate": 3.2781342128357484e-06, - "loss": 1.0456, - "step": 2503 - }, - { - "epoch": 0.30108819816028376, - "grad_norm": 2.5684671473645646, - "learning_rate": 3.2775349690972547e-06, - "loss": 1.0353, - "step": 2504 - }, - { - "epoch": 0.30120844105092287, - "grad_norm": 0.7682737348311927, - "learning_rate": 3.276935531557722e-06, - "loss": 0.7957, - "step": 2505 - }, - { - "epoch": 0.301328683941562, - "grad_norm": 2.014366762604235, - "learning_rate": 3.2763359003080837e-06, - "loss": 1.0261, - "step": 2506 - }, - { - "epoch": 0.30144892683220104, - "grad_norm": 0.9652486715952306, - "learning_rate": 3.2757360754393047e-06, - "loss": 0.9075, - "step": 2507 - }, - { - "epoch": 0.30156916972284015, - "grad_norm": 2.487351943167365, - "learning_rate": 3.2751360570423767e-06, - "loss": 0.8645, - "step": 2508 - }, - { - "epoch": 0.3016894126134792, - "grad_norm": 6.9036418426014805, - "learning_rate": 3.2745358452083236e-06, - "loss": 0.9886, - "step": 2509 - }, - { - "epoch": 0.3018096555041183, - "grad_norm": 1.3296980573905208, - "learning_rate": 3.2739354400281955e-06, - "loss": 1.0449, - "step": 2510 - }, - { - "epoch": 0.3019298983947574, - "grad_norm": 0.9321383380362757, - "learning_rate": 3.2733348415930744e-06, - "loss": 0.9201, - "step": 2511 - }, - { - "epoch": 0.3020501412853965, - "grad_norm": 2.3450053604162315, - "learning_rate": 3.27273404999407e-06, - "loss": 1.0429, - "step": 2512 - }, - { - "epoch": 0.3021703841760356, - "grad_norm": 0.870262776021042, - "learning_rate": 3.272133065322322e-06, - "loss": 0.8592, - "step": 2513 - }, - { - "epoch": 0.3022906270666747, - "grad_norm": 1.5564647848065094, - "learning_rate": 3.271531887669e-06, - "loss": 1.0144, - "step": 2514 - }, - { - "epoch": 0.30241086995731375, - "grad_norm": 3.5639020603564555, - "learning_rate": 3.2709305171253015e-06, - "loss": 0.867, - "step": 2515 - }, - { - "epoch": 0.30253111284795287, - "grad_norm": 2.210320268614902, - "learning_rate": 3.2703289537824536e-06, - "loss": 1.0114, - "step": 2516 - }, - { - "epoch": 0.302651355738592, - "grad_norm": 2.2758361118355097, - "learning_rate": 3.269727197731714e-06, - "loss": 1.0108, - "step": 2517 - }, - { - "epoch": 0.30277159862923103, - "grad_norm": 1.580612714605829, - "learning_rate": 3.269125249064367e-06, - "loss": 1.0109, - "step": 2518 - }, - { - "epoch": 0.30289184151987014, - "grad_norm": 1.5591663656773047, - "learning_rate": 3.2685231078717297e-06, - "loss": 1.0592, - "step": 2519 - }, - { - "epoch": 0.30301208441050925, - "grad_norm": 2.258894611536322, - "learning_rate": 3.267920774245145e-06, - "loss": 0.9776, - "step": 2520 - }, - { - "epoch": 0.3031323273011483, - "grad_norm": 1.8812778280740987, - "learning_rate": 3.2673182482759876e-06, - "loss": 1.073, - "step": 2521 - }, - { - "epoch": 0.3032525701917874, - "grad_norm": 2.003308420814491, - "learning_rate": 3.266715530055659e-06, - "loss": 0.8916, - "step": 2522 - }, - { - "epoch": 0.30337281308242653, - "grad_norm": 1.4427015485730252, - "learning_rate": 3.2661126196755927e-06, - "loss": 1.0355, - "step": 2523 - }, - { - "epoch": 0.3034930559730656, - "grad_norm": 0.8248056789774336, - "learning_rate": 3.265509517227248e-06, - "loss": 0.837, - "step": 2524 - }, - { - "epoch": 0.3036132988637047, - "grad_norm": 1.553608045162149, - "learning_rate": 3.264906222802115e-06, - "loss": 1.0342, - "step": 2525 - }, - { - "epoch": 0.30373354175434375, - "grad_norm": 3.8070656239229654, - "learning_rate": 3.264302736491715e-06, - "loss": 1.0024, - "step": 2526 - }, - { - "epoch": 0.30385378464498286, - "grad_norm": 1.6664417432965575, - "learning_rate": 3.263699058387594e-06, - "loss": 1.0937, - "step": 2527 - }, - { - "epoch": 0.30397402753562197, - "grad_norm": 2.0050564266645683, - "learning_rate": 3.2630951885813315e-06, - "loss": 1.1354, - "step": 2528 - }, - { - "epoch": 0.304094270426261, - "grad_norm": 1.7433874676300243, - "learning_rate": 3.262491127164533e-06, - "loss": 1.0112, - "step": 2529 - }, - { - "epoch": 0.30421451331690014, - "grad_norm": 2.05940546178213, - "learning_rate": 3.2618868742288337e-06, - "loss": 1.0224, - "step": 2530 - }, - { - "epoch": 0.30433475620753925, - "grad_norm": 1.8546811008101944, - "learning_rate": 3.261282429865899e-06, - "loss": 0.9548, - "step": 2531 - }, - { - "epoch": 0.3044549990981783, - "grad_norm": 2.944885546754557, - "learning_rate": 3.2606777941674225e-06, - "loss": 0.9601, - "step": 2532 - }, - { - "epoch": 0.3045752419888174, - "grad_norm": 1.8967047424829915, - "learning_rate": 3.2600729672251276e-06, - "loss": 1.0734, - "step": 2533 - }, - { - "epoch": 0.3046954848794565, - "grad_norm": 4.03485419443644, - "learning_rate": 3.259467949130765e-06, - "loss": 0.8877, - "step": 2534 - }, - { - "epoch": 0.3048157277700956, - "grad_norm": 2.2631509887585275, - "learning_rate": 3.2588627399761164e-06, - "loss": 1.0652, - "step": 2535 - }, - { - "epoch": 0.3049359706607347, - "grad_norm": 1.6333598994345773, - "learning_rate": 3.2582573398529903e-06, - "loss": 0.9426, - "step": 2536 - }, - { - "epoch": 0.3050562135513738, - "grad_norm": 5.763465704441169, - "learning_rate": 3.2576517488532265e-06, - "loss": 0.9687, - "step": 2537 - }, - { - "epoch": 0.30517645644201286, - "grad_norm": 2.180253587724857, - "learning_rate": 3.257045967068692e-06, - "loss": 1.0983, - "step": 2538 - }, - { - "epoch": 0.30529669933265197, - "grad_norm": 1.4514556648350687, - "learning_rate": 3.2564399945912848e-06, - "loss": 1.0508, - "step": 2539 - }, - { - "epoch": 0.305416942223291, - "grad_norm": 11.058751047539017, - "learning_rate": 3.2558338315129287e-06, - "loss": 1.0558, - "step": 2540 - }, - { - "epoch": 0.30553718511393013, - "grad_norm": 2.678862241671093, - "learning_rate": 3.2552274779255785e-06, - "loss": 0.9914, - "step": 2541 - }, - { - "epoch": 0.30565742800456924, - "grad_norm": 2.3955220448708374, - "learning_rate": 3.2546209339212184e-06, - "loss": 1.0, - "step": 2542 - }, - { - "epoch": 0.3057776708952083, - "grad_norm": 1.63602383594357, - "learning_rate": 3.25401419959186e-06, - "loss": 0.9992, - "step": 2543 - }, - { - "epoch": 0.3058979137858474, - "grad_norm": 1.924930841268746, - "learning_rate": 3.253407275029545e-06, - "loss": 0.9952, - "step": 2544 - }, - { - "epoch": 0.3060181566764865, - "grad_norm": 2.2014413446968386, - "learning_rate": 3.2528001603263425e-06, - "loss": 1.0352, - "step": 2545 - }, - { - "epoch": 0.3061383995671256, - "grad_norm": 1.8857920011384122, - "learning_rate": 3.2521928555743514e-06, - "loss": 1.0475, - "step": 2546 - }, - { - "epoch": 0.3062586424577647, - "grad_norm": 1.6704706729117478, - "learning_rate": 3.2515853608657e-06, - "loss": 0.9107, - "step": 2547 - }, - { - "epoch": 0.3063788853484038, - "grad_norm": 3.4952995890079217, - "learning_rate": 3.250977676292545e-06, - "loss": 0.9768, - "step": 2548 - }, - { - "epoch": 0.30649912823904285, - "grad_norm": 2.3949977799885125, - "learning_rate": 3.2503698019470712e-06, - "loss": 1.023, - "step": 2549 - }, - { - "epoch": 0.30661937112968196, - "grad_norm": 1.732160856071971, - "learning_rate": 3.249761737921492e-06, - "loss": 1.0036, - "step": 2550 - }, - { - "epoch": 0.30673961402032107, - "grad_norm": 1.847278128839977, - "learning_rate": 3.249153484308051e-06, - "loss": 0.9711, - "step": 2551 - }, - { - "epoch": 0.3068598569109601, - "grad_norm": 2.674163730421997, - "learning_rate": 3.2485450411990194e-06, - "loss": 1.0051, - "step": 2552 - }, - { - "epoch": 0.30698009980159924, - "grad_norm": 1.6584052691680038, - "learning_rate": 3.2479364086866983e-06, - "loss": 1.0535, - "step": 2553 - }, - { - "epoch": 0.30710034269223835, - "grad_norm": 1.9352636659369071, - "learning_rate": 3.247327586863416e-06, - "loss": 1.0369, - "step": 2554 - }, - { - "epoch": 0.3072205855828774, - "grad_norm": 2.474376399537741, - "learning_rate": 3.2467185758215304e-06, - "loss": 1.0052, - "step": 2555 - }, - { - "epoch": 0.3073408284735165, - "grad_norm": 2.3753637737957796, - "learning_rate": 3.246109375653428e-06, - "loss": 1.0958, - "step": 2556 - }, - { - "epoch": 0.30746107136415557, - "grad_norm": 1.7579325317091268, - "learning_rate": 3.2454999864515243e-06, - "loss": 1.016, - "step": 2557 - }, - { - "epoch": 0.3075813142547947, - "grad_norm": 5.66765746636173, - "learning_rate": 3.244890408308263e-06, - "loss": 0.9184, - "step": 2558 - }, - { - "epoch": 0.3077015571454338, - "grad_norm": 4.5262819613403105, - "learning_rate": 3.2442806413161165e-06, - "loss": 0.8444, - "step": 2559 - }, - { - "epoch": 0.30782180003607285, - "grad_norm": 2.077058561503218, - "learning_rate": 3.243670685567586e-06, - "loss": 0.9942, - "step": 2560 - }, - { - "epoch": 0.30794204292671196, - "grad_norm": 2.2382687872894205, - "learning_rate": 3.2430605411552012e-06, - "loss": 1.0354, - "step": 2561 - }, - { - "epoch": 0.30806228581735107, - "grad_norm": 0.8945587020726239, - "learning_rate": 3.2424502081715205e-06, - "loss": 0.9492, - "step": 2562 - }, - { - "epoch": 0.3081825287079901, - "grad_norm": 1.617691477507422, - "learning_rate": 3.241839686709132e-06, - "loss": 1.0098, - "step": 2563 - }, - { - "epoch": 0.30830277159862923, - "grad_norm": 2.770326056449928, - "learning_rate": 3.2412289768606495e-06, - "loss": 1.0481, - "step": 2564 - }, - { - "epoch": 0.30842301448926834, - "grad_norm": 1.8118036181997295, - "learning_rate": 3.240618078718718e-06, - "loss": 1.0503, - "step": 2565 - }, - { - "epoch": 0.3085432573799074, - "grad_norm": 1.9660331508523055, - "learning_rate": 3.240006992376011e-06, - "loss": 0.9668, - "step": 2566 - }, - { - "epoch": 0.3086635002705465, - "grad_norm": 2.7680208852123354, - "learning_rate": 3.2393957179252284e-06, - "loss": 0.9884, - "step": 2567 - }, - { - "epoch": 0.3087837431611856, - "grad_norm": 1.7099793420625764, - "learning_rate": 3.2387842554591016e-06, - "loss": 1.0396, - "step": 2568 - }, - { - "epoch": 0.3089039860518247, - "grad_norm": 2.1247883859621477, - "learning_rate": 3.238172605070388e-06, - "loss": 1.0945, - "step": 2569 - }, - { - "epoch": 0.3090242289424638, - "grad_norm": 2.8340705211751773, - "learning_rate": 3.2375607668518745e-06, - "loss": 1.0128, - "step": 2570 - }, - { - "epoch": 0.30914447183310284, - "grad_norm": 1.9606561344922695, - "learning_rate": 3.236948740896377e-06, - "loss": 1.1286, - "step": 2571 - }, - { - "epoch": 0.30926471472374195, - "grad_norm": 2.0512191450272965, - "learning_rate": 3.2363365272967384e-06, - "loss": 1.0694, - "step": 2572 - }, - { - "epoch": 0.30938495761438106, - "grad_norm": 2.01360675399369, - "learning_rate": 3.235724126145832e-06, - "loss": 1.0399, - "step": 2573 - }, - { - "epoch": 0.3095052005050201, - "grad_norm": 1.6277688718746715, - "learning_rate": 3.235111537536558e-06, - "loss": 1.0032, - "step": 2574 - }, - { - "epoch": 0.30962544339565923, - "grad_norm": 1.8414438340143164, - "learning_rate": 3.2344987615618456e-06, - "loss": 1.0573, - "step": 2575 - }, - { - "epoch": 0.30974568628629834, - "grad_norm": 1.4665974203293317, - "learning_rate": 3.2338857983146533e-06, - "loss": 1.0125, - "step": 2576 - }, - { - "epoch": 0.3098659291769374, - "grad_norm": 2.3973537320099085, - "learning_rate": 3.233272647887966e-06, - "loss": 0.9912, - "step": 2577 - }, - { - "epoch": 0.3099861720675765, - "grad_norm": 1.5784625457244796, - "learning_rate": 3.2326593103747985e-06, - "loss": 1.1242, - "step": 2578 - }, - { - "epoch": 0.3101064149582156, - "grad_norm": 2.0403052741127405, - "learning_rate": 3.2320457858681936e-06, - "loss": 1.074, - "step": 2579 - }, - { - "epoch": 0.31022665784885467, - "grad_norm": 2.389389915808905, - "learning_rate": 3.2314320744612228e-06, - "loss": 1.0874, - "step": 2580 - }, - { - "epoch": 0.3103469007394938, - "grad_norm": 1.8893755102078036, - "learning_rate": 3.2308181762469854e-06, - "loss": 0.9913, - "step": 2581 - }, - { - "epoch": 0.3104671436301329, - "grad_norm": 2.0148481897761523, - "learning_rate": 3.230204091318609e-06, - "loss": 1.0201, - "step": 2582 - }, - { - "epoch": 0.31058738652077195, - "grad_norm": 1.8991552181437248, - "learning_rate": 3.2295898197692503e-06, - "loss": 1.0723, - "step": 2583 - }, - { - "epoch": 0.31070762941141106, - "grad_norm": 1.8855135841821067, - "learning_rate": 3.228975361692094e-06, - "loss": 1.0198, - "step": 2584 - }, - { - "epoch": 0.31082787230205017, - "grad_norm": 2.1317502684773655, - "learning_rate": 3.228360717180352e-06, - "loss": 1.0295, - "step": 2585 - }, - { - "epoch": 0.3109481151926892, - "grad_norm": 0.8626570365140757, - "learning_rate": 3.227745886327266e-06, - "loss": 0.8746, - "step": 2586 - }, - { - "epoch": 0.31106835808332833, - "grad_norm": 0.8801012519544053, - "learning_rate": 3.227130869226105e-06, - "loss": 0.8196, - "step": 2587 - }, - { - "epoch": 0.3111886009739674, - "grad_norm": 4.270922825813833, - "learning_rate": 3.226515665970167e-06, - "loss": 1.0527, - "step": 2588 - }, - { - "epoch": 0.3113088438646065, - "grad_norm": 2.2452696772920375, - "learning_rate": 3.225900276652777e-06, - "loss": 1.0794, - "step": 2589 - }, - { - "epoch": 0.3114290867552456, - "grad_norm": 1.6140945268908173, - "learning_rate": 3.2252847013672906e-06, - "loss": 0.9815, - "step": 2590 - }, - { - "epoch": 0.31154932964588467, - "grad_norm": 1.889368885268886, - "learning_rate": 3.224668940207089e-06, - "loss": 0.9873, - "step": 2591 - }, - { - "epoch": 0.3116695725365238, - "grad_norm": 1.650299755958105, - "learning_rate": 3.2240529932655828e-06, - "loss": 1.0974, - "step": 2592 - }, - { - "epoch": 0.3117898154271629, - "grad_norm": 2.713550286941357, - "learning_rate": 3.223436860636211e-06, - "loss": 1.1126, - "step": 2593 - }, - { - "epoch": 0.31191005831780194, - "grad_norm": 1.7262721443275182, - "learning_rate": 3.2228205424124403e-06, - "loss": 0.965, - "step": 2594 - }, - { - "epoch": 0.31203030120844105, - "grad_norm": 2.7980551334372663, - "learning_rate": 3.222204038687765e-06, - "loss": 0.9692, - "step": 2595 - }, - { - "epoch": 0.31215054409908016, - "grad_norm": 1.4873308967189645, - "learning_rate": 3.221587349555709e-06, - "loss": 1.109, - "step": 2596 - }, - { - "epoch": 0.3122707869897192, - "grad_norm": 1.6657415524582984, - "learning_rate": 3.2209704751098236e-06, - "loss": 0.9168, - "step": 2597 - }, - { - "epoch": 0.31239102988035833, - "grad_norm": 2.1140304619390773, - "learning_rate": 3.2203534154436875e-06, - "loss": 1.0609, - "step": 2598 - }, - { - "epoch": 0.31251127277099744, - "grad_norm": 2.130408640602114, - "learning_rate": 3.2197361706509084e-06, - "loss": 0.9915, - "step": 2599 - }, - { - "epoch": 0.3126315156616365, - "grad_norm": 4.181660922489856, - "learning_rate": 3.2191187408251228e-06, - "loss": 1.0732, - "step": 2600 - }, - { - "epoch": 0.3127517585522756, - "grad_norm": 3.072359213400906, - "learning_rate": 3.218501126059993e-06, - "loss": 1.0067, - "step": 2601 - }, - { - "epoch": 0.31287200144291466, - "grad_norm": 2.146596223591538, - "learning_rate": 3.2178833264492116e-06, - "loss": 1.0402, - "step": 2602 - }, - { - "epoch": 0.31299224433355377, - "grad_norm": 2.0640539615289897, - "learning_rate": 3.217265342086498e-06, - "loss": 0.9871, - "step": 2603 - }, - { - "epoch": 0.3131124872241929, - "grad_norm": 1.9180698717733826, - "learning_rate": 3.216647173065599e-06, - "loss": 0.9563, - "step": 2604 - }, - { - "epoch": 0.31323273011483194, - "grad_norm": 2.2946741887615243, - "learning_rate": 3.216028819480292e-06, - "loss": 0.9717, - "step": 2605 - }, - { - "epoch": 0.31335297300547105, - "grad_norm": 2.072247216577049, - "learning_rate": 3.2154102814243793e-06, - "loss": 0.9944, - "step": 2606 - }, - { - "epoch": 0.31347321589611016, - "grad_norm": 1.918398558399591, - "learning_rate": 3.2147915589916937e-06, - "loss": 0.8984, - "step": 2607 - }, - { - "epoch": 0.3135934587867492, - "grad_norm": 2.5633947159553085, - "learning_rate": 3.2141726522760938e-06, - "loss": 1.0554, - "step": 2608 - }, - { - "epoch": 0.3137137016773883, - "grad_norm": 0.7287986962225105, - "learning_rate": 3.213553561371469e-06, - "loss": 0.7894, - "step": 2609 - }, - { - "epoch": 0.31383394456802743, - "grad_norm": 2.215629962867161, - "learning_rate": 3.212934286371733e-06, - "loss": 1.1931, - "step": 2610 - }, - { - "epoch": 0.3139541874586665, - "grad_norm": 2.3831058509231395, - "learning_rate": 3.2123148273708304e-06, - "loss": 1.066, - "step": 2611 - }, - { - "epoch": 0.3140744303493056, - "grad_norm": 1.7526741032772133, - "learning_rate": 3.211695184462733e-06, - "loss": 0.9931, - "step": 2612 - }, - { - "epoch": 0.3141946732399447, - "grad_norm": 0.8864588419594895, - "learning_rate": 3.2110753577414383e-06, - "loss": 0.8754, - "step": 2613 - }, - { - "epoch": 0.31431491613058377, - "grad_norm": 1.7712365831985541, - "learning_rate": 3.2104553473009757e-06, - "loss": 1.0164, - "step": 2614 - }, - { - "epoch": 0.3144351590212229, - "grad_norm": 1.7289505406018804, - "learning_rate": 3.209835153235399e-06, - "loss": 0.9035, - "step": 2615 - }, - { - "epoch": 0.314555401911862, - "grad_norm": 2.347977551198609, - "learning_rate": 3.2092147756387916e-06, - "loss": 0.9064, - "step": 2616 - }, - { - "epoch": 0.31467564480250104, - "grad_norm": 1.660962364606373, - "learning_rate": 3.208594214605264e-06, - "loss": 1.0598, - "step": 2617 - }, - { - "epoch": 0.31479588769314015, - "grad_norm": 1.8909462900104015, - "learning_rate": 3.2079734702289553e-06, - "loss": 1.0082, - "step": 2618 - }, - { - "epoch": 0.3149161305837792, - "grad_norm": 0.8321015592427745, - "learning_rate": 3.207352542604031e-06, - "loss": 0.8745, - "step": 2619 - }, - { - "epoch": 0.3150363734744183, - "grad_norm": 1.5800701277899443, - "learning_rate": 3.2067314318246864e-06, - "loss": 1.0112, - "step": 2620 - }, - { - "epoch": 0.31515661636505743, - "grad_norm": 1.9498878813569438, - "learning_rate": 3.206110137985143e-06, - "loss": 0.9959, - "step": 2621 - }, - { - "epoch": 0.3152768592556965, - "grad_norm": 1.710070385962247, - "learning_rate": 3.2054886611796505e-06, - "loss": 1.1505, - "step": 2622 - }, - { - "epoch": 0.3153971021463356, - "grad_norm": 0.9745400811217718, - "learning_rate": 3.204867001502487e-06, - "loss": 0.9212, - "step": 2623 - }, - { - "epoch": 0.3155173450369747, - "grad_norm": 2.777938585924896, - "learning_rate": 3.2042451590479567e-06, - "loss": 1.0345, - "step": 2624 - }, - { - "epoch": 0.31563758792761376, - "grad_norm": 1.5836614051372269, - "learning_rate": 3.203623133910394e-06, - "loss": 1.0882, - "step": 2625 - }, - { - "epoch": 0.31575783081825287, - "grad_norm": 2.3351904653759017, - "learning_rate": 3.203000926184158e-06, - "loss": 1.0005, - "step": 2626 - }, - { - "epoch": 0.315878073708892, - "grad_norm": 2.7188224344360328, - "learning_rate": 3.202378535963639e-06, - "loss": 1.0016, - "step": 2627 - }, - { - "epoch": 0.31599831659953104, - "grad_norm": 1.7938882967174554, - "learning_rate": 3.2017559633432516e-06, - "loss": 1.0668, - "step": 2628 - }, - { - "epoch": 0.31611855949017015, - "grad_norm": 1.6904342713010136, - "learning_rate": 3.2011332084174398e-06, - "loss": 0.8915, - "step": 2629 - }, - { - "epoch": 0.31623880238080926, - "grad_norm": 1.5256299015577466, - "learning_rate": 3.2005102712806756e-06, - "loss": 1.1201, - "step": 2630 - }, - { - "epoch": 0.3163590452714483, - "grad_norm": 2.126183124367402, - "learning_rate": 3.1998871520274575e-06, - "loss": 0.9598, - "step": 2631 - }, - { - "epoch": 0.3164792881620874, - "grad_norm": 2.069773708266238, - "learning_rate": 3.199263850752312e-06, - "loss": 1.0722, - "step": 2632 - }, - { - "epoch": 0.31659953105272653, - "grad_norm": 2.5459012469417757, - "learning_rate": 3.198640367549795e-06, - "loss": 1.0927, - "step": 2633 - }, - { - "epoch": 0.3167197739433656, - "grad_norm": 1.5723349230167023, - "learning_rate": 3.198016702514487e-06, - "loss": 1.0902, - "step": 2634 - }, - { - "epoch": 0.3168400168340047, - "grad_norm": 1.5452679850431656, - "learning_rate": 3.1973928557409972e-06, - "loss": 1.0773, - "step": 2635 - }, - { - "epoch": 0.31696025972464376, - "grad_norm": 1.7256241401800094, - "learning_rate": 3.1967688273239636e-06, - "loss": 0.9295, - "step": 2636 - }, - { - "epoch": 0.31708050261528287, - "grad_norm": 1.7225240712913066, - "learning_rate": 3.1961446173580503e-06, - "loss": 1.0492, - "step": 2637 - }, - { - "epoch": 0.317200745505922, - "grad_norm": 2.5531400607725923, - "learning_rate": 3.1955202259379502e-06, - "loss": 0.9998, - "step": 2638 - }, - { - "epoch": 0.31732098839656103, - "grad_norm": 2.1195102914181017, - "learning_rate": 3.194895653158381e-06, - "loss": 1.0498, - "step": 2639 - }, - { - "epoch": 0.31744123128720014, - "grad_norm": 0.7769182660552393, - "learning_rate": 3.194270899114093e-06, - "loss": 0.8257, - "step": 2640 - }, - { - "epoch": 0.31756147417783925, - "grad_norm": 1.58068760456412, - "learning_rate": 3.193645963899858e-06, - "loss": 1.0524, - "step": 2641 - }, - { - "epoch": 0.3176817170684783, - "grad_norm": 1.6908007236054106, - "learning_rate": 3.193020847610479e-06, - "loss": 1.0628, - "step": 2642 - }, - { - "epoch": 0.3178019599591174, - "grad_norm": 2.092068694589659, - "learning_rate": 3.192395550340787e-06, - "loss": 0.9412, - "step": 2643 - }, - { - "epoch": 0.31792220284975653, - "grad_norm": 1.8494703488330837, - "learning_rate": 3.191770072185638e-06, - "loss": 0.9934, - "step": 2644 - }, - { - "epoch": 0.3180424457403956, - "grad_norm": 2.1910838605674834, - "learning_rate": 3.191144413239916e-06, - "loss": 0.9626, - "step": 2645 - }, - { - "epoch": 0.3181626886310347, - "grad_norm": 1.8420177663014652, - "learning_rate": 3.190518573598534e-06, - "loss": 1.1039, - "step": 2646 - }, - { - "epoch": 0.3182829315216738, - "grad_norm": 1.5848332210846554, - "learning_rate": 3.1898925533564308e-06, - "loss": 1.006, - "step": 2647 - }, - { - "epoch": 0.31840317441231286, - "grad_norm": 2.58055388737207, - "learning_rate": 3.1892663526085733e-06, - "loss": 0.8729, - "step": 2648 - }, - { - "epoch": 0.31852341730295197, - "grad_norm": 0.7659491806353598, - "learning_rate": 3.188639971449956e-06, - "loss": 0.8317, - "step": 2649 - }, - { - "epoch": 0.318643660193591, - "grad_norm": 1.7594499209562708, - "learning_rate": 3.1880134099756e-06, - "loss": 0.954, - "step": 2650 - }, - { - "epoch": 0.31876390308423014, - "grad_norm": 1.7384233395227588, - "learning_rate": 3.1873866682805535e-06, - "loss": 0.9274, - "step": 2651 - }, - { - "epoch": 0.31888414597486925, - "grad_norm": 1.718081881973229, - "learning_rate": 3.186759746459894e-06, - "loss": 1.1176, - "step": 2652 - }, - { - "epoch": 0.3190043888655083, - "grad_norm": 2.552633690690871, - "learning_rate": 3.1861326446087246e-06, - "loss": 1.0241, - "step": 2653 - }, - { - "epoch": 0.3191246317561474, - "grad_norm": 1.757738985007872, - "learning_rate": 3.1855053628221763e-06, - "loss": 0.9443, - "step": 2654 - }, - { - "epoch": 0.3192448746467865, - "grad_norm": 2.525592840113141, - "learning_rate": 3.184877901195407e-06, - "loss": 1.1348, - "step": 2655 - }, - { - "epoch": 0.3193651175374256, - "grad_norm": 0.844404884786495, - "learning_rate": 3.184250259823602e-06, - "loss": 0.8988, - "step": 2656 - }, - { - "epoch": 0.3194853604280647, - "grad_norm": 2.8793807447445303, - "learning_rate": 3.183622438801974e-06, - "loss": 1.0413, - "step": 2657 - }, - { - "epoch": 0.3196056033187038, - "grad_norm": 3.0111069113438944, - "learning_rate": 3.1829944382257637e-06, - "loss": 0.9914, - "step": 2658 - }, - { - "epoch": 0.31972584620934286, - "grad_norm": 2.2592289583273804, - "learning_rate": 3.1823662581902373e-06, - "loss": 1.0528, - "step": 2659 - }, - { - "epoch": 0.31984608909998197, - "grad_norm": 2.09344571308006, - "learning_rate": 3.1817378987906896e-06, - "loss": 0.975, - "step": 2660 - }, - { - "epoch": 0.3199663319906211, - "grad_norm": 1.9888983407793457, - "learning_rate": 3.181109360122442e-06, - "loss": 1.0284, - "step": 2661 - }, - { - "epoch": 0.32008657488126013, - "grad_norm": 2.242529866201488, - "learning_rate": 3.1804806422808445e-06, - "loss": 1.0157, - "step": 2662 - }, - { - "epoch": 0.32020681777189924, - "grad_norm": 1.991094491349739, - "learning_rate": 3.1798517453612714e-06, - "loss": 0.9562, - "step": 2663 - }, - { - "epoch": 0.32032706066253835, - "grad_norm": 1.6798715180131665, - "learning_rate": 3.1792226694591265e-06, - "loss": 0.9842, - "step": 2664 - }, - { - "epoch": 0.3204473035531774, - "grad_norm": 1.9252634338853365, - "learning_rate": 3.178593414669841e-06, - "loss": 1.0346, - "step": 2665 - }, - { - "epoch": 0.3205675464438165, - "grad_norm": 2.043888271682276, - "learning_rate": 3.1779639810888707e-06, - "loss": 0.9343, - "step": 2666 - }, - { - "epoch": 0.3206877893344556, - "grad_norm": 1.6278672964386418, - "learning_rate": 3.1773343688117013e-06, - "loss": 0.994, - "step": 2667 - }, - { - "epoch": 0.3208080322250947, - "grad_norm": 2.0595642929025355, - "learning_rate": 3.1767045779338445e-06, - "loss": 1.0703, - "step": 2668 - }, - { - "epoch": 0.3209282751157338, - "grad_norm": 2.181774821717554, - "learning_rate": 3.176074608550839e-06, - "loss": 1.1439, - "step": 2669 - }, - { - "epoch": 0.32104851800637285, - "grad_norm": 2.171750620841342, - "learning_rate": 3.17544446075825e-06, - "loss": 1.049, - "step": 2670 - }, - { - "epoch": 0.32116876089701196, - "grad_norm": 1.4770203541545777, - "learning_rate": 3.174814134651671e-06, - "loss": 0.9413, - "step": 2671 - }, - { - "epoch": 0.3212890037876511, - "grad_norm": 1.6867961828666702, - "learning_rate": 3.1741836303267215e-06, - "loss": 1.0414, - "step": 2672 - }, - { - "epoch": 0.32140924667829013, - "grad_norm": 1.8819117627311033, - "learning_rate": 3.1735529478790496e-06, - "loss": 0.9848, - "step": 2673 - }, - { - "epoch": 0.32152948956892924, - "grad_norm": 1.86796764212607, - "learning_rate": 3.172922087404328e-06, - "loss": 1.0264, - "step": 2674 - }, - { - "epoch": 0.32164973245956835, - "grad_norm": 1.0638159848646878, - "learning_rate": 3.1722910489982586e-06, - "loss": 0.8158, - "step": 2675 - }, - { - "epoch": 0.3217699753502074, - "grad_norm": 1.3950886186870615, - "learning_rate": 3.1716598327565694e-06, - "loss": 1.0328, - "step": 2676 - }, - { - "epoch": 0.3218902182408465, - "grad_norm": 1.3778430416730627, - "learning_rate": 3.171028438775015e-06, - "loss": 1.0713, - "step": 2677 - }, - { - "epoch": 0.3220104611314856, - "grad_norm": 1.8369348292619327, - "learning_rate": 3.170396867149377e-06, - "loss": 1.0731, - "step": 2678 - }, - { - "epoch": 0.3221307040221247, - "grad_norm": 1.7671771358654902, - "learning_rate": 3.1697651179754653e-06, - "loss": 1.0866, - "step": 2679 - }, - { - "epoch": 0.3222509469127638, - "grad_norm": 1.690669086325706, - "learning_rate": 3.1691331913491153e-06, - "loss": 0.9666, - "step": 2680 - }, - { - "epoch": 0.32237118980340285, - "grad_norm": 4.245078434842959, - "learning_rate": 3.1685010873661898e-06, - "loss": 1.0713, - "step": 2681 - }, - { - "epoch": 0.32249143269404196, - "grad_norm": 2.5547784911583817, - "learning_rate": 3.167868806122578e-06, - "loss": 1.028, - "step": 2682 - }, - { - "epoch": 0.32261167558468107, - "grad_norm": 2.0612326687557845, - "learning_rate": 3.1672363477141968e-06, - "loss": 0.8936, - "step": 2683 - }, - { - "epoch": 0.3227319184753201, - "grad_norm": 2.002852313306205, - "learning_rate": 3.1666037122369903e-06, - "loss": 1.0853, - "step": 2684 - }, - { - "epoch": 0.32285216136595923, - "grad_norm": 2.1325464327230312, - "learning_rate": 3.165970899786928e-06, - "loss": 1.0879, - "step": 2685 - }, - { - "epoch": 0.32297240425659834, - "grad_norm": 1.5756479595589106, - "learning_rate": 3.1653379104600067e-06, - "loss": 0.9712, - "step": 2686 - }, - { - "epoch": 0.3230926471472374, - "grad_norm": 1.4299208653835203, - "learning_rate": 3.164704744352251e-06, - "loss": 0.9296, - "step": 2687 - }, - { - "epoch": 0.3232128900378765, - "grad_norm": 1.9331399404328893, - "learning_rate": 3.164071401559713e-06, - "loss": 1.04, - "step": 2688 - }, - { - "epoch": 0.3233331329285156, - "grad_norm": 1.5318691792371675, - "learning_rate": 3.1634378821784674e-06, - "loss": 0.9405, - "step": 2689 - }, - { - "epoch": 0.3234533758191547, - "grad_norm": 3.3354999335667403, - "learning_rate": 3.1628041863046208e-06, - "loss": 0.9751, - "step": 2690 - }, - { - "epoch": 0.3235736187097938, - "grad_norm": 6.016714339274359, - "learning_rate": 3.162170314034304e-06, - "loss": 1.1369, - "step": 2691 - }, - { - "epoch": 0.3236938616004329, - "grad_norm": 1.5899875585201222, - "learning_rate": 3.1615362654636738e-06, - "loss": 1.0334, - "step": 2692 - }, - { - "epoch": 0.32381410449107195, - "grad_norm": 1.6259862401175964, - "learning_rate": 3.1609020406889163e-06, - "loss": 1.1047, - "step": 2693 - }, - { - "epoch": 0.32393434738171106, - "grad_norm": 1.7188718579182922, - "learning_rate": 3.1602676398062416e-06, - "loss": 1.0744, - "step": 2694 - }, - { - "epoch": 0.3240545902723502, - "grad_norm": 2.466928413611161, - "learning_rate": 3.1596330629118886e-06, - "loss": 0.8477, - "step": 2695 - }, - { - "epoch": 0.32417483316298923, - "grad_norm": 2.3346917648593046, - "learning_rate": 3.1589983101021223e-06, - "loss": 0.9616, - "step": 2696 - }, - { - "epoch": 0.32429507605362834, - "grad_norm": 2.15297339449619, - "learning_rate": 3.1583633814732337e-06, - "loss": 1.0776, - "step": 2697 - }, - { - "epoch": 0.3244153189442674, - "grad_norm": 2.663265522971679, - "learning_rate": 3.157728277121541e-06, - "loss": 0.9442, - "step": 2698 - }, - { - "epoch": 0.3245355618349065, - "grad_norm": 2.533404340083518, - "learning_rate": 3.1570929971433897e-06, - "loss": 1.0133, - "step": 2699 - }, - { - "epoch": 0.3246558047255456, - "grad_norm": 2.1881327122778744, - "learning_rate": 3.1564575416351504e-06, - "loss": 1.0586, - "step": 2700 - }, - { - "epoch": 0.32477604761618467, - "grad_norm": 1.6832328816773872, - "learning_rate": 3.155821910693221e-06, - "loss": 0.9732, - "step": 2701 - }, - { - "epoch": 0.3248962905068238, - "grad_norm": 1.610758066775584, - "learning_rate": 3.1551861044140275e-06, - "loss": 1.0869, - "step": 2702 - }, - { - "epoch": 0.3250165333974629, - "grad_norm": 1.842558718383631, - "learning_rate": 3.15455012289402e-06, - "loss": 1.0001, - "step": 2703 - }, - { - "epoch": 0.32513677628810195, - "grad_norm": 1.5575346826209484, - "learning_rate": 3.153913966229677e-06, - "loss": 1.0658, - "step": 2704 - }, - { - "epoch": 0.32525701917874106, - "grad_norm": 0.6625033981308068, - "learning_rate": 3.1532776345175027e-06, - "loss": 0.7473, - "step": 2705 - }, - { - "epoch": 0.32537726206938017, - "grad_norm": 1.9199853306448353, - "learning_rate": 3.1526411278540285e-06, - "loss": 1.0137, - "step": 2706 - }, - { - "epoch": 0.3254975049600192, - "grad_norm": 1.9914052586872901, - "learning_rate": 3.1520044463358116e-06, - "loss": 1.046, - "step": 2707 - }, - { - "epoch": 0.32561774785065833, - "grad_norm": 1.9799947980369281, - "learning_rate": 3.151367590059436e-06, - "loss": 1.0296, - "step": 2708 - }, - { - "epoch": 0.32573799074129745, - "grad_norm": 1.8018393231901224, - "learning_rate": 3.1507305591215117e-06, - "loss": 1.094, - "step": 2709 - }, - { - "epoch": 0.3258582336319365, - "grad_norm": 0.9078415673964924, - "learning_rate": 3.150093353618677e-06, - "loss": 0.8147, - "step": 2710 - }, - { - "epoch": 0.3259784765225756, - "grad_norm": 2.775050164533955, - "learning_rate": 3.149455973647596e-06, - "loss": 1.1072, - "step": 2711 - }, - { - "epoch": 0.32609871941321467, - "grad_norm": 6.475383803138562, - "learning_rate": 3.1488184193049563e-06, - "loss": 0.9945, - "step": 2712 - }, - { - "epoch": 0.3262189623038538, - "grad_norm": 1.6877207435057768, - "learning_rate": 3.1481806906874767e-06, - "loss": 0.9591, - "step": 2713 - }, - { - "epoch": 0.3263392051944929, - "grad_norm": 1.8698365640872747, - "learning_rate": 3.147542787891899e-06, - "loss": 1.1062, - "step": 2714 - }, - { - "epoch": 0.32645944808513194, - "grad_norm": 2.344702971950256, - "learning_rate": 3.1469047110149926e-06, - "loss": 0.9752, - "step": 2715 - }, - { - "epoch": 0.32657969097577105, - "grad_norm": 1.77137858654292, - "learning_rate": 3.146266460153554e-06, - "loss": 1.0788, - "step": 2716 - }, - { - "epoch": 0.32669993386641016, - "grad_norm": 1.5930735330510528, - "learning_rate": 3.145628035404404e-06, - "loss": 1.0298, - "step": 2717 - }, - { - "epoch": 0.3268201767570492, - "grad_norm": 0.8727707052689598, - "learning_rate": 3.1449894368643922e-06, - "loss": 0.8336, - "step": 2718 - }, - { - "epoch": 0.32694041964768833, - "grad_norm": 1.4555560050523628, - "learning_rate": 3.1443506646303934e-06, - "loss": 0.9503, - "step": 2719 - }, - { - "epoch": 0.32706066253832744, - "grad_norm": 3.5096525875648132, - "learning_rate": 3.1437117187993086e-06, - "loss": 0.8981, - "step": 2720 - }, - { - "epoch": 0.3271809054289665, - "grad_norm": 1.5718899674963802, - "learning_rate": 3.143072599468065e-06, - "loss": 1.0291, - "step": 2721 - }, - { - "epoch": 0.3273011483196056, - "grad_norm": 1.5284791126564825, - "learning_rate": 3.1424333067336174e-06, - "loss": 0.9821, - "step": 2722 - }, - { - "epoch": 0.3274213912102447, - "grad_norm": 2.0282369714988757, - "learning_rate": 3.141793840692945e-06, - "loss": 0.9998, - "step": 2723 - }, - { - "epoch": 0.32754163410088377, - "grad_norm": 2.239601265295822, - "learning_rate": 3.1411542014430553e-06, - "loss": 0.8401, - "step": 2724 - }, - { - "epoch": 0.3276618769915229, - "grad_norm": 1.5268386825258506, - "learning_rate": 3.1405143890809804e-06, - "loss": 1.0485, - "step": 2725 - }, - { - "epoch": 0.327782119882162, - "grad_norm": 1.6775602843229933, - "learning_rate": 3.1398744037037796e-06, - "loss": 0.9331, - "step": 2726 - }, - { - "epoch": 0.32790236277280105, - "grad_norm": 1.7649540259233516, - "learning_rate": 3.139234245408538e-06, - "loss": 1.0702, - "step": 2727 - }, - { - "epoch": 0.32802260566344016, - "grad_norm": 1.5810421712850014, - "learning_rate": 3.1385939142923666e-06, - "loss": 0.9959, - "step": 2728 - }, - { - "epoch": 0.3281428485540792, - "grad_norm": 2.3164301287238875, - "learning_rate": 3.137953410452405e-06, - "loss": 1.0081, - "step": 2729 - }, - { - "epoch": 0.3282630914447183, - "grad_norm": 1.5359893169180576, - "learning_rate": 3.1373127339858146e-06, - "loss": 0.9823, - "step": 2730 - }, - { - "epoch": 0.32838333433535744, - "grad_norm": 1.6578104118024815, - "learning_rate": 3.136671884989787e-06, - "loss": 0.9719, - "step": 2731 - }, - { - "epoch": 0.3285035772259965, - "grad_norm": 2.213508001333303, - "learning_rate": 3.1360308635615383e-06, - "loss": 1.1042, - "step": 2732 - }, - { - "epoch": 0.3286238201166356, - "grad_norm": 1.72002779538961, - "learning_rate": 3.135389669798311e-06, - "loss": 1.0206, - "step": 2733 - }, - { - "epoch": 0.3287440630072747, - "grad_norm": 2.156758566327658, - "learning_rate": 3.134748303797373e-06, - "loss": 1.0309, - "step": 2734 - }, - { - "epoch": 0.32886430589791377, - "grad_norm": 2.343079738144171, - "learning_rate": 3.1341067656560203e-06, - "loss": 1.0402, - "step": 2735 - }, - { - "epoch": 0.3289845487885529, - "grad_norm": 2.1208499325725336, - "learning_rate": 3.133465055471572e-06, - "loss": 1.093, - "step": 2736 - }, - { - "epoch": 0.329104791679192, - "grad_norm": 2.2810508954676183, - "learning_rate": 3.1328231733413767e-06, - "loss": 0.8876, - "step": 2737 - }, - { - "epoch": 0.32922503456983104, - "grad_norm": 2.358856548415655, - "learning_rate": 3.1321811193628067e-06, - "loss": 1.135, - "step": 2738 - }, - { - "epoch": 0.32934527746047015, - "grad_norm": 3.511989222412256, - "learning_rate": 3.131538893633261e-06, - "loss": 0.9383, - "step": 2739 - }, - { - "epoch": 0.32946552035110926, - "grad_norm": 5.112818872631889, - "learning_rate": 3.130896496250165e-06, - "loss": 1.0136, - "step": 2740 - }, - { - "epoch": 0.3295857632417483, - "grad_norm": 2.3403837316458467, - "learning_rate": 3.1302539273109693e-06, - "loss": 1.0968, - "step": 2741 - }, - { - "epoch": 0.32970600613238743, - "grad_norm": 1.4983968910227803, - "learning_rate": 3.1296111869131513e-06, - "loss": 1.0339, - "step": 2742 - }, - { - "epoch": 0.32982624902302654, - "grad_norm": 1.6933873932136358, - "learning_rate": 3.1289682751542153e-06, - "loss": 1.078, - "step": 2743 - }, - { - "epoch": 0.3299464919136656, - "grad_norm": 3.6760353705911757, - "learning_rate": 3.1283251921316883e-06, - "loss": 0.9477, - "step": 2744 - }, - { - "epoch": 0.3300667348043047, - "grad_norm": 2.0958439618770814, - "learning_rate": 3.1276819379431277e-06, - "loss": 1.0494, - "step": 2745 - }, - { - "epoch": 0.33018697769494376, - "grad_norm": 1.8762443863634155, - "learning_rate": 3.1270385126861134e-06, - "loss": 0.9823, - "step": 2746 - }, - { - "epoch": 0.3303072205855829, - "grad_norm": 2.147639233801723, - "learning_rate": 3.1263949164582533e-06, - "loss": 1.0533, - "step": 2747 - }, - { - "epoch": 0.330427463476222, - "grad_norm": 1.980662439870588, - "learning_rate": 3.1257511493571797e-06, - "loss": 1.006, - "step": 2748 - }, - { - "epoch": 0.33054770636686104, - "grad_norm": 1.6968596118770207, - "learning_rate": 3.125107211480552e-06, - "loss": 1.0099, - "step": 2749 - }, - { - "epoch": 0.33066794925750015, - "grad_norm": 1.5674706550297113, - "learning_rate": 3.124463102926054e-06, - "loss": 1.0223, - "step": 2750 - }, - { - "epoch": 0.33078819214813926, - "grad_norm": 0.7596993699395933, - "learning_rate": 3.1238188237913984e-06, - "loss": 0.8593, - "step": 2751 - }, - { - "epoch": 0.3309084350387783, - "grad_norm": 2.4741399330278644, - "learning_rate": 3.1231743741743202e-06, - "loss": 0.9926, - "step": 2752 - }, - { - "epoch": 0.3310286779294174, - "grad_norm": 1.9393872944242074, - "learning_rate": 3.122529754172582e-06, - "loss": 1.066, - "step": 2753 - }, - { - "epoch": 0.33114892082005654, - "grad_norm": 1.8021110870634993, - "learning_rate": 3.1218849638839736e-06, - "loss": 0.9545, - "step": 2754 - }, - { - "epoch": 0.3312691637106956, - "grad_norm": 2.2452046898726783, - "learning_rate": 3.121240003406307e-06, - "loss": 1.0081, - "step": 2755 - }, - { - "epoch": 0.3313894066013347, - "grad_norm": 1.7493258267572083, - "learning_rate": 3.120594872837425e-06, - "loss": 0.9452, - "step": 2756 - }, - { - "epoch": 0.3315096494919738, - "grad_norm": 0.8251122066024381, - "learning_rate": 3.1199495722751906e-06, - "loss": 0.8784, - "step": 2757 - }, - { - "epoch": 0.33162989238261287, - "grad_norm": 2.8786496755897457, - "learning_rate": 3.1193041018174972e-06, - "loss": 1.0709, - "step": 2758 - }, - { - "epoch": 0.331750135273252, - "grad_norm": 2.056734639017803, - "learning_rate": 3.118658461562261e-06, - "loss": 1.1814, - "step": 2759 - }, - { - "epoch": 0.33187037816389103, - "grad_norm": 2.1395616326598192, - "learning_rate": 3.118012651607426e-06, - "loss": 1.0774, - "step": 2760 - }, - { - "epoch": 0.33199062105453014, - "grad_norm": 2.0815827707936667, - "learning_rate": 3.1173666720509603e-06, - "loss": 1.0592, - "step": 2761 - }, - { - "epoch": 0.33211086394516925, - "grad_norm": 1.8316062971214717, - "learning_rate": 3.116720522990859e-06, - "loss": 0.9122, - "step": 2762 - }, - { - "epoch": 0.3322311068358083, - "grad_norm": 1.7684235126584613, - "learning_rate": 3.116074204525142e-06, - "loss": 0.8511, - "step": 2763 - }, - { - "epoch": 0.3323513497264474, - "grad_norm": 1.5122945314933667, - "learning_rate": 3.1154277167518553e-06, - "loss": 1.0587, - "step": 2764 - }, - { - "epoch": 0.33247159261708653, - "grad_norm": 0.8441375619390538, - "learning_rate": 3.114781059769072e-06, - "loss": 0.8556, - "step": 2765 - }, - { - "epoch": 0.3325918355077256, - "grad_norm": 2.4270870115937178, - "learning_rate": 3.1141342336748874e-06, - "loss": 0.9139, - "step": 2766 - }, - { - "epoch": 0.3327120783983647, - "grad_norm": 1.6608129302113182, - "learning_rate": 3.1134872385674253e-06, - "loss": 1.0442, - "step": 2767 - }, - { - "epoch": 0.3328323212890038, - "grad_norm": 2.429410176534043, - "learning_rate": 3.1128400745448353e-06, - "loss": 1.093, - "step": 2768 - }, - { - "epoch": 0.33295256417964286, - "grad_norm": 2.4134942530975927, - "learning_rate": 3.11219274170529e-06, - "loss": 0.8579, - "step": 2769 - }, - { - "epoch": 0.333072807070282, - "grad_norm": 1.8201775889685328, - "learning_rate": 3.1115452401469903e-06, - "loss": 1.0414, - "step": 2770 - }, - { - "epoch": 0.3331930499609211, - "grad_norm": 1.8834846531440468, - "learning_rate": 3.1108975699681613e-06, - "loss": 1.0963, - "step": 2771 - }, - { - "epoch": 0.33331329285156014, - "grad_norm": 2.686388451233215, - "learning_rate": 3.1102497312670542e-06, - "loss": 0.9456, - "step": 2772 - }, - { - "epoch": 0.33343353574219925, - "grad_norm": 1.6833160950309973, - "learning_rate": 3.109601724141946e-06, - "loss": 1.0326, - "step": 2773 - }, - { - "epoch": 0.33355377863283836, - "grad_norm": 1.715214508921549, - "learning_rate": 3.108953548691138e-06, - "loss": 0.9148, - "step": 2774 - }, - { - "epoch": 0.3336740215234774, - "grad_norm": 2.049571001708929, - "learning_rate": 3.108305205012959e-06, - "loss": 0.9534, - "step": 2775 - }, - { - "epoch": 0.3337942644141165, - "grad_norm": 2.4376542458270176, - "learning_rate": 3.107656693205761e-06, - "loss": 1.1059, - "step": 2776 - }, - { - "epoch": 0.3339145073047556, - "grad_norm": 4.390042195041315, - "learning_rate": 3.107008013367924e-06, - "loss": 0.9276, - "step": 2777 - }, - { - "epoch": 0.3340347501953947, - "grad_norm": 3.03713496648181, - "learning_rate": 3.1063591655978507e-06, - "loss": 1.0964, - "step": 2778 - }, - { - "epoch": 0.3341549930860338, - "grad_norm": 1.650923025720166, - "learning_rate": 3.105710149993972e-06, - "loss": 1.0195, - "step": 2779 - }, - { - "epoch": 0.33427523597667286, - "grad_norm": 1.8360947561890246, - "learning_rate": 3.1050609666547427e-06, - "loss": 1.0838, - "step": 2780 - }, - { - "epoch": 0.33439547886731197, - "grad_norm": 3.0707080382446224, - "learning_rate": 3.104411615678644e-06, - "loss": 1.0027, - "step": 2781 - }, - { - "epoch": 0.3345157217579511, - "grad_norm": 2.332819359801849, - "learning_rate": 3.1037620971641803e-06, - "loss": 0.9668, - "step": 2782 - }, - { - "epoch": 0.33463596464859013, - "grad_norm": 2.9451245316242405, - "learning_rate": 3.1031124112098844e-06, - "loss": 0.8755, - "step": 2783 - }, - { - "epoch": 0.33475620753922924, - "grad_norm": 1.9753438336422342, - "learning_rate": 3.1024625579143127e-06, - "loss": 0.9517, - "step": 2784 - }, - { - "epoch": 0.33487645042986836, - "grad_norm": 1.8333311369911678, - "learning_rate": 3.101812537376048e-06, - "loss": 0.9568, - "step": 2785 - }, - { - "epoch": 0.3349966933205074, - "grad_norm": 1.8194059152705546, - "learning_rate": 3.1011623496936973e-06, - "loss": 1.072, - "step": 2786 - }, - { - "epoch": 0.3351169362111465, - "grad_norm": 1.9438560579564412, - "learning_rate": 3.100511994965893e-06, - "loss": 0.9242, - "step": 2787 - }, - { - "epoch": 0.33523717910178563, - "grad_norm": 3.9935009133951738, - "learning_rate": 3.0998614732912947e-06, - "loss": 1.089, - "step": 2788 - }, - { - "epoch": 0.3353574219924247, - "grad_norm": 1.9888496710582737, - "learning_rate": 3.0992107847685855e-06, - "loss": 0.9096, - "step": 2789 - }, - { - "epoch": 0.3354776648830638, - "grad_norm": 1.6454503623826844, - "learning_rate": 3.0985599294964736e-06, - "loss": 1.0278, - "step": 2790 - }, - { - "epoch": 0.33559790777370285, - "grad_norm": 2.5644219796816334, - "learning_rate": 3.097908907573695e-06, - "loss": 0.9378, - "step": 2791 - }, - { - "epoch": 0.33571815066434196, - "grad_norm": 2.1091039412792414, - "learning_rate": 3.0972577190990067e-06, - "loss": 1.1251, - "step": 2792 - }, - { - "epoch": 0.3358383935549811, - "grad_norm": 1.8165944647950256, - "learning_rate": 3.096606364171196e-06, - "loss": 1.0328, - "step": 2793 - }, - { - "epoch": 0.33595863644562013, - "grad_norm": 1.9004839632140003, - "learning_rate": 3.0959548428890703e-06, - "loss": 1.0777, - "step": 2794 - }, - { - "epoch": 0.33607887933625924, - "grad_norm": 1.7987985018300847, - "learning_rate": 3.095303155351468e-06, - "loss": 1.0615, - "step": 2795 - }, - { - "epoch": 0.33619912222689835, - "grad_norm": 2.187744453940142, - "learning_rate": 3.0946513016572464e-06, - "loss": 1.0174, - "step": 2796 - }, - { - "epoch": 0.3363193651175374, - "grad_norm": 1.726647491434521, - "learning_rate": 3.0939992819052938e-06, - "loss": 0.9937, - "step": 2797 - }, - { - "epoch": 0.3364396080081765, - "grad_norm": 2.2292971231190726, - "learning_rate": 3.0933470961945193e-06, - "loss": 1.0428, - "step": 2798 - }, - { - "epoch": 0.3365598508988156, - "grad_norm": 2.29212533233319, - "learning_rate": 3.0926947446238597e-06, - "loss": 0.9237, - "step": 2799 - }, - { - "epoch": 0.3366800937894547, - "grad_norm": 1.8810285925874684, - "learning_rate": 3.092042227292276e-06, - "loss": 1.0521, - "step": 2800 - }, - { - "epoch": 0.3368003366800938, - "grad_norm": 1.5105168890313507, - "learning_rate": 3.0913895442987557e-06, - "loss": 1.1109, - "step": 2801 - }, - { - "epoch": 0.3369205795707329, - "grad_norm": 1.770879168945713, - "learning_rate": 3.090736695742308e-06, - "loss": 1.0804, - "step": 2802 - }, - { - "epoch": 0.33704082246137196, - "grad_norm": 3.866218214228123, - "learning_rate": 3.0900836817219713e-06, - "loss": 0.7487, - "step": 2803 - }, - { - "epoch": 0.33716106535201107, - "grad_norm": 1.816302093472897, - "learning_rate": 3.089430502336807e-06, - "loss": 1.0718, - "step": 2804 - }, - { - "epoch": 0.3372813082426502, - "grad_norm": 2.7695859142337547, - "learning_rate": 3.088777157685902e-06, - "loss": 1.13, - "step": 2805 - }, - { - "epoch": 0.33740155113328923, - "grad_norm": 1.8523636384473077, - "learning_rate": 3.088123647868367e-06, - "loss": 1.0881, - "step": 2806 - }, - { - "epoch": 0.33752179402392835, - "grad_norm": 2.0619078277351983, - "learning_rate": 3.0874699729833405e-06, - "loss": 1.0467, - "step": 2807 - }, - { - "epoch": 0.3376420369145674, - "grad_norm": 1.5580547423729094, - "learning_rate": 3.086816133129983e-06, - "loss": 1.0277, - "step": 2808 - }, - { - "epoch": 0.3377622798052065, - "grad_norm": 1.9327827987917572, - "learning_rate": 3.0861621284074826e-06, - "loss": 0.9961, - "step": 2809 - }, - { - "epoch": 0.3378825226958456, - "grad_norm": 2.0952422105348307, - "learning_rate": 3.085507958915051e-06, - "loss": 0.9628, - "step": 2810 - }, - { - "epoch": 0.3380027655864847, - "grad_norm": 1.6880312895654517, - "learning_rate": 3.084853624751925e-06, - "loss": 0.9414, - "step": 2811 - }, - { - "epoch": 0.3381230084771238, - "grad_norm": 1.9149914414137381, - "learning_rate": 3.0841991260173668e-06, - "loss": 1.0821, - "step": 2812 - }, - { - "epoch": 0.3382432513677629, - "grad_norm": 1.6084566922225836, - "learning_rate": 3.0835444628106634e-06, - "loss": 1.0242, - "step": 2813 - }, - { - "epoch": 0.33836349425840195, - "grad_norm": 4.501910757663984, - "learning_rate": 3.082889635231126e-06, - "loss": 1.0596, - "step": 2814 - }, - { - "epoch": 0.33848373714904106, - "grad_norm": 2.560657676318826, - "learning_rate": 3.0822346433780925e-06, - "loss": 0.9943, - "step": 2815 - }, - { - "epoch": 0.3386039800396802, - "grad_norm": 2.107611477076756, - "learning_rate": 3.0815794873509237e-06, - "loss": 1.0937, - "step": 2816 - }, - { - "epoch": 0.33872422293031923, - "grad_norm": 1.9467959588478585, - "learning_rate": 3.0809241672490066e-06, - "loss": 0.9581, - "step": 2817 - }, - { - "epoch": 0.33884446582095834, - "grad_norm": 1.5634501810606078, - "learning_rate": 3.080268683171753e-06, - "loss": 1.0737, - "step": 2818 - }, - { - "epoch": 0.33896470871159745, - "grad_norm": 1.9356836603604552, - "learning_rate": 3.0796130352185985e-06, - "loss": 1.1105, - "step": 2819 - }, - { - "epoch": 0.3390849516022365, - "grad_norm": 1.7443362272649099, - "learning_rate": 3.0789572234890057e-06, - "loss": 0.9023, - "step": 2820 - }, - { - "epoch": 0.3392051944928756, - "grad_norm": 1.9456738975058978, - "learning_rate": 3.0783012480824596e-06, - "loss": 1.0128, - "step": 2821 - }, - { - "epoch": 0.33932543738351467, - "grad_norm": 1.9899492443263767, - "learning_rate": 3.077645109098471e-06, - "loss": 0.971, - "step": 2822 - }, - { - "epoch": 0.3394456802741538, - "grad_norm": 1.8873398037059226, - "learning_rate": 3.076988806636577e-06, - "loss": 0.9445, - "step": 2823 - }, - { - "epoch": 0.3395659231647929, - "grad_norm": 2.238434738036053, - "learning_rate": 3.0763323407963377e-06, - "loss": 1.1169, - "step": 2824 - }, - { - "epoch": 0.33968616605543195, - "grad_norm": 1.7575005148483978, - "learning_rate": 3.075675711677337e-06, - "loss": 1.0341, - "step": 2825 - }, - { - "epoch": 0.33980640894607106, - "grad_norm": 4.286383231498951, - "learning_rate": 3.0750189193791865e-06, - "loss": 1.0045, - "step": 2826 - }, - { - "epoch": 0.33992665183671017, - "grad_norm": 2.1741157882934807, - "learning_rate": 3.0743619640015203e-06, - "loss": 0.9371, - "step": 2827 - }, - { - "epoch": 0.3400468947273492, - "grad_norm": 2.0046524532403294, - "learning_rate": 3.073704845643999e-06, - "loss": 1.1514, - "step": 2828 - }, - { - "epoch": 0.34016713761798834, - "grad_norm": 2.8300413941500113, - "learning_rate": 3.0730475644063063e-06, - "loss": 1.0155, - "step": 2829 - }, - { - "epoch": 0.34028738050862745, - "grad_norm": 1.6113118488761302, - "learning_rate": 3.072390120388151e-06, - "loss": 0.8824, - "step": 2830 - }, - { - "epoch": 0.3404076233992665, - "grad_norm": 2.8912768479554907, - "learning_rate": 3.071732513689267e-06, - "loss": 0.9374, - "step": 2831 - }, - { - "epoch": 0.3405278662899056, - "grad_norm": 2.958656579925181, - "learning_rate": 3.0710747444094134e-06, - "loss": 0.9033, - "step": 2832 - }, - { - "epoch": 0.3406481091805447, - "grad_norm": 2.09627676558311, - "learning_rate": 3.070416812648372e-06, - "loss": 0.8822, - "step": 2833 - }, - { - "epoch": 0.3407683520711838, - "grad_norm": 3.3122472126790883, - "learning_rate": 3.069758718505951e-06, - "loss": 0.8931, - "step": 2834 - }, - { - "epoch": 0.3408885949618229, - "grad_norm": 1.6718399899818945, - "learning_rate": 3.0691004620819836e-06, - "loss": 1.037, - "step": 2835 - }, - { - "epoch": 0.341008837852462, - "grad_norm": 0.8075264045957028, - "learning_rate": 3.0684420434763254e-06, - "loss": 0.8678, - "step": 2836 - }, - { - "epoch": 0.34112908074310105, - "grad_norm": 1.975276483425579, - "learning_rate": 3.06778346278886e-06, - "loss": 0.9963, - "step": 2837 - }, - { - "epoch": 0.34124932363374016, - "grad_norm": 1.6085651915777053, - "learning_rate": 3.0671247201194906e-06, - "loss": 1.0117, - "step": 2838 - }, - { - "epoch": 0.3413695665243792, - "grad_norm": 2.0573457992449637, - "learning_rate": 3.066465815568151e-06, - "loss": 0.9819, - "step": 2839 - }, - { - "epoch": 0.34148980941501833, - "grad_norm": 1.5342761907208369, - "learning_rate": 3.0658067492347947e-06, - "loss": 0.9122, - "step": 2840 - }, - { - "epoch": 0.34161005230565744, - "grad_norm": 5.5925006856564075, - "learning_rate": 3.065147521219402e-06, - "loss": 0.8998, - "step": 2841 - }, - { - "epoch": 0.3417302951962965, - "grad_norm": 1.396684282839682, - "learning_rate": 3.064488131621977e-06, - "loss": 0.9779, - "step": 2842 - }, - { - "epoch": 0.3418505380869356, - "grad_norm": 9.438577956727107, - "learning_rate": 3.063828580542549e-06, - "loss": 0.9685, - "step": 2843 - }, - { - "epoch": 0.3419707809775747, - "grad_norm": 1.6475625269692915, - "learning_rate": 3.0631688680811706e-06, - "loss": 0.9494, - "step": 2844 - }, - { - "epoch": 0.3420910238682138, - "grad_norm": 1.7954245642736746, - "learning_rate": 3.062508994337921e-06, - "loss": 0.9841, - "step": 2845 - }, - { - "epoch": 0.3422112667588529, - "grad_norm": 3.1983209079052477, - "learning_rate": 3.0618489594129013e-06, - "loss": 1.0125, - "step": 2846 - }, - { - "epoch": 0.342331509649492, - "grad_norm": 4.99456644462025, - "learning_rate": 3.061188763406239e-06, - "loss": 0.9376, - "step": 2847 - }, - { - "epoch": 0.34245175254013105, - "grad_norm": 2.602511854190561, - "learning_rate": 3.060528406418085e-06, - "loss": 1.0578, - "step": 2848 - }, - { - "epoch": 0.34257199543077016, - "grad_norm": 2.369166186164148, - "learning_rate": 3.0598678885486145e-06, - "loss": 0.8533, - "step": 2849 - }, - { - "epoch": 0.34269223832140927, - "grad_norm": 1.6436782763247837, - "learning_rate": 3.0592072098980282e-06, - "loss": 0.9726, - "step": 2850 - }, - { - "epoch": 0.3428124812120483, - "grad_norm": 3.265906800883281, - "learning_rate": 3.0585463705665514e-06, - "loss": 0.9561, - "step": 2851 - }, - { - "epoch": 0.34293272410268744, - "grad_norm": 2.3582601376374956, - "learning_rate": 3.0578853706544304e-06, - "loss": 0.9364, - "step": 2852 - }, - { - "epoch": 0.34305296699332655, - "grad_norm": 1.9293652470042204, - "learning_rate": 3.0572242102619404e-06, - "loss": 0.875, - "step": 2853 - }, - { - "epoch": 0.3431732098839656, - "grad_norm": 1.6723941460427376, - "learning_rate": 3.0565628894893784e-06, - "loss": 1.0316, - "step": 2854 - }, - { - "epoch": 0.3432934527746047, - "grad_norm": 1.7319395131347957, - "learning_rate": 3.0559014084370655e-06, - "loss": 0.9708, - "step": 2855 - }, - { - "epoch": 0.34341369566524377, - "grad_norm": 1.6547060553903474, - "learning_rate": 3.055239767205349e-06, - "loss": 1.0106, - "step": 2856 - }, - { - "epoch": 0.3435339385558829, - "grad_norm": 1.7004688093358395, - "learning_rate": 3.054577965894599e-06, - "loss": 1.0051, - "step": 2857 - }, - { - "epoch": 0.343654181446522, - "grad_norm": 1.8707138026170331, - "learning_rate": 3.0539160046052094e-06, - "loss": 0.9337, - "step": 2858 - }, - { - "epoch": 0.34377442433716104, - "grad_norm": 2.1713490020722253, - "learning_rate": 3.0532538834376003e-06, - "loss": 0.9322, - "step": 2859 - }, - { - "epoch": 0.34389466722780015, - "grad_norm": 1.8270566377969502, - "learning_rate": 3.0525916024922143e-06, - "loss": 1.0127, - "step": 2860 - }, - { - "epoch": 0.34401491011843927, - "grad_norm": 2.5563159850168535, - "learning_rate": 3.0519291618695193e-06, - "loss": 1.0638, - "step": 2861 - }, - { - "epoch": 0.3441351530090783, - "grad_norm": 1.7056513256530914, - "learning_rate": 3.0512665616700065e-06, - "loss": 0.9851, - "step": 2862 - }, - { - "epoch": 0.34425539589971743, - "grad_norm": 1.946462942147422, - "learning_rate": 3.0506038019941933e-06, - "loss": 1.1288, - "step": 2863 - }, - { - "epoch": 0.34437563879035654, - "grad_norm": 2.383711648361475, - "learning_rate": 3.049940882942617e-06, - "loss": 0.9103, - "step": 2864 - }, - { - "epoch": 0.3444958816809956, - "grad_norm": 2.0089097642775102, - "learning_rate": 3.0492778046158448e-06, - "loss": 1.0229, - "step": 2865 - }, - { - "epoch": 0.3446161245716347, - "grad_norm": 2.0555515575656442, - "learning_rate": 3.0486145671144633e-06, - "loss": 0.9915, - "step": 2866 - }, - { - "epoch": 0.3447363674622738, - "grad_norm": 3.2399831608935155, - "learning_rate": 3.047951170539086e-06, - "loss": 0.9942, - "step": 2867 - }, - { - "epoch": 0.3448566103529129, - "grad_norm": 2.107980224632434, - "learning_rate": 3.047287614990349e-06, - "loss": 1.0716, - "step": 2868 - }, - { - "epoch": 0.344976853243552, - "grad_norm": 2.2151613899144955, - "learning_rate": 3.046623900568914e-06, - "loss": 0.8463, - "step": 2869 - }, - { - "epoch": 0.34509709613419104, - "grad_norm": 5.034083924737818, - "learning_rate": 3.045960027375465e-06, - "loss": 0.9236, - "step": 2870 - }, - { - "epoch": 0.34521733902483015, - "grad_norm": 2.3436110391747804, - "learning_rate": 3.045295995510711e-06, - "loss": 1.043, - "step": 2871 - }, - { - "epoch": 0.34533758191546926, - "grad_norm": 1.8505245470316978, - "learning_rate": 3.0446318050753865e-06, - "loss": 0.9622, - "step": 2872 - }, - { - "epoch": 0.3454578248061083, - "grad_norm": 8.301390143108891, - "learning_rate": 3.0439674561702474e-06, - "loss": 1.0152, - "step": 2873 - }, - { - "epoch": 0.3455780676967474, - "grad_norm": 3.0585616342702466, - "learning_rate": 3.043302948896076e-06, - "loss": 1.1093, - "step": 2874 - }, - { - "epoch": 0.34569831058738654, - "grad_norm": 2.325774779485352, - "learning_rate": 3.0426382833536756e-06, - "loss": 0.8286, - "step": 2875 - }, - { - "epoch": 0.3458185534780256, - "grad_norm": 2.593099029844939, - "learning_rate": 3.041973459643877e-06, - "loss": 1.009, - "step": 2876 - }, - { - "epoch": 0.3459387963686647, - "grad_norm": 2.326731117343242, - "learning_rate": 3.0413084778675334e-06, - "loss": 0.902, - "step": 2877 - }, - { - "epoch": 0.3460590392593038, - "grad_norm": 1.7885340280727047, - "learning_rate": 3.0406433381255214e-06, - "loss": 1.0665, - "step": 2878 - }, - { - "epoch": 0.34617928214994287, - "grad_norm": 2.508774902493856, - "learning_rate": 3.0399780405187425e-06, - "loss": 1.051, - "step": 2879 - }, - { - "epoch": 0.346299525040582, - "grad_norm": 1.8710443095960287, - "learning_rate": 3.0393125851481216e-06, - "loss": 1.0097, - "step": 2880 - }, - { - "epoch": 0.3464197679312211, - "grad_norm": 2.243055966288316, - "learning_rate": 3.038646972114608e-06, - "loss": 1.0943, - "step": 2881 - }, - { - "epoch": 0.34654001082186014, - "grad_norm": 1.7138268446996858, - "learning_rate": 3.037981201519174e-06, - "loss": 0.9035, - "step": 2882 - }, - { - "epoch": 0.34666025371249926, - "grad_norm": 2.8601253338007457, - "learning_rate": 3.0373152734628175e-06, - "loss": 0.9362, - "step": 2883 - }, - { - "epoch": 0.34678049660313837, - "grad_norm": 3.092045323985969, - "learning_rate": 3.0366491880465584e-06, - "loss": 0.986, - "step": 2884 - }, - { - "epoch": 0.3469007394937774, - "grad_norm": 1.5442199269782095, - "learning_rate": 3.035982945371443e-06, - "loss": 1.0486, - "step": 2885 - }, - { - "epoch": 0.34702098238441653, - "grad_norm": 2.295375412298875, - "learning_rate": 3.035316545538537e-06, - "loss": 1.0805, - "step": 2886 - }, - { - "epoch": 0.3471412252750556, - "grad_norm": 4.4619411000972775, - "learning_rate": 3.034649988648935e-06, - "loss": 1.0215, - "step": 2887 - }, - { - "epoch": 0.3472614681656947, - "grad_norm": 1.6037707480131371, - "learning_rate": 3.033983274803752e-06, - "loss": 1.0434, - "step": 2888 - }, - { - "epoch": 0.3473817110563338, - "grad_norm": 3.43462570705435, - "learning_rate": 3.0333164041041283e-06, - "loss": 0.9433, - "step": 2889 - }, - { - "epoch": 0.34750195394697286, - "grad_norm": 1.9436479057165894, - "learning_rate": 3.032649376651228e-06, - "loss": 0.9568, - "step": 2890 - }, - { - "epoch": 0.347622196837612, - "grad_norm": 2.256513069857127, - "learning_rate": 3.031982192546238e-06, - "loss": 0.9882, - "step": 2891 - }, - { - "epoch": 0.3477424397282511, - "grad_norm": 3.081073104392263, - "learning_rate": 3.0313148518903696e-06, - "loss": 1.1748, - "step": 2892 - }, - { - "epoch": 0.34786268261889014, - "grad_norm": 2.1782834579645938, - "learning_rate": 3.030647354784859e-06, - "loss": 1.032, - "step": 2893 - }, - { - "epoch": 0.34798292550952925, - "grad_norm": 2.0810546875, - "learning_rate": 3.029979701330964e-06, - "loss": 1.0029, - "step": 2894 - }, - { - "epoch": 0.34810316840016836, - "grad_norm": 2.0366404680397396, - "learning_rate": 3.029311891629966e-06, - "loss": 1.0313, - "step": 2895 - }, - { - "epoch": 0.3482234112908074, - "grad_norm": 1.8071982353164682, - "learning_rate": 3.0286439257831744e-06, - "loss": 0.9722, - "step": 2896 - }, - { - "epoch": 0.3483436541814465, - "grad_norm": 2.0188067259952915, - "learning_rate": 3.0279758038919156e-06, - "loss": 0.9357, - "step": 2897 - }, - { - "epoch": 0.34846389707208564, - "grad_norm": 2.081501104196498, - "learning_rate": 3.0273075260575455e-06, - "loss": 1.0124, - "step": 2898 - }, - { - "epoch": 0.3485841399627247, - "grad_norm": 1.7033806005340906, - "learning_rate": 3.0266390923814396e-06, - "loss": 1.0268, - "step": 2899 - }, - { - "epoch": 0.3487043828533638, - "grad_norm": 2.1438003712066322, - "learning_rate": 3.0259705029650008e-06, - "loss": 1.0447, - "step": 2900 - }, - { - "epoch": 0.34882462574400286, - "grad_norm": 2.0684633454009016, - "learning_rate": 3.025301757909652e-06, - "loss": 0.9582, - "step": 2901 - }, - { - "epoch": 0.34894486863464197, - "grad_norm": 1.426336645358, - "learning_rate": 3.024632857316842e-06, - "loss": 1.0333, - "step": 2902 - }, - { - "epoch": 0.3490651115252811, - "grad_norm": 2.103540333608944, - "learning_rate": 3.0239638012880412e-06, - "loss": 1.0048, - "step": 2903 - }, - { - "epoch": 0.34918535441592014, - "grad_norm": 3.641338360359617, - "learning_rate": 3.0232945899247466e-06, - "loss": 1.0342, - "step": 2904 - }, - { - "epoch": 0.34930559730655925, - "grad_norm": 2.286862614950759, - "learning_rate": 3.022625223328476e-06, - "loss": 1.0097, - "step": 2905 - }, - { - "epoch": 0.34942584019719836, - "grad_norm": 1.5010304090760278, - "learning_rate": 3.0219557016007723e-06, - "loss": 0.9225, - "step": 2906 - }, - { - "epoch": 0.3495460830878374, - "grad_norm": 2.2823478460833355, - "learning_rate": 3.021286024843202e-06, - "loss": 0.9271, - "step": 2907 - }, - { - "epoch": 0.3496663259784765, - "grad_norm": 1.1316052548164803, - "learning_rate": 3.0206161931573526e-06, - "loss": 0.9349, - "step": 2908 - }, - { - "epoch": 0.34978656886911563, - "grad_norm": 1.6333562502258892, - "learning_rate": 3.0199462066448388e-06, - "loss": 1.1579, - "step": 2909 - }, - { - "epoch": 0.3499068117597547, - "grad_norm": 1.655643136028466, - "learning_rate": 3.019276065407296e-06, - "loss": 0.9244, - "step": 2910 - }, - { - "epoch": 0.3500270546503938, - "grad_norm": 2.0817607539313654, - "learning_rate": 3.018605769546385e-06, - "loss": 1.0387, - "step": 2911 - }, - { - "epoch": 0.3501472975410329, - "grad_norm": 1.8183755023704125, - "learning_rate": 3.017935319163788e-06, - "loss": 1.0313, - "step": 2912 - }, - { - "epoch": 0.35026754043167196, - "grad_norm": 1.5232561052363758, - "learning_rate": 3.017264714361213e-06, - "loss": 0.9391, - "step": 2913 - }, - { - "epoch": 0.3503877833223111, - "grad_norm": 2.7340922182181324, - "learning_rate": 3.016593955240389e-06, - "loss": 1.048, - "step": 2914 - }, - { - "epoch": 0.3505080262129502, - "grad_norm": 0.8154434659787446, - "learning_rate": 3.015923041903071e-06, - "loss": 0.8872, - "step": 2915 - }, - { - "epoch": 0.35062826910358924, - "grad_norm": 5.3160709721037716, - "learning_rate": 3.0152519744510347e-06, - "loss": 1.0575, - "step": 2916 - }, - { - "epoch": 0.35074851199422835, - "grad_norm": 2.0816396949839002, - "learning_rate": 3.014580752986081e-06, - "loss": 1.0583, - "step": 2917 - }, - { - "epoch": 0.3508687548848674, - "grad_norm": 1.7445155442415359, - "learning_rate": 3.0139093776100345e-06, - "loss": 1.0146, - "step": 2918 - }, - { - "epoch": 0.3509889977755065, - "grad_norm": 1.7105345926586475, - "learning_rate": 3.013237848424741e-06, - "loss": 0.9801, - "step": 2919 - }, - { - "epoch": 0.35110924066614563, - "grad_norm": 1.9918377737606998, - "learning_rate": 3.012566165532072e-06, - "loss": 0.9798, - "step": 2920 - }, - { - "epoch": 0.3512294835567847, - "grad_norm": 2.3845517674188255, - "learning_rate": 3.0118943290339207e-06, - "loss": 0.9956, - "step": 2921 - }, - { - "epoch": 0.3513497264474238, - "grad_norm": 2.758421401109452, - "learning_rate": 3.011222339032204e-06, - "loss": 0.9079, - "step": 2922 - }, - { - "epoch": 0.3514699693380629, - "grad_norm": 1.6308432938017032, - "learning_rate": 3.0105501956288626e-06, - "loss": 0.9204, - "step": 2923 - }, - { - "epoch": 0.35159021222870196, - "grad_norm": 3.0716068558260856, - "learning_rate": 3.0098778989258602e-06, - "loss": 0.9689, - "step": 2924 - }, - { - "epoch": 0.35171045511934107, - "grad_norm": 2.892088153767128, - "learning_rate": 3.009205449025183e-06, - "loss": 1.1082, - "step": 2925 - }, - { - "epoch": 0.3518306980099802, - "grad_norm": 1.7873344904962134, - "learning_rate": 3.008532846028842e-06, - "loss": 0.8599, - "step": 2926 - }, - { - "epoch": 0.35195094090061924, - "grad_norm": 3.124311752823813, - "learning_rate": 3.0078600900388694e-06, - "loss": 0.9357, - "step": 2927 - }, - { - "epoch": 0.35207118379125835, - "grad_norm": 1.8889130967901466, - "learning_rate": 3.007187181157323e-06, - "loss": 0.9682, - "step": 2928 - }, - { - "epoch": 0.35219142668189746, - "grad_norm": 2.087034815506554, - "learning_rate": 3.006514119486282e-06, - "loss": 0.9065, - "step": 2929 - }, - { - "epoch": 0.3523116695725365, - "grad_norm": 1.597347238964654, - "learning_rate": 3.005840905127849e-06, - "loss": 0.9202, - "step": 2930 - }, - { - "epoch": 0.3524319124631756, - "grad_norm": 3.1251661637952255, - "learning_rate": 3.0051675381841516e-06, - "loss": 1.099, - "step": 2931 - }, - { - "epoch": 0.3525521553538147, - "grad_norm": 1.5551665828319021, - "learning_rate": 3.0044940187573363e-06, - "loss": 0.9971, - "step": 2932 - }, - { - "epoch": 0.3526723982444538, - "grad_norm": 2.3710837953160575, - "learning_rate": 3.003820346949578e-06, - "loss": 0.8924, - "step": 2933 - }, - { - "epoch": 0.3527926411350929, - "grad_norm": 2.4301445337961196, - "learning_rate": 3.003146522863071e-06, - "loss": 1.0215, - "step": 2934 - }, - { - "epoch": 0.35291288402573195, - "grad_norm": 3.6135594276364884, - "learning_rate": 3.0024725466000345e-06, - "loss": 1.0954, - "step": 2935 - }, - { - "epoch": 0.35303312691637107, - "grad_norm": 1.8235196524110981, - "learning_rate": 3.0017984182627087e-06, - "loss": 1.0223, - "step": 2936 - }, - { - "epoch": 0.3531533698070102, - "grad_norm": 2.030362213968155, - "learning_rate": 3.00112413795336e-06, - "loss": 1.0492, - "step": 2937 - }, - { - "epoch": 0.35327361269764923, - "grad_norm": 1.8137634740601087, - "learning_rate": 3.000449705774275e-06, - "loss": 1.0346, - "step": 2938 - }, - { - "epoch": 0.35339385558828834, - "grad_norm": 1.9023686276166263, - "learning_rate": 2.9997751218277654e-06, - "loss": 0.9427, - "step": 2939 - }, - { - "epoch": 0.35351409847892745, - "grad_norm": 2.4285026187925807, - "learning_rate": 2.999100386216166e-06, - "loss": 1.0014, - "step": 2940 - }, - { - "epoch": 0.3536343413695665, - "grad_norm": 1.7727857560067544, - "learning_rate": 2.998425499041831e-06, - "loss": 0.9764, - "step": 2941 - }, - { - "epoch": 0.3537545842602056, - "grad_norm": 0.8834467946964595, - "learning_rate": 2.997750460407142e-06, - "loss": 0.8603, - "step": 2942 - }, - { - "epoch": 0.35387482715084473, - "grad_norm": 2.279287042408633, - "learning_rate": 2.997075270414501e-06, - "loss": 0.9374, - "step": 2943 - }, - { - "epoch": 0.3539950700414838, - "grad_norm": 0.8282070838985457, - "learning_rate": 2.9963999291663347e-06, - "loss": 0.8313, - "step": 2944 - }, - { - "epoch": 0.3541153129321229, - "grad_norm": 2.3279368689389712, - "learning_rate": 2.9957244367650915e-06, - "loss": 0.9708, - "step": 2945 - }, - { - "epoch": 0.354235555822762, - "grad_norm": 1.9829609556062218, - "learning_rate": 2.9950487933132425e-06, - "loss": 1.0694, - "step": 2946 - }, - { - "epoch": 0.35435579871340106, - "grad_norm": 2.1966932848338834, - "learning_rate": 2.994372998913283e-06, - "loss": 0.9499, - "step": 2947 - }, - { - "epoch": 0.35447604160404017, - "grad_norm": 2.66516448467007, - "learning_rate": 2.99369705366773e-06, - "loss": 0.8575, - "step": 2948 - }, - { - "epoch": 0.3545962844946792, - "grad_norm": 1.9886628929449364, - "learning_rate": 2.9930209576791244e-06, - "loss": 1.0489, - "step": 2949 - }, - { - "epoch": 0.35471652738531834, - "grad_norm": 2.875709943604976, - "learning_rate": 2.9923447110500285e-06, - "loss": 0.8722, - "step": 2950 - }, - { - "epoch": 0.35483677027595745, - "grad_norm": 1.3161645774980917, - "learning_rate": 2.9916683138830295e-06, - "loss": 0.9839, - "step": 2951 - }, - { - "epoch": 0.3549570131665965, - "grad_norm": 1.8438818932896346, - "learning_rate": 2.9909917662807353e-06, - "loss": 1.0417, - "step": 2952 - }, - { - "epoch": 0.3550772560572356, - "grad_norm": 2.126819280611395, - "learning_rate": 2.9903150683457783e-06, - "loss": 0.9246, - "step": 2953 - }, - { - "epoch": 0.3551974989478747, - "grad_norm": 1.794796280311622, - "learning_rate": 2.9896382201808126e-06, - "loss": 0.8796, - "step": 2954 - }, - { - "epoch": 0.3553177418385138, - "grad_norm": 1.9195091137601372, - "learning_rate": 2.988961221888516e-06, - "loss": 1.0366, - "step": 2955 - }, - { - "epoch": 0.3554379847291529, - "grad_norm": 2.628761049429708, - "learning_rate": 2.988284073571589e-06, - "loss": 1.0188, - "step": 2956 - }, - { - "epoch": 0.355558227619792, - "grad_norm": 2.1713733779912587, - "learning_rate": 2.9876067753327528e-06, - "loss": 0.9576, - "step": 2957 - }, - { - "epoch": 0.35567847051043106, - "grad_norm": 1.9309057181922704, - "learning_rate": 2.986929327274754e-06, - "loss": 1.0316, - "step": 2958 - }, - { - "epoch": 0.35579871340107017, - "grad_norm": 1.5041281796477808, - "learning_rate": 2.9862517295003617e-06, - "loss": 1.0095, - "step": 2959 - }, - { - "epoch": 0.3559189562917093, - "grad_norm": 1.6221458605788703, - "learning_rate": 2.9855739821123654e-06, - "loss": 0.9561, - "step": 2960 - }, - { - "epoch": 0.35603919918234833, - "grad_norm": 1.5562714602051664, - "learning_rate": 2.98489608521358e-06, - "loss": 1.0431, - "step": 2961 - }, - { - "epoch": 0.35615944207298744, - "grad_norm": 1.843015233416221, - "learning_rate": 2.9842180389068425e-06, - "loss": 1.0262, - "step": 2962 - }, - { - "epoch": 0.35627968496362655, - "grad_norm": 0.7779853511647196, - "learning_rate": 2.98353984329501e-06, - "loss": 0.8539, - "step": 2963 - }, - { - "epoch": 0.3563999278542656, - "grad_norm": 1.867986532052478, - "learning_rate": 2.982861498480965e-06, - "loss": 0.9439, - "step": 2964 - }, - { - "epoch": 0.3565201707449047, - "grad_norm": 1.5811694399852014, - "learning_rate": 2.9821830045676122e-06, - "loss": 1.0474, - "step": 2965 - }, - { - "epoch": 0.3566404136355438, - "grad_norm": 1.8219718082939416, - "learning_rate": 2.9815043616578793e-06, - "loss": 0.9491, - "step": 2966 - }, - { - "epoch": 0.3567606565261829, - "grad_norm": 2.1336213419138286, - "learning_rate": 2.9808255698547145e-06, - "loss": 1.0104, - "step": 2967 - }, - { - "epoch": 0.356880899416822, - "grad_norm": 2.71295363680534, - "learning_rate": 2.9801466292610913e-06, - "loss": 1.019, - "step": 2968 - }, - { - "epoch": 0.35700114230746105, - "grad_norm": 3.432122619510265, - "learning_rate": 2.979467539980003e-06, - "loss": 1.0426, - "step": 2969 - }, - { - "epoch": 0.35712138519810016, - "grad_norm": 1.6536570711329726, - "learning_rate": 2.978788302114468e-06, - "loss": 0.9951, - "step": 2970 - }, - { - "epoch": 0.35724162808873927, - "grad_norm": 1.986337607332025, - "learning_rate": 2.9781089157675255e-06, - "loss": 1.0493, - "step": 2971 - }, - { - "epoch": 0.3573618709793783, - "grad_norm": 1.386105984980079, - "learning_rate": 2.977429381042238e-06, - "loss": 1.1096, - "step": 2972 - }, - { - "epoch": 0.35748211387001744, - "grad_norm": 2.2194609241831293, - "learning_rate": 2.9767496980416913e-06, - "loss": 1.1231, - "step": 2973 - }, - { - "epoch": 0.35760235676065655, - "grad_norm": 2.8000650807037966, - "learning_rate": 2.9760698668689914e-06, - "loss": 1.0324, - "step": 2974 - }, - { - "epoch": 0.3577225996512956, - "grad_norm": 7.634215742301235, - "learning_rate": 2.975389887627269e-06, - "loss": 0.9441, - "step": 2975 - }, - { - "epoch": 0.3578428425419347, - "grad_norm": 2.393229742261107, - "learning_rate": 2.9747097604196764e-06, - "loss": 1.1257, - "step": 2976 - }, - { - "epoch": 0.3579630854325738, - "grad_norm": 0.7062848825195468, - "learning_rate": 2.9740294853493875e-06, - "loss": 0.8291, - "step": 2977 - }, - { - "epoch": 0.3580833283232129, - "grad_norm": 2.0641618449165455, - "learning_rate": 2.9733490625196008e-06, - "loss": 0.9073, - "step": 2978 - }, - { - "epoch": 0.358203571213852, - "grad_norm": 3.0751641454537633, - "learning_rate": 2.9726684920335353e-06, - "loss": 0.9902, - "step": 2979 - }, - { - "epoch": 0.35832381410449105, - "grad_norm": 1.9688848419030656, - "learning_rate": 2.971987773994432e-06, - "loss": 1.0492, - "step": 2980 - }, - { - "epoch": 0.35844405699513016, - "grad_norm": 2.3387951164848344, - "learning_rate": 2.9713069085055566e-06, - "loss": 1.0596, - "step": 2981 - }, - { - "epoch": 0.35856429988576927, - "grad_norm": 10.272925070395146, - "learning_rate": 2.9706258956701958e-06, - "loss": 1.0168, - "step": 2982 - }, - { - "epoch": 0.3586845427764083, - "grad_norm": 18.625231030970628, - "learning_rate": 2.9699447355916575e-06, - "loss": 0.9986, - "step": 2983 - }, - { - "epoch": 0.35880478566704743, - "grad_norm": 3.7586027016431025, - "learning_rate": 2.969263428373275e-06, - "loss": 0.9654, - "step": 2984 - }, - { - "epoch": 0.35892502855768654, - "grad_norm": 2.201989821581136, - "learning_rate": 2.9685819741184007e-06, - "loss": 1.016, - "step": 2985 - }, - { - "epoch": 0.3590452714483256, - "grad_norm": 2.2823519200969886, - "learning_rate": 2.967900372930411e-06, - "loss": 0.9194, - "step": 2986 - }, - { - "epoch": 0.3591655143389647, - "grad_norm": 2.847738968598837, - "learning_rate": 2.9672186249127046e-06, - "loss": 1.0285, - "step": 2987 - }, - { - "epoch": 0.3592857572296038, - "grad_norm": 2.17327495436086, - "learning_rate": 2.9665367301687014e-06, - "loss": 1.0107, - "step": 2988 - }, - { - "epoch": 0.3594060001202429, - "grad_norm": 1.6991084622075234, - "learning_rate": 2.965854688801845e-06, - "loss": 0.9871, - "step": 2989 - }, - { - "epoch": 0.359526243010882, - "grad_norm": 1.78222629494861, - "learning_rate": 2.9651725009156005e-06, - "loss": 0.9873, - "step": 2990 - }, - { - "epoch": 0.3596464859015211, - "grad_norm": 2.093538871482613, - "learning_rate": 2.964490166613454e-06, - "loss": 0.9801, - "step": 2991 - }, - { - "epoch": 0.35976672879216015, - "grad_norm": 0.8395632253152575, - "learning_rate": 2.963807685998917e-06, - "loss": 0.8335, - "step": 2992 - }, - { - "epoch": 0.35988697168279926, - "grad_norm": 1.8750645944277138, - "learning_rate": 2.9631250591755196e-06, - "loss": 1.0123, - "step": 2993 - }, - { - "epoch": 0.36000721457343837, - "grad_norm": 1.9307976745465019, - "learning_rate": 2.962442286246817e-06, - "loss": 0.8136, - "step": 2994 - }, - { - "epoch": 0.3601274574640774, - "grad_norm": 1.5690284907353913, - "learning_rate": 2.9617593673163853e-06, - "loss": 0.9312, - "step": 2995 - }, - { - "epoch": 0.36024770035471654, - "grad_norm": 2.736351737356857, - "learning_rate": 2.9610763024878216e-06, - "loss": 1.0002, - "step": 2996 - }, - { - "epoch": 0.3603679432453556, - "grad_norm": 1.8038774133676283, - "learning_rate": 2.960393091864747e-06, - "loss": 1.1399, - "step": 2997 - }, - { - "epoch": 0.3604881861359947, - "grad_norm": 1.6280688871060462, - "learning_rate": 2.959709735550804e-06, - "loss": 0.9748, - "step": 2998 - }, - { - "epoch": 0.3606084290266338, - "grad_norm": 2.200239090498777, - "learning_rate": 2.9590262336496575e-06, - "loss": 0.9861, - "step": 2999 - }, - { - "epoch": 0.36072867191727287, - "grad_norm": 1.915887204220769, - "learning_rate": 2.9583425862649936e-06, - "loss": 1.0782, - "step": 3000 - }, - { - "epoch": 0.360848914807912, - "grad_norm": 2.4750229535820587, - "learning_rate": 2.9576587935005215e-06, - "loss": 0.9641, - "step": 3001 - }, - { - "epoch": 0.3609691576985511, - "grad_norm": 2.048323257251826, - "learning_rate": 2.9569748554599713e-06, - "loss": 0.9505, - "step": 3002 - }, - { - "epoch": 0.36108940058919015, - "grad_norm": 2.289763567850461, - "learning_rate": 2.956290772247097e-06, - "loss": 0.9491, - "step": 3003 - }, - { - "epoch": 0.36120964347982926, - "grad_norm": 1.724154973028079, - "learning_rate": 2.9556065439656724e-06, - "loss": 0.9725, - "step": 3004 - }, - { - "epoch": 0.36132988637046837, - "grad_norm": 1.6052369927338412, - "learning_rate": 2.9549221707194952e-06, - "loss": 1.0572, - "step": 3005 - }, - { - "epoch": 0.3614501292611074, - "grad_norm": 2.191398525394418, - "learning_rate": 2.954237652612384e-06, - "loss": 0.9613, - "step": 3006 - }, - { - "epoch": 0.36157037215174653, - "grad_norm": 1.742740492021439, - "learning_rate": 2.9535529897481796e-06, - "loss": 1.0735, - "step": 3007 - }, - { - "epoch": 0.36169061504238564, - "grad_norm": 2.4511985260627838, - "learning_rate": 2.9528681822307446e-06, - "loss": 1.0026, - "step": 3008 - }, - { - "epoch": 0.3618108579330247, - "grad_norm": 4.3213176420311985, - "learning_rate": 2.952183230163964e-06, - "loss": 1.0574, - "step": 3009 - }, - { - "epoch": 0.3619311008236638, - "grad_norm": 1.7748462610422608, - "learning_rate": 2.9514981336517448e-06, - "loss": 0.9667, - "step": 3010 - }, - { - "epoch": 0.36205134371430286, - "grad_norm": 1.8038519043664178, - "learning_rate": 2.950812892798015e-06, - "loss": 1.0439, - "step": 3011 - }, - { - "epoch": 0.362171586604942, - "grad_norm": 1.6812950780561542, - "learning_rate": 2.9501275077067256e-06, - "loss": 1.096, - "step": 3012 - }, - { - "epoch": 0.3622918294955811, - "grad_norm": 1.5424502503519493, - "learning_rate": 2.949441978481848e-06, - "loss": 1.1125, - "step": 3013 - }, - { - "epoch": 0.36241207238622014, - "grad_norm": 1.9003936987104189, - "learning_rate": 2.9487563052273778e-06, - "loss": 1.0267, - "step": 3014 - }, - { - "epoch": 0.36253231527685925, - "grad_norm": 1.748461660264228, - "learning_rate": 2.94807048804733e-06, - "loss": 1.085, - "step": 3015 - }, - { - "epoch": 0.36265255816749836, - "grad_norm": 1.9479542148935465, - "learning_rate": 2.9473845270457434e-06, - "loss": 1.1267, - "step": 3016 - }, - { - "epoch": 0.3627728010581374, - "grad_norm": 2.0300247017997077, - "learning_rate": 2.946698422326677e-06, - "loss": 0.9296, - "step": 3017 - }, - { - "epoch": 0.36289304394877653, - "grad_norm": 1.8007299453146102, - "learning_rate": 2.946012173994213e-06, - "loss": 1.0238, - "step": 3018 - }, - { - "epoch": 0.36301328683941564, - "grad_norm": 1.3853645697337609, - "learning_rate": 2.945325782152454e-06, - "loss": 0.9101, - "step": 3019 - }, - { - "epoch": 0.3631335297300547, - "grad_norm": 2.325136865668728, - "learning_rate": 2.9446392469055257e-06, - "loss": 1.021, - "step": 3020 - }, - { - "epoch": 0.3632537726206938, - "grad_norm": 1.619761680333644, - "learning_rate": 2.9439525683575745e-06, - "loss": 1.0302, - "step": 3021 - }, - { - "epoch": 0.3633740155113329, - "grad_norm": 2.6099624429390254, - "learning_rate": 2.9432657466127694e-06, - "loss": 0.9726, - "step": 3022 - }, - { - "epoch": 0.36349425840197197, - "grad_norm": 3.1766884646980422, - "learning_rate": 2.9425787817753007e-06, - "loss": 0.9963, - "step": 3023 - }, - { - "epoch": 0.3636145012926111, - "grad_norm": 1.8967813559831812, - "learning_rate": 2.94189167394938e-06, - "loss": 0.9422, - "step": 3024 - }, - { - "epoch": 0.3637347441832502, - "grad_norm": 1.7869299293019598, - "learning_rate": 2.941204423239241e-06, - "loss": 1.0414, - "step": 3025 - }, - { - "epoch": 0.36385498707388925, - "grad_norm": 2.129179266825779, - "learning_rate": 2.9405170297491395e-06, - "loss": 0.9959, - "step": 3026 - }, - { - "epoch": 0.36397522996452836, - "grad_norm": 1.9718952788505717, - "learning_rate": 2.939829493583353e-06, - "loss": 1.0296, - "step": 3027 - }, - { - "epoch": 0.3640954728551674, - "grad_norm": 2.4652619649412784, - "learning_rate": 2.939141814846179e-06, - "loss": 1.0664, - "step": 3028 - }, - { - "epoch": 0.3642157157458065, - "grad_norm": 1.9789399937442096, - "learning_rate": 2.938453993641938e-06, - "loss": 1.051, - "step": 3029 - }, - { - "epoch": 0.36433595863644563, - "grad_norm": 1.880573381683492, - "learning_rate": 2.937766030074973e-06, - "loss": 0.931, - "step": 3030 - }, - { - "epoch": 0.3644562015270847, - "grad_norm": 1.9187480640712253, - "learning_rate": 2.937077924249646e-06, - "loss": 1.0489, - "step": 3031 - }, - { - "epoch": 0.3645764444177238, - "grad_norm": 1.9908654706981617, - "learning_rate": 2.9363896762703443e-06, - "loss": 0.9828, - "step": 3032 - }, - { - "epoch": 0.3646966873083629, - "grad_norm": 2.0602811958460987, - "learning_rate": 2.9357012862414725e-06, - "loss": 1.0675, - "step": 3033 - }, - { - "epoch": 0.36481693019900197, - "grad_norm": 1.9122596190733425, - "learning_rate": 2.9350127542674593e-06, - "loss": 0.9459, - "step": 3034 - }, - { - "epoch": 0.3649371730896411, - "grad_norm": 2.783776410940652, - "learning_rate": 2.934324080452755e-06, - "loss": 0.9923, - "step": 3035 - }, - { - "epoch": 0.3650574159802802, - "grad_norm": 1.3485662511577237, - "learning_rate": 2.9336352649018307e-06, - "loss": 1.0049, - "step": 3036 - }, - { - "epoch": 0.36517765887091924, - "grad_norm": 2.1416562234745142, - "learning_rate": 2.9329463077191783e-06, - "loss": 0.9277, - "step": 3037 - }, - { - "epoch": 0.36529790176155835, - "grad_norm": 2.623653793279469, - "learning_rate": 2.9322572090093135e-06, - "loss": 0.8709, - "step": 3038 - }, - { - "epoch": 0.36541814465219746, - "grad_norm": 2.6280434540243496, - "learning_rate": 2.9315679688767713e-06, - "loss": 0.9951, - "step": 3039 - }, - { - "epoch": 0.3655383875428365, - "grad_norm": 3.6218848161671335, - "learning_rate": 2.9308785874261085e-06, - "loss": 0.8977, - "step": 3040 - }, - { - "epoch": 0.36565863043347563, - "grad_norm": 4.666090021382622, - "learning_rate": 2.9301890647619045e-06, - "loss": 1.0413, - "step": 3041 - }, - { - "epoch": 0.36577887332411474, - "grad_norm": 2.4254261193033635, - "learning_rate": 2.929499400988759e-06, - "loss": 1.0319, - "step": 3042 - }, - { - "epoch": 0.3658991162147538, - "grad_norm": 1.7244429200177702, - "learning_rate": 2.9288095962112927e-06, - "loss": 0.8782, - "step": 3043 - }, - { - "epoch": 0.3660193591053929, - "grad_norm": 2.1782939653796443, - "learning_rate": 2.9281196505341503e-06, - "loss": 1.078, - "step": 3044 - }, - { - "epoch": 0.36613960199603196, - "grad_norm": 2.024632517119149, - "learning_rate": 2.9274295640619946e-06, - "loss": 1.0164, - "step": 3045 - }, - { - "epoch": 0.36625984488667107, - "grad_norm": 1.768478518298664, - "learning_rate": 2.9267393368995103e-06, - "loss": 1.0144, - "step": 3046 - }, - { - "epoch": 0.3663800877773102, - "grad_norm": 2.26000437221273, - "learning_rate": 2.926048969151407e-06, - "loss": 0.9759, - "step": 3047 - }, - { - "epoch": 0.36650033066794924, - "grad_norm": 1.7559808300814117, - "learning_rate": 2.92535846092241e-06, - "loss": 0.9095, - "step": 3048 - }, - { - "epoch": 0.36662057355858835, - "grad_norm": 1.7788227341472345, - "learning_rate": 2.9246678123172704e-06, - "loss": 1.052, - "step": 3049 - }, - { - "epoch": 0.36674081644922746, - "grad_norm": 2.5255812287590955, - "learning_rate": 2.9239770234407596e-06, - "loss": 0.9667, - "step": 3050 - }, - { - "epoch": 0.3668610593398665, - "grad_norm": 1.6273861751804133, - "learning_rate": 2.9232860943976686e-06, - "loss": 0.9113, - "step": 3051 - }, - { - "epoch": 0.3669813022305056, - "grad_norm": 1.606159367954483, - "learning_rate": 2.9225950252928115e-06, - "loss": 1.0711, - "step": 3052 - }, - { - "epoch": 0.36710154512114473, - "grad_norm": 2.4368747985323176, - "learning_rate": 2.9219038162310217e-06, - "loss": 1.0525, - "step": 3053 - }, - { - "epoch": 0.3672217880117838, - "grad_norm": 2.1545387886187144, - "learning_rate": 2.921212467317157e-06, - "loss": 1.0526, - "step": 3054 - }, - { - "epoch": 0.3673420309024229, - "grad_norm": 2.4591074118159013, - "learning_rate": 2.920520978656093e-06, - "loss": 1.0414, - "step": 3055 - }, - { - "epoch": 0.367462273793062, - "grad_norm": 2.193543614082826, - "learning_rate": 2.919829350352729e-06, - "loss": 0.9982, - "step": 3056 - }, - { - "epoch": 0.36758251668370107, - "grad_norm": 0.8968062145159905, - "learning_rate": 2.919137582511983e-06, - "loss": 0.8508, - "step": 3057 - }, - { - "epoch": 0.3677027595743402, - "grad_norm": 2.0747547786081664, - "learning_rate": 2.918445675238797e-06, - "loss": 0.8686, - "step": 3058 - }, - { - "epoch": 0.36782300246497923, - "grad_norm": 1.7918873436632035, - "learning_rate": 2.917753628638132e-06, - "loss": 0.9324, - "step": 3059 - }, - { - "epoch": 0.36794324535561834, - "grad_norm": 2.0973602597560825, - "learning_rate": 2.9170614428149716e-06, - "loss": 0.9371, - "step": 3060 - }, - { - "epoch": 0.36806348824625745, - "grad_norm": 3.2891471652553084, - "learning_rate": 2.9163691178743195e-06, - "loss": 1.093, - "step": 3061 - }, - { - "epoch": 0.3681837311368965, - "grad_norm": 2.1961692142989513, - "learning_rate": 2.9156766539212006e-06, - "loss": 1.01, - "step": 3062 - }, - { - "epoch": 0.3683039740275356, - "grad_norm": 1.9893618422111161, - "learning_rate": 2.9149840510606614e-06, - "loss": 0.9448, - "step": 3063 - }, - { - "epoch": 0.36842421691817473, - "grad_norm": 1.035670717619639, - "learning_rate": 2.914291309397769e-06, - "loss": 0.9155, - "step": 3064 - }, - { - "epoch": 0.3685444598088138, - "grad_norm": 1.9303542618435745, - "learning_rate": 2.9135984290376117e-06, - "loss": 1.0191, - "step": 3065 - }, - { - "epoch": 0.3686647026994529, - "grad_norm": 1.709878881414785, - "learning_rate": 2.9129054100853e-06, - "loss": 1.0612, - "step": 3066 - }, - { - "epoch": 0.368784945590092, - "grad_norm": 1.562140232629432, - "learning_rate": 2.912212252645963e-06, - "loss": 0.9856, - "step": 3067 - }, - { - "epoch": 0.36890518848073106, - "grad_norm": 1.8623891810276623, - "learning_rate": 2.9115189568247523e-06, - "loss": 0.9821, - "step": 3068 - }, - { - "epoch": 0.36902543137137017, - "grad_norm": 2.3200207664941517, - "learning_rate": 2.910825522726841e-06, - "loss": 1.1441, - "step": 3069 - }, - { - "epoch": 0.3691456742620093, - "grad_norm": 2.501564013488426, - "learning_rate": 2.9101319504574215e-06, - "loss": 0.9944, - "step": 3070 - }, - { - "epoch": 0.36926591715264834, - "grad_norm": 1.9342687180121134, - "learning_rate": 2.909438240121709e-06, - "loss": 0.9875, - "step": 3071 - }, - { - "epoch": 0.36938616004328745, - "grad_norm": 1.5299183251515491, - "learning_rate": 2.908744391824939e-06, - "loss": 0.9294, - "step": 3072 - }, - { - "epoch": 0.36950640293392656, - "grad_norm": 1.7104632273325506, - "learning_rate": 2.908050405672367e-06, - "loss": 1.0155, - "step": 3073 - }, - { - "epoch": 0.3696266458245656, - "grad_norm": 1.8854082504281322, - "learning_rate": 2.9073562817692703e-06, - "loss": 1.0138, - "step": 3074 - }, - { - "epoch": 0.3697468887152047, - "grad_norm": 0.7972539486887465, - "learning_rate": 2.9066620202209468e-06, - "loss": 0.8412, - "step": 3075 - }, - { - "epoch": 0.3698671316058438, - "grad_norm": 1.786390216147848, - "learning_rate": 2.905967621132716e-06, - "loss": 1.013, - "step": 3076 - }, - { - "epoch": 0.3699873744964829, - "grad_norm": 1.9942850477207363, - "learning_rate": 2.9052730846099172e-06, - "loss": 0.9784, - "step": 3077 - }, - { - "epoch": 0.370107617387122, - "grad_norm": 0.8722002673743734, - "learning_rate": 2.9045784107579123e-06, - "loss": 0.8795, - "step": 3078 - }, - { - "epoch": 0.37022786027776106, - "grad_norm": 1.814514750408346, - "learning_rate": 2.9038835996820807e-06, - "loss": 0.8947, - "step": 3079 - }, - { - "epoch": 0.37034810316840017, - "grad_norm": 1.7173282638488803, - "learning_rate": 2.903188651487826e-06, - "loss": 1.017, - "step": 3080 - }, - { - "epoch": 0.3704683460590393, - "grad_norm": 2.1932296918404726, - "learning_rate": 2.902493566280571e-06, - "loss": 1.097, - "step": 3081 - }, - { - "epoch": 0.37058858894967833, - "grad_norm": 1.9363165902199555, - "learning_rate": 2.9017983441657595e-06, - "loss": 1.0403, - "step": 3082 - }, - { - "epoch": 0.37070883184031744, - "grad_norm": 2.195275465900647, - "learning_rate": 2.9011029852488564e-06, - "loss": 0.9918, - "step": 3083 - }, - { - "epoch": 0.37082907473095655, - "grad_norm": 1.0061328108886358, - "learning_rate": 2.9004074896353465e-06, - "loss": 0.9084, - "step": 3084 - }, - { - "epoch": 0.3709493176215956, - "grad_norm": 3.491907027415013, - "learning_rate": 2.8997118574307362e-06, - "loss": 1.0466, - "step": 3085 - }, - { - "epoch": 0.3710695605122347, - "grad_norm": 4.882179646488435, - "learning_rate": 2.899016088740553e-06, - "loss": 0.9751, - "step": 3086 - }, - { - "epoch": 0.37118980340287383, - "grad_norm": 1.8090689181162736, - "learning_rate": 2.898320183670344e-06, - "loss": 1.023, - "step": 3087 - }, - { - "epoch": 0.3713100462935129, - "grad_norm": 1.6580854358403176, - "learning_rate": 2.8976241423256767e-06, - "loss": 1.116, - "step": 3088 - }, - { - "epoch": 0.371430289184152, - "grad_norm": 1.8735341382017692, - "learning_rate": 2.896927964812142e-06, - "loss": 0.9118, - "step": 3089 - }, - { - "epoch": 0.37155053207479105, - "grad_norm": 5.976913222450295, - "learning_rate": 2.8962316512353465e-06, - "loss": 0.9825, - "step": 3090 - }, - { - "epoch": 0.37167077496543016, - "grad_norm": 1.700128982363145, - "learning_rate": 2.8955352017009233e-06, - "loss": 0.9809, - "step": 3091 - }, - { - "epoch": 0.3717910178560693, - "grad_norm": 1.812362336817321, - "learning_rate": 2.8948386163145212e-06, - "loss": 1.0032, - "step": 3092 - }, - { - "epoch": 0.3719112607467083, - "grad_norm": 1.6630387716722677, - "learning_rate": 2.8941418951818135e-06, - "loss": 1.0227, - "step": 3093 - }, - { - "epoch": 0.37203150363734744, - "grad_norm": 2.0128324814521132, - "learning_rate": 2.8934450384084903e-06, - "loss": 0.9449, - "step": 3094 - }, - { - "epoch": 0.37215174652798655, - "grad_norm": 1.995287947178798, - "learning_rate": 2.8927480461002653e-06, - "loss": 0.9417, - "step": 3095 - }, - { - "epoch": 0.3722719894186256, - "grad_norm": 2.608690000610864, - "learning_rate": 2.892050918362872e-06, - "loss": 1.0912, - "step": 3096 - }, - { - "epoch": 0.3723922323092647, - "grad_norm": 0.891500678314105, - "learning_rate": 2.8913536553020626e-06, - "loss": 0.8433, - "step": 3097 - }, - { - "epoch": 0.3725124751999038, - "grad_norm": 5.761917038431191, - "learning_rate": 2.8906562570236137e-06, - "loss": 1.0829, - "step": 3098 - }, - { - "epoch": 0.3726327180905429, - "grad_norm": 1.7181686805454024, - "learning_rate": 2.889958723633318e-06, - "loss": 0.9925, - "step": 3099 - }, - { - "epoch": 0.372752960981182, - "grad_norm": 1.5816683129213942, - "learning_rate": 2.889261055236992e-06, - "loss": 0.9678, - "step": 3100 - }, - { - "epoch": 0.3728732038718211, - "grad_norm": 1.6758254963473267, - "learning_rate": 2.8885632519404704e-06, - "loss": 1.048, - "step": 3101 - }, - { - "epoch": 0.37299344676246016, - "grad_norm": 1.7943025170026299, - "learning_rate": 2.8878653138496107e-06, - "loss": 0.9815, - "step": 3102 - }, - { - "epoch": 0.37311368965309927, - "grad_norm": 2.252238855158852, - "learning_rate": 2.8871672410702878e-06, - "loss": 0.9839, - "step": 3103 - }, - { - "epoch": 0.3732339325437384, - "grad_norm": 3.5599112223153266, - "learning_rate": 2.8864690337084008e-06, - "loss": 1.0466, - "step": 3104 - }, - { - "epoch": 0.37335417543437743, - "grad_norm": 1.757869058334898, - "learning_rate": 2.885770691869866e-06, - "loss": 1.0027, - "step": 3105 - }, - { - "epoch": 0.37347441832501654, - "grad_norm": 8.124617817033092, - "learning_rate": 2.8850722156606207e-06, - "loss": 0.9718, - "step": 3106 - }, - { - "epoch": 0.3735946612156556, - "grad_norm": 1.5623889883660238, - "learning_rate": 2.8843736051866252e-06, - "loss": 0.8992, - "step": 3107 - }, - { - "epoch": 0.3737149041062947, - "grad_norm": 1.6026117376926206, - "learning_rate": 2.8836748605538557e-06, - "loss": 0.9226, - "step": 3108 - }, - { - "epoch": 0.3738351469969338, - "grad_norm": 2.0590874154208416, - "learning_rate": 2.882975981868313e-06, - "loss": 0.8576, - "step": 3109 - }, - { - "epoch": 0.3739553898875729, - "grad_norm": 2.10958160518862, - "learning_rate": 2.882276969236016e-06, - "loss": 0.9185, - "step": 3110 - }, - { - "epoch": 0.374075632778212, - "grad_norm": 2.191137287137432, - "learning_rate": 2.881577822763005e-06, - "loss": 0.9971, - "step": 3111 - }, - { - "epoch": 0.3741958756688511, - "grad_norm": 2.151030373408958, - "learning_rate": 2.880878542555338e-06, - "loss": 1.1042, - "step": 3112 - }, - { - "epoch": 0.37431611855949015, - "grad_norm": 3.739648391596921, - "learning_rate": 2.8801791287190976e-06, - "loss": 1.0283, - "step": 3113 - }, - { - "epoch": 0.37443636145012926, - "grad_norm": 4.32194832983027, - "learning_rate": 2.8794795813603817e-06, - "loss": 1.0877, - "step": 3114 - }, - { - "epoch": 0.3745566043407684, - "grad_norm": 4.593009629564844, - "learning_rate": 2.878779900585314e-06, - "loss": 1.0475, - "step": 3115 - }, - { - "epoch": 0.37467684723140743, - "grad_norm": 1.5449201002725663, - "learning_rate": 2.8780800865000336e-06, - "loss": 0.9891, - "step": 3116 - }, - { - "epoch": 0.37479709012204654, - "grad_norm": 1.0293170847347375, - "learning_rate": 2.877380139210702e-06, - "loss": 0.8883, - "step": 3117 - }, - { - "epoch": 0.37491733301268565, - "grad_norm": 1.6131709248132895, - "learning_rate": 2.876680058823501e-06, - "loss": 0.9873, - "step": 3118 - }, - { - "epoch": 0.3750375759033247, - "grad_norm": 2.4068164901734, - "learning_rate": 2.8759798454446314e-06, - "loss": 0.8902, - "step": 3119 - }, - { - "epoch": 0.3751578187939638, - "grad_norm": 1.8516051854874986, - "learning_rate": 2.8752794991803173e-06, - "loss": 1.049, - "step": 3120 - }, - { - "epoch": 0.37527806168460287, - "grad_norm": 2.013757831503796, - "learning_rate": 2.8745790201367976e-06, - "loss": 0.9746, - "step": 3121 - }, - { - "epoch": 0.375398304575242, - "grad_norm": 2.385971027131212, - "learning_rate": 2.8738784084203373e-06, - "loss": 1.0773, - "step": 3122 - }, - { - "epoch": 0.3755185474658811, - "grad_norm": 1.5596056261167093, - "learning_rate": 2.873177664137216e-06, - "loss": 1.0167, - "step": 3123 - }, - { - "epoch": 0.37563879035652015, - "grad_norm": 4.864704705084963, - "learning_rate": 2.8724767873937384e-06, - "loss": 0.9191, - "step": 3124 - }, - { - "epoch": 0.37575903324715926, - "grad_norm": 1.852297544446089, - "learning_rate": 2.871775778296225e-06, - "loss": 1.1075, - "step": 3125 - }, - { - "epoch": 0.37587927613779837, - "grad_norm": 2.132459450590009, - "learning_rate": 2.8710746369510196e-06, - "loss": 1.0171, - "step": 3126 - }, - { - "epoch": 0.3759995190284374, - "grad_norm": 3.607880253938289, - "learning_rate": 2.8703733634644846e-06, - "loss": 1.0641, - "step": 3127 - }, - { - "epoch": 0.37611976191907653, - "grad_norm": 1.6703278700546924, - "learning_rate": 2.869671957943002e-06, - "loss": 1.0245, - "step": 3128 - }, - { - "epoch": 0.37624000480971564, - "grad_norm": 2.187227286641737, - "learning_rate": 2.8689704204929747e-06, - "loss": 0.9759, - "step": 3129 - }, - { - "epoch": 0.3763602477003547, - "grad_norm": 1.7841577053542865, - "learning_rate": 2.8682687512208253e-06, - "loss": 1.0274, - "step": 3130 - }, - { - "epoch": 0.3764804905909938, - "grad_norm": 1.961362329187443, - "learning_rate": 2.8675669502329972e-06, - "loss": 1.038, - "step": 3131 - }, - { - "epoch": 0.3766007334816329, - "grad_norm": 2.2066844566450206, - "learning_rate": 2.866865017635952e-06, - "loss": 1.0768, - "step": 3132 - }, - { - "epoch": 0.376720976372272, - "grad_norm": 1.9891591110799822, - "learning_rate": 2.866162953536174e-06, - "loss": 1.0174, - "step": 3133 - }, - { - "epoch": 0.3768412192629111, - "grad_norm": 1.610739268585554, - "learning_rate": 2.8654607580401634e-06, - "loss": 0.9752, - "step": 3134 - }, - { - "epoch": 0.3769614621535502, - "grad_norm": 0.9163340521402917, - "learning_rate": 2.8647584312544446e-06, - "loss": 0.9158, - "step": 3135 - }, - { - "epoch": 0.37708170504418925, - "grad_norm": 1.3366050613901128, - "learning_rate": 2.864055973285559e-06, - "loss": 1.0813, - "step": 3136 - }, - { - "epoch": 0.37720194793482836, - "grad_norm": 2.011210731137218, - "learning_rate": 2.8633533842400698e-06, - "loss": 1.1, - "step": 3137 - }, - { - "epoch": 0.3773221908254674, - "grad_norm": 1.9093086218651092, - "learning_rate": 2.862650664224558e-06, - "loss": 1.0173, - "step": 3138 - }, - { - "epoch": 0.37744243371610653, - "grad_norm": 1.796904787563052, - "learning_rate": 2.861947813345627e-06, - "loss": 0.9312, - "step": 3139 - }, - { - "epoch": 0.37756267660674564, - "grad_norm": 2.3085214568048356, - "learning_rate": 2.8612448317098974e-06, - "loss": 0.9484, - "step": 3140 - }, - { - "epoch": 0.3776829194973847, - "grad_norm": 3.307366766289734, - "learning_rate": 2.8605417194240114e-06, - "loss": 1.0636, - "step": 3141 - }, - { - "epoch": 0.3778031623880238, - "grad_norm": 2.2182188875907083, - "learning_rate": 2.8598384765946315e-06, - "loss": 1.0249, - "step": 3142 - }, - { - "epoch": 0.3779234052786629, - "grad_norm": 1.8598832149143443, - "learning_rate": 2.8591351033284377e-06, - "loss": 0.9499, - "step": 3143 - }, - { - "epoch": 0.37804364816930197, - "grad_norm": 1.8782512450989022, - "learning_rate": 2.8584315997321325e-06, - "loss": 1.0639, - "step": 3144 - }, - { - "epoch": 0.3781638910599411, - "grad_norm": 3.2936591061640224, - "learning_rate": 2.8577279659124356e-06, - "loss": 1.0096, - "step": 3145 - }, - { - "epoch": 0.3782841339505802, - "grad_norm": 3.967318178949102, - "learning_rate": 2.857024201976089e-06, - "loss": 1.0558, - "step": 3146 - }, - { - "epoch": 0.37840437684121925, - "grad_norm": 2.179918283803331, - "learning_rate": 2.8563203080298516e-06, - "loss": 0.971, - "step": 3147 - }, - { - "epoch": 0.37852461973185836, - "grad_norm": 2.5313737627078856, - "learning_rate": 2.855616284180505e-06, - "loss": 1.1185, - "step": 3148 - }, - { - "epoch": 0.37864486262249747, - "grad_norm": 0.9325634047094349, - "learning_rate": 2.8549121305348477e-06, - "loss": 0.9593, - "step": 3149 - }, - { - "epoch": 0.3787651055131365, - "grad_norm": 2.017693338123453, - "learning_rate": 2.8542078471997006e-06, - "loss": 1.0548, - "step": 3150 - }, - { - "epoch": 0.37888534840377563, - "grad_norm": 1.7189916267479577, - "learning_rate": 2.8535034342819013e-06, - "loss": 0.9853, - "step": 3151 - }, - { - "epoch": 0.37900559129441475, - "grad_norm": 1.6081668800567086, - "learning_rate": 2.85279889188831e-06, - "loss": 0.9473, - "step": 3152 - }, - { - "epoch": 0.3791258341850538, - "grad_norm": 1.5255722348075567, - "learning_rate": 2.852094220125805e-06, - "loss": 1.0361, - "step": 3153 - }, - { - "epoch": 0.3792460770756929, - "grad_norm": 2.373817852350191, - "learning_rate": 2.8513894191012846e-06, - "loss": 0.9378, - "step": 3154 - }, - { - "epoch": 0.37936631996633197, - "grad_norm": 1.4224614202165184, - "learning_rate": 2.8506844889216664e-06, - "loss": 1.0097, - "step": 3155 - }, - { - "epoch": 0.3794865628569711, - "grad_norm": 0.9116659781670164, - "learning_rate": 2.849979429693887e-06, - "loss": 0.9002, - "step": 3156 - }, - { - "epoch": 0.3796068057476102, - "grad_norm": 1.90183503479074, - "learning_rate": 2.8492742415249042e-06, - "loss": 0.9742, - "step": 3157 - }, - { - "epoch": 0.37972704863824924, - "grad_norm": 1.5275526757530549, - "learning_rate": 2.848568924521694e-06, - "loss": 0.9918, - "step": 3158 - }, - { - "epoch": 0.37984729152888835, - "grad_norm": 1.7191330916281489, - "learning_rate": 2.8478634787912526e-06, - "loss": 0.9588, - "step": 3159 - }, - { - "epoch": 0.37996753441952746, - "grad_norm": 1.9198951758141647, - "learning_rate": 2.847157904440596e-06, - "loss": 1.002, - "step": 3160 - }, - { - "epoch": 0.3800877773101665, - "grad_norm": 1.5342346997417318, - "learning_rate": 2.846452201576759e-06, - "loss": 0.9835, - "step": 3161 - }, - { - "epoch": 0.38020802020080563, - "grad_norm": 0.9371361662265709, - "learning_rate": 2.845746370306795e-06, - "loss": 0.8861, - "step": 3162 - }, - { - "epoch": 0.38032826309144474, - "grad_norm": 1.872564577891351, - "learning_rate": 2.84504041073778e-06, - "loss": 1.0132, - "step": 3163 - }, - { - "epoch": 0.3804485059820838, - "grad_norm": 2.388135623703593, - "learning_rate": 2.844334322976806e-06, - "loss": 1.0299, - "step": 3164 - }, - { - "epoch": 0.3805687488727229, - "grad_norm": 1.6328335263299292, - "learning_rate": 2.8436281071309866e-06, - "loss": 1.0627, - "step": 3165 - }, - { - "epoch": 0.380688991763362, - "grad_norm": 0.7534220825795012, - "learning_rate": 2.842921763307455e-06, - "loss": 0.7858, - "step": 3166 - }, - { - "epoch": 0.38080923465400107, - "grad_norm": 1.788181537799696, - "learning_rate": 2.842215291613361e-06, - "loss": 1.0609, - "step": 3167 - }, - { - "epoch": 0.3809294775446402, - "grad_norm": 0.8733516562216561, - "learning_rate": 2.8415086921558774e-06, - "loss": 0.8729, - "step": 3168 - }, - { - "epoch": 0.38104972043527924, - "grad_norm": 1.6313383027362054, - "learning_rate": 2.840801965042194e-06, - "loss": 1.0131, - "step": 3169 - }, - { - "epoch": 0.38116996332591835, - "grad_norm": 1.6422696544012905, - "learning_rate": 2.840095110379521e-06, - "loss": 1.0685, - "step": 3170 - }, - { - "epoch": 0.38129020621655746, - "grad_norm": 0.7354334650111337, - "learning_rate": 2.8393881282750884e-06, - "loss": 0.7849, - "step": 3171 - }, - { - "epoch": 0.3814104491071965, - "grad_norm": 1.9648630433244494, - "learning_rate": 2.838681018836144e-06, - "loss": 1.0096, - "step": 3172 - }, - { - "epoch": 0.3815306919978356, - "grad_norm": 1.917939910361078, - "learning_rate": 2.837973782169955e-06, - "loss": 1.0014, - "step": 3173 - }, - { - "epoch": 0.38165093488847474, - "grad_norm": 0.8697433953205772, - "learning_rate": 2.8372664183838096e-06, - "loss": 0.8576, - "step": 3174 - }, - { - "epoch": 0.3817711777791138, - "grad_norm": 2.417229115597038, - "learning_rate": 2.836558927585015e-06, - "loss": 0.9185, - "step": 3175 - }, - { - "epoch": 0.3818914206697529, - "grad_norm": 2.035313810667005, - "learning_rate": 2.8358513098808957e-06, - "loss": 1.0499, - "step": 3176 - }, - { - "epoch": 0.382011663560392, - "grad_norm": 2.0560975548721823, - "learning_rate": 2.835143565378798e-06, - "loss": 0.9992, - "step": 3177 - }, - { - "epoch": 0.38213190645103107, - "grad_norm": 2.020302129189292, - "learning_rate": 2.8344356941860847e-06, - "loss": 1.0056, - "step": 3178 - }, - { - "epoch": 0.3822521493416702, - "grad_norm": 2.069691230117406, - "learning_rate": 2.8337276964101403e-06, - "loss": 0.8936, - "step": 3179 - }, - { - "epoch": 0.3823723922323093, - "grad_norm": 2.013810871703136, - "learning_rate": 2.833019572158367e-06, - "loss": 0.9842, - "step": 3180 - }, - { - "epoch": 0.38249263512294834, - "grad_norm": 5.9907597279410165, - "learning_rate": 2.8323113215381872e-06, - "loss": 1.0371, - "step": 3181 - }, - { - "epoch": 0.38261287801358745, - "grad_norm": 1.8821925492007574, - "learning_rate": 2.831602944657042e-06, - "loss": 0.9835, - "step": 3182 - }, - { - "epoch": 0.38273312090422656, - "grad_norm": 6.079395779150325, - "learning_rate": 2.830894441622391e-06, - "loss": 0.9739, - "step": 3183 - }, - { - "epoch": 0.3828533637948656, - "grad_norm": 3.327661902933406, - "learning_rate": 2.8301858125417134e-06, - "loss": 1.0251, - "step": 3184 - }, - { - "epoch": 0.38297360668550473, - "grad_norm": 1.9277028368193316, - "learning_rate": 2.8294770575225082e-06, - "loss": 0.9728, - "step": 3185 - }, - { - "epoch": 0.3830938495761438, - "grad_norm": 1.7517509556437396, - "learning_rate": 2.828768176672293e-06, - "loss": 1.0681, - "step": 3186 - }, - { - "epoch": 0.3832140924667829, - "grad_norm": 1.7684382079602146, - "learning_rate": 2.8280591700986044e-06, - "loss": 0.954, - "step": 3187 - }, - { - "epoch": 0.383334335357422, - "grad_norm": 1.8314072577229066, - "learning_rate": 2.827350037908999e-06, - "loss": 0.9866, - "step": 3188 - }, - { - "epoch": 0.38345457824806106, - "grad_norm": 2.6018412672646853, - "learning_rate": 2.8266407802110496e-06, - "loss": 1.0226, - "step": 3189 - }, - { - "epoch": 0.3835748211387002, - "grad_norm": 1.9287436335242882, - "learning_rate": 2.8259313971123515e-06, - "loss": 0.9854, - "step": 3190 - }, - { - "epoch": 0.3836950640293393, - "grad_norm": 1.477799235732304, - "learning_rate": 2.8252218887205166e-06, - "loss": 1.0003, - "step": 3191 - }, - { - "epoch": 0.38381530691997834, - "grad_norm": 1.8301506231679554, - "learning_rate": 2.824512255143178e-06, - "loss": 1.0382, - "step": 3192 - }, - { - "epoch": 0.38393554981061745, - "grad_norm": 1.8720983941922582, - "learning_rate": 2.8238024964879855e-06, - "loss": 1.0177, - "step": 3193 - }, - { - "epoch": 0.38405579270125656, - "grad_norm": 2.576153180312665, - "learning_rate": 2.8230926128626095e-06, - "loss": 0.9917, - "step": 3194 - }, - { - "epoch": 0.3841760355918956, - "grad_norm": 1.7456990250278759, - "learning_rate": 2.822382604374738e-06, - "loss": 1.0189, - "step": 3195 - }, - { - "epoch": 0.3842962784825347, - "grad_norm": 2.132949768791221, - "learning_rate": 2.8216724711320793e-06, - "loss": 0.8872, - "step": 3196 - }, - { - "epoch": 0.38441652137317384, - "grad_norm": 1.5073965813382275, - "learning_rate": 2.820962213242361e-06, - "loss": 1.0317, - "step": 3197 - }, - { - "epoch": 0.3845367642638129, - "grad_norm": 4.16056358957949, - "learning_rate": 2.8202518308133264e-06, - "loss": 1.0761, - "step": 3198 - }, - { - "epoch": 0.384657007154452, - "grad_norm": 2.64502210020124, - "learning_rate": 2.8195413239527426e-06, - "loss": 0.9657, - "step": 3199 - }, - { - "epoch": 0.38477725004509106, - "grad_norm": 1.63108800324839, - "learning_rate": 2.8188306927683906e-06, - "loss": 1.0371, - "step": 3200 - }, - { - "epoch": 0.38489749293573017, - "grad_norm": 2.0036067627311165, - "learning_rate": 2.818119937368074e-06, - "loss": 0.9817, - "step": 3201 - }, - { - "epoch": 0.3850177358263693, - "grad_norm": 2.986926999865724, - "learning_rate": 2.817409057859613e-06, - "loss": 0.8819, - "step": 3202 - }, - { - "epoch": 0.38513797871700833, - "grad_norm": 1.5943111852827159, - "learning_rate": 2.8166980543508482e-06, - "loss": 1.0161, - "step": 3203 - }, - { - "epoch": 0.38525822160764744, - "grad_norm": 1.815417014206538, - "learning_rate": 2.815986926949638e-06, - "loss": 1.0263, - "step": 3204 - }, - { - "epoch": 0.38537846449828655, - "grad_norm": 1.5358447988947692, - "learning_rate": 2.8152756757638597e-06, - "loss": 1.0291, - "step": 3205 - }, - { - "epoch": 0.3854987073889256, - "grad_norm": 1.9028926738877239, - "learning_rate": 2.8145643009014093e-06, - "loss": 1.068, - "step": 3206 - }, - { - "epoch": 0.3856189502795647, - "grad_norm": 1.7406841000945639, - "learning_rate": 2.813852802470202e-06, - "loss": 1.0226, - "step": 3207 - }, - { - "epoch": 0.38573919317020383, - "grad_norm": 2.037434830822781, - "learning_rate": 2.8131411805781717e-06, - "loss": 0.9535, - "step": 3208 - }, - { - "epoch": 0.3858594360608429, - "grad_norm": 2.30163278673094, - "learning_rate": 2.8124294353332707e-06, - "loss": 0.8678, - "step": 3209 - }, - { - "epoch": 0.385979678951482, - "grad_norm": 1.8113579276016587, - "learning_rate": 2.8117175668434713e-06, - "loss": 1.007, - "step": 3210 - }, - { - "epoch": 0.3860999218421211, - "grad_norm": 2.0078288868038734, - "learning_rate": 2.811005575216762e-06, - "loss": 0.92, - "step": 3211 - }, - { - "epoch": 0.38622016473276016, - "grad_norm": 1.2736473612662949, - "learning_rate": 2.8102934605611513e-06, - "loss": 1.0129, - "step": 3212 - }, - { - "epoch": 0.3863404076233993, - "grad_norm": 1.9306978985875423, - "learning_rate": 2.8095812229846665e-06, - "loss": 0.9026, - "step": 3213 - }, - { - "epoch": 0.3864606505140384, - "grad_norm": 2.7320513634272663, - "learning_rate": 2.808868862595355e-06, - "loss": 0.9221, - "step": 3214 - }, - { - "epoch": 0.38658089340467744, - "grad_norm": 1.772636198936634, - "learning_rate": 2.8081563795012795e-06, - "loss": 1.0281, - "step": 3215 - }, - { - "epoch": 0.38670113629531655, - "grad_norm": 1.678627495858353, - "learning_rate": 2.807443773810524e-06, - "loss": 0.9645, - "step": 3216 - }, - { - "epoch": 0.3868213791859556, - "grad_norm": 1.929620409583021, - "learning_rate": 2.80673104563119e-06, - "loss": 1.1202, - "step": 3217 - }, - { - "epoch": 0.3869416220765947, - "grad_norm": 1.8926202003675634, - "learning_rate": 2.8060181950713976e-06, - "loss": 1.008, - "step": 3218 - }, - { - "epoch": 0.3870618649672338, - "grad_norm": 1.7823945099591982, - "learning_rate": 2.805305222239286e-06, - "loss": 1.0365, - "step": 3219 - }, - { - "epoch": 0.3871821078578729, - "grad_norm": 2.4328242718773776, - "learning_rate": 2.8045921272430118e-06, - "loss": 0.9647, - "step": 3220 - }, - { - "epoch": 0.387302350748512, - "grad_norm": 2.5685706454217803, - "learning_rate": 2.803878910190753e-06, - "loss": 0.989, - "step": 3221 - }, - { - "epoch": 0.3874225936391511, - "grad_norm": 2.7065479458139112, - "learning_rate": 2.8031655711907017e-06, - "loss": 1.0495, - "step": 3222 - }, - { - "epoch": 0.38754283652979016, - "grad_norm": 1.9564111758979672, - "learning_rate": 2.8024521103510723e-06, - "loss": 1.0436, - "step": 3223 - }, - { - "epoch": 0.38766307942042927, - "grad_norm": 1.6145051834717892, - "learning_rate": 2.8017385277800952e-06, - "loss": 0.9834, - "step": 3224 - }, - { - "epoch": 0.3877833223110684, - "grad_norm": 2.1102632136035724, - "learning_rate": 2.8010248235860213e-06, - "loss": 0.9763, - "step": 3225 - }, - { - "epoch": 0.38790356520170743, - "grad_norm": 0.8359484092499676, - "learning_rate": 2.8003109978771192e-06, - "loss": 0.8986, - "step": 3226 - }, - { - "epoch": 0.38802380809234654, - "grad_norm": 2.294013284234027, - "learning_rate": 2.799597050761674e-06, - "loss": 1.0229, - "step": 3227 - }, - { - "epoch": 0.38814405098298566, - "grad_norm": 1.9659780940235372, - "learning_rate": 2.7988829823479924e-06, - "loss": 1.0285, - "step": 3228 - }, - { - "epoch": 0.3882642938736247, - "grad_norm": 1.792509730803441, - "learning_rate": 2.7981687927443976e-06, - "loss": 0.8697, - "step": 3229 - }, - { - "epoch": 0.3883845367642638, - "grad_norm": 2.6348916738346224, - "learning_rate": 2.797454482059231e-06, - "loss": 1.0805, - "step": 3230 - }, - { - "epoch": 0.3885047796549029, - "grad_norm": 1.5997757039211606, - "learning_rate": 2.7967400504008537e-06, - "loss": 1.0748, - "step": 3231 - }, - { - "epoch": 0.388625022545542, - "grad_norm": 0.8750056539080378, - "learning_rate": 2.7960254978776456e-06, - "loss": 0.8546, - "step": 3232 - }, - { - "epoch": 0.3887452654361811, - "grad_norm": 2.971045319500755, - "learning_rate": 2.7953108245980006e-06, - "loss": 1.0396, - "step": 3233 - }, - { - "epoch": 0.38886550832682015, - "grad_norm": 1.8834914886549643, - "learning_rate": 2.7945960306703365e-06, - "loss": 0.9701, - "step": 3234 - }, - { - "epoch": 0.38898575121745926, - "grad_norm": 1.5384610932606273, - "learning_rate": 2.7938811162030865e-06, - "loss": 0.8852, - "step": 3235 - }, - { - "epoch": 0.3891059941080984, - "grad_norm": 1.5558737824788376, - "learning_rate": 2.793166081304702e-06, - "loss": 1.0554, - "step": 3236 - }, - { - "epoch": 0.38922623699873743, - "grad_norm": 7.745800972703871, - "learning_rate": 2.7924509260836543e-06, - "loss": 1.0525, - "step": 3237 - }, - { - "epoch": 0.38934647988937654, - "grad_norm": 1.4591780895234105, - "learning_rate": 2.791735650648431e-06, - "loss": 0.9151, - "step": 3238 - }, - { - "epoch": 0.38946672278001565, - "grad_norm": 2.7627722903733107, - "learning_rate": 2.791020255107538e-06, - "loss": 0.9793, - "step": 3239 - }, - { - "epoch": 0.3895869656706547, - "grad_norm": 1.3984978092764266, - "learning_rate": 2.7903047395695023e-06, - "loss": 1.0275, - "step": 3240 - }, - { - "epoch": 0.3897072085612938, - "grad_norm": 1.7949574729535833, - "learning_rate": 2.789589104142865e-06, - "loss": 1.1299, - "step": 3241 - }, - { - "epoch": 0.3898274514519329, - "grad_norm": 1.5920529773849528, - "learning_rate": 2.7888733489361895e-06, - "loss": 0.9975, - "step": 3242 - }, - { - "epoch": 0.389947694342572, - "grad_norm": 0.7851812254316355, - "learning_rate": 2.788157474058054e-06, - "loss": 0.8806, - "step": 3243 - }, - { - "epoch": 0.3900679372332111, - "grad_norm": 1.4689167820812905, - "learning_rate": 2.7874414796170555e-06, - "loss": 0.9307, - "step": 3244 - }, - { - "epoch": 0.3901881801238502, - "grad_norm": 2.1571172545798487, - "learning_rate": 2.7867253657218113e-06, - "loss": 1.0698, - "step": 3245 - }, - { - "epoch": 0.39030842301448926, - "grad_norm": 1.4768306751032854, - "learning_rate": 2.7860091324809544e-06, - "loss": 0.9601, - "step": 3246 - }, - { - "epoch": 0.39042866590512837, - "grad_norm": 1.6435794930447616, - "learning_rate": 2.7852927800031377e-06, - "loss": 1.0359, - "step": 3247 - }, - { - "epoch": 0.3905489087957674, - "grad_norm": 1.5751717004584906, - "learning_rate": 2.7845763083970298e-06, - "loss": 1.0584, - "step": 3248 - }, - { - "epoch": 0.39066915168640653, - "grad_norm": 1.787477149017032, - "learning_rate": 2.7838597177713205e-06, - "loss": 1.0537, - "step": 3249 - }, - { - "epoch": 0.39078939457704565, - "grad_norm": 1.7302003398284207, - "learning_rate": 2.7831430082347143e-06, - "loss": 0.9639, - "step": 3250 - }, - { - "epoch": 0.3909096374676847, - "grad_norm": 1.746907226046877, - "learning_rate": 2.7824261798959373e-06, - "loss": 1.0544, - "step": 3251 - }, - { - "epoch": 0.3910298803583238, - "grad_norm": 1.7150636154031607, - "learning_rate": 2.78170923286373e-06, - "loss": 1.0246, - "step": 3252 - }, - { - "epoch": 0.3911501232489629, - "grad_norm": 2.0670539980083666, - "learning_rate": 2.780992167246854e-06, - "loss": 1.0604, - "step": 3253 - }, - { - "epoch": 0.391270366139602, - "grad_norm": 0.9847116046208161, - "learning_rate": 2.7802749831540883e-06, - "loss": 1.0098, - "step": 3254 - }, - { - "epoch": 0.3913906090302411, - "grad_norm": 2.0446961432240327, - "learning_rate": 2.7795576806942268e-06, - "loss": 1.0467, - "step": 3255 - }, - { - "epoch": 0.3915108519208802, - "grad_norm": 0.7985024756104773, - "learning_rate": 2.778840259976085e-06, - "loss": 0.8078, - "step": 3256 - }, - { - "epoch": 0.39163109481151925, - "grad_norm": 2.3011801718706524, - "learning_rate": 2.778122721108495e-06, - "loss": 1.0007, - "step": 3257 - }, - { - "epoch": 0.39175133770215836, - "grad_norm": 1.873386769884491, - "learning_rate": 2.7774050642003076e-06, - "loss": 1.1137, - "step": 3258 - }, - { - "epoch": 0.3918715805927975, - "grad_norm": 1.955862156748103, - "learning_rate": 2.7766872893603896e-06, - "loss": 1.1647, - "step": 3259 - }, - { - "epoch": 0.39199182348343653, - "grad_norm": 1.6482809995874714, - "learning_rate": 2.7759693966976275e-06, - "loss": 0.9649, - "step": 3260 - }, - { - "epoch": 0.39211206637407564, - "grad_norm": 1.8325664332318903, - "learning_rate": 2.7752513863209242e-06, - "loss": 1.0818, - "step": 3261 - }, - { - "epoch": 0.39223230926471475, - "grad_norm": 1.78488404183175, - "learning_rate": 2.774533258339203e-06, - "loss": 1.0619, - "step": 3262 - }, - { - "epoch": 0.3923525521553538, - "grad_norm": 2.522921482861744, - "learning_rate": 2.7738150128614014e-06, - "loss": 1.0194, - "step": 3263 - }, - { - "epoch": 0.3924727950459929, - "grad_norm": 1.7647178142275124, - "learning_rate": 2.7730966499964777e-06, - "loss": 1.1248, - "step": 3264 - }, - { - "epoch": 0.39259303793663197, - "grad_norm": 3.7295091922808847, - "learning_rate": 2.772378169853408e-06, - "loss": 1.0316, - "step": 3265 - }, - { - "epoch": 0.3927132808272711, - "grad_norm": 1.8151257667369127, - "learning_rate": 2.771659572541183e-06, - "loss": 0.9688, - "step": 3266 - }, - { - "epoch": 0.3928335237179102, - "grad_norm": 1.8322101822850707, - "learning_rate": 2.7709408581688143e-06, - "loss": 1.1012, - "step": 3267 - }, - { - "epoch": 0.39295376660854925, - "grad_norm": 2.0014967086425677, - "learning_rate": 2.7702220268453307e-06, - "loss": 1.1081, - "step": 3268 - }, - { - "epoch": 0.39307400949918836, - "grad_norm": 1.979413355126587, - "learning_rate": 2.7695030786797785e-06, - "loss": 1.0754, - "step": 3269 - }, - { - "epoch": 0.39319425238982747, - "grad_norm": 2.107193900037378, - "learning_rate": 2.7687840137812206e-06, - "loss": 0.968, - "step": 3270 - }, - { - "epoch": 0.3933144952804665, - "grad_norm": 0.8021750996266139, - "learning_rate": 2.7680648322587395e-06, - "loss": 0.8662, - "step": 3271 - }, - { - "epoch": 0.39343473817110564, - "grad_norm": 1.8787313843881617, - "learning_rate": 2.7673455342214334e-06, - "loss": 1.045, - "step": 3272 - }, - { - "epoch": 0.39355498106174475, - "grad_norm": 1.88564205051966, - "learning_rate": 2.7666261197784198e-06, - "loss": 0.9884, - "step": 3273 - }, - { - "epoch": 0.3936752239523838, - "grad_norm": 1.8715652158535627, - "learning_rate": 2.7659065890388336e-06, - "loss": 0.9964, - "step": 3274 - }, - { - "epoch": 0.3937954668430229, - "grad_norm": 1.8447993250522223, - "learning_rate": 2.7651869421118266e-06, - "loss": 1.0759, - "step": 3275 - }, - { - "epoch": 0.393915709733662, - "grad_norm": 1.5967777887007437, - "learning_rate": 2.76446717910657e-06, - "loss": 1.0585, - "step": 3276 - }, - { - "epoch": 0.3940359526243011, - "grad_norm": 3.087924288735698, - "learning_rate": 2.763747300132249e-06, - "loss": 0.9931, - "step": 3277 - }, - { - "epoch": 0.3941561955149402, - "grad_norm": 1.7693294047925543, - "learning_rate": 2.7630273052980704e-06, - "loss": 1.0901, - "step": 3278 - }, - { - "epoch": 0.39427643840557924, - "grad_norm": 2.463315365957653, - "learning_rate": 2.7623071947132554e-06, - "loss": 0.9094, - "step": 3279 - }, - { - "epoch": 0.39439668129621835, - "grad_norm": 1.8854303166297548, - "learning_rate": 2.7615869684870458e-06, - "loss": 1.0096, - "step": 3280 - }, - { - "epoch": 0.39451692418685746, - "grad_norm": 1.7775998109245714, - "learning_rate": 2.7608666267286986e-06, - "loss": 1.0695, - "step": 3281 - }, - { - "epoch": 0.3946371670774965, - "grad_norm": 2.2264616290884565, - "learning_rate": 2.760146169547489e-06, - "loss": 1.0888, - "step": 3282 - }, - { - "epoch": 0.39475740996813563, - "grad_norm": 1.3987933510615493, - "learning_rate": 2.75942559705271e-06, - "loss": 0.9956, - "step": 3283 - }, - { - "epoch": 0.39487765285877474, - "grad_norm": 1.8477236068093748, - "learning_rate": 2.7587049093536713e-06, - "loss": 1.1152, - "step": 3284 - }, - { - "epoch": 0.3949978957494138, - "grad_norm": 1.7230654552688558, - "learning_rate": 2.757984106559701e-06, - "loss": 1.0312, - "step": 3285 - }, - { - "epoch": 0.3951181386400529, - "grad_norm": 1.7563663213362186, - "learning_rate": 2.7572631887801446e-06, - "loss": 0.9494, - "step": 3286 - }, - { - "epoch": 0.395238381530692, - "grad_norm": 1.915403495789208, - "learning_rate": 2.7565421561243654e-06, - "loss": 0.9819, - "step": 3287 - }, - { - "epoch": 0.3953586244213311, - "grad_norm": 1.9374094911169764, - "learning_rate": 2.7558210087017413e-06, - "loss": 1.0524, - "step": 3288 - }, - { - "epoch": 0.3954788673119702, - "grad_norm": 1.8558675558254756, - "learning_rate": 2.7550997466216724e-06, - "loss": 0.9768, - "step": 3289 - }, - { - "epoch": 0.3955991102026093, - "grad_norm": 2.202978034562342, - "learning_rate": 2.7543783699935714e-06, - "loss": 1.0341, - "step": 3290 - }, - { - "epoch": 0.39571935309324835, - "grad_norm": 2.6102596770561046, - "learning_rate": 2.753656878926872e-06, - "loss": 1.0822, - "step": 3291 - }, - { - "epoch": 0.39583959598388746, - "grad_norm": 1.8333428411526216, - "learning_rate": 2.752935273531023e-06, - "loss": 0.967, - "step": 3292 - }, - { - "epoch": 0.39595983887452657, - "grad_norm": 2.3999474956809257, - "learning_rate": 2.752213553915492e-06, - "loss": 1.0245, - "step": 3293 - }, - { - "epoch": 0.3960800817651656, - "grad_norm": 0.804871547354473, - "learning_rate": 2.751491720189762e-06, - "loss": 0.9138, - "step": 3294 - }, - { - "epoch": 0.39620032465580474, - "grad_norm": 2.079105461407218, - "learning_rate": 2.7507697724633364e-06, - "loss": 1.1389, - "step": 3295 - }, - { - "epoch": 0.3963205675464438, - "grad_norm": 0.796752995145762, - "learning_rate": 2.7500477108457327e-06, - "loss": 0.7975, - "step": 3296 - }, - { - "epoch": 0.3964408104370829, - "grad_norm": 2.0258757875555955, - "learning_rate": 2.7493255354464877e-06, - "loss": 1.0373, - "step": 3297 - }, - { - "epoch": 0.396561053327722, - "grad_norm": 2.1603775324966374, - "learning_rate": 2.748603246375156e-06, - "loss": 0.9927, - "step": 3298 - }, - { - "epoch": 0.39668129621836107, - "grad_norm": 1.840491891396964, - "learning_rate": 2.7478808437413055e-06, - "loss": 0.9212, - "step": 3299 - }, - { - "epoch": 0.3968015391090002, - "grad_norm": 1.7915922711325967, - "learning_rate": 2.7471583276545263e-06, - "loss": 0.8909, - "step": 3300 - }, - { - "epoch": 0.3969217819996393, - "grad_norm": 1.8711243310392367, - "learning_rate": 2.7464356982244224e-06, - "loss": 0.9296, - "step": 3301 - }, - { - "epoch": 0.39704202489027834, - "grad_norm": 0.8255583737539032, - "learning_rate": 2.745712955560617e-06, - "loss": 0.8766, - "step": 3302 - }, - { - "epoch": 0.39716226778091746, - "grad_norm": 1.9674109491846896, - "learning_rate": 2.7449900997727496e-06, - "loss": 1.0042, - "step": 3303 - }, - { - "epoch": 0.39728251067155657, - "grad_norm": 2.187641793151434, - "learning_rate": 2.744267130970476e-06, - "loss": 1.0674, - "step": 3304 - }, - { - "epoch": 0.3974027535621956, - "grad_norm": 1.792753917314779, - "learning_rate": 2.7435440492634697e-06, - "loss": 0.9967, - "step": 3305 - }, - { - "epoch": 0.39752299645283473, - "grad_norm": 2.092621570369015, - "learning_rate": 2.7428208547614228e-06, - "loss": 0.8945, - "step": 3306 - }, - { - "epoch": 0.39764323934347384, - "grad_norm": 2.8358330825624702, - "learning_rate": 2.742097547574043e-06, - "loss": 1.0006, - "step": 3307 - }, - { - "epoch": 0.3977634822341129, - "grad_norm": 2.290999182392509, - "learning_rate": 2.7413741278110544e-06, - "loss": 1.002, - "step": 3308 - }, - { - "epoch": 0.397883725124752, - "grad_norm": 2.329391749316428, - "learning_rate": 2.7406505955822016e-06, - "loss": 0.9135, - "step": 3309 - }, - { - "epoch": 0.39800396801539106, - "grad_norm": 2.2777189335032237, - "learning_rate": 2.7399269509972415e-06, - "loss": 0.8958, - "step": 3310 - }, - { - "epoch": 0.3981242109060302, - "grad_norm": 2.132994815174495, - "learning_rate": 2.7392031941659514e-06, - "loss": 1.0655, - "step": 3311 - }, - { - "epoch": 0.3982444537966693, - "grad_norm": 2.580405134432653, - "learning_rate": 2.7384793251981244e-06, - "loss": 1.0885, - "step": 3312 - }, - { - "epoch": 0.39836469668730834, - "grad_norm": 3.182661685357459, - "learning_rate": 2.737755344203571e-06, - "loss": 1.0355, - "step": 3313 - }, - { - "epoch": 0.39848493957794745, - "grad_norm": 1.701106143561835, - "learning_rate": 2.7370312512921186e-06, - "loss": 1.023, - "step": 3314 - }, - { - "epoch": 0.39860518246858656, - "grad_norm": 2.170507865225411, - "learning_rate": 2.736307046573611e-06, - "loss": 0.9979, - "step": 3315 - }, - { - "epoch": 0.3987254253592256, - "grad_norm": 1.5426313973079262, - "learning_rate": 2.73558273015791e-06, - "loss": 1.0515, - "step": 3316 - }, - { - "epoch": 0.3988456682498647, - "grad_norm": 3.1258072382679174, - "learning_rate": 2.734858302154894e-06, - "loss": 0.9391, - "step": 3317 - }, - { - "epoch": 0.39896591114050384, - "grad_norm": 2.1931859913452523, - "learning_rate": 2.734133762674457e-06, - "loss": 0.9888, - "step": 3318 - }, - { - "epoch": 0.3990861540311429, - "grad_norm": 1.8333497985909646, - "learning_rate": 2.7334091118265124e-06, - "loss": 0.94, - "step": 3319 - }, - { - "epoch": 0.399206396921782, - "grad_norm": 0.7114047097755615, - "learning_rate": 2.732684349720989e-06, - "loss": 0.8188, - "step": 3320 - }, - { - "epoch": 0.3993266398124211, - "grad_norm": 1.7246873434031795, - "learning_rate": 2.7319594764678318e-06, - "loss": 0.9801, - "step": 3321 - }, - { - "epoch": 0.39944688270306017, - "grad_norm": 1.717920415249526, - "learning_rate": 2.7312344921770044e-06, - "loss": 1.0664, - "step": 3322 - }, - { - "epoch": 0.3995671255936993, - "grad_norm": 2.308065647712207, - "learning_rate": 2.7305093969584857e-06, - "loss": 1.0135, - "step": 3323 - }, - { - "epoch": 0.3996873684843384, - "grad_norm": 2.2434024509742683, - "learning_rate": 2.729784190922272e-06, - "loss": 1.0223, - "step": 3324 - }, - { - "epoch": 0.39980761137497745, - "grad_norm": 0.7772991991934852, - "learning_rate": 2.729058874178378e-06, - "loss": 0.8238, - "step": 3325 - }, - { - "epoch": 0.39992785426561656, - "grad_norm": 1.7224379766672517, - "learning_rate": 2.7283334468368315e-06, - "loss": 0.9227, - "step": 3326 - }, - { - "epoch": 0.4000480971562556, - "grad_norm": 2.811916375535185, - "learning_rate": 2.72760790900768e-06, - "loss": 0.959, - "step": 3327 - }, - { - "epoch": 0.4001683400468947, - "grad_norm": 1.7260197955971104, - "learning_rate": 2.7268822608009875e-06, - "loss": 1.0231, - "step": 3328 - }, - { - "epoch": 0.40028858293753383, - "grad_norm": 1.9212788223159682, - "learning_rate": 2.726156502326834e-06, - "loss": 1.014, - "step": 3329 - }, - { - "epoch": 0.4004088258281729, - "grad_norm": 0.7520349631424903, - "learning_rate": 2.725430633695316e-06, - "loss": 0.8603, - "step": 3330 - }, - { - "epoch": 0.400529068718812, - "grad_norm": 0.9321277232734603, - "learning_rate": 2.7247046550165485e-06, - "loss": 0.8566, - "step": 3331 - }, - { - "epoch": 0.4006493116094511, - "grad_norm": 1.529183063193965, - "learning_rate": 2.7239785664006606e-06, - "loss": 0.9815, - "step": 3332 - }, - { - "epoch": 0.40076955450009016, - "grad_norm": 0.8208558236024703, - "learning_rate": 2.7232523679578002e-06, - "loss": 0.8984, - "step": 3333 - }, - { - "epoch": 0.4008897973907293, - "grad_norm": 2.065157709634211, - "learning_rate": 2.7225260597981295e-06, - "loss": 1.0283, - "step": 3334 - }, - { - "epoch": 0.4010100402813684, - "grad_norm": 2.1697174992356167, - "learning_rate": 2.721799642031831e-06, - "loss": 1.0172, - "step": 3335 - }, - { - "epoch": 0.40113028317200744, - "grad_norm": 2.2216753949134356, - "learning_rate": 2.721073114769101e-06, - "loss": 1.0079, - "step": 3336 - }, - { - "epoch": 0.40125052606264655, - "grad_norm": 1.6774090594728268, - "learning_rate": 2.7203464781201523e-06, - "loss": 0.9834, - "step": 3337 - }, - { - "epoch": 0.40137076895328566, - "grad_norm": 4.151730022847968, - "learning_rate": 2.719619732195215e-06, - "loss": 1.0068, - "step": 3338 - }, - { - "epoch": 0.4014910118439247, - "grad_norm": 1.3118141743960394, - "learning_rate": 2.7188928771045377e-06, - "loss": 0.9597, - "step": 3339 - }, - { - "epoch": 0.4016112547345638, - "grad_norm": 1.9211878594876846, - "learning_rate": 2.7181659129583815e-06, - "loss": 1.0281, - "step": 3340 - }, - { - "epoch": 0.4017314976252029, - "grad_norm": 2.058949738328592, - "learning_rate": 2.7174388398670276e-06, - "loss": 0.9981, - "step": 3341 - }, - { - "epoch": 0.401851740515842, - "grad_norm": 1.8963457050953665, - "learning_rate": 2.716711657940773e-06, - "loss": 1.1481, - "step": 3342 - }, - { - "epoch": 0.4019719834064811, - "grad_norm": 0.8477770811529146, - "learning_rate": 2.7159843672899284e-06, - "loss": 0.8333, - "step": 3343 - }, - { - "epoch": 0.40209222629712016, - "grad_norm": 2.0266704874671926, - "learning_rate": 2.715256968024825e-06, - "loss": 1.0407, - "step": 3344 - }, - { - "epoch": 0.40221246918775927, - "grad_norm": 1.5598969711613737, - "learning_rate": 2.7145294602558083e-06, - "loss": 1.0523, - "step": 3345 - }, - { - "epoch": 0.4023327120783984, - "grad_norm": 1.9171297370895397, - "learning_rate": 2.713801844093241e-06, - "loss": 0.9329, - "step": 3346 - }, - { - "epoch": 0.40245295496903744, - "grad_norm": 2.2908760672207893, - "learning_rate": 2.7130741196475014e-06, - "loss": 1.1162, - "step": 3347 - }, - { - "epoch": 0.40257319785967655, - "grad_norm": 1.7483098178636078, - "learning_rate": 2.7123462870289848e-06, - "loss": 1.0309, - "step": 3348 - }, - { - "epoch": 0.40269344075031566, - "grad_norm": 1.6621498972595952, - "learning_rate": 2.711618346348102e-06, - "loss": 1.0404, - "step": 3349 - }, - { - "epoch": 0.4028136836409547, - "grad_norm": 2.6245193949742394, - "learning_rate": 2.7108902977152825e-06, - "loss": 0.8563, - "step": 3350 - }, - { - "epoch": 0.4029339265315938, - "grad_norm": 2.065135312577103, - "learning_rate": 2.7101621412409704e-06, - "loss": 0.9774, - "step": 3351 - }, - { - "epoch": 0.40305416942223293, - "grad_norm": 1.8990004219026584, - "learning_rate": 2.7094338770356256e-06, - "loss": 1.0938, - "step": 3352 - }, - { - "epoch": 0.403174412312872, - "grad_norm": 1.9624437117552853, - "learning_rate": 2.708705505209726e-06, - "loss": 0.8682, - "step": 3353 - }, - { - "epoch": 0.4032946552035111, - "grad_norm": 3.0541184619744124, - "learning_rate": 2.7079770258737646e-06, - "loss": 1.1441, - "step": 3354 - }, - { - "epoch": 0.4034148980941502, - "grad_norm": 2.746435890209262, - "learning_rate": 2.707248439138251e-06, - "loss": 0.9833, - "step": 3355 - }, - { - "epoch": 0.40353514098478926, - "grad_norm": 1.7252435263520702, - "learning_rate": 2.7065197451137114e-06, - "loss": 0.8851, - "step": 3356 - }, - { - "epoch": 0.4036553838754284, - "grad_norm": 1.9158093013752282, - "learning_rate": 2.7057909439106894e-06, - "loss": 0.9066, - "step": 3357 - }, - { - "epoch": 0.40377562676606743, - "grad_norm": 1.8788921014645632, - "learning_rate": 2.7050620356397417e-06, - "loss": 1.0127, - "step": 3358 - }, - { - "epoch": 0.40389586965670654, - "grad_norm": 1.6523809766972262, - "learning_rate": 2.7043330204114437e-06, - "loss": 0.9548, - "step": 3359 - }, - { - "epoch": 0.40401611254734565, - "grad_norm": 1.7235040978631158, - "learning_rate": 2.7036038983363862e-06, - "loss": 1.0846, - "step": 3360 - }, - { - "epoch": 0.4041363554379847, - "grad_norm": 2.2605945382400625, - "learning_rate": 2.702874669525177e-06, - "loss": 1.0694, - "step": 3361 - }, - { - "epoch": 0.4042565983286238, - "grad_norm": 2.277608394786075, - "learning_rate": 2.7021453340884394e-06, - "loss": 0.9197, - "step": 3362 - }, - { - "epoch": 0.40437684121926293, - "grad_norm": 4.217881629017269, - "learning_rate": 2.7014158921368125e-06, - "loss": 0.9626, - "step": 3363 - }, - { - "epoch": 0.404497084109902, - "grad_norm": 1.7903026963143809, - "learning_rate": 2.700686343780953e-06, - "loss": 1.0854, - "step": 3364 - }, - { - "epoch": 0.4046173270005411, - "grad_norm": 4.397821034244477, - "learning_rate": 2.699956689131532e-06, - "loss": 1.1149, - "step": 3365 - }, - { - "epoch": 0.4047375698911802, - "grad_norm": 1.9285816931577535, - "learning_rate": 2.699226928299238e-06, - "loss": 1.0813, - "step": 3366 - }, - { - "epoch": 0.40485781278181926, - "grad_norm": 2.230447264666386, - "learning_rate": 2.698497061394774e-06, - "loss": 1.0183, - "step": 3367 - }, - { - "epoch": 0.40497805567245837, - "grad_norm": 1.4900898994617013, - "learning_rate": 2.6977670885288627e-06, - "loss": 1.032, - "step": 3368 - }, - { - "epoch": 0.4050982985630975, - "grad_norm": 1.883212288480318, - "learning_rate": 2.6970370098122378e-06, - "loss": 0.9812, - "step": 3369 - }, - { - "epoch": 0.40521854145373654, - "grad_norm": 1.3993476591041785, - "learning_rate": 2.6963068253556535e-06, - "loss": 1.0848, - "step": 3370 - }, - { - "epoch": 0.40533878434437565, - "grad_norm": 1.8708605531384617, - "learning_rate": 2.6955765352698763e-06, - "loss": 1.0766, - "step": 3371 - }, - { - "epoch": 0.40545902723501476, - "grad_norm": 2.218147948343335, - "learning_rate": 2.6948461396656923e-06, - "loss": 0.9606, - "step": 3372 - }, - { - "epoch": 0.4055792701256538, - "grad_norm": 2.130440423411337, - "learning_rate": 2.6941156386539013e-06, - "loss": 0.979, - "step": 3373 - }, - { - "epoch": 0.4056995130162929, - "grad_norm": 2.032290500293151, - "learning_rate": 2.6933850323453203e-06, - "loss": 1.0381, - "step": 3374 - }, - { - "epoch": 0.405819755906932, - "grad_norm": 1.850548317559571, - "learning_rate": 2.6926543208507806e-06, - "loss": 0.976, - "step": 3375 - }, - { - "epoch": 0.4059399987975711, - "grad_norm": 4.01672466969589, - "learning_rate": 2.6919235042811316e-06, - "loss": 1.0304, - "step": 3376 - }, - { - "epoch": 0.4060602416882102, - "grad_norm": 3.1729663794053073, - "learning_rate": 2.691192582747237e-06, - "loss": 0.9861, - "step": 3377 - }, - { - "epoch": 0.40618048457884925, - "grad_norm": 1.9342840638609473, - "learning_rate": 2.6904615563599765e-06, - "loss": 0.9568, - "step": 3378 - }, - { - "epoch": 0.40630072746948837, - "grad_norm": 1.8384543191911082, - "learning_rate": 2.6897304252302477e-06, - "loss": 1.0627, - "step": 3379 - }, - { - "epoch": 0.4064209703601275, - "grad_norm": 0.8064938982137357, - "learning_rate": 2.688999189468962e-06, - "loss": 0.8002, - "step": 3380 - }, - { - "epoch": 0.40654121325076653, - "grad_norm": 2.136817497468273, - "learning_rate": 2.6882678491870464e-06, - "loss": 0.9834, - "step": 3381 - }, - { - "epoch": 0.40666145614140564, - "grad_norm": 1.6094335156515445, - "learning_rate": 2.6875364044954453e-06, - "loss": 0.9389, - "step": 3382 - }, - { - "epoch": 0.40678169903204475, - "grad_norm": 1.5135771297212959, - "learning_rate": 2.6868048555051185e-06, - "loss": 1.0443, - "step": 3383 - }, - { - "epoch": 0.4069019419226838, - "grad_norm": 2.6044453179369182, - "learning_rate": 2.686073202327041e-06, - "loss": 1.0883, - "step": 3384 - }, - { - "epoch": 0.4070221848133229, - "grad_norm": 1.6389237802458796, - "learning_rate": 2.6853414450722043e-06, - "loss": 0.9635, - "step": 3385 - }, - { - "epoch": 0.40714242770396203, - "grad_norm": 2.962379768603718, - "learning_rate": 2.684609583851616e-06, - "loss": 1.076, - "step": 3386 - }, - { - "epoch": 0.4072626705946011, - "grad_norm": 1.8113698395546376, - "learning_rate": 2.683877618776297e-06, - "loss": 1.0349, - "step": 3387 - }, - { - "epoch": 0.4073829134852402, - "grad_norm": 2.4798785130677663, - "learning_rate": 2.6831455499572876e-06, - "loss": 0.9678, - "step": 3388 - }, - { - "epoch": 0.40750315637587925, - "grad_norm": 2.179953938268796, - "learning_rate": 2.682413377505641e-06, - "loss": 1.0075, - "step": 3389 - }, - { - "epoch": 0.40762339926651836, - "grad_norm": 1.7892249420914352, - "learning_rate": 2.6816811015324284e-06, - "loss": 0.9956, - "step": 3390 - }, - { - "epoch": 0.40774364215715747, - "grad_norm": 0.7990323204454974, - "learning_rate": 2.6809487221487343e-06, - "loss": 0.8503, - "step": 3391 - }, - { - "epoch": 0.4078638850477965, - "grad_norm": 2.7027522364794065, - "learning_rate": 2.6802162394656605e-06, - "loss": 1.0564, - "step": 3392 - }, - { - "epoch": 0.40798412793843564, - "grad_norm": 1.8892380852893356, - "learning_rate": 2.679483653594324e-06, - "loss": 0.9529, - "step": 3393 - }, - { - "epoch": 0.40810437082907475, - "grad_norm": 3.0009083962014866, - "learning_rate": 2.678750964645857e-06, - "loss": 0.9909, - "step": 3394 - }, - { - "epoch": 0.4082246137197138, - "grad_norm": 2.277223665660359, - "learning_rate": 2.6780181727314094e-06, - "loss": 1.0748, - "step": 3395 - }, - { - "epoch": 0.4083448566103529, - "grad_norm": 30.507714468472457, - "learning_rate": 2.6772852779621435e-06, - "loss": 1.003, - "step": 3396 - }, - { - "epoch": 0.408465099500992, - "grad_norm": 3.7590557111836898, - "learning_rate": 2.676552280449239e-06, - "loss": 1.0788, - "step": 3397 - }, - { - "epoch": 0.4085853423916311, - "grad_norm": 3.2361860871440937, - "learning_rate": 2.6758191803038917e-06, - "loss": 0.9847, - "step": 3398 - }, - { - "epoch": 0.4087055852822702, - "grad_norm": 1.6024437689474629, - "learning_rate": 2.6750859776373125e-06, - "loss": 1.0611, - "step": 3399 - }, - { - "epoch": 0.4088258281729093, - "grad_norm": 0.7790175011251841, - "learning_rate": 2.674352672560727e-06, - "loss": 0.8486, - "step": 3400 - }, - { - "epoch": 0.40894607106354836, - "grad_norm": 1.6141810828756478, - "learning_rate": 2.673619265185377e-06, - "loss": 1.0015, - "step": 3401 - }, - { - "epoch": 0.40906631395418747, - "grad_norm": 1.5930816894726039, - "learning_rate": 2.672885755622521e-06, - "loss": 0.9994, - "step": 3402 - }, - { - "epoch": 0.4091865568448266, - "grad_norm": 2.2544503175122, - "learning_rate": 2.67215214398343e-06, - "loss": 0.9266, - "step": 3403 - }, - { - "epoch": 0.40930679973546563, - "grad_norm": 2.5686839779725, - "learning_rate": 2.671418430379393e-06, - "loss": 1.0075, - "step": 3404 - }, - { - "epoch": 0.40942704262610474, - "grad_norm": 1.618421369502796, - "learning_rate": 2.670684614921715e-06, - "loss": 1.0594, - "step": 3405 - }, - { - "epoch": 0.4095472855167438, - "grad_norm": 2.068515904945193, - "learning_rate": 2.6699506977217128e-06, - "loss": 0.9213, - "step": 3406 - }, - { - "epoch": 0.4096675284073829, - "grad_norm": 2.3564572513813804, - "learning_rate": 2.6692166788907233e-06, - "loss": 0.9238, - "step": 3407 - }, - { - "epoch": 0.409787771298022, - "grad_norm": 2.1879199033718204, - "learning_rate": 2.6684825585400957e-06, - "loss": 0.9921, - "step": 3408 - }, - { - "epoch": 0.4099080141886611, - "grad_norm": 0.8936548062293619, - "learning_rate": 2.6677483367811947e-06, - "loss": 0.9509, - "step": 3409 - }, - { - "epoch": 0.4100282570793002, - "grad_norm": 1.4899934146402751, - "learning_rate": 2.6670140137254028e-06, - "loss": 0.9876, - "step": 3410 - }, - { - "epoch": 0.4101484999699393, - "grad_norm": 3.4828086719517573, - "learning_rate": 2.666279589484115e-06, - "loss": 1.1042, - "step": 3411 - }, - { - "epoch": 0.41026874286057835, - "grad_norm": 1.94911190496951, - "learning_rate": 2.6655450641687435e-06, - "loss": 1.0377, - "step": 3412 - }, - { - "epoch": 0.41038898575121746, - "grad_norm": 2.150742282923203, - "learning_rate": 2.664810437890715e-06, - "loss": 0.9205, - "step": 3413 - }, - { - "epoch": 0.41050922864185657, - "grad_norm": 1.8104254590323243, - "learning_rate": 2.6640757107614714e-06, - "loss": 1.0252, - "step": 3414 - }, - { - "epoch": 0.4106294715324956, - "grad_norm": 2.1258589186774275, - "learning_rate": 2.6633408828924697e-06, - "loss": 0.9364, - "step": 3415 - }, - { - "epoch": 0.41074971442313474, - "grad_norm": 1.6555003502977024, - "learning_rate": 2.662605954395185e-06, - "loss": 0.9297, - "step": 3416 - }, - { - "epoch": 0.41086995731377385, - "grad_norm": 1.6928763047042672, - "learning_rate": 2.6618709253811027e-06, - "loss": 1.0686, - "step": 3417 - }, - { - "epoch": 0.4109902002044129, - "grad_norm": 1.4066379435862477, - "learning_rate": 2.6611357959617277e-06, - "loss": 1.1079, - "step": 3418 - }, - { - "epoch": 0.411110443095052, - "grad_norm": 1.7623612613046424, - "learning_rate": 2.660400566248578e-06, - "loss": 1.1368, - "step": 3419 - }, - { - "epoch": 0.41123068598569107, - "grad_norm": 2.8922386820266124, - "learning_rate": 2.6596652363531876e-06, - "loss": 0.8928, - "step": 3420 - }, - { - "epoch": 0.4113509288763302, - "grad_norm": 1.4744420240820424, - "learning_rate": 2.6589298063871055e-06, - "loss": 1.0108, - "step": 3421 - }, - { - "epoch": 0.4114711717669693, - "grad_norm": 1.8423851263087745, - "learning_rate": 2.658194276461895e-06, - "loss": 0.9317, - "step": 3422 - }, - { - "epoch": 0.41159141465760835, - "grad_norm": 1.776703234946906, - "learning_rate": 2.6574586466891368e-06, - "loss": 0.9003, - "step": 3423 - }, - { - "epoch": 0.41171165754824746, - "grad_norm": 1.7664954816305345, - "learning_rate": 2.6567229171804247e-06, - "loss": 0.8772, - "step": 3424 - }, - { - "epoch": 0.41183190043888657, - "grad_norm": 3.0544029161814, - "learning_rate": 2.655987088047368e-06, - "loss": 1.1026, - "step": 3425 - }, - { - "epoch": 0.4119521433295256, - "grad_norm": 1.914374812590378, - "learning_rate": 2.6552511594015912e-06, - "loss": 1.0101, - "step": 3426 - }, - { - "epoch": 0.41207238622016473, - "grad_norm": 6.125394847365581, - "learning_rate": 2.654515131354735e-06, - "loss": 1.0817, - "step": 3427 - }, - { - "epoch": 0.41219262911080384, - "grad_norm": 2.2491971808906865, - "learning_rate": 2.653779004018453e-06, - "loss": 1.0885, - "step": 3428 - }, - { - "epoch": 0.4123128720014429, - "grad_norm": 1.8340530788890084, - "learning_rate": 2.653042777504417e-06, - "loss": 1.0455, - "step": 3429 - }, - { - "epoch": 0.412433114892082, - "grad_norm": 1.7630052964968863, - "learning_rate": 2.6523064519243105e-06, - "loss": 1.0246, - "step": 3430 - }, - { - "epoch": 0.4125533577827211, - "grad_norm": 2.4806162872891897, - "learning_rate": 2.6515700273898333e-06, - "loss": 1.0168, - "step": 3431 - }, - { - "epoch": 0.4126736006733602, - "grad_norm": 1.783592691308408, - "learning_rate": 2.6508335040127018e-06, - "loss": 0.916, - "step": 3432 - }, - { - "epoch": 0.4127938435639993, - "grad_norm": 1.5202121336878427, - "learning_rate": 2.6500968819046446e-06, - "loss": 1.006, - "step": 3433 - }, - { - "epoch": 0.4129140864546384, - "grad_norm": 3.029090505391999, - "learning_rate": 2.649360161177408e-06, - "loss": 0.8265, - "step": 3434 - }, - { - "epoch": 0.41303432934527745, - "grad_norm": 1.5798890480487189, - "learning_rate": 2.6486233419427504e-06, - "loss": 0.96, - "step": 3435 - }, - { - "epoch": 0.41315457223591656, - "grad_norm": 2.3978771835068238, - "learning_rate": 2.6478864243124484e-06, - "loss": 0.9802, - "step": 3436 - }, - { - "epoch": 0.4132748151265556, - "grad_norm": 1.9580266252408258, - "learning_rate": 2.6471494083982903e-06, - "loss": 1.0833, - "step": 3437 - }, - { - "epoch": 0.4133950580171947, - "grad_norm": 1.9083276276593375, - "learning_rate": 2.6464122943120818e-06, - "loss": 0.9812, - "step": 3438 - }, - { - "epoch": 0.41351530090783384, - "grad_norm": 3.1014727139869325, - "learning_rate": 2.645675082165642e-06, - "loss": 1.0507, - "step": 3439 - }, - { - "epoch": 0.4136355437984729, - "grad_norm": 2.4166540167466493, - "learning_rate": 2.644937772070806e-06, - "loss": 0.9828, - "step": 3440 - }, - { - "epoch": 0.413755786689112, - "grad_norm": 4.414278332320067, - "learning_rate": 2.6442003641394225e-06, - "loss": 1.0671, - "step": 3441 - }, - { - "epoch": 0.4138760295797511, - "grad_norm": 1.4111577332087988, - "learning_rate": 2.643462858483356e-06, - "loss": 1.0659, - "step": 3442 - }, - { - "epoch": 0.41399627247039017, - "grad_norm": 2.885919648731251, - "learning_rate": 2.6427252552144856e-06, - "loss": 0.9621, - "step": 3443 - }, - { - "epoch": 0.4141165153610293, - "grad_norm": 2.031944860303478, - "learning_rate": 2.6419875544447044e-06, - "loss": 0.9829, - "step": 3444 - }, - { - "epoch": 0.4142367582516684, - "grad_norm": 2.619205801179179, - "learning_rate": 2.6412497562859218e-06, - "loss": 0.9496, - "step": 3445 - }, - { - "epoch": 0.41435700114230745, - "grad_norm": 2.099651162374862, - "learning_rate": 2.6405118608500617e-06, - "loss": 0.9865, - "step": 3446 - }, - { - "epoch": 0.41447724403294656, - "grad_norm": 1.6688875263136307, - "learning_rate": 2.6397738682490613e-06, - "loss": 1.0344, - "step": 3447 - }, - { - "epoch": 0.41459748692358567, - "grad_norm": 1.616276286207192, - "learning_rate": 2.6390357785948734e-06, - "loss": 0.9822, - "step": 3448 - }, - { - "epoch": 0.4147177298142247, - "grad_norm": 2.205115318829366, - "learning_rate": 2.6382975919994667e-06, - "loss": 1.0298, - "step": 3449 - }, - { - "epoch": 0.41483797270486383, - "grad_norm": 1.576686845909706, - "learning_rate": 2.637559308574822e-06, - "loss": 0.9522, - "step": 3450 - }, - { - "epoch": 0.4149582155955029, - "grad_norm": 2.0883628660865474, - "learning_rate": 2.6368209284329376e-06, - "loss": 0.9496, - "step": 3451 - }, - { - "epoch": 0.415078458486142, - "grad_norm": 2.5797661384042976, - "learning_rate": 2.636082451685825e-06, - "loss": 0.9885, - "step": 3452 - }, - { - "epoch": 0.4151987013767811, - "grad_norm": 1.5070349709567206, - "learning_rate": 2.6353438784455094e-06, - "loss": 1.0861, - "step": 3453 - }, - { - "epoch": 0.41531894426742016, - "grad_norm": 2.142516922054877, - "learning_rate": 2.6346052088240326e-06, - "loss": 0.9328, - "step": 3454 - }, - { - "epoch": 0.4154391871580593, - "grad_norm": 1.8725035260196587, - "learning_rate": 2.63386644293345e-06, - "loss": 1.0047, - "step": 3455 - }, - { - "epoch": 0.4155594300486984, - "grad_norm": 2.7175349885851645, - "learning_rate": 2.633127580885833e-06, - "loss": 1.0689, - "step": 3456 - }, - { - "epoch": 0.41567967293933744, - "grad_norm": 1.9903414681534106, - "learning_rate": 2.632388622793265e-06, - "loss": 0.8806, - "step": 3457 - }, - { - "epoch": 0.41579991582997655, - "grad_norm": 1.5780837554074278, - "learning_rate": 2.6316495687678457e-06, - "loss": 0.9109, - "step": 3458 - }, - { - "epoch": 0.41592015872061566, - "grad_norm": 2.553783945239856, - "learning_rate": 2.6309104189216887e-06, - "loss": 0.9949, - "step": 3459 - }, - { - "epoch": 0.4160404016112547, - "grad_norm": 2.259863426841829, - "learning_rate": 2.630171173366923e-06, - "loss": 0.9767, - "step": 3460 - }, - { - "epoch": 0.41616064450189383, - "grad_norm": 2.856876302955412, - "learning_rate": 2.629431832215691e-06, - "loss": 0.9771, - "step": 3461 - }, - { - "epoch": 0.41628088739253294, - "grad_norm": 1.6626889286391455, - "learning_rate": 2.628692395580151e-06, - "loss": 1.103, - "step": 3462 - }, - { - "epoch": 0.416401130283172, - "grad_norm": 1.7989943238063322, - "learning_rate": 2.6279528635724747e-06, - "loss": 1.029, - "step": 3463 - }, - { - "epoch": 0.4165213731738111, - "grad_norm": 3.0445745146439966, - "learning_rate": 2.627213236304848e-06, - "loss": 1.0208, - "step": 3464 - }, - { - "epoch": 0.4166416160644502, - "grad_norm": 2.4773873962002897, - "learning_rate": 2.626473513889472e-06, - "loss": 0.9316, - "step": 3465 - }, - { - "epoch": 0.41676185895508927, - "grad_norm": 1.8472555423621935, - "learning_rate": 2.625733696438562e-06, - "loss": 1.0573, - "step": 3466 - }, - { - "epoch": 0.4168821018457284, - "grad_norm": 2.3576307101537775, - "learning_rate": 2.6249937840643476e-06, - "loss": 0.9834, - "step": 3467 - }, - { - "epoch": 0.41700234473636744, - "grad_norm": 2.051792672835695, - "learning_rate": 2.6242537768790733e-06, - "loss": 0.8983, - "step": 3468 - }, - { - "epoch": 0.41712258762700655, - "grad_norm": 3.3255116406071963, - "learning_rate": 2.6235136749949975e-06, - "loss": 0.9211, - "step": 3469 - }, - { - "epoch": 0.41724283051764566, - "grad_norm": 2.09470698563113, - "learning_rate": 2.6227734785243924e-06, - "loss": 0.845, - "step": 3470 - }, - { - "epoch": 0.4173630734082847, - "grad_norm": 1.9378187471118977, - "learning_rate": 2.6220331875795466e-06, - "loss": 1.0169, - "step": 3471 - }, - { - "epoch": 0.4174833162989238, - "grad_norm": 1.7212486831117024, - "learning_rate": 2.62129280227276e-06, - "loss": 0.985, - "step": 3472 - }, - { - "epoch": 0.41760355918956293, - "grad_norm": 1.9404401927677064, - "learning_rate": 2.62055232271635e-06, - "loss": 0.9129, - "step": 3473 - }, - { - "epoch": 0.417723802080202, - "grad_norm": 2.20064518744568, - "learning_rate": 2.619811749022646e-06, - "loss": 1.1149, - "step": 3474 - }, - { - "epoch": 0.4178440449708411, - "grad_norm": 2.2532421700622907, - "learning_rate": 2.6190710813039917e-06, - "loss": 0.9422, - "step": 3475 - }, - { - "epoch": 0.4179642878614802, - "grad_norm": 2.3865370361280362, - "learning_rate": 2.618330319672747e-06, - "loss": 1.0631, - "step": 3476 - }, - { - "epoch": 0.41808453075211927, - "grad_norm": 3.318076424213491, - "learning_rate": 2.617589464241284e-06, - "loss": 1.1455, - "step": 3477 - }, - { - "epoch": 0.4182047736427584, - "grad_norm": 1.736123380829581, - "learning_rate": 2.6168485151219914e-06, - "loss": 0.9651, - "step": 3478 - }, - { - "epoch": 0.4183250165333975, - "grad_norm": 3.825022160085675, - "learning_rate": 2.616107472427269e-06, - "loss": 0.9398, - "step": 3479 - }, - { - "epoch": 0.41844525942403654, - "grad_norm": 2.1767121383372707, - "learning_rate": 2.615366336269533e-06, - "loss": 0.9947, - "step": 3480 - }, - { - "epoch": 0.41856550231467565, - "grad_norm": 4.025057746046483, - "learning_rate": 2.6146251067612126e-06, - "loss": 1.0203, - "step": 3481 - }, - { - "epoch": 0.41868574520531476, - "grad_norm": 1.4598628696944032, - "learning_rate": 2.6138837840147525e-06, - "loss": 1.0456, - "step": 3482 - }, - { - "epoch": 0.4188059880959538, - "grad_norm": 2.0557800405228686, - "learning_rate": 2.6131423681426103e-06, - "loss": 1.0056, - "step": 3483 - }, - { - "epoch": 0.41892623098659293, - "grad_norm": 1.6634280289174839, - "learning_rate": 2.6124008592572587e-06, - "loss": 0.9576, - "step": 3484 - }, - { - "epoch": 0.419046473877232, - "grad_norm": 2.2804986421848263, - "learning_rate": 2.6116592574711835e-06, - "loss": 1.0417, - "step": 3485 - }, - { - "epoch": 0.4191667167678711, - "grad_norm": 2.437558980374915, - "learning_rate": 2.6109175628968853e-06, - "loss": 1.0666, - "step": 3486 - }, - { - "epoch": 0.4192869596585102, - "grad_norm": 2.585657018104102, - "learning_rate": 2.610175775646878e-06, - "loss": 1.0609, - "step": 3487 - }, - { - "epoch": 0.41940720254914926, - "grad_norm": 1.8912023498220416, - "learning_rate": 2.6094338958336907e-06, - "loss": 0.9722, - "step": 3488 - }, - { - "epoch": 0.41952744543978837, - "grad_norm": 1.9488356219476466, - "learning_rate": 2.608691923569867e-06, - "loss": 1.0494, - "step": 3489 - }, - { - "epoch": 0.4196476883304275, - "grad_norm": 1.8767539403936797, - "learning_rate": 2.6079498589679616e-06, - "loss": 0.9863, - "step": 3490 - }, - { - "epoch": 0.41976793122106654, - "grad_norm": 1.6788803645322274, - "learning_rate": 2.6072077021405465e-06, - "loss": 0.9922, - "step": 3491 - }, - { - "epoch": 0.41988817411170565, - "grad_norm": 1.6292243482259947, - "learning_rate": 2.6064654532002054e-06, - "loss": 0.9251, - "step": 3492 - }, - { - "epoch": 0.42000841700234476, - "grad_norm": 1.4253807027800207, - "learning_rate": 2.6057231122595375e-06, - "loss": 0.9864, - "step": 3493 - }, - { - "epoch": 0.4201286598929838, - "grad_norm": 1.4504996754206414, - "learning_rate": 2.604980679431154e-06, - "loss": 0.9597, - "step": 3494 - }, - { - "epoch": 0.4202489027836229, - "grad_norm": 2.083791707793686, - "learning_rate": 2.604238154827684e-06, - "loss": 0.9782, - "step": 3495 - }, - { - "epoch": 0.42036914567426203, - "grad_norm": 3.8967967890498123, - "learning_rate": 2.6034955385617656e-06, - "loss": 0.9653, - "step": 3496 - }, - { - "epoch": 0.4204893885649011, - "grad_norm": 0.7635579914535379, - "learning_rate": 2.6027528307460544e-06, - "loss": 0.8953, - "step": 3497 - }, - { - "epoch": 0.4206096314555402, - "grad_norm": 1.7057520352837436, - "learning_rate": 2.602010031493217e-06, - "loss": 1.0867, - "step": 3498 - }, - { - "epoch": 0.42072987434617926, - "grad_norm": 1.8135968539718446, - "learning_rate": 2.6012671409159367e-06, - "loss": 1.0948, - "step": 3499 - }, - { - "epoch": 0.42085011723681837, - "grad_norm": 1.758294747216979, - "learning_rate": 2.6005241591269097e-06, - "loss": 1.0471, - "step": 3500 - }, - { - "epoch": 0.4209703601274575, - "grad_norm": 1.4652099965324419, - "learning_rate": 2.5997810862388454e-06, - "loss": 1.0315, - "step": 3501 - }, - { - "epoch": 0.42109060301809653, - "grad_norm": 2.328055054298748, - "learning_rate": 2.599037922364467e-06, - "loss": 0.9879, - "step": 3502 - }, - { - "epoch": 0.42121084590873564, - "grad_norm": 2.347542504524013, - "learning_rate": 2.5982946676165112e-06, - "loss": 0.986, - "step": 3503 - }, - { - "epoch": 0.42133108879937475, - "grad_norm": 0.7687974713562774, - "learning_rate": 2.5975513221077313e-06, - "loss": 0.8346, - "step": 3504 - }, - { - "epoch": 0.4214513316900138, - "grad_norm": 1.9762550084667692, - "learning_rate": 2.5968078859508897e-06, - "loss": 1.1105, - "step": 3505 - }, - { - "epoch": 0.4215715745806529, - "grad_norm": 2.1137562780213703, - "learning_rate": 2.5960643592587673e-06, - "loss": 1.0261, - "step": 3506 - }, - { - "epoch": 0.42169181747129203, - "grad_norm": 1.660596937144167, - "learning_rate": 2.5953207421441553e-06, - "loss": 1.0505, - "step": 3507 - }, - { - "epoch": 0.4218120603619311, - "grad_norm": 2.1513972200227305, - "learning_rate": 2.5945770347198603e-06, - "loss": 0.9803, - "step": 3508 - }, - { - "epoch": 0.4219323032525702, - "grad_norm": 1.687033553472121, - "learning_rate": 2.593833237098701e-06, - "loss": 1.0631, - "step": 3509 - }, - { - "epoch": 0.4220525461432093, - "grad_norm": 1.8646822286908193, - "learning_rate": 2.593089349393512e-06, - "loss": 0.8589, - "step": 3510 - }, - { - "epoch": 0.42217278903384836, - "grad_norm": 3.1996531954235112, - "learning_rate": 2.592345371717141e-06, - "loss": 1.0729, - "step": 3511 - }, - { - "epoch": 0.42229303192448747, - "grad_norm": 2.2208395775951133, - "learning_rate": 2.591601304182448e-06, - "loss": 0.9372, - "step": 3512 - }, - { - "epoch": 0.4224132748151266, - "grad_norm": 1.552704356161279, - "learning_rate": 2.5908571469023067e-06, - "loss": 1.0173, - "step": 3513 - }, - { - "epoch": 0.42253351770576564, - "grad_norm": 2.374711872492315, - "learning_rate": 2.5901128999896067e-06, - "loss": 0.9907, - "step": 3514 - }, - { - "epoch": 0.42265376059640475, - "grad_norm": 1.5976874388096463, - "learning_rate": 2.5893685635572487e-06, - "loss": 0.9142, - "step": 3515 - }, - { - "epoch": 0.4227740034870438, - "grad_norm": 1.8951920762765566, - "learning_rate": 2.5886241377181483e-06, - "loss": 0.9242, - "step": 3516 - }, - { - "epoch": 0.4228942463776829, - "grad_norm": 1.726173184375564, - "learning_rate": 2.587879622585234e-06, - "loss": 1.0379, - "step": 3517 - }, - { - "epoch": 0.423014489268322, - "grad_norm": 1.958304533509938, - "learning_rate": 2.5871350182714486e-06, - "loss": 0.9905, - "step": 3518 - }, - { - "epoch": 0.4231347321589611, - "grad_norm": 1.8153053146695541, - "learning_rate": 2.586390324889748e-06, - "loss": 1.0324, - "step": 3519 - }, - { - "epoch": 0.4232549750496002, - "grad_norm": 2.5434389866206955, - "learning_rate": 2.5856455425531003e-06, - "loss": 0.8933, - "step": 3520 - }, - { - "epoch": 0.4233752179402393, - "grad_norm": 2.4288074334657828, - "learning_rate": 2.5849006713744902e-06, - "loss": 1.0363, - "step": 3521 - }, - { - "epoch": 0.42349546083087836, - "grad_norm": 2.807666185034413, - "learning_rate": 2.5841557114669135e-06, - "loss": 0.9588, - "step": 3522 - }, - { - "epoch": 0.42361570372151747, - "grad_norm": 2.3218238095869577, - "learning_rate": 2.58341066294338e-06, - "loss": 0.9016, - "step": 3523 - }, - { - "epoch": 0.4237359466121566, - "grad_norm": 2.347060446898214, - "learning_rate": 2.5826655259169124e-06, - "loss": 1.0871, - "step": 3524 - }, - { - "epoch": 0.42385618950279563, - "grad_norm": 2.055528243917302, - "learning_rate": 2.5819203005005475e-06, - "loss": 1.1292, - "step": 3525 - }, - { - "epoch": 0.42397643239343474, - "grad_norm": 1.5837060422750882, - "learning_rate": 2.581174986807336e-06, - "loss": 1.0152, - "step": 3526 - }, - { - "epoch": 0.42409667528407385, - "grad_norm": 2.3028674164082767, - "learning_rate": 2.580429584950341e-06, - "loss": 1.1434, - "step": 3527 - }, - { - "epoch": 0.4242169181747129, - "grad_norm": 2.378587422671031, - "learning_rate": 2.5796840950426397e-06, - "loss": 0.8939, - "step": 3528 - }, - { - "epoch": 0.424337161065352, - "grad_norm": 2.0071452772536085, - "learning_rate": 2.578938517197322e-06, - "loss": 0.8885, - "step": 3529 - }, - { - "epoch": 0.4244574039559911, - "grad_norm": 2.102806432213251, - "learning_rate": 2.5781928515274916e-06, - "loss": 0.8498, - "step": 3530 - }, - { - "epoch": 0.4245776468466302, - "grad_norm": 2.805284356920195, - "learning_rate": 2.577447098146265e-06, - "loss": 0.9098, - "step": 3531 - }, - { - "epoch": 0.4246978897372693, - "grad_norm": 1.4860610220639014, - "learning_rate": 2.5767012571667724e-06, - "loss": 1.0102, - "step": 3532 - }, - { - "epoch": 0.42481813262790835, - "grad_norm": 3.188331046880476, - "learning_rate": 2.5759553287021587e-06, - "loss": 0.918, - "step": 3533 - }, - { - "epoch": 0.42493837551854746, - "grad_norm": 2.1379428962382705, - "learning_rate": 2.5752093128655786e-06, - "loss": 1.0008, - "step": 3534 - }, - { - "epoch": 0.4250586184091866, - "grad_norm": 1.8347072943709757, - "learning_rate": 2.574463209770204e-06, - "loss": 0.9641, - "step": 3535 - }, - { - "epoch": 0.42517886129982563, - "grad_norm": 1.9677677428965368, - "learning_rate": 2.5737170195292165e-06, - "loss": 1.0216, - "step": 3536 - }, - { - "epoch": 0.42529910419046474, - "grad_norm": 2.100089198443356, - "learning_rate": 2.572970742255814e-06, - "loss": 1.005, - "step": 3537 - }, - { - "epoch": 0.42541934708110385, - "grad_norm": 1.645592571305572, - "learning_rate": 2.5722243780632046e-06, - "loss": 1.0468, - "step": 3538 - }, - { - "epoch": 0.4255395899717429, - "grad_norm": 0.8287882038591519, - "learning_rate": 2.5714779270646125e-06, - "loss": 0.8889, - "step": 3539 - }, - { - "epoch": 0.425659832862382, - "grad_norm": 2.017560399320126, - "learning_rate": 2.5707313893732735e-06, - "loss": 0.9924, - "step": 3540 - }, - { - "epoch": 0.4257800757530211, - "grad_norm": 1.8597259190340392, - "learning_rate": 2.5699847651024364e-06, - "loss": 0.9949, - "step": 3541 - }, - { - "epoch": 0.4259003186436602, - "grad_norm": 2.3254087847592197, - "learning_rate": 2.5692380543653627e-06, - "loss": 1.0009, - "step": 3542 - }, - { - "epoch": 0.4260205615342993, - "grad_norm": 1.990224372157303, - "learning_rate": 2.5684912572753293e-06, - "loss": 0.9289, - "step": 3543 - }, - { - "epoch": 0.4261408044249384, - "grad_norm": 2.3285224114523757, - "learning_rate": 2.5677443739456245e-06, - "loss": 1.0759, - "step": 3544 - }, - { - "epoch": 0.42626104731557746, - "grad_norm": 2.020419547069172, - "learning_rate": 2.5669974044895495e-06, - "loss": 1.0275, - "step": 3545 - }, - { - "epoch": 0.42638129020621657, - "grad_norm": 1.9203151172095545, - "learning_rate": 2.5662503490204187e-06, - "loss": 1.0203, - "step": 3546 - }, - { - "epoch": 0.4265015330968556, - "grad_norm": 2.057326677885599, - "learning_rate": 2.5655032076515603e-06, - "loss": 0.9911, - "step": 3547 - }, - { - "epoch": 0.42662177598749473, - "grad_norm": 3.2183839163878756, - "learning_rate": 2.5647559804963155e-06, - "loss": 1.0484, - "step": 3548 - }, - { - "epoch": 0.42674201887813384, - "grad_norm": 1.9102185323035703, - "learning_rate": 2.5640086676680364e-06, - "loss": 1.0171, - "step": 3549 - }, - { - "epoch": 0.4268622617687729, - "grad_norm": 3.226607721856977, - "learning_rate": 2.5632612692800923e-06, - "loss": 1.0427, - "step": 3550 - }, - { - "epoch": 0.426982504659412, - "grad_norm": 2.650173462282797, - "learning_rate": 2.5625137854458603e-06, - "loss": 0.971, - "step": 3551 - }, - { - "epoch": 0.4271027475500511, - "grad_norm": 1.784170533873447, - "learning_rate": 2.561766216278735e-06, - "loss": 1.0329, - "step": 3552 - }, - { - "epoch": 0.4272229904406902, - "grad_norm": 1.8369008884636908, - "learning_rate": 2.561018561892121e-06, - "loss": 1.0349, - "step": 3553 - }, - { - "epoch": 0.4273432333313293, - "grad_norm": 1.4948905708298863, - "learning_rate": 2.5602708223994363e-06, - "loss": 0.9861, - "step": 3554 - }, - { - "epoch": 0.4274634762219684, - "grad_norm": 3.0462943281815584, - "learning_rate": 2.559522997914115e-06, - "loss": 0.9081, - "step": 3555 - }, - { - "epoch": 0.42758371911260745, - "grad_norm": 2.0238171077767824, - "learning_rate": 2.558775088549599e-06, - "loss": 1.0739, - "step": 3556 - }, - { - "epoch": 0.42770396200324656, - "grad_norm": 2.2946186011380454, - "learning_rate": 2.5580270944193467e-06, - "loss": 0.896, - "step": 3557 - }, - { - "epoch": 0.4278242048938857, - "grad_norm": 0.8008244944735341, - "learning_rate": 2.557279015636827e-06, - "loss": 0.806, - "step": 3558 - }, - { - "epoch": 0.42794444778452473, - "grad_norm": 0.8137939859623196, - "learning_rate": 2.5565308523155245e-06, - "loss": 0.8896, - "step": 3559 - }, - { - "epoch": 0.42806469067516384, - "grad_norm": 2.354199929691606, - "learning_rate": 2.5557826045689336e-06, - "loss": 1.0534, - "step": 3560 - }, - { - "epoch": 0.4281849335658029, - "grad_norm": 0.8438211340412005, - "learning_rate": 2.5550342725105643e-06, - "loss": 0.8406, - "step": 3561 - }, - { - "epoch": 0.428305176456442, - "grad_norm": 1.6036877928849929, - "learning_rate": 2.554285856253937e-06, - "loss": 1.0514, - "step": 3562 - }, - { - "epoch": 0.4284254193470811, - "grad_norm": 1.7316232106860374, - "learning_rate": 2.5535373559125855e-06, - "loss": 0.9962, - "step": 3563 - }, - { - "epoch": 0.42854566223772017, - "grad_norm": 1.7592555247744954, - "learning_rate": 2.552788771600057e-06, - "loss": 1.0493, - "step": 3564 - }, - { - "epoch": 0.4286659051283593, - "grad_norm": 1.7176433815444143, - "learning_rate": 2.5520401034299118e-06, - "loss": 1.047, - "step": 3565 - }, - { - "epoch": 0.4287861480189984, - "grad_norm": 1.9444087736702838, - "learning_rate": 2.551291351515722e-06, - "loss": 1.0979, - "step": 3566 - }, - { - "epoch": 0.42890639090963745, - "grad_norm": 1.5296405196646774, - "learning_rate": 2.5505425159710726e-06, - "loss": 1.0873, - "step": 3567 - }, - { - "epoch": 0.42902663380027656, - "grad_norm": 4.313635856203762, - "learning_rate": 2.549793596909561e-06, - "loss": 1.0637, - "step": 3568 - }, - { - "epoch": 0.42914687669091567, - "grad_norm": 3.6442167058463912, - "learning_rate": 2.5490445944447976e-06, - "loss": 0.8924, - "step": 3569 - }, - { - "epoch": 0.4292671195815547, - "grad_norm": 3.111844716167023, - "learning_rate": 2.548295508690406e-06, - "loss": 0.8867, - "step": 3570 - }, - { - "epoch": 0.42938736247219383, - "grad_norm": 1.588703993544617, - "learning_rate": 2.5475463397600217e-06, - "loss": 0.993, - "step": 3571 - }, - { - "epoch": 0.42950760536283294, - "grad_norm": 1.935587646794025, - "learning_rate": 2.546797087767293e-06, - "loss": 1.0088, - "step": 3572 - }, - { - "epoch": 0.429627848253472, - "grad_norm": 1.5749286726286362, - "learning_rate": 2.546047752825881e-06, - "loss": 1.1032, - "step": 3573 - }, - { - "epoch": 0.4297480911441111, - "grad_norm": 2.0416568314711157, - "learning_rate": 2.5452983350494595e-06, - "loss": 1.169, - "step": 3574 - }, - { - "epoch": 0.4298683340347502, - "grad_norm": 2.1938000013081744, - "learning_rate": 2.544548834551713e-06, - "loss": 0.883, - "step": 3575 - }, - { - "epoch": 0.4299885769253893, - "grad_norm": 2.1845079940890613, - "learning_rate": 2.5437992514463424e-06, - "loss": 1.1701, - "step": 3576 - }, - { - "epoch": 0.4301088198160284, - "grad_norm": 1.6395848928183183, - "learning_rate": 2.5430495858470565e-06, - "loss": 1.1065, - "step": 3577 - }, - { - "epoch": 0.43022906270666744, - "grad_norm": 2.1947555106911074, - "learning_rate": 2.54229983786758e-06, - "loss": 1.0028, - "step": 3578 - }, - { - "epoch": 0.43034930559730655, - "grad_norm": 3.284924729180167, - "learning_rate": 2.541550007621651e-06, - "loss": 1.0765, - "step": 3579 - }, - { - "epoch": 0.43046954848794566, - "grad_norm": 1.7699043604772, - "learning_rate": 2.5408000952230156e-06, - "loss": 1.0263, - "step": 3580 - }, - { - "epoch": 0.4305897913785847, - "grad_norm": 1.944989097093948, - "learning_rate": 2.5400501007854357e-06, - "loss": 1.1248, - "step": 3581 - }, - { - "epoch": 0.43071003426922383, - "grad_norm": 1.7763198082861695, - "learning_rate": 2.539300024422685e-06, - "loss": 0.9908, - "step": 3582 - }, - { - "epoch": 0.43083027715986294, - "grad_norm": 0.8104565673524382, - "learning_rate": 2.538549866248549e-06, - "loss": 0.8709, - "step": 3583 - }, - { - "epoch": 0.430950520050502, - "grad_norm": 1.8295440383035444, - "learning_rate": 2.5377996263768274e-06, - "loss": 1.0455, - "step": 3584 - }, - { - "epoch": 0.4310707629411411, - "grad_norm": 1.6310533602351354, - "learning_rate": 2.5370493049213293e-06, - "loss": 0.9056, - "step": 3585 - }, - { - "epoch": 0.4311910058317802, - "grad_norm": 1.8040504817539205, - "learning_rate": 2.536298901995878e-06, - "loss": 1.0303, - "step": 3586 - }, - { - "epoch": 0.43131124872241927, - "grad_norm": 1.6170763355324216, - "learning_rate": 2.535548417714311e-06, - "loss": 1.0361, - "step": 3587 - }, - { - "epoch": 0.4314314916130584, - "grad_norm": 1.431477411185853, - "learning_rate": 2.534797852190474e-06, - "loss": 1.0905, - "step": 3588 - }, - { - "epoch": 0.4315517345036975, - "grad_norm": 1.850502064598352, - "learning_rate": 2.5340472055382283e-06, - "loss": 1.0382, - "step": 3589 - }, - { - "epoch": 0.43167197739433655, - "grad_norm": 2.4271961453351247, - "learning_rate": 2.5332964778714468e-06, - "loss": 1.0417, - "step": 3590 - }, - { - "epoch": 0.43179222028497566, - "grad_norm": 1.824069750171635, - "learning_rate": 2.5325456693040123e-06, - "loss": 0.8974, - "step": 3591 - }, - { - "epoch": 0.43191246317561477, - "grad_norm": 2.1644467449937554, - "learning_rate": 2.531794779949824e-06, - "loss": 0.9834, - "step": 3592 - }, - { - "epoch": 0.4320327060662538, - "grad_norm": 1.8629455263955026, - "learning_rate": 2.5310438099227903e-06, - "loss": 1.1073, - "step": 3593 - }, - { - "epoch": 0.43215294895689293, - "grad_norm": 1.3508984031199618, - "learning_rate": 2.530292759336833e-06, - "loss": 0.7923, - "step": 3594 - }, - { - "epoch": 0.432273191847532, - "grad_norm": 2.005288760731485, - "learning_rate": 2.5295416283058855e-06, - "loss": 0.9376, - "step": 3595 - }, - { - "epoch": 0.4323934347381711, - "grad_norm": 1.74319100666326, - "learning_rate": 2.5287904169438943e-06, - "loss": 0.8914, - "step": 3596 - }, - { - "epoch": 0.4325136776288102, - "grad_norm": 2.643334527883304, - "learning_rate": 2.528039125364817e-06, - "loss": 0.8812, - "step": 3597 - }, - { - "epoch": 0.43263392051944927, - "grad_norm": 2.5054247175153197, - "learning_rate": 2.5272877536826246e-06, - "loss": 0.984, - "step": 3598 - }, - { - "epoch": 0.4327541634100884, - "grad_norm": 4.048772300854302, - "learning_rate": 2.5265363020112986e-06, - "loss": 0.9284, - "step": 3599 - }, - { - "epoch": 0.4328744063007275, - "grad_norm": 1.7163286666456, - "learning_rate": 2.5257847704648344e-06, - "loss": 1.0728, - "step": 3600 - }, - { - "epoch": 0.43299464919136654, - "grad_norm": 1.764372862687203, - "learning_rate": 2.525033159157239e-06, - "loss": 0.9941, - "step": 3601 - }, - { - "epoch": 0.43311489208200565, - "grad_norm": 1.7592069393133853, - "learning_rate": 2.52428146820253e-06, - "loss": 1.012, - "step": 3602 - }, - { - "epoch": 0.43323513497264476, - "grad_norm": 1.6411162594578377, - "learning_rate": 2.52352969771474e-06, - "loss": 1.0528, - "step": 3603 - }, - { - "epoch": 0.4333553778632838, - "grad_norm": 2.0549070291766416, - "learning_rate": 2.5227778478079106e-06, - "loss": 1.1104, - "step": 3604 - }, - { - "epoch": 0.43347562075392293, - "grad_norm": 1.4588935956582847, - "learning_rate": 2.522025918596098e-06, - "loss": 0.993, - "step": 3605 - }, - { - "epoch": 0.43359586364456204, - "grad_norm": 1.574977871572872, - "learning_rate": 2.521273910193368e-06, - "loss": 0.8863, - "step": 3606 - }, - { - "epoch": 0.4337161065352011, - "grad_norm": 2.1551799606980353, - "learning_rate": 2.5205218227138006e-06, - "loss": 1.105, - "step": 3607 - }, - { - "epoch": 0.4338363494258402, - "grad_norm": 2.0467953266041987, - "learning_rate": 2.519769656271486e-06, - "loss": 1.0113, - "step": 3608 - }, - { - "epoch": 0.43395659231647926, - "grad_norm": 1.9543096992945708, - "learning_rate": 2.5190174109805285e-06, - "loss": 0.9152, - "step": 3609 - }, - { - "epoch": 0.43407683520711837, - "grad_norm": 2.2993423807163964, - "learning_rate": 2.518265086955042e-06, - "loss": 0.8732, - "step": 3610 - }, - { - "epoch": 0.4341970780977575, - "grad_norm": 1.8629231938630508, - "learning_rate": 2.5175126843091534e-06, - "loss": 1.0675, - "step": 3611 - }, - { - "epoch": 0.43431732098839654, - "grad_norm": 1.8713757137251104, - "learning_rate": 2.5167602031570034e-06, - "loss": 0.9739, - "step": 3612 - }, - { - "epoch": 0.43443756387903565, - "grad_norm": 1.5162305900167894, - "learning_rate": 2.51600764361274e-06, - "loss": 0.9654, - "step": 3613 - }, - { - "epoch": 0.43455780676967476, - "grad_norm": 2.227124273805874, - "learning_rate": 2.5152550057905283e-06, - "loss": 1.0179, - "step": 3614 - }, - { - "epoch": 0.4346780496603138, - "grad_norm": 2.2681536014085375, - "learning_rate": 2.5145022898045415e-06, - "loss": 0.9898, - "step": 3615 - }, - { - "epoch": 0.4347982925509529, - "grad_norm": 1.9855709527008165, - "learning_rate": 2.5137494957689664e-06, - "loss": 1.1299, - "step": 3616 - }, - { - "epoch": 0.43491853544159204, - "grad_norm": 0.7615118675008519, - "learning_rate": 2.5129966237980016e-06, - "loss": 0.8307, - "step": 3617 - }, - { - "epoch": 0.4350387783322311, - "grad_norm": 2.0282011185337807, - "learning_rate": 2.512243674005857e-06, - "loss": 1.0119, - "step": 3618 - }, - { - "epoch": 0.4351590212228702, - "grad_norm": 1.7180888898544109, - "learning_rate": 2.5114906465067537e-06, - "loss": 1.0869, - "step": 3619 - }, - { - "epoch": 0.4352792641135093, - "grad_norm": 1.8416693634197576, - "learning_rate": 2.5107375414149264e-06, - "loss": 0.9904, - "step": 3620 - }, - { - "epoch": 0.43539950700414837, - "grad_norm": 2.934883088312962, - "learning_rate": 2.5099843588446197e-06, - "loss": 0.9445, - "step": 3621 - }, - { - "epoch": 0.4355197498947875, - "grad_norm": 2.094150561832305, - "learning_rate": 2.509231098910091e-06, - "loss": 0.8491, - "step": 3622 - }, - { - "epoch": 0.4356399927854266, - "grad_norm": 2.025587905501768, - "learning_rate": 2.508477761725611e-06, - "loss": 0.9807, - "step": 3623 - }, - { - "epoch": 0.43576023567606564, - "grad_norm": 2.516255554405332, - "learning_rate": 2.507724347405458e-06, - "loss": 1.0391, - "step": 3624 - }, - { - "epoch": 0.43588047856670475, - "grad_norm": 2.3587860989297336, - "learning_rate": 2.5069708560639243e-06, - "loss": 1.0536, - "step": 3625 - }, - { - "epoch": 0.4360007214573438, - "grad_norm": 2.0597949929678485, - "learning_rate": 2.5062172878153158e-06, - "loss": 0.8433, - "step": 3626 - }, - { - "epoch": 0.4361209643479829, - "grad_norm": 1.7460204926213456, - "learning_rate": 2.505463642773947e-06, - "loss": 1.1027, - "step": 3627 - }, - { - "epoch": 0.43624120723862203, - "grad_norm": 2.2318966866034886, - "learning_rate": 2.504709921054146e-06, - "loss": 0.9812, - "step": 3628 - }, - { - "epoch": 0.4363614501292611, - "grad_norm": 2.0733690263470423, - "learning_rate": 2.50395612277025e-06, - "loss": 1.067, - "step": 3629 - }, - { - "epoch": 0.4364816930199002, - "grad_norm": 1.742721544190212, - "learning_rate": 2.503202248036612e-06, - "loss": 0.9615, - "step": 3630 - }, - { - "epoch": 0.4366019359105393, - "grad_norm": 1.9276001177166937, - "learning_rate": 2.5024482969675927e-06, - "loss": 0.9648, - "step": 3631 - }, - { - "epoch": 0.43672217880117836, - "grad_norm": 1.980318684507335, - "learning_rate": 2.501694269677566e-06, - "loss": 1.0706, - "step": 3632 - }, - { - "epoch": 0.4368424216918175, - "grad_norm": 2.654825165015451, - "learning_rate": 2.500940166280918e-06, - "loss": 1.038, - "step": 3633 - }, - { - "epoch": 0.4369626645824566, - "grad_norm": 2.0211281577750992, - "learning_rate": 2.500185986892045e-06, - "loss": 1.022, - "step": 3634 - }, - { - "epoch": 0.43708290747309564, - "grad_norm": 1.9743844195627553, - "learning_rate": 2.499431731625355e-06, - "loss": 1.004, - "step": 3635 - }, - { - "epoch": 0.43720315036373475, - "grad_norm": 1.8810705460537511, - "learning_rate": 2.4986774005952686e-06, - "loss": 1.026, - "step": 3636 - }, - { - "epoch": 0.43732339325437386, - "grad_norm": 2.1155483756001408, - "learning_rate": 2.4979229939162166e-06, - "loss": 1.0707, - "step": 3637 - }, - { - "epoch": 0.4374436361450129, - "grad_norm": 2.0156647434308503, - "learning_rate": 2.4971685117026433e-06, - "loss": 1.0307, - "step": 3638 - }, - { - "epoch": 0.437563879035652, - "grad_norm": 1.4037529285838557, - "learning_rate": 2.4964139540690018e-06, - "loss": 1.0003, - "step": 3639 - }, - { - "epoch": 0.4376841219262911, - "grad_norm": 1.9079893337967275, - "learning_rate": 2.495659321129758e-06, - "loss": 0.956, - "step": 3640 - }, - { - "epoch": 0.4378043648169302, - "grad_norm": 1.623823767227885, - "learning_rate": 2.494904612999389e-06, - "loss": 0.987, - "step": 3641 - }, - { - "epoch": 0.4379246077075693, - "grad_norm": 0.7954863622420705, - "learning_rate": 2.4941498297923843e-06, - "loss": 0.8383, - "step": 3642 - }, - { - "epoch": 0.43804485059820836, - "grad_norm": 1.633683113999705, - "learning_rate": 2.4933949716232424e-06, - "loss": 0.928, - "step": 3643 - }, - { - "epoch": 0.43816509348884747, - "grad_norm": 2.2295217023342024, - "learning_rate": 2.492640038606476e-06, - "loss": 0.9686, - "step": 3644 - }, - { - "epoch": 0.4382853363794866, - "grad_norm": 1.9748579070728232, - "learning_rate": 2.491885030856608e-06, - "loss": 1.0182, - "step": 3645 - }, - { - "epoch": 0.43840557927012563, - "grad_norm": 1.900156632040591, - "learning_rate": 2.4911299484881713e-06, - "loss": 1.0549, - "step": 3646 - }, - { - "epoch": 0.43852582216076474, - "grad_norm": 1.7255572676638196, - "learning_rate": 2.490374791615712e-06, - "loss": 1.0398, - "step": 3647 - }, - { - "epoch": 0.43864606505140386, - "grad_norm": 3.435278573546642, - "learning_rate": 2.4896195603537867e-06, - "loss": 1.0132, - "step": 3648 - }, - { - "epoch": 0.4387663079420429, - "grad_norm": 3.333394781182078, - "learning_rate": 2.488864254816964e-06, - "loss": 0.9756, - "step": 3649 - }, - { - "epoch": 0.438886550832682, - "grad_norm": 2.0438700742766605, - "learning_rate": 2.4881088751198218e-06, - "loss": 0.9007, - "step": 3650 - }, - { - "epoch": 0.43900679372332113, - "grad_norm": 2.531075742398343, - "learning_rate": 2.4873534213769517e-06, - "loss": 0.8745, - "step": 3651 - }, - { - "epoch": 0.4391270366139602, - "grad_norm": 2.0071651142362166, - "learning_rate": 2.4865978937029547e-06, - "loss": 0.9444, - "step": 3652 - }, - { - "epoch": 0.4392472795045993, - "grad_norm": 1.5531338543706672, - "learning_rate": 2.485842292212445e-06, - "loss": 0.8972, - "step": 3653 - }, - { - "epoch": 0.4393675223952384, - "grad_norm": 1.6959228120234218, - "learning_rate": 2.485086617020045e-06, - "loss": 1.028, - "step": 3654 - }, - { - "epoch": 0.43948776528587746, - "grad_norm": 2.210396097167218, - "learning_rate": 2.4843308682403903e-06, - "loss": 1.0478, - "step": 3655 - }, - { - "epoch": 0.4396080081765166, - "grad_norm": 1.5505771454603758, - "learning_rate": 2.4835750459881294e-06, - "loss": 1.0553, - "step": 3656 - }, - { - "epoch": 0.43972825106715563, - "grad_norm": 5.203244686182097, - "learning_rate": 2.4828191503779177e-06, - "loss": 1.0401, - "step": 3657 - }, - { - "epoch": 0.43984849395779474, - "grad_norm": 1.92558662096486, - "learning_rate": 2.482063181524425e-06, - "loss": 1.1215, - "step": 3658 - }, - { - "epoch": 0.43996873684843385, - "grad_norm": 1.8847849790538034, - "learning_rate": 2.4813071395423307e-06, - "loss": 1.0396, - "step": 3659 - }, - { - "epoch": 0.4400889797390729, - "grad_norm": 1.6790423906528908, - "learning_rate": 2.4805510245463263e-06, - "loss": 0.8746, - "step": 3660 - }, - { - "epoch": 0.440209222629712, - "grad_norm": 1.8807140385299173, - "learning_rate": 2.4797948366511137e-06, - "loss": 0.8212, - "step": 3661 - }, - { - "epoch": 0.4403294655203511, - "grad_norm": 1.7428514387251803, - "learning_rate": 2.4790385759714055e-06, - "loss": 0.995, - "step": 3662 - }, - { - "epoch": 0.4404497084109902, - "grad_norm": 1.925385037841863, - "learning_rate": 2.478282242621926e-06, - "loss": 0.945, - "step": 3663 - }, - { - "epoch": 0.4405699513016293, - "grad_norm": 0.8986433042242519, - "learning_rate": 2.477525836717411e-06, - "loss": 0.8666, - "step": 3664 - }, - { - "epoch": 0.4406901941922684, - "grad_norm": 2.2632906759827556, - "learning_rate": 2.476769358372606e-06, - "loss": 1.0261, - "step": 3665 - }, - { - "epoch": 0.44081043708290746, - "grad_norm": 2.315453679263355, - "learning_rate": 2.4760128077022683e-06, - "loss": 0.9799, - "step": 3666 - }, - { - "epoch": 0.44093067997354657, - "grad_norm": 1.439188504578895, - "learning_rate": 2.4752561848211672e-06, - "loss": 0.9129, - "step": 3667 - }, - { - "epoch": 0.4410509228641857, - "grad_norm": 1.820692554795937, - "learning_rate": 2.4744994898440797e-06, - "loss": 0.9415, - "step": 3668 - }, - { - "epoch": 0.44117116575482473, - "grad_norm": 4.4704595110286025, - "learning_rate": 2.473742722885797e-06, - "loss": 1.067, - "step": 3669 - }, - { - "epoch": 0.44129140864546385, - "grad_norm": 2.5479515000522777, - "learning_rate": 2.4729858840611197e-06, - "loss": 0.8901, - "step": 3670 - }, - { - "epoch": 0.4414116515361029, - "grad_norm": 2.5606621455151246, - "learning_rate": 2.4722289734848605e-06, - "loss": 0.9583, - "step": 3671 - }, - { - "epoch": 0.441531894426742, - "grad_norm": 2.23587401997725, - "learning_rate": 2.471471991271841e-06, - "loss": 1.0155, - "step": 3672 - }, - { - "epoch": 0.4416521373173811, - "grad_norm": 1.8716562974438198, - "learning_rate": 2.470714937536896e-06, - "loss": 1.0266, - "step": 3673 - }, - { - "epoch": 0.4417723802080202, - "grad_norm": 2.0282528406417404, - "learning_rate": 2.469957812394868e-06, - "loss": 0.9391, - "step": 3674 - }, - { - "epoch": 0.4418926230986593, - "grad_norm": 2.3589156758436634, - "learning_rate": 2.4692006159606148e-06, - "loss": 0.9976, - "step": 3675 - }, - { - "epoch": 0.4420128659892984, - "grad_norm": 2.131555431327961, - "learning_rate": 2.468443348349e-06, - "loss": 1.0148, - "step": 3676 - }, - { - "epoch": 0.44213310887993745, - "grad_norm": 2.7439775411670997, - "learning_rate": 2.467686009674902e-06, - "loss": 1.0587, - "step": 3677 - }, - { - "epoch": 0.44225335177057656, - "grad_norm": 1.7787088035804428, - "learning_rate": 2.466928600053209e-06, - "loss": 1.0822, - "step": 3678 - }, - { - "epoch": 0.4423735946612157, - "grad_norm": 1.955153182777347, - "learning_rate": 2.466171119598818e-06, - "loss": 0.9482, - "step": 3679 - }, - { - "epoch": 0.44249383755185473, - "grad_norm": 2.118508802880479, - "learning_rate": 2.465413568426639e-06, - "loss": 1.0026, - "step": 3680 - }, - { - "epoch": 0.44261408044249384, - "grad_norm": 1.4803369307399492, - "learning_rate": 2.464655946651591e-06, - "loss": 1.0461, - "step": 3681 - }, - { - "epoch": 0.44273432333313295, - "grad_norm": 1.979854571270973, - "learning_rate": 2.4638982543886065e-06, - "loss": 1.0344, - "step": 3682 - }, - { - "epoch": 0.442854566223772, - "grad_norm": 2.7204009557165065, - "learning_rate": 2.4631404917526254e-06, - "loss": 1.1007, - "step": 3683 - }, - { - "epoch": 0.4429748091144111, - "grad_norm": 1.5817979428661753, - "learning_rate": 2.4623826588586e-06, - "loss": 1.0165, - "step": 3684 - }, - { - "epoch": 0.4430950520050502, - "grad_norm": 1.4378313429057283, - "learning_rate": 2.461624755821492e-06, - "loss": 1.0604, - "step": 3685 - }, - { - "epoch": 0.4432152948956893, - "grad_norm": 1.5665538841937483, - "learning_rate": 2.4608667827562763e-06, - "loss": 0.9971, - "step": 3686 - }, - { - "epoch": 0.4433355377863284, - "grad_norm": 1.7959743730547586, - "learning_rate": 2.460108739777936e-06, - "loss": 1.1218, - "step": 3687 - }, - { - "epoch": 0.44345578067696745, - "grad_norm": 1.4647072690325904, - "learning_rate": 2.4593506270014656e-06, - "loss": 0.9857, - "step": 3688 - }, - { - "epoch": 0.44357602356760656, - "grad_norm": 1.408677231636333, - "learning_rate": 2.45859244454187e-06, - "loss": 1.0493, - "step": 3689 - }, - { - "epoch": 0.44369626645824567, - "grad_norm": 1.5625051879796683, - "learning_rate": 2.4578341925141655e-06, - "loss": 0.8922, - "step": 3690 - }, - { - "epoch": 0.4438165093488847, - "grad_norm": 5.560099909042567, - "learning_rate": 2.457075871033378e-06, - "loss": 0.9532, - "step": 3691 - }, - { - "epoch": 0.44393675223952384, - "grad_norm": 3.205760634407564, - "learning_rate": 2.4563174802145445e-06, - "loss": 1.1161, - "step": 3692 - }, - { - "epoch": 0.44405699513016295, - "grad_norm": 0.6060689596843104, - "learning_rate": 2.455559020172712e-06, - "loss": 0.7294, - "step": 3693 - }, - { - "epoch": 0.444177238020802, - "grad_norm": 2.682143528869092, - "learning_rate": 2.4548004910229385e-06, - "loss": 1.1245, - "step": 3694 - }, - { - "epoch": 0.4442974809114411, - "grad_norm": 1.6995814144681383, - "learning_rate": 2.4540418928802913e-06, - "loss": 1.1001, - "step": 3695 - }, - { - "epoch": 0.4444177238020802, - "grad_norm": 2.0770255026870172, - "learning_rate": 2.4532832258598506e-06, - "loss": 0.8912, - "step": 3696 - }, - { - "epoch": 0.4445379666927193, - "grad_norm": 1.6895918419839684, - "learning_rate": 2.4525244900767047e-06, - "loss": 1.0327, - "step": 3697 - }, - { - "epoch": 0.4446582095833584, - "grad_norm": 0.830142819744531, - "learning_rate": 2.4517656856459536e-06, - "loss": 0.8634, - "step": 3698 - }, - { - "epoch": 0.4447784524739975, - "grad_norm": 1.7945658566264797, - "learning_rate": 2.4510068126827073e-06, - "loss": 0.911, - "step": 3699 - }, - { - "epoch": 0.44489869536463655, - "grad_norm": 2.0586126279598718, - "learning_rate": 2.450247871302086e-06, - "loss": 1.0511, - "step": 3700 - }, - { - "epoch": 0.44501893825527566, - "grad_norm": 2.2625840398028116, - "learning_rate": 2.44948886161922e-06, - "loss": 1.074, - "step": 3701 - }, - { - "epoch": 0.4451391811459148, - "grad_norm": 1.7019252225853212, - "learning_rate": 2.4487297837492524e-06, - "loss": 1.0778, - "step": 3702 - }, - { - "epoch": 0.44525942403655383, - "grad_norm": 2.0289326286731, - "learning_rate": 2.4479706378073323e-06, - "loss": 0.851, - "step": 3703 - }, - { - "epoch": 0.44537966692719294, - "grad_norm": 1.5208833067883385, - "learning_rate": 2.447211423908623e-06, - "loss": 1.0692, - "step": 3704 - }, - { - "epoch": 0.445499909817832, - "grad_norm": 2.7890514726180373, - "learning_rate": 2.4464521421682966e-06, - "loss": 0.9773, - "step": 3705 - }, - { - "epoch": 0.4456201527084711, - "grad_norm": 1.3740551910310108, - "learning_rate": 2.4456927927015345e-06, - "loss": 1.1041, - "step": 3706 - }, - { - "epoch": 0.4457403955991102, - "grad_norm": 2.062974123934411, - "learning_rate": 2.4449333756235307e-06, - "loss": 0.987, - "step": 3707 - }, - { - "epoch": 0.4458606384897493, - "grad_norm": 2.2859306914201407, - "learning_rate": 2.4441738910494876e-06, - "loss": 1.0232, - "step": 3708 - }, - { - "epoch": 0.4459808813803884, - "grad_norm": 2.148421519826933, - "learning_rate": 2.4434143390946176e-06, - "loss": 1.0511, - "step": 3709 - }, - { - "epoch": 0.4461011242710275, - "grad_norm": 1.9054713300411437, - "learning_rate": 2.4426547198741457e-06, - "loss": 1.0803, - "step": 3710 - }, - { - "epoch": 0.44622136716166655, - "grad_norm": 2.1823668061084067, - "learning_rate": 2.441895033503305e-06, - "loss": 0.9835, - "step": 3711 - }, - { - "epoch": 0.44634161005230566, - "grad_norm": 1.6963436664759564, - "learning_rate": 2.4411352800973375e-06, - "loss": 1.0549, - "step": 3712 - }, - { - "epoch": 0.44646185294294477, - "grad_norm": 2.511031227285287, - "learning_rate": 2.4403754597715005e-06, - "loss": 0.982, - "step": 3713 - }, - { - "epoch": 0.4465820958335838, - "grad_norm": 2.058434843991426, - "learning_rate": 2.4396155726410553e-06, - "loss": 1.1544, - "step": 3714 - }, - { - "epoch": 0.44670233872422294, - "grad_norm": 2.501797792614222, - "learning_rate": 2.438855618821278e-06, - "loss": 1.1465, - "step": 3715 - }, - { - "epoch": 0.44682258161486205, - "grad_norm": 1.5501873887522701, - "learning_rate": 2.4380955984274517e-06, - "loss": 0.9039, - "step": 3716 - }, - { - "epoch": 0.4469428245055011, - "grad_norm": 2.084850814019442, - "learning_rate": 2.4373355115748716e-06, - "loss": 1.0016, - "step": 3717 - }, - { - "epoch": 0.4470630673961402, - "grad_norm": 1.6022800233872814, - "learning_rate": 2.436575358378842e-06, - "loss": 0.9496, - "step": 3718 - }, - { - "epoch": 0.44718331028677927, - "grad_norm": 3.7212167460448904, - "learning_rate": 2.4358151389546782e-06, - "loss": 1.0609, - "step": 3719 - }, - { - "epoch": 0.4473035531774184, - "grad_norm": 2.576652800097871, - "learning_rate": 2.4350548534177035e-06, - "loss": 0.9886, - "step": 3720 - }, - { - "epoch": 0.4474237960680575, - "grad_norm": 1.615167655627778, - "learning_rate": 2.434294501883254e-06, - "loss": 0.9017, - "step": 3721 - }, - { - "epoch": 0.44754403895869654, - "grad_norm": 2.109871134868078, - "learning_rate": 2.433534084466674e-06, - "loss": 0.8917, - "step": 3722 - }, - { - "epoch": 0.44766428184933565, - "grad_norm": 1.8219850902763868, - "learning_rate": 2.4327736012833178e-06, - "loss": 0.9438, - "step": 3723 - }, - { - "epoch": 0.44778452473997477, - "grad_norm": 2.161695376960176, - "learning_rate": 2.4320130524485506e-06, - "loss": 0.9943, - "step": 3724 - }, - { - "epoch": 0.4479047676306138, - "grad_norm": 1.442435292688424, - "learning_rate": 2.431252438077746e-06, - "loss": 1.0283, - "step": 3725 - }, - { - "epoch": 0.44802501052125293, - "grad_norm": 6.371077883084933, - "learning_rate": 2.4304917582862906e-06, - "loss": 1.0017, - "step": 3726 - }, - { - "epoch": 0.44814525341189204, - "grad_norm": 3.4703540014596173, - "learning_rate": 2.4297310131895774e-06, - "loss": 1.1146, - "step": 3727 - }, - { - "epoch": 0.4482654963025311, - "grad_norm": 2.284082509322575, - "learning_rate": 2.4289702029030113e-06, - "loss": 0.9878, - "step": 3728 - }, - { - "epoch": 0.4483857391931702, - "grad_norm": 14.861501002124005, - "learning_rate": 2.4282093275420057e-06, - "loss": 1.0495, - "step": 3729 - }, - { - "epoch": 0.4485059820838093, - "grad_norm": 1.9106291827105208, - "learning_rate": 2.4274483872219863e-06, - "loss": 0.9307, - "step": 3730 - }, - { - "epoch": 0.4486262249744484, - "grad_norm": 1.7396193936506987, - "learning_rate": 2.426687382058386e-06, - "loss": 1.1554, - "step": 3731 - }, - { - "epoch": 0.4487464678650875, - "grad_norm": 0.9632208143892261, - "learning_rate": 2.425926312166649e-06, - "loss": 0.8502, - "step": 3732 - }, - { - "epoch": 0.4488667107557266, - "grad_norm": 2.054791582031413, - "learning_rate": 2.42516517766223e-06, - "loss": 0.9509, - "step": 3733 - }, - { - "epoch": 0.44898695364636565, - "grad_norm": 1.8502969400414908, - "learning_rate": 2.4244039786605907e-06, - "loss": 0.9075, - "step": 3734 - }, - { - "epoch": 0.44910719653700476, - "grad_norm": 5.42681659572603, - "learning_rate": 2.4236427152772055e-06, - "loss": 1.0516, - "step": 3735 - }, - { - "epoch": 0.4492274394276438, - "grad_norm": 0.9179136868458948, - "learning_rate": 2.422881387627557e-06, - "loss": 0.8406, - "step": 3736 - }, - { - "epoch": 0.4493476823182829, - "grad_norm": 1.5908266070892962, - "learning_rate": 2.422119995827139e-06, - "loss": 0.9952, - "step": 3737 - }, - { - "epoch": 0.44946792520892204, - "grad_norm": 2.35534687897305, - "learning_rate": 2.4213585399914528e-06, - "loss": 0.9675, - "step": 3738 - }, - { - "epoch": 0.4495881680995611, - "grad_norm": 1.7838767242861686, - "learning_rate": 2.4205970202360113e-06, - "loss": 1.0764, - "step": 3739 - }, - { - "epoch": 0.4497084109902002, - "grad_norm": 1.905243279449619, - "learning_rate": 2.4198354366763354e-06, - "loss": 1.0132, - "step": 3740 - }, - { - "epoch": 0.4498286538808393, - "grad_norm": 2.190402802477953, - "learning_rate": 2.4190737894279587e-06, - "loss": 1.01, - "step": 3741 - }, - { - "epoch": 0.44994889677147837, - "grad_norm": 2.238047322936257, - "learning_rate": 2.4183120786064203e-06, - "loss": 1.0291, - "step": 3742 - }, - { - "epoch": 0.4500691396621175, - "grad_norm": 3.2042472548120724, - "learning_rate": 2.417550304327273e-06, - "loss": 1.0771, - "step": 3743 - }, - { - "epoch": 0.4501893825527566, - "grad_norm": 1.497834709093557, - "learning_rate": 2.4167884667060763e-06, - "loss": 0.9865, - "step": 3744 - }, - { - "epoch": 0.45030962544339564, - "grad_norm": 2.0331741609443323, - "learning_rate": 2.4160265658584e-06, - "loss": 1.098, - "step": 3745 - }, - { - "epoch": 0.45042986833403476, - "grad_norm": 14.04340283143432, - "learning_rate": 2.4152646018998253e-06, - "loss": 0.9094, - "step": 3746 - }, - { - "epoch": 0.45055011122467387, - "grad_norm": 1.775724977540671, - "learning_rate": 2.4145025749459403e-06, - "loss": 0.9425, - "step": 3747 - }, - { - "epoch": 0.4506703541153129, - "grad_norm": 2.151085016461896, - "learning_rate": 2.413740485112344e-06, - "loss": 0.9304, - "step": 3748 - }, - { - "epoch": 0.45079059700595203, - "grad_norm": 1.7479499662165283, - "learning_rate": 2.412978332514646e-06, - "loss": 1.0511, - "step": 3749 - }, - { - "epoch": 0.4509108398965911, - "grad_norm": 2.40866225569288, - "learning_rate": 2.4122161172684623e-06, - "loss": 0.9473, - "step": 3750 - }, - { - "epoch": 0.4510310827872302, - "grad_norm": 2.056450381269575, - "learning_rate": 2.4114538394894216e-06, - "loss": 1.0624, - "step": 3751 - }, - { - "epoch": 0.4511513256778693, - "grad_norm": 1.7899313070659701, - "learning_rate": 2.410691499293161e-06, - "loss": 1.0595, - "step": 3752 - }, - { - "epoch": 0.45127156856850836, - "grad_norm": 1.5674882989960799, - "learning_rate": 2.409929096795326e-06, - "loss": 0.9701, - "step": 3753 - }, - { - "epoch": 0.4513918114591475, - "grad_norm": 1.7183921441424292, - "learning_rate": 2.409166632111573e-06, - "loss": 1.0199, - "step": 3754 - }, - { - "epoch": 0.4515120543497866, - "grad_norm": 1.8237094208959452, - "learning_rate": 2.4084041053575674e-06, - "loss": 1.0239, - "step": 3755 - }, - { - "epoch": 0.45163229724042564, - "grad_norm": 1.8189182426529815, - "learning_rate": 2.4076415166489834e-06, - "loss": 0.9494, - "step": 3756 - }, - { - "epoch": 0.45175254013106475, - "grad_norm": 1.7552277274338473, - "learning_rate": 2.406878866101506e-06, - "loss": 1.0281, - "step": 3757 - }, - { - "epoch": 0.45187278302170386, - "grad_norm": 3.421967108893384, - "learning_rate": 2.4061161538308273e-06, - "loss": 1.0129, - "step": 3758 - }, - { - "epoch": 0.4519930259123429, - "grad_norm": 1.7734564557511856, - "learning_rate": 2.4053533799526523e-06, - "loss": 1.1148, - "step": 3759 - }, - { - "epoch": 0.452113268802982, - "grad_norm": 2.3033354333112595, - "learning_rate": 2.404590544582691e-06, - "loss": 1.089, - "step": 3760 - }, - { - "epoch": 0.45223351169362114, - "grad_norm": 2.239264626706815, - "learning_rate": 2.403827647836666e-06, - "loss": 1.0349, - "step": 3761 - }, - { - "epoch": 0.4523537545842602, - "grad_norm": 4.179343179085601, - "learning_rate": 2.4030646898303075e-06, - "loss": 0.9247, - "step": 3762 - }, - { - "epoch": 0.4524739974748993, - "grad_norm": 2.081435241703492, - "learning_rate": 2.4023016706793566e-06, - "loss": 1.0586, - "step": 3763 - }, - { - "epoch": 0.4525942403655384, - "grad_norm": 0.8450669854039937, - "learning_rate": 2.401538590499561e-06, - "loss": 0.8351, - "step": 3764 - }, - { - "epoch": 0.45271448325617747, - "grad_norm": 2.4248842310062884, - "learning_rate": 2.400775449406682e-06, - "loss": 0.9352, - "step": 3765 - }, - { - "epoch": 0.4528347261468166, - "grad_norm": 2.5293786937779967, - "learning_rate": 2.400012247516485e-06, - "loss": 0.9607, - "step": 3766 - }, - { - "epoch": 0.45295496903745563, - "grad_norm": 2.2512312804973624, - "learning_rate": 2.3992489849447484e-06, - "loss": 1.125, - "step": 3767 - }, - { - "epoch": 0.45307521192809475, - "grad_norm": 1.5718235318382527, - "learning_rate": 2.3984856618072584e-06, - "loss": 1.0153, - "step": 3768 - }, - { - "epoch": 0.45319545481873386, - "grad_norm": 1.7717877078882025, - "learning_rate": 2.3977222782198098e-06, - "loss": 0.9625, - "step": 3769 - }, - { - "epoch": 0.4533156977093729, - "grad_norm": 1.8870826512416428, - "learning_rate": 2.3969588342982077e-06, - "loss": 0.9819, - "step": 3770 - }, - { - "epoch": 0.453435940600012, - "grad_norm": 1.6844092427525779, - "learning_rate": 2.396195330158267e-06, - "loss": 0.953, - "step": 3771 - }, - { - "epoch": 0.45355618349065113, - "grad_norm": 4.280588363421616, - "learning_rate": 2.3954317659158094e-06, - "loss": 1.0174, - "step": 3772 - }, - { - "epoch": 0.4536764263812902, - "grad_norm": 0.9298305241178296, - "learning_rate": 2.394668141686667e-06, - "loss": 0.8547, - "step": 3773 - }, - { - "epoch": 0.4537966692719293, - "grad_norm": 2.08368804455091, - "learning_rate": 2.3939044575866813e-06, - "loss": 0.9166, - "step": 3774 - }, - { - "epoch": 0.4539169121625684, - "grad_norm": 2.031935121461704, - "learning_rate": 2.3931407137317024e-06, - "loss": 0.9848, - "step": 3775 - }, - { - "epoch": 0.45403715505320746, - "grad_norm": 1.7959680673398453, - "learning_rate": 2.3923769102375907e-06, - "loss": 1.0799, - "step": 3776 - }, - { - "epoch": 0.4541573979438466, - "grad_norm": 2.251080889156395, - "learning_rate": 2.391613047220213e-06, - "loss": 1.0219, - "step": 3777 - }, - { - "epoch": 0.4542776408344857, - "grad_norm": 1.8155999475743432, - "learning_rate": 2.390849124795447e-06, - "loss": 1.0209, - "step": 3778 - }, - { - "epoch": 0.45439788372512474, - "grad_norm": 2.3320091668238336, - "learning_rate": 2.3900851430791804e-06, - "loss": 1.07, - "step": 3779 - }, - { - "epoch": 0.45451812661576385, - "grad_norm": 2.225012417019261, - "learning_rate": 2.389321102187307e-06, - "loss": 1.0831, - "step": 3780 - }, - { - "epoch": 0.4546383695064029, - "grad_norm": 1.6065136555798631, - "learning_rate": 2.3885570022357326e-06, - "loss": 1.0529, - "step": 3781 - }, - { - "epoch": 0.454758612397042, - "grad_norm": 0.8265668136974476, - "learning_rate": 2.38779284334037e-06, - "loss": 0.8594, - "step": 3782 - }, - { - "epoch": 0.4548788552876811, - "grad_norm": 2.234798364611168, - "learning_rate": 2.387028625617141e-06, - "loss": 1.0173, - "step": 3783 - }, - { - "epoch": 0.4549990981783202, - "grad_norm": 1.8730186802369921, - "learning_rate": 2.3862643491819766e-06, - "loss": 1.0828, - "step": 3784 - }, - { - "epoch": 0.4551193410689593, - "grad_norm": 2.0021393777688643, - "learning_rate": 2.3855000141508186e-06, - "loss": 1.0653, - "step": 3785 - }, - { - "epoch": 0.4552395839595984, - "grad_norm": 2.355726844729664, - "learning_rate": 2.3847356206396143e-06, - "loss": 1.0637, - "step": 3786 - }, - { - "epoch": 0.45535982685023746, - "grad_norm": 2.527004968026172, - "learning_rate": 2.3839711687643227e-06, - "loss": 1.0135, - "step": 3787 - }, - { - "epoch": 0.45548006974087657, - "grad_norm": 1.813208671672121, - "learning_rate": 2.38320665864091e-06, - "loss": 0.9659, - "step": 3788 - }, - { - "epoch": 0.4556003126315157, - "grad_norm": 2.25829418284509, - "learning_rate": 2.3824420903853516e-06, - "loss": 1.0449, - "step": 3789 - }, - { - "epoch": 0.45572055552215474, - "grad_norm": 2.0262698818881026, - "learning_rate": 2.3816774641136324e-06, - "loss": 1.0474, - "step": 3790 - }, - { - "epoch": 0.45584079841279385, - "grad_norm": 1.7244857104365574, - "learning_rate": 2.380912779941745e-06, - "loss": 0.953, - "step": 3791 - }, - { - "epoch": 0.45596104130343296, - "grad_norm": 2.0340146544012034, - "learning_rate": 2.3801480379856918e-06, - "loss": 1.0606, - "step": 3792 - }, - { - "epoch": 0.456081284194072, - "grad_norm": 1.6339712457496447, - "learning_rate": 2.379383238361484e-06, - "loss": 1.0635, - "step": 3793 - }, - { - "epoch": 0.4562015270847111, - "grad_norm": 4.7410185377092064, - "learning_rate": 2.3786183811851407e-06, - "loss": 1.0351, - "step": 3794 - }, - { - "epoch": 0.45632176997535023, - "grad_norm": 1.7960913898308153, - "learning_rate": 2.3778534665726892e-06, - "loss": 1.0305, - "step": 3795 - }, - { - "epoch": 0.4564420128659893, - "grad_norm": 1.9720745173142444, - "learning_rate": 2.377088494640168e-06, - "loss": 0.9588, - "step": 3796 - }, - { - "epoch": 0.4565622557566284, - "grad_norm": 1.7277858766990848, - "learning_rate": 2.3763234655036216e-06, - "loss": 1.0083, - "step": 3797 - }, - { - "epoch": 0.45668249864726745, - "grad_norm": 1.7041240176932055, - "learning_rate": 2.3755583792791046e-06, - "loss": 1.0949, - "step": 3798 - }, - { - "epoch": 0.45680274153790656, - "grad_norm": 1.8938579188652866, - "learning_rate": 2.3747932360826803e-06, - "loss": 0.9747, - "step": 3799 - }, - { - "epoch": 0.4569229844285457, - "grad_norm": 1.7714931904195972, - "learning_rate": 2.3740280360304205e-06, - "loss": 1.0452, - "step": 3800 - }, - { - "epoch": 0.45704322731918473, - "grad_norm": 1.63293244865846, - "learning_rate": 2.3732627792384038e-06, - "loss": 0.9115, - "step": 3801 - }, - { - "epoch": 0.45716347020982384, - "grad_norm": 1.7901980864817093, - "learning_rate": 2.3724974658227207e-06, - "loss": 0.9735, - "step": 3802 - }, - { - "epoch": 0.45728371310046295, - "grad_norm": 2.0984880726756847, - "learning_rate": 2.3717320958994687e-06, - "loss": 0.9329, - "step": 3803 - }, - { - "epoch": 0.457403955991102, - "grad_norm": 2.90210748586214, - "learning_rate": 2.3709666695847534e-06, - "loss": 0.9289, - "step": 3804 - }, - { - "epoch": 0.4575241988817411, - "grad_norm": 1.834798126550071, - "learning_rate": 2.370201186994689e-06, - "loss": 0.9359, - "step": 3805 - }, - { - "epoch": 0.45764444177238023, - "grad_norm": 1.9692652194684581, - "learning_rate": 2.369435648245399e-06, - "loss": 0.9263, - "step": 3806 - }, - { - "epoch": 0.4577646846630193, - "grad_norm": 2.8516842332748893, - "learning_rate": 2.368670053453015e-06, - "loss": 1.0769, - "step": 3807 - }, - { - "epoch": 0.4578849275536584, - "grad_norm": 2.061317451627249, - "learning_rate": 2.3679044027336757e-06, - "loss": 0.9655, - "step": 3808 - }, - { - "epoch": 0.4580051704442975, - "grad_norm": 2.5726611876708234, - "learning_rate": 2.3671386962035326e-06, - "loss": 0.9159, - "step": 3809 - }, - { - "epoch": 0.45812541333493656, - "grad_norm": 2.191564544019694, - "learning_rate": 2.3663729339787405e-06, - "loss": 0.9195, - "step": 3810 - }, - { - "epoch": 0.45824565622557567, - "grad_norm": 2.4785850761669175, - "learning_rate": 2.365607116175466e-06, - "loss": 0.9535, - "step": 3811 - }, - { - "epoch": 0.4583658991162148, - "grad_norm": 2.3620958618362953, - "learning_rate": 2.3648412429098825e-06, - "loss": 0.9053, - "step": 3812 - }, - { - "epoch": 0.45848614200685384, - "grad_norm": 1.8300012594750243, - "learning_rate": 2.364075314298172e-06, - "loss": 1.0544, - "step": 3813 - }, - { - "epoch": 0.45860638489749295, - "grad_norm": 1.82307604138453, - "learning_rate": 2.3633093304565267e-06, - "loss": 0.9276, - "step": 3814 - }, - { - "epoch": 0.458726627788132, - "grad_norm": 1.677649392583574, - "learning_rate": 2.3625432915011443e-06, - "loss": 0.8612, - "step": 3815 - }, - { - "epoch": 0.4588468706787711, - "grad_norm": 1.617057021032728, - "learning_rate": 2.3617771975482334e-06, - "loss": 0.8817, - "step": 3816 - }, - { - "epoch": 0.4589671135694102, - "grad_norm": 1.605321724278327, - "learning_rate": 2.3610110487140083e-06, - "loss": 0.9765, - "step": 3817 - }, - { - "epoch": 0.4590873564600493, - "grad_norm": 2.3364540983295763, - "learning_rate": 2.360244845114695e-06, - "loss": 1.0426, - "step": 3818 - }, - { - "epoch": 0.4592075993506884, - "grad_norm": 2.2706325570570596, - "learning_rate": 2.3594785868665245e-06, - "loss": 0.9209, - "step": 3819 - }, - { - "epoch": 0.4593278422413275, - "grad_norm": 2.047456964359579, - "learning_rate": 2.3587122740857386e-06, - "loss": 1.0325, - "step": 3820 - }, - { - "epoch": 0.45944808513196655, - "grad_norm": 1.5707305048197677, - "learning_rate": 2.357945906888586e-06, - "loss": 1.0151, - "step": 3821 - }, - { - "epoch": 0.45956832802260567, - "grad_norm": 2.1290863240929085, - "learning_rate": 2.357179485391324e-06, - "loss": 1.0253, - "step": 3822 - }, - { - "epoch": 0.4596885709132448, - "grad_norm": 1.773263258923846, - "learning_rate": 2.3564130097102173e-06, - "loss": 1.0867, - "step": 3823 - }, - { - "epoch": 0.45980881380388383, - "grad_norm": 1.7532136564906102, - "learning_rate": 2.355646479961541e-06, - "loss": 0.9666, - "step": 3824 - }, - { - "epoch": 0.45992905669452294, - "grad_norm": 2.0853707332932143, - "learning_rate": 2.354879896261576e-06, - "loss": 0.9493, - "step": 3825 - }, - { - "epoch": 0.46004929958516205, - "grad_norm": 1.6805561746993734, - "learning_rate": 2.3541132587266133e-06, - "loss": 0.8015, - "step": 3826 - }, - { - "epoch": 0.4601695424758011, - "grad_norm": 2.037050738272523, - "learning_rate": 2.3533465674729515e-06, - "loss": 0.9246, - "step": 3827 - }, - { - "epoch": 0.4602897853664402, - "grad_norm": 2.790225430340337, - "learning_rate": 2.352579822616895e-06, - "loss": 0.9572, - "step": 3828 - }, - { - "epoch": 0.4604100282570793, - "grad_norm": 1.6535545584251519, - "learning_rate": 2.351813024274761e-06, - "loss": 1.0101, - "step": 3829 - }, - { - "epoch": 0.4605302711477184, - "grad_norm": 2.2664404092323, - "learning_rate": 2.3510461725628693e-06, - "loss": 0.9597, - "step": 3830 - }, - { - "epoch": 0.4606505140383575, - "grad_norm": 1.6228883668181138, - "learning_rate": 2.350279267597554e-06, - "loss": 0.9328, - "step": 3831 - }, - { - "epoch": 0.46077075692899655, - "grad_norm": 3.461793126413745, - "learning_rate": 2.3495123094951515e-06, - "loss": 1.0537, - "step": 3832 - }, - { - "epoch": 0.46089099981963566, - "grad_norm": 1.9960722502205641, - "learning_rate": 2.34874529837201e-06, - "loss": 0.984, - "step": 3833 - }, - { - "epoch": 0.46101124271027477, - "grad_norm": 2.1432281036806926, - "learning_rate": 2.347978234344483e-06, - "loss": 1.0205, - "step": 3834 - }, - { - "epoch": 0.4611314856009138, - "grad_norm": 1.9577797925794156, - "learning_rate": 2.347211117528935e-06, - "loss": 0.9247, - "step": 3835 - }, - { - "epoch": 0.46125172849155294, - "grad_norm": 1.5891428159148502, - "learning_rate": 2.3464439480417374e-06, - "loss": 0.9466, - "step": 3836 - }, - { - "epoch": 0.46137197138219205, - "grad_norm": 2.730082603294814, - "learning_rate": 2.3456767259992676e-06, - "loss": 1.0021, - "step": 3837 - }, - { - "epoch": 0.4614922142728311, - "grad_norm": 2.4621433755014914, - "learning_rate": 2.3449094515179135e-06, - "loss": 1.1097, - "step": 3838 - }, - { - "epoch": 0.4616124571634702, - "grad_norm": 1.5397191849350425, - "learning_rate": 2.34414212471407e-06, - "loss": 1.0424, - "step": 3839 - }, - { - "epoch": 0.4617327000541093, - "grad_norm": 2.08618002635342, - "learning_rate": 2.3433747457041394e-06, - "loss": 0.9584, - "step": 3840 - }, - { - "epoch": 0.4618529429447484, - "grad_norm": 1.5699201254966615, - "learning_rate": 2.342607314604533e-06, - "loss": 1.0729, - "step": 3841 - }, - { - "epoch": 0.4619731858353875, - "grad_norm": 2.1953738601091692, - "learning_rate": 2.3418398315316694e-06, - "loss": 1.074, - "step": 3842 - }, - { - "epoch": 0.4620934287260266, - "grad_norm": 2.21193238644452, - "learning_rate": 2.3410722966019755e-06, - "loss": 1.0094, - "step": 3843 - }, - { - "epoch": 0.46221367161666566, - "grad_norm": 1.5407337080728436, - "learning_rate": 2.3403047099318848e-06, - "loss": 0.889, - "step": 3844 - }, - { - "epoch": 0.46233391450730477, - "grad_norm": 2.963476858564541, - "learning_rate": 2.3395370716378405e-06, - "loss": 0.9775, - "step": 3845 - }, - { - "epoch": 0.4624541573979438, - "grad_norm": 1.9540724020578488, - "learning_rate": 2.338769381836292e-06, - "loss": 0.9536, - "step": 3846 - }, - { - "epoch": 0.46257440028858293, - "grad_norm": 1.8413590915161828, - "learning_rate": 2.3380016406436984e-06, - "loss": 0.9649, - "step": 3847 - }, - { - "epoch": 0.46269464317922204, - "grad_norm": 1.7965088844493693, - "learning_rate": 2.337233848176524e-06, - "loss": 1.0451, - "step": 3848 - }, - { - "epoch": 0.4628148860698611, - "grad_norm": 1.9471733048420101, - "learning_rate": 2.3364660045512435e-06, - "loss": 1.0501, - "step": 3849 - }, - { - "epoch": 0.4629351289605002, - "grad_norm": 0.7711641443773447, - "learning_rate": 2.335698109884337e-06, - "loss": 0.859, - "step": 3850 - }, - { - "epoch": 0.4630553718511393, - "grad_norm": 0.8512115805382212, - "learning_rate": 2.334930164292294e-06, - "loss": 0.8852, - "step": 3851 - }, - { - "epoch": 0.4631756147417784, - "grad_norm": 1.9629249987273232, - "learning_rate": 2.334162167891612e-06, - "loss": 1.0222, - "step": 3852 - }, - { - "epoch": 0.4632958576324175, - "grad_norm": 1.9755149749365937, - "learning_rate": 2.333394120798795e-06, - "loss": 0.9694, - "step": 3853 - }, - { - "epoch": 0.4634161005230566, - "grad_norm": 2.298809871422857, - "learning_rate": 2.3326260231303545e-06, - "loss": 0.9524, - "step": 3854 - }, - { - "epoch": 0.46353634341369565, - "grad_norm": 1.7253318301601632, - "learning_rate": 2.331857875002811e-06, - "loss": 1.1039, - "step": 3855 - }, - { - "epoch": 0.46365658630433476, - "grad_norm": 1.5963224705530759, - "learning_rate": 2.3310896765326916e-06, - "loss": 0.9871, - "step": 3856 - }, - { - "epoch": 0.46377682919497387, - "grad_norm": 1.7374235973437957, - "learning_rate": 2.330321427836531e-06, - "loss": 1.0667, - "step": 3857 - }, - { - "epoch": 0.4638970720856129, - "grad_norm": 2.06319704703674, - "learning_rate": 2.3295531290308733e-06, - "loss": 1.0627, - "step": 3858 - }, - { - "epoch": 0.46401731497625204, - "grad_norm": 3.0638932153604563, - "learning_rate": 2.3287847802322678e-06, - "loss": 0.989, - "step": 3859 - }, - { - "epoch": 0.4641375578668911, - "grad_norm": 1.870475842219705, - "learning_rate": 2.3280163815572723e-06, - "loss": 1.0663, - "step": 3860 - }, - { - "epoch": 0.4642578007575302, - "grad_norm": 2.2468794269544685, - "learning_rate": 2.3272479331224522e-06, - "loss": 0.994, - "step": 3861 - }, - { - "epoch": 0.4643780436481693, - "grad_norm": 1.8682059858117823, - "learning_rate": 2.3264794350443817e-06, - "loss": 1.0081, - "step": 3862 - }, - { - "epoch": 0.46449828653880837, - "grad_norm": 1.702459572803607, - "learning_rate": 2.3257108874396396e-06, - "loss": 1.0154, - "step": 3863 - }, - { - "epoch": 0.4646185294294475, - "grad_norm": 3.2755018475823863, - "learning_rate": 2.3249422904248152e-06, - "loss": 0.9688, - "step": 3864 - }, - { - "epoch": 0.4647387723200866, - "grad_norm": 1.3494873875031776, - "learning_rate": 2.324173644116504e-06, - "loss": 1.0996, - "step": 3865 - }, - { - "epoch": 0.46485901521072565, - "grad_norm": 4.6693048967039825, - "learning_rate": 2.3234049486313087e-06, - "loss": 1.047, - "step": 3866 - }, - { - "epoch": 0.46497925810136476, - "grad_norm": 2.2568022371794205, - "learning_rate": 2.322636204085839e-06, - "loss": 0.9992, - "step": 3867 - }, - { - "epoch": 0.46509950099200387, - "grad_norm": 2.330199066630255, - "learning_rate": 2.3218674105967143e-06, - "loss": 1.0136, - "step": 3868 - }, - { - "epoch": 0.4652197438826429, - "grad_norm": 1.495608736707783, - "learning_rate": 2.3210985682805593e-06, - "loss": 1.0694, - "step": 3869 - }, - { - "epoch": 0.46533998677328203, - "grad_norm": 2.530700718186668, - "learning_rate": 2.320329677254007e-06, - "loss": 0.914, - "step": 3870 - }, - { - "epoch": 0.46546022966392114, - "grad_norm": 2.790345994558407, - "learning_rate": 2.319560737633697e-06, - "loss": 0.9573, - "step": 3871 - }, - { - "epoch": 0.4655804725545602, - "grad_norm": 1.6212050801850524, - "learning_rate": 2.3187917495362775e-06, - "loss": 0.9138, - "step": 3872 - }, - { - "epoch": 0.4657007154451993, - "grad_norm": 3.410128424007679, - "learning_rate": 2.318022713078403e-06, - "loss": 0.9899, - "step": 3873 - }, - { - "epoch": 0.4658209583358384, - "grad_norm": 2.07217473182202, - "learning_rate": 2.3172536283767354e-06, - "loss": 1.0717, - "step": 3874 - }, - { - "epoch": 0.4659412012264775, - "grad_norm": 2.4323148106870036, - "learning_rate": 2.3164844955479447e-06, - "loss": 1.042, - "step": 3875 - }, - { - "epoch": 0.4660614441171166, - "grad_norm": 1.7833039337777088, - "learning_rate": 2.3157153147087082e-06, - "loss": 0.934, - "step": 3876 - }, - { - "epoch": 0.46618168700775564, - "grad_norm": 1.814733050447866, - "learning_rate": 2.314946085975709e-06, - "loss": 1.0692, - "step": 3877 - }, - { - "epoch": 0.46630192989839475, - "grad_norm": 1.7350028336471592, - "learning_rate": 2.3141768094656393e-06, - "loss": 1.0581, - "step": 3878 - }, - { - "epoch": 0.46642217278903386, - "grad_norm": 2.597638627759775, - "learning_rate": 2.3134074852951966e-06, - "loss": 1.0528, - "step": 3879 - }, - { - "epoch": 0.4665424156796729, - "grad_norm": 1.7493879065219788, - "learning_rate": 2.312638113581088e-06, - "loss": 1.0103, - "step": 3880 - }, - { - "epoch": 0.46666265857031203, - "grad_norm": 2.4138647695998894, - "learning_rate": 2.311868694440027e-06, - "loss": 1.0105, - "step": 3881 - }, - { - "epoch": 0.46678290146095114, - "grad_norm": 0.723314470083266, - "learning_rate": 2.3110992279887323e-06, - "loss": 0.8724, - "step": 3882 - }, - { - "epoch": 0.4669031443515902, - "grad_norm": 2.2001503416190262, - "learning_rate": 2.310329714343932e-06, - "loss": 1.0749, - "step": 3883 - }, - { - "epoch": 0.4670233872422293, - "grad_norm": 2.083006782370526, - "learning_rate": 2.309560153622361e-06, - "loss": 1.0432, - "step": 3884 - }, - { - "epoch": 0.4671436301328684, - "grad_norm": 2.327629292602088, - "learning_rate": 2.3087905459407602e-06, - "loss": 0.9714, - "step": 3885 - }, - { - "epoch": 0.46726387302350747, - "grad_norm": 0.8591536843817223, - "learning_rate": 2.3080208914158795e-06, - "loss": 0.9124, - "step": 3886 - }, - { - "epoch": 0.4673841159141466, - "grad_norm": 1.9667822071123235, - "learning_rate": 2.3072511901644753e-06, - "loss": 0.9394, - "step": 3887 - }, - { - "epoch": 0.4675043588047857, - "grad_norm": 2.729834050315283, - "learning_rate": 2.306481442303309e-06, - "loss": 1.0322, - "step": 3888 - }, - { - "epoch": 0.46762460169542475, - "grad_norm": 2.55124200979474, - "learning_rate": 2.3057116479491515e-06, - "loss": 0.9609, - "step": 3889 - }, - { - "epoch": 0.46774484458606386, - "grad_norm": 3.3929866880767907, - "learning_rate": 2.30494180721878e-06, - "loss": 0.9982, - "step": 3890 - }, - { - "epoch": 0.4678650874767029, - "grad_norm": 1.934924475011628, - "learning_rate": 2.3041719202289794e-06, - "loss": 1.1212, - "step": 3891 - }, - { - "epoch": 0.467985330367342, - "grad_norm": 1.6868354760508062, - "learning_rate": 2.30340198709654e-06, - "loss": 1.0291, - "step": 3892 - }, - { - "epoch": 0.46810557325798113, - "grad_norm": 2.147192576989079, - "learning_rate": 2.3026320079382605e-06, - "loss": 0.9788, - "step": 3893 - }, - { - "epoch": 0.4682258161486202, - "grad_norm": 2.4317112188548, - "learning_rate": 2.3018619828709454e-06, - "loss": 0.9912, - "step": 3894 - }, - { - "epoch": 0.4683460590392593, - "grad_norm": 1.9204798030372257, - "learning_rate": 2.3010919120114084e-06, - "loss": 1.0497, - "step": 3895 - }, - { - "epoch": 0.4684663019298984, - "grad_norm": 2.4288456185013456, - "learning_rate": 2.3003217954764672e-06, - "loss": 0.8849, - "step": 3896 - }, - { - "epoch": 0.46858654482053747, - "grad_norm": 2.0781334898352193, - "learning_rate": 2.299551633382949e-06, - "loss": 1.0237, - "step": 3897 - }, - { - "epoch": 0.4687067877111766, - "grad_norm": 1.7289411635219911, - "learning_rate": 2.2987814258476854e-06, - "loss": 1.0768, - "step": 3898 - }, - { - "epoch": 0.4688270306018157, - "grad_norm": 3.595643581247549, - "learning_rate": 2.2980111729875177e-06, - "loss": 0.9032, - "step": 3899 - }, - { - "epoch": 0.46894727349245474, - "grad_norm": 1.5883231424416362, - "learning_rate": 2.2972408749192917e-06, - "loss": 1.05, - "step": 3900 - }, - { - "epoch": 0.46906751638309385, - "grad_norm": 1.887979216719854, - "learning_rate": 2.296470531759861e-06, - "loss": 0.8984, - "step": 3901 - }, - { - "epoch": 0.46918775927373296, - "grad_norm": 1.9831852988553407, - "learning_rate": 2.2957001436260866e-06, - "loss": 1.0299, - "step": 3902 - }, - { - "epoch": 0.469308002164372, - "grad_norm": 1.6392042275129906, - "learning_rate": 2.294929710634836e-06, - "loss": 0.9475, - "step": 3903 - }, - { - "epoch": 0.46942824505501113, - "grad_norm": 2.326994960908962, - "learning_rate": 2.2941592329029823e-06, - "loss": 0.834, - "step": 3904 - }, - { - "epoch": 0.46954848794565024, - "grad_norm": 1.9752240983360498, - "learning_rate": 2.2933887105474067e-06, - "loss": 1.0153, - "step": 3905 - }, - { - "epoch": 0.4696687308362893, - "grad_norm": 2.1766401749420767, - "learning_rate": 2.2926181436849974e-06, - "loss": 1.0348, - "step": 3906 - }, - { - "epoch": 0.4697889737269284, - "grad_norm": 1.7229613987544528, - "learning_rate": 2.2918475324326478e-06, - "loss": 0.9522, - "step": 3907 - }, - { - "epoch": 0.46990921661756746, - "grad_norm": 2.191175370455171, - "learning_rate": 2.2910768769072603e-06, - "loss": 1.1281, - "step": 3908 - }, - { - "epoch": 0.47002945950820657, - "grad_norm": 1.8130792152701787, - "learning_rate": 2.2903061772257417e-06, - "loss": 0.9878, - "step": 3909 - }, - { - "epoch": 0.4701497023988457, - "grad_norm": 1.4099356354571286, - "learning_rate": 2.289535433505007e-06, - "loss": 1.0141, - "step": 3910 - }, - { - "epoch": 0.47026994528948474, - "grad_norm": 1.727689418427757, - "learning_rate": 2.2887646458619767e-06, - "loss": 0.864, - "step": 3911 - }, - { - "epoch": 0.47039018818012385, - "grad_norm": 1.9715931057636675, - "learning_rate": 2.2879938144135797e-06, - "loss": 0.9963, - "step": 3912 - }, - { - "epoch": 0.47051043107076296, - "grad_norm": 2.892581916512961, - "learning_rate": 2.2872229392767496e-06, - "loss": 0.9917, - "step": 3913 - }, - { - "epoch": 0.470630673961402, - "grad_norm": 1.525022884885082, - "learning_rate": 2.286452020568428e-06, - "loss": 0.9795, - "step": 3914 - }, - { - "epoch": 0.4707509168520411, - "grad_norm": 1.6700827716256492, - "learning_rate": 2.2856810584055637e-06, - "loss": 0.9674, - "step": 3915 - }, - { - "epoch": 0.47087115974268023, - "grad_norm": 1.5112679373636622, - "learning_rate": 2.2849100529051085e-06, - "loss": 0.906, - "step": 3916 - }, - { - "epoch": 0.4709914026333193, - "grad_norm": 2.300613474921946, - "learning_rate": 2.284139004184026e-06, - "loss": 1.0268, - "step": 3917 - }, - { - "epoch": 0.4711116455239584, - "grad_norm": 3.240211417753212, - "learning_rate": 2.2833679123592814e-06, - "loss": 0.9666, - "step": 3918 - }, - { - "epoch": 0.4712318884145975, - "grad_norm": 1.6782262078992094, - "learning_rate": 2.2825967775478508e-06, - "loss": 0.8662, - "step": 3919 - }, - { - "epoch": 0.47135213130523657, - "grad_norm": 1.9764533333165435, - "learning_rate": 2.2818255998667135e-06, - "loss": 1.0562, - "step": 3920 - }, - { - "epoch": 0.4714723741958757, - "grad_norm": 1.472655521463472, - "learning_rate": 2.2810543794328566e-06, - "loss": 1.0208, - "step": 3921 - }, - { - "epoch": 0.4715926170865148, - "grad_norm": 1.7095433656113084, - "learning_rate": 2.2802831163632735e-06, - "loss": 1.0499, - "step": 3922 - }, - { - "epoch": 0.47171285997715384, - "grad_norm": 1.57350873435487, - "learning_rate": 2.279511810774965e-06, - "loss": 0.9659, - "step": 3923 - }, - { - "epoch": 0.47183310286779295, - "grad_norm": 2.409086067103236, - "learning_rate": 2.2787404627849364e-06, - "loss": 0.9475, - "step": 3924 - }, - { - "epoch": 0.471953345758432, - "grad_norm": 1.5745176121540452, - "learning_rate": 2.277969072510202e-06, - "loss": 1.0171, - "step": 3925 - }, - { - "epoch": 0.4720735886490711, - "grad_norm": 1.5875282765671126, - "learning_rate": 2.2771976400677803e-06, - "loss": 1.0437, - "step": 3926 - }, - { - "epoch": 0.47219383153971023, - "grad_norm": 1.84456829726981, - "learning_rate": 2.2764261655746965e-06, - "loss": 1.0145, - "step": 3927 - }, - { - "epoch": 0.4723140744303493, - "grad_norm": 1.5503328765778794, - "learning_rate": 2.2756546491479832e-06, - "loss": 0.9867, - "step": 3928 - }, - { - "epoch": 0.4724343173209884, - "grad_norm": 2.8761033967681633, - "learning_rate": 2.274883090904679e-06, - "loss": 1.0423, - "step": 3929 - }, - { - "epoch": 0.4725545602116275, - "grad_norm": 1.98348582613828, - "learning_rate": 2.2741114909618283e-06, - "loss": 0.9031, - "step": 3930 - }, - { - "epoch": 0.47267480310226656, - "grad_norm": 1.7289797057532208, - "learning_rate": 2.2733398494364828e-06, - "loss": 0.9439, - "step": 3931 - }, - { - "epoch": 0.47279504599290567, - "grad_norm": 2.4461363358325356, - "learning_rate": 2.272568166445699e-06, - "loss": 1.0745, - "step": 3932 - }, - { - "epoch": 0.4729152888835448, - "grad_norm": 2.089966729369299, - "learning_rate": 2.271796442106541e-06, - "loss": 0.8788, - "step": 3933 - }, - { - "epoch": 0.47303553177418384, - "grad_norm": 0.8017827168359173, - "learning_rate": 2.271024676536079e-06, - "loss": 0.8416, - "step": 3934 - }, - { - "epoch": 0.47315577466482295, - "grad_norm": 3.1202278320561767, - "learning_rate": 2.2702528698513894e-06, - "loss": 0.9621, - "step": 3935 - }, - { - "epoch": 0.47327601755546206, - "grad_norm": 1.7748541194445948, - "learning_rate": 2.269481022169554e-06, - "loss": 1.0153, - "step": 3936 - }, - { - "epoch": 0.4733962604461011, - "grad_norm": 1.7757884168501232, - "learning_rate": 2.2687091336076614e-06, - "loss": 1.0402, - "step": 3937 - }, - { - "epoch": 0.4735165033367402, - "grad_norm": 1.759679997569915, - "learning_rate": 2.267937204282807e-06, - "loss": 1.0305, - "step": 3938 - }, - { - "epoch": 0.4736367462273793, - "grad_norm": 1.9681542721385887, - "learning_rate": 2.2671652343120926e-06, - "loss": 1.0179, - "step": 3939 - }, - { - "epoch": 0.4737569891180184, - "grad_norm": 1.6026191017298814, - "learning_rate": 2.2663932238126236e-06, - "loss": 1.0322, - "step": 3940 - }, - { - "epoch": 0.4738772320086575, - "grad_norm": 1.482029477180221, - "learning_rate": 2.265621172901515e-06, - "loss": 1.0344, - "step": 3941 - }, - { - "epoch": 0.47399747489929656, - "grad_norm": 2.2714069120363236, - "learning_rate": 2.2648490816958854e-06, - "loss": 0.9482, - "step": 3942 - }, - { - "epoch": 0.47411771778993567, - "grad_norm": 2.2493011660911058, - "learning_rate": 2.264076950312861e-06, - "loss": 0.9538, - "step": 3943 - }, - { - "epoch": 0.4742379606805748, - "grad_norm": 2.2761030268890887, - "learning_rate": 2.2633047788695727e-06, - "loss": 1.0505, - "step": 3944 - }, - { - "epoch": 0.47435820357121383, - "grad_norm": 1.7090802149465736, - "learning_rate": 2.262532567483159e-06, - "loss": 0.8732, - "step": 3945 - }, - { - "epoch": 0.47447844646185294, - "grad_norm": 1.7800658456003395, - "learning_rate": 2.2617603162707635e-06, - "loss": 1.0348, - "step": 3946 - }, - { - "epoch": 0.47459868935249205, - "grad_norm": 1.7751506983224723, - "learning_rate": 2.2609880253495363e-06, - "loss": 1.044, - "step": 3947 - }, - { - "epoch": 0.4747189322431311, - "grad_norm": 3.7181666581844697, - "learning_rate": 2.260215694836633e-06, - "loss": 1.0901, - "step": 3948 - }, - { - "epoch": 0.4748391751337702, - "grad_norm": 11.800284715224102, - "learning_rate": 2.2594433248492157e-06, - "loss": 0.8761, - "step": 3949 - }, - { - "epoch": 0.47495941802440933, - "grad_norm": 1.6698196469074862, - "learning_rate": 2.2586709155044527e-06, - "loss": 1.0257, - "step": 3950 - }, - { - "epoch": 0.4750796609150484, - "grad_norm": 1.50741548203227, - "learning_rate": 2.2578984669195167e-06, - "loss": 0.9889, - "step": 3951 - }, - { - "epoch": 0.4751999038056875, - "grad_norm": 1.7642110378551903, - "learning_rate": 2.2571259792115887e-06, - "loss": 0.8952, - "step": 3952 - }, - { - "epoch": 0.4753201466963266, - "grad_norm": 1.779377672724698, - "learning_rate": 2.2563534524978544e-06, - "loss": 1.0218, - "step": 3953 - }, - { - "epoch": 0.47544038958696566, - "grad_norm": 1.4788060173975033, - "learning_rate": 2.2555808868955052e-06, - "loss": 0.9453, - "step": 3954 - }, - { - "epoch": 0.47556063247760477, - "grad_norm": 2.9803349976131956, - "learning_rate": 2.254808282521738e-06, - "loss": 0.96, - "step": 3955 - }, - { - "epoch": 0.4756808753682438, - "grad_norm": 1.5765302550815121, - "learning_rate": 2.2540356394937573e-06, - "loss": 1.0377, - "step": 3956 - }, - { - "epoch": 0.47580111825888294, - "grad_norm": 2.7147999272823253, - "learning_rate": 2.253262957928772e-06, - "loss": 1.0636, - "step": 3957 - }, - { - "epoch": 0.47592136114952205, - "grad_norm": 3.102122835138794, - "learning_rate": 2.2524902379439976e-06, - "loss": 0.9486, - "step": 3958 - }, - { - "epoch": 0.4760416040401611, - "grad_norm": 0.7571282984961043, - "learning_rate": 2.251717479656655e-06, - "loss": 0.8874, - "step": 3959 - }, - { - "epoch": 0.4761618469308002, - "grad_norm": 2.3793508931555283, - "learning_rate": 2.2509446831839704e-06, - "loss": 0.9907, - "step": 3960 - }, - { - "epoch": 0.4762820898214393, - "grad_norm": 2.379357406354769, - "learning_rate": 2.250171848643177e-06, - "loss": 1.0447, - "step": 3961 - }, - { - "epoch": 0.4764023327120784, - "grad_norm": 1.7670965898168411, - "learning_rate": 2.249398976151513e-06, - "loss": 1.1004, - "step": 3962 - }, - { - "epoch": 0.4765225756027175, - "grad_norm": 2.743278351654192, - "learning_rate": 2.248626065826223e-06, - "loss": 1.0243, - "step": 3963 - }, - { - "epoch": 0.4766428184933566, - "grad_norm": 0.765350370393034, - "learning_rate": 2.2478531177845564e-06, - "loss": 0.873, - "step": 3964 - }, - { - "epoch": 0.47676306138399566, - "grad_norm": 1.9288543261572775, - "learning_rate": 2.247080132143769e-06, - "loss": 1.079, - "step": 3965 - }, - { - "epoch": 0.47688330427463477, - "grad_norm": 3.0557260138234934, - "learning_rate": 2.246307109021121e-06, - "loss": 0.9193, - "step": 3966 - }, - { - "epoch": 0.4770035471652739, - "grad_norm": 1.6056726330870734, - "learning_rate": 2.2455340485338817e-06, - "loss": 1.0501, - "step": 3967 - }, - { - "epoch": 0.47712379005591293, - "grad_norm": 2.6019806396972154, - "learning_rate": 2.244760950799322e-06, - "loss": 0.9082, - "step": 3968 - }, - { - "epoch": 0.47724403294655204, - "grad_norm": 2.0550319832496418, - "learning_rate": 2.2439878159347203e-06, - "loss": 0.956, - "step": 3969 - }, - { - "epoch": 0.4773642758371911, - "grad_norm": 0.8444236962135532, - "learning_rate": 2.2432146440573616e-06, - "loss": 0.8567, - "step": 3970 - }, - { - "epoch": 0.4774845187278302, - "grad_norm": 2.280806302333328, - "learning_rate": 2.242441435284534e-06, - "loss": 0.892, - "step": 3971 - }, - { - "epoch": 0.4776047616184693, - "grad_norm": 2.0015449517200716, - "learning_rate": 2.2416681897335337e-06, - "loss": 1.0804, - "step": 3972 - }, - { - "epoch": 0.4777250045091084, - "grad_norm": 2.1664921861477278, - "learning_rate": 2.240894907521661e-06, - "loss": 0.895, - "step": 3973 - }, - { - "epoch": 0.4778452473997475, - "grad_norm": 1.8928099058277377, - "learning_rate": 2.240121588766223e-06, - "loss": 0.8702, - "step": 3974 - }, - { - "epoch": 0.4779654902903866, - "grad_norm": 1.7528565117908923, - "learning_rate": 2.239348233584531e-06, - "loss": 0.9337, - "step": 3975 - }, - { - "epoch": 0.47808573318102565, - "grad_norm": 1.721617647244365, - "learning_rate": 2.2385748420939013e-06, - "loss": 1.0352, - "step": 3976 - }, - { - "epoch": 0.47820597607166476, - "grad_norm": 1.7004913124999903, - "learning_rate": 2.2378014144116583e-06, - "loss": 0.9529, - "step": 3977 - }, - { - "epoch": 0.4783262189623039, - "grad_norm": 2.0475391737249455, - "learning_rate": 2.23702795065513e-06, - "loss": 1.0252, - "step": 3978 - }, - { - "epoch": 0.47844646185294293, - "grad_norm": 0.979894158136274, - "learning_rate": 2.2362544509416493e-06, - "loss": 0.9375, - "step": 3979 - }, - { - "epoch": 0.47856670474358204, - "grad_norm": 2.06947015864407, - "learning_rate": 2.2354809153885572e-06, - "loss": 1.0528, - "step": 3980 - }, - { - "epoch": 0.47868694763422115, - "grad_norm": 3.8633265478180436, - "learning_rate": 2.234707344113197e-06, - "loss": 1.0548, - "step": 3981 - }, - { - "epoch": 0.4788071905248602, - "grad_norm": 2.179674921033652, - "learning_rate": 2.233933737232919e-06, - "loss": 1.0115, - "step": 3982 - }, - { - "epoch": 0.4789274334154993, - "grad_norm": 1.7244592344222238, - "learning_rate": 2.2331600948650793e-06, - "loss": 1.0112, - "step": 3983 - }, - { - "epoch": 0.4790476763061384, - "grad_norm": 1.5456923770988786, - "learning_rate": 2.2323864171270386e-06, - "loss": 1.0291, - "step": 3984 - }, - { - "epoch": 0.4791679191967775, - "grad_norm": 1.7653389758128397, - "learning_rate": 2.231612704136164e-06, - "loss": 0.953, - "step": 3985 - }, - { - "epoch": 0.4792881620874166, - "grad_norm": 3.9041438413839606, - "learning_rate": 2.2308389560098253e-06, - "loss": 0.9765, - "step": 3986 - }, - { - "epoch": 0.47940840497805565, - "grad_norm": 2.1655423719708247, - "learning_rate": 2.2300651728654008e-06, - "loss": 0.9912, - "step": 3987 - }, - { - "epoch": 0.47952864786869476, - "grad_norm": 0.7505197710766093, - "learning_rate": 2.229291354820272e-06, - "loss": 0.8666, - "step": 3988 - }, - { - "epoch": 0.47964889075933387, - "grad_norm": 2.770260993147362, - "learning_rate": 2.228517501991828e-06, - "loss": 1.0018, - "step": 3989 - }, - { - "epoch": 0.4797691336499729, - "grad_norm": 5.215016931522786, - "learning_rate": 2.22774361449746e-06, - "loss": 0.8726, - "step": 3990 - }, - { - "epoch": 0.47988937654061203, - "grad_norm": 3.79210950899341, - "learning_rate": 2.2269696924545668e-06, - "loss": 0.9335, - "step": 3991 - }, - { - "epoch": 0.48000961943125114, - "grad_norm": 2.1761210260183486, - "learning_rate": 2.2261957359805523e-06, - "loss": 1.0038, - "step": 3992 - }, - { - "epoch": 0.4801298623218902, - "grad_norm": 2.043095484462332, - "learning_rate": 2.225421745192823e-06, - "loss": 0.9791, - "step": 3993 - }, - { - "epoch": 0.4802501052125293, - "grad_norm": 2.0121168020559757, - "learning_rate": 2.2246477202087955e-06, - "loss": 1.0067, - "step": 3994 - }, - { - "epoch": 0.4803703481031684, - "grad_norm": 1.508333659655007, - "learning_rate": 2.223873661145887e-06, - "loss": 1.0582, - "step": 3995 - }, - { - "epoch": 0.4804905909938075, - "grad_norm": 1.6041435471226977, - "learning_rate": 2.2230995681215226e-06, - "loss": 0.9444, - "step": 3996 - }, - { - "epoch": 0.4806108338844466, - "grad_norm": 2.228686902002931, - "learning_rate": 2.2223254412531305e-06, - "loss": 1.0158, - "step": 3997 - }, - { - "epoch": 0.4807310767750857, - "grad_norm": 1.678019276277492, - "learning_rate": 2.221551280658146e-06, - "loss": 1.0505, - "step": 3998 - }, - { - "epoch": 0.48085131966572475, - "grad_norm": 1.6081370805508246, - "learning_rate": 2.2207770864540085e-06, - "loss": 0.9703, - "step": 3999 - }, - { - "epoch": 0.48097156255636386, - "grad_norm": 2.3203678830919294, - "learning_rate": 2.220002858758162e-06, - "loss": 0.9522, - "step": 4000 - }, - { - "epoch": 0.481091805447003, - "grad_norm": 0.8385090002169471, - "learning_rate": 2.2192285976880573e-06, - "loss": 0.8584, - "step": 4001 - }, - { - "epoch": 0.48121204833764203, - "grad_norm": 1.9170659381906636, - "learning_rate": 2.2184543033611485e-06, - "loss": 1.0305, - "step": 4002 - }, - { - "epoch": 0.48133229122828114, - "grad_norm": 1.9373368840619336, - "learning_rate": 2.2176799758948957e-06, - "loss": 1.0509, - "step": 4003 - }, - { - "epoch": 0.4814525341189202, - "grad_norm": 2.139488308240981, - "learning_rate": 2.2169056154067635e-06, - "loss": 0.9565, - "step": 4004 - }, - { - "epoch": 0.4815727770095593, - "grad_norm": 1.612670191573451, - "learning_rate": 2.216131222014222e-06, - "loss": 1.0548, - "step": 4005 - }, - { - "epoch": 0.4816930199001984, - "grad_norm": 2.4247081305659486, - "learning_rate": 2.2153567958347455e-06, - "loss": 1.0364, - "step": 4006 - }, - { - "epoch": 0.48181326279083747, - "grad_norm": 1.929055917525636, - "learning_rate": 2.214582336985815e-06, - "loss": 1.0291, - "step": 4007 - }, - { - "epoch": 0.4819335056814766, - "grad_norm": 2.0615310704945142, - "learning_rate": 2.2138078455849142e-06, - "loss": 0.8827, - "step": 4008 - }, - { - "epoch": 0.4820537485721157, - "grad_norm": 2.364362485097727, - "learning_rate": 2.2130333217495334e-06, - "loss": 1.0238, - "step": 4009 - }, - { - "epoch": 0.48217399146275475, - "grad_norm": 3.4463193637535134, - "learning_rate": 2.2122587655971665e-06, - "loss": 0.9024, - "step": 4010 - }, - { - "epoch": 0.48229423435339386, - "grad_norm": 7.936821856027538, - "learning_rate": 2.211484177245314e-06, - "loss": 0.865, - "step": 4011 - }, - { - "epoch": 0.48241447724403297, - "grad_norm": 1.9470651230660168, - "learning_rate": 2.21070955681148e-06, - "loss": 0.9631, - "step": 4012 - }, - { - "epoch": 0.482534720134672, - "grad_norm": 1.638078293350994, - "learning_rate": 2.209934904413174e-06, - "loss": 1.0044, - "step": 4013 - }, - { - "epoch": 0.48265496302531113, - "grad_norm": 1.975884845351222, - "learning_rate": 2.2091602201679095e-06, - "loss": 0.944, - "step": 4014 - }, - { - "epoch": 0.48277520591595025, - "grad_norm": 2.2943007387484706, - "learning_rate": 2.208385504193206e-06, - "loss": 1.0631, - "step": 4015 - }, - { - "epoch": 0.4828954488065893, - "grad_norm": 3.527020647663953, - "learning_rate": 2.2076107566065873e-06, - "loss": 1.0348, - "step": 4016 - }, - { - "epoch": 0.4830156916972284, - "grad_norm": 2.1074426913360242, - "learning_rate": 2.2068359775255816e-06, - "loss": 0.9757, - "step": 4017 - }, - { - "epoch": 0.48313593458786747, - "grad_norm": 2.3100284049529463, - "learning_rate": 2.206061167067723e-06, - "loss": 1.0049, - "step": 4018 - }, - { - "epoch": 0.4832561774785066, - "grad_norm": 1.8642121554699205, - "learning_rate": 2.205286325350549e-06, - "loss": 1.0243, - "step": 4019 - }, - { - "epoch": 0.4833764203691457, - "grad_norm": 2.0943310059583014, - "learning_rate": 2.204511452491603e-06, - "loss": 0.9583, - "step": 4020 - }, - { - "epoch": 0.48349666325978474, - "grad_norm": 1.66309876809404, - "learning_rate": 2.2037365486084316e-06, - "loss": 0.9763, - "step": 4021 - }, - { - "epoch": 0.48361690615042385, - "grad_norm": 1.8019365754391905, - "learning_rate": 2.2029616138185886e-06, - "loss": 1.0095, - "step": 4022 - }, - { - "epoch": 0.48373714904106296, - "grad_norm": 2.5136169569867675, - "learning_rate": 2.202186648239629e-06, - "loss": 1.0529, - "step": 4023 - }, - { - "epoch": 0.483857391931702, - "grad_norm": 1.9885644617334481, - "learning_rate": 2.201411651989117e-06, - "loss": 0.9456, - "step": 4024 - }, - { - "epoch": 0.48397763482234113, - "grad_norm": 1.8650679429183432, - "learning_rate": 2.2006366251846167e-06, - "loss": 1.0088, - "step": 4025 - }, - { - "epoch": 0.48409787771298024, - "grad_norm": 1.6818456934569626, - "learning_rate": 2.1998615679436997e-06, - "loss": 0.9778, - "step": 4026 - }, - { - "epoch": 0.4842181206036193, - "grad_norm": 2.23084305329949, - "learning_rate": 2.199086480383942e-06, - "loss": 0.9979, - "step": 4027 - }, - { - "epoch": 0.4843383634942584, - "grad_norm": 3.2128201276935187, - "learning_rate": 2.1983113626229234e-06, - "loss": 0.8945, - "step": 4028 - }, - { - "epoch": 0.4844586063848975, - "grad_norm": 2.7113533690442106, - "learning_rate": 2.1975362147782293e-06, - "loss": 1.0122, - "step": 4029 - }, - { - "epoch": 0.48457884927553657, - "grad_norm": 0.7815654117937234, - "learning_rate": 2.196761036967448e-06, - "loss": 0.8008, - "step": 4030 - }, - { - "epoch": 0.4846990921661757, - "grad_norm": 1.8470345675089566, - "learning_rate": 2.1959858293081743e-06, - "loss": 1.0095, - "step": 4031 - }, - { - "epoch": 0.4848193350568148, - "grad_norm": 1.7077223995694057, - "learning_rate": 2.1952105919180056e-06, - "loss": 0.996, - "step": 4032 - }, - { - "epoch": 0.48493957794745385, - "grad_norm": 3.6879210150943065, - "learning_rate": 2.1944353249145456e-06, - "loss": 0.9072, - "step": 4033 - }, - { - "epoch": 0.48505982083809296, - "grad_norm": 1.644522696357283, - "learning_rate": 2.193660028415401e-06, - "loss": 0.9788, - "step": 4034 - }, - { - "epoch": 0.485180063728732, - "grad_norm": 1.6304765263341376, - "learning_rate": 2.1928847025381852e-06, - "loss": 1.0496, - "step": 4035 - }, - { - "epoch": 0.4853003066193711, - "grad_norm": 1.9163095169131255, - "learning_rate": 2.192109347400512e-06, - "loss": 1.0679, - "step": 4036 - }, - { - "epoch": 0.48542054951001024, - "grad_norm": 1.6750498066799084, - "learning_rate": 2.191333963120004e-06, - "loss": 1.017, - "step": 4037 - }, - { - "epoch": 0.4855407924006493, - "grad_norm": 2.5391646614062617, - "learning_rate": 2.190558549814286e-06, - "loss": 0.9309, - "step": 4038 - }, - { - "epoch": 0.4856610352912884, - "grad_norm": 1.7532462256638586, - "learning_rate": 2.1897831076009872e-06, - "loss": 1.0233, - "step": 4039 - }, - { - "epoch": 0.4857812781819275, - "grad_norm": 2.7561595599156368, - "learning_rate": 2.1890076365977426e-06, - "loss": 1.0285, - "step": 4040 - }, - { - "epoch": 0.48590152107256657, - "grad_norm": 0.9022632579490006, - "learning_rate": 2.188232136922189e-06, - "loss": 0.7954, - "step": 4041 - }, - { - "epoch": 0.4860217639632057, - "grad_norm": 2.3646435050758523, - "learning_rate": 2.187456608691971e-06, - "loss": 0.9887, - "step": 4042 - }, - { - "epoch": 0.4861420068538448, - "grad_norm": 2.5278627313816897, - "learning_rate": 2.1866810520247334e-06, - "loss": 1.1076, - "step": 4043 - }, - { - "epoch": 0.48626224974448384, - "grad_norm": 1.9072548140988714, - "learning_rate": 2.185905467038129e-06, - "loss": 0.8843, - "step": 4044 - }, - { - "epoch": 0.48638249263512295, - "grad_norm": 1.6894480446094133, - "learning_rate": 2.1851298538498127e-06, - "loss": 1.0054, - "step": 4045 - }, - { - "epoch": 0.48650273552576206, - "grad_norm": 1.7909839423485874, - "learning_rate": 2.184354212577446e-06, - "loss": 1.0215, - "step": 4046 - }, - { - "epoch": 0.4866229784164011, - "grad_norm": 3.1512886560074826, - "learning_rate": 2.1835785433386907e-06, - "loss": 0.8631, - "step": 4047 - }, - { - "epoch": 0.48674322130704023, - "grad_norm": 3.999689805401003, - "learning_rate": 2.182802846251216e-06, - "loss": 0.8806, - "step": 4048 - }, - { - "epoch": 0.4868634641976793, - "grad_norm": 2.435248778087269, - "learning_rate": 2.182027121432696e-06, - "loss": 0.9583, - "step": 4049 - }, - { - "epoch": 0.4869837070883184, - "grad_norm": 2.388609591379323, - "learning_rate": 2.1812513690008054e-06, - "loss": 1.0484, - "step": 4050 - }, - { - "epoch": 0.4871039499789575, - "grad_norm": 2.0737615681349166, - "learning_rate": 2.180475589073227e-06, - "loss": 1.0279, - "step": 4051 - }, - { - "epoch": 0.48722419286959656, - "grad_norm": 1.601923311110034, - "learning_rate": 2.1796997817676456e-06, - "loss": 0.965, - "step": 4052 - }, - { - "epoch": 0.4873444357602357, - "grad_norm": 2.31947953913998, - "learning_rate": 2.1789239472017494e-06, - "loss": 0.9065, - "step": 4053 - }, - { - "epoch": 0.4874646786508748, - "grad_norm": 2.658348242101049, - "learning_rate": 2.1781480854932326e-06, - "loss": 0.9595, - "step": 4054 - }, - { - "epoch": 0.48758492154151384, - "grad_norm": 3.8625788807144392, - "learning_rate": 2.1773721967597933e-06, - "loss": 1.0208, - "step": 4055 - }, - { - "epoch": 0.48770516443215295, - "grad_norm": 0.8919579419369089, - "learning_rate": 2.1765962811191322e-06, - "loss": 0.8618, - "step": 4056 - }, - { - "epoch": 0.48782540732279206, - "grad_norm": 0.9091667723429823, - "learning_rate": 2.1758203386889566e-06, - "loss": 0.9147, - "step": 4057 - }, - { - "epoch": 0.4879456502134311, - "grad_norm": 2.4364244093102325, - "learning_rate": 2.1750443695869746e-06, - "loss": 1.0726, - "step": 4058 - }, - { - "epoch": 0.4880658931040702, - "grad_norm": 1.8479056643646259, - "learning_rate": 2.174268373930901e-06, - "loss": 1.0868, - "step": 4059 - }, - { - "epoch": 0.48818613599470934, - "grad_norm": 2.783727678090406, - "learning_rate": 2.1734923518384537e-06, - "loss": 1.0286, - "step": 4060 - }, - { - "epoch": 0.4883063788853484, - "grad_norm": 1.9654562509268048, - "learning_rate": 2.1727163034273547e-06, - "loss": 1.0434, - "step": 4061 - }, - { - "epoch": 0.4884266217759875, - "grad_norm": 5.148991139676755, - "learning_rate": 2.17194022881533e-06, - "loss": 1.0108, - "step": 4062 - }, - { - "epoch": 0.4885468646666266, - "grad_norm": 1.802616863615665, - "learning_rate": 2.1711641281201092e-06, - "loss": 0.9012, - "step": 4063 - }, - { - "epoch": 0.48866710755726567, - "grad_norm": 2.3616802751269756, - "learning_rate": 2.1703880014594264e-06, - "loss": 1.0196, - "step": 4064 - }, - { - "epoch": 0.4887873504479048, - "grad_norm": 1.6953897063082881, - "learning_rate": 2.1696118489510182e-06, - "loss": 0.9617, - "step": 4065 - }, - { - "epoch": 0.48890759333854383, - "grad_norm": 2.02345882684375, - "learning_rate": 2.1688356707126286e-06, - "loss": 0.9577, - "step": 4066 - }, - { - "epoch": 0.48902783622918294, - "grad_norm": 1.9877203429337453, - "learning_rate": 2.168059466862001e-06, - "loss": 0.928, - "step": 4067 - }, - { - "epoch": 0.48914807911982205, - "grad_norm": 2.2726921616356357, - "learning_rate": 2.167283237516887e-06, - "loss": 1.0436, - "step": 4068 - }, - { - "epoch": 0.4892683220104611, - "grad_norm": 1.9198895254737456, - "learning_rate": 2.1665069827950383e-06, - "loss": 0.9836, - "step": 4069 - }, - { - "epoch": 0.4893885649011002, - "grad_norm": 1.754883627660495, - "learning_rate": 2.1657307028142126e-06, - "loss": 1.1022, - "step": 4070 - }, - { - "epoch": 0.48950880779173933, - "grad_norm": 2.4793983372598944, - "learning_rate": 2.164954397692171e-06, - "loss": 0.8991, - "step": 4071 - }, - { - "epoch": 0.4896290506823784, - "grad_norm": 1.1311576067882665, - "learning_rate": 2.164178067546678e-06, - "loss": 1.0582, - "step": 4072 - }, - { - "epoch": 0.4897492935730175, - "grad_norm": 1.613914383982144, - "learning_rate": 2.163401712495504e-06, - "loss": 1.1324, - "step": 4073 - }, - { - "epoch": 0.4898695364636566, - "grad_norm": 1.5975007445523195, - "learning_rate": 2.1626253326564194e-06, - "loss": 1.0175, - "step": 4074 - }, - { - "epoch": 0.48998977935429566, - "grad_norm": 1.7373674026499897, - "learning_rate": 2.161848928147201e-06, - "loss": 1.0016, - "step": 4075 - }, - { - "epoch": 0.4901100222449348, - "grad_norm": 1.92745570803565, - "learning_rate": 2.161072499085629e-06, - "loss": 1.0416, - "step": 4076 - }, - { - "epoch": 0.4902302651355739, - "grad_norm": 1.537034843605822, - "learning_rate": 2.160296045589487e-06, - "loss": 1.0511, - "step": 4077 - }, - { - "epoch": 0.49035050802621294, - "grad_norm": 1.7690543570008475, - "learning_rate": 2.159519567776562e-06, - "loss": 0.9189, - "step": 4078 - }, - { - "epoch": 0.49047075091685205, - "grad_norm": 5.428611242535517, - "learning_rate": 2.1587430657646463e-06, - "loss": 0.9357, - "step": 4079 - }, - { - "epoch": 0.4905909938074911, - "grad_norm": 1.7631072601215614, - "learning_rate": 2.157966539671533e-06, - "loss": 1.0107, - "step": 4080 - }, - { - "epoch": 0.4907112366981302, - "grad_norm": 1.7287337515750767, - "learning_rate": 2.157189989615021e-06, - "loss": 0.894, - "step": 4081 - }, - { - "epoch": 0.4908314795887693, - "grad_norm": 1.8048277961396377, - "learning_rate": 2.156413415712913e-06, - "loss": 0.9741, - "step": 4082 - }, - { - "epoch": 0.4909517224794084, - "grad_norm": 2.4734868828476007, - "learning_rate": 2.155636818083014e-06, - "loss": 1.0065, - "step": 4083 - }, - { - "epoch": 0.4910719653700475, - "grad_norm": 1.6210602170854087, - "learning_rate": 2.154860196843134e-06, - "loss": 1.0719, - "step": 4084 - }, - { - "epoch": 0.4911922082606866, - "grad_norm": 2.5235121394122975, - "learning_rate": 2.154083552111085e-06, - "loss": 0.9899, - "step": 4085 - }, - { - "epoch": 0.49131245115132566, - "grad_norm": 1.683965725973561, - "learning_rate": 2.1533068840046834e-06, - "loss": 1.0487, - "step": 4086 - }, - { - "epoch": 0.49143269404196477, - "grad_norm": 2.217274887373683, - "learning_rate": 2.152530192641749e-06, - "loss": 0.8398, - "step": 4087 - }, - { - "epoch": 0.4915529369326039, - "grad_norm": 2.1741143626828663, - "learning_rate": 2.1517534781401068e-06, - "loss": 0.9452, - "step": 4088 - }, - { - "epoch": 0.49167317982324293, - "grad_norm": 2.928880259881102, - "learning_rate": 2.150976740617581e-06, - "loss": 0.9185, - "step": 4089 - }, - { - "epoch": 0.49179342271388204, - "grad_norm": 2.3959653956186333, - "learning_rate": 2.150199980192006e-06, - "loss": 0.9523, - "step": 4090 - }, - { - "epoch": 0.49191366560452116, - "grad_norm": 1.6167569531795114, - "learning_rate": 2.1494231969812114e-06, - "loss": 1.0424, - "step": 4091 - }, - { - "epoch": 0.4920339084951602, - "grad_norm": 2.1052839378536032, - "learning_rate": 2.1486463911030372e-06, - "loss": 1.0419, - "step": 4092 - }, - { - "epoch": 0.4921541513857993, - "grad_norm": 1.6814008623810888, - "learning_rate": 2.147869562675324e-06, - "loss": 0.9724, - "step": 4093 - }, - { - "epoch": 0.49227439427643843, - "grad_norm": 2.382973987547827, - "learning_rate": 2.147092711815915e-06, - "loss": 0.951, - "step": 4094 - }, - { - "epoch": 0.4923946371670775, - "grad_norm": 2.1788872237552805, - "learning_rate": 2.1463158386426593e-06, - "loss": 1.092, - "step": 4095 - }, - { - "epoch": 0.4925148800577166, - "grad_norm": 14.022299309659864, - "learning_rate": 2.145538943273407e-06, - "loss": 1.0112, - "step": 4096 - }, - { - "epoch": 0.49263512294835565, - "grad_norm": 1.7997537523949334, - "learning_rate": 2.144762025826013e-06, - "loss": 0.948, - "step": 4097 - }, - { - "epoch": 0.49275536583899476, - "grad_norm": 2.0730530072587485, - "learning_rate": 2.143985086418334e-06, - "loss": 1.0948, - "step": 4098 - }, - { - "epoch": 0.4928756087296339, - "grad_norm": 1.5130245276541154, - "learning_rate": 2.1432081251682324e-06, - "loss": 1.0023, - "step": 4099 - }, - { - "epoch": 0.49299585162027293, - "grad_norm": 3.7099374166387196, - "learning_rate": 2.142431142193572e-06, - "loss": 1.0929, - "step": 4100 - }, - { - "epoch": 0.49311609451091204, - "grad_norm": 2.484068461766241, - "learning_rate": 2.1416541376122207e-06, - "loss": 0.9568, - "step": 4101 - }, - { - "epoch": 0.49323633740155115, - "grad_norm": 1.8758109564218293, - "learning_rate": 2.1408771115420496e-06, - "loss": 0.9572, - "step": 4102 - }, - { - "epoch": 0.4933565802921902, - "grad_norm": 2.753426151496863, - "learning_rate": 2.140100064100932e-06, - "loss": 0.8772, - "step": 4103 - }, - { - "epoch": 0.4934768231828293, - "grad_norm": 1.7584718950947191, - "learning_rate": 2.139322995406746e-06, - "loss": 0.9856, - "step": 4104 - }, - { - "epoch": 0.4935970660734684, - "grad_norm": 1.895155341794242, - "learning_rate": 2.1385459055773727e-06, - "loss": 1.0311, - "step": 4105 - }, - { - "epoch": 0.4937173089641075, - "grad_norm": 1.9083734161076538, - "learning_rate": 2.137768794730696e-06, - "loss": 0.974, - "step": 4106 - }, - { - "epoch": 0.4938375518547466, - "grad_norm": 2.1521237463417084, - "learning_rate": 2.1369916629846026e-06, - "loss": 1.0311, - "step": 4107 - }, - { - "epoch": 0.4939577947453857, - "grad_norm": 1.6678386700045653, - "learning_rate": 2.136214510456983e-06, - "loss": 0.9763, - "step": 4108 - }, - { - "epoch": 0.49407803763602476, - "grad_norm": 0.9511184520719139, - "learning_rate": 2.1354373372657296e-06, - "loss": 0.9376, - "step": 4109 - }, - { - "epoch": 0.49419828052666387, - "grad_norm": 1.409079481383379, - "learning_rate": 2.1346601435287404e-06, - "loss": 0.9343, - "step": 4110 - }, - { - "epoch": 0.494318523417303, - "grad_norm": 1.8430834066277688, - "learning_rate": 2.1338829293639144e-06, - "loss": 1.0316, - "step": 4111 - }, - { - "epoch": 0.49443876630794203, - "grad_norm": 2.215520953706707, - "learning_rate": 2.1331056948891547e-06, - "loss": 1.0663, - "step": 4112 - }, - { - "epoch": 0.49455900919858115, - "grad_norm": 2.6092751306849733, - "learning_rate": 2.1323284402223666e-06, - "loss": 0.9955, - "step": 4113 - }, - { - "epoch": 0.4946792520892202, - "grad_norm": 1.6736165183346228, - "learning_rate": 2.1315511654814597e-06, - "loss": 1.1035, - "step": 4114 - }, - { - "epoch": 0.4947994949798593, - "grad_norm": 2.4271254201577186, - "learning_rate": 2.1307738707843456e-06, - "loss": 1.01, - "step": 4115 - }, - { - "epoch": 0.4949197378704984, - "grad_norm": 1.9659426822087729, - "learning_rate": 2.1299965562489385e-06, - "loss": 0.9189, - "step": 4116 - }, - { - "epoch": 0.4950399807611375, - "grad_norm": 2.059603535731028, - "learning_rate": 2.129219221993158e-06, - "loss": 1.0211, - "step": 4117 - }, - { - "epoch": 0.4951602236517766, - "grad_norm": 0.8214203755639198, - "learning_rate": 2.128441868134924e-06, - "loss": 0.8763, - "step": 4118 - }, - { - "epoch": 0.4952804665424157, - "grad_norm": 2.6901207499009874, - "learning_rate": 2.1276644947921606e-06, - "loss": 1.0597, - "step": 4119 - }, - { - "epoch": 0.49540070943305475, - "grad_norm": 2.389973670144074, - "learning_rate": 2.126887102082795e-06, - "loss": 1.0551, - "step": 4120 - }, - { - "epoch": 0.49552095232369386, - "grad_norm": 1.524643797000845, - "learning_rate": 2.126109690124757e-06, - "loss": 0.9301, - "step": 4121 - }, - { - "epoch": 0.495641195214333, - "grad_norm": 1.627051452376482, - "learning_rate": 2.1253322590359786e-06, - "loss": 0.9454, - "step": 4122 - }, - { - "epoch": 0.49576143810497203, - "grad_norm": 2.709876476724355, - "learning_rate": 2.124554808934397e-06, - "loss": 0.97, - "step": 4123 - }, - { - "epoch": 0.49588168099561114, - "grad_norm": 1.9636634633734202, - "learning_rate": 2.1237773399379496e-06, - "loss": 0.9637, - "step": 4124 - }, - { - "epoch": 0.49600192388625025, - "grad_norm": 1.8062782707773861, - "learning_rate": 2.122999852164578e-06, - "loss": 1.0976, - "step": 4125 - }, - { - "epoch": 0.4961221667768893, - "grad_norm": 2.7775327722794607, - "learning_rate": 2.122222345732227e-06, - "loss": 0.7966, - "step": 4126 - }, - { - "epoch": 0.4962424096675284, - "grad_norm": 1.7610558998901318, - "learning_rate": 2.121444820758843e-06, - "loss": 1.0588, - "step": 4127 - }, - { - "epoch": 0.49636265255816747, - "grad_norm": 2.063870754764117, - "learning_rate": 2.120667277362376e-06, - "loss": 1.0023, - "step": 4128 - }, - { - "epoch": 0.4964828954488066, - "grad_norm": 3.010674559074624, - "learning_rate": 2.1198897156607796e-06, - "loss": 1.0765, - "step": 4129 - }, - { - "epoch": 0.4966031383394457, - "grad_norm": 4.572526340148043, - "learning_rate": 2.1191121357720085e-06, - "loss": 0.9705, - "step": 4130 - }, - { - "epoch": 0.49672338123008475, - "grad_norm": 1.706420929000923, - "learning_rate": 2.1183345378140206e-06, - "loss": 0.9737, - "step": 4131 - }, - { - "epoch": 0.49684362412072386, - "grad_norm": 0.9887361346597653, - "learning_rate": 2.1175569219047783e-06, - "loss": 0.8934, - "step": 4132 - }, - { - "epoch": 0.49696386701136297, - "grad_norm": 1.6804872250217506, - "learning_rate": 2.1167792881622437e-06, - "loss": 0.9579, - "step": 4133 - }, - { - "epoch": 0.497084109902002, - "grad_norm": 2.42369650976011, - "learning_rate": 2.116001636704384e-06, - "loss": 1.0363, - "step": 4134 - }, - { - "epoch": 0.49720435279264114, - "grad_norm": 2.2828581902685547, - "learning_rate": 2.1152239676491685e-06, - "loss": 1.0357, - "step": 4135 - }, - { - "epoch": 0.49732459568328025, - "grad_norm": 2.137150304197038, - "learning_rate": 2.114446281114569e-06, - "loss": 0.9695, - "step": 4136 - }, - { - "epoch": 0.4974448385739193, - "grad_norm": 4.210026063815735, - "learning_rate": 2.1136685772185587e-06, - "loss": 0.9864, - "step": 4137 - }, - { - "epoch": 0.4975650814645584, - "grad_norm": 1.650681256769518, - "learning_rate": 2.1128908560791163e-06, - "loss": 1.01, - "step": 4138 - }, - { - "epoch": 0.4976853243551975, - "grad_norm": 1.5914765392543329, - "learning_rate": 2.1121131178142203e-06, - "loss": 1.0151, - "step": 4139 - }, - { - "epoch": 0.4978055672458366, - "grad_norm": 1.5695144371396819, - "learning_rate": 2.1113353625418544e-06, - "loss": 1.0501, - "step": 4140 - }, - { - "epoch": 0.4979258101364757, - "grad_norm": 1.7858500061229965, - "learning_rate": 2.1105575903800017e-06, - "loss": 1.0188, - "step": 4141 - }, - { - "epoch": 0.4980460530271148, - "grad_norm": 1.7010749587822749, - "learning_rate": 2.1097798014466502e-06, - "loss": 1.0767, - "step": 4142 - }, - { - "epoch": 0.49816629591775385, - "grad_norm": 3.9374416286440987, - "learning_rate": 2.109001995859791e-06, - "loss": 0.8143, - "step": 4143 - }, - { - "epoch": 0.49828653880839296, - "grad_norm": 0.7693561412814828, - "learning_rate": 2.108224173737415e-06, - "loss": 0.8637, - "step": 4144 - }, - { - "epoch": 0.498406781699032, - "grad_norm": 1.8240492290663026, - "learning_rate": 2.1074463351975183e-06, - "loss": 0.9938, - "step": 4145 - }, - { - "epoch": 0.49852702458967113, - "grad_norm": 1.7581571114025174, - "learning_rate": 2.106668480358098e-06, - "loss": 0.945, - "step": 4146 - }, - { - "epoch": 0.49864726748031024, - "grad_norm": 1.8888022723939208, - "learning_rate": 2.105890609337154e-06, - "loss": 0.9349, - "step": 4147 - }, - { - "epoch": 0.4987675103709493, - "grad_norm": 0.6877275003810929, - "learning_rate": 2.1051127222526883e-06, - "loss": 0.8777, - "step": 4148 - }, - { - "epoch": 0.4988877532615884, - "grad_norm": 2.703739074459214, - "learning_rate": 2.1043348192227067e-06, - "loss": 1.0328, - "step": 4149 - }, - { - "epoch": 0.4990079961522275, - "grad_norm": 2.0301420271099353, - "learning_rate": 2.1035569003652156e-06, - "loss": 0.859, - "step": 4150 - }, - { - "epoch": 0.4991282390428666, - "grad_norm": 2.0479389948774114, - "learning_rate": 2.1027789657982255e-06, - "loss": 1.0517, - "step": 4151 - }, - { - "epoch": 0.4992484819335057, - "grad_norm": 1.8993410071908308, - "learning_rate": 2.1020010156397482e-06, - "loss": 1.0039, - "step": 4152 - }, - { - "epoch": 0.4993687248241448, - "grad_norm": 1.3638562845129991, - "learning_rate": 2.101223050007797e-06, - "loss": 1.0113, - "step": 4153 - }, - { - "epoch": 0.49948896771478385, - "grad_norm": 0.8788868880788333, - "learning_rate": 2.1004450690203904e-06, - "loss": 0.8144, - "step": 4154 - }, - { - "epoch": 0.49960921060542296, - "grad_norm": 0.9734562108184173, - "learning_rate": 2.099667072795546e-06, - "loss": 0.9453, - "step": 4155 - }, - { - "epoch": 0.49972945349606207, - "grad_norm": 1.7510510421886203, - "learning_rate": 2.0988890614512864e-06, - "loss": 1.0245, - "step": 4156 - }, - { - "epoch": 0.4998496963867011, - "grad_norm": 1.7132576317644326, - "learning_rate": 2.098111035105635e-06, - "loss": 1.0631, - "step": 4157 - }, - { - "epoch": 0.49996993927734024, - "grad_norm": 1.7169743468957097, - "learning_rate": 2.0973329938766176e-06, - "loss": 0.9636, - "step": 4158 - }, - { - "epoch": 0.5000901821679793, - "grad_norm": 1.728962744519394, - "learning_rate": 2.0965549378822618e-06, - "loss": 1.0192, - "step": 4159 - }, - { - "epoch": 0.5002104250586185, - "grad_norm": 2.1019561285491224, - "learning_rate": 2.095776867240599e-06, - "loss": 1.0671, - "step": 4160 - }, - { - "epoch": 0.5003306679492575, - "grad_norm": 1.887186059674874, - "learning_rate": 2.094998782069661e-06, - "loss": 1.0532, - "step": 4161 - }, - { - "epoch": 0.5004509108398966, - "grad_norm": 1.7930797135334176, - "learning_rate": 2.0942206824874845e-06, - "loss": 0.9762, - "step": 4162 - }, - { - "epoch": 0.5005711537305357, - "grad_norm": 1.918388740254209, - "learning_rate": 2.093442568612105e-06, - "loss": 1.0266, - "step": 4163 - }, - { - "epoch": 0.5006913966211748, - "grad_norm": 1.669847203420054, - "learning_rate": 2.0926644405615613e-06, - "loss": 1.0779, - "step": 4164 - }, - { - "epoch": 0.5008116395118138, - "grad_norm": 1.8615618115741301, - "learning_rate": 2.091886298453897e-06, - "loss": 1.0484, - "step": 4165 - }, - { - "epoch": 0.500931882402453, - "grad_norm": 2.3488466922819793, - "learning_rate": 2.091108142407153e-06, - "loss": 0.9689, - "step": 4166 - }, - { - "epoch": 0.5010521252930921, - "grad_norm": 0.9177521145639607, - "learning_rate": 2.090329972539377e-06, - "loss": 0.9223, - "step": 4167 - }, - { - "epoch": 0.5011723681837311, - "grad_norm": 1.6451115615025713, - "learning_rate": 2.089551788968616e-06, - "loss": 0.9132, - "step": 4168 - }, - { - "epoch": 0.5012926110743702, - "grad_norm": 0.8812013626204114, - "learning_rate": 2.08877359181292e-06, - "loss": 0.8764, - "step": 4169 - }, - { - "epoch": 0.5014128539650093, - "grad_norm": 3.8179697291601467, - "learning_rate": 2.0879953811903396e-06, - "loss": 1.0907, - "step": 4170 - }, - { - "epoch": 0.5015330968556484, - "grad_norm": 1.6695612646322606, - "learning_rate": 2.08721715721893e-06, - "loss": 1.013, - "step": 4171 - }, - { - "epoch": 0.5016533397462875, - "grad_norm": 1.8686438590944363, - "learning_rate": 2.0864389200167477e-06, - "loss": 1.0009, - "step": 4172 - }, - { - "epoch": 0.5017735826369266, - "grad_norm": 8.631600328499752, - "learning_rate": 2.0856606697018504e-06, - "loss": 1.0204, - "step": 4173 - }, - { - "epoch": 0.5018938255275657, - "grad_norm": 2.3085111290207156, - "learning_rate": 2.084882406392297e-06, - "loss": 0.9586, - "step": 4174 - }, - { - "epoch": 0.5020140684182047, - "grad_norm": 1.8296244414137706, - "learning_rate": 2.0841041302061496e-06, - "loss": 0.9315, - "step": 4175 - }, - { - "epoch": 0.5021343113088439, - "grad_norm": 1.7972223360800899, - "learning_rate": 2.083325841261473e-06, - "loss": 0.9769, - "step": 4176 - }, - { - "epoch": 0.502254554199483, - "grad_norm": 3.7084936185907638, - "learning_rate": 2.0825475396763322e-06, - "loss": 0.9028, - "step": 4177 - }, - { - "epoch": 0.502374797090122, - "grad_norm": 1.5310928886933586, - "learning_rate": 2.081769225568796e-06, - "loss": 0.8784, - "step": 4178 - }, - { - "epoch": 0.5024950399807612, - "grad_norm": 4.433182886386528, - "learning_rate": 2.0809908990569327e-06, - "loss": 0.9861, - "step": 4179 - }, - { - "epoch": 0.5026152828714002, - "grad_norm": 2.2524269578206932, - "learning_rate": 2.0802125602588146e-06, - "loss": 1.0187, - "step": 4180 - }, - { - "epoch": 0.5027355257620393, - "grad_norm": 1.7668733487899075, - "learning_rate": 2.0794342092925146e-06, - "loss": 0.902, - "step": 4181 - }, - { - "epoch": 0.5028557686526784, - "grad_norm": 2.0889740176924745, - "learning_rate": 2.078655846276108e-06, - "loss": 0.917, - "step": 4182 - }, - { - "epoch": 0.5029760115433175, - "grad_norm": 1.782259471014668, - "learning_rate": 2.0778774713276727e-06, - "loss": 0.9054, - "step": 4183 - }, - { - "epoch": 0.5030962544339566, - "grad_norm": 2.5865318329889515, - "learning_rate": 2.077099084565287e-06, - "loss": 0.8995, - "step": 4184 - }, - { - "epoch": 0.5032164973245957, - "grad_norm": 2.228031463585798, - "learning_rate": 2.0763206861070313e-06, - "loss": 0.8696, - "step": 4185 - }, - { - "epoch": 0.5033367402152348, - "grad_norm": 1.8643250810447463, - "learning_rate": 2.0755422760709876e-06, - "loss": 0.9808, - "step": 4186 - }, - { - "epoch": 0.5034569831058738, - "grad_norm": 1.7595882008791073, - "learning_rate": 2.0747638545752417e-06, - "loss": 0.9988, - "step": 4187 - }, - { - "epoch": 0.503577225996513, - "grad_norm": 2.001998498914158, - "learning_rate": 2.073985421737878e-06, - "loss": 1.0596, - "step": 4188 - }, - { - "epoch": 0.5036974688871521, - "grad_norm": 2.047580510048236, - "learning_rate": 2.0732069776769844e-06, - "loss": 0.9777, - "step": 4189 - }, - { - "epoch": 0.5038177117777911, - "grad_norm": 2.035144886417111, - "learning_rate": 2.072428522510651e-06, - "loss": 0.9611, - "step": 4190 - }, - { - "epoch": 0.5039379546684303, - "grad_norm": 5.505177055373491, - "learning_rate": 2.071650056356968e-06, - "loss": 0.9925, - "step": 4191 - }, - { - "epoch": 0.5040581975590693, - "grad_norm": 2.1270414531992388, - "learning_rate": 2.070871579334028e-06, - "loss": 1.0265, - "step": 4192 - }, - { - "epoch": 0.5041784404497084, - "grad_norm": 1.6369053408799241, - "learning_rate": 2.0700930915599264e-06, - "loss": 0.9539, - "step": 4193 - }, - { - "epoch": 0.5042986833403476, - "grad_norm": 2.0249109978602444, - "learning_rate": 2.0693145931527583e-06, - "loss": 1.0141, - "step": 4194 - }, - { - "epoch": 0.5044189262309866, - "grad_norm": 1.6339046348468218, - "learning_rate": 2.068536084230622e-06, - "loss": 1.0142, - "step": 4195 - }, - { - "epoch": 0.5045391691216257, - "grad_norm": 2.2683161044768654, - "learning_rate": 2.067757564911616e-06, - "loss": 1.1131, - "step": 4196 - }, - { - "epoch": 0.5046594120122648, - "grad_norm": 2.072442567625447, - "learning_rate": 2.0669790353138407e-06, - "loss": 1.1502, - "step": 4197 - }, - { - "epoch": 0.5047796549029039, - "grad_norm": 3.5289009855956137, - "learning_rate": 2.0662004955553995e-06, - "loss": 0.962, - "step": 4198 - }, - { - "epoch": 0.5048998977935429, - "grad_norm": 3.787683609644557, - "learning_rate": 2.065421945754395e-06, - "loss": 0.9896, - "step": 4199 - }, - { - "epoch": 0.505020140684182, - "grad_norm": 1.557056634660618, - "learning_rate": 2.0646433860289344e-06, - "loss": 1.0076, - "step": 4200 - }, - { - "epoch": 0.5051403835748212, - "grad_norm": 2.7234199142544857, - "learning_rate": 2.0638648164971233e-06, - "loss": 1.0472, - "step": 4201 - }, - { - "epoch": 0.5052606264654602, - "grad_norm": 2.0393384257154596, - "learning_rate": 2.06308623727707e-06, - "loss": 1.1155, - "step": 4202 - }, - { - "epoch": 0.5053808693560993, - "grad_norm": 2.196713146688531, - "learning_rate": 2.0623076484868846e-06, - "loss": 0.9789, - "step": 4203 - }, - { - "epoch": 0.5055011122467384, - "grad_norm": 0.9187290695297526, - "learning_rate": 2.061529050244679e-06, - "loss": 0.9174, - "step": 4204 - }, - { - "epoch": 0.5056213551373775, - "grad_norm": 6.4085337964250355, - "learning_rate": 2.060750442668565e-06, - "loss": 0.9826, - "step": 4205 - }, - { - "epoch": 0.5057415980280165, - "grad_norm": 2.076247550324448, - "learning_rate": 2.059971825876657e-06, - "loss": 0.8673, - "step": 4206 - }, - { - "epoch": 0.5058618409186557, - "grad_norm": 1.9307550110762424, - "learning_rate": 2.0591931999870713e-06, - "loss": 0.994, - "step": 4207 - }, - { - "epoch": 0.5059820838092948, - "grad_norm": 0.9058069264114045, - "learning_rate": 2.0584145651179234e-06, - "loss": 0.8666, - "step": 4208 - }, - { - "epoch": 0.5061023266999338, - "grad_norm": 2.059980645505481, - "learning_rate": 2.0576359213873327e-06, - "loss": 1.0139, - "step": 4209 - }, - { - "epoch": 0.506222569590573, - "grad_norm": 2.308303737335888, - "learning_rate": 2.056857268913419e-06, - "loss": 0.9227, - "step": 4210 - }, - { - "epoch": 0.506342812481212, - "grad_norm": 2.2489841075113497, - "learning_rate": 2.056078607814303e-06, - "loss": 1.0768, - "step": 4211 - }, - { - "epoch": 0.5064630553718511, - "grad_norm": 1.938559027390291, - "learning_rate": 2.055299938208106e-06, - "loss": 1.0239, - "step": 4212 - }, - { - "epoch": 0.5065832982624903, - "grad_norm": 2.0410372354193855, - "learning_rate": 2.0545212602129526e-06, - "loss": 1.0891, - "step": 4213 - }, - { - "epoch": 0.5067035411531293, - "grad_norm": 2.0880429504040223, - "learning_rate": 2.0537425739469673e-06, - "loss": 0.8921, - "step": 4214 - }, - { - "epoch": 0.5068237840437684, - "grad_norm": 0.9141244785782557, - "learning_rate": 2.052963879528276e-06, - "loss": 0.874, - "step": 4215 - }, - { - "epoch": 0.5069440269344075, - "grad_norm": 2.0125494862034388, - "learning_rate": 2.052185177075007e-06, - "loss": 0.989, - "step": 4216 - }, - { - "epoch": 0.5070642698250466, - "grad_norm": 3.873446614638265, - "learning_rate": 2.051406466705288e-06, - "loss": 1.0577, - "step": 4217 - }, - { - "epoch": 0.5071845127156857, - "grad_norm": 1.710807621176269, - "learning_rate": 2.0506277485372486e-06, - "loss": 1.0373, - "step": 4218 - }, - { - "epoch": 0.5073047556063248, - "grad_norm": 4.133831626840574, - "learning_rate": 2.04984902268902e-06, - "loss": 0.8979, - "step": 4219 - }, - { - "epoch": 0.5074249984969639, - "grad_norm": 2.735752129497534, - "learning_rate": 2.0490702892787345e-06, - "loss": 0.9821, - "step": 4220 - }, - { - "epoch": 0.5075452413876029, - "grad_norm": 1.7617685518182522, - "learning_rate": 2.0482915484245246e-06, - "loss": 0.8566, - "step": 4221 - }, - { - "epoch": 0.5076654842782421, - "grad_norm": 2.4381177070623994, - "learning_rate": 2.047512800244526e-06, - "loss": 1.0622, - "step": 4222 - }, - { - "epoch": 0.5077857271688812, - "grad_norm": 2.1559772042465366, - "learning_rate": 2.046734044856873e-06, - "loss": 1.0143, - "step": 4223 - }, - { - "epoch": 0.5079059700595202, - "grad_norm": 1.8151684553089507, - "learning_rate": 2.045955282379702e-06, - "loss": 1.0327, - "step": 4224 - }, - { - "epoch": 0.5080262129501594, - "grad_norm": 4.7351975938118365, - "learning_rate": 2.045176512931152e-06, - "loss": 0.9915, - "step": 4225 - }, - { - "epoch": 0.5081464558407984, - "grad_norm": 1.747948738625346, - "learning_rate": 2.0443977366293604e-06, - "loss": 0.988, - "step": 4226 - }, - { - "epoch": 0.5082666987314375, - "grad_norm": 1.6885819675249452, - "learning_rate": 2.043618953592468e-06, - "loss": 1.0068, - "step": 4227 - }, - { - "epoch": 0.5083869416220766, - "grad_norm": 38.27242883060533, - "learning_rate": 2.0428401639386144e-06, - "loss": 1.0414, - "step": 4228 - }, - { - "epoch": 0.5085071845127157, - "grad_norm": 0.9116513983292369, - "learning_rate": 2.042061367785943e-06, - "loss": 0.8824, - "step": 4229 - }, - { - "epoch": 0.5086274274033548, - "grad_norm": 2.25253471017528, - "learning_rate": 2.041282565252594e-06, - "loss": 0.9797, - "step": 4230 - }, - { - "epoch": 0.5087476702939938, - "grad_norm": 1.6573819305116204, - "learning_rate": 2.040503756456714e-06, - "loss": 0.9928, - "step": 4231 - }, - { - "epoch": 0.508867913184633, - "grad_norm": 1.9709255973827884, - "learning_rate": 2.0397249415164456e-06, - "loss": 1.0206, - "step": 4232 - }, - { - "epoch": 0.508988156075272, - "grad_norm": 1.913317850805681, - "learning_rate": 2.0389461205499354e-06, - "loss": 1.0346, - "step": 4233 - }, - { - "epoch": 0.5091083989659111, - "grad_norm": 2.5894068008447864, - "learning_rate": 2.03816729367533e-06, - "loss": 0.9577, - "step": 4234 - }, - { - "epoch": 0.5092286418565503, - "grad_norm": 2.3290053373728514, - "learning_rate": 2.0373884610107765e-06, - "loss": 0.9344, - "step": 4235 - }, - { - "epoch": 0.5093488847471893, - "grad_norm": 3.4054420282864926, - "learning_rate": 2.0366096226744225e-06, - "loss": 0.9161, - "step": 4236 - }, - { - "epoch": 0.5094691276378284, - "grad_norm": 1.7949042750046738, - "learning_rate": 2.035830778784418e-06, - "loss": 1.0029, - "step": 4237 - }, - { - "epoch": 0.5095893705284675, - "grad_norm": 3.2330332751179687, - "learning_rate": 2.0350519294589134e-06, - "loss": 1.0321, - "step": 4238 - }, - { - "epoch": 0.5097096134191066, - "grad_norm": 1.8147919074744334, - "learning_rate": 2.0342730748160588e-06, - "loss": 1.0618, - "step": 4239 - }, - { - "epoch": 0.5098298563097456, - "grad_norm": 1.8549327839192116, - "learning_rate": 2.033494214974006e-06, - "loss": 0.9327, - "step": 4240 - }, - { - "epoch": 0.5099500992003848, - "grad_norm": 1.5678225842114135, - "learning_rate": 2.0327153500509067e-06, - "loss": 1.0581, - "step": 4241 - }, - { - "epoch": 0.5100703420910239, - "grad_norm": 3.1072725635384244, - "learning_rate": 2.031936480164916e-06, - "loss": 1.0808, - "step": 4242 - }, - { - "epoch": 0.5101905849816629, - "grad_norm": 2.3252829799923784, - "learning_rate": 2.0311576054341857e-06, - "loss": 1.035, - "step": 4243 - }, - { - "epoch": 0.5103108278723021, - "grad_norm": 1.6656830110163448, - "learning_rate": 2.0303787259768715e-06, - "loss": 0.8582, - "step": 4244 - }, - { - "epoch": 0.5104310707629411, - "grad_norm": 2.646028756761745, - "learning_rate": 2.0295998419111294e-06, - "loss": 0.9167, - "step": 4245 - }, - { - "epoch": 0.5105513136535802, - "grad_norm": 2.252865132703763, - "learning_rate": 2.028820953355115e-06, - "loss": 0.9623, - "step": 4246 - }, - { - "epoch": 0.5106715565442194, - "grad_norm": 2.219133693131611, - "learning_rate": 2.0280420604269834e-06, - "loss": 1.0146, - "step": 4247 - }, - { - "epoch": 0.5107917994348584, - "grad_norm": 0.8141603012057378, - "learning_rate": 2.027263163244895e-06, - "loss": 0.8751, - "step": 4248 - }, - { - "epoch": 0.5109120423254975, - "grad_norm": 1.6387700811882682, - "learning_rate": 2.026484261927005e-06, - "loss": 0.9693, - "step": 4249 - }, - { - "epoch": 0.5110322852161366, - "grad_norm": 2.2940354213823477, - "learning_rate": 2.025705356591475e-06, - "loss": 0.9639, - "step": 4250 - }, - { - "epoch": 0.5111525281067757, - "grad_norm": 0.8646279442243315, - "learning_rate": 2.024926447356462e-06, - "loss": 0.8529, - "step": 4251 - }, - { - "epoch": 0.5112727709974147, - "grad_norm": 1.899767374554868, - "learning_rate": 2.024147534340127e-06, - "loss": 1.0187, - "step": 4252 - }, - { - "epoch": 0.5113930138880539, - "grad_norm": 2.077566200789893, - "learning_rate": 2.02336861766063e-06, - "loss": 1.0285, - "step": 4253 - }, - { - "epoch": 0.511513256778693, - "grad_norm": 2.901859207061049, - "learning_rate": 2.0225896974361327e-06, - "loss": 1.0109, - "step": 4254 - }, - { - "epoch": 0.511633499669332, - "grad_norm": 0.945501135298412, - "learning_rate": 2.0218107737847962e-06, - "loss": 0.8857, - "step": 4255 - }, - { - "epoch": 0.5117537425599712, - "grad_norm": 2.000974417778865, - "learning_rate": 2.0210318468247826e-06, - "loss": 0.9912, - "step": 4256 - }, - { - "epoch": 0.5118739854506102, - "grad_norm": 1.7472122330911546, - "learning_rate": 2.020252916674255e-06, - "loss": 1.0473, - "step": 4257 - }, - { - "epoch": 0.5119942283412493, - "grad_norm": 1.7660512494074025, - "learning_rate": 2.019473983451375e-06, - "loss": 1.04, - "step": 4258 - }, - { - "epoch": 0.5121144712318885, - "grad_norm": 1.6899258872716356, - "learning_rate": 2.0186950472743076e-06, - "loss": 0.9378, - "step": 4259 - }, - { - "epoch": 0.5122347141225275, - "grad_norm": 1.7397662388322863, - "learning_rate": 2.0179161082612162e-06, - "loss": 0.9716, - "step": 4260 - }, - { - "epoch": 0.5123549570131666, - "grad_norm": 4.701760128858784, - "learning_rate": 2.017137166530266e-06, - "loss": 0.9571, - "step": 4261 - }, - { - "epoch": 0.5124751999038056, - "grad_norm": 2.387506982283843, - "learning_rate": 2.0163582221996213e-06, - "loss": 1.0303, - "step": 4262 - }, - { - "epoch": 0.5125954427944448, - "grad_norm": 2.6450608595136345, - "learning_rate": 2.015579275387446e-06, - "loss": 0.9107, - "step": 4263 - }, - { - "epoch": 0.5127156856850839, - "grad_norm": 1.8542861542794615, - "learning_rate": 2.0148003262119085e-06, - "loss": 0.9177, - "step": 4264 - }, - { - "epoch": 0.5128359285757229, - "grad_norm": 2.1053761194691916, - "learning_rate": 2.0140213747911728e-06, - "loss": 0.989, - "step": 4265 - }, - { - "epoch": 0.5129561714663621, - "grad_norm": 1.871002768452817, - "learning_rate": 2.013242421243406e-06, - "loss": 1.0259, - "step": 4266 - }, - { - "epoch": 0.5130764143570011, - "grad_norm": 1.4932642541955783, - "learning_rate": 2.012463465686774e-06, - "loss": 1.0159, - "step": 4267 - }, - { - "epoch": 0.5131966572476402, - "grad_norm": 0.8549936098562667, - "learning_rate": 2.0116845082394446e-06, - "loss": 0.8313, - "step": 4268 - }, - { - "epoch": 0.5133169001382794, - "grad_norm": 1.8192693643885274, - "learning_rate": 2.0109055490195836e-06, - "loss": 1.0202, - "step": 4269 - }, - { - "epoch": 0.5134371430289184, - "grad_norm": 2.331366618412523, - "learning_rate": 2.0101265881453605e-06, - "loss": 0.8671, - "step": 4270 - }, - { - "epoch": 0.5135573859195575, - "grad_norm": 2.392470501405954, - "learning_rate": 2.009347625734941e-06, - "loss": 1.0198, - "step": 4271 - }, - { - "epoch": 0.5136776288101966, - "grad_norm": 2.7266982080303244, - "learning_rate": 2.0085686619064954e-06, - "loss": 0.9849, - "step": 4272 - }, - { - "epoch": 0.5137978717008357, - "grad_norm": 2.1237507682381285, - "learning_rate": 2.00778969677819e-06, - "loss": 1.065, - "step": 4273 - }, - { - "epoch": 0.5139181145914747, - "grad_norm": 1.8858057818364704, - "learning_rate": 2.0070107304681934e-06, - "loss": 0.8806, - "step": 4274 - }, - { - "epoch": 0.5140383574821139, - "grad_norm": 2.3082538490060873, - "learning_rate": 2.006231763094675e-06, - "loss": 1.0136, - "step": 4275 - }, - { - "epoch": 0.514158600372753, - "grad_norm": 1.928088742105371, - "learning_rate": 2.0054527947758027e-06, - "loss": 1.0953, - "step": 4276 - }, - { - "epoch": 0.514278843263392, - "grad_norm": 0.7736591445382409, - "learning_rate": 2.004673825629746e-06, - "loss": 0.8233, - "step": 4277 - }, - { - "epoch": 0.5143990861540312, - "grad_norm": 1.5882688777983982, - "learning_rate": 2.0038948557746744e-06, - "loss": 0.9487, - "step": 4278 - }, - { - "epoch": 0.5145193290446702, - "grad_norm": 1.5899074832223097, - "learning_rate": 2.0031158853287558e-06, - "loss": 0.9815, - "step": 4279 - }, - { - "epoch": 0.5146395719353093, - "grad_norm": 1.9108821683095298, - "learning_rate": 2.0023369144101593e-06, - "loss": 0.9537, - "step": 4280 - }, - { - "epoch": 0.5147598148259485, - "grad_norm": 1.7416218750348698, - "learning_rate": 2.0015579431370555e-06, - "loss": 0.9949, - "step": 4281 - }, - { - "epoch": 0.5148800577165875, - "grad_norm": 2.20763881726415, - "learning_rate": 2.000778971627612e-06, - "loss": 0.9324, - "step": 4282 - }, - { - "epoch": 0.5150003006072266, - "grad_norm": 1.9673507198525064, - "learning_rate": 2e-06, - "loss": 1.1277, - "step": 4283 - }, - { - "epoch": 0.5151205434978657, - "grad_norm": 1.7035099959285938, - "learning_rate": 1.9992210283723878e-06, - "loss": 1.0936, - "step": 4284 - }, - { - "epoch": 0.5152407863885048, - "grad_norm": 1.4972242262541209, - "learning_rate": 1.9984420568629448e-06, - "loss": 1.0155, - "step": 4285 - }, - { - "epoch": 0.5153610292791438, - "grad_norm": 2.536830543190558, - "learning_rate": 1.9976630855898405e-06, - "loss": 1.0084, - "step": 4286 - }, - { - "epoch": 0.515481272169783, - "grad_norm": 2.681072622269637, - "learning_rate": 1.9968841146712445e-06, - "loss": 0.9754, - "step": 4287 - }, - { - "epoch": 0.5156015150604221, - "grad_norm": 1.5780259658408027, - "learning_rate": 1.996105144225326e-06, - "loss": 0.9382, - "step": 4288 - }, - { - "epoch": 0.5157217579510611, - "grad_norm": 1.9109120502165242, - "learning_rate": 1.995326174370254e-06, - "loss": 1.0228, - "step": 4289 - }, - { - "epoch": 0.5158420008417003, - "grad_norm": 2.820716469699729, - "learning_rate": 1.994547205224197e-06, - "loss": 0.9526, - "step": 4290 - }, - { - "epoch": 0.5159622437323393, - "grad_norm": 1.8483062955543026, - "learning_rate": 1.993768236905325e-06, - "loss": 0.9036, - "step": 4291 - }, - { - "epoch": 0.5160824866229784, - "grad_norm": 2.2588984112096604, - "learning_rate": 1.992989269531807e-06, - "loss": 0.888, - "step": 4292 - }, - { - "epoch": 0.5162027295136175, - "grad_norm": 2.69394540808177, - "learning_rate": 1.99221030322181e-06, - "loss": 0.9054, - "step": 4293 - }, - { - "epoch": 0.5163229724042566, - "grad_norm": 1.4961070565610513, - "learning_rate": 1.991431338093505e-06, - "loss": 1.0359, - "step": 4294 - }, - { - "epoch": 0.5164432152948957, - "grad_norm": 2.096964858925364, - "learning_rate": 1.9906523742650587e-06, - "loss": 1.0178, - "step": 4295 - }, - { - "epoch": 0.5165634581855347, - "grad_norm": 1.9206371507840632, - "learning_rate": 1.9898734118546397e-06, - "loss": 0.9894, - "step": 4296 - }, - { - "epoch": 0.5166837010761739, - "grad_norm": 1.534795743045267, - "learning_rate": 1.989094450980416e-06, - "loss": 1.0347, - "step": 4297 - }, - { - "epoch": 0.516803943966813, - "grad_norm": 3.4323981137862543, - "learning_rate": 1.9883154917605556e-06, - "loss": 0.9951, - "step": 4298 - }, - { - "epoch": 0.516924186857452, - "grad_norm": 1.6529865141985993, - "learning_rate": 1.9875365343132262e-06, - "loss": 1.0514, - "step": 4299 - }, - { - "epoch": 0.5170444297480912, - "grad_norm": 2.1890251292620024, - "learning_rate": 1.9867575787565946e-06, - "loss": 1.0747, - "step": 4300 - }, - { - "epoch": 0.5171646726387302, - "grad_norm": 2.093758312607391, - "learning_rate": 1.9859786252088275e-06, - "loss": 1.091, - "step": 4301 - }, - { - "epoch": 0.5172849155293693, - "grad_norm": 2.9266703213587064, - "learning_rate": 1.9851996737880914e-06, - "loss": 0.8976, - "step": 4302 - }, - { - "epoch": 0.5174051584200084, - "grad_norm": 1.8858151374823484, - "learning_rate": 1.9844207246125537e-06, - "loss": 0.976, - "step": 4303 - }, - { - "epoch": 0.5175254013106475, - "grad_norm": 1.8532762925657642, - "learning_rate": 1.983641777800379e-06, - "loss": 0.9151, - "step": 4304 - }, - { - "epoch": 0.5176456442012866, - "grad_norm": 0.9210542807886947, - "learning_rate": 1.9828628334697343e-06, - "loss": 0.8582, - "step": 4305 - }, - { - "epoch": 0.5177658870919257, - "grad_norm": 0.7931080587755649, - "learning_rate": 1.982083891738784e-06, - "loss": 0.8253, - "step": 4306 - }, - { - "epoch": 0.5178861299825648, - "grad_norm": 1.3784797160054094, - "learning_rate": 1.9813049527256923e-06, - "loss": 1.0573, - "step": 4307 - }, - { - "epoch": 0.5180063728732038, - "grad_norm": 2.2246778576418613, - "learning_rate": 1.9805260165486252e-06, - "loss": 1.0667, - "step": 4308 - }, - { - "epoch": 0.518126615763843, - "grad_norm": 1.7518334321266467, - "learning_rate": 1.9797470833257457e-06, - "loss": 1.0927, - "step": 4309 - }, - { - "epoch": 0.5182468586544821, - "grad_norm": 2.2358037475418033, - "learning_rate": 1.9789681531752177e-06, - "loss": 1.0033, - "step": 4310 - }, - { - "epoch": 0.5183671015451211, - "grad_norm": 1.4444252383756633, - "learning_rate": 1.978189226215204e-06, - "loss": 0.9535, - "step": 4311 - }, - { - "epoch": 0.5184873444357603, - "grad_norm": 1.8820864595301807, - "learning_rate": 1.9774103025638675e-06, - "loss": 0.9994, - "step": 4312 - }, - { - "epoch": 0.5186075873263993, - "grad_norm": 1.6851737624997325, - "learning_rate": 1.9766313823393696e-06, - "loss": 0.9926, - "step": 4313 - }, - { - "epoch": 0.5187278302170384, - "grad_norm": 2.1110366493054293, - "learning_rate": 1.975852465659873e-06, - "loss": 0.9183, - "step": 4314 - }, - { - "epoch": 0.5188480731076776, - "grad_norm": 2.2433613220025723, - "learning_rate": 1.9750735526435377e-06, - "loss": 0.9325, - "step": 4315 - }, - { - "epoch": 0.5189683159983166, - "grad_norm": 5.242264270930959, - "learning_rate": 1.974294643408525e-06, - "loss": 1.0282, - "step": 4316 - }, - { - "epoch": 0.5190885588889557, - "grad_norm": 1.8737823983331416, - "learning_rate": 1.9735157380729947e-06, - "loss": 0.9007, - "step": 4317 - }, - { - "epoch": 0.5192088017795948, - "grad_norm": 1.9919819445308826, - "learning_rate": 1.9727368367551053e-06, - "loss": 1.0644, - "step": 4318 - }, - { - "epoch": 0.5193290446702339, - "grad_norm": 1.860239677368774, - "learning_rate": 1.9719579395730164e-06, - "loss": 0.9187, - "step": 4319 - }, - { - "epoch": 0.5194492875608729, - "grad_norm": 2.3621257384616463, - "learning_rate": 1.9711790466448854e-06, - "loss": 1.1571, - "step": 4320 - }, - { - "epoch": 0.5195695304515121, - "grad_norm": 2.0814292853404956, - "learning_rate": 1.9704001580888704e-06, - "loss": 0.9406, - "step": 4321 - }, - { - "epoch": 0.5196897733421512, - "grad_norm": 1.9311974058427746, - "learning_rate": 1.9696212740231283e-06, - "loss": 1.1003, - "step": 4322 - }, - { - "epoch": 0.5198100162327902, - "grad_norm": 1.8651889975255802, - "learning_rate": 1.9688423945658146e-06, - "loss": 1.0521, - "step": 4323 - }, - { - "epoch": 0.5199302591234293, - "grad_norm": 1.975784691520641, - "learning_rate": 1.9680635198350845e-06, - "loss": 0.952, - "step": 4324 - }, - { - "epoch": 0.5200505020140684, - "grad_norm": 1.999423659252147, - "learning_rate": 1.967284649949093e-06, - "loss": 0.9557, - "step": 4325 - }, - { - "epoch": 0.5201707449047075, - "grad_norm": 1.7457556343845353, - "learning_rate": 1.966505785025994e-06, - "loss": 0.953, - "step": 4326 - }, - { - "epoch": 0.5202909877953465, - "grad_norm": 1.6933349466816783, - "learning_rate": 1.965726925183941e-06, - "loss": 1.0005, - "step": 4327 - }, - { - "epoch": 0.5204112306859857, - "grad_norm": 2.0431974731549176, - "learning_rate": 1.964948070541087e-06, - "loss": 1.0733, - "step": 4328 - }, - { - "epoch": 0.5205314735766248, - "grad_norm": 2.609586878414833, - "learning_rate": 1.9641692212155816e-06, - "loss": 0.908, - "step": 4329 - }, - { - "epoch": 0.5206517164672638, - "grad_norm": 2.5064710790676674, - "learning_rate": 1.9633903773255777e-06, - "loss": 0.9605, - "step": 4330 - }, - { - "epoch": 0.520771959357903, - "grad_norm": 1.6545192384405425, - "learning_rate": 1.9626115389892237e-06, - "loss": 0.9857, - "step": 4331 - }, - { - "epoch": 0.520892202248542, - "grad_norm": 1.9777860812156953, - "learning_rate": 1.96183270632467e-06, - "loss": 1.0849, - "step": 4332 - }, - { - "epoch": 0.5210124451391811, - "grad_norm": 1.714486330362217, - "learning_rate": 1.9610538794500644e-06, - "loss": 1.0145, - "step": 4333 - }, - { - "epoch": 0.5211326880298203, - "grad_norm": 0.8062362581013448, - "learning_rate": 1.9602750584835542e-06, - "loss": 0.864, - "step": 4334 - }, - { - "epoch": 0.5212529309204593, - "grad_norm": 2.447410585291738, - "learning_rate": 1.959496243543286e-06, - "loss": 1.0635, - "step": 4335 - }, - { - "epoch": 0.5213731738110984, - "grad_norm": 2.2175551944989813, - "learning_rate": 1.9587174347474057e-06, - "loss": 1.0242, - "step": 4336 - }, - { - "epoch": 0.5214934167017375, - "grad_norm": 2.1922778675815127, - "learning_rate": 1.9579386322140574e-06, - "loss": 1.041, - "step": 4337 - }, - { - "epoch": 0.5216136595923766, - "grad_norm": 1.6930607199057692, - "learning_rate": 1.9571598360613854e-06, - "loss": 1.0337, - "step": 4338 - }, - { - "epoch": 0.5217339024830157, - "grad_norm": 2.023879190789893, - "learning_rate": 1.956381046407532e-06, - "loss": 0.9281, - "step": 4339 - }, - { - "epoch": 0.5218541453736548, - "grad_norm": 1.758187758338966, - "learning_rate": 1.9556022633706394e-06, - "loss": 1.0878, - "step": 4340 - }, - { - "epoch": 0.5219743882642939, - "grad_norm": 1.638226890844559, - "learning_rate": 1.954823487068848e-06, - "loss": 1.0261, - "step": 4341 - }, - { - "epoch": 0.5220946311549329, - "grad_norm": 1.6324582970566288, - "learning_rate": 1.9540447176202976e-06, - "loss": 1.0379, - "step": 4342 - }, - { - "epoch": 0.5222148740455721, - "grad_norm": 0.8465552849108329, - "learning_rate": 1.9532659551431272e-06, - "loss": 0.8837, - "step": 4343 - }, - { - "epoch": 0.5223351169362112, - "grad_norm": 1.5616414572446222, - "learning_rate": 1.9524871997554744e-06, - "loss": 0.8938, - "step": 4344 - }, - { - "epoch": 0.5224553598268502, - "grad_norm": 2.333497677419556, - "learning_rate": 1.951708451575475e-06, - "loss": 1.0265, - "step": 4345 - }, - { - "epoch": 0.5225756027174894, - "grad_norm": 1.846532758317345, - "learning_rate": 1.9509297107212657e-06, - "loss": 1.0465, - "step": 4346 - }, - { - "epoch": 0.5226958456081284, - "grad_norm": 1.4815153182103926, - "learning_rate": 1.95015097731098e-06, - "loss": 1.022, - "step": 4347 - }, - { - "epoch": 0.5228160884987675, - "grad_norm": 2.3809904542785296, - "learning_rate": 1.949372251462751e-06, - "loss": 1.049, - "step": 4348 - }, - { - "epoch": 0.5229363313894067, - "grad_norm": 1.9146088112459705, - "learning_rate": 1.9485935332947124e-06, - "loss": 1.0543, - "step": 4349 - }, - { - "epoch": 0.5230565742800457, - "grad_norm": 2.513649111202287, - "learning_rate": 1.947814822924993e-06, - "loss": 1.0595, - "step": 4350 - }, - { - "epoch": 0.5231768171706848, - "grad_norm": 1.7453510659242037, - "learning_rate": 1.9470361204717236e-06, - "loss": 1.0556, - "step": 4351 - }, - { - "epoch": 0.5232970600613239, - "grad_norm": 1.5231940441738423, - "learning_rate": 1.9462574260530326e-06, - "loss": 1.0407, - "step": 4352 - }, - { - "epoch": 0.523417302951963, - "grad_norm": 2.203572329295549, - "learning_rate": 1.9454787397870472e-06, - "loss": 1.0387, - "step": 4353 - }, - { - "epoch": 0.523537545842602, - "grad_norm": 2.902976212317667, - "learning_rate": 1.944700061791894e-06, - "loss": 0.9479, - "step": 4354 - }, - { - "epoch": 0.5236577887332411, - "grad_norm": 7.039678921313496, - "learning_rate": 1.943921392185698e-06, - "loss": 0.8826, - "step": 4355 - }, - { - "epoch": 0.5237780316238803, - "grad_norm": 2.2724764650844667, - "learning_rate": 1.9431427310865814e-06, - "loss": 0.9989, - "step": 4356 - }, - { - "epoch": 0.5238982745145193, - "grad_norm": 2.1079565860034553, - "learning_rate": 1.942364078612667e-06, - "loss": 1.0259, - "step": 4357 - }, - { - "epoch": 0.5240185174051584, - "grad_norm": 2.4624951473904737, - "learning_rate": 1.9415854348820765e-06, - "loss": 0.9795, - "step": 4358 - }, - { - "epoch": 0.5241387602957975, - "grad_norm": 2.8577547746421046, - "learning_rate": 1.940806800012929e-06, - "loss": 0.9014, - "step": 4359 - }, - { - "epoch": 0.5242590031864366, - "grad_norm": 1.6263432452997069, - "learning_rate": 1.9400281741233432e-06, - "loss": 0.8693, - "step": 4360 - }, - { - "epoch": 0.5243792460770756, - "grad_norm": 0.6976246911030947, - "learning_rate": 1.939249557331435e-06, - "loss": 0.7899, - "step": 4361 - }, - { - "epoch": 0.5244994889677148, - "grad_norm": 2.6072655105853695, - "learning_rate": 1.938470949755321e-06, - "loss": 0.9562, - "step": 4362 - }, - { - "epoch": 0.5246197318583539, - "grad_norm": 0.9028041045107853, - "learning_rate": 1.937692351513115e-06, - "loss": 0.8536, - "step": 4363 - }, - { - "epoch": 0.5247399747489929, - "grad_norm": 1.6479834342512822, - "learning_rate": 1.9369137627229297e-06, - "loss": 1.0305, - "step": 4364 - }, - { - "epoch": 0.5248602176396321, - "grad_norm": 1.9839256558832792, - "learning_rate": 1.936135183502877e-06, - "loss": 1.1107, - "step": 4365 - }, - { - "epoch": 0.5249804605302711, - "grad_norm": 2.108248148266515, - "learning_rate": 1.935356613971066e-06, - "loss": 1.0353, - "step": 4366 - }, - { - "epoch": 0.5251007034209102, - "grad_norm": 1.6594063534980403, - "learning_rate": 1.9345780542456047e-06, - "loss": 0.9987, - "step": 4367 - }, - { - "epoch": 0.5252209463115494, - "grad_norm": 2.171776090057504, - "learning_rate": 1.9337995044446007e-06, - "loss": 0.9516, - "step": 4368 - }, - { - "epoch": 0.5253411892021884, - "grad_norm": 4.227702270938579, - "learning_rate": 1.9330209646861596e-06, - "loss": 1.029, - "step": 4369 - }, - { - "epoch": 0.5254614320928275, - "grad_norm": 1.7231617572947058, - "learning_rate": 1.9322424350883843e-06, - "loss": 1.0081, - "step": 4370 - }, - { - "epoch": 0.5255816749834666, - "grad_norm": 1.6947617581133065, - "learning_rate": 1.931463915769379e-06, - "loss": 1.0087, - "step": 4371 - }, - { - "epoch": 0.5257019178741057, - "grad_norm": 3.293749299311699, - "learning_rate": 1.930685406847242e-06, - "loss": 0.9659, - "step": 4372 - }, - { - "epoch": 0.5258221607647448, - "grad_norm": 1.356202644189401, - "learning_rate": 1.9299069084400734e-06, - "loss": 1.0497, - "step": 4373 - }, - { - "epoch": 0.5259424036553839, - "grad_norm": 1.7344827704410992, - "learning_rate": 1.9291284206659717e-06, - "loss": 0.92, - "step": 4374 - }, - { - "epoch": 0.526062646546023, - "grad_norm": 1.8961425221712598, - "learning_rate": 1.928349943643032e-06, - "loss": 0.9453, - "step": 4375 - }, - { - "epoch": 0.526182889436662, - "grad_norm": 2.2705297587985056, - "learning_rate": 1.9275714774893493e-06, - "loss": 1.0557, - "step": 4376 - }, - { - "epoch": 0.5263031323273012, - "grad_norm": 2.709717665741848, - "learning_rate": 1.9267930223230154e-06, - "loss": 0.9597, - "step": 4377 - }, - { - "epoch": 0.5264233752179402, - "grad_norm": 1.9967455611565874, - "learning_rate": 1.9260145782621224e-06, - "loss": 1.0126, - "step": 4378 - }, - { - "epoch": 0.5265436181085793, - "grad_norm": 1.9204914105895379, - "learning_rate": 1.925236145424758e-06, - "loss": 1.1029, - "step": 4379 - }, - { - "epoch": 0.5266638609992185, - "grad_norm": 1.3786308294483098, - "learning_rate": 1.924457723929012e-06, - "loss": 0.8335, - "step": 4380 - }, - { - "epoch": 0.5267841038898575, - "grad_norm": 1.5295024943086675, - "learning_rate": 1.9236793138929685e-06, - "loss": 1.0532, - "step": 4381 - }, - { - "epoch": 0.5269043467804966, - "grad_norm": 2.193330895365526, - "learning_rate": 1.9229009154347133e-06, - "loss": 1.0409, - "step": 4382 - }, - { - "epoch": 0.5270245896711357, - "grad_norm": 1.982778793300973, - "learning_rate": 1.922122528672327e-06, - "loss": 1.0361, - "step": 4383 - }, - { - "epoch": 0.5271448325617748, - "grad_norm": 2.276511719189027, - "learning_rate": 1.9213441537238914e-06, - "loss": 1.0131, - "step": 4384 - }, - { - "epoch": 0.5272650754524139, - "grad_norm": 0.9647891689414555, - "learning_rate": 1.920565790707485e-06, - "loss": 0.885, - "step": 4385 - }, - { - "epoch": 0.527385318343053, - "grad_norm": 2.365807159142881, - "learning_rate": 1.9197874397411853e-06, - "loss": 0.8938, - "step": 4386 - }, - { - "epoch": 0.5275055612336921, - "grad_norm": 3.199859294182086, - "learning_rate": 1.919009100943067e-06, - "loss": 0.8919, - "step": 4387 - }, - { - "epoch": 0.5276258041243311, - "grad_norm": 1.8018297961569087, - "learning_rate": 1.9182307744312043e-06, - "loss": 0.8839, - "step": 4388 - }, - { - "epoch": 0.5277460470149702, - "grad_norm": 2.4273546801205272, - "learning_rate": 1.9174524603236676e-06, - "loss": 0.9945, - "step": 4389 - }, - { - "epoch": 0.5278662899056094, - "grad_norm": 1.9707151021300435, - "learning_rate": 1.916674158738527e-06, - "loss": 0.9935, - "step": 4390 - }, - { - "epoch": 0.5279865327962484, - "grad_norm": 2.773337091052126, - "learning_rate": 1.9158958697938506e-06, - "loss": 0.8282, - "step": 4391 - }, - { - "epoch": 0.5281067756868875, - "grad_norm": 2.4006436915267106, - "learning_rate": 1.9151175936077032e-06, - "loss": 1.0911, - "step": 4392 - }, - { - "epoch": 0.5282270185775266, - "grad_norm": 1.9601595729953274, - "learning_rate": 1.9143393302981507e-06, - "loss": 1.0234, - "step": 4393 - }, - { - "epoch": 0.5283472614681657, - "grad_norm": 1.6554610334561264, - "learning_rate": 1.913561079983252e-06, - "loss": 1.0584, - "step": 4394 - }, - { - "epoch": 0.5284675043588047, - "grad_norm": 1.9504251359009899, - "learning_rate": 1.9127828427810693e-06, - "loss": 0.9791, - "step": 4395 - }, - { - "epoch": 0.5285877472494439, - "grad_norm": 1.825572652983721, - "learning_rate": 1.9120046188096607e-06, - "loss": 1.0371, - "step": 4396 - }, - { - "epoch": 0.528707990140083, - "grad_norm": 1.8301457379369062, - "learning_rate": 1.9112264081870804e-06, - "loss": 0.9766, - "step": 4397 - }, - { - "epoch": 0.528828233030722, - "grad_norm": 1.946448243517976, - "learning_rate": 1.9104482110313843e-06, - "loss": 0.9855, - "step": 4398 - }, - { - "epoch": 0.5289484759213612, - "grad_norm": 1.6890354941270898, - "learning_rate": 1.909670027460623e-06, - "loss": 0.974, - "step": 4399 - }, - { - "epoch": 0.5290687188120002, - "grad_norm": 1.8635443095882718, - "learning_rate": 1.908891857592847e-06, - "loss": 0.9455, - "step": 4400 - }, - { - "epoch": 0.5291889617026393, - "grad_norm": 2.1383792206659074, - "learning_rate": 1.9081137015461034e-06, - "loss": 1.1203, - "step": 4401 - }, - { - "epoch": 0.5293092045932785, - "grad_norm": 1.997060761743666, - "learning_rate": 1.9073355594384383e-06, - "loss": 1.1307, - "step": 4402 - }, - { - "epoch": 0.5294294474839175, - "grad_norm": 2.5500151577667314, - "learning_rate": 1.906557431387895e-06, - "loss": 1.0367, - "step": 4403 - }, - { - "epoch": 0.5295496903745566, - "grad_norm": 2.143850194074336, - "learning_rate": 1.905779317512516e-06, - "loss": 1.0139, - "step": 4404 - }, - { - "epoch": 0.5296699332651957, - "grad_norm": 2.6630965016865087, - "learning_rate": 1.9050012179303385e-06, - "loss": 1.0319, - "step": 4405 - }, - { - "epoch": 0.5297901761558348, - "grad_norm": 2.412523291282686, - "learning_rate": 1.904223132759401e-06, - "loss": 0.9144, - "step": 4406 - }, - { - "epoch": 0.5299104190464738, - "grad_norm": 2.165517600122527, - "learning_rate": 1.9034450621177383e-06, - "loss": 0.9171, - "step": 4407 - }, - { - "epoch": 0.530030661937113, - "grad_norm": 4.853050507454592, - "learning_rate": 1.9026670061233824e-06, - "loss": 0.9336, - "step": 4408 - }, - { - "epoch": 0.5301509048277521, - "grad_norm": 1.6663027445521643, - "learning_rate": 1.901888964894365e-06, - "loss": 1.0361, - "step": 4409 - }, - { - "epoch": 0.5302711477183911, - "grad_norm": 2.349041268406383, - "learning_rate": 1.9011109385487134e-06, - "loss": 0.9058, - "step": 4410 - }, - { - "epoch": 0.5303913906090303, - "grad_norm": 4.851990611772504, - "learning_rate": 1.900332927204454e-06, - "loss": 0.8846, - "step": 4411 - }, - { - "epoch": 0.5305116334996693, - "grad_norm": 1.79743272170006, - "learning_rate": 1.8995549309796097e-06, - "loss": 0.9983, - "step": 4412 - }, - { - "epoch": 0.5306318763903084, - "grad_norm": 1.9623416569483947, - "learning_rate": 1.8987769499922028e-06, - "loss": 0.9898, - "step": 4413 - }, - { - "epoch": 0.5307521192809476, - "grad_norm": 2.1350103100208355, - "learning_rate": 1.897998984360252e-06, - "loss": 0.9387, - "step": 4414 - }, - { - "epoch": 0.5308723621715866, - "grad_norm": 1.548990363803391, - "learning_rate": 1.897221034201775e-06, - "loss": 1.0095, - "step": 4415 - }, - { - "epoch": 0.5309926050622257, - "grad_norm": 1.547609097235694, - "learning_rate": 1.8964430996347842e-06, - "loss": 0.898, - "step": 4416 - }, - { - "epoch": 0.5311128479528648, - "grad_norm": 1.667356713814502, - "learning_rate": 1.8956651807772931e-06, - "loss": 1.0515, - "step": 4417 - }, - { - "epoch": 0.5312330908435039, - "grad_norm": 1.6246757917501418, - "learning_rate": 1.8948872777473115e-06, - "loss": 1.0644, - "step": 4418 - }, - { - "epoch": 0.531353333734143, - "grad_norm": 1.6900450270483707, - "learning_rate": 1.8941093906628458e-06, - "loss": 0.8645, - "step": 4419 - }, - { - "epoch": 0.531473576624782, - "grad_norm": 1.6891774917403721, - "learning_rate": 1.893331519641902e-06, - "loss": 0.9434, - "step": 4420 - }, - { - "epoch": 0.5315938195154212, - "grad_norm": 2.7812078237014517, - "learning_rate": 1.8925536648024815e-06, - "loss": 0.9723, - "step": 4421 - }, - { - "epoch": 0.5317140624060602, - "grad_norm": 1.753779009513167, - "learning_rate": 1.8917758262625849e-06, - "loss": 0.9876, - "step": 4422 - }, - { - "epoch": 0.5318343052966993, - "grad_norm": 1.8166384312713204, - "learning_rate": 1.8909980041402089e-06, - "loss": 1.0401, - "step": 4423 - }, - { - "epoch": 0.5319545481873384, - "grad_norm": 2.822694908149768, - "learning_rate": 1.8902201985533494e-06, - "loss": 0.8878, - "step": 4424 - }, - { - "epoch": 0.5320747910779775, - "grad_norm": 1.6401458040413373, - "learning_rate": 1.8894424096199983e-06, - "loss": 0.9837, - "step": 4425 - }, - { - "epoch": 0.5321950339686166, - "grad_norm": 2.217132838666189, - "learning_rate": 1.8886646374581463e-06, - "loss": 1.0912, - "step": 4426 - }, - { - "epoch": 0.5323152768592557, - "grad_norm": 1.578203256007587, - "learning_rate": 1.8878868821857795e-06, - "loss": 0.9378, - "step": 4427 - }, - { - "epoch": 0.5324355197498948, - "grad_norm": 2.3379741453008966, - "learning_rate": 1.8871091439208838e-06, - "loss": 0.9734, - "step": 4428 - }, - { - "epoch": 0.5325557626405338, - "grad_norm": 3.141144059592427, - "learning_rate": 1.8863314227814414e-06, - "loss": 1.0054, - "step": 4429 - }, - { - "epoch": 0.532676005531173, - "grad_norm": 5.162225024125453, - "learning_rate": 1.8855537188854313e-06, - "loss": 0.7079, - "step": 4430 - }, - { - "epoch": 0.5327962484218121, - "grad_norm": 1.8185358504756874, - "learning_rate": 1.8847760323508315e-06, - "loss": 1.0107, - "step": 4431 - }, - { - "epoch": 0.5329164913124511, - "grad_norm": 1.7283077481373879, - "learning_rate": 1.883998363295616e-06, - "loss": 0.9871, - "step": 4432 - }, - { - "epoch": 0.5330367342030903, - "grad_norm": 0.9531539850048925, - "learning_rate": 1.8832207118377565e-06, - "loss": 0.9173, - "step": 4433 - }, - { - "epoch": 0.5331569770937293, - "grad_norm": 1.9652160534490881, - "learning_rate": 1.882443078095222e-06, - "loss": 0.9291, - "step": 4434 - }, - { - "epoch": 0.5332772199843684, - "grad_norm": 0.8690461525310577, - "learning_rate": 1.8816654621859794e-06, - "loss": 0.9211, - "step": 4435 - }, - { - "epoch": 0.5333974628750076, - "grad_norm": 4.759314790020493, - "learning_rate": 1.8808878642279915e-06, - "loss": 0.9511, - "step": 4436 - }, - { - "epoch": 0.5335177057656466, - "grad_norm": 2.3521059745282265, - "learning_rate": 1.8801102843392209e-06, - "loss": 0.8892, - "step": 4437 - }, - { - "epoch": 0.5336379486562857, - "grad_norm": 1.518348684781154, - "learning_rate": 1.8793327226376238e-06, - "loss": 1.0859, - "step": 4438 - }, - { - "epoch": 0.5337581915469248, - "grad_norm": 1.8076160004301207, - "learning_rate": 1.8785551792411569e-06, - "loss": 1.0284, - "step": 4439 - }, - { - "epoch": 0.5338784344375639, - "grad_norm": 1.870150270493113, - "learning_rate": 1.8777776542677733e-06, - "loss": 1.0564, - "step": 4440 - }, - { - "epoch": 0.5339986773282029, - "grad_norm": 1.924233392930005, - "learning_rate": 1.8770001478354216e-06, - "loss": 0.9486, - "step": 4441 - }, - { - "epoch": 0.5341189202188421, - "grad_norm": 1.9945466079449357, - "learning_rate": 1.8762226600620504e-06, - "loss": 1.0632, - "step": 4442 - }, - { - "epoch": 0.5342391631094812, - "grad_norm": 2.899338409988942, - "learning_rate": 1.8754451910656031e-06, - "loss": 0.8243, - "step": 4443 - }, - { - "epoch": 0.5343594060001202, - "grad_norm": 1.869002732758134, - "learning_rate": 1.8746677409640212e-06, - "loss": 1.0548, - "step": 4444 - }, - { - "epoch": 0.5344796488907594, - "grad_norm": 1.646432099694679, - "learning_rate": 1.8738903098752432e-06, - "loss": 1.0702, - "step": 4445 - }, - { - "epoch": 0.5345998917813984, - "grad_norm": 2.025170839426522, - "learning_rate": 1.8731128979172052e-06, - "loss": 0.9686, - "step": 4446 - }, - { - "epoch": 0.5347201346720375, - "grad_norm": 2.0264574292782473, - "learning_rate": 1.8723355052078394e-06, - "loss": 0.8907, - "step": 4447 - }, - { - "epoch": 0.5348403775626767, - "grad_norm": 1.9605927183237637, - "learning_rate": 1.8715581318650765e-06, - "loss": 1.0038, - "step": 4448 - }, - { - "epoch": 0.5349606204533157, - "grad_norm": 2.321511623447954, - "learning_rate": 1.8707807780068422e-06, - "loss": 1.0519, - "step": 4449 - }, - { - "epoch": 0.5350808633439548, - "grad_norm": 2.380664895730636, - "learning_rate": 1.8700034437510611e-06, - "loss": 0.89, - "step": 4450 - }, - { - "epoch": 0.5352011062345938, - "grad_norm": 2.672394372977705, - "learning_rate": 1.8692261292156549e-06, - "loss": 1.0355, - "step": 4451 - }, - { - "epoch": 0.535321349125233, - "grad_norm": 2.028899138228475, - "learning_rate": 1.8684488345185401e-06, - "loss": 1.0452, - "step": 4452 - }, - { - "epoch": 0.535441592015872, - "grad_norm": 2.4867235515043578, - "learning_rate": 1.8676715597776332e-06, - "loss": 1.0242, - "step": 4453 - }, - { - "epoch": 0.5355618349065111, - "grad_norm": 1.6718762121463553, - "learning_rate": 1.8668943051108455e-06, - "loss": 0.9867, - "step": 4454 - }, - { - "epoch": 0.5356820777971503, - "grad_norm": 2.2852413846320823, - "learning_rate": 1.8661170706360856e-06, - "loss": 0.992, - "step": 4455 - }, - { - "epoch": 0.5358023206877893, - "grad_norm": 1.54356268545572, - "learning_rate": 1.8653398564712594e-06, - "loss": 1.0478, - "step": 4456 - }, - { - "epoch": 0.5359225635784284, - "grad_norm": 1.650859481755918, - "learning_rate": 1.8645626627342704e-06, - "loss": 1.0487, - "step": 4457 - }, - { - "epoch": 0.5360428064690675, - "grad_norm": 3.0612036627755157, - "learning_rate": 1.8637854895430172e-06, - "loss": 1.0342, - "step": 4458 - }, - { - "epoch": 0.5361630493597066, - "grad_norm": 2.693430190784041, - "learning_rate": 1.8630083370153978e-06, - "loss": 0.919, - "step": 4459 - }, - { - "epoch": 0.5362832922503457, - "grad_norm": 0.7882979815610048, - "learning_rate": 1.8622312052693041e-06, - "loss": 0.821, - "step": 4460 - }, - { - "epoch": 0.5364035351409848, - "grad_norm": 3.048430373488034, - "learning_rate": 1.8614540944226267e-06, - "loss": 0.951, - "step": 4461 - }, - { - "epoch": 0.5365237780316239, - "grad_norm": 2.0363592593653816, - "learning_rate": 1.8606770045932537e-06, - "loss": 0.9124, - "step": 4462 - }, - { - "epoch": 0.5366440209222629, - "grad_norm": 2.28718557646194, - "learning_rate": 1.859899935899068e-06, - "loss": 1.0475, - "step": 4463 - }, - { - "epoch": 0.5367642638129021, - "grad_norm": 1.6009408628274662, - "learning_rate": 1.8591228884579506e-06, - "loss": 1.0209, - "step": 4464 - }, - { - "epoch": 0.5368845067035412, - "grad_norm": 1.9244838468488705, - "learning_rate": 1.8583458623877795e-06, - "loss": 1.0442, - "step": 4465 - }, - { - "epoch": 0.5370047495941802, - "grad_norm": 2.127054007163346, - "learning_rate": 1.8575688578064281e-06, - "loss": 0.9724, - "step": 4466 - }, - { - "epoch": 0.5371249924848194, - "grad_norm": 2.4643254286762555, - "learning_rate": 1.8567918748317674e-06, - "loss": 0.9901, - "step": 4467 - }, - { - "epoch": 0.5372452353754584, - "grad_norm": 1.9145764964759582, - "learning_rate": 1.8560149135816659e-06, - "loss": 1.0527, - "step": 4468 - }, - { - "epoch": 0.5373654782660975, - "grad_norm": 2.72967910808983, - "learning_rate": 1.8552379741739873e-06, - "loss": 1.0767, - "step": 4469 - }, - { - "epoch": 0.5374857211567367, - "grad_norm": 0.9035200898175189, - "learning_rate": 1.8544610567265935e-06, - "loss": 0.8128, - "step": 4470 - }, - { - "epoch": 0.5376059640473757, - "grad_norm": 2.140125522675625, - "learning_rate": 1.853684161357341e-06, - "loss": 1.0658, - "step": 4471 - }, - { - "epoch": 0.5377262069380148, - "grad_norm": 2.0161264668109937, - "learning_rate": 1.852907288184085e-06, - "loss": 1.0043, - "step": 4472 - }, - { - "epoch": 0.5378464498286539, - "grad_norm": 2.5768142604955564, - "learning_rate": 1.8521304373246762e-06, - "loss": 0.9341, - "step": 4473 - }, - { - "epoch": 0.537966692719293, - "grad_norm": 2.5955890432393716, - "learning_rate": 1.8513536088969626e-06, - "loss": 1.1178, - "step": 4474 - }, - { - "epoch": 0.538086935609932, - "grad_norm": 2.3655637705449095, - "learning_rate": 1.8505768030187884e-06, - "loss": 1.0268, - "step": 4475 - }, - { - "epoch": 0.5382071785005712, - "grad_norm": 1.5629703576711684, - "learning_rate": 1.849800019807995e-06, - "loss": 1.029, - "step": 4476 - }, - { - "epoch": 0.5383274213912103, - "grad_norm": 1.9905131765244255, - "learning_rate": 1.8490232593824186e-06, - "loss": 0.9401, - "step": 4477 - }, - { - "epoch": 0.5384476642818493, - "grad_norm": 1.722887918892838, - "learning_rate": 1.8482465218598935e-06, - "loss": 1.0716, - "step": 4478 - }, - { - "epoch": 0.5385679071724885, - "grad_norm": 4.491904712566524, - "learning_rate": 1.8474698073582508e-06, - "loss": 1.0655, - "step": 4479 - }, - { - "epoch": 0.5386881500631275, - "grad_norm": 1.9433677174758122, - "learning_rate": 1.8466931159953166e-06, - "loss": 1.0991, - "step": 4480 - }, - { - "epoch": 0.5388083929537666, - "grad_norm": 1.8519606021455786, - "learning_rate": 1.8459164478889158e-06, - "loss": 1.0722, - "step": 4481 - }, - { - "epoch": 0.5389286358444056, - "grad_norm": 1.5446188300332817, - "learning_rate": 1.8451398031568663e-06, - "loss": 0.9865, - "step": 4482 - }, - { - "epoch": 0.5390488787350448, - "grad_norm": 1.7480278482946707, - "learning_rate": 1.844363181916986e-06, - "loss": 0.9753, - "step": 4483 - }, - { - "epoch": 0.5391691216256839, - "grad_norm": 1.885787576122263, - "learning_rate": 1.8435865842870868e-06, - "loss": 1.0612, - "step": 4484 - }, - { - "epoch": 0.5392893645163229, - "grad_norm": 2.2599485648049527, - "learning_rate": 1.8428100103849787e-06, - "loss": 0.9515, - "step": 4485 - }, - { - "epoch": 0.5394096074069621, - "grad_norm": 2.8384159911912654, - "learning_rate": 1.842033460328467e-06, - "loss": 0.9506, - "step": 4486 - }, - { - "epoch": 0.5395298502976011, - "grad_norm": 1.5781691139023077, - "learning_rate": 1.8412569342353541e-06, - "loss": 0.9828, - "step": 4487 - }, - { - "epoch": 0.5396500931882402, - "grad_norm": 2.755510098542142, - "learning_rate": 1.840480432223438e-06, - "loss": 1.0746, - "step": 4488 - }, - { - "epoch": 0.5397703360788794, - "grad_norm": 2.256980346654965, - "learning_rate": 1.8397039544105131e-06, - "loss": 1.0033, - "step": 4489 - }, - { - "epoch": 0.5398905789695184, - "grad_norm": 1.713191946531503, - "learning_rate": 1.8389275009143711e-06, - "loss": 0.9305, - "step": 4490 - }, - { - "epoch": 0.5400108218601575, - "grad_norm": 1.950461929579507, - "learning_rate": 1.8381510718527988e-06, - "loss": 0.9653, - "step": 4491 - }, - { - "epoch": 0.5401310647507966, - "grad_norm": 1.719301516738584, - "learning_rate": 1.8373746673435812e-06, - "loss": 0.8623, - "step": 4492 - }, - { - "epoch": 0.5402513076414357, - "grad_norm": 2.4127766663027637, - "learning_rate": 1.8365982875044964e-06, - "loss": 1.0127, - "step": 4493 - }, - { - "epoch": 0.5403715505320748, - "grad_norm": 2.224580545213487, - "learning_rate": 1.8358219324533217e-06, - "loss": 0.9838, - "step": 4494 - }, - { - "epoch": 0.5404917934227139, - "grad_norm": 1.616909279594474, - "learning_rate": 1.8350456023078292e-06, - "loss": 0.9243, - "step": 4495 - }, - { - "epoch": 0.540612036313353, - "grad_norm": 2.605782051065716, - "learning_rate": 1.8342692971857874e-06, - "loss": 0.9998, - "step": 4496 - }, - { - "epoch": 0.540732279203992, - "grad_norm": 2.55697175075306, - "learning_rate": 1.833493017204962e-06, - "loss": 0.9439, - "step": 4497 - }, - { - "epoch": 0.5408525220946312, - "grad_norm": 2.5387306348624623, - "learning_rate": 1.8327167624831134e-06, - "loss": 0.9981, - "step": 4498 - }, - { - "epoch": 0.5409727649852702, - "grad_norm": 28.316698045209893, - "learning_rate": 1.831940533137999e-06, - "loss": 0.9343, - "step": 4499 - }, - { - "epoch": 0.5410930078759093, - "grad_norm": 1.771737245725055, - "learning_rate": 1.8311643292873718e-06, - "loss": 0.9474, - "step": 4500 - }, - { - "epoch": 0.5412132507665485, - "grad_norm": 2.6654051141059676, - "learning_rate": 1.8303881510489818e-06, - "loss": 1.1126, - "step": 4501 - }, - { - "epoch": 0.5413334936571875, - "grad_norm": 1.6766283432971187, - "learning_rate": 1.829611998540574e-06, - "loss": 0.9238, - "step": 4502 - }, - { - "epoch": 0.5414537365478266, - "grad_norm": 3.0518301553925444, - "learning_rate": 1.8288358718798914e-06, - "loss": 1.0341, - "step": 4503 - }, - { - "epoch": 0.5415739794384657, - "grad_norm": 1.9159700814063456, - "learning_rate": 1.8280597711846703e-06, - "loss": 0.9553, - "step": 4504 - }, - { - "epoch": 0.5416942223291048, - "grad_norm": 2.777300570081223, - "learning_rate": 1.8272836965726455e-06, - "loss": 1.0636, - "step": 4505 - }, - { - "epoch": 0.5418144652197439, - "grad_norm": 1.8868912959524426, - "learning_rate": 1.8265076481615461e-06, - "loss": 1.0123, - "step": 4506 - }, - { - "epoch": 0.541934708110383, - "grad_norm": 2.128752144651738, - "learning_rate": 1.8257316260690987e-06, - "loss": 1.0991, - "step": 4507 - }, - { - "epoch": 0.5420549510010221, - "grad_norm": 1.6766968117816996, - "learning_rate": 1.8249556304130254e-06, - "loss": 0.9937, - "step": 4508 - }, - { - "epoch": 0.5421751938916611, - "grad_norm": 2.018126008053289, - "learning_rate": 1.824179661311044e-06, - "loss": 0.9132, - "step": 4509 - }, - { - "epoch": 0.5422954367823003, - "grad_norm": 1.8586589171637093, - "learning_rate": 1.823403718880868e-06, - "loss": 1.0183, - "step": 4510 - }, - { - "epoch": 0.5424156796729394, - "grad_norm": 1.7136196903294256, - "learning_rate": 1.822627803240207e-06, - "loss": 0.8968, - "step": 4511 - }, - { - "epoch": 0.5425359225635784, - "grad_norm": 1.9283930958438917, - "learning_rate": 1.8218519145067675e-06, - "loss": 1.0773, - "step": 4512 - }, - { - "epoch": 0.5426561654542175, - "grad_norm": 3.5961961465197034, - "learning_rate": 1.8210760527982508e-06, - "loss": 1.1248, - "step": 4513 - }, - { - "epoch": 0.5427764083448566, - "grad_norm": 2.0127141471325793, - "learning_rate": 1.8203002182323552e-06, - "loss": 0.9734, - "step": 4514 - }, - { - "epoch": 0.5428966512354957, - "grad_norm": 1.692219386764581, - "learning_rate": 1.819524410926773e-06, - "loss": 0.9845, - "step": 4515 - }, - { - "epoch": 0.5430168941261347, - "grad_norm": 1.5434116934924171, - "learning_rate": 1.8187486309991944e-06, - "loss": 1.0006, - "step": 4516 - }, - { - "epoch": 0.5431371370167739, - "grad_norm": 1.639733062977711, - "learning_rate": 1.817972878567304e-06, - "loss": 0.9978, - "step": 4517 - }, - { - "epoch": 0.543257379907413, - "grad_norm": 1.7731382142664711, - "learning_rate": 1.8171971537487834e-06, - "loss": 0.9925, - "step": 4518 - }, - { - "epoch": 0.543377622798052, - "grad_norm": 1.7761726964978444, - "learning_rate": 1.8164214566613093e-06, - "loss": 1.0275, - "step": 4519 - }, - { - "epoch": 0.5434978656886912, - "grad_norm": 2.571237136890243, - "learning_rate": 1.8156457874225547e-06, - "loss": 0.8795, - "step": 4520 - }, - { - "epoch": 0.5436181085793302, - "grad_norm": 1.922687637835524, - "learning_rate": 1.814870146150187e-06, - "loss": 1.0372, - "step": 4521 - }, - { - "epoch": 0.5437383514699693, - "grad_norm": 2.59524584919449, - "learning_rate": 1.814094532961871e-06, - "loss": 1.0255, - "step": 4522 - }, - { - "epoch": 0.5438585943606085, - "grad_norm": 2.0454985671652635, - "learning_rate": 1.8133189479752666e-06, - "loss": 1.064, - "step": 4523 - }, - { - "epoch": 0.5439788372512475, - "grad_norm": 1.9394485765540195, - "learning_rate": 1.8125433913080292e-06, - "loss": 1.0457, - "step": 4524 - }, - { - "epoch": 0.5440990801418866, - "grad_norm": 2.048399728620288, - "learning_rate": 1.811767863077811e-06, - "loss": 1.0578, - "step": 4525 - }, - { - "epoch": 0.5442193230325257, - "grad_norm": 2.0609099298792044, - "learning_rate": 1.8109923634022577e-06, - "loss": 1.0114, - "step": 4526 - }, - { - "epoch": 0.5443395659231648, - "grad_norm": 1.8222719051338727, - "learning_rate": 1.8102168923990128e-06, - "loss": 1.0899, - "step": 4527 - }, - { - "epoch": 0.5444598088138038, - "grad_norm": 1.9292749743889772, - "learning_rate": 1.809441450185714e-06, - "loss": 1.0266, - "step": 4528 - }, - { - "epoch": 0.544580051704443, - "grad_norm": 1.923261498983508, - "learning_rate": 1.8086660368799958e-06, - "loss": 0.9655, - "step": 4529 - }, - { - "epoch": 0.5447002945950821, - "grad_norm": 4.545100858973022, - "learning_rate": 1.807890652599488e-06, - "loss": 0.9989, - "step": 4530 - }, - { - "epoch": 0.5448205374857211, - "grad_norm": 1.7997854793722037, - "learning_rate": 1.8071152974618156e-06, - "loss": 1.0489, - "step": 4531 - }, - { - "epoch": 0.5449407803763603, - "grad_norm": 2.4523822607618024, - "learning_rate": 1.806339971584599e-06, - "loss": 1.0138, - "step": 4532 - }, - { - "epoch": 0.5450610232669993, - "grad_norm": 1.5751584563700092, - "learning_rate": 1.8055646750854546e-06, - "loss": 1.0842, - "step": 4533 - }, - { - "epoch": 0.5451812661576384, - "grad_norm": 2.5692429563777117, - "learning_rate": 1.8047894080819945e-06, - "loss": 1.0407, - "step": 4534 - }, - { - "epoch": 0.5453015090482776, - "grad_norm": 0.9345614154834637, - "learning_rate": 1.8040141706918258e-06, - "loss": 0.8967, - "step": 4535 - }, - { - "epoch": 0.5454217519389166, - "grad_norm": 1.7126745872883615, - "learning_rate": 1.8032389630325525e-06, - "loss": 0.9945, - "step": 4536 - }, - { - "epoch": 0.5455419948295557, - "grad_norm": 1.6235068870894136, - "learning_rate": 1.8024637852217707e-06, - "loss": 0.9841, - "step": 4537 - }, - { - "epoch": 0.5456622377201948, - "grad_norm": 1.9882268092528423, - "learning_rate": 1.8016886373770766e-06, - "loss": 1.0727, - "step": 4538 - }, - { - "epoch": 0.5457824806108339, - "grad_norm": 2.2026859143496083, - "learning_rate": 1.8009135196160579e-06, - "loss": 1.0159, - "step": 4539 - }, - { - "epoch": 0.545902723501473, - "grad_norm": 1.5712505973626627, - "learning_rate": 1.8001384320563e-06, - "loss": 1.069, - "step": 4540 - }, - { - "epoch": 0.5460229663921121, - "grad_norm": 1.038251804367691, - "learning_rate": 1.7993633748153833e-06, - "loss": 0.8416, - "step": 4541 - }, - { - "epoch": 0.5461432092827512, - "grad_norm": 2.563574867906736, - "learning_rate": 1.7985883480108834e-06, - "loss": 0.957, - "step": 4542 - }, - { - "epoch": 0.5462634521733902, - "grad_norm": 2.4928392855441457, - "learning_rate": 1.797813351760371e-06, - "loss": 0.9509, - "step": 4543 - }, - { - "epoch": 0.5463836950640293, - "grad_norm": 2.307142082055631, - "learning_rate": 1.7970383861814116e-06, - "loss": 1.016, - "step": 4544 - }, - { - "epoch": 0.5465039379546685, - "grad_norm": 1.774077984606395, - "learning_rate": 1.7962634513915684e-06, - "loss": 0.9703, - "step": 4545 - }, - { - "epoch": 0.5466241808453075, - "grad_norm": 2.0782349062660264, - "learning_rate": 1.7954885475083969e-06, - "loss": 1.023, - "step": 4546 - }, - { - "epoch": 0.5467444237359466, - "grad_norm": 1.9929609405472237, - "learning_rate": 1.7947136746494513e-06, - "loss": 0.9625, - "step": 4547 - }, - { - "epoch": 0.5468646666265857, - "grad_norm": 1.8001894029998549, - "learning_rate": 1.793938832932277e-06, - "loss": 1.1062, - "step": 4548 - }, - { - "epoch": 0.5469849095172248, - "grad_norm": 1.7170774471432309, - "learning_rate": 1.7931640224744185e-06, - "loss": 0.9243, - "step": 4549 - }, - { - "epoch": 0.5471051524078638, - "grad_norm": 1.5702628512148906, - "learning_rate": 1.7923892433934127e-06, - "loss": 0.9672, - "step": 4550 - }, - { - "epoch": 0.547225395298503, - "grad_norm": 8.457413747123825, - "learning_rate": 1.7916144958067939e-06, - "loss": 1.0137, - "step": 4551 - }, - { - "epoch": 0.5473456381891421, - "grad_norm": 1.611380725348841, - "learning_rate": 1.7908397798320905e-06, - "loss": 1.016, - "step": 4552 - }, - { - "epoch": 0.5474658810797811, - "grad_norm": 1.6913363329948878, - "learning_rate": 1.7900650955868265e-06, - "loss": 0.9722, - "step": 4553 - }, - { - "epoch": 0.5475861239704203, - "grad_norm": 1.4202258428779708, - "learning_rate": 1.7892904431885202e-06, - "loss": 0.9973, - "step": 4554 - }, - { - "epoch": 0.5477063668610593, - "grad_norm": 1.8184702969491227, - "learning_rate": 1.788515822754686e-06, - "loss": 0.9885, - "step": 4555 - }, - { - "epoch": 0.5478266097516984, - "grad_norm": 9.249130208129497, - "learning_rate": 1.7877412344028335e-06, - "loss": 1.0152, - "step": 4556 - }, - { - "epoch": 0.5479468526423376, - "grad_norm": 2.0766543584043506, - "learning_rate": 1.7869666782504668e-06, - "loss": 1.0086, - "step": 4557 - }, - { - "epoch": 0.5480670955329766, - "grad_norm": 1.7281141256642139, - "learning_rate": 1.7861921544150867e-06, - "loss": 0.9181, - "step": 4558 - }, - { - "epoch": 0.5481873384236157, - "grad_norm": 1.6219798412504927, - "learning_rate": 1.7854176630141856e-06, - "loss": 0.9951, - "step": 4559 - }, - { - "epoch": 0.5483075813142548, - "grad_norm": 2.257683034174703, - "learning_rate": 1.784643204165255e-06, - "loss": 1.0785, - "step": 4560 - }, - { - "epoch": 0.5484278242048939, - "grad_norm": 1.9714367411228968, - "learning_rate": 1.7838687779857783e-06, - "loss": 0.9974, - "step": 4561 - }, - { - "epoch": 0.5485480670955329, - "grad_norm": 2.044669907284459, - "learning_rate": 1.7830943845932366e-06, - "loss": 0.8687, - "step": 4562 - }, - { - "epoch": 0.5486683099861721, - "grad_norm": 1.508255493234994, - "learning_rate": 1.7823200241051044e-06, - "loss": 0.9787, - "step": 4563 - }, - { - "epoch": 0.5487885528768112, - "grad_norm": 3.3906185308298022, - "learning_rate": 1.7815456966388513e-06, - "loss": 1.0336, - "step": 4564 - }, - { - "epoch": 0.5489087957674502, - "grad_norm": 2.1318298982822155, - "learning_rate": 1.780771402311943e-06, - "loss": 1.0526, - "step": 4565 - }, - { - "epoch": 0.5490290386580894, - "grad_norm": 3.424733261525291, - "learning_rate": 1.7799971412418374e-06, - "loss": 1.0145, - "step": 4566 - }, - { - "epoch": 0.5491492815487284, - "grad_norm": 2.6071994872390882, - "learning_rate": 1.7792229135459918e-06, - "loss": 0.9697, - "step": 4567 - }, - { - "epoch": 0.5492695244393675, - "grad_norm": 0.7664935576534583, - "learning_rate": 1.7784487193418538e-06, - "loss": 0.8688, - "step": 4568 - }, - { - "epoch": 0.5493897673300067, - "grad_norm": 2.768295981078429, - "learning_rate": 1.7776745587468698e-06, - "loss": 0.8436, - "step": 4569 - }, - { - "epoch": 0.5495100102206457, - "grad_norm": 3.353536953466683, - "learning_rate": 1.7769004318784776e-06, - "loss": 1.0502, - "step": 4570 - }, - { - "epoch": 0.5496302531112848, - "grad_norm": 1.668897455108529, - "learning_rate": 1.776126338854113e-06, - "loss": 1.0353, - "step": 4571 - }, - { - "epoch": 0.5497504960019239, - "grad_norm": 1.7391839244640037, - "learning_rate": 1.7753522797912044e-06, - "loss": 1.0689, - "step": 4572 - }, - { - "epoch": 0.549870738892563, - "grad_norm": 2.3720478984956372, - "learning_rate": 1.7745782548071765e-06, - "loss": 0.9335, - "step": 4573 - }, - { - "epoch": 0.549990981783202, - "grad_norm": 1.7540567244314516, - "learning_rate": 1.7738042640194482e-06, - "loss": 0.9634, - "step": 4574 - }, - { - "epoch": 0.5501112246738411, - "grad_norm": 1.7069364124202584, - "learning_rate": 1.7730303075454335e-06, - "loss": 0.9374, - "step": 4575 - }, - { - "epoch": 0.5502314675644803, - "grad_norm": 1.7673883325678383, - "learning_rate": 1.7722563855025402e-06, - "loss": 1.0752, - "step": 4576 - }, - { - "epoch": 0.5503517104551193, - "grad_norm": 1.6944193794745204, - "learning_rate": 1.7714824980081721e-06, - "loss": 0.934, - "step": 4577 - }, - { - "epoch": 0.5504719533457584, - "grad_norm": 1.946702024267199, - "learning_rate": 1.7707086451797276e-06, - "loss": 0.9704, - "step": 4578 - }, - { - "epoch": 0.5505921962363975, - "grad_norm": 0.6966183433550078, - "learning_rate": 1.7699348271345993e-06, - "loss": 0.7725, - "step": 4579 - }, - { - "epoch": 0.5507124391270366, - "grad_norm": 0.7215822228883052, - "learning_rate": 1.7691610439901753e-06, - "loss": 0.7966, - "step": 4580 - }, - { - "epoch": 0.5508326820176757, - "grad_norm": 1.9197573896947175, - "learning_rate": 1.7683872958638367e-06, - "loss": 0.9881, - "step": 4581 - }, - { - "epoch": 0.5509529249083148, - "grad_norm": 1.7571472265386363, - "learning_rate": 1.7676135828729614e-06, - "loss": 1.0718, - "step": 4582 - }, - { - "epoch": 0.5510731677989539, - "grad_norm": 1.8013892481690872, - "learning_rate": 1.7668399051349205e-06, - "loss": 1.058, - "step": 4583 - }, - { - "epoch": 0.5511934106895929, - "grad_norm": 1.9589283898094303, - "learning_rate": 1.766066262767081e-06, - "loss": 1.0552, - "step": 4584 - }, - { - "epoch": 0.5513136535802321, - "grad_norm": 1.900600065038279, - "learning_rate": 1.765292655886803e-06, - "loss": 1.0097, - "step": 4585 - }, - { - "epoch": 0.5514338964708712, - "grad_norm": 1.843596565198523, - "learning_rate": 1.764519084611443e-06, - "loss": 0.9387, - "step": 4586 - }, - { - "epoch": 0.5515541393615102, - "grad_norm": 1.9403017766960011, - "learning_rate": 1.7637455490583505e-06, - "loss": 1.0041, - "step": 4587 - }, - { - "epoch": 0.5516743822521494, - "grad_norm": 2.2755453818415834, - "learning_rate": 1.7629720493448701e-06, - "loss": 1.005, - "step": 4588 - }, - { - "epoch": 0.5517946251427884, - "grad_norm": 2.0224048232799667, - "learning_rate": 1.7621985855883418e-06, - "loss": 1.0753, - "step": 4589 - }, - { - "epoch": 0.5519148680334275, - "grad_norm": 1.9452332629876221, - "learning_rate": 1.7614251579060983e-06, - "loss": 0.9589, - "step": 4590 - }, - { - "epoch": 0.5520351109240667, - "grad_norm": 4.4528759267652, - "learning_rate": 1.76065176641547e-06, - "loss": 1.0759, - "step": 4591 - }, - { - "epoch": 0.5521553538147057, - "grad_norm": 1.6433176378554204, - "learning_rate": 1.759878411233777e-06, - "loss": 1.0083, - "step": 4592 - }, - { - "epoch": 0.5522755967053448, - "grad_norm": 2.179507340399818, - "learning_rate": 1.7591050924783388e-06, - "loss": 0.9908, - "step": 4593 - }, - { - "epoch": 0.5523958395959839, - "grad_norm": 0.9210936581107653, - "learning_rate": 1.7583318102664661e-06, - "loss": 0.8631, - "step": 4594 - }, - { - "epoch": 0.552516082486623, - "grad_norm": 1.8387410287229762, - "learning_rate": 1.757558564715466e-06, - "loss": 1.0249, - "step": 4595 - }, - { - "epoch": 0.552636325377262, - "grad_norm": 3.811389198451178, - "learning_rate": 1.7567853559426386e-06, - "loss": 0.9781, - "step": 4596 - }, - { - "epoch": 0.5527565682679012, - "grad_norm": 1.9618364074741712, - "learning_rate": 1.7560121840652797e-06, - "loss": 0.9787, - "step": 4597 - }, - { - "epoch": 0.5528768111585403, - "grad_norm": 2.081400419647594, - "learning_rate": 1.7552390492006782e-06, - "loss": 0.9269, - "step": 4598 - }, - { - "epoch": 0.5529970540491793, - "grad_norm": 1.8901054361997327, - "learning_rate": 1.7544659514661184e-06, - "loss": 0.8798, - "step": 4599 - }, - { - "epoch": 0.5531172969398185, - "grad_norm": 2.0993137737044187, - "learning_rate": 1.7536928909788786e-06, - "loss": 1.026, - "step": 4600 - }, - { - "epoch": 0.5532375398304575, - "grad_norm": 0.8936445680867156, - "learning_rate": 1.752919867856231e-06, - "loss": 0.8721, - "step": 4601 - }, - { - "epoch": 0.5533577827210966, - "grad_norm": 1.7535145073587786, - "learning_rate": 1.7521468822154436e-06, - "loss": 1.0095, - "step": 4602 - }, - { - "epoch": 0.5534780256117358, - "grad_norm": 1.7828497144741715, - "learning_rate": 1.751373934173777e-06, - "loss": 0.9697, - "step": 4603 - }, - { - "epoch": 0.5535982685023748, - "grad_norm": 1.557601957336395, - "learning_rate": 1.750601023848487e-06, - "loss": 0.9617, - "step": 4604 - }, - { - "epoch": 0.5537185113930139, - "grad_norm": 1.8185517140739975, - "learning_rate": 1.749828151356823e-06, - "loss": 0.9684, - "step": 4605 - }, - { - "epoch": 0.553838754283653, - "grad_norm": 1.8123905543464893, - "learning_rate": 1.7490553168160297e-06, - "loss": 0.9829, - "step": 4606 - }, - { - "epoch": 0.5539589971742921, - "grad_norm": 2.326125752258425, - "learning_rate": 1.748282520343345e-06, - "loss": 0.9918, - "step": 4607 - }, - { - "epoch": 0.5540792400649311, - "grad_norm": 2.2057730269922255, - "learning_rate": 1.7475097620560023e-06, - "loss": 1.0223, - "step": 4608 - }, - { - "epoch": 0.5541994829555702, - "grad_norm": 2.428531187589622, - "learning_rate": 1.746737042071228e-06, - "loss": 0.9287, - "step": 4609 - }, - { - "epoch": 0.5543197258462094, - "grad_norm": 2.857039534880403, - "learning_rate": 1.7459643605062424e-06, - "loss": 1.0264, - "step": 4610 - }, - { - "epoch": 0.5544399687368484, - "grad_norm": 2.0718204404801956, - "learning_rate": 1.745191717478262e-06, - "loss": 1.0376, - "step": 4611 - }, - { - "epoch": 0.5545602116274875, - "grad_norm": 1.7572943029589307, - "learning_rate": 1.7444191131044948e-06, - "loss": 1.0304, - "step": 4612 - }, - { - "epoch": 0.5546804545181266, - "grad_norm": 1.914244538039214, - "learning_rate": 1.7436465475021456e-06, - "loss": 0.9548, - "step": 4613 - }, - { - "epoch": 0.5548006974087657, - "grad_norm": 1.9543162870844062, - "learning_rate": 1.7428740207884111e-06, - "loss": 0.9318, - "step": 4614 - }, - { - "epoch": 0.5549209402994048, - "grad_norm": 2.1377922304506, - "learning_rate": 1.7421015330804833e-06, - "loss": 0.8447, - "step": 4615 - }, - { - "epoch": 0.5550411831900439, - "grad_norm": 1.8047452975178975, - "learning_rate": 1.7413290844955475e-06, - "loss": 0.958, - "step": 4616 - }, - { - "epoch": 0.555161426080683, - "grad_norm": 1.7959301662079603, - "learning_rate": 1.7405566751507843e-06, - "loss": 1.0129, - "step": 4617 - }, - { - "epoch": 0.555281668971322, - "grad_norm": 1.696883427439183, - "learning_rate": 1.7397843051633668e-06, - "loss": 0.9005, - "step": 4618 - }, - { - "epoch": 0.5554019118619612, - "grad_norm": 1.6335908777548278, - "learning_rate": 1.739011974650464e-06, - "loss": 0.941, - "step": 4619 - }, - { - "epoch": 0.5555221547526003, - "grad_norm": 8.297595896176796, - "learning_rate": 1.7382396837292365e-06, - "loss": 0.9924, - "step": 4620 - }, - { - "epoch": 0.5556423976432393, - "grad_norm": 1.6843110081224666, - "learning_rate": 1.737467432516841e-06, - "loss": 0.9726, - "step": 4621 - }, - { - "epoch": 0.5557626405338785, - "grad_norm": 2.313745936261879, - "learning_rate": 1.7366952211304274e-06, - "loss": 0.9645, - "step": 4622 - }, - { - "epoch": 0.5558828834245175, - "grad_norm": 1.8071426931173291, - "learning_rate": 1.735923049687139e-06, - "loss": 1.0623, - "step": 4623 - }, - { - "epoch": 0.5560031263151566, - "grad_norm": 1.4797080307435477, - "learning_rate": 1.7351509183041144e-06, - "loss": 0.9766, - "step": 4624 - }, - { - "epoch": 0.5561233692057957, - "grad_norm": 22.19533656171698, - "learning_rate": 1.7343788270984852e-06, - "loss": 0.9564, - "step": 4625 - }, - { - "epoch": 0.5562436120964348, - "grad_norm": 1.6559920919892943, - "learning_rate": 1.7336067761873764e-06, - "loss": 0.9727, - "step": 4626 - }, - { - "epoch": 0.5563638549870739, - "grad_norm": 2.911169339180859, - "learning_rate": 1.7328347656879076e-06, - "loss": 0.9884, - "step": 4627 - }, - { - "epoch": 0.556484097877713, - "grad_norm": 2.9074345297424617, - "learning_rate": 1.7320627957171927e-06, - "loss": 0.912, - "step": 4628 - }, - { - "epoch": 0.5566043407683521, - "grad_norm": 1.6121233832987212, - "learning_rate": 1.7312908663923382e-06, - "loss": 1.0319, - "step": 4629 - }, - { - "epoch": 0.5567245836589911, - "grad_norm": 2.538521483075895, - "learning_rate": 1.7305189778304463e-06, - "loss": 0.8981, - "step": 4630 - }, - { - "epoch": 0.5568448265496303, - "grad_norm": 2.1950577014473005, - "learning_rate": 1.729747130148611e-06, - "loss": 1.0298, - "step": 4631 - }, - { - "epoch": 0.5569650694402694, - "grad_norm": 15.676521989102197, - "learning_rate": 1.7289753234639208e-06, - "loss": 0.9936, - "step": 4632 - }, - { - "epoch": 0.5570853123309084, - "grad_norm": 1.7563371358412676, - "learning_rate": 1.7282035578934592e-06, - "loss": 0.9894, - "step": 4633 - }, - { - "epoch": 0.5572055552215476, - "grad_norm": 1.7216213170984829, - "learning_rate": 1.727431833554301e-06, - "loss": 1.0147, - "step": 4634 - }, - { - "epoch": 0.5573257981121866, - "grad_norm": 1.8487846686960179, - "learning_rate": 1.7266601505635175e-06, - "loss": 1.0001, - "step": 4635 - }, - { - "epoch": 0.5574460410028257, - "grad_norm": 2.133039301735963, - "learning_rate": 1.7258885090381717e-06, - "loss": 0.9885, - "step": 4636 - }, - { - "epoch": 0.5575662838934649, - "grad_norm": 1.8118215146021273, - "learning_rate": 1.7251169090953213e-06, - "loss": 1.017, - "step": 4637 - }, - { - "epoch": 0.5576865267841039, - "grad_norm": 2.399346791226031, - "learning_rate": 1.7243453508520168e-06, - "loss": 0.9864, - "step": 4638 - }, - { - "epoch": 0.557806769674743, - "grad_norm": 2.127024976008903, - "learning_rate": 1.7235738344253038e-06, - "loss": 1.0695, - "step": 4639 - }, - { - "epoch": 0.557927012565382, - "grad_norm": 1.8116102994673635, - "learning_rate": 1.72280235993222e-06, - "loss": 1.0492, - "step": 4640 - }, - { - "epoch": 0.5580472554560212, - "grad_norm": 9.912065694177153, - "learning_rate": 1.722030927489798e-06, - "loss": 0.928, - "step": 4641 - }, - { - "epoch": 0.5581674983466602, - "grad_norm": 1.730463652248064, - "learning_rate": 1.7212595372150634e-06, - "loss": 0.9715, - "step": 4642 - }, - { - "epoch": 0.5582877412372993, - "grad_norm": 2.9962142263607556, - "learning_rate": 1.720488189225035e-06, - "loss": 0.9653, - "step": 4643 - }, - { - "epoch": 0.5584079841279385, - "grad_norm": 2.155815744209077, - "learning_rate": 1.7197168836367265e-06, - "loss": 1.0206, - "step": 4644 - }, - { - "epoch": 0.5585282270185775, - "grad_norm": 1.8855484201611845, - "learning_rate": 1.7189456205671433e-06, - "loss": 1.0485, - "step": 4645 - }, - { - "epoch": 0.5586484699092166, - "grad_norm": 1.8313826529202564, - "learning_rate": 1.7181744001332866e-06, - "loss": 1.0461, - "step": 4646 - }, - { - "epoch": 0.5587687127998557, - "grad_norm": 2.4001838216321083, - "learning_rate": 1.7174032224521493e-06, - "loss": 0.8668, - "step": 4647 - }, - { - "epoch": 0.5588889556904948, - "grad_norm": 1.4844749818558687, - "learning_rate": 1.7166320876407184e-06, - "loss": 0.9287, - "step": 4648 - }, - { - "epoch": 0.5590091985811338, - "grad_norm": 2.53769391075324, - "learning_rate": 1.7158609958159742e-06, - "loss": 0.9114, - "step": 4649 - }, - { - "epoch": 0.559129441471773, - "grad_norm": 2.670989397302833, - "learning_rate": 1.7150899470948911e-06, - "loss": 1.0098, - "step": 4650 - }, - { - "epoch": 0.5592496843624121, - "grad_norm": 0.8128288777192563, - "learning_rate": 1.7143189415944365e-06, - "loss": 0.8316, - "step": 4651 - }, - { - "epoch": 0.5593699272530511, - "grad_norm": 1.5774784841143248, - "learning_rate": 1.7135479794315714e-06, - "loss": 0.9901, - "step": 4652 - }, - { - "epoch": 0.5594901701436903, - "grad_norm": 2.086870991856375, - "learning_rate": 1.7127770607232502e-06, - "loss": 1.0184, - "step": 4653 - }, - { - "epoch": 0.5596104130343293, - "grad_norm": 1.8305082517978175, - "learning_rate": 1.7120061855864204e-06, - "loss": 1.0284, - "step": 4654 - }, - { - "epoch": 0.5597306559249684, - "grad_norm": 1.983261817422789, - "learning_rate": 1.7112353541380233e-06, - "loss": 0.9471, - "step": 4655 - }, - { - "epoch": 0.5598508988156076, - "grad_norm": 1.4204285359168805, - "learning_rate": 1.7104645664949931e-06, - "loss": 0.9552, - "step": 4656 - }, - { - "epoch": 0.5599711417062466, - "grad_norm": 2.025923685464828, - "learning_rate": 1.7096938227742584e-06, - "loss": 0.9569, - "step": 4657 - }, - { - "epoch": 0.5600913845968857, - "grad_norm": 2.416917513840155, - "learning_rate": 1.70892312309274e-06, - "loss": 1.0669, - "step": 4658 - }, - { - "epoch": 0.5602116274875248, - "grad_norm": 1.9227231022986104, - "learning_rate": 1.7081524675673523e-06, - "loss": 0.9066, - "step": 4659 - }, - { - "epoch": 0.5603318703781639, - "grad_norm": 0.862507088949953, - "learning_rate": 1.7073818563150026e-06, - "loss": 0.8752, - "step": 4660 - }, - { - "epoch": 0.560452113268803, - "grad_norm": 2.877187808656101, - "learning_rate": 1.7066112894525935e-06, - "loss": 1.0996, - "step": 4661 - }, - { - "epoch": 0.5605723561594421, - "grad_norm": 2.464145761073729, - "learning_rate": 1.7058407670970177e-06, - "loss": 0.9583, - "step": 4662 - }, - { - "epoch": 0.5606925990500812, - "grad_norm": 1.7495562126938742, - "learning_rate": 1.7050702893651643e-06, - "loss": 0.8488, - "step": 4663 - }, - { - "epoch": 0.5608128419407202, - "grad_norm": 2.388929376579429, - "learning_rate": 1.7042998563739134e-06, - "loss": 0.9867, - "step": 4664 - }, - { - "epoch": 0.5609330848313594, - "grad_norm": 2.2280298584558733, - "learning_rate": 1.703529468240139e-06, - "loss": 0.9415, - "step": 4665 - }, - { - "epoch": 0.5610533277219985, - "grad_norm": 7.272280427903637, - "learning_rate": 1.7027591250807088e-06, - "loss": 0.9694, - "step": 4666 - }, - { - "epoch": 0.5611735706126375, - "grad_norm": 2.5590264573274264, - "learning_rate": 1.7019888270124825e-06, - "loss": 1.071, - "step": 4667 - }, - { - "epoch": 0.5612938135032767, - "grad_norm": 1.8200688014807058, - "learning_rate": 1.7012185741523147e-06, - "loss": 1.0451, - "step": 4668 - }, - { - "epoch": 0.5614140563939157, - "grad_norm": 1.9226539087881926, - "learning_rate": 1.7004483666170514e-06, - "loss": 0.8588, - "step": 4669 - }, - { - "epoch": 0.5615342992845548, - "grad_norm": 1.8282333081105615, - "learning_rate": 1.699678204523533e-06, - "loss": 1.0392, - "step": 4670 - }, - { - "epoch": 0.5616545421751938, - "grad_norm": 3.011932484208222, - "learning_rate": 1.6989080879885918e-06, - "loss": 0.9164, - "step": 4671 - }, - { - "epoch": 0.561774785065833, - "grad_norm": 0.91479941764808, - "learning_rate": 1.6981380171290544e-06, - "loss": 0.8702, - "step": 4672 - }, - { - "epoch": 0.5618950279564721, - "grad_norm": 2.1676059178715237, - "learning_rate": 1.6973679920617396e-06, - "loss": 0.9698, - "step": 4673 - }, - { - "epoch": 0.5620152708471111, - "grad_norm": 2.7732040092075034, - "learning_rate": 1.6965980129034603e-06, - "loss": 1.076, - "step": 4674 - }, - { - "epoch": 0.5621355137377503, - "grad_norm": 1.4311485121281018, - "learning_rate": 1.6958280797710209e-06, - "loss": 0.9944, - "step": 4675 - }, - { - "epoch": 0.5622557566283893, - "grad_norm": 0.725186429224898, - "learning_rate": 1.6950581927812198e-06, - "loss": 0.7892, - "step": 4676 - }, - { - "epoch": 0.5623759995190284, - "grad_norm": 2.074022646612054, - "learning_rate": 1.6942883520508486e-06, - "loss": 1.0154, - "step": 4677 - }, - { - "epoch": 0.5624962424096676, - "grad_norm": 2.004369730453297, - "learning_rate": 1.693518557696691e-06, - "loss": 1.0048, - "step": 4678 - }, - { - "epoch": 0.5626164853003066, - "grad_norm": 1.949722928145498, - "learning_rate": 1.6927488098355252e-06, - "loss": 1.1097, - "step": 4679 - }, - { - "epoch": 0.5627367281909457, - "grad_norm": 0.8818037971393197, - "learning_rate": 1.6919791085841201e-06, - "loss": 0.9064, - "step": 4680 - }, - { - "epoch": 0.5628569710815848, - "grad_norm": 2.5375003645572494, - "learning_rate": 1.6912094540592396e-06, - "loss": 1.024, - "step": 4681 - }, - { - "epoch": 0.5629772139722239, - "grad_norm": 2.3790519682253874, - "learning_rate": 1.6904398463776393e-06, - "loss": 1.0253, - "step": 4682 - }, - { - "epoch": 0.5630974568628629, - "grad_norm": 1.737338241073008, - "learning_rate": 1.6896702856560683e-06, - "loss": 0.9597, - "step": 4683 - }, - { - "epoch": 0.5632176997535021, - "grad_norm": 2.8986492233886225, - "learning_rate": 1.6889007720112677e-06, - "loss": 0.9179, - "step": 4684 - }, - { - "epoch": 0.5633379426441412, - "grad_norm": 1.6423668470365416, - "learning_rate": 1.6881313055599734e-06, - "loss": 1.0095, - "step": 4685 - }, - { - "epoch": 0.5634581855347802, - "grad_norm": 2.373907540684504, - "learning_rate": 1.6873618864189117e-06, - "loss": 1.0461, - "step": 4686 - }, - { - "epoch": 0.5635784284254194, - "grad_norm": 2.097107657474593, - "learning_rate": 1.686592514704803e-06, - "loss": 1.0116, - "step": 4687 - }, - { - "epoch": 0.5636986713160584, - "grad_norm": 2.1315242244123316, - "learning_rate": 1.685823190534361e-06, - "loss": 0.9252, - "step": 4688 - }, - { - "epoch": 0.5638189142066975, - "grad_norm": 2.43525993902326, - "learning_rate": 1.6850539140242907e-06, - "loss": 1.0559, - "step": 4689 - }, - { - "epoch": 0.5639391570973367, - "grad_norm": 1.8338873921874597, - "learning_rate": 1.684284685291292e-06, - "loss": 1.0454, - "step": 4690 - }, - { - "epoch": 0.5640593999879757, - "grad_norm": 2.8370103724508224, - "learning_rate": 1.683515504452055e-06, - "loss": 1.0386, - "step": 4691 - }, - { - "epoch": 0.5641796428786148, - "grad_norm": 1.5249419779761615, - "learning_rate": 1.6827463716232648e-06, - "loss": 0.8879, - "step": 4692 - }, - { - "epoch": 0.5642998857692539, - "grad_norm": 1.682483067760318, - "learning_rate": 1.6819772869215972e-06, - "loss": 0.9847, - "step": 4693 - }, - { - "epoch": 0.564420128659893, - "grad_norm": 2.815326033796922, - "learning_rate": 1.6812082504637228e-06, - "loss": 1.0473, - "step": 4694 - }, - { - "epoch": 0.564540371550532, - "grad_norm": 1.8722961639772489, - "learning_rate": 1.6804392623663025e-06, - "loss": 0.9757, - "step": 4695 - }, - { - "epoch": 0.5646606144411712, - "grad_norm": 1.8597484181599044, - "learning_rate": 1.6796703227459935e-06, - "loss": 1.0095, - "step": 4696 - }, - { - "epoch": 0.5647808573318103, - "grad_norm": 2.076657917479836, - "learning_rate": 1.6789014317194407e-06, - "loss": 0.9863, - "step": 4697 - }, - { - "epoch": 0.5649011002224493, - "grad_norm": 2.803819782478829, - "learning_rate": 1.6781325894032853e-06, - "loss": 0.9514, - "step": 4698 - }, - { - "epoch": 0.5650213431130885, - "grad_norm": 1.7541051127431087, - "learning_rate": 1.6773637959141608e-06, - "loss": 1.1402, - "step": 4699 - }, - { - "epoch": 0.5651415860037275, - "grad_norm": 3.029551551290836, - "learning_rate": 1.6765950513686915e-06, - "loss": 0.8925, - "step": 4700 - }, - { - "epoch": 0.5652618288943666, - "grad_norm": 1.9485994933943553, - "learning_rate": 1.675826355883496e-06, - "loss": 0.9931, - "step": 4701 - }, - { - "epoch": 0.5653820717850057, - "grad_norm": 1.8532778363300528, - "learning_rate": 1.6750577095751848e-06, - "loss": 1.0243, - "step": 4702 - }, - { - "epoch": 0.5655023146756448, - "grad_norm": 1.6274984666198913, - "learning_rate": 1.6742891125603605e-06, - "loss": 0.9595, - "step": 4703 - }, - { - "epoch": 0.5656225575662839, - "grad_norm": 1.9942847488434556, - "learning_rate": 1.6735205649556185e-06, - "loss": 0.9475, - "step": 4704 - }, - { - "epoch": 0.5657428004569229, - "grad_norm": 1.550777790824818, - "learning_rate": 1.6727520668775476e-06, - "loss": 1.0705, - "step": 4705 - }, - { - "epoch": 0.5658630433475621, - "grad_norm": 1.6825580287636026, - "learning_rate": 1.6719836184427275e-06, - "loss": 0.9844, - "step": 4706 - }, - { - "epoch": 0.5659832862382012, - "grad_norm": 3.1269644094307627, - "learning_rate": 1.671215219767733e-06, - "loss": 0.8756, - "step": 4707 - }, - { - "epoch": 0.5661035291288402, - "grad_norm": 2.496893096590886, - "learning_rate": 1.670446870969127e-06, - "loss": 0.9904, - "step": 4708 - }, - { - "epoch": 0.5662237720194794, - "grad_norm": 2.0210442842579885, - "learning_rate": 1.6696785721634685e-06, - "loss": 1.0329, - "step": 4709 - }, - { - "epoch": 0.5663440149101184, - "grad_norm": 1.6409506338389748, - "learning_rate": 1.6689103234673086e-06, - "loss": 0.9617, - "step": 4710 - }, - { - "epoch": 0.5664642578007575, - "grad_norm": 2.2349317163878863, - "learning_rate": 1.668142124997189e-06, - "loss": 0.9974, - "step": 4711 - }, - { - "epoch": 0.5665845006913967, - "grad_norm": 0.755038937391971, - "learning_rate": 1.6673739768696453e-06, - "loss": 0.8591, - "step": 4712 - }, - { - "epoch": 0.5667047435820357, - "grad_norm": 1.6294720808167455, - "learning_rate": 1.6666058792012052e-06, - "loss": 0.9995, - "step": 4713 - }, - { - "epoch": 0.5668249864726748, - "grad_norm": 0.8698013023406186, - "learning_rate": 1.6658378321083878e-06, - "loss": 0.9315, - "step": 4714 - }, - { - "epoch": 0.5669452293633139, - "grad_norm": 2.1815107916548215, - "learning_rate": 1.6650698357077055e-06, - "loss": 1.0501, - "step": 4715 - }, - { - "epoch": 0.567065472253953, - "grad_norm": 2.302298442289923, - "learning_rate": 1.6643018901156632e-06, - "loss": 1.0287, - "step": 4716 - }, - { - "epoch": 0.567185715144592, - "grad_norm": 2.6207268766565566, - "learning_rate": 1.6635339954487566e-06, - "loss": 1.0179, - "step": 4717 - }, - { - "epoch": 0.5673059580352312, - "grad_norm": 1.743401143169652, - "learning_rate": 1.6627661518234765e-06, - "loss": 1.0552, - "step": 4718 - }, - { - "epoch": 0.5674262009258703, - "grad_norm": 1.7423546544102309, - "learning_rate": 1.661998359356302e-06, - "loss": 1.0736, - "step": 4719 - }, - { - "epoch": 0.5675464438165093, - "grad_norm": 0.7898946094834639, - "learning_rate": 1.6612306181637077e-06, - "loss": 0.8194, - "step": 4720 - }, - { - "epoch": 0.5676666867071485, - "grad_norm": 2.9136335860095635, - "learning_rate": 1.6604629283621598e-06, - "loss": 0.8903, - "step": 4721 - }, - { - "epoch": 0.5677869295977875, - "grad_norm": 1.9075310576824036, - "learning_rate": 1.6596952900681152e-06, - "loss": 0.9799, - "step": 4722 - }, - { - "epoch": 0.5679071724884266, - "grad_norm": 2.0172583297952738, - "learning_rate": 1.658927703398025e-06, - "loss": 1.0504, - "step": 4723 - }, - { - "epoch": 0.5680274153790658, - "grad_norm": 2.8510517655847543, - "learning_rate": 1.6581601684683309e-06, - "loss": 1.0037, - "step": 4724 - }, - { - "epoch": 0.5681476582697048, - "grad_norm": 3.085958205527847, - "learning_rate": 1.6573926853954674e-06, - "loss": 0.919, - "step": 4725 - }, - { - "epoch": 0.5682679011603439, - "grad_norm": 1.7681356488659254, - "learning_rate": 1.6566252542958608e-06, - "loss": 1.0581, - "step": 4726 - }, - { - "epoch": 0.568388144050983, - "grad_norm": 2.758327446861171, - "learning_rate": 1.6558578752859305e-06, - "loss": 1.0045, - "step": 4727 - }, - { - "epoch": 0.5685083869416221, - "grad_norm": 1.9109242773339354, - "learning_rate": 1.6550905484820865e-06, - "loss": 1.0135, - "step": 4728 - }, - { - "epoch": 0.5686286298322611, - "grad_norm": 2.0414442862504836, - "learning_rate": 1.6543232740007328e-06, - "loss": 1.0259, - "step": 4729 - }, - { - "epoch": 0.5687488727229003, - "grad_norm": 3.1375527609230227, - "learning_rate": 1.653556051958263e-06, - "loss": 0.9017, - "step": 4730 - }, - { - "epoch": 0.5688691156135394, - "grad_norm": 2.0395410202775466, - "learning_rate": 1.6527888824710642e-06, - "loss": 0.964, - "step": 4731 - }, - { - "epoch": 0.5689893585041784, - "grad_norm": 2.2591365113233937, - "learning_rate": 1.6520217656555166e-06, - "loss": 0.9969, - "step": 4732 - }, - { - "epoch": 0.5691096013948175, - "grad_norm": 1.5352028410153244, - "learning_rate": 1.65125470162799e-06, - "loss": 0.9421, - "step": 4733 - }, - { - "epoch": 0.5692298442854566, - "grad_norm": 2.978059167124298, - "learning_rate": 1.6504876905048485e-06, - "loss": 0.9323, - "step": 4734 - }, - { - "epoch": 0.5693500871760957, - "grad_norm": 1.5394704829846388, - "learning_rate": 1.6497207324024464e-06, - "loss": 0.9583, - "step": 4735 - }, - { - "epoch": 0.5694703300667348, - "grad_norm": 2.2685835890257007, - "learning_rate": 1.6489538274371305e-06, - "loss": 1.0532, - "step": 4736 - }, - { - "epoch": 0.5695905729573739, - "grad_norm": 1.7804313250663695, - "learning_rate": 1.6481869757252396e-06, - "loss": 1.0553, - "step": 4737 - }, - { - "epoch": 0.569710815848013, - "grad_norm": 1.46185429004288, - "learning_rate": 1.647420177383105e-06, - "loss": 0.9473, - "step": 4738 - }, - { - "epoch": 0.569831058738652, - "grad_norm": 1.8230574707874196, - "learning_rate": 1.646653432527049e-06, - "loss": 0.9555, - "step": 4739 - }, - { - "epoch": 0.5699513016292912, - "grad_norm": 1.4882673928724561, - "learning_rate": 1.645886741273387e-06, - "loss": 0.9755, - "step": 4740 - }, - { - "epoch": 0.5700715445199303, - "grad_norm": 2.2331659206823113, - "learning_rate": 1.645120103738424e-06, - "loss": 0.9674, - "step": 4741 - }, - { - "epoch": 0.5701917874105693, - "grad_norm": 2.951347340808708, - "learning_rate": 1.6443535200384591e-06, - "loss": 1.0589, - "step": 4742 - }, - { - "epoch": 0.5703120303012085, - "grad_norm": 1.6403292434776795, - "learning_rate": 1.6435869902897827e-06, - "loss": 0.9339, - "step": 4743 - }, - { - "epoch": 0.5704322731918475, - "grad_norm": 0.8113608444147374, - "learning_rate": 1.6428205146086764e-06, - "loss": 0.8752, - "step": 4744 - }, - { - "epoch": 0.5705525160824866, - "grad_norm": 1.4789243508594974, - "learning_rate": 1.6420540931114142e-06, - "loss": 0.9343, - "step": 4745 - }, - { - "epoch": 0.5706727589731257, - "grad_norm": 4.116522661127967, - "learning_rate": 1.6412877259142616e-06, - "loss": 1.0214, - "step": 4746 - }, - { - "epoch": 0.5707930018637648, - "grad_norm": 1.9630330959256552, - "learning_rate": 1.6405214131334757e-06, - "loss": 0.972, - "step": 4747 - }, - { - "epoch": 0.5709132447544039, - "grad_norm": 1.816474371832471, - "learning_rate": 1.6397551548853052e-06, - "loss": 1.0265, - "step": 4748 - }, - { - "epoch": 0.571033487645043, - "grad_norm": 1.8767345670383377, - "learning_rate": 1.6389889512859917e-06, - "loss": 0.9355, - "step": 4749 - }, - { - "epoch": 0.5711537305356821, - "grad_norm": 0.8758785401342577, - "learning_rate": 1.638222802451767e-06, - "loss": 0.8872, - "step": 4750 - }, - { - "epoch": 0.5712739734263211, - "grad_norm": 1.6666215731560452, - "learning_rate": 1.6374567084988561e-06, - "loss": 0.9823, - "step": 4751 - }, - { - "epoch": 0.5713942163169603, - "grad_norm": 1.678133081171621, - "learning_rate": 1.6366906695434738e-06, - "loss": 0.992, - "step": 4752 - }, - { - "epoch": 0.5715144592075994, - "grad_norm": 1.7866824781491588, - "learning_rate": 1.6359246857018275e-06, - "loss": 1.0842, - "step": 4753 - }, - { - "epoch": 0.5716347020982384, - "grad_norm": 1.7694345746778974, - "learning_rate": 1.6351587570901178e-06, - "loss": 1.0078, - "step": 4754 - }, - { - "epoch": 0.5717549449888776, - "grad_norm": 2.1299560198474463, - "learning_rate": 1.634392883824534e-06, - "loss": 0.9804, - "step": 4755 - }, - { - "epoch": 0.5718751878795166, - "grad_norm": 1.6037823435494472, - "learning_rate": 1.6336270660212595e-06, - "loss": 0.9097, - "step": 4756 - }, - { - "epoch": 0.5719954307701557, - "grad_norm": 2.015989758510915, - "learning_rate": 1.6328613037964676e-06, - "loss": 0.8909, - "step": 4757 - }, - { - "epoch": 0.5721156736607949, - "grad_norm": 2.3597886689173424, - "learning_rate": 1.6320955972663241e-06, - "loss": 0.9152, - "step": 4758 - }, - { - "epoch": 0.5722359165514339, - "grad_norm": 1.6710140828121902, - "learning_rate": 1.6313299465469857e-06, - "loss": 0.8849, - "step": 4759 - }, - { - "epoch": 0.572356159442073, - "grad_norm": 2.7643201950474316, - "learning_rate": 1.6305643517546014e-06, - "loss": 1.0243, - "step": 4760 - }, - { - "epoch": 0.5724764023327121, - "grad_norm": 1.8388539626283604, - "learning_rate": 1.629798813005311e-06, - "loss": 1.0769, - "step": 4761 - }, - { - "epoch": 0.5725966452233512, - "grad_norm": 1.778468520196096, - "learning_rate": 1.6290333304152473e-06, - "loss": 0.9351, - "step": 4762 - }, - { - "epoch": 0.5727168881139902, - "grad_norm": 1.8383335143383464, - "learning_rate": 1.6282679041005314e-06, - "loss": 0.8025, - "step": 4763 - }, - { - "epoch": 0.5728371310046293, - "grad_norm": 2.1934422029802705, - "learning_rate": 1.6275025341772789e-06, - "loss": 1.0935, - "step": 4764 - }, - { - "epoch": 0.5729573738952685, - "grad_norm": 2.4913169751540885, - "learning_rate": 1.626737220761596e-06, - "loss": 1.0567, - "step": 4765 - }, - { - "epoch": 0.5730776167859075, - "grad_norm": 2.145422244894165, - "learning_rate": 1.62597196396958e-06, - "loss": 1.0239, - "step": 4766 - }, - { - "epoch": 0.5731978596765466, - "grad_norm": 2.007350883408856, - "learning_rate": 1.6252067639173197e-06, - "loss": 1.0842, - "step": 4767 - }, - { - "epoch": 0.5733181025671857, - "grad_norm": 2.469522089882371, - "learning_rate": 1.6244416207208956e-06, - "loss": 0.9282, - "step": 4768 - }, - { - "epoch": 0.5734383454578248, - "grad_norm": 1.680509712007306, - "learning_rate": 1.6236765344963787e-06, - "loss": 0.9693, - "step": 4769 - }, - { - "epoch": 0.5735585883484638, - "grad_norm": 4.629247416224909, - "learning_rate": 1.6229115053598322e-06, - "loss": 0.9298, - "step": 4770 - }, - { - "epoch": 0.573678831239103, - "grad_norm": 1.89430125305347, - "learning_rate": 1.6221465334273108e-06, - "loss": 0.945, - "step": 4771 - }, - { - "epoch": 0.5737990741297421, - "grad_norm": 1.9622881368001954, - "learning_rate": 1.6213816188148593e-06, - "loss": 0.8404, - "step": 4772 - }, - { - "epoch": 0.5739193170203811, - "grad_norm": 1.6917055495104658, - "learning_rate": 1.6206167616385162e-06, - "loss": 0.9982, - "step": 4773 - }, - { - "epoch": 0.5740395599110203, - "grad_norm": 1.8634293535340112, - "learning_rate": 1.6198519620143078e-06, - "loss": 0.9755, - "step": 4774 - }, - { - "epoch": 0.5741598028016593, - "grad_norm": 1.7846940191327196, - "learning_rate": 1.6190872200582546e-06, - "loss": 1.0094, - "step": 4775 - }, - { - "epoch": 0.5742800456922984, - "grad_norm": 2.117695202077624, - "learning_rate": 1.6183225358863676e-06, - "loss": 1.0072, - "step": 4776 - }, - { - "epoch": 0.5744002885829376, - "grad_norm": 2.1943727701556623, - "learning_rate": 1.617557909614648e-06, - "loss": 0.9429, - "step": 4777 - }, - { - "epoch": 0.5745205314735766, - "grad_norm": 1.899767374554868, - "learning_rate": 1.6167933413590899e-06, - "loss": 1.0813, - "step": 4778 - }, - { - "epoch": 0.5746407743642157, - "grad_norm": 3.5212667676196694, - "learning_rate": 1.6160288312356773e-06, - "loss": 1.1376, - "step": 4779 - }, - { - "epoch": 0.5747610172548548, - "grad_norm": 1.5803299409682008, - "learning_rate": 1.6152643793603857e-06, - "loss": 1.0535, - "step": 4780 - }, - { - "epoch": 0.5748812601454939, - "grad_norm": 1.736419160428122, - "learning_rate": 1.6144999858491815e-06, - "loss": 1.1023, - "step": 4781 - }, - { - "epoch": 0.575001503036133, - "grad_norm": 1.658530338924144, - "learning_rate": 1.6137356508180232e-06, - "loss": 1.0863, - "step": 4782 - }, - { - "epoch": 0.5751217459267721, - "grad_norm": 1.7669363637711868, - "learning_rate": 1.6129713743828593e-06, - "loss": 1.0437, - "step": 4783 - }, - { - "epoch": 0.5752419888174112, - "grad_norm": 1.5055042050853067, - "learning_rate": 1.6122071566596306e-06, - "loss": 0.9888, - "step": 4784 - }, - { - "epoch": 0.5753622317080502, - "grad_norm": 1.96829654376054, - "learning_rate": 1.6114429977642674e-06, - "loss": 1.0558, - "step": 4785 - }, - { - "epoch": 0.5754824745986894, - "grad_norm": 1.8358154905643753, - "learning_rate": 1.6106788978126926e-06, - "loss": 0.9652, - "step": 4786 - }, - { - "epoch": 0.5756027174893285, - "grad_norm": 2.186336316803373, - "learning_rate": 1.6099148569208196e-06, - "loss": 1.0131, - "step": 4787 - }, - { - "epoch": 0.5757229603799675, - "grad_norm": 1.6596082074504628, - "learning_rate": 1.6091508752045523e-06, - "loss": 0.8596, - "step": 4788 - }, - { - "epoch": 0.5758432032706067, - "grad_norm": 2.352389370853752, - "learning_rate": 1.608386952779787e-06, - "loss": 1.0915, - "step": 4789 - }, - { - "epoch": 0.5759634461612457, - "grad_norm": 1.574408962221146, - "learning_rate": 1.6076230897624098e-06, - "loss": 0.9766, - "step": 4790 - }, - { - "epoch": 0.5760836890518848, - "grad_norm": 1.9013157755793215, - "learning_rate": 1.6068592862682974e-06, - "loss": 1.0028, - "step": 4791 - }, - { - "epoch": 0.576203931942524, - "grad_norm": 1.967098088008806, - "learning_rate": 1.6060955424133187e-06, - "loss": 0.9709, - "step": 4792 - }, - { - "epoch": 0.576324174833163, - "grad_norm": 1.6710714387764105, - "learning_rate": 1.6053318583133332e-06, - "loss": 1.1266, - "step": 4793 - }, - { - "epoch": 0.5764444177238021, - "grad_norm": 2.15978570281266, - "learning_rate": 1.6045682340841907e-06, - "loss": 0.9888, - "step": 4794 - }, - { - "epoch": 0.5765646606144411, - "grad_norm": 0.7819247574376612, - "learning_rate": 1.6038046698417336e-06, - "loss": 0.843, - "step": 4795 - }, - { - "epoch": 0.5766849035050803, - "grad_norm": 1.8427774482849173, - "learning_rate": 1.6030411657017919e-06, - "loss": 0.924, - "step": 4796 - }, - { - "epoch": 0.5768051463957193, - "grad_norm": 1.6878333292344894, - "learning_rate": 1.6022777217801903e-06, - "loss": 1.0734, - "step": 4797 - }, - { - "epoch": 0.5769253892863584, - "grad_norm": 1.8452089647258487, - "learning_rate": 1.601514338192742e-06, - "loss": 0.9635, - "step": 4798 - }, - { - "epoch": 0.5770456321769976, - "grad_norm": 2.0815265322883496, - "learning_rate": 1.6007510150552514e-06, - "loss": 0.9468, - "step": 4799 - }, - { - "epoch": 0.5771658750676366, - "grad_norm": 1.5226198007087575, - "learning_rate": 1.599987752483515e-06, - "loss": 0.8527, - "step": 4800 - }, - { - "epoch": 0.5772861179582757, - "grad_norm": 1.9535778283656717, - "learning_rate": 1.5992245505933184e-06, - "loss": 0.9073, - "step": 4801 - }, - { - "epoch": 0.5774063608489148, - "grad_norm": 1.7846219454778791, - "learning_rate": 1.5984614095004388e-06, - "loss": 0.9381, - "step": 4802 - }, - { - "epoch": 0.5775266037395539, - "grad_norm": 2.051146730825699, - "learning_rate": 1.5976983293206438e-06, - "loss": 1.0378, - "step": 4803 - }, - { - "epoch": 0.577646846630193, - "grad_norm": 3.0554071929825, - "learning_rate": 1.5969353101696928e-06, - "loss": 0.9434, - "step": 4804 - }, - { - "epoch": 0.5777670895208321, - "grad_norm": 1.4673373957266778, - "learning_rate": 1.5961723521633341e-06, - "loss": 1.0284, - "step": 4805 - }, - { - "epoch": 0.5778873324114712, - "grad_norm": 2.255688258695394, - "learning_rate": 1.5954094554173097e-06, - "loss": 1.1368, - "step": 4806 - }, - { - "epoch": 0.5780075753021102, - "grad_norm": 1.9631569752826263, - "learning_rate": 1.5946466200473482e-06, - "loss": 1.0207, - "step": 4807 - }, - { - "epoch": 0.5781278181927494, - "grad_norm": 1.8429979633313835, - "learning_rate": 1.5938838461691723e-06, - "loss": 1.0611, - "step": 4808 - }, - { - "epoch": 0.5782480610833884, - "grad_norm": 2.356539607779735, - "learning_rate": 1.593121133898494e-06, - "loss": 1.0603, - "step": 4809 - }, - { - "epoch": 0.5783683039740275, - "grad_norm": 1.952777068618976, - "learning_rate": 1.592358483351016e-06, - "loss": 1.0182, - "step": 4810 - }, - { - "epoch": 0.5784885468646667, - "grad_norm": 2.257915243695206, - "learning_rate": 1.5915958946424326e-06, - "loss": 0.9534, - "step": 4811 - }, - { - "epoch": 0.5786087897553057, - "grad_norm": 1.5499632861803792, - "learning_rate": 1.5908333678884271e-06, - "loss": 0.9732, - "step": 4812 - }, - { - "epoch": 0.5787290326459448, - "grad_norm": 1.8583616171881179, - "learning_rate": 1.5900709032046743e-06, - "loss": 0.9689, - "step": 4813 - }, - { - "epoch": 0.5788492755365839, - "grad_norm": 2.39243293165953, - "learning_rate": 1.5893085007068391e-06, - "loss": 1.015, - "step": 4814 - }, - { - "epoch": 0.578969518427223, - "grad_norm": 2.107633309633627, - "learning_rate": 1.5885461605105786e-06, - "loss": 0.9401, - "step": 4815 - }, - { - "epoch": 0.579089761317862, - "grad_norm": 1.7212547084990506, - "learning_rate": 1.5877838827315375e-06, - "loss": 0.9986, - "step": 4816 - }, - { - "epoch": 0.5792100042085012, - "grad_norm": 1.778885057781308, - "learning_rate": 1.587021667485355e-06, - "loss": 0.9208, - "step": 4817 - }, - { - "epoch": 0.5793302470991403, - "grad_norm": 1.6076329997618293, - "learning_rate": 1.5862595148876559e-06, - "loss": 1.0099, - "step": 4818 - }, - { - "epoch": 0.5794504899897793, - "grad_norm": 2.5185806255328753, - "learning_rate": 1.58549742505406e-06, - "loss": 0.9923, - "step": 4819 - }, - { - "epoch": 0.5795707328804185, - "grad_norm": 2.7252446790964666, - "learning_rate": 1.5847353981001747e-06, - "loss": 0.9861, - "step": 4820 - }, - { - "epoch": 0.5796909757710575, - "grad_norm": 1.7251118222909374, - "learning_rate": 1.5839734341415993e-06, - "loss": 0.9213, - "step": 4821 - }, - { - "epoch": 0.5798112186616966, - "grad_norm": 1.830031940931888, - "learning_rate": 1.5832115332939238e-06, - "loss": 0.9959, - "step": 4822 - }, - { - "epoch": 0.5799314615523358, - "grad_norm": 1.6067590285433793, - "learning_rate": 1.5824496956727272e-06, - "loss": 0.9837, - "step": 4823 - }, - { - "epoch": 0.5800517044429748, - "grad_norm": 1.7457309149636198, - "learning_rate": 1.5816879213935797e-06, - "loss": 0.9613, - "step": 4824 - }, - { - "epoch": 0.5801719473336139, - "grad_norm": 1.5675597094003788, - "learning_rate": 1.5809262105720416e-06, - "loss": 1.0154, - "step": 4825 - }, - { - "epoch": 0.580292190224253, - "grad_norm": 1.958471563885121, - "learning_rate": 1.5801645633236644e-06, - "loss": 1.0213, - "step": 4826 - }, - { - "epoch": 0.5804124331148921, - "grad_norm": 1.8185849485193368, - "learning_rate": 1.579402979763989e-06, - "loss": 1.0039, - "step": 4827 - }, - { - "epoch": 0.5805326760055312, - "grad_norm": 2.4449090106782307, - "learning_rate": 1.578641460008548e-06, - "loss": 1.034, - "step": 4828 - }, - { - "epoch": 0.5806529188961702, - "grad_norm": 2.0088339259544155, - "learning_rate": 1.5778800041728613e-06, - "loss": 0.9084, - "step": 4829 - }, - { - "epoch": 0.5807731617868094, - "grad_norm": 1.9228958891975272, - "learning_rate": 1.577118612372443e-06, - "loss": 0.8943, - "step": 4830 - }, - { - "epoch": 0.5808934046774484, - "grad_norm": 1.584896328420705, - "learning_rate": 1.5763572847227943e-06, - "loss": 0.9333, - "step": 4831 - }, - { - "epoch": 0.5810136475680875, - "grad_norm": 2.0538510552756937, - "learning_rate": 1.5755960213394091e-06, - "loss": 1.0348, - "step": 4832 - }, - { - "epoch": 0.5811338904587267, - "grad_norm": 2.8862145671429023, - "learning_rate": 1.5748348223377703e-06, - "loss": 1.0124, - "step": 4833 - }, - { - "epoch": 0.5812541333493657, - "grad_norm": 1.7692791420246734, - "learning_rate": 1.5740736878333507e-06, - "loss": 1.0132, - "step": 4834 - }, - { - "epoch": 0.5813743762400048, - "grad_norm": 2.227229610691052, - "learning_rate": 1.5733126179416143e-06, - "loss": 1.009, - "step": 4835 - }, - { - "epoch": 0.5814946191306439, - "grad_norm": 1.8803225946622986, - "learning_rate": 1.5725516127780137e-06, - "loss": 0.9503, - "step": 4836 - }, - { - "epoch": 0.581614862021283, - "grad_norm": 2.1016725419174382, - "learning_rate": 1.5717906724579943e-06, - "loss": 1.1116, - "step": 4837 - }, - { - "epoch": 0.581735104911922, - "grad_norm": 2.663816647637871, - "learning_rate": 1.571029797096989e-06, - "loss": 0.9049, - "step": 4838 - }, - { - "epoch": 0.5818553478025612, - "grad_norm": 2.1549017187661956, - "learning_rate": 1.570268986810423e-06, - "loss": 1.0131, - "step": 4839 - }, - { - "epoch": 0.5819755906932003, - "grad_norm": 1.7377717718089438, - "learning_rate": 1.5695082417137096e-06, - "loss": 0.9845, - "step": 4840 - }, - { - "epoch": 0.5820958335838393, - "grad_norm": 2.1826667797078496, - "learning_rate": 1.5687475619222539e-06, - "loss": 0.9818, - "step": 4841 - }, - { - "epoch": 0.5822160764744785, - "grad_norm": 1.9652535407463063, - "learning_rate": 1.5679869475514496e-06, - "loss": 0.9692, - "step": 4842 - }, - { - "epoch": 0.5823363193651175, - "grad_norm": 2.0559125953901916, - "learning_rate": 1.567226398716682e-06, - "loss": 1.0483, - "step": 4843 - }, - { - "epoch": 0.5824565622557566, - "grad_norm": 1.7343095646791824, - "learning_rate": 1.566465915533326e-06, - "loss": 0.8517, - "step": 4844 - }, - { - "epoch": 0.5825768051463958, - "grad_norm": 2.126542597333504, - "learning_rate": 1.5657054981167458e-06, - "loss": 1.1181, - "step": 4845 - }, - { - "epoch": 0.5826970480370348, - "grad_norm": 1.7730803949381604, - "learning_rate": 1.5649451465822965e-06, - "loss": 0.9031, - "step": 4846 - }, - { - "epoch": 0.5828172909276739, - "grad_norm": 1.7410694178195842, - "learning_rate": 1.5641848610453218e-06, - "loss": 1.064, - "step": 4847 - }, - { - "epoch": 0.582937533818313, - "grad_norm": 2.0821769047902237, - "learning_rate": 1.563424641621158e-06, - "loss": 1.0895, - "step": 4848 - }, - { - "epoch": 0.5830577767089521, - "grad_norm": 2.2278644168630004, - "learning_rate": 1.5626644884251282e-06, - "loss": 0.9307, - "step": 4849 - }, - { - "epoch": 0.5831780195995911, - "grad_norm": 1.5057095107972027, - "learning_rate": 1.5619044015725488e-06, - "loss": 1.1104, - "step": 4850 - }, - { - "epoch": 0.5832982624902303, - "grad_norm": 2.7025495148109466, - "learning_rate": 1.5611443811787224e-06, - "loss": 1.1039, - "step": 4851 - }, - { - "epoch": 0.5834185053808694, - "grad_norm": 2.1884377785496887, - "learning_rate": 1.560384427358945e-06, - "loss": 0.9268, - "step": 4852 - }, - { - "epoch": 0.5835387482715084, - "grad_norm": 1.5710531736926354, - "learning_rate": 1.5596245402284998e-06, - "loss": 0.9565, - "step": 4853 - }, - { - "epoch": 0.5836589911621476, - "grad_norm": 2.032434792874393, - "learning_rate": 1.5588647199026619e-06, - "loss": 1.046, - "step": 4854 - }, - { - "epoch": 0.5837792340527866, - "grad_norm": 1.9101738490130602, - "learning_rate": 1.5581049664966956e-06, - "loss": 1.1048, - "step": 4855 - }, - { - "epoch": 0.5838994769434257, - "grad_norm": 1.0472517901214284, - "learning_rate": 1.5573452801258545e-06, - "loss": 0.9214, - "step": 4856 - }, - { - "epoch": 0.5840197198340649, - "grad_norm": 2.099472197553657, - "learning_rate": 1.5565856609053824e-06, - "loss": 0.8683, - "step": 4857 - }, - { - "epoch": 0.5841399627247039, - "grad_norm": 1.8378938680978958, - "learning_rate": 1.5558261089505127e-06, - "loss": 1.0239, - "step": 4858 - }, - { - "epoch": 0.584260205615343, - "grad_norm": 4.642179594800362, - "learning_rate": 1.5550666243764697e-06, - "loss": 1.0279, - "step": 4859 - }, - { - "epoch": 0.584380448505982, - "grad_norm": 1.9802341058837525, - "learning_rate": 1.554307207298465e-06, - "loss": 0.994, - "step": 4860 - }, - { - "epoch": 0.5845006913966212, - "grad_norm": 2.1850087830390996, - "learning_rate": 1.553547857831704e-06, - "loss": 1.0172, - "step": 4861 - }, - { - "epoch": 0.5846209342872603, - "grad_norm": 0.9963227850197258, - "learning_rate": 1.5527885760913771e-06, - "loss": 0.9512, - "step": 4862 - }, - { - "epoch": 0.5847411771778993, - "grad_norm": 1.693255886624674, - "learning_rate": 1.552029362192668e-06, - "loss": 0.9923, - "step": 4863 - }, - { - "epoch": 0.5848614200685385, - "grad_norm": 2.361687139917264, - "learning_rate": 1.5512702162507478e-06, - "loss": 0.9498, - "step": 4864 - }, - { - "epoch": 0.5849816629591775, - "grad_norm": 1.0623547510833435, - "learning_rate": 1.5505111383807792e-06, - "loss": 0.802, - "step": 4865 - }, - { - "epoch": 0.5851019058498166, - "grad_norm": 1.6245399704251764, - "learning_rate": 1.5497521286979138e-06, - "loss": 1.0364, - "step": 4866 - }, - { - "epoch": 0.5852221487404557, - "grad_norm": 2.337445132993684, - "learning_rate": 1.5489931873172927e-06, - "loss": 0.9746, - "step": 4867 - }, - { - "epoch": 0.5853423916310948, - "grad_norm": 1.917468283439446, - "learning_rate": 1.5482343143540467e-06, - "loss": 1.021, - "step": 4868 - }, - { - "epoch": 0.5854626345217339, - "grad_norm": 1.9642682607296447, - "learning_rate": 1.547475509923295e-06, - "loss": 1.0661, - "step": 4869 - }, - { - "epoch": 0.585582877412373, - "grad_norm": 0.7496281735288011, - "learning_rate": 1.5467167741401495e-06, - "loss": 0.824, - "step": 4870 - }, - { - "epoch": 0.5857031203030121, - "grad_norm": 2.7199174851635908, - "learning_rate": 1.5459581071197083e-06, - "loss": 0.94, - "step": 4871 - }, - { - "epoch": 0.5858233631936511, - "grad_norm": 1.9947726362497582, - "learning_rate": 1.5451995089770624e-06, - "loss": 1.0554, - "step": 4872 - }, - { - "epoch": 0.5859436060842903, - "grad_norm": 1.2974901234245355, - "learning_rate": 1.5444409798272885e-06, - "loss": 0.9477, - "step": 4873 - }, - { - "epoch": 0.5860638489749294, - "grad_norm": 1.7643167155439532, - "learning_rate": 1.543682519785456e-06, - "loss": 1.0336, - "step": 4874 - }, - { - "epoch": 0.5861840918655684, - "grad_norm": 2.7456939970638925, - "learning_rate": 1.5429241289666219e-06, - "loss": 1.0236, - "step": 4875 - }, - { - "epoch": 0.5863043347562076, - "grad_norm": 1.8588822296865943, - "learning_rate": 1.5421658074858342e-06, - "loss": 0.9247, - "step": 4876 - }, - { - "epoch": 0.5864245776468466, - "grad_norm": 2.1847016009875735, - "learning_rate": 1.5414075554581298e-06, - "loss": 0.8883, - "step": 4877 - }, - { - "epoch": 0.5865448205374857, - "grad_norm": 2.332892739614468, - "learning_rate": 1.5406493729985348e-06, - "loss": 1.0096, - "step": 4878 - }, - { - "epoch": 0.5866650634281249, - "grad_norm": 2.051723300184213, - "learning_rate": 1.5398912602220644e-06, - "loss": 0.9489, - "step": 4879 - }, - { - "epoch": 0.5867853063187639, - "grad_norm": 2.2941737477389226, - "learning_rate": 1.539133217243724e-06, - "loss": 1.0114, - "step": 4880 - }, - { - "epoch": 0.586905549209403, - "grad_norm": 3.6242539526020607, - "learning_rate": 1.5383752441785081e-06, - "loss": 0.9821, - "step": 4881 - }, - { - "epoch": 0.5870257921000421, - "grad_norm": 2.581688470060885, - "learning_rate": 1.5376173411414003e-06, - "loss": 1.0903, - "step": 4882 - }, - { - "epoch": 0.5871460349906812, - "grad_norm": 1.9122221526727707, - "learning_rate": 1.5368595082473753e-06, - "loss": 1.0107, - "step": 4883 - }, - { - "epoch": 0.5872662778813202, - "grad_norm": 1.744314494763093, - "learning_rate": 1.5361017456113935e-06, - "loss": 1.0082, - "step": 4884 - }, - { - "epoch": 0.5873865207719594, - "grad_norm": 1.9952470211556006, - "learning_rate": 1.5353440533484085e-06, - "loss": 1.0879, - "step": 4885 - }, - { - "epoch": 0.5875067636625985, - "grad_norm": 1.6784276451399662, - "learning_rate": 1.534586431573361e-06, - "loss": 0.891, - "step": 4886 - }, - { - "epoch": 0.5876270065532375, - "grad_norm": 1.9958556270217003, - "learning_rate": 1.5338288804011817e-06, - "loss": 1.0105, - "step": 4887 - }, - { - "epoch": 0.5877472494438767, - "grad_norm": 2.2548183136211675, - "learning_rate": 1.533071399946791e-06, - "loss": 0.9446, - "step": 4888 - }, - { - "epoch": 0.5878674923345157, - "grad_norm": 2.106047994735146, - "learning_rate": 1.5323139903250977e-06, - "loss": 0.8049, - "step": 4889 - }, - { - "epoch": 0.5879877352251548, - "grad_norm": 1.7790884320868317, - "learning_rate": 1.5315566516510002e-06, - "loss": 0.9942, - "step": 4890 - }, - { - "epoch": 0.5881079781157939, - "grad_norm": 1.7015243147671726, - "learning_rate": 1.5307993840393857e-06, - "loss": 0.9132, - "step": 4891 - }, - { - "epoch": 0.588228221006433, - "grad_norm": 1.9462026987471257, - "learning_rate": 1.530042187605132e-06, - "loss": 1.04, - "step": 4892 - }, - { - "epoch": 0.5883484638970721, - "grad_norm": 1.401033687450656, - "learning_rate": 1.5292850624631044e-06, - "loss": 1.068, - "step": 4893 - }, - { - "epoch": 0.5884687067877111, - "grad_norm": 1.783743201004255, - "learning_rate": 1.5285280087281593e-06, - "loss": 1.0251, - "step": 4894 - }, - { - "epoch": 0.5885889496783503, - "grad_norm": 0.6468899656029677, - "learning_rate": 1.5277710265151398e-06, - "loss": 0.8015, - "step": 4895 - }, - { - "epoch": 0.5887091925689893, - "grad_norm": 2.4068830573878985, - "learning_rate": 1.5270141159388803e-06, - "loss": 0.9998, - "step": 4896 - }, - { - "epoch": 0.5888294354596284, - "grad_norm": 1.5758676530774622, - "learning_rate": 1.526257277114203e-06, - "loss": 1.0275, - "step": 4897 - }, - { - "epoch": 0.5889496783502676, - "grad_norm": 1.6431100099851288, - "learning_rate": 1.5255005101559201e-06, - "loss": 1.03, - "step": 4898 - }, - { - "epoch": 0.5890699212409066, - "grad_norm": 1.7570394214730156, - "learning_rate": 1.524743815178833e-06, - "loss": 0.9954, - "step": 4899 - }, - { - "epoch": 0.5891901641315457, - "grad_norm": 1.7273055329013738, - "learning_rate": 1.5239871922977315e-06, - "loss": 1.035, - "step": 4900 - }, - { - "epoch": 0.5893104070221848, - "grad_norm": 1.631910449095534, - "learning_rate": 1.523230641627394e-06, - "loss": 1.1207, - "step": 4901 - }, - { - "epoch": 0.5894306499128239, - "grad_norm": 2.340483971129948, - "learning_rate": 1.5224741632825888e-06, - "loss": 0.9575, - "step": 4902 - }, - { - "epoch": 0.589550892803463, - "grad_norm": 1.735757431456107, - "learning_rate": 1.521717757378074e-06, - "loss": 0.9175, - "step": 4903 - }, - { - "epoch": 0.5896711356941021, - "grad_norm": 1.7565932729156846, - "learning_rate": 1.5209614240285943e-06, - "loss": 0.9157, - "step": 4904 - }, - { - "epoch": 0.5897913785847412, - "grad_norm": 1.8412836680600435, - "learning_rate": 1.520205163348887e-06, - "loss": 1.0872, - "step": 4905 - }, - { - "epoch": 0.5899116214753802, - "grad_norm": 0.7757073819584123, - "learning_rate": 1.519448975453674e-06, - "loss": 0.8112, - "step": 4906 - }, - { - "epoch": 0.5900318643660194, - "grad_norm": 1.9753218062703166, - "learning_rate": 1.5186928604576696e-06, - "loss": 0.9863, - "step": 4907 - }, - { - "epoch": 0.5901521072566585, - "grad_norm": 2.2301098857200903, - "learning_rate": 1.5179368184755752e-06, - "loss": 1.003, - "step": 4908 - }, - { - "epoch": 0.5902723501472975, - "grad_norm": 1.6689874544397847, - "learning_rate": 1.5171808496220821e-06, - "loss": 1.0545, - "step": 4909 - }, - { - "epoch": 0.5903925930379367, - "grad_norm": 1.7409621920569536, - "learning_rate": 1.5164249540118708e-06, - "loss": 1.0425, - "step": 4910 - }, - { - "epoch": 0.5905128359285757, - "grad_norm": 1.6479537760353653, - "learning_rate": 1.5156691317596093e-06, - "loss": 1.0619, - "step": 4911 - }, - { - "epoch": 0.5906330788192148, - "grad_norm": 2.0865582746114737, - "learning_rate": 1.5149133829799556e-06, - "loss": 0.9013, - "step": 4912 - }, - { - "epoch": 0.590753321709854, - "grad_norm": 4.593548413479229, - "learning_rate": 1.5141577077875556e-06, - "loss": 1.0263, - "step": 4913 - }, - { - "epoch": 0.590873564600493, - "grad_norm": 2.231954690904037, - "learning_rate": 1.5134021062970451e-06, - "loss": 0.9494, - "step": 4914 - }, - { - "epoch": 0.5909938074911321, - "grad_norm": 1.6856638490968772, - "learning_rate": 1.5126465786230483e-06, - "loss": 1.0368, - "step": 4915 - }, - { - "epoch": 0.5911140503817712, - "grad_norm": 1.8267000301590586, - "learning_rate": 1.5118911248801787e-06, - "loss": 1.0405, - "step": 4916 - }, - { - "epoch": 0.5912342932724103, - "grad_norm": 1.8185804255240188, - "learning_rate": 1.5111357451830364e-06, - "loss": 1.0234, - "step": 4917 - }, - { - "epoch": 0.5913545361630493, - "grad_norm": 1.8988082586495785, - "learning_rate": 1.5103804396462131e-06, - "loss": 0.945, - "step": 4918 - }, - { - "epoch": 0.5914747790536885, - "grad_norm": 2.9388792979663414, - "learning_rate": 1.5096252083842877e-06, - "loss": 1.0315, - "step": 4919 - }, - { - "epoch": 0.5915950219443276, - "grad_norm": 1.7410312801664611, - "learning_rate": 1.5088700515118285e-06, - "loss": 1.073, - "step": 4920 - }, - { - "epoch": 0.5917152648349666, - "grad_norm": 1.7496180798807368, - "learning_rate": 1.508114969143392e-06, - "loss": 0.8978, - "step": 4921 - }, - { - "epoch": 0.5918355077256057, - "grad_norm": 1.466237699387151, - "learning_rate": 1.5073599613935238e-06, - "loss": 1.0054, - "step": 4922 - }, - { - "epoch": 0.5919557506162448, - "grad_norm": 2.279740134370852, - "learning_rate": 1.5066050283767574e-06, - "loss": 0.8103, - "step": 4923 - }, - { - "epoch": 0.5920759935068839, - "grad_norm": 3.298896495230288, - "learning_rate": 1.505850170207616e-06, - "loss": 1.0646, - "step": 4924 - }, - { - "epoch": 0.592196236397523, - "grad_norm": 2.1748579658522207, - "learning_rate": 1.505095387000611e-06, - "loss": 1.0013, - "step": 4925 - }, - { - "epoch": 0.5923164792881621, - "grad_norm": 1.8488500500702196, - "learning_rate": 1.504340678870242e-06, - "loss": 0.9728, - "step": 4926 - }, - { - "epoch": 0.5924367221788012, - "grad_norm": 2.184910249416243, - "learning_rate": 1.5035860459309989e-06, - "loss": 1.1237, - "step": 4927 - }, - { - "epoch": 0.5925569650694402, - "grad_norm": 2.2786791175954493, - "learning_rate": 1.5028314882973568e-06, - "loss": 0.863, - "step": 4928 - }, - { - "epoch": 0.5926772079600794, - "grad_norm": 1.8165027223624912, - "learning_rate": 1.502077006083783e-06, - "loss": 1.0732, - "step": 4929 - }, - { - "epoch": 0.5927974508507184, - "grad_norm": 2.1021934044421737, - "learning_rate": 1.5013225994047315e-06, - "loss": 1.0025, - "step": 4930 - }, - { - "epoch": 0.5929176937413575, - "grad_norm": 1.7382706202492757, - "learning_rate": 1.5005682683746452e-06, - "loss": 1.0365, - "step": 4931 - }, - { - "epoch": 0.5930379366319967, - "grad_norm": 1.8187456058829274, - "learning_rate": 1.4998140131079553e-06, - "loss": 0.9508, - "step": 4932 - }, - { - "epoch": 0.5931581795226357, - "grad_norm": 1.9256666663941726, - "learning_rate": 1.4990598337190821e-06, - "loss": 0.968, - "step": 4933 - }, - { - "epoch": 0.5932784224132748, - "grad_norm": 1.8639660102831395, - "learning_rate": 1.4983057303224338e-06, - "loss": 0.908, - "step": 4934 - }, - { - "epoch": 0.5933986653039139, - "grad_norm": 1.5184113364312002, - "learning_rate": 1.4975517030324072e-06, - "loss": 1.0925, - "step": 4935 - }, - { - "epoch": 0.593518908194553, - "grad_norm": 0.8414818689905587, - "learning_rate": 1.4967977519633882e-06, - "loss": 0.8821, - "step": 4936 - }, - { - "epoch": 0.593639151085192, - "grad_norm": 2.0114173441154004, - "learning_rate": 1.4960438772297494e-06, - "loss": 1.0224, - "step": 4937 - }, - { - "epoch": 0.5937593939758312, - "grad_norm": 3.511582962822487, - "learning_rate": 1.495290078945855e-06, - "loss": 0.9668, - "step": 4938 - }, - { - "epoch": 0.5938796368664703, - "grad_norm": 1.854299011936407, - "learning_rate": 1.4945363572260529e-06, - "loss": 0.9694, - "step": 4939 - }, - { - "epoch": 0.5939998797571093, - "grad_norm": 1.9597850315774732, - "learning_rate": 1.4937827121846845e-06, - "loss": 0.8979, - "step": 4940 - }, - { - "epoch": 0.5941201226477485, - "grad_norm": 1.713097589183739, - "learning_rate": 1.4930291439360755e-06, - "loss": 0.9721, - "step": 4941 - }, - { - "epoch": 0.5942403655383875, - "grad_norm": 6.681443939391722, - "learning_rate": 1.4922756525945427e-06, - "loss": 1.0215, - "step": 4942 - }, - { - "epoch": 0.5943606084290266, - "grad_norm": 0.7800334995974417, - "learning_rate": 1.4915222382743894e-06, - "loss": 0.8498, - "step": 4943 - }, - { - "epoch": 0.5944808513196658, - "grad_norm": 2.1222783779727954, - "learning_rate": 1.4907689010899085e-06, - "loss": 0.9536, - "step": 4944 - }, - { - "epoch": 0.5946010942103048, - "grad_norm": 3.655465848904865, - "learning_rate": 1.4900156411553804e-06, - "loss": 0.8539, - "step": 4945 - }, - { - "epoch": 0.5947213371009439, - "grad_norm": 2.089036789295787, - "learning_rate": 1.4892624585850739e-06, - "loss": 1.0894, - "step": 4946 - }, - { - "epoch": 0.594841579991583, - "grad_norm": 1.9127286662112493, - "learning_rate": 1.4885093534932465e-06, - "loss": 1.0136, - "step": 4947 - }, - { - "epoch": 0.5949618228822221, - "grad_norm": 2.632572151782051, - "learning_rate": 1.4877563259941433e-06, - "loss": 0.9481, - "step": 4948 - }, - { - "epoch": 0.5950820657728612, - "grad_norm": 1.96370741518154, - "learning_rate": 1.4870033762019988e-06, - "loss": 0.9119, - "step": 4949 - }, - { - "epoch": 0.5952023086635003, - "grad_norm": 1.642227552743972, - "learning_rate": 1.4862505042310334e-06, - "loss": 0.9574, - "step": 4950 - }, - { - "epoch": 0.5953225515541394, - "grad_norm": 1.5236688682731079, - "learning_rate": 1.4854977101954587e-06, - "loss": 0.9207, - "step": 4951 - }, - { - "epoch": 0.5954427944447784, - "grad_norm": 3.1247166314394708, - "learning_rate": 1.4847449942094716e-06, - "loss": 1.085, - "step": 4952 - }, - { - "epoch": 0.5955630373354175, - "grad_norm": 1.8263070612354142, - "learning_rate": 1.4839923563872598e-06, - "loss": 1.0952, - "step": 4953 - }, - { - "epoch": 0.5956832802260567, - "grad_norm": 2.7164851434204142, - "learning_rate": 1.483239796842997e-06, - "loss": 0.988, - "step": 4954 - }, - { - "epoch": 0.5958035231166957, - "grad_norm": 1.5312488322350857, - "learning_rate": 1.4824873156908462e-06, - "loss": 1.0675, - "step": 4955 - }, - { - "epoch": 0.5959237660073348, - "grad_norm": 1.4818888674778583, - "learning_rate": 1.4817349130449584e-06, - "loss": 0.9852, - "step": 4956 - }, - { - "epoch": 0.5960440088979739, - "grad_norm": 8.714645896454195, - "learning_rate": 1.4809825890194717e-06, - "loss": 1.0566, - "step": 4957 - }, - { - "epoch": 0.596164251788613, - "grad_norm": 2.025636281020623, - "learning_rate": 1.4802303437285139e-06, - "loss": 1.0054, - "step": 4958 - }, - { - "epoch": 0.596284494679252, - "grad_norm": 1.9890255237627514, - "learning_rate": 1.4794781772861994e-06, - "loss": 1.0345, - "step": 4959 - }, - { - "epoch": 0.5964047375698912, - "grad_norm": 4.35418983465477, - "learning_rate": 1.4787260898066324e-06, - "loss": 0.9058, - "step": 4960 - }, - { - "epoch": 0.5965249804605303, - "grad_norm": 2.094660660567355, - "learning_rate": 1.4779740814039023e-06, - "loss": 1.0818, - "step": 4961 - }, - { - "epoch": 0.5966452233511693, - "grad_norm": 2.0124874090902596, - "learning_rate": 1.4772221521920894e-06, - "loss": 0.91, - "step": 4962 - }, - { - "epoch": 0.5967654662418085, - "grad_norm": 1.9671132383502166, - "learning_rate": 1.4764703022852598e-06, - "loss": 0.9693, - "step": 4963 - }, - { - "epoch": 0.5968857091324475, - "grad_norm": 2.1103883922775366, - "learning_rate": 1.4757185317974696e-06, - "loss": 1.0018, - "step": 4964 - }, - { - "epoch": 0.5970059520230866, - "grad_norm": 2.0195476597722455, - "learning_rate": 1.474966840842761e-06, - "loss": 0.9414, - "step": 4965 - }, - { - "epoch": 0.5971261949137258, - "grad_norm": 6.824582086420888, - "learning_rate": 1.4742152295351655e-06, - "loss": 1.0954, - "step": 4966 - }, - { - "epoch": 0.5972464378043648, - "grad_norm": 7.231756210083418, - "learning_rate": 1.4734636979887016e-06, - "loss": 0.8719, - "step": 4967 - }, - { - "epoch": 0.5973666806950039, - "grad_norm": 2.637484426588029, - "learning_rate": 1.4727122463173755e-06, - "loss": 1.1314, - "step": 4968 - }, - { - "epoch": 0.597486923585643, - "grad_norm": 2.120418321615473, - "learning_rate": 1.471960874635183e-06, - "loss": 0.8758, - "step": 4969 - }, - { - "epoch": 0.5976071664762821, - "grad_norm": 2.944997269737649, - "learning_rate": 1.4712095830561055e-06, - "loss": 0.9301, - "step": 4970 - }, - { - "epoch": 0.5977274093669211, - "grad_norm": 1.844867173869488, - "learning_rate": 1.4704583716941147e-06, - "loss": 1.0385, - "step": 4971 - }, - { - "epoch": 0.5978476522575603, - "grad_norm": 1.5115878420588906, - "learning_rate": 1.4697072406631672e-06, - "loss": 0.9461, - "step": 4972 - }, - { - "epoch": 0.5979678951481994, - "grad_norm": 2.9511101525023, - "learning_rate": 1.4689561900772097e-06, - "loss": 0.9583, - "step": 4973 - }, - { - "epoch": 0.5980881380388384, - "grad_norm": 2.044060672397027, - "learning_rate": 1.4682052200501758e-06, - "loss": 0.9585, - "step": 4974 - }, - { - "epoch": 0.5982083809294776, - "grad_norm": 2.186373611366878, - "learning_rate": 1.4674543306959876e-06, - "loss": 1.0223, - "step": 4975 - }, - { - "epoch": 0.5983286238201166, - "grad_norm": 2.3958714578884672, - "learning_rate": 1.4667035221285535e-06, - "loss": 1.0738, - "step": 4976 - }, - { - "epoch": 0.5984488667107557, - "grad_norm": 1.6219894692188466, - "learning_rate": 1.4659527944617715e-06, - "loss": 0.9667, - "step": 4977 - }, - { - "epoch": 0.5985691096013949, - "grad_norm": 1.7322068279852794, - "learning_rate": 1.465202147809526e-06, - "loss": 0.9928, - "step": 4978 - }, - { - "epoch": 0.5986893524920339, - "grad_norm": 2.514432446731231, - "learning_rate": 1.4644515822856888e-06, - "loss": 0.9843, - "step": 4979 - }, - { - "epoch": 0.598809595382673, - "grad_norm": 0.7651195804159082, - "learning_rate": 1.4637010980041215e-06, - "loss": 0.8191, - "step": 4980 - }, - { - "epoch": 0.5989298382733121, - "grad_norm": 2.4768746838123046, - "learning_rate": 1.4629506950786707e-06, - "loss": 1.1298, - "step": 4981 - }, - { - "epoch": 0.5990500811639512, - "grad_norm": 0.8403838084626114, - "learning_rate": 1.4622003736231733e-06, - "loss": 0.8179, - "step": 4982 - }, - { - "epoch": 0.5991703240545903, - "grad_norm": 1.8492266249881828, - "learning_rate": 1.461450133751451e-06, - "loss": 1.0291, - "step": 4983 - }, - { - "epoch": 0.5992905669452293, - "grad_norm": 1.8948894987877174, - "learning_rate": 1.4606999755773153e-06, - "loss": 0.9949, - "step": 4984 - }, - { - "epoch": 0.5994108098358685, - "grad_norm": 1.9387084514757777, - "learning_rate": 1.4599498992145643e-06, - "loss": 1.0428, - "step": 4985 - }, - { - "epoch": 0.5995310527265075, - "grad_norm": 1.7947491883402125, - "learning_rate": 1.4591999047769846e-06, - "loss": 0.9348, - "step": 4986 - }, - { - "epoch": 0.5996512956171466, - "grad_norm": 2.598833090007517, - "learning_rate": 1.4584499923783486e-06, - "loss": 0.9837, - "step": 4987 - }, - { - "epoch": 0.5997715385077858, - "grad_norm": 1.662651789339913, - "learning_rate": 1.457700162132419e-06, - "loss": 0.9918, - "step": 4988 - }, - { - "epoch": 0.5998917813984248, - "grad_norm": 1.8551463278665192, - "learning_rate": 1.4569504141529433e-06, - "loss": 0.9612, - "step": 4989 - }, - { - "epoch": 0.6000120242890639, - "grad_norm": 2.0216376942159555, - "learning_rate": 1.456200748553658e-06, - "loss": 0.9464, - "step": 4990 - }, - { - "epoch": 0.600132267179703, - "grad_norm": 1.7554808347563637, - "learning_rate": 1.455451165448287e-06, - "loss": 1.0096, - "step": 4991 - }, - { - "epoch": 0.6002525100703421, - "grad_norm": 2.182902709582347, - "learning_rate": 1.4547016649505407e-06, - "loss": 0.9643, - "step": 4992 - }, - { - "epoch": 0.6003727529609811, - "grad_norm": 2.741333657382817, - "learning_rate": 1.4539522471741193e-06, - "loss": 1.0768, - "step": 4993 - }, - { - "epoch": 0.6004929958516203, - "grad_norm": 3.8146290072018076, - "learning_rate": 1.4532029122327067e-06, - "loss": 0.9338, - "step": 4994 - }, - { - "epoch": 0.6006132387422594, - "grad_norm": 2.0034824094140538, - "learning_rate": 1.4524536602399783e-06, - "loss": 0.9848, - "step": 4995 - }, - { - "epoch": 0.6007334816328984, - "grad_norm": 6.2823260699846974, - "learning_rate": 1.4517044913095938e-06, - "loss": 0.9991, - "step": 4996 - }, - { - "epoch": 0.6008537245235376, - "grad_norm": 1.50169078584552, - "learning_rate": 1.4509554055552022e-06, - "loss": 1.0421, - "step": 4997 - }, - { - "epoch": 0.6009739674141766, - "grad_norm": 3.136760098575539, - "learning_rate": 1.450206403090439e-06, - "loss": 1.0685, - "step": 4998 - }, - { - "epoch": 0.6010942103048157, - "grad_norm": 3.7372254541917838, - "learning_rate": 1.4494574840289274e-06, - "loss": 1.0901, - "step": 4999 - }, - { - "epoch": 0.6012144531954549, - "grad_norm": 1.6112590243232572, - "learning_rate": 1.4487086484842782e-06, - "loss": 0.9733, - "step": 5000 - }, - { - "epoch": 0.6013346960860939, - "grad_norm": 2.056344528112529, - "learning_rate": 1.4479598965700878e-06, - "loss": 0.826, - "step": 5001 - }, - { - "epoch": 0.601454938976733, - "grad_norm": 2.138126112049196, - "learning_rate": 1.4472112283999427e-06, - "loss": 0.9172, - "step": 5002 - }, - { - "epoch": 0.6015751818673721, - "grad_norm": 2.4238169114240002, - "learning_rate": 1.4464626440874143e-06, - "loss": 0.9273, - "step": 5003 - }, - { - "epoch": 0.6016954247580112, - "grad_norm": 2.826472943238382, - "learning_rate": 1.4457141437460636e-06, - "loss": 0.9736, - "step": 5004 - }, - { - "epoch": 0.6018156676486502, - "grad_norm": 4.663423273956599, - "learning_rate": 1.444965727489436e-06, - "loss": 0.9618, - "step": 5005 - }, - { - "epoch": 0.6019359105392894, - "grad_norm": 1.7082649155228955, - "learning_rate": 1.444217395431066e-06, - "loss": 0.859, - "step": 5006 - }, - { - "epoch": 0.6020561534299285, - "grad_norm": 0.8289678351186957, - "learning_rate": 1.4434691476844755e-06, - "loss": 0.8155, - "step": 5007 - }, - { - "epoch": 0.6021763963205675, - "grad_norm": 2.7098850988825167, - "learning_rate": 1.4427209843631729e-06, - "loss": 0.8996, - "step": 5008 - }, - { - "epoch": 0.6022966392112067, - "grad_norm": 2.2260637762113866, - "learning_rate": 1.4419729055806534e-06, - "loss": 1.0492, - "step": 5009 - }, - { - "epoch": 0.6024168821018457, - "grad_norm": 1.7456372238151858, - "learning_rate": 1.441224911450401e-06, - "loss": 1.0607, - "step": 5010 - }, - { - "epoch": 0.6025371249924848, - "grad_norm": 1.6323212975314123, - "learning_rate": 1.4404770020858851e-06, - "loss": 1.0578, - "step": 5011 - }, - { - "epoch": 0.602657367883124, - "grad_norm": 1.574339831218008, - "learning_rate": 1.439729177600563e-06, - "loss": 1.0929, - "step": 5012 - }, - { - "epoch": 0.602777610773763, - "grad_norm": 2.0006513726957977, - "learning_rate": 1.4389814381078793e-06, - "loss": 0.9546, - "step": 5013 - }, - { - "epoch": 0.6028978536644021, - "grad_norm": 3.28071778840444, - "learning_rate": 1.438233783721265e-06, - "loss": 1.0321, - "step": 5014 - }, - { - "epoch": 0.6030180965550412, - "grad_norm": 2.387943034983032, - "learning_rate": 1.43748621455414e-06, - "loss": 1.0127, - "step": 5015 - }, - { - "epoch": 0.6031383394456803, - "grad_norm": 2.1710978702534898, - "learning_rate": 1.4367387307199082e-06, - "loss": 1.0331, - "step": 5016 - }, - { - "epoch": 0.6032585823363193, - "grad_norm": 1.723579626370459, - "learning_rate": 1.4359913323319632e-06, - "loss": 1.0483, - "step": 5017 - }, - { - "epoch": 0.6033788252269584, - "grad_norm": 1.7940178754207277, - "learning_rate": 1.4352440195036847e-06, - "loss": 1.006, - "step": 5018 - }, - { - "epoch": 0.6034990681175976, - "grad_norm": 2.240616517203858, - "learning_rate": 1.4344967923484395e-06, - "loss": 1.0243, - "step": 5019 - }, - { - "epoch": 0.6036193110082366, - "grad_norm": 2.752845765587413, - "learning_rate": 1.433749650979581e-06, - "loss": 0.951, - "step": 5020 - }, - { - "epoch": 0.6037395538988757, - "grad_norm": 3.3070831633597364, - "learning_rate": 1.433002595510451e-06, - "loss": 0.9233, - "step": 5021 - }, - { - "epoch": 0.6038597967895148, - "grad_norm": 1.8650379656693483, - "learning_rate": 1.4322556260543757e-06, - "loss": 0.9511, - "step": 5022 - }, - { - "epoch": 0.6039800396801539, - "grad_norm": 0.9177776056500669, - "learning_rate": 1.4315087427246703e-06, - "loss": 0.8882, - "step": 5023 - }, - { - "epoch": 0.604100282570793, - "grad_norm": 0.940803144701013, - "learning_rate": 1.4307619456346372e-06, - "loss": 0.8505, - "step": 5024 - }, - { - "epoch": 0.6042205254614321, - "grad_norm": 2.546960279289044, - "learning_rate": 1.430015234897564e-06, - "loss": 0.9582, - "step": 5025 - }, - { - "epoch": 0.6043407683520712, - "grad_norm": 1.8149963813934824, - "learning_rate": 1.4292686106267274e-06, - "loss": 0.8931, - "step": 5026 - }, - { - "epoch": 0.6044610112427102, - "grad_norm": 1.5823029461429954, - "learning_rate": 1.4285220729353876e-06, - "loss": 0.9924, - "step": 5027 - }, - { - "epoch": 0.6045812541333494, - "grad_norm": 1.9823940932859365, - "learning_rate": 1.4277756219367957e-06, - "loss": 1.0124, - "step": 5028 - }, - { - "epoch": 0.6047014970239885, - "grad_norm": 2.174666771628921, - "learning_rate": 1.4270292577441864e-06, - "loss": 1.0271, - "step": 5029 - }, - { - "epoch": 0.6048217399146275, - "grad_norm": 1.426845122269739, - "learning_rate": 1.4262829804707836e-06, - "loss": 0.9471, - "step": 5030 - }, - { - "epoch": 0.6049419828052667, - "grad_norm": 1.4406995410625345, - "learning_rate": 1.4255367902297958e-06, - "loss": 0.9224, - "step": 5031 - }, - { - "epoch": 0.6050622256959057, - "grad_norm": 2.4235893826632546, - "learning_rate": 1.4247906871344215e-06, - "loss": 1.023, - "step": 5032 - }, - { - "epoch": 0.6051824685865448, - "grad_norm": 1.9666455844244861, - "learning_rate": 1.4240446712978415e-06, - "loss": 0.9852, - "step": 5033 - }, - { - "epoch": 0.605302711477184, - "grad_norm": 1.6529740378203923, - "learning_rate": 1.423298742833227e-06, - "loss": 0.9675, - "step": 5034 - }, - { - "epoch": 0.605422954367823, - "grad_norm": 1.711308060323591, - "learning_rate": 1.4225529018537352e-06, - "loss": 0.9506, - "step": 5035 - }, - { - "epoch": 0.6055431972584621, - "grad_norm": 1.8113346299644288, - "learning_rate": 1.4218071484725082e-06, - "loss": 1.0104, - "step": 5036 - }, - { - "epoch": 0.6056634401491012, - "grad_norm": 1.7705964846322868, - "learning_rate": 1.4210614828026786e-06, - "loss": 0.9846, - "step": 5037 - }, - { - "epoch": 0.6057836830397403, - "grad_norm": 1.6806640625, - "learning_rate": 1.4203159049573605e-06, - "loss": 0.9742, - "step": 5038 - }, - { - "epoch": 0.6059039259303793, - "grad_norm": 1.9333803001267529, - "learning_rate": 1.4195704150496593e-06, - "loss": 1.1029, - "step": 5039 - }, - { - "epoch": 0.6060241688210185, - "grad_norm": 1.644874518805975, - "learning_rate": 1.4188250131926639e-06, - "loss": 0.9632, - "step": 5040 - }, - { - "epoch": 0.6061444117116576, - "grad_norm": 2.3336481722094473, - "learning_rate": 1.4180796994994525e-06, - "loss": 1.0446, - "step": 5041 - }, - { - "epoch": 0.6062646546022966, - "grad_norm": 1.8225283254507494, - "learning_rate": 1.4173344740830877e-06, - "loss": 0.9531, - "step": 5042 - }, - { - "epoch": 0.6063848974929358, - "grad_norm": 1.5538500346529713, - "learning_rate": 1.4165893370566206e-06, - "loss": 0.9369, - "step": 5043 - }, - { - "epoch": 0.6065051403835748, - "grad_norm": 1.507888950129366, - "learning_rate": 1.4158442885330865e-06, - "loss": 0.9974, - "step": 5044 - }, - { - "epoch": 0.6066253832742139, - "grad_norm": 1.8030873921139996, - "learning_rate": 1.4150993286255094e-06, - "loss": 1.0172, - "step": 5045 - }, - { - "epoch": 0.6067456261648531, - "grad_norm": 1.7972818328716924, - "learning_rate": 1.4143544574468993e-06, - "loss": 1.0184, - "step": 5046 - }, - { - "epoch": 0.6068658690554921, - "grad_norm": 2.260321045243259, - "learning_rate": 1.4136096751102523e-06, - "loss": 1.0481, - "step": 5047 - }, - { - "epoch": 0.6069861119461312, - "grad_norm": 2.1221634501607074, - "learning_rate": 1.4128649817285516e-06, - "loss": 1.0561, - "step": 5048 - }, - { - "epoch": 0.6071063548367702, - "grad_norm": 2.4391888367644854, - "learning_rate": 1.412120377414766e-06, - "loss": 0.8611, - "step": 5049 - }, - { - "epoch": 0.6072265977274094, - "grad_norm": 1.5255619201996624, - "learning_rate": 1.4113758622818522e-06, - "loss": 0.9326, - "step": 5050 - }, - { - "epoch": 0.6073468406180484, - "grad_norm": 4.171820365176544, - "learning_rate": 1.410631436442751e-06, - "loss": 1.0562, - "step": 5051 - }, - { - "epoch": 0.6074670835086875, - "grad_norm": 1.9799487392456108, - "learning_rate": 1.4098871000103936e-06, - "loss": 1.0952, - "step": 5052 - }, - { - "epoch": 0.6075873263993267, - "grad_norm": 1.5892986891019423, - "learning_rate": 1.409142853097693e-06, - "loss": 1.0489, - "step": 5053 - }, - { - "epoch": 0.6077075692899657, - "grad_norm": 1.919305339556527, - "learning_rate": 1.408398695817553e-06, - "loss": 1.026, - "step": 5054 - }, - { - "epoch": 0.6078278121806048, - "grad_norm": 2.686673680510094, - "learning_rate": 1.4076546282828593e-06, - "loss": 0.927, - "step": 5055 - }, - { - "epoch": 0.6079480550712439, - "grad_norm": 2.55901490450801, - "learning_rate": 1.4069106506064874e-06, - "loss": 0.8881, - "step": 5056 - }, - { - "epoch": 0.608068297961883, - "grad_norm": 1.6616236047448336, - "learning_rate": 1.4061667629012989e-06, - "loss": 1.0104, - "step": 5057 - }, - { - "epoch": 0.608188540852522, - "grad_norm": 1.5675273888003622, - "learning_rate": 1.40542296528014e-06, - "loss": 1.0587, - "step": 5058 - }, - { - "epoch": 0.6083087837431612, - "grad_norm": 2.6019266692727814, - "learning_rate": 1.4046792578558452e-06, - "loss": 0.9832, - "step": 5059 - }, - { - "epoch": 0.6084290266338003, - "grad_norm": 2.2696207524574126, - "learning_rate": 1.4039356407412325e-06, - "loss": 0.9911, - "step": 5060 - }, - { - "epoch": 0.6085492695244393, - "grad_norm": 0.8336686333983034, - "learning_rate": 1.40319211404911e-06, - "loss": 0.8498, - "step": 5061 - }, - { - "epoch": 0.6086695124150785, - "grad_norm": 2.026683310243796, - "learning_rate": 1.4024486778922691e-06, - "loss": 1.1236, - "step": 5062 - }, - { - "epoch": 0.6087897553057176, - "grad_norm": 2.205655747910311, - "learning_rate": 1.4017053323834884e-06, - "loss": 1.0025, - "step": 5063 - }, - { - "epoch": 0.6089099981963566, - "grad_norm": 1.9469101561285198, - "learning_rate": 1.4009620776355333e-06, - "loss": 0.9957, - "step": 5064 - }, - { - "epoch": 0.6090302410869958, - "grad_norm": 1.7862122999287824, - "learning_rate": 1.4002189137611553e-06, - "loss": 1.0212, - "step": 5065 - }, - { - "epoch": 0.6091504839776348, - "grad_norm": 1.8475073982266148, - "learning_rate": 1.3994758408730901e-06, - "loss": 0.9228, - "step": 5066 - }, - { - "epoch": 0.6092707268682739, - "grad_norm": 2.1725237858461157, - "learning_rate": 1.3987328590840629e-06, - "loss": 0.9967, - "step": 5067 - }, - { - "epoch": 0.609390969758913, - "grad_norm": 1.9066600671178249, - "learning_rate": 1.397989968506783e-06, - "loss": 1.0898, - "step": 5068 - }, - { - "epoch": 0.6095112126495521, - "grad_norm": 2.4262043312173085, - "learning_rate": 1.3972471692539458e-06, - "loss": 0.9596, - "step": 5069 - }, - { - "epoch": 0.6096314555401912, - "grad_norm": 1.9423612705606919, - "learning_rate": 1.3965044614382348e-06, - "loss": 0.9797, - "step": 5070 - }, - { - "epoch": 0.6097516984308303, - "grad_norm": 2.7132108539178046, - "learning_rate": 1.3957618451723162e-06, - "loss": 0.982, - "step": 5071 - }, - { - "epoch": 0.6098719413214694, - "grad_norm": 2.3893166745526146, - "learning_rate": 1.3950193205688457e-06, - "loss": 0.929, - "step": 5072 - }, - { - "epoch": 0.6099921842121084, - "grad_norm": 1.7791631420139298, - "learning_rate": 1.3942768877404627e-06, - "loss": 1.0616, - "step": 5073 - }, - { - "epoch": 0.6101124271027476, - "grad_norm": 1.4964604737542075, - "learning_rate": 1.393534546799795e-06, - "loss": 0.975, - "step": 5074 - }, - { - "epoch": 0.6102326699933867, - "grad_norm": 1.692443036539352, - "learning_rate": 1.3927922978594536e-06, - "loss": 0.9047, - "step": 5075 - }, - { - "epoch": 0.6103529128840257, - "grad_norm": 0.8545348412903335, - "learning_rate": 1.3920501410320387e-06, - "loss": 0.8558, - "step": 5076 - }, - { - "epoch": 0.6104731557746649, - "grad_norm": 6.639155110758958, - "learning_rate": 1.3913080764301333e-06, - "loss": 0.9935, - "step": 5077 - }, - { - "epoch": 0.6105933986653039, - "grad_norm": 1.998815781952375, - "learning_rate": 1.3905661041663085e-06, - "loss": 0.9434, - "step": 5078 - }, - { - "epoch": 0.610713641555943, - "grad_norm": 2.4574486600804404, - "learning_rate": 1.389824224353122e-06, - "loss": 0.8774, - "step": 5079 - }, - { - "epoch": 0.610833884446582, - "grad_norm": 1.4378509922260154, - "learning_rate": 1.389082437103115e-06, - "loss": 0.9969, - "step": 5080 - }, - { - "epoch": 0.6109541273372212, - "grad_norm": 1.9533884099718628, - "learning_rate": 1.3883407425288172e-06, - "loss": 1.0146, - "step": 5081 - }, - { - "epoch": 0.6110743702278603, - "grad_norm": 2.036921637891633, - "learning_rate": 1.3875991407427417e-06, - "loss": 1.0149, - "step": 5082 - }, - { - "epoch": 0.6111946131184993, - "grad_norm": 0.7633757736729333, - "learning_rate": 1.38685763185739e-06, - "loss": 0.8417, - "step": 5083 - }, - { - "epoch": 0.6113148560091385, - "grad_norm": 2.6740150135563208, - "learning_rate": 1.3861162159852476e-06, - "loss": 0.9039, - "step": 5084 - }, - { - "epoch": 0.6114350988997775, - "grad_norm": 2.043085448686561, - "learning_rate": 1.3853748932387875e-06, - "loss": 1.0289, - "step": 5085 - }, - { - "epoch": 0.6115553417904166, - "grad_norm": 2.622673456875897, - "learning_rate": 1.3846336637304671e-06, - "loss": 0.9825, - "step": 5086 - }, - { - "epoch": 0.6116755846810558, - "grad_norm": 2.4494272535726456, - "learning_rate": 1.3838925275727316e-06, - "loss": 1.0581, - "step": 5087 - }, - { - "epoch": 0.6117958275716948, - "grad_norm": 1.7679842150488407, - "learning_rate": 1.3831514848780089e-06, - "loss": 1.0197, - "step": 5088 - }, - { - "epoch": 0.6119160704623339, - "grad_norm": 5.208474648147996, - "learning_rate": 1.3824105357587152e-06, - "loss": 1.1523, - "step": 5089 - }, - { - "epoch": 0.612036313352973, - "grad_norm": 2.070503427590631, - "learning_rate": 1.381669680327253e-06, - "loss": 1.049, - "step": 5090 - }, - { - "epoch": 0.6121565562436121, - "grad_norm": 5.5057672260823685, - "learning_rate": 1.380928918696008e-06, - "loss": 0.9379, - "step": 5091 - }, - { - "epoch": 0.6122767991342511, - "grad_norm": 2.3490822724817844, - "learning_rate": 1.3801882509773548e-06, - "loss": 0.9413, - "step": 5092 - }, - { - "epoch": 0.6123970420248903, - "grad_norm": 1.683930684160428, - "learning_rate": 1.3794476772836503e-06, - "loss": 1.0445, - "step": 5093 - }, - { - "epoch": 0.6125172849155294, - "grad_norm": 1.5833063708904138, - "learning_rate": 1.3787071977272402e-06, - "loss": 1.0683, - "step": 5094 - }, - { - "epoch": 0.6126375278061684, - "grad_norm": 2.8082924629505026, - "learning_rate": 1.3779668124204535e-06, - "loss": 0.944, - "step": 5095 - }, - { - "epoch": 0.6127577706968076, - "grad_norm": 4.124095239540402, - "learning_rate": 1.3772265214756074e-06, - "loss": 1.0368, - "step": 5096 - }, - { - "epoch": 0.6128780135874466, - "grad_norm": 1.7881887376153078, - "learning_rate": 1.3764863250050025e-06, - "loss": 0.986, - "step": 5097 - }, - { - "epoch": 0.6129982564780857, - "grad_norm": 1.9507352934325393, - "learning_rate": 1.3757462231209272e-06, - "loss": 1.0333, - "step": 5098 - }, - { - "epoch": 0.6131184993687249, - "grad_norm": 2.164496643366935, - "learning_rate": 1.3750062159356525e-06, - "loss": 1.1176, - "step": 5099 - }, - { - "epoch": 0.6132387422593639, - "grad_norm": 1.8189845664704969, - "learning_rate": 1.3742663035614382e-06, - "loss": 1.0551, - "step": 5100 - }, - { - "epoch": 0.613358985150003, - "grad_norm": 2.089343315886162, - "learning_rate": 1.3735264861105283e-06, - "loss": 1.0298, - "step": 5101 - }, - { - "epoch": 0.6134792280406421, - "grad_norm": 12.086655854944302, - "learning_rate": 1.372786763695152e-06, - "loss": 1.0106, - "step": 5102 - }, - { - "epoch": 0.6135994709312812, - "grad_norm": 2.274007719176115, - "learning_rate": 1.3720471364275257e-06, - "loss": 1.0012, - "step": 5103 - }, - { - "epoch": 0.6137197138219203, - "grad_norm": 1.8269566119741687, - "learning_rate": 1.3713076044198486e-06, - "loss": 0.9992, - "step": 5104 - }, - { - "epoch": 0.6138399567125594, - "grad_norm": 2.852562130812236, - "learning_rate": 1.3705681677843086e-06, - "loss": 1.037, - "step": 5105 - }, - { - "epoch": 0.6139601996031985, - "grad_norm": 0.7948890698011498, - "learning_rate": 1.3698288266330768e-06, - "loss": 0.8633, - "step": 5106 - }, - { - "epoch": 0.6140804424938375, - "grad_norm": 2.33920070364768, - "learning_rate": 1.3690895810783113e-06, - "loss": 0.9582, - "step": 5107 - }, - { - "epoch": 0.6142006853844767, - "grad_norm": 7.4633248389285045, - "learning_rate": 1.3683504312321543e-06, - "loss": 0.9476, - "step": 5108 - }, - { - "epoch": 0.6143209282751158, - "grad_norm": 1.982455188490105, - "learning_rate": 1.3676113772067355e-06, - "loss": 1.024, - "step": 5109 - }, - { - "epoch": 0.6144411711657548, - "grad_norm": 1.7952203844839154, - "learning_rate": 1.3668724191141671e-06, - "loss": 0.9468, - "step": 5110 - }, - { - "epoch": 0.6145614140563939, - "grad_norm": 6.921682936369656, - "learning_rate": 1.3661335570665493e-06, - "loss": 0.897, - "step": 5111 - }, - { - "epoch": 0.614681656947033, - "grad_norm": 2.555554910558352, - "learning_rate": 1.3653947911759676e-06, - "loss": 0.9323, - "step": 5112 - }, - { - "epoch": 0.6148018998376721, - "grad_norm": 1.5623055909329804, - "learning_rate": 1.3646561215544904e-06, - "loss": 0.9692, - "step": 5113 - }, - { - "epoch": 0.6149221427283111, - "grad_norm": 2.572334491384273, - "learning_rate": 1.363917548314176e-06, - "loss": 1.021, - "step": 5114 - }, - { - "epoch": 0.6150423856189503, - "grad_norm": 1.7095781613534409, - "learning_rate": 1.3631790715670626e-06, - "loss": 0.9605, - "step": 5115 - }, - { - "epoch": 0.6151626285095894, - "grad_norm": 3.0723300352335112, - "learning_rate": 1.3624406914251783e-06, - "loss": 1.0919, - "step": 5116 - }, - { - "epoch": 0.6152828714002284, - "grad_norm": 1.9243500442100878, - "learning_rate": 1.3617024080005335e-06, - "loss": 1.1059, - "step": 5117 - }, - { - "epoch": 0.6154031142908676, - "grad_norm": 1.7201716698873566, - "learning_rate": 1.3609642214051266e-06, - "loss": 0.9723, - "step": 5118 - }, - { - "epoch": 0.6155233571815066, - "grad_norm": 3.1988878463255226, - "learning_rate": 1.3602261317509385e-06, - "loss": 0.8996, - "step": 5119 - }, - { - "epoch": 0.6156436000721457, - "grad_norm": 2.755304336077145, - "learning_rate": 1.3594881391499387e-06, - "loss": 1.0393, - "step": 5120 - }, - { - "epoch": 0.6157638429627849, - "grad_norm": 1.9755544994679448, - "learning_rate": 1.3587502437140778e-06, - "loss": 1.0217, - "step": 5121 - }, - { - "epoch": 0.6158840858534239, - "grad_norm": 2.2621724097049873, - "learning_rate": 1.3580124455552952e-06, - "loss": 1.0771, - "step": 5122 - }, - { - "epoch": 0.616004328744063, - "grad_norm": 1.6089040567161397, - "learning_rate": 1.3572747447855148e-06, - "loss": 1.0948, - "step": 5123 - }, - { - "epoch": 0.6161245716347021, - "grad_norm": 16.163198721785296, - "learning_rate": 1.356537141516644e-06, - "loss": 0.9191, - "step": 5124 - }, - { - "epoch": 0.6162448145253412, - "grad_norm": 2.0517774505619566, - "learning_rate": 1.3557996358605775e-06, - "loss": 0.8465, - "step": 5125 - }, - { - "epoch": 0.6163650574159802, - "grad_norm": 4.562470736475074, - "learning_rate": 1.3550622279291941e-06, - "loss": 0.9307, - "step": 5126 - }, - { - "epoch": 0.6164853003066194, - "grad_norm": 1.2926385463635222, - "learning_rate": 1.354324917834358e-06, - "loss": 1.0586, - "step": 5127 - }, - { - "epoch": 0.6166055431972585, - "grad_norm": 1.8755652529318652, - "learning_rate": 1.353587705687918e-06, - "loss": 0.9967, - "step": 5128 - }, - { - "epoch": 0.6167257860878975, - "grad_norm": 5.720298015593858, - "learning_rate": 1.3528505916017096e-06, - "loss": 0.9499, - "step": 5129 - }, - { - "epoch": 0.6168460289785367, - "grad_norm": 103.56073663254332, - "learning_rate": 1.3521135756875514e-06, - "loss": 1.1053, - "step": 5130 - }, - { - "epoch": 0.6169662718691757, - "grad_norm": 1.4703382467514845, - "learning_rate": 1.3513766580572496e-06, - "loss": 1.0897, - "step": 5131 - }, - { - "epoch": 0.6170865147598148, - "grad_norm": 2.102287309300793, - "learning_rate": 1.3506398388225924e-06, - "loss": 1.0034, - "step": 5132 - }, - { - "epoch": 0.617206757650454, - "grad_norm": 2.1919166664098686, - "learning_rate": 1.349903118095355e-06, - "loss": 0.9385, - "step": 5133 - }, - { - "epoch": 0.617327000541093, - "grad_norm": 3.0110874330716553, - "learning_rate": 1.349166495987298e-06, - "loss": 0.9664, - "step": 5134 - }, - { - "epoch": 0.6174472434317321, - "grad_norm": 0.9307185634413937, - "learning_rate": 1.348429972610166e-06, - "loss": 0.9067, - "step": 5135 - }, - { - "epoch": 0.6175674863223712, - "grad_norm": 0.8666828852444197, - "learning_rate": 1.3476935480756897e-06, - "loss": 0.841, - "step": 5136 - }, - { - "epoch": 0.6176877292130103, - "grad_norm": 3.382252142396917, - "learning_rate": 1.346957222495583e-06, - "loss": 0.9835, - "step": 5137 - }, - { - "epoch": 0.6178079721036493, - "grad_norm": 2.565414724898077, - "learning_rate": 1.3462209959815466e-06, - "loss": 0.9449, - "step": 5138 - }, - { - "epoch": 0.6179282149942885, - "grad_norm": 2.1635936683670747, - "learning_rate": 1.345484868645265e-06, - "loss": 0.9664, - "step": 5139 - }, - { - "epoch": 0.6180484578849276, - "grad_norm": 2.1812328480043424, - "learning_rate": 1.3447488405984088e-06, - "loss": 1.0104, - "step": 5140 - }, - { - "epoch": 0.6181687007755666, - "grad_norm": 3.387397038207657, - "learning_rate": 1.3440129119526322e-06, - "loss": 0.9159, - "step": 5141 - }, - { - "epoch": 0.6182889436662057, - "grad_norm": 0.9524391395669384, - "learning_rate": 1.3432770828195762e-06, - "loss": 0.7983, - "step": 5142 - }, - { - "epoch": 0.6184091865568448, - "grad_norm": 3.231586230731031, - "learning_rate": 1.3425413533108635e-06, - "loss": 0.9426, - "step": 5143 - }, - { - "epoch": 0.6185294294474839, - "grad_norm": 2.2444750451162103, - "learning_rate": 1.341805723538105e-06, - "loss": 0.9374, - "step": 5144 - }, - { - "epoch": 0.618649672338123, - "grad_norm": 2.0363594935269758, - "learning_rate": 1.3410701936128948e-06, - "loss": 1.0053, - "step": 5145 - }, - { - "epoch": 0.6187699152287621, - "grad_norm": 2.8962831948213372, - "learning_rate": 1.340334763646812e-06, - "loss": 1.0841, - "step": 5146 - }, - { - "epoch": 0.6188901581194012, - "grad_norm": 1.7736978885880452, - "learning_rate": 1.3395994337514218e-06, - "loss": 0.9755, - "step": 5147 - }, - { - "epoch": 0.6190104010100402, - "grad_norm": 1.8795104770271978, - "learning_rate": 1.3388642040382725e-06, - "loss": 1.0116, - "step": 5148 - }, - { - "epoch": 0.6191306439006794, - "grad_norm": 1.5944216191957856, - "learning_rate": 1.3381290746188975e-06, - "loss": 1.0649, - "step": 5149 - }, - { - "epoch": 0.6192508867913185, - "grad_norm": 2.311524468934603, - "learning_rate": 1.3373940456048152e-06, - "loss": 0.9081, - "step": 5150 - }, - { - "epoch": 0.6193711296819575, - "grad_norm": 1.5674732407958236, - "learning_rate": 1.3366591171075299e-06, - "loss": 0.8234, - "step": 5151 - }, - { - "epoch": 0.6194913725725967, - "grad_norm": 1.860315806127834, - "learning_rate": 1.335924289238529e-06, - "loss": 1.1308, - "step": 5152 - }, - { - "epoch": 0.6196116154632357, - "grad_norm": 1.7163970794224388, - "learning_rate": 1.3351895621092859e-06, - "loss": 0.9943, - "step": 5153 - }, - { - "epoch": 0.6197318583538748, - "grad_norm": 1.91748482062786, - "learning_rate": 1.3344549358312567e-06, - "loss": 0.9998, - "step": 5154 - }, - { - "epoch": 0.619852101244514, - "grad_norm": 1.8661448228943163, - "learning_rate": 1.3337204105158852e-06, - "loss": 1.0098, - "step": 5155 - }, - { - "epoch": 0.619972344135153, - "grad_norm": 2.7781981965968474, - "learning_rate": 1.332985986274597e-06, - "loss": 0.9544, - "step": 5156 - }, - { - "epoch": 0.6200925870257921, - "grad_norm": 1.8936234334316846, - "learning_rate": 1.3322516632188047e-06, - "loss": 0.9779, - "step": 5157 - }, - { - "epoch": 0.6202128299164312, - "grad_norm": 1.876471387190159, - "learning_rate": 1.3315174414599045e-06, - "loss": 0.9072, - "step": 5158 - }, - { - "epoch": 0.6203330728070703, - "grad_norm": 1.6969973019526203, - "learning_rate": 1.3307833211092768e-06, - "loss": 0.983, - "step": 5159 - }, - { - "epoch": 0.6204533156977093, - "grad_norm": 1.5438733503351239, - "learning_rate": 1.3300493022782873e-06, - "loss": 0.9751, - "step": 5160 - }, - { - "epoch": 0.6205735585883485, - "grad_norm": 1.935848516677559, - "learning_rate": 1.3293153850782855e-06, - "loss": 0.9534, - "step": 5161 - }, - { - "epoch": 0.6206938014789876, - "grad_norm": 1.702647360923509, - "learning_rate": 1.3285815696206069e-06, - "loss": 0.9386, - "step": 5162 - }, - { - "epoch": 0.6208140443696266, - "grad_norm": 1.7610709274365988, - "learning_rate": 1.32784785601657e-06, - "loss": 0.9984, - "step": 5163 - }, - { - "epoch": 0.6209342872602658, - "grad_norm": 1.7359221834593759, - "learning_rate": 1.3271142443774798e-06, - "loss": 0.9742, - "step": 5164 - }, - { - "epoch": 0.6210545301509048, - "grad_norm": 2.1505897422855953, - "learning_rate": 1.3263807348146228e-06, - "loss": 1.0461, - "step": 5165 - }, - { - "epoch": 0.6211747730415439, - "grad_norm": 2.179563238505488, - "learning_rate": 1.3256473274392733e-06, - "loss": 0.9654, - "step": 5166 - }, - { - "epoch": 0.6212950159321831, - "grad_norm": 1.843539080436043, - "learning_rate": 1.3249140223626873e-06, - "loss": 0.9311, - "step": 5167 - }, - { - "epoch": 0.6214152588228221, - "grad_norm": 2.533613537885502, - "learning_rate": 1.3241808196961077e-06, - "loss": 0.992, - "step": 5168 - }, - { - "epoch": 0.6215355017134612, - "grad_norm": 1.7201365340179984, - "learning_rate": 1.3234477195507608e-06, - "loss": 0.9455, - "step": 5169 - }, - { - "epoch": 0.6216557446041003, - "grad_norm": 2.042209345519037, - "learning_rate": 1.322714722037857e-06, - "loss": 0.8631, - "step": 5170 - }, - { - "epoch": 0.6217759874947394, - "grad_norm": 4.551419952496625, - "learning_rate": 1.321981827268591e-06, - "loss": 1.0033, - "step": 5171 - }, - { - "epoch": 0.6218962303853784, - "grad_norm": 3.3948762321103336, - "learning_rate": 1.3212490353541426e-06, - "loss": 1.0418, - "step": 5172 - }, - { - "epoch": 0.6220164732760175, - "grad_norm": 2.149074168803864, - "learning_rate": 1.3205163464056762e-06, - "loss": 1.032, - "step": 5173 - }, - { - "epoch": 0.6221367161666567, - "grad_norm": 1.950663976845198, - "learning_rate": 1.319783760534339e-06, - "loss": 0.9542, - "step": 5174 - }, - { - "epoch": 0.6222569590572957, - "grad_norm": 2.032659775012978, - "learning_rate": 1.319051277851266e-06, - "loss": 0.9842, - "step": 5175 - }, - { - "epoch": 0.6223772019479348, - "grad_norm": 2.4640783218717623, - "learning_rate": 1.3183188984675716e-06, - "loss": 1.0783, - "step": 5176 - }, - { - "epoch": 0.6224974448385739, - "grad_norm": 2.51700000029099, - "learning_rate": 1.3175866224943586e-06, - "loss": 0.9447, - "step": 5177 - }, - { - "epoch": 0.622617687729213, - "grad_norm": 2.0149596542976402, - "learning_rate": 1.316854450042712e-06, - "loss": 0.9626, - "step": 5178 - }, - { - "epoch": 0.622737930619852, - "grad_norm": 1.8229601028353695, - "learning_rate": 1.3161223812237024e-06, - "loss": 0.9678, - "step": 5179 - }, - { - "epoch": 0.6228581735104912, - "grad_norm": 2.3986353093688657, - "learning_rate": 1.3153904161483842e-06, - "loss": 1.0804, - "step": 5180 - }, - { - "epoch": 0.6229784164011303, - "grad_norm": 1.8335691069225724, - "learning_rate": 1.3146585549277953e-06, - "loss": 1.0877, - "step": 5181 - }, - { - "epoch": 0.6230986592917693, - "grad_norm": 2.1081050864850317, - "learning_rate": 1.3139267976729591e-06, - "loss": 1.006, - "step": 5182 - }, - { - "epoch": 0.6232189021824085, - "grad_norm": 2.2991178811255057, - "learning_rate": 1.3131951444948815e-06, - "loss": 0.9401, - "step": 5183 - }, - { - "epoch": 0.6233391450730476, - "grad_norm": 1.8161544133005458, - "learning_rate": 1.3124635955045546e-06, - "loss": 0.9801, - "step": 5184 - }, - { - "epoch": 0.6234593879636866, - "grad_norm": 1.9754057503638478, - "learning_rate": 1.3117321508129537e-06, - "loss": 1.0663, - "step": 5185 - }, - { - "epoch": 0.6235796308543258, - "grad_norm": 1.4953402621284015, - "learning_rate": 1.3110008105310388e-06, - "loss": 0.9951, - "step": 5186 - }, - { - "epoch": 0.6236998737449648, - "grad_norm": 1.6804487765449534, - "learning_rate": 1.3102695747697526e-06, - "loss": 1.0095, - "step": 5187 - }, - { - "epoch": 0.6238201166356039, - "grad_norm": 2.508782695258804, - "learning_rate": 1.3095384436400237e-06, - "loss": 1.1353, - "step": 5188 - }, - { - "epoch": 0.623940359526243, - "grad_norm": 3.4545394279117123, - "learning_rate": 1.3088074172527633e-06, - "loss": 1.0543, - "step": 5189 - }, - { - "epoch": 0.6240606024168821, - "grad_norm": 1.968721359286805, - "learning_rate": 1.3080764957188684e-06, - "loss": 0.9511, - "step": 5190 - }, - { - "epoch": 0.6241808453075212, - "grad_norm": 2.325494291204623, - "learning_rate": 1.3073456791492192e-06, - "loss": 0.9347, - "step": 5191 - }, - { - "epoch": 0.6243010881981603, - "grad_norm": 1.9340861608648479, - "learning_rate": 1.3066149676546801e-06, - "loss": 1.0142, - "step": 5192 - }, - { - "epoch": 0.6244213310887994, - "grad_norm": 1.669235070411336, - "learning_rate": 1.3058843613460985e-06, - "loss": 0.8853, - "step": 5193 - }, - { - "epoch": 0.6245415739794384, - "grad_norm": 4.114852445791315, - "learning_rate": 1.3051538603343075e-06, - "loss": 0.9717, - "step": 5194 - }, - { - "epoch": 0.6246618168700776, - "grad_norm": 1.8201562380538967, - "learning_rate": 1.3044234647301235e-06, - "loss": 0.9071, - "step": 5195 - }, - { - "epoch": 0.6247820597607167, - "grad_norm": 1.6898479375200948, - "learning_rate": 1.303693174644347e-06, - "loss": 0.9523, - "step": 5196 - }, - { - "epoch": 0.6249023026513557, - "grad_norm": 1.7782749964194584, - "learning_rate": 1.3029629901877625e-06, - "loss": 1.0351, - "step": 5197 - }, - { - "epoch": 0.6250225455419949, - "grad_norm": 2.3912341769104217, - "learning_rate": 1.3022329114711376e-06, - "loss": 1.0085, - "step": 5198 - }, - { - "epoch": 0.6251427884326339, - "grad_norm": 2.312468863612702, - "learning_rate": 1.3015029386052256e-06, - "loss": 0.9262, - "step": 5199 - }, - { - "epoch": 0.625263031323273, - "grad_norm": 2.070876710413115, - "learning_rate": 1.3007730717007622e-06, - "loss": 0.9553, - "step": 5200 - }, - { - "epoch": 0.6253832742139122, - "grad_norm": 1.8362782263615105, - "learning_rate": 1.3000433108684676e-06, - "loss": 0.9852, - "step": 5201 - }, - { - "epoch": 0.6255035171045512, - "grad_norm": 2.479447089825943, - "learning_rate": 1.2993136562190467e-06, - "loss": 1.0237, - "step": 5202 - }, - { - "epoch": 0.6256237599951903, - "grad_norm": 1.4154921412490662, - "learning_rate": 1.2985841078631871e-06, - "loss": 0.9338, - "step": 5203 - }, - { - "epoch": 0.6257440028858293, - "grad_norm": 1.707187531953239, - "learning_rate": 1.2978546659115608e-06, - "loss": 1.0121, - "step": 5204 - }, - { - "epoch": 0.6258642457764685, - "grad_norm": 1.7155008990319613, - "learning_rate": 1.2971253304748228e-06, - "loss": 1.0857, - "step": 5205 - }, - { - "epoch": 0.6259844886671075, - "grad_norm": 2.828496750482962, - "learning_rate": 1.296396101663614e-06, - "loss": 0.983, - "step": 5206 - }, - { - "epoch": 0.6261047315577466, - "grad_norm": 1.857759944342439, - "learning_rate": 1.2956669795885565e-06, - "loss": 1.07, - "step": 5207 - }, - { - "epoch": 0.6262249744483858, - "grad_norm": 3.20654887986978, - "learning_rate": 1.294937964360259e-06, - "loss": 0.9206, - "step": 5208 - }, - { - "epoch": 0.6263452173390248, - "grad_norm": 2.6113781578275312, - "learning_rate": 1.2942090560893108e-06, - "loss": 0.9228, - "step": 5209 - }, - { - "epoch": 0.6264654602296639, - "grad_norm": 1.82915448276688, - "learning_rate": 1.2934802548862882e-06, - "loss": 0.8287, - "step": 5210 - }, - { - "epoch": 0.626585703120303, - "grad_norm": 1.7938606520493579, - "learning_rate": 1.292751560861749e-06, - "loss": 1.0581, - "step": 5211 - }, - { - "epoch": 0.6267059460109421, - "grad_norm": 1.7045890744725778, - "learning_rate": 1.2920229741262354e-06, - "loss": 1.0265, - "step": 5212 - }, - { - "epoch": 0.6268261889015811, - "grad_norm": 2.020485982505018, - "learning_rate": 1.2912944947902739e-06, - "loss": 0.9761, - "step": 5213 - }, - { - "epoch": 0.6269464317922203, - "grad_norm": 2.522835579998922, - "learning_rate": 1.2905661229643742e-06, - "loss": 0.9504, - "step": 5214 - }, - { - "epoch": 0.6270666746828594, - "grad_norm": 2.3242835797155963, - "learning_rate": 1.2898378587590299e-06, - "loss": 1.0706, - "step": 5215 - }, - { - "epoch": 0.6271869175734984, - "grad_norm": 1.8901210144699254, - "learning_rate": 1.2891097022847173e-06, - "loss": 1.1055, - "step": 5216 - }, - { - "epoch": 0.6273071604641376, - "grad_norm": 2.159172841392923, - "learning_rate": 1.2883816536518978e-06, - "loss": 0.9024, - "step": 5217 - }, - { - "epoch": 0.6274274033547766, - "grad_norm": 2.0361377304347084, - "learning_rate": 1.2876537129710155e-06, - "loss": 1.0524, - "step": 5218 - }, - { - "epoch": 0.6275476462454157, - "grad_norm": 2.4217442631043653, - "learning_rate": 1.286925880352499e-06, - "loss": 0.9786, - "step": 5219 - }, - { - "epoch": 0.6276678891360549, - "grad_norm": 2.560811486672902, - "learning_rate": 1.2861981559067592e-06, - "loss": 0.9323, - "step": 5220 - }, - { - "epoch": 0.6277881320266939, - "grad_norm": 2.542172918295467, - "learning_rate": 1.2854705397441917e-06, - "loss": 1.0279, - "step": 5221 - }, - { - "epoch": 0.627908374917333, - "grad_norm": 2.7651317609189836, - "learning_rate": 1.2847430319751747e-06, - "loss": 1.0079, - "step": 5222 - }, - { - "epoch": 0.6280286178079721, - "grad_norm": 2.3435754329520786, - "learning_rate": 1.2840156327100712e-06, - "loss": 0.905, - "step": 5223 - }, - { - "epoch": 0.6281488606986112, - "grad_norm": 3.948092668916356, - "learning_rate": 1.2832883420592272e-06, - "loss": 0.9512, - "step": 5224 - }, - { - "epoch": 0.6282691035892503, - "grad_norm": 2.389878698806139, - "learning_rate": 1.282561160132972e-06, - "loss": 0.8734, - "step": 5225 - }, - { - "epoch": 0.6283893464798894, - "grad_norm": 1.4609435382249047, - "learning_rate": 1.2818340870416186e-06, - "loss": 1.04, - "step": 5226 - }, - { - "epoch": 0.6285095893705285, - "grad_norm": 2.752884738930008, - "learning_rate": 1.2811071228954626e-06, - "loss": 0.985, - "step": 5227 - }, - { - "epoch": 0.6286298322611675, - "grad_norm": 2.0085548779922577, - "learning_rate": 1.2803802678047846e-06, - "loss": 1.0423, - "step": 5228 - }, - { - "epoch": 0.6287500751518067, - "grad_norm": 2.2495897766838726, - "learning_rate": 1.279653521879848e-06, - "loss": 0.9617, - "step": 5229 - }, - { - "epoch": 0.6288703180424458, - "grad_norm": 1.9253041138642037, - "learning_rate": 1.2789268852308997e-06, - "loss": 1.0705, - "step": 5230 - }, - { - "epoch": 0.6289905609330848, - "grad_norm": 1.9737165264361787, - "learning_rate": 1.2782003579681688e-06, - "loss": 0.9342, - "step": 5231 - }, - { - "epoch": 0.629110803823724, - "grad_norm": 1.5026220610383343, - "learning_rate": 1.2774739402018701e-06, - "loss": 0.9706, - "step": 5232 - }, - { - "epoch": 0.629231046714363, - "grad_norm": 1.6700649267127416, - "learning_rate": 1.2767476320422002e-06, - "loss": 0.9629, - "step": 5233 - }, - { - "epoch": 0.6293512896050021, - "grad_norm": 0.7244083094734823, - "learning_rate": 1.2760214335993392e-06, - "loss": 0.8288, - "step": 5234 - }, - { - "epoch": 0.6294715324956413, - "grad_norm": 1.9739035109772194, - "learning_rate": 1.2752953449834514e-06, - "loss": 0.8206, - "step": 5235 - }, - { - "epoch": 0.6295917753862803, - "grad_norm": 2.085136002420487, - "learning_rate": 1.2745693663046836e-06, - "loss": 1.031, - "step": 5236 - }, - { - "epoch": 0.6297120182769194, - "grad_norm": 2.0182657611769708, - "learning_rate": 1.2738434976731662e-06, - "loss": 1.033, - "step": 5237 - }, - { - "epoch": 0.6298322611675584, - "grad_norm": 3.4052182656127057, - "learning_rate": 1.2731177391990125e-06, - "loss": 0.984, - "step": 5238 - }, - { - "epoch": 0.6299525040581976, - "grad_norm": 2.029505993345762, - "learning_rate": 1.2723920909923203e-06, - "loss": 1.0489, - "step": 5239 - }, - { - "epoch": 0.6300727469488366, - "grad_norm": 0.9085114642707404, - "learning_rate": 1.2716665531631688e-06, - "loss": 0.885, - "step": 5240 - }, - { - "epoch": 0.6301929898394757, - "grad_norm": 2.8345299793003154, - "learning_rate": 1.270941125821623e-06, - "loss": 1.001, - "step": 5241 - }, - { - "epoch": 0.6303132327301149, - "grad_norm": 1.51130919120486, - "learning_rate": 1.2702158090777278e-06, - "loss": 0.9866, - "step": 5242 - }, - { - "epoch": 0.6304334756207539, - "grad_norm": 1.9209486426816398, - "learning_rate": 1.2694906030415148e-06, - "loss": 0.9786, - "step": 5243 - }, - { - "epoch": 0.630553718511393, - "grad_norm": 2.720236010410042, - "learning_rate": 1.2687655078229958e-06, - "loss": 1.0394, - "step": 5244 - }, - { - "epoch": 0.6306739614020321, - "grad_norm": 2.9321662690817885, - "learning_rate": 1.2680405235321678e-06, - "loss": 0.9239, - "step": 5245 - }, - { - "epoch": 0.6307942042926712, - "grad_norm": 2.370839339406787, - "learning_rate": 1.267315650279011e-06, - "loss": 1.0124, - "step": 5246 - }, - { - "epoch": 0.6309144471833102, - "grad_norm": 4.029116516958347, - "learning_rate": 1.2665908881734874e-06, - "loss": 0.9696, - "step": 5247 - }, - { - "epoch": 0.6310346900739494, - "grad_norm": 2.283083662133471, - "learning_rate": 1.2658662373255432e-06, - "loss": 1.08, - "step": 5248 - }, - { - "epoch": 0.6311549329645885, - "grad_norm": 0.8139179035420955, - "learning_rate": 1.2651416978451063e-06, - "loss": 0.804, - "step": 5249 - }, - { - "epoch": 0.6312751758552275, - "grad_norm": 1.8049950936163583, - "learning_rate": 1.2644172698420903e-06, - "loss": 0.88, - "step": 5250 - }, - { - "epoch": 0.6313954187458667, - "grad_norm": 1.712330847412309, - "learning_rate": 1.2636929534263892e-06, - "loss": 1.0765, - "step": 5251 - }, - { - "epoch": 0.6315156616365057, - "grad_norm": 1.6860799112353877, - "learning_rate": 1.2629687487078821e-06, - "loss": 0.9992, - "step": 5252 - }, - { - "epoch": 0.6316359045271448, - "grad_norm": 1.9880082518216349, - "learning_rate": 1.2622446557964293e-06, - "loss": 0.9916, - "step": 5253 - }, - { - "epoch": 0.631756147417784, - "grad_norm": 1.6378331726318438, - "learning_rate": 1.261520674801876e-06, - "loss": 0.9312, - "step": 5254 - }, - { - "epoch": 0.631876390308423, - "grad_norm": 2.5007971446406, - "learning_rate": 1.2607968058340488e-06, - "loss": 0.9586, - "step": 5255 - }, - { - "epoch": 0.6319966331990621, - "grad_norm": 1.8780083999364683, - "learning_rate": 1.2600730490027583e-06, - "loss": 0.9588, - "step": 5256 - }, - { - "epoch": 0.6321168760897012, - "grad_norm": 1.6569403882903875, - "learning_rate": 1.2593494044177984e-06, - "loss": 1.034, - "step": 5257 - }, - { - "epoch": 0.6322371189803403, - "grad_norm": 2.3781819360561727, - "learning_rate": 1.2586258721889448e-06, - "loss": 1.027, - "step": 5258 - }, - { - "epoch": 0.6323573618709794, - "grad_norm": 2.1410523148211547, - "learning_rate": 1.2579024524259573e-06, - "loss": 1.0436, - "step": 5259 - }, - { - "epoch": 0.6324776047616185, - "grad_norm": 60.15191542825133, - "learning_rate": 1.2571791452385768e-06, - "loss": 1.146, - "step": 5260 - }, - { - "epoch": 0.6325978476522576, - "grad_norm": 1.5874914063949472, - "learning_rate": 1.2564559507365301e-06, - "loss": 1.0004, - "step": 5261 - }, - { - "epoch": 0.6327180905428966, - "grad_norm": 4.3281959500333125, - "learning_rate": 1.2557328690295244e-06, - "loss": 1.0239, - "step": 5262 - }, - { - "epoch": 0.6328383334335358, - "grad_norm": 1.8165847526537866, - "learning_rate": 1.255009900227251e-06, - "loss": 0.9876, - "step": 5263 - }, - { - "epoch": 0.6329585763241748, - "grad_norm": 1.9375439300479043, - "learning_rate": 1.254287044439383e-06, - "loss": 1.0266, - "step": 5264 - }, - { - "epoch": 0.6330788192148139, - "grad_norm": 0.926434636420229, - "learning_rate": 1.2535643017755776e-06, - "loss": 0.7981, - "step": 5265 - }, - { - "epoch": 0.6331990621054531, - "grad_norm": 2.853586978007197, - "learning_rate": 1.2528416723454737e-06, - "loss": 0.9453, - "step": 5266 - }, - { - "epoch": 0.6333193049960921, - "grad_norm": 1.5464951790636747, - "learning_rate": 1.2521191562586945e-06, - "loss": 0.941, - "step": 5267 - }, - { - "epoch": 0.6334395478867312, - "grad_norm": 1.9893301424894916, - "learning_rate": 1.2513967536248445e-06, - "loss": 1.003, - "step": 5268 - }, - { - "epoch": 0.6335597907773702, - "grad_norm": 1.7377295829529544, - "learning_rate": 1.2506744645535117e-06, - "loss": 1.0422, - "step": 5269 - }, - { - "epoch": 0.6336800336680094, - "grad_norm": 1.742882286399631, - "learning_rate": 1.249952289154267e-06, - "loss": 0.8231, - "step": 5270 - }, - { - "epoch": 0.6338002765586485, - "grad_norm": 2.6138032354828855, - "learning_rate": 1.2492302275366635e-06, - "loss": 0.9936, - "step": 5271 - }, - { - "epoch": 0.6339205194492875, - "grad_norm": 2.168669826113986, - "learning_rate": 1.2485082798102377e-06, - "loss": 0.8841, - "step": 5272 - }, - { - "epoch": 0.6340407623399267, - "grad_norm": 2.799960006700727, - "learning_rate": 1.2477864460845084e-06, - "loss": 0.9208, - "step": 5273 - }, - { - "epoch": 0.6341610052305657, - "grad_norm": 3.0715141759710174, - "learning_rate": 1.2470647264689776e-06, - "loss": 0.9616, - "step": 5274 - }, - { - "epoch": 0.6342812481212048, - "grad_norm": 1.9134764104883768, - "learning_rate": 1.2463431210731282e-06, - "loss": 0.9425, - "step": 5275 - }, - { - "epoch": 0.634401491011844, - "grad_norm": 2.2202770726963372, - "learning_rate": 1.2456216300064289e-06, - "loss": 0.9903, - "step": 5276 - }, - { - "epoch": 0.634521733902483, - "grad_norm": 1.5921315221707566, - "learning_rate": 1.244900253378328e-06, - "loss": 1.01, - "step": 5277 - }, - { - "epoch": 0.6346419767931221, - "grad_norm": 1.8839584598740184, - "learning_rate": 1.2441789912982583e-06, - "loss": 0.9241, - "step": 5278 - }, - { - "epoch": 0.6347622196837612, - "grad_norm": 2.7558239043053034, - "learning_rate": 1.2434578438756346e-06, - "loss": 0.8788, - "step": 5279 - }, - { - "epoch": 0.6348824625744003, - "grad_norm": 2.169578600632226, - "learning_rate": 1.242736811219855e-06, - "loss": 1.0151, - "step": 5280 - }, - { - "epoch": 0.6350027054650393, - "grad_norm": 3.0069591550244494, - "learning_rate": 1.2420158934402988e-06, - "loss": 1.046, - "step": 5281 - }, - { - "epoch": 0.6351229483556785, - "grad_norm": 1.9700831183699161, - "learning_rate": 1.2412950906463286e-06, - "loss": 1.0698, - "step": 5282 - }, - { - "epoch": 0.6352431912463176, - "grad_norm": 1.7502835588969006, - "learning_rate": 1.2405744029472902e-06, - "loss": 1.1253, - "step": 5283 - }, - { - "epoch": 0.6353634341369566, - "grad_norm": 2.991480811109659, - "learning_rate": 1.2398538304525108e-06, - "loss": 0.9911, - "step": 5284 - }, - { - "epoch": 0.6354836770275958, - "grad_norm": 2.415247160464198, - "learning_rate": 1.2391333732713016e-06, - "loss": 0.9898, - "step": 5285 - }, - { - "epoch": 0.6356039199182348, - "grad_norm": 2.5436817581808175, - "learning_rate": 1.2384130315129543e-06, - "loss": 1.0111, - "step": 5286 - }, - { - "epoch": 0.6357241628088739, - "grad_norm": 2.3048992512296698, - "learning_rate": 1.2376928052867447e-06, - "loss": 0.9585, - "step": 5287 - }, - { - "epoch": 0.6358444056995131, - "grad_norm": 2.7497812530908443, - "learning_rate": 1.2369726947019299e-06, - "loss": 1.0126, - "step": 5288 - }, - { - "epoch": 0.6359646485901521, - "grad_norm": 2.367661129746248, - "learning_rate": 1.2362526998677511e-06, - "loss": 0.8972, - "step": 5289 - }, - { - "epoch": 0.6360848914807912, - "grad_norm": 1.7881394049641917, - "learning_rate": 1.2355328208934301e-06, - "loss": 1.0725, - "step": 5290 - }, - { - "epoch": 0.6362051343714303, - "grad_norm": 1.7197569931604557, - "learning_rate": 1.2348130578881728e-06, - "loss": 0.9593, - "step": 5291 - }, - { - "epoch": 0.6363253772620694, - "grad_norm": 2.164508649647068, - "learning_rate": 1.2340934109611664e-06, - "loss": 0.9855, - "step": 5292 - }, - { - "epoch": 0.6364456201527084, - "grad_norm": 2.6537705461710237, - "learning_rate": 1.2333738802215798e-06, - "loss": 0.9124, - "step": 5293 - }, - { - "epoch": 0.6365658630433476, - "grad_norm": 2.199955384929138, - "learning_rate": 1.2326544657785668e-06, - "loss": 1.0389, - "step": 5294 - }, - { - "epoch": 0.6366861059339867, - "grad_norm": 2.3182670234212286, - "learning_rate": 1.2319351677412608e-06, - "loss": 0.9786, - "step": 5295 - }, - { - "epoch": 0.6368063488246257, - "grad_norm": 1.7112306665926453, - "learning_rate": 1.2312159862187796e-06, - "loss": 0.9688, - "step": 5296 - }, - { - "epoch": 0.6369265917152649, - "grad_norm": 1.5791387040506795, - "learning_rate": 1.2304969213202217e-06, - "loss": 0.9905, - "step": 5297 - }, - { - "epoch": 0.6370468346059039, - "grad_norm": 3.0310823059888543, - "learning_rate": 1.2297779731546692e-06, - "loss": 1.0212, - "step": 5298 - }, - { - "epoch": 0.637167077496543, - "grad_norm": 3.3040586193324946, - "learning_rate": 1.2290591418311853e-06, - "loss": 1.014, - "step": 5299 - }, - { - "epoch": 0.637287320387182, - "grad_norm": 1.4593872167823352, - "learning_rate": 1.2283404274588172e-06, - "loss": 0.9414, - "step": 5300 - }, - { - "epoch": 0.6374075632778212, - "grad_norm": 0.7600394101339861, - "learning_rate": 1.227621830146592e-06, - "loss": 0.7841, - "step": 5301 - }, - { - "epoch": 0.6375278061684603, - "grad_norm": 1.8494241333700479, - "learning_rate": 1.2269033500035217e-06, - "loss": 1.0216, - "step": 5302 - }, - { - "epoch": 0.6376480490590993, - "grad_norm": 1.7372451265963629, - "learning_rate": 1.2261849871385988e-06, - "loss": 0.9695, - "step": 5303 - }, - { - "epoch": 0.6377682919497385, - "grad_norm": 1.9863472096474397, - "learning_rate": 1.2254667416607972e-06, - "loss": 0.8466, - "step": 5304 - }, - { - "epoch": 0.6378885348403776, - "grad_norm": 2.549098162030397, - "learning_rate": 1.2247486136790756e-06, - "loss": 1.0634, - "step": 5305 - }, - { - "epoch": 0.6380087777310166, - "grad_norm": 1.8781838406249187, - "learning_rate": 1.2240306033023726e-06, - "loss": 1.0289, - "step": 5306 - }, - { - "epoch": 0.6381290206216558, - "grad_norm": 1.8147321307916797, - "learning_rate": 1.223312710639611e-06, - "loss": 0.9538, - "step": 5307 - }, - { - "epoch": 0.6382492635122948, - "grad_norm": 2.1468529979128093, - "learning_rate": 1.2225949357996928e-06, - "loss": 1.0876, - "step": 5308 - }, - { - "epoch": 0.6383695064029339, - "grad_norm": 2.585794957775977, - "learning_rate": 1.221877278891505e-06, - "loss": 1.0344, - "step": 5309 - }, - { - "epoch": 0.638489749293573, - "grad_norm": 2.015459867115232, - "learning_rate": 1.221159740023915e-06, - "loss": 0.9389, - "step": 5310 - }, - { - "epoch": 0.6386099921842121, - "grad_norm": 2.0149992925297178, - "learning_rate": 1.2204423193057735e-06, - "loss": 0.9675, - "step": 5311 - }, - { - "epoch": 0.6387302350748512, - "grad_norm": 0.9563146282621371, - "learning_rate": 1.2197250168459122e-06, - "loss": 0.9193, - "step": 5312 - }, - { - "epoch": 0.6388504779654903, - "grad_norm": 1.8971054364525506, - "learning_rate": 1.2190078327531454e-06, - "loss": 0.9761, - "step": 5313 - }, - { - "epoch": 0.6389707208561294, - "grad_norm": 3.468587098291617, - "learning_rate": 1.2182907671362697e-06, - "loss": 0.9579, - "step": 5314 - }, - { - "epoch": 0.6390909637467684, - "grad_norm": 4.280924318275065, - "learning_rate": 1.2175738201040626e-06, - "loss": 1.0085, - "step": 5315 - }, - { - "epoch": 0.6392112066374076, - "grad_norm": 2.5593728313506725, - "learning_rate": 1.2168569917652855e-06, - "loss": 1.0224, - "step": 5316 - }, - { - "epoch": 0.6393314495280467, - "grad_norm": 1.7012987757409832, - "learning_rate": 1.2161402822286797e-06, - "loss": 0.8697, - "step": 5317 - }, - { - "epoch": 0.6394516924186857, - "grad_norm": 2.1780180194694316, - "learning_rate": 1.2154236916029703e-06, - "loss": 1.0227, - "step": 5318 - }, - { - "epoch": 0.6395719353093249, - "grad_norm": 2.2553390100926194, - "learning_rate": 1.2147072199968627e-06, - "loss": 0.9573, - "step": 5319 - }, - { - "epoch": 0.6396921781999639, - "grad_norm": 1.715103929112064, - "learning_rate": 1.2139908675190454e-06, - "loss": 0.9458, - "step": 5320 - }, - { - "epoch": 0.639812421090603, - "grad_norm": 3.6406955957730855, - "learning_rate": 1.2132746342781883e-06, - "loss": 0.9824, - "step": 5321 - }, - { - "epoch": 0.6399326639812422, - "grad_norm": 2.3443356608764314, - "learning_rate": 1.2125585203829442e-06, - "loss": 1.022, - "step": 5322 - }, - { - "epoch": 0.6400529068718812, - "grad_norm": 1.7793283637701456, - "learning_rate": 1.211842525941946e-06, - "loss": 0.97, - "step": 5323 - }, - { - "epoch": 0.6401731497625203, - "grad_norm": 2.0099364450000534, - "learning_rate": 1.2111266510638105e-06, - "loss": 1.0237, - "step": 5324 - }, - { - "epoch": 0.6402933926531594, - "grad_norm": 1.7472981303871609, - "learning_rate": 1.2104108958571346e-06, - "loss": 1.0289, - "step": 5325 - }, - { - "epoch": 0.6404136355437985, - "grad_norm": 1.6003119075262295, - "learning_rate": 1.2096952604304975e-06, - "loss": 0.9866, - "step": 5326 - }, - { - "epoch": 0.6405338784344375, - "grad_norm": 1.9698204279157663, - "learning_rate": 1.2089797448924616e-06, - "loss": 0.9319, - "step": 5327 - }, - { - "epoch": 0.6406541213250767, - "grad_norm": 2.5162949706529827, - "learning_rate": 1.2082643493515692e-06, - "loss": 0.8847, - "step": 5328 - }, - { - "epoch": 0.6407743642157158, - "grad_norm": 1.8922312195166329, - "learning_rate": 1.207549073916346e-06, - "loss": 1.046, - "step": 5329 - }, - { - "epoch": 0.6408946071063548, - "grad_norm": 2.6031678891169205, - "learning_rate": 1.2068339186952976e-06, - "loss": 1.0118, - "step": 5330 - }, - { - "epoch": 0.6410148499969939, - "grad_norm": 1.8252917121914312, - "learning_rate": 1.2061188837969136e-06, - "loss": 0.9581, - "step": 5331 - }, - { - "epoch": 0.641135092887633, - "grad_norm": 2.3229054563334515, - "learning_rate": 1.2054039693296631e-06, - "loss": 1.0727, - "step": 5332 - }, - { - "epoch": 0.6412553357782721, - "grad_norm": 1.6392008822111528, - "learning_rate": 1.2046891754019992e-06, - "loss": 1.036, - "step": 5333 - }, - { - "epoch": 0.6413755786689112, - "grad_norm": 2.0576312078678125, - "learning_rate": 1.2039745021223548e-06, - "loss": 1.0525, - "step": 5334 - }, - { - "epoch": 0.6414958215595503, - "grad_norm": 0.871765117489022, - "learning_rate": 1.2032599495991456e-06, - "loss": 0.8514, - "step": 5335 - }, - { - "epoch": 0.6416160644501894, - "grad_norm": 2.023906638649672, - "learning_rate": 1.2025455179407685e-06, - "loss": 0.9266, - "step": 5336 - }, - { - "epoch": 0.6417363073408284, - "grad_norm": 20.4656180511803, - "learning_rate": 1.2018312072556022e-06, - "loss": 0.9665, - "step": 5337 - }, - { - "epoch": 0.6418565502314676, - "grad_norm": 1.9282189468421524, - "learning_rate": 1.2011170176520077e-06, - "loss": 0.9765, - "step": 5338 - }, - { - "epoch": 0.6419767931221066, - "grad_norm": 1.7739071665216903, - "learning_rate": 1.2004029492383256e-06, - "loss": 1.0412, - "step": 5339 - }, - { - "epoch": 0.6420970360127457, - "grad_norm": 1.9264171858016195, - "learning_rate": 1.1996890021228814e-06, - "loss": 0.9715, - "step": 5340 - }, - { - "epoch": 0.6422172789033849, - "grad_norm": 1.532368698967251, - "learning_rate": 1.1989751764139785e-06, - "loss": 0.9275, - "step": 5341 - }, - { - "epoch": 0.6423375217940239, - "grad_norm": 1.592104118043107, - "learning_rate": 1.1982614722199044e-06, - "loss": 1.0523, - "step": 5342 - }, - { - "epoch": 0.642457764684663, - "grad_norm": 1.9628759279222792, - "learning_rate": 1.1975478896489276e-06, - "loss": 1.0073, - "step": 5343 - }, - { - "epoch": 0.6425780075753021, - "grad_norm": 1.8956294386774324, - "learning_rate": 1.1968344288092981e-06, - "loss": 0.993, - "step": 5344 - }, - { - "epoch": 0.6426982504659412, - "grad_norm": 5.709482490350346, - "learning_rate": 1.1961210898092468e-06, - "loss": 0.8715, - "step": 5345 - }, - { - "epoch": 0.6428184933565803, - "grad_norm": 2.5676782901597726, - "learning_rate": 1.1954078727569874e-06, - "loss": 1.0319, - "step": 5346 - }, - { - "epoch": 0.6429387362472194, - "grad_norm": 1.8018735274706807, - "learning_rate": 1.1946947777607141e-06, - "loss": 1.0104, - "step": 5347 - }, - { - "epoch": 0.6430589791378585, - "grad_norm": 1.903057489309555, - "learning_rate": 1.1939818049286024e-06, - "loss": 1.0276, - "step": 5348 - }, - { - "epoch": 0.6431792220284975, - "grad_norm": 1.5747653211110844, - "learning_rate": 1.1932689543688101e-06, - "loss": 0.9696, - "step": 5349 - }, - { - "epoch": 0.6432994649191367, - "grad_norm": 2.6251377796163373, - "learning_rate": 1.1925562261894756e-06, - "loss": 0.9512, - "step": 5350 - }, - { - "epoch": 0.6434197078097758, - "grad_norm": 1.890440246130047, - "learning_rate": 1.1918436204987207e-06, - "loss": 1.007, - "step": 5351 - }, - { - "epoch": 0.6435399507004148, - "grad_norm": 2.3669951206506705, - "learning_rate": 1.191131137404645e-06, - "loss": 1.0444, - "step": 5352 - }, - { - "epoch": 0.643660193591054, - "grad_norm": 1.937333930497013, - "learning_rate": 1.190418777015333e-06, - "loss": 0.9987, - "step": 5353 - }, - { - "epoch": 0.643780436481693, - "grad_norm": 1.5961625034426288, - "learning_rate": 1.1897065394388487e-06, - "loss": 0.9645, - "step": 5354 - }, - { - "epoch": 0.6439006793723321, - "grad_norm": 1.581339970015633, - "learning_rate": 1.1889944247832385e-06, - "loss": 0.992, - "step": 5355 - }, - { - "epoch": 0.6440209222629713, - "grad_norm": 2.295009971713341, - "learning_rate": 1.1882824331565283e-06, - "loss": 0.9335, - "step": 5356 - }, - { - "epoch": 0.6441411651536103, - "grad_norm": 2.0784797078918436, - "learning_rate": 1.1875705646667287e-06, - "loss": 1.1214, - "step": 5357 - }, - { - "epoch": 0.6442614080442494, - "grad_norm": 7.260205419487406, - "learning_rate": 1.1868588194218282e-06, - "loss": 0.9831, - "step": 5358 - }, - { - "epoch": 0.6443816509348885, - "grad_norm": 1.9136924541649338, - "learning_rate": 1.1861471975297979e-06, - "loss": 0.9642, - "step": 5359 - }, - { - "epoch": 0.6445018938255276, - "grad_norm": 1.6807908094048642, - "learning_rate": 1.185435699098591e-06, - "loss": 0.9394, - "step": 5360 - }, - { - "epoch": 0.6446221367161666, - "grad_norm": 2.479511418668036, - "learning_rate": 1.1847243242361403e-06, - "loss": 1.0051, - "step": 5361 - }, - { - "epoch": 0.6447423796068057, - "grad_norm": 1.7272815156557926, - "learning_rate": 1.1840130730503624e-06, - "loss": 1.0125, - "step": 5362 - }, - { - "epoch": 0.6448626224974449, - "grad_norm": 2.568348049820563, - "learning_rate": 1.1833019456491518e-06, - "loss": 0.9812, - "step": 5363 - }, - { - "epoch": 0.6449828653880839, - "grad_norm": 3.957582636806013, - "learning_rate": 1.1825909421403871e-06, - "loss": 1.0052, - "step": 5364 - }, - { - "epoch": 0.645103108278723, - "grad_norm": 1.7672060748809664, - "learning_rate": 1.181880062631926e-06, - "loss": 0.991, - "step": 5365 - }, - { - "epoch": 0.6452233511693621, - "grad_norm": 2.1451588530506958, - "learning_rate": 1.1811693072316093e-06, - "loss": 1.0794, - "step": 5366 - }, - { - "epoch": 0.6453435940600012, - "grad_norm": 3.0251655470261563, - "learning_rate": 1.1804586760472574e-06, - "loss": 1.0614, - "step": 5367 - }, - { - "epoch": 0.6454638369506402, - "grad_norm": 2.02099792229698, - "learning_rate": 1.1797481691866736e-06, - "loss": 1.0257, - "step": 5368 - }, - { - "epoch": 0.6455840798412794, - "grad_norm": 2.1670698622148126, - "learning_rate": 1.1790377867576393e-06, - "loss": 1.0538, - "step": 5369 - }, - { - "epoch": 0.6457043227319185, - "grad_norm": 1.722909921665973, - "learning_rate": 1.1783275288679203e-06, - "loss": 0.9898, - "step": 5370 - }, - { - "epoch": 0.6458245656225575, - "grad_norm": 0.9335975806504833, - "learning_rate": 1.177617395625262e-06, - "loss": 0.883, - "step": 5371 - }, - { - "epoch": 0.6459448085131967, - "grad_norm": 1.8525173123640821, - "learning_rate": 1.1769073871373908e-06, - "loss": 0.9856, - "step": 5372 - }, - { - "epoch": 0.6460650514038357, - "grad_norm": 1.6778328525831396, - "learning_rate": 1.176197503512015e-06, - "loss": 1.0679, - "step": 5373 - }, - { - "epoch": 0.6461852942944748, - "grad_norm": 2.5859011736850883, - "learning_rate": 1.1754877448568223e-06, - "loss": 1.0513, - "step": 5374 - }, - { - "epoch": 0.646305537185114, - "grad_norm": 2.0181746805691234, - "learning_rate": 1.1747781112794837e-06, - "loss": 1.1285, - "step": 5375 - }, - { - "epoch": 0.646425780075753, - "grad_norm": 1.715008703826972, - "learning_rate": 1.1740686028876487e-06, - "loss": 1.0495, - "step": 5376 - }, - { - "epoch": 0.6465460229663921, - "grad_norm": 4.040871191232265, - "learning_rate": 1.1733592197889507e-06, - "loss": 0.9783, - "step": 5377 - }, - { - "epoch": 0.6466662658570312, - "grad_norm": 1.8433131330784767, - "learning_rate": 1.1726499620910014e-06, - "loss": 0.948, - "step": 5378 - }, - { - "epoch": 0.6467865087476703, - "grad_norm": 2.1561086857141074, - "learning_rate": 1.1719408299013955e-06, - "loss": 0.9941, - "step": 5379 - }, - { - "epoch": 0.6469067516383094, - "grad_norm": 3.1044582426285925, - "learning_rate": 1.1712318233277067e-06, - "loss": 0.9891, - "step": 5380 - }, - { - "epoch": 0.6470269945289485, - "grad_norm": 0.7669440215247507, - "learning_rate": 1.1705229424774916e-06, - "loss": 0.8228, - "step": 5381 - }, - { - "epoch": 0.6471472374195876, - "grad_norm": 1.5572259010879066, - "learning_rate": 1.1698141874582867e-06, - "loss": 0.8691, - "step": 5382 - }, - { - "epoch": 0.6472674803102266, - "grad_norm": 1.5848358537412741, - "learning_rate": 1.169105558377609e-06, - "loss": 0.9505, - "step": 5383 - }, - { - "epoch": 0.6473877232008658, - "grad_norm": 1.6503139861782254, - "learning_rate": 1.1683970553429587e-06, - "loss": 1.0084, - "step": 5384 - }, - { - "epoch": 0.6475079660915048, - "grad_norm": 3.629335638785021, - "learning_rate": 1.1676886784618128e-06, - "loss": 1.0453, - "step": 5385 - }, - { - "epoch": 0.6476282089821439, - "grad_norm": 1.9812246832448503, - "learning_rate": 1.1669804278416332e-06, - "loss": 1.0572, - "step": 5386 - }, - { - "epoch": 0.6477484518727831, - "grad_norm": 1.7796877652537322, - "learning_rate": 1.1662723035898602e-06, - "loss": 0.9452, - "step": 5387 - }, - { - "epoch": 0.6478686947634221, - "grad_norm": 1.777878029135791, - "learning_rate": 1.165564305813915e-06, - "loss": 1.0444, - "step": 5388 - }, - { - "epoch": 0.6479889376540612, - "grad_norm": 1.6782333822076863, - "learning_rate": 1.1648564346212019e-06, - "loss": 1.0409, - "step": 5389 - }, - { - "epoch": 0.6481091805447003, - "grad_norm": 10.245159960308316, - "learning_rate": 1.164148690119104e-06, - "loss": 0.9932, - "step": 5390 - }, - { - "epoch": 0.6482294234353394, - "grad_norm": 1.7380287930042728, - "learning_rate": 1.163441072414985e-06, - "loss": 0.9727, - "step": 5391 - }, - { - "epoch": 0.6483496663259785, - "grad_norm": 1.7954051764914283, - "learning_rate": 1.16273358161619e-06, - "loss": 0.9244, - "step": 5392 - }, - { - "epoch": 0.6484699092166175, - "grad_norm": 1.8498286889635411, - "learning_rate": 1.1620262178300446e-06, - "loss": 1.0755, - "step": 5393 - }, - { - "epoch": 0.6485901521072567, - "grad_norm": 1.7279619440064913, - "learning_rate": 1.1613189811638563e-06, - "loss": 0.9862, - "step": 5394 - }, - { - "epoch": 0.6487103949978957, - "grad_norm": 2.1069330845342664, - "learning_rate": 1.1606118717249117e-06, - "loss": 1.0066, - "step": 5395 - }, - { - "epoch": 0.6488306378885348, - "grad_norm": 1.8156424936840445, - "learning_rate": 1.1599048896204787e-06, - "loss": 0.9117, - "step": 5396 - }, - { - "epoch": 0.648950880779174, - "grad_norm": 1.6598937062622015, - "learning_rate": 1.1591980349578061e-06, - "loss": 1.0397, - "step": 5397 - }, - { - "epoch": 0.649071123669813, - "grad_norm": 0.790802274589181, - "learning_rate": 1.158491307844123e-06, - "loss": 0.8064, - "step": 5398 - }, - { - "epoch": 0.6491913665604521, - "grad_norm": 2.298815471967621, - "learning_rate": 1.1577847083866387e-06, - "loss": 1.0722, - "step": 5399 - }, - { - "epoch": 0.6493116094510912, - "grad_norm": 1.8145711180855746, - "learning_rate": 1.1570782366925453e-06, - "loss": 0.9607, - "step": 5400 - }, - { - "epoch": 0.6494318523417303, - "grad_norm": 1.706757268152313, - "learning_rate": 1.1563718928690132e-06, - "loss": 0.9871, - "step": 5401 - }, - { - "epoch": 0.6495520952323693, - "grad_norm": 2.254369308003388, - "learning_rate": 1.1556656770231942e-06, - "loss": 0.944, - "step": 5402 - }, - { - "epoch": 0.6496723381230085, - "grad_norm": 1.4334599487063766, - "learning_rate": 1.1549595892622207e-06, - "loss": 0.99, - "step": 5403 - }, - { - "epoch": 0.6497925810136476, - "grad_norm": 0.8306813998625135, - "learning_rate": 1.1542536296932047e-06, - "loss": 0.8649, - "step": 5404 - }, - { - "epoch": 0.6499128239042866, - "grad_norm": 2.232560708308775, - "learning_rate": 1.1535477984232414e-06, - "loss": 0.9293, - "step": 5405 - }, - { - "epoch": 0.6500330667949258, - "grad_norm": 2.713310148841476, - "learning_rate": 1.152842095559404e-06, - "loss": 0.999, - "step": 5406 - }, - { - "epoch": 0.6501533096855648, - "grad_norm": 1.6223553631563532, - "learning_rate": 1.1521365212087474e-06, - "loss": 0.996, - "step": 5407 - }, - { - "epoch": 0.6502735525762039, - "grad_norm": 1.8260742161387307, - "learning_rate": 1.1514310754783062e-06, - "loss": 0.9314, - "step": 5408 - }, - { - "epoch": 0.6503937954668431, - "grad_norm": 2.0845946372096313, - "learning_rate": 1.1507257584750964e-06, - "loss": 0.9592, - "step": 5409 - }, - { - "epoch": 0.6505140383574821, - "grad_norm": 4.193843762050308, - "learning_rate": 1.150020570306113e-06, - "loss": 1.0002, - "step": 5410 - }, - { - "epoch": 0.6506342812481212, - "grad_norm": 2.1287968318713504, - "learning_rate": 1.1493155110783338e-06, - "loss": 0.982, - "step": 5411 - }, - { - "epoch": 0.6507545241387603, - "grad_norm": 2.0335270956094686, - "learning_rate": 1.1486105808987155e-06, - "loss": 0.9389, - "step": 5412 - }, - { - "epoch": 0.6508747670293994, - "grad_norm": 1.9205633510200522, - "learning_rate": 1.1479057798741947e-06, - "loss": 1.0354, - "step": 5413 - }, - { - "epoch": 0.6509950099200384, - "grad_norm": 0.8267293361461164, - "learning_rate": 1.14720110811169e-06, - "loss": 0.8114, - "step": 5414 - }, - { - "epoch": 0.6511152528106776, - "grad_norm": 1.729246788773058, - "learning_rate": 1.146496565718098e-06, - "loss": 0.9925, - "step": 5415 - }, - { - "epoch": 0.6512354957013167, - "grad_norm": 2.261669099094748, - "learning_rate": 1.1457921528002996e-06, - "loss": 0.9824, - "step": 5416 - }, - { - "epoch": 0.6513557385919557, - "grad_norm": 2.1192490791659906, - "learning_rate": 1.1450878694651522e-06, - "loss": 0.9457, - "step": 5417 - }, - { - "epoch": 0.6514759814825949, - "grad_norm": 2.1309430659263815, - "learning_rate": 1.1443837158194954e-06, - "loss": 0.8523, - "step": 5418 - }, - { - "epoch": 0.651596224373234, - "grad_norm": 1.550072880538944, - "learning_rate": 1.1436796919701484e-06, - "loss": 0.9687, - "step": 5419 - }, - { - "epoch": 0.651716467263873, - "grad_norm": 1.9964738995197566, - "learning_rate": 1.1429757980239115e-06, - "loss": 0.8481, - "step": 5420 - }, - { - "epoch": 0.6518367101545122, - "grad_norm": 4.480471942770768, - "learning_rate": 1.1422720340875636e-06, - "loss": 1.0377, - "step": 5421 - }, - { - "epoch": 0.6519569530451512, - "grad_norm": 1.9129148818650086, - "learning_rate": 1.1415684002678671e-06, - "loss": 1.0184, - "step": 5422 - }, - { - "epoch": 0.6520771959357903, - "grad_norm": 2.6240534211047466, - "learning_rate": 1.1408648966715617e-06, - "loss": 1.0157, - "step": 5423 - }, - { - "epoch": 0.6521974388264293, - "grad_norm": 1.8715799293337074, - "learning_rate": 1.1401615234053683e-06, - "loss": 0.9501, - "step": 5424 - }, - { - "epoch": 0.6523176817170685, - "grad_norm": 2.9815524669574764, - "learning_rate": 1.1394582805759885e-06, - "loss": 0.9862, - "step": 5425 - }, - { - "epoch": 0.6524379246077076, - "grad_norm": 15.875342177660691, - "learning_rate": 1.1387551682901022e-06, - "loss": 0.9879, - "step": 5426 - }, - { - "epoch": 0.6525581674983466, - "grad_norm": 2.09250638097157, - "learning_rate": 1.138052186654373e-06, - "loss": 0.9356, - "step": 5427 - }, - { - "epoch": 0.6526784103889858, - "grad_norm": 2.5294374169146083, - "learning_rate": 1.1373493357754417e-06, - "loss": 1.1091, - "step": 5428 - }, - { - "epoch": 0.6527986532796248, - "grad_norm": 13.773445254890046, - "learning_rate": 1.1366466157599303e-06, - "loss": 0.9988, - "step": 5429 - }, - { - "epoch": 0.6529188961702639, - "grad_norm": 2.198176677444566, - "learning_rate": 1.1359440267144412e-06, - "loss": 0.988, - "step": 5430 - }, - { - "epoch": 0.653039139060903, - "grad_norm": 2.9418171751370195, - "learning_rate": 1.1352415687455556e-06, - "loss": 0.9706, - "step": 5431 - }, - { - "epoch": 0.6531593819515421, - "grad_norm": 5.814821497505453, - "learning_rate": 1.1345392419598362e-06, - "loss": 0.8669, - "step": 5432 - }, - { - "epoch": 0.6532796248421812, - "grad_norm": 2.1454484711724153, - "learning_rate": 1.1338370464638263e-06, - "loss": 0.9448, - "step": 5433 - }, - { - "epoch": 0.6533998677328203, - "grad_norm": 2.5667580373609775, - "learning_rate": 1.1331349823640474e-06, - "loss": 0.8726, - "step": 5434 - }, - { - "epoch": 0.6535201106234594, - "grad_norm": 2.038859624014554, - "learning_rate": 1.132433049767003e-06, - "loss": 1.0134, - "step": 5435 - }, - { - "epoch": 0.6536403535140984, - "grad_norm": 1.423927921099697, - "learning_rate": 1.1317312487791748e-06, - "loss": 1.0334, - "step": 5436 - }, - { - "epoch": 0.6537605964047376, - "grad_norm": 1.8489503099151032, - "learning_rate": 1.1310295795070253e-06, - "loss": 0.9546, - "step": 5437 - }, - { - "epoch": 0.6538808392953767, - "grad_norm": 2.8322876702341606, - "learning_rate": 1.1303280420569982e-06, - "loss": 1.0389, - "step": 5438 - }, - { - "epoch": 0.6540010821860157, - "grad_norm": 1.717570854069249, - "learning_rate": 1.1296266365355158e-06, - "loss": 0.9968, - "step": 5439 - }, - { - "epoch": 0.6541213250766549, - "grad_norm": 2.098161292059101, - "learning_rate": 1.1289253630489806e-06, - "loss": 0.961, - "step": 5440 - }, - { - "epoch": 0.6542415679672939, - "grad_norm": 2.2492792246599933, - "learning_rate": 1.1282242217037753e-06, - "loss": 0.9691, - "step": 5441 - }, - { - "epoch": 0.654361810857933, - "grad_norm": 23.319566570984843, - "learning_rate": 1.127523212606262e-06, - "loss": 0.8555, - "step": 5442 - }, - { - "epoch": 0.6544820537485722, - "grad_norm": 3.061603025530498, - "learning_rate": 1.1268223358627835e-06, - "loss": 0.9565, - "step": 5443 - }, - { - "epoch": 0.6546022966392112, - "grad_norm": 1.9186033609129187, - "learning_rate": 1.126121591579663e-06, - "loss": 0.9409, - "step": 5444 - }, - { - "epoch": 0.6547225395298503, - "grad_norm": 1.5552637767000672, - "learning_rate": 1.1254209798632018e-06, - "loss": 0.9172, - "step": 5445 - }, - { - "epoch": 0.6548427824204894, - "grad_norm": 1.8939556702579128, - "learning_rate": 1.124720500819683e-06, - "loss": 1.0771, - "step": 5446 - }, - { - "epoch": 0.6549630253111285, - "grad_norm": 1.7528575999279006, - "learning_rate": 1.1240201545553682e-06, - "loss": 1.048, - "step": 5447 - }, - { - "epoch": 0.6550832682017675, - "grad_norm": 1.707151221068438, - "learning_rate": 1.1233199411764987e-06, - "loss": 0.9644, - "step": 5448 - }, - { - "epoch": 0.6552035110924067, - "grad_norm": 2.142424224083251, - "learning_rate": 1.1226198607892978e-06, - "loss": 0.9226, - "step": 5449 - }, - { - "epoch": 0.6553237539830458, - "grad_norm": 2.0146818098150896, - "learning_rate": 1.1219199134999664e-06, - "loss": 1.0263, - "step": 5450 - }, - { - "epoch": 0.6554439968736848, - "grad_norm": 5.37817838865088, - "learning_rate": 1.1212200994146863e-06, - "loss": 1.009, - "step": 5451 - }, - { - "epoch": 0.655564239764324, - "grad_norm": 2.0949142407547243, - "learning_rate": 1.120520418639618e-06, - "loss": 0.9801, - "step": 5452 - }, - { - "epoch": 0.655684482654963, - "grad_norm": 1.8323177935060626, - "learning_rate": 1.119820871280903e-06, - "loss": 1.06, - "step": 5453 - }, - { - "epoch": 0.6558047255456021, - "grad_norm": 2.2451134706974996, - "learning_rate": 1.1191214574446614e-06, - "loss": 0.9607, - "step": 5454 - }, - { - "epoch": 0.6559249684362413, - "grad_norm": 1.3573560520377397, - "learning_rate": 1.118422177236995e-06, - "loss": 1.0251, - "step": 5455 - }, - { - "epoch": 0.6560452113268803, - "grad_norm": 2.0028782637535674, - "learning_rate": 1.1177230307639835e-06, - "loss": 1.0778, - "step": 5456 - }, - { - "epoch": 0.6561654542175194, - "grad_norm": 1.5513449924595342, - "learning_rate": 1.1170240181316865e-06, - "loss": 1.0201, - "step": 5457 - }, - { - "epoch": 0.6562856971081584, - "grad_norm": 5.3661358509589805, - "learning_rate": 1.1163251394461442e-06, - "loss": 1.0259, - "step": 5458 - }, - { - "epoch": 0.6564059399987976, - "grad_norm": 2.014892090021183, - "learning_rate": 1.1156263948133746e-06, - "loss": 1.0537, - "step": 5459 - }, - { - "epoch": 0.6565261828894366, - "grad_norm": 1.6739110937220258, - "learning_rate": 1.1149277843393787e-06, - "loss": 1.0113, - "step": 5460 - }, - { - "epoch": 0.6566464257800757, - "grad_norm": 2.674986605744385, - "learning_rate": 1.1142293081301342e-06, - "loss": 0.8614, - "step": 5461 - }, - { - "epoch": 0.6567666686707149, - "grad_norm": 1.6242415785733373, - "learning_rate": 1.1135309662915995e-06, - "loss": 0.9047, - "step": 5462 - }, - { - "epoch": 0.6568869115613539, - "grad_norm": 1.8414503076888216, - "learning_rate": 1.112832758929712e-06, - "loss": 0.828, - "step": 5463 - }, - { - "epoch": 0.657007154451993, - "grad_norm": 1.7716317415189233, - "learning_rate": 1.11213468615039e-06, - "loss": 0.9714, - "step": 5464 - }, - { - "epoch": 0.6571273973426321, - "grad_norm": 2.5652527328817194, - "learning_rate": 1.1114367480595292e-06, - "loss": 0.9883, - "step": 5465 - }, - { - "epoch": 0.6572476402332712, - "grad_norm": 2.4808851958792966, - "learning_rate": 1.1107389447630086e-06, - "loss": 1.0482, - "step": 5466 - }, - { - "epoch": 0.6573678831239103, - "grad_norm": 2.0369063044832005, - "learning_rate": 1.1100412763666818e-06, - "loss": 1.0091, - "step": 5467 - }, - { - "epoch": 0.6574881260145494, - "grad_norm": 2.8223468069260504, - "learning_rate": 1.1093437429763865e-06, - "loss": 1.0232, - "step": 5468 - }, - { - "epoch": 0.6576083689051885, - "grad_norm": 1.8604261488493075, - "learning_rate": 1.1086463446979361e-06, - "loss": 0.9718, - "step": 5469 - }, - { - "epoch": 0.6577286117958275, - "grad_norm": 2.0192606466130023, - "learning_rate": 1.1079490816371277e-06, - "loss": 0.9961, - "step": 5470 - }, - { - "epoch": 0.6578488546864667, - "grad_norm": 2.603547676272611, - "learning_rate": 1.1072519538997352e-06, - "loss": 0.9673, - "step": 5471 - }, - { - "epoch": 0.6579690975771058, - "grad_norm": 1.5364958763923762, - "learning_rate": 1.1065549615915095e-06, - "loss": 1.0435, - "step": 5472 - }, - { - "epoch": 0.6580893404677448, - "grad_norm": 5.79884983365758, - "learning_rate": 1.105858104818187e-06, - "loss": 1.0044, - "step": 5473 - }, - { - "epoch": 0.658209583358384, - "grad_norm": 3.7431129792447657, - "learning_rate": 1.105161383685478e-06, - "loss": 0.9758, - "step": 5474 - }, - { - "epoch": 0.658329826249023, - "grad_norm": 0.7708290117159833, - "learning_rate": 1.1044647982990771e-06, - "loss": 0.8306, - "step": 5475 - }, - { - "epoch": 0.6584500691396621, - "grad_norm": 2.498749897256976, - "learning_rate": 1.1037683487646536e-06, - "loss": 0.8659, - "step": 5476 - }, - { - "epoch": 0.6585703120303013, - "grad_norm": 2.0057370632362557, - "learning_rate": 1.1030720351878583e-06, - "loss": 1.0003, - "step": 5477 - }, - { - "epoch": 0.6586905549209403, - "grad_norm": 0.8354156148496429, - "learning_rate": 1.102375857674323e-06, - "loss": 0.8461, - "step": 5478 - }, - { - "epoch": 0.6588107978115794, - "grad_norm": 1.9622977352996869, - "learning_rate": 1.1016798163296561e-06, - "loss": 1.1313, - "step": 5479 - }, - { - "epoch": 0.6589310407022185, - "grad_norm": 2.2209833214969614, - "learning_rate": 1.1009839112594471e-06, - "loss": 0.886, - "step": 5480 - }, - { - "epoch": 0.6590512835928576, - "grad_norm": 3.4194759511714143, - "learning_rate": 1.1002881425692638e-06, - "loss": 0.9506, - "step": 5481 - }, - { - "epoch": 0.6591715264834966, - "grad_norm": 1.6519724471540091, - "learning_rate": 1.0995925103646532e-06, - "loss": 0.9842, - "step": 5482 - }, - { - "epoch": 0.6592917693741358, - "grad_norm": 1.627333359632501, - "learning_rate": 1.0988970147511437e-06, - "loss": 0.9005, - "step": 5483 - }, - { - "epoch": 0.6594120122647749, - "grad_norm": 2.9824837488977143, - "learning_rate": 1.0982016558342405e-06, - "loss": 1.0352, - "step": 5484 - }, - { - "epoch": 0.6595322551554139, - "grad_norm": 2.538002050935534, - "learning_rate": 1.0975064337194291e-06, - "loss": 0.9481, - "step": 5485 - }, - { - "epoch": 0.6596524980460531, - "grad_norm": 1.455445272734447, - "learning_rate": 1.0968113485121743e-06, - "loss": 0.9287, - "step": 5486 - }, - { - "epoch": 0.6597727409366921, - "grad_norm": 1.971494789768719, - "learning_rate": 1.0961164003179185e-06, - "loss": 1.0302, - "step": 5487 - }, - { - "epoch": 0.6598929838273312, - "grad_norm": 2.2788118894106497, - "learning_rate": 1.0954215892420884e-06, - "loss": 1.0611, - "step": 5488 - }, - { - "epoch": 0.6600132267179702, - "grad_norm": 2.0588719218040414, - "learning_rate": 1.094726915390082e-06, - "loss": 0.9334, - "step": 5489 - }, - { - "epoch": 0.6601334696086094, - "grad_norm": 1.9295006788993485, - "learning_rate": 1.0940323788672836e-06, - "loss": 0.9273, - "step": 5490 - }, - { - "epoch": 0.6602537124992485, - "grad_norm": 1.6451393144974757, - "learning_rate": 1.0933379797790522e-06, - "loss": 0.9725, - "step": 5491 - }, - { - "epoch": 0.6603739553898875, - "grad_norm": 2.789702440890375, - "learning_rate": 1.0926437182307293e-06, - "loss": 0.9438, - "step": 5492 - }, - { - "epoch": 0.6604941982805267, - "grad_norm": 1.834854585774276, - "learning_rate": 1.0919495943276338e-06, - "loss": 1.0108, - "step": 5493 - }, - { - "epoch": 0.6606144411711657, - "grad_norm": 2.122324437225775, - "learning_rate": 1.0912556081750611e-06, - "loss": 0.9911, - "step": 5494 - }, - { - "epoch": 0.6607346840618048, - "grad_norm": 4.840807211532857, - "learning_rate": 1.0905617598782909e-06, - "loss": 0.9967, - "step": 5495 - }, - { - "epoch": 0.660854926952444, - "grad_norm": 3.1488132299940013, - "learning_rate": 1.0898680495425775e-06, - "loss": 1.0388, - "step": 5496 - }, - { - "epoch": 0.660975169843083, - "grad_norm": 2.071079788535081, - "learning_rate": 1.0891744772731594e-06, - "loss": 1.0327, - "step": 5497 - }, - { - "epoch": 0.6610954127337221, - "grad_norm": 1.6693436898823295, - "learning_rate": 1.088481043175248e-06, - "loss": 0.8767, - "step": 5498 - }, - { - "epoch": 0.6612156556243612, - "grad_norm": 1.5390807794560095, - "learning_rate": 1.0877877473540368e-06, - "loss": 0.9834, - "step": 5499 - }, - { - "epoch": 0.6613358985150003, - "grad_norm": 2.815952386144683, - "learning_rate": 1.0870945899147002e-06, - "loss": 0.9578, - "step": 5500 - }, - { - "epoch": 0.6614561414056394, - "grad_norm": 1.7183974164501141, - "learning_rate": 1.0864015709623879e-06, - "loss": 0.9784, - "step": 5501 - }, - { - "epoch": 0.6615763842962785, - "grad_norm": 2.045864991709017, - "learning_rate": 1.0857086906022313e-06, - "loss": 1.0206, - "step": 5502 - }, - { - "epoch": 0.6616966271869176, - "grad_norm": 2.3330730111363787, - "learning_rate": 1.0850159489393388e-06, - "loss": 0.9627, - "step": 5503 - }, - { - "epoch": 0.6618168700775566, - "grad_norm": 1.9428155646162695, - "learning_rate": 1.0843233460787992e-06, - "loss": 1.0495, - "step": 5504 - }, - { - "epoch": 0.6619371129681958, - "grad_norm": 1.6080959385344917, - "learning_rate": 1.0836308821256805e-06, - "loss": 1.0033, - "step": 5505 - }, - { - "epoch": 0.6620573558588349, - "grad_norm": 1.807169145137658, - "learning_rate": 1.0829385571850282e-06, - "loss": 1.0075, - "step": 5506 - }, - { - "epoch": 0.6621775987494739, - "grad_norm": 10.429029488255106, - "learning_rate": 1.0822463713618679e-06, - "loss": 1.0673, - "step": 5507 - }, - { - "epoch": 0.6622978416401131, - "grad_norm": 1.9401898324644768, - "learning_rate": 1.0815543247612034e-06, - "loss": 1.0752, - "step": 5508 - }, - { - "epoch": 0.6624180845307521, - "grad_norm": 1.470208194936809, - "learning_rate": 1.0808624174880168e-06, - "loss": 1.0575, - "step": 5509 - }, - { - "epoch": 0.6625383274213912, - "grad_norm": 1.5955398830926881, - "learning_rate": 1.080170649647272e-06, - "loss": 1.0237, - "step": 5510 - }, - { - "epoch": 0.6626585703120303, - "grad_norm": 1.5872929986274813, - "learning_rate": 1.0794790213439068e-06, - "loss": 0.8973, - "step": 5511 - }, - { - "epoch": 0.6627788132026694, - "grad_norm": 3.4627085819148284, - "learning_rate": 1.078787532682843e-06, - "loss": 1.01, - "step": 5512 - }, - { - "epoch": 0.6628990560933085, - "grad_norm": 2.013877525151372, - "learning_rate": 1.0780961837689773e-06, - "loss": 0.9897, - "step": 5513 - }, - { - "epoch": 0.6630192989839476, - "grad_norm": 1.5338097022390018, - "learning_rate": 1.0774049747071883e-06, - "loss": 0.9287, - "step": 5514 - }, - { - "epoch": 0.6631395418745867, - "grad_norm": 1.601224389091039, - "learning_rate": 1.076713905602332e-06, - "loss": 0.9108, - "step": 5515 - }, - { - "epoch": 0.6632597847652257, - "grad_norm": 1.7662041988495247, - "learning_rate": 1.07602297655924e-06, - "loss": 1.0376, - "step": 5516 - }, - { - "epoch": 0.6633800276558649, - "grad_norm": 1.6099692423471048, - "learning_rate": 1.0753321876827292e-06, - "loss": 1.0311, - "step": 5517 - }, - { - "epoch": 0.663500270546504, - "grad_norm": 1.8280667026712354, - "learning_rate": 1.0746415390775893e-06, - "loss": 0.9689, - "step": 5518 - }, - { - "epoch": 0.663620513437143, - "grad_norm": 2.3252278165055404, - "learning_rate": 1.0739510308485939e-06, - "loss": 1.0038, - "step": 5519 - }, - { - "epoch": 0.6637407563277821, - "grad_norm": 0.8116628295316867, - "learning_rate": 1.07326066310049e-06, - "loss": 0.8754, - "step": 5520 - }, - { - "epoch": 0.6638609992184212, - "grad_norm": 2.4952627120906232, - "learning_rate": 1.0725704359380059e-06, - "loss": 1.0256, - "step": 5521 - }, - { - "epoch": 0.6639812421090603, - "grad_norm": 1.9143772411426723, - "learning_rate": 1.0718803494658497e-06, - "loss": 0.9492, - "step": 5522 - }, - { - "epoch": 0.6641014849996993, - "grad_norm": 2.7273290917323076, - "learning_rate": 1.071190403788707e-06, - "loss": 1.0694, - "step": 5523 - }, - { - "epoch": 0.6642217278903385, - "grad_norm": 1.9423253667975844, - "learning_rate": 1.0705005990112415e-06, - "loss": 0.9786, - "step": 5524 - }, - { - "epoch": 0.6643419707809776, - "grad_norm": 2.5813873919482853, - "learning_rate": 1.0698109352380957e-06, - "loss": 0.9719, - "step": 5525 - }, - { - "epoch": 0.6644622136716166, - "grad_norm": 1.7271993851947265, - "learning_rate": 1.0691214125738909e-06, - "loss": 1.0076, - "step": 5526 - }, - { - "epoch": 0.6645824565622558, - "grad_norm": 0.8729608821975223, - "learning_rate": 1.0684320311232287e-06, - "loss": 0.8465, - "step": 5527 - }, - { - "epoch": 0.6647026994528948, - "grad_norm": 1.9073643788412546, - "learning_rate": 1.0677427909906865e-06, - "loss": 1.0358, - "step": 5528 - }, - { - "epoch": 0.6648229423435339, - "grad_norm": 2.0586725035339257, - "learning_rate": 1.0670536922808216e-06, - "loss": 0.9457, - "step": 5529 - }, - { - "epoch": 0.6649431852341731, - "grad_norm": 2.174632017203785, - "learning_rate": 1.06636473509817e-06, - "loss": 0.9463, - "step": 5530 - }, - { - "epoch": 0.6650634281248121, - "grad_norm": 1.8900889117404127, - "learning_rate": 1.0656759195472447e-06, - "loss": 1.0365, - "step": 5531 - }, - { - "epoch": 0.6651836710154512, - "grad_norm": 0.818617713957542, - "learning_rate": 1.0649872457325414e-06, - "loss": 0.8749, - "step": 5532 - }, - { - "epoch": 0.6653039139060903, - "grad_norm": 0.9007065357412016, - "learning_rate": 1.0642987137585278e-06, - "loss": 0.8458, - "step": 5533 - }, - { - "epoch": 0.6654241567967294, - "grad_norm": 1.6462373197468358, - "learning_rate": 1.0636103237296561e-06, - "loss": 1.0545, - "step": 5534 - }, - { - "epoch": 0.6655443996873684, - "grad_norm": 1.706910083232805, - "learning_rate": 1.062922075750353e-06, - "loss": 1.0679, - "step": 5535 - }, - { - "epoch": 0.6656646425780076, - "grad_norm": 2.1529431624788096, - "learning_rate": 1.0622339699250267e-06, - "loss": 0.9462, - "step": 5536 - }, - { - "epoch": 0.6657848854686467, - "grad_norm": 1.8959263649496083, - "learning_rate": 1.0615460063580624e-06, - "loss": 1.0251, - "step": 5537 - }, - { - "epoch": 0.6659051283592857, - "grad_norm": 2.2054870064196344, - "learning_rate": 1.060858185153821e-06, - "loss": 0.9645, - "step": 5538 - }, - { - "epoch": 0.6660253712499249, - "grad_norm": 2.8893726832244186, - "learning_rate": 1.0601705064166474e-06, - "loss": 0.9907, - "step": 5539 - }, - { - "epoch": 0.666145614140564, - "grad_norm": 2.4868182757730306, - "learning_rate": 1.0594829702508596e-06, - "loss": 0.9643, - "step": 5540 - }, - { - "epoch": 0.666265857031203, - "grad_norm": 1.7329441388609372, - "learning_rate": 1.0587955767607592e-06, - "loss": 0.7838, - "step": 5541 - }, - { - "epoch": 0.6663860999218422, - "grad_norm": 4.054185545911019, - "learning_rate": 1.0581083260506206e-06, - "loss": 1.0073, - "step": 5542 - }, - { - "epoch": 0.6665063428124812, - "grad_norm": 2.060118485163559, - "learning_rate": 1.0574212182246993e-06, - "loss": 0.9953, - "step": 5543 - }, - { - "epoch": 0.6666265857031203, - "grad_norm": 2.7985657424551866, - "learning_rate": 1.0567342533872303e-06, - "loss": 0.9779, - "step": 5544 - }, - { - "epoch": 0.6667468285937594, - "grad_norm": 1.518373023448514, - "learning_rate": 1.0560474316424255e-06, - "loss": 1.0437, - "step": 5545 - }, - { - "epoch": 0.6668670714843985, - "grad_norm": 5.168411985551518, - "learning_rate": 1.0553607530944746e-06, - "loss": 0.9633, - "step": 5546 - }, - { - "epoch": 0.6669873143750376, - "grad_norm": 1.8422370781022768, - "learning_rate": 1.0546742178475463e-06, - "loss": 1.121, - "step": 5547 - }, - { - "epoch": 0.6671075572656767, - "grad_norm": 4.756955525307229, - "learning_rate": 1.0539878260057868e-06, - "loss": 1.0944, - "step": 5548 - }, - { - "epoch": 0.6672278001563158, - "grad_norm": 2.9472764648885317, - "learning_rate": 1.0533015776733226e-06, - "loss": 0.9119, - "step": 5549 - }, - { - "epoch": 0.6673480430469548, - "grad_norm": 2.5085834969851883, - "learning_rate": 1.0526154729542566e-06, - "loss": 1.0107, - "step": 5550 - }, - { - "epoch": 0.6674682859375939, - "grad_norm": 2.807303650971395, - "learning_rate": 1.0519295119526699e-06, - "loss": 1.0233, - "step": 5551 - }, - { - "epoch": 0.667588528828233, - "grad_norm": 1.735213824351366, - "learning_rate": 1.0512436947726227e-06, - "loss": 1.0632, - "step": 5552 - }, - { - "epoch": 0.6677087717188721, - "grad_norm": 2.5869066894241852, - "learning_rate": 1.0505580215181517e-06, - "loss": 0.8866, - "step": 5553 - }, - { - "epoch": 0.6678290146095112, - "grad_norm": 0.8109296882855853, - "learning_rate": 1.0498724922932753e-06, - "loss": 0.824, - "step": 5554 - }, - { - "epoch": 0.6679492575001503, - "grad_norm": 2.427876180618173, - "learning_rate": 1.0491871072019851e-06, - "loss": 1.0895, - "step": 5555 - }, - { - "epoch": 0.6680695003907894, - "grad_norm": 2.4551451316556983, - "learning_rate": 1.0485018663482555e-06, - "loss": 0.8656, - "step": 5556 - }, - { - "epoch": 0.6681897432814284, - "grad_norm": 2.4709463851651243, - "learning_rate": 1.0478167698360354e-06, - "loss": 0.9379, - "step": 5557 - }, - { - "epoch": 0.6683099861720676, - "grad_norm": 2.0471482422135785, - "learning_rate": 1.0471318177692556e-06, - "loss": 0.9309, - "step": 5558 - }, - { - "epoch": 0.6684302290627067, - "grad_norm": 2.139317691089999, - "learning_rate": 1.046447010251821e-06, - "loss": 0.9809, - "step": 5559 - }, - { - "epoch": 0.6685504719533457, - "grad_norm": 1.665209737403507, - "learning_rate": 1.0457623473876157e-06, - "loss": 0.9916, - "step": 5560 - }, - { - "epoch": 0.6686707148439849, - "grad_norm": 1.6220154864963028, - "learning_rate": 1.0450778292805046e-06, - "loss": 0.9338, - "step": 5561 - }, - { - "epoch": 0.6687909577346239, - "grad_norm": 1.600813453242416, - "learning_rate": 1.0443934560343267e-06, - "loss": 1.0219, - "step": 5562 - }, - { - "epoch": 0.668911200625263, - "grad_norm": 1.7776140279987223, - "learning_rate": 1.0437092277529034e-06, - "loss": 1.0057, - "step": 5563 - }, - { - "epoch": 0.6690314435159022, - "grad_norm": 2.174306921313946, - "learning_rate": 1.0430251445400292e-06, - "loss": 0.9703, - "step": 5564 - }, - { - "epoch": 0.6691516864065412, - "grad_norm": 2.629447167169887, - "learning_rate": 1.0423412064994787e-06, - "loss": 0.8554, - "step": 5565 - }, - { - "epoch": 0.6692719292971803, - "grad_norm": 2.230320913890368, - "learning_rate": 1.0416574137350064e-06, - "loss": 0.9686, - "step": 5566 - }, - { - "epoch": 0.6693921721878194, - "grad_norm": 2.664969371025611, - "learning_rate": 1.0409737663503428e-06, - "loss": 1.0466, - "step": 5567 - }, - { - "epoch": 0.6695124150784585, - "grad_norm": 1.773518699097871, - "learning_rate": 1.040290264449196e-06, - "loss": 1.0658, - "step": 5568 - }, - { - "epoch": 0.6696326579690975, - "grad_norm": 2.755762045781967, - "learning_rate": 1.0396069081352532e-06, - "loss": 0.8652, - "step": 5569 - }, - { - "epoch": 0.6697529008597367, - "grad_norm": 0.8195090219048781, - "learning_rate": 1.0389236975121782e-06, - "loss": 0.8263, - "step": 5570 - }, - { - "epoch": 0.6698731437503758, - "grad_norm": 3.215724967981893, - "learning_rate": 1.0382406326836147e-06, - "loss": 0.9451, - "step": 5571 - }, - { - "epoch": 0.6699933866410148, - "grad_norm": 2.9946931949956612, - "learning_rate": 1.0375577137531828e-06, - "loss": 0.9735, - "step": 5572 - }, - { - "epoch": 0.670113629531654, - "grad_norm": 1.490944869231716, - "learning_rate": 1.0368749408244802e-06, - "loss": 0.9503, - "step": 5573 - }, - { - "epoch": 0.670233872422293, - "grad_norm": 6.148011247335457, - "learning_rate": 1.0361923140010836e-06, - "loss": 1.0139, - "step": 5574 - }, - { - "epoch": 0.6703541153129321, - "grad_norm": 4.922535491706917, - "learning_rate": 1.0355098333865455e-06, - "loss": 0.8699, - "step": 5575 - }, - { - "epoch": 0.6704743582035713, - "grad_norm": 4.982742664114524, - "learning_rate": 1.0348274990844006e-06, - "loss": 0.9215, - "step": 5576 - }, - { - "epoch": 0.6705946010942103, - "grad_norm": 1.791087374815202, - "learning_rate": 1.034145311198155e-06, - "loss": 0.954, - "step": 5577 - }, - { - "epoch": 0.6707148439848494, - "grad_norm": 1.7240975161452505, - "learning_rate": 1.0334632698312989e-06, - "loss": 0.8707, - "step": 5578 - }, - { - "epoch": 0.6708350868754885, - "grad_norm": 1.7316459285655728, - "learning_rate": 1.032781375087295e-06, - "loss": 0.9868, - "step": 5579 - }, - { - "epoch": 0.6709553297661276, - "grad_norm": 1.5301465320853131, - "learning_rate": 1.0320996270695891e-06, - "loss": 0.9007, - "step": 5580 - }, - { - "epoch": 0.6710755726567667, - "grad_norm": 1.9386564925546421, - "learning_rate": 1.0314180258815998e-06, - "loss": 0.9644, - "step": 5581 - }, - { - "epoch": 0.6711958155474057, - "grad_norm": 1.490561433152484, - "learning_rate": 1.0307365716267247e-06, - "loss": 0.9748, - "step": 5582 - }, - { - "epoch": 0.6713160584380449, - "grad_norm": 3.29268245877932, - "learning_rate": 1.0300552644083423e-06, - "loss": 1.0162, - "step": 5583 - }, - { - "epoch": 0.6714363013286839, - "grad_norm": 2.265504610217339, - "learning_rate": 1.0293741043298036e-06, - "loss": 0.9498, - "step": 5584 - }, - { - "epoch": 0.671556544219323, - "grad_norm": 2.280836721112773, - "learning_rate": 1.0286930914944436e-06, - "loss": 0.9463, - "step": 5585 - }, - { - "epoch": 0.6716767871099621, - "grad_norm": 2.3306557642000096, - "learning_rate": 1.0280122260055684e-06, - "loss": 1.0078, - "step": 5586 - }, - { - "epoch": 0.6717970300006012, - "grad_norm": 2.053901551053725, - "learning_rate": 1.0273315079664652e-06, - "loss": 1.0535, - "step": 5587 - }, - { - "epoch": 0.6719172728912403, - "grad_norm": 2.4234184023352427, - "learning_rate": 1.0266509374803992e-06, - "loss": 0.9803, - "step": 5588 - }, - { - "epoch": 0.6720375157818794, - "grad_norm": 2.3883644336593086, - "learning_rate": 1.0259705146506123e-06, - "loss": 1.072, - "step": 5589 - }, - { - "epoch": 0.6721577586725185, - "grad_norm": 1.9466739164940294, - "learning_rate": 1.025290239580324e-06, - "loss": 1.0007, - "step": 5590 - }, - { - "epoch": 0.6722780015631575, - "grad_norm": 1.6110619897406144, - "learning_rate": 1.0246101123727313e-06, - "loss": 0.9852, - "step": 5591 - }, - { - "epoch": 0.6723982444537967, - "grad_norm": 1.6948107842803395, - "learning_rate": 1.0239301331310085e-06, - "loss": 1.0186, - "step": 5592 - }, - { - "epoch": 0.6725184873444358, - "grad_norm": 1.548827971065172, - "learning_rate": 1.0232503019583088e-06, - "loss": 1.1129, - "step": 5593 - }, - { - "epoch": 0.6726387302350748, - "grad_norm": 1.9868415461553064, - "learning_rate": 1.0225706189577619e-06, - "loss": 0.9292, - "step": 5594 - }, - { - "epoch": 0.672758973125714, - "grad_norm": 10.195918624223953, - "learning_rate": 1.021891084232475e-06, - "loss": 0.9713, - "step": 5595 - }, - { - "epoch": 0.672879216016353, - "grad_norm": 1.972213786141467, - "learning_rate": 1.0212116978855325e-06, - "loss": 1.0231, - "step": 5596 - }, - { - "epoch": 0.6729994589069921, - "grad_norm": 1.5420502451813756, - "learning_rate": 1.020532460019997e-06, - "loss": 1.0221, - "step": 5597 - }, - { - "epoch": 0.6731197017976313, - "grad_norm": 2.448934876267281, - "learning_rate": 1.0198533707389096e-06, - "loss": 0.9285, - "step": 5598 - }, - { - "epoch": 0.6732399446882703, - "grad_norm": 1.6882766066897335, - "learning_rate": 1.0191744301452853e-06, - "loss": 0.9652, - "step": 5599 - }, - { - "epoch": 0.6733601875789094, - "grad_norm": 2.827373425848941, - "learning_rate": 1.0184956383421208e-06, - "loss": 0.9293, - "step": 5600 - }, - { - "epoch": 0.6734804304695485, - "grad_norm": 4.35269889854743, - "learning_rate": 1.017816995432387e-06, - "loss": 0.8789, - "step": 5601 - }, - { - "epoch": 0.6736006733601876, - "grad_norm": 1.7861211994308774, - "learning_rate": 1.0171385015190353e-06, - "loss": 0.9673, - "step": 5602 - }, - { - "epoch": 0.6737209162508266, - "grad_norm": 1.8549332980479134, - "learning_rate": 1.0164601567049908e-06, - "loss": 0.9575, - "step": 5603 - }, - { - "epoch": 0.6738411591414658, - "grad_norm": 1.9344387481489214, - "learning_rate": 1.015781961093158e-06, - "loss": 1.0333, - "step": 5604 - }, - { - "epoch": 0.6739614020321049, - "grad_norm": 1.6522271586377686, - "learning_rate": 1.0151039147864197e-06, - "loss": 0.9986, - "step": 5605 - }, - { - "epoch": 0.6740816449227439, - "grad_norm": 2.4976299495702388, - "learning_rate": 1.0144260178876336e-06, - "loss": 0.8958, - "step": 5606 - }, - { - "epoch": 0.6742018878133831, - "grad_norm": 2.34316856801658, - "learning_rate": 1.0137482704996388e-06, - "loss": 0.9006, - "step": 5607 - }, - { - "epoch": 0.6743221307040221, - "grad_norm": 3.5484070011039317, - "learning_rate": 1.0130706727252461e-06, - "loss": 1.0216, - "step": 5608 - }, - { - "epoch": 0.6744423735946612, - "grad_norm": 2.482391333223311, - "learning_rate": 1.0123932246672468e-06, - "loss": 0.9089, - "step": 5609 - }, - { - "epoch": 0.6745626164853004, - "grad_norm": 0.7966284651023244, - "learning_rate": 1.0117159264284114e-06, - "loss": 0.8146, - "step": 5610 - }, - { - "epoch": 0.6746828593759394, - "grad_norm": 1.641094758128692, - "learning_rate": 1.0110387781114837e-06, - "loss": 1.0013, - "step": 5611 - }, - { - "epoch": 0.6748031022665785, - "grad_norm": 2.0722594121755358, - "learning_rate": 1.0103617798191872e-06, - "loss": 0.9909, - "step": 5612 - }, - { - "epoch": 0.6749233451572175, - "grad_norm": 2.571706283801485, - "learning_rate": 1.0096849316542217e-06, - "loss": 1.0494, - "step": 5613 - }, - { - "epoch": 0.6750435880478567, - "grad_norm": 2.263097682009473, - "learning_rate": 1.0090082337192643e-06, - "loss": 0.9703, - "step": 5614 - }, - { - "epoch": 0.6751638309384957, - "grad_norm": 1.9715216971549934, - "learning_rate": 1.0083316861169705e-06, - "loss": 1.0045, - "step": 5615 - }, - { - "epoch": 0.6752840738291348, - "grad_norm": 1.844117338150275, - "learning_rate": 1.0076552889499713e-06, - "loss": 0.9567, - "step": 5616 - }, - { - "epoch": 0.675404316719774, - "grad_norm": 4.12570554305895, - "learning_rate": 1.006979042320876e-06, - "loss": 0.9593, - "step": 5617 - }, - { - "epoch": 0.675524559610413, - "grad_norm": 2.0015518605100593, - "learning_rate": 1.0063029463322702e-06, - "loss": 0.8617, - "step": 5618 - }, - { - "epoch": 0.6756448025010521, - "grad_norm": 2.589132035679361, - "learning_rate": 1.0056270010867164e-06, - "loss": 0.9783, - "step": 5619 - }, - { - "epoch": 0.6757650453916912, - "grad_norm": 4.43446934326118, - "learning_rate": 1.004951206686758e-06, - "loss": 1.0062, - "step": 5620 - }, - { - "epoch": 0.6758852882823303, - "grad_norm": 1.8741782612976945, - "learning_rate": 1.0042755632349087e-06, - "loss": 0.9449, - "step": 5621 - }, - { - "epoch": 0.6760055311729694, - "grad_norm": 2.458867052838098, - "learning_rate": 1.0036000708336653e-06, - "loss": 0.8574, - "step": 5622 - }, - { - "epoch": 0.6761257740636085, - "grad_norm": 2.015634107938785, - "learning_rate": 1.0029247295854984e-06, - "loss": 1.0253, - "step": 5623 - }, - { - "epoch": 0.6762460169542476, - "grad_norm": 2.9982886201395296, - "learning_rate": 1.0022495395928588e-06, - "loss": 0.9442, - "step": 5624 - }, - { - "epoch": 0.6763662598448866, - "grad_norm": 0.7844518281918138, - "learning_rate": 1.0015745009581697e-06, - "loss": 0.871, - "step": 5625 - }, - { - "epoch": 0.6764865027355258, - "grad_norm": 2.143833178805516, - "learning_rate": 1.0008996137838343e-06, - "loss": 0.8965, - "step": 5626 - }, - { - "epoch": 0.6766067456261649, - "grad_norm": 1.8630564172119426, - "learning_rate": 1.000224878172234e-06, - "loss": 1.0249, - "step": 5627 - }, - { - "epoch": 0.6767269885168039, - "grad_norm": 2.2140203220223893, - "learning_rate": 9.99550294225724e-07, - "loss": 0.9602, - "step": 5628 - }, - { - "epoch": 0.6768472314074431, - "grad_norm": 1.8845557535759407, - "learning_rate": 9.988758620466402e-07, - "loss": 0.9422, - "step": 5629 - }, - { - "epoch": 0.6769674742980821, - "grad_norm": 1.5263496929979516, - "learning_rate": 9.982015817372917e-07, - "loss": 0.9921, - "step": 5630 - }, - { - "epoch": 0.6770877171887212, - "grad_norm": 1.8768054534902796, - "learning_rate": 9.975274533999657e-07, - "loss": 1.0496, - "step": 5631 - }, - { - "epoch": 0.6772079600793603, - "grad_norm": 3.7549931662506215, - "learning_rate": 9.96853477136929e-07, - "loss": 1.0704, - "step": 5632 - }, - { - "epoch": 0.6773282029699994, - "grad_norm": 1.8144733604395047, - "learning_rate": 9.96179653050422e-07, - "loss": 0.9679, - "step": 5633 - }, - { - "epoch": 0.6774484458606385, - "grad_norm": 5.516722140460659, - "learning_rate": 9.955059812426635e-07, - "loss": 0.9652, - "step": 5634 - }, - { - "epoch": 0.6775686887512776, - "grad_norm": 1.9117954452339074, - "learning_rate": 9.948324618158493e-07, - "loss": 1.0536, - "step": 5635 - }, - { - "epoch": 0.6776889316419167, - "grad_norm": 2.401741262420615, - "learning_rate": 9.941590948721502e-07, - "loss": 1.0111, - "step": 5636 - }, - { - "epoch": 0.6778091745325557, - "grad_norm": 1.9216917617407674, - "learning_rate": 9.934858805137188e-07, - "loss": 0.9892, - "step": 5637 - }, - { - "epoch": 0.6779294174231949, - "grad_norm": 1.7231063427778095, - "learning_rate": 9.92812818842677e-07, - "loss": 1.0395, - "step": 5638 - }, - { - "epoch": 0.678049660313834, - "grad_norm": 1.6999907044549256, - "learning_rate": 9.921399099611306e-07, - "loss": 0.869, - "step": 5639 - }, - { - "epoch": 0.678169903204473, - "grad_norm": 2.028946142204748, - "learning_rate": 9.914671539711588e-07, - "loss": 0.9217, - "step": 5640 - }, - { - "epoch": 0.6782901460951122, - "grad_norm": 1.816864415293103, - "learning_rate": 9.90794550974817e-07, - "loss": 1.0115, - "step": 5641 - }, - { - "epoch": 0.6784103889857512, - "grad_norm": 2.218625562160231, - "learning_rate": 9.901221010741407e-07, - "loss": 1.0412, - "step": 5642 - }, - { - "epoch": 0.6785306318763903, - "grad_norm": 2.7701078818070184, - "learning_rate": 9.894498043711375e-07, - "loss": 0.9757, - "step": 5643 - }, - { - "epoch": 0.6786508747670293, - "grad_norm": 3.2505175838608045, - "learning_rate": 9.887776609677962e-07, - "loss": 0.9244, - "step": 5644 - }, - { - "epoch": 0.6787711176576685, - "grad_norm": 1.731104473511541, - "learning_rate": 9.88105670966079e-07, - "loss": 0.9549, - "step": 5645 - }, - { - "epoch": 0.6788913605483076, - "grad_norm": 2.0280493535539117, - "learning_rate": 9.874338344679283e-07, - "loss": 1.0171, - "step": 5646 - }, - { - "epoch": 0.6790116034389466, - "grad_norm": 1.6517934762190563, - "learning_rate": 9.86762151575259e-07, - "loss": 0.9757, - "step": 5647 - }, - { - "epoch": 0.6791318463295858, - "grad_norm": 1.347499628226206, - "learning_rate": 9.860906223899651e-07, - "loss": 1.0264, - "step": 5648 - }, - { - "epoch": 0.6792520892202248, - "grad_norm": 5.444774453331951, - "learning_rate": 9.854192470139184e-07, - "loss": 0.9845, - "step": 5649 - }, - { - "epoch": 0.6793723321108639, - "grad_norm": 1.8813107781715201, - "learning_rate": 9.847480255489645e-07, - "loss": 0.9482, - "step": 5650 - }, - { - "epoch": 0.6794925750015031, - "grad_norm": 2.2481643818387824, - "learning_rate": 9.840769580969295e-07, - "loss": 0.9194, - "step": 5651 - }, - { - "epoch": 0.6796128178921421, - "grad_norm": 2.0534522691357235, - "learning_rate": 9.834060447596114e-07, - "loss": 1.0246, - "step": 5652 - }, - { - "epoch": 0.6797330607827812, - "grad_norm": 1.7559670488485386, - "learning_rate": 9.827352856387868e-07, - "loss": 1.0069, - "step": 5653 - }, - { - "epoch": 0.6798533036734203, - "grad_norm": 1.2544692727374276, - "learning_rate": 9.820646808362118e-07, - "loss": 0.8865, - "step": 5654 - }, - { - "epoch": 0.6799735465640594, - "grad_norm": 2.185383453941042, - "learning_rate": 9.813942304536154e-07, - "loss": 0.9468, - "step": 5655 - }, - { - "epoch": 0.6800937894546984, - "grad_norm": 2.383726651297902, - "learning_rate": 9.807239345927043e-07, - "loss": 0.867, - "step": 5656 - }, - { - "epoch": 0.6802140323453376, - "grad_norm": 2.085837029598475, - "learning_rate": 9.80053793355162e-07, - "loss": 0.9568, - "step": 5657 - }, - { - "epoch": 0.6803342752359767, - "grad_norm": 2.0955565899958417, - "learning_rate": 9.793838068426472e-07, - "loss": 0.977, - "step": 5658 - }, - { - "epoch": 0.6804545181266157, - "grad_norm": 2.3013725621927077, - "learning_rate": 9.78713975156799e-07, - "loss": 0.8327, - "step": 5659 - }, - { - "epoch": 0.6805747610172549, - "grad_norm": 1.8503755394496026, - "learning_rate": 9.780442983992273e-07, - "loss": 0.951, - "step": 5660 - }, - { - "epoch": 0.680695003907894, - "grad_norm": 1.7054873540180755, - "learning_rate": 9.773747766715238e-07, - "loss": 0.9423, - "step": 5661 - }, - { - "epoch": 0.680815246798533, - "grad_norm": 2.2837861477601558, - "learning_rate": 9.767054100752536e-07, - "loss": 1.0334, - "step": 5662 - }, - { - "epoch": 0.6809354896891722, - "grad_norm": 2.009401872375108, - "learning_rate": 9.760361987119584e-07, - "loss": 1.0409, - "step": 5663 - }, - { - "epoch": 0.6810557325798112, - "grad_norm": 2.5510618280434696, - "learning_rate": 9.753671426831592e-07, - "loss": 0.9091, - "step": 5664 - }, - { - "epoch": 0.6811759754704503, - "grad_norm": 1.998196146023252, - "learning_rate": 9.746982420903483e-07, - "loss": 1.0188, - "step": 5665 - }, - { - "epoch": 0.6812962183610894, - "grad_norm": 1.4075876443789481, - "learning_rate": 9.740294970349993e-07, - "loss": 0.9756, - "step": 5666 - }, - { - "epoch": 0.6814164612517285, - "grad_norm": 0.9813596166139437, - "learning_rate": 9.733609076185594e-07, - "loss": 0.9105, - "step": 5667 - }, - { - "epoch": 0.6815367041423676, - "grad_norm": 1.861339525025753, - "learning_rate": 9.72692473942455e-07, - "loss": 1.0697, - "step": 5668 - }, - { - "epoch": 0.6816569470330067, - "grad_norm": 1.5640914441764107, - "learning_rate": 9.720241961080849e-07, - "loss": 1.0066, - "step": 5669 - }, - { - "epoch": 0.6817771899236458, - "grad_norm": 2.277559927728742, - "learning_rate": 9.713560742168259e-07, - "loss": 0.9456, - "step": 5670 - }, - { - "epoch": 0.6818974328142848, - "grad_norm": 3.0591067764487, - "learning_rate": 9.706881083700333e-07, - "loss": 0.9407, - "step": 5671 - }, - { - "epoch": 0.682017675704924, - "grad_norm": 1.8974863189083435, - "learning_rate": 9.700202986690357e-07, - "loss": 1.0563, - "step": 5672 - }, - { - "epoch": 0.682137918595563, - "grad_norm": 2.027740851230086, - "learning_rate": 9.693526452151413e-07, - "loss": 0.8885, - "step": 5673 - }, - { - "epoch": 0.6822581614862021, - "grad_norm": 1.6858780448265522, - "learning_rate": 9.686851481096305e-07, - "loss": 0.9916, - "step": 5674 - }, - { - "epoch": 0.6823784043768413, - "grad_norm": 1.8510831059368564, - "learning_rate": 9.68017807453762e-07, - "loss": 0.9589, - "step": 5675 - }, - { - "epoch": 0.6824986472674803, - "grad_norm": 2.419303562584859, - "learning_rate": 9.673506233487721e-07, - "loss": 0.9623, - "step": 5676 - }, - { - "epoch": 0.6826188901581194, - "grad_norm": 1.920102426260522, - "learning_rate": 9.666835958958717e-07, - "loss": 1.0952, - "step": 5677 - }, - { - "epoch": 0.6827391330487584, - "grad_norm": 1.8701081357891254, - "learning_rate": 9.660167251962484e-07, - "loss": 1.0199, - "step": 5678 - }, - { - "epoch": 0.6828593759393976, - "grad_norm": 1.5177296448701272, - "learning_rate": 9.653500113510654e-07, - "loss": 1.0065, - "step": 5679 - }, - { - "epoch": 0.6829796188300367, - "grad_norm": 2.641968876091527, - "learning_rate": 9.646834544614627e-07, - "loss": 0.8981, - "step": 5680 - }, - { - "epoch": 0.6830998617206757, - "grad_norm": 2.9944689944489022, - "learning_rate": 9.64017054628558e-07, - "loss": 0.9906, - "step": 5681 - }, - { - "epoch": 0.6832201046113149, - "grad_norm": 1.5773131293413345, - "learning_rate": 9.63350811953441e-07, - "loss": 1.0202, - "step": 5682 - }, - { - "epoch": 0.6833403475019539, - "grad_norm": 2.415378545153521, - "learning_rate": 9.626847265371826e-07, - "loss": 0.9375, - "step": 5683 - }, - { - "epoch": 0.683460590392593, - "grad_norm": 2.01505017031172, - "learning_rate": 9.620187984808262e-07, - "loss": 1.0141, - "step": 5684 - }, - { - "epoch": 0.6835808332832322, - "grad_norm": 1.7824326320292665, - "learning_rate": 9.613530278853919e-07, - "loss": 1.0859, - "step": 5685 - }, - { - "epoch": 0.6837010761738712, - "grad_norm": 1.9173156496478407, - "learning_rate": 9.60687414851879e-07, - "loss": 0.9721, - "step": 5686 - }, - { - "epoch": 0.6838213190645103, - "grad_norm": 2.951513587461561, - "learning_rate": 9.600219594812575e-07, - "loss": 0.9987, - "step": 5687 - }, - { - "epoch": 0.6839415619551494, - "grad_norm": 1.8177682671781323, - "learning_rate": 9.593566618744786e-07, - "loss": 0.9569, - "step": 5688 - }, - { - "epoch": 0.6840618048457885, - "grad_norm": 1.6760111475462265, - "learning_rate": 9.58691522132466e-07, - "loss": 0.9643, - "step": 5689 - }, - { - "epoch": 0.6841820477364275, - "grad_norm": 1.977342052300733, - "learning_rate": 9.58026540356123e-07, - "loss": 1.0728, - "step": 5690 - }, - { - "epoch": 0.6843022906270667, - "grad_norm": 2.533076910376117, - "learning_rate": 9.573617166463246e-07, - "loss": 1.091, - "step": 5691 - }, - { - "epoch": 0.6844225335177058, - "grad_norm": 1.8036660610661606, - "learning_rate": 9.56697051103924e-07, - "loss": 0.8331, - "step": 5692 - }, - { - "epoch": 0.6845427764083448, - "grad_norm": 1.867376533420341, - "learning_rate": 9.560325438297522e-07, - "loss": 1.0385, - "step": 5693 - }, - { - "epoch": 0.684663019298984, - "grad_norm": 3.6185520309032455, - "learning_rate": 9.553681949246127e-07, - "loss": 1.1022, - "step": 5694 - }, - { - "epoch": 0.684783262189623, - "grad_norm": 2.0170010150307336, - "learning_rate": 9.547040044892886e-07, - "loss": 0.9858, - "step": 5695 - }, - { - "epoch": 0.6849035050802621, - "grad_norm": 0.946516704440133, - "learning_rate": 9.540399726245354e-07, - "loss": 0.8721, - "step": 5696 - }, - { - "epoch": 0.6850237479709013, - "grad_norm": 1.7468570005414228, - "learning_rate": 9.533760994310859e-07, - "loss": 0.9194, - "step": 5697 - }, - { - "epoch": 0.6851439908615403, - "grad_norm": 2.0970046524920356, - "learning_rate": 9.527123850096508e-07, - "loss": 0.9792, - "step": 5698 - }, - { - "epoch": 0.6852642337521794, - "grad_norm": 1.8001478823207488, - "learning_rate": 9.520488294609142e-07, - "loss": 0.9441, - "step": 5699 - }, - { - "epoch": 0.6853844766428185, - "grad_norm": 0.9903525859391282, - "learning_rate": 9.513854328855368e-07, - "loss": 0.8295, - "step": 5700 - }, - { - "epoch": 0.6855047195334576, - "grad_norm": 1.9224279585139905, - "learning_rate": 9.507221953841558e-07, - "loss": 1.0381, - "step": 5701 - }, - { - "epoch": 0.6856249624240967, - "grad_norm": 1.8017330012900419, - "learning_rate": 9.500591170573824e-07, - "loss": 1.0068, - "step": 5702 - }, - { - "epoch": 0.6857452053147358, - "grad_norm": 2.293937413513433, - "learning_rate": 9.493961980058078e-07, - "loss": 0.9716, - "step": 5703 - }, - { - "epoch": 0.6858654482053749, - "grad_norm": 2.7372697602737355, - "learning_rate": 9.48733438329993e-07, - "loss": 0.9087, - "step": 5704 - }, - { - "epoch": 0.6859856910960139, - "grad_norm": 1.8866913282087858, - "learning_rate": 9.480708381304807e-07, - "loss": 0.968, - "step": 5705 - }, - { - "epoch": 0.6861059339866531, - "grad_norm": 2.0766890304283794, - "learning_rate": 9.474083975077858e-07, - "loss": 1.0678, - "step": 5706 - }, - { - "epoch": 0.6862261768772921, - "grad_norm": 5.779226247878683, - "learning_rate": 9.467461165623994e-07, - "loss": 1.0333, - "step": 5707 - }, - { - "epoch": 0.6863464197679312, - "grad_norm": 2.0708980092234657, - "learning_rate": 9.46083995394791e-07, - "loss": 1.0196, - "step": 5708 - }, - { - "epoch": 0.6864666626585703, - "grad_norm": 1.7148361727258206, - "learning_rate": 9.454220341054012e-07, - "loss": 0.8607, - "step": 5709 - }, - { - "epoch": 0.6865869055492094, - "grad_norm": 2.375956493313309, - "learning_rate": 9.447602327946512e-07, - "loss": 1.0363, - "step": 5710 - }, - { - "epoch": 0.6867071484398485, - "grad_norm": 2.11396347005675, - "learning_rate": 9.440985915629338e-07, - "loss": 0.9941, - "step": 5711 - }, - { - "epoch": 0.6868273913304875, - "grad_norm": 1.7305347438654162, - "learning_rate": 9.434371105106223e-07, - "loss": 0.95, - "step": 5712 - }, - { - "epoch": 0.6869476342211267, - "grad_norm": 2.177285241311687, - "learning_rate": 9.427757897380602e-07, - "loss": 0.9445, - "step": 5713 - }, - { - "epoch": 0.6870678771117658, - "grad_norm": 3.0039279971681094, - "learning_rate": 9.421146293455695e-07, - "loss": 1.0842, - "step": 5714 - }, - { - "epoch": 0.6871881200024048, - "grad_norm": 1.8335202150751857, - "learning_rate": 9.414536294334489e-07, - "loss": 0.9126, - "step": 5715 - }, - { - "epoch": 0.687308362893044, - "grad_norm": 1.758224371235285, - "learning_rate": 9.407927901019708e-07, - "loss": 0.9316, - "step": 5716 - }, - { - "epoch": 0.687428605783683, - "grad_norm": 2.0293977949144573, - "learning_rate": 9.401321114513854e-07, - "loss": 0.9996, - "step": 5717 - }, - { - "epoch": 0.6875488486743221, - "grad_norm": 4.755235697683906, - "learning_rate": 9.394715935819155e-07, - "loss": 0.9836, - "step": 5718 - }, - { - "epoch": 0.6876690915649613, - "grad_norm": 1.8632028751164387, - "learning_rate": 9.388112365937608e-07, - "loss": 0.8509, - "step": 5719 - }, - { - "epoch": 0.6877893344556003, - "grad_norm": 1.9426600132337135, - "learning_rate": 9.381510405870985e-07, - "loss": 1.05, - "step": 5720 - }, - { - "epoch": 0.6879095773462394, - "grad_norm": 2.828865080889158, - "learning_rate": 9.374910056620791e-07, - "loss": 1.0015, - "step": 5721 - }, - { - "epoch": 0.6880298202368785, - "grad_norm": 2.2168087055714047, - "learning_rate": 9.368311319188293e-07, - "loss": 1.0427, - "step": 5722 - }, - { - "epoch": 0.6881500631275176, - "grad_norm": 9.54357553458739, - "learning_rate": 9.361714194574515e-07, - "loss": 1.0299, - "step": 5723 - }, - { - "epoch": 0.6882703060181566, - "grad_norm": 0.7661159264912109, - "learning_rate": 9.355118683780228e-07, - "loss": 0.829, - "step": 5724 - }, - { - "epoch": 0.6883905489087958, - "grad_norm": 2.2799318241500592, - "learning_rate": 9.348524787805987e-07, - "loss": 1.0242, - "step": 5725 - }, - { - "epoch": 0.6885107917994349, - "grad_norm": 7.851567115592549, - "learning_rate": 9.341932507652053e-07, - "loss": 1.078, - "step": 5726 - }, - { - "epoch": 0.6886310346900739, - "grad_norm": 2.0954717133273313, - "learning_rate": 9.335341844318489e-07, - "loss": 1.013, - "step": 5727 - }, - { - "epoch": 0.6887512775807131, - "grad_norm": 2.1128404196305177, - "learning_rate": 9.328752798805091e-07, - "loss": 0.9629, - "step": 5728 - }, - { - "epoch": 0.6888715204713521, - "grad_norm": 2.1669159403482636, - "learning_rate": 9.322165372111399e-07, - "loss": 0.9824, - "step": 5729 - }, - { - "epoch": 0.6889917633619912, - "grad_norm": 2.0410401557287803, - "learning_rate": 9.315579565236747e-07, - "loss": 0.9844, - "step": 5730 - }, - { - "epoch": 0.6891120062526304, - "grad_norm": 1.8857332740071253, - "learning_rate": 9.308995379180162e-07, - "loss": 0.9736, - "step": 5731 - }, - { - "epoch": 0.6892322491432694, - "grad_norm": 0.8541742921504352, - "learning_rate": 9.302412814940488e-07, - "loss": 0.8678, - "step": 5732 - }, - { - "epoch": 0.6893524920339085, - "grad_norm": 2.047217769896889, - "learning_rate": 9.295831873516276e-07, - "loss": 0.9327, - "step": 5733 - }, - { - "epoch": 0.6894727349245476, - "grad_norm": 1.4776595142833067, - "learning_rate": 9.289252555905873e-07, - "loss": 0.9931, - "step": 5734 - }, - { - "epoch": 0.6895929778151867, - "grad_norm": 1.9826185006783776, - "learning_rate": 9.282674863107334e-07, - "loss": 0.9886, - "step": 5735 - }, - { - "epoch": 0.6897132207058257, - "grad_norm": 1.9659994378229775, - "learning_rate": 9.276098796118488e-07, - "loss": 0.9939, - "step": 5736 - }, - { - "epoch": 0.6898334635964649, - "grad_norm": 2.018924348937671, - "learning_rate": 9.269524355936938e-07, - "loss": 0.8959, - "step": 5737 - }, - { - "epoch": 0.689953706487104, - "grad_norm": 2.625347205315101, - "learning_rate": 9.262951543560002e-07, - "loss": 1.0775, - "step": 5738 - }, - { - "epoch": 0.690073949377743, - "grad_norm": 2.3036840290925307, - "learning_rate": 9.256380359984795e-07, - "loss": 1.0906, - "step": 5739 - }, - { - "epoch": 0.6901941922683821, - "grad_norm": 1.8326545094382507, - "learning_rate": 9.249810806208139e-07, - "loss": 0.9707, - "step": 5740 - }, - { - "epoch": 0.6903144351590212, - "grad_norm": 2.341972389038886, - "learning_rate": 9.243242883226627e-07, - "loss": 1.0418, - "step": 5741 - }, - { - "epoch": 0.6904346780496603, - "grad_norm": 1.931946763945972, - "learning_rate": 9.236676592036628e-07, - "loss": 0.9254, - "step": 5742 - }, - { - "epoch": 0.6905549209402994, - "grad_norm": 1.748280497874774, - "learning_rate": 9.230111933634228e-07, - "loss": 0.9591, - "step": 5743 - }, - { - "epoch": 0.6906751638309385, - "grad_norm": 1.399716862930307, - "learning_rate": 9.223548909015288e-07, - "loss": 1.0345, - "step": 5744 - }, - { - "epoch": 0.6907954067215776, - "grad_norm": 1.9833695274811736, - "learning_rate": 9.216987519175407e-07, - "loss": 0.9406, - "step": 5745 - }, - { - "epoch": 0.6909156496122166, - "grad_norm": 8.279878092435053, - "learning_rate": 9.210427765109942e-07, - "loss": 0.922, - "step": 5746 - }, - { - "epoch": 0.6910358925028558, - "grad_norm": 2.006496606804426, - "learning_rate": 9.20386964781402e-07, - "loss": 1.0425, - "step": 5747 - }, - { - "epoch": 0.6911561353934949, - "grad_norm": 2.0272449623691684, - "learning_rate": 9.197313168282472e-07, - "loss": 1.0682, - "step": 5748 - }, - { - "epoch": 0.6912763782841339, - "grad_norm": 2.319901760750854, - "learning_rate": 9.190758327509935e-07, - "loss": 0.9485, - "step": 5749 - }, - { - "epoch": 0.6913966211747731, - "grad_norm": 0.8933482447549264, - "learning_rate": 9.184205126490767e-07, - "loss": 0.9024, - "step": 5750 - }, - { - "epoch": 0.6915168640654121, - "grad_norm": 0.9566012915316825, - "learning_rate": 9.177653566219075e-07, - "loss": 0.87, - "step": 5751 - }, - { - "epoch": 0.6916371069560512, - "grad_norm": 2.4643395538451083, - "learning_rate": 9.171103647688744e-07, - "loss": 0.9851, - "step": 5752 - }, - { - "epoch": 0.6917573498466904, - "grad_norm": 2.574453710735915, - "learning_rate": 9.164555371893367e-07, - "loss": 0.9202, - "step": 5753 - }, - { - "epoch": 0.6918775927373294, - "grad_norm": 2.052828567913537, - "learning_rate": 9.158008739826333e-07, - "loss": 0.9825, - "step": 5754 - }, - { - "epoch": 0.6919978356279685, - "grad_norm": 1.8655486635983693, - "learning_rate": 9.151463752480744e-07, - "loss": 1.09, - "step": 5755 - }, - { - "epoch": 0.6921180785186076, - "grad_norm": 1.6566176816093174, - "learning_rate": 9.144920410849493e-07, - "loss": 1.0284, - "step": 5756 - }, - { - "epoch": 0.6922383214092467, - "grad_norm": 2.3447641848684064, - "learning_rate": 9.138378715925176e-07, - "loss": 1.0324, - "step": 5757 - }, - { - "epoch": 0.6923585642998857, - "grad_norm": 1.8275168948038365, - "learning_rate": 9.131838668700167e-07, - "loss": 1.0366, - "step": 5758 - }, - { - "epoch": 0.6924788071905249, - "grad_norm": 1.9146958529422609, - "learning_rate": 9.125300270166598e-07, - "loss": 1.0979, - "step": 5759 - }, - { - "epoch": 0.692599050081164, - "grad_norm": 1.701438698936942, - "learning_rate": 9.118763521316324e-07, - "loss": 1.0953, - "step": 5760 - }, - { - "epoch": 0.692719292971803, - "grad_norm": 1.6800418213906771, - "learning_rate": 9.112228423140987e-07, - "loss": 0.9904, - "step": 5761 - }, - { - "epoch": 0.6928395358624422, - "grad_norm": 4.518099941490762, - "learning_rate": 9.105694976631932e-07, - "loss": 1.0902, - "step": 5762 - }, - { - "epoch": 0.6929597787530812, - "grad_norm": 2.654009423820593, - "learning_rate": 9.099163182780283e-07, - "loss": 0.9549, - "step": 5763 - }, - { - "epoch": 0.6930800216437203, - "grad_norm": 3.536725820201888, - "learning_rate": 9.092633042576916e-07, - "loss": 0.7318, - "step": 5764 - }, - { - "epoch": 0.6932002645343595, - "grad_norm": 2.0073056304820747, - "learning_rate": 9.086104557012446e-07, - "loss": 0.801, - "step": 5765 - }, - { - "epoch": 0.6933205074249985, - "grad_norm": 2.307236118954009, - "learning_rate": 9.079577727077239e-07, - "loss": 0.8875, - "step": 5766 - }, - { - "epoch": 0.6934407503156376, - "grad_norm": 1.9963828995333215, - "learning_rate": 9.073052553761404e-07, - "loss": 0.9523, - "step": 5767 - }, - { - "epoch": 0.6935609932062767, - "grad_norm": 1.7144853569333258, - "learning_rate": 9.066529038054805e-07, - "loss": 1.0007, - "step": 5768 - }, - { - "epoch": 0.6936812360969158, - "grad_norm": 1.599053702062782, - "learning_rate": 9.060007180947071e-07, - "loss": 0.9685, - "step": 5769 - }, - { - "epoch": 0.6938014789875548, - "grad_norm": 2.008218805785769, - "learning_rate": 9.053486983427534e-07, - "loss": 0.9643, - "step": 5770 - }, - { - "epoch": 0.6939217218781939, - "grad_norm": 1.8908083487897518, - "learning_rate": 9.046968446485326e-07, - "loss": 0.9335, - "step": 5771 - }, - { - "epoch": 0.6940419647688331, - "grad_norm": 2.538037653746677, - "learning_rate": 9.040451571109295e-07, - "loss": 0.9281, - "step": 5772 - }, - { - "epoch": 0.6941622076594721, - "grad_norm": 0.9489076834657929, - "learning_rate": 9.033936358288042e-07, - "loss": 0.8811, - "step": 5773 - }, - { - "epoch": 0.6942824505501112, - "grad_norm": 2.0489433263459573, - "learning_rate": 9.027422809009937e-07, - "loss": 1.0532, - "step": 5774 - }, - { - "epoch": 0.6944026934407503, - "grad_norm": 1.9214101089546742, - "learning_rate": 9.020910924263054e-07, - "loss": 1.0596, - "step": 5775 - }, - { - "epoch": 0.6945229363313894, - "grad_norm": 0.8621272663840533, - "learning_rate": 9.014400705035261e-07, - "loss": 0.8548, - "step": 5776 - }, - { - "epoch": 0.6946431792220285, - "grad_norm": 1.9086469994954178, - "learning_rate": 9.00789215231414e-07, - "loss": 0.9975, - "step": 5777 - }, - { - "epoch": 0.6947634221126676, - "grad_norm": 1.6152943019982058, - "learning_rate": 9.001385267087056e-07, - "loss": 1.0511, - "step": 5778 - }, - { - "epoch": 0.6948836650033067, - "grad_norm": 3.9575874562820585, - "learning_rate": 8.994880050341072e-07, - "loss": 0.9331, - "step": 5779 - }, - { - "epoch": 0.6950039078939457, - "grad_norm": 11.21566313457835, - "learning_rate": 8.988376503063026e-07, - "loss": 1.007, - "step": 5780 - }, - { - "epoch": 0.6951241507845849, - "grad_norm": 1.9270754908496635, - "learning_rate": 8.981874626239521e-07, - "loss": 1.0476, - "step": 5781 - }, - { - "epoch": 0.695244393675224, - "grad_norm": 3.3273078022627445, - "learning_rate": 8.975374420856872e-07, - "loss": 1.1144, - "step": 5782 - }, - { - "epoch": 0.695364636565863, - "grad_norm": 2.018442949026938, - "learning_rate": 8.968875887901157e-07, - "loss": 0.9528, - "step": 5783 - }, - { - "epoch": 0.6954848794565022, - "grad_norm": 6.268302060189301, - "learning_rate": 8.9623790283582e-07, - "loss": 0.8592, - "step": 5784 - }, - { - "epoch": 0.6956051223471412, - "grad_norm": 2.0711228422224703, - "learning_rate": 8.955883843213561e-07, - "loss": 0.99, - "step": 5785 - }, - { - "epoch": 0.6957253652377803, - "grad_norm": 8.842841543170227, - "learning_rate": 8.949390333452569e-07, - "loss": 1.1075, - "step": 5786 - }, - { - "epoch": 0.6958456081284194, - "grad_norm": 2.0215749526808646, - "learning_rate": 8.942898500060279e-07, - "loss": 0.9068, - "step": 5787 - }, - { - "epoch": 0.6959658510190585, - "grad_norm": 2.649584676477562, - "learning_rate": 8.936408344021493e-07, - "loss": 0.9528, - "step": 5788 - }, - { - "epoch": 0.6960860939096976, - "grad_norm": 2.3432934125572036, - "learning_rate": 8.929919866320765e-07, - "loss": 0.9418, - "step": 5789 - }, - { - "epoch": 0.6962063368003367, - "grad_norm": 1.8087367748944878, - "learning_rate": 8.923433067942385e-07, - "loss": 1.0391, - "step": 5790 - }, - { - "epoch": 0.6963265796909758, - "grad_norm": 1.8053722981432263, - "learning_rate": 8.916947949870417e-07, - "loss": 0.9184, - "step": 5791 - }, - { - "epoch": 0.6964468225816148, - "grad_norm": 1.035111451529154, - "learning_rate": 8.910464513088615e-07, - "loss": 0.8642, - "step": 5792 - }, - { - "epoch": 0.696567065472254, - "grad_norm": 2.1939748578156713, - "learning_rate": 8.903982758580542e-07, - "loss": 1.0195, - "step": 5793 - }, - { - "epoch": 0.696687308362893, - "grad_norm": 2.073583243335318, - "learning_rate": 8.897502687329457e-07, - "loss": 1.0357, - "step": 5794 - }, - { - "epoch": 0.6968075512535321, - "grad_norm": 1.9611303834925762, - "learning_rate": 8.891024300318382e-07, - "loss": 1.0288, - "step": 5795 - }, - { - "epoch": 0.6969277941441713, - "grad_norm": 1.860356881041446, - "learning_rate": 8.884547598530103e-07, - "loss": 0.9829, - "step": 5796 - }, - { - "epoch": 0.6970480370348103, - "grad_norm": 1.9890388289584113, - "learning_rate": 8.8780725829471e-07, - "loss": 0.9794, - "step": 5797 - }, - { - "epoch": 0.6971682799254494, - "grad_norm": 1.9579324988437896, - "learning_rate": 8.87159925455165e-07, - "loss": 0.9976, - "step": 5798 - }, - { - "epoch": 0.6972885228160886, - "grad_norm": 4.450467143024167, - "learning_rate": 8.865127614325738e-07, - "loss": 0.9633, - "step": 5799 - }, - { - "epoch": 0.6974087657067276, - "grad_norm": 1.794311751815851, - "learning_rate": 8.85865766325113e-07, - "loss": 0.8986, - "step": 5800 - }, - { - "epoch": 0.6975290085973667, - "grad_norm": 2.794272805266856, - "learning_rate": 8.852189402309287e-07, - "loss": 0.9517, - "step": 5801 - }, - { - "epoch": 0.6976492514880057, - "grad_norm": 2.37840759362793, - "learning_rate": 8.845722832481441e-07, - "loss": 0.9672, - "step": 5802 - }, - { - "epoch": 0.6977694943786449, - "grad_norm": 1.9432555817916877, - "learning_rate": 8.83925795474858e-07, - "loss": 1.0102, - "step": 5803 - }, - { - "epoch": 0.6978897372692839, - "grad_norm": 3.068805353788095, - "learning_rate": 8.832794770091414e-07, - "loss": 0.8396, - "step": 5804 - }, - { - "epoch": 0.698009980159923, - "grad_norm": 3.9219013836341756, - "learning_rate": 8.826333279490401e-07, - "loss": 1.0493, - "step": 5805 - }, - { - "epoch": 0.6981302230505622, - "grad_norm": 3.3204444769267, - "learning_rate": 8.819873483925748e-07, - "loss": 0.9091, - "step": 5806 - }, - { - "epoch": 0.6982504659412012, - "grad_norm": 1.8743943825813811, - "learning_rate": 8.81341538437739e-07, - "loss": 0.9819, - "step": 5807 - }, - { - "epoch": 0.6983707088318403, - "grad_norm": 1.7244547410635167, - "learning_rate": 8.80695898182503e-07, - "loss": 0.9116, - "step": 5808 - }, - { - "epoch": 0.6984909517224794, - "grad_norm": 1.0986533743735556, - "learning_rate": 8.800504277248093e-07, - "loss": 0.9193, - "step": 5809 - }, - { - "epoch": 0.6986111946131185, - "grad_norm": 1.8201426152535576, - "learning_rate": 8.794051271625753e-07, - "loss": 0.9833, - "step": 5810 - }, - { - "epoch": 0.6987314375037575, - "grad_norm": 1.499858690599337, - "learning_rate": 8.787599965936925e-07, - "loss": 1.0603, - "step": 5811 - }, - { - "epoch": 0.6988516803943967, - "grad_norm": 1.817077906314076, - "learning_rate": 8.781150361160261e-07, - "loss": 0.9508, - "step": 5812 - }, - { - "epoch": 0.6989719232850358, - "grad_norm": 2.3927728316820307, - "learning_rate": 8.774702458274181e-07, - "loss": 0.9604, - "step": 5813 - }, - { - "epoch": 0.6990921661756748, - "grad_norm": 2.564804157714901, - "learning_rate": 8.768256258256799e-07, - "loss": 0.93, - "step": 5814 - }, - { - "epoch": 0.699212409066314, - "grad_norm": 1.7421349487589932, - "learning_rate": 8.76181176208602e-07, - "loss": 0.9746, - "step": 5815 - }, - { - "epoch": 0.699332651956953, - "grad_norm": 1.7217907450532357, - "learning_rate": 8.755368970739461e-07, - "loss": 0.9606, - "step": 5816 - }, - { - "epoch": 0.6994528948475921, - "grad_norm": 7.317367368381706, - "learning_rate": 8.748927885194479e-07, - "loss": 0.8445, - "step": 5817 - }, - { - "epoch": 0.6995731377382313, - "grad_norm": 0.7431396801190746, - "learning_rate": 8.742488506428209e-07, - "loss": 0.8167, - "step": 5818 - }, - { - "epoch": 0.6996933806288703, - "grad_norm": 1.7754456162198196, - "learning_rate": 8.736050835417466e-07, - "loss": 1.0156, - "step": 5819 - }, - { - "epoch": 0.6998136235195094, - "grad_norm": 2.954068795014549, - "learning_rate": 8.729614873138862e-07, - "loss": 0.8501, - "step": 5820 - }, - { - "epoch": 0.6999338664101485, - "grad_norm": 2.068451819006493, - "learning_rate": 8.723180620568716e-07, - "loss": 1.0095, - "step": 5821 - }, - { - "epoch": 0.7000541093007876, - "grad_norm": 1.6653781042278368, - "learning_rate": 8.716748078683116e-07, - "loss": 1.0904, - "step": 5822 - }, - { - "epoch": 0.7001743521914267, - "grad_norm": 2.0606641980076916, - "learning_rate": 8.710317248457855e-07, - "loss": 0.9154, - "step": 5823 - }, - { - "epoch": 0.7002945950820658, - "grad_norm": 2.14646771383718, - "learning_rate": 8.703888130868482e-07, - "loss": 0.9474, - "step": 5824 - }, - { - "epoch": 0.7004148379727049, - "grad_norm": 1.9281159461008242, - "learning_rate": 8.697460726890307e-07, - "loss": 1.0481, - "step": 5825 - }, - { - "epoch": 0.7005350808633439, - "grad_norm": 1.9950758397116566, - "learning_rate": 8.691035037498354e-07, - "loss": 1.1325, - "step": 5826 - }, - { - "epoch": 0.7006553237539831, - "grad_norm": 7.120419016734686, - "learning_rate": 8.684611063667391e-07, - "loss": 0.9568, - "step": 5827 - }, - { - "epoch": 0.7007755666446221, - "grad_norm": 1.8660619687240196, - "learning_rate": 8.678188806371935e-07, - "loss": 0.9908, - "step": 5828 - }, - { - "epoch": 0.7008958095352612, - "grad_norm": 1.6360088256327403, - "learning_rate": 8.671768266586228e-07, - "loss": 1.0806, - "step": 5829 - }, - { - "epoch": 0.7010160524259004, - "grad_norm": 1.674693361369256, - "learning_rate": 8.665349445284275e-07, - "loss": 1.0109, - "step": 5830 - }, - { - "epoch": 0.7011362953165394, - "grad_norm": 4.3385619872473296, - "learning_rate": 8.658932343439799e-07, - "loss": 1.039, - "step": 5831 - }, - { - "epoch": 0.7012565382071785, - "grad_norm": 2.704399910748834, - "learning_rate": 8.65251696202627e-07, - "loss": 0.9988, - "step": 5832 - }, - { - "epoch": 0.7013767810978175, - "grad_norm": 2.54331431113846, - "learning_rate": 8.646103302016896e-07, - "loss": 1.1014, - "step": 5833 - }, - { - "epoch": 0.7014970239884567, - "grad_norm": 2.150112317722216, - "learning_rate": 8.639691364384614e-07, - "loss": 1.119, - "step": 5834 - }, - { - "epoch": 0.7016172668790958, - "grad_norm": 1.9877493695898716, - "learning_rate": 8.633281150102136e-07, - "loss": 0.9568, - "step": 5835 - }, - { - "epoch": 0.7017375097697348, - "grad_norm": 5.150601709269442, - "learning_rate": 8.626872660141855e-07, - "loss": 0.91, - "step": 5836 - }, - { - "epoch": 0.701857752660374, - "grad_norm": 1.6940884016316045, - "learning_rate": 8.620465895475957e-07, - "loss": 0.9738, - "step": 5837 - }, - { - "epoch": 0.701977995551013, - "grad_norm": 1.4504202003658784, - "learning_rate": 8.614060857076333e-07, - "loss": 0.9833, - "step": 5838 - }, - { - "epoch": 0.7020982384416521, - "grad_norm": 1.870682641341577, - "learning_rate": 8.60765754591462e-07, - "loss": 0.9763, - "step": 5839 - }, - { - "epoch": 0.7022184813322913, - "grad_norm": 2.062331915277454, - "learning_rate": 8.601255962962211e-07, - "loss": 0.9584, - "step": 5840 - }, - { - "epoch": 0.7023387242229303, - "grad_norm": 4.953765731428465, - "learning_rate": 8.594856109190194e-07, - "loss": 0.9484, - "step": 5841 - }, - { - "epoch": 0.7024589671135694, - "grad_norm": 1.6853752181430657, - "learning_rate": 8.588457985569446e-07, - "loss": 0.9253, - "step": 5842 - }, - { - "epoch": 0.7025792100042085, - "grad_norm": 2.3117665339072846, - "learning_rate": 8.582061593070542e-07, - "loss": 0.9382, - "step": 5843 - }, - { - "epoch": 0.7026994528948476, - "grad_norm": 2.0278197448196593, - "learning_rate": 8.57566693266383e-07, - "loss": 0.9939, - "step": 5844 - }, - { - "epoch": 0.7028196957854866, - "grad_norm": 6.662798458664441, - "learning_rate": 8.569274005319354e-07, - "loss": 0.9254, - "step": 5845 - }, - { - "epoch": 0.7029399386761258, - "grad_norm": 1.7914366315685122, - "learning_rate": 8.562882812006913e-07, - "loss": 1.0324, - "step": 5846 - }, - { - "epoch": 0.7030601815667649, - "grad_norm": 1.9600226103198626, - "learning_rate": 8.556493353696066e-07, - "loss": 1.0013, - "step": 5847 - }, - { - "epoch": 0.7031804244574039, - "grad_norm": 2.9117427316245603, - "learning_rate": 8.550105631356077e-07, - "loss": 0.9148, - "step": 5848 - }, - { - "epoch": 0.7033006673480431, - "grad_norm": 1.9020601096377183, - "learning_rate": 8.543719645955961e-07, - "loss": 0.9919, - "step": 5849 - }, - { - "epoch": 0.7034209102386821, - "grad_norm": 1.4999756811077956, - "learning_rate": 8.537335398464467e-07, - "loss": 0.972, - "step": 5850 - }, - { - "epoch": 0.7035411531293212, - "grad_norm": 2.6634599558013305, - "learning_rate": 8.53095288985007e-07, - "loss": 1.088, - "step": 5851 - }, - { - "epoch": 0.7036613960199604, - "grad_norm": 1.8993212993495445, - "learning_rate": 8.524572121081009e-07, - "loss": 1.0512, - "step": 5852 - }, - { - "epoch": 0.7037816389105994, - "grad_norm": 2.367157183663905, - "learning_rate": 8.518193093125232e-07, - "loss": 0.8532, - "step": 5853 - }, - { - "epoch": 0.7039018818012385, - "grad_norm": 1.5168606155524509, - "learning_rate": 8.511815806950436e-07, - "loss": 1.0307, - "step": 5854 - }, - { - "epoch": 0.7040221246918776, - "grad_norm": 1.7356257696977666, - "learning_rate": 8.505440263524044e-07, - "loss": 1.0042, - "step": 5855 - }, - { - "epoch": 0.7041423675825167, - "grad_norm": 2.798626910455079, - "learning_rate": 8.49906646381322e-07, - "loss": 1.1076, - "step": 5856 - }, - { - "epoch": 0.7042626104731557, - "grad_norm": 1.7370606669722815, - "learning_rate": 8.492694408784884e-07, - "loss": 0.9456, - "step": 5857 - }, - { - "epoch": 0.7043828533637949, - "grad_norm": 2.4500257023611454, - "learning_rate": 8.486324099405642e-07, - "loss": 0.8524, - "step": 5858 - }, - { - "epoch": 0.704503096254434, - "grad_norm": 1.7891410577176443, - "learning_rate": 8.479955536641887e-07, - "loss": 0.9817, - "step": 5859 - }, - { - "epoch": 0.704623339145073, - "grad_norm": 2.0368574943153086, - "learning_rate": 8.473588721459716e-07, - "loss": 0.8883, - "step": 5860 - }, - { - "epoch": 0.7047435820357122, - "grad_norm": 2.308297436798858, - "learning_rate": 8.467223654824967e-07, - "loss": 0.9401, - "step": 5861 - }, - { - "epoch": 0.7048638249263512, - "grad_norm": 1.8174409277969654, - "learning_rate": 8.460860337703233e-07, - "loss": 0.8631, - "step": 5862 - }, - { - "epoch": 0.7049840678169903, - "grad_norm": 2.389296517845621, - "learning_rate": 8.454498771059797e-07, - "loss": 0.9398, - "step": 5863 - }, - { - "epoch": 0.7051043107076294, - "grad_norm": 2.2925123446099933, - "learning_rate": 8.448138955859725e-07, - "loss": 1.0626, - "step": 5864 - }, - { - "epoch": 0.7052245535982685, - "grad_norm": 2.042584764269545, - "learning_rate": 8.44178089306778e-07, - "loss": 1.127, - "step": 5865 - }, - { - "epoch": 0.7053447964889076, - "grad_norm": 1.8545180284287706, - "learning_rate": 8.4354245836485e-07, - "loss": 1.0053, - "step": 5866 - }, - { - "epoch": 0.7054650393795466, - "grad_norm": 1.5126360493152304, - "learning_rate": 8.429070028566108e-07, - "loss": 0.9596, - "step": 5867 - }, - { - "epoch": 0.7055852822701858, - "grad_norm": 1.9495607125826828, - "learning_rate": 8.422717228784586e-07, - "loss": 0.9821, - "step": 5868 - }, - { - "epoch": 0.7057055251608249, - "grad_norm": 5.294613876024521, - "learning_rate": 8.416366185267663e-07, - "loss": 0.9221, - "step": 5869 - }, - { - "epoch": 0.7058257680514639, - "grad_norm": 1.85398873179035, - "learning_rate": 8.410016898978778e-07, - "loss": 1.0055, - "step": 5870 - }, - { - "epoch": 0.7059460109421031, - "grad_norm": 2.363087140142991, - "learning_rate": 8.403669370881115e-07, - "loss": 1.0202, - "step": 5871 - }, - { - "epoch": 0.7060662538327421, - "grad_norm": 2.1314695273182376, - "learning_rate": 8.397323601937587e-07, - "loss": 1.0058, - "step": 5872 - }, - { - "epoch": 0.7061864967233812, - "grad_norm": 3.190100861632004, - "learning_rate": 8.390979593110838e-07, - "loss": 1.0002, - "step": 5873 - }, - { - "epoch": 0.7063067396140204, - "grad_norm": 4.235471914180925, - "learning_rate": 8.384637345363262e-07, - "loss": 1.0402, - "step": 5874 - }, - { - "epoch": 0.7064269825046594, - "grad_norm": 7.514867161473305, - "learning_rate": 8.378296859656964e-07, - "loss": 0.9974, - "step": 5875 - }, - { - "epoch": 0.7065472253952985, - "grad_norm": 2.6640305503204673, - "learning_rate": 8.371958136953792e-07, - "loss": 0.9054, - "step": 5876 - }, - { - "epoch": 0.7066674682859376, - "grad_norm": 2.8290300975828773, - "learning_rate": 8.365621178215326e-07, - "loss": 0.8916, - "step": 5877 - }, - { - "epoch": 0.7067877111765767, - "grad_norm": 1.8744382652603464, - "learning_rate": 8.359285984402871e-07, - "loss": 0.9772, - "step": 5878 - }, - { - "epoch": 0.7069079540672157, - "grad_norm": 2.236818591310191, - "learning_rate": 8.352952556477489e-07, - "loss": 0.9653, - "step": 5879 - }, - { - "epoch": 0.7070281969578549, - "grad_norm": 1.9110274561172735, - "learning_rate": 8.34662089539993e-07, - "loss": 1.0033, - "step": 5880 - }, - { - "epoch": 0.707148439848494, - "grad_norm": 2.087637676335975, - "learning_rate": 8.340291002130722e-07, - "loss": 1.0152, - "step": 5881 - }, - { - "epoch": 0.707268682739133, - "grad_norm": 2.6992577026913027, - "learning_rate": 8.3339628776301e-07, - "loss": 1.0261, - "step": 5882 - }, - { - "epoch": 0.7073889256297722, - "grad_norm": 1.7577680115898, - "learning_rate": 8.327636522858033e-07, - "loss": 0.7984, - "step": 5883 - }, - { - "epoch": 0.7075091685204112, - "grad_norm": 1.8450223121224345, - "learning_rate": 8.321311938774225e-07, - "loss": 0.9907, - "step": 5884 - }, - { - "epoch": 0.7076294114110503, - "grad_norm": 2.2004571006318643, - "learning_rate": 8.314989126338104e-07, - "loss": 1.0188, - "step": 5885 - }, - { - "epoch": 0.7077496543016895, - "grad_norm": 2.727012968697676, - "learning_rate": 8.308668086508847e-07, - "loss": 1.078, - "step": 5886 - }, - { - "epoch": 0.7078698971923285, - "grad_norm": 1.9077996380846516, - "learning_rate": 8.302348820245342e-07, - "loss": 0.9686, - "step": 5887 - }, - { - "epoch": 0.7079901400829676, - "grad_norm": 2.352724516935183, - "learning_rate": 8.296031328506232e-07, - "loss": 0.9306, - "step": 5888 - }, - { - "epoch": 0.7081103829736067, - "grad_norm": 2.199525638938186, - "learning_rate": 8.289715612249857e-07, - "loss": 0.9857, - "step": 5889 - }, - { - "epoch": 0.7082306258642458, - "grad_norm": 2.535121171000324, - "learning_rate": 8.283401672434305e-07, - "loss": 1.001, - "step": 5890 - }, - { - "epoch": 0.7083508687548848, - "grad_norm": 2.037067240995235, - "learning_rate": 8.277089510017412e-07, - "loss": 0.9379, - "step": 5891 - }, - { - "epoch": 0.708471111645524, - "grad_norm": 1.5983337607331778, - "learning_rate": 8.270779125956719e-07, - "loss": 1.0524, - "step": 5892 - }, - { - "epoch": 0.7085913545361631, - "grad_norm": 1.9666149733371507, - "learning_rate": 8.264470521209505e-07, - "loss": 1.0237, - "step": 5893 - }, - { - "epoch": 0.7087115974268021, - "grad_norm": 2.2030412373953765, - "learning_rate": 8.258163696732785e-07, - "loss": 0.991, - "step": 5894 - }, - { - "epoch": 0.7088318403174413, - "grad_norm": 2.7157721160266273, - "learning_rate": 8.251858653483288e-07, - "loss": 1.0022, - "step": 5895 - }, - { - "epoch": 0.7089520832080803, - "grad_norm": 2.0516154599817154, - "learning_rate": 8.245555392417501e-07, - "loss": 1.0864, - "step": 5896 - }, - { - "epoch": 0.7090723260987194, - "grad_norm": 1.8987331709577413, - "learning_rate": 8.239253914491613e-07, - "loss": 1.0197, - "step": 5897 - }, - { - "epoch": 0.7091925689893585, - "grad_norm": 6.5490768480951465, - "learning_rate": 8.232954220661556e-07, - "loss": 0.9813, - "step": 5898 - }, - { - "epoch": 0.7093128118799976, - "grad_norm": 2.817092028971511, - "learning_rate": 8.226656311882989e-07, - "loss": 0.9172, - "step": 5899 - }, - { - "epoch": 0.7094330547706367, - "grad_norm": 2.241925480117133, - "learning_rate": 8.22036018911129e-07, - "loss": 0.9977, - "step": 5900 - }, - { - "epoch": 0.7095532976612757, - "grad_norm": 2.0157131205377072, - "learning_rate": 8.214065853301599e-07, - "loss": 1.0296, - "step": 5901 - }, - { - "epoch": 0.7096735405519149, - "grad_norm": 0.7821831661383203, - "learning_rate": 8.207773305408734e-07, - "loss": 0.8353, - "step": 5902 - }, - { - "epoch": 0.709793783442554, - "grad_norm": 3.0210806559024164, - "learning_rate": 8.201482546387288e-07, - "loss": 1.0215, - "step": 5903 - }, - { - "epoch": 0.709914026333193, - "grad_norm": 1.517119388642549, - "learning_rate": 8.195193577191553e-07, - "loss": 1.1421, - "step": 5904 - }, - { - "epoch": 0.7100342692238322, - "grad_norm": 1.6242971367427779, - "learning_rate": 8.188906398775579e-07, - "loss": 1.0701, - "step": 5905 - }, - { - "epoch": 0.7101545121144712, - "grad_norm": 1.8188496224821586, - "learning_rate": 8.18262101209311e-07, - "loss": 0.9181, - "step": 5906 - }, - { - "epoch": 0.7102747550051103, - "grad_norm": 2.3753186062379372, - "learning_rate": 8.176337418097626e-07, - "loss": 0.9299, - "step": 5907 - }, - { - "epoch": 0.7103949978957494, - "grad_norm": 3.0814869137693, - "learning_rate": 8.170055617742364e-07, - "loss": 1.0241, - "step": 5908 - }, - { - "epoch": 0.7105152407863885, - "grad_norm": 1.6896391132270754, - "learning_rate": 8.163775611980252e-07, - "loss": 0.9442, - "step": 5909 - }, - { - "epoch": 0.7106354836770276, - "grad_norm": 1.669535345662457, - "learning_rate": 8.157497401763982e-07, - "loss": 1.0152, - "step": 5910 - }, - { - "epoch": 0.7107557265676667, - "grad_norm": 1.667120331373769, - "learning_rate": 8.151220988045935e-07, - "loss": 1.0046, - "step": 5911 - }, - { - "epoch": 0.7108759694583058, - "grad_norm": 3.185830969570546, - "learning_rate": 8.144946371778234e-07, - "loss": 1.0596, - "step": 5912 - }, - { - "epoch": 0.7109962123489448, - "grad_norm": 1.656702663534573, - "learning_rate": 8.138673553912751e-07, - "loss": 1.0084, - "step": 5913 - }, - { - "epoch": 0.711116455239584, - "grad_norm": 2.356536572582722, - "learning_rate": 8.132402535401059e-07, - "loss": 0.8074, - "step": 5914 - }, - { - "epoch": 0.711236698130223, - "grad_norm": 1.9247225623414104, - "learning_rate": 8.126133317194465e-07, - "loss": 0.9752, - "step": 5915 - }, - { - "epoch": 0.7113569410208621, - "grad_norm": 2.377924574076894, - "learning_rate": 8.11986590024401e-07, - "loss": 0.9658, - "step": 5916 - }, - { - "epoch": 0.7114771839115013, - "grad_norm": 1.5687617905618219, - "learning_rate": 8.113600285500442e-07, - "loss": 0.9077, - "step": 5917 - }, - { - "epoch": 0.7115974268021403, - "grad_norm": 1.9400778203280489, - "learning_rate": 8.107336473914268e-07, - "loss": 0.9649, - "step": 5918 - }, - { - "epoch": 0.7117176696927794, - "grad_norm": 0.8451471417317626, - "learning_rate": 8.101074466435694e-07, - "loss": 0.8376, - "step": 5919 - }, - { - "epoch": 0.7118379125834186, - "grad_norm": 1.6204940973708135, - "learning_rate": 8.094814264014662e-07, - "loss": 0.9094, - "step": 5920 - }, - { - "epoch": 0.7119581554740576, - "grad_norm": 2.9364157765744006, - "learning_rate": 8.088555867600844e-07, - "loss": 1.0537, - "step": 5921 - }, - { - "epoch": 0.7120783983646967, - "grad_norm": 1.8583913172526827, - "learning_rate": 8.08229927814362e-07, - "loss": 0.8432, - "step": 5922 - }, - { - "epoch": 0.7121986412553358, - "grad_norm": 3.7578076780926706, - "learning_rate": 8.076044496592134e-07, - "loss": 0.8733, - "step": 5923 - }, - { - "epoch": 0.7123188841459749, - "grad_norm": 2.0651518217715985, - "learning_rate": 8.069791523895204e-07, - "loss": 1.0038, - "step": 5924 - }, - { - "epoch": 0.7124391270366139, - "grad_norm": 2.32048717316541, - "learning_rate": 8.063540361001422e-07, - "loss": 1.0109, - "step": 5925 - }, - { - "epoch": 0.7125593699272531, - "grad_norm": 12.919226083163442, - "learning_rate": 8.057291008859069e-07, - "loss": 1.0228, - "step": 5926 - }, - { - "epoch": 0.7126796128178922, - "grad_norm": 1.7607620246603348, - "learning_rate": 8.051043468416187e-07, - "loss": 0.9054, - "step": 5927 - }, - { - "epoch": 0.7127998557085312, - "grad_norm": 2.0358237784441613, - "learning_rate": 8.044797740620506e-07, - "loss": 1.0546, - "step": 5928 - }, - { - "epoch": 0.7129200985991703, - "grad_norm": 2.0764371284429646, - "learning_rate": 8.038553826419494e-07, - "loss": 1.0131, - "step": 5929 - }, - { - "epoch": 0.7130403414898094, - "grad_norm": 2.3407479960538837, - "learning_rate": 8.032311726760364e-07, - "loss": 1.0323, - "step": 5930 - }, - { - "epoch": 0.7131605843804485, - "grad_norm": 1.7960290659058424, - "learning_rate": 8.026071442590022e-07, - "loss": 0.9219, - "step": 5931 - }, - { - "epoch": 0.7132808272710875, - "grad_norm": 2.0903035736463687, - "learning_rate": 8.019832974855134e-07, - "loss": 1.0471, - "step": 5932 - }, - { - "epoch": 0.7134010701617267, - "grad_norm": 2.2002445085073608, - "learning_rate": 8.013596324502052e-07, - "loss": 1.0522, - "step": 5933 - }, - { - "epoch": 0.7135213130523658, - "grad_norm": 1.7673375424368885, - "learning_rate": 8.007361492476872e-07, - "loss": 1.0154, - "step": 5934 - }, - { - "epoch": 0.7136415559430048, - "grad_norm": 2.245780272791817, - "learning_rate": 8.001128479725426e-07, - "loss": 1.0127, - "step": 5935 - }, - { - "epoch": 0.713761798833644, - "grad_norm": 1.4951053870574487, - "learning_rate": 7.994897287193248e-07, - "loss": 1.0327, - "step": 5936 - }, - { - "epoch": 0.713882041724283, - "grad_norm": 2.491807101405571, - "learning_rate": 7.988667915825605e-07, - "loss": 1.0713, - "step": 5937 - }, - { - "epoch": 0.7140022846149221, - "grad_norm": 2.2443079475195824, - "learning_rate": 7.982440366567491e-07, - "loss": 0.9822, - "step": 5938 - }, - { - "epoch": 0.7141225275055613, - "grad_norm": 1.6469269832970033, - "learning_rate": 7.97621464036361e-07, - "loss": 0.9829, - "step": 5939 - }, - { - "epoch": 0.7142427703962003, - "grad_norm": 1.9125590122069307, - "learning_rate": 7.969990738158417e-07, - "loss": 0.9108, - "step": 5940 - }, - { - "epoch": 0.7143630132868394, - "grad_norm": 2.8356314670017313, - "learning_rate": 7.963768660896062e-07, - "loss": 1.075, - "step": 5941 - }, - { - "epoch": 0.7144832561774785, - "grad_norm": 3.5712855909881953, - "learning_rate": 7.957548409520432e-07, - "loss": 1.0557, - "step": 5942 - }, - { - "epoch": 0.7146034990681176, - "grad_norm": 1.994909722907598, - "learning_rate": 7.951329984975135e-07, - "loss": 1.0787, - "step": 5943 - }, - { - "epoch": 0.7147237419587567, - "grad_norm": 0.7326239955487124, - "learning_rate": 7.94511338820349e-07, - "loss": 0.7943, - "step": 5944 - }, - { - "epoch": 0.7148439848493958, - "grad_norm": 3.5720447036064638, - "learning_rate": 7.938898620148575e-07, - "loss": 1.0128, - "step": 5945 - }, - { - "epoch": 0.7149642277400349, - "grad_norm": 1.9091949228132519, - "learning_rate": 7.932685681753135e-07, - "loss": 0.9376, - "step": 5946 - }, - { - "epoch": 0.7150844706306739, - "grad_norm": 2.0054137153638116, - "learning_rate": 7.92647457395969e-07, - "loss": 0.8545, - "step": 5947 - }, - { - "epoch": 0.7152047135213131, - "grad_norm": 2.0820341128102386, - "learning_rate": 7.920265297710444e-07, - "loss": 0.9721, - "step": 5948 - }, - { - "epoch": 0.7153249564119522, - "grad_norm": 4.364083321395684, - "learning_rate": 7.914057853947363e-07, - "loss": 0.9566, - "step": 5949 - }, - { - "epoch": 0.7154451993025912, - "grad_norm": 1.859790723471454, - "learning_rate": 7.907852243612089e-07, - "loss": 0.8651, - "step": 5950 - }, - { - "epoch": 0.7155654421932304, - "grad_norm": 2.958780514584326, - "learning_rate": 7.901648467646009e-07, - "loss": 0.9489, - "step": 5951 - }, - { - "epoch": 0.7156856850838694, - "grad_norm": 1.5172544549061693, - "learning_rate": 7.895446526990244e-07, - "loss": 0.9557, - "step": 5952 - }, - { - "epoch": 0.7158059279745085, - "grad_norm": 1.538737926048427, - "learning_rate": 7.889246422585609e-07, - "loss": 0.9812, - "step": 5953 - }, - { - "epoch": 0.7159261708651476, - "grad_norm": 1.7415168737338245, - "learning_rate": 7.883048155372675e-07, - "loss": 0.9674, - "step": 5954 - }, - { - "epoch": 0.7160464137557867, - "grad_norm": 2.474325330851929, - "learning_rate": 7.876851726291698e-07, - "loss": 0.9373, - "step": 5955 - }, - { - "epoch": 0.7161666566464258, - "grad_norm": 1.8329193630431364, - "learning_rate": 7.870657136282666e-07, - "loss": 1.0097, - "step": 5956 - }, - { - "epoch": 0.7162868995370649, - "grad_norm": 2.1321865183204474, - "learning_rate": 7.86446438628531e-07, - "loss": 1.0448, - "step": 5957 - }, - { - "epoch": 0.716407142427704, - "grad_norm": 0.7980222857884106, - "learning_rate": 7.858273477239059e-07, - "loss": 0.8301, - "step": 5958 - }, - { - "epoch": 0.716527385318343, - "grad_norm": 1.805777810755187, - "learning_rate": 7.852084410083067e-07, - "loss": 0.9415, - "step": 5959 - }, - { - "epoch": 0.7166476282089821, - "grad_norm": 2.6971690984493137, - "learning_rate": 7.84589718575621e-07, - "loss": 0.8673, - "step": 5960 - }, - { - "epoch": 0.7167678710996213, - "grad_norm": 1.9691080645756358, - "learning_rate": 7.83971180519708e-07, - "loss": 0.9168, - "step": 5961 - }, - { - "epoch": 0.7168881139902603, - "grad_norm": 3.0192640415808625, - "learning_rate": 7.833528269344008e-07, - "loss": 0.9754, - "step": 5962 - }, - { - "epoch": 0.7170083568808994, - "grad_norm": 2.20723400703743, - "learning_rate": 7.827346579135023e-07, - "loss": 1.0076, - "step": 5963 - }, - { - "epoch": 0.7171285997715385, - "grad_norm": 1.925278294251234, - "learning_rate": 7.821166735507885e-07, - "loss": 1.0588, - "step": 5964 - }, - { - "epoch": 0.7172488426621776, - "grad_norm": 2.4594284032430607, - "learning_rate": 7.81498873940007e-07, - "loss": 0.9149, - "step": 5965 - }, - { - "epoch": 0.7173690855528166, - "grad_norm": 2.186994003629553, - "learning_rate": 7.808812591748768e-07, - "loss": 0.9937, - "step": 5966 - }, - { - "epoch": 0.7174893284434558, - "grad_norm": 2.0807460231997377, - "learning_rate": 7.802638293490915e-07, - "loss": 0.879, - "step": 5967 - }, - { - "epoch": 0.7176095713340949, - "grad_norm": 1.7472282666639376, - "learning_rate": 7.796465845563123e-07, - "loss": 1.0056, - "step": 5968 - }, - { - "epoch": 0.7177298142247339, - "grad_norm": 2.1347044214408104, - "learning_rate": 7.790295248901766e-07, - "loss": 1.0331, - "step": 5969 - }, - { - "epoch": 0.7178500571153731, - "grad_norm": 1.660515959422059, - "learning_rate": 7.784126504442902e-07, - "loss": 0.8584, - "step": 5970 - }, - { - "epoch": 0.7179703000060121, - "grad_norm": 1.4375135172332794, - "learning_rate": 7.777959613122351e-07, - "loss": 0.9063, - "step": 5971 - }, - { - "epoch": 0.7180905428966512, - "grad_norm": 1.8678324076482111, - "learning_rate": 7.771794575875604e-07, - "loss": 1.0083, - "step": 5972 - }, - { - "epoch": 0.7182107857872904, - "grad_norm": 2.209412281202043, - "learning_rate": 7.765631393637888e-07, - "loss": 1.007, - "step": 5973 - }, - { - "epoch": 0.7183310286779294, - "grad_norm": 3.2865760810341484, - "learning_rate": 7.75947006734417e-07, - "loss": 0.7136, - "step": 5974 - }, - { - "epoch": 0.7184512715685685, - "grad_norm": 2.404528708169367, - "learning_rate": 7.753310597929101e-07, - "loss": 1.0501, - "step": 5975 - }, - { - "epoch": 0.7185715144592076, - "grad_norm": 0.8293010350400178, - "learning_rate": 7.747152986327095e-07, - "loss": 0.8126, - "step": 5976 - }, - { - "epoch": 0.7186917573498467, - "grad_norm": 4.010753957593211, - "learning_rate": 7.740997233472228e-07, - "loss": 0.9099, - "step": 5977 - }, - { - "epoch": 0.7188120002404857, - "grad_norm": 2.372941982635501, - "learning_rate": 7.734843340298329e-07, - "loss": 0.939, - "step": 5978 - }, - { - "epoch": 0.7189322431311249, - "grad_norm": 2.134381063293513, - "learning_rate": 7.72869130773895e-07, - "loss": 0.9873, - "step": 5979 - }, - { - "epoch": 0.719052486021764, - "grad_norm": 0.8039019926531197, - "learning_rate": 7.722541136727343e-07, - "loss": 0.8426, - "step": 5980 - }, - { - "epoch": 0.719172728912403, - "grad_norm": 2.2611797000043294, - "learning_rate": 7.716392828196483e-07, - "loss": 1.039, - "step": 5981 - }, - { - "epoch": 0.7192929718030422, - "grad_norm": 2.0860661956290807, - "learning_rate": 7.710246383079064e-07, - "loss": 0.9922, - "step": 5982 - }, - { - "epoch": 0.7194132146936812, - "grad_norm": 4.853300658594606, - "learning_rate": 7.704101802307492e-07, - "loss": 1.1496, - "step": 5983 - }, - { - "epoch": 0.7195334575843203, - "grad_norm": 1.8916604855586272, - "learning_rate": 7.697959086813912e-07, - "loss": 1.1068, - "step": 5984 - }, - { - "epoch": 0.7196537004749595, - "grad_norm": 2.233538491044872, - "learning_rate": 7.691818237530145e-07, - "loss": 1.0328, - "step": 5985 - }, - { - "epoch": 0.7197739433655985, - "grad_norm": 1.728304092481134, - "learning_rate": 7.685679255387774e-07, - "loss": 1.0088, - "step": 5986 - }, - { - "epoch": 0.7198941862562376, - "grad_norm": 1.8334076678493876, - "learning_rate": 7.679542141318065e-07, - "loss": 0.9929, - "step": 5987 - }, - { - "epoch": 0.7200144291468767, - "grad_norm": 4.6886383708865, - "learning_rate": 7.673406896252013e-07, - "loss": 0.9934, - "step": 5988 - }, - { - "epoch": 0.7201346720375158, - "grad_norm": 1.9348891109659538, - "learning_rate": 7.667273521120347e-07, - "loss": 1.0126, - "step": 5989 - }, - { - "epoch": 0.7202549149281549, - "grad_norm": 1.7570880668684326, - "learning_rate": 7.661142016853468e-07, - "loss": 1.0291, - "step": 5990 - }, - { - "epoch": 0.7203751578187939, - "grad_norm": 1.645485354307055, - "learning_rate": 7.655012384381543e-07, - "loss": 0.9789, - "step": 5991 - }, - { - "epoch": 0.7204954007094331, - "grad_norm": 1.7671879964839599, - "learning_rate": 7.648884624634415e-07, - "loss": 1.0428, - "step": 5992 - }, - { - "epoch": 0.7206156436000721, - "grad_norm": 1.8596647661883954, - "learning_rate": 7.642758738541683e-07, - "loss": 1.1124, - "step": 5993 - }, - { - "epoch": 0.7207358864907112, - "grad_norm": 0.7798379726115653, - "learning_rate": 7.636634727032621e-07, - "loss": 0.8507, - "step": 5994 - }, - { - "epoch": 0.7208561293813504, - "grad_norm": 2.107560797523713, - "learning_rate": 7.630512591036231e-07, - "loss": 1.0291, - "step": 5995 - }, - { - "epoch": 0.7209763722719894, - "grad_norm": 2.752103347955812, - "learning_rate": 7.624392331481255e-07, - "loss": 0.8746, - "step": 5996 - }, - { - "epoch": 0.7210966151626285, - "grad_norm": 0.7549276597690503, - "learning_rate": 7.618273949296115e-07, - "loss": 0.7741, - "step": 5997 - }, - { - "epoch": 0.7212168580532676, - "grad_norm": 4.858999158056437, - "learning_rate": 7.612157445408987e-07, - "loss": 0.9123, - "step": 5998 - }, - { - "epoch": 0.7213371009439067, - "grad_norm": 2.334937758831264, - "learning_rate": 7.606042820747716e-07, - "loss": 0.9718, - "step": 5999 - }, - { - "epoch": 0.7214573438345457, - "grad_norm": 1.6421963387495544, - "learning_rate": 7.599930076239889e-07, - "loss": 1.0817, - "step": 6000 - }, - { - "epoch": 0.7215775867251849, - "grad_norm": 2.0324015947569527, - "learning_rate": 7.593819212812818e-07, - "loss": 0.9389, - "step": 6001 - }, - { - "epoch": 0.721697829615824, - "grad_norm": 2.424850211482396, - "learning_rate": 7.587710231393508e-07, - "loss": 0.9565, - "step": 6002 - }, - { - "epoch": 0.721818072506463, - "grad_norm": 2.1498093542694874, - "learning_rate": 7.581603132908685e-07, - "loss": 1.0619, - "step": 6003 - }, - { - "epoch": 0.7219383153971022, - "grad_norm": 1.7185485028150094, - "learning_rate": 7.575497918284795e-07, - "loss": 1.0079, - "step": 6004 - }, - { - "epoch": 0.7220585582877412, - "grad_norm": 2.056117963175073, - "learning_rate": 7.569394588447984e-07, - "loss": 0.9778, - "step": 6005 - }, - { - "epoch": 0.7221788011783803, - "grad_norm": 2.65863145772921, - "learning_rate": 7.563293144324146e-07, - "loss": 1.0061, - "step": 6006 - }, - { - "epoch": 0.7222990440690195, - "grad_norm": 2.2802148848210497, - "learning_rate": 7.557193586838834e-07, - "loss": 1.0317, - "step": 6007 - }, - { - "epoch": 0.7224192869596585, - "grad_norm": 2.0302788393536844, - "learning_rate": 7.551095916917371e-07, - "loss": 0.9364, - "step": 6008 - }, - { - "epoch": 0.7225395298502976, - "grad_norm": 2.7964347620747456, - "learning_rate": 7.545000135484758e-07, - "loss": 0.8868, - "step": 6009 - }, - { - "epoch": 0.7226597727409367, - "grad_norm": 2.3005601035086993, - "learning_rate": 7.538906243465714e-07, - "loss": 0.8583, - "step": 6010 - }, - { - "epoch": 0.7227800156315758, - "grad_norm": 2.122058852577777, - "learning_rate": 7.5328142417847e-07, - "loss": 1.01, - "step": 6011 - }, - { - "epoch": 0.7229002585222148, - "grad_norm": 1.920498859249254, - "learning_rate": 7.526724131365838e-07, - "loss": 0.9237, - "step": 6012 - }, - { - "epoch": 0.723020501412854, - "grad_norm": 1.7427956926514545, - "learning_rate": 7.520635913133017e-07, - "loss": 0.9312, - "step": 6013 - }, - { - "epoch": 0.7231407443034931, - "grad_norm": 1.9073824411066191, - "learning_rate": 7.514549588009798e-07, - "loss": 1.049, - "step": 6014 - }, - { - "epoch": 0.7232609871941321, - "grad_norm": 1.9005689547102143, - "learning_rate": 7.508465156919492e-07, - "loss": 0.94, - "step": 6015 - }, - { - "epoch": 0.7233812300847713, - "grad_norm": 2.9497123254716806, - "learning_rate": 7.502382620785083e-07, - "loss": 0.8526, - "step": 6016 - }, - { - "epoch": 0.7235014729754103, - "grad_norm": 0.9048997751873714, - "learning_rate": 7.496301980529289e-07, - "loss": 0.9242, - "step": 6017 - }, - { - "epoch": 0.7236217158660494, - "grad_norm": 1.942729905514096, - "learning_rate": 7.490223237074547e-07, - "loss": 0.9771, - "step": 6018 - }, - { - "epoch": 0.7237419587566886, - "grad_norm": 2.5951204471161415, - "learning_rate": 7.484146391342989e-07, - "loss": 0.8835, - "step": 6019 - }, - { - "epoch": 0.7238622016473276, - "grad_norm": 4.402920161424231, - "learning_rate": 7.478071444256484e-07, - "loss": 0.8084, - "step": 6020 - }, - { - "epoch": 0.7239824445379667, - "grad_norm": 2.082648151893392, - "learning_rate": 7.471998396736579e-07, - "loss": 1.0122, - "step": 6021 - }, - { - "epoch": 0.7241026874286057, - "grad_norm": 1.8558765485254398, - "learning_rate": 7.465927249704549e-07, - "loss": 0.987, - "step": 6022 - }, - { - "epoch": 0.7242229303192449, - "grad_norm": 1.9679603809695139, - "learning_rate": 7.459858004081398e-07, - "loss": 0.9988, - "step": 6023 - }, - { - "epoch": 0.724343173209884, - "grad_norm": 0.9152236475460334, - "learning_rate": 7.453790660787815e-07, - "loss": 0.8318, - "step": 6024 - }, - { - "epoch": 0.724463416100523, - "grad_norm": 2.944269536979296, - "learning_rate": 7.447725220744214e-07, - "loss": 0.8701, - "step": 6025 - }, - { - "epoch": 0.7245836589911622, - "grad_norm": 2.118629893271665, - "learning_rate": 7.441661684870717e-07, - "loss": 0.9936, - "step": 6026 - }, - { - "epoch": 0.7247039018818012, - "grad_norm": 2.087528950381973, - "learning_rate": 7.435600054087152e-07, - "loss": 1.0441, - "step": 6027 - }, - { - "epoch": 0.7248241447724403, - "grad_norm": 16.305507286530332, - "learning_rate": 7.42954032931308e-07, - "loss": 0.9759, - "step": 6028 - }, - { - "epoch": 0.7249443876630794, - "grad_norm": 1.9212051092498847, - "learning_rate": 7.423482511467733e-07, - "loss": 0.9712, - "step": 6029 - }, - { - "epoch": 0.7250646305537185, - "grad_norm": 2.209263791491801, - "learning_rate": 7.417426601470099e-07, - "loss": 0.8856, - "step": 6030 - }, - { - "epoch": 0.7251848734443576, - "grad_norm": 2.1442909870462277, - "learning_rate": 7.411372600238841e-07, - "loss": 1.0094, - "step": 6031 - }, - { - "epoch": 0.7253051163349967, - "grad_norm": 2.611668475389209, - "learning_rate": 7.405320508692346e-07, - "loss": 0.9669, - "step": 6032 - }, - { - "epoch": 0.7254253592256358, - "grad_norm": 2.037855704682721, - "learning_rate": 7.399270327748727e-07, - "loss": 0.9812, - "step": 6033 - }, - { - "epoch": 0.7255456021162748, - "grad_norm": 1.7689801635880744, - "learning_rate": 7.39322205832577e-07, - "loss": 0.9743, - "step": 6034 - }, - { - "epoch": 0.725665845006914, - "grad_norm": 3.9174094747029797, - "learning_rate": 7.387175701341009e-07, - "loss": 1.032, - "step": 6035 - }, - { - "epoch": 0.7257860878975531, - "grad_norm": 2.4759980048755503, - "learning_rate": 7.381131257711659e-07, - "loss": 0.9634, - "step": 6036 - }, - { - "epoch": 0.7259063307881921, - "grad_norm": 1.9916933531416448, - "learning_rate": 7.375088728354677e-07, - "loss": 1.0647, - "step": 6037 - }, - { - "epoch": 0.7260265736788313, - "grad_norm": 1.6266315779002225, - "learning_rate": 7.369048114186691e-07, - "loss": 0.9004, - "step": 6038 - }, - { - "epoch": 0.7261468165694703, - "grad_norm": 1.9872258051182694, - "learning_rate": 7.363009416124055e-07, - "loss": 1.0614, - "step": 6039 - }, - { - "epoch": 0.7262670594601094, - "grad_norm": 2.3368992691432626, - "learning_rate": 7.356972635082852e-07, - "loss": 0.8672, - "step": 6040 - }, - { - "epoch": 0.7263873023507486, - "grad_norm": 1.55497601003019, - "learning_rate": 7.35093777197884e-07, - "loss": 0.9744, - "step": 6041 - }, - { - "epoch": 0.7265075452413876, - "grad_norm": 2.8413736664039435, - "learning_rate": 7.344904827727525e-07, - "loss": 1.0783, - "step": 6042 - }, - { - "epoch": 0.7266277881320267, - "grad_norm": 2.6920118384193072, - "learning_rate": 7.338873803244076e-07, - "loss": 0.9604, - "step": 6043 - }, - { - "epoch": 0.7267480310226658, - "grad_norm": 1.6724145321754533, - "learning_rate": 7.332844699443401e-07, - "loss": 1.0295, - "step": 6044 - }, - { - "epoch": 0.7268682739133049, - "grad_norm": 2.9443707566867374, - "learning_rate": 7.326817517240121e-07, - "loss": 0.9759, - "step": 6045 - }, - { - "epoch": 0.7269885168039439, - "grad_norm": 1.861152120236453, - "learning_rate": 7.320792257548545e-07, - "loss": 1.066, - "step": 6046 - }, - { - "epoch": 0.7271087596945831, - "grad_norm": 2.099442671434409, - "learning_rate": 7.314768921282704e-07, - "loss": 0.9953, - "step": 6047 - }, - { - "epoch": 0.7272290025852222, - "grad_norm": 2.643153227418354, - "learning_rate": 7.30874750935633e-07, - "loss": 0.9475, - "step": 6048 - }, - { - "epoch": 0.7273492454758612, - "grad_norm": 1.9162388476142418, - "learning_rate": 7.30272802268286e-07, - "loss": 1.0182, - "step": 6049 - }, - { - "epoch": 0.7274694883665004, - "grad_norm": 1.7482324256662787, - "learning_rate": 7.29671046217547e-07, - "loss": 0.9914, - "step": 6050 - }, - { - "epoch": 0.7275897312571394, - "grad_norm": 1.6313657054570585, - "learning_rate": 7.290694828746988e-07, - "loss": 1.0476, - "step": 6051 - }, - { - "epoch": 0.7277099741477785, - "grad_norm": 1.792853723630212, - "learning_rate": 7.284681123310004e-07, - "loss": 1.0905, - "step": 6052 - }, - { - "epoch": 0.7278302170384175, - "grad_norm": 1.7015072900311579, - "learning_rate": 7.27866934677678e-07, - "loss": 1.0223, - "step": 6053 - }, - { - "epoch": 0.7279504599290567, - "grad_norm": 1.8553202680104206, - "learning_rate": 7.272659500059297e-07, - "loss": 1.0094, - "step": 6054 - }, - { - "epoch": 0.7280707028196958, - "grad_norm": 2.0834190223873494, - "learning_rate": 7.266651584069264e-07, - "loss": 1.0376, - "step": 6055 - }, - { - "epoch": 0.7281909457103348, - "grad_norm": 1.627058485999816, - "learning_rate": 7.260645599718045e-07, - "loss": 0.8071, - "step": 6056 - }, - { - "epoch": 0.728311188600974, - "grad_norm": 3.4200519600205963, - "learning_rate": 7.254641547916767e-07, - "loss": 0.9012, - "step": 6057 - }, - { - "epoch": 0.728431431491613, - "grad_norm": 5.5370232577077445, - "learning_rate": 7.248639429576226e-07, - "loss": 0.9199, - "step": 6058 - }, - { - "epoch": 0.7285516743822521, - "grad_norm": 1.8257228360443858, - "learning_rate": 7.242639245606959e-07, - "loss": 0.9554, - "step": 6059 - }, - { - "epoch": 0.7286719172728913, - "grad_norm": 1.5850351707575614, - "learning_rate": 7.236640996919168e-07, - "loss": 1.0583, - "step": 6060 - }, - { - "epoch": 0.7287921601635303, - "grad_norm": 1.556997758397012, - "learning_rate": 7.230644684422782e-07, - "loss": 0.9332, - "step": 6061 - }, - { - "epoch": 0.7289124030541694, - "grad_norm": 6.924656431854128, - "learning_rate": 7.224650309027451e-07, - "loss": 1.0466, - "step": 6062 - }, - { - "epoch": 0.7290326459448085, - "grad_norm": 3.5249501975579274, - "learning_rate": 7.218657871642506e-07, - "loss": 0.9114, - "step": 6063 - }, - { - "epoch": 0.7291528888354476, - "grad_norm": 2.5860147608714117, - "learning_rate": 7.212667373177012e-07, - "loss": 0.8442, - "step": 6064 - }, - { - "epoch": 0.7292731317260867, - "grad_norm": 1.8247323793646781, - "learning_rate": 7.206678814539704e-07, - "loss": 0.9866, - "step": 6065 - }, - { - "epoch": 0.7293933746167258, - "grad_norm": 1.592220095674228, - "learning_rate": 7.20069219663904e-07, - "loss": 0.9572, - "step": 6066 - }, - { - "epoch": 0.7295136175073649, - "grad_norm": 1.903114053228055, - "learning_rate": 7.1947075203832e-07, - "loss": 1.0236, - "step": 6067 - }, - { - "epoch": 0.7296338603980039, - "grad_norm": 0.8729799317503588, - "learning_rate": 7.188724786680049e-07, - "loss": 0.8455, - "step": 6068 - }, - { - "epoch": 0.7297541032886431, - "grad_norm": 1.965870886434958, - "learning_rate": 7.182743996437162e-07, - "loss": 0.9778, - "step": 6069 - }, - { - "epoch": 0.7298743461792822, - "grad_norm": 1.910888781047373, - "learning_rate": 7.176765150561819e-07, - "loss": 0.9145, - "step": 6070 - }, - { - "epoch": 0.7299945890699212, - "grad_norm": 2.2011106765214308, - "learning_rate": 7.170788249961002e-07, - "loss": 1.0249, - "step": 6071 - }, - { - "epoch": 0.7301148319605604, - "grad_norm": 1.8869568100005938, - "learning_rate": 7.164813295541418e-07, - "loss": 1.1123, - "step": 6072 - }, - { - "epoch": 0.7302350748511994, - "grad_norm": 1.7166520926711792, - "learning_rate": 7.15884028820944e-07, - "loss": 0.9261, - "step": 6073 - }, - { - "epoch": 0.7303553177418385, - "grad_norm": 2.287642731929839, - "learning_rate": 7.152869228871185e-07, - "loss": 0.8326, - "step": 6074 - }, - { - "epoch": 0.7304755606324776, - "grad_norm": 1.7442731475890663, - "learning_rate": 7.146900118432457e-07, - "loss": 0.9543, - "step": 6075 - }, - { - "epoch": 0.7305958035231167, - "grad_norm": 1.7967218084961998, - "learning_rate": 7.140932957798753e-07, - "loss": 1.083, - "step": 6076 - }, - { - "epoch": 0.7307160464137558, - "grad_norm": 1.8482856565586274, - "learning_rate": 7.134967747875309e-07, - "loss": 0.9434, - "step": 6077 - }, - { - "epoch": 0.7308362893043949, - "grad_norm": 1.960775482658059, - "learning_rate": 7.129004489567014e-07, - "loss": 1.0458, - "step": 6078 - }, - { - "epoch": 0.730956532195034, - "grad_norm": 2.129125293347131, - "learning_rate": 7.123043183778512e-07, - "loss": 1.0099, - "step": 6079 - }, - { - "epoch": 0.731076775085673, - "grad_norm": 1.475033568953946, - "learning_rate": 7.117083831414114e-07, - "loss": 0.8775, - "step": 6080 - }, - { - "epoch": 0.7311970179763122, - "grad_norm": 1.764354822825164, - "learning_rate": 7.11112643337787e-07, - "loss": 0.935, - "step": 6081 - }, - { - "epoch": 0.7313172608669513, - "grad_norm": 2.440625265710013, - "learning_rate": 7.10517099057349e-07, - "loss": 0.9973, - "step": 6082 - }, - { - "epoch": 0.7314375037575903, - "grad_norm": 3.097581898589485, - "learning_rate": 7.099217503904411e-07, - "loss": 0.8488, - "step": 6083 - }, - { - "epoch": 0.7315577466482295, - "grad_norm": 3.198260973227271, - "learning_rate": 7.093265974273788e-07, - "loss": 1.1332, - "step": 6084 - }, - { - "epoch": 0.7316779895388685, - "grad_norm": 1.8004910117245188, - "learning_rate": 7.087316402584447e-07, - "loss": 0.9528, - "step": 6085 - }, - { - "epoch": 0.7317982324295076, - "grad_norm": 1.9583682063900334, - "learning_rate": 7.081368789738953e-07, - "loss": 1.0844, - "step": 6086 - }, - { - "epoch": 0.7319184753201466, - "grad_norm": 3.151222833306748, - "learning_rate": 7.075423136639537e-07, - "loss": 1.0007, - "step": 6087 - }, - { - "epoch": 0.7320387182107858, - "grad_norm": 2.349557725397349, - "learning_rate": 7.069479444188149e-07, - "loss": 0.9744, - "step": 6088 - }, - { - "epoch": 0.7321589611014249, - "grad_norm": 2.033847849531209, - "learning_rate": 7.063537713286453e-07, - "loss": 1.0579, - "step": 6089 - }, - { - "epoch": 0.7322792039920639, - "grad_norm": 1.6831379780231819, - "learning_rate": 7.057597944835803e-07, - "loss": 1.0372, - "step": 6090 - }, - { - "epoch": 0.7323994468827031, - "grad_norm": 1.6695432713405605, - "learning_rate": 7.051660139737253e-07, - "loss": 0.9729, - "step": 6091 - }, - { - "epoch": 0.7325196897733421, - "grad_norm": 2.8470461679834758, - "learning_rate": 7.045724298891565e-07, - "loss": 0.9863, - "step": 6092 - }, - { - "epoch": 0.7326399326639812, - "grad_norm": 1.8169565987282106, - "learning_rate": 7.039790423199192e-07, - "loss": 0.9232, - "step": 6093 - }, - { - "epoch": 0.7327601755546204, - "grad_norm": 2.062108551794091, - "learning_rate": 7.033858513560322e-07, - "loss": 1.0098, - "step": 6094 - }, - { - "epoch": 0.7328804184452594, - "grad_norm": 2.3668140078961915, - "learning_rate": 7.027928570874794e-07, - "loss": 1.0029, - "step": 6095 - }, - { - "epoch": 0.7330006613358985, - "grad_norm": 1.8014803044674215, - "learning_rate": 7.022000596042194e-07, - "loss": 1.0767, - "step": 6096 - }, - { - "epoch": 0.7331209042265376, - "grad_norm": 2.544609606915219, - "learning_rate": 7.016074589961784e-07, - "loss": 1.052, - "step": 6097 - }, - { - "epoch": 0.7332411471171767, - "grad_norm": 2.943103234904639, - "learning_rate": 7.01015055353253e-07, - "loss": 0.895, - "step": 6098 - }, - { - "epoch": 0.7333613900078157, - "grad_norm": 2.5409855998418243, - "learning_rate": 7.004228487653123e-07, - "loss": 1.0016, - "step": 6099 - }, - { - "epoch": 0.7334816328984549, - "grad_norm": 2.3395477260508026, - "learning_rate": 6.998308393221906e-07, - "loss": 1.012, - "step": 6100 - }, - { - "epoch": 0.733601875789094, - "grad_norm": 2.0707240433383847, - "learning_rate": 6.992390271136977e-07, - "loss": 0.9451, - "step": 6101 - }, - { - "epoch": 0.733722118679733, - "grad_norm": 1.7645390639743928, - "learning_rate": 6.986474122296094e-07, - "loss": 1.0907, - "step": 6102 - }, - { - "epoch": 0.7338423615703722, - "grad_norm": 2.0483035860728735, - "learning_rate": 6.980559947596751e-07, - "loss": 0.9579, - "step": 6103 - }, - { - "epoch": 0.7339626044610112, - "grad_norm": 2.4535858304590037, - "learning_rate": 6.974647747936109e-07, - "loss": 0.9895, - "step": 6104 - }, - { - "epoch": 0.7340828473516503, - "grad_norm": 1.9881806413718623, - "learning_rate": 6.968737524211039e-07, - "loss": 1.0503, - "step": 6105 - }, - { - "epoch": 0.7342030902422895, - "grad_norm": 2.2676460459654444, - "learning_rate": 6.962829277318132e-07, - "loss": 1.0325, - "step": 6106 - }, - { - "epoch": 0.7343233331329285, - "grad_norm": 2.181819187872106, - "learning_rate": 6.956923008153652e-07, - "loss": 1.0612, - "step": 6107 - }, - { - "epoch": 0.7344435760235676, - "grad_norm": 2.1750880563383928, - "learning_rate": 6.951018717613593e-07, - "loss": 1.0713, - "step": 6108 - }, - { - "epoch": 0.7345638189142067, - "grad_norm": 2.01023925894515, - "learning_rate": 6.945116406593614e-07, - "loss": 1.0038, - "step": 6109 - }, - { - "epoch": 0.7346840618048458, - "grad_norm": 2.75705715133767, - "learning_rate": 6.939216075989089e-07, - "loss": 0.9688, - "step": 6110 - }, - { - "epoch": 0.7348043046954849, - "grad_norm": 2.518874350381828, - "learning_rate": 6.933317726695109e-07, - "loss": 0.8912, - "step": 6111 - }, - { - "epoch": 0.734924547586124, - "grad_norm": 2.4434418271772036, - "learning_rate": 6.92742135960644e-07, - "loss": 1.0214, - "step": 6112 - }, - { - "epoch": 0.7350447904767631, - "grad_norm": 0.8724962926310388, - "learning_rate": 6.921526975617556e-07, - "loss": 0.8293, - "step": 6113 - }, - { - "epoch": 0.7351650333674021, - "grad_norm": 1.7512848769315252, - "learning_rate": 6.915634575622631e-07, - "loss": 0.9706, - "step": 6114 - }, - { - "epoch": 0.7352852762580413, - "grad_norm": 1.8584724607975025, - "learning_rate": 6.909744160515532e-07, - "loss": 0.9452, - "step": 6115 - }, - { - "epoch": 0.7354055191486804, - "grad_norm": 3.3337407975700963, - "learning_rate": 6.903855731189849e-07, - "loss": 0.9192, - "step": 6116 - }, - { - "epoch": 0.7355257620393194, - "grad_norm": 2.3071415653587883, - "learning_rate": 6.897969288538825e-07, - "loss": 1.0522, - "step": 6117 - }, - { - "epoch": 0.7356460049299585, - "grad_norm": 1.8752537237795834, - "learning_rate": 6.892084833455452e-07, - "loss": 1.0443, - "step": 6118 - }, - { - "epoch": 0.7357662478205976, - "grad_norm": 1.8762741845673376, - "learning_rate": 6.886202366832384e-07, - "loss": 1.071, - "step": 6119 - }, - { - "epoch": 0.7358864907112367, - "grad_norm": 1.784464762713073, - "learning_rate": 6.880321889561987e-07, - "loss": 0.9684, - "step": 6120 - }, - { - "epoch": 0.7360067336018757, - "grad_norm": 2.796935864004743, - "learning_rate": 6.874443402536338e-07, - "loss": 0.892, - "step": 6121 - }, - { - "epoch": 0.7361269764925149, - "grad_norm": 1.8544600465388799, - "learning_rate": 6.868566906647177e-07, - "loss": 1.0321, - "step": 6122 - }, - { - "epoch": 0.736247219383154, - "grad_norm": 2.4111700908734752, - "learning_rate": 6.862692402785984e-07, - "loss": 1.0566, - "step": 6123 - }, - { - "epoch": 0.736367462273793, - "grad_norm": 0.6963694821606284, - "learning_rate": 6.856819891843899e-07, - "loss": 0.7468, - "step": 6124 - }, - { - "epoch": 0.7364877051644322, - "grad_norm": 3.0767605463530514, - "learning_rate": 6.8509493747118e-07, - "loss": 0.9534, - "step": 6125 - }, - { - "epoch": 0.7366079480550712, - "grad_norm": 2.4478220387018124, - "learning_rate": 6.845080852280221e-07, - "loss": 1.1127, - "step": 6126 - }, - { - "epoch": 0.7367281909457103, - "grad_norm": 10.914976110427862, - "learning_rate": 6.839214325439409e-07, - "loss": 0.9771, - "step": 6127 - }, - { - "epoch": 0.7368484338363495, - "grad_norm": 1.5344405896372204, - "learning_rate": 6.833349795079327e-07, - "loss": 0.947, - "step": 6128 - }, - { - "epoch": 0.7369686767269885, - "grad_norm": 1.7189013327819604, - "learning_rate": 6.827487262089613e-07, - "loss": 0.912, - "step": 6129 - }, - { - "epoch": 0.7370889196176276, - "grad_norm": 0.8763091647734436, - "learning_rate": 6.821626727359606e-07, - "loss": 0.8313, - "step": 6130 - }, - { - "epoch": 0.7372091625082667, - "grad_norm": 2.8402433468432084, - "learning_rate": 6.815768191778348e-07, - "loss": 1.0045, - "step": 6131 - }, - { - "epoch": 0.7373294053989058, - "grad_norm": 2.1561593299267736, - "learning_rate": 6.809911656234569e-07, - "loss": 0.9625, - "step": 6132 - }, - { - "epoch": 0.7374496482895448, - "grad_norm": 1.873846971116075, - "learning_rate": 6.804057121616707e-07, - "loss": 1.0109, - "step": 6133 - }, - { - "epoch": 0.737569891180184, - "grad_norm": 2.6514964934514165, - "learning_rate": 6.798204588812888e-07, - "loss": 0.9529, - "step": 6134 - }, - { - "epoch": 0.7376901340708231, - "grad_norm": 1.7827539619912114, - "learning_rate": 6.792354058710937e-07, - "loss": 0.9774, - "step": 6135 - }, - { - "epoch": 0.7378103769614621, - "grad_norm": 2.04172888161462, - "learning_rate": 6.786505532198374e-07, - "loss": 0.8827, - "step": 6136 - }, - { - "epoch": 0.7379306198521013, - "grad_norm": 1.7452602231794974, - "learning_rate": 6.780659010162411e-07, - "loss": 1.0791, - "step": 6137 - }, - { - "epoch": 0.7380508627427403, - "grad_norm": 1.6218215227957695, - "learning_rate": 6.774814493489975e-07, - "loss": 1.0579, - "step": 6138 - }, - { - "epoch": 0.7381711056333794, - "grad_norm": 1.912376564380802, - "learning_rate": 6.768971983067655e-07, - "loss": 0.891, - "step": 6139 - }, - { - "epoch": 0.7382913485240186, - "grad_norm": 1.2380215345398626, - "learning_rate": 6.763131479781772e-07, - "loss": 0.9365, - "step": 6140 - }, - { - "epoch": 0.7384115914146576, - "grad_norm": 1.973103025094721, - "learning_rate": 6.757292984518316e-07, - "loss": 1.0023, - "step": 6141 - }, - { - "epoch": 0.7385318343052967, - "grad_norm": 0.8503145645604415, - "learning_rate": 6.751456498162981e-07, - "loss": 0.8455, - "step": 6142 - }, - { - "epoch": 0.7386520771959358, - "grad_norm": 2.8146083135129287, - "learning_rate": 6.745622021601174e-07, - "loss": 1.0851, - "step": 6143 - }, - { - "epoch": 0.7387723200865749, - "grad_norm": 3.2155651149975277, - "learning_rate": 6.739789555717954e-07, - "loss": 0.9333, - "step": 6144 - }, - { - "epoch": 0.738892562977214, - "grad_norm": 2.2107168566056954, - "learning_rate": 6.733959101398124e-07, - "loss": 1.0106, - "step": 6145 - }, - { - "epoch": 0.7390128058678531, - "grad_norm": 2.242812651035856, - "learning_rate": 6.728130659526143e-07, - "loss": 1.0449, - "step": 6146 - }, - { - "epoch": 0.7391330487584922, - "grad_norm": 2.404411505314689, - "learning_rate": 6.7223042309862e-07, - "loss": 0.9379, - "step": 6147 - }, - { - "epoch": 0.7392532916491312, - "grad_norm": 2.841152472041014, - "learning_rate": 6.716479816662144e-07, - "loss": 0.9623, - "step": 6148 - }, - { - "epoch": 0.7393735345397703, - "grad_norm": 2.1981482602239963, - "learning_rate": 6.710657417437531e-07, - "loss": 0.9603, - "step": 6149 - }, - { - "epoch": 0.7394937774304094, - "grad_norm": 2.8067347458339698, - "learning_rate": 6.704837034195628e-07, - "loss": 1.0201, - "step": 6150 - }, - { - "epoch": 0.7396140203210485, - "grad_norm": 2.0442567342932065, - "learning_rate": 6.699018667819376e-07, - "loss": 1.0814, - "step": 6151 - }, - { - "epoch": 0.7397342632116876, - "grad_norm": 1.7629222607537003, - "learning_rate": 6.693202319191415e-07, - "loss": 0.9521, - "step": 6152 - }, - { - "epoch": 0.7398545061023267, - "grad_norm": 1.8232367606917428, - "learning_rate": 6.687387989194084e-07, - "loss": 0.9674, - "step": 6153 - }, - { - "epoch": 0.7399747489929658, - "grad_norm": 2.1833637914886945, - "learning_rate": 6.681575678709404e-07, - "loss": 1.0262, - "step": 6154 - }, - { - "epoch": 0.7400949918836048, - "grad_norm": 2.0363366626448562, - "learning_rate": 6.67576538861911e-07, - "loss": 0.9333, - "step": 6155 - }, - { - "epoch": 0.740215234774244, - "grad_norm": 1.585929908170522, - "learning_rate": 6.669957119804612e-07, - "loss": 1.0545, - "step": 6156 - }, - { - "epoch": 0.7403354776648831, - "grad_norm": 3.0865153483904155, - "learning_rate": 6.66415087314702e-07, - "loss": 0.9504, - "step": 6157 - }, - { - "epoch": 0.7404557205555221, - "grad_norm": 2.0502647694126788, - "learning_rate": 6.65834664952714e-07, - "loss": 0.9542, - "step": 6158 - }, - { - "epoch": 0.7405759634461613, - "grad_norm": 1.606079208259205, - "learning_rate": 6.652544449825457e-07, - "loss": 0.9841, - "step": 6159 - }, - { - "epoch": 0.7406962063368003, - "grad_norm": 1.9264681134766886, - "learning_rate": 6.646744274922182e-07, - "loss": 0.991, - "step": 6160 - }, - { - "epoch": 0.7408164492274394, - "grad_norm": 3.5351403336140317, - "learning_rate": 6.640946125697171e-07, - "loss": 0.9865, - "step": 6161 - }, - { - "epoch": 0.7409366921180786, - "grad_norm": 2.3427944015449063, - "learning_rate": 6.635150003030017e-07, - "loss": 0.9902, - "step": 6162 - }, - { - "epoch": 0.7410569350087176, - "grad_norm": 2.195881291347717, - "learning_rate": 6.629355907799981e-07, - "loss": 1.0892, - "step": 6163 - }, - { - "epoch": 0.7411771778993567, - "grad_norm": 1.6287474337577366, - "learning_rate": 6.623563840886015e-07, - "loss": 0.9305, - "step": 6164 - }, - { - "epoch": 0.7412974207899958, - "grad_norm": 1.6913163159007627, - "learning_rate": 6.617773803166795e-07, - "loss": 0.9288, - "step": 6165 - }, - { - "epoch": 0.7414176636806349, - "grad_norm": 2.0199558542167746, - "learning_rate": 6.611985795520634e-07, - "loss": 1.0487, - "step": 6166 - }, - { - "epoch": 0.7415379065712739, - "grad_norm": 1.9273141946061167, - "learning_rate": 6.606199818825588e-07, - "loss": 0.9925, - "step": 6167 - }, - { - "epoch": 0.7416581494619131, - "grad_norm": 1.7152563480715088, - "learning_rate": 6.600415873959377e-07, - "loss": 1.0388, - "step": 6168 - }, - { - "epoch": 0.7417783923525522, - "grad_norm": 2.1007192969275175, - "learning_rate": 6.594633961799437e-07, - "loss": 0.8754, - "step": 6169 - }, - { - "epoch": 0.7418986352431912, - "grad_norm": 1.6680791670904815, - "learning_rate": 6.588854083222857e-07, - "loss": 1.0511, - "step": 6170 - }, - { - "epoch": 0.7420188781338304, - "grad_norm": 3.0137905411463684, - "learning_rate": 6.583076239106444e-07, - "loss": 1.0346, - "step": 6171 - }, - { - "epoch": 0.7421391210244694, - "grad_norm": 2.159144352520686, - "learning_rate": 6.577300430326707e-07, - "loss": 0.9909, - "step": 6172 - }, - { - "epoch": 0.7422593639151085, - "grad_norm": 3.7408177172382397, - "learning_rate": 6.571526657759821e-07, - "loss": 0.9463, - "step": 6173 - }, - { - "epoch": 0.7423796068057477, - "grad_norm": 1.6607568709015856, - "learning_rate": 6.565754922281663e-07, - "loss": 0.9425, - "step": 6174 - }, - { - "epoch": 0.7424998496963867, - "grad_norm": 2.288923552270195, - "learning_rate": 6.559985224767801e-07, - "loss": 1.0143, - "step": 6175 - }, - { - "epoch": 0.7426200925870258, - "grad_norm": 14.068615206831504, - "learning_rate": 6.55421756609349e-07, - "loss": 0.9827, - "step": 6176 - }, - { - "epoch": 0.7427403354776649, - "grad_norm": 1.9027311020793423, - "learning_rate": 6.54845194713369e-07, - "loss": 1.0131, - "step": 6177 - }, - { - "epoch": 0.742860578368304, - "grad_norm": 2.955613475445555, - "learning_rate": 6.542688368763034e-07, - "loss": 1.0358, - "step": 6178 - }, - { - "epoch": 0.742980821258943, - "grad_norm": 3.286921078144438, - "learning_rate": 6.536926831855854e-07, - "loss": 1.0033, - "step": 6179 - }, - { - "epoch": 0.7431010641495821, - "grad_norm": 2.346538969687268, - "learning_rate": 6.531167337286165e-07, - "loss": 0.9578, - "step": 6180 - }, - { - "epoch": 0.7432213070402213, - "grad_norm": 1.7176670477420248, - "learning_rate": 6.52540988592768e-07, - "loss": 1.0232, - "step": 6181 - }, - { - "epoch": 0.7433415499308603, - "grad_norm": 3.130409746254212, - "learning_rate": 6.519654478653814e-07, - "loss": 1.0689, - "step": 6182 - }, - { - "epoch": 0.7434617928214994, - "grad_norm": 0.776460999393074, - "learning_rate": 6.51390111633763e-07, - "loss": 0.8108, - "step": 6183 - }, - { - "epoch": 0.7435820357121385, - "grad_norm": 1.8709674704989616, - "learning_rate": 6.508149799851932e-07, - "loss": 0.9913, - "step": 6184 - }, - { - "epoch": 0.7437022786027776, - "grad_norm": 1.9585308489382227, - "learning_rate": 6.502400530069183e-07, - "loss": 0.8427, - "step": 6185 - }, - { - "epoch": 0.7438225214934167, - "grad_norm": 2.354406924070546, - "learning_rate": 6.496653307861535e-07, - "loss": 0.9155, - "step": 6186 - }, - { - "epoch": 0.7439427643840558, - "grad_norm": 1.828740790572741, - "learning_rate": 6.490908134100857e-07, - "loss": 0.881, - "step": 6187 - }, - { - "epoch": 0.7440630072746949, - "grad_norm": 3.6735725537019155, - "learning_rate": 6.48516500965866e-07, - "loss": 0.9315, - "step": 6188 - }, - { - "epoch": 0.7441832501653339, - "grad_norm": 1.6649895176829614, - "learning_rate": 6.479423935406192e-07, - "loss": 1.0451, - "step": 6189 - }, - { - "epoch": 0.7443034930559731, - "grad_norm": 0.9145903082359325, - "learning_rate": 6.473684912214357e-07, - "loss": 0.9406, - "step": 6190 - }, - { - "epoch": 0.7444237359466122, - "grad_norm": 2.1677232757987013, - "learning_rate": 6.467947940953778e-07, - "loss": 0.9279, - "step": 6191 - }, - { - "epoch": 0.7445439788372512, - "grad_norm": 2.080572307786895, - "learning_rate": 6.462213022494732e-07, - "loss": 0.9578, - "step": 6192 - }, - { - "epoch": 0.7446642217278904, - "grad_norm": 0.8419681383705615, - "learning_rate": 6.456480157707201e-07, - "loss": 0.8859, - "step": 6193 - }, - { - "epoch": 0.7447844646185294, - "grad_norm": 3.9018165759249994, - "learning_rate": 6.450749347460866e-07, - "loss": 1.0825, - "step": 6194 - }, - { - "epoch": 0.7449047075091685, - "grad_norm": 2.2340687361809075, - "learning_rate": 6.445020592625083e-07, - "loss": 1.0193, - "step": 6195 - }, - { - "epoch": 0.7450249503998077, - "grad_norm": 2.2822933162774364, - "learning_rate": 6.4392938940689e-07, - "loss": 1.0381, - "step": 6196 - }, - { - "epoch": 0.7451451932904467, - "grad_norm": 1.95848166803541, - "learning_rate": 6.433569252661049e-07, - "loss": 0.9371, - "step": 6197 - }, - { - "epoch": 0.7452654361810858, - "grad_norm": 1.8708859768309272, - "learning_rate": 6.427846669269952e-07, - "loss": 0.9412, - "step": 6198 - }, - { - "epoch": 0.7453856790717249, - "grad_norm": 1.9538412993143772, - "learning_rate": 6.422126144763729e-07, - "loss": 1.0523, - "step": 6199 - }, - { - "epoch": 0.745505921962364, - "grad_norm": 2.0842726560800084, - "learning_rate": 6.416407680010174e-07, - "loss": 1.004, - "step": 6200 - }, - { - "epoch": 0.745626164853003, - "grad_norm": 1.8298201567449937, - "learning_rate": 6.410691275876774e-07, - "loss": 1.0415, - "step": 6201 - }, - { - "epoch": 0.7457464077436422, - "grad_norm": 3.1110982289123252, - "learning_rate": 6.404976933230704e-07, - "loss": 0.9904, - "step": 6202 - }, - { - "epoch": 0.7458666506342813, - "grad_norm": 2.3876339021052155, - "learning_rate": 6.399264652938813e-07, - "loss": 0.9585, - "step": 6203 - }, - { - "epoch": 0.7459868935249203, - "grad_norm": 1.8431324328624843, - "learning_rate": 6.393554435867679e-07, - "loss": 0.9729, - "step": 6204 - }, - { - "epoch": 0.7461071364155595, - "grad_norm": 3.2394145827586285, - "learning_rate": 6.387846282883502e-07, - "loss": 1.0621, - "step": 6205 - }, - { - "epoch": 0.7462273793061985, - "grad_norm": 2.37627898461947, - "learning_rate": 6.38214019485223e-07, - "loss": 1.0006, - "step": 6206 - }, - { - "epoch": 0.7463476221968376, - "grad_norm": 2.1867630943498537, - "learning_rate": 6.376436172639461e-07, - "loss": 0.9428, - "step": 6207 - }, - { - "epoch": 0.7464678650874768, - "grad_norm": 2.602343192969394, - "learning_rate": 6.370734217110487e-07, - "loss": 0.8747, - "step": 6208 - }, - { - "epoch": 0.7465881079781158, - "grad_norm": 1.5305995727520523, - "learning_rate": 6.36503432913031e-07, - "loss": 0.8719, - "step": 6209 - }, - { - "epoch": 0.7467083508687549, - "grad_norm": 1.6410792857141674, - "learning_rate": 6.359336509563569e-07, - "loss": 0.9111, - "step": 6210 - }, - { - "epoch": 0.7468285937593939, - "grad_norm": 2.008698383040809, - "learning_rate": 6.353640759274641e-07, - "loss": 1.0378, - "step": 6211 - }, - { - "epoch": 0.7469488366500331, - "grad_norm": 2.1226146998601747, - "learning_rate": 6.347947079127556e-07, - "loss": 0.9809, - "step": 6212 - }, - { - "epoch": 0.7470690795406721, - "grad_norm": 2.124210098658654, - "learning_rate": 6.342255469986053e-07, - "loss": 0.9942, - "step": 6213 - }, - { - "epoch": 0.7471893224313112, - "grad_norm": 1.7953407039701152, - "learning_rate": 6.336565932713533e-07, - "loss": 1.0043, - "step": 6214 - }, - { - "epoch": 0.7473095653219504, - "grad_norm": 1.9781905393907049, - "learning_rate": 6.330878468173088e-07, - "loss": 1.0092, - "step": 6215 - }, - { - "epoch": 0.7474298082125894, - "grad_norm": 2.0895483646021393, - "learning_rate": 6.32519307722752e-07, - "loss": 0.9592, - "step": 6216 - }, - { - "epoch": 0.7475500511032285, - "grad_norm": 0.7968634436274288, - "learning_rate": 6.31950976073929e-07, - "loss": 0.8114, - "step": 6217 - }, - { - "epoch": 0.7476702939938676, - "grad_norm": 2.1936280654102394, - "learning_rate": 6.31382851957055e-07, - "loss": 1.0277, - "step": 6218 - }, - { - "epoch": 0.7477905368845067, - "grad_norm": 3.2325272004177212, - "learning_rate": 6.308149354583143e-07, - "loss": 0.9377, - "step": 6219 - }, - { - "epoch": 0.7479107797751458, - "grad_norm": 1.7783569801400578, - "learning_rate": 6.302472266638586e-07, - "loss": 1.0436, - "step": 6220 - }, - { - "epoch": 0.7480310226657849, - "grad_norm": 1.7275044215100204, - "learning_rate": 6.296797256598101e-07, - "loss": 0.9344, - "step": 6221 - }, - { - "epoch": 0.748151265556424, - "grad_norm": 2.4502717937819307, - "learning_rate": 6.291124325322576e-07, - "loss": 1.0371, - "step": 6222 - }, - { - "epoch": 0.748271508447063, - "grad_norm": 1.4752005295299482, - "learning_rate": 6.285453473672595e-07, - "loss": 0.8552, - "step": 6223 - }, - { - "epoch": 0.7483917513377022, - "grad_norm": 2.138255346139223, - "learning_rate": 6.279784702508415e-07, - "loss": 0.9862, - "step": 6224 - }, - { - "epoch": 0.7485119942283412, - "grad_norm": 0.8243830367962589, - "learning_rate": 6.274118012689979e-07, - "loss": 0.877, - "step": 6225 - }, - { - "epoch": 0.7486322371189803, - "grad_norm": 3.783121827945233, - "learning_rate": 6.268453405076943e-07, - "loss": 0.9136, - "step": 6226 - }, - { - "epoch": 0.7487524800096195, - "grad_norm": 2.6280963438016247, - "learning_rate": 6.262790880528592e-07, - "loss": 1.0517, - "step": 6227 - }, - { - "epoch": 0.7488727229002585, - "grad_norm": 3.913269571784581, - "learning_rate": 6.257130439903951e-07, - "loss": 1.019, - "step": 6228 - }, - { - "epoch": 0.7489929657908976, - "grad_norm": 1.8469900982874805, - "learning_rate": 6.251472084061695e-07, - "loss": 1.0338, - "step": 6229 - }, - { - "epoch": 0.7491132086815367, - "grad_norm": 2.1910029021417574, - "learning_rate": 6.245815813860191e-07, - "loss": 1.1279, - "step": 6230 - }, - { - "epoch": 0.7492334515721758, - "grad_norm": 2.329188059354294, - "learning_rate": 6.240161630157495e-07, - "loss": 0.9244, - "step": 6231 - }, - { - "epoch": 0.7493536944628149, - "grad_norm": 2.5150779934994083, - "learning_rate": 6.23450953381133e-07, - "loss": 0.921, - "step": 6232 - }, - { - "epoch": 0.749473937353454, - "grad_norm": 3.0168320211822803, - "learning_rate": 6.228859525679131e-07, - "loss": 0.9099, - "step": 6233 - }, - { - "epoch": 0.7495941802440931, - "grad_norm": 2.24107019319104, - "learning_rate": 6.223211606617986e-07, - "loss": 1.029, - "step": 6234 - }, - { - "epoch": 0.7497144231347321, - "grad_norm": 1.6999168628110495, - "learning_rate": 6.217565777484701e-07, - "loss": 1.0615, - "step": 6235 - }, - { - "epoch": 0.7498346660253713, - "grad_norm": 1.7937798421151798, - "learning_rate": 6.211922039135722e-07, - "loss": 1.0284, - "step": 6236 - }, - { - "epoch": 0.7499549089160104, - "grad_norm": 1.768688211608655, - "learning_rate": 6.206280392427201e-07, - "loss": 1.0332, - "step": 6237 - }, - { - "epoch": 0.7500751518066494, - "grad_norm": 1.510410975303012, - "learning_rate": 6.200640838214983e-07, - "loss": 0.9637, - "step": 6238 - }, - { - "epoch": 0.7501953946972886, - "grad_norm": 1.9037062761040897, - "learning_rate": 6.195003377354578e-07, - "loss": 0.9048, - "step": 6239 - }, - { - "epoch": 0.7503156375879276, - "grad_norm": 4.958356627508417, - "learning_rate": 6.189368010701183e-07, - "loss": 0.9569, - "step": 6240 - }, - { - "epoch": 0.7504358804785667, - "grad_norm": 1.953720917390692, - "learning_rate": 6.183734739109683e-07, - "loss": 0.9857, - "step": 6241 - }, - { - "epoch": 0.7505561233692057, - "grad_norm": 2.3534234373153455, - "learning_rate": 6.178103563434629e-07, - "loss": 0.9212, - "step": 6242 - }, - { - "epoch": 0.7506763662598449, - "grad_norm": 1.5978572504501654, - "learning_rate": 6.172474484530283e-07, - "loss": 1.0675, - "step": 6243 - }, - { - "epoch": 0.750796609150484, - "grad_norm": 1.86055218273161, - "learning_rate": 6.166847503250563e-07, - "loss": 0.9829, - "step": 6244 - }, - { - "epoch": 0.750916852041123, - "grad_norm": 2.4274081120646134, - "learning_rate": 6.161222620449078e-07, - "loss": 1.0229, - "step": 6245 - }, - { - "epoch": 0.7510370949317622, - "grad_norm": 2.119791941599785, - "learning_rate": 6.155599836979117e-07, - "loss": 1.0401, - "step": 6246 - }, - { - "epoch": 0.7511573378224012, - "grad_norm": 2.184601962150594, - "learning_rate": 6.149979153693649e-07, - "loss": 1.0398, - "step": 6247 - }, - { - "epoch": 0.7512775807130403, - "grad_norm": 2.833229306125155, - "learning_rate": 6.144360571445343e-07, - "loss": 0.9889, - "step": 6248 - }, - { - "epoch": 0.7513978236036795, - "grad_norm": 1.668246172451966, - "learning_rate": 6.138744091086509e-07, - "loss": 1.0259, - "step": 6249 - }, - { - "epoch": 0.7515180664943185, - "grad_norm": 2.396910836997563, - "learning_rate": 6.133129713469183e-07, - "loss": 0.9651, - "step": 6250 - }, - { - "epoch": 0.7516383093849576, - "grad_norm": 2.2925729751204087, - "learning_rate": 6.127517439445053e-07, - "loss": 0.8699, - "step": 6251 - }, - { - "epoch": 0.7517585522755967, - "grad_norm": 2.046662996684376, - "learning_rate": 6.121907269865498e-07, - "loss": 1.0502, - "step": 6252 - }, - { - "epoch": 0.7518787951662358, - "grad_norm": 0.9587174178036846, - "learning_rate": 6.116299205581577e-07, - "loss": 0.9604, - "step": 6253 - }, - { - "epoch": 0.7519990380568748, - "grad_norm": 3.1678340332888935, - "learning_rate": 6.110693247444018e-07, - "loss": 0.9149, - "step": 6254 - }, - { - "epoch": 0.752119280947514, - "grad_norm": 5.148159638983336, - "learning_rate": 6.105089396303258e-07, - "loss": 1.0542, - "step": 6255 - }, - { - "epoch": 0.7522395238381531, - "grad_norm": 1.837671962139255, - "learning_rate": 6.099487653009383e-07, - "loss": 0.9891, - "step": 6256 - }, - { - "epoch": 0.7523597667287921, - "grad_norm": 1.8272484528220168, - "learning_rate": 6.093888018412192e-07, - "loss": 1.0621, - "step": 6257 - }, - { - "epoch": 0.7524800096194313, - "grad_norm": 0.7465385189811513, - "learning_rate": 6.088290493361125e-07, - "loss": 0.8098, - "step": 6258 - }, - { - "epoch": 0.7526002525100703, - "grad_norm": 1.8776684528267615, - "learning_rate": 6.082695078705322e-07, - "loss": 0.9437, - "step": 6259 - }, - { - "epoch": 0.7527204954007094, - "grad_norm": 2.4805956229856148, - "learning_rate": 6.077101775293618e-07, - "loss": 0.9124, - "step": 6260 - }, - { - "epoch": 0.7528407382913486, - "grad_norm": 2.4249235593408383, - "learning_rate": 6.071510583974504e-07, - "loss": 1.0596, - "step": 6261 - }, - { - "epoch": 0.7529609811819876, - "grad_norm": 2.353299231493781, - "learning_rate": 6.065921505596161e-07, - "loss": 0.9441, - "step": 6262 - }, - { - "epoch": 0.7530812240726267, - "grad_norm": 1.7725762784226067, - "learning_rate": 6.060334541006445e-07, - "loss": 1.0013, - "step": 6263 - }, - { - "epoch": 0.7532014669632658, - "grad_norm": 1.470528601110485, - "learning_rate": 6.05474969105289e-07, - "loss": 0.9149, - "step": 6264 - }, - { - "epoch": 0.7533217098539049, - "grad_norm": 2.017997114725763, - "learning_rate": 6.049166956582725e-07, - "loss": 0.9607, - "step": 6265 - }, - { - "epoch": 0.753441952744544, - "grad_norm": 4.495705356608948, - "learning_rate": 6.043586338442841e-07, - "loss": 1.0976, - "step": 6266 - }, - { - "epoch": 0.7535621956351831, - "grad_norm": 1.6465666943806374, - "learning_rate": 6.038007837479815e-07, - "loss": 0.9591, - "step": 6267 - }, - { - "epoch": 0.7536824385258222, - "grad_norm": 1.936268938340129, - "learning_rate": 6.032431454539897e-07, - "loss": 0.8647, - "step": 6268 - }, - { - "epoch": 0.7538026814164612, - "grad_norm": 1.7316577692816009, - "learning_rate": 6.026857190469014e-07, - "loss": 1.0383, - "step": 6269 - }, - { - "epoch": 0.7539229243071004, - "grad_norm": 2.142662470921268, - "learning_rate": 6.0212850461128e-07, - "loss": 0.9737, - "step": 6270 - }, - { - "epoch": 0.7540431671977395, - "grad_norm": 2.295485199312435, - "learning_rate": 6.015715022316516e-07, - "loss": 0.9814, - "step": 6271 - }, - { - "epoch": 0.7541634100883785, - "grad_norm": 2.650677838994025, - "learning_rate": 6.010147119925154e-07, - "loss": 1.014, - "step": 6272 - }, - { - "epoch": 0.7542836529790176, - "grad_norm": 3.7390356627990466, - "learning_rate": 6.004581339783348e-07, - "loss": 0.8877, - "step": 6273 - }, - { - "epoch": 0.7544038958696567, - "grad_norm": 4.613700879914753, - "learning_rate": 5.999017682735425e-07, - "loss": 0.9063, - "step": 6274 - }, - { - "epoch": 0.7545241387602958, - "grad_norm": 1.733572972066997, - "learning_rate": 5.993456149625387e-07, - "loss": 0.8882, - "step": 6275 - }, - { - "epoch": 0.7546443816509348, - "grad_norm": 1.7922085083160033, - "learning_rate": 5.987896741296909e-07, - "loss": 1.0473, - "step": 6276 - }, - { - "epoch": 0.754764624541574, - "grad_norm": 2.5057488147253624, - "learning_rate": 5.982339458593361e-07, - "loss": 1.0116, - "step": 6277 - }, - { - "epoch": 0.7548848674322131, - "grad_norm": 1.6236510913728088, - "learning_rate": 5.976784302357767e-07, - "loss": 1.069, - "step": 6278 - }, - { - "epoch": 0.7550051103228521, - "grad_norm": 3.0588905706869443, - "learning_rate": 5.971231273432855e-07, - "loss": 0.9571, - "step": 6279 - }, - { - "epoch": 0.7551253532134913, - "grad_norm": 0.8299840537228436, - "learning_rate": 5.965680372661e-07, - "loss": 0.8149, - "step": 6280 - }, - { - "epoch": 0.7552455961041303, - "grad_norm": 1.8055184173024947, - "learning_rate": 5.960131600884266e-07, - "loss": 0.7954, - "step": 6281 - }, - { - "epoch": 0.7553658389947694, - "grad_norm": 2.172713522651726, - "learning_rate": 5.954584958944413e-07, - "loss": 0.9861, - "step": 6282 - }, - { - "epoch": 0.7554860818854086, - "grad_norm": 2.2270737445887754, - "learning_rate": 5.949040447682854e-07, - "loss": 1.0371, - "step": 6283 - }, - { - "epoch": 0.7556063247760476, - "grad_norm": 2.2039187469182204, - "learning_rate": 5.943498067940686e-07, - "loss": 0.9218, - "step": 6284 - }, - { - "epoch": 0.7557265676666867, - "grad_norm": 1.7086604743281715, - "learning_rate": 5.937957820558686e-07, - "loss": 1.0458, - "step": 6285 - }, - { - "epoch": 0.7558468105573258, - "grad_norm": 0.8615400611520684, - "learning_rate": 5.932419706377296e-07, - "loss": 0.9049, - "step": 6286 - }, - { - "epoch": 0.7559670534479649, - "grad_norm": 1.9877000720010325, - "learning_rate": 5.92688372623666e-07, - "loss": 0.9692, - "step": 6287 - }, - { - "epoch": 0.7560872963386039, - "grad_norm": 2.025224993415867, - "learning_rate": 5.921349880976574e-07, - "loss": 0.9616, - "step": 6288 - }, - { - "epoch": 0.7562075392292431, - "grad_norm": 1.7622965269122814, - "learning_rate": 5.915818171436515e-07, - "loss": 1.0419, - "step": 6289 - }, - { - "epoch": 0.7563277821198822, - "grad_norm": 2.7815354447205625, - "learning_rate": 5.910288598455642e-07, - "loss": 0.9758, - "step": 6290 - }, - { - "epoch": 0.7564480250105212, - "grad_norm": 3.732561509072067, - "learning_rate": 5.90476116287278e-07, - "loss": 0.9648, - "step": 6291 - }, - { - "epoch": 0.7565682679011604, - "grad_norm": 1.6642996748848013, - "learning_rate": 5.899235865526456e-07, - "loss": 0.9108, - "step": 6292 - }, - { - "epoch": 0.7566885107917994, - "grad_norm": 1.6223323640200622, - "learning_rate": 5.893712707254825e-07, - "loss": 1.0441, - "step": 6293 - }, - { - "epoch": 0.7568087536824385, - "grad_norm": 2.6936502389512724, - "learning_rate": 5.888191688895769e-07, - "loss": 0.8903, - "step": 6294 - }, - { - "epoch": 0.7569289965730777, - "grad_norm": 2.157171522399362, - "learning_rate": 5.882672811286813e-07, - "loss": 0.8462, - "step": 6295 - }, - { - "epoch": 0.7570492394637167, - "grad_norm": 2.8932793055234933, - "learning_rate": 5.877156075265166e-07, - "loss": 0.9278, - "step": 6296 - }, - { - "epoch": 0.7571694823543558, - "grad_norm": 2.695232180075884, - "learning_rate": 5.871641481667715e-07, - "loss": 0.9277, - "step": 6297 - }, - { - "epoch": 0.7572897252449949, - "grad_norm": 1.9527974578869005, - "learning_rate": 5.866129031331011e-07, - "loss": 1.0702, - "step": 6298 - }, - { - "epoch": 0.757409968135634, - "grad_norm": 3.87929801076631, - "learning_rate": 5.8606187250913e-07, - "loss": 1.0642, - "step": 6299 - }, - { - "epoch": 0.757530211026273, - "grad_norm": 1.9692479064085688, - "learning_rate": 5.855110563784482e-07, - "loss": 1.0646, - "step": 6300 - }, - { - "epoch": 0.7576504539169122, - "grad_norm": 1.4402460323862452, - "learning_rate": 5.849604548246156e-07, - "loss": 0.8726, - "step": 6301 - }, - { - "epoch": 0.7577706968075513, - "grad_norm": 1.7246881728353292, - "learning_rate": 5.844100679311565e-07, - "loss": 1.0291, - "step": 6302 - }, - { - "epoch": 0.7578909396981903, - "grad_norm": 2.1548025829406634, - "learning_rate": 5.838598957815637e-07, - "loss": 0.9885, - "step": 6303 - }, - { - "epoch": 0.7580111825888295, - "grad_norm": 1.8209395085970665, - "learning_rate": 5.833099384592996e-07, - "loss": 1.0876, - "step": 6304 - }, - { - "epoch": 0.7581314254794685, - "grad_norm": 2.238408962154648, - "learning_rate": 5.827601960477913e-07, - "loss": 0.9371, - "step": 6305 - }, - { - "epoch": 0.7582516683701076, - "grad_norm": 1.9768368979106905, - "learning_rate": 5.822106686304344e-07, - "loss": 0.9389, - "step": 6306 - }, - { - "epoch": 0.7583719112607467, - "grad_norm": 2.3604090363619625, - "learning_rate": 5.816613562905919e-07, - "loss": 0.8046, - "step": 6307 - }, - { - "epoch": 0.7584921541513858, - "grad_norm": 1.5453884032829233, - "learning_rate": 5.811122591115933e-07, - "loss": 0.9387, - "step": 6308 - }, - { - "epoch": 0.7586123970420249, - "grad_norm": 2.9373594209837504, - "learning_rate": 5.805633771767376e-07, - "loss": 0.948, - "step": 6309 - }, - { - "epoch": 0.7587326399326639, - "grad_norm": 2.2640525886102822, - "learning_rate": 5.800147105692888e-07, - "loss": 1.0046, - "step": 6310 - }, - { - "epoch": 0.7588528828233031, - "grad_norm": 1.8599401945323237, - "learning_rate": 5.794662593724795e-07, - "loss": 1.0269, - "step": 6311 - }, - { - "epoch": 0.7589731257139422, - "grad_norm": 1.8576285230242409, - "learning_rate": 5.789180236695091e-07, - "loss": 0.9802, - "step": 6312 - }, - { - "epoch": 0.7590933686045812, - "grad_norm": 1.7995804032713112, - "learning_rate": 5.78370003543544e-07, - "loss": 1.0854, - "step": 6313 - }, - { - "epoch": 0.7592136114952204, - "grad_norm": 1.9026807294663155, - "learning_rate": 5.778221990777203e-07, - "loss": 1.0644, - "step": 6314 - }, - { - "epoch": 0.7593338543858594, - "grad_norm": 2.131354871390279, - "learning_rate": 5.772746103551372e-07, - "loss": 1.058, - "step": 6315 - }, - { - "epoch": 0.7594540972764985, - "grad_norm": 1.5310155728007226, - "learning_rate": 5.767272374588648e-07, - "loss": 0.9497, - "step": 6316 - }, - { - "epoch": 0.7595743401671377, - "grad_norm": 1.5914053780689577, - "learning_rate": 5.76180080471939e-07, - "loss": 1.0145, - "step": 6317 - }, - { - "epoch": 0.7596945830577767, - "grad_norm": 1.9748230770395383, - "learning_rate": 5.756331394773631e-07, - "loss": 0.9494, - "step": 6318 - }, - { - "epoch": 0.7598148259484158, - "grad_norm": 7.290138941985744, - "learning_rate": 5.750864145581071e-07, - "loss": 0.9951, - "step": 6319 - }, - { - "epoch": 0.7599350688390549, - "grad_norm": 2.0470978127848807, - "learning_rate": 5.745399057971085e-07, - "loss": 1.0834, - "step": 6320 - }, - { - "epoch": 0.760055311729694, - "grad_norm": 3.090830803200996, - "learning_rate": 5.739936132772738e-07, - "loss": 0.9861, - "step": 6321 - }, - { - "epoch": 0.760175554620333, - "grad_norm": 2.0078054940068064, - "learning_rate": 5.734475370814733e-07, - "loss": 0.9751, - "step": 6322 - }, - { - "epoch": 0.7602957975109722, - "grad_norm": 1.617967205700766, - "learning_rate": 5.729016772925483e-07, - "loss": 1.0082, - "step": 6323 - }, - { - "epoch": 0.7604160404016113, - "grad_norm": 1.726789502717206, - "learning_rate": 5.723560339933038e-07, - "loss": 0.937, - "step": 6324 - }, - { - "epoch": 0.7605362832922503, - "grad_norm": 2.7105774214326672, - "learning_rate": 5.71810607266513e-07, - "loss": 0.8769, - "step": 6325 - }, - { - "epoch": 0.7606565261828895, - "grad_norm": 2.388791646629287, - "learning_rate": 5.712653971949184e-07, - "loss": 0.8363, - "step": 6326 - }, - { - "epoch": 0.7607767690735285, - "grad_norm": 2.5766793561707675, - "learning_rate": 5.707204038612268e-07, - "loss": 0.9999, - "step": 6327 - }, - { - "epoch": 0.7608970119641676, - "grad_norm": 2.4414854479341805, - "learning_rate": 5.701756273481138e-07, - "loss": 0.9608, - "step": 6328 - }, - { - "epoch": 0.7610172548548068, - "grad_norm": 1.4366997689035128, - "learning_rate": 5.696310677382212e-07, - "loss": 0.9677, - "step": 6329 - }, - { - "epoch": 0.7611374977454458, - "grad_norm": 0.8400264452563448, - "learning_rate": 5.690867251141576e-07, - "loss": 0.8801, - "step": 6330 - }, - { - "epoch": 0.7612577406360849, - "grad_norm": 2.742823948049064, - "learning_rate": 5.685425995585013e-07, - "loss": 1.1469, - "step": 6331 - }, - { - "epoch": 0.761377983526724, - "grad_norm": 0.7846868829637393, - "learning_rate": 5.679986911537935e-07, - "loss": 0.8527, - "step": 6332 - }, - { - "epoch": 0.7614982264173631, - "grad_norm": 2.2227039834848594, - "learning_rate": 5.674549999825462e-07, - "loss": 0.9122, - "step": 6333 - }, - { - "epoch": 0.7616184693080021, - "grad_norm": 0.9860439382129722, - "learning_rate": 5.669115261272363e-07, - "loss": 1.0054, - "step": 6334 - }, - { - "epoch": 0.7617387121986413, - "grad_norm": 2.636377654441947, - "learning_rate": 5.663682696703081e-07, - "loss": 0.9585, - "step": 6335 - }, - { - "epoch": 0.7618589550892804, - "grad_norm": 4.376791886811501, - "learning_rate": 5.658252306941746e-07, - "loss": 1.0598, - "step": 6336 - }, - { - "epoch": 0.7619791979799194, - "grad_norm": 2.660302963356249, - "learning_rate": 5.65282409281212e-07, - "loss": 0.9934, - "step": 6337 - }, - { - "epoch": 0.7620994408705585, - "grad_norm": 1.8764933043720537, - "learning_rate": 5.64739805513768e-07, - "loss": 0.9224, - "step": 6338 - }, - { - "epoch": 0.7622196837611976, - "grad_norm": 0.8210939227683832, - "learning_rate": 5.641974194741541e-07, - "loss": 0.8184, - "step": 6339 - }, - { - "epoch": 0.7623399266518367, - "grad_norm": 0.7828822822457467, - "learning_rate": 5.636552512446502e-07, - "loss": 0.8825, - "step": 6340 - }, - { - "epoch": 0.7624601695424758, - "grad_norm": 1.7032231293852549, - "learning_rate": 5.631133009075027e-07, - "loss": 1.0124, - "step": 6341 - }, - { - "epoch": 0.7625804124331149, - "grad_norm": 1.8720859771693998, - "learning_rate": 5.625715685449242e-07, - "loss": 0.9237, - "step": 6342 - }, - { - "epoch": 0.762700655323754, - "grad_norm": 1.9561529263429878, - "learning_rate": 5.620300542390966e-07, - "loss": 0.9457, - "step": 6343 - }, - { - "epoch": 0.762820898214393, - "grad_norm": 1.7204631936909713, - "learning_rate": 5.614887580721659e-07, - "loss": 1.0861, - "step": 6344 - }, - { - "epoch": 0.7629411411050322, - "grad_norm": 2.131635180258932, - "learning_rate": 5.609476801262481e-07, - "loss": 0.9734, - "step": 6345 - }, - { - "epoch": 0.7630613839956712, - "grad_norm": 2.2149130727962993, - "learning_rate": 5.604068204834223e-07, - "loss": 0.8767, - "step": 6346 - }, - { - "epoch": 0.7631816268863103, - "grad_norm": 2.2695075081455025, - "learning_rate": 5.598661792257367e-07, - "loss": 0.9809, - "step": 6347 - }, - { - "epoch": 0.7633018697769495, - "grad_norm": 2.0077101388213086, - "learning_rate": 5.593257564352071e-07, - "loss": 1.0006, - "step": 6348 - }, - { - "epoch": 0.7634221126675885, - "grad_norm": 1.5963972956031138, - "learning_rate": 5.58785552193815e-07, - "loss": 0.988, - "step": 6349 - }, - { - "epoch": 0.7635423555582276, - "grad_norm": 2.1957426362485863, - "learning_rate": 5.582455665835086e-07, - "loss": 0.9839, - "step": 6350 - }, - { - "epoch": 0.7636625984488667, - "grad_norm": 2.962384436562512, - "learning_rate": 5.577057996862036e-07, - "loss": 0.9598, - "step": 6351 - }, - { - "epoch": 0.7637828413395058, - "grad_norm": 1.6574160232099, - "learning_rate": 5.571662515837814e-07, - "loss": 0.9883, - "step": 6352 - }, - { - "epoch": 0.7639030842301449, - "grad_norm": 1.9787275195215335, - "learning_rate": 5.566269223580926e-07, - "loss": 1.0673, - "step": 6353 - }, - { - "epoch": 0.764023327120784, - "grad_norm": 1.9344755377702996, - "learning_rate": 5.560878120909511e-07, - "loss": 0.9764, - "step": 6354 - }, - { - "epoch": 0.7641435700114231, - "grad_norm": 0.8765369268756655, - "learning_rate": 5.55548920864141e-07, - "loss": 0.8805, - "step": 6355 - }, - { - "epoch": 0.7642638129020621, - "grad_norm": 1.7585388717324861, - "learning_rate": 5.550102487594113e-07, - "loss": 0.9956, - "step": 6356 - }, - { - "epoch": 0.7643840557927013, - "grad_norm": 1.8896616853944526, - "learning_rate": 5.54471795858477e-07, - "loss": 0.9461, - "step": 6357 - }, - { - "epoch": 0.7645042986833404, - "grad_norm": 1.8762314884866078, - "learning_rate": 5.539335622430235e-07, - "loss": 1.0591, - "step": 6358 - }, - { - "epoch": 0.7646245415739794, - "grad_norm": 2.4527830626662523, - "learning_rate": 5.533955479946975e-07, - "loss": 0.9765, - "step": 6359 - }, - { - "epoch": 0.7647447844646186, - "grad_norm": 0.8683671574280316, - "learning_rate": 5.528577531951173e-07, - "loss": 0.9063, - "step": 6360 - }, - { - "epoch": 0.7648650273552576, - "grad_norm": 2.069720719888894, - "learning_rate": 5.523201779258653e-07, - "loss": 0.9727, - "step": 6361 - }, - { - "epoch": 0.7649852702458967, - "grad_norm": 2.201285602166863, - "learning_rate": 5.517828222684912e-07, - "loss": 1.0709, - "step": 6362 - }, - { - "epoch": 0.7651055131365359, - "grad_norm": 0.7762696018907613, - "learning_rate": 5.512456863045117e-07, - "loss": 0.8372, - "step": 6363 - }, - { - "epoch": 0.7652257560271749, - "grad_norm": 1.8614322597198198, - "learning_rate": 5.507087701154089e-07, - "loss": 0.9674, - "step": 6364 - }, - { - "epoch": 0.765345998917814, - "grad_norm": 2.010277922854927, - "learning_rate": 5.50172073782634e-07, - "loss": 0.9816, - "step": 6365 - }, - { - "epoch": 0.7654662418084531, - "grad_norm": 1.9916449312862259, - "learning_rate": 5.496355973876023e-07, - "loss": 1.0996, - "step": 6366 - }, - { - "epoch": 0.7655864846990922, - "grad_norm": 2.42690370301537, - "learning_rate": 5.490993410116984e-07, - "loss": 0.9397, - "step": 6367 - }, - { - "epoch": 0.7657067275897312, - "grad_norm": 2.0255037458239675, - "learning_rate": 5.485633047362704e-07, - "loss": 0.9286, - "step": 6368 - }, - { - "epoch": 0.7658269704803703, - "grad_norm": 1.948531585844565, - "learning_rate": 5.480274886426341e-07, - "loss": 1.0114, - "step": 6369 - }, - { - "epoch": 0.7659472133710095, - "grad_norm": 1.7801362956296538, - "learning_rate": 5.474918928120744e-07, - "loss": 1.0085, - "step": 6370 - }, - { - "epoch": 0.7660674562616485, - "grad_norm": 8.943459397985658, - "learning_rate": 5.469565173258392e-07, - "loss": 1.1053, - "step": 6371 - }, - { - "epoch": 0.7661876991522876, - "grad_norm": 4.636014658327931, - "learning_rate": 5.464213622651454e-07, - "loss": 0.8735, - "step": 6372 - }, - { - "epoch": 0.7663079420429267, - "grad_norm": 2.2585956838699133, - "learning_rate": 5.458864277111753e-07, - "loss": 1.069, - "step": 6373 - }, - { - "epoch": 0.7664281849335658, - "grad_norm": 2.3964138378155755, - "learning_rate": 5.453517137450769e-07, - "loss": 0.9202, - "step": 6374 - }, - { - "epoch": 0.7665484278242048, - "grad_norm": 1.92193150631092, - "learning_rate": 5.448172204479684e-07, - "loss": 0.9829, - "step": 6375 - }, - { - "epoch": 0.766668670714844, - "grad_norm": 1.5364591005829873, - "learning_rate": 5.442829479009294e-07, - "loss": 0.9759, - "step": 6376 - }, - { - "epoch": 0.7667889136054831, - "grad_norm": 1.9686121816685898, - "learning_rate": 5.437488961850103e-07, - "loss": 0.9453, - "step": 6377 - }, - { - "epoch": 0.7669091564961221, - "grad_norm": 1.7448502791699236, - "learning_rate": 5.432150653812258e-07, - "loss": 0.9877, - "step": 6378 - }, - { - "epoch": 0.7670293993867613, - "grad_norm": 4.597093675755572, - "learning_rate": 5.42681455570557e-07, - "loss": 1.0598, - "step": 6379 - }, - { - "epoch": 0.7671496422774003, - "grad_norm": 1.8212844807666901, - "learning_rate": 5.42148066833954e-07, - "loss": 0.8806, - "step": 6380 - }, - { - "epoch": 0.7672698851680394, - "grad_norm": 2.4297726729695457, - "learning_rate": 5.416148992523289e-07, - "loss": 0.9905, - "step": 6381 - }, - { - "epoch": 0.7673901280586786, - "grad_norm": 1.593148809998093, - "learning_rate": 5.410819529065644e-07, - "loss": 1.0107, - "step": 6382 - }, - { - "epoch": 0.7675103709493176, - "grad_norm": 1.922426470279259, - "learning_rate": 5.405492278775079e-07, - "loss": 0.8809, - "step": 6383 - }, - { - "epoch": 0.7676306138399567, - "grad_norm": 1.935250853411801, - "learning_rate": 5.400167242459732e-07, - "loss": 1.0288, - "step": 6384 - }, - { - "epoch": 0.7677508567305958, - "grad_norm": 2.234616353264438, - "learning_rate": 5.394844420927405e-07, - "loss": 1.0292, - "step": 6385 - }, - { - "epoch": 0.7678710996212349, - "grad_norm": 1.9372258761512822, - "learning_rate": 5.389523814985562e-07, - "loss": 0.9536, - "step": 6386 - }, - { - "epoch": 0.767991342511874, - "grad_norm": 1.7144398832820187, - "learning_rate": 5.384205425441344e-07, - "loss": 0.9865, - "step": 6387 - }, - { - "epoch": 0.7681115854025131, - "grad_norm": 1.7999372842247405, - "learning_rate": 5.378889253101537e-07, - "loss": 1.0739, - "step": 6388 - }, - { - "epoch": 0.7682318282931522, - "grad_norm": 1.6756936074838114, - "learning_rate": 5.373575298772617e-07, - "loss": 1.0321, - "step": 6389 - }, - { - "epoch": 0.7683520711837912, - "grad_norm": 0.7455793674699354, - "learning_rate": 5.368263563260689e-07, - "loss": 0.8548, - "step": 6390 - }, - { - "epoch": 0.7684723140744304, - "grad_norm": 1.9445448743527576, - "learning_rate": 5.362954047371537e-07, - "loss": 0.8679, - "step": 6391 - }, - { - "epoch": 0.7685925569650695, - "grad_norm": 5.3236577164515, - "learning_rate": 5.357646751910627e-07, - "loss": 0.953, - "step": 6392 - }, - { - "epoch": 0.7687127998557085, - "grad_norm": 2.203113204166948, - "learning_rate": 5.352341677683061e-07, - "loss": 1.024, - "step": 6393 - }, - { - "epoch": 0.7688330427463477, - "grad_norm": 2.1098642417660103, - "learning_rate": 5.347038825493617e-07, - "loss": 1.0183, - "step": 6394 - }, - { - "epoch": 0.7689532856369867, - "grad_norm": 4.278667255641302, - "learning_rate": 5.341738196146732e-07, - "loss": 0.9049, - "step": 6395 - }, - { - "epoch": 0.7690735285276258, - "grad_norm": 2.2924264399750176, - "learning_rate": 5.336439790446503e-07, - "loss": 0.9615, - "step": 6396 - }, - { - "epoch": 0.769193771418265, - "grad_norm": 1.8872520688149577, - "learning_rate": 5.331143609196711e-07, - "loss": 0.8621, - "step": 6397 - }, - { - "epoch": 0.769314014308904, - "grad_norm": 1.9575239159108415, - "learning_rate": 5.325849653200758e-07, - "loss": 0.9985, - "step": 6398 - }, - { - "epoch": 0.7694342571995431, - "grad_norm": 1.881821747643699, - "learning_rate": 5.32055792326175e-07, - "loss": 0.989, - "step": 6399 - }, - { - "epoch": 0.7695545000901821, - "grad_norm": 1.9078866780174908, - "learning_rate": 5.315268420182437e-07, - "loss": 0.9567, - "step": 6400 - }, - { - "epoch": 0.7696747429808213, - "grad_norm": 1.6548457400526375, - "learning_rate": 5.309981144765221e-07, - "loss": 0.9915, - "step": 6401 - }, - { - "epoch": 0.7697949858714603, - "grad_norm": 2.572271371659773, - "learning_rate": 5.304696097812196e-07, - "loss": 0.9847, - "step": 6402 - }, - { - "epoch": 0.7699152287620994, - "grad_norm": 3.2954237215998208, - "learning_rate": 5.299413280125078e-07, - "loss": 0.8296, - "step": 6403 - }, - { - "epoch": 0.7700354716527386, - "grad_norm": 1.9499104405890952, - "learning_rate": 5.294132692505284e-07, - "loss": 0.9553, - "step": 6404 - }, - { - "epoch": 0.7701557145433776, - "grad_norm": 2.756621106135286, - "learning_rate": 5.288854335753861e-07, - "loss": 1.0171, - "step": 6405 - }, - { - "epoch": 0.7702759574340167, - "grad_norm": 1.6439554470457336, - "learning_rate": 5.283578210671551e-07, - "loss": 0.9936, - "step": 6406 - }, - { - "epoch": 0.7703962003246558, - "grad_norm": 2.840979263826161, - "learning_rate": 5.278304318058719e-07, - "loss": 0.9928, - "step": 6407 - }, - { - "epoch": 0.7705164432152949, - "grad_norm": 1.6854300342433648, - "learning_rate": 5.273032658715411e-07, - "loss": 1.0197, - "step": 6408 - }, - { - "epoch": 0.7706366861059339, - "grad_norm": 1.7500667559289045, - "learning_rate": 5.267763233441347e-07, - "loss": 0.9984, - "step": 6409 - }, - { - "epoch": 0.7707569289965731, - "grad_norm": 2.53929179403364, - "learning_rate": 5.26249604303588e-07, - "loss": 0.9224, - "step": 6410 - }, - { - "epoch": 0.7708771718872122, - "grad_norm": 2.2689445643371626, - "learning_rate": 5.257231088298057e-07, - "loss": 1.0076, - "step": 6411 - }, - { - "epoch": 0.7709974147778512, - "grad_norm": 0.8446770625963786, - "learning_rate": 5.25196837002655e-07, - "loss": 0.8014, - "step": 6412 - }, - { - "epoch": 0.7711176576684904, - "grad_norm": 3.215353349457138, - "learning_rate": 5.24670788901971e-07, - "loss": 0.9206, - "step": 6413 - }, - { - "epoch": 0.7712379005591294, - "grad_norm": 2.194586257000737, - "learning_rate": 5.241449646075557e-07, - "loss": 0.9073, - "step": 6414 - }, - { - "epoch": 0.7713581434497685, - "grad_norm": 1.9015175907345492, - "learning_rate": 5.236193641991762e-07, - "loss": 0.9539, - "step": 6415 - }, - { - "epoch": 0.7714783863404077, - "grad_norm": 5.573939187431591, - "learning_rate": 5.23093987756565e-07, - "loss": 0.9355, - "step": 6416 - }, - { - "epoch": 0.7715986292310467, - "grad_norm": 1.8473649874071982, - "learning_rate": 5.225688353594217e-07, - "loss": 0.9902, - "step": 6417 - }, - { - "epoch": 0.7717188721216858, - "grad_norm": 2.1407252378732267, - "learning_rate": 5.220439070874108e-07, - "loss": 1.0118, - "step": 6418 - }, - { - "epoch": 0.7718391150123249, - "grad_norm": 1.6382324211473298, - "learning_rate": 5.215192030201652e-07, - "loss": 0.9451, - "step": 6419 - }, - { - "epoch": 0.771959357902964, - "grad_norm": 1.8664071595232454, - "learning_rate": 5.209947232372798e-07, - "loss": 1.0953, - "step": 6420 - }, - { - "epoch": 0.772079600793603, - "grad_norm": 2.5947029477366783, - "learning_rate": 5.204704678183196e-07, - "loss": 1.0324, - "step": 6421 - }, - { - "epoch": 0.7721998436842422, - "grad_norm": 2.2084872953858294, - "learning_rate": 5.19946436842813e-07, - "loss": 1.0765, - "step": 6422 - }, - { - "epoch": 0.7723200865748813, - "grad_norm": 1.4634747020384293, - "learning_rate": 5.194226303902546e-07, - "loss": 0.9169, - "step": 6423 - }, - { - "epoch": 0.7724403294655203, - "grad_norm": 1.6721781518197574, - "learning_rate": 5.188990485401072e-07, - "loss": 0.9422, - "step": 6424 - }, - { - "epoch": 0.7725605723561595, - "grad_norm": 1.9749527358182069, - "learning_rate": 5.183756913717954e-07, - "loss": 1.0899, - "step": 6425 - }, - { - "epoch": 0.7726808152467985, - "grad_norm": 1.7736021125118424, - "learning_rate": 5.178525589647136e-07, - "loss": 0.9669, - "step": 6426 - }, - { - "epoch": 0.7728010581374376, - "grad_norm": 2.1005588105887307, - "learning_rate": 5.173296513982197e-07, - "loss": 1.015, - "step": 6427 - }, - { - "epoch": 0.7729213010280768, - "grad_norm": 1.895042114595047, - "learning_rate": 5.168069687516398e-07, - "loss": 0.8849, - "step": 6428 - }, - { - "epoch": 0.7730415439187158, - "grad_norm": 1.649234380245909, - "learning_rate": 5.16284511104263e-07, - "loss": 0.9477, - "step": 6429 - }, - { - "epoch": 0.7731617868093549, - "grad_norm": 3.285526939642736, - "learning_rate": 5.157622785353457e-07, - "loss": 1.0292, - "step": 6430 - }, - { - "epoch": 0.7732820296999939, - "grad_norm": 0.6647702652767803, - "learning_rate": 5.152402711241113e-07, - "loss": 0.8526, - "step": 6431 - }, - { - "epoch": 0.7734022725906331, - "grad_norm": 2.9727329239249367, - "learning_rate": 5.147184889497465e-07, - "loss": 1.061, - "step": 6432 - }, - { - "epoch": 0.7735225154812722, - "grad_norm": 2.5410940640516237, - "learning_rate": 5.141969320914072e-07, - "loss": 1.0284, - "step": 6433 - }, - { - "epoch": 0.7736427583719112, - "grad_norm": 2.945213093585246, - "learning_rate": 5.136756006282113e-07, - "loss": 0.8633, - "step": 6434 - }, - { - "epoch": 0.7737630012625504, - "grad_norm": 2.0569919673705352, - "learning_rate": 5.131544946392446e-07, - "loss": 1.0764, - "step": 6435 - }, - { - "epoch": 0.7738832441531894, - "grad_norm": 3.5254195700854676, - "learning_rate": 5.126336142035592e-07, - "loss": 0.8708, - "step": 6436 - }, - { - "epoch": 0.7740034870438285, - "grad_norm": 2.226536586677606, - "learning_rate": 5.121129594001721e-07, - "loss": 0.9494, - "step": 6437 - }, - { - "epoch": 0.7741237299344677, - "grad_norm": 1.9615782036790068, - "learning_rate": 5.115925303080661e-07, - "loss": 1.0448, - "step": 6438 - }, - { - "epoch": 0.7742439728251067, - "grad_norm": 1.9988589011770128, - "learning_rate": 5.110723270061899e-07, - "loss": 1.0249, - "step": 6439 - }, - { - "epoch": 0.7743642157157458, - "grad_norm": 2.107679123331222, - "learning_rate": 5.105523495734572e-07, - "loss": 1.0254, - "step": 6440 - }, - { - "epoch": 0.7744844586063849, - "grad_norm": 1.5081280788195754, - "learning_rate": 5.100325980887499e-07, - "loss": 0.9735, - "step": 6441 - }, - { - "epoch": 0.774604701497024, - "grad_norm": 1.7744498031451428, - "learning_rate": 5.095130726309116e-07, - "loss": 1.0581, - "step": 6442 - }, - { - "epoch": 0.774724944387663, - "grad_norm": 0.8659545794330394, - "learning_rate": 5.089937732787559e-07, - "loss": 0.8971, - "step": 6443 - }, - { - "epoch": 0.7748451872783022, - "grad_norm": 2.3986693031051987, - "learning_rate": 5.084747001110592e-07, - "loss": 0.8963, - "step": 6444 - }, - { - "epoch": 0.7749654301689413, - "grad_norm": 1.648543512632655, - "learning_rate": 5.07955853206564e-07, - "loss": 0.9321, - "step": 6445 - }, - { - "epoch": 0.7750856730595803, - "grad_norm": 3.004232599713876, - "learning_rate": 5.074372326439807e-07, - "loss": 0.9438, - "step": 6446 - }, - { - "epoch": 0.7752059159502195, - "grad_norm": 2.512393368408313, - "learning_rate": 5.069188385019814e-07, - "loss": 0.9635, - "step": 6447 - }, - { - "epoch": 0.7753261588408585, - "grad_norm": 3.626730012784564, - "learning_rate": 5.064006708592077e-07, - "loss": 0.8545, - "step": 6448 - }, - { - "epoch": 0.7754464017314976, - "grad_norm": 2.2182612350903335, - "learning_rate": 5.058827297942641e-07, - "loss": 0.9823, - "step": 6449 - }, - { - "epoch": 0.7755666446221368, - "grad_norm": 2.4239803884235354, - "learning_rate": 5.053650153857237e-07, - "loss": 0.9668, - "step": 6450 - }, - { - "epoch": 0.7756868875127758, - "grad_norm": 1.7464107763268097, - "learning_rate": 5.048475277121214e-07, - "loss": 0.9288, - "step": 6451 - }, - { - "epoch": 0.7758071304034149, - "grad_norm": 1.5882543919023857, - "learning_rate": 5.043302668519598e-07, - "loss": 0.9989, - "step": 6452 - }, - { - "epoch": 0.775927373294054, - "grad_norm": 1.800195164166751, - "learning_rate": 5.038132328837079e-07, - "loss": 0.9588, - "step": 6453 - }, - { - "epoch": 0.7760476161846931, - "grad_norm": 2.1110399245367866, - "learning_rate": 5.032964258857993e-07, - "loss": 0.9666, - "step": 6454 - }, - { - "epoch": 0.7761678590753321, - "grad_norm": 1.8993490409018572, - "learning_rate": 5.027798459366329e-07, - "loss": 0.9158, - "step": 6455 - }, - { - "epoch": 0.7762881019659713, - "grad_norm": 1.5038375244290791, - "learning_rate": 5.02263493114573e-07, - "loss": 0.8666, - "step": 6456 - }, - { - "epoch": 0.7764083448566104, - "grad_norm": 2.2115105738843077, - "learning_rate": 5.017473674979502e-07, - "loss": 0.9966, - "step": 6457 - }, - { - "epoch": 0.7765285877472494, - "grad_norm": 0.7698084917582777, - "learning_rate": 5.01231469165061e-07, - "loss": 0.8304, - "step": 6458 - }, - { - "epoch": 0.7766488306378886, - "grad_norm": 0.9454762340647244, - "learning_rate": 5.007157981941663e-07, - "loss": 0.8469, - "step": 6459 - }, - { - "epoch": 0.7767690735285276, - "grad_norm": 0.8964447145685692, - "learning_rate": 5.002003546634928e-07, - "loss": 0.9308, - "step": 6460 - }, - { - "epoch": 0.7768893164191667, - "grad_norm": 1.7720843290709924, - "learning_rate": 4.996851386512331e-07, - "loss": 0.9903, - "step": 6461 - }, - { - "epoch": 0.7770095593098058, - "grad_norm": 1.7435735825200331, - "learning_rate": 4.991701502355444e-07, - "loss": 1.0601, - "step": 6462 - }, - { - "epoch": 0.7771298022004449, - "grad_norm": 1.927815011453067, - "learning_rate": 4.986553894945518e-07, - "loss": 0.9964, - "step": 6463 - }, - { - "epoch": 0.777250045091084, - "grad_norm": 2.3533802800813515, - "learning_rate": 4.981408565063416e-07, - "loss": 1.0952, - "step": 6464 - }, - { - "epoch": 0.777370287981723, - "grad_norm": 1.8673489552686178, - "learning_rate": 4.976265513489701e-07, - "loss": 0.9883, - "step": 6465 - }, - { - "epoch": 0.7774905308723622, - "grad_norm": 2.7261996751956494, - "learning_rate": 4.971124741004562e-07, - "loss": 1.0354, - "step": 6466 - }, - { - "epoch": 0.7776107737630013, - "grad_norm": 2.080393535358849, - "learning_rate": 4.965986248387846e-07, - "loss": 0.9892, - "step": 6467 - }, - { - "epoch": 0.7777310166536403, - "grad_norm": 2.5152800898123253, - "learning_rate": 4.960850036419073e-07, - "loss": 0.9984, - "step": 6468 - }, - { - "epoch": 0.7778512595442795, - "grad_norm": 2.1382183273362068, - "learning_rate": 4.955716105877378e-07, - "loss": 1.0226, - "step": 6469 - }, - { - "epoch": 0.7779715024349185, - "grad_norm": 3.5966685510111325, - "learning_rate": 4.950584457541598e-07, - "loss": 1.0651, - "step": 6470 - }, - { - "epoch": 0.7780917453255576, - "grad_norm": 1.5748288318931563, - "learning_rate": 4.945455092190183e-07, - "loss": 1.0524, - "step": 6471 - }, - { - "epoch": 0.7782119882161967, - "grad_norm": 0.711899912919871, - "learning_rate": 4.940328010601271e-07, - "loss": 0.8076, - "step": 6472 - }, - { - "epoch": 0.7783322311068358, - "grad_norm": 1.7303674810326515, - "learning_rate": 4.935203213552621e-07, - "loss": 0.9883, - "step": 6473 - }, - { - "epoch": 0.7784524739974749, - "grad_norm": 3.4677900755989906, - "learning_rate": 4.930080701821662e-07, - "loss": 0.8928, - "step": 6474 - }, - { - "epoch": 0.778572716888114, - "grad_norm": 1.8081062599157733, - "learning_rate": 4.92496047618548e-07, - "loss": 1.0005, - "step": 6475 - }, - { - "epoch": 0.7786929597787531, - "grad_norm": 2.548829060404522, - "learning_rate": 4.919842537420811e-07, - "loss": 1.0046, - "step": 6476 - }, - { - "epoch": 0.7788132026693921, - "grad_norm": 1.8013153938955033, - "learning_rate": 4.91472688630404e-07, - "loss": 1.0272, - "step": 6477 - }, - { - "epoch": 0.7789334455600313, - "grad_norm": 1.9276142797966676, - "learning_rate": 4.909613523611202e-07, - "loss": 0.9723, - "step": 6478 - }, - { - "epoch": 0.7790536884506704, - "grad_norm": 2.4298980719770435, - "learning_rate": 4.904502450117991e-07, - "loss": 0.9742, - "step": 6479 - }, - { - "epoch": 0.7791739313413094, - "grad_norm": 2.212154417473619, - "learning_rate": 4.899393666599762e-07, - "loss": 0.9553, - "step": 6480 - }, - { - "epoch": 0.7792941742319486, - "grad_norm": 2.1843501710367437, - "learning_rate": 4.894287173831506e-07, - "loss": 0.9495, - "step": 6481 - }, - { - "epoch": 0.7794144171225876, - "grad_norm": 1.8738604579582916, - "learning_rate": 4.889182972587877e-07, - "loss": 1.0748, - "step": 6482 - }, - { - "epoch": 0.7795346600132267, - "grad_norm": 1.6310231018114298, - "learning_rate": 4.884081063643177e-07, - "loss": 0.8946, - "step": 6483 - }, - { - "epoch": 0.7796549029038659, - "grad_norm": 0.8927070975248729, - "learning_rate": 4.878981447771353e-07, - "loss": 0.7997, - "step": 6484 - }, - { - "epoch": 0.7797751457945049, - "grad_norm": 1.5645668093252794, - "learning_rate": 4.873884125746035e-07, - "loss": 0.9608, - "step": 6485 - }, - { - "epoch": 0.779895388685144, - "grad_norm": 2.443267942527621, - "learning_rate": 4.868789098340456e-07, - "loss": 0.9564, - "step": 6486 - }, - { - "epoch": 0.7800156315757831, - "grad_norm": 3.954848565868873, - "learning_rate": 4.863696366327543e-07, - "loss": 0.9405, - "step": 6487 - }, - { - "epoch": 0.7801358744664222, - "grad_norm": 1.7344882000246586, - "learning_rate": 4.85860593047986e-07, - "loss": 1.0104, - "step": 6488 - }, - { - "epoch": 0.7802561173570612, - "grad_norm": 1.9226968140180745, - "learning_rate": 4.853517791569613e-07, - "loss": 0.9741, - "step": 6489 - }, - { - "epoch": 0.7803763602477004, - "grad_norm": 1.5839091893425008, - "learning_rate": 4.848431950368684e-07, - "loss": 0.8856, - "step": 6490 - }, - { - "epoch": 0.7804966031383395, - "grad_norm": 0.7416484682300699, - "learning_rate": 4.843348407648569e-07, - "loss": 0.8183, - "step": 6491 - }, - { - "epoch": 0.7806168460289785, - "grad_norm": 2.0117160241562235, - "learning_rate": 4.838267164180457e-07, - "loss": 1.0578, - "step": 6492 - }, - { - "epoch": 0.7807370889196176, - "grad_norm": 2.4582300218328794, - "learning_rate": 4.833188220735156e-07, - "loss": 1.0751, - "step": 6493 - }, - { - "epoch": 0.7808573318102567, - "grad_norm": 1.8020370639160486, - "learning_rate": 4.828111578083152e-07, - "loss": 0.9681, - "step": 6494 - }, - { - "epoch": 0.7809775747008958, - "grad_norm": 3.40410217323672, - "learning_rate": 4.823037236994556e-07, - "loss": 1.0389, - "step": 6495 - }, - { - "epoch": 0.7810978175915348, - "grad_norm": 0.7775650476187512, - "learning_rate": 4.817965198239136e-07, - "loss": 0.8235, - "step": 6496 - }, - { - "epoch": 0.781218060482174, - "grad_norm": 2.175021081581111, - "learning_rate": 4.812895462586331e-07, - "loss": 0.9648, - "step": 6497 - }, - { - "epoch": 0.7813383033728131, - "grad_norm": 1.790824189349877, - "learning_rate": 4.807828030805207e-07, - "loss": 1.0477, - "step": 6498 - }, - { - "epoch": 0.7814585462634521, - "grad_norm": 2.4030633608449343, - "learning_rate": 4.802762903664495e-07, - "loss": 0.9102, - "step": 6499 - }, - { - "epoch": 0.7815787891540913, - "grad_norm": 4.536995448284459, - "learning_rate": 4.797700081932565e-07, - "loss": 0.9592, - "step": 6500 - }, - { - "epoch": 0.7816990320447303, - "grad_norm": 2.041653561712233, - "learning_rate": 4.792639566377442e-07, - "loss": 1.0462, - "step": 6501 - }, - { - "epoch": 0.7818192749353694, - "grad_norm": 1.855854130928019, - "learning_rate": 4.78758135776681e-07, - "loss": 1.0074, - "step": 6502 - }, - { - "epoch": 0.7819395178260086, - "grad_norm": 2.0180833595684278, - "learning_rate": 4.782525456867989e-07, - "loss": 1.0105, - "step": 6503 - }, - { - "epoch": 0.7820597607166476, - "grad_norm": 1.6514367033111912, - "learning_rate": 4.777471864447959e-07, - "loss": 1.0634, - "step": 6504 - }, - { - "epoch": 0.7821800036072867, - "grad_norm": 1.8148612065487666, - "learning_rate": 4.772420581273344e-07, - "loss": 1.0289, - "step": 6505 - }, - { - "epoch": 0.7823002464979258, - "grad_norm": 2.7412141555968375, - "learning_rate": 4.7673716081104134e-07, - "loss": 0.9954, - "step": 6506 - }, - { - "epoch": 0.7824204893885649, - "grad_norm": 2.0610136976753974, - "learning_rate": 4.762324945725109e-07, - "loss": 1.0706, - "step": 6507 - }, - { - "epoch": 0.782540732279204, - "grad_norm": 1.7029630251514445, - "learning_rate": 4.7572805948829844e-07, - "loss": 0.9839, - "step": 6508 - }, - { - "epoch": 0.7826609751698431, - "grad_norm": 2.1791447883319743, - "learning_rate": 4.7522385563492795e-07, - "loss": 0.9369, - "step": 6509 - }, - { - "epoch": 0.7827812180604822, - "grad_norm": 1.8016336868660041, - "learning_rate": 4.747198830888863e-07, - "loss": 0.9302, - "step": 6510 - }, - { - "epoch": 0.7829014609511212, - "grad_norm": 2.179360423278633, - "learning_rate": 4.742161419266251e-07, - "loss": 0.9156, - "step": 6511 - }, - { - "epoch": 0.7830217038417604, - "grad_norm": 7.191687384170743, - "learning_rate": 4.7371263222456304e-07, - "loss": 0.884, - "step": 6512 - }, - { - "epoch": 0.7831419467323995, - "grad_norm": 0.8186381372809302, - "learning_rate": 4.7320935405908004e-07, - "loss": 0.8752, - "step": 6513 - }, - { - "epoch": 0.7832621896230385, - "grad_norm": 5.361780099630345, - "learning_rate": 4.7270630750652475e-07, - "loss": 1.0544, - "step": 6514 - }, - { - "epoch": 0.7833824325136777, - "grad_norm": 1.953028928301241, - "learning_rate": 4.7220349264320746e-07, - "loss": 1.0347, - "step": 6515 - }, - { - "epoch": 0.7835026754043167, - "grad_norm": 0.7912864315691448, - "learning_rate": 4.71700909545407e-07, - "loss": 0.8248, - "step": 6516 - }, - { - "epoch": 0.7836229182949558, - "grad_norm": 1.8129004989647746, - "learning_rate": 4.711985582893627e-07, - "loss": 0.9982, - "step": 6517 - }, - { - "epoch": 0.783743161185595, - "grad_norm": 1.8265645466867275, - "learning_rate": 4.706964389512811e-07, - "loss": 0.9493, - "step": 6518 - }, - { - "epoch": 0.783863404076234, - "grad_norm": 14.758402354708878, - "learning_rate": 4.701945516073345e-07, - "loss": 1.1003, - "step": 6519 - }, - { - "epoch": 0.7839836469668731, - "grad_norm": 2.226201292487189, - "learning_rate": 4.696928963336577e-07, - "loss": 0.9819, - "step": 6520 - }, - { - "epoch": 0.7841038898575122, - "grad_norm": 0.8682157932312567, - "learning_rate": 4.6919147320635224e-07, - "loss": 0.8665, - "step": 6521 - }, - { - "epoch": 0.7842241327481513, - "grad_norm": 2.1327358134250294, - "learning_rate": 4.6869028230148286e-07, - "loss": 0.9534, - "step": 6522 - }, - { - "epoch": 0.7843443756387903, - "grad_norm": 3.1916075027865287, - "learning_rate": 4.6818932369507957e-07, - "loss": 0.8341, - "step": 6523 - }, - { - "epoch": 0.7844646185294295, - "grad_norm": 2.811761377620394, - "learning_rate": 4.676885974631386e-07, - "loss": 1.1113, - "step": 6524 - }, - { - "epoch": 0.7845848614200686, - "grad_norm": 1.8925950686362611, - "learning_rate": 4.67188103681619e-07, - "loss": 1.0351, - "step": 6525 - }, - { - "epoch": 0.7847051043107076, - "grad_norm": 2.316659022622796, - "learning_rate": 4.666878424264453e-07, - "loss": 0.9222, - "step": 6526 - }, - { - "epoch": 0.7848253472013467, - "grad_norm": 2.403134397332081, - "learning_rate": 4.661878137735069e-07, - "loss": 0.9652, - "step": 6527 - }, - { - "epoch": 0.7849455900919858, - "grad_norm": 2.402051855202503, - "learning_rate": 4.656880177986571e-07, - "loss": 0.9817, - "step": 6528 - }, - { - "epoch": 0.7850658329826249, - "grad_norm": 1.7486944778495677, - "learning_rate": 4.6518845457771607e-07, - "loss": 1.0409, - "step": 6529 - }, - { - "epoch": 0.7851860758732639, - "grad_norm": 1.8761102568185448, - "learning_rate": 4.646891241864652e-07, - "loss": 1.0248, - "step": 6530 - }, - { - "epoch": 0.7853063187639031, - "grad_norm": 2.6603354955212826, - "learning_rate": 4.6419002670065397e-07, - "loss": 0.9634, - "step": 6531 - }, - { - "epoch": 0.7854265616545422, - "grad_norm": 2.0906859795268717, - "learning_rate": 4.6369116219599445e-07, - "loss": 1.0767, - "step": 6532 - }, - { - "epoch": 0.7855468045451812, - "grad_norm": 1.6195483091332765, - "learning_rate": 4.631925307481637e-07, - "loss": 1.02, - "step": 6533 - }, - { - "epoch": 0.7856670474358204, - "grad_norm": 2.2073693480660954, - "learning_rate": 4.6269413243280533e-07, - "loss": 0.9753, - "step": 6534 - }, - { - "epoch": 0.7857872903264594, - "grad_norm": 4.176226114925805, - "learning_rate": 4.621959673255236e-07, - "loss": 0.9859, - "step": 6535 - }, - { - "epoch": 0.7859075332170985, - "grad_norm": 1.9707224214514583, - "learning_rate": 4.6169803550189135e-07, - "loss": 1.1324, - "step": 6536 - }, - { - "epoch": 0.7860277761077377, - "grad_norm": 1.6517406472203593, - "learning_rate": 4.6120033703744355e-07, - "loss": 0.9944, - "step": 6537 - }, - { - "epoch": 0.7861480189983767, - "grad_norm": 2.604712701136662, - "learning_rate": 4.607028720076822e-07, - "loss": 1.0108, - "step": 6538 - }, - { - "epoch": 0.7862682618890158, - "grad_norm": 1.85289989731771, - "learning_rate": 4.6020564048807074e-07, - "loss": 0.964, - "step": 6539 - }, - { - "epoch": 0.7863885047796549, - "grad_norm": 1.9862578463054306, - "learning_rate": 4.5970864255403883e-07, - "loss": 0.9473, - "step": 6540 - }, - { - "epoch": 0.786508747670294, - "grad_norm": 1.7779031732495805, - "learning_rate": 4.59211878280982e-07, - "loss": 1.0511, - "step": 6541 - }, - { - "epoch": 0.786628990560933, - "grad_norm": 1.9285690834899047, - "learning_rate": 4.587153477442578e-07, - "loss": 0.9344, - "step": 6542 - }, - { - "epoch": 0.7867492334515722, - "grad_norm": 3.6070631145204253, - "learning_rate": 4.582190510191899e-07, - "loss": 1.0468, - "step": 6543 - }, - { - "epoch": 0.7868694763422113, - "grad_norm": 2.2681846103313967, - "learning_rate": 4.5772298818106625e-07, - "loss": 1.0899, - "step": 6544 - }, - { - "epoch": 0.7869897192328503, - "grad_norm": 2.4773577547474495, - "learning_rate": 4.572271593051384e-07, - "loss": 0.9569, - "step": 6545 - }, - { - "epoch": 0.7871099621234895, - "grad_norm": 7.537280189257077, - "learning_rate": 4.567315644666245e-07, - "loss": 0.9993, - "step": 6546 - }, - { - "epoch": 0.7872302050141285, - "grad_norm": 1.9745136241544234, - "learning_rate": 4.5623620374070507e-07, - "loss": 1.0717, - "step": 6547 - }, - { - "epoch": 0.7873504479047676, - "grad_norm": 0.822866732052104, - "learning_rate": 4.557410772025263e-07, - "loss": 0.8587, - "step": 6548 - }, - { - "epoch": 0.7874706907954068, - "grad_norm": 2.1028851172484946, - "learning_rate": 4.5524618492719803e-07, - "loss": 0.8959, - "step": 6549 - }, - { - "epoch": 0.7875909336860458, - "grad_norm": 1.4696814647358407, - "learning_rate": 4.54751526989795e-07, - "loss": 1.0084, - "step": 6550 - }, - { - "epoch": 0.7877111765766849, - "grad_norm": 2.0319990684251827, - "learning_rate": 4.5425710346535775e-07, - "loss": 1.0212, - "step": 6551 - }, - { - "epoch": 0.787831419467324, - "grad_norm": 2.2988814329126095, - "learning_rate": 4.537629144288877e-07, - "loss": 1.0454, - "step": 6552 - }, - { - "epoch": 0.7879516623579631, - "grad_norm": 1.7075909497905641, - "learning_rate": 4.5326895995535477e-07, - "loss": 0.9748, - "step": 6553 - }, - { - "epoch": 0.7880719052486022, - "grad_norm": 2.2463384080406765, - "learning_rate": 4.527752401196907e-07, - "loss": 1.0735, - "step": 6554 - }, - { - "epoch": 0.7881921481392413, - "grad_norm": 2.8049618047605875, - "learning_rate": 4.5228175499679254e-07, - "loss": 0.904, - "step": 6555 - }, - { - "epoch": 0.7883123910298804, - "grad_norm": 0.8618814503274146, - "learning_rate": 4.5178850466152174e-07, - "loss": 0.796, - "step": 6556 - }, - { - "epoch": 0.7884326339205194, - "grad_norm": 1.910813481820698, - "learning_rate": 4.512954891887031e-07, - "loss": 1.0429, - "step": 6557 - }, - { - "epoch": 0.7885528768111585, - "grad_norm": 14.221097312255775, - "learning_rate": 4.5080270865312806e-07, - "loss": 1.0644, - "step": 6558 - }, - { - "epoch": 0.7886731197017977, - "grad_norm": 1.898160058615653, - "learning_rate": 4.5031016312954985e-07, - "loss": 0.9375, - "step": 6559 - }, - { - "epoch": 0.7887933625924367, - "grad_norm": 2.5833766277335974, - "learning_rate": 4.498178526926886e-07, - "loss": 0.9726, - "step": 6560 - }, - { - "epoch": 0.7889136054830758, - "grad_norm": 3.4169024564436405, - "learning_rate": 4.4932577741722635e-07, - "loss": 0.9604, - "step": 6561 - }, - { - "epoch": 0.7890338483737149, - "grad_norm": 1.9593425206560318, - "learning_rate": 4.4883393737780985e-07, - "loss": 0.9815, - "step": 6562 - }, - { - "epoch": 0.789154091264354, - "grad_norm": 1.9369603297593059, - "learning_rate": 4.4834233264905254e-07, - "loss": 1.0135, - "step": 6563 - }, - { - "epoch": 0.789274334154993, - "grad_norm": 2.980487788428299, - "learning_rate": 4.478509633055294e-07, - "loss": 0.9526, - "step": 6564 - }, - { - "epoch": 0.7893945770456322, - "grad_norm": 2.3283394324355453, - "learning_rate": 4.473598294217813e-07, - "loss": 1.0301, - "step": 6565 - }, - { - "epoch": 0.7895148199362713, - "grad_norm": 2.1820087719888703, - "learning_rate": 4.468689310723124e-07, - "loss": 0.9492, - "step": 6566 - }, - { - "epoch": 0.7896350628269103, - "grad_norm": 1.9170010178333938, - "learning_rate": 4.463782683315913e-07, - "loss": 1.0147, - "step": 6567 - }, - { - "epoch": 0.7897553057175495, - "grad_norm": 1.7531320609156213, - "learning_rate": 4.458878412740523e-07, - "loss": 0.9554, - "step": 6568 - }, - { - "epoch": 0.7898755486081885, - "grad_norm": 2.281358167618303, - "learning_rate": 4.453976499740919e-07, - "loss": 1.0046, - "step": 6569 - }, - { - "epoch": 0.7899957914988276, - "grad_norm": 1.6296843721646863, - "learning_rate": 4.4490769450607215e-07, - "loss": 1.0067, - "step": 6570 - }, - { - "epoch": 0.7901160343894668, - "grad_norm": 2.052535638162942, - "learning_rate": 4.4441797494431845e-07, - "loss": 0.9637, - "step": 6571 - }, - { - "epoch": 0.7902362772801058, - "grad_norm": 2.7294576844029006, - "learning_rate": 4.439284913631207e-07, - "loss": 1.0101, - "step": 6572 - }, - { - "epoch": 0.7903565201707449, - "grad_norm": 4.021960059439497, - "learning_rate": 4.434392438367347e-07, - "loss": 1.0666, - "step": 6573 - }, - { - "epoch": 0.790476763061384, - "grad_norm": 2.0948983075354266, - "learning_rate": 4.4295023243937677e-07, - "loss": 0.9694, - "step": 6574 - }, - { - "epoch": 0.7905970059520231, - "grad_norm": 2.017590060203384, - "learning_rate": 4.4246145724523123e-07, - "loss": 1.0378, - "step": 6575 - }, - { - "epoch": 0.7907172488426621, - "grad_norm": 2.192311145995968, - "learning_rate": 4.41972918328444e-07, - "loss": 0.9951, - "step": 6576 - }, - { - "epoch": 0.7908374917333013, - "grad_norm": 2.0004641471153746, - "learning_rate": 4.4148461576312646e-07, - "loss": 1.0077, - "step": 6577 - }, - { - "epoch": 0.7909577346239404, - "grad_norm": 1.361780865774134, - "learning_rate": 4.4099654962335343e-07, - "loss": 0.9776, - "step": 6578 - }, - { - "epoch": 0.7910779775145794, - "grad_norm": 1.6521208772887104, - "learning_rate": 4.405087199831636e-07, - "loss": 0.9743, - "step": 6579 - }, - { - "epoch": 0.7911982204052186, - "grad_norm": 2.23799096799908, - "learning_rate": 4.400211269165619e-07, - "loss": 0.9, - "step": 6580 - }, - { - "epoch": 0.7913184632958576, - "grad_norm": 1.6110590299687417, - "learning_rate": 4.3953377049751416e-07, - "loss": 0.9981, - "step": 6581 - }, - { - "epoch": 0.7914387061864967, - "grad_norm": 2.3871154954869582, - "learning_rate": 4.390466507999537e-07, - "loss": 1.0118, - "step": 6582 - }, - { - "epoch": 0.7915589490771359, - "grad_norm": 2.773252669072206, - "learning_rate": 4.385597678977748e-07, - "loss": 0.9892, - "step": 6583 - }, - { - "epoch": 0.7916791919677749, - "grad_norm": 1.4614169716051713, - "learning_rate": 4.3807312186483726e-07, - "loss": 0.9741, - "step": 6584 - }, - { - "epoch": 0.791799434858414, - "grad_norm": 1.9133447663810728, - "learning_rate": 4.375867127749655e-07, - "loss": 1.0102, - "step": 6585 - }, - { - "epoch": 0.7919196777490531, - "grad_norm": 2.007536160344095, - "learning_rate": 4.3710054070194744e-07, - "loss": 0.906, - "step": 6586 - }, - { - "epoch": 0.7920399206396922, - "grad_norm": 5.418449441038716, - "learning_rate": 4.3661460571953455e-07, - "loss": 0.8918, - "step": 6587 - }, - { - "epoch": 0.7921601635303313, - "grad_norm": 1.551688056467846, - "learning_rate": 4.36128907901443e-07, - "loss": 0.9105, - "step": 6588 - }, - { - "epoch": 0.7922804064209703, - "grad_norm": 2.4704321429953366, - "learning_rate": 4.356434473213519e-07, - "loss": 0.9466, - "step": 6589 - }, - { - "epoch": 0.7924006493116095, - "grad_norm": 1.7835821310927265, - "learning_rate": 4.351582240529068e-07, - "loss": 1.0204, - "step": 6590 - }, - { - "epoch": 0.7925208922022485, - "grad_norm": 0.6966108137842787, - "learning_rate": 4.346732381697149e-07, - "loss": 0.8411, - "step": 6591 - }, - { - "epoch": 0.7926411350928876, - "grad_norm": 2.1351282312625455, - "learning_rate": 4.3418848974534825e-07, - "loss": 1.0403, - "step": 6592 - }, - { - "epoch": 0.7927613779835267, - "grad_norm": 1.5830081890277543, - "learning_rate": 4.3370397885334276e-07, - "loss": 0.9174, - "step": 6593 - }, - { - "epoch": 0.7928816208741658, - "grad_norm": 2.244422357070733, - "learning_rate": 4.3321970556719777e-07, - "loss": 0.981, - "step": 6594 - }, - { - "epoch": 0.7930018637648049, - "grad_norm": 4.798809643012334, - "learning_rate": 4.3273566996037856e-07, - "loss": 0.939, - "step": 6595 - }, - { - "epoch": 0.793122106655444, - "grad_norm": 2.278239733157659, - "learning_rate": 4.322518721063113e-07, - "loss": 1.0291, - "step": 6596 - }, - { - "epoch": 0.7932423495460831, - "grad_norm": 1.831011457823025, - "learning_rate": 4.3176831207838906e-07, - "loss": 0.9275, - "step": 6597 - }, - { - "epoch": 0.7933625924367221, - "grad_norm": 2.310477119482753, - "learning_rate": 4.3128498994996685e-07, - "loss": 0.9781, - "step": 6598 - }, - { - "epoch": 0.7934828353273613, - "grad_norm": 3.590067560734367, - "learning_rate": 4.308019057943646e-07, - "loss": 0.941, - "step": 6599 - }, - { - "epoch": 0.7936030782180004, - "grad_norm": 2.5419266260333817, - "learning_rate": 4.3031905968486535e-07, - "loss": 0.9748, - "step": 6600 - }, - { - "epoch": 0.7937233211086394, - "grad_norm": 2.458280455025981, - "learning_rate": 4.298364516947162e-07, - "loss": 0.915, - "step": 6601 - }, - { - "epoch": 0.7938435639992786, - "grad_norm": 1.9586424752661595, - "learning_rate": 4.293540818971295e-07, - "loss": 0.8904, - "step": 6602 - }, - { - "epoch": 0.7939638068899176, - "grad_norm": 2.2594854895034424, - "learning_rate": 4.2887195036527934e-07, - "loss": 1.0005, - "step": 6603 - }, - { - "epoch": 0.7940840497805567, - "grad_norm": 2.5621731247250317, - "learning_rate": 4.28390057172306e-07, - "loss": 0.9601, - "step": 6604 - }, - { - "epoch": 0.7942042926711959, - "grad_norm": 2.481759187738422, - "learning_rate": 4.279084023913111e-07, - "loss": 0.9545, - "step": 6605 - }, - { - "epoch": 0.7943245355618349, - "grad_norm": 1.6980631088418054, - "learning_rate": 4.2742698609536096e-07, - "loss": 0.9184, - "step": 6606 - }, - { - "epoch": 0.794444778452474, - "grad_norm": 19.248988533694764, - "learning_rate": 4.2694580835748706e-07, - "loss": 1.009, - "step": 6607 - }, - { - "epoch": 0.7945650213431131, - "grad_norm": 4.169409548445201, - "learning_rate": 4.264648692506836e-07, - "loss": 0.9662, - "step": 6608 - }, - { - "epoch": 0.7946852642337522, - "grad_norm": 1.685775016471192, - "learning_rate": 4.2598416884790824e-07, - "loss": 0.948, - "step": 6609 - }, - { - "epoch": 0.7948055071243912, - "grad_norm": 1.8777678088137357, - "learning_rate": 4.255037072220828e-07, - "loss": 1.0415, - "step": 6610 - }, - { - "epoch": 0.7949257500150304, - "grad_norm": 1.6324337606734463, - "learning_rate": 4.2502348444609293e-07, - "loss": 0.9418, - "step": 6611 - }, - { - "epoch": 0.7950459929056695, - "grad_norm": 1.7381040332768243, - "learning_rate": 4.2454350059278844e-07, - "loss": 0.9145, - "step": 6612 - }, - { - "epoch": 0.7951662357963085, - "grad_norm": 1.9450945138399793, - "learning_rate": 4.240637557349824e-07, - "loss": 1.0725, - "step": 6613 - }, - { - "epoch": 0.7952864786869477, - "grad_norm": 1.780448197718631, - "learning_rate": 4.235842499454516e-07, - "loss": 0.8865, - "step": 6614 - }, - { - "epoch": 0.7954067215775867, - "grad_norm": 1.6244952078032675, - "learning_rate": 4.2310498329693687e-07, - "loss": 1.0495, - "step": 6615 - }, - { - "epoch": 0.7955269644682258, - "grad_norm": 1.888827517703001, - "learning_rate": 4.2262595586214164e-07, - "loss": 1.0355, - "step": 6616 - }, - { - "epoch": 0.795647207358865, - "grad_norm": 1.6099379952853248, - "learning_rate": 4.221471677137358e-07, - "loss": 1.0013, - "step": 6617 - }, - { - "epoch": 0.795767450249504, - "grad_norm": 1.4784342694292592, - "learning_rate": 4.216686189243492e-07, - "loss": 0.9325, - "step": 6618 - }, - { - "epoch": 0.7958876931401431, - "grad_norm": 1.8059687174329897, - "learning_rate": 4.211903095665785e-07, - "loss": 0.956, - "step": 6619 - }, - { - "epoch": 0.7960079360307821, - "grad_norm": 1.806266589237049, - "learning_rate": 4.2071223971298277e-07, - "loss": 0.9827, - "step": 6620 - }, - { - "epoch": 0.7961281789214213, - "grad_norm": 2.200583215916485, - "learning_rate": 4.2023440943608433e-07, - "loss": 0.8433, - "step": 6621 - }, - { - "epoch": 0.7962484218120603, - "grad_norm": 1.514761137812317, - "learning_rate": 4.1975681880837023e-07, - "loss": 1.0198, - "step": 6622 - }, - { - "epoch": 0.7963686647026994, - "grad_norm": 2.1538150694721003, - "learning_rate": 4.192794679022895e-07, - "loss": 1.0556, - "step": 6623 - }, - { - "epoch": 0.7964889075933386, - "grad_norm": 1.7414977756850751, - "learning_rate": 4.1880235679025743e-07, - "loss": 0.9438, - "step": 6624 - }, - { - "epoch": 0.7966091504839776, - "grad_norm": 1.9521906944994973, - "learning_rate": 4.1832548554464986e-07, - "loss": 0.8649, - "step": 6625 - }, - { - "epoch": 0.7967293933746167, - "grad_norm": 0.807805142101668, - "learning_rate": 4.178488542378098e-07, - "loss": 0.8394, - "step": 6626 - }, - { - "epoch": 0.7968496362652558, - "grad_norm": 1.66705125504058, - "learning_rate": 4.173724629420401e-07, - "loss": 1.1214, - "step": 6627 - }, - { - "epoch": 0.7969698791558949, - "grad_norm": 2.818373840675584, - "learning_rate": 4.168963117296087e-07, - "loss": 0.9102, - "step": 6628 - }, - { - "epoch": 0.797090122046534, - "grad_norm": 7.209395255071514, - "learning_rate": 4.1642040067274876e-07, - "loss": 0.9811, - "step": 6629 - }, - { - "epoch": 0.7972103649371731, - "grad_norm": 1.6156184006340375, - "learning_rate": 4.1594472984365493e-07, - "loss": 0.9519, - "step": 6630 - }, - { - "epoch": 0.7973306078278122, - "grad_norm": 1.6253967534403286, - "learning_rate": 4.154692993144862e-07, - "loss": 1.0003, - "step": 6631 - }, - { - "epoch": 0.7974508507184512, - "grad_norm": 1.8821946392655453, - "learning_rate": 4.1499410915736476e-07, - "loss": 0.9442, - "step": 6632 - }, - { - "epoch": 0.7975710936090904, - "grad_norm": 0.8103940888686343, - "learning_rate": 4.145191594443762e-07, - "loss": 0.9393, - "step": 6633 - }, - { - "epoch": 0.7976913364997295, - "grad_norm": 1.811341803562623, - "learning_rate": 4.140444502475713e-07, - "loss": 0.9387, - "step": 6634 - }, - { - "epoch": 0.7978115793903685, - "grad_norm": 2.160819367484868, - "learning_rate": 4.1356998163896216e-07, - "loss": 0.9295, - "step": 6635 - }, - { - "epoch": 0.7979318222810077, - "grad_norm": 2.0141718157806467, - "learning_rate": 4.130957536905255e-07, - "loss": 0.9748, - "step": 6636 - }, - { - "epoch": 0.7980520651716467, - "grad_norm": 3.6500381781266653, - "learning_rate": 4.1262176647420134e-07, - "loss": 0.9372, - "step": 6637 - }, - { - "epoch": 0.7981723080622858, - "grad_norm": 1.730633454417542, - "learning_rate": 4.121480200618923e-07, - "loss": 1.0305, - "step": 6638 - }, - { - "epoch": 0.798292550952925, - "grad_norm": 1.6759155504168182, - "learning_rate": 4.116745145254674e-07, - "loss": 1.0255, - "step": 6639 - }, - { - "epoch": 0.798412793843564, - "grad_norm": 0.8052838532308294, - "learning_rate": 4.1120124993675476e-07, - "loss": 0.8405, - "step": 6640 - }, - { - "epoch": 0.7985330367342031, - "grad_norm": 2.116637774392931, - "learning_rate": 4.107282263675498e-07, - "loss": 0.8553, - "step": 6641 - }, - { - "epoch": 0.7986532796248422, - "grad_norm": 0.7407441007242116, - "learning_rate": 4.1025544388960907e-07, - "loss": 0.7732, - "step": 6642 - }, - { - "epoch": 0.7987735225154813, - "grad_norm": 1.9554188190445378, - "learning_rate": 4.097829025746538e-07, - "loss": 0.9445, - "step": 6643 - }, - { - "epoch": 0.7988937654061203, - "grad_norm": 0.9434592626537568, - "learning_rate": 4.0931060249436757e-07, - "loss": 0.8553, - "step": 6644 - }, - { - "epoch": 0.7990140082967595, - "grad_norm": 2.1521660650057433, - "learning_rate": 4.088385437203978e-07, - "loss": 0.9215, - "step": 6645 - }, - { - "epoch": 0.7991342511873986, - "grad_norm": 2.2617398328572635, - "learning_rate": 4.083667263243564e-07, - "loss": 0.9994, - "step": 6646 - }, - { - "epoch": 0.7992544940780376, - "grad_norm": 1.6946202987692354, - "learning_rate": 4.0789515037781653e-07, - "loss": 0.9384, - "step": 6647 - }, - { - "epoch": 0.7993747369686768, - "grad_norm": 2.131266610089233, - "learning_rate": 4.0742381595231755e-07, - "loss": 1.0603, - "step": 6648 - }, - { - "epoch": 0.7994949798593158, - "grad_norm": 2.052984772281501, - "learning_rate": 4.06952723119359e-07, - "loss": 1.0113, - "step": 6649 - }, - { - "epoch": 0.7996152227499549, - "grad_norm": 1.8206890846337995, - "learning_rate": 4.0648187195040504e-07, - "loss": 0.8943, - "step": 6650 - }, - { - "epoch": 0.799735465640594, - "grad_norm": 0.9474855462635118, - "learning_rate": 4.060112625168848e-07, - "loss": 0.9565, - "step": 6651 - }, - { - "epoch": 0.7998557085312331, - "grad_norm": 11.8841658704976, - "learning_rate": 4.055408948901886e-07, - "loss": 0.9677, - "step": 6652 - }, - { - "epoch": 0.7999759514218722, - "grad_norm": 1.8652486910445372, - "learning_rate": 4.050707691416708e-07, - "loss": 0.943, - "step": 6653 - }, - { - "epoch": 0.8000961943125112, - "grad_norm": 0.7304206465115648, - "learning_rate": 4.046008853426495e-07, - "loss": 0.8538, - "step": 6654 - }, - { - "epoch": 0.8002164372031504, - "grad_norm": 2.7815620161621166, - "learning_rate": 4.0413124356440464e-07, - "loss": 0.8599, - "step": 6655 - }, - { - "epoch": 0.8003366800937894, - "grad_norm": 1.9438445899247354, - "learning_rate": 4.0366184387818223e-07, - "loss": 1.0517, - "step": 6656 - }, - { - "epoch": 0.8004569229844285, - "grad_norm": 2.289742326523882, - "learning_rate": 4.0319268635518797e-07, - "loss": 1.0803, - "step": 6657 - }, - { - "epoch": 0.8005771658750677, - "grad_norm": 1.5198326302285625, - "learning_rate": 4.027237710665943e-07, - "loss": 0.9792, - "step": 6658 - }, - { - "epoch": 0.8006974087657067, - "grad_norm": 1.9894414188707976, - "learning_rate": 4.022550980835344e-07, - "loss": 0.9307, - "step": 6659 - }, - { - "epoch": 0.8008176516563458, - "grad_norm": 2.5357120886165685, - "learning_rate": 4.017866674771051e-07, - "loss": 1.0365, - "step": 6660 - }, - { - "epoch": 0.8009378945469849, - "grad_norm": 1.634064117098176, - "learning_rate": 4.013184793183688e-07, - "loss": 0.9795, - "step": 6661 - }, - { - "epoch": 0.801058137437624, - "grad_norm": 1.7242279840661987, - "learning_rate": 4.008505336783472e-07, - "loss": 0.9523, - "step": 6662 - }, - { - "epoch": 0.801178380328263, - "grad_norm": 1.9251641141614597, - "learning_rate": 4.003828306280284e-07, - "loss": 1.0507, - "step": 6663 - }, - { - "epoch": 0.8012986232189022, - "grad_norm": 1.927284443301942, - "learning_rate": 3.999153702383626e-07, - "loss": 1.0074, - "step": 6664 - }, - { - "epoch": 0.8014188661095413, - "grad_norm": 2.0553036770276716, - "learning_rate": 3.9944815258026263e-07, - "loss": 0.9598, - "step": 6665 - }, - { - "epoch": 0.8015391090001803, - "grad_norm": 1.63393805007508, - "learning_rate": 3.989811777246057e-07, - "loss": 1.0624, - "step": 6666 - }, - { - "epoch": 0.8016593518908195, - "grad_norm": 0.8822118690792337, - "learning_rate": 3.985144457422305e-07, - "loss": 0.932, - "step": 6667 - }, - { - "epoch": 0.8017795947814585, - "grad_norm": 1.8609924974490153, - "learning_rate": 3.9804795670394096e-07, - "loss": 0.9868, - "step": 6668 - }, - { - "epoch": 0.8018998376720976, - "grad_norm": 2.055054722449493, - "learning_rate": 3.975817106805022e-07, - "loss": 0.9413, - "step": 6669 - }, - { - "epoch": 0.8020200805627368, - "grad_norm": 2.4634389607242375, - "learning_rate": 3.97115707742645e-07, - "loss": 0.8694, - "step": 6670 - }, - { - "epoch": 0.8021403234533758, - "grad_norm": 4.080053348566467, - "learning_rate": 3.966499479610599e-07, - "loss": 0.8887, - "step": 6671 - }, - { - "epoch": 0.8022605663440149, - "grad_norm": 1.9602860664325898, - "learning_rate": 3.9618443140640225e-07, - "loss": 0.8871, - "step": 6672 - }, - { - "epoch": 0.802380809234654, - "grad_norm": 0.7401756323703944, - "learning_rate": 3.957191581492918e-07, - "loss": 0.7795, - "step": 6673 - }, - { - "epoch": 0.8025010521252931, - "grad_norm": 2.6015619501336693, - "learning_rate": 3.952541282603097e-07, - "loss": 0.9348, - "step": 6674 - }, - { - "epoch": 0.8026212950159322, - "grad_norm": 1.6626988227526671, - "learning_rate": 3.9478934181000013e-07, - "loss": 1.0597, - "step": 6675 - }, - { - "epoch": 0.8027415379065713, - "grad_norm": 2.180852107179924, - "learning_rate": 3.943247988688714e-07, - "loss": 1.0753, - "step": 6676 - }, - { - "epoch": 0.8028617807972104, - "grad_norm": 2.0889975287321016, - "learning_rate": 3.938604995073933e-07, - "loss": 0.945, - "step": 6677 - }, - { - "epoch": 0.8029820236878494, - "grad_norm": 1.8115606833687874, - "learning_rate": 3.9339644379600157e-07, - "loss": 0.8937, - "step": 6678 - }, - { - "epoch": 0.8031022665784886, - "grad_norm": 2.282917615074893, - "learning_rate": 3.929326318050907e-07, - "loss": 0.9438, - "step": 6679 - }, - { - "epoch": 0.8032225094691277, - "grad_norm": 2.5094808097754697, - "learning_rate": 3.924690636050225e-07, - "loss": 1.0118, - "step": 6680 - }, - { - "epoch": 0.8033427523597667, - "grad_norm": 2.113808388109444, - "learning_rate": 3.9200573926611915e-07, - "loss": 0.9562, - "step": 6681 - }, - { - "epoch": 0.8034629952504058, - "grad_norm": 1.8726586027930308, - "learning_rate": 3.9154265885866613e-07, - "loss": 0.9575, - "step": 6682 - }, - { - "epoch": 0.8035832381410449, - "grad_norm": 2.5010518722188264, - "learning_rate": 3.9107982245291394e-07, - "loss": 0.981, - "step": 6683 - }, - { - "epoch": 0.803703481031684, - "grad_norm": 2.2715009588684167, - "learning_rate": 3.9061723011907245e-07, - "loss": 0.9928, - "step": 6684 - }, - { - "epoch": 0.803823723922323, - "grad_norm": 1.6595760273978113, - "learning_rate": 3.901548819273179e-07, - "loss": 1.0146, - "step": 6685 - }, - { - "epoch": 0.8039439668129622, - "grad_norm": 2.0256958366420936, - "learning_rate": 3.896927779477881e-07, - "loss": 0.9216, - "step": 6686 - }, - { - "epoch": 0.8040642097036013, - "grad_norm": 1.9073207537024999, - "learning_rate": 3.892309182505833e-07, - "loss": 0.9083, - "step": 6687 - }, - { - "epoch": 0.8041844525942403, - "grad_norm": 2.5158306535334014, - "learning_rate": 3.887693029057675e-07, - "loss": 1.0933, - "step": 6688 - }, - { - "epoch": 0.8043046954848795, - "grad_norm": 1.6970389580242051, - "learning_rate": 3.8830793198336684e-07, - "loss": 1.042, - "step": 6689 - }, - { - "epoch": 0.8044249383755185, - "grad_norm": 1.5804423323878092, - "learning_rate": 3.878468055533721e-07, - "loss": 0.9345, - "step": 6690 - }, - { - "epoch": 0.8045451812661576, - "grad_norm": 2.570470092023643, - "learning_rate": 3.8738592368573464e-07, - "loss": 1.0811, - "step": 6691 - }, - { - "epoch": 0.8046654241567968, - "grad_norm": 1.9630908465397592, - "learning_rate": 3.8692528645037137e-07, - "loss": 1.1068, - "step": 6692 - }, - { - "epoch": 0.8047856670474358, - "grad_norm": 2.6131644686163678, - "learning_rate": 3.8646489391715907e-07, - "loss": 1.009, - "step": 6693 - }, - { - "epoch": 0.8049059099380749, - "grad_norm": 7.678230465640901, - "learning_rate": 3.8600474615593903e-07, - "loss": 1.1141, - "step": 6694 - }, - { - "epoch": 0.805026152828714, - "grad_norm": 0.8730655471887308, - "learning_rate": 3.8554484323651605e-07, - "loss": 0.8889, - "step": 6695 - }, - { - "epoch": 0.8051463957193531, - "grad_norm": 8.980618270286381, - "learning_rate": 3.85085185228657e-07, - "loss": 1.0168, - "step": 6696 - }, - { - "epoch": 0.8052666386099921, - "grad_norm": 9.924028202015275, - "learning_rate": 3.8462577220209114e-07, - "loss": 0.9694, - "step": 6697 - }, - { - "epoch": 0.8053868815006313, - "grad_norm": 0.9585726722445672, - "learning_rate": 3.8416660422651127e-07, - "loss": 0.84, - "step": 6698 - }, - { - "epoch": 0.8055071243912704, - "grad_norm": 1.6370083864448128, - "learning_rate": 3.837076813715723e-07, - "loss": 0.9158, - "step": 6699 - }, - { - "epoch": 0.8056273672819094, - "grad_norm": 3.011762764654866, - "learning_rate": 3.832490037068941e-07, - "loss": 0.9818, - "step": 6700 - }, - { - "epoch": 0.8057476101725486, - "grad_norm": 2.1526413727344726, - "learning_rate": 3.827905713020554e-07, - "loss": 0.9888, - "step": 6701 - }, - { - "epoch": 0.8058678530631876, - "grad_norm": 2.0747770718238896, - "learning_rate": 3.823323842266017e-07, - "loss": 0.907, - "step": 6702 - }, - { - "epoch": 0.8059880959538267, - "grad_norm": 2.318526997086353, - "learning_rate": 3.818744425500393e-07, - "loss": 0.9666, - "step": 6703 - }, - { - "epoch": 0.8061083388444659, - "grad_norm": 2.078461239796329, - "learning_rate": 3.8141674634183675e-07, - "loss": 1.0405, - "step": 6704 - }, - { - "epoch": 0.8062285817351049, - "grad_norm": 1.7682289569238492, - "learning_rate": 3.809592956714278e-07, - "loss": 0.885, - "step": 6705 - }, - { - "epoch": 0.806348824625744, - "grad_norm": 2.0993705578206505, - "learning_rate": 3.805020906082057e-07, - "loss": 0.9702, - "step": 6706 - }, - { - "epoch": 0.8064690675163831, - "grad_norm": 2.04526648764731, - "learning_rate": 3.8004513122152917e-07, - "loss": 1.0362, - "step": 6707 - }, - { - "epoch": 0.8065893104070222, - "grad_norm": 1.8799468586898413, - "learning_rate": 3.79588417580718e-07, - "loss": 0.8996, - "step": 6708 - }, - { - "epoch": 0.8067095532976613, - "grad_norm": 1.807186361830655, - "learning_rate": 3.791319497550558e-07, - "loss": 0.9901, - "step": 6709 - }, - { - "epoch": 0.8068297961883004, - "grad_norm": 1.9506766270213516, - "learning_rate": 3.78675727813788e-07, - "loss": 0.9435, - "step": 6710 - }, - { - "epoch": 0.8069500390789395, - "grad_norm": 1.7143433694567343, - "learning_rate": 3.782197518261225e-07, - "loss": 0.9661, - "step": 6711 - }, - { - "epoch": 0.8070702819695785, - "grad_norm": 2.115426770703891, - "learning_rate": 3.777640218612319e-07, - "loss": 1.1922, - "step": 6712 - }, - { - "epoch": 0.8071905248602176, - "grad_norm": 2.303439355150034, - "learning_rate": 3.773085379882488e-07, - "loss": 0.9504, - "step": 6713 - }, - { - "epoch": 0.8073107677508568, - "grad_norm": 2.1915641088628037, - "learning_rate": 3.768533002762715e-07, - "loss": 0.9938, - "step": 6714 - }, - { - "epoch": 0.8074310106414958, - "grad_norm": 2.248493750042889, - "learning_rate": 3.763983087943572e-07, - "loss": 0.9884, - "step": 6715 - }, - { - "epoch": 0.8075512535321349, - "grad_norm": 1.69578313667307, - "learning_rate": 3.759435636115282e-07, - "loss": 1.0283, - "step": 6716 - }, - { - "epoch": 0.807671496422774, - "grad_norm": 2.615045838401017, - "learning_rate": 3.7548906479676967e-07, - "loss": 0.9688, - "step": 6717 - }, - { - "epoch": 0.8077917393134131, - "grad_norm": 1.6629268012346932, - "learning_rate": 3.7503481241902855e-07, - "loss": 0.9373, - "step": 6718 - }, - { - "epoch": 0.8079119822040521, - "grad_norm": 1.8412773232850874, - "learning_rate": 3.745808065472145e-07, - "loss": 1.0289, - "step": 6719 - }, - { - "epoch": 0.8080322250946913, - "grad_norm": 1.388605086682846, - "learning_rate": 3.741270472501994e-07, - "loss": 0.9902, - "step": 6720 - }, - { - "epoch": 0.8081524679853304, - "grad_norm": 1.8815964693412612, - "learning_rate": 3.736735345968183e-07, - "loss": 0.9554, - "step": 6721 - }, - { - "epoch": 0.8082727108759694, - "grad_norm": 1.5973994787883756, - "learning_rate": 3.7322026865586986e-07, - "loss": 1.0248, - "step": 6722 - }, - { - "epoch": 0.8083929537666086, - "grad_norm": 2.064338096221812, - "learning_rate": 3.7276724949611206e-07, - "loss": 0.9585, - "step": 6723 - }, - { - "epoch": 0.8085131966572476, - "grad_norm": 2.114627205028716, - "learning_rate": 3.723144771862694e-07, - "loss": 0.9716, - "step": 6724 - }, - { - "epoch": 0.8086334395478867, - "grad_norm": 1.8870227008135383, - "learning_rate": 3.718619517950263e-07, - "loss": 0.9962, - "step": 6725 - }, - { - "epoch": 0.8087536824385259, - "grad_norm": 1.7743894064097723, - "learning_rate": 3.714096733910301e-07, - "loss": 0.9942, - "step": 6726 - }, - { - "epoch": 0.8088739253291649, - "grad_norm": 2.9989386906476883, - "learning_rate": 3.709576420428926e-07, - "loss": 0.9279, - "step": 6727 - }, - { - "epoch": 0.808994168219804, - "grad_norm": 2.203461316999392, - "learning_rate": 3.7050585781918463e-07, - "loss": 0.963, - "step": 6728 - }, - { - "epoch": 0.8091144111104431, - "grad_norm": 2.316806289069718, - "learning_rate": 3.700543207884428e-07, - "loss": 0.9179, - "step": 6729 - }, - { - "epoch": 0.8092346540010822, - "grad_norm": 2.057354838371164, - "learning_rate": 3.6960303101916466e-07, - "loss": 0.9379, - "step": 6730 - }, - { - "epoch": 0.8093548968917212, - "grad_norm": 0.7880251268940953, - "learning_rate": 3.6915198857981047e-07, - "loss": 0.8173, - "step": 6731 - }, - { - "epoch": 0.8094751397823604, - "grad_norm": 1.6125839980374101, - "learning_rate": 3.687011935388027e-07, - "loss": 0.9146, - "step": 6732 - }, - { - "epoch": 0.8095953826729995, - "grad_norm": 3.359058595882741, - "learning_rate": 3.6825064596452646e-07, - "loss": 0.9552, - "step": 6733 - }, - { - "epoch": 0.8097156255636385, - "grad_norm": 1.6559948994654736, - "learning_rate": 3.678003459253305e-07, - "loss": 0.9378, - "step": 6734 - }, - { - "epoch": 0.8098358684542777, - "grad_norm": 2.109246822277468, - "learning_rate": 3.673502934895236e-07, - "loss": 0.9706, - "step": 6735 - }, - { - "epoch": 0.8099561113449167, - "grad_norm": 0.6792922131124511, - "learning_rate": 3.669004887253802e-07, - "loss": 0.8233, - "step": 6736 - }, - { - "epoch": 0.8100763542355558, - "grad_norm": 1.506950962425625, - "learning_rate": 3.664509317011335e-07, - "loss": 1.0144, - "step": 6737 - }, - { - "epoch": 0.810196597126195, - "grad_norm": 2.0699381939787695, - "learning_rate": 3.6600162248498134e-07, - "loss": 0.9603, - "step": 6738 - }, - { - "epoch": 0.810316840016834, - "grad_norm": 1.8620426691781824, - "learning_rate": 3.6555256114508426e-07, - "loss": 0.9939, - "step": 6739 - }, - { - "epoch": 0.8104370829074731, - "grad_norm": 1.713350588764819, - "learning_rate": 3.651037477495642e-07, - "loss": 0.9581, - "step": 6740 - }, - { - "epoch": 0.8105573257981122, - "grad_norm": 3.3811790980663363, - "learning_rate": 3.6465518236650584e-07, - "loss": 0.9066, - "step": 6741 - }, - { - "epoch": 0.8106775686887513, - "grad_norm": 1.7380134290477076, - "learning_rate": 3.642068650639558e-07, - "loss": 1.0139, - "step": 6742 - }, - { - "epoch": 0.8107978115793903, - "grad_norm": 1.9271130396297427, - "learning_rate": 3.6375879590992334e-07, - "loss": 0.8766, - "step": 6743 - }, - { - "epoch": 0.8109180544700295, - "grad_norm": 1.7549336913751634, - "learning_rate": 3.6331097497238173e-07, - "loss": 1.0363, - "step": 6744 - }, - { - "epoch": 0.8110382973606686, - "grad_norm": 2.4737957924435037, - "learning_rate": 3.628634023192627e-07, - "loss": 1.0248, - "step": 6745 - }, - { - "epoch": 0.8111585402513076, - "grad_norm": 2.8940544610899916, - "learning_rate": 3.624160780184644e-07, - "loss": 0.985, - "step": 6746 - }, - { - "epoch": 0.8112787831419467, - "grad_norm": 2.2551940729572832, - "learning_rate": 3.6196900213784496e-07, - "loss": 0.977, - "step": 6747 - }, - { - "epoch": 0.8113990260325858, - "grad_norm": 1.9013041763562355, - "learning_rate": 3.6152217474522527e-07, - "loss": 1.0941, - "step": 6748 - }, - { - "epoch": 0.8115192689232249, - "grad_norm": 1.4907409686835837, - "learning_rate": 3.6107559590838975e-07, - "loss": 0.9544, - "step": 6749 - }, - { - "epoch": 0.811639511813864, - "grad_norm": 2.7614695910081988, - "learning_rate": 3.606292656950822e-07, - "loss": 0.8918, - "step": 6750 - }, - { - "epoch": 0.8117597547045031, - "grad_norm": 1.977754858981912, - "learning_rate": 3.601831841730121e-07, - "loss": 1.0861, - "step": 6751 - }, - { - "epoch": 0.8118799975951422, - "grad_norm": 1.6642369998185178, - "learning_rate": 3.5973735140984916e-07, - "loss": 0.9652, - "step": 6752 - }, - { - "epoch": 0.8120002404857812, - "grad_norm": 2.224817710009842, - "learning_rate": 3.5929176747322607e-07, - "loss": 1.018, - "step": 6753 - }, - { - "epoch": 0.8121204833764204, - "grad_norm": 0.8180814284691406, - "learning_rate": 3.588464324307372e-07, - "loss": 0.8056, - "step": 6754 - }, - { - "epoch": 0.8122407262670595, - "grad_norm": 2.5910841329056002, - "learning_rate": 3.584013463499391e-07, - "loss": 0.9829, - "step": 6755 - }, - { - "epoch": 0.8123609691576985, - "grad_norm": 0.7262661237125084, - "learning_rate": 3.579565092983521e-07, - "loss": 0.8847, - "step": 6756 - }, - { - "epoch": 0.8124812120483377, - "grad_norm": 2.4635586781683947, - "learning_rate": 3.575119213434565e-07, - "loss": 1.0676, - "step": 6757 - }, - { - "epoch": 0.8126014549389767, - "grad_norm": 1.6001845968570485, - "learning_rate": 3.5706758255269765e-07, - "loss": 1.0425, - "step": 6758 - }, - { - "epoch": 0.8127216978296158, - "grad_norm": 1.5532409993071619, - "learning_rate": 3.566234929934795e-07, - "loss": 0.9274, - "step": 6759 - }, - { - "epoch": 0.812841940720255, - "grad_norm": 1.8701136815608455, - "learning_rate": 3.561796527331706e-07, - "loss": 0.9519, - "step": 6760 - }, - { - "epoch": 0.812962183610894, - "grad_norm": 1.645110909337792, - "learning_rate": 3.5573606183910163e-07, - "loss": 0.9991, - "step": 6761 - }, - { - "epoch": 0.8130824265015331, - "grad_norm": 1.8830721980762513, - "learning_rate": 3.5529272037856493e-07, - "loss": 1.0064, - "step": 6762 - }, - { - "epoch": 0.8132026693921722, - "grad_norm": 0.7704414368095089, - "learning_rate": 3.548496284188149e-07, - "loss": 0.8067, - "step": 6763 - }, - { - "epoch": 0.8133229122828113, - "grad_norm": 1.6261107243355455, - "learning_rate": 3.544067860270681e-07, - "loss": 1.0189, - "step": 6764 - }, - { - "epoch": 0.8134431551734503, - "grad_norm": 1.742224791260261, - "learning_rate": 3.539641932705029e-07, - "loss": 0.939, - "step": 6765 - }, - { - "epoch": 0.8135633980640895, - "grad_norm": 2.0184345624860693, - "learning_rate": 3.53521850216262e-07, - "loss": 0.9832, - "step": 6766 - }, - { - "epoch": 0.8136836409547286, - "grad_norm": 1.8837755201771524, - "learning_rate": 3.530797569314461e-07, - "loss": 0.9953, - "step": 6767 - }, - { - "epoch": 0.8138038838453676, - "grad_norm": 5.774497878138749, - "learning_rate": 3.5263791348312235e-07, - "loss": 1.0016, - "step": 6768 - }, - { - "epoch": 0.8139241267360068, - "grad_norm": 2.0357770503796675, - "learning_rate": 3.521963199383171e-07, - "loss": 0.9358, - "step": 6769 - }, - { - "epoch": 0.8140443696266458, - "grad_norm": 1.9851029630232562, - "learning_rate": 3.517549763640197e-07, - "loss": 1.0029, - "step": 6770 - }, - { - "epoch": 0.8141646125172849, - "grad_norm": 2.1632530268422347, - "learning_rate": 3.513138828271829e-07, - "loss": 0.9456, - "step": 6771 - }, - { - "epoch": 0.8142848554079241, - "grad_norm": 1.85948001340632, - "learning_rate": 3.508730393947179e-07, - "loss": 0.9312, - "step": 6772 - }, - { - "epoch": 0.8144050982985631, - "grad_norm": 4.9370076380673105, - "learning_rate": 3.504324461335024e-07, - "loss": 0.9449, - "step": 6773 - }, - { - "epoch": 0.8145253411892022, - "grad_norm": 1.7485198164338642, - "learning_rate": 3.499921031103732e-07, - "loss": 1.1052, - "step": 6774 - }, - { - "epoch": 0.8146455840798413, - "grad_norm": 2.190029534615411, - "learning_rate": 3.4955201039212987e-07, - "loss": 1.0111, - "step": 6775 - }, - { - "epoch": 0.8147658269704804, - "grad_norm": 2.0585082758435314, - "learning_rate": 3.4911216804553465e-07, - "loss": 0.8778, - "step": 6776 - }, - { - "epoch": 0.8148860698611194, - "grad_norm": 1.941903555285325, - "learning_rate": 3.4867257613731017e-07, - "loss": 0.9338, - "step": 6777 - }, - { - "epoch": 0.8150063127517585, - "grad_norm": 1.81145104934547, - "learning_rate": 3.4823323473414343e-07, - "loss": 1.0775, - "step": 6778 - }, - { - "epoch": 0.8151265556423977, - "grad_norm": 2.3355394107120646, - "learning_rate": 3.477941439026812e-07, - "loss": 0.9919, - "step": 6779 - }, - { - "epoch": 0.8152467985330367, - "grad_norm": 1.6982148811553603, - "learning_rate": 3.473553037095349e-07, - "loss": 0.9576, - "step": 6780 - }, - { - "epoch": 0.8153670414236758, - "grad_norm": 1.938061109939515, - "learning_rate": 3.469167142212743e-07, - "loss": 1.0568, - "step": 6781 - }, - { - "epoch": 0.8154872843143149, - "grad_norm": 2.4467961012395905, - "learning_rate": 3.4647837550443337e-07, - "loss": 0.858, - "step": 6782 - }, - { - "epoch": 0.815607527204954, - "grad_norm": 1.649792155266418, - "learning_rate": 3.460402876255086e-07, - "loss": 0.969, - "step": 6783 - }, - { - "epoch": 0.815727770095593, - "grad_norm": 4.219592427101756, - "learning_rate": 3.456024506509574e-07, - "loss": 0.9497, - "step": 6784 - }, - { - "epoch": 0.8158480129862322, - "grad_norm": 2.460722608115217, - "learning_rate": 3.4516486464719873e-07, - "loss": 0.9722, - "step": 6785 - }, - { - "epoch": 0.8159682558768713, - "grad_norm": 4.613076382318685, - "learning_rate": 3.4472752968061445e-07, - "loss": 0.8567, - "step": 6786 - }, - { - "epoch": 0.8160884987675103, - "grad_norm": 2.1364462502626247, - "learning_rate": 3.442904458175475e-07, - "loss": 0.9646, - "step": 6787 - }, - { - "epoch": 0.8162087416581495, - "grad_norm": 1.8016633295856992, - "learning_rate": 3.438536131243044e-07, - "loss": 0.9832, - "step": 6788 - }, - { - "epoch": 0.8163289845487885, - "grad_norm": 2.1980570406328703, - "learning_rate": 3.434170316671503e-07, - "loss": 0.8443, - "step": 6789 - }, - { - "epoch": 0.8164492274394276, - "grad_norm": 3.512430053231696, - "learning_rate": 3.4298070151231583e-07, - "loss": 1.1271, - "step": 6790 - }, - { - "epoch": 0.8165694703300668, - "grad_norm": 1.7969942136277584, - "learning_rate": 3.425446227259916e-07, - "loss": 0.8248, - "step": 6791 - }, - { - "epoch": 0.8166897132207058, - "grad_norm": 1.8476459269003371, - "learning_rate": 3.421087953743296e-07, - "loss": 1.0563, - "step": 6792 - }, - { - "epoch": 0.8168099561113449, - "grad_norm": 2.4704534713603685, - "learning_rate": 3.416732195234464e-07, - "loss": 1.0277, - "step": 6793 - }, - { - "epoch": 0.816930199001984, - "grad_norm": 1.4091593000030316, - "learning_rate": 3.4123789523941613e-07, - "loss": 1.0191, - "step": 6794 - }, - { - "epoch": 0.8170504418926231, - "grad_norm": 1.5266427775489293, - "learning_rate": 3.4080282258827884e-07, - "loss": 0.8678, - "step": 6795 - }, - { - "epoch": 0.8171706847832622, - "grad_norm": 1.8810095801345248, - "learning_rate": 3.403680016360342e-07, - "loss": 0.9445, - "step": 6796 - }, - { - "epoch": 0.8172909276739013, - "grad_norm": 1.433446725899226, - "learning_rate": 3.3993343244864403e-07, - "loss": 0.9039, - "step": 6797 - }, - { - "epoch": 0.8174111705645404, - "grad_norm": 1.5183844860405846, - "learning_rate": 3.394991150920323e-07, - "loss": 0.9585, - "step": 6798 - }, - { - "epoch": 0.8175314134551794, - "grad_norm": 2.083161982801178, - "learning_rate": 3.3906504963208396e-07, - "loss": 0.9782, - "step": 6799 - }, - { - "epoch": 0.8176516563458186, - "grad_norm": 1.87958297121753, - "learning_rate": 3.3863123613464774e-07, - "loss": 0.8816, - "step": 6800 - }, - { - "epoch": 0.8177718992364577, - "grad_norm": 1.932637172632266, - "learning_rate": 3.381976746655317e-07, - "loss": 0.9644, - "step": 6801 - }, - { - "epoch": 0.8178921421270967, - "grad_norm": 2.2484742395555313, - "learning_rate": 3.3776436529050756e-07, - "loss": 0.9061, - "step": 6802 - }, - { - "epoch": 0.8180123850177359, - "grad_norm": 1.7894349356049322, - "learning_rate": 3.373313080753073e-07, - "loss": 0.9473, - "step": 6803 - }, - { - "epoch": 0.8181326279083749, - "grad_norm": 5.137323217761367, - "learning_rate": 3.3689850308562527e-07, - "loss": 1.0067, - "step": 6804 - }, - { - "epoch": 0.818252870799014, - "grad_norm": 1.7975164844038263, - "learning_rate": 3.364659503871183e-07, - "loss": 1.009, - "step": 6805 - }, - { - "epoch": 0.8183731136896532, - "grad_norm": 1.7186221682087996, - "learning_rate": 3.3603365004540417e-07, - "loss": 1.0663, - "step": 6806 - }, - { - "epoch": 0.8184933565802922, - "grad_norm": 2.401313871122995, - "learning_rate": 3.356016021260624e-07, - "loss": 0.9964, - "step": 6807 - }, - { - "epoch": 0.8186135994709313, - "grad_norm": 2.2323334443607488, - "learning_rate": 3.35169806694634e-07, - "loss": 0.8763, - "step": 6808 - }, - { - "epoch": 0.8187338423615703, - "grad_norm": 0.74457043027309, - "learning_rate": 3.3473826381662186e-07, - "loss": 0.8543, - "step": 6809 - }, - { - "epoch": 0.8188540852522095, - "grad_norm": 1.9942766791399396, - "learning_rate": 3.3430697355749216e-07, - "loss": 1.0449, - "step": 6810 - }, - { - "epoch": 0.8189743281428485, - "grad_norm": 2.090079092874582, - "learning_rate": 3.3387593598266907e-07, - "loss": 0.9715, - "step": 6811 - }, - { - "epoch": 0.8190945710334876, - "grad_norm": 1.9072638145203487, - "learning_rate": 3.3344515115754225e-07, - "loss": 1.015, - "step": 6812 - }, - { - "epoch": 0.8192148139241268, - "grad_norm": 3.0121838795536977, - "learning_rate": 3.33014619147461e-07, - "loss": 1.0231, - "step": 6813 - }, - { - "epoch": 0.8193350568147658, - "grad_norm": 2.466990749445389, - "learning_rate": 3.325843400177362e-07, - "loss": 0.9415, - "step": 6814 - }, - { - "epoch": 0.8194552997054049, - "grad_norm": 1.8863465631136433, - "learning_rate": 3.32154313833642e-07, - "loss": 0.9621, - "step": 6815 - }, - { - "epoch": 0.819575542596044, - "grad_norm": 2.6328930035969735, - "learning_rate": 3.3172454066041164e-07, - "loss": 0.8291, - "step": 6816 - }, - { - "epoch": 0.8196957854866831, - "grad_norm": 1.8006618236734135, - "learning_rate": 3.3129502056324234e-07, - "loss": 0.9912, - "step": 6817 - }, - { - "epoch": 0.8198160283773221, - "grad_norm": 0.7942536190648126, - "learning_rate": 3.3086575360729165e-07, - "loss": 0.8572, - "step": 6818 - }, - { - "epoch": 0.8199362712679613, - "grad_norm": 1.7087908651604247, - "learning_rate": 3.3043673985767906e-07, - "loss": 0.9438, - "step": 6819 - }, - { - "epoch": 0.8200565141586004, - "grad_norm": 1.8828341534267703, - "learning_rate": 3.3000797937948564e-07, - "loss": 1.0062, - "step": 6820 - }, - { - "epoch": 0.8201767570492394, - "grad_norm": 0.9368383615975614, - "learning_rate": 3.295794722377534e-07, - "loss": 0.9083, - "step": 6821 - }, - { - "epoch": 0.8202969999398786, - "grad_norm": 1.5877102119459048, - "learning_rate": 3.291512184974876e-07, - "loss": 1.0238, - "step": 6822 - }, - { - "epoch": 0.8204172428305176, - "grad_norm": 4.097513812765099, - "learning_rate": 3.2872321822365346e-07, - "loss": 0.9015, - "step": 6823 - }, - { - "epoch": 0.8205374857211567, - "grad_norm": 1.9255946689929713, - "learning_rate": 3.282954714811783e-07, - "loss": 0.9666, - "step": 6824 - }, - { - "epoch": 0.8206577286117959, - "grad_norm": 2.03206489062888, - "learning_rate": 3.2786797833495093e-07, - "loss": 0.9346, - "step": 6825 - }, - { - "epoch": 0.8207779715024349, - "grad_norm": 1.6857840679448162, - "learning_rate": 3.274407388498213e-07, - "loss": 0.9545, - "step": 6826 - }, - { - "epoch": 0.820898214393074, - "grad_norm": 1.8836812274734385, - "learning_rate": 3.270137530906021e-07, - "loss": 0.9741, - "step": 6827 - }, - { - "epoch": 0.8210184572837131, - "grad_norm": 2.2046945811049135, - "learning_rate": 3.265870211220665e-07, - "loss": 1.0579, - "step": 6828 - }, - { - "epoch": 0.8211387001743522, - "grad_norm": 1.9850800230485657, - "learning_rate": 3.2616054300894934e-07, - "loss": 1.0442, - "step": 6829 - }, - { - "epoch": 0.8212589430649913, - "grad_norm": 2.5757406595426, - "learning_rate": 3.2573431881594693e-07, - "loss": 1.0741, - "step": 6830 - }, - { - "epoch": 0.8213791859556304, - "grad_norm": 2.340342575167282, - "learning_rate": 3.2530834860771663e-07, - "loss": 0.8887, - "step": 6831 - }, - { - "epoch": 0.8214994288462695, - "grad_norm": 1.8325709867584363, - "learning_rate": 3.248826324488794e-07, - "loss": 0.9666, - "step": 6832 - }, - { - "epoch": 0.8216196717369085, - "grad_norm": 1.7639172152082858, - "learning_rate": 3.244571704040138e-07, - "loss": 1.1026, - "step": 6833 - }, - { - "epoch": 0.8217399146275477, - "grad_norm": 2.0587712886337295, - "learning_rate": 3.2403196253766374e-07, - "loss": 0.9641, - "step": 6834 - }, - { - "epoch": 0.8218601575181868, - "grad_norm": 2.197469825580861, - "learning_rate": 3.2360700891433254e-07, - "loss": 1.0159, - "step": 6835 - }, - { - "epoch": 0.8219804004088258, - "grad_norm": 0.8162146530559723, - "learning_rate": 3.231823095984847e-07, - "loss": 0.8257, - "step": 6836 - }, - { - "epoch": 0.822100643299465, - "grad_norm": 2.7080307913794712, - "learning_rate": 3.2275786465454814e-07, - "loss": 0.9807, - "step": 6837 - }, - { - "epoch": 0.822220886190104, - "grad_norm": 1.8676370374862143, - "learning_rate": 3.2233367414690917e-07, - "loss": 0.9946, - "step": 6838 - }, - { - "epoch": 0.8223411290807431, - "grad_norm": 2.290030108924394, - "learning_rate": 3.219097381399183e-07, - "loss": 1.0666, - "step": 6839 - }, - { - "epoch": 0.8224613719713821, - "grad_norm": 2.2273893193106256, - "learning_rate": 3.2148605669788584e-07, - "loss": 1.032, - "step": 6840 - }, - { - "epoch": 0.8225816148620213, - "grad_norm": 2.9277281859716107, - "learning_rate": 3.2106262988508405e-07, - "loss": 1.0031, - "step": 6841 - }, - { - "epoch": 0.8227018577526604, - "grad_norm": 1.7468675098019764, - "learning_rate": 3.206394577657465e-07, - "loss": 0.9693, - "step": 6842 - }, - { - "epoch": 0.8228221006432994, - "grad_norm": 33.92927105863479, - "learning_rate": 3.202165404040675e-07, - "loss": 0.9434, - "step": 6843 - }, - { - "epoch": 0.8229423435339386, - "grad_norm": 2.3307123336326208, - "learning_rate": 3.1979387786420396e-07, - "loss": 0.9735, - "step": 6844 - }, - { - "epoch": 0.8230625864245776, - "grad_norm": 1.8406698069275667, - "learning_rate": 3.1937147021027346e-07, - "loss": 1.0563, - "step": 6845 - }, - { - "epoch": 0.8231828293152167, - "grad_norm": 6.917005354467385, - "learning_rate": 3.189493175063547e-07, - "loss": 0.9988, - "step": 6846 - }, - { - "epoch": 0.8233030722058559, - "grad_norm": 1.9629134599107096, - "learning_rate": 3.1852741981648776e-07, - "loss": 0.9027, - "step": 6847 - }, - { - "epoch": 0.8234233150964949, - "grad_norm": 3.109912614974627, - "learning_rate": 3.1810577720467404e-07, - "loss": 0.9322, - "step": 6848 - }, - { - "epoch": 0.823543557987134, - "grad_norm": 1.4852247516339778, - "learning_rate": 3.176843897348769e-07, - "loss": 0.7972, - "step": 6849 - }, - { - "epoch": 0.8236638008777731, - "grad_norm": 3.131052792976705, - "learning_rate": 3.1726325747102034e-07, - "loss": 0.9891, - "step": 6850 - }, - { - "epoch": 0.8237840437684122, - "grad_norm": 1.4872306259260475, - "learning_rate": 3.1684238047698974e-07, - "loss": 0.8703, - "step": 6851 - }, - { - "epoch": 0.8239042866590512, - "grad_norm": 8.151935239974511, - "learning_rate": 3.1642175881663155e-07, - "loss": 0.7534, - "step": 6852 - }, - { - "epoch": 0.8240245295496904, - "grad_norm": 2.647114381020135, - "learning_rate": 3.160013925537537e-07, - "loss": 1.0778, - "step": 6853 - }, - { - "epoch": 0.8241447724403295, - "grad_norm": 2.502560067689689, - "learning_rate": 3.155812817521266e-07, - "loss": 0.9822, - "step": 6854 - }, - { - "epoch": 0.8242650153309685, - "grad_norm": 1.8652168632604003, - "learning_rate": 3.151614264754787e-07, - "loss": 1.0042, - "step": 6855 - }, - { - "epoch": 0.8243852582216077, - "grad_norm": 2.5369576988076785, - "learning_rate": 3.147418267875035e-07, - "loss": 1.021, - "step": 6856 - }, - { - "epoch": 0.8245055011122467, - "grad_norm": 2.2468852630508174, - "learning_rate": 3.1432248275185315e-07, - "loss": 0.8801, - "step": 6857 - }, - { - "epoch": 0.8246257440028858, - "grad_norm": 2.676751844809336, - "learning_rate": 3.139033944321412e-07, - "loss": 1.0052, - "step": 6858 - }, - { - "epoch": 0.824745986893525, - "grad_norm": 1.5825785879234355, - "learning_rate": 3.1348456189194507e-07, - "loss": 1.0197, - "step": 6859 - }, - { - "epoch": 0.824866229784164, - "grad_norm": 1.7028472393914627, - "learning_rate": 3.1306598519479876e-07, - "loss": 1.059, - "step": 6860 - }, - { - "epoch": 0.8249864726748031, - "grad_norm": 8.91991225263604, - "learning_rate": 3.1264766440420177e-07, - "loss": 1.0103, - "step": 6861 - }, - { - "epoch": 0.8251067155654422, - "grad_norm": 2.2694362810917488, - "learning_rate": 3.122295995836124e-07, - "loss": 0.9089, - "step": 6862 - }, - { - "epoch": 0.8252269584560813, - "grad_norm": 2.99182556991274, - "learning_rate": 3.118117907964508e-07, - "loss": 1.0116, - "step": 6863 - }, - { - "epoch": 0.8253472013467203, - "grad_norm": 2.310634066435217, - "learning_rate": 3.1139423810609856e-07, - "loss": 1.036, - "step": 6864 - }, - { - "epoch": 0.8254674442373595, - "grad_norm": 1.7689780071480707, - "learning_rate": 3.1097694157589714e-07, - "loss": 0.9863, - "step": 6865 - }, - { - "epoch": 0.8255876871279986, - "grad_norm": 6.4615709946760065, - "learning_rate": 3.105599012691511e-07, - "loss": 0.9937, - "step": 6866 - }, - { - "epoch": 0.8257079300186376, - "grad_norm": 1.5617066467335214, - "learning_rate": 3.101431172491249e-07, - "loss": 1.0512, - "step": 6867 - }, - { - "epoch": 0.8258281729092768, - "grad_norm": 3.841117888095733, - "learning_rate": 3.097265895790444e-07, - "loss": 0.9455, - "step": 6868 - }, - { - "epoch": 0.8259484157999158, - "grad_norm": 1.9156549174475785, - "learning_rate": 3.093103183220962e-07, - "loss": 1.0594, - "step": 6869 - }, - { - "epoch": 0.8260686586905549, - "grad_norm": 0.9900605540107447, - "learning_rate": 3.0889430354142796e-07, - "loss": 0.8824, - "step": 6870 - }, - { - "epoch": 0.826188901581194, - "grad_norm": 3.304590679220339, - "learning_rate": 3.084785453001497e-07, - "loss": 0.926, - "step": 6871 - }, - { - "epoch": 0.8263091444718331, - "grad_norm": 2.0558851109625604, - "learning_rate": 3.080630436613314e-07, - "loss": 1.0457, - "step": 6872 - }, - { - "epoch": 0.8264293873624722, - "grad_norm": 2.0426721886434858, - "learning_rate": 3.076477986880039e-07, - "loss": 1.086, - "step": 6873 - }, - { - "epoch": 0.8265496302531112, - "grad_norm": 2.9773113279997188, - "learning_rate": 3.0723281044315986e-07, - "loss": 0.9188, - "step": 6874 - }, - { - "epoch": 0.8266698731437504, - "grad_norm": 1.9147308428381644, - "learning_rate": 3.068180789897521e-07, - "loss": 0.9919, - "step": 6875 - }, - { - "epoch": 0.8267901160343895, - "grad_norm": 1.4163837243807031, - "learning_rate": 3.064036043906966e-07, - "loss": 1.0435, - "step": 6876 - }, - { - "epoch": 0.8269103589250285, - "grad_norm": 2.0470399280524587, - "learning_rate": 3.059893867088668e-07, - "loss": 0.9038, - "step": 6877 - }, - { - "epoch": 0.8270306018156677, - "grad_norm": 1.7912922253526604, - "learning_rate": 3.055754260071004e-07, - "loss": 0.8965, - "step": 6878 - }, - { - "epoch": 0.8271508447063067, - "grad_norm": 2.061997092229234, - "learning_rate": 3.051617223481948e-07, - "loss": 0.967, - "step": 6879 - }, - { - "epoch": 0.8272710875969458, - "grad_norm": 2.9633513502925637, - "learning_rate": 3.047482757949078e-07, - "loss": 0.976, - "step": 6880 - }, - { - "epoch": 0.827391330487585, - "grad_norm": 2.212211107261977, - "learning_rate": 3.043350864099605e-07, - "loss": 1.0765, - "step": 6881 - }, - { - "epoch": 0.827511573378224, - "grad_norm": 2.385624661324417, - "learning_rate": 3.039221542560315e-07, - "loss": 1.0451, - "step": 6882 - }, - { - "epoch": 0.8276318162688631, - "grad_norm": 1.914415412598326, - "learning_rate": 3.0350947939576356e-07, - "loss": 0.9776, - "step": 6883 - }, - { - "epoch": 0.8277520591595022, - "grad_norm": 1.9880504661578893, - "learning_rate": 3.0309706189175876e-07, - "loss": 0.9551, - "step": 6884 - }, - { - "epoch": 0.8278723020501413, - "grad_norm": 0.8051483538266531, - "learning_rate": 3.0268490180658045e-07, - "loss": 0.8263, - "step": 6885 - }, - { - "epoch": 0.8279925449407803, - "grad_norm": 3.3454254801245247, - "learning_rate": 3.0227299920275305e-07, - "loss": 1.0134, - "step": 6886 - }, - { - "epoch": 0.8281127878314195, - "grad_norm": 3.8953687557438124, - "learning_rate": 3.018613541427613e-07, - "loss": 1.0824, - "step": 6887 - }, - { - "epoch": 0.8282330307220586, - "grad_norm": 1.5917614517482985, - "learning_rate": 3.0144996668905243e-07, - "loss": 0.9641, - "step": 6888 - }, - { - "epoch": 0.8283532736126976, - "grad_norm": 2.0465562878064594, - "learning_rate": 3.010388369040331e-07, - "loss": 1.0481, - "step": 6889 - }, - { - "epoch": 0.8284735165033368, - "grad_norm": 1.5945802657702008, - "learning_rate": 3.0062796485007156e-07, - "loss": 1.0561, - "step": 6890 - }, - { - "epoch": 0.8285937593939758, - "grad_norm": 2.292301841333511, - "learning_rate": 3.002173505894965e-07, - "loss": 0.878, - "step": 6891 - }, - { - "epoch": 0.8287140022846149, - "grad_norm": 3.6688913763253352, - "learning_rate": 2.998069941845973e-07, - "loss": 0.8488, - "step": 6892 - }, - { - "epoch": 0.8288342451752541, - "grad_norm": 0.7684004554956686, - "learning_rate": 2.993968956976258e-07, - "loss": 0.8489, - "step": 6893 - }, - { - "epoch": 0.8289544880658931, - "grad_norm": 1.9543830177037755, - "learning_rate": 2.9898705519079313e-07, - "loss": 0.9216, - "step": 6894 - }, - { - "epoch": 0.8290747309565322, - "grad_norm": 1.759510559496965, - "learning_rate": 2.985774727262715e-07, - "loss": 0.9719, - "step": 6895 - }, - { - "epoch": 0.8291949738471713, - "grad_norm": 1.7929331790495548, - "learning_rate": 2.981681483661949e-07, - "loss": 1.0416, - "step": 6896 - }, - { - "epoch": 0.8293152167378104, - "grad_norm": 1.6189022832107065, - "learning_rate": 2.9775908217265633e-07, - "loss": 0.9379, - "step": 6897 - }, - { - "epoch": 0.8294354596284494, - "grad_norm": 0.804836666762521, - "learning_rate": 2.9735027420771253e-07, - "loss": 0.7552, - "step": 6898 - }, - { - "epoch": 0.8295557025190886, - "grad_norm": 1.9764859633271799, - "learning_rate": 2.969417245333774e-07, - "loss": 0.9459, - "step": 6899 - }, - { - "epoch": 0.8296759454097277, - "grad_norm": 2.293892409557867, - "learning_rate": 2.9653343321162915e-07, - "loss": 1.0087, - "step": 6900 - }, - { - "epoch": 0.8297961883003667, - "grad_norm": 2.832475382739141, - "learning_rate": 2.9612540030440446e-07, - "loss": 0.8723, - "step": 6901 - }, - { - "epoch": 0.8299164311910058, - "grad_norm": 0.8434051586054875, - "learning_rate": 2.9571762587360206e-07, - "loss": 0.8765, - "step": 6902 - }, - { - "epoch": 0.8300366740816449, - "grad_norm": 1.7944677398962061, - "learning_rate": 2.953101099810806e-07, - "loss": 0.974, - "step": 6903 - }, - { - "epoch": 0.830156916972284, - "grad_norm": 2.129138394918944, - "learning_rate": 2.9490285268865965e-07, - "loss": 1.07, - "step": 6904 - }, - { - "epoch": 0.830277159862923, - "grad_norm": 2.0028018399135092, - "learning_rate": 2.9449585405812085e-07, - "loss": 1.0344, - "step": 6905 - }, - { - "epoch": 0.8303974027535622, - "grad_norm": 2.4870495105534864, - "learning_rate": 2.940891141512043e-07, - "loss": 0.9718, - "step": 6906 - }, - { - "epoch": 0.8305176456442013, - "grad_norm": 3.892000009009429, - "learning_rate": 2.9368263302961385e-07, - "loss": 0.9472, - "step": 6907 - }, - { - "epoch": 0.8306378885348403, - "grad_norm": 1.787472947456149, - "learning_rate": 2.9327641075501075e-07, - "loss": 1.0323, - "step": 6908 - }, - { - "epoch": 0.8307581314254795, - "grad_norm": 2.11376394798509, - "learning_rate": 2.9287044738901866e-07, - "loss": 0.8807, - "step": 6909 - }, - { - "epoch": 0.8308783743161186, - "grad_norm": 1.889162617216186, - "learning_rate": 2.9246474299322274e-07, - "loss": 1.1433, - "step": 6910 - }, - { - "epoch": 0.8309986172067576, - "grad_norm": 0.91715821539481, - "learning_rate": 2.920592976291678e-07, - "loss": 0.8955, - "step": 6911 - }, - { - "epoch": 0.8311188600973968, - "grad_norm": 2.1788099703302035, - "learning_rate": 2.916541113583595e-07, - "loss": 1.0338, - "step": 6912 - }, - { - "epoch": 0.8312391029880358, - "grad_norm": 2.137538940788839, - "learning_rate": 2.912491842422642e-07, - "loss": 0.8908, - "step": 6913 - }, - { - "epoch": 0.8313593458786749, - "grad_norm": 1.5378213903196887, - "learning_rate": 2.9084451634230857e-07, - "loss": 0.9316, - "step": 6914 - }, - { - "epoch": 0.831479588769314, - "grad_norm": 2.1666892735206638, - "learning_rate": 2.9044010771988125e-07, - "loss": 0.9508, - "step": 6915 - }, - { - "epoch": 0.8315998316599531, - "grad_norm": 1.7975054754351543, - "learning_rate": 2.900359584363303e-07, - "loss": 0.9634, - "step": 6916 - }, - { - "epoch": 0.8317200745505922, - "grad_norm": 2.257380671932928, - "learning_rate": 2.8963206855296494e-07, - "loss": 1.064, - "step": 6917 - }, - { - "epoch": 0.8318403174412313, - "grad_norm": 2.094411147763897, - "learning_rate": 2.892284381310548e-07, - "loss": 1.0024, - "step": 6918 - }, - { - "epoch": 0.8319605603318704, - "grad_norm": 2.2668569438205752, - "learning_rate": 2.888250672318302e-07, - "loss": 0.9415, - "step": 6919 - }, - { - "epoch": 0.8320808032225094, - "grad_norm": 2.7198742701418457, - "learning_rate": 2.884219559164831e-07, - "loss": 0.9199, - "step": 6920 - }, - { - "epoch": 0.8322010461131486, - "grad_norm": 2.107967443971213, - "learning_rate": 2.880191042461635e-07, - "loss": 1.039, - "step": 6921 - }, - { - "epoch": 0.8323212890037877, - "grad_norm": 1.8602788315220145, - "learning_rate": 2.876165122819849e-07, - "loss": 1.0316, - "step": 6922 - }, - { - "epoch": 0.8324415318944267, - "grad_norm": 1.723224226205025, - "learning_rate": 2.872141800850201e-07, - "loss": 1.019, - "step": 6923 - }, - { - "epoch": 0.8325617747850659, - "grad_norm": 1.7164915331913106, - "learning_rate": 2.868121077163024e-07, - "loss": 0.9631, - "step": 6924 - }, - { - "epoch": 0.8326820176757049, - "grad_norm": 2.3311230090532336, - "learning_rate": 2.864102952368257e-07, - "loss": 0.9499, - "step": 6925 - }, - { - "epoch": 0.832802260566344, - "grad_norm": 1.3051601627327283, - "learning_rate": 2.860087427075444e-07, - "loss": 0.829, - "step": 6926 - }, - { - "epoch": 0.8329225034569832, - "grad_norm": 2.6200777862008997, - "learning_rate": 2.856074501893744e-07, - "loss": 1.0931, - "step": 6927 - }, - { - "epoch": 0.8330427463476222, - "grad_norm": 1.5994590530281927, - "learning_rate": 2.8520641774319054e-07, - "loss": 1.0428, - "step": 6928 - }, - { - "epoch": 0.8331629892382613, - "grad_norm": 1.8584646352503118, - "learning_rate": 2.848056454298309e-07, - "loss": 0.9841, - "step": 6929 - }, - { - "epoch": 0.8332832321289004, - "grad_norm": 2.4094989195546366, - "learning_rate": 2.844051333100905e-07, - "loss": 0.8811, - "step": 6930 - }, - { - "epoch": 0.8334034750195395, - "grad_norm": 3.7109058900791885, - "learning_rate": 2.840048814447269e-07, - "loss": 1.0691, - "step": 6931 - }, - { - "epoch": 0.8335237179101785, - "grad_norm": 2.3565980851456425, - "learning_rate": 2.836048898944587e-07, - "loss": 0.966, - "step": 6932 - }, - { - "epoch": 0.8336439608008177, - "grad_norm": 2.455769273923266, - "learning_rate": 2.832051587199642e-07, - "loss": 0.9463, - "step": 6933 - }, - { - "epoch": 0.8337642036914568, - "grad_norm": 0.8646894336233518, - "learning_rate": 2.828056879818821e-07, - "loss": 0.8348, - "step": 6934 - }, - { - "epoch": 0.8338844465820958, - "grad_norm": 1.8637104939521412, - "learning_rate": 2.824064777408117e-07, - "loss": 1.0664, - "step": 6935 - }, - { - "epoch": 0.8340046894727349, - "grad_norm": 2.0484203300076556, - "learning_rate": 2.8200752805731263e-07, - "loss": 0.9861, - "step": 6936 - }, - { - "epoch": 0.834124932363374, - "grad_norm": 1.388664449465357, - "learning_rate": 2.8160883899190625e-07, - "loss": 1.0357, - "step": 6937 - }, - { - "epoch": 0.8342451752540131, - "grad_norm": 2.2257665449562505, - "learning_rate": 2.8121041060507234e-07, - "loss": 0.9693, - "step": 6938 - }, - { - "epoch": 0.8343654181446521, - "grad_norm": 1.7750224286663752, - "learning_rate": 2.808122429572528e-07, - "loss": 0.9486, - "step": 6939 - }, - { - "epoch": 0.8344856610352913, - "grad_norm": 3.2713938595220617, - "learning_rate": 2.804143361088489e-07, - "loss": 0.9858, - "step": 6940 - }, - { - "epoch": 0.8346059039259304, - "grad_norm": 2.02694868821331, - "learning_rate": 2.8001669012022277e-07, - "loss": 0.991, - "step": 6941 - }, - { - "epoch": 0.8347261468165694, - "grad_norm": 1.6110024232852074, - "learning_rate": 2.7961930505169795e-07, - "loss": 0.9274, - "step": 6942 - }, - { - "epoch": 0.8348463897072086, - "grad_norm": 1.978153176732238, - "learning_rate": 2.792221809635558e-07, - "loss": 0.9916, - "step": 6943 - }, - { - "epoch": 0.8349666325978476, - "grad_norm": 1.6344238799099733, - "learning_rate": 2.788253179160411e-07, - "loss": 0.9791, - "step": 6944 - }, - { - "epoch": 0.8350868754884867, - "grad_norm": 3.10470491048292, - "learning_rate": 2.7842871596935725e-07, - "loss": 0.8784, - "step": 6945 - }, - { - "epoch": 0.8352071183791259, - "grad_norm": 1.6713589736458836, - "learning_rate": 2.780323751836682e-07, - "loss": 0.921, - "step": 6946 - }, - { - "epoch": 0.8353273612697649, - "grad_norm": 1.3898482135531631, - "learning_rate": 2.7763629561909876e-07, - "loss": 1.0183, - "step": 6947 - }, - { - "epoch": 0.835447604160404, - "grad_norm": 1.8931377516499988, - "learning_rate": 2.772404773357335e-07, - "loss": 0.995, - "step": 6948 - }, - { - "epoch": 0.8355678470510431, - "grad_norm": 2.14356469779838, - "learning_rate": 2.7684492039361853e-07, - "loss": 1.0078, - "step": 6949 - }, - { - "epoch": 0.8356880899416822, - "grad_norm": 1.7828343355812513, - "learning_rate": 2.764496248527586e-07, - "loss": 1.066, - "step": 6950 - }, - { - "epoch": 0.8358083328323213, - "grad_norm": 2.500755005317125, - "learning_rate": 2.760545907731211e-07, - "loss": 1.0037, - "step": 6951 - }, - { - "epoch": 0.8359285757229604, - "grad_norm": 1.6195329989080192, - "learning_rate": 2.75659818214631e-07, - "loss": 0.9045, - "step": 6952 - }, - { - "epoch": 0.8360488186135995, - "grad_norm": 1.6672418158280695, - "learning_rate": 2.752653072371749e-07, - "loss": 1.0128, - "step": 6953 - }, - { - "epoch": 0.8361690615042385, - "grad_norm": 1.8391762593751901, - "learning_rate": 2.7487105790060105e-07, - "loss": 0.9757, - "step": 6954 - }, - { - "epoch": 0.8362893043948777, - "grad_norm": 1.88805214340732, - "learning_rate": 2.7447707026471587e-07, - "loss": 0.919, - "step": 6955 - }, - { - "epoch": 0.8364095472855168, - "grad_norm": 1.7801964304261761, - "learning_rate": 2.740833443892874e-07, - "loss": 1.0258, - "step": 6956 - }, - { - "epoch": 0.8365297901761558, - "grad_norm": 5.135836611813802, - "learning_rate": 2.7368988033404327e-07, - "loss": 1.0218, - "step": 6957 - }, - { - "epoch": 0.836650033066795, - "grad_norm": 1.5872478615281802, - "learning_rate": 2.732966781586712e-07, - "loss": 1.079, - "step": 6958 - }, - { - "epoch": 0.836770275957434, - "grad_norm": 1.876859569301936, - "learning_rate": 2.729037379228205e-07, - "loss": 0.9021, - "step": 6959 - }, - { - "epoch": 0.8368905188480731, - "grad_norm": 2.5911408135191376, - "learning_rate": 2.725110596860998e-07, - "loss": 1.0334, - "step": 6960 - }, - { - "epoch": 0.8370107617387123, - "grad_norm": 1.9152955112584527, - "learning_rate": 2.7211864350807776e-07, - "loss": 0.9317, - "step": 6961 - }, - { - "epoch": 0.8371310046293513, - "grad_norm": 1.755935480592201, - "learning_rate": 2.717264894482836e-07, - "loss": 0.9666, - "step": 6962 - }, - { - "epoch": 0.8372512475199904, - "grad_norm": 2.634955012681196, - "learning_rate": 2.7133459756620646e-07, - "loss": 1.0358, - "step": 6963 - }, - { - "epoch": 0.8373714904106295, - "grad_norm": 1.672676963624445, - "learning_rate": 2.7094296792129733e-07, - "loss": 0.9661, - "step": 6964 - }, - { - "epoch": 0.8374917333012686, - "grad_norm": 1.7666415098786368, - "learning_rate": 2.7055160057296424e-07, - "loss": 0.9843, - "step": 6965 - }, - { - "epoch": 0.8376119761919076, - "grad_norm": 1.6533415106345553, - "learning_rate": 2.7016049558057896e-07, - "loss": 0.9459, - "step": 6966 - }, - { - "epoch": 0.8377322190825467, - "grad_norm": 2.0764363246958375, - "learning_rate": 2.6976965300347074e-07, - "loss": 0.9429, - "step": 6967 - }, - { - "epoch": 0.8378524619731859, - "grad_norm": 2.970520776372609, - "learning_rate": 2.693790729009309e-07, - "loss": 0.9236, - "step": 6968 - }, - { - "epoch": 0.8379727048638249, - "grad_norm": 1.7935775354926742, - "learning_rate": 2.6898875533220946e-07, - "loss": 1.1057, - "step": 6969 - }, - { - "epoch": 0.838092947754464, - "grad_norm": 2.016597069834584, - "learning_rate": 2.685987003565171e-07, - "loss": 1.0427, - "step": 6970 - }, - { - "epoch": 0.8382131906451031, - "grad_norm": 2.641005449144259, - "learning_rate": 2.6820890803302566e-07, - "loss": 0.9817, - "step": 6971 - }, - { - "epoch": 0.8383334335357422, - "grad_norm": 2.2073192307277014, - "learning_rate": 2.6781937842086557e-07, - "loss": 1.0475, - "step": 6972 - }, - { - "epoch": 0.8384536764263812, - "grad_norm": 1.803581262046413, - "learning_rate": 2.6743011157912933e-07, - "loss": 0.912, - "step": 6973 - }, - { - "epoch": 0.8385739193170204, - "grad_norm": 1.5912364511444952, - "learning_rate": 2.6704110756686725e-07, - "loss": 0.8826, - "step": 6974 - }, - { - "epoch": 0.8386941622076595, - "grad_norm": 1.6958971553605657, - "learning_rate": 2.6665236644309085e-07, - "loss": 1.0697, - "step": 6975 - }, - { - "epoch": 0.8388144050982985, - "grad_norm": 1.8664341128901416, - "learning_rate": 2.662638882667727e-07, - "loss": 1.0231, - "step": 6976 - }, - { - "epoch": 0.8389346479889377, - "grad_norm": 1.8271479807964754, - "learning_rate": 2.658756730968443e-07, - "loss": 0.9563, - "step": 6977 - }, - { - "epoch": 0.8390548908795767, - "grad_norm": 2.251625427595058, - "learning_rate": 2.654877209921975e-07, - "loss": 1.1119, - "step": 6978 - }, - { - "epoch": 0.8391751337702158, - "grad_norm": 2.0068356998935384, - "learning_rate": 2.651000320116843e-07, - "loss": 0.8631, - "step": 6979 - }, - { - "epoch": 0.839295376660855, - "grad_norm": 2.417579336038182, - "learning_rate": 2.647126062141163e-07, - "loss": 0.9873, - "step": 6980 - }, - { - "epoch": 0.839415619551494, - "grad_norm": 2.1501730827036916, - "learning_rate": 2.643254436582669e-07, - "loss": 1.0619, - "step": 6981 - }, - { - "epoch": 0.8395358624421331, - "grad_norm": 1.8060910270453239, - "learning_rate": 2.6393854440286743e-07, - "loss": 1.0517, - "step": 6982 - }, - { - "epoch": 0.8396561053327722, - "grad_norm": 2.2325821733035895, - "learning_rate": 2.6355190850661045e-07, - "loss": 0.9354, - "step": 6983 - }, - { - "epoch": 0.8397763482234113, - "grad_norm": 1.4978566751506008, - "learning_rate": 2.631655360281486e-07, - "loss": 1.0923, - "step": 6984 - }, - { - "epoch": 0.8398965911140504, - "grad_norm": 1.8548819487401906, - "learning_rate": 2.6277942702609323e-07, - "loss": 0.8901, - "step": 6985 - }, - { - "epoch": 0.8400168340046895, - "grad_norm": 2.0156642702993777, - "learning_rate": 2.623935815590186e-07, - "loss": 1.0991, - "step": 6986 - }, - { - "epoch": 0.8401370768953286, - "grad_norm": 2.006698833896342, - "learning_rate": 2.6200799968545516e-07, - "loss": 1.037, - "step": 6987 - }, - { - "epoch": 0.8402573197859676, - "grad_norm": 0.802833532441711, - "learning_rate": 2.616226814638969e-07, - "loss": 0.8252, - "step": 6988 - }, - { - "epoch": 0.8403775626766068, - "grad_norm": 2.2499853769462965, - "learning_rate": 2.612376269527954e-07, - "loss": 1.0012, - "step": 6989 - }, - { - "epoch": 0.8404978055672458, - "grad_norm": 1.9741896306778577, - "learning_rate": 2.608528362105635e-07, - "loss": 0.9046, - "step": 6990 - }, - { - "epoch": 0.8406180484578849, - "grad_norm": 1.938789922720076, - "learning_rate": 2.6046830929557374e-07, - "loss": 0.9569, - "step": 6991 - }, - { - "epoch": 0.8407382913485241, - "grad_norm": 2.0195168470684424, - "learning_rate": 2.6008404626615776e-07, - "loss": 1.0775, - "step": 6992 - }, - { - "epoch": 0.8408585342391631, - "grad_norm": 3.130692599513553, - "learning_rate": 2.597000471806092e-07, - "loss": 0.9568, - "step": 6993 - }, - { - "epoch": 0.8409787771298022, - "grad_norm": 2.0005804650052066, - "learning_rate": 2.593163120971793e-07, - "loss": 0.9558, - "step": 6994 - }, - { - "epoch": 0.8410990200204413, - "grad_norm": 2.213353431028251, - "learning_rate": 2.5893284107408165e-07, - "loss": 0.9138, - "step": 6995 - }, - { - "epoch": 0.8412192629110804, - "grad_norm": 1.9265959527173453, - "learning_rate": 2.5854963416948726e-07, - "loss": 1.0174, - "step": 6996 - }, - { - "epoch": 0.8413395058017195, - "grad_norm": 1.835683472286223, - "learning_rate": 2.5816669144152816e-07, - "loss": 0.9153, - "step": 6997 - }, - { - "epoch": 0.8414597486923585, - "grad_norm": 0.9182262303043411, - "learning_rate": 2.5778401294829777e-07, - "loss": 0.9625, - "step": 6998 - }, - { - "epoch": 0.8415799915829977, - "grad_norm": 1.9273817364026784, - "learning_rate": 2.574015987478473e-07, - "loss": 0.8795, - "step": 6999 - }, - { - "epoch": 0.8417002344736367, - "grad_norm": 2.89765956751967, - "learning_rate": 2.570194488981887e-07, - "loss": 1.0926, - "step": 7000 - }, - { - "epoch": 0.8418204773642758, - "grad_norm": 0.863862372843577, - "learning_rate": 2.566375634572939e-07, - "loss": 0.87, - "step": 7001 - }, - { - "epoch": 0.841940720254915, - "grad_norm": 2.1671266312085593, - "learning_rate": 2.562559424830943e-07, - "loss": 0.9859, - "step": 7002 - }, - { - "epoch": 0.842060963145554, - "grad_norm": 2.4134337955275025, - "learning_rate": 2.5587458603348256e-07, - "loss": 0.9361, - "step": 7003 - }, - { - "epoch": 0.8421812060361931, - "grad_norm": 1.7688244214256506, - "learning_rate": 2.554934941663085e-07, - "loss": 1.0702, - "step": 7004 - }, - { - "epoch": 0.8423014489268322, - "grad_norm": 1.9778444256905312, - "learning_rate": 2.5511266693938484e-07, - "loss": 0.9569, - "step": 7005 - }, - { - "epoch": 0.8424216918174713, - "grad_norm": 1.4739183436204628, - "learning_rate": 2.547321044104822e-07, - "loss": 0.9989, - "step": 7006 - }, - { - "epoch": 0.8425419347081103, - "grad_norm": 1.744883892601533, - "learning_rate": 2.5435180663733113e-07, - "loss": 1.0007, - "step": 7007 - }, - { - "epoch": 0.8426621775987495, - "grad_norm": 2.2003885142856134, - "learning_rate": 2.539717736776241e-07, - "loss": 0.9416, - "step": 7008 - }, - { - "epoch": 0.8427824204893886, - "grad_norm": 2.463971015233319, - "learning_rate": 2.535920055890097e-07, - "loss": 0.9875, - "step": 7009 - }, - { - "epoch": 0.8429026633800276, - "grad_norm": 2.173158883638934, - "learning_rate": 2.5321250242910006e-07, - "loss": 0.8763, - "step": 7010 - }, - { - "epoch": 0.8430229062706668, - "grad_norm": 2.0464199812563058, - "learning_rate": 2.5283326425546493e-07, - "loss": 1.0872, - "step": 7011 - }, - { - "epoch": 0.8431431491613058, - "grad_norm": 2.2027219579528987, - "learning_rate": 2.5245429112563443e-07, - "loss": 0.9196, - "step": 7012 - }, - { - "epoch": 0.8432633920519449, - "grad_norm": 2.074361735155266, - "learning_rate": 2.5207558309709865e-07, - "loss": 1.0522, - "step": 7013 - }, - { - "epoch": 0.8433836349425841, - "grad_norm": 0.6901120071445579, - "learning_rate": 2.516971402273065e-07, - "loss": 0.8156, - "step": 7014 - }, - { - "epoch": 0.8435038778332231, - "grad_norm": 1.682936892078219, - "learning_rate": 2.513189625736687e-07, - "loss": 0.8989, - "step": 7015 - }, - { - "epoch": 0.8436241207238622, - "grad_norm": 2.2803433850687718, - "learning_rate": 2.509410501935534e-07, - "loss": 0.9301, - "step": 7016 - }, - { - "epoch": 0.8437443636145013, - "grad_norm": 4.406765522119751, - "learning_rate": 2.5056340314429116e-07, - "loss": 0.9862, - "step": 7017 - }, - { - "epoch": 0.8438646065051404, - "grad_norm": 1.9956281682630574, - "learning_rate": 2.5018602148316904e-07, - "loss": 1.0324, - "step": 7018 - }, - { - "epoch": 0.8439848493957794, - "grad_norm": 4.720555042347978, - "learning_rate": 2.498089052674359e-07, - "loss": 1.0244, - "step": 7019 - }, - { - "epoch": 0.8441050922864186, - "grad_norm": 1.8978994101428315, - "learning_rate": 2.494320545543007e-07, - "loss": 0.9837, - "step": 7020 - }, - { - "epoch": 0.8442253351770577, - "grad_norm": 1.8391911671499812, - "learning_rate": 2.490554694009308e-07, - "loss": 0.8999, - "step": 7021 - }, - { - "epoch": 0.8443455780676967, - "grad_norm": 1.8008425489075415, - "learning_rate": 2.4867914986445426e-07, - "loss": 1.0185, - "step": 7022 - }, - { - "epoch": 0.8444658209583359, - "grad_norm": 1.8225158977469258, - "learning_rate": 2.483030960019581e-07, - "loss": 0.9344, - "step": 7023 - }, - { - "epoch": 0.8445860638489749, - "grad_norm": 0.7449694245739954, - "learning_rate": 2.479273078704891e-07, - "loss": 0.7996, - "step": 7024 - }, - { - "epoch": 0.844706306739614, - "grad_norm": 0.7946141277587783, - "learning_rate": 2.475517855270552e-07, - "loss": 0.8977, - "step": 7025 - }, - { - "epoch": 0.8448265496302532, - "grad_norm": 1.7690351519200058, - "learning_rate": 2.4717652902862143e-07, - "loss": 0.9585, - "step": 7026 - }, - { - "epoch": 0.8449467925208922, - "grad_norm": 9.03087153186225, - "learning_rate": 2.4680153843211495e-07, - "loss": 1.0444, - "step": 7027 - }, - { - "epoch": 0.8450670354115313, - "grad_norm": 1.8483511845741663, - "learning_rate": 2.464268137944212e-07, - "loss": 0.9532, - "step": 7028 - }, - { - "epoch": 0.8451872783021703, - "grad_norm": 2.2190439808541074, - "learning_rate": 2.46052355172385e-07, - "loss": 1.0163, - "step": 7029 - }, - { - "epoch": 0.8453075211928095, - "grad_norm": 2.100859911018023, - "learning_rate": 2.456781626228128e-07, - "loss": 0.9783, - "step": 7030 - }, - { - "epoch": 0.8454277640834486, - "grad_norm": 1.0603390765829563, - "learning_rate": 2.453042362024675e-07, - "loss": 0.9765, - "step": 7031 - }, - { - "epoch": 0.8455480069740876, - "grad_norm": 1.5163355469373399, - "learning_rate": 2.449305759680751e-07, - "loss": 0.9572, - "step": 7032 - }, - { - "epoch": 0.8456682498647268, - "grad_norm": 1.4651152092221094, - "learning_rate": 2.445571819763188e-07, - "loss": 0.9763, - "step": 7033 - }, - { - "epoch": 0.8457884927553658, - "grad_norm": 1.6707934152047357, - "learning_rate": 2.4418405428384227e-07, - "loss": 0.8177, - "step": 7034 - }, - { - "epoch": 0.8459087356460049, - "grad_norm": 2.2269288949496393, - "learning_rate": 2.4381119294724864e-07, - "loss": 0.9517, - "step": 7035 - }, - { - "epoch": 0.846028978536644, - "grad_norm": 3.055972870351792, - "learning_rate": 2.434385980231004e-07, - "loss": 0.7734, - "step": 7036 - }, - { - "epoch": 0.8461492214272831, - "grad_norm": 1.8668826824039102, - "learning_rate": 2.4306626956792043e-07, - "loss": 0.884, - "step": 7037 - }, - { - "epoch": 0.8462694643179222, - "grad_norm": 1.7725183735098295, - "learning_rate": 2.4269420763819017e-07, - "loss": 0.9911, - "step": 7038 - }, - { - "epoch": 0.8463897072085613, - "grad_norm": 2.7107992439303863, - "learning_rate": 2.4232241229035223e-07, - "loss": 1.0591, - "step": 7039 - }, - { - "epoch": 0.8465099500992004, - "grad_norm": 0.845971045221784, - "learning_rate": 2.419508835808064e-07, - "loss": 0.8269, - "step": 7040 - }, - { - "epoch": 0.8466301929898394, - "grad_norm": 2.350210212882833, - "learning_rate": 2.415796215659134e-07, - "loss": 0.8573, - "step": 7041 - }, - { - "epoch": 0.8467504358804786, - "grad_norm": 2.1291064806927644, - "learning_rate": 2.412086263019939e-07, - "loss": 1.0003, - "step": 7042 - }, - { - "epoch": 0.8468706787711177, - "grad_norm": 1.7784928516226048, - "learning_rate": 2.408378978453276e-07, - "loss": 1.0319, - "step": 7043 - }, - { - "epoch": 0.8469909216617567, - "grad_norm": 0.805561812877103, - "learning_rate": 2.404674362521533e-07, - "loss": 0.8928, - "step": 7044 - }, - { - "epoch": 0.8471111645523959, - "grad_norm": 2.26756456159138, - "learning_rate": 2.4009724157866997e-07, - "loss": 0.9699, - "step": 7045 - }, - { - "epoch": 0.8472314074430349, - "grad_norm": 1.9885151244027885, - "learning_rate": 2.3972731388103564e-07, - "loss": 0.9922, - "step": 7046 - }, - { - "epoch": 0.847351650333674, - "grad_norm": 0.821132794730381, - "learning_rate": 2.393576532153687e-07, - "loss": 0.8803, - "step": 7047 - }, - { - "epoch": 0.8474718932243132, - "grad_norm": 0.9643312398063895, - "learning_rate": 2.389882596377453e-07, - "loss": 0.8573, - "step": 7048 - }, - { - "epoch": 0.8475921361149522, - "grad_norm": 1.8388476742981867, - "learning_rate": 2.386191332042031e-07, - "loss": 0.9928, - "step": 7049 - }, - { - "epoch": 0.8477123790055913, - "grad_norm": 1.8732589267511397, - "learning_rate": 2.3825027397073794e-07, - "loss": 0.9543, - "step": 7050 - }, - { - "epoch": 0.8478326218962304, - "grad_norm": 2.0770897832353925, - "learning_rate": 2.3788168199330515e-07, - "loss": 0.902, - "step": 7051 - }, - { - "epoch": 0.8479528647868695, - "grad_norm": 1.5824685328339607, - "learning_rate": 2.3751335732782074e-07, - "loss": 0.9604, - "step": 7052 - }, - { - "epoch": 0.8480731076775085, - "grad_norm": 2.0136048595328906, - "learning_rate": 2.371453000301582e-07, - "loss": 1.0251, - "step": 7053 - }, - { - "epoch": 0.8481933505681477, - "grad_norm": 1.8397866663409304, - "learning_rate": 2.3677751015615222e-07, - "loss": 0.973, - "step": 7054 - }, - { - "epoch": 0.8483135934587868, - "grad_norm": 2.470512533564101, - "learning_rate": 2.3640998776159593e-07, - "loss": 1.0815, - "step": 7055 - }, - { - "epoch": 0.8484338363494258, - "grad_norm": 1.6601988304511812, - "learning_rate": 2.3604273290224253e-07, - "loss": 1.0393, - "step": 7056 - }, - { - "epoch": 0.848554079240065, - "grad_norm": 2.5913320094305488, - "learning_rate": 2.356757456338039e-07, - "loss": 0.9764, - "step": 7057 - }, - { - "epoch": 0.848674322130704, - "grad_norm": 0.8399126956052403, - "learning_rate": 2.3530902601195147e-07, - "loss": 0.8832, - "step": 7058 - }, - { - "epoch": 0.8487945650213431, - "grad_norm": 2.14870093377832, - "learning_rate": 2.34942574092317e-07, - "loss": 1.013, - "step": 7059 - }, - { - "epoch": 0.8489148079119821, - "grad_norm": 2.3284682462467243, - "learning_rate": 2.3457638993049045e-07, - "loss": 0.9921, - "step": 7060 - }, - { - "epoch": 0.8490350508026213, - "grad_norm": 2.6770172605759623, - "learning_rate": 2.3421047358202252e-07, - "loss": 0.8712, - "step": 7061 - }, - { - "epoch": 0.8491552936932604, - "grad_norm": 2.175260361620793, - "learning_rate": 2.3384482510242144e-07, - "loss": 1.0502, - "step": 7062 - }, - { - "epoch": 0.8492755365838994, - "grad_norm": 1.890274961240403, - "learning_rate": 2.3347944454715575e-07, - "loss": 0.9996, - "step": 7063 - }, - { - "epoch": 0.8493957794745386, - "grad_norm": 3.8367783504727946, - "learning_rate": 2.331143319716542e-07, - "loss": 0.9004, - "step": 7064 - }, - { - "epoch": 0.8495160223651776, - "grad_norm": 2.1072202626713223, - "learning_rate": 2.3274948743130363e-07, - "loss": 0.883, - "step": 7065 - }, - { - "epoch": 0.8496362652558167, - "grad_norm": 1.4707262257741274, - "learning_rate": 2.3238491098145085e-07, - "loss": 1.0258, - "step": 7066 - }, - { - "epoch": 0.8497565081464559, - "grad_norm": 2.0266313127939246, - "learning_rate": 2.3202060267740141e-07, - "loss": 0.9589, - "step": 7067 - }, - { - "epoch": 0.8498767510370949, - "grad_norm": 7.0026294674220635, - "learning_rate": 2.3165656257442044e-07, - "loss": 1.0012, - "step": 7068 - }, - { - "epoch": 0.849996993927734, - "grad_norm": 2.423536751914115, - "learning_rate": 2.31292790727734e-07, - "loss": 1.1341, - "step": 7069 - }, - { - "epoch": 0.8501172368183731, - "grad_norm": 2.4904367641407967, - "learning_rate": 2.3092928719252392e-07, - "loss": 1.0289, - "step": 7070 - }, - { - "epoch": 0.8502374797090122, - "grad_norm": 2.0607839438935427, - "learning_rate": 2.3056605202393475e-07, - "loss": 1.0149, - "step": 7071 - }, - { - "epoch": 0.8503577225996513, - "grad_norm": 2.062118841844331, - "learning_rate": 2.3020308527706888e-07, - "loss": 0.9004, - "step": 7072 - }, - { - "epoch": 0.8504779654902904, - "grad_norm": 1.7267193615112935, - "learning_rate": 2.2984038700698715e-07, - "loss": 1.1158, - "step": 7073 - }, - { - "epoch": 0.8505982083809295, - "grad_norm": 1.7738453400294623, - "learning_rate": 2.2947795726871222e-07, - "loss": 1.0217, - "step": 7074 - }, - { - "epoch": 0.8507184512715685, - "grad_norm": 1.8240575943855222, - "learning_rate": 2.2911579611722253e-07, - "loss": 1.083, - "step": 7075 - }, - { - "epoch": 0.8508386941622077, - "grad_norm": 1.709692445391205, - "learning_rate": 2.2875390360745905e-07, - "loss": 1.1004, - "step": 7076 - }, - { - "epoch": 0.8509589370528468, - "grad_norm": 2.0290415569277997, - "learning_rate": 2.2839227979432008e-07, - "loss": 1.0077, - "step": 7077 - }, - { - "epoch": 0.8510791799434858, - "grad_norm": 1.824359177732842, - "learning_rate": 2.2803092473266373e-07, - "loss": 1.0753, - "step": 7078 - }, - { - "epoch": 0.851199422834125, - "grad_norm": 2.588330685609477, - "learning_rate": 2.2766983847730724e-07, - "loss": 1.0954, - "step": 7079 - }, - { - "epoch": 0.851319665724764, - "grad_norm": 1.9663595794971291, - "learning_rate": 2.2730902108302663e-07, - "loss": 0.8989, - "step": 7080 - }, - { - "epoch": 0.8514399086154031, - "grad_norm": 1.795287517377105, - "learning_rate": 2.269484726045583e-07, - "loss": 0.9196, - "step": 7081 - }, - { - "epoch": 0.8515601515060423, - "grad_norm": 1.6879002131847693, - "learning_rate": 2.2658819309659672e-07, - "loss": 1.0196, - "step": 7082 - }, - { - "epoch": 0.8516803943966813, - "grad_norm": 1.7693971831542374, - "learning_rate": 2.2622818261379706e-07, - "loss": 1.0674, - "step": 7083 - }, - { - "epoch": 0.8518006372873204, - "grad_norm": 3.297999619585134, - "learning_rate": 2.2586844121077142e-07, - "loss": 0.9806, - "step": 7084 - }, - { - "epoch": 0.8519208801779595, - "grad_norm": 2.277842759805254, - "learning_rate": 2.2550896894209215e-07, - "loss": 0.9508, - "step": 7085 - }, - { - "epoch": 0.8520411230685986, - "grad_norm": 0.6891462383294636, - "learning_rate": 2.2514976586229184e-07, - "loss": 0.8218, - "step": 7086 - }, - { - "epoch": 0.8521613659592376, - "grad_norm": 0.9524727449998235, - "learning_rate": 2.247908320258609e-07, - "loss": 0.858, - "step": 7087 - }, - { - "epoch": 0.8522816088498768, - "grad_norm": 4.6259232707836535, - "learning_rate": 2.2443216748724914e-07, - "loss": 1.0281, - "step": 7088 - }, - { - "epoch": 0.8524018517405159, - "grad_norm": 4.25663374236619, - "learning_rate": 2.2407377230086588e-07, - "loss": 0.9738, - "step": 7089 - }, - { - "epoch": 0.8525220946311549, - "grad_norm": 1.7791693062803653, - "learning_rate": 2.23715646521079e-07, - "loss": 1.066, - "step": 7090 - }, - { - "epoch": 0.852642337521794, - "grad_norm": 1.9051064985648503, - "learning_rate": 2.2335779020221724e-07, - "loss": 1.0642, - "step": 7091 - }, - { - "epoch": 0.8527625804124331, - "grad_norm": 0.8483783090082028, - "learning_rate": 2.2300020339856497e-07, - "loss": 0.8547, - "step": 7092 - }, - { - "epoch": 0.8528828233030722, - "grad_norm": 2.1083515086037687, - "learning_rate": 2.2264288616436966e-07, - "loss": 1.009, - "step": 7093 - }, - { - "epoch": 0.8530030661937112, - "grad_norm": 2.229936686587802, - "learning_rate": 2.222858385538351e-07, - "loss": 0.963, - "step": 7094 - }, - { - "epoch": 0.8531233090843504, - "grad_norm": 1.4608620200828624, - "learning_rate": 2.2192906062112527e-07, - "loss": 0.9027, - "step": 7095 - }, - { - "epoch": 0.8532435519749895, - "grad_norm": 1.741617357508174, - "learning_rate": 2.2157255242036377e-07, - "loss": 0.9341, - "step": 7096 - }, - { - "epoch": 0.8533637948656285, - "grad_norm": 1.5634157930247707, - "learning_rate": 2.2121631400563135e-07, - "loss": 0.9729, - "step": 7097 - }, - { - "epoch": 0.8534840377562677, - "grad_norm": 0.8283356812429125, - "learning_rate": 2.208603454309701e-07, - "loss": 0.8364, - "step": 7098 - }, - { - "epoch": 0.8536042806469067, - "grad_norm": 2.1560052926347133, - "learning_rate": 2.2050464675037994e-07, - "loss": 0.9353, - "step": 7099 - }, - { - "epoch": 0.8537245235375458, - "grad_norm": 2.115106439640114, - "learning_rate": 2.2014921801782016e-07, - "loss": 0.9628, - "step": 7100 - }, - { - "epoch": 0.853844766428185, - "grad_norm": 3.3659575089998652, - "learning_rate": 2.1979405928720872e-07, - "loss": 0.9693, - "step": 7101 - }, - { - "epoch": 0.853965009318824, - "grad_norm": 1.416230452453981, - "learning_rate": 2.1943917061242257e-07, - "loss": 1.0185, - "step": 7102 - }, - { - "epoch": 0.8540852522094631, - "grad_norm": 1.6845152561202308, - "learning_rate": 2.1908455204729903e-07, - "loss": 0.892, - "step": 7103 - }, - { - "epoch": 0.8542054951001022, - "grad_norm": 1.811607535741643, - "learning_rate": 2.1873020364563265e-07, - "loss": 1.0076, - "step": 7104 - }, - { - "epoch": 0.8543257379907413, - "grad_norm": 2.691491378965962, - "learning_rate": 2.183761254611789e-07, - "loss": 0.991, - "step": 7105 - }, - { - "epoch": 0.8544459808813804, - "grad_norm": 2.3309551676428253, - "learning_rate": 2.1802231754764987e-07, - "loss": 0.933, - "step": 7106 - }, - { - "epoch": 0.8545662237720195, - "grad_norm": 1.7467052233907856, - "learning_rate": 2.17668779958718e-07, - "loss": 0.9882, - "step": 7107 - }, - { - "epoch": 0.8546864666626586, - "grad_norm": 2.21891182322783, - "learning_rate": 2.1731551274801553e-07, - "loss": 1.0266, - "step": 7108 - }, - { - "epoch": 0.8548067095532976, - "grad_norm": 4.709711373915172, - "learning_rate": 2.169625159691324e-07, - "loss": 0.8406, - "step": 7109 - }, - { - "epoch": 0.8549269524439368, - "grad_norm": 2.47096587581993, - "learning_rate": 2.1660978967561784e-07, - "loss": 0.9805, - "step": 7110 - }, - { - "epoch": 0.8550471953345758, - "grad_norm": 2.4029327910982357, - "learning_rate": 2.1625733392098035e-07, - "loss": 1.0242, - "step": 7111 - }, - { - "epoch": 0.8551674382252149, - "grad_norm": 1.5985552583127662, - "learning_rate": 2.159051487586867e-07, - "loss": 1.023, - "step": 7112 - }, - { - "epoch": 0.8552876811158541, - "grad_norm": 2.2982508891558413, - "learning_rate": 2.155532342421642e-07, - "loss": 0.9565, - "step": 7113 - }, - { - "epoch": 0.8554079240064931, - "grad_norm": 1.711542727927493, - "learning_rate": 2.1520159042479636e-07, - "loss": 1.0146, - "step": 7114 - }, - { - "epoch": 0.8555281668971322, - "grad_norm": 1.9323141160125297, - "learning_rate": 2.148502173599287e-07, - "loss": 0.9354, - "step": 7115 - }, - { - "epoch": 0.8556484097877713, - "grad_norm": 1.492971803351633, - "learning_rate": 2.1449911510086372e-07, - "loss": 0.8846, - "step": 7116 - }, - { - "epoch": 0.8557686526784104, - "grad_norm": 2.0714378896982852, - "learning_rate": 2.141482837008628e-07, - "loss": 0.9967, - "step": 7117 - }, - { - "epoch": 0.8558888955690495, - "grad_norm": 3.5516504077965867, - "learning_rate": 2.1379772321314826e-07, - "loss": 0.9506, - "step": 7118 - }, - { - "epoch": 0.8560091384596886, - "grad_norm": 1.9251398407315947, - "learning_rate": 2.1344743369089802e-07, - "loss": 1.0532, - "step": 7119 - }, - { - "epoch": 0.8561293813503277, - "grad_norm": 1.7282938841925377, - "learning_rate": 2.130974151872522e-07, - "loss": 1.0462, - "step": 7120 - }, - { - "epoch": 0.8562496242409667, - "grad_norm": 1.9135356567266284, - "learning_rate": 2.1274766775530773e-07, - "loss": 1.0185, - "step": 7121 - }, - { - "epoch": 0.8563698671316058, - "grad_norm": 1.960126245481111, - "learning_rate": 2.1239819144812077e-07, - "loss": 1.0284, - "step": 7122 - }, - { - "epoch": 0.856490110022245, - "grad_norm": 1.8615872341867465, - "learning_rate": 2.1204898631870716e-07, - "loss": 0.9251, - "step": 7123 - }, - { - "epoch": 0.856610352912884, - "grad_norm": 1.6752886281503445, - "learning_rate": 2.1170005242004006e-07, - "loss": 0.9959, - "step": 7124 - }, - { - "epoch": 0.8567305958035231, - "grad_norm": 2.4480324142903043, - "learning_rate": 2.1135138980505384e-07, - "loss": 1.0069, - "step": 7125 - }, - { - "epoch": 0.8568508386941622, - "grad_norm": 1.8427452323788762, - "learning_rate": 2.110029985266395e-07, - "loss": 0.9617, - "step": 7126 - }, - { - "epoch": 0.8569710815848013, - "grad_norm": 1.6764421204707827, - "learning_rate": 2.1065487863764787e-07, - "loss": 0.9618, - "step": 7127 - }, - { - "epoch": 0.8570913244754403, - "grad_norm": 1.7125280642472258, - "learning_rate": 2.1030703019088846e-07, - "loss": 1.0861, - "step": 7128 - }, - { - "epoch": 0.8572115673660795, - "grad_norm": 1.9519381159368925, - "learning_rate": 2.099594532391291e-07, - "loss": 0.9363, - "step": 7129 - }, - { - "epoch": 0.8573318102567186, - "grad_norm": 3.254699244045362, - "learning_rate": 2.0961214783509806e-07, - "loss": 1.0142, - "step": 7130 - }, - { - "epoch": 0.8574520531473576, - "grad_norm": 2.385610969548171, - "learning_rate": 2.0926511403148051e-07, - "loss": 0.9743, - "step": 7131 - }, - { - "epoch": 0.8575722960379968, - "grad_norm": 1.891960303025216, - "learning_rate": 2.0891835188092143e-07, - "loss": 0.99, - "step": 7132 - }, - { - "epoch": 0.8576925389286358, - "grad_norm": 1.7723360380193107, - "learning_rate": 2.0857186143602434e-07, - "loss": 1.0408, - "step": 7133 - }, - { - "epoch": 0.8578127818192749, - "grad_norm": 2.1814949444632012, - "learning_rate": 2.0822564274935094e-07, - "loss": 0.9076, - "step": 7134 - }, - { - "epoch": 0.8579330247099141, - "grad_norm": 1.7752845992689423, - "learning_rate": 2.078796958734239e-07, - "loss": 0.8998, - "step": 7135 - }, - { - "epoch": 0.8580532676005531, - "grad_norm": 1.7319684214675553, - "learning_rate": 2.0753402086072124e-07, - "loss": 0.9699, - "step": 7136 - }, - { - "epoch": 0.8581735104911922, - "grad_norm": 2.3940743886405715, - "learning_rate": 2.071886177636828e-07, - "loss": 0.9847, - "step": 7137 - }, - { - "epoch": 0.8582937533818313, - "grad_norm": 2.049964727703055, - "learning_rate": 2.0684348663470575e-07, - "loss": 1.0502, - "step": 7138 - }, - { - "epoch": 0.8584139962724704, - "grad_norm": 1.9683376061987736, - "learning_rate": 2.0649862752614555e-07, - "loss": 0.8476, - "step": 7139 - }, - { - "epoch": 0.8585342391631094, - "grad_norm": 0.8172782366178573, - "learning_rate": 2.0615404049031838e-07, - "loss": 0.8255, - "step": 7140 - }, - { - "epoch": 0.8586544820537486, - "grad_norm": 3.644787680220443, - "learning_rate": 2.0580972557949616e-07, - "loss": 1.0089, - "step": 7141 - }, - { - "epoch": 0.8587747249443877, - "grad_norm": 0.8227028669605724, - "learning_rate": 2.054656828459125e-07, - "loss": 0.7818, - "step": 7142 - }, - { - "epoch": 0.8588949678350267, - "grad_norm": 2.943947878137773, - "learning_rate": 2.051219123417578e-07, - "loss": 0.9996, - "step": 7143 - }, - { - "epoch": 0.8590152107256659, - "grad_norm": 2.139621694407267, - "learning_rate": 2.0477841411918196e-07, - "loss": 0.8333, - "step": 7144 - }, - { - "epoch": 0.859135453616305, - "grad_norm": 1.9688188449993154, - "learning_rate": 2.0443518823029326e-07, - "loss": 0.9761, - "step": 7145 - }, - { - "epoch": 0.859255696506944, - "grad_norm": 2.09559458996387, - "learning_rate": 2.0409223472715854e-07, - "loss": 0.9977, - "step": 7146 - }, - { - "epoch": 0.8593759393975832, - "grad_norm": 1.655442382832951, - "learning_rate": 2.0374955366180434e-07, - "loss": 0.9714, - "step": 7147 - }, - { - "epoch": 0.8594961822882222, - "grad_norm": 1.7841346538771348, - "learning_rate": 2.034071450862147e-07, - "loss": 0.9579, - "step": 7148 - }, - { - "epoch": 0.8596164251788613, - "grad_norm": 10.723495100384424, - "learning_rate": 2.030650090523327e-07, - "loss": 0.9995, - "step": 7149 - }, - { - "epoch": 0.8597366680695004, - "grad_norm": 1.6114050644509845, - "learning_rate": 2.0272314561205995e-07, - "loss": 0.8288, - "step": 7150 - }, - { - "epoch": 0.8598569109601395, - "grad_norm": 1.802616003909332, - "learning_rate": 2.023815548172567e-07, - "loss": 0.9596, - "step": 7151 - }, - { - "epoch": 0.8599771538507786, - "grad_norm": 1.455724298768159, - "learning_rate": 2.0204023671974267e-07, - "loss": 0.8951, - "step": 7152 - }, - { - "epoch": 0.8600973967414177, - "grad_norm": 2.377257228133072, - "learning_rate": 2.0169919137129532e-07, - "loss": 1.0387, - "step": 7153 - }, - { - "epoch": 0.8602176396320568, - "grad_norm": 2.5400253582987675, - "learning_rate": 2.013584188236508e-07, - "loss": 0.9288, - "step": 7154 - }, - { - "epoch": 0.8603378825226958, - "grad_norm": 19.377691660309576, - "learning_rate": 2.0101791912850396e-07, - "loss": 1.0241, - "step": 7155 - }, - { - "epoch": 0.8604581254133349, - "grad_norm": 2.043047055554452, - "learning_rate": 2.006776923375082e-07, - "loss": 0.8625, - "step": 7156 - }, - { - "epoch": 0.860578368303974, - "grad_norm": 1.8020150349928639, - "learning_rate": 2.003377385022764e-07, - "loss": 0.9361, - "step": 7157 - }, - { - "epoch": 0.8606986111946131, - "grad_norm": 2.130896969191741, - "learning_rate": 1.9999805767437826e-07, - "loss": 0.9965, - "step": 7158 - }, - { - "epoch": 0.8608188540852522, - "grad_norm": 1.6691208730895297, - "learning_rate": 1.9965864990534386e-07, - "loss": 0.945, - "step": 7159 - }, - { - "epoch": 0.8609390969758913, - "grad_norm": 1.5185599471120732, - "learning_rate": 1.9931951524666092e-07, - "loss": 1.0029, - "step": 7160 - }, - { - "epoch": 0.8610593398665304, - "grad_norm": 1.5818699884052687, - "learning_rate": 1.9898065374977534e-07, - "loss": 1.0324, - "step": 7161 - }, - { - "epoch": 0.8611795827571694, - "grad_norm": 2.3869147337318015, - "learning_rate": 1.9864206546609342e-07, - "loss": 0.9554, - "step": 7162 - }, - { - "epoch": 0.8612998256478086, - "grad_norm": 2.12679562716778, - "learning_rate": 1.983037504469771e-07, - "loss": 1.0711, - "step": 7163 - }, - { - "epoch": 0.8614200685384477, - "grad_norm": 2.096231498841342, - "learning_rate": 1.9796570874374984e-07, - "loss": 0.8943, - "step": 7164 - }, - { - "epoch": 0.8615403114290867, - "grad_norm": 1.7428165548878312, - "learning_rate": 1.976279404076917e-07, - "loss": 1.0048, - "step": 7165 - }, - { - "epoch": 0.8616605543197259, - "grad_norm": 1.637545720401241, - "learning_rate": 1.9729044549004193e-07, - "loss": 0.9907, - "step": 7166 - }, - { - "epoch": 0.8617807972103649, - "grad_norm": 1.715144450385231, - "learning_rate": 1.9695322404199822e-07, - "loss": 0.9344, - "step": 7167 - }, - { - "epoch": 0.861901040101004, - "grad_norm": 2.1759243546548617, - "learning_rate": 1.9661627611471654e-07, - "loss": 1.0528, - "step": 7168 - }, - { - "epoch": 0.8620212829916432, - "grad_norm": 1.7908623316733394, - "learning_rate": 1.9627960175931246e-07, - "loss": 0.9393, - "step": 7169 - }, - { - "epoch": 0.8621415258822822, - "grad_norm": 3.5500725268624986, - "learning_rate": 1.9594320102685847e-07, - "loss": 0.9766, - "step": 7170 - }, - { - "epoch": 0.8622617687729213, - "grad_norm": 2.953204441515804, - "learning_rate": 1.956070739683864e-07, - "loss": 0.871, - "step": 7171 - }, - { - "epoch": 0.8623820116635604, - "grad_norm": 2.029191837768261, - "learning_rate": 1.9527122063488678e-07, - "loss": 0.976, - "step": 7172 - }, - { - "epoch": 0.8625022545541995, - "grad_norm": 1.5153463835103254, - "learning_rate": 1.9493564107730755e-07, - "loss": 1.0293, - "step": 7173 - }, - { - "epoch": 0.8626224974448385, - "grad_norm": 2.3017119256205154, - "learning_rate": 1.9460033534655684e-07, - "loss": 0.8457, - "step": 7174 - }, - { - "epoch": 0.8627427403354777, - "grad_norm": 1.8374885662527587, - "learning_rate": 1.9426530349349978e-07, - "loss": 1.0645, - "step": 7175 - }, - { - "epoch": 0.8628629832261168, - "grad_norm": 1.7876808128406572, - "learning_rate": 1.9393054556896038e-07, - "loss": 0.882, - "step": 7176 - }, - { - "epoch": 0.8629832261167558, - "grad_norm": 2.1536309742597535, - "learning_rate": 1.9359606162372133e-07, - "loss": 0.9188, - "step": 7177 - }, - { - "epoch": 0.863103469007395, - "grad_norm": 1.6931882989653495, - "learning_rate": 1.9326185170852293e-07, - "loss": 0.9413, - "step": 7178 - }, - { - "epoch": 0.863223711898034, - "grad_norm": 1.9115605409271343, - "learning_rate": 1.9292791587406598e-07, - "loss": 0.9474, - "step": 7179 - }, - { - "epoch": 0.8633439547886731, - "grad_norm": 2.244702779215235, - "learning_rate": 1.9259425417100661e-07, - "loss": 1.0957, - "step": 7180 - }, - { - "epoch": 0.8634641976793123, - "grad_norm": 2.541495041185908, - "learning_rate": 1.9226086664996234e-07, - "loss": 0.9757, - "step": 7181 - }, - { - "epoch": 0.8635844405699513, - "grad_norm": 1.979223217072614, - "learning_rate": 1.9192775336150712e-07, - "loss": 0.9776, - "step": 7182 - }, - { - "epoch": 0.8637046834605904, - "grad_norm": 0.8210897850282071, - "learning_rate": 1.915949143561739e-07, - "loss": 0.8061, - "step": 7183 - }, - { - "epoch": 0.8638249263512295, - "grad_norm": 4.5366923304347395, - "learning_rate": 1.9126234968445498e-07, - "loss": 1.0042, - "step": 7184 - }, - { - "epoch": 0.8639451692418686, - "grad_norm": 1.4797323604820694, - "learning_rate": 1.9093005939679884e-07, - "loss": 0.8972, - "step": 7185 - }, - { - "epoch": 0.8640654121325076, - "grad_norm": 1.7535986683727345, - "learning_rate": 1.9059804354361452e-07, - "loss": 0.9937, - "step": 7186 - }, - { - "epoch": 0.8641856550231467, - "grad_norm": 1.6634865063921778, - "learning_rate": 1.902663021752684e-07, - "loss": 0.9416, - "step": 7187 - }, - { - "epoch": 0.8643058979137859, - "grad_norm": 2.2841809400381345, - "learning_rate": 1.8993483534208556e-07, - "loss": 1.0476, - "step": 7188 - }, - { - "epoch": 0.8644261408044249, - "grad_norm": 2.2068402503083133, - "learning_rate": 1.8960364309434884e-07, - "loss": 0.9764, - "step": 7189 - }, - { - "epoch": 0.864546383695064, - "grad_norm": 2.5680647184232437, - "learning_rate": 1.8927272548229967e-07, - "loss": 1.0192, - "step": 7190 - }, - { - "epoch": 0.8646666265857031, - "grad_norm": 1.5001194429525901, - "learning_rate": 1.8894208255613876e-07, - "loss": 1.0576, - "step": 7191 - }, - { - "epoch": 0.8647868694763422, - "grad_norm": 1.7580757452625078, - "learning_rate": 1.8861171436602397e-07, - "loss": 0.9986, - "step": 7192 - }, - { - "epoch": 0.8649071123669813, - "grad_norm": 3.894844922564337, - "learning_rate": 1.882816209620719e-07, - "loss": 1.0386, - "step": 7193 - }, - { - "epoch": 0.8650273552576204, - "grad_norm": 3.408147720717291, - "learning_rate": 1.8795180239435738e-07, - "loss": 0.9946, - "step": 7194 - }, - { - "epoch": 0.8651475981482595, - "grad_norm": 2.5917409446678072, - "learning_rate": 1.8762225871291348e-07, - "loss": 0.9891, - "step": 7195 - }, - { - "epoch": 0.8652678410388985, - "grad_norm": 1.7040285286104688, - "learning_rate": 1.8729298996773201e-07, - "loss": 1.0337, - "step": 7196 - }, - { - "epoch": 0.8653880839295377, - "grad_norm": 0.8699315961715134, - "learning_rate": 1.8696399620876301e-07, - "loss": 0.867, - "step": 7197 - }, - { - "epoch": 0.8655083268201768, - "grad_norm": 2.564290514378698, - "learning_rate": 1.866352774859141e-07, - "loss": 1.0241, - "step": 7198 - }, - { - "epoch": 0.8656285697108158, - "grad_norm": 2.407172583541601, - "learning_rate": 1.8630683384905188e-07, - "loss": 0.9238, - "step": 7199 - }, - { - "epoch": 0.865748812601455, - "grad_norm": 1.8149500762756998, - "learning_rate": 1.8597866534800045e-07, - "loss": 1.1247, - "step": 7200 - }, - { - "epoch": 0.865869055492094, - "grad_norm": 1.9362092179201817, - "learning_rate": 1.8565077203254398e-07, - "loss": 0.9753, - "step": 7201 - }, - { - "epoch": 0.8659892983827331, - "grad_norm": 2.7525277224884817, - "learning_rate": 1.8532315395242203e-07, - "loss": 0.9551, - "step": 7202 - }, - { - "epoch": 0.8661095412733723, - "grad_norm": 1.9183211303950307, - "learning_rate": 1.849958111573353e-07, - "loss": 0.9458, - "step": 7203 - }, - { - "epoch": 0.8662297841640113, - "grad_norm": 1.668788020811673, - "learning_rate": 1.8466874369694074e-07, - "loss": 0.8664, - "step": 7204 - }, - { - "epoch": 0.8663500270546504, - "grad_norm": 2.3545725874778523, - "learning_rate": 1.843419516208542e-07, - "loss": 0.932, - "step": 7205 - }, - { - "epoch": 0.8664702699452895, - "grad_norm": 2.1458893802570076, - "learning_rate": 1.8401543497865047e-07, - "loss": 1.0169, - "step": 7206 - }, - { - "epoch": 0.8665905128359286, - "grad_norm": 2.994269938702974, - "learning_rate": 1.836891938198608e-07, - "loss": 0.8766, - "step": 7207 - }, - { - "epoch": 0.8667107557265676, - "grad_norm": 2.268299812650343, - "learning_rate": 1.8336322819397677e-07, - "loss": 0.9457, - "step": 7208 - }, - { - "epoch": 0.8668309986172068, - "grad_norm": 2.2111542396662967, - "learning_rate": 1.8303753815044654e-07, - "loss": 0.858, - "step": 7209 - }, - { - "epoch": 0.8669512415078459, - "grad_norm": 2.996690832372305, - "learning_rate": 1.827121237386773e-07, - "loss": 0.9259, - "step": 7210 - }, - { - "epoch": 0.8670714843984849, - "grad_norm": 2.800358909356034, - "learning_rate": 1.8238698500803374e-07, - "loss": 0.9842, - "step": 7211 - }, - { - "epoch": 0.8671917272891241, - "grad_norm": 0.8142409745870707, - "learning_rate": 1.820621220078391e-07, - "loss": 0.8533, - "step": 7212 - }, - { - "epoch": 0.8673119701797631, - "grad_norm": 1.6729653626508578, - "learning_rate": 1.8173753478737553e-07, - "loss": 0.9043, - "step": 7213 - }, - { - "epoch": 0.8674322130704022, - "grad_norm": 2.551982228877934, - "learning_rate": 1.8141322339588205e-07, - "loss": 1.0255, - "step": 7214 - }, - { - "epoch": 0.8675524559610414, - "grad_norm": 2.333567868661665, - "learning_rate": 1.810891878825569e-07, - "loss": 0.9304, - "step": 7215 - }, - { - "epoch": 0.8676726988516804, - "grad_norm": 2.3673484420201136, - "learning_rate": 1.8076542829655561e-07, - "loss": 0.942, - "step": 7216 - }, - { - "epoch": 0.8677929417423195, - "grad_norm": 2.8013580026364977, - "learning_rate": 1.8044194468699203e-07, - "loss": 1.0281, - "step": 7217 - }, - { - "epoch": 0.8679131846329585, - "grad_norm": 2.555373260197399, - "learning_rate": 1.8011873710293912e-07, - "loss": 0.981, - "step": 7218 - }, - { - "epoch": 0.8680334275235977, - "grad_norm": 1.7469190315607015, - "learning_rate": 1.7979580559342677e-07, - "loss": 0.9208, - "step": 7219 - }, - { - "epoch": 0.8681536704142367, - "grad_norm": 1.6347015258910755, - "learning_rate": 1.7947315020744358e-07, - "loss": 0.9004, - "step": 7220 - }, - { - "epoch": 0.8682739133048758, - "grad_norm": 1.9229493278575842, - "learning_rate": 1.7915077099393594e-07, - "loss": 1.0247, - "step": 7221 - }, - { - "epoch": 0.868394156195515, - "grad_norm": 1.697239075552923, - "learning_rate": 1.788286680018083e-07, - "loss": 0.9673, - "step": 7222 - }, - { - "epoch": 0.868514399086154, - "grad_norm": 2.2642599268541193, - "learning_rate": 1.7850684127992443e-07, - "loss": 0.9502, - "step": 7223 - }, - { - "epoch": 0.8686346419767931, - "grad_norm": 1.6868622599089202, - "learning_rate": 1.7818529087710378e-07, - "loss": 0.9347, - "step": 7224 - }, - { - "epoch": 0.8687548848674322, - "grad_norm": 1.6718063340391212, - "learning_rate": 1.7786401684212637e-07, - "loss": 1.0744, - "step": 7225 - }, - { - "epoch": 0.8688751277580713, - "grad_norm": 0.743879657951929, - "learning_rate": 1.7754301922372883e-07, - "loss": 0.8038, - "step": 7226 - }, - { - "epoch": 0.8689953706487104, - "grad_norm": 1.8098487536850172, - "learning_rate": 1.7722229807060617e-07, - "loss": 1.0421, - "step": 7227 - }, - { - "epoch": 0.8691156135393495, - "grad_norm": 2.389244329145822, - "learning_rate": 1.7690185343141172e-07, - "loss": 1.0447, - "step": 7228 - }, - { - "epoch": 0.8692358564299886, - "grad_norm": 2.151244504169047, - "learning_rate": 1.7658168535475615e-07, - "loss": 0.9428, - "step": 7229 - }, - { - "epoch": 0.8693560993206276, - "grad_norm": 3.8149092519223617, - "learning_rate": 1.7626179388920948e-07, - "loss": 0.876, - "step": 7230 - }, - { - "epoch": 0.8694763422112668, - "grad_norm": 2.071118813172037, - "learning_rate": 1.7594217908329866e-07, - "loss": 1.0325, - "step": 7231 - }, - { - "epoch": 0.8695965851019059, - "grad_norm": 1.9514191769579439, - "learning_rate": 1.7562284098550895e-07, - "loss": 0.9665, - "step": 7232 - }, - { - "epoch": 0.8697168279925449, - "grad_norm": 0.8818102185458218, - "learning_rate": 1.753037796442838e-07, - "loss": 0.8949, - "step": 7233 - }, - { - "epoch": 0.8698370708831841, - "grad_norm": 2.061602541708277, - "learning_rate": 1.74984995108024e-07, - "loss": 0.9788, - "step": 7234 - }, - { - "epoch": 0.8699573137738231, - "grad_norm": 1.877180230634803, - "learning_rate": 1.7466648742508981e-07, - "loss": 1.058, - "step": 7235 - }, - { - "epoch": 0.8700775566644622, - "grad_norm": 1.8112064217852928, - "learning_rate": 1.7434825664379837e-07, - "loss": 1.0679, - "step": 7236 - }, - { - "epoch": 0.8701977995551013, - "grad_norm": 3.2596314838618636, - "learning_rate": 1.740303028124246e-07, - "loss": 1.0905, - "step": 7237 - }, - { - "epoch": 0.8703180424457404, - "grad_norm": 2.0611366621126277, - "learning_rate": 1.7371262597920212e-07, - "loss": 0.9865, - "step": 7238 - }, - { - "epoch": 0.8704382853363795, - "grad_norm": 1.4350159785746808, - "learning_rate": 1.7339522619232195e-07, - "loss": 0.9905, - "step": 7239 - }, - { - "epoch": 0.8705585282270186, - "grad_norm": 1.6764926067477113, - "learning_rate": 1.730781034999338e-07, - "loss": 0.9829, - "step": 7240 - }, - { - "epoch": 0.8706787711176577, - "grad_norm": 2.265694557963754, - "learning_rate": 1.7276125795014497e-07, - "loss": 0.969, - "step": 7241 - }, - { - "epoch": 0.8707990140082967, - "grad_norm": 1.9916179366381848, - "learning_rate": 1.7244468959102054e-07, - "loss": 0.9079, - "step": 7242 - }, - { - "epoch": 0.8709192568989359, - "grad_norm": 2.1875223431127355, - "learning_rate": 1.7212839847058348e-07, - "loss": 1.0789, - "step": 7243 - }, - { - "epoch": 0.871039499789575, - "grad_norm": 1.756377248803729, - "learning_rate": 1.718123846368147e-07, - "loss": 0.9661, - "step": 7244 - }, - { - "epoch": 0.871159742680214, - "grad_norm": 1.8968553897151932, - "learning_rate": 1.714966481376543e-07, - "loss": 0.9462, - "step": 7245 - }, - { - "epoch": 0.8712799855708532, - "grad_norm": 2.4287480442355336, - "learning_rate": 1.7118118902099797e-07, - "loss": 1.052, - "step": 7246 - }, - { - "epoch": 0.8714002284614922, - "grad_norm": 1.7996479034439596, - "learning_rate": 1.7086600733470146e-07, - "loss": 1.0381, - "step": 7247 - }, - { - "epoch": 0.8715204713521313, - "grad_norm": 1.7125090605781494, - "learning_rate": 1.7055110312657738e-07, - "loss": 0.9915, - "step": 7248 - }, - { - "epoch": 0.8716407142427703, - "grad_norm": 2.270437561917239, - "learning_rate": 1.702364764443962e-07, - "loss": 0.9718, - "step": 7249 - }, - { - "epoch": 0.8717609571334095, - "grad_norm": 5.497241282011049, - "learning_rate": 1.6992212733588685e-07, - "loss": 0.9588, - "step": 7250 - }, - { - "epoch": 0.8718812000240486, - "grad_norm": 1.745287954624011, - "learning_rate": 1.6960805584873538e-07, - "loss": 0.9817, - "step": 7251 - }, - { - "epoch": 0.8720014429146876, - "grad_norm": 1.813983507432586, - "learning_rate": 1.6929426203058684e-07, - "loss": 1.0142, - "step": 7252 - }, - { - "epoch": 0.8721216858053268, - "grad_norm": 3.565971824367806, - "learning_rate": 1.689807459290431e-07, - "loss": 1.0345, - "step": 7253 - }, - { - "epoch": 0.8722419286959658, - "grad_norm": 4.189897903982958, - "learning_rate": 1.6866750759166437e-07, - "loss": 0.9347, - "step": 7254 - }, - { - "epoch": 0.8723621715866049, - "grad_norm": 2.0444303867414244, - "learning_rate": 1.6835454706596865e-07, - "loss": 1.0032, - "step": 7255 - }, - { - "epoch": 0.8724824144772441, - "grad_norm": 3.364283041451121, - "learning_rate": 1.680418643994317e-07, - "loss": 0.9695, - "step": 7256 - }, - { - "epoch": 0.8726026573678831, - "grad_norm": 0.9524202086101076, - "learning_rate": 1.6772945963948738e-07, - "loss": 0.9206, - "step": 7257 - }, - { - "epoch": 0.8727229002585222, - "grad_norm": 2.9330410493825827, - "learning_rate": 1.6741733283352733e-07, - "loss": 1.0013, - "step": 7258 - }, - { - "epoch": 0.8728431431491613, - "grad_norm": 3.3890894915577414, - "learning_rate": 1.6710548402890102e-07, - "loss": 1.0636, - "step": 7259 - }, - { - "epoch": 0.8729633860398004, - "grad_norm": 1.8401639947163844, - "learning_rate": 1.6679391327291527e-07, - "loss": 0.9013, - "step": 7260 - }, - { - "epoch": 0.8730836289304394, - "grad_norm": 2.3946345993067037, - "learning_rate": 1.6648262061283492e-07, - "loss": 0.9093, - "step": 7261 - }, - { - "epoch": 0.8732038718210786, - "grad_norm": 1.9930145342153804, - "learning_rate": 1.6617160609588353e-07, - "loss": 0.9653, - "step": 7262 - }, - { - "epoch": 0.8733241147117177, - "grad_norm": 2.1383106501449776, - "learning_rate": 1.6586086976924163e-07, - "loss": 0.9391, - "step": 7263 - }, - { - "epoch": 0.8734443576023567, - "grad_norm": 2.5355730228489217, - "learning_rate": 1.6555041168004747e-07, - "loss": 1.0192, - "step": 7264 - }, - { - "epoch": 0.8735646004929959, - "grad_norm": 2.387547425602929, - "learning_rate": 1.6524023187539715e-07, - "loss": 0.9201, - "step": 7265 - }, - { - "epoch": 0.873684843383635, - "grad_norm": 2.4269998778919617, - "learning_rate": 1.649303304023446e-07, - "loss": 0.9785, - "step": 7266 - }, - { - "epoch": 0.873805086274274, - "grad_norm": 1.6465924681164248, - "learning_rate": 1.6462070730790246e-07, - "loss": 1.0113, - "step": 7267 - }, - { - "epoch": 0.8739253291649132, - "grad_norm": 2.6410621416666853, - "learning_rate": 1.6431136263903912e-07, - "loss": 1.0121, - "step": 7268 - }, - { - "epoch": 0.8740455720555522, - "grad_norm": 1.929936875511757, - "learning_rate": 1.6400229644268282e-07, - "loss": 0.9681, - "step": 7269 - }, - { - "epoch": 0.8741658149461913, - "grad_norm": 2.017198761451306, - "learning_rate": 1.6369350876571852e-07, - "loss": 1.0339, - "step": 7270 - }, - { - "epoch": 0.8742860578368304, - "grad_norm": 1.9120860580565684, - "learning_rate": 1.6338499965498874e-07, - "loss": 1.039, - "step": 7271 - }, - { - "epoch": 0.8744063007274695, - "grad_norm": 1.5503888534570378, - "learning_rate": 1.630767691572943e-07, - "loss": 1.0033, - "step": 7272 - }, - { - "epoch": 0.8745265436181086, - "grad_norm": 0.7619285490198493, - "learning_rate": 1.6276881731939306e-07, - "loss": 0.7945, - "step": 7273 - }, - { - "epoch": 0.8746467865087477, - "grad_norm": 1.8002407893085737, - "learning_rate": 1.6246114418800193e-07, - "loss": 0.9856, - "step": 7274 - }, - { - "epoch": 0.8747670293993868, - "grad_norm": 1.9212370022872034, - "learning_rate": 1.6215374980979423e-07, - "loss": 0.9948, - "step": 7275 - }, - { - "epoch": 0.8748872722900258, - "grad_norm": 6.580218129354286, - "learning_rate": 1.6184663423140133e-07, - "loss": 0.9173, - "step": 7276 - }, - { - "epoch": 0.875007515180665, - "grad_norm": 2.0478045267607397, - "learning_rate": 1.615397974994126e-07, - "loss": 0.8781, - "step": 7277 - }, - { - "epoch": 0.875127758071304, - "grad_norm": 1.3822449634810046, - "learning_rate": 1.6123323966037438e-07, - "loss": 1.0289, - "step": 7278 - }, - { - "epoch": 0.8752480009619431, - "grad_norm": 2.3600155800773877, - "learning_rate": 1.6092696076079216e-07, - "loss": 1.0094, - "step": 7279 - }, - { - "epoch": 0.8753682438525822, - "grad_norm": 2.0195182637547897, - "learning_rate": 1.6062096084712785e-07, - "loss": 0.9625, - "step": 7280 - }, - { - "epoch": 0.8754884867432213, - "grad_norm": 2.9854823749247505, - "learning_rate": 1.6031523996580098e-07, - "loss": 0.938, - "step": 7281 - }, - { - "epoch": 0.8756087296338604, - "grad_norm": 2.052263810278484, - "learning_rate": 1.6000979816318981e-07, - "loss": 0.8929, - "step": 7282 - }, - { - "epoch": 0.8757289725244994, - "grad_norm": 2.4813933323892114, - "learning_rate": 1.5970463548562886e-07, - "loss": 0.9814, - "step": 7283 - }, - { - "epoch": 0.8758492154151386, - "grad_norm": 1.5879074414853687, - "learning_rate": 1.5939975197941192e-07, - "loss": 0.9463, - "step": 7284 - }, - { - "epoch": 0.8759694583057777, - "grad_norm": 0.8154743844762433, - "learning_rate": 1.5909514769078892e-07, - "loss": 0.7924, - "step": 7285 - }, - { - "epoch": 0.8760897011964167, - "grad_norm": 1.6267692031489043, - "learning_rate": 1.5879082266596867e-07, - "loss": 1.0083, - "step": 7286 - }, - { - "epoch": 0.8762099440870559, - "grad_norm": 1.9853441770222013, - "learning_rate": 1.5848677695111645e-07, - "loss": 0.9484, - "step": 7287 - }, - { - "epoch": 0.8763301869776949, - "grad_norm": 3.4747206706019544, - "learning_rate": 1.5818301059235562e-07, - "loss": 0.9357, - "step": 7288 - }, - { - "epoch": 0.876450429868334, - "grad_norm": 1.713489527812533, - "learning_rate": 1.578795236357684e-07, - "loss": 1.0466, - "step": 7289 - }, - { - "epoch": 0.8765706727589732, - "grad_norm": 2.1104749284279474, - "learning_rate": 1.5757631612739218e-07, - "loss": 1.0812, - "step": 7290 - }, - { - "epoch": 0.8766909156496122, - "grad_norm": 0.8573810333048572, - "learning_rate": 1.572733881132242e-07, - "loss": 0.9041, - "step": 7291 - }, - { - "epoch": 0.8768111585402513, - "grad_norm": 0.7623030548899216, - "learning_rate": 1.5697073963921814e-07, - "loss": 0.8465, - "step": 7292 - }, - { - "epoch": 0.8769314014308904, - "grad_norm": 2.030432903564879, - "learning_rate": 1.566683707512857e-07, - "loss": 1.0813, - "step": 7293 - }, - { - "epoch": 0.8770516443215295, - "grad_norm": 1.8414970469434284, - "learning_rate": 1.5636628149529553e-07, - "loss": 1.0231, - "step": 7294 - }, - { - "epoch": 0.8771718872121685, - "grad_norm": 2.0035073996955353, - "learning_rate": 1.560644719170743e-07, - "loss": 1.0164, - "step": 7295 - }, - { - "epoch": 0.8772921301028077, - "grad_norm": 2.4435726716077473, - "learning_rate": 1.5576294206240692e-07, - "loss": 0.9446, - "step": 7296 - }, - { - "epoch": 0.8774123729934468, - "grad_norm": 1.9872570585124778, - "learning_rate": 1.5546169197703507e-07, - "loss": 0.9193, - "step": 7297 - }, - { - "epoch": 0.8775326158840858, - "grad_norm": 3.370514325928472, - "learning_rate": 1.5516072170665774e-07, - "loss": 0.9987, - "step": 7298 - }, - { - "epoch": 0.877652858774725, - "grad_norm": 1.7135075466233596, - "learning_rate": 1.5486003129693214e-07, - "loss": 1.0922, - "step": 7299 - }, - { - "epoch": 0.877773101665364, - "grad_norm": 5.30296367339184, - "learning_rate": 1.545596207934725e-07, - "loss": 1.0053, - "step": 7300 - }, - { - "epoch": 0.8778933445560031, - "grad_norm": 1.836905236555086, - "learning_rate": 1.5425949024185147e-07, - "loss": 1.0003, - "step": 7301 - }, - { - "epoch": 0.8780135874466423, - "grad_norm": 2.077788132059791, - "learning_rate": 1.5395963968759818e-07, - "loss": 0.9075, - "step": 7302 - }, - { - "epoch": 0.8781338303372813, - "grad_norm": 1.5937146201601105, - "learning_rate": 1.536600691761998e-07, - "loss": 0.8715, - "step": 7303 - }, - { - "epoch": 0.8782540732279204, - "grad_norm": 2.9221909296111304, - "learning_rate": 1.5336077875310084e-07, - "loss": 0.9422, - "step": 7304 - }, - { - "epoch": 0.8783743161185595, - "grad_norm": 3.190470041067112, - "learning_rate": 1.5306176846370321e-07, - "loss": 0.9703, - "step": 7305 - }, - { - "epoch": 0.8784945590091986, - "grad_norm": 2.1359965831786414, - "learning_rate": 1.5276303835336712e-07, - "loss": 0.9734, - "step": 7306 - }, - { - "epoch": 0.8786148018998376, - "grad_norm": 0.7865938907705154, - "learning_rate": 1.524645884674094e-07, - "loss": 0.7888, - "step": 7307 - }, - { - "epoch": 0.8787350447904768, - "grad_norm": 2.1936490418768, - "learning_rate": 1.521664188511047e-07, - "loss": 1.0172, - "step": 7308 - }, - { - "epoch": 0.8788552876811159, - "grad_norm": 1.8716277632685594, - "learning_rate": 1.518685295496851e-07, - "loss": 1.0414, - "step": 7309 - }, - { - "epoch": 0.8789755305717549, - "grad_norm": 1.7664448682839686, - "learning_rate": 1.5157092060833975e-07, - "loss": 1.076, - "step": 7310 - }, - { - "epoch": 0.879095773462394, - "grad_norm": 1.5271399476649246, - "learning_rate": 1.5127359207221658e-07, - "loss": 0.887, - "step": 7311 - }, - { - "epoch": 0.8792160163530331, - "grad_norm": 3.2320924527664707, - "learning_rate": 1.5097654398641923e-07, - "loss": 0.9567, - "step": 7312 - }, - { - "epoch": 0.8793362592436722, - "grad_norm": 1.2684400358360337, - "learning_rate": 1.5067977639601014e-07, - "loss": 0.9555, - "step": 7313 - }, - { - "epoch": 0.8794565021343113, - "grad_norm": 2.6706767825358657, - "learning_rate": 1.5038328934600864e-07, - "loss": 0.9405, - "step": 7314 - }, - { - "epoch": 0.8795767450249504, - "grad_norm": 1.88068709965996, - "learning_rate": 1.5008708288139161e-07, - "loss": 0.9278, - "step": 7315 - }, - { - "epoch": 0.8796969879155895, - "grad_norm": 2.5416249652213976, - "learning_rate": 1.497911570470931e-07, - "loss": 0.9649, - "step": 7316 - }, - { - "epoch": 0.8798172308062285, - "grad_norm": 1.9188070232852086, - "learning_rate": 1.494955118880048e-07, - "loss": 1.0802, - "step": 7317 - }, - { - "epoch": 0.8799374736968677, - "grad_norm": 2.2272800293273693, - "learning_rate": 1.4920014744897634e-07, - "loss": 0.9576, - "step": 7318 - }, - { - "epoch": 0.8800577165875068, - "grad_norm": 3.0539268854029102, - "learning_rate": 1.4890506377481392e-07, - "loss": 1.0925, - "step": 7319 - }, - { - "epoch": 0.8801779594781458, - "grad_norm": 1.5762415306837871, - "learning_rate": 1.486102609102815e-07, - "loss": 0.8681, - "step": 7320 - }, - { - "epoch": 0.880298202368785, - "grad_norm": 2.5859077198353675, - "learning_rate": 1.483157389001004e-07, - "loss": 1.0811, - "step": 7321 - }, - { - "epoch": 0.880418445259424, - "grad_norm": 2.67197850790917, - "learning_rate": 1.4802149778894933e-07, - "loss": 1.0148, - "step": 7322 - }, - { - "epoch": 0.8805386881500631, - "grad_norm": 1.528875339139056, - "learning_rate": 1.4772753762146484e-07, - "loss": 1.1047, - "step": 7323 - }, - { - "epoch": 0.8806589310407023, - "grad_norm": 1.7086471486479482, - "learning_rate": 1.474338584422401e-07, - "loss": 0.9293, - "step": 7324 - }, - { - "epoch": 0.8807791739313413, - "grad_norm": 2.823184086331414, - "learning_rate": 1.4714046029582595e-07, - "loss": 0.9873, - "step": 7325 - }, - { - "epoch": 0.8808994168219804, - "grad_norm": 1.6358138248072553, - "learning_rate": 1.46847343226731e-07, - "loss": 0.9872, - "step": 7326 - }, - { - "epoch": 0.8810196597126195, - "grad_norm": 1.9373549745488197, - "learning_rate": 1.465545072794203e-07, - "loss": 0.9218, - "step": 7327 - }, - { - "epoch": 0.8811399026032586, - "grad_norm": 2.1185730626767194, - "learning_rate": 1.4626195249831774e-07, - "loss": 0.9877, - "step": 7328 - }, - { - "epoch": 0.8812601454938976, - "grad_norm": 1.8805903700901172, - "learning_rate": 1.4596967892780244e-07, - "loss": 0.9483, - "step": 7329 - }, - { - "epoch": 0.8813803883845368, - "grad_norm": 1.623116061392031, - "learning_rate": 1.4567768661221314e-07, - "loss": 0.9756, - "step": 7330 - }, - { - "epoch": 0.8815006312751759, - "grad_norm": 2.461267067775957, - "learning_rate": 1.4538597559584442e-07, - "loss": 0.9715, - "step": 7331 - }, - { - "epoch": 0.8816208741658149, - "grad_norm": 1.8274474233221358, - "learning_rate": 1.4509454592294823e-07, - "loss": 1.0018, - "step": 7332 - }, - { - "epoch": 0.8817411170564541, - "grad_norm": 2.2620807153819316, - "learning_rate": 1.448033976377354e-07, - "loss": 1.0217, - "step": 7333 - }, - { - "epoch": 0.8818613599470931, - "grad_norm": 2.2070162342202133, - "learning_rate": 1.445125307843713e-07, - "loss": 0.9677, - "step": 7334 - }, - { - "epoch": 0.8819816028377322, - "grad_norm": 1.6317187574810879, - "learning_rate": 1.442219454069813e-07, - "loss": 0.9776, - "step": 7335 - }, - { - "epoch": 0.8821018457283714, - "grad_norm": 2.1311296803807154, - "learning_rate": 1.4393164154964676e-07, - "loss": 0.8995, - "step": 7336 - }, - { - "epoch": 0.8822220886190104, - "grad_norm": 2.6628579020045944, - "learning_rate": 1.4364161925640649e-07, - "loss": 1.1634, - "step": 7337 - }, - { - "epoch": 0.8823423315096495, - "grad_norm": 1.8820735383498473, - "learning_rate": 1.4335187857125663e-07, - "loss": 1.081, - "step": 7338 - }, - { - "epoch": 0.8824625744002886, - "grad_norm": 1.9147584856490043, - "learning_rate": 1.4306241953815023e-07, - "loss": 0.981, - "step": 7339 - }, - { - "epoch": 0.8825828172909277, - "grad_norm": 1.7355930072389476, - "learning_rate": 1.4277324220099862e-07, - "loss": 0.9379, - "step": 7340 - }, - { - "epoch": 0.8827030601815667, - "grad_norm": 2.1296717955919164, - "learning_rate": 1.4248434660366938e-07, - "loss": 0.9713, - "step": 7341 - }, - { - "epoch": 0.8828233030722058, - "grad_norm": 1.9924215141929973, - "learning_rate": 1.4219573278998808e-07, - "loss": 0.9336, - "step": 7342 - }, - { - "epoch": 0.882943545962845, - "grad_norm": 11.370420099849888, - "learning_rate": 1.4190740080373685e-07, - "loss": 0.8848, - "step": 7343 - }, - { - "epoch": 0.883063788853484, - "grad_norm": 1.7334877700352243, - "learning_rate": 1.4161935068865538e-07, - "loss": 1.075, - "step": 7344 - }, - { - "epoch": 0.8831840317441231, - "grad_norm": 2.1173881632738407, - "learning_rate": 1.4133158248844113e-07, - "loss": 0.9825, - "step": 7345 - }, - { - "epoch": 0.8833042746347622, - "grad_norm": 2.205553596532012, - "learning_rate": 1.4104409624674785e-07, - "loss": 0.9583, - "step": 7346 - }, - { - "epoch": 0.8834245175254013, - "grad_norm": 2.6008979017201193, - "learning_rate": 1.407568920071873e-07, - "loss": 1.0144, - "step": 7347 - }, - { - "epoch": 0.8835447604160404, - "grad_norm": 1.8979168715687993, - "learning_rate": 1.4046996981332782e-07, - "loss": 0.9001, - "step": 7348 - }, - { - "epoch": 0.8836650033066795, - "grad_norm": 2.220555712421229, - "learning_rate": 1.4018332970869516e-07, - "loss": 1.0031, - "step": 7349 - }, - { - "epoch": 0.8837852461973186, - "grad_norm": 1.6864077954164605, - "learning_rate": 1.398969717367733e-07, - "loss": 1.0727, - "step": 7350 - }, - { - "epoch": 0.8839054890879576, - "grad_norm": 2.2359712674132264, - "learning_rate": 1.396108959410014e-07, - "loss": 0.9917, - "step": 7351 - }, - { - "epoch": 0.8840257319785968, - "grad_norm": 1.7274743343494807, - "learning_rate": 1.3932510236477745e-07, - "loss": 1.0332, - "step": 7352 - }, - { - "epoch": 0.8841459748692359, - "grad_norm": 1.6876361933250033, - "learning_rate": 1.3903959105145636e-07, - "loss": 0.7873, - "step": 7353 - }, - { - "epoch": 0.8842662177598749, - "grad_norm": 1.792174252608277, - "learning_rate": 1.387543620443492e-07, - "loss": 1.0559, - "step": 7354 - }, - { - "epoch": 0.8843864606505141, - "grad_norm": 1.6503472858584733, - "learning_rate": 1.3846941538672606e-07, - "loss": 1.0689, - "step": 7355 - }, - { - "epoch": 0.8845067035411531, - "grad_norm": 1.9929264867137755, - "learning_rate": 1.3818475112181193e-07, - "loss": 1.0411, - "step": 7356 - }, - { - "epoch": 0.8846269464317922, - "grad_norm": 2.197185979459299, - "learning_rate": 1.3790036929279091e-07, - "loss": 1.0187, - "step": 7357 - }, - { - "epoch": 0.8847471893224313, - "grad_norm": 1.9897210504558973, - "learning_rate": 1.3761626994280363e-07, - "loss": 0.819, - "step": 7358 - }, - { - "epoch": 0.8848674322130704, - "grad_norm": 1.6784490943668746, - "learning_rate": 1.3733245311494735e-07, - "loss": 0.9681, - "step": 7359 - }, - { - "epoch": 0.8849876751037095, - "grad_norm": 1.950505995513612, - "learning_rate": 1.3704891885227676e-07, - "loss": 0.9448, - "step": 7360 - }, - { - "epoch": 0.8851079179943486, - "grad_norm": 1.9155715910045419, - "learning_rate": 1.367656671978037e-07, - "loss": 1.0028, - "step": 7361 - }, - { - "epoch": 0.8852281608849877, - "grad_norm": 2.184363377967227, - "learning_rate": 1.36482698194498e-07, - "loss": 0.9745, - "step": 7362 - }, - { - "epoch": 0.8853484037756267, - "grad_norm": 2.9473274280092308, - "learning_rate": 1.3620001188528506e-07, - "loss": 0.9447, - "step": 7363 - }, - { - "epoch": 0.8854686466662659, - "grad_norm": 2.7371808288402337, - "learning_rate": 1.3591760831304865e-07, - "loss": 0.9588, - "step": 7364 - }, - { - "epoch": 0.885588889556905, - "grad_norm": 1.7309424998288139, - "learning_rate": 1.356354875206287e-07, - "loss": 1.0324, - "step": 7365 - }, - { - "epoch": 0.885709132447544, - "grad_norm": 2.028147044010593, - "learning_rate": 1.3535364955082296e-07, - "loss": 0.924, - "step": 7366 - }, - { - "epoch": 0.8858293753381832, - "grad_norm": 1.6654319322064737, - "learning_rate": 1.3507209444638613e-07, - "loss": 0.8704, - "step": 7367 - }, - { - "epoch": 0.8859496182288222, - "grad_norm": 1.8543374557988732, - "learning_rate": 1.347908222500298e-07, - "loss": 0.9699, - "step": 7368 - }, - { - "epoch": 0.8860698611194613, - "grad_norm": 3.211050212580364, - "learning_rate": 1.3450983300442276e-07, - "loss": 0.9237, - "step": 7369 - }, - { - "epoch": 0.8861901040101005, - "grad_norm": 1.8845443042214116, - "learning_rate": 1.3422912675219068e-07, - "loss": 0.9633, - "step": 7370 - }, - { - "epoch": 0.8863103469007395, - "grad_norm": 2.0253068102836296, - "learning_rate": 1.339487035359166e-07, - "loss": 1.0127, - "step": 7371 - }, - { - "epoch": 0.8864305897913786, - "grad_norm": 1.5157891410745707, - "learning_rate": 1.336685633981409e-07, - "loss": 1.0793, - "step": 7372 - }, - { - "epoch": 0.8865508326820177, - "grad_norm": 1.9548535441885881, - "learning_rate": 1.333887063813597e-07, - "loss": 0.9732, - "step": 7373 - }, - { - "epoch": 0.8866710755726568, - "grad_norm": 1.9155966079873055, - "learning_rate": 1.331091325280278e-07, - "loss": 0.8912, - "step": 7374 - }, - { - "epoch": 0.8867913184632958, - "grad_norm": 1.647863134325665, - "learning_rate": 1.3282984188055625e-07, - "loss": 1.0127, - "step": 7375 - }, - { - "epoch": 0.8869115613539349, - "grad_norm": 1.8914005170732897, - "learning_rate": 1.3255083448131288e-07, - "loss": 1.0187, - "step": 7376 - }, - { - "epoch": 0.8870318042445741, - "grad_norm": 2.3870426838466074, - "learning_rate": 1.3227211037262365e-07, - "loss": 1.0161, - "step": 7377 - }, - { - "epoch": 0.8871520471352131, - "grad_norm": 2.5180807507135783, - "learning_rate": 1.319936695967696e-07, - "loss": 1.0884, - "step": 7378 - }, - { - "epoch": 0.8872722900258522, - "grad_norm": 2.2752320852426307, - "learning_rate": 1.3171551219599097e-07, - "loss": 1.0485, - "step": 7379 - }, - { - "epoch": 0.8873925329164913, - "grad_norm": 2.3777075943833856, - "learning_rate": 1.3143763821248377e-07, - "loss": 1.0048, - "step": 7380 - }, - { - "epoch": 0.8875127758071304, - "grad_norm": 1.753761200546203, - "learning_rate": 1.3116004768840118e-07, - "loss": 0.9519, - "step": 7381 - }, - { - "epoch": 0.8876330186977694, - "grad_norm": 1.6539459036360715, - "learning_rate": 1.3088274066585348e-07, - "loss": 0.9636, - "step": 7382 - }, - { - "epoch": 0.8877532615884086, - "grad_norm": 2.311257930925625, - "learning_rate": 1.3060571718690749e-07, - "loss": 1.1317, - "step": 7383 - }, - { - "epoch": 0.8878735044790477, - "grad_norm": 0.7652584677735258, - "learning_rate": 1.3032897729358805e-07, - "loss": 0.8303, - "step": 7384 - }, - { - "epoch": 0.8879937473696867, - "grad_norm": 3.496827458799094, - "learning_rate": 1.3005252102787645e-07, - "loss": 1.0244, - "step": 7385 - }, - { - "epoch": 0.8881139902603259, - "grad_norm": 1.5237969390173491, - "learning_rate": 1.297763484317105e-07, - "loss": 0.9658, - "step": 7386 - }, - { - "epoch": 0.888234233150965, - "grad_norm": 3.420075522587213, - "learning_rate": 1.2950045954698551e-07, - "loss": 0.9339, - "step": 7387 - }, - { - "epoch": 0.888354476041604, - "grad_norm": 1.5284444837736264, - "learning_rate": 1.2922485441555343e-07, - "loss": 0.9851, - "step": 7388 - }, - { - "epoch": 0.8884747189322432, - "grad_norm": 1.7953788167393292, - "learning_rate": 1.2894953307922363e-07, - "loss": 1.0462, - "step": 7389 - }, - { - "epoch": 0.8885949618228822, - "grad_norm": 2.0247635785773364, - "learning_rate": 1.2867449557976208e-07, - "loss": 1.0698, - "step": 7390 - }, - { - "epoch": 0.8887152047135213, - "grad_norm": 2.0011935249094237, - "learning_rate": 1.283997419588916e-07, - "loss": 0.99, - "step": 7391 - }, - { - "epoch": 0.8888354476041604, - "grad_norm": 1.9377053059736566, - "learning_rate": 1.2812527225829216e-07, - "loss": 0.8441, - "step": 7392 - }, - { - "epoch": 0.8889556904947995, - "grad_norm": 1.8156610088093323, - "learning_rate": 1.2785108651960052e-07, - "loss": 0.9864, - "step": 7393 - }, - { - "epoch": 0.8890759333854386, - "grad_norm": 1.8195334801797396, - "learning_rate": 1.2757718478441094e-07, - "loss": 1.0276, - "step": 7394 - }, - { - "epoch": 0.8891961762760777, - "grad_norm": 1.986409503541321, - "learning_rate": 1.2730356709427302e-07, - "loss": 0.9991, - "step": 7395 - }, - { - "epoch": 0.8893164191667168, - "grad_norm": 2.9923412791444064, - "learning_rate": 1.2703023349069542e-07, - "loss": 0.8256, - "step": 7396 - }, - { - "epoch": 0.8894366620573558, - "grad_norm": 1.776196925144135, - "learning_rate": 1.2675718401514223e-07, - "loss": 0.8452, - "step": 7397 - }, - { - "epoch": 0.889556904947995, - "grad_norm": 5.317887211355671, - "learning_rate": 1.264844187090346e-07, - "loss": 0.9701, - "step": 7398 - }, - { - "epoch": 0.889677147838634, - "grad_norm": 1.5772403465866827, - "learning_rate": 1.262119376137516e-07, - "loss": 0.981, - "step": 7399 - }, - { - "epoch": 0.8897973907292731, - "grad_norm": 1.53930034832799, - "learning_rate": 1.2593974077062707e-07, - "loss": 1.0775, - "step": 7400 - }, - { - "epoch": 0.8899176336199123, - "grad_norm": 2.5350548675438014, - "learning_rate": 1.2566782822095423e-07, - "loss": 0.8575, - "step": 7401 - }, - { - "epoch": 0.8900378765105513, - "grad_norm": 7.896514809488716, - "learning_rate": 1.2539620000598162e-07, - "loss": 0.9479, - "step": 7402 - }, - { - "epoch": 0.8901581194011904, - "grad_norm": 4.919510143053618, - "learning_rate": 1.2512485616691492e-07, - "loss": 1.0232, - "step": 7403 - }, - { - "epoch": 0.8902783622918296, - "grad_norm": 1.4114701339772664, - "learning_rate": 1.2485379674491681e-07, - "loss": 1.0348, - "step": 7404 - }, - { - "epoch": 0.8903986051824686, - "grad_norm": 2.5802110035687025, - "learning_rate": 1.2458302178110657e-07, - "loss": 1.0173, - "step": 7405 - }, - { - "epoch": 0.8905188480731077, - "grad_norm": 2.20343686324681, - "learning_rate": 1.2431253131656118e-07, - "loss": 1.0545, - "step": 7406 - }, - { - "epoch": 0.8906390909637467, - "grad_norm": 1.5810296549709182, - "learning_rate": 1.240423253923133e-07, - "loss": 0.9912, - "step": 7407 - }, - { - "epoch": 0.8907593338543859, - "grad_norm": 4.048034973038688, - "learning_rate": 1.237724040493533e-07, - "loss": 0.9198, - "step": 7408 - }, - { - "epoch": 0.8908795767450249, - "grad_norm": 3.2621577350081847, - "learning_rate": 1.2350276732862773e-07, - "loss": 0.959, - "step": 7409 - }, - { - "epoch": 0.890999819635664, - "grad_norm": 0.8662071873047166, - "learning_rate": 1.2323341527103993e-07, - "loss": 0.8498, - "step": 7410 - }, - { - "epoch": 0.8911200625263032, - "grad_norm": 2.0278947555500513, - "learning_rate": 1.2296434791745135e-07, - "loss": 1.0712, - "step": 7411 - }, - { - "epoch": 0.8912403054169422, - "grad_norm": 1.8387928287748816, - "learning_rate": 1.2269556530867875e-07, - "loss": 0.9998, - "step": 7412 - }, - { - "epoch": 0.8913605483075813, - "grad_norm": 1.9462182567558264, - "learning_rate": 1.2242706748549614e-07, - "loss": 1.0507, - "step": 7413 - }, - { - "epoch": 0.8914807911982204, - "grad_norm": 1.640103429812519, - "learning_rate": 1.2215885448863473e-07, - "loss": 1.0504, - "step": 7414 - }, - { - "epoch": 0.8916010340888595, - "grad_norm": 1.9563424426774676, - "learning_rate": 1.2189092635878152e-07, - "loss": 1.0302, - "step": 7415 - }, - { - "epoch": 0.8917212769794985, - "grad_norm": 1.6898407419771395, - "learning_rate": 1.216232831365822e-07, - "loss": 1.0006, - "step": 7416 - }, - { - "epoch": 0.8918415198701377, - "grad_norm": 1.930854845823069, - "learning_rate": 1.2135592486263678e-07, - "loss": 1.0402, - "step": 7417 - }, - { - "epoch": 0.8919617627607768, - "grad_norm": 1.6824144804760808, - "learning_rate": 1.2108885157750415e-07, - "loss": 0.8406, - "step": 7418 - }, - { - "epoch": 0.8920820056514158, - "grad_norm": 1.5885892600345655, - "learning_rate": 1.2082206332169897e-07, - "loss": 1.0286, - "step": 7419 - }, - { - "epoch": 0.892202248542055, - "grad_norm": 2.314103884845267, - "learning_rate": 1.2055556013569225e-07, - "loss": 0.9583, - "step": 7420 - }, - { - "epoch": 0.892322491432694, - "grad_norm": 1.7609849573085352, - "learning_rate": 1.2028934205991315e-07, - "loss": 1.0402, - "step": 7421 - }, - { - "epoch": 0.8924427343233331, - "grad_norm": 1.5104680370496049, - "learning_rate": 1.2002340913474607e-07, - "loss": 0.995, - "step": 7422 - }, - { - "epoch": 0.8925629772139723, - "grad_norm": 2.0638267411324356, - "learning_rate": 1.1975776140053317e-07, - "loss": 0.9728, - "step": 7423 - }, - { - "epoch": 0.8926832201046113, - "grad_norm": 1.8565730292280602, - "learning_rate": 1.194923988975729e-07, - "loss": 0.9609, - "step": 7424 - }, - { - "epoch": 0.8928034629952504, - "grad_norm": 2.3898222329654106, - "learning_rate": 1.192273216661206e-07, - "loss": 0.9623, - "step": 7425 - }, - { - "epoch": 0.8929237058858895, - "grad_norm": 0.8002178163834718, - "learning_rate": 1.189625297463881e-07, - "loss": 0.8407, - "step": 7426 - }, - { - "epoch": 0.8930439487765286, - "grad_norm": 1.702602201242636, - "learning_rate": 1.1869802317854394e-07, - "loss": 1.0235, - "step": 7427 - }, - { - "epoch": 0.8931641916671677, - "grad_norm": 3.424459517776725, - "learning_rate": 1.1843380200271425e-07, - "loss": 0.9571, - "step": 7428 - }, - { - "epoch": 0.8932844345578068, - "grad_norm": 3.299981989955885, - "learning_rate": 1.181698662589805e-07, - "loss": 1.0324, - "step": 7429 - }, - { - "epoch": 0.8934046774484459, - "grad_norm": 2.0345179166269762, - "learning_rate": 1.1790621598738249e-07, - "loss": 0.9881, - "step": 7430 - }, - { - "epoch": 0.8935249203390849, - "grad_norm": 1.943929525345947, - "learning_rate": 1.1764285122791461e-07, - "loss": 0.9818, - "step": 7431 - }, - { - "epoch": 0.8936451632297241, - "grad_norm": 2.127488754547558, - "learning_rate": 1.173797720205294e-07, - "loss": 1.0011, - "step": 7432 - }, - { - "epoch": 0.8937654061203631, - "grad_norm": 2.292196686459097, - "learning_rate": 1.1711697840513602e-07, - "loss": 0.9503, - "step": 7433 - }, - { - "epoch": 0.8938856490110022, - "grad_norm": 2.1984076893079236, - "learning_rate": 1.1685447042160012e-07, - "loss": 0.9348, - "step": 7434 - }, - { - "epoch": 0.8940058919016414, - "grad_norm": 1.841854608304446, - "learning_rate": 1.1659224810974367e-07, - "loss": 0.9373, - "step": 7435 - }, - { - "epoch": 0.8941261347922804, - "grad_norm": 1.5274431825550296, - "learning_rate": 1.1633031150934591e-07, - "loss": 0.9126, - "step": 7436 - }, - { - "epoch": 0.8942463776829195, - "grad_norm": 2.5963690529986097, - "learning_rate": 1.1606866066014176e-07, - "loss": 1.0289, - "step": 7437 - }, - { - "epoch": 0.8943666205735585, - "grad_norm": 1.9371503083692205, - "learning_rate": 1.1580729560182434e-07, - "loss": 0.9739, - "step": 7438 - }, - { - "epoch": 0.8944868634641977, - "grad_norm": 4.098674811220071, - "learning_rate": 1.1554621637404171e-07, - "loss": 0.9444, - "step": 7439 - }, - { - "epoch": 0.8946071063548368, - "grad_norm": 2.2782205820636268, - "learning_rate": 1.1528542301639999e-07, - "loss": 0.8347, - "step": 7440 - }, - { - "epoch": 0.8947273492454758, - "grad_norm": 1.9887598806887445, - "learning_rate": 1.1502491556846105e-07, - "loss": 1.0498, - "step": 7441 - }, - { - "epoch": 0.894847592136115, - "grad_norm": 2.4054040784279005, - "learning_rate": 1.1476469406974331e-07, - "loss": 1.0468, - "step": 7442 - }, - { - "epoch": 0.894967835026754, - "grad_norm": 2.4536783359690424, - "learning_rate": 1.1450475855972341e-07, - "loss": 0.9992, - "step": 7443 - }, - { - "epoch": 0.8950880779173931, - "grad_norm": 2.0263850714572262, - "learning_rate": 1.1424510907783158e-07, - "loss": 0.933, - "step": 7444 - }, - { - "epoch": 0.8952083208080323, - "grad_norm": 1.6064522879312844, - "learning_rate": 1.1398574566345787e-07, - "loss": 1.0525, - "step": 7445 - }, - { - "epoch": 0.8953285636986713, - "grad_norm": 2.08145414158861, - "learning_rate": 1.1372666835594702e-07, - "loss": 1.0524, - "step": 7446 - }, - { - "epoch": 0.8954488065893104, - "grad_norm": 2.9893815950563782, - "learning_rate": 1.1346787719460071e-07, - "loss": 0.9482, - "step": 7447 - }, - { - "epoch": 0.8955690494799495, - "grad_norm": 2.0081492338312197, - "learning_rate": 1.1320937221867732e-07, - "loss": 0.9562, - "step": 7448 - }, - { - "epoch": 0.8956892923705886, - "grad_norm": 1.7260254590001207, - "learning_rate": 1.1295115346739192e-07, - "loss": 1.0211, - "step": 7449 - }, - { - "epoch": 0.8958095352612276, - "grad_norm": 4.526652562302502, - "learning_rate": 1.1269322097991629e-07, - "loss": 0.9598, - "step": 7450 - }, - { - "epoch": 0.8959297781518668, - "grad_norm": 2.8785602252935565, - "learning_rate": 1.1243557479537846e-07, - "loss": 0.9123, - "step": 7451 - }, - { - "epoch": 0.8960500210425059, - "grad_norm": 2.284476519832394, - "learning_rate": 1.121782149528634e-07, - "loss": 0.9175, - "step": 7452 - }, - { - "epoch": 0.8961702639331449, - "grad_norm": 2.1379238266181586, - "learning_rate": 1.1192114149141208e-07, - "loss": 1.0216, - "step": 7453 - }, - { - "epoch": 0.8962905068237841, - "grad_norm": 2.2576099555695657, - "learning_rate": 1.1166435445002197e-07, - "loss": 0.8845, - "step": 7454 - }, - { - "epoch": 0.8964107497144231, - "grad_norm": 2.063996321305242, - "learning_rate": 1.1140785386764818e-07, - "loss": 0.914, - "step": 7455 - }, - { - "epoch": 0.8965309926050622, - "grad_norm": 2.3433563919162856, - "learning_rate": 1.1115163978320153e-07, - "loss": 0.9269, - "step": 7456 - }, - { - "epoch": 0.8966512354957014, - "grad_norm": 1.7901377549256112, - "learning_rate": 1.1089571223554917e-07, - "loss": 1.0544, - "step": 7457 - }, - { - "epoch": 0.8967714783863404, - "grad_norm": 1.6147454293211894, - "learning_rate": 1.1064007126351537e-07, - "loss": 1.0676, - "step": 7458 - }, - { - "epoch": 0.8968917212769795, - "grad_norm": 2.424610979790431, - "learning_rate": 1.1038471690588003e-07, - "loss": 0.9917, - "step": 7459 - }, - { - "epoch": 0.8970119641676186, - "grad_norm": 1.976440486232947, - "learning_rate": 1.1012964920138145e-07, - "loss": 1.0262, - "step": 7460 - }, - { - "epoch": 0.8971322070582577, - "grad_norm": 1.6747202682350142, - "learning_rate": 1.0987486818871205e-07, - "loss": 0.9826, - "step": 7461 - }, - { - "epoch": 0.8972524499488967, - "grad_norm": 2.2190086321463003, - "learning_rate": 1.0962037390652245e-07, - "loss": 0.9482, - "step": 7462 - }, - { - "epoch": 0.8973726928395359, - "grad_norm": 2.095694365043143, - "learning_rate": 1.0936616639341911e-07, - "loss": 0.9525, - "step": 7463 - }, - { - "epoch": 0.897492935730175, - "grad_norm": 0.7851383719615118, - "learning_rate": 1.0911224568796473e-07, - "loss": 0.7967, - "step": 7464 - }, - { - "epoch": 0.897613178620814, - "grad_norm": 2.0160057240586533, - "learning_rate": 1.0885861182867984e-07, - "loss": 0.9356, - "step": 7465 - }, - { - "epoch": 0.8977334215114532, - "grad_norm": 2.4345184088054252, - "learning_rate": 1.0860526485403942e-07, - "loss": 0.9335, - "step": 7466 - }, - { - "epoch": 0.8978536644020922, - "grad_norm": 1.5601886438360133, - "learning_rate": 1.0835220480247675e-07, - "loss": 1.0056, - "step": 7467 - }, - { - "epoch": 0.8979739072927313, - "grad_norm": 6.612822370837732, - "learning_rate": 1.0809943171238067e-07, - "loss": 1.0717, - "step": 7468 - }, - { - "epoch": 0.8980941501833704, - "grad_norm": 2.0612826367091044, - "learning_rate": 1.078469456220965e-07, - "loss": 0.8665, - "step": 7469 - }, - { - "epoch": 0.8982143930740095, - "grad_norm": 1.924704538930473, - "learning_rate": 1.0759474656992606e-07, - "loss": 0.919, - "step": 7470 - }, - { - "epoch": 0.8983346359646486, - "grad_norm": 3.073148625988823, - "learning_rate": 1.0734283459412785e-07, - "loss": 1.0073, - "step": 7471 - }, - { - "epoch": 0.8984548788552876, - "grad_norm": 8.341151829500134, - "learning_rate": 1.0709120973291707e-07, - "loss": 1.0322, - "step": 7472 - }, - { - "epoch": 0.8985751217459268, - "grad_norm": 2.6780203479452482, - "learning_rate": 1.0683987202446475e-07, - "loss": 1.0084, - "step": 7473 - }, - { - "epoch": 0.8986953646365659, - "grad_norm": 1.780366912915563, - "learning_rate": 1.0658882150689862e-07, - "loss": 0.9348, - "step": 7474 - }, - { - "epoch": 0.8988156075272049, - "grad_norm": 2.3459236746467407, - "learning_rate": 1.0633805821830288e-07, - "loss": 1.0134, - "step": 7475 - }, - { - "epoch": 0.8989358504178441, - "grad_norm": 2.7194547945457077, - "learning_rate": 1.0608758219671753e-07, - "loss": 1.0592, - "step": 7476 - }, - { - "epoch": 0.8990560933084831, - "grad_norm": 1.497769923266585, - "learning_rate": 1.0583739348014065e-07, - "loss": 0.9355, - "step": 7477 - }, - { - "epoch": 0.8991763361991222, - "grad_norm": 2.275569898885898, - "learning_rate": 1.0558749210652518e-07, - "loss": 1.0716, - "step": 7478 - }, - { - "epoch": 0.8992965790897613, - "grad_norm": 1.6638241449424813, - "learning_rate": 1.053378781137808e-07, - "loss": 1.0839, - "step": 7479 - }, - { - "epoch": 0.8994168219804004, - "grad_norm": 1.7375755677839642, - "learning_rate": 1.0508855153977392e-07, - "loss": 1.0031, - "step": 7480 - }, - { - "epoch": 0.8995370648710395, - "grad_norm": 2.420486741415588, - "learning_rate": 1.0483951242232669e-07, - "loss": 0.9011, - "step": 7481 - }, - { - "epoch": 0.8996573077616786, - "grad_norm": 1.0300667650379711, - "learning_rate": 1.0459076079921936e-07, - "loss": 0.84, - "step": 7482 - }, - { - "epoch": 0.8997775506523177, - "grad_norm": 3.023995754541717, - "learning_rate": 1.0434229670818618e-07, - "loss": 1.0675, - "step": 7483 - }, - { - "epoch": 0.8998977935429567, - "grad_norm": 1.5678393118080554, - "learning_rate": 1.0409412018691944e-07, - "loss": 1.0264, - "step": 7484 - }, - { - "epoch": 0.9000180364335959, - "grad_norm": 1.972695167082082, - "learning_rate": 1.0384623127306724e-07, - "loss": 0.9817, - "step": 7485 - }, - { - "epoch": 0.900138279324235, - "grad_norm": 1.8976154195791068, - "learning_rate": 1.0359863000423397e-07, - "loss": 1.0178, - "step": 7486 - }, - { - "epoch": 0.900258522214874, - "grad_norm": 2.2220899131911627, - "learning_rate": 1.0335131641798112e-07, - "loss": 0.9419, - "step": 7487 - }, - { - "epoch": 0.9003787651055132, - "grad_norm": 0.8562630923864339, - "learning_rate": 1.0310429055182512e-07, - "loss": 0.8578, - "step": 7488 - }, - { - "epoch": 0.9004990079961522, - "grad_norm": 1.4877142038707172, - "learning_rate": 1.0285755244324024e-07, - "loss": 0.9605, - "step": 7489 - }, - { - "epoch": 0.9006192508867913, - "grad_norm": 1.4678436891922413, - "learning_rate": 1.0261110212965629e-07, - "loss": 0.9162, - "step": 7490 - }, - { - "epoch": 0.9007394937774305, - "grad_norm": 2.0417559727450265, - "learning_rate": 1.023649396484596e-07, - "loss": 1.0228, - "step": 7491 - }, - { - "epoch": 0.9008597366680695, - "grad_norm": 2.216004190427213, - "learning_rate": 1.0211906503699275e-07, - "loss": 0.9046, - "step": 7492 - }, - { - "epoch": 0.9009799795587086, - "grad_norm": 2.9404820665896856, - "learning_rate": 1.0187347833255455e-07, - "loss": 1.0487, - "step": 7493 - }, - { - "epoch": 0.9011002224493477, - "grad_norm": 1.6973981553145996, - "learning_rate": 1.0162817957240056e-07, - "loss": 1.0264, - "step": 7494 - }, - { - "epoch": 0.9012204653399868, - "grad_norm": 0.9727143726064714, - "learning_rate": 1.0138316879374253e-07, - "loss": 0.9142, - "step": 7495 - }, - { - "epoch": 0.9013407082306258, - "grad_norm": 2.6432874450773354, - "learning_rate": 1.0113844603374833e-07, - "loss": 0.9639, - "step": 7496 - }, - { - "epoch": 0.901460951121265, - "grad_norm": 2.6608210109764268, - "learning_rate": 1.0089401132954178e-07, - "loss": 0.9407, - "step": 7497 - }, - { - "epoch": 0.9015811940119041, - "grad_norm": 2.052421335439018, - "learning_rate": 1.006498647182037e-07, - "loss": 0.9544, - "step": 7498 - }, - { - "epoch": 0.9017014369025431, - "grad_norm": 2.3560889393934246, - "learning_rate": 1.004060062367713e-07, - "loss": 0.9506, - "step": 7499 - }, - { - "epoch": 0.9018216797931822, - "grad_norm": 1.955869348804177, - "learning_rate": 1.0016243592223728e-07, - "loss": 0.9256, - "step": 7500 - }, - { - "epoch": 0.9019419226838213, - "grad_norm": 2.0579066134099966, - "learning_rate": 9.991915381155114e-08, - "loss": 0.8766, - "step": 7501 - }, - { - "epoch": 0.9020621655744604, - "grad_norm": 2.6116309550297827, - "learning_rate": 9.967615994161871e-08, - "loss": 0.9872, - "step": 7502 - }, - { - "epoch": 0.9021824084650995, - "grad_norm": 1.9322954230944647, - "learning_rate": 9.943345434930161e-08, - "loss": 1.0032, - "step": 7503 - }, - { - "epoch": 0.9023026513557386, - "grad_norm": 2.475540576045208, - "learning_rate": 9.919103707141885e-08, - "loss": 0.9189, - "step": 7504 - }, - { - "epoch": 0.9024228942463777, - "grad_norm": 2.541477404775973, - "learning_rate": 9.89489081447441e-08, - "loss": 1.0021, - "step": 7505 - }, - { - "epoch": 0.9025431371370167, - "grad_norm": 1.936831697445846, - "learning_rate": 9.870706760600844e-08, - "loss": 1.0563, - "step": 7506 - }, - { - "epoch": 0.9026633800276559, - "grad_norm": 1.9793171741746358, - "learning_rate": 9.846551549189918e-08, - "loss": 0.9589, - "step": 7507 - }, - { - "epoch": 0.902783622918295, - "grad_norm": 2.071555056257192, - "learning_rate": 9.822425183905902e-08, - "loss": 0.9114, - "step": 7508 - }, - { - "epoch": 0.902903865808934, - "grad_norm": 0.9193747932646498, - "learning_rate": 9.798327668408823e-08, - "loss": 1.0032, - "step": 7509 - }, - { - "epoch": 0.9030241086995732, - "grad_norm": 2.26575190747747, - "learning_rate": 9.774259006354158e-08, - "loss": 0.9218, - "step": 7510 - }, - { - "epoch": 0.9031443515902122, - "grad_norm": 1.7206858743415572, - "learning_rate": 9.750219201393184e-08, - "loss": 0.9871, - "step": 7511 - }, - { - "epoch": 0.9032645944808513, - "grad_norm": 2.6746311361241855, - "learning_rate": 9.726208257172697e-08, - "loss": 1.0076, - "step": 7512 - }, - { - "epoch": 0.9033848373714904, - "grad_norm": 1.9301206280263703, - "learning_rate": 9.702226177335115e-08, - "loss": 0.9765, - "step": 7513 - }, - { - "epoch": 0.9035050802621295, - "grad_norm": 1.4709282812895772, - "learning_rate": 9.67827296551853e-08, - "loss": 0.9542, - "step": 7514 - }, - { - "epoch": 0.9036253231527686, - "grad_norm": 1.8891571273641394, - "learning_rate": 9.65434862535659e-08, - "loss": 0.9079, - "step": 7515 - }, - { - "epoch": 0.9037455660434077, - "grad_norm": 2.5215155779077287, - "learning_rate": 9.630453160478635e-08, - "loss": 0.8769, - "step": 7516 - }, - { - "epoch": 0.9038658089340468, - "grad_norm": 2.0129536514290423, - "learning_rate": 9.60658657450959e-08, - "loss": 1.0501, - "step": 7517 - }, - { - "epoch": 0.9039860518246858, - "grad_norm": 1.83474783804517, - "learning_rate": 9.582748871069979e-08, - "loss": 1.0212, - "step": 7518 - }, - { - "epoch": 0.904106294715325, - "grad_norm": 5.456466743909705, - "learning_rate": 9.558940053775954e-08, - "loss": 1.0516, - "step": 7519 - }, - { - "epoch": 0.904226537605964, - "grad_norm": 2.8933083116823544, - "learning_rate": 9.535160126239294e-08, - "loss": 0.9125, - "step": 7520 - }, - { - "epoch": 0.9043467804966031, - "grad_norm": 1.5661546310044079, - "learning_rate": 9.511409092067424e-08, - "loss": 0.9376, - "step": 7521 - }, - { - "epoch": 0.9044670233872423, - "grad_norm": 1.9988256821609072, - "learning_rate": 9.487686954863327e-08, - "loss": 0.9067, - "step": 7522 - }, - { - "epoch": 0.9045872662778813, - "grad_norm": 1.8269032365285327, - "learning_rate": 9.46399371822566e-08, - "loss": 0.9917, - "step": 7523 - }, - { - "epoch": 0.9047075091685204, - "grad_norm": 2.014822156834055, - "learning_rate": 9.440329385748657e-08, - "loss": 0.9416, - "step": 7524 - }, - { - "epoch": 0.9048277520591596, - "grad_norm": 1.8244379144728025, - "learning_rate": 9.416693961022137e-08, - "loss": 0.9412, - "step": 7525 - }, - { - "epoch": 0.9049479949497986, - "grad_norm": 1.7414272000749875, - "learning_rate": 9.393087447631654e-08, - "loss": 1.0003, - "step": 7526 - }, - { - "epoch": 0.9050682378404377, - "grad_norm": 1.9308466345019242, - "learning_rate": 9.36950984915823e-08, - "loss": 0.9542, - "step": 7527 - }, - { - "epoch": 0.9051884807310768, - "grad_norm": 7.153284782868249, - "learning_rate": 9.345961169178607e-08, - "loss": 0.9254, - "step": 7528 - }, - { - "epoch": 0.9053087236217159, - "grad_norm": 1.3832092147128594, - "learning_rate": 9.322441411265081e-08, - "loss": 0.9572, - "step": 7529 - }, - { - "epoch": 0.9054289665123549, - "grad_norm": 1.742842820430306, - "learning_rate": 9.298950578985554e-08, - "loss": 0.9605, - "step": 7530 - }, - { - "epoch": 0.905549209402994, - "grad_norm": 2.11512019165351, - "learning_rate": 9.275488675903665e-08, - "loss": 0.9399, - "step": 7531 - }, - { - "epoch": 0.9056694522936332, - "grad_norm": 3.0058906261066323, - "learning_rate": 9.252055705578454e-08, - "loss": 0.9574, - "step": 7532 - }, - { - "epoch": 0.9057896951842722, - "grad_norm": 1.8577761146825944, - "learning_rate": 9.228651671564747e-08, - "loss": 0.9453, - "step": 7533 - }, - { - "epoch": 0.9059099380749113, - "grad_norm": 1.4259197847134086, - "learning_rate": 9.205276577412901e-08, - "loss": 1.0108, - "step": 7534 - }, - { - "epoch": 0.9060301809655504, - "grad_norm": 2.793838898953274, - "learning_rate": 9.181930426668905e-08, - "loss": 0.9952, - "step": 7535 - }, - { - "epoch": 0.9061504238561895, - "grad_norm": 1.6875316475620354, - "learning_rate": 9.158613222874346e-08, - "loss": 0.9121, - "step": 7536 - }, - { - "epoch": 0.9062706667468285, - "grad_norm": 1.5616419915957567, - "learning_rate": 9.135324969566394e-08, - "loss": 1.0508, - "step": 7537 - }, - { - "epoch": 0.9063909096374677, - "grad_norm": 2.3045290100430247, - "learning_rate": 9.112065670277913e-08, - "loss": 0.9832, - "step": 7538 - }, - { - "epoch": 0.9065111525281068, - "grad_norm": 1.7102433978409033, - "learning_rate": 9.088835328537303e-08, - "loss": 0.9501, - "step": 7539 - }, - { - "epoch": 0.9066313954187458, - "grad_norm": 2.2836464613822116, - "learning_rate": 9.065633947868568e-08, - "loss": 0.9429, - "step": 7540 - }, - { - "epoch": 0.906751638309385, - "grad_norm": 2.2549143211175573, - "learning_rate": 9.042461531791379e-08, - "loss": 1.0235, - "step": 7541 - }, - { - "epoch": 0.906871881200024, - "grad_norm": 3.0939300176477107, - "learning_rate": 9.019318083820903e-08, - "loss": 1.0053, - "step": 7542 - }, - { - "epoch": 0.9069921240906631, - "grad_norm": 1.5731336768134205, - "learning_rate": 8.996203607468045e-08, - "loss": 1.0752, - "step": 7543 - }, - { - "epoch": 0.9071123669813023, - "grad_norm": 1.4078481705663017, - "learning_rate": 8.973118106239241e-08, - "loss": 0.9846, - "step": 7544 - }, - { - "epoch": 0.9072326098719413, - "grad_norm": 1.9039639382433438, - "learning_rate": 8.95006158363656e-08, - "loss": 1.1709, - "step": 7545 - }, - { - "epoch": 0.9073528527625804, - "grad_norm": 3.168423282551135, - "learning_rate": 8.9270340431576e-08, - "loss": 1.005, - "step": 7546 - }, - { - "epoch": 0.9074730956532195, - "grad_norm": 2.461372651879617, - "learning_rate": 8.904035488295658e-08, - "loss": 0.9643, - "step": 7547 - }, - { - "epoch": 0.9075933385438586, - "grad_norm": 0.7003426071020813, - "learning_rate": 8.881065922539632e-08, - "loss": 0.7955, - "step": 7548 - }, - { - "epoch": 0.9077135814344977, - "grad_norm": 2.78286670458531, - "learning_rate": 8.85812534937389e-08, - "loss": 0.972, - "step": 7549 - }, - { - "epoch": 0.9078338243251368, - "grad_norm": 2.9938217759736374, - "learning_rate": 8.835213772278583e-08, - "loss": 0.8943, - "step": 7550 - }, - { - "epoch": 0.9079540672157759, - "grad_norm": 4.782144101385998, - "learning_rate": 8.812331194729373e-08, - "loss": 1.0189, - "step": 7551 - }, - { - "epoch": 0.9080743101064149, - "grad_norm": 1.896106434808337, - "learning_rate": 8.789477620197461e-08, - "loss": 0.9492, - "step": 7552 - }, - { - "epoch": 0.9081945529970541, - "grad_norm": 3.5080404568626147, - "learning_rate": 8.766653052149831e-08, - "loss": 1.0287, - "step": 7553 - }, - { - "epoch": 0.9083147958876931, - "grad_norm": 2.024778886190753, - "learning_rate": 8.743857494048823e-08, - "loss": 0.9697, - "step": 7554 - }, - { - "epoch": 0.9084350387783322, - "grad_norm": 2.0365852127810555, - "learning_rate": 8.721090949352605e-08, - "loss": 0.8639, - "step": 7555 - }, - { - "epoch": 0.9085552816689714, - "grad_norm": 1.889139521868927, - "learning_rate": 8.698353421514793e-08, - "loss": 0.957, - "step": 7556 - }, - { - "epoch": 0.9086755245596104, - "grad_norm": 2.075824468045757, - "learning_rate": 8.67564491398467e-08, - "loss": 1.0291, - "step": 7557 - }, - { - "epoch": 0.9087957674502495, - "grad_norm": 1.9309264618804585, - "learning_rate": 8.652965430207104e-08, - "loss": 0.9657, - "step": 7558 - }, - { - "epoch": 0.9089160103408886, - "grad_norm": 1.862561228724981, - "learning_rate": 8.630314973622521e-08, - "loss": 0.8833, - "step": 7559 - }, - { - "epoch": 0.9090362532315277, - "grad_norm": 2.0106257460061854, - "learning_rate": 8.607693547666995e-08, - "loss": 0.941, - "step": 7560 - }, - { - "epoch": 0.9091564961221668, - "grad_norm": 0.9186932890120717, - "learning_rate": 8.585101155772201e-08, - "loss": 0.8574, - "step": 7561 - }, - { - "epoch": 0.9092767390128058, - "grad_norm": 2.67418049175635, - "learning_rate": 8.562537801365377e-08, - "loss": 0.9163, - "step": 7562 - }, - { - "epoch": 0.909396981903445, - "grad_norm": 1.9460990571845915, - "learning_rate": 8.540003487869362e-08, - "loss": 0.9252, - "step": 7563 - }, - { - "epoch": 0.909517224794084, - "grad_norm": 2.4240982187975475, - "learning_rate": 8.517498218702557e-08, - "loss": 1.0246, - "step": 7564 - }, - { - "epoch": 0.9096374676847231, - "grad_norm": 1.7627974969299494, - "learning_rate": 8.49502199727905e-08, - "loss": 0.9305, - "step": 7565 - }, - { - "epoch": 0.9097577105753623, - "grad_norm": 2.310642424257973, - "learning_rate": 8.472574827008428e-08, - "loss": 0.8886, - "step": 7566 - }, - { - "epoch": 0.9098779534660013, - "grad_norm": 3.334322893759852, - "learning_rate": 8.450156711295942e-08, - "loss": 1.0634, - "step": 7567 - }, - { - "epoch": 0.9099981963566404, - "grad_norm": 3.6072867822058656, - "learning_rate": 8.427767653542383e-08, - "loss": 1.0922, - "step": 7568 - }, - { - "epoch": 0.9101184392472795, - "grad_norm": 2.1969335686594027, - "learning_rate": 8.405407657144125e-08, - "loss": 0.9327, - "step": 7569 - }, - { - "epoch": 0.9102386821379186, - "grad_norm": 4.079921049214343, - "learning_rate": 8.383076725493232e-08, - "loss": 0.9503, - "step": 7570 - }, - { - "epoch": 0.9103589250285576, - "grad_norm": 1.823306261846129, - "learning_rate": 8.360774861977216e-08, - "loss": 0.9078, - "step": 7571 - }, - { - "epoch": 0.9104791679191968, - "grad_norm": 1.9253886288806477, - "learning_rate": 8.338502069979281e-08, - "loss": 0.9815, - "step": 7572 - }, - { - "epoch": 0.9105994108098359, - "grad_norm": 4.6680181907685085, - "learning_rate": 8.316258352878214e-08, - "loss": 1.0283, - "step": 7573 - }, - { - "epoch": 0.9107196537004749, - "grad_norm": 1.8757508999531165, - "learning_rate": 8.294043714048338e-08, - "loss": 0.9354, - "step": 7574 - }, - { - "epoch": 0.9108398965911141, - "grad_norm": 0.7892530230155231, - "learning_rate": 8.271858156859624e-08, - "loss": 0.8766, - "step": 7575 - }, - { - "epoch": 0.9109601394817531, - "grad_norm": 1.70691057210736, - "learning_rate": 8.249701684677557e-08, - "loss": 0.9681, - "step": 7576 - }, - { - "epoch": 0.9110803823723922, - "grad_norm": 1.862037611535401, - "learning_rate": 8.227574300863294e-08, - "loss": 1.0397, - "step": 7577 - }, - { - "epoch": 0.9112006252630314, - "grad_norm": 1.59478972684251, - "learning_rate": 8.205476008773548e-08, - "loss": 0.9328, - "step": 7578 - }, - { - "epoch": 0.9113208681536704, - "grad_norm": 1.798486534265499, - "learning_rate": 8.183406811760596e-08, - "loss": 1.056, - "step": 7579 - }, - { - "epoch": 0.9114411110443095, - "grad_norm": 1.5792346490135332, - "learning_rate": 8.161366713172313e-08, - "loss": 0.9733, - "step": 7580 - }, - { - "epoch": 0.9115613539349486, - "grad_norm": 3.123319860841185, - "learning_rate": 8.139355716352137e-08, - "loss": 1.0793, - "step": 7581 - }, - { - "epoch": 0.9116815968255877, - "grad_norm": 1.9064053331467807, - "learning_rate": 8.117373824639196e-08, - "loss": 0.9329, - "step": 7582 - }, - { - "epoch": 0.9118018397162267, - "grad_norm": 0.7409664751080438, - "learning_rate": 8.095421041368067e-08, - "loss": 0.8418, - "step": 7583 - }, - { - "epoch": 0.9119220826068659, - "grad_norm": 2.062995851040106, - "learning_rate": 8.073497369868999e-08, - "loss": 0.9382, - "step": 7584 - }, - { - "epoch": 0.912042325497505, - "grad_norm": 2.1468041331937124, - "learning_rate": 8.051602813467772e-08, - "loss": 0.9819, - "step": 7585 - }, - { - "epoch": 0.912162568388144, - "grad_norm": 1.708294643181071, - "learning_rate": 8.029737375485756e-08, - "loss": 0.9417, - "step": 7586 - }, - { - "epoch": 0.9122828112787832, - "grad_norm": 1.8941166060221484, - "learning_rate": 8.007901059239986e-08, - "loss": 0.9572, - "step": 7587 - }, - { - "epoch": 0.9124030541694222, - "grad_norm": 1.7598000373820386, - "learning_rate": 7.986093868042964e-08, - "loss": 1.0325, - "step": 7588 - }, - { - "epoch": 0.9125232970600613, - "grad_norm": 1.9731873656534407, - "learning_rate": 7.964315805202826e-08, - "loss": 0.9089, - "step": 7589 - }, - { - "epoch": 0.9126435399507005, - "grad_norm": 6.140954860534402, - "learning_rate": 7.942566874023304e-08, - "loss": 0.9664, - "step": 7590 - }, - { - "epoch": 0.9127637828413395, - "grad_norm": 2.3608137379793708, - "learning_rate": 7.920847077803649e-08, - "loss": 0.9279, - "step": 7591 - }, - { - "epoch": 0.9128840257319786, - "grad_norm": 2.941463312859509, - "learning_rate": 7.899156419838826e-08, - "loss": 1.0478, - "step": 7592 - }, - { - "epoch": 0.9130042686226177, - "grad_norm": 1.9293942865780012, - "learning_rate": 7.87749490341918e-08, - "loss": 0.8865, - "step": 7593 - }, - { - "epoch": 0.9131245115132568, - "grad_norm": 1.8519441879175904, - "learning_rate": 7.855862531830836e-08, - "loss": 1.0661, - "step": 7594 - }, - { - "epoch": 0.9132447544038959, - "grad_norm": 1.6158275691233388, - "learning_rate": 7.834259308355373e-08, - "loss": 0.955, - "step": 7595 - }, - { - "epoch": 0.9133649972945349, - "grad_norm": 2.152689661936697, - "learning_rate": 7.812685236269989e-08, - "loss": 0.9737, - "step": 7596 - }, - { - "epoch": 0.9134852401851741, - "grad_norm": 0.8911885268959011, - "learning_rate": 7.791140318847445e-08, - "loss": 0.8476, - "step": 7597 - }, - { - "epoch": 0.9136054830758131, - "grad_norm": 1.3672801395092906, - "learning_rate": 7.769624559356081e-08, - "loss": 1.0215, - "step": 7598 - }, - { - "epoch": 0.9137257259664522, - "grad_norm": 5.440256231388296, - "learning_rate": 7.748137961059842e-08, - "loss": 0.989, - "step": 7599 - }, - { - "epoch": 0.9138459688570914, - "grad_norm": 7.851603311461098, - "learning_rate": 7.726680527218211e-08, - "loss": 0.8891, - "step": 7600 - }, - { - "epoch": 0.9139662117477304, - "grad_norm": 1.642203597870968, - "learning_rate": 7.70525226108627e-08, - "loss": 0.9853, - "step": 7601 - }, - { - "epoch": 0.9140864546383695, - "grad_norm": 1.8038728534949262, - "learning_rate": 7.683853165914666e-08, - "loss": 1.0342, - "step": 7602 - }, - { - "epoch": 0.9142066975290086, - "grad_norm": 2.7988678209959716, - "learning_rate": 7.662483244949602e-08, - "loss": 1.0028, - "step": 7603 - }, - { - "epoch": 0.9143269404196477, - "grad_norm": 2.1652913374602325, - "learning_rate": 7.641142501432951e-08, - "loss": 1.0353, - "step": 7604 - }, - { - "epoch": 0.9144471833102867, - "grad_norm": 1.5629023987456032, - "learning_rate": 7.619830938602013e-08, - "loss": 0.9694, - "step": 7605 - }, - { - "epoch": 0.9145674262009259, - "grad_norm": 1.861613360842507, - "learning_rate": 7.598548559689777e-08, - "loss": 1.0386, - "step": 7606 - }, - { - "epoch": 0.914687669091565, - "grad_norm": 2.3032627680449553, - "learning_rate": 7.577295367924751e-08, - "loss": 1.0403, - "step": 7607 - }, - { - "epoch": 0.914807911982204, - "grad_norm": 1.6801080221688145, - "learning_rate": 7.556071366531002e-08, - "loss": 1.0532, - "step": 7608 - }, - { - "epoch": 0.9149281548728432, - "grad_norm": 1.9432515943533686, - "learning_rate": 7.53487655872822e-08, - "loss": 1.0108, - "step": 7609 - }, - { - "epoch": 0.9150483977634822, - "grad_norm": 21.46801152409855, - "learning_rate": 7.513710947731656e-08, - "loss": 0.9718, - "step": 7610 - }, - { - "epoch": 0.9151686406541213, - "grad_norm": 1.9009451297304, - "learning_rate": 7.492574536752095e-08, - "loss": 1.0772, - "step": 7611 - }, - { - "epoch": 0.9152888835447605, - "grad_norm": 1.8126603417341731, - "learning_rate": 7.471467328995907e-08, - "loss": 1.017, - "step": 7612 - }, - { - "epoch": 0.9154091264353995, - "grad_norm": 2.690397430614251, - "learning_rate": 7.450389327665018e-08, - "loss": 0.839, - "step": 7613 - }, - { - "epoch": 0.9155293693260386, - "grad_norm": 2.5305404610526216, - "learning_rate": 7.429340535957029e-08, - "loss": 0.903, - "step": 7614 - }, - { - "epoch": 0.9156496122166777, - "grad_norm": 4.062809858976336, - "learning_rate": 7.40832095706494e-08, - "loss": 0.9406, - "step": 7615 - }, - { - "epoch": 0.9157698551073168, - "grad_norm": 1.7857034791891904, - "learning_rate": 7.387330594177443e-08, - "loss": 1.0267, - "step": 7616 - }, - { - "epoch": 0.9158900979979558, - "grad_norm": 1.633401864748512, - "learning_rate": 7.366369450478749e-08, - "loss": 1.0145, - "step": 7617 - }, - { - "epoch": 0.916010340888595, - "grad_norm": 3.610934758567017, - "learning_rate": 7.345437529148646e-08, - "loss": 0.889, - "step": 7618 - }, - { - "epoch": 0.9161305837792341, - "grad_norm": 2.12304215838655, - "learning_rate": 7.324534833362483e-08, - "loss": 0.966, - "step": 7619 - }, - { - "epoch": 0.9162508266698731, - "grad_norm": 1.9630347355564288, - "learning_rate": 7.303661366291192e-08, - "loss": 0.9088, - "step": 7620 - }, - { - "epoch": 0.9163710695605123, - "grad_norm": 1.8478476040165428, - "learning_rate": 7.28281713110126e-08, - "loss": 1.0469, - "step": 7621 - }, - { - "epoch": 0.9164913124511513, - "grad_norm": 1.7355294724063504, - "learning_rate": 7.262002130954759e-08, - "loss": 1.0097, - "step": 7622 - }, - { - "epoch": 0.9166115553417904, - "grad_norm": 2.00646820783108, - "learning_rate": 7.241216369009296e-08, - "loss": 1.0185, - "step": 7623 - }, - { - "epoch": 0.9167317982324296, - "grad_norm": 2.3486956486851565, - "learning_rate": 7.220459848418037e-08, - "loss": 0.8943, - "step": 7624 - }, - { - "epoch": 0.9168520411230686, - "grad_norm": 2.086285508518184, - "learning_rate": 7.199732572329708e-08, - "loss": 1.0267, - "step": 7625 - }, - { - "epoch": 0.9169722840137077, - "grad_norm": 2.346995738412038, - "learning_rate": 7.179034543888684e-08, - "loss": 0.9984, - "step": 7626 - }, - { - "epoch": 0.9170925269043467, - "grad_norm": 2.131613593535657, - "learning_rate": 7.158365766234808e-08, - "loss": 1.0118, - "step": 7627 - }, - { - "epoch": 0.9172127697949859, - "grad_norm": 2.024845767328891, - "learning_rate": 7.137726242503527e-08, - "loss": 0.9522, - "step": 7628 - }, - { - "epoch": 0.917333012685625, - "grad_norm": 2.776061200519899, - "learning_rate": 7.11711597582585e-08, - "loss": 1.0146, - "step": 7629 - }, - { - "epoch": 0.917453255576264, - "grad_norm": 1.6426170244725073, - "learning_rate": 7.096534969328271e-08, - "loss": 1.0311, - "step": 7630 - }, - { - "epoch": 0.9175734984669032, - "grad_norm": 1.955560249385595, - "learning_rate": 7.075983226132987e-08, - "loss": 1.0686, - "step": 7631 - }, - { - "epoch": 0.9176937413575422, - "grad_norm": 2.410151303872052, - "learning_rate": 7.055460749357656e-08, - "loss": 1.0112, - "step": 7632 - }, - { - "epoch": 0.9178139842481813, - "grad_norm": 1.669450588535901, - "learning_rate": 7.034967542115521e-08, - "loss": 0.934, - "step": 7633 - }, - { - "epoch": 0.9179342271388204, - "grad_norm": 2.519873023694248, - "learning_rate": 7.014503607515388e-08, - "loss": 0.9783, - "step": 7634 - }, - { - "epoch": 0.9180544700294595, - "grad_norm": 1.8666642864530012, - "learning_rate": 6.994068948661592e-08, - "loss": 0.9062, - "step": 7635 - }, - { - "epoch": 0.9181747129200986, - "grad_norm": 2.6905595973059717, - "learning_rate": 6.973663568654142e-08, - "loss": 0.9875, - "step": 7636 - }, - { - "epoch": 0.9182949558107377, - "grad_norm": 2.0513271222909535, - "learning_rate": 6.953287470588386e-08, - "loss": 0.8791, - "step": 7637 - }, - { - "epoch": 0.9184151987013768, - "grad_norm": 1.9652378301375908, - "learning_rate": 6.932940657555452e-08, - "loss": 1.084, - "step": 7638 - }, - { - "epoch": 0.9185354415920158, - "grad_norm": 1.4563611819019617, - "learning_rate": 6.912623132641938e-08, - "loss": 0.9851, - "step": 7639 - }, - { - "epoch": 0.918655684482655, - "grad_norm": 1.9081917550598444, - "learning_rate": 6.892334898929952e-08, - "loss": 0.9884, - "step": 7640 - }, - { - "epoch": 0.918775927373294, - "grad_norm": 1.8992734096982948, - "learning_rate": 6.872075959497236e-08, - "loss": 1.0714, - "step": 7641 - }, - { - "epoch": 0.9188961702639331, - "grad_norm": 3.575235324397108, - "learning_rate": 6.85184631741702e-08, - "loss": 1.0542, - "step": 7642 - }, - { - "epoch": 0.9190164131545723, - "grad_norm": 1.856786512939107, - "learning_rate": 6.831645975758161e-08, - "loss": 1.0104, - "step": 7643 - }, - { - "epoch": 0.9191366560452113, - "grad_norm": 1.9348432722863544, - "learning_rate": 6.811474937585026e-08, - "loss": 0.91, - "step": 7644 - }, - { - "epoch": 0.9192568989358504, - "grad_norm": 1.5410985501293997, - "learning_rate": 6.79133320595755e-08, - "loss": 1.0125, - "step": 7645 - }, - { - "epoch": 0.9193771418264896, - "grad_norm": 1.928179317556294, - "learning_rate": 6.771220783931198e-08, - "loss": 0.9793, - "step": 7646 - }, - { - "epoch": 0.9194973847171286, - "grad_norm": 0.8638912479604203, - "learning_rate": 6.751137674556994e-08, - "loss": 0.9077, - "step": 7647 - }, - { - "epoch": 0.9196176276077677, - "grad_norm": 2.544133307892902, - "learning_rate": 6.731083880881572e-08, - "loss": 1.0092, - "step": 7648 - }, - { - "epoch": 0.9197378704984068, - "grad_norm": 2.572930576631781, - "learning_rate": 6.711059405947072e-08, - "loss": 1.0422, - "step": 7649 - }, - { - "epoch": 0.9198581133890459, - "grad_norm": 1.6237645220969774, - "learning_rate": 6.691064252791156e-08, - "loss": 1.0018, - "step": 7650 - }, - { - "epoch": 0.9199783562796849, - "grad_norm": 4.141949136006341, - "learning_rate": 6.67109842444713e-08, - "loss": 1.0018, - "step": 7651 - }, - { - "epoch": 0.9200985991703241, - "grad_norm": 1.7430103223520175, - "learning_rate": 6.651161923943704e-08, - "loss": 0.9974, - "step": 7652 - }, - { - "epoch": 0.9202188420609632, - "grad_norm": 1.7138130722849032, - "learning_rate": 6.631254754305326e-08, - "loss": 0.9962, - "step": 7653 - }, - { - "epoch": 0.9203390849516022, - "grad_norm": 1.988053704152348, - "learning_rate": 6.611376918551848e-08, - "loss": 1.0151, - "step": 7654 - }, - { - "epoch": 0.9204593278422414, - "grad_norm": 2.4203866630800785, - "learning_rate": 6.591528419698744e-08, - "loss": 1.024, - "step": 7655 - }, - { - "epoch": 0.9205795707328804, - "grad_norm": 2.7925673403086795, - "learning_rate": 6.571709260756986e-08, - "loss": 1.0652, - "step": 7656 - }, - { - "epoch": 0.9206998136235195, - "grad_norm": 2.416304550226101, - "learning_rate": 6.551919444733122e-08, - "loss": 0.9884, - "step": 7657 - }, - { - "epoch": 0.9208200565141585, - "grad_norm": 2.0632215451518197, - "learning_rate": 6.53215897462931e-08, - "loss": 0.88, - "step": 7658 - }, - { - "epoch": 0.9209402994047977, - "grad_norm": 3.40812463533367, - "learning_rate": 6.512427853443103e-08, - "loss": 0.9878, - "step": 7659 - }, - { - "epoch": 0.9210605422954368, - "grad_norm": 1.5466773166920127, - "learning_rate": 6.492726084167799e-08, - "loss": 0.988, - "step": 7660 - }, - { - "epoch": 0.9211807851860758, - "grad_norm": 0.7934772758835614, - "learning_rate": 6.473053669792072e-08, - "loss": 0.8128, - "step": 7661 - }, - { - "epoch": 0.921301028076715, - "grad_norm": 3.9372393809458797, - "learning_rate": 6.453410613300248e-08, - "loss": 0.9587, - "step": 7662 - }, - { - "epoch": 0.921421270967354, - "grad_norm": 1.7169568504816932, - "learning_rate": 6.43379691767214e-08, - "loss": 0.8109, - "step": 7663 - }, - { - "epoch": 0.9215415138579931, - "grad_norm": 0.7653359237207821, - "learning_rate": 6.414212585883105e-08, - "loss": 0.844, - "step": 7664 - }, - { - "epoch": 0.9216617567486323, - "grad_norm": 1.4814886037778958, - "learning_rate": 6.394657620904143e-08, - "loss": 0.9268, - "step": 7665 - }, - { - "epoch": 0.9217819996392713, - "grad_norm": 1.6987709595409666, - "learning_rate": 6.375132025701657e-08, - "loss": 0.9461, - "step": 7666 - }, - { - "epoch": 0.9219022425299104, - "grad_norm": 2.25557484339259, - "learning_rate": 6.355635803237724e-08, - "loss": 0.9208, - "step": 7667 - }, - { - "epoch": 0.9220224854205495, - "grad_norm": 1.8155717799254105, - "learning_rate": 6.336168956469867e-08, - "loss": 1.0325, - "step": 7668 - }, - { - "epoch": 0.9221427283111886, - "grad_norm": 1.7197360591420428, - "learning_rate": 6.316731488351168e-08, - "loss": 0.953, - "step": 7669 - }, - { - "epoch": 0.9222629712018277, - "grad_norm": 2.017778886996864, - "learning_rate": 6.297323401830334e-08, - "loss": 0.8696, - "step": 7670 - }, - { - "epoch": 0.9223832140924668, - "grad_norm": 3.6007607450954597, - "learning_rate": 6.277944699851523e-08, - "loss": 0.9174, - "step": 7671 - }, - { - "epoch": 0.9225034569831059, - "grad_norm": 2.0649030153715757, - "learning_rate": 6.25859538535447e-08, - "loss": 0.9615, - "step": 7672 - }, - { - "epoch": 0.9226236998737449, - "grad_norm": 2.8490907590635555, - "learning_rate": 6.239275461274474e-08, - "loss": 1.0065, - "step": 7673 - }, - { - "epoch": 0.9227439427643841, - "grad_norm": 1.6612034266296214, - "learning_rate": 6.219984930542299e-08, - "loss": 1.0857, - "step": 7674 - }, - { - "epoch": 0.9228641856550232, - "grad_norm": 2.457957470711389, - "learning_rate": 6.200723796084383e-08, - "loss": 0.9855, - "step": 7675 - }, - { - "epoch": 0.9229844285456622, - "grad_norm": 0.7541768275326383, - "learning_rate": 6.181492060822546e-08, - "loss": 0.8758, - "step": 7676 - }, - { - "epoch": 0.9231046714363014, - "grad_norm": 2.1501795139333533, - "learning_rate": 6.162289727674274e-08, - "loss": 1.0517, - "step": 7677 - }, - { - "epoch": 0.9232249143269404, - "grad_norm": 2.640440172577701, - "learning_rate": 6.143116799552527e-08, - "loss": 1.1111, - "step": 7678 - }, - { - "epoch": 0.9233451572175795, - "grad_norm": 2.7116960239364793, - "learning_rate": 6.123973279365802e-08, - "loss": 0.7843, - "step": 7679 - }, - { - "epoch": 0.9234654001082186, - "grad_norm": 1.8567766258145826, - "learning_rate": 6.10485917001824e-08, - "loss": 1.0134, - "step": 7680 - }, - { - "epoch": 0.9235856429988577, - "grad_norm": 1.6747898112267523, - "learning_rate": 6.085774474409322e-08, - "loss": 1.0378, - "step": 7681 - }, - { - "epoch": 0.9237058858894968, - "grad_norm": 1.7334163181296085, - "learning_rate": 6.066719195434267e-08, - "loss": 0.9326, - "step": 7682 - }, - { - "epoch": 0.9238261287801359, - "grad_norm": 2.249436731829582, - "learning_rate": 6.047693335983717e-08, - "loss": 0.8954, - "step": 7683 - }, - { - "epoch": 0.923946371670775, - "grad_norm": 2.9686366009386562, - "learning_rate": 6.028696898943853e-08, - "loss": 1.0472, - "step": 7684 - }, - { - "epoch": 0.924066614561414, - "grad_norm": 1.9155881445670186, - "learning_rate": 6.00972988719648e-08, - "loss": 0.94, - "step": 7685 - }, - { - "epoch": 0.9241868574520532, - "grad_norm": 2.51058967806693, - "learning_rate": 5.990792303618807e-08, - "loss": 0.9439, - "step": 7686 - }, - { - "epoch": 0.9243071003426923, - "grad_norm": 1.5334534653306153, - "learning_rate": 5.971884151083695e-08, - "loss": 0.9334, - "step": 7687 - }, - { - "epoch": 0.9244273432333313, - "grad_norm": 4.320529263182194, - "learning_rate": 5.9530054324595124e-08, - "loss": 0.9747, - "step": 7688 - }, - { - "epoch": 0.9245475861239704, - "grad_norm": 0.7520560454246394, - "learning_rate": 5.934156150610103e-08, - "loss": 0.8285, - "step": 7689 - }, - { - "epoch": 0.9246678290146095, - "grad_norm": 2.0220525413048405, - "learning_rate": 5.915336308394914e-08, - "loss": 1.0138, - "step": 7690 - }, - { - "epoch": 0.9247880719052486, - "grad_norm": 1.5285666172832968, - "learning_rate": 5.89654590866886e-08, - "loss": 1.0041, - "step": 7691 - }, - { - "epoch": 0.9249083147958876, - "grad_norm": 2.0472029794484228, - "learning_rate": 5.877784954282483e-08, - "loss": 1.1131, - "step": 7692 - }, - { - "epoch": 0.9250285576865268, - "grad_norm": 1.9244548570665947, - "learning_rate": 5.8590534480817963e-08, - "loss": 0.9499, - "step": 7693 - }, - { - "epoch": 0.9251488005771659, - "grad_norm": 2.1757781818032678, - "learning_rate": 5.840351392908349e-08, - "loss": 0.9578, - "step": 7694 - }, - { - "epoch": 0.9252690434678049, - "grad_norm": 3.954153537761946, - "learning_rate": 5.821678791599205e-08, - "loss": 0.9352, - "step": 7695 - }, - { - "epoch": 0.9253892863584441, - "grad_norm": 1.7302952802179126, - "learning_rate": 5.803035646986965e-08, - "loss": 1.0388, - "step": 7696 - }, - { - "epoch": 0.9255095292490831, - "grad_norm": 2.4697517583790396, - "learning_rate": 5.7844219618998766e-08, - "loss": 0.9042, - "step": 7697 - }, - { - "epoch": 0.9256297721397222, - "grad_norm": 2.2159543759713825, - "learning_rate": 5.765837739161505e-08, - "loss": 0.9449, - "step": 7698 - }, - { - "epoch": 0.9257500150303614, - "grad_norm": 1.5824280041171261, - "learning_rate": 5.7472829815911504e-08, - "loss": 0.9829, - "step": 7699 - }, - { - "epoch": 0.9258702579210004, - "grad_norm": 1.6146625212252066, - "learning_rate": 5.7287576920035164e-08, - "loss": 1.0412, - "step": 7700 - }, - { - "epoch": 0.9259905008116395, - "grad_norm": 1.815412614648742, - "learning_rate": 5.7102618732088435e-08, - "loss": 0.9885, - "step": 7701 - }, - { - "epoch": 0.9261107437022786, - "grad_norm": 1.7302514622239382, - "learning_rate": 5.6917955280130216e-08, - "loss": 0.9723, - "step": 7702 - }, - { - "epoch": 0.9262309865929177, - "grad_norm": 1.9020751512845209, - "learning_rate": 5.6733586592172755e-08, - "loss": 0.9492, - "step": 7703 - }, - { - "epoch": 0.9263512294835567, - "grad_norm": 1.698279531198815, - "learning_rate": 5.6549512696185244e-08, - "loss": 1.037, - "step": 7704 - }, - { - "epoch": 0.9264714723741959, - "grad_norm": 1.8017255909424061, - "learning_rate": 5.636573362009156e-08, - "loss": 0.9149, - "step": 7705 - }, - { - "epoch": 0.926591715264835, - "grad_norm": 2.4789072964266623, - "learning_rate": 5.618224939177074e-08, - "loss": 0.9925, - "step": 7706 - }, - { - "epoch": 0.926711958155474, - "grad_norm": 1.821514927662853, - "learning_rate": 5.599906003905719e-08, - "loss": 0.9321, - "step": 7707 - }, - { - "epoch": 0.9268322010461132, - "grad_norm": 2.3355226690604636, - "learning_rate": 5.581616558974023e-08, - "loss": 1.0525, - "step": 7708 - }, - { - "epoch": 0.9269524439367522, - "grad_norm": 1.7586761387523417, - "learning_rate": 5.5633566071565444e-08, - "loss": 1.0174, - "step": 7709 - }, - { - "epoch": 0.9270726868273913, - "grad_norm": 1.9451546968853308, - "learning_rate": 5.5451261512232896e-08, - "loss": 0.9252, - "step": 7710 - }, - { - "epoch": 0.9271929297180305, - "grad_norm": 1.8627774819146532, - "learning_rate": 5.5269251939397576e-08, - "loss": 0.8522, - "step": 7711 - }, - { - "epoch": 0.9273131726086695, - "grad_norm": 2.027046431714904, - "learning_rate": 5.508753738067073e-08, - "loss": 0.99, - "step": 7712 - }, - { - "epoch": 0.9274334154993086, - "grad_norm": 1.9419079752056945, - "learning_rate": 5.4906117863617875e-08, - "loss": 1.0207, - "step": 7713 - }, - { - "epoch": 0.9275536583899477, - "grad_norm": 1.8821449837655813, - "learning_rate": 5.4724993415760533e-08, - "loss": 1.0174, - "step": 7714 - }, - { - "epoch": 0.9276739012805868, - "grad_norm": 2.9641228202670566, - "learning_rate": 5.454416406457496e-08, - "loss": 0.9742, - "step": 7715 - }, - { - "epoch": 0.9277941441712259, - "grad_norm": 2.557600594459825, - "learning_rate": 5.436362983749299e-08, - "loss": 0.9703, - "step": 7716 - }, - { - "epoch": 0.927914387061865, - "grad_norm": 2.0683373584241944, - "learning_rate": 5.418339076190137e-08, - "loss": 0.869, - "step": 7717 - }, - { - "epoch": 0.9280346299525041, - "grad_norm": 1.965140652075618, - "learning_rate": 5.400344686514202e-08, - "loss": 1.1186, - "step": 7718 - }, - { - "epoch": 0.9281548728431431, - "grad_norm": 2.5004656358053605, - "learning_rate": 5.38237981745131e-08, - "loss": 0.8974, - "step": 7719 - }, - { - "epoch": 0.9282751157337822, - "grad_norm": 1.698256998726923, - "learning_rate": 5.364444471726592e-08, - "loss": 1.0419, - "step": 7720 - }, - { - "epoch": 0.9283953586244214, - "grad_norm": 2.685065475319082, - "learning_rate": 5.346538652060939e-08, - "loss": 1.0311, - "step": 7721 - }, - { - "epoch": 0.9285156015150604, - "grad_norm": 2.0721050059984387, - "learning_rate": 5.3286623611705994e-08, - "loss": 0.9306, - "step": 7722 - }, - { - "epoch": 0.9286358444056995, - "grad_norm": 0.8863799670522321, - "learning_rate": 5.3108156017673824e-08, - "loss": 0.8821, - "step": 7723 - }, - { - "epoch": 0.9287560872963386, - "grad_norm": 1.622402389129407, - "learning_rate": 5.2929983765586775e-08, - "loss": 0.9424, - "step": 7724 - }, - { - "epoch": 0.9288763301869777, - "grad_norm": 1.8208575435515792, - "learning_rate": 5.275210688247278e-08, - "loss": 0.8558, - "step": 7725 - }, - { - "epoch": 0.9289965730776167, - "grad_norm": 1.858818355552747, - "learning_rate": 5.257452539531604e-08, - "loss": 1.0728, - "step": 7726 - }, - { - "epoch": 0.9291168159682559, - "grad_norm": 1.8082185360146092, - "learning_rate": 5.2397239331055445e-08, - "loss": 0.9174, - "step": 7727 - }, - { - "epoch": 0.929237058858895, - "grad_norm": 2.4829278725861306, - "learning_rate": 5.2220248716585036e-08, - "loss": 1.035, - "step": 7728 - }, - { - "epoch": 0.929357301749534, - "grad_norm": 3.4516569443262926, - "learning_rate": 5.204355357875445e-08, - "loss": 0.9828, - "step": 7729 - }, - { - "epoch": 0.9294775446401732, - "grad_norm": 2.1507438348799064, - "learning_rate": 5.1867153944367584e-08, - "loss": 0.9325, - "step": 7730 - }, - { - "epoch": 0.9295977875308122, - "grad_norm": 1.8778691274605332, - "learning_rate": 5.16910498401848e-08, - "loss": 0.9675, - "step": 7731 - }, - { - "epoch": 0.9297180304214513, - "grad_norm": 1.824526121722475, - "learning_rate": 5.151524129292073e-08, - "loss": 1.0636, - "step": 7732 - }, - { - "epoch": 0.9298382733120905, - "grad_norm": 2.4966589537001864, - "learning_rate": 5.1339728329245155e-08, - "loss": 0.8987, - "step": 7733 - }, - { - "epoch": 0.9299585162027295, - "grad_norm": 12.029027163774963, - "learning_rate": 5.116451097578367e-08, - "loss": 1.0205, - "step": 7734 - }, - { - "epoch": 0.9300787590933686, - "grad_norm": 1.5476509228941557, - "learning_rate": 5.0989589259115895e-08, - "loss": 0.9736, - "step": 7735 - }, - { - "epoch": 0.9301990019840077, - "grad_norm": 2.0945430790852795, - "learning_rate": 5.081496320577816e-08, - "loss": 0.9364, - "step": 7736 - }, - { - "epoch": 0.9303192448746468, - "grad_norm": 0.9506882332883066, - "learning_rate": 5.0640632842260835e-08, - "loss": 0.8915, - "step": 7737 - }, - { - "epoch": 0.9304394877652858, - "grad_norm": 1.4748966568773616, - "learning_rate": 5.0466598195009426e-08, - "loss": 0.9499, - "step": 7738 - }, - { - "epoch": 0.930559730655925, - "grad_norm": 1.8410962937599784, - "learning_rate": 5.0292859290425036e-08, - "loss": 0.9334, - "step": 7739 - }, - { - "epoch": 0.9306799735465641, - "grad_norm": 1.8947215820347707, - "learning_rate": 5.011941615486348e-08, - "loss": 1.0071, - "step": 7740 - }, - { - "epoch": 0.9308002164372031, - "grad_norm": 1.9723919077604917, - "learning_rate": 4.994626881463659e-08, - "loss": 1.0775, - "step": 7741 - }, - { - "epoch": 0.9309204593278423, - "grad_norm": 1.8157684847260984, - "learning_rate": 4.9773417296009814e-08, - "loss": 0.9363, - "step": 7742 - }, - { - "epoch": 0.9310407022184813, - "grad_norm": 2.2311751922120067, - "learning_rate": 4.960086162520527e-08, - "loss": 0.8826, - "step": 7743 - }, - { - "epoch": 0.9311609451091204, - "grad_norm": 1.8457006060870709, - "learning_rate": 4.942860182839936e-08, - "loss": 1.052, - "step": 7744 - }, - { - "epoch": 0.9312811879997596, - "grad_norm": 2.0941499925833313, - "learning_rate": 4.925663793172341e-08, - "loss": 1.0186, - "step": 7745 - }, - { - "epoch": 0.9314014308903986, - "grad_norm": 0.849066479743445, - "learning_rate": 4.908496996126477e-08, - "loss": 0.8428, - "step": 7746 - }, - { - "epoch": 0.9315216737810377, - "grad_norm": 1.549926137702495, - "learning_rate": 4.89135979430646e-08, - "loss": 0.9913, - "step": 7747 - }, - { - "epoch": 0.9316419166716768, - "grad_norm": 2.490348113211401, - "learning_rate": 4.874252190312078e-08, - "loss": 1.0699, - "step": 7748 - }, - { - "epoch": 0.9317621595623159, - "grad_norm": 1.4542137907129513, - "learning_rate": 4.857174186738477e-08, - "loss": 0.8738, - "step": 7749 - }, - { - "epoch": 0.931882402452955, - "grad_norm": 4.136602787341233, - "learning_rate": 4.840125786176408e-08, - "loss": 0.9674, - "step": 7750 - }, - { - "epoch": 0.932002645343594, - "grad_norm": 1.7599325323178008, - "learning_rate": 4.823106991212067e-08, - "loss": 0.998, - "step": 7751 - }, - { - "epoch": 0.9321228882342332, - "grad_norm": 2.0409948320561666, - "learning_rate": 4.806117804427212e-08, - "loss": 1.0694, - "step": 7752 - }, - { - "epoch": 0.9322431311248722, - "grad_norm": 2.0929127627662956, - "learning_rate": 4.7891582283990926e-08, - "loss": 0.8739, - "step": 7753 - }, - { - "epoch": 0.9323633740155113, - "grad_norm": 2.531626638012733, - "learning_rate": 4.772228265700473e-08, - "loss": 0.9519, - "step": 7754 - }, - { - "epoch": 0.9324836169061504, - "grad_norm": 2.0182213436508327, - "learning_rate": 4.75532791889961e-08, - "loss": 0.9793, - "step": 7755 - }, - { - "epoch": 0.9326038597967895, - "grad_norm": 2.003264147705788, - "learning_rate": 4.738457190560252e-08, - "loss": 0.8859, - "step": 7756 - }, - { - "epoch": 0.9327241026874286, - "grad_norm": 2.2751353632301767, - "learning_rate": 4.721616083241664e-08, - "loss": 1.021, - "step": 7757 - }, - { - "epoch": 0.9328443455780677, - "grad_norm": 1.937112461833029, - "learning_rate": 4.7048045994986684e-08, - "loss": 0.9987, - "step": 7758 - }, - { - "epoch": 0.9329645884687068, - "grad_norm": 1.9521900227913418, - "learning_rate": 4.688022741881559e-08, - "loss": 1.1346, - "step": 7759 - }, - { - "epoch": 0.9330848313593458, - "grad_norm": 1.5380814422057325, - "learning_rate": 4.671270512936076e-08, - "loss": 0.9855, - "step": 7760 - }, - { - "epoch": 0.933205074249985, - "grad_norm": 1.6876069494194315, - "learning_rate": 4.6545479152035884e-08, - "loss": 1.0574, - "step": 7761 - }, - { - "epoch": 0.9333253171406241, - "grad_norm": 2.2909679619269414, - "learning_rate": 4.637854951220821e-08, - "loss": 1.0, - "step": 7762 - }, - { - "epoch": 0.9334455600312631, - "grad_norm": 2.054243612750475, - "learning_rate": 4.621191623520171e-08, - "loss": 0.9788, - "step": 7763 - }, - { - "epoch": 0.9335658029219023, - "grad_norm": 2.63945288291886, - "learning_rate": 4.604557934629372e-08, - "loss": 1.0746, - "step": 7764 - }, - { - "epoch": 0.9336860458125413, - "grad_norm": 2.744967450781005, - "learning_rate": 4.587953887071805e-08, - "loss": 1.0368, - "step": 7765 - }, - { - "epoch": 0.9338062887031804, - "grad_norm": 2.15658356671062, - "learning_rate": 4.5713794833662554e-08, - "loss": 1.0903, - "step": 7766 - }, - { - "epoch": 0.9339265315938196, - "grad_norm": 1.7766525098130472, - "learning_rate": 4.5548347260270236e-08, - "loss": 0.8608, - "step": 7767 - }, - { - "epoch": 0.9340467744844586, - "grad_norm": 5.016263451747125, - "learning_rate": 4.538319617564012e-08, - "loss": 0.9264, - "step": 7768 - }, - { - "epoch": 0.9341670173750977, - "grad_norm": 2.028065694403273, - "learning_rate": 4.521834160482485e-08, - "loss": 0.9751, - "step": 7769 - }, - { - "epoch": 0.9342872602657368, - "grad_norm": 1.4528844490705632, - "learning_rate": 4.5053783572832846e-08, - "loss": 1.0449, - "step": 7770 - }, - { - "epoch": 0.9344075031563759, - "grad_norm": 1.6172872319083504, - "learning_rate": 4.488952210462771e-08, - "loss": 0.9897, - "step": 7771 - }, - { - "epoch": 0.9345277460470149, - "grad_norm": 1.8535468819397538, - "learning_rate": 4.4725557225127495e-08, - "loss": 1.0855, - "step": 7772 - }, - { - "epoch": 0.9346479889376541, - "grad_norm": 1.569383108952886, - "learning_rate": 4.456188895920565e-08, - "loss": 1.0232, - "step": 7773 - }, - { - "epoch": 0.9347682318282932, - "grad_norm": 3.176744753614029, - "learning_rate": 4.439851733169031e-08, - "loss": 1.0766, - "step": 7774 - }, - { - "epoch": 0.9348884747189322, - "grad_norm": 2.582322097071723, - "learning_rate": 4.4235442367365204e-08, - "loss": 0.926, - "step": 7775 - }, - { - "epoch": 0.9350087176095714, - "grad_norm": 1.9790885374475788, - "learning_rate": 4.4072664090968545e-08, - "loss": 1.0161, - "step": 7776 - }, - { - "epoch": 0.9351289605002104, - "grad_norm": 2.7247044919273096, - "learning_rate": 4.391018252719347e-08, - "loss": 1.0696, - "step": 7777 - }, - { - "epoch": 0.9352492033908495, - "grad_norm": 1.784889919188465, - "learning_rate": 4.374799770068849e-08, - "loss": 0.9214, - "step": 7778 - }, - { - "epoch": 0.9353694462814887, - "grad_norm": 1.935933926065639, - "learning_rate": 4.358610963605658e-08, - "loss": 0.9727, - "step": 7779 - }, - { - "epoch": 0.9354896891721277, - "grad_norm": 2.1711233471269984, - "learning_rate": 4.342451835785677e-08, - "loss": 0.9127, - "step": 7780 - }, - { - "epoch": 0.9356099320627668, - "grad_norm": 1.7416519915800575, - "learning_rate": 4.3263223890601665e-08, - "loss": 0.9763, - "step": 7781 - }, - { - "epoch": 0.9357301749534058, - "grad_norm": 1.909266976704982, - "learning_rate": 4.31022262587597e-08, - "loss": 1.0258, - "step": 7782 - }, - { - "epoch": 0.935850417844045, - "grad_norm": 2.0387152014311334, - "learning_rate": 4.2941525486754225e-08, - "loss": 0.8877, - "step": 7783 - }, - { - "epoch": 0.935970660734684, - "grad_norm": 1.7942772040694646, - "learning_rate": 4.278112159896286e-08, - "loss": 1.0161, - "step": 7784 - }, - { - "epoch": 0.9360909036253231, - "grad_norm": 2.321997034246767, - "learning_rate": 4.2621014619719896e-08, - "loss": 0.8992, - "step": 7785 - }, - { - "epoch": 0.9362111465159623, - "grad_norm": 0.7808587811359478, - "learning_rate": 4.246120457331215e-08, - "loss": 0.8598, - "step": 7786 - }, - { - "epoch": 0.9363313894066013, - "grad_norm": 1.8413313179161612, - "learning_rate": 4.2301691483983325e-08, - "loss": 0.9546, - "step": 7787 - }, - { - "epoch": 0.9364516322972404, - "grad_norm": 2.7992442235019146, - "learning_rate": 4.214247537593163e-08, - "loss": 0.9865, - "step": 7788 - }, - { - "epoch": 0.9365718751878795, - "grad_norm": 2.036445429013889, - "learning_rate": 4.1983556273309293e-08, - "loss": 1.0314, - "step": 7789 - }, - { - "epoch": 0.9366921180785186, - "grad_norm": 2.92562071254117, - "learning_rate": 4.182493420022526e-08, - "loss": 0.9214, - "step": 7790 - }, - { - "epoch": 0.9368123609691577, - "grad_norm": 2.1376483575744794, - "learning_rate": 4.166660918074139e-08, - "loss": 1.017, - "step": 7791 - }, - { - "epoch": 0.9369326038597968, - "grad_norm": 3.4667148977983735, - "learning_rate": 4.15085812388758e-08, - "loss": 0.9622, - "step": 7792 - }, - { - "epoch": 0.9370528467504359, - "grad_norm": 1.6469507247048143, - "learning_rate": 4.135085039860153e-08, - "loss": 1.0145, - "step": 7793 - }, - { - "epoch": 0.9371730896410749, - "grad_norm": 2.3192772402111976, - "learning_rate": 4.1193416683845906e-08, - "loss": 1.0208, - "step": 7794 - }, - { - "epoch": 0.9372933325317141, - "grad_norm": 3.583481955588083, - "learning_rate": 4.103628011849136e-08, - "loss": 1.0573, - "step": 7795 - }, - { - "epoch": 0.9374135754223532, - "grad_norm": 1.9564500505333464, - "learning_rate": 4.0879440726375506e-08, - "loss": 0.9804, - "step": 7796 - }, - { - "epoch": 0.9375338183129922, - "grad_norm": 4.401225153542233, - "learning_rate": 4.0722898531291074e-08, - "loss": 0.7902, - "step": 7797 - }, - { - "epoch": 0.9376540612036314, - "grad_norm": 1.9087451801351611, - "learning_rate": 4.0566653556985295e-08, - "loss": 0.9928, - "step": 7798 - }, - { - "epoch": 0.9377743040942704, - "grad_norm": 3.310505680514349, - "learning_rate": 4.0410705827159886e-08, - "loss": 1.0414, - "step": 7799 - }, - { - "epoch": 0.9378945469849095, - "grad_norm": 2.148726898122431, - "learning_rate": 4.0255055365472356e-08, - "loss": 0.9348, - "step": 7800 - }, - { - "epoch": 0.9380147898755486, - "grad_norm": 2.1365245891274665, - "learning_rate": 4.009970219553471e-08, - "loss": 0.9682, - "step": 7801 - }, - { - "epoch": 0.9381350327661877, - "grad_norm": 5.156963507096535, - "learning_rate": 3.99446463409141e-08, - "loss": 0.9907, - "step": 7802 - }, - { - "epoch": 0.9382552756568268, - "grad_norm": 3.31582086471766, - "learning_rate": 3.978988782513215e-08, - "loss": 0.9151, - "step": 7803 - }, - { - "epoch": 0.9383755185474659, - "grad_norm": 1.554425701546136, - "learning_rate": 3.963542667166586e-08, - "loss": 0.9907, - "step": 7804 - }, - { - "epoch": 0.938495761438105, - "grad_norm": 2.557700151086169, - "learning_rate": 3.9481262903946486e-08, - "loss": 0.9176, - "step": 7805 - }, - { - "epoch": 0.938616004328744, - "grad_norm": 0.8246069983552469, - "learning_rate": 3.932739654536066e-08, - "loss": 0.8036, - "step": 7806 - }, - { - "epoch": 0.9387362472193832, - "grad_norm": 2.7469557471444674, - "learning_rate": 3.917382761925014e-08, - "loss": 0.9676, - "step": 7807 - }, - { - "epoch": 0.9388564901100223, - "grad_norm": 1.5901072890593277, - "learning_rate": 3.9020556148910754e-08, - "loss": 1.0165, - "step": 7808 - }, - { - "epoch": 0.9389767330006613, - "grad_norm": 0.7321407745079166, - "learning_rate": 3.8867582157593895e-08, - "loss": 0.8201, - "step": 7809 - }, - { - "epoch": 0.9390969758913005, - "grad_norm": 1.5997824848857245, - "learning_rate": 3.871490566850544e-08, - "loss": 0.9954, - "step": 7810 - }, - { - "epoch": 0.9392172187819395, - "grad_norm": 2.1957714103446233, - "learning_rate": 3.856252670480642e-08, - "loss": 0.9368, - "step": 7811 - }, - { - "epoch": 0.9393374616725786, - "grad_norm": 1.986570030343349, - "learning_rate": 3.841044528961279e-08, - "loss": 1.0448, - "step": 7812 - }, - { - "epoch": 0.9394577045632178, - "grad_norm": 2.1940014817294573, - "learning_rate": 3.825866144599477e-08, - "loss": 1.0181, - "step": 7813 - }, - { - "epoch": 0.9395779474538568, - "grad_norm": 3.462439630856639, - "learning_rate": 3.8107175196978145e-08, - "loss": 0.9874, - "step": 7814 - }, - { - "epoch": 0.9396981903444959, - "grad_norm": 2.090423560594649, - "learning_rate": 3.7955986565542996e-08, - "loss": 0.9954, - "step": 7815 - }, - { - "epoch": 0.9398184332351349, - "grad_norm": 1.7953821366286533, - "learning_rate": 3.780509557462497e-08, - "loss": 0.9165, - "step": 7816 - }, - { - "epoch": 0.9399386761257741, - "grad_norm": 1.5157063255907428, - "learning_rate": 3.765450224711375e-08, - "loss": 0.9817, - "step": 7817 - }, - { - "epoch": 0.9400589190164131, - "grad_norm": 1.676636164345426, - "learning_rate": 3.750420660585396e-08, - "loss": 1.0221, - "step": 7818 - }, - { - "epoch": 0.9401791619070522, - "grad_norm": 1.5916409468778052, - "learning_rate": 3.735420867364603e-08, - "loss": 1.0266, - "step": 7819 - }, - { - "epoch": 0.9402994047976914, - "grad_norm": 1.5637755718625523, - "learning_rate": 3.7204508473244186e-08, - "loss": 0.8494, - "step": 7820 - }, - { - "epoch": 0.9404196476883304, - "grad_norm": 1.567291998683386, - "learning_rate": 3.7055106027357395e-08, - "loss": 0.922, - "step": 7821 - }, - { - "epoch": 0.9405398905789695, - "grad_norm": 2.4055297566303726, - "learning_rate": 3.690600135865063e-08, - "loss": 0.945, - "step": 7822 - }, - { - "epoch": 0.9406601334696086, - "grad_norm": 0.7671568590559801, - "learning_rate": 3.675719448974246e-08, - "loss": 0.8328, - "step": 7823 - }, - { - "epoch": 0.9407803763602477, - "grad_norm": 7.544906201446314, - "learning_rate": 3.6608685443207054e-08, - "loss": 0.831, - "step": 7824 - }, - { - "epoch": 0.9409006192508867, - "grad_norm": 2.2227980529924904, - "learning_rate": 3.646047424157306e-08, - "loss": 0.9005, - "step": 7825 - }, - { - "epoch": 0.9410208621415259, - "grad_norm": 2.131721636809276, - "learning_rate": 3.631256090732382e-08, - "loss": 0.9059, - "step": 7826 - }, - { - "epoch": 0.941141105032165, - "grad_norm": 1.7653012274221667, - "learning_rate": 3.6164945462897833e-08, - "loss": 1.0518, - "step": 7827 - }, - { - "epoch": 0.941261347922804, - "grad_norm": 1.6877564129465303, - "learning_rate": 3.6017627930687856e-08, - "loss": 0.9849, - "step": 7828 - }, - { - "epoch": 0.9413815908134432, - "grad_norm": 2.1039711008371667, - "learning_rate": 3.587060833304267e-08, - "loss": 0.996, - "step": 7829 - }, - { - "epoch": 0.9415018337040822, - "grad_norm": 2.194852298815787, - "learning_rate": 3.5723886692264225e-08, - "loss": 0.8673, - "step": 7830 - }, - { - "epoch": 0.9416220765947213, - "grad_norm": 2.3260335040315825, - "learning_rate": 3.557746303061071e-08, - "loss": 0.8464, - "step": 7831 - }, - { - "epoch": 0.9417423194853605, - "grad_norm": 1.6693409048559753, - "learning_rate": 3.543133737029391e-08, - "loss": 0.9552, - "step": 7832 - }, - { - "epoch": 0.9418625623759995, - "grad_norm": 2.2033747538233555, - "learning_rate": 3.5285509733481214e-08, - "loss": 0.9128, - "step": 7833 - }, - { - "epoch": 0.9419828052666386, - "grad_norm": 1.8077256692064845, - "learning_rate": 3.513998014229469e-08, - "loss": 0.997, - "step": 7834 - }, - { - "epoch": 0.9421030481572777, - "grad_norm": 1.9948788284369239, - "learning_rate": 3.499474861881069e-08, - "loss": 1.0931, - "step": 7835 - }, - { - "epoch": 0.9422232910479168, - "grad_norm": 2.0128720431263165, - "learning_rate": 3.4849815185061136e-08, - "loss": 0.9095, - "step": 7836 - }, - { - "epoch": 0.9423435339385559, - "grad_norm": 2.4319558303172393, - "learning_rate": 3.470517986303223e-08, - "loss": 0.9847, - "step": 7837 - }, - { - "epoch": 0.942463776829195, - "grad_norm": 2.1664084378440203, - "learning_rate": 3.4560842674664856e-08, - "loss": 1.0251, - "step": 7838 - }, - { - "epoch": 0.9425840197198341, - "grad_norm": 2.016910350148134, - "learning_rate": 3.441680364185506e-08, - "loss": 0.9848, - "step": 7839 - }, - { - "epoch": 0.9427042626104731, - "grad_norm": 3.906148314106165, - "learning_rate": 3.427306278645314e-08, - "loss": 0.9776, - "step": 7840 - }, - { - "epoch": 0.9428245055011123, - "grad_norm": 1.853042912072282, - "learning_rate": 3.4129620130264767e-08, - "loss": 0.9572, - "step": 7841 - }, - { - "epoch": 0.9429447483917514, - "grad_norm": 3.630891813119959, - "learning_rate": 3.398647569505009e-08, - "loss": 1.0183, - "step": 7842 - }, - { - "epoch": 0.9430649912823904, - "grad_norm": 2.3936941354472814, - "learning_rate": 3.384362950252373e-08, - "loss": 0.9712, - "step": 7843 - }, - { - "epoch": 0.9431852341730296, - "grad_norm": 2.609062861429862, - "learning_rate": 3.3701081574355473e-08, - "loss": 0.7952, - "step": 7844 - }, - { - "epoch": 0.9433054770636686, - "grad_norm": 0.8952939309529628, - "learning_rate": 3.3558831932169796e-08, - "loss": 0.7675, - "step": 7845 - }, - { - "epoch": 0.9434257199543077, - "grad_norm": 1.9320320990129047, - "learning_rate": 3.341688059754588e-08, - "loss": 1.1085, - "step": 7846 - }, - { - "epoch": 0.9435459628449467, - "grad_norm": 2.139434483397994, - "learning_rate": 3.327522759201762e-08, - "loss": 1.0016, - "step": 7847 - }, - { - "epoch": 0.9436662057355859, - "grad_norm": 2.082690966437923, - "learning_rate": 3.313387293707359e-08, - "loss": 0.8961, - "step": 7848 - }, - { - "epoch": 0.943786448626225, - "grad_norm": 4.978995837655581, - "learning_rate": 3.29928166541571e-08, - "loss": 0.9136, - "step": 7849 - }, - { - "epoch": 0.943906691516864, - "grad_norm": 1.836601364557955, - "learning_rate": 3.2852058764666346e-08, - "loss": 1.0304, - "step": 7850 - }, - { - "epoch": 0.9440269344075032, - "grad_norm": 1.5856778538322447, - "learning_rate": 3.2711599289954264e-08, - "loss": 0.9208, - "step": 7851 - }, - { - "epoch": 0.9441471772981422, - "grad_norm": 1.8141434552038491, - "learning_rate": 3.257143825132847e-08, - "loss": 1.0011, - "step": 7852 - }, - { - "epoch": 0.9442674201887813, - "grad_norm": 1.6822188354725736, - "learning_rate": 3.243157567005106e-08, - "loss": 0.9877, - "step": 7853 - }, - { - "epoch": 0.9443876630794205, - "grad_norm": 2.194923013366741, - "learning_rate": 3.2292011567339296e-08, - "loss": 0.8675, - "step": 7854 - }, - { - "epoch": 0.9445079059700595, - "grad_norm": 4.689474885088519, - "learning_rate": 3.21527459643649e-08, - "loss": 0.7956, - "step": 7855 - }, - { - "epoch": 0.9446281488606986, - "grad_norm": 2.0960529245217643, - "learning_rate": 3.2013778882254536e-08, - "loss": 0.9707, - "step": 7856 - }, - { - "epoch": 0.9447483917513377, - "grad_norm": 1.803410462206686, - "learning_rate": 3.1875110342088676e-08, - "loss": 0.9884, - "step": 7857 - }, - { - "epoch": 0.9448686346419768, - "grad_norm": 1.7118341891886095, - "learning_rate": 3.1736740364904035e-08, - "loss": 0.8844, - "step": 7858 - }, - { - "epoch": 0.9449888775326158, - "grad_norm": 2.183517318234119, - "learning_rate": 3.159866897169094e-08, - "loss": 0.9944, - "step": 7859 - }, - { - "epoch": 0.945109120423255, - "grad_norm": 1.684260897761542, - "learning_rate": 3.146089618339487e-08, - "loss": 0.9839, - "step": 7860 - }, - { - "epoch": 0.9452293633138941, - "grad_norm": 4.411738879558292, - "learning_rate": 3.132342202091554e-08, - "loss": 0.9091, - "step": 7861 - }, - { - "epoch": 0.9453496062045331, - "grad_norm": 2.1428789955114818, - "learning_rate": 3.1186246505107595e-08, - "loss": 0.9013, - "step": 7862 - }, - { - "epoch": 0.9454698490951723, - "grad_norm": 1.6520853764961119, - "learning_rate": 3.104936965678084e-08, - "loss": 1.0679, - "step": 7863 - }, - { - "epoch": 0.9455900919858113, - "grad_norm": 2.3614002150518605, - "learning_rate": 3.091279149669956e-08, - "loss": 1.0401, - "step": 7864 - }, - { - "epoch": 0.9457103348764504, - "grad_norm": 1.812089478929732, - "learning_rate": 3.0776512045581624e-08, - "loss": 0.9674, - "step": 7865 - }, - { - "epoch": 0.9458305777670896, - "grad_norm": 1.8155403944778294, - "learning_rate": 3.0640531324101384e-08, - "loss": 0.9997, - "step": 7866 - }, - { - "epoch": 0.9459508206577286, - "grad_norm": 1.811242292031051, - "learning_rate": 3.0504849352886554e-08, - "loss": 0.9871, - "step": 7867 - }, - { - "epoch": 0.9460710635483677, - "grad_norm": 2.6567447874707826, - "learning_rate": 3.036946615252023e-08, - "loss": 0.9432, - "step": 7868 - }, - { - "epoch": 0.9461913064390068, - "grad_norm": 1.9726932333321552, - "learning_rate": 3.0234381743539984e-08, - "loss": 0.8914, - "step": 7869 - }, - { - "epoch": 0.9463115493296459, - "grad_norm": 2.082824441427696, - "learning_rate": 3.0099596146437863e-08, - "loss": 1.0282, - "step": 7870 - }, - { - "epoch": 0.946431792220285, - "grad_norm": 0.79469247268159, - "learning_rate": 2.996510938166086e-08, - "loss": 0.8601, - "step": 7871 - }, - { - "epoch": 0.9465520351109241, - "grad_norm": 1.9426149102077777, - "learning_rate": 2.983092146960997e-08, - "loss": 0.9604, - "step": 7872 - }, - { - "epoch": 0.9466722780015632, - "grad_norm": 2.1908144228198783, - "learning_rate": 2.9697032430642256e-08, - "loss": 1.0267, - "step": 7873 - }, - { - "epoch": 0.9467925208922022, - "grad_norm": 3.080252594684096, - "learning_rate": 2.9563442285067906e-08, - "loss": 0.9649, - "step": 7874 - }, - { - "epoch": 0.9469127637828414, - "grad_norm": 1.9141007400118255, - "learning_rate": 2.943015105315294e-08, - "loss": 1.0259, - "step": 7875 - }, - { - "epoch": 0.9470330066734804, - "grad_norm": 2.534653624095534, - "learning_rate": 2.929715875511718e-08, - "loss": 0.8901, - "step": 7876 - }, - { - "epoch": 0.9471532495641195, - "grad_norm": 1.8171782135008643, - "learning_rate": 2.9164465411135375e-08, - "loss": 0.9268, - "step": 7877 - }, - { - "epoch": 0.9472734924547586, - "grad_norm": 2.1627422395539826, - "learning_rate": 2.9032071041337426e-08, - "loss": 1.0324, - "step": 7878 - }, - { - "epoch": 0.9473937353453977, - "grad_norm": 1.8552906472909556, - "learning_rate": 2.889997566580704e-08, - "loss": 0.9592, - "step": 7879 - }, - { - "epoch": 0.9475139782360368, - "grad_norm": 1.6096756293233656, - "learning_rate": 2.8768179304583086e-08, - "loss": 0.9259, - "step": 7880 - }, - { - "epoch": 0.9476342211266758, - "grad_norm": 1.5667546900241813, - "learning_rate": 2.8636681977659117e-08, - "loss": 0.9645, - "step": 7881 - }, - { - "epoch": 0.947754464017315, - "grad_norm": 4.848245417397859, - "learning_rate": 2.850548370498318e-08, - "loss": 1.007, - "step": 7882 - }, - { - "epoch": 0.9478747069079541, - "grad_norm": 1.5982664104342974, - "learning_rate": 2.8374584506457798e-08, - "loss": 0.9403, - "step": 7883 - }, - { - "epoch": 0.9479949497985931, - "grad_norm": 2.488419701094485, - "learning_rate": 2.824398440193998e-08, - "loss": 0.8977, - "step": 7884 - }, - { - "epoch": 0.9481151926892323, - "grad_norm": 2.028821579286812, - "learning_rate": 2.811368341124232e-08, - "loss": 0.9375, - "step": 7885 - }, - { - "epoch": 0.9482354355798713, - "grad_norm": 6.347820009906852, - "learning_rate": 2.7983681554131222e-08, - "loss": 0.9036, - "step": 7886 - }, - { - "epoch": 0.9483556784705104, - "grad_norm": 2.4069470473496333, - "learning_rate": 2.7853978850327365e-08, - "loss": 0.9299, - "step": 7887 - }, - { - "epoch": 0.9484759213611496, - "grad_norm": 1.7194712426037357, - "learning_rate": 2.7724575319507225e-08, - "loss": 1.1028, - "step": 7888 - }, - { - "epoch": 0.9485961642517886, - "grad_norm": 2.3183030183493476, - "learning_rate": 2.759547098130044e-08, - "loss": 1.0079, - "step": 7889 - }, - { - "epoch": 0.9487164071424277, - "grad_norm": 2.6710368844088084, - "learning_rate": 2.746666585529267e-08, - "loss": 0.9934, - "step": 7890 - }, - { - "epoch": 0.9488366500330668, - "grad_norm": 3.040882183618905, - "learning_rate": 2.73381599610234e-08, - "loss": 0.9644, - "step": 7891 - }, - { - "epoch": 0.9489568929237059, - "grad_norm": 1.7606579617091682, - "learning_rate": 2.7209953317987033e-08, - "loss": 0.9408, - "step": 7892 - }, - { - "epoch": 0.9490771358143449, - "grad_norm": 2.1247795214996144, - "learning_rate": 2.7082045945631793e-08, - "loss": 1.0014, - "step": 7893 - }, - { - "epoch": 0.9491973787049841, - "grad_norm": 2.3695857926256396, - "learning_rate": 2.6954437863361712e-08, - "loss": 0.9214, - "step": 7894 - }, - { - "epoch": 0.9493176215956232, - "grad_norm": 2.2896782889791973, - "learning_rate": 2.6827129090534862e-08, - "loss": 0.9401, - "step": 7895 - }, - { - "epoch": 0.9494378644862622, - "grad_norm": 2.261629356501946, - "learning_rate": 2.670011964646335e-08, - "loss": 1.0108, - "step": 7896 - }, - { - "epoch": 0.9495581073769014, - "grad_norm": 5.524542410703012, - "learning_rate": 2.657340955041487e-08, - "loss": 0.9162, - "step": 7897 - }, - { - "epoch": 0.9496783502675404, - "grad_norm": 2.1828922243516207, - "learning_rate": 2.6446998821611167e-08, - "loss": 0.9437, - "step": 7898 - }, - { - "epoch": 0.9497985931581795, - "grad_norm": 3.498149927816198, - "learning_rate": 2.6320887479228228e-08, - "loss": 0.9486, - "step": 7899 - }, - { - "epoch": 0.9499188360488187, - "grad_norm": 2.173296346681245, - "learning_rate": 2.619507554239786e-08, - "loss": 0.956, - "step": 7900 - }, - { - "epoch": 0.9500390789394577, - "grad_norm": 1.8686676543091745, - "learning_rate": 2.606956303020502e-08, - "loss": 0.9355, - "step": 7901 - }, - { - "epoch": 0.9501593218300968, - "grad_norm": 6.4842123355915104, - "learning_rate": 2.5944349961690036e-08, - "loss": 1.0646, - "step": 7902 - }, - { - "epoch": 0.9502795647207359, - "grad_norm": 3.101864586250984, - "learning_rate": 2.581943635584749e-08, - "loss": 0.9611, - "step": 7903 - }, - { - "epoch": 0.950399807611375, - "grad_norm": 1.89969772142841, - "learning_rate": 2.569482223162689e-08, - "loss": 0.8909, - "step": 7904 - }, - { - "epoch": 0.950520050502014, - "grad_norm": 1.6008484527943572, - "learning_rate": 2.5570507607932e-08, - "loss": 0.9544, - "step": 7905 - }, - { - "epoch": 0.9506402933926532, - "grad_norm": 2.9643745704504187, - "learning_rate": 2.54464925036213e-08, - "loss": 0.849, - "step": 7906 - }, - { - "epoch": 0.9507605362832923, - "grad_norm": 2.1782890400351502, - "learning_rate": 2.532277693750773e-08, - "loss": 0.8316, - "step": 7907 - }, - { - "epoch": 0.9508807791739313, - "grad_norm": 3.1127036215731163, - "learning_rate": 2.5199360928358948e-08, - "loss": 0.9835, - "step": 7908 - }, - { - "epoch": 0.9510010220645704, - "grad_norm": 1.7116342458406304, - "learning_rate": 2.507624449489665e-08, - "loss": 1.0984, - "step": 7909 - }, - { - "epoch": 0.9511212649552095, - "grad_norm": 2.05206607353065, - "learning_rate": 2.495342765579811e-08, - "loss": 0.8801, - "step": 7910 - }, - { - "epoch": 0.9512415078458486, - "grad_norm": 1.6943927854296117, - "learning_rate": 2.4830910429693984e-08, - "loss": 0.9431, - "step": 7911 - }, - { - "epoch": 0.9513617507364877, - "grad_norm": 1.8733049996474418, - "learning_rate": 2.470869283517052e-08, - "loss": 1.0225, - "step": 7912 - }, - { - "epoch": 0.9514819936271268, - "grad_norm": 1.8947366819395588, - "learning_rate": 2.458677489076777e-08, - "loss": 0.9997, - "step": 7913 - }, - { - "epoch": 0.9516022365177659, - "grad_norm": 1.6979861645039402, - "learning_rate": 2.446515661498072e-08, - "loss": 1.0627, - "step": 7914 - }, - { - "epoch": 0.9517224794084049, - "grad_norm": 2.242663396115513, - "learning_rate": 2.434383802625861e-08, - "loss": 0.9679, - "step": 7915 - }, - { - "epoch": 0.9518427222990441, - "grad_norm": 1.807618638362127, - "learning_rate": 2.4222819143005168e-08, - "loss": 0.966, - "step": 7916 - }, - { - "epoch": 0.9519629651896832, - "grad_norm": 2.2872010040822714, - "learning_rate": 2.4102099983579706e-08, - "loss": 1.0413, - "step": 7917 - }, - { - "epoch": 0.9520832080803222, - "grad_norm": 1.5849135527517093, - "learning_rate": 2.3981680566294236e-08, - "loss": 0.9888, - "step": 7918 - }, - { - "epoch": 0.9522034509709614, - "grad_norm": 1.767404250555484, - "learning_rate": 2.3861560909416822e-08, - "loss": 0.9622, - "step": 7919 - }, - { - "epoch": 0.9523236938616004, - "grad_norm": 2.1288430860755887, - "learning_rate": 2.3741741031169325e-08, - "loss": 1.0518, - "step": 7920 - }, - { - "epoch": 0.9524439367522395, - "grad_norm": 1.9001225030457407, - "learning_rate": 2.3622220949728544e-08, - "loss": 0.9364, - "step": 7921 - }, - { - "epoch": 0.9525641796428787, - "grad_norm": 3.038764056381549, - "learning_rate": 2.3503000683225526e-08, - "loss": 0.8385, - "step": 7922 - }, - { - "epoch": 0.9526844225335177, - "grad_norm": 2.004237335397268, - "learning_rate": 2.3384080249745585e-08, - "loss": 1.067, - "step": 7923 - }, - { - "epoch": 0.9528046654241568, - "grad_norm": 2.5482436171266207, - "learning_rate": 2.3265459667329178e-08, - "loss": 1.0674, - "step": 7924 - }, - { - "epoch": 0.9529249083147959, - "grad_norm": 2.126482278290325, - "learning_rate": 2.31471389539708e-08, - "loss": 1.0959, - "step": 7925 - }, - { - "epoch": 0.953045151205435, - "grad_norm": 3.4005213169528554, - "learning_rate": 2.3029118127619872e-08, - "loss": 0.9553, - "step": 7926 - }, - { - "epoch": 0.953165394096074, - "grad_norm": 2.0488944538387504, - "learning_rate": 2.2911397206179628e-08, - "loss": 1.0965, - "step": 7927 - }, - { - "epoch": 0.9532856369867132, - "grad_norm": 2.6782120181961697, - "learning_rate": 2.279397620750845e-08, - "loss": 0.8541, - "step": 7928 - }, - { - "epoch": 0.9534058798773523, - "grad_norm": 2.664783726946967, - "learning_rate": 2.2676855149419195e-08, - "loss": 1.0188, - "step": 7929 - }, - { - "epoch": 0.9535261227679913, - "grad_norm": 2.6307421957652624, - "learning_rate": 2.2560034049678988e-08, - "loss": 0.9777, - "step": 7930 - }, - { - "epoch": 0.9536463656586305, - "grad_norm": 1.81188401903248, - "learning_rate": 2.2443512926008988e-08, - "loss": 0.9843, - "step": 7931 - }, - { - "epoch": 0.9537666085492695, - "grad_norm": 2.291003553223737, - "learning_rate": 2.2327291796085946e-08, - "loss": 0.9325, - "step": 7932 - }, - { - "epoch": 0.9538868514399086, - "grad_norm": 3.524237903432144, - "learning_rate": 2.2211370677540197e-08, - "loss": 1.0056, - "step": 7933 - }, - { - "epoch": 0.9540070943305478, - "grad_norm": 2.1587913022400285, - "learning_rate": 2.2095749587957012e-08, - "loss": 1.0076, - "step": 7934 - }, - { - "epoch": 0.9541273372211868, - "grad_norm": 1.7000378267904248, - "learning_rate": 2.1980428544876138e-08, - "loss": 0.9227, - "step": 7935 - }, - { - "epoch": 0.9542475801118259, - "grad_norm": 1.468888377711992, - "learning_rate": 2.1865407565791584e-08, - "loss": 0.9663, - "step": 7936 - }, - { - "epoch": 0.954367823002465, - "grad_norm": 2.516301887382761, - "learning_rate": 2.175068666815183e-08, - "loss": 1.002, - "step": 7937 - }, - { - "epoch": 0.9544880658931041, - "grad_norm": 2.8633263806875577, - "learning_rate": 2.163626586935985e-08, - "loss": 1.0204, - "step": 7938 - }, - { - "epoch": 0.9546083087837431, - "grad_norm": 10.43990494666547, - "learning_rate": 2.1522145186773755e-08, - "loss": 0.8556, - "step": 7939 - }, - { - "epoch": 0.9547285516743822, - "grad_norm": 2.141929396113081, - "learning_rate": 2.140832463770481e-08, - "loss": 1.0841, - "step": 7940 - }, - { - "epoch": 0.9548487945650214, - "grad_norm": 2.3559125542271904, - "learning_rate": 2.129480423941987e-08, - "loss": 0.9849, - "step": 7941 - }, - { - "epoch": 0.9549690374556604, - "grad_norm": 1.8217422044361349, - "learning_rate": 2.1181584009140052e-08, - "loss": 1.0334, - "step": 7942 - }, - { - "epoch": 0.9550892803462995, - "grad_norm": 4.125014333989063, - "learning_rate": 2.10686639640405e-08, - "loss": 1.0645, - "step": 7943 - }, - { - "epoch": 0.9552095232369386, - "grad_norm": 1.9351414507044749, - "learning_rate": 2.0956044121251294e-08, - "loss": 1.044, - "step": 7944 - }, - { - "epoch": 0.9553297661275777, - "grad_norm": 2.060177391154976, - "learning_rate": 2.084372449785654e-08, - "loss": 1.0437, - "step": 7945 - }, - { - "epoch": 0.9554500090182168, - "grad_norm": 1.6550382103787575, - "learning_rate": 2.0731705110895282e-08, - "loss": 0.908, - "step": 7946 - }, - { - "epoch": 0.9555702519088559, - "grad_norm": 1.856697783743022, - "learning_rate": 2.0619985977360587e-08, - "loss": 1.1006, - "step": 7947 - }, - { - "epoch": 0.955690494799495, - "grad_norm": 2.4627390246883945, - "learning_rate": 2.0508567114200237e-08, - "loss": 1.0043, - "step": 7948 - }, - { - "epoch": 0.955810737690134, - "grad_norm": 2.6749289387328608, - "learning_rate": 2.0397448538316485e-08, - "loss": 1.0115, - "step": 7949 - }, - { - "epoch": 0.9559309805807732, - "grad_norm": 2.190296456464217, - "learning_rate": 2.028663026656563e-08, - "loss": 0.8882, - "step": 7950 - }, - { - "epoch": 0.9560512234714122, - "grad_norm": 2.1452386519689055, - "learning_rate": 2.0176112315758885e-08, - "loss": 0.9444, - "step": 7951 - }, - { - "epoch": 0.9561714663620513, - "grad_norm": 7.503437017636433, - "learning_rate": 2.0065894702661957e-08, - "loss": 0.9275, - "step": 7952 - }, - { - "epoch": 0.9562917092526905, - "grad_norm": 1.5918024917222608, - "learning_rate": 1.9955977443994577e-08, - "loss": 1.0073, - "step": 7953 - }, - { - "epoch": 0.9564119521433295, - "grad_norm": 2.6155470532250567, - "learning_rate": 1.9846360556430965e-08, - "loss": 0.8642, - "step": 7954 - }, - { - "epoch": 0.9565321950339686, - "grad_norm": 3.9955366982521983, - "learning_rate": 1.973704405660004e-08, - "loss": 0.8488, - "step": 7955 - }, - { - "epoch": 0.9566524379246077, - "grad_norm": 1.5002965633957865, - "learning_rate": 1.9628027961085203e-08, - "loss": 1.0046, - "step": 7956 - }, - { - "epoch": 0.9567726808152468, - "grad_norm": 1.9755074923408724, - "learning_rate": 1.9519312286423894e-08, - "loss": 1.064, - "step": 7957 - }, - { - "epoch": 0.9568929237058859, - "grad_norm": 2.0238588108077065, - "learning_rate": 1.9410897049108255e-08, - "loss": 1.0005, - "step": 7958 - }, - { - "epoch": 0.957013166596525, - "grad_norm": 1.9693718185568596, - "learning_rate": 1.9302782265584905e-08, - "loss": 1.1425, - "step": 7959 - }, - { - "epoch": 0.9571334094871641, - "grad_norm": 2.100373566416164, - "learning_rate": 1.9194967952254282e-08, - "loss": 1.093, - "step": 7960 - }, - { - "epoch": 0.9572536523778031, - "grad_norm": 2.314252447128032, - "learning_rate": 1.9087454125472635e-08, - "loss": 1.0316, - "step": 7961 - }, - { - "epoch": 0.9573738952684423, - "grad_norm": 1.8882478003168999, - "learning_rate": 1.8980240801548696e-08, - "loss": 1.0159, - "step": 7962 - }, - { - "epoch": 0.9574941381590814, - "grad_norm": 2.032827263792407, - "learning_rate": 1.8873327996747458e-08, - "loss": 0.9703, - "step": 7963 - }, - { - "epoch": 0.9576143810497204, - "grad_norm": 1.686522589298426, - "learning_rate": 1.8766715727287053e-08, - "loss": 0.8961, - "step": 7964 - }, - { - "epoch": 0.9577346239403596, - "grad_norm": 2.2241643472228017, - "learning_rate": 1.8660404009340546e-08, - "loss": 1.014, - "step": 7965 - }, - { - "epoch": 0.9578548668309986, - "grad_norm": 0.9216603174617487, - "learning_rate": 1.8554392859035485e-08, - "loss": 0.8974, - "step": 7966 - }, - { - "epoch": 0.9579751097216377, - "grad_norm": 1.7053776114240466, - "learning_rate": 1.8448682292453444e-08, - "loss": 1.0198, - "step": 7967 - }, - { - "epoch": 0.9580953526122769, - "grad_norm": 2.020719610206352, - "learning_rate": 1.8343272325631154e-08, - "loss": 0.8968, - "step": 7968 - }, - { - "epoch": 0.9582155955029159, - "grad_norm": 2.407602103693522, - "learning_rate": 1.8238162974558492e-08, - "loss": 1.0072, - "step": 7969 - }, - { - "epoch": 0.958335838393555, - "grad_norm": 2.6727232897038173, - "learning_rate": 1.8133354255181144e-08, - "loss": 0.976, - "step": 7970 - }, - { - "epoch": 0.958456081284194, - "grad_norm": 2.0422944511538055, - "learning_rate": 1.802884618339795e-08, - "loss": 0.9851, - "step": 7971 - }, - { - "epoch": 0.9585763241748332, - "grad_norm": 2.2812143610430504, - "learning_rate": 1.7924638775062894e-08, - "loss": 1.0385, - "step": 7972 - }, - { - "epoch": 0.9586965670654722, - "grad_norm": 2.080754960671694, - "learning_rate": 1.7820732045984444e-08, - "loss": 1.047, - "step": 7973 - }, - { - "epoch": 0.9588168099561113, - "grad_norm": 1.8085772058377017, - "learning_rate": 1.7717126011924655e-08, - "loss": 0.9679, - "step": 7974 - }, - { - "epoch": 0.9589370528467505, - "grad_norm": 3.02228486714543, - "learning_rate": 1.7613820688600957e-08, - "loss": 0.9947, - "step": 7975 - }, - { - "epoch": 0.9590572957373895, - "grad_norm": 1.7220319451393944, - "learning_rate": 1.7510816091684588e-08, - "loss": 1.0183, - "step": 7976 - }, - { - "epoch": 0.9591775386280286, - "grad_norm": 2.8894431507639293, - "learning_rate": 1.740811223680083e-08, - "loss": 1.0231, - "step": 7977 - }, - { - "epoch": 0.9592977815186677, - "grad_norm": 2.332374943593427, - "learning_rate": 1.7305709139530334e-08, - "loss": 0.965, - "step": 7978 - }, - { - "epoch": 0.9594180244093068, - "grad_norm": 2.247503803322091, - "learning_rate": 1.7203606815407334e-08, - "loss": 0.9812, - "step": 7979 - }, - { - "epoch": 0.9595382672999458, - "grad_norm": 1.81157160692359, - "learning_rate": 1.7101805279920557e-08, - "loss": 1.0251, - "step": 7980 - }, - { - "epoch": 0.959658510190585, - "grad_norm": 1.9796432189083775, - "learning_rate": 1.7000304548513643e-08, - "loss": 1.0467, - "step": 7981 - }, - { - "epoch": 0.9597787530812241, - "grad_norm": 2.481810775921087, - "learning_rate": 1.6899104636583394e-08, - "loss": 1.0523, - "step": 7982 - }, - { - "epoch": 0.9598989959718631, - "grad_norm": 0.811878516855471, - "learning_rate": 1.6798205559482638e-08, - "loss": 0.8852, - "step": 7983 - }, - { - "epoch": 0.9600192388625023, - "grad_norm": 1.6632500039494416, - "learning_rate": 1.669760733251713e-08, - "loss": 0.9961, - "step": 7984 - }, - { - "epoch": 0.9601394817531413, - "grad_norm": 1.7492587018103911, - "learning_rate": 1.659730997094755e-08, - "loss": 1.0576, - "step": 7985 - }, - { - "epoch": 0.9602597246437804, - "grad_norm": 1.852003600296245, - "learning_rate": 1.6497313489989283e-08, - "loss": 0.85, - "step": 7986 - }, - { - "epoch": 0.9603799675344196, - "grad_norm": 3.8563505978809625, - "learning_rate": 1.639761790481131e-08, - "loss": 0.9237, - "step": 7987 - }, - { - "epoch": 0.9605002104250586, - "grad_norm": 1.9537667402744965, - "learning_rate": 1.6298223230537754e-08, - "loss": 1.0202, - "step": 7988 - }, - { - "epoch": 0.9606204533156977, - "grad_norm": 4.568369812808726, - "learning_rate": 1.619912948224611e-08, - "loss": 0.9262, - "step": 7989 - }, - { - "epoch": 0.9607406962063368, - "grad_norm": 2.468410033949612, - "learning_rate": 1.6100336674969682e-08, - "loss": 0.8317, - "step": 7990 - }, - { - "epoch": 0.9608609390969759, - "grad_norm": 1.7623066058820116, - "learning_rate": 1.600184482369449e-08, - "loss": 1.002, - "step": 7991 - }, - { - "epoch": 0.960981181987615, - "grad_norm": 2.2957569956564368, - "learning_rate": 1.5903653943362126e-08, - "loss": 1.1247, - "step": 7992 - }, - { - "epoch": 0.9611014248782541, - "grad_norm": 1.8212397755353538, - "learning_rate": 1.580576404886802e-08, - "loss": 0.9904, - "step": 7993 - }, - { - "epoch": 0.9612216677688932, - "grad_norm": 4.341757337876665, - "learning_rate": 1.570817515506162e-08, - "loss": 1.0318, - "step": 7994 - }, - { - "epoch": 0.9613419106595322, - "grad_norm": 1.8561797697094529, - "learning_rate": 1.561088727674753e-08, - "loss": 1.0407, - "step": 7995 - }, - { - "epoch": 0.9614621535501714, - "grad_norm": 2.284664785892873, - "learning_rate": 1.551390042868417e-08, - "loss": 0.9408, - "step": 7996 - }, - { - "epoch": 0.9615823964408104, - "grad_norm": 3.3514216466796607, - "learning_rate": 1.5417214625584207e-08, - "loss": 0.9362, - "step": 7997 - }, - { - "epoch": 0.9617026393314495, - "grad_norm": 1.6436210523728092, - "learning_rate": 1.5320829882114806e-08, - "loss": 1.0778, - "step": 7998 - }, - { - "epoch": 0.9618228822220887, - "grad_norm": 1.9084502482796588, - "learning_rate": 1.5224746212897378e-08, - "loss": 1.0152, - "step": 7999 - }, - { - "epoch": 0.9619431251127277, - "grad_norm": 1.7437965250132235, - "learning_rate": 1.512896363250804e-08, - "loss": 0.9976, - "step": 8000 - }, - { - "epoch": 0.9620633680033668, - "grad_norm": 1.8362127219532767, - "learning_rate": 1.503348215547673e-08, - "loss": 0.987, - "step": 8001 - }, - { - "epoch": 0.962183610894006, - "grad_norm": 1.7946703447136072, - "learning_rate": 1.4938301796288078e-08, - "loss": 1.0324, - "step": 8002 - }, - { - "epoch": 0.962303853784645, - "grad_norm": 2.575503224208845, - "learning_rate": 1.4843422569380537e-08, - "loss": 1.0565, - "step": 8003 - }, - { - "epoch": 0.9624240966752841, - "grad_norm": 1.7898192824877164, - "learning_rate": 1.4748844489147483e-08, - "loss": 1.0561, - "step": 8004 - }, - { - "epoch": 0.9625443395659231, - "grad_norm": 5.32544576005943, - "learning_rate": 1.4654567569936326e-08, - "loss": 0.9402, - "step": 8005 - }, - { - "epoch": 0.9626645824565623, - "grad_norm": 2.9326362113831013, - "learning_rate": 1.456059182604874e-08, - "loss": 1.0568, - "step": 8006 - }, - { - "epoch": 0.9627848253472013, - "grad_norm": 2.7014485711895353, - "learning_rate": 1.4466917271740653e-08, - "loss": 0.9849, - "step": 8007 - }, - { - "epoch": 0.9629050682378404, - "grad_norm": 2.2367557033586767, - "learning_rate": 1.4373543921222697e-08, - "loss": 0.9069, - "step": 8008 - }, - { - "epoch": 0.9630253111284796, - "grad_norm": 2.0931993585991964, - "learning_rate": 1.428047178865932e-08, - "loss": 1.0115, - "step": 8009 - }, - { - "epoch": 0.9631455540191186, - "grad_norm": 1.7088966216309374, - "learning_rate": 1.4187700888169451e-08, - "loss": 0.9731, - "step": 8010 - }, - { - "epoch": 0.9632657969097577, - "grad_norm": 0.8732990694301185, - "learning_rate": 1.40952312338265e-08, - "loss": 0.8795, - "step": 8011 - }, - { - "epoch": 0.9633860398003968, - "grad_norm": 1.728621002261053, - "learning_rate": 1.4003062839657909e-08, - "loss": 0.9133, - "step": 8012 - }, - { - "epoch": 0.9635062826910359, - "grad_norm": 1.7551653787882704, - "learning_rate": 1.391119571964583e-08, - "loss": 1.0261, - "step": 8013 - }, - { - "epoch": 0.9636265255816749, - "grad_norm": 1.9001836085696706, - "learning_rate": 1.3819629887726225e-08, - "loss": 0.9586, - "step": 8014 - }, - { - "epoch": 0.9637467684723141, - "grad_norm": 1.8446070085581179, - "learning_rate": 1.3728365357789317e-08, - "loss": 0.9956, - "step": 8015 - }, - { - "epoch": 0.9638670113629532, - "grad_norm": 3.6620485672034895, - "learning_rate": 1.3637402143680254e-08, - "loss": 0.9959, - "step": 8016 - }, - { - "epoch": 0.9639872542535922, - "grad_norm": 0.7679113387945692, - "learning_rate": 1.3546740259197998e-08, - "loss": 0.8048, - "step": 8017 - }, - { - "epoch": 0.9641074971442314, - "grad_norm": 2.3586836048155617, - "learning_rate": 1.3456379718095989e-08, - "loss": 0.9278, - "step": 8018 - }, - { - "epoch": 0.9642277400348704, - "grad_norm": 0.8732796514431295, - "learning_rate": 1.3366320534081487e-08, - "loss": 0.8836, - "step": 8019 - }, - { - "epoch": 0.9643479829255095, - "grad_norm": 3.839066792580018, - "learning_rate": 1.3276562720816675e-08, - "loss": 0.9799, - "step": 8020 - }, - { - "epoch": 0.9644682258161487, - "grad_norm": 3.27976403876306, - "learning_rate": 1.3187106291917549e-08, - "loss": 1.0577, - "step": 8021 - }, - { - "epoch": 0.9645884687067877, - "grad_norm": 4.578686155748371, - "learning_rate": 1.309795126095503e-08, - "loss": 0.9294, - "step": 8022 - }, - { - "epoch": 0.9647087115974268, - "grad_norm": 2.2987147637923893, - "learning_rate": 1.3009097641453192e-08, - "loss": 1.035, - "step": 8023 - }, - { - "epoch": 0.9648289544880659, - "grad_norm": 1.775705911739146, - "learning_rate": 1.2920545446891474e-08, - "loss": 0.9908, - "step": 8024 - }, - { - "epoch": 0.964949197378705, - "grad_norm": 1.7738791432590684, - "learning_rate": 1.2832294690703127e-08, - "loss": 0.9286, - "step": 8025 - }, - { - "epoch": 0.965069440269344, - "grad_norm": 2.1031538033178068, - "learning_rate": 1.2744345386275668e-08, - "loss": 1.0024, - "step": 8026 - }, - { - "epoch": 0.9651896831599832, - "grad_norm": 1.6229621874293811, - "learning_rate": 1.265669754695109e-08, - "loss": 1.008, - "step": 8027 - }, - { - "epoch": 0.9653099260506223, - "grad_norm": 1.9439785838262211, - "learning_rate": 1.2569351186025201e-08, - "loss": 1.0495, - "step": 8028 - }, - { - "epoch": 0.9654301689412613, - "grad_norm": 1.5537157712006442, - "learning_rate": 1.2482306316748737e-08, - "loss": 0.9768, - "step": 8029 - }, - { - "epoch": 0.9655504118319005, - "grad_norm": 1.7270286931811303, - "learning_rate": 1.2395562952326021e-08, - "loss": 1.0136, - "step": 8030 - }, - { - "epoch": 0.9656706547225395, - "grad_norm": 2.1263989724118724, - "learning_rate": 1.2309121105916309e-08, - "loss": 1.0378, - "step": 8031 - }, - { - "epoch": 0.9657908976131786, - "grad_norm": 3.1429290670364107, - "learning_rate": 1.222298079063222e-08, - "loss": 0.9137, - "step": 8032 - }, - { - "epoch": 0.9659111405038178, - "grad_norm": 1.905291832424976, - "learning_rate": 1.2137142019541524e-08, - "loss": 0.9562, - "step": 8033 - }, - { - "epoch": 0.9660313833944568, - "grad_norm": 2.3609755184159726, - "learning_rate": 1.2051604805666027e-08, - "loss": 0.9667, - "step": 8034 - }, - { - "epoch": 0.9661516262850959, - "grad_norm": 2.622816721916096, - "learning_rate": 1.196636916198135e-08, - "loss": 1.0134, - "step": 8035 - }, - { - "epoch": 0.9662718691757349, - "grad_norm": 1.984091971330677, - "learning_rate": 1.1881435101418036e-08, - "loss": 1.0039, - "step": 8036 - }, - { - "epoch": 0.9663921120663741, - "grad_norm": 0.772233237406005, - "learning_rate": 1.1796802636860003e-08, - "loss": 0.9346, - "step": 8037 - }, - { - "epoch": 0.9665123549570132, - "grad_norm": 3.3929460728972494, - "learning_rate": 1.1712471781146316e-08, - "loss": 0.9576, - "step": 8038 - }, - { - "epoch": 0.9666325978476522, - "grad_norm": 2.0172694395695605, - "learning_rate": 1.1628442547069628e-08, - "loss": 0.903, - "step": 8039 - }, - { - "epoch": 0.9667528407382914, - "grad_norm": 2.469825993018742, - "learning_rate": 1.1544714947377521e-08, - "loss": 0.9968, - "step": 8040 - }, - { - "epoch": 0.9668730836289304, - "grad_norm": 2.5043386005739046, - "learning_rate": 1.1461288994770945e-08, - "loss": 0.9281, - "step": 8041 - }, - { - "epoch": 0.9669933265195695, - "grad_norm": 2.109013392218245, - "learning_rate": 1.1378164701906002e-08, - "loss": 1.0043, - "step": 8042 - }, - { - "epoch": 0.9671135694102087, - "grad_norm": 1.8462723757369999, - "learning_rate": 1.1295342081392156e-08, - "loss": 0.892, - "step": 8043 - }, - { - "epoch": 0.9672338123008477, - "grad_norm": 1.55021191963335, - "learning_rate": 1.1212821145793804e-08, - "loss": 0.9181, - "step": 8044 - }, - { - "epoch": 0.9673540551914868, - "grad_norm": 1.8962628503312473, - "learning_rate": 1.1130601907629156e-08, - "loss": 1.0168, - "step": 8045 - }, - { - "epoch": 0.9674742980821259, - "grad_norm": 0.8389077937253524, - "learning_rate": 1.1048684379370899e-08, - "loss": 0.907, - "step": 8046 - }, - { - "epoch": 0.967594540972765, - "grad_norm": 2.310139972524927, - "learning_rate": 1.0967068573445759e-08, - "loss": 0.9756, - "step": 8047 - }, - { - "epoch": 0.967714783863404, - "grad_norm": 2.326943219189246, - "learning_rate": 1.0885754502234945e-08, - "loss": 0.8784, - "step": 8048 - }, - { - "epoch": 0.9678350267540432, - "grad_norm": 2.2996371522045203, - "learning_rate": 1.08047421780737e-08, - "loss": 1.0121, - "step": 8049 - }, - { - "epoch": 0.9679552696446823, - "grad_norm": 3.3658825675711266, - "learning_rate": 1.0724031613251305e-08, - "loss": 0.9691, - "step": 8050 - }, - { - "epoch": 0.9680755125353213, - "grad_norm": 2.6048607981972127, - "learning_rate": 1.0643622820011744e-08, - "loss": 0.8876, - "step": 8051 - }, - { - "epoch": 0.9681957554259605, - "grad_norm": 2.1762988366919913, - "learning_rate": 1.0563515810552814e-08, - "loss": 0.9128, - "step": 8052 - }, - { - "epoch": 0.9683159983165995, - "grad_norm": 1.4332662514950112, - "learning_rate": 1.0483710597026795e-08, - "loss": 0.9646, - "step": 8053 - }, - { - "epoch": 0.9684362412072386, - "grad_norm": 15.717679239436976, - "learning_rate": 1.0404207191540227e-08, - "loss": 0.9605, - "step": 8054 - }, - { - "epoch": 0.9685564840978778, - "grad_norm": 2.361676741771255, - "learning_rate": 1.0325005606153236e-08, - "loss": 0.9814, - "step": 8055 - }, - { - "epoch": 0.9686767269885168, - "grad_norm": 2.7938497367652997, - "learning_rate": 1.0246105852881104e-08, - "loss": 1.0155, - "step": 8056 - }, - { - "epoch": 0.9687969698791559, - "grad_norm": 1.921987142625053, - "learning_rate": 1.0167507943692476e-08, - "loss": 1.0162, - "step": 8057 - }, - { - "epoch": 0.968917212769795, - "grad_norm": 2.7133325556004917, - "learning_rate": 1.008921189051093e-08, - "loss": 0.9449, - "step": 8058 - }, - { - "epoch": 0.9690374556604341, - "grad_norm": 2.2907080870751844, - "learning_rate": 1.0011217705213848e-08, - "loss": 0.9991, - "step": 8059 - }, - { - "epoch": 0.9691576985510731, - "grad_norm": 1.9304962934306775, - "learning_rate": 9.933525399632658e-09, - "loss": 0.9826, - "step": 8060 - }, - { - "epoch": 0.9692779414417123, - "grad_norm": 1.7279686358660753, - "learning_rate": 9.856134985553488e-09, - "loss": 0.8839, - "step": 8061 - }, - { - "epoch": 0.9693981843323514, - "grad_norm": 1.979568788601738, - "learning_rate": 9.77904647471628e-09, - "loss": 0.959, - "step": 8062 - }, - { - "epoch": 0.9695184272229904, - "grad_norm": 1.7441180698692067, - "learning_rate": 9.702259878815454e-09, - "loss": 0.9658, - "step": 8063 - }, - { - "epoch": 0.9696386701136296, - "grad_norm": 2.0672934345715905, - "learning_rate": 9.625775209499254e-09, - "loss": 0.9742, - "step": 8064 - }, - { - "epoch": 0.9697589130042686, - "grad_norm": 2.204524468247861, - "learning_rate": 9.549592478370172e-09, - "loss": 0.97, - "step": 8065 - }, - { - "epoch": 0.9698791558949077, - "grad_norm": 1.813307220654308, - "learning_rate": 9.473711696985632e-09, - "loss": 1.0268, - "step": 8066 - }, - { - "epoch": 0.9699993987855468, - "grad_norm": 2.158147778383933, - "learning_rate": 9.398132876856201e-09, - "loss": 0.9824, - "step": 8067 - }, - { - "epoch": 0.9701196416761859, - "grad_norm": 0.7812667463415145, - "learning_rate": 9.322856029447379e-09, - "loss": 0.8765, - "step": 8068 - }, - { - "epoch": 0.970239884566825, - "grad_norm": 2.529946073779137, - "learning_rate": 9.247881166178695e-09, - "loss": 1.0288, - "step": 8069 - }, - { - "epoch": 0.970360127457464, - "grad_norm": 3.0754945620912153, - "learning_rate": 9.173208298423274e-09, - "loss": 0.9995, - "step": 8070 - }, - { - "epoch": 0.9704803703481032, - "grad_norm": 1.6967182571771546, - "learning_rate": 9.09883743750961e-09, - "loss": 0.9921, - "step": 8071 - }, - { - "epoch": 0.9706006132387422, - "grad_norm": 1.7847912037436462, - "learning_rate": 9.024768594719124e-09, - "loss": 1.0633, - "step": 8072 - }, - { - "epoch": 0.9707208561293813, - "grad_norm": 2.1097762114823033, - "learning_rate": 8.95100178128816e-09, - "loss": 0.952, - "step": 8073 - }, - { - "epoch": 0.9708410990200205, - "grad_norm": 1.9943298788056167, - "learning_rate": 8.877537008407321e-09, - "loss": 0.9337, - "step": 8074 - }, - { - "epoch": 0.9709613419106595, - "grad_norm": 2.1546297481949517, - "learning_rate": 8.804374287221028e-09, - "loss": 0.9101, - "step": 8075 - }, - { - "epoch": 0.9710815848012986, - "grad_norm": 1.6470096424517682, - "learning_rate": 8.731513628827958e-09, - "loss": 1.0798, - "step": 8076 - }, - { - "epoch": 0.9712018276919377, - "grad_norm": 2.3828541236118634, - "learning_rate": 8.658955044280825e-09, - "loss": 1.0537, - "step": 8077 - }, - { - "epoch": 0.9713220705825768, - "grad_norm": 1.5375961831874096, - "learning_rate": 8.586698544587268e-09, - "loss": 1.0003, - "step": 8078 - }, - { - "epoch": 0.9714423134732159, - "grad_norm": 2.9079274904532757, - "learning_rate": 8.514744140707853e-09, - "loss": 0.9714, - "step": 8079 - }, - { - "epoch": 0.971562556363855, - "grad_norm": 1.6421784085821503, - "learning_rate": 8.443091843558515e-09, - "loss": 0.9918, - "step": 8080 - }, - { - "epoch": 0.9716827992544941, - "grad_norm": 3.8934706733160542, - "learning_rate": 8.37174166400878e-09, - "loss": 0.8807, - "step": 8081 - }, - { - "epoch": 0.9718030421451331, - "grad_norm": 1.8709061116404555, - "learning_rate": 8.300693612881992e-09, - "loss": 1.0813, - "step": 8082 - }, - { - "epoch": 0.9719232850357723, - "grad_norm": 1.9489204010227572, - "learning_rate": 8.22994770095664e-09, - "loss": 1.0302, - "step": 8083 - }, - { - "epoch": 0.9720435279264114, - "grad_norm": 2.343898717612451, - "learning_rate": 8.159503938964585e-09, - "loss": 0.9804, - "step": 8084 - }, - { - "epoch": 0.9721637708170504, - "grad_norm": 1.889957341615956, - "learning_rate": 8.089362337592164e-09, - "loss": 0.9371, - "step": 8085 - }, - { - "epoch": 0.9722840137076896, - "grad_norm": 1.7302817077817998, - "learning_rate": 8.019522907479536e-09, - "loss": 0.9523, - "step": 8086 - }, - { - "epoch": 0.9724042565983286, - "grad_norm": 3.7146345173890687, - "learning_rate": 7.949985659221558e-09, - "loss": 0.9982, - "step": 8087 - }, - { - "epoch": 0.9725244994889677, - "grad_norm": 1.8408908334676555, - "learning_rate": 7.880750603366904e-09, - "loss": 1.0209, - "step": 8088 - }, - { - "epoch": 0.9726447423796069, - "grad_norm": 2.1003984254986965, - "learning_rate": 7.811817750418282e-09, - "loss": 1.0212, - "step": 8089 - }, - { - "epoch": 0.9727649852702459, - "grad_norm": 1.652326723556632, - "learning_rate": 7.743187110833105e-09, - "loss": 1.0273, - "step": 8090 - }, - { - "epoch": 0.972885228160885, - "grad_norm": 1.4381318569558679, - "learning_rate": 7.674858695022602e-09, - "loss": 1.0395, - "step": 8091 - }, - { - "epoch": 0.9730054710515241, - "grad_norm": 2.0767044145185327, - "learning_rate": 7.606832513351591e-09, - "loss": 0.9914, - "step": 8092 - }, - { - "epoch": 0.9731257139421632, - "grad_norm": 0.8484005099781091, - "learning_rate": 7.539108576140264e-09, - "loss": 0.9083, - "step": 8093 - }, - { - "epoch": 0.9732459568328022, - "grad_norm": 2.2070094284734276, - "learning_rate": 7.471686893661732e-09, - "loss": 0.9221, - "step": 8094 - }, - { - "epoch": 0.9733661997234414, - "grad_norm": 2.18188573541243, - "learning_rate": 7.4045674761442636e-09, - "loss": 0.8732, - "step": 8095 - }, - { - "epoch": 0.9734864426140805, - "grad_norm": 1.9440437683664904, - "learning_rate": 7.337750333769488e-09, - "loss": 0.9583, - "step": 8096 - }, - { - "epoch": 0.9736066855047195, - "grad_norm": 1.9754857081709971, - "learning_rate": 7.2712354766737425e-09, - "loss": 0.9562, - "step": 8097 - }, - { - "epoch": 0.9737269283953586, - "grad_norm": 2.95376289381005, - "learning_rate": 7.2050229149469565e-09, - "loss": 1.0347, - "step": 8098 - }, - { - "epoch": 0.9738471712859977, - "grad_norm": 1.7043452659729867, - "learning_rate": 7.139112658633984e-09, - "loss": 0.8594, - "step": 8099 - }, - { - "epoch": 0.9739674141766368, - "grad_norm": 1.8650013403593673, - "learning_rate": 7.073504717733048e-09, - "loss": 0.9345, - "step": 8100 - }, - { - "epoch": 0.9740876570672758, - "grad_norm": 0.7327106771105333, - "learning_rate": 7.008199102196855e-09, - "loss": 0.8233, - "step": 8101 - }, - { - "epoch": 0.974207899957915, - "grad_norm": 0.8272862055058772, - "learning_rate": 6.9431958219321464e-09, - "loss": 0.851, - "step": 8102 - }, - { - "epoch": 0.9743281428485541, - "grad_norm": 1.755899430986029, - "learning_rate": 6.878494886800146e-09, - "loss": 1.011, - "step": 8103 - }, - { - "epoch": 0.9744483857391931, - "grad_norm": 1.7443095058172304, - "learning_rate": 6.814096306615669e-09, - "loss": 0.9861, - "step": 8104 - }, - { - "epoch": 0.9745686286298323, - "grad_norm": 8.140955356330702, - "learning_rate": 6.750000091148011e-09, - "loss": 0.8809, - "step": 8105 - }, - { - "epoch": 0.9746888715204713, - "grad_norm": 2.2366952653052374, - "learning_rate": 6.686206250120729e-09, - "loss": 0.9634, - "step": 8106 - }, - { - "epoch": 0.9748091144111104, - "grad_norm": 1.823951979414938, - "learning_rate": 6.622714793210749e-09, - "loss": 0.9757, - "step": 8107 - }, - { - "epoch": 0.9749293573017496, - "grad_norm": 2.806459934733595, - "learning_rate": 6.559525730050364e-09, - "loss": 1.009, - "step": 8108 - }, - { - "epoch": 0.9750496001923886, - "grad_norm": 2.0718264244679814, - "learning_rate": 6.496639070224574e-09, - "loss": 0.9968, - "step": 8109 - }, - { - "epoch": 0.9751698430830277, - "grad_norm": 2.056316469735642, - "learning_rate": 6.4340548232739714e-09, - "loss": 1.062, - "step": 8110 - }, - { - "epoch": 0.9752900859736668, - "grad_norm": 1.946502811620932, - "learning_rate": 6.371772998692071e-09, - "loss": 1.0243, - "step": 8111 - }, - { - "epoch": 0.9754103288643059, - "grad_norm": 2.562235841859188, - "learning_rate": 6.309793605927094e-09, - "loss": 0.88, - "step": 8112 - }, - { - "epoch": 0.975530571754945, - "grad_norm": 1.8731330477847454, - "learning_rate": 6.248116654381297e-09, - "loss": 1.0258, - "step": 8113 - }, - { - "epoch": 0.9756508146455841, - "grad_norm": 2.4228234187578437, - "learning_rate": 6.186742153410751e-09, - "loss": 0.9562, - "step": 8114 - }, - { - "epoch": 0.9757710575362232, - "grad_norm": 2.010315637240637, - "learning_rate": 6.125670112326453e-09, - "loss": 1.0984, - "step": 8115 - }, - { - "epoch": 0.9758913004268622, - "grad_norm": 2.4617875825925033, - "learning_rate": 6.064900540392548e-09, - "loss": 0.9325, - "step": 8116 - }, - { - "epoch": 0.9760115433175014, - "grad_norm": 6.502304548928901, - "learning_rate": 6.0044334468278835e-09, - "loss": 1.0212, - "step": 8117 - }, - { - "epoch": 0.9761317862081405, - "grad_norm": 1.5487823287138998, - "learning_rate": 5.944268840805345e-09, - "loss": 0.9524, - "step": 8118 - }, - { - "epoch": 0.9762520290987795, - "grad_norm": 2.71157213857146, - "learning_rate": 5.88440673145163e-09, - "loss": 0.8732, - "step": 8119 - }, - { - "epoch": 0.9763722719894187, - "grad_norm": 2.5305679721111245, - "learning_rate": 5.824847127848142e-09, - "loss": 1.0567, - "step": 8120 - }, - { - "epoch": 0.9764925148800577, - "grad_norm": 1.722083586920384, - "learning_rate": 5.765590039029433e-09, - "loss": 1.0087, - "step": 8121 - }, - { - "epoch": 0.9766127577706968, - "grad_norm": 1.56033083073128, - "learning_rate": 5.706635473985422e-09, - "loss": 0.9421, - "step": 8122 - }, - { - "epoch": 0.976733000661336, - "grad_norm": 1.6666048356348968, - "learning_rate": 5.6479834416591764e-09, - "loss": 1.0858, - "step": 8123 - }, - { - "epoch": 0.976853243551975, - "grad_norm": 1.9431423353597979, - "learning_rate": 5.589633950947803e-09, - "loss": 0.9022, - "step": 8124 - }, - { - "epoch": 0.9769734864426141, - "grad_norm": 2.5935055720966433, - "learning_rate": 5.5315870107035535e-09, - "loss": 0.9249, - "step": 8125 - }, - { - "epoch": 0.9770937293332532, - "grad_norm": 1.801534962505688, - "learning_rate": 5.473842629731607e-09, - "loss": 1.0125, - "step": 8126 - }, - { - "epoch": 0.9772139722238923, - "grad_norm": 2.7377755083071946, - "learning_rate": 5.416400816792066e-09, - "loss": 1.0093, - "step": 8127 - }, - { - "epoch": 0.9773342151145313, - "grad_norm": 4.528094014758764, - "learning_rate": 5.359261580598407e-09, - "loss": 1.0115, - "step": 8128 - }, - { - "epoch": 0.9774544580051704, - "grad_norm": 2.4606549782025375, - "learning_rate": 5.302424929819027e-09, - "loss": 1.0141, - "step": 8129 - }, - { - "epoch": 0.9775747008958096, - "grad_norm": 2.0972067921501036, - "learning_rate": 5.24589087307592e-09, - "loss": 0.9567, - "step": 8130 - }, - { - "epoch": 0.9776949437864486, - "grad_norm": 1.5184680975362879, - "learning_rate": 5.189659418944891e-09, - "loss": 0.883, - "step": 8131 - }, - { - "epoch": 0.9778151866770877, - "grad_norm": 2.5215178471950472, - "learning_rate": 5.133730575956674e-09, - "loss": 0.9979, - "step": 8132 - }, - { - "epoch": 0.9779354295677268, - "grad_norm": 2.0832757433242937, - "learning_rate": 5.0781043525953696e-09, - "loss": 0.9469, - "step": 8133 - }, - { - "epoch": 0.9780556724583659, - "grad_norm": 1.801741668711551, - "learning_rate": 5.0227807572995605e-09, - "loss": 0.9569, - "step": 8134 - }, - { - "epoch": 0.9781759153490049, - "grad_norm": 2.804132518210846, - "learning_rate": 4.967759798461646e-09, - "loss": 0.9037, - "step": 8135 - }, - { - "epoch": 0.9782961582396441, - "grad_norm": 2.7394512903764086, - "learning_rate": 4.913041484428282e-09, - "loss": 0.9739, - "step": 8136 - }, - { - "epoch": 0.9784164011302832, - "grad_norm": 3.33433876768696, - "learning_rate": 4.858625823500384e-09, - "loss": 0.9791, - "step": 8137 - }, - { - "epoch": 0.9785366440209222, - "grad_norm": 2.169501345424582, - "learning_rate": 4.80451282393246e-09, - "loss": 0.9682, - "step": 8138 - }, - { - "epoch": 0.9786568869115614, - "grad_norm": 1.810518431208612, - "learning_rate": 4.750702493933722e-09, - "loss": 0.9044, - "step": 8139 - }, - { - "epoch": 0.9787771298022004, - "grad_norm": 1.9216442434828416, - "learning_rate": 4.697194841666974e-09, - "loss": 1.081, - "step": 8140 - }, - { - "epoch": 0.9788973726928395, - "grad_norm": 1.7908612000628705, - "learning_rate": 4.6439898752492764e-09, - "loss": 1.0464, - "step": 8141 - }, - { - "epoch": 0.9790176155834787, - "grad_norm": 0.7494220095947048, - "learning_rate": 4.591087602751731e-09, - "loss": 0.8709, - "step": 8142 - }, - { - "epoch": 0.9791378584741177, - "grad_norm": 2.0515237681094907, - "learning_rate": 4.538488032199916e-09, - "loss": 0.9522, - "step": 8143 - }, - { - "epoch": 0.9792581013647568, - "grad_norm": 25.772136173194635, - "learning_rate": 4.486191171572784e-09, - "loss": 0.9113, - "step": 8144 - }, - { - "epoch": 0.9793783442553959, - "grad_norm": 1.4489186431180323, - "learning_rate": 4.434197028803766e-09, - "loss": 1.0103, - "step": 8145 - }, - { - "epoch": 0.979498587146035, - "grad_norm": 1.932034690474406, - "learning_rate": 4.3825056117805514e-09, - "loss": 1.0443, - "step": 8146 - }, - { - "epoch": 0.979618830036674, - "grad_norm": 25.77524669688258, - "learning_rate": 4.331116928344425e-09, - "loss": 1.032, - "step": 8147 - }, - { - "epoch": 0.9797390729273132, - "grad_norm": 3.538267875876896, - "learning_rate": 4.28003098629115e-09, - "loss": 0.866, - "step": 8148 - }, - { - "epoch": 0.9798593158179523, - "grad_norm": 2.0386438634248707, - "learning_rate": 4.229247793370305e-09, - "loss": 1.0229, - "step": 8149 - }, - { - "epoch": 0.9799795587085913, - "grad_norm": 1.642115397372165, - "learning_rate": 4.178767357285951e-09, - "loss": 0.9348, - "step": 8150 - }, - { - "epoch": 0.9800998015992305, - "grad_norm": 2.1695480505375557, - "learning_rate": 4.128589685695516e-09, - "loss": 0.9339, - "step": 8151 - }, - { - "epoch": 0.9802200444898695, - "grad_norm": 1.9871938313675626, - "learning_rate": 4.078714786211135e-09, - "loss": 1.0759, - "step": 8152 - }, - { - "epoch": 0.9803402873805086, - "grad_norm": 1.716337418076769, - "learning_rate": 4.029142666398977e-09, - "loss": 0.9928, - "step": 8153 - }, - { - "epoch": 0.9804605302711478, - "grad_norm": 2.3819561529535984, - "learning_rate": 3.979873333778805e-09, - "loss": 1.0296, - "step": 8154 - }, - { - "epoch": 0.9805807731617868, - "grad_norm": 2.254602810544019, - "learning_rate": 3.930906795824862e-09, - "loss": 0.9769, - "step": 8155 - }, - { - "epoch": 0.9807010160524259, - "grad_norm": 2.171145748958419, - "learning_rate": 3.882243059965207e-09, - "loss": 0.9997, - "step": 8156 - }, - { - "epoch": 0.980821258943065, - "grad_norm": 2.9316944623117056, - "learning_rate": 3.833882133582156e-09, - "loss": 0.8996, - "step": 8157 - }, - { - "epoch": 0.9809415018337041, - "grad_norm": 1.5811137234915011, - "learning_rate": 3.785824024012285e-09, - "loss": 1.0033, - "step": 8158 - }, - { - "epoch": 0.9810617447243432, - "grad_norm": 1.3836128806499186, - "learning_rate": 3.738068738545541e-09, - "loss": 1.0109, - "step": 8159 - }, - { - "epoch": 0.9811819876149822, - "grad_norm": 2.2545573386664977, - "learning_rate": 3.6906162844265733e-09, - "loss": 1.0108, - "step": 8160 - }, - { - "epoch": 0.9813022305056214, - "grad_norm": 2.0442673474583803, - "learning_rate": 3.643466668853845e-09, - "loss": 0.9426, - "step": 8161 - }, - { - "epoch": 0.9814224733962604, - "grad_norm": 2.180657830509176, - "learning_rate": 3.59661989898008e-09, - "loss": 0.982, - "step": 8162 - }, - { - "epoch": 0.9815427162868995, - "grad_norm": 2.01447422977419, - "learning_rate": 3.5500759819115934e-09, - "loss": 0.9967, - "step": 8163 - }, - { - "epoch": 0.9816629591775387, - "grad_norm": 1.8748949657267002, - "learning_rate": 3.5038349247094034e-09, - "loss": 1.0474, - "step": 8164 - }, - { - "epoch": 0.9817832020681777, - "grad_norm": 3.18812644636895, - "learning_rate": 3.4578967343878994e-09, - "loss": 0.9998, - "step": 8165 - }, - { - "epoch": 0.9819034449588168, - "grad_norm": 2.042699383980773, - "learning_rate": 3.4122614179161733e-09, - "loss": 1.0343, - "step": 8166 - }, - { - "epoch": 0.9820236878494559, - "grad_norm": 2.106568794009069, - "learning_rate": 3.36692898221691e-09, - "loss": 1.0014, - "step": 8167 - }, - { - "epoch": 0.982143930740095, - "grad_norm": 1.8813880185416978, - "learning_rate": 3.3218994341668305e-09, - "loss": 0.9622, - "step": 8168 - }, - { - "epoch": 0.982264173630734, - "grad_norm": 1.4930401508019595, - "learning_rate": 3.2771727805971373e-09, - "loss": 0.986, - "step": 8169 - }, - { - "epoch": 0.9823844165213732, - "grad_norm": 1.6853320713134057, - "learning_rate": 3.232749028292847e-09, - "loss": 0.9989, - "step": 8170 - }, - { - "epoch": 0.9825046594120123, - "grad_norm": 2.065369661509552, - "learning_rate": 3.188628183992792e-09, - "loss": 1.1094, - "step": 8171 - }, - { - "epoch": 0.9826249023026513, - "grad_norm": 0.8069465925159268, - "learning_rate": 3.1448102543902844e-09, - "loss": 0.8879, - "step": 8172 - }, - { - "epoch": 0.9827451451932905, - "grad_norm": 1.8725410868436838, - "learning_rate": 3.1012952461324515e-09, - "loss": 0.9019, - "step": 8173 - }, - { - "epoch": 0.9828653880839295, - "grad_norm": 1.9442669613385182, - "learning_rate": 3.0580831658204575e-09, - "loss": 0.9761, - "step": 8174 - }, - { - "epoch": 0.9829856309745686, - "grad_norm": 1.5503689388374113, - "learning_rate": 3.015174020009281e-09, - "loss": 1.0119, - "step": 8175 - }, - { - "epoch": 0.9831058738652078, - "grad_norm": 2.1902450776251126, - "learning_rate": 2.9725678152086043e-09, - "loss": 0.9767, - "step": 8176 - }, - { - "epoch": 0.9832261167558468, - "grad_norm": 2.565206633455999, - "learning_rate": 2.930264557881257e-09, - "loss": 1.0491, - "step": 8177 - }, - { - "epoch": 0.9833463596464859, - "grad_norm": 0.8124312958646631, - "learning_rate": 2.8882642544452163e-09, - "loss": 0.8608, - "step": 8178 - }, - { - "epoch": 0.983466602537125, - "grad_norm": 2.6462416646661246, - "learning_rate": 2.8465669112716083e-09, - "loss": 0.9759, - "step": 8179 - }, - { - "epoch": 0.9835868454277641, - "grad_norm": 8.31090696744838, - "learning_rate": 2.8051725346858177e-09, - "loss": 0.9821, - "step": 8180 - }, - { - "epoch": 0.9837070883184031, - "grad_norm": 2.4372919434004006, - "learning_rate": 2.7640811309674883e-09, - "loss": 0.9344, - "step": 8181 - }, - { - "epoch": 0.9838273312090423, - "grad_norm": 1.6769457774486716, - "learning_rate": 2.7232927063498557e-09, - "loss": 1.0325, - "step": 8182 - }, - { - "epoch": 0.9839475740996814, - "grad_norm": 1.8252633675531311, - "learning_rate": 2.682807267020859e-09, - "loss": 0.9096, - "step": 8183 - }, - { - "epoch": 0.9840678169903204, - "grad_norm": 1.6209244605224424, - "learning_rate": 2.642624819121808e-09, - "loss": 0.8586, - "step": 8184 - }, - { - "epoch": 0.9841880598809596, - "grad_norm": 2.0710736872716846, - "learning_rate": 2.6027453687487154e-09, - "loss": 0.8438, - "step": 8185 - }, - { - "epoch": 0.9843083027715986, - "grad_norm": 2.425239146130557, - "learning_rate": 2.5631689219509643e-09, - "loss": 0.7585, - "step": 8186 - }, - { - "epoch": 0.9844285456622377, - "grad_norm": 3.4440505988716095, - "learning_rate": 2.523895484732197e-09, - "loss": 1.0634, - "step": 8187 - }, - { - "epoch": 0.9845487885528769, - "grad_norm": 1.7680610123588671, - "learning_rate": 2.4849250630505357e-09, - "loss": 0.974, - "step": 8188 - }, - { - "epoch": 0.9846690314435159, - "grad_norm": 1.7800815832536834, - "learning_rate": 2.4462576628172528e-09, - "loss": 0.9644, - "step": 8189 - }, - { - "epoch": 0.984789274334155, - "grad_norm": 1.8394474323514323, - "learning_rate": 2.407893289898766e-09, - "loss": 0.9667, - "step": 8190 - }, - { - "epoch": 0.984909517224794, - "grad_norm": 1.9640401178873665, - "learning_rate": 2.3698319501144202e-09, - "loss": 1.0663, - "step": 8191 - }, - { - "epoch": 0.9850297601154332, - "grad_norm": 2.1845225097446477, - "learning_rate": 2.3320736492382644e-09, - "loss": 0.9583, - "step": 8192 - }, - { - "epoch": 0.9851500030060723, - "grad_norm": 1.6047877145766667, - "learning_rate": 2.29461839299816e-09, - "loss": 0.9115, - "step": 8193 - }, - { - "epoch": 0.9852702458967113, - "grad_norm": 1.5836264021169681, - "learning_rate": 2.257466187076229e-09, - "loss": 1.0296, - "step": 8194 - }, - { - "epoch": 0.9853904887873505, - "grad_norm": 2.5212360617347396, - "learning_rate": 2.2206170371081854e-09, - "loss": 0.9295, - "step": 8195 - }, - { - "epoch": 0.9855107316779895, - "grad_norm": 1.5341808530340497, - "learning_rate": 2.1840709486842247e-09, - "loss": 1.0751, - "step": 8196 - }, - { - "epoch": 0.9856309745686286, - "grad_norm": 2.3689931900680135, - "learning_rate": 2.1478279273481335e-09, - "loss": 1.0255, - "step": 8197 - }, - { - "epoch": 0.9857512174592677, - "grad_norm": 2.4260914185007234, - "learning_rate": 2.1118879785981815e-09, - "loss": 1.0411, - "step": 8198 - }, - { - "epoch": 0.9858714603499068, - "grad_norm": 1.5969942768737249, - "learning_rate": 2.0762511078862288e-09, - "loss": 1.0218, - "step": 8199 - }, - { - "epoch": 0.9859917032405459, - "grad_norm": 1.8286793838185798, - "learning_rate": 2.0409173206186183e-09, - "loss": 0.8859, - "step": 8200 - }, - { - "epoch": 0.986111946131185, - "grad_norm": 2.46948472690899, - "learning_rate": 2.0058866221550617e-09, - "loss": 1.0997, - "step": 8201 - }, - { - "epoch": 0.9862321890218241, - "grad_norm": 1.8358152308230264, - "learning_rate": 1.971159017809976e-09, - "loss": 0.976, - "step": 8202 - }, - { - "epoch": 0.9863524319124631, - "grad_norm": 4.652613301319615, - "learning_rate": 1.93673451285159e-09, - "loss": 1.0059, - "step": 8203 - }, - { - "epoch": 0.9864726748031023, - "grad_norm": 0.7585458648056981, - "learning_rate": 1.9026131125019495e-09, - "loss": 0.8293, - "step": 8204 - }, - { - "epoch": 0.9865929176937414, - "grad_norm": 2.799127449592987, - "learning_rate": 1.8687948219371363e-09, - "loss": 1.0889, - "step": 8205 - }, - { - "epoch": 0.9867131605843804, - "grad_norm": 1.8935632494004473, - "learning_rate": 1.835279646287491e-09, - "loss": 1.1124, - "step": 8206 - }, - { - "epoch": 0.9868334034750196, - "grad_norm": 1.6448305994716452, - "learning_rate": 1.8020675906371685e-09, - "loss": 1.0046, - "step": 8207 - }, - { - "epoch": 0.9869536463656586, - "grad_norm": 1.793247043396052, - "learning_rate": 1.7691586600243612e-09, - "loss": 0.9779, - "step": 8208 - }, - { - "epoch": 0.9870738892562977, - "grad_norm": 2.528141797815071, - "learning_rate": 1.7365528594415202e-09, - "loss": 1.0938, - "step": 8209 - }, - { - "epoch": 0.9871941321469369, - "grad_norm": 2.5166332048966296, - "learning_rate": 1.7042501938346888e-09, - "loss": 0.9044, - "step": 8210 - }, - { - "epoch": 0.9873143750375759, - "grad_norm": 2.119087070895881, - "learning_rate": 1.6722506681043913e-09, - "loss": 0.9967, - "step": 8211 - }, - { - "epoch": 0.987434617928215, - "grad_norm": 2.8562201099793394, - "learning_rate": 1.640554287104745e-09, - "loss": 0.9161, - "step": 8212 - }, - { - "epoch": 0.9875548608188541, - "grad_norm": 3.265175382334735, - "learning_rate": 1.609161055644348e-09, - "loss": 1.024, - "step": 8213 - }, - { - "epoch": 0.9876751037094932, - "grad_norm": 1.9797026527505324, - "learning_rate": 1.5780709784849467e-09, - "loss": 0.906, - "step": 8214 - }, - { - "epoch": 0.9877953466001322, - "grad_norm": 2.7740983202616514, - "learning_rate": 1.5472840603436565e-09, - "loss": 1.0485, - "step": 8215 - }, - { - "epoch": 0.9879155894907714, - "grad_norm": 2.1000868143574767, - "learning_rate": 1.5168003058900757e-09, - "loss": 1.0215, - "step": 8216 - }, - { - "epoch": 0.9880358323814105, - "grad_norm": 1.847789606358653, - "learning_rate": 1.4866197197491715e-09, - "loss": 1.1475, - "step": 8217 - }, - { - "epoch": 0.9881560752720495, - "grad_norm": 3.127282491632424, - "learning_rate": 1.4567423064988371e-09, - "loss": 1.0036, - "step": 8218 - }, - { - "epoch": 0.9882763181626887, - "grad_norm": 1.9392651546545596, - "learning_rate": 1.4271680706718913e-09, - "loss": 1.0057, - "step": 8219 - }, - { - "epoch": 0.9883965610533277, - "grad_norm": 1.870756943239803, - "learning_rate": 1.3978970167543013e-09, - "loss": 1.0525, - "step": 8220 - }, - { - "epoch": 0.9885168039439668, - "grad_norm": 2.4720712846644215, - "learning_rate": 1.3689291491867372e-09, - "loss": 1.0048, - "step": 8221 - }, - { - "epoch": 0.988637046834606, - "grad_norm": 2.1507090264390136, - "learning_rate": 1.3402644723636836e-09, - "loss": 0.9639, - "step": 8222 - }, - { - "epoch": 0.988757289725245, - "grad_norm": 1.9471875082281385, - "learning_rate": 1.311902990633218e-09, - "loss": 1.0622, - "step": 8223 - }, - { - "epoch": 0.9888775326158841, - "grad_norm": 1.561342649986384, - "learning_rate": 1.2838447082978987e-09, - "loss": 0.9404, - "step": 8224 - }, - { - "epoch": 0.9889977755065231, - "grad_norm": 12.072058808746581, - "learning_rate": 1.2560896296143208e-09, - "loss": 1.0517, - "step": 8225 - }, - { - "epoch": 0.9891180183971623, - "grad_norm": 2.5120820869834577, - "learning_rate": 1.2286377587926722e-09, - "loss": 1.0439, - "step": 8226 - }, - { - "epoch": 0.9892382612878013, - "grad_norm": 2.121727386868306, - "learning_rate": 1.2014890999973992e-09, - "loss": 0.9785, - "step": 8227 - }, - { - "epoch": 0.9893585041784404, - "grad_norm": 1.5352855365853828, - "learning_rate": 1.1746436573472073e-09, - "loss": 1.01, - "step": 8228 - }, - { - "epoch": 0.9894787470690796, - "grad_norm": 1.8699665536796355, - "learning_rate": 1.1481014349141726e-09, - "loss": 0.924, - "step": 8229 - }, - { - "epoch": 0.9895989899597186, - "grad_norm": 2.210300313300341, - "learning_rate": 1.121862436724852e-09, - "loss": 1.067, - "step": 8230 - }, - { - "epoch": 0.9897192328503577, - "grad_norm": 1.6501605533745656, - "learning_rate": 1.0959266667598388e-09, - "loss": 0.9422, - "step": 8231 - }, - { - "epoch": 0.9898394757409968, - "grad_norm": 2.243467171782707, - "learning_rate": 1.0702941289533196e-09, - "loss": 0.9731, - "step": 8232 - }, - { - "epoch": 0.9899597186316359, - "grad_norm": 1.9687713137487157, - "learning_rate": 1.0449648271939615e-09, - "loss": 1.1116, - "step": 8233 - }, - { - "epoch": 0.990079961522275, - "grad_norm": 1.6011943114794074, - "learning_rate": 1.0199387653240243e-09, - "loss": 0.961, - "step": 8234 - }, - { - "epoch": 0.9902002044129141, - "grad_norm": 2.6870621612877263, - "learning_rate": 9.952159471400267e-10, - "loss": 0.9321, - "step": 8235 - }, - { - "epoch": 0.9903204473035532, - "grad_norm": 2.0034233835116018, - "learning_rate": 9.707963763923022e-10, - "loss": 1.0639, - "step": 8236 - }, - { - "epoch": 0.9904406901941922, - "grad_norm": 1.9108283921765827, - "learning_rate": 9.466800567854427e-10, - "loss": 1.0129, - "step": 8237 - }, - { - "epoch": 0.9905609330848314, - "grad_norm": 2.0623394296738002, - "learning_rate": 9.228669919778553e-10, - "loss": 0.9179, - "step": 8238 - }, - { - "epoch": 0.9906811759754705, - "grad_norm": 2.3126721704673576, - "learning_rate": 8.993571855817617e-10, - "loss": 1.0221, - "step": 8239 - }, - { - "epoch": 0.9908014188661095, - "grad_norm": 1.897137985978299, - "learning_rate": 8.761506411638642e-10, - "loss": 0.968, - "step": 8240 - }, - { - "epoch": 0.9909216617567487, - "grad_norm": 2.9265329696542195, - "learning_rate": 8.53247362244236e-10, - "loss": 0.9703, - "step": 8241 - }, - { - "epoch": 0.9910419046473877, - "grad_norm": 1.5343876824563774, - "learning_rate": 8.306473522976532e-10, - "loss": 0.9154, - "step": 8242 - }, - { - "epoch": 0.9911621475380268, - "grad_norm": 1.7892901013453522, - "learning_rate": 8.083506147522623e-10, - "loss": 0.9484, - "step": 8243 - }, - { - "epoch": 0.991282390428666, - "grad_norm": 2.000258309849462, - "learning_rate": 7.863571529906909e-10, - "loss": 1.0807, - "step": 8244 - }, - { - "epoch": 0.991402633319305, - "grad_norm": 0.7950622284724351, - "learning_rate": 7.646669703489372e-10, - "loss": 0.8826, - "step": 8245 - }, - { - "epoch": 0.9915228762099441, - "grad_norm": 2.481868703272682, - "learning_rate": 7.432800701177023e-10, - "loss": 0.8079, - "step": 8246 - }, - { - "epoch": 0.9916431191005832, - "grad_norm": 0.842777610229205, - "learning_rate": 7.221964555415017e-10, - "loss": 0.834, - "step": 8247 - }, - { - "epoch": 0.9917633619912223, - "grad_norm": 1.7558263790437674, - "learning_rate": 7.01416129818222e-10, - "loss": 0.9767, - "step": 8248 - }, - { - "epoch": 0.9918836048818613, - "grad_norm": 3.4216424174935227, - "learning_rate": 6.809390961006745e-10, - "loss": 0.8163, - "step": 8249 - }, - { - "epoch": 0.9920038477725005, - "grad_norm": 1.7792631076996168, - "learning_rate": 6.607653574948191e-10, - "loss": 0.9236, - "step": 8250 - }, - { - "epoch": 0.9921240906631396, - "grad_norm": 1.7458420130378107, - "learning_rate": 6.408949170613187e-10, - "loss": 1.0456, - "step": 8251 - }, - { - "epoch": 0.9922443335537786, - "grad_norm": 1.5602862888902633, - "learning_rate": 6.213277778144288e-10, - "loss": 1.0481, - "step": 8252 - }, - { - "epoch": 0.9923645764444178, - "grad_norm": 2.7088779684345092, - "learning_rate": 6.020639427224416e-10, - "loss": 0.9062, - "step": 8253 - }, - { - "epoch": 0.9924848193350568, - "grad_norm": 2.138230815678739, - "learning_rate": 5.831034147076864e-10, - "loss": 0.9566, - "step": 8254 - }, - { - "epoch": 0.9926050622256959, - "grad_norm": 0.7098108104038442, - "learning_rate": 5.644461966463065e-10, - "loss": 0.8069, - "step": 8255 - }, - { - "epoch": 0.9927253051163349, - "grad_norm": 1.7020213199494953, - "learning_rate": 5.460922913687049e-10, - "loss": 0.986, - "step": 8256 - }, - { - "epoch": 0.9928455480069741, - "grad_norm": 1.9718161427559948, - "learning_rate": 5.280417016593208e-10, - "loss": 0.9788, - "step": 8257 - }, - { - "epoch": 0.9929657908976132, - "grad_norm": 1.5340824011364456, - "learning_rate": 5.102944302559642e-10, - "loss": 0.9816, - "step": 8258 - }, - { - "epoch": 0.9930860337882522, - "grad_norm": 1.8794568181091804, - "learning_rate": 4.9285047985137e-10, - "loss": 1.0174, - "step": 8259 - }, - { - "epoch": 0.9932062766788914, - "grad_norm": 1.910908431972901, - "learning_rate": 4.757098530916436e-10, - "loss": 0.9766, - "step": 8260 - }, - { - "epoch": 0.9933265195695304, - "grad_norm": 3.1361212630474107, - "learning_rate": 4.5887255257670563e-10, - "loss": 1.0147, - "step": 8261 - }, - { - "epoch": 0.9934467624601695, - "grad_norm": 1.9005055407508826, - "learning_rate": 4.4233858086117906e-10, - "loss": 0.9951, - "step": 8262 - }, - { - "epoch": 0.9935670053508087, - "grad_norm": 2.182020791175872, - "learning_rate": 4.261079404528356e-10, - "loss": 0.9143, - "step": 8263 - }, - { - "epoch": 0.9936872482414477, - "grad_norm": 2.4196449102816318, - "learning_rate": 4.1018063381437205e-10, - "loss": 0.9138, - "step": 8264 - }, - { - "epoch": 0.9938074911320868, - "grad_norm": 0.9228479133009034, - "learning_rate": 3.9455666336141167e-10, - "loss": 0.8935, - "step": 8265 - }, - { - "epoch": 0.9939277340227259, - "grad_norm": 4.307639216001541, - "learning_rate": 3.7923603146450267e-10, - "loss": 1.0458, - "step": 8266 - }, - { - "epoch": 0.994047976913365, - "grad_norm": 2.0751007561178256, - "learning_rate": 3.642187404473418e-10, - "loss": 1.0334, - "step": 8267 - }, - { - "epoch": 0.994168219804004, - "grad_norm": 2.2725208440145743, - "learning_rate": 3.495047925885508e-10, - "loss": 1.0857, - "step": 8268 - }, - { - "epoch": 0.9942884626946432, - "grad_norm": 2.6679091439912344, - "learning_rate": 3.350941901199e-10, - "loss": 1.0685, - "step": 8269 - }, - { - "epoch": 0.9944087055852823, - "grad_norm": 2.8612341334818376, - "learning_rate": 3.2098693522764066e-10, - "loss": 1.0564, - "step": 8270 - }, - { - "epoch": 0.9945289484759213, - "grad_norm": 3.1725598427469492, - "learning_rate": 3.071830300516165e-10, - "loss": 1.0523, - "step": 8271 - }, - { - "epoch": 0.9946491913665605, - "grad_norm": 2.634048579255363, - "learning_rate": 2.9368247668615234e-10, - "loss": 0.9334, - "step": 8272 - }, - { - "epoch": 0.9947694342571995, - "grad_norm": 2.9543204335240936, - "learning_rate": 2.804852771789434e-10, - "loss": 0.8451, - "step": 8273 - }, - { - "epoch": 0.9948896771478386, - "grad_norm": 1.7368435175219183, - "learning_rate": 2.675914335321661e-10, - "loss": 0.7894, - "step": 8274 - }, - { - "epoch": 0.9950099200384778, - "grad_norm": 2.348045989578834, - "learning_rate": 2.550009477018111e-10, - "loss": 1.0195, - "step": 8275 - }, - { - "epoch": 0.9951301629291168, - "grad_norm": 2.0902052522105117, - "learning_rate": 2.4271382159790634e-10, - "loss": 0.8563, - "step": 8276 - }, - { - "epoch": 0.9952504058197559, - "grad_norm": 1.669576615898468, - "learning_rate": 2.3073005708429406e-10, - "loss": 1.0883, - "step": 8277 - }, - { - "epoch": 0.995370648710395, - "grad_norm": 1.7146737051315566, - "learning_rate": 2.190496559788535e-10, - "loss": 0.9486, - "step": 8278 - }, - { - "epoch": 0.9954908916010341, - "grad_norm": 2.3234152037955926, - "learning_rate": 2.0767262005372265e-10, - "loss": 0.9911, - "step": 8279 - }, - { - "epoch": 0.9956111344916732, - "grad_norm": 1.9873060671639866, - "learning_rate": 1.965989510346322e-10, - "loss": 0.9754, - "step": 8280 - }, - { - "epoch": 0.9957313773823123, - "grad_norm": 2.3307523303594646, - "learning_rate": 1.8582865060134955e-10, - "loss": 0.9367, - "step": 8281 - }, - { - "epoch": 0.9958516202729514, - "grad_norm": 0.8881351065547204, - "learning_rate": 1.7536172038790098e-10, - "loss": 0.8204, - "step": 8282 - }, - { - "epoch": 0.9959718631635904, - "grad_norm": 2.031154219496616, - "learning_rate": 1.651981619819054e-10, - "loss": 0.9235, - "step": 8283 - }, - { - "epoch": 0.9960921060542296, - "grad_norm": 2.384059292151958, - "learning_rate": 1.5533797692546257e-10, - "loss": 0.9269, - "step": 8284 - }, - { - "epoch": 0.9962123489448687, - "grad_norm": 2.290786874835896, - "learning_rate": 1.4578116671404296e-10, - "loss": 1.0622, - "step": 8285 - }, - { - "epoch": 0.9963325918355077, - "grad_norm": 2.169102937414586, - "learning_rate": 1.3652773279759777e-10, - "loss": 0.9476, - "step": 8286 - }, - { - "epoch": 0.9964528347261468, - "grad_norm": 1.6259746562958046, - "learning_rate": 1.2757767657989305e-10, - "loss": 0.8542, - "step": 8287 - }, - { - "epoch": 0.9965730776167859, - "grad_norm": 2.0290957251125246, - "learning_rate": 1.1893099941850948e-10, - "loss": 1.0941, - "step": 8288 - }, - { - "epoch": 0.996693320507425, - "grad_norm": 3.3516313594689064, - "learning_rate": 1.105877026252866e-10, - "loss": 1.004, - "step": 8289 - }, - { - "epoch": 0.996813563398064, - "grad_norm": 2.526353975371328, - "learning_rate": 1.0254778746565663e-10, - "loss": 0.9505, - "step": 8290 - }, - { - "epoch": 0.9969338062887032, - "grad_norm": 2.1713301160369047, - "learning_rate": 9.481125515953259e-11, - "loss": 0.9629, - "step": 8291 - }, - { - "epoch": 0.9970540491793423, - "grad_norm": 3.3794996148527003, - "learning_rate": 8.737810688064228e-11, - "loss": 1.0281, - "step": 8292 - }, - { - "epoch": 0.9971742920699813, - "grad_norm": 3.1264551970729086, - "learning_rate": 8.024834375608414e-11, - "loss": 1.0191, - "step": 8293 - }, - { - "epoch": 0.9972945349606205, - "grad_norm": 0.8394309160322129, - "learning_rate": 7.342196686788149e-11, - "loss": 0.8952, - "step": 8294 - }, - { - "epoch": 0.9974147778512595, - "grad_norm": 2.4117829754491806, - "learning_rate": 6.689897725142834e-11, - "loss": 0.9115, - "step": 8295 - }, - { - "epoch": 0.9975350207418986, - "grad_norm": 2.2585993784857767, - "learning_rate": 6.067937589615545e-11, - "loss": 1.1104, - "step": 8296 - }, - { - "epoch": 0.9976552636325378, - "grad_norm": 0.7724401806382742, - "learning_rate": 5.476316374575241e-11, - "loss": 0.8145, - "step": 8297 - }, - { - "epoch": 0.9977755065231768, - "grad_norm": 1.9822098341418357, - "learning_rate": 4.9150341697723476e-11, - "loss": 0.9603, - "step": 8298 - }, - { - "epoch": 0.9978957494138159, - "grad_norm": 1.761084127202709, - "learning_rate": 4.384091060338768e-11, - "loss": 0.89, - "step": 8299 - }, - { - "epoch": 0.998015992304455, - "grad_norm": 2.281339251718846, - "learning_rate": 3.883487126810081e-11, - "loss": 0.9714, - "step": 8300 - }, - { - "epoch": 0.9981362351950941, - "grad_norm": 1.5998942548855521, - "learning_rate": 3.41322244516995e-11, - "loss": 1.027, - "step": 8301 - }, - { - "epoch": 0.9982564780857331, - "grad_norm": 1.736891699041837, - "learning_rate": 2.9732970866946925e-11, - "loss": 0.8581, - "step": 8302 - }, - { - "epoch": 0.9983767209763723, - "grad_norm": 2.319007275233139, - "learning_rate": 2.563711118175327e-11, - "loss": 1.015, - "step": 8303 - }, - { - "epoch": 0.9984969638670114, - "grad_norm": 1.8557786542182633, - "learning_rate": 2.184464601717728e-11, - "loss": 1.0619, - "step": 8304 - }, - { - "epoch": 0.9986172067576504, - "grad_norm": 2.3561862845874946, - "learning_rate": 1.8355575948758585e-11, - "loss": 1.008, - "step": 8305 - }, - { - "epoch": 0.9987374496482896, - "grad_norm": 2.46449879501044, - "learning_rate": 1.5169901505407424e-11, - "loss": 0.9677, - "step": 8306 - }, - { - "epoch": 0.9988576925389286, - "grad_norm": 5.658009966607214, - "learning_rate": 1.228762317073695e-11, - "loss": 0.9678, - "step": 8307 - }, - { - "epoch": 0.9989779354295677, - "grad_norm": 1.8178525355578037, - "learning_rate": 9.70874138195299e-12, - "loss": 1.0113, - "step": 8308 - }, - { - "epoch": 0.9990981783202069, - "grad_norm": 3.803306752033161, - "learning_rate": 7.433256530076093e-12, - "loss": 0.9728, - "step": 8309 - }, - { - "epoch": 0.9992184212108459, - "grad_norm": 2.117229053047314, - "learning_rate": 5.46116896038562e-12, - "loss": 0.9792, - "step": 8310 - }, - { - "epoch": 0.999338664101485, - "grad_norm": 5.145972534438491, - "learning_rate": 3.792478972197699e-12, - "loss": 0.8523, - "step": 8311 - }, - { - "epoch": 0.9994589069921241, - "grad_norm": 2.917668869766745, - "learning_rate": 2.4271868181990895e-12, - "loss": 0.9235, - "step": 8312 - }, - { - "epoch": 0.9995791498827632, - "grad_norm": 2.210126640523316, - "learning_rate": 1.3652927060014973e-12, - "loss": 1.0317, - "step": 8313 - }, - { - "epoch": 0.9996993927734023, - "grad_norm": 2.9929007135881545, - "learning_rate": 6.067967965872612e-13, - "loss": 0.8762, - "step": 8314 - }, - { - "epoch": 0.9998196356640414, - "grad_norm": 1.5092442489446207, - "learning_rate": 1.5169920497548615e-13, - "loss": 1.002, - "step": 8315 - }, - { - "epoch": 0.9999398785546805, - "grad_norm": 1.1968082718138762, - "learning_rate": 0.0, - "loss": 0.8078, - "step": 8316 - }, - { - "epoch": 0.9999398785546805, - "step": 8316, - "total_flos": 6.68648365344424e+17, - "train_loss": 0.9973464236774371, - "train_runtime": 151104.6214, - "train_samples_per_second": 2.201, - "train_steps_per_second": 0.055 - } - ], - "logging_steps": 1.0, - "max_steps": 8316, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 100, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 6.68648365344424e+17, - "train_batch_size": 5, - "trial_name": null, - "trial_params": null -} diff --git a/sft/smoe_perturbed/training_args.bin b/sft/smoe_perturbed/training_args.bin deleted file mode 100644 index 1c9fe5a235ef9bbe54066647f1465ca4d56cfc27..0000000000000000000000000000000000000000 --- a/sft/smoe_perturbed/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5535ab3f5cdfe493c63007a35f0c6db959c4db6a57a9b1bffbca65f253ccf5d6 -size 8184 diff --git a/sft_full/hyperrouter/added_tokens.json b/sft_full/hyperrouter/added_tokens.json deleted file mode 100644 index c9d3d3a1b74d87e381e471f7b33784015d2dc0ea..0000000000000000000000000000000000000000 --- a/sft_full/hyperrouter/added_tokens.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "<|assistant|>": 32001, - "<|endoftext|>": 32000, - "<|end|>": 32007, - "<|placeholder1|>": 32002, - "<|placeholder2|>": 32003, - "<|placeholder3|>": 32004, - "<|placeholder4|>": 32005, - "<|placeholder5|>": 32008, - "<|placeholder6|>": 32009, - "<|system|>": 32006, - "<|user|>": 32010 -} diff --git a/sft_full/hyperrouter/config.json b/sft_full/hyperrouter/config.json deleted file mode 100644 index ed0f12ebaccca05d42b7565ebdfedd92efd6bcbb..0000000000000000000000000000000000000000 --- a/sft_full/hyperrouter/config.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "_name_or_path": "/cm/archive/thongdt4/toolkitmoe/checkpoints/phi3mini-clip/pft", - "architectures": [ - "LlavaPhiForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" - }, - "balance_loss_coef": 0.1, - "bos_token_id": 1, - "clip_smoe": true, - "dropout": false, - "embd_pdrop": 0.0, - "eos_token_id": 32000, - "freeze_mm_mlp_adapter": false, - "hidden_act": "silu", - "hidden_size": 3072, - "image_aspect_ratio": "pad", - "initializer_range": 0.02, - "intermediate_size": 8192, - "local_rank": 0, - "max_position_embeddings": 4096, - "mlp_smoe": true, - "mm_hidden_size": 1024, - "mm_patch_merge_type": "flat", - "mm_projector_lr": null, - "mm_projector_type": "moe", - "mm_use_im_patch_token": false, - "mm_use_im_start_end": false, - "mm_vision_select_feature": "patch", - "mm_vision_select_layer": -2, - "mm_vision_tower": "openai/clip-vit-large-patch14-336", - "model_type": "llava_phi", - "moe_name": "hyperrouter", - "num_attention_heads": 32, - "num_experts": 4, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "num_layers": 3, - "num_selected": 2, - "original_max_position_embeddings": 4096, - "pad_token_id": 32000, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "router_z_loss_coef": 0.01, - "scales": [ - 1, - 3 - ], - "sliding_window": 2047, - "tie_word_embeddings": false, - "tokenizer_model_max_length": 2048, - "tokenizer_padding_side": "right", - "topk_max": 2, - "topk_min": 1, - "torch_dtype": "bfloat16", - "training": true, - "transformers_version": "4.43.2", - "tune_mm_mlp_adapter": false, - "use_cache": false, - "use_mm_proj": true, - "vocab_size": 32064 -} diff --git a/sft_full/hyperrouter/generation_config.json b/sft_full/hyperrouter/generation_config.json deleted file mode 100644 index 3a20824ea777f1ebd11da590160a7209fe3b62c6..0000000000000000000000000000000000000000 --- a/sft_full/hyperrouter/generation_config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "_from_model_config": true, - "bos_token_id": 1, - "do_sample": true, - "eos_token_id": [ - 32000, - 32001, - 32007 - ], - "pad_token_id": 32000, - "transformers_version": "4.43.2" -} diff --git a/sft_full/hyperrouter/model-00001-of-00002.safetensors b/sft_full/hyperrouter/model-00001-of-00002.safetensors deleted file mode 100644 index b28de8f8493cfbae9aab5eb97c9a3f0767c91b6b..0000000000000000000000000000000000000000 --- a/sft_full/hyperrouter/model-00001-of-00002.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2a8de169a1222eb2f83724b9ef9f5105179e3d943a2be44fa5907bb9ba038d30 -size 4972489328 diff --git a/sft_full/hyperrouter/model-00002-of-00002.safetensors b/sft_full/hyperrouter/model-00002-of-00002.safetensors deleted file mode 100644 index ecd92778db0031ea56a958c0e482b89f8eb3f754..0000000000000000000000000000000000000000 --- a/sft_full/hyperrouter/model-00002-of-00002.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:36d422a63edd26805cdf482c257ff3af4112f76769e0d1a414cece2133693933 -size 4685134224 diff --git a/sft_full/hyperrouter/model.safetensors.index.json b/sft_full/hyperrouter/model.safetensors.index.json deleted file mode 100644 index aaf7fa0ba3b7b308cc8c6401d2359bf16720b6a4..0000000000000000000000000000000000000000 --- a/sft_full/hyperrouter/model.safetensors.index.json +++ /dev/null @@ -1,1020 +0,0 @@ -{ - "metadata": { - "total_size": 9657478344 - }, - "weight_map": { - "lm_head.weight": "model-00002-of-00002.safetensors", - "model.embed_tokens.weight": "model-00001-of-00002.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.0.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.0.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.1.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.1.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.10.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.10.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.11.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.11.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.12.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.12.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.13.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.13.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.14.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.14.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.15.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.15.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.16.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.16.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.17.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.17.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.18.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.18.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.19.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.19.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.2.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.2.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.20.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.20.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.21.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.21.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.22.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.22.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.23.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.23.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.24.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.24.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.25.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.25.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.26.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.26.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.27.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.27.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.28.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.28.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.29.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.29.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.3.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.3.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.30.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.30.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.31.mlp.gate_up_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.31.self_attn.qkv_proj.weight": "model-00002-of-00002.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.4.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.4.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.5.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.5.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.6.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.6.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.7.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.7.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.8.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.8.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.9.mlp.gate_up_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", - "model.layers.9.self_attn.qkv_proj.weight": "model-00001-of-00002.safetensors", - "model.mm_projector.moelayer.experts.0.0.bias": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.0.0.weight": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.0.2.bias": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.0.2.weight": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.1.0.bias": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.1.0.weight": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.1.2.bias": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.1.2.weight": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.2.0.bias": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.2.0.weight": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.2.2.bias": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.2.2.weight": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.3.0.bias": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.3.0.weight": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.3.2.bias": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.experts.3.2.weight": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.mm_projector.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.norm.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.embeddings.class_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc1.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc1.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.hyper_embedding": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.hypernet.0.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.hypernet.0.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.hypernet.2.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.hypernet.2.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.pre_layrnorm.bias": "model-00002-of-00002.safetensors", - "model.vision_tower.vision_model.pre_layrnorm.weight": "model-00002-of-00002.safetensors" - } -} diff --git a/sft_full/hyperrouter/special_tokens_map.json b/sft_full/hyperrouter/special_tokens_map.json deleted file mode 100644 index 3e4d5a5bc1cb51753cc9ae0305ece0da60052b10..0000000000000000000000000000000000000000 --- a/sft_full/hyperrouter/special_tokens_map.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "bos_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "", - "unk_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - } -} diff --git a/sft_full/hyperrouter/tokenizer.model b/sft_full/hyperrouter/tokenizer.model deleted file mode 100644 index 6c00c742ce03c627d6cd5b795984876fa49fa899..0000000000000000000000000000000000000000 --- a/sft_full/hyperrouter/tokenizer.model +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 -size 499723 diff --git a/sft_full/hyperrouter/tokenizer_config.json b/sft_full/hyperrouter/tokenizer_config.json deleted file mode 100644 index 3bd56c6314b14d6a33a69cd1802e04dbc1e47840..0000000000000000000000000000000000000000 --- a/sft_full/hyperrouter/tokenizer_config.json +++ /dev/null @@ -1,132 +0,0 @@ -{ - "add_bos_token": true, - "add_eos_token": false, - "add_prefix_space": true, - "added_tokens_decoder": { - "0": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "1": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "2": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": false - }, - "32000": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "32001": { - "content": "<|assistant|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32002": { - "content": "<|placeholder1|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32003": { - "content": "<|placeholder2|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32004": { - "content": "<|placeholder3|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32005": { - "content": "<|placeholder4|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32006": { - "content": "<|system|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32007": { - "content": "<|end|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32008": { - "content": "<|placeholder5|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32009": { - "content": "<|placeholder6|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32010": { - "content": "<|user|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - } - }, - "bos_token": "", - "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", - "clean_up_tokenization_spaces": false, - "eos_token": "<|endoftext|>", - "legacy": false, - "model_max_length": 2048, - "pad_token": "", - "padding_side": "right", - "sp_model_kwargs": {}, - "spaces_between_special_tokens": false, - "tokenizer_class": "LlamaTokenizer", - "unk_token": "", - "use_default_system_prompt": false -} diff --git a/sft_full/hyperrouter/trainer_state.json b/sft_full/hyperrouter/trainer_state.json deleted file mode 100644 index 6e7003a8ee55917577e12a41f39689988d4417df..0000000000000000000000000000000000000000 --- a/sft_full/hyperrouter/trainer_state.json +++ /dev/null @@ -1,77658 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.9999549082382648, - "eval_steps": 500, - "global_step": 11088, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 9.018352347026198e-05, - "grad_norm": 21.170219463639764, - "learning_rate": 0.0, - "loss": 1.8276, - "step": 1 - }, - { - "epoch": 0.00018036704694052397, - "grad_norm": 17.21192954897353, - "learning_rate": 4.773623799730706e-07, - "loss": 1.7923, - "step": 2 - }, - { - "epoch": 0.0002705505704107859, - "grad_norm": 13.285309581774666, - "learning_rate": 7.566014715123208e-07, - "loss": 1.7026, - "step": 3 - }, - { - "epoch": 0.00036073409388104793, - "grad_norm": 16.88982367577673, - "learning_rate": 9.547247599461412e-07, - "loss": 1.8875, - "step": 4 - }, - { - "epoch": 0.0004509176173513099, - "grad_norm": 14.895143349768617, - "learning_rate": 1.108401121501769e-06, - "loss": 1.5898, - "step": 5 - }, - { - "epoch": 0.0005411011408215718, - "grad_norm": 13.98419277988823, - "learning_rate": 1.2339638514853914e-06, - "loss": 1.7509, - "step": 6 - }, - { - "epoch": 0.0006312846642918339, - "grad_norm": 12.585092451068968, - "learning_rate": 1.3401256270225321e-06, - "loss": 1.5623, - "step": 7 - }, - { - "epoch": 0.0007214681877620959, - "grad_norm": 10.430589775461637, - "learning_rate": 1.4320871399192119e-06, - "loss": 1.5204, - "step": 8 - }, - { - "epoch": 0.0008116517112323579, - "grad_norm": 7.855105019394881, - "learning_rate": 1.5132029430246416e-06, - "loss": 1.4845, - "step": 9 - }, - { - "epoch": 0.0009018352347026198, - "grad_norm": 8.35117926579202, - "learning_rate": 1.5857635014748399e-06, - "loss": 1.5665, - "step": 10 - }, - { - "epoch": 0.0009920187581728818, - "grad_norm": 8.585051528162829, - "learning_rate": 1.6514025108267924e-06, - "loss": 1.6114, - "step": 11 - }, - { - "epoch": 0.0010822022816431437, - "grad_norm": 7.432888877593314, - "learning_rate": 1.711326231458462e-06, - "loss": 1.5641, - "step": 12 - }, - { - "epoch": 0.0011723858051134058, - "grad_norm": 6.081750254427303, - "learning_rate": 1.7664507107987104e-06, - "loss": 1.4342, - "step": 13 - }, - { - "epoch": 0.0012625693285836677, - "grad_norm": 5.4376254012457, - "learning_rate": 1.8174880069956024e-06, - "loss": 1.4224, - "step": 14 - }, - { - "epoch": 0.0013527528520539298, - "grad_norm": 5.794374816190099, - "learning_rate": 1.8650025930140899e-06, - "loss": 1.3875, - "step": 15 - }, - { - "epoch": 0.0014429363755241917, - "grad_norm": 5.226310689701959, - "learning_rate": 1.9094495198922823e-06, - "loss": 1.4842, - "step": 16 - }, - { - "epoch": 0.0015331198989944536, - "grad_norm": 11.693238507027322, - "learning_rate": 1.9512009899507514e-06, - "loss": 1.3464, - "step": 17 - }, - { - "epoch": 0.0016233034224647158, - "grad_norm": 4.5572272363377015, - "learning_rate": 1.990565322997712e-06, - "loss": 1.3952, - "step": 18 - }, - { - "epoch": 0.0017134869459349777, - "grad_norm": 4.325581929301491, - "learning_rate": 2.027800787770518e-06, - "loss": 1.307, - "step": 19 - }, - { - "epoch": 0.0018036704694052396, - "grad_norm": 4.832334985338088, - "learning_rate": 2.06312588144791e-06, - "loss": 1.3976, - "step": 20 - }, - { - "epoch": 0.0018938539928755017, - "grad_norm": 3.24478773154586, - "learning_rate": 2.0967270985348526e-06, - "loss": 1.3244, - "step": 21 - }, - { - "epoch": 0.0019840375163457636, - "grad_norm": 2.8793462988928153, - "learning_rate": 2.128764890799863e-06, - "loss": 1.2159, - "step": 22 - }, - { - "epoch": 0.0020742210398160257, - "grad_norm": 2.752098323329749, - "learning_rate": 2.1593783012990145e-06, - "loss": 1.2168, - "step": 23 - }, - { - "epoch": 0.0021644045632862874, - "grad_norm": 2.668359507238368, - "learning_rate": 2.188688611431533e-06, - "loss": 1.3579, - "step": 24 - }, - { - "epoch": 0.0022545880867565495, - "grad_norm": 2.6237738334078515, - "learning_rate": 2.216802243003538e-06, - "loss": 1.3326, - "step": 25 - }, - { - "epoch": 0.0023447716102268116, - "grad_norm": 6.715751511139688, - "learning_rate": 2.243813090771781e-06, - "loss": 1.1712, - "step": 26 - }, - { - "epoch": 0.0024349551336970737, - "grad_norm": 2.4729884016456833, - "learning_rate": 2.269804414536962e-06, - "loss": 1.3425, - "step": 27 - }, - { - "epoch": 0.0025251386571673354, - "grad_norm": 2.3819258243981003, - "learning_rate": 2.2948503869686733e-06, - "loss": 1.2417, - "step": 28 - }, - { - "epoch": 0.0026153221806375975, - "grad_norm": 3.2332916656903015, - "learning_rate": 2.3190173696980436e-06, - "loss": 0.9887, - "step": 29 - }, - { - "epoch": 0.0027055057041078597, - "grad_norm": 2.8560077455987267, - "learning_rate": 2.3423649729871604e-06, - "loss": 1.2477, - "step": 30 - }, - { - "epoch": 0.0027956892275781214, - "grad_norm": 2.048173216088377, - "learning_rate": 2.364946941580084e-06, - "loss": 1.2309, - "step": 31 - }, - { - "epoch": 0.0028858727510483835, - "grad_norm": 2.1344144624829196, - "learning_rate": 2.3868118998653532e-06, - "loss": 1.2913, - "step": 32 - }, - { - "epoch": 0.0029760562745186456, - "grad_norm": 2.5640359322742357, - "learning_rate": 2.408003982339113e-06, - "loss": 0.9405, - "step": 33 - }, - { - "epoch": 0.0030662397979889073, - "grad_norm": 1.559340983843509, - "learning_rate": 2.4285633699238223e-06, - "loss": 1.1402, - "step": 34 - }, - { - "epoch": 0.0031564233214591694, - "grad_norm": 2.7889137548965146, - "learning_rate": 2.4485267485243007e-06, - "loss": 1.1781, - "step": 35 - }, - { - "epoch": 0.0032466068449294315, - "grad_norm": 1.74388053979678, - "learning_rate": 2.467927702970783e-06, - "loss": 1.2152, - "step": 36 - }, - { - "epoch": 0.003336790368399693, - "grad_norm": 1.6500901168720539, - "learning_rate": 2.4867970569753584e-06, - "loss": 1.2407, - "step": 37 - }, - { - "epoch": 0.0034269738918699553, - "grad_norm": 2.1663942410300945, - "learning_rate": 2.5051631677435883e-06, - "loss": 1.0715, - "step": 38 - }, - { - "epoch": 0.0035171574153402174, - "grad_norm": 2.3965601827481176, - "learning_rate": 2.523052182311031e-06, - "loss": 1.0502, - "step": 39 - }, - { - "epoch": 0.003607340938810479, - "grad_norm": 1.971322997101687, - "learning_rate": 2.540488261420981e-06, - "loss": 1.2499, - "step": 40 - }, - { - "epoch": 0.0036975244622807412, - "grad_norm": 2.1633839559093606, - "learning_rate": 2.557493775753984e-06, - "loss": 1.0498, - "step": 41 - }, - { - "epoch": 0.0037877079857510034, - "grad_norm": 1.6195278463958145, - "learning_rate": 2.5740894785079235e-06, - "loss": 1.161, - "step": 42 - }, - { - "epoch": 0.0038778915092212655, - "grad_norm": 1.8098688430054362, - "learning_rate": 2.5902946576685834e-06, - "loss": 1.1762, - "step": 43 - }, - { - "epoch": 0.003968075032691527, - "grad_norm": 4.901376818860083, - "learning_rate": 2.606127270772933e-06, - "loss": 1.087, - "step": 44 - }, - { - "epoch": 0.004058258556161789, - "grad_norm": 2.551541599204431, - "learning_rate": 2.62160406452641e-06, - "loss": 1.0625, - "step": 45 - }, - { - "epoch": 0.004148442079632051, - "grad_norm": 1.6610367179224976, - "learning_rate": 2.636740681272085e-06, - "loss": 1.2332, - "step": 46 - }, - { - "epoch": 0.004238625603102313, - "grad_norm": 1.8030917556316108, - "learning_rate": 2.651551754008722e-06, - "loss": 1.2694, - "step": 47 - }, - { - "epoch": 0.004328809126572575, - "grad_norm": 1.838919178475389, - "learning_rate": 2.6660509914046035e-06, - "loss": 1.1956, - "step": 48 - }, - { - "epoch": 0.004418992650042837, - "grad_norm": 2.074918524739673, - "learning_rate": 2.6802512540450642e-06, - "loss": 1.1602, - "step": 49 - }, - { - "epoch": 0.004509176173513099, - "grad_norm": 2.5930831235720913, - "learning_rate": 2.694164622976609e-06, - "loss": 1.1561, - "step": 50 - }, - { - "epoch": 0.0045993596969833616, - "grad_norm": 1.7506399347008048, - "learning_rate": 2.707802461463072e-06, - "loss": 1.0856, - "step": 51 - }, - { - "epoch": 0.004689543220453623, - "grad_norm": 2.8608696371683178, - "learning_rate": 2.7211754707448516e-06, - "loss": 1.1257, - "step": 52 - }, - { - "epoch": 0.004779726743923885, - "grad_norm": 1.9772885764392643, - "learning_rate": 2.734293740486721e-06, - "loss": 1.1112, - "step": 53 - }, - { - "epoch": 0.0048699102673941475, - "grad_norm": 1.6386910802286374, - "learning_rate": 2.747166794510033e-06, - "loss": 1.1511, - "step": 54 - }, - { - "epoch": 0.004960093790864409, - "grad_norm": 1.6926446834236977, - "learning_rate": 2.759803632328562e-06, - "loss": 1.116, - "step": 55 - }, - { - "epoch": 0.005050277314334671, - "grad_norm": 1.6577618015026285, - "learning_rate": 2.772212766941744e-06, - "loss": 1.2365, - "step": 56 - }, - { - "epoch": 0.005140460837804933, - "grad_norm": 1.7831745124518548, - "learning_rate": 2.7844022592828385e-06, - "loss": 1.2241, - "step": 57 - }, - { - "epoch": 0.005230644361275195, - "grad_norm": 3.0658078138183305, - "learning_rate": 2.7963797496711145e-06, - "loss": 1.08, - "step": 58 - }, - { - "epoch": 0.005320827884745457, - "grad_norm": 1.6175375297001313, - "learning_rate": 2.80815248657541e-06, - "loss": 1.1551, - "step": 59 - }, - { - "epoch": 0.005411011408215719, - "grad_norm": 1.714716530775366, - "learning_rate": 2.819727352960231e-06, - "loss": 0.8741, - "step": 60 - }, - { - "epoch": 0.005501194931685981, - "grad_norm": 1.636174295770887, - "learning_rate": 2.8311108904541717e-06, - "loss": 1.1187, - "step": 61 - }, - { - "epoch": 0.005591378455156243, - "grad_norm": 2.6418578751575073, - "learning_rate": 2.842309321553155e-06, - "loss": 1.075, - "step": 62 - }, - { - "epoch": 0.005681561978626505, - "grad_norm": 2.475257601962963, - "learning_rate": 2.8533285700471737e-06, - "loss": 1.0575, - "step": 63 - }, - { - "epoch": 0.005771745502096767, - "grad_norm": 1.7318269036951073, - "learning_rate": 2.8641742798384237e-06, - "loss": 1.1899, - "step": 64 - }, - { - "epoch": 0.005861929025567029, - "grad_norm": 1.6579376115967523, - "learning_rate": 2.874851832300479e-06, - "loss": 1.1446, - "step": 65 - }, - { - "epoch": 0.005952112549037291, - "grad_norm": 2.125529952443708, - "learning_rate": 2.8853663623121834e-06, - "loss": 1.1442, - "step": 66 - }, - { - "epoch": 0.006042296072507553, - "grad_norm": 1.7135125556799695, - "learning_rate": 2.895722773085839e-06, - "loss": 1.1811, - "step": 67 - }, - { - "epoch": 0.0061324795959778146, - "grad_norm": 1.5767605614611628, - "learning_rate": 2.905925749896893e-06, - "loss": 1.1403, - "step": 68 - }, - { - "epoch": 0.006222663119448077, - "grad_norm": 2.587374653751743, - "learning_rate": 2.915979772811335e-06, - "loss": 1.1291, - "step": 69 - }, - { - "epoch": 0.006312846642918339, - "grad_norm": 1.7839732183746968, - "learning_rate": 2.925889128497372e-06, - "loss": 1.0869, - "step": 70 - }, - { - "epoch": 0.0064030301663886005, - "grad_norm": 2.043890254723098, - "learning_rate": 2.9356579211992906e-06, - "loss": 1.1788, - "step": 71 - }, - { - "epoch": 0.006493213689858863, - "grad_norm": 2.1693749618420366, - "learning_rate": 2.9452900829438533e-06, - "loss": 1.0639, - "step": 72 - }, - { - "epoch": 0.006583397213329125, - "grad_norm": 2.655974878761113, - "learning_rate": 2.954789383042727e-06, - "loss": 1.0918, - "step": 73 - }, - { - "epoch": 0.006673580736799386, - "grad_norm": 2.236138135010916, - "learning_rate": 2.9641594369484293e-06, - "loss": 0.9273, - "step": 74 - }, - { - "epoch": 0.006763764260269649, - "grad_norm": 1.7544529663852304, - "learning_rate": 2.9734037145158586e-06, - "loss": 1.1183, - "step": 75 - }, - { - "epoch": 0.006853947783739911, - "grad_norm": 1.5527814366148347, - "learning_rate": 2.982525547716659e-06, - "loss": 1.1153, - "step": 76 - }, - { - "epoch": 0.006944131307210172, - "grad_norm": 2.5562932278958304, - "learning_rate": 2.9915281378493246e-06, - "loss": 1.0873, - "step": 77 - }, - { - "epoch": 0.007034314830680435, - "grad_norm": 1.81038180260255, - "learning_rate": 3.000414562284102e-06, - "loss": 1.1075, - "step": 78 - }, - { - "epoch": 0.0071244983541506966, - "grad_norm": 1.5449719523380845, - "learning_rate": 3.009187780778246e-06, - "loss": 1.1048, - "step": 79 - }, - { - "epoch": 0.007214681877620958, - "grad_norm": 2.082930322449585, - "learning_rate": 3.017850641394051e-06, - "loss": 1.1023, - "step": 80 - }, - { - "epoch": 0.007304865401091221, - "grad_norm": 1.598051810321073, - "learning_rate": 3.0264058860492832e-06, - "loss": 0.9743, - "step": 81 - }, - { - "epoch": 0.0073950489245614825, - "grad_norm": 1.8226807214538752, - "learning_rate": 3.0348561557270548e-06, - "loss": 1.1542, - "step": 82 - }, - { - "epoch": 0.007485232448031745, - "grad_norm": 1.1297221156049564, - "learning_rate": 3.043203995369939e-06, - "loss": 0.8781, - "step": 83 - }, - { - "epoch": 0.007575415971502007, - "grad_norm": 1.6828497639021733, - "learning_rate": 3.051451858480994e-06, - "loss": 1.1895, - "step": 84 - }, - { - "epoch": 0.007665599494972268, - "grad_norm": 1.9237388319022233, - "learning_rate": 3.05960211145252e-06, - "loss": 1.0222, - "step": 85 - }, - { - "epoch": 0.007755783018442531, - "grad_norm": 1.5234464204967195, - "learning_rate": 3.0676570376416543e-06, - "loss": 1.025, - "step": 86 - }, - { - "epoch": 0.007845966541912792, - "grad_norm": 6.72294478653389, - "learning_rate": 3.0756188412103647e-06, - "loss": 1.1251, - "step": 87 - }, - { - "epoch": 0.007936150065383054, - "grad_norm": 1.8749279008354691, - "learning_rate": 3.083489650746004e-06, - "loss": 1.1083, - "step": 88 - }, - { - "epoch": 0.008026333588853317, - "grad_norm": 1.6822541963953226, - "learning_rate": 3.0912715226772975e-06, - "loss": 1.1212, - "step": 89 - }, - { - "epoch": 0.008116517112323578, - "grad_norm": 1.4215376736763872, - "learning_rate": 3.098966444499481e-06, - "loss": 1.0315, - "step": 90 - }, - { - "epoch": 0.00820670063579384, - "grad_norm": 1.5550247670007438, - "learning_rate": 3.1065763378212426e-06, - "loss": 1.1088, - "step": 91 - }, - { - "epoch": 0.008296884159264103, - "grad_norm": 1.397215709055961, - "learning_rate": 3.1141030612451554e-06, - "loss": 1.1056, - "step": 92 - }, - { - "epoch": 0.008387067682734364, - "grad_norm": 1.1073285421772685, - "learning_rate": 3.1215484130924052e-06, - "loss": 0.9232, - "step": 93 - }, - { - "epoch": 0.008477251206204626, - "grad_norm": 1.910456783937029, - "learning_rate": 3.128914133981793e-06, - "loss": 1.1109, - "step": 94 - }, - { - "epoch": 0.008567434729674889, - "grad_norm": 1.8844336024545774, - "learning_rate": 3.136201909272287e-06, - "loss": 1.1182, - "step": 95 - }, - { - "epoch": 0.00865761825314515, - "grad_norm": 1.6988340446560248, - "learning_rate": 3.1434133713776735e-06, - "loss": 1.0663, - "step": 96 - }, - { - "epoch": 0.008747801776615412, - "grad_norm": 1.7506502850635994, - "learning_rate": 3.15055010196128e-06, - "loss": 1.1, - "step": 97 - }, - { - "epoch": 0.008837985300085675, - "grad_norm": 1.762627276431094, - "learning_rate": 3.157613634018135e-06, - "loss": 1.0864, - "step": 98 - }, - { - "epoch": 0.008928168823555935, - "grad_norm": 1.6187787911347509, - "learning_rate": 3.1646054538514336e-06, - "loss": 1.1117, - "step": 99 - }, - { - "epoch": 0.009018352347026198, - "grad_norm": 1.692721588865366, - "learning_rate": 3.1715270029496797e-06, - "loss": 1.1479, - "step": 100 - }, - { - "epoch": 0.00910853587049646, - "grad_norm": 1.4418135070254883, - "learning_rate": 3.1783796797704243e-06, - "loss": 1.125, - "step": 101 - }, - { - "epoch": 0.009198719393966723, - "grad_norm": 1.724500641899841, - "learning_rate": 3.185164841436142e-06, - "loss": 1.0539, - "step": 102 - }, - { - "epoch": 0.009288902917436984, - "grad_norm": 5.928839219524071, - "learning_rate": 3.1918838053473723e-06, - "loss": 1.0886, - "step": 103 - }, - { - "epoch": 0.009379086440907246, - "grad_norm": 1.4822591860713812, - "learning_rate": 3.198537850717922e-06, - "loss": 1.0013, - "step": 104 - }, - { - "epoch": 0.009469269964377509, - "grad_norm": 1.6880612323104696, - "learning_rate": 3.205128220036622e-06, - "loss": 1.0492, - "step": 105 - }, - { - "epoch": 0.00955945348784777, - "grad_norm": 1.6470370015734321, - "learning_rate": 3.2116561204597917e-06, - "loss": 1.0404, - "step": 106 - }, - { - "epoch": 0.009649637011318032, - "grad_norm": 1.0533120688171174, - "learning_rate": 3.218122725138335e-06, - "loss": 0.9136, - "step": 107 - }, - { - "epoch": 0.009739820534788295, - "grad_norm": 1.8607872480308119, - "learning_rate": 3.224529174483104e-06, - "loss": 1.0818, - "step": 108 - }, - { - "epoch": 0.009830004058258556, - "grad_norm": 3.175430484934041, - "learning_rate": 3.2308765773719435e-06, - "loss": 1.0216, - "step": 109 - }, - { - "epoch": 0.009920187581728818, - "grad_norm": 1.6493098405035027, - "learning_rate": 3.2371660123016323e-06, - "loss": 1.0798, - "step": 110 - }, - { - "epoch": 0.010010371105199081, - "grad_norm": 1.6798213461843006, - "learning_rate": 3.2433985284876787e-06, - "loss": 1.0206, - "step": 111 - }, - { - "epoch": 0.010100554628669342, - "grad_norm": 1.435254457754878, - "learning_rate": 3.2495751469148143e-06, - "loss": 1.0744, - "step": 112 - }, - { - "epoch": 0.010190738152139604, - "grad_norm": 2.168186925958746, - "learning_rate": 3.2556968613407816e-06, - "loss": 1.0506, - "step": 113 - }, - { - "epoch": 0.010280921675609867, - "grad_norm": 1.9154769342398406, - "learning_rate": 3.2617646392559094e-06, - "loss": 1.1113, - "step": 114 - }, - { - "epoch": 0.010371105199080128, - "grad_norm": 1.682423124895236, - "learning_rate": 3.2677794228007836e-06, - "loss": 1.1215, - "step": 115 - }, - { - "epoch": 0.01046128872255039, - "grad_norm": 1.6484865298821465, - "learning_rate": 3.273742129644185e-06, - "loss": 1.0561, - "step": 116 - }, - { - "epoch": 0.010551472246020653, - "grad_norm": 2.1790330786327865, - "learning_rate": 3.279653653823352e-06, - "loss": 1.0779, - "step": 117 - }, - { - "epoch": 0.010641655769490914, - "grad_norm": 2.111853379824456, - "learning_rate": 3.285514866548481e-06, - "loss": 1.0814, - "step": 118 - }, - { - "epoch": 0.010731839292961176, - "grad_norm": 1.7221917804297266, - "learning_rate": 3.2913266169732838e-06, - "loss": 1.1411, - "step": 119 - }, - { - "epoch": 0.010822022816431439, - "grad_norm": 1.8901344482597446, - "learning_rate": 3.2970897329333017e-06, - "loss": 1.0576, - "step": 120 - }, - { - "epoch": 0.0109122063399017, - "grad_norm": 2.091303064524707, - "learning_rate": 3.302805021653585e-06, - "loss": 1.0548, - "step": 121 - }, - { - "epoch": 0.011002389863371962, - "grad_norm": 3.431897122866371, - "learning_rate": 3.3084732704272426e-06, - "loss": 1.0703, - "step": 122 - }, - { - "epoch": 0.011092573386842225, - "grad_norm": 1.777451262523494, - "learning_rate": 3.314095247266304e-06, - "loss": 1.0654, - "step": 123 - }, - { - "epoch": 0.011182756910312485, - "grad_norm": 1.8917198478964625, - "learning_rate": 3.3196717015262255e-06, - "loss": 1.0389, - "step": 124 - }, - { - "epoch": 0.011272940433782748, - "grad_norm": 1.4929147116694141, - "learning_rate": 3.325203364505307e-06, - "loss": 1.0603, - "step": 125 - }, - { - "epoch": 0.01136312395725301, - "grad_norm": 1.930060408541216, - "learning_rate": 3.3306909500202442e-06, - "loss": 1.1043, - "step": 126 - }, - { - "epoch": 0.011453307480723271, - "grad_norm": 6.212354582345338, - "learning_rate": 3.3361351549589145e-06, - "loss": 1.1162, - "step": 127 - }, - { - "epoch": 0.011543491004193534, - "grad_norm": 2.0890753643635556, - "learning_rate": 3.341536659811494e-06, - "loss": 1.0105, - "step": 128 - }, - { - "epoch": 0.011633674527663796, - "grad_norm": 1.8031356546758228, - "learning_rate": 3.346896129180904e-06, - "loss": 1.0979, - "step": 129 - }, - { - "epoch": 0.011723858051134057, - "grad_norm": 1.9426061963051955, - "learning_rate": 3.35221421227355e-06, - "loss": 1.0705, - "step": 130 - }, - { - "epoch": 0.01181404157460432, - "grad_norm": 2.4892898024744174, - "learning_rate": 3.357491543371255e-06, - "loss": 1.0902, - "step": 131 - }, - { - "epoch": 0.011904225098074582, - "grad_norm": 1.6720346392415464, - "learning_rate": 3.3627287422852543e-06, - "loss": 1.0247, - "step": 132 - }, - { - "epoch": 0.011994408621544843, - "grad_norm": 1.4397182763869287, - "learning_rate": 3.3679264147930497e-06, - "loss": 1.0942, - "step": 133 - }, - { - "epoch": 0.012084592145015106, - "grad_norm": 2.509507411631941, - "learning_rate": 3.37308515305891e-06, - "loss": 1.0142, - "step": 134 - }, - { - "epoch": 0.012174775668485368, - "grad_norm": 1.5315699340415727, - "learning_rate": 3.3782055360387313e-06, - "loss": 1.1326, - "step": 135 - }, - { - "epoch": 0.012264959191955629, - "grad_norm": 1.7417664297026276, - "learning_rate": 3.3832881298699633e-06, - "loss": 1.0333, - "step": 136 - }, - { - "epoch": 0.012355142715425892, - "grad_norm": 1.529142837269449, - "learning_rate": 3.388333488247249e-06, - "loss": 1.0477, - "step": 137 - }, - { - "epoch": 0.012445326238896154, - "grad_norm": 1.538770928817708, - "learning_rate": 3.393342152784406e-06, - "loss": 1.1213, - "step": 138 - }, - { - "epoch": 0.012535509762366415, - "grad_norm": 1.8687553686364669, - "learning_rate": 3.3983146533633376e-06, - "loss": 1.089, - "step": 139 - }, - { - "epoch": 0.012625693285836678, - "grad_norm": 3.9165620722230576, - "learning_rate": 3.403251508470442e-06, - "loss": 1.1253, - "step": 140 - }, - { - "epoch": 0.01271587680930694, - "grad_norm": 1.7636801887392297, - "learning_rate": 3.408153225521043e-06, - "loss": 1.0245, - "step": 141 - }, - { - "epoch": 0.012806060332777201, - "grad_norm": 1.933952714371572, - "learning_rate": 3.413020301172361e-06, - "loss": 0.969, - "step": 142 - }, - { - "epoch": 0.012896243856247463, - "grad_norm": 1.5497936880566268, - "learning_rate": 3.4178532216255024e-06, - "loss": 1.0489, - "step": 143 - }, - { - "epoch": 0.012986427379717726, - "grad_norm": 1.9699747423789378, - "learning_rate": 3.422652462916924e-06, - "loss": 1.1052, - "step": 144 - }, - { - "epoch": 0.013076610903187987, - "grad_norm": 1.4298423308035195, - "learning_rate": 3.4274184911998124e-06, - "loss": 0.9803, - "step": 145 - }, - { - "epoch": 0.01316679442665825, - "grad_norm": 1.8650811736617616, - "learning_rate": 3.4321517630157976e-06, - "loss": 1.1528, - "step": 146 - }, - { - "epoch": 0.013256977950128512, - "grad_norm": 1.7446572626832464, - "learning_rate": 3.4368527255573845e-06, - "loss": 1.1299, - "step": 147 - }, - { - "epoch": 0.013347161473598773, - "grad_norm": 1.908727630404213, - "learning_rate": 3.4415218169214994e-06, - "loss": 1.0406, - "step": 148 - }, - { - "epoch": 0.013437344997069035, - "grad_norm": 1.4259736231789406, - "learning_rate": 3.4461594663544882e-06, - "loss": 1.0478, - "step": 149 - }, - { - "epoch": 0.013527528520539298, - "grad_norm": 1.9339768771906707, - "learning_rate": 3.450766094488929e-06, - "loss": 1.0107, - "step": 150 - }, - { - "epoch": 0.013617712044009559, - "grad_norm": 2.8748784246861767, - "learning_rate": 3.4553421135725735e-06, - "loss": 1.1073, - "step": 151 - }, - { - "epoch": 0.013707895567479821, - "grad_norm": 1.643080844220555, - "learning_rate": 3.45988792768973e-06, - "loss": 1.0764, - "step": 152 - }, - { - "epoch": 0.013798079090950084, - "grad_norm": 1.5447392217059752, - "learning_rate": 3.464403932975393e-06, - "loss": 1.1266, - "step": 153 - }, - { - "epoch": 0.013888262614420345, - "grad_norm": 2.517052192295387, - "learning_rate": 3.468890517822395e-06, - "loss": 1.1152, - "step": 154 - }, - { - "epoch": 0.013978446137890607, - "grad_norm": 2.004175714590014, - "learning_rate": 3.473348063081853e-06, - "loss": 1.0614, - "step": 155 - }, - { - "epoch": 0.01406862966136087, - "grad_norm": 1.3921593334632498, - "learning_rate": 3.4777769422571727e-06, - "loss": 1.012, - "step": 156 - }, - { - "epoch": 0.01415881318483113, - "grad_norm": 1.546686565597276, - "learning_rate": 3.4821775216918497e-06, - "loss": 1.0568, - "step": 157 - }, - { - "epoch": 0.014248996708301393, - "grad_norm": 1.93440547050179, - "learning_rate": 3.4865501607513164e-06, - "loss": 1.0002, - "step": 158 - }, - { - "epoch": 0.014339180231771656, - "grad_norm": 1.7351526115204141, - "learning_rate": 3.4908952119990423e-06, - "loss": 1.1337, - "step": 159 - }, - { - "epoch": 0.014429363755241916, - "grad_norm": 1.1068749532155695, - "learning_rate": 3.495213021367122e-06, - "loss": 0.8847, - "step": 160 - }, - { - "epoch": 0.014519547278712179, - "grad_norm": 2.004798616108337, - "learning_rate": 3.4995039283215464e-06, - "loss": 1.0942, - "step": 161 - }, - { - "epoch": 0.014609730802182442, - "grad_norm": 1.6134963365577646, - "learning_rate": 3.5037682660223533e-06, - "loss": 1.0712, - "step": 162 - }, - { - "epoch": 0.014699914325652702, - "grad_norm": 1.8417665837083552, - "learning_rate": 3.508006361478857e-06, - "loss": 1.0907, - "step": 163 - }, - { - "epoch": 0.014790097849122965, - "grad_norm": 1.687839403282369, - "learning_rate": 3.5122185357001253e-06, - "loss": 1.0488, - "step": 164 - }, - { - "epoch": 0.014880281372593228, - "grad_norm": 1.9971043963893111, - "learning_rate": 3.5164051038408817e-06, - "loss": 1.1128, - "step": 165 - }, - { - "epoch": 0.01497046489606349, - "grad_norm": 1.6374122770850736, - "learning_rate": 3.5205663753430093e-06, - "loss": 1.1092, - "step": 166 - }, - { - "epoch": 0.015060648419533751, - "grad_norm": 1.8988450480234553, - "learning_rate": 3.5247026540727915e-06, - "loss": 1.1497, - "step": 167 - }, - { - "epoch": 0.015150831943004013, - "grad_norm": 2.768835585672844, - "learning_rate": 3.5288142384540645e-06, - "loss": 0.9993, - "step": 168 - }, - { - "epoch": 0.015241015466474276, - "grad_norm": 2.1658350987101227, - "learning_rate": 3.532901421597421e-06, - "loss": 1.0865, - "step": 169 - }, - { - "epoch": 0.015331198989944537, - "grad_norm": 2.5358641213679047, - "learning_rate": 3.5369644914255915e-06, - "loss": 1.005, - "step": 170 - }, - { - "epoch": 0.0154213825134148, - "grad_norm": 2.0675463352754013, - "learning_rate": 3.5410037307951596e-06, - "loss": 1.0699, - "step": 171 - }, - { - "epoch": 0.015511566036885062, - "grad_norm": 1.7256238638526302, - "learning_rate": 3.545019417614725e-06, - "loss": 1.1178, - "step": 172 - }, - { - "epoch": 0.015601749560355323, - "grad_norm": 1.5888824180806198, - "learning_rate": 3.5490118249596387e-06, - "loss": 1.1022, - "step": 173 - }, - { - "epoch": 0.015691933083825584, - "grad_norm": 1.7208182635197804, - "learning_rate": 3.5529812211834352e-06, - "loss": 1.1119, - "step": 174 - }, - { - "epoch": 0.015782116607295848, - "grad_norm": 1.336287380156004, - "learning_rate": 3.5569278700260707e-06, - "loss": 1.0648, - "step": 175 - }, - { - "epoch": 0.01587230013076611, - "grad_norm": 1.7809983460527352, - "learning_rate": 3.5608520307190746e-06, - "loss": 1.0453, - "step": 176 - }, - { - "epoch": 0.01596248365423637, - "grad_norm": 1.356505291727039, - "learning_rate": 3.564753958087731e-06, - "loss": 1.0006, - "step": 177 - }, - { - "epoch": 0.016052667177706634, - "grad_norm": 1.4724558888743615, - "learning_rate": 3.5686339026503684e-06, - "loss": 1.0685, - "step": 178 - }, - { - "epoch": 0.016142850701176895, - "grad_norm": 2.162373568857598, - "learning_rate": 3.5724921107148806e-06, - "loss": 1.1382, - "step": 179 - }, - { - "epoch": 0.016233034224647155, - "grad_norm": 1.6868554049778115, - "learning_rate": 3.576328824472552e-06, - "loss": 1.1235, - "step": 180 - }, - { - "epoch": 0.01632321774811742, - "grad_norm": 1.6173642564088448, - "learning_rate": 3.5801442820892838e-06, - "loss": 1.1495, - "step": 181 - }, - { - "epoch": 0.01641340127158768, - "grad_norm": 1.2976241188733493, - "learning_rate": 3.583938717794313e-06, - "loss": 1.0835, - "step": 182 - }, - { - "epoch": 0.01650358479505794, - "grad_norm": 1.6615816347614223, - "learning_rate": 3.5877123619664928e-06, - "loss": 1.1104, - "step": 183 - }, - { - "epoch": 0.016593768318528206, - "grad_norm": 1.5766642391167327, - "learning_rate": 3.5914654412182268e-06, - "loss": 1.0801, - "step": 184 - }, - { - "epoch": 0.016683951841998466, - "grad_norm": 1.639582129950205, - "learning_rate": 3.595198178477127e-06, - "loss": 1.0933, - "step": 185 - }, - { - "epoch": 0.016774135365468727, - "grad_norm": 1.5019097092483613, - "learning_rate": 3.5989107930654757e-06, - "loss": 0.998, - "step": 186 - }, - { - "epoch": 0.01686431888893899, - "grad_norm": 1.6057944605500047, - "learning_rate": 3.6026035007775437e-06, - "loss": 1.1265, - "step": 187 - }, - { - "epoch": 0.016954502412409252, - "grad_norm": 1.718820119641224, - "learning_rate": 3.6062765139548636e-06, - "loss": 1.0349, - "step": 188 - }, - { - "epoch": 0.017044685935879513, - "grad_norm": 1.6308500917821493, - "learning_rate": 3.6099300415594945e-06, - "loss": 0.9263, - "step": 189 - }, - { - "epoch": 0.017134869459349777, - "grad_norm": 1.7536141676990018, - "learning_rate": 3.6135642892453575e-06, - "loss": 0.943, - "step": 190 - }, - { - "epoch": 0.01722505298282004, - "grad_norm": 1.4329292773932076, - "learning_rate": 3.6171794594277004e-06, - "loss": 1.0771, - "step": 191 - }, - { - "epoch": 0.0173152365062903, - "grad_norm": 1.572728968993739, - "learning_rate": 3.620775751350745e-06, - "loss": 1.014, - "step": 192 - }, - { - "epoch": 0.017405420029760563, - "grad_norm": 2.0484029876106167, - "learning_rate": 3.6243533611535794e-06, - "loss": 1.0977, - "step": 193 - }, - { - "epoch": 0.017495603553230824, - "grad_norm": 2.9197582390394032, - "learning_rate": 3.627912481934351e-06, - "loss": 1.0548, - "step": 194 - }, - { - "epoch": 0.017585787076701085, - "grad_norm": 1.6925579141149223, - "learning_rate": 3.6314533038128e-06, - "loss": 1.032, - "step": 195 - }, - { - "epoch": 0.01767597060017135, - "grad_norm": 0.8948920242879164, - "learning_rate": 3.6349760139912048e-06, - "loss": 0.8584, - "step": 196 - }, - { - "epoch": 0.01776615412364161, - "grad_norm": 1.7079826863503502, - "learning_rate": 3.638480796813769e-06, - "loss": 1.1224, - "step": 197 - }, - { - "epoch": 0.01785633764711187, - "grad_norm": 1.3817887476759012, - "learning_rate": 3.641967833824504e-06, - "loss": 1.061, - "step": 198 - }, - { - "epoch": 0.017946521170582135, - "grad_norm": 1.4363887472174566, - "learning_rate": 3.645437303823663e-06, - "loss": 1.0526, - "step": 199 - }, - { - "epoch": 0.018036704694052396, - "grad_norm": 1.7345025642840117, - "learning_rate": 3.64888938292275e-06, - "loss": 1.0394, - "step": 200 - }, - { - "epoch": 0.01812688821752266, - "grad_norm": 1.6589997695269663, - "learning_rate": 3.6523242445981603e-06, - "loss": 1.0632, - "step": 201 - }, - { - "epoch": 0.01821707174099292, - "grad_norm": 1.3871043783928814, - "learning_rate": 3.655742059743495e-06, - "loss": 1.0306, - "step": 202 - }, - { - "epoch": 0.018307255264463182, - "grad_norm": 1.6044239812134822, - "learning_rate": 3.659142996720576e-06, - "loss": 1.1207, - "step": 203 - }, - { - "epoch": 0.018397438787933446, - "grad_norm": 1.8469498233516355, - "learning_rate": 3.6625272214092135e-06, - "loss": 1.0291, - "step": 204 - }, - { - "epoch": 0.018487622311403707, - "grad_norm": 0.6837398917795217, - "learning_rate": 3.6658948972557535e-06, - "loss": 0.7976, - "step": 205 - }, - { - "epoch": 0.018577805834873968, - "grad_norm": 1.5259021873223948, - "learning_rate": 3.6692461853204432e-06, - "loss": 0.9769, - "step": 206 - }, - { - "epoch": 0.018667989358344232, - "grad_norm": 1.5295445033442379, - "learning_rate": 3.672581244323656e-06, - "loss": 0.9959, - "step": 207 - }, - { - "epoch": 0.018758172881814493, - "grad_norm": 1.5118868633307874, - "learning_rate": 3.6759002306909926e-06, - "loss": 0.9419, - "step": 208 - }, - { - "epoch": 0.018848356405284754, - "grad_norm": 1.3259077075899082, - "learning_rate": 3.67920329859731e-06, - "loss": 1.0699, - "step": 209 - }, - { - "epoch": 0.018938539928755018, - "grad_norm": 1.8327835588048287, - "learning_rate": 3.6824906000096923e-06, - "loss": 1.0613, - "step": 210 - }, - { - "epoch": 0.01902872345222528, - "grad_norm": 1.7256513582100421, - "learning_rate": 3.6857622847294067e-06, - "loss": 1.0655, - "step": 211 - }, - { - "epoch": 0.01911890697569554, - "grad_norm": 1.9555732946000717, - "learning_rate": 3.6890185004328626e-06, - "loss": 1.0455, - "step": 212 - }, - { - "epoch": 0.019209090499165804, - "grad_norm": 1.7848847097141696, - "learning_rate": 3.6922593927116113e-06, - "loss": 0.999, - "step": 213 - }, - { - "epoch": 0.019299274022636065, - "grad_norm": 1.9809604613716605, - "learning_rate": 3.695485105111406e-06, - "loss": 1.0491, - "step": 214 - }, - { - "epoch": 0.019389457546106326, - "grad_norm": 2.266120908987782, - "learning_rate": 3.698695779170352e-06, - "loss": 1.0176, - "step": 215 - }, - { - "epoch": 0.01947964106957659, - "grad_norm": 1.5214571870815794, - "learning_rate": 3.7018915544561744e-06, - "loss": 1.046, - "step": 216 - }, - { - "epoch": 0.01956982459304685, - "grad_norm": 1.8899881850988467, - "learning_rate": 3.7050725686026164e-06, - "loss": 1.0072, - "step": 217 - }, - { - "epoch": 0.01966000811651711, - "grad_norm": 1.825579313537785, - "learning_rate": 3.708238957345014e-06, - "loss": 1.1115, - "step": 218 - }, - { - "epoch": 0.019750191639987376, - "grad_norm": 1.3424199641290213, - "learning_rate": 3.7113908545550482e-06, - "loss": 1.0929, - "step": 219 - }, - { - "epoch": 0.019840375163457637, - "grad_norm": 1.4346808279950467, - "learning_rate": 3.7145283922747028e-06, - "loss": 1.0383, - "step": 220 - }, - { - "epoch": 0.019930558686927898, - "grad_norm": 1.4762914499637956, - "learning_rate": 3.7176517007494612e-06, - "loss": 1.1312, - "step": 221 - }, - { - "epoch": 0.020020742210398162, - "grad_norm": 1.513823785849959, - "learning_rate": 3.7207609084607496e-06, - "loss": 0.9942, - "step": 222 - }, - { - "epoch": 0.020110925733868423, - "grad_norm": 3.081632213602809, - "learning_rate": 3.723856142157645e-06, - "loss": 1.0956, - "step": 223 - }, - { - "epoch": 0.020201109257338683, - "grad_norm": 1.6283449079374839, - "learning_rate": 3.726937526887885e-06, - "loss": 1.0577, - "step": 224 - }, - { - "epoch": 0.020291292780808948, - "grad_norm": 1.876571505332004, - "learning_rate": 3.7300051860281798e-06, - "loss": 1.0122, - "step": 225 - }, - { - "epoch": 0.02038147630427921, - "grad_norm": 1.7335237355682687, - "learning_rate": 3.733059241313852e-06, - "loss": 1.1283, - "step": 226 - }, - { - "epoch": 0.02047165982774947, - "grad_norm": 1.5461138769750924, - "learning_rate": 3.736099812867827e-06, - "loss": 1.0454, - "step": 227 - }, - { - "epoch": 0.020561843351219734, - "grad_norm": 1.5887110468713466, - "learning_rate": 3.73912701922898e-06, - "loss": 1.0983, - "step": 228 - }, - { - "epoch": 0.020652026874689994, - "grad_norm": 0.8900856258692695, - "learning_rate": 3.742140977379868e-06, - "loss": 0.8312, - "step": 229 - }, - { - "epoch": 0.020742210398160255, - "grad_norm": 1.4863541108314817, - "learning_rate": 3.745141802773854e-06, - "loss": 1.0835, - "step": 230 - }, - { - "epoch": 0.02083239392163052, - "grad_norm": 1.6139445199896856, - "learning_rate": 3.748129609361645e-06, - "loss": 1.0488, - "step": 231 - }, - { - "epoch": 0.02092257744510078, - "grad_norm": 1.2940118275946453, - "learning_rate": 3.7511045096172555e-06, - "loss": 1.0274, - "step": 232 - }, - { - "epoch": 0.02101276096857104, - "grad_norm": 1.62517964763906, - "learning_rate": 3.7540666145634137e-06, - "loss": 1.0458, - "step": 233 - }, - { - "epoch": 0.021102944492041305, - "grad_norm": 1.5201552808650485, - "learning_rate": 3.7570160337964225e-06, - "loss": 1.0322, - "step": 234 - }, - { - "epoch": 0.021193128015511566, - "grad_norm": 1.6009288743974595, - "learning_rate": 3.7599528755104913e-06, - "loss": 1.073, - "step": 235 - }, - { - "epoch": 0.021283311538981827, - "grad_norm": 1.7098226878052356, - "learning_rate": 3.7628772465215515e-06, - "loss": 1.0445, - "step": 236 - }, - { - "epoch": 0.02137349506245209, - "grad_norm": 1.471004460279392, - "learning_rate": 3.7657892522905666e-06, - "loss": 1.0395, - "step": 237 - }, - { - "epoch": 0.021463678585922352, - "grad_norm": 5.755951827917994, - "learning_rate": 3.7686889969463542e-06, - "loss": 0.9692, - "step": 238 - }, - { - "epoch": 0.021553862109392613, - "grad_norm": 2.0534400779465276, - "learning_rate": 3.771576583307928e-06, - "loss": 1.0333, - "step": 239 - }, - { - "epoch": 0.021644045632862877, - "grad_norm": 1.8393185264739884, - "learning_rate": 3.7744521129063722e-06, - "loss": 1.0878, - "step": 240 - }, - { - "epoch": 0.021734229156333138, - "grad_norm": 1.6060187889558222, - "learning_rate": 3.7773156860062653e-06, - "loss": 1.0678, - "step": 241 - }, - { - "epoch": 0.0218244126798034, - "grad_norm": 1.8878738942461628, - "learning_rate": 3.7801674016266554e-06, - "loss": 1.1548, - "step": 242 - }, - { - "epoch": 0.021914596203273663, - "grad_norm": 0.8503066785922234, - "learning_rate": 3.7830073575616035e-06, - "loss": 0.7749, - "step": 243 - }, - { - "epoch": 0.022004779726743924, - "grad_norm": 1.4846681004314468, - "learning_rate": 3.785835650400313e-06, - "loss": 1.0811, - "step": 244 - }, - { - "epoch": 0.022094963250214185, - "grad_norm": 1.6466436524012, - "learning_rate": 3.7886523755468334e-06, - "loss": 0.9568, - "step": 245 - }, - { - "epoch": 0.02218514677368445, - "grad_norm": 1.4424231439012658, - "learning_rate": 3.7914576272393746e-06, - "loss": 1.0392, - "step": 246 - }, - { - "epoch": 0.02227533029715471, - "grad_norm": 1.6920746854617321, - "learning_rate": 3.7942514985692284e-06, - "loss": 1.1556, - "step": 247 - }, - { - "epoch": 0.02236551382062497, - "grad_norm": 1.5158685704784822, - "learning_rate": 3.797034081499296e-06, - "loss": 1.0482, - "step": 248 - }, - { - "epoch": 0.022455697344095235, - "grad_norm": 1.3548023883540612, - "learning_rate": 3.7998054668822595e-06, - "loss": 1.0285, - "step": 249 - }, - { - "epoch": 0.022545880867565496, - "grad_norm": 2.071088077015052, - "learning_rate": 3.8025657444783776e-06, - "loss": 1.0225, - "step": 250 - }, - { - "epoch": 0.022636064391035757, - "grad_norm": 1.6509131332410225, - "learning_rate": 3.80531500297293e-06, - "loss": 0.9576, - "step": 251 - }, - { - "epoch": 0.02272624791450602, - "grad_norm": 4.039943812895489, - "learning_rate": 3.8080533299933147e-06, - "loss": 1.0191, - "step": 252 - }, - { - "epoch": 0.022816431437976282, - "grad_norm": 1.598650112536202, - "learning_rate": 3.8107808121258067e-06, - "loss": 1.0099, - "step": 253 - }, - { - "epoch": 0.022906614961446543, - "grad_norm": 1.8323364003677243, - "learning_rate": 3.813497534931985e-06, - "loss": 1.105, - "step": 254 - }, - { - "epoch": 0.022996798484916807, - "grad_norm": 1.5203559422338624, - "learning_rate": 3.816203582964841e-06, - "loss": 1.0673, - "step": 255 - }, - { - "epoch": 0.023086982008387068, - "grad_norm": 1.8694247963515054, - "learning_rate": 3.818899039784565e-06, - "loss": 1.1018, - "step": 256 - }, - { - "epoch": 0.02317716553185733, - "grad_norm": 1.6003478864121843, - "learning_rate": 3.821583987974031e-06, - "loss": 1.0993, - "step": 257 - }, - { - "epoch": 0.023267349055327593, - "grad_norm": 1.5070988839151986, - "learning_rate": 3.8242585091539755e-06, - "loss": 0.9837, - "step": 258 - }, - { - "epoch": 0.023357532578797854, - "grad_norm": 1.9142130889821325, - "learning_rate": 3.8269226839978895e-06, - "loss": 1.1261, - "step": 259 - }, - { - "epoch": 0.023447716102268115, - "grad_norm": 2.7079279571765835, - "learning_rate": 3.82957659224662e-06, - "loss": 1.0026, - "step": 260 - }, - { - "epoch": 0.02353789962573838, - "grad_norm": 1.6767553241691278, - "learning_rate": 3.8322203127226855e-06, - "loss": 0.9357, - "step": 261 - }, - { - "epoch": 0.02362808314920864, - "grad_norm": 2.066454592970373, - "learning_rate": 3.834853923344326e-06, - "loss": 1.1619, - "step": 262 - }, - { - "epoch": 0.0237182666726789, - "grad_norm": 1.3707547108250788, - "learning_rate": 3.837477501139285e-06, - "loss": 1.0531, - "step": 263 - }, - { - "epoch": 0.023808450196149165, - "grad_norm": 1.6926652482247067, - "learning_rate": 3.840091122258324e-06, - "loss": 1.0636, - "step": 264 - }, - { - "epoch": 0.023898633719619426, - "grad_norm": 1.651934850433362, - "learning_rate": 3.84269486198849e-06, - "loss": 0.9769, - "step": 265 - }, - { - "epoch": 0.023988817243089686, - "grad_norm": 1.4737128962148531, - "learning_rate": 3.845288794766121e-06, - "loss": 1.0661, - "step": 266 - }, - { - "epoch": 0.02407900076655995, - "grad_norm": 1.588601116464629, - "learning_rate": 3.847872994189619e-06, - "loss": 1.0495, - "step": 267 - }, - { - "epoch": 0.02416918429003021, - "grad_norm": 1.4860159386951919, - "learning_rate": 3.8504475330319805e-06, - "loss": 1.1003, - "step": 268 - }, - { - "epoch": 0.024259367813500472, - "grad_norm": 1.4466677523460945, - "learning_rate": 3.853012483253093e-06, - "loss": 1.025, - "step": 269 - }, - { - "epoch": 0.024349551336970737, - "grad_norm": 1.5762515892849684, - "learning_rate": 3.855567916011802e-06, - "loss": 1.0513, - "step": 270 - }, - { - "epoch": 0.024439734860440997, - "grad_norm": 1.9184752997666714, - "learning_rate": 3.858113901677755e-06, - "loss": 1.0592, - "step": 271 - }, - { - "epoch": 0.024529918383911258, - "grad_norm": 1.4726220083935668, - "learning_rate": 3.860650509843034e-06, - "loss": 1.0096, - "step": 272 - }, - { - "epoch": 0.024620101907381522, - "grad_norm": 1.9729834559037482, - "learning_rate": 3.863177809333563e-06, - "loss": 1.0395, - "step": 273 - }, - { - "epoch": 0.024710285430851783, - "grad_norm": 1.4858673624680152, - "learning_rate": 3.86569586822032e-06, - "loss": 1.0289, - "step": 274 - }, - { - "epoch": 0.024800468954322044, - "grad_norm": 1.7311616978349837, - "learning_rate": 3.868204753830331e-06, - "loss": 1.009, - "step": 275 - }, - { - "epoch": 0.02489065247779231, - "grad_norm": 1.5393265241144931, - "learning_rate": 3.870704532757476e-06, - "loss": 0.9645, - "step": 276 - }, - { - "epoch": 0.02498083600126257, - "grad_norm": 1.6806437764337157, - "learning_rate": 3.8731952708730974e-06, - "loss": 1.0763, - "step": 277 - }, - { - "epoch": 0.02507101952473283, - "grad_norm": 1.4854218656316072, - "learning_rate": 3.8756770333364085e-06, - "loss": 1.0707, - "step": 278 - }, - { - "epoch": 0.025161203048203094, - "grad_norm": 1.6317789557488491, - "learning_rate": 3.878149884604725e-06, - "loss": 0.9698, - "step": 279 - }, - { - "epoch": 0.025251386571673355, - "grad_norm": 1.8406979143243727, - "learning_rate": 3.8806138884435125e-06, - "loss": 1.0216, - "step": 280 - }, - { - "epoch": 0.025341570095143616, - "grad_norm": 1.4031454360934996, - "learning_rate": 3.883069107936248e-06, - "loss": 0.9765, - "step": 281 - }, - { - "epoch": 0.02543175361861388, - "grad_norm": 1.8378537830061348, - "learning_rate": 3.885515605494114e-06, - "loss": 1.1096, - "step": 282 - }, - { - "epoch": 0.02552193714208414, - "grad_norm": 1.9841227333537994, - "learning_rate": 3.8879534428655145e-06, - "loss": 1.0952, - "step": 283 - }, - { - "epoch": 0.025612120665554402, - "grad_norm": 1.6247306747261585, - "learning_rate": 3.890382681145432e-06, - "loss": 1.0872, - "step": 284 - }, - { - "epoch": 0.025702304189024666, - "grad_norm": 1.8635001064021084, - "learning_rate": 3.892803380784608e-06, - "loss": 1.0816, - "step": 285 - }, - { - "epoch": 0.025792487712494927, - "grad_norm": 1.2180721647685675, - "learning_rate": 3.8952156015985725e-06, - "loss": 0.9108, - "step": 286 - }, - { - "epoch": 0.025882671235965188, - "grad_norm": 2.0512397180981834, - "learning_rate": 3.897619402776516e-06, - "loss": 1.0544, - "step": 287 - }, - { - "epoch": 0.025972854759435452, - "grad_norm": 1.9312777742530598, - "learning_rate": 3.900014842889995e-06, - "loss": 1.1216, - "step": 288 - }, - { - "epoch": 0.026063038282905713, - "grad_norm": 1.6301181593585352, - "learning_rate": 3.902401979901503e-06, - "loss": 0.967, - "step": 289 - }, - { - "epoch": 0.026153221806375974, - "grad_norm": 1.6022580008995222, - "learning_rate": 3.904780871172884e-06, - "loss": 1.0899, - "step": 290 - }, - { - "epoch": 0.026243405329846238, - "grad_norm": 2.646099307491119, - "learning_rate": 3.907151573473601e-06, - "loss": 0.9333, - "step": 291 - }, - { - "epoch": 0.0263335888533165, - "grad_norm": 1.3022584975359546, - "learning_rate": 3.909514142988868e-06, - "loss": 1.0591, - "step": 292 - }, - { - "epoch": 0.02642377237678676, - "grad_norm": 1.7466932799347776, - "learning_rate": 3.911868635327639e-06, - "loss": 1.0115, - "step": 293 - }, - { - "epoch": 0.026513955900257024, - "grad_norm": 1.339104050622602, - "learning_rate": 3.914215105530455e-06, - "loss": 1.0166, - "step": 294 - }, - { - "epoch": 0.026604139423727285, - "grad_norm": 1.4474169282678886, - "learning_rate": 3.916553608077179e-06, - "loss": 1.0325, - "step": 295 - }, - { - "epoch": 0.026694322947197546, - "grad_norm": 1.5858587987755817, - "learning_rate": 3.91888419689457e-06, - "loss": 1.0334, - "step": 296 - }, - { - "epoch": 0.02678450647066781, - "grad_norm": 1.4278838239903247, - "learning_rate": 3.921206925363754e-06, - "loss": 0.9764, - "step": 297 - }, - { - "epoch": 0.02687468999413807, - "grad_norm": 8.581468496854137, - "learning_rate": 3.923521846327559e-06, - "loss": 0.9548, - "step": 298 - }, - { - "epoch": 0.02696487351760833, - "grad_norm": 1.6479487123841814, - "learning_rate": 3.925829012097725e-06, - "loss": 1.0028, - "step": 299 - }, - { - "epoch": 0.027055057041078596, - "grad_norm": 1.9980574949696381, - "learning_rate": 3.928128474462e-06, - "loss": 0.9641, - "step": 300 - }, - { - "epoch": 0.027145240564548857, - "grad_norm": 2.1539859772192975, - "learning_rate": 3.930420284691115e-06, - "loss": 1.0521, - "step": 301 - }, - { - "epoch": 0.027235424088019117, - "grad_norm": 1.7521026105270068, - "learning_rate": 3.932704493545644e-06, - "loss": 1.0787, - "step": 302 - }, - { - "epoch": 0.02732560761148938, - "grad_norm": 2.6145386913688142, - "learning_rate": 3.934981151282745e-06, - "loss": 1.1213, - "step": 303 - }, - { - "epoch": 0.027415791134959643, - "grad_norm": 1.598701713256338, - "learning_rate": 3.9372503076628006e-06, - "loss": 1.0326, - "step": 304 - }, - { - "epoch": 0.027505974658429903, - "grad_norm": 1.5483168230658166, - "learning_rate": 3.939512011955941e-06, - "loss": 1.0641, - "step": 305 - }, - { - "epoch": 0.027596158181900168, - "grad_norm": 1.571979150677513, - "learning_rate": 3.941766312948463e-06, - "loss": 1.038, - "step": 306 - }, - { - "epoch": 0.02768634170537043, - "grad_norm": 1.9473569613264694, - "learning_rate": 3.944013258949147e-06, - "loss": 1.0102, - "step": 307 - }, - { - "epoch": 0.02777652522884069, - "grad_norm": 1.8392786668703296, - "learning_rate": 3.946252897795465e-06, - "loss": 0.8629, - "step": 308 - }, - { - "epoch": 0.027866708752310954, - "grad_norm": 1.941265937754489, - "learning_rate": 3.9484852768596935e-06, - "loss": 1.0496, - "step": 309 - }, - { - "epoch": 0.027956892275781214, - "grad_norm": 1.6542467922659818, - "learning_rate": 3.950710443054923e-06, - "loss": 0.9461, - "step": 310 - }, - { - "epoch": 0.028047075799251475, - "grad_norm": 1.6065258991470157, - "learning_rate": 3.952928442840981e-06, - "loss": 0.9504, - "step": 311 - }, - { - "epoch": 0.02813725932272174, - "grad_norm": 1.6196958834508266, - "learning_rate": 3.955139322230243e-06, - "loss": 1.0634, - "step": 312 - }, - { - "epoch": 0.028227442846192, - "grad_norm": 1.7465977975988485, - "learning_rate": 3.957343126793365e-06, - "loss": 0.9695, - "step": 313 - }, - { - "epoch": 0.02831762636966226, - "grad_norm": 1.5587462539444292, - "learning_rate": 3.959539901664921e-06, - "loss": 1.015, - "step": 314 - }, - { - "epoch": 0.028407809893132525, - "grad_norm": 2.013210772937591, - "learning_rate": 3.9617296915489425e-06, - "loss": 1.1384, - "step": 315 - }, - { - "epoch": 0.028497993416602786, - "grad_norm": 4.930641722784359, - "learning_rate": 3.963912540724387e-06, - "loss": 0.9817, - "step": 316 - }, - { - "epoch": 0.028588176940073047, - "grad_norm": 1.7573833704705055, - "learning_rate": 3.966088493050501e-06, - "loss": 1.0383, - "step": 317 - }, - { - "epoch": 0.02867836046354331, - "grad_norm": 1.7238803242746477, - "learning_rate": 3.968257591972113e-06, - "loss": 1.0153, - "step": 318 - }, - { - "epoch": 0.028768543987013572, - "grad_norm": 1.3738036152700874, - "learning_rate": 3.970419880524835e-06, - "loss": 1.111, - "step": 319 - }, - { - "epoch": 0.028858727510483833, - "grad_norm": 1.5216138047684165, - "learning_rate": 3.972575401340192e-06, - "loss": 1.0007, - "step": 320 - }, - { - "epoch": 0.028948911033954097, - "grad_norm": 1.9634337323893503, - "learning_rate": 3.974724196650656e-06, - "loss": 1.0694, - "step": 321 - }, - { - "epoch": 0.029039094557424358, - "grad_norm": 2.454366673435263, - "learning_rate": 3.976866308294617e-06, - "loss": 1.0176, - "step": 322 - }, - { - "epoch": 0.02912927808089462, - "grad_norm": 1.3531885233498129, - "learning_rate": 3.979001777721269e-06, - "loss": 0.9902, - "step": 323 - }, - { - "epoch": 0.029219461604364883, - "grad_norm": 1.638787757677157, - "learning_rate": 3.981130645995424e-06, - "loss": 0.9953, - "step": 324 - }, - { - "epoch": 0.029309645127835144, - "grad_norm": 1.9198204161184618, - "learning_rate": 3.983252953802248e-06, - "loss": 1.1025, - "step": 325 - }, - { - "epoch": 0.029399828651305405, - "grad_norm": 0.9938088633539907, - "learning_rate": 3.9853687414519285e-06, - "loss": 0.8625, - "step": 326 - }, - { - "epoch": 0.02949001217477567, - "grad_norm": 1.8037496664849262, - "learning_rate": 3.987478048884265e-06, - "loss": 1.0641, - "step": 327 - }, - { - "epoch": 0.02958019569824593, - "grad_norm": 1.5330312061505376, - "learning_rate": 3.989580915673196e-06, - "loss": 1.0101, - "step": 328 - }, - { - "epoch": 0.02967037922171619, - "grad_norm": 1.6494869128006104, - "learning_rate": 3.991677381031255e-06, - "loss": 1.0067, - "step": 329 - }, - { - "epoch": 0.029760562745186455, - "grad_norm": 1.7342976045952816, - "learning_rate": 3.993767483813953e-06, - "loss": 1.1007, - "step": 330 - }, - { - "epoch": 0.029850746268656716, - "grad_norm": 1.6469937913241441, - "learning_rate": 3.995851262524104e-06, - "loss": 0.9449, - "step": 331 - }, - { - "epoch": 0.02994092979212698, - "grad_norm": 1.9006536915252716, - "learning_rate": 3.997928755316079e-06, - "loss": 1.0227, - "step": 332 - }, - { - "epoch": 0.03003111331559724, - "grad_norm": 0.9688871040538025, - "learning_rate": 4e-06, - "loss": 0.8635, - "step": 333 - }, - { - "epoch": 0.030121296839067502, - "grad_norm": 1.646674492517529, - "learning_rate": 3.999999914674486e-06, - "loss": 0.9917, - "step": 334 - }, - { - "epoch": 0.030211480362537766, - "grad_norm": 1.5026608549517328, - "learning_rate": 3.999999658697952e-06, - "loss": 1.0031, - "step": 335 - }, - { - "epoch": 0.030301663886008027, - "grad_norm": 0.9794813861042179, - "learning_rate": 3.9999992320704185e-06, - "loss": 0.787, - "step": 336 - }, - { - "epoch": 0.030391847409478288, - "grad_norm": 2.300689021854271, - "learning_rate": 3.999998634791922e-06, - "loss": 1.042, - "step": 337 - }, - { - "epoch": 0.030482030932948552, - "grad_norm": 2.0882830630150777, - "learning_rate": 3.999997866862515e-06, - "loss": 0.9998, - "step": 338 - }, - { - "epoch": 0.030572214456418813, - "grad_norm": 1.4778504582428869, - "learning_rate": 3.999996928282262e-06, - "loss": 1.1081, - "step": 339 - }, - { - "epoch": 0.030662397979889074, - "grad_norm": 2.0831398937701233, - "learning_rate": 3.999995819051244e-06, - "loss": 1.0157, - "step": 340 - }, - { - "epoch": 0.030752581503359338, - "grad_norm": 1.474192013318675, - "learning_rate": 3.9999945391695536e-06, - "loss": 0.9876, - "step": 341 - }, - { - "epoch": 0.0308427650268296, - "grad_norm": 1.4796352002610105, - "learning_rate": 3.999993088637302e-06, - "loss": 1.1104, - "step": 342 - }, - { - "epoch": 0.03093294855029986, - "grad_norm": 2.2938809765416766, - "learning_rate": 3.999991467454612e-06, - "loss": 1.0822, - "step": 343 - }, - { - "epoch": 0.031023132073770124, - "grad_norm": 2.561589242000494, - "learning_rate": 3.999989675621622e-06, - "loss": 0.9405, - "step": 344 - }, - { - "epoch": 0.031113315597240385, - "grad_norm": 1.644828352741495, - "learning_rate": 3.999987713138485e-06, - "loss": 1.0312, - "step": 345 - }, - { - "epoch": 0.031203499120710645, - "grad_norm": 1.154647231787119, - "learning_rate": 3.999985580005369e-06, - "loss": 0.9006, - "step": 346 - }, - { - "epoch": 0.031293682644180906, - "grad_norm": 1.9070950495492187, - "learning_rate": 3.999983276222455e-06, - "loss": 1.0916, - "step": 347 - }, - { - "epoch": 0.03138386616765117, - "grad_norm": 1.6025083400610463, - "learning_rate": 3.999980801789941e-06, - "loss": 1.0347, - "step": 348 - }, - { - "epoch": 0.031474049691121435, - "grad_norm": 1.6160621597638443, - "learning_rate": 3.999978156708036e-06, - "loss": 1.0302, - "step": 349 - }, - { - "epoch": 0.031564233214591696, - "grad_norm": 1.8831350596324132, - "learning_rate": 3.9999753409769675e-06, - "loss": 1.131, - "step": 350 - }, - { - "epoch": 0.031654416738061956, - "grad_norm": 1.2270416004400626, - "learning_rate": 3.999972354596975e-06, - "loss": 0.8095, - "step": 351 - }, - { - "epoch": 0.03174460026153222, - "grad_norm": 1.706550512992981, - "learning_rate": 3.999969197568314e-06, - "loss": 0.976, - "step": 352 - }, - { - "epoch": 0.03183478378500248, - "grad_norm": 1.7558954933194266, - "learning_rate": 3.999965869891253e-06, - "loss": 1.0749, - "step": 353 - }, - { - "epoch": 0.03192496730847274, - "grad_norm": 1.5138045714265107, - "learning_rate": 3.999962371566075e-06, - "loss": 1.0533, - "step": 354 - }, - { - "epoch": 0.03201515083194301, - "grad_norm": 1.5369449512995563, - "learning_rate": 3.999958702593082e-06, - "loss": 1.0909, - "step": 355 - }, - { - "epoch": 0.03210533435541327, - "grad_norm": 1.5056331083555243, - "learning_rate": 3.999954862972583e-06, - "loss": 1.0446, - "step": 356 - }, - { - "epoch": 0.03219551787888353, - "grad_norm": 1.5646634954581018, - "learning_rate": 3.999950852704908e-06, - "loss": 0.9675, - "step": 357 - }, - { - "epoch": 0.03228570140235379, - "grad_norm": 1.374000793140299, - "learning_rate": 3.9999466717903995e-06, - "loss": 1.0774, - "step": 358 - }, - { - "epoch": 0.03237588492582405, - "grad_norm": 0.8887979941949778, - "learning_rate": 3.999942320229413e-06, - "loss": 0.8233, - "step": 359 - }, - { - "epoch": 0.03246606844929431, - "grad_norm": 1.8405398208260828, - "learning_rate": 3.99993779802232e-06, - "loss": 1.1475, - "step": 360 - }, - { - "epoch": 0.03255625197276458, - "grad_norm": 1.6612980046738308, - "learning_rate": 3.999933105169506e-06, - "loss": 1.0123, - "step": 361 - }, - { - "epoch": 0.03264643549623484, - "grad_norm": 1.4205752290263414, - "learning_rate": 3.999928241671373e-06, - "loss": 1.0176, - "step": 362 - }, - { - "epoch": 0.0327366190197051, - "grad_norm": 2.2642734047622586, - "learning_rate": 3.999923207528334e-06, - "loss": 0.9264, - "step": 363 - }, - { - "epoch": 0.03282680254317536, - "grad_norm": 1.348517897099934, - "learning_rate": 3.9999180027408196e-06, - "loss": 1.0209, - "step": 364 - }, - { - "epoch": 0.03291698606664562, - "grad_norm": 1.5275916169157544, - "learning_rate": 3.9999126273092735e-06, - "loss": 0.9914, - "step": 365 - }, - { - "epoch": 0.03300716959011588, - "grad_norm": 1.6634904478146855, - "learning_rate": 3.999907081234156e-06, - "loss": 1.104, - "step": 366 - }, - { - "epoch": 0.03309735311358615, - "grad_norm": 1.7096197897978178, - "learning_rate": 3.999901364515938e-06, - "loss": 0.9884, - "step": 367 - }, - { - "epoch": 0.03318753663705641, - "grad_norm": 1.6133399195313334, - "learning_rate": 3.999895477155108e-06, - "loss": 1.067, - "step": 368 - }, - { - "epoch": 0.03327772016052667, - "grad_norm": 1.3682636058680826, - "learning_rate": 3.999889419152169e-06, - "loss": 0.8024, - "step": 369 - }, - { - "epoch": 0.03336790368399693, - "grad_norm": 1.5631261715280198, - "learning_rate": 3.999883190507638e-06, - "loss": 1.0187, - "step": 370 - }, - { - "epoch": 0.033458087207467194, - "grad_norm": 1.7055476045923672, - "learning_rate": 3.999876791222044e-06, - "loss": 1.0107, - "step": 371 - }, - { - "epoch": 0.033548270730937454, - "grad_norm": 1.8680352237860232, - "learning_rate": 3.999870221295936e-06, - "loss": 0.9702, - "step": 372 - }, - { - "epoch": 0.03363845425440772, - "grad_norm": 2.6424768037959865, - "learning_rate": 3.999863480729875e-06, - "loss": 0.9973, - "step": 373 - }, - { - "epoch": 0.03372863777787798, - "grad_norm": 1.9268942937834663, - "learning_rate": 3.999856569524433e-06, - "loss": 0.9601, - "step": 374 - }, - { - "epoch": 0.033818821301348244, - "grad_norm": 1.3686193325431615, - "learning_rate": 3.999849487680202e-06, - "loss": 0.9698, - "step": 375 - }, - { - "epoch": 0.033909004824818505, - "grad_norm": 1.4978568343237517, - "learning_rate": 3.999842235197786e-06, - "loss": 0.9541, - "step": 376 - }, - { - "epoch": 0.033999188348288766, - "grad_norm": 1.953109008723598, - "learning_rate": 3.999834812077803e-06, - "loss": 0.96, - "step": 377 - }, - { - "epoch": 0.034089371871759026, - "grad_norm": 1.4379091095299754, - "learning_rate": 3.999827218320886e-06, - "loss": 1.0755, - "step": 378 - }, - { - "epoch": 0.034179555395229294, - "grad_norm": 1.903216465347793, - "learning_rate": 3.999819453927685e-06, - "loss": 0.9314, - "step": 379 - }, - { - "epoch": 0.034269738918699555, - "grad_norm": 1.7895406558802909, - "learning_rate": 3.999811518898861e-06, - "loss": 1.0295, - "step": 380 - }, - { - "epoch": 0.034359922442169816, - "grad_norm": 2.168511400097379, - "learning_rate": 3.999803413235092e-06, - "loss": 1.0324, - "step": 381 - }, - { - "epoch": 0.03445010596564008, - "grad_norm": 1.616908394874543, - "learning_rate": 3.999795136937068e-06, - "loss": 1.1318, - "step": 382 - }, - { - "epoch": 0.03454028948911034, - "grad_norm": 1.5938986166681601, - "learning_rate": 3.999786690005496e-06, - "loss": 1.0219, - "step": 383 - }, - { - "epoch": 0.0346304730125806, - "grad_norm": 1.5091069648908038, - "learning_rate": 3.999778072441098e-06, - "loss": 1.0028, - "step": 384 - }, - { - "epoch": 0.034720656536050866, - "grad_norm": 2.1446841103806156, - "learning_rate": 3.999769284244608e-06, - "loss": 1.0462, - "step": 385 - }, - { - "epoch": 0.03481084005952113, - "grad_norm": 1.3153752214445533, - "learning_rate": 3.999760325416775e-06, - "loss": 1.0295, - "step": 386 - }, - { - "epoch": 0.03490102358299139, - "grad_norm": 1.7410440156336597, - "learning_rate": 3.999751195958366e-06, - "loss": 1.1069, - "step": 387 - }, - { - "epoch": 0.03499120710646165, - "grad_norm": 1.4774328825240357, - "learning_rate": 3.999741895870157e-06, - "loss": 0.8309, - "step": 388 - }, - { - "epoch": 0.03508139062993191, - "grad_norm": 1.6915627774940092, - "learning_rate": 3.999732425152944e-06, - "loss": 1.0652, - "step": 389 - }, - { - "epoch": 0.03517157415340217, - "grad_norm": 1.3883773126463563, - "learning_rate": 3.999722783807533e-06, - "loss": 1.0826, - "step": 390 - }, - { - "epoch": 0.03526175767687244, - "grad_norm": 1.9244354064123246, - "learning_rate": 3.999712971834748e-06, - "loss": 0.9594, - "step": 391 - }, - { - "epoch": 0.0353519412003427, - "grad_norm": 1.5682423929218192, - "learning_rate": 3.999702989235427e-06, - "loss": 1.0277, - "step": 392 - }, - { - "epoch": 0.03544212472381296, - "grad_norm": 1.491972663153399, - "learning_rate": 3.999692836010419e-06, - "loss": 1.1111, - "step": 393 - }, - { - "epoch": 0.03553230824728322, - "grad_norm": 1.9250540762898103, - "learning_rate": 3.999682512160593e-06, - "loss": 1.1602, - "step": 394 - }, - { - "epoch": 0.03562249177075348, - "grad_norm": 1.0819464695075924, - "learning_rate": 3.99967201768683e-06, - "loss": 0.8123, - "step": 395 - }, - { - "epoch": 0.03571267529422374, - "grad_norm": 1.67712583152362, - "learning_rate": 3.999661352590023e-06, - "loss": 1.0141, - "step": 396 - }, - { - "epoch": 0.03580285881769401, - "grad_norm": 1.4471403368878317, - "learning_rate": 3.999650516871083e-06, - "loss": 1.0211, - "step": 397 - }, - { - "epoch": 0.03589304234116427, - "grad_norm": 1.6772913675430758, - "learning_rate": 3.9996395105309365e-06, - "loss": 1.0552, - "step": 398 - }, - { - "epoch": 0.03598322586463453, - "grad_norm": 1.4669132516665868, - "learning_rate": 3.99962833357052e-06, - "loss": 1.0379, - "step": 399 - }, - { - "epoch": 0.03607340938810479, - "grad_norm": 1.4527589890768626, - "learning_rate": 3.999616985990789e-06, - "loss": 1.0059, - "step": 400 - }, - { - "epoch": 0.03616359291157505, - "grad_norm": 1.3434120462956263, - "learning_rate": 3.9996054677927104e-06, - "loss": 0.9685, - "step": 401 - }, - { - "epoch": 0.03625377643504532, - "grad_norm": 1.9387896152878534, - "learning_rate": 3.9995937789772675e-06, - "loss": 0.9606, - "step": 402 - }, - { - "epoch": 0.03634395995851558, - "grad_norm": 0.9081511451858889, - "learning_rate": 3.999581919545458e-06, - "loss": 0.8138, - "step": 403 - }, - { - "epoch": 0.03643414348198584, - "grad_norm": 2.064016073947978, - "learning_rate": 3.9995698894982935e-06, - "loss": 1.0539, - "step": 404 - }, - { - "epoch": 0.0365243270054561, - "grad_norm": 1.624492199123787, - "learning_rate": 3.9995576888368e-06, - "loss": 1.0811, - "step": 405 - }, - { - "epoch": 0.036614510528926364, - "grad_norm": 1.7164629892547771, - "learning_rate": 3.9995453175620194e-06, - "loss": 1.022, - "step": 406 - }, - { - "epoch": 0.036704694052396625, - "grad_norm": 1.7617778895024472, - "learning_rate": 3.999532775675007e-06, - "loss": 0.8745, - "step": 407 - }, - { - "epoch": 0.03679487757586689, - "grad_norm": 1.7549467335195232, - "learning_rate": 3.9995200631768326e-06, - "loss": 1.0001, - "step": 408 - }, - { - "epoch": 0.03688506109933715, - "grad_norm": 1.844463210444499, - "learning_rate": 3.9995071800685815e-06, - "loss": 1.1587, - "step": 409 - }, - { - "epoch": 0.036975244622807414, - "grad_norm": 2.0460949059155764, - "learning_rate": 3.999494126351352e-06, - "loss": 0.9998, - "step": 410 - }, - { - "epoch": 0.037065428146277675, - "grad_norm": 1.6789750118384155, - "learning_rate": 3.99948090202626e-06, - "loss": 1.0336, - "step": 411 - }, - { - "epoch": 0.037155611669747936, - "grad_norm": 1.8558256749630475, - "learning_rate": 3.999467507094431e-06, - "loss": 1.0313, - "step": 412 - }, - { - "epoch": 0.0372457951932182, - "grad_norm": 1.494794635127329, - "learning_rate": 3.999453941557011e-06, - "loss": 0.9361, - "step": 413 - }, - { - "epoch": 0.037335978716688464, - "grad_norm": 1.0640383971140994, - "learning_rate": 3.999440205415154e-06, - "loss": 0.7834, - "step": 414 - }, - { - "epoch": 0.037426162240158725, - "grad_norm": 1.9139505781810575, - "learning_rate": 3.999426298670035e-06, - "loss": 1.0679, - "step": 415 - }, - { - "epoch": 0.037516345763628986, - "grad_norm": 1.6001529352291413, - "learning_rate": 3.9994122213228385e-06, - "loss": 1.0653, - "step": 416 - }, - { - "epoch": 0.03760652928709925, - "grad_norm": 2.0833558526411493, - "learning_rate": 3.9993979733747675e-06, - "loss": 1.0547, - "step": 417 - }, - { - "epoch": 0.03769671281056951, - "grad_norm": 1.5107624666595059, - "learning_rate": 3.999383554827037e-06, - "loss": 1.0219, - "step": 418 - }, - { - "epoch": 0.03778689633403977, - "grad_norm": 1.6424816706547716, - "learning_rate": 3.999368965680876e-06, - "loss": 0.9875, - "step": 419 - }, - { - "epoch": 0.037877079857510036, - "grad_norm": 1.7052137532863652, - "learning_rate": 3.999354205937531e-06, - "loss": 1.0616, - "step": 420 - }, - { - "epoch": 0.0379672633809803, - "grad_norm": 1.7004070467849337, - "learning_rate": 3.999339275598261e-06, - "loss": 1.0696, - "step": 421 - }, - { - "epoch": 0.03805744690445056, - "grad_norm": 1.6194253816058817, - "learning_rate": 3.99932417466434e-06, - "loss": 1.0053, - "step": 422 - }, - { - "epoch": 0.03814763042792082, - "grad_norm": 2.526025537393469, - "learning_rate": 3.999308903137056e-06, - "loss": 0.9384, - "step": 423 - }, - { - "epoch": 0.03823781395139108, - "grad_norm": 1.5753397771876, - "learning_rate": 3.999293461017711e-06, - "loss": 1.0718, - "step": 424 - }, - { - "epoch": 0.03832799747486134, - "grad_norm": 1.9979213402951668, - "learning_rate": 3.9992778483076255e-06, - "loss": 1.0164, - "step": 425 - }, - { - "epoch": 0.03841818099833161, - "grad_norm": 1.531662087556444, - "learning_rate": 3.99926206500813e-06, - "loss": 1.0775, - "step": 426 - }, - { - "epoch": 0.03850836452180187, - "grad_norm": 1.9286393009837144, - "learning_rate": 3.999246111120571e-06, - "loss": 0.9483, - "step": 427 - }, - { - "epoch": 0.03859854804527213, - "grad_norm": 1.391066448993959, - "learning_rate": 3.999229986646311e-06, - "loss": 1.0904, - "step": 428 - }, - { - "epoch": 0.03868873156874239, - "grad_norm": 1.6084454268428232, - "learning_rate": 3.999213691586723e-06, - "loss": 0.9871, - "step": 429 - }, - { - "epoch": 0.03877891509221265, - "grad_norm": 1.4641757917183151, - "learning_rate": 3.9991972259432e-06, - "loss": 1.0608, - "step": 430 - }, - { - "epoch": 0.03886909861568291, - "grad_norm": 1.4217631537075641, - "learning_rate": 3.999180589717147e-06, - "loss": 1.0996, - "step": 431 - }, - { - "epoch": 0.03895928213915318, - "grad_norm": 1.5365924668895647, - "learning_rate": 3.999163782909983e-06, - "loss": 0.9379, - "step": 432 - }, - { - "epoch": 0.03904946566262344, - "grad_norm": 1.7622591192253332, - "learning_rate": 3.99914680552314e-06, - "loss": 1.0562, - "step": 433 - }, - { - "epoch": 0.0391396491860937, - "grad_norm": 1.5163024489474255, - "learning_rate": 3.999129657558069e-06, - "loss": 0.9555, - "step": 434 - }, - { - "epoch": 0.03922983270956396, - "grad_norm": 1.8817262800699996, - "learning_rate": 3.999112339016234e-06, - "loss": 0.7898, - "step": 435 - }, - { - "epoch": 0.03932001623303422, - "grad_norm": 1.5231646171116642, - "learning_rate": 3.999094849899109e-06, - "loss": 1.0346, - "step": 436 - }, - { - "epoch": 0.039410199756504484, - "grad_norm": 1.3640995138593235, - "learning_rate": 3.99907719020819e-06, - "loss": 0.9404, - "step": 437 - }, - { - "epoch": 0.03950038327997475, - "grad_norm": 1.5046232025574358, - "learning_rate": 3.999059359944982e-06, - "loss": 1.0551, - "step": 438 - }, - { - "epoch": 0.03959056680344501, - "grad_norm": 1.804188118411387, - "learning_rate": 3.999041359111007e-06, - "loss": 1.0809, - "step": 439 - }, - { - "epoch": 0.03968075032691527, - "grad_norm": 1.7026685750577704, - "learning_rate": 3.999023187707801e-06, - "loss": 1.1028, - "step": 440 - }, - { - "epoch": 0.039770933850385534, - "grad_norm": 1.4188824696811764, - "learning_rate": 3.999004845736913e-06, - "loss": 1.0829, - "step": 441 - }, - { - "epoch": 0.039861117373855795, - "grad_norm": 1.5383460221571474, - "learning_rate": 3.9989863331999096e-06, - "loss": 1.0983, - "step": 442 - }, - { - "epoch": 0.039951300897326056, - "grad_norm": 1.9637920985574853, - "learning_rate": 3.99896765009837e-06, - "loss": 0.9565, - "step": 443 - }, - { - "epoch": 0.040041484420796324, - "grad_norm": 1.516058004603232, - "learning_rate": 3.998948796433888e-06, - "loss": 0.9866, - "step": 444 - }, - { - "epoch": 0.040131667944266584, - "grad_norm": 1.4506518740221177, - "learning_rate": 3.998929772208073e-06, - "loss": 1.0646, - "step": 445 - }, - { - "epoch": 0.040221851467736845, - "grad_norm": 0.8072360891264118, - "learning_rate": 3.998910577422547e-06, - "loss": 0.8338, - "step": 446 - }, - { - "epoch": 0.040312034991207106, - "grad_norm": 1.4664420804832312, - "learning_rate": 3.99889121207895e-06, - "loss": 1.0728, - "step": 447 - }, - { - "epoch": 0.04040221851467737, - "grad_norm": 1.7453513391283344, - "learning_rate": 3.9988716761789324e-06, - "loss": 1.0545, - "step": 448 - }, - { - "epoch": 0.04049240203814763, - "grad_norm": 1.716296299853729, - "learning_rate": 3.998851969724161e-06, - "loss": 0.9579, - "step": 449 - }, - { - "epoch": 0.040582585561617895, - "grad_norm": 1.6516573587277865, - "learning_rate": 3.998832092716319e-06, - "loss": 1.073, - "step": 450 - }, - { - "epoch": 0.040672769085088156, - "grad_norm": 1.4762216002908517, - "learning_rate": 3.998812045157102e-06, - "loss": 1.0027, - "step": 451 - }, - { - "epoch": 0.04076295260855842, - "grad_norm": 1.898967527645227, - "learning_rate": 3.998791827048219e-06, - "loss": 1.0774, - "step": 452 - }, - { - "epoch": 0.04085313613202868, - "grad_norm": 1.3622168290441337, - "learning_rate": 3.998771438391396e-06, - "loss": 0.9625, - "step": 453 - }, - { - "epoch": 0.04094331965549894, - "grad_norm": 0.8400435454003516, - "learning_rate": 3.9987508791883725e-06, - "loss": 0.7974, - "step": 454 - }, - { - "epoch": 0.0410335031789692, - "grad_norm": 1.6673233248610944, - "learning_rate": 3.998730149440904e-06, - "loss": 1.1396, - "step": 455 - }, - { - "epoch": 0.04112368670243947, - "grad_norm": 1.6437264328344103, - "learning_rate": 3.998709249150758e-06, - "loss": 1.0786, - "step": 456 - }, - { - "epoch": 0.04121387022590973, - "grad_norm": 1.5266195858576939, - "learning_rate": 3.998688178319717e-06, - "loss": 1.0426, - "step": 457 - }, - { - "epoch": 0.04130405374937999, - "grad_norm": 1.6066269606647534, - "learning_rate": 3.9986669369495805e-06, - "loss": 1.0102, - "step": 458 - }, - { - "epoch": 0.04139423727285025, - "grad_norm": 1.710609927238204, - "learning_rate": 3.998645525042161e-06, - "loss": 1.0812, - "step": 459 - }, - { - "epoch": 0.04148442079632051, - "grad_norm": 1.792907447847387, - "learning_rate": 3.998623942599284e-06, - "loss": 1.097, - "step": 460 - }, - { - "epoch": 0.04157460431979077, - "grad_norm": 1.8314355723644897, - "learning_rate": 3.998602189622793e-06, - "loss": 1.0186, - "step": 461 - }, - { - "epoch": 0.04166478784326104, - "grad_norm": 2.0441708939212218, - "learning_rate": 3.998580266114542e-06, - "loss": 1.0618, - "step": 462 - }, - { - "epoch": 0.0417549713667313, - "grad_norm": 1.7968975563292122, - "learning_rate": 3.998558172076404e-06, - "loss": 0.9284, - "step": 463 - }, - { - "epoch": 0.04184515489020156, - "grad_norm": 1.5554115487985372, - "learning_rate": 3.998535907510262e-06, - "loss": 0.9624, - "step": 464 - }, - { - "epoch": 0.04193533841367182, - "grad_norm": 1.4998369128260858, - "learning_rate": 3.998513472418016e-06, - "loss": 1.0439, - "step": 465 - }, - { - "epoch": 0.04202552193714208, - "grad_norm": 1.9089134869762663, - "learning_rate": 3.998490866801582e-06, - "loss": 1.0579, - "step": 466 - }, - { - "epoch": 0.04211570546061234, - "grad_norm": 1.5422519225913827, - "learning_rate": 3.998468090662886e-06, - "loss": 1.0117, - "step": 467 - }, - { - "epoch": 0.04220588898408261, - "grad_norm": 1.5208922422661801, - "learning_rate": 3.998445144003874e-06, - "loss": 0.9179, - "step": 468 - }, - { - "epoch": 0.04229607250755287, - "grad_norm": 1.6169421613418953, - "learning_rate": 3.998422026826504e-06, - "loss": 1.0436, - "step": 469 - }, - { - "epoch": 0.04238625603102313, - "grad_norm": 1.779200328297036, - "learning_rate": 3.998398739132746e-06, - "loss": 1.0574, - "step": 470 - }, - { - "epoch": 0.04247643955449339, - "grad_norm": 1.4528436696070437, - "learning_rate": 3.99837528092459e-06, - "loss": 0.9767, - "step": 471 - }, - { - "epoch": 0.042566623077963654, - "grad_norm": 1.484057342269716, - "learning_rate": 3.998351652204034e-06, - "loss": 1.0746, - "step": 472 - }, - { - "epoch": 0.042656806601433915, - "grad_norm": 1.3970753093018429, - "learning_rate": 3.998327852973098e-06, - "loss": 1.056, - "step": 473 - }, - { - "epoch": 0.04274699012490418, - "grad_norm": 1.7835624140669808, - "learning_rate": 3.99830388323381e-06, - "loss": 0.9733, - "step": 474 - }, - { - "epoch": 0.042837173648374444, - "grad_norm": 1.342976458373702, - "learning_rate": 3.998279742988216e-06, - "loss": 1.0535, - "step": 475 - }, - { - "epoch": 0.042927357171844704, - "grad_norm": 1.3982803853179682, - "learning_rate": 3.998255432238377e-06, - "loss": 0.9914, - "step": 476 - }, - { - "epoch": 0.043017540695314965, - "grad_norm": 1.8302704049536023, - "learning_rate": 3.9982309509863656e-06, - "loss": 1.0939, - "step": 477 - }, - { - "epoch": 0.043107724218785226, - "grad_norm": 1.821891656344717, - "learning_rate": 3.998206299234272e-06, - "loss": 0.9768, - "step": 478 - }, - { - "epoch": 0.04319790774225549, - "grad_norm": 1.350364109669489, - "learning_rate": 3.998181476984198e-06, - "loss": 1.0468, - "step": 479 - }, - { - "epoch": 0.043288091265725755, - "grad_norm": 1.4123288481329532, - "learning_rate": 3.998156484238263e-06, - "loss": 0.9485, - "step": 480 - }, - { - "epoch": 0.043378274789196015, - "grad_norm": 1.7610165027406202, - "learning_rate": 3.998131320998599e-06, - "loss": 0.9613, - "step": 481 - }, - { - "epoch": 0.043468458312666276, - "grad_norm": 1.7275826042730322, - "learning_rate": 3.998105987267353e-06, - "loss": 1.0665, - "step": 482 - }, - { - "epoch": 0.04355864183613654, - "grad_norm": 1.7991665367537513, - "learning_rate": 3.998080483046687e-06, - "loss": 0.9501, - "step": 483 - }, - { - "epoch": 0.0436488253596068, - "grad_norm": 1.2067950860249352, - "learning_rate": 3.998054808338776e-06, - "loss": 0.9687, - "step": 484 - }, - { - "epoch": 0.04373900888307706, - "grad_norm": 1.5191501639692722, - "learning_rate": 3.998028963145812e-06, - "loss": 0.9889, - "step": 485 - }, - { - "epoch": 0.043829192406547327, - "grad_norm": 1.5402688657462473, - "learning_rate": 3.99800294747e-06, - "loss": 1.0455, - "step": 486 - }, - { - "epoch": 0.04391937593001759, - "grad_norm": 1.3823527202140526, - "learning_rate": 3.99797676131356e-06, - "loss": 1.0492, - "step": 487 - }, - { - "epoch": 0.04400955945348785, - "grad_norm": 1.9797584117542582, - "learning_rate": 3.997950404678726e-06, - "loss": 0.8768, - "step": 488 - }, - { - "epoch": 0.04409974297695811, - "grad_norm": 1.5173664902805835, - "learning_rate": 3.997923877567746e-06, - "loss": 1.1409, - "step": 489 - }, - { - "epoch": 0.04418992650042837, - "grad_norm": 1.5127064240886683, - "learning_rate": 3.9978971799828855e-06, - "loss": 0.8594, - "step": 490 - }, - { - "epoch": 0.04428011002389863, - "grad_norm": 1.794218272051556, - "learning_rate": 3.997870311926421e-06, - "loss": 1.0152, - "step": 491 - }, - { - "epoch": 0.0443702935473689, - "grad_norm": 1.4962321483850565, - "learning_rate": 3.997843273400645e-06, - "loss": 0.978, - "step": 492 - }, - { - "epoch": 0.04446047707083916, - "grad_norm": 1.5566418502975101, - "learning_rate": 3.997816064407865e-06, - "loss": 1.0024, - "step": 493 - }, - { - "epoch": 0.04455066059430942, - "grad_norm": 1.3731462814355713, - "learning_rate": 3.997788684950402e-06, - "loss": 1.0454, - "step": 494 - }, - { - "epoch": 0.04464084411777968, - "grad_norm": 1.5051879969436337, - "learning_rate": 3.997761135030593e-06, - "loss": 1.0636, - "step": 495 - }, - { - "epoch": 0.04473102764124994, - "grad_norm": 1.6117372675399133, - "learning_rate": 3.997733414650789e-06, - "loss": 0.917, - "step": 496 - }, - { - "epoch": 0.0448212111647202, - "grad_norm": 1.7674604345099167, - "learning_rate": 3.9977055238133554e-06, - "loss": 0.9885, - "step": 497 - }, - { - "epoch": 0.04491139468819047, - "grad_norm": 1.3502033875161983, - "learning_rate": 3.99767746252067e-06, - "loss": 0.9001, - "step": 498 - }, - { - "epoch": 0.04500157821166073, - "grad_norm": 1.3915377846541934, - "learning_rate": 3.997649230775129e-06, - "loss": 1.0211, - "step": 499 - }, - { - "epoch": 0.04509176173513099, - "grad_norm": 1.3766109826061776, - "learning_rate": 3.9976208285791395e-06, - "loss": 0.9658, - "step": 500 - }, - { - "epoch": 0.04518194525860125, - "grad_norm": 1.0656198249758357, - "learning_rate": 3.997592255935127e-06, - "loss": 0.9367, - "step": 501 - }, - { - "epoch": 0.045272128782071513, - "grad_norm": 1.4693169818361804, - "learning_rate": 3.997563512845529e-06, - "loss": 1.0472, - "step": 502 - }, - { - "epoch": 0.045362312305541774, - "grad_norm": 1.5205534411807342, - "learning_rate": 3.9975345993127975e-06, - "loss": 0.9798, - "step": 503 - }, - { - "epoch": 0.04545249582901204, - "grad_norm": 1.526470900787076, - "learning_rate": 3.9975055153393985e-06, - "loss": 1.0357, - "step": 504 - }, - { - "epoch": 0.0455426793524823, - "grad_norm": 1.7976169547072118, - "learning_rate": 3.997476260927816e-06, - "loss": 1.0356, - "step": 505 - }, - { - "epoch": 0.045632862875952564, - "grad_norm": 1.4098332000713119, - "learning_rate": 3.997446836080545e-06, - "loss": 0.9972, - "step": 506 - }, - { - "epoch": 0.045723046399422825, - "grad_norm": 1.9845462034368058, - "learning_rate": 3.997417240800095e-06, - "loss": 1.0721, - "step": 507 - }, - { - "epoch": 0.045813229922893085, - "grad_norm": 1.4109379089287284, - "learning_rate": 3.997387475088994e-06, - "loss": 1.1188, - "step": 508 - }, - { - "epoch": 0.045903413446363346, - "grad_norm": 1.9071427427937824, - "learning_rate": 3.99735753894978e-06, - "loss": 1.1135, - "step": 509 - }, - { - "epoch": 0.045993596969833614, - "grad_norm": 1.4315715112175627, - "learning_rate": 3.997327432385006e-06, - "loss": 1.0155, - "step": 510 - }, - { - "epoch": 0.046083780493303875, - "grad_norm": 1.7450304314001093, - "learning_rate": 3.997297155397244e-06, - "loss": 0.9481, - "step": 511 - }, - { - "epoch": 0.046173964016774136, - "grad_norm": 1.573156561673186, - "learning_rate": 3.997266707989074e-06, - "loss": 0.9439, - "step": 512 - }, - { - "epoch": 0.046264147540244396, - "grad_norm": 2.040120869589991, - "learning_rate": 3.997236090163097e-06, - "loss": 0.9375, - "step": 513 - }, - { - "epoch": 0.04635433106371466, - "grad_norm": 1.5969920374911262, - "learning_rate": 3.9972053019219235e-06, - "loss": 1.1508, - "step": 514 - }, - { - "epoch": 0.04644451458718492, - "grad_norm": 1.6290098747389687, - "learning_rate": 3.997174343268181e-06, - "loss": 1.0365, - "step": 515 - }, - { - "epoch": 0.046534698110655186, - "grad_norm": 1.2811385315578625, - "learning_rate": 3.9971432142045115e-06, - "loss": 1.029, - "step": 516 - }, - { - "epoch": 0.04662488163412545, - "grad_norm": 0.9996387305948696, - "learning_rate": 3.99711191473357e-06, - "loss": 0.8898, - "step": 517 - }, - { - "epoch": 0.04671506515759571, - "grad_norm": 1.5890076334285617, - "learning_rate": 3.99708044485803e-06, - "loss": 1.0365, - "step": 518 - }, - { - "epoch": 0.04680524868106597, - "grad_norm": 1.6794519303606792, - "learning_rate": 3.997048804580574e-06, - "loss": 1.0327, - "step": 519 - }, - { - "epoch": 0.04689543220453623, - "grad_norm": 1.6822817618095203, - "learning_rate": 3.997016993903901e-06, - "loss": 0.9664, - "step": 520 - }, - { - "epoch": 0.04698561572800649, - "grad_norm": 1.4765141393421766, - "learning_rate": 3.996985012830728e-06, - "loss": 1.0906, - "step": 521 - }, - { - "epoch": 0.04707579925147676, - "grad_norm": 1.44424316438441, - "learning_rate": 3.996952861363782e-06, - "loss": 0.9693, - "step": 522 - }, - { - "epoch": 0.04716598277494702, - "grad_norm": 1.410218129160863, - "learning_rate": 3.9969205395058064e-06, - "loss": 1.0267, - "step": 523 - }, - { - "epoch": 0.04725616629841728, - "grad_norm": 1.8979757869932627, - "learning_rate": 3.99688804725956e-06, - "loss": 1.0127, - "step": 524 - }, - { - "epoch": 0.04734634982188754, - "grad_norm": 1.1362501404177807, - "learning_rate": 3.996855384627815e-06, - "loss": 0.9739, - "step": 525 - }, - { - "epoch": 0.0474365333453578, - "grad_norm": 1.7013517475182989, - "learning_rate": 3.996822551613357e-06, - "loss": 1.0545, - "step": 526 - }, - { - "epoch": 0.04752671686882806, - "grad_norm": 2.4640829662375316, - "learning_rate": 3.996789548218989e-06, - "loss": 0.9503, - "step": 527 - }, - { - "epoch": 0.04761690039229833, - "grad_norm": 1.4613260170748796, - "learning_rate": 3.996756374447526e-06, - "loss": 0.91, - "step": 528 - }, - { - "epoch": 0.04770708391576859, - "grad_norm": 2.625968663553804, - "learning_rate": 3.9967230303018005e-06, - "loss": 1.0346, - "step": 529 - }, - { - "epoch": 0.04779726743923885, - "grad_norm": 1.5135722465952697, - "learning_rate": 3.996689515784655e-06, - "loss": 1.0004, - "step": 530 - }, - { - "epoch": 0.04788745096270911, - "grad_norm": 2.2203368838620268, - "learning_rate": 3.996655830898951e-06, - "loss": 0.9902, - "step": 531 - }, - { - "epoch": 0.04797763448617937, - "grad_norm": 2.122244113733759, - "learning_rate": 3.996621975647562e-06, - "loss": 1.028, - "step": 532 - }, - { - "epoch": 0.04806781800964964, - "grad_norm": 1.2080473835538301, - "learning_rate": 3.996587950033377e-06, - "loss": 0.96, - "step": 533 - }, - { - "epoch": 0.0481580015331199, - "grad_norm": 1.9500339920798109, - "learning_rate": 3.996553754059299e-06, - "loss": 0.9481, - "step": 534 - }, - { - "epoch": 0.04824818505659016, - "grad_norm": 1.6884978840740976, - "learning_rate": 3.996519387728245e-06, - "loss": 1.0217, - "step": 535 - }, - { - "epoch": 0.04833836858006042, - "grad_norm": 1.2412011411026662, - "learning_rate": 3.9964848510431495e-06, - "loss": 1.003, - "step": 536 - }, - { - "epoch": 0.048428552103530684, - "grad_norm": 1.4051740132696589, - "learning_rate": 3.996450144006957e-06, - "loss": 1.0751, - "step": 537 - }, - { - "epoch": 0.048518735627000945, - "grad_norm": 2.076920009404278, - "learning_rate": 3.99641526662263e-06, - "loss": 0.9956, - "step": 538 - }, - { - "epoch": 0.04860891915047121, - "grad_norm": 1.5819113603540533, - "learning_rate": 3.996380218893145e-06, - "loss": 1.1033, - "step": 539 - }, - { - "epoch": 0.04869910267394147, - "grad_norm": 11.242888512933757, - "learning_rate": 3.996345000821491e-06, - "loss": 1.049, - "step": 540 - }, - { - "epoch": 0.048789286197411734, - "grad_norm": 1.5736129011409792, - "learning_rate": 3.996309612410674e-06, - "loss": 1.1249, - "step": 541 - }, - { - "epoch": 0.048879469720881995, - "grad_norm": 2.001249519076459, - "learning_rate": 3.996274053663713e-06, - "loss": 0.9526, - "step": 542 - }, - { - "epoch": 0.048969653244352256, - "grad_norm": 1.0645001042349402, - "learning_rate": 3.996238324583643e-06, - "loss": 0.8498, - "step": 543 - }, - { - "epoch": 0.049059836767822516, - "grad_norm": 1.8615732101688178, - "learning_rate": 3.996202425173512e-06, - "loss": 1.0792, - "step": 544 - }, - { - "epoch": 0.049150020291292784, - "grad_norm": 1.6608323100703037, - "learning_rate": 3.996166355436383e-06, - "loss": 0.974, - "step": 545 - }, - { - "epoch": 0.049240203814763045, - "grad_norm": 1.4611771330927237, - "learning_rate": 3.996130115375333e-06, - "loss": 1.0322, - "step": 546 - }, - { - "epoch": 0.049330387338233306, - "grad_norm": 1.1557462471239475, - "learning_rate": 3.996093704993456e-06, - "loss": 0.8507, - "step": 547 - }, - { - "epoch": 0.04942057086170357, - "grad_norm": 1.5244598088557562, - "learning_rate": 3.996057124293857e-06, - "loss": 1.1071, - "step": 548 - }, - { - "epoch": 0.04951075438517383, - "grad_norm": 1.285579437798576, - "learning_rate": 3.996020373279659e-06, - "loss": 1.0613, - "step": 549 - }, - { - "epoch": 0.04960093790864409, - "grad_norm": 1.5331922397105626, - "learning_rate": 3.995983451953996e-06, - "loss": 1.0316, - "step": 550 - }, - { - "epoch": 0.049691121432114356, - "grad_norm": 1.611556861132302, - "learning_rate": 3.99594636032002e-06, - "loss": 1.0461, - "step": 551 - }, - { - "epoch": 0.04978130495558462, - "grad_norm": 1.994108343657053, - "learning_rate": 3.995909098380894e-06, - "loss": 0.9155, - "step": 552 - }, - { - "epoch": 0.04987148847905488, - "grad_norm": 1.2527981909878898, - "learning_rate": 3.995871666139799e-06, - "loss": 0.8983, - "step": 553 - }, - { - "epoch": 0.04996167200252514, - "grad_norm": 1.5681336122825946, - "learning_rate": 3.995834063599928e-06, - "loss": 0.9977, - "step": 554 - }, - { - "epoch": 0.0500518555259954, - "grad_norm": 1.511610870060596, - "learning_rate": 3.99579629076449e-06, - "loss": 1.0397, - "step": 555 - }, - { - "epoch": 0.05014203904946566, - "grad_norm": 1.623428318265737, - "learning_rate": 3.9957583476367084e-06, - "loss": 0.9941, - "step": 556 - }, - { - "epoch": 0.05023222257293593, - "grad_norm": 1.6074921788430274, - "learning_rate": 3.995720234219819e-06, - "loss": 1.0297, - "step": 557 - }, - { - "epoch": 0.05032240609640619, - "grad_norm": 1.5465346453214968, - "learning_rate": 3.995681950517075e-06, - "loss": 0.9248, - "step": 558 - }, - { - "epoch": 0.05041258961987645, - "grad_norm": 1.9326207651005922, - "learning_rate": 3.995643496531743e-06, - "loss": 1.0438, - "step": 559 - }, - { - "epoch": 0.05050277314334671, - "grad_norm": 1.6400301626967273, - "learning_rate": 3.9956048722671044e-06, - "loss": 1.0317, - "step": 560 - }, - { - "epoch": 0.05059295666681697, - "grad_norm": 1.49573012432478, - "learning_rate": 3.995566077726454e-06, - "loss": 0.995, - "step": 561 - }, - { - "epoch": 0.05068314019028723, - "grad_norm": 1.5639601941713979, - "learning_rate": 3.995527112913103e-06, - "loss": 0.9859, - "step": 562 - }, - { - "epoch": 0.0507733237137575, - "grad_norm": 1.809391644916828, - "learning_rate": 3.995487977830375e-06, - "loss": 0.9725, - "step": 563 - }, - { - "epoch": 0.05086350723722776, - "grad_norm": 10.677829583613855, - "learning_rate": 3.9954486724816105e-06, - "loss": 1.1142, - "step": 564 - }, - { - "epoch": 0.05095369076069802, - "grad_norm": 1.8325024875139992, - "learning_rate": 3.995409196870161e-06, - "loss": 1.0643, - "step": 565 - }, - { - "epoch": 0.05104387428416828, - "grad_norm": 0.8855054848197921, - "learning_rate": 3.995369550999398e-06, - "loss": 0.8217, - "step": 566 - }, - { - "epoch": 0.05113405780763854, - "grad_norm": 1.885954518466574, - "learning_rate": 3.995329734872702e-06, - "loss": 1.0358, - "step": 567 - }, - { - "epoch": 0.051224241331108804, - "grad_norm": 1.593919483223743, - "learning_rate": 3.9952897484934706e-06, - "loss": 1.1348, - "step": 568 - }, - { - "epoch": 0.05131442485457907, - "grad_norm": 1.7118940074766524, - "learning_rate": 3.995249591865115e-06, - "loss": 1.0267, - "step": 569 - }, - { - "epoch": 0.05140460837804933, - "grad_norm": 1.729536024730287, - "learning_rate": 3.995209264991063e-06, - "loss": 0.9767, - "step": 570 - }, - { - "epoch": 0.05149479190151959, - "grad_norm": 1.569875855731222, - "learning_rate": 3.995168767874756e-06, - "loss": 1.0672, - "step": 571 - }, - { - "epoch": 0.051584975424989854, - "grad_norm": 1.323884229689632, - "learning_rate": 3.995128100519648e-06, - "loss": 1.0042, - "step": 572 - }, - { - "epoch": 0.051675158948460115, - "grad_norm": 1.7137340526548335, - "learning_rate": 3.995087262929209e-06, - "loss": 1.0397, - "step": 573 - }, - { - "epoch": 0.051765342471930376, - "grad_norm": 1.6406214396120222, - "learning_rate": 3.995046255106925e-06, - "loss": 1.0271, - "step": 574 - }, - { - "epoch": 0.05185552599540064, - "grad_norm": 1.5128796282308752, - "learning_rate": 3.995005077056293e-06, - "loss": 1.0835, - "step": 575 - }, - { - "epoch": 0.051945709518870904, - "grad_norm": 2.2332275218475073, - "learning_rate": 3.9949637287808284e-06, - "loss": 0.9157, - "step": 576 - }, - { - "epoch": 0.052035893042341165, - "grad_norm": 1.3250632504895197, - "learning_rate": 3.994922210284057e-06, - "loss": 0.9679, - "step": 577 - }, - { - "epoch": 0.052126076565811426, - "grad_norm": 1.7466604520721145, - "learning_rate": 3.994880521569524e-06, - "loss": 1.0508, - "step": 578 - }, - { - "epoch": 0.05221626008928169, - "grad_norm": 1.5162087329050942, - "learning_rate": 3.994838662640785e-06, - "loss": 1.0309, - "step": 579 - }, - { - "epoch": 0.05230644361275195, - "grad_norm": 1.9981192447983969, - "learning_rate": 3.9947966335014116e-06, - "loss": 1.0598, - "step": 580 - }, - { - "epoch": 0.052396627136222215, - "grad_norm": 2.054232586879123, - "learning_rate": 3.99475443415499e-06, - "loss": 0.9991, - "step": 581 - }, - { - "epoch": 0.052486810659692476, - "grad_norm": 1.4920853185369727, - "learning_rate": 3.994712064605121e-06, - "loss": 1.0155, - "step": 582 - }, - { - "epoch": 0.05257699418316274, - "grad_norm": 2.2873840427712957, - "learning_rate": 3.99466952485542e-06, - "loss": 1.1671, - "step": 583 - }, - { - "epoch": 0.052667177706633, - "grad_norm": 1.6579306370729057, - "learning_rate": 3.994626814909518e-06, - "loss": 1.0409, - "step": 584 - }, - { - "epoch": 0.05275736123010326, - "grad_norm": 1.5492675465773127, - "learning_rate": 3.994583934771056e-06, - "loss": 1.0043, - "step": 585 - }, - { - "epoch": 0.05284754475357352, - "grad_norm": 1.4212958026037879, - "learning_rate": 3.9945408844436955e-06, - "loss": 1.0726, - "step": 586 - }, - { - "epoch": 0.05293772827704379, - "grad_norm": 1.6934685589394853, - "learning_rate": 3.994497663931109e-06, - "loss": 1.049, - "step": 587 - }, - { - "epoch": 0.05302791180051405, - "grad_norm": 1.5459246557907782, - "learning_rate": 3.994454273236984e-06, - "loss": 1.0364, - "step": 588 - }, - { - "epoch": 0.05311809532398431, - "grad_norm": 1.3266699505929862, - "learning_rate": 3.994410712365023e-06, - "loss": 1.0261, - "step": 589 - }, - { - "epoch": 0.05320827884745457, - "grad_norm": 1.8720101997065015, - "learning_rate": 3.994366981318943e-06, - "loss": 1.1193, - "step": 590 - }, - { - "epoch": 0.05329846237092483, - "grad_norm": 1.5546326747409474, - "learning_rate": 3.9943230801024765e-06, - "loss": 1.0241, - "step": 591 - }, - { - "epoch": 0.05338864589439509, - "grad_norm": 1.7355383330793268, - "learning_rate": 3.9942790087193666e-06, - "loss": 1.0548, - "step": 592 - }, - { - "epoch": 0.05347882941786536, - "grad_norm": 1.606611527305344, - "learning_rate": 3.994234767173376e-06, - "loss": 1.018, - "step": 593 - }, - { - "epoch": 0.05356901294133562, - "grad_norm": 1.6562392756276836, - "learning_rate": 3.994190355468279e-06, - "loss": 0.9118, - "step": 594 - }, - { - "epoch": 0.05365919646480588, - "grad_norm": 1.4103653348581247, - "learning_rate": 3.994145773607865e-06, - "loss": 1.019, - "step": 595 - }, - { - "epoch": 0.05374937998827614, - "grad_norm": 1.573258630156355, - "learning_rate": 3.994101021595938e-06, - "loss": 0.9904, - "step": 596 - }, - { - "epoch": 0.0538395635117464, - "grad_norm": 1.5191430230880725, - "learning_rate": 3.9940560994363165e-06, - "loss": 1.0405, - "step": 597 - }, - { - "epoch": 0.05392974703521666, - "grad_norm": 1.6027959772256093, - "learning_rate": 3.994011007132833e-06, - "loss": 0.9002, - "step": 598 - }, - { - "epoch": 0.05401993055868693, - "grad_norm": 0.8778099513059332, - "learning_rate": 3.993965744689337e-06, - "loss": 0.8641, - "step": 599 - }, - { - "epoch": 0.05411011408215719, - "grad_norm": 1.8423624151405538, - "learning_rate": 3.993920312109687e-06, - "loss": 1.0013, - "step": 600 - }, - { - "epoch": 0.05420029760562745, - "grad_norm": 1.6457107916815694, - "learning_rate": 3.993874709397764e-06, - "loss": 1.0237, - "step": 601 - }, - { - "epoch": 0.05429048112909771, - "grad_norm": 1.7784563208431898, - "learning_rate": 3.993828936557454e-06, - "loss": 1.0122, - "step": 602 - }, - { - "epoch": 0.054380664652567974, - "grad_norm": 1.4286480235955552, - "learning_rate": 3.993782993592667e-06, - "loss": 0.9846, - "step": 603 - }, - { - "epoch": 0.054470848176038235, - "grad_norm": 1.6352069582597006, - "learning_rate": 3.993736880507321e-06, - "loss": 1.0595, - "step": 604 - }, - { - "epoch": 0.0545610316995085, - "grad_norm": 1.7371604477871931, - "learning_rate": 3.99369059730535e-06, - "loss": 0.962, - "step": 605 - }, - { - "epoch": 0.05465121522297876, - "grad_norm": 1.2861889803011437, - "learning_rate": 3.993644143990706e-06, - "loss": 1.0233, - "step": 606 - }, - { - "epoch": 0.054741398746449024, - "grad_norm": 1.8099042128407827, - "learning_rate": 3.99359752056735e-06, - "loss": 0.944, - "step": 607 - }, - { - "epoch": 0.054831582269919285, - "grad_norm": 1.7391528741021247, - "learning_rate": 3.993550727039261e-06, - "loss": 0.876, - "step": 608 - }, - { - "epoch": 0.054921765793389546, - "grad_norm": 1.557156543121589, - "learning_rate": 3.993503763410431e-06, - "loss": 1.037, - "step": 609 - }, - { - "epoch": 0.05501194931685981, - "grad_norm": 1.677977787430049, - "learning_rate": 3.9934566296848686e-06, - "loss": 0.9432, - "step": 610 - }, - { - "epoch": 0.055102132840330074, - "grad_norm": 1.7881087380356113, - "learning_rate": 3.993409325866595e-06, - "loss": 1.1036, - "step": 611 - }, - { - "epoch": 0.055192316363800335, - "grad_norm": 0.8076687004146667, - "learning_rate": 3.993361851959645e-06, - "loss": 0.839, - "step": 612 - }, - { - "epoch": 0.055282499887270596, - "grad_norm": 1.3228151878223287, - "learning_rate": 3.993314207968071e-06, - "loss": 1.072, - "step": 613 - }, - { - "epoch": 0.05537268341074086, - "grad_norm": 1.5897614928764592, - "learning_rate": 3.993266393895938e-06, - "loss": 1.0072, - "step": 614 - }, - { - "epoch": 0.05546286693421112, - "grad_norm": 1.6893352136644126, - "learning_rate": 3.993218409747326e-06, - "loss": 1.036, - "step": 615 - }, - { - "epoch": 0.05555305045768138, - "grad_norm": 1.3744205207450717, - "learning_rate": 3.993170255526328e-06, - "loss": 1.0185, - "step": 616 - }, - { - "epoch": 0.055643233981151646, - "grad_norm": 1.455056823984187, - "learning_rate": 3.993121931237054e-06, - "loss": 1.042, - "step": 617 - }, - { - "epoch": 0.05573341750462191, - "grad_norm": 1.5008243044267968, - "learning_rate": 3.993073436883627e-06, - "loss": 1.035, - "step": 618 - }, - { - "epoch": 0.05582360102809217, - "grad_norm": 1.4405910595518063, - "learning_rate": 3.993024772470184e-06, - "loss": 1.0028, - "step": 619 - }, - { - "epoch": 0.05591378455156243, - "grad_norm": 1.679888336239974, - "learning_rate": 3.992975938000878e-06, - "loss": 1.0041, - "step": 620 - }, - { - "epoch": 0.05600396807503269, - "grad_norm": 0.7057614383615137, - "learning_rate": 3.992926933479876e-06, - "loss": 0.798, - "step": 621 - }, - { - "epoch": 0.05609415159850295, - "grad_norm": 1.7193405523978165, - "learning_rate": 3.9928777589113595e-06, - "loss": 1.0424, - "step": 622 - }, - { - "epoch": 0.05618433512197322, - "grad_norm": 1.8323105719109536, - "learning_rate": 3.992828414299524e-06, - "loss": 0.9766, - "step": 623 - }, - { - "epoch": 0.05627451864544348, - "grad_norm": 1.3174430318745483, - "learning_rate": 3.992778899648579e-06, - "loss": 1.0178, - "step": 624 - }, - { - "epoch": 0.05636470216891374, - "grad_norm": 1.5175944637955716, - "learning_rate": 3.992729214962751e-06, - "loss": 1.0437, - "step": 625 - }, - { - "epoch": 0.056454885692384, - "grad_norm": 1.8751534081326178, - "learning_rate": 3.992679360246279e-06, - "loss": 1.0533, - "step": 626 - }, - { - "epoch": 0.05654506921585426, - "grad_norm": 1.6745022247609498, - "learning_rate": 3.992629335503416e-06, - "loss": 1.0426, - "step": 627 - }, - { - "epoch": 0.05663525273932452, - "grad_norm": 1.3597579065995302, - "learning_rate": 3.9925791407384304e-06, - "loss": 1.0066, - "step": 628 - }, - { - "epoch": 0.05672543626279479, - "grad_norm": 2.7626424105431187, - "learning_rate": 3.992528775955606e-06, - "loss": 1.0637, - "step": 629 - }, - { - "epoch": 0.05681561978626505, - "grad_norm": 1.64966403835383, - "learning_rate": 3.992478241159239e-06, - "loss": 0.9947, - "step": 630 - }, - { - "epoch": 0.05690580330973531, - "grad_norm": 2.107284074720308, - "learning_rate": 3.992427536353643e-06, - "loss": 1.046, - "step": 631 - }, - { - "epoch": 0.05699598683320557, - "grad_norm": 1.6283035444009635, - "learning_rate": 3.992376661543143e-06, - "loss": 1.0049, - "step": 632 - }, - { - "epoch": 0.05708617035667583, - "grad_norm": 1.782925269716883, - "learning_rate": 3.992325616732081e-06, - "loss": 0.9948, - "step": 633 - }, - { - "epoch": 0.057176353880146094, - "grad_norm": 1.4215728941860482, - "learning_rate": 3.992274401924811e-06, - "loss": 1.1237, - "step": 634 - }, - { - "epoch": 0.05726653740361636, - "grad_norm": 1.357732284511803, - "learning_rate": 3.992223017125704e-06, - "loss": 0.9324, - "step": 635 - }, - { - "epoch": 0.05735672092708662, - "grad_norm": 1.51917794246259, - "learning_rate": 3.992171462339145e-06, - "loss": 1.0517, - "step": 636 - }, - { - "epoch": 0.057446904450556883, - "grad_norm": 1.5369542587711216, - "learning_rate": 3.992119737569532e-06, - "loss": 1.09, - "step": 637 - }, - { - "epoch": 0.057537087974027144, - "grad_norm": 1.6384935611648395, - "learning_rate": 3.992067842821277e-06, - "loss": 1.0156, - "step": 638 - }, - { - "epoch": 0.057627271497497405, - "grad_norm": 1.8995339951491832, - "learning_rate": 3.99201577809881e-06, - "loss": 1.1469, - "step": 639 - }, - { - "epoch": 0.057717455020967666, - "grad_norm": 1.7943353369144934, - "learning_rate": 3.991963543406574e-06, - "loss": 1.0366, - "step": 640 - }, - { - "epoch": 0.057807638544437934, - "grad_norm": 1.6086936174899815, - "learning_rate": 3.991911138749024e-06, - "loss": 1.0246, - "step": 641 - }, - { - "epoch": 0.057897822067908195, - "grad_norm": 1.5964431447491338, - "learning_rate": 3.991858564130633e-06, - "loss": 1.0485, - "step": 642 - }, - { - "epoch": 0.057988005591378455, - "grad_norm": 1.7782930291401369, - "learning_rate": 3.991805819555885e-06, - "loss": 0.9892, - "step": 643 - }, - { - "epoch": 0.058078189114848716, - "grad_norm": 1.4278221259958386, - "learning_rate": 3.991752905029283e-06, - "loss": 1.0228, - "step": 644 - }, - { - "epoch": 0.05816837263831898, - "grad_norm": 1.4284869714021966, - "learning_rate": 3.991699820555341e-06, - "loss": 0.8919, - "step": 645 - }, - { - "epoch": 0.05825855616178924, - "grad_norm": 1.5021227280754412, - "learning_rate": 3.991646566138588e-06, - "loss": 1.0209, - "step": 646 - }, - { - "epoch": 0.058348739685259506, - "grad_norm": 1.3028908882888828, - "learning_rate": 3.991593141783567e-06, - "loss": 0.9423, - "step": 647 - }, - { - "epoch": 0.058438923208729766, - "grad_norm": 1.6343637790622965, - "learning_rate": 3.991539547494839e-06, - "loss": 1.0359, - "step": 648 - }, - { - "epoch": 0.05852910673220003, - "grad_norm": 1.5669579806606584, - "learning_rate": 3.991485783276974e-06, - "loss": 1.0112, - "step": 649 - }, - { - "epoch": 0.05861929025567029, - "grad_norm": 1.3214302072183508, - "learning_rate": 3.991431849134563e-06, - "loss": 1.0711, - "step": 650 - }, - { - "epoch": 0.05870947377914055, - "grad_norm": 1.7124257343088112, - "learning_rate": 3.991377745072205e-06, - "loss": 0.9143, - "step": 651 - }, - { - "epoch": 0.05879965730261081, - "grad_norm": 1.417154798372231, - "learning_rate": 3.991323471094517e-06, - "loss": 1.044, - "step": 652 - }, - { - "epoch": 0.05888984082608108, - "grad_norm": 1.59737671736092, - "learning_rate": 3.991269027206131e-06, - "loss": 1.0013, - "step": 653 - }, - { - "epoch": 0.05898002434955134, - "grad_norm": 1.1384637292337256, - "learning_rate": 3.9912144134116916e-06, - "loss": 0.8777, - "step": 654 - }, - { - "epoch": 0.0590702078730216, - "grad_norm": 1.5824603970409892, - "learning_rate": 3.99115962971586e-06, - "loss": 0.9414, - "step": 655 - }, - { - "epoch": 0.05916039139649186, - "grad_norm": 1.928353161040863, - "learning_rate": 3.991104676123308e-06, - "loss": 1.055, - "step": 656 - }, - { - "epoch": 0.05925057491996212, - "grad_norm": 0.928384910180309, - "learning_rate": 3.991049552638727e-06, - "loss": 0.8243, - "step": 657 - }, - { - "epoch": 0.05934075844343238, - "grad_norm": 1.7679500293962669, - "learning_rate": 3.99099425926682e-06, - "loss": 0.9684, - "step": 658 - }, - { - "epoch": 0.05943094196690265, - "grad_norm": 1.459975634854951, - "learning_rate": 3.990938796012304e-06, - "loss": 0.988, - "step": 659 - }, - { - "epoch": 0.05952112549037291, - "grad_norm": 1.6556372318701604, - "learning_rate": 3.990883162879912e-06, - "loss": 1.1668, - "step": 660 - }, - { - "epoch": 0.05961130901384317, - "grad_norm": 1.554532757443092, - "learning_rate": 3.990827359874391e-06, - "loss": 1.0848, - "step": 661 - }, - { - "epoch": 0.05970149253731343, - "grad_norm": 1.63424619674065, - "learning_rate": 3.990771387000503e-06, - "loss": 1.0176, - "step": 662 - }, - { - "epoch": 0.05979167606078369, - "grad_norm": 1.6953526505317167, - "learning_rate": 3.990715244263023e-06, - "loss": 1.0436, - "step": 663 - }, - { - "epoch": 0.05988185958425396, - "grad_norm": 0.7360957661914683, - "learning_rate": 3.990658931666741e-06, - "loss": 0.8252, - "step": 664 - }, - { - "epoch": 0.05997204310772422, - "grad_norm": 1.5723248393595133, - "learning_rate": 3.990602449216463e-06, - "loss": 0.941, - "step": 665 - }, - { - "epoch": 0.06006222663119448, - "grad_norm": 1.8519097497507604, - "learning_rate": 3.990545796917008e-06, - "loss": 0.919, - "step": 666 - }, - { - "epoch": 0.06015241015466474, - "grad_norm": 1.3858473497937054, - "learning_rate": 3.99048897477321e-06, - "loss": 1.0719, - "step": 667 - }, - { - "epoch": 0.060242593678135004, - "grad_norm": 1.6607892434098919, - "learning_rate": 3.990431982789917e-06, - "loss": 1.031, - "step": 668 - }, - { - "epoch": 0.060332777201605264, - "grad_norm": 1.4493126029088126, - "learning_rate": 3.990374820971992e-06, - "loss": 0.9519, - "step": 669 - }, - { - "epoch": 0.06042296072507553, - "grad_norm": 1.607767209378084, - "learning_rate": 3.990317489324312e-06, - "loss": 0.9087, - "step": 670 - }, - { - "epoch": 0.06051314424854579, - "grad_norm": 1.6292679565760697, - "learning_rate": 3.99025998785177e-06, - "loss": 1.0044, - "step": 671 - }, - { - "epoch": 0.060603327772016054, - "grad_norm": 1.4252035564462815, - "learning_rate": 3.990202316559271e-06, - "loss": 0.9553, - "step": 672 - }, - { - "epoch": 0.060693511295486315, - "grad_norm": 2.3352227281620292, - "learning_rate": 3.990144475451738e-06, - "loss": 1.0648, - "step": 673 - }, - { - "epoch": 0.060783694818956575, - "grad_norm": 1.2998301835285881, - "learning_rate": 3.9900864645341036e-06, - "loss": 1.0162, - "step": 674 - }, - { - "epoch": 0.060873878342426836, - "grad_norm": 1.6748728746596715, - "learning_rate": 3.990028283811319e-06, - "loss": 1.0231, - "step": 675 - }, - { - "epoch": 0.060964061865897104, - "grad_norm": 1.5104108963779415, - "learning_rate": 3.989969933288348e-06, - "loss": 1.0121, - "step": 676 - }, - { - "epoch": 0.061054245389367365, - "grad_norm": 0.8666673767258107, - "learning_rate": 3.98991141297017e-06, - "loss": 0.7852, - "step": 677 - }, - { - "epoch": 0.061144428912837626, - "grad_norm": 1.335426322001497, - "learning_rate": 3.989852722861778e-06, - "loss": 0.9561, - "step": 678 - }, - { - "epoch": 0.061234612436307886, - "grad_norm": 0.7904125033603939, - "learning_rate": 3.98979386296818e-06, - "loss": 0.794, - "step": 679 - }, - { - "epoch": 0.06132479595977815, - "grad_norm": 1.7999621864161692, - "learning_rate": 3.989734833294398e-06, - "loss": 0.9999, - "step": 680 - }, - { - "epoch": 0.06141497948324841, - "grad_norm": 1.5990243291334008, - "learning_rate": 3.989675633845469e-06, - "loss": 1.0912, - "step": 681 - }, - { - "epoch": 0.061505163006718676, - "grad_norm": 2.1903018990672534, - "learning_rate": 3.989616264626443e-06, - "loss": 0.9954, - "step": 682 - }, - { - "epoch": 0.06159534653018894, - "grad_norm": 1.2756177340744004, - "learning_rate": 3.989556725642388e-06, - "loss": 1.0498, - "step": 683 - }, - { - "epoch": 0.0616855300536592, - "grad_norm": 2.024710472036669, - "learning_rate": 3.989497016898382e-06, - "loss": 1.0638, - "step": 684 - }, - { - "epoch": 0.06177571357712946, - "grad_norm": 1.3169761804962559, - "learning_rate": 3.98943713839952e-06, - "loss": 1.0037, - "step": 685 - }, - { - "epoch": 0.06186589710059972, - "grad_norm": 0.7926390765927973, - "learning_rate": 3.9893770901509125e-06, - "loss": 0.8326, - "step": 686 - }, - { - "epoch": 0.06195608062406998, - "grad_norm": 1.4879923863062345, - "learning_rate": 3.989316872157682e-06, - "loss": 1.0104, - "step": 687 - }, - { - "epoch": 0.06204626414754025, - "grad_norm": 1.6262520588210119, - "learning_rate": 3.989256484424968e-06, - "loss": 1.0871, - "step": 688 - }, - { - "epoch": 0.06213644767101051, - "grad_norm": 1.49766000380068, - "learning_rate": 3.98919592695792e-06, - "loss": 0.9808, - "step": 689 - }, - { - "epoch": 0.06222663119448077, - "grad_norm": 1.4859897865057368, - "learning_rate": 3.9891351997617096e-06, - "loss": 1.0435, - "step": 690 - }, - { - "epoch": 0.06231681471795103, - "grad_norm": 1.6206644615844596, - "learning_rate": 3.989074302841514e-06, - "loss": 0.9583, - "step": 691 - }, - { - "epoch": 0.06240699824142129, - "grad_norm": 1.6183099951405182, - "learning_rate": 3.989013236202533e-06, - "loss": 1.1294, - "step": 692 - }, - { - "epoch": 0.06249718176489155, - "grad_norm": 1.3343393682863858, - "learning_rate": 3.988951999849974e-06, - "loss": 1.0507, - "step": 693 - }, - { - "epoch": 0.06258736528836181, - "grad_norm": 1.7281980750860004, - "learning_rate": 3.988890593789064e-06, - "loss": 0.9322, - "step": 694 - }, - { - "epoch": 0.06267754881183207, - "grad_norm": 1.2916165567748046, - "learning_rate": 3.9888290180250415e-06, - "loss": 0.9962, - "step": 695 - }, - { - "epoch": 0.06276773233530233, - "grad_norm": 1.4044035763585538, - "learning_rate": 3.988767272563161e-06, - "loss": 0.978, - "step": 696 - }, - { - "epoch": 0.06285791585877261, - "grad_norm": 1.5488451347042405, - "learning_rate": 3.988705357408691e-06, - "loss": 0.9551, - "step": 697 - }, - { - "epoch": 0.06294809938224287, - "grad_norm": 1.4837057914028997, - "learning_rate": 3.9886432725669146e-06, - "loss": 0.9701, - "step": 698 - }, - { - "epoch": 0.06303828290571313, - "grad_norm": 1.5481893946931073, - "learning_rate": 3.988581018043128e-06, - "loss": 0.9743, - "step": 699 - }, - { - "epoch": 0.06312846642918339, - "grad_norm": 1.5560455525013883, - "learning_rate": 3.988518593842645e-06, - "loss": 1.0373, - "step": 700 - }, - { - "epoch": 0.06321864995265365, - "grad_norm": 1.5514192205773025, - "learning_rate": 3.9884559999707906e-06, - "loss": 1.0075, - "step": 701 - }, - { - "epoch": 0.06330883347612391, - "grad_norm": 1.5411912167763937, - "learning_rate": 3.988393236432906e-06, - "loss": 1.0285, - "step": 702 - }, - { - "epoch": 0.06339901699959417, - "grad_norm": 1.4995432794169306, - "learning_rate": 3.988330303234347e-06, - "loss": 1.0917, - "step": 703 - }, - { - "epoch": 0.06348920052306443, - "grad_norm": 1.4337837446743178, - "learning_rate": 3.988267200380483e-06, - "loss": 0.9666, - "step": 704 - }, - { - "epoch": 0.0635793840465347, - "grad_norm": 1.280561448210322, - "learning_rate": 3.988203927876698e-06, - "loss": 1.0608, - "step": 705 - }, - { - "epoch": 0.06366956757000496, - "grad_norm": 1.428905335640341, - "learning_rate": 3.988140485728391e-06, - "loss": 0.8983, - "step": 706 - }, - { - "epoch": 0.06375975109347522, - "grad_norm": 1.4049563497502955, - "learning_rate": 3.988076873940975e-06, - "loss": 0.9629, - "step": 707 - }, - { - "epoch": 0.06384993461694548, - "grad_norm": 1.845229831946603, - "learning_rate": 3.9880130925198786e-06, - "loss": 1.0113, - "step": 708 - }, - { - "epoch": 0.06394011814041575, - "grad_norm": 1.8437267237341666, - "learning_rate": 3.987949141470543e-06, - "loss": 1.0303, - "step": 709 - }, - { - "epoch": 0.06403030166388601, - "grad_norm": 1.415488351453945, - "learning_rate": 3.987885020798425e-06, - "loss": 1.0076, - "step": 710 - }, - { - "epoch": 0.06412048518735627, - "grad_norm": 1.4404301840571179, - "learning_rate": 3.987820730508996e-06, - "loss": 0.9847, - "step": 711 - }, - { - "epoch": 0.06421066871082654, - "grad_norm": 1.4708568801652053, - "learning_rate": 3.987756270607742e-06, - "loss": 1.1106, - "step": 712 - }, - { - "epoch": 0.0643008522342968, - "grad_norm": 0.8478524117332622, - "learning_rate": 3.987691641100162e-06, - "loss": 0.8099, - "step": 713 - }, - { - "epoch": 0.06439103575776706, - "grad_norm": 1.7719501190139868, - "learning_rate": 3.987626841991771e-06, - "loss": 1.1029, - "step": 714 - }, - { - "epoch": 0.06448121928123732, - "grad_norm": 1.7417964068675433, - "learning_rate": 3.987561873288099e-06, - "loss": 1.0507, - "step": 715 - }, - { - "epoch": 0.06457140280470758, - "grad_norm": 1.5570976706356758, - "learning_rate": 3.987496734994688e-06, - "loss": 0.9612, - "step": 716 - }, - { - "epoch": 0.06466158632817784, - "grad_norm": 1.806637985639964, - "learning_rate": 3.987431427117097e-06, - "loss": 1.0408, - "step": 717 - }, - { - "epoch": 0.0647517698516481, - "grad_norm": 1.91199397205456, - "learning_rate": 3.9873659496608985e-06, - "loss": 1.0274, - "step": 718 - }, - { - "epoch": 0.06484195337511836, - "grad_norm": 1.5930880499558426, - "learning_rate": 3.987300302631678e-06, - "loss": 0.9649, - "step": 719 - }, - { - "epoch": 0.06493213689858862, - "grad_norm": 1.561835033062618, - "learning_rate": 3.987234486035039e-06, - "loss": 1.0473, - "step": 720 - }, - { - "epoch": 0.0650223204220589, - "grad_norm": 1.786133279690856, - "learning_rate": 3.987168499876595e-06, - "loss": 1.0576, - "step": 721 - }, - { - "epoch": 0.06511250394552916, - "grad_norm": 1.8897490560865107, - "learning_rate": 3.987102344161978e-06, - "loss": 1.0243, - "step": 722 - }, - { - "epoch": 0.06520268746899942, - "grad_norm": 1.5117059751458775, - "learning_rate": 3.987036018896832e-06, - "loss": 1.0471, - "step": 723 - }, - { - "epoch": 0.06529287099246968, - "grad_norm": 1.4999816416570662, - "learning_rate": 3.986969524086817e-06, - "loss": 1.0775, - "step": 724 - }, - { - "epoch": 0.06538305451593994, - "grad_norm": 1.8743366975550022, - "learning_rate": 3.986902859737605e-06, - "loss": 0.9977, - "step": 725 - }, - { - "epoch": 0.0654732380394102, - "grad_norm": 1.6055407728389495, - "learning_rate": 3.986836025854886e-06, - "loss": 0.9512, - "step": 726 - }, - { - "epoch": 0.06556342156288046, - "grad_norm": 1.5387482298059143, - "learning_rate": 3.986769022444362e-06, - "loss": 0.9699, - "step": 727 - }, - { - "epoch": 0.06565360508635072, - "grad_norm": 1.5267888692952305, - "learning_rate": 3.986701849511751e-06, - "loss": 1.0101, - "step": 728 - }, - { - "epoch": 0.06574378860982098, - "grad_norm": 1.7956485709300702, - "learning_rate": 3.986634507062782e-06, - "loss": 0.9943, - "step": 729 - }, - { - "epoch": 0.06583397213329124, - "grad_norm": 1.695425425341465, - "learning_rate": 3.986566995103204e-06, - "loss": 1.0303, - "step": 730 - }, - { - "epoch": 0.0659241556567615, - "grad_norm": 1.5436384461904844, - "learning_rate": 3.986499313638776e-06, - "loss": 0.9566, - "step": 731 - }, - { - "epoch": 0.06601433918023177, - "grad_norm": 1.3577904509585765, - "learning_rate": 3.986431462675272e-06, - "loss": 0.9471, - "step": 732 - }, - { - "epoch": 0.06610452270370204, - "grad_norm": 1.2617487948120722, - "learning_rate": 3.9863634422184835e-06, - "loss": 1.0259, - "step": 733 - }, - { - "epoch": 0.0661947062271723, - "grad_norm": 1.546262754790141, - "learning_rate": 3.986295252274213e-06, - "loss": 1.0488, - "step": 734 - }, - { - "epoch": 0.06628488975064256, - "grad_norm": 1.2855603820406483, - "learning_rate": 3.9862268928482796e-06, - "loss": 1.061, - "step": 735 - }, - { - "epoch": 0.06637507327411282, - "grad_norm": 1.3447536446386683, - "learning_rate": 3.986158363946515e-06, - "loss": 0.9269, - "step": 736 - }, - { - "epoch": 0.06646525679758308, - "grad_norm": 1.525209697001455, - "learning_rate": 3.9860896655747685e-06, - "loss": 1.011, - "step": 737 - }, - { - "epoch": 0.06655544032105334, - "grad_norm": 1.5839940081396349, - "learning_rate": 3.9860207977388994e-06, - "loss": 1.0847, - "step": 738 - }, - { - "epoch": 0.0666456238445236, - "grad_norm": 4.9585837720353165, - "learning_rate": 3.9859517604447854e-06, - "loss": 0.9452, - "step": 739 - }, - { - "epoch": 0.06673580736799387, - "grad_norm": 1.5163043357881223, - "learning_rate": 3.985882553698317e-06, - "loss": 0.9865, - "step": 740 - }, - { - "epoch": 0.06682599089146413, - "grad_norm": 1.8411196680291546, - "learning_rate": 3.985813177505399e-06, - "loss": 1.0783, - "step": 741 - }, - { - "epoch": 0.06691617441493439, - "grad_norm": 1.7140441977011485, - "learning_rate": 3.985743631871951e-06, - "loss": 0.966, - "step": 742 - }, - { - "epoch": 0.06700635793840465, - "grad_norm": 1.3143314118170923, - "learning_rate": 3.985673916803907e-06, - "loss": 1.0177, - "step": 743 - }, - { - "epoch": 0.06709654146187491, - "grad_norm": 1.6760632115463667, - "learning_rate": 3.985604032307215e-06, - "loss": 1.0323, - "step": 744 - }, - { - "epoch": 0.06718672498534518, - "grad_norm": 1.4332551062476495, - "learning_rate": 3.985533978387839e-06, - "loss": 1.026, - "step": 745 - }, - { - "epoch": 0.06727690850881544, - "grad_norm": 1.41328514187152, - "learning_rate": 3.985463755051756e-06, - "loss": 1.0521, - "step": 746 - }, - { - "epoch": 0.0673670920322857, - "grad_norm": 1.3963708791428677, - "learning_rate": 3.9853933623049575e-06, - "loss": 1.0059, - "step": 747 - }, - { - "epoch": 0.06745727555575597, - "grad_norm": 1.641261603960096, - "learning_rate": 3.98532280015345e-06, - "loss": 1.0116, - "step": 748 - }, - { - "epoch": 0.06754745907922623, - "grad_norm": 1.4849272654281962, - "learning_rate": 3.985252068603254e-06, - "loss": 0.9315, - "step": 749 - }, - { - "epoch": 0.06763764260269649, - "grad_norm": 1.3866374677020026, - "learning_rate": 3.985181167660406e-06, - "loss": 1.0404, - "step": 750 - }, - { - "epoch": 0.06772782612616675, - "grad_norm": 1.3968885143744245, - "learning_rate": 3.985110097330953e-06, - "loss": 1.0347, - "step": 751 - }, - { - "epoch": 0.06781800964963701, - "grad_norm": 1.510806811577367, - "learning_rate": 3.985038857620962e-06, - "loss": 1.0103, - "step": 752 - }, - { - "epoch": 0.06790819317310727, - "grad_norm": 0.8007182026676507, - "learning_rate": 3.9849674485365094e-06, - "loss": 0.8138, - "step": 753 - }, - { - "epoch": 0.06799837669657753, - "grad_norm": 2.1207275074435272, - "learning_rate": 3.98489587008369e-06, - "loss": 0.9473, - "step": 754 - }, - { - "epoch": 0.06808856022004779, - "grad_norm": 1.845267754124485, - "learning_rate": 3.98482412226861e-06, - "loss": 0.8576, - "step": 755 - }, - { - "epoch": 0.06817874374351805, - "grad_norm": 1.4430730016830906, - "learning_rate": 3.984752205097391e-06, - "loss": 1.0452, - "step": 756 - }, - { - "epoch": 0.06826892726698833, - "grad_norm": 3.4740993732301506, - "learning_rate": 3.984680118576171e-06, - "loss": 1.0211, - "step": 757 - }, - { - "epoch": 0.06835911079045859, - "grad_norm": 1.4048942388321521, - "learning_rate": 3.984607862711099e-06, - "loss": 0.9735, - "step": 758 - }, - { - "epoch": 0.06844929431392885, - "grad_norm": 1.9092795889783036, - "learning_rate": 3.984535437508341e-06, - "loss": 1.0446, - "step": 759 - }, - { - "epoch": 0.06853947783739911, - "grad_norm": 0.773138123187335, - "learning_rate": 3.984462842974078e-06, - "loss": 0.8163, - "step": 760 - }, - { - "epoch": 0.06862966136086937, - "grad_norm": 1.6245109849337087, - "learning_rate": 3.984390079114502e-06, - "loss": 1.0292, - "step": 761 - }, - { - "epoch": 0.06871984488433963, - "grad_norm": 1.3668556028506411, - "learning_rate": 3.984317145935824e-06, - "loss": 1.1337, - "step": 762 - }, - { - "epoch": 0.06881002840780989, - "grad_norm": 1.3401604992555756, - "learning_rate": 3.984244043444264e-06, - "loss": 0.9838, - "step": 763 - }, - { - "epoch": 0.06890021193128015, - "grad_norm": 1.2782950402450763, - "learning_rate": 3.984170771646062e-06, - "loss": 0.9626, - "step": 764 - }, - { - "epoch": 0.06899039545475041, - "grad_norm": 1.6415568294024168, - "learning_rate": 3.9840973305474695e-06, - "loss": 1.0541, - "step": 765 - }, - { - "epoch": 0.06908057897822067, - "grad_norm": 1.4815230427757897, - "learning_rate": 3.984023720154752e-06, - "loss": 1.0619, - "step": 766 - }, - { - "epoch": 0.06917076250169094, - "grad_norm": 1.6562643950214653, - "learning_rate": 3.9839499404741915e-06, - "loss": 1.0282, - "step": 767 - }, - { - "epoch": 0.0692609460251612, - "grad_norm": 0.905500101726659, - "learning_rate": 3.983875991512082e-06, - "loss": 0.8502, - "step": 768 - }, - { - "epoch": 0.06935112954863147, - "grad_norm": 1.5271771820637774, - "learning_rate": 3.9838018732747345e-06, - "loss": 1.0322, - "step": 769 - }, - { - "epoch": 0.06944131307210173, - "grad_norm": 1.5687465926053554, - "learning_rate": 3.9837275857684716e-06, - "loss": 0.9955, - "step": 770 - }, - { - "epoch": 0.06953149659557199, - "grad_norm": 1.892944426464911, - "learning_rate": 3.983653128999634e-06, - "loss": 1.1336, - "step": 771 - }, - { - "epoch": 0.06962168011904225, - "grad_norm": 1.3295161870922887, - "learning_rate": 3.983578502974574e-06, - "loss": 1.0443, - "step": 772 - }, - { - "epoch": 0.06971186364251251, - "grad_norm": 1.852589833273218, - "learning_rate": 3.983503707699658e-06, - "loss": 1.0014, - "step": 773 - }, - { - "epoch": 0.06980204716598278, - "grad_norm": 6.286601202183336, - "learning_rate": 3.983428743181268e-06, - "loss": 1.0414, - "step": 774 - }, - { - "epoch": 0.06989223068945304, - "grad_norm": 1.6413430232163049, - "learning_rate": 3.983353609425802e-06, - "loss": 1.087, - "step": 775 - }, - { - "epoch": 0.0699824142129233, - "grad_norm": 1.7267314431229055, - "learning_rate": 3.983278306439671e-06, - "loss": 1.0161, - "step": 776 - }, - { - "epoch": 0.07007259773639356, - "grad_norm": 1.39047621884769, - "learning_rate": 3.983202834229297e-06, - "loss": 1.0471, - "step": 777 - }, - { - "epoch": 0.07016278125986382, - "grad_norm": 1.3646400041921551, - "learning_rate": 3.983127192801123e-06, - "loss": 1.0929, - "step": 778 - }, - { - "epoch": 0.07025296478333408, - "grad_norm": 1.4346733497714501, - "learning_rate": 3.983051382161602e-06, - "loss": 1.0506, - "step": 779 - }, - { - "epoch": 0.07034314830680434, - "grad_norm": 1.5388463831944228, - "learning_rate": 3.982975402317203e-06, - "loss": 1.039, - "step": 780 - }, - { - "epoch": 0.07043333183027461, - "grad_norm": 1.7632436982408264, - "learning_rate": 3.982899253274409e-06, - "loss": 1.0402, - "step": 781 - }, - { - "epoch": 0.07052351535374488, - "grad_norm": 1.8214842336361328, - "learning_rate": 3.982822935039717e-06, - "loss": 1.0, - "step": 782 - }, - { - "epoch": 0.07061369887721514, - "grad_norm": 1.4938102805771254, - "learning_rate": 3.982746447619638e-06, - "loss": 0.9456, - "step": 783 - }, - { - "epoch": 0.0707038824006854, - "grad_norm": 1.0209319333058118, - "learning_rate": 3.9826697910207e-06, - "loss": 0.8254, - "step": 784 - }, - { - "epoch": 0.07079406592415566, - "grad_norm": 1.5374712003166038, - "learning_rate": 3.982592965249442e-06, - "loss": 0.9978, - "step": 785 - }, - { - "epoch": 0.07088424944762592, - "grad_norm": 1.7862307864224267, - "learning_rate": 3.982515970312422e-06, - "loss": 1.0617, - "step": 786 - }, - { - "epoch": 0.07097443297109618, - "grad_norm": 1.67660580425384, - "learning_rate": 3.982438806216207e-06, - "loss": 0.9859, - "step": 787 - }, - { - "epoch": 0.07106461649456644, - "grad_norm": 1.5343287909672079, - "learning_rate": 3.982361472967382e-06, - "loss": 0.9999, - "step": 788 - }, - { - "epoch": 0.0711548000180367, - "grad_norm": 1.612988387674438, - "learning_rate": 3.982283970572546e-06, - "loss": 1.0848, - "step": 789 - }, - { - "epoch": 0.07124498354150696, - "grad_norm": 1.6623262472349083, - "learning_rate": 3.982206299038311e-06, - "loss": 0.9874, - "step": 790 - }, - { - "epoch": 0.07133516706497722, - "grad_norm": 1.3997597862973943, - "learning_rate": 3.9821284583713054e-06, - "loss": 1.084, - "step": 791 - }, - { - "epoch": 0.07142535058844748, - "grad_norm": 1.818075286930424, - "learning_rate": 3.98205044857817e-06, - "loss": 1.0011, - "step": 792 - }, - { - "epoch": 0.07151553411191776, - "grad_norm": 1.130466689481617, - "learning_rate": 3.981972269665561e-06, - "loss": 0.8632, - "step": 793 - }, - { - "epoch": 0.07160571763538802, - "grad_norm": 1.6225767407247693, - "learning_rate": 3.98189392164015e-06, - "loss": 1.0027, - "step": 794 - }, - { - "epoch": 0.07169590115885828, - "grad_norm": 1.7166900775140246, - "learning_rate": 3.981815404508621e-06, - "loss": 1.0536, - "step": 795 - }, - { - "epoch": 0.07178608468232854, - "grad_norm": 1.4750430246317463, - "learning_rate": 3.981736718277674e-06, - "loss": 1.0132, - "step": 796 - }, - { - "epoch": 0.0718762682057988, - "grad_norm": 1.4823614015168607, - "learning_rate": 3.9816578629540235e-06, - "loss": 1.0525, - "step": 797 - }, - { - "epoch": 0.07196645172926906, - "grad_norm": 1.803760769521771, - "learning_rate": 3.981578838544398e-06, - "loss": 1.0217, - "step": 798 - }, - { - "epoch": 0.07205663525273932, - "grad_norm": 1.5855977864349111, - "learning_rate": 3.981499645055539e-06, - "loss": 0.7949, - "step": 799 - }, - { - "epoch": 0.07214681877620958, - "grad_norm": 1.1617710678951785, - "learning_rate": 3.981420282494204e-06, - "loss": 0.8412, - "step": 800 - }, - { - "epoch": 0.07223700229967984, - "grad_norm": 1.1485158154001112, - "learning_rate": 3.981340750867166e-06, - "loss": 0.8398, - "step": 801 - }, - { - "epoch": 0.0723271858231501, - "grad_norm": 1.6398252445247226, - "learning_rate": 3.981261050181209e-06, - "loss": 1.007, - "step": 802 - }, - { - "epoch": 0.07241736934662037, - "grad_norm": 1.7359385273241081, - "learning_rate": 3.9811811804431355e-06, - "loss": 0.9745, - "step": 803 - }, - { - "epoch": 0.07250755287009064, - "grad_norm": 1.8623895650803999, - "learning_rate": 3.981101141659759e-06, - "loss": 1.0504, - "step": 804 - }, - { - "epoch": 0.0725977363935609, - "grad_norm": 1.5914730187231358, - "learning_rate": 3.98102093383791e-06, - "loss": 1.0421, - "step": 805 - }, - { - "epoch": 0.07268791991703116, - "grad_norm": 1.649117352193452, - "learning_rate": 3.9809405569844315e-06, - "loss": 0.9366, - "step": 806 - }, - { - "epoch": 0.07277810344050142, - "grad_norm": 1.5735938864821426, - "learning_rate": 3.980860011106182e-06, - "loss": 0.9769, - "step": 807 - }, - { - "epoch": 0.07286828696397168, - "grad_norm": 1.2628333775211236, - "learning_rate": 3.980779296210033e-06, - "loss": 0.9502, - "step": 808 - }, - { - "epoch": 0.07295847048744195, - "grad_norm": 1.7357960970445627, - "learning_rate": 3.980698412302874e-06, - "loss": 1.089, - "step": 809 - }, - { - "epoch": 0.0730486540109122, - "grad_norm": 1.5450645409840569, - "learning_rate": 3.980617359391604e-06, - "loss": 1.0246, - "step": 810 - }, - { - "epoch": 0.07313883753438247, - "grad_norm": 1.4001671095295944, - "learning_rate": 3.98053613748314e-06, - "loss": 0.9696, - "step": 811 - }, - { - "epoch": 0.07322902105785273, - "grad_norm": 1.423057232341829, - "learning_rate": 3.980454746584413e-06, - "loss": 0.878, - "step": 812 - }, - { - "epoch": 0.07331920458132299, - "grad_norm": 1.7734160569024733, - "learning_rate": 3.9803731867023665e-06, - "loss": 1.094, - "step": 813 - }, - { - "epoch": 0.07340938810479325, - "grad_norm": 1.7154080585556861, - "learning_rate": 3.9802914578439596e-06, - "loss": 0.9986, - "step": 814 - }, - { - "epoch": 0.07349957162826351, - "grad_norm": 1.768391492656029, - "learning_rate": 3.980209560016167e-06, - "loss": 1.1065, - "step": 815 - }, - { - "epoch": 0.07358975515173379, - "grad_norm": 1.6802362299773335, - "learning_rate": 3.980127493225975e-06, - "loss": 1.1083, - "step": 816 - }, - { - "epoch": 0.07367993867520405, - "grad_norm": 1.678240769582385, - "learning_rate": 3.980045257480387e-06, - "loss": 1.025, - "step": 817 - }, - { - "epoch": 0.0737701221986743, - "grad_norm": 1.5911119356514365, - "learning_rate": 3.9799628527864205e-06, - "loss": 0.9696, - "step": 818 - }, - { - "epoch": 0.07386030572214457, - "grad_norm": 1.457693322841197, - "learning_rate": 3.979880279151106e-06, - "loss": 0.9913, - "step": 819 - }, - { - "epoch": 0.07395048924561483, - "grad_norm": 1.8777477475811661, - "learning_rate": 3.979797536581489e-06, - "loss": 0.9609, - "step": 820 - }, - { - "epoch": 0.07404067276908509, - "grad_norm": 1.7938415796381644, - "learning_rate": 3.97971462508463e-06, - "loss": 0.8708, - "step": 821 - }, - { - "epoch": 0.07413085629255535, - "grad_norm": 1.3216678953145022, - "learning_rate": 3.979631544667603e-06, - "loss": 0.9926, - "step": 822 - }, - { - "epoch": 0.07422103981602561, - "grad_norm": 1.5557150967088047, - "learning_rate": 3.979548295337496e-06, - "loss": 1.0233, - "step": 823 - }, - { - "epoch": 0.07431122333949587, - "grad_norm": 1.4750846450570245, - "learning_rate": 3.9794648771014146e-06, - "loss": 0.9777, - "step": 824 - }, - { - "epoch": 0.07440140686296613, - "grad_norm": 1.458912225902845, - "learning_rate": 3.9793812899664745e-06, - "loss": 1.0207, - "step": 825 - }, - { - "epoch": 0.0744915903864364, - "grad_norm": 2.4094407365789134, - "learning_rate": 3.979297533939809e-06, - "loss": 0.9681, - "step": 826 - }, - { - "epoch": 0.07458177390990665, - "grad_norm": 1.772852393605381, - "learning_rate": 3.979213609028564e-06, - "loss": 0.9855, - "step": 827 - }, - { - "epoch": 0.07467195743337693, - "grad_norm": 1.541422394998086, - "learning_rate": 3.979129515239901e-06, - "loss": 1.0265, - "step": 828 - }, - { - "epoch": 0.07476214095684719, - "grad_norm": 1.2503510935766209, - "learning_rate": 3.979045252580994e-06, - "loss": 1.0203, - "step": 829 - }, - { - "epoch": 0.07485232448031745, - "grad_norm": 1.3810329301210287, - "learning_rate": 3.978960821059034e-06, - "loss": 0.9597, - "step": 830 - }, - { - "epoch": 0.07494250800378771, - "grad_norm": 1.3899222752226015, - "learning_rate": 3.978876220681225e-06, - "loss": 1.0441, - "step": 831 - }, - { - "epoch": 0.07503269152725797, - "grad_norm": 1.5255180823380643, - "learning_rate": 3.978791451454786e-06, - "loss": 0.9353, - "step": 832 - }, - { - "epoch": 0.07512287505072823, - "grad_norm": 1.4839801463935212, - "learning_rate": 3.978706513386949e-06, - "loss": 0.9788, - "step": 833 - }, - { - "epoch": 0.0752130585741985, - "grad_norm": 1.757897472447089, - "learning_rate": 3.978621406484962e-06, - "loss": 1.0267, - "step": 834 - }, - { - "epoch": 0.07530324209766875, - "grad_norm": 1.496031199609309, - "learning_rate": 3.978536130756086e-06, - "loss": 0.9999, - "step": 835 - }, - { - "epoch": 0.07539342562113902, - "grad_norm": 1.355710626397972, - "learning_rate": 3.978450686207599e-06, - "loss": 1.0635, - "step": 836 - }, - { - "epoch": 0.07548360914460928, - "grad_norm": 1.6867628960179355, - "learning_rate": 3.978365072846789e-06, - "loss": 0.9918, - "step": 837 - }, - { - "epoch": 0.07557379266807954, - "grad_norm": 1.569660184130052, - "learning_rate": 3.9782792906809625e-06, - "loss": 0.9642, - "step": 838 - }, - { - "epoch": 0.0756639761915498, - "grad_norm": 1.62631905645875, - "learning_rate": 3.97819333971744e-06, - "loss": 0.9942, - "step": 839 - }, - { - "epoch": 0.07575415971502007, - "grad_norm": 1.7169947591533892, - "learning_rate": 3.978107219963553e-06, - "loss": 1.0169, - "step": 840 - }, - { - "epoch": 0.07584434323849033, - "grad_norm": 1.40355228638431, - "learning_rate": 3.978020931426651e-06, - "loss": 0.9157, - "step": 841 - }, - { - "epoch": 0.0759345267619606, - "grad_norm": 1.4040256710558485, - "learning_rate": 3.977934474114096e-06, - "loss": 0.9534, - "step": 842 - }, - { - "epoch": 0.07602471028543085, - "grad_norm": 0.7840366256780757, - "learning_rate": 3.977847848033267e-06, - "loss": 0.7914, - "step": 843 - }, - { - "epoch": 0.07611489380890112, - "grad_norm": 1.5884297899550288, - "learning_rate": 3.977761053191553e-06, - "loss": 1.0603, - "step": 844 - }, - { - "epoch": 0.07620507733237138, - "grad_norm": 1.359836587695257, - "learning_rate": 3.977674089596361e-06, - "loss": 0.9926, - "step": 845 - }, - { - "epoch": 0.07629526085584164, - "grad_norm": 0.8510510369807921, - "learning_rate": 3.97758695725511e-06, - "loss": 0.7918, - "step": 846 - }, - { - "epoch": 0.0763854443793119, - "grad_norm": 1.2234418496240926, - "learning_rate": 3.977499656175236e-06, - "loss": 0.9987, - "step": 847 - }, - { - "epoch": 0.07647562790278216, - "grad_norm": 1.5459019847065272, - "learning_rate": 3.977412186364187e-06, - "loss": 0.9168, - "step": 848 - }, - { - "epoch": 0.07656581142625242, - "grad_norm": 6.598512204411505, - "learning_rate": 3.977324547829428e-06, - "loss": 1.0294, - "step": 849 - }, - { - "epoch": 0.07665599494972268, - "grad_norm": 2.518810080292092, - "learning_rate": 3.977236740578435e-06, - "loss": 1.026, - "step": 850 - }, - { - "epoch": 0.07674617847319294, - "grad_norm": 2.0327722567445985, - "learning_rate": 3.9771487646187015e-06, - "loss": 1.0011, - "step": 851 - }, - { - "epoch": 0.07683636199666322, - "grad_norm": 1.737927347225251, - "learning_rate": 3.9770606199577325e-06, - "loss": 1.0502, - "step": 852 - }, - { - "epoch": 0.07692654552013348, - "grad_norm": 1.878787855831642, - "learning_rate": 3.9769723066030505e-06, - "loss": 0.9435, - "step": 853 - }, - { - "epoch": 0.07701672904360374, - "grad_norm": 1.7149373161493493, - "learning_rate": 3.976883824562191e-06, - "loss": 1.073, - "step": 854 - }, - { - "epoch": 0.077106912567074, - "grad_norm": 1.5231038045993324, - "learning_rate": 3.976795173842703e-06, - "loss": 1.0102, - "step": 855 - }, - { - "epoch": 0.07719709609054426, - "grad_norm": 1.629328245392788, - "learning_rate": 3.97670635445215e-06, - "loss": 0.9801, - "step": 856 - }, - { - "epoch": 0.07728727961401452, - "grad_norm": 1.7347602759976501, - "learning_rate": 3.976617366398112e-06, - "loss": 0.985, - "step": 857 - }, - { - "epoch": 0.07737746313748478, - "grad_norm": 1.2347433771858314, - "learning_rate": 3.976528209688181e-06, - "loss": 1.038, - "step": 858 - }, - { - "epoch": 0.07746764666095504, - "grad_norm": 1.2608752191565513, - "learning_rate": 3.976438884329965e-06, - "loss": 1.0072, - "step": 859 - }, - { - "epoch": 0.0775578301844253, - "grad_norm": 1.450024815051079, - "learning_rate": 3.976349390331085e-06, - "loss": 1.1366, - "step": 860 - }, - { - "epoch": 0.07764801370789556, - "grad_norm": 1.573096545041261, - "learning_rate": 3.976259727699178e-06, - "loss": 0.9701, - "step": 861 - }, - { - "epoch": 0.07773819723136582, - "grad_norm": 1.8612720845323238, - "learning_rate": 3.976169896441895e-06, - "loss": 0.9704, - "step": 862 - }, - { - "epoch": 0.07782838075483609, - "grad_norm": 1.608766829751644, - "learning_rate": 3.976079896566898e-06, - "loss": 1.0404, - "step": 863 - }, - { - "epoch": 0.07791856427830636, - "grad_norm": 1.609402628541101, - "learning_rate": 3.97598972808187e-06, - "loss": 1.0667, - "step": 864 - }, - { - "epoch": 0.07800874780177662, - "grad_norm": 1.9634604466755066, - "learning_rate": 3.975899390994501e-06, - "loss": 1.0064, - "step": 865 - }, - { - "epoch": 0.07809893132524688, - "grad_norm": 1.5281622759245295, - "learning_rate": 3.975808885312502e-06, - "loss": 1.0703, - "step": 866 - }, - { - "epoch": 0.07818911484871714, - "grad_norm": 1.4846126968402031, - "learning_rate": 3.975718211043594e-06, - "loss": 1.0539, - "step": 867 - }, - { - "epoch": 0.0782792983721874, - "grad_norm": 1.594409432213231, - "learning_rate": 3.975627368195515e-06, - "loss": 0.9261, - "step": 868 - }, - { - "epoch": 0.07836948189565766, - "grad_norm": 1.2545109891900494, - "learning_rate": 3.975536356776015e-06, - "loss": 0.9797, - "step": 869 - }, - { - "epoch": 0.07845966541912792, - "grad_norm": 0.7964163937819829, - "learning_rate": 3.975445176792861e-06, - "loss": 0.8772, - "step": 870 - }, - { - "epoch": 0.07854984894259819, - "grad_norm": 1.4585281332792372, - "learning_rate": 3.975353828253831e-06, - "loss": 1.0062, - "step": 871 - }, - { - "epoch": 0.07864003246606845, - "grad_norm": 1.4309551684956399, - "learning_rate": 3.97526231116672e-06, - "loss": 1.0113, - "step": 872 - }, - { - "epoch": 0.07873021598953871, - "grad_norm": 1.559532322716504, - "learning_rate": 3.975170625539338e-06, - "loss": 0.9523, - "step": 873 - }, - { - "epoch": 0.07882039951300897, - "grad_norm": 1.2616214773751981, - "learning_rate": 3.975078771379507e-06, - "loss": 1.0056, - "step": 874 - }, - { - "epoch": 0.07891058303647923, - "grad_norm": 1.7574230525616332, - "learning_rate": 3.974986748695064e-06, - "loss": 0.9143, - "step": 875 - }, - { - "epoch": 0.0790007665599495, - "grad_norm": 1.0101896535686075, - "learning_rate": 3.974894557493862e-06, - "loss": 0.8277, - "step": 876 - }, - { - "epoch": 0.07909095008341976, - "grad_norm": 1.366436823071569, - "learning_rate": 3.974802197783768e-06, - "loss": 1.0536, - "step": 877 - }, - { - "epoch": 0.07918113360689003, - "grad_norm": 1.9243365395380099, - "learning_rate": 3.974709669572661e-06, - "loss": 1.0676, - "step": 878 - }, - { - "epoch": 0.07927131713036029, - "grad_norm": 1.920199524525839, - "learning_rate": 3.974616972868436e-06, - "loss": 1.0004, - "step": 879 - }, - { - "epoch": 0.07936150065383055, - "grad_norm": 1.5368224752354431, - "learning_rate": 3.974524107679003e-06, - "loss": 1.0127, - "step": 880 - }, - { - "epoch": 0.07945168417730081, - "grad_norm": 1.3157479652332515, - "learning_rate": 3.974431074012286e-06, - "loss": 1.0003, - "step": 881 - }, - { - "epoch": 0.07954186770077107, - "grad_norm": 1.764612701003494, - "learning_rate": 3.974337871876223e-06, - "loss": 1.0338, - "step": 882 - }, - { - "epoch": 0.07963205122424133, - "grad_norm": 1.5019098679919696, - "learning_rate": 3.974244501278766e-06, - "loss": 0.9779, - "step": 883 - }, - { - "epoch": 0.07972223474771159, - "grad_norm": 1.3413779591780206, - "learning_rate": 3.974150962227883e-06, - "loss": 0.9545, - "step": 884 - }, - { - "epoch": 0.07981241827118185, - "grad_norm": 1.5107456594376718, - "learning_rate": 3.9740572547315535e-06, - "loss": 0.9321, - "step": 885 - }, - { - "epoch": 0.07990260179465211, - "grad_norm": 1.9632184262590042, - "learning_rate": 3.973963378797775e-06, - "loss": 1.0402, - "step": 886 - }, - { - "epoch": 0.07999278531812237, - "grad_norm": 1.3714451787786053, - "learning_rate": 3.973869334434556e-06, - "loss": 1.0221, - "step": 887 - }, - { - "epoch": 0.08008296884159265, - "grad_norm": 1.438490982138776, - "learning_rate": 3.973775121649922e-06, - "loss": 1.0241, - "step": 888 - }, - { - "epoch": 0.08017315236506291, - "grad_norm": 1.4890728144573662, - "learning_rate": 3.973680740451911e-06, - "loss": 0.9167, - "step": 889 - }, - { - "epoch": 0.08026333588853317, - "grad_norm": 1.476559755030104, - "learning_rate": 3.9735861908485776e-06, - "loss": 0.9732, - "step": 890 - }, - { - "epoch": 0.08035351941200343, - "grad_norm": 1.4387464509683248, - "learning_rate": 3.973491472847987e-06, - "loss": 1.0227, - "step": 891 - }, - { - "epoch": 0.08044370293547369, - "grad_norm": 1.4657128769286993, - "learning_rate": 3.973396586458222e-06, - "loss": 0.9305, - "step": 892 - }, - { - "epoch": 0.08053388645894395, - "grad_norm": 1.5259343739918367, - "learning_rate": 3.97330153168738e-06, - "loss": 0.9909, - "step": 893 - }, - { - "epoch": 0.08062406998241421, - "grad_norm": 1.4964282904311552, - "learning_rate": 3.973206308543571e-06, - "loss": 0.9431, - "step": 894 - }, - { - "epoch": 0.08071425350588447, - "grad_norm": 1.6286555767428668, - "learning_rate": 3.973110917034918e-06, - "loss": 0.9725, - "step": 895 - }, - { - "epoch": 0.08080443702935473, - "grad_norm": 1.66991445082414, - "learning_rate": 3.973015357169563e-06, - "loss": 0.9208, - "step": 896 - }, - { - "epoch": 0.080894620552825, - "grad_norm": 1.7225095379348025, - "learning_rate": 3.972919628955659e-06, - "loss": 0.9007, - "step": 897 - }, - { - "epoch": 0.08098480407629526, - "grad_norm": 1.7673928516721242, - "learning_rate": 3.972823732401373e-06, - "loss": 0.9847, - "step": 898 - }, - { - "epoch": 0.08107498759976552, - "grad_norm": 1.463934695142251, - "learning_rate": 3.972727667514888e-06, - "loss": 0.9967, - "step": 899 - }, - { - "epoch": 0.08116517112323579, - "grad_norm": 2.0480923126843806, - "learning_rate": 3.972631434304402e-06, - "loss": 0.9598, - "step": 900 - }, - { - "epoch": 0.08125535464670605, - "grad_norm": 1.612746623496804, - "learning_rate": 3.972535032778124e-06, - "loss": 1.1111, - "step": 901 - }, - { - "epoch": 0.08134553817017631, - "grad_norm": 1.2325262880907066, - "learning_rate": 3.97243846294428e-06, - "loss": 1.0911, - "step": 902 - }, - { - "epoch": 0.08143572169364657, - "grad_norm": 1.4321092616599311, - "learning_rate": 3.972341724811111e-06, - "loss": 1.1384, - "step": 903 - }, - { - "epoch": 0.08152590521711683, - "grad_norm": 1.5654298398023163, - "learning_rate": 3.972244818386872e-06, - "loss": 0.9456, - "step": 904 - }, - { - "epoch": 0.0816160887405871, - "grad_norm": 1.529807364859341, - "learning_rate": 3.972147743679828e-06, - "loss": 0.9535, - "step": 905 - }, - { - "epoch": 0.08170627226405736, - "grad_norm": 1.3082776043598623, - "learning_rate": 3.972050500698265e-06, - "loss": 0.9649, - "step": 906 - }, - { - "epoch": 0.08179645578752762, - "grad_norm": 1.586449300932725, - "learning_rate": 3.971953089450481e-06, - "loss": 0.9928, - "step": 907 - }, - { - "epoch": 0.08188663931099788, - "grad_norm": 1.5666896344929446, - "learning_rate": 3.971855509944784e-06, - "loss": 1.0706, - "step": 908 - }, - { - "epoch": 0.08197682283446814, - "grad_norm": 1.4520886735348173, - "learning_rate": 3.971757762189504e-06, - "loss": 0.9892, - "step": 909 - }, - { - "epoch": 0.0820670063579384, - "grad_norm": 1.6700944777849496, - "learning_rate": 3.9716598461929785e-06, - "loss": 0.9869, - "step": 910 - }, - { - "epoch": 0.08215718988140866, - "grad_norm": 1.76072146995403, - "learning_rate": 3.971561761963563e-06, - "loss": 0.9734, - "step": 911 - }, - { - "epoch": 0.08224737340487893, - "grad_norm": 1.713800760487087, - "learning_rate": 3.971463509509628e-06, - "loss": 1.0768, - "step": 912 - }, - { - "epoch": 0.0823375569283492, - "grad_norm": 1.877944922422602, - "learning_rate": 3.9713650888395555e-06, - "loss": 1.0034, - "step": 913 - }, - { - "epoch": 0.08242774045181946, - "grad_norm": 1.712991117793993, - "learning_rate": 3.9712664999617425e-06, - "loss": 0.9985, - "step": 914 - }, - { - "epoch": 0.08251792397528972, - "grad_norm": 1.8855844567036575, - "learning_rate": 3.971167742884603e-06, - "loss": 0.9743, - "step": 915 - }, - { - "epoch": 0.08260810749875998, - "grad_norm": 5.39136267880615, - "learning_rate": 3.971068817616564e-06, - "loss": 1.0507, - "step": 916 - }, - { - "epoch": 0.08269829102223024, - "grad_norm": 1.3419769921384248, - "learning_rate": 3.970969724166064e-06, - "loss": 1.027, - "step": 917 - }, - { - "epoch": 0.0827884745457005, - "grad_norm": 1.1007801280614957, - "learning_rate": 3.970870462541559e-06, - "loss": 0.8166, - "step": 918 - }, - { - "epoch": 0.08287865806917076, - "grad_norm": 1.3798097238804305, - "learning_rate": 3.97077103275152e-06, - "loss": 1.0262, - "step": 919 - }, - { - "epoch": 0.08296884159264102, - "grad_norm": 1.4599208456294952, - "learning_rate": 3.970671434804428e-06, - "loss": 0.9811, - "step": 920 - }, - { - "epoch": 0.08305902511611128, - "grad_norm": 1.7318270413639292, - "learning_rate": 3.970571668708784e-06, - "loss": 0.9972, - "step": 921 - }, - { - "epoch": 0.08314920863958154, - "grad_norm": 0.9301282014467449, - "learning_rate": 3.9704717344731e-06, - "loss": 0.8237, - "step": 922 - }, - { - "epoch": 0.0832393921630518, - "grad_norm": 0.7001432774573607, - "learning_rate": 3.9703716321059026e-06, - "loss": 0.8366, - "step": 923 - }, - { - "epoch": 0.08332957568652208, - "grad_norm": 1.7513468191868935, - "learning_rate": 3.9702713616157325e-06, - "loss": 1.0908, - "step": 924 - }, - { - "epoch": 0.08341975920999234, - "grad_norm": 1.8917310017584201, - "learning_rate": 3.9701709230111455e-06, - "loss": 1.0204, - "step": 925 - }, - { - "epoch": 0.0835099427334626, - "grad_norm": 1.4598323293323086, - "learning_rate": 3.970070316300713e-06, - "loss": 0.9669, - "step": 926 - }, - { - "epoch": 0.08360012625693286, - "grad_norm": 1.4897528954234684, - "learning_rate": 3.969969541493017e-06, - "loss": 1.0247, - "step": 927 - }, - { - "epoch": 0.08369030978040312, - "grad_norm": 1.4836817678624192, - "learning_rate": 3.969868598596658e-06, - "loss": 1.0523, - "step": 928 - }, - { - "epoch": 0.08378049330387338, - "grad_norm": 1.2703527052406667, - "learning_rate": 3.969767487620249e-06, - "loss": 0.9477, - "step": 929 - }, - { - "epoch": 0.08387067682734364, - "grad_norm": 1.453880031867901, - "learning_rate": 3.969666208572416e-06, - "loss": 1.0492, - "step": 930 - }, - { - "epoch": 0.0839608603508139, - "grad_norm": 1.6940843906617273, - "learning_rate": 3.969564761461802e-06, - "loss": 1.0556, - "step": 931 - }, - { - "epoch": 0.08405104387428416, - "grad_norm": 1.3984837444480407, - "learning_rate": 3.969463146297062e-06, - "loss": 0.9812, - "step": 932 - }, - { - "epoch": 0.08414122739775443, - "grad_norm": 1.991423636203803, - "learning_rate": 3.969361363086867e-06, - "loss": 1.0347, - "step": 933 - }, - { - "epoch": 0.08423141092122469, - "grad_norm": 1.5573652966634344, - "learning_rate": 3.9692594118399014e-06, - "loss": 0.9766, - "step": 934 - }, - { - "epoch": 0.08432159444469496, - "grad_norm": 1.6403088219561976, - "learning_rate": 3.969157292564865e-06, - "loss": 1.0415, - "step": 935 - }, - { - "epoch": 0.08441177796816522, - "grad_norm": 1.6123639101072478, - "learning_rate": 3.96905500527047e-06, - "loss": 0.9931, - "step": 936 - }, - { - "epoch": 0.08450196149163548, - "grad_norm": 1.5321910068721578, - "learning_rate": 3.968952549965445e-06, - "loss": 0.9856, - "step": 937 - }, - { - "epoch": 0.08459214501510574, - "grad_norm": 1.4557463270164297, - "learning_rate": 3.968849926658532e-06, - "loss": 1.0352, - "step": 938 - }, - { - "epoch": 0.084682328538576, - "grad_norm": 2.99254349067463, - "learning_rate": 3.9687471353584866e-06, - "loss": 0.9211, - "step": 939 - }, - { - "epoch": 0.08477251206204627, - "grad_norm": 1.3273307220376458, - "learning_rate": 3.9686441760740795e-06, - "loss": 1.0182, - "step": 940 - }, - { - "epoch": 0.08486269558551653, - "grad_norm": 1.641339754902921, - "learning_rate": 3.968541048814098e-06, - "loss": 0.9427, - "step": 941 - }, - { - "epoch": 0.08495287910898679, - "grad_norm": 1.7519917733682984, - "learning_rate": 3.968437753587339e-06, - "loss": 0.9272, - "step": 942 - }, - { - "epoch": 0.08504306263245705, - "grad_norm": 1.3047123067176443, - "learning_rate": 3.968334290402616e-06, - "loss": 1.0417, - "step": 943 - }, - { - "epoch": 0.08513324615592731, - "grad_norm": 1.4003838149241665, - "learning_rate": 3.968230659268759e-06, - "loss": 0.8944, - "step": 944 - }, - { - "epoch": 0.08522342967939757, - "grad_norm": 1.6926070745454391, - "learning_rate": 3.968126860194609e-06, - "loss": 1.0679, - "step": 945 - }, - { - "epoch": 0.08531361320286783, - "grad_norm": 1.4511052523243668, - "learning_rate": 3.968022893189025e-06, - "loss": 1.0547, - "step": 946 - }, - { - "epoch": 0.0854037967263381, - "grad_norm": 1.340259053896148, - "learning_rate": 3.967918758260874e-06, - "loss": 1.0388, - "step": 947 - }, - { - "epoch": 0.08549398024980837, - "grad_norm": 1.7423755903450324, - "learning_rate": 3.967814455419044e-06, - "loss": 1.0437, - "step": 948 - }, - { - "epoch": 0.08558416377327863, - "grad_norm": 1.5843847029951283, - "learning_rate": 3.967709984672434e-06, - "loss": 1.0467, - "step": 949 - }, - { - "epoch": 0.08567434729674889, - "grad_norm": 1.5292171296707164, - "learning_rate": 3.967605346029959e-06, - "loss": 1.0333, - "step": 950 - }, - { - "epoch": 0.08576453082021915, - "grad_norm": 1.263867983569474, - "learning_rate": 3.9675005395005466e-06, - "loss": 1.0719, - "step": 951 - }, - { - "epoch": 0.08585471434368941, - "grad_norm": 1.864152556655727, - "learning_rate": 3.967395565093139e-06, - "loss": 1.0227, - "step": 952 - }, - { - "epoch": 0.08594489786715967, - "grad_norm": 1.3827931893072554, - "learning_rate": 3.967290422816693e-06, - "loss": 1.0067, - "step": 953 - }, - { - "epoch": 0.08603508139062993, - "grad_norm": 1.6661312514828983, - "learning_rate": 3.967185112680183e-06, - "loss": 0.9717, - "step": 954 - }, - { - "epoch": 0.08612526491410019, - "grad_norm": 1.4672491643620196, - "learning_rate": 3.96707963469259e-06, - "loss": 1.0926, - "step": 955 - }, - { - "epoch": 0.08621544843757045, - "grad_norm": 1.6322133548389812, - "learning_rate": 3.966973988862917e-06, - "loss": 0.9915, - "step": 956 - }, - { - "epoch": 0.08630563196104071, - "grad_norm": 1.3971264196878255, - "learning_rate": 3.966868175200178e-06, - "loss": 1.0541, - "step": 957 - }, - { - "epoch": 0.08639581548451097, - "grad_norm": 1.4785128030283918, - "learning_rate": 3.9667621937134e-06, - "loss": 1.0582, - "step": 958 - }, - { - "epoch": 0.08648599900798125, - "grad_norm": 1.6451579370018108, - "learning_rate": 3.966656044411627e-06, - "loss": 0.9216, - "step": 959 - }, - { - "epoch": 0.08657618253145151, - "grad_norm": 1.3326593523484744, - "learning_rate": 3.966549727303918e-06, - "loss": 0.9799, - "step": 960 - }, - { - "epoch": 0.08666636605492177, - "grad_norm": 1.4468517458140742, - "learning_rate": 3.966443242399341e-06, - "loss": 1.0404, - "step": 961 - }, - { - "epoch": 0.08675654957839203, - "grad_norm": 0.9016084193232593, - "learning_rate": 3.966336589706985e-06, - "loss": 0.8202, - "step": 962 - }, - { - "epoch": 0.08684673310186229, - "grad_norm": 1.566694047702111, - "learning_rate": 3.966229769235948e-06, - "loss": 1.0247, - "step": 963 - }, - { - "epoch": 0.08693691662533255, - "grad_norm": 1.6389546201566971, - "learning_rate": 3.966122780995345e-06, - "loss": 1.0812, - "step": 964 - }, - { - "epoch": 0.08702710014880281, - "grad_norm": 1.7360142702008363, - "learning_rate": 3.966015624994306e-06, - "loss": 1.0407, - "step": 965 - }, - { - "epoch": 0.08711728367227307, - "grad_norm": 1.3899780654548757, - "learning_rate": 3.9659083012419735e-06, - "loss": 0.9763, - "step": 966 - }, - { - "epoch": 0.08720746719574334, - "grad_norm": 1.5260286645259433, - "learning_rate": 3.965800809747505e-06, - "loss": 0.9401, - "step": 967 - }, - { - "epoch": 0.0872976507192136, - "grad_norm": 1.5275279370639157, - "learning_rate": 3.965693150520071e-06, - "loss": 0.9796, - "step": 968 - }, - { - "epoch": 0.08738783424268386, - "grad_norm": 1.887492398379347, - "learning_rate": 3.96558532356886e-06, - "loss": 0.9421, - "step": 969 - }, - { - "epoch": 0.08747801776615412, - "grad_norm": 1.889090679958729, - "learning_rate": 3.9654773289030704e-06, - "loss": 1.0567, - "step": 970 - }, - { - "epoch": 0.08756820128962439, - "grad_norm": 1.624136915617833, - "learning_rate": 3.9653691665319176e-06, - "loss": 1.0279, - "step": 971 - }, - { - "epoch": 0.08765838481309465, - "grad_norm": 2.7853830039235437, - "learning_rate": 3.96526083646463e-06, - "loss": 0.941, - "step": 972 - }, - { - "epoch": 0.08774856833656491, - "grad_norm": 1.4197157509486966, - "learning_rate": 3.9651523387104526e-06, - "loss": 0.9635, - "step": 973 - }, - { - "epoch": 0.08783875186003517, - "grad_norm": 1.7268292665448715, - "learning_rate": 3.965043673278641e-06, - "loss": 1.0191, - "step": 974 - }, - { - "epoch": 0.08792893538350544, - "grad_norm": 1.3122513399268105, - "learning_rate": 3.964934840178469e-06, - "loss": 1.0171, - "step": 975 - }, - { - "epoch": 0.0880191189069757, - "grad_norm": 1.498420599085352, - "learning_rate": 3.964825839419221e-06, - "loss": 1.0475, - "step": 976 - }, - { - "epoch": 0.08810930243044596, - "grad_norm": 1.8439195199396323, - "learning_rate": 3.964716671010199e-06, - "loss": 0.9972, - "step": 977 - }, - { - "epoch": 0.08819948595391622, - "grad_norm": 1.3472913607436805, - "learning_rate": 3.9646073349607165e-06, - "loss": 0.9767, - "step": 978 - }, - { - "epoch": 0.08828966947738648, - "grad_norm": 1.5759293796285467, - "learning_rate": 3.964497831280105e-06, - "loss": 1.1389, - "step": 979 - }, - { - "epoch": 0.08837985300085674, - "grad_norm": 1.562024921672671, - "learning_rate": 3.964388159977705e-06, - "loss": 1.0089, - "step": 980 - }, - { - "epoch": 0.088470036524327, - "grad_norm": 1.3223597888859568, - "learning_rate": 3.964278321062876e-06, - "loss": 0.9221, - "step": 981 - }, - { - "epoch": 0.08856022004779726, - "grad_norm": 1.530121289974452, - "learning_rate": 3.96416831454499e-06, - "loss": 1.035, - "step": 982 - }, - { - "epoch": 0.08865040357126754, - "grad_norm": 0.9015767524425701, - "learning_rate": 3.964058140433434e-06, - "loss": 0.8575, - "step": 983 - }, - { - "epoch": 0.0887405870947378, - "grad_norm": 1.7084325280892563, - "learning_rate": 3.963947798737606e-06, - "loss": 1.0771, - "step": 984 - }, - { - "epoch": 0.08883077061820806, - "grad_norm": 1.5368678522428558, - "learning_rate": 3.963837289466923e-06, - "loss": 1.0314, - "step": 985 - }, - { - "epoch": 0.08892095414167832, - "grad_norm": 1.3405165692919976, - "learning_rate": 3.9637266126308145e-06, - "loss": 0.9732, - "step": 986 - }, - { - "epoch": 0.08901113766514858, - "grad_norm": 0.9707984800726863, - "learning_rate": 3.963615768238724e-06, - "loss": 0.8669, - "step": 987 - }, - { - "epoch": 0.08910132118861884, - "grad_norm": 1.5122376963259476, - "learning_rate": 3.963504756300107e-06, - "loss": 0.8994, - "step": 988 - }, - { - "epoch": 0.0891915047120891, - "grad_norm": 1.4964803091704426, - "learning_rate": 3.96339357682444e-06, - "loss": 1.0527, - "step": 989 - }, - { - "epoch": 0.08928168823555936, - "grad_norm": 1.6993477607948675, - "learning_rate": 3.963282229821206e-06, - "loss": 1.0303, - "step": 990 - }, - { - "epoch": 0.08937187175902962, - "grad_norm": 1.4602452234191046, - "learning_rate": 3.963170715299906e-06, - "loss": 1.0174, - "step": 991 - }, - { - "epoch": 0.08946205528249988, - "grad_norm": 1.3957980943852917, - "learning_rate": 3.963059033270056e-06, - "loss": 0.933, - "step": 992 - }, - { - "epoch": 0.08955223880597014, - "grad_norm": 1.3358166288607343, - "learning_rate": 3.9629471837411855e-06, - "loss": 0.9525, - "step": 993 - }, - { - "epoch": 0.0896424223294404, - "grad_norm": 2.1496681711376286, - "learning_rate": 3.962835166722838e-06, - "loss": 0.9537, - "step": 994 - }, - { - "epoch": 0.08973260585291068, - "grad_norm": 1.6163139010820438, - "learning_rate": 3.96272298222457e-06, - "loss": 1.0179, - "step": 995 - }, - { - "epoch": 0.08982278937638094, - "grad_norm": 1.5271332344482016, - "learning_rate": 3.962610630255956e-06, - "loss": 1.139, - "step": 996 - }, - { - "epoch": 0.0899129728998512, - "grad_norm": 1.565649901171111, - "learning_rate": 3.96249811082658e-06, - "loss": 0.981, - "step": 997 - }, - { - "epoch": 0.09000315642332146, - "grad_norm": 0.8466400524396898, - "learning_rate": 3.962385423946046e-06, - "loss": 0.8937, - "step": 998 - }, - { - "epoch": 0.09009333994679172, - "grad_norm": 1.7031956929225405, - "learning_rate": 3.962272569623966e-06, - "loss": 0.9912, - "step": 999 - }, - { - "epoch": 0.09018352347026198, - "grad_norm": 1.676186679267092, - "learning_rate": 3.9621595478699704e-06, - "loss": 0.8882, - "step": 1000 - }, - { - "epoch": 0.09027370699373224, - "grad_norm": 1.6116646340142693, - "learning_rate": 3.962046358693703e-06, - "loss": 1.0013, - "step": 1001 - }, - { - "epoch": 0.0903638905172025, - "grad_norm": 4.096770824559856, - "learning_rate": 3.961933002104822e-06, - "loss": 1.0924, - "step": 1002 - }, - { - "epoch": 0.09045407404067277, - "grad_norm": 1.6418991953704645, - "learning_rate": 3.961819478112999e-06, - "loss": 0.9352, - "step": 1003 - }, - { - "epoch": 0.09054425756414303, - "grad_norm": 1.6236381693118056, - "learning_rate": 3.961705786727921e-06, - "loss": 1.0283, - "step": 1004 - }, - { - "epoch": 0.09063444108761329, - "grad_norm": 1.3878562452726937, - "learning_rate": 3.961591927959288e-06, - "loss": 0.9348, - "step": 1005 - }, - { - "epoch": 0.09072462461108355, - "grad_norm": 1.2637997876480582, - "learning_rate": 3.961477901816816e-06, - "loss": 0.8009, - "step": 1006 - }, - { - "epoch": 0.09081480813455382, - "grad_norm": 1.5717304716916067, - "learning_rate": 3.961363708310233e-06, - "loss": 1.0727, - "step": 1007 - }, - { - "epoch": 0.09090499165802408, - "grad_norm": 1.574323324133039, - "learning_rate": 3.961249347449286e-06, - "loss": 1.0328, - "step": 1008 - }, - { - "epoch": 0.09099517518149434, - "grad_norm": 1.5176803181335412, - "learning_rate": 3.961134819243728e-06, - "loss": 0.993, - "step": 1009 - }, - { - "epoch": 0.0910853587049646, - "grad_norm": 1.6583155581545632, - "learning_rate": 3.961020123703335e-06, - "loss": 1.0124, - "step": 1010 - }, - { - "epoch": 0.09117554222843487, - "grad_norm": 1.5255668430894023, - "learning_rate": 3.960905260837892e-06, - "loss": 1.0237, - "step": 1011 - }, - { - "epoch": 0.09126572575190513, - "grad_norm": 1.5754494979304081, - "learning_rate": 3.960790230657199e-06, - "loss": 0.9837, - "step": 1012 - }, - { - "epoch": 0.09135590927537539, - "grad_norm": 1.1885914555027632, - "learning_rate": 3.960675033171072e-06, - "loss": 0.9596, - "step": 1013 - }, - { - "epoch": 0.09144609279884565, - "grad_norm": 1.5284303668192445, - "learning_rate": 3.960559668389341e-06, - "loss": 0.9516, - "step": 1014 - }, - { - "epoch": 0.09153627632231591, - "grad_norm": 1.4712490167995333, - "learning_rate": 3.960444136321847e-06, - "loss": 1.0073, - "step": 1015 - }, - { - "epoch": 0.09162645984578617, - "grad_norm": 1.833676118304986, - "learning_rate": 3.960328436978451e-06, - "loss": 1.1012, - "step": 1016 - }, - { - "epoch": 0.09171664336925643, - "grad_norm": 1.2235301736588178, - "learning_rate": 3.960212570369024e-06, - "loss": 0.8064, - "step": 1017 - }, - { - "epoch": 0.09180682689272669, - "grad_norm": 1.5954058404081766, - "learning_rate": 3.9600965365034515e-06, - "loss": 0.9395, - "step": 1018 - }, - { - "epoch": 0.09189701041619697, - "grad_norm": 1.4448692673147758, - "learning_rate": 3.959980335391634e-06, - "loss": 1.0645, - "step": 1019 - }, - { - "epoch": 0.09198719393966723, - "grad_norm": 1.483234730490023, - "learning_rate": 3.959863967043487e-06, - "loss": 0.9516, - "step": 1020 - }, - { - "epoch": 0.09207737746313749, - "grad_norm": 1.4135645627233888, - "learning_rate": 3.9597474314689405e-06, - "loss": 1.0412, - "step": 1021 - }, - { - "epoch": 0.09216756098660775, - "grad_norm": 1.716798888672916, - "learning_rate": 3.959630728677937e-06, - "loss": 1.0177, - "step": 1022 - }, - { - "epoch": 0.09225774451007801, - "grad_norm": 1.555172791770086, - "learning_rate": 3.959513858680434e-06, - "loss": 0.9885, - "step": 1023 - }, - { - "epoch": 0.09234792803354827, - "grad_norm": 1.631968594996899, - "learning_rate": 3.959396821486405e-06, - "loss": 1.0932, - "step": 1024 - }, - { - "epoch": 0.09243811155701853, - "grad_norm": 1.2952573407879422, - "learning_rate": 3.959279617105835e-06, - "loss": 0.871, - "step": 1025 - }, - { - "epoch": 0.09252829508048879, - "grad_norm": 1.6017313054141298, - "learning_rate": 3.9591622455487235e-06, - "loss": 1.0166, - "step": 1026 - }, - { - "epoch": 0.09261847860395905, - "grad_norm": 1.7023955716722659, - "learning_rate": 3.959044706825087e-06, - "loss": 1.0775, - "step": 1027 - }, - { - "epoch": 0.09270866212742931, - "grad_norm": 1.6461037119965447, - "learning_rate": 3.958927000944954e-06, - "loss": 0.9199, - "step": 1028 - }, - { - "epoch": 0.09279884565089958, - "grad_norm": 1.6542325958770752, - "learning_rate": 3.958809127918368e-06, - "loss": 1.0938, - "step": 1029 - }, - { - "epoch": 0.09288902917436984, - "grad_norm": 1.388401525607929, - "learning_rate": 3.958691087755387e-06, - "loss": 0.9475, - "step": 1030 - }, - { - "epoch": 0.09297921269784011, - "grad_norm": 1.5490951788900829, - "learning_rate": 3.958572880466081e-06, - "loss": 0.8844, - "step": 1031 - }, - { - "epoch": 0.09306939622131037, - "grad_norm": 1.3566546350706368, - "learning_rate": 3.9584545060605385e-06, - "loss": 0.9914, - "step": 1032 - }, - { - "epoch": 0.09315957974478063, - "grad_norm": 1.6849485818505634, - "learning_rate": 3.958335964548859e-06, - "loss": 0.9751, - "step": 1033 - }, - { - "epoch": 0.0932497632682509, - "grad_norm": 1.67013216535026, - "learning_rate": 3.958217255941156e-06, - "loss": 1.0504, - "step": 1034 - }, - { - "epoch": 0.09333994679172115, - "grad_norm": 1.3556320575760792, - "learning_rate": 3.95809838024756e-06, - "loss": 0.9575, - "step": 1035 - }, - { - "epoch": 0.09343013031519141, - "grad_norm": 1.6781760579161278, - "learning_rate": 3.957979337478212e-06, - "loss": 0.9733, - "step": 1036 - }, - { - "epoch": 0.09352031383866168, - "grad_norm": 1.7198002467489215, - "learning_rate": 3.957860127643272e-06, - "loss": 1.031, - "step": 1037 - }, - { - "epoch": 0.09361049736213194, - "grad_norm": 1.5925890115866868, - "learning_rate": 3.95774075075291e-06, - "loss": 1.0013, - "step": 1038 - }, - { - "epoch": 0.0937006808856022, - "grad_norm": 1.4088524790793118, - "learning_rate": 3.957621206817312e-06, - "loss": 1.0545, - "step": 1039 - }, - { - "epoch": 0.09379086440907246, - "grad_norm": 1.5630653884301515, - "learning_rate": 3.957501495846679e-06, - "loss": 0.9871, - "step": 1040 - }, - { - "epoch": 0.09388104793254272, - "grad_norm": 0.8317684421174503, - "learning_rate": 3.957381617851225e-06, - "loss": 0.8254, - "step": 1041 - }, - { - "epoch": 0.09397123145601298, - "grad_norm": 1.461382711370436, - "learning_rate": 3.9572615728411776e-06, - "loss": 0.9276, - "step": 1042 - }, - { - "epoch": 0.09406141497948325, - "grad_norm": 2.105636561682718, - "learning_rate": 3.957141360826781e-06, - "loss": 0.8948, - "step": 1043 - }, - { - "epoch": 0.09415159850295352, - "grad_norm": 1.6798698858556103, - "learning_rate": 3.957020981818292e-06, - "loss": 0.9909, - "step": 1044 - }, - { - "epoch": 0.09424178202642378, - "grad_norm": 1.3240065503088043, - "learning_rate": 3.956900435825982e-06, - "loss": 1.001, - "step": 1045 - }, - { - "epoch": 0.09433196554989404, - "grad_norm": 1.4029956885480264, - "learning_rate": 3.9567797228601364e-06, - "loss": 0.9577, - "step": 1046 - }, - { - "epoch": 0.0944221490733643, - "grad_norm": 1.4410769698998243, - "learning_rate": 3.956658842931055e-06, - "loss": 1.0837, - "step": 1047 - }, - { - "epoch": 0.09451233259683456, - "grad_norm": 1.3399768018137885, - "learning_rate": 3.956537796049052e-06, - "loss": 1.0149, - "step": 1048 - }, - { - "epoch": 0.09460251612030482, - "grad_norm": 1.1532924044702368, - "learning_rate": 3.956416582224457e-06, - "loss": 0.8617, - "step": 1049 - }, - { - "epoch": 0.09469269964377508, - "grad_norm": 1.5323412373933432, - "learning_rate": 3.956295201467611e-06, - "loss": 1.0452, - "step": 1050 - }, - { - "epoch": 0.09478288316724534, - "grad_norm": 1.2544598649630205, - "learning_rate": 3.956173653788872e-06, - "loss": 1.0338, - "step": 1051 - }, - { - "epoch": 0.0948730666907156, - "grad_norm": 1.8238476655574676, - "learning_rate": 3.95605193919861e-06, - "loss": 1.0473, - "step": 1052 - }, - { - "epoch": 0.09496325021418586, - "grad_norm": 6.8996749580228185, - "learning_rate": 3.955930057707211e-06, - "loss": 0.9896, - "step": 1053 - }, - { - "epoch": 0.09505343373765612, - "grad_norm": 1.5668872275558199, - "learning_rate": 3.955808009325075e-06, - "loss": 1.0197, - "step": 1054 - }, - { - "epoch": 0.0951436172611264, - "grad_norm": 1.5134041629438084, - "learning_rate": 3.955685794062615e-06, - "loss": 0.9792, - "step": 1055 - }, - { - "epoch": 0.09523380078459666, - "grad_norm": 1.7103129601334202, - "learning_rate": 3.95556341193026e-06, - "loss": 1.0999, - "step": 1056 - }, - { - "epoch": 0.09532398430806692, - "grad_norm": 1.465196002568596, - "learning_rate": 3.955440862938452e-06, - "loss": 1.0925, - "step": 1057 - }, - { - "epoch": 0.09541416783153718, - "grad_norm": 1.6166944258885843, - "learning_rate": 3.955318147097647e-06, - "loss": 1.0083, - "step": 1058 - }, - { - "epoch": 0.09550435135500744, - "grad_norm": 1.7495804011165972, - "learning_rate": 3.955195264418316e-06, - "loss": 0.9766, - "step": 1059 - }, - { - "epoch": 0.0955945348784777, - "grad_norm": 1.412761827693768, - "learning_rate": 3.955072214910944e-06, - "loss": 0.8689, - "step": 1060 - }, - { - "epoch": 0.09568471840194796, - "grad_norm": 1.7316711932512636, - "learning_rate": 3.954948998586032e-06, - "loss": 0.9312, - "step": 1061 - }, - { - "epoch": 0.09577490192541822, - "grad_norm": 1.4917148021127515, - "learning_rate": 3.954825615454089e-06, - "loss": 1.0144, - "step": 1062 - }, - { - "epoch": 0.09586508544888848, - "grad_norm": 1.5915047781270621, - "learning_rate": 3.954702065525649e-06, - "loss": 1.0057, - "step": 1063 - }, - { - "epoch": 0.09595526897235875, - "grad_norm": 1.8248683254867981, - "learning_rate": 3.954578348811248e-06, - "loss": 1.0372, - "step": 1064 - }, - { - "epoch": 0.096045452495829, - "grad_norm": 1.588872289385632, - "learning_rate": 3.954454465321447e-06, - "loss": 0.9944, - "step": 1065 - }, - { - "epoch": 0.09613563601929928, - "grad_norm": 1.8129012880379554, - "learning_rate": 3.954330415066813e-06, - "loss": 1.0816, - "step": 1066 - }, - { - "epoch": 0.09622581954276954, - "grad_norm": 1.5060763468698086, - "learning_rate": 3.954206198057932e-06, - "loss": 0.996, - "step": 1067 - }, - { - "epoch": 0.0963160030662398, - "grad_norm": 1.6036567951094942, - "learning_rate": 3.954081814305403e-06, - "loss": 1.0588, - "step": 1068 - }, - { - "epoch": 0.09640618658971006, - "grad_norm": 1.514894997889133, - "learning_rate": 3.953957263819839e-06, - "loss": 0.944, - "step": 1069 - }, - { - "epoch": 0.09649637011318032, - "grad_norm": 1.5207818775965454, - "learning_rate": 3.953832546611867e-06, - "loss": 0.9436, - "step": 1070 - }, - { - "epoch": 0.09658655363665059, - "grad_norm": 1.6386295354054188, - "learning_rate": 3.953707662692129e-06, - "loss": 1.0355, - "step": 1071 - }, - { - "epoch": 0.09667673716012085, - "grad_norm": 1.4574786360992291, - "learning_rate": 3.95358261207128e-06, - "loss": 0.9329, - "step": 1072 - }, - { - "epoch": 0.0967669206835911, - "grad_norm": 1.3881194441682878, - "learning_rate": 3.953457394759992e-06, - "loss": 0.9951, - "step": 1073 - }, - { - "epoch": 0.09685710420706137, - "grad_norm": 1.4912987903629713, - "learning_rate": 3.953332010768947e-06, - "loss": 1.0198, - "step": 1074 - }, - { - "epoch": 0.09694728773053163, - "grad_norm": 2.190025288858241, - "learning_rate": 3.9532064601088436e-06, - "loss": 0.9758, - "step": 1075 - }, - { - "epoch": 0.09703747125400189, - "grad_norm": 1.7982683170127653, - "learning_rate": 3.953080742790396e-06, - "loss": 1.1497, - "step": 1076 - }, - { - "epoch": 0.09712765477747215, - "grad_norm": 1.1803647049116885, - "learning_rate": 3.95295485882433e-06, - "loss": 0.9152, - "step": 1077 - }, - { - "epoch": 0.09721783830094242, - "grad_norm": 0.7175052894503647, - "learning_rate": 3.952828808221387e-06, - "loss": 0.8494, - "step": 1078 - }, - { - "epoch": 0.09730802182441269, - "grad_norm": 1.8363555432260619, - "learning_rate": 3.9527025909923225e-06, - "loss": 0.9855, - "step": 1079 - }, - { - "epoch": 0.09739820534788295, - "grad_norm": 1.5665958128118447, - "learning_rate": 3.952576207147906e-06, - "loss": 1.0939, - "step": 1080 - }, - { - "epoch": 0.09748838887135321, - "grad_norm": 2.027986810381733, - "learning_rate": 3.95244965669892e-06, - "loss": 1.0383, - "step": 1081 - }, - { - "epoch": 0.09757857239482347, - "grad_norm": 1.4235689741065223, - "learning_rate": 3.952322939656165e-06, - "loss": 0.9549, - "step": 1082 - }, - { - "epoch": 0.09766875591829373, - "grad_norm": 1.4355892253000533, - "learning_rate": 3.952196056030451e-06, - "loss": 0.9964, - "step": 1083 - }, - { - "epoch": 0.09775893944176399, - "grad_norm": 1.7227133397613839, - "learning_rate": 3.952069005832605e-06, - "loss": 0.9717, - "step": 1084 - }, - { - "epoch": 0.09784912296523425, - "grad_norm": 1.4788356016507023, - "learning_rate": 3.951941789073468e-06, - "loss": 1.0366, - "step": 1085 - }, - { - "epoch": 0.09793930648870451, - "grad_norm": 1.4317942454936858, - "learning_rate": 3.9518144057638955e-06, - "loss": 1.0316, - "step": 1086 - }, - { - "epoch": 0.09802949001217477, - "grad_norm": 1.547924186855992, - "learning_rate": 3.951686855914755e-06, - "loss": 0.9967, - "step": 1087 - }, - { - "epoch": 0.09811967353564503, - "grad_norm": 1.928993440050329, - "learning_rate": 3.9515591395369305e-06, - "loss": 1.0321, - "step": 1088 - }, - { - "epoch": 0.0982098570591153, - "grad_norm": 1.5868722200012495, - "learning_rate": 3.95143125664132e-06, - "loss": 0.9856, - "step": 1089 - }, - { - "epoch": 0.09830004058258557, - "grad_norm": 1.5788355823314113, - "learning_rate": 3.951303207238833e-06, - "loss": 1.0301, - "step": 1090 - }, - { - "epoch": 0.09839022410605583, - "grad_norm": 1.4209987749152169, - "learning_rate": 3.951174991340399e-06, - "loss": 0.9752, - "step": 1091 - }, - { - "epoch": 0.09848040762952609, - "grad_norm": 1.5923268565098834, - "learning_rate": 3.9510466089569546e-06, - "loss": 0.9619, - "step": 1092 - }, - { - "epoch": 0.09857059115299635, - "grad_norm": 1.532024265644935, - "learning_rate": 3.950918060099456e-06, - "loss": 0.9686, - "step": 1093 - }, - { - "epoch": 0.09866077467646661, - "grad_norm": 1.3877752013971993, - "learning_rate": 3.950789344778871e-06, - "loss": 1.0601, - "step": 1094 - }, - { - "epoch": 0.09875095819993687, - "grad_norm": 1.5461986874305524, - "learning_rate": 3.950660463006184e-06, - "loss": 0.9485, - "step": 1095 - }, - { - "epoch": 0.09884114172340713, - "grad_norm": 1.297007680333441, - "learning_rate": 3.950531414792389e-06, - "loss": 1.0831, - "step": 1096 - }, - { - "epoch": 0.0989313252468774, - "grad_norm": 1.3949327505302511, - "learning_rate": 3.950402200148498e-06, - "loss": 1.0091, - "step": 1097 - }, - { - "epoch": 0.09902150877034765, - "grad_norm": 1.5809743105630782, - "learning_rate": 3.950272819085538e-06, - "loss": 0.9842, - "step": 1098 - }, - { - "epoch": 0.09911169229381792, - "grad_norm": 2.176917062156437, - "learning_rate": 3.9501432716145474e-06, - "loss": 1.0954, - "step": 1099 - }, - { - "epoch": 0.09920187581728818, - "grad_norm": 1.4655828213671929, - "learning_rate": 3.950013557746579e-06, - "loss": 0.9039, - "step": 1100 - }, - { - "epoch": 0.09929205934075844, - "grad_norm": 1.316084870633775, - "learning_rate": 3.949883677492703e-06, - "loss": 1.0456, - "step": 1101 - }, - { - "epoch": 0.09938224286422871, - "grad_norm": 1.5335634622094716, - "learning_rate": 3.9497536308639994e-06, - "loss": 1.0201, - "step": 1102 - }, - { - "epoch": 0.09947242638769897, - "grad_norm": 1.302522885200232, - "learning_rate": 3.949623417871565e-06, - "loss": 1.0479, - "step": 1103 - }, - { - "epoch": 0.09956260991116923, - "grad_norm": 1.401578178870919, - "learning_rate": 3.949493038526511e-06, - "loss": 0.9895, - "step": 1104 - }, - { - "epoch": 0.0996527934346395, - "grad_norm": 1.3461025846593244, - "learning_rate": 3.949362492839961e-06, - "loss": 0.9607, - "step": 1105 - }, - { - "epoch": 0.09974297695810976, - "grad_norm": 1.4403725822896545, - "learning_rate": 3.949231780823054e-06, - "loss": 0.9639, - "step": 1106 - }, - { - "epoch": 0.09983316048158002, - "grad_norm": 1.6052293436539027, - "learning_rate": 3.949100902486945e-06, - "loss": 0.9484, - "step": 1107 - }, - { - "epoch": 0.09992334400505028, - "grad_norm": 1.4818557241176415, - "learning_rate": 3.948969857842799e-06, - "loss": 0.9738, - "step": 1108 - }, - { - "epoch": 0.10001352752852054, - "grad_norm": 1.3961100470088559, - "learning_rate": 3.948838646901798e-06, - "loss": 0.9886, - "step": 1109 - }, - { - "epoch": 0.1001037110519908, - "grad_norm": 1.594423563123039, - "learning_rate": 3.948707269675138e-06, - "loss": 0.993, - "step": 1110 - }, - { - "epoch": 0.10019389457546106, - "grad_norm": 1.6260529554595509, - "learning_rate": 3.948575726174028e-06, - "loss": 0.989, - "step": 1111 - }, - { - "epoch": 0.10028407809893132, - "grad_norm": 1.8230107819012877, - "learning_rate": 3.9484440164096935e-06, - "loss": 1.0707, - "step": 1112 - }, - { - "epoch": 0.10037426162240158, - "grad_norm": 2.135267695991517, - "learning_rate": 3.948312140393372e-06, - "loss": 1.0206, - "step": 1113 - }, - { - "epoch": 0.10046444514587186, - "grad_norm": 1.724446030827139, - "learning_rate": 3.948180098136316e-06, - "loss": 0.975, - "step": 1114 - }, - { - "epoch": 0.10055462866934212, - "grad_norm": 1.466868311212972, - "learning_rate": 3.948047889649791e-06, - "loss": 0.9722, - "step": 1115 - }, - { - "epoch": 0.10064481219281238, - "grad_norm": 1.5082637921876731, - "learning_rate": 3.947915514945079e-06, - "loss": 0.9819, - "step": 1116 - }, - { - "epoch": 0.10073499571628264, - "grad_norm": 1.477760111831456, - "learning_rate": 3.947782974033474e-06, - "loss": 1.071, - "step": 1117 - }, - { - "epoch": 0.1008251792397529, - "grad_norm": 1.6847126157059906, - "learning_rate": 3.9476502669262866e-06, - "loss": 0.9617, - "step": 1118 - }, - { - "epoch": 0.10091536276322316, - "grad_norm": 1.509203096635009, - "learning_rate": 3.947517393634839e-06, - "loss": 0.9773, - "step": 1119 - }, - { - "epoch": 0.10100554628669342, - "grad_norm": 1.5840054474168608, - "learning_rate": 3.947384354170469e-06, - "loss": 1.0414, - "step": 1120 - }, - { - "epoch": 0.10109572981016368, - "grad_norm": 1.4736605591686616, - "learning_rate": 3.947251148544528e-06, - "loss": 1.0297, - "step": 1121 - }, - { - "epoch": 0.10118591333363394, - "grad_norm": 1.54469122050997, - "learning_rate": 3.947117776768382e-06, - "loss": 1.0361, - "step": 1122 - }, - { - "epoch": 0.1012760968571042, - "grad_norm": 1.2811508140319303, - "learning_rate": 3.9469842388534105e-06, - "loss": 0.9809, - "step": 1123 - }, - { - "epoch": 0.10136628038057446, - "grad_norm": 1.35240698214243, - "learning_rate": 3.946850534811009e-06, - "loss": 1.0125, - "step": 1124 - }, - { - "epoch": 0.10145646390404472, - "grad_norm": 1.4175847481987824, - "learning_rate": 3.946716664652585e-06, - "loss": 0.9855, - "step": 1125 - }, - { - "epoch": 0.101546647427515, - "grad_norm": 2.438952966609594, - "learning_rate": 3.94658262838956e-06, - "loss": 0.9075, - "step": 1126 - }, - { - "epoch": 0.10163683095098526, - "grad_norm": 1.2969050576277226, - "learning_rate": 3.946448426033373e-06, - "loss": 0.9257, - "step": 1127 - }, - { - "epoch": 0.10172701447445552, - "grad_norm": 1.3761339280383484, - "learning_rate": 3.946314057595473e-06, - "loss": 1.0303, - "step": 1128 - }, - { - "epoch": 0.10181719799792578, - "grad_norm": 1.63838253262995, - "learning_rate": 3.946179523087326e-06, - "loss": 1.0591, - "step": 1129 - }, - { - "epoch": 0.10190738152139604, - "grad_norm": 1.4867480132309547, - "learning_rate": 3.9460448225204104e-06, - "loss": 0.9178, - "step": 1130 - }, - { - "epoch": 0.1019975650448663, - "grad_norm": 1.6028164304498032, - "learning_rate": 3.945909955906221e-06, - "loss": 0.997, - "step": 1131 - }, - { - "epoch": 0.10208774856833656, - "grad_norm": 1.7300910623651946, - "learning_rate": 3.945774923256264e-06, - "loss": 1.0028, - "step": 1132 - }, - { - "epoch": 0.10217793209180683, - "grad_norm": 2.5597703863524885, - "learning_rate": 3.945639724582062e-06, - "loss": 1.0752, - "step": 1133 - }, - { - "epoch": 0.10226811561527709, - "grad_norm": 1.5247097083036287, - "learning_rate": 3.94550435989515e-06, - "loss": 0.9576, - "step": 1134 - }, - { - "epoch": 0.10235829913874735, - "grad_norm": 1.351456169399934, - "learning_rate": 3.945368829207079e-06, - "loss": 1.0258, - "step": 1135 - }, - { - "epoch": 0.10244848266221761, - "grad_norm": 1.643391628869212, - "learning_rate": 3.945233132529414e-06, - "loss": 0.9573, - "step": 1136 - }, - { - "epoch": 0.10253866618568787, - "grad_norm": 1.7817625680252456, - "learning_rate": 3.9450972698737304e-06, - "loss": 1.0025, - "step": 1137 - }, - { - "epoch": 0.10262884970915814, - "grad_norm": 1.4365056786907906, - "learning_rate": 3.944961241251623e-06, - "loss": 0.9837, - "step": 1138 - }, - { - "epoch": 0.1027190332326284, - "grad_norm": 1.2618196996216744, - "learning_rate": 3.9448250466746985e-06, - "loss": 0.9698, - "step": 1139 - }, - { - "epoch": 0.10280921675609866, - "grad_norm": 1.6329631827152489, - "learning_rate": 3.944688686154578e-06, - "loss": 1.0293, - "step": 1140 - }, - { - "epoch": 0.10289940027956893, - "grad_norm": 1.4732417149488006, - "learning_rate": 3.944552159702894e-06, - "loss": 0.9634, - "step": 1141 - }, - { - "epoch": 0.10298958380303919, - "grad_norm": 1.5130616366935339, - "learning_rate": 3.944415467331299e-06, - "loss": 0.9897, - "step": 1142 - }, - { - "epoch": 0.10307976732650945, - "grad_norm": 1.3729785885929913, - "learning_rate": 3.944278609051455e-06, - "loss": 0.9521, - "step": 1143 - }, - { - "epoch": 0.10316995084997971, - "grad_norm": 1.492928126414751, - "learning_rate": 3.944141584875039e-06, - "loss": 0.9293, - "step": 1144 - }, - { - "epoch": 0.10326013437344997, - "grad_norm": 1.4864055999334191, - "learning_rate": 3.944004394813743e-06, - "loss": 1.0019, - "step": 1145 - }, - { - "epoch": 0.10335031789692023, - "grad_norm": 1.4436408195662689, - "learning_rate": 3.943867038879273e-06, - "loss": 1.0276, - "step": 1146 - }, - { - "epoch": 0.10344050142039049, - "grad_norm": 1.4664704509454303, - "learning_rate": 3.943729517083349e-06, - "loss": 1.0351, - "step": 1147 - }, - { - "epoch": 0.10353068494386075, - "grad_norm": 1.4955296660669029, - "learning_rate": 3.943591829437705e-06, - "loss": 1.039, - "step": 1148 - }, - { - "epoch": 0.10362086846733101, - "grad_norm": 1.403896821276421, - "learning_rate": 3.9434539759540895e-06, - "loss": 0.9439, - "step": 1149 - }, - { - "epoch": 0.10371105199080129, - "grad_norm": 1.3614559250420906, - "learning_rate": 3.943315956644264e-06, - "loss": 0.936, - "step": 1150 - }, - { - "epoch": 0.10380123551427155, - "grad_norm": 1.602577818053804, - "learning_rate": 3.943177771520006e-06, - "loss": 1.074, - "step": 1151 - }, - { - "epoch": 0.10389141903774181, - "grad_norm": 1.5945043835392791, - "learning_rate": 3.9430394205931065e-06, - "loss": 1.0525, - "step": 1152 - }, - { - "epoch": 0.10398160256121207, - "grad_norm": 1.535033941856654, - "learning_rate": 3.942900903875369e-06, - "loss": 0.8784, - "step": 1153 - }, - { - "epoch": 0.10407178608468233, - "grad_norm": 1.413236851203034, - "learning_rate": 3.942762221378614e-06, - "loss": 1.0087, - "step": 1154 - }, - { - "epoch": 0.10416196960815259, - "grad_norm": 1.3702626113770595, - "learning_rate": 3.942623373114673e-06, - "loss": 1.0257, - "step": 1155 - }, - { - "epoch": 0.10425215313162285, - "grad_norm": 1.4828755886858431, - "learning_rate": 3.942484359095396e-06, - "loss": 0.989, - "step": 1156 - }, - { - "epoch": 0.10434233665509311, - "grad_norm": 1.3565449688351374, - "learning_rate": 3.942345179332642e-06, - "loss": 0.9277, - "step": 1157 - }, - { - "epoch": 0.10443252017856337, - "grad_norm": 1.2222803775120157, - "learning_rate": 3.942205833838287e-06, - "loss": 0.953, - "step": 1158 - }, - { - "epoch": 0.10452270370203363, - "grad_norm": 1.4925348965959766, - "learning_rate": 3.9420663226242204e-06, - "loss": 1.1056, - "step": 1159 - }, - { - "epoch": 0.1046128872255039, - "grad_norm": 1.4125196776243165, - "learning_rate": 3.941926645702348e-06, - "loss": 1.0472, - "step": 1160 - }, - { - "epoch": 0.10470307074897416, - "grad_norm": 1.8709225506597051, - "learning_rate": 3.941786803084586e-06, - "loss": 0.9912, - "step": 1161 - }, - { - "epoch": 0.10479325427244443, - "grad_norm": 2.0238969789303116, - "learning_rate": 3.941646794782867e-06, - "loss": 0.9485, - "step": 1162 - }, - { - "epoch": 0.10488343779591469, - "grad_norm": 1.4673340648046735, - "learning_rate": 3.941506620809137e-06, - "loss": 1.015, - "step": 1163 - }, - { - "epoch": 0.10497362131938495, - "grad_norm": 1.5934239970366568, - "learning_rate": 3.941366281175357e-06, - "loss": 0.9892, - "step": 1164 - }, - { - "epoch": 0.10506380484285521, - "grad_norm": 1.211203908840534, - "learning_rate": 3.941225775893502e-06, - "loss": 0.94, - "step": 1165 - }, - { - "epoch": 0.10515398836632547, - "grad_norm": 1.301963337773218, - "learning_rate": 3.941085104975559e-06, - "loss": 0.9806, - "step": 1166 - }, - { - "epoch": 0.10524417188979573, - "grad_norm": 0.8418055841477772, - "learning_rate": 3.9409442684335325e-06, - "loss": 0.8652, - "step": 1167 - }, - { - "epoch": 0.105334355413266, - "grad_norm": 1.5496246867941523, - "learning_rate": 3.940803266279438e-06, - "loss": 0.9963, - "step": 1168 - }, - { - "epoch": 0.10542453893673626, - "grad_norm": 1.5385002232236207, - "learning_rate": 3.9406620985253076e-06, - "loss": 0.9394, - "step": 1169 - }, - { - "epoch": 0.10551472246020652, - "grad_norm": 1.332988138018727, - "learning_rate": 3.940520765183187e-06, - "loss": 0.9781, - "step": 1170 - }, - { - "epoch": 0.10560490598367678, - "grad_norm": 1.6263268262468844, - "learning_rate": 3.940379266265134e-06, - "loss": 1.0419, - "step": 1171 - }, - { - "epoch": 0.10569508950714704, - "grad_norm": 1.7387317545171421, - "learning_rate": 3.940237601783223e-06, - "loss": 0.9665, - "step": 1172 - }, - { - "epoch": 0.1057852730306173, - "grad_norm": 1.3323151853551967, - "learning_rate": 3.940095771749542e-06, - "loss": 0.9961, - "step": 1173 - }, - { - "epoch": 0.10587545655408757, - "grad_norm": 1.5675080721706636, - "learning_rate": 3.939953776176192e-06, - "loss": 1.0539, - "step": 1174 - }, - { - "epoch": 0.10596564007755783, - "grad_norm": 1.1298501579557698, - "learning_rate": 3.939811615075288e-06, - "loss": 1.0215, - "step": 1175 - }, - { - "epoch": 0.1060558236010281, - "grad_norm": 1.650903962787704, - "learning_rate": 3.9396692884589616e-06, - "loss": 1.067, - "step": 1176 - }, - { - "epoch": 0.10614600712449836, - "grad_norm": 1.690562824227095, - "learning_rate": 3.9395267963393565e-06, - "loss": 0.9778, - "step": 1177 - }, - { - "epoch": 0.10623619064796862, - "grad_norm": 1.4594763318318973, - "learning_rate": 3.939384138728631e-06, - "loss": 1.0349, - "step": 1178 - }, - { - "epoch": 0.10632637417143888, - "grad_norm": 1.4938043752054875, - "learning_rate": 3.939241315638956e-06, - "loss": 0.9912, - "step": 1179 - }, - { - "epoch": 0.10641655769490914, - "grad_norm": 1.3140109765365109, - "learning_rate": 3.93909832708252e-06, - "loss": 0.9861, - "step": 1180 - }, - { - "epoch": 0.1065067412183794, - "grad_norm": 1.242375149738469, - "learning_rate": 3.938955173071523e-06, - "loss": 1.0036, - "step": 1181 - }, - { - "epoch": 0.10659692474184966, - "grad_norm": 0.8040279907933853, - "learning_rate": 3.938811853618179e-06, - "loss": 0.8417, - "step": 1182 - }, - { - "epoch": 0.10668710826531992, - "grad_norm": 1.9731747993804076, - "learning_rate": 3.938668368734717e-06, - "loss": 1.039, - "step": 1183 - }, - { - "epoch": 0.10677729178879018, - "grad_norm": 1.5179925104893817, - "learning_rate": 3.93852471843338e-06, - "loss": 0.8761, - "step": 1184 - }, - { - "epoch": 0.10686747531226044, - "grad_norm": 1.5560547457235696, - "learning_rate": 3.9383809027264254e-06, - "loss": 0.9749, - "step": 1185 - }, - { - "epoch": 0.10695765883573072, - "grad_norm": 1.3660346265763377, - "learning_rate": 3.938236921626124e-06, - "loss": 1.0399, - "step": 1186 - }, - { - "epoch": 0.10704784235920098, - "grad_norm": 1.6488807407742205, - "learning_rate": 3.938092775144761e-06, - "loss": 1.014, - "step": 1187 - }, - { - "epoch": 0.10713802588267124, - "grad_norm": 1.256517774596602, - "learning_rate": 3.9379484632946355e-06, - "loss": 0.9175, - "step": 1188 - }, - { - "epoch": 0.1072282094061415, - "grad_norm": 1.4180055723847522, - "learning_rate": 3.937803986088062e-06, - "loss": 1.0583, - "step": 1189 - }, - { - "epoch": 0.10731839292961176, - "grad_norm": 2.381367930298026, - "learning_rate": 3.937659343537367e-06, - "loss": 0.9544, - "step": 1190 - }, - { - "epoch": 0.10740857645308202, - "grad_norm": 1.5944312640423277, - "learning_rate": 3.937514535654893e-06, - "loss": 1.0689, - "step": 1191 - }, - { - "epoch": 0.10749875997655228, - "grad_norm": 3.6118106963187424, - "learning_rate": 3.937369562452996e-06, - "loss": 1.0106, - "step": 1192 - }, - { - "epoch": 0.10758894350002254, - "grad_norm": 1.5894467469708757, - "learning_rate": 3.937224423944044e-06, - "loss": 1.057, - "step": 1193 - }, - { - "epoch": 0.1076791270234928, - "grad_norm": 1.4897854630061007, - "learning_rate": 3.937079120140423e-06, - "loss": 0.9309, - "step": 1194 - }, - { - "epoch": 0.10776931054696307, - "grad_norm": 1.3637297146806855, - "learning_rate": 3.936933651054531e-06, - "loss": 1.0399, - "step": 1195 - }, - { - "epoch": 0.10785949407043333, - "grad_norm": 1.4069896871897811, - "learning_rate": 3.936788016698779e-06, - "loss": 1.1145, - "step": 1196 - }, - { - "epoch": 0.1079496775939036, - "grad_norm": 1.414896092323441, - "learning_rate": 3.936642217085594e-06, - "loss": 1.0282, - "step": 1197 - }, - { - "epoch": 0.10803986111737386, - "grad_norm": 1.9252356323094428, - "learning_rate": 3.936496252227417e-06, - "loss": 0.9158, - "step": 1198 - }, - { - "epoch": 0.10813004464084412, - "grad_norm": 1.4349645562396949, - "learning_rate": 3.936350122136703e-06, - "loss": 0.9798, - "step": 1199 - }, - { - "epoch": 0.10822022816431438, - "grad_norm": 0.8732623489362934, - "learning_rate": 3.936203826825919e-06, - "loss": 0.8703, - "step": 1200 - }, - { - "epoch": 0.10831041168778464, - "grad_norm": 1.4162713602581432, - "learning_rate": 3.9360573663075475e-06, - "loss": 0.959, - "step": 1201 - }, - { - "epoch": 0.1084005952112549, - "grad_norm": 1.8179996187862253, - "learning_rate": 3.935910740594087e-06, - "loss": 1.0397, - "step": 1202 - }, - { - "epoch": 0.10849077873472517, - "grad_norm": 1.651530974492347, - "learning_rate": 3.935763949698047e-06, - "loss": 1.0029, - "step": 1203 - }, - { - "epoch": 0.10858096225819543, - "grad_norm": 1.5463457068981403, - "learning_rate": 3.935616993631954e-06, - "loss": 1.068, - "step": 1204 - }, - { - "epoch": 0.10867114578166569, - "grad_norm": 1.5616668768889057, - "learning_rate": 3.935469872408345e-06, - "loss": 0.9896, - "step": 1205 - }, - { - "epoch": 0.10876132930513595, - "grad_norm": 1.4683637719927016, - "learning_rate": 3.935322586039776e-06, - "loss": 1.0286, - "step": 1206 - }, - { - "epoch": 0.10885151282860621, - "grad_norm": 1.8696132847553506, - "learning_rate": 3.935175134538811e-06, - "loss": 0.9415, - "step": 1207 - }, - { - "epoch": 0.10894169635207647, - "grad_norm": 2.0734427342064916, - "learning_rate": 3.935027517918034e-06, - "loss": 0.9372, - "step": 1208 - }, - { - "epoch": 0.10903187987554674, - "grad_norm": 2.01736020649729, - "learning_rate": 3.93487973619004e-06, - "loss": 1.0261, - "step": 1209 - }, - { - "epoch": 0.109122063399017, - "grad_norm": 1.722998483445734, - "learning_rate": 3.934731789367438e-06, - "loss": 1.0197, - "step": 1210 - }, - { - "epoch": 0.10921224692248727, - "grad_norm": 0.895508711054194, - "learning_rate": 3.9345836774628505e-06, - "loss": 0.8555, - "step": 1211 - }, - { - "epoch": 0.10930243044595753, - "grad_norm": 1.3934015137738804, - "learning_rate": 3.934435400488917e-06, - "loss": 0.9905, - "step": 1212 - }, - { - "epoch": 0.10939261396942779, - "grad_norm": 1.699942318554831, - "learning_rate": 3.934286958458289e-06, - "loss": 1.0324, - "step": 1213 - }, - { - "epoch": 0.10948279749289805, - "grad_norm": 1.3678370649307707, - "learning_rate": 3.934138351383632e-06, - "loss": 1.0625, - "step": 1214 - }, - { - "epoch": 0.10957298101636831, - "grad_norm": 1.3869129313593385, - "learning_rate": 3.933989579277626e-06, - "loss": 0.9315, - "step": 1215 - }, - { - "epoch": 0.10966316453983857, - "grad_norm": 1.305371773186589, - "learning_rate": 3.933840642152966e-06, - "loss": 0.9958, - "step": 1216 - }, - { - "epoch": 0.10975334806330883, - "grad_norm": 2.0494001115596587, - "learning_rate": 3.933691540022359e-06, - "loss": 1.0336, - "step": 1217 - }, - { - "epoch": 0.10984353158677909, - "grad_norm": 1.4502231787884539, - "learning_rate": 3.933542272898527e-06, - "loss": 0.9663, - "step": 1218 - }, - { - "epoch": 0.10993371511024935, - "grad_norm": 1.0120531508959114, - "learning_rate": 3.933392840794207e-06, - "loss": 0.788, - "step": 1219 - }, - { - "epoch": 0.11002389863371961, - "grad_norm": 1.9980257542184645, - "learning_rate": 3.93324324372215e-06, - "loss": 1.1041, - "step": 1220 - }, - { - "epoch": 0.11011408215718989, - "grad_norm": 1.539009441874729, - "learning_rate": 3.9330934816951185e-06, - "loss": 1.0331, - "step": 1221 - }, - { - "epoch": 0.11020426568066015, - "grad_norm": 1.78703453032311, - "learning_rate": 3.932943554725893e-06, - "loss": 1.0205, - "step": 1222 - }, - { - "epoch": 0.11029444920413041, - "grad_norm": 1.2023718321893637, - "learning_rate": 3.932793462827265e-06, - "loss": 0.8769, - "step": 1223 - }, - { - "epoch": 0.11038463272760067, - "grad_norm": 1.3056521475796803, - "learning_rate": 3.932643206012041e-06, - "loss": 1.0539, - "step": 1224 - }, - { - "epoch": 0.11047481625107093, - "grad_norm": 1.529408963554717, - "learning_rate": 3.932492784293043e-06, - "loss": 0.9267, - "step": 1225 - }, - { - "epoch": 0.11056499977454119, - "grad_norm": 1.4930289726946575, - "learning_rate": 3.932342197683104e-06, - "loss": 0.9974, - "step": 1226 - }, - { - "epoch": 0.11065518329801145, - "grad_norm": 1.5980962692978178, - "learning_rate": 3.932191446195075e-06, - "loss": 0.8737, - "step": 1227 - }, - { - "epoch": 0.11074536682148171, - "grad_norm": 1.701465042728171, - "learning_rate": 3.9320405298418175e-06, - "loss": 0.9985, - "step": 1228 - }, - { - "epoch": 0.11083555034495197, - "grad_norm": 1.6738662982446997, - "learning_rate": 3.9318894486362076e-06, - "loss": 0.9194, - "step": 1229 - }, - { - "epoch": 0.11092573386842224, - "grad_norm": 4.088155636715628, - "learning_rate": 3.9317382025911395e-06, - "loss": 1.0766, - "step": 1230 - }, - { - "epoch": 0.1110159173918925, - "grad_norm": 1.8157049322342937, - "learning_rate": 3.9315867917195145e-06, - "loss": 1.0984, - "step": 1231 - }, - { - "epoch": 0.11110610091536276, - "grad_norm": 1.3259238009674557, - "learning_rate": 3.931435216034256e-06, - "loss": 1.0066, - "step": 1232 - }, - { - "epoch": 0.11119628443883303, - "grad_norm": 1.7323476953978505, - "learning_rate": 3.931283475548293e-06, - "loss": 1.0089, - "step": 1233 - }, - { - "epoch": 0.11128646796230329, - "grad_norm": 1.210058225302709, - "learning_rate": 3.931131570274576e-06, - "loss": 0.9991, - "step": 1234 - }, - { - "epoch": 0.11137665148577355, - "grad_norm": 1.4782771090739917, - "learning_rate": 3.930979500226065e-06, - "loss": 1.026, - "step": 1235 - }, - { - "epoch": 0.11146683500924381, - "grad_norm": 1.5600389553977165, - "learning_rate": 3.930827265415736e-06, - "loss": 0.8427, - "step": 1236 - }, - { - "epoch": 0.11155701853271408, - "grad_norm": 1.54887746032416, - "learning_rate": 3.930674865856578e-06, - "loss": 1.0089, - "step": 1237 - }, - { - "epoch": 0.11164720205618434, - "grad_norm": 1.7463326173433682, - "learning_rate": 3.930522301561595e-06, - "loss": 1.0326, - "step": 1238 - }, - { - "epoch": 0.1117373855796546, - "grad_norm": 1.6529915624173566, - "learning_rate": 3.930369572543804e-06, - "loss": 1.0448, - "step": 1239 - }, - { - "epoch": 0.11182756910312486, - "grad_norm": 1.709992796787907, - "learning_rate": 3.930216678816237e-06, - "loss": 0.939, - "step": 1240 - }, - { - "epoch": 0.11191775262659512, - "grad_norm": 1.3963733975789958, - "learning_rate": 3.930063620391941e-06, - "loss": 0.8313, - "step": 1241 - }, - { - "epoch": 0.11200793615006538, - "grad_norm": 1.7200933581898024, - "learning_rate": 3.9299103972839735e-06, - "loss": 1.0096, - "step": 1242 - }, - { - "epoch": 0.11209811967353564, - "grad_norm": 0.7906338747755284, - "learning_rate": 3.92975700950541e-06, - "loss": 0.804, - "step": 1243 - }, - { - "epoch": 0.1121883031970059, - "grad_norm": 1.6588727230389544, - "learning_rate": 3.929603457069338e-06, - "loss": 1.0316, - "step": 1244 - }, - { - "epoch": 0.11227848672047618, - "grad_norm": 1.4450262662618862, - "learning_rate": 3.929449739988859e-06, - "loss": 0.975, - "step": 1245 - }, - { - "epoch": 0.11236867024394644, - "grad_norm": 1.5248939789986318, - "learning_rate": 3.929295858277089e-06, - "loss": 0.9992, - "step": 1246 - }, - { - "epoch": 0.1124588537674167, - "grad_norm": 1.91292609909658, - "learning_rate": 3.9291418119471585e-06, - "loss": 0.9947, - "step": 1247 - }, - { - "epoch": 0.11254903729088696, - "grad_norm": 1.6160579551369971, - "learning_rate": 3.928987601012212e-06, - "loss": 1.0306, - "step": 1248 - }, - { - "epoch": 0.11263922081435722, - "grad_norm": 1.5797056081999872, - "learning_rate": 3.928833225485407e-06, - "loss": 0.9846, - "step": 1249 - }, - { - "epoch": 0.11272940433782748, - "grad_norm": 1.483969783686121, - "learning_rate": 3.928678685379915e-06, - "loss": 0.9913, - "step": 1250 - }, - { - "epoch": 0.11281958786129774, - "grad_norm": 1.4689635263619094, - "learning_rate": 3.928523980708924e-06, - "loss": 1.0471, - "step": 1251 - }, - { - "epoch": 0.112909771384768, - "grad_norm": 1.758812920611363, - "learning_rate": 3.928369111485632e-06, - "loss": 0.9633, - "step": 1252 - }, - { - "epoch": 0.11299995490823826, - "grad_norm": 1.5060813334515475, - "learning_rate": 3.928214077723255e-06, - "loss": 0.9918, - "step": 1253 - }, - { - "epoch": 0.11309013843170852, - "grad_norm": 1.903139735072427, - "learning_rate": 3.928058879435021e-06, - "loss": 0.9853, - "step": 1254 - }, - { - "epoch": 0.11318032195517878, - "grad_norm": 1.720380391263313, - "learning_rate": 3.9279035166341725e-06, - "loss": 0.9795, - "step": 1255 - }, - { - "epoch": 0.11327050547864904, - "grad_norm": 1.6220129876798608, - "learning_rate": 3.927747989333965e-06, - "loss": 0.9812, - "step": 1256 - }, - { - "epoch": 0.11336068900211932, - "grad_norm": 1.6843274989180252, - "learning_rate": 3.927592297547669e-06, - "loss": 0.9701, - "step": 1257 - }, - { - "epoch": 0.11345087252558958, - "grad_norm": 1.9307525413819495, - "learning_rate": 3.927436441288571e-06, - "loss": 1.0203, - "step": 1258 - }, - { - "epoch": 0.11354105604905984, - "grad_norm": 1.4984442272174494, - "learning_rate": 3.927280420569968e-06, - "loss": 1.0237, - "step": 1259 - }, - { - "epoch": 0.1136312395725301, - "grad_norm": 2.962514331638099, - "learning_rate": 3.927124235405171e-06, - "loss": 1.0324, - "step": 1260 - }, - { - "epoch": 0.11372142309600036, - "grad_norm": 1.9689484753548772, - "learning_rate": 3.92696788580751e-06, - "loss": 0.857, - "step": 1261 - }, - { - "epoch": 0.11381160661947062, - "grad_norm": 1.4721545267835745, - "learning_rate": 3.9268113717903225e-06, - "loss": 0.8335, - "step": 1262 - }, - { - "epoch": 0.11390179014294088, - "grad_norm": 1.5781932098453373, - "learning_rate": 3.926654693366965e-06, - "loss": 1.0608, - "step": 1263 - }, - { - "epoch": 0.11399197366641114, - "grad_norm": 1.7175338604072723, - "learning_rate": 3.926497850550805e-06, - "loss": 1.0076, - "step": 1264 - }, - { - "epoch": 0.1140821571898814, - "grad_norm": 1.7199607745954122, - "learning_rate": 3.926340843355226e-06, - "loss": 1.0007, - "step": 1265 - }, - { - "epoch": 0.11417234071335167, - "grad_norm": 1.5372797521372539, - "learning_rate": 3.926183671793625e-06, - "loss": 1.0199, - "step": 1266 - }, - { - "epoch": 0.11426252423682193, - "grad_norm": 1.474552299961261, - "learning_rate": 3.926026335879412e-06, - "loss": 0.9721, - "step": 1267 - }, - { - "epoch": 0.11435270776029219, - "grad_norm": 0.7858781674629017, - "learning_rate": 3.925868835626012e-06, - "loss": 0.7612, - "step": 1268 - }, - { - "epoch": 0.11444289128376246, - "grad_norm": 1.7462440784438595, - "learning_rate": 3.925711171046864e-06, - "loss": 1.0116, - "step": 1269 - }, - { - "epoch": 0.11453307480723272, - "grad_norm": 1.3025709332793993, - "learning_rate": 3.925553342155421e-06, - "loss": 0.9567, - "step": 1270 - }, - { - "epoch": 0.11462325833070298, - "grad_norm": 0.8060244222495342, - "learning_rate": 3.9253953489651485e-06, - "loss": 0.867, - "step": 1271 - }, - { - "epoch": 0.11471344185417325, - "grad_norm": 0.7688458096106404, - "learning_rate": 3.925237191489529e-06, - "loss": 0.8187, - "step": 1272 - }, - { - "epoch": 0.1148036253776435, - "grad_norm": 1.4088307753035432, - "learning_rate": 3.925078869742056e-06, - "loss": 0.9677, - "step": 1273 - }, - { - "epoch": 0.11489380890111377, - "grad_norm": 1.3802106269481482, - "learning_rate": 3.92492038373624e-06, - "loss": 0.9375, - "step": 1274 - }, - { - "epoch": 0.11498399242458403, - "grad_norm": 1.433932895718352, - "learning_rate": 3.924761733485602e-06, - "loss": 1.0484, - "step": 1275 - }, - { - "epoch": 0.11507417594805429, - "grad_norm": 1.5930218996888221, - "learning_rate": 3.92460291900368e-06, - "loss": 0.9936, - "step": 1276 - }, - { - "epoch": 0.11516435947152455, - "grad_norm": 1.466151841073971, - "learning_rate": 3.924443940304025e-06, - "loss": 1.0188, - "step": 1277 - }, - { - "epoch": 0.11525454299499481, - "grad_norm": 1.9470972047629087, - "learning_rate": 3.924284797400202e-06, - "loss": 1.0409, - "step": 1278 - }, - { - "epoch": 0.11534472651846507, - "grad_norm": 1.702330727809697, - "learning_rate": 3.924125490305789e-06, - "loss": 1.0215, - "step": 1279 - }, - { - "epoch": 0.11543491004193533, - "grad_norm": 1.3858499733726228, - "learning_rate": 3.923966019034381e-06, - "loss": 0.9772, - "step": 1280 - }, - { - "epoch": 0.1155250935654056, - "grad_norm": 1.4689377198075697, - "learning_rate": 3.923806383599583e-06, - "loss": 1.0083, - "step": 1281 - }, - { - "epoch": 0.11561527708887587, - "grad_norm": 1.3682570279521333, - "learning_rate": 3.923646584015017e-06, - "loss": 0.9668, - "step": 1282 - }, - { - "epoch": 0.11570546061234613, - "grad_norm": 1.822152382622325, - "learning_rate": 3.923486620294316e-06, - "loss": 0.9915, - "step": 1283 - }, - { - "epoch": 0.11579564413581639, - "grad_norm": 1.568883444896999, - "learning_rate": 3.923326492451132e-06, - "loss": 0.9988, - "step": 1284 - }, - { - "epoch": 0.11588582765928665, - "grad_norm": 1.424720432229712, - "learning_rate": 3.923166200499125e-06, - "loss": 0.9738, - "step": 1285 - }, - { - "epoch": 0.11597601118275691, - "grad_norm": 1.2789718098460416, - "learning_rate": 3.923005744451975e-06, - "loss": 0.9672, - "step": 1286 - }, - { - "epoch": 0.11606619470622717, - "grad_norm": 2.1620367050019373, - "learning_rate": 3.9228451243233715e-06, - "loss": 0.9462, - "step": 1287 - }, - { - "epoch": 0.11615637822969743, - "grad_norm": 1.4392849993509589, - "learning_rate": 3.9226843401270195e-06, - "loss": 1.0297, - "step": 1288 - }, - { - "epoch": 0.1162465617531677, - "grad_norm": 1.4096108858755516, - "learning_rate": 3.9225233918766376e-06, - "loss": 0.9967, - "step": 1289 - }, - { - "epoch": 0.11633674527663795, - "grad_norm": 1.3038430136311048, - "learning_rate": 3.92236227958596e-06, - "loss": 1.0075, - "step": 1290 - }, - { - "epoch": 0.11642692880010821, - "grad_norm": 2.0750394013122477, - "learning_rate": 3.922201003268731e-06, - "loss": 1.0154, - "step": 1291 - }, - { - "epoch": 0.11651711232357848, - "grad_norm": 0.8738507148097603, - "learning_rate": 3.922039562938715e-06, - "loss": 0.8516, - "step": 1292 - }, - { - "epoch": 0.11660729584704875, - "grad_norm": 1.3678853461058962, - "learning_rate": 3.921877958609685e-06, - "loss": 1.0505, - "step": 1293 - }, - { - "epoch": 0.11669747937051901, - "grad_norm": 0.9229274496534999, - "learning_rate": 3.921716190295431e-06, - "loss": 0.9517, - "step": 1294 - }, - { - "epoch": 0.11678766289398927, - "grad_norm": 1.5806751608591516, - "learning_rate": 3.921554258009755e-06, - "loss": 0.9555, - "step": 1295 - }, - { - "epoch": 0.11687784641745953, - "grad_norm": 1.3614455491243822, - "learning_rate": 3.921392161766474e-06, - "loss": 0.9789, - "step": 1296 - }, - { - "epoch": 0.1169680299409298, - "grad_norm": 2.1338169960271145, - "learning_rate": 3.92122990157942e-06, - "loss": 0.9691, - "step": 1297 - }, - { - "epoch": 0.11705821346440005, - "grad_norm": 1.6536858341054816, - "learning_rate": 3.921067477462437e-06, - "loss": 1.0379, - "step": 1298 - }, - { - "epoch": 0.11714839698787032, - "grad_norm": 1.772358435827479, - "learning_rate": 3.920904889429385e-06, - "loss": 1.0538, - "step": 1299 - }, - { - "epoch": 0.11723858051134058, - "grad_norm": 1.6399756463498865, - "learning_rate": 3.920742137494135e-06, - "loss": 1.0349, - "step": 1300 - }, - { - "epoch": 0.11732876403481084, - "grad_norm": 2.2330674834041653, - "learning_rate": 3.920579221670575e-06, - "loss": 1.0585, - "step": 1301 - }, - { - "epoch": 0.1174189475582811, - "grad_norm": 1.6315347152650073, - "learning_rate": 3.920416141972606e-06, - "loss": 1.0006, - "step": 1302 - }, - { - "epoch": 0.11750913108175136, - "grad_norm": 1.3128663414636297, - "learning_rate": 3.920252898414143e-06, - "loss": 0.9954, - "step": 1303 - }, - { - "epoch": 0.11759931460522162, - "grad_norm": 1.7934361598635804, - "learning_rate": 3.920089491009114e-06, - "loss": 1.0063, - "step": 1304 - }, - { - "epoch": 0.1176894981286919, - "grad_norm": 1.5102705610795617, - "learning_rate": 3.919925919771463e-06, - "loss": 1.0675, - "step": 1305 - }, - { - "epoch": 0.11777968165216215, - "grad_norm": 1.8925846757175073, - "learning_rate": 3.919762184715146e-06, - "loss": 0.915, - "step": 1306 - }, - { - "epoch": 0.11786986517563242, - "grad_norm": 1.6021743722090998, - "learning_rate": 3.919598285854134e-06, - "loss": 0.9994, - "step": 1307 - }, - { - "epoch": 0.11796004869910268, - "grad_norm": 1.6463693236928791, - "learning_rate": 3.919434223202411e-06, - "loss": 1.0559, - "step": 1308 - }, - { - "epoch": 0.11805023222257294, - "grad_norm": 1.4596917864361505, - "learning_rate": 3.919269996773977e-06, - "loss": 1.0496, - "step": 1309 - }, - { - "epoch": 0.1181404157460432, - "grad_norm": 1.2947350736952015, - "learning_rate": 3.919105606582844e-06, - "loss": 1.0063, - "step": 1310 - }, - { - "epoch": 0.11823059926951346, - "grad_norm": 1.6332399821690224, - "learning_rate": 3.918941052643039e-06, - "loss": 0.972, - "step": 1311 - }, - { - "epoch": 0.11832078279298372, - "grad_norm": 1.7991907870413406, - "learning_rate": 3.918776334968602e-06, - "loss": 1.0086, - "step": 1312 - }, - { - "epoch": 0.11841096631645398, - "grad_norm": 2.427043887187554, - "learning_rate": 3.918611453573589e-06, - "loss": 1.1042, - "step": 1313 - }, - { - "epoch": 0.11850114983992424, - "grad_norm": 1.7665288181626615, - "learning_rate": 3.918446408472066e-06, - "loss": 0.9321, - "step": 1314 - }, - { - "epoch": 0.1185913333633945, - "grad_norm": 1.473617685087407, - "learning_rate": 3.918281199678119e-06, - "loss": 1.0382, - "step": 1315 - }, - { - "epoch": 0.11868151688686476, - "grad_norm": 0.7943662909173658, - "learning_rate": 3.9181158272058414e-06, - "loss": 0.82, - "step": 1316 - }, - { - "epoch": 0.11877170041033504, - "grad_norm": 1.4879909442502997, - "learning_rate": 3.9179502910693455e-06, - "loss": 1.0273, - "step": 1317 - }, - { - "epoch": 0.1188618839338053, - "grad_norm": 1.3581214035414901, - "learning_rate": 3.917784591282756e-06, - "loss": 0.9564, - "step": 1318 - }, - { - "epoch": 0.11895206745727556, - "grad_norm": 1.3274793514075431, - "learning_rate": 3.9176187278602105e-06, - "loss": 1.0271, - "step": 1319 - }, - { - "epoch": 0.11904225098074582, - "grad_norm": 3.1336705276643473, - "learning_rate": 3.9174527008158606e-06, - "loss": 0.9417, - "step": 1320 - }, - { - "epoch": 0.11913243450421608, - "grad_norm": 1.470535005279272, - "learning_rate": 3.917286510163874e-06, - "loss": 0.9215, - "step": 1321 - }, - { - "epoch": 0.11922261802768634, - "grad_norm": 1.1170766281918907, - "learning_rate": 3.917120155918431e-06, - "loss": 0.9767, - "step": 1322 - }, - { - "epoch": 0.1193128015511566, - "grad_norm": 0.8102690071058763, - "learning_rate": 3.916953638093725e-06, - "loss": 0.8748, - "step": 1323 - }, - { - "epoch": 0.11940298507462686, - "grad_norm": 1.501131663998632, - "learning_rate": 3.916786956703964e-06, - "loss": 0.991, - "step": 1324 - }, - { - "epoch": 0.11949316859809712, - "grad_norm": 1.8059971008657714, - "learning_rate": 3.916620111763372e-06, - "loss": 0.9875, - "step": 1325 - }, - { - "epoch": 0.11958335212156739, - "grad_norm": 1.4840202309136854, - "learning_rate": 3.916453103286183e-06, - "loss": 1.0498, - "step": 1326 - }, - { - "epoch": 0.11967353564503765, - "grad_norm": 1.8602272452721291, - "learning_rate": 3.916285931286648e-06, - "loss": 1.0418, - "step": 1327 - }, - { - "epoch": 0.11976371916850792, - "grad_norm": 1.522853719903764, - "learning_rate": 3.916118595779031e-06, - "loss": 0.9436, - "step": 1328 - }, - { - "epoch": 0.11985390269197818, - "grad_norm": 1.58698534996131, - "learning_rate": 3.915951096777611e-06, - "loss": 0.9678, - "step": 1329 - }, - { - "epoch": 0.11994408621544844, - "grad_norm": 1.3215546492593502, - "learning_rate": 3.915783434296678e-06, - "loss": 0.9389, - "step": 1330 - }, - { - "epoch": 0.1200342697389187, - "grad_norm": 1.7595396246040491, - "learning_rate": 3.91561560835054e-06, - "loss": 0.9766, - "step": 1331 - }, - { - "epoch": 0.12012445326238896, - "grad_norm": 1.404376031716498, - "learning_rate": 3.915447618953515e-06, - "loss": 1.0157, - "step": 1332 - }, - { - "epoch": 0.12021463678585922, - "grad_norm": 1.5954952034491128, - "learning_rate": 3.915279466119937e-06, - "loss": 0.9498, - "step": 1333 - }, - { - "epoch": 0.12030482030932949, - "grad_norm": 0.94287061962904, - "learning_rate": 3.9151111498641546e-06, - "loss": 0.842, - "step": 1334 - }, - { - "epoch": 0.12039500383279975, - "grad_norm": 1.8022796950183169, - "learning_rate": 3.914942670200529e-06, - "loss": 0.8736, - "step": 1335 - }, - { - "epoch": 0.12048518735627001, - "grad_norm": 1.7613007923001203, - "learning_rate": 3.914774027143436e-06, - "loss": 1.0433, - "step": 1336 - }, - { - "epoch": 0.12057537087974027, - "grad_norm": 3.638868309202232, - "learning_rate": 3.914605220707265e-06, - "loss": 0.9831, - "step": 1337 - }, - { - "epoch": 0.12066555440321053, - "grad_norm": 1.638024003147602, - "learning_rate": 3.9144362509064194e-06, - "loss": 1.147, - "step": 1338 - }, - { - "epoch": 0.12075573792668079, - "grad_norm": 1.7698978271736572, - "learning_rate": 3.914267117755317e-06, - "loss": 1.065, - "step": 1339 - }, - { - "epoch": 0.12084592145015106, - "grad_norm": 1.4213790867140945, - "learning_rate": 3.914097821268389e-06, - "loss": 0.9585, - "step": 1340 - }, - { - "epoch": 0.12093610497362133, - "grad_norm": 1.2899177257412655, - "learning_rate": 3.913928361460081e-06, - "loss": 1.0088, - "step": 1341 - }, - { - "epoch": 0.12102628849709159, - "grad_norm": 1.3988068162221319, - "learning_rate": 3.913758738344851e-06, - "loss": 0.9817, - "step": 1342 - }, - { - "epoch": 0.12111647202056185, - "grad_norm": 0.8639067373207113, - "learning_rate": 3.913588951937174e-06, - "loss": 0.8329, - "step": 1343 - }, - { - "epoch": 0.12120665554403211, - "grad_norm": 0.7577051696277768, - "learning_rate": 3.9134190022515355e-06, - "loss": 0.8292, - "step": 1344 - }, - { - "epoch": 0.12129683906750237, - "grad_norm": 1.6608442249824613, - "learning_rate": 3.913248889302438e-06, - "loss": 0.959, - "step": 1345 - }, - { - "epoch": 0.12138702259097263, - "grad_norm": 1.696371705672478, - "learning_rate": 3.913078613104395e-06, - "loss": 0.9321, - "step": 1346 - }, - { - "epoch": 0.12147720611444289, - "grad_norm": 1.6174740744728482, - "learning_rate": 3.912908173671936e-06, - "loss": 0.8786, - "step": 1347 - }, - { - "epoch": 0.12156738963791315, - "grad_norm": 1.668546697112291, - "learning_rate": 3.9127375710196044e-06, - "loss": 0.9299, - "step": 1348 - }, - { - "epoch": 0.12165757316138341, - "grad_norm": 2.087644871236111, - "learning_rate": 3.912566805161957e-06, - "loss": 0.9414, - "step": 1349 - }, - { - "epoch": 0.12174775668485367, - "grad_norm": 1.6608384828667913, - "learning_rate": 3.912395876113564e-06, - "loss": 1.0157, - "step": 1350 - }, - { - "epoch": 0.12183794020832393, - "grad_norm": 1.5137340114514592, - "learning_rate": 3.912224783889009e-06, - "loss": 1.004, - "step": 1351 - }, - { - "epoch": 0.12192812373179421, - "grad_norm": 1.5252621409757234, - "learning_rate": 3.912053528502892e-06, - "loss": 0.9975, - "step": 1352 - }, - { - "epoch": 0.12201830725526447, - "grad_norm": 1.3681790924880408, - "learning_rate": 3.911882109969825e-06, - "loss": 0.9168, - "step": 1353 - }, - { - "epoch": 0.12210849077873473, - "grad_norm": 1.6562367564697522, - "learning_rate": 3.911710528304435e-06, - "loss": 0.998, - "step": 1354 - }, - { - "epoch": 0.12219867430220499, - "grad_norm": 1.9334324624967205, - "learning_rate": 3.911538783521361e-06, - "loss": 0.9454, - "step": 1355 - }, - { - "epoch": 0.12228885782567525, - "grad_norm": 1.7024513802328938, - "learning_rate": 3.9113668756352575e-06, - "loss": 1.0103, - "step": 1356 - }, - { - "epoch": 0.12237904134914551, - "grad_norm": 1.2927011172543792, - "learning_rate": 3.911194804660793e-06, - "loss": 0.9242, - "step": 1357 - }, - { - "epoch": 0.12246922487261577, - "grad_norm": 1.4775700436689132, - "learning_rate": 3.91102257061265e-06, - "loss": 1.075, - "step": 1358 - }, - { - "epoch": 0.12255940839608603, - "grad_norm": 1.323157411231502, - "learning_rate": 3.910850173505524e-06, - "loss": 1.0352, - "step": 1359 - }, - { - "epoch": 0.1226495919195563, - "grad_norm": 1.5131176530737644, - "learning_rate": 3.9106776133541255e-06, - "loss": 1.0732, - "step": 1360 - }, - { - "epoch": 0.12273977544302656, - "grad_norm": 1.6073329525497113, - "learning_rate": 3.9105048901731766e-06, - "loss": 0.9543, - "step": 1361 - }, - { - "epoch": 0.12282995896649682, - "grad_norm": 1.5454200298361525, - "learning_rate": 3.9103320039774165e-06, - "loss": 0.9744, - "step": 1362 - }, - { - "epoch": 0.12292014248996708, - "grad_norm": 1.4037588731004653, - "learning_rate": 3.9101589547815965e-06, - "loss": 1.0558, - "step": 1363 - }, - { - "epoch": 0.12301032601343735, - "grad_norm": 1.2193290117547686, - "learning_rate": 3.909985742600482e-06, - "loss": 0.981, - "step": 1364 - }, - { - "epoch": 0.12310050953690761, - "grad_norm": 1.4588837901708507, - "learning_rate": 3.909812367448852e-06, - "loss": 0.9591, - "step": 1365 - }, - { - "epoch": 0.12319069306037787, - "grad_norm": 2.0059084640186673, - "learning_rate": 3.909638829341501e-06, - "loss": 0.9743, - "step": 1366 - }, - { - "epoch": 0.12328087658384813, - "grad_norm": 1.448585721593291, - "learning_rate": 3.909465128293234e-06, - "loss": 1.0511, - "step": 1367 - }, - { - "epoch": 0.1233710601073184, - "grad_norm": 1.5617561095417294, - "learning_rate": 3.9092912643188745e-06, - "loss": 1.0542, - "step": 1368 - }, - { - "epoch": 0.12346124363078866, - "grad_norm": 1.6268941403764072, - "learning_rate": 3.909117237433256e-06, - "loss": 0.9855, - "step": 1369 - }, - { - "epoch": 0.12355142715425892, - "grad_norm": 1.7383957042946474, - "learning_rate": 3.908943047651229e-06, - "loss": 1.0384, - "step": 1370 - }, - { - "epoch": 0.12364161067772918, - "grad_norm": 1.5227964960044424, - "learning_rate": 3.908768694987655e-06, - "loss": 0.9756, - "step": 1371 - }, - { - "epoch": 0.12373179420119944, - "grad_norm": 2.2313825936549647, - "learning_rate": 3.908594179457411e-06, - "loss": 0.9143, - "step": 1372 - }, - { - "epoch": 0.1238219777246697, - "grad_norm": 1.7504807901687691, - "learning_rate": 3.908419501075388e-06, - "loss": 0.993, - "step": 1373 - }, - { - "epoch": 0.12391216124813996, - "grad_norm": 1.4240299704483854, - "learning_rate": 3.90824465985649e-06, - "loss": 1.0706, - "step": 1374 - }, - { - "epoch": 0.12400234477161022, - "grad_norm": 1.6360183710405212, - "learning_rate": 3.908069655815636e-06, - "loss": 1.0352, - "step": 1375 - }, - { - "epoch": 0.1240925282950805, - "grad_norm": 1.883393194153135, - "learning_rate": 3.907894488967758e-06, - "loss": 0.9351, - "step": 1376 - }, - { - "epoch": 0.12418271181855076, - "grad_norm": 1.6056221473344798, - "learning_rate": 3.9077191593278005e-06, - "loss": 1.0571, - "step": 1377 - }, - { - "epoch": 0.12427289534202102, - "grad_norm": 1.007830715754105, - "learning_rate": 3.9075436669107265e-06, - "loss": 0.8395, - "step": 1378 - }, - { - "epoch": 0.12436307886549128, - "grad_norm": 1.7285690040751527, - "learning_rate": 3.90736801173151e-06, - "loss": 1.0084, - "step": 1379 - }, - { - "epoch": 0.12445326238896154, - "grad_norm": 1.5167082228923539, - "learning_rate": 3.907192193805136e-06, - "loss": 1.0103, - "step": 1380 - }, - { - "epoch": 0.1245434459124318, - "grad_norm": 1.869303952279155, - "learning_rate": 3.907016213146608e-06, - "loss": 1.0437, - "step": 1381 - }, - { - "epoch": 0.12463362943590206, - "grad_norm": 1.4770522339868888, - "learning_rate": 3.906840069770942e-06, - "loss": 1.109, - "step": 1382 - }, - { - "epoch": 0.12472381295937232, - "grad_norm": 1.266371000859516, - "learning_rate": 3.906663763693167e-06, - "loss": 1.0273, - "step": 1383 - }, - { - "epoch": 0.12481399648284258, - "grad_norm": 1.532315097874319, - "learning_rate": 3.906487294928327e-06, - "loss": 0.9724, - "step": 1384 - }, - { - "epoch": 0.12490418000631284, - "grad_norm": 1.6990400549223643, - "learning_rate": 3.906310663491478e-06, - "loss": 0.9954, - "step": 1385 - }, - { - "epoch": 0.1249943635297831, - "grad_norm": 1.5455831662882378, - "learning_rate": 3.906133869397692e-06, - "loss": 1.0519, - "step": 1386 - }, - { - "epoch": 0.12508454705325336, - "grad_norm": 1.5198709064174076, - "learning_rate": 3.905956912662054e-06, - "loss": 0.9525, - "step": 1387 - }, - { - "epoch": 0.12517473057672363, - "grad_norm": 1.955581645930007, - "learning_rate": 3.905779793299662e-06, - "loss": 1.0594, - "step": 1388 - }, - { - "epoch": 0.12526491410019389, - "grad_norm": 1.536099520235823, - "learning_rate": 3.905602511325631e-06, - "loss": 1.0305, - "step": 1389 - }, - { - "epoch": 0.12535509762366415, - "grad_norm": 1.6318593140530884, - "learning_rate": 3.905425066755086e-06, - "loss": 0.9577, - "step": 1390 - }, - { - "epoch": 0.1254452811471344, - "grad_norm": 1.258873107329957, - "learning_rate": 3.905247459603168e-06, - "loss": 0.9548, - "step": 1391 - }, - { - "epoch": 0.12553546467060467, - "grad_norm": 1.4791444678275047, - "learning_rate": 3.905069689885031e-06, - "loss": 0.8763, - "step": 1392 - }, - { - "epoch": 0.12562564819407493, - "grad_norm": 1.326338879683841, - "learning_rate": 3.904891757615843e-06, - "loss": 0.8768, - "step": 1393 - }, - { - "epoch": 0.12571583171754522, - "grad_norm": 4.066282653396044, - "learning_rate": 3.9047136628107874e-06, - "loss": 0.791, - "step": 1394 - }, - { - "epoch": 0.12580601524101548, - "grad_norm": 1.3939584362368456, - "learning_rate": 3.904535405485059e-06, - "loss": 1.1232, - "step": 1395 - }, - { - "epoch": 0.12589619876448574, - "grad_norm": 1.5615777918151041, - "learning_rate": 3.90435698565387e-06, - "loss": 0.95, - "step": 1396 - }, - { - "epoch": 0.125986382287956, - "grad_norm": 1.5043171903297867, - "learning_rate": 3.904178403332441e-06, - "loss": 0.965, - "step": 1397 - }, - { - "epoch": 0.12607656581142626, - "grad_norm": 1.706784507674181, - "learning_rate": 3.903999658536012e-06, - "loss": 1.072, - "step": 1398 - }, - { - "epoch": 0.12616674933489652, - "grad_norm": 1.4078839875592883, - "learning_rate": 3.903820751279833e-06, - "loss": 0.9939, - "step": 1399 - }, - { - "epoch": 0.12625693285836678, - "grad_norm": 1.7493844312036784, - "learning_rate": 3.90364168157917e-06, - "loss": 1.0251, - "step": 1400 - }, - { - "epoch": 0.12634711638183704, - "grad_norm": 1.6341642050535707, - "learning_rate": 3.903462449449302e-06, - "loss": 1.0253, - "step": 1401 - }, - { - "epoch": 0.1264372999053073, - "grad_norm": 1.475883206530533, - "learning_rate": 3.903283054905522e-06, - "loss": 0.9723, - "step": 1402 - }, - { - "epoch": 0.12652748342877757, - "grad_norm": 1.836426625128595, - "learning_rate": 3.9031034979631385e-06, - "loss": 0.9115, - "step": 1403 - }, - { - "epoch": 0.12661766695224783, - "grad_norm": 1.3801059852895425, - "learning_rate": 3.902923778637469e-06, - "loss": 1.0279, - "step": 1404 - }, - { - "epoch": 0.1267078504757181, - "grad_norm": 1.3655229735507421, - "learning_rate": 3.902743896943852e-06, - "loss": 0.9315, - "step": 1405 - }, - { - "epoch": 0.12679803399918835, - "grad_norm": 0.870203666799312, - "learning_rate": 3.902563852897633e-06, - "loss": 0.872, - "step": 1406 - }, - { - "epoch": 0.1268882175226586, - "grad_norm": 0.9617812011671142, - "learning_rate": 3.9023836465141755e-06, - "loss": 0.8413, - "step": 1407 - }, - { - "epoch": 0.12697840104612887, - "grad_norm": 1.6258318312556714, - "learning_rate": 3.902203277808856e-06, - "loss": 1.0342, - "step": 1408 - }, - { - "epoch": 0.12706858456959913, - "grad_norm": 1.5831048783907202, - "learning_rate": 3.902022746797064e-06, - "loss": 1.0501, - "step": 1409 - }, - { - "epoch": 0.1271587680930694, - "grad_norm": 1.728313541991838, - "learning_rate": 3.9018420534942035e-06, - "loss": 1.1079, - "step": 1410 - }, - { - "epoch": 0.12724895161653965, - "grad_norm": 1.8221027918899793, - "learning_rate": 3.9016611979156935e-06, - "loss": 0.9862, - "step": 1411 - }, - { - "epoch": 0.1273391351400099, - "grad_norm": 1.9281244163574127, - "learning_rate": 3.9014801800769635e-06, - "loss": 0.9065, - "step": 1412 - }, - { - "epoch": 0.12742931866348017, - "grad_norm": 1.5212160003548865, - "learning_rate": 3.901298999993459e-06, - "loss": 1.0211, - "step": 1413 - }, - { - "epoch": 0.12751950218695043, - "grad_norm": 1.6248075664727244, - "learning_rate": 3.901117657680642e-06, - "loss": 0.9277, - "step": 1414 - }, - { - "epoch": 0.1276096857104207, - "grad_norm": 1.3822996406162282, - "learning_rate": 3.900936153153982e-06, - "loss": 0.9237, - "step": 1415 - }, - { - "epoch": 0.12769986923389096, - "grad_norm": 0.7938054373297587, - "learning_rate": 3.900754486428968e-06, - "loss": 0.7911, - "step": 1416 - }, - { - "epoch": 0.12779005275736122, - "grad_norm": 1.5130858240488212, - "learning_rate": 3.900572657521102e-06, - "loss": 1.0836, - "step": 1417 - }, - { - "epoch": 0.1278802362808315, - "grad_norm": 1.2844214382269037, - "learning_rate": 3.900390666445896e-06, - "loss": 0.9745, - "step": 1418 - }, - { - "epoch": 0.12797041980430177, - "grad_norm": 1.5540763382148863, - "learning_rate": 3.9002085132188795e-06, - "loss": 0.9589, - "step": 1419 - }, - { - "epoch": 0.12806060332777203, - "grad_norm": 3.4423679908812126, - "learning_rate": 3.9000261978555964e-06, - "loss": 1.0351, - "step": 1420 - }, - { - "epoch": 0.1281507868512423, - "grad_norm": 1.5608324689535154, - "learning_rate": 3.8998437203716e-06, - "loss": 1.0581, - "step": 1421 - }, - { - "epoch": 0.12824097037471255, - "grad_norm": 1.5608713435758532, - "learning_rate": 3.899661080782462e-06, - "loss": 1.0189, - "step": 1422 - }, - { - "epoch": 0.1283311538981828, - "grad_norm": 1.5284061103113626, - "learning_rate": 3.899478279103767e-06, - "loss": 1.0241, - "step": 1423 - }, - { - "epoch": 0.12842133742165307, - "grad_norm": 1.7580368237940631, - "learning_rate": 3.8992953153511105e-06, - "loss": 1.0243, - "step": 1424 - }, - { - "epoch": 0.12851152094512333, - "grad_norm": 1.3606259958511515, - "learning_rate": 3.899112189540106e-06, - "loss": 1.0526, - "step": 1425 - }, - { - "epoch": 0.1286017044685936, - "grad_norm": 1.7805329602904474, - "learning_rate": 3.898928901686377e-06, - "loss": 1.0367, - "step": 1426 - }, - { - "epoch": 0.12869188799206385, - "grad_norm": 0.8696835654385956, - "learning_rate": 3.898745451805564e-06, - "loss": 0.814, - "step": 1427 - }, - { - "epoch": 0.1287820715155341, - "grad_norm": 1.5218424736792615, - "learning_rate": 3.898561839913319e-06, - "loss": 0.9346, - "step": 1428 - }, - { - "epoch": 0.12887225503900437, - "grad_norm": 2.350068083243574, - "learning_rate": 3.89837806602531e-06, - "loss": 0.8478, - "step": 1429 - }, - { - "epoch": 0.12896243856247463, - "grad_norm": 1.5038323718664057, - "learning_rate": 3.898194130157217e-06, - "loss": 0.9993, - "step": 1430 - }, - { - "epoch": 0.1290526220859449, - "grad_norm": 1.8253323996931372, - "learning_rate": 3.8980100323247335e-06, - "loss": 0.9307, - "step": 1431 - }, - { - "epoch": 0.12914280560941516, - "grad_norm": 1.6615159155295454, - "learning_rate": 3.897825772543568e-06, - "loss": 0.93, - "step": 1432 - }, - { - "epoch": 0.12923298913288542, - "grad_norm": 1.731613779236376, - "learning_rate": 3.897641350829444e-06, - "loss": 1.0513, - "step": 1433 - }, - { - "epoch": 0.12932317265635568, - "grad_norm": 1.3579695440677828, - "learning_rate": 3.897456767198096e-06, - "loss": 1.0744, - "step": 1434 - }, - { - "epoch": 0.12941335617982594, - "grad_norm": 1.4648442382811686, - "learning_rate": 3.897272021665275e-06, - "loss": 0.934, - "step": 1435 - }, - { - "epoch": 0.1295035397032962, - "grad_norm": 1.3365675126651348, - "learning_rate": 3.897087114246743e-06, - "loss": 1.0992, - "step": 1436 - }, - { - "epoch": 0.12959372322676646, - "grad_norm": 1.677008972730841, - "learning_rate": 3.896902044958279e-06, - "loss": 1.1165, - "step": 1437 - }, - { - "epoch": 0.12968390675023672, - "grad_norm": 1.569585071816261, - "learning_rate": 3.896716813815672e-06, - "loss": 1.052, - "step": 1438 - }, - { - "epoch": 0.12977409027370698, - "grad_norm": 1.712867867156578, - "learning_rate": 3.896531420834728e-06, - "loss": 0.9386, - "step": 1439 - }, - { - "epoch": 0.12986427379717724, - "grad_norm": 1.426779536004197, - "learning_rate": 3.896345866031266e-06, - "loss": 0.9001, - "step": 1440 - }, - { - "epoch": 0.1299544573206475, - "grad_norm": 1.4089465249109328, - "learning_rate": 3.896160149421119e-06, - "loss": 1.0569, - "step": 1441 - }, - { - "epoch": 0.1300446408441178, - "grad_norm": 1.5848298362340056, - "learning_rate": 3.8959742710201314e-06, - "loss": 1.0638, - "step": 1442 - }, - { - "epoch": 0.13013482436758805, - "grad_norm": 2.049666852165127, - "learning_rate": 3.895788230844166e-06, - "loss": 0.891, - "step": 1443 - }, - { - "epoch": 0.13022500789105831, - "grad_norm": 1.4954132843333623, - "learning_rate": 3.895602028909095e-06, - "loss": 1.1467, - "step": 1444 - }, - { - "epoch": 0.13031519141452858, - "grad_norm": 0.7534055875699099, - "learning_rate": 3.895415665230807e-06, - "loss": 0.8156, - "step": 1445 - }, - { - "epoch": 0.13040537493799884, - "grad_norm": 1.2373205363064717, - "learning_rate": 3.895229139825203e-06, - "loss": 1.0146, - "step": 1446 - }, - { - "epoch": 0.1304955584614691, - "grad_norm": 1.5456174883137401, - "learning_rate": 3.895042452708198e-06, - "loss": 0.9132, - "step": 1447 - }, - { - "epoch": 0.13058574198493936, - "grad_norm": 1.4645785078870885, - "learning_rate": 3.894855603895723e-06, - "loss": 0.9011, - "step": 1448 - }, - { - "epoch": 0.13067592550840962, - "grad_norm": 1.3455290882534352, - "learning_rate": 3.894668593403718e-06, - "loss": 1.0722, - "step": 1449 - }, - { - "epoch": 0.13076610903187988, - "grad_norm": 1.3983342649405963, - "learning_rate": 3.8944814212481425e-06, - "loss": 1.0012, - "step": 1450 - }, - { - "epoch": 0.13085629255535014, - "grad_norm": 1.6916037922091225, - "learning_rate": 3.894294087444966e-06, - "loss": 0.9818, - "step": 1451 - }, - { - "epoch": 0.1309464760788204, - "grad_norm": 1.5323499504672624, - "learning_rate": 3.894106592010173e-06, - "loss": 0.8826, - "step": 1452 - }, - { - "epoch": 0.13103665960229066, - "grad_norm": 1.4963927605132386, - "learning_rate": 3.893918934959762e-06, - "loss": 0.8764, - "step": 1453 - }, - { - "epoch": 0.13112684312576092, - "grad_norm": 1.548190472681032, - "learning_rate": 3.893731116309743e-06, - "loss": 0.9281, - "step": 1454 - }, - { - "epoch": 0.13121702664923118, - "grad_norm": 1.4148080451626583, - "learning_rate": 3.893543136076145e-06, - "loss": 1.0134, - "step": 1455 - }, - { - "epoch": 0.13130721017270144, - "grad_norm": 1.6002742144923832, - "learning_rate": 3.893354994275006e-06, - "loss": 1.0175, - "step": 1456 - }, - { - "epoch": 0.1313973936961717, - "grad_norm": 1.520193784181554, - "learning_rate": 3.893166690922378e-06, - "loss": 0.9678, - "step": 1457 - }, - { - "epoch": 0.13148757721964197, - "grad_norm": 2.4488577690770335, - "learning_rate": 3.892978226034329e-06, - "loss": 1.0276, - "step": 1458 - }, - { - "epoch": 0.13157776074311223, - "grad_norm": 1.622288716200337, - "learning_rate": 3.89278959962694e-06, - "loss": 1.0448, - "step": 1459 - }, - { - "epoch": 0.1316679442665825, - "grad_norm": 1.46240910956803, - "learning_rate": 3.8926008117163056e-06, - "loss": 0.9709, - "step": 1460 - }, - { - "epoch": 0.13175812779005275, - "grad_norm": 1.7361990469808768, - "learning_rate": 3.892411862318535e-06, - "loss": 0.9741, - "step": 1461 - }, - { - "epoch": 0.131848311313523, - "grad_norm": 1.3271684680182207, - "learning_rate": 3.892222751449749e-06, - "loss": 1.0195, - "step": 1462 - }, - { - "epoch": 0.13193849483699327, - "grad_norm": 1.391871804215283, - "learning_rate": 3.892033479126084e-06, - "loss": 0.9436, - "step": 1463 - }, - { - "epoch": 0.13202867836046353, - "grad_norm": 2.2412772108597343, - "learning_rate": 3.891844045363691e-06, - "loss": 0.9772, - "step": 1464 - }, - { - "epoch": 0.13211886188393382, - "grad_norm": 1.5334163055914274, - "learning_rate": 3.891654450178732e-06, - "loss": 0.9155, - "step": 1465 - }, - { - "epoch": 0.13220904540740408, - "grad_norm": 2.216359852468905, - "learning_rate": 3.891464693587385e-06, - "loss": 1.0368, - "step": 1466 - }, - { - "epoch": 0.13229922893087434, - "grad_norm": 1.787156201358857, - "learning_rate": 3.89127477560584e-06, - "loss": 1.0286, - "step": 1467 - }, - { - "epoch": 0.1323894124543446, - "grad_norm": 1.6086415962266032, - "learning_rate": 3.891084696250304e-06, - "loss": 1.0337, - "step": 1468 - }, - { - "epoch": 0.13247959597781486, - "grad_norm": 1.611844288776365, - "learning_rate": 3.890894455536993e-06, - "loss": 1.0422, - "step": 1469 - }, - { - "epoch": 0.13256977950128512, - "grad_norm": 2.3711700678551377, - "learning_rate": 3.890704053482142e-06, - "loss": 1.0317, - "step": 1470 - }, - { - "epoch": 0.13265996302475538, - "grad_norm": 1.5612662212668356, - "learning_rate": 3.890513490101995e-06, - "loss": 0.9405, - "step": 1471 - }, - { - "epoch": 0.13275014654822564, - "grad_norm": 0.8118574095619829, - "learning_rate": 3.890322765412814e-06, - "loss": 0.8214, - "step": 1472 - }, - { - "epoch": 0.1328403300716959, - "grad_norm": 1.2369661297027494, - "learning_rate": 3.890131879430871e-06, - "loss": 1.015, - "step": 1473 - }, - { - "epoch": 0.13293051359516617, - "grad_norm": 1.4413433112899807, - "learning_rate": 3.889940832172454e-06, - "loss": 1.0157, - "step": 1474 - }, - { - "epoch": 0.13302069711863643, - "grad_norm": 1.6829602671628938, - "learning_rate": 3.889749623653864e-06, - "loss": 1.0428, - "step": 1475 - }, - { - "epoch": 0.1331108806421067, - "grad_norm": 1.3222896060994662, - "learning_rate": 3.889558253891416e-06, - "loss": 0.9363, - "step": 1476 - }, - { - "epoch": 0.13320106416557695, - "grad_norm": 0.7253711736861813, - "learning_rate": 3.8893667229014385e-06, - "loss": 0.8155, - "step": 1477 - }, - { - "epoch": 0.1332912476890472, - "grad_norm": 2.18985893579036, - "learning_rate": 3.8891750307002746e-06, - "loss": 0.9013, - "step": 1478 - }, - { - "epoch": 0.13338143121251747, - "grad_norm": 1.280338148959306, - "learning_rate": 3.888983177304281e-06, - "loss": 0.9313, - "step": 1479 - }, - { - "epoch": 0.13347161473598773, - "grad_norm": 1.0162452564386861, - "learning_rate": 3.888791162729826e-06, - "loss": 0.8199, - "step": 1480 - }, - { - "epoch": 0.133561798259458, - "grad_norm": 1.833581589738517, - "learning_rate": 3.888598986993295e-06, - "loss": 1.0168, - "step": 1481 - }, - { - "epoch": 0.13365198178292825, - "grad_norm": 1.5285838524526163, - "learning_rate": 3.888406650111085e-06, - "loss": 1.0292, - "step": 1482 - }, - { - "epoch": 0.1337421653063985, - "grad_norm": 1.5730745686816034, - "learning_rate": 3.888214152099607e-06, - "loss": 0.998, - "step": 1483 - }, - { - "epoch": 0.13383234882986877, - "grad_norm": 1.5690339610336914, - "learning_rate": 3.888021492975285e-06, - "loss": 1.06, - "step": 1484 - }, - { - "epoch": 0.13392253235333904, - "grad_norm": 1.6381206471932226, - "learning_rate": 3.88782867275456e-06, - "loss": 1.0511, - "step": 1485 - }, - { - "epoch": 0.1340127158768093, - "grad_norm": 1.2438753764539425, - "learning_rate": 3.8876356914538824e-06, - "loss": 0.958, - "step": 1486 - }, - { - "epoch": 0.13410289940027956, - "grad_norm": 1.3771542233194214, - "learning_rate": 3.88744254908972e-06, - "loss": 0.956, - "step": 1487 - }, - { - "epoch": 0.13419308292374982, - "grad_norm": 1.7141650688202266, - "learning_rate": 3.887249245678552e-06, - "loss": 0.9725, - "step": 1488 - }, - { - "epoch": 0.1342832664472201, - "grad_norm": 1.4464105507402538, - "learning_rate": 3.887055781236872e-06, - "loss": 1.0622, - "step": 1489 - }, - { - "epoch": 0.13437344997069037, - "grad_norm": 1.7797949352440539, - "learning_rate": 3.886862155781186e-06, - "loss": 0.8784, - "step": 1490 - }, - { - "epoch": 0.13446363349416063, - "grad_norm": 0.7966082631401125, - "learning_rate": 3.886668369328019e-06, - "loss": 0.827, - "step": 1491 - }, - { - "epoch": 0.1345538170176309, - "grad_norm": 1.8266967671877428, - "learning_rate": 3.886474421893904e-06, - "loss": 1.0277, - "step": 1492 - }, - { - "epoch": 0.13464400054110115, - "grad_norm": 1.3995198158613935, - "learning_rate": 3.886280313495388e-06, - "loss": 0.9197, - "step": 1493 - }, - { - "epoch": 0.1347341840645714, - "grad_norm": 1.604469898182825, - "learning_rate": 3.886086044149035e-06, - "loss": 0.9645, - "step": 1494 - }, - { - "epoch": 0.13482436758804167, - "grad_norm": 1.3563109757740168, - "learning_rate": 3.885891613871421e-06, - "loss": 0.9964, - "step": 1495 - }, - { - "epoch": 0.13491455111151193, - "grad_norm": 1.4924196392258844, - "learning_rate": 3.885697022679136e-06, - "loss": 1.0468, - "step": 1496 - }, - { - "epoch": 0.1350047346349822, - "grad_norm": 1.5749398749849166, - "learning_rate": 3.885502270588784e-06, - "loss": 1.018, - "step": 1497 - }, - { - "epoch": 0.13509491815845245, - "grad_norm": 1.5033109204946888, - "learning_rate": 3.885307357616981e-06, - "loss": 0.9924, - "step": 1498 - }, - { - "epoch": 0.13518510168192271, - "grad_norm": 1.4685991087977175, - "learning_rate": 3.885112283780359e-06, - "loss": 0.9916, - "step": 1499 - }, - { - "epoch": 0.13527528520539298, - "grad_norm": 1.4466102341369937, - "learning_rate": 3.8849170490955624e-06, - "loss": 0.8882, - "step": 1500 - }, - { - "epoch": 0.13536546872886324, - "grad_norm": 1.7541136757041946, - "learning_rate": 3.88472165357925e-06, - "loss": 0.9883, - "step": 1501 - }, - { - "epoch": 0.1354556522523335, - "grad_norm": 1.4527915654076229, - "learning_rate": 3.884526097248093e-06, - "loss": 0.9511, - "step": 1502 - }, - { - "epoch": 0.13554583577580376, - "grad_norm": 1.5476674063606366, - "learning_rate": 3.884330380118779e-06, - "loss": 1.0097, - "step": 1503 - }, - { - "epoch": 0.13563601929927402, - "grad_norm": 1.4628660966474618, - "learning_rate": 3.884134502208007e-06, - "loss": 0.9448, - "step": 1504 - }, - { - "epoch": 0.13572620282274428, - "grad_norm": 1.6319274694042374, - "learning_rate": 3.88393846353249e-06, - "loss": 1.1032, - "step": 1505 - }, - { - "epoch": 0.13581638634621454, - "grad_norm": 1.314893719544709, - "learning_rate": 3.883742264108955e-06, - "loss": 1.0506, - "step": 1506 - }, - { - "epoch": 0.1359065698696848, - "grad_norm": 1.3989262499317496, - "learning_rate": 3.883545903954145e-06, - "loss": 1.0037, - "step": 1507 - }, - { - "epoch": 0.13599675339315506, - "grad_norm": 1.3744775039452715, - "learning_rate": 3.883349383084811e-06, - "loss": 0.9841, - "step": 1508 - }, - { - "epoch": 0.13608693691662532, - "grad_norm": 1.5229842856755524, - "learning_rate": 3.883152701517723e-06, - "loss": 0.9158, - "step": 1509 - }, - { - "epoch": 0.13617712044009558, - "grad_norm": 1.5543033757904312, - "learning_rate": 3.882955859269664e-06, - "loss": 0.9834, - "step": 1510 - }, - { - "epoch": 0.13626730396356584, - "grad_norm": 1.2838125422025368, - "learning_rate": 3.882758856357428e-06, - "loss": 1.0014, - "step": 1511 - }, - { - "epoch": 0.1363574874870361, - "grad_norm": 1.6262854113817387, - "learning_rate": 3.882561692797824e-06, - "loss": 0.9803, - "step": 1512 - }, - { - "epoch": 0.1364476710105064, - "grad_norm": 1.4606034820107705, - "learning_rate": 3.882364368607677e-06, - "loss": 0.9987, - "step": 1513 - }, - { - "epoch": 0.13653785453397665, - "grad_norm": 1.6468705960817467, - "learning_rate": 3.8821668838038225e-06, - "loss": 1.0067, - "step": 1514 - }, - { - "epoch": 0.13662803805744692, - "grad_norm": 1.5499938964723579, - "learning_rate": 3.881969238403111e-06, - "loss": 0.897, - "step": 1515 - }, - { - "epoch": 0.13671822158091718, - "grad_norm": 1.8656084734058487, - "learning_rate": 3.881771432422408e-06, - "loss": 1.1118, - "step": 1516 - }, - { - "epoch": 0.13680840510438744, - "grad_norm": 1.3627216753733944, - "learning_rate": 3.88157346587859e-06, - "loss": 1.048, - "step": 1517 - }, - { - "epoch": 0.1368985886278577, - "grad_norm": 1.2146036579313808, - "learning_rate": 3.881375338788549e-06, - "loss": 0.971, - "step": 1518 - }, - { - "epoch": 0.13698877215132796, - "grad_norm": 3.9071967846747064, - "learning_rate": 3.88117705116919e-06, - "loss": 1.0115, - "step": 1519 - }, - { - "epoch": 0.13707895567479822, - "grad_norm": 0.909876211413722, - "learning_rate": 3.880978603037432e-06, - "loss": 0.874, - "step": 1520 - }, - { - "epoch": 0.13716913919826848, - "grad_norm": 1.4065995099924233, - "learning_rate": 3.880779994410209e-06, - "loss": 0.9953, - "step": 1521 - }, - { - "epoch": 0.13725932272173874, - "grad_norm": 1.6124454577780551, - "learning_rate": 3.880581225304466e-06, - "loss": 0.8671, - "step": 1522 - }, - { - "epoch": 0.137349506245209, - "grad_norm": 1.587662233488365, - "learning_rate": 3.880382295737163e-06, - "loss": 1.022, - "step": 1523 - }, - { - "epoch": 0.13743968976867926, - "grad_norm": 1.9977579066340037, - "learning_rate": 3.880183205725274e-06, - "loss": 0.9677, - "step": 1524 - }, - { - "epoch": 0.13752987329214952, - "grad_norm": 1.9183659346213382, - "learning_rate": 3.879983955285788e-06, - "loss": 0.9359, - "step": 1525 - }, - { - "epoch": 0.13762005681561978, - "grad_norm": 1.6426446744702738, - "learning_rate": 3.879784544435703e-06, - "loss": 0.9707, - "step": 1526 - }, - { - "epoch": 0.13771024033909005, - "grad_norm": 1.9366282070672334, - "learning_rate": 3.879584973192037e-06, - "loss": 0.9308, - "step": 1527 - }, - { - "epoch": 0.1378004238625603, - "grad_norm": 1.547584679155588, - "learning_rate": 3.8793852415718165e-06, - "loss": 0.9234, - "step": 1528 - }, - { - "epoch": 0.13789060738603057, - "grad_norm": 1.653418513816098, - "learning_rate": 3.879185349592085e-06, - "loss": 0.9949, - "step": 1529 - }, - { - "epoch": 0.13798079090950083, - "grad_norm": 1.277983665579819, - "learning_rate": 3.878985297269897e-06, - "loss": 1.0154, - "step": 1530 - }, - { - "epoch": 0.1380709744329711, - "grad_norm": 1.6587925953918945, - "learning_rate": 3.878785084622323e-06, - "loss": 1.0331, - "step": 1531 - }, - { - "epoch": 0.13816115795644135, - "grad_norm": 1.7993417807954695, - "learning_rate": 3.878584711666447e-06, - "loss": 1.1737, - "step": 1532 - }, - { - "epoch": 0.1382513414799116, - "grad_norm": 1.5510340563824598, - "learning_rate": 3.8783841784193635e-06, - "loss": 1.0112, - "step": 1533 - }, - { - "epoch": 0.13834152500338187, - "grad_norm": 1.4724436639394194, - "learning_rate": 3.8781834848981855e-06, - "loss": 0.972, - "step": 1534 - }, - { - "epoch": 0.13843170852685213, - "grad_norm": 1.510956249101008, - "learning_rate": 3.877982631120037e-06, - "loss": 0.9281, - "step": 1535 - }, - { - "epoch": 0.1385218920503224, - "grad_norm": 2.0549935812745472, - "learning_rate": 3.877781617102053e-06, - "loss": 1.0608, - "step": 1536 - }, - { - "epoch": 0.13861207557379268, - "grad_norm": 1.7027703712195816, - "learning_rate": 3.877580442861389e-06, - "loss": 1.0326, - "step": 1537 - }, - { - "epoch": 0.13870225909726294, - "grad_norm": 1.4815911942273867, - "learning_rate": 3.877379108415209e-06, - "loss": 1.0259, - "step": 1538 - }, - { - "epoch": 0.1387924426207332, - "grad_norm": 1.3767083998920886, - "learning_rate": 3.8771776137806915e-06, - "loss": 1.0168, - "step": 1539 - }, - { - "epoch": 0.13888262614420346, - "grad_norm": 1.6180405877566637, - "learning_rate": 3.8769759589750295e-06, - "loss": 0.9321, - "step": 1540 - }, - { - "epoch": 0.13897280966767372, - "grad_norm": 1.391180506133351, - "learning_rate": 3.876774144015429e-06, - "loss": 0.9984, - "step": 1541 - }, - { - "epoch": 0.13906299319114399, - "grad_norm": 1.8804649184995825, - "learning_rate": 3.87657216891911e-06, - "loss": 1.0248, - "step": 1542 - }, - { - "epoch": 0.13915317671461425, - "grad_norm": 1.6064321778290926, - "learning_rate": 3.876370033703307e-06, - "loss": 0.9938, - "step": 1543 - }, - { - "epoch": 0.1392433602380845, - "grad_norm": 1.7829493397411762, - "learning_rate": 3.876167738385265e-06, - "loss": 0.8931, - "step": 1544 - }, - { - "epoch": 0.13933354376155477, - "grad_norm": 2.0745709078998233, - "learning_rate": 3.875965282982247e-06, - "loss": 1.0456, - "step": 1545 - }, - { - "epoch": 0.13942372728502503, - "grad_norm": 1.4569242300048062, - "learning_rate": 3.875762667511528e-06, - "loss": 1.0049, - "step": 1546 - }, - { - "epoch": 0.1395139108084953, - "grad_norm": 1.4248989772122864, - "learning_rate": 3.875559891990394e-06, - "loss": 1.0087, - "step": 1547 - }, - { - "epoch": 0.13960409433196555, - "grad_norm": 1.8279028211384765, - "learning_rate": 3.875356956436149e-06, - "loss": 0.9642, - "step": 1548 - }, - { - "epoch": 0.1396942778554358, - "grad_norm": 1.5718401410160328, - "learning_rate": 3.875153860866108e-06, - "loss": 0.9778, - "step": 1549 - }, - { - "epoch": 0.13978446137890607, - "grad_norm": 1.700267809922238, - "learning_rate": 3.8749506052976e-06, - "loss": 1.0455, - "step": 1550 - }, - { - "epoch": 0.13987464490237633, - "grad_norm": 2.438833214431635, - "learning_rate": 3.874747189747968e-06, - "loss": 1.0247, - "step": 1551 - }, - { - "epoch": 0.1399648284258466, - "grad_norm": 1.5842668725912588, - "learning_rate": 3.874543614234568e-06, - "loss": 1.0068, - "step": 1552 - }, - { - "epoch": 0.14005501194931685, - "grad_norm": 1.3676172507474198, - "learning_rate": 3.874339878774771e-06, - "loss": 0.9774, - "step": 1553 - }, - { - "epoch": 0.14014519547278712, - "grad_norm": 1.5171952126618298, - "learning_rate": 3.874135983385961e-06, - "loss": 1.0014, - "step": 1554 - }, - { - "epoch": 0.14023537899625738, - "grad_norm": 1.5720403474332743, - "learning_rate": 3.873931928085535e-06, - "loss": 1.0022, - "step": 1555 - }, - { - "epoch": 0.14032556251972764, - "grad_norm": 1.2800202399382166, - "learning_rate": 3.873727712890904e-06, - "loss": 1.0755, - "step": 1556 - }, - { - "epoch": 0.1404157460431979, - "grad_norm": 1.3389783906282648, - "learning_rate": 3.873523337819493e-06, - "loss": 0.9665, - "step": 1557 - }, - { - "epoch": 0.14050592956666816, - "grad_norm": 1.257517098752331, - "learning_rate": 3.873318802888739e-06, - "loss": 0.9251, - "step": 1558 - }, - { - "epoch": 0.14059611309013842, - "grad_norm": 0.8472732170753239, - "learning_rate": 3.873114108116097e-06, - "loss": 0.7462, - "step": 1559 - }, - { - "epoch": 0.14068629661360868, - "grad_norm": 1.6888222635593353, - "learning_rate": 3.872909253519031e-06, - "loss": 1.0829, - "step": 1560 - }, - { - "epoch": 0.14077648013707897, - "grad_norm": 1.7198453014134927, - "learning_rate": 3.8727042391150195e-06, - "loss": 1.0721, - "step": 1561 - }, - { - "epoch": 0.14086666366054923, - "grad_norm": 1.5000507028275634, - "learning_rate": 3.872499064921556e-06, - "loss": 0.9306, - "step": 1562 - }, - { - "epoch": 0.1409568471840195, - "grad_norm": 1.5161481131516088, - "learning_rate": 3.872293730956149e-06, - "loss": 1.0098, - "step": 1563 - }, - { - "epoch": 0.14104703070748975, - "grad_norm": 1.7329993075915855, - "learning_rate": 3.872088237236316e-06, - "loss": 0.9894, - "step": 1564 - }, - { - "epoch": 0.14113721423096, - "grad_norm": 1.6708629075982113, - "learning_rate": 3.871882583779592e-06, - "loss": 0.9114, - "step": 1565 - }, - { - "epoch": 0.14122739775443027, - "grad_norm": 1.3514509210118162, - "learning_rate": 3.871676770603525e-06, - "loss": 0.9896, - "step": 1566 - }, - { - "epoch": 0.14131758127790053, - "grad_norm": 1.6412397413666773, - "learning_rate": 3.871470797725676e-06, - "loss": 0.9885, - "step": 1567 - }, - { - "epoch": 0.1414077648013708, - "grad_norm": 1.517471682867207, - "learning_rate": 3.8712646651636185e-06, - "loss": 1.0739, - "step": 1568 - }, - { - "epoch": 0.14149794832484106, - "grad_norm": 1.2156531774398154, - "learning_rate": 3.871058372934942e-06, - "loss": 0.989, - "step": 1569 - }, - { - "epoch": 0.14158813184831132, - "grad_norm": 1.3743667444776562, - "learning_rate": 3.8708519210572485e-06, - "loss": 0.8318, - "step": 1570 - }, - { - "epoch": 0.14167831537178158, - "grad_norm": 1.4821794032648425, - "learning_rate": 3.870645309548153e-06, - "loss": 0.9318, - "step": 1571 - }, - { - "epoch": 0.14176849889525184, - "grad_norm": 0.8729955670942254, - "learning_rate": 3.870438538425284e-06, - "loss": 0.7728, - "step": 1572 - }, - { - "epoch": 0.1418586824187221, - "grad_norm": 1.7650953865752146, - "learning_rate": 3.870231607706287e-06, - "loss": 0.9456, - "step": 1573 - }, - { - "epoch": 0.14194886594219236, - "grad_norm": 1.528761651906285, - "learning_rate": 3.870024517408817e-06, - "loss": 1.0007, - "step": 1574 - }, - { - "epoch": 0.14203904946566262, - "grad_norm": 1.9376066855314475, - "learning_rate": 3.8698172675505425e-06, - "loss": 0.9246, - "step": 1575 - }, - { - "epoch": 0.14212923298913288, - "grad_norm": 1.0981099558772967, - "learning_rate": 3.86960985814915e-06, - "loss": 0.8068, - "step": 1576 - }, - { - "epoch": 0.14221941651260314, - "grad_norm": 1.3553923219796264, - "learning_rate": 3.869402289222335e-06, - "loss": 0.9229, - "step": 1577 - }, - { - "epoch": 0.1423096000360734, - "grad_norm": 1.585284244188542, - "learning_rate": 3.869194560787808e-06, - "loss": 0.9751, - "step": 1578 - }, - { - "epoch": 0.14239978355954366, - "grad_norm": 3.650062738166799, - "learning_rate": 3.868986672863296e-06, - "loss": 0.9266, - "step": 1579 - }, - { - "epoch": 0.14248996708301392, - "grad_norm": 1.5287713990854293, - "learning_rate": 3.868778625466535e-06, - "loss": 1.0956, - "step": 1580 - }, - { - "epoch": 0.14258015060648419, - "grad_norm": 1.7272791691253857, - "learning_rate": 3.868570418615278e-06, - "loss": 0.9201, - "step": 1581 - }, - { - "epoch": 0.14267033412995445, - "grad_norm": 1.12936043760013, - "learning_rate": 3.8683620523272885e-06, - "loss": 0.7921, - "step": 1582 - }, - { - "epoch": 0.1427605176534247, - "grad_norm": 1.647150486411012, - "learning_rate": 3.8681535266203464e-06, - "loss": 1.102, - "step": 1583 - }, - { - "epoch": 0.14285070117689497, - "grad_norm": 1.5590348539732266, - "learning_rate": 3.867944841512246e-06, - "loss": 1.0243, - "step": 1584 - }, - { - "epoch": 0.14294088470036526, - "grad_norm": 1.7075101062927975, - "learning_rate": 3.867735997020791e-06, - "loss": 1.0078, - "step": 1585 - }, - { - "epoch": 0.14303106822383552, - "grad_norm": 1.609925999806673, - "learning_rate": 3.867526993163802e-06, - "loss": 0.8975, - "step": 1586 - }, - { - "epoch": 0.14312125174730578, - "grad_norm": 2.2641604195027614, - "learning_rate": 3.867317829959113e-06, - "loss": 0.8644, - "step": 1587 - }, - { - "epoch": 0.14321143527077604, - "grad_norm": 1.5681380974476151, - "learning_rate": 3.8671085074245704e-06, - "loss": 0.7954, - "step": 1588 - }, - { - "epoch": 0.1433016187942463, - "grad_norm": 1.6334124471357152, - "learning_rate": 3.866899025578035e-06, - "loss": 1.0565, - "step": 1589 - }, - { - "epoch": 0.14339180231771656, - "grad_norm": 1.404890293168774, - "learning_rate": 3.86668938443738e-06, - "loss": 0.9673, - "step": 1590 - }, - { - "epoch": 0.14348198584118682, - "grad_norm": 1.643568395873647, - "learning_rate": 3.866479584020495e-06, - "loss": 1.0112, - "step": 1591 - }, - { - "epoch": 0.14357216936465708, - "grad_norm": 1.5514391985531535, - "learning_rate": 3.866269624345279e-06, - "loss": 1.0299, - "step": 1592 - }, - { - "epoch": 0.14366235288812734, - "grad_norm": 1.5594908921592525, - "learning_rate": 3.866059505429649e-06, - "loss": 1.073, - "step": 1593 - }, - { - "epoch": 0.1437525364115976, - "grad_norm": 0.901050383832509, - "learning_rate": 3.865849227291532e-06, - "loss": 0.7938, - "step": 1594 - }, - { - "epoch": 0.14384271993506786, - "grad_norm": 1.6168663701202037, - "learning_rate": 3.865638789948872e-06, - "loss": 0.9917, - "step": 1595 - }, - { - "epoch": 0.14393290345853813, - "grad_norm": 1.7422478498603513, - "learning_rate": 3.865428193419622e-06, - "loss": 0.9564, - "step": 1596 - }, - { - "epoch": 0.14402308698200839, - "grad_norm": 1.600158671617608, - "learning_rate": 3.865217437721753e-06, - "loss": 0.984, - "step": 1597 - }, - { - "epoch": 0.14411327050547865, - "grad_norm": 1.5695766413928873, - "learning_rate": 3.865006522873249e-06, - "loss": 0.9291, - "step": 1598 - }, - { - "epoch": 0.1442034540289489, - "grad_norm": 1.666460413886395, - "learning_rate": 3.864795448892103e-06, - "loss": 0.9997, - "step": 1599 - }, - { - "epoch": 0.14429363755241917, - "grad_norm": 1.527771326494564, - "learning_rate": 3.864584215796327e-06, - "loss": 1.0409, - "step": 1600 - }, - { - "epoch": 0.14438382107588943, - "grad_norm": 1.5875746071172379, - "learning_rate": 3.8643728236039455e-06, - "loss": 1.0168, - "step": 1601 - }, - { - "epoch": 0.1444740045993597, - "grad_norm": 1.38267697462202, - "learning_rate": 3.864161272332994e-06, - "loss": 0.9766, - "step": 1602 - }, - { - "epoch": 0.14456418812282995, - "grad_norm": 1.4304598197054073, - "learning_rate": 3.863949562001524e-06, - "loss": 1.0396, - "step": 1603 - }, - { - "epoch": 0.1446543716463002, - "grad_norm": 1.492757558532295, - "learning_rate": 3.8637376926276005e-06, - "loss": 0.9696, - "step": 1604 - }, - { - "epoch": 0.14474455516977047, - "grad_norm": 1.8868385419074785, - "learning_rate": 3.8635256642293e-06, - "loss": 0.993, - "step": 1605 - }, - { - "epoch": 0.14483473869324073, - "grad_norm": 1.5436642395167393, - "learning_rate": 3.863313476824714e-06, - "loss": 0.9737, - "step": 1606 - }, - { - "epoch": 0.144924922216711, - "grad_norm": 1.7868373309418277, - "learning_rate": 3.863101130431948e-06, - "loss": 0.9688, - "step": 1607 - }, - { - "epoch": 0.14501510574018128, - "grad_norm": 1.4532354169343902, - "learning_rate": 3.862888625069121e-06, - "loss": 1.0323, - "step": 1608 - }, - { - "epoch": 0.14510528926365154, - "grad_norm": 1.314282886653133, - "learning_rate": 3.8626759607543645e-06, - "loss": 0.9647, - "step": 1609 - }, - { - "epoch": 0.1451954727871218, - "grad_norm": 2.4726717739009625, - "learning_rate": 3.862463137505825e-06, - "loss": 0.9394, - "step": 1610 - }, - { - "epoch": 0.14528565631059207, - "grad_norm": 1.518406390342289, - "learning_rate": 3.862250155341659e-06, - "loss": 0.9488, - "step": 1611 - }, - { - "epoch": 0.14537583983406233, - "grad_norm": 1.4359492354385623, - "learning_rate": 3.862037014280043e-06, - "loss": 1.1215, - "step": 1612 - }, - { - "epoch": 0.1454660233575326, - "grad_norm": 1.3202821175053392, - "learning_rate": 3.861823714339162e-06, - "loss": 0.9805, - "step": 1613 - }, - { - "epoch": 0.14555620688100285, - "grad_norm": 1.3108359641945821, - "learning_rate": 3.861610255537215e-06, - "loss": 0.9993, - "step": 1614 - }, - { - "epoch": 0.1456463904044731, - "grad_norm": 1.4427612882551621, - "learning_rate": 3.8613966378924165e-06, - "loss": 1.028, - "step": 1615 - }, - { - "epoch": 0.14573657392794337, - "grad_norm": 1.620810242646136, - "learning_rate": 3.861182861422993e-06, - "loss": 0.9401, - "step": 1616 - }, - { - "epoch": 0.14582675745141363, - "grad_norm": 1.3756758156125501, - "learning_rate": 3.860968926147185e-06, - "loss": 1.0679, - "step": 1617 - }, - { - "epoch": 0.1459169409748839, - "grad_norm": 4.742592305686216, - "learning_rate": 3.860754832083247e-06, - "loss": 0.8327, - "step": 1618 - }, - { - "epoch": 0.14600712449835415, - "grad_norm": 1.3533041432400619, - "learning_rate": 3.8605405792494475e-06, - "loss": 1.064, - "step": 1619 - }, - { - "epoch": 0.1460973080218244, - "grad_norm": 1.6329181399691197, - "learning_rate": 3.860326167664066e-06, - "loss": 1.0066, - "step": 1620 - }, - { - "epoch": 0.14618749154529467, - "grad_norm": 1.5032702401589375, - "learning_rate": 3.860111597345399e-06, - "loss": 0.984, - "step": 1621 - }, - { - "epoch": 0.14627767506876493, - "grad_norm": 2.072328557560837, - "learning_rate": 3.859896868311753e-06, - "loss": 0.856, - "step": 1622 - }, - { - "epoch": 0.1463678585922352, - "grad_norm": 1.3332690084358785, - "learning_rate": 3.859681980581452e-06, - "loss": 1.075, - "step": 1623 - }, - { - "epoch": 0.14645804211570546, - "grad_norm": 1.6617093350063485, - "learning_rate": 3.859466934172829e-06, - "loss": 0.9618, - "step": 1624 - }, - { - "epoch": 0.14654822563917572, - "grad_norm": 1.3321186383798762, - "learning_rate": 3.859251729104235e-06, - "loss": 0.9292, - "step": 1625 - }, - { - "epoch": 0.14663840916264598, - "grad_norm": 1.2743247749941933, - "learning_rate": 3.859036365394031e-06, - "loss": 0.94, - "step": 1626 - }, - { - "epoch": 0.14672859268611624, - "grad_norm": 1.5844649069292394, - "learning_rate": 3.858820843060594e-06, - "loss": 0.9776, - "step": 1627 - }, - { - "epoch": 0.1468187762095865, - "grad_norm": 1.441119157686484, - "learning_rate": 3.858605162122314e-06, - "loss": 0.9686, - "step": 1628 - }, - { - "epoch": 0.14690895973305676, - "grad_norm": 1.466718445571928, - "learning_rate": 3.858389322597592e-06, - "loss": 0.9424, - "step": 1629 - }, - { - "epoch": 0.14699914325652702, - "grad_norm": 1.9524026984710583, - "learning_rate": 3.858173324504847e-06, - "loss": 1.0043, - "step": 1630 - }, - { - "epoch": 0.14708932677999728, - "grad_norm": 1.4777708407568264, - "learning_rate": 3.857957167862508e-06, - "loss": 0.9513, - "step": 1631 - }, - { - "epoch": 0.14717951030346757, - "grad_norm": 1.0479340391949963, - "learning_rate": 3.857740852689018e-06, - "loss": 0.8193, - "step": 1632 - }, - { - "epoch": 0.14726969382693783, - "grad_norm": 1.756485841059335, - "learning_rate": 3.857524379002835e-06, - "loss": 0.9911, - "step": 1633 - }, - { - "epoch": 0.1473598773504081, - "grad_norm": 2.767562876937309, - "learning_rate": 3.85730774682243e-06, - "loss": 1.0159, - "step": 1634 - }, - { - "epoch": 0.14745006087387835, - "grad_norm": 1.484215255975752, - "learning_rate": 3.8570909561662875e-06, - "loss": 1.0621, - "step": 1635 - }, - { - "epoch": 0.1475402443973486, - "grad_norm": 1.4694094699206066, - "learning_rate": 3.8568740070529045e-06, - "loss": 1.0694, - "step": 1636 - }, - { - "epoch": 0.14763042792081887, - "grad_norm": 1.6036655667230586, - "learning_rate": 3.856656899500792e-06, - "loss": 0.9527, - "step": 1637 - }, - { - "epoch": 0.14772061144428913, - "grad_norm": 1.6053993971201315, - "learning_rate": 3.856439633528476e-06, - "loss": 0.9238, - "step": 1638 - }, - { - "epoch": 0.1478107949677594, - "grad_norm": 2.072269421716892, - "learning_rate": 3.856222209154494e-06, - "loss": 1.0449, - "step": 1639 - }, - { - "epoch": 0.14790097849122966, - "grad_norm": 1.4896840770594886, - "learning_rate": 3.856004626397397e-06, - "loss": 1.0143, - "step": 1640 - }, - { - "epoch": 0.14799116201469992, - "grad_norm": 1.547999118065398, - "learning_rate": 3.855786885275753e-06, - "loss": 0.9635, - "step": 1641 - }, - { - "epoch": 0.14808134553817018, - "grad_norm": 1.871851057049145, - "learning_rate": 3.855568985808138e-06, - "loss": 0.9083, - "step": 1642 - }, - { - "epoch": 0.14817152906164044, - "grad_norm": 1.7336863619807763, - "learning_rate": 3.855350928013145e-06, - "loss": 0.954, - "step": 1643 - }, - { - "epoch": 0.1482617125851107, - "grad_norm": 1.4733731980921614, - "learning_rate": 3.8551327119093825e-06, - "loss": 0.999, - "step": 1644 - }, - { - "epoch": 0.14835189610858096, - "grad_norm": 1.6456394403260888, - "learning_rate": 3.854914337515467e-06, - "loss": 0.9302, - "step": 1645 - }, - { - "epoch": 0.14844207963205122, - "grad_norm": 1.4559759250473983, - "learning_rate": 3.8546958048500324e-06, - "loss": 0.8833, - "step": 1646 - }, - { - "epoch": 0.14853226315552148, - "grad_norm": 0.762271660869504, - "learning_rate": 3.854477113931725e-06, - "loss": 0.8455, - "step": 1647 - }, - { - "epoch": 0.14862244667899174, - "grad_norm": 1.5171391111096226, - "learning_rate": 3.854258264779205e-06, - "loss": 1.0087, - "step": 1648 - }, - { - "epoch": 0.148712630202462, - "grad_norm": 1.413608077571072, - "learning_rate": 3.854039257411145e-06, - "loss": 0.9646, - "step": 1649 - }, - { - "epoch": 0.14880281372593226, - "grad_norm": 1.6782676906053922, - "learning_rate": 3.853820091846232e-06, - "loss": 0.9513, - "step": 1650 - }, - { - "epoch": 0.14889299724940253, - "grad_norm": 1.5004006486521626, - "learning_rate": 3.853600768103169e-06, - "loss": 0.9899, - "step": 1651 - }, - { - "epoch": 0.1489831807728728, - "grad_norm": 1.8144804559297882, - "learning_rate": 3.853381286200667e-06, - "loss": 0.9752, - "step": 1652 - }, - { - "epoch": 0.14907336429634305, - "grad_norm": 1.5730778272646633, - "learning_rate": 3.853161646157453e-06, - "loss": 1.0744, - "step": 1653 - }, - { - "epoch": 0.1491635478198133, - "grad_norm": 2.918697812514404, - "learning_rate": 3.852941847992269e-06, - "loss": 0.9545, - "step": 1654 - }, - { - "epoch": 0.14925373134328357, - "grad_norm": 7.9086300128669516, - "learning_rate": 3.852721891723871e-06, - "loss": 0.99, - "step": 1655 - }, - { - "epoch": 0.14934391486675386, - "grad_norm": 1.56751978385645, - "learning_rate": 3.852501777371025e-06, - "loss": 0.9674, - "step": 1656 - }, - { - "epoch": 0.14943409839022412, - "grad_norm": 1.4866592499557947, - "learning_rate": 3.8522815049525125e-06, - "loss": 1.0213, - "step": 1657 - }, - { - "epoch": 0.14952428191369438, - "grad_norm": 1.5380331557163798, - "learning_rate": 3.852061074487129e-06, - "loss": 1.0434, - "step": 1658 - }, - { - "epoch": 0.14961446543716464, - "grad_norm": 1.4971396672158697, - "learning_rate": 3.851840485993682e-06, - "loss": 0.8953, - "step": 1659 - }, - { - "epoch": 0.1497046489606349, - "grad_norm": 1.3893846273496242, - "learning_rate": 3.851619739490994e-06, - "loss": 0.9885, - "step": 1660 - }, - { - "epoch": 0.14979483248410516, - "grad_norm": 1.4350718017053379, - "learning_rate": 3.8513988349978996e-06, - "loss": 1.0156, - "step": 1661 - }, - { - "epoch": 0.14988501600757542, - "grad_norm": 1.4854621519995699, - "learning_rate": 3.851177772533249e-06, - "loss": 1.0151, - "step": 1662 - }, - { - "epoch": 0.14997519953104568, - "grad_norm": 1.9720226516699397, - "learning_rate": 3.850956552115903e-06, - "loss": 1.1515, - "step": 1663 - }, - { - "epoch": 0.15006538305451594, - "grad_norm": 0.8978548565601452, - "learning_rate": 3.850735173764738e-06, - "loss": 0.802, - "step": 1664 - }, - { - "epoch": 0.1501555665779862, - "grad_norm": 1.7888492640390707, - "learning_rate": 3.850513637498642e-06, - "loss": 1.0141, - "step": 1665 - }, - { - "epoch": 0.15024575010145647, - "grad_norm": 1.7856906616828099, - "learning_rate": 3.850291943336521e-06, - "loss": 1.0122, - "step": 1666 - }, - { - "epoch": 0.15033593362492673, - "grad_norm": 1.7240327279287955, - "learning_rate": 3.850070091297287e-06, - "loss": 1.04, - "step": 1667 - }, - { - "epoch": 0.150426117148397, - "grad_norm": 0.928261408430863, - "learning_rate": 3.8498480813998735e-06, - "loss": 0.8649, - "step": 1668 - }, - { - "epoch": 0.15051630067186725, - "grad_norm": 1.5140562295537805, - "learning_rate": 3.84962591366322e-06, - "loss": 0.9888, - "step": 1669 - }, - { - "epoch": 0.1506064841953375, - "grad_norm": 1.8413268507945322, - "learning_rate": 3.8494035881062855e-06, - "loss": 1.0356, - "step": 1670 - }, - { - "epoch": 0.15069666771880777, - "grad_norm": 1.6822881393474374, - "learning_rate": 3.84918110474804e-06, - "loss": 0.9668, - "step": 1671 - }, - { - "epoch": 0.15078685124227803, - "grad_norm": 1.4905500765030857, - "learning_rate": 3.8489584636074655e-06, - "loss": 0.9744, - "step": 1672 - }, - { - "epoch": 0.1508770347657483, - "grad_norm": 3.8015084585416576, - "learning_rate": 3.848735664703561e-06, - "loss": 0.7903, - "step": 1673 - }, - { - "epoch": 0.15096721828921855, - "grad_norm": 1.37094189272947, - "learning_rate": 3.8485127080553346e-06, - "loss": 0.9837, - "step": 1674 - }, - { - "epoch": 0.1510574018126888, - "grad_norm": 1.5719181032129563, - "learning_rate": 3.8482895936818115e-06, - "loss": 0.9823, - "step": 1675 - }, - { - "epoch": 0.15114758533615907, - "grad_norm": 1.555761225288863, - "learning_rate": 3.848066321602029e-06, - "loss": 1.0064, - "step": 1676 - }, - { - "epoch": 0.15123776885962933, - "grad_norm": 1.575060522339568, - "learning_rate": 3.847842891835038e-06, - "loss": 0.9527, - "step": 1677 - }, - { - "epoch": 0.1513279523830996, - "grad_norm": 0.7369345841329464, - "learning_rate": 3.847619304399902e-06, - "loss": 0.8062, - "step": 1678 - }, - { - "epoch": 0.15141813590656986, - "grad_norm": 1.484724866894249, - "learning_rate": 3.8473955593157e-06, - "loss": 1.0147, - "step": 1679 - }, - { - "epoch": 0.15150831943004014, - "grad_norm": 1.5014592066713286, - "learning_rate": 3.847171656601522e-06, - "loss": 0.955, - "step": 1680 - }, - { - "epoch": 0.1515985029535104, - "grad_norm": 1.4877013030200137, - "learning_rate": 3.846947596276473e-06, - "loss": 1.01, - "step": 1681 - }, - { - "epoch": 0.15168868647698067, - "grad_norm": 1.64671930376987, - "learning_rate": 3.846723378359672e-06, - "loss": 1.0338, - "step": 1682 - }, - { - "epoch": 0.15177887000045093, - "grad_norm": 1.5912380243826154, - "learning_rate": 3.846499002870249e-06, - "loss": 0.9936, - "step": 1683 - }, - { - "epoch": 0.1518690535239212, - "grad_norm": 1.6666473228603607, - "learning_rate": 3.846274469827349e-06, - "loss": 1.002, - "step": 1684 - }, - { - "epoch": 0.15195923704739145, - "grad_norm": 1.5884884768059964, - "learning_rate": 3.846049779250132e-06, - "loss": 0.9979, - "step": 1685 - }, - { - "epoch": 0.1520494205708617, - "grad_norm": 0.8206430768174132, - "learning_rate": 3.845824931157769e-06, - "loss": 0.8477, - "step": 1686 - }, - { - "epoch": 0.15213960409433197, - "grad_norm": 1.3531057995145357, - "learning_rate": 3.845599925569444e-06, - "loss": 0.98, - "step": 1687 - }, - { - "epoch": 0.15222978761780223, - "grad_norm": 1.7147650559428016, - "learning_rate": 3.845374762504357e-06, - "loss": 0.9504, - "step": 1688 - }, - { - "epoch": 0.1523199711412725, - "grad_norm": 1.7252073191023383, - "learning_rate": 3.8451494419817204e-06, - "loss": 1.0361, - "step": 1689 - }, - { - "epoch": 0.15241015466474275, - "grad_norm": 1.9967542775992955, - "learning_rate": 3.8449239640207594e-06, - "loss": 0.9222, - "step": 1690 - }, - { - "epoch": 0.152500338188213, - "grad_norm": 1.7027466379954934, - "learning_rate": 3.844698328640713e-06, - "loss": 1.0197, - "step": 1691 - }, - { - "epoch": 0.15259052171168327, - "grad_norm": 1.2980408541240986, - "learning_rate": 3.844472535860833e-06, - "loss": 0.9996, - "step": 1692 - }, - { - "epoch": 0.15268070523515354, - "grad_norm": 1.954758655158637, - "learning_rate": 3.8442465857003864e-06, - "loss": 1.0428, - "step": 1693 - }, - { - "epoch": 0.1527708887586238, - "grad_norm": 1.477918053106463, - "learning_rate": 3.844020478178653e-06, - "loss": 0.9338, - "step": 1694 - }, - { - "epoch": 0.15286107228209406, - "grad_norm": 1.6351452093940904, - "learning_rate": 3.843794213314923e-06, - "loss": 1.0164, - "step": 1695 - }, - { - "epoch": 0.15295125580556432, - "grad_norm": 1.5626705839500759, - "learning_rate": 3.843567791128505e-06, - "loss": 1.0192, - "step": 1696 - }, - { - "epoch": 0.15304143932903458, - "grad_norm": 1.377829328379277, - "learning_rate": 3.843341211638717e-06, - "loss": 0.9754, - "step": 1697 - }, - { - "epoch": 0.15313162285250484, - "grad_norm": 1.5810212101729064, - "learning_rate": 3.843114474864894e-06, - "loss": 0.9479, - "step": 1698 - }, - { - "epoch": 0.1532218063759751, - "grad_norm": 1.5572572106809766, - "learning_rate": 3.84288758082638e-06, - "loss": 0.9872, - "step": 1699 - }, - { - "epoch": 0.15331198989944536, - "grad_norm": 1.4268891511032429, - "learning_rate": 3.842660529542536e-06, - "loss": 1.0726, - "step": 1700 - }, - { - "epoch": 0.15340217342291562, - "grad_norm": 1.4778091577115804, - "learning_rate": 3.842433321032736e-06, - "loss": 1.0047, - "step": 1701 - }, - { - "epoch": 0.15349235694638588, - "grad_norm": 1.394619337874076, - "learning_rate": 3.842205955316365e-06, - "loss": 0.9156, - "step": 1702 - }, - { - "epoch": 0.15358254046985614, - "grad_norm": 1.7365350417608703, - "learning_rate": 3.8419784324128256e-06, - "loss": 0.9001, - "step": 1703 - }, - { - "epoch": 0.15367272399332643, - "grad_norm": 1.5667113199701725, - "learning_rate": 3.841750752341529e-06, - "loss": 0.9583, - "step": 1704 - }, - { - "epoch": 0.1537629075167967, - "grad_norm": 1.5763743293003951, - "learning_rate": 3.841522915121902e-06, - "loss": 0.9866, - "step": 1705 - }, - { - "epoch": 0.15385309104026695, - "grad_norm": 1.4024219544262833, - "learning_rate": 3.841294920773387e-06, - "loss": 0.9203, - "step": 1706 - }, - { - "epoch": 0.15394327456373721, - "grad_norm": 1.7716464102093332, - "learning_rate": 3.841066769315436e-06, - "loss": 1.0038, - "step": 1707 - }, - { - "epoch": 0.15403345808720748, - "grad_norm": 1.4104086103260893, - "learning_rate": 3.840838460767518e-06, - "loss": 0.9983, - "step": 1708 - }, - { - "epoch": 0.15412364161067774, - "grad_norm": 1.61660815195324, - "learning_rate": 3.840609995149111e-06, - "loss": 0.9594, - "step": 1709 - }, - { - "epoch": 0.154213825134148, - "grad_norm": 1.6076083069380978, - "learning_rate": 3.84038137247971e-06, - "loss": 0.9866, - "step": 1710 - }, - { - "epoch": 0.15430400865761826, - "grad_norm": 1.42192874010416, - "learning_rate": 3.840152592778823e-06, - "loss": 0.9719, - "step": 1711 - }, - { - "epoch": 0.15439419218108852, - "grad_norm": 1.3207649042753284, - "learning_rate": 3.83992365606597e-06, - "loss": 1.033, - "step": 1712 - }, - { - "epoch": 0.15448437570455878, - "grad_norm": 1.7299639307793342, - "learning_rate": 3.8396945623606855e-06, - "loss": 0.944, - "step": 1713 - }, - { - "epoch": 0.15457455922802904, - "grad_norm": 1.3438397754576439, - "learning_rate": 3.8394653116825174e-06, - "loss": 0.9496, - "step": 1714 - }, - { - "epoch": 0.1546647427514993, - "grad_norm": 1.666605980086141, - "learning_rate": 3.839235904051026e-06, - "loss": 0.9762, - "step": 1715 - }, - { - "epoch": 0.15475492627496956, - "grad_norm": 1.5067133084113569, - "learning_rate": 3.8390063394857855e-06, - "loss": 0.9642, - "step": 1716 - }, - { - "epoch": 0.15484510979843982, - "grad_norm": 1.2139339658823907, - "learning_rate": 3.838776618006385e-06, - "loss": 1.0687, - "step": 1717 - }, - { - "epoch": 0.15493529332191008, - "grad_norm": 1.3541228898626279, - "learning_rate": 3.838546739632423e-06, - "loss": 0.9954, - "step": 1718 - }, - { - "epoch": 0.15502547684538034, - "grad_norm": 1.392128249768902, - "learning_rate": 3.838316704383517e-06, - "loss": 1.0388, - "step": 1719 - }, - { - "epoch": 0.1551156603688506, - "grad_norm": 1.550993782296621, - "learning_rate": 3.838086512279292e-06, - "loss": 0.99, - "step": 1720 - }, - { - "epoch": 0.15520584389232087, - "grad_norm": 1.861410165201599, - "learning_rate": 3.837856163339391e-06, - "loss": 0.9777, - "step": 1721 - }, - { - "epoch": 0.15529602741579113, - "grad_norm": 1.386676927392545, - "learning_rate": 3.837625657583469e-06, - "loss": 0.9139, - "step": 1722 - }, - { - "epoch": 0.1553862109392614, - "grad_norm": 1.429727105597891, - "learning_rate": 3.837394995031193e-06, - "loss": 0.942, - "step": 1723 - }, - { - "epoch": 0.15547639446273165, - "grad_norm": 1.4590302936319026, - "learning_rate": 3.837164175702245e-06, - "loss": 0.9942, - "step": 1724 - }, - { - "epoch": 0.1555665779862019, - "grad_norm": 1.7870848273587299, - "learning_rate": 3.836933199616319e-06, - "loss": 0.8981, - "step": 1725 - }, - { - "epoch": 0.15565676150967217, - "grad_norm": 1.3904836347070257, - "learning_rate": 3.836702066793124e-06, - "loss": 1.0556, - "step": 1726 - }, - { - "epoch": 0.15574694503314246, - "grad_norm": 1.4829239026620435, - "learning_rate": 3.836470777252381e-06, - "loss": 1.0461, - "step": 1727 - }, - { - "epoch": 0.15583712855661272, - "grad_norm": 1.7072154628775573, - "learning_rate": 3.836239331013825e-06, - "loss": 1.004, - "step": 1728 - }, - { - "epoch": 0.15592731208008298, - "grad_norm": 1.7238266617046745, - "learning_rate": 3.836007728097205e-06, - "loss": 0.9491, - "step": 1729 - }, - { - "epoch": 0.15601749560355324, - "grad_norm": 1.307111487060577, - "learning_rate": 3.835775968522282e-06, - "loss": 0.9466, - "step": 1730 - }, - { - "epoch": 0.1561076791270235, - "grad_norm": 1.7059724435268935, - "learning_rate": 3.83554405230883e-06, - "loss": 1.0026, - "step": 1731 - }, - { - "epoch": 0.15619786265049376, - "grad_norm": 1.2228520148288176, - "learning_rate": 3.835311979476639e-06, - "loss": 0.971, - "step": 1732 - }, - { - "epoch": 0.15628804617396402, - "grad_norm": 1.4169740997779487, - "learning_rate": 3.83507975004551e-06, - "loss": 0.8878, - "step": 1733 - }, - { - "epoch": 0.15637822969743428, - "grad_norm": 1.5844981610283573, - "learning_rate": 3.834847364035258e-06, - "loss": 0.9664, - "step": 1734 - }, - { - "epoch": 0.15646841322090455, - "grad_norm": 1.611492726559607, - "learning_rate": 3.834614821465712e-06, - "loss": 0.923, - "step": 1735 - }, - { - "epoch": 0.1565585967443748, - "grad_norm": 1.477221710151111, - "learning_rate": 3.834382122356713e-06, - "loss": 1.0234, - "step": 1736 - }, - { - "epoch": 0.15664878026784507, - "grad_norm": 1.4669107324370787, - "learning_rate": 3.834149266728117e-06, - "loss": 0.9486, - "step": 1737 - }, - { - "epoch": 0.15673896379131533, - "grad_norm": 0.7975175548283557, - "learning_rate": 3.833916254599792e-06, - "loss": 0.8532, - "step": 1738 - }, - { - "epoch": 0.1568291473147856, - "grad_norm": 0.7284024229895498, - "learning_rate": 3.83368308599162e-06, - "loss": 0.8092, - "step": 1739 - }, - { - "epoch": 0.15691933083825585, - "grad_norm": 0.6736913348686085, - "learning_rate": 3.833449760923498e-06, - "loss": 0.7586, - "step": 1740 - }, - { - "epoch": 0.1570095143617261, - "grad_norm": 1.4536306670420658, - "learning_rate": 3.83321627941533e-06, - "loss": 1.0286, - "step": 1741 - }, - { - "epoch": 0.15709969788519637, - "grad_norm": 1.8408203125, - "learning_rate": 3.832982641487043e-06, - "loss": 1.0413, - "step": 1742 - }, - { - "epoch": 0.15718988140866663, - "grad_norm": 1.456056488670499, - "learning_rate": 3.832748847158568e-06, - "loss": 1.0725, - "step": 1743 - }, - { - "epoch": 0.1572800649321369, - "grad_norm": 1.3964892834130302, - "learning_rate": 3.832514896449858e-06, - "loss": 1.01, - "step": 1744 - }, - { - "epoch": 0.15737024845560715, - "grad_norm": 1.493718984114287, - "learning_rate": 3.832280789380871e-06, - "loss": 0.9218, - "step": 1745 - }, - { - "epoch": 0.15746043197907741, - "grad_norm": 1.5478904550762427, - "learning_rate": 3.832046525971584e-06, - "loss": 0.8882, - "step": 1746 - }, - { - "epoch": 0.15755061550254768, - "grad_norm": 1.543081776788835, - "learning_rate": 3.831812106241987e-06, - "loss": 1.0415, - "step": 1747 - }, - { - "epoch": 0.15764079902601794, - "grad_norm": 1.4285378758713654, - "learning_rate": 3.8315775302120796e-06, - "loss": 0.9043, - "step": 1748 - }, - { - "epoch": 0.1577309825494882, - "grad_norm": 1.3591245223168795, - "learning_rate": 3.831342797901878e-06, - "loss": 1.0005, - "step": 1749 - }, - { - "epoch": 0.15782116607295846, - "grad_norm": 0.7454602013191539, - "learning_rate": 3.831107909331411e-06, - "loss": 0.838, - "step": 1750 - }, - { - "epoch": 0.15791134959642875, - "grad_norm": 1.6862681095629786, - "learning_rate": 3.830872864520721e-06, - "loss": 1.0287, - "step": 1751 - }, - { - "epoch": 0.158001533119899, - "grad_norm": 0.8131075568237749, - "learning_rate": 3.830637663489862e-06, - "loss": 0.8955, - "step": 1752 - }, - { - "epoch": 0.15809171664336927, - "grad_norm": 0.8013792456831423, - "learning_rate": 3.830402306258904e-06, - "loss": 0.9283, - "step": 1753 - }, - { - "epoch": 0.15818190016683953, - "grad_norm": 1.639475247004593, - "learning_rate": 3.830166792847929e-06, - "loss": 0.9764, - "step": 1754 - }, - { - "epoch": 0.1582720836903098, - "grad_norm": 1.5098309225196567, - "learning_rate": 3.829931123277031e-06, - "loss": 0.9262, - "step": 1755 - }, - { - "epoch": 0.15836226721378005, - "grad_norm": 1.3960477132950455, - "learning_rate": 3.8296952975663204e-06, - "loss": 0.9743, - "step": 1756 - }, - { - "epoch": 0.1584524507372503, - "grad_norm": 1.3788920717888293, - "learning_rate": 3.829459315735918e-06, - "loss": 1.0483, - "step": 1757 - }, - { - "epoch": 0.15854263426072057, - "grad_norm": 1.5319655945602237, - "learning_rate": 3.829223177805959e-06, - "loss": 0.9116, - "step": 1758 - }, - { - "epoch": 0.15863281778419083, - "grad_norm": 1.465606002847172, - "learning_rate": 3.828986883796591e-06, - "loss": 1.0252, - "step": 1759 - }, - { - "epoch": 0.1587230013076611, - "grad_norm": 1.4428140025454292, - "learning_rate": 3.828750433727979e-06, - "loss": 0.9731, - "step": 1760 - }, - { - "epoch": 0.15881318483113135, - "grad_norm": 1.5426804671603043, - "learning_rate": 3.828513827620296e-06, - "loss": 1.0313, - "step": 1761 - }, - { - "epoch": 0.15890336835460162, - "grad_norm": 1.594779336666961, - "learning_rate": 3.82827706549373e-06, - "loss": 1.0397, - "step": 1762 - }, - { - "epoch": 0.15899355187807188, - "grad_norm": 1.6286437191251575, - "learning_rate": 3.828040147368484e-06, - "loss": 0.9713, - "step": 1763 - }, - { - "epoch": 0.15908373540154214, - "grad_norm": 1.3910646493698635, - "learning_rate": 3.827803073264774e-06, - "loss": 0.9811, - "step": 1764 - }, - { - "epoch": 0.1591739189250124, - "grad_norm": 1.5017390501646513, - "learning_rate": 3.827565843202826e-06, - "loss": 0.981, - "step": 1765 - }, - { - "epoch": 0.15926410244848266, - "grad_norm": 1.6612766927924014, - "learning_rate": 3.827328457202884e-06, - "loss": 0.8632, - "step": 1766 - }, - { - "epoch": 0.15935428597195292, - "grad_norm": 1.2814683960654047, - "learning_rate": 3.8270909152852014e-06, - "loss": 0.9172, - "step": 1767 - }, - { - "epoch": 0.15944446949542318, - "grad_norm": 1.7949797877286786, - "learning_rate": 3.826853217470048e-06, - "loss": 0.9868, - "step": 1768 - }, - { - "epoch": 0.15953465301889344, - "grad_norm": 1.4504506923443627, - "learning_rate": 3.826615363777705e-06, - "loss": 1.0586, - "step": 1769 - }, - { - "epoch": 0.1596248365423637, - "grad_norm": 1.4237332619879657, - "learning_rate": 3.826377354228468e-06, - "loss": 0.9637, - "step": 1770 - }, - { - "epoch": 0.15971502006583396, - "grad_norm": 0.8581115104242935, - "learning_rate": 3.826139188842643e-06, - "loss": 0.8154, - "step": 1771 - }, - { - "epoch": 0.15980520358930422, - "grad_norm": 1.3302811567087878, - "learning_rate": 3.825900867640554e-06, - "loss": 1.0416, - "step": 1772 - }, - { - "epoch": 0.15989538711277448, - "grad_norm": 1.4708963497736147, - "learning_rate": 3.825662390642535e-06, - "loss": 1.0248, - "step": 1773 - }, - { - "epoch": 0.15998557063624474, - "grad_norm": 1.5635685890653443, - "learning_rate": 3.825423757868934e-06, - "loss": 1.0834, - "step": 1774 - }, - { - "epoch": 0.16007575415971503, - "grad_norm": 1.53282854886456, - "learning_rate": 3.825184969340114e-06, - "loss": 1.0115, - "step": 1775 - }, - { - "epoch": 0.1601659376831853, - "grad_norm": 1.2880904723254614, - "learning_rate": 3.824946025076447e-06, - "loss": 1.0099, - "step": 1776 - }, - { - "epoch": 0.16025612120665556, - "grad_norm": 1.6110086390182565, - "learning_rate": 3.824706925098323e-06, - "loss": 0.9284, - "step": 1777 - }, - { - "epoch": 0.16034630473012582, - "grad_norm": 1.915260344953026, - "learning_rate": 3.824467669426143e-06, - "loss": 1.0347, - "step": 1778 - }, - { - "epoch": 0.16043648825359608, - "grad_norm": 1.506158029747988, - "learning_rate": 3.824228258080321e-06, - "loss": 0.9481, - "step": 1779 - }, - { - "epoch": 0.16052667177706634, - "grad_norm": 1.600467661222538, - "learning_rate": 3.823988691081285e-06, - "loss": 0.9004, - "step": 1780 - }, - { - "epoch": 0.1606168553005366, - "grad_norm": 1.3507869457788884, - "learning_rate": 3.823748968449478e-06, - "loss": 1.012, - "step": 1781 - }, - { - "epoch": 0.16070703882400686, - "grad_norm": 1.5730803280331314, - "learning_rate": 3.823509090205352e-06, - "loss": 0.8798, - "step": 1782 - }, - { - "epoch": 0.16079722234747712, - "grad_norm": 2.4310762907208265, - "learning_rate": 3.823269056369376e-06, - "loss": 0.9335, - "step": 1783 - }, - { - "epoch": 0.16088740587094738, - "grad_norm": 1.343335730989902, - "learning_rate": 3.8230288669620295e-06, - "loss": 1.0118, - "step": 1784 - }, - { - "epoch": 0.16097758939441764, - "grad_norm": 1.3244146241565318, - "learning_rate": 3.822788522003809e-06, - "loss": 0.9865, - "step": 1785 - }, - { - "epoch": 0.1610677729178879, - "grad_norm": 1.7300305640547153, - "learning_rate": 3.822548021515221e-06, - "loss": 1.0552, - "step": 1786 - }, - { - "epoch": 0.16115795644135816, - "grad_norm": 1.401626105763033, - "learning_rate": 3.822307365516787e-06, - "loss": 0.9579, - "step": 1787 - }, - { - "epoch": 0.16124813996482842, - "grad_norm": 1.6274972946683604, - "learning_rate": 3.8220665540290395e-06, - "loss": 0.9993, - "step": 1788 - }, - { - "epoch": 0.16133832348829868, - "grad_norm": 1.474552299961261, - "learning_rate": 3.8218255870725265e-06, - "loss": 1.0448, - "step": 1789 - }, - { - "epoch": 0.16142850701176895, - "grad_norm": 1.4162031799280705, - "learning_rate": 3.82158446466781e-06, - "loss": 0.9302, - "step": 1790 - }, - { - "epoch": 0.1615186905352392, - "grad_norm": 1.4048524057509653, - "learning_rate": 3.821343186835462e-06, - "loss": 0.9942, - "step": 1791 - }, - { - "epoch": 0.16160887405870947, - "grad_norm": 1.3810056530686898, - "learning_rate": 3.821101753596072e-06, - "loss": 1.0517, - "step": 1792 - }, - { - "epoch": 0.16169905758217973, - "grad_norm": 1.6401009585544113, - "learning_rate": 3.820860164970237e-06, - "loss": 0.9722, - "step": 1793 - }, - { - "epoch": 0.16178924110565, - "grad_norm": 1.3101437171696, - "learning_rate": 3.820618420978574e-06, - "loss": 1.0543, - "step": 1794 - }, - { - "epoch": 0.16187942462912025, - "grad_norm": 1.824658163272239, - "learning_rate": 3.820376521641708e-06, - "loss": 0.9762, - "step": 1795 - }, - { - "epoch": 0.1619696081525905, - "grad_norm": 1.4873110995840113, - "learning_rate": 3.82013446698028e-06, - "loss": 0.9943, - "step": 1796 - }, - { - "epoch": 0.16205979167606077, - "grad_norm": 1.595644329953581, - "learning_rate": 3.819892257014943e-06, - "loss": 0.9438, - "step": 1797 - }, - { - "epoch": 0.16214997519953103, - "grad_norm": 1.451821347451039, - "learning_rate": 3.819649891766364e-06, - "loss": 1.0077, - "step": 1798 - }, - { - "epoch": 0.16224015872300132, - "grad_norm": 1.4132088460769006, - "learning_rate": 3.819407371255222e-06, - "loss": 0.9696, - "step": 1799 - }, - { - "epoch": 0.16233034224647158, - "grad_norm": 1.44091904425896, - "learning_rate": 3.819164695502212e-06, - "loss": 0.9858, - "step": 1800 - }, - { - "epoch": 0.16242052576994184, - "grad_norm": 1.5378041036431267, - "learning_rate": 3.818921864528039e-06, - "loss": 0.9291, - "step": 1801 - }, - { - "epoch": 0.1625107092934121, - "grad_norm": 1.705226057174501, - "learning_rate": 3.818678878353423e-06, - "loss": 1.0387, - "step": 1802 - }, - { - "epoch": 0.16260089281688236, - "grad_norm": 1.5346383733077358, - "learning_rate": 3.818435736999097e-06, - "loss": 0.9335, - "step": 1803 - }, - { - "epoch": 0.16269107634035262, - "grad_norm": 1.4746090516348984, - "learning_rate": 3.818192440485807e-06, - "loss": 0.9803, - "step": 1804 - }, - { - "epoch": 0.16278125986382289, - "grad_norm": 1.8357867239866335, - "learning_rate": 3.817948988834314e-06, - "loss": 0.923, - "step": 1805 - }, - { - "epoch": 0.16287144338729315, - "grad_norm": 1.2918877822546282, - "learning_rate": 3.817705382065388e-06, - "loss": 0.981, - "step": 1806 - }, - { - "epoch": 0.1629616269107634, - "grad_norm": 1.2196722209422028, - "learning_rate": 3.8174616201998155e-06, - "loss": 0.9525, - "step": 1807 - }, - { - "epoch": 0.16305181043423367, - "grad_norm": 1.7794060783499368, - "learning_rate": 3.817217703258397e-06, - "loss": 1.0233, - "step": 1808 - }, - { - "epoch": 0.16314199395770393, - "grad_norm": 1.3333691453097993, - "learning_rate": 3.816973631261943e-06, - "loss": 0.9725, - "step": 1809 - }, - { - "epoch": 0.1632321774811742, - "grad_norm": 1.4710912508950909, - "learning_rate": 3.816729404231281e-06, - "loss": 1.01, - "step": 1810 - }, - { - "epoch": 0.16332236100464445, - "grad_norm": 2.2319540499807755, - "learning_rate": 3.816485022187249e-06, - "loss": 1.048, - "step": 1811 - }, - { - "epoch": 0.1634125445281147, - "grad_norm": 1.5114067603403911, - "learning_rate": 3.816240485150698e-06, - "loss": 0.9313, - "step": 1812 - }, - { - "epoch": 0.16350272805158497, - "grad_norm": 1.3497475582326108, - "learning_rate": 3.815995793142495e-06, - "loss": 0.9778, - "step": 1813 - }, - { - "epoch": 0.16359291157505523, - "grad_norm": 1.667936588307831, - "learning_rate": 3.815750946183518e-06, - "loss": 0.922, - "step": 1814 - }, - { - "epoch": 0.1636830950985255, - "grad_norm": 1.4856148611195379, - "learning_rate": 3.815505944294658e-06, - "loss": 1.0348, - "step": 1815 - }, - { - "epoch": 0.16377327862199575, - "grad_norm": 1.224189893313056, - "learning_rate": 3.81526078749682e-06, - "loss": 0.9387, - "step": 1816 - }, - { - "epoch": 0.16386346214546602, - "grad_norm": 1.4786492190356413, - "learning_rate": 3.8150154758109225e-06, - "loss": 0.92, - "step": 1817 - }, - { - "epoch": 0.16395364566893628, - "grad_norm": 1.40555165222859, - "learning_rate": 3.814770009257896e-06, - "loss": 0.9534, - "step": 1818 - }, - { - "epoch": 0.16404382919240654, - "grad_norm": 1.448673032439129, - "learning_rate": 3.814524387858687e-06, - "loss": 0.9997, - "step": 1819 - }, - { - "epoch": 0.1641340127158768, - "grad_norm": 1.525541447059226, - "learning_rate": 3.814278611634251e-06, - "loss": 1.0069, - "step": 1820 - }, - { - "epoch": 0.16422419623934706, - "grad_norm": 1.514576106554899, - "learning_rate": 3.8140326806055606e-06, - "loss": 0.9522, - "step": 1821 - }, - { - "epoch": 0.16431437976281732, - "grad_norm": 1.6154216763852713, - "learning_rate": 3.8137865947935992e-06, - "loss": 1.0372, - "step": 1822 - }, - { - "epoch": 0.1644045632862876, - "grad_norm": 1.5869046724481803, - "learning_rate": 3.8135403542193646e-06, - "loss": 0.9966, - "step": 1823 - }, - { - "epoch": 0.16449474680975787, - "grad_norm": 1.8119023093948203, - "learning_rate": 3.813293958903867e-06, - "loss": 0.9166, - "step": 1824 - }, - { - "epoch": 0.16458493033322813, - "grad_norm": 1.4679610385661777, - "learning_rate": 3.8130474088681306e-06, - "loss": 0.9467, - "step": 1825 - }, - { - "epoch": 0.1646751138566984, - "grad_norm": 1.6930634659139117, - "learning_rate": 3.8128007041331927e-06, - "loss": 0.919, - "step": 1826 - }, - { - "epoch": 0.16476529738016865, - "grad_norm": 1.4291694411240645, - "learning_rate": 3.812553844720102e-06, - "loss": 0.8961, - "step": 1827 - }, - { - "epoch": 0.1648554809036389, - "grad_norm": 1.54458487171103, - "learning_rate": 3.8123068306499236e-06, - "loss": 1.0048, - "step": 1828 - }, - { - "epoch": 0.16494566442710917, - "grad_norm": 1.4618763074669245, - "learning_rate": 3.812059661943733e-06, - "loss": 0.9837, - "step": 1829 - }, - { - "epoch": 0.16503584795057943, - "grad_norm": 1.3051173706913155, - "learning_rate": 3.811812338622621e-06, - "loss": 0.9585, - "step": 1830 - }, - { - "epoch": 0.1651260314740497, - "grad_norm": 1.3147399498714427, - "learning_rate": 3.81156486070769e-06, - "loss": 0.9726, - "step": 1831 - }, - { - "epoch": 0.16521621499751996, - "grad_norm": 1.4293053955424666, - "learning_rate": 3.811317228220056e-06, - "loss": 1.0292, - "step": 1832 - }, - { - "epoch": 0.16530639852099022, - "grad_norm": 1.5775048576558282, - "learning_rate": 3.811069441180849e-06, - "loss": 1.0235, - "step": 1833 - }, - { - "epoch": 0.16539658204446048, - "grad_norm": 1.7318334429520632, - "learning_rate": 3.8108214996112107e-06, - "loss": 0.9853, - "step": 1834 - }, - { - "epoch": 0.16548676556793074, - "grad_norm": 1.4565111310895613, - "learning_rate": 3.810573403532297e-06, - "loss": 1.0231, - "step": 1835 - }, - { - "epoch": 0.165576949091401, - "grad_norm": 1.3041648960203067, - "learning_rate": 3.8103251529652774e-06, - "loss": 0.9597, - "step": 1836 - }, - { - "epoch": 0.16566713261487126, - "grad_norm": 1.4011293641430052, - "learning_rate": 3.810076747931334e-06, - "loss": 0.9584, - "step": 1837 - }, - { - "epoch": 0.16575731613834152, - "grad_norm": 1.4175613700862533, - "learning_rate": 3.809828188451662e-06, - "loss": 0.9399, - "step": 1838 - }, - { - "epoch": 0.16584749966181178, - "grad_norm": 1.4032168419904858, - "learning_rate": 3.809579474547469e-06, - "loss": 0.9766, - "step": 1839 - }, - { - "epoch": 0.16593768318528204, - "grad_norm": 1.7179281870939667, - "learning_rate": 3.809330606239977e-06, - "loss": 1.0261, - "step": 1840 - }, - { - "epoch": 0.1660278667087523, - "grad_norm": 1.420435165966656, - "learning_rate": 3.809081583550422e-06, - "loss": 1.0516, - "step": 1841 - }, - { - "epoch": 0.16611805023222256, - "grad_norm": 1.7401620355335043, - "learning_rate": 3.808832406500051e-06, - "loss": 1.0608, - "step": 1842 - }, - { - "epoch": 0.16620823375569282, - "grad_norm": 1.5224990693138047, - "learning_rate": 3.8085830751101253e-06, - "loss": 1.051, - "step": 1843 - }, - { - "epoch": 0.16629841727916309, - "grad_norm": 1.527694700852982, - "learning_rate": 3.808333589401919e-06, - "loss": 1.0195, - "step": 1844 - }, - { - "epoch": 0.16638860080263335, - "grad_norm": 1.3182815189684578, - "learning_rate": 3.8080839493967194e-06, - "loss": 0.9091, - "step": 1845 - }, - { - "epoch": 0.1664787843261036, - "grad_norm": 0.7688160394975209, - "learning_rate": 3.807834155115828e-06, - "loss": 0.7841, - "step": 1846 - }, - { - "epoch": 0.1665689678495739, - "grad_norm": 0.806694971059604, - "learning_rate": 3.8075842065805584e-06, - "loss": 0.8938, - "step": 1847 - }, - { - "epoch": 0.16665915137304416, - "grad_norm": 1.4671338706325676, - "learning_rate": 3.8073341038122374e-06, - "loss": 0.9696, - "step": 1848 - }, - { - "epoch": 0.16674933489651442, - "grad_norm": 1.3435167065750437, - "learning_rate": 3.8070838468322048e-06, - "loss": 1.0459, - "step": 1849 - }, - { - "epoch": 0.16683951841998468, - "grad_norm": 1.6565905526762816, - "learning_rate": 3.8068334356618143e-06, - "loss": 0.9239, - "step": 1850 - }, - { - "epoch": 0.16692970194345494, - "grad_norm": 1.9076686020678622, - "learning_rate": 3.8065828703224324e-06, - "loss": 1.0078, - "step": 1851 - }, - { - "epoch": 0.1670198854669252, - "grad_norm": 1.6228567073977824, - "learning_rate": 3.8063321508354386e-06, - "loss": 0.9722, - "step": 1852 - }, - { - "epoch": 0.16711006899039546, - "grad_norm": 1.4490446824160337, - "learning_rate": 3.8060812772222255e-06, - "loss": 0.9662, - "step": 1853 - }, - { - "epoch": 0.16720025251386572, - "grad_norm": 1.2083032319276752, - "learning_rate": 3.8058302495041993e-06, - "loss": 1.0468, - "step": 1854 - }, - { - "epoch": 0.16729043603733598, - "grad_norm": 1.8230192827679108, - "learning_rate": 3.805579067702779e-06, - "loss": 1.0084, - "step": 1855 - }, - { - "epoch": 0.16738061956080624, - "grad_norm": 1.6487701225065055, - "learning_rate": 3.8053277318393967e-06, - "loss": 0.9852, - "step": 1856 - }, - { - "epoch": 0.1674708030842765, - "grad_norm": 1.3970777838043356, - "learning_rate": 3.805076241935498e-06, - "loss": 0.9861, - "step": 1857 - }, - { - "epoch": 0.16756098660774676, - "grad_norm": 1.7732714604695665, - "learning_rate": 3.804824598012541e-06, - "loss": 0.8535, - "step": 1858 - }, - { - "epoch": 0.16765117013121703, - "grad_norm": 1.300019599693442, - "learning_rate": 3.8045728000919975e-06, - "loss": 1.0237, - "step": 1859 - }, - { - "epoch": 0.1677413536546873, - "grad_norm": 1.4864101713095335, - "learning_rate": 3.8043208481953524e-06, - "loss": 1.0121, - "step": 1860 - }, - { - "epoch": 0.16783153717815755, - "grad_norm": 1.4358516032332758, - "learning_rate": 3.804068742344104e-06, - "loss": 0.9174, - "step": 1861 - }, - { - "epoch": 0.1679217207016278, - "grad_norm": 1.3470362467175887, - "learning_rate": 3.8038164825597628e-06, - "loss": 0.9496, - "step": 1862 - }, - { - "epoch": 0.16801190422509807, - "grad_norm": 2.19179842834472, - "learning_rate": 3.8035640688638537e-06, - "loss": 1.0644, - "step": 1863 - }, - { - "epoch": 0.16810208774856833, - "grad_norm": 2.0843770909334642, - "learning_rate": 3.8033115012779125e-06, - "loss": 1.06, - "step": 1864 - }, - { - "epoch": 0.1681922712720386, - "grad_norm": 1.2734542096685768, - "learning_rate": 3.8030587798234915e-06, - "loss": 1.0236, - "step": 1865 - }, - { - "epoch": 0.16828245479550885, - "grad_norm": 1.4799666144497532, - "learning_rate": 3.802805904522153e-06, - "loss": 0.9207, - "step": 1866 - }, - { - "epoch": 0.1683726383189791, - "grad_norm": 1.2010544634245104, - "learning_rate": 3.8025528753954742e-06, - "loss": 0.974, - "step": 1867 - }, - { - "epoch": 0.16846282184244937, - "grad_norm": 1.6511766717469731, - "learning_rate": 3.802299692465045e-06, - "loss": 0.919, - "step": 1868 - }, - { - "epoch": 0.16855300536591963, - "grad_norm": 1.4649479129632264, - "learning_rate": 3.802046355752468e-06, - "loss": 0.9723, - "step": 1869 - }, - { - "epoch": 0.16864318888938992, - "grad_norm": 1.7274158147015743, - "learning_rate": 3.80179286527936e-06, - "loss": 1.0419, - "step": 1870 - }, - { - "epoch": 0.16873337241286018, - "grad_norm": 1.3834451639787335, - "learning_rate": 3.801539221067349e-06, - "loss": 0.8981, - "step": 1871 - }, - { - "epoch": 0.16882355593633044, - "grad_norm": 1.347884758635979, - "learning_rate": 3.801285423138079e-06, - "loss": 0.9362, - "step": 1872 - }, - { - "epoch": 0.1689137394598007, - "grad_norm": 1.336291082337688, - "learning_rate": 3.8010314715132037e-06, - "loss": 0.9302, - "step": 1873 - }, - { - "epoch": 0.16900392298327097, - "grad_norm": 1.463197316920937, - "learning_rate": 3.800777366214393e-06, - "loss": 0.9374, - "step": 1874 - }, - { - "epoch": 0.16909410650674123, - "grad_norm": 1.8634413164590062, - "learning_rate": 3.800523107263328e-06, - "loss": 0.9602, - "step": 1875 - }, - { - "epoch": 0.1691842900302115, - "grad_norm": 1.4809413346874645, - "learning_rate": 3.800268694681703e-06, - "loss": 1.0324, - "step": 1876 - }, - { - "epoch": 0.16927447355368175, - "grad_norm": 0.941413404508764, - "learning_rate": 3.800014128491227e-06, - "loss": 0.8205, - "step": 1877 - }, - { - "epoch": 0.169364657077152, - "grad_norm": 1.4667271420986578, - "learning_rate": 3.79975940871362e-06, - "loss": 1.0324, - "step": 1878 - }, - { - "epoch": 0.16945484060062227, - "grad_norm": 1.8251807476052428, - "learning_rate": 3.799504535370617e-06, - "loss": 0.989, - "step": 1879 - }, - { - "epoch": 0.16954502412409253, - "grad_norm": 2.1979255736352834, - "learning_rate": 3.799249508483964e-06, - "loss": 0.9169, - "step": 1880 - }, - { - "epoch": 0.1696352076475628, - "grad_norm": 1.2467033784394168, - "learning_rate": 3.798994328075422e-06, - "loss": 0.8715, - "step": 1881 - }, - { - "epoch": 0.16972539117103305, - "grad_norm": 1.5652696095730845, - "learning_rate": 3.798738994166765e-06, - "loss": 0.9166, - "step": 1882 - }, - { - "epoch": 0.1698155746945033, - "grad_norm": 1.5585779939777693, - "learning_rate": 3.7984835067797788e-06, - "loss": 0.9812, - "step": 1883 - }, - { - "epoch": 0.16990575821797357, - "grad_norm": 1.4948371410652417, - "learning_rate": 3.798227865936263e-06, - "loss": 1.0726, - "step": 1884 - }, - { - "epoch": 0.16999594174144383, - "grad_norm": 1.5130311458934358, - "learning_rate": 3.7979720716580297e-06, - "loss": 0.8463, - "step": 1885 - }, - { - "epoch": 0.1700861252649141, - "grad_norm": 1.5241177336254368, - "learning_rate": 3.7977161239669057e-06, - "loss": 0.9687, - "step": 1886 - }, - { - "epoch": 0.17017630878838436, - "grad_norm": 1.6615281125266677, - "learning_rate": 3.7974600228847294e-06, - "loss": 0.9636, - "step": 1887 - }, - { - "epoch": 0.17026649231185462, - "grad_norm": 1.9216137219976923, - "learning_rate": 3.7972037684333534e-06, - "loss": 0.9124, - "step": 1888 - }, - { - "epoch": 0.17035667583532488, - "grad_norm": 1.5082708265022833, - "learning_rate": 3.796947360634642e-06, - "loss": 0.9135, - "step": 1889 - }, - { - "epoch": 0.17044685935879514, - "grad_norm": 1.6710767890458222, - "learning_rate": 3.796690799510473e-06, - "loss": 1.0272, - "step": 1890 - }, - { - "epoch": 0.1705370428822654, - "grad_norm": 1.4259800602507267, - "learning_rate": 3.7964340850827387e-06, - "loss": 0.9446, - "step": 1891 - }, - { - "epoch": 0.17062722640573566, - "grad_norm": 1.0397406638025068, - "learning_rate": 3.7961772173733425e-06, - "loss": 0.7836, - "step": 1892 - }, - { - "epoch": 0.17071740992920592, - "grad_norm": 1.2909132185355905, - "learning_rate": 3.7959201964042024e-06, - "loss": 0.9718, - "step": 1893 - }, - { - "epoch": 0.1708075934526762, - "grad_norm": 1.5199080835960332, - "learning_rate": 3.795663022197248e-06, - "loss": 1.0046, - "step": 1894 - }, - { - "epoch": 0.17089777697614647, - "grad_norm": 1.9461530225403512, - "learning_rate": 3.7954056947744242e-06, - "loss": 0.9835, - "step": 1895 - }, - { - "epoch": 0.17098796049961673, - "grad_norm": 1.5241976675416413, - "learning_rate": 3.7951482141576863e-06, - "loss": 0.959, - "step": 1896 - }, - { - "epoch": 0.171078144023087, - "grad_norm": 1.2394409044434531, - "learning_rate": 3.794890580369004e-06, - "loss": 1.0239, - "step": 1897 - }, - { - "epoch": 0.17116832754655725, - "grad_norm": 1.6579687449381832, - "learning_rate": 3.7946327934303612e-06, - "loss": 1.0135, - "step": 1898 - }, - { - "epoch": 0.1712585110700275, - "grad_norm": 1.4321779333099334, - "learning_rate": 3.794374853363752e-06, - "loss": 1.0739, - "step": 1899 - }, - { - "epoch": 0.17134869459349777, - "grad_norm": 1.3115984227039827, - "learning_rate": 3.794116760191187e-06, - "loss": 0.998, - "step": 1900 - }, - { - "epoch": 0.17143887811696804, - "grad_norm": 1.4813594504050398, - "learning_rate": 3.7938585139346877e-06, - "loss": 1.0776, - "step": 1901 - }, - { - "epoch": 0.1715290616404383, - "grad_norm": 1.5074365176725095, - "learning_rate": 3.793600114616288e-06, - "loss": 0.9579, - "step": 1902 - }, - { - "epoch": 0.17161924516390856, - "grad_norm": 1.1733871875852129, - "learning_rate": 3.793341562258037e-06, - "loss": 0.7897, - "step": 1903 - }, - { - "epoch": 0.17170942868737882, - "grad_norm": 1.392565241643048, - "learning_rate": 3.7930828568819953e-06, - "loss": 0.9925, - "step": 1904 - }, - { - "epoch": 0.17179961221084908, - "grad_norm": 1.6308258966904352, - "learning_rate": 3.7928239985102378e-06, - "loss": 0.9786, - "step": 1905 - }, - { - "epoch": 0.17188979573431934, - "grad_norm": 1.421209493980831, - "learning_rate": 3.7925649871648505e-06, - "loss": 0.975, - "step": 1906 - }, - { - "epoch": 0.1719799792577896, - "grad_norm": 1.4106237846372656, - "learning_rate": 3.792305822867935e-06, - "loss": 0.9087, - "step": 1907 - }, - { - "epoch": 0.17207016278125986, - "grad_norm": 1.5325993412048764, - "learning_rate": 3.792046505641604e-06, - "loss": 0.9454, - "step": 1908 - }, - { - "epoch": 0.17216034630473012, - "grad_norm": 1.3537542840074435, - "learning_rate": 3.791787035507984e-06, - "loss": 0.9427, - "step": 1909 - }, - { - "epoch": 0.17225052982820038, - "grad_norm": 1.6125635208511655, - "learning_rate": 3.7915274124892136e-06, - "loss": 0.931, - "step": 1910 - }, - { - "epoch": 0.17234071335167064, - "grad_norm": 1.3560419107588217, - "learning_rate": 3.7912676366074466e-06, - "loss": 0.9643, - "step": 1911 - }, - { - "epoch": 0.1724308968751409, - "grad_norm": 1.8754154380868333, - "learning_rate": 3.7910077078848478e-06, - "loss": 0.9478, - "step": 1912 - }, - { - "epoch": 0.17252108039861117, - "grad_norm": 1.624093316295779, - "learning_rate": 3.7907476263435957e-06, - "loss": 0.9404, - "step": 1913 - }, - { - "epoch": 0.17261126392208143, - "grad_norm": 1.5412670167353288, - "learning_rate": 3.7904873920058826e-06, - "loss": 0.9493, - "step": 1914 - }, - { - "epoch": 0.1727014474455517, - "grad_norm": 1.8659151602426591, - "learning_rate": 3.7902270048939114e-06, - "loss": 0.8517, - "step": 1915 - }, - { - "epoch": 0.17279163096902195, - "grad_norm": 2.1846833760415367, - "learning_rate": 3.7899664650299023e-06, - "loss": 0.9403, - "step": 1916 - }, - { - "epoch": 0.1728818144924922, - "grad_norm": 1.136408340701841, - "learning_rate": 3.7897057724360836e-06, - "loss": 0.9834, - "step": 1917 - }, - { - "epoch": 0.1729719980159625, - "grad_norm": 1.7415273467684154, - "learning_rate": 3.7894449271347004e-06, - "loss": 0.9919, - "step": 1918 - }, - { - "epoch": 0.17306218153943276, - "grad_norm": 1.4560325001715402, - "learning_rate": 3.789183929148009e-06, - "loss": 1.0358, - "step": 1919 - }, - { - "epoch": 0.17315236506290302, - "grad_norm": 1.4022817783954469, - "learning_rate": 3.7889227784982795e-06, - "loss": 1.0029, - "step": 1920 - }, - { - "epoch": 0.17324254858637328, - "grad_norm": 1.3268946614832429, - "learning_rate": 3.7886614752077945e-06, - "loss": 0.98, - "step": 1921 - }, - { - "epoch": 0.17333273210984354, - "grad_norm": 1.8075255170316293, - "learning_rate": 3.7884000192988495e-06, - "loss": 0.9785, - "step": 1922 - }, - { - "epoch": 0.1734229156333138, - "grad_norm": 1.6558562476472147, - "learning_rate": 3.7881384107937546e-06, - "loss": 1.0302, - "step": 1923 - }, - { - "epoch": 0.17351309915678406, - "grad_norm": 1.6228902031924533, - "learning_rate": 3.78787664971483e-06, - "loss": 0.9704, - "step": 1924 - }, - { - "epoch": 0.17360328268025432, - "grad_norm": 1.258351938897491, - "learning_rate": 3.7876147360844115e-06, - "loss": 0.9463, - "step": 1925 - }, - { - "epoch": 0.17369346620372458, - "grad_norm": 1.730641582460542, - "learning_rate": 3.7873526699248474e-06, - "loss": 1.049, - "step": 1926 - }, - { - "epoch": 0.17378364972719484, - "grad_norm": 1.376146185594829, - "learning_rate": 3.7870904512584974e-06, - "loss": 0.9791, - "step": 1927 - }, - { - "epoch": 0.1738738332506651, - "grad_norm": 1.5165725577504536, - "learning_rate": 3.7868280801077368e-06, - "loss": 0.9812, - "step": 1928 - }, - { - "epoch": 0.17396401677413537, - "grad_norm": 1.1818937172459105, - "learning_rate": 3.7865655564949517e-06, - "loss": 0.9715, - "step": 1929 - }, - { - "epoch": 0.17405420029760563, - "grad_norm": 1.309099424218892, - "learning_rate": 3.786302880442542e-06, - "loss": 0.9494, - "step": 1930 - }, - { - "epoch": 0.1741443838210759, - "grad_norm": 1.3731793140335624, - "learning_rate": 3.7860400519729215e-06, - "loss": 0.9802, - "step": 1931 - }, - { - "epoch": 0.17423456734454615, - "grad_norm": 1.7912855038581643, - "learning_rate": 3.7857770711085157e-06, - "loss": 0.9985, - "step": 1932 - }, - { - "epoch": 0.1743247508680164, - "grad_norm": 1.7225565978829713, - "learning_rate": 3.785513937871763e-06, - "loss": 1.0278, - "step": 1933 - }, - { - "epoch": 0.17441493439148667, - "grad_norm": 1.4932198673096657, - "learning_rate": 3.785250652285116e-06, - "loss": 0.9518, - "step": 1934 - }, - { - "epoch": 0.17450511791495693, - "grad_norm": 1.6358434845034033, - "learning_rate": 3.78498721437104e-06, - "loss": 1.0471, - "step": 1935 - }, - { - "epoch": 0.1745953014384272, - "grad_norm": 2.210935343279646, - "learning_rate": 3.784723624152012e-06, - "loss": 0.9752, - "step": 1936 - }, - { - "epoch": 0.17468548496189745, - "grad_norm": 2.7213409180143757, - "learning_rate": 3.784459881650524e-06, - "loss": 0.9232, - "step": 1937 - }, - { - "epoch": 0.1747756684853677, - "grad_norm": 1.3597829360101081, - "learning_rate": 3.784195986889079e-06, - "loss": 0.9542, - "step": 1938 - }, - { - "epoch": 0.17486585200883797, - "grad_norm": 2.2192301700724983, - "learning_rate": 3.7839319398901946e-06, - "loss": 0.9027, - "step": 1939 - }, - { - "epoch": 0.17495603553230824, - "grad_norm": 1.373480433985351, - "learning_rate": 3.7836677406764013e-06, - "loss": 1.0451, - "step": 1940 - }, - { - "epoch": 0.1750462190557785, - "grad_norm": 1.5087654383516602, - "learning_rate": 3.7834033892702407e-06, - "loss": 1.0252, - "step": 1941 - }, - { - "epoch": 0.17513640257924878, - "grad_norm": 1.4759311840173424, - "learning_rate": 3.783138885694269e-06, - "loss": 0.9343, - "step": 1942 - }, - { - "epoch": 0.17522658610271905, - "grad_norm": 1.593511675798763, - "learning_rate": 3.7828742299710558e-06, - "loss": 1.0553, - "step": 1943 - }, - { - "epoch": 0.1753167696261893, - "grad_norm": 1.5176935139781074, - "learning_rate": 3.782609422123183e-06, - "loss": 0.9584, - "step": 1944 - }, - { - "epoch": 0.17540695314965957, - "grad_norm": 1.7494824870438528, - "learning_rate": 3.7823444621732444e-06, - "loss": 0.9764, - "step": 1945 - }, - { - "epoch": 0.17549713667312983, - "grad_norm": 1.9180576902072723, - "learning_rate": 3.782079350143849e-06, - "loss": 0.9965, - "step": 1946 - }, - { - "epoch": 0.1755873201966001, - "grad_norm": 1.6744392196454505, - "learning_rate": 3.781814086057617e-06, - "loss": 1.0797, - "step": 1947 - }, - { - "epoch": 0.17567750372007035, - "grad_norm": 1.404054029156017, - "learning_rate": 3.7815486699371826e-06, - "loss": 0.9687, - "step": 1948 - }, - { - "epoch": 0.1757676872435406, - "grad_norm": 1.437831259996635, - "learning_rate": 3.7812831018051918e-06, - "loss": 0.9558, - "step": 1949 - }, - { - "epoch": 0.17585787076701087, - "grad_norm": 1.6963443692183735, - "learning_rate": 3.7810173816843058e-06, - "loss": 0.9985, - "step": 1950 - }, - { - "epoch": 0.17594805429048113, - "grad_norm": 1.4574299693745507, - "learning_rate": 3.7807515095971955e-06, - "loss": 1.0422, - "step": 1951 - }, - { - "epoch": 0.1760382378139514, - "grad_norm": 1.4043222565671731, - "learning_rate": 3.7804854855665475e-06, - "loss": 0.9713, - "step": 1952 - }, - { - "epoch": 0.17612842133742165, - "grad_norm": 1.4175867664350228, - "learning_rate": 3.7802193096150606e-06, - "loss": 0.9848, - "step": 1953 - }, - { - "epoch": 0.17621860486089191, - "grad_norm": 1.3147287519079534, - "learning_rate": 3.779952981765446e-06, - "loss": 0.9362, - "step": 1954 - }, - { - "epoch": 0.17630878838436218, - "grad_norm": 1.280804161098614, - "learning_rate": 3.779686502040429e-06, - "loss": 0.9885, - "step": 1955 - }, - { - "epoch": 0.17639897190783244, - "grad_norm": 0.7804669843163053, - "learning_rate": 3.779419870462746e-06, - "loss": 0.798, - "step": 1956 - }, - { - "epoch": 0.1764891554313027, - "grad_norm": 1.5710220631653364, - "learning_rate": 3.779153087055148e-06, - "loss": 0.9554, - "step": 1957 - }, - { - "epoch": 0.17657933895477296, - "grad_norm": 1.6743183286160224, - "learning_rate": 3.7788861518403988e-06, - "loss": 0.9833, - "step": 1958 - }, - { - "epoch": 0.17666952247824322, - "grad_norm": 1.6619697987352113, - "learning_rate": 3.7786190648412742e-06, - "loss": 0.8767, - "step": 1959 - }, - { - "epoch": 0.17675970600171348, - "grad_norm": 0.7996803345383436, - "learning_rate": 3.778351826080564e-06, - "loss": 0.8113, - "step": 1960 - }, - { - "epoch": 0.17684988952518374, - "grad_norm": 1.4304672366206665, - "learning_rate": 3.7780844355810704e-06, - "loss": 1.0829, - "step": 1961 - }, - { - "epoch": 0.176940073048654, - "grad_norm": 1.4196306058452715, - "learning_rate": 3.777816893365608e-06, - "loss": 0.9897, - "step": 1962 - }, - { - "epoch": 0.17703025657212426, - "grad_norm": 1.4963104648849512, - "learning_rate": 3.7775491994570057e-06, - "loss": 1.0549, - "step": 1963 - }, - { - "epoch": 0.17712044009559452, - "grad_norm": 1.4604543611212124, - "learning_rate": 3.777281353878105e-06, - "loss": 0.9459, - "step": 1964 - }, - { - "epoch": 0.17721062361906478, - "grad_norm": 0.6698608370631051, - "learning_rate": 3.777013356651758e-06, - "loss": 0.8109, - "step": 1965 - }, - { - "epoch": 0.17730080714253507, - "grad_norm": 1.5727955937893328, - "learning_rate": 3.776745207800834e-06, - "loss": 0.9925, - "step": 1966 - }, - { - "epoch": 0.17739099066600533, - "grad_norm": 1.4490169580018815, - "learning_rate": 3.7764769073482122e-06, - "loss": 1.0127, - "step": 1967 - }, - { - "epoch": 0.1774811741894756, - "grad_norm": 1.3208518196440857, - "learning_rate": 3.7762084553167846e-06, - "loss": 1.044, - "step": 1968 - }, - { - "epoch": 0.17757135771294585, - "grad_norm": 1.5303894271662846, - "learning_rate": 3.775939851729458e-06, - "loss": 1.0667, - "step": 1969 - }, - { - "epoch": 0.17766154123641612, - "grad_norm": 1.528471703389413, - "learning_rate": 3.775671096609151e-06, - "loss": 0.9387, - "step": 1970 - }, - { - "epoch": 0.17775172475988638, - "grad_norm": 1.849223143907169, - "learning_rate": 3.775402189978795e-06, - "loss": 0.9644, - "step": 1971 - }, - { - "epoch": 0.17784190828335664, - "grad_norm": 1.2653205411188675, - "learning_rate": 3.7751331318613343e-06, - "loss": 0.9644, - "step": 1972 - }, - { - "epoch": 0.1779320918068269, - "grad_norm": 1.5592517658839549, - "learning_rate": 3.774863922279727e-06, - "loss": 0.9988, - "step": 1973 - }, - { - "epoch": 0.17802227533029716, - "grad_norm": 1.4362022512813761, - "learning_rate": 3.7745945612569435e-06, - "loss": 1.0456, - "step": 1974 - }, - { - "epoch": 0.17811245885376742, - "grad_norm": 1.2714485128241073, - "learning_rate": 3.7743250488159674e-06, - "loss": 0.8606, - "step": 1975 - }, - { - "epoch": 0.17820264237723768, - "grad_norm": 1.5314433695865115, - "learning_rate": 3.774055384979794e-06, - "loss": 0.8769, - "step": 1976 - }, - { - "epoch": 0.17829282590070794, - "grad_norm": 1.4326855019765687, - "learning_rate": 3.773785569771433e-06, - "loss": 0.9635, - "step": 1977 - }, - { - "epoch": 0.1783830094241782, - "grad_norm": 1.50942551111721, - "learning_rate": 3.7735156032139066e-06, - "loss": 0.9642, - "step": 1978 - }, - { - "epoch": 0.17847319294764846, - "grad_norm": 1.5881685246039616, - "learning_rate": 3.773245485330251e-06, - "loss": 1.0785, - "step": 1979 - }, - { - "epoch": 0.17856337647111872, - "grad_norm": 1.5390918554614372, - "learning_rate": 3.7729752161435115e-06, - "loss": 0.9514, - "step": 1980 - }, - { - "epoch": 0.17865355999458898, - "grad_norm": 1.5322695860405275, - "learning_rate": 3.7727047956767514e-06, - "loss": 1.0512, - "step": 1981 - }, - { - "epoch": 0.17874374351805924, - "grad_norm": 1.3117290684996783, - "learning_rate": 3.7724342239530436e-06, - "loss": 1.1063, - "step": 1982 - }, - { - "epoch": 0.1788339270415295, - "grad_norm": 1.6061575124512124, - "learning_rate": 3.772163500995474e-06, - "loss": 0.9304, - "step": 1983 - }, - { - "epoch": 0.17892411056499977, - "grad_norm": 1.7784309164276286, - "learning_rate": 3.7718926268271437e-06, - "loss": 1.0691, - "step": 1984 - }, - { - "epoch": 0.17901429408847003, - "grad_norm": 2.3890030289028377, - "learning_rate": 3.771621601471164e-06, - "loss": 0.9927, - "step": 1985 - }, - { - "epoch": 0.1791044776119403, - "grad_norm": 1.4091657292870297, - "learning_rate": 3.771350424950661e-06, - "loss": 1.0225, - "step": 1986 - }, - { - "epoch": 0.17919466113541055, - "grad_norm": 1.5873824428975933, - "learning_rate": 3.771079097288772e-06, - "loss": 0.9274, - "step": 1987 - }, - { - "epoch": 0.1792848446588808, - "grad_norm": 1.5795921078036326, - "learning_rate": 3.770807618508649e-06, - "loss": 0.989, - "step": 1988 - }, - { - "epoch": 0.17937502818235107, - "grad_norm": 1.4047007609962416, - "learning_rate": 3.7705359886334555e-06, - "loss": 0.9713, - "step": 1989 - }, - { - "epoch": 0.17946521170582136, - "grad_norm": 1.5745942304560014, - "learning_rate": 3.7702642076863694e-06, - "loss": 0.9527, - "step": 1990 - }, - { - "epoch": 0.17955539522929162, - "grad_norm": 1.5974619404493122, - "learning_rate": 3.7699922756905795e-06, - "loss": 1.0348, - "step": 1991 - }, - { - "epoch": 0.17964557875276188, - "grad_norm": 1.5135690961830213, - "learning_rate": 3.7697201926692895e-06, - "loss": 0.9373, - "step": 1992 - }, - { - "epoch": 0.17973576227623214, - "grad_norm": 1.6190532297216165, - "learning_rate": 3.7694479586457144e-06, - "loss": 0.9876, - "step": 1993 - }, - { - "epoch": 0.1798259457997024, - "grad_norm": 1.959612638186931, - "learning_rate": 3.7691755736430827e-06, - "loss": 0.9774, - "step": 1994 - }, - { - "epoch": 0.17991612932317266, - "grad_norm": 1.4209264588624428, - "learning_rate": 3.768903037684636e-06, - "loss": 1.0291, - "step": 1995 - }, - { - "epoch": 0.18000631284664292, - "grad_norm": 1.7250186006952943, - "learning_rate": 3.7686303507936284e-06, - "loss": 1.026, - "step": 1996 - }, - { - "epoch": 0.18009649637011318, - "grad_norm": 1.600784336010782, - "learning_rate": 3.7683575129933272e-06, - "loss": 1.0222, - "step": 1997 - }, - { - "epoch": 0.18018667989358345, - "grad_norm": 1.281125923289654, - "learning_rate": 3.7680845243070128e-06, - "loss": 0.9818, - "step": 1998 - }, - { - "epoch": 0.1802768634170537, - "grad_norm": 0.8585888127470505, - "learning_rate": 3.7678113847579767e-06, - "loss": 0.7771, - "step": 1999 - }, - { - "epoch": 0.18036704694052397, - "grad_norm": 1.7350354011287972, - "learning_rate": 3.7675380943695264e-06, - "loss": 1.0513, - "step": 2000 - }, - { - "epoch": 0.18045723046399423, - "grad_norm": 1.6610429617270965, - "learning_rate": 3.7672646531649795e-06, - "loss": 0.9436, - "step": 2001 - }, - { - "epoch": 0.1805474139874645, - "grad_norm": 0.9806495894879965, - "learning_rate": 3.7669910611676682e-06, - "loss": 0.7989, - "step": 2002 - }, - { - "epoch": 0.18063759751093475, - "grad_norm": 1.5754709115014027, - "learning_rate": 3.7667173184009356e-06, - "loss": 0.9745, - "step": 2003 - }, - { - "epoch": 0.180727781034405, - "grad_norm": 1.35094486302512, - "learning_rate": 3.7664434248881403e-06, - "loss": 1.0422, - "step": 2004 - }, - { - "epoch": 0.18081796455787527, - "grad_norm": 1.7791126880304169, - "learning_rate": 3.766169380652652e-06, - "loss": 0.9943, - "step": 2005 - }, - { - "epoch": 0.18090814808134553, - "grad_norm": 1.6140194143949897, - "learning_rate": 3.7658951857178537e-06, - "loss": 1.027, - "step": 2006 - }, - { - "epoch": 0.1809983316048158, - "grad_norm": 1.7830274982182597, - "learning_rate": 3.7656208401071414e-06, - "loss": 0.9965, - "step": 2007 - }, - { - "epoch": 0.18108851512828605, - "grad_norm": 1.6331412974499553, - "learning_rate": 3.7653463438439225e-06, - "loss": 0.9267, - "step": 2008 - }, - { - "epoch": 0.18117869865175631, - "grad_norm": 1.5709089218827519, - "learning_rate": 3.7650716969516203e-06, - "loss": 0.9897, - "step": 2009 - }, - { - "epoch": 0.18126888217522658, - "grad_norm": 1.660436126454553, - "learning_rate": 3.764796899453668e-06, - "loss": 0.8892, - "step": 2010 - }, - { - "epoch": 0.18135906569869684, - "grad_norm": 1.5092188152120094, - "learning_rate": 3.7645219513735134e-06, - "loss": 1.0243, - "step": 2011 - }, - { - "epoch": 0.1814492492221671, - "grad_norm": 1.4863045448708552, - "learning_rate": 3.764246852734617e-06, - "loss": 1.0201, - "step": 2012 - }, - { - "epoch": 0.18153943274563739, - "grad_norm": 1.7158083624904013, - "learning_rate": 3.7639716035604502e-06, - "loss": 0.9487, - "step": 2013 - }, - { - "epoch": 0.18162961626910765, - "grad_norm": 1.0714358261407744, - "learning_rate": 3.7636962038745e-06, - "loss": 0.7955, - "step": 2014 - }, - { - "epoch": 0.1817197997925779, - "grad_norm": 1.5669821729248492, - "learning_rate": 3.763420653700265e-06, - "loss": 0.947, - "step": 2015 - }, - { - "epoch": 0.18180998331604817, - "grad_norm": 1.4427451761363719, - "learning_rate": 3.7631449530612565e-06, - "loss": 0.9489, - "step": 2016 - }, - { - "epoch": 0.18190016683951843, - "grad_norm": 1.4125839427884948, - "learning_rate": 3.762869101980999e-06, - "loss": 1.0727, - "step": 2017 - }, - { - "epoch": 0.1819903503629887, - "grad_norm": 1.542765698170232, - "learning_rate": 3.7625931004830287e-06, - "loss": 0.9783, - "step": 2018 - }, - { - "epoch": 0.18208053388645895, - "grad_norm": 1.5201212465716556, - "learning_rate": 3.7623169485908966e-06, - "loss": 0.9401, - "step": 2019 - }, - { - "epoch": 0.1821707174099292, - "grad_norm": 1.0667201547821572, - "learning_rate": 3.7620406463281647e-06, - "loss": 0.857, - "step": 2020 - }, - { - "epoch": 0.18226090093339947, - "grad_norm": 1.654027563375128, - "learning_rate": 3.7617641937184095e-06, - "loss": 0.9997, - "step": 2021 - }, - { - "epoch": 0.18235108445686973, - "grad_norm": 1.5699110134685854, - "learning_rate": 3.761487590785219e-06, - "loss": 1.0111, - "step": 2022 - }, - { - "epoch": 0.18244126798034, - "grad_norm": 1.4579320173753672, - "learning_rate": 3.7612108375521942e-06, - "loss": 0.9584, - "step": 2023 - }, - { - "epoch": 0.18253145150381025, - "grad_norm": 1.4886227513788883, - "learning_rate": 3.76093393404295e-06, - "loss": 1.0274, - "step": 2024 - }, - { - "epoch": 0.18262163502728052, - "grad_norm": 2.966114681483327, - "learning_rate": 3.7606568802811126e-06, - "loss": 0.9731, - "step": 2025 - }, - { - "epoch": 0.18271181855075078, - "grad_norm": 1.856341347549975, - "learning_rate": 3.760379676290322e-06, - "loss": 0.996, - "step": 2026 - }, - { - "epoch": 0.18280200207422104, - "grad_norm": 1.6116698116651829, - "learning_rate": 3.760102322094231e-06, - "loss": 0.9189, - "step": 2027 - }, - { - "epoch": 0.1828921855976913, - "grad_norm": 1.2463448011485083, - "learning_rate": 3.759824817716504e-06, - "loss": 0.9437, - "step": 2028 - }, - { - "epoch": 0.18298236912116156, - "grad_norm": 1.1938347751413987, - "learning_rate": 3.759547163180821e-06, - "loss": 1.0308, - "step": 2029 - }, - { - "epoch": 0.18307255264463182, - "grad_norm": 1.299891720443876, - "learning_rate": 3.759269358510871e-06, - "loss": 0.7876, - "step": 2030 - }, - { - "epoch": 0.18316273616810208, - "grad_norm": 1.5945214293204126, - "learning_rate": 3.75899140373036e-06, - "loss": 0.884, - "step": 2031 - }, - { - "epoch": 0.18325291969157234, - "grad_norm": 2.4892125086292762, - "learning_rate": 3.7587132988630028e-06, - "loss": 1.0544, - "step": 2032 - }, - { - "epoch": 0.1833431032150426, - "grad_norm": 1.6696138153581181, - "learning_rate": 3.7584350439325295e-06, - "loss": 1.0146, - "step": 2033 - }, - { - "epoch": 0.18343328673851286, - "grad_norm": 1.5773958088945497, - "learning_rate": 3.758156638962682e-06, - "loss": 0.9728, - "step": 2034 - }, - { - "epoch": 0.18352347026198312, - "grad_norm": 1.4605026005790671, - "learning_rate": 3.757878083977216e-06, - "loss": 0.9372, - "step": 2035 - }, - { - "epoch": 0.18361365378545338, - "grad_norm": 1.3365843249918898, - "learning_rate": 3.7575993789999e-06, - "loss": 0.9749, - "step": 2036 - }, - { - "epoch": 0.18370383730892367, - "grad_norm": 1.3630142168995436, - "learning_rate": 3.757320524054512e-06, - "loss": 1.0115, - "step": 2037 - }, - { - "epoch": 0.18379402083239393, - "grad_norm": 1.4259082476348952, - "learning_rate": 3.757041519164848e-06, - "loss": 0.9168, - "step": 2038 - }, - { - "epoch": 0.1838842043558642, - "grad_norm": 1.7284576923554407, - "learning_rate": 3.7567623643547133e-06, - "loss": 0.9906, - "step": 2039 - }, - { - "epoch": 0.18397438787933446, - "grad_norm": 1.4379834730394996, - "learning_rate": 3.756483059647927e-06, - "loss": 0.9869, - "step": 2040 - }, - { - "epoch": 0.18406457140280472, - "grad_norm": 1.360347487641398, - "learning_rate": 3.756203605068321e-06, - "loss": 0.9539, - "step": 2041 - }, - { - "epoch": 0.18415475492627498, - "grad_norm": 1.506800494763015, - "learning_rate": 3.7559240006397396e-06, - "loss": 0.9688, - "step": 2042 - }, - { - "epoch": 0.18424493844974524, - "grad_norm": 2.9316502214512323, - "learning_rate": 3.7556442463860406e-06, - "loss": 1.0989, - "step": 2043 - }, - { - "epoch": 0.1843351219732155, - "grad_norm": 1.4249946828374878, - "learning_rate": 3.7553643423310934e-06, - "loss": 1.0874, - "step": 2044 - }, - { - "epoch": 0.18442530549668576, - "grad_norm": 2.0933348970144148, - "learning_rate": 3.755084288498782e-06, - "loss": 0.8506, - "step": 2045 - }, - { - "epoch": 0.18451548902015602, - "grad_norm": 1.3716738090622738, - "learning_rate": 3.754804084913002e-06, - "loss": 0.9935, - "step": 2046 - }, - { - "epoch": 0.18460567254362628, - "grad_norm": 1.3532537121597643, - "learning_rate": 3.754523731597661e-06, - "loss": 1.0476, - "step": 2047 - }, - { - "epoch": 0.18469585606709654, - "grad_norm": 1.8192795208769865, - "learning_rate": 3.754243228576681e-06, - "loss": 0.8782, - "step": 2048 - }, - { - "epoch": 0.1847860395905668, - "grad_norm": 1.6570208213294533, - "learning_rate": 3.753962575873996e-06, - "loss": 1.0311, - "step": 2049 - }, - { - "epoch": 0.18487622311403706, - "grad_norm": 1.3828675857175539, - "learning_rate": 3.7536817735135527e-06, - "loss": 0.94, - "step": 2050 - }, - { - "epoch": 0.18496640663750732, - "grad_norm": 1.3580443786763177, - "learning_rate": 3.753400821519311e-06, - "loss": 1.0263, - "step": 2051 - }, - { - "epoch": 0.18505659016097759, - "grad_norm": 1.346129904785629, - "learning_rate": 3.7531197199152426e-06, - "loss": 0.9098, - "step": 2052 - }, - { - "epoch": 0.18514677368444785, - "grad_norm": 1.3202025689401584, - "learning_rate": 3.7528384687253335e-06, - "loss": 0.8515, - "step": 2053 - }, - { - "epoch": 0.1852369572079181, - "grad_norm": 1.552664893173203, - "learning_rate": 3.7525570679735815e-06, - "loss": 1.0908, - "step": 2054 - }, - { - "epoch": 0.18532714073138837, - "grad_norm": 1.694610731723948, - "learning_rate": 3.7522755176839965e-06, - "loss": 0.9929, - "step": 2055 - }, - { - "epoch": 0.18541732425485863, - "grad_norm": 1.6827318857589808, - "learning_rate": 3.7519938178806027e-06, - "loss": 0.9894, - "step": 2056 - }, - { - "epoch": 0.1855075077783289, - "grad_norm": 1.9002627166013395, - "learning_rate": 3.7517119685874358e-06, - "loss": 0.9281, - "step": 2057 - }, - { - "epoch": 0.18559769130179915, - "grad_norm": 1.441783907244709, - "learning_rate": 3.7514299698285447e-06, - "loss": 0.9682, - "step": 2058 - }, - { - "epoch": 0.1856878748252694, - "grad_norm": 1.3076276714653934, - "learning_rate": 3.751147821627991e-06, - "loss": 0.9642, - "step": 2059 - }, - { - "epoch": 0.18577805834873967, - "grad_norm": 1.6856169614221836, - "learning_rate": 3.75086552400985e-06, - "loss": 1.0006, - "step": 2060 - }, - { - "epoch": 0.18586824187220996, - "grad_norm": 1.5096096733056248, - "learning_rate": 3.750583076998208e-06, - "loss": 0.9573, - "step": 2061 - }, - { - "epoch": 0.18595842539568022, - "grad_norm": 1.5840590302998843, - "learning_rate": 3.7503004806171655e-06, - "loss": 0.967, - "step": 2062 - }, - { - "epoch": 0.18604860891915048, - "grad_norm": 1.8044737321818451, - "learning_rate": 3.7500177348908354e-06, - "loss": 0.9151, - "step": 2063 - }, - { - "epoch": 0.18613879244262074, - "grad_norm": 1.3360118733723632, - "learning_rate": 3.749734839843342e-06, - "loss": 0.9982, - "step": 2064 - }, - { - "epoch": 0.186228975966091, - "grad_norm": 1.4805830858512758, - "learning_rate": 3.7494517954988245e-06, - "loss": 0.9814, - "step": 2065 - }, - { - "epoch": 0.18631915948956126, - "grad_norm": 1.7020676156496863, - "learning_rate": 3.749168601881433e-06, - "loss": 0.9731, - "step": 2066 - }, - { - "epoch": 0.18640934301303153, - "grad_norm": 1.7558289589968474, - "learning_rate": 3.7488852590153315e-06, - "loss": 0.9613, - "step": 2067 - }, - { - "epoch": 0.1864995265365018, - "grad_norm": 1.4227671915670308, - "learning_rate": 3.748601766924697e-06, - "loss": 0.9778, - "step": 2068 - }, - { - "epoch": 0.18658971005997205, - "grad_norm": 1.6233402726079262, - "learning_rate": 3.7483181256337176e-06, - "loss": 0.9597, - "step": 2069 - }, - { - "epoch": 0.1866798935834423, - "grad_norm": 1.485198585512868, - "learning_rate": 3.7480343351665962e-06, - "loss": 0.9828, - "step": 2070 - }, - { - "epoch": 0.18677007710691257, - "grad_norm": 1.4555766435766713, - "learning_rate": 3.747750395547546e-06, - "loss": 0.9676, - "step": 2071 - }, - { - "epoch": 0.18686026063038283, - "grad_norm": 1.2261165610065878, - "learning_rate": 3.7474663068007956e-06, - "loss": 0.9615, - "step": 2072 - }, - { - "epoch": 0.1869504441538531, - "grad_norm": 1.223214262842013, - "learning_rate": 3.747182068950584e-06, - "loss": 0.9637, - "step": 2073 - }, - { - "epoch": 0.18704062767732335, - "grad_norm": 1.4708932700476054, - "learning_rate": 3.7468976820211643e-06, - "loss": 0.99, - "step": 2074 - }, - { - "epoch": 0.1871308112007936, - "grad_norm": 1.5012569723991707, - "learning_rate": 3.746613146036803e-06, - "loss": 0.9829, - "step": 2075 - }, - { - "epoch": 0.18722099472426387, - "grad_norm": 1.6084211911860107, - "learning_rate": 3.7463284610217766e-06, - "loss": 1.0171, - "step": 2076 - }, - { - "epoch": 0.18731117824773413, - "grad_norm": 1.713455576796964, - "learning_rate": 3.746043627000377e-06, - "loss": 0.916, - "step": 2077 - }, - { - "epoch": 0.1874013617712044, - "grad_norm": 1.4205462776388005, - "learning_rate": 3.7457586439969076e-06, - "loss": 0.9317, - "step": 2078 - }, - { - "epoch": 0.18749154529467466, - "grad_norm": 1.205593351400606, - "learning_rate": 3.7454735120356842e-06, - "loss": 0.9667, - "step": 2079 - }, - { - "epoch": 0.18758172881814492, - "grad_norm": 1.4550026688436566, - "learning_rate": 3.7451882311410373e-06, - "loss": 1.047, - "step": 2080 - }, - { - "epoch": 0.18767191234161518, - "grad_norm": 1.457036731699432, - "learning_rate": 3.7449028013373074e-06, - "loss": 1.0116, - "step": 2081 - }, - { - "epoch": 0.18776209586508544, - "grad_norm": 1.400742873577268, - "learning_rate": 3.7446172226488485e-06, - "loss": 0.9781, - "step": 2082 - }, - { - "epoch": 0.1878522793885557, - "grad_norm": 1.60587478292539, - "learning_rate": 3.7443314951000285e-06, - "loss": 1.0414, - "step": 2083 - }, - { - "epoch": 0.18794246291202596, - "grad_norm": 1.8743508168765932, - "learning_rate": 3.7440456187152276e-06, - "loss": 0.9612, - "step": 2084 - }, - { - "epoch": 0.18803264643549625, - "grad_norm": 1.7733337775327849, - "learning_rate": 3.7437595935188377e-06, - "loss": 1.0688, - "step": 2085 - }, - { - "epoch": 0.1881228299589665, - "grad_norm": 1.867252620056251, - "learning_rate": 3.7434734195352647e-06, - "loss": 0.9726, - "step": 2086 - }, - { - "epoch": 0.18821301348243677, - "grad_norm": 0.7550795522045811, - "learning_rate": 3.743187096788926e-06, - "loss": 0.8108, - "step": 2087 - }, - { - "epoch": 0.18830319700590703, - "grad_norm": 1.5479737820191677, - "learning_rate": 3.7429006253042524e-06, - "loss": 0.9852, - "step": 2088 - }, - { - "epoch": 0.1883933805293773, - "grad_norm": 1.663360447608459, - "learning_rate": 3.7426140051056867e-06, - "loss": 0.921, - "step": 2089 - }, - { - "epoch": 0.18848356405284755, - "grad_norm": 2.2131394922255443, - "learning_rate": 3.7423272362176856e-06, - "loss": 0.9161, - "step": 2090 - }, - { - "epoch": 0.1885737475763178, - "grad_norm": 3.9094551059922713, - "learning_rate": 3.742040318664718e-06, - "loss": 1.0245, - "step": 2091 - }, - { - "epoch": 0.18866393109978807, - "grad_norm": 1.596767411461526, - "learning_rate": 3.7417532524712643e-06, - "loss": 1.0303, - "step": 2092 - }, - { - "epoch": 0.18875411462325833, - "grad_norm": 1.3888426942560586, - "learning_rate": 3.7414660376618195e-06, - "loss": 1.0091, - "step": 2093 - }, - { - "epoch": 0.1888442981467286, - "grad_norm": 0.7309535248799084, - "learning_rate": 3.74117867426089e-06, - "loss": 0.8141, - "step": 2094 - }, - { - "epoch": 0.18893448167019886, - "grad_norm": 1.476412568730319, - "learning_rate": 3.7408911622929954e-06, - "loss": 0.9601, - "step": 2095 - }, - { - "epoch": 0.18902466519366912, - "grad_norm": 1.418202362573087, - "learning_rate": 3.740603501782668e-06, - "loss": 1.0135, - "step": 2096 - }, - { - "epoch": 0.18911484871713938, - "grad_norm": 1.5860799340240224, - "learning_rate": 3.7403156927544516e-06, - "loss": 0.9222, - "step": 2097 - }, - { - "epoch": 0.18920503224060964, - "grad_norm": 1.4747379878463975, - "learning_rate": 3.740027735232904e-06, - "loss": 0.9018, - "step": 2098 - }, - { - "epoch": 0.1892952157640799, - "grad_norm": 1.445066771367021, - "learning_rate": 3.7397396292425966e-06, - "loss": 0.9569, - "step": 2099 - }, - { - "epoch": 0.18938539928755016, - "grad_norm": 1.6886435978234042, - "learning_rate": 3.7394513748081105e-06, - "loss": 0.9573, - "step": 2100 - }, - { - "epoch": 0.18947558281102042, - "grad_norm": 1.425491178085876, - "learning_rate": 3.7391629719540418e-06, - "loss": 1.0062, - "step": 2101 - }, - { - "epoch": 0.18956576633449068, - "grad_norm": 1.6873246561010924, - "learning_rate": 3.7388744207049998e-06, - "loss": 0.9678, - "step": 2102 - }, - { - "epoch": 0.18965594985796094, - "grad_norm": 1.7680491457404515, - "learning_rate": 3.7385857210856027e-06, - "loss": 0.9551, - "step": 2103 - }, - { - "epoch": 0.1897461333814312, - "grad_norm": 1.378118879170568, - "learning_rate": 3.738296873120486e-06, - "loss": 0.9612, - "step": 2104 - }, - { - "epoch": 0.18983631690490146, - "grad_norm": 2.272601416310879, - "learning_rate": 3.7380078768342955e-06, - "loss": 0.9607, - "step": 2105 - }, - { - "epoch": 0.18992650042837173, - "grad_norm": 1.6103820612825954, - "learning_rate": 3.7377187322516895e-06, - "loss": 1.0029, - "step": 2106 - }, - { - "epoch": 0.19001668395184199, - "grad_norm": 1.48133667639954, - "learning_rate": 3.7374294393973395e-06, - "loss": 0.9373, - "step": 2107 - }, - { - "epoch": 0.19010686747531225, - "grad_norm": 1.490320125727502, - "learning_rate": 3.7371399982959294e-06, - "loss": 1.0368, - "step": 2108 - }, - { - "epoch": 0.19019705099878254, - "grad_norm": 1.5812528979610379, - "learning_rate": 3.7368504089721565e-06, - "loss": 0.8909, - "step": 2109 - }, - { - "epoch": 0.1902872345222528, - "grad_norm": 1.339698805907024, - "learning_rate": 3.73656067145073e-06, - "loss": 0.9258, - "step": 2110 - }, - { - "epoch": 0.19037741804572306, - "grad_norm": 1.6020920040985653, - "learning_rate": 3.736270785756371e-06, - "loss": 1.0215, - "step": 2111 - }, - { - "epoch": 0.19046760156919332, - "grad_norm": 1.8294875455121196, - "learning_rate": 3.7359807519138156e-06, - "loss": 0.9786, - "step": 2112 - }, - { - "epoch": 0.19055778509266358, - "grad_norm": 1.266551961640838, - "learning_rate": 3.73569056994781e-06, - "loss": 0.9762, - "step": 2113 - }, - { - "epoch": 0.19064796861613384, - "grad_norm": 1.7792964060141052, - "learning_rate": 3.7354002398831144e-06, - "loss": 1.0174, - "step": 2114 - }, - { - "epoch": 0.1907381521396041, - "grad_norm": 1.8835469313269013, - "learning_rate": 3.7351097617445015e-06, - "loss": 0.9729, - "step": 2115 - }, - { - "epoch": 0.19082833566307436, - "grad_norm": 0.9809344883897141, - "learning_rate": 3.7348191355567567e-06, - "loss": 0.8468, - "step": 2116 - }, - { - "epoch": 0.19091851918654462, - "grad_norm": 1.5484154477363148, - "learning_rate": 3.734528361344677e-06, - "loss": 1.0037, - "step": 2117 - }, - { - "epoch": 0.19100870271001488, - "grad_norm": 1.7220889171500893, - "learning_rate": 3.734237439133074e-06, - "loss": 0.9698, - "step": 2118 - }, - { - "epoch": 0.19109888623348514, - "grad_norm": 1.586870642434831, - "learning_rate": 3.7339463689467702e-06, - "loss": 1.0231, - "step": 2119 - }, - { - "epoch": 0.1911890697569554, - "grad_norm": 1.4548570708241753, - "learning_rate": 3.733655150810601e-06, - "loss": 0.9446, - "step": 2120 - }, - { - "epoch": 0.19127925328042567, - "grad_norm": 1.4709414102904725, - "learning_rate": 3.7333637847494154e-06, - "loss": 1.1067, - "step": 2121 - }, - { - "epoch": 0.19136943680389593, - "grad_norm": 1.4407370236216361, - "learning_rate": 3.7330722707880734e-06, - "loss": 1.133, - "step": 2122 - }, - { - "epoch": 0.1914596203273662, - "grad_norm": 1.4799200566068234, - "learning_rate": 3.7327806089514497e-06, - "loss": 0.8888, - "step": 2123 - }, - { - "epoch": 0.19154980385083645, - "grad_norm": 1.863434215480706, - "learning_rate": 3.7324887992644297e-06, - "loss": 1.0728, - "step": 2124 - }, - { - "epoch": 0.1916399873743067, - "grad_norm": 0.8513939366926521, - "learning_rate": 3.7321968417519123e-06, - "loss": 0.8057, - "step": 2125 - }, - { - "epoch": 0.19173017089777697, - "grad_norm": 1.3618850772759683, - "learning_rate": 3.7319047364388097e-06, - "loss": 0.9645, - "step": 2126 - }, - { - "epoch": 0.19182035442124723, - "grad_norm": 1.000048159393315, - "learning_rate": 3.7316124833500453e-06, - "loss": 0.7902, - "step": 2127 - }, - { - "epoch": 0.1919105379447175, - "grad_norm": 1.307914033546645, - "learning_rate": 3.731320082510556e-06, - "loss": 0.9586, - "step": 2128 - }, - { - "epoch": 0.19200072146818775, - "grad_norm": 1.4483198066638727, - "learning_rate": 3.7310275339452906e-06, - "loss": 0.9985, - "step": 2129 - }, - { - "epoch": 0.192090904991658, - "grad_norm": 1.8376404351785731, - "learning_rate": 3.7307348376792113e-06, - "loss": 0.9643, - "step": 2130 - }, - { - "epoch": 0.19218108851512827, - "grad_norm": 1.345931522381796, - "learning_rate": 3.730441993737292e-06, - "loss": 0.9648, - "step": 2131 - }, - { - "epoch": 0.19227127203859856, - "grad_norm": 1.6357915250255262, - "learning_rate": 3.7301490021445205e-06, - "loss": 0.9598, - "step": 2132 - }, - { - "epoch": 0.19236145556206882, - "grad_norm": 7.202820214962489, - "learning_rate": 3.7298558629258966e-06, - "loss": 0.9134, - "step": 2133 - }, - { - "epoch": 0.19245163908553908, - "grad_norm": 1.3548152348576603, - "learning_rate": 3.7295625761064314e-06, - "loss": 1.0162, - "step": 2134 - }, - { - "epoch": 0.19254182260900934, - "grad_norm": 1.8015295364791788, - "learning_rate": 3.7292691417111504e-06, - "loss": 0.9414, - "step": 2135 - }, - { - "epoch": 0.1926320061324796, - "grad_norm": 1.4570908930539397, - "learning_rate": 3.728975559765092e-06, - "loss": 0.9363, - "step": 2136 - }, - { - "epoch": 0.19272218965594987, - "grad_norm": 1.6604564440217293, - "learning_rate": 3.728681830293305e-06, - "loss": 1.0076, - "step": 2137 - }, - { - "epoch": 0.19281237317942013, - "grad_norm": 1.542015766449118, - "learning_rate": 3.7283879533208523e-06, - "loss": 0.9318, - "step": 2138 - }, - { - "epoch": 0.1929025567028904, - "grad_norm": 1.4780471039261776, - "learning_rate": 3.7280939288728094e-06, - "loss": 0.951, - "step": 2139 - }, - { - "epoch": 0.19299274022636065, - "grad_norm": 1.5521381889053874, - "learning_rate": 3.7277997569742637e-06, - "loss": 0.9432, - "step": 2140 - }, - { - "epoch": 0.1930829237498309, - "grad_norm": 2.09935000214794, - "learning_rate": 3.7275054376503155e-06, - "loss": 0.9329, - "step": 2141 - }, - { - "epoch": 0.19317310727330117, - "grad_norm": 1.6976869189761317, - "learning_rate": 3.7272109709260783e-06, - "loss": 0.9175, - "step": 2142 - }, - { - "epoch": 0.19326329079677143, - "grad_norm": 0.7975120615869189, - "learning_rate": 3.7269163568266774e-06, - "loss": 0.812, - "step": 2143 - }, - { - "epoch": 0.1933534743202417, - "grad_norm": 1.693408864144008, - "learning_rate": 3.7266215953772512e-06, - "loss": 0.9565, - "step": 2144 - }, - { - "epoch": 0.19344365784371195, - "grad_norm": 1.628640791305026, - "learning_rate": 3.7263266866029492e-06, - "loss": 1.0088, - "step": 2145 - }, - { - "epoch": 0.1935338413671822, - "grad_norm": 1.3367818651398349, - "learning_rate": 3.726031630528936e-06, - "loss": 0.9361, - "step": 2146 - }, - { - "epoch": 0.19362402489065247, - "grad_norm": 1.2934328381532028, - "learning_rate": 3.7257364271803865e-06, - "loss": 0.8652, - "step": 2147 - }, - { - "epoch": 0.19371420841412273, - "grad_norm": 1.8663980898193708, - "learning_rate": 3.7254410765824896e-06, - "loss": 0.9047, - "step": 2148 - }, - { - "epoch": 0.193804391937593, - "grad_norm": 1.7073200601292444, - "learning_rate": 3.725145578760446e-06, - "loss": 0.8901, - "step": 2149 - }, - { - "epoch": 0.19389457546106326, - "grad_norm": 1.9152431661336773, - "learning_rate": 3.7248499337394696e-06, - "loss": 0.8844, - "step": 2150 - }, - { - "epoch": 0.19398475898453352, - "grad_norm": 1.8288809362694125, - "learning_rate": 3.7245541415447848e-06, - "loss": 1.0672, - "step": 2151 - }, - { - "epoch": 0.19407494250800378, - "grad_norm": 1.5282012015535813, - "learning_rate": 3.724258202201633e-06, - "loss": 0.9952, - "step": 2152 - }, - { - "epoch": 0.19416512603147404, - "grad_norm": 1.4694828883340034, - "learning_rate": 3.7239621157352633e-06, - "loss": 0.9928, - "step": 2153 - }, - { - "epoch": 0.1942553095549443, - "grad_norm": 1.481817270455237, - "learning_rate": 3.7236658821709403e-06, - "loss": 0.9592, - "step": 2154 - }, - { - "epoch": 0.19434549307841456, - "grad_norm": 1.2947559279125487, - "learning_rate": 3.7233695015339404e-06, - "loss": 1.0147, - "step": 2155 - }, - { - "epoch": 0.19443567660188485, - "grad_norm": 1.5959822785246922, - "learning_rate": 3.7230729738495513e-06, - "loss": 0.9556, - "step": 2156 - }, - { - "epoch": 0.1945258601253551, - "grad_norm": 1.3140608271566623, - "learning_rate": 3.722776299143075e-06, - "loss": 0.9679, - "step": 2157 - }, - { - "epoch": 0.19461604364882537, - "grad_norm": 0.9669637208561316, - "learning_rate": 3.722479477439826e-06, - "loss": 0.8162, - "step": 2158 - }, - { - "epoch": 0.19470622717229563, - "grad_norm": 1.32745124332238, - "learning_rate": 3.7221825087651306e-06, - "loss": 0.9494, - "step": 2159 - }, - { - "epoch": 0.1947964106957659, - "grad_norm": 1.8808002403208823, - "learning_rate": 3.7218853931443274e-06, - "loss": 0.9032, - "step": 2160 - }, - { - "epoch": 0.19488659421923615, - "grad_norm": 1.2647969869318378, - "learning_rate": 3.721588130602768e-06, - "loss": 0.9106, - "step": 2161 - }, - { - "epoch": 0.19497677774270641, - "grad_norm": 1.642025812756953, - "learning_rate": 3.7212907211658164e-06, - "loss": 0.9465, - "step": 2162 - }, - { - "epoch": 0.19506696126617667, - "grad_norm": 1.3874068151576342, - "learning_rate": 3.72099316485885e-06, - "loss": 0.9285, - "step": 2163 - }, - { - "epoch": 0.19515714478964694, - "grad_norm": 1.3961569235436955, - "learning_rate": 3.720695461707256e-06, - "loss": 1.0154, - "step": 2164 - }, - { - "epoch": 0.1952473283131172, - "grad_norm": 1.575448892595549, - "learning_rate": 3.7203976117364383e-06, - "loss": 0.9957, - "step": 2165 - }, - { - "epoch": 0.19533751183658746, - "grad_norm": 1.7711176550505097, - "learning_rate": 3.7200996149718105e-06, - "loss": 0.9473, - "step": 2166 - }, - { - "epoch": 0.19542769536005772, - "grad_norm": 1.1885953168381038, - "learning_rate": 3.7198014714387985e-06, - "loss": 0.9587, - "step": 2167 - }, - { - "epoch": 0.19551787888352798, - "grad_norm": 1.526440599742098, - "learning_rate": 3.7195031811628422e-06, - "loss": 0.9022, - "step": 2168 - }, - { - "epoch": 0.19560806240699824, - "grad_norm": 1.9643617801057571, - "learning_rate": 3.719204744169393e-06, - "loss": 1.0081, - "step": 2169 - }, - { - "epoch": 0.1956982459304685, - "grad_norm": 1.5105819962440665, - "learning_rate": 3.718906160483916e-06, - "loss": 0.9432, - "step": 2170 - }, - { - "epoch": 0.19578842945393876, - "grad_norm": 1.5070703290793637, - "learning_rate": 3.7186074301318868e-06, - "loss": 0.941, - "step": 2171 - }, - { - "epoch": 0.19587861297740902, - "grad_norm": 1.3371280473836817, - "learning_rate": 3.7183085531387957e-06, - "loss": 1.0419, - "step": 2172 - }, - { - "epoch": 0.19596879650087928, - "grad_norm": 1.4391787304986488, - "learning_rate": 3.7180095295301443e-06, - "loss": 0.9527, - "step": 2173 - }, - { - "epoch": 0.19605898002434954, - "grad_norm": 1.155078294255409, - "learning_rate": 3.7177103593314465e-06, - "loss": 1.0317, - "step": 2174 - }, - { - "epoch": 0.1961491635478198, - "grad_norm": 1.4854950543979017, - "learning_rate": 3.7174110425682297e-06, - "loss": 1.0012, - "step": 2175 - }, - { - "epoch": 0.19623934707129007, - "grad_norm": 2.8282940218667503, - "learning_rate": 3.7171115792660333e-06, - "loss": 1.097, - "step": 2176 - }, - { - "epoch": 0.19632953059476033, - "grad_norm": 1.8032397122974844, - "learning_rate": 3.7168119694504083e-06, - "loss": 1.0304, - "step": 2177 - }, - { - "epoch": 0.1964197141182306, - "grad_norm": 1.4756184946100166, - "learning_rate": 3.71651221314692e-06, - "loss": 0.9684, - "step": 2178 - }, - { - "epoch": 0.19650989764170085, - "grad_norm": 1.5049240829482469, - "learning_rate": 3.716212310381145e-06, - "loss": 0.9758, - "step": 2179 - }, - { - "epoch": 0.19660008116517114, - "grad_norm": 1.8507056205114645, - "learning_rate": 3.7159122611786725e-06, - "loss": 1.0169, - "step": 2180 - }, - { - "epoch": 0.1966902646886414, - "grad_norm": 1.6275142146367276, - "learning_rate": 3.7156120655651045e-06, - "loss": 0.9568, - "step": 2181 - }, - { - "epoch": 0.19678044821211166, - "grad_norm": 1.2985582573255838, - "learning_rate": 3.7153117235660553e-06, - "loss": 0.9587, - "step": 2182 - }, - { - "epoch": 0.19687063173558192, - "grad_norm": 0.9495533796816629, - "learning_rate": 3.7150112352071514e-06, - "loss": 0.828, - "step": 2183 - }, - { - "epoch": 0.19696081525905218, - "grad_norm": 1.6138058010532692, - "learning_rate": 3.7147106005140326e-06, - "loss": 0.9393, - "step": 2184 - }, - { - "epoch": 0.19705099878252244, - "grad_norm": 0.997746999223288, - "learning_rate": 3.714409819512351e-06, - "loss": 0.8121, - "step": 2185 - }, - { - "epoch": 0.1971411823059927, - "grad_norm": 1.4913835207054922, - "learning_rate": 3.7141088922277695e-06, - "loss": 0.9988, - "step": 2186 - }, - { - "epoch": 0.19723136582946296, - "grad_norm": 1.9690724668869592, - "learning_rate": 3.7138078186859664e-06, - "loss": 1.0155, - "step": 2187 - }, - { - "epoch": 0.19732154935293322, - "grad_norm": 1.4649273250898147, - "learning_rate": 3.7135065989126303e-06, - "loss": 0.9847, - "step": 2188 - }, - { - "epoch": 0.19741173287640348, - "grad_norm": 1.6508976084279678, - "learning_rate": 3.713205232933463e-06, - "loss": 0.9028, - "step": 2189 - }, - { - "epoch": 0.19750191639987374, - "grad_norm": 1.5704105023979782, - "learning_rate": 3.7129037207741792e-06, - "loss": 0.9841, - "step": 2190 - }, - { - "epoch": 0.197592099923344, - "grad_norm": 1.7546347143941474, - "learning_rate": 3.7126020624605046e-06, - "loss": 0.982, - "step": 2191 - }, - { - "epoch": 0.19768228344681427, - "grad_norm": 1.4803669675599285, - "learning_rate": 3.7123002580181785e-06, - "loss": 0.9254, - "step": 2192 - }, - { - "epoch": 0.19777246697028453, - "grad_norm": 1.294644241144352, - "learning_rate": 3.7119983074729532e-06, - "loss": 1.0431, - "step": 2193 - }, - { - "epoch": 0.1978626504937548, - "grad_norm": 1.565699924648264, - "learning_rate": 3.7116962108505926e-06, - "loss": 1.022, - "step": 2194 - }, - { - "epoch": 0.19795283401722505, - "grad_norm": 1.2985249789409261, - "learning_rate": 3.711393968176873e-06, - "loss": 1.0736, - "step": 2195 - }, - { - "epoch": 0.1980430175406953, - "grad_norm": 1.2502475970145421, - "learning_rate": 3.711091579477584e-06, - "loss": 0.9884, - "step": 2196 - }, - { - "epoch": 0.19813320106416557, - "grad_norm": 1.8159691069239714, - "learning_rate": 3.7107890447785255e-06, - "loss": 0.9949, - "step": 2197 - }, - { - "epoch": 0.19822338458763583, - "grad_norm": 1.6802558824401876, - "learning_rate": 3.710486364105513e-06, - "loss": 1.0265, - "step": 2198 - }, - { - "epoch": 0.1983135681111061, - "grad_norm": 1.6837257988428778, - "learning_rate": 3.7101835374843728e-06, - "loss": 0.9658, - "step": 2199 - }, - { - "epoch": 0.19840375163457635, - "grad_norm": 1.359877964673307, - "learning_rate": 3.7098805649409427e-06, - "loss": 0.9976, - "step": 2200 - }, - { - "epoch": 0.1984939351580466, - "grad_norm": 1.4841881082232908, - "learning_rate": 3.7095774465010748e-06, - "loss": 1.0407, - "step": 2201 - }, - { - "epoch": 0.19858411868151687, - "grad_norm": 1.5981704145713718, - "learning_rate": 3.7092741821906328e-06, - "loss": 0.9621, - "step": 2202 - }, - { - "epoch": 0.19867430220498714, - "grad_norm": 1.7857452689616446, - "learning_rate": 3.708970772035493e-06, - "loss": 0.9543, - "step": 2203 - }, - { - "epoch": 0.19876448572845742, - "grad_norm": 1.36153183757865, - "learning_rate": 3.7086672160615427e-06, - "loss": 0.9419, - "step": 2204 - }, - { - "epoch": 0.19885466925192768, - "grad_norm": 1.3424140143962158, - "learning_rate": 3.7083635142946852e-06, - "loss": 0.957, - "step": 2205 - }, - { - "epoch": 0.19894485277539795, - "grad_norm": 1.442637178982257, - "learning_rate": 3.7080596667608327e-06, - "loss": 1.0031, - "step": 2206 - }, - { - "epoch": 0.1990350362988682, - "grad_norm": 1.3118692426477945, - "learning_rate": 3.707755673485911e-06, - "loss": 1.0169, - "step": 2207 - }, - { - "epoch": 0.19912521982233847, - "grad_norm": 1.611681498301815, - "learning_rate": 3.7074515344958584e-06, - "loss": 0.8906, - "step": 2208 - }, - { - "epoch": 0.19921540334580873, - "grad_norm": 0.9381914132313421, - "learning_rate": 3.707147249816627e-06, - "loss": 0.8352, - "step": 2209 - }, - { - "epoch": 0.199305586869279, - "grad_norm": 1.3893178732964293, - "learning_rate": 3.706842819474178e-06, - "loss": 1.0002, - "step": 2210 - }, - { - "epoch": 0.19939577039274925, - "grad_norm": 1.712031811387427, - "learning_rate": 3.706538243494489e-06, - "loss": 0.9142, - "step": 2211 - }, - { - "epoch": 0.1994859539162195, - "grad_norm": 1.8649790324446358, - "learning_rate": 3.706233521903547e-06, - "loss": 0.9492, - "step": 2212 - }, - { - "epoch": 0.19957613743968977, - "grad_norm": 1.7260182070782948, - "learning_rate": 3.705928654727353e-06, - "loss": 1.056, - "step": 2213 - }, - { - "epoch": 0.19966632096316003, - "grad_norm": 1.4237842526891378, - "learning_rate": 3.7056236419919195e-06, - "loss": 0.9441, - "step": 2214 - }, - { - "epoch": 0.1997565044866303, - "grad_norm": 1.4029513772028412, - "learning_rate": 3.705318483723273e-06, - "loss": 1.029, - "step": 2215 - }, - { - "epoch": 0.19984668801010055, - "grad_norm": 1.6447487005508978, - "learning_rate": 3.7050131799474493e-06, - "loss": 0.9531, - "step": 2216 - }, - { - "epoch": 0.19993687153357081, - "grad_norm": 1.3991806357350776, - "learning_rate": 3.7047077306905e-06, - "loss": 0.9729, - "step": 2217 - }, - { - "epoch": 0.20002705505704108, - "grad_norm": 1.3568748195091067, - "learning_rate": 3.704402135978488e-06, - "loss": 1.0152, - "step": 2218 - }, - { - "epoch": 0.20011723858051134, - "grad_norm": 2.0005398260193425, - "learning_rate": 3.7040963958374877e-06, - "loss": 0.9979, - "step": 2219 - }, - { - "epoch": 0.2002074221039816, - "grad_norm": 1.3502884962742114, - "learning_rate": 3.7037905102935864e-06, - "loss": 1.0053, - "step": 2220 - }, - { - "epoch": 0.20029760562745186, - "grad_norm": 1.2993414531302039, - "learning_rate": 3.7034844793728837e-06, - "loss": 0.975, - "step": 2221 - }, - { - "epoch": 0.20038778915092212, - "grad_norm": 1.3270582964265598, - "learning_rate": 3.7031783031014933e-06, - "loss": 0.8657, - "step": 2222 - }, - { - "epoch": 0.20047797267439238, - "grad_norm": 1.409194956780688, - "learning_rate": 3.702871981505538e-06, - "loss": 0.9525, - "step": 2223 - }, - { - "epoch": 0.20056815619786264, - "grad_norm": 1.4126399772625764, - "learning_rate": 3.7025655146111563e-06, - "loss": 0.9094, - "step": 2224 - }, - { - "epoch": 0.2006583397213329, - "grad_norm": 0.730567884603044, - "learning_rate": 3.702258902444497e-06, - "loss": 0.8915, - "step": 2225 - }, - { - "epoch": 0.20074852324480316, - "grad_norm": 1.3814956084190921, - "learning_rate": 3.701952145031722e-06, - "loss": 1.0293, - "step": 2226 - }, - { - "epoch": 0.20083870676827342, - "grad_norm": 1.4855862945534546, - "learning_rate": 3.701645242399005e-06, - "loss": 1.0538, - "step": 2227 - }, - { - "epoch": 0.2009288902917437, - "grad_norm": 1.6160021874724586, - "learning_rate": 3.701338194572533e-06, - "loss": 1.0471, - "step": 2228 - }, - { - "epoch": 0.20101907381521397, - "grad_norm": 1.3552575290360316, - "learning_rate": 3.7010310015785056e-06, - "loss": 0.9615, - "step": 2229 - }, - { - "epoch": 0.20110925733868423, - "grad_norm": 1.6264090664454671, - "learning_rate": 3.700723663443134e-06, - "loss": 0.9174, - "step": 2230 - }, - { - "epoch": 0.2011994408621545, - "grad_norm": 0.9832790571255299, - "learning_rate": 3.7004161801926416e-06, - "loss": 0.814, - "step": 2231 - }, - { - "epoch": 0.20128962438562475, - "grad_norm": 1.5653294693629995, - "learning_rate": 3.7001085518532643e-06, - "loss": 0.9967, - "step": 2232 - }, - { - "epoch": 0.20137980790909502, - "grad_norm": 1.5225993663013362, - "learning_rate": 3.6998007784512515e-06, - "loss": 0.9726, - "step": 2233 - }, - { - "epoch": 0.20146999143256528, - "grad_norm": 1.410585755430618, - "learning_rate": 3.6994928600128637e-06, - "loss": 0.8561, - "step": 2234 - }, - { - "epoch": 0.20156017495603554, - "grad_norm": 1.5342193928612422, - "learning_rate": 3.6991847965643742e-06, - "loss": 0.9476, - "step": 2235 - }, - { - "epoch": 0.2016503584795058, - "grad_norm": 1.4527072100806186, - "learning_rate": 3.698876588132068e-06, - "loss": 0.9347, - "step": 2236 - }, - { - "epoch": 0.20174054200297606, - "grad_norm": 1.2934109948542274, - "learning_rate": 3.6985682347422446e-06, - "loss": 0.9282, - "step": 2237 - }, - { - "epoch": 0.20183072552644632, - "grad_norm": 1.525814998661696, - "learning_rate": 3.698259736421213e-06, - "loss": 0.9836, - "step": 2238 - }, - { - "epoch": 0.20192090904991658, - "grad_norm": 1.6277280063564066, - "learning_rate": 3.697951093195297e-06, - "loss": 0.9508, - "step": 2239 - }, - { - "epoch": 0.20201109257338684, - "grad_norm": 1.348724472610395, - "learning_rate": 3.6976423050908307e-06, - "loss": 0.9443, - "step": 2240 - }, - { - "epoch": 0.2021012760968571, - "grad_norm": 1.3832346385004244, - "learning_rate": 3.697333372134163e-06, - "loss": 0.9632, - "step": 2241 - }, - { - "epoch": 0.20219145962032736, - "grad_norm": 1.7524512017784883, - "learning_rate": 3.697024294351653e-06, - "loss": 1.0714, - "step": 2242 - }, - { - "epoch": 0.20228164314379762, - "grad_norm": 2.2678747122805984, - "learning_rate": 3.696715071769672e-06, - "loss": 0.9344, - "step": 2243 - }, - { - "epoch": 0.20237182666726788, - "grad_norm": 1.4901210996840601, - "learning_rate": 3.696405704414606e-06, - "loss": 0.9753, - "step": 2244 - }, - { - "epoch": 0.20246201019073815, - "grad_norm": 1.362457507459544, - "learning_rate": 3.6960961923128514e-06, - "loss": 1.0743, - "step": 2245 - }, - { - "epoch": 0.2025521937142084, - "grad_norm": 1.9125376953199895, - "learning_rate": 3.6957865354908177e-06, - "loss": 0.9156, - "step": 2246 - }, - { - "epoch": 0.20264237723767867, - "grad_norm": 1.8321824001271858, - "learning_rate": 3.6954767339749262e-06, - "loss": 0.9906, - "step": 2247 - }, - { - "epoch": 0.20273256076114893, - "grad_norm": 1.776915446225987, - "learning_rate": 3.6951667877916113e-06, - "loss": 0.9551, - "step": 2248 - }, - { - "epoch": 0.2028227442846192, - "grad_norm": 1.4659582348920293, - "learning_rate": 3.694856696967319e-06, - "loss": 0.9833, - "step": 2249 - }, - { - "epoch": 0.20291292780808945, - "grad_norm": 1.574408205052449, - "learning_rate": 3.6945464615285077e-06, - "loss": 0.9581, - "step": 2250 - }, - { - "epoch": 0.2030031113315597, - "grad_norm": 1.0247225665077557, - "learning_rate": 3.694236081501648e-06, - "loss": 0.8997, - "step": 2251 - }, - { - "epoch": 0.20309329485503, - "grad_norm": 1.5021838344579248, - "learning_rate": 3.6939255569132246e-06, - "loss": 0.9382, - "step": 2252 - }, - { - "epoch": 0.20318347837850026, - "grad_norm": 0.9197071902117727, - "learning_rate": 3.693614887789733e-06, - "loss": 0.8542, - "step": 2253 - }, - { - "epoch": 0.20327366190197052, - "grad_norm": 0.802922321973389, - "learning_rate": 3.69330407415768e-06, - "loss": 0.8514, - "step": 2254 - }, - { - "epoch": 0.20336384542544078, - "grad_norm": 1.5315219092774357, - "learning_rate": 3.6929931160435867e-06, - "loss": 0.9776, - "step": 2255 - }, - { - "epoch": 0.20345402894891104, - "grad_norm": 1.4876783857049962, - "learning_rate": 3.6926820134739858e-06, - "loss": 1.0244, - "step": 2256 - }, - { - "epoch": 0.2035442124723813, - "grad_norm": 1.4154103636393083, - "learning_rate": 3.692370766475422e-06, - "loss": 0.9121, - "step": 2257 - }, - { - "epoch": 0.20363439599585156, - "grad_norm": 1.4894330231205224, - "learning_rate": 3.692059375074453e-06, - "loss": 1.0564, - "step": 2258 - }, - { - "epoch": 0.20372457951932182, - "grad_norm": 1.3100647814217456, - "learning_rate": 3.6917478392976475e-06, - "loss": 1.0341, - "step": 2259 - }, - { - "epoch": 0.20381476304279209, - "grad_norm": 1.691229196492409, - "learning_rate": 3.691436159171589e-06, - "loss": 1.0296, - "step": 2260 - }, - { - "epoch": 0.20390494656626235, - "grad_norm": 1.62590243883692, - "learning_rate": 3.6911243347228703e-06, - "loss": 0.952, - "step": 2261 - }, - { - "epoch": 0.2039951300897326, - "grad_norm": 0.6993372939226145, - "learning_rate": 3.690812365978099e-06, - "loss": 0.7507, - "step": 2262 - }, - { - "epoch": 0.20408531361320287, - "grad_norm": 1.3146004898878236, - "learning_rate": 3.690500252963893e-06, - "loss": 0.9335, - "step": 2263 - }, - { - "epoch": 0.20417549713667313, - "grad_norm": 1.3880613892471052, - "learning_rate": 3.6901879957068846e-06, - "loss": 0.9618, - "step": 2264 - }, - { - "epoch": 0.2042656806601434, - "grad_norm": 1.4421076528281727, - "learning_rate": 3.689875594233717e-06, - "loss": 1.0614, - "step": 2265 - }, - { - "epoch": 0.20435586418361365, - "grad_norm": 1.993743169408117, - "learning_rate": 3.689563048571046e-06, - "loss": 0.9483, - "step": 2266 - }, - { - "epoch": 0.2044460477070839, - "grad_norm": 1.324948540174107, - "learning_rate": 3.6892503587455395e-06, - "loss": 0.9708, - "step": 2267 - }, - { - "epoch": 0.20453623123055417, - "grad_norm": 1.4852738720642134, - "learning_rate": 3.6889375247838766e-06, - "loss": 0.9806, - "step": 2268 - }, - { - "epoch": 0.20462641475402443, - "grad_norm": 1.3712612386710643, - "learning_rate": 3.688624546712753e-06, - "loss": 0.969, - "step": 2269 - }, - { - "epoch": 0.2047165982774947, - "grad_norm": 1.331500397894496, - "learning_rate": 3.688311424558871e-06, - "loss": 0.9303, - "step": 2270 - }, - { - "epoch": 0.20480678180096495, - "grad_norm": 1.9994066669595263, - "learning_rate": 3.6879981583489496e-06, - "loss": 0.8782, - "step": 2271 - }, - { - "epoch": 0.20489696532443522, - "grad_norm": 1.4043969979492323, - "learning_rate": 3.687684748109718e-06, - "loss": 0.9876, - "step": 2272 - }, - { - "epoch": 0.20498714884790548, - "grad_norm": 1.6905752347436696, - "learning_rate": 3.6873711938679174e-06, - "loss": 1.0297, - "step": 2273 - }, - { - "epoch": 0.20507733237137574, - "grad_norm": 1.431462254673156, - "learning_rate": 3.6870574956503027e-06, - "loss": 0.9549, - "step": 2274 - }, - { - "epoch": 0.20516751589484603, - "grad_norm": 1.6832511535577124, - "learning_rate": 3.68674365348364e-06, - "loss": 0.97, - "step": 2275 - }, - { - "epoch": 0.2052576994183163, - "grad_norm": 1.5574579139521876, - "learning_rate": 3.6864296673947086e-06, - "loss": 0.9722, - "step": 2276 - }, - { - "epoch": 0.20534788294178655, - "grad_norm": 1.395661096716797, - "learning_rate": 3.686115537410298e-06, - "loss": 0.9563, - "step": 2277 - }, - { - "epoch": 0.2054380664652568, - "grad_norm": 1.505767859066579, - "learning_rate": 3.685801263557214e-06, - "loss": 0.9236, - "step": 2278 - }, - { - "epoch": 0.20552824998872707, - "grad_norm": 1.5329891371370112, - "learning_rate": 3.68548684586227e-06, - "loss": 1.0539, - "step": 2279 - }, - { - "epoch": 0.20561843351219733, - "grad_norm": 1.5677327843805, - "learning_rate": 3.685172284352295e-06, - "loss": 0.9756, - "step": 2280 - }, - { - "epoch": 0.2057086170356676, - "grad_norm": 1.1377006563380399, - "learning_rate": 3.684857579054128e-06, - "loss": 0.9077, - "step": 2281 - }, - { - "epoch": 0.20579880055913785, - "grad_norm": 1.7859653500765893, - "learning_rate": 3.6845427299946233e-06, - "loss": 1.034, - "step": 2282 - }, - { - "epoch": 0.2058889840826081, - "grad_norm": 1.4290323058975358, - "learning_rate": 3.6842277372006434e-06, - "loss": 1.068, - "step": 2283 - }, - { - "epoch": 0.20597916760607837, - "grad_norm": 1.4220996721105705, - "learning_rate": 3.6839126006990664e-06, - "loss": 0.95, - "step": 2284 - }, - { - "epoch": 0.20606935112954863, - "grad_norm": 1.2968422299576576, - "learning_rate": 3.6835973205167818e-06, - "loss": 1.0228, - "step": 2285 - }, - { - "epoch": 0.2061595346530189, - "grad_norm": 1.5559664119341197, - "learning_rate": 3.6832818966806904e-06, - "loss": 1.0072, - "step": 2286 - }, - { - "epoch": 0.20624971817648916, - "grad_norm": 1.564792477191012, - "learning_rate": 3.682966329217706e-06, - "loss": 1.0481, - "step": 2287 - }, - { - "epoch": 0.20633990169995942, - "grad_norm": 1.7099602403535843, - "learning_rate": 3.6826506181547543e-06, - "loss": 1.006, - "step": 2288 - }, - { - "epoch": 0.20643008522342968, - "grad_norm": 1.4805746317420412, - "learning_rate": 3.682334763518774e-06, - "loss": 0.9899, - "step": 2289 - }, - { - "epoch": 0.20652026874689994, - "grad_norm": 1.823498863619426, - "learning_rate": 3.6820187653367158e-06, - "loss": 0.9436, - "step": 2290 - }, - { - "epoch": 0.2066104522703702, - "grad_norm": 1.428870045582339, - "learning_rate": 3.6817026236355412e-06, - "loss": 0.8715, - "step": 2291 - }, - { - "epoch": 0.20670063579384046, - "grad_norm": 1.8578259723453965, - "learning_rate": 3.681386338442227e-06, - "loss": 0.965, - "step": 2292 - }, - { - "epoch": 0.20679081931731072, - "grad_norm": 1.374395844638436, - "learning_rate": 3.681069909783758e-06, - "loss": 0.9459, - "step": 2293 - }, - { - "epoch": 0.20688100284078098, - "grad_norm": 1.4244734945887512, - "learning_rate": 3.680753337687136e-06, - "loss": 0.9818, - "step": 2294 - }, - { - "epoch": 0.20697118636425124, - "grad_norm": 1.4908550764658677, - "learning_rate": 3.680436622179371e-06, - "loss": 0.9077, - "step": 2295 - }, - { - "epoch": 0.2070613698877215, - "grad_norm": 1.4754939173667092, - "learning_rate": 3.680119763287488e-06, - "loss": 0.9583, - "step": 2296 - }, - { - "epoch": 0.20715155341119176, - "grad_norm": 1.540959926512428, - "learning_rate": 3.6798027610385227e-06, - "loss": 0.9131, - "step": 2297 - }, - { - "epoch": 0.20724173693466202, - "grad_norm": 1.6680500805789915, - "learning_rate": 3.6794856154595235e-06, - "loss": 1.0215, - "step": 2298 - }, - { - "epoch": 0.2073319204581323, - "grad_norm": 1.8267474078458503, - "learning_rate": 3.6791683265775506e-06, - "loss": 0.9815, - "step": 2299 - }, - { - "epoch": 0.20742210398160257, - "grad_norm": 1.6898795411106542, - "learning_rate": 3.6788508944196773e-06, - "loss": 0.9369, - "step": 2300 - }, - { - "epoch": 0.20751228750507283, - "grad_norm": 1.2751537529484227, - "learning_rate": 3.678533319012989e-06, - "loss": 1.0402, - "step": 2301 - }, - { - "epoch": 0.2076024710285431, - "grad_norm": 1.407826324293403, - "learning_rate": 3.6782156003845826e-06, - "loss": 0.9275, - "step": 2302 - }, - { - "epoch": 0.20769265455201336, - "grad_norm": 0.8799155310708882, - "learning_rate": 3.6778977385615676e-06, - "loss": 0.824, - "step": 2303 - }, - { - "epoch": 0.20778283807548362, - "grad_norm": 1.4236637643313954, - "learning_rate": 3.6775797335710656e-06, - "loss": 1.0153, - "step": 2304 - }, - { - "epoch": 0.20787302159895388, - "grad_norm": 1.6248474049446824, - "learning_rate": 3.6772615854402105e-06, - "loss": 0.9246, - "step": 2305 - }, - { - "epoch": 0.20796320512242414, - "grad_norm": 1.2519290344491387, - "learning_rate": 3.6769432941961487e-06, - "loss": 0.8504, - "step": 2306 - }, - { - "epoch": 0.2080533886458944, - "grad_norm": 1.4166803733312199, - "learning_rate": 3.676624859866038e-06, - "loss": 0.9701, - "step": 2307 - }, - { - "epoch": 0.20814357216936466, - "grad_norm": 2.230341865952342, - "learning_rate": 3.67630628247705e-06, - "loss": 0.8989, - "step": 2308 - }, - { - "epoch": 0.20823375569283492, - "grad_norm": 1.4668933415089045, - "learning_rate": 3.675987562056367e-06, - "loss": 1.0276, - "step": 2309 - }, - { - "epoch": 0.20832393921630518, - "grad_norm": 1.3600664243885607, - "learning_rate": 3.675668698631184e-06, - "loss": 1.0039, - "step": 2310 - }, - { - "epoch": 0.20841412273977544, - "grad_norm": 1.4905571944131815, - "learning_rate": 3.675349692228708e-06, - "loss": 1.0431, - "step": 2311 - }, - { - "epoch": 0.2085043062632457, - "grad_norm": 1.442354464139373, - "learning_rate": 3.6750305428761578e-06, - "loss": 0.9377, - "step": 2312 - }, - { - "epoch": 0.20859448978671596, - "grad_norm": 1.757125516799272, - "learning_rate": 3.674711250600766e-06, - "loss": 0.9908, - "step": 2313 - }, - { - "epoch": 0.20868467331018623, - "grad_norm": 0.7801152571473343, - "learning_rate": 3.6743918154297765e-06, - "loss": 0.7917, - "step": 2314 - }, - { - "epoch": 0.20877485683365649, - "grad_norm": 1.4262804803424736, - "learning_rate": 3.6740722373904446e-06, - "loss": 0.9597, - "step": 2315 - }, - { - "epoch": 0.20886504035712675, - "grad_norm": 1.2627032900010227, - "learning_rate": 3.6737525165100383e-06, - "loss": 0.9499, - "step": 2316 - }, - { - "epoch": 0.208955223880597, - "grad_norm": 1.526810184559922, - "learning_rate": 3.6734326528158385e-06, - "loss": 0.9955, - "step": 2317 - }, - { - "epoch": 0.20904540740406727, - "grad_norm": 1.262289290265641, - "learning_rate": 3.673112646335138e-06, - "loss": 0.9612, - "step": 2318 - }, - { - "epoch": 0.20913559092753753, - "grad_norm": 1.508223087363439, - "learning_rate": 3.672792497095241e-06, - "loss": 0.9269, - "step": 2319 - }, - { - "epoch": 0.2092257744510078, - "grad_norm": 1.6900811411773777, - "learning_rate": 3.672472205123464e-06, - "loss": 0.9747, - "step": 2320 - }, - { - "epoch": 0.20931595797447805, - "grad_norm": 1.6079150494345527, - "learning_rate": 3.6721517704471363e-06, - "loss": 0.9009, - "step": 2321 - }, - { - "epoch": 0.2094061414979483, - "grad_norm": 1.4209459224775984, - "learning_rate": 3.6718311930936e-06, - "loss": 1.001, - "step": 2322 - }, - { - "epoch": 0.2094963250214186, - "grad_norm": 1.7328599378113787, - "learning_rate": 3.6715104730902074e-06, - "loss": 0.9321, - "step": 2323 - }, - { - "epoch": 0.20958650854488886, - "grad_norm": 1.917952527756466, - "learning_rate": 3.671189610464325e-06, - "loss": 1.0072, - "step": 2324 - }, - { - "epoch": 0.20967669206835912, - "grad_norm": 1.5339492828808134, - "learning_rate": 3.6708686052433303e-06, - "loss": 0.9235, - "step": 2325 - }, - { - "epoch": 0.20976687559182938, - "grad_norm": 1.3491043934735911, - "learning_rate": 3.6705474574546127e-06, - "loss": 0.9842, - "step": 2326 - }, - { - "epoch": 0.20985705911529964, - "grad_norm": 1.4806659335673196, - "learning_rate": 3.670226167125575e-06, - "loss": 0.9874, - "step": 2327 - }, - { - "epoch": 0.2099472426387699, - "grad_norm": 2.0401699522694345, - "learning_rate": 3.6699047342836313e-06, - "loss": 0.9689, - "step": 2328 - }, - { - "epoch": 0.21003742616224017, - "grad_norm": 1.263868690976761, - "learning_rate": 3.669583158956208e-06, - "loss": 1.0122, - "step": 2329 - }, - { - "epoch": 0.21012760968571043, - "grad_norm": 0.7124420008560277, - "learning_rate": 3.669261441170743e-06, - "loss": 0.8279, - "step": 2330 - }, - { - "epoch": 0.2102177932091807, - "grad_norm": 1.4646594122294772, - "learning_rate": 3.668939580954688e-06, - "loss": 0.975, - "step": 2331 - }, - { - "epoch": 0.21030797673265095, - "grad_norm": 1.2160261967999337, - "learning_rate": 3.668617578335506e-06, - "loss": 0.9438, - "step": 2332 - }, - { - "epoch": 0.2103981602561212, - "grad_norm": 1.478064202314409, - "learning_rate": 3.6682954333406707e-06, - "loss": 1.0039, - "step": 2333 - }, - { - "epoch": 0.21048834377959147, - "grad_norm": 2.1723534585323834, - "learning_rate": 3.6679731459976707e-06, - "loss": 1.0995, - "step": 2334 - }, - { - "epoch": 0.21057852730306173, - "grad_norm": 1.3561386959267945, - "learning_rate": 3.6676507163340046e-06, - "loss": 0.9723, - "step": 2335 - }, - { - "epoch": 0.210668710826532, - "grad_norm": 1.5194199234453973, - "learning_rate": 3.6673281443771842e-06, - "loss": 1.0081, - "step": 2336 - }, - { - "epoch": 0.21075889435000225, - "grad_norm": 1.7397017600432854, - "learning_rate": 3.667005430154733e-06, - "loss": 0.8574, - "step": 2337 - }, - { - "epoch": 0.2108490778734725, - "grad_norm": 0.7370343855984633, - "learning_rate": 3.666682573694186e-06, - "loss": 0.896, - "step": 2338 - }, - { - "epoch": 0.21093926139694277, - "grad_norm": 0.6669520174723081, - "learning_rate": 3.6663595750230924e-06, - "loss": 0.7808, - "step": 2339 - }, - { - "epoch": 0.21102944492041303, - "grad_norm": 1.545056362543739, - "learning_rate": 3.666036434169012e-06, - "loss": 1.0233, - "step": 2340 - }, - { - "epoch": 0.2111196284438833, - "grad_norm": 1.3693920563088702, - "learning_rate": 3.665713151159516e-06, - "loss": 0.9548, - "step": 2341 - }, - { - "epoch": 0.21120981196735356, - "grad_norm": 1.5153282897592286, - "learning_rate": 3.665389726022189e-06, - "loss": 1.0729, - "step": 2342 - }, - { - "epoch": 0.21129999549082382, - "grad_norm": 1.4198616781159705, - "learning_rate": 3.6650661587846283e-06, - "loss": 0.9484, - "step": 2343 - }, - { - "epoch": 0.21139017901429408, - "grad_norm": 1.5904899614791406, - "learning_rate": 3.6647424494744418e-06, - "loss": 0.9379, - "step": 2344 - }, - { - "epoch": 0.21148036253776434, - "grad_norm": 1.3012161215293863, - "learning_rate": 3.6644185981192503e-06, - "loss": 1.0243, - "step": 2345 - }, - { - "epoch": 0.2115705460612346, - "grad_norm": 1.7240944047070517, - "learning_rate": 3.6640946047466868e-06, - "loss": 0.9014, - "step": 2346 - }, - { - "epoch": 0.2116607295847049, - "grad_norm": 1.597451119906369, - "learning_rate": 3.6637704693843953e-06, - "loss": 0.9385, - "step": 2347 - }, - { - "epoch": 0.21175091310817515, - "grad_norm": 1.5513076465356022, - "learning_rate": 3.6634461920600337e-06, - "loss": 0.911, - "step": 2348 - }, - { - "epoch": 0.2118410966316454, - "grad_norm": 1.6392867670373419, - "learning_rate": 3.66312177280127e-06, - "loss": 0.9989, - "step": 2349 - }, - { - "epoch": 0.21193128015511567, - "grad_norm": 1.3257298582655754, - "learning_rate": 3.6627972116357872e-06, - "loss": 1.0219, - "step": 2350 - }, - { - "epoch": 0.21202146367858593, - "grad_norm": 1.4437761543844378, - "learning_rate": 3.662472508591278e-06, - "loss": 1.035, - "step": 2351 - }, - { - "epoch": 0.2121116472020562, - "grad_norm": 1.6763162536954548, - "learning_rate": 3.662147663695447e-06, - "loss": 0.968, - "step": 2352 - }, - { - "epoch": 0.21220183072552645, - "grad_norm": 1.4637653908380548, - "learning_rate": 3.6618226769760127e-06, - "loss": 0.9608, - "step": 2353 - }, - { - "epoch": 0.2122920142489967, - "grad_norm": 1.7297786257895202, - "learning_rate": 3.661497548460704e-06, - "loss": 0.8507, - "step": 2354 - }, - { - "epoch": 0.21238219777246697, - "grad_norm": 3.260576205671808, - "learning_rate": 3.6611722781772635e-06, - "loss": 0.9901, - "step": 2355 - }, - { - "epoch": 0.21247238129593723, - "grad_norm": 0.9087821181339625, - "learning_rate": 3.6608468661534444e-06, - "loss": 0.8883, - "step": 2356 - }, - { - "epoch": 0.2125625648194075, - "grad_norm": 4.421958032913546, - "learning_rate": 3.660521312417013e-06, - "loss": 0.8927, - "step": 2357 - }, - { - "epoch": 0.21265274834287776, - "grad_norm": 1.6108890559463203, - "learning_rate": 3.660195616995747e-06, - "loss": 0.9381, - "step": 2358 - }, - { - "epoch": 0.21274293186634802, - "grad_norm": 1.4884402372890486, - "learning_rate": 3.6598697799174367e-06, - "loss": 0.9961, - "step": 2359 - }, - { - "epoch": 0.21283311538981828, - "grad_norm": 1.6466231643989704, - "learning_rate": 3.6595438012098844e-06, - "loss": 1.0329, - "step": 2360 - }, - { - "epoch": 0.21292329891328854, - "grad_norm": 1.3754811312131123, - "learning_rate": 3.6592176809009045e-06, - "loss": 0.9477, - "step": 2361 - }, - { - "epoch": 0.2130134824367588, - "grad_norm": 1.4109879680069177, - "learning_rate": 3.6588914190183227e-06, - "loss": 1.0053, - "step": 2362 - }, - { - "epoch": 0.21310366596022906, - "grad_norm": 1.5889485156083272, - "learning_rate": 3.658565015589978e-06, - "loss": 0.8397, - "step": 2363 - }, - { - "epoch": 0.21319384948369932, - "grad_norm": 1.96386828012308, - "learning_rate": 3.6582384706437217e-06, - "loss": 0.9922, - "step": 2364 - }, - { - "epoch": 0.21328403300716958, - "grad_norm": 1.5241741257963508, - "learning_rate": 3.6579117842074156e-06, - "loss": 0.9883, - "step": 2365 - }, - { - "epoch": 0.21337421653063984, - "grad_norm": 1.630353910955987, - "learning_rate": 3.657584956308934e-06, - "loss": 0.9101, - "step": 2366 - }, - { - "epoch": 0.2134644000541101, - "grad_norm": 1.5851913726510267, - "learning_rate": 3.6572579869761648e-06, - "loss": 1.0335, - "step": 2367 - }, - { - "epoch": 0.21355458357758036, - "grad_norm": 1.7126407593286794, - "learning_rate": 3.6569308762370056e-06, - "loss": 1.0215, - "step": 2368 - }, - { - "epoch": 0.21364476710105063, - "grad_norm": 3.45661163369651, - "learning_rate": 3.6566036241193676e-06, - "loss": 1.0109, - "step": 2369 - }, - { - "epoch": 0.2137349506245209, - "grad_norm": 0.9315540905316627, - "learning_rate": 3.656276230651174e-06, - "loss": 0.8586, - "step": 2370 - }, - { - "epoch": 0.21382513414799117, - "grad_norm": 0.8005626100862767, - "learning_rate": 3.65594869586036e-06, - "loss": 0.8533, - "step": 2371 - }, - { - "epoch": 0.21391531767146144, - "grad_norm": 1.4954423009008024, - "learning_rate": 3.6556210197748724e-06, - "loss": 0.8978, - "step": 2372 - }, - { - "epoch": 0.2140055011949317, - "grad_norm": 1.4566271019428343, - "learning_rate": 3.655293202422671e-06, - "loss": 0.9355, - "step": 2373 - }, - { - "epoch": 0.21409568471840196, - "grad_norm": 1.3367047253959317, - "learning_rate": 3.654965243831725e-06, - "loss": 0.9902, - "step": 2374 - }, - { - "epoch": 0.21418586824187222, - "grad_norm": 1.3654942517573394, - "learning_rate": 3.65463714403002e-06, - "loss": 1.0084, - "step": 2375 - }, - { - "epoch": 0.21427605176534248, - "grad_norm": 1.4449359300130409, - "learning_rate": 3.65430890304555e-06, - "loss": 0.9275, - "step": 2376 - }, - { - "epoch": 0.21436623528881274, - "grad_norm": 1.266114458171093, - "learning_rate": 3.653980520906323e-06, - "loss": 1.0001, - "step": 2377 - }, - { - "epoch": 0.214456418812283, - "grad_norm": 1.3784403675839094, - "learning_rate": 3.653651997640358e-06, - "loss": 0.9565, - "step": 2378 - }, - { - "epoch": 0.21454660233575326, - "grad_norm": 1.4613170436754066, - "learning_rate": 3.653323333275686e-06, - "loss": 0.9726, - "step": 2379 - }, - { - "epoch": 0.21463678585922352, - "grad_norm": 1.457526483311519, - "learning_rate": 3.652994527840351e-06, - "loss": 0.9557, - "step": 2380 - }, - { - "epoch": 0.21472696938269378, - "grad_norm": 1.4526999067139348, - "learning_rate": 3.6526655813624087e-06, - "loss": 0.9327, - "step": 2381 - }, - { - "epoch": 0.21481715290616404, - "grad_norm": 1.1152807650295824, - "learning_rate": 3.652336493869925e-06, - "loss": 0.8315, - "step": 2382 - }, - { - "epoch": 0.2149073364296343, - "grad_norm": 1.4918762201912297, - "learning_rate": 3.6520072653909823e-06, - "loss": 0.9782, - "step": 2383 - }, - { - "epoch": 0.21499751995310457, - "grad_norm": 1.7000972187068626, - "learning_rate": 3.6516778959536702e-06, - "loss": 0.9684, - "step": 2384 - }, - { - "epoch": 0.21508770347657483, - "grad_norm": 1.5719470726159437, - "learning_rate": 3.6513483855860923e-06, - "loss": 1.0264, - "step": 2385 - }, - { - "epoch": 0.2151778870000451, - "grad_norm": 1.6296980509137573, - "learning_rate": 3.6510187343163654e-06, - "loss": 0.9145, - "step": 2386 - }, - { - "epoch": 0.21526807052351535, - "grad_norm": 1.44024148002426, - "learning_rate": 3.650688942172616e-06, - "loss": 1.0014, - "step": 2387 - }, - { - "epoch": 0.2153582540469856, - "grad_norm": 1.5279773851488665, - "learning_rate": 3.650359009182984e-06, - "loss": 0.9109, - "step": 2388 - }, - { - "epoch": 0.21544843757045587, - "grad_norm": 1.6321004382670297, - "learning_rate": 3.650028935375622e-06, - "loss": 1.0607, - "step": 2389 - }, - { - "epoch": 0.21553862109392613, - "grad_norm": 1.430327142095016, - "learning_rate": 3.6496987207786926e-06, - "loss": 0.9449, - "step": 2390 - }, - { - "epoch": 0.2156288046173964, - "grad_norm": 1.7315850026710267, - "learning_rate": 3.6493683654203724e-06, - "loss": 0.9168, - "step": 2391 - }, - { - "epoch": 0.21571898814086665, - "grad_norm": 2.041230200480524, - "learning_rate": 3.6490378693288484e-06, - "loss": 0.9506, - "step": 2392 - }, - { - "epoch": 0.2158091716643369, - "grad_norm": 1.5923197443345454, - "learning_rate": 3.648707232532321e-06, - "loss": 1.0184, - "step": 2393 - }, - { - "epoch": 0.2158993551878072, - "grad_norm": 1.744409418699348, - "learning_rate": 3.6483764550590017e-06, - "loss": 0.9788, - "step": 2394 - }, - { - "epoch": 0.21598953871127746, - "grad_norm": 1.5342435574606852, - "learning_rate": 3.6480455369371133e-06, - "loss": 0.9891, - "step": 2395 - }, - { - "epoch": 0.21607972223474772, - "grad_norm": 1.4039360081443508, - "learning_rate": 3.647714478194893e-06, - "loss": 0.9826, - "step": 2396 - }, - { - "epoch": 0.21616990575821798, - "grad_norm": 1.8051594699146416, - "learning_rate": 3.647383278860588e-06, - "loss": 0.918, - "step": 2397 - }, - { - "epoch": 0.21626008928168824, - "grad_norm": 1.5519033835462124, - "learning_rate": 3.6470519389624587e-06, - "loss": 0.9656, - "step": 2398 - }, - { - "epoch": 0.2163502728051585, - "grad_norm": 1.323484549198387, - "learning_rate": 3.646720458528776e-06, - "loss": 1.0282, - "step": 2399 - }, - { - "epoch": 0.21644045632862877, - "grad_norm": 2.006262273542943, - "learning_rate": 3.6463888375878235e-06, - "loss": 0.9354, - "step": 2400 - }, - { - "epoch": 0.21653063985209903, - "grad_norm": 1.7719814692404499, - "learning_rate": 3.646057076167897e-06, - "loss": 0.9782, - "step": 2401 - }, - { - "epoch": 0.2166208233755693, - "grad_norm": 1.4393887757782133, - "learning_rate": 3.645725174297305e-06, - "loss": 0.9676, - "step": 2402 - }, - { - "epoch": 0.21671100689903955, - "grad_norm": 1.6294641797089962, - "learning_rate": 3.645393132004367e-06, - "loss": 0.9493, - "step": 2403 - }, - { - "epoch": 0.2168011904225098, - "grad_norm": 1.4229456465280121, - "learning_rate": 3.6450609493174135e-06, - "loss": 0.9305, - "step": 2404 - }, - { - "epoch": 0.21689137394598007, - "grad_norm": 1.4028946158512796, - "learning_rate": 3.6447286262647896e-06, - "loss": 0.9356, - "step": 2405 - }, - { - "epoch": 0.21698155746945033, - "grad_norm": 1.5485744196370728, - "learning_rate": 3.64439616287485e-06, - "loss": 0.9455, - "step": 2406 - }, - { - "epoch": 0.2170717409929206, - "grad_norm": 1.2695367431521782, - "learning_rate": 3.644063559175963e-06, - "loss": 0.9397, - "step": 2407 - }, - { - "epoch": 0.21716192451639085, - "grad_norm": 1.4449821300787593, - "learning_rate": 3.6437308151965074e-06, - "loss": 0.957, - "step": 2408 - }, - { - "epoch": 0.2172521080398611, - "grad_norm": 1.2361292867479146, - "learning_rate": 3.643397930964876e-06, - "loss": 0.9818, - "step": 2409 - }, - { - "epoch": 0.21734229156333137, - "grad_norm": 1.6007951340229174, - "learning_rate": 3.6430649065094707e-06, - "loss": 1.0354, - "step": 2410 - }, - { - "epoch": 0.21743247508680164, - "grad_norm": 1.7415819013614549, - "learning_rate": 3.6427317418587086e-06, - "loss": 1.0339, - "step": 2411 - }, - { - "epoch": 0.2175226586102719, - "grad_norm": 1.4542172336621697, - "learning_rate": 3.6423984370410157e-06, - "loss": 0.921, - "step": 2412 - }, - { - "epoch": 0.21761284213374216, - "grad_norm": 1.3834032854907863, - "learning_rate": 3.6420649920848324e-06, - "loss": 0.9341, - "step": 2413 - }, - { - "epoch": 0.21770302565721242, - "grad_norm": 1.2929772321993298, - "learning_rate": 3.6417314070186096e-06, - "loss": 0.9725, - "step": 2414 - }, - { - "epoch": 0.21779320918068268, - "grad_norm": 1.3097531776280456, - "learning_rate": 3.641397681870811e-06, - "loss": 0.976, - "step": 2415 - }, - { - "epoch": 0.21788339270415294, - "grad_norm": 2.880632936016067, - "learning_rate": 3.641063816669911e-06, - "loss": 0.9942, - "step": 2416 - }, - { - "epoch": 0.2179735762276232, - "grad_norm": 1.4817388315437998, - "learning_rate": 3.640729811444398e-06, - "loss": 1.0396, - "step": 2417 - }, - { - "epoch": 0.2180637597510935, - "grad_norm": 1.352590225298685, - "learning_rate": 3.6403956662227706e-06, - "loss": 0.9698, - "step": 2418 - }, - { - "epoch": 0.21815394327456375, - "grad_norm": 1.4793134625443953, - "learning_rate": 3.6400613810335396e-06, - "loss": 1.0323, - "step": 2419 - }, - { - "epoch": 0.218244126798034, - "grad_norm": 1.8888148950906385, - "learning_rate": 3.639726955905228e-06, - "loss": 0.8869, - "step": 2420 - }, - { - "epoch": 0.21833431032150427, - "grad_norm": 1.4932326406494576, - "learning_rate": 3.639392390866372e-06, - "loss": 1.0101, - "step": 2421 - }, - { - "epoch": 0.21842449384497453, - "grad_norm": 1.6340002093367518, - "learning_rate": 3.639057685945517e-06, - "loss": 0.9494, - "step": 2422 - }, - { - "epoch": 0.2185146773684448, - "grad_norm": 1.3301503614171863, - "learning_rate": 3.638722841171223e-06, - "loss": 0.8609, - "step": 2423 - }, - { - "epoch": 0.21860486089191505, - "grad_norm": 18.747606455298083, - "learning_rate": 3.638387856572061e-06, - "loss": 1.0162, - "step": 2424 - }, - { - "epoch": 0.21869504441538531, - "grad_norm": 0.8401517436364443, - "learning_rate": 3.638052732176612e-06, - "loss": 0.8367, - "step": 2425 - }, - { - "epoch": 0.21878522793885558, - "grad_norm": 1.521078386116513, - "learning_rate": 3.637717468013472e-06, - "loss": 0.9756, - "step": 2426 - }, - { - "epoch": 0.21887541146232584, - "grad_norm": 0.8106984927377868, - "learning_rate": 3.6373820641112475e-06, - "loss": 0.7885, - "step": 2427 - }, - { - "epoch": 0.2189655949857961, - "grad_norm": 1.783151180851978, - "learning_rate": 3.6370465204985567e-06, - "loss": 0.993, - "step": 2428 - }, - { - "epoch": 0.21905577850926636, - "grad_norm": 0.760522186239081, - "learning_rate": 3.6367108372040304e-06, - "loss": 0.8578, - "step": 2429 - }, - { - "epoch": 0.21914596203273662, - "grad_norm": 1.4779224087553682, - "learning_rate": 3.6363750142563107e-06, - "loss": 0.9753, - "step": 2430 - }, - { - "epoch": 0.21923614555620688, - "grad_norm": 0.8498283717844846, - "learning_rate": 3.636039051684052e-06, - "loss": 0.8854, - "step": 2431 - }, - { - "epoch": 0.21932632907967714, - "grad_norm": 1.7284745206094292, - "learning_rate": 3.6357029495159203e-06, - "loss": 0.9451, - "step": 2432 - }, - { - "epoch": 0.2194165126031474, - "grad_norm": 1.316811926389918, - "learning_rate": 3.6353667077805934e-06, - "loss": 0.9435, - "step": 2433 - }, - { - "epoch": 0.21950669612661766, - "grad_norm": 1.7202381973781986, - "learning_rate": 3.6350303265067625e-06, - "loss": 1.0196, - "step": 2434 - }, - { - "epoch": 0.21959687965008792, - "grad_norm": 1.398550018387138, - "learning_rate": 3.6346938057231285e-06, - "loss": 0.999, - "step": 2435 - }, - { - "epoch": 0.21968706317355818, - "grad_norm": 1.8242978848726583, - "learning_rate": 3.6343571454584047e-06, - "loss": 0.9156, - "step": 2436 - }, - { - "epoch": 0.21977724669702844, - "grad_norm": 1.4155922723822767, - "learning_rate": 3.6340203457413176e-06, - "loss": 1.0119, - "step": 2437 - }, - { - "epoch": 0.2198674302204987, - "grad_norm": 1.4771297919208644, - "learning_rate": 3.633683406600605e-06, - "loss": 0.9951, - "step": 2438 - }, - { - "epoch": 0.21995761374396897, - "grad_norm": 1.3424953103294368, - "learning_rate": 3.6333463280650165e-06, - "loss": 1.0381, - "step": 2439 - }, - { - "epoch": 0.22004779726743923, - "grad_norm": 0.8201226514292229, - "learning_rate": 3.6330091101633126e-06, - "loss": 0.8059, - "step": 2440 - }, - { - "epoch": 0.2201379807909095, - "grad_norm": 1.74236450664618, - "learning_rate": 3.632671752924267e-06, - "loss": 1.0096, - "step": 2441 - }, - { - "epoch": 0.22022816431437978, - "grad_norm": 1.8674137505118105, - "learning_rate": 3.632334256376665e-06, - "loss": 0.989, - "step": 2442 - }, - { - "epoch": 0.22031834783785004, - "grad_norm": 1.6267409169078484, - "learning_rate": 3.6319966205493044e-06, - "loss": 1.03, - "step": 2443 - }, - { - "epoch": 0.2204085313613203, - "grad_norm": 1.0853242858018324, - "learning_rate": 3.6316588454709922e-06, - "loss": 0.8785, - "step": 2444 - }, - { - "epoch": 0.22049871488479056, - "grad_norm": 1.5659997274610493, - "learning_rate": 3.6313209311705514e-06, - "loss": 0.9252, - "step": 2445 - }, - { - "epoch": 0.22058889840826082, - "grad_norm": 1.6904533115721792, - "learning_rate": 3.6309828776768133e-06, - "loss": 1.009, - "step": 2446 - }, - { - "epoch": 0.22067908193173108, - "grad_norm": 1.8032205407599966, - "learning_rate": 3.630644685018623e-06, - "loss": 0.9331, - "step": 2447 - }, - { - "epoch": 0.22076926545520134, - "grad_norm": 3.7466340535555656, - "learning_rate": 3.6303063532248367e-06, - "loss": 1.0618, - "step": 2448 - }, - { - "epoch": 0.2208594489786716, - "grad_norm": 1.514893896208381, - "learning_rate": 3.6299678823243236e-06, - "loss": 1.0548, - "step": 2449 - }, - { - "epoch": 0.22094963250214186, - "grad_norm": 1.7456580520926244, - "learning_rate": 3.629629272345963e-06, - "loss": 0.8761, - "step": 2450 - }, - { - "epoch": 0.22103981602561212, - "grad_norm": 1.6136002860145144, - "learning_rate": 3.6292905233186468e-06, - "loss": 1.0901, - "step": 2451 - }, - { - "epoch": 0.22112999954908238, - "grad_norm": 1.5046260547867174, - "learning_rate": 3.6289516352712796e-06, - "loss": 0.9411, - "step": 2452 - }, - { - "epoch": 0.22122018307255265, - "grad_norm": 0.854877354598146, - "learning_rate": 3.6286126082327764e-06, - "loss": 0.852, - "step": 2453 - }, - { - "epoch": 0.2213103665960229, - "grad_norm": 1.2996026275432087, - "learning_rate": 3.628273442232066e-06, - "loss": 1.0092, - "step": 2454 - }, - { - "epoch": 0.22140055011949317, - "grad_norm": 1.3706332571856814, - "learning_rate": 3.627934137298087e-06, - "loss": 0.9694, - "step": 2455 - }, - { - "epoch": 0.22149073364296343, - "grad_norm": 1.8499853442874539, - "learning_rate": 3.627594693459792e-06, - "loss": 0.9455, - "step": 2456 - }, - { - "epoch": 0.2215809171664337, - "grad_norm": 1.5339430657505622, - "learning_rate": 3.6272551107461424e-06, - "loss": 0.9397, - "step": 2457 - }, - { - "epoch": 0.22167110068990395, - "grad_norm": 1.43576243332719, - "learning_rate": 3.6269153891861137e-06, - "loss": 0.9592, - "step": 2458 - }, - { - "epoch": 0.2217612842133742, - "grad_norm": 1.352721450572079, - "learning_rate": 3.6265755288086944e-06, - "loss": 0.9494, - "step": 2459 - }, - { - "epoch": 0.22185146773684447, - "grad_norm": 1.684825685466697, - "learning_rate": 3.626235529642881e-06, - "loss": 0.9545, - "step": 2460 - }, - { - "epoch": 0.22194165126031473, - "grad_norm": 1.5347444792562916, - "learning_rate": 3.625895391717686e-06, - "loss": 0.8984, - "step": 2461 - }, - { - "epoch": 0.222031834783785, - "grad_norm": 1.4871769209627967, - "learning_rate": 3.625555115062131e-06, - "loss": 0.9433, - "step": 2462 - }, - { - "epoch": 0.22212201830725525, - "grad_norm": 1.4636139042992706, - "learning_rate": 3.6252146997052507e-06, - "loss": 1.0457, - "step": 2463 - }, - { - "epoch": 0.22221220183072551, - "grad_norm": 1.1507183443630282, - "learning_rate": 3.6248741456760898e-06, - "loss": 0.787, - "step": 2464 - }, - { - "epoch": 0.22230238535419578, - "grad_norm": 1.4568856910630774, - "learning_rate": 3.624533453003708e-06, - "loss": 0.9631, - "step": 2465 - }, - { - "epoch": 0.22239256887766606, - "grad_norm": 1.6824554347899743, - "learning_rate": 3.6241926217171745e-06, - "loss": 0.9878, - "step": 2466 - }, - { - "epoch": 0.22248275240113632, - "grad_norm": 1.7523045352216393, - "learning_rate": 3.6238516518455703e-06, - "loss": 0.8893, - "step": 2467 - }, - { - "epoch": 0.22257293592460659, - "grad_norm": 1.8035714798297362, - "learning_rate": 3.62351054341799e-06, - "loss": 1.0873, - "step": 2468 - }, - { - "epoch": 0.22266311944807685, - "grad_norm": 1.5158785578416312, - "learning_rate": 3.623169296463538e-06, - "loss": 0.9875, - "step": 2469 - }, - { - "epoch": 0.2227533029715471, - "grad_norm": 1.690045379729143, - "learning_rate": 3.6228279110113316e-06, - "loss": 0.9807, - "step": 2470 - }, - { - "epoch": 0.22284348649501737, - "grad_norm": 1.723436589729297, - "learning_rate": 3.6224863870904994e-06, - "loss": 0.984, - "step": 2471 - }, - { - "epoch": 0.22293367001848763, - "grad_norm": 1.512158785707218, - "learning_rate": 3.6221447247301827e-06, - "loss": 0.9828, - "step": 2472 - }, - { - "epoch": 0.2230238535419579, - "grad_norm": 1.401367315099624, - "learning_rate": 3.6218029239595332e-06, - "loss": 0.9258, - "step": 2473 - }, - { - "epoch": 0.22311403706542815, - "grad_norm": 1.4838781227899676, - "learning_rate": 3.621460984807716e-06, - "loss": 1.0008, - "step": 2474 - }, - { - "epoch": 0.2232042205888984, - "grad_norm": 1.6154356972858528, - "learning_rate": 3.621118907303907e-06, - "loss": 0.9474, - "step": 2475 - }, - { - "epoch": 0.22329440411236867, - "grad_norm": 1.5058870028523046, - "learning_rate": 3.620776691477294e-06, - "loss": 0.9707, - "step": 2476 - }, - { - "epoch": 0.22338458763583893, - "grad_norm": 0.9868386939475009, - "learning_rate": 3.6204343373570765e-06, - "loss": 0.784, - "step": 2477 - }, - { - "epoch": 0.2234747711593092, - "grad_norm": 1.476764079871347, - "learning_rate": 3.620091844972467e-06, - "loss": 1.0005, - "step": 2478 - }, - { - "epoch": 0.22356495468277945, - "grad_norm": 1.7418653934516348, - "learning_rate": 3.619749214352688e-06, - "loss": 0.9975, - "step": 2479 - }, - { - "epoch": 0.22365513820624972, - "grad_norm": 1.7152129103955784, - "learning_rate": 3.6194064455269744e-06, - "loss": 1.0827, - "step": 2480 - }, - { - "epoch": 0.22374532172971998, - "grad_norm": 1.632057562981618, - "learning_rate": 3.6190635385245737e-06, - "loss": 0.8913, - "step": 2481 - }, - { - "epoch": 0.22383550525319024, - "grad_norm": 1.474244088913878, - "learning_rate": 3.618720493374745e-06, - "loss": 0.9073, - "step": 2482 - }, - { - "epoch": 0.2239256887766605, - "grad_norm": 1.6824525297590354, - "learning_rate": 3.6183773101067575e-06, - "loss": 0.8389, - "step": 2483 - }, - { - "epoch": 0.22401587230013076, - "grad_norm": 1.66099652735412, - "learning_rate": 3.6180339887498948e-06, - "loss": 0.9008, - "step": 2484 - }, - { - "epoch": 0.22410605582360102, - "grad_norm": 1.5842450511843642, - "learning_rate": 3.61769052933345e-06, - "loss": 0.9774, - "step": 2485 - }, - { - "epoch": 0.22419623934707128, - "grad_norm": 1.7249921328600184, - "learning_rate": 3.6173469318867297e-06, - "loss": 1.1253, - "step": 2486 - }, - { - "epoch": 0.22428642287054154, - "grad_norm": 1.6358807952001686, - "learning_rate": 3.617003196439051e-06, - "loss": 1.0008, - "step": 2487 - }, - { - "epoch": 0.2243766063940118, - "grad_norm": 1.3484038117798753, - "learning_rate": 3.616659323019744e-06, - "loss": 0.9839, - "step": 2488 - }, - { - "epoch": 0.22446678991748206, - "grad_norm": 1.5378803030941315, - "learning_rate": 3.616315311658149e-06, - "loss": 1.003, - "step": 2489 - }, - { - "epoch": 0.22455697344095235, - "grad_norm": 1.871116430985315, - "learning_rate": 3.6159711623836195e-06, - "loss": 1.0306, - "step": 2490 - }, - { - "epoch": 0.2246471569644226, - "grad_norm": 0.9559447680244109, - "learning_rate": 3.6156268752255203e-06, - "loss": 0.8028, - "step": 2491 - }, - { - "epoch": 0.22473734048789287, - "grad_norm": 1.508253596325113, - "learning_rate": 3.615282450213227e-06, - "loss": 0.941, - "step": 2492 - }, - { - "epoch": 0.22482752401136313, - "grad_norm": 1.689940841926011, - "learning_rate": 3.614937887376128e-06, - "loss": 0.992, - "step": 2493 - }, - { - "epoch": 0.2249177075348334, - "grad_norm": 1.8203707837118377, - "learning_rate": 3.614593186743625e-06, - "loss": 0.9855, - "step": 2494 - }, - { - "epoch": 0.22500789105830366, - "grad_norm": 2.0578579537738158, - "learning_rate": 3.614248348345128e-06, - "loss": 0.9213, - "step": 2495 - }, - { - "epoch": 0.22509807458177392, - "grad_norm": 1.4804436271367725, - "learning_rate": 3.6139033722100614e-06, - "loss": 1.0219, - "step": 2496 - }, - { - "epoch": 0.22518825810524418, - "grad_norm": 1.9527832342583777, - "learning_rate": 3.6135582583678596e-06, - "loss": 1.0499, - "step": 2497 - }, - { - "epoch": 0.22527844162871444, - "grad_norm": 1.3692327841135596, - "learning_rate": 3.61321300684797e-06, - "loss": 0.9943, - "step": 2498 - }, - { - "epoch": 0.2253686251521847, - "grad_norm": 1.577698224969419, - "learning_rate": 3.6128676176798527e-06, - "loss": 0.9998, - "step": 2499 - }, - { - "epoch": 0.22545880867565496, - "grad_norm": 1.480152267407206, - "learning_rate": 3.612522090892976e-06, - "loss": 0.992, - "step": 2500 - }, - { - "epoch": 0.22554899219912522, - "grad_norm": 1.2450392036700018, - "learning_rate": 3.6121764265168232e-06, - "loss": 0.9382, - "step": 2501 - }, - { - "epoch": 0.22563917572259548, - "grad_norm": 1.8680697475620636, - "learning_rate": 3.611830624580888e-06, - "loss": 0.9974, - "step": 2502 - }, - { - "epoch": 0.22572935924606574, - "grad_norm": 1.6900510931474026, - "learning_rate": 3.6114846851146767e-06, - "loss": 0.9837, - "step": 2503 - }, - { - "epoch": 0.225819542769536, - "grad_norm": 1.497991807067224, - "learning_rate": 3.6111386081477068e-06, - "loss": 0.8807, - "step": 2504 - }, - { - "epoch": 0.22590972629300626, - "grad_norm": 1.4692573482737947, - "learning_rate": 3.6107923937095066e-06, - "loss": 0.9838, - "step": 2505 - }, - { - "epoch": 0.22599990981647652, - "grad_norm": 1.8239641359049203, - "learning_rate": 3.6104460418296173e-06, - "loss": 0.9009, - "step": 2506 - }, - { - "epoch": 0.22609009333994678, - "grad_norm": 1.5082588128215453, - "learning_rate": 3.6100995525375924e-06, - "loss": 1.0365, - "step": 2507 - }, - { - "epoch": 0.22618027686341705, - "grad_norm": 1.4124994227315348, - "learning_rate": 3.6097529258629952e-06, - "loss": 0.9253, - "step": 2508 - }, - { - "epoch": 0.2262704603868873, - "grad_norm": 1.5296473777426565, - "learning_rate": 3.6094061618354027e-06, - "loss": 1.0074, - "step": 2509 - }, - { - "epoch": 0.22636064391035757, - "grad_norm": 1.540939348506341, - "learning_rate": 3.609059260484402e-06, - "loss": 0.9787, - "step": 2510 - }, - { - "epoch": 0.22645082743382783, - "grad_norm": 2.030048425782732, - "learning_rate": 3.6087122218395935e-06, - "loss": 0.9172, - "step": 2511 - }, - { - "epoch": 0.2265410109572981, - "grad_norm": 1.4282305021152017, - "learning_rate": 3.608365045930587e-06, - "loss": 0.9347, - "step": 2512 - }, - { - "epoch": 0.22663119448076835, - "grad_norm": 1.7767847544495674, - "learning_rate": 3.608017732787007e-06, - "loss": 0.9589, - "step": 2513 - }, - { - "epoch": 0.22672137800423864, - "grad_norm": 1.2871485980574038, - "learning_rate": 3.6076702824384875e-06, - "loss": 0.907, - "step": 2514 - }, - { - "epoch": 0.2268115615277089, - "grad_norm": 1.3134476101885766, - "learning_rate": 3.607322694914675e-06, - "loss": 0.9921, - "step": 2515 - }, - { - "epoch": 0.22690174505117916, - "grad_norm": 1.9296137374803286, - "learning_rate": 3.606974970245227e-06, - "loss": 0.9501, - "step": 2516 - }, - { - "epoch": 0.22699192857464942, - "grad_norm": 1.627312335490824, - "learning_rate": 3.606627108459814e-06, - "loss": 0.923, - "step": 2517 - }, - { - "epoch": 0.22708211209811968, - "grad_norm": 1.2226058413318117, - "learning_rate": 3.6062791095881174e-06, - "loss": 0.9011, - "step": 2518 - }, - { - "epoch": 0.22717229562158994, - "grad_norm": 1.6614792522520594, - "learning_rate": 3.6059309736598303e-06, - "loss": 1.0473, - "step": 2519 - }, - { - "epoch": 0.2272624791450602, - "grad_norm": 2.7102778179034943, - "learning_rate": 3.605582700704657e-06, - "loss": 0.9524, - "step": 2520 - }, - { - "epoch": 0.22735266266853046, - "grad_norm": 1.429775714758416, - "learning_rate": 3.6052342907523146e-06, - "loss": 0.9786, - "step": 2521 - }, - { - "epoch": 0.22744284619200072, - "grad_norm": 1.473331367497118, - "learning_rate": 3.604885743832532e-06, - "loss": 0.9055, - "step": 2522 - }, - { - "epoch": 0.22753302971547099, - "grad_norm": 0.9697677434530731, - "learning_rate": 3.6045370599750482e-06, - "loss": 0.7998, - "step": 2523 - }, - { - "epoch": 0.22762321323894125, - "grad_norm": 1.4188850741848358, - "learning_rate": 3.604188239209615e-06, - "loss": 0.9522, - "step": 2524 - }, - { - "epoch": 0.2277133967624115, - "grad_norm": 1.5097169066312845, - "learning_rate": 3.603839281565996e-06, - "loss": 0.9031, - "step": 2525 - }, - { - "epoch": 0.22780358028588177, - "grad_norm": 2.063146778767159, - "learning_rate": 3.603490187073966e-06, - "loss": 0.9926, - "step": 2526 - }, - { - "epoch": 0.22789376380935203, - "grad_norm": 1.6219541908879138, - "learning_rate": 3.6031409557633117e-06, - "loss": 1.0341, - "step": 2527 - }, - { - "epoch": 0.2279839473328223, - "grad_norm": 1.5537959470576481, - "learning_rate": 3.602791587663831e-06, - "loss": 1.0194, - "step": 2528 - }, - { - "epoch": 0.22807413085629255, - "grad_norm": 1.4581702640733385, - "learning_rate": 3.6024420828053348e-06, - "loss": 1.0165, - "step": 2529 - }, - { - "epoch": 0.2281643143797628, - "grad_norm": 1.4931218282890244, - "learning_rate": 3.6020924412176445e-06, - "loss": 1.0487, - "step": 2530 - }, - { - "epoch": 0.22825449790323307, - "grad_norm": 7.239961712140591, - "learning_rate": 3.601742662930593e-06, - "loss": 0.9549, - "step": 2531 - }, - { - "epoch": 0.22834468142670333, - "grad_norm": 1.6038217380145092, - "learning_rate": 3.6013927479740248e-06, - "loss": 0.9383, - "step": 2532 - }, - { - "epoch": 0.2284348649501736, - "grad_norm": 1.4305123206377706, - "learning_rate": 3.6010426963777985e-06, - "loss": 1.0562, - "step": 2533 - }, - { - "epoch": 0.22852504847364385, - "grad_norm": 1.4889521651004358, - "learning_rate": 3.6006925081717804e-06, - "loss": 1.0187, - "step": 2534 - }, - { - "epoch": 0.22861523199711412, - "grad_norm": 1.2942420245008306, - "learning_rate": 3.600342183385852e-06, - "loss": 0.9391, - "step": 2535 - }, - { - "epoch": 0.22870541552058438, - "grad_norm": 1.2914346312204088, - "learning_rate": 3.5999917220499043e-06, - "loss": 0.9122, - "step": 2536 - }, - { - "epoch": 0.22879559904405466, - "grad_norm": 1.6975883291238816, - "learning_rate": 3.5996411241938404e-06, - "loss": 0.9808, - "step": 2537 - }, - { - "epoch": 0.22888578256752493, - "grad_norm": 1.4490444356135, - "learning_rate": 3.5992903898475752e-06, - "loss": 0.9269, - "step": 2538 - }, - { - "epoch": 0.2289759660909952, - "grad_norm": 1.325626716306907, - "learning_rate": 3.5989395190410365e-06, - "loss": 1.076, - "step": 2539 - }, - { - "epoch": 0.22906614961446545, - "grad_norm": 1.4487372984177154, - "learning_rate": 3.598588511804161e-06, - "loss": 0.9831, - "step": 2540 - }, - { - "epoch": 0.2291563331379357, - "grad_norm": 1.9244121149243545, - "learning_rate": 3.5982373681668987e-06, - "loss": 0.9882, - "step": 2541 - }, - { - "epoch": 0.22924651666140597, - "grad_norm": 1.702392490594299, - "learning_rate": 3.597886088159212e-06, - "loss": 0.9821, - "step": 2542 - }, - { - "epoch": 0.22933670018487623, - "grad_norm": 1.3397748390026394, - "learning_rate": 3.597534671811074e-06, - "loss": 0.9035, - "step": 2543 - }, - { - "epoch": 0.2294268837083465, - "grad_norm": 1.4779031308786326, - "learning_rate": 3.5971831191524684e-06, - "loss": 0.9509, - "step": 2544 - }, - { - "epoch": 0.22951706723181675, - "grad_norm": 1.3834813973265623, - "learning_rate": 3.5968314302133925e-06, - "loss": 1.0213, - "step": 2545 - }, - { - "epoch": 0.229607250755287, - "grad_norm": 1.6324832712107897, - "learning_rate": 3.596479605023854e-06, - "loss": 0.9753, - "step": 2546 - }, - { - "epoch": 0.22969743427875727, - "grad_norm": 1.4725139356902541, - "learning_rate": 3.596127643613873e-06, - "loss": 0.9892, - "step": 2547 - }, - { - "epoch": 0.22978761780222753, - "grad_norm": 1.343568434757928, - "learning_rate": 3.59577554601348e-06, - "loss": 0.9963, - "step": 2548 - }, - { - "epoch": 0.2298778013256978, - "grad_norm": 1.3005832372483561, - "learning_rate": 3.595423312252719e-06, - "loss": 1.0246, - "step": 2549 - }, - { - "epoch": 0.22996798484916806, - "grad_norm": 1.691305602429113, - "learning_rate": 3.5950709423616436e-06, - "loss": 0.9754, - "step": 2550 - }, - { - "epoch": 0.23005816837263832, - "grad_norm": 1.596761737546817, - "learning_rate": 3.5947184363703203e-06, - "loss": 0.9506, - "step": 2551 - }, - { - "epoch": 0.23014835189610858, - "grad_norm": 1.2570366688025343, - "learning_rate": 3.5943657943088274e-06, - "loss": 0.9248, - "step": 2552 - }, - { - "epoch": 0.23023853541957884, - "grad_norm": 1.2257245840245936, - "learning_rate": 3.5940130162072525e-06, - "loss": 0.9011, - "step": 2553 - }, - { - "epoch": 0.2303287189430491, - "grad_norm": 1.8201967783805622, - "learning_rate": 3.5936601020956985e-06, - "loss": 0.9835, - "step": 2554 - }, - { - "epoch": 0.23041890246651936, - "grad_norm": 1.4588167842427655, - "learning_rate": 3.5933070520042772e-06, - "loss": 1.0086, - "step": 2555 - }, - { - "epoch": 0.23050908598998962, - "grad_norm": 0.8975945872350166, - "learning_rate": 3.5929538659631133e-06, - "loss": 0.848, - "step": 2556 - }, - { - "epoch": 0.23059926951345988, - "grad_norm": 1.4357370263625606, - "learning_rate": 3.592600544002341e-06, - "loss": 1.0069, - "step": 2557 - }, - { - "epoch": 0.23068945303693014, - "grad_norm": 1.5577873878406296, - "learning_rate": 3.5922470861521098e-06, - "loss": 0.9745, - "step": 2558 - }, - { - "epoch": 0.2307796365604004, - "grad_norm": 1.5211526021754769, - "learning_rate": 3.591893492442577e-06, - "loss": 0.9439, - "step": 2559 - }, - { - "epoch": 0.23086982008387066, - "grad_norm": 1.7556710317246118, - "learning_rate": 3.591539762903914e-06, - "loss": 1.042, - "step": 2560 - }, - { - "epoch": 0.23096000360734095, - "grad_norm": 1.3896434191400735, - "learning_rate": 3.591185897566303e-06, - "loss": 0.9117, - "step": 2561 - }, - { - "epoch": 0.2310501871308112, - "grad_norm": 1.4210902133790688, - "learning_rate": 3.590831896459937e-06, - "loss": 0.9737, - "step": 2562 - }, - { - "epoch": 0.23114037065428147, - "grad_norm": 1.4901860580510955, - "learning_rate": 3.5904777596150222e-06, - "loss": 0.9806, - "step": 2563 - }, - { - "epoch": 0.23123055417775173, - "grad_norm": 1.5921309980525211, - "learning_rate": 3.590123487061775e-06, - "loss": 0.9793, - "step": 2564 - }, - { - "epoch": 0.231320737701222, - "grad_norm": 1.6523620027296002, - "learning_rate": 3.589769078830424e-06, - "loss": 1.0711, - "step": 2565 - }, - { - "epoch": 0.23141092122469226, - "grad_norm": 1.38649677058056, - "learning_rate": 3.58941453495121e-06, - "loss": 0.9557, - "step": 2566 - }, - { - "epoch": 0.23150110474816252, - "grad_norm": 1.4905493567125319, - "learning_rate": 3.5890598554543834e-06, - "loss": 0.9816, - "step": 2567 - }, - { - "epoch": 0.23159128827163278, - "grad_norm": 2.22529217805437, - "learning_rate": 3.5887050403702073e-06, - "loss": 1.0337, - "step": 2568 - }, - { - "epoch": 0.23168147179510304, - "grad_norm": 1.9707435323997908, - "learning_rate": 3.588350089728958e-06, - "loss": 0.9518, - "step": 2569 - }, - { - "epoch": 0.2317716553185733, - "grad_norm": 1.4319826474763373, - "learning_rate": 3.5879950035609204e-06, - "loss": 0.9947, - "step": 2570 - }, - { - "epoch": 0.23186183884204356, - "grad_norm": 1.4246450417114598, - "learning_rate": 3.5876397818963933e-06, - "loss": 0.9745, - "step": 2571 - }, - { - "epoch": 0.23195202236551382, - "grad_norm": 1.238859071904952, - "learning_rate": 3.5872844247656858e-06, - "loss": 1.0253, - "step": 2572 - }, - { - "epoch": 0.23204220588898408, - "grad_norm": 1.2857401254872953, - "learning_rate": 3.5869289321991195e-06, - "loss": 0.9814, - "step": 2573 - }, - { - "epoch": 0.23213238941245434, - "grad_norm": 2.032276891683669, - "learning_rate": 3.5865733042270263e-06, - "loss": 0.9653, - "step": 2574 - }, - { - "epoch": 0.2322225729359246, - "grad_norm": 1.5992850136644314, - "learning_rate": 3.5862175408797498e-06, - "loss": 1.0075, - "step": 2575 - }, - { - "epoch": 0.23231275645939486, - "grad_norm": 1.2636178666944686, - "learning_rate": 3.585861642187647e-06, - "loss": 0.915, - "step": 2576 - }, - { - "epoch": 0.23240293998286513, - "grad_norm": 1.342159261173282, - "learning_rate": 3.5855056081810845e-06, - "loss": 0.9682, - "step": 2577 - }, - { - "epoch": 0.2324931235063354, - "grad_norm": 1.3453603897697863, - "learning_rate": 3.5851494388904406e-06, - "loss": 0.9744, - "step": 2578 - }, - { - "epoch": 0.23258330702980565, - "grad_norm": 1.425501547773438, - "learning_rate": 3.5847931343461064e-06, - "loss": 0.9139, - "step": 2579 - }, - { - "epoch": 0.2326734905532759, - "grad_norm": 1.6317149584867792, - "learning_rate": 3.5844366945784835e-06, - "loss": 0.9044, - "step": 2580 - }, - { - "epoch": 0.23276367407674617, - "grad_norm": 1.5126470037224427, - "learning_rate": 3.5840801196179856e-06, - "loss": 1.032, - "step": 2581 - }, - { - "epoch": 0.23285385760021643, - "grad_norm": 1.504473056743389, - "learning_rate": 3.583723409495037e-06, - "loss": 1.0252, - "step": 2582 - }, - { - "epoch": 0.2329440411236867, - "grad_norm": 1.5220238019508217, - "learning_rate": 3.5833665642400747e-06, - "loss": 1.0268, - "step": 2583 - }, - { - "epoch": 0.23303422464715695, - "grad_norm": 1.4569725863431153, - "learning_rate": 3.5830095838835472e-06, - "loss": 0.9034, - "step": 2584 - }, - { - "epoch": 0.23312440817062724, - "grad_norm": 1.2871441988365688, - "learning_rate": 3.5826524684559125e-06, - "loss": 0.9419, - "step": 2585 - }, - { - "epoch": 0.2332145916940975, - "grad_norm": 1.8598298870763166, - "learning_rate": 3.5822952179876433e-06, - "loss": 1.0626, - "step": 2586 - }, - { - "epoch": 0.23330477521756776, - "grad_norm": 1.2616868147927307, - "learning_rate": 3.5819378325092205e-06, - "loss": 0.9481, - "step": 2587 - }, - { - "epoch": 0.23339495874103802, - "grad_norm": 1.313524482164905, - "learning_rate": 3.581580312051139e-06, - "loss": 0.9999, - "step": 2588 - }, - { - "epoch": 0.23348514226450828, - "grad_norm": 1.3891716981753601, - "learning_rate": 3.5812226566439057e-06, - "loss": 0.9031, - "step": 2589 - }, - { - "epoch": 0.23357532578797854, - "grad_norm": 1.4297366107781324, - "learning_rate": 3.580864866318036e-06, - "loss": 0.954, - "step": 2590 - }, - { - "epoch": 0.2336655093114488, - "grad_norm": 1.563551205837454, - "learning_rate": 3.580506941104059e-06, - "loss": 0.9875, - "step": 2591 - }, - { - "epoch": 0.23375569283491907, - "grad_norm": 1.3694113383483821, - "learning_rate": 3.580148881032515e-06, - "loss": 0.9684, - "step": 2592 - }, - { - "epoch": 0.23384587635838933, - "grad_norm": 1.3555109638599503, - "learning_rate": 3.5797906861339556e-06, - "loss": 0.9876, - "step": 2593 - }, - { - "epoch": 0.2339360598818596, - "grad_norm": 1.4221656418430115, - "learning_rate": 3.5794323564389435e-06, - "loss": 1.0083, - "step": 2594 - }, - { - "epoch": 0.23402624340532985, - "grad_norm": 1.5403682378713208, - "learning_rate": 3.579073891978055e-06, - "loss": 0.9865, - "step": 2595 - }, - { - "epoch": 0.2341164269288001, - "grad_norm": 1.9036018864002888, - "learning_rate": 3.5787152927818746e-06, - "loss": 0.9428, - "step": 2596 - }, - { - "epoch": 0.23420661045227037, - "grad_norm": 1.399007457326083, - "learning_rate": 3.5783565588810003e-06, - "loss": 1.031, - "step": 2597 - }, - { - "epoch": 0.23429679397574063, - "grad_norm": 1.4431713017849879, - "learning_rate": 3.5779976903060412e-06, - "loss": 0.9528, - "step": 2598 - }, - { - "epoch": 0.2343869774992109, - "grad_norm": 1.5009894286716783, - "learning_rate": 3.577638687087619e-06, - "loss": 1.0038, - "step": 2599 - }, - { - "epoch": 0.23447716102268115, - "grad_norm": 1.333242765966733, - "learning_rate": 3.577279549256364e-06, - "loss": 0.9806, - "step": 2600 - }, - { - "epoch": 0.2345673445461514, - "grad_norm": 1.4121934119157993, - "learning_rate": 3.5769202768429213e-06, - "loss": 1.0135, - "step": 2601 - }, - { - "epoch": 0.23465752806962167, - "grad_norm": 1.5858601518382238, - "learning_rate": 3.5765608698779454e-06, - "loss": 0.9617, - "step": 2602 - }, - { - "epoch": 0.23474771159309193, - "grad_norm": 1.4293732845027556, - "learning_rate": 3.5762013283921033e-06, - "loss": 0.9876, - "step": 2603 - }, - { - "epoch": 0.2348378951165622, - "grad_norm": 1.6117429626983384, - "learning_rate": 3.5758416524160728e-06, - "loss": 0.9773, - "step": 2604 - }, - { - "epoch": 0.23492807864003246, - "grad_norm": 1.4708237313595633, - "learning_rate": 3.5754818419805427e-06, - "loss": 0.967, - "step": 2605 - }, - { - "epoch": 0.23501826216350272, - "grad_norm": 1.3159019794802023, - "learning_rate": 3.575121897116216e-06, - "loss": 0.9508, - "step": 2606 - }, - { - "epoch": 0.23510844568697298, - "grad_norm": 1.3759085081339646, - "learning_rate": 3.574761817853803e-06, - "loss": 1.0126, - "step": 2607 - }, - { - "epoch": 0.23519862921044324, - "grad_norm": 5.287326364394216, - "learning_rate": 3.5744016042240287e-06, - "loss": 0.9131, - "step": 2608 - }, - { - "epoch": 0.23528881273391353, - "grad_norm": 1.6767928620253851, - "learning_rate": 3.5740412562576286e-06, - "loss": 0.9093, - "step": 2609 - }, - { - "epoch": 0.2353789962573838, - "grad_norm": 1.5823697705190358, - "learning_rate": 3.573680773985349e-06, - "loss": 0.9634, - "step": 2610 - }, - { - "epoch": 0.23546917978085405, - "grad_norm": 1.383483422230012, - "learning_rate": 3.5733201574379486e-06, - "loss": 0.9721, - "step": 2611 - }, - { - "epoch": 0.2355593633043243, - "grad_norm": 1.8003281294242424, - "learning_rate": 3.5729594066461975e-06, - "loss": 0.9241, - "step": 2612 - }, - { - "epoch": 0.23564954682779457, - "grad_norm": 1.1897961853098284, - "learning_rate": 3.572598521640876e-06, - "loss": 0.9956, - "step": 2613 - }, - { - "epoch": 0.23573973035126483, - "grad_norm": 0.853121529037625, - "learning_rate": 3.5722375024527782e-06, - "loss": 0.8514, - "step": 2614 - }, - { - "epoch": 0.2358299138747351, - "grad_norm": 1.231832614287549, - "learning_rate": 3.571876349112707e-06, - "loss": 0.9959, - "step": 2615 - }, - { - "epoch": 0.23592009739820535, - "grad_norm": 1.358981020606129, - "learning_rate": 3.5715150616514784e-06, - "loss": 0.9756, - "step": 2616 - }, - { - "epoch": 0.2360102809216756, - "grad_norm": 1.729246375150302, - "learning_rate": 3.5711536400999196e-06, - "loss": 1.033, - "step": 2617 - }, - { - "epoch": 0.23610046444514587, - "grad_norm": 0.8090614772066714, - "learning_rate": 3.570792084488869e-06, - "loss": 0.8649, - "step": 2618 - }, - { - "epoch": 0.23619064796861614, - "grad_norm": 0.8481189150721973, - "learning_rate": 3.5704303948491764e-06, - "loss": 0.9155, - "step": 2619 - }, - { - "epoch": 0.2362808314920864, - "grad_norm": 1.7798274868508546, - "learning_rate": 3.5700685712117035e-06, - "loss": 0.9958, - "step": 2620 - }, - { - "epoch": 0.23637101501555666, - "grad_norm": 1.4023963259561245, - "learning_rate": 3.5697066136073227e-06, - "loss": 0.9611, - "step": 2621 - }, - { - "epoch": 0.23646119853902692, - "grad_norm": 1.3904531994254536, - "learning_rate": 3.5693445220669184e-06, - "loss": 0.9766, - "step": 2622 - }, - { - "epoch": 0.23655138206249718, - "grad_norm": 1.2101939718180486, - "learning_rate": 3.568982296621386e-06, - "loss": 0.9628, - "step": 2623 - }, - { - "epoch": 0.23664156558596744, - "grad_norm": 1.5095743746609693, - "learning_rate": 3.5686199373016325e-06, - "loss": 1.0436, - "step": 2624 - }, - { - "epoch": 0.2367317491094377, - "grad_norm": 1.7505400369353727, - "learning_rate": 3.568257444138577e-06, - "loss": 0.9232, - "step": 2625 - }, - { - "epoch": 0.23682193263290796, - "grad_norm": 1.5294712400846953, - "learning_rate": 3.5678948171631495e-06, - "loss": 0.9756, - "step": 2626 - }, - { - "epoch": 0.23691211615637822, - "grad_norm": 1.5412373159711148, - "learning_rate": 3.5675320564062908e-06, - "loss": 0.986, - "step": 2627 - }, - { - "epoch": 0.23700229967984848, - "grad_norm": 1.4968903415347692, - "learning_rate": 3.5671691618989533e-06, - "loss": 0.8905, - "step": 2628 - }, - { - "epoch": 0.23709248320331874, - "grad_norm": 1.7013177644910151, - "learning_rate": 3.5668061336721024e-06, - "loss": 0.9812, - "step": 2629 - }, - { - "epoch": 0.237182666726789, - "grad_norm": 1.5089940792900407, - "learning_rate": 3.5664429717567117e-06, - "loss": 0.9694, - "step": 2630 - }, - { - "epoch": 0.23727285025025927, - "grad_norm": 1.603316820949192, - "learning_rate": 3.56607967618377e-06, - "loss": 1.006, - "step": 2631 - }, - { - "epoch": 0.23736303377372953, - "grad_norm": 1.638046709188836, - "learning_rate": 3.5657162469842754e-06, - "loss": 0.9965, - "step": 2632 - }, - { - "epoch": 0.23745321729719981, - "grad_norm": 1.4004329318541835, - "learning_rate": 3.5653526841892374e-06, - "loss": 0.9772, - "step": 2633 - }, - { - "epoch": 0.23754340082067008, - "grad_norm": 1.4721076408918168, - "learning_rate": 3.564988987829676e-06, - "loss": 1.009, - "step": 2634 - }, - { - "epoch": 0.23763358434414034, - "grad_norm": 1.6370284850357617, - "learning_rate": 3.564625157936626e-06, - "loss": 0.9198, - "step": 2635 - }, - { - "epoch": 0.2377237678676106, - "grad_norm": 1.4492668699105549, - "learning_rate": 3.56426119454113e-06, - "loss": 1.013, - "step": 2636 - }, - { - "epoch": 0.23781395139108086, - "grad_norm": 1.8068710262712757, - "learning_rate": 3.5638970976742436e-06, - "loss": 1.0453, - "step": 2637 - }, - { - "epoch": 0.23790413491455112, - "grad_norm": 2.142700636915875, - "learning_rate": 3.5635328673670335e-06, - "loss": 0.9968, - "step": 2638 - }, - { - "epoch": 0.23799431843802138, - "grad_norm": 1.3211327888229574, - "learning_rate": 3.5631685036505783e-06, - "loss": 1.069, - "step": 2639 - }, - { - "epoch": 0.23808450196149164, - "grad_norm": 1.2738657974151826, - "learning_rate": 3.562804006555966e-06, - "loss": 0.987, - "step": 2640 - }, - { - "epoch": 0.2381746854849619, - "grad_norm": 1.4853215462224358, - "learning_rate": 3.5624393761143e-06, - "loss": 0.9268, - "step": 2641 - }, - { - "epoch": 0.23826486900843216, - "grad_norm": 1.7617592817494803, - "learning_rate": 3.5620746123566906e-06, - "loss": 0.8923, - "step": 2642 - }, - { - "epoch": 0.23835505253190242, - "grad_norm": 1.6751588319086588, - "learning_rate": 3.5617097153142623e-06, - "loss": 0.9209, - "step": 2643 - }, - { - "epoch": 0.23844523605537268, - "grad_norm": 1.40129624030991, - "learning_rate": 3.5613446850181497e-06, - "loss": 1.0217, - "step": 2644 - }, - { - "epoch": 0.23853541957884294, - "grad_norm": 0.7051948387361204, - "learning_rate": 3.5609795214994996e-06, - "loss": 0.7601, - "step": 2645 - }, - { - "epoch": 0.2386256031023132, - "grad_norm": 1.2983914437141941, - "learning_rate": 3.560614224789469e-06, - "loss": 1.027, - "step": 2646 - }, - { - "epoch": 0.23871578662578347, - "grad_norm": 1.425595206050112, - "learning_rate": 3.5602487949192285e-06, - "loss": 0.9217, - "step": 2647 - }, - { - "epoch": 0.23880597014925373, - "grad_norm": 2.10486125497857, - "learning_rate": 3.559883231919957e-06, - "loss": 0.926, - "step": 2648 - }, - { - "epoch": 0.238896153672724, - "grad_norm": 1.4848675362432868, - "learning_rate": 3.5595175358228473e-06, - "loss": 0.9509, - "step": 2649 - }, - { - "epoch": 0.23898633719619425, - "grad_norm": 1.6361712357088531, - "learning_rate": 3.5591517066591027e-06, - "loss": 0.9811, - "step": 2650 - }, - { - "epoch": 0.2390765207196645, - "grad_norm": 1.571848180098411, - "learning_rate": 3.5587857444599364e-06, - "loss": 0.9687, - "step": 2651 - }, - { - "epoch": 0.23916670424313477, - "grad_norm": 1.453064209681694, - "learning_rate": 3.5584196492565766e-06, - "loss": 0.9423, - "step": 2652 - }, - { - "epoch": 0.23925688776660503, - "grad_norm": 1.622920980517131, - "learning_rate": 3.5580534210802587e-06, - "loss": 1.0353, - "step": 2653 - }, - { - "epoch": 0.2393470712900753, - "grad_norm": 1.4303228915409842, - "learning_rate": 3.557687059962232e-06, - "loss": 1.0212, - "step": 2654 - }, - { - "epoch": 0.23943725481354555, - "grad_norm": 1.3715355316243385, - "learning_rate": 3.5573205659337558e-06, - "loss": 0.9738, - "step": 2655 - }, - { - "epoch": 0.23952743833701584, - "grad_norm": 1.3565055114262925, - "learning_rate": 3.5569539390261025e-06, - "loss": 0.9582, - "step": 2656 - }, - { - "epoch": 0.2396176218604861, - "grad_norm": 1.1650051933794958, - "learning_rate": 3.5565871792705543e-06, - "loss": 0.9948, - "step": 2657 - }, - { - "epoch": 0.23970780538395636, - "grad_norm": 1.3173893729711785, - "learning_rate": 3.5562202866984045e-06, - "loss": 0.9703, - "step": 2658 - }, - { - "epoch": 0.23979798890742662, - "grad_norm": 1.4848166360901465, - "learning_rate": 3.5558532613409594e-06, - "loss": 0.9833, - "step": 2659 - }, - { - "epoch": 0.23988817243089688, - "grad_norm": 1.7047813830235854, - "learning_rate": 3.555486103229535e-06, - "loss": 0.9128, - "step": 2660 - }, - { - "epoch": 0.23997835595436715, - "grad_norm": 1.5577045858611138, - "learning_rate": 3.5551188123954595e-06, - "loss": 1.0023, - "step": 2661 - }, - { - "epoch": 0.2400685394778374, - "grad_norm": 1.4911445048285548, - "learning_rate": 3.5547513888700715e-06, - "loss": 0.8706, - "step": 2662 - }, - { - "epoch": 0.24015872300130767, - "grad_norm": 1.250095840594631, - "learning_rate": 3.5543838326847224e-06, - "loss": 1.0089, - "step": 2663 - }, - { - "epoch": 0.24024890652477793, - "grad_norm": 1.4530514113958755, - "learning_rate": 3.5540161438707744e-06, - "loss": 0.9517, - "step": 2664 - }, - { - "epoch": 0.2403390900482482, - "grad_norm": 1.3193921826145911, - "learning_rate": 3.5536483224596e-06, - "loss": 1.1342, - "step": 2665 - }, - { - "epoch": 0.24042927357171845, - "grad_norm": 2.8692463231466596, - "learning_rate": 3.553280368482584e-06, - "loss": 0.9535, - "step": 2666 - }, - { - "epoch": 0.2405194570951887, - "grad_norm": 1.8279073210663497, - "learning_rate": 3.5529122819711227e-06, - "loss": 0.8063, - "step": 2667 - }, - { - "epoch": 0.24060964061865897, - "grad_norm": 1.333834414898794, - "learning_rate": 3.5525440629566223e-06, - "loss": 0.9256, - "step": 2668 - }, - { - "epoch": 0.24069982414212923, - "grad_norm": 1.3518956826612383, - "learning_rate": 3.552175711470502e-06, - "loss": 0.999, - "step": 2669 - }, - { - "epoch": 0.2407900076655995, - "grad_norm": 1.7836708884282209, - "learning_rate": 3.5518072275441912e-06, - "loss": 1.0248, - "step": 2670 - }, - { - "epoch": 0.24088019118906975, - "grad_norm": 1.4725174977634332, - "learning_rate": 3.551438611209131e-06, - "loss": 1.0056, - "step": 2671 - }, - { - "epoch": 0.24097037471254001, - "grad_norm": 1.4650426297283594, - "learning_rate": 3.551069862496774e-06, - "loss": 1.0702, - "step": 2672 - }, - { - "epoch": 0.24106055823601027, - "grad_norm": 0.8078972585693679, - "learning_rate": 3.5507009814385846e-06, - "loss": 0.8266, - "step": 2673 - }, - { - "epoch": 0.24115074175948054, - "grad_norm": 0.7059643319591868, - "learning_rate": 3.550331968066036e-06, - "loss": 0.8193, - "step": 2674 - }, - { - "epoch": 0.2412409252829508, - "grad_norm": 1.6739088148077654, - "learning_rate": 3.549962822410616e-06, - "loss": 0.9996, - "step": 2675 - }, - { - "epoch": 0.24133110880642106, - "grad_norm": 0.8990433681813915, - "learning_rate": 3.5495935445038217e-06, - "loss": 0.8036, - "step": 2676 - }, - { - "epoch": 0.24142129232989132, - "grad_norm": 1.429084859273116, - "learning_rate": 3.5492241343771612e-06, - "loss": 1.0188, - "step": 2677 - }, - { - "epoch": 0.24151147585336158, - "grad_norm": 1.4546773678665417, - "learning_rate": 3.548854592062156e-06, - "loss": 0.9787, - "step": 2678 - }, - { - "epoch": 0.24160165937683184, - "grad_norm": 1.426208515683927, - "learning_rate": 3.548484917590336e-06, - "loss": 0.951, - "step": 2679 - }, - { - "epoch": 0.24169184290030213, - "grad_norm": 1.7096453799799727, - "learning_rate": 3.5481151109932447e-06, - "loss": 0.8981, - "step": 2680 - }, - { - "epoch": 0.2417820264237724, - "grad_norm": 1.3414143512638939, - "learning_rate": 3.5477451723024364e-06, - "loss": 0.9966, - "step": 2681 - }, - { - "epoch": 0.24187220994724265, - "grad_norm": 0.7466812616721324, - "learning_rate": 3.5473751015494757e-06, - "loss": 0.7821, - "step": 2682 - }, - { - "epoch": 0.2419623934707129, - "grad_norm": 1.3959203901709027, - "learning_rate": 3.547004898765939e-06, - "loss": 0.9838, - "step": 2683 - }, - { - "epoch": 0.24205257699418317, - "grad_norm": 1.2639457016822944, - "learning_rate": 3.546634563983414e-06, - "loss": 0.997, - "step": 2684 - }, - { - "epoch": 0.24214276051765343, - "grad_norm": 1.4064793717559632, - "learning_rate": 3.5462640972335002e-06, - "loss": 0.9397, - "step": 2685 - }, - { - "epoch": 0.2422329440411237, - "grad_norm": 1.3553107881436257, - "learning_rate": 3.5458934985478077e-06, - "loss": 0.9985, - "step": 2686 - }, - { - "epoch": 0.24232312756459395, - "grad_norm": 1.7922575294228984, - "learning_rate": 3.5455227679579577e-06, - "loss": 0.9557, - "step": 2687 - }, - { - "epoch": 0.24241331108806422, - "grad_norm": 1.3566997116494577, - "learning_rate": 3.545151905495584e-06, - "loss": 1.0737, - "step": 2688 - }, - { - "epoch": 0.24250349461153448, - "grad_norm": 2.02053589614666, - "learning_rate": 3.544780911192329e-06, - "loss": 0.9582, - "step": 2689 - }, - { - "epoch": 0.24259367813500474, - "grad_norm": 1.4647870268965482, - "learning_rate": 3.544409785079849e-06, - "loss": 0.9239, - "step": 2690 - }, - { - "epoch": 0.242683861658475, - "grad_norm": 1.3223062844272562, - "learning_rate": 3.5440385271898103e-06, - "loss": 0.9258, - "step": 2691 - }, - { - "epoch": 0.24277404518194526, - "grad_norm": 1.472257767654063, - "learning_rate": 3.5436671375538903e-06, - "loss": 0.9098, - "step": 2692 - }, - { - "epoch": 0.24286422870541552, - "grad_norm": 1.65973771148789, - "learning_rate": 3.543295616203779e-06, - "loss": 0.9793, - "step": 2693 - }, - { - "epoch": 0.24295441222888578, - "grad_norm": 1.5879540612568839, - "learning_rate": 3.542923963171176e-06, - "loss": 1.073, - "step": 2694 - }, - { - "epoch": 0.24304459575235604, - "grad_norm": 1.360098591414466, - "learning_rate": 3.542552178487793e-06, - "loss": 0.894, - "step": 2695 - }, - { - "epoch": 0.2431347792758263, - "grad_norm": 1.6061605554754512, - "learning_rate": 3.5421802621853523e-06, - "loss": 0.9689, - "step": 2696 - }, - { - "epoch": 0.24322496279929656, - "grad_norm": 1.6812757213567782, - "learning_rate": 3.5418082142955887e-06, - "loss": 0.9748, - "step": 2697 - }, - { - "epoch": 0.24331514632276682, - "grad_norm": 0.9629266202129654, - "learning_rate": 3.5414360348502463e-06, - "loss": 0.8339, - "step": 2698 - }, - { - "epoch": 0.24340532984623708, - "grad_norm": 1.3514820141062995, - "learning_rate": 3.5410637238810825e-06, - "loss": 0.8809, - "step": 2699 - }, - { - "epoch": 0.24349551336970734, - "grad_norm": 1.5166647579195078, - "learning_rate": 3.5406912814198635e-06, - "loss": 0.9252, - "step": 2700 - }, - { - "epoch": 0.2435856968931776, - "grad_norm": 1.827517416645349, - "learning_rate": 3.54031870749837e-06, - "loss": 0.9397, - "step": 2701 - }, - { - "epoch": 0.24367588041664787, - "grad_norm": 1.4803133357059568, - "learning_rate": 3.539946002148391e-06, - "loss": 0.9894, - "step": 2702 - }, - { - "epoch": 0.24376606394011813, - "grad_norm": 1.7344162394801488, - "learning_rate": 3.5395731654017277e-06, - "loss": 0.9731, - "step": 2703 - }, - { - "epoch": 0.24385624746358842, - "grad_norm": 1.9631072423035238, - "learning_rate": 3.5392001972901923e-06, - "loss": 1.0201, - "step": 2704 - }, - { - "epoch": 0.24394643098705868, - "grad_norm": 1.420153739289868, - "learning_rate": 3.5388270978456098e-06, - "loss": 0.9309, - "step": 2705 - }, - { - "epoch": 0.24403661451052894, - "grad_norm": 1.442885056378085, - "learning_rate": 3.5384538670998137e-06, - "loss": 0.9413, - "step": 2706 - }, - { - "epoch": 0.2441267980339992, - "grad_norm": 1.5318356386666163, - "learning_rate": 3.538080505084651e-06, - "loss": 0.9965, - "step": 2707 - }, - { - "epoch": 0.24421698155746946, - "grad_norm": 1.3361989261868974, - "learning_rate": 3.5377070118319788e-06, - "loss": 0.9902, - "step": 2708 - }, - { - "epoch": 0.24430716508093972, - "grad_norm": 2.6744589998301453, - "learning_rate": 3.5373333873736657e-06, - "loss": 0.9347, - "step": 2709 - }, - { - "epoch": 0.24439734860440998, - "grad_norm": 1.3548358681992503, - "learning_rate": 3.536959631741591e-06, - "loss": 0.9838, - "step": 2710 - }, - { - "epoch": 0.24448753212788024, - "grad_norm": 1.0540306589093884, - "learning_rate": 3.536585744967646e-06, - "loss": 1.0024, - "step": 2711 - }, - { - "epoch": 0.2445777156513505, - "grad_norm": 1.5806879062284789, - "learning_rate": 3.5362117270837326e-06, - "loss": 0.9632, - "step": 2712 - }, - { - "epoch": 0.24466789917482076, - "grad_norm": 1.3162415172162276, - "learning_rate": 3.5358375781217634e-06, - "loss": 0.8795, - "step": 2713 - }, - { - "epoch": 0.24475808269829102, - "grad_norm": 1.4729206849528549, - "learning_rate": 3.535463298113664e-06, - "loss": 1.0075, - "step": 2714 - }, - { - "epoch": 0.24484826622176128, - "grad_norm": 1.526221133497286, - "learning_rate": 3.5350888870913697e-06, - "loss": 0.9402, - "step": 2715 - }, - { - "epoch": 0.24493844974523155, - "grad_norm": 1.5114663872043061, - "learning_rate": 3.5347143450868273e-06, - "loss": 0.9811, - "step": 2716 - }, - { - "epoch": 0.2450286332687018, - "grad_norm": 1.5796048619117982, - "learning_rate": 3.534339672131994e-06, - "loss": 0.9848, - "step": 2717 - }, - { - "epoch": 0.24511881679217207, - "grad_norm": 1.4789812571773961, - "learning_rate": 3.5339648682588397e-06, - "loss": 0.9065, - "step": 2718 - }, - { - "epoch": 0.24520900031564233, - "grad_norm": 1.6682376689458016, - "learning_rate": 3.533589933499345e-06, - "loss": 1.0207, - "step": 2719 - }, - { - "epoch": 0.2452991838391126, - "grad_norm": 1.5311234090988162, - "learning_rate": 3.533214867885501e-06, - "loss": 0.9139, - "step": 2720 - }, - { - "epoch": 0.24538936736258285, - "grad_norm": 3.5215442248276525, - "learning_rate": 3.53283967144931e-06, - "loss": 0.9207, - "step": 2721 - }, - { - "epoch": 0.2454795508860531, - "grad_norm": 1.4747045221011201, - "learning_rate": 3.532464344222787e-06, - "loss": 0.9886, - "step": 2722 - }, - { - "epoch": 0.24556973440952337, - "grad_norm": 1.5740332861001838, - "learning_rate": 3.532088886237956e-06, - "loss": 1.0132, - "step": 2723 - }, - { - "epoch": 0.24565991793299363, - "grad_norm": 0.9598187082195069, - "learning_rate": 3.5317132975268535e-06, - "loss": 0.7524, - "step": 2724 - }, - { - "epoch": 0.2457501014564639, - "grad_norm": 1.3709337624785216, - "learning_rate": 3.531337578121526e-06, - "loss": 0.9991, - "step": 2725 - }, - { - "epoch": 0.24584028497993415, - "grad_norm": 1.4942189713425769, - "learning_rate": 3.530961728054033e-06, - "loss": 1.0073, - "step": 2726 - }, - { - "epoch": 0.24593046850340441, - "grad_norm": 1.4274376849253676, - "learning_rate": 3.5305857473564435e-06, - "loss": 0.9439, - "step": 2727 - }, - { - "epoch": 0.2460206520268747, - "grad_norm": 1.6847954021947205, - "learning_rate": 3.5302096360608385e-06, - "loss": 0.901, - "step": 2728 - }, - { - "epoch": 0.24611083555034496, - "grad_norm": 1.4440728722410123, - "learning_rate": 3.5298333941993105e-06, - "loss": 1.0198, - "step": 2729 - }, - { - "epoch": 0.24620101907381522, - "grad_norm": 1.5404794434658864, - "learning_rate": 3.529457021803962e-06, - "loss": 0.9672, - "step": 2730 - }, - { - "epoch": 0.24629120259728549, - "grad_norm": 1.6819266365145258, - "learning_rate": 3.529080518906906e-06, - "loss": 0.9914, - "step": 2731 - }, - { - "epoch": 0.24638138612075575, - "grad_norm": 1.4074451877633556, - "learning_rate": 3.5287038855402696e-06, - "loss": 0.9562, - "step": 2732 - }, - { - "epoch": 0.246471569644226, - "grad_norm": 1.6737495679904646, - "learning_rate": 3.528327121736188e-06, - "loss": 1.0105, - "step": 2733 - }, - { - "epoch": 0.24656175316769627, - "grad_norm": 1.5075112471785652, - "learning_rate": 3.52795022752681e-06, - "loss": 1.0581, - "step": 2734 - }, - { - "epoch": 0.24665193669116653, - "grad_norm": 1.5867569784032314, - "learning_rate": 3.5275732029442925e-06, - "loss": 1.0403, - "step": 2735 - }, - { - "epoch": 0.2467421202146368, - "grad_norm": 1.888620496206855, - "learning_rate": 3.5271960480208077e-06, - "loss": 0.9189, - "step": 2736 - }, - { - "epoch": 0.24683230373810705, - "grad_norm": 1.0296039738445764, - "learning_rate": 3.526818762788534e-06, - "loss": 0.8481, - "step": 2737 - }, - { - "epoch": 0.2469224872615773, - "grad_norm": 1.4571431707600917, - "learning_rate": 3.5264413472796653e-06, - "loss": 1.0066, - "step": 2738 - }, - { - "epoch": 0.24701267078504757, - "grad_norm": 1.6104959084317598, - "learning_rate": 3.5260638015264037e-06, - "loss": 0.9452, - "step": 2739 - }, - { - "epoch": 0.24710285430851783, - "grad_norm": 1.3010284830386991, - "learning_rate": 3.5256861255609644e-06, - "loss": 0.9538, - "step": 2740 - }, - { - "epoch": 0.2471930378319881, - "grad_norm": 1.6176958989608932, - "learning_rate": 3.5253083194155723e-06, - "loss": 0.9897, - "step": 2741 - }, - { - "epoch": 0.24728322135545835, - "grad_norm": 1.2191293077270537, - "learning_rate": 3.5249303831224637e-06, - "loss": 0.9944, - "step": 2742 - }, - { - "epoch": 0.24737340487892862, - "grad_norm": 1.3142221142947237, - "learning_rate": 3.524552316713887e-06, - "loss": 0.9673, - "step": 2743 - }, - { - "epoch": 0.24746358840239888, - "grad_norm": 1.6046117272543878, - "learning_rate": 3.5241741202220995e-06, - "loss": 1.0377, - "step": 2744 - }, - { - "epoch": 0.24755377192586914, - "grad_norm": 1.3320685686270237, - "learning_rate": 3.5237957936793724e-06, - "loss": 0.9593, - "step": 2745 - }, - { - "epoch": 0.2476439554493394, - "grad_norm": 1.2756288081125837, - "learning_rate": 3.523417337117986e-06, - "loss": 0.9327, - "step": 2746 - }, - { - "epoch": 0.24773413897280966, - "grad_norm": 1.2050163958055327, - "learning_rate": 3.523038750570232e-06, - "loss": 0.9205, - "step": 2747 - }, - { - "epoch": 0.24782432249627992, - "grad_norm": 1.4368121948056016, - "learning_rate": 3.522660034068414e-06, - "loss": 1.007, - "step": 2748 - }, - { - "epoch": 0.24791450601975018, - "grad_norm": 1.5871000491973117, - "learning_rate": 3.5222811876448464e-06, - "loss": 0.9957, - "step": 2749 - }, - { - "epoch": 0.24800468954322044, - "grad_norm": 1.1187186252733836, - "learning_rate": 3.521902211331854e-06, - "loss": 0.9923, - "step": 2750 - }, - { - "epoch": 0.2480948730666907, - "grad_norm": 1.5534715354136142, - "learning_rate": 3.5215231051617726e-06, - "loss": 0.9277, - "step": 2751 - }, - { - "epoch": 0.248185056590161, - "grad_norm": 16.75715598218648, - "learning_rate": 3.521143869166951e-06, - "loss": 0.8804, - "step": 2752 - }, - { - "epoch": 0.24827524011363125, - "grad_norm": 1.739947191511296, - "learning_rate": 3.5207645033797464e-06, - "loss": 0.999, - "step": 2753 - }, - { - "epoch": 0.2483654236371015, - "grad_norm": 1.4748212447390754, - "learning_rate": 3.5203850078325293e-06, - "loss": 0.9058, - "step": 2754 - }, - { - "epoch": 0.24845560716057177, - "grad_norm": 1.5147867145788785, - "learning_rate": 3.5200053825576797e-06, - "loss": 0.8926, - "step": 2755 - }, - { - "epoch": 0.24854579068404203, - "grad_norm": 1.2726164903512673, - "learning_rate": 3.51962562758759e-06, - "loss": 0.9768, - "step": 2756 - }, - { - "epoch": 0.2486359742075123, - "grad_norm": 1.4597981952303303, - "learning_rate": 3.5192457429546627e-06, - "loss": 0.9506, - "step": 2757 - }, - { - "epoch": 0.24872615773098256, - "grad_norm": 1.822723167685498, - "learning_rate": 3.5188657286913115e-06, - "loss": 0.9444, - "step": 2758 - }, - { - "epoch": 0.24881634125445282, - "grad_norm": 1.7715061779425918, - "learning_rate": 3.518485584829961e-06, - "loss": 0.9387, - "step": 2759 - }, - { - "epoch": 0.24890652477792308, - "grad_norm": 1.251194335659674, - "learning_rate": 3.5181053114030485e-06, - "loss": 0.9915, - "step": 2760 - }, - { - "epoch": 0.24899670830139334, - "grad_norm": 1.494053816922713, - "learning_rate": 3.5177249084430198e-06, - "loss": 0.9787, - "step": 2761 - }, - { - "epoch": 0.2490868918248636, - "grad_norm": 1.821512440746042, - "learning_rate": 3.517344375982333e-06, - "loss": 1.0166, - "step": 2762 - }, - { - "epoch": 0.24917707534833386, - "grad_norm": 1.529574898772996, - "learning_rate": 3.5169637140534565e-06, - "loss": 1.0129, - "step": 2763 - }, - { - "epoch": 0.24926725887180412, - "grad_norm": 1.8501576433803923, - "learning_rate": 3.5165829226888733e-06, - "loss": 0.9471, - "step": 2764 - }, - { - "epoch": 0.24935744239527438, - "grad_norm": 1.3456524533502399, - "learning_rate": 3.516202001921072e-06, - "loss": 0.9987, - "step": 2765 - }, - { - "epoch": 0.24944762591874464, - "grad_norm": 1.3306471269952866, - "learning_rate": 3.515820951782555e-06, - "loss": 1.0058, - "step": 2766 - }, - { - "epoch": 0.2495378094422149, - "grad_norm": 1.5078956699657522, - "learning_rate": 3.5154397723058366e-06, - "loss": 0.9, - "step": 2767 - }, - { - "epoch": 0.24962799296568516, - "grad_norm": 1.5149490579532234, - "learning_rate": 3.5150584635234416e-06, - "loss": 1.0226, - "step": 2768 - }, - { - "epoch": 0.24971817648915542, - "grad_norm": 1.195681446455922, - "learning_rate": 3.5146770254679035e-06, - "loss": 1.0129, - "step": 2769 - }, - { - "epoch": 0.24980836001262569, - "grad_norm": 1.3967877678654539, - "learning_rate": 3.51429545817177e-06, - "loss": 0.8975, - "step": 2770 - }, - { - "epoch": 0.24989854353609595, - "grad_norm": 1.4753377366124525, - "learning_rate": 3.5139137616675985e-06, - "loss": 0.9817, - "step": 2771 - }, - { - "epoch": 0.2499887270595662, - "grad_norm": 1.3894711540195708, - "learning_rate": 3.513531935987957e-06, - "loss": 0.921, - "step": 2772 - }, - { - "epoch": 0.2500789105830365, - "grad_norm": 1.4155003103284856, - "learning_rate": 3.5131499811654253e-06, - "loss": 0.9934, - "step": 2773 - }, - { - "epoch": 0.25016909410650673, - "grad_norm": 1.5980074247061224, - "learning_rate": 3.512767897232594e-06, - "loss": 0.9346, - "step": 2774 - }, - { - "epoch": 0.250259277629977, - "grad_norm": 1.4940692161500064, - "learning_rate": 3.512385684222064e-06, - "loss": 0.9643, - "step": 2775 - }, - { - "epoch": 0.25034946115344725, - "grad_norm": 1.540701056841175, - "learning_rate": 3.512003342166449e-06, - "loss": 1.0702, - "step": 2776 - }, - { - "epoch": 0.25043964467691754, - "grad_norm": 1.2477390823146481, - "learning_rate": 3.511620871098371e-06, - "loss": 1.0156, - "step": 2777 - }, - { - "epoch": 0.25052982820038777, - "grad_norm": 1.415029374346795, - "learning_rate": 3.511238271050465e-06, - "loss": 0.9981, - "step": 2778 - }, - { - "epoch": 0.25062001172385806, - "grad_norm": 1.425777069503717, - "learning_rate": 3.5108555420553778e-06, - "loss": 0.8874, - "step": 2779 - }, - { - "epoch": 0.2507101952473283, - "grad_norm": 1.6351850876176715, - "learning_rate": 3.510472684145764e-06, - "loss": 0.787, - "step": 2780 - }, - { - "epoch": 0.2508003787707986, - "grad_norm": 1.353731168508538, - "learning_rate": 3.5100896973542926e-06, - "loss": 0.8625, - "step": 2781 - }, - { - "epoch": 0.2508905622942688, - "grad_norm": 1.6778425863449207, - "learning_rate": 3.509706581713642e-06, - "loss": 1.0109, - "step": 2782 - }, - { - "epoch": 0.2509807458177391, - "grad_norm": 1.41431319401344, - "learning_rate": 3.509323337256501e-06, - "loss": 0.989, - "step": 2783 - }, - { - "epoch": 0.25107092934120934, - "grad_norm": 1.4172454754832382, - "learning_rate": 3.5089399640155703e-06, - "loss": 0.9225, - "step": 2784 - }, - { - "epoch": 0.2511611128646796, - "grad_norm": 1.1779960480803437, - "learning_rate": 3.508556462023562e-06, - "loss": 0.9607, - "step": 2785 - }, - { - "epoch": 0.25125129638814986, - "grad_norm": 1.6681862896964919, - "learning_rate": 3.5081728313131984e-06, - "loss": 0.958, - "step": 2786 - }, - { - "epoch": 0.25134147991162015, - "grad_norm": 1.528837600281756, - "learning_rate": 3.5077890719172125e-06, - "loss": 0.9632, - "step": 2787 - }, - { - "epoch": 0.25143166343509044, - "grad_norm": 1.3678551923587812, - "learning_rate": 3.5074051838683497e-06, - "loss": 0.9704, - "step": 2788 - }, - { - "epoch": 0.25152184695856067, - "grad_norm": 1.5105357506823052, - "learning_rate": 3.5070211671993643e-06, - "loss": 0.8953, - "step": 2789 - }, - { - "epoch": 0.25161203048203096, - "grad_norm": 1.4303369766618041, - "learning_rate": 3.5066370219430238e-06, - "loss": 0.9896, - "step": 2790 - }, - { - "epoch": 0.2517022140055012, - "grad_norm": 1.3887716651826612, - "learning_rate": 3.5062527481321044e-06, - "loss": 0.9712, - "step": 2791 - }, - { - "epoch": 0.2517923975289715, - "grad_norm": 1.5575542760120396, - "learning_rate": 3.5058683457993954e-06, - "loss": 0.9818, - "step": 2792 - }, - { - "epoch": 0.2518825810524417, - "grad_norm": 1.6274440432303119, - "learning_rate": 3.5054838149776963e-06, - "loss": 0.7846, - "step": 2793 - }, - { - "epoch": 0.251972764575912, - "grad_norm": 1.6110160386691887, - "learning_rate": 3.505099155699816e-06, - "loss": 1.007, - "step": 2794 - }, - { - "epoch": 0.25206294809938223, - "grad_norm": 1.4944834995169298, - "learning_rate": 3.5047143679985775e-06, - "loss": 0.8585, - "step": 2795 - }, - { - "epoch": 0.2521531316228525, - "grad_norm": 1.335219385430008, - "learning_rate": 3.5043294519068126e-06, - "loss": 0.9856, - "step": 2796 - }, - { - "epoch": 0.25224331514632276, - "grad_norm": 1.6362323630062148, - "learning_rate": 3.503944407457363e-06, - "loss": 0.8937, - "step": 2797 - }, - { - "epoch": 0.25233349866979304, - "grad_norm": 1.376490002389026, - "learning_rate": 3.5035592346830846e-06, - "loss": 0.9877, - "step": 2798 - }, - { - "epoch": 0.2524236821932633, - "grad_norm": 1.5306759750955992, - "learning_rate": 3.503173933616841e-06, - "loss": 1.0388, - "step": 2799 - }, - { - "epoch": 0.25251386571673357, - "grad_norm": 1.5174151987328515, - "learning_rate": 3.50278850429151e-06, - "loss": 1.0488, - "step": 2800 - }, - { - "epoch": 0.2526040492402038, - "grad_norm": 1.44385946279942, - "learning_rate": 3.502402946739977e-06, - "loss": 0.9965, - "step": 2801 - }, - { - "epoch": 0.2526942327636741, - "grad_norm": 1.9211186106819624, - "learning_rate": 3.5020172609951405e-06, - "loss": 0.9919, - "step": 2802 - }, - { - "epoch": 0.2527844162871443, - "grad_norm": 1.2176792625348725, - "learning_rate": 3.501631447089909e-06, - "loss": 0.9841, - "step": 2803 - }, - { - "epoch": 0.2528745998106146, - "grad_norm": 1.9012887524148467, - "learning_rate": 3.501245505057203e-06, - "loss": 0.9462, - "step": 2804 - }, - { - "epoch": 0.25296478333408484, - "grad_norm": 3.9551691492226224, - "learning_rate": 3.5008594349299526e-06, - "loss": 1.0439, - "step": 2805 - }, - { - "epoch": 0.25305496685755513, - "grad_norm": 1.5186280849173317, - "learning_rate": 3.500473236741099e-06, - "loss": 0.9743, - "step": 2806 - }, - { - "epoch": 0.25314515038102536, - "grad_norm": 1.317104211167304, - "learning_rate": 3.500086910523596e-06, - "loss": 0.9309, - "step": 2807 - }, - { - "epoch": 0.25323533390449565, - "grad_norm": 1.2318515818328122, - "learning_rate": 3.499700456310406e-06, - "loss": 0.9224, - "step": 2808 - }, - { - "epoch": 0.2533255174279659, - "grad_norm": 1.2121996792768777, - "learning_rate": 3.499313874134504e-06, - "loss": 1.0216, - "step": 2809 - }, - { - "epoch": 0.2534157009514362, - "grad_norm": 1.504451187290224, - "learning_rate": 3.498927164028875e-06, - "loss": 0.846, - "step": 2810 - }, - { - "epoch": 0.25350588447490646, - "grad_norm": 1.1105165652534277, - "learning_rate": 3.498540326026515e-06, - "loss": 1.0093, - "step": 2811 - }, - { - "epoch": 0.2535960679983767, - "grad_norm": 1.3584371817662806, - "learning_rate": 3.4981533601604323e-06, - "loss": 1.0423, - "step": 2812 - }, - { - "epoch": 0.253686251521847, - "grad_norm": 1.4447238857630755, - "learning_rate": 3.4977662664636443e-06, - "loss": 0.9812, - "step": 2813 - }, - { - "epoch": 0.2537764350453172, - "grad_norm": 1.5937224741035971, - "learning_rate": 3.497379044969179e-06, - "loss": 0.9369, - "step": 2814 - }, - { - "epoch": 0.2538666185687875, - "grad_norm": 1.5490960253854078, - "learning_rate": 3.4969916957100777e-06, - "loss": 0.965, - "step": 2815 - }, - { - "epoch": 0.25395680209225774, - "grad_norm": 1.3225976253885652, - "learning_rate": 3.4966042187193905e-06, - "loss": 0.9667, - "step": 2816 - }, - { - "epoch": 0.254046985615728, - "grad_norm": 1.3826030421702111, - "learning_rate": 3.496216614030179e-06, - "loss": 0.976, - "step": 2817 - }, - { - "epoch": 0.25413716913919826, - "grad_norm": 1.5990305914275584, - "learning_rate": 3.495828881675516e-06, - "loss": 0.8738, - "step": 2818 - }, - { - "epoch": 0.25422735266266855, - "grad_norm": 1.8384958825250726, - "learning_rate": 3.4954410216884845e-06, - "loss": 1.0475, - "step": 2819 - }, - { - "epoch": 0.2543175361861388, - "grad_norm": 1.555727740060335, - "learning_rate": 3.49505303410218e-06, - "loss": 0.9873, - "step": 2820 - }, - { - "epoch": 0.25440771970960907, - "grad_norm": 1.23710860796496, - "learning_rate": 3.4946649189497067e-06, - "loss": 1.0035, - "step": 2821 - }, - { - "epoch": 0.2544979032330793, - "grad_norm": 2.0581502418580384, - "learning_rate": 3.4942766762641805e-06, - "loss": 0.9536, - "step": 2822 - }, - { - "epoch": 0.2545880867565496, - "grad_norm": 1.9021583795801758, - "learning_rate": 3.49388830607873e-06, - "loss": 0.9944, - "step": 2823 - }, - { - "epoch": 0.2546782702800198, - "grad_norm": 1.1556318022309595, - "learning_rate": 3.493499808426491e-06, - "loss": 0.9351, - "step": 2824 - }, - { - "epoch": 0.2547684538034901, - "grad_norm": 1.816778526303717, - "learning_rate": 3.493111183340614e-06, - "loss": 1.0183, - "step": 2825 - }, - { - "epoch": 0.25485863732696035, - "grad_norm": 1.5480828241818114, - "learning_rate": 3.4927224308542576e-06, - "loss": 0.8804, - "step": 2826 - }, - { - "epoch": 0.25494882085043064, - "grad_norm": 1.2560536662924924, - "learning_rate": 3.4923335510005923e-06, - "loss": 1.0501, - "step": 2827 - }, - { - "epoch": 0.25503900437390087, - "grad_norm": 1.5455550910759477, - "learning_rate": 3.4919445438128e-06, - "loss": 0.9952, - "step": 2828 - }, - { - "epoch": 0.25512918789737116, - "grad_norm": 1.3458437908384386, - "learning_rate": 3.491555409324073e-06, - "loss": 1.052, - "step": 2829 - }, - { - "epoch": 0.2552193714208414, - "grad_norm": 0.7458741828628853, - "learning_rate": 3.4911661475676136e-06, - "loss": 0.7879, - "step": 2830 - }, - { - "epoch": 0.2553095549443117, - "grad_norm": 1.3508892256657599, - "learning_rate": 3.490776758576637e-06, - "loss": 0.9493, - "step": 2831 - }, - { - "epoch": 0.2553997384677819, - "grad_norm": 1.3567023476603461, - "learning_rate": 3.4903872423843668e-06, - "loss": 1.0244, - "step": 2832 - }, - { - "epoch": 0.2554899219912522, - "grad_norm": 1.4463713195611534, - "learning_rate": 3.4899975990240396e-06, - "loss": 0.9364, - "step": 2833 - }, - { - "epoch": 0.25558010551472243, - "grad_norm": 1.6616089691786928, - "learning_rate": 3.489607828528901e-06, - "loss": 0.9961, - "step": 2834 - }, - { - "epoch": 0.2556702890381927, - "grad_norm": 0.7277510570962685, - "learning_rate": 3.4892179309322093e-06, - "loss": 0.8817, - "step": 2835 - }, - { - "epoch": 0.255760472561663, - "grad_norm": 1.457222933599179, - "learning_rate": 3.488827906267232e-06, - "loss": 0.8903, - "step": 2836 - }, - { - "epoch": 0.25585065608513324, - "grad_norm": 1.8672624517105556, - "learning_rate": 3.4884377545672485e-06, - "loss": 1.0111, - "step": 2837 - }, - { - "epoch": 0.25594083960860353, - "grad_norm": 1.3449106305151175, - "learning_rate": 3.4880474758655485e-06, - "loss": 0.9505, - "step": 2838 - }, - { - "epoch": 0.25603102313207377, - "grad_norm": 1.347816833658944, - "learning_rate": 3.487657070195433e-06, - "loss": 1.0378, - "step": 2839 - }, - { - "epoch": 0.25612120665554405, - "grad_norm": 1.3787431917688213, - "learning_rate": 3.487266537590213e-06, - "loss": 1.0454, - "step": 2840 - }, - { - "epoch": 0.2562113901790143, - "grad_norm": 1.4743250288505552, - "learning_rate": 3.4868758780832116e-06, - "loss": 0.8896, - "step": 2841 - }, - { - "epoch": 0.2563015737024846, - "grad_norm": 1.5239824934126611, - "learning_rate": 3.486485091707762e-06, - "loss": 0.9789, - "step": 2842 - }, - { - "epoch": 0.2563917572259548, - "grad_norm": 1.5372222897694603, - "learning_rate": 3.4860941784972077e-06, - "loss": 0.8504, - "step": 2843 - }, - { - "epoch": 0.2564819407494251, - "grad_norm": 1.9109777388621023, - "learning_rate": 3.485703138484904e-06, - "loss": 1.073, - "step": 2844 - }, - { - "epoch": 0.25657212427289533, - "grad_norm": 1.744677897677473, - "learning_rate": 3.485311971704216e-06, - "loss": 1.0206, - "step": 2845 - }, - { - "epoch": 0.2566623077963656, - "grad_norm": 1.345156886795962, - "learning_rate": 3.484920678188521e-06, - "loss": 1.0025, - "step": 2846 - }, - { - "epoch": 0.25675249131983585, - "grad_norm": 1.6690969470822106, - "learning_rate": 3.4845292579712063e-06, - "loss": 0.9894, - "step": 2847 - }, - { - "epoch": 0.25684267484330614, - "grad_norm": 1.5040719869866481, - "learning_rate": 3.484137711085669e-06, - "loss": 0.8566, - "step": 2848 - }, - { - "epoch": 0.2569328583667764, - "grad_norm": 1.2944936835502148, - "learning_rate": 3.4837460375653198e-06, - "loss": 0.9657, - "step": 2849 - }, - { - "epoch": 0.25702304189024666, - "grad_norm": 1.3410640753358858, - "learning_rate": 3.483354237443576e-06, - "loss": 1.0062, - "step": 2850 - }, - { - "epoch": 0.2571132254137169, - "grad_norm": 1.2520467214367532, - "learning_rate": 3.48296231075387e-06, - "loss": 0.9177, - "step": 2851 - }, - { - "epoch": 0.2572034089371872, - "grad_norm": 1.3828076292480205, - "learning_rate": 3.4825702575296433e-06, - "loss": 0.9885, - "step": 2852 - }, - { - "epoch": 0.2572935924606574, - "grad_norm": 0.8818216755707521, - "learning_rate": 3.482178077804347e-06, - "loss": 0.8363, - "step": 2853 - }, - { - "epoch": 0.2573837759841277, - "grad_norm": 1.4194612241194067, - "learning_rate": 3.4817857716114443e-06, - "loss": 0.9312, - "step": 2854 - }, - { - "epoch": 0.25747395950759794, - "grad_norm": 2.1108551483693665, - "learning_rate": 3.4813933389844094e-06, - "loss": 0.9263, - "step": 2855 - }, - { - "epoch": 0.2575641430310682, - "grad_norm": 1.6201001166972857, - "learning_rate": 3.4810007799567264e-06, - "loss": 0.9776, - "step": 2856 - }, - { - "epoch": 0.25765432655453846, - "grad_norm": 1.8762907671581646, - "learning_rate": 3.480608094561891e-06, - "loss": 0.9928, - "step": 2857 - }, - { - "epoch": 0.25774451007800875, - "grad_norm": 1.289024352462889, - "learning_rate": 3.4802152828334083e-06, - "loss": 0.9973, - "step": 2858 - }, - { - "epoch": 0.25783469360147904, - "grad_norm": 1.4129403651612527, - "learning_rate": 3.479822344804796e-06, - "loss": 0.9171, - "step": 2859 - }, - { - "epoch": 0.25792487712494927, - "grad_norm": 1.7324057732041318, - "learning_rate": 3.479429280509582e-06, - "loss": 1.0261, - "step": 2860 - }, - { - "epoch": 0.25801506064841956, - "grad_norm": 1.389739150881773, - "learning_rate": 3.4790360899813038e-06, - "loss": 1.0172, - "step": 2861 - }, - { - "epoch": 0.2581052441718898, - "grad_norm": 2.0852296463993256, - "learning_rate": 3.4786427732535115e-06, - "loss": 0.9037, - "step": 2862 - }, - { - "epoch": 0.2581954276953601, - "grad_norm": 1.4407125318170577, - "learning_rate": 3.478249330359764e-06, - "loss": 1.0189, - "step": 2863 - }, - { - "epoch": 0.2582856112188303, - "grad_norm": 1.5813034832620707, - "learning_rate": 3.4778557613336333e-06, - "loss": 0.9102, - "step": 2864 - }, - { - "epoch": 0.2583757947423006, - "grad_norm": 1.4229762245921027, - "learning_rate": 3.4774620662087004e-06, - "loss": 0.9855, - "step": 2865 - }, - { - "epoch": 0.25846597826577083, - "grad_norm": 1.5799823067072798, - "learning_rate": 3.477068245018557e-06, - "loss": 0.9778, - "step": 2866 - }, - { - "epoch": 0.2585561617892411, - "grad_norm": 1.441104516182813, - "learning_rate": 3.476674297796807e-06, - "loss": 0.9787, - "step": 2867 - }, - { - "epoch": 0.25864634531271136, - "grad_norm": 1.4932969846871411, - "learning_rate": 3.4762802245770627e-06, - "loss": 1.0118, - "step": 2868 - }, - { - "epoch": 0.25873652883618165, - "grad_norm": 1.2655391899500539, - "learning_rate": 3.4758860253929497e-06, - "loss": 0.9267, - "step": 2869 - }, - { - "epoch": 0.2588267123596519, - "grad_norm": 1.5013355031943578, - "learning_rate": 3.4754917002781038e-06, - "loss": 0.9891, - "step": 2870 - }, - { - "epoch": 0.25891689588312217, - "grad_norm": 1.5493240236034378, - "learning_rate": 3.475097249266169e-06, - "loss": 1.017, - "step": 2871 - }, - { - "epoch": 0.2590070794065924, - "grad_norm": 1.4472566467912726, - "learning_rate": 3.4747026723908044e-06, - "loss": 0.9595, - "step": 2872 - }, - { - "epoch": 0.2590972629300627, - "grad_norm": 1.335546737541466, - "learning_rate": 3.474307969685676e-06, - "loss": 0.9511, - "step": 2873 - }, - { - "epoch": 0.2591874464535329, - "grad_norm": 1.4141907607442332, - "learning_rate": 3.473913141184462e-06, - "loss": 0.913, - "step": 2874 - }, - { - "epoch": 0.2592776299770032, - "grad_norm": 1.3038676992743545, - "learning_rate": 3.4735181869208523e-06, - "loss": 0.8427, - "step": 2875 - }, - { - "epoch": 0.25936781350047344, - "grad_norm": 1.8621412584581296, - "learning_rate": 3.473123106928546e-06, - "loss": 0.8561, - "step": 2876 - }, - { - "epoch": 0.25945799702394373, - "grad_norm": 1.4903017281326771, - "learning_rate": 3.4727279012412533e-06, - "loss": 0.9305, - "step": 2877 - }, - { - "epoch": 0.25954818054741396, - "grad_norm": 1.0418725128235675, - "learning_rate": 3.4723325698926953e-06, - "loss": 0.8064, - "step": 2878 - }, - { - "epoch": 0.25963836407088425, - "grad_norm": 1.5485826564772902, - "learning_rate": 3.4719371129166045e-06, - "loss": 0.9804, - "step": 2879 - }, - { - "epoch": 0.2597285475943545, - "grad_norm": 1.4772242117964776, - "learning_rate": 3.471541530346723e-06, - "loss": 0.8935, - "step": 2880 - }, - { - "epoch": 0.2598187311178248, - "grad_norm": 1.760172027503633, - "learning_rate": 3.4711458222168037e-06, - "loss": 1.0301, - "step": 2881 - }, - { - "epoch": 0.259908914641295, - "grad_norm": 1.3731722821935992, - "learning_rate": 3.4707499885606114e-06, - "loss": 1.0106, - "step": 2882 - }, - { - "epoch": 0.2599990981647653, - "grad_norm": 1.3680769288813412, - "learning_rate": 3.4703540294119204e-06, - "loss": 1.0074, - "step": 2883 - }, - { - "epoch": 0.2600892816882356, - "grad_norm": 1.4342284084036, - "learning_rate": 3.4699579448045163e-06, - "loss": 1.0233, - "step": 2884 - }, - { - "epoch": 0.2601794652117058, - "grad_norm": 1.2098764015943984, - "learning_rate": 3.4695617347721947e-06, - "loss": 0.8621, - "step": 2885 - }, - { - "epoch": 0.2602696487351761, - "grad_norm": 1.6342957253130441, - "learning_rate": 3.469165399348763e-06, - "loss": 0.9329, - "step": 2886 - }, - { - "epoch": 0.26035983225864634, - "grad_norm": 1.501387987108143, - "learning_rate": 3.4687689385680384e-06, - "loss": 0.9535, - "step": 2887 - }, - { - "epoch": 0.26045001578211663, - "grad_norm": 1.3714138428998428, - "learning_rate": 3.4683723524638494e-06, - "loss": 0.9479, - "step": 2888 - }, - { - "epoch": 0.26054019930558686, - "grad_norm": 1.6427901014426862, - "learning_rate": 3.4679756410700354e-06, - "loss": 1.0393, - "step": 2889 - }, - { - "epoch": 0.26063038282905715, - "grad_norm": 1.6857207066088076, - "learning_rate": 3.4675788044204445e-06, - "loss": 0.9712, - "step": 2890 - }, - { - "epoch": 0.2607205663525274, - "grad_norm": 1.542229506704134, - "learning_rate": 3.467181842548938e-06, - "loss": 0.9069, - "step": 2891 - }, - { - "epoch": 0.26081074987599767, - "grad_norm": 1.2960480500868208, - "learning_rate": 3.466784755489387e-06, - "loss": 0.9626, - "step": 2892 - }, - { - "epoch": 0.2609009333994679, - "grad_norm": 1.37143061920138, - "learning_rate": 3.4663875432756726e-06, - "loss": 1.086, - "step": 2893 - }, - { - "epoch": 0.2609911169229382, - "grad_norm": 1.5610980801851118, - "learning_rate": 3.465990205941687e-06, - "loss": 1.1427, - "step": 2894 - }, - { - "epoch": 0.2610813004464084, - "grad_norm": 1.447084155526858, - "learning_rate": 3.465592743521335e-06, - "loss": 0.9542, - "step": 2895 - }, - { - "epoch": 0.2611714839698787, - "grad_norm": 1.4830557331254117, - "learning_rate": 3.465195156048528e-06, - "loss": 1.0276, - "step": 2896 - }, - { - "epoch": 0.26126166749334895, - "grad_norm": 0.6253630537332769, - "learning_rate": 3.464797443557191e-06, - "loss": 0.8102, - "step": 2897 - }, - { - "epoch": 0.26135185101681924, - "grad_norm": 1.4755639630643729, - "learning_rate": 3.46439960608126e-06, - "loss": 0.9479, - "step": 2898 - }, - { - "epoch": 0.26144203454028947, - "grad_norm": 1.4724667374071259, - "learning_rate": 3.4640016436546797e-06, - "loss": 0.9157, - "step": 2899 - }, - { - "epoch": 0.26153221806375976, - "grad_norm": 1.5868996393575323, - "learning_rate": 3.4636035563114065e-06, - "loss": 0.9533, - "step": 2900 - }, - { - "epoch": 0.26162240158723, - "grad_norm": 1.2816426675647108, - "learning_rate": 3.4632053440854085e-06, - "loss": 0.908, - "step": 2901 - }, - { - "epoch": 0.2617125851107003, - "grad_norm": 1.5192950145988395, - "learning_rate": 3.462807007010662e-06, - "loss": 1.026, - "step": 2902 - }, - { - "epoch": 0.2618027686341705, - "grad_norm": 1.42168081445724, - "learning_rate": 3.462408545121155e-06, - "loss": 1.0082, - "step": 2903 - }, - { - "epoch": 0.2618929521576408, - "grad_norm": 1.3959732080473677, - "learning_rate": 3.4620099584508883e-06, - "loss": 1.0243, - "step": 2904 - }, - { - "epoch": 0.26198313568111103, - "grad_norm": 1.4312602080268475, - "learning_rate": 3.46161124703387e-06, - "loss": 0.9318, - "step": 2905 - }, - { - "epoch": 0.2620733192045813, - "grad_norm": 1.4498251908260724, - "learning_rate": 3.461212410904122e-06, - "loss": 0.9656, - "step": 2906 - }, - { - "epoch": 0.2621635027280516, - "grad_norm": 1.475844758799243, - "learning_rate": 3.4608134500956726e-06, - "loss": 1.0253, - "step": 2907 - }, - { - "epoch": 0.26225368625152184, - "grad_norm": 1.2007248457577746, - "learning_rate": 3.4604143646425655e-06, - "loss": 0.9558, - "step": 2908 - }, - { - "epoch": 0.26234386977499213, - "grad_norm": 1.220619088904253, - "learning_rate": 3.460015154578852e-06, - "loss": 1.0428, - "step": 2909 - }, - { - "epoch": 0.26243405329846237, - "grad_norm": 1.4377118659417139, - "learning_rate": 3.459615819938595e-06, - "loss": 0.9425, - "step": 2910 - }, - { - "epoch": 0.26252423682193265, - "grad_norm": 1.6359233516748062, - "learning_rate": 3.4592163607558684e-06, - "loss": 0.9328, - "step": 2911 - }, - { - "epoch": 0.2626144203454029, - "grad_norm": 1.375470861095094, - "learning_rate": 3.4588167770647553e-06, - "loss": 0.9717, - "step": 2912 - }, - { - "epoch": 0.2627046038688732, - "grad_norm": 1.7097381150331388, - "learning_rate": 3.458417068899351e-06, - "loss": 1.0708, - "step": 2913 - }, - { - "epoch": 0.2627947873923434, - "grad_norm": 1.42681821973315, - "learning_rate": 3.4580172362937612e-06, - "loss": 0.9442, - "step": 2914 - }, - { - "epoch": 0.2628849709158137, - "grad_norm": 1.1462379723669773, - "learning_rate": 3.457617279282101e-06, - "loss": 0.9353, - "step": 2915 - }, - { - "epoch": 0.26297515443928393, - "grad_norm": 1.6684679628839645, - "learning_rate": 3.4572171978984975e-06, - "loss": 0.9461, - "step": 2916 - }, - { - "epoch": 0.2630653379627542, - "grad_norm": 2.0223355035643937, - "learning_rate": 3.456816992177088e-06, - "loss": 0.9736, - "step": 2917 - }, - { - "epoch": 0.26315552148622445, - "grad_norm": 1.239761094028492, - "learning_rate": 3.4564166621520193e-06, - "loss": 0.9433, - "step": 2918 - }, - { - "epoch": 0.26324570500969474, - "grad_norm": 1.3534094038068245, - "learning_rate": 3.4560162078574507e-06, - "loss": 0.9702, - "step": 2919 - }, - { - "epoch": 0.263335888533165, - "grad_norm": 1.7775678891551288, - "learning_rate": 3.455615629327551e-06, - "loss": 0.9771, - "step": 2920 - }, - { - "epoch": 0.26342607205663526, - "grad_norm": 2.207213159676921, - "learning_rate": 3.4552149265964994e-06, - "loss": 0.9511, - "step": 2921 - }, - { - "epoch": 0.2635162555801055, - "grad_norm": 1.3552047076556735, - "learning_rate": 3.4548140996984866e-06, - "loss": 0.9225, - "step": 2922 - }, - { - "epoch": 0.2636064391035758, - "grad_norm": 1.2813564349370132, - "learning_rate": 3.4544131486677124e-06, - "loss": 0.9977, - "step": 2923 - }, - { - "epoch": 0.263696622627046, - "grad_norm": 1.386463410415735, - "learning_rate": 3.454012073538389e-06, - "loss": 0.9125, - "step": 2924 - }, - { - "epoch": 0.2637868061505163, - "grad_norm": 1.2576670592198154, - "learning_rate": 3.453610874344738e-06, - "loss": 0.9717, - "step": 2925 - }, - { - "epoch": 0.26387698967398654, - "grad_norm": 1.4709877660723663, - "learning_rate": 3.453209551120993e-06, - "loss": 1.0145, - "step": 2926 - }, - { - "epoch": 0.26396717319745683, - "grad_norm": 1.5905187425177651, - "learning_rate": 3.452808103901395e-06, - "loss": 0.9456, - "step": 2927 - }, - { - "epoch": 0.26405735672092706, - "grad_norm": 1.4690108372007447, - "learning_rate": 3.4524065327202e-06, - "loss": 1.0394, - "step": 2928 - }, - { - "epoch": 0.26414754024439735, - "grad_norm": 0.8003598476764717, - "learning_rate": 3.4520048376116702e-06, - "loss": 0.7577, - "step": 2929 - }, - { - "epoch": 0.26423772376786764, - "grad_norm": 1.3220716866694457, - "learning_rate": 3.4516030186100817e-06, - "loss": 1.016, - "step": 2930 - }, - { - "epoch": 0.26432790729133787, - "grad_norm": 1.2174704265723448, - "learning_rate": 3.4512010757497197e-06, - "loss": 0.8877, - "step": 2931 - }, - { - "epoch": 0.26441809081480816, - "grad_norm": 1.5462595167923379, - "learning_rate": 3.4507990090648804e-06, - "loss": 0.8897, - "step": 2932 - }, - { - "epoch": 0.2645082743382784, - "grad_norm": 1.4515864933851927, - "learning_rate": 3.4503968185898696e-06, - "loss": 0.9918, - "step": 2933 - }, - { - "epoch": 0.2645984578617487, - "grad_norm": 1.3719984151119275, - "learning_rate": 3.4499945043590047e-06, - "loss": 0.9773, - "step": 2934 - }, - { - "epoch": 0.2646886413852189, - "grad_norm": 1.9750249257506665, - "learning_rate": 3.4495920664066137e-06, - "loss": 0.9877, - "step": 2935 - }, - { - "epoch": 0.2647788249086892, - "grad_norm": 1.8303457612006109, - "learning_rate": 3.449189504767035e-06, - "loss": 0.9649, - "step": 2936 - }, - { - "epoch": 0.26486900843215944, - "grad_norm": 1.4666065239857313, - "learning_rate": 3.4487868194746163e-06, - "loss": 0.9879, - "step": 2937 - }, - { - "epoch": 0.2649591919556297, - "grad_norm": 1.4285362069011378, - "learning_rate": 3.4483840105637175e-06, - "loss": 1.0115, - "step": 2938 - }, - { - "epoch": 0.26504937547909996, - "grad_norm": 1.5072198206983227, - "learning_rate": 3.4479810780687097e-06, - "loss": 0.9626, - "step": 2939 - }, - { - "epoch": 0.26513955900257025, - "grad_norm": 1.3338096135553632, - "learning_rate": 3.4475780220239714e-06, - "loss": 0.9651, - "step": 2940 - }, - { - "epoch": 0.2652297425260405, - "grad_norm": 1.3284922821228482, - "learning_rate": 3.4471748424638948e-06, - "loss": 0.9586, - "step": 2941 - }, - { - "epoch": 0.26531992604951077, - "grad_norm": 0.8820092589590864, - "learning_rate": 3.4467715394228803e-06, - "loss": 0.8038, - "step": 2942 - }, - { - "epoch": 0.265410109572981, - "grad_norm": 1.4354448139474223, - "learning_rate": 3.4463681129353413e-06, - "loss": 0.9771, - "step": 2943 - }, - { - "epoch": 0.2655002930964513, - "grad_norm": 1.4587362096244163, - "learning_rate": 3.4459645630357e-06, - "loss": 0.9662, - "step": 2944 - }, - { - "epoch": 0.2655904766199215, - "grad_norm": 1.5325015655808285, - "learning_rate": 3.4455608897583884e-06, - "loss": 1.0057, - "step": 2945 - }, - { - "epoch": 0.2656806601433918, - "grad_norm": 1.8081233358260143, - "learning_rate": 3.4451570931378514e-06, - "loss": 0.9752, - "step": 2946 - }, - { - "epoch": 0.26577084366686204, - "grad_norm": 1.2691158143140076, - "learning_rate": 3.444753173208543e-06, - "loss": 0.981, - "step": 2947 - }, - { - "epoch": 0.26586102719033233, - "grad_norm": 1.522303545574727, - "learning_rate": 3.444349130004927e-06, - "loss": 0.9665, - "step": 2948 - }, - { - "epoch": 0.26595121071380257, - "grad_norm": 1.7274655013221123, - "learning_rate": 3.4439449635614794e-06, - "loss": 0.8952, - "step": 2949 - }, - { - "epoch": 0.26604139423727285, - "grad_norm": 1.3450153957606095, - "learning_rate": 3.4435406739126854e-06, - "loss": 0.9981, - "step": 2950 - }, - { - "epoch": 0.2661315777607431, - "grad_norm": 1.7781357562069975, - "learning_rate": 3.443136261093042e-06, - "loss": 0.7874, - "step": 2951 - }, - { - "epoch": 0.2662217612842134, - "grad_norm": 1.370247952236127, - "learning_rate": 3.4427317251370553e-06, - "loss": 0.9216, - "step": 2952 - }, - { - "epoch": 0.2663119448076836, - "grad_norm": 1.5482328216127001, - "learning_rate": 3.4423270660792422e-06, - "loss": 0.8927, - "step": 2953 - }, - { - "epoch": 0.2664021283311539, - "grad_norm": 1.5046544975551495, - "learning_rate": 3.4419222839541314e-06, - "loss": 0.8635, - "step": 2954 - }, - { - "epoch": 0.2664923118546242, - "grad_norm": 1.330128448923517, - "learning_rate": 3.4415173787962607e-06, - "loss": 0.9443, - "step": 2955 - }, - { - "epoch": 0.2665824953780944, - "grad_norm": 2.627287639888789, - "learning_rate": 3.4411123506401783e-06, - "loss": 0.8294, - "step": 2956 - }, - { - "epoch": 0.2666726789015647, - "grad_norm": 1.6426623092651373, - "learning_rate": 3.440707199520444e-06, - "loss": 0.83, - "step": 2957 - }, - { - "epoch": 0.26676286242503494, - "grad_norm": 1.3769149883210552, - "learning_rate": 3.440301925471628e-06, - "loss": 0.9319, - "step": 2958 - }, - { - "epoch": 0.26685304594850523, - "grad_norm": 1.3120577384961547, - "learning_rate": 3.43989652852831e-06, - "loss": 1.0079, - "step": 2959 - }, - { - "epoch": 0.26694322947197546, - "grad_norm": 1.323584930963354, - "learning_rate": 3.4394910087250804e-06, - "loss": 1.0297, - "step": 2960 - }, - { - "epoch": 0.26703341299544575, - "grad_norm": 1.2327004675490383, - "learning_rate": 3.4390853660965405e-06, - "loss": 0.984, - "step": 2961 - }, - { - "epoch": 0.267123596518916, - "grad_norm": 1.5765578543043544, - "learning_rate": 3.438679600677302e-06, - "loss": 1.041, - "step": 2962 - }, - { - "epoch": 0.2672137800423863, - "grad_norm": 1.1430621303916717, - "learning_rate": 3.4382737125019874e-06, - "loss": 0.9256, - "step": 2963 - }, - { - "epoch": 0.2673039635658565, - "grad_norm": 1.3293230375349074, - "learning_rate": 3.4378677016052294e-06, - "loss": 0.9402, - "step": 2964 - }, - { - "epoch": 0.2673941470893268, - "grad_norm": 1.5327486761806373, - "learning_rate": 3.43746156802167e-06, - "loss": 0.9885, - "step": 2965 - }, - { - "epoch": 0.267484330612797, - "grad_norm": 1.312881232707311, - "learning_rate": 3.4370553117859643e-06, - "loss": 0.9039, - "step": 2966 - }, - { - "epoch": 0.2675745141362673, - "grad_norm": 1.363578436083502, - "learning_rate": 3.4366489329327754e-06, - "loss": 0.9979, - "step": 2967 - }, - { - "epoch": 0.26766469765973755, - "grad_norm": 1.669416313160051, - "learning_rate": 3.4362424314967777e-06, - "loss": 0.9878, - "step": 2968 - }, - { - "epoch": 0.26775488118320784, - "grad_norm": 1.5311979168189138, - "learning_rate": 3.4358358075126567e-06, - "loss": 0.9801, - "step": 2969 - }, - { - "epoch": 0.26784506470667807, - "grad_norm": 1.4295488300716432, - "learning_rate": 3.4354290610151077e-06, - "loss": 0.9911, - "step": 2970 - }, - { - "epoch": 0.26793524823014836, - "grad_norm": 1.243547091394216, - "learning_rate": 3.4350221920388354e-06, - "loss": 0.9705, - "step": 2971 - }, - { - "epoch": 0.2680254317536186, - "grad_norm": 1.3294892428889364, - "learning_rate": 3.4346152006185574e-06, - "loss": 1.0607, - "step": 2972 - }, - { - "epoch": 0.2681156152770889, - "grad_norm": 1.648970748202382, - "learning_rate": 3.4342080867890006e-06, - "loss": 1.0651, - "step": 2973 - }, - { - "epoch": 0.2682057988005591, - "grad_norm": 1.7885172317011768, - "learning_rate": 3.4338008505849016e-06, - "loss": 0.8894, - "step": 2974 - }, - { - "epoch": 0.2682959823240294, - "grad_norm": 1.3030360843319835, - "learning_rate": 3.433393492041008e-06, - "loss": 0.9758, - "step": 2975 - }, - { - "epoch": 0.26838616584749964, - "grad_norm": 1.3703113725542326, - "learning_rate": 3.432986011192078e-06, - "loss": 1.0242, - "step": 2976 - }, - { - "epoch": 0.2684763493709699, - "grad_norm": 1.5775872249035725, - "learning_rate": 3.4325784080728796e-06, - "loss": 1.037, - "step": 2977 - }, - { - "epoch": 0.2685665328944402, - "grad_norm": 1.3679190721973868, - "learning_rate": 3.4321706827181926e-06, - "loss": 1.0039, - "step": 2978 - }, - { - "epoch": 0.26865671641791045, - "grad_norm": 1.1479281379325004, - "learning_rate": 3.4317628351628064e-06, - "loss": 0.9217, - "step": 2979 - }, - { - "epoch": 0.26874689994138073, - "grad_norm": 1.281816961870775, - "learning_rate": 3.43135486544152e-06, - "loss": 0.9499, - "step": 2980 - }, - { - "epoch": 0.26883708346485097, - "grad_norm": 1.3697839985718663, - "learning_rate": 3.4309467735891442e-06, - "loss": 1.0092, - "step": 2981 - }, - { - "epoch": 0.26892726698832126, - "grad_norm": 1.449557694028092, - "learning_rate": 3.4305385596405e-06, - "loss": 1.0489, - "step": 2982 - }, - { - "epoch": 0.2690174505117915, - "grad_norm": 1.2707974736308343, - "learning_rate": 3.4301302236304174e-06, - "loss": 0.9367, - "step": 2983 - }, - { - "epoch": 0.2691076340352618, - "grad_norm": 0.7644891682637754, - "learning_rate": 3.429721765593739e-06, - "loss": 0.7678, - "step": 2984 - }, - { - "epoch": 0.269197817558732, - "grad_norm": 1.233597039477436, - "learning_rate": 3.4293131855653155e-06, - "loss": 0.9871, - "step": 2985 - }, - { - "epoch": 0.2692880010822023, - "grad_norm": 1.4065406498952453, - "learning_rate": 3.4289044835800102e-06, - "loss": 0.9044, - "step": 2986 - }, - { - "epoch": 0.26937818460567253, - "grad_norm": 1.6648965813762928, - "learning_rate": 3.4284956596726953e-06, - "loss": 0.9659, - "step": 2987 - }, - { - "epoch": 0.2694683681291428, - "grad_norm": 1.2399525720232223, - "learning_rate": 3.4280867138782544e-06, - "loss": 0.963, - "step": 2988 - }, - { - "epoch": 0.26955855165261305, - "grad_norm": 1.6173775234165875, - "learning_rate": 3.4276776462315803e-06, - "loss": 0.9687, - "step": 2989 - }, - { - "epoch": 0.26964873517608334, - "grad_norm": 1.3529093211477532, - "learning_rate": 3.427268456767578e-06, - "loss": 0.9723, - "step": 2990 - }, - { - "epoch": 0.2697389186995536, - "grad_norm": 1.4341841061625467, - "learning_rate": 3.42685914552116e-06, - "loss": 1.0161, - "step": 2991 - }, - { - "epoch": 0.26982910222302386, - "grad_norm": 6.441709160839365, - "learning_rate": 3.426449712527253e-06, - "loss": 0.9431, - "step": 2992 - }, - { - "epoch": 0.2699192857464941, - "grad_norm": 1.3422088656873188, - "learning_rate": 3.4260401578207904e-06, - "loss": 0.8852, - "step": 2993 - }, - { - "epoch": 0.2700094692699644, - "grad_norm": 2.0556435337083623, - "learning_rate": 3.4256304814367185e-06, - "loss": 0.9427, - "step": 2994 - }, - { - "epoch": 0.2700996527934346, - "grad_norm": 1.5390750477855408, - "learning_rate": 3.4252206834099936e-06, - "loss": 0.9285, - "step": 2995 - }, - { - "epoch": 0.2701898363169049, - "grad_norm": 1.4706241743520745, - "learning_rate": 3.424810763775581e-06, - "loss": 0.972, - "step": 2996 - }, - { - "epoch": 0.27028001984037514, - "grad_norm": 1.337423067465054, - "learning_rate": 3.4244007225684587e-06, - "loss": 0.9125, - "step": 2997 - }, - { - "epoch": 0.27037020336384543, - "grad_norm": 1.3151765054378166, - "learning_rate": 3.4239905598236115e-06, - "loss": 0.9804, - "step": 2998 - }, - { - "epoch": 0.27046038688731566, - "grad_norm": 1.3235731323351212, - "learning_rate": 3.4235802755760386e-06, - "loss": 1.0, - "step": 2999 - }, - { - "epoch": 0.27055057041078595, - "grad_norm": 3.702687684324251, - "learning_rate": 3.4231698698607464e-06, - "loss": 0.9393, - "step": 3000 - }, - { - "epoch": 0.2706407539342562, - "grad_norm": 1.6450406189515467, - "learning_rate": 3.4227593427127543e-06, - "loss": 1.0242, - "step": 3001 - }, - { - "epoch": 0.2707309374577265, - "grad_norm": 1.5741377210942897, - "learning_rate": 3.42234869416709e-06, - "loss": 0.9388, - "step": 3002 - }, - { - "epoch": 0.27082112098119676, - "grad_norm": 1.8140720258753444, - "learning_rate": 3.421937924258792e-06, - "loss": 0.9789, - "step": 3003 - }, - { - "epoch": 0.270911304504667, - "grad_norm": 1.5924502289808011, - "learning_rate": 3.4215270330229096e-06, - "loss": 1.0118, - "step": 3004 - }, - { - "epoch": 0.2710014880281373, - "grad_norm": 1.53094876498773, - "learning_rate": 3.421116020494503e-06, - "loss": 0.9351, - "step": 3005 - }, - { - "epoch": 0.2710916715516075, - "grad_norm": 1.238123646041692, - "learning_rate": 3.420704886708642e-06, - "loss": 0.9174, - "step": 3006 - }, - { - "epoch": 0.2711818550750778, - "grad_norm": 1.06846905412073, - "learning_rate": 3.4202936317004056e-06, - "loss": 0.9231, - "step": 3007 - }, - { - "epoch": 0.27127203859854804, - "grad_norm": 1.5378873569724136, - "learning_rate": 3.4198822555048856e-06, - "loss": 0.9815, - "step": 3008 - }, - { - "epoch": 0.2713622221220183, - "grad_norm": 1.631456606120374, - "learning_rate": 3.419470758157182e-06, - "loss": 1.0531, - "step": 3009 - }, - { - "epoch": 0.27145240564548856, - "grad_norm": 1.469740837762899, - "learning_rate": 3.4190591396924068e-06, - "loss": 0.9684, - "step": 3010 - }, - { - "epoch": 0.27154258916895885, - "grad_norm": 0.6914285343695409, - "learning_rate": 3.418647400145681e-06, - "loss": 0.8198, - "step": 3011 - }, - { - "epoch": 0.2716327726924291, - "grad_norm": 0.7758600846078862, - "learning_rate": 3.4182355395521367e-06, - "loss": 0.8015, - "step": 3012 - }, - { - "epoch": 0.27172295621589937, - "grad_norm": 1.3590682549616824, - "learning_rate": 3.417823557946916e-06, - "loss": 0.9692, - "step": 3013 - }, - { - "epoch": 0.2718131397393696, - "grad_norm": 1.4316707675565725, - "learning_rate": 3.417411455365172e-06, - "loss": 0.864, - "step": 3014 - }, - { - "epoch": 0.2719033232628399, - "grad_norm": 1.5031247969724395, - "learning_rate": 3.416999231842066e-06, - "loss": 0.873, - "step": 3015 - }, - { - "epoch": 0.2719935067863101, - "grad_norm": 1.6643030413644244, - "learning_rate": 3.416586887412773e-06, - "loss": 1.0214, - "step": 3016 - }, - { - "epoch": 0.2720836903097804, - "grad_norm": 1.4230499443462987, - "learning_rate": 3.416174422112476e-06, - "loss": 0.9325, - "step": 3017 - }, - { - "epoch": 0.27217387383325065, - "grad_norm": 0.901737554470104, - "learning_rate": 3.4157618359763687e-06, - "loss": 0.7673, - "step": 3018 - }, - { - "epoch": 0.27226405735672093, - "grad_norm": 1.350697058397568, - "learning_rate": 3.4153491290396542e-06, - "loss": 0.8945, - "step": 3019 - }, - { - "epoch": 0.27235424088019117, - "grad_norm": 1.598035921158567, - "learning_rate": 3.4149363013375485e-06, - "loss": 0.981, - "step": 3020 - }, - { - "epoch": 0.27244442440366146, - "grad_norm": 0.9326043732247106, - "learning_rate": 3.414523352905276e-06, - "loss": 0.7823, - "step": 3021 - }, - { - "epoch": 0.2725346079271317, - "grad_norm": 1.4428487036915494, - "learning_rate": 3.414110283778071e-06, - "loss": 0.9362, - "step": 3022 - }, - { - "epoch": 0.272624791450602, - "grad_norm": 1.9294098565524558, - "learning_rate": 3.4136970939911797e-06, - "loss": 0.919, - "step": 3023 - }, - { - "epoch": 0.2727149749740722, - "grad_norm": 1.5093414776793508, - "learning_rate": 3.413283783579857e-06, - "loss": 0.9425, - "step": 3024 - }, - { - "epoch": 0.2728051584975425, - "grad_norm": 1.7273812402515472, - "learning_rate": 3.412870352579369e-06, - "loss": 0.9008, - "step": 3025 - }, - { - "epoch": 0.2728953420210128, - "grad_norm": 1.044514514272599, - "learning_rate": 3.4124568010249915e-06, - "loss": 0.8095, - "step": 3026 - }, - { - "epoch": 0.272985525544483, - "grad_norm": 1.842594625250744, - "learning_rate": 3.4120431289520124e-06, - "loss": 0.9567, - "step": 3027 - }, - { - "epoch": 0.2730757090679533, - "grad_norm": 1.5237230864628817, - "learning_rate": 3.4116293363957276e-06, - "loss": 0.986, - "step": 3028 - }, - { - "epoch": 0.27316589259142354, - "grad_norm": 1.883665785802391, - "learning_rate": 3.4112154233914438e-06, - "loss": 0.9798, - "step": 3029 - }, - { - "epoch": 0.27325607611489383, - "grad_norm": 1.704531657423187, - "learning_rate": 3.410801389974479e-06, - "loss": 0.8689, - "step": 3030 - }, - { - "epoch": 0.27334625963836406, - "grad_norm": 1.6010169612026581, - "learning_rate": 3.410387236180161e-06, - "loss": 1.0063, - "step": 3031 - }, - { - "epoch": 0.27343644316183435, - "grad_norm": 1.5145473778543954, - "learning_rate": 3.409972962043826e-06, - "loss": 0.8484, - "step": 3032 - }, - { - "epoch": 0.2735266266853046, - "grad_norm": 1.5561784658881808, - "learning_rate": 3.4095585676008234e-06, - "loss": 1.0223, - "step": 3033 - }, - { - "epoch": 0.2736168102087749, - "grad_norm": 1.394840238303303, - "learning_rate": 3.4091440528865125e-06, - "loss": 0.9683, - "step": 3034 - }, - { - "epoch": 0.2737069937322451, - "grad_norm": 2.463298815208791, - "learning_rate": 3.4087294179362606e-06, - "loss": 0.9682, - "step": 3035 - }, - { - "epoch": 0.2737971772557154, - "grad_norm": 1.5008277993079884, - "learning_rate": 3.4083146627854474e-06, - "loss": 1.0176, - "step": 3036 - }, - { - "epoch": 0.27388736077918563, - "grad_norm": 3.257396497626444, - "learning_rate": 3.4078997874694614e-06, - "loss": 0.8886, - "step": 3037 - }, - { - "epoch": 0.2739775443026559, - "grad_norm": 1.5983541964752797, - "learning_rate": 3.407484792023703e-06, - "loss": 0.8668, - "step": 3038 - }, - { - "epoch": 0.27406772782612615, - "grad_norm": 1.873033764169768, - "learning_rate": 3.407069676483581e-06, - "loss": 0.948, - "step": 3039 - }, - { - "epoch": 0.27415791134959644, - "grad_norm": 1.4747146265728435, - "learning_rate": 3.406654440884516e-06, - "loss": 1.0038, - "step": 3040 - }, - { - "epoch": 0.2742480948730667, - "grad_norm": 1.7039480058187848, - "learning_rate": 3.4062390852619372e-06, - "loss": 0.9969, - "step": 3041 - }, - { - "epoch": 0.27433827839653696, - "grad_norm": 1.9690932322835621, - "learning_rate": 3.4058236096512867e-06, - "loss": 0.9666, - "step": 3042 - }, - { - "epoch": 0.2744284619200072, - "grad_norm": 1.9613915635823813, - "learning_rate": 3.405408014088013e-06, - "loss": 0.8723, - "step": 3043 - }, - { - "epoch": 0.2745186454434775, - "grad_norm": 2.2200305091204897, - "learning_rate": 3.404992298607579e-06, - "loss": 0.8587, - "step": 3044 - }, - { - "epoch": 0.2746088289669477, - "grad_norm": 5.374434507918691, - "learning_rate": 3.4045764632454547e-06, - "loss": 0.9435, - "step": 3045 - }, - { - "epoch": 0.274699012490418, - "grad_norm": 1.3847179323923045, - "learning_rate": 3.4041605080371223e-06, - "loss": 0.9257, - "step": 3046 - }, - { - "epoch": 0.27478919601388824, - "grad_norm": 1.361081597706708, - "learning_rate": 3.4037444330180726e-06, - "loss": 0.9727, - "step": 3047 - }, - { - "epoch": 0.2748793795373585, - "grad_norm": 1.8038246767443233, - "learning_rate": 3.403328238223808e-06, - "loss": 0.9795, - "step": 3048 - }, - { - "epoch": 0.27496956306082876, - "grad_norm": 1.5625892613663863, - "learning_rate": 3.4029119236898395e-06, - "loss": 0.9956, - "step": 3049 - }, - { - "epoch": 0.27505974658429905, - "grad_norm": 1.2018776905587125, - "learning_rate": 3.4024954894516906e-06, - "loss": 0.8571, - "step": 3050 - }, - { - "epoch": 0.27514993010776934, - "grad_norm": 1.5417044179607253, - "learning_rate": 3.4020789355448933e-06, - "loss": 0.9699, - "step": 3051 - }, - { - "epoch": 0.27524011363123957, - "grad_norm": 1.3460694632439776, - "learning_rate": 3.40166226200499e-06, - "loss": 1.0389, - "step": 3052 - }, - { - "epoch": 0.27533029715470986, - "grad_norm": 1.80716037181722, - "learning_rate": 3.401245468867534e-06, - "loss": 0.9506, - "step": 3053 - }, - { - "epoch": 0.2754204806781801, - "grad_norm": 1.159158708082056, - "learning_rate": 3.400828556168088e-06, - "loss": 0.8682, - "step": 3054 - }, - { - "epoch": 0.2755106642016504, - "grad_norm": 1.607135807506298, - "learning_rate": 3.4004115239422255e-06, - "loss": 0.9774, - "step": 3055 - }, - { - "epoch": 0.2756008477251206, - "grad_norm": 1.6640521125290157, - "learning_rate": 3.3999943722255305e-06, - "loss": 0.9528, - "step": 3056 - }, - { - "epoch": 0.2756910312485909, - "grad_norm": 1.5284107120578803, - "learning_rate": 3.3995771010535955e-06, - "loss": 0.8713, - "step": 3057 - }, - { - "epoch": 0.27578121477206113, - "grad_norm": 1.436571152839218, - "learning_rate": 3.3991597104620253e-06, - "loss": 0.7899, - "step": 3058 - }, - { - "epoch": 0.2758713982955314, - "grad_norm": 2.2032823438799274, - "learning_rate": 3.398742200486434e-06, - "loss": 0.9472, - "step": 3059 - }, - { - "epoch": 0.27596158181900166, - "grad_norm": 1.7126216873037787, - "learning_rate": 3.3983245711624453e-06, - "loss": 0.98, - "step": 3060 - }, - { - "epoch": 0.27605176534247194, - "grad_norm": 1.7418779175086854, - "learning_rate": 3.3979068225256946e-06, - "loss": 0.8975, - "step": 3061 - }, - { - "epoch": 0.2761419488659422, - "grad_norm": 2.4237095929102934, - "learning_rate": 3.3974889546118246e-06, - "loss": 0.934, - "step": 3062 - }, - { - "epoch": 0.27623213238941247, - "grad_norm": 1.576761695519652, - "learning_rate": 3.3970709674564918e-06, - "loss": 0.9106, - "step": 3063 - }, - { - "epoch": 0.2763223159128827, - "grad_norm": 1.3942728096975636, - "learning_rate": 3.3966528610953607e-06, - "loss": 0.9596, - "step": 3064 - }, - { - "epoch": 0.276412499436353, - "grad_norm": 1.44271179455871, - "learning_rate": 3.3962346355641067e-06, - "loss": 0.9701, - "step": 3065 - }, - { - "epoch": 0.2765026829598232, - "grad_norm": 1.4021875831122785, - "learning_rate": 3.3958162908984146e-06, - "loss": 1.0034, - "step": 3066 - }, - { - "epoch": 0.2765928664832935, - "grad_norm": 1.3870557348718653, - "learning_rate": 3.39539782713398e-06, - "loss": 0.9559, - "step": 3067 - }, - { - "epoch": 0.27668305000676374, - "grad_norm": 1.820675989622767, - "learning_rate": 3.394979244306509e-06, - "loss": 0.9851, - "step": 3068 - }, - { - "epoch": 0.27677323353023403, - "grad_norm": 1.3770152409388108, - "learning_rate": 3.3945605424517166e-06, - "loss": 0.9771, - "step": 3069 - }, - { - "epoch": 0.27686341705370426, - "grad_norm": 1.5454923056513068, - "learning_rate": 3.3941417216053294e-06, - "loss": 0.9617, - "step": 3070 - }, - { - "epoch": 0.27695360057717455, - "grad_norm": 1.8292156780631277, - "learning_rate": 3.3937227818030835e-06, - "loss": 0.9295, - "step": 3071 - }, - { - "epoch": 0.2770437841006448, - "grad_norm": 1.7195859090115473, - "learning_rate": 3.393303723080725e-06, - "loss": 0.934, - "step": 3072 - }, - { - "epoch": 0.2771339676241151, - "grad_norm": 1.5105183095823016, - "learning_rate": 3.3928845454740097e-06, - "loss": 0.9742, - "step": 3073 - }, - { - "epoch": 0.27722415114758536, - "grad_norm": 1.8673375919302106, - "learning_rate": 3.392465249018705e-06, - "loss": 0.9506, - "step": 3074 - }, - { - "epoch": 0.2773143346710556, - "grad_norm": 1.6292941502923528, - "learning_rate": 3.3920458337505872e-06, - "loss": 0.9517, - "step": 3075 - }, - { - "epoch": 0.2774045181945259, - "grad_norm": 5.0643311002943125, - "learning_rate": 3.391626299705443e-06, - "loss": 0.8047, - "step": 3076 - }, - { - "epoch": 0.2774947017179961, - "grad_norm": 1.422161199250652, - "learning_rate": 3.39120664691907e-06, - "loss": 0.9685, - "step": 3077 - }, - { - "epoch": 0.2775848852414664, - "grad_norm": 1.916836592840173, - "learning_rate": 3.390786875427275e-06, - "loss": 0.942, - "step": 3078 - }, - { - "epoch": 0.27767506876493664, - "grad_norm": 1.431638210237348, - "learning_rate": 3.390366985265875e-06, - "loss": 0.9519, - "step": 3079 - }, - { - "epoch": 0.2777652522884069, - "grad_norm": 1.3664867676206962, - "learning_rate": 3.389946976470697e-06, - "loss": 0.9199, - "step": 3080 - }, - { - "epoch": 0.27785543581187716, - "grad_norm": 1.3452780263916586, - "learning_rate": 3.3895268490775787e-06, - "loss": 1.0516, - "step": 3081 - }, - { - "epoch": 0.27794561933534745, - "grad_norm": 1.5637761054844477, - "learning_rate": 3.3891066031223685e-06, - "loss": 0.926, - "step": 3082 - }, - { - "epoch": 0.2780358028588177, - "grad_norm": 1.5488945464507196, - "learning_rate": 3.3886862386409237e-06, - "loss": 0.9931, - "step": 3083 - }, - { - "epoch": 0.27812598638228797, - "grad_norm": 1.8458900316656692, - "learning_rate": 3.388265755669111e-06, - "loss": 0.9313, - "step": 3084 - }, - { - "epoch": 0.2782161699057582, - "grad_norm": 3.0220447259871417, - "learning_rate": 3.3878451542428093e-06, - "loss": 1.0198, - "step": 3085 - }, - { - "epoch": 0.2783063534292285, - "grad_norm": 1.407823784007111, - "learning_rate": 3.387424434397907e-06, - "loss": 0.9483, - "step": 3086 - }, - { - "epoch": 0.2783965369526987, - "grad_norm": 1.7606500399443745, - "learning_rate": 3.3870035961703013e-06, - "loss": 0.8801, - "step": 3087 - }, - { - "epoch": 0.278486720476169, - "grad_norm": 1.8356691854300442, - "learning_rate": 3.3865826395959014e-06, - "loss": 1.087, - "step": 3088 - }, - { - "epoch": 0.27857690399963925, - "grad_norm": 1.2445492634783681, - "learning_rate": 3.3861615647106253e-06, - "loss": 1.0496, - "step": 3089 - }, - { - "epoch": 0.27866708752310954, - "grad_norm": 1.9402149006692038, - "learning_rate": 3.3857403715504012e-06, - "loss": 0.9733, - "step": 3090 - }, - { - "epoch": 0.27875727104657977, - "grad_norm": 1.582663327386853, - "learning_rate": 3.385319060151167e-06, - "loss": 0.9262, - "step": 3091 - }, - { - "epoch": 0.27884745457005006, - "grad_norm": 1.7675874319937985, - "learning_rate": 3.3848976305488728e-06, - "loss": 0.9522, - "step": 3092 - }, - { - "epoch": 0.2789376380935203, - "grad_norm": 1.7480803587706157, - "learning_rate": 3.384476082779476e-06, - "loss": 0.9957, - "step": 3093 - }, - { - "epoch": 0.2790278216169906, - "grad_norm": 1.8868612231686348, - "learning_rate": 3.3840544168789463e-06, - "loss": 0.9501, - "step": 3094 - }, - { - "epoch": 0.2791180051404608, - "grad_norm": 1.5306547915455488, - "learning_rate": 3.3836326328832617e-06, - "loss": 1.072, - "step": 3095 - }, - { - "epoch": 0.2792081886639311, - "grad_norm": 1.4904434637373576, - "learning_rate": 3.383210730828412e-06, - "loss": 0.9175, - "step": 3096 - }, - { - "epoch": 0.2792983721874014, - "grad_norm": 1.2080619386652625, - "learning_rate": 3.3827887107503953e-06, - "loss": 1.0258, - "step": 3097 - }, - { - "epoch": 0.2793885557108716, - "grad_norm": 1.380451320156041, - "learning_rate": 3.3823665726852216e-06, - "loss": 0.9078, - "step": 3098 - }, - { - "epoch": 0.2794787392343419, - "grad_norm": 1.216776252192187, - "learning_rate": 3.3819443166689095e-06, - "loss": 1.0777, - "step": 3099 - }, - { - "epoch": 0.27956892275781214, - "grad_norm": 1.3832642846320478, - "learning_rate": 3.3815219427374886e-06, - "loss": 0.9496, - "step": 3100 - }, - { - "epoch": 0.27965910628128243, - "grad_norm": 1.347930791781831, - "learning_rate": 3.3810994509269975e-06, - "loss": 0.9727, - "step": 3101 - }, - { - "epoch": 0.27974928980475267, - "grad_norm": 1.4106974738707585, - "learning_rate": 3.3806768412734864e-06, - "loss": 0.9921, - "step": 3102 - }, - { - "epoch": 0.27983947332822295, - "grad_norm": 1.5942708080764896, - "learning_rate": 3.380254113813014e-06, - "loss": 1.0474, - "step": 3103 - }, - { - "epoch": 0.2799296568516932, - "grad_norm": 1.5227349642141388, - "learning_rate": 3.3798312685816496e-06, - "loss": 0.9998, - "step": 3104 - }, - { - "epoch": 0.2800198403751635, - "grad_norm": 1.4245348355180263, - "learning_rate": 3.3794083056154738e-06, - "loss": 0.9623, - "step": 3105 - }, - { - "epoch": 0.2801100238986337, - "grad_norm": 1.430056248205509, - "learning_rate": 3.3789852249505746e-06, - "loss": 0.97, - "step": 3106 - }, - { - "epoch": 0.280200207422104, - "grad_norm": 1.553899440683335, - "learning_rate": 3.378562026623053e-06, - "loss": 1.0253, - "step": 3107 - }, - { - "epoch": 0.28029039094557423, - "grad_norm": 1.8699712711310157, - "learning_rate": 3.3781387106690175e-06, - "loss": 1.033, - "step": 3108 - }, - { - "epoch": 0.2803805744690445, - "grad_norm": 1.2801262183841509, - "learning_rate": 3.3777152771245885e-06, - "loss": 0.9883, - "step": 3109 - }, - { - "epoch": 0.28047075799251475, - "grad_norm": 1.9033894572804657, - "learning_rate": 3.377291726025895e-06, - "loss": 0.9986, - "step": 3110 - }, - { - "epoch": 0.28056094151598504, - "grad_norm": 1.824841999373538, - "learning_rate": 3.3768680574090782e-06, - "loss": 1.0721, - "step": 3111 - }, - { - "epoch": 0.2806511250394553, - "grad_norm": 2.1340396690292516, - "learning_rate": 3.3764442713102857e-06, - "loss": 0.9375, - "step": 3112 - }, - { - "epoch": 0.28074130856292556, - "grad_norm": 1.3241586587150393, - "learning_rate": 3.3760203677656786e-06, - "loss": 0.9658, - "step": 3113 - }, - { - "epoch": 0.2808314920863958, - "grad_norm": 1.5592068109419353, - "learning_rate": 3.3755963468114262e-06, - "loss": 1.0179, - "step": 3114 - }, - { - "epoch": 0.2809216756098661, - "grad_norm": 1.3964213323701675, - "learning_rate": 3.3751722084837095e-06, - "loss": 0.8708, - "step": 3115 - }, - { - "epoch": 0.2810118591333363, - "grad_norm": 1.389445415364975, - "learning_rate": 3.3747479528187166e-06, - "loss": 0.9442, - "step": 3116 - }, - { - "epoch": 0.2811020426568066, - "grad_norm": 1.4596070948452642, - "learning_rate": 3.3743235798526485e-06, - "loss": 1.0096, - "step": 3117 - }, - { - "epoch": 0.28119222618027684, - "grad_norm": 1.8025690500945828, - "learning_rate": 3.373899089621714e-06, - "loss": 0.901, - "step": 3118 - }, - { - "epoch": 0.2812824097037471, - "grad_norm": 1.4882527346144547, - "learning_rate": 3.373474482162134e-06, - "loss": 0.9449, - "step": 3119 - }, - { - "epoch": 0.28137259322721736, - "grad_norm": 1.6594787651420544, - "learning_rate": 3.3730497575101376e-06, - "loss": 0.9665, - "step": 3120 - }, - { - "epoch": 0.28146277675068765, - "grad_norm": 1.6575266398364699, - "learning_rate": 3.3726249157019654e-06, - "loss": 0.9579, - "step": 3121 - }, - { - "epoch": 0.28155296027415794, - "grad_norm": 1.6502084484789783, - "learning_rate": 3.372199956773866e-06, - "loss": 1.0421, - "step": 3122 - }, - { - "epoch": 0.28164314379762817, - "grad_norm": 1.4637074857380288, - "learning_rate": 3.371774880762101e-06, - "loss": 0.9179, - "step": 3123 - }, - { - "epoch": 0.28173332732109846, - "grad_norm": 1.6166321911363937, - "learning_rate": 3.3713496877029392e-06, - "loss": 0.9445, - "step": 3124 - }, - { - "epoch": 0.2818235108445687, - "grad_norm": 1.7676267501296896, - "learning_rate": 3.37092437763266e-06, - "loss": 1.0259, - "step": 3125 - }, - { - "epoch": 0.281913694368039, - "grad_norm": 1.6395436674342365, - "learning_rate": 3.3704989505875537e-06, - "loss": 0.9935, - "step": 3126 - }, - { - "epoch": 0.2820038778915092, - "grad_norm": 1.5726700730528789, - "learning_rate": 3.3700734066039205e-06, - "loss": 0.9343, - "step": 3127 - }, - { - "epoch": 0.2820940614149795, - "grad_norm": 1.6421892973711247, - "learning_rate": 3.36964774571807e-06, - "loss": 1.0007, - "step": 3128 - }, - { - "epoch": 0.28218424493844974, - "grad_norm": 1.8289500274046664, - "learning_rate": 3.3692219679663206e-06, - "loss": 0.9582, - "step": 3129 - }, - { - "epoch": 0.28227442846192, - "grad_norm": 1.9455661838071021, - "learning_rate": 3.3687960733850043e-06, - "loss": 0.9857, - "step": 3130 - }, - { - "epoch": 0.28236461198539026, - "grad_norm": 1.4310290603352707, - "learning_rate": 3.3683700620104586e-06, - "loss": 0.9093, - "step": 3131 - }, - { - "epoch": 0.28245479550886055, - "grad_norm": 1.4516590886989431, - "learning_rate": 3.3679439338790347e-06, - "loss": 0.9854, - "step": 3132 - }, - { - "epoch": 0.2825449790323308, - "grad_norm": 1.426558024304358, - "learning_rate": 3.3675176890270916e-06, - "loss": 1.0899, - "step": 3133 - }, - { - "epoch": 0.28263516255580107, - "grad_norm": 1.4767778834934158, - "learning_rate": 3.367091327490998e-06, - "loss": 0.9881, - "step": 3134 - }, - { - "epoch": 0.2827253460792713, - "grad_norm": 1.627493119584173, - "learning_rate": 3.3666648493071347e-06, - "loss": 0.9468, - "step": 3135 - }, - { - "epoch": 0.2828155296027416, - "grad_norm": 1.7114027949597574, - "learning_rate": 3.3662382545118914e-06, - "loss": 0.9511, - "step": 3136 - }, - { - "epoch": 0.2829057131262118, - "grad_norm": 1.5208307118153732, - "learning_rate": 3.3658115431416663e-06, - "loss": 1.0489, - "step": 3137 - }, - { - "epoch": 0.2829958966496821, - "grad_norm": 0.7930655960550481, - "learning_rate": 3.36538471523287e-06, - "loss": 0.8446, - "step": 3138 - }, - { - "epoch": 0.28308608017315234, - "grad_norm": 1.3726568197076578, - "learning_rate": 3.3649577708219204e-06, - "loss": 1.0278, - "step": 3139 - }, - { - "epoch": 0.28317626369662263, - "grad_norm": 1.736828417625215, - "learning_rate": 3.3645307099452477e-06, - "loss": 0.911, - "step": 3140 - }, - { - "epoch": 0.28326644722009287, - "grad_norm": 1.5275966893388224, - "learning_rate": 3.3641035326392907e-06, - "loss": 0.9733, - "step": 3141 - }, - { - "epoch": 0.28335663074356315, - "grad_norm": 1.644690426623281, - "learning_rate": 3.363676238940499e-06, - "loss": 0.9743, - "step": 3142 - }, - { - "epoch": 0.2834468142670334, - "grad_norm": 1.5235349183846203, - "learning_rate": 3.363248828885331e-06, - "loss": 0.9948, - "step": 3143 - }, - { - "epoch": 0.2835369977905037, - "grad_norm": 1.669619455894798, - "learning_rate": 3.3628213025102562e-06, - "loss": 0.9908, - "step": 3144 - }, - { - "epoch": 0.28362718131397396, - "grad_norm": 1.4237996583874155, - "learning_rate": 3.3623936598517536e-06, - "loss": 1.0054, - "step": 3145 - }, - { - "epoch": 0.2837173648374442, - "grad_norm": 1.5721010868388807, - "learning_rate": 3.3619659009463117e-06, - "loss": 0.8153, - "step": 3146 - }, - { - "epoch": 0.2838075483609145, - "grad_norm": 1.3514219442500006, - "learning_rate": 3.3615380258304287e-06, - "loss": 1.0201, - "step": 3147 - }, - { - "epoch": 0.2838977318843847, - "grad_norm": 1.4782359817487924, - "learning_rate": 3.3611100345406146e-06, - "loss": 0.9561, - "step": 3148 - }, - { - "epoch": 0.283987915407855, - "grad_norm": 1.414320611316776, - "learning_rate": 3.3606819271133873e-06, - "loss": 0.9538, - "step": 3149 - }, - { - "epoch": 0.28407809893132524, - "grad_norm": 0.8669954035614031, - "learning_rate": 3.360253703585275e-06, - "loss": 0.8271, - "step": 3150 - }, - { - "epoch": 0.28416828245479553, - "grad_norm": 1.4605434926787861, - "learning_rate": 3.3598253639928164e-06, - "loss": 0.9124, - "step": 3151 - }, - { - "epoch": 0.28425846597826576, - "grad_norm": 1.7514786604077623, - "learning_rate": 3.3593969083725596e-06, - "loss": 0.927, - "step": 3152 - }, - { - "epoch": 0.28434864950173605, - "grad_norm": 1.5296188541708795, - "learning_rate": 3.358968336761063e-06, - "loss": 0.9281, - "step": 3153 - }, - { - "epoch": 0.2844388330252063, - "grad_norm": 1.3738731188181181, - "learning_rate": 3.3585396491948945e-06, - "loss": 0.9317, - "step": 3154 - }, - { - "epoch": 0.28452901654867657, - "grad_norm": 1.2864640836805972, - "learning_rate": 3.358110845710633e-06, - "loss": 1.0195, - "step": 3155 - }, - { - "epoch": 0.2846192000721468, - "grad_norm": 1.6717511425308196, - "learning_rate": 3.357681926344865e-06, - "loss": 0.953, - "step": 3156 - }, - { - "epoch": 0.2847093835956171, - "grad_norm": 1.4144521776989976, - "learning_rate": 3.357252891134189e-06, - "loss": 0.9602, - "step": 3157 - }, - { - "epoch": 0.2847995671190873, - "grad_norm": 1.4994877894388703, - "learning_rate": 3.356823740115212e-06, - "loss": 1.0054, - "step": 3158 - }, - { - "epoch": 0.2848897506425576, - "grad_norm": 1.1850814785690784, - "learning_rate": 3.3563944733245525e-06, - "loss": 0.9115, - "step": 3159 - }, - { - "epoch": 0.28497993416602785, - "grad_norm": 1.6830204742192638, - "learning_rate": 3.3559650907988375e-06, - "loss": 0.9589, - "step": 3160 - }, - { - "epoch": 0.28507011768949814, - "grad_norm": 1.263734559475141, - "learning_rate": 3.3555355925747045e-06, - "loss": 0.8203, - "step": 3161 - }, - { - "epoch": 0.28516030121296837, - "grad_norm": 1.5086289799279242, - "learning_rate": 3.3551059786888e-06, - "loss": 0.9479, - "step": 3162 - }, - { - "epoch": 0.28525048473643866, - "grad_norm": 1.4120966276276727, - "learning_rate": 3.3546762491777807e-06, - "loss": 1.0148, - "step": 3163 - }, - { - "epoch": 0.2853406682599089, - "grad_norm": 1.473191222303447, - "learning_rate": 3.3542464040783156e-06, - "loss": 0.9577, - "step": 3164 - }, - { - "epoch": 0.2854308517833792, - "grad_norm": 1.9840908297620474, - "learning_rate": 3.353816443427079e-06, - "loss": 0.982, - "step": 3165 - }, - { - "epoch": 0.2855210353068494, - "grad_norm": 1.7584598959741766, - "learning_rate": 3.3533863672607597e-06, - "loss": 0.9483, - "step": 3166 - }, - { - "epoch": 0.2856112188303197, - "grad_norm": 1.3893074909511003, - "learning_rate": 3.352956175616052e-06, - "loss": 0.9922, - "step": 3167 - }, - { - "epoch": 0.28570140235378993, - "grad_norm": 1.6467345060178755, - "learning_rate": 3.352525868529664e-06, - "loss": 0.9357, - "step": 3168 - }, - { - "epoch": 0.2857915858772602, - "grad_norm": 1.4305319871576196, - "learning_rate": 3.3520954460383103e-06, - "loss": 0.9012, - "step": 3169 - }, - { - "epoch": 0.2858817694007305, - "grad_norm": 1.6245884741034633, - "learning_rate": 3.3516649081787182e-06, - "loss": 0.9068, - "step": 3170 - }, - { - "epoch": 0.28597195292420075, - "grad_norm": 2.2906959095969373, - "learning_rate": 3.3512342549876236e-06, - "loss": 1.0569, - "step": 3171 - }, - { - "epoch": 0.28606213644767103, - "grad_norm": 1.3355485227161294, - "learning_rate": 3.350803486501771e-06, - "loss": 0.9225, - "step": 3172 - }, - { - "epoch": 0.28615231997114127, - "grad_norm": 1.2888085779887277, - "learning_rate": 3.3503726027579175e-06, - "loss": 0.9868, - "step": 3173 - }, - { - "epoch": 0.28624250349461156, - "grad_norm": 1.3722232611176493, - "learning_rate": 3.349941603792827e-06, - "loss": 1.0236, - "step": 3174 - }, - { - "epoch": 0.2863326870180818, - "grad_norm": 1.4611590212391363, - "learning_rate": 3.3495104896432755e-06, - "loss": 0.9539, - "step": 3175 - }, - { - "epoch": 0.2864228705415521, - "grad_norm": 1.5206997259666932, - "learning_rate": 3.3490792603460477e-06, - "loss": 0.9297, - "step": 3176 - }, - { - "epoch": 0.2865130540650223, - "grad_norm": 1.5850599144041768, - "learning_rate": 3.3486479159379393e-06, - "loss": 0.9008, - "step": 3177 - }, - { - "epoch": 0.2866032375884926, - "grad_norm": 0.8302756399889556, - "learning_rate": 3.3482164564557537e-06, - "loss": 0.8106, - "step": 3178 - }, - { - "epoch": 0.28669342111196283, - "grad_norm": 1.5458410641596017, - "learning_rate": 3.3477848819363065e-06, - "loss": 1.0177, - "step": 3179 - }, - { - "epoch": 0.2867836046354331, - "grad_norm": 0.8262737382719249, - "learning_rate": 3.3473531924164213e-06, - "loss": 0.8416, - "step": 3180 - }, - { - "epoch": 0.28687378815890335, - "grad_norm": 2.0176723048338654, - "learning_rate": 3.3469213879329325e-06, - "loss": 0.9156, - "step": 3181 - }, - { - "epoch": 0.28696397168237364, - "grad_norm": 1.6095509849547933, - "learning_rate": 3.3464894685226837e-06, - "loss": 0.9299, - "step": 3182 - }, - { - "epoch": 0.2870541552058439, - "grad_norm": 1.3423371095063494, - "learning_rate": 3.34605743422253e-06, - "loss": 1.0347, - "step": 3183 - }, - { - "epoch": 0.28714433872931416, - "grad_norm": 1.5470748155795861, - "learning_rate": 3.345625285069333e-06, - "loss": 0.9337, - "step": 3184 - }, - { - "epoch": 0.2872345222527844, - "grad_norm": 1.543054119597388, - "learning_rate": 3.345193021099967e-06, - "loss": 0.9485, - "step": 3185 - }, - { - "epoch": 0.2873247057762547, - "grad_norm": 1.4562617256445596, - "learning_rate": 3.3447606423513157e-06, - "loss": 0.931, - "step": 3186 - }, - { - "epoch": 0.2874148892997249, - "grad_norm": 1.5029437267101349, - "learning_rate": 3.344328148860271e-06, - "loss": 0.9323, - "step": 3187 - }, - { - "epoch": 0.2875050728231952, - "grad_norm": 1.2985334248462033, - "learning_rate": 3.3438955406637365e-06, - "loss": 0.9753, - "step": 3188 - }, - { - "epoch": 0.28759525634666544, - "grad_norm": 1.3651627288486483, - "learning_rate": 3.343462817798624e-06, - "loss": 0.9828, - "step": 3189 - }, - { - "epoch": 0.28768543987013573, - "grad_norm": 1.6911387595438088, - "learning_rate": 3.343029980301856e-06, - "loss": 0.9272, - "step": 3190 - }, - { - "epoch": 0.28777562339360596, - "grad_norm": 2.06115250930043, - "learning_rate": 3.342597028210365e-06, - "loss": 0.9849, - "step": 3191 - }, - { - "epoch": 0.28786580691707625, - "grad_norm": 1.415595977687807, - "learning_rate": 3.342163961561092e-06, - "loss": 0.9038, - "step": 3192 - }, - { - "epoch": 0.28795599044054654, - "grad_norm": 1.316701657800043, - "learning_rate": 3.34173078039099e-06, - "loss": 0.9352, - "step": 3193 - }, - { - "epoch": 0.28804617396401677, - "grad_norm": 1.4824821047804464, - "learning_rate": 3.3412974847370193e-06, - "loss": 1.1333, - "step": 3194 - }, - { - "epoch": 0.28813635748748706, - "grad_norm": 1.322016231846964, - "learning_rate": 3.3408640746361514e-06, - "loss": 0.9852, - "step": 3195 - }, - { - "epoch": 0.2882265410109573, - "grad_norm": 0.7894133363839618, - "learning_rate": 3.3404305501253663e-06, - "loss": 0.8147, - "step": 3196 - }, - { - "epoch": 0.2883167245344276, - "grad_norm": 1.4708818425868493, - "learning_rate": 3.3399969112416565e-06, - "loss": 1.0385, - "step": 3197 - }, - { - "epoch": 0.2884069080578978, - "grad_norm": 1.540903529742541, - "learning_rate": 3.3395631580220213e-06, - "loss": 0.8986, - "step": 3198 - }, - { - "epoch": 0.2884970915813681, - "grad_norm": 1.4337468286427657, - "learning_rate": 3.3391292905034714e-06, - "loss": 0.9424, - "step": 3199 - }, - { - "epoch": 0.28858727510483834, - "grad_norm": 1.961301063179199, - "learning_rate": 3.338695308723027e-06, - "loss": 1.032, - "step": 3200 - }, - { - "epoch": 0.2886774586283086, - "grad_norm": 1.4582123660825084, - "learning_rate": 3.338261212717716e-06, - "loss": 1.0179, - "step": 3201 - }, - { - "epoch": 0.28876764215177886, - "grad_norm": 1.3918939865454891, - "learning_rate": 3.33782700252458e-06, - "loss": 0.993, - "step": 3202 - }, - { - "epoch": 0.28885782567524915, - "grad_norm": 1.9426797723247347, - "learning_rate": 3.337392678180668e-06, - "loss": 1.002, - "step": 3203 - }, - { - "epoch": 0.2889480091987194, - "grad_norm": 1.574459994551967, - "learning_rate": 3.3369582397230377e-06, - "loss": 0.9657, - "step": 3204 - }, - { - "epoch": 0.28903819272218967, - "grad_norm": 1.5838237638690542, - "learning_rate": 3.336523687188759e-06, - "loss": 1.0097, - "step": 3205 - }, - { - "epoch": 0.2891283762456599, - "grad_norm": 0.7321222938746804, - "learning_rate": 3.336089020614909e-06, - "loss": 0.8252, - "step": 3206 - }, - { - "epoch": 0.2892185597691302, - "grad_norm": 1.4729910148766103, - "learning_rate": 3.3356542400385774e-06, - "loss": 0.846, - "step": 3207 - }, - { - "epoch": 0.2893087432926004, - "grad_norm": 1.2600570932956026, - "learning_rate": 3.3352193454968607e-06, - "loss": 0.8987, - "step": 3208 - }, - { - "epoch": 0.2893989268160707, - "grad_norm": 1.3860836240166134, - "learning_rate": 3.3347843370268675e-06, - "loss": 1.027, - "step": 3209 - }, - { - "epoch": 0.28948911033954094, - "grad_norm": 1.3440190422945708, - "learning_rate": 3.334349214665715e-06, - "loss": 0.985, - "step": 3210 - }, - { - "epoch": 0.28957929386301123, - "grad_norm": 1.3938938254829025, - "learning_rate": 3.3339139784505293e-06, - "loss": 1.0503, - "step": 3211 - }, - { - "epoch": 0.28966947738648147, - "grad_norm": 1.2175929617199637, - "learning_rate": 3.333478628418448e-06, - "loss": 1.0363, - "step": 3212 - }, - { - "epoch": 0.28975966090995176, - "grad_norm": 1.2007474816363626, - "learning_rate": 3.333043164606618e-06, - "loss": 0.8499, - "step": 3213 - }, - { - "epoch": 0.289849844433422, - "grad_norm": 1.3995414936746569, - "learning_rate": 3.3326075870521948e-06, - "loss": 1.0468, - "step": 3214 - }, - { - "epoch": 0.2899400279568923, - "grad_norm": 1.778773811915472, - "learning_rate": 3.3321718957923437e-06, - "loss": 0.9731, - "step": 3215 - }, - { - "epoch": 0.29003021148036257, - "grad_norm": 1.3747139980071612, - "learning_rate": 3.3317360908642413e-06, - "loss": 0.963, - "step": 3216 - }, - { - "epoch": 0.2901203950038328, - "grad_norm": 1.6995028554247633, - "learning_rate": 3.331300172305072e-06, - "loss": 0.9148, - "step": 3217 - }, - { - "epoch": 0.2902105785273031, - "grad_norm": 1.494914972445359, - "learning_rate": 3.330864140152032e-06, - "loss": 0.9565, - "step": 3218 - }, - { - "epoch": 0.2903007620507733, - "grad_norm": 1.3603902949551478, - "learning_rate": 3.330427994442325e-06, - "loss": 0.9765, - "step": 3219 - }, - { - "epoch": 0.2903909455742436, - "grad_norm": 1.37109375, - "learning_rate": 3.3299917352131657e-06, - "loss": 1.0027, - "step": 3220 - }, - { - "epoch": 0.29048112909771384, - "grad_norm": 2.242953817489914, - "learning_rate": 3.329555362501778e-06, - "loss": 0.9329, - "step": 3221 - }, - { - "epoch": 0.29057131262118413, - "grad_norm": 1.4175130989505569, - "learning_rate": 3.3291188763453954e-06, - "loss": 0.929, - "step": 3222 - }, - { - "epoch": 0.29066149614465436, - "grad_norm": 1.6056689209534512, - "learning_rate": 3.3286822767812618e-06, - "loss": 0.9519, - "step": 3223 - }, - { - "epoch": 0.29075167966812465, - "grad_norm": 2.3142218494349454, - "learning_rate": 3.32824556384663e-06, - "loss": 0.95, - "step": 3224 - }, - { - "epoch": 0.2908418631915949, - "grad_norm": 1.3610591759580457, - "learning_rate": 3.3278087375787628e-06, - "loss": 0.9719, - "step": 3225 - }, - { - "epoch": 0.2909320467150652, - "grad_norm": 1.6328847768748291, - "learning_rate": 3.327371798014933e-06, - "loss": 0.9774, - "step": 3226 - }, - { - "epoch": 0.2910222302385354, - "grad_norm": 1.6410301071650302, - "learning_rate": 3.3269347451924218e-06, - "loss": 0.9693, - "step": 3227 - }, - { - "epoch": 0.2911124137620057, - "grad_norm": 1.4456262196470955, - "learning_rate": 3.326497579148522e-06, - "loss": 1.0271, - "step": 3228 - }, - { - "epoch": 0.29120259728547593, - "grad_norm": 1.557473910926065, - "learning_rate": 3.3260602999205345e-06, - "loss": 0.97, - "step": 3229 - }, - { - "epoch": 0.2912927808089462, - "grad_norm": 1.4249043315428873, - "learning_rate": 3.32562290754577e-06, - "loss": 0.9797, - "step": 3230 - }, - { - "epoch": 0.29138296433241645, - "grad_norm": 1.3659522443580823, - "learning_rate": 3.3251854020615494e-06, - "loss": 0.9388, - "step": 3231 - }, - { - "epoch": 0.29147314785588674, - "grad_norm": 1.5214959708519926, - "learning_rate": 3.324747783505204e-06, - "loss": 0.9715, - "step": 3232 - }, - { - "epoch": 0.29156333137935697, - "grad_norm": 1.4792439973344162, - "learning_rate": 3.324310051914073e-06, - "loss": 0.8813, - "step": 3233 - }, - { - "epoch": 0.29165351490282726, - "grad_norm": 1.6860883247571719, - "learning_rate": 3.3238722073255056e-06, - "loss": 0.9338, - "step": 3234 - }, - { - "epoch": 0.2917436984262975, - "grad_norm": 1.7480914744223193, - "learning_rate": 3.323434249776863e-06, - "loss": 0.9306, - "step": 3235 - }, - { - "epoch": 0.2918338819497678, - "grad_norm": 0.9409854945849522, - "learning_rate": 3.3229961793055117e-06, - "loss": 0.8253, - "step": 3236 - }, - { - "epoch": 0.291924065473238, - "grad_norm": 1.293153179188009, - "learning_rate": 3.3225579959488314e-06, - "loss": 0.8985, - "step": 3237 - }, - { - "epoch": 0.2920142489967083, - "grad_norm": 1.4206725684952883, - "learning_rate": 3.322119699744211e-06, - "loss": 0.9859, - "step": 3238 - }, - { - "epoch": 0.29210443252017854, - "grad_norm": 1.731325234661665, - "learning_rate": 3.3216812907290476e-06, - "loss": 0.9812, - "step": 3239 - }, - { - "epoch": 0.2921946160436488, - "grad_norm": 1.3059859068170019, - "learning_rate": 3.3212427689407484e-06, - "loss": 0.9855, - "step": 3240 - }, - { - "epoch": 0.2922847995671191, - "grad_norm": 0.7258728610099703, - "learning_rate": 3.3208041344167317e-06, - "loss": 0.778, - "step": 3241 - }, - { - "epoch": 0.29237498309058935, - "grad_norm": 1.8034183944459548, - "learning_rate": 3.3203653871944224e-06, - "loss": 0.8781, - "step": 3242 - }, - { - "epoch": 0.29246516661405964, - "grad_norm": 1.4786254358267603, - "learning_rate": 3.3199265273112587e-06, - "loss": 0.9332, - "step": 3243 - }, - { - "epoch": 0.29255535013752987, - "grad_norm": 1.575475905437237, - "learning_rate": 3.3194875548046852e-06, - "loss": 0.9053, - "step": 3244 - }, - { - "epoch": 0.29264553366100016, - "grad_norm": 1.4605978504639905, - "learning_rate": 3.319048469712158e-06, - "loss": 0.9438, - "step": 3245 - }, - { - "epoch": 0.2927357171844704, - "grad_norm": 1.6207698636347787, - "learning_rate": 3.3186092720711423e-06, - "loss": 1.0627, - "step": 3246 - }, - { - "epoch": 0.2928259007079407, - "grad_norm": 1.5577388703939379, - "learning_rate": 3.3181699619191125e-06, - "loss": 0.8907, - "step": 3247 - }, - { - "epoch": 0.2929160842314109, - "grad_norm": 1.5016371217221098, - "learning_rate": 3.3177305392935536e-06, - "loss": 1.0028, - "step": 3248 - }, - { - "epoch": 0.2930062677548812, - "grad_norm": 0.8989341358333247, - "learning_rate": 3.3172910042319595e-06, - "loss": 0.8635, - "step": 3249 - }, - { - "epoch": 0.29309645127835143, - "grad_norm": 1.3448067878722034, - "learning_rate": 3.316851356771833e-06, - "loss": 1.0046, - "step": 3250 - }, - { - "epoch": 0.2931866348018217, - "grad_norm": 1.285004487976074, - "learning_rate": 3.3164115969506876e-06, - "loss": 0.9933, - "step": 3251 - }, - { - "epoch": 0.29327681832529195, - "grad_norm": 2.872426747316835, - "learning_rate": 3.315971724806046e-06, - "loss": 1.052, - "step": 3252 - }, - { - "epoch": 0.29336700184876224, - "grad_norm": 1.6591201960162807, - "learning_rate": 3.315531740375441e-06, - "loss": 0.9059, - "step": 3253 - }, - { - "epoch": 0.2934571853722325, - "grad_norm": 1.471882260055617, - "learning_rate": 3.315091643696414e-06, - "loss": 0.9271, - "step": 3254 - }, - { - "epoch": 0.29354736889570276, - "grad_norm": 1.6877867137208342, - "learning_rate": 3.3146514348065164e-06, - "loss": 0.8934, - "step": 3255 - }, - { - "epoch": 0.293637552419173, - "grad_norm": 2.0444555762006327, - "learning_rate": 3.31421111374331e-06, - "loss": 0.9729, - "step": 3256 - }, - { - "epoch": 0.2937277359426433, - "grad_norm": 1.3540593275908568, - "learning_rate": 3.3137706805443647e-06, - "loss": 0.9806, - "step": 3257 - }, - { - "epoch": 0.2938179194661135, - "grad_norm": 1.470766104151414, - "learning_rate": 3.313330135247261e-06, - "loss": 1.0154, - "step": 3258 - }, - { - "epoch": 0.2939081029895838, - "grad_norm": 1.254655133087612, - "learning_rate": 3.312889477889588e-06, - "loss": 0.9332, - "step": 3259 - }, - { - "epoch": 0.29399828651305404, - "grad_norm": 1.398270623676854, - "learning_rate": 3.3124487085089464e-06, - "loss": 0.9743, - "step": 3260 - }, - { - "epoch": 0.29408847003652433, - "grad_norm": 1.7148337396490285, - "learning_rate": 3.312007827142943e-06, - "loss": 1.0001, - "step": 3261 - }, - { - "epoch": 0.29417865355999456, - "grad_norm": 1.268857102031218, - "learning_rate": 3.3115668338291983e-06, - "loss": 0.8842, - "step": 3262 - }, - { - "epoch": 0.29426883708346485, - "grad_norm": 1.4194277148786065, - "learning_rate": 3.3111257286053394e-06, - "loss": 1.0403, - "step": 3263 - }, - { - "epoch": 0.29435902060693514, - "grad_norm": 1.4208227602806733, - "learning_rate": 3.3106845115090043e-06, - "loss": 0.9702, - "step": 3264 - }, - { - "epoch": 0.2944492041304054, - "grad_norm": 1.40797306038541, - "learning_rate": 3.310243182577839e-06, - "loss": 0.9167, - "step": 3265 - }, - { - "epoch": 0.29453938765387566, - "grad_norm": 1.3826502473114382, - "learning_rate": 3.3098017418495007e-06, - "loss": 0.8628, - "step": 3266 - }, - { - "epoch": 0.2946295711773459, - "grad_norm": 1.4227438986109011, - "learning_rate": 3.309360189361656e-06, - "loss": 0.9272, - "step": 3267 - }, - { - "epoch": 0.2947197547008162, - "grad_norm": 1.3955392907385067, - "learning_rate": 3.3089185251519797e-06, - "loss": 1.0439, - "step": 3268 - }, - { - "epoch": 0.2948099382242864, - "grad_norm": 1.4522654134175719, - "learning_rate": 3.3084767492581574e-06, - "loss": 0.9463, - "step": 3269 - }, - { - "epoch": 0.2949001217477567, - "grad_norm": 1.6740805792744657, - "learning_rate": 3.3080348617178846e-06, - "loss": 0.98, - "step": 3270 - }, - { - "epoch": 0.29499030527122694, - "grad_norm": 1.3357367559936955, - "learning_rate": 3.307592862568865e-06, - "loss": 0.9324, - "step": 3271 - }, - { - "epoch": 0.2950804887946972, - "grad_norm": 1.2922781860560248, - "learning_rate": 3.307150751848812e-06, - "loss": 1.0246, - "step": 3272 - }, - { - "epoch": 0.29517067231816746, - "grad_norm": 1.3002828382039415, - "learning_rate": 3.3067085295954497e-06, - "loss": 0.8679, - "step": 3273 - }, - { - "epoch": 0.29526085584163775, - "grad_norm": 2.5153235971663594, - "learning_rate": 3.3062661958465098e-06, - "loss": 0.8374, - "step": 3274 - }, - { - "epoch": 0.295351039365108, - "grad_norm": 1.553939486033569, - "learning_rate": 3.305823750639736e-06, - "loss": 0.8594, - "step": 3275 - }, - { - "epoch": 0.29544122288857827, - "grad_norm": 1.3874908876549426, - "learning_rate": 3.3053811940128795e-06, - "loss": 0.9595, - "step": 3276 - }, - { - "epoch": 0.2955314064120485, - "grad_norm": 1.4530223687137593, - "learning_rate": 3.3049385260037016e-06, - "loss": 0.9463, - "step": 3277 - }, - { - "epoch": 0.2956215899355188, - "grad_norm": 1.3558689373166029, - "learning_rate": 3.3044957466499736e-06, - "loss": 0.8571, - "step": 3278 - }, - { - "epoch": 0.295711773458989, - "grad_norm": 1.3888151843329715, - "learning_rate": 3.304052855989475e-06, - "loss": 0.8835, - "step": 3279 - }, - { - "epoch": 0.2958019569824593, - "grad_norm": 1.299786069580458, - "learning_rate": 3.3036098540599966e-06, - "loss": 1.0178, - "step": 3280 - }, - { - "epoch": 0.29589214050592955, - "grad_norm": 3.756381263914798, - "learning_rate": 3.3031667408993373e-06, - "loss": 0.9291, - "step": 3281 - }, - { - "epoch": 0.29598232402939983, - "grad_norm": 1.5666780687823563, - "learning_rate": 3.302723516545306e-06, - "loss": 0.9616, - "step": 3282 - }, - { - "epoch": 0.29607250755287007, - "grad_norm": 1.4429514802850396, - "learning_rate": 3.302280181035722e-06, - "loss": 0.9883, - "step": 3283 - }, - { - "epoch": 0.29616269107634036, - "grad_norm": 0.8228463050281357, - "learning_rate": 3.3018367344084117e-06, - "loss": 0.8367, - "step": 3284 - }, - { - "epoch": 0.2962528745998106, - "grad_norm": 2.091053265097952, - "learning_rate": 3.3013931767012125e-06, - "loss": 0.9515, - "step": 3285 - }, - { - "epoch": 0.2963430581232809, - "grad_norm": 1.4049463375201703, - "learning_rate": 3.300949507951972e-06, - "loss": 0.9591, - "step": 3286 - }, - { - "epoch": 0.2964332416467511, - "grad_norm": 1.996777441659379, - "learning_rate": 3.300505728198546e-06, - "loss": 1.0114, - "step": 3287 - }, - { - "epoch": 0.2965234251702214, - "grad_norm": 1.519561139673294, - "learning_rate": 3.3000618374788e-06, - "loss": 0.9437, - "step": 3288 - }, - { - "epoch": 0.2966136086936917, - "grad_norm": 1.6046167790801915, - "learning_rate": 3.2996178358306104e-06, - "loss": 0.9999, - "step": 3289 - }, - { - "epoch": 0.2967037922171619, - "grad_norm": 1.4035304581802972, - "learning_rate": 3.2991737232918606e-06, - "loss": 0.9307, - "step": 3290 - }, - { - "epoch": 0.2967939757406322, - "grad_norm": 1.5619745515418317, - "learning_rate": 3.298729499900445e-06, - "loss": 0.9085, - "step": 3291 - }, - { - "epoch": 0.29688415926410244, - "grad_norm": 1.6996445396396003, - "learning_rate": 3.2982851656942677e-06, - "loss": 0.9074, - "step": 3292 - }, - { - "epoch": 0.29697434278757273, - "grad_norm": 1.2736617282907137, - "learning_rate": 3.2978407207112416e-06, - "loss": 0.9823, - "step": 3293 - }, - { - "epoch": 0.29706452631104296, - "grad_norm": 1.7092888261180201, - "learning_rate": 3.2973961649892888e-06, - "loss": 0.9665, - "step": 3294 - }, - { - "epoch": 0.29715470983451325, - "grad_norm": 1.4460977431643527, - "learning_rate": 3.296951498566341e-06, - "loss": 1.0216, - "step": 3295 - }, - { - "epoch": 0.2972448933579835, - "grad_norm": 1.5553739939423266, - "learning_rate": 3.2965067214803404e-06, - "loss": 0.9583, - "step": 3296 - }, - { - "epoch": 0.2973350768814538, - "grad_norm": 1.1328233389500313, - "learning_rate": 3.2960618337692372e-06, - "loss": 0.9891, - "step": 3297 - }, - { - "epoch": 0.297425260404924, - "grad_norm": 1.3523313948200772, - "learning_rate": 3.2956168354709927e-06, - "loss": 0.9223, - "step": 3298 - }, - { - "epoch": 0.2975154439283943, - "grad_norm": 1.5450291264822378, - "learning_rate": 3.2951717266235754e-06, - "loss": 1.0072, - "step": 3299 - }, - { - "epoch": 0.29760562745186453, - "grad_norm": 1.2907189101812553, - "learning_rate": 3.294726507264964e-06, - "loss": 0.8383, - "step": 3300 - }, - { - "epoch": 0.2976958109753348, - "grad_norm": 1.5689149017596917, - "learning_rate": 3.2942811774331487e-06, - "loss": 0.9298, - "step": 3301 - }, - { - "epoch": 0.29778599449880505, - "grad_norm": 1.6602475848037483, - "learning_rate": 3.293835737166127e-06, - "loss": 0.9497, - "step": 3302 - }, - { - "epoch": 0.29787617802227534, - "grad_norm": 1.4247229423892178, - "learning_rate": 3.293390186501906e-06, - "loss": 0.8974, - "step": 3303 - }, - { - "epoch": 0.2979663615457456, - "grad_norm": 1.502211133129403, - "learning_rate": 3.2929445254785024e-06, - "loss": 0.9722, - "step": 3304 - }, - { - "epoch": 0.29805654506921586, - "grad_norm": 1.9134056365521723, - "learning_rate": 3.2924987541339423e-06, - "loss": 0.9191, - "step": 3305 - }, - { - "epoch": 0.2981467285926861, - "grad_norm": 1.5691395644435284, - "learning_rate": 3.292052872506262e-06, - "loss": 0.983, - "step": 3306 - }, - { - "epoch": 0.2982369121161564, - "grad_norm": 1.3085221000528036, - "learning_rate": 3.291606880633506e-06, - "loss": 1.0005, - "step": 3307 - }, - { - "epoch": 0.2983270956396266, - "grad_norm": 1.563093759254296, - "learning_rate": 3.2911607785537297e-06, - "loss": 1.0109, - "step": 3308 - }, - { - "epoch": 0.2984172791630969, - "grad_norm": 1.5388131496384225, - "learning_rate": 3.290714566304997e-06, - "loss": 1.0487, - "step": 3309 - }, - { - "epoch": 0.29850746268656714, - "grad_norm": 1.6325338755622802, - "learning_rate": 3.2902682439253794e-06, - "loss": 1.03, - "step": 3310 - }, - { - "epoch": 0.2985976462100374, - "grad_norm": 2.0142752689643433, - "learning_rate": 3.289821811452961e-06, - "loss": 0.9726, - "step": 3311 - }, - { - "epoch": 0.2986878297335077, - "grad_norm": 1.934605374276071, - "learning_rate": 3.289375268925834e-06, - "loss": 1.0171, - "step": 3312 - }, - { - "epoch": 0.29877801325697795, - "grad_norm": 1.8885786473363195, - "learning_rate": 3.288928616382099e-06, - "loss": 0.9449, - "step": 3313 - }, - { - "epoch": 0.29886819678044824, - "grad_norm": 1.2096073844078759, - "learning_rate": 3.288481853859868e-06, - "loss": 0.9958, - "step": 3314 - }, - { - "epoch": 0.29895838030391847, - "grad_norm": 1.2108749373487053, - "learning_rate": 3.2880349813972604e-06, - "loss": 0.9386, - "step": 3315 - }, - { - "epoch": 0.29904856382738876, - "grad_norm": 1.4807162518317682, - "learning_rate": 3.2875879990324052e-06, - "loss": 0.9603, - "step": 3316 - }, - { - "epoch": 0.299138747350859, - "grad_norm": 1.6734543942864024, - "learning_rate": 3.287140906803443e-06, - "loss": 1.0181, - "step": 3317 - }, - { - "epoch": 0.2992289308743293, - "grad_norm": 1.4462909582564805, - "learning_rate": 3.2866937047485216e-06, - "loss": 0.8775, - "step": 3318 - }, - { - "epoch": 0.2993191143977995, - "grad_norm": 1.6236429416758578, - "learning_rate": 3.2862463929057985e-06, - "loss": 1.0259, - "step": 3319 - }, - { - "epoch": 0.2994092979212698, - "grad_norm": 1.4184860252791895, - "learning_rate": 3.285798971313441e-06, - "loss": 0.8077, - "step": 3320 - }, - { - "epoch": 0.29949948144474003, - "grad_norm": 1.2337250748950532, - "learning_rate": 3.2853514400096248e-06, - "loss": 0.9834, - "step": 3321 - }, - { - "epoch": 0.2995896649682103, - "grad_norm": 1.2929086815286985, - "learning_rate": 3.2849037990325367e-06, - "loss": 0.9898, - "step": 3322 - }, - { - "epoch": 0.29967984849168056, - "grad_norm": 1.1897350659893016, - "learning_rate": 3.2844560484203717e-06, - "loss": 0.9132, - "step": 3323 - }, - { - "epoch": 0.29977003201515084, - "grad_norm": 1.2126730510273778, - "learning_rate": 3.2840081882113333e-06, - "loss": 1.0466, - "step": 3324 - }, - { - "epoch": 0.2998602155386211, - "grad_norm": 2.0405323095140284, - "learning_rate": 3.283560218443638e-06, - "loss": 0.9397, - "step": 3325 - }, - { - "epoch": 0.29995039906209137, - "grad_norm": 1.2945768837491352, - "learning_rate": 3.2831121391555064e-06, - "loss": 0.9729, - "step": 3326 - }, - { - "epoch": 0.3000405825855616, - "grad_norm": 1.298383960928366, - "learning_rate": 3.2826639503851724e-06, - "loss": 0.9102, - "step": 3327 - }, - { - "epoch": 0.3001307661090319, - "grad_norm": 1.3029427654814867, - "learning_rate": 3.282215652170877e-06, - "loss": 0.994, - "step": 3328 - }, - { - "epoch": 0.3002209496325021, - "grad_norm": 1.4376775383158271, - "learning_rate": 3.281767244550873e-06, - "loss": 0.9971, - "step": 3329 - }, - { - "epoch": 0.3003111331559724, - "grad_norm": 0.6846575062466144, - "learning_rate": 3.2813187275634193e-06, - "loss": 0.762, - "step": 3330 - }, - { - "epoch": 0.30040131667944264, - "grad_norm": 1.42923858765601, - "learning_rate": 3.280870101246787e-06, - "loss": 0.9883, - "step": 3331 - }, - { - "epoch": 0.30049150020291293, - "grad_norm": 1.5546691740096492, - "learning_rate": 3.280421365639255e-06, - "loss": 0.879, - "step": 3332 - }, - { - "epoch": 0.30058168372638316, - "grad_norm": 1.571777192062745, - "learning_rate": 3.279972520779112e-06, - "loss": 0.9452, - "step": 3333 - }, - { - "epoch": 0.30067186724985345, - "grad_norm": 1.471723590036516, - "learning_rate": 3.279523566704656e-06, - "loss": 1.0289, - "step": 3334 - }, - { - "epoch": 0.30076205077332374, - "grad_norm": 1.5041422869255512, - "learning_rate": 3.2790745034541935e-06, - "loss": 1.021, - "step": 3335 - }, - { - "epoch": 0.300852234296794, - "grad_norm": 1.4683012581193025, - "learning_rate": 3.278625331066042e-06, - "loss": 1.0101, - "step": 3336 - }, - { - "epoch": 0.30094241782026426, - "grad_norm": 4.514275055367439, - "learning_rate": 3.278176049578527e-06, - "loss": 1.005, - "step": 3337 - }, - { - "epoch": 0.3010326013437345, - "grad_norm": 1.5360828350301357, - "learning_rate": 3.2777266590299835e-06, - "loss": 0.9192, - "step": 3338 - }, - { - "epoch": 0.3011227848672048, - "grad_norm": 1.5851950575363343, - "learning_rate": 3.2772771594587562e-06, - "loss": 0.8936, - "step": 3339 - }, - { - "epoch": 0.301212968390675, - "grad_norm": 1.582037202800336, - "learning_rate": 3.2768275509031988e-06, - "loss": 1.0598, - "step": 3340 - }, - { - "epoch": 0.3013031519141453, - "grad_norm": 1.5483072759258139, - "learning_rate": 3.276377833401675e-06, - "loss": 0.9464, - "step": 3341 - }, - { - "epoch": 0.30139333543761554, - "grad_norm": 1.4404205011624445, - "learning_rate": 3.2759280069925557e-06, - "loss": 1.0195, - "step": 3342 - }, - { - "epoch": 0.30148351896108583, - "grad_norm": 1.5679075889652394, - "learning_rate": 3.2754780717142233e-06, - "loss": 1.006, - "step": 3343 - }, - { - "epoch": 0.30157370248455606, - "grad_norm": 1.617667306681507, - "learning_rate": 3.27502802760507e-06, - "loss": 0.941, - "step": 3344 - }, - { - "epoch": 0.30166388600802635, - "grad_norm": 1.450842757146044, - "learning_rate": 3.2745778747034943e-06, - "loss": 0.8757, - "step": 3345 - }, - { - "epoch": 0.3017540695314966, - "grad_norm": 1.6368157624329398, - "learning_rate": 3.274127613047906e-06, - "loss": 0.9517, - "step": 3346 - }, - { - "epoch": 0.30184425305496687, - "grad_norm": 1.492117435747729, - "learning_rate": 3.273677242676725e-06, - "loss": 0.9833, - "step": 3347 - }, - { - "epoch": 0.3019344365784371, - "grad_norm": 1.5828562151491536, - "learning_rate": 3.2732267636283782e-06, - "loss": 0.951, - "step": 3348 - }, - { - "epoch": 0.3020246201019074, - "grad_norm": 1.608975777599595, - "learning_rate": 3.2727761759413034e-06, - "loss": 0.9144, - "step": 3349 - }, - { - "epoch": 0.3021148036253776, - "grad_norm": 1.5472998758062917, - "learning_rate": 3.2723254796539477e-06, - "loss": 0.9763, - "step": 3350 - }, - { - "epoch": 0.3022049871488479, - "grad_norm": 1.4493148237191094, - "learning_rate": 3.271874674804766e-06, - "loss": 0.9439, - "step": 3351 - }, - { - "epoch": 0.30229517067231815, - "grad_norm": 1.4102147901224447, - "learning_rate": 3.2714237614322242e-06, - "loss": 0.9861, - "step": 3352 - }, - { - "epoch": 0.30238535419578844, - "grad_norm": 1.2726153194432865, - "learning_rate": 3.2709727395747974e-06, - "loss": 0.7933, - "step": 3353 - }, - { - "epoch": 0.30247553771925867, - "grad_norm": 0.7324158121493856, - "learning_rate": 3.2705216092709673e-06, - "loss": 0.8032, - "step": 3354 - }, - { - "epoch": 0.30256572124272896, - "grad_norm": 1.640245884280106, - "learning_rate": 3.2700703705592282e-06, - "loss": 1.0482, - "step": 3355 - }, - { - "epoch": 0.3026559047661992, - "grad_norm": 1.3920665941066446, - "learning_rate": 3.269619023478082e-06, - "loss": 0.8641, - "step": 3356 - }, - { - "epoch": 0.3027460882896695, - "grad_norm": 1.4139385274422933, - "learning_rate": 3.26916756806604e-06, - "loss": 1.0568, - "step": 3357 - }, - { - "epoch": 0.3028362718131397, - "grad_norm": 1.4932481281773964, - "learning_rate": 3.268716004361623e-06, - "loss": 0.9714, - "step": 3358 - }, - { - "epoch": 0.30292645533661, - "grad_norm": 0.7392000405075219, - "learning_rate": 3.268264332403361e-06, - "loss": 0.8269, - "step": 3359 - }, - { - "epoch": 0.3030166388600803, - "grad_norm": 1.240802880898334, - "learning_rate": 3.2678125522297933e-06, - "loss": 0.9966, - "step": 3360 - }, - { - "epoch": 0.3031068223835505, - "grad_norm": 1.9036254951305573, - "learning_rate": 3.267360663879468e-06, - "loss": 0.909, - "step": 3361 - }, - { - "epoch": 0.3031970059070208, - "grad_norm": 1.7040441989807888, - "learning_rate": 3.266908667390942e-06, - "loss": 0.9488, - "step": 3362 - }, - { - "epoch": 0.30328718943049104, - "grad_norm": 1.6774435268623435, - "learning_rate": 3.2664565628027833e-06, - "loss": 1.0125, - "step": 3363 - }, - { - "epoch": 0.30337737295396133, - "grad_norm": 1.7567431778987708, - "learning_rate": 3.2660043501535675e-06, - "loss": 0.9631, - "step": 3364 - }, - { - "epoch": 0.30346755647743157, - "grad_norm": 1.5817527996898828, - "learning_rate": 3.2655520294818797e-06, - "loss": 1.028, - "step": 3365 - }, - { - "epoch": 0.30355774000090185, - "grad_norm": 1.2784057775126973, - "learning_rate": 3.2650996008263146e-06, - "loss": 1.0326, - "step": 3366 - }, - { - "epoch": 0.3036479235243721, - "grad_norm": 1.6328189247406495, - "learning_rate": 3.2646470642254756e-06, - "loss": 0.9182, - "step": 3367 - }, - { - "epoch": 0.3037381070478424, - "grad_norm": 1.5954350557991852, - "learning_rate": 3.2641944197179767e-06, - "loss": 0.9831, - "step": 3368 - }, - { - "epoch": 0.3038282905713126, - "grad_norm": 0.8359437746186824, - "learning_rate": 3.2637416673424383e-06, - "loss": 0.7852, - "step": 3369 - }, - { - "epoch": 0.3039184740947829, - "grad_norm": 1.3860074220241192, - "learning_rate": 3.2632888071374937e-06, - "loss": 0.9813, - "step": 3370 - }, - { - "epoch": 0.30400865761825313, - "grad_norm": 1.495255357373948, - "learning_rate": 3.2628358391417815e-06, - "loss": 0.9967, - "step": 3371 - }, - { - "epoch": 0.3040988411417234, - "grad_norm": 1.5392046247977886, - "learning_rate": 3.2623827633939526e-06, - "loss": 0.9483, - "step": 3372 - }, - { - "epoch": 0.30418902466519365, - "grad_norm": 1.496088491086955, - "learning_rate": 3.2619295799326657e-06, - "loss": 0.9655, - "step": 3373 - }, - { - "epoch": 0.30427920818866394, - "grad_norm": 1.6258999459942653, - "learning_rate": 3.2614762887965883e-06, - "loss": 0.8532, - "step": 3374 - }, - { - "epoch": 0.3043693917121342, - "grad_norm": 1.4559824751019343, - "learning_rate": 3.2610228900243984e-06, - "loss": 0.9579, - "step": 3375 - }, - { - "epoch": 0.30445957523560446, - "grad_norm": 2.0392901414103073, - "learning_rate": 3.260569383654783e-06, - "loss": 0.8414, - "step": 3376 - }, - { - "epoch": 0.3045497587590747, - "grad_norm": 0.7388658580521016, - "learning_rate": 3.2601157697264365e-06, - "loss": 0.8462, - "step": 3377 - }, - { - "epoch": 0.304639942282545, - "grad_norm": 1.5819834760822922, - "learning_rate": 3.2596620482780647e-06, - "loss": 0.9406, - "step": 3378 - }, - { - "epoch": 0.3047301258060152, - "grad_norm": 0.7078278832521824, - "learning_rate": 3.2592082193483803e-06, - "loss": 0.8056, - "step": 3379 - }, - { - "epoch": 0.3048203093294855, - "grad_norm": 1.4528291462621334, - "learning_rate": 3.258754282976109e-06, - "loss": 0.9448, - "step": 3380 - }, - { - "epoch": 0.30491049285295574, - "grad_norm": 1.5238027281552873, - "learning_rate": 3.25830023919998e-06, - "loss": 1.0309, - "step": 3381 - }, - { - "epoch": 0.305000676376426, - "grad_norm": 1.408008323877829, - "learning_rate": 3.2578460880587374e-06, - "loss": 0.9247, - "step": 3382 - }, - { - "epoch": 0.3050908598998963, - "grad_norm": 1.6708569858744593, - "learning_rate": 3.2573918295911306e-06, - "loss": 1.0214, - "step": 3383 - }, - { - "epoch": 0.30518104342336655, - "grad_norm": 1.5000476034875967, - "learning_rate": 3.2569374638359196e-06, - "loss": 0.9065, - "step": 3384 - }, - { - "epoch": 0.30527122694683684, - "grad_norm": 1.2534384642211889, - "learning_rate": 3.2564829908318736e-06, - "loss": 0.9819, - "step": 3385 - }, - { - "epoch": 0.30536141047030707, - "grad_norm": 1.2191404059696214, - "learning_rate": 3.2560284106177705e-06, - "loss": 1.0211, - "step": 3386 - }, - { - "epoch": 0.30545159399377736, - "grad_norm": 1.4520526333614356, - "learning_rate": 3.2555737232323978e-06, - "loss": 0.9401, - "step": 3387 - }, - { - "epoch": 0.3055417775172476, - "grad_norm": 1.272511713240099, - "learning_rate": 3.255118928714552e-06, - "loss": 0.9948, - "step": 3388 - }, - { - "epoch": 0.3056319610407179, - "grad_norm": 1.725895057887134, - "learning_rate": 3.2546640271030386e-06, - "loss": 0.9058, - "step": 3389 - }, - { - "epoch": 0.3057221445641881, - "grad_norm": 0.712288631073102, - "learning_rate": 3.2542090184366717e-06, - "loss": 0.8446, - "step": 3390 - }, - { - "epoch": 0.3058123280876584, - "grad_norm": 1.500336768175143, - "learning_rate": 3.253753902754276e-06, - "loss": 1.0396, - "step": 3391 - }, - { - "epoch": 0.30590251161112864, - "grad_norm": 1.2305055643054286, - "learning_rate": 3.253298680094685e-06, - "loss": 0.9049, - "step": 3392 - }, - { - "epoch": 0.3059926951345989, - "grad_norm": 1.6597193243975947, - "learning_rate": 3.2528433504967394e-06, - "loss": 0.9055, - "step": 3393 - }, - { - "epoch": 0.30608287865806916, - "grad_norm": 1.4596479303627237, - "learning_rate": 3.252387913999291e-06, - "loss": 0.9694, - "step": 3394 - }, - { - "epoch": 0.30617306218153945, - "grad_norm": 1.4929167877688383, - "learning_rate": 3.2519323706411998e-06, - "loss": 0.9652, - "step": 3395 - }, - { - "epoch": 0.3062632457050097, - "grad_norm": 1.3283592466334004, - "learning_rate": 3.251476720461336e-06, - "loss": 0.854, - "step": 3396 - }, - { - "epoch": 0.30635342922847997, - "grad_norm": 1.749315604850344, - "learning_rate": 3.251020963498578e-06, - "loss": 0.8897, - "step": 3397 - }, - { - "epoch": 0.3064436127519502, - "grad_norm": 1.3311048849468283, - "learning_rate": 3.250565099791813e-06, - "loss": 1.01, - "step": 3398 - }, - { - "epoch": 0.3065337962754205, - "grad_norm": 1.1735643036416024, - "learning_rate": 3.2501091293799387e-06, - "loss": 1.0112, - "step": 3399 - }, - { - "epoch": 0.3066239797988907, - "grad_norm": 1.3323937870801532, - "learning_rate": 3.24965305230186e-06, - "loss": 0.9766, - "step": 3400 - }, - { - "epoch": 0.306714163322361, - "grad_norm": 1.8794984895294689, - "learning_rate": 3.249196868596492e-06, - "loss": 1.0367, - "step": 3401 - }, - { - "epoch": 0.30680434684583124, - "grad_norm": 1.55563019202574, - "learning_rate": 3.24874057830276e-06, - "loss": 0.8961, - "step": 3402 - }, - { - "epoch": 0.30689453036930153, - "grad_norm": 1.4526244090864993, - "learning_rate": 3.2482841814595954e-06, - "loss": 0.892, - "step": 3403 - }, - { - "epoch": 0.30698471389277177, - "grad_norm": 1.5401981249845529, - "learning_rate": 3.247827678105943e-06, - "loss": 0.8988, - "step": 3404 - }, - { - "epoch": 0.30707489741624205, - "grad_norm": 1.4232889208836625, - "learning_rate": 3.247371068280751e-06, - "loss": 0.9556, - "step": 3405 - }, - { - "epoch": 0.3071650809397123, - "grad_norm": 1.21032034621489, - "learning_rate": 3.2469143520229823e-06, - "loss": 0.826, - "step": 3406 - }, - { - "epoch": 0.3072552644631826, - "grad_norm": 1.2370606673311935, - "learning_rate": 3.2464575293716054e-06, - "loss": 0.9194, - "step": 3407 - }, - { - "epoch": 0.30734544798665286, - "grad_norm": 1.471022937142008, - "learning_rate": 3.2460006003655997e-06, - "loss": 0.9035, - "step": 3408 - }, - { - "epoch": 0.3074356315101231, - "grad_norm": 1.6102149771281316, - "learning_rate": 3.245543565043952e-06, - "loss": 1.0238, - "step": 3409 - }, - { - "epoch": 0.3075258150335934, - "grad_norm": 1.7079157512755716, - "learning_rate": 3.2450864234456592e-06, - "loss": 0.9541, - "step": 3410 - }, - { - "epoch": 0.3076159985570636, - "grad_norm": 1.2831681942068325, - "learning_rate": 3.244629175609728e-06, - "loss": 0.9567, - "step": 3411 - }, - { - "epoch": 0.3077061820805339, - "grad_norm": 1.476308245692836, - "learning_rate": 3.2441718215751726e-06, - "loss": 0.9476, - "step": 3412 - }, - { - "epoch": 0.30779636560400414, - "grad_norm": 1.5814304294100214, - "learning_rate": 3.2437143613810173e-06, - "loss": 0.9249, - "step": 3413 - }, - { - "epoch": 0.30788654912747443, - "grad_norm": 1.4501722134892274, - "learning_rate": 3.2432567950662947e-06, - "loss": 1.0025, - "step": 3414 - }, - { - "epoch": 0.30797673265094466, - "grad_norm": 1.3055961323016978, - "learning_rate": 3.2427991226700468e-06, - "loss": 0.9526, - "step": 3415 - }, - { - "epoch": 0.30806691617441495, - "grad_norm": 1.4934827365956653, - "learning_rate": 3.2423413442313246e-06, - "loss": 0.8941, - "step": 3416 - }, - { - "epoch": 0.3081570996978852, - "grad_norm": 1.5506030540035722, - "learning_rate": 3.2418834597891904e-06, - "loss": 0.8747, - "step": 3417 - }, - { - "epoch": 0.3082472832213555, - "grad_norm": 1.8718365367574077, - "learning_rate": 3.2414254693827098e-06, - "loss": 1.048, - "step": 3418 - }, - { - "epoch": 0.3083374667448257, - "grad_norm": 1.2720613591461696, - "learning_rate": 3.2409673730509644e-06, - "loss": 0.9431, - "step": 3419 - }, - { - "epoch": 0.308427650268296, - "grad_norm": 1.5181046484458218, - "learning_rate": 3.2405091708330393e-06, - "loss": 0.9374, - "step": 3420 - }, - { - "epoch": 0.3085178337917662, - "grad_norm": 1.6282825327861679, - "learning_rate": 3.2400508627680323e-06, - "loss": 0.9564, - "step": 3421 - }, - { - "epoch": 0.3086080173152365, - "grad_norm": 1.678288573634526, - "learning_rate": 3.2395924488950474e-06, - "loss": 1.0501, - "step": 3422 - }, - { - "epoch": 0.30869820083870675, - "grad_norm": 2.2459228450604343, - "learning_rate": 3.2391339292532004e-06, - "loss": 1.0841, - "step": 3423 - }, - { - "epoch": 0.30878838436217704, - "grad_norm": 1.660545967584585, - "learning_rate": 3.238675303881614e-06, - "loss": 0.9084, - "step": 3424 - }, - { - "epoch": 0.30887856788564727, - "grad_norm": 1.6404354894267263, - "learning_rate": 3.2382165728194203e-06, - "loss": 0.9285, - "step": 3425 - }, - { - "epoch": 0.30896875140911756, - "grad_norm": 1.3723425761342363, - "learning_rate": 3.237757736105761e-06, - "loss": 0.9437, - "step": 3426 - }, - { - "epoch": 0.3090589349325878, - "grad_norm": 1.2390806584031846, - "learning_rate": 3.2372987937797867e-06, - "loss": 1.034, - "step": 3427 - }, - { - "epoch": 0.3091491184560581, - "grad_norm": 1.2110914071013283, - "learning_rate": 3.2368397458806573e-06, - "loss": 0.8656, - "step": 3428 - }, - { - "epoch": 0.3092393019795283, - "grad_norm": 1.5687023657143748, - "learning_rate": 3.2363805924475412e-06, - "loss": 0.9937, - "step": 3429 - }, - { - "epoch": 0.3093294855029986, - "grad_norm": 1.824520372046499, - "learning_rate": 3.2359213335196153e-06, - "loss": 0.9075, - "step": 3430 - }, - { - "epoch": 0.3094196690264689, - "grad_norm": 1.4861667460230912, - "learning_rate": 3.2354619691360663e-06, - "loss": 0.9132, - "step": 3431 - }, - { - "epoch": 0.3095098525499391, - "grad_norm": 1.2167138917893832, - "learning_rate": 3.2350024993360898e-06, - "loss": 0.9984, - "step": 3432 - }, - { - "epoch": 0.3096000360734094, - "grad_norm": 1.7600636626780541, - "learning_rate": 3.2345429241588902e-06, - "loss": 0.9622, - "step": 3433 - }, - { - "epoch": 0.30969021959687965, - "grad_norm": 1.5802330818157777, - "learning_rate": 3.234083243643681e-06, - "loss": 0.9831, - "step": 3434 - }, - { - "epoch": 0.30978040312034993, - "grad_norm": 1.4703238962118954, - "learning_rate": 3.233623457829686e-06, - "loss": 1.0297, - "step": 3435 - }, - { - "epoch": 0.30987058664382017, - "grad_norm": 1.7836140120596449, - "learning_rate": 3.2331635667561344e-06, - "loss": 0.9337, - "step": 3436 - }, - { - "epoch": 0.30996077016729046, - "grad_norm": 1.4101576871151351, - "learning_rate": 3.2327035704622674e-06, - "loss": 0.9813, - "step": 3437 - }, - { - "epoch": 0.3100509536907607, - "grad_norm": 1.3795165961616367, - "learning_rate": 3.2322434689873353e-06, - "loss": 0.8595, - "step": 3438 - }, - { - "epoch": 0.310141137214231, - "grad_norm": 1.3825136281974926, - "learning_rate": 3.2317832623705957e-06, - "loss": 1.0398, - "step": 3439 - }, - { - "epoch": 0.3102313207377012, - "grad_norm": 1.9202172798157193, - "learning_rate": 3.231322950651316e-06, - "loss": 0.9055, - "step": 3440 - }, - { - "epoch": 0.3103215042611715, - "grad_norm": 1.1329847533706, - "learning_rate": 3.2308625338687735e-06, - "loss": 0.9782, - "step": 3441 - }, - { - "epoch": 0.31041168778464173, - "grad_norm": 2.130701270737137, - "learning_rate": 3.230402012062252e-06, - "loss": 1.0763, - "step": 3442 - }, - { - "epoch": 0.310501871308112, - "grad_norm": 0.7912878251036646, - "learning_rate": 3.2299413852710466e-06, - "loss": 0.8403, - "step": 3443 - }, - { - "epoch": 0.31059205483158225, - "grad_norm": 1.4775569735674965, - "learning_rate": 3.2294806535344606e-06, - "loss": 0.9228, - "step": 3444 - }, - { - "epoch": 0.31068223835505254, - "grad_norm": 1.5765201226428347, - "learning_rate": 3.2290198168918056e-06, - "loss": 0.9986, - "step": 3445 - }, - { - "epoch": 0.3107724218785228, - "grad_norm": 1.409863216989115, - "learning_rate": 3.2285588753824035e-06, - "loss": 0.9999, - "step": 3446 - }, - { - "epoch": 0.31086260540199306, - "grad_norm": 1.551301345282838, - "learning_rate": 3.228097829045584e-06, - "loss": 0.9712, - "step": 3447 - }, - { - "epoch": 0.3109527889254633, - "grad_norm": 1.17133664798937, - "learning_rate": 3.227636677920685e-06, - "loss": 0.9922, - "step": 3448 - }, - { - "epoch": 0.3110429724489336, - "grad_norm": 1.4751480028285378, - "learning_rate": 3.2271754220470567e-06, - "loss": 1.0088, - "step": 3449 - }, - { - "epoch": 0.3111331559724038, - "grad_norm": 1.584193957817962, - "learning_rate": 3.2267140614640547e-06, - "loss": 1.0445, - "step": 3450 - }, - { - "epoch": 0.3112233394958741, - "grad_norm": 1.4678281772581152, - "learning_rate": 3.2262525962110445e-06, - "loss": 1.002, - "step": 3451 - }, - { - "epoch": 0.31131352301934434, - "grad_norm": 1.4315029770276888, - "learning_rate": 3.2257910263274015e-06, - "loss": 0.8875, - "step": 3452 - }, - { - "epoch": 0.31140370654281463, - "grad_norm": 1.6681259760133782, - "learning_rate": 3.225329351852509e-06, - "loss": 0.9062, - "step": 3453 - }, - { - "epoch": 0.3114938900662849, - "grad_norm": 1.7508145888770272, - "learning_rate": 3.2248675728257596e-06, - "loss": 0.9665, - "step": 3454 - }, - { - "epoch": 0.31158407358975515, - "grad_norm": 1.4888748226798192, - "learning_rate": 3.2244056892865557e-06, - "loss": 1.0295, - "step": 3455 - }, - { - "epoch": 0.31167425711322544, - "grad_norm": 1.7077978582007585, - "learning_rate": 3.2239437012743063e-06, - "loss": 0.9505, - "step": 3456 - }, - { - "epoch": 0.31176444063669567, - "grad_norm": 1.2410414105006788, - "learning_rate": 3.223481608828432e-06, - "loss": 0.9834, - "step": 3457 - }, - { - "epoch": 0.31185462416016596, - "grad_norm": 1.6838116781355144, - "learning_rate": 3.223019411988361e-06, - "loss": 0.98, - "step": 3458 - }, - { - "epoch": 0.3119448076836362, - "grad_norm": 1.445930390118922, - "learning_rate": 3.22255711079353e-06, - "loss": 0.9631, - "step": 3459 - }, - { - "epoch": 0.3120349912071065, - "grad_norm": 1.3631945913147077, - "learning_rate": 3.222094705283385e-06, - "loss": 0.9267, - "step": 3460 - }, - { - "epoch": 0.3121251747305767, - "grad_norm": 1.3999639949255884, - "learning_rate": 3.2216321954973805e-06, - "loss": 0.957, - "step": 3461 - }, - { - "epoch": 0.312215358254047, - "grad_norm": 1.364800205420426, - "learning_rate": 3.2211695814749816e-06, - "loss": 0.9523, - "step": 3462 - }, - { - "epoch": 0.31230554177751724, - "grad_norm": 1.9685997678566483, - "learning_rate": 3.220706863255661e-06, - "loss": 1.0584, - "step": 3463 - }, - { - "epoch": 0.3123957253009875, - "grad_norm": 1.3902931245982977, - "learning_rate": 3.2202440408788994e-06, - "loss": 0.9553, - "step": 3464 - }, - { - "epoch": 0.31248590882445776, - "grad_norm": 1.4956585522533508, - "learning_rate": 3.2197811143841883e-06, - "loss": 1.0306, - "step": 3465 - }, - { - "epoch": 0.31257609234792805, - "grad_norm": 1.1968124054559048, - "learning_rate": 3.2193180838110267e-06, - "loss": 0.9534, - "step": 3466 - }, - { - "epoch": 0.3126662758713983, - "grad_norm": 1.637038607058788, - "learning_rate": 3.2188549491989225e-06, - "loss": 0.8539, - "step": 3467 - }, - { - "epoch": 0.31275645939486857, - "grad_norm": 1.5600028874297196, - "learning_rate": 3.2183917105873934e-06, - "loss": 0.9851, - "step": 3468 - }, - { - "epoch": 0.3128466429183388, - "grad_norm": 1.163642756420153, - "learning_rate": 3.217928368015966e-06, - "loss": 1.0175, - "step": 3469 - }, - { - "epoch": 0.3129368264418091, - "grad_norm": 1.3093066195383647, - "learning_rate": 3.217464921524174e-06, - "loss": 0.9904, - "step": 3470 - }, - { - "epoch": 0.3130270099652793, - "grad_norm": 1.4606420860874516, - "learning_rate": 3.2170013711515616e-06, - "loss": 0.9766, - "step": 3471 - }, - { - "epoch": 0.3131171934887496, - "grad_norm": 1.9224717369035924, - "learning_rate": 3.216537716937682e-06, - "loss": 0.8853, - "step": 3472 - }, - { - "epoch": 0.31320737701221985, - "grad_norm": 1.4665406024825425, - "learning_rate": 3.2160739589220968e-06, - "loss": 0.9325, - "step": 3473 - }, - { - "epoch": 0.31329756053569013, - "grad_norm": 1.3469270364219426, - "learning_rate": 3.215610097144376e-06, - "loss": 1.0159, - "step": 3474 - }, - { - "epoch": 0.31338774405916037, - "grad_norm": 1.578298578064295, - "learning_rate": 3.215146131644099e-06, - "loss": 0.9188, - "step": 3475 - }, - { - "epoch": 0.31347792758263066, - "grad_norm": 1.4680024537525926, - "learning_rate": 3.214682062460854e-06, - "loss": 1.0339, - "step": 3476 - }, - { - "epoch": 0.3135681111061009, - "grad_norm": 1.5063532750351043, - "learning_rate": 3.2142178896342367e-06, - "loss": 0.878, - "step": 3477 - }, - { - "epoch": 0.3136582946295712, - "grad_norm": 1.294954555553629, - "learning_rate": 3.2137536132038552e-06, - "loss": 0.9451, - "step": 3478 - }, - { - "epoch": 0.31374847815304147, - "grad_norm": 1.4647914215913815, - "learning_rate": 3.2132892332093226e-06, - "loss": 0.9606, - "step": 3479 - }, - { - "epoch": 0.3138386616765117, - "grad_norm": 1.3873516518958222, - "learning_rate": 3.2128247496902623e-06, - "loss": 1.0652, - "step": 3480 - }, - { - "epoch": 0.313928845199982, - "grad_norm": 1.3551340267013412, - "learning_rate": 3.2123601626863064e-06, - "loss": 0.9808, - "step": 3481 - }, - { - "epoch": 0.3140190287234522, - "grad_norm": 0.8910987330255072, - "learning_rate": 3.2118954722370974e-06, - "loss": 0.7722, - "step": 3482 - }, - { - "epoch": 0.3141092122469225, - "grad_norm": 1.4020477662622342, - "learning_rate": 3.2114306783822837e-06, - "loss": 0.911, - "step": 3483 - }, - { - "epoch": 0.31419939577039274, - "grad_norm": 1.3095842399112592, - "learning_rate": 3.210965781161525e-06, - "loss": 0.9345, - "step": 3484 - }, - { - "epoch": 0.31428957929386303, - "grad_norm": 1.395073195010443, - "learning_rate": 3.2105007806144892e-06, - "loss": 1.085, - "step": 3485 - }, - { - "epoch": 0.31437976281733326, - "grad_norm": 1.6138876451905901, - "learning_rate": 3.2100356767808513e-06, - "loss": 0.9616, - "step": 3486 - }, - { - "epoch": 0.31446994634080355, - "grad_norm": 1.5850193767386065, - "learning_rate": 3.2095704697002977e-06, - "loss": 1.0216, - "step": 3487 - }, - { - "epoch": 0.3145601298642738, - "grad_norm": 1.3889806219112792, - "learning_rate": 3.209105159412522e-06, - "loss": 0.9655, - "step": 3488 - }, - { - "epoch": 0.3146503133877441, - "grad_norm": 1.6741167529602332, - "learning_rate": 3.208639745957228e-06, - "loss": 0.9228, - "step": 3489 - }, - { - "epoch": 0.3147404969112143, - "grad_norm": 1.2328126411776348, - "learning_rate": 3.2081742293741256e-06, - "loss": 0.9826, - "step": 3490 - }, - { - "epoch": 0.3148306804346846, - "grad_norm": 1.2507030893907571, - "learning_rate": 3.2077086097029366e-06, - "loss": 1.0176, - "step": 3491 - }, - { - "epoch": 0.31492086395815483, - "grad_norm": 1.3308026865179603, - "learning_rate": 3.2072428869833895e-06, - "loss": 0.9641, - "step": 3492 - }, - { - "epoch": 0.3150110474816251, - "grad_norm": 1.2904465152456792, - "learning_rate": 3.206777061255223e-06, - "loss": 0.9725, - "step": 3493 - }, - { - "epoch": 0.31510123100509535, - "grad_norm": 1.8766403016948108, - "learning_rate": 3.206311132558183e-06, - "loss": 1.0009, - "step": 3494 - }, - { - "epoch": 0.31519141452856564, - "grad_norm": 1.5520938727855202, - "learning_rate": 3.205845100932026e-06, - "loss": 0.9792, - "step": 3495 - }, - { - "epoch": 0.31528159805203587, - "grad_norm": 1.431970576506155, - "learning_rate": 3.205378966416516e-06, - "loss": 0.9908, - "step": 3496 - }, - { - "epoch": 0.31537178157550616, - "grad_norm": 1.537635645271419, - "learning_rate": 3.204912729051426e-06, - "loss": 0.9617, - "step": 3497 - }, - { - "epoch": 0.3154619650989764, - "grad_norm": 1.7086800091689258, - "learning_rate": 3.2044463888765384e-06, - "loss": 0.9398, - "step": 3498 - }, - { - "epoch": 0.3155521486224467, - "grad_norm": 1.7018538464686062, - "learning_rate": 3.2039799459316436e-06, - "loss": 0.8652, - "step": 3499 - }, - { - "epoch": 0.3156423321459169, - "grad_norm": 1.6229161325763155, - "learning_rate": 3.2035134002565407e-06, - "loss": 0.9976, - "step": 3500 - }, - { - "epoch": 0.3157325156693872, - "grad_norm": 1.4610265204903194, - "learning_rate": 3.203046751891039e-06, - "loss": 0.9724, - "step": 3501 - }, - { - "epoch": 0.3158226991928575, - "grad_norm": 1.4676848262351037, - "learning_rate": 3.2025800008749545e-06, - "loss": 0.9816, - "step": 3502 - }, - { - "epoch": 0.3159128827163277, - "grad_norm": 1.7314681015623434, - "learning_rate": 3.202113147248114e-06, - "loss": 0.9455, - "step": 3503 - }, - { - "epoch": 0.316003066239798, - "grad_norm": 1.4074585278026617, - "learning_rate": 3.20164619105035e-06, - "loss": 0.9378, - "step": 3504 - }, - { - "epoch": 0.31609324976326825, - "grad_norm": 1.2169344657392664, - "learning_rate": 3.201179132321508e-06, - "loss": 0.8128, - "step": 3505 - }, - { - "epoch": 0.31618343328673854, - "grad_norm": 2.2192755063400242, - "learning_rate": 3.200711971101439e-06, - "loss": 0.9082, - "step": 3506 - }, - { - "epoch": 0.31627361681020877, - "grad_norm": 1.653984751975869, - "learning_rate": 3.2002447074300047e-06, - "loss": 0.9673, - "step": 3507 - }, - { - "epoch": 0.31636380033367906, - "grad_norm": 1.176997775228856, - "learning_rate": 3.1997773413470736e-06, - "loss": 0.9636, - "step": 3508 - }, - { - "epoch": 0.3164539838571493, - "grad_norm": 1.8280802012114121, - "learning_rate": 3.199309872892524e-06, - "loss": 0.9564, - "step": 3509 - }, - { - "epoch": 0.3165441673806196, - "grad_norm": 1.4372582439638044, - "learning_rate": 3.198842302106243e-06, - "loss": 1.009, - "step": 3510 - }, - { - "epoch": 0.3166343509040898, - "grad_norm": 1.502671326756691, - "learning_rate": 3.1983746290281265e-06, - "loss": 0.9523, - "step": 3511 - }, - { - "epoch": 0.3167245344275601, - "grad_norm": 1.2325725674883228, - "learning_rate": 3.197906853698079e-06, - "loss": 0.9678, - "step": 3512 - }, - { - "epoch": 0.31681471795103033, - "grad_norm": 1.3428050422416973, - "learning_rate": 3.1974389761560137e-06, - "loss": 0.9095, - "step": 3513 - }, - { - "epoch": 0.3169049014745006, - "grad_norm": 1.329734679482222, - "learning_rate": 3.1969709964418525e-06, - "loss": 0.916, - "step": 3514 - }, - { - "epoch": 0.31699508499797086, - "grad_norm": 1.620974763846765, - "learning_rate": 3.196502914595525e-06, - "loss": 0.9775, - "step": 3515 - }, - { - "epoch": 0.31708526852144114, - "grad_norm": 1.6192128495511298, - "learning_rate": 3.1960347306569723e-06, - "loss": 0.9928, - "step": 3516 - }, - { - "epoch": 0.3171754520449114, - "grad_norm": 1.4421205482357988, - "learning_rate": 3.195566444666141e-06, - "loss": 0.9091, - "step": 3517 - }, - { - "epoch": 0.31726563556838167, - "grad_norm": 1.359169604689843, - "learning_rate": 3.1950980566629886e-06, - "loss": 1.0702, - "step": 3518 - }, - { - "epoch": 0.3173558190918519, - "grad_norm": 1.317561290915033, - "learning_rate": 3.1946295666874797e-06, - "loss": 0.9783, - "step": 3519 - }, - { - "epoch": 0.3174460026153222, - "grad_norm": 1.2612525385669457, - "learning_rate": 3.19416097477959e-06, - "loss": 0.9771, - "step": 3520 - }, - { - "epoch": 0.3175361861387924, - "grad_norm": 1.5904584816217917, - "learning_rate": 3.1936922809793005e-06, - "loss": 1.0238, - "step": 3521 - }, - { - "epoch": 0.3176263696622627, - "grad_norm": 1.2145445231305363, - "learning_rate": 3.193223485326604e-06, - "loss": 1.0832, - "step": 3522 - }, - { - "epoch": 0.31771655318573294, - "grad_norm": 1.6340553627580523, - "learning_rate": 3.1927545878615005e-06, - "loss": 1.0437, - "step": 3523 - }, - { - "epoch": 0.31780673670920323, - "grad_norm": 1.500272726061324, - "learning_rate": 3.192285588623999e-06, - "loss": 0.9163, - "step": 3524 - }, - { - "epoch": 0.31789692023267346, - "grad_norm": 1.1284703135678054, - "learning_rate": 3.191816487654117e-06, - "loss": 0.9002, - "step": 3525 - }, - { - "epoch": 0.31798710375614375, - "grad_norm": 1.7579852210064855, - "learning_rate": 3.19134728499188e-06, - "loss": 0.9355, - "step": 3526 - }, - { - "epoch": 0.31807728727961404, - "grad_norm": 2.2104394574178383, - "learning_rate": 3.1908779806773235e-06, - "loss": 0.8481, - "step": 3527 - }, - { - "epoch": 0.3181674708030843, - "grad_norm": 1.3154155047925802, - "learning_rate": 3.190408574750492e-06, - "loss": 1.0207, - "step": 3528 - }, - { - "epoch": 0.31825765432655456, - "grad_norm": 1.301859365910546, - "learning_rate": 3.1899390672514367e-06, - "loss": 1.0349, - "step": 3529 - }, - { - "epoch": 0.3183478378500248, - "grad_norm": 1.5994508545967085, - "learning_rate": 3.189469458220219e-06, - "loss": 0.8774, - "step": 3530 - }, - { - "epoch": 0.3184380213734951, - "grad_norm": 1.1947202524240241, - "learning_rate": 3.1889997476969086e-06, - "loss": 0.9578, - "step": 3531 - }, - { - "epoch": 0.3185282048969653, - "grad_norm": 1.2980376397951452, - "learning_rate": 3.188529935721583e-06, - "loss": 0.9469, - "step": 3532 - }, - { - "epoch": 0.3186183884204356, - "grad_norm": 1.347190842913022, - "learning_rate": 3.18806002233433e-06, - "loss": 1.0229, - "step": 3533 - }, - { - "epoch": 0.31870857194390584, - "grad_norm": 1.532498454082263, - "learning_rate": 3.187590007575245e-06, - "loss": 1.0316, - "step": 3534 - }, - { - "epoch": 0.3187987554673761, - "grad_norm": 1.3520260054666138, - "learning_rate": 3.1871198914844327e-06, - "loss": 0.899, - "step": 3535 - }, - { - "epoch": 0.31888893899084636, - "grad_norm": 1.5517383763876798, - "learning_rate": 3.1866496741020057e-06, - "loss": 0.8998, - "step": 3536 - }, - { - "epoch": 0.31897912251431665, - "grad_norm": 1.0809970140795266, - "learning_rate": 3.186179355468085e-06, - "loss": 0.9279, - "step": 3537 - }, - { - "epoch": 0.3190693060377869, - "grad_norm": 1.717240936864044, - "learning_rate": 3.1857089356228015e-06, - "loss": 0.9635, - "step": 3538 - }, - { - "epoch": 0.31915948956125717, - "grad_norm": 1.4535970433884797, - "learning_rate": 3.1852384146062933e-06, - "loss": 0.9088, - "step": 3539 - }, - { - "epoch": 0.3192496730847274, - "grad_norm": 0.747281193109913, - "learning_rate": 3.184767792458708e-06, - "loss": 0.7622, - "step": 3540 - }, - { - "epoch": 0.3193398566081977, - "grad_norm": 1.8375038120982792, - "learning_rate": 3.1842970692202023e-06, - "loss": 1.0895, - "step": 3541 - }, - { - "epoch": 0.3194300401316679, - "grad_norm": 1.6684248789702723, - "learning_rate": 3.1838262449309403e-06, - "loss": 0.952, - "step": 3542 - }, - { - "epoch": 0.3195202236551382, - "grad_norm": 1.5751231887113113, - "learning_rate": 3.1833553196310956e-06, - "loss": 0.8772, - "step": 3543 - }, - { - "epoch": 0.31961040717860845, - "grad_norm": 1.3620021470454877, - "learning_rate": 3.18288429336085e-06, - "loss": 0.8313, - "step": 3544 - }, - { - "epoch": 0.31970059070207874, - "grad_norm": 1.457677375770402, - "learning_rate": 3.182413166160394e-06, - "loss": 1.0714, - "step": 3545 - }, - { - "epoch": 0.31979077422554897, - "grad_norm": 1.5220129150313866, - "learning_rate": 3.1819419380699275e-06, - "loss": 1.0113, - "step": 3546 - }, - { - "epoch": 0.31988095774901926, - "grad_norm": 1.2678996238269273, - "learning_rate": 3.181470609129658e-06, - "loss": 0.9597, - "step": 3547 - }, - { - "epoch": 0.3199711412724895, - "grad_norm": 1.7272902806088262, - "learning_rate": 3.1809991793798e-06, - "loss": 1.0208, - "step": 3548 - }, - { - "epoch": 0.3200613247959598, - "grad_norm": 1.501433164681049, - "learning_rate": 3.1805276488605806e-06, - "loss": 0.9243, - "step": 3549 - }, - { - "epoch": 0.32015150831943007, - "grad_norm": 1.3939181564227918, - "learning_rate": 3.1800560176122336e-06, - "loss": 0.9606, - "step": 3550 - }, - { - "epoch": 0.3202416918429003, - "grad_norm": 1.4327919196228471, - "learning_rate": 3.179584285675e-06, - "loss": 0.9709, - "step": 3551 - }, - { - "epoch": 0.3203318753663706, - "grad_norm": 1.5942815754314712, - "learning_rate": 3.1791124530891315e-06, - "loss": 0.8733, - "step": 3552 - }, - { - "epoch": 0.3204220588898408, - "grad_norm": 1.5821760700015646, - "learning_rate": 3.178640519894886e-06, - "loss": 0.9605, - "step": 3553 - }, - { - "epoch": 0.3205122424133111, - "grad_norm": 1.655023660634718, - "learning_rate": 3.1781684861325324e-06, - "loss": 0.917, - "step": 3554 - }, - { - "epoch": 0.32060242593678134, - "grad_norm": 1.4801147359758404, - "learning_rate": 3.177696351842348e-06, - "loss": 0.9663, - "step": 3555 - }, - { - "epoch": 0.32069260946025163, - "grad_norm": 0.9213654354449321, - "learning_rate": 3.1772241170646167e-06, - "loss": 0.8124, - "step": 3556 - }, - { - "epoch": 0.32078279298372187, - "grad_norm": 1.6069695727465265, - "learning_rate": 3.1767517818396334e-06, - "loss": 1.0662, - "step": 3557 - }, - { - "epoch": 0.32087297650719215, - "grad_norm": 1.2948046785142227, - "learning_rate": 3.1762793462076986e-06, - "loss": 1.1102, - "step": 3558 - }, - { - "epoch": 0.3209631600306624, - "grad_norm": 1.831683746110163, - "learning_rate": 3.1758068102091236e-06, - "loss": 0.8468, - "step": 3559 - }, - { - "epoch": 0.3210533435541327, - "grad_norm": 1.7115921092053643, - "learning_rate": 3.175334173884229e-06, - "loss": 0.9318, - "step": 3560 - }, - { - "epoch": 0.3211435270776029, - "grad_norm": 1.2320307429862325, - "learning_rate": 3.174861437273342e-06, - "loss": 0.9599, - "step": 3561 - }, - { - "epoch": 0.3212337106010732, - "grad_norm": 1.4104670131981567, - "learning_rate": 3.174388600416799e-06, - "loss": 0.9293, - "step": 3562 - }, - { - "epoch": 0.32132389412454343, - "grad_norm": 0.8163297699510034, - "learning_rate": 3.1739156633549445e-06, - "loss": 0.8113, - "step": 3563 - }, - { - "epoch": 0.3214140776480137, - "grad_norm": 1.3347929574670332, - "learning_rate": 3.173442626128133e-06, - "loss": 1.0222, - "step": 3564 - }, - { - "epoch": 0.32150426117148395, - "grad_norm": 1.7697459379069524, - "learning_rate": 3.1729694887767265e-06, - "loss": 1.0767, - "step": 3565 - }, - { - "epoch": 0.32159444469495424, - "grad_norm": 1.6657845626467493, - "learning_rate": 3.172496251341096e-06, - "loss": 0.9599, - "step": 3566 - }, - { - "epoch": 0.3216846282184245, - "grad_norm": 1.588077097849276, - "learning_rate": 3.172022913861619e-06, - "loss": 0.8201, - "step": 3567 - }, - { - "epoch": 0.32177481174189476, - "grad_norm": 1.756544003025318, - "learning_rate": 3.171549476378686e-06, - "loss": 0.9207, - "step": 3568 - }, - { - "epoch": 0.321864995265365, - "grad_norm": 1.5129456580741023, - "learning_rate": 3.1710759389326906e-06, - "loss": 0.9311, - "step": 3569 - }, - { - "epoch": 0.3219551787888353, - "grad_norm": 1.539029813312877, - "learning_rate": 3.1706023015640396e-06, - "loss": 0.9482, - "step": 3570 - }, - { - "epoch": 0.3220453623123055, - "grad_norm": 1.4440261477641725, - "learning_rate": 3.1701285643131453e-06, - "loss": 0.9817, - "step": 3571 - }, - { - "epoch": 0.3221355458357758, - "grad_norm": 1.6064252023104861, - "learning_rate": 3.16965472722043e-06, - "loss": 0.9747, - "step": 3572 - }, - { - "epoch": 0.32222572935924604, - "grad_norm": 1.7353306105190922, - "learning_rate": 3.169180790326324e-06, - "loss": 0.8843, - "step": 3573 - }, - { - "epoch": 0.3223159128827163, - "grad_norm": 1.3639595508506883, - "learning_rate": 3.168706753671266e-06, - "loss": 0.9724, - "step": 3574 - }, - { - "epoch": 0.3224060964061866, - "grad_norm": 1.311719071715942, - "learning_rate": 3.168232617295704e-06, - "loss": 0.8951, - "step": 3575 - }, - { - "epoch": 0.32249627992965685, - "grad_norm": 1.5247473929639834, - "learning_rate": 3.167758381240093e-06, - "loss": 0.9585, - "step": 3576 - }, - { - "epoch": 0.32258646345312714, - "grad_norm": 1.4051196747851924, - "learning_rate": 3.1672840455448978e-06, - "loss": 1.0038, - "step": 3577 - }, - { - "epoch": 0.32267664697659737, - "grad_norm": 1.3939506967875381, - "learning_rate": 3.166809610250592e-06, - "loss": 0.8994, - "step": 3578 - }, - { - "epoch": 0.32276683050006766, - "grad_norm": 1.5089575022280362, - "learning_rate": 3.166335075397656e-06, - "loss": 0.89, - "step": 3579 - }, - { - "epoch": 0.3228570140235379, - "grad_norm": 1.8547294348432601, - "learning_rate": 3.1658604410265808e-06, - "loss": 0.9297, - "step": 3580 - }, - { - "epoch": 0.3229471975470082, - "grad_norm": 1.2424575224279668, - "learning_rate": 3.1653857071778644e-06, - "loss": 0.8294, - "step": 3581 - }, - { - "epoch": 0.3230373810704784, - "grad_norm": 1.5000064372878228, - "learning_rate": 3.1649108738920133e-06, - "loss": 0.953, - "step": 3582 - }, - { - "epoch": 0.3231275645939487, - "grad_norm": 1.5191722141707944, - "learning_rate": 3.1644359412095432e-06, - "loss": 0.9714, - "step": 3583 - }, - { - "epoch": 0.32321774811741893, - "grad_norm": 5.0251756096515585, - "learning_rate": 3.163960909170978e-06, - "loss": 0.8299, - "step": 3584 - }, - { - "epoch": 0.3233079316408892, - "grad_norm": 2.0171328087328324, - "learning_rate": 3.1634857778168496e-06, - "loss": 0.8604, - "step": 3585 - }, - { - "epoch": 0.32339811516435946, - "grad_norm": 1.5201868050376879, - "learning_rate": 3.1630105471877002e-06, - "loss": 1.0027, - "step": 3586 - }, - { - "epoch": 0.32348829868782975, - "grad_norm": 1.5739764081454453, - "learning_rate": 3.162535217324077e-06, - "loss": 0.9453, - "step": 3587 - }, - { - "epoch": 0.3235784822113, - "grad_norm": 1.8251906752591238, - "learning_rate": 3.1620597882665393e-06, - "loss": 0.9939, - "step": 3588 - }, - { - "epoch": 0.32366866573477027, - "grad_norm": 1.5828914611341245, - "learning_rate": 3.1615842600556535e-06, - "loss": 1.0165, - "step": 3589 - }, - { - "epoch": 0.3237588492582405, - "grad_norm": 2.23421429676402, - "learning_rate": 3.1611086327319932e-06, - "loss": 0.9567, - "step": 3590 - }, - { - "epoch": 0.3238490327817108, - "grad_norm": 1.5578639872315596, - "learning_rate": 3.160632906336142e-06, - "loss": 1.019, - "step": 3591 - }, - { - "epoch": 0.323939216305181, - "grad_norm": 1.6790667429206043, - "learning_rate": 3.160157080908692e-06, - "loss": 0.9859, - "step": 3592 - }, - { - "epoch": 0.3240293998286513, - "grad_norm": 1.3274392994510567, - "learning_rate": 3.1596811564902426e-06, - "loss": 1.014, - "step": 3593 - }, - { - "epoch": 0.32411958335212154, - "grad_norm": 1.8552868563187543, - "learning_rate": 3.1592051331214023e-06, - "loss": 0.9039, - "step": 3594 - }, - { - "epoch": 0.32420976687559183, - "grad_norm": 1.413576833034068, - "learning_rate": 3.158729010842789e-06, - "loss": 0.9927, - "step": 3595 - }, - { - "epoch": 0.32429995039906206, - "grad_norm": 1.4611071319832103, - "learning_rate": 3.1582527896950266e-06, - "loss": 1.0154, - "step": 3596 - }, - { - "epoch": 0.32439013392253235, - "grad_norm": 1.653438340833757, - "learning_rate": 3.157776469718749e-06, - "loss": 0.9821, - "step": 3597 - }, - { - "epoch": 0.32448031744600264, - "grad_norm": 1.7120317417571251, - "learning_rate": 3.1573000509546004e-06, - "loss": 0.9533, - "step": 3598 - }, - { - "epoch": 0.3245705009694729, - "grad_norm": 1.353739974459424, - "learning_rate": 3.1568235334432296e-06, - "loss": 1.0122, - "step": 3599 - }, - { - "epoch": 0.32466068449294316, - "grad_norm": 1.5842961429030111, - "learning_rate": 3.1563469172252964e-06, - "loss": 1.0292, - "step": 3600 - }, - { - "epoch": 0.3247508680164134, - "grad_norm": 1.5810636599064047, - "learning_rate": 3.155870202341468e-06, - "loss": 1.0262, - "step": 3601 - }, - { - "epoch": 0.3248410515398837, - "grad_norm": 1.8807850919099405, - "learning_rate": 3.155393388832421e-06, - "loss": 0.9586, - "step": 3602 - }, - { - "epoch": 0.3249312350633539, - "grad_norm": 1.7102153072554989, - "learning_rate": 3.1549164767388386e-06, - "loss": 0.9812, - "step": 3603 - }, - { - "epoch": 0.3250214185868242, - "grad_norm": 1.3706733080712947, - "learning_rate": 3.1544394661014145e-06, - "loss": 0.9627, - "step": 3604 - }, - { - "epoch": 0.32511160211029444, - "grad_norm": 1.5614828995529582, - "learning_rate": 3.15396235696085e-06, - "loss": 0.8827, - "step": 3605 - }, - { - "epoch": 0.32520178563376473, - "grad_norm": 1.3197445974208684, - "learning_rate": 3.153485149357854e-06, - "loss": 0.9505, - "step": 3606 - }, - { - "epoch": 0.32529196915723496, - "grad_norm": 1.3951821397670627, - "learning_rate": 3.153007843333145e-06, - "loss": 0.9469, - "step": 3607 - }, - { - "epoch": 0.32538215268070525, - "grad_norm": 1.438337413845411, - "learning_rate": 3.152530438927449e-06, - "loss": 0.9848, - "step": 3608 - }, - { - "epoch": 0.3254723362041755, - "grad_norm": 1.4688515323688967, - "learning_rate": 3.1520529361815008e-06, - "loss": 1.0164, - "step": 3609 - }, - { - "epoch": 0.32556251972764577, - "grad_norm": 1.4609077164985056, - "learning_rate": 3.151575335136044e-06, - "loss": 1.0003, - "step": 3610 - }, - { - "epoch": 0.325652703251116, - "grad_norm": 1.433211356155244, - "learning_rate": 3.1510976358318298e-06, - "loss": 0.9489, - "step": 3611 - }, - { - "epoch": 0.3257428867745863, - "grad_norm": 1.5495982233891028, - "learning_rate": 3.1506198383096186e-06, - "loss": 0.9139, - "step": 3612 - }, - { - "epoch": 0.3258330702980565, - "grad_norm": 1.691416046607563, - "learning_rate": 3.150141942610178e-06, - "loss": 0.9785, - "step": 3613 - }, - { - "epoch": 0.3259232538215268, - "grad_norm": 0.7063487220227663, - "learning_rate": 3.1496639487742853e-06, - "loss": 0.8086, - "step": 3614 - }, - { - "epoch": 0.32601343734499705, - "grad_norm": 1.4135651530500528, - "learning_rate": 3.1491858568427247e-06, - "loss": 1.0037, - "step": 3615 - }, - { - "epoch": 0.32610362086846734, - "grad_norm": 1.4889324695857822, - "learning_rate": 3.1487076668562903e-06, - "loss": 1.013, - "step": 3616 - }, - { - "epoch": 0.32619380439193757, - "grad_norm": 1.566035657313529, - "learning_rate": 3.1482293788557847e-06, - "loss": 1.0055, - "step": 3617 - }, - { - "epoch": 0.32628398791540786, - "grad_norm": 1.386463281444468, - "learning_rate": 3.1477509928820165e-06, - "loss": 0.9378, - "step": 3618 - }, - { - "epoch": 0.3263741714388781, - "grad_norm": 1.4468037927625836, - "learning_rate": 3.147272508975805e-06, - "loss": 0.9611, - "step": 3619 - }, - { - "epoch": 0.3264643549623484, - "grad_norm": 0.8983070942320549, - "learning_rate": 3.1467939271779775e-06, - "loss": 0.8251, - "step": 3620 - }, - { - "epoch": 0.32655453848581867, - "grad_norm": 1.2023957754954315, - "learning_rate": 3.146315247529368e-06, - "loss": 0.9361, - "step": 3621 - }, - { - "epoch": 0.3266447220092889, - "grad_norm": 1.6322540350274162, - "learning_rate": 3.1458364700708212e-06, - "loss": 0.9271, - "step": 3622 - }, - { - "epoch": 0.3267349055327592, - "grad_norm": 1.4041268745843856, - "learning_rate": 3.1453575948431892e-06, - "loss": 0.992, - "step": 3623 - }, - { - "epoch": 0.3268250890562294, - "grad_norm": 1.5544601350852372, - "learning_rate": 3.144878621887331e-06, - "loss": 0.9658, - "step": 3624 - }, - { - "epoch": 0.3269152725796997, - "grad_norm": 1.5776280291030804, - "learning_rate": 3.1443995512441167e-06, - "loss": 1.0124, - "step": 3625 - }, - { - "epoch": 0.32700545610316994, - "grad_norm": 1.3970733894261997, - "learning_rate": 3.1439203829544224e-06, - "loss": 1.0386, - "step": 3626 - }, - { - "epoch": 0.32709563962664023, - "grad_norm": 1.3950151303882499, - "learning_rate": 3.143441117059133e-06, - "loss": 1.048, - "step": 3627 - }, - { - "epoch": 0.32718582315011047, - "grad_norm": 1.6606046183365117, - "learning_rate": 3.142961753599143e-06, - "loss": 0.915, - "step": 3628 - }, - { - "epoch": 0.32727600667358075, - "grad_norm": 1.330511574271577, - "learning_rate": 3.1424822926153543e-06, - "loss": 0.9377, - "step": 3629 - }, - { - "epoch": 0.327366190197051, - "grad_norm": 1.519574319184978, - "learning_rate": 3.142002734148676e-06, - "loss": 1.0051, - "step": 3630 - }, - { - "epoch": 0.3274563737205213, - "grad_norm": 1.567993881563978, - "learning_rate": 3.141523078240028e-06, - "loss": 0.9938, - "step": 3631 - }, - { - "epoch": 0.3275465572439915, - "grad_norm": 1.5707886386789771, - "learning_rate": 3.1410433249303366e-06, - "loss": 1.0181, - "step": 3632 - }, - { - "epoch": 0.3276367407674618, - "grad_norm": 1.444650034252609, - "learning_rate": 3.1405634742605366e-06, - "loss": 1.0478, - "step": 3633 - }, - { - "epoch": 0.32772692429093203, - "grad_norm": 1.632713789442995, - "learning_rate": 3.1400835262715727e-06, - "loss": 0.9991, - "step": 3634 - }, - { - "epoch": 0.3278171078144023, - "grad_norm": 1.537816429140916, - "learning_rate": 3.139603481004396e-06, - "loss": 0.9554, - "step": 3635 - }, - { - "epoch": 0.32790729133787255, - "grad_norm": 1.6080082395162192, - "learning_rate": 3.139123338499966e-06, - "loss": 0.978, - "step": 3636 - }, - { - "epoch": 0.32799747486134284, - "grad_norm": 1.537252921123524, - "learning_rate": 3.1386430987992524e-06, - "loss": 1.0031, - "step": 3637 - }, - { - "epoch": 0.3280876583848131, - "grad_norm": 1.6865085232267953, - "learning_rate": 3.1381627619432307e-06, - "loss": 0.9224, - "step": 3638 - }, - { - "epoch": 0.32817784190828336, - "grad_norm": 2.0504308201343293, - "learning_rate": 3.1376823279728864e-06, - "loss": 0.975, - "step": 3639 - }, - { - "epoch": 0.3282680254317536, - "grad_norm": 1.4604013040399384, - "learning_rate": 3.1372017969292125e-06, - "loss": 1.0453, - "step": 3640 - }, - { - "epoch": 0.3283582089552239, - "grad_norm": 1.4319239566314523, - "learning_rate": 3.136721168853211e-06, - "loss": 0.953, - "step": 3641 - }, - { - "epoch": 0.3284483924786941, - "grad_norm": 1.5636837861872248, - "learning_rate": 3.1362404437858924e-06, - "loss": 0.9361, - "step": 3642 - }, - { - "epoch": 0.3285385760021644, - "grad_norm": 1.4445639656616323, - "learning_rate": 3.135759621768273e-06, - "loss": 1.0315, - "step": 3643 - }, - { - "epoch": 0.32862875952563464, - "grad_norm": 1.4468711902411708, - "learning_rate": 3.13527870284138e-06, - "loss": 1.016, - "step": 3644 - }, - { - "epoch": 0.32871894304910493, - "grad_norm": 1.86181890335936, - "learning_rate": 3.134797687046249e-06, - "loss": 1.0276, - "step": 3645 - }, - { - "epoch": 0.3288091265725752, - "grad_norm": 1.6822769432092817, - "learning_rate": 3.1343165744239218e-06, - "loss": 0.9077, - "step": 3646 - }, - { - "epoch": 0.32889931009604545, - "grad_norm": 1.680147755573986, - "learning_rate": 3.13383536501545e-06, - "loss": 0.9785, - "step": 3647 - }, - { - "epoch": 0.32898949361951574, - "grad_norm": 1.3608885921620455, - "learning_rate": 3.133354058861893e-06, - "loss": 0.993, - "step": 3648 - }, - { - "epoch": 0.32907967714298597, - "grad_norm": 1.431433523480628, - "learning_rate": 3.132872656004318e-06, - "loss": 0.9392, - "step": 3649 - }, - { - "epoch": 0.32916986066645626, - "grad_norm": 1.4510484029372095, - "learning_rate": 3.132391156483802e-06, - "loss": 0.8708, - "step": 3650 - }, - { - "epoch": 0.3292600441899265, - "grad_norm": 1.5203103076256435, - "learning_rate": 3.131909560341428e-06, - "loss": 0.9312, - "step": 3651 - }, - { - "epoch": 0.3293502277133968, - "grad_norm": 1.3786478205677213, - "learning_rate": 3.1314278676182893e-06, - "loss": 0.916, - "step": 3652 - }, - { - "epoch": 0.329440411236867, - "grad_norm": 1.4416050554167819, - "learning_rate": 3.130946078355486e-06, - "loss": 1.04, - "step": 3653 - }, - { - "epoch": 0.3295305947603373, - "grad_norm": 1.2754667998344207, - "learning_rate": 3.130464192594128e-06, - "loss": 1.0058, - "step": 3654 - }, - { - "epoch": 0.32962077828380754, - "grad_norm": 1.2941886010965056, - "learning_rate": 3.1299822103753315e-06, - "loss": 0.8899, - "step": 3655 - }, - { - "epoch": 0.3297109618072778, - "grad_norm": 1.5727611827001033, - "learning_rate": 3.1295001317402217e-06, - "loss": 0.9688, - "step": 3656 - }, - { - "epoch": 0.32980114533074806, - "grad_norm": 1.7217147226387777, - "learning_rate": 3.1290179567299335e-06, - "loss": 0.9091, - "step": 3657 - }, - { - "epoch": 0.32989132885421835, - "grad_norm": 1.2137521334785544, - "learning_rate": 3.128535685385607e-06, - "loss": 0.9936, - "step": 3658 - }, - { - "epoch": 0.3299815123776886, - "grad_norm": 1.3610963993618206, - "learning_rate": 3.1280533177483935e-06, - "loss": 1.0511, - "step": 3659 - }, - { - "epoch": 0.33007169590115887, - "grad_norm": 1.377798570305986, - "learning_rate": 3.127570853859451e-06, - "loss": 0.9839, - "step": 3660 - }, - { - "epoch": 0.3301618794246291, - "grad_norm": 1.4073127545466937, - "learning_rate": 3.1270882937599456e-06, - "loss": 0.8574, - "step": 3661 - }, - { - "epoch": 0.3302520629480994, - "grad_norm": 1.4219089545922186, - "learning_rate": 3.1266056374910532e-06, - "loss": 0.9912, - "step": 3662 - }, - { - "epoch": 0.3303422464715696, - "grad_norm": 2.2785299100907097, - "learning_rate": 3.126122885093955e-06, - "loss": 0.8677, - "step": 3663 - }, - { - "epoch": 0.3304324299950399, - "grad_norm": 1.637808716770891, - "learning_rate": 3.1256400366098427e-06, - "loss": 0.9592, - "step": 3664 - }, - { - "epoch": 0.33052261351851014, - "grad_norm": 1.51574124549176, - "learning_rate": 3.125157092079916e-06, - "loss": 0.9277, - "step": 3665 - }, - { - "epoch": 0.33061279704198043, - "grad_norm": 2.1118655724948647, - "learning_rate": 3.1246740515453824e-06, - "loss": 0.9469, - "step": 3666 - }, - { - "epoch": 0.33070298056545067, - "grad_norm": 1.3579544888652733, - "learning_rate": 3.124190915047457e-06, - "loss": 0.9025, - "step": 3667 - }, - { - "epoch": 0.33079316408892095, - "grad_norm": 1.5650947388562326, - "learning_rate": 3.123707682627364e-06, - "loss": 0.9407, - "step": 3668 - }, - { - "epoch": 0.33088334761239124, - "grad_norm": 1.5402523031020936, - "learning_rate": 3.1232243543263356e-06, - "loss": 0.9446, - "step": 3669 - }, - { - "epoch": 0.3309735311358615, - "grad_norm": 1.6113201349233717, - "learning_rate": 3.1227409301856122e-06, - "loss": 0.873, - "step": 3670 - }, - { - "epoch": 0.33106371465933176, - "grad_norm": 1.2582294886520449, - "learning_rate": 3.1222574102464413e-06, - "loss": 0.9486, - "step": 3671 - }, - { - "epoch": 0.331153898182802, - "grad_norm": 1.324115760421675, - "learning_rate": 3.12177379455008e-06, - "loss": 0.9977, - "step": 3672 - }, - { - "epoch": 0.3312440817062723, - "grad_norm": 1.3774835958040623, - "learning_rate": 3.121290083137794e-06, - "loss": 0.9615, - "step": 3673 - }, - { - "epoch": 0.3313342652297425, - "grad_norm": 1.4163226196040661, - "learning_rate": 3.1208062760508547e-06, - "loss": 0.9453, - "step": 3674 - }, - { - "epoch": 0.3314244487532128, - "grad_norm": 1.3010524432690689, - "learning_rate": 3.1203223733305438e-06, - "loss": 0.905, - "step": 3675 - }, - { - "epoch": 0.33151463227668304, - "grad_norm": 0.7475260622208036, - "learning_rate": 3.1198383750181512e-06, - "loss": 0.7856, - "step": 3676 - }, - { - "epoch": 0.33160481580015333, - "grad_norm": 3.585717952880547, - "learning_rate": 3.1193542811549734e-06, - "loss": 0.8887, - "step": 3677 - }, - { - "epoch": 0.33169499932362356, - "grad_norm": 1.415374989779859, - "learning_rate": 3.1188700917823166e-06, - "loss": 0.8914, - "step": 3678 - }, - { - "epoch": 0.33178518284709385, - "grad_norm": 0.862856972117168, - "learning_rate": 3.1183858069414937e-06, - "loss": 0.8435, - "step": 3679 - }, - { - "epoch": 0.3318753663705641, - "grad_norm": 2.0389886016901286, - "learning_rate": 3.117901426673827e-06, - "loss": 1.0255, - "step": 3680 - }, - { - "epoch": 0.3319655498940344, - "grad_norm": 1.5051816610153275, - "learning_rate": 3.1174169510206466e-06, - "loss": 0.929, - "step": 3681 - }, - { - "epoch": 0.3320557334175046, - "grad_norm": 1.4004373582519918, - "learning_rate": 3.1169323800232908e-06, - "loss": 0.967, - "step": 3682 - }, - { - "epoch": 0.3321459169409749, - "grad_norm": 1.6488902839633035, - "learning_rate": 3.1164477137231054e-06, - "loss": 0.9956, - "step": 3683 - }, - { - "epoch": 0.3322361004644451, - "grad_norm": 1.61437020484605, - "learning_rate": 3.115962952161445e-06, - "loss": 0.9682, - "step": 3684 - }, - { - "epoch": 0.3323262839879154, - "grad_norm": 1.867411005537261, - "learning_rate": 3.1154780953796727e-06, - "loss": 1.0021, - "step": 3685 - }, - { - "epoch": 0.33241646751138565, - "grad_norm": 1.4426596549551804, - "learning_rate": 3.114993143419158e-06, - "loss": 0.8717, - "step": 3686 - }, - { - "epoch": 0.33250665103485594, - "grad_norm": 1.1679313424030207, - "learning_rate": 3.1145080963212806e-06, - "loss": 0.871, - "step": 3687 - }, - { - "epoch": 0.33259683455832617, - "grad_norm": 1.2228047826081332, - "learning_rate": 3.114022954127427e-06, - "loss": 0.9696, - "step": 3688 - }, - { - "epoch": 0.33268701808179646, - "grad_norm": 1.370876066745744, - "learning_rate": 3.1135377168789923e-06, - "loss": 1.1674, - "step": 3689 - }, - { - "epoch": 0.3327772016052667, - "grad_norm": 1.3378552063200628, - "learning_rate": 3.1130523846173803e-06, - "loss": 1.0239, - "step": 3690 - }, - { - "epoch": 0.332867385128737, - "grad_norm": 1.567991144602935, - "learning_rate": 3.1125669573840006e-06, - "loss": 0.9173, - "step": 3691 - }, - { - "epoch": 0.3329575686522072, - "grad_norm": 1.5889273586966144, - "learning_rate": 3.112081435220274e-06, - "loss": 0.9085, - "step": 3692 - }, - { - "epoch": 0.3330477521756775, - "grad_norm": 1.4696022158517261, - "learning_rate": 3.111595818167627e-06, - "loss": 0.9467, - "step": 3693 - }, - { - "epoch": 0.3331379356991478, - "grad_norm": 1.5498466846566348, - "learning_rate": 3.1111101062674953e-06, - "loss": 1.0198, - "step": 3694 - }, - { - "epoch": 0.333228119222618, - "grad_norm": 1.24875640997325, - "learning_rate": 3.1106242995613233e-06, - "loss": 0.9472, - "step": 3695 - }, - { - "epoch": 0.3333183027460883, - "grad_norm": 1.3236324846735337, - "learning_rate": 3.1101383980905616e-06, - "loss": 0.9613, - "step": 3696 - }, - { - "epoch": 0.33340848626955855, - "grad_norm": 0.8053253386558039, - "learning_rate": 3.109652401896671e-06, - "loss": 0.7809, - "step": 3697 - }, - { - "epoch": 0.33349866979302883, - "grad_norm": 1.2819028912155217, - "learning_rate": 3.109166311021119e-06, - "loss": 1.0078, - "step": 3698 - }, - { - "epoch": 0.33358885331649907, - "grad_norm": 1.708036637321988, - "learning_rate": 3.1086801255053807e-06, - "loss": 1.0055, - "step": 3699 - }, - { - "epoch": 0.33367903683996936, - "grad_norm": 1.263144004033282, - "learning_rate": 3.108193845390942e-06, - "loss": 1.0042, - "step": 3700 - }, - { - "epoch": 0.3337692203634396, - "grad_norm": 1.2114111189471917, - "learning_rate": 3.1077074707192933e-06, - "loss": 0.9388, - "step": 3701 - }, - { - "epoch": 0.3338594038869099, - "grad_norm": 1.415320832372637, - "learning_rate": 3.1072210015319353e-06, - "loss": 0.9822, - "step": 3702 - }, - { - "epoch": 0.3339495874103801, - "grad_norm": 1.4925722754532802, - "learning_rate": 3.106734437870376e-06, - "loss": 0.9197, - "step": 3703 - }, - { - "epoch": 0.3340397709338504, - "grad_norm": 1.6187024967484718, - "learning_rate": 3.1062477797761327e-06, - "loss": 0.9032, - "step": 3704 - }, - { - "epoch": 0.33412995445732063, - "grad_norm": 1.698841202107312, - "learning_rate": 3.105761027290729e-06, - "loss": 1.0086, - "step": 3705 - }, - { - "epoch": 0.3342201379807909, - "grad_norm": 1.543099776873394, - "learning_rate": 3.105274180455697e-06, - "loss": 0.9289, - "step": 3706 - }, - { - "epoch": 0.33431032150426115, - "grad_norm": 1.367465966339894, - "learning_rate": 3.1047872393125775e-06, - "loss": 0.9884, - "step": 3707 - }, - { - "epoch": 0.33440050502773144, - "grad_norm": 0.7590091341999451, - "learning_rate": 3.1043002039029186e-06, - "loss": 0.8172, - "step": 3708 - }, - { - "epoch": 0.3344906885512017, - "grad_norm": 1.3401837153965492, - "learning_rate": 3.1038130742682782e-06, - "loss": 0.9513, - "step": 3709 - }, - { - "epoch": 0.33458087207467196, - "grad_norm": 1.372457841875378, - "learning_rate": 3.103325850450219e-06, - "loss": 0.9385, - "step": 3710 - }, - { - "epoch": 0.3346710555981422, - "grad_norm": 1.455667875452037, - "learning_rate": 3.1028385324903154e-06, - "loss": 1.0047, - "step": 3711 - }, - { - "epoch": 0.3347612391216125, - "grad_norm": 1.3095080924845526, - "learning_rate": 3.1023511204301465e-06, - "loss": 0.9648, - "step": 3712 - }, - { - "epoch": 0.3348514226450827, - "grad_norm": 1.264492375822602, - "learning_rate": 3.1018636143113022e-06, - "loss": 0.9298, - "step": 3713 - }, - { - "epoch": 0.334941606168553, - "grad_norm": 1.3176635714481, - "learning_rate": 3.1013760141753787e-06, - "loss": 0.8711, - "step": 3714 - }, - { - "epoch": 0.33503178969202324, - "grad_norm": 1.505033947127717, - "learning_rate": 3.100888320063981e-06, - "loss": 0.9322, - "step": 3715 - }, - { - "epoch": 0.33512197321549353, - "grad_norm": 1.2774225613140366, - "learning_rate": 3.100400532018721e-06, - "loss": 0.9638, - "step": 3716 - }, - { - "epoch": 0.3352121567389638, - "grad_norm": 1.4865513156536694, - "learning_rate": 3.0999126500812204e-06, - "loss": 1.0265, - "step": 3717 - }, - { - "epoch": 0.33530234026243405, - "grad_norm": 0.8395057885366314, - "learning_rate": 3.0994246742931076e-06, - "loss": 0.8663, - "step": 3718 - }, - { - "epoch": 0.33539252378590434, - "grad_norm": 1.3753281981914276, - "learning_rate": 3.098936604696019e-06, - "loss": 0.9038, - "step": 3719 - }, - { - "epoch": 0.3354827073093746, - "grad_norm": 1.569295525401421, - "learning_rate": 3.0984484413316e-06, - "loss": 0.9994, - "step": 3720 - }, - { - "epoch": 0.33557289083284486, - "grad_norm": 1.5306575173864387, - "learning_rate": 3.0979601842415033e-06, - "loss": 0.9466, - "step": 3721 - }, - { - "epoch": 0.3356630743563151, - "grad_norm": 1.3529495002107597, - "learning_rate": 3.0974718334673896e-06, - "loss": 0.8808, - "step": 3722 - }, - { - "epoch": 0.3357532578797854, - "grad_norm": 1.311611874141616, - "learning_rate": 3.0969833890509282e-06, - "loss": 0.9581, - "step": 3723 - }, - { - "epoch": 0.3358434414032556, - "grad_norm": 1.4334890550848212, - "learning_rate": 3.096494851033795e-06, - "loss": 0.958, - "step": 3724 - }, - { - "epoch": 0.3359336249267259, - "grad_norm": 1.5362028868662136, - "learning_rate": 3.0960062194576747e-06, - "loss": 0.891, - "step": 3725 - }, - { - "epoch": 0.33602380845019614, - "grad_norm": 1.440853436743282, - "learning_rate": 3.0955174943642606e-06, - "loss": 0.9102, - "step": 3726 - }, - { - "epoch": 0.3361139919736664, - "grad_norm": 1.2922543398857487, - "learning_rate": 3.0950286757952534e-06, - "loss": 0.8838, - "step": 3727 - }, - { - "epoch": 0.33620417549713666, - "grad_norm": 1.3694262675965518, - "learning_rate": 3.0945397637923617e-06, - "loss": 1.0272, - "step": 3728 - }, - { - "epoch": 0.33629435902060695, - "grad_norm": 1.7149831936738449, - "learning_rate": 3.0940507583973025e-06, - "loss": 0.9671, - "step": 3729 - }, - { - "epoch": 0.3363845425440772, - "grad_norm": 1.3492847720251577, - "learning_rate": 3.093561659651799e-06, - "loss": 0.9335, - "step": 3730 - }, - { - "epoch": 0.33647472606754747, - "grad_norm": 1.3480570321597158, - "learning_rate": 3.093072467597586e-06, - "loss": 0.8934, - "step": 3731 - }, - { - "epoch": 0.3365649095910177, - "grad_norm": 0.942990849069266, - "learning_rate": 3.092583182276402e-06, - "loss": 0.8286, - "step": 3732 - }, - { - "epoch": 0.336655093114488, - "grad_norm": 0.7042187554846324, - "learning_rate": 3.092093803729997e-06, - "loss": 0.8265, - "step": 3733 - }, - { - "epoch": 0.3367452766379582, - "grad_norm": 1.4974612368245919, - "learning_rate": 3.0916043320001264e-06, - "loss": 0.9786, - "step": 3734 - }, - { - "epoch": 0.3368354601614285, - "grad_norm": 1.5857042413562807, - "learning_rate": 3.0911147671285557e-06, - "loss": 0.9732, - "step": 3735 - }, - { - "epoch": 0.33692564368489875, - "grad_norm": 1.4871490256792885, - "learning_rate": 3.0906251091570565e-06, - "loss": 0.9885, - "step": 3736 - }, - { - "epoch": 0.33701582720836903, - "grad_norm": 2.3304278358297763, - "learning_rate": 3.0901353581274094e-06, - "loss": 0.9772, - "step": 3737 - }, - { - "epoch": 0.33710601073183927, - "grad_norm": 1.2729155049836414, - "learning_rate": 3.089645514081402e-06, - "loss": 0.8433, - "step": 3738 - }, - { - "epoch": 0.33719619425530956, - "grad_norm": 2.1871907696820085, - "learning_rate": 3.0891555770608323e-06, - "loss": 0.8886, - "step": 3739 - }, - { - "epoch": 0.33728637777877984, - "grad_norm": 1.5628018659822187, - "learning_rate": 3.088665547107503e-06, - "loss": 0.9912, - "step": 3740 - }, - { - "epoch": 0.3373765613022501, - "grad_norm": 1.3061057508482943, - "learning_rate": 3.0881754242632254e-06, - "loss": 1.0434, - "step": 3741 - }, - { - "epoch": 0.33746674482572037, - "grad_norm": 1.2498528870798098, - "learning_rate": 3.0876852085698213e-06, - "loss": 1.0069, - "step": 3742 - }, - { - "epoch": 0.3375569283491906, - "grad_norm": 1.581157452434462, - "learning_rate": 3.087194900069117e-06, - "loss": 0.9875, - "step": 3743 - }, - { - "epoch": 0.3376471118726609, - "grad_norm": 1.4394751538023527, - "learning_rate": 3.08670449880295e-06, - "loss": 1.0156, - "step": 3744 - }, - { - "epoch": 0.3377372953961311, - "grad_norm": 1.4688966556134697, - "learning_rate": 3.086214004813163e-06, - "loss": 0.9879, - "step": 3745 - }, - { - "epoch": 0.3378274789196014, - "grad_norm": 1.3712925780370386, - "learning_rate": 3.0857234181416074e-06, - "loss": 1.0299, - "step": 3746 - }, - { - "epoch": 0.33791766244307164, - "grad_norm": 1.422062368937907, - "learning_rate": 3.085232738830143e-06, - "loss": 1.0112, - "step": 3747 - }, - { - "epoch": 0.33800784596654193, - "grad_norm": 1.5792360077516348, - "learning_rate": 3.084741966920638e-06, - "loss": 1.0254, - "step": 3748 - }, - { - "epoch": 0.33809802949001216, - "grad_norm": 1.3077117224801262, - "learning_rate": 3.084251102454966e-06, - "loss": 0.9209, - "step": 3749 - }, - { - "epoch": 0.33818821301348245, - "grad_norm": 0.8274201686532859, - "learning_rate": 3.083760145475013e-06, - "loss": 0.8353, - "step": 3750 - }, - { - "epoch": 0.3382783965369527, - "grad_norm": 1.3894164588086255, - "learning_rate": 3.0832690960226678e-06, - "loss": 0.9107, - "step": 3751 - }, - { - "epoch": 0.338368580060423, - "grad_norm": 1.610361482070343, - "learning_rate": 3.08277795413983e-06, - "loss": 0.9356, - "step": 3752 - }, - { - "epoch": 0.3384587635838932, - "grad_norm": 1.414211286441444, - "learning_rate": 3.0822867198684073e-06, - "loss": 1.0128, - "step": 3753 - }, - { - "epoch": 0.3385489471073635, - "grad_norm": 1.6081326328160197, - "learning_rate": 3.081795393250314e-06, - "loss": 0.969, - "step": 3754 - }, - { - "epoch": 0.33863913063083373, - "grad_norm": 3.0047379273752033, - "learning_rate": 3.081303974327473e-06, - "loss": 0.9302, - "step": 3755 - }, - { - "epoch": 0.338729314154304, - "grad_norm": 1.4433076717629796, - "learning_rate": 3.080812463141814e-06, - "loss": 0.9598, - "step": 3756 - }, - { - "epoch": 0.33881949767777425, - "grad_norm": 1.2480276760003626, - "learning_rate": 3.080320859735276e-06, - "loss": 0.8321, - "step": 3757 - }, - { - "epoch": 0.33890968120124454, - "grad_norm": 1.584325939374434, - "learning_rate": 3.079829164149806e-06, - "loss": 1.0131, - "step": 3758 - }, - { - "epoch": 0.3389998647247148, - "grad_norm": 1.6999542398464793, - "learning_rate": 3.0793373764273573e-06, - "loss": 0.9504, - "step": 3759 - }, - { - "epoch": 0.33909004824818506, - "grad_norm": 1.5253696806540673, - "learning_rate": 3.078845496609892e-06, - "loss": 0.9909, - "step": 3760 - }, - { - "epoch": 0.3391802317716553, - "grad_norm": 1.5025059906495768, - "learning_rate": 3.078353524739381e-06, - "loss": 1.0491, - "step": 3761 - }, - { - "epoch": 0.3392704152951256, - "grad_norm": 1.3548424232889076, - "learning_rate": 3.077861460857801e-06, - "loss": 1.0439, - "step": 3762 - }, - { - "epoch": 0.3393605988185958, - "grad_norm": 1.2696969731076295, - "learning_rate": 3.077369305007138e-06, - "loss": 0.9828, - "step": 3763 - }, - { - "epoch": 0.3394507823420661, - "grad_norm": 1.3025890538090006, - "learning_rate": 3.0768770572293852e-06, - "loss": 1.0025, - "step": 3764 - }, - { - "epoch": 0.3395409658655364, - "grad_norm": 1.5600070903078975, - "learning_rate": 3.0763847175665437e-06, - "loss": 1.0421, - "step": 3765 - }, - { - "epoch": 0.3396311493890066, - "grad_norm": 1.529465082693445, - "learning_rate": 3.0758922860606237e-06, - "loss": 0.9078, - "step": 3766 - }, - { - "epoch": 0.3397213329124769, - "grad_norm": 1.3925269760861017, - "learning_rate": 3.0753997627536404e-06, - "loss": 0.8993, - "step": 3767 - }, - { - "epoch": 0.33981151643594715, - "grad_norm": 1.5908562063121818, - "learning_rate": 3.0749071476876203e-06, - "loss": 0.9223, - "step": 3768 - }, - { - "epoch": 0.33990169995941744, - "grad_norm": 1.3708477179904026, - "learning_rate": 3.0744144409045952e-06, - "loss": 0.9337, - "step": 3769 - }, - { - "epoch": 0.33999188348288767, - "grad_norm": 1.639254333499674, - "learning_rate": 3.0739216424466056e-06, - "loss": 0.9805, - "step": 3770 - }, - { - "epoch": 0.34008206700635796, - "grad_norm": 1.7659673443299069, - "learning_rate": 3.0734287523557002e-06, - "loss": 0.9418, - "step": 3771 - }, - { - "epoch": 0.3401722505298282, - "grad_norm": 1.6781388351466828, - "learning_rate": 3.0729357706739348e-06, - "loss": 0.9191, - "step": 3772 - }, - { - "epoch": 0.3402624340532985, - "grad_norm": 1.489453912550195, - "learning_rate": 3.0724426974433737e-06, - "loss": 0.9556, - "step": 3773 - }, - { - "epoch": 0.3403526175767687, - "grad_norm": 1.1932311043736767, - "learning_rate": 3.0719495327060874e-06, - "loss": 0.9348, - "step": 3774 - }, - { - "epoch": 0.340442801100239, - "grad_norm": 1.4049456162974487, - "learning_rate": 3.071456276504157e-06, - "loss": 0.9677, - "step": 3775 - }, - { - "epoch": 0.34053298462370923, - "grad_norm": 1.4416543390412166, - "learning_rate": 3.070962928879669e-06, - "loss": 0.9625, - "step": 3776 - }, - { - "epoch": 0.3406231681471795, - "grad_norm": 1.5327134437712626, - "learning_rate": 3.0704694898747185e-06, - "loss": 0.9829, - "step": 3777 - }, - { - "epoch": 0.34071335167064976, - "grad_norm": 1.4442534819738124, - "learning_rate": 3.069975959531408e-06, - "loss": 1.0914, - "step": 3778 - }, - { - "epoch": 0.34080353519412004, - "grad_norm": 0.7990886011735383, - "learning_rate": 3.06948233789185e-06, - "loss": 0.8684, - "step": 3779 - }, - { - "epoch": 0.3408937187175903, - "grad_norm": 1.3985248730179343, - "learning_rate": 3.0689886249981614e-06, - "loss": 0.98, - "step": 3780 - }, - { - "epoch": 0.34098390224106057, - "grad_norm": 1.4250328294418984, - "learning_rate": 3.0684948208924693e-06, - "loss": 0.9856, - "step": 3781 - }, - { - "epoch": 0.3410740857645308, - "grad_norm": 1.654327860680518, - "learning_rate": 3.068000925616907e-06, - "loss": 0.9898, - "step": 3782 - }, - { - "epoch": 0.3411642692880011, - "grad_norm": 1.7032345377842988, - "learning_rate": 3.067506939213617e-06, - "loss": 0.9617, - "step": 3783 - }, - { - "epoch": 0.3412544528114713, - "grad_norm": 1.2764221506462747, - "learning_rate": 3.0670128617247493e-06, - "loss": 0.9685, - "step": 3784 - }, - { - "epoch": 0.3413446363349416, - "grad_norm": 1.2310027886087176, - "learning_rate": 3.06651869319246e-06, - "loss": 0.881, - "step": 3785 - }, - { - "epoch": 0.34143481985841184, - "grad_norm": 1.4874427015023757, - "learning_rate": 3.0660244336589154e-06, - "loss": 0.9632, - "step": 3786 - }, - { - "epoch": 0.34152500338188213, - "grad_norm": 1.3471647389045869, - "learning_rate": 3.065530083166288e-06, - "loss": 0.979, - "step": 3787 - }, - { - "epoch": 0.3416151869053524, - "grad_norm": 1.2722654507209112, - "learning_rate": 3.0650356417567586e-06, - "loss": 0.926, - "step": 3788 - }, - { - "epoch": 0.34170537042882265, - "grad_norm": 1.4406602371110007, - "learning_rate": 3.0645411094725156e-06, - "loss": 1.0084, - "step": 3789 - }, - { - "epoch": 0.34179555395229294, - "grad_norm": 1.4231635323100664, - "learning_rate": 3.0640464863557556e-06, - "loss": 0.96, - "step": 3790 - }, - { - "epoch": 0.3418857374757632, - "grad_norm": 1.4821079811418203, - "learning_rate": 3.063551772448682e-06, - "loss": 0.9975, - "step": 3791 - }, - { - "epoch": 0.34197592099923346, - "grad_norm": 1.6468396148644202, - "learning_rate": 3.0630569677935075e-06, - "loss": 0.894, - "step": 3792 - }, - { - "epoch": 0.3420661045227037, - "grad_norm": 1.589532470222679, - "learning_rate": 3.06256207243245e-06, - "loss": 1.0442, - "step": 3793 - }, - { - "epoch": 0.342156288046174, - "grad_norm": 0.7922120641628115, - "learning_rate": 3.0620670864077385e-06, - "loss": 0.8093, - "step": 3794 - }, - { - "epoch": 0.3422464715696442, - "grad_norm": 0.6524368493809303, - "learning_rate": 3.0615720097616063e-06, - "loss": 0.8021, - "step": 3795 - }, - { - "epoch": 0.3423366550931145, - "grad_norm": 1.5207896378741557, - "learning_rate": 3.0610768425362967e-06, - "loss": 0.9232, - "step": 3796 - }, - { - "epoch": 0.34242683861658474, - "grad_norm": 1.497163793304097, - "learning_rate": 3.0605815847740603e-06, - "loss": 0.946, - "step": 3797 - }, - { - "epoch": 0.342517022140055, - "grad_norm": 1.863414383776371, - "learning_rate": 3.0600862365171553e-06, - "loss": 0.9328, - "step": 3798 - }, - { - "epoch": 0.34260720566352526, - "grad_norm": 1.4910259419364869, - "learning_rate": 3.0595907978078474e-06, - "loss": 0.9437, - "step": 3799 - }, - { - "epoch": 0.34269738918699555, - "grad_norm": 1.3985405995814613, - "learning_rate": 3.05909526868841e-06, - "loss": 0.9385, - "step": 3800 - }, - { - "epoch": 0.3427875727104658, - "grad_norm": 1.6696762885935124, - "learning_rate": 3.0585996492011243e-06, - "loss": 0.9719, - "step": 3801 - }, - { - "epoch": 0.34287775623393607, - "grad_norm": 1.7322351813115369, - "learning_rate": 3.05810393938828e-06, - "loss": 1.0057, - "step": 3802 - }, - { - "epoch": 0.3429679397574063, - "grad_norm": 1.5087729443920619, - "learning_rate": 3.0576081392921723e-06, - "loss": 0.8835, - "step": 3803 - }, - { - "epoch": 0.3430581232808766, - "grad_norm": 1.4311713351158042, - "learning_rate": 3.057112248955107e-06, - "loss": 1.015, - "step": 3804 - }, - { - "epoch": 0.3431483068043468, - "grad_norm": 1.6761831944158327, - "learning_rate": 3.0566162684193963e-06, - "loss": 0.9863, - "step": 3805 - }, - { - "epoch": 0.3432384903278171, - "grad_norm": 1.2201722966928228, - "learning_rate": 3.056120197727359e-06, - "loss": 0.9121, - "step": 3806 - }, - { - "epoch": 0.34332867385128735, - "grad_norm": 1.5858274525012, - "learning_rate": 3.0556240369213236e-06, - "loss": 0.9561, - "step": 3807 - }, - { - "epoch": 0.34341885737475764, - "grad_norm": 1.8553734042114476, - "learning_rate": 3.055127786043624e-06, - "loss": 0.8728, - "step": 3808 - }, - { - "epoch": 0.34350904089822787, - "grad_norm": 1.4229669255791275, - "learning_rate": 3.054631445136604e-06, - "loss": 1.0504, - "step": 3809 - }, - { - "epoch": 0.34359922442169816, - "grad_norm": 2.606105103341357, - "learning_rate": 3.0541350142426147e-06, - "loss": 0.977, - "step": 3810 - }, - { - "epoch": 0.3436894079451684, - "grad_norm": 1.249782447956871, - "learning_rate": 3.053638493404012e-06, - "loss": 0.9187, - "step": 3811 - }, - { - "epoch": 0.3437795914686387, - "grad_norm": 1.3235717813404446, - "learning_rate": 3.0531418826631643e-06, - "loss": 0.9005, - "step": 3812 - }, - { - "epoch": 0.34386977499210897, - "grad_norm": 1.8094108169515686, - "learning_rate": 3.052645182062444e-06, - "loss": 1.0506, - "step": 3813 - }, - { - "epoch": 0.3439599585155792, - "grad_norm": 1.4903839556216916, - "learning_rate": 3.0521483916442324e-06, - "loss": 0.9991, - "step": 3814 - }, - { - "epoch": 0.3440501420390495, - "grad_norm": 1.3256332809427, - "learning_rate": 3.0516515114509183e-06, - "loss": 0.9582, - "step": 3815 - }, - { - "epoch": 0.3441403255625197, - "grad_norm": 1.2690965113512453, - "learning_rate": 3.0511545415249e-06, - "loss": 0.9147, - "step": 3816 - }, - { - "epoch": 0.34423050908599, - "grad_norm": 1.4424750441271723, - "learning_rate": 3.050657481908579e-06, - "loss": 0.9932, - "step": 3817 - }, - { - "epoch": 0.34432069260946024, - "grad_norm": 1.503733122137138, - "learning_rate": 3.0501603326443677e-06, - "loss": 1.0702, - "step": 3818 - }, - { - "epoch": 0.34441087613293053, - "grad_norm": 1.3253816432856544, - "learning_rate": 3.049663093774687e-06, - "loss": 0.9548, - "step": 3819 - }, - { - "epoch": 0.34450105965640077, - "grad_norm": 1.6165778443754755, - "learning_rate": 3.0491657653419643e-06, - "loss": 0.8651, - "step": 3820 - }, - { - "epoch": 0.34459124317987105, - "grad_norm": 1.4302581315378704, - "learning_rate": 3.0486683473886325e-06, - "loss": 1.0157, - "step": 3821 - }, - { - "epoch": 0.3446814267033413, - "grad_norm": 3.8958360940580055, - "learning_rate": 3.0481708399571355e-06, - "loss": 1.0215, - "step": 3822 - }, - { - "epoch": 0.3447716102268116, - "grad_norm": 1.432409561391417, - "learning_rate": 3.047673243089922e-06, - "loss": 0.9853, - "step": 3823 - }, - { - "epoch": 0.3448617937502818, - "grad_norm": 1.5904945335018423, - "learning_rate": 3.047175556829451e-06, - "loss": 0.9945, - "step": 3824 - }, - { - "epoch": 0.3449519772737521, - "grad_norm": 1.330468522417337, - "learning_rate": 3.046677781218188e-06, - "loss": 1.0084, - "step": 3825 - }, - { - "epoch": 0.34504216079722233, - "grad_norm": 1.2992869549050832, - "learning_rate": 3.0461799162986043e-06, - "loss": 0.9388, - "step": 3826 - }, - { - "epoch": 0.3451323443206926, - "grad_norm": 1.4786199535413074, - "learning_rate": 3.045681962113183e-06, - "loss": 0.8889, - "step": 3827 - }, - { - "epoch": 0.34522252784416285, - "grad_norm": 1.4806981374535537, - "learning_rate": 3.0451839187044095e-06, - "loss": 0.9133, - "step": 3828 - }, - { - "epoch": 0.34531271136763314, - "grad_norm": 1.4270911483365036, - "learning_rate": 3.0446857861147816e-06, - "loss": 0.9772, - "step": 3829 - }, - { - "epoch": 0.3454028948911034, - "grad_norm": 1.4722408447447355, - "learning_rate": 3.044187564386802e-06, - "loss": 0.9336, - "step": 3830 - }, - { - "epoch": 0.34549307841457366, - "grad_norm": 1.6778703663325845, - "learning_rate": 3.0436892535629818e-06, - "loss": 1.0295, - "step": 3831 - }, - { - "epoch": 0.3455832619380439, - "grad_norm": 1.519354018047266, - "learning_rate": 3.0431908536858393e-06, - "loss": 1.0414, - "step": 3832 - }, - { - "epoch": 0.3456734454615142, - "grad_norm": 1.700967314341642, - "learning_rate": 3.0426923647979016e-06, - "loss": 0.9519, - "step": 3833 - }, - { - "epoch": 0.3457636289849844, - "grad_norm": 1.494110226789427, - "learning_rate": 3.0421937869417016e-06, - "loss": 0.9065, - "step": 3834 - }, - { - "epoch": 0.3458538125084547, - "grad_norm": 1.479716328679452, - "learning_rate": 3.041695120159782e-06, - "loss": 1.0315, - "step": 3835 - }, - { - "epoch": 0.345943996031925, - "grad_norm": 1.3819840099138383, - "learning_rate": 3.04119636449469e-06, - "loss": 0.9066, - "step": 3836 - }, - { - "epoch": 0.3460341795553952, - "grad_norm": 1.2599369376159681, - "learning_rate": 3.040697519988983e-06, - "loss": 0.9929, - "step": 3837 - }, - { - "epoch": 0.3461243630788655, - "grad_norm": 0.7160642034073039, - "learning_rate": 3.040198586685226e-06, - "loss": 0.8083, - "step": 3838 - }, - { - "epoch": 0.34621454660233575, - "grad_norm": 1.448261942506869, - "learning_rate": 3.039699564625989e-06, - "loss": 0.9221, - "step": 3839 - }, - { - "epoch": 0.34630473012580604, - "grad_norm": 0.9290189823822855, - "learning_rate": 3.039200453853853e-06, - "loss": 0.8199, - "step": 3840 - }, - { - "epoch": 0.34639491364927627, - "grad_norm": 1.573283104391101, - "learning_rate": 3.038701254411404e-06, - "loss": 1.0105, - "step": 3841 - }, - { - "epoch": 0.34648509717274656, - "grad_norm": 1.5254058641754382, - "learning_rate": 3.0382019663412367e-06, - "loss": 0.9826, - "step": 3842 - }, - { - "epoch": 0.3465752806962168, - "grad_norm": 1.6932632788612398, - "learning_rate": 3.0377025896859532e-06, - "loss": 1.0095, - "step": 3843 - }, - { - "epoch": 0.3466654642196871, - "grad_norm": 1.343466617954592, - "learning_rate": 3.0372031244881627e-06, - "loss": 1.067, - "step": 3844 - }, - { - "epoch": 0.3467556477431573, - "grad_norm": 1.393926152614356, - "learning_rate": 3.0367035707904826e-06, - "loss": 0.93, - "step": 3845 - }, - { - "epoch": 0.3468458312666276, - "grad_norm": 1.620948435693362, - "learning_rate": 3.036203928635537e-06, - "loss": 0.8847, - "step": 3846 - }, - { - "epoch": 0.34693601479009784, - "grad_norm": 2.1929731292191645, - "learning_rate": 3.035704198065959e-06, - "loss": 1.0306, - "step": 3847 - }, - { - "epoch": 0.3470261983135681, - "grad_norm": 1.514667956031555, - "learning_rate": 3.0352043791243886e-06, - "loss": 0.9478, - "step": 3848 - }, - { - "epoch": 0.34711638183703836, - "grad_norm": 0.7272332489612775, - "learning_rate": 3.034704471853472e-06, - "loss": 0.8114, - "step": 3849 - }, - { - "epoch": 0.34720656536050865, - "grad_norm": 1.3007974709539005, - "learning_rate": 3.0342044762958646e-06, - "loss": 1.0176, - "step": 3850 - }, - { - "epoch": 0.3472967488839789, - "grad_norm": 1.4809004423484062, - "learning_rate": 3.0337043924942286e-06, - "loss": 1.0064, - "step": 3851 - }, - { - "epoch": 0.34738693240744917, - "grad_norm": 1.2752668662437523, - "learning_rate": 3.0332042204912343e-06, - "loss": 0.9497, - "step": 3852 - }, - { - "epoch": 0.3474771159309194, - "grad_norm": 1.0757615541249432, - "learning_rate": 3.0327039603295587e-06, - "loss": 0.8533, - "step": 3853 - }, - { - "epoch": 0.3475672994543897, - "grad_norm": 1.7054417103446904, - "learning_rate": 3.032203612051887e-06, - "loss": 0.9338, - "step": 3854 - }, - { - "epoch": 0.3476574829778599, - "grad_norm": 1.6638661300059367, - "learning_rate": 3.0317031757009116e-06, - "loss": 0.955, - "step": 3855 - }, - { - "epoch": 0.3477476665013302, - "grad_norm": 1.5677975685179608, - "learning_rate": 3.0312026513193326e-06, - "loss": 0.9304, - "step": 3856 - }, - { - "epoch": 0.34783785002480044, - "grad_norm": 1.3724830305374123, - "learning_rate": 3.0307020389498573e-06, - "loss": 0.9358, - "step": 3857 - }, - { - "epoch": 0.34792803354827073, - "grad_norm": 1.2546488146679067, - "learning_rate": 3.0302013386352004e-06, - "loss": 0.8993, - "step": 3858 - }, - { - "epoch": 0.348018217071741, - "grad_norm": 1.3357907487899883, - "learning_rate": 3.0297005504180854e-06, - "loss": 0.9572, - "step": 3859 - }, - { - "epoch": 0.34810840059521125, - "grad_norm": 0.9911235601689184, - "learning_rate": 3.0291996743412417e-06, - "loss": 0.7458, - "step": 3860 - }, - { - "epoch": 0.34819858411868154, - "grad_norm": 1.8989711686406967, - "learning_rate": 3.0286987104474063e-06, - "loss": 1.054, - "step": 3861 - }, - { - "epoch": 0.3482887676421518, - "grad_norm": 1.1777893357239624, - "learning_rate": 3.028197658779325e-06, - "loss": 0.942, - "step": 3862 - }, - { - "epoch": 0.34837895116562206, - "grad_norm": 1.6969682193673294, - "learning_rate": 3.0276965193797503e-06, - "loss": 0.9875, - "step": 3863 - }, - { - "epoch": 0.3484691346890923, - "grad_norm": 1.3231269138702548, - "learning_rate": 3.0271952922914423e-06, - "loss": 0.9325, - "step": 3864 - }, - { - "epoch": 0.3485593182125626, - "grad_norm": 1.4133068194166585, - "learning_rate": 3.0266939775571675e-06, - "loss": 0.9523, - "step": 3865 - }, - { - "epoch": 0.3486495017360328, - "grad_norm": 1.4402153243565612, - "learning_rate": 3.026192575219701e-06, - "loss": 0.9541, - "step": 3866 - }, - { - "epoch": 0.3487396852595031, - "grad_norm": 1.322736737888653, - "learning_rate": 3.025691085321826e-06, - "loss": 0.8637, - "step": 3867 - }, - { - "epoch": 0.34882986878297334, - "grad_norm": 1.5940893971204886, - "learning_rate": 3.025189507906332e-06, - "loss": 0.9439, - "step": 3868 - }, - { - "epoch": 0.34892005230644363, - "grad_norm": 1.480837974705647, - "learning_rate": 3.0246878430160166e-06, - "loss": 0.9862, - "step": 3869 - }, - { - "epoch": 0.34901023582991386, - "grad_norm": 1.4250753248952723, - "learning_rate": 3.024186090693684e-06, - "loss": 0.9073, - "step": 3870 - }, - { - "epoch": 0.34910041935338415, - "grad_norm": 1.5057054730449835, - "learning_rate": 3.023684250982147e-06, - "loss": 0.9861, - "step": 3871 - }, - { - "epoch": 0.3491906028768544, - "grad_norm": 1.501096642480396, - "learning_rate": 3.0231823239242252e-06, - "loss": 0.8834, - "step": 3872 - }, - { - "epoch": 0.34928078640032467, - "grad_norm": 1.6377955424801642, - "learning_rate": 3.0226803095627457e-06, - "loss": 0.9176, - "step": 3873 - }, - { - "epoch": 0.3493709699237949, - "grad_norm": 1.6892254097122459, - "learning_rate": 3.022178207940543e-06, - "loss": 0.9435, - "step": 3874 - }, - { - "epoch": 0.3494611534472652, - "grad_norm": 1.6560966132771162, - "learning_rate": 3.02167601910046e-06, - "loss": 0.9755, - "step": 3875 - }, - { - "epoch": 0.3495513369707354, - "grad_norm": 1.3771559978402557, - "learning_rate": 3.021173743085345e-06, - "loss": 0.9855, - "step": 3876 - }, - { - "epoch": 0.3496415204942057, - "grad_norm": 0.9459424717712306, - "learning_rate": 3.0206713799380557e-06, - "loss": 0.7962, - "step": 3877 - }, - { - "epoch": 0.34973170401767595, - "grad_norm": 1.5689783454455875, - "learning_rate": 3.0201689297014565e-06, - "loss": 0.9648, - "step": 3878 - }, - { - "epoch": 0.34982188754114624, - "grad_norm": 1.3856743546618966, - "learning_rate": 3.0196663924184187e-06, - "loss": 1.0088, - "step": 3879 - }, - { - "epoch": 0.34991207106461647, - "grad_norm": 1.69126225446616, - "learning_rate": 3.019163768131822e-06, - "loss": 0.991, - "step": 3880 - }, - { - "epoch": 0.35000225458808676, - "grad_norm": 1.5682208806476425, - "learning_rate": 3.0186610568845533e-06, - "loss": 0.8834, - "step": 3881 - }, - { - "epoch": 0.350092438111557, - "grad_norm": 1.5885581927664532, - "learning_rate": 3.018158258719507e-06, - "loss": 0.8942, - "step": 3882 - }, - { - "epoch": 0.3501826216350273, - "grad_norm": 1.3082020187487158, - "learning_rate": 3.0176553736795827e-06, - "loss": 0.9887, - "step": 3883 - }, - { - "epoch": 0.35027280515849757, - "grad_norm": 1.236210098779813, - "learning_rate": 3.017152401807691e-06, - "loss": 0.7279, - "step": 3884 - }, - { - "epoch": 0.3503629886819678, - "grad_norm": 1.3100613236087846, - "learning_rate": 3.0166493431467476e-06, - "loss": 0.9369, - "step": 3885 - }, - { - "epoch": 0.3504531722054381, - "grad_norm": 1.3473031728660287, - "learning_rate": 3.016146197739677e-06, - "loss": 0.9361, - "step": 3886 - }, - { - "epoch": 0.3505433557289083, - "grad_norm": 1.6456444386436337, - "learning_rate": 3.0156429656294097e-06, - "loss": 0.8911, - "step": 3887 - }, - { - "epoch": 0.3506335392523786, - "grad_norm": 1.4877227776661368, - "learning_rate": 3.0151396468588844e-06, - "loss": 0.9035, - "step": 3888 - }, - { - "epoch": 0.35072372277584885, - "grad_norm": 1.724766828838346, - "learning_rate": 3.014636241471047e-06, - "loss": 0.9167, - "step": 3889 - }, - { - "epoch": 0.35081390629931913, - "grad_norm": 1.5237418628753872, - "learning_rate": 3.0141327495088514e-06, - "loss": 0.9603, - "step": 3890 - }, - { - "epoch": 0.35090408982278937, - "grad_norm": 1.5448549740818505, - "learning_rate": 3.0136291710152566e-06, - "loss": 0.9251, - "step": 3891 - }, - { - "epoch": 0.35099427334625966, - "grad_norm": 1.561219720844619, - "learning_rate": 3.0131255060332325e-06, - "loss": 0.9959, - "step": 3892 - }, - { - "epoch": 0.3510844568697299, - "grad_norm": 1.6031145716396775, - "learning_rate": 3.012621754605754e-06, - "loss": 0.9764, - "step": 3893 - }, - { - "epoch": 0.3511746403932002, - "grad_norm": 1.4759402301027278, - "learning_rate": 3.0121179167758035e-06, - "loss": 0.9927, - "step": 3894 - }, - { - "epoch": 0.3512648239166704, - "grad_norm": 1.7134090322007753, - "learning_rate": 3.0116139925863717e-06, - "loss": 1.0361, - "step": 3895 - }, - { - "epoch": 0.3513550074401407, - "grad_norm": 1.3927998627073734, - "learning_rate": 3.011109982080456e-06, - "loss": 0.9946, - "step": 3896 - }, - { - "epoch": 0.35144519096361093, - "grad_norm": 1.904977092054506, - "learning_rate": 3.0106058853010614e-06, - "loss": 0.9055, - "step": 3897 - }, - { - "epoch": 0.3515353744870812, - "grad_norm": 1.3963144905954132, - "learning_rate": 3.010101702291201e-06, - "loss": 0.9518, - "step": 3898 - }, - { - "epoch": 0.35162555801055145, - "grad_norm": 1.657055281159541, - "learning_rate": 3.009597433093893e-06, - "loss": 0.9527, - "step": 3899 - }, - { - "epoch": 0.35171574153402174, - "grad_norm": 2.453389439027663, - "learning_rate": 3.009093077752165e-06, - "loss": 0.9736, - "step": 3900 - }, - { - "epoch": 0.351805925057492, - "grad_norm": 1.4752978202026457, - "learning_rate": 3.008588636309052e-06, - "loss": 0.9942, - "step": 3901 - }, - { - "epoch": 0.35189610858096226, - "grad_norm": 1.4728875016594294, - "learning_rate": 3.0080841088075947e-06, - "loss": 0.9277, - "step": 3902 - }, - { - "epoch": 0.3519862921044325, - "grad_norm": 1.5845385615627106, - "learning_rate": 3.0075794952908436e-06, - "loss": 0.9997, - "step": 3903 - }, - { - "epoch": 0.3520764756279028, - "grad_norm": 1.3113999525210098, - "learning_rate": 3.0070747958018528e-06, - "loss": 0.9699, - "step": 3904 - }, - { - "epoch": 0.352166659151373, - "grad_norm": 1.203213576982373, - "learning_rate": 3.0065700103836894e-06, - "loss": 0.9734, - "step": 3905 - }, - { - "epoch": 0.3522568426748433, - "grad_norm": 1.4884575366446182, - "learning_rate": 3.0060651390794214e-06, - "loss": 0.9817, - "step": 3906 - }, - { - "epoch": 0.3523470261983136, - "grad_norm": 1.2463352842238296, - "learning_rate": 3.005560181932128e-06, - "loss": 0.9358, - "step": 3907 - }, - { - "epoch": 0.35243720972178383, - "grad_norm": 1.5199379658588714, - "learning_rate": 3.005055138984896e-06, - "loss": 0.9064, - "step": 3908 - }, - { - "epoch": 0.3525273932452541, - "grad_norm": 1.297043065590687, - "learning_rate": 3.0045500102808174e-06, - "loss": 0.9409, - "step": 3909 - }, - { - "epoch": 0.35261757676872435, - "grad_norm": 2.2064561482801888, - "learning_rate": 3.0040447958629927e-06, - "loss": 1.0469, - "step": 3910 - }, - { - "epoch": 0.35270776029219464, - "grad_norm": 1.4427524472686684, - "learning_rate": 3.00353949577453e-06, - "loss": 0.9867, - "step": 3911 - }, - { - "epoch": 0.35279794381566487, - "grad_norm": 1.8043980882447215, - "learning_rate": 3.003034110058544e-06, - "loss": 0.7983, - "step": 3912 - }, - { - "epoch": 0.35288812733913516, - "grad_norm": 1.358554241533858, - "learning_rate": 3.002528638758157e-06, - "loss": 0.944, - "step": 3913 - }, - { - "epoch": 0.3529783108626054, - "grad_norm": 1.7288413910005789, - "learning_rate": 3.0020230819164985e-06, - "loss": 0.9668, - "step": 3914 - }, - { - "epoch": 0.3530684943860757, - "grad_norm": 1.4476763390898604, - "learning_rate": 3.0015174395767064e-06, - "loss": 0.9643, - "step": 3915 - }, - { - "epoch": 0.3531586779095459, - "grad_norm": 1.7303997224081789, - "learning_rate": 3.001011711781923e-06, - "loss": 0.9933, - "step": 3916 - }, - { - "epoch": 0.3532488614330162, - "grad_norm": 1.4168924918354955, - "learning_rate": 3.0005058985753017e-06, - "loss": 0.9162, - "step": 3917 - }, - { - "epoch": 0.35333904495648644, - "grad_norm": 1.3276238337372395, - "learning_rate": 3e-06, - "loss": 0.9454, - "step": 3918 - }, - { - "epoch": 0.3534292284799567, - "grad_norm": 0.950597946313734, - "learning_rate": 2.9994940160991843e-06, - "loss": 0.8212, - "step": 3919 - }, - { - "epoch": 0.35351941200342696, - "grad_norm": 0.6852094031565561, - "learning_rate": 2.9989879469160285e-06, - "loss": 0.7769, - "step": 3920 - }, - { - "epoch": 0.35360959552689725, - "grad_norm": 1.957131510772274, - "learning_rate": 2.9984817924937124e-06, - "loss": 0.9759, - "step": 3921 - }, - { - "epoch": 0.3536997790503675, - "grad_norm": 1.7349134417002656, - "learning_rate": 2.997975552875424e-06, - "loss": 0.9041, - "step": 3922 - }, - { - "epoch": 0.35378996257383777, - "grad_norm": 1.5073031024547379, - "learning_rate": 2.997469228104358e-06, - "loss": 0.9429, - "step": 3923 - }, - { - "epoch": 0.353880146097308, - "grad_norm": 1.2522966743707153, - "learning_rate": 2.996962818223718e-06, - "loss": 0.8438, - "step": 3924 - }, - { - "epoch": 0.3539703296207783, - "grad_norm": 1.5206595892055117, - "learning_rate": 2.9964563232767135e-06, - "loss": 0.901, - "step": 3925 - }, - { - "epoch": 0.3540605131442485, - "grad_norm": 1.644005915805169, - "learning_rate": 2.9959497433065617e-06, - "loss": 0.9577, - "step": 3926 - }, - { - "epoch": 0.3541506966677188, - "grad_norm": 1.4329430041268816, - "learning_rate": 2.9954430783564848e-06, - "loss": 0.8797, - "step": 3927 - }, - { - "epoch": 0.35424088019118904, - "grad_norm": 1.7041657094167744, - "learning_rate": 2.994936328469716e-06, - "loss": 0.9793, - "step": 3928 - }, - { - "epoch": 0.35433106371465933, - "grad_norm": 1.2782147905292929, - "learning_rate": 2.994429493689494e-06, - "loss": 0.955, - "step": 3929 - }, - { - "epoch": 0.35442124723812957, - "grad_norm": 1.3009892661071893, - "learning_rate": 2.9939225740590642e-06, - "loss": 0.9825, - "step": 3930 - }, - { - "epoch": 0.35451143076159986, - "grad_norm": 1.4138444764049167, - "learning_rate": 2.99341556962168e-06, - "loss": 0.9197, - "step": 3931 - }, - { - "epoch": 0.35460161428507014, - "grad_norm": 1.3080328806935255, - "learning_rate": 2.992908480420602e-06, - "loss": 1.0, - "step": 3932 - }, - { - "epoch": 0.3546917978085404, - "grad_norm": 1.609065794638563, - "learning_rate": 2.9924013064990974e-06, - "loss": 0.95, - "step": 3933 - }, - { - "epoch": 0.35478198133201067, - "grad_norm": 0.875259395024843, - "learning_rate": 2.991894047900441e-06, - "loss": 0.8306, - "step": 3934 - }, - { - "epoch": 0.3548721648554809, - "grad_norm": 1.3823634566192384, - "learning_rate": 2.991386704667916e-06, - "loss": 0.9665, - "step": 3935 - }, - { - "epoch": 0.3549623483789512, - "grad_norm": 1.742456047675012, - "learning_rate": 2.9908792768448097e-06, - "loss": 1.0171, - "step": 3936 - }, - { - "epoch": 0.3550525319024214, - "grad_norm": 1.541170023059762, - "learning_rate": 2.990371764474421e-06, - "loss": 0.9553, - "step": 3937 - }, - { - "epoch": 0.3551427154258917, - "grad_norm": 3.2140605060458083, - "learning_rate": 2.9898641676000518e-06, - "loss": 0.9338, - "step": 3938 - }, - { - "epoch": 0.35523289894936194, - "grad_norm": 1.4632406592658334, - "learning_rate": 2.9893564862650138e-06, - "loss": 0.9563, - "step": 3939 - }, - { - "epoch": 0.35532308247283223, - "grad_norm": 1.692993969414792, - "learning_rate": 2.9888487205126254e-06, - "loss": 1.0092, - "step": 3940 - }, - { - "epoch": 0.35541326599630246, - "grad_norm": 1.3541773379956883, - "learning_rate": 2.9883408703862115e-06, - "loss": 0.949, - "step": 3941 - }, - { - "epoch": 0.35550344951977275, - "grad_norm": 1.9545172649996005, - "learning_rate": 2.987832935929105e-06, - "loss": 1.0407, - "step": 3942 - }, - { - "epoch": 0.355593633043243, - "grad_norm": 1.4610351693047756, - "learning_rate": 2.9873249171846454e-06, - "loss": 1.0203, - "step": 3943 - }, - { - "epoch": 0.3556838165667133, - "grad_norm": 2.70043423304421, - "learning_rate": 2.98681681419618e-06, - "loss": 0.9501, - "step": 3944 - }, - { - "epoch": 0.3557740000901835, - "grad_norm": 1.5120495180518705, - "learning_rate": 2.9863086270070627e-06, - "loss": 1.0052, - "step": 3945 - }, - { - "epoch": 0.3558641836136538, - "grad_norm": 1.541613947336385, - "learning_rate": 2.985800355660655e-06, - "loss": 0.967, - "step": 3946 - }, - { - "epoch": 0.35595436713712403, - "grad_norm": 0.7496080964093935, - "learning_rate": 2.9852920002003252e-06, - "loss": 0.825, - "step": 3947 - }, - { - "epoch": 0.3560445506605943, - "grad_norm": 1.6550004672139134, - "learning_rate": 2.9847835606694494e-06, - "loss": 0.993, - "step": 3948 - }, - { - "epoch": 0.35613473418406455, - "grad_norm": 1.6310970658561768, - "learning_rate": 2.98427503711141e-06, - "loss": 0.8855, - "step": 3949 - }, - { - "epoch": 0.35622491770753484, - "grad_norm": 1.6507424966981596, - "learning_rate": 2.9837664295695973e-06, - "loss": 0.9945, - "step": 3950 - }, - { - "epoch": 0.35631510123100507, - "grad_norm": 1.316037768853914, - "learning_rate": 2.983257738087408e-06, - "loss": 0.9525, - "step": 3951 - }, - { - "epoch": 0.35640528475447536, - "grad_norm": 1.4395057948089596, - "learning_rate": 2.982748962708247e-06, - "loss": 0.9866, - "step": 3952 - }, - { - "epoch": 0.3564954682779456, - "grad_norm": 1.3050950835655277, - "learning_rate": 2.982240103475526e-06, - "loss": 1.0079, - "step": 3953 - }, - { - "epoch": 0.3565856518014159, - "grad_norm": 1.8660948042014966, - "learning_rate": 2.981731160432663e-06, - "loss": 0.9601, - "step": 3954 - }, - { - "epoch": 0.35667583532488617, - "grad_norm": 1.4859580181963556, - "learning_rate": 2.981222133623084e-06, - "loss": 0.8645, - "step": 3955 - }, - { - "epoch": 0.3567660188483564, - "grad_norm": 1.4279778269053414, - "learning_rate": 2.980713023090222e-06, - "loss": 0.9674, - "step": 3956 - }, - { - "epoch": 0.3568562023718267, - "grad_norm": 1.5048343321400866, - "learning_rate": 2.980203828877518e-06, - "loss": 0.9361, - "step": 3957 - }, - { - "epoch": 0.3569463858952969, - "grad_norm": 1.70701923846246, - "learning_rate": 2.9796945510284182e-06, - "loss": 0.9959, - "step": 3958 - }, - { - "epoch": 0.3570365694187672, - "grad_norm": 1.449884308112972, - "learning_rate": 2.9791851895863774e-06, - "loss": 1.0111, - "step": 3959 - }, - { - "epoch": 0.35712675294223745, - "grad_norm": 1.3970607608451047, - "learning_rate": 2.978675744594857e-06, - "loss": 0.9927, - "step": 3960 - }, - { - "epoch": 0.35721693646570774, - "grad_norm": 1.3550964635739093, - "learning_rate": 2.978166216097326e-06, - "loss": 0.9139, - "step": 3961 - }, - { - "epoch": 0.35730711998917797, - "grad_norm": 1.4306049009205308, - "learning_rate": 2.9776566041372596e-06, - "loss": 0.9492, - "step": 3962 - }, - { - "epoch": 0.35739730351264826, - "grad_norm": 1.3563460004028611, - "learning_rate": 2.977146908758141e-06, - "loss": 1.0227, - "step": 3963 - }, - { - "epoch": 0.3574874870361185, - "grad_norm": 1.4463056296791243, - "learning_rate": 2.9766371300034604e-06, - "loss": 0.9753, - "step": 3964 - }, - { - "epoch": 0.3575776705595888, - "grad_norm": 1.2445542921865447, - "learning_rate": 2.9761272679167142e-06, - "loss": 1.0047, - "step": 3965 - }, - { - "epoch": 0.357667854083059, - "grad_norm": 1.5785333841519482, - "learning_rate": 2.9756173225414072e-06, - "loss": 0.8108, - "step": 3966 - }, - { - "epoch": 0.3577580376065293, - "grad_norm": 1.487651862078, - "learning_rate": 2.975107293921051e-06, - "loss": 1.1339, - "step": 3967 - }, - { - "epoch": 0.35784822112999953, - "grad_norm": 1.4869470899053876, - "learning_rate": 2.9745971820991643e-06, - "loss": 1.006, - "step": 3968 - }, - { - "epoch": 0.3579384046534698, - "grad_norm": 1.4246876324057178, - "learning_rate": 2.9740869871192715e-06, - "loss": 0.9934, - "step": 3969 - }, - { - "epoch": 0.35802858817694005, - "grad_norm": 1.2763438379535057, - "learning_rate": 2.9735767090249065e-06, - "loss": 1.0173, - "step": 3970 - }, - { - "epoch": 0.35811877170041034, - "grad_norm": 1.4911887936050277, - "learning_rate": 2.973066347859608e-06, - "loss": 0.8737, - "step": 3971 - }, - { - "epoch": 0.3582089552238806, - "grad_norm": 1.2365179172332996, - "learning_rate": 2.972555903666923e-06, - "loss": 0.9743, - "step": 3972 - }, - { - "epoch": 0.35829913874735086, - "grad_norm": 1.7385681601353808, - "learning_rate": 2.972045376490406e-06, - "loss": 0.8681, - "step": 3973 - }, - { - "epoch": 0.3583893222708211, - "grad_norm": 1.4208480983351037, - "learning_rate": 2.9715347663736177e-06, - "loss": 0.884, - "step": 3974 - }, - { - "epoch": 0.3584795057942914, - "grad_norm": 1.3199302074737223, - "learning_rate": 2.9710240733601266e-06, - "loss": 0.9525, - "step": 3975 - }, - { - "epoch": 0.3585696893177616, - "grad_norm": 1.577696109316189, - "learning_rate": 2.970513297493507e-06, - "loss": 1.0406, - "step": 3976 - }, - { - "epoch": 0.3586598728412319, - "grad_norm": 1.2095396279745985, - "learning_rate": 2.9700024388173416e-06, - "loss": 1.0072, - "step": 3977 - }, - { - "epoch": 0.35875005636470214, - "grad_norm": 1.6961425296300137, - "learning_rate": 2.969491497375219e-06, - "loss": 1.0088, - "step": 3978 - }, - { - "epoch": 0.35884023988817243, - "grad_norm": 1.359006064324957, - "learning_rate": 2.9689804732107364e-06, - "loss": 0.9815, - "step": 3979 - }, - { - "epoch": 0.3589304234116427, - "grad_norm": 1.5471811473256034, - "learning_rate": 2.9684693663674968e-06, - "loss": 0.9201, - "step": 3980 - }, - { - "epoch": 0.35902060693511295, - "grad_norm": 1.3666336681677593, - "learning_rate": 2.9679581768891115e-06, - "loss": 1.0492, - "step": 3981 - }, - { - "epoch": 0.35911079045858324, - "grad_norm": 1.501969236924919, - "learning_rate": 2.967446904819197e-06, - "loss": 0.8833, - "step": 3982 - }, - { - "epoch": 0.3592009739820535, - "grad_norm": 1.3899712472452947, - "learning_rate": 2.966935550201378e-06, - "loss": 0.9816, - "step": 3983 - }, - { - "epoch": 0.35929115750552376, - "grad_norm": 1.3575537308795689, - "learning_rate": 2.966424113079286e-06, - "loss": 0.957, - "step": 3984 - }, - { - "epoch": 0.359381341028994, - "grad_norm": 1.359755495686269, - "learning_rate": 2.9659125934965596e-06, - "loss": 1.0085, - "step": 3985 - }, - { - "epoch": 0.3594715245524643, - "grad_norm": 1.9853566662694175, - "learning_rate": 2.9654009914968457e-06, - "loss": 1.0257, - "step": 3986 - }, - { - "epoch": 0.3595617080759345, - "grad_norm": 2.8038378095205805, - "learning_rate": 2.9648893071237956e-06, - "loss": 0.8446, - "step": 3987 - }, - { - "epoch": 0.3596518915994048, - "grad_norm": 1.6327320425844776, - "learning_rate": 2.964377540421069e-06, - "loss": 0.9377, - "step": 3988 - }, - { - "epoch": 0.35974207512287504, - "grad_norm": 1.7325602479397617, - "learning_rate": 2.963865691432334e-06, - "loss": 0.9879, - "step": 3989 - }, - { - "epoch": 0.3598322586463453, - "grad_norm": 1.3621251579313283, - "learning_rate": 2.963353760201263e-06, - "loss": 0.8786, - "step": 3990 - }, - { - "epoch": 0.35992244216981556, - "grad_norm": 2.9708626708677186, - "learning_rate": 2.962841746771537e-06, - "loss": 0.9445, - "step": 3991 - }, - { - "epoch": 0.36001262569328585, - "grad_norm": 1.37619815988638, - "learning_rate": 2.9623296511868445e-06, - "loss": 0.9596, - "step": 3992 - }, - { - "epoch": 0.3601028092167561, - "grad_norm": 1.2449197530354328, - "learning_rate": 2.96181747349088e-06, - "loss": 0.8847, - "step": 3993 - }, - { - "epoch": 0.36019299274022637, - "grad_norm": 1.4556048163279547, - "learning_rate": 2.961305213727345e-06, - "loss": 0.9807, - "step": 3994 - }, - { - "epoch": 0.3602831762636966, - "grad_norm": 0.7234939256583137, - "learning_rate": 2.960792871939949e-06, - "loss": 0.8224, - "step": 3995 - }, - { - "epoch": 0.3603733597871669, - "grad_norm": 3.2267291970407364, - "learning_rate": 2.9602804481724064e-06, - "loss": 1.0475, - "step": 3996 - }, - { - "epoch": 0.3604635433106371, - "grad_norm": 1.686276168676744, - "learning_rate": 2.9597679424684427e-06, - "loss": 0.9568, - "step": 3997 - }, - { - "epoch": 0.3605537268341074, - "grad_norm": 1.5262752610224204, - "learning_rate": 2.9592553548717848e-06, - "loss": 0.9702, - "step": 3998 - }, - { - "epoch": 0.36064391035757765, - "grad_norm": 1.498075522257053, - "learning_rate": 2.958742685426171e-06, - "loss": 1.0437, - "step": 3999 - }, - { - "epoch": 0.36073409388104793, - "grad_norm": 1.460607970909319, - "learning_rate": 2.9582299341753446e-06, - "loss": 1.0279, - "step": 4000 - }, - { - "epoch": 0.36082427740451817, - "grad_norm": 1.572621787347886, - "learning_rate": 2.957717101163057e-06, - "loss": 0.9054, - "step": 4001 - }, - { - "epoch": 0.36091446092798846, - "grad_norm": 1.3350502324744116, - "learning_rate": 2.9572041864330655e-06, - "loss": 0.9612, - "step": 4002 - }, - { - "epoch": 0.36100464445145874, - "grad_norm": 1.631361028758608, - "learning_rate": 2.9566911900291346e-06, - "loss": 0.9803, - "step": 4003 - }, - { - "epoch": 0.361094827974929, - "grad_norm": 1.6340773214724753, - "learning_rate": 2.9561781119950368e-06, - "loss": 0.8778, - "step": 4004 - }, - { - "epoch": 0.36118501149839927, - "grad_norm": 1.2551189512136116, - "learning_rate": 2.9556649523745493e-06, - "loss": 0.9121, - "step": 4005 - }, - { - "epoch": 0.3612751950218695, - "grad_norm": 1.4258091754007556, - "learning_rate": 2.955151711211459e-06, - "loss": 1.021, - "step": 4006 - }, - { - "epoch": 0.3613653785453398, - "grad_norm": 1.2091811702462238, - "learning_rate": 2.9546383885495583e-06, - "loss": 0.9301, - "step": 4007 - }, - { - "epoch": 0.36145556206881, - "grad_norm": 1.5211597336229759, - "learning_rate": 2.9541249844326464e-06, - "loss": 0.9094, - "step": 4008 - }, - { - "epoch": 0.3615457455922803, - "grad_norm": 1.4322254604874818, - "learning_rate": 2.9536114989045295e-06, - "loss": 0.9536, - "step": 4009 - }, - { - "epoch": 0.36163592911575054, - "grad_norm": 1.2818556958869511, - "learning_rate": 2.9530979320090216e-06, - "loss": 1.0245, - "step": 4010 - }, - { - "epoch": 0.36172611263922083, - "grad_norm": 1.433817999240803, - "learning_rate": 2.9525842837899422e-06, - "loss": 1.0387, - "step": 4011 - }, - { - "epoch": 0.36181629616269106, - "grad_norm": 1.4244916544569692, - "learning_rate": 2.95207055429112e-06, - "loss": 1.0434, - "step": 4012 - }, - { - "epoch": 0.36190647968616135, - "grad_norm": 1.519498378525597, - "learning_rate": 2.951556743556388e-06, - "loss": 0.9526, - "step": 4013 - }, - { - "epoch": 0.3619966632096316, - "grad_norm": 1.858455526785035, - "learning_rate": 2.951042851629588e-06, - "loss": 1.009, - "step": 4014 - }, - { - "epoch": 0.3620868467331019, - "grad_norm": 1.7519610180459042, - "learning_rate": 2.950528878554568e-06, - "loss": 0.918, - "step": 4015 - }, - { - "epoch": 0.3621770302565721, - "grad_norm": 1.2508951319948745, - "learning_rate": 2.950014824375183e-06, - "loss": 0.9949, - "step": 4016 - }, - { - "epoch": 0.3622672137800424, - "grad_norm": 0.7062336759453028, - "learning_rate": 2.949500689135295e-06, - "loss": 0.7619, - "step": 4017 - }, - { - "epoch": 0.36235739730351263, - "grad_norm": 1.2800374224526927, - "learning_rate": 2.9489864728787722e-06, - "loss": 1.0418, - "step": 4018 - }, - { - "epoch": 0.3624475808269829, - "grad_norm": 1.3993879530095503, - "learning_rate": 2.9484721756494915e-06, - "loss": 1.0143, - "step": 4019 - }, - { - "epoch": 0.36253776435045315, - "grad_norm": 1.4867862591873866, - "learning_rate": 2.9479577974913343e-06, - "loss": 0.9632, - "step": 4020 - }, - { - "epoch": 0.36262794787392344, - "grad_norm": 1.4014873386672606, - "learning_rate": 2.9474433384481908e-06, - "loss": 0.9582, - "step": 4021 - }, - { - "epoch": 0.3627181313973937, - "grad_norm": 1.2543103764133154, - "learning_rate": 2.9469287985639577e-06, - "loss": 0.9536, - "step": 4022 - }, - { - "epoch": 0.36280831492086396, - "grad_norm": 1.372291324504051, - "learning_rate": 2.9464141778825384e-06, - "loss": 1.0112, - "step": 4023 - }, - { - "epoch": 0.3628984984443342, - "grad_norm": 1.3928664926767063, - "learning_rate": 2.9458994764478427e-06, - "loss": 0.9985, - "step": 4024 - }, - { - "epoch": 0.3629886819678045, - "grad_norm": 1.5413639269672283, - "learning_rate": 2.9453846943037883e-06, - "loss": 1.0033, - "step": 4025 - }, - { - "epoch": 0.36307886549127477, - "grad_norm": 1.3447334771404467, - "learning_rate": 2.9448698314942987e-06, - "loss": 0.9145, - "step": 4026 - }, - { - "epoch": 0.363169049014745, - "grad_norm": 0.8288340502043625, - "learning_rate": 2.944354888063305e-06, - "loss": 0.8688, - "step": 4027 - }, - { - "epoch": 0.3632592325382153, - "grad_norm": 1.3603963851390528, - "learning_rate": 2.9438398640547453e-06, - "loss": 0.9325, - "step": 4028 - }, - { - "epoch": 0.3633494160616855, - "grad_norm": 1.6434353690488106, - "learning_rate": 2.943324759512564e-06, - "loss": 0.9052, - "step": 4029 - }, - { - "epoch": 0.3634395995851558, - "grad_norm": 1.4836122661316975, - "learning_rate": 2.9428095744807134e-06, - "loss": 0.993, - "step": 4030 - }, - { - "epoch": 0.36352978310862605, - "grad_norm": 1.4913872775054529, - "learning_rate": 2.942294309003151e-06, - "loss": 0.919, - "step": 4031 - }, - { - "epoch": 0.36361996663209634, - "grad_norm": 1.5311670086227296, - "learning_rate": 2.941778963123843e-06, - "loss": 0.983, - "step": 4032 - }, - { - "epoch": 0.36371015015556657, - "grad_norm": 1.4723352542807377, - "learning_rate": 2.94126353688676e-06, - "loss": 0.9619, - "step": 4033 - }, - { - "epoch": 0.36380033367903686, - "grad_norm": 1.6959166263429548, - "learning_rate": 2.9407480303358825e-06, - "loss": 0.9107, - "step": 4034 - }, - { - "epoch": 0.3638905172025071, - "grad_norm": 1.2795914405475661, - "learning_rate": 2.940232443515195e-06, - "loss": 0.9729, - "step": 4035 - }, - { - "epoch": 0.3639807007259774, - "grad_norm": 1.4430374798312176, - "learning_rate": 2.9397167764686916e-06, - "loss": 0.9814, - "step": 4036 - }, - { - "epoch": 0.3640708842494476, - "grad_norm": 1.4940858120307374, - "learning_rate": 2.9392010292403714e-06, - "loss": 0.9367, - "step": 4037 - }, - { - "epoch": 0.3641610677729179, - "grad_norm": 1.470775992524823, - "learning_rate": 2.9386852018742404e-06, - "loss": 0.9552, - "step": 4038 - }, - { - "epoch": 0.36425125129638813, - "grad_norm": 1.493462941187916, - "learning_rate": 2.938169294414312e-06, - "loss": 0.9373, - "step": 4039 - }, - { - "epoch": 0.3643414348198584, - "grad_norm": 1.3480676437719115, - "learning_rate": 2.9376533069046067e-06, - "loss": 0.9931, - "step": 4040 - }, - { - "epoch": 0.36443161834332866, - "grad_norm": 1.0041036448877272, - "learning_rate": 2.9371372393891514e-06, - "loss": 0.8756, - "step": 4041 - }, - { - "epoch": 0.36452180186679894, - "grad_norm": 1.1568686917801247, - "learning_rate": 2.936621091911979e-06, - "loss": 1.0369, - "step": 4042 - }, - { - "epoch": 0.3646119853902692, - "grad_norm": 1.618211873279992, - "learning_rate": 2.936104864517131e-06, - "loss": 0.8845, - "step": 4043 - }, - { - "epoch": 0.36470216891373947, - "grad_norm": 1.4904878533248145, - "learning_rate": 2.9355885572486535e-06, - "loss": 0.9181, - "step": 4044 - }, - { - "epoch": 0.3647923524372097, - "grad_norm": 1.5136251725406156, - "learning_rate": 2.9350721701506026e-06, - "loss": 0.9978, - "step": 4045 - }, - { - "epoch": 0.36488253596068, - "grad_norm": 1.2987365690386798, - "learning_rate": 2.9345557032670375e-06, - "loss": 0.9538, - "step": 4046 - }, - { - "epoch": 0.3649727194841502, - "grad_norm": 1.6885298659304306, - "learning_rate": 2.934039156642027e-06, - "loss": 1.0083, - "step": 4047 - }, - { - "epoch": 0.3650629030076205, - "grad_norm": 1.6703749727961241, - "learning_rate": 2.9335225303196454e-06, - "loss": 0.8234, - "step": 4048 - }, - { - "epoch": 0.36515308653109074, - "grad_norm": 1.5052150827364312, - "learning_rate": 2.933005824343974e-06, - "loss": 0.9312, - "step": 4049 - }, - { - "epoch": 0.36524327005456103, - "grad_norm": 1.5079802582862627, - "learning_rate": 2.932489038759101e-06, - "loss": 0.9324, - "step": 4050 - }, - { - "epoch": 0.3653334535780313, - "grad_norm": 1.229658120178803, - "learning_rate": 2.9319721736091215e-06, - "loss": 0.8516, - "step": 4051 - }, - { - "epoch": 0.36542363710150155, - "grad_norm": 1.233697632950773, - "learning_rate": 2.9314552289381377e-06, - "loss": 0.8683, - "step": 4052 - }, - { - "epoch": 0.36551382062497184, - "grad_norm": 1.594885253485811, - "learning_rate": 2.9309382047902574e-06, - "loss": 0.9297, - "step": 4053 - }, - { - "epoch": 0.3656040041484421, - "grad_norm": 1.2376897223335668, - "learning_rate": 2.9304211012095963e-06, - "loss": 1.0087, - "step": 4054 - }, - { - "epoch": 0.36569418767191236, - "grad_norm": 1.5681441029874694, - "learning_rate": 2.929903918240277e-06, - "loss": 1.0225, - "step": 4055 - }, - { - "epoch": 0.3657843711953826, - "grad_norm": 0.744922217462518, - "learning_rate": 2.9293866559264273e-06, - "loss": 0.7968, - "step": 4056 - }, - { - "epoch": 0.3658745547188529, - "grad_norm": 1.5819347210857764, - "learning_rate": 2.928869314312184e-06, - "loss": 0.9632, - "step": 4057 - }, - { - "epoch": 0.3659647382423231, - "grad_norm": 2.0669238878990974, - "learning_rate": 2.9283518934416892e-06, - "loss": 0.9673, - "step": 4058 - }, - { - "epoch": 0.3660549217657934, - "grad_norm": 1.4578803403387453, - "learning_rate": 2.927834393359092e-06, - "loss": 0.9742, - "step": 4059 - }, - { - "epoch": 0.36614510528926364, - "grad_norm": 1.463216870017433, - "learning_rate": 2.927316814108548e-06, - "loss": 1.0296, - "step": 4060 - }, - { - "epoch": 0.36623528881273393, - "grad_norm": 1.5418224084217118, - "learning_rate": 2.92679915573422e-06, - "loss": 0.8608, - "step": 4061 - }, - { - "epoch": 0.36632547233620416, - "grad_norm": 1.3876268878687161, - "learning_rate": 2.926281418280278e-06, - "loss": 0.9029, - "step": 4062 - }, - { - "epoch": 0.36641565585967445, - "grad_norm": 1.3086107850745212, - "learning_rate": 2.925763601790899e-06, - "loss": 0.9614, - "step": 4063 - }, - { - "epoch": 0.3665058393831447, - "grad_norm": 1.3878991489379482, - "learning_rate": 2.9252457063102635e-06, - "loss": 0.9181, - "step": 4064 - }, - { - "epoch": 0.36659602290661497, - "grad_norm": 1.5362393584166065, - "learning_rate": 2.9247277318825626e-06, - "loss": 0.9258, - "step": 4065 - }, - { - "epoch": 0.3666862064300852, - "grad_norm": 1.249653100515422, - "learning_rate": 2.924209678551993e-06, - "loss": 1.0528, - "step": 4066 - }, - { - "epoch": 0.3667763899535555, - "grad_norm": 1.5742777394490093, - "learning_rate": 2.923691546362757e-06, - "loss": 0.968, - "step": 4067 - }, - { - "epoch": 0.3668665734770257, - "grad_norm": 1.4886695175132212, - "learning_rate": 2.9231733353590663e-06, - "loss": 0.9243, - "step": 4068 - }, - { - "epoch": 0.366956757000496, - "grad_norm": 1.5138982788090265, - "learning_rate": 2.922655045585136e-06, - "loss": 1.025, - "step": 4069 - }, - { - "epoch": 0.36704694052396625, - "grad_norm": 0.8722199144015655, - "learning_rate": 2.92213667708519e-06, - "loss": 0.8826, - "step": 4070 - }, - { - "epoch": 0.36713712404743654, - "grad_norm": 1.6739965507672323, - "learning_rate": 2.921618229903457e-06, - "loss": 0.9719, - "step": 4071 - }, - { - "epoch": 0.36722730757090677, - "grad_norm": 1.463749672802531, - "learning_rate": 2.9210997040841752e-06, - "loss": 0.8796, - "step": 4072 - }, - { - "epoch": 0.36731749109437706, - "grad_norm": 24.402532277265177, - "learning_rate": 2.9205810996715885e-06, - "loss": 0.9261, - "step": 4073 - }, - { - "epoch": 0.36740767461784735, - "grad_norm": 1.4207021046703787, - "learning_rate": 2.9200624167099456e-06, - "loss": 0.9248, - "step": 4074 - }, - { - "epoch": 0.3674978581413176, - "grad_norm": 1.3612601702461704, - "learning_rate": 2.919543655243505e-06, - "loss": 0.8734, - "step": 4075 - }, - { - "epoch": 0.36758804166478787, - "grad_norm": 1.5451799603931609, - "learning_rate": 2.919024815316529e-06, - "loss": 0.9913, - "step": 4076 - }, - { - "epoch": 0.3676782251882581, - "grad_norm": 1.81637678225005, - "learning_rate": 2.9185058969732877e-06, - "loss": 1.0833, - "step": 4077 - }, - { - "epoch": 0.3677684087117284, - "grad_norm": 1.523174321843935, - "learning_rate": 2.917986900258059e-06, - "loss": 0.9929, - "step": 4078 - }, - { - "epoch": 0.3678585922351986, - "grad_norm": 1.3711138775426805, - "learning_rate": 2.917467825215126e-06, - "loss": 0.954, - "step": 4079 - }, - { - "epoch": 0.3679487757586689, - "grad_norm": 1.431033642003534, - "learning_rate": 2.9169486718887803e-06, - "loss": 0.9126, - "step": 4080 - }, - { - "epoch": 0.36803895928213914, - "grad_norm": 1.2928025522049884, - "learning_rate": 2.9164294403233173e-06, - "loss": 1.012, - "step": 4081 - }, - { - "epoch": 0.36812914280560943, - "grad_norm": 1.733973001166516, - "learning_rate": 2.915910130563041e-06, - "loss": 0.9002, - "step": 4082 - }, - { - "epoch": 0.36821932632907967, - "grad_norm": 1.490329004485492, - "learning_rate": 2.915390742652262e-06, - "loss": 0.8994, - "step": 4083 - }, - { - "epoch": 0.36830950985254995, - "grad_norm": 2.1862161411043055, - "learning_rate": 2.914871276635298e-06, - "loss": 0.8915, - "step": 4084 - }, - { - "epoch": 0.3683996933760202, - "grad_norm": 1.8386883843715873, - "learning_rate": 2.914351732556472e-06, - "loss": 0.9358, - "step": 4085 - }, - { - "epoch": 0.3684898768994905, - "grad_norm": 1.2557868995933372, - "learning_rate": 2.9138321104601144e-06, - "loss": 0.8796, - "step": 4086 - }, - { - "epoch": 0.3685800604229607, - "grad_norm": 1.4555932688417754, - "learning_rate": 2.9133124103905623e-06, - "loss": 0.8903, - "step": 4087 - }, - { - "epoch": 0.368670243946431, - "grad_norm": 1.806504494976889, - "learning_rate": 2.9127926323921596e-06, - "loss": 0.9427, - "step": 4088 - }, - { - "epoch": 0.36876042746990123, - "grad_norm": 1.6438774204545255, - "learning_rate": 2.912272776509256e-06, - "loss": 0.9786, - "step": 4089 - }, - { - "epoch": 0.3688506109933715, - "grad_norm": 1.612784468524186, - "learning_rate": 2.911752842786209e-06, - "loss": 0.9256, - "step": 4090 - }, - { - "epoch": 0.36894079451684175, - "grad_norm": 1.5341030711196049, - "learning_rate": 2.911232831267383e-06, - "loss": 0.9041, - "step": 4091 - }, - { - "epoch": 0.36903097804031204, - "grad_norm": 1.6159050320631763, - "learning_rate": 2.910712741997146e-06, - "loss": 0.9864, - "step": 4092 - }, - { - "epoch": 0.3691211615637823, - "grad_norm": 1.367210605971045, - "learning_rate": 2.910192575019877e-06, - "loss": 0.9489, - "step": 4093 - }, - { - "epoch": 0.36921134508725256, - "grad_norm": 1.2605711261696781, - "learning_rate": 2.9096723303799583e-06, - "loss": 0.9919, - "step": 4094 - }, - { - "epoch": 0.3693015286107228, - "grad_norm": 1.6414958277593243, - "learning_rate": 2.9091520081217805e-06, - "loss": 0.9665, - "step": 4095 - }, - { - "epoch": 0.3693917121341931, - "grad_norm": 1.507612620333314, - "learning_rate": 2.908631608289741e-06, - "loss": 0.9016, - "step": 4096 - }, - { - "epoch": 0.3694818956576633, - "grad_norm": 1.518329841673961, - "learning_rate": 2.9081111309282423e-06, - "loss": 0.9459, - "step": 4097 - }, - { - "epoch": 0.3695720791811336, - "grad_norm": 0.7524243748325968, - "learning_rate": 2.9075905760816942e-06, - "loss": 0.8529, - "step": 4098 - }, - { - "epoch": 0.3696622627046039, - "grad_norm": 1.540913819019105, - "learning_rate": 2.907069943794514e-06, - "loss": 0.9822, - "step": 4099 - }, - { - "epoch": 0.3697524462280741, - "grad_norm": 1.6487038202882534, - "learning_rate": 2.906549234111125e-06, - "loss": 0.9358, - "step": 4100 - }, - { - "epoch": 0.3698426297515444, - "grad_norm": 1.6047096406919121, - "learning_rate": 2.906028447075956e-06, - "loss": 1.0158, - "step": 4101 - }, - { - "epoch": 0.36993281327501465, - "grad_norm": 1.5676814570187854, - "learning_rate": 2.905507582733445e-06, - "loss": 1.0417, - "step": 4102 - }, - { - "epoch": 0.37002299679848494, - "grad_norm": 1.3540104653673848, - "learning_rate": 2.904986641128033e-06, - "loss": 1.0717, - "step": 4103 - }, - { - "epoch": 0.37011318032195517, - "grad_norm": 1.2824600832194621, - "learning_rate": 2.9044656223041716e-06, - "loss": 0.943, - "step": 4104 - }, - { - "epoch": 0.37020336384542546, - "grad_norm": 1.4716068647221938, - "learning_rate": 2.9039445263063157e-06, - "loss": 0.9074, - "step": 4105 - }, - { - "epoch": 0.3702935473688957, - "grad_norm": 1.806019279237669, - "learning_rate": 2.903423353178929e-06, - "loss": 0.8587, - "step": 4106 - }, - { - "epoch": 0.370383730892366, - "grad_norm": 1.4009739127905856, - "learning_rate": 2.9029021029664802e-06, - "loss": 0.9574, - "step": 4107 - }, - { - "epoch": 0.3704739144158362, - "grad_norm": 1.3215151844949218, - "learning_rate": 2.9023807757134455e-06, - "loss": 1.0088, - "step": 4108 - }, - { - "epoch": 0.3705640979393065, - "grad_norm": 1.4687899320328002, - "learning_rate": 2.901859371464307e-06, - "loss": 1.0201, - "step": 4109 - }, - { - "epoch": 0.37065428146277674, - "grad_norm": 2.5301296439144476, - "learning_rate": 2.9013378902635535e-06, - "loss": 0.9953, - "step": 4110 - }, - { - "epoch": 0.370744464986247, - "grad_norm": 1.5302273975255498, - "learning_rate": 2.9008163321556823e-06, - "loss": 0.9323, - "step": 4111 - }, - { - "epoch": 0.37083464850971726, - "grad_norm": 1.511167046074241, - "learning_rate": 2.900294697185194e-06, - "loss": 0.9648, - "step": 4112 - }, - { - "epoch": 0.37092483203318755, - "grad_norm": 1.2677726890175056, - "learning_rate": 2.899772985396599e-06, - "loss": 0.9795, - "step": 4113 - }, - { - "epoch": 0.3710150155566578, - "grad_norm": 1.4972534465817005, - "learning_rate": 2.8992511968344104e-06, - "loss": 0.9511, - "step": 4114 - }, - { - "epoch": 0.37110519908012807, - "grad_norm": 2.8939674641771322, - "learning_rate": 2.8987293315431523e-06, - "loss": 0.9013, - "step": 4115 - }, - { - "epoch": 0.3711953826035983, - "grad_norm": 1.2013573480090578, - "learning_rate": 2.898207389567351e-06, - "loss": 0.8934, - "step": 4116 - }, - { - "epoch": 0.3712855661270686, - "grad_norm": 1.4453796371125611, - "learning_rate": 2.897685370951543e-06, - "loss": 0.9507, - "step": 4117 - }, - { - "epoch": 0.3713757496505388, - "grad_norm": 2.104526514184953, - "learning_rate": 2.89716327574027e-06, - "loss": 0.8559, - "step": 4118 - }, - { - "epoch": 0.3714659331740091, - "grad_norm": 1.3184505628909318, - "learning_rate": 2.8966411039780787e-06, - "loss": 1.0385, - "step": 4119 - }, - { - "epoch": 0.37155611669747934, - "grad_norm": 1.4021727901373244, - "learning_rate": 2.8961188557095248e-06, - "loss": 0.9629, - "step": 4120 - }, - { - "epoch": 0.37164630022094963, - "grad_norm": 1.5367310190414043, - "learning_rate": 2.895596530979168e-06, - "loss": 0.8981, - "step": 4121 - }, - { - "epoch": 0.3717364837444199, - "grad_norm": 1.4776463642981645, - "learning_rate": 2.895074129831578e-06, - "loss": 0.9711, - "step": 4122 - }, - { - "epoch": 0.37182666726789015, - "grad_norm": 1.3960380641289432, - "learning_rate": 2.8945516523113275e-06, - "loss": 0.8587, - "step": 4123 - }, - { - "epoch": 0.37191685079136044, - "grad_norm": 0.7799657565938684, - "learning_rate": 2.894029098462998e-06, - "loss": 0.7792, - "step": 4124 - }, - { - "epoch": 0.3720070343148307, - "grad_norm": 1.3374256969027167, - "learning_rate": 2.8935064683311756e-06, - "loss": 0.9808, - "step": 4125 - }, - { - "epoch": 0.37209721783830096, - "grad_norm": 0.7124368974281706, - "learning_rate": 2.8929837619604544e-06, - "loss": 0.8828, - "step": 4126 - }, - { - "epoch": 0.3721874013617712, - "grad_norm": 1.5063591312113582, - "learning_rate": 2.8924609793954346e-06, - "loss": 1.0044, - "step": 4127 - }, - { - "epoch": 0.3722775848852415, - "grad_norm": 1.5968678961370013, - "learning_rate": 2.891938120680724e-06, - "loss": 0.9178, - "step": 4128 - }, - { - "epoch": 0.3723677684087117, - "grad_norm": 1.2138440597184557, - "learning_rate": 2.8914151858609343e-06, - "loss": 0.9445, - "step": 4129 - }, - { - "epoch": 0.372457951932182, - "grad_norm": 1.4952929871181866, - "learning_rate": 2.8908921749806858e-06, - "loss": 0.9804, - "step": 4130 - }, - { - "epoch": 0.37254813545565224, - "grad_norm": 1.3736150442379316, - "learning_rate": 2.890369088084605e-06, - "loss": 0.9657, - "step": 4131 - }, - { - "epoch": 0.37263831897912253, - "grad_norm": 1.342856398360501, - "learning_rate": 2.889845925217323e-06, - "loss": 0.9845, - "step": 4132 - }, - { - "epoch": 0.37272850250259276, - "grad_norm": 1.466676750292938, - "learning_rate": 2.8893226864234813e-06, - "loss": 0.9676, - "step": 4133 - }, - { - "epoch": 0.37281868602606305, - "grad_norm": 1.4144681907089554, - "learning_rate": 2.8887993717477236e-06, - "loss": 0.9747, - "step": 4134 - }, - { - "epoch": 0.3729088695495333, - "grad_norm": 1.513820793455783, - "learning_rate": 2.8882759812347035e-06, - "loss": 0.8437, - "step": 4135 - }, - { - "epoch": 0.3729990530730036, - "grad_norm": 1.4874462278247795, - "learning_rate": 2.887752514929078e-06, - "loss": 0.9556, - "step": 4136 - }, - { - "epoch": 0.3730892365964738, - "grad_norm": 1.5889651708512893, - "learning_rate": 2.887228972875513e-06, - "loss": 0.9071, - "step": 4137 - }, - { - "epoch": 0.3731794201199441, - "grad_norm": 1.5465056623866418, - "learning_rate": 2.88670535511868e-06, - "loss": 0.9628, - "step": 4138 - }, - { - "epoch": 0.3732696036434143, - "grad_norm": 1.5777609376874273, - "learning_rate": 2.886181661703257e-06, - "loss": 0.8947, - "step": 4139 - }, - { - "epoch": 0.3733597871668846, - "grad_norm": 1.5294573664339701, - "learning_rate": 2.8856578926739285e-06, - "loss": 0.9191, - "step": 4140 - }, - { - "epoch": 0.37344997069035485, - "grad_norm": 1.5651249008794101, - "learning_rate": 2.8851340480753846e-06, - "loss": 0.9191, - "step": 4141 - }, - { - "epoch": 0.37354015421382514, - "grad_norm": 1.5979196196142944, - "learning_rate": 2.8846101279523232e-06, - "loss": 0.9681, - "step": 4142 - }, - { - "epoch": 0.37363033773729537, - "grad_norm": 1.4456183857319116, - "learning_rate": 2.8840861323494487e-06, - "loss": 0.9352, - "step": 4143 - }, - { - "epoch": 0.37372052126076566, - "grad_norm": 1.5720561960188197, - "learning_rate": 2.88356206131147e-06, - "loss": 1.0194, - "step": 4144 - }, - { - "epoch": 0.37381070478423595, - "grad_norm": 1.6122190659522275, - "learning_rate": 2.883037914883104e-06, - "loss": 0.9485, - "step": 4145 - }, - { - "epoch": 0.3739008883077062, - "grad_norm": 2.4379546890679666, - "learning_rate": 2.882513693109075e-06, - "loss": 0.9259, - "step": 4146 - }, - { - "epoch": 0.37399107183117647, - "grad_norm": 1.262843478095909, - "learning_rate": 2.8819893960341106e-06, - "loss": 0.9383, - "step": 4147 - }, - { - "epoch": 0.3740812553546467, - "grad_norm": 1.7900417929304393, - "learning_rate": 2.881465023702948e-06, - "loss": 1.0081, - "step": 4148 - }, - { - "epoch": 0.374171438878117, - "grad_norm": 1.3493270910043909, - "learning_rate": 2.8809405761603294e-06, - "loss": 1.0402, - "step": 4149 - }, - { - "epoch": 0.3742616224015872, - "grad_norm": 1.7083150195869146, - "learning_rate": 2.880416053451003e-06, - "loss": 0.8805, - "step": 4150 - }, - { - "epoch": 0.3743518059250575, - "grad_norm": 1.5427415125144668, - "learning_rate": 2.879891455619725e-06, - "loss": 1.0492, - "step": 4151 - }, - { - "epoch": 0.37444198944852775, - "grad_norm": 1.2280881589843164, - "learning_rate": 2.879366782711256e-06, - "loss": 0.9602, - "step": 4152 - }, - { - "epoch": 0.37453217297199803, - "grad_norm": 1.2806092497646915, - "learning_rate": 2.8788420347703643e-06, - "loss": 0.981, - "step": 4153 - }, - { - "epoch": 0.37462235649546827, - "grad_norm": 0.7347699584508668, - "learning_rate": 2.8783172118418244e-06, - "loss": 0.8266, - "step": 4154 - }, - { - "epoch": 0.37471254001893856, - "grad_norm": 1.3340622081921585, - "learning_rate": 2.877792313970417e-06, - "loss": 0.9656, - "step": 4155 - }, - { - "epoch": 0.3748027235424088, - "grad_norm": 1.2528552823410972, - "learning_rate": 2.8772673412009293e-06, - "loss": 1.0107, - "step": 4156 - }, - { - "epoch": 0.3748929070658791, - "grad_norm": 1.5550837179628134, - "learning_rate": 2.8767422935781545e-06, - "loss": 1.0115, - "step": 4157 - }, - { - "epoch": 0.3749830905893493, - "grad_norm": 1.2708466273795436, - "learning_rate": 2.8762171711468935e-06, - "loss": 0.9966, - "step": 4158 - }, - { - "epoch": 0.3750732741128196, - "grad_norm": 1.3390124886965222, - "learning_rate": 2.875691973951952e-06, - "loss": 0.9589, - "step": 4159 - }, - { - "epoch": 0.37516345763628983, - "grad_norm": 0.6928516732008412, - "learning_rate": 2.8751667020381425e-06, - "loss": 0.8751, - "step": 4160 - }, - { - "epoch": 0.3752536411597601, - "grad_norm": 1.4630509862874097, - "learning_rate": 2.8746413554502837e-06, - "loss": 1.0249, - "step": 4161 - }, - { - "epoch": 0.37534382468323035, - "grad_norm": 1.3751108818428683, - "learning_rate": 2.8741159342332027e-06, - "loss": 0.9024, - "step": 4162 - }, - { - "epoch": 0.37543400820670064, - "grad_norm": 1.7293039366993903, - "learning_rate": 2.87359043843173e-06, - "loss": 1.0127, - "step": 4163 - }, - { - "epoch": 0.3755241917301709, - "grad_norm": 1.2896003756940262, - "learning_rate": 2.873064868090704e-06, - "loss": 0.9959, - "step": 4164 - }, - { - "epoch": 0.37561437525364116, - "grad_norm": 1.4241074025230325, - "learning_rate": 2.8725392232549697e-06, - "loss": 1.0528, - "step": 4165 - }, - { - "epoch": 0.3757045587771114, - "grad_norm": 1.766168696320112, - "learning_rate": 2.872013503969378e-06, - "loss": 1.0323, - "step": 4166 - }, - { - "epoch": 0.3757947423005817, - "grad_norm": 1.4006422766420428, - "learning_rate": 2.8714877102787853e-06, - "loss": 0.9748, - "step": 4167 - }, - { - "epoch": 0.3758849258240519, - "grad_norm": 1.4936402124895556, - "learning_rate": 2.8709618422280564e-06, - "loss": 0.9662, - "step": 4168 - }, - { - "epoch": 0.3759751093475222, - "grad_norm": 1.2433912574546617, - "learning_rate": 2.8704358998620605e-06, - "loss": 0.9406, - "step": 4169 - }, - { - "epoch": 0.3760652928709925, - "grad_norm": 1.4184897230258082, - "learning_rate": 2.8699098832256735e-06, - "loss": 0.9853, - "step": 4170 - }, - { - "epoch": 0.37615547639446273, - "grad_norm": 1.5447501027986486, - "learning_rate": 2.86938379236378e-06, - "loss": 0.8764, - "step": 4171 - }, - { - "epoch": 0.376245659917933, - "grad_norm": 2.2181735297274194, - "learning_rate": 2.868857627321266e-06, - "loss": 0.8885, - "step": 4172 - }, - { - "epoch": 0.37633584344140325, - "grad_norm": 1.2007379507921256, - "learning_rate": 2.8683313881430296e-06, - "loss": 0.9621, - "step": 4173 - }, - { - "epoch": 0.37642602696487354, - "grad_norm": 1.4014376207256447, - "learning_rate": 2.8678050748739706e-06, - "loss": 0.9548, - "step": 4174 - }, - { - "epoch": 0.37651621048834377, - "grad_norm": 1.5314520877687776, - "learning_rate": 2.8672786875589976e-06, - "loss": 0.9273, - "step": 4175 - }, - { - "epoch": 0.37660639401181406, - "grad_norm": 1.4201289764448013, - "learning_rate": 2.866752226243025e-06, - "loss": 0.9173, - "step": 4176 - }, - { - "epoch": 0.3766965775352843, - "grad_norm": 1.4795204688664498, - "learning_rate": 2.8662256909709733e-06, - "loss": 1.0038, - "step": 4177 - }, - { - "epoch": 0.3767867610587546, - "grad_norm": 1.4552563039764033, - "learning_rate": 2.865699081787769e-06, - "loss": 1.0001, - "step": 4178 - }, - { - "epoch": 0.3768769445822248, - "grad_norm": 1.3920412031685592, - "learning_rate": 2.8651723987383465e-06, - "loss": 0.9469, - "step": 4179 - }, - { - "epoch": 0.3769671281056951, - "grad_norm": 1.4061658198274314, - "learning_rate": 2.8646456418676437e-06, - "loss": 0.9295, - "step": 4180 - }, - { - "epoch": 0.37705731162916534, - "grad_norm": 0.764966019393091, - "learning_rate": 2.8641188112206067e-06, - "loss": 0.8266, - "step": 4181 - }, - { - "epoch": 0.3771474951526356, - "grad_norm": 1.8377784751540716, - "learning_rate": 2.863591906842189e-06, - "loss": 0.9912, - "step": 4182 - }, - { - "epoch": 0.37723767867610586, - "grad_norm": 1.3822461277643072, - "learning_rate": 2.863064928777347e-06, - "loss": 0.9905, - "step": 4183 - }, - { - "epoch": 0.37732786219957615, - "grad_norm": 1.7178036945707895, - "learning_rate": 2.862537877071047e-06, - "loss": 0.9433, - "step": 4184 - }, - { - "epoch": 0.3774180457230464, - "grad_norm": 0.7191246548961313, - "learning_rate": 2.8620107517682597e-06, - "loss": 0.7703, - "step": 4185 - }, - { - "epoch": 0.37750822924651667, - "grad_norm": 1.2949689623526603, - "learning_rate": 2.8614835529139618e-06, - "loss": 0.8991, - "step": 4186 - }, - { - "epoch": 0.3775984127699869, - "grad_norm": 1.475719069730453, - "learning_rate": 2.8609562805531367e-06, - "loss": 0.9159, - "step": 4187 - }, - { - "epoch": 0.3776885962934572, - "grad_norm": 6.143782485767369, - "learning_rate": 2.8604289347307746e-06, - "loss": 0.9966, - "step": 4188 - }, - { - "epoch": 0.3777787798169274, - "grad_norm": 1.4909240806485797, - "learning_rate": 2.859901515491871e-06, - "loss": 0.9587, - "step": 4189 - }, - { - "epoch": 0.3778689633403977, - "grad_norm": 1.4592659102842513, - "learning_rate": 2.8593740228814298e-06, - "loss": 0.8535, - "step": 4190 - }, - { - "epoch": 0.37795914686386795, - "grad_norm": 1.8160966506773368, - "learning_rate": 2.8588464569444574e-06, - "loss": 0.9488, - "step": 4191 - }, - { - "epoch": 0.37804933038733823, - "grad_norm": 1.3362971485409256, - "learning_rate": 2.8583188177259697e-06, - "loss": 0.9468, - "step": 4192 - }, - { - "epoch": 0.3781395139108085, - "grad_norm": 1.6139023441943658, - "learning_rate": 2.857791105270988e-06, - "loss": 0.9443, - "step": 4193 - }, - { - "epoch": 0.37822969743427876, - "grad_norm": 1.5244036618549344, - "learning_rate": 2.857263319624539e-06, - "loss": 0.984, - "step": 4194 - }, - { - "epoch": 0.37831988095774904, - "grad_norm": 1.4430008831484096, - "learning_rate": 2.856735460831657e-06, - "loss": 0.9413, - "step": 4195 - }, - { - "epoch": 0.3784100644812193, - "grad_norm": 1.3554235885088446, - "learning_rate": 2.856207528937382e-06, - "loss": 0.9761, - "step": 4196 - }, - { - "epoch": 0.37850024800468957, - "grad_norm": 1.404386514871991, - "learning_rate": 2.855679523986759e-06, - "loss": 0.9456, - "step": 4197 - }, - { - "epoch": 0.3785904315281598, - "grad_norm": 2.102987494090032, - "learning_rate": 2.8551514460248406e-06, - "loss": 1.008, - "step": 4198 - }, - { - "epoch": 0.3786806150516301, - "grad_norm": 1.3579747672623075, - "learning_rate": 2.8546232950966868e-06, - "loss": 0.964, - "step": 4199 - }, - { - "epoch": 0.3787707985751003, - "grad_norm": 1.887299568700693, - "learning_rate": 2.85409507124736e-06, - "loss": 0.9139, - "step": 4200 - }, - { - "epoch": 0.3788609820985706, - "grad_norm": 1.3745776308019844, - "learning_rate": 2.8535667745219324e-06, - "loss": 0.9549, - "step": 4201 - }, - { - "epoch": 0.37895116562204084, - "grad_norm": 1.4057743539669658, - "learning_rate": 2.853038404965481e-06, - "loss": 0.9493, - "step": 4202 - }, - { - "epoch": 0.37904134914551113, - "grad_norm": 2.4904022040566, - "learning_rate": 2.8525099626230894e-06, - "loss": 1.0378, - "step": 4203 - }, - { - "epoch": 0.37913153266898136, - "grad_norm": 1.6570895963951247, - "learning_rate": 2.8519814475398472e-06, - "loss": 0.8979, - "step": 4204 - }, - { - "epoch": 0.37922171619245165, - "grad_norm": 1.1313390686209732, - "learning_rate": 2.8514528597608502e-06, - "loss": 0.9357, - "step": 4205 - }, - { - "epoch": 0.3793118997159219, - "grad_norm": 1.2160259517203007, - "learning_rate": 2.8509241993312004e-06, - "loss": 0.9965, - "step": 4206 - }, - { - "epoch": 0.3794020832393922, - "grad_norm": 1.9209581374572007, - "learning_rate": 2.850395466296006e-06, - "loss": 1.0018, - "step": 4207 - }, - { - "epoch": 0.3794922667628624, - "grad_norm": 1.5305920958652475, - "learning_rate": 2.849866660700381e-06, - "loss": 0.9864, - "step": 4208 - }, - { - "epoch": 0.3795824502863327, - "grad_norm": 1.5271552474439303, - "learning_rate": 2.8493377825894464e-06, - "loss": 1.0195, - "step": 4209 - }, - { - "epoch": 0.37967263380980293, - "grad_norm": 1.759322539338457, - "learning_rate": 2.848808832008329e-06, - "loss": 1.0228, - "step": 4210 - }, - { - "epoch": 0.3797628173332732, - "grad_norm": 1.3564488717809, - "learning_rate": 2.848279809002162e-06, - "loss": 0.9953, - "step": 4211 - }, - { - "epoch": 0.37985300085674345, - "grad_norm": 1.457949678727388, - "learning_rate": 2.8477507136160842e-06, - "loss": 0.9704, - "step": 4212 - }, - { - "epoch": 0.37994318438021374, - "grad_norm": 0.7695255182507827, - "learning_rate": 2.847221545895241e-06, - "loss": 0.8174, - "step": 4213 - }, - { - "epoch": 0.38003336790368397, - "grad_norm": 1.2407397104267464, - "learning_rate": 2.846692305884785e-06, - "loss": 0.9571, - "step": 4214 - }, - { - "epoch": 0.38012355142715426, - "grad_norm": 1.323652973671641, - "learning_rate": 2.8461629936298718e-06, - "loss": 1.003, - "step": 4215 - }, - { - "epoch": 0.3802137349506245, - "grad_norm": 2.1194799195544483, - "learning_rate": 2.845633609175666e-06, - "loss": 1.0145, - "step": 4216 - }, - { - "epoch": 0.3803039184740948, - "grad_norm": 1.4873366674941266, - "learning_rate": 2.8451041525673383e-06, - "loss": 0.9949, - "step": 4217 - }, - { - "epoch": 0.38039410199756507, - "grad_norm": 1.7669313037646683, - "learning_rate": 2.8445746238500647e-06, - "loss": 0.9937, - "step": 4218 - }, - { - "epoch": 0.3804842855210353, - "grad_norm": 1.6190306254571631, - "learning_rate": 2.844045023069027e-06, - "loss": 0.9489, - "step": 4219 - }, - { - "epoch": 0.3805744690445056, - "grad_norm": 1.4460179437339165, - "learning_rate": 2.8435153502694136e-06, - "loss": 1.003, - "step": 4220 - }, - { - "epoch": 0.3806646525679758, - "grad_norm": 1.849381784295791, - "learning_rate": 2.84298560549642e-06, - "loss": 0.9025, - "step": 4221 - }, - { - "epoch": 0.3807548360914461, - "grad_norm": 1.7477851202852923, - "learning_rate": 2.8424557887952462e-06, - "loss": 0.9807, - "step": 4222 - }, - { - "epoch": 0.38084501961491635, - "grad_norm": 1.6189183357523267, - "learning_rate": 2.841925900211099e-06, - "loss": 0.8917, - "step": 4223 - }, - { - "epoch": 0.38093520313838664, - "grad_norm": 1.3120479713601902, - "learning_rate": 2.841395939789192e-06, - "loss": 1.0119, - "step": 4224 - }, - { - "epoch": 0.38102538666185687, - "grad_norm": 1.5113745009109372, - "learning_rate": 2.8408659075747435e-06, - "loss": 0.9883, - "step": 4225 - }, - { - "epoch": 0.38111557018532716, - "grad_norm": 1.4056982865486414, - "learning_rate": 2.8403358036129796e-06, - "loss": 0.9675, - "step": 4226 - }, - { - "epoch": 0.3812057537087974, - "grad_norm": 1.4013380944824156, - "learning_rate": 2.839805627949132e-06, - "loss": 1.0109, - "step": 4227 - }, - { - "epoch": 0.3812959372322677, - "grad_norm": 1.5958257882493472, - "learning_rate": 2.8392753806284367e-06, - "loss": 1.0205, - "step": 4228 - }, - { - "epoch": 0.3813861207557379, - "grad_norm": 1.3701200587160312, - "learning_rate": 2.838745061696139e-06, - "loss": 0.9148, - "step": 4229 - }, - { - "epoch": 0.3814763042792082, - "grad_norm": 1.4649093409794405, - "learning_rate": 2.838214671197487e-06, - "loss": 0.8991, - "step": 4230 - }, - { - "epoch": 0.38156648780267843, - "grad_norm": 1.289951780660005, - "learning_rate": 2.8376842091777377e-06, - "loss": 1.0664, - "step": 4231 - }, - { - "epoch": 0.3816566713261487, - "grad_norm": 1.335795389391291, - "learning_rate": 2.8371536756821524e-06, - "loss": 0.8703, - "step": 4232 - }, - { - "epoch": 0.38174685484961896, - "grad_norm": 1.3304969251497527, - "learning_rate": 2.836623070756e-06, - "loss": 0.9465, - "step": 4233 - }, - { - "epoch": 0.38183703837308924, - "grad_norm": 1.4381142837940812, - "learning_rate": 2.8360923944445542e-06, - "loss": 0.9893, - "step": 4234 - }, - { - "epoch": 0.3819272218965595, - "grad_norm": 1.3760256843110403, - "learning_rate": 2.8355616467930947e-06, - "loss": 0.9526, - "step": 4235 - }, - { - "epoch": 0.38201740542002977, - "grad_norm": 1.5869012920159233, - "learning_rate": 2.8350308278469085e-06, - "loss": 0.9402, - "step": 4236 - }, - { - "epoch": 0.3821075889435, - "grad_norm": 1.3314032542527736, - "learning_rate": 2.8344999376512877e-06, - "loss": 0.9999, - "step": 4237 - }, - { - "epoch": 0.3821977724669703, - "grad_norm": 1.6353250546723386, - "learning_rate": 2.8339689762515307e-06, - "loss": 0.9533, - "step": 4238 - }, - { - "epoch": 0.3822879559904405, - "grad_norm": 1.333504809836834, - "learning_rate": 2.8334379436929424e-06, - "loss": 0.8954, - "step": 4239 - }, - { - "epoch": 0.3823781395139108, - "grad_norm": 1.3510174837597626, - "learning_rate": 2.832906840020833e-06, - "loss": 0.9342, - "step": 4240 - }, - { - "epoch": 0.3824683230373811, - "grad_norm": 1.3216900833688376, - "learning_rate": 2.83237566528052e-06, - "loss": 0.9745, - "step": 4241 - }, - { - "epoch": 0.38255850656085133, - "grad_norm": 1.3033293547817557, - "learning_rate": 2.831844419517325e-06, - "loss": 0.9507, - "step": 4242 - }, - { - "epoch": 0.3826486900843216, - "grad_norm": 1.3502061244995778, - "learning_rate": 2.8313131027765774e-06, - "loss": 1.004, - "step": 4243 - }, - { - "epoch": 0.38273887360779185, - "grad_norm": 1.346003660795043, - "learning_rate": 2.8307817151036124e-06, - "loss": 0.9401, - "step": 4244 - }, - { - "epoch": 0.38282905713126214, - "grad_norm": 1.4564646419446177, - "learning_rate": 2.8302502565437704e-06, - "loss": 0.9426, - "step": 4245 - }, - { - "epoch": 0.3829192406547324, - "grad_norm": 1.68662359297011, - "learning_rate": 2.829718727142398e-06, - "loss": 0.896, - "step": 4246 - }, - { - "epoch": 0.38300942417820266, - "grad_norm": 1.3420745251396242, - "learning_rate": 2.829187126944849e-06, - "loss": 0.9821, - "step": 4247 - }, - { - "epoch": 0.3830996077016729, - "grad_norm": 1.6300086080873821, - "learning_rate": 2.8286554559964826e-06, - "loss": 0.9121, - "step": 4248 - }, - { - "epoch": 0.3831897912251432, - "grad_norm": 1.5366394798262517, - "learning_rate": 2.8281237143426637e-06, - "loss": 0.9886, - "step": 4249 - }, - { - "epoch": 0.3832799747486134, - "grad_norm": 1.3104278919768155, - "learning_rate": 2.8275919020287626e-06, - "loss": 0.8765, - "step": 4250 - }, - { - "epoch": 0.3833701582720837, - "grad_norm": 1.401469943967838, - "learning_rate": 2.827060019100158e-06, - "loss": 0.9867, - "step": 4251 - }, - { - "epoch": 0.38346034179555394, - "grad_norm": 1.5362889428165238, - "learning_rate": 2.8265280656022315e-06, - "loss": 0.956, - "step": 4252 - }, - { - "epoch": 0.3835505253190242, - "grad_norm": 1.4751627104934628, - "learning_rate": 2.825996041580373e-06, - "loss": 0.8534, - "step": 4253 - }, - { - "epoch": 0.38364070884249446, - "grad_norm": 1.5077365994497556, - "learning_rate": 2.825463947079978e-06, - "loss": 0.9826, - "step": 4254 - }, - { - "epoch": 0.38373089236596475, - "grad_norm": 1.2971374981423187, - "learning_rate": 2.8249317821464483e-06, - "loss": 0.966, - "step": 4255 - }, - { - "epoch": 0.383821075889435, - "grad_norm": 5.346249180013947, - "learning_rate": 2.824399546825189e-06, - "loss": 0.8975, - "step": 4256 - }, - { - "epoch": 0.38391125941290527, - "grad_norm": 1.2560039810767527, - "learning_rate": 2.823867241161616e-06, - "loss": 0.9227, - "step": 4257 - }, - { - "epoch": 0.3840014429363755, - "grad_norm": 1.5629406879761847, - "learning_rate": 2.8233348652011456e-06, - "loss": 1.0128, - "step": 4258 - }, - { - "epoch": 0.3840916264598458, - "grad_norm": 1.563159116962191, - "learning_rate": 2.8228024189892057e-06, - "loss": 0.8794, - "step": 4259 - }, - { - "epoch": 0.384181809983316, - "grad_norm": 1.2092386449785846, - "learning_rate": 2.822269902571226e-06, - "loss": 0.9682, - "step": 4260 - }, - { - "epoch": 0.3842719935067863, - "grad_norm": 1.3528680835001816, - "learning_rate": 2.8217373159926446e-06, - "loss": 0.9444, - "step": 4261 - }, - { - "epoch": 0.38436217703025655, - "grad_norm": 1.148205065398878, - "learning_rate": 2.8212046592989046e-06, - "loss": 0.9021, - "step": 4262 - }, - { - "epoch": 0.38445236055372684, - "grad_norm": 1.4254748707633307, - "learning_rate": 2.820671932535455e-06, - "loss": 0.972, - "step": 4263 - }, - { - "epoch": 0.3845425440771971, - "grad_norm": 1.7969913610846169, - "learning_rate": 2.8201391357477506e-06, - "loss": 0.9475, - "step": 4264 - }, - { - "epoch": 0.38463272760066736, - "grad_norm": 1.3132865456333083, - "learning_rate": 2.8196062689812525e-06, - "loss": 0.9363, - "step": 4265 - }, - { - "epoch": 0.38472291112413765, - "grad_norm": 1.3691532499027859, - "learning_rate": 2.819073332281429e-06, - "loss": 0.9084, - "step": 4266 - }, - { - "epoch": 0.3848130946476079, - "grad_norm": 0.948938901488038, - "learning_rate": 2.8185403256937524e-06, - "loss": 0.7692, - "step": 4267 - }, - { - "epoch": 0.38490327817107817, - "grad_norm": 1.5463732281293054, - "learning_rate": 2.8180072492637016e-06, - "loss": 0.922, - "step": 4268 - }, - { - "epoch": 0.3849934616945484, - "grad_norm": 1.431402959521055, - "learning_rate": 2.817474103036762e-06, - "loss": 0.9497, - "step": 4269 - }, - { - "epoch": 0.3850836452180187, - "grad_norm": 2.353347253118886, - "learning_rate": 2.816940887058425e-06, - "loss": 0.9831, - "step": 4270 - }, - { - "epoch": 0.3851738287414889, - "grad_norm": 1.4920967433951429, - "learning_rate": 2.816407601374186e-06, - "loss": 1.0077, - "step": 4271 - }, - { - "epoch": 0.3852640122649592, - "grad_norm": 1.435037826278789, - "learning_rate": 2.815874246029549e-06, - "loss": 0.9016, - "step": 4272 - }, - { - "epoch": 0.38535419578842944, - "grad_norm": 1.2792922158192828, - "learning_rate": 2.815340821070023e-06, - "loss": 0.9163, - "step": 4273 - }, - { - "epoch": 0.38544437931189973, - "grad_norm": 1.543745400734664, - "learning_rate": 2.814807326541122e-06, - "loss": 0.942, - "step": 4274 - }, - { - "epoch": 0.38553456283536996, - "grad_norm": 1.357000227833267, - "learning_rate": 2.8142737624883676e-06, - "loss": 0.8982, - "step": 4275 - }, - { - "epoch": 0.38562474635884025, - "grad_norm": 1.36654102845054, - "learning_rate": 2.8137401289572854e-06, - "loss": 0.9729, - "step": 4276 - }, - { - "epoch": 0.3857149298823105, - "grad_norm": 1.4430463190714273, - "learning_rate": 2.8132064259934086e-06, - "loss": 1.0037, - "step": 4277 - }, - { - "epoch": 0.3858051134057808, - "grad_norm": 1.5642374678236703, - "learning_rate": 2.812672653642276e-06, - "loss": 0.96, - "step": 4278 - }, - { - "epoch": 0.385895296929251, - "grad_norm": 1.3580989768093292, - "learning_rate": 2.812138811949431e-06, - "loss": 0.977, - "step": 4279 - }, - { - "epoch": 0.3859854804527213, - "grad_norm": 1.5421716876702507, - "learning_rate": 2.8116049009604247e-06, - "loss": 1.0289, - "step": 4280 - }, - { - "epoch": 0.38607566397619153, - "grad_norm": 1.5566272998272987, - "learning_rate": 2.8110709207208132e-06, - "loss": 0.8432, - "step": 4281 - }, - { - "epoch": 0.3861658474996618, - "grad_norm": 1.599255570442978, - "learning_rate": 2.810536871276158e-06, - "loss": 0.9927, - "step": 4282 - }, - { - "epoch": 0.38625603102313205, - "grad_norm": 0.7619312870178171, - "learning_rate": 2.8100027526720283e-06, - "loss": 0.876, - "step": 4283 - }, - { - "epoch": 0.38634621454660234, - "grad_norm": 1.3799187938321247, - "learning_rate": 2.8094685649539974e-06, - "loss": 0.8931, - "step": 4284 - }, - { - "epoch": 0.3864363980700726, - "grad_norm": 0.8277317768972469, - "learning_rate": 2.8089343081676455e-06, - "loss": 0.8552, - "step": 4285 - }, - { - "epoch": 0.38652658159354286, - "grad_norm": 1.5513472208915922, - "learning_rate": 2.8083999823585577e-06, - "loss": 0.9546, - "step": 4286 - }, - { - "epoch": 0.3866167651170131, - "grad_norm": 1.2290122479355368, - "learning_rate": 2.8078655875723254e-06, - "loss": 0.9922, - "step": 4287 - }, - { - "epoch": 0.3867069486404834, - "grad_norm": 2.0594873099606406, - "learning_rate": 2.807331123854547e-06, - "loss": 1.0361, - "step": 4288 - }, - { - "epoch": 0.38679713216395367, - "grad_norm": 1.489880761105402, - "learning_rate": 2.806796591250826e-06, - "loss": 0.8336, - "step": 4289 - }, - { - "epoch": 0.3868873156874239, - "grad_norm": 1.5886250541771647, - "learning_rate": 2.8062619898067707e-06, - "loss": 1.0338, - "step": 4290 - }, - { - "epoch": 0.3869774992108942, - "grad_norm": 1.4760388449904618, - "learning_rate": 2.8057273195679963e-06, - "loss": 0.9771, - "step": 4291 - }, - { - "epoch": 0.3870676827343644, - "grad_norm": 1.4606529407497604, - "learning_rate": 2.8051925805801253e-06, - "loss": 0.9719, - "step": 4292 - }, - { - "epoch": 0.3871578662578347, - "grad_norm": 1.3564272962435204, - "learning_rate": 2.804657772888783e-06, - "loss": 0.9961, - "step": 4293 - }, - { - "epoch": 0.38724804978130495, - "grad_norm": 1.533144499028426, - "learning_rate": 2.804122896539602e-06, - "loss": 0.9628, - "step": 4294 - }, - { - "epoch": 0.38733823330477524, - "grad_norm": 1.3662057021656302, - "learning_rate": 2.8035879515782225e-06, - "loss": 0.9101, - "step": 4295 - }, - { - "epoch": 0.38742841682824547, - "grad_norm": 1.59930744978067, - "learning_rate": 2.803052938050288e-06, - "loss": 1.0109, - "step": 4296 - }, - { - "epoch": 0.38751860035171576, - "grad_norm": 1.4283420923618053, - "learning_rate": 2.802517856001449e-06, - "loss": 1.0102, - "step": 4297 - }, - { - "epoch": 0.387608783875186, - "grad_norm": 1.498292825693719, - "learning_rate": 2.801982705477361e-06, - "loss": 0.9237, - "step": 4298 - }, - { - "epoch": 0.3876989673986563, - "grad_norm": 1.3272849174874384, - "learning_rate": 2.8014474865236867e-06, - "loss": 0.9461, - "step": 4299 - }, - { - "epoch": 0.3877891509221265, - "grad_norm": 1.36410261621727, - "learning_rate": 2.800912199186094e-06, - "loss": 0.9495, - "step": 4300 - }, - { - "epoch": 0.3878793344455968, - "grad_norm": 1.3954296050103594, - "learning_rate": 2.800376843510256e-06, - "loss": 0.9714, - "step": 4301 - }, - { - "epoch": 0.38796951796906703, - "grad_norm": 1.3891515748307002, - "learning_rate": 2.799841419541852e-06, - "loss": 0.9821, - "step": 4302 - }, - { - "epoch": 0.3880597014925373, - "grad_norm": 0.7471039409174558, - "learning_rate": 2.799305927326568e-06, - "loss": 0.8247, - "step": 4303 - }, - { - "epoch": 0.38814988501600756, - "grad_norm": 0.7635901522019884, - "learning_rate": 2.7987703669100955e-06, - "loss": 0.819, - "step": 4304 - }, - { - "epoch": 0.38824006853947785, - "grad_norm": 1.549336334398507, - "learning_rate": 2.79823473833813e-06, - "loss": 0.966, - "step": 4305 - }, - { - "epoch": 0.3883302520629481, - "grad_norm": 1.5446247726629192, - "learning_rate": 2.797699041656376e-06, - "loss": 0.9107, - "step": 4306 - }, - { - "epoch": 0.38842043558641837, - "grad_norm": 1.9531657710588315, - "learning_rate": 2.7971632769105412e-06, - "loss": 0.9785, - "step": 4307 - }, - { - "epoch": 0.3885106191098886, - "grad_norm": 1.7982102450204844, - "learning_rate": 2.79662744414634e-06, - "loss": 0.9259, - "step": 4308 - }, - { - "epoch": 0.3886008026333589, - "grad_norm": 1.4959374886523316, - "learning_rate": 2.7960915434094923e-06, - "loss": 0.9099, - "step": 4309 - }, - { - "epoch": 0.3886909861568291, - "grad_norm": 1.6161476514705455, - "learning_rate": 2.7955555747457256e-06, - "loss": 0.9369, - "step": 4310 - }, - { - "epoch": 0.3887811696802994, - "grad_norm": 1.36383535060355, - "learning_rate": 2.79501953820077e-06, - "loss": 0.959, - "step": 4311 - }, - { - "epoch": 0.3888713532037697, - "grad_norm": 1.4258631852527375, - "learning_rate": 2.7944834338203637e-06, - "loss": 0.9781, - "step": 4312 - }, - { - "epoch": 0.38896153672723993, - "grad_norm": 1.4214164078185862, - "learning_rate": 2.79394726165025e-06, - "loss": 0.9189, - "step": 4313 - }, - { - "epoch": 0.3890517202507102, - "grad_norm": 1.846442955332098, - "learning_rate": 2.793411021736178e-06, - "loss": 0.938, - "step": 4314 - }, - { - "epoch": 0.38914190377418045, - "grad_norm": 1.4628113341902382, - "learning_rate": 2.7928747141239027e-06, - "loss": 1.0578, - "step": 4315 - }, - { - "epoch": 0.38923208729765074, - "grad_norm": 0.7524355047094774, - "learning_rate": 2.7923383388591856e-06, - "loss": 0.8299, - "step": 4316 - }, - { - "epoch": 0.389322270821121, - "grad_norm": 1.4627092195497404, - "learning_rate": 2.7918018959877923e-06, - "loss": 0.924, - "step": 4317 - }, - { - "epoch": 0.38941245434459126, - "grad_norm": 1.2712421332972061, - "learning_rate": 2.791265385555495e-06, - "loss": 0.9826, - "step": 4318 - }, - { - "epoch": 0.3895026378680615, - "grad_norm": 1.4236165374817848, - "learning_rate": 2.790728807608072e-06, - "loss": 1.018, - "step": 4319 - }, - { - "epoch": 0.3895928213915318, - "grad_norm": 1.581064338489692, - "learning_rate": 2.790192162191307e-06, - "loss": 0.9535, - "step": 4320 - }, - { - "epoch": 0.389683004915002, - "grad_norm": 1.5940297947041075, - "learning_rate": 2.78965544935099e-06, - "loss": 0.984, - "step": 4321 - }, - { - "epoch": 0.3897731884384723, - "grad_norm": 1.5153926395961985, - "learning_rate": 2.789118669132916e-06, - "loss": 0.9406, - "step": 4322 - }, - { - "epoch": 0.38986337196194254, - "grad_norm": 1.7314497188677715, - "learning_rate": 2.7885818215828856e-06, - "loss": 1.0259, - "step": 4323 - }, - { - "epoch": 0.38995355548541283, - "grad_norm": 1.6286963457944514, - "learning_rate": 2.7880449067467064e-06, - "loss": 0.9167, - "step": 4324 - }, - { - "epoch": 0.39004373900888306, - "grad_norm": 1.5436956697239541, - "learning_rate": 2.78750792467019e-06, - "loss": 0.9102, - "step": 4325 - }, - { - "epoch": 0.39013392253235335, - "grad_norm": 1.5177112653499323, - "learning_rate": 2.786970875399156e-06, - "loss": 0.8238, - "step": 4326 - }, - { - "epoch": 0.3902241060558236, - "grad_norm": 1.2252476441882794, - "learning_rate": 2.7864337589794267e-06, - "loss": 0.8847, - "step": 4327 - }, - { - "epoch": 0.39031428957929387, - "grad_norm": 1.4461925404838047, - "learning_rate": 2.7858965754568335e-06, - "loss": 0.9799, - "step": 4328 - }, - { - "epoch": 0.3904044731027641, - "grad_norm": 1.4121649218090426, - "learning_rate": 2.785359324877211e-06, - "loss": 0.8958, - "step": 4329 - }, - { - "epoch": 0.3904946566262344, - "grad_norm": 1.4144751015309363, - "learning_rate": 2.7848220072864e-06, - "loss": 0.978, - "step": 4330 - }, - { - "epoch": 0.3905848401497046, - "grad_norm": 1.4456823752067811, - "learning_rate": 2.784284622730248e-06, - "loss": 0.9494, - "step": 4331 - }, - { - "epoch": 0.3906750236731749, - "grad_norm": 1.6217176592586389, - "learning_rate": 2.7837471712546073e-06, - "loss": 0.9716, - "step": 4332 - }, - { - "epoch": 0.39076520719664515, - "grad_norm": 1.2002004277218852, - "learning_rate": 2.783209652905337e-06, - "loss": 0.9348, - "step": 4333 - }, - { - "epoch": 0.39085539072011544, - "grad_norm": 1.3823715627654978, - "learning_rate": 2.7826720677283e-06, - "loss": 0.9252, - "step": 4334 - }, - { - "epoch": 0.39094557424358567, - "grad_norm": 1.3785020273585973, - "learning_rate": 2.782134415769367e-06, - "loss": 0.9274, - "step": 4335 - }, - { - "epoch": 0.39103575776705596, - "grad_norm": 1.2914822149348089, - "learning_rate": 2.7815966970744126e-06, - "loss": 1.006, - "step": 4336 - }, - { - "epoch": 0.39112594129052625, - "grad_norm": 1.6026567396141065, - "learning_rate": 2.7810589116893184e-06, - "loss": 1.0227, - "step": 4337 - }, - { - "epoch": 0.3912161248139965, - "grad_norm": 1.351271978476568, - "learning_rate": 2.780521059659972e-06, - "loss": 0.9557, - "step": 4338 - }, - { - "epoch": 0.39130630833746677, - "grad_norm": 1.4577347844088153, - "learning_rate": 2.7799831410322637e-06, - "loss": 0.9473, - "step": 4339 - }, - { - "epoch": 0.391396491860937, - "grad_norm": 1.2515777167872746, - "learning_rate": 2.779445155852094e-06, - "loss": 0.9122, - "step": 4340 - }, - { - "epoch": 0.3914866753844073, - "grad_norm": 0.7721806341936649, - "learning_rate": 2.7789071041653655e-06, - "loss": 0.8872, - "step": 4341 - }, - { - "epoch": 0.3915768589078775, - "grad_norm": 1.5031238452803841, - "learning_rate": 2.7783689860179875e-06, - "loss": 1.0056, - "step": 4342 - }, - { - "epoch": 0.3916670424313478, - "grad_norm": 1.5497142282201417, - "learning_rate": 2.7778308014558767e-06, - "loss": 0.9024, - "step": 4343 - }, - { - "epoch": 0.39175722595481804, - "grad_norm": 1.5152037516894918, - "learning_rate": 2.7772925505249524e-06, - "loss": 1.0315, - "step": 4344 - }, - { - "epoch": 0.39184740947828833, - "grad_norm": 1.1964379790125337, - "learning_rate": 2.7767542332711417e-06, - "loss": 0.9282, - "step": 4345 - }, - { - "epoch": 0.39193759300175857, - "grad_norm": 1.3411414532142394, - "learning_rate": 2.776215849740377e-06, - "loss": 0.9281, - "step": 4346 - }, - { - "epoch": 0.39202777652522885, - "grad_norm": 1.3689904232595786, - "learning_rate": 2.775677399978596e-06, - "loss": 0.97, - "step": 4347 - }, - { - "epoch": 0.3921179600486991, - "grad_norm": 0.7440128166597081, - "learning_rate": 2.775138884031742e-06, - "loss": 0.7979, - "step": 4348 - }, - { - "epoch": 0.3922081435721694, - "grad_norm": 1.2845638962292956, - "learning_rate": 2.774600301945764e-06, - "loss": 0.9195, - "step": 4349 - }, - { - "epoch": 0.3922983270956396, - "grad_norm": 1.3010173045011078, - "learning_rate": 2.774061653766618e-06, - "loss": 0.9796, - "step": 4350 - }, - { - "epoch": 0.3923885106191099, - "grad_norm": 1.4346034680424444, - "learning_rate": 2.773522939540263e-06, - "loss": 0.9438, - "step": 4351 - }, - { - "epoch": 0.39247869414258013, - "grad_norm": 1.5958445380127115, - "learning_rate": 2.7729841593126663e-06, - "loss": 0.8796, - "step": 4352 - }, - { - "epoch": 0.3925688776660504, - "grad_norm": 1.3003481582119873, - "learning_rate": 2.7724453131297988e-06, - "loss": 1.0197, - "step": 4353 - }, - { - "epoch": 0.39265906118952065, - "grad_norm": 1.371604802454264, - "learning_rate": 2.771906401037637e-06, - "loss": 0.9849, - "step": 4354 - }, - { - "epoch": 0.39274924471299094, - "grad_norm": 1.5138781991439276, - "learning_rate": 2.7713674230821664e-06, - "loss": 0.9162, - "step": 4355 - }, - { - "epoch": 0.3928394282364612, - "grad_norm": 1.3130322694059746, - "learning_rate": 2.7708283793093724e-06, - "loss": 0.9109, - "step": 4356 - }, - { - "epoch": 0.39292961175993146, - "grad_norm": 1.4773749482861438, - "learning_rate": 2.7702892697652514e-06, - "loss": 0.9368, - "step": 4357 - }, - { - "epoch": 0.3930197952834017, - "grad_norm": 1.361569660930526, - "learning_rate": 2.7697500944958024e-06, - "loss": 0.9314, - "step": 4358 - }, - { - "epoch": 0.393109978806872, - "grad_norm": 1.536683310724069, - "learning_rate": 2.7692108535470312e-06, - "loss": 1.0433, - "step": 4359 - }, - { - "epoch": 0.3932001623303423, - "grad_norm": 1.732421875, - "learning_rate": 2.768671546964948e-06, - "loss": 1.0018, - "step": 4360 - }, - { - "epoch": 0.3932903458538125, - "grad_norm": 1.56398763410748, - "learning_rate": 2.7681321747955713e-06, - "loss": 0.9811, - "step": 4361 - }, - { - "epoch": 0.3933805293772828, - "grad_norm": 1.2711124374244531, - "learning_rate": 2.767592737084921e-06, - "loss": 0.9728, - "step": 4362 - }, - { - "epoch": 0.39347071290075303, - "grad_norm": 1.5699468538072545, - "learning_rate": 2.767053233879026e-06, - "loss": 1.0459, - "step": 4363 - }, - { - "epoch": 0.3935608964242233, - "grad_norm": 1.4557566449660442, - "learning_rate": 2.76651366522392e-06, - "loss": 1.0297, - "step": 4364 - }, - { - "epoch": 0.39365107994769355, - "grad_norm": 1.6518858507357836, - "learning_rate": 2.7659740311656413e-06, - "loss": 0.9261, - "step": 4365 - }, - { - "epoch": 0.39374126347116384, - "grad_norm": 1.8867182445327997, - "learning_rate": 2.7654343317502352e-06, - "loss": 0.9856, - "step": 4366 - }, - { - "epoch": 0.39383144699463407, - "grad_norm": 1.3152488351949796, - "learning_rate": 2.7648945670237502e-06, - "loss": 0.9323, - "step": 4367 - }, - { - "epoch": 0.39392163051810436, - "grad_norm": 1.6434933973411276, - "learning_rate": 2.7643547370322446e-06, - "loss": 1.0051, - "step": 4368 - }, - { - "epoch": 0.3940118140415746, - "grad_norm": 1.6240445042182552, - "learning_rate": 2.7638148418217775e-06, - "loss": 0.9913, - "step": 4369 - }, - { - "epoch": 0.3941019975650449, - "grad_norm": 1.7092592552373032, - "learning_rate": 2.7632748814384163e-06, - "loss": 0.999, - "step": 4370 - }, - { - "epoch": 0.3941921810885151, - "grad_norm": 1.1878902898183976, - "learning_rate": 2.7627348559282335e-06, - "loss": 0.9849, - "step": 4371 - }, - { - "epoch": 0.3942823646119854, - "grad_norm": 1.3615563090570857, - "learning_rate": 2.7621947653373075e-06, - "loss": 0.8944, - "step": 4372 - }, - { - "epoch": 0.39437254813545564, - "grad_norm": 0.8393750679430788, - "learning_rate": 2.7616546097117213e-06, - "loss": 0.8168, - "step": 4373 - }, - { - "epoch": 0.3944627316589259, - "grad_norm": 1.6113531307245323, - "learning_rate": 2.761114389097564e-06, - "loss": 0.9591, - "step": 4374 - }, - { - "epoch": 0.39455291518239616, - "grad_norm": 3.60203698067583, - "learning_rate": 2.7605741035409305e-06, - "loss": 0.9958, - "step": 4375 - }, - { - "epoch": 0.39464309870586645, - "grad_norm": 1.4538381323827931, - "learning_rate": 2.76003375308792e-06, - "loss": 0.9361, - "step": 4376 - }, - { - "epoch": 0.3947332822293367, - "grad_norm": 1.2143986122556907, - "learning_rate": 2.75949333778464e-06, - "loss": 0.9267, - "step": 4377 - }, - { - "epoch": 0.39482346575280697, - "grad_norm": 1.7840254058696732, - "learning_rate": 2.7589528576772e-06, - "loss": 0.8388, - "step": 4378 - }, - { - "epoch": 0.3949136492762772, - "grad_norm": 1.5945411663130227, - "learning_rate": 2.758412312811717e-06, - "loss": 0.9452, - "step": 4379 - }, - { - "epoch": 0.3950038327997475, - "grad_norm": 1.098173515103557, - "learning_rate": 2.7578717032343146e-06, - "loss": 0.843, - "step": 4380 - }, - { - "epoch": 0.3950940163232177, - "grad_norm": 1.3708686752556052, - "learning_rate": 2.757331028991119e-06, - "loss": 0.986, - "step": 4381 - }, - { - "epoch": 0.395184199846688, - "grad_norm": 1.6886543987780842, - "learning_rate": 2.7567902901282642e-06, - "loss": 0.9736, - "step": 4382 - }, - { - "epoch": 0.3952743833701583, - "grad_norm": 1.9635236488008043, - "learning_rate": 2.7562494866918892e-06, - "loss": 0.9485, - "step": 4383 - }, - { - "epoch": 0.39536456689362853, - "grad_norm": 1.2999534378516315, - "learning_rate": 2.7557086187281378e-06, - "loss": 0.9309, - "step": 4384 - }, - { - "epoch": 0.3954547504170988, - "grad_norm": 1.7646869429338676, - "learning_rate": 2.75516768628316e-06, - "loss": 1.0497, - "step": 4385 - }, - { - "epoch": 0.39554493394056905, - "grad_norm": 1.3589330809919218, - "learning_rate": 2.7546266894031114e-06, - "loss": 1.031, - "step": 4386 - }, - { - "epoch": 0.39563511746403934, - "grad_norm": 1.7388802514286583, - "learning_rate": 2.7540856281341526e-06, - "loss": 0.903, - "step": 4387 - }, - { - "epoch": 0.3957253009875096, - "grad_norm": 1.4744227815512496, - "learning_rate": 2.7535445025224506e-06, - "loss": 0.9207, - "step": 4388 - }, - { - "epoch": 0.39581548451097986, - "grad_norm": 1.8174230867296637, - "learning_rate": 2.753003312614176e-06, - "loss": 0.9517, - "step": 4389 - }, - { - "epoch": 0.3959056680344501, - "grad_norm": 1.588092786393922, - "learning_rate": 2.7524620584555065e-06, - "loss": 0.9551, - "step": 4390 - }, - { - "epoch": 0.3959958515579204, - "grad_norm": 1.6469086703309492, - "learning_rate": 2.7519207400926253e-06, - "loss": 0.9882, - "step": 4391 - }, - { - "epoch": 0.3960860350813906, - "grad_norm": 1.3536702297586194, - "learning_rate": 2.751379357571721e-06, - "loss": 0.9372, - "step": 4392 - }, - { - "epoch": 0.3961762186048609, - "grad_norm": 1.6150920025628737, - "learning_rate": 2.7508379109389865e-06, - "loss": 1.0227, - "step": 4393 - }, - { - "epoch": 0.39626640212833114, - "grad_norm": 1.6972187768859912, - "learning_rate": 2.750296400240622e-06, - "loss": 0.9644, - "step": 4394 - }, - { - "epoch": 0.39635658565180143, - "grad_norm": 1.2429089640319977, - "learning_rate": 2.7497548255228305e-06, - "loss": 0.9855, - "step": 4395 - }, - { - "epoch": 0.39644676917527166, - "grad_norm": 1.536827672328161, - "learning_rate": 2.749213186831824e-06, - "loss": 0.9109, - "step": 4396 - }, - { - "epoch": 0.39653695269874195, - "grad_norm": 1.410527484246045, - "learning_rate": 2.7486714842138173e-06, - "loss": 0.9957, - "step": 4397 - }, - { - "epoch": 0.3966271362222122, - "grad_norm": 1.8342331787602528, - "learning_rate": 2.748129717715031e-06, - "loss": 0.9084, - "step": 4398 - }, - { - "epoch": 0.3967173197456825, - "grad_norm": 2.5370324103330493, - "learning_rate": 2.747587887381692e-06, - "loss": 0.9753, - "step": 4399 - }, - { - "epoch": 0.3968075032691527, - "grad_norm": 1.3336570416333358, - "learning_rate": 2.7470459932600328e-06, - "loss": 0.9094, - "step": 4400 - }, - { - "epoch": 0.396897686792623, - "grad_norm": 1.4075181541152937, - "learning_rate": 2.7465040353962897e-06, - "loss": 0.9552, - "step": 4401 - }, - { - "epoch": 0.3969878703160932, - "grad_norm": 1.6760440789255344, - "learning_rate": 2.745962013836706e-06, - "loss": 0.9441, - "step": 4402 - }, - { - "epoch": 0.3970780538395635, - "grad_norm": 1.6292799559844018, - "learning_rate": 2.74541992862753e-06, - "loss": 0.9311, - "step": 4403 - }, - { - "epoch": 0.39716823736303375, - "grad_norm": 1.6583735689556849, - "learning_rate": 2.744877779815016e-06, - "loss": 0.9571, - "step": 4404 - }, - { - "epoch": 0.39725842088650404, - "grad_norm": 1.2576472961736433, - "learning_rate": 2.7443355674454234e-06, - "loss": 0.9516, - "step": 4405 - }, - { - "epoch": 0.39734860440997427, - "grad_norm": 1.7509783326207589, - "learning_rate": 2.743793291565015e-06, - "loss": 0.9954, - "step": 4406 - }, - { - "epoch": 0.39743878793344456, - "grad_norm": 2.0206748926508564, - "learning_rate": 2.7432509522200617e-06, - "loss": 0.8802, - "step": 4407 - }, - { - "epoch": 0.39752897145691485, - "grad_norm": 1.82451579842767, - "learning_rate": 2.7427085494568383e-06, - "loss": 0.9002, - "step": 4408 - }, - { - "epoch": 0.3976191549803851, - "grad_norm": 0.7564587643838458, - "learning_rate": 2.742166083321628e-06, - "loss": 0.8335, - "step": 4409 - }, - { - "epoch": 0.39770933850385537, - "grad_norm": 1.4720515216176209, - "learning_rate": 2.7416235538607137e-06, - "loss": 0.9983, - "step": 4410 - }, - { - "epoch": 0.3977995220273256, - "grad_norm": 1.3863000801884948, - "learning_rate": 2.7410809611203894e-06, - "loss": 0.9368, - "step": 4411 - }, - { - "epoch": 0.3978897055507959, - "grad_norm": 1.3861157892887639, - "learning_rate": 2.7405383051469507e-06, - "loss": 0.9101, - "step": 4412 - }, - { - "epoch": 0.3979798890742661, - "grad_norm": 1.3875154167899664, - "learning_rate": 2.7399955859867e-06, - "loss": 0.7645, - "step": 4413 - }, - { - "epoch": 0.3980700725977364, - "grad_norm": 1.0151630818373591, - "learning_rate": 2.7394528036859465e-06, - "loss": 0.7747, - "step": 4414 - }, - { - "epoch": 0.39816025612120665, - "grad_norm": 1.3946795558339928, - "learning_rate": 2.738909958291002e-06, - "loss": 0.9531, - "step": 4415 - }, - { - "epoch": 0.39825043964467693, - "grad_norm": 1.311475990101811, - "learning_rate": 2.7383670498481863e-06, - "loss": 0.8665, - "step": 4416 - }, - { - "epoch": 0.39834062316814717, - "grad_norm": 0.7658998813363589, - "learning_rate": 2.737824078403822e-06, - "loss": 0.737, - "step": 4417 - }, - { - "epoch": 0.39843080669161746, - "grad_norm": 1.6897355565722139, - "learning_rate": 2.737281044004239e-06, - "loss": 0.9309, - "step": 4418 - }, - { - "epoch": 0.3985209902150877, - "grad_norm": 1.4089172922636424, - "learning_rate": 2.736737946695772e-06, - "loss": 0.942, - "step": 4419 - }, - { - "epoch": 0.398611173738558, - "grad_norm": 1.4577755088242523, - "learning_rate": 2.736194786524761e-06, - "loss": 0.9466, - "step": 4420 - }, - { - "epoch": 0.3987013572620282, - "grad_norm": 1.4309560848777836, - "learning_rate": 2.7356515635375517e-06, - "loss": 0.9544, - "step": 4421 - }, - { - "epoch": 0.3987915407854985, - "grad_norm": 1.893172636241934, - "learning_rate": 2.735108277780495e-06, - "loss": 0.8738, - "step": 4422 - }, - { - "epoch": 0.39888172430896873, - "grad_norm": 1.3255647553347336, - "learning_rate": 2.7345649292999456e-06, - "loss": 0.9451, - "step": 4423 - }, - { - "epoch": 0.398971907832439, - "grad_norm": 1.5125476230658776, - "learning_rate": 2.734021518142267e-06, - "loss": 0.9567, - "step": 4424 - }, - { - "epoch": 0.39906209135590925, - "grad_norm": 1.705445554805334, - "learning_rate": 2.733478044353825e-06, - "loss": 0.7904, - "step": 4425 - }, - { - "epoch": 0.39915227487937954, - "grad_norm": 1.534424818671369, - "learning_rate": 2.7329345079809917e-06, - "loss": 0.958, - "step": 4426 - }, - { - "epoch": 0.3992424584028498, - "grad_norm": 1.433130839186633, - "learning_rate": 2.7323909090701447e-06, - "loss": 0.8855, - "step": 4427 - }, - { - "epoch": 0.39933264192632006, - "grad_norm": 1.9066332447461714, - "learning_rate": 2.731847247667667e-06, - "loss": 0.9967, - "step": 4428 - }, - { - "epoch": 0.3994228254497903, - "grad_norm": 1.5484025137112, - "learning_rate": 2.731303523819947e-06, - "loss": 0.9299, - "step": 4429 - }, - { - "epoch": 0.3995130089732606, - "grad_norm": 1.4622309054091431, - "learning_rate": 2.7307597375733783e-06, - "loss": 1.0108, - "step": 4430 - }, - { - "epoch": 0.3996031924967309, - "grad_norm": 1.5851437691788952, - "learning_rate": 2.7302158889743587e-06, - "loss": 0.9574, - "step": 4431 - }, - { - "epoch": 0.3996933760202011, - "grad_norm": 1.44389645052506, - "learning_rate": 2.7296719780692937e-06, - "loss": 0.9107, - "step": 4432 - }, - { - "epoch": 0.3997835595436714, - "grad_norm": 1.4465449671528499, - "learning_rate": 2.7291280049045916e-06, - "loss": 0.8639, - "step": 4433 - }, - { - "epoch": 0.39987374306714163, - "grad_norm": 2.118245103480806, - "learning_rate": 2.7285839695266683e-06, - "loss": 0.8958, - "step": 4434 - }, - { - "epoch": 0.3999639265906119, - "grad_norm": 2.121952002677313, - "learning_rate": 2.7280398719819423e-06, - "loss": 0.9009, - "step": 4435 - }, - { - "epoch": 0.40005411011408215, - "grad_norm": 1.5923818063516384, - "learning_rate": 2.727495712316841e-06, - "loss": 1.02, - "step": 4436 - }, - { - "epoch": 0.40014429363755244, - "grad_norm": 1.7606517326321538, - "learning_rate": 2.7269514905777945e-06, - "loss": 0.9007, - "step": 4437 - }, - { - "epoch": 0.4002344771610227, - "grad_norm": 1.4454925424969955, - "learning_rate": 2.7264072068112377e-06, - "loss": 1.0766, - "step": 4438 - }, - { - "epoch": 0.40032466068449296, - "grad_norm": 3.0235143055388938, - "learning_rate": 2.7258628610636133e-06, - "loss": 0.921, - "step": 4439 - }, - { - "epoch": 0.4004148442079632, - "grad_norm": 1.4394385493665034, - "learning_rate": 2.7253184533813667e-06, - "loss": 1.0329, - "step": 4440 - }, - { - "epoch": 0.4005050277314335, - "grad_norm": 1.460182770171357, - "learning_rate": 2.72477398381095e-06, - "loss": 0.9602, - "step": 4441 - }, - { - "epoch": 0.4005952112549037, - "grad_norm": 1.2684568113373385, - "learning_rate": 2.724229452398821e-06, - "loss": 0.9641, - "step": 4442 - }, - { - "epoch": 0.400685394778374, - "grad_norm": 1.5476278919623996, - "learning_rate": 2.7236848591914422e-06, - "loss": 0.9691, - "step": 4443 - }, - { - "epoch": 0.40077557830184424, - "grad_norm": 1.5935858847697701, - "learning_rate": 2.7231402042352803e-06, - "loss": 0.9388, - "step": 4444 - }, - { - "epoch": 0.4008657618253145, - "grad_norm": 1.251090289506906, - "learning_rate": 2.722595487576809e-06, - "loss": 0.9878, - "step": 4445 - }, - { - "epoch": 0.40095594534878476, - "grad_norm": 1.4065958233631772, - "learning_rate": 2.722050709262506e-06, - "loss": 0.9989, - "step": 4446 - }, - { - "epoch": 0.40104612887225505, - "grad_norm": 1.3790708881246683, - "learning_rate": 2.7215058693388557e-06, - "loss": 0.9359, - "step": 4447 - }, - { - "epoch": 0.4011363123957253, - "grad_norm": 1.6031887822541082, - "learning_rate": 2.720960967852346e-06, - "loss": 0.858, - "step": 4448 - }, - { - "epoch": 0.40122649591919557, - "grad_norm": 1.468993876886558, - "learning_rate": 2.720416004849471e-06, - "loss": 0.8881, - "step": 4449 - }, - { - "epoch": 0.4013166794426658, - "grad_norm": 1.3146380312865795, - "learning_rate": 2.7198709803767304e-06, - "loss": 0.904, - "step": 4450 - }, - { - "epoch": 0.4014068629661361, - "grad_norm": 1.3258469286033185, - "learning_rate": 2.7193258944806286e-06, - "loss": 0.9198, - "step": 4451 - }, - { - "epoch": 0.4014970464896063, - "grad_norm": 1.5362744323674793, - "learning_rate": 2.718780747207675e-06, - "loss": 0.9562, - "step": 4452 - }, - { - "epoch": 0.4015872300130766, - "grad_norm": 1.6439694421299638, - "learning_rate": 2.7182355386043847e-06, - "loss": 1.0161, - "step": 4453 - }, - { - "epoch": 0.40167741353654685, - "grad_norm": 1.2486042813651896, - "learning_rate": 2.717690268717278e-06, - "loss": 1.0099, - "step": 4454 - }, - { - "epoch": 0.40176759706001713, - "grad_norm": 1.2402185153698073, - "learning_rate": 2.7171449375928803e-06, - "loss": 0.9783, - "step": 4455 - }, - { - "epoch": 0.4018577805834874, - "grad_norm": 1.7833181722240143, - "learning_rate": 2.716599545277722e-06, - "loss": 0.9736, - "step": 4456 - }, - { - "epoch": 0.40194796410695766, - "grad_norm": 1.361791589291287, - "learning_rate": 2.7160540918183394e-06, - "loss": 0.9159, - "step": 4457 - }, - { - "epoch": 0.40203814763042794, - "grad_norm": 1.3682628653095885, - "learning_rate": 2.715508577261273e-06, - "loss": 0.8759, - "step": 4458 - }, - { - "epoch": 0.4021283311538982, - "grad_norm": 2.0354742874324363, - "learning_rate": 2.7149630016530702e-06, - "loss": 0.9352, - "step": 4459 - }, - { - "epoch": 0.40221851467736847, - "grad_norm": 1.2473688089500161, - "learning_rate": 2.7144173650402815e-06, - "loss": 0.9673, - "step": 4460 - }, - { - "epoch": 0.4023086982008387, - "grad_norm": 1.3990228376290073, - "learning_rate": 2.7138716674694636e-06, - "loss": 0.9767, - "step": 4461 - }, - { - "epoch": 0.402398881724309, - "grad_norm": 1.4330867525662427, - "learning_rate": 2.7133259089871795e-06, - "loss": 0.9045, - "step": 4462 - }, - { - "epoch": 0.4024890652477792, - "grad_norm": 1.4971238218176892, - "learning_rate": 2.712780089639995e-06, - "loss": 0.9518, - "step": 4463 - }, - { - "epoch": 0.4025792487712495, - "grad_norm": 2.2374255290504923, - "learning_rate": 2.712234209474483e-06, - "loss": 0.825, - "step": 4464 - }, - { - "epoch": 0.40266943229471974, - "grad_norm": 1.4238729168998316, - "learning_rate": 2.7116882685372218e-06, - "loss": 0.9938, - "step": 4465 - }, - { - "epoch": 0.40275961581819003, - "grad_norm": 1.4546599126344897, - "learning_rate": 2.7111422668747927e-06, - "loss": 0.9277, - "step": 4466 - }, - { - "epoch": 0.40284979934166026, - "grad_norm": 1.3786810672159797, - "learning_rate": 2.7105962045337846e-06, - "loss": 0.9534, - "step": 4467 - }, - { - "epoch": 0.40293998286513055, - "grad_norm": 1.3352066182322782, - "learning_rate": 2.7100500815607898e-06, - "loss": 0.9453, - "step": 4468 - }, - { - "epoch": 0.4030301663886008, - "grad_norm": 1.2065114009838616, - "learning_rate": 2.709503898002407e-06, - "loss": 0.9783, - "step": 4469 - }, - { - "epoch": 0.4031203499120711, - "grad_norm": 1.7921771793351005, - "learning_rate": 2.708957653905239e-06, - "loss": 0.949, - "step": 4470 - }, - { - "epoch": 0.4032105334355413, - "grad_norm": 1.3124834241274492, - "learning_rate": 2.7084113493158956e-06, - "loss": 0.966, - "step": 4471 - }, - { - "epoch": 0.4033007169590116, - "grad_norm": 2.339375291396787, - "learning_rate": 2.7078649842809888e-06, - "loss": 0.9668, - "step": 4472 - }, - { - "epoch": 0.40339090048248183, - "grad_norm": 1.3193371121212851, - "learning_rate": 2.707318558847139e-06, - "loss": 0.9988, - "step": 4473 - }, - { - "epoch": 0.4034810840059521, - "grad_norm": 0.7883609636907133, - "learning_rate": 2.7067720730609697e-06, - "loss": 0.8178, - "step": 4474 - }, - { - "epoch": 0.40357126752942235, - "grad_norm": 1.4105147225775534, - "learning_rate": 2.70622552696911e-06, - "loss": 0.9278, - "step": 4475 - }, - { - "epoch": 0.40366145105289264, - "grad_norm": 1.3748930542710143, - "learning_rate": 2.7056789206181943e-06, - "loss": 0.9028, - "step": 4476 - }, - { - "epoch": 0.40375163457636287, - "grad_norm": 1.3743379039190324, - "learning_rate": 2.7051322540548615e-06, - "loss": 1.0037, - "step": 4477 - }, - { - "epoch": 0.40384181809983316, - "grad_norm": 1.5562968143637652, - "learning_rate": 2.704585527325757e-06, - "loss": 0.9164, - "step": 4478 - }, - { - "epoch": 0.40393200162330345, - "grad_norm": 1.515255597903383, - "learning_rate": 2.7040387404775303e-06, - "loss": 0.9128, - "step": 4479 - }, - { - "epoch": 0.4040221851467737, - "grad_norm": 1.4795126532722276, - "learning_rate": 2.703491893556837e-06, - "loss": 0.9714, - "step": 4480 - }, - { - "epoch": 0.40411236867024397, - "grad_norm": 1.1799179256312242, - "learning_rate": 2.702944986610335e-06, - "loss": 0.9262, - "step": 4481 - }, - { - "epoch": 0.4042025521937142, - "grad_norm": 1.8406640429152805, - "learning_rate": 2.7023980196846917e-06, - "loss": 0.9712, - "step": 4482 - }, - { - "epoch": 0.4042927357171845, - "grad_norm": 0.7536706506895741, - "learning_rate": 2.7018509928265763e-06, - "loss": 0.8384, - "step": 4483 - }, - { - "epoch": 0.4043829192406547, - "grad_norm": 1.5385206788873569, - "learning_rate": 2.7013039060826635e-06, - "loss": 0.9758, - "step": 4484 - }, - { - "epoch": 0.404473102764125, - "grad_norm": 1.5797760135895194, - "learning_rate": 2.7007567594996347e-06, - "loss": 1.0214, - "step": 4485 - }, - { - "epoch": 0.40456328628759525, - "grad_norm": 1.5453368737561732, - "learning_rate": 2.7002095531241757e-06, - "loss": 0.8568, - "step": 4486 - }, - { - "epoch": 0.40465346981106554, - "grad_norm": 2.2819126159693686, - "learning_rate": 2.6996622870029767e-06, - "loss": 0.9338, - "step": 4487 - }, - { - "epoch": 0.40474365333453577, - "grad_norm": 1.7453070795014305, - "learning_rate": 2.6991149611827335e-06, - "loss": 1.0653, - "step": 4488 - }, - { - "epoch": 0.40483383685800606, - "grad_norm": 1.7015671911253774, - "learning_rate": 2.6985675757101466e-06, - "loss": 0.9921, - "step": 4489 - }, - { - "epoch": 0.4049240203814763, - "grad_norm": 1.4214938983980843, - "learning_rate": 2.698020130631922e-06, - "loss": 0.9102, - "step": 4490 - }, - { - "epoch": 0.4050142039049466, - "grad_norm": 1.424303851825884, - "learning_rate": 2.6974726259947713e-06, - "loss": 0.8876, - "step": 4491 - }, - { - "epoch": 0.4051043874284168, - "grad_norm": 1.7783582537739322, - "learning_rate": 2.6969250618454106e-06, - "loss": 0.9358, - "step": 4492 - }, - { - "epoch": 0.4051945709518871, - "grad_norm": 1.212416895741807, - "learning_rate": 2.696377438230561e-06, - "loss": 1.0247, - "step": 4493 - }, - { - "epoch": 0.40528475447535733, - "grad_norm": 1.673563024833424, - "learning_rate": 2.6958297551969484e-06, - "loss": 0.8911, - "step": 4494 - }, - { - "epoch": 0.4053749379988276, - "grad_norm": 2.094382233318336, - "learning_rate": 2.695282012791304e-06, - "loss": 0.9169, - "step": 4495 - }, - { - "epoch": 0.40546512152229786, - "grad_norm": 1.501266580532983, - "learning_rate": 2.6947342110603646e-06, - "loss": 1.0038, - "step": 4496 - }, - { - "epoch": 0.40555530504576814, - "grad_norm": 1.3967575979714035, - "learning_rate": 2.6941863500508717e-06, - "loss": 1.0075, - "step": 4497 - }, - { - "epoch": 0.4056454885692384, - "grad_norm": 1.7106281854434697, - "learning_rate": 2.693638429809572e-06, - "loss": 0.9999, - "step": 4498 - }, - { - "epoch": 0.40573567209270867, - "grad_norm": 1.6171911119798508, - "learning_rate": 2.6930904503832167e-06, - "loss": 0.9388, - "step": 4499 - }, - { - "epoch": 0.4058258556161789, - "grad_norm": 1.3398748455387386, - "learning_rate": 2.692542411818562e-06, - "loss": 0.9305, - "step": 4500 - }, - { - "epoch": 0.4059160391396492, - "grad_norm": 1.7015007743463764, - "learning_rate": 2.69199431416237e-06, - "loss": 0.9633, - "step": 4501 - }, - { - "epoch": 0.4060062226631194, - "grad_norm": 1.4250518187085826, - "learning_rate": 2.691446157461408e-06, - "loss": 0.9124, - "step": 4502 - }, - { - "epoch": 0.4060964061865897, - "grad_norm": 1.4243102964440861, - "learning_rate": 2.690897941762447e-06, - "loss": 1.0117, - "step": 4503 - }, - { - "epoch": 0.40618658971006, - "grad_norm": 1.3332192948369785, - "learning_rate": 2.6903496671122642e-06, - "loss": 0.9315, - "step": 4504 - }, - { - "epoch": 0.40627677323353023, - "grad_norm": 0.7540949451913997, - "learning_rate": 2.689801333557641e-06, - "loss": 0.7639, - "step": 4505 - }, - { - "epoch": 0.4063669567570005, - "grad_norm": 1.91310431961801, - "learning_rate": 2.689252941145365e-06, - "loss": 0.9583, - "step": 4506 - }, - { - "epoch": 0.40645714028047075, - "grad_norm": 1.3675196216995709, - "learning_rate": 2.6887044899222277e-06, - "loss": 0.9489, - "step": 4507 - }, - { - "epoch": 0.40654732380394104, - "grad_norm": 1.4607642575935438, - "learning_rate": 2.688155979935025e-06, - "loss": 1.0019, - "step": 4508 - }, - { - "epoch": 0.4066375073274113, - "grad_norm": 1.3545024406633606, - "learning_rate": 2.68760741123056e-06, - "loss": 0.9107, - "step": 4509 - }, - { - "epoch": 0.40672769085088156, - "grad_norm": 1.6582927702091406, - "learning_rate": 2.6870587838556394e-06, - "loss": 0.967, - "step": 4510 - }, - { - "epoch": 0.4068178743743518, - "grad_norm": 1.5108838991995222, - "learning_rate": 2.686510097857075e-06, - "loss": 0.9664, - "step": 4511 - }, - { - "epoch": 0.4069080578978221, - "grad_norm": 1.3199446126125862, - "learning_rate": 2.685961353281683e-06, - "loss": 0.9495, - "step": 4512 - }, - { - "epoch": 0.4069982414212923, - "grad_norm": 1.5437999948908063, - "learning_rate": 2.6854125501762863e-06, - "loss": 0.9344, - "step": 4513 - }, - { - "epoch": 0.4070884249447626, - "grad_norm": 1.3437935134584664, - "learning_rate": 2.684863688587712e-06, - "loss": 0.9541, - "step": 4514 - }, - { - "epoch": 0.40717860846823284, - "grad_norm": 1.7545272305566797, - "learning_rate": 2.6843147685627916e-06, - "loss": 0.9228, - "step": 4515 - }, - { - "epoch": 0.4072687919917031, - "grad_norm": 1.6615334935263457, - "learning_rate": 2.683765790148361e-06, - "loss": 0.9263, - "step": 4516 - }, - { - "epoch": 0.40735897551517336, - "grad_norm": 1.5213956794790755, - "learning_rate": 2.6832167533912637e-06, - "loss": 1.0031, - "step": 4517 - }, - { - "epoch": 0.40744915903864365, - "grad_norm": 2.0201647593091607, - "learning_rate": 2.682667658338345e-06, - "loss": 1.0278, - "step": 4518 - }, - { - "epoch": 0.4075393425621139, - "grad_norm": 1.3796877713057358, - "learning_rate": 2.682118505036458e-06, - "loss": 0.9213, - "step": 4519 - }, - { - "epoch": 0.40762952608558417, - "grad_norm": 0.7244596506575837, - "learning_rate": 2.681569293532459e-06, - "loss": 0.781, - "step": 4520 - }, - { - "epoch": 0.4077197096090544, - "grad_norm": 0.7165750269370884, - "learning_rate": 2.6810200238732102e-06, - "loss": 0.8118, - "step": 4521 - }, - { - "epoch": 0.4078098931325247, - "grad_norm": 1.3581751647476135, - "learning_rate": 2.6804706961055776e-06, - "loss": 0.9973, - "step": 4522 - }, - { - "epoch": 0.4079000766559949, - "grad_norm": 1.403403049437738, - "learning_rate": 2.6799213102764326e-06, - "loss": 0.9821, - "step": 4523 - }, - { - "epoch": 0.4079902601794652, - "grad_norm": 2.2706864219063005, - "learning_rate": 2.679371866432653e-06, - "loss": 1.0461, - "step": 4524 - }, - { - "epoch": 0.40808044370293545, - "grad_norm": 1.5960052839158245, - "learning_rate": 2.6788223646211194e-06, - "loss": 0.9253, - "step": 4525 - }, - { - "epoch": 0.40817062722640574, - "grad_norm": 1.3067237360000592, - "learning_rate": 2.6782728048887183e-06, - "loss": 0.8416, - "step": 4526 - }, - { - "epoch": 0.408260810749876, - "grad_norm": 1.0845483361038557, - "learning_rate": 2.6777231872823416e-06, - "loss": 0.9363, - "step": 4527 - }, - { - "epoch": 0.40835099427334626, - "grad_norm": 1.516439710480793, - "learning_rate": 2.6771735118488864e-06, - "loss": 0.9502, - "step": 4528 - }, - { - "epoch": 0.40844117779681655, - "grad_norm": 1.435337762458749, - "learning_rate": 2.6766237786352523e-06, - "loss": 0.9894, - "step": 4529 - }, - { - "epoch": 0.4085313613202868, - "grad_norm": 1.4120108964488796, - "learning_rate": 2.676073987688347e-06, - "loss": 0.8847, - "step": 4530 - }, - { - "epoch": 0.40862154484375707, - "grad_norm": 1.2972134527681023, - "learning_rate": 2.6755241390550818e-06, - "loss": 0.9609, - "step": 4531 - }, - { - "epoch": 0.4087117283672273, - "grad_norm": 1.2789814567474371, - "learning_rate": 2.6749742327823716e-06, - "loss": 0.9515, - "step": 4532 - }, - { - "epoch": 0.4088019118906976, - "grad_norm": 1.384971743202474, - "learning_rate": 2.674424268917138e-06, - "loss": 0.9708, - "step": 4533 - }, - { - "epoch": 0.4088920954141678, - "grad_norm": 1.4507025759497318, - "learning_rate": 2.6738742475063074e-06, - "loss": 1.0207, - "step": 4534 - }, - { - "epoch": 0.4089822789376381, - "grad_norm": 1.701354830481005, - "learning_rate": 2.6733241685968104e-06, - "loss": 0.9905, - "step": 4535 - }, - { - "epoch": 0.40907246246110834, - "grad_norm": 2.457451376603733, - "learning_rate": 2.6727740322355826e-06, - "loss": 0.9652, - "step": 4536 - }, - { - "epoch": 0.40916264598457863, - "grad_norm": 1.6639708014640762, - "learning_rate": 2.6722238384695644e-06, - "loss": 0.9395, - "step": 4537 - }, - { - "epoch": 0.40925282950804887, - "grad_norm": 1.468863219097628, - "learning_rate": 2.671673587345702e-06, - "loss": 0.962, - "step": 4538 - }, - { - "epoch": 0.40934301303151915, - "grad_norm": 0.8306746908441187, - "learning_rate": 2.6711232789109455e-06, - "loss": 0.7977, - "step": 4539 - }, - { - "epoch": 0.4094331965549894, - "grad_norm": 1.5961737808276655, - "learning_rate": 2.6705729132122497e-06, - "loss": 0.9544, - "step": 4540 - }, - { - "epoch": 0.4095233800784597, - "grad_norm": 1.650036005147488, - "learning_rate": 2.670022490296576e-06, - "loss": 1.0272, - "step": 4541 - }, - { - "epoch": 0.4096135636019299, - "grad_norm": 1.4912970317576983, - "learning_rate": 2.669472010210889e-06, - "loss": 1.0102, - "step": 4542 - }, - { - "epoch": 0.4097037471254002, - "grad_norm": 1.7573668868769414, - "learning_rate": 2.668921473002159e-06, - "loss": 0.9285, - "step": 4543 - }, - { - "epoch": 0.40979393064887043, - "grad_norm": 1.370600815545294, - "learning_rate": 2.6683708787173596e-06, - "loss": 1.0079, - "step": 4544 - }, - { - "epoch": 0.4098841141723407, - "grad_norm": 1.4383278826230506, - "learning_rate": 2.6678202274034718e-06, - "loss": 0.9752, - "step": 4545 - }, - { - "epoch": 0.40997429769581095, - "grad_norm": 1.590233082631391, - "learning_rate": 2.66726951910748e-06, - "loss": 1.0352, - "step": 4546 - }, - { - "epoch": 0.41006448121928124, - "grad_norm": 1.4141924466443507, - "learning_rate": 2.6667187538763737e-06, - "loss": 0.9545, - "step": 4547 - }, - { - "epoch": 0.4101546647427515, - "grad_norm": 1.461870762369147, - "learning_rate": 2.6661679317571473e-06, - "loss": 0.9389, - "step": 4548 - }, - { - "epoch": 0.41024484826622176, - "grad_norm": 1.4232033194601321, - "learning_rate": 2.665617052796799e-06, - "loss": 0.9481, - "step": 4549 - }, - { - "epoch": 0.41033503178969205, - "grad_norm": 1.5210459399436518, - "learning_rate": 2.6650661170423346e-06, - "loss": 0.8412, - "step": 4550 - }, - { - "epoch": 0.4104252153131623, - "grad_norm": 1.6502101099734894, - "learning_rate": 2.6645151245407614e-06, - "loss": 1.0291, - "step": 4551 - }, - { - "epoch": 0.4105153988366326, - "grad_norm": 1.4845144909016212, - "learning_rate": 2.6639640753390936e-06, - "loss": 0.9517, - "step": 4552 - }, - { - "epoch": 0.4106055823601028, - "grad_norm": 1.4785642426936811, - "learning_rate": 2.66341296948435e-06, - "loss": 0.8643, - "step": 4553 - }, - { - "epoch": 0.4106957658835731, - "grad_norm": 1.184645534809559, - "learning_rate": 2.6628618070235534e-06, - "loss": 0.961, - "step": 4554 - }, - { - "epoch": 0.4107859494070433, - "grad_norm": 1.5400536922904113, - "learning_rate": 2.662310588003733e-06, - "loss": 0.9619, - "step": 4555 - }, - { - "epoch": 0.4108761329305136, - "grad_norm": 1.6312349720697485, - "learning_rate": 2.6617593124719205e-06, - "loss": 0.9335, - "step": 4556 - }, - { - "epoch": 0.41096631645398385, - "grad_norm": 1.3342774992080484, - "learning_rate": 2.661207980475155e-06, - "loss": 0.9604, - "step": 4557 - }, - { - "epoch": 0.41105649997745414, - "grad_norm": 1.7520331425079518, - "learning_rate": 2.6606565920604793e-06, - "loss": 0.911, - "step": 4558 - }, - { - "epoch": 0.41114668350092437, - "grad_norm": 1.3437471611525245, - "learning_rate": 2.66010514727494e-06, - "loss": 1.03, - "step": 4559 - }, - { - "epoch": 0.41123686702439466, - "grad_norm": 1.78700384443368, - "learning_rate": 2.659553646165589e-06, - "loss": 0.8466, - "step": 4560 - }, - { - "epoch": 0.4113270505478649, - "grad_norm": 1.3907441934659446, - "learning_rate": 2.659002088779485e-06, - "loss": 0.9294, - "step": 4561 - }, - { - "epoch": 0.4114172340713352, - "grad_norm": 0.7542393398220684, - "learning_rate": 2.6584504751636888e-06, - "loss": 0.7505, - "step": 4562 - }, - { - "epoch": 0.4115074175948054, - "grad_norm": 1.5558839727550462, - "learning_rate": 2.657898805365268e-06, - "loss": 0.9239, - "step": 4563 - }, - { - "epoch": 0.4115976011182757, - "grad_norm": 1.3536206489540263, - "learning_rate": 2.657347079431293e-06, - "loss": 0.9253, - "step": 4564 - }, - { - "epoch": 0.41168778464174594, - "grad_norm": 1.2172865762407732, - "learning_rate": 2.6567952974088403e-06, - "loss": 0.922, - "step": 4565 - }, - { - "epoch": 0.4117779681652162, - "grad_norm": 1.2889905042483514, - "learning_rate": 2.6562434593449917e-06, - "loss": 1.0087, - "step": 4566 - }, - { - "epoch": 0.41186815168868646, - "grad_norm": 1.682205583793018, - "learning_rate": 2.6556915652868325e-06, - "loss": 0.9749, - "step": 4567 - }, - { - "epoch": 0.41195833521215675, - "grad_norm": 1.7379523834141057, - "learning_rate": 2.6551396152814534e-06, - "loss": 0.9564, - "step": 4568 - }, - { - "epoch": 0.412048518735627, - "grad_norm": 1.408233176663349, - "learning_rate": 2.65458760937595e-06, - "loss": 0.9417, - "step": 4569 - }, - { - "epoch": 0.41213870225909727, - "grad_norm": 1.1451357654389878, - "learning_rate": 2.654035547617423e-06, - "loss": 0.9011, - "step": 4570 - }, - { - "epoch": 0.4122288857825675, - "grad_norm": 1.5795758065450471, - "learning_rate": 2.653483430052976e-06, - "loss": 0.8598, - "step": 4571 - }, - { - "epoch": 0.4123190693060378, - "grad_norm": 1.5987659881245708, - "learning_rate": 2.6529312567297197e-06, - "loss": 0.9351, - "step": 4572 - }, - { - "epoch": 0.412409252829508, - "grad_norm": 1.534534901291349, - "learning_rate": 2.652379027694768e-06, - "loss": 1.0237, - "step": 4573 - }, - { - "epoch": 0.4124994363529783, - "grad_norm": 1.6473753341024935, - "learning_rate": 2.651826742995241e-06, - "loss": 1.0064, - "step": 4574 - }, - { - "epoch": 0.4125896198764486, - "grad_norm": 1.3087584890037458, - "learning_rate": 2.651274402678262e-06, - "loss": 0.9453, - "step": 4575 - }, - { - "epoch": 0.41267980339991883, - "grad_norm": 1.3229549983120736, - "learning_rate": 2.6507220067909597e-06, - "loss": 0.9774, - "step": 4576 - }, - { - "epoch": 0.4127699869233891, - "grad_norm": 1.7178294404200614, - "learning_rate": 2.650169555380468e-06, - "loss": 0.9104, - "step": 4577 - }, - { - "epoch": 0.41286017044685935, - "grad_norm": 1.3150668249084967, - "learning_rate": 2.6496170484939253e-06, - "loss": 0.9466, - "step": 4578 - }, - { - "epoch": 0.41295035397032964, - "grad_norm": 1.4752441656750093, - "learning_rate": 2.6490644861784735e-06, - "loss": 0.9647, - "step": 4579 - }, - { - "epoch": 0.4130405374937999, - "grad_norm": 1.3847063963884976, - "learning_rate": 2.648511868481261e-06, - "loss": 0.9731, - "step": 4580 - }, - { - "epoch": 0.41313072101727016, - "grad_norm": 2.772226590433631, - "learning_rate": 2.6479591954494397e-06, - "loss": 0.9478, - "step": 4581 - }, - { - "epoch": 0.4132209045407404, - "grad_norm": 1.4011608437071204, - "learning_rate": 2.647406467130167e-06, - "loss": 0.8519, - "step": 4582 - }, - { - "epoch": 0.4133110880642107, - "grad_norm": 1.539979922671541, - "learning_rate": 2.646853683570605e-06, - "loss": 0.9716, - "step": 4583 - }, - { - "epoch": 0.4134012715876809, - "grad_norm": 1.3721905964430063, - "learning_rate": 2.6463008448179196e-06, - "loss": 0.8944, - "step": 4584 - }, - { - "epoch": 0.4134914551111512, - "grad_norm": 1.1485581626563173, - "learning_rate": 2.6457479509192828e-06, - "loss": 0.9066, - "step": 4585 - }, - { - "epoch": 0.41358163863462144, - "grad_norm": 1.612250564563266, - "learning_rate": 2.645195001921871e-06, - "loss": 0.9397, - "step": 4586 - }, - { - "epoch": 0.41367182215809173, - "grad_norm": 1.5584436788947058, - "learning_rate": 2.644641997872863e-06, - "loss": 0.9226, - "step": 4587 - }, - { - "epoch": 0.41376200568156196, - "grad_norm": 1.7864038961437223, - "learning_rate": 2.644088938819445e-06, - "loss": 0.9571, - "step": 4588 - }, - { - "epoch": 0.41385218920503225, - "grad_norm": 1.5224395613426775, - "learning_rate": 2.6435358248088077e-06, - "loss": 1.0269, - "step": 4589 - }, - { - "epoch": 0.4139423727285025, - "grad_norm": 1.569714332638295, - "learning_rate": 2.642982655888146e-06, - "loss": 0.9956, - "step": 4590 - }, - { - "epoch": 0.41403255625197277, - "grad_norm": 1.5471435467897505, - "learning_rate": 2.6424294321046585e-06, - "loss": 0.9179, - "step": 4591 - }, - { - "epoch": 0.414122739775443, - "grad_norm": 0.8680402669619905, - "learning_rate": 2.641876153505549e-06, - "loss": 0.7941, - "step": 4592 - }, - { - "epoch": 0.4142129232989133, - "grad_norm": 1.9197691879018959, - "learning_rate": 2.641322820138027e-06, - "loss": 0.9618, - "step": 4593 - }, - { - "epoch": 0.4143031068223835, - "grad_norm": 1.6553578403075793, - "learning_rate": 2.640769432049306e-06, - "loss": 0.9093, - "step": 4594 - }, - { - "epoch": 0.4143932903458538, - "grad_norm": 1.3597276164514924, - "learning_rate": 2.6402159892866038e-06, - "loss": 0.8754, - "step": 4595 - }, - { - "epoch": 0.41448347386932405, - "grad_norm": 1.1825068049809666, - "learning_rate": 2.639662491897143e-06, - "loss": 0.9417, - "step": 4596 - }, - { - "epoch": 0.41457365739279434, - "grad_norm": 1.3850705519929691, - "learning_rate": 2.639108939928152e-06, - "loss": 0.964, - "step": 4597 - }, - { - "epoch": 0.4146638409162646, - "grad_norm": 1.6124822748673782, - "learning_rate": 2.638555333426862e-06, - "loss": 0.959, - "step": 4598 - }, - { - "epoch": 0.41475402443973486, - "grad_norm": 1.254746295316205, - "learning_rate": 2.6380016724405093e-06, - "loss": 0.9875, - "step": 4599 - }, - { - "epoch": 0.41484420796320515, - "grad_norm": 1.4399764249779188, - "learning_rate": 2.637447957016336e-06, - "loss": 0.9083, - "step": 4600 - }, - { - "epoch": 0.4149343914866754, - "grad_norm": 1.599548189625573, - "learning_rate": 2.636894187201589e-06, - "loss": 1.0107, - "step": 4601 - }, - { - "epoch": 0.41502457501014567, - "grad_norm": 1.4680454918259709, - "learning_rate": 2.6363403630435176e-06, - "loss": 0.8553, - "step": 4602 - }, - { - "epoch": 0.4151147585336159, - "grad_norm": 1.5478339257423832, - "learning_rate": 2.635786484589378e-06, - "loss": 0.9419, - "step": 4603 - }, - { - "epoch": 0.4152049420570862, - "grad_norm": 1.6359800433408327, - "learning_rate": 2.63523255188643e-06, - "loss": 0.8623, - "step": 4604 - }, - { - "epoch": 0.4152951255805564, - "grad_norm": 1.2458196834892383, - "learning_rate": 2.6346785649819375e-06, - "loss": 0.9755, - "step": 4605 - }, - { - "epoch": 0.4153853091040267, - "grad_norm": 1.4299661333568734, - "learning_rate": 2.6341245239231706e-06, - "loss": 0.9776, - "step": 4606 - }, - { - "epoch": 0.41547549262749695, - "grad_norm": 1.0359208647944373, - "learning_rate": 2.6335704287574024e-06, - "loss": 0.9595, - "step": 4607 - }, - { - "epoch": 0.41556567615096723, - "grad_norm": 1.6195790763598175, - "learning_rate": 2.6330162795319124e-06, - "loss": 1.0149, - "step": 4608 - }, - { - "epoch": 0.41565585967443747, - "grad_norm": 1.6112417116980384, - "learning_rate": 2.632462076293983e-06, - "loss": 1.0076, - "step": 4609 - }, - { - "epoch": 0.41574604319790776, - "grad_norm": 1.326372493800128, - "learning_rate": 2.6319078190909017e-06, - "loss": 0.9531, - "step": 4610 - }, - { - "epoch": 0.415836226721378, - "grad_norm": 1.683405181874796, - "learning_rate": 2.6313535079699606e-06, - "loss": 0.8704, - "step": 4611 - }, - { - "epoch": 0.4159264102448483, - "grad_norm": 1.539675750161103, - "learning_rate": 2.6307991429784572e-06, - "loss": 0.9339, - "step": 4612 - }, - { - "epoch": 0.4160165937683185, - "grad_norm": 1.514909398356711, - "learning_rate": 2.6302447241636924e-06, - "loss": 1.0028, - "step": 4613 - }, - { - "epoch": 0.4161067772917888, - "grad_norm": 1.456459321789394, - "learning_rate": 2.629690251572973e-06, - "loss": 1.0316, - "step": 4614 - }, - { - "epoch": 0.41619696081525903, - "grad_norm": 1.7574685671560746, - "learning_rate": 2.629135725253609e-06, - "loss": 0.9261, - "step": 4615 - }, - { - "epoch": 0.4162871443387293, - "grad_norm": 1.5473991816007362, - "learning_rate": 2.6285811452529162e-06, - "loss": 0.9435, - "step": 4616 - }, - { - "epoch": 0.41637732786219955, - "grad_norm": 1.2582345100569603, - "learning_rate": 2.6280265116182136e-06, - "loss": 0.9682, - "step": 4617 - }, - { - "epoch": 0.41646751138566984, - "grad_norm": 1.5027478797807525, - "learning_rate": 2.6274718243968266e-06, - "loss": 0.9528, - "step": 4618 - }, - { - "epoch": 0.4165576949091401, - "grad_norm": 1.500622540670125, - "learning_rate": 2.626917083636084e-06, - "loss": 0.8738, - "step": 4619 - }, - { - "epoch": 0.41664787843261036, - "grad_norm": 1.3005193498221315, - "learning_rate": 2.6263622893833183e-06, - "loss": 0.8822, - "step": 4620 - }, - { - "epoch": 0.4167380619560806, - "grad_norm": 1.4635033743884274, - "learning_rate": 2.625807441685869e-06, - "loss": 0.8935, - "step": 4621 - }, - { - "epoch": 0.4168282454795509, - "grad_norm": 0.7089918151969499, - "learning_rate": 2.625252540591078e-06, - "loss": 0.7725, - "step": 4622 - }, - { - "epoch": 0.4169184290030212, - "grad_norm": 1.5110659902667778, - "learning_rate": 2.6246975861462927e-06, - "loss": 1.0208, - "step": 4623 - }, - { - "epoch": 0.4170086125264914, - "grad_norm": 1.43958231166393, - "learning_rate": 2.624142578398864e-06, - "loss": 0.9896, - "step": 4624 - }, - { - "epoch": 0.4170987960499617, - "grad_norm": 1.2586395672083874, - "learning_rate": 2.6235875173961498e-06, - "loss": 0.9386, - "step": 4625 - }, - { - "epoch": 0.41718897957343193, - "grad_norm": 1.2997133837702362, - "learning_rate": 2.62303240318551e-06, - "loss": 0.8816, - "step": 4626 - }, - { - "epoch": 0.4172791630969022, - "grad_norm": 1.7532823978152086, - "learning_rate": 2.62247723581431e-06, - "loss": 1.0217, - "step": 4627 - }, - { - "epoch": 0.41736934662037245, - "grad_norm": 1.7399520559392327, - "learning_rate": 2.62192201532992e-06, - "loss": 0.9536, - "step": 4628 - }, - { - "epoch": 0.41745953014384274, - "grad_norm": 1.6745828108997516, - "learning_rate": 2.6213667417797145e-06, - "loss": 0.9689, - "step": 4629 - }, - { - "epoch": 0.41754971366731297, - "grad_norm": 1.3001891383717432, - "learning_rate": 2.6208114152110725e-06, - "loss": 0.9784, - "step": 4630 - }, - { - "epoch": 0.41763989719078326, - "grad_norm": 1.535760736238216, - "learning_rate": 2.6202560356713774e-06, - "loss": 0.9689, - "step": 4631 - }, - { - "epoch": 0.4177300807142535, - "grad_norm": 0.7170656204548078, - "learning_rate": 2.619700603208017e-06, - "loss": 0.8691, - "step": 4632 - }, - { - "epoch": 0.4178202642377238, - "grad_norm": 1.2245804145953862, - "learning_rate": 2.6191451178683842e-06, - "loss": 0.8707, - "step": 4633 - }, - { - "epoch": 0.417910447761194, - "grad_norm": 1.4483230990072768, - "learning_rate": 2.6185895796998764e-06, - "loss": 0.9735, - "step": 4634 - }, - { - "epoch": 0.4180006312846643, - "grad_norm": 2.1953618054266304, - "learning_rate": 2.6180339887498946e-06, - "loss": 1.0371, - "step": 4635 - }, - { - "epoch": 0.41809081480813454, - "grad_norm": 1.6838357490392082, - "learning_rate": 2.617478345065846e-06, - "loss": 0.9013, - "step": 4636 - }, - { - "epoch": 0.4181809983316048, - "grad_norm": 1.6507880640859645, - "learning_rate": 2.616922648695139e-06, - "loss": 0.9644, - "step": 4637 - }, - { - "epoch": 0.41827118185507506, - "grad_norm": 3.7791824362922446, - "learning_rate": 2.61636689968519e-06, - "loss": 0.8454, - "step": 4638 - }, - { - "epoch": 0.41836136537854535, - "grad_norm": 2.0107287887712277, - "learning_rate": 2.6158110980834186e-06, - "loss": 1.0241, - "step": 4639 - }, - { - "epoch": 0.4184515489020156, - "grad_norm": 1.5696605638596148, - "learning_rate": 2.615255243937249e-06, - "loss": 1.0051, - "step": 4640 - }, - { - "epoch": 0.41854173242548587, - "grad_norm": 2.117655234383403, - "learning_rate": 2.61469933729411e-06, - "loss": 0.974, - "step": 4641 - }, - { - "epoch": 0.4186319159489561, - "grad_norm": 1.5214966760023039, - "learning_rate": 2.614143378201433e-06, - "loss": 0.9258, - "step": 4642 - }, - { - "epoch": 0.4187220994724264, - "grad_norm": 1.285713932816896, - "learning_rate": 2.6135873667066567e-06, - "loss": 0.9282, - "step": 4643 - }, - { - "epoch": 0.4188122829958966, - "grad_norm": 1.4795330381891239, - "learning_rate": 2.613031302857224e-06, - "loss": 0.9087, - "step": 4644 - }, - { - "epoch": 0.4189024665193669, - "grad_norm": 1.529730529491677, - "learning_rate": 2.6124751867005792e-06, - "loss": 0.9735, - "step": 4645 - }, - { - "epoch": 0.4189926500428372, - "grad_norm": 1.2585422933941393, - "learning_rate": 2.611919018284175e-06, - "loss": 0.9998, - "step": 4646 - }, - { - "epoch": 0.41908283356630743, - "grad_norm": 1.6981141457265185, - "learning_rate": 2.611362797655466e-06, - "loss": 0.9973, - "step": 4647 - }, - { - "epoch": 0.4191730170897777, - "grad_norm": 1.2694412434289717, - "learning_rate": 2.6108065248619124e-06, - "loss": 1.019, - "step": 4648 - }, - { - "epoch": 0.41926320061324795, - "grad_norm": 1.2120008660398354, - "learning_rate": 2.610250199950978e-06, - "loss": 0.8882, - "step": 4649 - }, - { - "epoch": 0.41935338413671824, - "grad_norm": 1.5962183667949095, - "learning_rate": 2.609693822970131e-06, - "loss": 0.9271, - "step": 4650 - }, - { - "epoch": 0.4194435676601885, - "grad_norm": 1.8937207562797882, - "learning_rate": 2.609137393966846e-06, - "loss": 0.9132, - "step": 4651 - }, - { - "epoch": 0.41953375118365877, - "grad_norm": 1.4126427198567348, - "learning_rate": 2.6085809129886e-06, - "loss": 0.9076, - "step": 4652 - }, - { - "epoch": 0.419623934707129, - "grad_norm": 1.7558525857592602, - "learning_rate": 2.608024380082874e-06, - "loss": 0.9682, - "step": 4653 - }, - { - "epoch": 0.4197141182305993, - "grad_norm": 1.5121401808056223, - "learning_rate": 2.6074677952971554e-06, - "loss": 0.959, - "step": 4654 - }, - { - "epoch": 0.4198043017540695, - "grad_norm": 2.783261032840562, - "learning_rate": 2.606911158678935e-06, - "loss": 0.8797, - "step": 4655 - }, - { - "epoch": 0.4198944852775398, - "grad_norm": 1.545743200685421, - "learning_rate": 2.606354470275708e-06, - "loss": 0.9503, - "step": 4656 - }, - { - "epoch": 0.41998466880101004, - "grad_norm": 1.6792213680296264, - "learning_rate": 2.6057977301349744e-06, - "loss": 0.9229, - "step": 4657 - }, - { - "epoch": 0.42007485232448033, - "grad_norm": 1.4447776010823796, - "learning_rate": 2.6052409383042383e-06, - "loss": 0.9177, - "step": 4658 - }, - { - "epoch": 0.42016503584795056, - "grad_norm": 1.340594867999011, - "learning_rate": 2.6046840948310074e-06, - "loss": 0.9281, - "step": 4659 - }, - { - "epoch": 0.42025521937142085, - "grad_norm": 1.3563051309235528, - "learning_rate": 2.6041271997627962e-06, - "loss": 0.9545, - "step": 4660 - }, - { - "epoch": 0.4203454028948911, - "grad_norm": 1.6726495250012594, - "learning_rate": 2.6035702531471202e-06, - "loss": 0.9393, - "step": 4661 - }, - { - "epoch": 0.4204355864183614, - "grad_norm": 1.2794336143972447, - "learning_rate": 2.6030132550315035e-06, - "loss": 0.9542, - "step": 4662 - }, - { - "epoch": 0.4205257699418316, - "grad_norm": 0.6898033614124012, - "learning_rate": 2.60245620546347e-06, - "loss": 0.8323, - "step": 4663 - }, - { - "epoch": 0.4206159534653019, - "grad_norm": 1.4947847461429473, - "learning_rate": 2.6018991044905517e-06, - "loss": 0.9221, - "step": 4664 - }, - { - "epoch": 0.42070613698877213, - "grad_norm": 1.3405251508044413, - "learning_rate": 2.6013419521602825e-06, - "loss": 0.9912, - "step": 4665 - }, - { - "epoch": 0.4207963205122424, - "grad_norm": 1.595021582071785, - "learning_rate": 2.600784748520202e-06, - "loss": 1.0799, - "step": 4666 - }, - { - "epoch": 0.42088650403571265, - "grad_norm": 1.597955950768454, - "learning_rate": 2.6002274936178544e-06, - "loss": 0.8804, - "step": 4667 - }, - { - "epoch": 0.42097668755918294, - "grad_norm": 1.5008073859096076, - "learning_rate": 2.5996701875007873e-06, - "loss": 0.9616, - "step": 4668 - }, - { - "epoch": 0.4210668710826532, - "grad_norm": 1.3916102573056224, - "learning_rate": 2.5991128302165533e-06, - "loss": 0.9418, - "step": 4669 - }, - { - "epoch": 0.42115705460612346, - "grad_norm": 1.352024197962868, - "learning_rate": 2.5985554218127094e-06, - "loss": 1.0221, - "step": 4670 - }, - { - "epoch": 0.42124723812959375, - "grad_norm": 1.5976034216319326, - "learning_rate": 2.597997962336816e-06, - "loss": 0.8826, - "step": 4671 - }, - { - "epoch": 0.421337421653064, - "grad_norm": 1.7955198402296548, - "learning_rate": 2.5974404518364393e-06, - "loss": 0.9437, - "step": 4672 - }, - { - "epoch": 0.42142760517653427, - "grad_norm": 1.552436387360573, - "learning_rate": 2.596882890359149e-06, - "loss": 1.0581, - "step": 4673 - }, - { - "epoch": 0.4215177887000045, - "grad_norm": 1.7736680473337079, - "learning_rate": 2.5963252779525196e-06, - "loss": 0.872, - "step": 4674 - }, - { - "epoch": 0.4216079722234748, - "grad_norm": 1.6545444559976497, - "learning_rate": 2.595767614664129e-06, - "loss": 0.9508, - "step": 4675 - }, - { - "epoch": 0.421698155746945, - "grad_norm": 1.5256043502991636, - "learning_rate": 2.5952099005415607e-06, - "loss": 0.9232, - "step": 4676 - }, - { - "epoch": 0.4217883392704153, - "grad_norm": 1.5918706397066882, - "learning_rate": 2.594652135632402e-06, - "loss": 0.9095, - "step": 4677 - }, - { - "epoch": 0.42187852279388555, - "grad_norm": 1.2180634545582771, - "learning_rate": 2.594094319984244e-06, - "loss": 0.9042, - "step": 4678 - }, - { - "epoch": 0.42196870631735584, - "grad_norm": 1.3970905402217888, - "learning_rate": 2.5935364536446825e-06, - "loss": 1.0064, - "step": 4679 - }, - { - "epoch": 0.42205888984082607, - "grad_norm": 2.0397786603789645, - "learning_rate": 2.5929785366613185e-06, - "loss": 0.9463, - "step": 4680 - }, - { - "epoch": 0.42214907336429636, - "grad_norm": 1.5383162649963575, - "learning_rate": 2.592420569081756e-06, - "loss": 0.9914, - "step": 4681 - }, - { - "epoch": 0.4222392568877666, - "grad_norm": 1.4999175049031506, - "learning_rate": 2.5918625509536037e-06, - "loss": 0.9091, - "step": 4682 - }, - { - "epoch": 0.4223294404112369, - "grad_norm": 0.7502978448735416, - "learning_rate": 2.591304482324475e-06, - "loss": 0.7956, - "step": 4683 - }, - { - "epoch": 0.4224196239347071, - "grad_norm": 1.4363898261177093, - "learning_rate": 2.5907463632419878e-06, - "loss": 0.9632, - "step": 4684 - }, - { - "epoch": 0.4225098074581774, - "grad_norm": 1.4351430727250416, - "learning_rate": 2.5901881937537632e-06, - "loss": 0.918, - "step": 4685 - }, - { - "epoch": 0.42259999098164763, - "grad_norm": 0.8678791964777139, - "learning_rate": 2.589629973907428e-06, - "loss": 0.8562, - "step": 4686 - }, - { - "epoch": 0.4226901745051179, - "grad_norm": 1.5063258140130362, - "learning_rate": 2.589071703750612e-06, - "loss": 0.9025, - "step": 4687 - }, - { - "epoch": 0.42278035802858815, - "grad_norm": 1.3172018663521157, - "learning_rate": 2.5885133833309504e-06, - "loss": 0.9923, - "step": 4688 - }, - { - "epoch": 0.42287054155205844, - "grad_norm": 1.4305086539685734, - "learning_rate": 2.5879550126960814e-06, - "loss": 1.031, - "step": 4689 - }, - { - "epoch": 0.4229607250755287, - "grad_norm": 1.424382775466673, - "learning_rate": 2.5873965918936494e-06, - "loss": 1.0079, - "step": 4690 - }, - { - "epoch": 0.42305090859899896, - "grad_norm": 1.7168258300006005, - "learning_rate": 2.586838120971301e-06, - "loss": 0.9699, - "step": 4691 - }, - { - "epoch": 0.4231410921224692, - "grad_norm": 1.2974475837513022, - "learning_rate": 2.586279599976689e-06, - "loss": 1.0052, - "step": 4692 - }, - { - "epoch": 0.4232312756459395, - "grad_norm": 1.4217888942259547, - "learning_rate": 2.585721028957468e-06, - "loss": 0.9511, - "step": 4693 - }, - { - "epoch": 0.4233214591694098, - "grad_norm": 1.2943811452766691, - "learning_rate": 2.585162407961299e-06, - "loss": 0.9624, - "step": 4694 - }, - { - "epoch": 0.42341164269288, - "grad_norm": 1.9237170192179218, - "learning_rate": 2.584603737035847e-06, - "loss": 0.8873, - "step": 4695 - }, - { - "epoch": 0.4235018262163503, - "grad_norm": 1.6926761643703052, - "learning_rate": 2.5840450162287806e-06, - "loss": 0.9454, - "step": 4696 - }, - { - "epoch": 0.42359200973982053, - "grad_norm": 1.4825436186525087, - "learning_rate": 2.583486245587774e-06, - "loss": 0.8678, - "step": 4697 - }, - { - "epoch": 0.4236821932632908, - "grad_norm": 1.4041140122922287, - "learning_rate": 2.5829274251605023e-06, - "loss": 1.0524, - "step": 4698 - }, - { - "epoch": 0.42377237678676105, - "grad_norm": 1.3672720310781936, - "learning_rate": 2.582368554994649e-06, - "loss": 0.9275, - "step": 4699 - }, - { - "epoch": 0.42386256031023134, - "grad_norm": 0.8383722943782304, - "learning_rate": 2.5818096351378994e-06, - "loss": 0.854, - "step": 4700 - }, - { - "epoch": 0.4239527438337016, - "grad_norm": 1.4966659845946753, - "learning_rate": 2.5812506656379435e-06, - "loss": 1.003, - "step": 4701 - }, - { - "epoch": 0.42404292735717186, - "grad_norm": 1.4354861707243796, - "learning_rate": 2.580691646542476e-06, - "loss": 0.8968, - "step": 4702 - }, - { - "epoch": 0.4241331108806421, - "grad_norm": 1.4977846475642371, - "learning_rate": 2.5801325778991958e-06, - "loss": 0.9954, - "step": 4703 - }, - { - "epoch": 0.4242232944041124, - "grad_norm": 2.0249235962947587, - "learning_rate": 2.5795734597558043e-06, - "loss": 0.8036, - "step": 4704 - }, - { - "epoch": 0.4243134779275826, - "grad_norm": 1.5377775143397723, - "learning_rate": 2.579014292160011e-06, - "loss": 0.9529, - "step": 4705 - }, - { - "epoch": 0.4244036614510529, - "grad_norm": 1.186703314313522, - "learning_rate": 2.5784550751595236e-06, - "loss": 0.9382, - "step": 4706 - }, - { - "epoch": 0.42449384497452314, - "grad_norm": 1.2683551211413546, - "learning_rate": 2.577895808802061e-06, - "loss": 1.0109, - "step": 4707 - }, - { - "epoch": 0.4245840284979934, - "grad_norm": 1.5562922950726952, - "learning_rate": 2.577336493135341e-06, - "loss": 0.8717, - "step": 4708 - }, - { - "epoch": 0.42467421202146366, - "grad_norm": 1.1334085409137122, - "learning_rate": 2.576777128207088e-06, - "loss": 1.0401, - "step": 4709 - }, - { - "epoch": 0.42476439554493395, - "grad_norm": 1.3349859853430361, - "learning_rate": 2.5762177140650306e-06, - "loss": 0.9156, - "step": 4710 - }, - { - "epoch": 0.4248545790684042, - "grad_norm": 1.6331677210140292, - "learning_rate": 2.5756582507569003e-06, - "loss": 0.964, - "step": 4711 - }, - { - "epoch": 0.42494476259187447, - "grad_norm": 1.2946197939918436, - "learning_rate": 2.5750987383304335e-06, - "loss": 0.9718, - "step": 4712 - }, - { - "epoch": 0.4250349461153447, - "grad_norm": 1.7155581574195167, - "learning_rate": 2.574539176833372e-06, - "loss": 0.937, - "step": 4713 - }, - { - "epoch": 0.425125129638815, - "grad_norm": 1.383929862623887, - "learning_rate": 2.5739795663134594e-06, - "loss": 0.8922, - "step": 4714 - }, - { - "epoch": 0.4252153131622852, - "grad_norm": 1.3520123830008128, - "learning_rate": 2.5734199068184454e-06, - "loss": 1.041, - "step": 4715 - }, - { - "epoch": 0.4253054966857555, - "grad_norm": 1.8534363226210682, - "learning_rate": 2.572860198396083e-06, - "loss": 0.8724, - "step": 4716 - }, - { - "epoch": 0.4253956802092258, - "grad_norm": 1.3102267879270737, - "learning_rate": 2.57230044109413e-06, - "loss": 0.9814, - "step": 4717 - }, - { - "epoch": 0.42548586373269603, - "grad_norm": 1.5630704220010438, - "learning_rate": 2.5717406349603483e-06, - "loss": 0.9312, - "step": 4718 - }, - { - "epoch": 0.4255760472561663, - "grad_norm": 1.4185229182739978, - "learning_rate": 2.5711807800425026e-06, - "loss": 0.9773, - "step": 4719 - }, - { - "epoch": 0.42566623077963656, - "grad_norm": 1.8277159665267673, - "learning_rate": 2.5706208763883633e-06, - "loss": 0.795, - "step": 4720 - }, - { - "epoch": 0.42575641430310684, - "grad_norm": 1.5134941144361889, - "learning_rate": 2.570060924045704e-06, - "loss": 0.9044, - "step": 4721 - }, - { - "epoch": 0.4258465978265771, - "grad_norm": 1.417126870672198, - "learning_rate": 2.569500923062304e-06, - "loss": 0.9771, - "step": 4722 - }, - { - "epoch": 0.42593678135004737, - "grad_norm": 1.3525295436262514, - "learning_rate": 2.5689408734859445e-06, - "loss": 0.8696, - "step": 4723 - }, - { - "epoch": 0.4260269648735176, - "grad_norm": 1.5155281743848004, - "learning_rate": 2.5683807753644127e-06, - "loss": 0.9065, - "step": 4724 - }, - { - "epoch": 0.4261171483969879, - "grad_norm": 1.3863567040862772, - "learning_rate": 2.5678206287454996e-06, - "loss": 0.9274, - "step": 4725 - }, - { - "epoch": 0.4262073319204581, - "grad_norm": 1.5914154157341827, - "learning_rate": 2.567260433676999e-06, - "loss": 0.9755, - "step": 4726 - }, - { - "epoch": 0.4262975154439284, - "grad_norm": 1.4926934308277366, - "learning_rate": 2.5667001902067107e-06, - "loss": 0.9292, - "step": 4727 - }, - { - "epoch": 0.42638769896739864, - "grad_norm": 1.5288315962867485, - "learning_rate": 2.566139898382437e-06, - "loss": 0.9974, - "step": 4728 - }, - { - "epoch": 0.42647788249086893, - "grad_norm": 1.378008715163245, - "learning_rate": 2.5655795582519853e-06, - "loss": 0.9618, - "step": 4729 - }, - { - "epoch": 0.42656806601433916, - "grad_norm": 1.428933700539787, - "learning_rate": 2.565019169863168e-06, - "loss": 1.0147, - "step": 4730 - }, - { - "epoch": 0.42665824953780945, - "grad_norm": 0.6573122147271775, - "learning_rate": 2.5644587332637994e-06, - "loss": 0.7662, - "step": 4731 - }, - { - "epoch": 0.4267484330612797, - "grad_norm": 1.3130459785197348, - "learning_rate": 2.5638982485016994e-06, - "loss": 0.979, - "step": 4732 - }, - { - "epoch": 0.42683861658475, - "grad_norm": 0.9681079952157692, - "learning_rate": 2.5633377156246917e-06, - "loss": 0.794, - "step": 4733 - }, - { - "epoch": 0.4269288001082202, - "grad_norm": 1.6542270470015947, - "learning_rate": 2.562777134680603e-06, - "loss": 0.9352, - "step": 4734 - }, - { - "epoch": 0.4270189836316905, - "grad_norm": 1.649555712908113, - "learning_rate": 2.562216505717267e-06, - "loss": 0.957, - "step": 4735 - }, - { - "epoch": 0.42710916715516073, - "grad_norm": 1.3874859903676475, - "learning_rate": 2.561655828782518e-06, - "loss": 0.9777, - "step": 4736 - }, - { - "epoch": 0.427199350678631, - "grad_norm": 1.5663397339660792, - "learning_rate": 2.561095103924197e-06, - "loss": 0.9302, - "step": 4737 - }, - { - "epoch": 0.42728953420210125, - "grad_norm": 1.5672622586767455, - "learning_rate": 2.560534331190148e-06, - "loss": 0.9036, - "step": 4738 - }, - { - "epoch": 0.42737971772557154, - "grad_norm": 0.6724152499901127, - "learning_rate": 2.559973510628218e-06, - "loss": 0.7691, - "step": 4739 - }, - { - "epoch": 0.4274699012490418, - "grad_norm": 22.47780820214163, - "learning_rate": 2.5594126422862615e-06, - "loss": 0.8992, - "step": 4740 - }, - { - "epoch": 0.42756008477251206, - "grad_norm": 1.8478238632598531, - "learning_rate": 2.558851726212134e-06, - "loss": 0.9062, - "step": 4741 - }, - { - "epoch": 0.42765026829598235, - "grad_norm": 1.6414534883814569, - "learning_rate": 2.5582907624536953e-06, - "loss": 0.9274, - "step": 4742 - }, - { - "epoch": 0.4277404518194526, - "grad_norm": 1.7298035731735313, - "learning_rate": 2.557729751058811e-06, - "loss": 0.9654, - "step": 4743 - }, - { - "epoch": 0.42783063534292287, - "grad_norm": 1.474202525621175, - "learning_rate": 2.557168692075348e-06, - "loss": 0.9233, - "step": 4744 - }, - { - "epoch": 0.4279208188663931, - "grad_norm": 1.2482906575074662, - "learning_rate": 2.556607585551181e-06, - "loss": 0.8931, - "step": 4745 - }, - { - "epoch": 0.4280110023898634, - "grad_norm": 1.1030222101923757, - "learning_rate": 2.5560464315341844e-06, - "loss": 0.9013, - "step": 4746 - }, - { - "epoch": 0.4281011859133336, - "grad_norm": 1.5656731238364332, - "learning_rate": 2.555485230072242e-06, - "loss": 0.9461, - "step": 4747 - }, - { - "epoch": 0.4281913694368039, - "grad_norm": 1.7636657917513443, - "learning_rate": 2.5549239812132354e-06, - "loss": 1.0043, - "step": 4748 - }, - { - "epoch": 0.42828155296027415, - "grad_norm": 1.4039928972086335, - "learning_rate": 2.5543626850050556e-06, - "loss": 1.0121, - "step": 4749 - }, - { - "epoch": 0.42837173648374444, - "grad_norm": 1.3103149936614995, - "learning_rate": 2.5538013414955944e-06, - "loss": 1.0027, - "step": 4750 - }, - { - "epoch": 0.42846192000721467, - "grad_norm": 1.3341121584455544, - "learning_rate": 2.5532399507327494e-06, - "loss": 0.9011, - "step": 4751 - }, - { - "epoch": 0.42855210353068496, - "grad_norm": 0.8400460642718183, - "learning_rate": 2.552678512764421e-06, - "loss": 0.8108, - "step": 4752 - }, - { - "epoch": 0.4286422870541552, - "grad_norm": 1.645900129419446, - "learning_rate": 2.5521170276385147e-06, - "loss": 1.0009, - "step": 4753 - }, - { - "epoch": 0.4287324705776255, - "grad_norm": 1.5151438785129334, - "learning_rate": 2.5515554954029394e-06, - "loss": 1.0, - "step": 4754 - }, - { - "epoch": 0.4288226541010957, - "grad_norm": 2.4238185836272215, - "learning_rate": 2.550993916105608e-06, - "loss": 0.9696, - "step": 4755 - }, - { - "epoch": 0.428912837624566, - "grad_norm": 1.362228643010869, - "learning_rate": 2.550432289794437e-06, - "loss": 0.9108, - "step": 4756 - }, - { - "epoch": 0.42900302114803623, - "grad_norm": 1.5358365713586617, - "learning_rate": 2.5498706165173483e-06, - "loss": 0.9586, - "step": 4757 - }, - { - "epoch": 0.4290932046715065, - "grad_norm": 1.2632051575909946, - "learning_rate": 2.5493088963222668e-06, - "loss": 0.9256, - "step": 4758 - }, - { - "epoch": 0.42918338819497676, - "grad_norm": 1.52510511630131, - "learning_rate": 2.548747129257121e-06, - "loss": 0.9957, - "step": 4759 - }, - { - "epoch": 0.42927357171844704, - "grad_norm": 1.5354996702342298, - "learning_rate": 2.548185315369845e-06, - "loss": 0.8764, - "step": 4760 - }, - { - "epoch": 0.4293637552419173, - "grad_norm": 2.1398749603747453, - "learning_rate": 2.5476234547083746e-06, - "loss": 1.0354, - "step": 4761 - }, - { - "epoch": 0.42945393876538757, - "grad_norm": 1.4697955042489084, - "learning_rate": 2.547061547320652e-06, - "loss": 1.0183, - "step": 4762 - }, - { - "epoch": 0.4295441222888578, - "grad_norm": 1.1981401594354129, - "learning_rate": 2.5464995932546217e-06, - "loss": 0.9343, - "step": 4763 - }, - { - "epoch": 0.4296343058123281, - "grad_norm": 1.6164012232204945, - "learning_rate": 2.545937592558232e-06, - "loss": 0.9108, - "step": 4764 - }, - { - "epoch": 0.4297244893357984, - "grad_norm": 1.4386679838585208, - "learning_rate": 2.5453755452794374e-06, - "loss": 0.8993, - "step": 4765 - }, - { - "epoch": 0.4298146728592686, - "grad_norm": 1.4215683658836904, - "learning_rate": 2.5448134514661938e-06, - "loss": 0.9896, - "step": 4766 - }, - { - "epoch": 0.4299048563827389, - "grad_norm": 1.653697512162051, - "learning_rate": 2.5442513111664623e-06, - "loss": 0.9517, - "step": 4767 - }, - { - "epoch": 0.42999503990620913, - "grad_norm": 1.62735614158392, - "learning_rate": 2.5436891244282084e-06, - "loss": 0.9395, - "step": 4768 - }, - { - "epoch": 0.4300852234296794, - "grad_norm": 1.4524483694857206, - "learning_rate": 2.5431268912994004e-06, - "loss": 1.0459, - "step": 4769 - }, - { - "epoch": 0.43017540695314965, - "grad_norm": 1.5242187062023476, - "learning_rate": 2.5425646118280108e-06, - "loss": 0.9216, - "step": 4770 - }, - { - "epoch": 0.43026559047661994, - "grad_norm": 0.9951474650290669, - "learning_rate": 2.5420022860620172e-06, - "loss": 0.8014, - "step": 4771 - }, - { - "epoch": 0.4303557740000902, - "grad_norm": 1.7563537648455072, - "learning_rate": 2.5414399140493995e-06, - "loss": 0.8774, - "step": 4772 - }, - { - "epoch": 0.43044595752356046, - "grad_norm": 1.69455276552983, - "learning_rate": 2.5408774958381436e-06, - "loss": 0.883, - "step": 4773 - }, - { - "epoch": 0.4305361410470307, - "grad_norm": 1.90386820345413, - "learning_rate": 2.540315031476237e-06, - "loss": 0.9921, - "step": 4774 - }, - { - "epoch": 0.430626324570501, - "grad_norm": 1.5518273349406086, - "learning_rate": 2.5397525210116737e-06, - "loss": 0.9546, - "step": 4775 - }, - { - "epoch": 0.4307165080939712, - "grad_norm": 1.2959438336146085, - "learning_rate": 2.539189964492448e-06, - "loss": 0.9015, - "step": 4776 - }, - { - "epoch": 0.4308066916174415, - "grad_norm": 1.7849790789876798, - "learning_rate": 2.5386273619665613e-06, - "loss": 0.9973, - "step": 4777 - }, - { - "epoch": 0.43089687514091174, - "grad_norm": 0.7710275190639834, - "learning_rate": 2.5380647134820186e-06, - "loss": 0.8045, - "step": 4778 - }, - { - "epoch": 0.43098705866438203, - "grad_norm": 1.2763650393595638, - "learning_rate": 2.5375020190868277e-06, - "loss": 1.0049, - "step": 4779 - }, - { - "epoch": 0.43107724218785226, - "grad_norm": 1.4814254367126225, - "learning_rate": 2.536939278829001e-06, - "loss": 0.9425, - "step": 4780 - }, - { - "epoch": 0.43116742571132255, - "grad_norm": 1.6594100890966574, - "learning_rate": 2.5363764927565536e-06, - "loss": 1.0125, - "step": 4781 - }, - { - "epoch": 0.4312576092347928, - "grad_norm": 2.3513262557177463, - "learning_rate": 2.5358136609175064e-06, - "loss": 0.8963, - "step": 4782 - }, - { - "epoch": 0.43134779275826307, - "grad_norm": 1.2529621313329151, - "learning_rate": 2.535250783359884e-06, - "loss": 0.9517, - "step": 4783 - }, - { - "epoch": 0.4314379762817333, - "grad_norm": 1.3880559357384812, - "learning_rate": 2.5346878601317124e-06, - "loss": 0.843, - "step": 4784 - }, - { - "epoch": 0.4315281598052036, - "grad_norm": 1.2521213174518069, - "learning_rate": 2.534124891281025e-06, - "loss": 0.78, - "step": 4785 - }, - { - "epoch": 0.4316183433286738, - "grad_norm": 1.516111630056468, - "learning_rate": 2.533561876855857e-06, - "loss": 0.9615, - "step": 4786 - }, - { - "epoch": 0.4317085268521441, - "grad_norm": 1.5341533461943373, - "learning_rate": 2.532998816904247e-06, - "loss": 0.9012, - "step": 4787 - }, - { - "epoch": 0.4317987103756144, - "grad_norm": 1.948730958132299, - "learning_rate": 2.53243571147424e-06, - "loss": 0.8432, - "step": 4788 - }, - { - "epoch": 0.43188889389908464, - "grad_norm": 1.8641742349856676, - "learning_rate": 2.5318725606138815e-06, - "loss": 0.8602, - "step": 4789 - }, - { - "epoch": 0.4319790774225549, - "grad_norm": 2.0529721137827184, - "learning_rate": 2.5313093643712235e-06, - "loss": 0.9557, - "step": 4790 - }, - { - "epoch": 0.43206926094602516, - "grad_norm": 1.3487384376348464, - "learning_rate": 2.530746122794321e-06, - "loss": 0.9967, - "step": 4791 - }, - { - "epoch": 0.43215944446949545, - "grad_norm": 1.250893226009675, - "learning_rate": 2.5301828359312323e-06, - "loss": 0.8772, - "step": 4792 - }, - { - "epoch": 0.4322496279929657, - "grad_norm": 1.479577271687108, - "learning_rate": 2.529619503830021e-06, - "loss": 0.905, - "step": 4793 - }, - { - "epoch": 0.43233981151643597, - "grad_norm": 1.792612942232592, - "learning_rate": 2.529056126538753e-06, - "loss": 0.8787, - "step": 4794 - }, - { - "epoch": 0.4324299950399062, - "grad_norm": 1.472725864050092, - "learning_rate": 2.5284927041054995e-06, - "loss": 0.9705, - "step": 4795 - }, - { - "epoch": 0.4325201785633765, - "grad_norm": 0.7772008870026007, - "learning_rate": 2.5279292365783348e-06, - "loss": 0.769, - "step": 4796 - }, - { - "epoch": 0.4326103620868467, - "grad_norm": 1.494042726189192, - "learning_rate": 2.527365724005336e-06, - "loss": 1.0456, - "step": 4797 - }, - { - "epoch": 0.432700545610317, - "grad_norm": 1.5883207407262372, - "learning_rate": 2.526802166434586e-06, - "loss": 1.0044, - "step": 4798 - }, - { - "epoch": 0.43279072913378724, - "grad_norm": 1.3187378742691056, - "learning_rate": 2.5262385639141708e-06, - "loss": 0.9166, - "step": 4799 - }, - { - "epoch": 0.43288091265725753, - "grad_norm": 1.4432633178297354, - "learning_rate": 2.525674916492179e-06, - "loss": 1.0181, - "step": 4800 - }, - { - "epoch": 0.43297109618072777, - "grad_norm": 1.2217292562136184, - "learning_rate": 2.5251112242167056e-06, - "loss": 0.9573, - "step": 4801 - }, - { - "epoch": 0.43306127970419805, - "grad_norm": 1.5796192761941084, - "learning_rate": 2.5245474871358464e-06, - "loss": 0.9117, - "step": 4802 - }, - { - "epoch": 0.4331514632276683, - "grad_norm": 1.5213961496103086, - "learning_rate": 2.5239837052977037e-06, - "loss": 0.868, - "step": 4803 - }, - { - "epoch": 0.4332416467511386, - "grad_norm": 1.39481212020741, - "learning_rate": 2.523419878750381e-06, - "loss": 1.0594, - "step": 4804 - }, - { - "epoch": 0.4333318302746088, - "grad_norm": 1.561512367920231, - "learning_rate": 2.522856007541989e-06, - "loss": 0.9039, - "step": 4805 - }, - { - "epoch": 0.4334220137980791, - "grad_norm": 1.3631622787198503, - "learning_rate": 2.5222920917206397e-06, - "loss": 0.8692, - "step": 4806 - }, - { - "epoch": 0.43351219732154933, - "grad_norm": 0.8144759579758051, - "learning_rate": 2.5217281313344493e-06, - "loss": 0.7781, - "step": 4807 - }, - { - "epoch": 0.4336023808450196, - "grad_norm": 1.2739897858466245, - "learning_rate": 2.5211641264315372e-06, - "loss": 0.9924, - "step": 4808 - }, - { - "epoch": 0.43369256436848985, - "grad_norm": 1.3463262214441323, - "learning_rate": 2.5206000770600286e-06, - "loss": 0.9258, - "step": 4809 - }, - { - "epoch": 0.43378274789196014, - "grad_norm": 1.369300604383307, - "learning_rate": 2.520035983268051e-06, - "loss": 0.905, - "step": 4810 - }, - { - "epoch": 0.4338729314154304, - "grad_norm": 1.4295349039657947, - "learning_rate": 2.5194718451037357e-06, - "loss": 0.9517, - "step": 4811 - }, - { - "epoch": 0.43396311493890066, - "grad_norm": 1.3966820210194768, - "learning_rate": 2.518907662615218e-06, - "loss": 0.977, - "step": 4812 - }, - { - "epoch": 0.43405329846237095, - "grad_norm": 1.4004248451297518, - "learning_rate": 2.5183434358506373e-06, - "loss": 0.9056, - "step": 4813 - }, - { - "epoch": 0.4341434819858412, - "grad_norm": 1.5181644833779007, - "learning_rate": 2.5177791648581368e-06, - "loss": 0.935, - "step": 4814 - }, - { - "epoch": 0.4342336655093115, - "grad_norm": 1.3570691425696937, - "learning_rate": 2.517214849685863e-06, - "loss": 0.9536, - "step": 4815 - }, - { - "epoch": 0.4343238490327817, - "grad_norm": 1.4310209799027669, - "learning_rate": 2.5166504903819663e-06, - "loss": 0.9695, - "step": 4816 - }, - { - "epoch": 0.434414032556252, - "grad_norm": 1.605008618578246, - "learning_rate": 2.5160860869946014e-06, - "loss": 1.011, - "step": 4817 - }, - { - "epoch": 0.4345042160797222, - "grad_norm": 1.2300329011688116, - "learning_rate": 2.5155216395719253e-06, - "loss": 1.0279, - "step": 4818 - }, - { - "epoch": 0.4345943996031925, - "grad_norm": 1.2137735933786318, - "learning_rate": 2.5149571481621e-06, - "loss": 0.9063, - "step": 4819 - }, - { - "epoch": 0.43468458312666275, - "grad_norm": 0.708674297360164, - "learning_rate": 2.514392612813292e-06, - "loss": 0.7709, - "step": 4820 - }, - { - "epoch": 0.43477476665013304, - "grad_norm": 1.5063327782389246, - "learning_rate": 2.5138280335736695e-06, - "loss": 1.002, - "step": 4821 - }, - { - "epoch": 0.43486495017360327, - "grad_norm": 1.3603424926367824, - "learning_rate": 2.5132634104914064e-06, - "loss": 1.0283, - "step": 4822 - }, - { - "epoch": 0.43495513369707356, - "grad_norm": 1.2795556658419438, - "learning_rate": 2.5126987436146794e-06, - "loss": 0.9675, - "step": 4823 - }, - { - "epoch": 0.4350453172205438, - "grad_norm": 1.4827341748404352, - "learning_rate": 2.5121340329916675e-06, - "loss": 0.932, - "step": 4824 - }, - { - "epoch": 0.4351355007440141, - "grad_norm": 1.2834228141767392, - "learning_rate": 2.5115692786705566e-06, - "loss": 0.8672, - "step": 4825 - }, - { - "epoch": 0.4352256842674843, - "grad_norm": 1.4362689012523775, - "learning_rate": 2.511004480699534e-06, - "loss": 0.9824, - "step": 4826 - }, - { - "epoch": 0.4353158677909546, - "grad_norm": 0.8943490921522621, - "learning_rate": 2.510439639126791e-06, - "loss": 0.8618, - "step": 4827 - }, - { - "epoch": 0.43540605131442484, - "grad_norm": 1.3993244022628382, - "learning_rate": 2.509874754000524e-06, - "loss": 0.9495, - "step": 4828 - }, - { - "epoch": 0.4354962348378951, - "grad_norm": 1.6868409176606254, - "learning_rate": 2.509309825368932e-06, - "loss": 0.9387, - "step": 4829 - }, - { - "epoch": 0.43558641836136536, - "grad_norm": 1.463417029814969, - "learning_rate": 2.5087448532802173e-06, - "loss": 0.9459, - "step": 4830 - }, - { - "epoch": 0.43567660188483565, - "grad_norm": 2.2204601520621243, - "learning_rate": 2.508179837782586e-06, - "loss": 0.9282, - "step": 4831 - }, - { - "epoch": 0.4357667854083059, - "grad_norm": 23.158553571318418, - "learning_rate": 2.5076147789242493e-06, - "loss": 0.9209, - "step": 4832 - }, - { - "epoch": 0.43585696893177617, - "grad_norm": 1.4827800011565553, - "learning_rate": 2.5070496767534202e-06, - "loss": 0.9649, - "step": 4833 - }, - { - "epoch": 0.4359471524552464, - "grad_norm": 1.8323574141835972, - "learning_rate": 2.506484531318317e-06, - "loss": 0.9825, - "step": 4834 - }, - { - "epoch": 0.4360373359787167, - "grad_norm": 1.1474179263880406, - "learning_rate": 2.5059193426671613e-06, - "loss": 0.858, - "step": 4835 - }, - { - "epoch": 0.436127519502187, - "grad_norm": 1.6320845884205162, - "learning_rate": 2.5053541108481772e-06, - "loss": 1.0811, - "step": 4836 - }, - { - "epoch": 0.4362177030256572, - "grad_norm": 1.5896131643526752, - "learning_rate": 2.5047888359095935e-06, - "loss": 0.9338, - "step": 4837 - }, - { - "epoch": 0.4363078865491275, - "grad_norm": 1.4633560153416936, - "learning_rate": 2.5042235178996436e-06, - "loss": 0.9318, - "step": 4838 - }, - { - "epoch": 0.43639807007259773, - "grad_norm": 1.5777190035557371, - "learning_rate": 2.5036581568665627e-06, - "loss": 1.0108, - "step": 4839 - }, - { - "epoch": 0.436488253596068, - "grad_norm": 1.4501850371847673, - "learning_rate": 2.503092752858591e-06, - "loss": 1.0119, - "step": 4840 - }, - { - "epoch": 0.43657843711953825, - "grad_norm": 0.9572415801938506, - "learning_rate": 2.502527305923971e-06, - "loss": 0.8587, - "step": 4841 - }, - { - "epoch": 0.43666862064300854, - "grad_norm": 1.9614613961634642, - "learning_rate": 2.5019618161109506e-06, - "loss": 0.9375, - "step": 4842 - }, - { - "epoch": 0.4367588041664788, - "grad_norm": 1.5667767550294802, - "learning_rate": 2.5013962834677804e-06, - "loss": 0.9606, - "step": 4843 - }, - { - "epoch": 0.43684898768994906, - "grad_norm": 1.6186856319635143, - "learning_rate": 2.500830708042715e-06, - "loss": 0.9345, - "step": 4844 - }, - { - "epoch": 0.4369391712134193, - "grad_norm": 1.3497196489211747, - "learning_rate": 2.500265089884011e-06, - "loss": 1.0143, - "step": 4845 - }, - { - "epoch": 0.4370293547368896, - "grad_norm": 1.394392375282084, - "learning_rate": 2.499699429039932e-06, - "loss": 0.949, - "step": 4846 - }, - { - "epoch": 0.4371195382603598, - "grad_norm": 1.5450469495682775, - "learning_rate": 2.4991337255587425e-06, - "loss": 0.9062, - "step": 4847 - }, - { - "epoch": 0.4372097217838301, - "grad_norm": 1.8240456345814164, - "learning_rate": 2.4985679794887106e-06, - "loss": 0.8171, - "step": 4848 - }, - { - "epoch": 0.43729990530730034, - "grad_norm": 2.446503857835464, - "learning_rate": 2.49800219087811e-06, - "loss": 0.9833, - "step": 4849 - }, - { - "epoch": 0.43739008883077063, - "grad_norm": 1.674503577387153, - "learning_rate": 2.4974363597752163e-06, - "loss": 0.9778, - "step": 4850 - }, - { - "epoch": 0.43748027235424086, - "grad_norm": 2.018732322402889, - "learning_rate": 2.4968704862283097e-06, - "loss": 1.0059, - "step": 4851 - }, - { - "epoch": 0.43757045587771115, - "grad_norm": 1.636622460202716, - "learning_rate": 2.4963045702856737e-06, - "loss": 1.0165, - "step": 4852 - }, - { - "epoch": 0.4376606394011814, - "grad_norm": 1.297078266070588, - "learning_rate": 2.4957386119955946e-06, - "loss": 0.8942, - "step": 4853 - }, - { - "epoch": 0.4377508229246517, - "grad_norm": 0.8978209992593, - "learning_rate": 2.495172611406364e-06, - "loss": 0.7813, - "step": 4854 - }, - { - "epoch": 0.4378410064481219, - "grad_norm": 1.85388102813681, - "learning_rate": 2.4946065685662757e-06, - "loss": 0.974, - "step": 4855 - }, - { - "epoch": 0.4379311899715922, - "grad_norm": 1.5490039855482176, - "learning_rate": 2.4940404835236283e-06, - "loss": 0.9491, - "step": 4856 - }, - { - "epoch": 0.4380213734950624, - "grad_norm": 1.3221642865293748, - "learning_rate": 2.4934743563267223e-06, - "loss": 0.9471, - "step": 4857 - }, - { - "epoch": 0.4381115570185327, - "grad_norm": 1.6007544735427919, - "learning_rate": 2.4929081870238635e-06, - "loss": 0.9072, - "step": 4858 - }, - { - "epoch": 0.43820174054200295, - "grad_norm": 1.3924665366214934, - "learning_rate": 2.49234197566336e-06, - "loss": 0.9869, - "step": 4859 - }, - { - "epoch": 0.43829192406547324, - "grad_norm": 1.491708408957983, - "learning_rate": 2.4917757222935247e-06, - "loss": 0.9575, - "step": 4860 - }, - { - "epoch": 0.4383821075889435, - "grad_norm": 1.6773519202194618, - "learning_rate": 2.4912094269626725e-06, - "loss": 0.9627, - "step": 4861 - }, - { - "epoch": 0.43847229111241376, - "grad_norm": 1.6194163272933018, - "learning_rate": 2.4906430897191245e-06, - "loss": 0.9936, - "step": 4862 - }, - { - "epoch": 0.43856247463588405, - "grad_norm": 1.3680607214161546, - "learning_rate": 2.490076710611202e-06, - "loss": 0.99, - "step": 4863 - }, - { - "epoch": 0.4386526581593543, - "grad_norm": 1.6605290252436895, - "learning_rate": 2.4895102896872326e-06, - "loss": 0.9952, - "step": 4864 - }, - { - "epoch": 0.43874284168282457, - "grad_norm": 1.5511415000675508, - "learning_rate": 2.4889438269955457e-06, - "loss": 0.8811, - "step": 4865 - }, - { - "epoch": 0.4388330252062948, - "grad_norm": 1.364546399186945, - "learning_rate": 2.4883773225844755e-06, - "loss": 0.9315, - "step": 4866 - }, - { - "epoch": 0.4389232087297651, - "grad_norm": 1.4539383283598897, - "learning_rate": 2.48781077650236e-06, - "loss": 0.9355, - "step": 4867 - }, - { - "epoch": 0.4390133922532353, - "grad_norm": 1.4025112466480196, - "learning_rate": 2.4872441887975386e-06, - "loss": 0.9145, - "step": 4868 - }, - { - "epoch": 0.4391035757767056, - "grad_norm": 1.1836347415485728, - "learning_rate": 2.486677559518356e-06, - "loss": 0.8968, - "step": 4869 - }, - { - "epoch": 0.43919375930017585, - "grad_norm": 1.0914016035413854, - "learning_rate": 2.4861108887131614e-06, - "loss": 0.9341, - "step": 4870 - }, - { - "epoch": 0.43928394282364613, - "grad_norm": 1.3157554851709747, - "learning_rate": 2.485544176430305e-06, - "loss": 0.9167, - "step": 4871 - }, - { - "epoch": 0.43937412634711637, - "grad_norm": 1.5181585942175657, - "learning_rate": 2.4849774227181425e-06, - "loss": 1.0262, - "step": 4872 - }, - { - "epoch": 0.43946430987058666, - "grad_norm": 1.2234738087534063, - "learning_rate": 2.484410627625032e-06, - "loss": 0.9484, - "step": 4873 - }, - { - "epoch": 0.4395544933940569, - "grad_norm": 1.5329871153069166, - "learning_rate": 2.4838437911993356e-06, - "loss": 0.9936, - "step": 4874 - }, - { - "epoch": 0.4396446769175272, - "grad_norm": 1.7651660288887407, - "learning_rate": 2.483276913489419e-06, - "loss": 0.9857, - "step": 4875 - }, - { - "epoch": 0.4397348604409974, - "grad_norm": 1.6316494975032587, - "learning_rate": 2.4827099945436516e-06, - "loss": 0.8526, - "step": 4876 - }, - { - "epoch": 0.4398250439644677, - "grad_norm": 1.5251956282664307, - "learning_rate": 2.482143034410405e-06, - "loss": 1.0272, - "step": 4877 - }, - { - "epoch": 0.43991522748793793, - "grad_norm": 1.478950063758719, - "learning_rate": 2.4815760331380573e-06, - "loss": 0.9812, - "step": 4878 - }, - { - "epoch": 0.4400054110114082, - "grad_norm": 1.2488371208279765, - "learning_rate": 2.481008990774987e-06, - "loss": 0.9102, - "step": 4879 - }, - { - "epoch": 0.44009559453487845, - "grad_norm": 1.5559872508862835, - "learning_rate": 2.480441907369577e-06, - "loss": 0.9521, - "step": 4880 - }, - { - "epoch": 0.44018577805834874, - "grad_norm": 1.598132521843244, - "learning_rate": 2.479874782970214e-06, - "loss": 0.8912, - "step": 4881 - }, - { - "epoch": 0.440275961581819, - "grad_norm": 1.6752184654220086, - "learning_rate": 2.4793076176252887e-06, - "loss": 0.9721, - "step": 4882 - }, - { - "epoch": 0.44036614510528926, - "grad_norm": 1.9217599353615338, - "learning_rate": 2.478740411383195e-06, - "loss": 0.8345, - "step": 4883 - }, - { - "epoch": 0.44045632862875955, - "grad_norm": 1.318836447969311, - "learning_rate": 2.4781731642923296e-06, - "loss": 0.9394, - "step": 4884 - }, - { - "epoch": 0.4405465121522298, - "grad_norm": 1.2600037815400553, - "learning_rate": 2.477605876401093e-06, - "loss": 0.934, - "step": 4885 - }, - { - "epoch": 0.4406366956757001, - "grad_norm": 1.2775152715378597, - "learning_rate": 2.4770385477578894e-06, - "loss": 0.8973, - "step": 4886 - }, - { - "epoch": 0.4407268791991703, - "grad_norm": 1.1525215141125342, - "learning_rate": 2.476471178411127e-06, - "loss": 0.903, - "step": 4887 - }, - { - "epoch": 0.4408170627226406, - "grad_norm": 1.3771614945122315, - "learning_rate": 2.475903768409216e-06, - "loss": 0.9722, - "step": 4888 - }, - { - "epoch": 0.44090724624611083, - "grad_norm": 1.5356603673169953, - "learning_rate": 2.475336317800572e-06, - "loss": 0.9154, - "step": 4889 - }, - { - "epoch": 0.4409974297695811, - "grad_norm": 1.3871110817832404, - "learning_rate": 2.4747688266336118e-06, - "loss": 0.937, - "step": 4890 - }, - { - "epoch": 0.44108761329305135, - "grad_norm": 2.688547329537656, - "learning_rate": 2.4742012949567574e-06, - "loss": 1.0502, - "step": 4891 - }, - { - "epoch": 0.44117779681652164, - "grad_norm": 1.6646650056373662, - "learning_rate": 2.4736337228184338e-06, - "loss": 0.9245, - "step": 4892 - }, - { - "epoch": 0.44126798033999187, - "grad_norm": 1.5083718324809612, - "learning_rate": 2.4730661102670692e-06, - "loss": 1.0452, - "step": 4893 - }, - { - "epoch": 0.44135816386346216, - "grad_norm": 1.4949587508159665, - "learning_rate": 2.472498457351096e-06, - "loss": 0.95, - "step": 4894 - }, - { - "epoch": 0.4414483473869324, - "grad_norm": 1.8182706719058266, - "learning_rate": 2.4719307641189495e-06, - "loss": 0.9323, - "step": 4895 - }, - { - "epoch": 0.4415385309104027, - "grad_norm": 1.6726777475784935, - "learning_rate": 2.4713630306190673e-06, - "loss": 0.9073, - "step": 4896 - }, - { - "epoch": 0.4416287144338729, - "grad_norm": 1.719623066497268, - "learning_rate": 2.4707952568998923e-06, - "loss": 1.0224, - "step": 4897 - }, - { - "epoch": 0.4417188979573432, - "grad_norm": 1.7734564557511856, - "learning_rate": 2.4702274430098703e-06, - "loss": 0.9134, - "step": 4898 - }, - { - "epoch": 0.44180908148081344, - "grad_norm": 1.5771124584711893, - "learning_rate": 2.4696595889974497e-06, - "loss": 0.9009, - "step": 4899 - }, - { - "epoch": 0.4418992650042837, - "grad_norm": 1.3754397469361508, - "learning_rate": 2.469091694911084e-06, - "loss": 1.0057, - "step": 4900 - }, - { - "epoch": 0.44198944852775396, - "grad_norm": 1.421957579509816, - "learning_rate": 2.4685237607992276e-06, - "loss": 0.9428, - "step": 4901 - }, - { - "epoch": 0.44207963205122425, - "grad_norm": 1.4174578458246796, - "learning_rate": 2.4679557867103416e-06, - "loss": 1.0098, - "step": 4902 - }, - { - "epoch": 0.4421698155746945, - "grad_norm": 1.2143655798797492, - "learning_rate": 2.4673877726928865e-06, - "loss": 0.9542, - "step": 4903 - }, - { - "epoch": 0.44225999909816477, - "grad_norm": 1.7367073391613719, - "learning_rate": 2.46681971879533e-06, - "loss": 0.8357, - "step": 4904 - }, - { - "epoch": 0.442350182621635, - "grad_norm": 1.602849973172481, - "learning_rate": 2.4662516250661407e-06, - "loss": 0.8694, - "step": 4905 - }, - { - "epoch": 0.4424403661451053, - "grad_norm": 1.555437070254758, - "learning_rate": 2.465683491553792e-06, - "loss": 0.9252, - "step": 4906 - }, - { - "epoch": 0.4425305496685755, - "grad_norm": 1.4478088265563223, - "learning_rate": 2.4651153183067604e-06, - "loss": 0.9554, - "step": 4907 - }, - { - "epoch": 0.4426207331920458, - "grad_norm": 1.6340423770671524, - "learning_rate": 2.4645471053735245e-06, - "loss": 0.8846, - "step": 4908 - }, - { - "epoch": 0.4427109167155161, - "grad_norm": 1.3931689192915746, - "learning_rate": 2.4639788528025684e-06, - "loss": 0.8824, - "step": 4909 - }, - { - "epoch": 0.44280110023898633, - "grad_norm": 1.1411733485006232, - "learning_rate": 2.463410560642378e-06, - "loss": 0.9507, - "step": 4910 - }, - { - "epoch": 0.4428912837624566, - "grad_norm": 0.7474633871954892, - "learning_rate": 2.4628422289414448e-06, - "loss": 0.8194, - "step": 4911 - }, - { - "epoch": 0.44298146728592686, - "grad_norm": 0.7305863637636895, - "learning_rate": 2.4622738577482592e-06, - "loss": 0.8017, - "step": 4912 - }, - { - "epoch": 0.44307165080939714, - "grad_norm": 1.2799133213318317, - "learning_rate": 2.461705447111319e-06, - "loss": 0.9343, - "step": 4913 - }, - { - "epoch": 0.4431618343328674, - "grad_norm": 1.4186908155019626, - "learning_rate": 2.4611369970791246e-06, - "loss": 0.9115, - "step": 4914 - }, - { - "epoch": 0.44325201785633767, - "grad_norm": 1.4448724850156578, - "learning_rate": 2.460568507700179e-06, - "loss": 0.953, - "step": 4915 - }, - { - "epoch": 0.4433422013798079, - "grad_norm": 1.4816351249063728, - "learning_rate": 2.4599999790229887e-06, - "loss": 0.9867, - "step": 4916 - }, - { - "epoch": 0.4434323849032782, - "grad_norm": 1.5211029945880237, - "learning_rate": 2.459431411096064e-06, - "loss": 0.9015, - "step": 4917 - }, - { - "epoch": 0.4435225684267484, - "grad_norm": 1.4412938516104423, - "learning_rate": 2.458862803967918e-06, - "loss": 0.9105, - "step": 4918 - }, - { - "epoch": 0.4436127519502187, - "grad_norm": 1.4250778344296635, - "learning_rate": 2.4582941576870667e-06, - "loss": 0.9596, - "step": 4919 - }, - { - "epoch": 0.44370293547368894, - "grad_norm": 1.4631329527992796, - "learning_rate": 2.4577254723020315e-06, - "loss": 0.9229, - "step": 4920 - }, - { - "epoch": 0.44379311899715923, - "grad_norm": 1.4678889246295337, - "learning_rate": 2.457156747861335e-06, - "loss": 0.97, - "step": 4921 - }, - { - "epoch": 0.44388330252062946, - "grad_norm": 1.5322314639769834, - "learning_rate": 2.456587984413504e-06, - "loss": 1.0067, - "step": 4922 - }, - { - "epoch": 0.44397348604409975, - "grad_norm": 1.929413934382145, - "learning_rate": 2.4560191820070683e-06, - "loss": 0.8794, - "step": 4923 - }, - { - "epoch": 0.44406366956757, - "grad_norm": 1.7622605397849116, - "learning_rate": 2.4554503406905617e-06, - "loss": 0.9388, - "step": 4924 - }, - { - "epoch": 0.4441538530910403, - "grad_norm": 1.324197414530143, - "learning_rate": 2.454881460512521e-06, - "loss": 0.9864, - "step": 4925 - }, - { - "epoch": 0.4442440366145105, - "grad_norm": 1.1887498101074838, - "learning_rate": 2.4543125415214856e-06, - "loss": 0.9218, - "step": 4926 - }, - { - "epoch": 0.4443342201379808, - "grad_norm": 1.821079141827436, - "learning_rate": 2.4537435837659996e-06, - "loss": 0.9723, - "step": 4927 - }, - { - "epoch": 0.44442440366145103, - "grad_norm": 1.5647585757572584, - "learning_rate": 2.4531745872946085e-06, - "loss": 0.8987, - "step": 4928 - }, - { - "epoch": 0.4445145871849213, - "grad_norm": 1.781144992343237, - "learning_rate": 2.4526055521558632e-06, - "loss": 1.0084, - "step": 4929 - }, - { - "epoch": 0.44460477070839155, - "grad_norm": 1.3824911229019334, - "learning_rate": 2.4520364783983164e-06, - "loss": 0.956, - "step": 4930 - }, - { - "epoch": 0.44469495423186184, - "grad_norm": 1.2966486262183607, - "learning_rate": 2.451467366070525e-06, - "loss": 0.9327, - "step": 4931 - }, - { - "epoch": 0.4447851377553321, - "grad_norm": 1.6085263579500246, - "learning_rate": 2.450898215221048e-06, - "loss": 1.017, - "step": 4932 - }, - { - "epoch": 0.44487532127880236, - "grad_norm": 1.6142554493706236, - "learning_rate": 2.4503290258984498e-06, - "loss": 1.0898, - "step": 4933 - }, - { - "epoch": 0.44496550480227265, - "grad_norm": 1.7696279200406997, - "learning_rate": 2.4497597981512952e-06, - "loss": 0.8193, - "step": 4934 - }, - { - "epoch": 0.4450556883257429, - "grad_norm": 0.7368021686207256, - "learning_rate": 2.4491905320281555e-06, - "loss": 0.824, - "step": 4935 - }, - { - "epoch": 0.44514587184921317, - "grad_norm": 1.3886765105737502, - "learning_rate": 2.448621227577602e-06, - "loss": 0.9294, - "step": 4936 - }, - { - "epoch": 0.4452360553726834, - "grad_norm": 1.5472164355218694, - "learning_rate": 2.4480518848482123e-06, - "loss": 0.8971, - "step": 4937 - }, - { - "epoch": 0.4453262388961537, - "grad_norm": 1.3297565088171968, - "learning_rate": 2.447482503888565e-06, - "loss": 0.9552, - "step": 4938 - }, - { - "epoch": 0.4454164224196239, - "grad_norm": 1.8324287812886135, - "learning_rate": 2.4469130847472434e-06, - "loss": 0.9282, - "step": 4939 - }, - { - "epoch": 0.4455066059430942, - "grad_norm": 1.562764641285086, - "learning_rate": 2.4463436274728326e-06, - "loss": 0.9803, - "step": 4940 - }, - { - "epoch": 0.44559678946656445, - "grad_norm": 1.4335397819318534, - "learning_rate": 2.4457741321139227e-06, - "loss": 0.7909, - "step": 4941 - }, - { - "epoch": 0.44568697299003474, - "grad_norm": 1.3684835340061894, - "learning_rate": 2.4452045987191063e-06, - "loss": 0.9689, - "step": 4942 - }, - { - "epoch": 0.44577715651350497, - "grad_norm": 1.2567809714603873, - "learning_rate": 2.4446350273369776e-06, - "loss": 0.917, - "step": 4943 - }, - { - "epoch": 0.44586734003697526, - "grad_norm": 1.3909230287922247, - "learning_rate": 2.4440654180161374e-06, - "loss": 0.9898, - "step": 4944 - }, - { - "epoch": 0.4459575235604455, - "grad_norm": 1.4042686066945758, - "learning_rate": 2.4434957708051875e-06, - "loss": 0.896, - "step": 4945 - }, - { - "epoch": 0.4460477070839158, - "grad_norm": 1.5391756587375975, - "learning_rate": 2.4429260857527324e-06, - "loss": 0.89, - "step": 4946 - }, - { - "epoch": 0.446137890607386, - "grad_norm": 1.6067806183832885, - "learning_rate": 2.4423563629073815e-06, - "loss": 0.8539, - "step": 4947 - }, - { - "epoch": 0.4462280741308563, - "grad_norm": 1.3265615797994452, - "learning_rate": 2.4417866023177466e-06, - "loss": 0.9396, - "step": 4948 - }, - { - "epoch": 0.44631825765432653, - "grad_norm": 1.6148112802696886, - "learning_rate": 2.441216804032443e-06, - "loss": 0.9713, - "step": 4949 - }, - { - "epoch": 0.4464084411777968, - "grad_norm": 1.3887407631828717, - "learning_rate": 2.440646968100089e-06, - "loss": 0.9645, - "step": 4950 - }, - { - "epoch": 0.44649862470126706, - "grad_norm": 0.8683025991915592, - "learning_rate": 2.4400770945693055e-06, - "loss": 0.8247, - "step": 4951 - }, - { - "epoch": 0.44658880822473734, - "grad_norm": 1.9155024503305367, - "learning_rate": 2.4395071834887177e-06, - "loss": 0.9756, - "step": 4952 - }, - { - "epoch": 0.4466789917482076, - "grad_norm": 0.8430002908400716, - "learning_rate": 2.438937234906954e-06, - "loss": 0.8786, - "step": 4953 - }, - { - "epoch": 0.44676917527167787, - "grad_norm": 1.4635447528123275, - "learning_rate": 2.4383672488726447e-06, - "loss": 0.9456, - "step": 4954 - }, - { - "epoch": 0.44685935879514815, - "grad_norm": 1.677528732895124, - "learning_rate": 2.4377972254344256e-06, - "loss": 0.8693, - "step": 4955 - }, - { - "epoch": 0.4469495423186184, - "grad_norm": 1.6280288345726408, - "learning_rate": 2.437227164640932e-06, - "loss": 0.8453, - "step": 4956 - }, - { - "epoch": 0.4470397258420887, - "grad_norm": 1.5424334792719996, - "learning_rate": 2.436657066540807e-06, - "loss": 0.8861, - "step": 4957 - }, - { - "epoch": 0.4471299093655589, - "grad_norm": 1.7492753299560009, - "learning_rate": 2.4360869311826927e-06, - "loss": 0.8264, - "step": 4958 - }, - { - "epoch": 0.4472200928890292, - "grad_norm": 1.903695568125606, - "learning_rate": 2.4355167586152367e-06, - "loss": 1.036, - "step": 4959 - }, - { - "epoch": 0.44731027641249943, - "grad_norm": 1.6545288211575202, - "learning_rate": 2.4349465488870896e-06, - "loss": 0.912, - "step": 4960 - }, - { - "epoch": 0.4474004599359697, - "grad_norm": 1.3840668156570122, - "learning_rate": 2.434376302046905e-06, - "loss": 1.0265, - "step": 4961 - }, - { - "epoch": 0.44749064345943995, - "grad_norm": 1.737955127084107, - "learning_rate": 2.433806018143339e-06, - "loss": 0.9687, - "step": 4962 - }, - { - "epoch": 0.44758082698291024, - "grad_norm": 0.698802525758846, - "learning_rate": 2.433235697225051e-06, - "loss": 0.8245, - "step": 4963 - }, - { - "epoch": 0.4476710105063805, - "grad_norm": 1.581955896160655, - "learning_rate": 2.4326653393407048e-06, - "loss": 0.9593, - "step": 4964 - }, - { - "epoch": 0.44776119402985076, - "grad_norm": 1.7123246513865926, - "learning_rate": 2.432094944538966e-06, - "loss": 0.9106, - "step": 4965 - }, - { - "epoch": 0.447851377553321, - "grad_norm": 1.3222140100680013, - "learning_rate": 2.4315245128685047e-06, - "loss": 0.9155, - "step": 4966 - }, - { - "epoch": 0.4479415610767913, - "grad_norm": 1.4005395020072584, - "learning_rate": 2.4309540443779925e-06, - "loss": 0.9566, - "step": 4967 - }, - { - "epoch": 0.4480317446002615, - "grad_norm": 1.5606535873880758, - "learning_rate": 2.4303835391161047e-06, - "loss": 0.8832, - "step": 4968 - }, - { - "epoch": 0.4481219281237318, - "grad_norm": 1.3371648671623355, - "learning_rate": 2.42981299713152e-06, - "loss": 0.8956, - "step": 4969 - }, - { - "epoch": 0.44821211164720204, - "grad_norm": 1.5859172049290255, - "learning_rate": 2.4292424184729204e-06, - "loss": 0.9853, - "step": 4970 - }, - { - "epoch": 0.4483022951706723, - "grad_norm": 1.344153144155091, - "learning_rate": 2.4286718031889913e-06, - "loss": 0.9383, - "step": 4971 - }, - { - "epoch": 0.44839247869414256, - "grad_norm": 1.2586983351346936, - "learning_rate": 2.4281011513284202e-06, - "loss": 0.9261, - "step": 4972 - }, - { - "epoch": 0.44848266221761285, - "grad_norm": 1.5314286575413947, - "learning_rate": 2.4275304629398985e-06, - "loss": 0.8855, - "step": 4973 - }, - { - "epoch": 0.4485728457410831, - "grad_norm": 1.2418384661433817, - "learning_rate": 2.4269597380721194e-06, - "loss": 1.0234, - "step": 4974 - }, - { - "epoch": 0.44866302926455337, - "grad_norm": 1.2570096409412015, - "learning_rate": 2.426388976773782e-06, - "loss": 0.9436, - "step": 4975 - }, - { - "epoch": 0.4487532127880236, - "grad_norm": 1.57686171627006, - "learning_rate": 2.425818179093586e-06, - "loss": 1.0443, - "step": 4976 - }, - { - "epoch": 0.4488433963114939, - "grad_norm": 1.4134626433459505, - "learning_rate": 2.4252473450802346e-06, - "loss": 0.8595, - "step": 4977 - }, - { - "epoch": 0.4489335798349641, - "grad_norm": 1.5154258361208954, - "learning_rate": 2.4246764747824355e-06, - "loss": 0.981, - "step": 4978 - }, - { - "epoch": 0.4490237633584344, - "grad_norm": 1.4976040460425928, - "learning_rate": 2.424105568248897e-06, - "loss": 0.9559, - "step": 4979 - }, - { - "epoch": 0.4491139468819047, - "grad_norm": 1.690461985402386, - "learning_rate": 2.4235346255283337e-06, - "loss": 0.8913, - "step": 4980 - }, - { - "epoch": 0.44920413040537494, - "grad_norm": 1.6372489707665114, - "learning_rate": 2.42296364666946e-06, - "loss": 0.9578, - "step": 4981 - }, - { - "epoch": 0.4492943139288452, - "grad_norm": 1.5267052449983889, - "learning_rate": 2.4223926317209965e-06, - "loss": 0.898, - "step": 4982 - }, - { - "epoch": 0.44938449745231546, - "grad_norm": 1.2073228601085335, - "learning_rate": 2.4218215807316647e-06, - "loss": 0.9522, - "step": 4983 - }, - { - "epoch": 0.44947468097578575, - "grad_norm": 1.4555374137501715, - "learning_rate": 2.4212504937501894e-06, - "loss": 1.0309, - "step": 4984 - }, - { - "epoch": 0.449564864499256, - "grad_norm": 1.2088068768631977, - "learning_rate": 2.4206793708253e-06, - "loss": 1.0019, - "step": 4985 - }, - { - "epoch": 0.44965504802272627, - "grad_norm": 1.7457074926479192, - "learning_rate": 2.420108212005726e-06, - "loss": 0.9041, - "step": 4986 - }, - { - "epoch": 0.4497452315461965, - "grad_norm": 1.8220279452738273, - "learning_rate": 2.4195370173402034e-06, - "loss": 0.9508, - "step": 4987 - }, - { - "epoch": 0.4498354150696668, - "grad_norm": 1.421384454304062, - "learning_rate": 2.4189657868774696e-06, - "loss": 1.0653, - "step": 4988 - }, - { - "epoch": 0.449925598593137, - "grad_norm": 1.6014474967046337, - "learning_rate": 2.418394520666264e-06, - "loss": 0.8794, - "step": 4989 - }, - { - "epoch": 0.4500157821166073, - "grad_norm": 1.522763381876662, - "learning_rate": 2.4178232187553307e-06, - "loss": 0.9251, - "step": 4990 - }, - { - "epoch": 0.45010596564007754, - "grad_norm": 1.3164810023605016, - "learning_rate": 2.417251881193417e-06, - "loss": 0.9711, - "step": 4991 - }, - { - "epoch": 0.45019614916354783, - "grad_norm": 1.5458494697955594, - "learning_rate": 2.4166805080292723e-06, - "loss": 1.0035, - "step": 4992 - }, - { - "epoch": 0.45028633268701806, - "grad_norm": 1.366832752525511, - "learning_rate": 2.4161090993116485e-06, - "loss": 0.9032, - "step": 4993 - }, - { - "epoch": 0.45037651621048835, - "grad_norm": 2.183417516158035, - "learning_rate": 2.4155376550893026e-06, - "loss": 0.8298, - "step": 4994 - }, - { - "epoch": 0.4504666997339586, - "grad_norm": 1.5015919502855473, - "learning_rate": 2.4149661754109926e-06, - "loss": 0.9579, - "step": 4995 - }, - { - "epoch": 0.4505568832574289, - "grad_norm": 1.4471522813241864, - "learning_rate": 2.41439466032548e-06, - "loss": 1.0017, - "step": 4996 - }, - { - "epoch": 0.4506470667808991, - "grad_norm": 1.445686003391124, - "learning_rate": 2.41382310988153e-06, - "loss": 0.8903, - "step": 4997 - }, - { - "epoch": 0.4507372503043694, - "grad_norm": 1.638980295394489, - "learning_rate": 2.413251524127911e-06, - "loss": 1.0041, - "step": 4998 - }, - { - "epoch": 0.45082743382783963, - "grad_norm": 1.650257497982241, - "learning_rate": 2.412679903113393e-06, - "loss": 0.9145, - "step": 4999 - }, - { - "epoch": 0.4509176173513099, - "grad_norm": 1.7895444529006455, - "learning_rate": 2.4121082468867505e-06, - "loss": 0.9589, - "step": 5000 - }, - { - "epoch": 0.45100780087478015, - "grad_norm": 1.843709718943093, - "learning_rate": 2.4115365554967597e-06, - "loss": 0.8864, - "step": 5001 - }, - { - "epoch": 0.45109798439825044, - "grad_norm": 1.4698121308925909, - "learning_rate": 2.4109648289922006e-06, - "loss": 0.954, - "step": 5002 - }, - { - "epoch": 0.45118816792172073, - "grad_norm": 1.4246211937136142, - "learning_rate": 2.4103930674218565e-06, - "loss": 0.9565, - "step": 5003 - }, - { - "epoch": 0.45127835144519096, - "grad_norm": 1.629004752201949, - "learning_rate": 2.409821270834513e-06, - "loss": 0.9249, - "step": 5004 - }, - { - "epoch": 0.45136853496866125, - "grad_norm": 1.847453325995758, - "learning_rate": 2.409249439278959e-06, - "loss": 1.0043, - "step": 5005 - }, - { - "epoch": 0.4514587184921315, - "grad_norm": 0.6881115534245591, - "learning_rate": 2.408677572803986e-06, - "loss": 0.7769, - "step": 5006 - }, - { - "epoch": 0.45154890201560177, - "grad_norm": 1.4995397020447385, - "learning_rate": 2.408105671458389e-06, - "loss": 0.9525, - "step": 5007 - }, - { - "epoch": 0.451639085539072, - "grad_norm": 1.3194402488284858, - "learning_rate": 2.4075337352909663e-06, - "loss": 0.8869, - "step": 5008 - }, - { - "epoch": 0.4517292690625423, - "grad_norm": 1.2928517913933415, - "learning_rate": 2.4069617643505177e-06, - "loss": 1.0757, - "step": 5009 - }, - { - "epoch": 0.4518194525860125, - "grad_norm": 1.3394163912644668, - "learning_rate": 2.406389758685848e-06, - "loss": 1.036, - "step": 5010 - }, - { - "epoch": 0.4519096361094828, - "grad_norm": 1.2904733508705035, - "learning_rate": 2.405817718345763e-06, - "loss": 0.9899, - "step": 5011 - }, - { - "epoch": 0.45199981963295305, - "grad_norm": 1.362859141411734, - "learning_rate": 2.4052456433790726e-06, - "loss": 0.8946, - "step": 5012 - }, - { - "epoch": 0.45209000315642334, - "grad_norm": 1.4147028869237044, - "learning_rate": 2.4046735338345897e-06, - "loss": 1.0012, - "step": 5013 - }, - { - "epoch": 0.45218018667989357, - "grad_norm": 1.454003427446358, - "learning_rate": 2.404101389761129e-06, - "loss": 0.9666, - "step": 5014 - }, - { - "epoch": 0.45227037020336386, - "grad_norm": 0.7113024435167885, - "learning_rate": 2.4035292112075097e-06, - "loss": 0.7754, - "step": 5015 - }, - { - "epoch": 0.4523605537268341, - "grad_norm": 1.3404054493069621, - "learning_rate": 2.4029569982225534e-06, - "loss": 1.0078, - "step": 5016 - }, - { - "epoch": 0.4524507372503044, - "grad_norm": 0.7518172896918641, - "learning_rate": 2.402384750855084e-06, - "loss": 0.8607, - "step": 5017 - }, - { - "epoch": 0.4525409207737746, - "grad_norm": 1.3496504030740268, - "learning_rate": 2.4018124691539286e-06, - "loss": 0.9483, - "step": 5018 - }, - { - "epoch": 0.4526311042972449, - "grad_norm": 1.5543146501029508, - "learning_rate": 2.4012401531679178e-06, - "loss": 0.9358, - "step": 5019 - }, - { - "epoch": 0.45272128782071513, - "grad_norm": 1.5149099491915505, - "learning_rate": 2.4006678029458847e-06, - "loss": 0.9516, - "step": 5020 - }, - { - "epoch": 0.4528114713441854, - "grad_norm": 1.3105892168871365, - "learning_rate": 2.400095418536666e-06, - "loss": 0.8503, - "step": 5021 - }, - { - "epoch": 0.45290165486765566, - "grad_norm": 1.7164938250196435, - "learning_rate": 2.3995229999890996e-06, - "loss": 0.9231, - "step": 5022 - }, - { - "epoch": 0.45299183839112594, - "grad_norm": 1.5477528248771777, - "learning_rate": 2.398950547352028e-06, - "loss": 0.8895, - "step": 5023 - }, - { - "epoch": 0.4530820219145962, - "grad_norm": 2.094446891841701, - "learning_rate": 2.398378060674295e-06, - "loss": 0.8663, - "step": 5024 - }, - { - "epoch": 0.45317220543806647, - "grad_norm": 1.3105630660442655, - "learning_rate": 2.39780554000475e-06, - "loss": 0.9496, - "step": 5025 - }, - { - "epoch": 0.4532623889615367, - "grad_norm": 1.4411139463208722, - "learning_rate": 2.3972329853922434e-06, - "loss": 0.9807, - "step": 5026 - }, - { - "epoch": 0.453352572485007, - "grad_norm": 0.6840733181101064, - "learning_rate": 2.3966603968856278e-06, - "loss": 0.8033, - "step": 5027 - }, - { - "epoch": 0.4534427560084773, - "grad_norm": 1.490900253377549, - "learning_rate": 2.39608777453376e-06, - "loss": 1.024, - "step": 5028 - }, - { - "epoch": 0.4535329395319475, - "grad_norm": 1.5443466022123702, - "learning_rate": 2.3955151183854993e-06, - "loss": 0.9724, - "step": 5029 - }, - { - "epoch": 0.4536231230554178, - "grad_norm": 1.4024440123862745, - "learning_rate": 2.3949424284897073e-06, - "loss": 0.9076, - "step": 5030 - }, - { - "epoch": 0.45371330657888803, - "grad_norm": 1.5103509121317138, - "learning_rate": 2.39436970489525e-06, - "loss": 0.8139, - "step": 5031 - }, - { - "epoch": 0.4538034901023583, - "grad_norm": 1.474681645321324, - "learning_rate": 2.3937969476509955e-06, - "loss": 0.9547, - "step": 5032 - }, - { - "epoch": 0.45389367362582855, - "grad_norm": 1.3691033155552015, - "learning_rate": 2.393224156805813e-06, - "loss": 1.011, - "step": 5033 - }, - { - "epoch": 0.45398385714929884, - "grad_norm": 1.4900013352714547, - "learning_rate": 2.392651332408578e-06, - "loss": 0.9211, - "step": 5034 - }, - { - "epoch": 0.4540740406727691, - "grad_norm": 1.7576369134309155, - "learning_rate": 2.3920784745081655e-06, - "loss": 0.9324, - "step": 5035 - }, - { - "epoch": 0.45416422419623936, - "grad_norm": 1.5713565069865432, - "learning_rate": 2.391505583153456e-06, - "loss": 0.9849, - "step": 5036 - }, - { - "epoch": 0.4542544077197096, - "grad_norm": 1.484942759300111, - "learning_rate": 2.3909326583933315e-06, - "loss": 0.9562, - "step": 5037 - }, - { - "epoch": 0.4543445912431799, - "grad_norm": 1.2861970437940204, - "learning_rate": 2.3903597002766777e-06, - "loss": 0.9107, - "step": 5038 - }, - { - "epoch": 0.4544347747666501, - "grad_norm": 1.334044693885866, - "learning_rate": 2.389786708852381e-06, - "loss": 0.9131, - "step": 5039 - }, - { - "epoch": 0.4545249582901204, - "grad_norm": 0.9102748089630923, - "learning_rate": 2.389213684169333e-06, - "loss": 0.9573, - "step": 5040 - }, - { - "epoch": 0.45461514181359064, - "grad_norm": 1.399914137727721, - "learning_rate": 2.388640626276428e-06, - "loss": 0.9096, - "step": 5041 - }, - { - "epoch": 0.45470532533706093, - "grad_norm": 0.8276869496994188, - "learning_rate": 2.388067535222561e-06, - "loss": 0.8108, - "step": 5042 - }, - { - "epoch": 0.45479550886053116, - "grad_norm": 1.6634022293780386, - "learning_rate": 2.3874944110566332e-06, - "loss": 0.9642, - "step": 5043 - }, - { - "epoch": 0.45488569238400145, - "grad_norm": 0.8228269640806296, - "learning_rate": 2.3869212538275447e-06, - "loss": 0.9314, - "step": 5044 - }, - { - "epoch": 0.4549758759074717, - "grad_norm": 1.2422007434067033, - "learning_rate": 2.386348063584202e-06, - "loss": 0.9635, - "step": 5045 - }, - { - "epoch": 0.45506605943094197, - "grad_norm": 1.6202981122370417, - "learning_rate": 2.385774840375511e-06, - "loss": 1.0014, - "step": 5046 - }, - { - "epoch": 0.4551562429544122, - "grad_norm": 1.4669988216351209, - "learning_rate": 2.385201584250385e-06, - "loss": 0.9072, - "step": 5047 - }, - { - "epoch": 0.4552464264778825, - "grad_norm": 0.8083974258995317, - "learning_rate": 2.3846282952577346e-06, - "loss": 0.8679, - "step": 5048 - }, - { - "epoch": 0.4553366100013527, - "grad_norm": 1.2754200673465617, - "learning_rate": 2.3840549734464785e-06, - "loss": 1.0313, - "step": 5049 - }, - { - "epoch": 0.455426793524823, - "grad_norm": 1.600698619551143, - "learning_rate": 2.3834816188655336e-06, - "loss": 0.8863, - "step": 5050 - }, - { - "epoch": 0.4555169770482933, - "grad_norm": 1.5019362191983776, - "learning_rate": 2.3829082315638224e-06, - "loss": 0.9003, - "step": 5051 - }, - { - "epoch": 0.45560716057176354, - "grad_norm": 1.5571060155057426, - "learning_rate": 2.3823348115902695e-06, - "loss": 0.8119, - "step": 5052 - }, - { - "epoch": 0.4556973440952338, - "grad_norm": 1.4746954684356475, - "learning_rate": 2.3817613589938026e-06, - "loss": 0.9598, - "step": 5053 - }, - { - "epoch": 0.45578752761870406, - "grad_norm": 1.575050834562005, - "learning_rate": 2.3811878738233517e-06, - "loss": 0.9892, - "step": 5054 - }, - { - "epoch": 0.45587771114217435, - "grad_norm": 1.707422626244723, - "learning_rate": 2.380614356127849e-06, - "loss": 0.8967, - "step": 5055 - }, - { - "epoch": 0.4559678946656446, - "grad_norm": 1.4708172473982417, - "learning_rate": 2.3800408059562318e-06, - "loss": 0.9402, - "step": 5056 - }, - { - "epoch": 0.45605807818911487, - "grad_norm": 1.7537794173499868, - "learning_rate": 2.3794672233574365e-06, - "loss": 0.954, - "step": 5057 - }, - { - "epoch": 0.4561482617125851, - "grad_norm": 1.5761284615400522, - "learning_rate": 2.3788936083804058e-06, - "loss": 0.9656, - "step": 5058 - }, - { - "epoch": 0.4562384452360554, - "grad_norm": 1.3478485412519574, - "learning_rate": 2.378319961074083e-06, - "loss": 0.947, - "step": 5059 - }, - { - "epoch": 0.4563286287595256, - "grad_norm": 1.4141616786508404, - "learning_rate": 2.377746281487415e-06, - "loss": 0.9934, - "step": 5060 - }, - { - "epoch": 0.4564188122829959, - "grad_norm": 1.6546237807212096, - "learning_rate": 2.377172569669352e-06, - "loss": 1.0349, - "step": 5061 - }, - { - "epoch": 0.45650899580646614, - "grad_norm": 1.5253176311460541, - "learning_rate": 2.376598825668845e-06, - "loss": 0.8022, - "step": 5062 - }, - { - "epoch": 0.45659917932993643, - "grad_norm": 1.5426406704705076, - "learning_rate": 2.3760250495348495e-06, - "loss": 0.9351, - "step": 5063 - }, - { - "epoch": 0.45668936285340667, - "grad_norm": 1.454170671182418, - "learning_rate": 2.3754512413163236e-06, - "loss": 0.9867, - "step": 5064 - }, - { - "epoch": 0.45677954637687695, - "grad_norm": 1.2873026076172556, - "learning_rate": 2.3748774010622285e-06, - "loss": 0.9304, - "step": 5065 - }, - { - "epoch": 0.4568697299003472, - "grad_norm": 1.605197410208017, - "learning_rate": 2.3743035288215254e-06, - "loss": 0.9617, - "step": 5066 - }, - { - "epoch": 0.4569599134238175, - "grad_norm": 1.4037820140146737, - "learning_rate": 2.3737296246431815e-06, - "loss": 0.9572, - "step": 5067 - }, - { - "epoch": 0.4570500969472877, - "grad_norm": 1.3399629678875675, - "learning_rate": 2.3731556885761656e-06, - "loss": 0.9182, - "step": 5068 - }, - { - "epoch": 0.457140280470758, - "grad_norm": 0.6517043034871374, - "learning_rate": 2.372581720669449e-06, - "loss": 0.788, - "step": 5069 - }, - { - "epoch": 0.45723046399422823, - "grad_norm": 1.6598949271579548, - "learning_rate": 2.3720077209720046e-06, - "loss": 0.9736, - "step": 5070 - }, - { - "epoch": 0.4573206475176985, - "grad_norm": 0.737275826602977, - "learning_rate": 2.3714336895328112e-06, - "loss": 0.8059, - "step": 5071 - }, - { - "epoch": 0.45741083104116875, - "grad_norm": 1.6374174461230226, - "learning_rate": 2.370859626400847e-06, - "loss": 0.9034, - "step": 5072 - }, - { - "epoch": 0.45750101456463904, - "grad_norm": 1.3552348789876725, - "learning_rate": 2.3702855316250943e-06, - "loss": 0.9275, - "step": 5073 - }, - { - "epoch": 0.45759119808810933, - "grad_norm": 1.511555349898132, - "learning_rate": 2.369711405254539e-06, - "loss": 0.9478, - "step": 5074 - }, - { - "epoch": 0.45768138161157956, - "grad_norm": 1.7363569602674556, - "learning_rate": 2.3691372473381673e-06, - "loss": 0.9982, - "step": 5075 - }, - { - "epoch": 0.45777156513504985, - "grad_norm": 1.5174212478992277, - "learning_rate": 2.3685630579249708e-06, - "loss": 1.0075, - "step": 5076 - }, - { - "epoch": 0.4578617486585201, - "grad_norm": 1.3953031225626236, - "learning_rate": 2.367988837063942e-06, - "loss": 0.9212, - "step": 5077 - }, - { - "epoch": 0.4579519321819904, - "grad_norm": 1.499418702341354, - "learning_rate": 2.367414584804076e-06, - "loss": 1.0241, - "step": 5078 - }, - { - "epoch": 0.4580421157054606, - "grad_norm": 1.3011622515287886, - "learning_rate": 2.366840301194372e-06, - "loss": 0.9807, - "step": 5079 - }, - { - "epoch": 0.4581322992289309, - "grad_norm": 1.3042967433877966, - "learning_rate": 2.3662659862838308e-06, - "loss": 0.9381, - "step": 5080 - }, - { - "epoch": 0.45822248275240113, - "grad_norm": 1.406415124311485, - "learning_rate": 2.365691640121456e-06, - "loss": 1.0018, - "step": 5081 - }, - { - "epoch": 0.4583126662758714, - "grad_norm": 1.778716711937824, - "learning_rate": 2.365117262756254e-06, - "loss": 0.8876, - "step": 5082 - }, - { - "epoch": 0.45840284979934165, - "grad_norm": 1.6682153023739061, - "learning_rate": 2.3645428542372342e-06, - "loss": 0.9432, - "step": 5083 - }, - { - "epoch": 0.45849303332281194, - "grad_norm": 0.7853569799016747, - "learning_rate": 2.3639684146134083e-06, - "loss": 0.7571, - "step": 5084 - }, - { - "epoch": 0.45858321684628217, - "grad_norm": 2.10340431931899, - "learning_rate": 2.3633939439337897e-06, - "loss": 0.9143, - "step": 5085 - }, - { - "epoch": 0.45867340036975246, - "grad_norm": 1.6977856435324477, - "learning_rate": 2.362819442247396e-06, - "loss": 0.914, - "step": 5086 - }, - { - "epoch": 0.4587635838932227, - "grad_norm": 0.7372681059193805, - "learning_rate": 2.3622449096032477e-06, - "loss": 0.7592, - "step": 5087 - }, - { - "epoch": 0.458853767416693, - "grad_norm": 1.527464488688995, - "learning_rate": 2.361670346050366e-06, - "loss": 0.9826, - "step": 5088 - }, - { - "epoch": 0.4589439509401632, - "grad_norm": 1.7080342643525963, - "learning_rate": 2.3610957516377757e-06, - "loss": 0.8716, - "step": 5089 - }, - { - "epoch": 0.4590341344636335, - "grad_norm": 1.4601522365002608, - "learning_rate": 2.3605211264145048e-06, - "loss": 0.9439, - "step": 5090 - }, - { - "epoch": 0.45912431798710374, - "grad_norm": 1.5203048972462136, - "learning_rate": 2.3599464704295836e-06, - "loss": 0.9173, - "step": 5091 - }, - { - "epoch": 0.459214501510574, - "grad_norm": 1.7637126322247663, - "learning_rate": 2.359371783732045e-06, - "loss": 0.8345, - "step": 5092 - }, - { - "epoch": 0.45930468503404426, - "grad_norm": 1.4877821518467547, - "learning_rate": 2.358797066370924e-06, - "loss": 0.9326, - "step": 5093 - }, - { - "epoch": 0.45939486855751455, - "grad_norm": 1.3401775333599841, - "learning_rate": 2.3582223183952594e-06, - "loss": 0.9524, - "step": 5094 - }, - { - "epoch": 0.4594850520809848, - "grad_norm": 1.6507079773007893, - "learning_rate": 2.357647539854091e-06, - "loss": 0.9814, - "step": 5095 - }, - { - "epoch": 0.45957523560445507, - "grad_norm": 1.6426983765481984, - "learning_rate": 2.3570727307964624e-06, - "loss": 0.9561, - "step": 5096 - }, - { - "epoch": 0.4596654191279253, - "grad_norm": 1.440948248156948, - "learning_rate": 2.35649789127142e-06, - "loss": 0.9933, - "step": 5097 - }, - { - "epoch": 0.4597556026513956, - "grad_norm": 1.4948984655137734, - "learning_rate": 2.3559230213280115e-06, - "loss": 0.9178, - "step": 5098 - }, - { - "epoch": 0.4598457861748659, - "grad_norm": 1.2884023189622107, - "learning_rate": 2.3553481210152886e-06, - "loss": 0.8652, - "step": 5099 - }, - { - "epoch": 0.4599359696983361, - "grad_norm": 0.817622287539876, - "learning_rate": 2.3547731903823043e-06, - "loss": 0.8524, - "step": 5100 - }, - { - "epoch": 0.4600261532218064, - "grad_norm": 1.2978149592603405, - "learning_rate": 2.3541982294781155e-06, - "loss": 0.9204, - "step": 5101 - }, - { - "epoch": 0.46011633674527663, - "grad_norm": 1.345797996332653, - "learning_rate": 2.3536232383517804e-06, - "loss": 0.9421, - "step": 5102 - }, - { - "epoch": 0.4602065202687469, - "grad_norm": 1.2234134462216004, - "learning_rate": 2.3530482170523602e-06, - "loss": 0.9495, - "step": 5103 - }, - { - "epoch": 0.46029670379221715, - "grad_norm": 1.264172792279451, - "learning_rate": 2.3524731656289206e-06, - "loss": 1.0335, - "step": 5104 - }, - { - "epoch": 0.46038688731568744, - "grad_norm": 1.4202782185092475, - "learning_rate": 2.351898084130526e-06, - "loss": 1.0321, - "step": 5105 - }, - { - "epoch": 0.4604770708391577, - "grad_norm": 13.315633915341126, - "learning_rate": 2.351322972606247e-06, - "loss": 1.0109, - "step": 5106 - }, - { - "epoch": 0.46056725436262796, - "grad_norm": 1.7043649202166944, - "learning_rate": 2.350747831105155e-06, - "loss": 1.0951, - "step": 5107 - }, - { - "epoch": 0.4606574378860982, - "grad_norm": 1.995507379990869, - "learning_rate": 2.350172659676323e-06, - "loss": 1.0275, - "step": 5108 - }, - { - "epoch": 0.4607476214095685, - "grad_norm": 1.3521081341240577, - "learning_rate": 2.3495974583688306e-06, - "loss": 0.9836, - "step": 5109 - }, - { - "epoch": 0.4608378049330387, - "grad_norm": 1.2969992244926964, - "learning_rate": 2.3490222272317543e-06, - "loss": 1.0198, - "step": 5110 - }, - { - "epoch": 0.460927988456509, - "grad_norm": 1.5059241294544343, - "learning_rate": 2.348446966314177e-06, - "loss": 0.9955, - "step": 5111 - }, - { - "epoch": 0.46101817197997924, - "grad_norm": 1.316598668736853, - "learning_rate": 2.3478716756651837e-06, - "loss": 0.9251, - "step": 5112 - }, - { - "epoch": 0.46110835550344953, - "grad_norm": 1.4501953947022284, - "learning_rate": 2.347296355333861e-06, - "loss": 0.9379, - "step": 5113 - }, - { - "epoch": 0.46119853902691976, - "grad_norm": 1.4373649036116902, - "learning_rate": 2.3467210053692972e-06, - "loss": 0.954, - "step": 5114 - }, - { - "epoch": 0.46128872255039005, - "grad_norm": 1.7532673035119566, - "learning_rate": 2.3461456258205866e-06, - "loss": 0.9551, - "step": 5115 - }, - { - "epoch": 0.4613789060738603, - "grad_norm": 1.6600302255705284, - "learning_rate": 2.345570216736822e-06, - "loss": 0.8908, - "step": 5116 - }, - { - "epoch": 0.4614690895973306, - "grad_norm": 1.502831964618598, - "learning_rate": 2.3449947781671013e-06, - "loss": 0.9553, - "step": 5117 - }, - { - "epoch": 0.4615592731208008, - "grad_norm": 1.5261078734460558, - "learning_rate": 2.3444193101605237e-06, - "loss": 0.977, - "step": 5118 - }, - { - "epoch": 0.4616494566442711, - "grad_norm": 1.4616353209442787, - "learning_rate": 2.3438438127661913e-06, - "loss": 0.9634, - "step": 5119 - }, - { - "epoch": 0.4617396401677413, - "grad_norm": 1.6838873587287828, - "learning_rate": 2.3432682860332096e-06, - "loss": 0.7898, - "step": 5120 - }, - { - "epoch": 0.4618298236912116, - "grad_norm": 1.3552553739991817, - "learning_rate": 2.342692730010684e-06, - "loss": 0.8969, - "step": 5121 - }, - { - "epoch": 0.4619200072146819, - "grad_norm": 2.025202978816621, - "learning_rate": 2.342117144747726e-06, - "loss": 0.9543, - "step": 5122 - }, - { - "epoch": 0.46201019073815214, - "grad_norm": 1.4838435777755228, - "learning_rate": 2.3415415302934457e-06, - "loss": 0.9411, - "step": 5123 - }, - { - "epoch": 0.4621003742616224, - "grad_norm": 1.466814673426261, - "learning_rate": 2.340965886696959e-06, - "loss": 0.9744, - "step": 5124 - }, - { - "epoch": 0.46219055778509266, - "grad_norm": 1.3454375205614786, - "learning_rate": 2.340390214007384e-06, - "loss": 0.9033, - "step": 5125 - }, - { - "epoch": 0.46228074130856295, - "grad_norm": 1.5529135546675685, - "learning_rate": 2.339814512273838e-06, - "loss": 0.834, - "step": 5126 - }, - { - "epoch": 0.4623709248320332, - "grad_norm": 1.3872258508326112, - "learning_rate": 2.3392387815454447e-06, - "loss": 1.0502, - "step": 5127 - }, - { - "epoch": 0.46246110835550347, - "grad_norm": 1.5516085399853892, - "learning_rate": 2.3386630218713273e-06, - "loss": 0.9478, - "step": 5128 - }, - { - "epoch": 0.4625512918789737, - "grad_norm": 1.4932696827169751, - "learning_rate": 2.3380872333006135e-06, - "loss": 0.9619, - "step": 5129 - }, - { - "epoch": 0.462641475402444, - "grad_norm": 13.427849573369485, - "learning_rate": 2.3375114158824335e-06, - "loss": 0.9941, - "step": 5130 - }, - { - "epoch": 0.4627316589259142, - "grad_norm": 1.4393804938161394, - "learning_rate": 2.3369355696659184e-06, - "loss": 0.9857, - "step": 5131 - }, - { - "epoch": 0.4628218424493845, - "grad_norm": 1.3946778890837164, - "learning_rate": 2.336359694700202e-06, - "loss": 0.934, - "step": 5132 - }, - { - "epoch": 0.46291202597285475, - "grad_norm": 1.4100542110816474, - "learning_rate": 2.335783791034422e-06, - "loss": 0.9528, - "step": 5133 - }, - { - "epoch": 0.46300220949632503, - "grad_norm": 1.44331940013571, - "learning_rate": 2.3352078587177173e-06, - "loss": 0.9324, - "step": 5134 - }, - { - "epoch": 0.46309239301979527, - "grad_norm": 1.3881331846226905, - "learning_rate": 2.33463189779923e-06, - "loss": 0.9533, - "step": 5135 - }, - { - "epoch": 0.46318257654326556, - "grad_norm": 1.2960320916205648, - "learning_rate": 2.334055908328104e-06, - "loss": 1.0222, - "step": 5136 - }, - { - "epoch": 0.4632727600667358, - "grad_norm": 1.518947695200129, - "learning_rate": 2.3334798903534866e-06, - "loss": 0.8668, - "step": 5137 - }, - { - "epoch": 0.4633629435902061, - "grad_norm": 1.5778615750564664, - "learning_rate": 2.3329038439245252e-06, - "loss": 0.8502, - "step": 5138 - }, - { - "epoch": 0.4634531271136763, - "grad_norm": 1.6254953216372765, - "learning_rate": 2.3323277690903724e-06, - "loss": 0.9492, - "step": 5139 - }, - { - "epoch": 0.4635433106371466, - "grad_norm": 1.2917541146158402, - "learning_rate": 2.3317516659001827e-06, - "loss": 0.9272, - "step": 5140 - }, - { - "epoch": 0.46363349416061683, - "grad_norm": 1.348119506824746, - "learning_rate": 2.331175534403111e-06, - "loss": 0.959, - "step": 5141 - }, - { - "epoch": 0.4637236776840871, - "grad_norm": 1.3471164672785214, - "learning_rate": 2.3305993746483167e-06, - "loss": 1.0271, - "step": 5142 - }, - { - "epoch": 0.46381386120755735, - "grad_norm": 1.8471696466916778, - "learning_rate": 2.3300231866849606e-06, - "loss": 0.925, - "step": 5143 - }, - { - "epoch": 0.46390404473102764, - "grad_norm": 1.5062845028293113, - "learning_rate": 2.3294469705622067e-06, - "loss": 0.9817, - "step": 5144 - }, - { - "epoch": 0.4639942282544979, - "grad_norm": 1.3937917934019026, - "learning_rate": 2.3288707263292203e-06, - "loss": 0.923, - "step": 5145 - }, - { - "epoch": 0.46408441177796816, - "grad_norm": 1.2586324163709572, - "learning_rate": 2.3282944540351707e-06, - "loss": 0.9679, - "step": 5146 - }, - { - "epoch": 0.46417459530143845, - "grad_norm": 2.378096820198154, - "learning_rate": 2.327718153729228e-06, - "loss": 0.9254, - "step": 5147 - }, - { - "epoch": 0.4642647788249087, - "grad_norm": 1.6206200332653762, - "learning_rate": 2.327141825460566e-06, - "loss": 0.9914, - "step": 5148 - }, - { - "epoch": 0.464354962348379, - "grad_norm": 0.802462158083353, - "learning_rate": 2.326565469278358e-06, - "loss": 0.8216, - "step": 5149 - }, - { - "epoch": 0.4644451458718492, - "grad_norm": 0.7620679788379231, - "learning_rate": 2.3259890852317846e-06, - "loss": 0.8152, - "step": 5150 - }, - { - "epoch": 0.4645353293953195, - "grad_norm": 2.7373597339943205, - "learning_rate": 2.3254126733700246e-06, - "loss": 0.9504, - "step": 5151 - }, - { - "epoch": 0.46462551291878973, - "grad_norm": 1.547893073551215, - "learning_rate": 2.324836233742262e-06, - "loss": 0.9756, - "step": 5152 - }, - { - "epoch": 0.46471569644226, - "grad_norm": 1.6679562427489576, - "learning_rate": 2.3242597663976793e-06, - "loss": 0.9879, - "step": 5153 - }, - { - "epoch": 0.46480587996573025, - "grad_norm": 1.5579160206335905, - "learning_rate": 2.3236832713854663e-06, - "loss": 0.9255, - "step": 5154 - }, - { - "epoch": 0.46489606348920054, - "grad_norm": 1.5171178956963558, - "learning_rate": 2.323106748754812e-06, - "loss": 0.9444, - "step": 5155 - }, - { - "epoch": 0.4649862470126708, - "grad_norm": 1.3232145299519655, - "learning_rate": 2.3225301985549077e-06, - "loss": 1.0394, - "step": 5156 - }, - { - "epoch": 0.46507643053614106, - "grad_norm": 1.5265732014182385, - "learning_rate": 2.321953620834948e-06, - "loss": 0.8831, - "step": 5157 - }, - { - "epoch": 0.4651666140596113, - "grad_norm": 1.2851024952693357, - "learning_rate": 2.3213770156441314e-06, - "loss": 0.9224, - "step": 5158 - }, - { - "epoch": 0.4652567975830816, - "grad_norm": 1.689658867981619, - "learning_rate": 2.3208003830316554e-06, - "loss": 0.9946, - "step": 5159 - }, - { - "epoch": 0.4653469811065518, - "grad_norm": 1.4304865704212388, - "learning_rate": 2.3202237230467215e-06, - "loss": 1.035, - "step": 5160 - }, - { - "epoch": 0.4654371646300221, - "grad_norm": 1.2315670379810604, - "learning_rate": 2.3196470357385338e-06, - "loss": 0.9021, - "step": 5161 - }, - { - "epoch": 0.46552734815349234, - "grad_norm": 1.304242497921807, - "learning_rate": 2.319070321156299e-06, - "loss": 0.9371, - "step": 5162 - }, - { - "epoch": 0.4656175316769626, - "grad_norm": 1.445874244191759, - "learning_rate": 2.318493579349224e-06, - "loss": 1.0815, - "step": 5163 - }, - { - "epoch": 0.46570771520043286, - "grad_norm": 1.9325186158303898, - "learning_rate": 2.317916810366522e-06, - "loss": 0.9424, - "step": 5164 - }, - { - "epoch": 0.46579789872390315, - "grad_norm": 1.3833885070797667, - "learning_rate": 2.317340014257404e-06, - "loss": 0.89, - "step": 5165 - }, - { - "epoch": 0.4658880822473734, - "grad_norm": 1.3105046681914423, - "learning_rate": 2.316763191071086e-06, - "loss": 0.8732, - "step": 5166 - }, - { - "epoch": 0.46597826577084367, - "grad_norm": 1.3793239663228398, - "learning_rate": 2.316186340856787e-06, - "loss": 0.9067, - "step": 5167 - }, - { - "epoch": 0.4660684492943139, - "grad_norm": 1.6003712759787283, - "learning_rate": 2.315609463663725e-06, - "loss": 0.8966, - "step": 5168 - }, - { - "epoch": 0.4661586328177842, - "grad_norm": 1.5448282746367201, - "learning_rate": 2.315032559541123e-06, - "loss": 0.9258, - "step": 5169 - }, - { - "epoch": 0.4662488163412545, - "grad_norm": 1.5460741686629118, - "learning_rate": 2.314455628538207e-06, - "loss": 0.9725, - "step": 5170 - }, - { - "epoch": 0.4663389998647247, - "grad_norm": 1.215628514615742, - "learning_rate": 2.3138786707042023e-06, - "loss": 0.9172, - "step": 5171 - }, - { - "epoch": 0.466429183388195, - "grad_norm": 1.3245548058184071, - "learning_rate": 2.3133016860883387e-06, - "loss": 0.9576, - "step": 5172 - }, - { - "epoch": 0.46651936691166523, - "grad_norm": 1.7271896535115614, - "learning_rate": 2.3127246747398475e-06, - "loss": 0.9329, - "step": 5173 - }, - { - "epoch": 0.4666095504351355, - "grad_norm": 1.1969562771842008, - "learning_rate": 2.312147636707963e-06, - "loss": 0.9424, - "step": 5174 - }, - { - "epoch": 0.46669973395860576, - "grad_norm": 1.5821328212957315, - "learning_rate": 2.3115705720419214e-06, - "loss": 0.9378, - "step": 5175 - }, - { - "epoch": 0.46678991748207604, - "grad_norm": 1.4532483776588119, - "learning_rate": 2.31099348079096e-06, - "loss": 0.9714, - "step": 5176 - }, - { - "epoch": 0.4668801010055463, - "grad_norm": 1.182325836001568, - "learning_rate": 2.31041636300432e-06, - "loss": 0.9847, - "step": 5177 - }, - { - "epoch": 0.46697028452901657, - "grad_norm": 1.541330902377085, - "learning_rate": 2.3098392187312445e-06, - "loss": 1.0282, - "step": 5178 - }, - { - "epoch": 0.4670604680524868, - "grad_norm": 1.453229920896095, - "learning_rate": 2.309262048020978e-06, - "loss": 0.9282, - "step": 5179 - }, - { - "epoch": 0.4671506515759571, - "grad_norm": 2.747320690603895, - "learning_rate": 2.308684850922769e-06, - "loss": 0.9817, - "step": 5180 - }, - { - "epoch": 0.4672408350994273, - "grad_norm": 1.4476065909714977, - "learning_rate": 2.3081076274858664e-06, - "loss": 0.9533, - "step": 5181 - }, - { - "epoch": 0.4673310186228976, - "grad_norm": 1.694742906928395, - "learning_rate": 2.307530377759522e-06, - "loss": 0.9836, - "step": 5182 - }, - { - "epoch": 0.46742120214636784, - "grad_norm": 1.7588392183818748, - "learning_rate": 2.30695310179299e-06, - "loss": 0.8865, - "step": 5183 - }, - { - "epoch": 0.46751138566983813, - "grad_norm": 1.344520082045017, - "learning_rate": 2.3063757996355267e-06, - "loss": 0.9395, - "step": 5184 - }, - { - "epoch": 0.46760156919330836, - "grad_norm": 1.5925077196829396, - "learning_rate": 2.3057984713363903e-06, - "loss": 0.9806, - "step": 5185 - }, - { - "epoch": 0.46769175271677865, - "grad_norm": 0.6600275534961629, - "learning_rate": 2.3052211169448436e-06, - "loss": 0.7968, - "step": 5186 - }, - { - "epoch": 0.4677819362402489, - "grad_norm": 1.546218038982639, - "learning_rate": 2.3046437365101474e-06, - "loss": 0.8939, - "step": 5187 - }, - { - "epoch": 0.4678721197637192, - "grad_norm": 1.4971019246346426, - "learning_rate": 2.3040663300815673e-06, - "loss": 0.9334, - "step": 5188 - }, - { - "epoch": 0.4679623032871894, - "grad_norm": 0.6827113096210915, - "learning_rate": 2.3034888977083723e-06, - "loss": 0.7839, - "step": 5189 - }, - { - "epoch": 0.4680524868106597, - "grad_norm": 1.4875213204787299, - "learning_rate": 2.30291143943983e-06, - "loss": 0.868, - "step": 5190 - }, - { - "epoch": 0.46814267033412993, - "grad_norm": 1.4398665643168882, - "learning_rate": 2.3023339553252145e-06, - "loss": 0.9098, - "step": 5191 - }, - { - "epoch": 0.4682328538576002, - "grad_norm": 1.41092717873483, - "learning_rate": 2.301756445413799e-06, - "loss": 0.9849, - "step": 5192 - }, - { - "epoch": 0.4683230373810705, - "grad_norm": 1.5209899804251141, - "learning_rate": 2.3011789097548585e-06, - "loss": 0.9477, - "step": 5193 - }, - { - "epoch": 0.46841322090454074, - "grad_norm": 1.3259775640191984, - "learning_rate": 2.3006013483976738e-06, - "loss": 0.9437, - "step": 5194 - }, - { - "epoch": 0.468503404428011, - "grad_norm": 1.6479751878740871, - "learning_rate": 2.300023761391524e-06, - "loss": 0.9487, - "step": 5195 - }, - { - "epoch": 0.46859358795148126, - "grad_norm": 1.6373051068754683, - "learning_rate": 2.299446148785693e-06, - "loss": 0.9987, - "step": 5196 - }, - { - "epoch": 0.46868377147495155, - "grad_norm": 1.6185647011529187, - "learning_rate": 2.2988685106294654e-06, - "loss": 0.9271, - "step": 5197 - }, - { - "epoch": 0.4687739549984218, - "grad_norm": 1.418793073496058, - "learning_rate": 2.2982908469721284e-06, - "loss": 0.8503, - "step": 5198 - }, - { - "epoch": 0.46886413852189207, - "grad_norm": 1.6729924398184157, - "learning_rate": 2.2977131578629714e-06, - "loss": 1.0212, - "step": 5199 - }, - { - "epoch": 0.4689543220453623, - "grad_norm": 2.2570242908406146, - "learning_rate": 2.297135443351286e-06, - "loss": 0.9749, - "step": 5200 - }, - { - "epoch": 0.4690445055688326, - "grad_norm": 1.2300428349704418, - "learning_rate": 2.296557703486367e-06, - "loss": 0.9618, - "step": 5201 - }, - { - "epoch": 0.4691346890923028, - "grad_norm": 6.526405691683701, - "learning_rate": 2.295979938317509e-06, - "loss": 0.9746, - "step": 5202 - }, - { - "epoch": 0.4692248726157731, - "grad_norm": 1.4479715990948436, - "learning_rate": 2.295402147894011e-06, - "loss": 0.946, - "step": 5203 - }, - { - "epoch": 0.46931505613924335, - "grad_norm": 1.2192812030953646, - "learning_rate": 2.2948243322651723e-06, - "loss": 0.8694, - "step": 5204 - }, - { - "epoch": 0.46940523966271364, - "grad_norm": 1.46789452820426, - "learning_rate": 2.2942464914802962e-06, - "loss": 0.9479, - "step": 5205 - }, - { - "epoch": 0.46949542318618387, - "grad_norm": 1.513074794032478, - "learning_rate": 2.293668625588687e-06, - "loss": 0.9248, - "step": 5206 - }, - { - "epoch": 0.46958560670965416, - "grad_norm": 1.5697354447342642, - "learning_rate": 2.293090734639651e-06, - "loss": 0.984, - "step": 5207 - }, - { - "epoch": 0.4696757902331244, - "grad_norm": 1.5655393413326457, - "learning_rate": 2.2925128186824983e-06, - "loss": 1.0716, - "step": 5208 - }, - { - "epoch": 0.4697659737565947, - "grad_norm": 3.7255692987832907, - "learning_rate": 2.2919348777665384e-06, - "loss": 1.0448, - "step": 5209 - }, - { - "epoch": 0.4698561572800649, - "grad_norm": 1.5096142533824668, - "learning_rate": 2.2913569119410856e-06, - "loss": 0.9532, - "step": 5210 - }, - { - "epoch": 0.4699463408035352, - "grad_norm": 1.3768829111364604, - "learning_rate": 2.290778921255454e-06, - "loss": 0.9723, - "step": 5211 - }, - { - "epoch": 0.47003652432700543, - "grad_norm": 1.309499626328574, - "learning_rate": 2.2902009057589613e-06, - "loss": 0.984, - "step": 5212 - }, - { - "epoch": 0.4701267078504757, - "grad_norm": 1.1640247108418968, - "learning_rate": 2.2896228655009276e-06, - "loss": 0.9099, - "step": 5213 - }, - { - "epoch": 0.47021689137394596, - "grad_norm": 1.727086397666038, - "learning_rate": 2.289044800530674e-06, - "loss": 0.9159, - "step": 5214 - }, - { - "epoch": 0.47030707489741624, - "grad_norm": 1.7876518051669692, - "learning_rate": 2.2884667108975245e-06, - "loss": 0.9157, - "step": 5215 - }, - { - "epoch": 0.4703972584208865, - "grad_norm": 1.46269145266716, - "learning_rate": 2.287888596650804e-06, - "loss": 0.987, - "step": 5216 - }, - { - "epoch": 0.47048744194435677, - "grad_norm": 1.5088478447287124, - "learning_rate": 2.287310457839841e-06, - "loss": 0.9515, - "step": 5217 - }, - { - "epoch": 0.47057762546782705, - "grad_norm": 1.6559338537817825, - "learning_rate": 2.286732294513966e-06, - "loss": 1.0222, - "step": 5218 - }, - { - "epoch": 0.4706678089912973, - "grad_norm": 1.5304044607776452, - "learning_rate": 2.2861541067225106e-06, - "loss": 0.9573, - "step": 5219 - }, - { - "epoch": 0.4707579925147676, - "grad_norm": 1.8897453342404873, - "learning_rate": 2.2855758945148095e-06, - "loss": 0.9397, - "step": 5220 - }, - { - "epoch": 0.4708481760382378, - "grad_norm": 1.3262802326952334, - "learning_rate": 2.2849976579401977e-06, - "loss": 1.0065, - "step": 5221 - }, - { - "epoch": 0.4709383595617081, - "grad_norm": 1.856580477494103, - "learning_rate": 2.284419397048014e-06, - "loss": 0.945, - "step": 5222 - }, - { - "epoch": 0.47102854308517833, - "grad_norm": 1.3499766400753328, - "learning_rate": 2.2838411118875997e-06, - "loss": 0.9604, - "step": 5223 - }, - { - "epoch": 0.4711187266086486, - "grad_norm": 1.3414440329667703, - "learning_rate": 2.283262802508296e-06, - "loss": 0.8782, - "step": 5224 - }, - { - "epoch": 0.47120891013211885, - "grad_norm": 1.110031148799957, - "learning_rate": 2.2826844689594492e-06, - "loss": 0.9288, - "step": 5225 - }, - { - "epoch": 0.47129909365558914, - "grad_norm": 1.38352801243775, - "learning_rate": 2.282106111290404e-06, - "loss": 1.0303, - "step": 5226 - }, - { - "epoch": 0.4713892771790594, - "grad_norm": 0.6789662272097912, - "learning_rate": 2.2815277295505098e-06, - "loss": 0.8236, - "step": 5227 - }, - { - "epoch": 0.47147946070252966, - "grad_norm": 1.5199185934352426, - "learning_rate": 2.2809493237891174e-06, - "loss": 0.9043, - "step": 5228 - }, - { - "epoch": 0.4715696442259999, - "grad_norm": 1.3416391693677776, - "learning_rate": 2.2803708940555796e-06, - "loss": 0.9791, - "step": 5229 - }, - { - "epoch": 0.4716598277494702, - "grad_norm": 1.2500655633898297, - "learning_rate": 2.2797924403992514e-06, - "loss": 1.0269, - "step": 5230 - }, - { - "epoch": 0.4717500112729404, - "grad_norm": 1.5492950928499722, - "learning_rate": 2.2792139628694892e-06, - "loss": 0.9675, - "step": 5231 - }, - { - "epoch": 0.4718401947964107, - "grad_norm": 1.2552104121569545, - "learning_rate": 2.2786354615156524e-06, - "loss": 0.9198, - "step": 5232 - }, - { - "epoch": 0.47193037831988094, - "grad_norm": 1.6258404099104946, - "learning_rate": 2.2780569363871016e-06, - "loss": 1.0123, - "step": 5233 - }, - { - "epoch": 0.4720205618433512, - "grad_norm": 0.6502768385502542, - "learning_rate": 2.277478387533199e-06, - "loss": 0.7886, - "step": 5234 - }, - { - "epoch": 0.47211074536682146, - "grad_norm": 1.7262754590078968, - "learning_rate": 2.276899815003311e-06, - "loss": 0.9912, - "step": 5235 - }, - { - "epoch": 0.47220092889029175, - "grad_norm": 1.3984686139444182, - "learning_rate": 2.2763212188468045e-06, - "loss": 0.9391, - "step": 5236 - }, - { - "epoch": 0.472291112413762, - "grad_norm": 1.2364599751196708, - "learning_rate": 2.2757425991130473e-06, - "loss": 0.9663, - "step": 5237 - }, - { - "epoch": 0.47238129593723227, - "grad_norm": 0.8515786685633548, - "learning_rate": 2.2751639558514117e-06, - "loss": 0.8286, - "step": 5238 - }, - { - "epoch": 0.4724714794607025, - "grad_norm": 1.2126405123276918, - "learning_rate": 2.2745852891112697e-06, - "loss": 0.9889, - "step": 5239 - }, - { - "epoch": 0.4725616629841728, - "grad_norm": 1.683008220487069, - "learning_rate": 2.274006598941997e-06, - "loss": 0.962, - "step": 5240 - }, - { - "epoch": 0.4726518465076431, - "grad_norm": 1.6737599665008167, - "learning_rate": 2.27342788539297e-06, - "loss": 0.9368, - "step": 5241 - }, - { - "epoch": 0.4727420300311133, - "grad_norm": 1.406365877251589, - "learning_rate": 2.2728491485135684e-06, - "loss": 0.9137, - "step": 5242 - }, - { - "epoch": 0.4728322135545836, - "grad_norm": 1.3642320787519036, - "learning_rate": 2.272270388353173e-06, - "loss": 1.0054, - "step": 5243 - }, - { - "epoch": 0.47292239707805384, - "grad_norm": 1.6256409628090602, - "learning_rate": 2.2716916049611666e-06, - "loss": 0.9268, - "step": 5244 - }, - { - "epoch": 0.4730125806015241, - "grad_norm": 1.3245217755847232, - "learning_rate": 2.2711127983869346e-06, - "loss": 0.9366, - "step": 5245 - }, - { - "epoch": 0.47310276412499436, - "grad_norm": 1.2412520431086849, - "learning_rate": 2.270533968679864e-06, - "loss": 0.9607, - "step": 5246 - }, - { - "epoch": 0.47319294764846465, - "grad_norm": 1.277156851157841, - "learning_rate": 2.269955115889343e-06, - "loss": 0.9562, - "step": 5247 - }, - { - "epoch": 0.4732831311719349, - "grad_norm": 1.5883732774214359, - "learning_rate": 2.269376240064763e-06, - "loss": 0.9116, - "step": 5248 - }, - { - "epoch": 0.47337331469540517, - "grad_norm": 1.4927016565777391, - "learning_rate": 2.268797341255517e-06, - "loss": 0.9692, - "step": 5249 - }, - { - "epoch": 0.4734634982188754, - "grad_norm": 1.4696225760036685, - "learning_rate": 2.268218419511e-06, - "loss": 0.8667, - "step": 5250 - }, - { - "epoch": 0.4735536817423457, - "grad_norm": 1.4272555319297495, - "learning_rate": 2.267639474880608e-06, - "loss": 0.8743, - "step": 5251 - }, - { - "epoch": 0.4736438652658159, - "grad_norm": 1.2245818261267822, - "learning_rate": 2.2670605074137407e-06, - "loss": 0.9272, - "step": 5252 - }, - { - "epoch": 0.4737340487892862, - "grad_norm": 1.5659181210048259, - "learning_rate": 2.2664815171597983e-06, - "loss": 0.9374, - "step": 5253 - }, - { - "epoch": 0.47382423231275644, - "grad_norm": 1.455662634278281, - "learning_rate": 2.265902504168183e-06, - "loss": 0.9008, - "step": 5254 - }, - { - "epoch": 0.47391441583622673, - "grad_norm": 1.4285447186289124, - "learning_rate": 2.2653234684883007e-06, - "loss": 0.9103, - "step": 5255 - }, - { - "epoch": 0.47400459935969697, - "grad_norm": 1.657196494230392, - "learning_rate": 2.264744410169556e-06, - "loss": 0.8729, - "step": 5256 - }, - { - "epoch": 0.47409478288316725, - "grad_norm": 1.2938236971149928, - "learning_rate": 2.264165329261359e-06, - "loss": 0.931, - "step": 5257 - }, - { - "epoch": 0.4741849664066375, - "grad_norm": 0.649979768034703, - "learning_rate": 2.26358622581312e-06, - "loss": 0.8021, - "step": 5258 - }, - { - "epoch": 0.4742751499301078, - "grad_norm": 1.417910487972088, - "learning_rate": 2.2630070998742504e-06, - "loss": 1.0168, - "step": 5259 - }, - { - "epoch": 0.474365333453578, - "grad_norm": 1.2398273911625821, - "learning_rate": 2.262427951494165e-06, - "loss": 0.9728, - "step": 5260 - }, - { - "epoch": 0.4744555169770483, - "grad_norm": 0.7107297153536942, - "learning_rate": 2.2618487807222794e-06, - "loss": 0.8388, - "step": 5261 - }, - { - "epoch": 0.47454570050051853, - "grad_norm": 1.4607318590648024, - "learning_rate": 2.261269587608012e-06, - "loss": 1.0326, - "step": 5262 - }, - { - "epoch": 0.4746358840239888, - "grad_norm": 1.181350697660666, - "learning_rate": 2.260690372200783e-06, - "loss": 0.95, - "step": 5263 - }, - { - "epoch": 0.47472606754745905, - "grad_norm": 1.280788059232807, - "learning_rate": 2.2601111345500138e-06, - "loss": 1.0343, - "step": 5264 - }, - { - "epoch": 0.47481625107092934, - "grad_norm": 1.4626216055368786, - "learning_rate": 2.2595318747051286e-06, - "loss": 0.9412, - "step": 5265 - }, - { - "epoch": 0.47490643459439963, - "grad_norm": 2.6060331039375457, - "learning_rate": 2.258952592715553e-06, - "loss": 0.9805, - "step": 5266 - }, - { - "epoch": 0.47499661811786986, - "grad_norm": 0.7824648757217676, - "learning_rate": 2.2583732886307142e-06, - "loss": 0.8516, - "step": 5267 - }, - { - "epoch": 0.47508680164134015, - "grad_norm": 1.4739900816968043, - "learning_rate": 2.2577939625000414e-06, - "loss": 0.9024, - "step": 5268 - }, - { - "epoch": 0.4751769851648104, - "grad_norm": 0.7477508038688989, - "learning_rate": 2.257214614372967e-06, - "loss": 0.8081, - "step": 5269 - }, - { - "epoch": 0.4752671686882807, - "grad_norm": 1.4184074458846068, - "learning_rate": 2.2566352442989227e-06, - "loss": 0.9775, - "step": 5270 - }, - { - "epoch": 0.4753573522117509, - "grad_norm": 1.2972074794861845, - "learning_rate": 2.256055852327344e-06, - "loss": 0.9464, - "step": 5271 - }, - { - "epoch": 0.4754475357352212, - "grad_norm": 1.460526025933951, - "learning_rate": 2.2554764385076685e-06, - "loss": 0.8905, - "step": 5272 - }, - { - "epoch": 0.4755377192586914, - "grad_norm": 1.290510901333015, - "learning_rate": 2.2548970028893348e-06, - "loss": 0.9665, - "step": 5273 - }, - { - "epoch": 0.4756279027821617, - "grad_norm": 1.5915032800562139, - "learning_rate": 2.254317545521783e-06, - "loss": 0.9694, - "step": 5274 - }, - { - "epoch": 0.47571808630563195, - "grad_norm": 1.4353824443456331, - "learning_rate": 2.253738066454457e-06, - "loss": 0.9832, - "step": 5275 - }, - { - "epoch": 0.47580826982910224, - "grad_norm": 1.2521238404046122, - "learning_rate": 2.2531585657367986e-06, - "loss": 0.9222, - "step": 5276 - }, - { - "epoch": 0.47589845335257247, - "grad_norm": 1.3808007126667088, - "learning_rate": 2.252579043418256e-06, - "loss": 0.9001, - "step": 5277 - }, - { - "epoch": 0.47598863687604276, - "grad_norm": 1.3550373457234892, - "learning_rate": 2.251999499548277e-06, - "loss": 0.9223, - "step": 5278 - }, - { - "epoch": 0.476078820399513, - "grad_norm": 1.552978419672857, - "learning_rate": 2.251419934176311e-06, - "loss": 0.9169, - "step": 5279 - }, - { - "epoch": 0.4761690039229833, - "grad_norm": 1.2126319597148942, - "learning_rate": 2.25084034735181e-06, - "loss": 1.0067, - "step": 5280 - }, - { - "epoch": 0.4762591874464535, - "grad_norm": 1.3521343631058726, - "learning_rate": 2.2502607391242274e-06, - "loss": 1.0017, - "step": 5281 - }, - { - "epoch": 0.4763493709699238, - "grad_norm": 0.7005857818067349, - "learning_rate": 2.2496811095430182e-06, - "loss": 0.8217, - "step": 5282 - }, - { - "epoch": 0.47643955449339404, - "grad_norm": 1.762369378302058, - "learning_rate": 2.249101458657641e-06, - "loss": 0.9482, - "step": 5283 - }, - { - "epoch": 0.4765297380168643, - "grad_norm": 1.5836131534601534, - "learning_rate": 2.2485217865175526e-06, - "loss": 1.0192, - "step": 5284 - }, - { - "epoch": 0.47661992154033456, - "grad_norm": 1.5514745435725452, - "learning_rate": 2.2479420931722156e-06, - "loss": 1.0805, - "step": 5285 - }, - { - "epoch": 0.47671010506380485, - "grad_norm": 0.7246773988612789, - "learning_rate": 2.2473623786710923e-06, - "loss": 0.8346, - "step": 5286 - }, - { - "epoch": 0.4768002885872751, - "grad_norm": 1.6199898879595096, - "learning_rate": 2.2467826430636465e-06, - "loss": 0.8665, - "step": 5287 - }, - { - "epoch": 0.47689047211074537, - "grad_norm": 1.2195447628975893, - "learning_rate": 2.246202886399345e-06, - "loss": 0.9741, - "step": 5288 - }, - { - "epoch": 0.47698065563421566, - "grad_norm": 1.3415242324869843, - "learning_rate": 2.2456231087276556e-06, - "loss": 0.9524, - "step": 5289 - }, - { - "epoch": 0.4770708391576859, - "grad_norm": 1.3548685113515861, - "learning_rate": 2.245043310098048e-06, - "loss": 0.8778, - "step": 5290 - }, - { - "epoch": 0.4771610226811562, - "grad_norm": 1.6570574393713993, - "learning_rate": 2.244463490559995e-06, - "loss": 0.9939, - "step": 5291 - }, - { - "epoch": 0.4772512062046264, - "grad_norm": 1.3945901468272188, - "learning_rate": 2.2438836501629683e-06, - "loss": 0.9604, - "step": 5292 - }, - { - "epoch": 0.4773413897280967, - "grad_norm": 1.2278691517922562, - "learning_rate": 2.2433037889564437e-06, - "loss": 0.9549, - "step": 5293 - }, - { - "epoch": 0.47743157325156693, - "grad_norm": 4.8836169259243505, - "learning_rate": 2.242723906989899e-06, - "loss": 0.8885, - "step": 5294 - }, - { - "epoch": 0.4775217567750372, - "grad_norm": 1.1661698895872783, - "learning_rate": 2.2421440043128114e-06, - "loss": 0.8765, - "step": 5295 - }, - { - "epoch": 0.47761194029850745, - "grad_norm": 1.4532238506206872, - "learning_rate": 2.241564080974662e-06, - "loss": 0.8607, - "step": 5296 - }, - { - "epoch": 0.47770212382197774, - "grad_norm": 9.145435182656744, - "learning_rate": 2.2409841370249343e-06, - "loss": 0.9074, - "step": 5297 - }, - { - "epoch": 0.477792307345448, - "grad_norm": 1.6299746003541535, - "learning_rate": 2.2404041725131106e-06, - "loss": 1.045, - "step": 5298 - }, - { - "epoch": 0.47788249086891826, - "grad_norm": 1.5083851887902642, - "learning_rate": 2.239824187488677e-06, - "loss": 0.9317, - "step": 5299 - }, - { - "epoch": 0.4779726743923885, - "grad_norm": 0.7868661433684248, - "learning_rate": 2.239244182001122e-06, - "loss": 0.8661, - "step": 5300 - }, - { - "epoch": 0.4780628579158588, - "grad_norm": 1.2584357287363905, - "learning_rate": 2.2386641560999336e-06, - "loss": 0.9668, - "step": 5301 - }, - { - "epoch": 0.478153041439329, - "grad_norm": 1.5637790022858449, - "learning_rate": 2.238084109834604e-06, - "loss": 0.9852, - "step": 5302 - }, - { - "epoch": 0.4782432249627993, - "grad_norm": 1.4321607032781056, - "learning_rate": 2.237504043254625e-06, - "loss": 0.9479, - "step": 5303 - }, - { - "epoch": 0.47833340848626954, - "grad_norm": 1.2940471105298408, - "learning_rate": 2.2369239564094915e-06, - "loss": 0.9233, - "step": 5304 - }, - { - "epoch": 0.47842359200973983, - "grad_norm": 1.322864301150739, - "learning_rate": 2.2363438493486995e-06, - "loss": 0.8756, - "step": 5305 - }, - { - "epoch": 0.47851377553321006, - "grad_norm": 1.5236576801525472, - "learning_rate": 2.235763722121747e-06, - "loss": 0.8358, - "step": 5306 - }, - { - "epoch": 0.47860395905668035, - "grad_norm": 1.4473334951612984, - "learning_rate": 2.2351835747781346e-06, - "loss": 0.9749, - "step": 5307 - }, - { - "epoch": 0.4786941425801506, - "grad_norm": 1.4002046554707077, - "learning_rate": 2.234603407367362e-06, - "loss": 0.9915, - "step": 5308 - }, - { - "epoch": 0.47878432610362087, - "grad_norm": 1.3484645023051407, - "learning_rate": 2.2340232199389337e-06, - "loss": 0.9685, - "step": 5309 - }, - { - "epoch": 0.4788745096270911, - "grad_norm": 1.386422138997954, - "learning_rate": 2.2334430125423538e-06, - "loss": 0.8314, - "step": 5310 - }, - { - "epoch": 0.4789646931505614, - "grad_norm": 1.4526459919609476, - "learning_rate": 2.232862785227128e-06, - "loss": 0.9993, - "step": 5311 - }, - { - "epoch": 0.4790548766740317, - "grad_norm": 1.4068178937550055, - "learning_rate": 2.232282538042766e-06, - "loss": 0.9782, - "step": 5312 - }, - { - "epoch": 0.4791450601975019, - "grad_norm": 1.6042839189094036, - "learning_rate": 2.231702271038777e-06, - "loss": 0.9927, - "step": 5313 - }, - { - "epoch": 0.4792352437209722, - "grad_norm": 1.6186616232765683, - "learning_rate": 2.231121984264673e-06, - "loss": 0.9295, - "step": 5314 - }, - { - "epoch": 0.47932542724444244, - "grad_norm": 1.4273337912460038, - "learning_rate": 2.2305416777699665e-06, - "loss": 0.9755, - "step": 5315 - }, - { - "epoch": 0.4794156107679127, - "grad_norm": 1.4401801458898662, - "learning_rate": 2.229961351604173e-06, - "loss": 0.9597, - "step": 5316 - }, - { - "epoch": 0.47950579429138296, - "grad_norm": 1.509269603263878, - "learning_rate": 2.2293810058168085e-06, - "loss": 0.9087, - "step": 5317 - }, - { - "epoch": 0.47959597781485325, - "grad_norm": 1.3179967310840317, - "learning_rate": 2.2288006404573922e-06, - "loss": 0.968, - "step": 5318 - }, - { - "epoch": 0.4796861613383235, - "grad_norm": 1.5110789283117843, - "learning_rate": 2.228220255575444e-06, - "loss": 0.9407, - "step": 5319 - }, - { - "epoch": 0.47977634486179377, - "grad_norm": 1.7126258636777585, - "learning_rate": 2.2276398512204847e-06, - "loss": 0.903, - "step": 5320 - }, - { - "epoch": 0.479866528385264, - "grad_norm": 1.5372733933844331, - "learning_rate": 2.2270594274420382e-06, - "loss": 0.9558, - "step": 5321 - }, - { - "epoch": 0.4799567119087343, - "grad_norm": 0.7116877610864493, - "learning_rate": 2.22647898428963e-06, - "loss": 0.8333, - "step": 5322 - }, - { - "epoch": 0.4800468954322045, - "grad_norm": 1.1950523741669037, - "learning_rate": 2.225898521812785e-06, - "loss": 0.9992, - "step": 5323 - }, - { - "epoch": 0.4801370789556748, - "grad_norm": 1.4218541238111608, - "learning_rate": 2.2253180400610337e-06, - "loss": 0.9966, - "step": 5324 - }, - { - "epoch": 0.48022726247914505, - "grad_norm": 1.5842270671093028, - "learning_rate": 2.2247375390839037e-06, - "loss": 0.9306, - "step": 5325 - }, - { - "epoch": 0.48031744600261533, - "grad_norm": 1.419038899453726, - "learning_rate": 2.224157018930928e-06, - "loss": 0.9212, - "step": 5326 - }, - { - "epoch": 0.48040762952608557, - "grad_norm": 1.7090008369742855, - "learning_rate": 2.2235764796516395e-06, - "loss": 0.9497, - "step": 5327 - }, - { - "epoch": 0.48049781304955586, - "grad_norm": 1.3532416877033218, - "learning_rate": 2.222995921295573e-06, - "loss": 0.956, - "step": 5328 - }, - { - "epoch": 0.4805879965730261, - "grad_norm": 1.5956733168545822, - "learning_rate": 2.222415343912265e-06, - "loss": 0.9261, - "step": 5329 - }, - { - "epoch": 0.4806781800964964, - "grad_norm": 2.0841095304938304, - "learning_rate": 2.221834747551254e-06, - "loss": 1.0177, - "step": 5330 - }, - { - "epoch": 0.4807683636199666, - "grad_norm": 1.3241205319689604, - "learning_rate": 2.221254132262078e-06, - "loss": 0.9197, - "step": 5331 - }, - { - "epoch": 0.4808585471434369, - "grad_norm": 1.4870583623939895, - "learning_rate": 2.2206734980942802e-06, - "loss": 0.9907, - "step": 5332 - }, - { - "epoch": 0.48094873066690713, - "grad_norm": 1.6808976183456223, - "learning_rate": 2.2200928450974024e-06, - "loss": 0.9238, - "step": 5333 - }, - { - "epoch": 0.4810389141903774, - "grad_norm": 1.4796512329426423, - "learning_rate": 2.21951217332099e-06, - "loss": 0.9743, - "step": 5334 - }, - { - "epoch": 0.48112909771384765, - "grad_norm": 1.8742514705624205, - "learning_rate": 2.2189314828145883e-06, - "loss": 0.871, - "step": 5335 - }, - { - "epoch": 0.48121928123731794, - "grad_norm": 1.298481830524153, - "learning_rate": 2.2183507736277453e-06, - "loss": 0.9359, - "step": 5336 - }, - { - "epoch": 0.48130946476078823, - "grad_norm": 1.4144326247252235, - "learning_rate": 2.2177700458100107e-06, - "loss": 0.9637, - "step": 5337 - }, - { - "epoch": 0.48139964828425846, - "grad_norm": 1.4629565480024975, - "learning_rate": 2.2171892994109346e-06, - "loss": 0.9311, - "step": 5338 - }, - { - "epoch": 0.48148983180772875, - "grad_norm": 1.528809295527426, - "learning_rate": 2.21660853448007e-06, - "loss": 1.0128, - "step": 5339 - }, - { - "epoch": 0.481580015331199, - "grad_norm": 1.9316551894515384, - "learning_rate": 2.2160277510669703e-06, - "loss": 0.9094, - "step": 5340 - }, - { - "epoch": 0.4816701988546693, - "grad_norm": 1.2207299313462945, - "learning_rate": 2.215446949221193e-06, - "loss": 0.947, - "step": 5341 - }, - { - "epoch": 0.4817603823781395, - "grad_norm": 1.596929781397757, - "learning_rate": 2.2148661289922924e-06, - "loss": 0.956, - "step": 5342 - }, - { - "epoch": 0.4818505659016098, - "grad_norm": 1.7610136596132588, - "learning_rate": 2.21428529042983e-06, - "loss": 1.0109, - "step": 5343 - }, - { - "epoch": 0.48194074942508003, - "grad_norm": 1.5216879947138393, - "learning_rate": 2.2137044335833647e-06, - "loss": 1.0016, - "step": 5344 - }, - { - "epoch": 0.4820309329485503, - "grad_norm": 1.5792583512771858, - "learning_rate": 2.213123558502459e-06, - "loss": 0.9353, - "step": 5345 - }, - { - "epoch": 0.48212111647202055, - "grad_norm": 1.9937202690562503, - "learning_rate": 2.2125426652366763e-06, - "loss": 0.9671, - "step": 5346 - }, - { - "epoch": 0.48221129999549084, - "grad_norm": 1.4808473933204858, - "learning_rate": 2.211961753835581e-06, - "loss": 1.0444, - "step": 5347 - }, - { - "epoch": 0.48230148351896107, - "grad_norm": 3.139272977647219, - "learning_rate": 2.21138082434874e-06, - "loss": 0.9444, - "step": 5348 - }, - { - "epoch": 0.48239166704243136, - "grad_norm": 1.6431047863041586, - "learning_rate": 2.210799876825722e-06, - "loss": 0.9855, - "step": 5349 - }, - { - "epoch": 0.4824818505659016, - "grad_norm": 1.395499697235195, - "learning_rate": 2.210218911316096e-06, - "loss": 0.922, - "step": 5350 - }, - { - "epoch": 0.4825720340893719, - "grad_norm": 1.479958076269892, - "learning_rate": 2.2096379278694336e-06, - "loss": 0.9017, - "step": 5351 - }, - { - "epoch": 0.4826622176128421, - "grad_norm": 1.374489082434609, - "learning_rate": 2.2090569265353074e-06, - "loss": 0.8938, - "step": 5352 - }, - { - "epoch": 0.4827524011363124, - "grad_norm": 1.985043030279638, - "learning_rate": 2.2084759073632912e-06, - "loss": 0.9448, - "step": 5353 - }, - { - "epoch": 0.48284258465978264, - "grad_norm": 1.433509429198097, - "learning_rate": 2.2078948704029606e-06, - "loss": 0.9087, - "step": 5354 - }, - { - "epoch": 0.4829327681832529, - "grad_norm": 1.4461618762917154, - "learning_rate": 2.2073138157038935e-06, - "loss": 0.8676, - "step": 5355 - }, - { - "epoch": 0.48302295170672316, - "grad_norm": 1.4921169563914565, - "learning_rate": 2.2067327433156687e-06, - "loss": 0.9436, - "step": 5356 - }, - { - "epoch": 0.48311313523019345, - "grad_norm": 1.6172747749698893, - "learning_rate": 2.2061516532878667e-06, - "loss": 0.9703, - "step": 5357 - }, - { - "epoch": 0.4832033187536637, - "grad_norm": 1.324412463937798, - "learning_rate": 2.2055705456700686e-06, - "loss": 0.9967, - "step": 5358 - }, - { - "epoch": 0.48329350227713397, - "grad_norm": 1.347754477639067, - "learning_rate": 2.204989420511858e-06, - "loss": 0.8513, - "step": 5359 - }, - { - "epoch": 0.48338368580060426, - "grad_norm": 1.2473908851010393, - "learning_rate": 2.20440827786282e-06, - "loss": 0.9, - "step": 5360 - }, - { - "epoch": 0.4834738693240745, - "grad_norm": 1.3462160680350062, - "learning_rate": 2.20382711777254e-06, - "loss": 0.9526, - "step": 5361 - }, - { - "epoch": 0.4835640528475448, - "grad_norm": 1.3504111617526466, - "learning_rate": 2.203245940290607e-06, - "loss": 0.9598, - "step": 5362 - }, - { - "epoch": 0.483654236371015, - "grad_norm": 1.5284972067156355, - "learning_rate": 2.2026647454666097e-06, - "loss": 0.9405, - "step": 5363 - }, - { - "epoch": 0.4837444198944853, - "grad_norm": 1.6825242329591064, - "learning_rate": 2.2020835333501384e-06, - "loss": 0.9869, - "step": 5364 - }, - { - "epoch": 0.48383460341795553, - "grad_norm": 1.4125955043002318, - "learning_rate": 2.2015023039907863e-06, - "loss": 1.0069, - "step": 5365 - }, - { - "epoch": 0.4839247869414258, - "grad_norm": 1.3032963354236717, - "learning_rate": 2.2009210574381464e-06, - "loss": 0.9313, - "step": 5366 - }, - { - "epoch": 0.48401497046489605, - "grad_norm": 1.4940909184184912, - "learning_rate": 2.2003397937418134e-06, - "loss": 0.8988, - "step": 5367 - }, - { - "epoch": 0.48410515398836634, - "grad_norm": 0.6977471791097272, - "learning_rate": 2.1997585129513852e-06, - "loss": 0.8345, - "step": 5368 - }, - { - "epoch": 0.4841953375118366, - "grad_norm": 1.4025561668586528, - "learning_rate": 2.1991772151164595e-06, - "loss": 0.974, - "step": 5369 - }, - { - "epoch": 0.48428552103530687, - "grad_norm": 1.4630502529678708, - "learning_rate": 2.1985959002866346e-06, - "loss": 0.9947, - "step": 5370 - }, - { - "epoch": 0.4843757045587771, - "grad_norm": 1.8123538517401165, - "learning_rate": 2.198014568511513e-06, - "loss": 1.0158, - "step": 5371 - }, - { - "epoch": 0.4844658880822474, - "grad_norm": 0.7653934362129557, - "learning_rate": 2.1974332198406965e-06, - "loss": 0.8134, - "step": 5372 - }, - { - "epoch": 0.4845560716057176, - "grad_norm": 1.4283470164874736, - "learning_rate": 2.196851854323789e-06, - "loss": 0.9271, - "step": 5373 - }, - { - "epoch": 0.4846462551291879, - "grad_norm": 1.566738026934926, - "learning_rate": 2.196270472010396e-06, - "loss": 0.9261, - "step": 5374 - }, - { - "epoch": 0.48473643865265814, - "grad_norm": 1.4354206470864321, - "learning_rate": 2.195689072950124e-06, - "loss": 0.9472, - "step": 5375 - }, - { - "epoch": 0.48482662217612843, - "grad_norm": 0.7146474016197185, - "learning_rate": 2.195107657192581e-06, - "loss": 0.7807, - "step": 5376 - }, - { - "epoch": 0.48491680569959866, - "grad_norm": 0.645984652440253, - "learning_rate": 2.194526224787378e-06, - "loss": 0.7501, - "step": 5377 - }, - { - "epoch": 0.48500698922306895, - "grad_norm": 1.511767246148596, - "learning_rate": 2.1939447757841236e-06, - "loss": 0.9153, - "step": 5378 - }, - { - "epoch": 0.4850971727465392, - "grad_norm": 1.5961178412278767, - "learning_rate": 2.193363310232432e-06, - "loss": 1.0107, - "step": 5379 - }, - { - "epoch": 0.4851873562700095, - "grad_norm": 1.5285464183533692, - "learning_rate": 2.192781828181917e-06, - "loss": 1.0518, - "step": 5380 - }, - { - "epoch": 0.4852775397934797, - "grad_norm": 1.2775896868045251, - "learning_rate": 2.192200329682193e-06, - "loss": 0.9681, - "step": 5381 - }, - { - "epoch": 0.48536772331695, - "grad_norm": 1.4305585698627865, - "learning_rate": 2.1916188147828767e-06, - "loss": 0.9467, - "step": 5382 - }, - { - "epoch": 0.48545790684042023, - "grad_norm": 1.3890939921358947, - "learning_rate": 2.191037283533587e-06, - "loss": 0.9627, - "step": 5383 - }, - { - "epoch": 0.4855480903638905, - "grad_norm": 1.9693279326349138, - "learning_rate": 2.1904557359839428e-06, - "loss": 0.9548, - "step": 5384 - }, - { - "epoch": 0.4856382738873608, - "grad_norm": 1.956958518132951, - "learning_rate": 2.189874172183565e-06, - "loss": 0.931, - "step": 5385 - }, - { - "epoch": 0.48572845741083104, - "grad_norm": 1.3200267505102625, - "learning_rate": 2.1892925921820763e-06, - "loss": 0.9134, - "step": 5386 - }, - { - "epoch": 0.4858186409343013, - "grad_norm": 1.4753576136121076, - "learning_rate": 2.1887109960290994e-06, - "loss": 0.9422, - "step": 5387 - }, - { - "epoch": 0.48590882445777156, - "grad_norm": 2.130408752514246, - "learning_rate": 2.18812938377426e-06, - "loss": 1.0416, - "step": 5388 - }, - { - "epoch": 0.48599900798124185, - "grad_norm": 1.3608636706852895, - "learning_rate": 2.187547755467184e-06, - "loss": 0.9887, - "step": 5389 - }, - { - "epoch": 0.4860891915047121, - "grad_norm": 1.452492689156969, - "learning_rate": 2.1869661111574994e-06, - "loss": 0.8473, - "step": 5390 - }, - { - "epoch": 0.48617937502818237, - "grad_norm": 1.6597882749359167, - "learning_rate": 2.1863844508948353e-06, - "loss": 0.9983, - "step": 5391 - }, - { - "epoch": 0.4862695585516526, - "grad_norm": 1.4210638729937264, - "learning_rate": 2.185802774728823e-06, - "loss": 0.9626, - "step": 5392 - }, - { - "epoch": 0.4863597420751229, - "grad_norm": 1.2317783713026103, - "learning_rate": 2.1852210827090927e-06, - "loss": 0.9029, - "step": 5393 - }, - { - "epoch": 0.4864499255985931, - "grad_norm": 1.4084807610326875, - "learning_rate": 2.184639374885278e-06, - "loss": 0.8893, - "step": 5394 - }, - { - "epoch": 0.4865401091220634, - "grad_norm": 1.7891601136076765, - "learning_rate": 2.184057651307014e-06, - "loss": 1.0495, - "step": 5395 - }, - { - "epoch": 0.48663029264553365, - "grad_norm": 1.902501845791256, - "learning_rate": 2.183475912023937e-06, - "loss": 1.0214, - "step": 5396 - }, - { - "epoch": 0.48672047616900393, - "grad_norm": 1.448679450936456, - "learning_rate": 2.1828941570856826e-06, - "loss": 0.9574, - "step": 5397 - }, - { - "epoch": 0.48681065969247417, - "grad_norm": 0.7185367184855066, - "learning_rate": 2.1823123865418903e-06, - "loss": 0.8232, - "step": 5398 - }, - { - "epoch": 0.48690084321594446, - "grad_norm": 1.7319252652789954, - "learning_rate": 2.1817306004422e-06, - "loss": 0.9185, - "step": 5399 - }, - { - "epoch": 0.4869910267394147, - "grad_norm": 1.4166985489492712, - "learning_rate": 2.1811487988362527e-06, - "loss": 0.9795, - "step": 5400 - }, - { - "epoch": 0.487081210262885, - "grad_norm": 1.5513983971055751, - "learning_rate": 2.1805669817736917e-06, - "loss": 1.0172, - "step": 5401 - }, - { - "epoch": 0.4871713937863552, - "grad_norm": 1.4801055543219932, - "learning_rate": 2.17998514930416e-06, - "loss": 0.7681, - "step": 5402 - }, - { - "epoch": 0.4872615773098255, - "grad_norm": 1.740780454641773, - "learning_rate": 2.1794033014773025e-06, - "loss": 0.9515, - "step": 5403 - }, - { - "epoch": 0.48735176083329573, - "grad_norm": 1.3781898085391597, - "learning_rate": 2.178821438342766e-06, - "loss": 1.0208, - "step": 5404 - }, - { - "epoch": 0.487441944356766, - "grad_norm": 1.9546232046748595, - "learning_rate": 2.1782395599501996e-06, - "loss": 0.9414, - "step": 5405 - }, - { - "epoch": 0.48753212788023625, - "grad_norm": 0.7028690508068829, - "learning_rate": 2.1776576663492498e-06, - "loss": 0.8174, - "step": 5406 - }, - { - "epoch": 0.48762231140370654, - "grad_norm": 1.7530052402546403, - "learning_rate": 2.177075757589569e-06, - "loss": 1.0288, - "step": 5407 - }, - { - "epoch": 0.48771249492717683, - "grad_norm": 1.2552258924255923, - "learning_rate": 2.176493833720808e-06, - "loss": 0.8711, - "step": 5408 - }, - { - "epoch": 0.48780267845064706, - "grad_norm": 1.3937069035183596, - "learning_rate": 2.1759118947926195e-06, - "loss": 0.9124, - "step": 5409 - }, - { - "epoch": 0.48789286197411735, - "grad_norm": 1.3375065384464342, - "learning_rate": 2.1753299408546587e-06, - "loss": 0.9591, - "step": 5410 - }, - { - "epoch": 0.4879830454975876, - "grad_norm": 1.1737852168055898, - "learning_rate": 2.1747479719565803e-06, - "loss": 0.9588, - "step": 5411 - }, - { - "epoch": 0.4880732290210579, - "grad_norm": 1.1766765139026343, - "learning_rate": 2.174165988148042e-06, - "loss": 1.0088, - "step": 5412 - }, - { - "epoch": 0.4881634125445281, - "grad_norm": 1.5781113274614635, - "learning_rate": 2.1735839894787003e-06, - "loss": 0.9154, - "step": 5413 - }, - { - "epoch": 0.4882535960679984, - "grad_norm": 0.8243361836883014, - "learning_rate": 2.1730019759982163e-06, - "loss": 0.8626, - "step": 5414 - }, - { - "epoch": 0.48834377959146863, - "grad_norm": 1.394628654946344, - "learning_rate": 2.172419947756249e-06, - "loss": 1.0002, - "step": 5415 - }, - { - "epoch": 0.4884339631149389, - "grad_norm": 1.4734288624968446, - "learning_rate": 2.171837904802461e-06, - "loss": 0.9922, - "step": 5416 - }, - { - "epoch": 0.48852414663840915, - "grad_norm": 1.289088254729105, - "learning_rate": 2.171255847186516e-06, - "loss": 0.9371, - "step": 5417 - }, - { - "epoch": 0.48861433016187944, - "grad_norm": 1.612309123725726, - "learning_rate": 2.1706737749580783e-06, - "loss": 0.9295, - "step": 5418 - }, - { - "epoch": 0.4887045136853497, - "grad_norm": 1.209718693828924, - "learning_rate": 2.1700916881668127e-06, - "loss": 1.0205, - "step": 5419 - }, - { - "epoch": 0.48879469720881996, - "grad_norm": 1.3057155556179214, - "learning_rate": 2.1695095868623862e-06, - "loss": 1.035, - "step": 5420 - }, - { - "epoch": 0.4888848807322902, - "grad_norm": 1.371328697832538, - "learning_rate": 2.168927471094467e-06, - "loss": 0.8895, - "step": 5421 - }, - { - "epoch": 0.4889750642557605, - "grad_norm": 1.768366348121399, - "learning_rate": 2.168345340912725e-06, - "loss": 0.9716, - "step": 5422 - }, - { - "epoch": 0.4890652477792307, - "grad_norm": 1.2887980796806762, - "learning_rate": 2.1677631963668298e-06, - "loss": 0.9656, - "step": 5423 - }, - { - "epoch": 0.489155431302701, - "grad_norm": 1.4228901854583358, - "learning_rate": 2.167181037506453e-06, - "loss": 0.9041, - "step": 5424 - }, - { - "epoch": 0.48924561482617124, - "grad_norm": 1.47956703928329, - "learning_rate": 2.1665988643812693e-06, - "loss": 0.8859, - "step": 5425 - }, - { - "epoch": 0.4893357983496415, - "grad_norm": 1.7651002492342474, - "learning_rate": 2.166016677040951e-06, - "loss": 0.9427, - "step": 5426 - }, - { - "epoch": 0.48942598187311176, - "grad_norm": 1.358992204810797, - "learning_rate": 2.165434475535175e-06, - "loss": 0.9371, - "step": 5427 - }, - { - "epoch": 0.48951616539658205, - "grad_norm": 1.347269550073336, - "learning_rate": 2.1648522599136173e-06, - "loss": 0.9892, - "step": 5428 - }, - { - "epoch": 0.4896063489200523, - "grad_norm": 1.3035746871101277, - "learning_rate": 2.164270030225956e-06, - "loss": 0.9219, - "step": 5429 - }, - { - "epoch": 0.48969653244352257, - "grad_norm": 1.245459798910633, - "learning_rate": 2.16368778652187e-06, - "loss": 0.8672, - "step": 5430 - }, - { - "epoch": 0.4897867159669928, - "grad_norm": 1.5287166581218024, - "learning_rate": 2.163105528851039e-06, - "loss": 0.9878, - "step": 5431 - }, - { - "epoch": 0.4898768994904631, - "grad_norm": 1.4177300538644488, - "learning_rate": 2.1625232572631448e-06, - "loss": 0.9533, - "step": 5432 - }, - { - "epoch": 0.4899670830139334, - "grad_norm": 0.8288785636801085, - "learning_rate": 2.161940971807871e-06, - "loss": 0.8416, - "step": 5433 - }, - { - "epoch": 0.4900572665374036, - "grad_norm": 1.6812272931802614, - "learning_rate": 2.1613586725348994e-06, - "loss": 0.9383, - "step": 5434 - }, - { - "epoch": 0.4901474500608739, - "grad_norm": 1.5035165257003993, - "learning_rate": 2.1607763594939176e-06, - "loss": 1.0366, - "step": 5435 - }, - { - "epoch": 0.49023763358434413, - "grad_norm": 1.5179251294978242, - "learning_rate": 2.1601940327346093e-06, - "loss": 0.8658, - "step": 5436 - }, - { - "epoch": 0.4903278171078144, - "grad_norm": 1.2743021364134406, - "learning_rate": 2.159611692306663e-06, - "loss": 1.0228, - "step": 5437 - }, - { - "epoch": 0.49041800063128466, - "grad_norm": 1.615741101341527, - "learning_rate": 2.1590293382597667e-06, - "loss": 0.9082, - "step": 5438 - }, - { - "epoch": 0.49050818415475494, - "grad_norm": 1.5967070877570588, - "learning_rate": 2.1584469706436102e-06, - "loss": 0.884, - "step": 5439 - }, - { - "epoch": 0.4905983676782252, - "grad_norm": 1.3254609259936936, - "learning_rate": 2.1578645895078855e-06, - "loss": 0.8652, - "step": 5440 - }, - { - "epoch": 0.49068855120169547, - "grad_norm": 1.3885126250602773, - "learning_rate": 2.157282194902283e-06, - "loss": 1.0073, - "step": 5441 - }, - { - "epoch": 0.4907787347251657, - "grad_norm": 1.5934184608436737, - "learning_rate": 2.1566997868764965e-06, - "loss": 0.8862, - "step": 5442 - }, - { - "epoch": 0.490868918248636, - "grad_norm": 1.7588971669780176, - "learning_rate": 2.15611736548022e-06, - "loss": 0.9115, - "step": 5443 - }, - { - "epoch": 0.4909591017721062, - "grad_norm": 1.480363102266056, - "learning_rate": 2.155534930763149e-06, - "loss": 0.9634, - "step": 5444 - }, - { - "epoch": 0.4910492852955765, - "grad_norm": 1.5693853117719114, - "learning_rate": 2.1549524827749804e-06, - "loss": 0.8417, - "step": 5445 - }, - { - "epoch": 0.49113946881904674, - "grad_norm": 1.2030269037854846, - "learning_rate": 2.1543700215654115e-06, - "loss": 0.8053, - "step": 5446 - }, - { - "epoch": 0.49122965234251703, - "grad_norm": 1.3738702554455107, - "learning_rate": 2.153787547184141e-06, - "loss": 0.9327, - "step": 5447 - }, - { - "epoch": 0.49131983586598726, - "grad_norm": 1.5056722997295837, - "learning_rate": 2.1532050596808695e-06, - "loss": 0.8765, - "step": 5448 - }, - { - "epoch": 0.49141001938945755, - "grad_norm": 1.5038662991852685, - "learning_rate": 2.152622559105297e-06, - "loss": 0.9954, - "step": 5449 - }, - { - "epoch": 0.4915002029129278, - "grad_norm": 1.5904408676202353, - "learning_rate": 2.152040045507126e-06, - "loss": 0.9953, - "step": 5450 - }, - { - "epoch": 0.4915903864363981, - "grad_norm": 1.939534810815184, - "learning_rate": 2.1514575189360607e-06, - "loss": 0.9653, - "step": 5451 - }, - { - "epoch": 0.4916805699598683, - "grad_norm": 1.3005963443237203, - "learning_rate": 2.1508749794418043e-06, - "loss": 0.9067, - "step": 5452 - }, - { - "epoch": 0.4917707534833386, - "grad_norm": 1.2952878501028346, - "learning_rate": 2.1502924270740626e-06, - "loss": 0.9211, - "step": 5453 - }, - { - "epoch": 0.49186093700680883, - "grad_norm": 1.8808425791654997, - "learning_rate": 2.1497098618825427e-06, - "loss": 0.9492, - "step": 5454 - }, - { - "epoch": 0.4919511205302791, - "grad_norm": 1.5219399941604779, - "learning_rate": 2.1491272839169516e-06, - "loss": 0.9858, - "step": 5455 - }, - { - "epoch": 0.4920413040537494, - "grad_norm": 1.355372796523806, - "learning_rate": 2.1485446932269986e-06, - "loss": 1.0575, - "step": 5456 - }, - { - "epoch": 0.49213148757721964, - "grad_norm": 1.4373206151076396, - "learning_rate": 2.147962089862393e-06, - "loss": 0.9815, - "step": 5457 - }, - { - "epoch": 0.49222167110068993, - "grad_norm": 1.770398733626239, - "learning_rate": 2.1473794738728462e-06, - "loss": 0.8556, - "step": 5458 - }, - { - "epoch": 0.49231185462416016, - "grad_norm": 1.408981806343176, - "learning_rate": 2.14679684530807e-06, - "loss": 0.9811, - "step": 5459 - }, - { - "epoch": 0.49240203814763045, - "grad_norm": 1.5139591461273247, - "learning_rate": 2.1462142042177774e-06, - "loss": 0.9322, - "step": 5460 - }, - { - "epoch": 0.4924922216711007, - "grad_norm": 1.3586308865245489, - "learning_rate": 2.145631550651683e-06, - "loss": 0.948, - "step": 5461 - }, - { - "epoch": 0.49258240519457097, - "grad_norm": 1.320960658862893, - "learning_rate": 2.1450488846595016e-06, - "loss": 1.0482, - "step": 5462 - }, - { - "epoch": 0.4926725887180412, - "grad_norm": 1.5210367702476204, - "learning_rate": 2.14446620629095e-06, - "loss": 0.9323, - "step": 5463 - }, - { - "epoch": 0.4927627722415115, - "grad_norm": 1.2751470686669995, - "learning_rate": 2.1438835155957445e-06, - "loss": 1.0183, - "step": 5464 - }, - { - "epoch": 0.4928529557649817, - "grad_norm": 1.3857148741059324, - "learning_rate": 2.143300812623604e-06, - "loss": 0.9839, - "step": 5465 - }, - { - "epoch": 0.492943139288452, - "grad_norm": 1.794053225524032, - "learning_rate": 2.1427180974242485e-06, - "loss": 0.9296, - "step": 5466 - }, - { - "epoch": 0.49303332281192225, - "grad_norm": 1.4166207119089083, - "learning_rate": 2.142135370047398e-06, - "loss": 0.973, - "step": 5467 - }, - { - "epoch": 0.49312350633539254, - "grad_norm": 1.5655388844576104, - "learning_rate": 2.1415526305427735e-06, - "loss": 0.9783, - "step": 5468 - }, - { - "epoch": 0.49321368985886277, - "grad_norm": 1.2210564430087762, - "learning_rate": 2.140969878960098e-06, - "loss": 0.871, - "step": 5469 - }, - { - "epoch": 0.49330387338233306, - "grad_norm": 1.401661528934372, - "learning_rate": 2.1403871153490956e-06, - "loss": 1.0017, - "step": 5470 - }, - { - "epoch": 0.4933940569058033, - "grad_norm": 1.3501606986863532, - "learning_rate": 2.13980433975949e-06, - "loss": 1.0708, - "step": 5471 - }, - { - "epoch": 0.4934842404292736, - "grad_norm": 1.2202923136859727, - "learning_rate": 2.1392215522410076e-06, - "loss": 0.9896, - "step": 5472 - }, - { - "epoch": 0.4935744239527438, - "grad_norm": 1.910328613492857, - "learning_rate": 2.1386387528433743e-06, - "loss": 1.065, - "step": 5473 - }, - { - "epoch": 0.4936646074762141, - "grad_norm": 1.6734075206988348, - "learning_rate": 2.1380559416163186e-06, - "loss": 0.9799, - "step": 5474 - }, - { - "epoch": 0.49375479099968433, - "grad_norm": 1.3167974869809558, - "learning_rate": 2.1374731186095685e-06, - "loss": 0.9996, - "step": 5475 - }, - { - "epoch": 0.4938449745231546, - "grad_norm": 1.2547860074832704, - "learning_rate": 2.136890283872854e-06, - "loss": 0.8962, - "step": 5476 - }, - { - "epoch": 0.49393515804662486, - "grad_norm": 1.5353248251263614, - "learning_rate": 2.136307437455906e-06, - "loss": 0.93, - "step": 5477 - }, - { - "epoch": 0.49402534157009514, - "grad_norm": 1.4721689404536593, - "learning_rate": 2.135724579408456e-06, - "loss": 0.9028, - "step": 5478 - }, - { - "epoch": 0.49411552509356543, - "grad_norm": 1.390303713941604, - "learning_rate": 2.1351417097802356e-06, - "loss": 1.04, - "step": 5479 - }, - { - "epoch": 0.49420570861703567, - "grad_norm": 1.7937960575584004, - "learning_rate": 2.1345588286209798e-06, - "loss": 0.8288, - "step": 5480 - }, - { - "epoch": 0.49429589214050595, - "grad_norm": 1.8816676161059454, - "learning_rate": 2.1339759359804227e-06, - "loss": 0.7979, - "step": 5481 - }, - { - "epoch": 0.4943860756639762, - "grad_norm": 1.2770935186796193, - "learning_rate": 2.1333930319082997e-06, - "loss": 0.9278, - "step": 5482 - }, - { - "epoch": 0.4944762591874465, - "grad_norm": 1.4140313919607872, - "learning_rate": 2.132810116454348e-06, - "loss": 0.9559, - "step": 5483 - }, - { - "epoch": 0.4945664427109167, - "grad_norm": 1.9350890879859726, - "learning_rate": 2.132227189668305e-06, - "loss": 0.9439, - "step": 5484 - }, - { - "epoch": 0.494656626234387, - "grad_norm": 1.4137588933771221, - "learning_rate": 2.1316442515999096e-06, - "loss": 0.8943, - "step": 5485 - }, - { - "epoch": 0.49474680975785723, - "grad_norm": 1.2716944179698273, - "learning_rate": 2.1310613022989e-06, - "loss": 0.9814, - "step": 5486 - }, - { - "epoch": 0.4948369932813275, - "grad_norm": 1.5562779711312322, - "learning_rate": 2.130478341815017e-06, - "loss": 0.8023, - "step": 5487 - }, - { - "epoch": 0.49492717680479775, - "grad_norm": 1.369234786557327, - "learning_rate": 2.1298953701980033e-06, - "loss": 0.8529, - "step": 5488 - }, - { - "epoch": 0.49501736032826804, - "grad_norm": 1.8364401920466218, - "learning_rate": 2.1293123874976003e-06, - "loss": 0.9647, - "step": 5489 - }, - { - "epoch": 0.4951075438517383, - "grad_norm": 1.4268704369263574, - "learning_rate": 2.1287293937635513e-06, - "loss": 0.8953, - "step": 5490 - }, - { - "epoch": 0.49519772737520856, - "grad_norm": 1.503113059394978, - "learning_rate": 2.1281463890456005e-06, - "loss": 0.9987, - "step": 5491 - }, - { - "epoch": 0.4952879108986788, - "grad_norm": 1.390142935875301, - "learning_rate": 2.127563373393493e-06, - "loss": 0.9446, - "step": 5492 - }, - { - "epoch": 0.4953780944221491, - "grad_norm": 1.52413986838788, - "learning_rate": 2.1269803468569756e-06, - "loss": 0.9418, - "step": 5493 - }, - { - "epoch": 0.4954682779456193, - "grad_norm": 1.2683963809005945, - "learning_rate": 2.126397309485794e-06, - "loss": 0.9821, - "step": 5494 - }, - { - "epoch": 0.4955584614690896, - "grad_norm": 1.42790711658595, - "learning_rate": 2.1258142613296983e-06, - "loss": 0.9032, - "step": 5495 - }, - { - "epoch": 0.49564864499255984, - "grad_norm": 1.663086439256934, - "learning_rate": 2.125231202438435e-06, - "loss": 0.9265, - "step": 5496 - }, - { - "epoch": 0.49573882851603013, - "grad_norm": 1.3656392077093182, - "learning_rate": 2.1246481328617553e-06, - "loss": 0.9797, - "step": 5497 - }, - { - "epoch": 0.49582901203950036, - "grad_norm": 3.270027369113442, - "learning_rate": 2.1240650526494096e-06, - "loss": 0.93, - "step": 5498 - }, - { - "epoch": 0.49591919556297065, - "grad_norm": 1.5750434173170325, - "learning_rate": 2.1234819618511493e-06, - "loss": 0.9083, - "step": 5499 - }, - { - "epoch": 0.4960093790864409, - "grad_norm": 0.7218976549829393, - "learning_rate": 2.122898860516728e-06, - "loss": 0.8338, - "step": 5500 - }, - { - "epoch": 0.49609956260991117, - "grad_norm": 1.307540788761912, - "learning_rate": 2.1223157486958976e-06, - "loss": 0.9376, - "step": 5501 - }, - { - "epoch": 0.4961897461333814, - "grad_norm": 1.2197842733200923, - "learning_rate": 2.1217326264384127e-06, - "loss": 0.8368, - "step": 5502 - }, - { - "epoch": 0.4962799296568517, - "grad_norm": 1.8962682567464417, - "learning_rate": 2.1211494937940296e-06, - "loss": 0.9717, - "step": 5503 - }, - { - "epoch": 0.496370113180322, - "grad_norm": 1.2273419084541206, - "learning_rate": 2.1205663508125034e-06, - "loss": 0.9601, - "step": 5504 - }, - { - "epoch": 0.4964602967037922, - "grad_norm": 1.3582494175427158, - "learning_rate": 2.1199831975435914e-06, - "loss": 0.9127, - "step": 5505 - }, - { - "epoch": 0.4965504802272625, - "grad_norm": 1.268614781507593, - "learning_rate": 2.1194000340370517e-06, - "loss": 0.9927, - "step": 5506 - }, - { - "epoch": 0.49664066375073274, - "grad_norm": 1.6247718357415586, - "learning_rate": 2.1188168603426423e-06, - "loss": 0.8926, - "step": 5507 - }, - { - "epoch": 0.496730847274203, - "grad_norm": 2.187420652857796, - "learning_rate": 2.118233676510123e-06, - "loss": 0.918, - "step": 5508 - }, - { - "epoch": 0.49682103079767326, - "grad_norm": 1.4673651801961234, - "learning_rate": 2.117650482589255e-06, - "loss": 0.9612, - "step": 5509 - }, - { - "epoch": 0.49691121432114355, - "grad_norm": 1.5793200962681506, - "learning_rate": 2.1170672786297988e-06, - "loss": 0.9838, - "step": 5510 - }, - { - "epoch": 0.4970013978446138, - "grad_norm": 1.194568876443559, - "learning_rate": 2.1164840646815174e-06, - "loss": 0.915, - "step": 5511 - }, - { - "epoch": 0.49709158136808407, - "grad_norm": 1.4005586956898046, - "learning_rate": 2.1159008407941726e-06, - "loss": 1.0274, - "step": 5512 - }, - { - "epoch": 0.4971817648915543, - "grad_norm": 1.5946575647437777, - "learning_rate": 2.1153176070175293e-06, - "loss": 0.9439, - "step": 5513 - }, - { - "epoch": 0.4972719484150246, - "grad_norm": 1.8596398942251824, - "learning_rate": 2.114734363401352e-06, - "loss": 0.8317, - "step": 5514 - }, - { - "epoch": 0.4973621319384948, - "grad_norm": 1.5351648694444329, - "learning_rate": 2.1141511099954056e-06, - "loss": 0.9461, - "step": 5515 - }, - { - "epoch": 0.4974523154619651, - "grad_norm": 1.3312740467030941, - "learning_rate": 2.1135678468494576e-06, - "loss": 0.9321, - "step": 5516 - }, - { - "epoch": 0.49754249898543534, - "grad_norm": 1.9107017440282428, - "learning_rate": 2.112984574013275e-06, - "loss": 0.9869, - "step": 5517 - }, - { - "epoch": 0.49763268250890563, - "grad_norm": 1.3345045915698304, - "learning_rate": 2.112401291536625e-06, - "loss": 0.9081, - "step": 5518 - }, - { - "epoch": 0.49772286603237587, - "grad_norm": 1.3611898475156263, - "learning_rate": 2.111817999469278e-06, - "loss": 0.99, - "step": 5519 - }, - { - "epoch": 0.49781304955584615, - "grad_norm": 1.432029515175359, - "learning_rate": 2.1112346978610016e-06, - "loss": 0.8957, - "step": 5520 - }, - { - "epoch": 0.4979032330793164, - "grad_norm": 1.434822657589111, - "learning_rate": 2.1106513867615678e-06, - "loss": 0.9066, - "step": 5521 - }, - { - "epoch": 0.4979934166027867, - "grad_norm": 1.2070873951122105, - "learning_rate": 2.110068066220748e-06, - "loss": 0.9451, - "step": 5522 - }, - { - "epoch": 0.4980836001262569, - "grad_norm": 1.2963776611471483, - "learning_rate": 2.109484736288313e-06, - "loss": 1.0157, - "step": 5523 - }, - { - "epoch": 0.4981737836497272, - "grad_norm": 1.597145577218079, - "learning_rate": 2.108901397014037e-06, - "loss": 0.9604, - "step": 5524 - }, - { - "epoch": 0.49826396717319743, - "grad_norm": 1.3303040075983164, - "learning_rate": 2.1083180484476934e-06, - "loss": 0.9544, - "step": 5525 - }, - { - "epoch": 0.4983541506966677, - "grad_norm": 1.5109033874637352, - "learning_rate": 2.1077346906390567e-06, - "loss": 0.9665, - "step": 5526 - }, - { - "epoch": 0.498444334220138, - "grad_norm": 1.4999372946030407, - "learning_rate": 2.107151323637902e-06, - "loss": 0.9664, - "step": 5527 - }, - { - "epoch": 0.49853451774360824, - "grad_norm": 1.4540490934539716, - "learning_rate": 2.106567947494006e-06, - "loss": 0.9227, - "step": 5528 - }, - { - "epoch": 0.49862470126707853, - "grad_norm": 1.755790802462098, - "learning_rate": 2.1059845622571447e-06, - "loss": 1.0135, - "step": 5529 - }, - { - "epoch": 0.49871488479054876, - "grad_norm": 1.377896898556771, - "learning_rate": 2.1054011679770956e-06, - "loss": 0.9885, - "step": 5530 - }, - { - "epoch": 0.49880506831401905, - "grad_norm": 1.525229002221079, - "learning_rate": 2.104817764703638e-06, - "loss": 0.9495, - "step": 5531 - }, - { - "epoch": 0.4988952518374893, - "grad_norm": 1.6374341179793024, - "learning_rate": 2.1042343524865516e-06, - "loss": 0.9563, - "step": 5532 - }, - { - "epoch": 0.4989854353609596, - "grad_norm": 1.7660889817671037, - "learning_rate": 2.103650931375615e-06, - "loss": 1.0567, - "step": 5533 - }, - { - "epoch": 0.4990756188844298, - "grad_norm": 1.9043915240901403, - "learning_rate": 2.1030675014206094e-06, - "loss": 0.8913, - "step": 5534 - }, - { - "epoch": 0.4991658024079001, - "grad_norm": 2.093658217510797, - "learning_rate": 2.1024840626713166e-06, - "loss": 0.8849, - "step": 5535 - }, - { - "epoch": 0.4992559859313703, - "grad_norm": 1.6011021394380454, - "learning_rate": 2.1019006151775177e-06, - "loss": 1.0512, - "step": 5536 - }, - { - "epoch": 0.4993461694548406, - "grad_norm": 1.583995438053806, - "learning_rate": 2.101317158988997e-06, - "loss": 0.9422, - "step": 5537 - }, - { - "epoch": 0.49943635297831085, - "grad_norm": 1.7661490548802967, - "learning_rate": 2.1007336941555374e-06, - "loss": 0.9178, - "step": 5538 - }, - { - "epoch": 0.49952653650178114, - "grad_norm": 1.3618564538274964, - "learning_rate": 2.1001502207269238e-06, - "loss": 0.9095, - "step": 5539 - }, - { - "epoch": 0.49961672002525137, - "grad_norm": 1.4518730759330807, - "learning_rate": 2.0995667387529407e-06, - "loss": 0.9047, - "step": 5540 - }, - { - "epoch": 0.49970690354872166, - "grad_norm": 1.315077430780663, - "learning_rate": 2.098983248283375e-06, - "loss": 0.8947, - "step": 5541 - }, - { - "epoch": 0.4997970870721919, - "grad_norm": 1.5798481513343028, - "learning_rate": 2.098399749368012e-06, - "loss": 0.9555, - "step": 5542 - }, - { - "epoch": 0.4998872705956622, - "grad_norm": 0.7084849887137963, - "learning_rate": 2.09781624205664e-06, - "loss": 0.8026, - "step": 5543 - }, - { - "epoch": 0.4999774541191324, - "grad_norm": 1.5081074480536452, - "learning_rate": 2.0972327263990477e-06, - "loss": 0.9966, - "step": 5544 - }, - { - "epoch": 0.5000676376426026, - "grad_norm": 1.5314284240157294, - "learning_rate": 2.0966492024450226e-06, - "loss": 0.8828, - "step": 5545 - }, - { - "epoch": 0.500157821166073, - "grad_norm": 1.6008795794125652, - "learning_rate": 2.0960656702443545e-06, - "loss": 1.0845, - "step": 5546 - }, - { - "epoch": 0.5002480046895432, - "grad_norm": 1.3789836224253567, - "learning_rate": 2.0954821298468343e-06, - "loss": 0.9436, - "step": 5547 - }, - { - "epoch": 0.5003381882130135, - "grad_norm": 1.2915694856489373, - "learning_rate": 2.0948985813022513e-06, - "loss": 1.0054, - "step": 5548 - }, - { - "epoch": 0.5004283717364837, - "grad_norm": 1.2522901060688756, - "learning_rate": 2.094315024660399e-06, - "loss": 0.989, - "step": 5549 - }, - { - "epoch": 0.500518555259954, - "grad_norm": 1.3870658762413788, - "learning_rate": 2.0937314599710676e-06, - "loss": 0.9747, - "step": 5550 - }, - { - "epoch": 0.5006087387834243, - "grad_norm": 1.3840937739973405, - "learning_rate": 2.0931478872840526e-06, - "loss": 0.9378, - "step": 5551 - }, - { - "epoch": 0.5006989223068945, - "grad_norm": 1.417104326223718, - "learning_rate": 2.092564306649145e-06, - "loss": 0.927, - "step": 5552 - }, - { - "epoch": 0.5007891058303648, - "grad_norm": 1.3121789812146265, - "learning_rate": 2.091980718116141e-06, - "loss": 0.9843, - "step": 5553 - }, - { - "epoch": 0.5008792893538351, - "grad_norm": 1.3111548569802307, - "learning_rate": 2.091397121734835e-06, - "loss": 0.9888, - "step": 5554 - }, - { - "epoch": 0.5009694728773053, - "grad_norm": 1.4311091957629032, - "learning_rate": 2.090813517555022e-06, - "loss": 0.9727, - "step": 5555 - }, - { - "epoch": 0.5010596564007755, - "grad_norm": 1.490605019489468, - "learning_rate": 2.0902299056265e-06, - "loss": 0.8986, - "step": 5556 - }, - { - "epoch": 0.5011498399242459, - "grad_norm": 1.5333398843017179, - "learning_rate": 2.0896462859990643e-06, - "loss": 0.9315, - "step": 5557 - }, - { - "epoch": 0.5012400234477161, - "grad_norm": 1.361589666607305, - "learning_rate": 2.089062658722513e-06, - "loss": 1.0147, - "step": 5558 - }, - { - "epoch": 0.5013302069711864, - "grad_norm": 1.4492554364354966, - "learning_rate": 2.0884790238466452e-06, - "loss": 0.9876, - "step": 5559 - }, - { - "epoch": 0.5014203904946566, - "grad_norm": 2.5724609115492845, - "learning_rate": 2.087895381421259e-06, - "loss": 0.9904, - "step": 5560 - }, - { - "epoch": 0.5015105740181269, - "grad_norm": 1.4698211335333626, - "learning_rate": 2.087311731496154e-06, - "loss": 0.9378, - "step": 5561 - }, - { - "epoch": 0.5016007575415972, - "grad_norm": 1.2824830890306496, - "learning_rate": 2.08672807412113e-06, - "loss": 0.9365, - "step": 5562 - }, - { - "epoch": 0.5016909410650674, - "grad_norm": 1.4594042070557915, - "learning_rate": 2.08614440934599e-06, - "loss": 1.039, - "step": 5563 - }, - { - "epoch": 0.5017811245885376, - "grad_norm": 1.4943456571435143, - "learning_rate": 2.0855607372205337e-06, - "loss": 0.8639, - "step": 5564 - }, - { - "epoch": 0.501871308112008, - "grad_norm": 1.5281090733608942, - "learning_rate": 2.0849770577945623e-06, - "loss": 0.9635, - "step": 5565 - }, - { - "epoch": 0.5019614916354782, - "grad_norm": 1.2594684575618644, - "learning_rate": 2.084393371117881e-06, - "loss": 0.9826, - "step": 5566 - }, - { - "epoch": 0.5020516751589484, - "grad_norm": 1.3740115080466722, - "learning_rate": 2.0838096772402902e-06, - "loss": 0.8817, - "step": 5567 - }, - { - "epoch": 0.5021418586824187, - "grad_norm": 1.5395455159182876, - "learning_rate": 2.0832259762115973e-06, - "loss": 0.8399, - "step": 5568 - }, - { - "epoch": 0.502232042205889, - "grad_norm": 1.1608858222806113, - "learning_rate": 2.082642268081605e-06, - "loss": 0.9725, - "step": 5569 - }, - { - "epoch": 0.5023222257293593, - "grad_norm": 1.577889226542733, - "learning_rate": 2.082058552900118e-06, - "loss": 0.9033, - "step": 5570 - }, - { - "epoch": 0.5024124092528295, - "grad_norm": 1.5947231238959407, - "learning_rate": 2.081474830716944e-06, - "loss": 0.9291, - "step": 5571 - }, - { - "epoch": 0.5025025927762997, - "grad_norm": 1.9791854522884333, - "learning_rate": 2.080891101581887e-06, - "loss": 0.9711, - "step": 5572 - }, - { - "epoch": 0.5025927762997701, - "grad_norm": 1.4917366185471146, - "learning_rate": 2.080307365544755e-06, - "loss": 0.9928, - "step": 5573 - }, - { - "epoch": 0.5026829598232403, - "grad_norm": 1.4903526009275634, - "learning_rate": 2.0797236226553567e-06, - "loss": 0.9844, - "step": 5574 - }, - { - "epoch": 0.5027731433467105, - "grad_norm": 1.4908705087330067, - "learning_rate": 2.079139872963499e-06, - "loss": 0.8805, - "step": 5575 - }, - { - "epoch": 0.5028633268701809, - "grad_norm": 1.8787747850642353, - "learning_rate": 2.078556116518991e-06, - "loss": 0.8466, - "step": 5576 - }, - { - "epoch": 0.5029535103936511, - "grad_norm": 1.2643679274598334, - "learning_rate": 2.077972353371642e-06, - "loss": 0.9209, - "step": 5577 - }, - { - "epoch": 0.5030436939171213, - "grad_norm": 1.3913402646679143, - "learning_rate": 2.077388583571262e-06, - "loss": 0.8928, - "step": 5578 - }, - { - "epoch": 0.5031338774405916, - "grad_norm": 0.8012333407879556, - "learning_rate": 2.0768048071676608e-06, - "loss": 0.7637, - "step": 5579 - }, - { - "epoch": 0.5032240609640619, - "grad_norm": 1.6024817085114862, - "learning_rate": 2.0762210242106505e-06, - "loss": 0.9105, - "step": 5580 - }, - { - "epoch": 0.5033142444875321, - "grad_norm": 2.7142725062228203, - "learning_rate": 2.0756372347500424e-06, - "loss": 0.9625, - "step": 5581 - }, - { - "epoch": 0.5034044280110024, - "grad_norm": 1.427966723863788, - "learning_rate": 2.0750534388356473e-06, - "loss": 1.0523, - "step": 5582 - }, - { - "epoch": 0.5034946115344726, - "grad_norm": 1.5971246781717205, - "learning_rate": 2.07446963651728e-06, - "loss": 0.901, - "step": 5583 - }, - { - "epoch": 0.503584795057943, - "grad_norm": 1.4752167720236369, - "learning_rate": 2.0738858278447516e-06, - "loss": 0.8893, - "step": 5584 - }, - { - "epoch": 0.5036749785814132, - "grad_norm": 1.4006377657811144, - "learning_rate": 2.073302012867878e-06, - "loss": 1.0384, - "step": 5585 - }, - { - "epoch": 0.5037651621048834, - "grad_norm": 1.295784733260572, - "learning_rate": 2.0727181916364725e-06, - "loss": 0.9856, - "step": 5586 - }, - { - "epoch": 0.5038553456283537, - "grad_norm": 1.293824434212363, - "learning_rate": 2.0721343642003493e-06, - "loss": 0.9792, - "step": 5587 - }, - { - "epoch": 0.503945529151824, - "grad_norm": 1.6293683391474405, - "learning_rate": 2.0715505306093247e-06, - "loss": 0.9825, - "step": 5588 - }, - { - "epoch": 0.5040357126752942, - "grad_norm": 1.6938376625714124, - "learning_rate": 2.070966690913214e-06, - "loss": 0.9792, - "step": 5589 - }, - { - "epoch": 0.5041258961987645, - "grad_norm": 1.5806776496079822, - "learning_rate": 2.0703828451618346e-06, - "loss": 0.8333, - "step": 5590 - }, - { - "epoch": 0.5042160797222347, - "grad_norm": 1.2877337919387928, - "learning_rate": 2.069798993405002e-06, - "loss": 0.9371, - "step": 5591 - }, - { - "epoch": 0.504306263245705, - "grad_norm": 1.5043201223839062, - "learning_rate": 2.0692151356925345e-06, - "loss": 0.9903, - "step": 5592 - }, - { - "epoch": 0.5043964467691753, - "grad_norm": 1.4061143597778374, - "learning_rate": 2.068631272074251e-06, - "loss": 0.9679, - "step": 5593 - }, - { - "epoch": 0.5044866302926455, - "grad_norm": 1.2572903704485128, - "learning_rate": 2.0680474025999676e-06, - "loss": 1.0781, - "step": 5594 - }, - { - "epoch": 0.5045768138161157, - "grad_norm": 0.900299067610559, - "learning_rate": 2.0674635273195055e-06, - "loss": 0.843, - "step": 5595 - }, - { - "epoch": 0.5046669973395861, - "grad_norm": 1.8495398954495734, - "learning_rate": 2.066879646282682e-06, - "loss": 0.9456, - "step": 5596 - }, - { - "epoch": 0.5047571808630563, - "grad_norm": 1.9102869281651647, - "learning_rate": 2.0662957595393194e-06, - "loss": 0.9161, - "step": 5597 - }, - { - "epoch": 0.5048473643865266, - "grad_norm": 1.4346098663922537, - "learning_rate": 2.0657118671392373e-06, - "loss": 0.9761, - "step": 5598 - }, - { - "epoch": 0.5049375479099969, - "grad_norm": 2.1792599932183383, - "learning_rate": 2.0651279691322558e-06, - "loss": 1.0047, - "step": 5599 - }, - { - "epoch": 0.5050277314334671, - "grad_norm": 1.3669238026945931, - "learning_rate": 2.0645440655681973e-06, - "loss": 0.9091, - "step": 5600 - }, - { - "epoch": 0.5051179149569374, - "grad_norm": 1.7492569980980204, - "learning_rate": 2.0639601564968826e-06, - "loss": 1.0177, - "step": 5601 - }, - { - "epoch": 0.5052080984804076, - "grad_norm": 0.7802294359464306, - "learning_rate": 2.0633762419681355e-06, - "loss": 0.8924, - "step": 5602 - }, - { - "epoch": 0.5052982820038779, - "grad_norm": 1.1860532983026058, - "learning_rate": 2.062792322031777e-06, - "loss": 0.9617, - "step": 5603 - }, - { - "epoch": 0.5053884655273482, - "grad_norm": 1.569326138421297, - "learning_rate": 2.062208396737632e-06, - "loss": 0.9104, - "step": 5604 - }, - { - "epoch": 0.5054786490508184, - "grad_norm": 1.2816057409150774, - "learning_rate": 2.0616244661355235e-06, - "loss": 0.9694, - "step": 5605 - }, - { - "epoch": 0.5055688325742886, - "grad_norm": 1.7116458071162255, - "learning_rate": 2.0610405302752752e-06, - "loss": 0.9821, - "step": 5606 - }, - { - "epoch": 0.505659016097759, - "grad_norm": 1.3454425266002032, - "learning_rate": 2.060456589206713e-06, - "loss": 0.9421, - "step": 5607 - }, - { - "epoch": 0.5057491996212292, - "grad_norm": 1.573838332335547, - "learning_rate": 2.0598726429796614e-06, - "loss": 0.8973, - "step": 5608 - }, - { - "epoch": 0.5058393831446995, - "grad_norm": 1.442623296588751, - "learning_rate": 2.059288691643945e-06, - "loss": 0.912, - "step": 5609 - }, - { - "epoch": 0.5059295666681697, - "grad_norm": 1.4441256212028277, - "learning_rate": 2.0587047352493913e-06, - "loss": 1.0297, - "step": 5610 - }, - { - "epoch": 0.50601975019164, - "grad_norm": 1.491505172270382, - "learning_rate": 2.0581207738458248e-06, - "loss": 0.9273, - "step": 5611 - }, - { - "epoch": 0.5061099337151103, - "grad_norm": 2.594882947453763, - "learning_rate": 2.0575368074830743e-06, - "loss": 0.9038, - "step": 5612 - }, - { - "epoch": 0.5062001172385805, - "grad_norm": 1.5192648058294294, - "learning_rate": 2.0569528362109667e-06, - "loss": 0.8653, - "step": 5613 - }, - { - "epoch": 0.5062903007620507, - "grad_norm": 1.354195252141262, - "learning_rate": 2.056368860079327e-06, - "loss": 0.9517, - "step": 5614 - }, - { - "epoch": 0.5063804842855211, - "grad_norm": 1.7765805796271115, - "learning_rate": 2.0557848791379874e-06, - "loss": 0.8864, - "step": 5615 - }, - { - "epoch": 0.5064706678089913, - "grad_norm": 1.5787863525892896, - "learning_rate": 2.0552008934367734e-06, - "loss": 0.8505, - "step": 5616 - }, - { - "epoch": 0.5065608513324615, - "grad_norm": 1.9367780878687573, - "learning_rate": 2.0546169030255154e-06, - "loss": 1.0242, - "step": 5617 - }, - { - "epoch": 0.5066510348559318, - "grad_norm": 1.6011045964352848, - "learning_rate": 2.054032907954041e-06, - "loss": 1.0271, - "step": 5618 - }, - { - "epoch": 0.5067412183794021, - "grad_norm": 1.507564702203307, - "learning_rate": 2.053448908272182e-06, - "loss": 0.9123, - "step": 5619 - }, - { - "epoch": 0.5068314019028723, - "grad_norm": 1.4179855639929424, - "learning_rate": 2.0528649040297673e-06, - "loss": 0.9785, - "step": 5620 - }, - { - "epoch": 0.5069215854263426, - "grad_norm": 1.3165597797821142, - "learning_rate": 2.0522808952766266e-06, - "loss": 0.9543, - "step": 5621 - }, - { - "epoch": 0.5070117689498129, - "grad_norm": 1.2667283793942492, - "learning_rate": 2.0516968820625925e-06, - "loss": 0.9971, - "step": 5622 - }, - { - "epoch": 0.5071019524732832, - "grad_norm": 1.4534905906926456, - "learning_rate": 2.051112864437495e-06, - "loss": 0.9358, - "step": 5623 - }, - { - "epoch": 0.5071921359967534, - "grad_norm": 1.7394108563043462, - "learning_rate": 2.050528842451166e-06, - "loss": 0.9206, - "step": 5624 - }, - { - "epoch": 0.5072823195202236, - "grad_norm": 1.4072713745521548, - "learning_rate": 2.049944816153438e-06, - "loss": 0.8334, - "step": 5625 - }, - { - "epoch": 0.507372503043694, - "grad_norm": 1.4051278617482923, - "learning_rate": 2.049360785594142e-06, - "loss": 0.9976, - "step": 5626 - }, - { - "epoch": 0.5074626865671642, - "grad_norm": 0.7378634930979553, - "learning_rate": 2.048776750823113e-06, - "loss": 0.7519, - "step": 5627 - }, - { - "epoch": 0.5075528700906344, - "grad_norm": 0.7036513583636748, - "learning_rate": 2.0481927118901817e-06, - "loss": 0.7968, - "step": 5628 - }, - { - "epoch": 0.5076430536141047, - "grad_norm": 1.63703933525947, - "learning_rate": 2.0476086688451824e-06, - "loss": 0.8933, - "step": 5629 - }, - { - "epoch": 0.507733237137575, - "grad_norm": 2.65159153559921, - "learning_rate": 2.04702462173795e-06, - "loss": 0.9441, - "step": 5630 - }, - { - "epoch": 0.5078234206610452, - "grad_norm": 1.7129666910713264, - "learning_rate": 2.0464405706183167e-06, - "loss": 0.9231, - "step": 5631 - }, - { - "epoch": 0.5079136041845155, - "grad_norm": 1.3978166534000522, - "learning_rate": 2.045856515536118e-06, - "loss": 0.9629, - "step": 5632 - }, - { - "epoch": 0.5080037877079857, - "grad_norm": 1.390789022309319, - "learning_rate": 2.045272456541188e-06, - "loss": 0.9666, - "step": 5633 - }, - { - "epoch": 0.508093971231456, - "grad_norm": 1.260056714870504, - "learning_rate": 2.0446883936833635e-06, - "loss": 1.0136, - "step": 5634 - }, - { - "epoch": 0.5081841547549263, - "grad_norm": 1.3838802461053514, - "learning_rate": 2.0441043270124782e-06, - "loss": 0.9218, - "step": 5635 - }, - { - "epoch": 0.5082743382783965, - "grad_norm": 1.477628857694624, - "learning_rate": 2.0435202565783683e-06, - "loss": 0.9781, - "step": 5636 - }, - { - "epoch": 0.5083645218018668, - "grad_norm": 1.8812801726193324, - "learning_rate": 2.042936182430871e-06, - "loss": 0.9107, - "step": 5637 - }, - { - "epoch": 0.5084547053253371, - "grad_norm": 1.4358105890853055, - "learning_rate": 2.0423521046198206e-06, - "loss": 0.9503, - "step": 5638 - }, - { - "epoch": 0.5085448888488073, - "grad_norm": 1.4453679254575307, - "learning_rate": 2.041768023195056e-06, - "loss": 0.8665, - "step": 5639 - }, - { - "epoch": 0.5086350723722776, - "grad_norm": 1.8475714698357741, - "learning_rate": 2.0411839382064126e-06, - "loss": 1.0201, - "step": 5640 - }, - { - "epoch": 0.5087252558957478, - "grad_norm": 1.4425798306118065, - "learning_rate": 2.040599849703729e-06, - "loss": 0.842, - "step": 5641 - }, - { - "epoch": 0.5088154394192181, - "grad_norm": 3.2214283586416816, - "learning_rate": 2.040015757736843e-06, - "loss": 0.8856, - "step": 5642 - }, - { - "epoch": 0.5089056229426884, - "grad_norm": 1.446789950349016, - "learning_rate": 2.039431662355591e-06, - "loss": 0.8735, - "step": 5643 - }, - { - "epoch": 0.5089958064661586, - "grad_norm": 1.4095437365428043, - "learning_rate": 2.0388475636098126e-06, - "loss": 0.9617, - "step": 5644 - }, - { - "epoch": 0.5090859899896288, - "grad_norm": 1.2269848624821396, - "learning_rate": 2.038263461549346e-06, - "loss": 0.9875, - "step": 5645 - }, - { - "epoch": 0.5091761735130992, - "grad_norm": 1.5860084555401721, - "learning_rate": 2.0376793562240297e-06, - "loss": 0.9473, - "step": 5646 - }, - { - "epoch": 0.5092663570365694, - "grad_norm": 1.3405598320049426, - "learning_rate": 2.037095247683703e-06, - "loss": 0.9542, - "step": 5647 - }, - { - "epoch": 0.5093565405600397, - "grad_norm": 1.469630363078843, - "learning_rate": 2.0365111359782046e-06, - "loss": 0.9049, - "step": 5648 - }, - { - "epoch": 0.50944672408351, - "grad_norm": 1.3510813656015799, - "learning_rate": 2.0359270211573757e-06, - "loss": 0.9746, - "step": 5649 - }, - { - "epoch": 0.5095369076069802, - "grad_norm": 1.7817865868520324, - "learning_rate": 2.0353429032710545e-06, - "loss": 1.0123, - "step": 5650 - }, - { - "epoch": 0.5096270911304505, - "grad_norm": 1.292902089039952, - "learning_rate": 2.0347587823690825e-06, - "loss": 0.9296, - "step": 5651 - }, - { - "epoch": 0.5097172746539207, - "grad_norm": 1.4370161154313899, - "learning_rate": 2.034174658501299e-06, - "loss": 0.9414, - "step": 5652 - }, - { - "epoch": 0.509807458177391, - "grad_norm": 1.4310235623139917, - "learning_rate": 2.0335905317175453e-06, - "loss": 0.9867, - "step": 5653 - }, - { - "epoch": 0.5098976417008613, - "grad_norm": 2.261704940621972, - "learning_rate": 2.033006402067663e-06, - "loss": 0.9445, - "step": 5654 - }, - { - "epoch": 0.5099878252243315, - "grad_norm": 1.826953871463593, - "learning_rate": 2.0324222696014912e-06, - "loss": 0.861, - "step": 5655 - }, - { - "epoch": 0.5100780087478017, - "grad_norm": 1.4557401853450338, - "learning_rate": 2.0318381343688733e-06, - "loss": 0.9815, - "step": 5656 - }, - { - "epoch": 0.5101681922712721, - "grad_norm": 1.3314913553293208, - "learning_rate": 2.0312539964196505e-06, - "loss": 0.9262, - "step": 5657 - }, - { - "epoch": 0.5102583757947423, - "grad_norm": 1.5576402237961497, - "learning_rate": 2.030669855803664e-06, - "loss": 0.9314, - "step": 5658 - }, - { - "epoch": 0.5103485593182125, - "grad_norm": 1.4279976952905515, - "learning_rate": 2.0300857125707563e-06, - "loss": 0.9938, - "step": 5659 - }, - { - "epoch": 0.5104387428416828, - "grad_norm": 2.008781229095689, - "learning_rate": 2.0295015667707697e-06, - "loss": 0.9777, - "step": 5660 - }, - { - "epoch": 0.5105289263651531, - "grad_norm": 1.389548796072511, - "learning_rate": 2.0289174184535472e-06, - "loss": 0.9534, - "step": 5661 - }, - { - "epoch": 0.5106191098886234, - "grad_norm": 1.3091540147852756, - "learning_rate": 2.02833326766893e-06, - "loss": 0.9194, - "step": 5662 - }, - { - "epoch": 0.5107092934120936, - "grad_norm": 1.4691750743248178, - "learning_rate": 2.027749114466763e-06, - "loss": 1.0188, - "step": 5663 - }, - { - "epoch": 0.5107994769355638, - "grad_norm": 1.3843218222709854, - "learning_rate": 2.027164958896889e-06, - "loss": 0.9923, - "step": 5664 - }, - { - "epoch": 0.5108896604590342, - "grad_norm": 1.9150647089212074, - "learning_rate": 2.02658080100915e-06, - "loss": 1.0648, - "step": 5665 - }, - { - "epoch": 0.5109798439825044, - "grad_norm": 1.1718688964684802, - "learning_rate": 2.0259966408533915e-06, - "loss": 1.018, - "step": 5666 - }, - { - "epoch": 0.5110700275059746, - "grad_norm": 1.9164022042083644, - "learning_rate": 2.025412478479455e-06, - "loss": 1.0407, - "step": 5667 - }, - { - "epoch": 0.5111602110294449, - "grad_norm": 1.5899781866196754, - "learning_rate": 2.0248283139371862e-06, - "loss": 0.9953, - "step": 5668 - }, - { - "epoch": 0.5112503945529152, - "grad_norm": 1.4490148190042673, - "learning_rate": 2.024244147276429e-06, - "loss": 0.9196, - "step": 5669 - }, - { - "epoch": 0.5113405780763854, - "grad_norm": 1.2020387576260614, - "learning_rate": 2.023659978547027e-06, - "loss": 1.0173, - "step": 5670 - }, - { - "epoch": 0.5114307615998557, - "grad_norm": 1.227547996810168, - "learning_rate": 2.023075807798826e-06, - "loss": 0.9431, - "step": 5671 - }, - { - "epoch": 0.511520945123326, - "grad_norm": 1.3037893893179213, - "learning_rate": 2.0224916350816696e-06, - "loss": 0.8782, - "step": 5672 - }, - { - "epoch": 0.5116111286467963, - "grad_norm": 1.380054935591975, - "learning_rate": 2.0219074604454026e-06, - "loss": 0.9631, - "step": 5673 - }, - { - "epoch": 0.5117013121702665, - "grad_norm": 1.5871089874274615, - "learning_rate": 2.02132328393987e-06, - "loss": 0.9544, - "step": 5674 - }, - { - "epoch": 0.5117914956937367, - "grad_norm": 5.02059984521632, - "learning_rate": 2.0207391056149174e-06, - "loss": 0.9015, - "step": 5675 - }, - { - "epoch": 0.5118816792172071, - "grad_norm": 1.6727314831893687, - "learning_rate": 2.020154925520391e-06, - "loss": 0.9207, - "step": 5676 - }, - { - "epoch": 0.5119718627406773, - "grad_norm": 1.3535430597164357, - "learning_rate": 2.0195707437061332e-06, - "loss": 1.0512, - "step": 5677 - }, - { - "epoch": 0.5120620462641475, - "grad_norm": 1.716433750326427, - "learning_rate": 2.0189865602219934e-06, - "loss": 0.9587, - "step": 5678 - }, - { - "epoch": 0.5121522297876178, - "grad_norm": 1.5630954370826071, - "learning_rate": 2.0184023751178154e-06, - "loss": 1.0266, - "step": 5679 - }, - { - "epoch": 0.5122424133110881, - "grad_norm": 1.2999193698380447, - "learning_rate": 2.017818188443444e-06, - "loss": 0.9032, - "step": 5680 - }, - { - "epoch": 0.5123325968345583, - "grad_norm": 1.6304470614648174, - "learning_rate": 2.017234000248728e-06, - "loss": 0.9664, - "step": 5681 - }, - { - "epoch": 0.5124227803580286, - "grad_norm": 1.360545388986771, - "learning_rate": 2.0166498105835108e-06, - "loss": 0.8892, - "step": 5682 - }, - { - "epoch": 0.5125129638814988, - "grad_norm": 1.190674303691019, - "learning_rate": 2.0160656194976407e-06, - "loss": 0.8901, - "step": 5683 - }, - { - "epoch": 0.5126031474049692, - "grad_norm": 1.573339325494724, - "learning_rate": 2.0154814270409634e-06, - "loss": 0.8982, - "step": 5684 - }, - { - "epoch": 0.5126933309284394, - "grad_norm": 1.8167480147227417, - "learning_rate": 2.0148972332633247e-06, - "loss": 0.9093, - "step": 5685 - }, - { - "epoch": 0.5127835144519096, - "grad_norm": 1.5636702923239008, - "learning_rate": 2.0143130382145733e-06, - "loss": 1.0125, - "step": 5686 - }, - { - "epoch": 0.5128736979753798, - "grad_norm": 1.9012143897057978, - "learning_rate": 2.0137288419445533e-06, - "loss": 0.9268, - "step": 5687 - }, - { - "epoch": 0.5129638814988502, - "grad_norm": 1.4962405936982015, - "learning_rate": 2.0131446445031134e-06, - "loss": 0.991, - "step": 5688 - }, - { - "epoch": 0.5130540650223204, - "grad_norm": 1.5524499788687713, - "learning_rate": 2.0125604459400994e-06, - "loss": 1.0282, - "step": 5689 - }, - { - "epoch": 0.5131442485457907, - "grad_norm": 1.625439657742832, - "learning_rate": 2.0119762463053596e-06, - "loss": 1.0139, - "step": 5690 - }, - { - "epoch": 0.5132344320692609, - "grad_norm": 1.5525434268645806, - "learning_rate": 2.0113920456487406e-06, - "loss": 0.9445, - "step": 5691 - }, - { - "epoch": 0.5133246155927312, - "grad_norm": 1.3147608041241408, - "learning_rate": 2.010807844020088e-06, - "loss": 0.9448, - "step": 5692 - }, - { - "epoch": 0.5134147991162015, - "grad_norm": 1.3917289809402027, - "learning_rate": 2.0102236414692524e-06, - "loss": 0.9999, - "step": 5693 - }, - { - "epoch": 0.5135049826396717, - "grad_norm": 1.2911678345675242, - "learning_rate": 2.0096394380460777e-06, - "loss": 0.9392, - "step": 5694 - }, - { - "epoch": 0.513595166163142, - "grad_norm": 1.9362713394318978, - "learning_rate": 2.0090552338004136e-06, - "loss": 0.9756, - "step": 5695 - }, - { - "epoch": 0.5136853496866123, - "grad_norm": 1.4927633083288834, - "learning_rate": 2.0084710287821077e-06, - "loss": 0.9476, - "step": 5696 - }, - { - "epoch": 0.5137755332100825, - "grad_norm": 1.4963322939824872, - "learning_rate": 2.007886823041006e-06, - "loss": 0.9983, - "step": 5697 - }, - { - "epoch": 0.5138657167335527, - "grad_norm": 1.60326075880687, - "learning_rate": 2.0073026166269577e-06, - "loss": 0.9702, - "step": 5698 - }, - { - "epoch": 0.5139559002570231, - "grad_norm": 1.2519602663785694, - "learning_rate": 2.0067184095898093e-06, - "loss": 1.0074, - "step": 5699 - }, - { - "epoch": 0.5140460837804933, - "grad_norm": 1.5081558231977517, - "learning_rate": 2.0061342019794094e-06, - "loss": 0.9678, - "step": 5700 - }, - { - "epoch": 0.5141362673039636, - "grad_norm": 1.2895661728332928, - "learning_rate": 2.0055499938456058e-06, - "loss": 1.0262, - "step": 5701 - }, - { - "epoch": 0.5142264508274338, - "grad_norm": 1.454909428795877, - "learning_rate": 2.0049657852382464e-06, - "loss": 0.9223, - "step": 5702 - }, - { - "epoch": 0.5143166343509041, - "grad_norm": 1.9862028099297542, - "learning_rate": 2.0043815762071782e-06, - "loss": 0.8855, - "step": 5703 - }, - { - "epoch": 0.5144068178743744, - "grad_norm": 1.4528613928143135, - "learning_rate": 2.0037973668022492e-06, - "loss": 0.9194, - "step": 5704 - }, - { - "epoch": 0.5144970013978446, - "grad_norm": 1.6062306176585857, - "learning_rate": 2.003213157073309e-06, - "loss": 0.8393, - "step": 5705 - }, - { - "epoch": 0.5145871849213148, - "grad_norm": 1.4053187359685726, - "learning_rate": 2.002628947070204e-06, - "loss": 0.945, - "step": 5706 - }, - { - "epoch": 0.5146773684447852, - "grad_norm": 1.4329430041268816, - "learning_rate": 2.002044736842783e-06, - "loss": 1.0139, - "step": 5707 - }, - { - "epoch": 0.5147675519682554, - "grad_norm": 1.4290172902923848, - "learning_rate": 2.001460526440894e-06, - "loss": 0.9424, - "step": 5708 - }, - { - "epoch": 0.5148577354917256, - "grad_norm": 0.672086771037641, - "learning_rate": 2.0008763159143843e-06, - "loss": 0.791, - "step": 5709 - }, - { - "epoch": 0.5149479190151959, - "grad_norm": 1.3374936593876932, - "learning_rate": 2.000292105313103e-06, - "loss": 0.9745, - "step": 5710 - }, - { - "epoch": 0.5150381025386662, - "grad_norm": 1.3342108918166435, - "learning_rate": 1.999707894686897e-06, - "loss": 0.9953, - "step": 5711 - }, - { - "epoch": 0.5151282860621365, - "grad_norm": 1.3719320315799932, - "learning_rate": 1.9991236840856155e-06, - "loss": 0.9465, - "step": 5712 - }, - { - "epoch": 0.5152184695856067, - "grad_norm": 1.4651027603043216, - "learning_rate": 1.9985394735591065e-06, - "loss": 0.9222, - "step": 5713 - }, - { - "epoch": 0.5153086531090769, - "grad_norm": 1.5041458533512255, - "learning_rate": 1.997955263157217e-06, - "loss": 0.9976, - "step": 5714 - }, - { - "epoch": 0.5153988366325473, - "grad_norm": 1.4070534847860126, - "learning_rate": 1.997371052929796e-06, - "loss": 0.9783, - "step": 5715 - }, - { - "epoch": 0.5154890201560175, - "grad_norm": 1.5119933044144058, - "learning_rate": 1.996786842926691e-06, - "loss": 0.9038, - "step": 5716 - }, - { - "epoch": 0.5155792036794877, - "grad_norm": 1.7905688212105144, - "learning_rate": 1.9962026331977506e-06, - "loss": 0.9632, - "step": 5717 - }, - { - "epoch": 0.5156693872029581, - "grad_norm": 1.4925008714376236, - "learning_rate": 1.9956184237928224e-06, - "loss": 0.9662, - "step": 5718 - }, - { - "epoch": 0.5157595707264283, - "grad_norm": 1.479821055909608, - "learning_rate": 1.995034214761754e-06, - "loss": 0.9371, - "step": 5719 - }, - { - "epoch": 0.5158497542498985, - "grad_norm": 1.1481320760028149, - "learning_rate": 1.9944500061543945e-06, - "loss": 0.9265, - "step": 5720 - }, - { - "epoch": 0.5159399377733688, - "grad_norm": 1.701629401959354, - "learning_rate": 1.99386579802059e-06, - "loss": 0.9728, - "step": 5721 - }, - { - "epoch": 0.5160301212968391, - "grad_norm": 1.740085034539749, - "learning_rate": 1.993281590410191e-06, - "loss": 0.9316, - "step": 5722 - }, - { - "epoch": 0.5161203048203094, - "grad_norm": 1.5230317969406244, - "learning_rate": 1.992697383373043e-06, - "loss": 0.9026, - "step": 5723 - }, - { - "epoch": 0.5162104883437796, - "grad_norm": 0.8657719511487272, - "learning_rate": 1.9921131769589937e-06, - "loss": 0.8932, - "step": 5724 - }, - { - "epoch": 0.5163006718672498, - "grad_norm": 1.6104848053578693, - "learning_rate": 1.991528971217893e-06, - "loss": 0.9524, - "step": 5725 - }, - { - "epoch": 0.5163908553907202, - "grad_norm": 1.4072861562866184, - "learning_rate": 1.9909447661995858e-06, - "loss": 0.946, - "step": 5726 - }, - { - "epoch": 0.5164810389141904, - "grad_norm": 1.6635142394758649, - "learning_rate": 1.990360561953922e-06, - "loss": 0.9663, - "step": 5727 - }, - { - "epoch": 0.5165712224376606, - "grad_norm": 1.2291182115131278, - "learning_rate": 1.9897763585307483e-06, - "loss": 0.976, - "step": 5728 - }, - { - "epoch": 0.5166614059611309, - "grad_norm": 1.4039573630409645, - "learning_rate": 1.989192155979912e-06, - "loss": 0.9872, - "step": 5729 - }, - { - "epoch": 0.5167515894846012, - "grad_norm": 1.357657300992812, - "learning_rate": 1.98860795435126e-06, - "loss": 0.9833, - "step": 5730 - }, - { - "epoch": 0.5168417730080714, - "grad_norm": 1.3864835297864668, - "learning_rate": 1.9880237536946406e-06, - "loss": 0.9579, - "step": 5731 - }, - { - "epoch": 0.5169319565315417, - "grad_norm": 1.6363033231784827, - "learning_rate": 1.987439554059901e-06, - "loss": 0.9155, - "step": 5732 - }, - { - "epoch": 0.5170221400550119, - "grad_norm": 1.4210326665550317, - "learning_rate": 1.9868553554968864e-06, - "loss": 0.925, - "step": 5733 - }, - { - "epoch": 0.5171123235784822, - "grad_norm": 1.5589180898087724, - "learning_rate": 1.986271158055447e-06, - "loss": 0.9047, - "step": 5734 - }, - { - "epoch": 0.5172025071019525, - "grad_norm": 1.429678411402763, - "learning_rate": 1.9856869617854273e-06, - "loss": 0.9455, - "step": 5735 - }, - { - "epoch": 0.5172926906254227, - "grad_norm": 1.5025589097037517, - "learning_rate": 1.9851027667366746e-06, - "loss": 0.9244, - "step": 5736 - }, - { - "epoch": 0.517382874148893, - "grad_norm": 1.338324884258946, - "learning_rate": 1.984518572959037e-06, - "loss": 1.0485, - "step": 5737 - }, - { - "epoch": 0.5174730576723633, - "grad_norm": 1.5891179107937579, - "learning_rate": 1.9839343805023587e-06, - "loss": 0.8687, - "step": 5738 - }, - { - "epoch": 0.5175632411958335, - "grad_norm": 1.617140027515029, - "learning_rate": 1.9833501894164886e-06, - "loss": 1.0392, - "step": 5739 - }, - { - "epoch": 0.5176534247193038, - "grad_norm": 1.3821018785335162, - "learning_rate": 1.982765999751273e-06, - "loss": 0.9891, - "step": 5740 - }, - { - "epoch": 0.5177436082427741, - "grad_norm": 1.7344303981422131, - "learning_rate": 1.9821818115565553e-06, - "loss": 0.9809, - "step": 5741 - }, - { - "epoch": 0.5178337917662443, - "grad_norm": 1.4293939674763612, - "learning_rate": 1.9815976248821853e-06, - "loss": 0.8628, - "step": 5742 - }, - { - "epoch": 0.5179239752897146, - "grad_norm": 1.21094153618909, - "learning_rate": 1.981013439778007e-06, - "loss": 0.9637, - "step": 5743 - }, - { - "epoch": 0.5180141588131848, - "grad_norm": 1.516112101826232, - "learning_rate": 1.9804292562938666e-06, - "loss": 0.9603, - "step": 5744 - }, - { - "epoch": 0.5181043423366551, - "grad_norm": 1.4871927120264097, - "learning_rate": 1.97984507447961e-06, - "loss": 0.9937, - "step": 5745 - }, - { - "epoch": 0.5181945258601254, - "grad_norm": 1.5115391824219935, - "learning_rate": 1.9792608943850824e-06, - "loss": 0.8939, - "step": 5746 - }, - { - "epoch": 0.5182847093835956, - "grad_norm": 1.4083260364832502, - "learning_rate": 1.9786767160601305e-06, - "loss": 0.9316, - "step": 5747 - }, - { - "epoch": 0.5183748929070658, - "grad_norm": 1.4289165982413439, - "learning_rate": 1.9780925395545977e-06, - "loss": 0.879, - "step": 5748 - }, - { - "epoch": 0.5184650764305362, - "grad_norm": 1.0402799658645505, - "learning_rate": 1.9775083649183306e-06, - "loss": 0.8022, - "step": 5749 - }, - { - "epoch": 0.5185552599540064, - "grad_norm": 1.4714167308727784, - "learning_rate": 1.976924192201174e-06, - "loss": 0.9937, - "step": 5750 - }, - { - "epoch": 0.5186454434774767, - "grad_norm": 1.413239255231672, - "learning_rate": 1.9763400214529723e-06, - "loss": 0.8842, - "step": 5751 - }, - { - "epoch": 0.5187356270009469, - "grad_norm": 1.4146381702758886, - "learning_rate": 1.9757558527235713e-06, - "loss": 0.8226, - "step": 5752 - }, - { - "epoch": 0.5188258105244172, - "grad_norm": 1.4595854515577942, - "learning_rate": 1.9751716860628136e-06, - "loss": 0.9442, - "step": 5753 - }, - { - "epoch": 0.5189159940478875, - "grad_norm": 1.5410145419930446, - "learning_rate": 1.974587521520545e-06, - "loss": 0.975, - "step": 5754 - }, - { - "epoch": 0.5190061775713577, - "grad_norm": 1.3408493850123375, - "learning_rate": 1.9740033591466088e-06, - "loss": 0.9895, - "step": 5755 - }, - { - "epoch": 0.5190963610948279, - "grad_norm": 1.3416974559749866, - "learning_rate": 1.97341919899085e-06, - "loss": 1.057, - "step": 5756 - }, - { - "epoch": 0.5191865446182983, - "grad_norm": 1.5038894454215352, - "learning_rate": 1.9728350411031114e-06, - "loss": 0.9772, - "step": 5757 - }, - { - "epoch": 0.5192767281417685, - "grad_norm": 1.5821956596186713, - "learning_rate": 1.9722508855332367e-06, - "loss": 1.0221, - "step": 5758 - }, - { - "epoch": 0.5193669116652387, - "grad_norm": 1.2928403577414975, - "learning_rate": 1.97166673233107e-06, - "loss": 0.8859, - "step": 5759 - }, - { - "epoch": 0.519457095188709, - "grad_norm": 1.308310042414924, - "learning_rate": 1.971082581546453e-06, - "loss": 0.9938, - "step": 5760 - }, - { - "epoch": 0.5195472787121793, - "grad_norm": 1.4520487747932789, - "learning_rate": 1.9704984332292306e-06, - "loss": 0.9759, - "step": 5761 - }, - { - "epoch": 0.5196374622356495, - "grad_norm": 1.3979774869298531, - "learning_rate": 1.9699142874292444e-06, - "loss": 1.0391, - "step": 5762 - }, - { - "epoch": 0.5197276457591198, - "grad_norm": 1.773218754793061, - "learning_rate": 1.969330144196336e-06, - "loss": 1.0433, - "step": 5763 - }, - { - "epoch": 0.51981782928259, - "grad_norm": 2.3888733873184713, - "learning_rate": 1.9687460035803497e-06, - "loss": 0.9044, - "step": 5764 - }, - { - "epoch": 0.5199080128060604, - "grad_norm": 2.3645813951750445, - "learning_rate": 1.9681618656311265e-06, - "loss": 0.9263, - "step": 5765 - }, - { - "epoch": 0.5199981963295306, - "grad_norm": 1.3602546388995354, - "learning_rate": 1.9675777303985086e-06, - "loss": 0.8639, - "step": 5766 - }, - { - "epoch": 0.5200883798530008, - "grad_norm": 1.5309089748001137, - "learning_rate": 1.9669935979323376e-06, - "loss": 0.9845, - "step": 5767 - }, - { - "epoch": 0.5201785633764712, - "grad_norm": 1.4360877023867773, - "learning_rate": 1.9664094682824545e-06, - "loss": 1.0114, - "step": 5768 - }, - { - "epoch": 0.5202687468999414, - "grad_norm": 1.3383078711111702, - "learning_rate": 1.965825341498701e-06, - "loss": 0.9137, - "step": 5769 - }, - { - "epoch": 0.5203589304234116, - "grad_norm": 1.2755513345548488, - "learning_rate": 1.9652412176309177e-06, - "loss": 1.0236, - "step": 5770 - }, - { - "epoch": 0.5204491139468819, - "grad_norm": 1.176764143930711, - "learning_rate": 1.9646570967289453e-06, - "loss": 0.8185, - "step": 5771 - }, - { - "epoch": 0.5205392974703522, - "grad_norm": 1.3505773757806625, - "learning_rate": 1.9640729788426246e-06, - "loss": 0.9875, - "step": 5772 - }, - { - "epoch": 0.5206294809938224, - "grad_norm": 1.2973777072436097, - "learning_rate": 1.963488864021795e-06, - "loss": 0.9863, - "step": 5773 - }, - { - "epoch": 0.5207196645172927, - "grad_norm": 1.4231872372091985, - "learning_rate": 1.962904752316298e-06, - "loss": 0.8444, - "step": 5774 - }, - { - "epoch": 0.5208098480407629, - "grad_norm": 1.610831703241783, - "learning_rate": 1.9623206437759706e-06, - "loss": 0.8769, - "step": 5775 - }, - { - "epoch": 0.5209000315642333, - "grad_norm": 1.4037767489594595, - "learning_rate": 1.9617365384506545e-06, - "loss": 0.91, - "step": 5776 - }, - { - "epoch": 0.5209902150877035, - "grad_norm": 1.1529426699344767, - "learning_rate": 1.9611524363901872e-06, - "loss": 0.9361, - "step": 5777 - }, - { - "epoch": 0.5210803986111737, - "grad_norm": 1.3882442626147606, - "learning_rate": 1.960568337644409e-06, - "loss": 0.9453, - "step": 5778 - }, - { - "epoch": 0.521170582134644, - "grad_norm": 1.3067414796506567, - "learning_rate": 1.9599842422631576e-06, - "loss": 0.9349, - "step": 5779 - }, - { - "epoch": 0.5212607656581143, - "grad_norm": 1.4204553077002127, - "learning_rate": 1.9594001502962703e-06, - "loss": 0.9428, - "step": 5780 - }, - { - "epoch": 0.5213509491815845, - "grad_norm": 1.1928043354605489, - "learning_rate": 1.9588160617935868e-06, - "loss": 1.0105, - "step": 5781 - }, - { - "epoch": 0.5214411327050548, - "grad_norm": 1.660467859082904, - "learning_rate": 1.958231976804944e-06, - "loss": 1.04, - "step": 5782 - }, - { - "epoch": 0.521531316228525, - "grad_norm": 1.6518089926121424, - "learning_rate": 1.957647895380179e-06, - "loss": 0.8786, - "step": 5783 - }, - { - "epoch": 0.5216214997519953, - "grad_norm": 1.3032437405358173, - "learning_rate": 1.9570638175691297e-06, - "loss": 0.8755, - "step": 5784 - }, - { - "epoch": 0.5217116832754656, - "grad_norm": 1.6176784341492736, - "learning_rate": 1.956479743421632e-06, - "loss": 0.9879, - "step": 5785 - }, - { - "epoch": 0.5218018667989358, - "grad_norm": 1.4296568987130163, - "learning_rate": 1.955895672987522e-06, - "loss": 0.8896, - "step": 5786 - }, - { - "epoch": 0.521892050322406, - "grad_norm": 1.337894456398483, - "learning_rate": 1.9553116063166367e-06, - "loss": 0.8493, - "step": 5787 - }, - { - "epoch": 0.5219822338458764, - "grad_norm": 1.3433486427794663, - "learning_rate": 1.954727543458812e-06, - "loss": 0.8614, - "step": 5788 - }, - { - "epoch": 0.5220724173693466, - "grad_norm": 1.4547716061117968, - "learning_rate": 1.954143484463883e-06, - "loss": 0.9762, - "step": 5789 - }, - { - "epoch": 0.5221626008928169, - "grad_norm": 1.6636349840769613, - "learning_rate": 1.9535594293816836e-06, - "loss": 0.9421, - "step": 5790 - }, - { - "epoch": 0.5222527844162872, - "grad_norm": 2.6338414748156787, - "learning_rate": 1.952975378262051e-06, - "loss": 0.899, - "step": 5791 - }, - { - "epoch": 0.5223429679397574, - "grad_norm": 1.6133590568640808, - "learning_rate": 1.952391331154817e-06, - "loss": 0.9019, - "step": 5792 - }, - { - "epoch": 0.5224331514632277, - "grad_norm": 1.5702355968566055, - "learning_rate": 1.9518072881098185e-06, - "loss": 1.0062, - "step": 5793 - }, - { - "epoch": 0.5225233349866979, - "grad_norm": 1.424736831858544, - "learning_rate": 1.9512232491768867e-06, - "loss": 0.9387, - "step": 5794 - }, - { - "epoch": 0.5226135185101682, - "grad_norm": 1.256776560793917, - "learning_rate": 1.9506392144058573e-06, - "loss": 0.9435, - "step": 5795 - }, - { - "epoch": 0.5227037020336385, - "grad_norm": 1.4961520748870967, - "learning_rate": 1.9500551838465623e-06, - "loss": 0.9424, - "step": 5796 - }, - { - "epoch": 0.5227938855571087, - "grad_norm": 1.4771251918240265, - "learning_rate": 1.9494711575488337e-06, - "loss": 0.9568, - "step": 5797 - }, - { - "epoch": 0.5228840690805789, - "grad_norm": 2.0172245273397733, - "learning_rate": 1.948887135562505e-06, - "loss": 0.9029, - "step": 5798 - }, - { - "epoch": 0.5229742526040493, - "grad_norm": 1.3774384204787518, - "learning_rate": 1.9483031179374074e-06, - "loss": 0.9998, - "step": 5799 - }, - { - "epoch": 0.5230644361275195, - "grad_norm": 1.4975878871544024, - "learning_rate": 1.9477191047233736e-06, - "loss": 1.0052, - "step": 5800 - }, - { - "epoch": 0.5231546196509897, - "grad_norm": 1.6537586403964863, - "learning_rate": 1.9471350959702334e-06, - "loss": 0.8305, - "step": 5801 - }, - { - "epoch": 0.52324480317446, - "grad_norm": 1.5783158743972607, - "learning_rate": 1.9465510917278184e-06, - "loss": 0.9518, - "step": 5802 - }, - { - "epoch": 0.5233349866979303, - "grad_norm": 1.4297055102605347, - "learning_rate": 1.9459670920459593e-06, - "loss": 0.9422, - "step": 5803 - }, - { - "epoch": 0.5234251702214006, - "grad_norm": 1.5652053300996374, - "learning_rate": 1.945383096974485e-06, - "loss": 0.8686, - "step": 5804 - }, - { - "epoch": 0.5235153537448708, - "grad_norm": 2.2674018995099585, - "learning_rate": 1.944799106563227e-06, - "loss": 0.8684, - "step": 5805 - }, - { - "epoch": 0.523605537268341, - "grad_norm": 1.430164778625738, - "learning_rate": 1.9442151208620133e-06, - "loss": 0.9762, - "step": 5806 - }, - { - "epoch": 0.5236957207918114, - "grad_norm": 1.4317011592708158, - "learning_rate": 1.943631139920672e-06, - "loss": 0.9954, - "step": 5807 - }, - { - "epoch": 0.5237859043152816, - "grad_norm": 1.2714049611876783, - "learning_rate": 1.943047163789034e-06, - "loss": 0.8983, - "step": 5808 - }, - { - "epoch": 0.5238760878387518, - "grad_norm": 1.3388005413270876, - "learning_rate": 1.942463192516925e-06, - "loss": 0.9099, - "step": 5809 - }, - { - "epoch": 0.5239662713622221, - "grad_norm": 1.3460651680274782, - "learning_rate": 1.9418792261541746e-06, - "loss": 0.9636, - "step": 5810 - }, - { - "epoch": 0.5240564548856924, - "grad_norm": 1.4924344163092236, - "learning_rate": 1.9412952647506094e-06, - "loss": 0.9046, - "step": 5811 - }, - { - "epoch": 0.5241466384091626, - "grad_norm": 1.3493546108809802, - "learning_rate": 1.9407113083560552e-06, - "loss": 0.9251, - "step": 5812 - }, - { - "epoch": 0.5242368219326329, - "grad_norm": 1.648270946603919, - "learning_rate": 1.940127357020339e-06, - "loss": 0.933, - "step": 5813 - }, - { - "epoch": 0.5243270054561032, - "grad_norm": 1.65156114590823, - "learning_rate": 1.939543410793287e-06, - "loss": 0.9556, - "step": 5814 - }, - { - "epoch": 0.5244171889795735, - "grad_norm": 1.4522894641401858, - "learning_rate": 1.9389594697247246e-06, - "loss": 0.9856, - "step": 5815 - }, - { - "epoch": 0.5245073725030437, - "grad_norm": 1.5348543836133428, - "learning_rate": 1.9383755338644763e-06, - "loss": 1.0194, - "step": 5816 - }, - { - "epoch": 0.5245975560265139, - "grad_norm": 3.413348160304212, - "learning_rate": 1.937791603262368e-06, - "loss": 0.9747, - "step": 5817 - }, - { - "epoch": 0.5246877395499843, - "grad_norm": 1.439539913216429, - "learning_rate": 1.9372076779682235e-06, - "loss": 0.8937, - "step": 5818 - }, - { - "epoch": 0.5247779230734545, - "grad_norm": 1.6004666184457061, - "learning_rate": 1.9366237580318648e-06, - "loss": 0.8892, - "step": 5819 - }, - { - "epoch": 0.5248681065969247, - "grad_norm": 1.2206950195043396, - "learning_rate": 1.9360398435031176e-06, - "loss": 0.9493, - "step": 5820 - }, - { - "epoch": 0.524958290120395, - "grad_norm": 1.6069355967256205, - "learning_rate": 1.9354559344318025e-06, - "loss": 0.9643, - "step": 5821 - }, - { - "epoch": 0.5250484736438653, - "grad_norm": 1.8859578685403287, - "learning_rate": 1.934872030867744e-06, - "loss": 1.0006, - "step": 5822 - }, - { - "epoch": 0.5251386571673355, - "grad_norm": 1.5143928631946364, - "learning_rate": 1.934288132860763e-06, - "loss": 0.9535, - "step": 5823 - }, - { - "epoch": 0.5252288406908058, - "grad_norm": 1.3127417568804491, - "learning_rate": 1.93370424046068e-06, - "loss": 0.9489, - "step": 5824 - }, - { - "epoch": 0.525319024214276, - "grad_norm": 1.8487818315820728, - "learning_rate": 1.9331203537173177e-06, - "loss": 1.0082, - "step": 5825 - }, - { - "epoch": 0.5254092077377464, - "grad_norm": 1.5424965437691174, - "learning_rate": 1.9325364726804947e-06, - "loss": 1.059, - "step": 5826 - }, - { - "epoch": 0.5254993912612166, - "grad_norm": 1.5290202040350984, - "learning_rate": 1.9319525974000327e-06, - "loss": 0.8757, - "step": 5827 - }, - { - "epoch": 0.5255895747846868, - "grad_norm": 1.9914196853513717, - "learning_rate": 1.93136872792575e-06, - "loss": 1.0051, - "step": 5828 - }, - { - "epoch": 0.525679758308157, - "grad_norm": 1.3848810188196408, - "learning_rate": 1.9307848643074653e-06, - "loss": 0.8971, - "step": 5829 - }, - { - "epoch": 0.5257699418316274, - "grad_norm": 1.3287244902321504, - "learning_rate": 1.9302010065949985e-06, - "loss": 0.984, - "step": 5830 - }, - { - "epoch": 0.5258601253550976, - "grad_norm": 1.4362701462410887, - "learning_rate": 1.9296171548381657e-06, - "loss": 0.9074, - "step": 5831 - }, - { - "epoch": 0.5259503088785679, - "grad_norm": 1.370730403564569, - "learning_rate": 1.9290333090867862e-06, - "loss": 0.9358, - "step": 5832 - }, - { - "epoch": 0.5260404924020381, - "grad_norm": 1.5638957084264655, - "learning_rate": 1.928449469390676e-06, - "loss": 0.9501, - "step": 5833 - }, - { - "epoch": 0.5261306759255084, - "grad_norm": 1.4467798156408178, - "learning_rate": 1.927865635799651e-06, - "loss": 0.919, - "step": 5834 - }, - { - "epoch": 0.5262208594489787, - "grad_norm": 1.2677831733739988, - "learning_rate": 1.927281808363528e-06, - "loss": 0.8791, - "step": 5835 - }, - { - "epoch": 0.5263110429724489, - "grad_norm": 1.757402160221924, - "learning_rate": 1.9266979871321216e-06, - "loss": 1.0229, - "step": 5836 - }, - { - "epoch": 0.5264012264959192, - "grad_norm": 1.417498802305654, - "learning_rate": 1.9261141721552482e-06, - "loss": 0.8988, - "step": 5837 - }, - { - "epoch": 0.5264914100193895, - "grad_norm": 1.346232184289556, - "learning_rate": 1.9255303634827204e-06, - "loss": 1.0145, - "step": 5838 - }, - { - "epoch": 0.5265815935428597, - "grad_norm": 1.4232117793212744, - "learning_rate": 1.924946561164352e-06, - "loss": 1.011, - "step": 5839 - }, - { - "epoch": 0.52667177706633, - "grad_norm": 1.4065133166152317, - "learning_rate": 1.9243627652499582e-06, - "loss": 0.8943, - "step": 5840 - }, - { - "epoch": 0.5267619605898003, - "grad_norm": 1.666092185511888, - "learning_rate": 1.9237789757893493e-06, - "loss": 1.011, - "step": 5841 - }, - { - "epoch": 0.5268521441132705, - "grad_norm": 1.4492978796730016, - "learning_rate": 1.9231951928323395e-06, - "loss": 0.9817, - "step": 5842 - }, - { - "epoch": 0.5269423276367408, - "grad_norm": 1.6516585135362922, - "learning_rate": 1.922611416428738e-06, - "loss": 0.9034, - "step": 5843 - }, - { - "epoch": 0.527032511160211, - "grad_norm": 1.4563706769590132, - "learning_rate": 1.922027646628358e-06, - "loss": 0.9648, - "step": 5844 - }, - { - "epoch": 0.5271226946836813, - "grad_norm": 0.7522934576953607, - "learning_rate": 1.9214438834810092e-06, - "loss": 0.8469, - "step": 5845 - }, - { - "epoch": 0.5272128782071516, - "grad_norm": 1.3450808477388023, - "learning_rate": 1.9208601270365008e-06, - "loss": 0.8759, - "step": 5846 - }, - { - "epoch": 0.5273030617306218, - "grad_norm": 1.6350356304007732, - "learning_rate": 1.9202763773446435e-06, - "loss": 1.0356, - "step": 5847 - }, - { - "epoch": 0.527393245254092, - "grad_norm": 1.4448554888735508, - "learning_rate": 1.9196926344552444e-06, - "loss": 0.8471, - "step": 5848 - }, - { - "epoch": 0.5274834287775624, - "grad_norm": 1.400364746535646, - "learning_rate": 1.919108898418113e-06, - "loss": 0.9729, - "step": 5849 - }, - { - "epoch": 0.5275736123010326, - "grad_norm": 1.2229374571315905, - "learning_rate": 1.918525169283057e-06, - "loss": 0.839, - "step": 5850 - }, - { - "epoch": 0.5276637958245028, - "grad_norm": 1.4999689257899882, - "learning_rate": 1.9179414470998817e-06, - "loss": 0.9763, - "step": 5851 - }, - { - "epoch": 0.5277539793479731, - "grad_norm": 1.9366536291452885, - "learning_rate": 1.917357731918395e-06, - "loss": 0.9062, - "step": 5852 - }, - { - "epoch": 0.5278441628714434, - "grad_norm": 1.3248582944129, - "learning_rate": 1.9167740237884025e-06, - "loss": 1.0773, - "step": 5853 - }, - { - "epoch": 0.5279343463949137, - "grad_norm": 2.4497773478017324, - "learning_rate": 1.916190322759709e-06, - "loss": 0.9843, - "step": 5854 - }, - { - "epoch": 0.5280245299183839, - "grad_norm": 1.43472793990739, - "learning_rate": 1.91560662888212e-06, - "loss": 1.0477, - "step": 5855 - }, - { - "epoch": 0.5281147134418541, - "grad_norm": 1.2623504380120856, - "learning_rate": 1.915022942205438e-06, - "loss": 0.9881, - "step": 5856 - }, - { - "epoch": 0.5282048969653245, - "grad_norm": 1.4893850003321765, - "learning_rate": 1.914439262779468e-06, - "loss": 0.9437, - "step": 5857 - }, - { - "epoch": 0.5282950804887947, - "grad_norm": 2.102583172907298, - "learning_rate": 1.9138555906540103e-06, - "loss": 0.8883, - "step": 5858 - }, - { - "epoch": 0.5283852640122649, - "grad_norm": 1.4136812739443223, - "learning_rate": 1.91327192587887e-06, - "loss": 0.861, - "step": 5859 - }, - { - "epoch": 0.5284754475357353, - "grad_norm": 1.5852670990834326, - "learning_rate": 1.912688268503846e-06, - "loss": 0.9236, - "step": 5860 - }, - { - "epoch": 0.5285656310592055, - "grad_norm": 1.5016006036185108, - "learning_rate": 1.912104618578741e-06, - "loss": 0.9015, - "step": 5861 - }, - { - "epoch": 0.5286558145826757, - "grad_norm": 1.4332311519898595, - "learning_rate": 1.9115209761533554e-06, - "loss": 0.9257, - "step": 5862 - }, - { - "epoch": 0.528745998106146, - "grad_norm": 1.763389928170336, - "learning_rate": 1.9109373412774863e-06, - "loss": 0.8604, - "step": 5863 - }, - { - "epoch": 0.5288361816296163, - "grad_norm": 1.2947454317921603, - "learning_rate": 1.910353714000936e-06, - "loss": 0.947, - "step": 5864 - }, - { - "epoch": 0.5289263651530866, - "grad_norm": 1.6944117812185666, - "learning_rate": 1.9097700943734997e-06, - "loss": 0.9028, - "step": 5865 - }, - { - "epoch": 0.5290165486765568, - "grad_norm": 1.3936541280573986, - "learning_rate": 1.909186482444977e-06, - "loss": 0.8484, - "step": 5866 - }, - { - "epoch": 0.529106732200027, - "grad_norm": 1.518307308151243, - "learning_rate": 1.9086028782651652e-06, - "loss": 0.8501, - "step": 5867 - }, - { - "epoch": 0.5291969157234974, - "grad_norm": 1.4611187990676917, - "learning_rate": 1.908019281883859e-06, - "loss": 0.9448, - "step": 5868 - }, - { - "epoch": 0.5292870992469676, - "grad_norm": 1.2381092517665788, - "learning_rate": 1.9074356933508545e-06, - "loss": 0.9384, - "step": 5869 - }, - { - "epoch": 0.5293772827704378, - "grad_norm": 1.886351113209932, - "learning_rate": 1.9068521127159477e-06, - "loss": 0.9831, - "step": 5870 - }, - { - "epoch": 0.5294674662939081, - "grad_norm": 1.346907742272704, - "learning_rate": 1.9062685400289322e-06, - "loss": 0.8993, - "step": 5871 - }, - { - "epoch": 0.5295576498173784, - "grad_norm": 1.2634371461379512, - "learning_rate": 1.9056849753396018e-06, - "loss": 0.9203, - "step": 5872 - }, - { - "epoch": 0.5296478333408486, - "grad_norm": 1.7131899286190093, - "learning_rate": 1.9051014186977485e-06, - "loss": 0.9813, - "step": 5873 - }, - { - "epoch": 0.5297380168643189, - "grad_norm": 1.262743931990057, - "learning_rate": 1.9045178701531664e-06, - "loss": 0.9438, - "step": 5874 - }, - { - "epoch": 0.5298282003877891, - "grad_norm": 1.2254509718545457, - "learning_rate": 1.903934329755645e-06, - "loss": 0.9344, - "step": 5875 - }, - { - "epoch": 0.5299183839112594, - "grad_norm": 1.3393341518907211, - "learning_rate": 1.9033507975549775e-06, - "loss": 0.9848, - "step": 5876 - }, - { - "epoch": 0.5300085674347297, - "grad_norm": 1.3591403977977785, - "learning_rate": 1.9027672736009525e-06, - "loss": 0.935, - "step": 5877 - }, - { - "epoch": 0.5300987509581999, - "grad_norm": 1.2927788540245013, - "learning_rate": 1.9021837579433593e-06, - "loss": 0.8941, - "step": 5878 - }, - { - "epoch": 0.5301889344816701, - "grad_norm": 1.3619272673172005, - "learning_rate": 1.901600250631988e-06, - "loss": 0.927, - "step": 5879 - }, - { - "epoch": 0.5302791180051405, - "grad_norm": 1.3976519205571132, - "learning_rate": 1.901016751716625e-06, - "loss": 0.9332, - "step": 5880 - }, - { - "epoch": 0.5303693015286107, - "grad_norm": 1.3957136681727274, - "learning_rate": 1.9004332612470593e-06, - "loss": 0.914, - "step": 5881 - }, - { - "epoch": 0.530459485052081, - "grad_norm": 1.1149517175458747, - "learning_rate": 1.8998497792730763e-06, - "loss": 0.973, - "step": 5882 - }, - { - "epoch": 0.5305496685755512, - "grad_norm": 1.453667980186209, - "learning_rate": 1.8992663058444629e-06, - "loss": 0.9956, - "step": 5883 - }, - { - "epoch": 0.5306398520990215, - "grad_norm": 1.2512178210713758, - "learning_rate": 1.8986828410110032e-06, - "loss": 0.9817, - "step": 5884 - }, - { - "epoch": 0.5307300356224918, - "grad_norm": 1.560632963515417, - "learning_rate": 1.8980993848224823e-06, - "loss": 0.9348, - "step": 5885 - }, - { - "epoch": 0.530820219145962, - "grad_norm": 1.35713146588557, - "learning_rate": 1.8975159373286843e-06, - "loss": 0.9017, - "step": 5886 - }, - { - "epoch": 0.5309104026694323, - "grad_norm": 1.326438775824824, - "learning_rate": 1.8969324985793904e-06, - "loss": 0.975, - "step": 5887 - }, - { - "epoch": 0.5310005861929026, - "grad_norm": 1.5202193479629906, - "learning_rate": 1.8963490686243851e-06, - "loss": 0.9393, - "step": 5888 - }, - { - "epoch": 0.5310907697163728, - "grad_norm": 1.6173122192057072, - "learning_rate": 1.8957656475134486e-06, - "loss": 0.8776, - "step": 5889 - }, - { - "epoch": 0.531180953239843, - "grad_norm": 1.5024582270243811, - "learning_rate": 1.895182235296361e-06, - "loss": 0.9259, - "step": 5890 - }, - { - "epoch": 0.5312711367633134, - "grad_norm": 1.4064491978599472, - "learning_rate": 1.8945988320229042e-06, - "loss": 0.8673, - "step": 5891 - }, - { - "epoch": 0.5313613202867836, - "grad_norm": 1.5160164868205648, - "learning_rate": 1.8940154377428553e-06, - "loss": 1.0344, - "step": 5892 - }, - { - "epoch": 0.5314515038102539, - "grad_norm": 1.3688924131814237, - "learning_rate": 1.8934320525059944e-06, - "loss": 0.9717, - "step": 5893 - }, - { - "epoch": 0.5315416873337241, - "grad_norm": 2.9197154504793064, - "learning_rate": 1.8928486763620984e-06, - "loss": 0.8349, - "step": 5894 - }, - { - "epoch": 0.5316318708571944, - "grad_norm": 1.4734613054603656, - "learning_rate": 1.892265309360943e-06, - "loss": 0.9616, - "step": 5895 - }, - { - "epoch": 0.5317220543806647, - "grad_norm": 1.5345587501901106, - "learning_rate": 1.8916819515523067e-06, - "loss": 0.932, - "step": 5896 - }, - { - "epoch": 0.5318122379041349, - "grad_norm": 1.3478176738976688, - "learning_rate": 1.891098602985963e-06, - "loss": 0.8661, - "step": 5897 - }, - { - "epoch": 0.5319024214276051, - "grad_norm": 1.3930031233572466, - "learning_rate": 1.8905152637116872e-06, - "loss": 0.9622, - "step": 5898 - }, - { - "epoch": 0.5319926049510755, - "grad_norm": 1.512909255342743, - "learning_rate": 1.8899319337792527e-06, - "loss": 0.9797, - "step": 5899 - }, - { - "epoch": 0.5320827884745457, - "grad_norm": 1.1634498878739612, - "learning_rate": 1.8893486132384325e-06, - "loss": 0.9813, - "step": 5900 - }, - { - "epoch": 0.5321729719980159, - "grad_norm": 1.4888492811843412, - "learning_rate": 1.888765302138999e-06, - "loss": 0.9201, - "step": 5901 - }, - { - "epoch": 0.5322631555214862, - "grad_norm": 1.456461531702385, - "learning_rate": 1.8881820005307224e-06, - "loss": 0.8896, - "step": 5902 - }, - { - "epoch": 0.5323533390449565, - "grad_norm": 1.2488028992988855, - "learning_rate": 1.8875987084633748e-06, - "loss": 0.8823, - "step": 5903 - }, - { - "epoch": 0.5324435225684268, - "grad_norm": 1.67934211906042, - "learning_rate": 1.8870154259867246e-06, - "loss": 0.9763, - "step": 5904 - }, - { - "epoch": 0.532533706091897, - "grad_norm": 1.4203937066660939, - "learning_rate": 1.886432153150542e-06, - "loss": 0.8765, - "step": 5905 - }, - { - "epoch": 0.5326238896153672, - "grad_norm": 1.4402298921288215, - "learning_rate": 1.8858488900045944e-06, - "loss": 0.9256, - "step": 5906 - }, - { - "epoch": 0.5327140731388376, - "grad_norm": 2.8043721056235236, - "learning_rate": 1.885265636598648e-06, - "loss": 0.8933, - "step": 5907 - }, - { - "epoch": 0.5328042566623078, - "grad_norm": 1.5301565820691077, - "learning_rate": 1.884682392982471e-06, - "loss": 0.9771, - "step": 5908 - }, - { - "epoch": 0.532894440185778, - "grad_norm": 1.7745412341131983, - "learning_rate": 1.8840991592058274e-06, - "loss": 1.0075, - "step": 5909 - }, - { - "epoch": 0.5329846237092484, - "grad_norm": 1.2308951471639522, - "learning_rate": 1.8835159353184828e-06, - "loss": 0.9614, - "step": 5910 - }, - { - "epoch": 0.5330748072327186, - "grad_norm": 1.4543086327859545, - "learning_rate": 1.8829327213702013e-06, - "loss": 1.0119, - "step": 5911 - }, - { - "epoch": 0.5331649907561888, - "grad_norm": 1.394481198564003, - "learning_rate": 1.8823495174107452e-06, - "loss": 1.0107, - "step": 5912 - }, - { - "epoch": 0.5332551742796591, - "grad_norm": 1.6180662264856334, - "learning_rate": 1.8817663234898773e-06, - "loss": 0.9174, - "step": 5913 - }, - { - "epoch": 0.5333453578031294, - "grad_norm": 1.4590966361491708, - "learning_rate": 1.881183139657358e-06, - "loss": 0.9511, - "step": 5914 - }, - { - "epoch": 0.5334355413265996, - "grad_norm": 1.304866070433707, - "learning_rate": 1.8805999659629488e-06, - "loss": 0.9541, - "step": 5915 - }, - { - "epoch": 0.5335257248500699, - "grad_norm": 0.683181158621081, - "learning_rate": 1.880016802456409e-06, - "loss": 0.7638, - "step": 5916 - }, - { - "epoch": 0.5336159083735401, - "grad_norm": 1.7281611019730072, - "learning_rate": 1.8794336491874964e-06, - "loss": 0.8865, - "step": 5917 - }, - { - "epoch": 0.5337060918970105, - "grad_norm": 3.626630087719432, - "learning_rate": 1.8788505062059708e-06, - "loss": 0.9106, - "step": 5918 - }, - { - "epoch": 0.5337962754204807, - "grad_norm": 1.3037308252082112, - "learning_rate": 1.8782673735615869e-06, - "loss": 0.9332, - "step": 5919 - }, - { - "epoch": 0.5338864589439509, - "grad_norm": 1.4952594233453362, - "learning_rate": 1.8776842513041026e-06, - "loss": 0.9033, - "step": 5920 - }, - { - "epoch": 0.5339766424674212, - "grad_norm": 1.6606084948126163, - "learning_rate": 1.8771011394832727e-06, - "loss": 0.9113, - "step": 5921 - }, - { - "epoch": 0.5340668259908915, - "grad_norm": 1.4513066718234713, - "learning_rate": 1.8765180381488501e-06, - "loss": 0.9259, - "step": 5922 - }, - { - "epoch": 0.5341570095143617, - "grad_norm": 0.7864525944245655, - "learning_rate": 1.8759349473505905e-06, - "loss": 0.9021, - "step": 5923 - }, - { - "epoch": 0.534247193037832, - "grad_norm": 1.5209673295584716, - "learning_rate": 1.8753518671382447e-06, - "loss": 0.8758, - "step": 5924 - }, - { - "epoch": 0.5343373765613022, - "grad_norm": 1.2272953832506717, - "learning_rate": 1.8747687975615649e-06, - "loss": 0.9242, - "step": 5925 - }, - { - "epoch": 0.5344275600847725, - "grad_norm": 1.4333768675351104, - "learning_rate": 1.874185738670302e-06, - "loss": 0.8422, - "step": 5926 - }, - { - "epoch": 0.5345177436082428, - "grad_norm": 1.319736106604322, - "learning_rate": 1.8736026905142057e-06, - "loss": 0.923, - "step": 5927 - }, - { - "epoch": 0.534607927131713, - "grad_norm": 1.3427263397280003, - "learning_rate": 1.873019653143025e-06, - "loss": 0.9176, - "step": 5928 - }, - { - "epoch": 0.5346981106551832, - "grad_norm": 1.3256590445758745, - "learning_rate": 1.8724366266065069e-06, - "loss": 0.9542, - "step": 5929 - }, - { - "epoch": 0.5347882941786536, - "grad_norm": 1.5666187170820838, - "learning_rate": 1.8718536109543998e-06, - "loss": 1.0096, - "step": 5930 - }, - { - "epoch": 0.5348784777021238, - "grad_norm": 1.386042728321413, - "learning_rate": 1.8712706062364485e-06, - "loss": 0.9526, - "step": 5931 - }, - { - "epoch": 0.534968661225594, - "grad_norm": 1.428477875166675, - "learning_rate": 1.8706876125024e-06, - "loss": 0.8351, - "step": 5932 - }, - { - "epoch": 0.5350588447490644, - "grad_norm": 1.559478890851281, - "learning_rate": 1.870104629801997e-06, - "loss": 0.9811, - "step": 5933 - }, - { - "epoch": 0.5351490282725346, - "grad_norm": 1.993988898144027, - "learning_rate": 1.8695216581849823e-06, - "loss": 0.9581, - "step": 5934 - }, - { - "epoch": 0.5352392117960049, - "grad_norm": 1.3361419610506442, - "learning_rate": 1.8689386977011003e-06, - "loss": 0.9128, - "step": 5935 - }, - { - "epoch": 0.5353293953194751, - "grad_norm": 0.7568601936637211, - "learning_rate": 1.8683557484000903e-06, - "loss": 0.7968, - "step": 5936 - }, - { - "epoch": 0.5354195788429454, - "grad_norm": 1.4485361800173773, - "learning_rate": 1.8677728103316947e-06, - "loss": 0.9728, - "step": 5937 - }, - { - "epoch": 0.5355097623664157, - "grad_norm": 1.2140517029349664, - "learning_rate": 1.8671898835456518e-06, - "loss": 0.8993, - "step": 5938 - }, - { - "epoch": 0.5355999458898859, - "grad_norm": 1.41534332100328, - "learning_rate": 1.8666069680917003e-06, - "loss": 1.0429, - "step": 5939 - }, - { - "epoch": 0.5356901294133561, - "grad_norm": 1.610175146799935, - "learning_rate": 1.8660240640195775e-06, - "loss": 0.9013, - "step": 5940 - }, - { - "epoch": 0.5357803129368265, - "grad_norm": 1.2499908446930346, - "learning_rate": 1.8654411713790203e-06, - "loss": 0.9788, - "step": 5941 - }, - { - "epoch": 0.5358704964602967, - "grad_norm": 1.601307992949608, - "learning_rate": 1.8648582902197648e-06, - "loss": 0.9655, - "step": 5942 - }, - { - "epoch": 0.535960679983767, - "grad_norm": 1.6863919611643337, - "learning_rate": 1.8642754205915452e-06, - "loss": 0.9507, - "step": 5943 - }, - { - "epoch": 0.5360508635072372, - "grad_norm": 1.5750957914014163, - "learning_rate": 1.8636925625440943e-06, - "loss": 0.8946, - "step": 5944 - }, - { - "epoch": 0.5361410470307075, - "grad_norm": 1.6731262515964322, - "learning_rate": 1.863109716127146e-06, - "loss": 0.8572, - "step": 5945 - }, - { - "epoch": 0.5362312305541778, - "grad_norm": 1.259714950606895, - "learning_rate": 1.8625268813904311e-06, - "loss": 0.9862, - "step": 5946 - }, - { - "epoch": 0.536321414077648, - "grad_norm": 2.1609947963794935, - "learning_rate": 1.8619440583836814e-06, - "loss": 0.9656, - "step": 5947 - }, - { - "epoch": 0.5364115976011182, - "grad_norm": 1.4544809229580922, - "learning_rate": 1.8613612471566249e-06, - "loss": 0.9462, - "step": 5948 - }, - { - "epoch": 0.5365017811245886, - "grad_norm": 1.5325815289266038, - "learning_rate": 1.8607784477589922e-06, - "loss": 0.89, - "step": 5949 - }, - { - "epoch": 0.5365919646480588, - "grad_norm": 1.3917075240634398, - "learning_rate": 1.8601956602405103e-06, - "loss": 0.966, - "step": 5950 - }, - { - "epoch": 0.536682148171529, - "grad_norm": 1.3639900529112805, - "learning_rate": 1.8596128846509043e-06, - "loss": 0.9854, - "step": 5951 - }, - { - "epoch": 0.5367723316949993, - "grad_norm": 1.3444064998438938, - "learning_rate": 1.859030121039902e-06, - "loss": 0.8966, - "step": 5952 - }, - { - "epoch": 0.5368625152184696, - "grad_norm": 1.3349166003261608, - "learning_rate": 1.8584473694572268e-06, - "loss": 0.8928, - "step": 5953 - }, - { - "epoch": 0.5369526987419398, - "grad_norm": 1.480042086595676, - "learning_rate": 1.8578646299526026e-06, - "loss": 0.9686, - "step": 5954 - }, - { - "epoch": 0.5370428822654101, - "grad_norm": 1.3145321599770592, - "learning_rate": 1.8572819025757518e-06, - "loss": 0.9105, - "step": 5955 - }, - { - "epoch": 0.5371330657888804, - "grad_norm": 1.4329769460310398, - "learning_rate": 1.8566991873763959e-06, - "loss": 0.9614, - "step": 5956 - }, - { - "epoch": 0.5372232493123507, - "grad_norm": 1.6258058750500222, - "learning_rate": 1.856116484404256e-06, - "loss": 0.8913, - "step": 5957 - }, - { - "epoch": 0.5373134328358209, - "grad_norm": 1.4700178801818022, - "learning_rate": 1.8555337937090502e-06, - "loss": 0.9054, - "step": 5958 - }, - { - "epoch": 0.5374036163592911, - "grad_norm": 1.6181227333993018, - "learning_rate": 1.8549511153404984e-06, - "loss": 0.9202, - "step": 5959 - }, - { - "epoch": 0.5374937998827615, - "grad_norm": 1.491393192447914, - "learning_rate": 1.854368449348317e-06, - "loss": 1.0525, - "step": 5960 - }, - { - "epoch": 0.5375839834062317, - "grad_norm": 1.443895707477398, - "learning_rate": 1.853785795782222e-06, - "loss": 0.908, - "step": 5961 - }, - { - "epoch": 0.5376741669297019, - "grad_norm": 1.4508807992958135, - "learning_rate": 1.85320315469193e-06, - "loss": 0.8076, - "step": 5962 - }, - { - "epoch": 0.5377643504531722, - "grad_norm": 0.8036999614795781, - "learning_rate": 1.8526205261271534e-06, - "loss": 0.8392, - "step": 5963 - }, - { - "epoch": 0.5378545339766425, - "grad_norm": 1.3135740789921446, - "learning_rate": 1.852037910137607e-06, - "loss": 0.8575, - "step": 5964 - }, - { - "epoch": 0.5379447175001127, - "grad_norm": 1.2250889473366469, - "learning_rate": 1.851455306773002e-06, - "loss": 1.0309, - "step": 5965 - }, - { - "epoch": 0.538034901023583, - "grad_norm": 1.227557319499277, - "learning_rate": 1.8508727160830483e-06, - "loss": 0.9181, - "step": 5966 - }, - { - "epoch": 0.5381250845470532, - "grad_norm": 1.3838229178605967, - "learning_rate": 1.8502901381174575e-06, - "loss": 1.0114, - "step": 5967 - }, - { - "epoch": 0.5382152680705236, - "grad_norm": 1.5603369427200313, - "learning_rate": 1.8497075729259372e-06, - "loss": 0.847, - "step": 5968 - }, - { - "epoch": 0.5383054515939938, - "grad_norm": 0.8515918271675532, - "learning_rate": 1.8491250205581963e-06, - "loss": 0.8841, - "step": 5969 - }, - { - "epoch": 0.538395635117464, - "grad_norm": 1.486302940766847, - "learning_rate": 1.8485424810639393e-06, - "loss": 0.9418, - "step": 5970 - }, - { - "epoch": 0.5384858186409343, - "grad_norm": 1.71276270389815, - "learning_rate": 1.847959954492874e-06, - "loss": 0.9969, - "step": 5971 - }, - { - "epoch": 0.5385760021644046, - "grad_norm": 1.5905536688078634, - "learning_rate": 1.8473774408947035e-06, - "loss": 1.0353, - "step": 5972 - }, - { - "epoch": 0.5386661856878748, - "grad_norm": 0.8419338389270273, - "learning_rate": 1.8467949403191308e-06, - "loss": 0.7939, - "step": 5973 - }, - { - "epoch": 0.5387563692113451, - "grad_norm": 1.406712307701795, - "learning_rate": 1.8462124528158592e-06, - "loss": 0.9935, - "step": 5974 - }, - { - "epoch": 0.5388465527348153, - "grad_norm": 1.353748428118384, - "learning_rate": 1.8456299784345881e-06, - "loss": 0.9264, - "step": 5975 - }, - { - "epoch": 0.5389367362582856, - "grad_norm": 1.2959485249157323, - "learning_rate": 1.8450475172250194e-06, - "loss": 0.9668, - "step": 5976 - }, - { - "epoch": 0.5390269197817559, - "grad_norm": 1.4063086603433608, - "learning_rate": 1.844465069236851e-06, - "loss": 0.9225, - "step": 5977 - }, - { - "epoch": 0.5391171033052261, - "grad_norm": 1.4097315182977026, - "learning_rate": 1.8438826345197796e-06, - "loss": 0.8577, - "step": 5978 - }, - { - "epoch": 0.5392072868286965, - "grad_norm": 1.9700327130811157, - "learning_rate": 1.8433002131235036e-06, - "loss": 0.9408, - "step": 5979 - }, - { - "epoch": 0.5392974703521667, - "grad_norm": 1.4359437562559234, - "learning_rate": 1.8427178050977167e-06, - "loss": 0.9673, - "step": 5980 - }, - { - "epoch": 0.5393876538756369, - "grad_norm": 1.2940545262724625, - "learning_rate": 1.8421354104921143e-06, - "loss": 1.0236, - "step": 5981 - }, - { - "epoch": 0.5394778373991072, - "grad_norm": 0.7087544049861131, - "learning_rate": 1.8415530293563894e-06, - "loss": 0.7824, - "step": 5982 - }, - { - "epoch": 0.5395680209225775, - "grad_norm": 1.320278641306102, - "learning_rate": 1.8409706617402333e-06, - "loss": 0.916, - "step": 5983 - }, - { - "epoch": 0.5396582044460477, - "grad_norm": 1.2784785091934645, - "learning_rate": 1.8403883076933378e-06, - "loss": 0.9615, - "step": 5984 - }, - { - "epoch": 0.539748387969518, - "grad_norm": 1.5038563905848217, - "learning_rate": 1.839805967265391e-06, - "loss": 0.9325, - "step": 5985 - }, - { - "epoch": 0.5398385714929882, - "grad_norm": 1.4031052500942693, - "learning_rate": 1.839223640506083e-06, - "loss": 1.0021, - "step": 5986 - }, - { - "epoch": 0.5399287550164585, - "grad_norm": 1.600616547883585, - "learning_rate": 1.8386413274650998e-06, - "loss": 0.9416, - "step": 5987 - }, - { - "epoch": 0.5400189385399288, - "grad_norm": 1.3645277036242722, - "learning_rate": 1.8380590281921294e-06, - "loss": 0.9871, - "step": 5988 - }, - { - "epoch": 0.540109122063399, - "grad_norm": 1.342286355193967, - "learning_rate": 1.8374767427368552e-06, - "loss": 0.8534, - "step": 5989 - }, - { - "epoch": 0.5401993055868692, - "grad_norm": 5.454246952096423, - "learning_rate": 1.8368944711489608e-06, - "loss": 0.9722, - "step": 5990 - }, - { - "epoch": 0.5402894891103396, - "grad_norm": 1.6220046092672273, - "learning_rate": 1.8363122134781304e-06, - "loss": 0.9697, - "step": 5991 - }, - { - "epoch": 0.5403796726338098, - "grad_norm": 1.5051342199046498, - "learning_rate": 1.835729969774044e-06, - "loss": 0.9672, - "step": 5992 - }, - { - "epoch": 0.54046985615728, - "grad_norm": 1.2828061492799894, - "learning_rate": 1.8351477400863823e-06, - "loss": 0.9495, - "step": 5993 - }, - { - "epoch": 0.5405600396807503, - "grad_norm": 1.4297465327848338, - "learning_rate": 1.8345655244648249e-06, - "loss": 0.926, - "step": 5994 - }, - { - "epoch": 0.5406502232042206, - "grad_norm": 1.1672190425752227, - "learning_rate": 1.8339833229590486e-06, - "loss": 0.9341, - "step": 5995 - }, - { - "epoch": 0.5407404067276909, - "grad_norm": 1.3273524112879107, - "learning_rate": 1.833401135618731e-06, - "loss": 1.0208, - "step": 5996 - }, - { - "epoch": 0.5408305902511611, - "grad_norm": 1.360595812644386, - "learning_rate": 1.8328189624935466e-06, - "loss": 0.8733, - "step": 5997 - }, - { - "epoch": 0.5409207737746313, - "grad_norm": 1.3477236081304091, - "learning_rate": 1.832236803633171e-06, - "loss": 0.9687, - "step": 5998 - }, - { - "epoch": 0.5410109572981017, - "grad_norm": 1.8564431934454964, - "learning_rate": 1.831654659087276e-06, - "loss": 0.9123, - "step": 5999 - }, - { - "epoch": 0.5411011408215719, - "grad_norm": 1.3952361390341752, - "learning_rate": 1.831072528905533e-06, - "loss": 0.9588, - "step": 6000 - }, - { - "epoch": 0.5411913243450421, - "grad_norm": 1.8163364848203314, - "learning_rate": 1.8304904131376142e-06, - "loss": 0.9958, - "step": 6001 - }, - { - "epoch": 0.5412815078685124, - "grad_norm": 1.4745071072515477, - "learning_rate": 1.8299083118331874e-06, - "loss": 0.9528, - "step": 6002 - }, - { - "epoch": 0.5413716913919827, - "grad_norm": 1.4838972427080332, - "learning_rate": 1.8293262250419217e-06, - "loss": 1.0822, - "step": 6003 - }, - { - "epoch": 0.541461874915453, - "grad_norm": 1.6019811315723091, - "learning_rate": 1.828744152813484e-06, - "loss": 1.0553, - "step": 6004 - }, - { - "epoch": 0.5415520584389232, - "grad_norm": 1.4815775159112285, - "learning_rate": 1.8281620951975382e-06, - "loss": 0.8974, - "step": 6005 - }, - { - "epoch": 0.5416422419623935, - "grad_norm": 1.460728431470611, - "learning_rate": 1.827580052243751e-06, - "loss": 0.916, - "step": 6006 - }, - { - "epoch": 0.5417324254858638, - "grad_norm": 1.2702234816475633, - "learning_rate": 1.826998024001784e-06, - "loss": 0.9773, - "step": 6007 - }, - { - "epoch": 0.541822609009334, - "grad_norm": 1.968350748420612, - "learning_rate": 1.8264160105212995e-06, - "loss": 0.9699, - "step": 6008 - }, - { - "epoch": 0.5419127925328042, - "grad_norm": 1.5867887570720662, - "learning_rate": 1.8258340118519582e-06, - "loss": 0.9101, - "step": 6009 - }, - { - "epoch": 0.5420029760562746, - "grad_norm": 1.9445261764000392, - "learning_rate": 1.82525202804342e-06, - "loss": 0.9289, - "step": 6010 - }, - { - "epoch": 0.5420931595797448, - "grad_norm": 1.5360784890863453, - "learning_rate": 1.8246700591453415e-06, - "loss": 0.9223, - "step": 6011 - }, - { - "epoch": 0.542183343103215, - "grad_norm": 1.5508166868835687, - "learning_rate": 1.8240881052073801e-06, - "loss": 0.9418, - "step": 6012 - }, - { - "epoch": 0.5422735266266853, - "grad_norm": 1.4606278851292476, - "learning_rate": 1.8235061662791923e-06, - "loss": 0.9159, - "step": 6013 - }, - { - "epoch": 0.5423637101501556, - "grad_norm": 1.492110405173631, - "learning_rate": 1.8229242424104309e-06, - "loss": 1.0133, - "step": 6014 - }, - { - "epoch": 0.5424538936736258, - "grad_norm": 0.6698383690751615, - "learning_rate": 1.8223423336507503e-06, - "loss": 0.7852, - "step": 6015 - }, - { - "epoch": 0.5425440771970961, - "grad_norm": 2.447114420708012, - "learning_rate": 1.8217604400498012e-06, - "loss": 0.9319, - "step": 6016 - }, - { - "epoch": 0.5426342607205663, - "grad_norm": 1.2665720092808002, - "learning_rate": 1.8211785616572333e-06, - "loss": 0.9868, - "step": 6017 - }, - { - "epoch": 0.5427244442440367, - "grad_norm": 1.4746889206608038, - "learning_rate": 1.8205966985226975e-06, - "loss": 1.0156, - "step": 6018 - }, - { - "epoch": 0.5428146277675069, - "grad_norm": 1.2193019301881094, - "learning_rate": 1.8200148506958397e-06, - "loss": 0.9527, - "step": 6019 - }, - { - "epoch": 0.5429048112909771, - "grad_norm": 0.7411328099907805, - "learning_rate": 1.819433018226308e-06, - "loss": 0.8345, - "step": 6020 - }, - { - "epoch": 0.5429949948144474, - "grad_norm": 3.8184320545352826, - "learning_rate": 1.8188512011637471e-06, - "loss": 0.7666, - "step": 6021 - }, - { - "epoch": 0.5430851783379177, - "grad_norm": 2.395972261685404, - "learning_rate": 1.8182693995578e-06, - "loss": 0.9255, - "step": 6022 - }, - { - "epoch": 0.5431753618613879, - "grad_norm": 1.1524680894914803, - "learning_rate": 1.8176876134581098e-06, - "loss": 0.8226, - "step": 6023 - }, - { - "epoch": 0.5432655453848582, - "grad_norm": 1.4644094408245176, - "learning_rate": 1.8171058429143176e-06, - "loss": 0.8887, - "step": 6024 - }, - { - "epoch": 0.5433557289083284, - "grad_norm": 1.292477609525313, - "learning_rate": 1.8165240879760637e-06, - "loss": 0.9823, - "step": 6025 - }, - { - "epoch": 0.5434459124317987, - "grad_norm": 1.2607493263710305, - "learning_rate": 1.8159423486929862e-06, - "loss": 0.9722, - "step": 6026 - }, - { - "epoch": 0.543536095955269, - "grad_norm": 1.304894573659414, - "learning_rate": 1.815360625114722e-06, - "loss": 0.9403, - "step": 6027 - }, - { - "epoch": 0.5436262794787392, - "grad_norm": 1.519127407197937, - "learning_rate": 1.814778917290908e-06, - "loss": 0.9335, - "step": 6028 - }, - { - "epoch": 0.5437164630022095, - "grad_norm": 1.5226619213423356, - "learning_rate": 1.8141972252711773e-06, - "loss": 0.9575, - "step": 6029 - }, - { - "epoch": 0.5438066465256798, - "grad_norm": 1.5674000771780847, - "learning_rate": 1.8136155491051645e-06, - "loss": 0.9381, - "step": 6030 - }, - { - "epoch": 0.54389683004915, - "grad_norm": 0.8046707966376413, - "learning_rate": 1.8130338888424998e-06, - "loss": 0.8482, - "step": 6031 - }, - { - "epoch": 0.5439870135726202, - "grad_norm": 2.255278858612065, - "learning_rate": 1.812452244532816e-06, - "loss": 0.9633, - "step": 6032 - }, - { - "epoch": 0.5440771970960906, - "grad_norm": 1.414034131856223, - "learning_rate": 1.8118706162257405e-06, - "loss": 0.8882, - "step": 6033 - }, - { - "epoch": 0.5441673806195608, - "grad_norm": 1.3091159974087976, - "learning_rate": 1.8112890039709002e-06, - "loss": 1.0242, - "step": 6034 - }, - { - "epoch": 0.5442575641430311, - "grad_norm": 1.4399010881255716, - "learning_rate": 1.8107074078179238e-06, - "loss": 0.945, - "step": 6035 - }, - { - "epoch": 0.5443477476665013, - "grad_norm": 1.5455673547616287, - "learning_rate": 1.8101258278164348e-06, - "loss": 0.9024, - "step": 6036 - }, - { - "epoch": 0.5444379311899716, - "grad_norm": 1.5223484156773082, - "learning_rate": 1.8095442640160575e-06, - "loss": 1.0467, - "step": 6037 - }, - { - "epoch": 0.5445281147134419, - "grad_norm": 2.872743301379808, - "learning_rate": 1.8089627164664132e-06, - "loss": 0.9101, - "step": 6038 - }, - { - "epoch": 0.5446182982369121, - "grad_norm": 1.1752972794926397, - "learning_rate": 1.8083811852171233e-06, - "loss": 0.8631, - "step": 6039 - }, - { - "epoch": 0.5447084817603823, - "grad_norm": 1.3991638513612095, - "learning_rate": 1.8077996703178078e-06, - "loss": 0.9444, - "step": 6040 - }, - { - "epoch": 0.5447986652838527, - "grad_norm": 1.3657239655483981, - "learning_rate": 1.8072181718180833e-06, - "loss": 0.9028, - "step": 6041 - }, - { - "epoch": 0.5448888488073229, - "grad_norm": 1.4615941331469002, - "learning_rate": 1.806636689767568e-06, - "loss": 1.0292, - "step": 6042 - }, - { - "epoch": 0.5449790323307931, - "grad_norm": 1.2872997368920083, - "learning_rate": 1.8060552242158769e-06, - "loss": 0.8828, - "step": 6043 - }, - { - "epoch": 0.5450692158542634, - "grad_norm": 1.408564464330245, - "learning_rate": 1.8054737752126224e-06, - "loss": 0.9195, - "step": 6044 - }, - { - "epoch": 0.5451593993777337, - "grad_norm": 2.134089943210377, - "learning_rate": 1.804892342807419e-06, - "loss": 0.9379, - "step": 6045 - }, - { - "epoch": 0.545249582901204, - "grad_norm": 1.8511484705292083, - "learning_rate": 1.8043109270498756e-06, - "loss": 1.0103, - "step": 6046 - }, - { - "epoch": 0.5453397664246742, - "grad_norm": 1.240630607688395, - "learning_rate": 1.803729527989604e-06, - "loss": 0.9551, - "step": 6047 - }, - { - "epoch": 0.5454299499481444, - "grad_norm": 1.354569898382465, - "learning_rate": 1.8031481456762112e-06, - "loss": 0.9382, - "step": 6048 - }, - { - "epoch": 0.5455201334716148, - "grad_norm": 1.5183683912805925, - "learning_rate": 1.8025667801593033e-06, - "loss": 0.8703, - "step": 6049 - }, - { - "epoch": 0.545610316995085, - "grad_norm": 2.614953023805863, - "learning_rate": 1.8019854314884871e-06, - "loss": 0.8897, - "step": 6050 - }, - { - "epoch": 0.5457005005185552, - "grad_norm": 1.344934961242563, - "learning_rate": 1.8014040997133652e-06, - "loss": 0.9978, - "step": 6051 - }, - { - "epoch": 0.5457906840420256, - "grad_norm": 0.9066940403644089, - "learning_rate": 1.8008227848835414e-06, - "loss": 0.8441, - "step": 6052 - }, - { - "epoch": 0.5458808675654958, - "grad_norm": 2.0706434453205422, - "learning_rate": 1.8002414870486144e-06, - "loss": 0.8933, - "step": 6053 - }, - { - "epoch": 0.545971051088966, - "grad_norm": 1.1586395522136017, - "learning_rate": 1.7996602062581864e-06, - "loss": 0.9021, - "step": 6054 - }, - { - "epoch": 0.5460612346124363, - "grad_norm": 1.4883755231020044, - "learning_rate": 1.7990789425618544e-06, - "loss": 0.9941, - "step": 6055 - }, - { - "epoch": 0.5461514181359066, - "grad_norm": 1.219914295907102, - "learning_rate": 1.7984976960092137e-06, - "loss": 0.9236, - "step": 6056 - }, - { - "epoch": 0.5462416016593769, - "grad_norm": 1.595769388230777, - "learning_rate": 1.7979164666498617e-06, - "loss": 0.9076, - "step": 6057 - }, - { - "epoch": 0.5463317851828471, - "grad_norm": 0.715296450826937, - "learning_rate": 1.7973352545333901e-06, - "loss": 0.7633, - "step": 6058 - }, - { - "epoch": 0.5464219687063173, - "grad_norm": 2.0435711937009597, - "learning_rate": 1.796754059709393e-06, - "loss": 0.8634, - "step": 6059 - }, - { - "epoch": 0.5465121522297877, - "grad_norm": 1.3537855003119206, - "learning_rate": 1.7961728822274603e-06, - "loss": 1.0019, - "step": 6060 - }, - { - "epoch": 0.5466023357532579, - "grad_norm": 1.3816123107476541, - "learning_rate": 1.7955917221371802e-06, - "loss": 0.8962, - "step": 6061 - }, - { - "epoch": 0.5466925192767281, - "grad_norm": 1.6575778460741741, - "learning_rate": 1.7950105794881422e-06, - "loss": 0.8594, - "step": 6062 - }, - { - "epoch": 0.5467827028001984, - "grad_norm": 1.233991877335906, - "learning_rate": 1.7944294543299317e-06, - "loss": 0.9294, - "step": 6063 - }, - { - "epoch": 0.5468728863236687, - "grad_norm": 1.7836036524788073, - "learning_rate": 1.7938483467121333e-06, - "loss": 0.9951, - "step": 6064 - }, - { - "epoch": 0.5469630698471389, - "grad_norm": 1.5041307157973598, - "learning_rate": 1.7932672566843313e-06, - "loss": 0.9217, - "step": 6065 - }, - { - "epoch": 0.5470532533706092, - "grad_norm": 1.3920110588566192, - "learning_rate": 1.7926861842961065e-06, - "loss": 0.8662, - "step": 6066 - }, - { - "epoch": 0.5471434368940794, - "grad_norm": 1.3256693858584865, - "learning_rate": 1.7921051295970399e-06, - "loss": 0.866, - "step": 6067 - }, - { - "epoch": 0.5472336204175497, - "grad_norm": 1.4146598270878914, - "learning_rate": 1.7915240926367092e-06, - "loss": 0.9074, - "step": 6068 - }, - { - "epoch": 0.54732380394102, - "grad_norm": 1.2436398347151378, - "learning_rate": 1.7909430734646932e-06, - "loss": 0.9915, - "step": 6069 - }, - { - "epoch": 0.5474139874644902, - "grad_norm": 0.8275299093367338, - "learning_rate": 1.790362072130567e-06, - "loss": 0.8345, - "step": 6070 - }, - { - "epoch": 0.5475041709879604, - "grad_norm": 1.5690531829028576, - "learning_rate": 1.7897810886839037e-06, - "loss": 0.9386, - "step": 6071 - }, - { - "epoch": 0.5475943545114308, - "grad_norm": 1.6225830590570143, - "learning_rate": 1.7892001231742782e-06, - "loss": 0.957, - "step": 6072 - }, - { - "epoch": 0.547684538034901, - "grad_norm": 1.480900281352714, - "learning_rate": 1.7886191756512594e-06, - "loss": 0.9546, - "step": 6073 - }, - { - "epoch": 0.5477747215583713, - "grad_norm": 1.310652249643724, - "learning_rate": 1.7880382461644192e-06, - "loss": 1.0227, - "step": 6074 - }, - { - "epoch": 0.5478649050818416, - "grad_norm": 1.4592174665553854, - "learning_rate": 1.7874573347633235e-06, - "loss": 0.9264, - "step": 6075 - }, - { - "epoch": 0.5479550886053118, - "grad_norm": 1.4681073669959674, - "learning_rate": 1.7868764414975408e-06, - "loss": 1.0141, - "step": 6076 - }, - { - "epoch": 0.5480452721287821, - "grad_norm": 1.641765960685483, - "learning_rate": 1.7862955664166353e-06, - "loss": 0.883, - "step": 6077 - }, - { - "epoch": 0.5481354556522523, - "grad_norm": 3.1088796249747284, - "learning_rate": 1.78571470957017e-06, - "loss": 1.0268, - "step": 6078 - }, - { - "epoch": 0.5482256391757226, - "grad_norm": 1.4626389657535537, - "learning_rate": 1.7851338710077074e-06, - "loss": 1.0065, - "step": 6079 - }, - { - "epoch": 0.5483158226991929, - "grad_norm": 1.5085130556355246, - "learning_rate": 1.7845530507788076e-06, - "loss": 1.0456, - "step": 6080 - }, - { - "epoch": 0.5484060062226631, - "grad_norm": 1.5114922563289508, - "learning_rate": 1.7839722489330298e-06, - "loss": 0.9159, - "step": 6081 - }, - { - "epoch": 0.5484961897461333, - "grad_norm": 1.4078415659148982, - "learning_rate": 1.7833914655199308e-06, - "loss": 0.9168, - "step": 6082 - }, - { - "epoch": 0.5485863732696037, - "grad_norm": 1.5006289752874187, - "learning_rate": 1.7828107005890658e-06, - "loss": 0.9984, - "step": 6083 - }, - { - "epoch": 0.5486765567930739, - "grad_norm": 1.541871813278709, - "learning_rate": 1.7822299541899898e-06, - "loss": 0.8599, - "step": 6084 - }, - { - "epoch": 0.5487667403165442, - "grad_norm": 1.3742871170430493, - "learning_rate": 1.7816492263722545e-06, - "loss": 0.945, - "step": 6085 - }, - { - "epoch": 0.5488569238400144, - "grad_norm": 1.496347191760383, - "learning_rate": 1.781068517185412e-06, - "loss": 0.9694, - "step": 6086 - }, - { - "epoch": 0.5489471073634847, - "grad_norm": 1.3386208873218606, - "learning_rate": 1.7804878266790104e-06, - "loss": 1.0111, - "step": 6087 - }, - { - "epoch": 0.549037290886955, - "grad_norm": 1.2525613292366748, - "learning_rate": 1.779907154902597e-06, - "loss": 0.8772, - "step": 6088 - }, - { - "epoch": 0.5491274744104252, - "grad_norm": 1.2656023470888185, - "learning_rate": 1.7793265019057198e-06, - "loss": 0.871, - "step": 6089 - }, - { - "epoch": 0.5492176579338954, - "grad_norm": 0.9229599339064278, - "learning_rate": 1.7787458677379212e-06, - "loss": 0.8229, - "step": 6090 - }, - { - "epoch": 0.5493078414573658, - "grad_norm": 1.689729700988399, - "learning_rate": 1.7781652524487463e-06, - "loss": 1.0046, - "step": 6091 - }, - { - "epoch": 0.549398024980836, - "grad_norm": 1.372564803645126, - "learning_rate": 1.777584656087735e-06, - "loss": 0.8941, - "step": 6092 - }, - { - "epoch": 0.5494882085043062, - "grad_norm": 1.4901879779597478, - "learning_rate": 1.777004078704427e-06, - "loss": 0.8657, - "step": 6093 - }, - { - "epoch": 0.5495783920277765, - "grad_norm": 1.479094015495362, - "learning_rate": 1.7764235203483603e-06, - "loss": 0.9646, - "step": 6094 - }, - { - "epoch": 0.5496685755512468, - "grad_norm": 1.5769126692244164, - "learning_rate": 1.775842981069072e-06, - "loss": 0.9137, - "step": 6095 - }, - { - "epoch": 0.549758759074717, - "grad_norm": 1.761161428462099, - "learning_rate": 1.7752624609160966e-06, - "loss": 0.9487, - "step": 6096 - }, - { - "epoch": 0.5498489425981873, - "grad_norm": 1.7062026048722, - "learning_rate": 1.7746819599389665e-06, - "loss": 0.9853, - "step": 6097 - }, - { - "epoch": 0.5499391261216575, - "grad_norm": 1.2948341397444043, - "learning_rate": 1.774101478187215e-06, - "loss": 0.9441, - "step": 6098 - }, - { - "epoch": 0.5500293096451279, - "grad_norm": 1.624899274198525, - "learning_rate": 1.773521015710371e-06, - "loss": 1.0121, - "step": 6099 - }, - { - "epoch": 0.5501194931685981, - "grad_norm": 1.6236145275476321, - "learning_rate": 1.7729405725579614e-06, - "loss": 1.0232, - "step": 6100 - }, - { - "epoch": 0.5502096766920683, - "grad_norm": 1.2467818318600927, - "learning_rate": 1.7723601487795151e-06, - "loss": 0.9189, - "step": 6101 - }, - { - "epoch": 0.5502998602155387, - "grad_norm": 1.2373917329995747, - "learning_rate": 1.7717797444245557e-06, - "loss": 0.8974, - "step": 6102 - }, - { - "epoch": 0.5503900437390089, - "grad_norm": 1.2033461330363573, - "learning_rate": 1.7711993595426076e-06, - "loss": 0.8751, - "step": 6103 - }, - { - "epoch": 0.5504802272624791, - "grad_norm": 1.594026952877025, - "learning_rate": 1.7706189941831915e-06, - "loss": 1.0542, - "step": 6104 - }, - { - "epoch": 0.5505704107859494, - "grad_norm": 1.5453436621753311, - "learning_rate": 1.770038648395827e-06, - "loss": 0.9065, - "step": 6105 - }, - { - "epoch": 0.5506605943094197, - "grad_norm": 0.674353773226689, - "learning_rate": 1.7694583222300336e-06, - "loss": 0.8235, - "step": 6106 - }, - { - "epoch": 0.55075077783289, - "grad_norm": 1.456343255720318, - "learning_rate": 1.7688780157353272e-06, - "loss": 0.9249, - "step": 6107 - }, - { - "epoch": 0.5508409613563602, - "grad_norm": 1.4414575252355184, - "learning_rate": 1.768297728961223e-06, - "loss": 0.8891, - "step": 6108 - }, - { - "epoch": 0.5509311448798304, - "grad_norm": 1.4012738665301603, - "learning_rate": 1.7677174619572342e-06, - "loss": 0.9505, - "step": 6109 - }, - { - "epoch": 0.5510213284033008, - "grad_norm": 1.4770859693639107, - "learning_rate": 1.7671372147728717e-06, - "loss": 0.9603, - "step": 6110 - }, - { - "epoch": 0.551111511926771, - "grad_norm": 1.6577634554267866, - "learning_rate": 1.7665569874576471e-06, - "loss": 1.0174, - "step": 6111 - }, - { - "epoch": 0.5512016954502412, - "grad_norm": 1.6840666706361305, - "learning_rate": 1.7659767800610664e-06, - "loss": 0.9418, - "step": 6112 - }, - { - "epoch": 0.5512918789737115, - "grad_norm": 1.5484333088314794, - "learning_rate": 1.7653965926326379e-06, - "loss": 0.8532, - "step": 6113 - }, - { - "epoch": 0.5513820624971818, - "grad_norm": 1.1491254445779207, - "learning_rate": 1.764816425221866e-06, - "loss": 0.9036, - "step": 6114 - }, - { - "epoch": 0.551472246020652, - "grad_norm": 1.1699958994377857, - "learning_rate": 1.7642362778782524e-06, - "loss": 0.8631, - "step": 6115 - }, - { - "epoch": 0.5515624295441223, - "grad_norm": 1.5777910086848612, - "learning_rate": 1.7636561506513005e-06, - "loss": 0.9233, - "step": 6116 - }, - { - "epoch": 0.5516526130675925, - "grad_norm": 1.1368597932860736, - "learning_rate": 1.7630760435905083e-06, - "loss": 0.919, - "step": 6117 - }, - { - "epoch": 0.5517427965910628, - "grad_norm": 1.6010142806951304, - "learning_rate": 1.762495956745375e-06, - "loss": 0.9032, - "step": 6118 - }, - { - "epoch": 0.5518329801145331, - "grad_norm": 2.433580915928488, - "learning_rate": 1.7619158901653962e-06, - "loss": 0.9925, - "step": 6119 - }, - { - "epoch": 0.5519231636380033, - "grad_norm": 1.9683230103564697, - "learning_rate": 1.761335843900066e-06, - "loss": 0.941, - "step": 6120 - }, - { - "epoch": 0.5520133471614735, - "grad_norm": 1.2592605406286124, - "learning_rate": 1.7607558179988785e-06, - "loss": 0.9294, - "step": 6121 - }, - { - "epoch": 0.5521035306849439, - "grad_norm": 1.3748490944329395, - "learning_rate": 1.760175812511323e-06, - "loss": 0.8775, - "step": 6122 - }, - { - "epoch": 0.5521937142084141, - "grad_norm": 1.453506501683567, - "learning_rate": 1.75959582748689e-06, - "loss": 0.9, - "step": 6123 - }, - { - "epoch": 0.5522838977318844, - "grad_norm": 1.4614632216480785, - "learning_rate": 1.7590158629750657e-06, - "loss": 0.978, - "step": 6124 - }, - { - "epoch": 0.5523740812553547, - "grad_norm": 1.3472586667264679, - "learning_rate": 1.7584359190253376e-06, - "loss": 1.0021, - "step": 6125 - }, - { - "epoch": 0.5524642647788249, - "grad_norm": 1.7483687290988206, - "learning_rate": 1.7578559956871892e-06, - "loss": 1.0052, - "step": 6126 - }, - { - "epoch": 0.5525544483022952, - "grad_norm": 1.256053096845566, - "learning_rate": 1.7572760930101012e-06, - "loss": 0.9414, - "step": 6127 - }, - { - "epoch": 0.5526446318257654, - "grad_norm": 0.7601168882052903, - "learning_rate": 1.7566962110435563e-06, - "loss": 0.8777, - "step": 6128 - }, - { - "epoch": 0.5527348153492357, - "grad_norm": 1.193806865595962, - "learning_rate": 1.7561163498370313e-06, - "loss": 0.963, - "step": 6129 - }, - { - "epoch": 0.552824998872706, - "grad_norm": 1.3217693620320283, - "learning_rate": 1.755536509440005e-06, - "loss": 0.9088, - "step": 6130 - }, - { - "epoch": 0.5529151823961762, - "grad_norm": 1.8004642629596468, - "learning_rate": 1.7549566899019519e-06, - "loss": 0.9463, - "step": 6131 - }, - { - "epoch": 0.5530053659196464, - "grad_norm": 1.3197940508252142, - "learning_rate": 1.754376891272344e-06, - "loss": 0.9591, - "step": 6132 - }, - { - "epoch": 0.5530955494431168, - "grad_norm": 1.9317681836476637, - "learning_rate": 1.753797113600655e-06, - "loss": 0.8981, - "step": 6133 - }, - { - "epoch": 0.553185732966587, - "grad_norm": 1.4429855172216306, - "learning_rate": 1.7532173569363535e-06, - "loss": 1.0775, - "step": 6134 - }, - { - "epoch": 0.5532759164900573, - "grad_norm": 1.7306756784961714, - "learning_rate": 1.7526376213289077e-06, - "loss": 0.7685, - "step": 6135 - }, - { - "epoch": 0.5533661000135275, - "grad_norm": 1.3751660593500223, - "learning_rate": 1.7520579068277844e-06, - "loss": 0.9249, - "step": 6136 - }, - { - "epoch": 0.5534562835369978, - "grad_norm": 1.3724491560226593, - "learning_rate": 1.7514782134824472e-06, - "loss": 0.978, - "step": 6137 - }, - { - "epoch": 0.5535464670604681, - "grad_norm": 1.3441005959130807, - "learning_rate": 1.7508985413423599e-06, - "loss": 0.9444, - "step": 6138 - }, - { - "epoch": 0.5536366505839383, - "grad_norm": 1.3380447633038053, - "learning_rate": 1.7503188904569814e-06, - "loss": 1.0117, - "step": 6139 - }, - { - "epoch": 0.5537268341074085, - "grad_norm": 1.5074780345221122, - "learning_rate": 1.7497392608757728e-06, - "loss": 0.9133, - "step": 6140 - }, - { - "epoch": 0.5538170176308789, - "grad_norm": 1.2121425908092647, - "learning_rate": 1.7491596526481897e-06, - "loss": 0.9644, - "step": 6141 - }, - { - "epoch": 0.5539072011543491, - "grad_norm": 1.3067171675952898, - "learning_rate": 1.7485800658236888e-06, - "loss": 0.8745, - "step": 6142 - }, - { - "epoch": 0.5539973846778193, - "grad_norm": 1.3739935052210133, - "learning_rate": 1.7480005004517232e-06, - "loss": 1.0074, - "step": 6143 - }, - { - "epoch": 0.5540875682012896, - "grad_norm": 1.9007515951619296, - "learning_rate": 1.7474209565817435e-06, - "loss": 1.0099, - "step": 6144 - }, - { - "epoch": 0.5541777517247599, - "grad_norm": 1.480493389317145, - "learning_rate": 1.7468414342632014e-06, - "loss": 0.9462, - "step": 6145 - }, - { - "epoch": 0.5542679352482301, - "grad_norm": 1.483776654564046, - "learning_rate": 1.746261933545543e-06, - "loss": 0.9995, - "step": 6146 - }, - { - "epoch": 0.5543581187717004, - "grad_norm": 1.3289102364760073, - "learning_rate": 1.7456824544782165e-06, - "loss": 0.9448, - "step": 6147 - }, - { - "epoch": 0.5544483022951707, - "grad_norm": 1.3193534663551498, - "learning_rate": 1.7451029971106653e-06, - "loss": 0.9488, - "step": 6148 - }, - { - "epoch": 0.554538485818641, - "grad_norm": 1.2695473537890891, - "learning_rate": 1.7445235614923313e-06, - "loss": 0.9731, - "step": 6149 - }, - { - "epoch": 0.5546286693421112, - "grad_norm": 1.2828103310570171, - "learning_rate": 1.7439441476726556e-06, - "loss": 0.9393, - "step": 6150 - }, - { - "epoch": 0.5547188528655814, - "grad_norm": 1.463178904182859, - "learning_rate": 1.7433647557010776e-06, - "loss": 0.9537, - "step": 6151 - }, - { - "epoch": 0.5548090363890518, - "grad_norm": 1.4442066807971534, - "learning_rate": 1.7427853856270338e-06, - "loss": 0.9107, - "step": 6152 - }, - { - "epoch": 0.554899219912522, - "grad_norm": 1.4397623255407925, - "learning_rate": 1.7422060374999593e-06, - "loss": 0.9045, - "step": 6153 - }, - { - "epoch": 0.5549894034359922, - "grad_norm": 1.534706030290821, - "learning_rate": 1.7416267113692862e-06, - "loss": 0.9036, - "step": 6154 - }, - { - "epoch": 0.5550795869594625, - "grad_norm": 1.2752329333086354, - "learning_rate": 1.7410474072844475e-06, - "loss": 0.9763, - "step": 6155 - }, - { - "epoch": 0.5551697704829328, - "grad_norm": 1.3139222930970322, - "learning_rate": 1.740468125294871e-06, - "loss": 0.934, - "step": 6156 - }, - { - "epoch": 0.555259954006403, - "grad_norm": 2.3066458964469296, - "learning_rate": 1.739888865449986e-06, - "loss": 0.8584, - "step": 6157 - }, - { - "epoch": 0.5553501375298733, - "grad_norm": 1.5873270946867672, - "learning_rate": 1.7393096277992174e-06, - "loss": 0.9723, - "step": 6158 - }, - { - "epoch": 0.5554403210533435, - "grad_norm": 1.3213080089758558, - "learning_rate": 1.738730412391988e-06, - "loss": 0.9582, - "step": 6159 - }, - { - "epoch": 0.5555305045768139, - "grad_norm": 1.3698874707194317, - "learning_rate": 1.738151219277721e-06, - "loss": 0.9952, - "step": 6160 - }, - { - "epoch": 0.5556206881002841, - "grad_norm": 1.4338182486642568, - "learning_rate": 1.7375720485058349e-06, - "loss": 0.8993, - "step": 6161 - }, - { - "epoch": 0.5557108716237543, - "grad_norm": 1.7581069360612593, - "learning_rate": 1.7369929001257498e-06, - "loss": 0.9636, - "step": 6162 - }, - { - "epoch": 0.5558010551472246, - "grad_norm": 1.5162393956705915, - "learning_rate": 1.73641377418688e-06, - "loss": 0.8589, - "step": 6163 - }, - { - "epoch": 0.5558912386706949, - "grad_norm": 1.4641141574991943, - "learning_rate": 1.7358346707386408e-06, - "loss": 0.8772, - "step": 6164 - }, - { - "epoch": 0.5559814221941651, - "grad_norm": 1.47477686859192, - "learning_rate": 1.7352555898304439e-06, - "loss": 0.9894, - "step": 6165 - }, - { - "epoch": 0.5560716057176354, - "grad_norm": 1.435574278277989, - "learning_rate": 1.7346765315116996e-06, - "loss": 0.8646, - "step": 6166 - }, - { - "epoch": 0.5561617892411056, - "grad_norm": 1.532614819828799, - "learning_rate": 1.734097495831817e-06, - "loss": 0.9077, - "step": 6167 - }, - { - "epoch": 0.5562519727645759, - "grad_norm": 1.432461741204507, - "learning_rate": 1.7335184828402015e-06, - "loss": 0.957, - "step": 6168 - }, - { - "epoch": 0.5563421562880462, - "grad_norm": 1.3407587869396416, - "learning_rate": 1.7329394925862595e-06, - "loss": 0.9197, - "step": 6169 - }, - { - "epoch": 0.5564323398115164, - "grad_norm": 1.4306857266917743, - "learning_rate": 1.7323605251193922e-06, - "loss": 0.9897, - "step": 6170 - }, - { - "epoch": 0.5565225233349868, - "grad_norm": 1.5059005395232858, - "learning_rate": 1.7317815804890001e-06, - "loss": 0.9661, - "step": 6171 - }, - { - "epoch": 0.556612706858457, - "grad_norm": 1.445770768379385, - "learning_rate": 1.731202658744483e-06, - "loss": 0.9031, - "step": 6172 - }, - { - "epoch": 0.5567028903819272, - "grad_norm": 1.1999361458319757, - "learning_rate": 1.7306237599352365e-06, - "loss": 1.0038, - "step": 6173 - }, - { - "epoch": 0.5567930739053975, - "grad_norm": 1.198968270948617, - "learning_rate": 1.730044884110657e-06, - "loss": 0.9274, - "step": 6174 - }, - { - "epoch": 0.5568832574288678, - "grad_norm": 1.5937806668790506, - "learning_rate": 1.7294660313201366e-06, - "loss": 0.9044, - "step": 6175 - }, - { - "epoch": 0.556973440952338, - "grad_norm": 1.8218517426303502, - "learning_rate": 1.7288872016130652e-06, - "loss": 0.9137, - "step": 6176 - }, - { - "epoch": 0.5570636244758083, - "grad_norm": 1.497144365065511, - "learning_rate": 1.7283083950388334e-06, - "loss": 0.8566, - "step": 6177 - }, - { - "epoch": 0.5571538079992785, - "grad_norm": 1.3805490708961052, - "learning_rate": 1.727729611646827e-06, - "loss": 0.8482, - "step": 6178 - }, - { - "epoch": 0.5572439915227488, - "grad_norm": 1.3971472814031856, - "learning_rate": 1.7271508514864318e-06, - "loss": 1.0016, - "step": 6179 - }, - { - "epoch": 0.5573341750462191, - "grad_norm": 1.50514808015003, - "learning_rate": 1.7265721146070302e-06, - "loss": 0.9497, - "step": 6180 - }, - { - "epoch": 0.5574243585696893, - "grad_norm": 1.3974580798063243, - "learning_rate": 1.7259934010580035e-06, - "loss": 0.9453, - "step": 6181 - }, - { - "epoch": 0.5575145420931595, - "grad_norm": 1.3622895489470248, - "learning_rate": 1.725414710888731e-06, - "loss": 0.8924, - "step": 6182 - }, - { - "epoch": 0.5576047256166299, - "grad_norm": 1.2560247190800766, - "learning_rate": 1.7248360441485885e-06, - "loss": 0.9307, - "step": 6183 - }, - { - "epoch": 0.5576949091401001, - "grad_norm": 1.4003003888073597, - "learning_rate": 1.7242574008869528e-06, - "loss": 1.0165, - "step": 6184 - }, - { - "epoch": 0.5577850926635703, - "grad_norm": 0.702211974005656, - "learning_rate": 1.7236787811531951e-06, - "loss": 0.8704, - "step": 6185 - }, - { - "epoch": 0.5578752761870406, - "grad_norm": 1.4718841228485855, - "learning_rate": 1.7231001849966887e-06, - "loss": 0.9168, - "step": 6186 - }, - { - "epoch": 0.5579654597105109, - "grad_norm": 0.6838204362215398, - "learning_rate": 1.722521612466801e-06, - "loss": 0.8354, - "step": 6187 - }, - { - "epoch": 0.5580556432339812, - "grad_norm": 1.448623740723415, - "learning_rate": 1.7219430636128984e-06, - "loss": 0.8897, - "step": 6188 - }, - { - "epoch": 0.5581458267574514, - "grad_norm": 1.321693420564297, - "learning_rate": 1.7213645384843479e-06, - "loss": 0.8912, - "step": 6189 - }, - { - "epoch": 0.5582360102809216, - "grad_norm": 1.4137527801054612, - "learning_rate": 1.7207860371305108e-06, - "loss": 0.9278, - "step": 6190 - }, - { - "epoch": 0.558326193804392, - "grad_norm": 1.5941303771628312, - "learning_rate": 1.7202075596007487e-06, - "loss": 0.907, - "step": 6191 - }, - { - "epoch": 0.5584163773278622, - "grad_norm": 1.458231413776592, - "learning_rate": 1.7196291059444206e-06, - "loss": 0.9079, - "step": 6192 - }, - { - "epoch": 0.5585065608513324, - "grad_norm": 1.4915528071048796, - "learning_rate": 1.7190506762108828e-06, - "loss": 0.9333, - "step": 6193 - }, - { - "epoch": 0.5585967443748028, - "grad_norm": 1.6263676536745384, - "learning_rate": 1.7184722704494907e-06, - "loss": 0.9426, - "step": 6194 - }, - { - "epoch": 0.558686927898273, - "grad_norm": 1.3804316310069549, - "learning_rate": 1.717893888709596e-06, - "loss": 0.9548, - "step": 6195 - }, - { - "epoch": 0.5587771114217432, - "grad_norm": 1.4428477122418137, - "learning_rate": 1.7173155310405515e-06, - "loss": 0.964, - "step": 6196 - }, - { - "epoch": 0.5588672949452135, - "grad_norm": 1.537042289152113, - "learning_rate": 1.7167371974917043e-06, - "loss": 1.0192, - "step": 6197 - }, - { - "epoch": 0.5589574784686838, - "grad_norm": 1.570261332878682, - "learning_rate": 1.7161588881124003e-06, - "loss": 0.9677, - "step": 6198 - }, - { - "epoch": 0.559047661992154, - "grad_norm": 2.230299427132592, - "learning_rate": 1.7155806029519861e-06, - "loss": 0.9354, - "step": 6199 - }, - { - "epoch": 0.5591378455156243, - "grad_norm": 1.4953884125417094, - "learning_rate": 1.7150023420598023e-06, - "loss": 0.9941, - "step": 6200 - }, - { - "epoch": 0.5592280290390945, - "grad_norm": 1.1962723711367447, - "learning_rate": 1.714424105485191e-06, - "loss": 0.9953, - "step": 6201 - }, - { - "epoch": 0.5593182125625649, - "grad_norm": 2.0068361751064336, - "learning_rate": 1.7138458932774896e-06, - "loss": 0.8989, - "step": 6202 - }, - { - "epoch": 0.5594083960860351, - "grad_norm": 0.7137983839534594, - "learning_rate": 1.7132677054860335e-06, - "loss": 0.8356, - "step": 6203 - }, - { - "epoch": 0.5594985796095053, - "grad_norm": 1.334278571332026, - "learning_rate": 1.7126895421601586e-06, - "loss": 0.9691, - "step": 6204 - }, - { - "epoch": 0.5595887631329756, - "grad_norm": 1.286485813274658, - "learning_rate": 1.712111403349196e-06, - "loss": 0.8871, - "step": 6205 - }, - { - "epoch": 0.5596789466564459, - "grad_norm": 1.2371036935331925, - "learning_rate": 1.7115332891024757e-06, - "loss": 0.9733, - "step": 6206 - }, - { - "epoch": 0.5597691301799161, - "grad_norm": 0.772736896745653, - "learning_rate": 1.7109551994693257e-06, - "loss": 0.8934, - "step": 6207 - }, - { - "epoch": 0.5598593137033864, - "grad_norm": 1.2486490101496859, - "learning_rate": 1.7103771344990725e-06, - "loss": 0.8801, - "step": 6208 - }, - { - "epoch": 0.5599494972268566, - "grad_norm": 1.6424386308838095, - "learning_rate": 1.709799094241039e-06, - "loss": 0.8642, - "step": 6209 - }, - { - "epoch": 0.560039680750327, - "grad_norm": 1.4318506937615612, - "learning_rate": 1.709221078744546e-06, - "loss": 0.9777, - "step": 6210 - }, - { - "epoch": 0.5601298642737972, - "grad_norm": 1.2554199494121554, - "learning_rate": 1.7086430880589148e-06, - "loss": 0.9531, - "step": 6211 - }, - { - "epoch": 0.5602200477972674, - "grad_norm": 1.597544398041566, - "learning_rate": 1.7080651222334612e-06, - "loss": 0.9493, - "step": 6212 - }, - { - "epoch": 0.5603102313207377, - "grad_norm": 1.595036155984467, - "learning_rate": 1.7074871813175018e-06, - "loss": 0.9937, - "step": 6213 - }, - { - "epoch": 0.560400414844208, - "grad_norm": 2.1255731370718016, - "learning_rate": 1.706909265360349e-06, - "loss": 0.8556, - "step": 6214 - }, - { - "epoch": 0.5604905983676782, - "grad_norm": 0.7897803940534982, - "learning_rate": 1.7063313744113128e-06, - "loss": 0.8088, - "step": 6215 - }, - { - "epoch": 0.5605807818911485, - "grad_norm": 0.6791656068976936, - "learning_rate": 1.7057535085197042e-06, - "loss": 0.7885, - "step": 6216 - }, - { - "epoch": 0.5606709654146187, - "grad_norm": 1.605537802888841, - "learning_rate": 1.705175667734828e-06, - "loss": 0.8629, - "step": 6217 - }, - { - "epoch": 0.560761148938089, - "grad_norm": 1.6542274073187495, - "learning_rate": 1.7045978521059894e-06, - "loss": 0.912, - "step": 6218 - }, - { - "epoch": 0.5608513324615593, - "grad_norm": 1.2385475522161622, - "learning_rate": 1.7040200616824914e-06, - "loss": 0.9394, - "step": 6219 - }, - { - "epoch": 0.5609415159850295, - "grad_norm": 1.5552580280204067, - "learning_rate": 1.7034422965136333e-06, - "loss": 0.8914, - "step": 6220 - }, - { - "epoch": 0.5610316995084998, - "grad_norm": 1.37472795916431, - "learning_rate": 1.7028645566487137e-06, - "loss": 0.9854, - "step": 6221 - }, - { - "epoch": 0.5611218830319701, - "grad_norm": 1.4445699072823568, - "learning_rate": 1.7022868421370284e-06, - "loss": 0.9217, - "step": 6222 - }, - { - "epoch": 0.5612120665554403, - "grad_norm": 1.3799545582601096, - "learning_rate": 1.701709153027872e-06, - "loss": 0.9857, - "step": 6223 - }, - { - "epoch": 0.5613022500789105, - "grad_norm": 1.4293794560665298, - "learning_rate": 1.7011314893705353e-06, - "loss": 0.9071, - "step": 6224 - }, - { - "epoch": 0.5613924336023809, - "grad_norm": 1.3640668294341955, - "learning_rate": 1.700553851214307e-06, - "loss": 0.9454, - "step": 6225 - }, - { - "epoch": 0.5614826171258511, - "grad_norm": 1.3513324078636846, - "learning_rate": 1.699976238608476e-06, - "loss": 0.8133, - "step": 6226 - }, - { - "epoch": 0.5615728006493214, - "grad_norm": 1.6018253761966803, - "learning_rate": 1.699398651602326e-06, - "loss": 0.8874, - "step": 6227 - }, - { - "epoch": 0.5616629841727916, - "grad_norm": 1.2949578695918815, - "learning_rate": 1.6988210902451413e-06, - "loss": 0.9892, - "step": 6228 - }, - { - "epoch": 0.5617531676962619, - "grad_norm": 1.536061415616702, - "learning_rate": 1.6982435545862011e-06, - "loss": 0.9199, - "step": 6229 - }, - { - "epoch": 0.5618433512197322, - "grad_norm": 1.4460244564557727, - "learning_rate": 1.6976660446747853e-06, - "loss": 0.9447, - "step": 6230 - }, - { - "epoch": 0.5619335347432024, - "grad_norm": 1.6387840478125817, - "learning_rate": 1.6970885605601696e-06, - "loss": 0.8683, - "step": 6231 - }, - { - "epoch": 0.5620237182666726, - "grad_norm": 1.437995410635977, - "learning_rate": 1.6965111022916282e-06, - "loss": 1.0496, - "step": 6232 - }, - { - "epoch": 0.562113901790143, - "grad_norm": 1.437061408762239, - "learning_rate": 1.6959336699184323e-06, - "loss": 0.9142, - "step": 6233 - }, - { - "epoch": 0.5622040853136132, - "grad_norm": 1.5342732382163342, - "learning_rate": 1.6953562634898529e-06, - "loss": 0.9289, - "step": 6234 - }, - { - "epoch": 0.5622942688370834, - "grad_norm": 1.640362527832578, - "learning_rate": 1.6947788830551569e-06, - "loss": 0.912, - "step": 6235 - }, - { - "epoch": 0.5623844523605537, - "grad_norm": 0.7873901169423864, - "learning_rate": 1.6942015286636093e-06, - "loss": 0.8625, - "step": 6236 - }, - { - "epoch": 0.562474635884024, - "grad_norm": 1.3423330687696375, - "learning_rate": 1.6936242003644735e-06, - "loss": 0.9224, - "step": 6237 - }, - { - "epoch": 0.5625648194074943, - "grad_norm": 1.4252321623024025, - "learning_rate": 1.6930468982070106e-06, - "loss": 1.0095, - "step": 6238 - }, - { - "epoch": 0.5626550029309645, - "grad_norm": 0.6797623647916998, - "learning_rate": 1.692469622240478e-06, - "loss": 0.7608, - "step": 6239 - }, - { - "epoch": 0.5627451864544347, - "grad_norm": 1.5932776555238426, - "learning_rate": 1.6918923725141339e-06, - "loss": 0.898, - "step": 6240 - }, - { - "epoch": 0.5628353699779051, - "grad_norm": 1.2908047548598363, - "learning_rate": 1.6913151490772312e-06, - "loss": 0.9911, - "step": 6241 - }, - { - "epoch": 0.5629255535013753, - "grad_norm": 1.3191421106639083, - "learning_rate": 1.6907379519790215e-06, - "loss": 0.983, - "step": 6242 - }, - { - "epoch": 0.5630157370248455, - "grad_norm": 1.2327432591244902, - "learning_rate": 1.6901607812687558e-06, - "loss": 0.8463, - "step": 6243 - }, - { - "epoch": 0.5631059205483159, - "grad_norm": 1.2925218345671947, - "learning_rate": 1.6895836369956794e-06, - "loss": 0.8973, - "step": 6244 - }, - { - "epoch": 0.5631961040717861, - "grad_norm": 0.8080472343183639, - "learning_rate": 1.6890065192090402e-06, - "loss": 0.7815, - "step": 6245 - }, - { - "epoch": 0.5632862875952563, - "grad_norm": 2.075698123828084, - "learning_rate": 1.6884294279580793e-06, - "loss": 0.847, - "step": 6246 - }, - { - "epoch": 0.5633764711187266, - "grad_norm": 1.4244910686583823, - "learning_rate": 1.6878523632920371e-06, - "loss": 0.966, - "step": 6247 - }, - { - "epoch": 0.5634666546421969, - "grad_norm": 1.7199275751168888, - "learning_rate": 1.6872753252601525e-06, - "loss": 1.0036, - "step": 6248 - }, - { - "epoch": 0.5635568381656672, - "grad_norm": 1.5195130491760216, - "learning_rate": 1.6866983139116616e-06, - "loss": 0.8828, - "step": 6249 - }, - { - "epoch": 0.5636470216891374, - "grad_norm": 1.57100711471724, - "learning_rate": 1.6861213292957981e-06, - "loss": 0.9099, - "step": 6250 - }, - { - "epoch": 0.5637372052126076, - "grad_norm": 1.3185046758760497, - "learning_rate": 1.685544371461793e-06, - "loss": 0.9719, - "step": 6251 - }, - { - "epoch": 0.563827388736078, - "grad_norm": 1.5181315823094665, - "learning_rate": 1.6849674404588767e-06, - "loss": 0.9607, - "step": 6252 - }, - { - "epoch": 0.5639175722595482, - "grad_norm": 1.551112449504842, - "learning_rate": 1.6843905363362758e-06, - "loss": 0.9714, - "step": 6253 - }, - { - "epoch": 0.5640077557830184, - "grad_norm": 1.4639401509918482, - "learning_rate": 1.6838136591432136e-06, - "loss": 0.8844, - "step": 6254 - }, - { - "epoch": 0.5640979393064887, - "grad_norm": 1.4843665674873556, - "learning_rate": 1.6832368089289139e-06, - "loss": 0.9637, - "step": 6255 - }, - { - "epoch": 0.564188122829959, - "grad_norm": 1.43773939378382, - "learning_rate": 1.682659985742596e-06, - "loss": 0.9547, - "step": 6256 - }, - { - "epoch": 0.5642783063534292, - "grad_norm": 1.5440243743593307, - "learning_rate": 1.6820831896334782e-06, - "loss": 0.9874, - "step": 6257 - }, - { - "epoch": 0.5643684898768995, - "grad_norm": 1.693519241548064, - "learning_rate": 1.681506420650776e-06, - "loss": 0.9601, - "step": 6258 - }, - { - "epoch": 0.5644586734003697, - "grad_norm": 1.282846107925486, - "learning_rate": 1.680929678843701e-06, - "loss": 0.9315, - "step": 6259 - }, - { - "epoch": 0.56454885692384, - "grad_norm": 1.3287302321132572, - "learning_rate": 1.6803529642614662e-06, - "loss": 0.9918, - "step": 6260 - }, - { - "epoch": 0.5646390404473103, - "grad_norm": 1.8793189216165835, - "learning_rate": 1.6797762769532785e-06, - "loss": 0.9522, - "step": 6261 - }, - { - "epoch": 0.5647292239707805, - "grad_norm": 1.3452211798710807, - "learning_rate": 1.679199616968345e-06, - "loss": 0.9719, - "step": 6262 - }, - { - "epoch": 0.5648194074942507, - "grad_norm": 1.247631069399815, - "learning_rate": 1.6786229843558689e-06, - "loss": 0.9691, - "step": 6263 - }, - { - "epoch": 0.5649095910177211, - "grad_norm": 1.640309839404567, - "learning_rate": 1.6780463791650514e-06, - "loss": 0.9222, - "step": 6264 - }, - { - "epoch": 0.5649997745411913, - "grad_norm": 1.6262291880983137, - "learning_rate": 1.6774698014450928e-06, - "loss": 0.8849, - "step": 6265 - }, - { - "epoch": 0.5650899580646616, - "grad_norm": 0.75131999522806, - "learning_rate": 1.6768932512451883e-06, - "loss": 0.8036, - "step": 6266 - }, - { - "epoch": 0.5651801415881319, - "grad_norm": 1.3621875124618694, - "learning_rate": 1.676316728614534e-06, - "loss": 0.897, - "step": 6267 - }, - { - "epoch": 0.5652703251116021, - "grad_norm": 0.6041590646287428, - "learning_rate": 1.675740233602321e-06, - "loss": 0.8027, - "step": 6268 - }, - { - "epoch": 0.5653605086350724, - "grad_norm": 1.2784175733805057, - "learning_rate": 1.6751637662577385e-06, - "loss": 0.984, - "step": 6269 - }, - { - "epoch": 0.5654506921585426, - "grad_norm": 0.7107149551344634, - "learning_rate": 1.6745873266299753e-06, - "loss": 0.7377, - "step": 6270 - }, - { - "epoch": 0.565540875682013, - "grad_norm": 1.4611611424588211, - "learning_rate": 1.6740109147682148e-06, - "loss": 0.9054, - "step": 6271 - }, - { - "epoch": 0.5656310592054832, - "grad_norm": 1.4476670340451845, - "learning_rate": 1.6734345307216418e-06, - "loss": 0.9617, - "step": 6272 - }, - { - "epoch": 0.5657212427289534, - "grad_norm": 1.7353825434901569, - "learning_rate": 1.6728581745394346e-06, - "loss": 0.9083, - "step": 6273 - }, - { - "epoch": 0.5658114262524236, - "grad_norm": 1.2713891153351806, - "learning_rate": 1.672281846270772e-06, - "loss": 1.0191, - "step": 6274 - }, - { - "epoch": 0.565901609775894, - "grad_norm": 1.7516104917051305, - "learning_rate": 1.6717055459648295e-06, - "loss": 0.9409, - "step": 6275 - }, - { - "epoch": 0.5659917932993642, - "grad_norm": 1.50504907559702, - "learning_rate": 1.6711292736707793e-06, - "loss": 0.8957, - "step": 6276 - }, - { - "epoch": 0.5660819768228345, - "grad_norm": 1.7465347996042442, - "learning_rate": 1.6705530294377938e-06, - "loss": 1.0143, - "step": 6277 - }, - { - "epoch": 0.5661721603463047, - "grad_norm": 1.5612578222782816, - "learning_rate": 1.6699768133150395e-06, - "loss": 0.9435, - "step": 6278 - }, - { - "epoch": 0.566262343869775, - "grad_norm": 1.474559818470889, - "learning_rate": 1.6694006253516837e-06, - "loss": 0.9361, - "step": 6279 - }, - { - "epoch": 0.5663525273932453, - "grad_norm": 1.9653365804489276, - "learning_rate": 1.6688244655968896e-06, - "loss": 0.9493, - "step": 6280 - }, - { - "epoch": 0.5664427109167155, - "grad_norm": 1.477798832398404, - "learning_rate": 1.6682483340998175e-06, - "loss": 0.865, - "step": 6281 - }, - { - "epoch": 0.5665328944401857, - "grad_norm": 1.5578785261512023, - "learning_rate": 1.6676722309096276e-06, - "loss": 0.8987, - "step": 6282 - }, - { - "epoch": 0.5666230779636561, - "grad_norm": 0.7758822864149152, - "learning_rate": 1.6670961560754744e-06, - "loss": 0.8618, - "step": 6283 - }, - { - "epoch": 0.5667132614871263, - "grad_norm": 1.618694101200631, - "learning_rate": 1.6665201096465138e-06, - "loss": 0.9372, - "step": 6284 - }, - { - "epoch": 0.5668034450105965, - "grad_norm": 1.409786947400403, - "learning_rate": 1.6659440916718961e-06, - "loss": 0.9873, - "step": 6285 - }, - { - "epoch": 0.5668936285340668, - "grad_norm": 1.5279270629002049, - "learning_rate": 1.6653681022007696e-06, - "loss": 0.9577, - "step": 6286 - }, - { - "epoch": 0.5669838120575371, - "grad_norm": 1.8586271689490386, - "learning_rate": 1.6647921412822825e-06, - "loss": 0.8685, - "step": 6287 - }, - { - "epoch": 0.5670739955810074, - "grad_norm": 1.5233791242321264, - "learning_rate": 1.6642162089655782e-06, - "loss": 0.9393, - "step": 6288 - }, - { - "epoch": 0.5671641791044776, - "grad_norm": 1.4734948803437669, - "learning_rate": 1.663640305299798e-06, - "loss": 0.9037, - "step": 6289 - }, - { - "epoch": 0.5672543626279479, - "grad_norm": 0.7604303271664787, - "learning_rate": 1.6630644303340824e-06, - "loss": 0.836, - "step": 6290 - }, - { - "epoch": 0.5673445461514182, - "grad_norm": 1.3847316635551399, - "learning_rate": 1.662488584117567e-06, - "loss": 0.9508, - "step": 6291 - }, - { - "epoch": 0.5674347296748884, - "grad_norm": 2.027108651055689, - "learning_rate": 1.6619127666993867e-06, - "loss": 0.9933, - "step": 6292 - }, - { - "epoch": 0.5675249131983586, - "grad_norm": 2.051125575632599, - "learning_rate": 1.6613369781286727e-06, - "loss": 0.9469, - "step": 6293 - }, - { - "epoch": 0.567615096721829, - "grad_norm": 1.7044445140890554, - "learning_rate": 1.6607612184545562e-06, - "loss": 0.874, - "step": 6294 - }, - { - "epoch": 0.5677052802452992, - "grad_norm": 1.4583566482133448, - "learning_rate": 1.6601854877261617e-06, - "loss": 1.0007, - "step": 6295 - }, - { - "epoch": 0.5677954637687694, - "grad_norm": 1.3616906535784057, - "learning_rate": 1.6596097859926163e-06, - "loss": 0.9332, - "step": 6296 - }, - { - "epoch": 0.5678856472922397, - "grad_norm": 1.4702995729435542, - "learning_rate": 1.6590341133030407e-06, - "loss": 1.0174, - "step": 6297 - }, - { - "epoch": 0.56797583081571, - "grad_norm": 1.355293152618055, - "learning_rate": 1.658458469706554e-06, - "loss": 1.0174, - "step": 6298 - }, - { - "epoch": 0.5680660143391802, - "grad_norm": 1.4177661257065384, - "learning_rate": 1.6578828552522746e-06, - "loss": 0.8989, - "step": 6299 - }, - { - "epoch": 0.5681561978626505, - "grad_norm": 1.3854328479634535, - "learning_rate": 1.6573072699893156e-06, - "loss": 0.9889, - "step": 6300 - }, - { - "epoch": 0.5682463813861207, - "grad_norm": 1.5454381569590105, - "learning_rate": 1.6567317139667906e-06, - "loss": 0.9099, - "step": 6301 - }, - { - "epoch": 0.5683365649095911, - "grad_norm": 1.7134150155829146, - "learning_rate": 1.6561561872338087e-06, - "loss": 0.9573, - "step": 6302 - }, - { - "epoch": 0.5684267484330613, - "grad_norm": 1.3499184901744052, - "learning_rate": 1.6555806898394764e-06, - "loss": 0.8549, - "step": 6303 - }, - { - "epoch": 0.5685169319565315, - "grad_norm": 1.4537396515390186, - "learning_rate": 1.6550052218328987e-06, - "loss": 0.9132, - "step": 6304 - }, - { - "epoch": 0.5686071154800018, - "grad_norm": 1.3945035958419838, - "learning_rate": 1.6544297832631777e-06, - "loss": 0.915, - "step": 6305 - }, - { - "epoch": 0.5686972990034721, - "grad_norm": 1.6813173415300537, - "learning_rate": 1.6538543741794135e-06, - "loss": 0.9435, - "step": 6306 - }, - { - "epoch": 0.5687874825269423, - "grad_norm": 1.332724695566901, - "learning_rate": 1.6532789946307028e-06, - "loss": 0.9099, - "step": 6307 - }, - { - "epoch": 0.5688776660504126, - "grad_norm": 0.7056156339268751, - "learning_rate": 1.6527036446661393e-06, - "loss": 0.8647, - "step": 6308 - }, - { - "epoch": 0.5689678495738828, - "grad_norm": 1.3501170814041075, - "learning_rate": 1.6521283243348165e-06, - "loss": 0.9765, - "step": 6309 - }, - { - "epoch": 0.5690580330973531, - "grad_norm": 1.3674667945052275, - "learning_rate": 1.6515530336858227e-06, - "loss": 0.936, - "step": 6310 - }, - { - "epoch": 0.5691482166208234, - "grad_norm": 1.4034963987091578, - "learning_rate": 1.6509777727682457e-06, - "loss": 0.9849, - "step": 6311 - }, - { - "epoch": 0.5692384001442936, - "grad_norm": 1.378363137635988, - "learning_rate": 1.65040254163117e-06, - "loss": 0.977, - "step": 6312 - }, - { - "epoch": 0.569328583667764, - "grad_norm": 1.9873563342682357, - "learning_rate": 1.649827340323676e-06, - "loss": 0.8411, - "step": 6313 - }, - { - "epoch": 0.5694187671912342, - "grad_norm": 1.5689185488913757, - "learning_rate": 1.6492521688948454e-06, - "loss": 0.9073, - "step": 6314 - }, - { - "epoch": 0.5695089507147044, - "grad_norm": 1.5538398310379418, - "learning_rate": 1.6486770273937526e-06, - "loss": 0.9621, - "step": 6315 - }, - { - "epoch": 0.5695991342381747, - "grad_norm": 1.303986183856836, - "learning_rate": 1.6481019158694738e-06, - "loss": 0.9128, - "step": 6316 - }, - { - "epoch": 0.569689317761645, - "grad_norm": 0.8795009720185912, - "learning_rate": 1.6475268343710792e-06, - "loss": 0.8923, - "step": 6317 - }, - { - "epoch": 0.5697795012851152, - "grad_norm": 1.803127853417742, - "learning_rate": 1.6469517829476396e-06, - "loss": 0.8717, - "step": 6318 - }, - { - "epoch": 0.5698696848085855, - "grad_norm": 1.4816615953276402, - "learning_rate": 1.64637676164822e-06, - "loss": 0.9212, - "step": 6319 - }, - { - "epoch": 0.5699598683320557, - "grad_norm": 1.2729338603671687, - "learning_rate": 1.6458017705218848e-06, - "loss": 0.9711, - "step": 6320 - }, - { - "epoch": 0.570050051855526, - "grad_norm": 1.6213541947693726, - "learning_rate": 1.645226809617696e-06, - "loss": 0.954, - "step": 6321 - }, - { - "epoch": 0.5701402353789963, - "grad_norm": 1.5523836327523248, - "learning_rate": 1.6446518789847112e-06, - "loss": 0.9738, - "step": 6322 - }, - { - "epoch": 0.5702304189024665, - "grad_norm": 1.5866274530843658, - "learning_rate": 1.6440769786719883e-06, - "loss": 0.9608, - "step": 6323 - }, - { - "epoch": 0.5703206024259367, - "grad_norm": 1.1728690953655427, - "learning_rate": 1.6435021087285803e-06, - "loss": 0.98, - "step": 6324 - }, - { - "epoch": 0.5704107859494071, - "grad_norm": 1.555428946353166, - "learning_rate": 1.642927269203537e-06, - "loss": 0.9719, - "step": 6325 - }, - { - "epoch": 0.5705009694728773, - "grad_norm": 1.5335727124707241, - "learning_rate": 1.642352460145909e-06, - "loss": 0.9917, - "step": 6326 - }, - { - "epoch": 0.5705911529963476, - "grad_norm": 1.2821537180166191, - "learning_rate": 1.6417776816047402e-06, - "loss": 0.9462, - "step": 6327 - }, - { - "epoch": 0.5706813365198178, - "grad_norm": 1.4860517969307163, - "learning_rate": 1.6412029336290755e-06, - "loss": 0.9284, - "step": 6328 - }, - { - "epoch": 0.5707715200432881, - "grad_norm": 1.8360907957319292, - "learning_rate": 1.6406282162679551e-06, - "loss": 0.9348, - "step": 6329 - }, - { - "epoch": 0.5708617035667584, - "grad_norm": 1.4577123773175356, - "learning_rate": 1.6400535295704162e-06, - "loss": 0.9129, - "step": 6330 - }, - { - "epoch": 0.5709518870902286, - "grad_norm": 1.3166497794810723, - "learning_rate": 1.6394788735854955e-06, - "loss": 0.8766, - "step": 6331 - }, - { - "epoch": 0.5710420706136988, - "grad_norm": 1.4740092490021217, - "learning_rate": 1.6389042483622246e-06, - "loss": 1.0022, - "step": 6332 - }, - { - "epoch": 0.5711322541371692, - "grad_norm": 1.4767435760074172, - "learning_rate": 1.638329653949635e-06, - "loss": 0.9363, - "step": 6333 - }, - { - "epoch": 0.5712224376606394, - "grad_norm": 1.4349049073455669, - "learning_rate": 1.637755090396753e-06, - "loss": 0.9048, - "step": 6334 - }, - { - "epoch": 0.5713126211841096, - "grad_norm": 0.7305587060382569, - "learning_rate": 1.6371805577526039e-06, - "loss": 0.8537, - "step": 6335 - }, - { - "epoch": 0.5714028047075799, - "grad_norm": 1.5649690575023971, - "learning_rate": 1.636606056066211e-06, - "loss": 0.8863, - "step": 6336 - }, - { - "epoch": 0.5714929882310502, - "grad_norm": 1.471620311691034, - "learning_rate": 1.636031585386592e-06, - "loss": 0.8653, - "step": 6337 - }, - { - "epoch": 0.5715831717545204, - "grad_norm": 1.2006869197766814, - "learning_rate": 1.635457145762766e-06, - "loss": 0.9991, - "step": 6338 - }, - { - "epoch": 0.5716733552779907, - "grad_norm": 1.7236161444617235, - "learning_rate": 1.6348827372437456e-06, - "loss": 0.9677, - "step": 6339 - }, - { - "epoch": 0.571763538801461, - "grad_norm": 1.174589649538449, - "learning_rate": 1.634308359878544e-06, - "loss": 0.9389, - "step": 6340 - }, - { - "epoch": 0.5718537223249313, - "grad_norm": 1.4260007924402718, - "learning_rate": 1.6337340137161695e-06, - "loss": 1.0206, - "step": 6341 - }, - { - "epoch": 0.5719439058484015, - "grad_norm": 1.406150475265053, - "learning_rate": 1.6331596988056277e-06, - "loss": 0.9429, - "step": 6342 - }, - { - "epoch": 0.5720340893718717, - "grad_norm": 1.3141448295648008, - "learning_rate": 1.632585415195924e-06, - "loss": 0.9698, - "step": 6343 - }, - { - "epoch": 0.5721242728953421, - "grad_norm": 1.1895240299031289, - "learning_rate": 1.6320111629360583e-06, - "loss": 0.9712, - "step": 6344 - }, - { - "epoch": 0.5722144564188123, - "grad_norm": 1.6994620313451803, - "learning_rate": 1.631436942075029e-06, - "loss": 0.9159, - "step": 6345 - }, - { - "epoch": 0.5723046399422825, - "grad_norm": 1.4201176441513501, - "learning_rate": 1.630862752661833e-06, - "loss": 0.917, - "step": 6346 - }, - { - "epoch": 0.5723948234657528, - "grad_norm": 1.3924477022578816, - "learning_rate": 1.6302885947454612e-06, - "loss": 0.8729, - "step": 6347 - }, - { - "epoch": 0.5724850069892231, - "grad_norm": 1.4945180378881748, - "learning_rate": 1.6297144683749057e-06, - "loss": 0.9591, - "step": 6348 - }, - { - "epoch": 0.5725751905126933, - "grad_norm": 1.525179839864812, - "learning_rate": 1.629140373599153e-06, - "loss": 0.9464, - "step": 6349 - }, - { - "epoch": 0.5726653740361636, - "grad_norm": 1.4082653439303991, - "learning_rate": 1.628566310467189e-06, - "loss": 0.9542, - "step": 6350 - }, - { - "epoch": 0.5727555575596338, - "grad_norm": 1.6062307660921702, - "learning_rate": 1.6279922790279957e-06, - "loss": 0.9094, - "step": 6351 - }, - { - "epoch": 0.5728457410831042, - "grad_norm": 1.420593354812111, - "learning_rate": 1.6274182793305512e-06, - "loss": 0.9602, - "step": 6352 - }, - { - "epoch": 0.5729359246065744, - "grad_norm": 1.7843089022979122, - "learning_rate": 1.626844311423835e-06, - "loss": 0.9636, - "step": 6353 - }, - { - "epoch": 0.5730261081300446, - "grad_norm": 1.5925213434774281, - "learning_rate": 1.6262703753568181e-06, - "loss": 1.0266, - "step": 6354 - }, - { - "epoch": 0.5731162916535149, - "grad_norm": 1.7965084199567405, - "learning_rate": 1.6256964711784747e-06, - "loss": 1.0159, - "step": 6355 - }, - { - "epoch": 0.5732064751769852, - "grad_norm": 1.4729341199273367, - "learning_rate": 1.6251225989377723e-06, - "loss": 0.9608, - "step": 6356 - }, - { - "epoch": 0.5732966587004554, - "grad_norm": 1.5571447534912082, - "learning_rate": 1.624548758683676e-06, - "loss": 0.9501, - "step": 6357 - }, - { - "epoch": 0.5733868422239257, - "grad_norm": 1.4573405655934186, - "learning_rate": 1.6239749504651505e-06, - "loss": 1.039, - "step": 6358 - }, - { - "epoch": 0.5734770257473959, - "grad_norm": 1.4318930701285717, - "learning_rate": 1.6234011743311552e-06, - "loss": 0.9188, - "step": 6359 - }, - { - "epoch": 0.5735672092708662, - "grad_norm": 1.481167107689159, - "learning_rate": 1.6228274303306483e-06, - "loss": 0.8813, - "step": 6360 - }, - { - "epoch": 0.5736573927943365, - "grad_norm": 1.3202987760473863, - "learning_rate": 1.6222537185125847e-06, - "loss": 1.0259, - "step": 6361 - }, - { - "epoch": 0.5737475763178067, - "grad_norm": 1.2348747207636652, - "learning_rate": 1.6216800389259172e-06, - "loss": 0.9673, - "step": 6362 - }, - { - "epoch": 0.573837759841277, - "grad_norm": 1.3697413107702057, - "learning_rate": 1.6211063916195949e-06, - "loss": 0.9378, - "step": 6363 - }, - { - "epoch": 0.5739279433647473, - "grad_norm": 1.6402474831886094, - "learning_rate": 1.6205327766425633e-06, - "loss": 0.9458, - "step": 6364 - }, - { - "epoch": 0.5740181268882175, - "grad_norm": 1.249291553010066, - "learning_rate": 1.6199591940437689e-06, - "loss": 0.9581, - "step": 6365 - }, - { - "epoch": 0.5741083104116878, - "grad_norm": 1.2966733109239394, - "learning_rate": 1.6193856438721505e-06, - "loss": 0.9116, - "step": 6366 - }, - { - "epoch": 0.5741984939351581, - "grad_norm": 1.701507920579974, - "learning_rate": 1.6188121261766483e-06, - "loss": 0.9519, - "step": 6367 - }, - { - "epoch": 0.5742886774586283, - "grad_norm": 1.2667428248917272, - "learning_rate": 1.6182386410061976e-06, - "loss": 0.8748, - "step": 6368 - }, - { - "epoch": 0.5743788609820986, - "grad_norm": 1.6219802822273914, - "learning_rate": 1.61766518840973e-06, - "loss": 0.8586, - "step": 6369 - }, - { - "epoch": 0.5744690445055688, - "grad_norm": 1.3935947211970352, - "learning_rate": 1.6170917684361779e-06, - "loss": 0.8234, - "step": 6370 - }, - { - "epoch": 0.5745592280290391, - "grad_norm": 1.4118368431902997, - "learning_rate": 1.6165183811344662e-06, - "loss": 1.0257, - "step": 6371 - }, - { - "epoch": 0.5746494115525094, - "grad_norm": 1.6142468091493938, - "learning_rate": 1.6159450265535218e-06, - "loss": 0.9359, - "step": 6372 - }, - { - "epoch": 0.5747395950759796, - "grad_norm": 1.3065434577975765, - "learning_rate": 1.6153717047422652e-06, - "loss": 1.0286, - "step": 6373 - }, - { - "epoch": 0.5748297785994498, - "grad_norm": 2.0135107262673575, - "learning_rate": 1.6147984157496155e-06, - "loss": 0.8317, - "step": 6374 - }, - { - "epoch": 0.5749199621229202, - "grad_norm": 1.3254846244418126, - "learning_rate": 1.6142251596244886e-06, - "loss": 0.8775, - "step": 6375 - }, - { - "epoch": 0.5750101456463904, - "grad_norm": 1.3772367144802677, - "learning_rate": 1.6136519364157983e-06, - "loss": 0.9987, - "step": 6376 - }, - { - "epoch": 0.5751003291698606, - "grad_norm": 1.5258603123867107, - "learning_rate": 1.6130787461724555e-06, - "loss": 0.8878, - "step": 6377 - }, - { - "epoch": 0.5751905126933309, - "grad_norm": 1.3651968278210387, - "learning_rate": 1.6125055889433679e-06, - "loss": 1.0195, - "step": 6378 - }, - { - "epoch": 0.5752806962168012, - "grad_norm": 1.212516542695447, - "learning_rate": 1.6119324647774386e-06, - "loss": 0.9751, - "step": 6379 - }, - { - "epoch": 0.5753708797402715, - "grad_norm": 1.5876360286794822, - "learning_rate": 1.6113593737235724e-06, - "loss": 0.9619, - "step": 6380 - }, - { - "epoch": 0.5754610632637417, - "grad_norm": 1.6206162818118097, - "learning_rate": 1.6107863158306665e-06, - "loss": 0.8947, - "step": 6381 - }, - { - "epoch": 0.5755512467872119, - "grad_norm": 1.5548881516294857, - "learning_rate": 1.610213291147619e-06, - "loss": 0.8391, - "step": 6382 - }, - { - "epoch": 0.5756414303106823, - "grad_norm": 1.784486073044942, - "learning_rate": 1.609640299723322e-06, - "loss": 0.972, - "step": 6383 - }, - { - "epoch": 0.5757316138341525, - "grad_norm": 1.2695148173515816, - "learning_rate": 1.609067341606668e-06, - "loss": 0.9303, - "step": 6384 - }, - { - "epoch": 0.5758217973576227, - "grad_norm": 1.579192904989968, - "learning_rate": 1.6084944168465438e-06, - "loss": 0.939, - "step": 6385 - }, - { - "epoch": 0.5759119808810931, - "grad_norm": 1.2773977379078423, - "learning_rate": 1.6079215254918339e-06, - "loss": 0.9688, - "step": 6386 - }, - { - "epoch": 0.5760021644045633, - "grad_norm": 1.8582719367908598, - "learning_rate": 1.6073486675914222e-06, - "loss": 0.8523, - "step": 6387 - }, - { - "epoch": 0.5760923479280335, - "grad_norm": 1.569265747399692, - "learning_rate": 1.606775843194187e-06, - "loss": 0.9694, - "step": 6388 - }, - { - "epoch": 0.5761825314515038, - "grad_norm": 1.9619741549291585, - "learning_rate": 1.6062030523490053e-06, - "loss": 0.9406, - "step": 6389 - }, - { - "epoch": 0.5762727149749741, - "grad_norm": 1.5463391541518587, - "learning_rate": 1.60563029510475e-06, - "loss": 0.8704, - "step": 6390 - }, - { - "epoch": 0.5763628984984444, - "grad_norm": 0.8124466658473629, - "learning_rate": 1.6050575715102927e-06, - "loss": 0.788, - "step": 6391 - }, - { - "epoch": 0.5764530820219146, - "grad_norm": 1.56887075559753, - "learning_rate": 1.6044848816145014e-06, - "loss": 0.9975, - "step": 6392 - }, - { - "epoch": 0.5765432655453848, - "grad_norm": 1.420704034569538, - "learning_rate": 1.60391222546624e-06, - "loss": 0.9685, - "step": 6393 - }, - { - "epoch": 0.5766334490688552, - "grad_norm": 1.466374119206916, - "learning_rate": 1.6033396031143725e-06, - "loss": 0.9372, - "step": 6394 - }, - { - "epoch": 0.5767236325923254, - "grad_norm": 1.5027201148859561, - "learning_rate": 1.602767014607757e-06, - "loss": 0.9459, - "step": 6395 - }, - { - "epoch": 0.5768138161157956, - "grad_norm": 1.6734647946308936, - "learning_rate": 1.6021944599952493e-06, - "loss": 0.9612, - "step": 6396 - }, - { - "epoch": 0.5769039996392659, - "grad_norm": 1.6068706842080391, - "learning_rate": 1.6016219393257048e-06, - "loss": 0.9627, - "step": 6397 - }, - { - "epoch": 0.5769941831627362, - "grad_norm": 1.3230414544471185, - "learning_rate": 1.6010494526479722e-06, - "loss": 0.9015, - "step": 6398 - }, - { - "epoch": 0.5770843666862064, - "grad_norm": 2.2731509732725717, - "learning_rate": 1.6004770000109006e-06, - "loss": 0.8983, - "step": 6399 - }, - { - "epoch": 0.5771745502096767, - "grad_norm": 1.297634499842415, - "learning_rate": 1.5999045814633348e-06, - "loss": 0.9113, - "step": 6400 - }, - { - "epoch": 0.5772647337331469, - "grad_norm": 1.7520014353271598, - "learning_rate": 1.5993321970541151e-06, - "loss": 0.9548, - "step": 6401 - }, - { - "epoch": 0.5773549172566173, - "grad_norm": 1.6262873172216918, - "learning_rate": 1.5987598468320825e-06, - "loss": 0.9631, - "step": 6402 - }, - { - "epoch": 0.5774451007800875, - "grad_norm": 1.2846020370261755, - "learning_rate": 1.5981875308460717e-06, - "loss": 0.9389, - "step": 6403 - }, - { - "epoch": 0.5775352843035577, - "grad_norm": 1.6246360224459746, - "learning_rate": 1.5976152491449169e-06, - "loss": 0.9344, - "step": 6404 - }, - { - "epoch": 0.577625467827028, - "grad_norm": 1.4369880759873357, - "learning_rate": 1.5970430017774468e-06, - "loss": 1.0137, - "step": 6405 - }, - { - "epoch": 0.5777156513504983, - "grad_norm": 1.3408844578924088, - "learning_rate": 1.5964707887924904e-06, - "loss": 0.9635, - "step": 6406 - }, - { - "epoch": 0.5778058348739685, - "grad_norm": 1.2335538909637982, - "learning_rate": 1.5958986102388714e-06, - "loss": 0.8355, - "step": 6407 - }, - { - "epoch": 0.5778960183974388, - "grad_norm": 1.3636462753109075, - "learning_rate": 1.5953264661654104e-06, - "loss": 0.904, - "step": 6408 - }, - { - "epoch": 0.5779862019209091, - "grad_norm": 1.4499506580244605, - "learning_rate": 1.5947543566209276e-06, - "loss": 0.9889, - "step": 6409 - }, - { - "epoch": 0.5780763854443793, - "grad_norm": 1.364270307829096, - "learning_rate": 1.5941822816542367e-06, - "loss": 0.9235, - "step": 6410 - }, - { - "epoch": 0.5781665689678496, - "grad_norm": 1.3357928013656297, - "learning_rate": 1.5936102413141519e-06, - "loss": 0.9046, - "step": 6411 - }, - { - "epoch": 0.5782567524913198, - "grad_norm": 1.7522445997605862, - "learning_rate": 1.5930382356494823e-06, - "loss": 0.8745, - "step": 6412 - }, - { - "epoch": 0.5783469360147901, - "grad_norm": 1.3196718817438648, - "learning_rate": 1.5924662647090335e-06, - "loss": 0.9966, - "step": 6413 - }, - { - "epoch": 0.5784371195382604, - "grad_norm": 1.354543672558146, - "learning_rate": 1.5918943285416108e-06, - "loss": 0.9614, - "step": 6414 - }, - { - "epoch": 0.5785273030617306, - "grad_norm": 1.1826043352709201, - "learning_rate": 1.5913224271960139e-06, - "loss": 0.8819, - "step": 6415 - }, - { - "epoch": 0.5786174865852008, - "grad_norm": 1.2962620217217389, - "learning_rate": 1.590750560721041e-06, - "loss": 0.9567, - "step": 6416 - }, - { - "epoch": 0.5787076701086712, - "grad_norm": 1.2476239032409941, - "learning_rate": 1.5901787291654874e-06, - "loss": 0.9165, - "step": 6417 - }, - { - "epoch": 0.5787978536321414, - "grad_norm": 1.305586225519142, - "learning_rate": 1.5896069325781435e-06, - "loss": 0.8187, - "step": 6418 - }, - { - "epoch": 0.5788880371556117, - "grad_norm": 1.347872730504039, - "learning_rate": 1.5890351710077998e-06, - "loss": 0.9775, - "step": 6419 - }, - { - "epoch": 0.5789782206790819, - "grad_norm": 1.2713199632235332, - "learning_rate": 1.5884634445032406e-06, - "loss": 0.9996, - "step": 6420 - }, - { - "epoch": 0.5790684042025522, - "grad_norm": 0.6546290451210363, - "learning_rate": 1.5878917531132501e-06, - "loss": 0.8183, - "step": 6421 - }, - { - "epoch": 0.5791585877260225, - "grad_norm": 1.7701862798610561, - "learning_rate": 1.5873200968866077e-06, - "loss": 0.8173, - "step": 6422 - }, - { - "epoch": 0.5792487712494927, - "grad_norm": 1.929265829494184, - "learning_rate": 1.586748475872089e-06, - "loss": 0.9195, - "step": 6423 - }, - { - "epoch": 0.5793389547729629, - "grad_norm": 1.404912821484876, - "learning_rate": 1.58617689011847e-06, - "loss": 0.808, - "step": 6424 - }, - { - "epoch": 0.5794291382964333, - "grad_norm": 1.4340477829903493, - "learning_rate": 1.5856053396745198e-06, - "loss": 0.9863, - "step": 6425 - }, - { - "epoch": 0.5795193218199035, - "grad_norm": 1.2774415051679162, - "learning_rate": 1.5850338245890078e-06, - "loss": 1.0146, - "step": 6426 - }, - { - "epoch": 0.5796095053433737, - "grad_norm": 1.4531176987331273, - "learning_rate": 1.5844623449106974e-06, - "loss": 0.9279, - "step": 6427 - }, - { - "epoch": 0.579699688866844, - "grad_norm": 1.433988012830359, - "learning_rate": 1.583890900688351e-06, - "loss": 0.9889, - "step": 6428 - }, - { - "epoch": 0.5797898723903143, - "grad_norm": 1.783134668045228, - "learning_rate": 1.583319491970728e-06, - "loss": 1.0158, - "step": 6429 - }, - { - "epoch": 0.5798800559137846, - "grad_norm": 1.2297517170794234, - "learning_rate": 1.5827481188065828e-06, - "loss": 0.9616, - "step": 6430 - }, - { - "epoch": 0.5799702394372548, - "grad_norm": 1.335562714769813, - "learning_rate": 1.5821767812446689e-06, - "loss": 0.9147, - "step": 6431 - }, - { - "epoch": 0.5800604229607251, - "grad_norm": 1.5159308528027016, - "learning_rate": 1.581605479333736e-06, - "loss": 1.063, - "step": 6432 - }, - { - "epoch": 0.5801506064841954, - "grad_norm": 1.548843595372178, - "learning_rate": 1.5810342131225308e-06, - "loss": 0.9577, - "step": 6433 - }, - { - "epoch": 0.5802407900076656, - "grad_norm": 1.6032996456205093, - "learning_rate": 1.580462982659797e-06, - "loss": 0.981, - "step": 6434 - }, - { - "epoch": 0.5803309735311358, - "grad_norm": 1.5708531450123497, - "learning_rate": 1.5798917879942736e-06, - "loss": 1.0102, - "step": 6435 - }, - { - "epoch": 0.5804211570546062, - "grad_norm": 1.2824463724892563, - "learning_rate": 1.5793206291747006e-06, - "loss": 0.8902, - "step": 6436 - }, - { - "epoch": 0.5805113405780764, - "grad_norm": 1.193856942544405, - "learning_rate": 1.57874950624981e-06, - "loss": 0.8841, - "step": 6437 - }, - { - "epoch": 0.5806015241015466, - "grad_norm": 1.4873699291645173, - "learning_rate": 1.5781784192683351e-06, - "loss": 0.9433, - "step": 6438 - }, - { - "epoch": 0.5806917076250169, - "grad_norm": 1.1253815639651443, - "learning_rate": 1.5776073682790033e-06, - "loss": 0.9104, - "step": 6439 - }, - { - "epoch": 0.5807818911484872, - "grad_norm": 1.5226742128317703, - "learning_rate": 1.5770363533305393e-06, - "loss": 0.9661, - "step": 6440 - }, - { - "epoch": 0.5808720746719575, - "grad_norm": 1.3991777815537294, - "learning_rate": 1.5764653744716665e-06, - "loss": 0.9374, - "step": 6441 - }, - { - "epoch": 0.5809622581954277, - "grad_norm": 1.6112346830150506, - "learning_rate": 1.575894431751103e-06, - "loss": 0.9651, - "step": 6442 - }, - { - "epoch": 0.5810524417188979, - "grad_norm": 1.413697717308678, - "learning_rate": 1.575323525217565e-06, - "loss": 0.9635, - "step": 6443 - }, - { - "epoch": 0.5811426252423683, - "grad_norm": 1.4673905269725172, - "learning_rate": 1.574752654919766e-06, - "loss": 1.0219, - "step": 6444 - }, - { - "epoch": 0.5812328087658385, - "grad_norm": 1.3404243034466992, - "learning_rate": 1.5741818209064146e-06, - "loss": 0.9208, - "step": 6445 - }, - { - "epoch": 0.5813229922893087, - "grad_norm": 1.6163821219058083, - "learning_rate": 1.5736110232262183e-06, - "loss": 0.9547, - "step": 6446 - }, - { - "epoch": 0.581413175812779, - "grad_norm": 1.6853426812919678, - "learning_rate": 1.5730402619278804e-06, - "loss": 0.9906, - "step": 6447 - }, - { - "epoch": 0.5815033593362493, - "grad_norm": 0.7312585161805449, - "learning_rate": 1.5724695370601024e-06, - "loss": 0.7818, - "step": 6448 - }, - { - "epoch": 0.5815935428597195, - "grad_norm": 2.2674735059105107, - "learning_rate": 1.5718988486715798e-06, - "loss": 0.8741, - "step": 6449 - }, - { - "epoch": 0.5816837263831898, - "grad_norm": 1.3098013245279252, - "learning_rate": 1.5713281968110087e-06, - "loss": 0.935, - "step": 6450 - }, - { - "epoch": 0.58177390990666, - "grad_norm": 1.5273087831675116, - "learning_rate": 1.5707575815270796e-06, - "loss": 0.9322, - "step": 6451 - }, - { - "epoch": 0.5818640934301303, - "grad_norm": 1.6143306247284908, - "learning_rate": 1.57018700286848e-06, - "loss": 0.9414, - "step": 6452 - }, - { - "epoch": 0.5819542769536006, - "grad_norm": 1.4200744966656933, - "learning_rate": 1.5696164608838956e-06, - "loss": 0.9632, - "step": 6453 - }, - { - "epoch": 0.5820444604770708, - "grad_norm": 2.3648798304345697, - "learning_rate": 1.5690459556220073e-06, - "loss": 0.9574, - "step": 6454 - }, - { - "epoch": 0.582134644000541, - "grad_norm": 1.513511678770971, - "learning_rate": 1.5684754871314949e-06, - "loss": 1.0069, - "step": 6455 - }, - { - "epoch": 0.5822248275240114, - "grad_norm": 1.4209003671324956, - "learning_rate": 1.5679050554610335e-06, - "loss": 0.9447, - "step": 6456 - }, - { - "epoch": 0.5823150110474816, - "grad_norm": 1.278287299934211, - "learning_rate": 1.567334660659295e-06, - "loss": 0.9491, - "step": 6457 - }, - { - "epoch": 0.5824051945709519, - "grad_norm": 1.3370301535493672, - "learning_rate": 1.5667643027749488e-06, - "loss": 0.9585, - "step": 6458 - }, - { - "epoch": 0.5824953780944222, - "grad_norm": 1.649682898828442, - "learning_rate": 1.5661939818566614e-06, - "loss": 1.0183, - "step": 6459 - }, - { - "epoch": 0.5825855616178924, - "grad_norm": 1.498592113358351, - "learning_rate": 1.5656236979530956e-06, - "loss": 0.8114, - "step": 6460 - }, - { - "epoch": 0.5826757451413627, - "grad_norm": 1.5658385658413587, - "learning_rate": 1.5650534511129106e-06, - "loss": 0.8909, - "step": 6461 - }, - { - "epoch": 0.5827659286648329, - "grad_norm": 1.5445841771011923, - "learning_rate": 1.5644832413847635e-06, - "loss": 0.9263, - "step": 6462 - }, - { - "epoch": 0.5828561121883032, - "grad_norm": 1.222347866569764, - "learning_rate": 1.5639130688173082e-06, - "loss": 0.9907, - "step": 6463 - }, - { - "epoch": 0.5829462957117735, - "grad_norm": 1.31298828125, - "learning_rate": 1.5633429334591932e-06, - "loss": 0.9188, - "step": 6464 - }, - { - "epoch": 0.5830364792352437, - "grad_norm": 1.5309297654968224, - "learning_rate": 1.562772835359068e-06, - "loss": 0.975, - "step": 6465 - }, - { - "epoch": 0.5831266627587139, - "grad_norm": 2.0702273945140273, - "learning_rate": 1.5622027745655753e-06, - "loss": 0.9151, - "step": 6466 - }, - { - "epoch": 0.5832168462821843, - "grad_norm": 1.3983143587079188, - "learning_rate": 1.561632751127355e-06, - "loss": 0.9035, - "step": 6467 - }, - { - "epoch": 0.5833070298056545, - "grad_norm": 1.5656577436179202, - "learning_rate": 1.561062765093046e-06, - "loss": 0.9939, - "step": 6468 - }, - { - "epoch": 0.5833972133291248, - "grad_norm": 1.628904493591679, - "learning_rate": 1.5604928165112817e-06, - "loss": 0.9446, - "step": 6469 - }, - { - "epoch": 0.583487396852595, - "grad_norm": 1.4325496186988358, - "learning_rate": 1.5599229054306945e-06, - "loss": 0.9358, - "step": 6470 - }, - { - "epoch": 0.5835775803760653, - "grad_norm": 1.4082660634529711, - "learning_rate": 1.5593530318999111e-06, - "loss": 0.9469, - "step": 6471 - }, - { - "epoch": 0.5836677638995356, - "grad_norm": 1.354182707862722, - "learning_rate": 1.5587831959675572e-06, - "loss": 0.9738, - "step": 6472 - }, - { - "epoch": 0.5837579474230058, - "grad_norm": 1.3472011074015173, - "learning_rate": 1.5582133976822534e-06, - "loss": 0.9283, - "step": 6473 - }, - { - "epoch": 0.583848130946476, - "grad_norm": 1.5468002455402157, - "learning_rate": 1.5576436370926185e-06, - "loss": 0.9021, - "step": 6474 - }, - { - "epoch": 0.5839383144699464, - "grad_norm": 1.1713401082327297, - "learning_rate": 1.5570739142472679e-06, - "loss": 0.9287, - "step": 6475 - }, - { - "epoch": 0.5840284979934166, - "grad_norm": 1.2502278120348602, - "learning_rate": 1.5565042291948127e-06, - "loss": 1.01, - "step": 6476 - }, - { - "epoch": 0.5841186815168868, - "grad_norm": 1.2978594157581955, - "learning_rate": 1.5559345819838624e-06, - "loss": 0.9845, - "step": 6477 - }, - { - "epoch": 0.5842088650403571, - "grad_norm": 1.1919952437542747, - "learning_rate": 1.5553649726630226e-06, - "loss": 0.9236, - "step": 6478 - }, - { - "epoch": 0.5842990485638274, - "grad_norm": 1.315879693828222, - "learning_rate": 1.5547954012808942e-06, - "loss": 0.8765, - "step": 6479 - }, - { - "epoch": 0.5843892320872976, - "grad_norm": 1.672638620696155, - "learning_rate": 1.5542258678860776e-06, - "loss": 0.9163, - "step": 6480 - }, - { - "epoch": 0.5844794156107679, - "grad_norm": 1.381440036525761, - "learning_rate": 1.553656372527167e-06, - "loss": 1.0613, - "step": 6481 - }, - { - "epoch": 0.5845695991342382, - "grad_norm": 1.7573145860565391, - "learning_rate": 1.5530869152527568e-06, - "loss": 1.0458, - "step": 6482 - }, - { - "epoch": 0.5846597826577085, - "grad_norm": 1.8941729334559605, - "learning_rate": 1.5525174961114353e-06, - "loss": 0.9629, - "step": 6483 - }, - { - "epoch": 0.5847499661811787, - "grad_norm": 1.3675079406145099, - "learning_rate": 1.5519481151517875e-06, - "loss": 0.9893, - "step": 6484 - }, - { - "epoch": 0.5848401497046489, - "grad_norm": 1.5047819684282218, - "learning_rate": 1.551378772422398e-06, - "loss": 0.9506, - "step": 6485 - }, - { - "epoch": 0.5849303332281193, - "grad_norm": 1.2533818274274273, - "learning_rate": 1.5508094679718447e-06, - "loss": 0.9547, - "step": 6486 - }, - { - "epoch": 0.5850205167515895, - "grad_norm": 1.7666550728879333, - "learning_rate": 1.5502402018487048e-06, - "loss": 0.8546, - "step": 6487 - }, - { - "epoch": 0.5851107002750597, - "grad_norm": 1.4423164450551036, - "learning_rate": 1.54967097410155e-06, - "loss": 0.9457, - "step": 6488 - }, - { - "epoch": 0.58520088379853, - "grad_norm": 1.2853505645687795, - "learning_rate": 1.5491017847789519e-06, - "loss": 0.8746, - "step": 6489 - }, - { - "epoch": 0.5852910673220003, - "grad_norm": 1.4543498629769387, - "learning_rate": 1.5485326339294755e-06, - "loss": 0.8926, - "step": 6490 - }, - { - "epoch": 0.5853812508454705, - "grad_norm": 1.4175888687613845, - "learning_rate": 1.5479635216016832e-06, - "loss": 0.9111, - "step": 6491 - }, - { - "epoch": 0.5854714343689408, - "grad_norm": 1.4185425829407057, - "learning_rate": 1.547394447844137e-06, - "loss": 0.8593, - "step": 6492 - }, - { - "epoch": 0.585561617892411, - "grad_norm": 1.4458660818269993, - "learning_rate": 1.546825412705391e-06, - "loss": 0.8661, - "step": 6493 - }, - { - "epoch": 0.5856518014158814, - "grad_norm": 1.4791892770821091, - "learning_rate": 1.5462564162340007e-06, - "loss": 0.976, - "step": 6494 - }, - { - "epoch": 0.5857419849393516, - "grad_norm": 1.6100181109434508, - "learning_rate": 1.5456874584785144e-06, - "loss": 0.946, - "step": 6495 - }, - { - "epoch": 0.5858321684628218, - "grad_norm": 1.6906969375958965, - "learning_rate": 1.5451185394874785e-06, - "loss": 0.9177, - "step": 6496 - }, - { - "epoch": 0.5859223519862921, - "grad_norm": 1.494029321451739, - "learning_rate": 1.5445496593094381e-06, - "loss": 0.9586, - "step": 6497 - }, - { - "epoch": 0.5860125355097624, - "grad_norm": 1.53687739287908, - "learning_rate": 1.5439808179929316e-06, - "loss": 0.9557, - "step": 6498 - }, - { - "epoch": 0.5861027190332326, - "grad_norm": 1.4282401841873156, - "learning_rate": 1.543412015586496e-06, - "loss": 0.9084, - "step": 6499 - }, - { - "epoch": 0.5861929025567029, - "grad_norm": 1.3888575862959822, - "learning_rate": 1.5428432521386655e-06, - "loss": 0.9128, - "step": 6500 - }, - { - "epoch": 0.5862830860801731, - "grad_norm": 1.453543079899141, - "learning_rate": 1.5422745276979688e-06, - "loss": 0.9832, - "step": 6501 - }, - { - "epoch": 0.5863732696036434, - "grad_norm": 1.3320705821922791, - "learning_rate": 1.5417058423129336e-06, - "loss": 0.9353, - "step": 6502 - }, - { - "epoch": 0.5864634531271137, - "grad_norm": 1.2900288052263014, - "learning_rate": 1.5411371960320822e-06, - "loss": 0.9813, - "step": 6503 - }, - { - "epoch": 0.5865536366505839, - "grad_norm": 1.3437570527357385, - "learning_rate": 1.5405685889039363e-06, - "loss": 0.981, - "step": 6504 - }, - { - "epoch": 0.5866438201740543, - "grad_norm": 1.465267028688815, - "learning_rate": 1.5400000209770118e-06, - "loss": 0.9401, - "step": 6505 - }, - { - "epoch": 0.5867340036975245, - "grad_norm": 1.420648905548339, - "learning_rate": 1.5394314922998208e-06, - "loss": 0.9537, - "step": 6506 - }, - { - "epoch": 0.5868241872209947, - "grad_norm": 1.9897829391718915, - "learning_rate": 1.5388630029208756e-06, - "loss": 0.9962, - "step": 6507 - }, - { - "epoch": 0.586914370744465, - "grad_norm": 1.3705786365020267, - "learning_rate": 1.5382945528886806e-06, - "loss": 0.9676, - "step": 6508 - }, - { - "epoch": 0.5870045542679353, - "grad_norm": 1.7536877196517204, - "learning_rate": 1.5377261422517412e-06, - "loss": 0.9296, - "step": 6509 - }, - { - "epoch": 0.5870947377914055, - "grad_norm": 1.3964768629605646, - "learning_rate": 1.5371577710585553e-06, - "loss": 0.9128, - "step": 6510 - }, - { - "epoch": 0.5871849213148758, - "grad_norm": 0.8204327086518503, - "learning_rate": 1.536589439357621e-06, - "loss": 0.8141, - "step": 6511 - }, - { - "epoch": 0.587275104838346, - "grad_norm": 1.3262622560874988, - "learning_rate": 1.5360211471974315e-06, - "loss": 0.9523, - "step": 6512 - }, - { - "epoch": 0.5873652883618163, - "grad_norm": 1.3915401833366203, - "learning_rate": 1.5354528946264753e-06, - "loss": 0.9971, - "step": 6513 - }, - { - "epoch": 0.5874554718852866, - "grad_norm": 1.1793728844684819, - "learning_rate": 1.5348846816932399e-06, - "loss": 0.8803, - "step": 6514 - }, - { - "epoch": 0.5875456554087568, - "grad_norm": 0.679717951125352, - "learning_rate": 1.5343165084462077e-06, - "loss": 0.7896, - "step": 6515 - }, - { - "epoch": 0.587635838932227, - "grad_norm": 1.6238168664192785, - "learning_rate": 1.5337483749338595e-06, - "loss": 0.9015, - "step": 6516 - }, - { - "epoch": 0.5877260224556974, - "grad_norm": 2.096431779708134, - "learning_rate": 1.5331802812046708e-06, - "loss": 0.9188, - "step": 6517 - }, - { - "epoch": 0.5878162059791676, - "grad_norm": 1.3893789216336714, - "learning_rate": 1.5326122273071133e-06, - "loss": 0.8022, - "step": 6518 - }, - { - "epoch": 0.5879063895026378, - "grad_norm": 1.3750716537532244, - "learning_rate": 1.532044213289659e-06, - "loss": 0.9871, - "step": 6519 - }, - { - "epoch": 0.5879965730261081, - "grad_norm": 1.4575921582093745, - "learning_rate": 1.5314762392007718e-06, - "loss": 0.8365, - "step": 6520 - }, - { - "epoch": 0.5880867565495784, - "grad_norm": 1.4315014780658495, - "learning_rate": 1.530908305088916e-06, - "loss": 1.0011, - "step": 6521 - }, - { - "epoch": 0.5881769400730487, - "grad_norm": 1.5196083657277684, - "learning_rate": 1.5303404110025501e-06, - "loss": 0.9373, - "step": 6522 - }, - { - "epoch": 0.5882671235965189, - "grad_norm": 1.2424774311403026, - "learning_rate": 1.5297725569901293e-06, - "loss": 0.9076, - "step": 6523 - }, - { - "epoch": 0.5883573071199891, - "grad_norm": 1.418155037909414, - "learning_rate": 1.5292047431001077e-06, - "loss": 0.8429, - "step": 6524 - }, - { - "epoch": 0.5884474906434595, - "grad_norm": 0.779731648328546, - "learning_rate": 1.5286369693809321e-06, - "loss": 0.8303, - "step": 6525 - }, - { - "epoch": 0.5885376741669297, - "grad_norm": 1.781526778284055, - "learning_rate": 1.5280692358810506e-06, - "loss": 0.9509, - "step": 6526 - }, - { - "epoch": 0.5886278576903999, - "grad_norm": 1.676671714105207, - "learning_rate": 1.527501542648904e-06, - "loss": 0.8968, - "step": 6527 - }, - { - "epoch": 0.5887180412138703, - "grad_norm": 1.72944690164719, - "learning_rate": 1.5269338897329308e-06, - "loss": 0.8553, - "step": 6528 - }, - { - "epoch": 0.5888082247373405, - "grad_norm": 1.4179643783291769, - "learning_rate": 1.5263662771815662e-06, - "loss": 0.9676, - "step": 6529 - }, - { - "epoch": 0.5888984082608107, - "grad_norm": 1.4699114813070948, - "learning_rate": 1.5257987050432429e-06, - "loss": 1.0188, - "step": 6530 - }, - { - "epoch": 0.588988591784281, - "grad_norm": 1.4556754914990164, - "learning_rate": 1.5252311733663887e-06, - "loss": 0.849, - "step": 6531 - }, - { - "epoch": 0.5890787753077513, - "grad_norm": 1.503212350063944, - "learning_rate": 1.5246636821994281e-06, - "loss": 0.8919, - "step": 6532 - }, - { - "epoch": 0.5891689588312216, - "grad_norm": 1.363937001610926, - "learning_rate": 1.524096231590784e-06, - "loss": 0.992, - "step": 6533 - }, - { - "epoch": 0.5892591423546918, - "grad_norm": 1.572905870952147, - "learning_rate": 1.5235288215888736e-06, - "loss": 0.9856, - "step": 6534 - }, - { - "epoch": 0.589349325878162, - "grad_norm": 1.5087015959932446, - "learning_rate": 1.5229614522421102e-06, - "loss": 0.9471, - "step": 6535 - }, - { - "epoch": 0.5894395094016324, - "grad_norm": 1.4424476893045393, - "learning_rate": 1.5223941235989071e-06, - "loss": 0.7813, - "step": 6536 - }, - { - "epoch": 0.5895296929251026, - "grad_norm": 1.313194636042246, - "learning_rate": 1.52182683570767e-06, - "loss": 0.9131, - "step": 6537 - }, - { - "epoch": 0.5896198764485728, - "grad_norm": 1.4729838120813434, - "learning_rate": 1.5212595886168046e-06, - "loss": 0.9589, - "step": 6538 - }, - { - "epoch": 0.5897100599720431, - "grad_norm": 1.407506000377271, - "learning_rate": 1.520692382374711e-06, - "loss": 0.9536, - "step": 6539 - }, - { - "epoch": 0.5898002434955134, - "grad_norm": 1.5882024517486675, - "learning_rate": 1.5201252170297854e-06, - "loss": 0.9675, - "step": 6540 - }, - { - "epoch": 0.5898904270189836, - "grad_norm": 1.7434248015998908, - "learning_rate": 1.5195580926304232e-06, - "loss": 0.8361, - "step": 6541 - }, - { - "epoch": 0.5899806105424539, - "grad_norm": 1.632467863225151, - "learning_rate": 1.5189910092250131e-06, - "loss": 0.9558, - "step": 6542 - }, - { - "epoch": 0.5900707940659241, - "grad_norm": 1.6218526144081493, - "learning_rate": 1.5184239668619427e-06, - "loss": 0.9542, - "step": 6543 - }, - { - "epoch": 0.5901609775893945, - "grad_norm": 1.4311750833819086, - "learning_rate": 1.5178569655895946e-06, - "loss": 0.947, - "step": 6544 - }, - { - "epoch": 0.5902511611128647, - "grad_norm": 1.2305190787543303, - "learning_rate": 1.5172900054563487e-06, - "loss": 0.9727, - "step": 6545 - }, - { - "epoch": 0.5903413446363349, - "grad_norm": 1.4263509369955698, - "learning_rate": 1.5167230865105814e-06, - "loss": 0.9678, - "step": 6546 - }, - { - "epoch": 0.5904315281598052, - "grad_norm": 1.6129579381738364, - "learning_rate": 1.5161562088006644e-06, - "loss": 0.8618, - "step": 6547 - }, - { - "epoch": 0.5905217116832755, - "grad_norm": 1.8519537146244909, - "learning_rate": 1.5155893723749685e-06, - "loss": 0.9759, - "step": 6548 - }, - { - "epoch": 0.5906118952067457, - "grad_norm": 1.3802046673804007, - "learning_rate": 1.5150225772818582e-06, - "loss": 0.9511, - "step": 6549 - }, - { - "epoch": 0.590702078730216, - "grad_norm": 1.4162230451532156, - "learning_rate": 1.5144558235696949e-06, - "loss": 1.0003, - "step": 6550 - }, - { - "epoch": 0.5907922622536863, - "grad_norm": 1.3740440426754352, - "learning_rate": 1.5138891112868388e-06, - "loss": 0.8606, - "step": 6551 - }, - { - "epoch": 0.5908824457771565, - "grad_norm": 0.752413997361355, - "learning_rate": 1.5133224404816433e-06, - "loss": 0.8172, - "step": 6552 - }, - { - "epoch": 0.5909726293006268, - "grad_norm": 1.6842060436091875, - "learning_rate": 1.5127558112024617e-06, - "loss": 0.8756, - "step": 6553 - }, - { - "epoch": 0.591062812824097, - "grad_norm": 0.6815667754559086, - "learning_rate": 1.5121892234976404e-06, - "loss": 0.8192, - "step": 6554 - }, - { - "epoch": 0.5911529963475673, - "grad_norm": 1.480077364677985, - "learning_rate": 1.5116226774155243e-06, - "loss": 0.9295, - "step": 6555 - }, - { - "epoch": 0.5912431798710376, - "grad_norm": 1.708861253935505, - "learning_rate": 1.5110561730044547e-06, - "loss": 0.9112, - "step": 6556 - }, - { - "epoch": 0.5913333633945078, - "grad_norm": 1.3830252877003957, - "learning_rate": 1.510489710312768e-06, - "loss": 0.9412, - "step": 6557 - }, - { - "epoch": 0.591423546917978, - "grad_norm": 1.4354051169644544, - "learning_rate": 1.5099232893887987e-06, - "loss": 0.9398, - "step": 6558 - }, - { - "epoch": 0.5915137304414484, - "grad_norm": 1.3375017700896628, - "learning_rate": 1.5093569102808758e-06, - "loss": 0.9141, - "step": 6559 - }, - { - "epoch": 0.5916039139649186, - "grad_norm": 1.6011582771611708, - "learning_rate": 1.5087905730373275e-06, - "loss": 0.9491, - "step": 6560 - }, - { - "epoch": 0.5916940974883889, - "grad_norm": 0.6728866634431655, - "learning_rate": 1.508224277706476e-06, - "loss": 0.8319, - "step": 6561 - }, - { - "epoch": 0.5917842810118591, - "grad_norm": 2.2529671495991352, - "learning_rate": 1.5076580243366399e-06, - "loss": 0.9063, - "step": 6562 - }, - { - "epoch": 0.5918744645353294, - "grad_norm": 1.6813399592159881, - "learning_rate": 1.507091812976137e-06, - "loss": 0.8536, - "step": 6563 - }, - { - "epoch": 0.5919646480587997, - "grad_norm": 1.3934720929803726, - "learning_rate": 1.5065256436732773e-06, - "loss": 0.8957, - "step": 6564 - }, - { - "epoch": 0.5920548315822699, - "grad_norm": 1.4936143534213293, - "learning_rate": 1.5059595164763717e-06, - "loss": 0.8725, - "step": 6565 - }, - { - "epoch": 0.5921450151057401, - "grad_norm": 0.6445611657800462, - "learning_rate": 1.5053934314337243e-06, - "loss": 0.8246, - "step": 6566 - }, - { - "epoch": 0.5922351986292105, - "grad_norm": 1.3118949131138111, - "learning_rate": 1.5048273885936356e-06, - "loss": 0.936, - "step": 6567 - }, - { - "epoch": 0.5923253821526807, - "grad_norm": 1.20942224020944, - "learning_rate": 1.5042613880044053e-06, - "loss": 0.8626, - "step": 6568 - }, - { - "epoch": 0.592415565676151, - "grad_norm": 1.433761961004889, - "learning_rate": 1.5036954297143265e-06, - "loss": 0.9748, - "step": 6569 - }, - { - "epoch": 0.5925057491996212, - "grad_norm": 1.179207811659944, - "learning_rate": 1.50312951377169e-06, - "loss": 0.8968, - "step": 6570 - }, - { - "epoch": 0.5925959327230915, - "grad_norm": 0.6218566287983684, - "learning_rate": 1.502563640224784e-06, - "loss": 0.7962, - "step": 6571 - }, - { - "epoch": 0.5926861162465618, - "grad_norm": 1.5089305626153908, - "learning_rate": 1.5019978091218903e-06, - "loss": 0.9402, - "step": 6572 - }, - { - "epoch": 0.592776299770032, - "grad_norm": 1.2440209443454289, - "learning_rate": 1.50143202051129e-06, - "loss": 1.0026, - "step": 6573 - }, - { - "epoch": 0.5928664832935022, - "grad_norm": 1.4134430766833768, - "learning_rate": 1.500866274441258e-06, - "loss": 0.9993, - "step": 6574 - }, - { - "epoch": 0.5929566668169726, - "grad_norm": 1.6405107730882635, - "learning_rate": 1.5003005709600682e-06, - "loss": 0.9259, - "step": 6575 - }, - { - "epoch": 0.5930468503404428, - "grad_norm": 1.3801342302498643, - "learning_rate": 1.4997349101159885e-06, - "loss": 1.0152, - "step": 6576 - }, - { - "epoch": 0.593137033863913, - "grad_norm": 1.564505244040547, - "learning_rate": 1.4991692919572854e-06, - "loss": 0.8887, - "step": 6577 - }, - { - "epoch": 0.5932272173873834, - "grad_norm": 1.4318143106933932, - "learning_rate": 1.4986037165322199e-06, - "loss": 0.9501, - "step": 6578 - }, - { - "epoch": 0.5933174009108536, - "grad_norm": 0.7187169938179832, - "learning_rate": 1.498038183889049e-06, - "loss": 0.8389, - "step": 6579 - }, - { - "epoch": 0.5934075844343238, - "grad_norm": 1.4677430617386622, - "learning_rate": 1.4974726940760292e-06, - "loss": 0.8652, - "step": 6580 - }, - { - "epoch": 0.5934977679577941, - "grad_norm": 1.4979071321922341, - "learning_rate": 1.496907247141409e-06, - "loss": 1.0097, - "step": 6581 - }, - { - "epoch": 0.5935879514812644, - "grad_norm": 0.8239907767511105, - "learning_rate": 1.4963418431334372e-06, - "loss": 0.9249, - "step": 6582 - }, - { - "epoch": 0.5936781350047347, - "grad_norm": 1.5410819964022062, - "learning_rate": 1.4957764821003566e-06, - "loss": 0.9251, - "step": 6583 - }, - { - "epoch": 0.5937683185282049, - "grad_norm": 1.4236508692123422, - "learning_rate": 1.4952111640904063e-06, - "loss": 0.9482, - "step": 6584 - }, - { - "epoch": 0.5938585020516751, - "grad_norm": 1.4219885979851183, - "learning_rate": 1.494645889151823e-06, - "loss": 0.9218, - "step": 6585 - }, - { - "epoch": 0.5939486855751455, - "grad_norm": 0.724606948396059, - "learning_rate": 1.494080657332839e-06, - "loss": 0.7761, - "step": 6586 - }, - { - "epoch": 0.5940388690986157, - "grad_norm": 2.0005279082716347, - "learning_rate": 1.4935154686816832e-06, - "loss": 0.9038, - "step": 6587 - }, - { - "epoch": 0.5941290526220859, - "grad_norm": 1.5748880255722375, - "learning_rate": 1.4929503232465802e-06, - "loss": 1.0094, - "step": 6588 - }, - { - "epoch": 0.5942192361455562, - "grad_norm": 1.3340495639583994, - "learning_rate": 1.492385221075751e-06, - "loss": 0.8575, - "step": 6589 - }, - { - "epoch": 0.5943094196690265, - "grad_norm": 1.4995466182899433, - "learning_rate": 1.4918201622174142e-06, - "loss": 0.9733, - "step": 6590 - }, - { - "epoch": 0.5943996031924967, - "grad_norm": 1.5079349606449473, - "learning_rate": 1.4912551467197827e-06, - "loss": 0.9664, - "step": 6591 - }, - { - "epoch": 0.594489786715967, - "grad_norm": 1.6567568453314483, - "learning_rate": 1.4906901746310678e-06, - "loss": 1.02, - "step": 6592 - }, - { - "epoch": 0.5945799702394372, - "grad_norm": 1.2370166759492525, - "learning_rate": 1.4901252459994757e-06, - "loss": 0.9043, - "step": 6593 - }, - { - "epoch": 0.5946701537629075, - "grad_norm": 1.420417038162256, - "learning_rate": 1.489560360873208e-06, - "loss": 0.9265, - "step": 6594 - }, - { - "epoch": 0.5947603372863778, - "grad_norm": 1.4217835281628495, - "learning_rate": 1.4889955193004659e-06, - "loss": 0.9195, - "step": 6595 - }, - { - "epoch": 0.594850520809848, - "grad_norm": 0.6800994282496691, - "learning_rate": 1.4884307213294428e-06, - "loss": 0.8005, - "step": 6596 - }, - { - "epoch": 0.5949407043333182, - "grad_norm": 1.5937387802626675, - "learning_rate": 1.4878659670083321e-06, - "loss": 0.9269, - "step": 6597 - }, - { - "epoch": 0.5950308878567886, - "grad_norm": 1.5398686810027913, - "learning_rate": 1.4873012563853208e-06, - "loss": 0.8671, - "step": 6598 - }, - { - "epoch": 0.5951210713802588, - "grad_norm": 2.2448540226771723, - "learning_rate": 1.4867365895085935e-06, - "loss": 0.9533, - "step": 6599 - }, - { - "epoch": 0.5952112549037291, - "grad_norm": 1.2752623325567465, - "learning_rate": 1.4861719664263301e-06, - "loss": 0.9185, - "step": 6600 - }, - { - "epoch": 0.5953014384271994, - "grad_norm": 1.4491558215521718, - "learning_rate": 1.485607387186708e-06, - "loss": 1.0359, - "step": 6601 - }, - { - "epoch": 0.5953916219506696, - "grad_norm": 1.551301960040332, - "learning_rate": 1.4850428518379001e-06, - "loss": 0.9373, - "step": 6602 - }, - { - "epoch": 0.5954818054741399, - "grad_norm": 1.2613176115607347, - "learning_rate": 1.4844783604280746e-06, - "loss": 0.9359, - "step": 6603 - }, - { - "epoch": 0.5955719889976101, - "grad_norm": 1.8068939196382177, - "learning_rate": 1.483913913005399e-06, - "loss": 0.8869, - "step": 6604 - }, - { - "epoch": 0.5956621725210804, - "grad_norm": 1.5081176449042635, - "learning_rate": 1.483349509618034e-06, - "loss": 0.9988, - "step": 6605 - }, - { - "epoch": 0.5957523560445507, - "grad_norm": 1.4020215357651493, - "learning_rate": 1.4827851503141367e-06, - "loss": 0.8744, - "step": 6606 - }, - { - "epoch": 0.5958425395680209, - "grad_norm": 1.357490768330975, - "learning_rate": 1.482220835141863e-06, - "loss": 0.8683, - "step": 6607 - }, - { - "epoch": 0.5959327230914911, - "grad_norm": 1.452915217525867, - "learning_rate": 1.481656564149362e-06, - "loss": 1.0183, - "step": 6608 - }, - { - "epoch": 0.5960229066149615, - "grad_norm": 2.971324727711902, - "learning_rate": 1.4810923373847818e-06, - "loss": 0.9384, - "step": 6609 - }, - { - "epoch": 0.5961130901384317, - "grad_norm": 1.4981543789602032, - "learning_rate": 1.4805281548962647e-06, - "loss": 0.9463, - "step": 6610 - }, - { - "epoch": 0.596203273661902, - "grad_norm": 1.502612620241218, - "learning_rate": 1.4799640167319488e-06, - "loss": 0.885, - "step": 6611 - }, - { - "epoch": 0.5962934571853722, - "grad_norm": 1.4865791419627155, - "learning_rate": 1.4793999229399714e-06, - "loss": 0.9281, - "step": 6612 - }, - { - "epoch": 0.5963836407088425, - "grad_norm": 1.4831701910478086, - "learning_rate": 1.4788358735684626e-06, - "loss": 1.0032, - "step": 6613 - }, - { - "epoch": 0.5964738242323128, - "grad_norm": 1.3500001289226329, - "learning_rate": 1.4782718686655514e-06, - "loss": 1.0034, - "step": 6614 - }, - { - "epoch": 0.596564007755783, - "grad_norm": 3.227230270780517, - "learning_rate": 1.4777079082793605e-06, - "loss": 0.9861, - "step": 6615 - }, - { - "epoch": 0.5966541912792532, - "grad_norm": 1.4952139795069264, - "learning_rate": 1.4771439924580108e-06, - "loss": 0.9096, - "step": 6616 - }, - { - "epoch": 0.5967443748027236, - "grad_norm": 1.239381079108199, - "learning_rate": 1.4765801212496189e-06, - "loss": 0.9797, - "step": 6617 - }, - { - "epoch": 0.5968345583261938, - "grad_norm": 1.5088748648236319, - "learning_rate": 1.4760162947022968e-06, - "loss": 0.8596, - "step": 6618 - }, - { - "epoch": 0.596924741849664, - "grad_norm": 1.4353963967688277, - "learning_rate": 1.475452512864154e-06, - "loss": 0.9295, - "step": 6619 - }, - { - "epoch": 0.5970149253731343, - "grad_norm": 1.1535933104442806, - "learning_rate": 1.4748887757832945e-06, - "loss": 0.9495, - "step": 6620 - }, - { - "epoch": 0.5971051088966046, - "grad_norm": 1.435351383106959, - "learning_rate": 1.4743250835078209e-06, - "loss": 0.9724, - "step": 6621 - }, - { - "epoch": 0.5971952924200749, - "grad_norm": 1.311483034603729, - "learning_rate": 1.4737614360858297e-06, - "loss": 0.9037, - "step": 6622 - }, - { - "epoch": 0.5972854759435451, - "grad_norm": 1.5814736219456176, - "learning_rate": 1.4731978335654133e-06, - "loss": 0.897, - "step": 6623 - }, - { - "epoch": 0.5973756594670154, - "grad_norm": 1.664400666311507, - "learning_rate": 1.4726342759946638e-06, - "loss": 0.8833, - "step": 6624 - }, - { - "epoch": 0.5974658429904857, - "grad_norm": 1.488214286038618, - "learning_rate": 1.4720707634216653e-06, - "loss": 0.9429, - "step": 6625 - }, - { - "epoch": 0.5975560265139559, - "grad_norm": 1.6605052626014394, - "learning_rate": 1.4715072958945e-06, - "loss": 0.9503, - "step": 6626 - }, - { - "epoch": 0.5976462100374261, - "grad_norm": 1.6248749171313563, - "learning_rate": 1.470943873461247e-06, - "loss": 0.9421, - "step": 6627 - }, - { - "epoch": 0.5977363935608965, - "grad_norm": 2.295397951782993, - "learning_rate": 1.470380496169979e-06, - "loss": 0.9078, - "step": 6628 - }, - { - "epoch": 0.5978265770843667, - "grad_norm": 1.477928377586723, - "learning_rate": 1.4698171640687682e-06, - "loss": 0.9367, - "step": 6629 - }, - { - "epoch": 0.5979167606078369, - "grad_norm": 1.3468549032105979, - "learning_rate": 1.4692538772056792e-06, - "loss": 0.8833, - "step": 6630 - }, - { - "epoch": 0.5980069441313072, - "grad_norm": 1.2231276215928197, - "learning_rate": 1.4686906356287772e-06, - "loss": 0.8562, - "step": 6631 - }, - { - "epoch": 0.5980971276547775, - "grad_norm": 1.3539496712564807, - "learning_rate": 1.4681274393861194e-06, - "loss": 0.9809, - "step": 6632 - }, - { - "epoch": 0.5981873111782477, - "grad_norm": 1.5325331469337966, - "learning_rate": 1.4675642885257603e-06, - "loss": 1.0356, - "step": 6633 - }, - { - "epoch": 0.598277494701718, - "grad_norm": 1.475578585795096, - "learning_rate": 1.4670011830957529e-06, - "loss": 1.0361, - "step": 6634 - }, - { - "epoch": 0.5983676782251882, - "grad_norm": 1.8186689824152957, - "learning_rate": 1.4664381231441427e-06, - "loss": 0.9075, - "step": 6635 - }, - { - "epoch": 0.5984578617486586, - "grad_norm": 1.4453482958510657, - "learning_rate": 1.4658751087189746e-06, - "loss": 0.943, - "step": 6636 - }, - { - "epoch": 0.5985480452721288, - "grad_norm": 1.5546881900958347, - "learning_rate": 1.4653121398682874e-06, - "loss": 0.9884, - "step": 6637 - }, - { - "epoch": 0.598638228795599, - "grad_norm": 1.2168277350809287, - "learning_rate": 1.4647492166401159e-06, - "loss": 0.8833, - "step": 6638 - }, - { - "epoch": 0.5987284123190693, - "grad_norm": 1.5262391762189544, - "learning_rate": 1.4641863390824934e-06, - "loss": 0.9311, - "step": 6639 - }, - { - "epoch": 0.5988185958425396, - "grad_norm": 1.413960574359878, - "learning_rate": 1.4636235072434465e-06, - "loss": 0.9348, - "step": 6640 - }, - { - "epoch": 0.5989087793660098, - "grad_norm": 1.308524833117038, - "learning_rate": 1.4630607211709994e-06, - "loss": 0.965, - "step": 6641 - }, - { - "epoch": 0.5989989628894801, - "grad_norm": 1.313220870645714, - "learning_rate": 1.4624979809131723e-06, - "loss": 0.9497, - "step": 6642 - }, - { - "epoch": 0.5990891464129503, - "grad_norm": 1.5822688171598622, - "learning_rate": 1.4619352865179814e-06, - "loss": 0.8519, - "step": 6643 - }, - { - "epoch": 0.5991793299364206, - "grad_norm": 1.633218742100687, - "learning_rate": 1.4613726380334391e-06, - "loss": 0.9156, - "step": 6644 - }, - { - "epoch": 0.5992695134598909, - "grad_norm": 1.6454057339211448, - "learning_rate": 1.4608100355075522e-06, - "loss": 0.7976, - "step": 6645 - }, - { - "epoch": 0.5993596969833611, - "grad_norm": 0.9359673369528396, - "learning_rate": 1.460247478988327e-06, - "loss": 0.8026, - "step": 6646 - }, - { - "epoch": 0.5994498805068315, - "grad_norm": 1.3008317908930183, - "learning_rate": 1.4596849685237623e-06, - "loss": 0.9507, - "step": 6647 - }, - { - "epoch": 0.5995400640303017, - "grad_norm": 1.4176759864243067, - "learning_rate": 1.459122504161856e-06, - "loss": 0.8493, - "step": 6648 - }, - { - "epoch": 0.5996302475537719, - "grad_norm": 1.3499697522907372, - "learning_rate": 1.4585600859506001e-06, - "loss": 0.9116, - "step": 6649 - }, - { - "epoch": 0.5997204310772422, - "grad_norm": 1.5796659141748561, - "learning_rate": 1.4579977139379826e-06, - "loss": 1.0006, - "step": 6650 - }, - { - "epoch": 0.5998106146007125, - "grad_norm": 1.4891523079943936, - "learning_rate": 1.4574353881719895e-06, - "loss": 1.0102, - "step": 6651 - }, - { - "epoch": 0.5999007981241827, - "grad_norm": 1.7028378585800603, - "learning_rate": 1.4568731087005998e-06, - "loss": 0.9439, - "step": 6652 - }, - { - "epoch": 0.599990981647653, - "grad_norm": 1.2019822775126123, - "learning_rate": 1.4563108755717916e-06, - "loss": 0.9598, - "step": 6653 - }, - { - "epoch": 0.6000811651711232, - "grad_norm": 1.4432445681780477, - "learning_rate": 1.455748688833538e-06, - "loss": 0.9615, - "step": 6654 - }, - { - "epoch": 0.6001713486945935, - "grad_norm": 1.4717233470371507, - "learning_rate": 1.4551865485338065e-06, - "loss": 0.8515, - "step": 6655 - }, - { - "epoch": 0.6002615322180638, - "grad_norm": 1.3466197132590019, - "learning_rate": 1.4546244547205629e-06, - "loss": 1.0034, - "step": 6656 - }, - { - "epoch": 0.600351715741534, - "grad_norm": 1.3877454798285844, - "learning_rate": 1.4540624074417678e-06, - "loss": 0.9614, - "step": 6657 - }, - { - "epoch": 0.6004418992650042, - "grad_norm": 1.3401085060939912, - "learning_rate": 1.453500406745379e-06, - "loss": 0.9972, - "step": 6658 - }, - { - "epoch": 0.6005320827884746, - "grad_norm": 1.6727169448124715, - "learning_rate": 1.4529384526793486e-06, - "loss": 1.0159, - "step": 6659 - }, - { - "epoch": 0.6006222663119448, - "grad_norm": 1.581069013166646, - "learning_rate": 1.4523765452916252e-06, - "loss": 0.9948, - "step": 6660 - }, - { - "epoch": 0.600712449835415, - "grad_norm": 1.399296629850677, - "learning_rate": 1.4518146846301554e-06, - "loss": 0.9691, - "step": 6661 - }, - { - "epoch": 0.6008026333588853, - "grad_norm": 1.5248653665115253, - "learning_rate": 1.4512528707428787e-06, - "loss": 0.9181, - "step": 6662 - }, - { - "epoch": 0.6008928168823556, - "grad_norm": 1.316260717480563, - "learning_rate": 1.4506911036777335e-06, - "loss": 0.9161, - "step": 6663 - }, - { - "epoch": 0.6009830004058259, - "grad_norm": 0.7241909330389881, - "learning_rate": 1.450129383482651e-06, - "loss": 0.7898, - "step": 6664 - }, - { - "epoch": 0.6010731839292961, - "grad_norm": 1.173624030474117, - "learning_rate": 1.4495677102055629e-06, - "loss": 0.8968, - "step": 6665 - }, - { - "epoch": 0.6011633674527663, - "grad_norm": 1.3695456957232024, - "learning_rate": 1.4490060838943924e-06, - "loss": 0.9775, - "step": 6666 - }, - { - "epoch": 0.6012535509762367, - "grad_norm": 3.1318564918350296, - "learning_rate": 1.4484445045970609e-06, - "loss": 0.941, - "step": 6667 - }, - { - "epoch": 0.6013437344997069, - "grad_norm": 1.398477777508415, - "learning_rate": 1.447882972361485e-06, - "loss": 0.9384, - "step": 6668 - }, - { - "epoch": 0.6014339180231771, - "grad_norm": 1.427446286715805, - "learning_rate": 1.4473214872355785e-06, - "loss": 0.9252, - "step": 6669 - }, - { - "epoch": 0.6015241015466475, - "grad_norm": 1.5989021349380252, - "learning_rate": 1.4467600492672508e-06, - "loss": 0.9841, - "step": 6670 - }, - { - "epoch": 0.6016142850701177, - "grad_norm": 1.540273664236046, - "learning_rate": 1.4461986585044054e-06, - "loss": 0.9188, - "step": 6671 - }, - { - "epoch": 0.601704468593588, - "grad_norm": 1.415660397927028, - "learning_rate": 1.4456373149949446e-06, - "loss": 0.9211, - "step": 6672 - }, - { - "epoch": 0.6017946521170582, - "grad_norm": 1.5583555570163505, - "learning_rate": 1.4450760187867648e-06, - "loss": 0.9487, - "step": 6673 - }, - { - "epoch": 0.6018848356405285, - "grad_norm": 1.3759442468426224, - "learning_rate": 1.4445147699277581e-06, - "loss": 0.9309, - "step": 6674 - }, - { - "epoch": 0.6019750191639988, - "grad_norm": 1.4651903073124857, - "learning_rate": 1.4439535684658154e-06, - "loss": 0.99, - "step": 6675 - }, - { - "epoch": 0.602065202687469, - "grad_norm": 0.9140935061159944, - "learning_rate": 1.44339241444882e-06, - "loss": 0.8574, - "step": 6676 - }, - { - "epoch": 0.6021553862109392, - "grad_norm": 1.4925806616067983, - "learning_rate": 1.4428313079246518e-06, - "loss": 0.8882, - "step": 6677 - }, - { - "epoch": 0.6022455697344096, - "grad_norm": 0.6988579655732278, - "learning_rate": 1.4422702489411896e-06, - "loss": 0.8181, - "step": 6678 - }, - { - "epoch": 0.6023357532578798, - "grad_norm": 1.622471676477867, - "learning_rate": 1.4417092375463043e-06, - "loss": 0.8912, - "step": 6679 - }, - { - "epoch": 0.60242593678135, - "grad_norm": 1.361636637205198, - "learning_rate": 1.441148273787866e-06, - "loss": 0.9769, - "step": 6680 - }, - { - "epoch": 0.6025161203048203, - "grad_norm": 0.8668998034833553, - "learning_rate": 1.4405873577137383e-06, - "loss": 0.8626, - "step": 6681 - }, - { - "epoch": 0.6026063038282906, - "grad_norm": 1.8085030518564846, - "learning_rate": 1.4400264893717816e-06, - "loss": 0.9535, - "step": 6682 - }, - { - "epoch": 0.6026964873517608, - "grad_norm": 1.2800128360194092, - "learning_rate": 1.4394656688098526e-06, - "loss": 0.9885, - "step": 6683 - }, - { - "epoch": 0.6027866708752311, - "grad_norm": 1.377518644604793, - "learning_rate": 1.4389048960758032e-06, - "loss": 0.9723, - "step": 6684 - }, - { - "epoch": 0.6028768543987013, - "grad_norm": 1.9375068910537736, - "learning_rate": 1.4383441712174826e-06, - "loss": 0.9737, - "step": 6685 - }, - { - "epoch": 0.6029670379221717, - "grad_norm": 1.4714151915551679, - "learning_rate": 1.4377834942827333e-06, - "loss": 0.9996, - "step": 6686 - }, - { - "epoch": 0.6030572214456419, - "grad_norm": 1.3650183772851678, - "learning_rate": 1.437222865319397e-06, - "loss": 0.8805, - "step": 6687 - }, - { - "epoch": 0.6031474049691121, - "grad_norm": 1.6755349571795608, - "learning_rate": 1.4366622843753092e-06, - "loss": 0.873, - "step": 6688 - }, - { - "epoch": 0.6032375884925824, - "grad_norm": 1.2718715752032985, - "learning_rate": 1.4361017514983006e-06, - "loss": 0.9648, - "step": 6689 - }, - { - "epoch": 0.6033277720160527, - "grad_norm": 1.520884717656751, - "learning_rate": 1.4355412667362006e-06, - "loss": 1.0019, - "step": 6690 - }, - { - "epoch": 0.6034179555395229, - "grad_norm": 1.4382895084539775, - "learning_rate": 1.4349808301368311e-06, - "loss": 0.896, - "step": 6691 - }, - { - "epoch": 0.6035081390629932, - "grad_norm": 1.6501461051063424, - "learning_rate": 1.4344204417480139e-06, - "loss": 0.982, - "step": 6692 - }, - { - "epoch": 0.6035983225864634, - "grad_norm": 1.4939833614388018, - "learning_rate": 1.4338601016175628e-06, - "loss": 0.89, - "step": 6693 - }, - { - "epoch": 0.6036885061099337, - "grad_norm": 1.2679373257186979, - "learning_rate": 1.433299809793289e-06, - "loss": 0.939, - "step": 6694 - }, - { - "epoch": 0.603778689633404, - "grad_norm": 2.2839615266302, - "learning_rate": 1.432739566323001e-06, - "loss": 0.807, - "step": 6695 - }, - { - "epoch": 0.6038688731568742, - "grad_norm": 1.5949700117730115, - "learning_rate": 1.4321793712545004e-06, - "loss": 0.9065, - "step": 6696 - }, - { - "epoch": 0.6039590566803446, - "grad_norm": 1.5967450143124955, - "learning_rate": 1.4316192246355873e-06, - "loss": 0.9793, - "step": 6697 - }, - { - "epoch": 0.6040492402038148, - "grad_norm": 0.7026733855435077, - "learning_rate": 1.4310591265140555e-06, - "loss": 0.8063, - "step": 6698 - }, - { - "epoch": 0.604139423727285, - "grad_norm": 1.3265476059723411, - "learning_rate": 1.4304990769376963e-06, - "loss": 0.9543, - "step": 6699 - }, - { - "epoch": 0.6042296072507553, - "grad_norm": 1.3217527671194793, - "learning_rate": 1.4299390759542962e-06, - "loss": 0.9316, - "step": 6700 - }, - { - "epoch": 0.6043197907742256, - "grad_norm": 1.4990705153438062, - "learning_rate": 1.4293791236116368e-06, - "loss": 0.9222, - "step": 6701 - }, - { - "epoch": 0.6044099742976958, - "grad_norm": 1.5001491631411104, - "learning_rate": 1.4288192199574978e-06, - "loss": 0.911, - "step": 6702 - }, - { - "epoch": 0.6045001578211661, - "grad_norm": 0.7820657667227318, - "learning_rate": 1.4282593650396524e-06, - "loss": 0.7617, - "step": 6703 - }, - { - "epoch": 0.6045903413446363, - "grad_norm": 1.2506184478542375, - "learning_rate": 1.4276995589058695e-06, - "loss": 1.0413, - "step": 6704 - }, - { - "epoch": 0.6046805248681066, - "grad_norm": 0.7150679690059605, - "learning_rate": 1.4271398016039168e-06, - "loss": 0.7875, - "step": 6705 - }, - { - "epoch": 0.6047707083915769, - "grad_norm": 2.3051245323517033, - "learning_rate": 1.4265800931815542e-06, - "loss": 0.9809, - "step": 6706 - }, - { - "epoch": 0.6048608919150471, - "grad_norm": 1.3355266987921453, - "learning_rate": 1.4260204336865406e-06, - "loss": 0.9492, - "step": 6707 - }, - { - "epoch": 0.6049510754385173, - "grad_norm": 1.4360176404437974, - "learning_rate": 1.4254608231666281e-06, - "loss": 0.9372, - "step": 6708 - }, - { - "epoch": 0.6050412589619877, - "grad_norm": 1.3240985647029688, - "learning_rate": 1.4249012616695661e-06, - "loss": 0.9477, - "step": 6709 - }, - { - "epoch": 0.6051314424854579, - "grad_norm": 1.3969467998033114, - "learning_rate": 1.4243417492431e-06, - "loss": 0.9507, - "step": 6710 - }, - { - "epoch": 0.6052216260089281, - "grad_norm": 1.5666429145852185, - "learning_rate": 1.4237822859349696e-06, - "loss": 0.9273, - "step": 6711 - }, - { - "epoch": 0.6053118095323984, - "grad_norm": 1.2417043550049836, - "learning_rate": 1.423222871792912e-06, - "loss": 0.9112, - "step": 6712 - }, - { - "epoch": 0.6054019930558687, - "grad_norm": 1.31368197852091, - "learning_rate": 1.4226635068646586e-06, - "loss": 1.0044, - "step": 6713 - }, - { - "epoch": 0.605492176579339, - "grad_norm": 0.690357209843404, - "learning_rate": 1.4221041911979393e-06, - "loss": 0.8029, - "step": 6714 - }, - { - "epoch": 0.6055823601028092, - "grad_norm": 1.4859821653823273, - "learning_rate": 1.4215449248404765e-06, - "loss": 0.9533, - "step": 6715 - }, - { - "epoch": 0.6056725436262794, - "grad_norm": 1.4364247654493656, - "learning_rate": 1.4209857078399896e-06, - "loss": 0.9596, - "step": 6716 - }, - { - "epoch": 0.6057627271497498, - "grad_norm": 1.5568888046406073, - "learning_rate": 1.4204265402441955e-06, - "loss": 0.9114, - "step": 6717 - }, - { - "epoch": 0.60585291067322, - "grad_norm": 1.309395433507729, - "learning_rate": 1.419867422100804e-06, - "loss": 0.8213, - "step": 6718 - }, - { - "epoch": 0.6059430941966902, - "grad_norm": 1.3476884478717088, - "learning_rate": 1.4193083534575236e-06, - "loss": 0.9291, - "step": 6719 - }, - { - "epoch": 0.6060332777201606, - "grad_norm": 1.9454519689116678, - "learning_rate": 1.4187493343620567e-06, - "loss": 0.9676, - "step": 6720 - }, - { - "epoch": 0.6061234612436308, - "grad_norm": 1.4908435621314053, - "learning_rate": 1.4181903648621006e-06, - "loss": 0.9717, - "step": 6721 - }, - { - "epoch": 0.606213644767101, - "grad_norm": 1.3886054729998296, - "learning_rate": 1.4176314450053512e-06, - "loss": 0.9702, - "step": 6722 - }, - { - "epoch": 0.6063038282905713, - "grad_norm": 1.6346784816836657, - "learning_rate": 1.4170725748394977e-06, - "loss": 0.9337, - "step": 6723 - }, - { - "epoch": 0.6063940118140416, - "grad_norm": 1.4961237095156286, - "learning_rate": 1.4165137544122266e-06, - "loss": 0.8502, - "step": 6724 - }, - { - "epoch": 0.6064841953375119, - "grad_norm": 1.9757908456975861, - "learning_rate": 1.4159549837712194e-06, - "loss": 1.0063, - "step": 6725 - }, - { - "epoch": 0.6065743788609821, - "grad_norm": 1.2408485633952147, - "learning_rate": 1.415396262964153e-06, - "loss": 0.8622, - "step": 6726 - }, - { - "epoch": 0.6066645623844523, - "grad_norm": 1.6617358064695271, - "learning_rate": 1.4148375920387016e-06, - "loss": 1.0173, - "step": 6727 - }, - { - "epoch": 0.6067547459079227, - "grad_norm": 0.733659883063379, - "learning_rate": 1.4142789710425325e-06, - "loss": 0.8108, - "step": 6728 - }, - { - "epoch": 0.6068449294313929, - "grad_norm": 1.7568562937432401, - "learning_rate": 1.4137204000233118e-06, - "loss": 0.8672, - "step": 6729 - }, - { - "epoch": 0.6069351129548631, - "grad_norm": 1.707507872221241, - "learning_rate": 1.4131618790286987e-06, - "loss": 0.9356, - "step": 6730 - }, - { - "epoch": 0.6070252964783334, - "grad_norm": 1.370432506773998, - "learning_rate": 1.4126034081063506e-06, - "loss": 0.9315, - "step": 6731 - }, - { - "epoch": 0.6071154800018037, - "grad_norm": 1.5145741388530702, - "learning_rate": 1.4120449873039186e-06, - "loss": 0.931, - "step": 6732 - }, - { - "epoch": 0.6072056635252739, - "grad_norm": 1.4500687977484845, - "learning_rate": 1.4114866166690494e-06, - "loss": 0.9761, - "step": 6733 - }, - { - "epoch": 0.6072958470487442, - "grad_norm": 1.1171531938740848, - "learning_rate": 1.4109282962493877e-06, - "loss": 0.8983, - "step": 6734 - }, - { - "epoch": 0.6073860305722144, - "grad_norm": 1.561882888999942, - "learning_rate": 1.4103700260925716e-06, - "loss": 0.9319, - "step": 6735 - }, - { - "epoch": 0.6074762140956848, - "grad_norm": 1.5575192985747366, - "learning_rate": 1.4098118062462364e-06, - "loss": 0.9798, - "step": 6736 - }, - { - "epoch": 0.607566397619155, - "grad_norm": 1.207851785314521, - "learning_rate": 1.4092536367580123e-06, - "loss": 0.9366, - "step": 6737 - }, - { - "epoch": 0.6076565811426252, - "grad_norm": 1.52053250860544, - "learning_rate": 1.4086955176755248e-06, - "loss": 0.8557, - "step": 6738 - }, - { - "epoch": 0.6077467646660955, - "grad_norm": 1.4198207897950632, - "learning_rate": 1.4081374490463964e-06, - "loss": 0.8808, - "step": 6739 - }, - { - "epoch": 0.6078369481895658, - "grad_norm": 1.7310891169632285, - "learning_rate": 1.4075794309182443e-06, - "loss": 0.9602, - "step": 6740 - }, - { - "epoch": 0.607927131713036, - "grad_norm": 1.6779090159850958, - "learning_rate": 1.407021463338682e-06, - "loss": 0.9527, - "step": 6741 - }, - { - "epoch": 0.6080173152365063, - "grad_norm": 0.6610093783349561, - "learning_rate": 1.4064635463553177e-06, - "loss": 0.7659, - "step": 6742 - }, - { - "epoch": 0.6081074987599766, - "grad_norm": 1.4796639622847412, - "learning_rate": 1.4059056800157563e-06, - "loss": 0.965, - "step": 6743 - }, - { - "epoch": 0.6081976822834468, - "grad_norm": 1.4077291550770537, - "learning_rate": 1.4053478643675982e-06, - "loss": 0.8725, - "step": 6744 - }, - { - "epoch": 0.6082878658069171, - "grad_norm": 1.3860276770257234, - "learning_rate": 1.4047900994584389e-06, - "loss": 0.8376, - "step": 6745 - }, - { - "epoch": 0.6083780493303873, - "grad_norm": 1.6375626093419362, - "learning_rate": 1.404232385335871e-06, - "loss": 0.8524, - "step": 6746 - }, - { - "epoch": 0.6084682328538576, - "grad_norm": 1.4223017628517536, - "learning_rate": 1.4036747220474806e-06, - "loss": 0.9466, - "step": 6747 - }, - { - "epoch": 0.6085584163773279, - "grad_norm": 1.4713017637210215, - "learning_rate": 1.4031171096408506e-06, - "loss": 0.8998, - "step": 6748 - }, - { - "epoch": 0.6086485999007981, - "grad_norm": 1.4126671498066639, - "learning_rate": 1.4025595481635607e-06, - "loss": 0.9652, - "step": 6749 - }, - { - "epoch": 0.6087387834242683, - "grad_norm": 1.627715849032893, - "learning_rate": 1.4020020376631836e-06, - "loss": 0.9621, - "step": 6750 - }, - { - "epoch": 0.6088289669477387, - "grad_norm": 1.5823332321738053, - "learning_rate": 1.4014445781872908e-06, - "loss": 0.9607, - "step": 6751 - }, - { - "epoch": 0.6089191504712089, - "grad_norm": 1.3421515338950474, - "learning_rate": 1.4008871697834465e-06, - "loss": 1.001, - "step": 6752 - }, - { - "epoch": 0.6090093339946792, - "grad_norm": 1.4485380728329702, - "learning_rate": 1.400329812499213e-06, - "loss": 0.9735, - "step": 6753 - }, - { - "epoch": 0.6090995175181494, - "grad_norm": 1.2893454963537083, - "learning_rate": 1.3997725063821458e-06, - "loss": 0.953, - "step": 6754 - }, - { - "epoch": 0.6091897010416197, - "grad_norm": 1.4385628295638189, - "learning_rate": 1.3992152514797978e-06, - "loss": 0.9077, - "step": 6755 - }, - { - "epoch": 0.60927988456509, - "grad_norm": 0.6275048843512853, - "learning_rate": 1.398658047839718e-06, - "loss": 0.8235, - "step": 6756 - }, - { - "epoch": 0.6093700680885602, - "grad_norm": 0.6592324468903594, - "learning_rate": 1.3981008955094481e-06, - "loss": 0.7911, - "step": 6757 - }, - { - "epoch": 0.6094602516120304, - "grad_norm": 1.18970916447755, - "learning_rate": 1.39754379453653e-06, - "loss": 0.9157, - "step": 6758 - }, - { - "epoch": 0.6095504351355008, - "grad_norm": 2.008581585663647, - "learning_rate": 1.3969867449684972e-06, - "loss": 0.9488, - "step": 6759 - }, - { - "epoch": 0.609640618658971, - "grad_norm": 1.8926825558536398, - "learning_rate": 1.396429746852879e-06, - "loss": 0.9239, - "step": 6760 - }, - { - "epoch": 0.6097308021824412, - "grad_norm": 1.8312884616393115, - "learning_rate": 1.395872800237204e-06, - "loss": 0.9195, - "step": 6761 - }, - { - "epoch": 0.6098209857059115, - "grad_norm": 0.9311412696599941, - "learning_rate": 1.3953159051689918e-06, - "loss": 0.8401, - "step": 6762 - }, - { - "epoch": 0.6099111692293818, - "grad_norm": 1.4486053073303926, - "learning_rate": 1.3947590616957618e-06, - "loss": 0.9821, - "step": 6763 - }, - { - "epoch": 0.610001352752852, - "grad_norm": 1.5513309302109903, - "learning_rate": 1.3942022698650258e-06, - "loss": 0.9788, - "step": 6764 - }, - { - "epoch": 0.6100915362763223, - "grad_norm": 1.3849868920459938, - "learning_rate": 1.3936455297242917e-06, - "loss": 0.974, - "step": 6765 - }, - { - "epoch": 0.6101817197997926, - "grad_norm": 1.4371232907491691, - "learning_rate": 1.3930888413210652e-06, - "loss": 0.951, - "step": 6766 - }, - { - "epoch": 0.6102719033232629, - "grad_norm": 1.5307836795673517, - "learning_rate": 1.392532204702845e-06, - "loss": 0.9104, - "step": 6767 - }, - { - "epoch": 0.6103620868467331, - "grad_norm": 1.7534655588692774, - "learning_rate": 1.3919756199171266e-06, - "loss": 0.8648, - "step": 6768 - }, - { - "epoch": 0.6104522703702033, - "grad_norm": 1.2216349958726906, - "learning_rate": 1.3914190870114009e-06, - "loss": 1.0149, - "step": 6769 - }, - { - "epoch": 0.6105424538936737, - "grad_norm": 1.3376977489135673, - "learning_rate": 1.3908626060331541e-06, - "loss": 0.9861, - "step": 6770 - }, - { - "epoch": 0.6106326374171439, - "grad_norm": 1.3512884314168392, - "learning_rate": 1.3903061770298693e-06, - "loss": 0.9694, - "step": 6771 - }, - { - "epoch": 0.6107228209406141, - "grad_norm": 1.396554243567408, - "learning_rate": 1.3897498000490223e-06, - "loss": 0.9107, - "step": 6772 - }, - { - "epoch": 0.6108130044640844, - "grad_norm": 2.477561484458305, - "learning_rate": 1.3891934751380879e-06, - "loss": 1.007, - "step": 6773 - }, - { - "epoch": 0.6109031879875547, - "grad_norm": 1.3352413036561062, - "learning_rate": 1.3886372023445334e-06, - "loss": 0.953, - "step": 6774 - }, - { - "epoch": 0.610993371511025, - "grad_norm": 1.2978954207312747, - "learning_rate": 1.3880809817158246e-06, - "loss": 1.0092, - "step": 6775 - }, - { - "epoch": 0.6110835550344952, - "grad_norm": 1.5529965353323465, - "learning_rate": 1.3875248132994206e-06, - "loss": 0.9336, - "step": 6776 - }, - { - "epoch": 0.6111737385579654, - "grad_norm": 1.538844369059945, - "learning_rate": 1.386968697142776e-06, - "loss": 0.8662, - "step": 6777 - }, - { - "epoch": 0.6112639220814358, - "grad_norm": 1.319543377474428, - "learning_rate": 1.386412633293343e-06, - "loss": 0.9868, - "step": 6778 - }, - { - "epoch": 0.611354105604906, - "grad_norm": 1.5325800510426866, - "learning_rate": 1.3858566217985672e-06, - "loss": 0.9144, - "step": 6779 - }, - { - "epoch": 0.6114442891283762, - "grad_norm": 1.5370958029548931, - "learning_rate": 1.3853006627058905e-06, - "loss": 1.0267, - "step": 6780 - }, - { - "epoch": 0.6115344726518465, - "grad_norm": 1.7454261954490558, - "learning_rate": 1.3847447560627512e-06, - "loss": 0.8276, - "step": 6781 - }, - { - "epoch": 0.6116246561753168, - "grad_norm": 1.4635722834777356, - "learning_rate": 1.3841889019165812e-06, - "loss": 0.9855, - "step": 6782 - }, - { - "epoch": 0.611714839698787, - "grad_norm": 1.8542945117666174, - "learning_rate": 1.3836331003148106e-06, - "loss": 0.8952, - "step": 6783 - }, - { - "epoch": 0.6118050232222573, - "grad_norm": 1.2761490393896664, - "learning_rate": 1.3830773513048612e-06, - "loss": 0.9845, - "step": 6784 - }, - { - "epoch": 0.6118952067457275, - "grad_norm": 1.5283444923706235, - "learning_rate": 1.382521654934155e-06, - "loss": 0.9619, - "step": 6785 - }, - { - "epoch": 0.6119853902691978, - "grad_norm": 1.7375510064192208, - "learning_rate": 1.3819660112501057e-06, - "loss": 0.8903, - "step": 6786 - }, - { - "epoch": 0.6120755737926681, - "grad_norm": 1.4436688123722436, - "learning_rate": 1.3814104203001234e-06, - "loss": 0.9088, - "step": 6787 - }, - { - "epoch": 0.6121657573161383, - "grad_norm": 1.4439667080821783, - "learning_rate": 1.3808548821316156e-06, - "loss": 0.9849, - "step": 6788 - }, - { - "epoch": 0.6122559408396087, - "grad_norm": 1.2401321969782317, - "learning_rate": 1.3802993967919824e-06, - "loss": 0.8731, - "step": 6789 - }, - { - "epoch": 0.6123461243630789, - "grad_norm": 1.7091395716361615, - "learning_rate": 1.3797439643286227e-06, - "loss": 0.9029, - "step": 6790 - }, - { - "epoch": 0.6124363078865491, - "grad_norm": 1.2684889050255235, - "learning_rate": 1.3791885847889277e-06, - "loss": 0.9259, - "step": 6791 - }, - { - "epoch": 0.6125264914100194, - "grad_norm": 1.4388605396484306, - "learning_rate": 1.3786332582202853e-06, - "loss": 0.9735, - "step": 6792 - }, - { - "epoch": 0.6126166749334897, - "grad_norm": 1.3145992656937275, - "learning_rate": 1.3780779846700799e-06, - "loss": 0.9441, - "step": 6793 - }, - { - "epoch": 0.6127068584569599, - "grad_norm": 1.2825324455603844, - "learning_rate": 1.3775227641856899e-06, - "loss": 0.9527, - "step": 6794 - }, - { - "epoch": 0.6127970419804302, - "grad_norm": 1.3518354989117052, - "learning_rate": 1.37696759681449e-06, - "loss": 0.9439, - "step": 6795 - }, - { - "epoch": 0.6128872255039004, - "grad_norm": 0.7429209247523423, - "learning_rate": 1.37641248260385e-06, - "loss": 0.8446, - "step": 6796 - }, - { - "epoch": 0.6129774090273707, - "grad_norm": 1.4990066577084058, - "learning_rate": 1.375857421601136e-06, - "loss": 0.9921, - "step": 6797 - }, - { - "epoch": 0.613067592550841, - "grad_norm": 1.4717574475558244, - "learning_rate": 1.3753024138537082e-06, - "loss": 0.9498, - "step": 6798 - }, - { - "epoch": 0.6131577760743112, - "grad_norm": 1.279219530402859, - "learning_rate": 1.3747474594089221e-06, - "loss": 1.0064, - "step": 6799 - }, - { - "epoch": 0.6132479595977814, - "grad_norm": 1.6602553394141586, - "learning_rate": 1.374192558314131e-06, - "loss": 0.9202, - "step": 6800 - }, - { - "epoch": 0.6133381431212518, - "grad_norm": 1.805447703180651, - "learning_rate": 1.373637710616681e-06, - "loss": 1.0045, - "step": 6801 - }, - { - "epoch": 0.613428326644722, - "grad_norm": 1.6442639638833587, - "learning_rate": 1.373082916363916e-06, - "loss": 1.0426, - "step": 6802 - }, - { - "epoch": 0.6135185101681923, - "grad_norm": 1.357317320824462, - "learning_rate": 1.3725281756031732e-06, - "loss": 0.9842, - "step": 6803 - }, - { - "epoch": 0.6136086936916625, - "grad_norm": 1.2303149899279975, - "learning_rate": 1.3719734883817858e-06, - "loss": 0.9391, - "step": 6804 - }, - { - "epoch": 0.6136988772151328, - "grad_norm": 1.4476102966821494, - "learning_rate": 1.371418854747084e-06, - "loss": 1.0585, - "step": 6805 - }, - { - "epoch": 0.6137890607386031, - "grad_norm": 5.44868949042666, - "learning_rate": 1.3708642747463905e-06, - "loss": 0.9796, - "step": 6806 - }, - { - "epoch": 0.6138792442620733, - "grad_norm": 1.3561376410848118, - "learning_rate": 1.370309748427027e-06, - "loss": 0.979, - "step": 6807 - }, - { - "epoch": 0.6139694277855435, - "grad_norm": 1.4443029228393756, - "learning_rate": 1.3697552758363079e-06, - "loss": 1.0229, - "step": 6808 - }, - { - "epoch": 0.6140596113090139, - "grad_norm": 1.7103374944554615, - "learning_rate": 1.3692008570215432e-06, - "loss": 0.9466, - "step": 6809 - }, - { - "epoch": 0.6141497948324841, - "grad_norm": 1.3956892404151244, - "learning_rate": 1.3686464920300398e-06, - "loss": 0.8967, - "step": 6810 - }, - { - "epoch": 0.6142399783559543, - "grad_norm": 1.8773347305925725, - "learning_rate": 1.3680921809090985e-06, - "loss": 0.9204, - "step": 6811 - }, - { - "epoch": 0.6143301618794246, - "grad_norm": 1.473251910369116, - "learning_rate": 1.3675379237060175e-06, - "loss": 0.9783, - "step": 6812 - }, - { - "epoch": 0.6144203454028949, - "grad_norm": 1.3741268940512354, - "learning_rate": 1.366983720468088e-06, - "loss": 0.9377, - "step": 6813 - }, - { - "epoch": 0.6145105289263652, - "grad_norm": 1.1825438523175849, - "learning_rate": 1.3664295712425972e-06, - "loss": 0.9497, - "step": 6814 - }, - { - "epoch": 0.6146007124498354, - "grad_norm": 1.410788523771129, - "learning_rate": 1.3658754760768296e-06, - "loss": 0.8822, - "step": 6815 - }, - { - "epoch": 0.6146908959733057, - "grad_norm": 1.4143888823956907, - "learning_rate": 1.3653214350180621e-06, - "loss": 0.9854, - "step": 6816 - }, - { - "epoch": 0.614781079496776, - "grad_norm": 1.3710180625782997, - "learning_rate": 1.3647674481135703e-06, - "loss": 1.0157, - "step": 6817 - }, - { - "epoch": 0.6148712630202462, - "grad_norm": 1.240322464345215, - "learning_rate": 1.3642135154106217e-06, - "loss": 0.8452, - "step": 6818 - }, - { - "epoch": 0.6149614465437164, - "grad_norm": 1.3874614177528932, - "learning_rate": 1.363659636956482e-06, - "loss": 0.8926, - "step": 6819 - }, - { - "epoch": 0.6150516300671868, - "grad_norm": 1.2951515880072353, - "learning_rate": 1.3631058127984112e-06, - "loss": 0.9095, - "step": 6820 - }, - { - "epoch": 0.615141813590657, - "grad_norm": 1.5492106827503254, - "learning_rate": 1.3625520429836632e-06, - "loss": 0.9241, - "step": 6821 - }, - { - "epoch": 0.6152319971141272, - "grad_norm": 1.56641652396393, - "learning_rate": 1.361998327559491e-06, - "loss": 0.8529, - "step": 6822 - }, - { - "epoch": 0.6153221806375975, - "grad_norm": 0.6535483562438013, - "learning_rate": 1.3614446665731385e-06, - "loss": 0.8057, - "step": 6823 - }, - { - "epoch": 0.6154123641610678, - "grad_norm": 1.592659446243796, - "learning_rate": 1.3608910600718484e-06, - "loss": 0.9535, - "step": 6824 - }, - { - "epoch": 0.615502547684538, - "grad_norm": 1.457755882741971, - "learning_rate": 1.360337508102857e-06, - "loss": 0.9504, - "step": 6825 - }, - { - "epoch": 0.6155927312080083, - "grad_norm": 1.823152675791305, - "learning_rate": 1.3597840107133962e-06, - "loss": 1.0448, - "step": 6826 - }, - { - "epoch": 0.6156829147314785, - "grad_norm": 1.7621393145802229, - "learning_rate": 1.3592305679506944e-06, - "loss": 0.9493, - "step": 6827 - }, - { - "epoch": 0.6157730982549489, - "grad_norm": 1.3086025864032347, - "learning_rate": 1.3586771798619726e-06, - "loss": 0.8552, - "step": 6828 - }, - { - "epoch": 0.6158632817784191, - "grad_norm": 2.9549793698386244, - "learning_rate": 1.358123846494451e-06, - "loss": 1.005, - "step": 6829 - }, - { - "epoch": 0.6159534653018893, - "grad_norm": 1.9378783564149413, - "learning_rate": 1.3575705678953422e-06, - "loss": 0.9445, - "step": 6830 - }, - { - "epoch": 0.6160436488253596, - "grad_norm": 1.562736874745109, - "learning_rate": 1.357017344111854e-06, - "loss": 0.9806, - "step": 6831 - }, - { - "epoch": 0.6161338323488299, - "grad_norm": 1.631442284487044, - "learning_rate": 1.356464175191192e-06, - "loss": 0.9546, - "step": 6832 - }, - { - "epoch": 0.6162240158723001, - "grad_norm": 1.985459099149706, - "learning_rate": 1.3559110611805542e-06, - "loss": 0.9736, - "step": 6833 - }, - { - "epoch": 0.6163141993957704, - "grad_norm": 1.2957371695470405, - "learning_rate": 1.3553580021271372e-06, - "loss": 0.9858, - "step": 6834 - }, - { - "epoch": 0.6164043829192406, - "grad_norm": 1.4205314240892213, - "learning_rate": 1.3548049980781297e-06, - "loss": 0.89, - "step": 6835 - }, - { - "epoch": 0.616494566442711, - "grad_norm": 1.7749972437447652, - "learning_rate": 1.3542520490807166e-06, - "loss": 0.9404, - "step": 6836 - }, - { - "epoch": 0.6165847499661812, - "grad_norm": 1.672697987719459, - "learning_rate": 1.3536991551820802e-06, - "loss": 0.9438, - "step": 6837 - }, - { - "epoch": 0.6166749334896514, - "grad_norm": 1.2188715751849426, - "learning_rate": 1.3531463164293952e-06, - "loss": 0.92, - "step": 6838 - }, - { - "epoch": 0.6167651170131218, - "grad_norm": 1.8443089947060094, - "learning_rate": 1.3525935328698332e-06, - "loss": 0.8561, - "step": 6839 - }, - { - "epoch": 0.616855300536592, - "grad_norm": 1.566220622110638, - "learning_rate": 1.3520408045505605e-06, - "loss": 0.8824, - "step": 6840 - }, - { - "epoch": 0.6169454840600622, - "grad_norm": 1.4927749675706457, - "learning_rate": 1.3514881315187396e-06, - "loss": 0.9096, - "step": 6841 - }, - { - "epoch": 0.6170356675835325, - "grad_norm": 1.2536555719994473, - "learning_rate": 1.3509355138215273e-06, - "loss": 0.9977, - "step": 6842 - }, - { - "epoch": 0.6171258511070028, - "grad_norm": 1.4417854781980648, - "learning_rate": 1.350382951506075e-06, - "loss": 1.0255, - "step": 6843 - }, - { - "epoch": 0.617216034630473, - "grad_norm": 0.6674026187126881, - "learning_rate": 1.349830444619532e-06, - "loss": 0.7884, - "step": 6844 - }, - { - "epoch": 0.6173062181539433, - "grad_norm": 1.393224237260981, - "learning_rate": 1.3492779932090397e-06, - "loss": 0.9622, - "step": 6845 - }, - { - "epoch": 0.6173964016774135, - "grad_norm": 1.2992744310000395, - "learning_rate": 1.3487255973217377e-06, - "loss": 0.9688, - "step": 6846 - }, - { - "epoch": 0.6174865852008838, - "grad_norm": 1.5522374155594547, - "learning_rate": 1.3481732570047592e-06, - "loss": 1.0193, - "step": 6847 - }, - { - "epoch": 0.6175767687243541, - "grad_norm": 1.709996909869341, - "learning_rate": 1.3476209723052314e-06, - "loss": 0.8719, - "step": 6848 - }, - { - "epoch": 0.6176669522478243, - "grad_norm": 1.5252641730457397, - "learning_rate": 1.3470687432702806e-06, - "loss": 0.9565, - "step": 6849 - }, - { - "epoch": 0.6177571357712945, - "grad_norm": 1.561919676777885, - "learning_rate": 1.346516569947024e-06, - "loss": 0.8772, - "step": 6850 - }, - { - "epoch": 0.6178473192947649, - "grad_norm": 1.681152485568539, - "learning_rate": 1.3459644523825774e-06, - "loss": 0.9347, - "step": 6851 - }, - { - "epoch": 0.6179375028182351, - "grad_norm": 1.5185332563550025, - "learning_rate": 1.34541239062405e-06, - "loss": 0.9915, - "step": 6852 - }, - { - "epoch": 0.6180276863417054, - "grad_norm": 1.294657592501664, - "learning_rate": 1.3448603847185464e-06, - "loss": 0.9454, - "step": 6853 - }, - { - "epoch": 0.6181178698651756, - "grad_norm": 1.461756267708366, - "learning_rate": 1.344308434713168e-06, - "loss": 0.8871, - "step": 6854 - }, - { - "epoch": 0.6182080533886459, - "grad_norm": 1.6555668123589353, - "learning_rate": 1.3437565406550083e-06, - "loss": 0.885, - "step": 6855 - }, - { - "epoch": 0.6182982369121162, - "grad_norm": 2.238054780004999, - "learning_rate": 1.34320470259116e-06, - "loss": 0.9342, - "step": 6856 - }, - { - "epoch": 0.6183884204355864, - "grad_norm": 1.3780908524138435, - "learning_rate": 1.3426529205687078e-06, - "loss": 0.8471, - "step": 6857 - }, - { - "epoch": 0.6184786039590566, - "grad_norm": 1.4263142464800365, - "learning_rate": 1.3421011946347323e-06, - "loss": 1.0013, - "step": 6858 - }, - { - "epoch": 0.618568787482527, - "grad_norm": 1.3456648556786321, - "learning_rate": 1.3415495248363113e-06, - "loss": 0.9071, - "step": 6859 - }, - { - "epoch": 0.6186589710059972, - "grad_norm": 1.8463052403289952, - "learning_rate": 1.3409979112205148e-06, - "loss": 0.9694, - "step": 6860 - }, - { - "epoch": 0.6187491545294674, - "grad_norm": 1.578787334178901, - "learning_rate": 1.3404463538344107e-06, - "loss": 0.9739, - "step": 6861 - }, - { - "epoch": 0.6188393380529378, - "grad_norm": 1.7617751152681076, - "learning_rate": 1.3398948527250602e-06, - "loss": 1.0356, - "step": 6862 - }, - { - "epoch": 0.618929521576408, - "grad_norm": 0.7728793124060558, - "learning_rate": 1.3393434079395207e-06, - "loss": 0.8156, - "step": 6863 - }, - { - "epoch": 0.6190197050998782, - "grad_norm": 1.349669216391643, - "learning_rate": 1.3387920195248449e-06, - "loss": 0.9313, - "step": 6864 - }, - { - "epoch": 0.6191098886233485, - "grad_norm": 1.2412175164097135, - "learning_rate": 1.3382406875280791e-06, - "loss": 0.954, - "step": 6865 - }, - { - "epoch": 0.6192000721468188, - "grad_norm": 1.3630220882811823, - "learning_rate": 1.3376894119962672e-06, - "loss": 0.9648, - "step": 6866 - }, - { - "epoch": 0.6192902556702891, - "grad_norm": 2.6848759815408387, - "learning_rate": 1.3371381929764464e-06, - "loss": 0.9698, - "step": 6867 - }, - { - "epoch": 0.6193804391937593, - "grad_norm": 0.7257053282153273, - "learning_rate": 1.3365870305156506e-06, - "loss": 0.7926, - "step": 6868 - }, - { - "epoch": 0.6194706227172295, - "grad_norm": 1.805001830102727, - "learning_rate": 1.3360359246609073e-06, - "loss": 0.9532, - "step": 6869 - }, - { - "epoch": 0.6195608062406999, - "grad_norm": 1.6195961526543805, - "learning_rate": 1.3354848754592387e-06, - "loss": 0.9449, - "step": 6870 - }, - { - "epoch": 0.6196509897641701, - "grad_norm": 1.4504487198368452, - "learning_rate": 1.334933882957666e-06, - "loss": 1.0352, - "step": 6871 - }, - { - "epoch": 0.6197411732876403, - "grad_norm": 1.3384544799354643, - "learning_rate": 1.3343829472032004e-06, - "loss": 0.9055, - "step": 6872 - }, - { - "epoch": 0.6198313568111106, - "grad_norm": 1.5230830636119288, - "learning_rate": 1.3338320682428527e-06, - "loss": 0.9308, - "step": 6873 - }, - { - "epoch": 0.6199215403345809, - "grad_norm": 1.31897582159086, - "learning_rate": 1.3332812461236263e-06, - "loss": 0.9117, - "step": 6874 - }, - { - "epoch": 0.6200117238580511, - "grad_norm": 1.2626208219218316, - "learning_rate": 1.3327304808925192e-06, - "loss": 0.9088, - "step": 6875 - }, - { - "epoch": 0.6201019073815214, - "grad_norm": 1.5581074579860381, - "learning_rate": 1.332179772596528e-06, - "loss": 0.8049, - "step": 6876 - }, - { - "epoch": 0.6201920909049916, - "grad_norm": 1.4844638195569086, - "learning_rate": 1.3316291212826402e-06, - "loss": 0.8674, - "step": 6877 - }, - { - "epoch": 0.620282274428462, - "grad_norm": 1.5089205292642995, - "learning_rate": 1.3310785269978413e-06, - "loss": 0.9904, - "step": 6878 - }, - { - "epoch": 0.6203724579519322, - "grad_norm": 1.5728494827709614, - "learning_rate": 1.3305279897891111e-06, - "loss": 0.9878, - "step": 6879 - }, - { - "epoch": 0.6204626414754024, - "grad_norm": 1.577567804756463, - "learning_rate": 1.329977509703424e-06, - "loss": 0.8076, - "step": 6880 - }, - { - "epoch": 0.6205528249988727, - "grad_norm": 1.5706249324800088, - "learning_rate": 1.32942708678775e-06, - "loss": 1.0311, - "step": 6881 - }, - { - "epoch": 0.620643008522343, - "grad_norm": 1.427383818165798, - "learning_rate": 1.3288767210890548e-06, - "loss": 0.9523, - "step": 6882 - }, - { - "epoch": 0.6207331920458132, - "grad_norm": 1.4242260119262973, - "learning_rate": 1.3283264126542986e-06, - "loss": 0.9086, - "step": 6883 - }, - { - "epoch": 0.6208233755692835, - "grad_norm": 1.3300008653336772, - "learning_rate": 1.3277761615304356e-06, - "loss": 0.9448, - "step": 6884 - }, - { - "epoch": 0.6209135590927538, - "grad_norm": 1.1855754817421031, - "learning_rate": 1.3272259677644178e-06, - "loss": 1.0369, - "step": 6885 - }, - { - "epoch": 0.621003742616224, - "grad_norm": 1.585955164017047, - "learning_rate": 1.32667583140319e-06, - "loss": 0.8174, - "step": 6886 - }, - { - "epoch": 0.6210939261396943, - "grad_norm": 1.616248332625353, - "learning_rate": 1.3261257524936924e-06, - "loss": 0.9851, - "step": 6887 - }, - { - "epoch": 0.6211841096631645, - "grad_norm": 1.3574905487911777, - "learning_rate": 1.3255757310828619e-06, - "loss": 0.981, - "step": 6888 - }, - { - "epoch": 0.6212742931866349, - "grad_norm": 1.5449411653840708, - "learning_rate": 1.3250257672176282e-06, - "loss": 0.9645, - "step": 6889 - }, - { - "epoch": 0.6213644767101051, - "grad_norm": 1.398532629773266, - "learning_rate": 1.3244758609449183e-06, - "loss": 0.851, - "step": 6890 - }, - { - "epoch": 0.6214546602335753, - "grad_norm": 1.3835185775281587, - "learning_rate": 1.323926012311653e-06, - "loss": 0.966, - "step": 6891 - }, - { - "epoch": 0.6215448437570456, - "grad_norm": 1.4044014118530816, - "learning_rate": 1.3233762213647476e-06, - "loss": 0.9679, - "step": 6892 - }, - { - "epoch": 0.6216350272805159, - "grad_norm": 1.6569219701578135, - "learning_rate": 1.3228264881511137e-06, - "loss": 0.9143, - "step": 6893 - }, - { - "epoch": 0.6217252108039861, - "grad_norm": 1.5925002340319594, - "learning_rate": 1.322276812717658e-06, - "loss": 1.0312, - "step": 6894 - }, - { - "epoch": 0.6218153943274564, - "grad_norm": 1.262428532834323, - "learning_rate": 1.3217271951112818e-06, - "loss": 0.854, - "step": 6895 - }, - { - "epoch": 0.6219055778509266, - "grad_norm": 1.42316403489153, - "learning_rate": 1.321177635378881e-06, - "loss": 0.9241, - "step": 6896 - }, - { - "epoch": 0.6219957613743969, - "grad_norm": 1.6135047591511296, - "learning_rate": 1.3206281335673475e-06, - "loss": 0.8406, - "step": 6897 - }, - { - "epoch": 0.6220859448978672, - "grad_norm": 1.77107451049424, - "learning_rate": 1.3200786897235677e-06, - "loss": 0.9151, - "step": 6898 - }, - { - "epoch": 0.6221761284213374, - "grad_norm": 1.4208789732343814, - "learning_rate": 1.3195293038944227e-06, - "loss": 0.9684, - "step": 6899 - }, - { - "epoch": 0.6222663119448076, - "grad_norm": 1.238165624376667, - "learning_rate": 1.3189799761267902e-06, - "loss": 0.9389, - "step": 6900 - }, - { - "epoch": 0.622356495468278, - "grad_norm": 1.6900044682686328, - "learning_rate": 1.3184307064675412e-06, - "loss": 0.9422, - "step": 6901 - }, - { - "epoch": 0.6224466789917482, - "grad_norm": 0.779025802696703, - "learning_rate": 1.3178814949635416e-06, - "loss": 0.8468, - "step": 6902 - }, - { - "epoch": 0.6225368625152184, - "grad_norm": 1.8077714340042905, - "learning_rate": 1.3173323416616549e-06, - "loss": 0.9008, - "step": 6903 - }, - { - "epoch": 0.6226270460386887, - "grad_norm": 1.4631194278263848, - "learning_rate": 1.3167832466087361e-06, - "loss": 0.9894, - "step": 6904 - }, - { - "epoch": 0.622717229562159, - "grad_norm": 1.3835109520062676, - "learning_rate": 1.3162342098516388e-06, - "loss": 0.8973, - "step": 6905 - }, - { - "epoch": 0.6228074130856293, - "grad_norm": 1.3800039271976041, - "learning_rate": 1.3156852314372086e-06, - "loss": 0.8668, - "step": 6906 - }, - { - "epoch": 0.6228975966090995, - "grad_norm": 0.7942277281691048, - "learning_rate": 1.3151363114122882e-06, - "loss": 0.8034, - "step": 6907 - }, - { - "epoch": 0.6229877801325698, - "grad_norm": 1.3713921116342105, - "learning_rate": 1.3145874498237133e-06, - "loss": 0.9508, - "step": 6908 - }, - { - "epoch": 0.6230779636560401, - "grad_norm": 1.4343640495331085, - "learning_rate": 1.3140386467183166e-06, - "loss": 0.9935, - "step": 6909 - }, - { - "epoch": 0.6231681471795103, - "grad_norm": 1.448611232375159, - "learning_rate": 1.3134899021429258e-06, - "loss": 0.9858, - "step": 6910 - }, - { - "epoch": 0.6232583307029805, - "grad_norm": 1.6197346700798685, - "learning_rate": 1.3129412161443604e-06, - "loss": 0.9288, - "step": 6911 - }, - { - "epoch": 0.6233485142264509, - "grad_norm": 1.5502968903739769, - "learning_rate": 1.3123925887694402e-06, - "loss": 0.9493, - "step": 6912 - }, - { - "epoch": 0.6234386977499211, - "grad_norm": 1.2566822731674698, - "learning_rate": 1.3118440200649756e-06, - "loss": 0.9527, - "step": 6913 - }, - { - "epoch": 0.6235288812733913, - "grad_norm": 2.183113604935493, - "learning_rate": 1.3112955100777727e-06, - "loss": 0.9092, - "step": 6914 - }, - { - "epoch": 0.6236190647968616, - "grad_norm": 1.2165732384870902, - "learning_rate": 1.3107470588546353e-06, - "loss": 0.975, - "step": 6915 - }, - { - "epoch": 0.6237092483203319, - "grad_norm": 1.5092482772294114, - "learning_rate": 1.3101986664423583e-06, - "loss": 0.9254, - "step": 6916 - }, - { - "epoch": 0.6237994318438022, - "grad_norm": 1.4424186810557953, - "learning_rate": 1.3096503328877358e-06, - "loss": 0.9155, - "step": 6917 - }, - { - "epoch": 0.6238896153672724, - "grad_norm": 1.3245515658319198, - "learning_rate": 1.309102058237553e-06, - "loss": 0.9673, - "step": 6918 - }, - { - "epoch": 0.6239797988907426, - "grad_norm": 1.090521215621634, - "learning_rate": 1.3085538425385917e-06, - "loss": 0.9859, - "step": 6919 - }, - { - "epoch": 0.624069982414213, - "grad_norm": 1.8096414583963247, - "learning_rate": 1.3080056858376298e-06, - "loss": 0.9738, - "step": 6920 - }, - { - "epoch": 0.6241601659376832, - "grad_norm": 1.7647926596735304, - "learning_rate": 1.3074575881814383e-06, - "loss": 0.9282, - "step": 6921 - }, - { - "epoch": 0.6242503494611534, - "grad_norm": 1.4559209034258938, - "learning_rate": 1.3069095496167841e-06, - "loss": 0.875, - "step": 6922 - }, - { - "epoch": 0.6243405329846237, - "grad_norm": 1.3178442732861564, - "learning_rate": 1.3063615701904285e-06, - "loss": 0.8561, - "step": 6923 - }, - { - "epoch": 0.624430716508094, - "grad_norm": 2.4181795536768567, - "learning_rate": 1.3058136499491283e-06, - "loss": 0.9709, - "step": 6924 - }, - { - "epoch": 0.6245209000315642, - "grad_norm": 1.776858353619017, - "learning_rate": 1.3052657889396357e-06, - "loss": 0.9204, - "step": 6925 - }, - { - "epoch": 0.6246110835550345, - "grad_norm": 1.3973401841855335, - "learning_rate": 1.304717987208696e-06, - "loss": 0.9316, - "step": 6926 - }, - { - "epoch": 0.6247012670785047, - "grad_norm": 1.2608153708305232, - "learning_rate": 1.304170244803052e-06, - "loss": 0.9358, - "step": 6927 - }, - { - "epoch": 0.624791450601975, - "grad_norm": 1.3547527609904528, - "learning_rate": 1.3036225617694387e-06, - "loss": 0.9139, - "step": 6928 - }, - { - "epoch": 0.6248816341254453, - "grad_norm": 1.4104586881897987, - "learning_rate": 1.3030749381545892e-06, - "loss": 0.9768, - "step": 6929 - }, - { - "epoch": 0.6249718176489155, - "grad_norm": 1.7027857031265252, - "learning_rate": 1.3025273740052285e-06, - "loss": 0.8847, - "step": 6930 - }, - { - "epoch": 0.6250620011723858, - "grad_norm": 1.1986789544684682, - "learning_rate": 1.3019798693680774e-06, - "loss": 0.9904, - "step": 6931 - }, - { - "epoch": 0.6251521846958561, - "grad_norm": 1.2909798436825954, - "learning_rate": 1.3014324242898536e-06, - "loss": 1.0581, - "step": 6932 - }, - { - "epoch": 0.6252423682193263, - "grad_norm": 1.6209112225460507, - "learning_rate": 1.3008850388172668e-06, - "loss": 0.9972, - "step": 6933 - }, - { - "epoch": 0.6253325517427966, - "grad_norm": 1.3128229379727052, - "learning_rate": 1.3003377129970233e-06, - "loss": 0.9009, - "step": 6934 - }, - { - "epoch": 0.6254227352662669, - "grad_norm": 1.2748490449889176, - "learning_rate": 1.2997904468758243e-06, - "loss": 0.8662, - "step": 6935 - }, - { - "epoch": 0.6255129187897371, - "grad_norm": 2.244015257730818, - "learning_rate": 1.299243240500365e-06, - "loss": 0.9593, - "step": 6936 - }, - { - "epoch": 0.6256031023132074, - "grad_norm": 1.5237942009039063, - "learning_rate": 1.2986960939173368e-06, - "loss": 0.9607, - "step": 6937 - }, - { - "epoch": 0.6256932858366776, - "grad_norm": 1.4761585308682144, - "learning_rate": 1.298149007173424e-06, - "loss": 0.9851, - "step": 6938 - }, - { - "epoch": 0.625783469360148, - "grad_norm": 1.797991397523085, - "learning_rate": 1.2976019803153087e-06, - "loss": 0.9731, - "step": 6939 - }, - { - "epoch": 0.6258736528836182, - "grad_norm": 1.473057780684446, - "learning_rate": 1.2970550133896652e-06, - "loss": 0.89, - "step": 6940 - }, - { - "epoch": 0.6259638364070884, - "grad_norm": 1.4749854006287217, - "learning_rate": 1.2965081064431634e-06, - "loss": 1.0293, - "step": 6941 - }, - { - "epoch": 0.6260540199305586, - "grad_norm": 1.5521096178257097, - "learning_rate": 1.2959612595224698e-06, - "loss": 0.9626, - "step": 6942 - }, - { - "epoch": 0.626144203454029, - "grad_norm": 1.7074455963002015, - "learning_rate": 1.2954144726742424e-06, - "loss": 0.9561, - "step": 6943 - }, - { - "epoch": 0.6262343869774992, - "grad_norm": 1.673765521842885, - "learning_rate": 1.2948677459451385e-06, - "loss": 0.9404, - "step": 6944 - }, - { - "epoch": 0.6263245705009695, - "grad_norm": 1.297333601790502, - "learning_rate": 1.2943210793818064e-06, - "loss": 0.9908, - "step": 6945 - }, - { - "epoch": 0.6264147540244397, - "grad_norm": 1.5208282818993413, - "learning_rate": 1.2937744730308899e-06, - "loss": 0.9205, - "step": 6946 - }, - { - "epoch": 0.62650493754791, - "grad_norm": 1.7476093447942904, - "learning_rate": 1.2932279269390305e-06, - "loss": 0.9381, - "step": 6947 - }, - { - "epoch": 0.6265951210713803, - "grad_norm": 1.5977088527833627, - "learning_rate": 1.292681441152861e-06, - "loss": 0.8527, - "step": 6948 - }, - { - "epoch": 0.6266853045948505, - "grad_norm": 1.6362394300196639, - "learning_rate": 1.292135015719011e-06, - "loss": 0.9534, - "step": 6949 - }, - { - "epoch": 0.6267754881183207, - "grad_norm": 3.3587076633228405, - "learning_rate": 1.2915886506841046e-06, - "loss": 0.9065, - "step": 6950 - }, - { - "epoch": 0.6268656716417911, - "grad_norm": 1.422151559615731, - "learning_rate": 1.2910423460947613e-06, - "loss": 0.879, - "step": 6951 - }, - { - "epoch": 0.6269558551652613, - "grad_norm": 1.5484330778703568, - "learning_rate": 1.290496101997594e-06, - "loss": 0.9182, - "step": 6952 - }, - { - "epoch": 0.6270460386887315, - "grad_norm": 0.7703419786116963, - "learning_rate": 1.2899499184392105e-06, - "loss": 0.8219, - "step": 6953 - }, - { - "epoch": 0.6271362222122018, - "grad_norm": 2.9887473148092973, - "learning_rate": 1.289403795466216e-06, - "loss": 0.9179, - "step": 6954 - }, - { - "epoch": 0.6272264057356721, - "grad_norm": 1.4631068804505794, - "learning_rate": 1.288857733125207e-06, - "loss": 0.9543, - "step": 6955 - }, - { - "epoch": 0.6273165892591424, - "grad_norm": 1.4399746864796015, - "learning_rate": 1.2883117314627785e-06, - "loss": 0.9368, - "step": 6956 - }, - { - "epoch": 0.6274067727826126, - "grad_norm": 1.3758088247209987, - "learning_rate": 1.2877657905255168e-06, - "loss": 0.8807, - "step": 6957 - }, - { - "epoch": 0.6274969563060829, - "grad_norm": 1.4207931427104963, - "learning_rate": 1.2872199103600046e-06, - "loss": 1.0055, - "step": 6958 - }, - { - "epoch": 0.6275871398295532, - "grad_norm": 1.5548841649176461, - "learning_rate": 1.286674091012821e-06, - "loss": 1.0119, - "step": 6959 - }, - { - "epoch": 0.6276773233530234, - "grad_norm": 1.3114634916988346, - "learning_rate": 1.2861283325305356e-06, - "loss": 0.9967, - "step": 6960 - }, - { - "epoch": 0.6277675068764936, - "grad_norm": 1.4523057987041947, - "learning_rate": 1.2855826349597185e-06, - "loss": 0.947, - "step": 6961 - }, - { - "epoch": 0.627857690399964, - "grad_norm": 1.2865976060029973, - "learning_rate": 1.2850369983469302e-06, - "loss": 0.8078, - "step": 6962 - }, - { - "epoch": 0.6279478739234342, - "grad_norm": 1.4359205110061708, - "learning_rate": 1.2844914227387266e-06, - "loss": 0.9739, - "step": 6963 - }, - { - "epoch": 0.6280380574469044, - "grad_norm": 1.356284388093031, - "learning_rate": 1.2839459081816606e-06, - "loss": 0.9661, - "step": 6964 - }, - { - "epoch": 0.6281282409703747, - "grad_norm": 1.4509849787380844, - "learning_rate": 1.283400454722278e-06, - "loss": 1.0086, - "step": 6965 - }, - { - "epoch": 0.628218424493845, - "grad_norm": 1.3538835033832628, - "learning_rate": 1.28285506240712e-06, - "loss": 1.0104, - "step": 6966 - }, - { - "epoch": 0.6283086080173153, - "grad_norm": 1.2985696865326695, - "learning_rate": 1.2823097312827225e-06, - "loss": 0.9673, - "step": 6967 - }, - { - "epoch": 0.6283987915407855, - "grad_norm": 1.3840465750244293, - "learning_rate": 1.2817644613956153e-06, - "loss": 0.9544, - "step": 6968 - }, - { - "epoch": 0.6284889750642557, - "grad_norm": 1.488416930894889, - "learning_rate": 1.2812192527923253e-06, - "loss": 0.9997, - "step": 6969 - }, - { - "epoch": 0.6285791585877261, - "grad_norm": 1.428036179065535, - "learning_rate": 1.2806741055193712e-06, - "loss": 0.985, - "step": 6970 - }, - { - "epoch": 0.6286693421111963, - "grad_norm": 1.468046628663635, - "learning_rate": 1.2801290196232695e-06, - "loss": 0.8983, - "step": 6971 - }, - { - "epoch": 0.6287595256346665, - "grad_norm": 2.0702690839713584, - "learning_rate": 1.2795839951505282e-06, - "loss": 0.9344, - "step": 6972 - }, - { - "epoch": 0.6288497091581368, - "grad_norm": 1.5524561218842794, - "learning_rate": 1.2790390321476538e-06, - "loss": 0.9679, - "step": 6973 - }, - { - "epoch": 0.6289398926816071, - "grad_norm": 1.2829348023782503, - "learning_rate": 1.2784941306611446e-06, - "loss": 0.9226, - "step": 6974 - }, - { - "epoch": 0.6290300762050773, - "grad_norm": 5.557303412251334, - "learning_rate": 1.2779492907374935e-06, - "loss": 1.0049, - "step": 6975 - }, - { - "epoch": 0.6291202597285476, - "grad_norm": 1.39472101026412, - "learning_rate": 1.2774045124231911e-06, - "loss": 0.9336, - "step": 6976 - }, - { - "epoch": 0.6292104432520178, - "grad_norm": 1.2728157163673484, - "learning_rate": 1.2768597957647197e-06, - "loss": 0.8343, - "step": 6977 - }, - { - "epoch": 0.6293006267754881, - "grad_norm": 1.5361609823138245, - "learning_rate": 1.2763151408085582e-06, - "loss": 0.9458, - "step": 6978 - }, - { - "epoch": 0.6293908102989584, - "grad_norm": 1.3856302636872053, - "learning_rate": 1.2757705476011788e-06, - "loss": 0.9156, - "step": 6979 - }, - { - "epoch": 0.6294809938224286, - "grad_norm": 1.3408821908518482, - "learning_rate": 1.27522601618905e-06, - "loss": 0.89, - "step": 6980 - }, - { - "epoch": 0.629571177345899, - "grad_norm": 0.6250632969275506, - "learning_rate": 1.2746815466186337e-06, - "loss": 0.8196, - "step": 6981 - }, - { - "epoch": 0.6296613608693692, - "grad_norm": 1.3883410782767818, - "learning_rate": 1.274137138936387e-06, - "loss": 0.9426, - "step": 6982 - }, - { - "epoch": 0.6297515443928394, - "grad_norm": 1.53198956126947, - "learning_rate": 1.2735927931887625e-06, - "loss": 0.9439, - "step": 6983 - }, - { - "epoch": 0.6298417279163097, - "grad_norm": 0.7209597908479579, - "learning_rate": 1.2730485094222061e-06, - "loss": 0.8169, - "step": 6984 - }, - { - "epoch": 0.62993191143978, - "grad_norm": 1.8685828705116951, - "learning_rate": 1.2725042876831586e-06, - "loss": 0.9833, - "step": 6985 - }, - { - "epoch": 0.6300220949632502, - "grad_norm": 2.283351609465509, - "learning_rate": 1.2719601280180573e-06, - "loss": 0.8404, - "step": 6986 - }, - { - "epoch": 0.6301122784867205, - "grad_norm": 1.668864025549327, - "learning_rate": 1.2714160304733317e-06, - "loss": 1.0061, - "step": 6987 - }, - { - "epoch": 0.6302024620101907, - "grad_norm": 1.594326139544039, - "learning_rate": 1.2708719950954082e-06, - "loss": 0.9638, - "step": 6988 - }, - { - "epoch": 0.630292645533661, - "grad_norm": 1.4609566753577392, - "learning_rate": 1.2703280219307065e-06, - "loss": 0.9564, - "step": 6989 - }, - { - "epoch": 0.6303828290571313, - "grad_norm": 1.5032592174222696, - "learning_rate": 1.2697841110256411e-06, - "loss": 0.8794, - "step": 6990 - }, - { - "epoch": 0.6304730125806015, - "grad_norm": 1.5358955602653608, - "learning_rate": 1.2692402624266221e-06, - "loss": 0.9831, - "step": 6991 - }, - { - "epoch": 0.6305631961040717, - "grad_norm": 1.6847249278002876, - "learning_rate": 1.2686964761800529e-06, - "loss": 0.8617, - "step": 6992 - }, - { - "epoch": 0.6306533796275421, - "grad_norm": 0.6542475895325245, - "learning_rate": 1.268152752332333e-06, - "loss": 0.8026, - "step": 6993 - }, - { - "epoch": 0.6307435631510123, - "grad_norm": 1.5888956978273365, - "learning_rate": 1.2676090909298549e-06, - "loss": 0.858, - "step": 6994 - }, - { - "epoch": 0.6308337466744826, - "grad_norm": 1.3588538212140329, - "learning_rate": 1.2670654920190086e-06, - "loss": 0.9923, - "step": 6995 - }, - { - "epoch": 0.6309239301979528, - "grad_norm": 1.176946980953797, - "learning_rate": 1.2665219556461754e-06, - "loss": 0.9868, - "step": 6996 - }, - { - "epoch": 0.6310141137214231, - "grad_norm": 1.594252937097642, - "learning_rate": 1.2659784818577329e-06, - "loss": 1.0365, - "step": 6997 - }, - { - "epoch": 0.6311042972448934, - "grad_norm": 1.3543043164449573, - "learning_rate": 1.2654350707000542e-06, - "loss": 0.8269, - "step": 6998 - }, - { - "epoch": 0.6311944807683636, - "grad_norm": 1.6087277045850208, - "learning_rate": 1.264891722219505e-06, - "loss": 0.9299, - "step": 6999 - }, - { - "epoch": 0.6312846642918338, - "grad_norm": 1.5474377003641144, - "learning_rate": 1.2643484364624483e-06, - "loss": 0.9381, - "step": 7000 - }, - { - "epoch": 0.6313748478153042, - "grad_norm": 1.4988777412909307, - "learning_rate": 1.2638052134752393e-06, - "loss": 0.9951, - "step": 7001 - }, - { - "epoch": 0.6314650313387744, - "grad_norm": 1.5096293359510842, - "learning_rate": 1.2632620533042277e-06, - "loss": 1.0168, - "step": 7002 - }, - { - "epoch": 0.6315552148622446, - "grad_norm": 1.5256344335405803, - "learning_rate": 1.2627189559957612e-06, - "loss": 0.9097, - "step": 7003 - }, - { - "epoch": 0.631645398385715, - "grad_norm": 1.7047327834564963, - "learning_rate": 1.262175921596178e-06, - "loss": 0.9995, - "step": 7004 - }, - { - "epoch": 0.6317355819091852, - "grad_norm": 2.0639886974276607, - "learning_rate": 1.2616329501518137e-06, - "loss": 0.8135, - "step": 7005 - }, - { - "epoch": 0.6318257654326555, - "grad_norm": 1.484053486587615, - "learning_rate": 1.2610900417089978e-06, - "loss": 0.8639, - "step": 7006 - }, - { - "epoch": 0.6319159489561257, - "grad_norm": 1.3857517363076135, - "learning_rate": 1.2605471963140535e-06, - "loss": 0.9519, - "step": 7007 - }, - { - "epoch": 0.632006132479596, - "grad_norm": 1.4149399032231993, - "learning_rate": 1.2600044140133e-06, - "loss": 1.0145, - "step": 7008 - }, - { - "epoch": 0.6320963160030663, - "grad_norm": 1.4901505392948533, - "learning_rate": 1.2594616948530493e-06, - "loss": 0.898, - "step": 7009 - }, - { - "epoch": 0.6321864995265365, - "grad_norm": 2.2072364914247746, - "learning_rate": 1.258919038879611e-06, - "loss": 0.9722, - "step": 7010 - }, - { - "epoch": 0.6322766830500067, - "grad_norm": 1.4674828927701746, - "learning_rate": 1.2583764461392859e-06, - "loss": 0.945, - "step": 7011 - }, - { - "epoch": 0.6323668665734771, - "grad_norm": 1.29362895049136, - "learning_rate": 1.2578339166783724e-06, - "loss": 0.9054, - "step": 7012 - }, - { - "epoch": 0.6324570500969473, - "grad_norm": 1.250980135981796, - "learning_rate": 1.2572914505431613e-06, - "loss": 0.8804, - "step": 7013 - }, - { - "epoch": 0.6325472336204175, - "grad_norm": 1.3839571681781722, - "learning_rate": 1.2567490477799383e-06, - "loss": 0.8443, - "step": 7014 - }, - { - "epoch": 0.6326374171438878, - "grad_norm": 1.3713320446246342, - "learning_rate": 1.2562067084349852e-06, - "loss": 1.0306, - "step": 7015 - }, - { - "epoch": 0.6327276006673581, - "grad_norm": 1.4070841116879997, - "learning_rate": 1.2556644325545764e-06, - "loss": 0.9269, - "step": 7016 - }, - { - "epoch": 0.6328177841908283, - "grad_norm": 1.5478785178548016, - "learning_rate": 1.255122220184983e-06, - "loss": 0.9217, - "step": 7017 - }, - { - "epoch": 0.6329079677142986, - "grad_norm": 1.3577954553515428, - "learning_rate": 1.2545800713724694e-06, - "loss": 0.8581, - "step": 7018 - }, - { - "epoch": 0.6329981512377688, - "grad_norm": 1.2942039835786474, - "learning_rate": 1.254037986163294e-06, - "loss": 0.8089, - "step": 7019 - }, - { - "epoch": 0.6330883347612392, - "grad_norm": 1.384117114500481, - "learning_rate": 1.2534959646037104e-06, - "loss": 0.9047, - "step": 7020 - }, - { - "epoch": 0.6331785182847094, - "grad_norm": 1.4511201215220544, - "learning_rate": 1.2529540067399675e-06, - "loss": 0.9489, - "step": 7021 - }, - { - "epoch": 0.6332687018081796, - "grad_norm": 1.456932821349668, - "learning_rate": 1.252412112618308e-06, - "loss": 0.8826, - "step": 7022 - }, - { - "epoch": 0.6333588853316499, - "grad_norm": 1.5213977950684803, - "learning_rate": 1.2518702822849696e-06, - "loss": 0.8268, - "step": 7023 - }, - { - "epoch": 0.6334490688551202, - "grad_norm": 1.549223917819285, - "learning_rate": 1.2513285157861831e-06, - "loss": 0.8872, - "step": 7024 - }, - { - "epoch": 0.6335392523785904, - "grad_norm": 1.4563585625649844, - "learning_rate": 1.2507868131681764e-06, - "loss": 0.9334, - "step": 7025 - }, - { - "epoch": 0.6336294359020607, - "grad_norm": 1.5331161960328807, - "learning_rate": 1.250245174477169e-06, - "loss": 0.9225, - "step": 7026 - }, - { - "epoch": 0.6337196194255309, - "grad_norm": 0.7523541854816003, - "learning_rate": 1.2497035997593783e-06, - "loss": 0.7677, - "step": 7027 - }, - { - "epoch": 0.6338098029490012, - "grad_norm": 1.386450986128594, - "learning_rate": 1.2491620890610135e-06, - "loss": 0.9387, - "step": 7028 - }, - { - "epoch": 0.6338999864724715, - "grad_norm": 1.319128374534794, - "learning_rate": 1.2486206424282788e-06, - "loss": 0.9286, - "step": 7029 - }, - { - "epoch": 0.6339901699959417, - "grad_norm": 1.4819242624809186, - "learning_rate": 1.2480792599073743e-06, - "loss": 0.9952, - "step": 7030 - }, - { - "epoch": 0.634080353519412, - "grad_norm": 1.6562445298590387, - "learning_rate": 1.247537941544493e-06, - "loss": 0.9864, - "step": 7031 - }, - { - "epoch": 0.6341705370428823, - "grad_norm": 1.5184287653779955, - "learning_rate": 1.2469966873858242e-06, - "loss": 0.9701, - "step": 7032 - }, - { - "epoch": 0.6342607205663525, - "grad_norm": 1.609490547846864, - "learning_rate": 1.2464554974775496e-06, - "loss": 0.8655, - "step": 7033 - }, - { - "epoch": 0.6343509040898228, - "grad_norm": 1.4608624280928977, - "learning_rate": 1.2459143718658474e-06, - "loss": 0.9359, - "step": 7034 - }, - { - "epoch": 0.6344410876132931, - "grad_norm": 1.7917333930549852, - "learning_rate": 1.2453733105968886e-06, - "loss": 0.8767, - "step": 7035 - }, - { - "epoch": 0.6345312711367633, - "grad_norm": 0.737531856883318, - "learning_rate": 1.2448323137168399e-06, - "loss": 0.8014, - "step": 7036 - }, - { - "epoch": 0.6346214546602336, - "grad_norm": 1.2689410436956825, - "learning_rate": 1.2442913812718625e-06, - "loss": 0.8559, - "step": 7037 - }, - { - "epoch": 0.6347116381837038, - "grad_norm": 1.4482461385229315, - "learning_rate": 1.2437505133081108e-06, - "loss": 0.9222, - "step": 7038 - }, - { - "epoch": 0.6348018217071741, - "grad_norm": 1.635392409547683, - "learning_rate": 1.2432097098717358e-06, - "loss": 0.9526, - "step": 7039 - }, - { - "epoch": 0.6348920052306444, - "grad_norm": 1.5675006952847, - "learning_rate": 1.2426689710088813e-06, - "loss": 0.9463, - "step": 7040 - }, - { - "epoch": 0.6349821887541146, - "grad_norm": 1.4433297243286072, - "learning_rate": 1.2421282967656855e-06, - "loss": 0.9904, - "step": 7041 - }, - { - "epoch": 0.6350723722775848, - "grad_norm": 1.4152193342942132, - "learning_rate": 1.2415876871882827e-06, - "loss": 0.9676, - "step": 7042 - }, - { - "epoch": 0.6351625558010552, - "grad_norm": 0.7246476237851965, - "learning_rate": 1.2410471423227998e-06, - "loss": 0.8553, - "step": 7043 - }, - { - "epoch": 0.6352527393245254, - "grad_norm": 1.4235011433046423, - "learning_rate": 1.24050666221536e-06, - "loss": 0.9305, - "step": 7044 - }, - { - "epoch": 0.6353429228479957, - "grad_norm": 1.3647395425547144, - "learning_rate": 1.23996624691208e-06, - "loss": 1.003, - "step": 7045 - }, - { - "epoch": 0.6354331063714659, - "grad_norm": 1.238435609264023, - "learning_rate": 1.2394258964590693e-06, - "loss": 0.9285, - "step": 7046 - }, - { - "epoch": 0.6355232898949362, - "grad_norm": 1.6273935736154375, - "learning_rate": 1.238885610902436e-06, - "loss": 0.8921, - "step": 7047 - }, - { - "epoch": 0.6356134734184065, - "grad_norm": 1.3837236322993842, - "learning_rate": 1.2383453902882787e-06, - "loss": 0.9495, - "step": 7048 - }, - { - "epoch": 0.6357036569418767, - "grad_norm": 1.303561930052609, - "learning_rate": 1.2378052346626927e-06, - "loss": 1.0025, - "step": 7049 - }, - { - "epoch": 0.6357938404653469, - "grad_norm": 2.0217089247263957, - "learning_rate": 1.2372651440717665e-06, - "loss": 0.872, - "step": 7050 - }, - { - "epoch": 0.6358840239888173, - "grad_norm": 1.43582985093633, - "learning_rate": 1.236725118561584e-06, - "loss": 0.9742, - "step": 7051 - }, - { - "epoch": 0.6359742075122875, - "grad_norm": 1.227908276953296, - "learning_rate": 1.2361851581782232e-06, - "loss": 0.9434, - "step": 7052 - }, - { - "epoch": 0.6360643910357577, - "grad_norm": 1.3884192557534663, - "learning_rate": 1.2356452629677554e-06, - "loss": 1.0103, - "step": 7053 - }, - { - "epoch": 0.6361545745592281, - "grad_norm": 1.488142032069212, - "learning_rate": 1.2351054329762494e-06, - "loss": 0.8817, - "step": 7054 - }, - { - "epoch": 0.6362447580826983, - "grad_norm": 1.4232444456398479, - "learning_rate": 1.2345656682497648e-06, - "loss": 0.9691, - "step": 7055 - }, - { - "epoch": 0.6363349416061685, - "grad_norm": 1.292583949980028, - "learning_rate": 1.2340259688343583e-06, - "loss": 0.9196, - "step": 7056 - }, - { - "epoch": 0.6364251251296388, - "grad_norm": 1.4381587136373237, - "learning_rate": 1.2334863347760803e-06, - "loss": 0.8411, - "step": 7057 - }, - { - "epoch": 0.6365153086531091, - "grad_norm": 0.6410667245641432, - "learning_rate": 1.2329467661209734e-06, - "loss": 0.7535, - "step": 7058 - }, - { - "epoch": 0.6366054921765794, - "grad_norm": 1.4680495519564425, - "learning_rate": 1.2324072629150788e-06, - "loss": 0.98, - "step": 7059 - }, - { - "epoch": 0.6366956757000496, - "grad_norm": 1.571680336761625, - "learning_rate": 1.2318678252044287e-06, - "loss": 0.9842, - "step": 7060 - }, - { - "epoch": 0.6367858592235198, - "grad_norm": 1.6297531304900639, - "learning_rate": 1.2313284530350512e-06, - "loss": 0.9911, - "step": 7061 - }, - { - "epoch": 0.6368760427469902, - "grad_norm": 1.4581883312980157, - "learning_rate": 1.230789146452969e-06, - "loss": 0.8736, - "step": 7062 - }, - { - "epoch": 0.6369662262704604, - "grad_norm": 1.8451643869002725, - "learning_rate": 1.2302499055041974e-06, - "loss": 0.9432, - "step": 7063 - }, - { - "epoch": 0.6370564097939306, - "grad_norm": 0.6922368378632971, - "learning_rate": 1.2297107302347488e-06, - "loss": 0.8153, - "step": 7064 - }, - { - "epoch": 0.6371465933174009, - "grad_norm": 1.3638733722885776, - "learning_rate": 1.2291716206906275e-06, - "loss": 1.0067, - "step": 7065 - }, - { - "epoch": 0.6372367768408712, - "grad_norm": 1.550147861688337, - "learning_rate": 1.2286325769178345e-06, - "loss": 1.0038, - "step": 7066 - }, - { - "epoch": 0.6373269603643414, - "grad_norm": 1.513798035317042, - "learning_rate": 1.2280935989623633e-06, - "loss": 0.9632, - "step": 7067 - }, - { - "epoch": 0.6374171438878117, - "grad_norm": 1.5259793716926258, - "learning_rate": 1.2275546868702017e-06, - "loss": 0.8777, - "step": 7068 - }, - { - "epoch": 0.6375073274112819, - "grad_norm": 1.5509070049964222, - "learning_rate": 1.2270158406873341e-06, - "loss": 0.8842, - "step": 7069 - }, - { - "epoch": 0.6375975109347523, - "grad_norm": 1.4437735947831862, - "learning_rate": 1.2264770604597363e-06, - "loss": 0.988, - "step": 7070 - }, - { - "epoch": 0.6376876944582225, - "grad_norm": 2.497702591996819, - "learning_rate": 1.2259383462333819e-06, - "loss": 1.0385, - "step": 7071 - }, - { - "epoch": 0.6377778779816927, - "grad_norm": 1.5471416975594932, - "learning_rate": 1.2253996980542359e-06, - "loss": 0.8972, - "step": 7072 - }, - { - "epoch": 0.637868061505163, - "grad_norm": 2.0188830163553377, - "learning_rate": 1.2248611159682578e-06, - "loss": 0.9259, - "step": 7073 - }, - { - "epoch": 0.6379582450286333, - "grad_norm": 0.7747352578606055, - "learning_rate": 1.2243226000214044e-06, - "loss": 0.7936, - "step": 7074 - }, - { - "epoch": 0.6380484285521035, - "grad_norm": 1.6061956611674448, - "learning_rate": 1.2237841502596232e-06, - "loss": 0.9398, - "step": 7075 - }, - { - "epoch": 0.6381386120755738, - "grad_norm": 1.3251157889965868, - "learning_rate": 1.2232457667288583e-06, - "loss": 0.9349, - "step": 7076 - }, - { - "epoch": 0.6382287955990441, - "grad_norm": 1.5904901863330234, - "learning_rate": 1.2227074494750476e-06, - "loss": 0.9404, - "step": 7077 - }, - { - "epoch": 0.6383189791225143, - "grad_norm": 1.502768663451987, - "learning_rate": 1.2221691985441238e-06, - "loss": 0.972, - "step": 7078 - }, - { - "epoch": 0.6384091626459846, - "grad_norm": 1.3875470333891184, - "learning_rate": 1.2216310139820128e-06, - "loss": 0.8851, - "step": 7079 - }, - { - "epoch": 0.6384993461694548, - "grad_norm": 1.2851959500659493, - "learning_rate": 1.2210928958346347e-06, - "loss": 0.984, - "step": 7080 - }, - { - "epoch": 0.6385895296929252, - "grad_norm": 1.3441742515625876, - "learning_rate": 1.2205548441479065e-06, - "loss": 0.8672, - "step": 7081 - }, - { - "epoch": 0.6386797132163954, - "grad_norm": 1.4546090208328006, - "learning_rate": 1.2200168589677357e-06, - "loss": 0.8719, - "step": 7082 - }, - { - "epoch": 0.6387698967398656, - "grad_norm": 1.25188804135931, - "learning_rate": 1.2194789403400284e-06, - "loss": 0.9043, - "step": 7083 - }, - { - "epoch": 0.6388600802633358, - "grad_norm": 1.7286453457017812, - "learning_rate": 1.2189410883106816e-06, - "loss": 0.9211, - "step": 7084 - }, - { - "epoch": 0.6389502637868062, - "grad_norm": 1.529148606168581, - "learning_rate": 1.2184033029255872e-06, - "loss": 1.009, - "step": 7085 - }, - { - "epoch": 0.6390404473102764, - "grad_norm": 0.8029522379736092, - "learning_rate": 1.2178655842306334e-06, - "loss": 0.8182, - "step": 7086 - }, - { - "epoch": 0.6391306308337467, - "grad_norm": 1.7069577129245963, - "learning_rate": 1.2173279322716999e-06, - "loss": 0.9332, - "step": 7087 - }, - { - "epoch": 0.6392208143572169, - "grad_norm": 1.4746169740594763, - "learning_rate": 1.216790347094663e-06, - "loss": 0.939, - "step": 7088 - }, - { - "epoch": 0.6393109978806872, - "grad_norm": 1.6169358209673002, - "learning_rate": 1.2162528287453927e-06, - "loss": 0.9773, - "step": 7089 - }, - { - "epoch": 0.6394011814041575, - "grad_norm": 3.8944942736616017, - "learning_rate": 1.215715377269752e-06, - "loss": 0.9397, - "step": 7090 - }, - { - "epoch": 0.6394913649276277, - "grad_norm": 1.3770949703230797, - "learning_rate": 1.2151779927136003e-06, - "loss": 0.9227, - "step": 7091 - }, - { - "epoch": 0.6395815484510979, - "grad_norm": 1.4961184507078924, - "learning_rate": 1.2146406751227893e-06, - "loss": 1.0099, - "step": 7092 - }, - { - "epoch": 0.6396717319745683, - "grad_norm": 1.4202882905246013, - "learning_rate": 1.214103424543167e-06, - "loss": 0.9761, - "step": 7093 - }, - { - "epoch": 0.6397619154980385, - "grad_norm": 1.9227164062612692, - "learning_rate": 1.2135662410205735e-06, - "loss": 0.8532, - "step": 7094 - }, - { - "epoch": 0.6398520990215087, - "grad_norm": 1.5555354733911637, - "learning_rate": 1.2130291246008444e-06, - "loss": 0.8566, - "step": 7095 - }, - { - "epoch": 0.639942282544979, - "grad_norm": 1.6862604745775662, - "learning_rate": 1.21249207532981e-06, - "loss": 0.8595, - "step": 7096 - }, - { - "epoch": 0.6400324660684493, - "grad_norm": 1.5412183659656946, - "learning_rate": 1.2119550932532936e-06, - "loss": 0.8555, - "step": 7097 - }, - { - "epoch": 0.6401226495919196, - "grad_norm": 1.427848007752505, - "learning_rate": 1.2114181784171144e-06, - "loss": 0.9176, - "step": 7098 - }, - { - "epoch": 0.6402128331153898, - "grad_norm": 1.3186535768376724, - "learning_rate": 1.2108813308670837e-06, - "loss": 0.837, - "step": 7099 - }, - { - "epoch": 0.6403030166388601, - "grad_norm": 1.777241597017156, - "learning_rate": 1.2103445506490099e-06, - "loss": 0.9435, - "step": 7100 - }, - { - "epoch": 0.6403932001623304, - "grad_norm": 0.7164304456712107, - "learning_rate": 1.209807837808693e-06, - "loss": 0.8292, - "step": 7101 - }, - { - "epoch": 0.6404833836858006, - "grad_norm": 1.5378791403638359, - "learning_rate": 1.2092711923919282e-06, - "loss": 0.9797, - "step": 7102 - }, - { - "epoch": 0.6405735672092708, - "grad_norm": 1.3034561651224608, - "learning_rate": 1.2087346144445053e-06, - "loss": 0.9025, - "step": 7103 - }, - { - "epoch": 0.6406637507327412, - "grad_norm": 1.9377924175479864, - "learning_rate": 1.2081981040122081e-06, - "loss": 0.953, - "step": 7104 - }, - { - "epoch": 0.6407539342562114, - "grad_norm": 9.144822003200218, - "learning_rate": 1.2076616611408148e-06, - "loss": 0.8915, - "step": 7105 - }, - { - "epoch": 0.6408441177796816, - "grad_norm": 1.8873823748183685, - "learning_rate": 1.2071252858760972e-06, - "loss": 0.9748, - "step": 7106 - }, - { - "epoch": 0.6409343013031519, - "grad_norm": 1.436258111305008, - "learning_rate": 1.2065889782638218e-06, - "loss": 0.9254, - "step": 7107 - }, - { - "epoch": 0.6410244848266222, - "grad_norm": 1.9447037077431704, - "learning_rate": 1.2060527383497506e-06, - "loss": 0.9175, - "step": 7108 - }, - { - "epoch": 0.6411146683500925, - "grad_norm": 1.3294306900418855, - "learning_rate": 1.2055165661796363e-06, - "loss": 1.0571, - "step": 7109 - }, - { - "epoch": 0.6412048518735627, - "grad_norm": 1.3177293416435596, - "learning_rate": 1.2049804617992303e-06, - "loss": 1.008, - "step": 7110 - }, - { - "epoch": 0.6412950353970329, - "grad_norm": 0.6965100765567368, - "learning_rate": 1.204444425254275e-06, - "loss": 0.818, - "step": 7111 - }, - { - "epoch": 0.6413852189205033, - "grad_norm": 1.573063959294643, - "learning_rate": 1.203908456590507e-06, - "loss": 0.9255, - "step": 7112 - }, - { - "epoch": 0.6414754024439735, - "grad_norm": 1.4459106032568105, - "learning_rate": 1.20337255585366e-06, - "loss": 0.9066, - "step": 7113 - }, - { - "epoch": 0.6415655859674437, - "grad_norm": 0.6835956682450764, - "learning_rate": 1.2028367230894582e-06, - "loss": 0.7538, - "step": 7114 - }, - { - "epoch": 0.641655769490914, - "grad_norm": 1.5247035316833197, - "learning_rate": 1.2023009583436237e-06, - "loss": 0.8627, - "step": 7115 - }, - { - "epoch": 0.6417459530143843, - "grad_norm": 1.6432436435100046, - "learning_rate": 1.2017652616618698e-06, - "loss": 0.9969, - "step": 7116 - }, - { - "epoch": 0.6418361365378545, - "grad_norm": 1.4443917305611649, - "learning_rate": 1.2012296330899048e-06, - "loss": 0.9359, - "step": 7117 - }, - { - "epoch": 0.6419263200613248, - "grad_norm": 1.4377262932499941, - "learning_rate": 1.200694072673432e-06, - "loss": 0.9686, - "step": 7118 - }, - { - "epoch": 0.642016503584795, - "grad_norm": 1.5367878792151228, - "learning_rate": 1.200158580458148e-06, - "loss": 0.9692, - "step": 7119 - }, - { - "epoch": 0.6421066871082654, - "grad_norm": 1.787801039628502, - "learning_rate": 1.1996231564897448e-06, - "loss": 0.9194, - "step": 7120 - }, - { - "epoch": 0.6421968706317356, - "grad_norm": 1.4604422806014148, - "learning_rate": 1.1990878008139062e-06, - "loss": 0.9009, - "step": 7121 - }, - { - "epoch": 0.6422870541552058, - "grad_norm": 1.3941916257893252, - "learning_rate": 1.1985525134763132e-06, - "loss": 0.9703, - "step": 7122 - }, - { - "epoch": 0.6423772376786762, - "grad_norm": 1.4772237276073825, - "learning_rate": 1.1980172945226393e-06, - "loss": 0.9891, - "step": 7123 - }, - { - "epoch": 0.6424674212021464, - "grad_norm": 1.6596923179894116, - "learning_rate": 1.197482143998551e-06, - "loss": 0.947, - "step": 7124 - }, - { - "epoch": 0.6425576047256166, - "grad_norm": 1.4565552452969055, - "learning_rate": 1.196947061949712e-06, - "loss": 0.9229, - "step": 7125 - }, - { - "epoch": 0.6426477882490869, - "grad_norm": 1.439373619751428, - "learning_rate": 1.1964120484217768e-06, - "loss": 0.9969, - "step": 7126 - }, - { - "epoch": 0.6427379717725572, - "grad_norm": 0.7900829326581815, - "learning_rate": 1.1958771034603975e-06, - "loss": 0.8168, - "step": 7127 - }, - { - "epoch": 0.6428281552960274, - "grad_norm": 1.5160356732099243, - "learning_rate": 1.1953422271112175e-06, - "loss": 0.8837, - "step": 7128 - }, - { - "epoch": 0.6429183388194977, - "grad_norm": 1.641374979715374, - "learning_rate": 1.1948074194198748e-06, - "loss": 0.9306, - "step": 7129 - }, - { - "epoch": 0.6430085223429679, - "grad_norm": 1.5733109878849794, - "learning_rate": 1.1942726804320033e-06, - "loss": 0.8446, - "step": 7130 - }, - { - "epoch": 0.6430987058664382, - "grad_norm": 1.6810765400287575, - "learning_rate": 1.1937380101932295e-06, - "loss": 0.879, - "step": 7131 - }, - { - "epoch": 0.6431888893899085, - "grad_norm": 1.2590031645222253, - "learning_rate": 1.1932034087491745e-06, - "loss": 0.9328, - "step": 7132 - }, - { - "epoch": 0.6432790729133787, - "grad_norm": 1.5336795916947839, - "learning_rate": 1.1926688761454531e-06, - "loss": 0.8552, - "step": 7133 - }, - { - "epoch": 0.643369256436849, - "grad_norm": 1.033116702988592, - "learning_rate": 1.1921344124276746e-06, - "loss": 0.9569, - "step": 7134 - }, - { - "epoch": 0.6434594399603193, - "grad_norm": 1.4170899413322187, - "learning_rate": 1.1916000176414431e-06, - "loss": 0.9687, - "step": 7135 - }, - { - "epoch": 0.6435496234837895, - "grad_norm": 1.647959852396248, - "learning_rate": 1.1910656918323546e-06, - "loss": 0.9705, - "step": 7136 - }, - { - "epoch": 0.6436398070072598, - "grad_norm": 1.4074655577407575, - "learning_rate": 1.1905314350460024e-06, - "loss": 0.9652, - "step": 7137 - }, - { - "epoch": 0.64372999053073, - "grad_norm": 1.2863526500376128, - "learning_rate": 1.1899972473279717e-06, - "loss": 0.9927, - "step": 7138 - }, - { - "epoch": 0.6438201740542003, - "grad_norm": 1.3459971069429546, - "learning_rate": 1.1894631287238414e-06, - "loss": 0.9337, - "step": 7139 - }, - { - "epoch": 0.6439103575776706, - "grad_norm": 1.3179576572568716, - "learning_rate": 1.188929079279187e-06, - "loss": 0.9512, - "step": 7140 - }, - { - "epoch": 0.6440005411011408, - "grad_norm": 1.3874512792745317, - "learning_rate": 1.1883950990395751e-06, - "loss": 0.8814, - "step": 7141 - }, - { - "epoch": 0.644090724624611, - "grad_norm": 1.3764480421996923, - "learning_rate": 1.187861188050569e-06, - "loss": 0.8814, - "step": 7142 - }, - { - "epoch": 0.6441809081480814, - "grad_norm": 1.2453942323520815, - "learning_rate": 1.187327346357724e-06, - "loss": 0.9266, - "step": 7143 - }, - { - "epoch": 0.6442710916715516, - "grad_norm": 1.9785135764349586, - "learning_rate": 1.1867935740065912e-06, - "loss": 0.9696, - "step": 7144 - }, - { - "epoch": 0.6443612751950218, - "grad_norm": 1.4602072619594206, - "learning_rate": 1.1862598710427148e-06, - "loss": 0.8265, - "step": 7145 - }, - { - "epoch": 0.6444514587184921, - "grad_norm": 1.8062312801671394, - "learning_rate": 1.1857262375116328e-06, - "loss": 0.9052, - "step": 7146 - }, - { - "epoch": 0.6445416422419624, - "grad_norm": 1.7160244919556298, - "learning_rate": 1.1851926734588783e-06, - "loss": 0.9507, - "step": 7147 - }, - { - "epoch": 0.6446318257654327, - "grad_norm": 1.4101362147717558, - "learning_rate": 1.184659178929977e-06, - "loss": 0.9671, - "step": 7148 - }, - { - "epoch": 0.6447220092889029, - "grad_norm": 1.3867686947706963, - "learning_rate": 1.1841257539704513e-06, - "loss": 0.9383, - "step": 7149 - }, - { - "epoch": 0.6448121928123732, - "grad_norm": 1.4420976505415677, - "learning_rate": 1.1835923986258146e-06, - "loss": 0.9569, - "step": 7150 - }, - { - "epoch": 0.6449023763358435, - "grad_norm": 1.299298652949105, - "learning_rate": 1.1830591129415754e-06, - "loss": 0.8987, - "step": 7151 - }, - { - "epoch": 0.6449925598593137, - "grad_norm": 1.500511082365199, - "learning_rate": 1.182525896963238e-06, - "loss": 0.8887, - "step": 7152 - }, - { - "epoch": 0.6450827433827839, - "grad_norm": 1.6018595350265405, - "learning_rate": 1.181992750736298e-06, - "loss": 0.8675, - "step": 7153 - }, - { - "epoch": 0.6451729269062543, - "grad_norm": 1.5656986303014173, - "learning_rate": 1.1814596743062474e-06, - "loss": 0.9345, - "step": 7154 - }, - { - "epoch": 0.6452631104297245, - "grad_norm": 1.3439299884283396, - "learning_rate": 1.1809266677185711e-06, - "loss": 0.965, - "step": 7155 - }, - { - "epoch": 0.6453532939531947, - "grad_norm": 0.6889292642232104, - "learning_rate": 1.180393731018747e-06, - "loss": 0.8412, - "step": 7156 - }, - { - "epoch": 0.645443477476665, - "grad_norm": 0.5909419667495243, - "learning_rate": 1.1798608642522498e-06, - "loss": 0.7662, - "step": 7157 - }, - { - "epoch": 0.6455336610001353, - "grad_norm": 1.739106194700402, - "learning_rate": 1.1793280674645454e-06, - "loss": 0.9273, - "step": 7158 - }, - { - "epoch": 0.6456238445236056, - "grad_norm": 1.452169042362853, - "learning_rate": 1.1787953407010954e-06, - "loss": 1.0141, - "step": 7159 - }, - { - "epoch": 0.6457140280470758, - "grad_norm": 1.5371275225759249, - "learning_rate": 1.1782626840073554e-06, - "loss": 0.9492, - "step": 7160 - }, - { - "epoch": 0.645804211570546, - "grad_norm": 1.5288082038735131, - "learning_rate": 1.1777300974287738e-06, - "loss": 0.8425, - "step": 7161 - }, - { - "epoch": 0.6458943950940164, - "grad_norm": 1.4360414652084297, - "learning_rate": 1.1771975810107947e-06, - "loss": 0.9657, - "step": 7162 - }, - { - "epoch": 0.6459845786174866, - "grad_norm": 1.4350952268365718, - "learning_rate": 1.1766651347988542e-06, - "loss": 1.0031, - "step": 7163 - }, - { - "epoch": 0.6460747621409568, - "grad_norm": 1.9885029547393387, - "learning_rate": 1.1761327588383848e-06, - "loss": 1.0497, - "step": 7164 - }, - { - "epoch": 0.6461649456644271, - "grad_norm": 2.81352452585158, - "learning_rate": 1.1756004531748105e-06, - "loss": 0.9551, - "step": 7165 - }, - { - "epoch": 0.6462551291878974, - "grad_norm": 1.289325941505138, - "learning_rate": 1.1750682178535521e-06, - "loss": 0.9707, - "step": 7166 - }, - { - "epoch": 0.6463453127113676, - "grad_norm": 1.4696633765746994, - "learning_rate": 1.1745360529200218e-06, - "loss": 0.8962, - "step": 7167 - }, - { - "epoch": 0.6464354962348379, - "grad_norm": 1.4108959593207604, - "learning_rate": 1.1740039584196261e-06, - "loss": 0.9664, - "step": 7168 - }, - { - "epoch": 0.6465256797583081, - "grad_norm": 1.3827823269240769, - "learning_rate": 1.1734719343977683e-06, - "loss": 0.9676, - "step": 7169 - }, - { - "epoch": 0.6466158632817784, - "grad_norm": 2.0353951048579337, - "learning_rate": 1.1729399808998416e-06, - "loss": 0.887, - "step": 7170 - }, - { - "epoch": 0.6467060468052487, - "grad_norm": 1.300606609912884, - "learning_rate": 1.1724080979712368e-06, - "loss": 0.8269, - "step": 7171 - }, - { - "epoch": 0.6467962303287189, - "grad_norm": 1.2630915776001916, - "learning_rate": 1.1718762856573365e-06, - "loss": 1.0142, - "step": 7172 - }, - { - "epoch": 0.6468864138521893, - "grad_norm": 1.30674613218755, - "learning_rate": 1.1713445440035172e-06, - "loss": 0.9334, - "step": 7173 - }, - { - "epoch": 0.6469765973756595, - "grad_norm": 1.5583203680685669, - "learning_rate": 1.1708128730551506e-06, - "loss": 0.9805, - "step": 7174 - }, - { - "epoch": 0.6470667808991297, - "grad_norm": 1.5813392161656057, - "learning_rate": 1.1702812728576019e-06, - "loss": 0.9261, - "step": 7175 - }, - { - "epoch": 0.6471569644226, - "grad_norm": 1.4062918338910977, - "learning_rate": 1.1697497434562303e-06, - "loss": 0.9715, - "step": 7176 - }, - { - "epoch": 0.6472471479460703, - "grad_norm": 1.4137681686353694, - "learning_rate": 1.1692182848963885e-06, - "loss": 0.9805, - "step": 7177 - }, - { - "epoch": 0.6473373314695405, - "grad_norm": 1.5068801606389348, - "learning_rate": 1.1686868972234227e-06, - "loss": 0.9063, - "step": 7178 - }, - { - "epoch": 0.6474275149930108, - "grad_norm": 1.1947706901928337, - "learning_rate": 1.1681555804826755e-06, - "loss": 0.9477, - "step": 7179 - }, - { - "epoch": 0.647517698516481, - "grad_norm": 1.3468589746358257, - "learning_rate": 1.1676243347194806e-06, - "loss": 0.8828, - "step": 7180 - }, - { - "epoch": 0.6476078820399513, - "grad_norm": 1.334578017574462, - "learning_rate": 1.167093159979167e-06, - "loss": 0.9579, - "step": 7181 - }, - { - "epoch": 0.6476980655634216, - "grad_norm": 1.6285496602718874, - "learning_rate": 1.1665620563070575e-06, - "loss": 0.9686, - "step": 7182 - }, - { - "epoch": 0.6477882490868918, - "grad_norm": 1.4793412638316008, - "learning_rate": 1.1660310237484691e-06, - "loss": 1.0452, - "step": 7183 - }, - { - "epoch": 0.647878432610362, - "grad_norm": 1.3569853375738372, - "learning_rate": 1.165500062348713e-06, - "loss": 0.8887, - "step": 7184 - }, - { - "epoch": 0.6479686161338324, - "grad_norm": 1.5997326329476824, - "learning_rate": 1.164969172153091e-06, - "loss": 0.9937, - "step": 7185 - }, - { - "epoch": 0.6480587996573026, - "grad_norm": 1.368579526414245, - "learning_rate": 1.1644383532069055e-06, - "loss": 0.9598, - "step": 7186 - }, - { - "epoch": 0.6481489831807729, - "grad_norm": 1.3018535512987763, - "learning_rate": 1.1639076055554454e-06, - "loss": 0.9798, - "step": 7187 - }, - { - "epoch": 0.6482391667042431, - "grad_norm": 1.417320754947872, - "learning_rate": 1.163376929244e-06, - "loss": 0.9203, - "step": 7188 - }, - { - "epoch": 0.6483293502277134, - "grad_norm": 1.2372325704583658, - "learning_rate": 1.1628463243178472e-06, - "loss": 0.9032, - "step": 7189 - }, - { - "epoch": 0.6484195337511837, - "grad_norm": 1.5373379877841054, - "learning_rate": 1.1623157908222623e-06, - "loss": 1.0024, - "step": 7190 - }, - { - "epoch": 0.6485097172746539, - "grad_norm": 1.3183756961518664, - "learning_rate": 1.1617853288025129e-06, - "loss": 0.9839, - "step": 7191 - }, - { - "epoch": 0.6485999007981241, - "grad_norm": 1.261781814935971, - "learning_rate": 1.1612549383038612e-06, - "loss": 0.9185, - "step": 7192 - }, - { - "epoch": 0.6486900843215945, - "grad_norm": 1.2002247619740547, - "learning_rate": 1.1607246193715629e-06, - "loss": 0.9232, - "step": 7193 - }, - { - "epoch": 0.6487802678450647, - "grad_norm": 1.8064356012255522, - "learning_rate": 1.1601943720508684e-06, - "loss": 0.9599, - "step": 7194 - }, - { - "epoch": 0.6488704513685349, - "grad_norm": 1.382735600461524, - "learning_rate": 1.1596641963870202e-06, - "loss": 0.9848, - "step": 7195 - }, - { - "epoch": 0.6489606348920053, - "grad_norm": 1.5972920867825167, - "learning_rate": 1.1591340924252561e-06, - "loss": 0.9519, - "step": 7196 - }, - { - "epoch": 0.6490508184154755, - "grad_norm": 0.8027348576379518, - "learning_rate": 1.158604060210808e-06, - "loss": 0.8706, - "step": 7197 - }, - { - "epoch": 0.6491410019389457, - "grad_norm": 1.6738064028956245, - "learning_rate": 1.1580740997889008e-06, - "loss": 1.0068, - "step": 7198 - }, - { - "epoch": 0.649231185462416, - "grad_norm": 3.4704363048991844, - "learning_rate": 1.1575442112047544e-06, - "loss": 0.9338, - "step": 7199 - }, - { - "epoch": 0.6493213689858863, - "grad_norm": 1.6758290530769298, - "learning_rate": 1.1570143945035797e-06, - "loss": 0.8776, - "step": 7200 - }, - { - "epoch": 0.6494115525093566, - "grad_norm": 2.881087947764932, - "learning_rate": 1.1564846497305864e-06, - "loss": 0.8976, - "step": 7201 - }, - { - "epoch": 0.6495017360328268, - "grad_norm": 1.5295396711947902, - "learning_rate": 1.1559549769309726e-06, - "loss": 0.9173, - "step": 7202 - }, - { - "epoch": 0.649591919556297, - "grad_norm": 1.2544008509505316, - "learning_rate": 1.1554253761499358e-06, - "loss": 0.9289, - "step": 7203 - }, - { - "epoch": 0.6496821030797674, - "grad_norm": 1.4812008298285928, - "learning_rate": 1.1548958474326617e-06, - "loss": 0.9716, - "step": 7204 - }, - { - "epoch": 0.6497722866032376, - "grad_norm": 1.366211286521578, - "learning_rate": 1.154366390824334e-06, - "loss": 0.8711, - "step": 7205 - }, - { - "epoch": 0.6498624701267078, - "grad_norm": 0.8626467414087323, - "learning_rate": 1.1538370063701287e-06, - "loss": 0.8015, - "step": 7206 - }, - { - "epoch": 0.6499526536501781, - "grad_norm": 1.327848787195705, - "learning_rate": 1.1533076941152153e-06, - "loss": 0.9037, - "step": 7207 - }, - { - "epoch": 0.6500428371736484, - "grad_norm": 1.602355091012058, - "learning_rate": 1.1527784541047583e-06, - "loss": 0.8555, - "step": 7208 - }, - { - "epoch": 0.6501330206971186, - "grad_norm": 1.8158067595854268, - "learning_rate": 1.1522492863839152e-06, - "loss": 0.9033, - "step": 7209 - }, - { - "epoch": 0.6502232042205889, - "grad_norm": 1.6735414417516323, - "learning_rate": 1.1517201909978382e-06, - "loss": 0.9016, - "step": 7210 - }, - { - "epoch": 0.6503133877440591, - "grad_norm": 0.6636065769235817, - "learning_rate": 1.151191167991671e-06, - "loss": 0.8049, - "step": 7211 - }, - { - "epoch": 0.6504035712675295, - "grad_norm": 1.4231572500267962, - "learning_rate": 1.1506622174105536e-06, - "loss": 0.9512, - "step": 7212 - }, - { - "epoch": 0.6504937547909997, - "grad_norm": 1.6860497918972726, - "learning_rate": 1.1501333392996194e-06, - "loss": 0.9708, - "step": 7213 - }, - { - "epoch": 0.6505839383144699, - "grad_norm": 0.685811570410523, - "learning_rate": 1.1496045337039943e-06, - "loss": 0.8049, - "step": 7214 - }, - { - "epoch": 0.6506741218379402, - "grad_norm": 1.52718553430951, - "learning_rate": 1.1490758006687995e-06, - "loss": 0.8965, - "step": 7215 - }, - { - "epoch": 0.6507643053614105, - "grad_norm": 1.6545845150981375, - "learning_rate": 1.1485471402391502e-06, - "loss": 0.9206, - "step": 7216 - }, - { - "epoch": 0.6508544888848807, - "grad_norm": 1.4939484915273733, - "learning_rate": 1.1480185524601522e-06, - "loss": 0.9119, - "step": 7217 - }, - { - "epoch": 0.650944672408351, - "grad_norm": 1.360233781019233, - "learning_rate": 1.1474900373769108e-06, - "loss": 0.9499, - "step": 7218 - }, - { - "epoch": 0.6510348559318213, - "grad_norm": 0.5940363344132145, - "learning_rate": 1.1469615950345184e-06, - "loss": 0.7351, - "step": 7219 - }, - { - "epoch": 0.6511250394552915, - "grad_norm": 0.7114370917243928, - "learning_rate": 1.1464332254780678e-06, - "loss": 0.7818, - "step": 7220 - }, - { - "epoch": 0.6512152229787618, - "grad_norm": 1.324916599455949, - "learning_rate": 1.1459049287526404e-06, - "loss": 0.9264, - "step": 7221 - }, - { - "epoch": 0.651305406502232, - "grad_norm": 1.706683160483609, - "learning_rate": 1.1453767049033137e-06, - "loss": 0.906, - "step": 7222 - }, - { - "epoch": 0.6513955900257024, - "grad_norm": 1.3889035059901467, - "learning_rate": 1.1448485539751586e-06, - "loss": 0.9878, - "step": 7223 - }, - { - "epoch": 0.6514857735491726, - "grad_norm": 1.5200820356115698, - "learning_rate": 1.1443204760132408e-06, - "loss": 0.9134, - "step": 7224 - }, - { - "epoch": 0.6515759570726428, - "grad_norm": 1.415367578002045, - "learning_rate": 1.1437924710626185e-06, - "loss": 0.9227, - "step": 7225 - }, - { - "epoch": 0.651666140596113, - "grad_norm": 0.6901901887265974, - "learning_rate": 1.1432645391683429e-06, - "loss": 0.7622, - "step": 7226 - }, - { - "epoch": 0.6517563241195834, - "grad_norm": 1.655801748401038, - "learning_rate": 1.1427366803754609e-06, - "loss": 0.9696, - "step": 7227 - }, - { - "epoch": 0.6518465076430536, - "grad_norm": 1.3086881688913907, - "learning_rate": 1.142208894729012e-06, - "loss": 0.9561, - "step": 7228 - }, - { - "epoch": 0.6519366911665239, - "grad_norm": 1.3379552226934723, - "learning_rate": 1.1416811822740301e-06, - "loss": 0.9838, - "step": 7229 - }, - { - "epoch": 0.6520268746899941, - "grad_norm": 1.2610424579487844, - "learning_rate": 1.1411535430555428e-06, - "loss": 0.9895, - "step": 7230 - }, - { - "epoch": 0.6521170582134644, - "grad_norm": 1.3277628236502237, - "learning_rate": 1.1406259771185705e-06, - "loss": 0.9942, - "step": 7231 - }, - { - "epoch": 0.6522072417369347, - "grad_norm": 1.1805185081553058, - "learning_rate": 1.1400984845081282e-06, - "loss": 0.8983, - "step": 7232 - }, - { - "epoch": 0.6522974252604049, - "grad_norm": 1.4372581610216517, - "learning_rate": 1.139571065269226e-06, - "loss": 1.02, - "step": 7233 - }, - { - "epoch": 0.6523876087838751, - "grad_norm": 1.3972950962916193, - "learning_rate": 1.139043719446863e-06, - "loss": 0.9849, - "step": 7234 - }, - { - "epoch": 0.6524777923073455, - "grad_norm": 1.3201428817955303, - "learning_rate": 1.1385164470860385e-06, - "loss": 1.0051, - "step": 7235 - }, - { - "epoch": 0.6525679758308157, - "grad_norm": 1.5382259825876334, - "learning_rate": 1.1379892482317403e-06, - "loss": 0.9464, - "step": 7236 - }, - { - "epoch": 0.652658159354286, - "grad_norm": 1.3524327205346272, - "learning_rate": 1.1374621229289524e-06, - "loss": 0.9274, - "step": 7237 - }, - { - "epoch": 0.6527483428777562, - "grad_norm": 1.5847418275794818, - "learning_rate": 1.1369350712226525e-06, - "loss": 0.9263, - "step": 7238 - }, - { - "epoch": 0.6528385264012265, - "grad_norm": 1.65246841285053, - "learning_rate": 1.136408093157811e-06, - "loss": 0.9006, - "step": 7239 - }, - { - "epoch": 0.6529287099246968, - "grad_norm": 1.4834943869664248, - "learning_rate": 1.1358811887793935e-06, - "loss": 0.9026, - "step": 7240 - }, - { - "epoch": 0.653018893448167, - "grad_norm": 1.4674346390681712, - "learning_rate": 1.135354358132356e-06, - "loss": 0.9797, - "step": 7241 - }, - { - "epoch": 0.6531090769716373, - "grad_norm": 1.3258672934705868, - "learning_rate": 1.1348276012616542e-06, - "loss": 0.9844, - "step": 7242 - }, - { - "epoch": 0.6531992604951076, - "grad_norm": 1.4321453875202348, - "learning_rate": 1.134300918212231e-06, - "loss": 0.9081, - "step": 7243 - }, - { - "epoch": 0.6532894440185778, - "grad_norm": 1.53234038164234, - "learning_rate": 1.133774309029027e-06, - "loss": 0.9426, - "step": 7244 - }, - { - "epoch": 0.653379627542048, - "grad_norm": 1.3915843868868791, - "learning_rate": 1.133247773756975e-06, - "loss": 0.9805, - "step": 7245 - }, - { - "epoch": 0.6534698110655184, - "grad_norm": 0.673927366163366, - "learning_rate": 1.1327213124410024e-06, - "loss": 0.8293, - "step": 7246 - }, - { - "epoch": 0.6535599945889886, - "grad_norm": 1.2771258153689715, - "learning_rate": 1.1321949251260292e-06, - "loss": 0.9782, - "step": 7247 - }, - { - "epoch": 0.6536501781124588, - "grad_norm": 1.4848140669509862, - "learning_rate": 1.1316686118569712e-06, - "loss": 0.8283, - "step": 7248 - }, - { - "epoch": 0.6537403616359291, - "grad_norm": 1.5169638786164619, - "learning_rate": 1.1311423726787335e-06, - "loss": 0.8451, - "step": 7249 - }, - { - "epoch": 0.6538305451593994, - "grad_norm": 1.5083345290261827, - "learning_rate": 1.130616207636221e-06, - "loss": 0.9152, - "step": 7250 - }, - { - "epoch": 0.6539207286828697, - "grad_norm": 1.3951580444830232, - "learning_rate": 1.1300901167743263e-06, - "loss": 0.9403, - "step": 7251 - }, - { - "epoch": 0.6540109122063399, - "grad_norm": 1.1355027973975422, - "learning_rate": 1.12956410013794e-06, - "loss": 0.9378, - "step": 7252 - }, - { - "epoch": 0.6541010957298101, - "grad_norm": 1.6058997993327062, - "learning_rate": 1.1290381577719436e-06, - "loss": 0.9256, - "step": 7253 - }, - { - "epoch": 0.6541912792532805, - "grad_norm": 1.7182463168001099, - "learning_rate": 1.1285122897212143e-06, - "loss": 0.8534, - "step": 7254 - }, - { - "epoch": 0.6542814627767507, - "grad_norm": 0.7942795841589642, - "learning_rate": 1.1279864960306228e-06, - "loss": 0.8582, - "step": 7255 - }, - { - "epoch": 0.6543716463002209, - "grad_norm": 1.4560388043546013, - "learning_rate": 1.1274607767450297e-06, - "loss": 1.0094, - "step": 7256 - }, - { - "epoch": 0.6544618298236912, - "grad_norm": 1.5091939339718816, - "learning_rate": 1.126935131909296e-06, - "loss": 0.9203, - "step": 7257 - }, - { - "epoch": 0.6545520133471615, - "grad_norm": 1.2470946402928804, - "learning_rate": 1.1264095615682693e-06, - "loss": 0.8853, - "step": 7258 - }, - { - "epoch": 0.6546421968706317, - "grad_norm": 1.4610080803942094, - "learning_rate": 1.1258840657667973e-06, - "loss": 0.8616, - "step": 7259 - }, - { - "epoch": 0.654732380394102, - "grad_norm": 2.6585946001516145, - "learning_rate": 1.125358644549716e-06, - "loss": 0.8703, - "step": 7260 - }, - { - "epoch": 0.6548225639175722, - "grad_norm": 1.420966895902395, - "learning_rate": 1.1248332979618578e-06, - "loss": 0.8073, - "step": 7261 - }, - { - "epoch": 0.6549127474410426, - "grad_norm": 1.2202050741780772, - "learning_rate": 1.1243080260480482e-06, - "loss": 0.8367, - "step": 7262 - }, - { - "epoch": 0.6550029309645128, - "grad_norm": 1.6905286243207558, - "learning_rate": 1.1237828288531063e-06, - "loss": 0.9982, - "step": 7263 - }, - { - "epoch": 0.655093114487983, - "grad_norm": 1.560133782665789, - "learning_rate": 1.1232577064218449e-06, - "loss": 0.8634, - "step": 7264 - }, - { - "epoch": 0.6551832980114533, - "grad_norm": 1.4215536907306325, - "learning_rate": 1.1227326587990711e-06, - "loss": 1.009, - "step": 7265 - }, - { - "epoch": 0.6552734815349236, - "grad_norm": 1.257762315590118, - "learning_rate": 1.1222076860295832e-06, - "loss": 0.9256, - "step": 7266 - }, - { - "epoch": 0.6553636650583938, - "grad_norm": 1.5276705106975002, - "learning_rate": 1.1216827881581756e-06, - "loss": 0.955, - "step": 7267 - }, - { - "epoch": 0.6554538485818641, - "grad_norm": 1.5714693884688475, - "learning_rate": 1.1211579652296355e-06, - "loss": 0.9314, - "step": 7268 - }, - { - "epoch": 0.6555440321053344, - "grad_norm": 1.5293808252670287, - "learning_rate": 1.1206332172887438e-06, - "loss": 1.0139, - "step": 7269 - }, - { - "epoch": 0.6556342156288046, - "grad_norm": 1.5368350413100973, - "learning_rate": 1.1201085443802756e-06, - "loss": 0.9569, - "step": 7270 - }, - { - "epoch": 0.6557243991522749, - "grad_norm": 1.327070243726979, - "learning_rate": 1.1195839465489964e-06, - "loss": 0.909, - "step": 7271 - }, - { - "epoch": 0.6558145826757451, - "grad_norm": 1.545350296283424, - "learning_rate": 1.1190594238396708e-06, - "loss": 0.9766, - "step": 7272 - }, - { - "epoch": 0.6559047661992154, - "grad_norm": 1.1387724791065739, - "learning_rate": 1.1185349762970515e-06, - "loss": 0.9127, - "step": 7273 - }, - { - "epoch": 0.6559949497226857, - "grad_norm": 1.3633345454163204, - "learning_rate": 1.1180106039658896e-06, - "loss": 0.9444, - "step": 7274 - }, - { - "epoch": 0.6560851332461559, - "grad_norm": 0.7572988755531481, - "learning_rate": 1.117486306890925e-06, - "loss": 0.7891, - "step": 7275 - }, - { - "epoch": 0.6561753167696261, - "grad_norm": 1.7868828968728871, - "learning_rate": 1.116962085116896e-06, - "loss": 0.8888, - "step": 7276 - }, - { - "epoch": 0.6562655002930965, - "grad_norm": 0.7054372491066369, - "learning_rate": 1.1164379386885302e-06, - "loss": 0.7453, - "step": 7277 - }, - { - "epoch": 0.6563556838165667, - "grad_norm": 1.521552303497064, - "learning_rate": 1.1159138676505516e-06, - "loss": 0.9801, - "step": 7278 - }, - { - "epoch": 0.656445867340037, - "grad_norm": 1.3194486060272084, - "learning_rate": 1.1153898720476761e-06, - "loss": 1.0016, - "step": 7279 - }, - { - "epoch": 0.6565360508635072, - "grad_norm": 1.606114018840541, - "learning_rate": 1.114865951924615e-06, - "loss": 0.9278, - "step": 7280 - }, - { - "epoch": 0.6566262343869775, - "grad_norm": 1.4186014912723162, - "learning_rate": 1.1143421073260721e-06, - "loss": 0.8925, - "step": 7281 - }, - { - "epoch": 0.6567164179104478, - "grad_norm": 1.414982701792308, - "learning_rate": 1.1138183382967432e-06, - "loss": 0.8783, - "step": 7282 - }, - { - "epoch": 0.656806601433918, - "grad_norm": 1.5479418225852024, - "learning_rate": 1.11329464488132e-06, - "loss": 0.8532, - "step": 7283 - }, - { - "epoch": 0.6568967849573882, - "grad_norm": 1.4281647290603101, - "learning_rate": 1.112771027124487e-06, - "loss": 0.9179, - "step": 7284 - }, - { - "epoch": 0.6569869684808586, - "grad_norm": 1.3992937759059254, - "learning_rate": 1.112247485070922e-06, - "loss": 0.9524, - "step": 7285 - }, - { - "epoch": 0.6570771520043288, - "grad_norm": 1.3340030964868343, - "learning_rate": 1.1117240187652968e-06, - "loss": 0.9466, - "step": 7286 - }, - { - "epoch": 0.657167335527799, - "grad_norm": 1.5821457056157053, - "learning_rate": 1.1112006282522767e-06, - "loss": 1.0176, - "step": 7287 - }, - { - "epoch": 0.6572575190512693, - "grad_norm": 1.4086381343507133, - "learning_rate": 1.1106773135765183e-06, - "loss": 0.9245, - "step": 7288 - }, - { - "epoch": 0.6573477025747396, - "grad_norm": 1.2464367146407518, - "learning_rate": 1.110154074782677e-06, - "loss": 0.912, - "step": 7289 - }, - { - "epoch": 0.6574378860982099, - "grad_norm": 1.4491147726345137, - "learning_rate": 1.1096309119153948e-06, - "loss": 0.9288, - "step": 7290 - }, - { - "epoch": 0.6575280696216801, - "grad_norm": 0.7216940995817579, - "learning_rate": 1.1091078250193145e-06, - "loss": 0.8407, - "step": 7291 - }, - { - "epoch": 0.6576182531451504, - "grad_norm": 1.9054680768364112, - "learning_rate": 1.108584814139066e-06, - "loss": 0.9497, - "step": 7292 - }, - { - "epoch": 0.6577084366686207, - "grad_norm": 1.6956116021340937, - "learning_rate": 1.108061879319276e-06, - "loss": 0.9383, - "step": 7293 - }, - { - "epoch": 0.6577986201920909, - "grad_norm": 1.4250423659334484, - "learning_rate": 1.1075390206045648e-06, - "loss": 1.0001, - "step": 7294 - }, - { - "epoch": 0.6578888037155611, - "grad_norm": 1.5497511510129354, - "learning_rate": 1.1070162380395454e-06, - "loss": 0.9806, - "step": 7295 - }, - { - "epoch": 0.6579789872390315, - "grad_norm": 1.5358014097878658, - "learning_rate": 1.1064935316688253e-06, - "loss": 0.8839, - "step": 7296 - }, - { - "epoch": 0.6580691707625017, - "grad_norm": 1.5024671927375604, - "learning_rate": 1.105970901537002e-06, - "loss": 1.031, - "step": 7297 - }, - { - "epoch": 0.6581593542859719, - "grad_norm": 1.3462249231438046, - "learning_rate": 1.1054483476886727e-06, - "loss": 0.9854, - "step": 7298 - }, - { - "epoch": 0.6582495378094422, - "grad_norm": 1.3471743841734023, - "learning_rate": 1.1049258701684222e-06, - "loss": 0.8575, - "step": 7299 - }, - { - "epoch": 0.6583397213329125, - "grad_norm": 1.4199797186947836, - "learning_rate": 1.1044034690208315e-06, - "loss": 0.8871, - "step": 7300 - }, - { - "epoch": 0.6584299048563828, - "grad_norm": 1.865340975765981, - "learning_rate": 1.1038811442904755e-06, - "loss": 0.9783, - "step": 7301 - }, - { - "epoch": 0.658520088379853, - "grad_norm": 1.7710764624547428, - "learning_rate": 1.103358896021921e-06, - "loss": 0.8713, - "step": 7302 - }, - { - "epoch": 0.6586102719033232, - "grad_norm": 1.8845780827799639, - "learning_rate": 1.1028367242597298e-06, - "loss": 0.9167, - "step": 7303 - }, - { - "epoch": 0.6587004554267936, - "grad_norm": 1.681086184097001, - "learning_rate": 1.102314629048457e-06, - "loss": 0.9704, - "step": 7304 - }, - { - "epoch": 0.6587906389502638, - "grad_norm": 1.359075315918956, - "learning_rate": 1.1017926104326484e-06, - "loss": 0.9909, - "step": 7305 - }, - { - "epoch": 0.658880822473734, - "grad_norm": 1.3588309240589254, - "learning_rate": 1.1012706684568483e-06, - "loss": 0.9955, - "step": 7306 - }, - { - "epoch": 0.6589710059972043, - "grad_norm": 1.5960807960346817, - "learning_rate": 1.1007488031655894e-06, - "loss": 0.956, - "step": 7307 - }, - { - "epoch": 0.6590611895206746, - "grad_norm": 1.5152887974303513, - "learning_rate": 1.1002270146034013e-06, - "loss": 0.9039, - "step": 7308 - }, - { - "epoch": 0.6591513730441448, - "grad_norm": 1.1067131775442807, - "learning_rate": 1.0997053028148052e-06, - "loss": 0.9678, - "step": 7309 - }, - { - "epoch": 0.6592415565676151, - "grad_norm": 1.1888289293626269, - "learning_rate": 1.0991836678443173e-06, - "loss": 0.9417, - "step": 7310 - }, - { - "epoch": 0.6593317400910853, - "grad_norm": 1.4308735248221156, - "learning_rate": 1.0986621097364465e-06, - "loss": 0.892, - "step": 7311 - }, - { - "epoch": 0.6594219236145556, - "grad_norm": 1.7264271678791465, - "learning_rate": 1.0981406285356932e-06, - "loss": 0.8588, - "step": 7312 - }, - { - "epoch": 0.6595121071380259, - "grad_norm": 1.5699975756000826, - "learning_rate": 1.0976192242865554e-06, - "loss": 0.8612, - "step": 7313 - }, - { - "epoch": 0.6596022906614961, - "grad_norm": 1.5634065668376864, - "learning_rate": 1.0970978970335202e-06, - "loss": 1.0034, - "step": 7314 - }, - { - "epoch": 0.6596924741849665, - "grad_norm": 1.516551727439025, - "learning_rate": 1.0965766468210714e-06, - "loss": 0.9628, - "step": 7315 - }, - { - "epoch": 0.6597826577084367, - "grad_norm": 1.4763431285195292, - "learning_rate": 1.0960554736936843e-06, - "loss": 0.9554, - "step": 7316 - }, - { - "epoch": 0.6598728412319069, - "grad_norm": 1.5499922044619523, - "learning_rate": 1.0955343776958283e-06, - "loss": 0.8453, - "step": 7317 - }, - { - "epoch": 0.6599630247553772, - "grad_norm": 1.4029532465496095, - "learning_rate": 1.0950133588719665e-06, - "loss": 0.959, - "step": 7318 - }, - { - "epoch": 0.6600532082788475, - "grad_norm": 1.4832774069597752, - "learning_rate": 1.0944924172665551e-06, - "loss": 0.9626, - "step": 7319 - }, - { - "epoch": 0.6601433918023177, - "grad_norm": 1.473630466547793, - "learning_rate": 1.0939715529240437e-06, - "loss": 0.9778, - "step": 7320 - }, - { - "epoch": 0.660233575325788, - "grad_norm": 1.8160945501836927, - "learning_rate": 1.0934507658888755e-06, - "loss": 0.9688, - "step": 7321 - }, - { - "epoch": 0.6603237588492582, - "grad_norm": 1.3141213801887563, - "learning_rate": 1.092930056205486e-06, - "loss": 0.9104, - "step": 7322 - }, - { - "epoch": 0.6604139423727285, - "grad_norm": 1.755318869290062, - "learning_rate": 1.092409423918306e-06, - "loss": 0.9083, - "step": 7323 - }, - { - "epoch": 0.6605041258961988, - "grad_norm": 1.2018439668883185, - "learning_rate": 1.0918888690717581e-06, - "loss": 0.8655, - "step": 7324 - }, - { - "epoch": 0.660594309419669, - "grad_norm": 1.6486927576084267, - "learning_rate": 1.091368391710259e-06, - "loss": 1.0067, - "step": 7325 - }, - { - "epoch": 0.6606844929431392, - "grad_norm": 1.4013216336917629, - "learning_rate": 1.0908479918782198e-06, - "loss": 0.8868, - "step": 7326 - }, - { - "epoch": 0.6607746764666096, - "grad_norm": 1.2772407606876723, - "learning_rate": 1.0903276696200413e-06, - "loss": 0.9678, - "step": 7327 - }, - { - "epoch": 0.6608648599900798, - "grad_norm": 1.5621428272188256, - "learning_rate": 1.0898074249801234e-06, - "loss": 0.9482, - "step": 7328 - }, - { - "epoch": 0.6609550435135501, - "grad_norm": 1.3515114747046137, - "learning_rate": 1.0892872580028533e-06, - "loss": 0.9546, - "step": 7329 - }, - { - "epoch": 0.6610452270370203, - "grad_norm": 1.6051843395911274, - "learning_rate": 1.0887671687326178e-06, - "loss": 0.9699, - "step": 7330 - }, - { - "epoch": 0.6611354105604906, - "grad_norm": 1.3332419612491246, - "learning_rate": 1.0882471572137908e-06, - "loss": 0.9024, - "step": 7331 - }, - { - "epoch": 0.6612255940839609, - "grad_norm": 1.4283975086610252, - "learning_rate": 1.087727223490744e-06, - "loss": 0.9856, - "step": 7332 - }, - { - "epoch": 0.6613157776074311, - "grad_norm": 1.8840646972238495, - "learning_rate": 1.0872073676078405e-06, - "loss": 0.994, - "step": 7333 - }, - { - "epoch": 0.6614059611309013, - "grad_norm": 1.66121533887998, - "learning_rate": 1.0866875896094375e-06, - "loss": 0.8872, - "step": 7334 - }, - { - "epoch": 0.6614961446543717, - "grad_norm": 1.494128018992982, - "learning_rate": 1.0861678895398854e-06, - "loss": 0.9608, - "step": 7335 - }, - { - "epoch": 0.6615863281778419, - "grad_norm": 1.5501900033474192, - "learning_rate": 1.0856482674435286e-06, - "loss": 0.8961, - "step": 7336 - }, - { - "epoch": 0.6616765117013121, - "grad_norm": 0.7090932799207406, - "learning_rate": 1.0851287233647024e-06, - "loss": 0.7992, - "step": 7337 - }, - { - "epoch": 0.6617666952247825, - "grad_norm": 1.4491561505969142, - "learning_rate": 1.084609257347738e-06, - "loss": 1.0032, - "step": 7338 - }, - { - "epoch": 0.6618568787482527, - "grad_norm": 1.5132449624787407, - "learning_rate": 1.0840898694369594e-06, - "loss": 0.9195, - "step": 7339 - }, - { - "epoch": 0.661947062271723, - "grad_norm": 1.552099249158665, - "learning_rate": 1.083570559676683e-06, - "loss": 0.9351, - "step": 7340 - }, - { - "epoch": 0.6620372457951932, - "grad_norm": 1.4521036147567095, - "learning_rate": 1.08305132811122e-06, - "loss": 0.9169, - "step": 7341 - }, - { - "epoch": 0.6621274293186635, - "grad_norm": 1.3115102123920184, - "learning_rate": 1.0825321747848735e-06, - "loss": 0.9392, - "step": 7342 - }, - { - "epoch": 0.6622176128421338, - "grad_norm": 0.764703351578689, - "learning_rate": 1.0820130997419417e-06, - "loss": 0.8554, - "step": 7343 - }, - { - "epoch": 0.662307796365604, - "grad_norm": 1.6388228191197642, - "learning_rate": 1.0814941030267123e-06, - "loss": 0.9386, - "step": 7344 - }, - { - "epoch": 0.6623979798890742, - "grad_norm": 0.5810601755091409, - "learning_rate": 1.080975184683472e-06, - "loss": 0.7827, - "step": 7345 - }, - { - "epoch": 0.6624881634125446, - "grad_norm": 1.5060403321781315, - "learning_rate": 1.0804563447564948e-06, - "loss": 1.0018, - "step": 7346 - }, - { - "epoch": 0.6625783469360148, - "grad_norm": 1.539299573889657, - "learning_rate": 1.0799375832900545e-06, - "loss": 0.8887, - "step": 7347 - }, - { - "epoch": 0.662668530459485, - "grad_norm": 1.3970924600738632, - "learning_rate": 1.0794189003284118e-06, - "loss": 0.974, - "step": 7348 - }, - { - "epoch": 0.6627587139829553, - "grad_norm": 1.4659344897445643, - "learning_rate": 1.0789002959158242e-06, - "loss": 0.9583, - "step": 7349 - }, - { - "epoch": 0.6628488975064256, - "grad_norm": 1.7097335829857905, - "learning_rate": 1.0783817700965428e-06, - "loss": 0.9014, - "step": 7350 - }, - { - "epoch": 0.6629390810298958, - "grad_norm": 1.284380554038706, - "learning_rate": 1.0778633229148102e-06, - "loss": 0.9839, - "step": 7351 - }, - { - "epoch": 0.6630292645533661, - "grad_norm": 1.5727547400116153, - "learning_rate": 1.0773449544148645e-06, - "loss": 0.9461, - "step": 7352 - }, - { - "epoch": 0.6631194480768363, - "grad_norm": 1.319141884741889, - "learning_rate": 1.076826664640934e-06, - "loss": 0.9844, - "step": 7353 - }, - { - "epoch": 0.6632096316003067, - "grad_norm": 0.8012316297903925, - "learning_rate": 1.0763084536372424e-06, - "loss": 0.8716, - "step": 7354 - }, - { - "epoch": 0.6632998151237769, - "grad_norm": 1.467145652296073, - "learning_rate": 1.0757903214480068e-06, - "loss": 0.9588, - "step": 7355 - }, - { - "epoch": 0.6633899986472471, - "grad_norm": 1.6452848832438227, - "learning_rate": 1.0752722681174376e-06, - "loss": 0.8827, - "step": 7356 - }, - { - "epoch": 0.6634801821707174, - "grad_norm": 2.5550484591851053, - "learning_rate": 1.074754293689737e-06, - "loss": 0.8597, - "step": 7357 - }, - { - "epoch": 0.6635703656941877, - "grad_norm": 1.5535521842754327, - "learning_rate": 1.0742363982091023e-06, - "loss": 0.9382, - "step": 7358 - }, - { - "epoch": 0.6636605492176579, - "grad_norm": 1.8232068802653938, - "learning_rate": 1.0737185817197215e-06, - "loss": 0.9092, - "step": 7359 - }, - { - "epoch": 0.6637507327411282, - "grad_norm": 1.5195330543801546, - "learning_rate": 1.0732008442657803e-06, - "loss": 0.8994, - "step": 7360 - }, - { - "epoch": 0.6638409162645985, - "grad_norm": 1.3239007978845942, - "learning_rate": 1.0726831858914516e-06, - "loss": 0.9233, - "step": 7361 - }, - { - "epoch": 0.6639310997880687, - "grad_norm": 2.0046148701784285, - "learning_rate": 1.0721656066409084e-06, - "loss": 0.959, - "step": 7362 - }, - { - "epoch": 0.664021283311539, - "grad_norm": 1.6816925147155448, - "learning_rate": 1.0716481065583108e-06, - "loss": 0.9472, - "step": 7363 - }, - { - "epoch": 0.6641114668350092, - "grad_norm": 0.6445223720979479, - "learning_rate": 1.071130685687816e-06, - "loss": 0.7947, - "step": 7364 - }, - { - "epoch": 0.6642016503584796, - "grad_norm": 1.3538592894477937, - "learning_rate": 1.0706133440735723e-06, - "loss": 0.8764, - "step": 7365 - }, - { - "epoch": 0.6642918338819498, - "grad_norm": 1.5168354667098365, - "learning_rate": 1.070096081759723e-06, - "loss": 0.8897, - "step": 7366 - }, - { - "epoch": 0.66438201740542, - "grad_norm": 1.634165882860638, - "learning_rate": 1.069578898790404e-06, - "loss": 1.0428, - "step": 7367 - }, - { - "epoch": 0.6644722009288903, - "grad_norm": 1.466537513609472, - "learning_rate": 1.0690617952097424e-06, - "loss": 0.9272, - "step": 7368 - }, - { - "epoch": 0.6645623844523606, - "grad_norm": 1.5022154183409238, - "learning_rate": 1.068544771061863e-06, - "loss": 0.9866, - "step": 7369 - }, - { - "epoch": 0.6646525679758308, - "grad_norm": 1.916007661171013, - "learning_rate": 1.0680278263908787e-06, - "loss": 0.9814, - "step": 7370 - }, - { - "epoch": 0.6647427514993011, - "grad_norm": 0.7837233112055996, - "learning_rate": 1.0675109612408991e-06, - "loss": 0.8458, - "step": 7371 - }, - { - "epoch": 0.6648329350227713, - "grad_norm": 1.7049332564817947, - "learning_rate": 1.0669941756560264e-06, - "loss": 0.912, - "step": 7372 - }, - { - "epoch": 0.6649231185462416, - "grad_norm": 2.78132817876429, - "learning_rate": 1.0664774696803548e-06, - "loss": 0.8975, - "step": 7373 - }, - { - "epoch": 0.6650133020697119, - "grad_norm": 1.5696258561980414, - "learning_rate": 1.065960843357973e-06, - "loss": 0.9117, - "step": 7374 - }, - { - "epoch": 0.6651034855931821, - "grad_norm": 1.4221984998310724, - "learning_rate": 1.065444296732963e-06, - "loss": 0.957, - "step": 7375 - }, - { - "epoch": 0.6651936691166523, - "grad_norm": 0.6915523353170956, - "learning_rate": 1.064927829849397e-06, - "loss": 0.8018, - "step": 7376 - }, - { - "epoch": 0.6652838526401227, - "grad_norm": 1.7305078781905305, - "learning_rate": 1.0644114427513465e-06, - "loss": 0.9393, - "step": 7377 - }, - { - "epoch": 0.6653740361635929, - "grad_norm": 1.2241213372089157, - "learning_rate": 1.0638951354828693e-06, - "loss": 0.866, - "step": 7378 - }, - { - "epoch": 0.6654642196870632, - "grad_norm": 1.5020039999968275, - "learning_rate": 1.063378908088021e-06, - "loss": 0.9951, - "step": 7379 - }, - { - "epoch": 0.6655544032105334, - "grad_norm": 1.4852984316701563, - "learning_rate": 1.0628627606108486e-06, - "loss": 0.9342, - "step": 7380 - }, - { - "epoch": 0.6656445867340037, - "grad_norm": 1.600181914955205, - "learning_rate": 1.062346693095393e-06, - "loss": 1.0008, - "step": 7381 - }, - { - "epoch": 0.665734770257474, - "grad_norm": 1.4213884799832355, - "learning_rate": 1.0618307055856882e-06, - "loss": 0.8831, - "step": 7382 - }, - { - "epoch": 0.6658249537809442, - "grad_norm": 1.3597457204850625, - "learning_rate": 1.061314798125759e-06, - "loss": 0.9939, - "step": 7383 - }, - { - "epoch": 0.6659151373044144, - "grad_norm": 1.191344262683675, - "learning_rate": 1.0607989707596293e-06, - "loss": 0.9011, - "step": 7384 - }, - { - "epoch": 0.6660053208278848, - "grad_norm": 1.3385451894434228, - "learning_rate": 1.0602832235313078e-06, - "loss": 1.0578, - "step": 7385 - }, - { - "epoch": 0.666095504351355, - "grad_norm": 1.5522430218296908, - "learning_rate": 1.0597675564848053e-06, - "loss": 0.9224, - "step": 7386 - }, - { - "epoch": 0.6661856878748252, - "grad_norm": 1.6716284882105372, - "learning_rate": 1.059251969664118e-06, - "loss": 1.0113, - "step": 7387 - }, - { - "epoch": 0.6662758713982956, - "grad_norm": 1.3911085252556687, - "learning_rate": 1.0587364631132402e-06, - "loss": 0.9497, - "step": 7388 - }, - { - "epoch": 0.6663660549217658, - "grad_norm": 1.330209957989104, - "learning_rate": 1.0582210368761573e-06, - "loss": 0.9828, - "step": 7389 - }, - { - "epoch": 0.666456238445236, - "grad_norm": 1.8956233386879222, - "learning_rate": 1.0577056909968485e-06, - "loss": 0.943, - "step": 7390 - }, - { - "epoch": 0.6665464219687063, - "grad_norm": 1.415859197986922, - "learning_rate": 1.0571904255192857e-06, - "loss": 0.9603, - "step": 7391 - }, - { - "epoch": 0.6666366054921766, - "grad_norm": 1.9209408854772305, - "learning_rate": 1.0566752404874354e-06, - "loss": 0.9752, - "step": 7392 - }, - { - "epoch": 0.6667267890156469, - "grad_norm": 1.595802332059899, - "learning_rate": 1.0561601359452543e-06, - "loss": 0.9348, - "step": 7393 - }, - { - "epoch": 0.6668169725391171, - "grad_norm": 1.5351894074176446, - "learning_rate": 1.0556451119366947e-06, - "loss": 0.8789, - "step": 7394 - }, - { - "epoch": 0.6669071560625873, - "grad_norm": 1.5227319110452928, - "learning_rate": 1.0551301685057011e-06, - "loss": 0.8734, - "step": 7395 - }, - { - "epoch": 0.6669973395860577, - "grad_norm": 5.653367968009324, - "learning_rate": 1.0546153056962117e-06, - "loss": 1.0325, - "step": 7396 - }, - { - "epoch": 0.6670875231095279, - "grad_norm": 1.5686753882184468, - "learning_rate": 1.0541005235521578e-06, - "loss": 0.9276, - "step": 7397 - }, - { - "epoch": 0.6671777066329981, - "grad_norm": 1.5353718768781157, - "learning_rate": 1.0535858221174614e-06, - "loss": 0.9534, - "step": 7398 - }, - { - "epoch": 0.6672678901564684, - "grad_norm": 1.5350117312054505, - "learning_rate": 1.0530712014360426e-06, - "loss": 0.9851, - "step": 7399 - }, - { - "epoch": 0.6673580736799387, - "grad_norm": 1.3404987835990625, - "learning_rate": 1.0525566615518088e-06, - "loss": 0.9862, - "step": 7400 - }, - { - "epoch": 0.667448257203409, - "grad_norm": 1.308882223188579, - "learning_rate": 1.0520422025086662e-06, - "loss": 0.9577, - "step": 7401 - }, - { - "epoch": 0.6675384407268792, - "grad_norm": 1.3816572633215463, - "learning_rate": 1.0515278243505092e-06, - "loss": 0.9913, - "step": 7402 - }, - { - "epoch": 0.6676286242503494, - "grad_norm": 1.282410398554511, - "learning_rate": 1.0510135271212278e-06, - "loss": 0.9682, - "step": 7403 - }, - { - "epoch": 0.6677188077738198, - "grad_norm": 1.1879860234738873, - "learning_rate": 1.0504993108647052e-06, - "loss": 0.998, - "step": 7404 - }, - { - "epoch": 0.66780899129729, - "grad_norm": 1.2647587201552477, - "learning_rate": 1.0499851756248168e-06, - "loss": 0.913, - "step": 7405 - }, - { - "epoch": 0.6678991748207602, - "grad_norm": 1.3865616400464331, - "learning_rate": 1.0494711214454316e-06, - "loss": 0.9212, - "step": 7406 - }, - { - "epoch": 0.6679893583442305, - "grad_norm": 1.3666478427117827, - "learning_rate": 1.0489571483704111e-06, - "loss": 0.8535, - "step": 7407 - }, - { - "epoch": 0.6680795418677008, - "grad_norm": 1.570904824057638, - "learning_rate": 1.048443256443612e-06, - "loss": 0.8615, - "step": 7408 - }, - { - "epoch": 0.668169725391171, - "grad_norm": 1.7227655838851212, - "learning_rate": 1.0479294457088801e-06, - "loss": 0.9638, - "step": 7409 - }, - { - "epoch": 0.6682599089146413, - "grad_norm": 1.3908593483958986, - "learning_rate": 1.0474157162100574e-06, - "loss": 1.0565, - "step": 7410 - }, - { - "epoch": 0.6683500924381116, - "grad_norm": 1.4210694095483691, - "learning_rate": 1.0469020679909786e-06, - "loss": 0.8756, - "step": 7411 - }, - { - "epoch": 0.6684402759615818, - "grad_norm": 2.3558273422044484, - "learning_rate": 1.0463885010954705e-06, - "loss": 0.8614, - "step": 7412 - }, - { - "epoch": 0.6685304594850521, - "grad_norm": 1.618499076571951, - "learning_rate": 1.0458750155673536e-06, - "loss": 1.0089, - "step": 7413 - }, - { - "epoch": 0.6686206430085223, - "grad_norm": 1.690990934414196, - "learning_rate": 1.0453616114504421e-06, - "loss": 0.8975, - "step": 7414 - }, - { - "epoch": 0.6687108265319927, - "grad_norm": 1.416260502129021, - "learning_rate": 1.0448482887885406e-06, - "loss": 0.9704, - "step": 7415 - }, - { - "epoch": 0.6688010100554629, - "grad_norm": 1.46164918588228, - "learning_rate": 1.044335047625451e-06, - "loss": 0.9466, - "step": 7416 - }, - { - "epoch": 0.6688911935789331, - "grad_norm": 1.4988084670013726, - "learning_rate": 1.0438218880049637e-06, - "loss": 0.9481, - "step": 7417 - }, - { - "epoch": 0.6689813771024034, - "grad_norm": 1.6409088615805418, - "learning_rate": 1.0433088099708653e-06, - "loss": 1.0005, - "step": 7418 - }, - { - "epoch": 0.6690715606258737, - "grad_norm": 0.7269863000153467, - "learning_rate": 1.0427958135669346e-06, - "loss": 0.7523, - "step": 7419 - }, - { - "epoch": 0.6691617441493439, - "grad_norm": 3.5281550254929464, - "learning_rate": 1.0422828988369428e-06, - "loss": 0.8771, - "step": 7420 - }, - { - "epoch": 0.6692519276728142, - "grad_norm": 1.472843148187616, - "learning_rate": 1.041770065824655e-06, - "loss": 1.0066, - "step": 7421 - }, - { - "epoch": 0.6693421111962844, - "grad_norm": 1.7576248407859614, - "learning_rate": 1.0412573145738287e-06, - "loss": 0.7969, - "step": 7422 - }, - { - "epoch": 0.6694322947197547, - "grad_norm": 1.3955881510754453, - "learning_rate": 1.040744645128216e-06, - "loss": 0.913, - "step": 7423 - }, - { - "epoch": 0.669522478243225, - "grad_norm": 1.4996202306014188, - "learning_rate": 1.040232057531558e-06, - "loss": 1.0033, - "step": 7424 - }, - { - "epoch": 0.6696126617666952, - "grad_norm": 4.151396247656168, - "learning_rate": 1.0397195518275932e-06, - "loss": 0.9109, - "step": 7425 - }, - { - "epoch": 0.6697028452901654, - "grad_norm": 1.357464072031291, - "learning_rate": 1.0392071280600512e-06, - "loss": 0.9134, - "step": 7426 - }, - { - "epoch": 0.6697930288136358, - "grad_norm": 1.4494006924410292, - "learning_rate": 1.0386947862726549e-06, - "loss": 0.9445, - "step": 7427 - }, - { - "epoch": 0.669883212337106, - "grad_norm": 2.044958134341561, - "learning_rate": 1.0381825265091197e-06, - "loss": 0.8619, - "step": 7428 - }, - { - "epoch": 0.6699733958605762, - "grad_norm": 1.8214700317472545, - "learning_rate": 1.037670348813155e-06, - "loss": 0.8648, - "step": 7429 - }, - { - "epoch": 0.6700635793840465, - "grad_norm": 1.6338764721368604, - "learning_rate": 1.0371582532284624e-06, - "loss": 0.9674, - "step": 7430 - }, - { - "epoch": 0.6701537629075168, - "grad_norm": 1.4161164767426104, - "learning_rate": 1.0366462397987375e-06, - "loss": 0.8764, - "step": 7431 - }, - { - "epoch": 0.6702439464309871, - "grad_norm": 1.6987110300204633, - "learning_rate": 1.0361343085676665e-06, - "loss": 0.9393, - "step": 7432 - }, - { - "epoch": 0.6703341299544573, - "grad_norm": 1.3293800258496633, - "learning_rate": 1.0356224595789309e-06, - "loss": 1.0553, - "step": 7433 - }, - { - "epoch": 0.6704243134779276, - "grad_norm": 1.3847949370602943, - "learning_rate": 1.0351106928762046e-06, - "loss": 0.9992, - "step": 7434 - }, - { - "epoch": 0.6705144970013979, - "grad_norm": 2.199869875961051, - "learning_rate": 1.034599008503154e-06, - "loss": 1.0147, - "step": 7435 - }, - { - "epoch": 0.6706046805248681, - "grad_norm": 1.2858468377192642, - "learning_rate": 1.0340874065034406e-06, - "loss": 0.9319, - "step": 7436 - }, - { - "epoch": 0.6706948640483383, - "grad_norm": 1.4919477340042333, - "learning_rate": 1.0335758869207137e-06, - "loss": 0.8665, - "step": 7437 - }, - { - "epoch": 0.6707850475718087, - "grad_norm": 3.4282566987631986, - "learning_rate": 1.0330644497986227e-06, - "loss": 0.9563, - "step": 7438 - }, - { - "epoch": 0.6708752310952789, - "grad_norm": 1.3600531892332888, - "learning_rate": 1.0325530951808029e-06, - "loss": 0.9829, - "step": 7439 - }, - { - "epoch": 0.6709654146187491, - "grad_norm": 1.5075019951549378, - "learning_rate": 1.0320418231108887e-06, - "loss": 0.9549, - "step": 7440 - }, - { - "epoch": 0.6710555981422194, - "grad_norm": 1.5580786138149103, - "learning_rate": 1.0315306336325028e-06, - "loss": 0.9836, - "step": 7441 - }, - { - "epoch": 0.6711457816656897, - "grad_norm": 1.5177298805035564, - "learning_rate": 1.0310195267892635e-06, - "loss": 0.9025, - "step": 7442 - }, - { - "epoch": 0.67123596518916, - "grad_norm": 1.7132478904694435, - "learning_rate": 1.030508502624781e-06, - "loss": 0.8863, - "step": 7443 - }, - { - "epoch": 0.6713261487126302, - "grad_norm": 1.793097464402229, - "learning_rate": 1.0299975611826587e-06, - "loss": 0.899, - "step": 7444 - }, - { - "epoch": 0.6714163322361004, - "grad_norm": 1.3522765638211047, - "learning_rate": 1.0294867025064928e-06, - "loss": 0.9515, - "step": 7445 - }, - { - "epoch": 0.6715065157595708, - "grad_norm": 1.3225957326005973, - "learning_rate": 1.028975926639874e-06, - "loss": 0.9505, - "step": 7446 - }, - { - "epoch": 0.671596699283041, - "grad_norm": 2.0741756456282183, - "learning_rate": 1.0284652336263823e-06, - "loss": 0.8633, - "step": 7447 - }, - { - "epoch": 0.6716868828065112, - "grad_norm": 3.803369940186795, - "learning_rate": 1.0279546235095938e-06, - "loss": 0.8743, - "step": 7448 - }, - { - "epoch": 0.6717770663299815, - "grad_norm": 0.7532893606942893, - "learning_rate": 1.0274440963330768e-06, - "loss": 0.8344, - "step": 7449 - }, - { - "epoch": 0.6718672498534518, - "grad_norm": 1.3544543865405032, - "learning_rate": 1.0269336521403919e-06, - "loss": 0.9425, - "step": 7450 - }, - { - "epoch": 0.671957433376922, - "grad_norm": 1.4635685367312303, - "learning_rate": 1.0264232909750936e-06, - "loss": 1.0694, - "step": 7451 - }, - { - "epoch": 0.6720476169003923, - "grad_norm": 1.2529068050539807, - "learning_rate": 1.025913012880728e-06, - "loss": 1.0003, - "step": 7452 - }, - { - "epoch": 0.6721378004238625, - "grad_norm": 1.324923167610179, - "learning_rate": 1.0254028179008362e-06, - "loss": 0.8515, - "step": 7453 - }, - { - "epoch": 0.6722279839473329, - "grad_norm": 1.3245274006827121, - "learning_rate": 1.0248927060789483e-06, - "loss": 0.8399, - "step": 7454 - }, - { - "epoch": 0.6723181674708031, - "grad_norm": 1.3950779374887217, - "learning_rate": 1.0243826774585928e-06, - "loss": 0.8922, - "step": 7455 - }, - { - "epoch": 0.6724083509942733, - "grad_norm": 1.6236785504109787, - "learning_rate": 1.0238727320832854e-06, - "loss": 0.9333, - "step": 7456 - }, - { - "epoch": 0.6724985345177437, - "grad_norm": 1.600615281772269, - "learning_rate": 1.0233628699965403e-06, - "loss": 0.9426, - "step": 7457 - }, - { - "epoch": 0.6725887180412139, - "grad_norm": 1.3485483948337258, - "learning_rate": 1.0228530912418594e-06, - "loss": 0.9633, - "step": 7458 - }, - { - "epoch": 0.6726789015646841, - "grad_norm": 1.4934507285675873, - "learning_rate": 1.0223433958627404e-06, - "loss": 0.9117, - "step": 7459 - }, - { - "epoch": 0.6727690850881544, - "grad_norm": 1.6035472202324628, - "learning_rate": 1.021833783902674e-06, - "loss": 0.9699, - "step": 7460 - }, - { - "epoch": 0.6728592686116247, - "grad_norm": 1.358322130441786, - "learning_rate": 1.0213242554051427e-06, - "loss": 0.9453, - "step": 7461 - }, - { - "epoch": 0.6729494521350949, - "grad_norm": 1.4377773680744508, - "learning_rate": 1.0208148104136229e-06, - "loss": 0.8827, - "step": 7462 - }, - { - "epoch": 0.6730396356585652, - "grad_norm": 3.255697684572399, - "learning_rate": 1.020305448971582e-06, - "loss": 0.9109, - "step": 7463 - }, - { - "epoch": 0.6731298191820354, - "grad_norm": 1.281588207360041, - "learning_rate": 1.0197961711224824e-06, - "loss": 0.9032, - "step": 7464 - }, - { - "epoch": 0.6732200027055057, - "grad_norm": 1.4881896944494843, - "learning_rate": 1.0192869769097777e-06, - "loss": 0.9557, - "step": 7465 - }, - { - "epoch": 0.673310186228976, - "grad_norm": 1.6415731687433286, - "learning_rate": 1.018777866376916e-06, - "loss": 0.9551, - "step": 7466 - }, - { - "epoch": 0.6734003697524462, - "grad_norm": 1.6190522725441785, - "learning_rate": 1.0182688395673374e-06, - "loss": 0.8174, - "step": 7467 - }, - { - "epoch": 0.6734905532759164, - "grad_norm": 1.3222272182729953, - "learning_rate": 1.017759896524475e-06, - "loss": 0.9074, - "step": 7468 - }, - { - "epoch": 0.6735807367993868, - "grad_norm": 1.7814901758865793, - "learning_rate": 1.0172510372917528e-06, - "loss": 0.9745, - "step": 7469 - }, - { - "epoch": 0.673670920322857, - "grad_norm": 2.0172114080523667, - "learning_rate": 1.0167422619125925e-06, - "loss": 0.8394, - "step": 7470 - }, - { - "epoch": 0.6737611038463273, - "grad_norm": 1.4747171324711166, - "learning_rate": 1.0162335704304026e-06, - "loss": 0.9549, - "step": 7471 - }, - { - "epoch": 0.6738512873697975, - "grad_norm": 0.7234009076415199, - "learning_rate": 1.0157249628885903e-06, - "loss": 0.8019, - "step": 7472 - }, - { - "epoch": 0.6739414708932678, - "grad_norm": 1.218809615413319, - "learning_rate": 1.0152164393305506e-06, - "loss": 0.8563, - "step": 7473 - }, - { - "epoch": 0.6740316544167381, - "grad_norm": 1.674068473732626, - "learning_rate": 1.0147079997996746e-06, - "loss": 0.9323, - "step": 7474 - }, - { - "epoch": 0.6741218379402083, - "grad_norm": 1.6388195457803132, - "learning_rate": 1.0141996443393446e-06, - "loss": 0.9681, - "step": 7475 - }, - { - "epoch": 0.6742120214636785, - "grad_norm": 1.4843749196905818, - "learning_rate": 1.0136913729929369e-06, - "loss": 0.9317, - "step": 7476 - }, - { - "epoch": 0.6743022049871489, - "grad_norm": 1.4445724654726426, - "learning_rate": 1.0131831858038203e-06, - "loss": 0.9082, - "step": 7477 - }, - { - "epoch": 0.6743923885106191, - "grad_norm": 1.3577349185179752, - "learning_rate": 1.0126750828153538e-06, - "loss": 1.0008, - "step": 7478 - }, - { - "epoch": 0.6744825720340893, - "grad_norm": 1.5928050212897833, - "learning_rate": 1.012167064070895e-06, - "loss": 0.9818, - "step": 7479 - }, - { - "epoch": 0.6745727555575597, - "grad_norm": 1.4094556510500487, - "learning_rate": 1.0116591296137885e-06, - "loss": 0.9667, - "step": 7480 - }, - { - "epoch": 0.6746629390810299, - "grad_norm": 1.3524938913232065, - "learning_rate": 1.0111512794873746e-06, - "loss": 0.9682, - "step": 7481 - }, - { - "epoch": 0.6747531226045002, - "grad_norm": 1.9637385572346548, - "learning_rate": 1.010643513734986e-06, - "loss": 0.9404, - "step": 7482 - }, - { - "epoch": 0.6748433061279704, - "grad_norm": 1.4260512840925674, - "learning_rate": 1.010135832399948e-06, - "loss": 0.9823, - "step": 7483 - }, - { - "epoch": 0.6749334896514407, - "grad_norm": 1.150574337550007, - "learning_rate": 1.0096282355255792e-06, - "loss": 0.9451, - "step": 7484 - }, - { - "epoch": 0.675023673174911, - "grad_norm": 1.668350426201745, - "learning_rate": 1.0091207231551905e-06, - "loss": 0.9976, - "step": 7485 - }, - { - "epoch": 0.6751138566983812, - "grad_norm": 1.3240574651302344, - "learning_rate": 1.0086132953320842e-06, - "loss": 0.902, - "step": 7486 - }, - { - "epoch": 0.6752040402218514, - "grad_norm": 1.753215084379363, - "learning_rate": 1.0081059520995591e-06, - "loss": 0.9677, - "step": 7487 - }, - { - "epoch": 0.6752942237453218, - "grad_norm": 2.125092560491311, - "learning_rate": 1.0075986935009028e-06, - "loss": 1.0013, - "step": 7488 - }, - { - "epoch": 0.675384407268792, - "grad_norm": 1.4189004490606387, - "learning_rate": 1.0070915195793982e-06, - "loss": 0.9484, - "step": 7489 - }, - { - "epoch": 0.6754745907922622, - "grad_norm": 1.7568060133942278, - "learning_rate": 1.0065844303783197e-06, - "loss": 0.9215, - "step": 7490 - }, - { - "epoch": 0.6755647743157325, - "grad_norm": 1.274558434036901, - "learning_rate": 1.0060774259409356e-06, - "loss": 0.8627, - "step": 7491 - }, - { - "epoch": 0.6756549578392028, - "grad_norm": 1.2765462176289575, - "learning_rate": 1.0055705063105065e-06, - "loss": 0.9754, - "step": 7492 - }, - { - "epoch": 0.675745141362673, - "grad_norm": 1.4327819355041544, - "learning_rate": 1.0050636715302837e-06, - "loss": 0.9368, - "step": 7493 - }, - { - "epoch": 0.6758353248861433, - "grad_norm": 1.4831011476137912, - "learning_rate": 1.0045569216435157e-06, - "loss": 0.9551, - "step": 7494 - }, - { - "epoch": 0.6759255084096135, - "grad_norm": 1.2991030294571384, - "learning_rate": 1.0040502566934384e-06, - "loss": 1.026, - "step": 7495 - }, - { - "epoch": 0.6760156919330839, - "grad_norm": 1.9045449432774604, - "learning_rate": 1.0035436767232866e-06, - "loss": 0.8805, - "step": 7496 - }, - { - "epoch": 0.6761058754565541, - "grad_norm": 1.3813557683572337, - "learning_rate": 1.0030371817762816e-06, - "loss": 0.957, - "step": 7497 - }, - { - "epoch": 0.6761960589800243, - "grad_norm": 1.393936286759149, - "learning_rate": 1.0025307718956417e-06, - "loss": 0.9494, - "step": 7498 - }, - { - "epoch": 0.6762862425034946, - "grad_norm": 1.3067250131860413, - "learning_rate": 1.0020244471245765e-06, - "loss": 0.9622, - "step": 7499 - }, - { - "epoch": 0.6763764260269649, - "grad_norm": 1.5638412059894888, - "learning_rate": 1.001518207506288e-06, - "loss": 0.8924, - "step": 7500 - }, - { - "epoch": 0.6764666095504351, - "grad_norm": 1.4530167898270017, - "learning_rate": 1.0010120530839717e-06, - "loss": 1.0005, - "step": 7501 - }, - { - "epoch": 0.6765567930739054, - "grad_norm": 1.1348972474022725, - "learning_rate": 1.0005059839008161e-06, - "loss": 0.9633, - "step": 7502 - }, - { - "epoch": 0.6766469765973756, - "grad_norm": 1.5495919921146202, - "learning_rate": 1.0000000000000004e-06, - "loss": 0.8922, - "step": 7503 - }, - { - "epoch": 0.676737160120846, - "grad_norm": 1.4569641588734086, - "learning_rate": 9.994941014246985e-07, - "loss": 0.9916, - "step": 7504 - }, - { - "epoch": 0.6768273436443162, - "grad_norm": 1.2626256842419235, - "learning_rate": 9.989882882180766e-07, - "loss": 0.834, - "step": 7505 - }, - { - "epoch": 0.6769175271677864, - "grad_norm": 1.5465825893032317, - "learning_rate": 9.984825604232938e-07, - "loss": 0.9573, - "step": 7506 - }, - { - "epoch": 0.6770077106912568, - "grad_norm": 1.3637287531245699, - "learning_rate": 9.97976918083502e-07, - "loss": 0.8818, - "step": 7507 - }, - { - "epoch": 0.677097894214727, - "grad_norm": 1.3119891852944947, - "learning_rate": 9.974713612418427e-07, - "loss": 1.0144, - "step": 7508 - }, - { - "epoch": 0.6771880777381972, - "grad_norm": 1.3671316080930342, - "learning_rate": 9.969658899414563e-07, - "loss": 0.9512, - "step": 7509 - }, - { - "epoch": 0.6772782612616675, - "grad_norm": 1.516545438986898, - "learning_rate": 9.964605042254696e-07, - "loss": 0.8627, - "step": 7510 - }, - { - "epoch": 0.6773684447851378, - "grad_norm": 1.5082517784509049, - "learning_rate": 9.959552041370076e-07, - "loss": 0.8809, - "step": 7511 - }, - { - "epoch": 0.677458628308608, - "grad_norm": 1.3652199674836611, - "learning_rate": 9.954499897191824e-07, - "loss": 0.8586, - "step": 7512 - }, - { - "epoch": 0.6775488118320783, - "grad_norm": 1.8396380202163183, - "learning_rate": 9.949448610151043e-07, - "loss": 0.9051, - "step": 7513 - }, - { - "epoch": 0.6776389953555485, - "grad_norm": 1.3192325667313218, - "learning_rate": 9.944398180678719e-07, - "loss": 0.921, - "step": 7514 - }, - { - "epoch": 0.6777291788790188, - "grad_norm": 1.467199765480131, - "learning_rate": 9.939348609205789e-07, - "loss": 0.8994, - "step": 7515 - }, - { - "epoch": 0.6778193624024891, - "grad_norm": 1.3212157551235184, - "learning_rate": 9.93429989616311e-07, - "loss": 0.9474, - "step": 7516 - }, - { - "epoch": 0.6779095459259593, - "grad_norm": 1.5997610986690924, - "learning_rate": 9.929252041981464e-07, - "loss": 0.8664, - "step": 7517 - }, - { - "epoch": 0.6779997294494295, - "grad_norm": 1.3987395743492017, - "learning_rate": 9.924205047091572e-07, - "loss": 0.9105, - "step": 7518 - }, - { - "epoch": 0.6780899129728999, - "grad_norm": 1.4685905857013535, - "learning_rate": 9.919158911924056e-07, - "loss": 1.062, - "step": 7519 - }, - { - "epoch": 0.6781800964963701, - "grad_norm": 1.3209420232648004, - "learning_rate": 9.914113636909483e-07, - "loss": 0.9735, - "step": 7520 - }, - { - "epoch": 0.6782702800198404, - "grad_norm": 0.7987581329648025, - "learning_rate": 9.90906922247835e-07, - "loss": 0.8822, - "step": 7521 - }, - { - "epoch": 0.6783604635433106, - "grad_norm": 1.2036654757001957, - "learning_rate": 9.904025669061072e-07, - "loss": 0.9752, - "step": 7522 - }, - { - "epoch": 0.6784506470667809, - "grad_norm": 1.1917901093007424, - "learning_rate": 9.89898297708799e-07, - "loss": 0.9349, - "step": 7523 - }, - { - "epoch": 0.6785408305902512, - "grad_norm": 1.6393311257695902, - "learning_rate": 9.893941146989388e-07, - "loss": 0.9014, - "step": 7524 - }, - { - "epoch": 0.6786310141137214, - "grad_norm": 1.5223574991671258, - "learning_rate": 9.888900179195437e-07, - "loss": 0.8977, - "step": 7525 - }, - { - "epoch": 0.6787211976371916, - "grad_norm": 1.4817956297754529, - "learning_rate": 9.883860074136285e-07, - "loss": 1.0177, - "step": 7526 - }, - { - "epoch": 0.678811381160662, - "grad_norm": 2.4603219367473406, - "learning_rate": 9.87882083224196e-07, - "loss": 0.9395, - "step": 7527 - }, - { - "epoch": 0.6789015646841322, - "grad_norm": 0.7378128018796659, - "learning_rate": 9.873782453942462e-07, - "loss": 0.8354, - "step": 7528 - }, - { - "epoch": 0.6789917482076024, - "grad_norm": 1.6082552375346448, - "learning_rate": 9.868744939667676e-07, - "loss": 0.8755, - "step": 7529 - }, - { - "epoch": 0.6790819317310728, - "grad_norm": 1.4147411425081182, - "learning_rate": 9.863708289847432e-07, - "loss": 0.8224, - "step": 7530 - }, - { - "epoch": 0.679172115254543, - "grad_norm": 1.6600379811962984, - "learning_rate": 9.85867250491149e-07, - "loss": 0.8472, - "step": 7531 - }, - { - "epoch": 0.6792622987780133, - "grad_norm": 1.4383931080732628, - "learning_rate": 9.853637585289528e-07, - "loss": 1.049, - "step": 7532 - }, - { - "epoch": 0.6793524823014835, - "grad_norm": 2.6896166782887763, - "learning_rate": 9.848603531411159e-07, - "loss": 0.9405, - "step": 7533 - }, - { - "epoch": 0.6794426658249538, - "grad_norm": 1.6944333095219417, - "learning_rate": 9.843570343705899e-07, - "loss": 0.9531, - "step": 7534 - }, - { - "epoch": 0.6795328493484241, - "grad_norm": 1.194178573210075, - "learning_rate": 9.83853802260323e-07, - "loss": 1.0045, - "step": 7535 - }, - { - "epoch": 0.6796230328718943, - "grad_norm": 1.507319948062463, - "learning_rate": 9.833506568532524e-07, - "loss": 0.8883, - "step": 7536 - }, - { - "epoch": 0.6797132163953645, - "grad_norm": 1.3460351011282128, - "learning_rate": 9.828475981923093e-07, - "loss": 0.7916, - "step": 7537 - }, - { - "epoch": 0.6798033999188349, - "grad_norm": 1.765285222941313, - "learning_rate": 9.823446263204175e-07, - "loss": 0.9213, - "step": 7538 - }, - { - "epoch": 0.6798935834423051, - "grad_norm": 1.6528095283272424, - "learning_rate": 9.818417412804937e-07, - "loss": 0.9451, - "step": 7539 - }, - { - "epoch": 0.6799837669657753, - "grad_norm": 1.4738990942527452, - "learning_rate": 9.813389431154463e-07, - "loss": 1.022, - "step": 7540 - }, - { - "epoch": 0.6800739504892456, - "grad_norm": 1.883143289086642, - "learning_rate": 9.808362318681783e-07, - "loss": 0.9695, - "step": 7541 - }, - { - "epoch": 0.6801641340127159, - "grad_norm": 1.6509693823874538, - "learning_rate": 9.803336075815807e-07, - "loss": 0.956, - "step": 7542 - }, - { - "epoch": 0.6802543175361861, - "grad_norm": 1.4855446473675942, - "learning_rate": 9.79831070298544e-07, - "loss": 0.8695, - "step": 7543 - }, - { - "epoch": 0.6803445010596564, - "grad_norm": 1.5286984106983241, - "learning_rate": 9.793286200619443e-07, - "loss": 0.9147, - "step": 7544 - }, - { - "epoch": 0.6804346845831266, - "grad_norm": 1.285175682778967, - "learning_rate": 9.78826256914655e-07, - "loss": 0.9942, - "step": 7545 - }, - { - "epoch": 0.680524868106597, - "grad_norm": 1.730270133234407, - "learning_rate": 9.7832398089954e-07, - "loss": 0.9886, - "step": 7546 - }, - { - "epoch": 0.6806150516300672, - "grad_norm": 1.673693799534679, - "learning_rate": 9.778217920594565e-07, - "loss": 1.0442, - "step": 7547 - }, - { - "epoch": 0.6807052351535374, - "grad_norm": 1.4780544433529026, - "learning_rate": 9.773196904372547e-07, - "loss": 0.9856, - "step": 7548 - }, - { - "epoch": 0.6807954186770077, - "grad_norm": 1.6114102429359254, - "learning_rate": 9.768176760757742e-07, - "loss": 1.0328, - "step": 7549 - }, - { - "epoch": 0.680885602200478, - "grad_norm": 1.2853063246460157, - "learning_rate": 9.76315749017853e-07, - "loss": 0.9197, - "step": 7550 - }, - { - "epoch": 0.6809757857239482, - "grad_norm": 1.8215256606333003, - "learning_rate": 9.758139093063161e-07, - "loss": 0.948, - "step": 7551 - }, - { - "epoch": 0.6810659692474185, - "grad_norm": 1.2635149850987006, - "learning_rate": 9.753121569839834e-07, - "loss": 1.0091, - "step": 7552 - }, - { - "epoch": 0.6811561527708888, - "grad_norm": 0.8028950774004492, - "learning_rate": 9.748104920936678e-07, - "loss": 0.7746, - "step": 7553 - }, - { - "epoch": 0.681246336294359, - "grad_norm": 1.4144279050014021, - "learning_rate": 9.743089146781738e-07, - "loss": 0.9224, - "step": 7554 - }, - { - "epoch": 0.6813365198178293, - "grad_norm": 1.3709291103881096, - "learning_rate": 9.738074247802988e-07, - "loss": 0.8744, - "step": 7555 - }, - { - "epoch": 0.6814267033412995, - "grad_norm": 1.526877251528541, - "learning_rate": 9.733060224428325e-07, - "loss": 0.9622, - "step": 7556 - }, - { - "epoch": 0.6815168868647699, - "grad_norm": 1.557085268085609, - "learning_rate": 9.728047077085577e-07, - "loss": 0.9531, - "step": 7557 - }, - { - "epoch": 0.6816070703882401, - "grad_norm": 1.7328135704539427, - "learning_rate": 9.723034806202497e-07, - "loss": 0.8409, - "step": 7558 - }, - { - "epoch": 0.6816972539117103, - "grad_norm": 1.5675617626865732, - "learning_rate": 9.718023412206748e-07, - "loss": 0.9555, - "step": 7559 - }, - { - "epoch": 0.6817874374351806, - "grad_norm": 1.300382031646964, - "learning_rate": 9.713012895525935e-07, - "loss": 0.9639, - "step": 7560 - }, - { - "epoch": 0.6818776209586509, - "grad_norm": 1.4531996512977776, - "learning_rate": 9.708003256587584e-07, - "loss": 0.9261, - "step": 7561 - }, - { - "epoch": 0.6819678044821211, - "grad_norm": 1.358892684084553, - "learning_rate": 9.702994495819147e-07, - "loss": 0.9903, - "step": 7562 - }, - { - "epoch": 0.6820579880055914, - "grad_norm": 1.5287057408860665, - "learning_rate": 9.697986613647999e-07, - "loss": 0.8473, - "step": 7563 - }, - { - "epoch": 0.6821481715290616, - "grad_norm": 0.7331240937080212, - "learning_rate": 9.692979610501425e-07, - "loss": 0.8366, - "step": 7564 - }, - { - "epoch": 0.6822383550525319, - "grad_norm": 1.3931710584650154, - "learning_rate": 9.68797348680668e-07, - "loss": 1.0157, - "step": 7565 - }, - { - "epoch": 0.6823285385760022, - "grad_norm": 1.283469069515013, - "learning_rate": 9.682968242990878e-07, - "loss": 0.9055, - "step": 7566 - }, - { - "epoch": 0.6824187220994724, - "grad_norm": 1.2771043932341462, - "learning_rate": 9.677963879481132e-07, - "loss": 0.9936, - "step": 7567 - }, - { - "epoch": 0.6825089056229426, - "grad_norm": 1.6495700940805862, - "learning_rate": 9.672960396704416e-07, - "loss": 0.9258, - "step": 7568 - }, - { - "epoch": 0.682599089146413, - "grad_norm": 1.4115415832442635, - "learning_rate": 9.667957795087657e-07, - "loss": 0.9416, - "step": 7569 - }, - { - "epoch": 0.6826892726698832, - "grad_norm": 1.7004370520274237, - "learning_rate": 9.662956075057712e-07, - "loss": 1.0028, - "step": 7570 - }, - { - "epoch": 0.6827794561933535, - "grad_norm": 1.9988367751073943, - "learning_rate": 9.657955237041354e-07, - "loss": 1.0027, - "step": 7571 - }, - { - "epoch": 0.6828696397168237, - "grad_norm": 1.29529558085584, - "learning_rate": 9.652955281465278e-07, - "loss": 0.923, - "step": 7572 - }, - { - "epoch": 0.682959823240294, - "grad_norm": 1.5577533339549083, - "learning_rate": 9.64795620875612e-07, - "loss": 0.9489, - "step": 7573 - }, - { - "epoch": 0.6830500067637643, - "grad_norm": 1.3834965194062794, - "learning_rate": 9.64295801934041e-07, - "loss": 0.8866, - "step": 7574 - }, - { - "epoch": 0.6831401902872345, - "grad_norm": 0.6919897171480301, - "learning_rate": 9.63796071364463e-07, - "loss": 0.8061, - "step": 7575 - }, - { - "epoch": 0.6832303738107048, - "grad_norm": 1.4871785241241102, - "learning_rate": 9.632964292095179e-07, - "loss": 1.0291, - "step": 7576 - }, - { - "epoch": 0.6833205573341751, - "grad_norm": 1.2215842035121633, - "learning_rate": 9.627968755118374e-07, - "loss": 0.9287, - "step": 7577 - }, - { - "epoch": 0.6834107408576453, - "grad_norm": 0.9756795019240185, - "learning_rate": 9.622974103140468e-07, - "loss": 0.824, - "step": 7578 - }, - { - "epoch": 0.6835009243811155, - "grad_norm": 1.374886768187, - "learning_rate": 9.617980336587632e-07, - "loss": 1.0026, - "step": 7579 - }, - { - "epoch": 0.6835911079045859, - "grad_norm": 1.517804888619213, - "learning_rate": 9.612987455885964e-07, - "loss": 0.8214, - "step": 7580 - }, - { - "epoch": 0.6836812914280561, - "grad_norm": 1.4423863663368117, - "learning_rate": 9.607995461461467e-07, - "loss": 0.9423, - "step": 7581 - }, - { - "epoch": 0.6837714749515263, - "grad_norm": 1.3705884214182984, - "learning_rate": 9.603004353740111e-07, - "loss": 0.84, - "step": 7582 - }, - { - "epoch": 0.6838616584749966, - "grad_norm": 0.709982261704773, - "learning_rate": 9.598014133147738e-07, - "loss": 0.8478, - "step": 7583 - }, - { - "epoch": 0.6839518419984669, - "grad_norm": 1.242494221372618, - "learning_rate": 9.59302480011017e-07, - "loss": 0.9107, - "step": 7584 - }, - { - "epoch": 0.6840420255219372, - "grad_norm": 0.7104654107100279, - "learning_rate": 9.588036355053102e-07, - "loss": 0.8125, - "step": 7585 - }, - { - "epoch": 0.6841322090454074, - "grad_norm": 1.8178164022796282, - "learning_rate": 9.583048798402182e-07, - "loss": 0.8121, - "step": 7586 - }, - { - "epoch": 0.6842223925688776, - "grad_norm": 1.5011366670059676, - "learning_rate": 9.57806213058298e-07, - "loss": 0.9447, - "step": 7587 - }, - { - "epoch": 0.684312576092348, - "grad_norm": 0.7471334193883193, - "learning_rate": 9.57307635202098e-07, - "loss": 0.8433, - "step": 7588 - }, - { - "epoch": 0.6844027596158182, - "grad_norm": 1.367640000801043, - "learning_rate": 9.568091463141607e-07, - "loss": 1.0458, - "step": 7589 - }, - { - "epoch": 0.6844929431392884, - "grad_norm": 1.24694608519186, - "learning_rate": 9.563107464370187e-07, - "loss": 0.9146, - "step": 7590 - }, - { - "epoch": 0.6845831266627587, - "grad_norm": 1.641394516450394, - "learning_rate": 9.558124356131982e-07, - "loss": 0.9637, - "step": 7591 - }, - { - "epoch": 0.684673310186229, - "grad_norm": 1.7506140585260874, - "learning_rate": 9.553142138852187e-07, - "loss": 0.8783, - "step": 7592 - }, - { - "epoch": 0.6847634937096992, - "grad_norm": 1.7883903873416482, - "learning_rate": 9.548160812955905e-07, - "loss": 0.8538, - "step": 7593 - }, - { - "epoch": 0.6848536772331695, - "grad_norm": 1.4118486641246735, - "learning_rate": 9.543180378868175e-07, - "loss": 0.7686, - "step": 7594 - }, - { - "epoch": 0.6849438607566397, - "grad_norm": 1.6063892853309254, - "learning_rate": 9.538200837013962e-07, - "loss": 0.9752, - "step": 7595 - }, - { - "epoch": 0.68503404428011, - "grad_norm": 0.7363303915540381, - "learning_rate": 9.533222187818122e-07, - "loss": 0.8541, - "step": 7596 - }, - { - "epoch": 0.6851242278035803, - "grad_norm": 1.480042972585206, - "learning_rate": 9.528244431705492e-07, - "loss": 0.9519, - "step": 7597 - }, - { - "epoch": 0.6852144113270505, - "grad_norm": 1.3250439078775256, - "learning_rate": 9.523267569100774e-07, - "loss": 0.9516, - "step": 7598 - }, - { - "epoch": 0.6853045948505209, - "grad_norm": 1.4278601135744242, - "learning_rate": 9.518291600428652e-07, - "loss": 0.9041, - "step": 7599 - }, - { - "epoch": 0.6853947783739911, - "grad_norm": 1.644461514768122, - "learning_rate": 9.513316526113677e-07, - "loss": 0.9631, - "step": 7600 - }, - { - "epoch": 0.6854849618974613, - "grad_norm": 1.3744897762735693, - "learning_rate": 9.50834234658036e-07, - "loss": 0.9678, - "step": 7601 - }, - { - "epoch": 0.6855751454209316, - "grad_norm": 1.3797346441869722, - "learning_rate": 9.503369062253123e-07, - "loss": 0.9522, - "step": 7602 - }, - { - "epoch": 0.6856653289444019, - "grad_norm": 0.7547748286107914, - "learning_rate": 9.498396673556317e-07, - "loss": 0.8116, - "step": 7603 - }, - { - "epoch": 0.6857555124678721, - "grad_norm": 1.368982455576569, - "learning_rate": 9.493425180914219e-07, - "loss": 0.9642, - "step": 7604 - }, - { - "epoch": 0.6858456959913424, - "grad_norm": 1.4507624791851188, - "learning_rate": 9.488454584751e-07, - "loss": 0.8727, - "step": 7605 - }, - { - "epoch": 0.6859358795148126, - "grad_norm": 1.408285871341766, - "learning_rate": 9.483484885490813e-07, - "loss": 0.9488, - "step": 7606 - }, - { - "epoch": 0.686026063038283, - "grad_norm": 1.5149536218877948, - "learning_rate": 9.478516083557675e-07, - "loss": 0.9617, - "step": 7607 - }, - { - "epoch": 0.6861162465617532, - "grad_norm": 1.7022716939381064, - "learning_rate": 9.473548179375561e-07, - "loss": 0.9606, - "step": 7608 - }, - { - "epoch": 0.6862064300852234, - "grad_norm": 0.8088270947178886, - "learning_rate": 9.468581173368358e-07, - "loss": 0.8277, - "step": 7609 - }, - { - "epoch": 0.6862966136086937, - "grad_norm": 1.3552480732366294, - "learning_rate": 9.463615065959878e-07, - "loss": 0.9518, - "step": 7610 - }, - { - "epoch": 0.686386797132164, - "grad_norm": 1.5738148514260544, - "learning_rate": 9.458649857573857e-07, - "loss": 0.9967, - "step": 7611 - }, - { - "epoch": 0.6864769806556342, - "grad_norm": 1.3510068511986824, - "learning_rate": 9.453685548633963e-07, - "loss": 0.9543, - "step": 7612 - }, - { - "epoch": 0.6865671641791045, - "grad_norm": 1.8180335845992601, - "learning_rate": 9.448722139563756e-07, - "loss": 0.8979, - "step": 7613 - }, - { - "epoch": 0.6866573477025747, - "grad_norm": 1.4884399970188056, - "learning_rate": 9.443759630786769e-07, - "loss": 0.8416, - "step": 7614 - }, - { - "epoch": 0.686747531226045, - "grad_norm": 1.5813303960935858, - "learning_rate": 9.438798022726408e-07, - "loss": 1.0067, - "step": 7615 - }, - { - "epoch": 0.6868377147495153, - "grad_norm": 1.4735487603649136, - "learning_rate": 9.433837315806037e-07, - "loss": 0.898, - "step": 7616 - }, - { - "epoch": 0.6869278982729855, - "grad_norm": 1.6238111401941282, - "learning_rate": 9.428877510448925e-07, - "loss": 0.8948, - "step": 7617 - }, - { - "epoch": 0.6870180817964557, - "grad_norm": 1.2557296567283296, - "learning_rate": 9.423918607078272e-07, - "loss": 0.8692, - "step": 7618 - }, - { - "epoch": 0.6871082653199261, - "grad_norm": 1.5366506510056488, - "learning_rate": 9.418960606117208e-07, - "loss": 0.9562, - "step": 7619 - }, - { - "epoch": 0.6871984488433963, - "grad_norm": 1.419620277245779, - "learning_rate": 9.414003507988752e-07, - "loss": 0.8875, - "step": 7620 - }, - { - "epoch": 0.6872886323668665, - "grad_norm": 1.359106190729795, - "learning_rate": 9.409047313115904e-07, - "loss": 0.8716, - "step": 7621 - }, - { - "epoch": 0.6873788158903368, - "grad_norm": 1.779926678508269, - "learning_rate": 9.404092021921521e-07, - "loss": 0.8838, - "step": 7622 - }, - { - "epoch": 0.6874689994138071, - "grad_norm": 2.167966330587051, - "learning_rate": 9.399137634828447e-07, - "loss": 0.9264, - "step": 7623 - }, - { - "epoch": 0.6875591829372774, - "grad_norm": 1.373324023040829, - "learning_rate": 9.394184152259396e-07, - "loss": 0.9636, - "step": 7624 - }, - { - "epoch": 0.6876493664607476, - "grad_norm": 1.4019740900537816, - "learning_rate": 9.389231574637033e-07, - "loss": 1.0423, - "step": 7625 - }, - { - "epoch": 0.6877395499842179, - "grad_norm": 1.4458845501420496, - "learning_rate": 9.384279902383938e-07, - "loss": 1.0328, - "step": 7626 - }, - { - "epoch": 0.6878297335076882, - "grad_norm": 1.30413605691659, - "learning_rate": 9.379329135922615e-07, - "loss": 0.9519, - "step": 7627 - }, - { - "epoch": 0.6879199170311584, - "grad_norm": 1.5745352528642245, - "learning_rate": 9.374379275675495e-07, - "loss": 0.9001, - "step": 7628 - }, - { - "epoch": 0.6880101005546286, - "grad_norm": 1.8220697524603195, - "learning_rate": 9.369430322064931e-07, - "loss": 0.9447, - "step": 7629 - }, - { - "epoch": 0.688100284078099, - "grad_norm": 1.5660826236954175, - "learning_rate": 9.364482275513179e-07, - "loss": 1.0225, - "step": 7630 - }, - { - "epoch": 0.6881904676015692, - "grad_norm": 1.683007512176122, - "learning_rate": 9.359535136442444e-07, - "loss": 1.0078, - "step": 7631 - }, - { - "epoch": 0.6882806511250394, - "grad_norm": 1.3858010707654544, - "learning_rate": 9.354588905274843e-07, - "loss": 0.8695, - "step": 7632 - }, - { - "epoch": 0.6883708346485097, - "grad_norm": 1.3976294884377203, - "learning_rate": 9.349643582432414e-07, - "loss": 0.9726, - "step": 7633 - }, - { - "epoch": 0.68846101817198, - "grad_norm": 1.349206580022673, - "learning_rate": 9.344699168337127e-07, - "loss": 0.9372, - "step": 7634 - }, - { - "epoch": 0.6885512016954503, - "grad_norm": 1.49265829119108, - "learning_rate": 9.339755663410845e-07, - "loss": 0.9051, - "step": 7635 - }, - { - "epoch": 0.6886413852189205, - "grad_norm": 1.5048058927707595, - "learning_rate": 9.334813068075405e-07, - "loss": 0.9675, - "step": 7636 - }, - { - "epoch": 0.6887315687423907, - "grad_norm": 1.3887493471406764, - "learning_rate": 9.329871382752506e-07, - "loss": 0.9492, - "step": 7637 - }, - { - "epoch": 0.6888217522658611, - "grad_norm": 1.5423307620894748, - "learning_rate": 9.32493060786383e-07, - "loss": 0.9291, - "step": 7638 - }, - { - "epoch": 0.6889119357893313, - "grad_norm": 1.239137996956761, - "learning_rate": 9.31999074383093e-07, - "loss": 1.0071, - "step": 7639 - }, - { - "epoch": 0.6890021193128015, - "grad_norm": 1.5862293444672184, - "learning_rate": 9.315051791075308e-07, - "loss": 0.9396, - "step": 7640 - }, - { - "epoch": 0.6890923028362718, - "grad_norm": 1.1643720893598246, - "learning_rate": 9.310113750018382e-07, - "loss": 0.9331, - "step": 7641 - }, - { - "epoch": 0.6891824863597421, - "grad_norm": 1.7389916500661047, - "learning_rate": 9.305176621081496e-07, - "loss": 1.0153, - "step": 7642 - }, - { - "epoch": 0.6892726698832123, - "grad_norm": 1.4590646091036608, - "learning_rate": 9.300240404685911e-07, - "loss": 0.9759, - "step": 7643 - }, - { - "epoch": 0.6893628534066826, - "grad_norm": 1.5805259796054103, - "learning_rate": 9.295305101252812e-07, - "loss": 0.9491, - "step": 7644 - }, - { - "epoch": 0.6894530369301528, - "grad_norm": 1.929738156043745, - "learning_rate": 9.290370711203314e-07, - "loss": 0.9241, - "step": 7645 - }, - { - "epoch": 0.6895432204536232, - "grad_norm": 1.5019778880842238, - "learning_rate": 9.285437234958433e-07, - "loss": 0.9406, - "step": 7646 - }, - { - "epoch": 0.6896334039770934, - "grad_norm": 1.5137102282187427, - "learning_rate": 9.280504672939124e-07, - "loss": 0.9341, - "step": 7647 - }, - { - "epoch": 0.6897235875005636, - "grad_norm": 1.3552946039293234, - "learning_rate": 9.275573025566266e-07, - "loss": 0.9764, - "step": 7648 - }, - { - "epoch": 0.689813771024034, - "grad_norm": 1.1985182813253545, - "learning_rate": 9.27064229326065e-07, - "loss": 0.9792, - "step": 7649 - }, - { - "epoch": 0.6899039545475042, - "grad_norm": 1.3765427864040054, - "learning_rate": 9.265712476442995e-07, - "loss": 0.9085, - "step": 7650 - }, - { - "epoch": 0.6899941380709744, - "grad_norm": 1.2788224367002048, - "learning_rate": 9.260783575533949e-07, - "loss": 0.8741, - "step": 7651 - }, - { - "epoch": 0.6900843215944447, - "grad_norm": 2.387228454006837, - "learning_rate": 9.255855590954045e-07, - "loss": 0.8996, - "step": 7652 - }, - { - "epoch": 0.690174505117915, - "grad_norm": 1.3812315357061564, - "learning_rate": 9.250928523123802e-07, - "loss": 1.0094, - "step": 7653 - }, - { - "epoch": 0.6902646886413852, - "grad_norm": 1.2898465170966353, - "learning_rate": 9.24600237246359e-07, - "loss": 0.9524, - "step": 7654 - }, - { - "epoch": 0.6903548721648555, - "grad_norm": 1.5956619612241436, - "learning_rate": 9.241077139393769e-07, - "loss": 0.951, - "step": 7655 - }, - { - "epoch": 0.6904450556883257, - "grad_norm": 1.579178939744803, - "learning_rate": 9.236152824334564e-07, - "loss": 0.9719, - "step": 7656 - }, - { - "epoch": 0.690535239211796, - "grad_norm": 1.4525797651650219, - "learning_rate": 9.231229427706151e-07, - "loss": 0.8392, - "step": 7657 - }, - { - "epoch": 0.6906254227352663, - "grad_norm": 1.595109845627159, - "learning_rate": 9.226306949928622e-07, - "loss": 0.9476, - "step": 7658 - }, - { - "epoch": 0.6907156062587365, - "grad_norm": 1.6140720747509527, - "learning_rate": 9.221385391421988e-07, - "loss": 1.0182, - "step": 7659 - }, - { - "epoch": 0.6908057897822067, - "grad_norm": 1.2902064478338036, - "learning_rate": 9.216464752606192e-07, - "loss": 0.9532, - "step": 7660 - }, - { - "epoch": 0.6908959733056771, - "grad_norm": 1.2768788556205508, - "learning_rate": 9.211545033901078e-07, - "loss": 1.0226, - "step": 7661 - }, - { - "epoch": 0.6909861568291473, - "grad_norm": 1.3662830521895002, - "learning_rate": 9.206626235726426e-07, - "loss": 0.9738, - "step": 7662 - }, - { - "epoch": 0.6910763403526176, - "grad_norm": 1.6732199419719591, - "learning_rate": 9.20170835850194e-07, - "loss": 0.8767, - "step": 7663 - }, - { - "epoch": 0.6911665238760878, - "grad_norm": 0.7406368319053204, - "learning_rate": 9.196791402647237e-07, - "loss": 0.8242, - "step": 7664 - }, - { - "epoch": 0.6912567073995581, - "grad_norm": 1.328657155852883, - "learning_rate": 9.191875368581861e-07, - "loss": 0.8687, - "step": 7665 - }, - { - "epoch": 0.6913468909230284, - "grad_norm": 1.4819849951186537, - "learning_rate": 9.186960256725271e-07, - "loss": 0.9561, - "step": 7666 - }, - { - "epoch": 0.6914370744464986, - "grad_norm": 1.4338041146001044, - "learning_rate": 9.182046067496856e-07, - "loss": 1.0025, - "step": 7667 - }, - { - "epoch": 0.6915272579699688, - "grad_norm": 1.509420061722214, - "learning_rate": 9.177132801315927e-07, - "loss": 1.0238, - "step": 7668 - }, - { - "epoch": 0.6916174414934392, - "grad_norm": 1.322649585867067, - "learning_rate": 9.172220458601692e-07, - "loss": 0.9157, - "step": 7669 - }, - { - "epoch": 0.6917076250169094, - "grad_norm": 1.6766754112369533, - "learning_rate": 9.167309039773324e-07, - "loss": 0.9121, - "step": 7670 - }, - { - "epoch": 0.6917978085403796, - "grad_norm": 1.162714337514168, - "learning_rate": 9.162398545249872e-07, - "loss": 0.8427, - "step": 7671 - }, - { - "epoch": 0.69188799206385, - "grad_norm": 1.2250031198734337, - "learning_rate": 9.157488975450334e-07, - "loss": 0.9914, - "step": 7672 - }, - { - "epoch": 0.6919781755873202, - "grad_norm": 1.3315329416678747, - "learning_rate": 9.15258033079362e-07, - "loss": 0.7775, - "step": 7673 - }, - { - "epoch": 0.6920683591107905, - "grad_norm": 1.3689209330528618, - "learning_rate": 9.147672611698567e-07, - "loss": 0.9359, - "step": 7674 - }, - { - "epoch": 0.6921585426342607, - "grad_norm": 1.5055696874254803, - "learning_rate": 9.142765818583933e-07, - "loss": 0.9351, - "step": 7675 - }, - { - "epoch": 0.692248726157731, - "grad_norm": 1.2210291556799067, - "learning_rate": 9.13785995186837e-07, - "loss": 0.9189, - "step": 7676 - }, - { - "epoch": 0.6923389096812013, - "grad_norm": 1.352950337261884, - "learning_rate": 9.132955011970502e-07, - "loss": 0.908, - "step": 7677 - }, - { - "epoch": 0.6924290932046715, - "grad_norm": 2.206222304907893, - "learning_rate": 9.128050999308827e-07, - "loss": 0.933, - "step": 7678 - }, - { - "epoch": 0.6925192767281417, - "grad_norm": 1.5813790189556132, - "learning_rate": 9.123147914301789e-07, - "loss": 1.0355, - "step": 7679 - }, - { - "epoch": 0.6926094602516121, - "grad_norm": 1.4361012329132397, - "learning_rate": 9.118245757367745e-07, - "loss": 0.9886, - "step": 7680 - }, - { - "epoch": 0.6926996437750823, - "grad_norm": 0.691579872357879, - "learning_rate": 9.113344528924973e-07, - "loss": 0.8149, - "step": 7681 - }, - { - "epoch": 0.6927898272985525, - "grad_norm": 1.3814915527819995, - "learning_rate": 9.108444229391676e-07, - "loss": 0.9222, - "step": 7682 - }, - { - "epoch": 0.6928800108220228, - "grad_norm": 1.6473482700572815, - "learning_rate": 9.103544859185979e-07, - "loss": 0.9143, - "step": 7683 - }, - { - "epoch": 0.6929701943454931, - "grad_norm": 1.4996243642309783, - "learning_rate": 9.098646418725902e-07, - "loss": 1.0257, - "step": 7684 - }, - { - "epoch": 0.6930603778689634, - "grad_norm": 0.651762011968603, - "learning_rate": 9.093748908429437e-07, - "loss": 0.8281, - "step": 7685 - }, - { - "epoch": 0.6931505613924336, - "grad_norm": 1.367624049653669, - "learning_rate": 9.088852328714444e-07, - "loss": 0.9329, - "step": 7686 - }, - { - "epoch": 0.6932407449159038, - "grad_norm": 1.2601238362447236, - "learning_rate": 9.083956679998735e-07, - "loss": 0.8555, - "step": 7687 - }, - { - "epoch": 0.6933309284393742, - "grad_norm": 1.2131827919355598, - "learning_rate": 9.079061962700032e-07, - "loss": 0.9106, - "step": 7688 - }, - { - "epoch": 0.6934211119628444, - "grad_norm": 1.3491905876538197, - "learning_rate": 9.074168177235979e-07, - "loss": 0.9631, - "step": 7689 - }, - { - "epoch": 0.6935112954863146, - "grad_norm": 1.470401727966771, - "learning_rate": 9.069275324024151e-07, - "loss": 0.9559, - "step": 7690 - }, - { - "epoch": 0.6936014790097849, - "grad_norm": 1.717190537870877, - "learning_rate": 9.064383403482005e-07, - "loss": 0.8734, - "step": 7691 - }, - { - "epoch": 0.6936916625332552, - "grad_norm": 1.5256449039321984, - "learning_rate": 9.059492416026983e-07, - "loss": 0.9088, - "step": 7692 - }, - { - "epoch": 0.6937818460567254, - "grad_norm": 1.241181355986008, - "learning_rate": 9.054602362076378e-07, - "loss": 0.9398, - "step": 7693 - }, - { - "epoch": 0.6938720295801957, - "grad_norm": 1.6164510035783963, - "learning_rate": 9.049713242047468e-07, - "loss": 0.9322, - "step": 7694 - }, - { - "epoch": 0.693962213103666, - "grad_norm": 1.5971193040870262, - "learning_rate": 9.044825056357395e-07, - "loss": 0.9182, - "step": 7695 - }, - { - "epoch": 0.6940523966271362, - "grad_norm": 1.36478518189495, - "learning_rate": 9.039937805423255e-07, - "loss": 0.8973, - "step": 7696 - }, - { - "epoch": 0.6941425801506065, - "grad_norm": 0.6429754495561385, - "learning_rate": 9.035051489662051e-07, - "loss": 0.7572, - "step": 7697 - }, - { - "epoch": 0.6942327636740767, - "grad_norm": 1.4624018546535076, - "learning_rate": 9.030166109490718e-07, - "loss": 0.8974, - "step": 7698 - }, - { - "epoch": 0.6943229471975471, - "grad_norm": 1.9088720205459988, - "learning_rate": 9.025281665326099e-07, - "loss": 0.9608, - "step": 7699 - }, - { - "epoch": 0.6944131307210173, - "grad_norm": 1.5082281458868105, - "learning_rate": 9.020398157584967e-07, - "loss": 0.947, - "step": 7700 - }, - { - "epoch": 0.6945033142444875, - "grad_norm": 1.566242009634961, - "learning_rate": 9.015515586684002e-07, - "loss": 0.8466, - "step": 7701 - }, - { - "epoch": 0.6945934977679578, - "grad_norm": 0.7408110453324972, - "learning_rate": 9.010633953039812e-07, - "loss": 0.8088, - "step": 7702 - }, - { - "epoch": 0.6946836812914281, - "grad_norm": 1.3487297316080953, - "learning_rate": 9.005753257068929e-07, - "loss": 0.9567, - "step": 7703 - }, - { - "epoch": 0.6947738648148983, - "grad_norm": 14.365470049264943, - "learning_rate": 9.000873499187797e-07, - "loss": 0.9579, - "step": 7704 - }, - { - "epoch": 0.6948640483383686, - "grad_norm": 1.5068991469193305, - "learning_rate": 8.995994679812797e-07, - "loss": 0.9804, - "step": 7705 - }, - { - "epoch": 0.6949542318618388, - "grad_norm": 1.2695197002178946, - "learning_rate": 8.991116799360192e-07, - "loss": 0.9732, - "step": 7706 - }, - { - "epoch": 0.6950444153853091, - "grad_norm": 1.630751116237466, - "learning_rate": 8.986239858246217e-07, - "loss": 0.9351, - "step": 7707 - }, - { - "epoch": 0.6951345989087794, - "grad_norm": 1.5590470116251778, - "learning_rate": 8.981363856886972e-07, - "loss": 1.017, - "step": 7708 - }, - { - "epoch": 0.6952247824322496, - "grad_norm": 1.6605930606410821, - "learning_rate": 8.976488795698533e-07, - "loss": 0.9245, - "step": 7709 - }, - { - "epoch": 0.6953149659557198, - "grad_norm": 1.479817350303337, - "learning_rate": 8.971614675096841e-07, - "loss": 0.9595, - "step": 7710 - }, - { - "epoch": 0.6954051494791902, - "grad_norm": 1.6913659353054904, - "learning_rate": 8.966741495497807e-07, - "loss": 1.0052, - "step": 7711 - }, - { - "epoch": 0.6954953330026604, - "grad_norm": 1.2666751131395175, - "learning_rate": 8.961869257317218e-07, - "loss": 0.9134, - "step": 7712 - }, - { - "epoch": 0.6955855165261307, - "grad_norm": 1.6593534076672352, - "learning_rate": 8.956997960970809e-07, - "loss": 0.9297, - "step": 7713 - }, - { - "epoch": 0.6956757000496009, - "grad_norm": 1.4956987223008704, - "learning_rate": 8.952127606874224e-07, - "loss": 0.8507, - "step": 7714 - }, - { - "epoch": 0.6957658835730712, - "grad_norm": 1.3213454500220854, - "learning_rate": 8.947258195443028e-07, - "loss": 0.9423, - "step": 7715 - }, - { - "epoch": 0.6958560670965415, - "grad_norm": 1.4034302309400546, - "learning_rate": 8.942389727092716e-07, - "loss": 0.9096, - "step": 7716 - }, - { - "epoch": 0.6959462506200117, - "grad_norm": 0.9771168775568579, - "learning_rate": 8.937522202238677e-07, - "loss": 0.8083, - "step": 7717 - }, - { - "epoch": 0.696036434143482, - "grad_norm": 1.3954185419913891, - "learning_rate": 8.932655621296239e-07, - "loss": 0.9562, - "step": 7718 - }, - { - "epoch": 0.6961266176669523, - "grad_norm": 1.1625499755617867, - "learning_rate": 8.927789984680649e-07, - "loss": 0.9387, - "step": 7719 - }, - { - "epoch": 0.6962168011904225, - "grad_norm": 1.2847598777728277, - "learning_rate": 8.922925292807068e-07, - "loss": 0.8105, - "step": 7720 - }, - { - "epoch": 0.6963069847138927, - "grad_norm": 1.4576081879966802, - "learning_rate": 8.91806154609058e-07, - "loss": 1.0185, - "step": 7721 - }, - { - "epoch": 0.6963971682373631, - "grad_norm": 1.3677686491559276, - "learning_rate": 8.913198744946195e-07, - "loss": 0.9156, - "step": 7722 - }, - { - "epoch": 0.6964873517608333, - "grad_norm": 1.3219410259810667, - "learning_rate": 8.908336889788807e-07, - "loss": 0.9632, - "step": 7723 - }, - { - "epoch": 0.6965775352843036, - "grad_norm": 1.4576566034268132, - "learning_rate": 8.903475981033293e-07, - "loss": 0.8222, - "step": 7724 - }, - { - "epoch": 0.6966677188077738, - "grad_norm": 0.8210337057036631, - "learning_rate": 8.898616019094376e-07, - "loss": 0.8853, - "step": 7725 - }, - { - "epoch": 0.6967579023312441, - "grad_norm": 1.5594262980430722, - "learning_rate": 8.89375700438677e-07, - "loss": 0.9778, - "step": 7726 - }, - { - "epoch": 0.6968480858547144, - "grad_norm": 1.6816129781862832, - "learning_rate": 8.888898937325047e-07, - "loss": 0.9054, - "step": 7727 - }, - { - "epoch": 0.6969382693781846, - "grad_norm": 3.5655257108406224, - "learning_rate": 8.884041818323733e-07, - "loss": 0.9148, - "step": 7728 - }, - { - "epoch": 0.6970284529016548, - "grad_norm": 1.515361409026385, - "learning_rate": 8.879185647797262e-07, - "loss": 0.9711, - "step": 7729 - }, - { - "epoch": 0.6971186364251252, - "grad_norm": 1.5094169026436428, - "learning_rate": 8.874330426159993e-07, - "loss": 0.9114, - "step": 7730 - }, - { - "epoch": 0.6972088199485954, - "grad_norm": 1.432203569881836, - "learning_rate": 8.869476153826205e-07, - "loss": 0.9661, - "step": 7731 - }, - { - "epoch": 0.6972990034720656, - "grad_norm": 1.4361228150842438, - "learning_rate": 8.864622831210071e-07, - "loss": 0.9418, - "step": 7732 - }, - { - "epoch": 0.6973891869955359, - "grad_norm": 1.6566369666284249, - "learning_rate": 8.85977045872573e-07, - "loss": 0.9519, - "step": 7733 - }, - { - "epoch": 0.6974793705190062, - "grad_norm": 1.71184024771338, - "learning_rate": 8.854919036787194e-07, - "loss": 0.925, - "step": 7734 - }, - { - "epoch": 0.6975695540424764, - "grad_norm": 1.7558816435253648, - "learning_rate": 8.850068565808417e-07, - "loss": 0.9139, - "step": 7735 - }, - { - "epoch": 0.6976597375659467, - "grad_norm": 1.7468043850460442, - "learning_rate": 8.845219046203271e-07, - "loss": 0.9923, - "step": 7736 - }, - { - "epoch": 0.6977499210894169, - "grad_norm": 1.7290296232157978, - "learning_rate": 8.840370478385544e-07, - "loss": 0.9669, - "step": 7737 - }, - { - "epoch": 0.6978401046128873, - "grad_norm": 1.5467036759262123, - "learning_rate": 8.83552286276894e-07, - "loss": 0.9616, - "step": 7738 - }, - { - "epoch": 0.6979302881363575, - "grad_norm": 1.7207302823126505, - "learning_rate": 8.830676199767095e-07, - "loss": 0.9585, - "step": 7739 - }, - { - "epoch": 0.6980204716598277, - "grad_norm": 1.3908113076210868, - "learning_rate": 8.825830489793527e-07, - "loss": 0.9264, - "step": 7740 - }, - { - "epoch": 0.698110655183298, - "grad_norm": 2.9571618639622774, - "learning_rate": 8.820985733261732e-07, - "loss": 0.8966, - "step": 7741 - }, - { - "epoch": 0.6982008387067683, - "grad_norm": 1.5040234012305043, - "learning_rate": 8.816141930585066e-07, - "loss": 0.8442, - "step": 7742 - }, - { - "epoch": 0.6982910222302385, - "grad_norm": 0.6967552304518114, - "learning_rate": 8.811299082176837e-07, - "loss": 0.8099, - "step": 7743 - }, - { - "epoch": 0.6983812057537088, - "grad_norm": 1.3497246390777706, - "learning_rate": 8.806457188450265e-07, - "loss": 0.9094, - "step": 7744 - }, - { - "epoch": 0.6984713892771791, - "grad_norm": 1.3457567622287512, - "learning_rate": 8.801616249818487e-07, - "loss": 0.9727, - "step": 7745 - }, - { - "epoch": 0.6985615728006493, - "grad_norm": 1.4583069481052224, - "learning_rate": 8.796776266694564e-07, - "loss": 0.9063, - "step": 7746 - }, - { - "epoch": 0.6986517563241196, - "grad_norm": 1.2772464540155706, - "learning_rate": 8.79193723949145e-07, - "loss": 0.8879, - "step": 7747 - }, - { - "epoch": 0.6987419398475898, - "grad_norm": 1.4751082429123992, - "learning_rate": 8.787099168622063e-07, - "loss": 0.8408, - "step": 7748 - }, - { - "epoch": 0.6988321233710602, - "grad_norm": 1.6069868572151065, - "learning_rate": 8.782262054499199e-07, - "loss": 0.8737, - "step": 7749 - }, - { - "epoch": 0.6989223068945304, - "grad_norm": 1.2421768955761732, - "learning_rate": 8.777425897535588e-07, - "loss": 0.9722, - "step": 7750 - }, - { - "epoch": 0.6990124904180006, - "grad_norm": 1.4407890673240082, - "learning_rate": 8.77259069814388e-07, - "loss": 0.9444, - "step": 7751 - }, - { - "epoch": 0.6991026739414709, - "grad_norm": 1.2805816023451582, - "learning_rate": 8.767756456736641e-07, - "loss": 0.9465, - "step": 7752 - }, - { - "epoch": 0.6991928574649412, - "grad_norm": 1.271600111515032, - "learning_rate": 8.762923173726358e-07, - "loss": 0.866, - "step": 7753 - }, - { - "epoch": 0.6992830409884114, - "grad_norm": 1.4565720230770067, - "learning_rate": 8.758090849525428e-07, - "loss": 1.04, - "step": 7754 - }, - { - "epoch": 0.6993732245118817, - "grad_norm": 1.539170004870821, - "learning_rate": 8.753259484546174e-07, - "loss": 0.8824, - "step": 7755 - }, - { - "epoch": 0.6994634080353519, - "grad_norm": 1.8035529727881785, - "learning_rate": 8.748429079200841e-07, - "loss": 0.9033, - "step": 7756 - }, - { - "epoch": 0.6995535915588222, - "grad_norm": 1.7237707153137822, - "learning_rate": 8.743599633901575e-07, - "loss": 0.9982, - "step": 7757 - }, - { - "epoch": 0.6996437750822925, - "grad_norm": 1.4558530241856131, - "learning_rate": 8.738771149060453e-07, - "loss": 0.9492, - "step": 7758 - }, - { - "epoch": 0.6997339586057627, - "grad_norm": 1.4176395759287266, - "learning_rate": 8.73394362508947e-07, - "loss": 0.9054, - "step": 7759 - }, - { - "epoch": 0.6998241421292329, - "grad_norm": 1.4983688863850884, - "learning_rate": 8.72911706240054e-07, - "loss": 0.9052, - "step": 7760 - }, - { - "epoch": 0.6999143256527033, - "grad_norm": 1.33774137012462, - "learning_rate": 8.724291461405493e-07, - "loss": 0.8181, - "step": 7761 - }, - { - "epoch": 0.7000045091761735, - "grad_norm": 1.5293828518646058, - "learning_rate": 8.71946682251606e-07, - "loss": 0.91, - "step": 7762 - }, - { - "epoch": 0.7000946926996438, - "grad_norm": 1.4312838621173367, - "learning_rate": 8.714643146143932e-07, - "loss": 0.9369, - "step": 7763 - }, - { - "epoch": 0.700184876223114, - "grad_norm": 1.3340656931542476, - "learning_rate": 8.709820432700663e-07, - "loss": 0.8976, - "step": 7764 - }, - { - "epoch": 0.7002750597465843, - "grad_norm": 1.2527792551145187, - "learning_rate": 8.704998682597784e-07, - "loss": 0.8576, - "step": 7765 - }, - { - "epoch": 0.7003652432700546, - "grad_norm": 1.4366710594770948, - "learning_rate": 8.700177896246688e-07, - "loss": 0.9596, - "step": 7766 - }, - { - "epoch": 0.7004554267935248, - "grad_norm": 1.5344732963945715, - "learning_rate": 8.695358074058721e-07, - "loss": 0.8652, - "step": 7767 - }, - { - "epoch": 0.7005456103169951, - "grad_norm": 1.5246403567122555, - "learning_rate": 8.690539216445136e-07, - "loss": 0.9802, - "step": 7768 - }, - { - "epoch": 0.7006357938404654, - "grad_norm": 1.7167975693702378, - "learning_rate": 8.685721323817106e-07, - "loss": 0.8423, - "step": 7769 - }, - { - "epoch": 0.7007259773639356, - "grad_norm": 1.404049274544457, - "learning_rate": 8.680904396585718e-07, - "loss": 0.9805, - "step": 7770 - }, - { - "epoch": 0.7008161608874058, - "grad_norm": 1.4294132323962812, - "learning_rate": 8.676088435161988e-07, - "loss": 0.8885, - "step": 7771 - }, - { - "epoch": 0.7009063444108762, - "grad_norm": 1.6735204282527587, - "learning_rate": 8.671273439956824e-07, - "loss": 0.9542, - "step": 7772 - }, - { - "epoch": 0.7009965279343464, - "grad_norm": 1.4305153206328507, - "learning_rate": 8.666459411381075e-07, - "loss": 0.9418, - "step": 7773 - }, - { - "epoch": 0.7010867114578166, - "grad_norm": 1.3380063640346198, - "learning_rate": 8.661646349845501e-07, - "loss": 0.9658, - "step": 7774 - }, - { - "epoch": 0.7011768949812869, - "grad_norm": 1.4536850373045413, - "learning_rate": 8.656834255760783e-07, - "loss": 0.9926, - "step": 7775 - }, - { - "epoch": 0.7012670785047572, - "grad_norm": 1.9306461563013209, - "learning_rate": 8.652023129537509e-07, - "loss": 0.9933, - "step": 7776 - }, - { - "epoch": 0.7013572620282275, - "grad_norm": 1.4118511549517918, - "learning_rate": 8.647212971586195e-07, - "loss": 0.9672, - "step": 7777 - }, - { - "epoch": 0.7014474455516977, - "grad_norm": 1.4639299721515533, - "learning_rate": 8.642403782317275e-07, - "loss": 0.9008, - "step": 7778 - }, - { - "epoch": 0.7015376290751679, - "grad_norm": 1.3615455836868682, - "learning_rate": 8.637595562141075e-07, - "loss": 0.9441, - "step": 7779 - }, - { - "epoch": 0.7016278125986383, - "grad_norm": 1.296681860824122, - "learning_rate": 8.632788311467889e-07, - "loss": 0.9061, - "step": 7780 - }, - { - "epoch": 0.7017179961221085, - "grad_norm": 1.2446553888567682, - "learning_rate": 8.627982030707867e-07, - "loss": 0.93, - "step": 7781 - }, - { - "epoch": 0.7018081796455787, - "grad_norm": 1.482716889107708, - "learning_rate": 8.623176720271139e-07, - "loss": 1.0076, - "step": 7782 - }, - { - "epoch": 0.701898363169049, - "grad_norm": 1.8633399170410638, - "learning_rate": 8.618372380567696e-07, - "loss": 1.0106, - "step": 7783 - }, - { - "epoch": 0.7019885466925193, - "grad_norm": 1.6490137620655836, - "learning_rate": 8.613569012007478e-07, - "loss": 0.8957, - "step": 7784 - }, - { - "epoch": 0.7020787302159895, - "grad_norm": 1.64464976411496, - "learning_rate": 8.608766615000338e-07, - "loss": 0.9801, - "step": 7785 - }, - { - "epoch": 0.7021689137394598, - "grad_norm": 1.662938987883529, - "learning_rate": 8.603965189956039e-07, - "loss": 0.9613, - "step": 7786 - }, - { - "epoch": 0.70225909726293, - "grad_norm": 1.6194932505321802, - "learning_rate": 8.599164737284276e-07, - "loss": 0.9364, - "step": 7787 - }, - { - "epoch": 0.7023492807864004, - "grad_norm": 1.5297832860366, - "learning_rate": 8.594365257394634e-07, - "loss": 0.8943, - "step": 7788 - }, - { - "epoch": 0.7024394643098706, - "grad_norm": 1.670449620783995, - "learning_rate": 8.589566750696637e-07, - "loss": 0.8826, - "step": 7789 - }, - { - "epoch": 0.7025296478333408, - "grad_norm": 1.5578318480855982, - "learning_rate": 8.584769217599721e-07, - "loss": 0.8804, - "step": 7790 - }, - { - "epoch": 0.7026198313568112, - "grad_norm": 1.9066583164850313, - "learning_rate": 8.579972658513239e-07, - "loss": 0.8864, - "step": 7791 - }, - { - "epoch": 0.7027100148802814, - "grad_norm": 1.616479248640514, - "learning_rate": 8.57517707384646e-07, - "loss": 0.9454, - "step": 7792 - }, - { - "epoch": 0.7028001984037516, - "grad_norm": 1.3822174084899164, - "learning_rate": 8.570382464008574e-07, - "loss": 0.9728, - "step": 7793 - }, - { - "epoch": 0.7028903819272219, - "grad_norm": 1.5398113926662387, - "learning_rate": 8.565588829408665e-07, - "loss": 0.9076, - "step": 7794 - }, - { - "epoch": 0.7029805654506922, - "grad_norm": 1.4878701270452381, - "learning_rate": 8.560796170455782e-07, - "loss": 0.9177, - "step": 7795 - }, - { - "epoch": 0.7030707489741624, - "grad_norm": 1.5486366952209256, - "learning_rate": 8.556004487558828e-07, - "loss": 0.9706, - "step": 7796 - }, - { - "epoch": 0.7031609324976327, - "grad_norm": 1.3071780161703943, - "learning_rate": 8.55121378112669e-07, - "loss": 0.8551, - "step": 7797 - }, - { - "epoch": 0.7032511160211029, - "grad_norm": 1.540822837770462, - "learning_rate": 8.546424051568111e-07, - "loss": 0.9169, - "step": 7798 - }, - { - "epoch": 0.7033412995445733, - "grad_norm": 1.4461824840193407, - "learning_rate": 8.541635299291785e-07, - "loss": 0.9873, - "step": 7799 - }, - { - "epoch": 0.7034314830680435, - "grad_norm": 0.6610557477428939, - "learning_rate": 8.536847524706317e-07, - "loss": 0.8164, - "step": 7800 - }, - { - "epoch": 0.7035216665915137, - "grad_norm": 1.4555315168998721, - "learning_rate": 8.532060728220225e-07, - "loss": 0.8762, - "step": 7801 - }, - { - "epoch": 0.703611850114984, - "grad_norm": 1.7559147742233783, - "learning_rate": 8.527274910241955e-07, - "loss": 0.9575, - "step": 7802 - }, - { - "epoch": 0.7037020336384543, - "grad_norm": 1.388109911648136, - "learning_rate": 8.522490071179833e-07, - "loss": 0.9351, - "step": 7803 - }, - { - "epoch": 0.7037922171619245, - "grad_norm": 1.2658139311504981, - "learning_rate": 8.517706211442159e-07, - "loss": 0.8169, - "step": 7804 - }, - { - "epoch": 0.7038824006853948, - "grad_norm": 1.4595610310103577, - "learning_rate": 8.512923331437097e-07, - "loss": 1.0175, - "step": 7805 - }, - { - "epoch": 0.703972584208865, - "grad_norm": 1.3626384804722582, - "learning_rate": 8.508141431572755e-07, - "loss": 0.9069, - "step": 7806 - }, - { - "epoch": 0.7040627677323353, - "grad_norm": 1.660796421264181, - "learning_rate": 8.503360512257152e-07, - "loss": 0.9282, - "step": 7807 - }, - { - "epoch": 0.7041529512558056, - "grad_norm": 1.559234410983408, - "learning_rate": 8.498580573898219e-07, - "loss": 0.9011, - "step": 7808 - }, - { - "epoch": 0.7042431347792758, - "grad_norm": 1.7815997800889412, - "learning_rate": 8.493801616903813e-07, - "loss": 0.9461, - "step": 7809 - }, - { - "epoch": 0.704333318302746, - "grad_norm": 2.3421545065807536, - "learning_rate": 8.489023641681705e-07, - "loss": 0.9565, - "step": 7810 - }, - { - "epoch": 0.7044235018262164, - "grad_norm": 1.364271269003517, - "learning_rate": 8.484246648639555e-07, - "loss": 0.9901, - "step": 7811 - }, - { - "epoch": 0.7045136853496866, - "grad_norm": 1.2573415218014492, - "learning_rate": 8.479470638184994e-07, - "loss": 0.818, - "step": 7812 - }, - { - "epoch": 0.7046038688731568, - "grad_norm": 2.314598780688343, - "learning_rate": 8.474695610725513e-07, - "loss": 0.8663, - "step": 7813 - }, - { - "epoch": 0.7046940523966272, - "grad_norm": 1.2776200581850545, - "learning_rate": 8.469921566668552e-07, - "loss": 1.0121, - "step": 7814 - }, - { - "epoch": 0.7047842359200974, - "grad_norm": 1.4097877929838725, - "learning_rate": 8.46514850642146e-07, - "loss": 0.9939, - "step": 7815 - }, - { - "epoch": 0.7048744194435677, - "grad_norm": 1.5242500681277262, - "learning_rate": 8.460376430391499e-07, - "loss": 0.9268, - "step": 7816 - }, - { - "epoch": 0.7049646029670379, - "grad_norm": 1.125975344972374, - "learning_rate": 8.455605338985858e-07, - "loss": 0.9633, - "step": 7817 - }, - { - "epoch": 0.7050547864905082, - "grad_norm": 1.3510083071144803, - "learning_rate": 8.45083523261161e-07, - "loss": 0.7977, - "step": 7818 - }, - { - "epoch": 0.7051449700139785, - "grad_norm": 1.2035637092827913, - "learning_rate": 8.446066111675796e-07, - "loss": 0.9469, - "step": 7819 - }, - { - "epoch": 0.7052351535374487, - "grad_norm": 1.573206270685218, - "learning_rate": 8.441297976585314e-07, - "loss": 0.8345, - "step": 7820 - }, - { - "epoch": 0.7053253370609189, - "grad_norm": 1.4952996838378163, - "learning_rate": 8.436530827747037e-07, - "loss": 1.0182, - "step": 7821 - }, - { - "epoch": 0.7054155205843893, - "grad_norm": 2.010036913594487, - "learning_rate": 8.431764665567704e-07, - "loss": 1.0697, - "step": 7822 - }, - { - "epoch": 0.7055057041078595, - "grad_norm": 1.6317212414341227, - "learning_rate": 8.426999490453996e-07, - "loss": 0.8801, - "step": 7823 - }, - { - "epoch": 0.7055958876313297, - "grad_norm": 1.7707965697417933, - "learning_rate": 8.422235302812504e-07, - "loss": 0.948, - "step": 7824 - }, - { - "epoch": 0.7056860711548, - "grad_norm": 1.7315106495224886, - "learning_rate": 8.417472103049734e-07, - "loss": 1.0103, - "step": 7825 - }, - { - "epoch": 0.7057762546782703, - "grad_norm": 1.4835507965699786, - "learning_rate": 8.412709891572112e-07, - "loss": 0.9727, - "step": 7826 - }, - { - "epoch": 0.7058664382017406, - "grad_norm": 1.7347120352775887, - "learning_rate": 8.407948668785978e-07, - "loss": 0.9717, - "step": 7827 - }, - { - "epoch": 0.7059566217252108, - "grad_norm": 1.3435803683355745, - "learning_rate": 8.403188435097576e-07, - "loss": 0.9331, - "step": 7828 - }, - { - "epoch": 0.706046805248681, - "grad_norm": 1.3348636438261954, - "learning_rate": 8.398429190913081e-07, - "loss": 1.0036, - "step": 7829 - }, - { - "epoch": 0.7061369887721514, - "grad_norm": 1.456917438715397, - "learning_rate": 8.393670936638578e-07, - "loss": 0.9386, - "step": 7830 - }, - { - "epoch": 0.7062271722956216, - "grad_norm": 1.5287625876381796, - "learning_rate": 8.388913672680067e-07, - "loss": 0.937, - "step": 7831 - }, - { - "epoch": 0.7063173558190918, - "grad_norm": 1.5070016687000456, - "learning_rate": 8.384157399443472e-07, - "loss": 0.8887, - "step": 7832 - }, - { - "epoch": 0.7064075393425621, - "grad_norm": 1.4573757388241233, - "learning_rate": 8.379402117334601e-07, - "loss": 0.9707, - "step": 7833 - }, - { - "epoch": 0.7064977228660324, - "grad_norm": 1.4218661968252126, - "learning_rate": 8.374647826759232e-07, - "loss": 0.936, - "step": 7834 - }, - { - "epoch": 0.7065879063895026, - "grad_norm": 1.5954692020103673, - "learning_rate": 8.369894528122998e-07, - "loss": 0.9397, - "step": 7835 - }, - { - "epoch": 0.7066780899129729, - "grad_norm": 1.879043669245448, - "learning_rate": 8.365142221831505e-07, - "loss": 0.8096, - "step": 7836 - }, - { - "epoch": 0.7067682734364432, - "grad_norm": 1.734712928636448, - "learning_rate": 8.360390908290222e-07, - "loss": 0.9026, - "step": 7837 - }, - { - "epoch": 0.7068584569599135, - "grad_norm": 1.5023590452116864, - "learning_rate": 8.355640587904569e-07, - "loss": 0.9177, - "step": 7838 - }, - { - "epoch": 0.7069486404833837, - "grad_norm": 1.4704981201274, - "learning_rate": 8.350891261079866e-07, - "loss": 0.9517, - "step": 7839 - }, - { - "epoch": 0.7070388240068539, - "grad_norm": 1.5078736920367284, - "learning_rate": 8.346142928221356e-07, - "loss": 0.9801, - "step": 7840 - }, - { - "epoch": 0.7071290075303243, - "grad_norm": 1.5600471317067195, - "learning_rate": 8.341395589734189e-07, - "loss": 0.7843, - "step": 7841 - }, - { - "epoch": 0.7072191910537945, - "grad_norm": 1.756758106621761, - "learning_rate": 8.336649246023433e-07, - "loss": 0.8909, - "step": 7842 - }, - { - "epoch": 0.7073093745772647, - "grad_norm": 1.3474976819501483, - "learning_rate": 8.331903897494084e-07, - "loss": 0.8451, - "step": 7843 - }, - { - "epoch": 0.707399558100735, - "grad_norm": 1.722652443948637, - "learning_rate": 8.327159544551024e-07, - "loss": 0.9449, - "step": 7844 - }, - { - "epoch": 0.7074897416242053, - "grad_norm": 1.4240862242225314, - "learning_rate": 8.322416187599073e-07, - "loss": 0.8877, - "step": 7845 - }, - { - "epoch": 0.7075799251476755, - "grad_norm": 1.6075292576905265, - "learning_rate": 8.317673827042963e-07, - "loss": 0.9622, - "step": 7846 - }, - { - "epoch": 0.7076701086711458, - "grad_norm": 1.4070972009904925, - "learning_rate": 8.312932463287339e-07, - "loss": 0.9259, - "step": 7847 - }, - { - "epoch": 0.707760292194616, - "grad_norm": 1.7092974043674052, - "learning_rate": 8.308192096736759e-07, - "loss": 0.9052, - "step": 7848 - }, - { - "epoch": 0.7078504757180863, - "grad_norm": 1.3283801114274845, - "learning_rate": 8.303452727795703e-07, - "loss": 0.9134, - "step": 7849 - }, - { - "epoch": 0.7079406592415566, - "grad_norm": 1.5371044890775667, - "learning_rate": 8.298714356868542e-07, - "loss": 1.0099, - "step": 7850 - }, - { - "epoch": 0.7080308427650268, - "grad_norm": 1.7834102848717681, - "learning_rate": 8.293976984359605e-07, - "loss": 0.9518, - "step": 7851 - }, - { - "epoch": 0.708121026288497, - "grad_norm": 1.5842603262007497, - "learning_rate": 8.289240610673092e-07, - "loss": 0.9354, - "step": 7852 - }, - { - "epoch": 0.7082112098119674, - "grad_norm": 1.572850619649421, - "learning_rate": 8.284505236213144e-07, - "loss": 0.9667, - "step": 7853 - }, - { - "epoch": 0.7083013933354376, - "grad_norm": 1.4166803733312199, - "learning_rate": 8.279770861383806e-07, - "loss": 0.9277, - "step": 7854 - }, - { - "epoch": 0.7083915768589079, - "grad_norm": 1.316538908751825, - "learning_rate": 8.275037486589042e-07, - "loss": 0.9279, - "step": 7855 - }, - { - "epoch": 0.7084817603823781, - "grad_norm": 0.7332685535891603, - "learning_rate": 8.270305112232739e-07, - "loss": 0.7872, - "step": 7856 - }, - { - "epoch": 0.7085719439058484, - "grad_norm": 1.3790544208745508, - "learning_rate": 8.265573738718665e-07, - "loss": 0.9305, - "step": 7857 - }, - { - "epoch": 0.7086621274293187, - "grad_norm": 1.5358735173074578, - "learning_rate": 8.260843366450559e-07, - "loss": 0.8999, - "step": 7858 - }, - { - "epoch": 0.7087523109527889, - "grad_norm": 1.4644119643570004, - "learning_rate": 8.256113995832017e-07, - "loss": 0.9381, - "step": 7859 - }, - { - "epoch": 0.7088424944762591, - "grad_norm": 1.2425974523293783, - "learning_rate": 8.251385627266583e-07, - "loss": 0.9825, - "step": 7860 - }, - { - "epoch": 0.7089326779997295, - "grad_norm": 1.5926333985082906, - "learning_rate": 8.24665826115771e-07, - "loss": 0.9314, - "step": 7861 - }, - { - "epoch": 0.7090228615231997, - "grad_norm": 1.6167622619890631, - "learning_rate": 8.241931897908763e-07, - "loss": 0.9534, - "step": 7862 - }, - { - "epoch": 0.7091130450466699, - "grad_norm": 1.3832164971675616, - "learning_rate": 8.237206537923016e-07, - "loss": 0.9629, - "step": 7863 - }, - { - "epoch": 0.7092032285701403, - "grad_norm": 1.771471252729272, - "learning_rate": 8.232482181603671e-07, - "loss": 0.9257, - "step": 7864 - }, - { - "epoch": 0.7092934120936105, - "grad_norm": 1.5468766954200779, - "learning_rate": 8.227758829353828e-07, - "loss": 0.9099, - "step": 7865 - }, - { - "epoch": 0.7093835956170808, - "grad_norm": 1.513615406582795, - "learning_rate": 8.223036481576522e-07, - "loss": 0.9072, - "step": 7866 - }, - { - "epoch": 0.709473779140551, - "grad_norm": 1.7786580015988491, - "learning_rate": 8.218315138674672e-07, - "loss": 0.9455, - "step": 7867 - }, - { - "epoch": 0.7095639626640213, - "grad_norm": 0.7414644835487586, - "learning_rate": 8.21359480105114e-07, - "loss": 0.7903, - "step": 7868 - }, - { - "epoch": 0.7096541461874916, - "grad_norm": 1.6186647164402692, - "learning_rate": 8.208875469108689e-07, - "loss": 0.9698, - "step": 7869 - }, - { - "epoch": 0.7097443297109618, - "grad_norm": 1.9194373201359782, - "learning_rate": 8.204157143249997e-07, - "loss": 0.924, - "step": 7870 - }, - { - "epoch": 0.709834513234432, - "grad_norm": 1.219904914808907, - "learning_rate": 8.199439823877668e-07, - "loss": 0.9122, - "step": 7871 - }, - { - "epoch": 0.7099246967579024, - "grad_norm": 1.3861903943661504, - "learning_rate": 8.194723511394186e-07, - "loss": 0.9272, - "step": 7872 - }, - { - "epoch": 0.7100148802813726, - "grad_norm": 1.9771225328113495, - "learning_rate": 8.190008206202002e-07, - "loss": 0.8955, - "step": 7873 - }, - { - "epoch": 0.7101050638048428, - "grad_norm": 1.4597549140012531, - "learning_rate": 8.185293908703423e-07, - "loss": 0.9803, - "step": 7874 - }, - { - "epoch": 0.7101952473283131, - "grad_norm": 1.6447189840049987, - "learning_rate": 8.180580619300727e-07, - "loss": 0.9018, - "step": 7875 - }, - { - "epoch": 0.7102854308517834, - "grad_norm": 1.2341264402044396, - "learning_rate": 8.175868338396057e-07, - "loss": 0.8913, - "step": 7876 - }, - { - "epoch": 0.7103756143752537, - "grad_norm": 1.632867182495869, - "learning_rate": 8.171157066391499e-07, - "loss": 0.915, - "step": 7877 - }, - { - "epoch": 0.7104657978987239, - "grad_norm": 1.6768261335136394, - "learning_rate": 8.166446803689045e-07, - "loss": 0.963, - "step": 7878 - }, - { - "epoch": 0.7105559814221941, - "grad_norm": 0.6760379105411175, - "learning_rate": 8.161737550690595e-07, - "loss": 0.8025, - "step": 7879 - }, - { - "epoch": 0.7106461649456645, - "grad_norm": 1.3568668246099809, - "learning_rate": 8.157029307797976e-07, - "loss": 0.9642, - "step": 7880 - }, - { - "epoch": 0.7107363484691347, - "grad_norm": 1.2822792757098265, - "learning_rate": 8.152322075412925e-07, - "loss": 0.9855, - "step": 7881 - }, - { - "epoch": 0.7108265319926049, - "grad_norm": 1.3179701392998013, - "learning_rate": 8.147615853937073e-07, - "loss": 0.8929, - "step": 7882 - }, - { - "epoch": 0.7109167155160752, - "grad_norm": 1.4400478671648218, - "learning_rate": 8.142910643771992e-07, - "loss": 0.8644, - "step": 7883 - }, - { - "epoch": 0.7110068990395455, - "grad_norm": 1.7325107074313093, - "learning_rate": 8.138206445319152e-07, - "loss": 0.8979, - "step": 7884 - }, - { - "epoch": 0.7110970825630157, - "grad_norm": 1.442792520465294, - "learning_rate": 8.133503258979944e-07, - "loss": 0.8392, - "step": 7885 - }, - { - "epoch": 0.711187266086486, - "grad_norm": 1.4984215537645789, - "learning_rate": 8.12880108515567e-07, - "loss": 0.8579, - "step": 7886 - }, - { - "epoch": 0.7112774496099563, - "grad_norm": 1.3211102304722153, - "learning_rate": 8.124099924247543e-07, - "loss": 0.9066, - "step": 7887 - }, - { - "epoch": 0.7113676331334265, - "grad_norm": 1.4780408936135596, - "learning_rate": 8.119399776656701e-07, - "loss": 0.8485, - "step": 7888 - }, - { - "epoch": 0.7114578166568968, - "grad_norm": 1.3283621183664438, - "learning_rate": 8.114700642784167e-07, - "loss": 0.9383, - "step": 7889 - }, - { - "epoch": 0.711548000180367, - "grad_norm": 1.2248022484272227, - "learning_rate": 8.110002523030921e-07, - "loss": 0.9878, - "step": 7890 - }, - { - "epoch": 0.7116381837038374, - "grad_norm": 1.2865792602455481, - "learning_rate": 8.105305417797808e-07, - "loss": 0.9633, - "step": 7891 - }, - { - "epoch": 0.7117283672273076, - "grad_norm": 1.3530506034291783, - "learning_rate": 8.100609327485635e-07, - "loss": 0.9719, - "step": 7892 - }, - { - "epoch": 0.7118185507507778, - "grad_norm": 1.9255593812355054, - "learning_rate": 8.095914252495082e-07, - "loss": 0.9918, - "step": 7893 - }, - { - "epoch": 0.7119087342742481, - "grad_norm": 1.7824448041676284, - "learning_rate": 8.091220193226762e-07, - "loss": 0.9091, - "step": 7894 - }, - { - "epoch": 0.7119989177977184, - "grad_norm": 1.2763512164563615, - "learning_rate": 8.0865271500812e-07, - "loss": 0.9208, - "step": 7895 - }, - { - "epoch": 0.7120891013211886, - "grad_norm": 1.5588659369112166, - "learning_rate": 8.081835123458831e-07, - "loss": 0.9801, - "step": 7896 - }, - { - "epoch": 0.7121792848446589, - "grad_norm": 1.5099816408367561, - "learning_rate": 8.077144113760013e-07, - "loss": 0.9272, - "step": 7897 - }, - { - "epoch": 0.7122694683681291, - "grad_norm": 1.4077520190254789, - "learning_rate": 8.072454121384995e-07, - "loss": 0.9137, - "step": 7898 - }, - { - "epoch": 0.7123596518915994, - "grad_norm": 1.2871329923324013, - "learning_rate": 8.067765146733958e-07, - "loss": 0.9743, - "step": 7899 - }, - { - "epoch": 0.7124498354150697, - "grad_norm": 1.5391023117570541, - "learning_rate": 8.063077190206993e-07, - "loss": 1.0107, - "step": 7900 - }, - { - "epoch": 0.7125400189385399, - "grad_norm": 0.6584598934672723, - "learning_rate": 8.058390252204101e-07, - "loss": 0.7106, - "step": 7901 - }, - { - "epoch": 0.7126302024620101, - "grad_norm": 1.297469726606234, - "learning_rate": 8.0537043331252e-07, - "loss": 0.9322, - "step": 7902 - }, - { - "epoch": 0.7127203859854805, - "grad_norm": 1.5616404648777449, - "learning_rate": 8.049019433370121e-07, - "loss": 0.8717, - "step": 7903 - }, - { - "epoch": 0.7128105695089507, - "grad_norm": 1.3991963975272197, - "learning_rate": 8.044335553338588e-07, - "loss": 0.9401, - "step": 7904 - }, - { - "epoch": 0.712900753032421, - "grad_norm": 2.9788967801715627, - "learning_rate": 8.039652693430281e-07, - "loss": 0.9432, - "step": 7905 - }, - { - "epoch": 0.7129909365558912, - "grad_norm": 3.6650073169255304, - "learning_rate": 8.034970854044742e-07, - "loss": 0.9145, - "step": 7906 - }, - { - "epoch": 0.7130811200793615, - "grad_norm": 1.4196810721491826, - "learning_rate": 8.03029003558148e-07, - "loss": 1.0318, - "step": 7907 - }, - { - "epoch": 0.7131713036028318, - "grad_norm": 1.616630421392768, - "learning_rate": 8.025610238439864e-07, - "loss": 0.997, - "step": 7908 - }, - { - "epoch": 0.713261487126302, - "grad_norm": 1.94702851010194, - "learning_rate": 8.020931463019207e-07, - "loss": 1.0134, - "step": 7909 - }, - { - "epoch": 0.7133516706497723, - "grad_norm": 1.4879681115117536, - "learning_rate": 8.016253709718732e-07, - "loss": 0.8776, - "step": 7910 - }, - { - "epoch": 0.7134418541732426, - "grad_norm": 1.4867573142249435, - "learning_rate": 8.011576978937567e-07, - "loss": 0.8492, - "step": 7911 - }, - { - "epoch": 0.7135320376967128, - "grad_norm": 2.21985555694398, - "learning_rate": 8.006901271074764e-07, - "loss": 0.9385, - "step": 7912 - }, - { - "epoch": 0.713622221220183, - "grad_norm": 1.6169562427825956, - "learning_rate": 8.002226586529261e-07, - "loss": 0.9768, - "step": 7913 - }, - { - "epoch": 0.7137124047436534, - "grad_norm": 1.464726720585555, - "learning_rate": 7.997552925699956e-07, - "loss": 1.0192, - "step": 7914 - }, - { - "epoch": 0.7138025882671236, - "grad_norm": 1.5760090305389147, - "learning_rate": 7.992880288985606e-07, - "loss": 1.0283, - "step": 7915 - }, - { - "epoch": 0.7138927717905938, - "grad_norm": 1.392231088762971, - "learning_rate": 7.988208676784918e-07, - "loss": 0.9443, - "step": 7916 - }, - { - "epoch": 0.7139829553140641, - "grad_norm": 1.5629952980844524, - "learning_rate": 7.983538089496497e-07, - "loss": 0.939, - "step": 7917 - }, - { - "epoch": 0.7140731388375344, - "grad_norm": 1.4620371061539847, - "learning_rate": 7.978868527518864e-07, - "loss": 0.8945, - "step": 7918 - }, - { - "epoch": 0.7141633223610047, - "grad_norm": 1.4559705212302227, - "learning_rate": 7.974199991250455e-07, - "loss": 0.9139, - "step": 7919 - }, - { - "epoch": 0.7142535058844749, - "grad_norm": 1.780208082145136, - "learning_rate": 7.969532481089616e-07, - "loss": 1.0347, - "step": 7920 - }, - { - "epoch": 0.7143436894079451, - "grad_norm": 1.5286334512221007, - "learning_rate": 7.964865997434589e-07, - "loss": 0.9515, - "step": 7921 - }, - { - "epoch": 0.7144338729314155, - "grad_norm": 1.8896151911844392, - "learning_rate": 7.96020054068357e-07, - "loss": 1.0116, - "step": 7922 - }, - { - "epoch": 0.7145240564548857, - "grad_norm": 1.2187364039518573, - "learning_rate": 7.95553611123462e-07, - "loss": 0.897, - "step": 7923 - }, - { - "epoch": 0.7146142399783559, - "grad_norm": 2.2790519890071423, - "learning_rate": 7.950872709485741e-07, - "loss": 1.1111, - "step": 7924 - }, - { - "epoch": 0.7147044235018262, - "grad_norm": 2.9264154905431687, - "learning_rate": 7.946210335834842e-07, - "loss": 0.99, - "step": 7925 - }, - { - "epoch": 0.7147946070252965, - "grad_norm": 1.2168944979429566, - "learning_rate": 7.94154899067974e-07, - "loss": 0.862, - "step": 7926 - }, - { - "epoch": 0.7148847905487667, - "grad_norm": 1.3000162985586947, - "learning_rate": 7.936888674418177e-07, - "loss": 0.8589, - "step": 7927 - }, - { - "epoch": 0.714974974072237, - "grad_norm": 1.729763119637479, - "learning_rate": 7.932229387447771e-07, - "loss": 0.9398, - "step": 7928 - }, - { - "epoch": 0.7150651575957072, - "grad_norm": 1.804687632110723, - "learning_rate": 7.927571130166109e-07, - "loss": 0.9239, - "step": 7929 - }, - { - "epoch": 0.7151553411191776, - "grad_norm": 0.7782076152162247, - "learning_rate": 7.922913902970632e-07, - "loss": 0.7894, - "step": 7930 - }, - { - "epoch": 0.7152455246426478, - "grad_norm": 1.4300178187934058, - "learning_rate": 7.918257706258744e-07, - "loss": 0.9356, - "step": 7931 - }, - { - "epoch": 0.715335708166118, - "grad_norm": 1.7663152240175912, - "learning_rate": 7.913602540427724e-07, - "loss": 0.9439, - "step": 7932 - }, - { - "epoch": 0.7154258916895884, - "grad_norm": 1.4806776075570103, - "learning_rate": 7.908948405874775e-07, - "loss": 0.9303, - "step": 7933 - }, - { - "epoch": 0.7155160752130586, - "grad_norm": 1.5391953309341246, - "learning_rate": 7.904295302997019e-07, - "loss": 0.8712, - "step": 7934 - }, - { - "epoch": 0.7156062587365288, - "grad_norm": 1.3080682866706095, - "learning_rate": 7.899643232191484e-07, - "loss": 0.9539, - "step": 7935 - }, - { - "epoch": 0.7156964422599991, - "grad_norm": 1.2923944125405054, - "learning_rate": 7.894992193855108e-07, - "loss": 0.9143, - "step": 7936 - }, - { - "epoch": 0.7157866257834694, - "grad_norm": 1.7315888579324046, - "learning_rate": 7.890342188384751e-07, - "loss": 0.8814, - "step": 7937 - }, - { - "epoch": 0.7158768093069396, - "grad_norm": 1.4436696381104908, - "learning_rate": 7.885693216177165e-07, - "loss": 0.8774, - "step": 7938 - }, - { - "epoch": 0.7159669928304099, - "grad_norm": 2.435413347758133, - "learning_rate": 7.88104527762903e-07, - "loss": 0.928, - "step": 7939 - }, - { - "epoch": 0.7160571763538801, - "grad_norm": 1.5822994805403192, - "learning_rate": 7.876398373136936e-07, - "loss": 0.8963, - "step": 7940 - }, - { - "epoch": 0.7161473598773505, - "grad_norm": 1.5527017458000498, - "learning_rate": 7.87175250309738e-07, - "loss": 0.9359, - "step": 7941 - }, - { - "epoch": 0.7162375434008207, - "grad_norm": 1.3962553676819656, - "learning_rate": 7.867107667906785e-07, - "loss": 1.0071, - "step": 7942 - }, - { - "epoch": 0.7163277269242909, - "grad_norm": 1.5937451381235201, - "learning_rate": 7.862463867961446e-07, - "loss": 0.9294, - "step": 7943 - }, - { - "epoch": 0.7164179104477612, - "grad_norm": 1.2769045760155258, - "learning_rate": 7.857821103657632e-07, - "loss": 0.9988, - "step": 7944 - }, - { - "epoch": 0.7165080939712315, - "grad_norm": 1.4445065287346426, - "learning_rate": 7.853179375391459e-07, - "loss": 0.9619, - "step": 7945 - }, - { - "epoch": 0.7165982774947017, - "grad_norm": 1.4055918530957034, - "learning_rate": 7.848538683559012e-07, - "loss": 0.9234, - "step": 7946 - }, - { - "epoch": 0.716688461018172, - "grad_norm": 1.5358650570741101, - "learning_rate": 7.843899028556238e-07, - "loss": 0.9768, - "step": 7947 - }, - { - "epoch": 0.7167786445416422, - "grad_norm": 1.2389145925209455, - "learning_rate": 7.839260410779029e-07, - "loss": 0.9333, - "step": 7948 - }, - { - "epoch": 0.7168688280651125, - "grad_norm": 1.2926029944196153, - "learning_rate": 7.834622830623175e-07, - "loss": 0.9582, - "step": 7949 - }, - { - "epoch": 0.7169590115885828, - "grad_norm": 1.6482250926494926, - "learning_rate": 7.82998628848438e-07, - "loss": 0.9104, - "step": 7950 - }, - { - "epoch": 0.717049195112053, - "grad_norm": 1.309220303521906, - "learning_rate": 7.825350784758261e-07, - "loss": 0.9311, - "step": 7951 - }, - { - "epoch": 0.7171393786355232, - "grad_norm": 1.2568552390678693, - "learning_rate": 7.820716319840342e-07, - "loss": 0.8947, - "step": 7952 - }, - { - "epoch": 0.7172295621589936, - "grad_norm": 1.4609747897198293, - "learning_rate": 7.816082894126069e-07, - "loss": 1.0117, - "step": 7953 - }, - { - "epoch": 0.7173197456824638, - "grad_norm": 1.4381574702835136, - "learning_rate": 7.811450508010778e-07, - "loss": 0.903, - "step": 7954 - }, - { - "epoch": 0.717409929205934, - "grad_norm": 1.3852058862182708, - "learning_rate": 7.806819161889737e-07, - "loss": 0.9082, - "step": 7955 - }, - { - "epoch": 0.7175001127294043, - "grad_norm": 1.3794554570339246, - "learning_rate": 7.802188856158119e-07, - "loss": 0.9647, - "step": 7956 - }, - { - "epoch": 0.7175902962528746, - "grad_norm": 1.9277362920100334, - "learning_rate": 7.797559591211002e-07, - "loss": 0.9164, - "step": 7957 - }, - { - "epoch": 0.7176804797763449, - "grad_norm": 1.3649785534777323, - "learning_rate": 7.79293136744339e-07, - "loss": 0.8974, - "step": 7958 - }, - { - "epoch": 0.7177706632998151, - "grad_norm": 1.5363355770749934, - "learning_rate": 7.788304185250185e-07, - "loss": 0.9359, - "step": 7959 - }, - { - "epoch": 0.7178608468232854, - "grad_norm": 8.442208339814414, - "learning_rate": 7.78367804502619e-07, - "loss": 0.9638, - "step": 7960 - }, - { - "epoch": 0.7179510303467557, - "grad_norm": 1.6866651518567386, - "learning_rate": 7.779052947166156e-07, - "loss": 0.9469, - "step": 7961 - }, - { - "epoch": 0.7180412138702259, - "grad_norm": 1.6139233952982754, - "learning_rate": 7.774428892064697e-07, - "loss": 0.8892, - "step": 7962 - }, - { - "epoch": 0.7181313973936961, - "grad_norm": 1.4747700786811293, - "learning_rate": 7.769805880116391e-07, - "loss": 0.9236, - "step": 7963 - }, - { - "epoch": 0.7182215809171665, - "grad_norm": 2.1382300351594674, - "learning_rate": 7.765183911715678e-07, - "loss": 0.9129, - "step": 7964 - }, - { - "epoch": 0.7183117644406367, - "grad_norm": 1.3451485563863081, - "learning_rate": 7.760562987256933e-07, - "loss": 0.8325, - "step": 7965 - }, - { - "epoch": 0.718401947964107, - "grad_norm": 1.471135576152943, - "learning_rate": 7.755943107134444e-07, - "loss": 0.937, - "step": 7966 - }, - { - "epoch": 0.7184921314875772, - "grad_norm": 0.7007941075807244, - "learning_rate": 7.751324271742401e-07, - "loss": 0.8247, - "step": 7967 - }, - { - "epoch": 0.7185823150110475, - "grad_norm": 1.409278533070123, - "learning_rate": 7.746706481474916e-07, - "loss": 0.9536, - "step": 7968 - }, - { - "epoch": 0.7186724985345178, - "grad_norm": 0.6715146362516117, - "learning_rate": 7.742089736725992e-07, - "loss": 0.757, - "step": 7969 - }, - { - "epoch": 0.718762682057988, - "grad_norm": 1.5914995348729237, - "learning_rate": 7.737474037889559e-07, - "loss": 0.8528, - "step": 7970 - }, - { - "epoch": 0.7188528655814582, - "grad_norm": 1.4260563833156759, - "learning_rate": 7.732859385359458e-07, - "loss": 0.9052, - "step": 7971 - }, - { - "epoch": 0.7189430491049286, - "grad_norm": 1.5284845720934463, - "learning_rate": 7.728245779529434e-07, - "loss": 0.8585, - "step": 7972 - }, - { - "epoch": 0.7190332326283988, - "grad_norm": 1.4147258066932191, - "learning_rate": 7.723633220793146e-07, - "loss": 0.9609, - "step": 7973 - }, - { - "epoch": 0.719123416151869, - "grad_norm": 1.514941661202478, - "learning_rate": 7.719021709544162e-07, - "loss": 0.827, - "step": 7974 - }, - { - "epoch": 0.7192135996753393, - "grad_norm": 1.407299751956885, - "learning_rate": 7.714411246175964e-07, - "loss": 1.0217, - "step": 7975 - }, - { - "epoch": 0.7193037831988096, - "grad_norm": 1.4539171746287827, - "learning_rate": 7.709801831081946e-07, - "loss": 0.8513, - "step": 7976 - }, - { - "epoch": 0.7193939667222798, - "grad_norm": 1.5329193046915566, - "learning_rate": 7.705193464655391e-07, - "loss": 0.9001, - "step": 7977 - }, - { - "epoch": 0.7194841502457501, - "grad_norm": 1.2649619638054628, - "learning_rate": 7.700586147289534e-07, - "loss": 0.9102, - "step": 7978 - }, - { - "epoch": 0.7195743337692203, - "grad_norm": 1.3309817388615985, - "learning_rate": 7.695979879377481e-07, - "loss": 0.8935, - "step": 7979 - }, - { - "epoch": 0.7196645172926907, - "grad_norm": 1.341590165786663, - "learning_rate": 7.691374661312266e-07, - "loss": 0.9531, - "step": 7980 - }, - { - "epoch": 0.7197547008161609, - "grad_norm": 1.548079512986278, - "learning_rate": 7.686770493486834e-07, - "loss": 0.9109, - "step": 7981 - }, - { - "epoch": 0.7198448843396311, - "grad_norm": 1.4538080394439143, - "learning_rate": 7.68216737629404e-07, - "loss": 0.875, - "step": 7982 - }, - { - "epoch": 0.7199350678631015, - "grad_norm": 1.3492390059566939, - "learning_rate": 7.67756531012665e-07, - "loss": 1.0006, - "step": 7983 - }, - { - "epoch": 0.7200252513865717, - "grad_norm": 1.4312576260426013, - "learning_rate": 7.67296429537732e-07, - "loss": 0.9852, - "step": 7984 - }, - { - "epoch": 0.7201154349100419, - "grad_norm": 1.4653043709381188, - "learning_rate": 7.668364332438661e-07, - "loss": 0.9922, - "step": 7985 - }, - { - "epoch": 0.7202056184335122, - "grad_norm": 1.2820515800133383, - "learning_rate": 7.663765421703145e-07, - "loss": 0.9383, - "step": 7986 - }, - { - "epoch": 0.7202958019569825, - "grad_norm": 1.4986843856434904, - "learning_rate": 7.659167563563187e-07, - "loss": 0.8677, - "step": 7987 - }, - { - "epoch": 0.7203859854804527, - "grad_norm": 1.3803502375751302, - "learning_rate": 7.654570758411096e-07, - "loss": 0.9513, - "step": 7988 - }, - { - "epoch": 0.720476169003923, - "grad_norm": 1.6283128421370034, - "learning_rate": 7.649975006639103e-07, - "loss": 0.9959, - "step": 7989 - }, - { - "epoch": 0.7205663525273932, - "grad_norm": 0.6566932180607183, - "learning_rate": 7.645380308639337e-07, - "loss": 0.7625, - "step": 7990 - }, - { - "epoch": 0.7206565360508635, - "grad_norm": 1.4000202364821408, - "learning_rate": 7.640786664803853e-07, - "loss": 0.9827, - "step": 7991 - }, - { - "epoch": 0.7207467195743338, - "grad_norm": 1.244568468244981, - "learning_rate": 7.636194075524587e-07, - "loss": 0.8568, - "step": 7992 - }, - { - "epoch": 0.720836903097804, - "grad_norm": 1.3134969376469154, - "learning_rate": 7.631602541193429e-07, - "loss": 0.894, - "step": 7993 - }, - { - "epoch": 0.7209270866212742, - "grad_norm": 1.8528536387209882, - "learning_rate": 7.627012062202132e-07, - "loss": 0.995, - "step": 7994 - }, - { - "epoch": 0.7210172701447446, - "grad_norm": 1.5334487232382918, - "learning_rate": 7.622422638942391e-07, - "loss": 0.9299, - "step": 7995 - }, - { - "epoch": 0.7211074536682148, - "grad_norm": 1.1764469226742922, - "learning_rate": 7.617834271805801e-07, - "loss": 0.9985, - "step": 7996 - }, - { - "epoch": 0.7211976371916851, - "grad_norm": 0.660273908253741, - "learning_rate": 7.613246961183863e-07, - "loss": 0.7706, - "step": 7997 - }, - { - "epoch": 0.7212878207151553, - "grad_norm": 1.668780234416569, - "learning_rate": 7.608660707468002e-07, - "loss": 0.8945, - "step": 7998 - }, - { - "epoch": 0.7213780042386256, - "grad_norm": 1.4714546461876945, - "learning_rate": 7.604075511049522e-07, - "loss": 0.8307, - "step": 7999 - }, - { - "epoch": 0.7214681877620959, - "grad_norm": 1.3445478776328075, - "learning_rate": 7.599491372319682e-07, - "loss": 0.8793, - "step": 8000 - }, - { - "epoch": 0.7215583712855661, - "grad_norm": 1.1321774511394096, - "learning_rate": 7.594908291669601e-07, - "loss": 0.9322, - "step": 8001 - }, - { - "epoch": 0.7216485548090363, - "grad_norm": 1.4819866038964111, - "learning_rate": 7.590326269490359e-07, - "loss": 0.9704, - "step": 8002 - }, - { - "epoch": 0.7217387383325067, - "grad_norm": 1.2946742584855164, - "learning_rate": 7.585745306172899e-07, - "loss": 0.8907, - "step": 8003 - }, - { - "epoch": 0.7218289218559769, - "grad_norm": 1.4108492345227415, - "learning_rate": 7.5811654021081e-07, - "loss": 0.9187, - "step": 8004 - }, - { - "epoch": 0.7219191053794471, - "grad_norm": 1.3087132185874621, - "learning_rate": 7.576586557686748e-07, - "loss": 0.9602, - "step": 8005 - }, - { - "epoch": 0.7220092889029175, - "grad_norm": 1.5134026663326077, - "learning_rate": 7.572008773299531e-07, - "loss": 0.9083, - "step": 8006 - }, - { - "epoch": 0.7220994724263877, - "grad_norm": 1.4147879072909046, - "learning_rate": 7.567432049337055e-07, - "loss": 0.9661, - "step": 8007 - }, - { - "epoch": 0.722189655949858, - "grad_norm": 1.570891164563389, - "learning_rate": 7.562856386189834e-07, - "loss": 0.8723, - "step": 8008 - }, - { - "epoch": 0.7222798394733282, - "grad_norm": 3.1487626506555833, - "learning_rate": 7.558281784248275e-07, - "loss": 0.9464, - "step": 8009 - }, - { - "epoch": 0.7223700229967985, - "grad_norm": 1.411288960935226, - "learning_rate": 7.553708243902721e-07, - "loss": 0.9253, - "step": 8010 - }, - { - "epoch": 0.7224602065202688, - "grad_norm": 2.160439443391555, - "learning_rate": 7.549135765543404e-07, - "loss": 0.8258, - "step": 8011 - }, - { - "epoch": 0.722550390043739, - "grad_norm": 1.5273462476002597, - "learning_rate": 7.544564349560481e-07, - "loss": 0.9591, - "step": 8012 - }, - { - "epoch": 0.7226405735672092, - "grad_norm": 1.2120217176354888, - "learning_rate": 7.539993996344009e-07, - "loss": 0.9207, - "step": 8013 - }, - { - "epoch": 0.7227307570906796, - "grad_norm": 1.4724488453811917, - "learning_rate": 7.535424706283941e-07, - "loss": 0.9095, - "step": 8014 - }, - { - "epoch": 0.7228209406141498, - "grad_norm": 1.5560517579323183, - "learning_rate": 7.530856479770181e-07, - "loss": 0.894, - "step": 8015 - }, - { - "epoch": 0.72291112413762, - "grad_norm": 1.32054352855185, - "learning_rate": 7.526289317192484e-07, - "loss": 1.0131, - "step": 8016 - }, - { - "epoch": 0.7230013076610903, - "grad_norm": 1.4679657485874387, - "learning_rate": 7.521723218940579e-07, - "loss": 0.9366, - "step": 8017 - }, - { - "epoch": 0.7230914911845606, - "grad_norm": 1.5367224083814723, - "learning_rate": 7.517158185404038e-07, - "loss": 0.9939, - "step": 8018 - }, - { - "epoch": 0.7231816747080309, - "grad_norm": 1.3845492733349574, - "learning_rate": 7.512594216972403e-07, - "loss": 0.953, - "step": 8019 - }, - { - "epoch": 0.7232718582315011, - "grad_norm": 1.2802764170922611, - "learning_rate": 7.508031314035078e-07, - "loss": 0.9674, - "step": 8020 - }, - { - "epoch": 0.7233620417549713, - "grad_norm": 1.3950895159039989, - "learning_rate": 7.503469476981401e-07, - "loss": 0.9542, - "step": 8021 - }, - { - "epoch": 0.7234522252784417, - "grad_norm": 1.5066507241719387, - "learning_rate": 7.498908706200613e-07, - "loss": 0.8053, - "step": 8022 - }, - { - "epoch": 0.7235424088019119, - "grad_norm": 1.2670223857585088, - "learning_rate": 7.494349002081866e-07, - "loss": 0.9865, - "step": 8023 - }, - { - "epoch": 0.7236325923253821, - "grad_norm": 1.457139571103363, - "learning_rate": 7.489790365014224e-07, - "loss": 0.9551, - "step": 8024 - }, - { - "epoch": 0.7237227758488524, - "grad_norm": 1.4006222329884765, - "learning_rate": 7.485232795386642e-07, - "loss": 0.9498, - "step": 8025 - }, - { - "epoch": 0.7238129593723227, - "grad_norm": 1.415022466231825, - "learning_rate": 7.480676293588002e-07, - "loss": 0.9576, - "step": 8026 - }, - { - "epoch": 0.7239031428957929, - "grad_norm": 0.7444442970440214, - "learning_rate": 7.476120860007093e-07, - "loss": 0.8264, - "step": 8027 - }, - { - "epoch": 0.7239933264192632, - "grad_norm": 1.7280036124836284, - "learning_rate": 7.471566495032608e-07, - "loss": 0.98, - "step": 8028 - }, - { - "epoch": 0.7240835099427335, - "grad_norm": 1.3806173714132997, - "learning_rate": 7.467013199053152e-07, - "loss": 0.8699, - "step": 8029 - }, - { - "epoch": 0.7241736934662037, - "grad_norm": 1.4589227665937283, - "learning_rate": 7.46246097245724e-07, - "loss": 0.9321, - "step": 8030 - }, - { - "epoch": 0.724263876989674, - "grad_norm": 2.0004988286692598, - "learning_rate": 7.457909815633276e-07, - "loss": 0.9396, - "step": 8031 - }, - { - "epoch": 0.7243540605131442, - "grad_norm": 1.760780575249692, - "learning_rate": 7.453359728969618e-07, - "loss": 0.9315, - "step": 8032 - }, - { - "epoch": 0.7244442440366146, - "grad_norm": 1.2570525533270962, - "learning_rate": 7.448810712854475e-07, - "loss": 0.8531, - "step": 8033 - }, - { - "epoch": 0.7245344275600848, - "grad_norm": 1.3961659742052093, - "learning_rate": 7.444262767676022e-07, - "loss": 0.9671, - "step": 8034 - }, - { - "epoch": 0.724624611083555, - "grad_norm": 1.661571877485197, - "learning_rate": 7.439715893822296e-07, - "loss": 1.0103, - "step": 8035 - }, - { - "epoch": 0.7247147946070253, - "grad_norm": 1.3230633942141843, - "learning_rate": 7.435170091681264e-07, - "loss": 0.9552, - "step": 8036 - }, - { - "epoch": 0.7248049781304956, - "grad_norm": 1.5843558105269744, - "learning_rate": 7.430625361640803e-07, - "loss": 0.8947, - "step": 8037 - }, - { - "epoch": 0.7248951616539658, - "grad_norm": 1.902725463420288, - "learning_rate": 7.426081704088694e-07, - "loss": 0.9741, - "step": 8038 - }, - { - "epoch": 0.7249853451774361, - "grad_norm": 1.4904565807981072, - "learning_rate": 7.42153911941263e-07, - "loss": 1.0035, - "step": 8039 - }, - { - "epoch": 0.7250755287009063, - "grad_norm": 1.4045635283250855, - "learning_rate": 7.416997608000192e-07, - "loss": 0.9813, - "step": 8040 - }, - { - "epoch": 0.7251657122243766, - "grad_norm": 1.5091406156474056, - "learning_rate": 7.412457170238918e-07, - "loss": 0.9152, - "step": 8041 - }, - { - "epoch": 0.7252558957478469, - "grad_norm": 1.3687713168500504, - "learning_rate": 7.407917806516193e-07, - "loss": 0.8747, - "step": 8042 - }, - { - "epoch": 0.7253460792713171, - "grad_norm": 1.565278977095874, - "learning_rate": 7.403379517219354e-07, - "loss": 0.9279, - "step": 8043 - }, - { - "epoch": 0.7254362627947873, - "grad_norm": 1.553035836375572, - "learning_rate": 7.398842302735636e-07, - "loss": 0.963, - "step": 8044 - }, - { - "epoch": 0.7255264463182577, - "grad_norm": 0.6902725062391812, - "learning_rate": 7.394306163452171e-07, - "loss": 0.7703, - "step": 8045 - }, - { - "epoch": 0.7256166298417279, - "grad_norm": 1.1144864702768333, - "learning_rate": 7.38977109975601e-07, - "loss": 0.91, - "step": 8046 - }, - { - "epoch": 0.7257068133651982, - "grad_norm": 1.622123225920522, - "learning_rate": 7.385237112034119e-07, - "loss": 0.8448, - "step": 8047 - }, - { - "epoch": 0.7257969968886684, - "grad_norm": 1.5310217239562847, - "learning_rate": 7.380704200673342e-07, - "loss": 0.9932, - "step": 8048 - }, - { - "epoch": 0.7258871804121387, - "grad_norm": 1.2520810447244572, - "learning_rate": 7.376172366060478e-07, - "loss": 0.96, - "step": 8049 - }, - { - "epoch": 0.725977363935609, - "grad_norm": 1.3686414997870224, - "learning_rate": 7.371641608582187e-07, - "loss": 0.7448, - "step": 8050 - }, - { - "epoch": 0.7260675474590792, - "grad_norm": 1.8311223945815278, - "learning_rate": 7.367111928625067e-07, - "loss": 0.8026, - "step": 8051 - }, - { - "epoch": 0.7261577309825495, - "grad_norm": 1.5032671474568067, - "learning_rate": 7.362583326575613e-07, - "loss": 0.8425, - "step": 8052 - }, - { - "epoch": 0.7262479145060198, - "grad_norm": 1.6414841355188403, - "learning_rate": 7.358055802820234e-07, - "loss": 0.9173, - "step": 8053 - }, - { - "epoch": 0.72633809802949, - "grad_norm": 1.551740988369503, - "learning_rate": 7.353529357745245e-07, - "loss": 0.9042, - "step": 8054 - }, - { - "epoch": 0.7264282815529602, - "grad_norm": 1.7541545870524353, - "learning_rate": 7.349003991736851e-07, - "loss": 0.9765, - "step": 8055 - }, - { - "epoch": 0.7265184650764306, - "grad_norm": 1.5489267171144168, - "learning_rate": 7.344479705181206e-07, - "loss": 0.7838, - "step": 8056 - }, - { - "epoch": 0.7266086485999008, - "grad_norm": 1.4887149208990273, - "learning_rate": 7.339956498464322e-07, - "loss": 0.887, - "step": 8057 - }, - { - "epoch": 0.726698832123371, - "grad_norm": 1.3530109119553242, - "learning_rate": 7.335434371972169e-07, - "loss": 0.9851, - "step": 8058 - }, - { - "epoch": 0.7267890156468413, - "grad_norm": 1.379365666180287, - "learning_rate": 7.33091332609058e-07, - "loss": 0.919, - "step": 8059 - }, - { - "epoch": 0.7268791991703116, - "grad_norm": 1.5090218867439402, - "learning_rate": 7.326393361205323e-07, - "loss": 0.9451, - "step": 8060 - }, - { - "epoch": 0.7269693826937819, - "grad_norm": 1.4833517462769858, - "learning_rate": 7.321874477702068e-07, - "loss": 1.0144, - "step": 8061 - }, - { - "epoch": 0.7270595662172521, - "grad_norm": 0.7680584906517575, - "learning_rate": 7.317356675966386e-07, - "loss": 0.8462, - "step": 8062 - }, - { - "epoch": 0.7271497497407223, - "grad_norm": 1.220574993275294, - "learning_rate": 7.312839956383765e-07, - "loss": 0.9644, - "step": 8063 - }, - { - "epoch": 0.7272399332641927, - "grad_norm": 1.3767737305568286, - "learning_rate": 7.308324319339603e-07, - "loss": 0.9101, - "step": 8064 - }, - { - "epoch": 0.7273301167876629, - "grad_norm": 1.4357747215017125, - "learning_rate": 7.303809765219182e-07, - "loss": 0.9329, - "step": 8065 - }, - { - "epoch": 0.7274203003111331, - "grad_norm": 1.4457894852701667, - "learning_rate": 7.299296294407719e-07, - "loss": 0.9777, - "step": 8066 - }, - { - "epoch": 0.7275104838346034, - "grad_norm": 1.3466508293794315, - "learning_rate": 7.294783907290327e-07, - "loss": 0.9728, - "step": 8067 - }, - { - "epoch": 0.7276006673580737, - "grad_norm": 1.4944413025943923, - "learning_rate": 7.290272604252028e-07, - "loss": 0.8001, - "step": 8068 - }, - { - "epoch": 0.727690850881544, - "grad_norm": 1.5914718202426534, - "learning_rate": 7.285762385677758e-07, - "loss": 0.8468, - "step": 8069 - }, - { - "epoch": 0.7277810344050142, - "grad_norm": 1.316059010153719, - "learning_rate": 7.281253251952335e-07, - "loss": 0.9633, - "step": 8070 - }, - { - "epoch": 0.7278712179284844, - "grad_norm": 1.4265603641021884, - "learning_rate": 7.276745203460526e-07, - "loss": 0.9177, - "step": 8071 - }, - { - "epoch": 0.7279614014519548, - "grad_norm": 1.5952140217274817, - "learning_rate": 7.272238240586959e-07, - "loss": 0.8749, - "step": 8072 - }, - { - "epoch": 0.728051584975425, - "grad_norm": 1.22532105034022, - "learning_rate": 7.267732363716219e-07, - "loss": 0.8706, - "step": 8073 - }, - { - "epoch": 0.7281417684988952, - "grad_norm": 1.4271604790916312, - "learning_rate": 7.263227573232753e-07, - "loss": 0.8944, - "step": 8074 - }, - { - "epoch": 0.7282319520223655, - "grad_norm": 1.4412593611639017, - "learning_rate": 7.258723869520937e-07, - "loss": 1.0168, - "step": 8075 - }, - { - "epoch": 0.7283221355458358, - "grad_norm": 1.3977937548630102, - "learning_rate": 7.254221252965059e-07, - "loss": 0.9828, - "step": 8076 - }, - { - "epoch": 0.728412319069306, - "grad_norm": 1.7826049068659449, - "learning_rate": 7.249719723949301e-07, - "loss": 0.8454, - "step": 8077 - }, - { - "epoch": 0.7285025025927763, - "grad_norm": 1.3311103478910509, - "learning_rate": 7.245219282857761e-07, - "loss": 0.9505, - "step": 8078 - }, - { - "epoch": 0.7285926861162466, - "grad_norm": 2.0568220415371328, - "learning_rate": 7.240719930074442e-07, - "loss": 0.9671, - "step": 8079 - }, - { - "epoch": 0.7286828696397168, - "grad_norm": 1.646725167510727, - "learning_rate": 7.236221665983257e-07, - "loss": 0.9626, - "step": 8080 - }, - { - "epoch": 0.7287730531631871, - "grad_norm": 0.8571178966248456, - "learning_rate": 7.231724490968012e-07, - "loss": 0.8148, - "step": 8081 - }, - { - "epoch": 0.7288632366866573, - "grad_norm": 1.3371158779118772, - "learning_rate": 7.227228405412438e-07, - "loss": 0.9066, - "step": 8082 - }, - { - "epoch": 0.7289534202101277, - "grad_norm": 1.9178387817352274, - "learning_rate": 7.222733409700165e-07, - "loss": 0.8971, - "step": 8083 - }, - { - "epoch": 0.7290436037335979, - "grad_norm": 1.282374377130695, - "learning_rate": 7.21823950421473e-07, - "loss": 0.948, - "step": 8084 - }, - { - "epoch": 0.7291337872570681, - "grad_norm": 2.1298758722233324, - "learning_rate": 7.213746689339577e-07, - "loss": 0.906, - "step": 8085 - }, - { - "epoch": 0.7292239707805384, - "grad_norm": 1.5106672232633296, - "learning_rate": 7.20925496545807e-07, - "loss": 0.9397, - "step": 8086 - }, - { - "epoch": 0.7293141543040087, - "grad_norm": 1.4594767402293145, - "learning_rate": 7.20476433295344e-07, - "loss": 0.8448, - "step": 8087 - }, - { - "epoch": 0.7294043378274789, - "grad_norm": 1.4028756665654194, - "learning_rate": 7.200274792208882e-07, - "loss": 0.9659, - "step": 8088 - }, - { - "epoch": 0.7294945213509492, - "grad_norm": 1.5978301683445422, - "learning_rate": 7.195786343607444e-07, - "loss": 0.8253, - "step": 8089 - }, - { - "epoch": 0.7295847048744194, - "grad_norm": 1.4510281929363629, - "learning_rate": 7.191298987532131e-07, - "loss": 0.9261, - "step": 8090 - }, - { - "epoch": 0.7296748883978897, - "grad_norm": 1.3888355700536916, - "learning_rate": 7.186812724365805e-07, - "loss": 0.9446, - "step": 8091 - }, - { - "epoch": 0.72976507192136, - "grad_norm": 1.6873777133290504, - "learning_rate": 7.182327554491272e-07, - "loss": 0.9426, - "step": 8092 - }, - { - "epoch": 0.7298552554448302, - "grad_norm": 0.7147675353731541, - "learning_rate": 7.177843478291225e-07, - "loss": 0.8201, - "step": 8093 - }, - { - "epoch": 0.7299454389683004, - "grad_norm": 1.76626501045949, - "learning_rate": 7.173360496148276e-07, - "loss": 0.9608, - "step": 8094 - }, - { - "epoch": 0.7300356224917708, - "grad_norm": 1.55765797911418, - "learning_rate": 7.168878608444939e-07, - "loss": 0.9471, - "step": 8095 - }, - { - "epoch": 0.730125806015241, - "grad_norm": 1.862190743187623, - "learning_rate": 7.164397815563623e-07, - "loss": 0.9367, - "step": 8096 - }, - { - "epoch": 0.7302159895387113, - "grad_norm": 1.6687085835892916, - "learning_rate": 7.159918117886661e-07, - "loss": 0.936, - "step": 8097 - }, - { - "epoch": 0.7303061730621815, - "grad_norm": 1.4857294428361378, - "learning_rate": 7.155439515796284e-07, - "loss": 0.9528, - "step": 8098 - }, - { - "epoch": 0.7303963565856518, - "grad_norm": 1.687140603117627, - "learning_rate": 7.150962009674633e-07, - "loss": 0.9248, - "step": 8099 - }, - { - "epoch": 0.7304865401091221, - "grad_norm": 1.3365311669919566, - "learning_rate": 7.146485599903751e-07, - "loss": 0.9833, - "step": 8100 - }, - { - "epoch": 0.7305767236325923, - "grad_norm": 1.4121452527481049, - "learning_rate": 7.142010286865592e-07, - "loss": 1.0014, - "step": 8101 - }, - { - "epoch": 0.7306669071560626, - "grad_norm": 0.5705875882923802, - "learning_rate": 7.137536070942012e-07, - "loss": 0.7619, - "step": 8102 - }, - { - "epoch": 0.7307570906795329, - "grad_norm": 1.3383781936905255, - "learning_rate": 7.133062952514786e-07, - "loss": 0.9409, - "step": 8103 - }, - { - "epoch": 0.7308472742030031, - "grad_norm": 0.6640204248001511, - "learning_rate": 7.128590931965562e-07, - "loss": 0.8049, - "step": 8104 - }, - { - "epoch": 0.7309374577264733, - "grad_norm": 1.2591071248614356, - "learning_rate": 7.124120009675945e-07, - "loss": 0.999, - "step": 8105 - }, - { - "epoch": 0.7310276412499437, - "grad_norm": 1.3978521731072702, - "learning_rate": 7.119650186027399e-07, - "loss": 0.9607, - "step": 8106 - }, - { - "epoch": 0.7311178247734139, - "grad_norm": 1.5343165151478346, - "learning_rate": 7.11518146140132e-07, - "loss": 1.0552, - "step": 8107 - }, - { - "epoch": 0.7312080082968841, - "grad_norm": 1.6061018463563077, - "learning_rate": 7.110713836179007e-07, - "loss": 0.892, - "step": 8108 - }, - { - "epoch": 0.7312981918203544, - "grad_norm": 1.384103291113489, - "learning_rate": 7.106247310741659e-07, - "loss": 0.8625, - "step": 8109 - }, - { - "epoch": 0.7313883753438247, - "grad_norm": 1.4580504097563798, - "learning_rate": 7.101781885470393e-07, - "loss": 0.9989, - "step": 8110 - }, - { - "epoch": 0.731478558867295, - "grad_norm": 1.5324164638185513, - "learning_rate": 7.097317560746203e-07, - "loss": 0.9733, - "step": 8111 - }, - { - "epoch": 0.7315687423907652, - "grad_norm": 1.4079971480286497, - "learning_rate": 7.092854336950036e-07, - "loss": 0.9953, - "step": 8112 - }, - { - "epoch": 0.7316589259142354, - "grad_norm": 1.4733240854539806, - "learning_rate": 7.0883922144627e-07, - "loss": 0.9889, - "step": 8113 - }, - { - "epoch": 0.7317491094377058, - "grad_norm": 1.522087320482135, - "learning_rate": 7.083931193664934e-07, - "loss": 0.9667, - "step": 8114 - }, - { - "epoch": 0.731839292961176, - "grad_norm": 1.4254820627337046, - "learning_rate": 7.079471274937378e-07, - "loss": 0.9837, - "step": 8115 - }, - { - "epoch": 0.7319294764846462, - "grad_norm": 1.464131581438466, - "learning_rate": 7.075012458660574e-07, - "loss": 0.9219, - "step": 8116 - }, - { - "epoch": 0.7320196600081165, - "grad_norm": 0.6813879555824849, - "learning_rate": 7.070554745214976e-07, - "loss": 0.8178, - "step": 8117 - }, - { - "epoch": 0.7321098435315868, - "grad_norm": 1.6312362144150792, - "learning_rate": 7.066098134980947e-07, - "loss": 0.8942, - "step": 8118 - }, - { - "epoch": 0.732200027055057, - "grad_norm": 1.5584691506968325, - "learning_rate": 7.061642628338727e-07, - "loss": 0.9351, - "step": 8119 - }, - { - "epoch": 0.7322902105785273, - "grad_norm": 1.4499751582155256, - "learning_rate": 7.057188225668513e-07, - "loss": 0.932, - "step": 8120 - }, - { - "epoch": 0.7323803941019975, - "grad_norm": 1.3838553510361, - "learning_rate": 7.052734927350358e-07, - "loss": 1.0831, - "step": 8121 - }, - { - "epoch": 0.7324705776254679, - "grad_norm": 1.6816124819572693, - "learning_rate": 7.048282733764252e-07, - "loss": 0.9585, - "step": 8122 - }, - { - "epoch": 0.7325607611489381, - "grad_norm": 1.3388719509938738, - "learning_rate": 7.043831645290077e-07, - "loss": 1.0134, - "step": 8123 - }, - { - "epoch": 0.7326509446724083, - "grad_norm": 1.5102213853898354, - "learning_rate": 7.039381662307624e-07, - "loss": 0.9097, - "step": 8124 - }, - { - "epoch": 0.7327411281958787, - "grad_norm": 1.3741735228642569, - "learning_rate": 7.034932785196601e-07, - "loss": 0.9149, - "step": 8125 - }, - { - "epoch": 0.7328313117193489, - "grad_norm": 1.4493879440538942, - "learning_rate": 7.030485014336585e-07, - "loss": 0.9256, - "step": 8126 - }, - { - "epoch": 0.7329214952428191, - "grad_norm": 1.6103832456896858, - "learning_rate": 7.026038350107118e-07, - "loss": 0.8956, - "step": 8127 - }, - { - "epoch": 0.7330116787662894, - "grad_norm": 1.9357132825713104, - "learning_rate": 7.021592792887579e-07, - "loss": 1.0702, - "step": 8128 - }, - { - "epoch": 0.7331018622897597, - "grad_norm": 1.5003774485944303, - "learning_rate": 7.01714834305732e-07, - "loss": 0.9946, - "step": 8129 - }, - { - "epoch": 0.7331920458132299, - "grad_norm": 1.3795151703329809, - "learning_rate": 7.012705000995544e-07, - "loss": 0.8981, - "step": 8130 - }, - { - "epoch": 0.7332822293367002, - "grad_norm": 1.2455641239830793, - "learning_rate": 7.008262767081392e-07, - "loss": 0.9361, - "step": 8131 - }, - { - "epoch": 0.7333724128601704, - "grad_norm": 1.82027052147395, - "learning_rate": 7.003821641693892e-07, - "loss": 0.9955, - "step": 8132 - }, - { - "epoch": 0.7334625963836408, - "grad_norm": 1.4916558242204896, - "learning_rate": 6.999381625211993e-07, - "loss": 0.921, - "step": 8133 - }, - { - "epoch": 0.733552779907111, - "grad_norm": 1.521440654709214, - "learning_rate": 6.994942718014536e-07, - "loss": 0.8586, - "step": 8134 - }, - { - "epoch": 0.7336429634305812, - "grad_norm": 1.9780883930308348, - "learning_rate": 6.990504920480282e-07, - "loss": 0.848, - "step": 8135 - }, - { - "epoch": 0.7337331469540515, - "grad_norm": 1.5257222575841245, - "learning_rate": 6.986068232987879e-07, - "loss": 0.937, - "step": 8136 - }, - { - "epoch": 0.7338233304775218, - "grad_norm": 1.1938407663719899, - "learning_rate": 6.981632655915888e-07, - "loss": 0.8944, - "step": 8137 - }, - { - "epoch": 0.733913514000992, - "grad_norm": 1.3664643909436203, - "learning_rate": 6.977198189642783e-07, - "loss": 1.0001, - "step": 8138 - }, - { - "epoch": 0.7340036975244623, - "grad_norm": 1.5965356604335559, - "learning_rate": 6.972764834546935e-07, - "loss": 0.9625, - "step": 8139 - }, - { - "epoch": 0.7340938810479325, - "grad_norm": 1.4920031049148939, - "learning_rate": 6.96833259100663e-07, - "loss": 0.9549, - "step": 8140 - }, - { - "epoch": 0.7341840645714028, - "grad_norm": 1.599996107811962, - "learning_rate": 6.96390145940003e-07, - "loss": 0.9094, - "step": 8141 - }, - { - "epoch": 0.7342742480948731, - "grad_norm": 1.3890319014140953, - "learning_rate": 6.959471440105253e-07, - "loss": 0.8901, - "step": 8142 - }, - { - "epoch": 0.7343644316183433, - "grad_norm": 2.282263961570441, - "learning_rate": 6.955042533500261e-07, - "loss": 0.9737, - "step": 8143 - }, - { - "epoch": 0.7344546151418135, - "grad_norm": 1.6461790985156046, - "learning_rate": 6.950614739962986e-07, - "loss": 0.8977, - "step": 8144 - }, - { - "epoch": 0.7345447986652839, - "grad_norm": 1.385278001696061, - "learning_rate": 6.946188059871198e-07, - "loss": 0.8296, - "step": 8145 - }, - { - "epoch": 0.7346349821887541, - "grad_norm": 1.4010267103264291, - "learning_rate": 6.941762493602638e-07, - "loss": 1.0519, - "step": 8146 - }, - { - "epoch": 0.7347251657122243, - "grad_norm": 1.338674897367763, - "learning_rate": 6.937338041534899e-07, - "loss": 0.8704, - "step": 8147 - }, - { - "epoch": 0.7348153492356947, - "grad_norm": 1.3101029532504458, - "learning_rate": 6.932914704045505e-07, - "loss": 0.9312, - "step": 8148 - }, - { - "epoch": 0.7349055327591649, - "grad_norm": 1.6335670881316757, - "learning_rate": 6.928492481511878e-07, - "loss": 0.964, - "step": 8149 - }, - { - "epoch": 0.7349957162826352, - "grad_norm": 1.8965298208179673, - "learning_rate": 6.924071374311349e-07, - "loss": 0.8862, - "step": 8150 - }, - { - "epoch": 0.7350858998061054, - "grad_norm": 0.6562861250970637, - "learning_rate": 6.919651382821157e-07, - "loss": 0.7926, - "step": 8151 - }, - { - "epoch": 0.7351760833295757, - "grad_norm": 3.1369963222648702, - "learning_rate": 6.915232507418425e-07, - "loss": 0.9093, - "step": 8152 - }, - { - "epoch": 0.735266266853046, - "grad_norm": 1.5387479199194773, - "learning_rate": 6.910814748480204e-07, - "loss": 0.8279, - "step": 8153 - }, - { - "epoch": 0.7353564503765162, - "grad_norm": 1.9619550762380231, - "learning_rate": 6.906398106383445e-07, - "loss": 0.9502, - "step": 8154 - }, - { - "epoch": 0.7354466338999864, - "grad_norm": 1.3977703015963603, - "learning_rate": 6.901982581504994e-07, - "loss": 0.9313, - "step": 8155 - }, - { - "epoch": 0.7355368174234568, - "grad_norm": 1.539367180888444, - "learning_rate": 6.897568174221611e-07, - "loss": 0.8232, - "step": 8156 - }, - { - "epoch": 0.735627000946927, - "grad_norm": 1.4659213159009734, - "learning_rate": 6.893154884909966e-07, - "loss": 1.0108, - "step": 8157 - }, - { - "epoch": 0.7357171844703972, - "grad_norm": 0.6362685501751969, - "learning_rate": 6.888742713946602e-07, - "loss": 0.7896, - "step": 8158 - }, - { - "epoch": 0.7358073679938675, - "grad_norm": 1.6224849017223977, - "learning_rate": 6.884331661708018e-07, - "loss": 0.9389, - "step": 8159 - }, - { - "epoch": 0.7358975515173378, - "grad_norm": 1.2936565955164099, - "learning_rate": 6.879921728570561e-07, - "loss": 0.9327, - "step": 8160 - }, - { - "epoch": 0.735987735040808, - "grad_norm": 1.241151341554001, - "learning_rate": 6.875512914910539e-07, - "loss": 1.0159, - "step": 8161 - }, - { - "epoch": 0.7360779185642783, - "grad_norm": 1.3540400470322065, - "learning_rate": 6.871105221104119e-07, - "loss": 0.889, - "step": 8162 - }, - { - "epoch": 0.7361681020877485, - "grad_norm": 1.6214433040393708, - "learning_rate": 6.866698647527391e-07, - "loss": 0.9156, - "step": 8163 - }, - { - "epoch": 0.7362582856112189, - "grad_norm": 1.466107365141389, - "learning_rate": 6.862293194556353e-07, - "loss": 0.9711, - "step": 8164 - }, - { - "epoch": 0.7363484691346891, - "grad_norm": 1.339845040101078, - "learning_rate": 6.857888862566896e-07, - "loss": 0.8785, - "step": 8165 - }, - { - "epoch": 0.7364386526581593, - "grad_norm": 1.298843268973038, - "learning_rate": 6.853485651934836e-07, - "loss": 0.894, - "step": 8166 - }, - { - "epoch": 0.7365288361816296, - "grad_norm": 1.3617925522152263, - "learning_rate": 6.849083563035855e-07, - "loss": 0.7966, - "step": 8167 - }, - { - "epoch": 0.7366190197050999, - "grad_norm": 0.7647396340192953, - "learning_rate": 6.844682596245592e-07, - "loss": 0.8925, - "step": 8168 - }, - { - "epoch": 0.7367092032285701, - "grad_norm": 1.5436135791625125, - "learning_rate": 6.840282751939539e-07, - "loss": 0.9918, - "step": 8169 - }, - { - "epoch": 0.7367993867520404, - "grad_norm": 1.344461563139732, - "learning_rate": 6.835884030493126e-07, - "loss": 0.9798, - "step": 8170 - }, - { - "epoch": 0.7368895702755107, - "grad_norm": 1.498466820773074, - "learning_rate": 6.831486432281672e-07, - "loss": 0.9581, - "step": 8171 - }, - { - "epoch": 0.736979753798981, - "grad_norm": 1.810587432966713, - "learning_rate": 6.827089957680407e-07, - "loss": 0.9515, - "step": 8172 - }, - { - "epoch": 0.7370699373224512, - "grad_norm": 1.3927158111090003, - "learning_rate": 6.822694607064461e-07, - "loss": 1.0474, - "step": 8173 - }, - { - "epoch": 0.7371601208459214, - "grad_norm": 1.4119380778440298, - "learning_rate": 6.818300380808877e-07, - "loss": 0.9668, - "step": 8174 - }, - { - "epoch": 0.7372503043693918, - "grad_norm": 1.5486655612984854, - "learning_rate": 6.813907279288574e-07, - "loss": 0.927, - "step": 8175 - }, - { - "epoch": 0.737340487892862, - "grad_norm": 1.3564578358543786, - "learning_rate": 6.809515302878422e-07, - "loss": 0.953, - "step": 8176 - }, - { - "epoch": 0.7374306714163322, - "grad_norm": 1.7338334346351838, - "learning_rate": 6.80512445195315e-07, - "loss": 0.9238, - "step": 8177 - }, - { - "epoch": 0.7375208549398025, - "grad_norm": 1.3949229654624606, - "learning_rate": 6.800734726887416e-07, - "loss": 0.9597, - "step": 8178 - }, - { - "epoch": 0.7376110384632728, - "grad_norm": 1.4304837370357721, - "learning_rate": 6.796346128055775e-07, - "loss": 0.9373, - "step": 8179 - }, - { - "epoch": 0.737701221986743, - "grad_norm": 1.4383908704003323, - "learning_rate": 6.791958655832684e-07, - "loss": 0.9062, - "step": 8180 - }, - { - "epoch": 0.7377914055102133, - "grad_norm": 1.5470919986683889, - "learning_rate": 6.787572310592518e-07, - "loss": 0.9041, - "step": 8181 - }, - { - "epoch": 0.7378815890336835, - "grad_norm": 1.677431800922716, - "learning_rate": 6.783187092709521e-07, - "loss": 1.016, - "step": 8182 - }, - { - "epoch": 0.7379717725571538, - "grad_norm": 2.0990457319390945, - "learning_rate": 6.778803002557891e-07, - "loss": 0.9483, - "step": 8183 - }, - { - "epoch": 0.7380619560806241, - "grad_norm": 1.2206239720431529, - "learning_rate": 6.774420040511686e-07, - "loss": 0.9765, - "step": 8184 - }, - { - "epoch": 0.7381521396040943, - "grad_norm": 1.7029971853469283, - "learning_rate": 6.770038206944886e-07, - "loss": 0.9335, - "step": 8185 - }, - { - "epoch": 0.7382423231275645, - "grad_norm": 1.2217578939140306, - "learning_rate": 6.765657502231375e-07, - "loss": 0.9504, - "step": 8186 - }, - { - "epoch": 0.7383325066510349, - "grad_norm": 1.2908826981757575, - "learning_rate": 6.761277926744939e-07, - "loss": 1.0063, - "step": 8187 - }, - { - "epoch": 0.7384226901745051, - "grad_norm": 1.4116025149568643, - "learning_rate": 6.756899480859268e-07, - "loss": 0.9255, - "step": 8188 - }, - { - "epoch": 0.7385128736979754, - "grad_norm": 1.390915401011505, - "learning_rate": 6.752522164947956e-07, - "loss": 0.9859, - "step": 8189 - }, - { - "epoch": 0.7386030572214456, - "grad_norm": 1.1085226518710856, - "learning_rate": 6.748145979384498e-07, - "loss": 0.9902, - "step": 8190 - }, - { - "epoch": 0.7386932407449159, - "grad_norm": 1.4950476592259303, - "learning_rate": 6.743770924542303e-07, - "loss": 0.8814, - "step": 8191 - }, - { - "epoch": 0.7387834242683862, - "grad_norm": 0.6628015101995167, - "learning_rate": 6.739397000794658e-07, - "loss": 0.8059, - "step": 8192 - }, - { - "epoch": 0.7388736077918564, - "grad_norm": 1.6894354846893356, - "learning_rate": 6.735024208514782e-07, - "loss": 0.9143, - "step": 8193 - }, - { - "epoch": 0.7389637913153266, - "grad_norm": 1.1968746075741594, - "learning_rate": 6.73065254807578e-07, - "loss": 0.8558, - "step": 8194 - }, - { - "epoch": 0.739053974838797, - "grad_norm": 1.4845878046985361, - "learning_rate": 6.726282019850669e-07, - "loss": 0.9967, - "step": 8195 - }, - { - "epoch": 0.7391441583622672, - "grad_norm": 1.1199412519510366, - "learning_rate": 6.721912624212376e-07, - "loss": 1.0015, - "step": 8196 - }, - { - "epoch": 0.7392343418857374, - "grad_norm": 1.5106707742847871, - "learning_rate": 6.717544361533696e-07, - "loss": 0.834, - "step": 8197 - }, - { - "epoch": 0.7393245254092078, - "grad_norm": 1.3174104567469949, - "learning_rate": 6.713177232187386e-07, - "loss": 0.9191, - "step": 8198 - }, - { - "epoch": 0.739414708932678, - "grad_norm": 1.8695819138493968, - "learning_rate": 6.708811236546041e-07, - "loss": 0.9312, - "step": 8199 - }, - { - "epoch": 0.7395048924561483, - "grad_norm": 1.7118971410895376, - "learning_rate": 6.704446374982224e-07, - "loss": 0.9057, - "step": 8200 - }, - { - "epoch": 0.7395950759796185, - "grad_norm": 4.494065716387334, - "learning_rate": 6.700082647868346e-07, - "loss": 0.9767, - "step": 8201 - }, - { - "epoch": 0.7396852595030888, - "grad_norm": 1.3649991641897918, - "learning_rate": 6.695720055576751e-07, - "loss": 0.9959, - "step": 8202 - }, - { - "epoch": 0.7397754430265591, - "grad_norm": 1.4793511754685253, - "learning_rate": 6.691358598479679e-07, - "loss": 0.9521, - "step": 8203 - }, - { - "epoch": 0.7398656265500293, - "grad_norm": 1.3841895881299826, - "learning_rate": 6.686998276949276e-07, - "loss": 0.934, - "step": 8204 - }, - { - "epoch": 0.7399558100734995, - "grad_norm": 0.7194118355103829, - "learning_rate": 6.682639091357587e-07, - "loss": 0.8463, - "step": 8205 - }, - { - "epoch": 0.7400459935969699, - "grad_norm": 1.4979553592585124, - "learning_rate": 6.678281042076568e-07, - "loss": 0.9245, - "step": 8206 - }, - { - "epoch": 0.7401361771204401, - "grad_norm": 1.1788268328612899, - "learning_rate": 6.673924129478059e-07, - "loss": 0.942, - "step": 8207 - }, - { - "epoch": 0.7402263606439103, - "grad_norm": 1.4521483554083283, - "learning_rate": 6.669568353933824e-07, - "loss": 0.8588, - "step": 8208 - }, - { - "epoch": 0.7403165441673806, - "grad_norm": 1.414557607395292, - "learning_rate": 6.665213715815519e-07, - "loss": 0.8754, - "step": 8209 - }, - { - "epoch": 0.7404067276908509, - "grad_norm": 1.6580156236011374, - "learning_rate": 6.660860215494706e-07, - "loss": 0.9447, - "step": 8210 - }, - { - "epoch": 0.7404969112143212, - "grad_norm": 1.4389666663120457, - "learning_rate": 6.656507853342852e-07, - "loss": 0.8929, - "step": 8211 - }, - { - "epoch": 0.7405870947377914, - "grad_norm": 2.6116974140816827, - "learning_rate": 6.652156629731323e-07, - "loss": 0.9451, - "step": 8212 - }, - { - "epoch": 0.7406772782612616, - "grad_norm": 1.538958163524987, - "learning_rate": 6.647806545031396e-07, - "loss": 0.9998, - "step": 8213 - }, - { - "epoch": 0.740767461784732, - "grad_norm": 1.4327946652432908, - "learning_rate": 6.643457599614224e-07, - "loss": 0.8616, - "step": 8214 - }, - { - "epoch": 0.7408576453082022, - "grad_norm": 1.586533157410903, - "learning_rate": 6.63910979385091e-07, - "loss": 0.9942, - "step": 8215 - }, - { - "epoch": 0.7409478288316724, - "grad_norm": 1.5746373833919398, - "learning_rate": 6.634763128112409e-07, - "loss": 1.0234, - "step": 8216 - }, - { - "epoch": 0.7410380123551427, - "grad_norm": 1.6833201316592794, - "learning_rate": 6.630417602769622e-07, - "loss": 0.9304, - "step": 8217 - }, - { - "epoch": 0.741128195878613, - "grad_norm": 1.470955106468988, - "learning_rate": 6.62607321819332e-07, - "loss": 0.9524, - "step": 8218 - }, - { - "epoch": 0.7412183794020832, - "grad_norm": 1.2459551216909637, - "learning_rate": 6.621729974754196e-07, - "loss": 0.9425, - "step": 8219 - }, - { - "epoch": 0.7413085629255535, - "grad_norm": 1.47342683984454, - "learning_rate": 6.617387872822835e-07, - "loss": 0.8859, - "step": 8220 - }, - { - "epoch": 0.7413987464490238, - "grad_norm": 4.509947272963092, - "learning_rate": 6.613046912769731e-07, - "loss": 0.9144, - "step": 8221 - }, - { - "epoch": 0.741488929972494, - "grad_norm": 1.6606366348864015, - "learning_rate": 6.608707094965289e-07, - "loss": 1.0061, - "step": 8222 - }, - { - "epoch": 0.7415791134959643, - "grad_norm": 1.9384370814225653, - "learning_rate": 6.604368419779787e-07, - "loss": 0.8652, - "step": 8223 - }, - { - "epoch": 0.7416692970194345, - "grad_norm": 2.6912045340943216, - "learning_rate": 6.600030887583434e-07, - "loss": 0.9444, - "step": 8224 - }, - { - "epoch": 0.7417594805429049, - "grad_norm": 1.5148580911477874, - "learning_rate": 6.595694498746336e-07, - "loss": 1.0372, - "step": 8225 - }, - { - "epoch": 0.7418496640663751, - "grad_norm": 1.6337739586915796, - "learning_rate": 6.591359253638491e-07, - "loss": 0.9123, - "step": 8226 - }, - { - "epoch": 0.7419398475898453, - "grad_norm": 1.8153766298056262, - "learning_rate": 6.587025152629808e-07, - "loss": 0.8367, - "step": 8227 - }, - { - "epoch": 0.7420300311133156, - "grad_norm": 1.95441327142039, - "learning_rate": 6.582692196090107e-07, - "loss": 0.9821, - "step": 8228 - }, - { - "epoch": 0.7421202146367859, - "grad_norm": 1.4413652284982554, - "learning_rate": 6.578360384389074e-07, - "loss": 0.9481, - "step": 8229 - }, - { - "epoch": 0.7422103981602561, - "grad_norm": 1.315293790032092, - "learning_rate": 6.574029717896355e-07, - "loss": 0.9184, - "step": 8230 - }, - { - "epoch": 0.7423005816837264, - "grad_norm": 1.7471549203939911, - "learning_rate": 6.569700196981436e-07, - "loss": 0.9271, - "step": 8231 - }, - { - "epoch": 0.7423907652071966, - "grad_norm": 1.5395039346513957, - "learning_rate": 6.565371822013763e-07, - "loss": 0.8427, - "step": 8232 - }, - { - "epoch": 0.742480948730667, - "grad_norm": 1.5722897355973533, - "learning_rate": 6.561044593362636e-07, - "loss": 0.9905, - "step": 8233 - }, - { - "epoch": 0.7425711322541372, - "grad_norm": 1.3271275982543427, - "learning_rate": 6.556718511397288e-07, - "loss": 0.9223, - "step": 8234 - }, - { - "epoch": 0.7426613157776074, - "grad_norm": 1.3069354125352781, - "learning_rate": 6.552393576486843e-07, - "loss": 1.0489, - "step": 8235 - }, - { - "epoch": 0.7427514993010776, - "grad_norm": 1.5962853554295555, - "learning_rate": 6.548069789000325e-07, - "loss": 0.9472, - "step": 8236 - }, - { - "epoch": 0.742841682824548, - "grad_norm": 2.537880396400332, - "learning_rate": 6.543747149306673e-07, - "loss": 0.922, - "step": 8237 - }, - { - "epoch": 0.7429318663480182, - "grad_norm": 1.3866216061374415, - "learning_rate": 6.5394256577747e-07, - "loss": 0.988, - "step": 8238 - }, - { - "epoch": 0.7430220498714885, - "grad_norm": 1.5014769117909323, - "learning_rate": 6.535105314773161e-07, - "loss": 0.9671, - "step": 8239 - }, - { - "epoch": 0.7431122333949587, - "grad_norm": 1.5911252717116717, - "learning_rate": 6.530786120670677e-07, - "loss": 0.871, - "step": 8240 - }, - { - "epoch": 0.743202416918429, - "grad_norm": 1.513037291375455, - "learning_rate": 6.526468075835787e-07, - "loss": 0.9851, - "step": 8241 - }, - { - "epoch": 0.7432926004418993, - "grad_norm": 1.3278937194504479, - "learning_rate": 6.522151180636937e-07, - "loss": 0.9774, - "step": 8242 - }, - { - "epoch": 0.7433827839653695, - "grad_norm": 3.1310917797203306, - "learning_rate": 6.517835435442461e-07, - "loss": 0.9028, - "step": 8243 - }, - { - "epoch": 0.7434729674888398, - "grad_norm": 2.302320913998293, - "learning_rate": 6.513520840620606e-07, - "loss": 0.9616, - "step": 8244 - }, - { - "epoch": 0.7435631510123101, - "grad_norm": 1.6499249585744806, - "learning_rate": 6.509207396539525e-07, - "loss": 0.891, - "step": 8245 - }, - { - "epoch": 0.7436533345357803, - "grad_norm": 0.6824153006749564, - "learning_rate": 6.50489510356724e-07, - "loss": 0.7761, - "step": 8246 - }, - { - "epoch": 0.7437435180592505, - "grad_norm": 1.6306648549553036, - "learning_rate": 6.500583962071732e-07, - "loss": 0.8638, - "step": 8247 - }, - { - "epoch": 0.7438337015827209, - "grad_norm": 2.585832392005696, - "learning_rate": 6.496273972420827e-07, - "loss": 0.9642, - "step": 8248 - }, - { - "epoch": 0.7439238851061911, - "grad_norm": 1.703751055240654, - "learning_rate": 6.491965134982287e-07, - "loss": 0.994, - "step": 8249 - }, - { - "epoch": 0.7440140686296614, - "grad_norm": 1.632915146809064, - "learning_rate": 6.487657450123765e-07, - "loss": 0.9091, - "step": 8250 - }, - { - "epoch": 0.7441042521531316, - "grad_norm": 1.3736802182857843, - "learning_rate": 6.483350918212814e-07, - "loss": 0.9491, - "step": 8251 - }, - { - "epoch": 0.7441944356766019, - "grad_norm": 0.7564858297444146, - "learning_rate": 6.479045539616898e-07, - "loss": 0.8144, - "step": 8252 - }, - { - "epoch": 0.7442846192000722, - "grad_norm": 1.6222736522862664, - "learning_rate": 6.474741314703358e-07, - "loss": 0.9441, - "step": 8253 - }, - { - "epoch": 0.7443748027235424, - "grad_norm": 1.836110857631546, - "learning_rate": 6.47043824383948e-07, - "loss": 0.9961, - "step": 8254 - }, - { - "epoch": 0.7444649862470126, - "grad_norm": 1.868953811095181, - "learning_rate": 6.466136327392399e-07, - "loss": 0.9784, - "step": 8255 - }, - { - "epoch": 0.744555169770483, - "grad_norm": 1.6541015077276306, - "learning_rate": 6.461835565729206e-07, - "loss": 0.9719, - "step": 8256 - }, - { - "epoch": 0.7446453532939532, - "grad_norm": 1.3764246582574031, - "learning_rate": 6.457535959216844e-07, - "loss": 0.9659, - "step": 8257 - }, - { - "epoch": 0.7447355368174234, - "grad_norm": 1.3657636803366582, - "learning_rate": 6.453237508222186e-07, - "loss": 0.9027, - "step": 8258 - }, - { - "epoch": 0.7448257203408937, - "grad_norm": 1.39214109435484, - "learning_rate": 6.448940213112e-07, - "loss": 0.9543, - "step": 8259 - }, - { - "epoch": 0.744915903864364, - "grad_norm": 1.3100351167296316, - "learning_rate": 6.444644074252954e-07, - "loss": 1.0234, - "step": 8260 - }, - { - "epoch": 0.7450060873878342, - "grad_norm": 1.458172716650906, - "learning_rate": 6.440349092011628e-07, - "loss": 1.0108, - "step": 8261 - }, - { - "epoch": 0.7450962709113045, - "grad_norm": 1.3759456763716607, - "learning_rate": 6.436055266754475e-07, - "loss": 0.9361, - "step": 8262 - }, - { - "epoch": 0.7451864544347747, - "grad_norm": 1.7475496575936305, - "learning_rate": 6.431762598847879e-07, - "loss": 0.921, - "step": 8263 - }, - { - "epoch": 0.7452766379582451, - "grad_norm": 1.4045393393586274, - "learning_rate": 6.427471088658111e-07, - "loss": 0.9177, - "step": 8264 - }, - { - "epoch": 0.7453668214817153, - "grad_norm": 1.4290189587007487, - "learning_rate": 6.42318073655135e-07, - "loss": 0.937, - "step": 8265 - }, - { - "epoch": 0.7454570050051855, - "grad_norm": 1.5955694695928326, - "learning_rate": 6.41889154289367e-07, - "loss": 0.9023, - "step": 8266 - }, - { - "epoch": 0.7455471885286559, - "grad_norm": 1.5378858841873129, - "learning_rate": 6.414603508051055e-07, - "loss": 1.0032, - "step": 8267 - }, - { - "epoch": 0.7456373720521261, - "grad_norm": 0.7024915809058978, - "learning_rate": 6.410316632389365e-07, - "loss": 0.8508, - "step": 8268 - }, - { - "epoch": 0.7457275555755963, - "grad_norm": 1.5571436051458158, - "learning_rate": 6.406030916274406e-07, - "loss": 0.9424, - "step": 8269 - }, - { - "epoch": 0.7458177390990666, - "grad_norm": 1.4287392228221512, - "learning_rate": 6.401746360071831e-07, - "loss": 0.9012, - "step": 8270 - }, - { - "epoch": 0.7459079226225369, - "grad_norm": 1.488737501961546, - "learning_rate": 6.397462964147251e-07, - "loss": 0.9039, - "step": 8271 - }, - { - "epoch": 0.7459981061460071, - "grad_norm": 1.4728682388193213, - "learning_rate": 6.393180728866128e-07, - "loss": 0.9331, - "step": 8272 - }, - { - "epoch": 0.7460882896694774, - "grad_norm": 1.2700442172785849, - "learning_rate": 6.388899654593853e-07, - "loss": 0.9709, - "step": 8273 - }, - { - "epoch": 0.7461784731929476, - "grad_norm": 1.5432603005306953, - "learning_rate": 6.384619741695709e-07, - "loss": 0.9711, - "step": 8274 - }, - { - "epoch": 0.746268656716418, - "grad_norm": 1.3277194582041452, - "learning_rate": 6.380340990536883e-07, - "loss": 0.9475, - "step": 8275 - }, - { - "epoch": 0.7463588402398882, - "grad_norm": 1.6124534422784507, - "learning_rate": 6.37606340148247e-07, - "loss": 0.9424, - "step": 8276 - }, - { - "epoch": 0.7464490237633584, - "grad_norm": 1.65583148202983, - "learning_rate": 6.371786974897433e-07, - "loss": 0.8439, - "step": 8277 - }, - { - "epoch": 0.7465392072868287, - "grad_norm": 1.1787100778495985, - "learning_rate": 6.367511711146691e-07, - "loss": 0.9653, - "step": 8278 - }, - { - "epoch": 0.746629390810299, - "grad_norm": 1.3893700841627101, - "learning_rate": 6.363237610595014e-07, - "loss": 0.8834, - "step": 8279 - }, - { - "epoch": 0.7467195743337692, - "grad_norm": 1.7510733718618985, - "learning_rate": 6.358964673607094e-07, - "loss": 0.919, - "step": 8280 - }, - { - "epoch": 0.7468097578572395, - "grad_norm": 1.2429183633255427, - "learning_rate": 6.354692900547525e-07, - "loss": 0.9784, - "step": 8281 - }, - { - "epoch": 0.7468999413807097, - "grad_norm": 1.4503327481172141, - "learning_rate": 6.350422291780797e-07, - "loss": 0.9125, - "step": 8282 - }, - { - "epoch": 0.74699012490418, - "grad_norm": 1.7893644519512335, - "learning_rate": 6.346152847671302e-07, - "loss": 1.0369, - "step": 8283 - }, - { - "epoch": 0.7470803084276503, - "grad_norm": 1.5211345774888003, - "learning_rate": 6.34188456858334e-07, - "loss": 0.9372, - "step": 8284 - }, - { - "epoch": 0.7471704919511205, - "grad_norm": 1.6621196311697009, - "learning_rate": 6.337617454881081e-07, - "loss": 0.8958, - "step": 8285 - }, - { - "epoch": 0.7472606754745907, - "grad_norm": 1.3228938583850856, - "learning_rate": 6.333351506928651e-07, - "loss": 0.8571, - "step": 8286 - }, - { - "epoch": 0.7473508589980611, - "grad_norm": 1.6058244519993579, - "learning_rate": 6.329086725090018e-07, - "loss": 0.9122, - "step": 8287 - }, - { - "epoch": 0.7474410425215313, - "grad_norm": 1.424183993352255, - "learning_rate": 6.324823109729087e-07, - "loss": 0.9056, - "step": 8288 - }, - { - "epoch": 0.7475312260450016, - "grad_norm": 1.4469876041632743, - "learning_rate": 6.320560661209653e-07, - "loss": 0.8745, - "step": 8289 - }, - { - "epoch": 0.7476214095684719, - "grad_norm": 1.360075583741801, - "learning_rate": 6.316299379895411e-07, - "loss": 0.8285, - "step": 8290 - }, - { - "epoch": 0.7477115930919421, - "grad_norm": 1.5125756016271774, - "learning_rate": 6.312039266149965e-07, - "loss": 0.9718, - "step": 8291 - }, - { - "epoch": 0.7478017766154124, - "grad_norm": 1.2952805794715962, - "learning_rate": 6.307780320336789e-07, - "loss": 0.9714, - "step": 8292 - }, - { - "epoch": 0.7478919601388826, - "grad_norm": 1.5483941219347357, - "learning_rate": 6.303522542819306e-07, - "loss": 0.9955, - "step": 8293 - }, - { - "epoch": 0.7479821436623529, - "grad_norm": 1.9250989095865396, - "learning_rate": 6.299265933960796e-07, - "loss": 0.9141, - "step": 8294 - }, - { - "epoch": 0.7480723271858232, - "grad_norm": 1.7876884814573342, - "learning_rate": 6.295010494124462e-07, - "loss": 0.8917, - "step": 8295 - }, - { - "epoch": 0.7481625107092934, - "grad_norm": 1.3162091840626768, - "learning_rate": 6.290756223673399e-07, - "loss": 0.9097, - "step": 8296 - }, - { - "epoch": 0.7482526942327636, - "grad_norm": 1.1692770580417755, - "learning_rate": 6.28650312297061e-07, - "loss": 0.9676, - "step": 8297 - }, - { - "epoch": 0.748342877756234, - "grad_norm": 1.579768165770081, - "learning_rate": 6.282251192378987e-07, - "loss": 0.8536, - "step": 8298 - }, - { - "epoch": 0.7484330612797042, - "grad_norm": 1.1932262090376864, - "learning_rate": 6.278000432261334e-07, - "loss": 0.8637, - "step": 8299 - }, - { - "epoch": 0.7485232448031744, - "grad_norm": 2.382535424304366, - "learning_rate": 6.273750842980345e-07, - "loss": 0.9632, - "step": 8300 - }, - { - "epoch": 0.7486134283266447, - "grad_norm": 1.4528211050256696, - "learning_rate": 6.269502424898625e-07, - "loss": 0.9728, - "step": 8301 - }, - { - "epoch": 0.748703611850115, - "grad_norm": 1.5553460188172703, - "learning_rate": 6.265255178378663e-07, - "loss": 0.9843, - "step": 8302 - }, - { - "epoch": 0.7487937953735853, - "grad_norm": 1.7190934271597083, - "learning_rate": 6.261009103782861e-07, - "loss": 0.8375, - "step": 8303 - }, - { - "epoch": 0.7488839788970555, - "grad_norm": 1.4649394499989021, - "learning_rate": 6.256764201473519e-07, - "loss": 0.995, - "step": 8304 - }, - { - "epoch": 0.7489741624205257, - "grad_norm": 0.7374253736336797, - "learning_rate": 6.252520471812835e-07, - "loss": 0.7922, - "step": 8305 - }, - { - "epoch": 0.7490643459439961, - "grad_norm": 1.711660711296045, - "learning_rate": 6.248277915162912e-07, - "loss": 0.9513, - "step": 8306 - }, - { - "epoch": 0.7491545294674663, - "grad_norm": 1.3264932818826798, - "learning_rate": 6.244036531885731e-07, - "loss": 0.793, - "step": 8307 - }, - { - "epoch": 0.7492447129909365, - "grad_norm": 1.5243178733766323, - "learning_rate": 6.239796322343216e-07, - "loss": 0.8725, - "step": 8308 - }, - { - "epoch": 0.7493348965144068, - "grad_norm": 1.3602273395503837, - "learning_rate": 6.235557286897137e-07, - "loss": 0.8785, - "step": 8309 - }, - { - "epoch": 0.7494250800378771, - "grad_norm": 1.4405658205003928, - "learning_rate": 6.231319425909223e-07, - "loss": 0.9382, - "step": 8310 - }, - { - "epoch": 0.7495152635613473, - "grad_norm": 1.4300752541274362, - "learning_rate": 6.227082739741045e-07, - "loss": 0.9748, - "step": 8311 - }, - { - "epoch": 0.7496054470848176, - "grad_norm": 1.7144350160001762, - "learning_rate": 6.222847228754113e-07, - "loss": 0.9581, - "step": 8312 - }, - { - "epoch": 0.7496956306082878, - "grad_norm": 1.520617805084504, - "learning_rate": 6.218612893309823e-07, - "loss": 1.039, - "step": 8313 - }, - { - "epoch": 0.7497858141317582, - "grad_norm": 1.412323614939809, - "learning_rate": 6.214379733769468e-07, - "loss": 0.8838, - "step": 8314 - }, - { - "epoch": 0.7498759976552284, - "grad_norm": 1.8111725912144945, - "learning_rate": 6.21014775049425e-07, - "loss": 0.9756, - "step": 8315 - }, - { - "epoch": 0.7499661811786986, - "grad_norm": 0.6061965260076075, - "learning_rate": 6.205916943845267e-07, - "loss": 0.776, - "step": 8316 - }, - { - "epoch": 0.750056364702169, - "grad_norm": 1.6596797483676553, - "learning_rate": 6.201687314183504e-07, - "loss": 0.9856, - "step": 8317 - }, - { - "epoch": 0.7501465482256392, - "grad_norm": 0.6973709539790782, - "learning_rate": 6.197458861869862e-07, - "loss": 0.8419, - "step": 8318 - }, - { - "epoch": 0.7502367317491094, - "grad_norm": 1.3664872474284726, - "learning_rate": 6.193231587265138e-07, - "loss": 0.9787, - "step": 8319 - }, - { - "epoch": 0.7503269152725797, - "grad_norm": 1.56160405221085, - "learning_rate": 6.189005490730024e-07, - "loss": 0.9229, - "step": 8320 - }, - { - "epoch": 0.75041709879605, - "grad_norm": 1.4538290307694395, - "learning_rate": 6.184780572625115e-07, - "loss": 0.96, - "step": 8321 - }, - { - "epoch": 0.7505072823195202, - "grad_norm": 1.8415386710703985, - "learning_rate": 6.180556833310902e-07, - "loss": 0.8424, - "step": 8322 - }, - { - "epoch": 0.7505974658429905, - "grad_norm": 1.396840680957424, - "learning_rate": 6.176334273147788e-07, - "loss": 0.9758, - "step": 8323 - }, - { - "epoch": 0.7506876493664607, - "grad_norm": 1.7089357554467948, - "learning_rate": 6.172112892496042e-07, - "loss": 0.8484, - "step": 8324 - }, - { - "epoch": 0.750777832889931, - "grad_norm": 1.401612752738618, - "learning_rate": 6.167892691715883e-07, - "loss": 0.9797, - "step": 8325 - }, - { - "epoch": 0.7508680164134013, - "grad_norm": 1.450508797924944, - "learning_rate": 6.163673671167378e-07, - "loss": 0.9245, - "step": 8326 - }, - { - "epoch": 0.7509581999368715, - "grad_norm": 1.4793365900285473, - "learning_rate": 6.15945583121054e-07, - "loss": 0.9846, - "step": 8327 - }, - { - "epoch": 0.7510483834603418, - "grad_norm": 1.945723402143199, - "learning_rate": 6.15523917220524e-07, - "loss": 0.8883, - "step": 8328 - }, - { - "epoch": 0.7511385669838121, - "grad_norm": 1.4122570588483632, - "learning_rate": 6.151023694511273e-07, - "loss": 1.0025, - "step": 8329 - }, - { - "epoch": 0.7512287505072823, - "grad_norm": 1.4337348556716467, - "learning_rate": 6.146809398488328e-07, - "loss": 0.8774, - "step": 8330 - }, - { - "epoch": 0.7513189340307526, - "grad_norm": 1.4128633757545646, - "learning_rate": 6.142596284495989e-07, - "loss": 0.9799, - "step": 8331 - }, - { - "epoch": 0.7514091175542228, - "grad_norm": 1.4337735180310314, - "learning_rate": 6.138384352893751e-07, - "loss": 0.8979, - "step": 8332 - }, - { - "epoch": 0.7514993010776931, - "grad_norm": 1.8483216456578913, - "learning_rate": 6.134173604040987e-07, - "loss": 0.9242, - "step": 8333 - }, - { - "epoch": 0.7515894846011634, - "grad_norm": 1.4984859136530941, - "learning_rate": 6.129964038296984e-07, - "loss": 0.9458, - "step": 8334 - }, - { - "epoch": 0.7516796681246336, - "grad_norm": 1.5374851567241932, - "learning_rate": 6.12575565602093e-07, - "loss": 1.0113, - "step": 8335 - }, - { - "epoch": 0.7517698516481038, - "grad_norm": 1.1941401897475796, - "learning_rate": 6.121548457571905e-07, - "loss": 1.0071, - "step": 8336 - }, - { - "epoch": 0.7518600351715742, - "grad_norm": 1.4770057456977264, - "learning_rate": 6.11734244330889e-07, - "loss": 0.8841, - "step": 8337 - }, - { - "epoch": 0.7519502186950444, - "grad_norm": 1.497109807657435, - "learning_rate": 6.113137613590773e-07, - "loss": 0.951, - "step": 8338 - }, - { - "epoch": 0.7520404022185146, - "grad_norm": 1.5584170592692246, - "learning_rate": 6.108933968776313e-07, - "loss": 0.9933, - "step": 8339 - }, - { - "epoch": 0.752130585741985, - "grad_norm": 1.320372224619378, - "learning_rate": 6.104731509224212e-07, - "loss": 0.9356, - "step": 8340 - }, - { - "epoch": 0.7522207692654552, - "grad_norm": 1.2475010211000106, - "learning_rate": 6.100530235293027e-07, - "loss": 0.8717, - "step": 8341 - }, - { - "epoch": 0.7523109527889255, - "grad_norm": 1.6090051911283956, - "learning_rate": 6.096330147341253e-07, - "loss": 0.9615, - "step": 8342 - }, - { - "epoch": 0.7524011363123957, - "grad_norm": 1.7364593902505345, - "learning_rate": 6.09213124572725e-07, - "loss": 0.9978, - "step": 8343 - }, - { - "epoch": 0.752491319835866, - "grad_norm": 1.313189280124522, - "learning_rate": 6.087933530809297e-07, - "loss": 1.0109, - "step": 8344 - }, - { - "epoch": 0.7525815033593363, - "grad_norm": 1.4227814352731707, - "learning_rate": 6.083737002945566e-07, - "loss": 0.929, - "step": 8345 - }, - { - "epoch": 0.7526716868828065, - "grad_norm": 1.3964545826968502, - "learning_rate": 6.079541662494126e-07, - "loss": 0.8902, - "step": 8346 - }, - { - "epoch": 0.7527618704062767, - "grad_norm": 1.4315707617727196, - "learning_rate": 6.075347509812954e-07, - "loss": 0.9514, - "step": 8347 - }, - { - "epoch": 0.7528520539297471, - "grad_norm": 1.3463491099082483, - "learning_rate": 6.0711545452599e-07, - "loss": 0.9054, - "step": 8348 - }, - { - "epoch": 0.7529422374532173, - "grad_norm": 1.508063102808368, - "learning_rate": 6.066962769192756e-07, - "loss": 0.8686, - "step": 8349 - }, - { - "epoch": 0.7530324209766875, - "grad_norm": 1.9436412817672766, - "learning_rate": 6.062772181969167e-07, - "loss": 0.8775, - "step": 8350 - }, - { - "epoch": 0.7531226045001578, - "grad_norm": 1.3675241546310803, - "learning_rate": 6.058582783946706e-07, - "loss": 1.0034, - "step": 8351 - }, - { - "epoch": 0.7532127880236281, - "grad_norm": 1.6497827618093632, - "learning_rate": 6.054394575482833e-07, - "loss": 0.924, - "step": 8352 - }, - { - "epoch": 0.7533029715470984, - "grad_norm": 1.430208538561994, - "learning_rate": 6.05020755693491e-07, - "loss": 0.855, - "step": 8353 - }, - { - "epoch": 0.7533931550705686, - "grad_norm": 1.5883037785078349, - "learning_rate": 6.046021728660198e-07, - "loss": 0.8551, - "step": 8354 - }, - { - "epoch": 0.7534833385940388, - "grad_norm": 1.452624327021726, - "learning_rate": 6.041837091015858e-07, - "loss": 0.981, - "step": 8355 - }, - { - "epoch": 0.7535735221175092, - "grad_norm": 1.3298920488646064, - "learning_rate": 6.037653644358931e-07, - "loss": 0.9893, - "step": 8356 - }, - { - "epoch": 0.7536637056409794, - "grad_norm": 1.4443874388637432, - "learning_rate": 6.033471389046393e-07, - "loss": 0.9524, - "step": 8357 - }, - { - "epoch": 0.7537538891644496, - "grad_norm": 1.4461552817568544, - "learning_rate": 6.029290325435084e-07, - "loss": 0.8889, - "step": 8358 - }, - { - "epoch": 0.7538440726879199, - "grad_norm": 1.591804963073777, - "learning_rate": 6.025110453881756e-07, - "loss": 1.0006, - "step": 8359 - }, - { - "epoch": 0.7539342562113902, - "grad_norm": 1.8523585543770944, - "learning_rate": 6.020931774743061e-07, - "loss": 0.879, - "step": 8360 - }, - { - "epoch": 0.7540244397348604, - "grad_norm": 1.4494215008578588, - "learning_rate": 6.016754288375546e-07, - "loss": 0.8631, - "step": 8361 - }, - { - "epoch": 0.7541146232583307, - "grad_norm": 1.4063750317408847, - "learning_rate": 6.012577995135665e-07, - "loss": 0.9346, - "step": 8362 - }, - { - "epoch": 0.754204806781801, - "grad_norm": 1.5109991682341917, - "learning_rate": 6.008402895379743e-07, - "loss": 0.889, - "step": 8363 - }, - { - "epoch": 0.7542949903052713, - "grad_norm": 1.6679011382575204, - "learning_rate": 6.004228989464047e-07, - "loss": 1.0283, - "step": 8364 - }, - { - "epoch": 0.7543851738287415, - "grad_norm": 2.338610188613812, - "learning_rate": 6.000056277744692e-07, - "loss": 0.9597, - "step": 8365 - }, - { - "epoch": 0.7544753573522117, - "grad_norm": 1.2819943475613533, - "learning_rate": 5.995884760577745e-07, - "loss": 0.91, - "step": 8366 - }, - { - "epoch": 0.7545655408756821, - "grad_norm": 0.6880641053477564, - "learning_rate": 5.99171443831912e-07, - "loss": 0.7954, - "step": 8367 - }, - { - "epoch": 0.7546557243991523, - "grad_norm": 1.2220524768768504, - "learning_rate": 5.98754531132466e-07, - "loss": 0.9314, - "step": 8368 - }, - { - "epoch": 0.7547459079226225, - "grad_norm": 1.6900124390274123, - "learning_rate": 5.983377379950099e-07, - "loss": 0.9477, - "step": 8369 - }, - { - "epoch": 0.7548360914460928, - "grad_norm": 1.6487394661960932, - "learning_rate": 5.979210644551067e-07, - "loss": 0.8826, - "step": 8370 - }, - { - "epoch": 0.7549262749695631, - "grad_norm": 1.8750202813641066, - "learning_rate": 5.975045105483091e-07, - "loss": 0.9357, - "step": 8371 - }, - { - "epoch": 0.7550164584930333, - "grad_norm": 0.6242966747701317, - "learning_rate": 5.970880763101607e-07, - "loss": 0.8078, - "step": 8372 - }, - { - "epoch": 0.7551066420165036, - "grad_norm": 1.4048335677295423, - "learning_rate": 5.966717617761925e-07, - "loss": 0.9575, - "step": 8373 - }, - { - "epoch": 0.7551968255399738, - "grad_norm": 1.4142803214054307, - "learning_rate": 5.962555669819276e-07, - "loss": 0.92, - "step": 8374 - }, - { - "epoch": 0.7552870090634441, - "grad_norm": 1.3537579824506403, - "learning_rate": 5.958394919628777e-07, - "loss": 0.955, - "step": 8375 - }, - { - "epoch": 0.7553771925869144, - "grad_norm": 1.2388264030660607, - "learning_rate": 5.954235367545451e-07, - "loss": 0.8786, - "step": 8376 - }, - { - "epoch": 0.7554673761103846, - "grad_norm": 1.2893871014277234, - "learning_rate": 5.950077013924213e-07, - "loss": 0.9283, - "step": 8377 - }, - { - "epoch": 0.7555575596338548, - "grad_norm": 1.6342965276781505, - "learning_rate": 5.945919859119865e-07, - "loss": 1.0212, - "step": 8378 - }, - { - "epoch": 0.7556477431573252, - "grad_norm": 1.7292116994240962, - "learning_rate": 5.94176390348714e-07, - "loss": 0.8387, - "step": 8379 - }, - { - "epoch": 0.7557379266807954, - "grad_norm": 1.1634873370967966, - "learning_rate": 5.937609147380622e-07, - "loss": 1.0435, - "step": 8380 - }, - { - "epoch": 0.7558281102042657, - "grad_norm": 1.4321502153311543, - "learning_rate": 5.933455591154844e-07, - "loss": 0.8847, - "step": 8381 - }, - { - "epoch": 0.7559182937277359, - "grad_norm": 2.175947912329059, - "learning_rate": 5.929303235164191e-07, - "loss": 0.9091, - "step": 8382 - }, - { - "epoch": 0.7560084772512062, - "grad_norm": 1.5513820301216588, - "learning_rate": 5.92515207976297e-07, - "loss": 0.9127, - "step": 8383 - }, - { - "epoch": 0.7560986607746765, - "grad_norm": 1.4180341553118894, - "learning_rate": 5.921002125305383e-07, - "loss": 0.9251, - "step": 8384 - }, - { - "epoch": 0.7561888442981467, - "grad_norm": 1.5660079487712135, - "learning_rate": 5.916853372145525e-07, - "loss": 0.8701, - "step": 8385 - }, - { - "epoch": 0.756279027821617, - "grad_norm": 1.5360943982849833, - "learning_rate": 5.912705820637389e-07, - "loss": 0.8738, - "step": 8386 - }, - { - "epoch": 0.7563692113450873, - "grad_norm": 1.4634580848511969, - "learning_rate": 5.908559471134871e-07, - "loss": 1.005, - "step": 8387 - }, - { - "epoch": 0.7564593948685575, - "grad_norm": 1.5877916743942306, - "learning_rate": 5.904414323991764e-07, - "loss": 0.8773, - "step": 8388 - }, - { - "epoch": 0.7565495783920277, - "grad_norm": 1.6144214507954775, - "learning_rate": 5.900270379561743e-07, - "loss": 0.9474, - "step": 8389 - }, - { - "epoch": 0.7566397619154981, - "grad_norm": 1.4546310659703159, - "learning_rate": 5.896127638198399e-07, - "loss": 0.9541, - "step": 8390 - }, - { - "epoch": 0.7567299454389683, - "grad_norm": 1.7220905092934244, - "learning_rate": 5.89198610025521e-07, - "loss": 0.9518, - "step": 8391 - }, - { - "epoch": 0.7568201289624386, - "grad_norm": 1.492928525661277, - "learning_rate": 5.887845766085559e-07, - "loss": 1.0123, - "step": 8392 - }, - { - "epoch": 0.7569103124859088, - "grad_norm": 1.56905781738944, - "learning_rate": 5.883706636042722e-07, - "loss": 0.8936, - "step": 8393 - }, - { - "epoch": 0.7570004960093791, - "grad_norm": 1.8766796853656815, - "learning_rate": 5.879568710479879e-07, - "loss": 0.8848, - "step": 8394 - }, - { - "epoch": 0.7570906795328494, - "grad_norm": 1.4065487438343698, - "learning_rate": 5.875431989750078e-07, - "loss": 1.0313, - "step": 8395 - }, - { - "epoch": 0.7571808630563196, - "grad_norm": 1.2484327504780246, - "learning_rate": 5.871296474206313e-07, - "loss": 0.8679, - "step": 8396 - }, - { - "epoch": 0.7572710465797898, - "grad_norm": 1.4322185520815298, - "learning_rate": 5.867162164201427e-07, - "loss": 0.945, - "step": 8397 - }, - { - "epoch": 0.7573612301032602, - "grad_norm": 1.494377646003145, - "learning_rate": 5.863029060088205e-07, - "loss": 0.9692, - "step": 8398 - }, - { - "epoch": 0.7574514136267304, - "grad_norm": 1.5570043428491531, - "learning_rate": 5.858897162219289e-07, - "loss": 0.8336, - "step": 8399 - }, - { - "epoch": 0.7575415971502006, - "grad_norm": 1.7612067787601373, - "learning_rate": 5.854766470947238e-07, - "loss": 0.9839, - "step": 8400 - }, - { - "epoch": 0.7576317806736709, - "grad_norm": 1.253439890807636, - "learning_rate": 5.850636986624511e-07, - "loss": 0.925, - "step": 8401 - }, - { - "epoch": 0.7577219641971412, - "grad_norm": 1.5030810978241835, - "learning_rate": 5.846508709603453e-07, - "loss": 0.9464, - "step": 8402 - }, - { - "epoch": 0.7578121477206115, - "grad_norm": 3.2672290421722305, - "learning_rate": 5.842381640236318e-07, - "loss": 0.9184, - "step": 8403 - }, - { - "epoch": 0.7579023312440817, - "grad_norm": 1.4367977583185902, - "learning_rate": 5.838255778875242e-07, - "loss": 0.9384, - "step": 8404 - }, - { - "epoch": 0.7579925147675519, - "grad_norm": 1.5668941508501018, - "learning_rate": 5.83413112587227e-07, - "loss": 0.9632, - "step": 8405 - }, - { - "epoch": 0.7580826982910223, - "grad_norm": 1.4632334899440178, - "learning_rate": 5.830007681579338e-07, - "loss": 0.9212, - "step": 8406 - }, - { - "epoch": 0.7581728818144925, - "grad_norm": 1.563082090671224, - "learning_rate": 5.825885446348284e-07, - "loss": 0.9521, - "step": 8407 - }, - { - "epoch": 0.7582630653379627, - "grad_norm": 1.4173928344422713, - "learning_rate": 5.821764420530842e-07, - "loss": 0.8857, - "step": 8408 - }, - { - "epoch": 0.7583532488614331, - "grad_norm": 1.2623270652849745, - "learning_rate": 5.817644604478633e-07, - "loss": 0.9544, - "step": 8409 - }, - { - "epoch": 0.7584434323849033, - "grad_norm": 1.4496853223892439, - "learning_rate": 5.81352599854319e-07, - "loss": 0.8764, - "step": 8410 - }, - { - "epoch": 0.7585336159083735, - "grad_norm": 1.5379259589421035, - "learning_rate": 5.809408603075938e-07, - "loss": 0.9762, - "step": 8411 - }, - { - "epoch": 0.7586237994318438, - "grad_norm": 1.5540938274233718, - "learning_rate": 5.805292418428176e-07, - "loss": 0.926, - "step": 8412 - }, - { - "epoch": 0.7587139829553141, - "grad_norm": 2.325960112829145, - "learning_rate": 5.801177444951148e-07, - "loss": 0.9249, - "step": 8413 - }, - { - "epoch": 0.7588041664787843, - "grad_norm": 1.2560414231266848, - "learning_rate": 5.797063682995944e-07, - "loss": 0.976, - "step": 8414 - }, - { - "epoch": 0.7588943500022546, - "grad_norm": 1.5209604323417711, - "learning_rate": 5.792951132913584e-07, - "loss": 1.0089, - "step": 8415 - }, - { - "epoch": 0.7589845335257248, - "grad_norm": 1.5388181076037495, - "learning_rate": 5.788839795054968e-07, - "loss": 0.9868, - "step": 8416 - }, - { - "epoch": 0.7590747170491952, - "grad_norm": 1.159728771021177, - "learning_rate": 5.784729669770898e-07, - "loss": 0.8541, - "step": 8417 - }, - { - "epoch": 0.7591649005726654, - "grad_norm": 1.3165154567438466, - "learning_rate": 5.780620757412084e-07, - "loss": 0.94, - "step": 8418 - }, - { - "epoch": 0.7592550840961356, - "grad_norm": 1.4766437165060589, - "learning_rate": 5.776513058329098e-07, - "loss": 0.9552, - "step": 8419 - }, - { - "epoch": 0.7593452676196059, - "grad_norm": 1.2934945872561847, - "learning_rate": 5.772406572872459e-07, - "loss": 0.9617, - "step": 8420 - }, - { - "epoch": 0.7594354511430762, - "grad_norm": 1.4045360292571296, - "learning_rate": 5.768301301392535e-07, - "loss": 0.9715, - "step": 8421 - }, - { - "epoch": 0.7595256346665464, - "grad_norm": 1.3863359809538964, - "learning_rate": 5.764197244239615e-07, - "loss": 0.9516, - "step": 8422 - }, - { - "epoch": 0.7596158181900167, - "grad_norm": 1.8287569567638646, - "learning_rate": 5.760094401763884e-07, - "loss": 0.9816, - "step": 8423 - }, - { - "epoch": 0.7597060017134869, - "grad_norm": 1.6159782126865336, - "learning_rate": 5.755992774315414e-07, - "loss": 0.9172, - "step": 8424 - }, - { - "epoch": 0.7597961852369572, - "grad_norm": 1.6278326581152431, - "learning_rate": 5.751892362244183e-07, - "loss": 0.9356, - "step": 8425 - }, - { - "epoch": 0.7598863687604275, - "grad_norm": 2.056085147452663, - "learning_rate": 5.747793165900065e-07, - "loss": 0.9916, - "step": 8426 - }, - { - "epoch": 0.7599765522838977, - "grad_norm": 1.5170294949474943, - "learning_rate": 5.743695185632806e-07, - "loss": 0.9928, - "step": 8427 - }, - { - "epoch": 0.7600667358073679, - "grad_norm": 1.7087999342436189, - "learning_rate": 5.739598421792098e-07, - "loss": 0.9847, - "step": 8428 - }, - { - "epoch": 0.7601569193308383, - "grad_norm": 1.3165477371021943, - "learning_rate": 5.735502874727474e-07, - "loss": 0.9198, - "step": 8429 - }, - { - "epoch": 0.7602471028543085, - "grad_norm": 1.4479878177130363, - "learning_rate": 5.731408544788398e-07, - "loss": 1.0644, - "step": 8430 - }, - { - "epoch": 0.7603372863777788, - "grad_norm": 1.2763099803523281, - "learning_rate": 5.727315432324225e-07, - "loss": 0.8558, - "step": 8431 - }, - { - "epoch": 0.760427469901249, - "grad_norm": 1.4934055489863716, - "learning_rate": 5.723223537684196e-07, - "loss": 0.9347, - "step": 8432 - }, - { - "epoch": 0.7605176534247193, - "grad_norm": 1.5992878461482931, - "learning_rate": 5.719132861217462e-07, - "loss": 1.0133, - "step": 8433 - }, - { - "epoch": 0.7606078369481896, - "grad_norm": 1.2266762067244146, - "learning_rate": 5.715043403273044e-07, - "loss": 0.9054, - "step": 8434 - }, - { - "epoch": 0.7606980204716598, - "grad_norm": 1.8603741822156676, - "learning_rate": 5.710955164199902e-07, - "loss": 0.8997, - "step": 8435 - }, - { - "epoch": 0.7607882039951301, - "grad_norm": 1.3503622999412417, - "learning_rate": 5.706868144346841e-07, - "loss": 0.9545, - "step": 8436 - }, - { - "epoch": 0.7608783875186004, - "grad_norm": 1.3324654059007923, - "learning_rate": 5.702782344062613e-07, - "loss": 0.9316, - "step": 8437 - }, - { - "epoch": 0.7609685710420706, - "grad_norm": 1.4641791298252063, - "learning_rate": 5.698697763695826e-07, - "loss": 0.9178, - "step": 8438 - }, - { - "epoch": 0.7610587545655408, - "grad_norm": 1.3815547158212456, - "learning_rate": 5.694614403595002e-07, - "loss": 0.9539, - "step": 8439 - }, - { - "epoch": 0.7611489380890112, - "grad_norm": 1.3742470847326693, - "learning_rate": 5.690532264108554e-07, - "loss": 0.8518, - "step": 8440 - }, - { - "epoch": 0.7612391216124814, - "grad_norm": 1.456722523398884, - "learning_rate": 5.686451345584795e-07, - "loss": 0.9776, - "step": 8441 - }, - { - "epoch": 0.7613293051359517, - "grad_norm": 1.61458684449173, - "learning_rate": 5.682371648371933e-07, - "loss": 1.0324, - "step": 8442 - }, - { - "epoch": 0.7614194886594219, - "grad_norm": 1.5229854597773937, - "learning_rate": 5.678293172818074e-07, - "loss": 0.8985, - "step": 8443 - }, - { - "epoch": 0.7615096721828922, - "grad_norm": 1.6398142673410065, - "learning_rate": 5.674215919271204e-07, - "loss": 0.9565, - "step": 8444 - }, - { - "epoch": 0.7615998557063625, - "grad_norm": 1.546785448352836, - "learning_rate": 5.670139888079224e-07, - "loss": 0.8724, - "step": 8445 - }, - { - "epoch": 0.7616900392298327, - "grad_norm": 1.3097372041162036, - "learning_rate": 5.666065079589924e-07, - "loss": 0.9987, - "step": 8446 - }, - { - "epoch": 0.7617802227533029, - "grad_norm": 1.5450045133111208, - "learning_rate": 5.661991494150986e-07, - "loss": 0.8916, - "step": 8447 - }, - { - "epoch": 0.7618704062767733, - "grad_norm": 1.4663781026700564, - "learning_rate": 5.657919132109999e-07, - "loss": 0.9853, - "step": 8448 - }, - { - "epoch": 0.7619605898002435, - "grad_norm": 1.5218137254853774, - "learning_rate": 5.653847993814421e-07, - "loss": 0.9665, - "step": 8449 - }, - { - "epoch": 0.7620507733237137, - "grad_norm": 1.4357005757163235, - "learning_rate": 5.649778079611647e-07, - "loss": 0.9546, - "step": 8450 - }, - { - "epoch": 0.762140956847184, - "grad_norm": 1.3459457378077784, - "learning_rate": 5.645709389848923e-07, - "loss": 0.9186, - "step": 8451 - }, - { - "epoch": 0.7622311403706543, - "grad_norm": 1.5430335695196098, - "learning_rate": 5.641641924873435e-07, - "loss": 0.8943, - "step": 8452 - }, - { - "epoch": 0.7623213238941245, - "grad_norm": 1.4285902805446873, - "learning_rate": 5.637575685032217e-07, - "loss": 0.9357, - "step": 8453 - }, - { - "epoch": 0.7624115074175948, - "grad_norm": 2.4882040206446194, - "learning_rate": 5.633510670672246e-07, - "loss": 1.0165, - "step": 8454 - }, - { - "epoch": 0.762501690941065, - "grad_norm": 0.6678981185958552, - "learning_rate": 5.629446882140354e-07, - "loss": 0.8219, - "step": 8455 - }, - { - "epoch": 0.7625918744645354, - "grad_norm": 1.3016552895469218, - "learning_rate": 5.625384319783295e-07, - "loss": 0.9424, - "step": 8456 - }, - { - "epoch": 0.7626820579880056, - "grad_norm": 1.763590830887276, - "learning_rate": 5.621322983947705e-07, - "loss": 0.8894, - "step": 8457 - }, - { - "epoch": 0.7627722415114758, - "grad_norm": 1.5273441402503098, - "learning_rate": 5.617262874980122e-07, - "loss": 0.9803, - "step": 8458 - }, - { - "epoch": 0.7628624250349462, - "grad_norm": 1.3532246418621996, - "learning_rate": 5.613203993226981e-07, - "loss": 0.9847, - "step": 8459 - }, - { - "epoch": 0.7629526085584164, - "grad_norm": 1.8963334468417514, - "learning_rate": 5.609146339034599e-07, - "loss": 0.988, - "step": 8460 - }, - { - "epoch": 0.7630427920818866, - "grad_norm": 1.570827039238878, - "learning_rate": 5.605089912749199e-07, - "loss": 0.8822, - "step": 8461 - }, - { - "epoch": 0.7631329756053569, - "grad_norm": 1.8182982076975407, - "learning_rate": 5.601034714716901e-07, - "loss": 0.8796, - "step": 8462 - }, - { - "epoch": 0.7632231591288272, - "grad_norm": 1.2323723010749332, - "learning_rate": 5.59698074528372e-07, - "loss": 0.9126, - "step": 8463 - }, - { - "epoch": 0.7633133426522974, - "grad_norm": 1.3271306972146033, - "learning_rate": 5.592928004795555e-07, - "loss": 0.9135, - "step": 8464 - }, - { - "epoch": 0.7634035261757677, - "grad_norm": 1.3538964906793114, - "learning_rate": 5.58887649359822e-07, - "loss": 0.9423, - "step": 8465 - }, - { - "epoch": 0.7634937096992379, - "grad_norm": 1.407908796431615, - "learning_rate": 5.584826212037393e-07, - "loss": 0.8022, - "step": 8466 - }, - { - "epoch": 0.7635838932227083, - "grad_norm": 1.2163124160982193, - "learning_rate": 5.580777160458689e-07, - "loss": 0.8534, - "step": 8467 - }, - { - "epoch": 0.7636740767461785, - "grad_norm": 1.2914661077437792, - "learning_rate": 5.576729339207574e-07, - "loss": 0.9629, - "step": 8468 - }, - { - "epoch": 0.7637642602696487, - "grad_norm": 1.1589635504486824, - "learning_rate": 5.572682748629449e-07, - "loss": 0.9924, - "step": 8469 - }, - { - "epoch": 0.763854443793119, - "grad_norm": 1.280364172285917, - "learning_rate": 5.568637389069582e-07, - "loss": 0.9235, - "step": 8470 - }, - { - "epoch": 0.7639446273165893, - "grad_norm": 1.3520046679512114, - "learning_rate": 5.564593260873145e-07, - "loss": 0.955, - "step": 8471 - }, - { - "epoch": 0.7640348108400595, - "grad_norm": 1.2668127914685086, - "learning_rate": 5.560550364385206e-07, - "loss": 0.9778, - "step": 8472 - }, - { - "epoch": 0.7641249943635298, - "grad_norm": 1.433574707570013, - "learning_rate": 5.556508699950728e-07, - "loss": 1.0142, - "step": 8473 - }, - { - "epoch": 0.764215177887, - "grad_norm": 1.7677463177489, - "learning_rate": 5.552468267914577e-07, - "loss": 0.8945, - "step": 8474 - }, - { - "epoch": 0.7643053614104703, - "grad_norm": 1.6103776937739216, - "learning_rate": 5.548429068621481e-07, - "loss": 0.8533, - "step": 8475 - }, - { - "epoch": 0.7643955449339406, - "grad_norm": 0.6654388973493354, - "learning_rate": 5.544391102416115e-07, - "loss": 0.807, - "step": 8476 - }, - { - "epoch": 0.7644857284574108, - "grad_norm": 1.6987915905260398, - "learning_rate": 5.540354369643003e-07, - "loss": 0.9976, - "step": 8477 - }, - { - "epoch": 0.764575911980881, - "grad_norm": 1.9243486194097077, - "learning_rate": 5.536318870646586e-07, - "loss": 0.801, - "step": 8478 - }, - { - "epoch": 0.7646660955043514, - "grad_norm": 1.744585858253539, - "learning_rate": 5.532284605771194e-07, - "loss": 0.842, - "step": 8479 - }, - { - "epoch": 0.7647562790278216, - "grad_norm": 1.2992087817426146, - "learning_rate": 5.528251575361052e-07, - "loss": 0.9056, - "step": 8480 - }, - { - "epoch": 0.7648464625512919, - "grad_norm": 1.274783634077985, - "learning_rate": 5.524219779760284e-07, - "loss": 0.9296, - "step": 8481 - }, - { - "epoch": 0.7649366460747622, - "grad_norm": 1.5222005664293605, - "learning_rate": 5.520189219312907e-07, - "loss": 0.9642, - "step": 8482 - }, - { - "epoch": 0.7650268295982324, - "grad_norm": 2.015112641738129, - "learning_rate": 5.516159894362817e-07, - "loss": 0.8203, - "step": 8483 - }, - { - "epoch": 0.7651170131217027, - "grad_norm": 1.3615077596661147, - "learning_rate": 5.512131805253839e-07, - "loss": 0.8782, - "step": 8484 - }, - { - "epoch": 0.7652071966451729, - "grad_norm": 1.3021520316756823, - "learning_rate": 5.508104952329653e-07, - "loss": 0.9513, - "step": 8485 - }, - { - "epoch": 0.7652973801686432, - "grad_norm": 1.8075194494696818, - "learning_rate": 5.504079335933862e-07, - "loss": 0.9636, - "step": 8486 - }, - { - "epoch": 0.7653875636921135, - "grad_norm": 1.4868142414547048, - "learning_rate": 5.500054956409952e-07, - "loss": 0.969, - "step": 8487 - }, - { - "epoch": 0.7654777472155837, - "grad_norm": 1.452003456374329, - "learning_rate": 5.496031814101303e-07, - "loss": 0.8962, - "step": 8488 - }, - { - "epoch": 0.7655679307390539, - "grad_norm": 1.3956839021244578, - "learning_rate": 5.492009909351203e-07, - "loss": 0.9482, - "step": 8489 - }, - { - "epoch": 0.7656581142625243, - "grad_norm": 1.583130631103621, - "learning_rate": 5.4879892425028e-07, - "loss": 0.8621, - "step": 8490 - }, - { - "epoch": 0.7657482977859945, - "grad_norm": 1.3323911477121533, - "learning_rate": 5.483969813899184e-07, - "loss": 1.0709, - "step": 8491 - }, - { - "epoch": 0.7658384813094647, - "grad_norm": 1.2609973179503557, - "learning_rate": 5.479951623883299e-07, - "loss": 1.0129, - "step": 8492 - }, - { - "epoch": 0.765928664832935, - "grad_norm": 1.5756651336751772, - "learning_rate": 5.475934672798004e-07, - "loss": 0.8688, - "step": 8493 - }, - { - "epoch": 0.7660188483564053, - "grad_norm": 1.5421747796520577, - "learning_rate": 5.471918960986047e-07, - "loss": 0.9049, - "step": 8494 - }, - { - "epoch": 0.7661090318798756, - "grad_norm": 1.5784069602547963, - "learning_rate": 5.467904488790071e-07, - "loss": 1.0322, - "step": 8495 - }, - { - "epoch": 0.7661992154033458, - "grad_norm": 1.3829879649418542, - "learning_rate": 5.463891256552615e-07, - "loss": 0.944, - "step": 8496 - }, - { - "epoch": 0.766289398926816, - "grad_norm": 1.667762261145616, - "learning_rate": 5.459879264616107e-07, - "loss": 0.8482, - "step": 8497 - }, - { - "epoch": 0.7663795824502864, - "grad_norm": 1.6539296144502837, - "learning_rate": 5.455868513322874e-07, - "loss": 0.8134, - "step": 8498 - }, - { - "epoch": 0.7664697659737566, - "grad_norm": 1.4704834468756867, - "learning_rate": 5.451859003015143e-07, - "loss": 0.831, - "step": 8499 - }, - { - "epoch": 0.7665599494972268, - "grad_norm": 1.6501068051767438, - "learning_rate": 5.447850734035009e-07, - "loss": 0.9767, - "step": 8500 - }, - { - "epoch": 0.7666501330206971, - "grad_norm": 1.4719341744030658, - "learning_rate": 5.443843706724494e-07, - "loss": 0.9288, - "step": 8501 - }, - { - "epoch": 0.7667403165441674, - "grad_norm": 1.513893239231839, - "learning_rate": 5.439837921425494e-07, - "loss": 0.867, - "step": 8502 - }, - { - "epoch": 0.7668305000676376, - "grad_norm": 1.2314888252962464, - "learning_rate": 5.435833378479807e-07, - "loss": 0.925, - "step": 8503 - }, - { - "epoch": 0.7669206835911079, - "grad_norm": 1.4316720165438424, - "learning_rate": 5.431830078229128e-07, - "loss": 0.7995, - "step": 8504 - }, - { - "epoch": 0.7670108671145782, - "grad_norm": 1.304892198414385, - "learning_rate": 5.427828021015022e-07, - "loss": 0.9536, - "step": 8505 - }, - { - "epoch": 0.7671010506380485, - "grad_norm": 1.3530874304368243, - "learning_rate": 5.42382720717899e-07, - "loss": 0.9676, - "step": 8506 - }, - { - "epoch": 0.7671912341615187, - "grad_norm": 1.322782024029777, - "learning_rate": 5.419827637062384e-07, - "loss": 0.9647, - "step": 8507 - }, - { - "epoch": 0.7672814176849889, - "grad_norm": 1.6009235130829396, - "learning_rate": 5.415829311006487e-07, - "loss": 1.0847, - "step": 8508 - }, - { - "epoch": 0.7673716012084593, - "grad_norm": 1.4804081967332787, - "learning_rate": 5.411832229352447e-07, - "loss": 1.0121, - "step": 8509 - }, - { - "epoch": 0.7674617847319295, - "grad_norm": 1.4368769912739114, - "learning_rate": 5.407836392441319e-07, - "loss": 1.0259, - "step": 8510 - }, - { - "epoch": 0.7675519682553997, - "grad_norm": 1.321881417333298, - "learning_rate": 5.403841800614049e-07, - "loss": 0.9551, - "step": 8511 - }, - { - "epoch": 0.76764215177887, - "grad_norm": 1.1910419344850303, - "learning_rate": 5.39984845421148e-07, - "loss": 0.8902, - "step": 8512 - }, - { - "epoch": 0.7677323353023403, - "grad_norm": 1.548908015111331, - "learning_rate": 5.395856353574344e-07, - "loss": 0.917, - "step": 8513 - }, - { - "epoch": 0.7678225188258105, - "grad_norm": 0.642558459876711, - "learning_rate": 5.391865499043275e-07, - "loss": 0.8066, - "step": 8514 - }, - { - "epoch": 0.7679127023492808, - "grad_norm": 1.2657506432980872, - "learning_rate": 5.387875890958788e-07, - "loss": 0.939, - "step": 8515 - }, - { - "epoch": 0.768002885872751, - "grad_norm": 1.2389032865313967, - "learning_rate": 5.383887529661298e-07, - "loss": 0.9633, - "step": 8516 - }, - { - "epoch": 0.7680930693962214, - "grad_norm": 1.596802201076703, - "learning_rate": 5.379900415491116e-07, - "loss": 0.8926, - "step": 8517 - }, - { - "epoch": 0.7681832529196916, - "grad_norm": 1.3862903201789658, - "learning_rate": 5.375914548788447e-07, - "loss": 0.8888, - "step": 8518 - }, - { - "epoch": 0.7682734364431618, - "grad_norm": 1.4705536501688412, - "learning_rate": 5.371929929893384e-07, - "loss": 0.82, - "step": 8519 - }, - { - "epoch": 0.768363619966632, - "grad_norm": 1.4379746026779392, - "learning_rate": 5.367946559145917e-07, - "loss": 0.9025, - "step": 8520 - }, - { - "epoch": 0.7684538034901024, - "grad_norm": 1.2866088171704386, - "learning_rate": 5.363964436885935e-07, - "loss": 0.9632, - "step": 8521 - }, - { - "epoch": 0.7685439870135726, - "grad_norm": 1.213655485564955, - "learning_rate": 5.359983563453199e-07, - "loss": 1.0187, - "step": 8522 - }, - { - "epoch": 0.7686341705370429, - "grad_norm": 1.720621096228658, - "learning_rate": 5.356003939187402e-07, - "loss": 1.0543, - "step": 8523 - }, - { - "epoch": 0.7687243540605131, - "grad_norm": 1.6256361229797238, - "learning_rate": 5.352025564428082e-07, - "loss": 0.9144, - "step": 8524 - }, - { - "epoch": 0.7688145375839834, - "grad_norm": 1.4856878799014974, - "learning_rate": 5.348048439514723e-07, - "loss": 0.9815, - "step": 8525 - }, - { - "epoch": 0.7689047211074537, - "grad_norm": 1.569359409462965, - "learning_rate": 5.344072564786653e-07, - "loss": 0.8278, - "step": 8526 - }, - { - "epoch": 0.7689949046309239, - "grad_norm": 1.3213712070415342, - "learning_rate": 5.340097940583123e-07, - "loss": 0.948, - "step": 8527 - }, - { - "epoch": 0.7690850881543942, - "grad_norm": 1.6750692353244665, - "learning_rate": 5.336124567243275e-07, - "loss": 0.8981, - "step": 8528 - }, - { - "epoch": 0.7691752716778645, - "grad_norm": 2.5387592780179875, - "learning_rate": 5.33215244510613e-07, - "loss": 0.9498, - "step": 8529 - }, - { - "epoch": 0.7692654552013347, - "grad_norm": 1.567887516701982, - "learning_rate": 5.328181574510624e-07, - "loss": 0.9396, - "step": 8530 - }, - { - "epoch": 0.769355638724805, - "grad_norm": 1.4234164758798291, - "learning_rate": 5.324211955795559e-07, - "loss": 0.9899, - "step": 8531 - }, - { - "epoch": 0.7694458222482753, - "grad_norm": 1.578948835112911, - "learning_rate": 5.320243589299651e-07, - "loss": 0.8669, - "step": 8532 - }, - { - "epoch": 0.7695360057717455, - "grad_norm": 1.4862751092867126, - "learning_rate": 5.316276475361505e-07, - "loss": 0.8998, - "step": 8533 - }, - { - "epoch": 0.7696261892952158, - "grad_norm": 2.0244152402161313, - "learning_rate": 5.312310614319613e-07, - "loss": 0.9732, - "step": 8534 - }, - { - "epoch": 0.769716372818686, - "grad_norm": 1.6727737435380532, - "learning_rate": 5.308346006512367e-07, - "loss": 0.9075, - "step": 8535 - }, - { - "epoch": 0.7698065563421563, - "grad_norm": 1.4567672858560425, - "learning_rate": 5.30438265227805e-07, - "loss": 0.8635, - "step": 8536 - }, - { - "epoch": 0.7698967398656266, - "grad_norm": 0.6661479963834678, - "learning_rate": 5.300420551954837e-07, - "loss": 0.7953, - "step": 8537 - }, - { - "epoch": 0.7699869233890968, - "grad_norm": 1.2682510729183063, - "learning_rate": 5.296459705880798e-07, - "loss": 0.8648, - "step": 8538 - }, - { - "epoch": 0.770077106912567, - "grad_norm": 1.29220618496247, - "learning_rate": 5.292500114393881e-07, - "loss": 0.9897, - "step": 8539 - }, - { - "epoch": 0.7701672904360374, - "grad_norm": 2.1062942047461037, - "learning_rate": 5.288541777831963e-07, - "loss": 0.9587, - "step": 8540 - }, - { - "epoch": 0.7702574739595076, - "grad_norm": 1.2821443274247057, - "learning_rate": 5.284584696532772e-07, - "loss": 0.9902, - "step": 8541 - }, - { - "epoch": 0.7703476574829778, - "grad_norm": 1.3221446310156413, - "learning_rate": 5.280628870833954e-07, - "loss": 0.9412, - "step": 8542 - }, - { - "epoch": 0.7704378410064481, - "grad_norm": 1.7747783549368135, - "learning_rate": 5.276674301073045e-07, - "loss": 0.9712, - "step": 8543 - }, - { - "epoch": 0.7705280245299184, - "grad_norm": 1.2256889391167003, - "learning_rate": 5.272720987587467e-07, - "loss": 0.9462, - "step": 8544 - }, - { - "epoch": 0.7706182080533887, - "grad_norm": 1.7370450199422445, - "learning_rate": 5.268768930714545e-07, - "loss": 0.9009, - "step": 8545 - }, - { - "epoch": 0.7707083915768589, - "grad_norm": 1.4980135320859496, - "learning_rate": 5.264818130791473e-07, - "loss": 0.9714, - "step": 8546 - }, - { - "epoch": 0.7707985751003291, - "grad_norm": 1.7441543629821845, - "learning_rate": 5.260868588155378e-07, - "loss": 0.9412, - "step": 8547 - }, - { - "epoch": 0.7708887586237995, - "grad_norm": 1.2293757352453456, - "learning_rate": 5.256920303143242e-07, - "loss": 0.9642, - "step": 8548 - }, - { - "epoch": 0.7709789421472697, - "grad_norm": 1.5430728152950568, - "learning_rate": 5.252973276091956e-07, - "loss": 1.0452, - "step": 8549 - }, - { - "epoch": 0.7710691256707399, - "grad_norm": 1.3472776461645897, - "learning_rate": 5.249027507338307e-07, - "loss": 1.0028, - "step": 8550 - }, - { - "epoch": 0.7711593091942102, - "grad_norm": 1.2385237302943852, - "learning_rate": 5.245082997218966e-07, - "loss": 0.9103, - "step": 8551 - }, - { - "epoch": 0.7712494927176805, - "grad_norm": 1.734358916336953, - "learning_rate": 5.241139746070499e-07, - "loss": 0.9643, - "step": 8552 - }, - { - "epoch": 0.7713396762411507, - "grad_norm": 1.4118356610914187, - "learning_rate": 5.237197754229376e-07, - "loss": 0.8217, - "step": 8553 - }, - { - "epoch": 0.771429859764621, - "grad_norm": 1.342594981039171, - "learning_rate": 5.233257022031931e-07, - "loss": 0.8457, - "step": 8554 - }, - { - "epoch": 0.7715200432880913, - "grad_norm": 1.3218497181451963, - "learning_rate": 5.229317549814432e-07, - "loss": 0.9436, - "step": 8555 - }, - { - "epoch": 0.7716102268115616, - "grad_norm": 1.5878237326633304, - "learning_rate": 5.225379337912998e-07, - "loss": 1.02, - "step": 8556 - }, - { - "epoch": 0.7717004103350318, - "grad_norm": 1.3913109191416704, - "learning_rate": 5.221442386663663e-07, - "loss": 0.9506, - "step": 8557 - }, - { - "epoch": 0.771790593858502, - "grad_norm": 1.2793059137622005, - "learning_rate": 5.217506696402354e-07, - "loss": 0.9827, - "step": 8558 - }, - { - "epoch": 0.7718807773819724, - "grad_norm": 1.6797222754848522, - "learning_rate": 5.213572267464883e-07, - "loss": 0.95, - "step": 8559 - }, - { - "epoch": 0.7719709609054426, - "grad_norm": 1.4598494777426216, - "learning_rate": 5.209639100186965e-07, - "loss": 0.9227, - "step": 8560 - }, - { - "epoch": 0.7720611444289128, - "grad_norm": 0.7878293892587039, - "learning_rate": 5.205707194904179e-07, - "loss": 0.8705, - "step": 8561 - }, - { - "epoch": 0.7721513279523831, - "grad_norm": 1.6965244725016846, - "learning_rate": 5.201776551952042e-07, - "loss": 0.8681, - "step": 8562 - }, - { - "epoch": 0.7722415114758534, - "grad_norm": 1.9283574883814532, - "learning_rate": 5.197847171665914e-07, - "loss": 0.9, - "step": 8563 - }, - { - "epoch": 0.7723316949993236, - "grad_norm": 1.5881014187806168, - "learning_rate": 5.193919054381095e-07, - "loss": 0.9214, - "step": 8564 - }, - { - "epoch": 0.7724218785227939, - "grad_norm": 1.5081634903575478, - "learning_rate": 5.189992200432738e-07, - "loss": 0.9079, - "step": 8565 - }, - { - "epoch": 0.7725120620462641, - "grad_norm": 2.4906645040597994, - "learning_rate": 5.186066610155906e-07, - "loss": 0.9467, - "step": 8566 - }, - { - "epoch": 0.7726022455697344, - "grad_norm": 1.4567802969886907, - "learning_rate": 5.182142283885555e-07, - "loss": 0.9892, - "step": 8567 - }, - { - "epoch": 0.7726924290932047, - "grad_norm": 1.1697150606439284, - "learning_rate": 5.178219221956528e-07, - "loss": 0.7776, - "step": 8568 - }, - { - "epoch": 0.7727826126166749, - "grad_norm": 1.633034795933514, - "learning_rate": 5.174297424703565e-07, - "loss": 0.9559, - "step": 8569 - }, - { - "epoch": 0.7728727961401451, - "grad_norm": 1.502394592726161, - "learning_rate": 5.170376892461299e-07, - "loss": 0.8756, - "step": 8570 - }, - { - "epoch": 0.7729629796636155, - "grad_norm": 0.7265092563603932, - "learning_rate": 5.16645762556424e-07, - "loss": 0.8566, - "step": 8571 - }, - { - "epoch": 0.7730531631870857, - "grad_norm": 1.3213428336942965, - "learning_rate": 5.162539624346809e-07, - "loss": 0.9881, - "step": 8572 - }, - { - "epoch": 0.773143346710556, - "grad_norm": 1.4162290215011706, - "learning_rate": 5.158622889143309e-07, - "loss": 0.9969, - "step": 8573 - }, - { - "epoch": 0.7732335302340262, - "grad_norm": 1.5635868107502688, - "learning_rate": 5.154707420287939e-07, - "loss": 0.9002, - "step": 8574 - }, - { - "epoch": 0.7733237137574965, - "grad_norm": 1.719068393693705, - "learning_rate": 5.150793218114793e-07, - "loss": 0.8679, - "step": 8575 - }, - { - "epoch": 0.7734138972809668, - "grad_norm": 1.3800754507143103, - "learning_rate": 5.146880282957837e-07, - "loss": 0.9272, - "step": 8576 - }, - { - "epoch": 0.773504080804437, - "grad_norm": 1.478467489489511, - "learning_rate": 5.142968615150964e-07, - "loss": 0.9338, - "step": 8577 - }, - { - "epoch": 0.7735942643279073, - "grad_norm": 1.6745410945311434, - "learning_rate": 5.139058215027921e-07, - "loss": 0.8721, - "step": 8578 - }, - { - "epoch": 0.7736844478513776, - "grad_norm": 1.2045237844327106, - "learning_rate": 5.135149082922383e-07, - "loss": 0.9279, - "step": 8579 - }, - { - "epoch": 0.7737746313748478, - "grad_norm": 1.55197059512057, - "learning_rate": 5.131241219167879e-07, - "loss": 0.8759, - "step": 8580 - }, - { - "epoch": 0.773864814898318, - "grad_norm": 0.7902209776243798, - "learning_rate": 5.127334624097869e-07, - "loss": 0.8391, - "step": 8581 - }, - { - "epoch": 0.7739549984217884, - "grad_norm": 1.4090766895533193, - "learning_rate": 5.123429298045672e-07, - "loss": 0.9956, - "step": 8582 - }, - { - "epoch": 0.7740451819452586, - "grad_norm": 1.4950309942853697, - "learning_rate": 5.119525241344515e-07, - "loss": 0.8314, - "step": 8583 - }, - { - "epoch": 0.7741353654687289, - "grad_norm": 2.3223319465272056, - "learning_rate": 5.115622454327515e-07, - "loss": 0.9498, - "step": 8584 - }, - { - "epoch": 0.7742255489921991, - "grad_norm": 1.5943781045115597, - "learning_rate": 5.11172093732768e-07, - "loss": 0.922, - "step": 8585 - }, - { - "epoch": 0.7743157325156694, - "grad_norm": 1.5030866495105628, - "learning_rate": 5.107820690677911e-07, - "loss": 0.9643, - "step": 8586 - }, - { - "epoch": 0.7744059160391397, - "grad_norm": 1.9275487870351704, - "learning_rate": 5.103921714710991e-07, - "loss": 0.8821, - "step": 8587 - }, - { - "epoch": 0.7744960995626099, - "grad_norm": 1.4897200870650753, - "learning_rate": 5.100024009759605e-07, - "loss": 0.9556, - "step": 8588 - }, - { - "epoch": 0.7745862830860801, - "grad_norm": 1.7140341131316505, - "learning_rate": 5.09612757615633e-07, - "loss": 0.9663, - "step": 8589 - }, - { - "epoch": 0.7746764666095505, - "grad_norm": 1.3899874565189754, - "learning_rate": 5.092232414233628e-07, - "loss": 0.8903, - "step": 8590 - }, - { - "epoch": 0.7747666501330207, - "grad_norm": 1.4756576752912567, - "learning_rate": 5.088338524323858e-07, - "loss": 0.9063, - "step": 8591 - }, - { - "epoch": 0.7748568336564909, - "grad_norm": 1.6897570033565348, - "learning_rate": 5.084445906759271e-07, - "loss": 0.9985, - "step": 8592 - }, - { - "epoch": 0.7749470171799612, - "grad_norm": 1.5361930316391046, - "learning_rate": 5.080554561871995e-07, - "loss": 1.0049, - "step": 8593 - }, - { - "epoch": 0.7750372007034315, - "grad_norm": 1.4087853359065763, - "learning_rate": 5.076664489994078e-07, - "loss": 0.9634, - "step": 8594 - }, - { - "epoch": 0.7751273842269017, - "grad_norm": 1.52633797781752, - "learning_rate": 5.07277569145742e-07, - "loss": 0.9109, - "step": 8595 - }, - { - "epoch": 0.775217567750372, - "grad_norm": 1.400693554747402, - "learning_rate": 5.068888166593861e-07, - "loss": 0.8582, - "step": 8596 - }, - { - "epoch": 0.7753077512738422, - "grad_norm": 1.5161274342636506, - "learning_rate": 5.065001915735087e-07, - "loss": 0.925, - "step": 8597 - }, - { - "epoch": 0.7753979347973126, - "grad_norm": 1.2928118653788034, - "learning_rate": 5.061116939212702e-07, - "loss": 0.9044, - "step": 8598 - }, - { - "epoch": 0.7754881183207828, - "grad_norm": 1.52436080732069, - "learning_rate": 5.05723323735819e-07, - "loss": 0.9355, - "step": 8599 - }, - { - "epoch": 0.775578301844253, - "grad_norm": 0.6420196842981697, - "learning_rate": 5.053350810502932e-07, - "loss": 0.8085, - "step": 8600 - }, - { - "epoch": 0.7756684853677234, - "grad_norm": 1.8304574546839931, - "learning_rate": 5.049469658978202e-07, - "loss": 0.9308, - "step": 8601 - }, - { - "epoch": 0.7757586688911936, - "grad_norm": 0.833684255304106, - "learning_rate": 5.045589783115147e-07, - "loss": 0.7555, - "step": 8602 - }, - { - "epoch": 0.7758488524146638, - "grad_norm": 1.5732132419454676, - "learning_rate": 5.041711183244842e-07, - "loss": 0.9745, - "step": 8603 - }, - { - "epoch": 0.7759390359381341, - "grad_norm": 1.5989558896057474, - "learning_rate": 5.037833859698211e-07, - "loss": 0.9612, - "step": 8604 - }, - { - "epoch": 0.7760292194616044, - "grad_norm": 1.5334581296694145, - "learning_rate": 5.033957812806096e-07, - "loss": 1.004, - "step": 8605 - }, - { - "epoch": 0.7761194029850746, - "grad_norm": 1.607378860584099, - "learning_rate": 5.030083042899223e-07, - "loss": 0.9903, - "step": 8606 - }, - { - "epoch": 0.7762095865085449, - "grad_norm": 1.5810508421673821, - "learning_rate": 5.026209550308207e-07, - "loss": 0.9423, - "step": 8607 - }, - { - "epoch": 0.7762997700320151, - "grad_norm": 1.8269885842938078, - "learning_rate": 5.022337335363558e-07, - "loss": 0.9881, - "step": 8608 - }, - { - "epoch": 0.7763899535554855, - "grad_norm": 1.526393741271745, - "learning_rate": 5.018466398395677e-07, - "loss": 0.959, - "step": 8609 - }, - { - "epoch": 0.7764801370789557, - "grad_norm": 1.2935220047561349, - "learning_rate": 5.01459673973484e-07, - "loss": 0.8719, - "step": 8610 - }, - { - "epoch": 0.7765703206024259, - "grad_norm": 2.037052142764767, - "learning_rate": 5.01072835971125e-07, - "loss": 1.0029, - "step": 8611 - }, - { - "epoch": 0.7766605041258962, - "grad_norm": 1.8033470030364638, - "learning_rate": 5.006861258654959e-07, - "loss": 0.8417, - "step": 8612 - }, - { - "epoch": 0.7767506876493665, - "grad_norm": 1.4545886144411975, - "learning_rate": 5.002995436895938e-07, - "loss": 0.9697, - "step": 8613 - }, - { - "epoch": 0.7768408711728367, - "grad_norm": 1.3038194246908708, - "learning_rate": 4.999130894764039e-07, - "loss": 0.8956, - "step": 8614 - }, - { - "epoch": 0.776931054696307, - "grad_norm": 1.431461421893082, - "learning_rate": 4.995267632589006e-07, - "loss": 0.8779, - "step": 8615 - }, - { - "epoch": 0.7770212382197772, - "grad_norm": 0.7891073308616505, - "learning_rate": 4.99140565070048e-07, - "loss": 0.8353, - "step": 8616 - }, - { - "epoch": 0.7771114217432475, - "grad_norm": 1.2992791102753904, - "learning_rate": 4.987544949427969e-07, - "loss": 0.9678, - "step": 8617 - }, - { - "epoch": 0.7772016052667178, - "grad_norm": 0.6667388166684771, - "learning_rate": 4.98368552910091e-07, - "loss": 0.8061, - "step": 8618 - }, - { - "epoch": 0.777291788790188, - "grad_norm": 2.2726836642503163, - "learning_rate": 4.979827390048596e-07, - "loss": 1.046, - "step": 8619 - }, - { - "epoch": 0.7773819723136582, - "grad_norm": 1.257688528076322, - "learning_rate": 4.975970532600231e-07, - "loss": 0.9057, - "step": 8620 - }, - { - "epoch": 0.7774721558371286, - "grad_norm": 2.0113404149702205, - "learning_rate": 4.972114957084901e-07, - "loss": 0.9569, - "step": 8621 - }, - { - "epoch": 0.7775623393605988, - "grad_norm": 1.2434844917339587, - "learning_rate": 4.968260663831585e-07, - "loss": 0.9172, - "step": 8622 - }, - { - "epoch": 0.777652522884069, - "grad_norm": 1.2393332744736543, - "learning_rate": 4.964407653169154e-07, - "loss": 1.0434, - "step": 8623 - }, - { - "epoch": 0.7777427064075394, - "grad_norm": 1.3079854433842069, - "learning_rate": 4.960555925426366e-07, - "loss": 0.9159, - "step": 8624 - }, - { - "epoch": 0.7778328899310096, - "grad_norm": 1.6913493016899699, - "learning_rate": 4.956705480931876e-07, - "loss": 0.9985, - "step": 8625 - }, - { - "epoch": 0.7779230734544799, - "grad_norm": 1.3361283550793122, - "learning_rate": 4.952856320014225e-07, - "loss": 0.9164, - "step": 8626 - }, - { - "epoch": 0.7780132569779501, - "grad_norm": 1.2009302407967721, - "learning_rate": 4.949008443001838e-07, - "loss": 0.8773, - "step": 8627 - }, - { - "epoch": 0.7781034405014204, - "grad_norm": 1.4076688178745542, - "learning_rate": 4.945161850223041e-07, - "loss": 0.9454, - "step": 8628 - }, - { - "epoch": 0.7781936240248907, - "grad_norm": 1.4783057362425878, - "learning_rate": 4.941316542006044e-07, - "loss": 0.8698, - "step": 8629 - }, - { - "epoch": 0.7782838075483609, - "grad_norm": 1.222925321081538, - "learning_rate": 4.937472518678956e-07, - "loss": 0.9637, - "step": 8630 - }, - { - "epoch": 0.7783739910718311, - "grad_norm": 1.5566093030049442, - "learning_rate": 4.93362978056977e-07, - "loss": 0.9357, - "step": 8631 - }, - { - "epoch": 0.7784641745953015, - "grad_norm": 1.2767145782988922, - "learning_rate": 4.929788328006355e-07, - "loss": 0.8817, - "step": 8632 - }, - { - "epoch": 0.7785543581187717, - "grad_norm": 1.5218417686891768, - "learning_rate": 4.925948161316506e-07, - "loss": 1.0028, - "step": 8633 - }, - { - "epoch": 0.778644541642242, - "grad_norm": 1.5207381371093893, - "learning_rate": 4.922109280827868e-07, - "loss": 0.9301, - "step": 8634 - }, - { - "epoch": 0.7787347251657122, - "grad_norm": 1.4259313216985767, - "learning_rate": 4.918271686868016e-07, - "loss": 0.9334, - "step": 8635 - }, - { - "epoch": 0.7788249086891825, - "grad_norm": 1.5194812756634004, - "learning_rate": 4.914435379764379e-07, - "loss": 0.986, - "step": 8636 - }, - { - "epoch": 0.7789150922126528, - "grad_norm": 1.295586692954067, - "learning_rate": 4.910600359844294e-07, - "loss": 0.9258, - "step": 8637 - }, - { - "epoch": 0.779005275736123, - "grad_norm": 1.5077261628252894, - "learning_rate": 4.90676662743499e-07, - "loss": 0.8511, - "step": 8638 - }, - { - "epoch": 0.7790954592595932, - "grad_norm": 1.6912576024236334, - "learning_rate": 4.902934182863581e-07, - "loss": 0.9268, - "step": 8639 - }, - { - "epoch": 0.7791856427830636, - "grad_norm": 1.8180588290492112, - "learning_rate": 4.899103026457069e-07, - "loss": 0.8125, - "step": 8640 - }, - { - "epoch": 0.7792758263065338, - "grad_norm": 1.082559339645668, - "learning_rate": 4.895273158542361e-07, - "loss": 0.7747, - "step": 8641 - }, - { - "epoch": 0.779366009830004, - "grad_norm": 1.2907719692390214, - "learning_rate": 4.891444579446227e-07, - "loss": 0.9499, - "step": 8642 - }, - { - "epoch": 0.7794561933534743, - "grad_norm": 1.501943441888081, - "learning_rate": 4.887617289495349e-07, - "loss": 0.8924, - "step": 8643 - }, - { - "epoch": 0.7795463768769446, - "grad_norm": 0.6090789467013445, - "learning_rate": 4.883791289016292e-07, - "loss": 0.727, - "step": 8644 - }, - { - "epoch": 0.7796365604004148, - "grad_norm": 1.543856826360227, - "learning_rate": 4.879966578335514e-07, - "loss": 0.9504, - "step": 8645 - }, - { - "epoch": 0.7797267439238851, - "grad_norm": 1.776191488828435, - "learning_rate": 4.876143157779358e-07, - "loss": 0.8308, - "step": 8646 - }, - { - "epoch": 0.7798169274473554, - "grad_norm": 9.104494230079279, - "learning_rate": 4.872321027674058e-07, - "loss": 0.8289, - "step": 8647 - }, - { - "epoch": 0.7799071109708257, - "grad_norm": 1.264031148363763, - "learning_rate": 4.868500188345748e-07, - "loss": 1.0152, - "step": 8648 - }, - { - "epoch": 0.7799972944942959, - "grad_norm": 1.4257280731735817, - "learning_rate": 4.864680640120425e-07, - "loss": 1.0018, - "step": 8649 - }, - { - "epoch": 0.7800874780177661, - "grad_norm": 1.7560834731028605, - "learning_rate": 4.860862383324016e-07, - "loss": 0.941, - "step": 8650 - }, - { - "epoch": 0.7801776615412365, - "grad_norm": 1.781341884987019, - "learning_rate": 4.857045418282295e-07, - "loss": 0.9261, - "step": 8651 - }, - { - "epoch": 0.7802678450647067, - "grad_norm": 1.5787939032628997, - "learning_rate": 4.853229745320966e-07, - "loss": 0.8866, - "step": 8652 - }, - { - "epoch": 0.7803580285881769, - "grad_norm": 1.385725842541466, - "learning_rate": 4.849415364765587e-07, - "loss": 0.9235, - "step": 8653 - }, - { - "epoch": 0.7804482121116472, - "grad_norm": 1.477518085298049, - "learning_rate": 4.845602276941631e-07, - "loss": 1.0053, - "step": 8654 - }, - { - "epoch": 0.7805383956351175, - "grad_norm": 1.318964117303913, - "learning_rate": 4.841790482174449e-07, - "loss": 0.9707, - "step": 8655 - }, - { - "epoch": 0.7806285791585877, - "grad_norm": 1.47825654555442, - "learning_rate": 4.837979980789282e-07, - "loss": 0.9286, - "step": 8656 - }, - { - "epoch": 0.780718762682058, - "grad_norm": 2.983632579549797, - "learning_rate": 4.834170773111273e-07, - "loss": 0.8737, - "step": 8657 - }, - { - "epoch": 0.7808089462055282, - "grad_norm": 1.4579498422574255, - "learning_rate": 4.830362859465431e-07, - "loss": 0.9562, - "step": 8658 - }, - { - "epoch": 0.7808991297289986, - "grad_norm": 1.4492539558350075, - "learning_rate": 4.826556240176675e-07, - "loss": 1.0077, - "step": 8659 - }, - { - "epoch": 0.7809893132524688, - "grad_norm": 1.5175278506794827, - "learning_rate": 4.822750915569807e-07, - "loss": 0.9561, - "step": 8660 - }, - { - "epoch": 0.781079496775939, - "grad_norm": 1.8405401446693317, - "learning_rate": 4.818946885969514e-07, - "loss": 1.0108, - "step": 8661 - }, - { - "epoch": 0.7811696802994093, - "grad_norm": 1.5292295243789997, - "learning_rate": 4.815144151700383e-07, - "loss": 0.9779, - "step": 8662 - }, - { - "epoch": 0.7812598638228796, - "grad_norm": 1.4754860804439953, - "learning_rate": 4.811342713086885e-07, - "loss": 0.9318, - "step": 8663 - }, - { - "epoch": 0.7813500473463498, - "grad_norm": 1.7295166565312738, - "learning_rate": 4.807542570453367e-07, - "loss": 0.9753, - "step": 8664 - }, - { - "epoch": 0.7814402308698201, - "grad_norm": 1.4968903415347692, - "learning_rate": 4.803743724124098e-07, - "loss": 0.9128, - "step": 8665 - }, - { - "epoch": 0.7815304143932903, - "grad_norm": 1.4107834116122453, - "learning_rate": 4.799946174423192e-07, - "loss": 0.9412, - "step": 8666 - }, - { - "epoch": 0.7816205979167606, - "grad_norm": 0.6185095907622603, - "learning_rate": 4.796149921674706e-07, - "loss": 0.7482, - "step": 8667 - }, - { - "epoch": 0.7817107814402309, - "grad_norm": 1.4307485509928755, - "learning_rate": 4.792354966202534e-07, - "loss": 0.9022, - "step": 8668 - }, - { - "epoch": 0.7818009649637011, - "grad_norm": 1.3975507172882433, - "learning_rate": 4.788561308330489e-07, - "loss": 0.7588, - "step": 8669 - }, - { - "epoch": 0.7818911484871713, - "grad_norm": 1.9264678659578298, - "learning_rate": 4.784768948382272e-07, - "loss": 0.8857, - "step": 8670 - }, - { - "epoch": 0.7819813320106417, - "grad_norm": 1.330079066070358, - "learning_rate": 4.780977886681461e-07, - "loss": 0.9421, - "step": 8671 - }, - { - "epoch": 0.7820715155341119, - "grad_norm": 1.4367508802197178, - "learning_rate": 4.777188123551541e-07, - "loss": 0.8839, - "step": 8672 - }, - { - "epoch": 0.7821616990575821, - "grad_norm": 3.7637643609712064, - "learning_rate": 4.773399659315856e-07, - "loss": 0.9709, - "step": 8673 - }, - { - "epoch": 0.7822518825810525, - "grad_norm": 1.657040029715624, - "learning_rate": 4.769612494297681e-07, - "loss": 0.9221, - "step": 8674 - }, - { - "epoch": 0.7823420661045227, - "grad_norm": 0.6846266436289834, - "learning_rate": 4.765826628820142e-07, - "loss": 0.7993, - "step": 8675 - }, - { - "epoch": 0.782432249627993, - "grad_norm": 1.6813369813562355, - "learning_rate": 4.7620420632062775e-07, - "loss": 0.857, - "step": 8676 - }, - { - "epoch": 0.7825224331514632, - "grad_norm": 1.563603126221013, - "learning_rate": 4.758258797779002e-07, - "loss": 0.8989, - "step": 8677 - }, - { - "epoch": 0.7826126166749335, - "grad_norm": 0.6798742410480565, - "learning_rate": 4.7544768328611317e-07, - "loss": 0.8068, - "step": 8678 - }, - { - "epoch": 0.7827028001984038, - "grad_norm": 1.4348597121033675, - "learning_rate": 4.750696168775359e-07, - "loss": 0.9189, - "step": 8679 - }, - { - "epoch": 0.782792983721874, - "grad_norm": 0.6764835611151916, - "learning_rate": 4.746916805844279e-07, - "loss": 0.7787, - "step": 8680 - }, - { - "epoch": 0.7828831672453442, - "grad_norm": 1.5089087577673368, - "learning_rate": 4.743138744390356e-07, - "loss": 0.9693, - "step": 8681 - }, - { - "epoch": 0.7829733507688146, - "grad_norm": 1.437415576611884, - "learning_rate": 4.739361984735959e-07, - "loss": 1.0047, - "step": 8682 - }, - { - "epoch": 0.7830635342922848, - "grad_norm": 1.6871387660192785, - "learning_rate": 4.7355865272033455e-07, - "loss": 0.8964, - "step": 8683 - }, - { - "epoch": 0.783153717815755, - "grad_norm": 1.477876754464153, - "learning_rate": 4.7318123721146563e-07, - "loss": 0.9266, - "step": 8684 - }, - { - "epoch": 0.7832439013392253, - "grad_norm": 1.3499443643196645, - "learning_rate": 4.728039519791924e-07, - "loss": 0.9265, - "step": 8685 - }, - { - "epoch": 0.7833340848626956, - "grad_norm": 0.6877508789425735, - "learning_rate": 4.72426797055707e-07, - "loss": 0.8274, - "step": 8686 - }, - { - "epoch": 0.7834242683861659, - "grad_norm": 1.631675141571968, - "learning_rate": 4.720497724731904e-07, - "loss": 0.9084, - "step": 8687 - }, - { - "epoch": 0.7835144519096361, - "grad_norm": 1.534907663034607, - "learning_rate": 4.7167287826381153e-07, - "loss": 0.9816, - "step": 8688 - }, - { - "epoch": 0.7836046354331063, - "grad_norm": 1.2343028566548857, - "learning_rate": 4.712961144597307e-07, - "loss": 0.9437, - "step": 8689 - }, - { - "epoch": 0.7836948189565767, - "grad_norm": 1.7505036719163019, - "learning_rate": 4.7091948109309343e-07, - "loss": 0.9908, - "step": 8690 - }, - { - "epoch": 0.7837850024800469, - "grad_norm": 1.5366992912359856, - "learning_rate": 4.705429781960384e-07, - "loss": 0.9569, - "step": 8691 - }, - { - "epoch": 0.7838751860035171, - "grad_norm": 1.6851663347772823, - "learning_rate": 4.7016660580068923e-07, - "loss": 0.9422, - "step": 8692 - }, - { - "epoch": 0.7839653695269874, - "grad_norm": 1.552352301656113, - "learning_rate": 4.6979036393916093e-07, - "loss": 0.9697, - "step": 8693 - }, - { - "epoch": 0.7840555530504577, - "grad_norm": 1.3349071790451423, - "learning_rate": 4.6941425264355603e-07, - "loss": 1.0113, - "step": 8694 - }, - { - "epoch": 0.7841457365739279, - "grad_norm": 1.268913987559719, - "learning_rate": 4.6903827194596666e-07, - "loss": 0.952, - "step": 8695 - }, - { - "epoch": 0.7842359200973982, - "grad_norm": 1.4156563559589999, - "learning_rate": 4.686624218784743e-07, - "loss": 0.9916, - "step": 8696 - }, - { - "epoch": 0.7843261036208685, - "grad_norm": 1.8041546187611572, - "learning_rate": 4.6828670247314696e-07, - "loss": 0.9317, - "step": 8697 - }, - { - "epoch": 0.7844162871443388, - "grad_norm": 1.6032281913041369, - "learning_rate": 4.679111137620442e-07, - "loss": 0.9589, - "step": 8698 - }, - { - "epoch": 0.784506470667809, - "grad_norm": 1.4170552823973952, - "learning_rate": 4.67535655777213e-07, - "loss": 0.9047, - "step": 8699 - }, - { - "epoch": 0.7845966541912792, - "grad_norm": 1.4002059750943143, - "learning_rate": 4.6716032855068956e-07, - "loss": 0.9512, - "step": 8700 - }, - { - "epoch": 0.7846868377147496, - "grad_norm": 1.7046022220753294, - "learning_rate": 4.6678513211449867e-07, - "loss": 0.9874, - "step": 8701 - }, - { - "epoch": 0.7847770212382198, - "grad_norm": 2.3704070804764426, - "learning_rate": 4.6641006650065516e-07, - "loss": 0.9043, - "step": 8702 - }, - { - "epoch": 0.78486720476169, - "grad_norm": 1.376405950817535, - "learning_rate": 4.6603513174115973e-07, - "loss": 0.9271, - "step": 8703 - }, - { - "epoch": 0.7849573882851603, - "grad_norm": 1.7377055725775765, - "learning_rate": 4.6566032786800625e-07, - "loss": 0.9532, - "step": 8704 - }, - { - "epoch": 0.7850475718086306, - "grad_norm": 1.4656142179503289, - "learning_rate": 4.6528565491317274e-07, - "loss": 0.9186, - "step": 8705 - }, - { - "epoch": 0.7851377553321008, - "grad_norm": 1.5948227656559113, - "learning_rate": 4.649111129086305e-07, - "loss": 1.0077, - "step": 8706 - }, - { - "epoch": 0.7852279388555711, - "grad_norm": 1.4056695799926668, - "learning_rate": 4.6453670188633596e-07, - "loss": 0.9564, - "step": 8707 - }, - { - "epoch": 0.7853181223790413, - "grad_norm": 1.8333045422576346, - "learning_rate": 4.641624218782365e-07, - "loss": 0.9439, - "step": 8708 - }, - { - "epoch": 0.7854083059025116, - "grad_norm": 1.4603743666301627, - "learning_rate": 4.6378827291626765e-07, - "loss": 0.9058, - "step": 8709 - }, - { - "epoch": 0.7854984894259819, - "grad_norm": 1.1689951287715503, - "learning_rate": 4.634142550323541e-07, - "loss": 0.9967, - "step": 8710 - }, - { - "epoch": 0.7855886729494521, - "grad_norm": 1.7140308443274253, - "learning_rate": 4.6304036825840943e-07, - "loss": 0.994, - "step": 8711 - }, - { - "epoch": 0.7856788564729223, - "grad_norm": 1.3521472349578918, - "learning_rate": 4.626666126263341e-07, - "loss": 0.8469, - "step": 8712 - }, - { - "epoch": 0.7857690399963927, - "grad_norm": 1.5960885636464317, - "learning_rate": 4.622929881680213e-07, - "loss": 0.9669, - "step": 8713 - }, - { - "epoch": 0.7858592235198629, - "grad_norm": 1.8236757568976967, - "learning_rate": 4.6191949491534887e-07, - "loss": 0.857, - "step": 8714 - }, - { - "epoch": 0.7859494070433332, - "grad_norm": 1.5234853883699246, - "learning_rate": 4.6154613290018617e-07, - "loss": 0.946, - "step": 8715 - }, - { - "epoch": 0.7860395905668034, - "grad_norm": 2.6532743054761787, - "learning_rate": 4.6117290215439043e-07, - "loss": 0.8865, - "step": 8716 - }, - { - "epoch": 0.7861297740902737, - "grad_norm": 1.522302136021283, - "learning_rate": 4.6079980270980744e-07, - "loss": 0.8139, - "step": 8717 - }, - { - "epoch": 0.786219957613744, - "grad_norm": 1.5383671772885736, - "learning_rate": 4.6042683459827245e-07, - "loss": 0.9092, - "step": 8718 - }, - { - "epoch": 0.7863101411372142, - "grad_norm": 0.7649731488595563, - "learning_rate": 4.600539978516098e-07, - "loss": 0.8088, - "step": 8719 - }, - { - "epoch": 0.7864003246606845, - "grad_norm": 1.309668438426161, - "learning_rate": 4.5968129250163004e-07, - "loss": 0.9026, - "step": 8720 - }, - { - "epoch": 0.7864905081841548, - "grad_norm": 1.5394973527836626, - "learning_rate": 4.5930871858013653e-07, - "loss": 0.8693, - "step": 8721 - }, - { - "epoch": 0.786580691707625, - "grad_norm": 0.658023119120281, - "learning_rate": 4.589362761189182e-07, - "loss": 0.8452, - "step": 8722 - }, - { - "epoch": 0.7866708752310952, - "grad_norm": 2.281401851250864, - "learning_rate": 4.585639651497539e-07, - "loss": 1.0024, - "step": 8723 - }, - { - "epoch": 0.7867610587545656, - "grad_norm": 1.6527096318462193, - "learning_rate": 4.581917857044115e-07, - "loss": 0.8974, - "step": 8724 - }, - { - "epoch": 0.7868512422780358, - "grad_norm": 1.3812485811390214, - "learning_rate": 4.5781973781464734e-07, - "loss": 0.976, - "step": 8725 - }, - { - "epoch": 0.7869414258015061, - "grad_norm": 1.5286969290603611, - "learning_rate": 4.574478215122073e-07, - "loss": 0.9742, - "step": 8726 - }, - { - "epoch": 0.7870316093249763, - "grad_norm": 1.9168611579845496, - "learning_rate": 4.5707603682882357e-07, - "loss": 0.9238, - "step": 8727 - }, - { - "epoch": 0.7871217928484466, - "grad_norm": 1.5738106096760132, - "learning_rate": 4.56704383796221e-07, - "loss": 0.8074, - "step": 8728 - }, - { - "epoch": 0.7872119763719169, - "grad_norm": 1.4153027232904862, - "learning_rate": 4.5633286244610956e-07, - "loss": 0.9724, - "step": 8729 - }, - { - "epoch": 0.7873021598953871, - "grad_norm": 1.2203920013671992, - "learning_rate": 4.5596147281018993e-07, - "loss": 1.0023, - "step": 8730 - }, - { - "epoch": 0.7873923434188573, - "grad_norm": 1.5511434982344052, - "learning_rate": 4.5559021492015137e-07, - "loss": 1.0436, - "step": 8731 - }, - { - "epoch": 0.7874825269423277, - "grad_norm": 1.3263631915831475, - "learning_rate": 4.552190888076712e-07, - "loss": 0.851, - "step": 8732 - }, - { - "epoch": 0.7875727104657979, - "grad_norm": 1.5815591743583515, - "learning_rate": 4.548480945044164e-07, - "loss": 0.8869, - "step": 8733 - }, - { - "epoch": 0.7876628939892681, - "grad_norm": 1.3507678391434563, - "learning_rate": 4.54477232042042e-07, - "loss": 0.9327, - "step": 8734 - }, - { - "epoch": 0.7877530775127384, - "grad_norm": 1.3099484392021792, - "learning_rate": 4.541065014521921e-07, - "loss": 0.9237, - "step": 8735 - }, - { - "epoch": 0.7878432610362087, - "grad_norm": 1.9061226724093465, - "learning_rate": 4.5373590276649996e-07, - "loss": 0.889, - "step": 8736 - }, - { - "epoch": 0.787933444559679, - "grad_norm": 1.2507679487637438, - "learning_rate": 4.533654360165862e-07, - "loss": 0.9052, - "step": 8737 - }, - { - "epoch": 0.7880236280831492, - "grad_norm": 1.6933887307549547, - "learning_rate": 4.5299510123406115e-07, - "loss": 0.9976, - "step": 8738 - }, - { - "epoch": 0.7881138116066194, - "grad_norm": 1.2886452664313492, - "learning_rate": 4.5262489845052456e-07, - "loss": 0.9033, - "step": 8739 - }, - { - "epoch": 0.7882039951300898, - "grad_norm": 1.7455259761183255, - "learning_rate": 4.5225482769756353e-07, - "loss": 0.9038, - "step": 8740 - }, - { - "epoch": 0.78829417865356, - "grad_norm": 2.4221124348078034, - "learning_rate": 4.5188488900675545e-07, - "loss": 0.8703, - "step": 8741 - }, - { - "epoch": 0.7883843621770302, - "grad_norm": 1.2841704973827528, - "learning_rate": 4.5151508240966363e-07, - "loss": 0.9601, - "step": 8742 - }, - { - "epoch": 0.7884745457005006, - "grad_norm": 1.3423177049783437, - "learning_rate": 4.511454079378445e-07, - "loss": 0.9843, - "step": 8743 - }, - { - "epoch": 0.7885647292239708, - "grad_norm": 1.5792983574757682, - "learning_rate": 4.507758656228382e-07, - "loss": 0.8323, - "step": 8744 - }, - { - "epoch": 0.788654912747441, - "grad_norm": 1.2379823919717239, - "learning_rate": 4.5040645549617864e-07, - "loss": 0.9922, - "step": 8745 - }, - { - "epoch": 0.7887450962709113, - "grad_norm": 1.367836934203254, - "learning_rate": 4.5003717758938384e-07, - "loss": 0.8534, - "step": 8746 - }, - { - "epoch": 0.7888352797943816, - "grad_norm": 1.4429921262424494, - "learning_rate": 4.4966803193396365e-07, - "loss": 0.8775, - "step": 8747 - }, - { - "epoch": 0.7889254633178518, - "grad_norm": 1.4335298861796753, - "learning_rate": 4.492990185614154e-07, - "loss": 0.949, - "step": 8748 - }, - { - "epoch": 0.7890156468413221, - "grad_norm": 1.5513433019227343, - "learning_rate": 4.489301375032255e-07, - "loss": 0.9289, - "step": 8749 - }, - { - "epoch": 0.7891058303647923, - "grad_norm": 1.5632170748370597, - "learning_rate": 4.4856138879086857e-07, - "loss": 0.943, - "step": 8750 - }, - { - "epoch": 0.7891960138882627, - "grad_norm": 1.135361795528593, - "learning_rate": 4.481927724558092e-07, - "loss": 0.8913, - "step": 8751 - }, - { - "epoch": 0.7892861974117329, - "grad_norm": 1.3662569639511832, - "learning_rate": 4.478242885294985e-07, - "loss": 0.9097, - "step": 8752 - }, - { - "epoch": 0.7893763809352031, - "grad_norm": 1.3956988065809206, - "learning_rate": 4.474559370433779e-07, - "loss": 0.8939, - "step": 8753 - }, - { - "epoch": 0.7894665644586734, - "grad_norm": 1.5642490515795213, - "learning_rate": 4.470877180288777e-07, - "loss": 0.8826, - "step": 8754 - }, - { - "epoch": 0.7895567479821437, - "grad_norm": 1.2805826728792453, - "learning_rate": 4.4671963151741574e-07, - "loss": 0.9068, - "step": 8755 - }, - { - "epoch": 0.7896469315056139, - "grad_norm": 1.1334452473323342, - "learning_rate": 4.4635167754039973e-07, - "loss": 0.9155, - "step": 8756 - }, - { - "epoch": 0.7897371150290842, - "grad_norm": 1.178702694943866, - "learning_rate": 4.459838561292253e-07, - "loss": 0.9605, - "step": 8757 - }, - { - "epoch": 0.7898272985525544, - "grad_norm": 1.6214723443688013, - "learning_rate": 4.456161673152774e-07, - "loss": 0.8574, - "step": 8758 - }, - { - "epoch": 0.7899174820760247, - "grad_norm": 1.3405959795320603, - "learning_rate": 4.4524861112992806e-07, - "loss": 0.93, - "step": 8759 - }, - { - "epoch": 0.790007665599495, - "grad_norm": 1.7060651686051116, - "learning_rate": 4.448811876045411e-07, - "loss": 0.9326, - "step": 8760 - }, - { - "epoch": 0.7900978491229652, - "grad_norm": 1.6876551945282798, - "learning_rate": 4.445138967704647e-07, - "loss": 1.0062, - "step": 8761 - }, - { - "epoch": 0.7901880326464354, - "grad_norm": 1.5137600780753235, - "learning_rate": 4.4414673865904075e-07, - "loss": 0.9116, - "step": 8762 - }, - { - "epoch": 0.7902782161699058, - "grad_norm": 1.2876114972918147, - "learning_rate": 4.437797133015955e-07, - "loss": 0.9497, - "step": 8763 - }, - { - "epoch": 0.790368399693376, - "grad_norm": 1.6896616195183907, - "learning_rate": 4.4341282072944586e-07, - "loss": 0.8545, - "step": 8764 - }, - { - "epoch": 0.7904585832168463, - "grad_norm": 1.2635639975960113, - "learning_rate": 4.430460609738973e-07, - "loss": 0.9555, - "step": 8765 - }, - { - "epoch": 0.7905487667403166, - "grad_norm": 0.7595231716194897, - "learning_rate": 4.4267943406624386e-07, - "loss": 0.846, - "step": 8766 - }, - { - "epoch": 0.7906389502637868, - "grad_norm": 2.7398586545810204, - "learning_rate": 4.4231294003776853e-07, - "loss": 1.0432, - "step": 8767 - }, - { - "epoch": 0.7907291337872571, - "grad_norm": 1.6545436634517048, - "learning_rate": 4.419465789197416e-07, - "loss": 0.9733, - "step": 8768 - }, - { - "epoch": 0.7908193173107273, - "grad_norm": 1.3747250975722232, - "learning_rate": 4.415803507434237e-07, - "loss": 0.9311, - "step": 8769 - }, - { - "epoch": 0.7909095008341976, - "grad_norm": 1.4478864689520679, - "learning_rate": 4.4121425554006307e-07, - "loss": 0.9656, - "step": 8770 - }, - { - "epoch": 0.7909996843576679, - "grad_norm": 1.5922908461164056, - "learning_rate": 4.4084829334089744e-07, - "loss": 0.9207, - "step": 8771 - }, - { - "epoch": 0.7910898678811381, - "grad_norm": 1.34516371060373, - "learning_rate": 4.404824641771525e-07, - "loss": 0.8869, - "step": 8772 - }, - { - "epoch": 0.7911800514046083, - "grad_norm": 1.2956825657232096, - "learning_rate": 4.4011676808004327e-07, - "loss": 1.0075, - "step": 8773 - }, - { - "epoch": 0.7912702349280787, - "grad_norm": 1.3398777370757675, - "learning_rate": 4.3975120508077145e-07, - "loss": 0.9466, - "step": 8774 - }, - { - "epoch": 0.7913604184515489, - "grad_norm": 1.3782503551120808, - "learning_rate": 4.39385775210531e-07, - "loss": 0.9836, - "step": 8775 - }, - { - "epoch": 0.7914506019750192, - "grad_norm": 1.9491730036747366, - "learning_rate": 4.390204785005003e-07, - "loss": 0.9576, - "step": 8776 - }, - { - "epoch": 0.7915407854984894, - "grad_norm": 1.341797363636533, - "learning_rate": 4.386553149818504e-07, - "loss": 0.9461, - "step": 8777 - }, - { - "epoch": 0.7916309690219597, - "grad_norm": 1.5164404179822837, - "learning_rate": 4.3829028468573793e-07, - "loss": 0.8079, - "step": 8778 - }, - { - "epoch": 0.79172115254543, - "grad_norm": 1.4905483969912527, - "learning_rate": 4.3792538764330935e-07, - "loss": 0.8732, - "step": 8779 - }, - { - "epoch": 0.7918113360689002, - "grad_norm": 1.523802258766544, - "learning_rate": 4.3756062388569994e-07, - "loss": 0.908, - "step": 8780 - }, - { - "epoch": 0.7919015195923704, - "grad_norm": 0.6459277888925888, - "learning_rate": 4.3719599344403346e-07, - "loss": 0.8037, - "step": 8781 - }, - { - "epoch": 0.7919917031158408, - "grad_norm": 1.8812816300378699, - "learning_rate": 4.3683149634942243e-07, - "loss": 0.9187, - "step": 8782 - }, - { - "epoch": 0.792081886639311, - "grad_norm": 1.461194428887491, - "learning_rate": 4.364671326329663e-07, - "loss": 0.8235, - "step": 8783 - }, - { - "epoch": 0.7921720701627812, - "grad_norm": 1.8463351988972674, - "learning_rate": 4.3610290232575673e-07, - "loss": 0.9504, - "step": 8784 - }, - { - "epoch": 0.7922622536862515, - "grad_norm": 1.409947852800442, - "learning_rate": 4.357388054588702e-07, - "loss": 0.9404, - "step": 8785 - }, - { - "epoch": 0.7923524372097218, - "grad_norm": 1.2566200432531183, - "learning_rate": 4.3537484206337405e-07, - "loss": 0.8884, - "step": 8786 - }, - { - "epoch": 0.792442620733192, - "grad_norm": 1.4679104454882494, - "learning_rate": 4.3501101217032366e-07, - "loss": 0.9649, - "step": 8787 - }, - { - "epoch": 0.7925328042566623, - "grad_norm": 1.5397261529665431, - "learning_rate": 4.346473158107629e-07, - "loss": 0.8675, - "step": 8788 - }, - { - "epoch": 0.7926229877801325, - "grad_norm": 1.543403506293719, - "learning_rate": 4.342837530157244e-07, - "loss": 0.941, - "step": 8789 - }, - { - "epoch": 0.7927131713036029, - "grad_norm": 1.6620076705351647, - "learning_rate": 4.3392032381622987e-07, - "loss": 0.8438, - "step": 8790 - }, - { - "epoch": 0.7928033548270731, - "grad_norm": 1.2692776940303443, - "learning_rate": 4.3355702824328765e-07, - "loss": 0.9052, - "step": 8791 - }, - { - "epoch": 0.7928935383505433, - "grad_norm": 1.4852485896927168, - "learning_rate": 4.3319386632789823e-07, - "loss": 0.9784, - "step": 8792 - }, - { - "epoch": 0.7929837218740137, - "grad_norm": 1.4918105363282728, - "learning_rate": 4.328308381010466e-07, - "loss": 0.9316, - "step": 8793 - }, - { - "epoch": 0.7930739053974839, - "grad_norm": 1.3902391048306992, - "learning_rate": 4.3246794359370933e-07, - "loss": 0.9443, - "step": 8794 - }, - { - "epoch": 0.7931640889209541, - "grad_norm": 1.3884517962062957, - "learning_rate": 4.3210518283685025e-07, - "loss": 0.9196, - "step": 8795 - }, - { - "epoch": 0.7932542724444244, - "grad_norm": 1.5885928619973604, - "learning_rate": 4.317425558614225e-07, - "loss": 0.9369, - "step": 8796 - }, - { - "epoch": 0.7933444559678947, - "grad_norm": 1.3525289266599003, - "learning_rate": 4.3138006269836744e-07, - "loss": 0.898, - "step": 8797 - }, - { - "epoch": 0.793434639491365, - "grad_norm": 1.2714754212704376, - "learning_rate": 4.3101770337861376e-07, - "loss": 0.9211, - "step": 8798 - }, - { - "epoch": 0.7935248230148352, - "grad_norm": 0.6511206667970655, - "learning_rate": 4.30655477933082e-07, - "loss": 0.8254, - "step": 8799 - }, - { - "epoch": 0.7936150065383054, - "grad_norm": 1.1977909796195032, - "learning_rate": 4.30293386392677e-07, - "loss": 0.9594, - "step": 8800 - }, - { - "epoch": 0.7937051900617758, - "grad_norm": 1.188356542365094, - "learning_rate": 4.299314287882967e-07, - "loss": 0.9534, - "step": 8801 - }, - { - "epoch": 0.793795373585246, - "grad_norm": 1.602326299385412, - "learning_rate": 4.2956960515082353e-07, - "loss": 0.9692, - "step": 8802 - }, - { - "epoch": 0.7938855571087162, - "grad_norm": 1.5799077605834793, - "learning_rate": 4.29207915511131e-07, - "loss": 0.8928, - "step": 8803 - }, - { - "epoch": 0.7939757406321865, - "grad_norm": 1.4139500778779348, - "learning_rate": 4.2884635990008024e-07, - "loss": 0.9925, - "step": 8804 - }, - { - "epoch": 0.7940659241556568, - "grad_norm": 1.8747835034313054, - "learning_rate": 4.284849383485214e-07, - "loss": 0.9028, - "step": 8805 - }, - { - "epoch": 0.794156107679127, - "grad_norm": 1.666477438992872, - "learning_rate": 4.2812365088729296e-07, - "loss": 0.9573, - "step": 8806 - }, - { - "epoch": 0.7942462912025973, - "grad_norm": 1.4546840057167183, - "learning_rate": 4.2776249754722227e-07, - "loss": 0.9616, - "step": 8807 - }, - { - "epoch": 0.7943364747260675, - "grad_norm": 1.9774922231142698, - "learning_rate": 4.27401478359124e-07, - "loss": 1.001, - "step": 8808 - }, - { - "epoch": 0.7944266582495378, - "grad_norm": 1.3086878045286374, - "learning_rate": 4.2704059335380283e-07, - "loss": 1.0282, - "step": 8809 - }, - { - "epoch": 0.7945168417730081, - "grad_norm": 1.2458546089133347, - "learning_rate": 4.266798425620515e-07, - "loss": 1.0361, - "step": 8810 - }, - { - "epoch": 0.7946070252964783, - "grad_norm": 1.3926698031215918, - "learning_rate": 4.263192260146511e-07, - "loss": 0.919, - "step": 8811 - }, - { - "epoch": 0.7946972088199485, - "grad_norm": 1.5127251008304097, - "learning_rate": 4.2595874374237216e-07, - "loss": 0.9808, - "step": 8812 - }, - { - "epoch": 0.7947873923434189, - "grad_norm": 1.1756461943561929, - "learning_rate": 4.255983957759712e-07, - "loss": 0.9364, - "step": 8813 - }, - { - "epoch": 0.7948775758668891, - "grad_norm": 1.2311264460192348, - "learning_rate": 4.2523818214619745e-07, - "loss": 0.8764, - "step": 8814 - }, - { - "epoch": 0.7949677593903594, - "grad_norm": 1.3451195767933803, - "learning_rate": 4.24878102883784e-07, - "loss": 0.9848, - "step": 8815 - }, - { - "epoch": 0.7950579429138297, - "grad_norm": 1.865280518285588, - "learning_rate": 4.24518158019457e-07, - "loss": 0.9454, - "step": 8816 - }, - { - "epoch": 0.7951481264372999, - "grad_norm": 1.9492114722473417, - "learning_rate": 4.241583475839274e-07, - "loss": 0.8822, - "step": 8817 - }, - { - "epoch": 0.7952383099607702, - "grad_norm": 1.5477701544788816, - "learning_rate": 4.237986716078965e-07, - "loss": 0.9883, - "step": 8818 - }, - { - "epoch": 0.7953284934842404, - "grad_norm": 1.4921458772776, - "learning_rate": 4.2343913012205433e-07, - "loss": 0.9233, - "step": 8819 - }, - { - "epoch": 0.7954186770077107, - "grad_norm": 1.2409101912702876, - "learning_rate": 4.230797231570784e-07, - "loss": 0.9064, - "step": 8820 - }, - { - "epoch": 0.795508860531181, - "grad_norm": 1.742734404112568, - "learning_rate": 4.227204507436357e-07, - "loss": 0.9375, - "step": 8821 - }, - { - "epoch": 0.7955990440546512, - "grad_norm": 1.3996490259965908, - "learning_rate": 4.223613129123811e-07, - "loss": 0.8575, - "step": 8822 - }, - { - "epoch": 0.7956892275781214, - "grad_norm": 1.3136236739468194, - "learning_rate": 4.220023096939589e-07, - "loss": 0.8476, - "step": 8823 - }, - { - "epoch": 0.7957794111015918, - "grad_norm": 14.771923776396358, - "learning_rate": 4.21643441119e-07, - "loss": 0.9248, - "step": 8824 - }, - { - "epoch": 0.795869594625062, - "grad_norm": 0.708992193509963, - "learning_rate": 4.212847072181256e-07, - "loss": 0.8733, - "step": 8825 - }, - { - "epoch": 0.7959597781485322, - "grad_norm": 1.4914560172808162, - "learning_rate": 4.2092610802194505e-07, - "loss": 0.9082, - "step": 8826 - }, - { - "epoch": 0.7960499616720025, - "grad_norm": 1.2999117582890285, - "learning_rate": 4.2056764356105587e-07, - "loss": 0.8444, - "step": 8827 - }, - { - "epoch": 0.7961401451954728, - "grad_norm": 1.31398775157997, - "learning_rate": 4.202093138660443e-07, - "loss": 0.9813, - "step": 8828 - }, - { - "epoch": 0.7962303287189431, - "grad_norm": 1.2467478407647385, - "learning_rate": 4.198511189674854e-07, - "loss": 0.9642, - "step": 8829 - }, - { - "epoch": 0.7963205122424133, - "grad_norm": 1.4241307568845172, - "learning_rate": 4.1949305889594066e-07, - "loss": 0.952, - "step": 8830 - }, - { - "epoch": 0.7964106957658835, - "grad_norm": 1.3491420792539728, - "learning_rate": 4.191351336819642e-07, - "loss": 0.9307, - "step": 8831 - }, - { - "epoch": 0.7965008792893539, - "grad_norm": 1.638438484385135, - "learning_rate": 4.187773433560939e-07, - "loss": 0.8673, - "step": 8832 - }, - { - "epoch": 0.7965910628128241, - "grad_norm": 1.4222727627930012, - "learning_rate": 4.184196879488604e-07, - "loss": 0.9275, - "step": 8833 - }, - { - "epoch": 0.7966812463362943, - "grad_norm": 1.5435795987250212, - "learning_rate": 4.1806216749077936e-07, - "loss": 1.0004, - "step": 8834 - }, - { - "epoch": 0.7967714298597646, - "grad_norm": 1.2268351355132927, - "learning_rate": 4.177047820123569e-07, - "loss": 0.9779, - "step": 8835 - }, - { - "epoch": 0.7968616133832349, - "grad_norm": 1.4522625404377276, - "learning_rate": 4.1734753154408733e-07, - "loss": 0.8111, - "step": 8836 - }, - { - "epoch": 0.7969517969067051, - "grad_norm": 1.4813757058414654, - "learning_rate": 4.169904161164528e-07, - "loss": 0.9193, - "step": 8837 - }, - { - "epoch": 0.7970419804301754, - "grad_norm": 2.2974767610289906, - "learning_rate": 4.1663343575992526e-07, - "loss": 1.0484, - "step": 8838 - }, - { - "epoch": 0.7971321639536457, - "grad_norm": 1.8836166753841281, - "learning_rate": 4.1627659050496275e-07, - "loss": 0.9479, - "step": 8839 - }, - { - "epoch": 0.797222347477116, - "grad_norm": 1.3740588348318967, - "learning_rate": 4.1591988038201453e-07, - "loss": 0.8571, - "step": 8840 - }, - { - "epoch": 0.7973125310005862, - "grad_norm": 1.386891743789689, - "learning_rate": 4.155633054215164e-07, - "loss": 0.9823, - "step": 8841 - }, - { - "epoch": 0.7974027145240564, - "grad_norm": 1.5340189130181663, - "learning_rate": 4.152068656538934e-07, - "loss": 0.8465, - "step": 8842 - }, - { - "epoch": 0.7974928980475268, - "grad_norm": 1.3462356377475726, - "learning_rate": 4.148505611095594e-07, - "loss": 0.9987, - "step": 8843 - }, - { - "epoch": 0.797583081570997, - "grad_norm": 1.4462226270295915, - "learning_rate": 4.1449439181891563e-07, - "loss": 0.812, - "step": 8844 - }, - { - "epoch": 0.7976732650944672, - "grad_norm": 1.4738939179092911, - "learning_rate": 4.14138357812353e-07, - "loss": 0.9515, - "step": 8845 - }, - { - "epoch": 0.7977634486179375, - "grad_norm": 1.2405520535410277, - "learning_rate": 4.137824591202506e-07, - "loss": 0.9901, - "step": 8846 - }, - { - "epoch": 0.7978536321414078, - "grad_norm": 1.75372531017018, - "learning_rate": 4.134266957729737e-07, - "loss": 0.9236, - "step": 8847 - }, - { - "epoch": 0.797943815664878, - "grad_norm": 1.377599123661303, - "learning_rate": 4.1307106780088065e-07, - "loss": 1.0324, - "step": 8848 - }, - { - "epoch": 0.7980339991883483, - "grad_norm": 1.699403669445866, - "learning_rate": 4.1271557523431387e-07, - "loss": 0.9386, - "step": 8849 - }, - { - "epoch": 0.7981241827118185, - "grad_norm": 1.3905211849251733, - "learning_rate": 4.1236021810360634e-07, - "loss": 0.9233, - "step": 8850 - }, - { - "epoch": 0.7982143662352889, - "grad_norm": 1.9038216178783152, - "learning_rate": 4.120049964390793e-07, - "loss": 0.8815, - "step": 8851 - }, - { - "epoch": 0.7983045497587591, - "grad_norm": 1.660239901959568, - "learning_rate": 4.116499102710418e-07, - "loss": 0.8895, - "step": 8852 - }, - { - "epoch": 0.7983947332822293, - "grad_norm": 1.5816457019591583, - "learning_rate": 4.112949596297928e-07, - "loss": 0.9118, - "step": 8853 - }, - { - "epoch": 0.7984849168056996, - "grad_norm": 1.5048771090093087, - "learning_rate": 4.1094014454561664e-07, - "loss": 0.8799, - "step": 8854 - }, - { - "epoch": 0.7985751003291699, - "grad_norm": 1.6050458290711138, - "learning_rate": 4.1058546504879057e-07, - "loss": 0.8388, - "step": 8855 - }, - { - "epoch": 0.7986652838526401, - "grad_norm": 1.5670936959547903, - "learning_rate": 4.1023092116957583e-07, - "loss": 0.9936, - "step": 8856 - }, - { - "epoch": 0.7987554673761104, - "grad_norm": 1.2546537078829387, - "learning_rate": 4.098765129382249e-07, - "loss": 0.9763, - "step": 8857 - }, - { - "epoch": 0.7988456508995806, - "grad_norm": 1.5266963435430023, - "learning_rate": 4.0952224038497764e-07, - "loss": 1.0062, - "step": 8858 - }, - { - "epoch": 0.7989358344230509, - "grad_norm": 1.474737260338271, - "learning_rate": 4.091681035400627e-07, - "loss": 0.9241, - "step": 8859 - }, - { - "epoch": 0.7990260179465212, - "grad_norm": 1.4404453289669823, - "learning_rate": 4.088141024336971e-07, - "loss": 0.9636, - "step": 8860 - }, - { - "epoch": 0.7991162014699914, - "grad_norm": 1.2042892074712073, - "learning_rate": 4.0846023709608636e-07, - "loss": 1.0114, - "step": 8861 - }, - { - "epoch": 0.7992063849934617, - "grad_norm": 1.7248470708533614, - "learning_rate": 4.081065075574226e-07, - "loss": 1.0028, - "step": 8862 - }, - { - "epoch": 0.799296568516932, - "grad_norm": 1.3764602103532562, - "learning_rate": 4.077529138478906e-07, - "loss": 0.8647, - "step": 8863 - }, - { - "epoch": 0.7993867520404022, - "grad_norm": 0.7188267044933949, - "learning_rate": 4.073994559976588e-07, - "loss": 0.7715, - "step": 8864 - }, - { - "epoch": 0.7994769355638724, - "grad_norm": 1.362743369949048, - "learning_rate": 4.0704613403688716e-07, - "loss": 0.8993, - "step": 8865 - }, - { - "epoch": 0.7995671190873428, - "grad_norm": 1.2572429149228554, - "learning_rate": 4.0669294799572264e-07, - "loss": 0.9884, - "step": 8866 - }, - { - "epoch": 0.799657302610813, - "grad_norm": 1.4731124859271467, - "learning_rate": 4.0633989790430113e-07, - "loss": 1.0353, - "step": 8867 - }, - { - "epoch": 0.7997474861342833, - "grad_norm": 1.4498615330319722, - "learning_rate": 4.059869837927477e-07, - "loss": 0.9443, - "step": 8868 - }, - { - "epoch": 0.7998376696577535, - "grad_norm": 1.5432909665289936, - "learning_rate": 4.056342056911728e-07, - "loss": 0.9284, - "step": 8869 - }, - { - "epoch": 0.7999278531812238, - "grad_norm": 1.4654061420414521, - "learning_rate": 4.052815636296798e-07, - "loss": 0.9498, - "step": 8870 - }, - { - "epoch": 0.8000180367046941, - "grad_norm": 1.2135230241688817, - "learning_rate": 4.0492905763835593e-07, - "loss": 0.9642, - "step": 8871 - }, - { - "epoch": 0.8001082202281643, - "grad_norm": 1.1435989384351715, - "learning_rate": 4.0457668774728115e-07, - "loss": 0.968, - "step": 8872 - }, - { - "epoch": 0.8001984037516345, - "grad_norm": 1.410119645338406, - "learning_rate": 4.0422445398651985e-07, - "loss": 0.9567, - "step": 8873 - }, - { - "epoch": 0.8002885872751049, - "grad_norm": 1.4299327035554463, - "learning_rate": 4.0387235638612706e-07, - "loss": 0.8567, - "step": 8874 - }, - { - "epoch": 0.8003787707985751, - "grad_norm": 1.7699403942438359, - "learning_rate": 4.0352039497614586e-07, - "loss": 0.942, - "step": 8875 - }, - { - "epoch": 0.8004689543220453, - "grad_norm": 1.8890848112721348, - "learning_rate": 4.031685697866074e-07, - "loss": 0.9154, - "step": 8876 - }, - { - "epoch": 0.8005591378455156, - "grad_norm": 1.2967165654852715, - "learning_rate": 4.0281688084753165e-07, - "loss": 0.9748, - "step": 8877 - }, - { - "epoch": 0.8006493213689859, - "grad_norm": 1.3482234479941113, - "learning_rate": 4.0246532818892675e-07, - "loss": 0.9873, - "step": 8878 - }, - { - "epoch": 0.8007395048924562, - "grad_norm": 3.3562145891932147, - "learning_rate": 4.0211391184078814e-07, - "loss": 0.9736, - "step": 8879 - }, - { - "epoch": 0.8008296884159264, - "grad_norm": 1.397157776136089, - "learning_rate": 4.0176263183310135e-07, - "loss": 0.9645, - "step": 8880 - }, - { - "epoch": 0.8009198719393966, - "grad_norm": 1.263716683632486, - "learning_rate": 4.0141148819583925e-07, - "loss": 0.991, - "step": 8881 - }, - { - "epoch": 0.801010055462867, - "grad_norm": 0.635705715369659, - "learning_rate": 4.010604809589637e-07, - "loss": 0.8023, - "step": 8882 - }, - { - "epoch": 0.8011002389863372, - "grad_norm": 1.5050534319343267, - "learning_rate": 4.0070961015242475e-07, - "loss": 0.9489, - "step": 8883 - }, - { - "epoch": 0.8011904225098074, - "grad_norm": 1.2665753505232769, - "learning_rate": 4.0035887580615933e-07, - "loss": 1.0098, - "step": 8884 - }, - { - "epoch": 0.8012806060332777, - "grad_norm": 1.378174411986424, - "learning_rate": 4.0000827795009594e-07, - "loss": 0.9915, - "step": 8885 - }, - { - "epoch": 0.801370789556748, - "grad_norm": 1.6871581967658833, - "learning_rate": 3.996578166141475e-07, - "loss": 0.8992, - "step": 8886 - }, - { - "epoch": 0.8014609730802182, - "grad_norm": 1.7091997632127165, - "learning_rate": 3.9930749182821955e-07, - "loss": 0.9438, - "step": 8887 - }, - { - "epoch": 0.8015511566036885, - "grad_norm": 0.6344158676212914, - "learning_rate": 3.9895730362220116e-07, - "loss": 0.8068, - "step": 8888 - }, - { - "epoch": 0.8016413401271588, - "grad_norm": 1.7177959915597587, - "learning_rate": 3.986072520259749e-07, - "loss": 0.9747, - "step": 8889 - }, - { - "epoch": 0.801731523650629, - "grad_norm": 1.420116385002051, - "learning_rate": 3.9825733706940736e-07, - "loss": 0.9253, - "step": 8890 - }, - { - "epoch": 0.8018217071740993, - "grad_norm": 1.7761001428130159, - "learning_rate": 3.979075587823557e-07, - "loss": 0.9532, - "step": 8891 - }, - { - "epoch": 0.8019118906975695, - "grad_norm": 1.687617757008785, - "learning_rate": 3.9755791719466504e-07, - "loss": 0.8187, - "step": 8892 - }, - { - "epoch": 0.8020020742210399, - "grad_norm": 2.096833421093099, - "learning_rate": 3.9720841233616875e-07, - "loss": 0.8932, - "step": 8893 - }, - { - "epoch": 0.8020922577445101, - "grad_norm": 1.541628871488802, - "learning_rate": 3.968590442366888e-07, - "loss": 0.9127, - "step": 8894 - }, - { - "epoch": 0.8021824412679803, - "grad_norm": 1.458361389253586, - "learning_rate": 3.9650981292603423e-07, - "loss": 0.9185, - "step": 8895 - }, - { - "epoch": 0.8022726247914506, - "grad_norm": 1.449147019577581, - "learning_rate": 3.961607184340041e-07, - "loss": 0.9102, - "step": 8896 - }, - { - "epoch": 0.8023628083149209, - "grad_norm": 1.3661668291804236, - "learning_rate": 3.9581176079038505e-07, - "loss": 0.8334, - "step": 8897 - }, - { - "epoch": 0.8024529918383911, - "grad_norm": 2.1951940104046956, - "learning_rate": 3.954629400249516e-07, - "loss": 0.8912, - "step": 8898 - }, - { - "epoch": 0.8025431753618614, - "grad_norm": 1.543158642636587, - "learning_rate": 3.9511425616746787e-07, - "loss": 1.0045, - "step": 8899 - }, - { - "epoch": 0.8026333588853316, - "grad_norm": 1.5332113666677372, - "learning_rate": 3.947657092476853e-07, - "loss": 0.9962, - "step": 8900 - }, - { - "epoch": 0.802723542408802, - "grad_norm": 1.343851440504196, - "learning_rate": 3.944172992953425e-07, - "loss": 0.9526, - "step": 8901 - }, - { - "epoch": 0.8028137259322722, - "grad_norm": 1.4247959860940604, - "learning_rate": 3.9406902634017e-07, - "loss": 0.8906, - "step": 8902 - }, - { - "epoch": 0.8029039094557424, - "grad_norm": 1.35197830419231, - "learning_rate": 3.9372089041188227e-07, - "loss": 0.9764, - "step": 8903 - }, - { - "epoch": 0.8029940929792126, - "grad_norm": 1.5176122163259775, - "learning_rate": 3.9337289154018593e-07, - "loss": 0.8564, - "step": 8904 - }, - { - "epoch": 0.803084276502683, - "grad_norm": 1.5523570627948118, - "learning_rate": 3.930250297547728e-07, - "loss": 0.9415, - "step": 8905 - }, - { - "epoch": 0.8031744600261532, - "grad_norm": 1.7327367932883122, - "learning_rate": 3.9267730508532513e-07, - "loss": 0.8105, - "step": 8906 - }, - { - "epoch": 0.8032646435496235, - "grad_norm": 1.3739571518081932, - "learning_rate": 3.923297175615121e-07, - "loss": 0.9283, - "step": 8907 - }, - { - "epoch": 0.8033548270730937, - "grad_norm": 1.3058903340450556, - "learning_rate": 3.9198226721299243e-07, - "loss": 0.9819, - "step": 8908 - }, - { - "epoch": 0.803445010596564, - "grad_norm": 1.5770327121097034, - "learning_rate": 3.916349540694128e-07, - "loss": 0.9111, - "step": 8909 - }, - { - "epoch": 0.8035351941200343, - "grad_norm": 1.6051057651170262, - "learning_rate": 3.912877781604063e-07, - "loss": 0.9379, - "step": 8910 - }, - { - "epoch": 0.8036253776435045, - "grad_norm": 1.4830799275657607, - "learning_rate": 3.909407395155977e-07, - "loss": 0.9738, - "step": 8911 - }, - { - "epoch": 0.8037155611669748, - "grad_norm": 1.5429955588800348, - "learning_rate": 3.9059383816459725e-07, - "loss": 0.8801, - "step": 8912 - }, - { - "epoch": 0.8038057446904451, - "grad_norm": 3.092082797798659, - "learning_rate": 3.902470741370045e-07, - "loss": 0.9934, - "step": 8913 - }, - { - "epoch": 0.8038959282139153, - "grad_norm": 1.2729866306205209, - "learning_rate": 3.8990044746240746e-07, - "loss": 0.9284, - "step": 8914 - }, - { - "epoch": 0.8039861117373855, - "grad_norm": 1.7313509860004856, - "learning_rate": 3.8955395817038237e-07, - "loss": 1.0392, - "step": 8915 - }, - { - "epoch": 0.8040762952608559, - "grad_norm": 1.5485456288420154, - "learning_rate": 3.892076062904934e-07, - "loss": 0.9098, - "step": 8916 - }, - { - "epoch": 0.8041664787843261, - "grad_norm": 1.5994073276748317, - "learning_rate": 3.8886139185229384e-07, - "loss": 0.9026, - "step": 8917 - }, - { - "epoch": 0.8042566623077964, - "grad_norm": 1.4378778541534625, - "learning_rate": 3.8851531488532284e-07, - "loss": 0.8515, - "step": 8918 - }, - { - "epoch": 0.8043468458312666, - "grad_norm": 1.336239563104535, - "learning_rate": 3.88169375419112e-07, - "loss": 0.9602, - "step": 8919 - }, - { - "epoch": 0.8044370293547369, - "grad_norm": 1.9232194121464872, - "learning_rate": 3.8782357348317717e-07, - "loss": 0.9156, - "step": 8920 - }, - { - "epoch": 0.8045272128782072, - "grad_norm": 1.7563076104866135, - "learning_rate": 3.8747790910702437e-07, - "loss": 0.9873, - "step": 8921 - }, - { - "epoch": 0.8046173964016774, - "grad_norm": 1.3769344247400381, - "learning_rate": 3.8713238232014776e-07, - "loss": 0.9415, - "step": 8922 - }, - { - "epoch": 0.8047075799251476, - "grad_norm": 1.792516713722868, - "learning_rate": 3.867869931520296e-07, - "loss": 0.9955, - "step": 8923 - }, - { - "epoch": 0.804797763448618, - "grad_norm": 1.4536954518889613, - "learning_rate": 3.864417416321406e-07, - "loss": 0.9531, - "step": 8924 - }, - { - "epoch": 0.8048879469720882, - "grad_norm": 1.456884054567404, - "learning_rate": 3.8609662778993847e-07, - "loss": 0.9188, - "step": 8925 - }, - { - "epoch": 0.8049781304955584, - "grad_norm": 0.6081963168175392, - "learning_rate": 3.85751651654872e-07, - "loss": 0.7721, - "step": 8926 - }, - { - "epoch": 0.8050683140190287, - "grad_norm": 1.6941572199512906, - "learning_rate": 3.8540681325637505e-07, - "loss": 1.0498, - "step": 8927 - }, - { - "epoch": 0.805158497542499, - "grad_norm": 1.3597254684993103, - "learning_rate": 3.8506211262387155e-07, - "loss": 0.9498, - "step": 8928 - }, - { - "epoch": 0.8052486810659693, - "grad_norm": 1.588114705058016, - "learning_rate": 3.847175497867732e-07, - "loss": 0.9476, - "step": 8929 - }, - { - "epoch": 0.8053388645894395, - "grad_norm": 1.659668830676078, - "learning_rate": 3.843731247744801e-07, - "loss": 0.8871, - "step": 8930 - }, - { - "epoch": 0.8054290481129097, - "grad_norm": 1.4472760857784484, - "learning_rate": 3.8402883761638047e-07, - "loss": 0.9836, - "step": 8931 - }, - { - "epoch": 0.8055192316363801, - "grad_norm": 0.720374303719116, - "learning_rate": 3.8368468834185076e-07, - "loss": 0.875, - "step": 8932 - }, - { - "epoch": 0.8056094151598503, - "grad_norm": 3.6380062262265285, - "learning_rate": 3.8334067698025583e-07, - "loss": 0.9803, - "step": 8933 - }, - { - "epoch": 0.8056995986833205, - "grad_norm": 1.8496604195372022, - "learning_rate": 3.8299680356094897e-07, - "loss": 0.8836, - "step": 8934 - }, - { - "epoch": 0.8057897822067909, - "grad_norm": 1.986583832042224, - "learning_rate": 3.8265306811327024e-07, - "loss": 0.8323, - "step": 8935 - }, - { - "epoch": 0.8058799657302611, - "grad_norm": 1.5415499187631174, - "learning_rate": 3.8230947066654994e-07, - "loss": 0.908, - "step": 8936 - }, - { - "epoch": 0.8059701492537313, - "grad_norm": 1.3549091602154026, - "learning_rate": 3.819660112501053e-07, - "loss": 0.9985, - "step": 8937 - }, - { - "epoch": 0.8060603327772016, - "grad_norm": 0.6345367958185326, - "learning_rate": 3.816226898932422e-07, - "loss": 0.8291, - "step": 8938 - }, - { - "epoch": 0.8061505163006719, - "grad_norm": 1.275708426215068, - "learning_rate": 3.812795066252557e-07, - "loss": 0.9323, - "step": 8939 - }, - { - "epoch": 0.8062406998241421, - "grad_norm": 2.433460703438258, - "learning_rate": 3.8093646147542577e-07, - "loss": 0.9569, - "step": 8940 - }, - { - "epoch": 0.8063308833476124, - "grad_norm": 1.3517036584777382, - "learning_rate": 3.805935544730259e-07, - "loss": 0.9084, - "step": 8941 - }, - { - "epoch": 0.8064210668710826, - "grad_norm": 1.249000053992167, - "learning_rate": 3.802507856473118e-07, - "loss": 0.8763, - "step": 8942 - }, - { - "epoch": 0.806511250394553, - "grad_norm": 2.1178224181671506, - "learning_rate": 3.7990815502753317e-07, - "loss": 0.9378, - "step": 8943 - }, - { - "epoch": 0.8066014339180232, - "grad_norm": 1.2383803078293267, - "learning_rate": 3.795656626429231e-07, - "loss": 0.9422, - "step": 8944 - }, - { - "epoch": 0.8066916174414934, - "grad_norm": 1.6732911146519511, - "learning_rate": 3.792233085227059e-07, - "loss": 0.8632, - "step": 8945 - }, - { - "epoch": 0.8067818009649637, - "grad_norm": 1.6652093078745334, - "learning_rate": 3.788810926960928e-07, - "loss": 0.9501, - "step": 8946 - }, - { - "epoch": 0.806871984488434, - "grad_norm": 1.4855386288915806, - "learning_rate": 3.785390151922836e-07, - "loss": 0.9275, - "step": 8947 - }, - { - "epoch": 0.8069621680119042, - "grad_norm": 1.6281343456153845, - "learning_rate": 3.781970760404665e-07, - "loss": 0.9186, - "step": 8948 - }, - { - "epoch": 0.8070523515353745, - "grad_norm": 1.5938904644270737, - "learning_rate": 3.778552752698176e-07, - "loss": 1.0099, - "step": 8949 - }, - { - "epoch": 0.8071425350588447, - "grad_norm": 1.8407675980930371, - "learning_rate": 3.775136129095007e-07, - "loss": 0.8511, - "step": 8950 - }, - { - "epoch": 0.807232718582315, - "grad_norm": 1.4135743452477274, - "learning_rate": 3.771720889886685e-07, - "loss": 0.9125, - "step": 8951 - }, - { - "epoch": 0.8073229021057853, - "grad_norm": 0.7818883195530805, - "learning_rate": 3.7683070353646194e-07, - "loss": 0.8487, - "step": 8952 - }, - { - "epoch": 0.8074130856292555, - "grad_norm": 1.484903422211043, - "learning_rate": 3.7648945658200983e-07, - "loss": 0.9019, - "step": 8953 - }, - { - "epoch": 0.8075032691527257, - "grad_norm": 1.3944808138747886, - "learning_rate": 3.761483481544292e-07, - "loss": 0.8258, - "step": 8954 - }, - { - "epoch": 0.8075934526761961, - "grad_norm": 1.4817894351732435, - "learning_rate": 3.7580737828282525e-07, - "loss": 0.9114, - "step": 8955 - }, - { - "epoch": 0.8076836361996663, - "grad_norm": 1.3141128983965022, - "learning_rate": 3.754665469962921e-07, - "loss": 0.9207, - "step": 8956 - }, - { - "epoch": 0.8077738197231366, - "grad_norm": 1.3807101459073203, - "learning_rate": 3.7512585432390973e-07, - "loss": 0.898, - "step": 8957 - }, - { - "epoch": 0.8078640032466069, - "grad_norm": 1.3758537936215205, - "learning_rate": 3.7478530029474987e-07, - "loss": 0.8743, - "step": 8958 - }, - { - "epoch": 0.8079541867700771, - "grad_norm": 0.638537401513878, - "learning_rate": 3.7444488493786854e-07, - "loss": 0.8418, - "step": 8959 - }, - { - "epoch": 0.8080443702935474, - "grad_norm": 1.5734897941983257, - "learning_rate": 3.7410460828231405e-07, - "loss": 0.9963, - "step": 8960 - }, - { - "epoch": 0.8081345538170176, - "grad_norm": 1.2214790502712132, - "learning_rate": 3.737644703571188e-07, - "loss": 0.8697, - "step": 8961 - }, - { - "epoch": 0.8082247373404879, - "grad_norm": 2.025682419125238, - "learning_rate": 3.734244711913059e-07, - "loss": 1.0139, - "step": 8962 - }, - { - "epoch": 0.8083149208639582, - "grad_norm": 1.402880425154546, - "learning_rate": 3.7308461081388584e-07, - "loss": 0.9082, - "step": 8963 - }, - { - "epoch": 0.8084051043874284, - "grad_norm": 1.444066433268649, - "learning_rate": 3.727448892538576e-07, - "loss": 0.8718, - "step": 8964 - }, - { - "epoch": 0.8084952879108986, - "grad_norm": 1.3581416794463235, - "learning_rate": 3.724053065402086e-07, - "loss": 1.0201, - "step": 8965 - }, - { - "epoch": 0.808585471434369, - "grad_norm": 1.2230160217389783, - "learning_rate": 3.7206586270191285e-07, - "loss": 0.9066, - "step": 8966 - }, - { - "epoch": 0.8086756549578392, - "grad_norm": 1.3671306489296025, - "learning_rate": 3.7172655776793385e-07, - "loss": 0.9863, - "step": 8967 - }, - { - "epoch": 0.8087658384813095, - "grad_norm": 1.3850262266216733, - "learning_rate": 3.7138739176722323e-07, - "loss": 0.8293, - "step": 8968 - }, - { - "epoch": 0.8088560220047797, - "grad_norm": 1.5997445558245793, - "learning_rate": 3.710483647287206e-07, - "loss": 0.9108, - "step": 8969 - }, - { - "epoch": 0.80894620552825, - "grad_norm": 2.4533190802319074, - "learning_rate": 3.707094766813532e-07, - "loss": 0.8385, - "step": 8970 - }, - { - "epoch": 0.8090363890517203, - "grad_norm": 1.5097443849320527, - "learning_rate": 3.7037072765403754e-07, - "loss": 0.905, - "step": 8971 - }, - { - "epoch": 0.8091265725751905, - "grad_norm": 1.9770977516841082, - "learning_rate": 3.700321176756762e-07, - "loss": 0.8969, - "step": 8972 - }, - { - "epoch": 0.8092167560986607, - "grad_norm": 2.052460947164703, - "learning_rate": 3.69693646775163e-07, - "loss": 0.9889, - "step": 8973 - }, - { - "epoch": 0.8093069396221311, - "grad_norm": 1.6032867082384508, - "learning_rate": 3.693553149813764e-07, - "loss": 0.9166, - "step": 8974 - }, - { - "epoch": 0.8093971231456013, - "grad_norm": 0.5915529107043562, - "learning_rate": 3.690171223231866e-07, - "loss": 0.7686, - "step": 8975 - }, - { - "epoch": 0.8094873066690715, - "grad_norm": 1.2591481196116612, - "learning_rate": 3.6867906882944854e-07, - "loss": 1.0316, - "step": 8976 - }, - { - "epoch": 0.8095774901925418, - "grad_norm": 1.3684530449903585, - "learning_rate": 3.6834115452900737e-07, - "loss": 0.9396, - "step": 8977 - }, - { - "epoch": 0.8096676737160121, - "grad_norm": 1.4195105207375172, - "learning_rate": 3.680033794506958e-07, - "loss": 0.8388, - "step": 8978 - }, - { - "epoch": 0.8097578572394823, - "grad_norm": 1.6733300837957636, - "learning_rate": 3.676657436233346e-07, - "loss": 0.7749, - "step": 8979 - }, - { - "epoch": 0.8098480407629526, - "grad_norm": 1.5082548609319655, - "learning_rate": 3.6732824707573305e-07, - "loss": 0.9328, - "step": 8980 - }, - { - "epoch": 0.8099382242864229, - "grad_norm": 1.468401522588395, - "learning_rate": 3.6699088983668716e-07, - "loss": 0.9673, - "step": 8981 - }, - { - "epoch": 0.8100284078098932, - "grad_norm": 1.907209498831597, - "learning_rate": 3.6665367193498376e-07, - "loss": 0.8854, - "step": 8982 - }, - { - "epoch": 0.8101185913333634, - "grad_norm": 2.7008061688956095, - "learning_rate": 3.663165933993948e-07, - "loss": 0.9401, - "step": 8983 - }, - { - "epoch": 0.8102087748568336, - "grad_norm": 1.7218739642851495, - "learning_rate": 3.659796542586822e-07, - "loss": 0.9601, - "step": 8984 - }, - { - "epoch": 0.810298958380304, - "grad_norm": 1.7158151712270013, - "learning_rate": 3.6564285454159526e-07, - "loss": 0.895, - "step": 8985 - }, - { - "epoch": 0.8103891419037742, - "grad_norm": 1.308484200973666, - "learning_rate": 3.653061942768718e-07, - "loss": 0.9743, - "step": 8986 - }, - { - "epoch": 0.8104793254272444, - "grad_norm": 1.5536312176328855, - "learning_rate": 3.649696734932375e-07, - "loss": 0.8998, - "step": 8987 - }, - { - "epoch": 0.8105695089507147, - "grad_norm": 1.4686327948373539, - "learning_rate": 3.646332922194064e-07, - "loss": 0.8664, - "step": 8988 - }, - { - "epoch": 0.810659692474185, - "grad_norm": 1.424726623948493, - "learning_rate": 3.6429705048407943e-07, - "loss": 0.9235, - "step": 8989 - }, - { - "epoch": 0.8107498759976552, - "grad_norm": 1.725278834598526, - "learning_rate": 3.6396094831594804e-07, - "loss": 0.9295, - "step": 8990 - }, - { - "epoch": 0.8108400595211255, - "grad_norm": 1.6160303666122702, - "learning_rate": 3.6362498574368926e-07, - "loss": 0.8913, - "step": 8991 - }, - { - "epoch": 0.8109302430445957, - "grad_norm": 0.7234627012802649, - "learning_rate": 3.6328916279596935e-07, - "loss": 0.856, - "step": 8992 - }, - { - "epoch": 0.811020426568066, - "grad_norm": 1.502142409363514, - "learning_rate": 3.6295347950144305e-07, - "loss": 0.9825, - "step": 8993 - }, - { - "epoch": 0.8111106100915363, - "grad_norm": 1.4003446137811528, - "learning_rate": 3.626179358887522e-07, - "loss": 0.7943, - "step": 8994 - }, - { - "epoch": 0.8112007936150065, - "grad_norm": 1.6220850108455067, - "learning_rate": 3.6228253198652816e-07, - "loss": 0.9966, - "step": 8995 - }, - { - "epoch": 0.8112909771384768, - "grad_norm": 1.621565196413102, - "learning_rate": 3.6194726782338767e-07, - "loss": 0.8838, - "step": 8996 - }, - { - "epoch": 0.8113811606619471, - "grad_norm": 1.5703221410958312, - "learning_rate": 3.6161214342793953e-07, - "loss": 0.9059, - "step": 8997 - }, - { - "epoch": 0.8114713441854173, - "grad_norm": 1.5084979619058883, - "learning_rate": 3.612771588287764e-07, - "loss": 0.9661, - "step": 8998 - }, - { - "epoch": 0.8115615277088876, - "grad_norm": 1.998066563663075, - "learning_rate": 3.609423140544827e-07, - "loss": 0.995, - "step": 8999 - }, - { - "epoch": 0.8116517112323578, - "grad_norm": 1.7143915575140896, - "learning_rate": 3.6060760913362787e-07, - "loss": 0.9539, - "step": 9000 - }, - { - "epoch": 0.8117418947558281, - "grad_norm": 1.3236634657171147, - "learning_rate": 3.6027304409477146e-07, - "loss": 0.9384, - "step": 9001 - }, - { - "epoch": 0.8118320782792984, - "grad_norm": 1.2968601088071086, - "learning_rate": 3.599386189664604e-07, - "loss": 0.9094, - "step": 9002 - }, - { - "epoch": 0.8119222618027686, - "grad_norm": 1.6713342950942676, - "learning_rate": 3.5960433377722945e-07, - "loss": 0.9269, - "step": 9003 - }, - { - "epoch": 0.8120124453262388, - "grad_norm": 1.416372530452187, - "learning_rate": 3.5927018855560174e-07, - "loss": 0.9823, - "step": 9004 - }, - { - "epoch": 0.8121026288497092, - "grad_norm": 1.316961878665821, - "learning_rate": 3.5893618333008904e-07, - "loss": 0.933, - "step": 9005 - }, - { - "epoch": 0.8121928123731794, - "grad_norm": 1.5936236611947934, - "learning_rate": 3.586023181291893e-07, - "loss": 1.0252, - "step": 9006 - }, - { - "epoch": 0.8122829958966497, - "grad_norm": 0.7400894627104847, - "learning_rate": 3.5826859298139044e-07, - "loss": 0.8567, - "step": 9007 - }, - { - "epoch": 0.81237317942012, - "grad_norm": 1.5553498510559247, - "learning_rate": 3.5793500791516773e-07, - "loss": 0.9235, - "step": 9008 - }, - { - "epoch": 0.8124633629435902, - "grad_norm": 1.34234763312619, - "learning_rate": 3.5760156295898415e-07, - "loss": 0.9056, - "step": 9009 - }, - { - "epoch": 0.8125535464670605, - "grad_norm": 1.3139170308736299, - "learning_rate": 3.5726825814129203e-07, - "loss": 0.9394, - "step": 9010 - }, - { - "epoch": 0.8126437299905307, - "grad_norm": 0.7678625535932748, - "learning_rate": 3.5693509349052886e-07, - "loss": 0.8565, - "step": 9011 - }, - { - "epoch": 0.812733913514001, - "grad_norm": 1.3225519723901804, - "learning_rate": 3.5660206903512433e-07, - "loss": 1.0063, - "step": 9012 - }, - { - "epoch": 0.8128240970374713, - "grad_norm": 1.5942910715800298, - "learning_rate": 3.56269184803492e-07, - "loss": 0.9864, - "step": 9013 - }, - { - "epoch": 0.8129142805609415, - "grad_norm": 1.1837027219456964, - "learning_rate": 3.5593644082403727e-07, - "loss": 0.956, - "step": 9014 - }, - { - "epoch": 0.8130044640844117, - "grad_norm": 1.5725573535642583, - "learning_rate": 3.5560383712514994e-07, - "loss": 1.0448, - "step": 9015 - }, - { - "epoch": 0.8130946476078821, - "grad_norm": 1.532834926056201, - "learning_rate": 3.5527137373521066e-07, - "loss": 0.9398, - "step": 9016 - }, - { - "epoch": 0.8131848311313523, - "grad_norm": 1.379082817033436, - "learning_rate": 3.5493905068258645e-07, - "loss": 0.8964, - "step": 9017 - }, - { - "epoch": 0.8132750146548225, - "grad_norm": 1.3414806009881222, - "learning_rate": 3.546068679956333e-07, - "loss": 0.9533, - "step": 9018 - }, - { - "epoch": 0.8133651981782928, - "grad_norm": 1.2989249057134435, - "learning_rate": 3.5427482570269487e-07, - "loss": 1.0001, - "step": 9019 - }, - { - "epoch": 0.8134553817017631, - "grad_norm": 3.0932115799137945, - "learning_rate": 3.539429238321026e-07, - "loss": 0.7806, - "step": 9020 - }, - { - "epoch": 0.8135455652252334, - "grad_norm": 1.5533655575178023, - "learning_rate": 3.536111624121769e-07, - "loss": 0.7902, - "step": 9021 - }, - { - "epoch": 0.8136357487487036, - "grad_norm": 1.4373576881619488, - "learning_rate": 3.532795414712244e-07, - "loss": 0.894, - "step": 9022 - }, - { - "epoch": 0.8137259322721738, - "grad_norm": 1.5010344594040743, - "learning_rate": 3.5294806103754124e-07, - "loss": 0.9545, - "step": 9023 - }, - { - "epoch": 0.8138161157956442, - "grad_norm": 1.5500708809919768, - "learning_rate": 3.526167211394115e-07, - "loss": 0.9487, - "step": 9024 - }, - { - "epoch": 0.8139062993191144, - "grad_norm": 1.3439671984058725, - "learning_rate": 3.522855218051066e-07, - "loss": 0.8689, - "step": 9025 - }, - { - "epoch": 0.8139964828425846, - "grad_norm": 1.5601939159081528, - "learning_rate": 3.5195446306288633e-07, - "loss": 0.9379, - "step": 9026 - }, - { - "epoch": 0.8140866663660549, - "grad_norm": 1.196147552217035, - "learning_rate": 3.51623544940999e-07, - "loss": 0.8848, - "step": 9027 - }, - { - "epoch": 0.8141768498895252, - "grad_norm": 1.427118212791088, - "learning_rate": 3.5129276746767886e-07, - "loss": 0.9183, - "step": 9028 - }, - { - "epoch": 0.8142670334129954, - "grad_norm": 1.4126461797371543, - "learning_rate": 3.5096213067115165e-07, - "loss": 0.908, - "step": 9029 - }, - { - "epoch": 0.8143572169364657, - "grad_norm": 1.7387432727368204, - "learning_rate": 3.506316345796272e-07, - "loss": 0.952, - "step": 9030 - }, - { - "epoch": 0.814447400459936, - "grad_norm": 0.6224462310518795, - "learning_rate": 3.5030127922130714e-07, - "loss": 0.7475, - "step": 9031 - }, - { - "epoch": 0.8145375839834063, - "grad_norm": 1.3508160244120855, - "learning_rate": 3.4997106462437784e-07, - "loss": 0.8306, - "step": 9032 - }, - { - "epoch": 0.8146277675068765, - "grad_norm": 0.6841943336898932, - "learning_rate": 3.496409908170157e-07, - "loss": 0.7814, - "step": 9033 - }, - { - "epoch": 0.8147179510303467, - "grad_norm": 1.6230026587971587, - "learning_rate": 3.493110578273839e-07, - "loss": 0.9132, - "step": 9034 - }, - { - "epoch": 0.8148081345538171, - "grad_norm": 1.5062017187714283, - "learning_rate": 3.489812656836346e-07, - "loss": 0.9977, - "step": 9035 - }, - { - "epoch": 0.8148983180772873, - "grad_norm": 1.3077963605862475, - "learning_rate": 3.486516144139078e-07, - "loss": 0.8587, - "step": 9036 - }, - { - "epoch": 0.8149885016007575, - "grad_norm": 1.6487897885187284, - "learning_rate": 3.4832210404632957e-07, - "loss": 0.899, - "step": 9037 - }, - { - "epoch": 0.8150786851242278, - "grad_norm": 1.3661766020664212, - "learning_rate": 3.479927346090179e-07, - "loss": 0.9422, - "step": 9038 - }, - { - "epoch": 0.8151688686476981, - "grad_norm": 2.001348517695563, - "learning_rate": 3.4766350613007455e-07, - "loss": 0.9204, - "step": 9039 - }, - { - "epoch": 0.8152590521711683, - "grad_norm": 1.4477009601142674, - "learning_rate": 3.4733441863759173e-07, - "loss": 0.9218, - "step": 9040 - }, - { - "epoch": 0.8153492356946386, - "grad_norm": 1.4204917299432038, - "learning_rate": 3.4700547215964916e-07, - "loss": 0.8269, - "step": 9041 - }, - { - "epoch": 0.8154394192181088, - "grad_norm": 1.3933650250610408, - "learning_rate": 3.46676666724314e-07, - "loss": 0.9104, - "step": 9042 - }, - { - "epoch": 0.8155296027415792, - "grad_norm": 1.660686237783944, - "learning_rate": 3.463480023596421e-07, - "loss": 0.9312, - "step": 9043 - }, - { - "epoch": 0.8156197862650494, - "grad_norm": 1.6768539303406038, - "learning_rate": 3.460194790936772e-07, - "loss": 1.0083, - "step": 9044 - }, - { - "epoch": 0.8157099697885196, - "grad_norm": 1.2802092814720043, - "learning_rate": 3.456910969544495e-07, - "loss": 0.9269, - "step": 9045 - }, - { - "epoch": 0.8158001533119899, - "grad_norm": 2.2634696442237225, - "learning_rate": 3.4536285596997994e-07, - "loss": 0.9638, - "step": 9046 - }, - { - "epoch": 0.8158903368354602, - "grad_norm": 1.7546224852123584, - "learning_rate": 3.450347561682747e-07, - "loss": 0.9038, - "step": 9047 - }, - { - "epoch": 0.8159805203589304, - "grad_norm": 1.8312945155438907, - "learning_rate": 3.4470679757732945e-07, - "loss": 0.8824, - "step": 9048 - }, - { - "epoch": 0.8160707038824007, - "grad_norm": 1.4303365599442444, - "learning_rate": 3.4437898022512735e-07, - "loss": 1.0253, - "step": 9049 - }, - { - "epoch": 0.8161608874058709, - "grad_norm": 1.2960796445110043, - "learning_rate": 3.4405130413963977e-07, - "loss": 0.9393, - "step": 9050 - }, - { - "epoch": 0.8162510709293412, - "grad_norm": 1.1940778451875476, - "learning_rate": 3.437237693488262e-07, - "loss": 1.0027, - "step": 9051 - }, - { - "epoch": 0.8163412544528115, - "grad_norm": 1.3529400723360148, - "learning_rate": 3.433963758806322e-07, - "loss": 0.949, - "step": 9052 - }, - { - "epoch": 0.8164314379762817, - "grad_norm": 1.4177011284608905, - "learning_rate": 3.430691237629948e-07, - "loss": 0.7976, - "step": 9053 - }, - { - "epoch": 0.816521621499752, - "grad_norm": 1.4989959217242546, - "learning_rate": 3.427420130238354e-07, - "loss": 0.8834, - "step": 9054 - }, - { - "epoch": 0.8166118050232223, - "grad_norm": 1.2542334869343983, - "learning_rate": 3.424150436910658e-07, - "loss": 1.0178, - "step": 9055 - }, - { - "epoch": 0.8167019885466925, - "grad_norm": 1.756493781604613, - "learning_rate": 3.420882157925842e-07, - "loss": 0.9935, - "step": 9056 - }, - { - "epoch": 0.8167921720701627, - "grad_norm": 1.5857528804564653, - "learning_rate": 3.417615293562777e-07, - "loss": 0.9186, - "step": 9057 - }, - { - "epoch": 0.8168823555936331, - "grad_norm": 1.3371685669145454, - "learning_rate": 3.4143498441002105e-07, - "loss": 0.8524, - "step": 9058 - }, - { - "epoch": 0.8169725391171033, - "grad_norm": 1.3165902934478464, - "learning_rate": 3.411085809816767e-07, - "loss": 0.9431, - "step": 9059 - }, - { - "epoch": 0.8170627226405736, - "grad_norm": 1.5496163016362319, - "learning_rate": 3.407823190990953e-07, - "loss": 1.0545, - "step": 9060 - }, - { - "epoch": 0.8171529061640438, - "grad_norm": 1.5261943423958524, - "learning_rate": 3.4045619879011577e-07, - "loss": 1.0138, - "step": 9061 - }, - { - "epoch": 0.8172430896875141, - "grad_norm": 1.560234105321834, - "learning_rate": 3.4013022008256334e-07, - "loss": 0.8609, - "step": 9062 - }, - { - "epoch": 0.8173332732109844, - "grad_norm": 1.4999667799767886, - "learning_rate": 3.398043830042532e-07, - "loss": 0.8727, - "step": 9063 - }, - { - "epoch": 0.8174234567344546, - "grad_norm": 1.5230746106002018, - "learning_rate": 3.394786875829871e-07, - "loss": 0.9345, - "step": 9064 - }, - { - "epoch": 0.8175136402579248, - "grad_norm": 1.5191821013458395, - "learning_rate": 3.3915313384655564e-07, - "loss": 0.8228, - "step": 9065 - }, - { - "epoch": 0.8176038237813952, - "grad_norm": 1.237836499172929, - "learning_rate": 3.388277218227369e-07, - "loss": 0.948, - "step": 9066 - }, - { - "epoch": 0.8176940073048654, - "grad_norm": 1.8822343500555536, - "learning_rate": 3.3850245153929557e-07, - "loss": 0.9587, - "step": 9067 - }, - { - "epoch": 0.8177841908283356, - "grad_norm": 1.435443069960766, - "learning_rate": 3.381773230239875e-07, - "loss": 0.8901, - "step": 9068 - }, - { - "epoch": 0.8178743743518059, - "grad_norm": 1.5177774777062267, - "learning_rate": 3.3785233630455247e-07, - "loss": 0.9674, - "step": 9069 - }, - { - "epoch": 0.8179645578752762, - "grad_norm": 1.576977454208767, - "learning_rate": 3.375274914087221e-07, - "loss": 0.9498, - "step": 9070 - }, - { - "epoch": 0.8180547413987465, - "grad_norm": 1.328982715772719, - "learning_rate": 3.3720278836421234e-07, - "loss": 0.9098, - "step": 9071 - }, - { - "epoch": 0.8181449249222167, - "grad_norm": 1.6417514385404712, - "learning_rate": 3.368782271987294e-07, - "loss": 0.9607, - "step": 9072 - }, - { - "epoch": 0.8182351084456869, - "grad_norm": 1.5005172791240864, - "learning_rate": 3.3655380793996636e-07, - "loss": 0.945, - "step": 9073 - }, - { - "epoch": 0.8183252919691573, - "grad_norm": 1.2709501817275486, - "learning_rate": 3.362295306156047e-07, - "loss": 0.8614, - "step": 9074 - }, - { - "epoch": 0.8184154754926275, - "grad_norm": 2.409703736199212, - "learning_rate": 3.3590539525331327e-07, - "loss": 0.9462, - "step": 9075 - }, - { - "epoch": 0.8185056590160977, - "grad_norm": 1.3748040059593734, - "learning_rate": 3.3558140188074967e-07, - "loss": 0.8914, - "step": 9076 - }, - { - "epoch": 0.8185958425395681, - "grad_norm": 1.632475165606142, - "learning_rate": 3.3525755052555817e-07, - "loss": 0.9626, - "step": 9077 - }, - { - "epoch": 0.8186860260630383, - "grad_norm": 0.7616901490148286, - "learning_rate": 3.3493384121537147e-07, - "loss": 0.8802, - "step": 9078 - }, - { - "epoch": 0.8187762095865085, - "grad_norm": 1.8144393279800386, - "learning_rate": 3.3461027397781075e-07, - "loss": 0.9424, - "step": 9079 - }, - { - "epoch": 0.8188663931099788, - "grad_norm": 1.4977599743348746, - "learning_rate": 3.3428684884048397e-07, - "loss": 0.9202, - "step": 9080 - }, - { - "epoch": 0.8189565766334491, - "grad_norm": 1.3168200286848453, - "learning_rate": 3.3396356583098826e-07, - "loss": 0.9369, - "step": 9081 - }, - { - "epoch": 0.8190467601569194, - "grad_norm": 1.3274720774873134, - "learning_rate": 3.3364042497690736e-07, - "loss": 0.9659, - "step": 9082 - }, - { - "epoch": 0.8191369436803896, - "grad_norm": 1.1752806957255433, - "learning_rate": 3.3331742630581405e-07, - "loss": 0.9889, - "step": 9083 - }, - { - "epoch": 0.8192271272038598, - "grad_norm": 1.2607420456677156, - "learning_rate": 3.3299456984526717e-07, - "loss": 0.8814, - "step": 9084 - }, - { - "epoch": 0.8193173107273302, - "grad_norm": 1.278570769904257, - "learning_rate": 3.3267185562281605e-07, - "loss": 0.9731, - "step": 9085 - }, - { - "epoch": 0.8194074942508004, - "grad_norm": 1.5110110023395873, - "learning_rate": 3.3234928366599514e-07, - "loss": 0.9295, - "step": 9086 - }, - { - "epoch": 0.8194976777742706, - "grad_norm": 1.5532232702470494, - "learning_rate": 3.3202685400232946e-07, - "loss": 0.9733, - "step": 9087 - }, - { - "epoch": 0.8195878612977409, - "grad_norm": 1.8407492707404023, - "learning_rate": 3.317045666593292e-07, - "loss": 0.9624, - "step": 9088 - }, - { - "epoch": 0.8196780448212112, - "grad_norm": 1.5582258894628094, - "learning_rate": 3.3138242166449426e-07, - "loss": 0.9373, - "step": 9089 - }, - { - "epoch": 0.8197682283446814, - "grad_norm": 1.3288593281709986, - "learning_rate": 3.310604190453117e-07, - "loss": 0.9998, - "step": 9090 - }, - { - "epoch": 0.8198584118681517, - "grad_norm": 1.2246072334136975, - "learning_rate": 3.307385588292566e-07, - "loss": 0.888, - "step": 9091 - }, - { - "epoch": 0.8199485953916219, - "grad_norm": 1.4738969913654094, - "learning_rate": 3.304168410437924e-07, - "loss": 0.9243, - "step": 9092 - }, - { - "epoch": 0.8200387789150922, - "grad_norm": 0.6445137946337222, - "learning_rate": 3.300952657163687e-07, - "loss": 0.8386, - "step": 9093 - }, - { - "epoch": 0.8201289624385625, - "grad_norm": 1.6018817118289403, - "learning_rate": 3.297738328744248e-07, - "loss": 0.9139, - "step": 9094 - }, - { - "epoch": 0.8202191459620327, - "grad_norm": 1.596281920183344, - "learning_rate": 3.2945254254538714e-07, - "loss": 0.9277, - "step": 9095 - }, - { - "epoch": 0.820309329485503, - "grad_norm": 1.2624884935364562, - "learning_rate": 3.2913139475666963e-07, - "loss": 0.9051, - "step": 9096 - }, - { - "epoch": 0.8203995130089733, - "grad_norm": 1.703138928837645, - "learning_rate": 3.288103895356749e-07, - "loss": 0.8895, - "step": 9097 - }, - { - "epoch": 0.8204896965324435, - "grad_norm": 1.4036728025356797, - "learning_rate": 3.284895269097927e-07, - "loss": 0.8712, - "step": 9098 - }, - { - "epoch": 0.8205798800559138, - "grad_norm": 0.6952921189310716, - "learning_rate": 3.281688069063999e-07, - "loss": 0.8131, - "step": 9099 - }, - { - "epoch": 0.8206700635793841, - "grad_norm": 1.5020001110173373, - "learning_rate": 3.2784822955286396e-07, - "loss": 0.7587, - "step": 9100 - }, - { - "epoch": 0.8207602471028543, - "grad_norm": 1.32015227299264, - "learning_rate": 3.275277948765365e-07, - "loss": 1.0053, - "step": 9101 - }, - { - "epoch": 0.8208504306263246, - "grad_norm": 1.3617932962923494, - "learning_rate": 3.2720750290475964e-07, - "loss": 0.9844, - "step": 9102 - }, - { - "epoch": 0.8209406141497948, - "grad_norm": 1.7559894517829397, - "learning_rate": 3.268873536648622e-07, - "loss": 0.9401, - "step": 9103 - }, - { - "epoch": 0.8210307976732651, - "grad_norm": 1.496653160906532, - "learning_rate": 3.265673471841612e-07, - "loss": 0.9092, - "step": 9104 - }, - { - "epoch": 0.8211209811967354, - "grad_norm": 0.6692419132460862, - "learning_rate": 3.262474834899616e-07, - "loss": 0.7616, - "step": 9105 - }, - { - "epoch": 0.8212111647202056, - "grad_norm": 0.6534008438358866, - "learning_rate": 3.2592776260955534e-07, - "loss": 0.8436, - "step": 9106 - }, - { - "epoch": 0.8213013482436758, - "grad_norm": 1.6460378093293668, - "learning_rate": 3.256081845702239e-07, - "loss": 0.9792, - "step": 9107 - }, - { - "epoch": 0.8213915317671462, - "grad_norm": 1.3551389969200616, - "learning_rate": 3.2528874939923335e-07, - "loss": 0.9878, - "step": 9108 - }, - { - "epoch": 0.8214817152906164, - "grad_norm": 1.4832332034372777, - "learning_rate": 3.2496945712384217e-07, - "loss": 0.8968, - "step": 9109 - }, - { - "epoch": 0.8215718988140867, - "grad_norm": 1.549103643822517, - "learning_rate": 3.246503077712923e-07, - "loss": 0.9246, - "step": 9110 - }, - { - "epoch": 0.8216620823375569, - "grad_norm": 1.2548370234228758, - "learning_rate": 3.2433130136881625e-07, - "loss": 1.0272, - "step": 9111 - }, - { - "epoch": 0.8217522658610272, - "grad_norm": 1.944962129101625, - "learning_rate": 3.2401243794363287e-07, - "loss": 0.9282, - "step": 9112 - }, - { - "epoch": 0.8218424493844975, - "grad_norm": 1.2464109871975024, - "learning_rate": 3.236937175229495e-07, - "loss": 0.9937, - "step": 9113 - }, - { - "epoch": 0.8219326329079677, - "grad_norm": 1.8703550023194897, - "learning_rate": 3.233751401339615e-07, - "loss": 0.907, - "step": 9114 - }, - { - "epoch": 0.8220228164314379, - "grad_norm": 1.5687250872453806, - "learning_rate": 3.2305670580385157e-07, - "loss": 1.0162, - "step": 9115 - }, - { - "epoch": 0.8221129999549083, - "grad_norm": 1.2768392704069327, - "learning_rate": 3.227384145597898e-07, - "loss": 0.935, - "step": 9116 - }, - { - "epoch": 0.8222031834783785, - "grad_norm": 1.659894208984091, - "learning_rate": 3.224202664289346e-07, - "loss": 0.9233, - "step": 9117 - }, - { - "epoch": 0.8222933670018487, - "grad_norm": 1.590191102569449, - "learning_rate": 3.2210226143843257e-07, - "loss": 0.8822, - "step": 9118 - }, - { - "epoch": 0.822383550525319, - "grad_norm": 0.6986503635128994, - "learning_rate": 3.217843996154173e-07, - "loss": 0.8376, - "step": 9119 - }, - { - "epoch": 0.8224737340487893, - "grad_norm": 0.6716853472933477, - "learning_rate": 3.2146668098701055e-07, - "loss": 0.752, - "step": 9120 - }, - { - "epoch": 0.8225639175722596, - "grad_norm": 1.475810187294877, - "learning_rate": 3.2114910558032215e-07, - "loss": 1.0654, - "step": 9121 - }, - { - "epoch": 0.8226541010957298, - "grad_norm": 1.6550377782099859, - "learning_rate": 3.2083167342244945e-07, - "loss": 0.9637, - "step": 9122 - }, - { - "epoch": 0.8227442846192, - "grad_norm": 1.5950826421392423, - "learning_rate": 3.205143845404763e-07, - "loss": 1.0317, - "step": 9123 - }, - { - "epoch": 0.8228344681426704, - "grad_norm": 1.4656160887099574, - "learning_rate": 3.201972389614773e-07, - "loss": 0.9148, - "step": 9124 - }, - { - "epoch": 0.8229246516661406, - "grad_norm": 1.8789172261446876, - "learning_rate": 3.198802367125115e-07, - "loss": 0.9429, - "step": 9125 - }, - { - "epoch": 0.8230148351896108, - "grad_norm": 1.4926994204560113, - "learning_rate": 3.195633778206288e-07, - "loss": 0.9458, - "step": 9126 - }, - { - "epoch": 0.8231050187130812, - "grad_norm": 1.6604929145080596, - "learning_rate": 3.19246662312864e-07, - "loss": 1.0136, - "step": 9127 - }, - { - "epoch": 0.8231952022365514, - "grad_norm": 0.6748073152747656, - "learning_rate": 3.189300902162417e-07, - "loss": 0.8029, - "step": 9128 - }, - { - "epoch": 0.8232853857600216, - "grad_norm": 1.599890454833787, - "learning_rate": 3.1861366155777327e-07, - "loss": 0.9439, - "step": 9129 - }, - { - "epoch": 0.8233755692834919, - "grad_norm": 1.218388675045073, - "learning_rate": 3.182973763644583e-07, - "loss": 0.9485, - "step": 9130 - }, - { - "epoch": 0.8234657528069622, - "grad_norm": 1.4798327365643043, - "learning_rate": 3.1798123466328463e-07, - "loss": 0.8646, - "step": 9131 - }, - { - "epoch": 0.8235559363304324, - "grad_norm": 1.5928726025087117, - "learning_rate": 3.17665236481226e-07, - "loss": 0.9413, - "step": 9132 - }, - { - "epoch": 0.8236461198539027, - "grad_norm": 1.6599240129383177, - "learning_rate": 3.1734938184524576e-07, - "loss": 0.9037, - "step": 9133 - }, - { - "epoch": 0.8237363033773729, - "grad_norm": 1.4758413663042762, - "learning_rate": 3.1703367078229427e-07, - "loss": 0.9511, - "step": 9134 - }, - { - "epoch": 0.8238264869008433, - "grad_norm": 1.4997853284446567, - "learning_rate": 3.167181033193096e-07, - "loss": 0.8991, - "step": 9135 - }, - { - "epoch": 0.8239166704243135, - "grad_norm": 1.4063408292471307, - "learning_rate": 3.16402679483218e-07, - "loss": 0.9491, - "step": 9136 - }, - { - "epoch": 0.8240068539477837, - "grad_norm": 1.227424027447904, - "learning_rate": 3.1608739930093366e-07, - "loss": 0.8721, - "step": 9137 - }, - { - "epoch": 0.824097037471254, - "grad_norm": 1.3851008473529831, - "learning_rate": 3.157722627993562e-07, - "loss": 0.8493, - "step": 9138 - }, - { - "epoch": 0.8241872209947243, - "grad_norm": 1.433079016472226, - "learning_rate": 3.1545727000537727e-07, - "loss": 0.9003, - "step": 9139 - }, - { - "epoch": 0.8242774045181945, - "grad_norm": 1.5181139929016285, - "learning_rate": 3.151424209458713e-07, - "loss": 0.9411, - "step": 9140 - }, - { - "epoch": 0.8243675880416648, - "grad_norm": 1.4250349207832278, - "learning_rate": 3.148277156477053e-07, - "loss": 0.9551, - "step": 9141 - }, - { - "epoch": 0.824457771565135, - "grad_norm": 1.851805208289863, - "learning_rate": 3.145131541377299e-07, - "loss": 0.9435, - "step": 9142 - }, - { - "epoch": 0.8245479550886053, - "grad_norm": 1.5118448367506323, - "learning_rate": 3.1419873644278606e-07, - "loss": 0.8994, - "step": 9143 - }, - { - "epoch": 0.8246381386120756, - "grad_norm": 1.6269175146631811, - "learning_rate": 3.1388446258970147e-07, - "loss": 0.9507, - "step": 9144 - }, - { - "epoch": 0.8247283221355458, - "grad_norm": 1.4804228521393725, - "learning_rate": 3.1357033260529145e-07, - "loss": 0.925, - "step": 9145 - }, - { - "epoch": 0.824818505659016, - "grad_norm": 1.6536383280746472, - "learning_rate": 3.1325634651636025e-07, - "loss": 1.0127, - "step": 9146 - }, - { - "epoch": 0.8249086891824864, - "grad_norm": 1.2644847867087492, - "learning_rate": 3.1294250434969694e-07, - "loss": 0.9752, - "step": 9147 - }, - { - "epoch": 0.8249988727059566, - "grad_norm": 1.6842792292670783, - "learning_rate": 3.1262880613208274e-07, - "loss": 0.9772, - "step": 9148 - }, - { - "epoch": 0.8250890562294269, - "grad_norm": 2.284589022148638, - "learning_rate": 3.123152518902823e-07, - "loss": 0.9749, - "step": 9149 - }, - { - "epoch": 0.8251792397528972, - "grad_norm": 1.290644512998843, - "learning_rate": 3.1200184165105017e-07, - "loss": 0.8951, - "step": 9150 - }, - { - "epoch": 0.8252694232763674, - "grad_norm": 1.5032869723601345, - "learning_rate": 3.116885754411287e-07, - "loss": 0.9451, - "step": 9151 - }, - { - "epoch": 0.8253596067998377, - "grad_norm": 1.6638761604103856, - "learning_rate": 3.1137545328724703e-07, - "loss": 0.872, - "step": 9152 - }, - { - "epoch": 0.8254497903233079, - "grad_norm": 1.6053746699405846, - "learning_rate": 3.1106247521612285e-07, - "loss": 0.9364, - "step": 9153 - }, - { - "epoch": 0.8255399738467782, - "grad_norm": 1.5469811913617444, - "learning_rate": 3.107496412544612e-07, - "loss": 0.9556, - "step": 9154 - }, - { - "epoch": 0.8256301573702485, - "grad_norm": 1.3238428983429653, - "learning_rate": 3.1043695142895397e-07, - "loss": 0.9614, - "step": 9155 - }, - { - "epoch": 0.8257203408937187, - "grad_norm": 1.3493129995501432, - "learning_rate": 3.101244057662828e-07, - "loss": 0.8599, - "step": 9156 - }, - { - "epoch": 0.8258105244171889, - "grad_norm": 1.375310299279364, - "learning_rate": 3.098120042931152e-07, - "loss": 0.9254, - "step": 9157 - }, - { - "epoch": 0.8259007079406593, - "grad_norm": 1.363468758440928, - "learning_rate": 3.0949974703610647e-07, - "loss": 0.9823, - "step": 9158 - }, - { - "epoch": 0.8259908914641295, - "grad_norm": 1.461130221297913, - "learning_rate": 3.0918763402190107e-07, - "loss": 0.926, - "step": 9159 - }, - { - "epoch": 0.8260810749875998, - "grad_norm": 1.5224176367669733, - "learning_rate": 3.088756652771296e-07, - "loss": 0.8992, - "step": 9160 - }, - { - "epoch": 0.82617125851107, - "grad_norm": 1.5841406805056597, - "learning_rate": 3.0856384082841147e-07, - "loss": 1.0327, - "step": 9161 - }, - { - "epoch": 0.8262614420345403, - "grad_norm": 1.3147784393101343, - "learning_rate": 3.0825216070235207e-07, - "loss": 0.8866, - "step": 9162 - }, - { - "epoch": 0.8263516255580106, - "grad_norm": 1.7545258716796714, - "learning_rate": 3.0794062492554764e-07, - "loss": 1.0555, - "step": 9163 - }, - { - "epoch": 0.8264418090814808, - "grad_norm": 1.8231992956591154, - "learning_rate": 3.076292335245783e-07, - "loss": 0.9537, - "step": 9164 - }, - { - "epoch": 0.826531992604951, - "grad_norm": 1.491751642133251, - "learning_rate": 3.073179865260145e-07, - "loss": 1.0079, - "step": 9165 - }, - { - "epoch": 0.8266221761284214, - "grad_norm": 1.2919566639048234, - "learning_rate": 3.070068839564135e-07, - "loss": 0.9422, - "step": 9166 - }, - { - "epoch": 0.8267123596518916, - "grad_norm": 2.3389070447739084, - "learning_rate": 3.0669592584232006e-07, - "loss": 0.9711, - "step": 9167 - }, - { - "epoch": 0.8268025431753618, - "grad_norm": 1.456482566632257, - "learning_rate": 3.063851122102672e-07, - "loss": 0.8503, - "step": 9168 - }, - { - "epoch": 0.8268927266988321, - "grad_norm": 1.596307310958989, - "learning_rate": 3.06074443086775e-07, - "loss": 0.8644, - "step": 9169 - }, - { - "epoch": 0.8269829102223024, - "grad_norm": 0.6379002333861739, - "learning_rate": 3.057639184983514e-07, - "loss": 0.7654, - "step": 9170 - }, - { - "epoch": 0.8270730937457726, - "grad_norm": 1.2939809657529104, - "learning_rate": 3.054535384714927e-07, - "loss": 0.9373, - "step": 9171 - }, - { - "epoch": 0.8271632772692429, - "grad_norm": 1.8307370167143848, - "learning_rate": 3.0514330303268135e-07, - "loss": 1.0985, - "step": 9172 - }, - { - "epoch": 0.8272534607927132, - "grad_norm": 1.4469379254822836, - "learning_rate": 3.0483321220838876e-07, - "loss": 0.89, - "step": 9173 - }, - { - "epoch": 0.8273436443161835, - "grad_norm": 1.3233200219267178, - "learning_rate": 3.045232660250734e-07, - "loss": 0.7951, - "step": 9174 - }, - { - "epoch": 0.8274338278396537, - "grad_norm": 1.4018262583081973, - "learning_rate": 3.0421346450918185e-07, - "loss": 0.9655, - "step": 9175 - }, - { - "epoch": 0.8275240113631239, - "grad_norm": 1.3703775301284211, - "learning_rate": 3.039038076871485e-07, - "loss": 0.8856, - "step": 9176 - }, - { - "epoch": 0.8276141948865943, - "grad_norm": 1.3138347832120367, - "learning_rate": 3.035942955853934e-07, - "loss": 0.9555, - "step": 9177 - }, - { - "epoch": 0.8277043784100645, - "grad_norm": 1.4579166452837202, - "learning_rate": 3.0328492823032804e-07, - "loss": 0.9129, - "step": 9178 - }, - { - "epoch": 0.8277945619335347, - "grad_norm": 1.4224133153693375, - "learning_rate": 3.029757056483471e-07, - "loss": 0.8767, - "step": 9179 - }, - { - "epoch": 0.827884745457005, - "grad_norm": 1.2968494918336897, - "learning_rate": 3.026666278658372e-07, - "loss": 0.8278, - "step": 9180 - }, - { - "epoch": 0.8279749289804753, - "grad_norm": 1.2111004627461557, - "learning_rate": 3.023576949091691e-07, - "loss": 0.94, - "step": 9181 - }, - { - "epoch": 0.8280651125039455, - "grad_norm": 1.4668683924808574, - "learning_rate": 3.020489068047032e-07, - "loss": 0.8587, - "step": 9182 - }, - { - "epoch": 0.8281552960274158, - "grad_norm": 1.5089652443151318, - "learning_rate": 3.017402635787869e-07, - "loss": 0.9056, - "step": 9183 - }, - { - "epoch": 0.828245479550886, - "grad_norm": 1.4667379517282277, - "learning_rate": 3.0143176525775537e-07, - "loss": 0.9075, - "step": 9184 - }, - { - "epoch": 0.8283356630743564, - "grad_norm": 1.4197594970879868, - "learning_rate": 3.0112341186793155e-07, - "loss": 0.8822, - "step": 9185 - }, - { - "epoch": 0.8284258465978266, - "grad_norm": 1.3962094763302653, - "learning_rate": 3.008152034356264e-07, - "loss": 0.8756, - "step": 9186 - }, - { - "epoch": 0.8285160301212968, - "grad_norm": 0.6234285865471563, - "learning_rate": 3.005071399871366e-07, - "loss": 0.7809, - "step": 9187 - }, - { - "epoch": 0.828606213644767, - "grad_norm": 1.4736813486042772, - "learning_rate": 3.0019922154874853e-07, - "loss": 0.9997, - "step": 9188 - }, - { - "epoch": 0.8286963971682374, - "grad_norm": 1.8836296492659783, - "learning_rate": 2.998914481467356e-07, - "loss": 0.8969, - "step": 9189 - }, - { - "epoch": 0.8287865806917076, - "grad_norm": 1.5658922374717839, - "learning_rate": 2.9958381980735837e-07, - "loss": 0.8854, - "step": 9190 - }, - { - "epoch": 0.8288767642151779, - "grad_norm": 1.5803756528052095, - "learning_rate": 2.992763365568658e-07, - "loss": 0.8668, - "step": 9191 - }, - { - "epoch": 0.8289669477386481, - "grad_norm": 1.4371860824813538, - "learning_rate": 2.98968998421494e-07, - "loss": 0.9387, - "step": 9192 - }, - { - "epoch": 0.8290571312621184, - "grad_norm": 1.429944791728463, - "learning_rate": 2.98661805427467e-07, - "loss": 0.9259, - "step": 9193 - }, - { - "epoch": 0.8291473147855887, - "grad_norm": 1.3375587664376904, - "learning_rate": 2.9835475760099483e-07, - "loss": 0.9547, - "step": 9194 - }, - { - "epoch": 0.8292374983090589, - "grad_norm": 1.2845753571473757, - "learning_rate": 2.9804785496827856e-07, - "loss": 0.9532, - "step": 9195 - }, - { - "epoch": 0.8293276818325293, - "grad_norm": 1.7121576983425126, - "learning_rate": 2.977410975555028e-07, - "loss": 0.9295, - "step": 9196 - }, - { - "epoch": 0.8294178653559995, - "grad_norm": 1.6393634123543546, - "learning_rate": 2.9743448538884376e-07, - "loss": 0.9877, - "step": 9197 - }, - { - "epoch": 0.8295080488794697, - "grad_norm": 1.2713209477884673, - "learning_rate": 2.9712801849446154e-07, - "loss": 0.9196, - "step": 9198 - }, - { - "epoch": 0.82959823240294, - "grad_norm": 1.4015431363438415, - "learning_rate": 2.9682169689850665e-07, - "loss": 0.8857, - "step": 9199 - }, - { - "epoch": 0.8296884159264103, - "grad_norm": 0.6886767806442637, - "learning_rate": 2.9651552062711573e-07, - "loss": 0.768, - "step": 9200 - }, - { - "epoch": 0.8297785994498805, - "grad_norm": 1.2646757262831612, - "learning_rate": 2.9620948970641333e-07, - "loss": 1.0078, - "step": 9201 - }, - { - "epoch": 0.8298687829733508, - "grad_norm": 1.2934577685563207, - "learning_rate": 2.959036041625125e-07, - "loss": 0.9503, - "step": 9202 - }, - { - "epoch": 0.829958966496821, - "grad_norm": 1.4322510129765533, - "learning_rate": 2.95597864021512e-07, - "loss": 0.8453, - "step": 9203 - }, - { - "epoch": 0.8300491500202913, - "grad_norm": 2.273797285312841, - "learning_rate": 2.9529226930949966e-07, - "loss": 0.9796, - "step": 9204 - }, - { - "epoch": 0.8301393335437616, - "grad_norm": 1.5507880145869466, - "learning_rate": 2.949868200525505e-07, - "loss": 0.9436, - "step": 9205 - }, - { - "epoch": 0.8302295170672318, - "grad_norm": 1.5973605229675243, - "learning_rate": 2.9468151627672734e-07, - "loss": 0.9335, - "step": 9206 - }, - { - "epoch": 0.830319700590702, - "grad_norm": 1.3480972231999675, - "learning_rate": 2.9437635800808026e-07, - "loss": 0.8883, - "step": 9207 - }, - { - "epoch": 0.8304098841141724, - "grad_norm": 1.7138732388381417, - "learning_rate": 2.940713452726473e-07, - "loss": 0.9189, - "step": 9208 - }, - { - "epoch": 0.8305000676376426, - "grad_norm": 1.293590661155901, - "learning_rate": 2.937664780964526e-07, - "loss": 0.9257, - "step": 9209 - }, - { - "epoch": 0.8305902511611128, - "grad_norm": 1.6393274898530912, - "learning_rate": 2.9346175650551133e-07, - "loss": 0.947, - "step": 9210 - }, - { - "epoch": 0.8306804346845831, - "grad_norm": 1.4205373823113798, - "learning_rate": 2.931571805258215e-07, - "loss": 0.9576, - "step": 9211 - }, - { - "epoch": 0.8307706182080534, - "grad_norm": 1.5399933144808227, - "learning_rate": 2.9285275018337353e-07, - "loss": 0.9864, - "step": 9212 - }, - { - "epoch": 0.8308608017315237, - "grad_norm": 1.7166172319843553, - "learning_rate": 2.9254846550414146e-07, - "loss": 1.0364, - "step": 9213 - }, - { - "epoch": 0.8309509852549939, - "grad_norm": 1.597636476874699, - "learning_rate": 2.922443265140893e-07, - "loss": 0.9788, - "step": 9214 - }, - { - "epoch": 0.8310411687784641, - "grad_norm": 1.5504500566328452, - "learning_rate": 2.919403332391674e-07, - "loss": 0.9305, - "step": 9215 - }, - { - "epoch": 0.8311313523019345, - "grad_norm": 1.3450509360924918, - "learning_rate": 2.9163648570531464e-07, - "loss": 0.9546, - "step": 9216 - }, - { - "epoch": 0.8312215358254047, - "grad_norm": 1.6193543443794265, - "learning_rate": 2.9133278393845717e-07, - "loss": 0.887, - "step": 9217 - }, - { - "epoch": 0.8313117193488749, - "grad_norm": 1.3034632072392158, - "learning_rate": 2.9102922796450703e-07, - "loss": 1.0026, - "step": 9218 - }, - { - "epoch": 0.8314019028723453, - "grad_norm": 1.2787238548492201, - "learning_rate": 2.907258178093672e-07, - "loss": 0.8969, - "step": 9219 - }, - { - "epoch": 0.8314920863958155, - "grad_norm": 1.3445391444717576, - "learning_rate": 2.904225534989251e-07, - "loss": 0.852, - "step": 9220 - }, - { - "epoch": 0.8315822699192857, - "grad_norm": 1.4186289697857595, - "learning_rate": 2.901194350590572e-07, - "loss": 0.8409, - "step": 9221 - }, - { - "epoch": 0.831672453442756, - "grad_norm": 1.456210234921804, - "learning_rate": 2.898164625156274e-07, - "loss": 0.8224, - "step": 9222 - }, - { - "epoch": 0.8317626369662263, - "grad_norm": 1.2931855817996927, - "learning_rate": 2.8951363589448676e-07, - "loss": 0.8907, - "step": 9223 - }, - { - "epoch": 0.8318528204896966, - "grad_norm": 1.566010917567055, - "learning_rate": 2.8921095522147434e-07, - "loss": 0.9288, - "step": 9224 - }, - { - "epoch": 0.8319430040131668, - "grad_norm": 1.5310334811595463, - "learning_rate": 2.8890842052241683e-07, - "loss": 0.9658, - "step": 9225 - }, - { - "epoch": 0.832033187536637, - "grad_norm": 0.6016178848730387, - "learning_rate": 2.886060318231267e-07, - "loss": 0.7399, - "step": 9226 - }, - { - "epoch": 0.8321233710601074, - "grad_norm": 1.788667460309304, - "learning_rate": 2.8830378914940755e-07, - "loss": 0.9572, - "step": 9227 - }, - { - "epoch": 0.8322135545835776, - "grad_norm": 2.61755295664385, - "learning_rate": 2.8800169252704675e-07, - "loss": 0.9352, - "step": 9228 - }, - { - "epoch": 0.8323037381070478, - "grad_norm": 1.3101923501415762, - "learning_rate": 2.8769974198182143e-07, - "loss": 0.9572, - "step": 9229 - }, - { - "epoch": 0.8323939216305181, - "grad_norm": 1.3401161561981376, - "learning_rate": 2.873979375394955e-07, - "loss": 0.9357, - "step": 9230 - }, - { - "epoch": 0.8324841051539884, - "grad_norm": 1.3420798990197205, - "learning_rate": 2.870962792258209e-07, - "loss": 0.9647, - "step": 9231 - }, - { - "epoch": 0.8325742886774586, - "grad_norm": 1.2829759184702465, - "learning_rate": 2.8679476706653716e-07, - "loss": 0.9499, - "step": 9232 - }, - { - "epoch": 0.8326644722009289, - "grad_norm": 1.5578574829341063, - "learning_rate": 2.864934010873692e-07, - "loss": 0.96, - "step": 9233 - }, - { - "epoch": 0.8327546557243991, - "grad_norm": 1.3110426577561227, - "learning_rate": 2.8619218131403357e-07, - "loss": 0.9105, - "step": 9234 - }, - { - "epoch": 0.8328448392478695, - "grad_norm": 1.7422936924614678, - "learning_rate": 2.858911077722299e-07, - "loss": 0.9681, - "step": 9235 - }, - { - "epoch": 0.8329350227713397, - "grad_norm": 1.6733114898568024, - "learning_rate": 2.855901804876493e-07, - "loss": 0.7959, - "step": 9236 - }, - { - "epoch": 0.8330252062948099, - "grad_norm": 1.2811926386948935, - "learning_rate": 2.852893994859673e-07, - "loss": 0.9754, - "step": 9237 - }, - { - "epoch": 0.8331153898182801, - "grad_norm": 1.2551329129724407, - "learning_rate": 2.849887647928484e-07, - "loss": 0.8858, - "step": 9238 - }, - { - "epoch": 0.8332055733417505, - "grad_norm": 2.5896447106655436, - "learning_rate": 2.8468827643394465e-07, - "loss": 0.8346, - "step": 9239 - }, - { - "epoch": 0.8332957568652207, - "grad_norm": 1.3612601702461704, - "learning_rate": 2.843879344348954e-07, - "loss": 0.9503, - "step": 9240 - }, - { - "epoch": 0.833385940388691, - "grad_norm": 1.4364859279815665, - "learning_rate": 2.840877388213272e-07, - "loss": 0.9069, - "step": 9241 - }, - { - "epoch": 0.8334761239121612, - "grad_norm": 1.5021616142428103, - "learning_rate": 2.8378768961885515e-07, - "loss": 0.9384, - "step": 9242 - }, - { - "epoch": 0.8335663074356315, - "grad_norm": 1.4346927099812574, - "learning_rate": 2.8348778685307983e-07, - "loss": 0.9677, - "step": 9243 - }, - { - "epoch": 0.8336564909591018, - "grad_norm": 1.423664936609154, - "learning_rate": 2.831880305495915e-07, - "loss": 1.0093, - "step": 9244 - }, - { - "epoch": 0.833746674482572, - "grad_norm": 0.6858845284065865, - "learning_rate": 2.828884207339668e-07, - "loss": 0.8235, - "step": 9245 - }, - { - "epoch": 0.8338368580060423, - "grad_norm": 1.7808287272961245, - "learning_rate": 2.8258895743177014e-07, - "loss": 1.0181, - "step": 9246 - }, - { - "epoch": 0.8339270415295126, - "grad_norm": 1.4192826670115417, - "learning_rate": 2.8228964066855356e-07, - "loss": 1.0064, - "step": 9247 - }, - { - "epoch": 0.8340172250529828, - "grad_norm": 1.389658559948639, - "learning_rate": 2.819904704698555e-07, - "loss": 0.9177, - "step": 9248 - }, - { - "epoch": 0.834107408576453, - "grad_norm": 0.6680144132655299, - "learning_rate": 2.8169144686120437e-07, - "loss": 0.7559, - "step": 9249 - }, - { - "epoch": 0.8341975920999234, - "grad_norm": 1.5127098915180686, - "learning_rate": 2.8139256986811254e-07, - "loss": 0.8476, - "step": 9250 - }, - { - "epoch": 0.8342877756233936, - "grad_norm": 1.791656279718616, - "learning_rate": 2.8109383951608424e-07, - "loss": 0.9909, - "step": 9251 - }, - { - "epoch": 0.8343779591468639, - "grad_norm": 1.4490098005743939, - "learning_rate": 2.8079525583060683e-07, - "loss": 1.0076, - "step": 9252 - }, - { - "epoch": 0.8344681426703341, - "grad_norm": 1.1754751218810437, - "learning_rate": 2.804968188371577e-07, - "loss": 0.9404, - "step": 9253 - }, - { - "epoch": 0.8345583261938044, - "grad_norm": 1.3152798324447172, - "learning_rate": 2.801985285612014e-07, - "loss": 0.9335, - "step": 9254 - }, - { - "epoch": 0.8346485097172747, - "grad_norm": 1.4480489857907548, - "learning_rate": 2.7990038502818934e-07, - "loss": 0.9122, - "step": 9255 - }, - { - "epoch": 0.8347386932407449, - "grad_norm": 1.368097187942817, - "learning_rate": 2.796023882635612e-07, - "loss": 0.915, - "step": 9256 - }, - { - "epoch": 0.8348288767642151, - "grad_norm": 2.0516199921732965, - "learning_rate": 2.7930453829274323e-07, - "loss": 0.9811, - "step": 9257 - }, - { - "epoch": 0.8349190602876855, - "grad_norm": 1.671385648895911, - "learning_rate": 2.7900683514115054e-07, - "loss": 0.9783, - "step": 9258 - }, - { - "epoch": 0.8350092438111557, - "grad_norm": 1.5526209759839353, - "learning_rate": 2.787092788341836e-07, - "loss": 0.874, - "step": 9259 - }, - { - "epoch": 0.8350994273346259, - "grad_norm": 1.4803234018927365, - "learning_rate": 2.7841186939723195e-07, - "loss": 0.9303, - "step": 9260 - }, - { - "epoch": 0.8351896108580962, - "grad_norm": 1.4329758645611994, - "learning_rate": 2.7811460685567255e-07, - "loss": 0.9079, - "step": 9261 - }, - { - "epoch": 0.8352797943815665, - "grad_norm": 1.7016110471975738, - "learning_rate": 2.778174912348692e-07, - "loss": 0.9296, - "step": 9262 - }, - { - "epoch": 0.8353699779050368, - "grad_norm": 1.3098356815832735, - "learning_rate": 2.7752052256017354e-07, - "loss": 0.9022, - "step": 9263 - }, - { - "epoch": 0.835460161428507, - "grad_norm": 1.5919795955327987, - "learning_rate": 2.7722370085692493e-07, - "loss": 0.9105, - "step": 9264 - }, - { - "epoch": 0.8355503449519772, - "grad_norm": 1.510385482290778, - "learning_rate": 2.769270261504486e-07, - "loss": 0.9108, - "step": 9265 - }, - { - "epoch": 0.8356405284754476, - "grad_norm": 1.521999365015687, - "learning_rate": 2.7663049846606015e-07, - "loss": 0.9009, - "step": 9266 - }, - { - "epoch": 0.8357307119989178, - "grad_norm": 1.300416591725362, - "learning_rate": 2.763341178290592e-07, - "loss": 0.9479, - "step": 9267 - }, - { - "epoch": 0.835820895522388, - "grad_norm": 1.258821355318181, - "learning_rate": 2.7603788426473663e-07, - "loss": 0.959, - "step": 9268 - }, - { - "epoch": 0.8359110790458584, - "grad_norm": 1.733906038134238, - "learning_rate": 2.7574179779836695e-07, - "loss": 0.8742, - "step": 9269 - }, - { - "epoch": 0.8360012625693286, - "grad_norm": 1.4153588185782249, - "learning_rate": 2.754458584552146e-07, - "loss": 0.8916, - "step": 9270 - }, - { - "epoch": 0.8360914460927988, - "grad_norm": 2.0708453950076557, - "learning_rate": 2.751500662605308e-07, - "loss": 0.9267, - "step": 9271 - }, - { - "epoch": 0.8361816296162691, - "grad_norm": 1.3793780678635892, - "learning_rate": 2.7485442123955383e-07, - "loss": 0.8948, - "step": 9272 - }, - { - "epoch": 0.8362718131397394, - "grad_norm": 1.710260056803878, - "learning_rate": 2.7455892341751075e-07, - "loss": 0.8179, - "step": 9273 - }, - { - "epoch": 0.8363619966632097, - "grad_norm": 1.543434478392253, - "learning_rate": 2.7426357281961365e-07, - "loss": 0.8109, - "step": 9274 - }, - { - "epoch": 0.8364521801866799, - "grad_norm": 1.6255761372322253, - "learning_rate": 2.7396836947106416e-07, - "loss": 0.936, - "step": 9275 - }, - { - "epoch": 0.8365423637101501, - "grad_norm": 1.380951744777865, - "learning_rate": 2.736733133970506e-07, - "loss": 1.0352, - "step": 9276 - }, - { - "epoch": 0.8366325472336205, - "grad_norm": 1.183636050837539, - "learning_rate": 2.7337840462274896e-07, - "loss": 0.9744, - "step": 9277 - }, - { - "epoch": 0.8367227307570907, - "grad_norm": 1.6585639767663036, - "learning_rate": 2.730836431733221e-07, - "loss": 0.9099, - "step": 9278 - }, - { - "epoch": 0.8368129142805609, - "grad_norm": 1.316034915518865, - "learning_rate": 2.727890290739212e-07, - "loss": 0.9873, - "step": 9279 - }, - { - "epoch": 0.8369030978040312, - "grad_norm": 1.3736380420912333, - "learning_rate": 2.7249456234968395e-07, - "loss": 0.9735, - "step": 9280 - }, - { - "epoch": 0.8369932813275015, - "grad_norm": 1.5007569469448079, - "learning_rate": 2.722002430257364e-07, - "loss": 0.9987, - "step": 9281 - }, - { - "epoch": 0.8370834648509717, - "grad_norm": 1.3732347427706748, - "learning_rate": 2.7190607112719035e-07, - "loss": 0.894, - "step": 9282 - }, - { - "epoch": 0.837173648374442, - "grad_norm": 1.5963495035606428, - "learning_rate": 2.716120466791476e-07, - "loss": 0.8608, - "step": 9283 - }, - { - "epoch": 0.8372638318979122, - "grad_norm": 1.5779084916360946, - "learning_rate": 2.7131816970669483e-07, - "loss": 0.9091, - "step": 9284 - }, - { - "epoch": 0.8373540154213825, - "grad_norm": 1.8234083838536723, - "learning_rate": 2.7102444023490777e-07, - "loss": 0.948, - "step": 9285 - }, - { - "epoch": 0.8374441989448528, - "grad_norm": 2.805994520898474, - "learning_rate": 2.70730858288849e-07, - "loss": 0.9937, - "step": 9286 - }, - { - "epoch": 0.837534382468323, - "grad_norm": 1.3561796143721518, - "learning_rate": 2.704374238935685e-07, - "loss": 0.8755, - "step": 9287 - }, - { - "epoch": 0.8376245659917932, - "grad_norm": 0.6870875855548121, - "learning_rate": 2.70144137074104e-07, - "loss": 0.7985, - "step": 9288 - }, - { - "epoch": 0.8377147495152636, - "grad_norm": 1.2487092983879367, - "learning_rate": 2.6985099785547926e-07, - "loss": 0.9541, - "step": 9289 - }, - { - "epoch": 0.8378049330387338, - "grad_norm": 1.3744092018829694, - "learning_rate": 2.695580062627083e-07, - "loss": 0.9579, - "step": 9290 - }, - { - "epoch": 0.8378951165622041, - "grad_norm": 1.749212496543157, - "learning_rate": 2.692651623207891e-07, - "loss": 0.9354, - "step": 9291 - }, - { - "epoch": 0.8379853000856744, - "grad_norm": 1.361136906026924, - "learning_rate": 2.689724660547097e-07, - "loss": 0.9207, - "step": 9292 - }, - { - "epoch": 0.8380754836091446, - "grad_norm": 0.6240793361312534, - "learning_rate": 2.686799174894441e-07, - "loss": 0.7971, - "step": 9293 - }, - { - "epoch": 0.8381656671326149, - "grad_norm": 0.6836975236871567, - "learning_rate": 2.683875166499545e-07, - "loss": 0.8177, - "step": 9294 - }, - { - "epoch": 0.8382558506560851, - "grad_norm": 1.4432928872700141, - "learning_rate": 2.680952635611899e-07, - "loss": 0.9498, - "step": 9295 - }, - { - "epoch": 0.8383460341795554, - "grad_norm": 1.588176105732363, - "learning_rate": 2.678031582480875e-07, - "loss": 0.8538, - "step": 9296 - }, - { - "epoch": 0.8384362177030257, - "grad_norm": 1.3226063231651188, - "learning_rate": 2.6751120073557e-07, - "loss": 0.9733, - "step": 9297 - }, - { - "epoch": 0.8385264012264959, - "grad_norm": 1.5213449828086063, - "learning_rate": 2.672193910485505e-07, - "loss": 0.9049, - "step": 9298 - }, - { - "epoch": 0.8386165847499661, - "grad_norm": 1.3139206146315119, - "learning_rate": 2.669277292119265e-07, - "loss": 0.9316, - "step": 9299 - }, - { - "epoch": 0.8387067682734365, - "grad_norm": 1.416090885661601, - "learning_rate": 2.666362152505848e-07, - "loss": 0.9395, - "step": 9300 - }, - { - "epoch": 0.8387969517969067, - "grad_norm": 1.5710978655426888, - "learning_rate": 2.663448491893989e-07, - "loss": 0.8707, - "step": 9301 - }, - { - "epoch": 0.838887135320377, - "grad_norm": 1.4222492941044789, - "learning_rate": 2.6605363105322974e-07, - "loss": 0.9125, - "step": 9302 - }, - { - "epoch": 0.8389773188438472, - "grad_norm": 1.489770979721878, - "learning_rate": 2.657625608669263e-07, - "loss": 0.9358, - "step": 9303 - }, - { - "epoch": 0.8390675023673175, - "grad_norm": 1.2668620527722798, - "learning_rate": 2.654716386553224e-07, - "loss": 0.8452, - "step": 9304 - }, - { - "epoch": 0.8391576858907878, - "grad_norm": 1.455064525156663, - "learning_rate": 2.651808644432436e-07, - "loss": 0.9348, - "step": 9305 - }, - { - "epoch": 0.839247869414258, - "grad_norm": 1.5770972653995186, - "learning_rate": 2.6489023825549807e-07, - "loss": 1.0644, - "step": 9306 - }, - { - "epoch": 0.8393380529377282, - "grad_norm": 1.9768139223160395, - "learning_rate": 2.6459976011688547e-07, - "loss": 0.8607, - "step": 9307 - }, - { - "epoch": 0.8394282364611986, - "grad_norm": 1.827081235611109, - "learning_rate": 2.6430943005219e-07, - "loss": 0.9301, - "step": 9308 - }, - { - "epoch": 0.8395184199846688, - "grad_norm": 1.3195967681139835, - "learning_rate": 2.6401924808618447e-07, - "loss": 0.9679, - "step": 9309 - }, - { - "epoch": 0.839608603508139, - "grad_norm": 1.3490344091145703, - "learning_rate": 2.637292142436287e-07, - "loss": 0.9281, - "step": 9310 - }, - { - "epoch": 0.8396987870316093, - "grad_norm": 1.875156523211262, - "learning_rate": 2.6343932854927e-07, - "loss": 0.9099, - "step": 9311 - }, - { - "epoch": 0.8397889705550796, - "grad_norm": 0.7554216324828331, - "learning_rate": 2.6314959102784316e-07, - "loss": 0.8119, - "step": 9312 - }, - { - "epoch": 0.8398791540785498, - "grad_norm": 1.576344382525578, - "learning_rate": 2.6286000170407074e-07, - "loss": 0.9317, - "step": 9313 - }, - { - "epoch": 0.8399693376020201, - "grad_norm": 1.429697505728307, - "learning_rate": 2.625705606026607e-07, - "loss": 0.9119, - "step": 9314 - }, - { - "epoch": 0.8400595211254904, - "grad_norm": 1.3001323834284693, - "learning_rate": 2.622812677483106e-07, - "loss": 1.0067, - "step": 9315 - }, - { - "epoch": 0.8401497046489607, - "grad_norm": 1.7014044373854953, - "learning_rate": 2.6199212316570453e-07, - "loss": 0.9816, - "step": 9316 - }, - { - "epoch": 0.8402398881724309, - "grad_norm": 1.9277230584180562, - "learning_rate": 2.617031268795138e-07, - "loss": 0.9166, - "step": 9317 - }, - { - "epoch": 0.8403300716959011, - "grad_norm": 1.191609999947719, - "learning_rate": 2.614142789143976e-07, - "loss": 0.9669, - "step": 9318 - }, - { - "epoch": 0.8404202552193715, - "grad_norm": 1.3519173746085797, - "learning_rate": 2.6112557929500047e-07, - "loss": 0.9247, - "step": 9319 - }, - { - "epoch": 0.8405104387428417, - "grad_norm": 1.8485453041350703, - "learning_rate": 2.6083702804595817e-07, - "loss": 0.9562, - "step": 9320 - }, - { - "epoch": 0.8406006222663119, - "grad_norm": 1.4775514873176367, - "learning_rate": 2.6054862519188915e-07, - "loss": 0.9207, - "step": 9321 - }, - { - "epoch": 0.8406908057897822, - "grad_norm": 1.499346590778725, - "learning_rate": 2.6026037075740357e-07, - "loss": 0.8647, - "step": 9322 - }, - { - "epoch": 0.8407809893132525, - "grad_norm": 1.6026973518336851, - "learning_rate": 2.5997226476709524e-07, - "loss": 0.905, - "step": 9323 - }, - { - "epoch": 0.8408711728367227, - "grad_norm": 1.467928880086146, - "learning_rate": 2.5968430724554856e-07, - "loss": 0.9341, - "step": 9324 - }, - { - "epoch": 0.840961356360193, - "grad_norm": 1.4227785865433504, - "learning_rate": 2.5939649821733225e-07, - "loss": 0.8779, - "step": 9325 - }, - { - "epoch": 0.8410515398836632, - "grad_norm": 1.6346444981774333, - "learning_rate": 2.5910883770700433e-07, - "loss": 0.9032, - "step": 9326 - }, - { - "epoch": 0.8411417234071336, - "grad_norm": 1.1959935660237027, - "learning_rate": 2.5882132573910965e-07, - "loss": 0.777, - "step": 9327 - }, - { - "epoch": 0.8412319069306038, - "grad_norm": 1.1602956257853487, - "learning_rate": 2.585339623381801e-07, - "loss": 0.9679, - "step": 9328 - }, - { - "epoch": 0.841322090454074, - "grad_norm": 1.4283779796683767, - "learning_rate": 2.582467475287358e-07, - "loss": 0.9087, - "step": 9329 - }, - { - "epoch": 0.8414122739775443, - "grad_norm": 1.5687799519266998, - "learning_rate": 2.5795968133528224e-07, - "loss": 0.9434, - "step": 9330 - }, - { - "epoch": 0.8415024575010146, - "grad_norm": 1.5478098192813672, - "learning_rate": 2.576727637823144e-07, - "loss": 0.9366, - "step": 9331 - }, - { - "epoch": 0.8415926410244848, - "grad_norm": 1.586913611778782, - "learning_rate": 2.5738599489431335e-07, - "loss": 0.9708, - "step": 9332 - }, - { - "epoch": 0.8416828245479551, - "grad_norm": 1.2315434198225177, - "learning_rate": 2.5709937469574794e-07, - "loss": 0.9965, - "step": 9333 - }, - { - "epoch": 0.8417730080714253, - "grad_norm": 1.2453210521677158, - "learning_rate": 2.568129032110742e-07, - "loss": 0.9764, - "step": 9334 - }, - { - "epoch": 0.8418631915948956, - "grad_norm": 1.3912199654983115, - "learning_rate": 2.5652658046473565e-07, - "loss": 1.003, - "step": 9335 - }, - { - "epoch": 0.8419533751183659, - "grad_norm": 1.353866333546415, - "learning_rate": 2.5624040648116184e-07, - "loss": 0.9269, - "step": 9336 - }, - { - "epoch": 0.8420435586418361, - "grad_norm": 1.3918270958963315, - "learning_rate": 2.5595438128477245e-07, - "loss": 0.8726, - "step": 9337 - }, - { - "epoch": 0.8421337421653065, - "grad_norm": 1.5876192093192367, - "learning_rate": 2.5566850489997096e-07, - "loss": 0.9428, - "step": 9338 - }, - { - "epoch": 0.8422239256887767, - "grad_norm": 1.6384010136550882, - "learning_rate": 2.5538277735115166e-07, - "loss": 0.9612, - "step": 9339 - }, - { - "epoch": 0.8423141092122469, - "grad_norm": 1.2483057938522957, - "learning_rate": 2.5509719866269306e-07, - "loss": 0.9859, - "step": 9340 - }, - { - "epoch": 0.8424042927357172, - "grad_norm": 1.4739569224787814, - "learning_rate": 2.548117688589628e-07, - "loss": 0.9937, - "step": 9341 - }, - { - "epoch": 0.8424944762591875, - "grad_norm": 1.4216203566354375, - "learning_rate": 2.545264879643152e-07, - "loss": 0.9228, - "step": 9342 - }, - { - "epoch": 0.8425846597826577, - "grad_norm": 1.4052875192028134, - "learning_rate": 2.542413560030923e-07, - "loss": 0.8887, - "step": 9343 - }, - { - "epoch": 0.842674843306128, - "grad_norm": 1.5360111253852067, - "learning_rate": 2.53956372999623e-07, - "loss": 0.8255, - "step": 9344 - }, - { - "epoch": 0.8427650268295982, - "grad_norm": 0.7459980369010234, - "learning_rate": 2.5367153897822293e-07, - "loss": 0.8431, - "step": 9345 - }, - { - "epoch": 0.8428552103530685, - "grad_norm": 1.2509870446843234, - "learning_rate": 2.5338685396319715e-07, - "loss": 0.925, - "step": 9346 - }, - { - "epoch": 0.8429453938765388, - "grad_norm": 1.6420337986179923, - "learning_rate": 2.531023179788352e-07, - "loss": 0.8698, - "step": 9347 - }, - { - "epoch": 0.843035577400009, - "grad_norm": 1.4592035785118778, - "learning_rate": 2.528179310494158e-07, - "loss": 0.9152, - "step": 9348 - }, - { - "epoch": 0.8431257609234792, - "grad_norm": 1.2969719264566457, - "learning_rate": 2.5253369319920436e-07, - "loss": 0.953, - "step": 9349 - }, - { - "epoch": 0.8432159444469496, - "grad_norm": 1.838801840149293, - "learning_rate": 2.522496044524538e-07, - "loss": 0.9318, - "step": 9350 - }, - { - "epoch": 0.8433061279704198, - "grad_norm": 1.5388475451938841, - "learning_rate": 2.5196566483340386e-07, - "loss": 1.0274, - "step": 9351 - }, - { - "epoch": 0.84339631149389, - "grad_norm": 1.4309695806195775, - "learning_rate": 2.516818743662825e-07, - "loss": 0.9383, - "step": 9352 - }, - { - "epoch": 0.8434864950173603, - "grad_norm": 1.634867638762785, - "learning_rate": 2.5139823307530285e-07, - "loss": 0.9369, - "step": 9353 - }, - { - "epoch": 0.8435766785408306, - "grad_norm": 1.5041491027539224, - "learning_rate": 2.5111474098466836e-07, - "loss": 0.9692, - "step": 9354 - }, - { - "epoch": 0.8436668620643009, - "grad_norm": 1.2869270901839074, - "learning_rate": 2.50831398118567e-07, - "loss": 0.9736, - "step": 9355 - }, - { - "epoch": 0.8437570455877711, - "grad_norm": 1.4642098237684447, - "learning_rate": 2.5054820450117576e-07, - "loss": 0.8827, - "step": 9356 - }, - { - "epoch": 0.8438472291112413, - "grad_norm": 1.34676670072747, - "learning_rate": 2.502651601566579e-07, - "loss": 0.8955, - "step": 9357 - }, - { - "epoch": 0.8439374126347117, - "grad_norm": 1.5014379284935147, - "learning_rate": 2.499822651091645e-07, - "loss": 0.8947, - "step": 9358 - }, - { - "epoch": 0.8440275961581819, - "grad_norm": 1.5872736972296215, - "learning_rate": 2.496995193828344e-07, - "loss": 0.9557, - "step": 9359 - }, - { - "epoch": 0.8441177796816521, - "grad_norm": 1.4340202674125124, - "learning_rate": 2.494169230017913e-07, - "loss": 0.921, - "step": 9360 - }, - { - "epoch": 0.8442079632051224, - "grad_norm": 1.3500083852436886, - "learning_rate": 2.491344759901499e-07, - "loss": 0.8925, - "step": 9361 - }, - { - "epoch": 0.8442981467285927, - "grad_norm": 1.548727140465755, - "learning_rate": 2.488521783720088e-07, - "loss": 0.9294, - "step": 9362 - }, - { - "epoch": 0.844388330252063, - "grad_norm": 1.5715111859391069, - "learning_rate": 2.4857003017145526e-07, - "loss": 0.9748, - "step": 9363 - }, - { - "epoch": 0.8444785137755332, - "grad_norm": 1.4902422143572476, - "learning_rate": 2.482880314125644e-07, - "loss": 0.9711, - "step": 9364 - }, - { - "epoch": 0.8445686972990035, - "grad_norm": 1.3898027110567548, - "learning_rate": 2.4800618211939726e-07, - "loss": 0.9247, - "step": 9365 - }, - { - "epoch": 0.8446588808224738, - "grad_norm": 1.5674642666480878, - "learning_rate": 2.477244823160034e-07, - "loss": 0.9415, - "step": 9366 - }, - { - "epoch": 0.844749064345944, - "grad_norm": 1.4575976377993558, - "learning_rate": 2.474429320264184e-07, - "loss": 0.9501, - "step": 9367 - }, - { - "epoch": 0.8448392478694142, - "grad_norm": 1.367320419389724, - "learning_rate": 2.47161531274666e-07, - "loss": 0.8953, - "step": 9368 - }, - { - "epoch": 0.8449294313928846, - "grad_norm": 1.7739749043785107, - "learning_rate": 2.4688028008475714e-07, - "loss": 0.867, - "step": 9369 - }, - { - "epoch": 0.8450196149163548, - "grad_norm": 1.725660545925294, - "learning_rate": 2.465991784806891e-07, - "loss": 0.9297, - "step": 9370 - }, - { - "epoch": 0.845109798439825, - "grad_norm": 1.3583517059690653, - "learning_rate": 2.463182264864472e-07, - "loss": 1.0221, - "step": 9371 - }, - { - "epoch": 0.8451999819632953, - "grad_norm": 1.654951846410456, - "learning_rate": 2.460374241260039e-07, - "loss": 0.8536, - "step": 9372 - }, - { - "epoch": 0.8452901654867656, - "grad_norm": 1.486850000261899, - "learning_rate": 2.4575677142331884e-07, - "loss": 0.9943, - "step": 9373 - }, - { - "epoch": 0.8453803490102358, - "grad_norm": 1.3475174984476979, - "learning_rate": 2.454762684023395e-07, - "loss": 0.8717, - "step": 9374 - }, - { - "epoch": 0.8454705325337061, - "grad_norm": 1.765250579833889, - "learning_rate": 2.4519591508699823e-07, - "loss": 0.8791, - "step": 9375 - }, - { - "epoch": 0.8455607160571763, - "grad_norm": 1.4884202146357404, - "learning_rate": 2.4491571150121815e-07, - "loss": 0.9781, - "step": 9376 - }, - { - "epoch": 0.8456508995806467, - "grad_norm": 1.729662981005996, - "learning_rate": 2.446356576689062e-07, - "loss": 0.9292, - "step": 9377 - }, - { - "epoch": 0.8457410831041169, - "grad_norm": 1.4570257682799446, - "learning_rate": 2.4435575361395976e-07, - "loss": 0.9516, - "step": 9378 - }, - { - "epoch": 0.8458312666275871, - "grad_norm": 1.4344125846642635, - "learning_rate": 2.440759993602606e-07, - "loss": 0.9122, - "step": 9379 - }, - { - "epoch": 0.8459214501510574, - "grad_norm": 1.471589205263222, - "learning_rate": 2.437963949316793e-07, - "loss": 0.8764, - "step": 9380 - }, - { - "epoch": 0.8460116336745277, - "grad_norm": 1.4848134246655014, - "learning_rate": 2.435169403520729e-07, - "loss": 0.9329, - "step": 9381 - }, - { - "epoch": 0.8461018171979979, - "grad_norm": 1.6323527003572627, - "learning_rate": 2.4323763564528653e-07, - "loss": 0.9768, - "step": 9382 - }, - { - "epoch": 0.8461920007214682, - "grad_norm": 1.5991338411944207, - "learning_rate": 2.429584808351517e-07, - "loss": 0.9527, - "step": 9383 - }, - { - "epoch": 0.8462821842449384, - "grad_norm": 2.2646682297539598, - "learning_rate": 2.42679475945488e-07, - "loss": 1.055, - "step": 9384 - }, - { - "epoch": 0.8463723677684087, - "grad_norm": 1.258877747388108, - "learning_rate": 2.424006210001008e-07, - "loss": 0.9323, - "step": 9385 - }, - { - "epoch": 0.846462551291879, - "grad_norm": 1.4033449896844064, - "learning_rate": 2.421219160227839e-07, - "loss": 0.9565, - "step": 9386 - }, - { - "epoch": 0.8465527348153492, - "grad_norm": 1.373122580816528, - "learning_rate": 2.4184336103731785e-07, - "loss": 0.9269, - "step": 9387 - }, - { - "epoch": 0.8466429183388196, - "grad_norm": 1.3868953968420317, - "learning_rate": 2.4156495606747065e-07, - "loss": 0.9559, - "step": 9388 - }, - { - "epoch": 0.8467331018622898, - "grad_norm": 2.1701295274320653, - "learning_rate": 2.412867011369972e-07, - "loss": 1.0481, - "step": 9389 - }, - { - "epoch": 0.84682328538576, - "grad_norm": 1.4579300549897136, - "learning_rate": 2.4100859626963997e-07, - "loss": 0.9501, - "step": 9390 - }, - { - "epoch": 0.8469134689092302, - "grad_norm": 1.2834072095901798, - "learning_rate": 2.407306414891288e-07, - "loss": 0.9307, - "step": 9391 - }, - { - "epoch": 0.8470036524327006, - "grad_norm": 1.1759452832136998, - "learning_rate": 2.4045283681917893e-07, - "loss": 1.0413, - "step": 9392 - }, - { - "epoch": 0.8470938359561708, - "grad_norm": 1.3827284447937078, - "learning_rate": 2.4017518228349586e-07, - "loss": 1.0457, - "step": 9393 - }, - { - "epoch": 0.8471840194796411, - "grad_norm": 0.6366841827557803, - "learning_rate": 2.3989767790576887e-07, - "loss": 0.8196, - "step": 9394 - }, - { - "epoch": 0.8472742030031113, - "grad_norm": 1.4749119328215727, - "learning_rate": 2.396203237096781e-07, - "loss": 0.9005, - "step": 9395 - }, - { - "epoch": 0.8473643865265816, - "grad_norm": 1.5884405218557567, - "learning_rate": 2.393431197188873e-07, - "loss": 0.9295, - "step": 9396 - }, - { - "epoch": 0.8474545700500519, - "grad_norm": 1.7669628779684452, - "learning_rate": 2.3906606595705004e-07, - "loss": 0.9988, - "step": 9397 - }, - { - "epoch": 0.8475447535735221, - "grad_norm": 1.337546110695285, - "learning_rate": 2.387891624478056e-07, - "loss": 0.984, - "step": 9398 - }, - { - "epoch": 0.8476349370969923, - "grad_norm": 1.3261384805286662, - "learning_rate": 2.3851240921478075e-07, - "loss": 0.9483, - "step": 9399 - }, - { - "epoch": 0.8477251206204627, - "grad_norm": 1.394044507989518, - "learning_rate": 2.3823580628159057e-07, - "loss": 0.9111, - "step": 9400 - }, - { - "epoch": 0.8478153041439329, - "grad_norm": 1.74350616773866, - "learning_rate": 2.3795935367183517e-07, - "loss": 0.9225, - "step": 9401 - }, - { - "epoch": 0.8479054876674031, - "grad_norm": 1.3097318340599207, - "learning_rate": 2.376830514091035e-07, - "loss": 0.947, - "step": 9402 - }, - { - "epoch": 0.8479956711908734, - "grad_norm": 1.2387513913821384, - "learning_rate": 2.3740689951697135e-07, - "loss": 1.0091, - "step": 9403 - }, - { - "epoch": 0.8480858547143437, - "grad_norm": 1.4714180271389907, - "learning_rate": 2.371308980190012e-07, - "loss": 0.9337, - "step": 9404 - }, - { - "epoch": 0.848176038237814, - "grad_norm": 0.6650864504428946, - "learning_rate": 2.3685504693874337e-07, - "loss": 0.8223, - "step": 9405 - }, - { - "epoch": 0.8482662217612842, - "grad_norm": 2.169260550875678, - "learning_rate": 2.3657934629973497e-07, - "loss": 0.9734, - "step": 9406 - }, - { - "epoch": 0.8483564052847544, - "grad_norm": 1.7996753267359054, - "learning_rate": 2.3630379612549944e-07, - "loss": 0.9426, - "step": 9407 - }, - { - "epoch": 0.8484465888082248, - "grad_norm": 0.7598956077807176, - "learning_rate": 2.3602839643954997e-07, - "loss": 0.8679, - "step": 9408 - }, - { - "epoch": 0.848536772331695, - "grad_norm": 1.3988926322697723, - "learning_rate": 2.3575314726538308e-07, - "loss": 0.9384, - "step": 9409 - }, - { - "epoch": 0.8486269558551652, - "grad_norm": 1.4125261338103463, - "learning_rate": 2.3547804862648645e-07, - "loss": 0.9723, - "step": 9410 - }, - { - "epoch": 0.8487171393786356, - "grad_norm": 1.344195003900607, - "learning_rate": 2.3520310054633174e-07, - "loss": 0.9776, - "step": 9411 - }, - { - "epoch": 0.8488073229021058, - "grad_norm": 1.5160173517859674, - "learning_rate": 2.3492830304837973e-07, - "loss": 0.8935, - "step": 9412 - }, - { - "epoch": 0.848897506425576, - "grad_norm": 1.4127319989371385, - "learning_rate": 2.3465365615607723e-07, - "loss": 0.8773, - "step": 9413 - }, - { - "epoch": 0.8489876899490463, - "grad_norm": 1.7468241757555418, - "learning_rate": 2.3437915989285884e-07, - "loss": 0.9256, - "step": 9414 - }, - { - "epoch": 0.8490778734725166, - "grad_norm": 1.7769365116599796, - "learning_rate": 2.3410481428214647e-07, - "loss": 1.007, - "step": 9415 - }, - { - "epoch": 0.8491680569959869, - "grad_norm": 1.3722138353538502, - "learning_rate": 2.338306193473476e-07, - "loss": 0.9499, - "step": 9416 - }, - { - "epoch": 0.8492582405194571, - "grad_norm": 1.3173750756264888, - "learning_rate": 2.3355657511185957e-07, - "loss": 0.9304, - "step": 9417 - }, - { - "epoch": 0.8493484240429273, - "grad_norm": 0.7270398776413335, - "learning_rate": 2.3328268159906428e-07, - "loss": 0.7852, - "step": 9418 - }, - { - "epoch": 0.8494386075663977, - "grad_norm": 1.448667519091797, - "learning_rate": 2.330089388323322e-07, - "loss": 0.9803, - "step": 9419 - }, - { - "epoch": 0.8495287910898679, - "grad_norm": 1.5054488556383272, - "learning_rate": 2.327353468350204e-07, - "loss": 0.9691, - "step": 9420 - }, - { - "epoch": 0.8496189746133381, - "grad_norm": 1.324323666897751, - "learning_rate": 2.3246190563047352e-07, - "loss": 0.922, - "step": 9421 - }, - { - "epoch": 0.8497091581368084, - "grad_norm": 1.413370121006259, - "learning_rate": 2.3218861524202293e-07, - "loss": 0.9151, - "step": 9422 - }, - { - "epoch": 0.8497993416602787, - "grad_norm": 1.562515716473692, - "learning_rate": 2.3191547569298775e-07, - "loss": 0.9383, - "step": 9423 - }, - { - "epoch": 0.8498895251837489, - "grad_norm": 1.5073087176782314, - "learning_rate": 2.3164248700667245e-07, - "loss": 0.9204, - "step": 9424 - }, - { - "epoch": 0.8499797087072192, - "grad_norm": 1.253369558162561, - "learning_rate": 2.313696492063717e-07, - "loss": 0.9598, - "step": 9425 - }, - { - "epoch": 0.8500698922306894, - "grad_norm": 1.6286486232120865, - "learning_rate": 2.3109696231536401e-07, - "loss": 0.9215, - "step": 9426 - }, - { - "epoch": 0.8501600757541597, - "grad_norm": 1.6837784030843725, - "learning_rate": 2.3082442635691722e-07, - "loss": 0.899, - "step": 9427 - }, - { - "epoch": 0.85025025927763, - "grad_norm": 1.477134069191036, - "learning_rate": 2.305520413542854e-07, - "loss": 0.9482, - "step": 9428 - }, - { - "epoch": 0.8503404428011002, - "grad_norm": 1.2680647140920553, - "learning_rate": 2.3027980733071018e-07, - "loss": 0.8633, - "step": 9429 - }, - { - "epoch": 0.8504306263245704, - "grad_norm": 0.6204088381274857, - "learning_rate": 2.3000772430942027e-07, - "loss": 0.76, - "step": 9430 - }, - { - "epoch": 0.8505208098480408, - "grad_norm": 1.4760861713355795, - "learning_rate": 2.2973579231363028e-07, - "loss": 0.9598, - "step": 9431 - }, - { - "epoch": 0.850610993371511, - "grad_norm": 1.4829374881721393, - "learning_rate": 2.2946401136654446e-07, - "loss": 0.9558, - "step": 9432 - }, - { - "epoch": 0.8507011768949813, - "grad_norm": 1.3721423364381418, - "learning_rate": 2.2919238149135077e-07, - "loss": 0.9694, - "step": 9433 - }, - { - "epoch": 0.8507913604184516, - "grad_norm": 1.3081115289642422, - "learning_rate": 2.289209027112282e-07, - "loss": 0.9379, - "step": 9434 - }, - { - "epoch": 0.8508815439419218, - "grad_norm": 1.4867549889819012, - "learning_rate": 2.2864957504933934e-07, - "loss": 0.994, - "step": 9435 - }, - { - "epoch": 0.8509717274653921, - "grad_norm": 2.932318480094066, - "learning_rate": 2.2837839852883589e-07, - "loss": 0.916, - "step": 9436 - }, - { - "epoch": 0.8510619109888623, - "grad_norm": 1.8788964792739065, - "learning_rate": 2.2810737317285623e-07, - "loss": 0.9414, - "step": 9437 - }, - { - "epoch": 0.8511520945123326, - "grad_norm": 1.2623452913275128, - "learning_rate": 2.278364990045254e-07, - "loss": 0.8744, - "step": 9438 - }, - { - "epoch": 0.8512422780358029, - "grad_norm": 1.692962987280008, - "learning_rate": 2.2756577604695625e-07, - "loss": 0.9211, - "step": 9439 - }, - { - "epoch": 0.8513324615592731, - "grad_norm": 1.2690999398794558, - "learning_rate": 2.2729520432324855e-07, - "loss": 0.9491, - "step": 9440 - }, - { - "epoch": 0.8514226450827433, - "grad_norm": 1.3668703855864854, - "learning_rate": 2.2702478385648826e-07, - "loss": 0.861, - "step": 9441 - }, - { - "epoch": 0.8515128286062137, - "grad_norm": 1.714397815603982, - "learning_rate": 2.2675451466974938e-07, - "loss": 0.9336, - "step": 9442 - }, - { - "epoch": 0.8516030121296839, - "grad_norm": 1.5097885228798038, - "learning_rate": 2.26484396786093e-07, - "loss": 0.867, - "step": 9443 - }, - { - "epoch": 0.8516931956531542, - "grad_norm": 0.6673509518166201, - "learning_rate": 2.2621443022856667e-07, - "loss": 0.8304, - "step": 9444 - }, - { - "epoch": 0.8517833791766244, - "grad_norm": 2.0096910053187607, - "learning_rate": 2.2594461502020646e-07, - "loss": 0.9689, - "step": 9445 - }, - { - "epoch": 0.8518735627000947, - "grad_norm": 1.6639548253428817, - "learning_rate": 2.2567495118403278e-07, - "loss": 0.8156, - "step": 9446 - }, - { - "epoch": 0.851963746223565, - "grad_norm": 1.3324709079994872, - "learning_rate": 2.254054387430566e-07, - "loss": 0.9514, - "step": 9447 - }, - { - "epoch": 0.8520539297470352, - "grad_norm": 1.4053908796722114, - "learning_rate": 2.2513607772027243e-07, - "loss": 0.9234, - "step": 9448 - }, - { - "epoch": 0.8521441132705054, - "grad_norm": 1.4362437522493332, - "learning_rate": 2.2486686813866562e-07, - "loss": 0.8967, - "step": 9449 - }, - { - "epoch": 0.8522342967939758, - "grad_norm": 1.4628053851689056, - "learning_rate": 2.2459781002120514e-07, - "loss": 0.9801, - "step": 9450 - }, - { - "epoch": 0.852324480317446, - "grad_norm": 1.4706131501050719, - "learning_rate": 2.243289033908491e-07, - "loss": 1.0034, - "step": 9451 - }, - { - "epoch": 0.8524146638409162, - "grad_norm": 1.4332482859789069, - "learning_rate": 2.2406014827054176e-07, - "loss": 0.9433, - "step": 9452 - }, - { - "epoch": 0.8525048473643865, - "grad_norm": 1.4900054955861173, - "learning_rate": 2.2379154468321525e-07, - "loss": 0.9993, - "step": 9453 - }, - { - "epoch": 0.8525950308878568, - "grad_norm": 1.277183172649373, - "learning_rate": 2.2352309265178793e-07, - "loss": 0.9716, - "step": 9454 - }, - { - "epoch": 0.852685214411327, - "grad_norm": 2.155581716278225, - "learning_rate": 2.2325479219916565e-07, - "loss": 1.0045, - "step": 9455 - }, - { - "epoch": 0.8527753979347973, - "grad_norm": 1.4636969795038992, - "learning_rate": 2.229866433482419e-07, - "loss": 0.9617, - "step": 9456 - }, - { - "epoch": 0.8528655814582676, - "grad_norm": 1.4250139235769963, - "learning_rate": 2.2271864612189552e-07, - "loss": 0.9768, - "step": 9457 - }, - { - "epoch": 0.8529557649817379, - "grad_norm": 1.403979311969958, - "learning_rate": 2.2245080054299415e-07, - "loss": 0.9699, - "step": 9458 - }, - { - "epoch": 0.8530459485052081, - "grad_norm": 1.3060403993354686, - "learning_rate": 2.2218310663439198e-07, - "loss": 0.8899, - "step": 9459 - }, - { - "epoch": 0.8531361320286783, - "grad_norm": 1.5182181914659676, - "learning_rate": 2.2191556441892968e-07, - "loss": 0.9565, - "step": 9460 - }, - { - "epoch": 0.8532263155521487, - "grad_norm": 1.3572407773193407, - "learning_rate": 2.216481739194358e-07, - "loss": 0.941, - "step": 9461 - }, - { - "epoch": 0.8533164990756189, - "grad_norm": 1.4561097859345833, - "learning_rate": 2.2138093515872592e-07, - "loss": 0.8795, - "step": 9462 - }, - { - "epoch": 0.8534066825990891, - "grad_norm": 1.2078124052494303, - "learning_rate": 2.2111384815960087e-07, - "loss": 1.0112, - "step": 9463 - }, - { - "epoch": 0.8534968661225594, - "grad_norm": 1.9804530397051623, - "learning_rate": 2.208469129448518e-07, - "loss": 0.8567, - "step": 9464 - }, - { - "epoch": 0.8535870496460297, - "grad_norm": 0.6948502107478142, - "learning_rate": 2.2058012953725357e-07, - "loss": 0.8042, - "step": 9465 - }, - { - "epoch": 0.8536772331695, - "grad_norm": 1.6220619343834655, - "learning_rate": 2.203134979595711e-07, - "loss": 0.9046, - "step": 9466 - }, - { - "epoch": 0.8537674166929702, - "grad_norm": 1.5152597675521617, - "learning_rate": 2.2004701823455374e-07, - "loss": 0.8856, - "step": 9467 - }, - { - "epoch": 0.8538576002164404, - "grad_norm": 2.8328736624240514, - "learning_rate": 2.1978069038493906e-07, - "loss": 0.9648, - "step": 9468 - }, - { - "epoch": 0.8539477837399108, - "grad_norm": 1.2127222997851483, - "learning_rate": 2.1951451443345225e-07, - "loss": 0.9421, - "step": 9469 - }, - { - "epoch": 0.854037967263381, - "grad_norm": 1.6899651076873763, - "learning_rate": 2.1924849040280425e-07, - "loss": 0.9262, - "step": 9470 - }, - { - "epoch": 0.8541281507868512, - "grad_norm": 2.037662771077205, - "learning_rate": 2.1898261831569465e-07, - "loss": 1.0173, - "step": 9471 - }, - { - "epoch": 0.8542183343103215, - "grad_norm": 1.835261313726371, - "learning_rate": 2.1871689819480798e-07, - "loss": 0.9461, - "step": 9472 - }, - { - "epoch": 0.8543085178337918, - "grad_norm": 1.397495997087072, - "learning_rate": 2.1845133006281745e-07, - "loss": 0.9177, - "step": 9473 - }, - { - "epoch": 0.854398701357262, - "grad_norm": 1.7602123239714054, - "learning_rate": 2.1818591394238294e-07, - "loss": 0.7943, - "step": 9474 - }, - { - "epoch": 0.8544888848807323, - "grad_norm": 2.079590818248627, - "learning_rate": 2.1792064985615076e-07, - "loss": 0.9231, - "step": 9475 - }, - { - "epoch": 0.8545790684042025, - "grad_norm": 1.2899229472457794, - "learning_rate": 2.1765553782675528e-07, - "loss": 0.9103, - "step": 9476 - }, - { - "epoch": 0.8546692519276728, - "grad_norm": 1.2806648220649341, - "learning_rate": 2.1739057787681703e-07, - "loss": 1.0088, - "step": 9477 - }, - { - "epoch": 0.8547594354511431, - "grad_norm": 2.019172680896089, - "learning_rate": 2.1712577002894372e-07, - "loss": 0.9584, - "step": 9478 - }, - { - "epoch": 0.8548496189746133, - "grad_norm": 2.1620381385763507, - "learning_rate": 2.1686111430573105e-07, - "loss": 0.911, - "step": 9479 - }, - { - "epoch": 0.8549398024980835, - "grad_norm": 1.4067300612882456, - "learning_rate": 2.165966107297592e-07, - "loss": 0.9884, - "step": 9480 - }, - { - "epoch": 0.8550299860215539, - "grad_norm": 1.4923994303290946, - "learning_rate": 2.16332259323599e-07, - "loss": 0.9686, - "step": 9481 - }, - { - "epoch": 0.8551201695450241, - "grad_norm": 1.5658912477987932, - "learning_rate": 2.1606806010980504e-07, - "loss": 0.8857, - "step": 9482 - }, - { - "epoch": 0.8552103530684944, - "grad_norm": 1.4462426569034308, - "learning_rate": 2.1580401311092067e-07, - "loss": 0.9348, - "step": 9483 - }, - { - "epoch": 0.8553005365919647, - "grad_norm": 1.2392896523379704, - "learning_rate": 2.1554011834947604e-07, - "loss": 0.9275, - "step": 9484 - }, - { - "epoch": 0.8553907201154349, - "grad_norm": 1.5803721829761663, - "learning_rate": 2.1527637584798764e-07, - "loss": 0.964, - "step": 9485 - }, - { - "epoch": 0.8554809036389052, - "grad_norm": 1.5582939758362553, - "learning_rate": 2.150127856289603e-07, - "loss": 1.0023, - "step": 9486 - }, - { - "epoch": 0.8555710871623754, - "grad_norm": 0.6435240534204636, - "learning_rate": 2.1474934771488363e-07, - "loss": 0.7854, - "step": 9487 - }, - { - "epoch": 0.8556612706858457, - "grad_norm": 1.4415968688797474, - "learning_rate": 2.1448606212823715e-07, - "loss": 0.9811, - "step": 9488 - }, - { - "epoch": 0.855751454209316, - "grad_norm": 1.6551753462362622, - "learning_rate": 2.1422292889148452e-07, - "loss": 0.9212, - "step": 9489 - }, - { - "epoch": 0.8558416377327862, - "grad_norm": 1.401013266501284, - "learning_rate": 2.139599480270784e-07, - "loss": 0.9549, - "step": 9490 - }, - { - "epoch": 0.8559318212562564, - "grad_norm": 1.7419543602107521, - "learning_rate": 2.1369711955745773e-07, - "loss": 0.9234, - "step": 9491 - }, - { - "epoch": 0.8560220047797268, - "grad_norm": 9.32415860959344, - "learning_rate": 2.1343444350504813e-07, - "loss": 0.8868, - "step": 9492 - }, - { - "epoch": 0.856112188303197, - "grad_norm": 1.388323905259474, - "learning_rate": 2.1317191989226302e-07, - "loss": 0.9413, - "step": 9493 - }, - { - "epoch": 0.8562023718266673, - "grad_norm": 1.3728709209888292, - "learning_rate": 2.129095487415027e-07, - "loss": 0.9207, - "step": 9494 - }, - { - "epoch": 0.8562925553501375, - "grad_norm": 1.9865797515499317, - "learning_rate": 2.1264733007515257e-07, - "loss": 0.9499, - "step": 9495 - }, - { - "epoch": 0.8563827388736078, - "grad_norm": 1.4920866767414767, - "learning_rate": 2.1238526391558852e-07, - "loss": 0.9206, - "step": 9496 - }, - { - "epoch": 0.8564729223970781, - "grad_norm": 1.7756826834085482, - "learning_rate": 2.1212335028517003e-07, - "loss": 1.0743, - "step": 9497 - }, - { - "epoch": 0.8565631059205483, - "grad_norm": 1.4370186041130069, - "learning_rate": 2.1186158920624563e-07, - "loss": 0.9468, - "step": 9498 - }, - { - "epoch": 0.8566532894440185, - "grad_norm": 1.5829624779038245, - "learning_rate": 2.1159998070115015e-07, - "loss": 0.9384, - "step": 9499 - }, - { - "epoch": 0.8567434729674889, - "grad_norm": 1.5174797743026036, - "learning_rate": 2.113385247922055e-07, - "loss": 0.9219, - "step": 9500 - }, - { - "epoch": 0.8568336564909591, - "grad_norm": 1.4318337095610394, - "learning_rate": 2.1107722150172068e-07, - "loss": 0.9367, - "step": 9501 - }, - { - "epoch": 0.8569238400144293, - "grad_norm": 1.318124391312366, - "learning_rate": 2.108160708519906e-07, - "loss": 0.9798, - "step": 9502 - }, - { - "epoch": 0.8570140235378996, - "grad_norm": 1.5611096872358665, - "learning_rate": 2.1055507286529984e-07, - "loss": 0.9149, - "step": 9503 - }, - { - "epoch": 0.8571042070613699, - "grad_norm": 3.8555250231376434, - "learning_rate": 2.1029422756391612e-07, - "loss": 0.9628, - "step": 9504 - }, - { - "epoch": 0.8571943905848401, - "grad_norm": 1.4125846601113192, - "learning_rate": 2.1003353497009812e-07, - "loss": 0.8553, - "step": 9505 - }, - { - "epoch": 0.8572845741083104, - "grad_norm": 1.3631713298216155, - "learning_rate": 2.0977299510608825e-07, - "loss": 0.8626, - "step": 9506 - }, - { - "epoch": 0.8573747576317807, - "grad_norm": 1.3417677785947575, - "learning_rate": 2.0951260799411784e-07, - "loss": 0.9728, - "step": 9507 - }, - { - "epoch": 0.857464941155251, - "grad_norm": 1.6597950261888308, - "learning_rate": 2.0925237365640424e-07, - "loss": 0.9268, - "step": 9508 - }, - { - "epoch": 0.8575551246787212, - "grad_norm": 0.6350990006850569, - "learning_rate": 2.0899229211515211e-07, - "loss": 0.8109, - "step": 9509 - }, - { - "epoch": 0.8576453082021914, - "grad_norm": 1.4774504721355384, - "learning_rate": 2.0873236339255306e-07, - "loss": 0.8618, - "step": 9510 - }, - { - "epoch": 0.8577354917256618, - "grad_norm": 2.3076965344219134, - "learning_rate": 2.0847258751078644e-07, - "loss": 0.88, - "step": 9511 - }, - { - "epoch": 0.857825675249132, - "grad_norm": 1.8124127202559388, - "learning_rate": 2.082129644920163e-07, - "loss": 0.916, - "step": 9512 - }, - { - "epoch": 0.8579158587726022, - "grad_norm": 1.7032492356090772, - "learning_rate": 2.0795349435839605e-07, - "loss": 0.9474, - "step": 9513 - }, - { - "epoch": 0.8580060422960725, - "grad_norm": 1.6316813516134592, - "learning_rate": 2.0769417713206484e-07, - "loss": 0.9113, - "step": 9514 - }, - { - "epoch": 0.8580962258195428, - "grad_norm": 1.1373589145232725, - "learning_rate": 2.074350128351492e-07, - "loss": 0.9981, - "step": 9515 - }, - { - "epoch": 0.858186409343013, - "grad_norm": 1.4291454183854384, - "learning_rate": 2.0717600148976256e-07, - "loss": 0.854, - "step": 9516 - }, - { - "epoch": 0.8582765928664833, - "grad_norm": 1.4468082420816757, - "learning_rate": 2.0691714311800436e-07, - "loss": 0.8703, - "step": 9517 - }, - { - "epoch": 0.8583667763899535, - "grad_norm": 1.4668618097675281, - "learning_rate": 2.066584377419631e-07, - "loss": 0.9995, - "step": 9518 - }, - { - "epoch": 0.8584569599134239, - "grad_norm": 1.3432338077996981, - "learning_rate": 2.0639988538371167e-07, - "loss": 0.9588, - "step": 9519 - }, - { - "epoch": 0.8585471434368941, - "grad_norm": 1.5220747892925068, - "learning_rate": 2.0614148606531258e-07, - "loss": 0.9035, - "step": 9520 - }, - { - "epoch": 0.8586373269603643, - "grad_norm": 1.8214137466746312, - "learning_rate": 2.0588323980881285e-07, - "loss": 0.9551, - "step": 9521 - }, - { - "epoch": 0.8587275104838346, - "grad_norm": 1.28373468035181, - "learning_rate": 2.0562514663624752e-07, - "loss": 0.9197, - "step": 9522 - }, - { - "epoch": 0.8588176940073049, - "grad_norm": 1.3478936027821637, - "learning_rate": 2.0536720656963902e-07, - "loss": 0.9966, - "step": 9523 - }, - { - "epoch": 0.8589078775307751, - "grad_norm": 10.057068679895197, - "learning_rate": 2.051094196309957e-07, - "loss": 0.959, - "step": 9524 - }, - { - "epoch": 0.8589980610542454, - "grad_norm": 1.3787019054086187, - "learning_rate": 2.0485178584231378e-07, - "loss": 0.8971, - "step": 9525 - }, - { - "epoch": 0.8590882445777156, - "grad_norm": 0.692565205255049, - "learning_rate": 2.0459430522557587e-07, - "loss": 0.8033, - "step": 9526 - }, - { - "epoch": 0.8591784281011859, - "grad_norm": 1.7487421965135237, - "learning_rate": 2.0433697780275195e-07, - "loss": 0.9415, - "step": 9527 - }, - { - "epoch": 0.8592686116246562, - "grad_norm": 1.2444046674532343, - "learning_rate": 2.040798035957978e-07, - "loss": 0.958, - "step": 9528 - }, - { - "epoch": 0.8593587951481264, - "grad_norm": 0.7146776767284131, - "learning_rate": 2.038227826266574e-07, - "loss": 0.8289, - "step": 9529 - }, - { - "epoch": 0.8594489786715968, - "grad_norm": 1.6791281545054653, - "learning_rate": 2.0356591491726126e-07, - "loss": 0.9407, - "step": 9530 - }, - { - "epoch": 0.859539162195067, - "grad_norm": 1.2482474916548674, - "learning_rate": 2.033092004895267e-07, - "loss": 0.9761, - "step": 9531 - }, - { - "epoch": 0.8596293457185372, - "grad_norm": 1.3398631903569842, - "learning_rate": 2.03052639365358e-07, - "loss": 0.9501, - "step": 9532 - }, - { - "epoch": 0.8597195292420075, - "grad_norm": 1.3373744442199926, - "learning_rate": 2.0279623156664694e-07, - "loss": 0.9523, - "step": 9533 - }, - { - "epoch": 0.8598097127654778, - "grad_norm": 1.3346065114930747, - "learning_rate": 2.0253997711527005e-07, - "loss": 1.0005, - "step": 9534 - }, - { - "epoch": 0.859899896288948, - "grad_norm": 1.6031769593501939, - "learning_rate": 2.0228387603309428e-07, - "loss": 0.9957, - "step": 9535 - }, - { - "epoch": 0.8599900798124183, - "grad_norm": 1.3716521253699574, - "learning_rate": 2.0202792834197035e-07, - "loss": 0.955, - "step": 9536 - }, - { - "epoch": 0.8600802633358885, - "grad_norm": 1.5955189630140032, - "learning_rate": 2.017721340637375e-07, - "loss": 0.8723, - "step": 9537 - }, - { - "epoch": 0.8601704468593588, - "grad_norm": 1.4945323156421808, - "learning_rate": 2.0151649322022134e-07, - "loss": 0.925, - "step": 9538 - }, - { - "epoch": 0.8602606303828291, - "grad_norm": 1.9099982884783842, - "learning_rate": 2.012610058332349e-07, - "loss": 0.7485, - "step": 9539 - }, - { - "epoch": 0.8603508139062993, - "grad_norm": 0.7146583691970528, - "learning_rate": 2.010056719245774e-07, - "loss": 0.8009, - "step": 9540 - }, - { - "epoch": 0.8604409974297695, - "grad_norm": 1.5013191462592357, - "learning_rate": 2.0075049151603563e-07, - "loss": 0.9528, - "step": 9541 - }, - { - "epoch": 0.8605311809532399, - "grad_norm": 1.420308854000838, - "learning_rate": 2.0049546462938326e-07, - "loss": 0.9463, - "step": 9542 - }, - { - "epoch": 0.8606213644767101, - "grad_norm": 1.5207310820742819, - "learning_rate": 2.0024059128637961e-07, - "loss": 0.9543, - "step": 9543 - }, - { - "epoch": 0.8607115480001803, - "grad_norm": 1.297034012571315, - "learning_rate": 1.99985871508773e-07, - "loss": 0.7999, - "step": 9544 - }, - { - "epoch": 0.8608017315236506, - "grad_norm": 1.4284314751190264, - "learning_rate": 1.9973130531829674e-07, - "loss": 0.9674, - "step": 9545 - }, - { - "epoch": 0.8608919150471209, - "grad_norm": 1.5521961742916768, - "learning_rate": 1.994768927366721e-07, - "loss": 0.895, - "step": 9546 - }, - { - "epoch": 0.8609820985705912, - "grad_norm": 1.5811203583055022, - "learning_rate": 1.992226337856069e-07, - "loss": 0.9059, - "step": 9547 - }, - { - "epoch": 0.8610722820940614, - "grad_norm": 1.415709237461842, - "learning_rate": 1.9896852848679592e-07, - "loss": 0.89, - "step": 9548 - }, - { - "epoch": 0.8611624656175316, - "grad_norm": 1.3836820207005998, - "learning_rate": 1.9871457686192094e-07, - "loss": 0.9499, - "step": 9549 - }, - { - "epoch": 0.861252649141002, - "grad_norm": 1.9443735821239632, - "learning_rate": 1.984607789326509e-07, - "loss": 0.9259, - "step": 9550 - }, - { - "epoch": 0.8613428326644722, - "grad_norm": 1.447128392352889, - "learning_rate": 1.982071347206402e-07, - "loss": 0.8889, - "step": 9551 - }, - { - "epoch": 0.8614330161879424, - "grad_norm": 1.2956200928141974, - "learning_rate": 1.9795364424753202e-07, - "loss": 0.9169, - "step": 9552 - }, - { - "epoch": 0.8615231997114128, - "grad_norm": 1.9012755228369753, - "learning_rate": 1.9770030753495505e-07, - "loss": 0.9034, - "step": 9553 - }, - { - "epoch": 0.861613383234883, - "grad_norm": 1.521193823047488, - "learning_rate": 1.9744712460452573e-07, - "loss": 0.8897, - "step": 9554 - }, - { - "epoch": 0.8617035667583532, - "grad_norm": 1.3616874144073443, - "learning_rate": 1.9719409547784703e-07, - "loss": 0.9173, - "step": 9555 - }, - { - "epoch": 0.8617937502818235, - "grad_norm": 2.28626963561622, - "learning_rate": 1.9694122017650837e-07, - "loss": 0.9582, - "step": 9556 - }, - { - "epoch": 0.8618839338052938, - "grad_norm": 1.619196210949313, - "learning_rate": 1.9668849872208738e-07, - "loss": 0.9487, - "step": 9557 - }, - { - "epoch": 0.8619741173287641, - "grad_norm": 1.2470027274963849, - "learning_rate": 1.9643593113614632e-07, - "loss": 0.7843, - "step": 9558 - }, - { - "epoch": 0.8620643008522343, - "grad_norm": 1.3335517863290083, - "learning_rate": 1.961835174402371e-07, - "loss": 0.9787, - "step": 9559 - }, - { - "epoch": 0.8621544843757045, - "grad_norm": 1.7507468401052242, - "learning_rate": 1.9593125765589535e-07, - "loss": 0.9103, - "step": 9560 - }, - { - "epoch": 0.8622446678991749, - "grad_norm": 2.1900424895671815, - "learning_rate": 1.9567915180464721e-07, - "loss": 0.8821, - "step": 9561 - }, - { - "epoch": 0.8623348514226451, - "grad_norm": 2.244380928101833, - "learning_rate": 1.9542719990800217e-07, - "loss": 0.9243, - "step": 9562 - }, - { - "epoch": 0.8624250349461153, - "grad_norm": 1.5062958199904017, - "learning_rate": 1.9517540198745896e-07, - "loss": 0.9979, - "step": 9563 - }, - { - "epoch": 0.8625152184695856, - "grad_norm": 1.8615643090250045, - "learning_rate": 1.94923758064502e-07, - "loss": 0.9249, - "step": 9564 - }, - { - "epoch": 0.8626054019930559, - "grad_norm": 1.297734263151813, - "learning_rate": 1.9467226816060322e-07, - "loss": 0.9044, - "step": 9565 - }, - { - "epoch": 0.8626955855165261, - "grad_norm": 1.6811211433806053, - "learning_rate": 1.9442093229722122e-07, - "loss": 0.8816, - "step": 9566 - }, - { - "epoch": 0.8627857690399964, - "grad_norm": 1.7117057016311483, - "learning_rate": 1.9416975049580085e-07, - "loss": 1.0114, - "step": 9567 - }, - { - "epoch": 0.8628759525634666, - "grad_norm": 1.5890303646953088, - "learning_rate": 1.9391872277777456e-07, - "loss": 0.9206, - "step": 9568 - }, - { - "epoch": 0.862966136086937, - "grad_norm": 1.672601844868503, - "learning_rate": 1.9366784916456158e-07, - "loss": 0.9329, - "step": 9569 - }, - { - "epoch": 0.8630563196104072, - "grad_norm": 1.415967216979672, - "learning_rate": 1.9341712967756774e-07, - "loss": 0.8942, - "step": 9570 - }, - { - "epoch": 0.8631465031338774, - "grad_norm": 1.3952814216185623, - "learning_rate": 1.9316656433818566e-07, - "loss": 0.8537, - "step": 9571 - }, - { - "epoch": 0.8632366866573477, - "grad_norm": 1.4282633875068027, - "learning_rate": 1.929161531677954e-07, - "loss": 0.8337, - "step": 9572 - }, - { - "epoch": 0.863326870180818, - "grad_norm": 1.4491742479426926, - "learning_rate": 1.9266589618776251e-07, - "loss": 0.8892, - "step": 9573 - }, - { - "epoch": 0.8634170537042882, - "grad_norm": 1.5646845995169685, - "learning_rate": 1.924157934194417e-07, - "loss": 0.9854, - "step": 9574 - }, - { - "epoch": 0.8635072372277585, - "grad_norm": 1.4903430024199331, - "learning_rate": 1.9216584488417142e-07, - "loss": 0.8243, - "step": 9575 - }, - { - "epoch": 0.8635974207512288, - "grad_norm": 1.6404461717782242, - "learning_rate": 1.919160506032802e-07, - "loss": 0.9866, - "step": 9576 - }, - { - "epoch": 0.863687604274699, - "grad_norm": 1.3235855163887336, - "learning_rate": 1.916664105980812e-07, - "loss": 0.9938, - "step": 9577 - }, - { - "epoch": 0.8637777877981693, - "grad_norm": 1.4948942390720295, - "learning_rate": 1.914169248898747e-07, - "loss": 0.9724, - "step": 9578 - }, - { - "epoch": 0.8638679713216395, - "grad_norm": 1.3021727671109058, - "learning_rate": 1.9116759349994882e-07, - "loss": 0.8509, - "step": 9579 - }, - { - "epoch": 0.8639581548451098, - "grad_norm": 1.3133690545425039, - "learning_rate": 1.9091841644957763e-07, - "loss": 0.9622, - "step": 9580 - }, - { - "epoch": 0.8640483383685801, - "grad_norm": 1.2081321461126275, - "learning_rate": 1.9066939376002278e-07, - "loss": 1.0184, - "step": 9581 - }, - { - "epoch": 0.8641385218920503, - "grad_norm": 1.8331922419038236, - "learning_rate": 1.9042052545253085e-07, - "loss": 0.8543, - "step": 9582 - }, - { - "epoch": 0.8642287054155205, - "grad_norm": 1.483080490222468, - "learning_rate": 1.901718115483384e-07, - "loss": 0.9151, - "step": 9583 - }, - { - "epoch": 0.8643188889389909, - "grad_norm": 1.2988486381574431, - "learning_rate": 1.8992325206866598e-07, - "loss": 0.92, - "step": 9584 - }, - { - "epoch": 0.8644090724624611, - "grad_norm": 1.2073420152322187, - "learning_rate": 1.8967484703472225e-07, - "loss": 0.9097, - "step": 9585 - }, - { - "epoch": 0.8644992559859314, - "grad_norm": 1.1968982126559657, - "learning_rate": 1.8942659646770288e-07, - "loss": 0.9774, - "step": 9586 - }, - { - "epoch": 0.8645894395094016, - "grad_norm": 1.7284815553227824, - "learning_rate": 1.8917850038878936e-07, - "loss": 1.0025, - "step": 9587 - }, - { - "epoch": 0.8646796230328719, - "grad_norm": 1.464854085249998, - "learning_rate": 1.8893055881915121e-07, - "loss": 0.8434, - "step": 9588 - }, - { - "epoch": 0.8647698065563422, - "grad_norm": 1.492898102770084, - "learning_rate": 1.886827717799442e-07, - "loss": 0.8901, - "step": 9589 - }, - { - "epoch": 0.8648599900798124, - "grad_norm": 1.5162303541503592, - "learning_rate": 1.884351392923096e-07, - "loss": 0.9514, - "step": 9590 - }, - { - "epoch": 0.8649501736032826, - "grad_norm": 1.8216744759134267, - "learning_rate": 1.8818766137737896e-07, - "loss": 0.9753, - "step": 9591 - }, - { - "epoch": 0.865040357126753, - "grad_norm": 1.9477100228706834, - "learning_rate": 1.8794033805626653e-07, - "loss": 0.9134, - "step": 9592 - }, - { - "epoch": 0.8651305406502232, - "grad_norm": 1.3988652347690507, - "learning_rate": 1.876931693500763e-07, - "loss": 0.9285, - "step": 9593 - }, - { - "epoch": 0.8652207241736934, - "grad_norm": 1.8190799846375731, - "learning_rate": 1.8744615527989783e-07, - "loss": 0.7875, - "step": 9594 - }, - { - "epoch": 0.8653109076971637, - "grad_norm": 1.671813108065115, - "learning_rate": 1.871992958668076e-07, - "loss": 1.0182, - "step": 9595 - }, - { - "epoch": 0.865401091220634, - "grad_norm": 1.5184932978447305, - "learning_rate": 1.8695259113186944e-07, - "loss": 0.9722, - "step": 9596 - }, - { - "epoch": 0.8654912747441043, - "grad_norm": 1.3168525731549237, - "learning_rate": 1.8670604109613252e-07, - "loss": 0.9013, - "step": 9597 - }, - { - "epoch": 0.8655814582675745, - "grad_norm": 1.252788057000806, - "learning_rate": 1.8645964578063533e-07, - "loss": 0.9796, - "step": 9598 - }, - { - "epoch": 0.8656716417910447, - "grad_norm": 1.4013722064097842, - "learning_rate": 1.862134052064006e-07, - "loss": 0.9888, - "step": 9599 - }, - { - "epoch": 0.8657618253145151, - "grad_norm": 2.073882397178456, - "learning_rate": 1.8596731939443932e-07, - "loss": 0.9733, - "step": 9600 - }, - { - "epoch": 0.8658520088379853, - "grad_norm": 1.4185872896301344, - "learning_rate": 1.857213883657487e-07, - "loss": 0.9604, - "step": 9601 - }, - { - "epoch": 0.8659421923614555, - "grad_norm": 0.6538591429447236, - "learning_rate": 1.8547561214131303e-07, - "loss": 0.7728, - "step": 9602 - }, - { - "epoch": 0.8660323758849259, - "grad_norm": 1.5551804570916756, - "learning_rate": 1.8522999074210355e-07, - "loss": 1.0184, - "step": 9603 - }, - { - "epoch": 0.8661225594083961, - "grad_norm": 1.5047002108503478, - "learning_rate": 1.849845241890775e-07, - "loss": 0.9768, - "step": 9604 - }, - { - "epoch": 0.8662127429318663, - "grad_norm": 1.5412777676521243, - "learning_rate": 1.8473921250317992e-07, - "loss": 0.9415, - "step": 9605 - }, - { - "epoch": 0.8663029264553366, - "grad_norm": 1.781777755454752, - "learning_rate": 1.8449405570534225e-07, - "loss": 0.869, - "step": 9606 - }, - { - "epoch": 0.8663931099788069, - "grad_norm": 1.975313176301473, - "learning_rate": 1.8424905381648204e-07, - "loss": 0.9747, - "step": 9607 - }, - { - "epoch": 0.8664832935022772, - "grad_norm": 1.2972734138050488, - "learning_rate": 1.8400420685750452e-07, - "loss": 0.8913, - "step": 9608 - }, - { - "epoch": 0.8665734770257474, - "grad_norm": 1.4078344108409122, - "learning_rate": 1.8375951484930142e-07, - "loss": 0.8722, - "step": 9609 - }, - { - "epoch": 0.8666636605492176, - "grad_norm": 1.3575433690430307, - "learning_rate": 1.8351497781275094e-07, - "loss": 0.935, - "step": 9610 - }, - { - "epoch": 0.866753844072688, - "grad_norm": 1.4795125726988763, - "learning_rate": 1.8327059576871907e-07, - "loss": 0.9221, - "step": 9611 - }, - { - "epoch": 0.8668440275961582, - "grad_norm": 1.2453509661047668, - "learning_rate": 1.8302636873805665e-07, - "loss": 0.9826, - "step": 9612 - }, - { - "epoch": 0.8669342111196284, - "grad_norm": 1.4377614488432728, - "learning_rate": 1.8278229674160373e-07, - "loss": 1.0088, - "step": 9613 - }, - { - "epoch": 0.8670243946430987, - "grad_norm": 1.3374937930809816, - "learning_rate": 1.825383798001845e-07, - "loss": 0.8689, - "step": 9614 - }, - { - "epoch": 0.867114578166569, - "grad_norm": 1.3030160487892715, - "learning_rate": 1.8229461793461297e-07, - "loss": 0.8784, - "step": 9615 - }, - { - "epoch": 0.8672047616900392, - "grad_norm": 1.3514169603607662, - "learning_rate": 1.8205101116568698e-07, - "loss": 0.9468, - "step": 9616 - }, - { - "epoch": 0.8672949452135095, - "grad_norm": 1.927246217459459, - "learning_rate": 1.818075595141928e-07, - "loss": 0.9872, - "step": 9617 - }, - { - "epoch": 0.8673851287369797, - "grad_norm": 1.4975123440292637, - "learning_rate": 1.8156426300090288e-07, - "loss": 0.9406, - "step": 9618 - }, - { - "epoch": 0.86747531226045, - "grad_norm": 1.4763207616166685, - "learning_rate": 1.8132112164657686e-07, - "loss": 0.9297, - "step": 9619 - }, - { - "epoch": 0.8675654957839203, - "grad_norm": 1.4378257879859055, - "learning_rate": 1.8107813547196106e-07, - "loss": 0.8266, - "step": 9620 - }, - { - "epoch": 0.8676556793073905, - "grad_norm": 1.49372361291539, - "learning_rate": 1.8083530449778817e-07, - "loss": 0.9711, - "step": 9621 - }, - { - "epoch": 0.8677458628308607, - "grad_norm": 1.3331164193536715, - "learning_rate": 1.8059262874477787e-07, - "loss": 0.8512, - "step": 9622 - }, - { - "epoch": 0.8678360463543311, - "grad_norm": 1.5874642976365647, - "learning_rate": 1.8035010823363627e-07, - "loss": 0.9267, - "step": 9623 - }, - { - "epoch": 0.8679262298778013, - "grad_norm": 1.4010026304722665, - "learning_rate": 1.8010774298505705e-07, - "loss": 0.9736, - "step": 9624 - }, - { - "epoch": 0.8680164134012716, - "grad_norm": 2.1864886534540604, - "learning_rate": 1.7986553301972007e-07, - "loss": 1.0276, - "step": 9625 - }, - { - "epoch": 0.8681065969247419, - "grad_norm": 1.8000736459924922, - "learning_rate": 1.7962347835829171e-07, - "loss": 0.9299, - "step": 9626 - }, - { - "epoch": 0.8681967804482121, - "grad_norm": 1.2552576121342762, - "learning_rate": 1.793815790214257e-07, - "loss": 0.9468, - "step": 9627 - }, - { - "epoch": 0.8682869639716824, - "grad_norm": 1.517713071893832, - "learning_rate": 1.791398350297626e-07, - "loss": 0.8922, - "step": 9628 - }, - { - "epoch": 0.8683771474951526, - "grad_norm": 1.3334405329446068, - "learning_rate": 1.7889824640392813e-07, - "loss": 0.9393, - "step": 9629 - }, - { - "epoch": 0.868467331018623, - "grad_norm": 1.4792603566044282, - "learning_rate": 1.7865681316453741e-07, - "loss": 0.9189, - "step": 9630 - }, - { - "epoch": 0.8685575145420932, - "grad_norm": 1.4657071023487225, - "learning_rate": 1.7841553533218968e-07, - "loss": 0.9251, - "step": 9631 - }, - { - "epoch": 0.8686476980655634, - "grad_norm": 1.5225162165738086, - "learning_rate": 1.7817441292747292e-07, - "loss": 1.0352, - "step": 9632 - }, - { - "epoch": 0.8687378815890336, - "grad_norm": 1.6516782173319986, - "learning_rate": 1.779334459709607e-07, - "loss": 0.9232, - "step": 9633 - }, - { - "epoch": 0.868828065112504, - "grad_norm": 1.889239915163373, - "learning_rate": 1.7769263448321347e-07, - "loss": 0.9576, - "step": 9634 - }, - { - "epoch": 0.8689182486359742, - "grad_norm": 1.3591105762994447, - "learning_rate": 1.7745197848477879e-07, - "loss": 0.9265, - "step": 9635 - }, - { - "epoch": 0.8690084321594445, - "grad_norm": 1.7237505907662136, - "learning_rate": 1.7721147799619063e-07, - "loss": 1.0318, - "step": 9636 - }, - { - "epoch": 0.8690986156829147, - "grad_norm": 0.6038751117924922, - "learning_rate": 1.769711330379704e-07, - "loss": 0.7662, - "step": 9637 - }, - { - "epoch": 0.869188799206385, - "grad_norm": 1.6650627604389456, - "learning_rate": 1.767309436306248e-07, - "loss": 0.9797, - "step": 9638 - }, - { - "epoch": 0.8692789827298553, - "grad_norm": 1.4458809224559253, - "learning_rate": 1.764909097946483e-07, - "loss": 0.9999, - "step": 9639 - }, - { - "epoch": 0.8693691662533255, - "grad_norm": 2.0666155357598672, - "learning_rate": 1.7625103155052236e-07, - "loss": 1.0062, - "step": 9640 - }, - { - "epoch": 0.8694593497767957, - "grad_norm": 1.8231345636838177, - "learning_rate": 1.760113089187143e-07, - "loss": 0.9421, - "step": 9641 - }, - { - "epoch": 0.8695495333002661, - "grad_norm": 1.4679316411954122, - "learning_rate": 1.7577174191967868e-07, - "loss": 0.9613, - "step": 9642 - }, - { - "epoch": 0.8696397168237363, - "grad_norm": 1.6236997684405998, - "learning_rate": 1.755323305738574e-07, - "loss": 0.9477, - "step": 9643 - }, - { - "epoch": 0.8697299003472065, - "grad_norm": 1.237359651640884, - "learning_rate": 1.7529307490167677e-07, - "loss": 0.9709, - "step": 9644 - }, - { - "epoch": 0.8698200838706768, - "grad_norm": 1.6949930197647989, - "learning_rate": 1.7505397492355288e-07, - "loss": 0.9157, - "step": 9645 - }, - { - "epoch": 0.8699102673941471, - "grad_norm": 1.387099866477284, - "learning_rate": 1.7481503065988589e-07, - "loss": 0.9694, - "step": 9646 - }, - { - "epoch": 0.8700004509176174, - "grad_norm": 1.1476597650213147, - "learning_rate": 1.7457624213106526e-07, - "loss": 0.9022, - "step": 9647 - }, - { - "epoch": 0.8700906344410876, - "grad_norm": 1.702291582208928, - "learning_rate": 1.7433760935746465e-07, - "loss": 0.9411, - "step": 9648 - }, - { - "epoch": 0.8701808179645579, - "grad_norm": 1.4252488906071816, - "learning_rate": 1.740991323594456e-07, - "loss": 0.8824, - "step": 9649 - }, - { - "epoch": 0.8702710014880282, - "grad_norm": 1.3976552469624162, - "learning_rate": 1.7386081115735651e-07, - "loss": 0.9112, - "step": 9650 - }, - { - "epoch": 0.8703611850114984, - "grad_norm": 1.486945245982777, - "learning_rate": 1.736226457715324e-07, - "loss": 0.8997, - "step": 9651 - }, - { - "epoch": 0.8704513685349686, - "grad_norm": 1.2910551910570227, - "learning_rate": 1.7338463622229505e-07, - "loss": 0.8905, - "step": 9652 - }, - { - "epoch": 0.870541552058439, - "grad_norm": 1.492346310777597, - "learning_rate": 1.7314678252995152e-07, - "loss": 0.8879, - "step": 9653 - }, - { - "epoch": 0.8706317355819092, - "grad_norm": 1.3468272880016017, - "learning_rate": 1.7290908471479847e-07, - "loss": 0.8735, - "step": 9654 - }, - { - "epoch": 0.8707219191053794, - "grad_norm": 1.4897865032365645, - "learning_rate": 1.7267154279711637e-07, - "loss": 0.933, - "step": 9655 - }, - { - "epoch": 0.8708121026288497, - "grad_norm": 1.2296931653213385, - "learning_rate": 1.724341567971741e-07, - "loss": 0.9847, - "step": 9656 - }, - { - "epoch": 0.87090228615232, - "grad_norm": 1.324826846414754, - "learning_rate": 1.7219692673522657e-07, - "loss": 1.0266, - "step": 9657 - }, - { - "epoch": 0.8709924696757902, - "grad_norm": 1.3448854572773496, - "learning_rate": 1.7195985263151558e-07, - "loss": 0.959, - "step": 9658 - }, - { - "epoch": 0.8710826531992605, - "grad_norm": 1.7296421668806703, - "learning_rate": 1.7172293450626985e-07, - "loss": 0.979, - "step": 9659 - }, - { - "epoch": 0.8711728367227307, - "grad_norm": 1.5255684840491186, - "learning_rate": 1.7148617237970475e-07, - "loss": 0.9367, - "step": 9660 - }, - { - "epoch": 0.8712630202462011, - "grad_norm": 1.2292915647165947, - "learning_rate": 1.7124956627202102e-07, - "loss": 0.8933, - "step": 9661 - }, - { - "epoch": 0.8713532037696713, - "grad_norm": 1.7222999671424393, - "learning_rate": 1.7101311620340852e-07, - "loss": 0.9567, - "step": 9662 - }, - { - "epoch": 0.8714433872931415, - "grad_norm": 1.5654252707263798, - "learning_rate": 1.7077682219404155e-07, - "loss": 0.8739, - "step": 9663 - }, - { - "epoch": 0.8715335708166118, - "grad_norm": 1.7600368413537153, - "learning_rate": 1.705406842640824e-07, - "loss": 1.0633, - "step": 9664 - }, - { - "epoch": 0.8716237543400821, - "grad_norm": 1.265292512524302, - "learning_rate": 1.7030470243367946e-07, - "loss": 0.9242, - "step": 9665 - }, - { - "epoch": 0.8717139378635523, - "grad_norm": 1.2552992074237486, - "learning_rate": 1.7006887672296834e-07, - "loss": 0.8588, - "step": 9666 - }, - { - "epoch": 0.8718041213870226, - "grad_norm": 1.5637825851643603, - "learning_rate": 1.6983320715207094e-07, - "loss": 0.8876, - "step": 9667 - }, - { - "epoch": 0.8718943049104928, - "grad_norm": 1.3074898689854904, - "learning_rate": 1.6959769374109523e-07, - "loss": 0.9542, - "step": 9668 - }, - { - "epoch": 0.8719844884339631, - "grad_norm": 1.4020107798440822, - "learning_rate": 1.6936233651013754e-07, - "loss": 0.8706, - "step": 9669 - }, - { - "epoch": 0.8720746719574334, - "grad_norm": 1.346097226838566, - "learning_rate": 1.691271354792787e-07, - "loss": 0.9625, - "step": 9670 - }, - { - "epoch": 0.8721648554809036, - "grad_norm": 1.3372451004563293, - "learning_rate": 1.6889209066858866e-07, - "loss": 0.8169, - "step": 9671 - }, - { - "epoch": 0.872255039004374, - "grad_norm": 1.5415321325763471, - "learning_rate": 1.6865720209812185e-07, - "loss": 0.9783, - "step": 9672 - }, - { - "epoch": 0.8723452225278442, - "grad_norm": 1.6904146666495765, - "learning_rate": 1.684224697879204e-07, - "loss": 0.9087, - "step": 9673 - }, - { - "epoch": 0.8724354060513144, - "grad_norm": 1.5408284855723864, - "learning_rate": 1.6818789375801302e-07, - "loss": 0.9982, - "step": 9674 - }, - { - "epoch": 0.8725255895747847, - "grad_norm": 1.37875201089389, - "learning_rate": 1.679534740284152e-07, - "loss": 0.8976, - "step": 9675 - }, - { - "epoch": 0.872615773098255, - "grad_norm": 1.625315635544639, - "learning_rate": 1.6771921061912853e-07, - "loss": 0.8859, - "step": 9676 - }, - { - "epoch": 0.8727059566217252, - "grad_norm": 1.69591585313131, - "learning_rate": 1.6748510355014234e-07, - "loss": 0.8813, - "step": 9677 - }, - { - "epoch": 0.8727961401451955, - "grad_norm": 1.3096308911085366, - "learning_rate": 1.6725115284143132e-07, - "loss": 0.9436, - "step": 9678 - }, - { - "epoch": 0.8728863236686657, - "grad_norm": 1.7303203579416089, - "learning_rate": 1.670173585129575e-07, - "loss": 0.9171, - "step": 9679 - }, - { - "epoch": 0.872976507192136, - "grad_norm": 1.4798575475800908, - "learning_rate": 1.667837205846696e-07, - "loss": 0.9945, - "step": 9680 - }, - { - "epoch": 0.8730666907156063, - "grad_norm": 1.4860544441487065, - "learning_rate": 1.6655023907650278e-07, - "loss": 0.9323, - "step": 9681 - }, - { - "epoch": 0.8731568742390765, - "grad_norm": 1.7038487287814221, - "learning_rate": 1.6631691400837954e-07, - "loss": 0.9753, - "step": 9682 - }, - { - "epoch": 0.8732470577625467, - "grad_norm": 2.2705521249036096, - "learning_rate": 1.6608374540020752e-07, - "loss": 0.9874, - "step": 9683 - }, - { - "epoch": 0.8733372412860171, - "grad_norm": 0.6837851338190594, - "learning_rate": 1.658507332718828e-07, - "loss": 0.7565, - "step": 9684 - }, - { - "epoch": 0.8734274248094873, - "grad_norm": 1.5032285277943944, - "learning_rate": 1.656178776432864e-07, - "loss": 0.8662, - "step": 9685 - }, - { - "epoch": 0.8735176083329576, - "grad_norm": 1.3008693630858437, - "learning_rate": 1.6538517853428814e-07, - "loss": 0.9081, - "step": 9686 - }, - { - "epoch": 0.8736077918564278, - "grad_norm": 1.1745793989877258, - "learning_rate": 1.6515263596474194e-07, - "loss": 0.9154, - "step": 9687 - }, - { - "epoch": 0.8736979753798981, - "grad_norm": 1.6251327020306972, - "learning_rate": 1.6492024995449017e-07, - "loss": 0.9177, - "step": 9688 - }, - { - "epoch": 0.8737881589033684, - "grad_norm": 1.4496362295621208, - "learning_rate": 1.6468802052336116e-07, - "loss": 0.8963, - "step": 9689 - }, - { - "epoch": 0.8738783424268386, - "grad_norm": 1.427633508962677, - "learning_rate": 1.6445594769116998e-07, - "loss": 0.9588, - "step": 9690 - }, - { - "epoch": 0.8739685259503088, - "grad_norm": 1.438782824449255, - "learning_rate": 1.6422403147771836e-07, - "loss": 1.0085, - "step": 9691 - }, - { - "epoch": 0.8740587094737792, - "grad_norm": 1.3947643010496371, - "learning_rate": 1.6399227190279485e-07, - "loss": 0.9034, - "step": 9692 - }, - { - "epoch": 0.8741488929972494, - "grad_norm": 1.2700846243621235, - "learning_rate": 1.637606689861748e-07, - "loss": 0.862, - "step": 9693 - }, - { - "epoch": 0.8742390765207196, - "grad_norm": 1.6768107064313005, - "learning_rate": 1.6352922274761883e-07, - "loss": 0.8961, - "step": 9694 - }, - { - "epoch": 0.87432926004419, - "grad_norm": 1.3512625830079454, - "learning_rate": 1.6329793320687602e-07, - "loss": 0.8007, - "step": 9695 - }, - { - "epoch": 0.8744194435676602, - "grad_norm": 1.4312858610366477, - "learning_rate": 1.630668003836808e-07, - "loss": 0.9639, - "step": 9696 - }, - { - "epoch": 0.8745096270911304, - "grad_norm": 1.5004747751995473, - "learning_rate": 1.62835824297755e-07, - "loss": 0.9319, - "step": 9697 - }, - { - "epoch": 0.8745998106146007, - "grad_norm": 1.4289735772991539, - "learning_rate": 1.626050049688066e-07, - "loss": 0.9405, - "step": 9698 - }, - { - "epoch": 0.874689994138071, - "grad_norm": 1.5021343146714126, - "learning_rate": 1.623743424165309e-07, - "loss": 0.9134, - "step": 9699 - }, - { - "epoch": 0.8747801776615413, - "grad_norm": 1.775017996146083, - "learning_rate": 1.6214383666060826e-07, - "loss": 0.8801, - "step": 9700 - }, - { - "epoch": 0.8748703611850115, - "grad_norm": 1.6998088645072844, - "learning_rate": 1.619134877207078e-07, - "loss": 0.9562, - "step": 9701 - }, - { - "epoch": 0.8749605447084817, - "grad_norm": 1.5285711405592113, - "learning_rate": 1.616832956164831e-07, - "loss": 0.9383, - "step": 9702 - }, - { - "epoch": 0.8750507282319521, - "grad_norm": 1.2987545594914636, - "learning_rate": 1.6145326036757667e-07, - "loss": 0.9013, - "step": 9703 - }, - { - "epoch": 0.8751409117554223, - "grad_norm": 1.6332393982533104, - "learning_rate": 1.612233819936155e-07, - "loss": 0.8993, - "step": 9704 - }, - { - "epoch": 0.8752310952788925, - "grad_norm": 1.5709590814313452, - "learning_rate": 1.6099366051421414e-07, - "loss": 0.9353, - "step": 9705 - }, - { - "epoch": 0.8753212788023628, - "grad_norm": 1.3762595736441732, - "learning_rate": 1.6076409594897378e-07, - "loss": 1.0, - "step": 9706 - }, - { - "epoch": 0.8754114623258331, - "grad_norm": 0.6999697780897063, - "learning_rate": 1.605346883174823e-07, - "loss": 0.7609, - "step": 9707 - }, - { - "epoch": 0.8755016458493033, - "grad_norm": 1.5762006905910342, - "learning_rate": 1.6030543763931427e-07, - "loss": 0.9155, - "step": 9708 - }, - { - "epoch": 0.8755918293727736, - "grad_norm": 1.5956089174063044, - "learning_rate": 1.600763439340298e-07, - "loss": 0.9504, - "step": 9709 - }, - { - "epoch": 0.8756820128962438, - "grad_norm": 1.3252508591751857, - "learning_rate": 1.5984740722117707e-07, - "loss": 0.9909, - "step": 9710 - }, - { - "epoch": 0.8757721964197142, - "grad_norm": 1.1472211871374682, - "learning_rate": 1.5961862752028998e-07, - "loss": 0.9443, - "step": 9711 - }, - { - "epoch": 0.8758623799431844, - "grad_norm": 1.7506675809280166, - "learning_rate": 1.5939000485088937e-07, - "loss": 0.8429, - "step": 9712 - }, - { - "epoch": 0.8759525634666546, - "grad_norm": 1.3756896370274152, - "learning_rate": 1.5916153923248254e-07, - "loss": 1.0233, - "step": 9713 - }, - { - "epoch": 0.8760427469901249, - "grad_norm": 1.7120313936055729, - "learning_rate": 1.5893323068456342e-07, - "loss": 0.9816, - "step": 9714 - }, - { - "epoch": 0.8761329305135952, - "grad_norm": 1.3312051398370626, - "learning_rate": 1.5870507922661248e-07, - "loss": 0.9747, - "step": 9715 - }, - { - "epoch": 0.8762231140370654, - "grad_norm": 0.6218212832920323, - "learning_rate": 1.5847708487809763e-07, - "loss": 0.7856, - "step": 9716 - }, - { - "epoch": 0.8763132975605357, - "grad_norm": 1.5695643374504793, - "learning_rate": 1.5824924765847113e-07, - "loss": 1.0242, - "step": 9717 - }, - { - "epoch": 0.8764034810840059, - "grad_norm": 1.2002315655440599, - "learning_rate": 1.5802156758717478e-07, - "loss": 0.9097, - "step": 9718 - }, - { - "epoch": 0.8764936646074762, - "grad_norm": 1.3747229730530333, - "learning_rate": 1.5779404468363433e-07, - "loss": 0.9371, - "step": 9719 - }, - { - "epoch": 0.8765838481309465, - "grad_norm": 1.8467246160584292, - "learning_rate": 1.5756667896726405e-07, - "loss": 0.9912, - "step": 9720 - }, - { - "epoch": 0.8766740316544167, - "grad_norm": 1.6412383613260917, - "learning_rate": 1.5733947045746377e-07, - "loss": 0.953, - "step": 9721 - }, - { - "epoch": 0.876764215177887, - "grad_norm": 1.708839558596396, - "learning_rate": 1.5711241917362018e-07, - "loss": 1.0016, - "step": 9722 - }, - { - "epoch": 0.8768543987013573, - "grad_norm": 1.706033794971709, - "learning_rate": 1.5688552513510688e-07, - "loss": 0.8842, - "step": 9723 - }, - { - "epoch": 0.8769445822248275, - "grad_norm": 1.312961224851388, - "learning_rate": 1.5665878836128266e-07, - "loss": 0.9632, - "step": 9724 - }, - { - "epoch": 0.8770347657482978, - "grad_norm": 1.2132141369829537, - "learning_rate": 1.5643220887149554e-07, - "loss": 0.9743, - "step": 9725 - }, - { - "epoch": 0.8771249492717681, - "grad_norm": 0.7298122454576897, - "learning_rate": 1.562057866850772e-07, - "loss": 0.8335, - "step": 9726 - }, - { - "epoch": 0.8772151327952383, - "grad_norm": 1.4139380637367027, - "learning_rate": 1.5597952182134777e-07, - "loss": 0.8077, - "step": 9727 - }, - { - "epoch": 0.8773053163187086, - "grad_norm": 1.3804384531561957, - "learning_rate": 1.557534142996133e-07, - "loss": 0.9878, - "step": 9728 - }, - { - "epoch": 0.8773954998421788, - "grad_norm": 1.466939093835594, - "learning_rate": 1.5552746413916662e-07, - "loss": 0.9673, - "step": 9729 - }, - { - "epoch": 0.8774856833656491, - "grad_norm": 1.767196158768195, - "learning_rate": 1.5530167135928697e-07, - "loss": 0.9258, - "step": 9730 - }, - { - "epoch": 0.8775758668891194, - "grad_norm": 1.5574519437475847, - "learning_rate": 1.5507603597924068e-07, - "loss": 0.8827, - "step": 9731 - }, - { - "epoch": 0.8776660504125896, - "grad_norm": 1.6069167538191995, - "learning_rate": 1.548505580182793e-07, - "loss": 0.8877, - "step": 9732 - }, - { - "epoch": 0.8777562339360598, - "grad_norm": 1.4310918695641766, - "learning_rate": 1.5462523749564271e-07, - "loss": 0.9363, - "step": 9733 - }, - { - "epoch": 0.8778464174595302, - "grad_norm": 1.4658008754592802, - "learning_rate": 1.5440007443055602e-07, - "loss": 0.932, - "step": 9734 - }, - { - "epoch": 0.8779366009830004, - "grad_norm": 1.6977937181888394, - "learning_rate": 1.541750688422314e-07, - "loss": 0.8759, - "step": 9735 - }, - { - "epoch": 0.8780267845064706, - "grad_norm": 1.408846979128777, - "learning_rate": 1.5395022074986797e-07, - "loss": 0.9432, - "step": 9736 - }, - { - "epoch": 0.8781169680299409, - "grad_norm": 1.3637908595159103, - "learning_rate": 1.5372553017265033e-07, - "loss": 0.8867, - "step": 9737 - }, - { - "epoch": 0.8782071515534112, - "grad_norm": 2.5696980863824197, - "learning_rate": 1.5350099712975116e-07, - "loss": 0.9939, - "step": 9738 - }, - { - "epoch": 0.8782973350768815, - "grad_norm": 1.4946747666569657, - "learning_rate": 1.5327662164032785e-07, - "loss": 0.9263, - "step": 9739 - }, - { - "epoch": 0.8783875186003517, - "grad_norm": 1.5375796692838852, - "learning_rate": 1.5305240372352656e-07, - "loss": 0.7899, - "step": 9740 - }, - { - "epoch": 0.8784777021238219, - "grad_norm": 1.3815273196390498, - "learning_rate": 1.5282834339847738e-07, - "loss": 0.9466, - "step": 9741 - }, - { - "epoch": 0.8785678856472923, - "grad_norm": 1.3949879558275733, - "learning_rate": 1.526044406842999e-07, - "loss": 0.9157, - "step": 9742 - }, - { - "epoch": 0.8786580691707625, - "grad_norm": 1.3347288319219728, - "learning_rate": 1.523806956000977e-07, - "loss": 0.8648, - "step": 9743 - }, - { - "epoch": 0.8787482526942327, - "grad_norm": 1.3592538889285035, - "learning_rate": 1.5215710816496197e-07, - "loss": 0.9815, - "step": 9744 - }, - { - "epoch": 0.8788384362177031, - "grad_norm": 2.0153794249928647, - "learning_rate": 1.5193367839797077e-07, - "loss": 0.8855, - "step": 9745 - }, - { - "epoch": 0.8789286197411733, - "grad_norm": 1.384032966262493, - "learning_rate": 1.5171040631818842e-07, - "loss": 0.9367, - "step": 9746 - }, - { - "epoch": 0.8790188032646435, - "grad_norm": 1.5510505807525214, - "learning_rate": 1.5148729194466547e-07, - "loss": 0.8623, - "step": 9747 - }, - { - "epoch": 0.8791089867881138, - "grad_norm": 1.4155894933967643, - "learning_rate": 1.5126433529643956e-07, - "loss": 1.0043, - "step": 9748 - }, - { - "epoch": 0.8791991703115841, - "grad_norm": 1.473245760758957, - "learning_rate": 1.5104153639253436e-07, - "loss": 0.9376, - "step": 9749 - }, - { - "epoch": 0.8792893538350544, - "grad_norm": 0.6566449974772276, - "learning_rate": 1.5081889525196002e-07, - "loss": 0.8479, - "step": 9750 - }, - { - "epoch": 0.8793795373585246, - "grad_norm": 1.5046912584426224, - "learning_rate": 1.5059641189371398e-07, - "loss": 0.9936, - "step": 9751 - }, - { - "epoch": 0.8794697208819948, - "grad_norm": 1.4255327400079145, - "learning_rate": 1.503740863367795e-07, - "loss": 0.9515, - "step": 9752 - }, - { - "epoch": 0.8795599044054652, - "grad_norm": 1.6071685183459463, - "learning_rate": 1.50151918600127e-07, - "loss": 0.9629, - "step": 9753 - }, - { - "epoch": 0.8796500879289354, - "grad_norm": 1.451254348156943, - "learning_rate": 1.4992990870271217e-07, - "loss": 0.9842, - "step": 9754 - }, - { - "epoch": 0.8797402714524056, - "grad_norm": 1.611202868592778, - "learning_rate": 1.497080566634794e-07, - "loss": 0.9343, - "step": 9755 - }, - { - "epoch": 0.8798304549758759, - "grad_norm": 1.2918043165797148, - "learning_rate": 1.4948636250135693e-07, - "loss": 0.9493, - "step": 9756 - }, - { - "epoch": 0.8799206384993462, - "grad_norm": 1.6077735117642085, - "learning_rate": 1.4926482623526249e-07, - "loss": 1.0, - "step": 9757 - }, - { - "epoch": 0.8800108220228164, - "grad_norm": 1.5990186632270778, - "learning_rate": 1.4904344788409694e-07, - "loss": 0.9373, - "step": 9758 - }, - { - "epoch": 0.8801010055462867, - "grad_norm": 1.5810308613669521, - "learning_rate": 1.4882222746675143e-07, - "loss": 0.8926, - "step": 9759 - }, - { - "epoch": 0.8801911890697569, - "grad_norm": 1.7397522606366984, - "learning_rate": 1.4860116500210018e-07, - "loss": 0.9751, - "step": 9760 - }, - { - "epoch": 0.8802813725932273, - "grad_norm": 3.30151243519409, - "learning_rate": 1.4838026050900632e-07, - "loss": 0.9923, - "step": 9761 - }, - { - "epoch": 0.8803715561166975, - "grad_norm": 1.3637353091755475, - "learning_rate": 1.481595140063181e-07, - "loss": 0.9867, - "step": 9762 - }, - { - "epoch": 0.8804617396401677, - "grad_norm": 1.7382277576916851, - "learning_rate": 1.4793892551287136e-07, - "loss": 1.0238, - "step": 9763 - }, - { - "epoch": 0.880551923163638, - "grad_norm": 1.6692321423724725, - "learning_rate": 1.4771849504748768e-07, - "loss": 0.8651, - "step": 9764 - }, - { - "epoch": 0.8806421066871083, - "grad_norm": 1.3497052082755758, - "learning_rate": 1.4749822262897517e-07, - "loss": 1.0195, - "step": 9765 - }, - { - "epoch": 0.8807322902105785, - "grad_norm": 1.5556209962945824, - "learning_rate": 1.4727810827612895e-07, - "loss": 0.8759, - "step": 9766 - }, - { - "epoch": 0.8808224737340488, - "grad_norm": 2.6367359302102327, - "learning_rate": 1.470581520077303e-07, - "loss": 0.9404, - "step": 9767 - }, - { - "epoch": 0.8809126572575191, - "grad_norm": 1.8596415609135284, - "learning_rate": 1.4683835384254705e-07, - "loss": 0.9307, - "step": 9768 - }, - { - "epoch": 0.8810028407809893, - "grad_norm": 1.534002516027026, - "learning_rate": 1.4661871379933376e-07, - "loss": 1.0252, - "step": 9769 - }, - { - "epoch": 0.8810930243044596, - "grad_norm": 1.5319836474409938, - "learning_rate": 1.4639923189683169e-07, - "loss": 0.991, - "step": 9770 - }, - { - "epoch": 0.8811832078279298, - "grad_norm": 1.6087049552343666, - "learning_rate": 1.461799081537669e-07, - "loss": 0.9852, - "step": 9771 - }, - { - "epoch": 0.8812733913514001, - "grad_norm": 1.8060691135738332, - "learning_rate": 1.4596074258885514e-07, - "loss": 0.9904, - "step": 9772 - }, - { - "epoch": 0.8813635748748704, - "grad_norm": 1.460694563143255, - "learning_rate": 1.4574173522079502e-07, - "loss": 0.9078, - "step": 9773 - }, - { - "epoch": 0.8814537583983406, - "grad_norm": 1.3218126521275713, - "learning_rate": 1.4552288606827513e-07, - "loss": 0.9116, - "step": 9774 - }, - { - "epoch": 0.8815439419218108, - "grad_norm": 1.4266518639007502, - "learning_rate": 1.4530419514996761e-07, - "loss": 1.0129, - "step": 9775 - }, - { - "epoch": 0.8816341254452812, - "grad_norm": 0.7520832296003304, - "learning_rate": 1.4508566248453291e-07, - "loss": 0.8399, - "step": 9776 - }, - { - "epoch": 0.8817243089687514, - "grad_norm": 1.6218292406275066, - "learning_rate": 1.448672880906172e-07, - "loss": 0.9388, - "step": 9777 - }, - { - "epoch": 0.8818144924922217, - "grad_norm": 1.5313157826520762, - "learning_rate": 1.4464907198685382e-07, - "loss": 0.8938, - "step": 9778 - }, - { - "epoch": 0.8819046760156919, - "grad_norm": 1.22375906546394, - "learning_rate": 1.444310141918621e-07, - "loss": 0.9358, - "step": 9779 - }, - { - "epoch": 0.8819948595391622, - "grad_norm": 3.750871684170665, - "learning_rate": 1.4421311472424735e-07, - "loss": 0.9624, - "step": 9780 - }, - { - "epoch": 0.8820850430626325, - "grad_norm": 1.6112443012050945, - "learning_rate": 1.4399537360260273e-07, - "loss": 0.9696, - "step": 9781 - }, - { - "epoch": 0.8821752265861027, - "grad_norm": 1.5557240620050332, - "learning_rate": 1.4377779084550645e-07, - "loss": 0.963, - "step": 9782 - }, - { - "epoch": 0.8822654101095729, - "grad_norm": 1.2942013584374479, - "learning_rate": 1.4356036647152413e-07, - "loss": 0.9353, - "step": 9783 - }, - { - "epoch": 0.8823555936330433, - "grad_norm": 1.4029425402571278, - "learning_rate": 1.4334310049920785e-07, - "loss": 0.9218, - "step": 9784 - }, - { - "epoch": 0.8824457771565135, - "grad_norm": 0.63268530709432, - "learning_rate": 1.431259929470956e-07, - "loss": 0.7552, - "step": 9785 - }, - { - "epoch": 0.8825359606799837, - "grad_norm": 1.432157623492125, - "learning_rate": 1.4290904383371237e-07, - "loss": 1.0094, - "step": 9786 - }, - { - "epoch": 0.882626144203454, - "grad_norm": 1.6559971310457082, - "learning_rate": 1.4269225317756961e-07, - "loss": 0.919, - "step": 9787 - }, - { - "epoch": 0.8827163277269243, - "grad_norm": 1.5578564116354503, - "learning_rate": 1.424756209971645e-07, - "loss": 0.9009, - "step": 9788 - }, - { - "epoch": 0.8828065112503946, - "grad_norm": 1.65256615983852, - "learning_rate": 1.4225914731098199e-07, - "loss": 0.9328, - "step": 9789 - }, - { - "epoch": 0.8828966947738648, - "grad_norm": 1.3971543205620085, - "learning_rate": 1.4204283213749248e-07, - "loss": 0.8785, - "step": 9790 - }, - { - "epoch": 0.8829868782973351, - "grad_norm": 1.2905115941357108, - "learning_rate": 1.4182667549515315e-07, - "loss": 0.9926, - "step": 9791 - }, - { - "epoch": 0.8830770618208054, - "grad_norm": 1.5284841041424706, - "learning_rate": 1.4161067740240752e-07, - "loss": 0.9179, - "step": 9792 - }, - { - "epoch": 0.8831672453442756, - "grad_norm": 1.2848608262955616, - "learning_rate": 1.4139483787768614e-07, - "loss": 0.935, - "step": 9793 - }, - { - "epoch": 0.8832574288677458, - "grad_norm": 1.4643189978056677, - "learning_rate": 1.4117915693940584e-07, - "loss": 0.8468, - "step": 9794 - }, - { - "epoch": 0.8833476123912162, - "grad_norm": 1.3630659485365388, - "learning_rate": 1.409636346059684e-07, - "loss": 0.9297, - "step": 9795 - }, - { - "epoch": 0.8834377959146864, - "grad_norm": 1.7229899042224508, - "learning_rate": 1.4074827089576501e-07, - "loss": 0.9601, - "step": 9796 - }, - { - "epoch": 0.8835279794381566, - "grad_norm": 1.6920015551692948, - "learning_rate": 1.4053306582717085e-07, - "loss": 0.9579, - "step": 9797 - }, - { - "epoch": 0.8836181629616269, - "grad_norm": 1.5317834975743674, - "learning_rate": 1.4031801941854827e-07, - "loss": 0.9684, - "step": 9798 - }, - { - "epoch": 0.8837083464850972, - "grad_norm": 1.2936360921794265, - "learning_rate": 1.401031316882466e-07, - "loss": 0.9033, - "step": 9799 - }, - { - "epoch": 0.8837985300085675, - "grad_norm": 1.4590277607912892, - "learning_rate": 1.39888402654601e-07, - "loss": 0.9548, - "step": 9800 - }, - { - "epoch": 0.8838887135320377, - "grad_norm": 0.6294121218191951, - "learning_rate": 1.3967383233593344e-07, - "loss": 0.824, - "step": 9801 - }, - { - "epoch": 0.8839788970555079, - "grad_norm": 1.5518411622370087, - "learning_rate": 1.3945942075055218e-07, - "loss": 0.8989, - "step": 9802 - }, - { - "epoch": 0.8840690805789783, - "grad_norm": 1.6226734966744518, - "learning_rate": 1.3924516791675212e-07, - "loss": 0.9674, - "step": 9803 - }, - { - "epoch": 0.8841592641024485, - "grad_norm": 1.288966782223995, - "learning_rate": 1.3903107385281487e-07, - "loss": 0.8495, - "step": 9804 - }, - { - "epoch": 0.8842494476259187, - "grad_norm": 1.366088992628348, - "learning_rate": 1.3881713857700717e-07, - "loss": 0.9475, - "step": 9805 - }, - { - "epoch": 0.884339631149389, - "grad_norm": 1.7090378060843863, - "learning_rate": 1.3860336210758372e-07, - "loss": 0.9163, - "step": 9806 - }, - { - "epoch": 0.8844298146728593, - "grad_norm": 1.3628499570307522, - "learning_rate": 1.3838974446278506e-07, - "loss": 0.8916, - "step": 9807 - }, - { - "epoch": 0.8845199981963295, - "grad_norm": 1.346185207525627, - "learning_rate": 1.3817628566083817e-07, - "loss": 0.9132, - "step": 9808 - }, - { - "epoch": 0.8846101817197998, - "grad_norm": 1.2509492607621637, - "learning_rate": 1.3796298571995712e-07, - "loss": 0.9513, - "step": 9809 - }, - { - "epoch": 0.88470036524327, - "grad_norm": 1.5602885809542808, - "learning_rate": 1.377498446583405e-07, - "loss": 0.8416, - "step": 9810 - }, - { - "epoch": 0.8847905487667403, - "grad_norm": 1.378343807851037, - "learning_rate": 1.3753686249417596e-07, - "loss": 0.9127, - "step": 9811 - }, - { - "epoch": 0.8848807322902106, - "grad_norm": 2.1870050142751385, - "learning_rate": 1.373240392456354e-07, - "loss": 0.8891, - "step": 9812 - }, - { - "epoch": 0.8849709158136808, - "grad_norm": 1.4307280542429277, - "learning_rate": 1.37111374930879e-07, - "loss": 0.8783, - "step": 9813 - }, - { - "epoch": 0.885061099337151, - "grad_norm": 1.5018366854094767, - "learning_rate": 1.3689886956805176e-07, - "loss": 0.871, - "step": 9814 - }, - { - "epoch": 0.8851512828606214, - "grad_norm": 1.2586925105553917, - "learning_rate": 1.3668652317528585e-07, - "loss": 0.9266, - "step": 9815 - }, - { - "epoch": 0.8852414663840916, - "grad_norm": 1.5736179009732794, - "learning_rate": 1.3647433577070012e-07, - "loss": 0.9497, - "step": 9816 - }, - { - "epoch": 0.8853316499075619, - "grad_norm": 0.7191187285944353, - "learning_rate": 1.3626230737239942e-07, - "loss": 0.7942, - "step": 9817 - }, - { - "epoch": 0.8854218334310322, - "grad_norm": 0.757292460913856, - "learning_rate": 1.3605043799847527e-07, - "loss": 0.8079, - "step": 9818 - }, - { - "epoch": 0.8855120169545024, - "grad_norm": 1.2908282122295955, - "learning_rate": 1.3583872766700567e-07, - "loss": 0.8691, - "step": 9819 - }, - { - "epoch": 0.8856022004779727, - "grad_norm": 1.4127764676027519, - "learning_rate": 1.3562717639605437e-07, - "loss": 0.924, - "step": 9820 - }, - { - "epoch": 0.8856923840014429, - "grad_norm": 1.4420789683989683, - "learning_rate": 1.3541578420367229e-07, - "loss": 0.9055, - "step": 9821 - }, - { - "epoch": 0.8857825675249132, - "grad_norm": 1.387116195245131, - "learning_rate": 1.3520455110789697e-07, - "loss": 0.9549, - "step": 9822 - }, - { - "epoch": 0.8858727510483835, - "grad_norm": 1.3829319358152068, - "learning_rate": 1.3499347712675158e-07, - "loss": 0.9136, - "step": 9823 - }, - { - "epoch": 0.8859629345718537, - "grad_norm": 1.4511887150194256, - "learning_rate": 1.3478256227824635e-07, - "loss": 0.9054, - "step": 9824 - }, - { - "epoch": 0.8860531180953239, - "grad_norm": 1.4204018475554085, - "learning_rate": 1.3457180658037759e-07, - "loss": 0.849, - "step": 9825 - }, - { - "epoch": 0.8861433016187943, - "grad_norm": 1.2977503384740539, - "learning_rate": 1.3436121005112843e-07, - "loss": 0.9651, - "step": 9826 - }, - { - "epoch": 0.8862334851422645, - "grad_norm": 1.4415572586146213, - "learning_rate": 1.3415077270846719e-07, - "loss": 0.9086, - "step": 9827 - }, - { - "epoch": 0.8863236686657348, - "grad_norm": 2.04066877566435, - "learning_rate": 1.3394049457035105e-07, - "loss": 1.045, - "step": 9828 - }, - { - "epoch": 0.886413852189205, - "grad_norm": 1.3970868711638185, - "learning_rate": 1.3373037565472034e-07, - "loss": 0.9489, - "step": 9829 - }, - { - "epoch": 0.8865040357126753, - "grad_norm": 1.2752286332028102, - "learning_rate": 1.3352041597950537e-07, - "loss": 0.9157, - "step": 9830 - }, - { - "epoch": 0.8865942192361456, - "grad_norm": 1.6257370964334825, - "learning_rate": 1.333106155626196e-07, - "loss": 0.9256, - "step": 9831 - }, - { - "epoch": 0.8866844027596158, - "grad_norm": 1.4923257014801077, - "learning_rate": 1.331009744219651e-07, - "loss": 0.8797, - "step": 9832 - }, - { - "epoch": 0.886774586283086, - "grad_norm": 0.6966435411560551, - "learning_rate": 1.3289149257542943e-07, - "loss": 0.8343, - "step": 9833 - }, - { - "epoch": 0.8868647698065564, - "grad_norm": 1.421876676789018, - "learning_rate": 1.3268217004088666e-07, - "loss": 0.8372, - "step": 9834 - }, - { - "epoch": 0.8869549533300266, - "grad_norm": 1.5034329548974994, - "learning_rate": 1.3247300683619788e-07, - "loss": 0.8284, - "step": 9835 - }, - { - "epoch": 0.8870451368534968, - "grad_norm": 1.6660664987790499, - "learning_rate": 1.3226400297920903e-07, - "loss": 0.9151, - "step": 9836 - }, - { - "epoch": 0.8871353203769671, - "grad_norm": 1.407663567357073, - "learning_rate": 1.3205515848775428e-07, - "loss": 0.9129, - "step": 9837 - }, - { - "epoch": 0.8872255039004374, - "grad_norm": 1.5066091054003423, - "learning_rate": 1.3184647337965316e-07, - "loss": 0.8729, - "step": 9838 - }, - { - "epoch": 0.8873156874239077, - "grad_norm": 1.2381019823533421, - "learning_rate": 1.3163794767271163e-07, - "loss": 0.8867, - "step": 9839 - }, - { - "epoch": 0.8874058709473779, - "grad_norm": 1.2905814729236524, - "learning_rate": 1.314295813847226e-07, - "loss": 0.9052, - "step": 9840 - }, - { - "epoch": 0.8874960544708482, - "grad_norm": 1.5952638654179525, - "learning_rate": 1.3122137453346515e-07, - "loss": 1.0378, - "step": 9841 - }, - { - "epoch": 0.8875862379943185, - "grad_norm": 1.5240170672404671, - "learning_rate": 1.3101332713670376e-07, - "loss": 0.9885, - "step": 9842 - }, - { - "epoch": 0.8876764215177887, - "grad_norm": 0.6619575123780534, - "learning_rate": 1.3080543921219133e-07, - "loss": 0.8213, - "step": 9843 - }, - { - "epoch": 0.8877666050412589, - "grad_norm": 1.5866830510941738, - "learning_rate": 1.3059771077766478e-07, - "loss": 0.9349, - "step": 9844 - }, - { - "epoch": 0.8878567885647293, - "grad_norm": 1.597807562268523, - "learning_rate": 1.3039014185085018e-07, - "loss": 0.9744, - "step": 9845 - }, - { - "epoch": 0.8879469720881995, - "grad_norm": 1.7573020363414855, - "learning_rate": 1.301827324494571e-07, - "loss": 0.9449, - "step": 9846 - }, - { - "epoch": 0.8880371556116697, - "grad_norm": 1.9947047467795707, - "learning_rate": 1.2997548259118342e-07, - "loss": 0.9667, - "step": 9847 - }, - { - "epoch": 0.88812733913514, - "grad_norm": 1.491582298379186, - "learning_rate": 1.2976839229371272e-07, - "loss": 0.967, - "step": 9848 - }, - { - "epoch": 0.8882175226586103, - "grad_norm": 0.6353481495504946, - "learning_rate": 1.2956146157471515e-07, - "loss": 0.8138, - "step": 9849 - }, - { - "epoch": 0.8883077061820805, - "grad_norm": 1.3067073605406083, - "learning_rate": 1.2935469045184745e-07, - "loss": 0.9576, - "step": 9850 - }, - { - "epoch": 0.8883978897055508, - "grad_norm": 1.6961166654397901, - "learning_rate": 1.291480789427517e-07, - "loss": 0.9137, - "step": 9851 - }, - { - "epoch": 0.888488073229021, - "grad_norm": 1.6071539061030284, - "learning_rate": 1.2894162706505807e-07, - "loss": 0.9737, - "step": 9852 - }, - { - "epoch": 0.8885782567524914, - "grad_norm": 1.3730015971069605, - "learning_rate": 1.2873533483638155e-07, - "loss": 0.9337, - "step": 9853 - }, - { - "epoch": 0.8886684402759616, - "grad_norm": 1.4184196322791303, - "learning_rate": 1.285292022743243e-07, - "loss": 0.8817, - "step": 9854 - }, - { - "epoch": 0.8887586237994318, - "grad_norm": 1.677693376495389, - "learning_rate": 1.2832322939647467e-07, - "loss": 0.8808, - "step": 9855 - }, - { - "epoch": 0.8888488073229021, - "grad_norm": 1.5718515170639265, - "learning_rate": 1.281174162204075e-07, - "loss": 0.9199, - "step": 9856 - }, - { - "epoch": 0.8889389908463724, - "grad_norm": 1.293606741893306, - "learning_rate": 1.2791176276368366e-07, - "loss": 0.8172, - "step": 9857 - }, - { - "epoch": 0.8890291743698426, - "grad_norm": 1.2677123670218664, - "learning_rate": 1.2770626904385128e-07, - "loss": 0.9459, - "step": 9858 - }, - { - "epoch": 0.8891193578933129, - "grad_norm": 1.2952994001989273, - "learning_rate": 1.2750093507844306e-07, - "loss": 0.7961, - "step": 9859 - }, - { - "epoch": 0.8892095414167831, - "grad_norm": 1.3937681872630974, - "learning_rate": 1.272957608849805e-07, - "loss": 0.9763, - "step": 9860 - }, - { - "epoch": 0.8892997249402534, - "grad_norm": 1.5136776241070256, - "learning_rate": 1.270907464809694e-07, - "loss": 0.9153, - "step": 9861 - }, - { - "epoch": 0.8893899084637237, - "grad_norm": 1.37125171935983, - "learning_rate": 1.2688589188390285e-07, - "loss": 0.9386, - "step": 9862 - }, - { - "epoch": 0.8894800919871939, - "grad_norm": 1.6122138161238901, - "learning_rate": 1.2668119711126023e-07, - "loss": 0.9442, - "step": 9863 - }, - { - "epoch": 0.8895702755106643, - "grad_norm": 1.2991993309124976, - "learning_rate": 1.2647666218050735e-07, - "loss": 0.9362, - "step": 9864 - }, - { - "epoch": 0.8896604590341345, - "grad_norm": 1.5949599964781564, - "learning_rate": 1.2627228710909643e-07, - "loss": 0.8997, - "step": 9865 - }, - { - "epoch": 0.8897506425576047, - "grad_norm": 1.3072511988401068, - "learning_rate": 1.260680719144649e-07, - "loss": 0.909, - "step": 9866 - }, - { - "epoch": 0.889840826081075, - "grad_norm": 2.0138230660157412, - "learning_rate": 1.2586401661403877e-07, - "loss": 0.8613, - "step": 9867 - }, - { - "epoch": 0.8899310096045453, - "grad_norm": 1.5991846063246078, - "learning_rate": 1.2566012122522817e-07, - "loss": 0.9076, - "step": 9868 - }, - { - "epoch": 0.8900211931280155, - "grad_norm": 1.707980662283319, - "learning_rate": 1.254563857654316e-07, - "loss": 0.9159, - "step": 9869 - }, - { - "epoch": 0.8901113766514858, - "grad_norm": 1.5292628102996875, - "learning_rate": 1.2525281025203205e-07, - "loss": 0.8251, - "step": 9870 - }, - { - "epoch": 0.890201560174956, - "grad_norm": 1.6226841490270563, - "learning_rate": 1.2504939470240006e-07, - "loss": 0.9155, - "step": 9871 - }, - { - "epoch": 0.8902917436984263, - "grad_norm": 1.370744927095846, - "learning_rate": 1.2484613913389196e-07, - "loss": 1.0024, - "step": 9872 - }, - { - "epoch": 0.8903819272218966, - "grad_norm": 1.4621594872168868, - "learning_rate": 1.2464304356385057e-07, - "loss": 0.8518, - "step": 9873 - }, - { - "epoch": 0.8904721107453668, - "grad_norm": 1.7449528254387257, - "learning_rate": 1.2444010800960558e-07, - "loss": 1.0009, - "step": 9874 - }, - { - "epoch": 0.890562294268837, - "grad_norm": 1.2994462227579269, - "learning_rate": 1.2423733248847267e-07, - "loss": 0.9156, - "step": 9875 - }, - { - "epoch": 0.8906524777923074, - "grad_norm": 1.4702433035739702, - "learning_rate": 1.2403471701775293e-07, - "loss": 0.8798, - "step": 9876 - }, - { - "epoch": 0.8907426613157776, - "grad_norm": 1.4084841041819014, - "learning_rate": 1.2383226161473515e-07, - "loss": 0.9725, - "step": 9877 - }, - { - "epoch": 0.8908328448392479, - "grad_norm": 1.6933145308139013, - "learning_rate": 1.2362996629669376e-07, - "loss": 1.0144, - "step": 9878 - }, - { - "epoch": 0.8909230283627181, - "grad_norm": 1.3356593773879926, - "learning_rate": 1.2342783108089007e-07, - "loss": 0.9451, - "step": 9879 - }, - { - "epoch": 0.8910132118861884, - "grad_norm": 1.5936190981494205, - "learning_rate": 1.2322585598457135e-07, - "loss": 0.9903, - "step": 9880 - }, - { - "epoch": 0.8911033954096587, - "grad_norm": 1.136857486399115, - "learning_rate": 1.2302404102497034e-07, - "loss": 0.9171, - "step": 9881 - }, - { - "epoch": 0.8911935789331289, - "grad_norm": 1.6585483798174807, - "learning_rate": 1.228223862193083e-07, - "loss": 0.8702, - "step": 9882 - }, - { - "epoch": 0.8912837624565991, - "grad_norm": 2.2499801846797265, - "learning_rate": 1.2262089158479038e-07, - "loss": 0.9473, - "step": 9883 - }, - { - "epoch": 0.8913739459800695, - "grad_norm": 1.5823275065009739, - "learning_rate": 1.2241955713861042e-07, - "loss": 0.9752, - "step": 9884 - }, - { - "epoch": 0.8914641295035397, - "grad_norm": 1.4623324012906997, - "learning_rate": 1.222183828979464e-07, - "loss": 0.878, - "step": 9885 - }, - { - "epoch": 0.8915543130270099, - "grad_norm": 2.092265841418287, - "learning_rate": 1.2201736887996372e-07, - "loss": 0.9304, - "step": 9886 - }, - { - "epoch": 0.8916444965504803, - "grad_norm": 1.4408449149930327, - "learning_rate": 1.2181651510181444e-07, - "loss": 1.0092, - "step": 9887 - }, - { - "epoch": 0.8917346800739505, - "grad_norm": 1.3541747410851397, - "learning_rate": 1.2161582158063622e-07, - "loss": 0.9624, - "step": 9888 - }, - { - "epoch": 0.8918248635974207, - "grad_norm": 1.4280261616968368, - "learning_rate": 1.214152883335533e-07, - "loss": 0.8502, - "step": 9889 - }, - { - "epoch": 0.891915047120891, - "grad_norm": 1.5428221041369052, - "learning_rate": 1.2121491537767648e-07, - "loss": 0.8844, - "step": 9890 - }, - { - "epoch": 0.8920052306443613, - "grad_norm": 1.3023249084171509, - "learning_rate": 1.2101470273010294e-07, - "loss": 0.9723, - "step": 9891 - }, - { - "epoch": 0.8920954141678316, - "grad_norm": 1.722996269456718, - "learning_rate": 1.2081465040791528e-07, - "loss": 0.9151, - "step": 9892 - }, - { - "epoch": 0.8921855976913018, - "grad_norm": 1.546915535443874, - "learning_rate": 1.2061475842818335e-07, - "loss": 0.9006, - "step": 9893 - }, - { - "epoch": 0.892275781214772, - "grad_norm": 1.2926446329796215, - "learning_rate": 1.2041502680796313e-07, - "loss": 0.9075, - "step": 9894 - }, - { - "epoch": 0.8923659647382424, - "grad_norm": 1.3056542931863855, - "learning_rate": 1.2021545556429648e-07, - "loss": 0.9415, - "step": 9895 - }, - { - "epoch": 0.8924561482617126, - "grad_norm": 1.769982960290301, - "learning_rate": 1.2001604471421245e-07, - "loss": 0.9323, - "step": 9896 - }, - { - "epoch": 0.8925463317851828, - "grad_norm": 1.2943557721620433, - "learning_rate": 1.1981679427472567e-07, - "loss": 0.9875, - "step": 9897 - }, - { - "epoch": 0.8926365153086531, - "grad_norm": 1.4336790636335668, - "learning_rate": 1.196177042628368e-07, - "loss": 0.9847, - "step": 9898 - }, - { - "epoch": 0.8927266988321234, - "grad_norm": 1.6798232622505473, - "learning_rate": 1.194187746955344e-07, - "loss": 0.9426, - "step": 9899 - }, - { - "epoch": 0.8928168823555936, - "grad_norm": 1.3173784237527666, - "learning_rate": 1.1922000558979094e-07, - "loss": 0.9128, - "step": 9900 - }, - { - "epoch": 0.8929070658790639, - "grad_norm": 1.1896854668558041, - "learning_rate": 1.1902139696256752e-07, - "loss": 0.9683, - "step": 9901 - }, - { - "epoch": 0.8929972494025341, - "grad_norm": 1.562777609005629, - "learning_rate": 1.188229488308099e-07, - "loss": 0.872, - "step": 9902 - }, - { - "epoch": 0.8930874329260045, - "grad_norm": 0.558850156183657, - "learning_rate": 1.1862466121145098e-07, - "loss": 0.7702, - "step": 9903 - }, - { - "epoch": 0.8931776164494747, - "grad_norm": 1.8851787845610057, - "learning_rate": 1.184265341214099e-07, - "loss": 0.8803, - "step": 9904 - }, - { - "epoch": 0.8932677999729449, - "grad_norm": 1.429345095129395, - "learning_rate": 1.182285675775918e-07, - "loss": 0.9608, - "step": 9905 - }, - { - "epoch": 0.8933579834964152, - "grad_norm": 1.940257171810281, - "learning_rate": 1.1803076159688851e-07, - "loss": 0.917, - "step": 9906 - }, - { - "epoch": 0.8934481670198855, - "grad_norm": 1.376953125, - "learning_rate": 1.1783311619617741e-07, - "loss": 0.966, - "step": 9907 - }, - { - "epoch": 0.8935383505433557, - "grad_norm": 1.4323169312994284, - "learning_rate": 1.1763563139232302e-07, - "loss": 0.9032, - "step": 9908 - }, - { - "epoch": 0.893628534066826, - "grad_norm": 1.6676534671879963, - "learning_rate": 1.1743830720217562e-07, - "loss": 0.8761, - "step": 9909 - }, - { - "epoch": 0.8937187175902963, - "grad_norm": 1.6365392035646678, - "learning_rate": 1.1724114364257243e-07, - "loss": 0.9141, - "step": 9910 - }, - { - "epoch": 0.8938089011137665, - "grad_norm": 1.3835812603102993, - "learning_rate": 1.1704414073033619e-07, - "loss": 0.9394, - "step": 9911 - }, - { - "epoch": 0.8938990846372368, - "grad_norm": 1.5311146111872038, - "learning_rate": 1.1684729848227636e-07, - "loss": 0.9763, - "step": 9912 - }, - { - "epoch": 0.893989268160707, - "grad_norm": 1.3594738277597136, - "learning_rate": 1.1665061691518884e-07, - "loss": 0.9865, - "step": 9913 - }, - { - "epoch": 0.8940794516841774, - "grad_norm": 1.7584856566971396, - "learning_rate": 1.1645409604585532e-07, - "loss": 0.9337, - "step": 9914 - }, - { - "epoch": 0.8941696352076476, - "grad_norm": 1.3171549855265787, - "learning_rate": 1.162577358910437e-07, - "loss": 0.9485, - "step": 9915 - }, - { - "epoch": 0.8942598187311178, - "grad_norm": 1.4611848835921093, - "learning_rate": 1.160615364675095e-07, - "loss": 0.9155, - "step": 9916 - }, - { - "epoch": 0.894350002254588, - "grad_norm": 0.6278887508450274, - "learning_rate": 1.1586549779199262e-07, - "loss": 0.7891, - "step": 9917 - }, - { - "epoch": 0.8944401857780584, - "grad_norm": 1.3669121165179008, - "learning_rate": 1.1566961988122037e-07, - "loss": 0.9968, - "step": 9918 - }, - { - "epoch": 0.8945303693015286, - "grad_norm": 2.0178571066874467, - "learning_rate": 1.1547390275190627e-07, - "loss": 0.8997, - "step": 9919 - }, - { - "epoch": 0.8946205528249989, - "grad_norm": 1.6340143626409454, - "learning_rate": 1.1527834642075007e-07, - "loss": 0.8827, - "step": 9920 - }, - { - "epoch": 0.8947107363484691, - "grad_norm": 1.5580983533849206, - "learning_rate": 1.1508295090443797e-07, - "loss": 0.9161, - "step": 9921 - }, - { - "epoch": 0.8948009198719394, - "grad_norm": 1.9920288621159714, - "learning_rate": 1.148877162196411e-07, - "loss": 0.8803, - "step": 9922 - }, - { - "epoch": 0.8948911033954097, - "grad_norm": 1.850565903688559, - "learning_rate": 1.1469264238301924e-07, - "loss": 0.868, - "step": 9923 - }, - { - "epoch": 0.8949812869188799, - "grad_norm": 1.4546576999844827, - "learning_rate": 1.1449772941121638e-07, - "loss": 0.9562, - "step": 9924 - }, - { - "epoch": 0.8950714704423501, - "grad_norm": 1.617531781245498, - "learning_rate": 1.1430297732086369e-07, - "loss": 0.9044, - "step": 9925 - }, - { - "epoch": 0.8951616539658205, - "grad_norm": 1.473589047742065, - "learning_rate": 1.1410838612857876e-07, - "loss": 0.9318, - "step": 9926 - }, - { - "epoch": 0.8952518374892907, - "grad_norm": 1.7855352420966222, - "learning_rate": 1.1391395585096497e-07, - "loss": 1.0267, - "step": 9927 - }, - { - "epoch": 0.895342021012761, - "grad_norm": 1.5046124274202433, - "learning_rate": 1.1371968650461216e-07, - "loss": 0.8754, - "step": 9928 - }, - { - "epoch": 0.8954322045362312, - "grad_norm": 0.7227131589184277, - "learning_rate": 1.1352557810609687e-07, - "loss": 0.7983, - "step": 9929 - }, - { - "epoch": 0.8955223880597015, - "grad_norm": 1.4436058897285675, - "learning_rate": 1.1333163067198048e-07, - "loss": 0.9203, - "step": 9930 - }, - { - "epoch": 0.8956125715831718, - "grad_norm": 1.2746426081438627, - "learning_rate": 1.1313784421881311e-07, - "loss": 1.0003, - "step": 9931 - }, - { - "epoch": 0.895702755106642, - "grad_norm": 1.9631875795666336, - "learning_rate": 1.1294421876312865e-07, - "loss": 0.966, - "step": 9932 - }, - { - "epoch": 0.8957929386301122, - "grad_norm": 1.6338924505215193, - "learning_rate": 1.1275075432144831e-07, - "loss": 0.906, - "step": 9933 - }, - { - "epoch": 0.8958831221535826, - "grad_norm": 1.6198898808999536, - "learning_rate": 1.1255745091028002e-07, - "loss": 0.95, - "step": 9934 - }, - { - "epoch": 0.8959733056770528, - "grad_norm": 1.4288408451117955, - "learning_rate": 1.1236430854611723e-07, - "loss": 0.9502, - "step": 9935 - }, - { - "epoch": 0.896063489200523, - "grad_norm": 1.5011595376843083, - "learning_rate": 1.1217132724544032e-07, - "loss": 0.9077, - "step": 9936 - }, - { - "epoch": 0.8961536727239934, - "grad_norm": 1.6333493896914837, - "learning_rate": 1.1197850702471434e-07, - "loss": 0.9845, - "step": 9937 - }, - { - "epoch": 0.8962438562474636, - "grad_norm": 1.5065900363395666, - "learning_rate": 1.1178584790039348e-07, - "loss": 1.0064, - "step": 9938 - }, - { - "epoch": 0.8963340397709338, - "grad_norm": 1.380744681120682, - "learning_rate": 1.1159334988891478e-07, - "loss": 0.8837, - "step": 9939 - }, - { - "epoch": 0.8964242232944041, - "grad_norm": 2.079984707592722, - "learning_rate": 1.1140101300670446e-07, - "loss": 0.9128, - "step": 9940 - }, - { - "epoch": 0.8965144068178744, - "grad_norm": 1.3548738784792436, - "learning_rate": 1.1120883727017338e-07, - "loss": 0.8009, - "step": 9941 - }, - { - "epoch": 0.8966045903413447, - "grad_norm": 1.558305603773402, - "learning_rate": 1.1101682269571889e-07, - "loss": 0.9901, - "step": 9942 - }, - { - "epoch": 0.8966947738648149, - "grad_norm": 1.5445619494214486, - "learning_rate": 1.1082496929972473e-07, - "loss": 0.9166, - "step": 9943 - }, - { - "epoch": 0.8967849573882851, - "grad_norm": 1.4314693332842232, - "learning_rate": 1.1063327709856096e-07, - "loss": 0.9101, - "step": 9944 - }, - { - "epoch": 0.8968751409117555, - "grad_norm": 1.6138001870433563, - "learning_rate": 1.1044174610858403e-07, - "loss": 0.8179, - "step": 9945 - }, - { - "epoch": 0.8969653244352257, - "grad_norm": 1.6250596402301085, - "learning_rate": 1.1025037634613643e-07, - "loss": 0.8883, - "step": 9946 - }, - { - "epoch": 0.8970555079586959, - "grad_norm": 1.2735163190355487, - "learning_rate": 1.1005916782754643e-07, - "loss": 0.9614, - "step": 9947 - }, - { - "epoch": 0.8971456914821662, - "grad_norm": 1.6397091443636402, - "learning_rate": 1.0986812056912919e-07, - "loss": 0.9175, - "step": 9948 - }, - { - "epoch": 0.8972358750056365, - "grad_norm": 1.6425792861916229, - "learning_rate": 1.0967723458718613e-07, - "loss": 0.8133, - "step": 9949 - }, - { - "epoch": 0.8973260585291067, - "grad_norm": 1.8022089199634095, - "learning_rate": 1.0948650989800445e-07, - "loss": 0.9019, - "step": 9950 - }, - { - "epoch": 0.897416242052577, - "grad_norm": 1.3257370967852111, - "learning_rate": 1.0929594651785823e-07, - "loss": 0.9142, - "step": 9951 - }, - { - "epoch": 0.8975064255760472, - "grad_norm": 1.4350316790496642, - "learning_rate": 1.0910554446300646e-07, - "loss": 0.9851, - "step": 9952 - }, - { - "epoch": 0.8975966090995176, - "grad_norm": 1.3767778866771372, - "learning_rate": 1.089153037496966e-07, - "loss": 0.8327, - "step": 9953 - }, - { - "epoch": 0.8976867926229878, - "grad_norm": 2.0994107600463026, - "learning_rate": 1.0872522439415966e-07, - "loss": 0.954, - "step": 9954 - }, - { - "epoch": 0.897776976146458, - "grad_norm": 1.6571313204622125, - "learning_rate": 1.0853530641261554e-07, - "loss": 0.8819, - "step": 9955 - }, - { - "epoch": 0.8978671596699282, - "grad_norm": 1.3644160054047842, - "learning_rate": 1.083455498212682e-07, - "loss": 0.9455, - "step": 9956 - }, - { - "epoch": 0.8979573431933986, - "grad_norm": 1.569203226950325, - "learning_rate": 1.0815595463630911e-07, - "loss": 0.9727, - "step": 9957 - }, - { - "epoch": 0.8980475267168688, - "grad_norm": 1.4343455159932275, - "learning_rate": 1.0796652087391556e-07, - "loss": 0.8079, - "step": 9958 - }, - { - "epoch": 0.8981377102403391, - "grad_norm": 1.343774662235075, - "learning_rate": 1.0777724855025083e-07, - "loss": 1.0264, - "step": 9959 - }, - { - "epoch": 0.8982278937638094, - "grad_norm": 1.3857510050949424, - "learning_rate": 1.075881376814649e-07, - "loss": 0.932, - "step": 9960 - }, - { - "epoch": 0.8983180772872796, - "grad_norm": 1.374306677368659, - "learning_rate": 1.0739918828369377e-07, - "loss": 0.9884, - "step": 9961 - }, - { - "epoch": 0.8984082608107499, - "grad_norm": 1.6440853864581024, - "learning_rate": 1.0721040037305983e-07, - "loss": 0.9333, - "step": 9962 - }, - { - "epoch": 0.8984984443342201, - "grad_norm": 1.4174729838815905, - "learning_rate": 1.0702177396567114e-07, - "loss": 0.8861, - "step": 9963 - }, - { - "epoch": 0.8985886278576904, - "grad_norm": 0.6321487949306447, - "learning_rate": 1.0683330907762233e-07, - "loss": 0.7802, - "step": 9964 - }, - { - "epoch": 0.8986788113811607, - "grad_norm": 1.3670821231013905, - "learning_rate": 1.0664500572499435e-07, - "loss": 0.8713, - "step": 9965 - }, - { - "epoch": 0.8987689949046309, - "grad_norm": 1.558169505573564, - "learning_rate": 1.0645686392385455e-07, - "loss": 0.8806, - "step": 9966 - }, - { - "epoch": 0.8988591784281011, - "grad_norm": 1.273717276140661, - "learning_rate": 1.0626888369025588e-07, - "loss": 0.8701, - "step": 9967 - }, - { - "epoch": 0.8989493619515715, - "grad_norm": 1.3095851957085614, - "learning_rate": 1.0608106504023817e-07, - "loss": 0.967, - "step": 9968 - }, - { - "epoch": 0.8990395454750417, - "grad_norm": 2.4163691787022077, - "learning_rate": 1.0589340798982637e-07, - "loss": 0.9168, - "step": 9969 - }, - { - "epoch": 0.899129728998512, - "grad_norm": 1.345149265359351, - "learning_rate": 1.057059125550337e-07, - "loss": 0.8776, - "step": 9970 - }, - { - "epoch": 0.8992199125219822, - "grad_norm": 1.188877962607883, - "learning_rate": 1.0551857875185732e-07, - "loss": 0.9428, - "step": 9971 - }, - { - "epoch": 0.8993100960454525, - "grad_norm": 1.25511040312135, - "learning_rate": 1.0533140659628181e-07, - "loss": 0.8133, - "step": 9972 - }, - { - "epoch": 0.8994002795689228, - "grad_norm": 1.3288194075190871, - "learning_rate": 1.0514439610427772e-07, - "loss": 0.9867, - "step": 9973 - }, - { - "epoch": 0.899490463092393, - "grad_norm": 1.420699839133238, - "learning_rate": 1.0495754729180206e-07, - "loss": 0.9602, - "step": 9974 - }, - { - "epoch": 0.8995806466158632, - "grad_norm": 1.2926734517955374, - "learning_rate": 1.0477086017479741e-07, - "loss": 1.0316, - "step": 9975 - }, - { - "epoch": 0.8996708301393336, - "grad_norm": 1.4906810725959871, - "learning_rate": 1.0458433476919327e-07, - "loss": 0.8934, - "step": 9976 - }, - { - "epoch": 0.8997610136628038, - "grad_norm": 1.2176924787957135, - "learning_rate": 1.0439797109090509e-07, - "loss": 0.9616, - "step": 9977 - }, - { - "epoch": 0.899851197186274, - "grad_norm": 1.4823138734177665, - "learning_rate": 1.0421176915583396e-07, - "loss": 0.897, - "step": 9978 - }, - { - "epoch": 0.8999413807097443, - "grad_norm": 1.5288092175521724, - "learning_rate": 1.0402572897986828e-07, - "loss": 0.9585, - "step": 9979 - }, - { - "epoch": 0.9000315642332146, - "grad_norm": 0.6050112811132341, - "learning_rate": 1.0383985057888134e-07, - "loss": 0.7936, - "step": 9980 - }, - { - "epoch": 0.9001217477566849, - "grad_norm": 1.6996513429903446, - "learning_rate": 1.036541339687338e-07, - "loss": 0.8448, - "step": 9981 - }, - { - "epoch": 0.9002119312801551, - "grad_norm": 0.7028633902559522, - "learning_rate": 1.0346857916527186e-07, - "loss": 0.7746, - "step": 9982 - }, - { - "epoch": 0.9003021148036254, - "grad_norm": 1.3586939717158002, - "learning_rate": 1.0328318618432819e-07, - "loss": 0.9362, - "step": 9983 - }, - { - "epoch": 0.9003922983270957, - "grad_norm": 1.4561219023983414, - "learning_rate": 1.0309795504172148e-07, - "loss": 0.9958, - "step": 9984 - }, - { - "epoch": 0.9004824818505659, - "grad_norm": 1.5289284370695482, - "learning_rate": 1.0291288575325685e-07, - "loss": 0.9086, - "step": 9985 - }, - { - "epoch": 0.9005726653740361, - "grad_norm": 1.6236469798190996, - "learning_rate": 1.0272797833472502e-07, - "loss": 0.8954, - "step": 9986 - }, - { - "epoch": 0.9006628488975065, - "grad_norm": 0.696933934580711, - "learning_rate": 1.0254323280190335e-07, - "loss": 0.7792, - "step": 9987 - }, - { - "epoch": 0.9007530324209767, - "grad_norm": 1.6805395050263179, - "learning_rate": 1.023586491705557e-07, - "loss": 0.9099, - "step": 9988 - }, - { - "epoch": 0.9008432159444469, - "grad_norm": 1.4666306646581146, - "learning_rate": 1.0217422745643145e-07, - "loss": 0.8717, - "step": 9989 - }, - { - "epoch": 0.9009333994679172, - "grad_norm": 1.4231481197256703, - "learning_rate": 1.0198996767526691e-07, - "loss": 0.8554, - "step": 9990 - }, - { - "epoch": 0.9010235829913875, - "grad_norm": 1.4929436171016655, - "learning_rate": 1.018058698427835e-07, - "loss": 0.9172, - "step": 9991 - }, - { - "epoch": 0.9011137665148578, - "grad_norm": 1.2935004394436131, - "learning_rate": 1.0162193397469021e-07, - "loss": 0.9735, - "step": 9992 - }, - { - "epoch": 0.901203950038328, - "grad_norm": 1.4706411158646941, - "learning_rate": 1.0143816008668049e-07, - "loss": 0.8663, - "step": 9993 - }, - { - "epoch": 0.9012941335617982, - "grad_norm": 0.6433108479971426, - "learning_rate": 1.0125454819443624e-07, - "loss": 0.8098, - "step": 9994 - }, - { - "epoch": 0.9013843170852686, - "grad_norm": 1.2324224553658498, - "learning_rate": 1.0107109831362315e-07, - "loss": 0.8373, - "step": 9995 - }, - { - "epoch": 0.9014745006087388, - "grad_norm": 1.37052466599745, - "learning_rate": 1.0088781045989447e-07, - "loss": 0.9425, - "step": 9996 - }, - { - "epoch": 0.901564684132209, - "grad_norm": 1.5755820606619513, - "learning_rate": 1.0070468464888926e-07, - "loss": 0.9787, - "step": 9997 - }, - { - "epoch": 0.9016548676556793, - "grad_norm": 1.5861461483362231, - "learning_rate": 1.0052172089623324e-07, - "loss": 0.9485, - "step": 9998 - }, - { - "epoch": 0.9017450511791496, - "grad_norm": 1.517268754411694, - "learning_rate": 1.0033891921753746e-07, - "loss": 0.9706, - "step": 9999 - }, - { - "epoch": 0.9018352347026198, - "grad_norm": 1.5524673328249214, - "learning_rate": 1.0015627962839968e-07, - "loss": 0.849, - "step": 10000 - }, - { - "epoch": 0.9019254182260901, - "grad_norm": 1.3453353578617173, - "learning_rate": 9.99738021444041e-08, - "loss": 0.9086, - "step": 10001 - }, - { - "epoch": 0.9020156017495603, - "grad_norm": 1.211106762285231, - "learning_rate": 9.979148678112003e-08, - "loss": 0.8642, - "step": 10002 - }, - { - "epoch": 0.9021057852730306, - "grad_norm": 1.765519468141279, - "learning_rate": 9.960933355410417e-08, - "loss": 1.0312, - "step": 10003 - }, - { - "epoch": 0.9021959687965009, - "grad_norm": 1.4281204056613737, - "learning_rate": 9.942734247889828e-08, - "loss": 0.8498, - "step": 10004 - }, - { - "epoch": 0.9022861523199711, - "grad_norm": 1.323118940293568, - "learning_rate": 9.92455135710315e-08, - "loss": 0.9419, - "step": 10005 - }, - { - "epoch": 0.9023763358434415, - "grad_norm": 1.5016412497996903, - "learning_rate": 9.906384684601787e-08, - "loss": 0.8756, - "step": 10006 - }, - { - "epoch": 0.9024665193669117, - "grad_norm": 1.3093794556316596, - "learning_rate": 9.8882342319359e-08, - "loss": 0.9768, - "step": 10007 - }, - { - "epoch": 0.9025567028903819, - "grad_norm": 1.2659565055812294, - "learning_rate": 9.870100000654048e-08, - "loss": 0.8808, - "step": 10008 - }, - { - "epoch": 0.9026468864138522, - "grad_norm": 1.2340849523561812, - "learning_rate": 9.851981992303704e-08, - "loss": 0.9049, - "step": 10009 - }, - { - "epoch": 0.9027370699373225, - "grad_norm": 1.267827272543009, - "learning_rate": 9.833880208430678e-08, - "loss": 0.9333, - "step": 10010 - }, - { - "epoch": 0.9028272534607927, - "grad_norm": 1.5716391505923624, - "learning_rate": 9.815794650579601e-08, - "loss": 0.9203, - "step": 10011 - }, - { - "epoch": 0.902917436984263, - "grad_norm": 1.3997888695371492, - "learning_rate": 9.797725320293548e-08, - "loss": 0.9117, - "step": 10012 - }, - { - "epoch": 0.9030076205077332, - "grad_norm": 1.6342885769519828, - "learning_rate": 9.779672219114354e-08, - "loss": 1.0023, - "step": 10013 - }, - { - "epoch": 0.9030978040312035, - "grad_norm": 1.4700952416012583, - "learning_rate": 9.761635348582386e-08, - "loss": 0.9151, - "step": 10014 - }, - { - "epoch": 0.9031879875546738, - "grad_norm": 1.6923609763540073, - "learning_rate": 9.743614710236658e-08, - "loss": 0.9638, - "step": 10015 - }, - { - "epoch": 0.903278171078144, - "grad_norm": 1.5083986240116662, - "learning_rate": 9.725610305614806e-08, - "loss": 0.8913, - "step": 10016 - }, - { - "epoch": 0.9033683546016142, - "grad_norm": 1.3135303358677843, - "learning_rate": 9.707622136253002e-08, - "loss": 0.9293, - "step": 10017 - }, - { - "epoch": 0.9034585381250846, - "grad_norm": 1.3362737756653327, - "learning_rate": 9.689650203686173e-08, - "loss": 0.9565, - "step": 10018 - }, - { - "epoch": 0.9035487216485548, - "grad_norm": 1.5538431299584012, - "learning_rate": 9.671694509447715e-08, - "loss": 0.9113, - "step": 10019 - }, - { - "epoch": 0.903638905172025, - "grad_norm": 1.4808641373767313, - "learning_rate": 9.653755055069757e-08, - "loss": 0.9376, - "step": 10020 - }, - { - "epoch": 0.9037290886954953, - "grad_norm": 1.1814880272560953, - "learning_rate": 9.635831842082987e-08, - "loss": 0.9036, - "step": 10021 - }, - { - "epoch": 0.9038192722189656, - "grad_norm": 1.5311247326740645, - "learning_rate": 9.617924872016691e-08, - "loss": 0.945, - "step": 10022 - }, - { - "epoch": 0.9039094557424359, - "grad_norm": 1.4579848372635114, - "learning_rate": 9.600034146398806e-08, - "loss": 0.972, - "step": 10023 - }, - { - "epoch": 0.9039996392659061, - "grad_norm": 0.8047171652057107, - "learning_rate": 9.582159666755863e-08, - "loss": 0.7982, - "step": 10024 - }, - { - "epoch": 0.9040898227893763, - "grad_norm": 1.2894191826458665, - "learning_rate": 9.564301434612976e-08, - "loss": 0.9022, - "step": 10025 - }, - { - "epoch": 0.9041800063128467, - "grad_norm": 1.4619558936118193, - "learning_rate": 9.546459451494015e-08, - "loss": 0.9717, - "step": 10026 - }, - { - "epoch": 0.9042701898363169, - "grad_norm": 0.681876483324712, - "learning_rate": 9.528633718921231e-08, - "loss": 0.8301, - "step": 10027 - }, - { - "epoch": 0.9043603733597871, - "grad_norm": 1.2114601238015552, - "learning_rate": 9.510824238415672e-08, - "loss": 0.9821, - "step": 10028 - }, - { - "epoch": 0.9044505568832575, - "grad_norm": 1.317640410872827, - "learning_rate": 9.493031011496944e-08, - "loss": 0.9636, - "step": 10029 - }, - { - "epoch": 0.9045407404067277, - "grad_norm": 1.4762909654687761, - "learning_rate": 9.475254039683234e-08, - "loss": 0.7915, - "step": 10030 - }, - { - "epoch": 0.904630923930198, - "grad_norm": 1.5107164633495915, - "learning_rate": 9.45749332449144e-08, - "loss": 0.9001, - "step": 10031 - }, - { - "epoch": 0.9047211074536682, - "grad_norm": 0.640885416459992, - "learning_rate": 9.439748867436903e-08, - "loss": 0.83, - "step": 10032 - }, - { - "epoch": 0.9048112909771385, - "grad_norm": 1.401081675510102, - "learning_rate": 9.42202067003377e-08, - "loss": 0.8937, - "step": 10033 - }, - { - "epoch": 0.9049014745006088, - "grad_norm": 1.617116954204627, - "learning_rate": 9.404308733794652e-08, - "loss": 1.0671, - "step": 10034 - }, - { - "epoch": 0.904991658024079, - "grad_norm": 1.2849851917645585, - "learning_rate": 9.38661306023083e-08, - "loss": 0.95, - "step": 10035 - }, - { - "epoch": 0.9050818415475492, - "grad_norm": 1.4933707454087877, - "learning_rate": 9.368933650852229e-08, - "loss": 0.9255, - "step": 10036 - }, - { - "epoch": 0.9051720250710196, - "grad_norm": 0.745243806692326, - "learning_rate": 9.351270507167352e-08, - "loss": 0.8401, - "step": 10037 - }, - { - "epoch": 0.9052622085944898, - "grad_norm": 1.5067258091236755, - "learning_rate": 9.333623630683285e-08, - "loss": 0.8738, - "step": 10038 - }, - { - "epoch": 0.90535239211796, - "grad_norm": 2.305223512549284, - "learning_rate": 9.315993022905799e-08, - "loss": 0.7902, - "step": 10039 - }, - { - "epoch": 0.9054425756414303, - "grad_norm": 1.402353398281899, - "learning_rate": 9.298378685339158e-08, - "loss": 0.9123, - "step": 10040 - }, - { - "epoch": 0.9055327591649006, - "grad_norm": 1.6908396417165774, - "learning_rate": 9.280780619486406e-08, - "loss": 0.9985, - "step": 10041 - }, - { - "epoch": 0.9056229426883708, - "grad_norm": 1.3533098689207728, - "learning_rate": 9.26319882684905e-08, - "loss": 1.0143, - "step": 10042 - }, - { - "epoch": 0.9057131262118411, - "grad_norm": 1.3003544837648178, - "learning_rate": 9.245633308927293e-08, - "loss": 0.9067, - "step": 10043 - }, - { - "epoch": 0.9058033097353113, - "grad_norm": 1.6348112731175297, - "learning_rate": 9.228084067219888e-08, - "loss": 0.9221, - "step": 10044 - }, - { - "epoch": 0.9058934932587817, - "grad_norm": 1.4661695660239435, - "learning_rate": 9.210551103224284e-08, - "loss": 0.8799, - "step": 10045 - }, - { - "epoch": 0.9059836767822519, - "grad_norm": 1.283994062228749, - "learning_rate": 9.193034418436463e-08, - "loss": 0.9814, - "step": 10046 - }, - { - "epoch": 0.9060738603057221, - "grad_norm": 0.6536276968052102, - "learning_rate": 9.175534014351005e-08, - "loss": 0.8269, - "step": 10047 - }, - { - "epoch": 0.9061640438291924, - "grad_norm": 2.158196165323903, - "learning_rate": 9.158049892461228e-08, - "loss": 0.8794, - "step": 10048 - }, - { - "epoch": 0.9062542273526627, - "grad_norm": 1.228732237042304, - "learning_rate": 9.140582054258871e-08, - "loss": 0.8723, - "step": 10049 - }, - { - "epoch": 0.9063444108761329, - "grad_norm": 1.3846694202658967, - "learning_rate": 9.123130501234499e-08, - "loss": 0.9421, - "step": 10050 - }, - { - "epoch": 0.9064345943996032, - "grad_norm": 1.594293838163852, - "learning_rate": 9.105695234877098e-08, - "loss": 0.9857, - "step": 10051 - }, - { - "epoch": 0.9065247779230734, - "grad_norm": 1.2476544307917623, - "learning_rate": 9.088276256674344e-08, - "loss": 0.9528, - "step": 10052 - }, - { - "epoch": 0.9066149614465437, - "grad_norm": 1.320243607930141, - "learning_rate": 9.070873568112536e-08, - "loss": 0.9109, - "step": 10053 - }, - { - "epoch": 0.906705144970014, - "grad_norm": 1.5332731000154398, - "learning_rate": 9.053487170676577e-08, - "loss": 0.9866, - "step": 10054 - }, - { - "epoch": 0.9067953284934842, - "grad_norm": 1.4298703436326734, - "learning_rate": 9.036117065849968e-08, - "loss": 0.9924, - "step": 10055 - }, - { - "epoch": 0.9068855120169546, - "grad_norm": 1.6753610648822506, - "learning_rate": 9.018763255114837e-08, - "loss": 0.9095, - "step": 10056 - }, - { - "epoch": 0.9069756955404248, - "grad_norm": 1.502412048786953, - "learning_rate": 9.00142573995184e-08, - "loss": 0.8991, - "step": 10057 - }, - { - "epoch": 0.907065879063895, - "grad_norm": 1.3372000366821422, - "learning_rate": 8.984104521840375e-08, - "loss": 1.0136, - "step": 10058 - }, - { - "epoch": 0.9071560625873653, - "grad_norm": 1.4760347260729478, - "learning_rate": 8.966799602258346e-08, - "loss": 0.958, - "step": 10059 - }, - { - "epoch": 0.9072462461108356, - "grad_norm": 1.3541901023988687, - "learning_rate": 8.949510982682329e-08, - "loss": 0.9725, - "step": 10060 - }, - { - "epoch": 0.9073364296343058, - "grad_norm": 1.6339557058804552, - "learning_rate": 8.932238664587499e-08, - "loss": 1.001, - "step": 10061 - }, - { - "epoch": 0.9074266131577761, - "grad_norm": 1.4692177534967175, - "learning_rate": 8.914982649447567e-08, - "loss": 1.0208, - "step": 10062 - }, - { - "epoch": 0.9075167966812463, - "grad_norm": 1.3896496384723376, - "learning_rate": 8.897742938734975e-08, - "loss": 0.999, - "step": 10063 - }, - { - "epoch": 0.9076069802047166, - "grad_norm": 1.5485949731659718, - "learning_rate": 8.880519533920661e-08, - "loss": 0.9582, - "step": 10064 - }, - { - "epoch": 0.9076971637281869, - "grad_norm": 1.213715449429745, - "learning_rate": 8.863312436474268e-08, - "loss": 0.9289, - "step": 10065 - }, - { - "epoch": 0.9077873472516571, - "grad_norm": 1.7950793374680032, - "learning_rate": 8.846121647863936e-08, - "loss": 0.9359, - "step": 10066 - }, - { - "epoch": 0.9078775307751273, - "grad_norm": 1.4226507229726435, - "learning_rate": 8.828947169556555e-08, - "loss": 0.9329, - "step": 10067 - }, - { - "epoch": 0.9079677142985977, - "grad_norm": 1.2153275192011461, - "learning_rate": 8.81178900301749e-08, - "loss": 0.8962, - "step": 10068 - }, - { - "epoch": 0.9080578978220679, - "grad_norm": 1.4357048103491599, - "learning_rate": 8.794647149710787e-08, - "loss": 0.8591, - "step": 10069 - }, - { - "epoch": 0.9081480813455381, - "grad_norm": 1.6040714615482885, - "learning_rate": 8.777521611099081e-08, - "loss": 0.9231, - "step": 10070 - }, - { - "epoch": 0.9082382648690084, - "grad_norm": 2.0238171077767824, - "learning_rate": 8.760412388643624e-08, - "loss": 0.8739, - "step": 10071 - }, - { - "epoch": 0.9083284483924787, - "grad_norm": 1.2776648908082433, - "learning_rate": 8.74331948380429e-08, - "loss": 0.9383, - "step": 10072 - }, - { - "epoch": 0.908418631915949, - "grad_norm": 1.450011496662734, - "learning_rate": 8.726242898039516e-08, - "loss": 0.9322, - "step": 10073 - }, - { - "epoch": 0.9085088154394192, - "grad_norm": 1.4507611644630163, - "learning_rate": 8.709182632806334e-08, - "loss": 0.9343, - "step": 10074 - }, - { - "epoch": 0.9085989989628894, - "grad_norm": 1.4884281436386624, - "learning_rate": 8.692138689560469e-08, - "loss": 0.9407, - "step": 10075 - }, - { - "epoch": 0.9086891824863598, - "grad_norm": 1.3646447650777567, - "learning_rate": 8.675111069756203e-08, - "loss": 0.9226, - "step": 10076 - }, - { - "epoch": 0.90877936600983, - "grad_norm": 1.3149501093403968, - "learning_rate": 8.658099774846395e-08, - "loss": 0.9003, - "step": 10077 - }, - { - "epoch": 0.9088695495333002, - "grad_norm": 1.2214007284023274, - "learning_rate": 8.641104806282595e-08, - "loss": 0.8759, - "step": 10078 - }, - { - "epoch": 0.9089597330567706, - "grad_norm": 1.5601899427539914, - "learning_rate": 8.624126165514845e-08, - "loss": 0.9437, - "step": 10079 - }, - { - "epoch": 0.9090499165802408, - "grad_norm": 1.5864251801033795, - "learning_rate": 8.607163853991917e-08, - "loss": 0.8932, - "step": 10080 - }, - { - "epoch": 0.909140100103711, - "grad_norm": 1.5393066212641255, - "learning_rate": 8.590217873161054e-08, - "loss": 0.7789, - "step": 10081 - }, - { - "epoch": 0.9092302836271813, - "grad_norm": 1.3684945098856027, - "learning_rate": 8.573288224468255e-08, - "loss": 0.95, - "step": 10082 - }, - { - "epoch": 0.9093204671506516, - "grad_norm": 3.146578332124486, - "learning_rate": 8.556374909358011e-08, - "loss": 0.8544, - "step": 10083 - }, - { - "epoch": 0.9094106506741219, - "grad_norm": 1.6320959097551608, - "learning_rate": 8.539477929273476e-08, - "loss": 0.9089, - "step": 10084 - }, - { - "epoch": 0.9095008341975921, - "grad_norm": 1.2758948832057924, - "learning_rate": 8.522597285656386e-08, - "loss": 0.9662, - "step": 10085 - }, - { - "epoch": 0.9095910177210623, - "grad_norm": 0.8059469570212986, - "learning_rate": 8.505732979947078e-08, - "loss": 0.8115, - "step": 10086 - }, - { - "epoch": 0.9096812012445327, - "grad_norm": 1.1279088138118591, - "learning_rate": 8.488885013584557e-08, - "loss": 0.9492, - "step": 10087 - }, - { - "epoch": 0.9097713847680029, - "grad_norm": 1.421586311293533, - "learning_rate": 8.472053388006295e-08, - "loss": 0.8758, - "step": 10088 - }, - { - "epoch": 0.9098615682914731, - "grad_norm": 4.855934225098846, - "learning_rate": 8.455238104648565e-08, - "loss": 1.0197, - "step": 10089 - }, - { - "epoch": 0.9099517518149434, - "grad_norm": 1.4752200043393537, - "learning_rate": 8.438439164946043e-08, - "loss": 0.804, - "step": 10090 - }, - { - "epoch": 0.9100419353384137, - "grad_norm": 1.4619044403595332, - "learning_rate": 8.42165657033218e-08, - "loss": 0.9493, - "step": 10091 - }, - { - "epoch": 0.9101321188618839, - "grad_norm": 1.4478757655818417, - "learning_rate": 8.4048903222389e-08, - "loss": 0.971, - "step": 10092 - }, - { - "epoch": 0.9102223023853542, - "grad_norm": 1.3133964655962644, - "learning_rate": 8.388140422096856e-08, - "loss": 0.9802, - "step": 10093 - }, - { - "epoch": 0.9103124859088244, - "grad_norm": 1.5260299144027616, - "learning_rate": 8.371406871335173e-08, - "loss": 0.9387, - "step": 10094 - }, - { - "epoch": 0.9104026694322948, - "grad_norm": 1.5318458332100597, - "learning_rate": 8.354689671381732e-08, - "loss": 0.9269, - "step": 10095 - }, - { - "epoch": 0.910492852955765, - "grad_norm": 1.2956857858946158, - "learning_rate": 8.337988823662834e-08, - "loss": 0.9233, - "step": 10096 - }, - { - "epoch": 0.9105830364792352, - "grad_norm": 1.400701512260217, - "learning_rate": 8.321304329603607e-08, - "loss": 0.8866, - "step": 10097 - }, - { - "epoch": 0.9106732200027055, - "grad_norm": 0.8001055588221961, - "learning_rate": 8.304636190627557e-08, - "loss": 0.8167, - "step": 10098 - }, - { - "epoch": 0.9107634035261758, - "grad_norm": 1.76697279539037, - "learning_rate": 8.287984408156945e-08, - "loss": 0.9477, - "step": 10099 - }, - { - "epoch": 0.910853587049646, - "grad_norm": 1.5462079391988177, - "learning_rate": 8.271348983612591e-08, - "loss": 0.839, - "step": 10100 - }, - { - "epoch": 0.9109437705731163, - "grad_norm": 1.3215386830851767, - "learning_rate": 8.254729918413938e-08, - "loss": 0.8918, - "step": 10101 - }, - { - "epoch": 0.9110339540965866, - "grad_norm": 1.3205975106407226, - "learning_rate": 8.238127213979006e-08, - "loss": 0.9026, - "step": 10102 - }, - { - "epoch": 0.9111241376200568, - "grad_norm": 1.607531037453695, - "learning_rate": 8.221540871724398e-08, - "loss": 1.0011, - "step": 10103 - }, - { - "epoch": 0.9112143211435271, - "grad_norm": 1.5772674043269255, - "learning_rate": 8.2049708930654e-08, - "loss": 0.822, - "step": 10104 - }, - { - "epoch": 0.9113045046669973, - "grad_norm": 1.7268364460272794, - "learning_rate": 8.188417279415793e-08, - "loss": 0.903, - "step": 10105 - }, - { - "epoch": 0.9113946881904677, - "grad_norm": 1.5682844284221522, - "learning_rate": 8.171880032188117e-08, - "loss": 1.0318, - "step": 10106 - }, - { - "epoch": 0.9114848717139379, - "grad_norm": 1.5359970003635204, - "learning_rate": 8.155359152793351e-08, - "loss": 0.9124, - "step": 10107 - }, - { - "epoch": 0.9115750552374081, - "grad_norm": 1.5030278799043535, - "learning_rate": 8.138854642641147e-08, - "loss": 0.9207, - "step": 10108 - }, - { - "epoch": 0.9116652387608783, - "grad_norm": 1.393528211592069, - "learning_rate": 8.122366503139777e-08, - "loss": 0.86, - "step": 10109 - }, - { - "epoch": 0.9117554222843487, - "grad_norm": 3.301795650108541, - "learning_rate": 8.105894735696117e-08, - "loss": 0.8773, - "step": 10110 - }, - { - "epoch": 0.9118456058078189, - "grad_norm": 1.5134030601778041, - "learning_rate": 8.089439341715576e-08, - "loss": 0.909, - "step": 10111 - }, - { - "epoch": 0.9119357893312892, - "grad_norm": 2.0426477942346066, - "learning_rate": 8.073000322602319e-08, - "loss": 0.9595, - "step": 10112 - }, - { - "epoch": 0.9120259728547594, - "grad_norm": 1.3668126927649127, - "learning_rate": 8.056577679758891e-08, - "loss": 0.9492, - "step": 10113 - }, - { - "epoch": 0.9121161563782297, - "grad_norm": 1.5303824945185465, - "learning_rate": 8.040171414586638e-08, - "loss": 0.8841, - "step": 10114 - }, - { - "epoch": 0.9122063399017, - "grad_norm": 1.4374980926501018, - "learning_rate": 8.023781528485419e-08, - "loss": 0.8966, - "step": 10115 - }, - { - "epoch": 0.9122965234251702, - "grad_norm": 1.5538751214516584, - "learning_rate": 8.00740802285369e-08, - "loss": 0.9479, - "step": 10116 - }, - { - "epoch": 0.9123867069486404, - "grad_norm": 1.3991782501510666, - "learning_rate": 7.99105089908858e-08, - "loss": 0.8013, - "step": 10117 - }, - { - "epoch": 0.9124768904721108, - "grad_norm": 1.1696143152047118, - "learning_rate": 7.974710158585685e-08, - "loss": 0.906, - "step": 10118 - }, - { - "epoch": 0.912567073995581, - "grad_norm": 1.4279860080386126, - "learning_rate": 7.958385802739375e-08, - "loss": 0.9635, - "step": 10119 - }, - { - "epoch": 0.9126572575190512, - "grad_norm": 1.6955519827881036, - "learning_rate": 7.942077832942452e-08, - "loss": 0.9883, - "step": 10120 - }, - { - "epoch": 0.9127474410425215, - "grad_norm": 0.7137323714048428, - "learning_rate": 7.925786250586508e-08, - "loss": 0.8194, - "step": 10121 - }, - { - "epoch": 0.9128376245659918, - "grad_norm": 2.5249378952093666, - "learning_rate": 7.909511057061524e-08, - "loss": 0.8081, - "step": 10122 - }, - { - "epoch": 0.9129278080894621, - "grad_norm": 1.38125634860828, - "learning_rate": 7.893252253756234e-08, - "loss": 0.9168, - "step": 10123 - }, - { - "epoch": 0.9130179916129323, - "grad_norm": 1.665501505154491, - "learning_rate": 7.877009842057925e-08, - "loss": 0.8596, - "step": 10124 - }, - { - "epoch": 0.9131081751364026, - "grad_norm": 1.4531010451444533, - "learning_rate": 7.860783823352512e-08, - "loss": 0.9598, - "step": 10125 - }, - { - "epoch": 0.9131983586598729, - "grad_norm": 1.8145092974981962, - "learning_rate": 7.844574199024445e-08, - "loss": 0.9367, - "step": 10126 - }, - { - "epoch": 0.9132885421833431, - "grad_norm": 1.4600027429215259, - "learning_rate": 7.82838097045686e-08, - "loss": 0.9727, - "step": 10127 - }, - { - "epoch": 0.9133787257068133, - "grad_norm": 1.7240582424689987, - "learning_rate": 7.812204139031454e-08, - "loss": 0.926, - "step": 10128 - }, - { - "epoch": 0.9134689092302837, - "grad_norm": 1.796278215902136, - "learning_rate": 7.796043706128474e-08, - "loss": 0.9522, - "step": 10129 - }, - { - "epoch": 0.9135590927537539, - "grad_norm": 1.4650025141827878, - "learning_rate": 7.779899673126844e-08, - "loss": 0.9621, - "step": 10130 - }, - { - "epoch": 0.9136492762772241, - "grad_norm": 1.3317157002438396, - "learning_rate": 7.76377204140406e-08, - "loss": 0.9786, - "step": 10131 - }, - { - "epoch": 0.9137394598006944, - "grad_norm": 1.3661615936770244, - "learning_rate": 7.74766081233622e-08, - "loss": 0.9247, - "step": 10132 - }, - { - "epoch": 0.9138296433241647, - "grad_norm": 1.3169001436662013, - "learning_rate": 7.73156598729805e-08, - "loss": 0.9941, - "step": 10133 - }, - { - "epoch": 0.913919826847635, - "grad_norm": 1.5879147235688642, - "learning_rate": 7.715487567662849e-08, - "loss": 0.9787, - "step": 10134 - }, - { - "epoch": 0.9140100103711052, - "grad_norm": 1.7458181142333327, - "learning_rate": 7.69942555480243e-08, - "loss": 0.9019, - "step": 10135 - }, - { - "epoch": 0.9141001938945754, - "grad_norm": 1.6337392266990172, - "learning_rate": 7.68337995008741e-08, - "loss": 1.0326, - "step": 10136 - }, - { - "epoch": 0.9141903774180458, - "grad_norm": 1.2777727905581537, - "learning_rate": 7.667350754886803e-08, - "loss": 0.7869, - "step": 10137 - }, - { - "epoch": 0.914280560941516, - "grad_norm": 2.148628476064561, - "learning_rate": 7.651337970568361e-08, - "loss": 0.9216, - "step": 10138 - }, - { - "epoch": 0.9143707444649862, - "grad_norm": 1.327136625640156, - "learning_rate": 7.635341598498368e-08, - "loss": 0.8982, - "step": 10139 - }, - { - "epoch": 0.9144609279884565, - "grad_norm": 1.3785903612060817, - "learning_rate": 7.61936164004171e-08, - "loss": 0.9249, - "step": 10140 - }, - { - "epoch": 0.9145511115119268, - "grad_norm": 1.3460920018357998, - "learning_rate": 7.603398096561875e-08, - "loss": 0.9719, - "step": 10141 - }, - { - "epoch": 0.914641295035397, - "grad_norm": 1.7704237819620523, - "learning_rate": 7.587450969420994e-08, - "loss": 0.9434, - "step": 10142 - }, - { - "epoch": 0.9147314785588673, - "grad_norm": 0.7113399414555801, - "learning_rate": 7.571520259979757e-08, - "loss": 0.8133, - "step": 10143 - }, - { - "epoch": 0.9148216620823375, - "grad_norm": 1.474243846329966, - "learning_rate": 7.555605969597455e-08, - "loss": 0.7715, - "step": 10144 - }, - { - "epoch": 0.9149118456058078, - "grad_norm": 1.3825889880907953, - "learning_rate": 7.539708099631959e-08, - "loss": 1.0196, - "step": 10145 - }, - { - "epoch": 0.9150020291292781, - "grad_norm": 1.349090697325419, - "learning_rate": 7.52382665143978e-08, - "loss": 0.9384, - "step": 10146 - }, - { - "epoch": 0.9150922126527483, - "grad_norm": 1.6841485686968005, - "learning_rate": 7.507961626376014e-08, - "loss": 0.9895, - "step": 10147 - }, - { - "epoch": 0.9151823961762187, - "grad_norm": 1.5161873472246612, - "learning_rate": 7.492113025794378e-08, - "loss": 0.9704, - "step": 10148 - }, - { - "epoch": 0.9152725796996889, - "grad_norm": 1.3659066003757403, - "learning_rate": 7.476280851047101e-08, - "loss": 0.8838, - "step": 10149 - }, - { - "epoch": 0.9153627632231591, - "grad_norm": 1.1723673485586463, - "learning_rate": 7.460465103485125e-08, - "loss": 0.9581, - "step": 10150 - }, - { - "epoch": 0.9154529467466294, - "grad_norm": 1.7539331650739678, - "learning_rate": 7.444665784457948e-08, - "loss": 0.9213, - "step": 10151 - }, - { - "epoch": 0.9155431302700997, - "grad_norm": 1.5056437178382933, - "learning_rate": 7.42888289531356e-08, - "loss": 0.8931, - "step": 10152 - }, - { - "epoch": 0.9156333137935699, - "grad_norm": 1.4026270078245744, - "learning_rate": 7.41311643739877e-08, - "loss": 0.856, - "step": 10153 - }, - { - "epoch": 0.9157234973170402, - "grad_norm": 1.3188176015729267, - "learning_rate": 7.39736641205877e-08, - "loss": 0.9714, - "step": 10154 - }, - { - "epoch": 0.9158136808405104, - "grad_norm": 1.3811811317777953, - "learning_rate": 7.381632820637462e-08, - "loss": 0.9594, - "step": 10155 - }, - { - "epoch": 0.9159038643639807, - "grad_norm": 1.3698582747864079, - "learning_rate": 7.365915664477352e-08, - "loss": 0.9224, - "step": 10156 - }, - { - "epoch": 0.915994047887451, - "grad_norm": 2.9638305058479677, - "learning_rate": 7.350214944919474e-08, - "loss": 0.9428, - "step": 10157 - }, - { - "epoch": 0.9160842314109212, - "grad_norm": 1.4205628932898797, - "learning_rate": 7.334530663303539e-08, - "loss": 0.9561, - "step": 10158 - }, - { - "epoch": 0.9161744149343914, - "grad_norm": 1.2125872295503088, - "learning_rate": 7.318862820967742e-08, - "loss": 0.9343, - "step": 10159 - }, - { - "epoch": 0.9162645984578618, - "grad_norm": 1.960557695667894, - "learning_rate": 7.303211419249056e-08, - "loss": 0.961, - "step": 10160 - }, - { - "epoch": 0.916354781981332, - "grad_norm": 1.3741362199404208, - "learning_rate": 7.287576459482858e-08, - "loss": 0.9477, - "step": 10161 - }, - { - "epoch": 0.9164449655048023, - "grad_norm": 1.4466865400335125, - "learning_rate": 7.271957943003259e-08, - "loss": 0.9749, - "step": 10162 - }, - { - "epoch": 0.9165351490282725, - "grad_norm": 1.6925998907320658, - "learning_rate": 7.256355871142883e-08, - "loss": 0.9356, - "step": 10163 - }, - { - "epoch": 0.9166253325517428, - "grad_norm": 1.524400612028265, - "learning_rate": 7.240770245233019e-08, - "loss": 0.9852, - "step": 10164 - }, - { - "epoch": 0.9167155160752131, - "grad_norm": 1.4172010629293024, - "learning_rate": 7.225201066603492e-08, - "loss": 0.851, - "step": 10165 - }, - { - "epoch": 0.9168056995986833, - "grad_norm": 1.3206075304846454, - "learning_rate": 7.209648336582774e-08, - "loss": 1.0437, - "step": 10166 - }, - { - "epoch": 0.9168958831221535, - "grad_norm": 1.4267676716698199, - "learning_rate": 7.19411205649787e-08, - "loss": 0.9183, - "step": 10167 - }, - { - "epoch": 0.9169860666456239, - "grad_norm": 1.3567141217797853, - "learning_rate": 7.178592227674474e-08, - "loss": 0.9548, - "step": 10168 - }, - { - "epoch": 0.9170762501690941, - "grad_norm": 1.6173392699154436, - "learning_rate": 7.163088851436771e-08, - "loss": 0.853, - "step": 10169 - }, - { - "epoch": 0.9171664336925643, - "grad_norm": 1.3098890583730216, - "learning_rate": 7.147601929107639e-08, - "loss": 0.8923, - "step": 10170 - }, - { - "epoch": 0.9172566172160346, - "grad_norm": 1.3222378568677715, - "learning_rate": 7.132131462008461e-08, - "loss": 0.9743, - "step": 10171 - }, - { - "epoch": 0.9173468007395049, - "grad_norm": 1.4839039908555904, - "learning_rate": 7.116677451459297e-08, - "loss": 0.9338, - "step": 10172 - }, - { - "epoch": 0.9174369842629752, - "grad_norm": 1.20774064929827, - "learning_rate": 7.101239898778799e-08, - "loss": 0.957, - "step": 10173 - }, - { - "epoch": 0.9175271677864454, - "grad_norm": 1.2910954945568758, - "learning_rate": 7.085818805284094e-08, - "loss": 0.9662, - "step": 10174 - }, - { - "epoch": 0.9176173513099157, - "grad_norm": 1.5648947103261415, - "learning_rate": 7.070414172291083e-08, - "loss": 1.013, - "step": 10175 - }, - { - "epoch": 0.917707534833386, - "grad_norm": 1.2214989593289065, - "learning_rate": 7.055026001114095e-08, - "loss": 0.8687, - "step": 10176 - }, - { - "epoch": 0.9177977183568562, - "grad_norm": 0.6758993188988642, - "learning_rate": 7.039654293066211e-08, - "loss": 0.7925, - "step": 10177 - }, - { - "epoch": 0.9178879018803264, - "grad_norm": 1.6386737664340245, - "learning_rate": 7.024299049459003e-08, - "loss": 0.8586, - "step": 10178 - }, - { - "epoch": 0.9179780854037968, - "grad_norm": 1.7088039804344395, - "learning_rate": 7.008960271602627e-08, - "loss": 0.8526, - "step": 10179 - }, - { - "epoch": 0.918068268927267, - "grad_norm": 1.6405538634363883, - "learning_rate": 6.993637960805921e-08, - "loss": 0.9025, - "step": 10180 - }, - { - "epoch": 0.9181584524507372, - "grad_norm": 1.634359329761196, - "learning_rate": 6.97833211837624e-08, - "loss": 0.9585, - "step": 10181 - }, - { - "epoch": 0.9182486359742075, - "grad_norm": 2.1012113483420967, - "learning_rate": 6.963042745619562e-08, - "loss": 0.8785, - "step": 10182 - }, - { - "epoch": 0.9183388194976778, - "grad_norm": 1.3133201761280546, - "learning_rate": 6.947769843840511e-08, - "loss": 0.8988, - "step": 10183 - }, - { - "epoch": 0.918429003021148, - "grad_norm": 1.4955053541928747, - "learning_rate": 6.9325134143422e-08, - "loss": 0.9647, - "step": 10184 - }, - { - "epoch": 0.9185191865446183, - "grad_norm": 1.651810435990136, - "learning_rate": 6.917273458426387e-08, - "loss": 0.9149, - "step": 10185 - }, - { - "epoch": 0.9186093700680885, - "grad_norm": 0.7331369393523672, - "learning_rate": 6.902049977393476e-08, - "loss": 0.7802, - "step": 10186 - }, - { - "epoch": 0.9186995535915589, - "grad_norm": 1.3594535717235345, - "learning_rate": 6.886842972542362e-08, - "loss": 0.9334, - "step": 10187 - }, - { - "epoch": 0.9187897371150291, - "grad_norm": 1.460281143031408, - "learning_rate": 6.871652445170672e-08, - "loss": 0.8214, - "step": 10188 - }, - { - "epoch": 0.9188799206384993, - "grad_norm": 1.5572290397307604, - "learning_rate": 6.856478396574416e-08, - "loss": 1.0135, - "step": 10189 - }, - { - "epoch": 0.9189701041619696, - "grad_norm": 1.5535911643367284, - "learning_rate": 6.841320828048491e-08, - "loss": 0.9878, - "step": 10190 - }, - { - "epoch": 0.9190602876854399, - "grad_norm": 1.404597180001308, - "learning_rate": 6.826179740886062e-08, - "loss": 0.9277, - "step": 10191 - }, - { - "epoch": 0.9191504712089101, - "grad_norm": 1.3659582224722506, - "learning_rate": 6.811055136379184e-08, - "loss": 0.9925, - "step": 10192 - }, - { - "epoch": 0.9192406547323804, - "grad_norm": 1.7030881825238506, - "learning_rate": 6.79594701581827e-08, - "loss": 0.9031, - "step": 10193 - }, - { - "epoch": 0.9193308382558506, - "grad_norm": 1.739581018625269, - "learning_rate": 6.780855380492511e-08, - "loss": 0.9403, - "step": 10194 - }, - { - "epoch": 0.919421021779321, - "grad_norm": 1.2846993326891656, - "learning_rate": 6.765780231689544e-08, - "loss": 0.9472, - "step": 10195 - }, - { - "epoch": 0.9195112053027912, - "grad_norm": 1.5066062569252698, - "learning_rate": 6.750721570695695e-08, - "loss": 0.9926, - "step": 10196 - }, - { - "epoch": 0.9196013888262614, - "grad_norm": 2.3681548002396187, - "learning_rate": 6.735679398795868e-08, - "loss": 0.9013, - "step": 10197 - }, - { - "epoch": 0.9196915723497318, - "grad_norm": 2.0104458531713645, - "learning_rate": 6.720653717273506e-08, - "loss": 0.8736, - "step": 10198 - }, - { - "epoch": 0.919781755873202, - "grad_norm": 1.3539159493778543, - "learning_rate": 6.705644527410714e-08, - "loss": 0.9329, - "step": 10199 - }, - { - "epoch": 0.9198719393966722, - "grad_norm": 1.6729993515515025, - "learning_rate": 6.690651830488136e-08, - "loss": 0.9031, - "step": 10200 - }, - { - "epoch": 0.9199621229201425, - "grad_norm": 1.6658314360467368, - "learning_rate": 6.675675627785037e-08, - "loss": 0.9204, - "step": 10201 - }, - { - "epoch": 0.9200523064436128, - "grad_norm": 1.7843816566677158, - "learning_rate": 6.660715920579263e-08, - "loss": 0.9518, - "step": 10202 - }, - { - "epoch": 0.920142489967083, - "grad_norm": 0.641158509819926, - "learning_rate": 6.645772710147279e-08, - "loss": 0.7988, - "step": 10203 - }, - { - "epoch": 0.9202326734905533, - "grad_norm": 1.3904116177526769, - "learning_rate": 6.630845997764112e-08, - "loss": 0.9005, - "step": 10204 - }, - { - "epoch": 0.9203228570140235, - "grad_norm": 1.3720700952930212, - "learning_rate": 6.615935784703409e-08, - "loss": 0.8759, - "step": 10205 - }, - { - "epoch": 0.9204130405374938, - "grad_norm": 1.5947747021783383, - "learning_rate": 6.601042072237328e-08, - "loss": 0.8553, - "step": 10206 - }, - { - "epoch": 0.9205032240609641, - "grad_norm": 1.293537395165304, - "learning_rate": 6.586164861636767e-08, - "loss": 0.9663, - "step": 10207 - }, - { - "epoch": 0.9205934075844343, - "grad_norm": 1.7130153353973923, - "learning_rate": 6.571304154171065e-08, - "loss": 0.849, - "step": 10208 - }, - { - "epoch": 0.9206835911079045, - "grad_norm": 1.452793452678959, - "learning_rate": 6.556459951108273e-08, - "loss": 0.9543, - "step": 10209 - }, - { - "epoch": 0.9207737746313749, - "grad_norm": 1.6045887709632545, - "learning_rate": 6.541632253714957e-08, - "loss": 0.9164, - "step": 10210 - }, - { - "epoch": 0.9208639581548451, - "grad_norm": 1.3915021465996358, - "learning_rate": 6.526821063256261e-08, - "loss": 0.8618, - "step": 10211 - }, - { - "epoch": 0.9209541416783154, - "grad_norm": 1.549509675396097, - "learning_rate": 6.512026380996016e-08, - "loss": 0.8373, - "step": 10212 - }, - { - "epoch": 0.9210443252017856, - "grad_norm": 2.0688361896181817, - "learning_rate": 6.49724820819657e-08, - "loss": 0.9047, - "step": 10213 - }, - { - "epoch": 0.9211345087252559, - "grad_norm": 1.3704396396581464, - "learning_rate": 6.48248654611887e-08, - "loss": 0.9641, - "step": 10214 - }, - { - "epoch": 0.9212246922487262, - "grad_norm": 1.505716477873611, - "learning_rate": 6.467741396022419e-08, - "loss": 0.9819, - "step": 10215 - }, - { - "epoch": 0.9213148757721964, - "grad_norm": 1.5368127015507866, - "learning_rate": 6.453012759165455e-08, - "loss": 0.938, - "step": 10216 - }, - { - "epoch": 0.9214050592956666, - "grad_norm": 1.3579045377558163, - "learning_rate": 6.438300636804639e-08, - "loss": 0.8971, - "step": 10217 - }, - { - "epoch": 0.921495242819137, - "grad_norm": 1.376474500218323, - "learning_rate": 6.423605030195278e-08, - "loss": 0.9695, - "step": 10218 - }, - { - "epoch": 0.9215854263426072, - "grad_norm": 1.715984477753227, - "learning_rate": 6.408925940591304e-08, - "loss": 1.0062, - "step": 10219 - }, - { - "epoch": 0.9216756098660774, - "grad_norm": 1.6784418499566534, - "learning_rate": 6.394263369245222e-08, - "loss": 0.8708, - "step": 10220 - }, - { - "epoch": 0.9217657933895478, - "grad_norm": 1.5553522270391487, - "learning_rate": 6.379617317408126e-08, - "loss": 0.9397, - "step": 10221 - }, - { - "epoch": 0.921855976913018, - "grad_norm": 1.9316857991273562, - "learning_rate": 6.364987786329723e-08, - "loss": 0.9254, - "step": 10222 - }, - { - "epoch": 0.9219461604364882, - "grad_norm": 1.407909981827541, - "learning_rate": 6.350374777258193e-08, - "loss": 0.9213, - "step": 10223 - }, - { - "epoch": 0.9220363439599585, - "grad_norm": 1.5999377685603873, - "learning_rate": 6.335778291440519e-08, - "loss": 0.9156, - "step": 10224 - }, - { - "epoch": 0.9221265274834288, - "grad_norm": 1.3557201669017886, - "learning_rate": 6.321198330122057e-08, - "loss": 0.8419, - "step": 10225 - }, - { - "epoch": 0.9222167110068991, - "grad_norm": 1.7636196935629147, - "learning_rate": 6.306634894546902e-08, - "loss": 0.8769, - "step": 10226 - }, - { - "epoch": 0.9223068945303693, - "grad_norm": 1.2156267004319876, - "learning_rate": 6.292087985957661e-08, - "loss": 0.883, - "step": 10227 - }, - { - "epoch": 0.9223970780538395, - "grad_norm": 1.2972468107444919, - "learning_rate": 6.277557605595585e-08, - "loss": 0.9728, - "step": 10228 - }, - { - "epoch": 0.9224872615773099, - "grad_norm": 1.251706246301801, - "learning_rate": 6.263043754700481e-08, - "loss": 0.8601, - "step": 10229 - }, - { - "epoch": 0.9225774451007801, - "grad_norm": 1.6257032193289234, - "learning_rate": 6.248546434510671e-08, - "loss": 0.9311, - "step": 10230 - }, - { - "epoch": 0.9226676286242503, - "grad_norm": 1.4106040095777599, - "learning_rate": 6.234065646263298e-08, - "loss": 0.8631, - "step": 10231 - }, - { - "epoch": 0.9227578121477206, - "grad_norm": 3.3284129703209873, - "learning_rate": 6.219601391193796e-08, - "loss": 0.8418, - "step": 10232 - }, - { - "epoch": 0.9228479956711909, - "grad_norm": 2.539088040736926, - "learning_rate": 6.205153670536423e-08, - "loss": 0.864, - "step": 10233 - }, - { - "epoch": 0.9229381791946611, - "grad_norm": 1.2894589825866651, - "learning_rate": 6.190722485523902e-08, - "loss": 0.9596, - "step": 10234 - }, - { - "epoch": 0.9230283627181314, - "grad_norm": 1.4386179350188026, - "learning_rate": 6.176307837387607e-08, - "loss": 0.906, - "step": 10235 - }, - { - "epoch": 0.9231185462416016, - "grad_norm": 1.4629451400316793, - "learning_rate": 6.16190972735744e-08, - "loss": 0.888, - "step": 10236 - }, - { - "epoch": 0.923208729765072, - "grad_norm": 1.4300265717872014, - "learning_rate": 6.147528156661974e-08, - "loss": 0.9552, - "step": 10237 - }, - { - "epoch": 0.9232989132885422, - "grad_norm": 1.3974004981661792, - "learning_rate": 6.133163126528273e-08, - "loss": 0.851, - "step": 10238 - }, - { - "epoch": 0.9233890968120124, - "grad_norm": 1.3806934391878443, - "learning_rate": 6.11881463818209e-08, - "loss": 0.9404, - "step": 10239 - }, - { - "epoch": 0.9234792803354827, - "grad_norm": 1.3467266470153887, - "learning_rate": 6.104482692847668e-08, - "loss": 0.9199, - "step": 10240 - }, - { - "epoch": 0.923569463858953, - "grad_norm": 1.3809091863861043, - "learning_rate": 6.090167291747917e-08, - "loss": 0.9037, - "step": 10241 - }, - { - "epoch": 0.9236596473824232, - "grad_norm": 1.3608867088326433, - "learning_rate": 6.075868436104303e-08, - "loss": 0.8276, - "step": 10242 - }, - { - "epoch": 0.9237498309058935, - "grad_norm": 5.388958925052077, - "learning_rate": 6.061586127136875e-08, - "loss": 0.9184, - "step": 10243 - }, - { - "epoch": 0.9238400144293638, - "grad_norm": 1.3920705333000596, - "learning_rate": 6.047320366064324e-08, - "loss": 0.9025, - "step": 10244 - }, - { - "epoch": 0.923930197952834, - "grad_norm": 1.3689545465507065, - "learning_rate": 6.033071154103786e-08, - "loss": 0.9395, - "step": 10245 - }, - { - "epoch": 0.9240203814763043, - "grad_norm": 1.461301054572509, - "learning_rate": 6.018838492471178e-08, - "loss": 0.9528, - "step": 10246 - }, - { - "epoch": 0.9241105649997745, - "grad_norm": 0.6331264282315894, - "learning_rate": 6.00462238238082e-08, - "loss": 0.8091, - "step": 10247 - }, - { - "epoch": 0.9242007485232449, - "grad_norm": 1.3062170964527646, - "learning_rate": 5.990422825045827e-08, - "loss": 0.8972, - "step": 10248 - }, - { - "epoch": 0.9242909320467151, - "grad_norm": 1.3433721144089625, - "learning_rate": 5.976239821677675e-08, - "loss": 0.9077, - "step": 10249 - }, - { - "epoch": 0.9243811155701853, - "grad_norm": 1.342455617572392, - "learning_rate": 5.962073373486598e-08, - "loss": 0.8758, - "step": 10250 - }, - { - "epoch": 0.9244712990936556, - "grad_norm": 1.661914854400299, - "learning_rate": 5.947923481681316e-08, - "loss": 0.8949, - "step": 10251 - }, - { - "epoch": 0.9245614826171259, - "grad_norm": 1.4156683976213942, - "learning_rate": 5.933790147469198e-08, - "loss": 0.912, - "step": 10252 - }, - { - "epoch": 0.9246516661405961, - "grad_norm": 1.6122359244403122, - "learning_rate": 5.9196733720561665e-08, - "loss": 0.8849, - "step": 10253 - }, - { - "epoch": 0.9247418496640664, - "grad_norm": 2.404659786151419, - "learning_rate": 5.905573156646793e-08, - "loss": 1.0094, - "step": 10254 - }, - { - "epoch": 0.9248320331875366, - "grad_norm": 1.7002204836187775, - "learning_rate": 5.8914895024441134e-08, - "loss": 1.0143, - "step": 10255 - }, - { - "epoch": 0.9249222167110069, - "grad_norm": 1.6428879163359562, - "learning_rate": 5.877422410649857e-08, - "loss": 0.9739, - "step": 10256 - }, - { - "epoch": 0.9250124002344772, - "grad_norm": 1.6287486780004632, - "learning_rate": 5.863371882464285e-08, - "loss": 0.9401, - "step": 10257 - }, - { - "epoch": 0.9251025837579474, - "grad_norm": 1.3195514629977305, - "learning_rate": 5.849337919086283e-08, - "loss": 0.9139, - "step": 10258 - }, - { - "epoch": 0.9251927672814176, - "grad_norm": 1.7165807733373928, - "learning_rate": 5.835320521713316e-08, - "loss": 0.9104, - "step": 10259 - }, - { - "epoch": 0.925282950804888, - "grad_norm": 1.341346365247888, - "learning_rate": 5.8213196915414264e-08, - "loss": 0.9234, - "step": 10260 - }, - { - "epoch": 0.9253731343283582, - "grad_norm": 2.0177735698383463, - "learning_rate": 5.807335429765237e-08, - "loss": 0.9912, - "step": 10261 - }, - { - "epoch": 0.9254633178518284, - "grad_norm": 1.7299510448342732, - "learning_rate": 5.7933677375779034e-08, - "loss": 0.9736, - "step": 10262 - }, - { - "epoch": 0.9255535013752987, - "grad_norm": 5.363344196303047, - "learning_rate": 5.77941661617134e-08, - "loss": 0.9809, - "step": 10263 - }, - { - "epoch": 0.925643684898769, - "grad_norm": 1.7240941281344957, - "learning_rate": 5.765482066735816e-08, - "loss": 0.8643, - "step": 10264 - }, - { - "epoch": 0.9257338684222393, - "grad_norm": 1.494639195055039, - "learning_rate": 5.7515640904604256e-08, - "loss": 0.934, - "step": 10265 - }, - { - "epoch": 0.9258240519457095, - "grad_norm": 1.354403030112494, - "learning_rate": 5.7376626885326187e-08, - "loss": 0.9104, - "step": 10266 - }, - { - "epoch": 0.9259142354691798, - "grad_norm": 1.4100395429085004, - "learning_rate": 5.723777862138601e-08, - "loss": 0.9957, - "step": 10267 - }, - { - "epoch": 0.9260044189926501, - "grad_norm": 1.4394628972170833, - "learning_rate": 5.7099096124630705e-08, - "loss": 1.0115, - "step": 10268 - }, - { - "epoch": 0.9260946025161203, - "grad_norm": 1.1766050880698202, - "learning_rate": 5.696057940689347e-08, - "loss": 0.944, - "step": 10269 - }, - { - "epoch": 0.9261847860395905, - "grad_norm": 1.600327029185534, - "learning_rate": 5.6822228479993736e-08, - "loss": 0.9477, - "step": 10270 - }, - { - "epoch": 0.9262749695630609, - "grad_norm": 1.610075566668627, - "learning_rate": 5.668404335573584e-08, - "loss": 0.9722, - "step": 10271 - }, - { - "epoch": 0.9263651530865311, - "grad_norm": 1.406788447398198, - "learning_rate": 5.654602404591058e-08, - "loss": 0.8761, - "step": 10272 - }, - { - "epoch": 0.9264553366100013, - "grad_norm": 1.3643615290020012, - "learning_rate": 5.640817056229474e-08, - "loss": 0.983, - "step": 10273 - }, - { - "epoch": 0.9265455201334716, - "grad_norm": 1.3930339735917465, - "learning_rate": 5.6270482916650706e-08, - "loss": 0.975, - "step": 10274 - }, - { - "epoch": 0.9266357036569419, - "grad_norm": 1.5264969841945732, - "learning_rate": 5.613296112072663e-08, - "loss": 1.0108, - "step": 10275 - }, - { - "epoch": 0.9267258871804122, - "grad_norm": 1.9592485183217623, - "learning_rate": 5.59956051862569e-08, - "loss": 0.9393, - "step": 10276 - }, - { - "epoch": 0.9268160707038824, - "grad_norm": 1.4048006430134572, - "learning_rate": 5.585841512496081e-08, - "loss": 0.998, - "step": 10277 - }, - { - "epoch": 0.9269062542273526, - "grad_norm": 1.7147666548862672, - "learning_rate": 5.5721390948545e-08, - "loss": 1.0359, - "step": 10278 - }, - { - "epoch": 0.926996437750823, - "grad_norm": 1.4129231958323734, - "learning_rate": 5.558453266870056e-08, - "loss": 0.9808, - "step": 10279 - }, - { - "epoch": 0.9270866212742932, - "grad_norm": 1.4679026492890859, - "learning_rate": 5.544784029710525e-08, - "loss": 0.8747, - "step": 10280 - }, - { - "epoch": 0.9271768047977634, - "grad_norm": 1.5740557034777254, - "learning_rate": 5.531131384542242e-08, - "loss": 1.0047, - "step": 10281 - }, - { - "epoch": 0.9272669883212337, - "grad_norm": 1.632710065777072, - "learning_rate": 5.51749533253012e-08, - "loss": 0.8887, - "step": 10282 - }, - { - "epoch": 0.927357171844704, - "grad_norm": 1.3650301670052614, - "learning_rate": 5.503875874837649e-08, - "loss": 0.8741, - "step": 10283 - }, - { - "epoch": 0.9274473553681742, - "grad_norm": 1.5253316987557979, - "learning_rate": 5.4902730126269225e-08, - "loss": 0.8949, - "step": 10284 - }, - { - "epoch": 0.9275375388916445, - "grad_norm": 1.4285023263839562, - "learning_rate": 5.476686747058656e-08, - "loss": 0.9604, - "step": 10285 - }, - { - "epoch": 0.9276277224151147, - "grad_norm": 2.1090962543688874, - "learning_rate": 5.4631170792920124e-08, - "loss": 0.9046, - "step": 10286 - }, - { - "epoch": 0.927717905938585, - "grad_norm": 1.7627071475321179, - "learning_rate": 5.449564010484953e-08, - "loss": 0.912, - "step": 10287 - }, - { - "epoch": 0.9278080894620553, - "grad_norm": 1.8540544440208417, - "learning_rate": 5.436027541793775e-08, - "loss": 0.962, - "step": 10288 - }, - { - "epoch": 0.9278982729855255, - "grad_norm": 1.3349176719368518, - "learning_rate": 5.4225076743735554e-08, - "loss": 0.911, - "step": 10289 - }, - { - "epoch": 0.9279884565089958, - "grad_norm": 1.4461584965963559, - "learning_rate": 5.409004409377882e-08, - "loss": 0.8533, - "step": 10290 - }, - { - "epoch": 0.9280786400324661, - "grad_norm": 1.192765607944025, - "learning_rate": 5.3955177479589e-08, - "loss": 0.8605, - "step": 10291 - }, - { - "epoch": 0.9281688235559363, - "grad_norm": 1.3329857681184323, - "learning_rate": 5.3820476912674e-08, - "loss": 0.9375, - "step": 10292 - }, - { - "epoch": 0.9282590070794066, - "grad_norm": 1.3610147255010507, - "learning_rate": 5.3685942404527063e-08, - "loss": 0.9569, - "step": 10293 - }, - { - "epoch": 0.9283491906028769, - "grad_norm": 1.476203834528918, - "learning_rate": 5.355157396662702e-08, - "loss": 0.9187, - "step": 10294 - }, - { - "epoch": 0.9284393741263471, - "grad_norm": 1.2648620660691525, - "learning_rate": 5.34173716104398e-08, - "loss": 0.9179, - "step": 10295 - }, - { - "epoch": 0.9285295576498174, - "grad_norm": 2.452080473875217, - "learning_rate": 5.328333534741536e-08, - "loss": 0.9395, - "step": 10296 - }, - { - "epoch": 0.9286197411732876, - "grad_norm": 1.5939259899432676, - "learning_rate": 5.314946518899099e-08, - "loss": 0.8912, - "step": 10297 - }, - { - "epoch": 0.928709924696758, - "grad_norm": 1.4780155681724505, - "learning_rate": 5.301576114658912e-08, - "loss": 1.0057, - "step": 10298 - }, - { - "epoch": 0.9288001082202282, - "grad_norm": 1.5714705263457345, - "learning_rate": 5.288222323161795e-08, - "loss": 0.9639, - "step": 10299 - }, - { - "epoch": 0.9288902917436984, - "grad_norm": 1.5515048167098302, - "learning_rate": 5.274885145547214e-08, - "loss": 0.9779, - "step": 10300 - }, - { - "epoch": 0.9289804752671686, - "grad_norm": 1.3752721603846598, - "learning_rate": 5.261564582953082e-08, - "loss": 0.9267, - "step": 10301 - }, - { - "epoch": 0.929070658790639, - "grad_norm": 1.8578560659627852, - "learning_rate": 5.248260636516066e-08, - "loss": 0.9137, - "step": 10302 - }, - { - "epoch": 0.9291608423141092, - "grad_norm": 1.6825555490106685, - "learning_rate": 5.2349733073712824e-08, - "loss": 0.8767, - "step": 10303 - }, - { - "epoch": 0.9292510258375795, - "grad_norm": 1.2914566002530705, - "learning_rate": 5.221702596652533e-08, - "loss": 1.0216, - "step": 10304 - }, - { - "epoch": 0.9293412093610497, - "grad_norm": 1.830359438319877, - "learning_rate": 5.208448505492091e-08, - "loss": 0.8829, - "step": 10305 - }, - { - "epoch": 0.92943139288452, - "grad_norm": 1.4050532440875714, - "learning_rate": 5.1952110350208965e-08, - "loss": 0.9187, - "step": 10306 - }, - { - "epoch": 0.9295215764079903, - "grad_norm": 1.47529588091636, - "learning_rate": 5.181990186368446e-08, - "loss": 0.8924, - "step": 10307 - }, - { - "epoch": 0.9296117599314605, - "grad_norm": 1.3581687135152403, - "learning_rate": 5.1687859606627915e-08, - "loss": 1.0304, - "step": 10308 - }, - { - "epoch": 0.9297019434549307, - "grad_norm": 2.4195255820208272, - "learning_rate": 5.1555983590306327e-08, - "loss": 0.8481, - "step": 10309 - }, - { - "epoch": 0.9297921269784011, - "grad_norm": 0.7420340931609598, - "learning_rate": 5.1424273825971806e-08, - "loss": 0.8171, - "step": 10310 - }, - { - "epoch": 0.9298823105018713, - "grad_norm": 1.9027389335225304, - "learning_rate": 5.1292730324862475e-08, - "loss": 0.8977, - "step": 10311 - }, - { - "epoch": 0.9299724940253415, - "grad_norm": 1.274817953015882, - "learning_rate": 5.116135309820224e-08, - "loss": 0.9471, - "step": 10312 - }, - { - "epoch": 0.9300626775488118, - "grad_norm": 1.7008223760396135, - "learning_rate": 5.103014215720147e-08, - "loss": 0.9686, - "step": 10313 - }, - { - "epoch": 0.9301528610722821, - "grad_norm": 1.4500549042928916, - "learning_rate": 5.0899097513055214e-08, - "loss": 0.9936, - "step": 10314 - }, - { - "epoch": 0.9302430445957524, - "grad_norm": 4.0527701429144525, - "learning_rate": 5.076821917694563e-08, - "loss": 0.9253, - "step": 10315 - }, - { - "epoch": 0.9303332281192226, - "grad_norm": 1.4428439942992368, - "learning_rate": 5.063750716003889e-08, - "loss": 0.9849, - "step": 10316 - }, - { - "epoch": 0.9304234116426929, - "grad_norm": 1.5266772130465869, - "learning_rate": 5.050696147348921e-08, - "loss": 0.917, - "step": 10317 - }, - { - "epoch": 0.9305135951661632, - "grad_norm": 1.5696534249284613, - "learning_rate": 5.037658212843454e-08, - "loss": 0.9084, - "step": 10318 - }, - { - "epoch": 0.9306037786896334, - "grad_norm": 1.3261693580839518, - "learning_rate": 5.0246369136000444e-08, - "loss": 0.8225, - "step": 10319 - }, - { - "epoch": 0.9306939622131036, - "grad_norm": 1.4448043342297199, - "learning_rate": 5.011632250729691e-08, - "loss": 1.0068, - "step": 10320 - }, - { - "epoch": 0.930784145736574, - "grad_norm": 1.466636029198016, - "learning_rate": 4.998644225342019e-08, - "loss": 1.0212, - "step": 10321 - }, - { - "epoch": 0.9308743292600442, - "grad_norm": 1.436077077098722, - "learning_rate": 4.9856728385452296e-08, - "loss": 0.9427, - "step": 10322 - }, - { - "epoch": 0.9309645127835144, - "grad_norm": 1.3823064535462108, - "learning_rate": 4.9727180914461485e-08, - "loss": 1.0207, - "step": 10323 - }, - { - "epoch": 0.9310546963069847, - "grad_norm": 1.5464180936241292, - "learning_rate": 4.959779985150137e-08, - "loss": 0.9352, - "step": 10324 - }, - { - "epoch": 0.931144879830455, - "grad_norm": 1.8746951809112293, - "learning_rate": 4.9468585207611105e-08, - "loss": 0.8825, - "step": 10325 - }, - { - "epoch": 0.9312350633539253, - "grad_norm": 1.7073880658525675, - "learning_rate": 4.9339536993816764e-08, - "loss": 0.9052, - "step": 10326 - }, - { - "epoch": 0.9313252468773955, - "grad_norm": 1.3691407556030386, - "learning_rate": 4.921065522112844e-08, - "loss": 0.877, - "step": 10327 - }, - { - "epoch": 0.9314154304008657, - "grad_norm": 1.523237557617539, - "learning_rate": 4.908193990054377e-08, - "loss": 0.9773, - "step": 10328 - }, - { - "epoch": 0.9315056139243361, - "grad_norm": 1.322832580457251, - "learning_rate": 4.89533910430453e-08, - "loss": 0.9556, - "step": 10329 - }, - { - "epoch": 0.9315957974478063, - "grad_norm": 1.4794897702641145, - "learning_rate": 4.8825008659601376e-08, - "loss": 0.924, - "step": 10330 - }, - { - "epoch": 0.9316859809712765, - "grad_norm": 1.753362286670696, - "learning_rate": 4.869679276116634e-08, - "loss": 0.9465, - "step": 10331 - }, - { - "epoch": 0.9317761644947468, - "grad_norm": 1.319720615239763, - "learning_rate": 4.856874335868055e-08, - "loss": 0.9543, - "step": 10332 - }, - { - "epoch": 0.9318663480182171, - "grad_norm": 1.4834301802633607, - "learning_rate": 4.844086046306928e-08, - "loss": 1.0417, - "step": 10333 - }, - { - "epoch": 0.9319565315416873, - "grad_norm": 1.3666151756185412, - "learning_rate": 4.8313144085244896e-08, - "loss": 0.934, - "step": 10334 - }, - { - "epoch": 0.9320467150651576, - "grad_norm": 0.6768455503543627, - "learning_rate": 4.818559423610424e-08, - "loss": 0.8026, - "step": 10335 - }, - { - "epoch": 0.9321368985886278, - "grad_norm": 1.3294630154776816, - "learning_rate": 4.8058210926531284e-08, - "loss": 0.8832, - "step": 10336 - }, - { - "epoch": 0.9322270821120981, - "grad_norm": 1.377153877071187, - "learning_rate": 4.7930994167394435e-08, - "loss": 0.9689, - "step": 10337 - }, - { - "epoch": 0.9323172656355684, - "grad_norm": 1.9463764636163032, - "learning_rate": 4.7803943969548786e-08, - "loss": 0.9761, - "step": 10338 - }, - { - "epoch": 0.9324074491590386, - "grad_norm": 1.448132789271467, - "learning_rate": 4.7677060343834784e-08, - "loss": 0.8869, - "step": 10339 - }, - { - "epoch": 0.932497632682509, - "grad_norm": 1.6965521573819313, - "learning_rate": 4.75503433010791e-08, - "loss": 0.9268, - "step": 10340 - }, - { - "epoch": 0.9325878162059792, - "grad_norm": 1.6448506024822085, - "learning_rate": 4.742379285209419e-08, - "loss": 0.9135, - "step": 10341 - }, - { - "epoch": 0.9326779997294494, - "grad_norm": 1.4227957626219618, - "learning_rate": 4.72974090076772e-08, - "loss": 1.0341, - "step": 10342 - }, - { - "epoch": 0.9327681832529197, - "grad_norm": 1.1933913406111607, - "learning_rate": 4.717119177861262e-08, - "loss": 0.8895, - "step": 10343 - }, - { - "epoch": 0.93285836677639, - "grad_norm": 1.4712146614128254, - "learning_rate": 4.70451411756696e-08, - "loss": 0.9431, - "step": 10344 - }, - { - "epoch": 0.9329485502998602, - "grad_norm": 1.3292495006153546, - "learning_rate": 4.691925720960355e-08, - "loss": 0.9717, - "step": 10345 - }, - { - "epoch": 0.9330387338233305, - "grad_norm": 1.5750218465591745, - "learning_rate": 4.6793539891155645e-08, - "loss": 0.9576, - "step": 10346 - }, - { - "epoch": 0.9331289173468007, - "grad_norm": 1.4391940542389814, - "learning_rate": 4.6667989231052864e-08, - "loss": 0.9103, - "step": 10347 - }, - { - "epoch": 0.933219100870271, - "grad_norm": 1.6060101044219401, - "learning_rate": 4.654260524000797e-08, - "loss": 0.9417, - "step": 10348 - }, - { - "epoch": 0.9333092843937413, - "grad_norm": 1.2097424424548893, - "learning_rate": 4.6417387928719076e-08, - "loss": 0.9279, - "step": 10349 - }, - { - "epoch": 0.9333994679172115, - "grad_norm": 1.7705195279793722, - "learning_rate": 4.629233730787052e-08, - "loss": 0.8093, - "step": 10350 - }, - { - "epoch": 0.9334896514406817, - "grad_norm": 1.6049709615724657, - "learning_rate": 4.616745338813266e-08, - "loss": 0.8972, - "step": 10351 - }, - { - "epoch": 0.9335798349641521, - "grad_norm": 1.2542246476687353, - "learning_rate": 4.6042736180160744e-08, - "loss": 0.9527, - "step": 10352 - }, - { - "epoch": 0.9336700184876223, - "grad_norm": 1.5849107697938765, - "learning_rate": 4.591818569459671e-08, - "loss": 1.0518, - "step": 10353 - }, - { - "epoch": 0.9337602020110926, - "grad_norm": 1.598494032590035, - "learning_rate": 4.5793801942067614e-08, - "loss": 0.9579, - "step": 10354 - }, - { - "epoch": 0.9338503855345628, - "grad_norm": 1.6366624481058754, - "learning_rate": 4.566958493318673e-08, - "loss": 0.7975, - "step": 10355 - }, - { - "epoch": 0.9339405690580331, - "grad_norm": 0.6294611739455648, - "learning_rate": 4.554553467855316e-08, - "loss": 0.8267, - "step": 10356 - }, - { - "epoch": 0.9340307525815034, - "grad_norm": 1.6207119043884286, - "learning_rate": 4.5421651188751074e-08, - "loss": 0.9463, - "step": 10357 - }, - { - "epoch": 0.9341209361049736, - "grad_norm": 1.407912140938984, - "learning_rate": 4.529793447435137e-08, - "loss": 0.9496, - "step": 10358 - }, - { - "epoch": 0.9342111196284438, - "grad_norm": 1.330297690038705, - "learning_rate": 4.5174384545909824e-08, - "loss": 0.9364, - "step": 10359 - }, - { - "epoch": 0.9343013031519142, - "grad_norm": 1.6650688459579637, - "learning_rate": 4.505100141396867e-08, - "loss": 0.9755, - "step": 10360 - }, - { - "epoch": 0.9343914866753844, - "grad_norm": 1.6475602116095502, - "learning_rate": 4.492778508905548e-08, - "loss": 0.9609, - "step": 10361 - }, - { - "epoch": 0.9344816701988546, - "grad_norm": 1.4716962928569242, - "learning_rate": 4.480473558168385e-08, - "loss": 0.8837, - "step": 10362 - }, - { - "epoch": 0.934571853722325, - "grad_norm": 1.6189701740658382, - "learning_rate": 4.4681852902352936e-08, - "loss": 0.8439, - "step": 10363 - }, - { - "epoch": 0.9346620372457952, - "grad_norm": 1.5496394568702303, - "learning_rate": 4.455913706154812e-08, - "loss": 0.8463, - "step": 10364 - }, - { - "epoch": 0.9347522207692655, - "grad_norm": 1.5173907661401391, - "learning_rate": 4.443658806973949e-08, - "loss": 0.9617, - "step": 10365 - }, - { - "epoch": 0.9348424042927357, - "grad_norm": 0.6576458753917178, - "learning_rate": 4.431420593738444e-08, - "loss": 0.8065, - "step": 10366 - }, - { - "epoch": 0.934932587816206, - "grad_norm": 1.545180114691413, - "learning_rate": 4.419199067492485e-08, - "loss": 0.8405, - "step": 10367 - }, - { - "epoch": 0.9350227713396763, - "grad_norm": 1.599370731349554, - "learning_rate": 4.4069942292788596e-08, - "loss": 0.9415, - "step": 10368 - }, - { - "epoch": 0.9351129548631465, - "grad_norm": 1.5904177067851275, - "learning_rate": 4.39480608013898e-08, - "loss": 0.9267, - "step": 10369 - }, - { - "epoch": 0.9352031383866167, - "grad_norm": 1.2639859736304133, - "learning_rate": 4.3826346211128126e-08, - "loss": 1.0171, - "step": 10370 - }, - { - "epoch": 0.9352933219100871, - "grad_norm": 1.3316435935003306, - "learning_rate": 4.370479853238884e-08, - "loss": 0.9356, - "step": 10371 - }, - { - "epoch": 0.9353835054335573, - "grad_norm": 1.4648593749166676, - "learning_rate": 4.3583417775542756e-08, - "loss": 0.9039, - "step": 10372 - }, - { - "epoch": 0.9354736889570275, - "grad_norm": 1.2451556749566333, - "learning_rate": 4.3462203950947575e-08, - "loss": 0.8879, - "step": 10373 - }, - { - "epoch": 0.9355638724804978, - "grad_norm": 1.4883463687617786, - "learning_rate": 4.3341157068944814e-08, - "loss": 0.916, - "step": 10374 - }, - { - "epoch": 0.9356540560039681, - "grad_norm": 1.510055691784664, - "learning_rate": 4.322027713986376e-08, - "loss": 0.9449, - "step": 10375 - }, - { - "epoch": 0.9357442395274383, - "grad_norm": 1.6759934368824427, - "learning_rate": 4.309956417401816e-08, - "loss": 1.0061, - "step": 10376 - }, - { - "epoch": 0.9358344230509086, - "grad_norm": 1.3278441637105918, - "learning_rate": 4.297901818170801e-08, - "loss": 0.9002, - "step": 10377 - }, - { - "epoch": 0.9359246065743788, - "grad_norm": 1.2508827430391671, - "learning_rate": 4.285863917321886e-08, - "loss": 0.8635, - "step": 10378 - }, - { - "epoch": 0.9360147900978492, - "grad_norm": 1.4464557971444034, - "learning_rate": 4.2738427158822253e-08, - "loss": 0.8679, - "step": 10379 - }, - { - "epoch": 0.9361049736213194, - "grad_norm": 1.5900569838383238, - "learning_rate": 4.261838214877511e-08, - "loss": 0.9448, - "step": 10380 - }, - { - "epoch": 0.9361951571447896, - "grad_norm": 1.2577877160704882, - "learning_rate": 4.249850415332079e-08, - "loss": 0.9402, - "step": 10381 - }, - { - "epoch": 0.9362853406682599, - "grad_norm": 1.4732054639940233, - "learning_rate": 4.237879318268756e-08, - "loss": 0.9958, - "step": 10382 - }, - { - "epoch": 0.9363755241917302, - "grad_norm": 1.3073136630493774, - "learning_rate": 4.225924924708968e-08, - "loss": 0.8616, - "step": 10383 - }, - { - "epoch": 0.9364657077152004, - "grad_norm": 1.5640242961596413, - "learning_rate": 4.2139872356727665e-08, - "loss": 0.9462, - "step": 10384 - }, - { - "epoch": 0.9365558912386707, - "grad_norm": 1.564980407324075, - "learning_rate": 4.202066252178738e-08, - "loss": 0.9061, - "step": 10385 - }, - { - "epoch": 0.936646074762141, - "grad_norm": 1.3205876261256055, - "learning_rate": 4.1901619752440445e-08, - "loss": 1.0285, - "step": 10386 - }, - { - "epoch": 0.9367362582856112, - "grad_norm": 1.4858114419730462, - "learning_rate": 4.178274405884363e-08, - "loss": 0.9345, - "step": 10387 - }, - { - "epoch": 0.9368264418090815, - "grad_norm": 1.3896701406257184, - "learning_rate": 4.166403545114105e-08, - "loss": 0.943, - "step": 10388 - }, - { - "epoch": 0.9369166253325517, - "grad_norm": 1.729446419143135, - "learning_rate": 4.154549393946083e-08, - "loss": 0.9865, - "step": 10389 - }, - { - "epoch": 0.937006808856022, - "grad_norm": 1.3778044104992946, - "learning_rate": 4.14271195339182e-08, - "loss": 0.96, - "step": 10390 - }, - { - "epoch": 0.9370969923794923, - "grad_norm": 1.311846388695206, - "learning_rate": 4.1308912244613084e-08, - "loss": 0.8999, - "step": 10391 - }, - { - "epoch": 0.9371871759029625, - "grad_norm": 1.3176361134556858, - "learning_rate": 4.1190872081631636e-08, - "loss": 0.8884, - "step": 10392 - }, - { - "epoch": 0.9372773594264328, - "grad_norm": 1.6218396780177216, - "learning_rate": 4.107299905504558e-08, - "loss": 0.9937, - "step": 10393 - }, - { - "epoch": 0.9373675429499031, - "grad_norm": 1.3708741536580014, - "learning_rate": 4.095529317491286e-08, - "loss": 0.9302, - "step": 10394 - }, - { - "epoch": 0.9374577264733733, - "grad_norm": 1.7267264033751952, - "learning_rate": 4.0837754451276575e-08, - "loss": 0.9463, - "step": 10395 - }, - { - "epoch": 0.9375479099968436, - "grad_norm": 1.305429807090109, - "learning_rate": 4.072038289416557e-08, - "loss": 0.9241, - "step": 10396 - }, - { - "epoch": 0.9376380935203138, - "grad_norm": 1.3288698239635184, - "learning_rate": 4.0603178513595185e-08, - "loss": 0.9396, - "step": 10397 - }, - { - "epoch": 0.9377282770437841, - "grad_norm": 2.279818778281335, - "learning_rate": 4.0486141319565624e-08, - "loss": 0.9407, - "step": 10398 - }, - { - "epoch": 0.9378184605672544, - "grad_norm": 1.4787750621229347, - "learning_rate": 4.0369271322062916e-08, - "loss": 0.8805, - "step": 10399 - }, - { - "epoch": 0.9379086440907246, - "grad_norm": 1.4374435247854547, - "learning_rate": 4.0252568531059295e-08, - "loss": 0.9549, - "step": 10400 - }, - { - "epoch": 0.9379988276141948, - "grad_norm": 1.7122424997128403, - "learning_rate": 4.013603295651235e-08, - "loss": 0.9341, - "step": 10401 - }, - { - "epoch": 0.9380890111376652, - "grad_norm": 1.3964612412037092, - "learning_rate": 4.001966460836592e-08, - "loss": 0.9632, - "step": 10402 - }, - { - "epoch": 0.9381791946611354, - "grad_norm": 1.6749463257801382, - "learning_rate": 3.990346349654894e-08, - "loss": 1.0082, - "step": 10403 - }, - { - "epoch": 0.9382693781846057, - "grad_norm": 1.1728162926953833, - "learning_rate": 3.9787429630975924e-08, - "loss": 0.9101, - "step": 10404 - }, - { - "epoch": 0.9383595617080759, - "grad_norm": 1.33531593899024, - "learning_rate": 3.967156302154828e-08, - "loss": 0.9253, - "step": 10405 - }, - { - "epoch": 0.9384497452315462, - "grad_norm": 2.2699987878670167, - "learning_rate": 3.955586367815189e-08, - "loss": 0.8912, - "step": 10406 - }, - { - "epoch": 0.9385399287550165, - "grad_norm": 1.6214257325495072, - "learning_rate": 3.944033161065907e-08, - "loss": 1.0034, - "step": 10407 - }, - { - "epoch": 0.9386301122784867, - "grad_norm": 1.4420317659781166, - "learning_rate": 3.93249668289275e-08, - "loss": 0.8296, - "step": 10408 - }, - { - "epoch": 0.9387202958019569, - "grad_norm": 1.7430696178677902, - "learning_rate": 3.920976934280063e-08, - "loss": 0.8649, - "step": 10409 - }, - { - "epoch": 0.9388104793254273, - "grad_norm": 1.5883176635230747, - "learning_rate": 3.909473916210815e-08, - "loss": 1.0147, - "step": 10410 - }, - { - "epoch": 0.9389006628488975, - "grad_norm": 1.7770445184020265, - "learning_rate": 3.897987629666488e-08, - "loss": 0.9012, - "step": 10411 - }, - { - "epoch": 0.9389908463723677, - "grad_norm": 1.1720452248599993, - "learning_rate": 3.886518075627143e-08, - "loss": 1.0049, - "step": 10412 - }, - { - "epoch": 0.9390810298958381, - "grad_norm": 1.7480611960394512, - "learning_rate": 3.875065255071419e-08, - "loss": 0.9264, - "step": 10413 - }, - { - "epoch": 0.9391712134193083, - "grad_norm": 1.5715161924580856, - "learning_rate": 3.863629168976579e-08, - "loss": 0.9076, - "step": 10414 - }, - { - "epoch": 0.9392613969427785, - "grad_norm": 1.365612627056296, - "learning_rate": 3.852209818318375e-08, - "loss": 0.9845, - "step": 10415 - }, - { - "epoch": 0.9393515804662488, - "grad_norm": 1.5675516483248695, - "learning_rate": 3.840807204071161e-08, - "loss": 0.8382, - "step": 10416 - }, - { - "epoch": 0.9394417639897191, - "grad_norm": 1.5207501305939573, - "learning_rate": 3.829421327207894e-08, - "loss": 0.9179, - "step": 10417 - }, - { - "epoch": 0.9395319475131894, - "grad_norm": 1.4732090243951557, - "learning_rate": 3.8180521887000825e-08, - "loss": 0.8903, - "step": 10418 - }, - { - "epoch": 0.9396221310366596, - "grad_norm": 0.6519526148497526, - "learning_rate": 3.806699789517775e-08, - "loss": 0.8418, - "step": 10419 - }, - { - "epoch": 0.9397123145601298, - "grad_norm": 1.4327824347117415, - "learning_rate": 3.7953641306296635e-08, - "loss": 0.9197, - "step": 10420 - }, - { - "epoch": 0.9398024980836002, - "grad_norm": 1.6243120351097395, - "learning_rate": 3.784045213002951e-08, - "loss": 0.7858, - "step": 10421 - }, - { - "epoch": 0.9398926816070704, - "grad_norm": 1.461200629216896, - "learning_rate": 3.7727430376033986e-08, - "loss": 0.9548, - "step": 10422 - }, - { - "epoch": 0.9399828651305406, - "grad_norm": 1.7881436049591142, - "learning_rate": 3.7614576053954126e-08, - "loss": 0.9385, - "step": 10423 - }, - { - "epoch": 0.9400730486540109, - "grad_norm": 1.4521146974318802, - "learning_rate": 3.75018891734189e-08, - "loss": 0.9345, - "step": 10424 - }, - { - "epoch": 0.9401632321774812, - "grad_norm": 1.5996071392784283, - "learning_rate": 3.738936974404372e-08, - "loss": 0.9761, - "step": 10425 - }, - { - "epoch": 0.9402534157009514, - "grad_norm": 1.1478830672999092, - "learning_rate": 3.7277017775429354e-08, - "loss": 0.9975, - "step": 10426 - }, - { - "epoch": 0.9403435992244217, - "grad_norm": 1.2596360250832623, - "learning_rate": 3.7164833277162136e-08, - "loss": 0.9601, - "step": 10427 - }, - { - "epoch": 0.9404337827478919, - "grad_norm": 1.6550804183184193, - "learning_rate": 3.705281625881418e-08, - "loss": 0.9145, - "step": 10428 - }, - { - "epoch": 0.9405239662713623, - "grad_norm": 1.376463631273854, - "learning_rate": 3.694096672994362e-08, - "loss": 0.8467, - "step": 10429 - }, - { - "epoch": 0.9406141497948325, - "grad_norm": 1.4943376797651098, - "learning_rate": 3.682928470009394e-08, - "loss": 0.9892, - "step": 10430 - }, - { - "epoch": 0.9407043333183027, - "grad_norm": 1.4703798382021254, - "learning_rate": 3.6717770178794406e-08, - "loss": 0.8967, - "step": 10431 - }, - { - "epoch": 0.940794516841773, - "grad_norm": 1.3147760819210759, - "learning_rate": 3.6606423175560287e-08, - "loss": 1.0301, - "step": 10432 - }, - { - "epoch": 0.9408847003652433, - "grad_norm": 0.6521845411993009, - "learning_rate": 3.649524369989221e-08, - "loss": 0.7677, - "step": 10433 - }, - { - "epoch": 0.9409748838887135, - "grad_norm": 1.4068289942360204, - "learning_rate": 3.638423176127636e-08, - "loss": 0.9671, - "step": 10434 - }, - { - "epoch": 0.9410650674121838, - "grad_norm": 1.5383953836779434, - "learning_rate": 3.6273387369185396e-08, - "loss": 0.9721, - "step": 10435 - }, - { - "epoch": 0.9411552509356541, - "grad_norm": 1.4208740232277537, - "learning_rate": 3.616271053307685e-08, - "loss": 0.8845, - "step": 10436 - }, - { - "epoch": 0.9412454344591243, - "grad_norm": 1.5010907657736972, - "learning_rate": 3.6052201262394275e-08, - "loss": 0.9562, - "step": 10437 - }, - { - "epoch": 0.9413356179825946, - "grad_norm": 1.845052226911677, - "learning_rate": 3.5941859566566816e-08, - "loss": 0.9846, - "step": 10438 - }, - { - "epoch": 0.9414258015060648, - "grad_norm": 0.7086992347203953, - "learning_rate": 3.583168545500981e-08, - "loss": 0.8144, - "step": 10439 - }, - { - "epoch": 0.9415159850295352, - "grad_norm": 1.3847772896387018, - "learning_rate": 3.5721678937123746e-08, - "loss": 0.8832, - "step": 10440 - }, - { - "epoch": 0.9416061685530054, - "grad_norm": 1.348995704084135, - "learning_rate": 3.561184002229467e-08, - "loss": 1.0154, - "step": 10441 - }, - { - "epoch": 0.9416963520764756, - "grad_norm": 1.3238704076944308, - "learning_rate": 3.550216871989531e-08, - "loss": 0.8805, - "step": 10442 - }, - { - "epoch": 0.9417865355999459, - "grad_norm": 0.6314281108482483, - "learning_rate": 3.539266503928262e-08, - "loss": 0.8608, - "step": 10443 - }, - { - "epoch": 0.9418767191234162, - "grad_norm": 1.3230377602411065, - "learning_rate": 3.528332898980091e-08, - "loss": 0.9475, - "step": 10444 - }, - { - "epoch": 0.9419669026468864, - "grad_norm": 1.3318764157513703, - "learning_rate": 3.517416058077849e-08, - "loss": 0.9026, - "step": 10445 - }, - { - "epoch": 0.9420570861703567, - "grad_norm": 1.42955216564572, - "learning_rate": 3.506515982153102e-08, - "loss": 0.8439, - "step": 10446 - }, - { - "epoch": 0.9421472696938269, - "grad_norm": 1.3595605920839529, - "learning_rate": 3.495632672135862e-08, - "loss": 0.942, - "step": 10447 - }, - { - "epoch": 0.9422374532172972, - "grad_norm": 1.2060966453512552, - "learning_rate": 3.4847661289547417e-08, - "loss": 0.964, - "step": 10448 - }, - { - "epoch": 0.9423276367407675, - "grad_norm": 1.5098171052631557, - "learning_rate": 3.473916353536932e-08, - "loss": 0.889, - "step": 10449 - }, - { - "epoch": 0.9424178202642377, - "grad_norm": 1.4167407895312676, - "learning_rate": 3.463083346808249e-08, - "loss": 0.9099, - "step": 10450 - }, - { - "epoch": 0.9425080037877079, - "grad_norm": 2.348134326972135, - "learning_rate": 3.452267109692975e-08, - "loss": 0.8319, - "step": 10451 - }, - { - "epoch": 0.9425981873111783, - "grad_norm": 1.3701632132382093, - "learning_rate": 3.441467643114016e-08, - "loss": 0.9298, - "step": 10452 - }, - { - "epoch": 0.9426883708346485, - "grad_norm": 0.5957703599352218, - "learning_rate": 3.430684947992857e-08, - "loss": 0.7766, - "step": 10453 - }, - { - "epoch": 0.9427785543581187, - "grad_norm": 1.6999475779585107, - "learning_rate": 3.419919025249518e-08, - "loss": 0.9508, - "step": 10454 - }, - { - "epoch": 0.942868737881589, - "grad_norm": 1.431752532290655, - "learning_rate": 3.40916987580262e-08, - "loss": 0.9547, - "step": 10455 - }, - { - "epoch": 0.9429589214050593, - "grad_norm": 1.4367394301153629, - "learning_rate": 3.398437500569362e-08, - "loss": 0.9149, - "step": 10456 - }, - { - "epoch": 0.9430491049285296, - "grad_norm": 1.4105210189118684, - "learning_rate": 3.3877219004654347e-08, - "loss": 0.8218, - "step": 10457 - }, - { - "epoch": 0.9431392884519998, - "grad_norm": 1.5574498005916322, - "learning_rate": 3.3770230764051946e-08, - "loss": 0.973, - "step": 10458 - }, - { - "epoch": 0.9432294719754701, - "grad_norm": 1.541544582928634, - "learning_rate": 3.366341029301534e-08, - "loss": 0.9488, - "step": 10459 - }, - { - "epoch": 0.9433196554989404, - "grad_norm": 1.6592937787252175, - "learning_rate": 3.355675760065857e-08, - "loss": 0.9398, - "step": 10460 - }, - { - "epoch": 0.9434098390224106, - "grad_norm": 0.7316006545163497, - "learning_rate": 3.345027269608236e-08, - "loss": 0.7448, - "step": 10461 - }, - { - "epoch": 0.9435000225458808, - "grad_norm": 0.9612085843930424, - "learning_rate": 3.334395558837211e-08, - "loss": 0.8107, - "step": 10462 - }, - { - "epoch": 0.9435902060693512, - "grad_norm": 1.2332960314307686, - "learning_rate": 3.3237806286599667e-08, - "loss": 0.8457, - "step": 10463 - }, - { - "epoch": 0.9436803895928214, - "grad_norm": 1.368373509017426, - "learning_rate": 3.313182479982224e-08, - "loss": 0.9272, - "step": 10464 - }, - { - "epoch": 0.9437705731162916, - "grad_norm": 1.4318620164499867, - "learning_rate": 3.302601113708259e-08, - "loss": 1.0, - "step": 10465 - }, - { - "epoch": 0.9438607566397619, - "grad_norm": 1.1823366747564164, - "learning_rate": 3.292036530740972e-08, - "loss": 0.984, - "step": 10466 - }, - { - "epoch": 0.9439509401632322, - "grad_norm": 1.2959828352880554, - "learning_rate": 3.2814887319817294e-08, - "loss": 0.932, - "step": 10467 - }, - { - "epoch": 0.9440411236867025, - "grad_norm": 1.466563199829741, - "learning_rate": 3.270957718330591e-08, - "loss": 0.9811, - "step": 10468 - }, - { - "epoch": 0.9441313072101727, - "grad_norm": 1.207679993934879, - "learning_rate": 3.260443490686082e-08, - "loss": 0.9779, - "step": 10469 - }, - { - "epoch": 0.9442214907336429, - "grad_norm": 1.5910296691679282, - "learning_rate": 3.249946049945351e-08, - "loss": 0.9467, - "step": 10470 - }, - { - "epoch": 0.9443116742571133, - "grad_norm": 1.622159455919885, - "learning_rate": 3.239465397004082e-08, - "loss": 0.9462, - "step": 10471 - }, - { - "epoch": 0.9444018577805835, - "grad_norm": 1.4706257955578987, - "learning_rate": 3.229001532756559e-08, - "loss": 0.8959, - "step": 10472 - }, - { - "epoch": 0.9444920413040537, - "grad_norm": 1.8796551455077966, - "learning_rate": 3.218554458095602e-08, - "loss": 0.9308, - "step": 10473 - }, - { - "epoch": 0.944582224827524, - "grad_norm": 1.4743762912073501, - "learning_rate": 3.20812417391263e-08, - "loss": 0.9457, - "step": 10474 - }, - { - "epoch": 0.9446724083509943, - "grad_norm": 1.1471918837656565, - "learning_rate": 3.1977106810975764e-08, - "loss": 0.9292, - "step": 10475 - }, - { - "epoch": 0.9447625918744645, - "grad_norm": 0.8301608414669281, - "learning_rate": 3.187313980539042e-08, - "loss": 0.8698, - "step": 10476 - }, - { - "epoch": 0.9448527753979348, - "grad_norm": 1.2325685054164064, - "learning_rate": 3.176934073124071e-08, - "loss": 1.023, - "step": 10477 - }, - { - "epoch": 0.944942958921405, - "grad_norm": 1.5198163154944144, - "learning_rate": 3.166570959738357e-08, - "loss": 0.9321, - "step": 10478 - }, - { - "epoch": 0.9450331424448754, - "grad_norm": 1.4681966023139628, - "learning_rate": 3.1562246412661476e-08, - "loss": 0.8763, - "step": 10479 - }, - { - "epoch": 0.9451233259683456, - "grad_norm": 1.3793137248178702, - "learning_rate": 3.145895118590225e-08, - "loss": 0.9713, - "step": 10480 - }, - { - "epoch": 0.9452135094918158, - "grad_norm": 1.5134034540228978, - "learning_rate": 3.135582392591996e-08, - "loss": 0.8506, - "step": 10481 - }, - { - "epoch": 0.9453036930152862, - "grad_norm": 0.5967843421272248, - "learning_rate": 3.125286464151333e-08, - "loss": 0.7599, - "step": 10482 - }, - { - "epoch": 0.9453938765387564, - "grad_norm": 1.65423598284387, - "learning_rate": 3.115007334146824e-08, - "loss": 0.909, - "step": 10483 - }, - { - "epoch": 0.9454840600622266, - "grad_norm": 0.6558588769762013, - "learning_rate": 3.104745003455478e-08, - "loss": 0.8045, - "step": 10484 - }, - { - "epoch": 0.9455742435856969, - "grad_norm": 1.3830052473143841, - "learning_rate": 3.094499472952972e-08, - "loss": 0.9538, - "step": 10485 - }, - { - "epoch": 0.9456644271091672, - "grad_norm": 1.9823779773080035, - "learning_rate": 3.084270743513495e-08, - "loss": 0.9034, - "step": 10486 - }, - { - "epoch": 0.9457546106326374, - "grad_norm": 1.5623211567570177, - "learning_rate": 3.074058816009817e-08, - "loss": 0.8292, - "step": 10487 - }, - { - "epoch": 0.9458447941561077, - "grad_norm": 1.501894946019456, - "learning_rate": 3.063863691313284e-08, - "loss": 0.8903, - "step": 10488 - }, - { - "epoch": 0.9459349776795779, - "grad_norm": 1.4656167394083988, - "learning_rate": 3.0536853702937794e-08, - "loss": 0.8799, - "step": 10489 - }, - { - "epoch": 0.9460251612030482, - "grad_norm": 1.7157198464557717, - "learning_rate": 3.043523853819807e-08, - "loss": 0.94, - "step": 10490 - }, - { - "epoch": 0.9461153447265185, - "grad_norm": 1.8711696919935807, - "learning_rate": 3.0333791427583855e-08, - "loss": 0.951, - "step": 10491 - }, - { - "epoch": 0.9462055282499887, - "grad_norm": 1.337677475037118, - "learning_rate": 3.023251237975111e-08, - "loss": 1.0575, - "step": 10492 - }, - { - "epoch": 0.946295711773459, - "grad_norm": 0.6711053210005902, - "learning_rate": 3.0131401403341584e-08, - "loss": 0.8219, - "step": 10493 - }, - { - "epoch": 0.9463858952969293, - "grad_norm": 1.2455165566623039, - "learning_rate": 3.00304585069826e-08, - "loss": 0.9185, - "step": 10494 - }, - { - "epoch": 0.9464760788203995, - "grad_norm": 1.926827291714054, - "learning_rate": 2.992968369928728e-08, - "loss": 0.9292, - "step": 10495 - }, - { - "epoch": 0.9465662623438698, - "grad_norm": 1.420988875719375, - "learning_rate": 2.982907698885429e-08, - "loss": 0.9506, - "step": 10496 - }, - { - "epoch": 0.94665644586734, - "grad_norm": 1.945348716306796, - "learning_rate": 2.9728638384267645e-08, - "loss": 0.9591, - "step": 10497 - }, - { - "epoch": 0.9467466293908103, - "grad_norm": 1.5543727843756199, - "learning_rate": 2.962836789409784e-08, - "loss": 0.9424, - "step": 10498 - }, - { - "epoch": 0.9468368129142806, - "grad_norm": 1.381078851030149, - "learning_rate": 2.95282655268998e-08, - "loss": 1.0311, - "step": 10499 - }, - { - "epoch": 0.9469269964377508, - "grad_norm": 2.2087173367963464, - "learning_rate": 2.942833129121558e-08, - "loss": 0.9347, - "step": 10500 - }, - { - "epoch": 0.947017179961221, - "grad_norm": 1.366240604015824, - "learning_rate": 2.9328565195571475e-08, - "loss": 0.8635, - "step": 10501 - }, - { - "epoch": 0.9471073634846914, - "grad_norm": 1.487002647023003, - "learning_rate": 2.9228967248480675e-08, - "loss": 1.0056, - "step": 10502 - }, - { - "epoch": 0.9471975470081616, - "grad_norm": 1.6503758175879744, - "learning_rate": 2.912953745844082e-08, - "loss": 0.9798, - "step": 10503 - }, - { - "epoch": 0.9472877305316318, - "grad_norm": 1.3728130027445309, - "learning_rate": 2.9030275833936247e-08, - "loss": 0.9271, - "step": 10504 - }, - { - "epoch": 0.9473779140551022, - "grad_norm": 1.5930283352426555, - "learning_rate": 2.893118238343617e-08, - "loss": 0.8441, - "step": 10505 - }, - { - "epoch": 0.9474680975785724, - "grad_norm": 1.5902503990840906, - "learning_rate": 2.8832257115396052e-08, - "loss": 0.8515, - "step": 10506 - }, - { - "epoch": 0.9475582811020427, - "grad_norm": 1.4729797655516967, - "learning_rate": 2.873350003825692e-08, - "loss": 0.9206, - "step": 10507 - }, - { - "epoch": 0.9476484646255129, - "grad_norm": 0.5981721832734556, - "learning_rate": 2.8634911160444696e-08, - "loss": 0.6991, - "step": 10508 - }, - { - "epoch": 0.9477386481489832, - "grad_norm": 1.344225865759501, - "learning_rate": 2.853649049037199e-08, - "loss": 0.999, - "step": 10509 - }, - { - "epoch": 0.9478288316724535, - "grad_norm": 1.539871235703293, - "learning_rate": 2.8438238036436525e-08, - "loss": 0.9917, - "step": 10510 - }, - { - "epoch": 0.9479190151959237, - "grad_norm": 2.04628518029453, - "learning_rate": 2.834015380702137e-08, - "loss": 0.9363, - "step": 10511 - }, - { - "epoch": 0.9480091987193939, - "grad_norm": 0.6150428110434254, - "learning_rate": 2.824223781049606e-08, - "loss": 0.7385, - "step": 10512 - }, - { - "epoch": 0.9480993822428643, - "grad_norm": 2.032429162136477, - "learning_rate": 2.8144490055215465e-08, - "loss": 1.0, - "step": 10513 - }, - { - "epoch": 0.9481895657663345, - "grad_norm": 1.4200736572074462, - "learning_rate": 2.8046910549519355e-08, - "loss": 0.9733, - "step": 10514 - }, - { - "epoch": 0.9482797492898047, - "grad_norm": 1.6150434351179244, - "learning_rate": 2.794949930173418e-08, - "loss": 0.9441, - "step": 10515 - }, - { - "epoch": 0.948369932813275, - "grad_norm": 1.451164563906339, - "learning_rate": 2.7852256320171296e-08, - "loss": 0.9707, - "step": 10516 - }, - { - "epoch": 0.9484601163367453, - "grad_norm": 1.6049891588552008, - "learning_rate": 2.775518161312851e-08, - "loss": 0.7917, - "step": 10517 - }, - { - "epoch": 0.9485502998602156, - "grad_norm": 1.2390451571969365, - "learning_rate": 2.76582751888883e-08, - "loss": 0.9668, - "step": 10518 - }, - { - "epoch": 0.9486404833836858, - "grad_norm": 1.634039969596264, - "learning_rate": 2.756153705571962e-08, - "loss": 0.9529, - "step": 10519 - }, - { - "epoch": 0.948730666907156, - "grad_norm": 1.23590230011518, - "learning_rate": 2.74649672218763e-08, - "loss": 0.9225, - "step": 10520 - }, - { - "epoch": 0.9488208504306264, - "grad_norm": 1.40184543437283, - "learning_rate": 2.7368565695598424e-08, - "loss": 0.9288, - "step": 10521 - }, - { - "epoch": 0.9489110339540966, - "grad_norm": 1.3508379543177556, - "learning_rate": 2.727233248511185e-08, - "loss": 0.8666, - "step": 10522 - }, - { - "epoch": 0.9490012174775668, - "grad_norm": 1.6618425489042676, - "learning_rate": 2.71762675986269e-08, - "loss": 0.8899, - "step": 10523 - }, - { - "epoch": 0.9490914010010371, - "grad_norm": 1.2869458015014452, - "learning_rate": 2.7080371044341242e-08, - "loss": 0.9763, - "step": 10524 - }, - { - "epoch": 0.9491815845245074, - "grad_norm": 1.307740437152708, - "learning_rate": 2.6984642830436556e-08, - "loss": 0.9608, - "step": 10525 - }, - { - "epoch": 0.9492717680479776, - "grad_norm": 1.457990560665893, - "learning_rate": 2.688908296508141e-08, - "loss": 0.9194, - "step": 10526 - }, - { - "epoch": 0.9493619515714479, - "grad_norm": 1.6459757425367882, - "learning_rate": 2.679369145642929e-08, - "loss": 0.9884, - "step": 10527 - }, - { - "epoch": 0.9494521350949181, - "grad_norm": 1.4816833988756788, - "learning_rate": 2.669846831261946e-08, - "loss": 0.9591, - "step": 10528 - }, - { - "epoch": 0.9495423186183884, - "grad_norm": 1.9106532037442137, - "learning_rate": 2.6603413541776976e-08, - "loss": 0.9279, - "step": 10529 - }, - { - "epoch": 0.9496325021418587, - "grad_norm": 1.286497952035222, - "learning_rate": 2.6508527152012683e-08, - "loss": 0.8938, - "step": 10530 - }, - { - "epoch": 0.9497226856653289, - "grad_norm": 1.3001214264133907, - "learning_rate": 2.641380915142233e-08, - "loss": 0.8405, - "step": 10531 - }, - { - "epoch": 0.9498128691887993, - "grad_norm": 1.5060093034454365, - "learning_rate": 2.6319259548088334e-08, - "loss": 0.9547, - "step": 10532 - }, - { - "epoch": 0.9499030527122695, - "grad_norm": 1.7403259595753606, - "learning_rate": 2.6224878350077585e-08, - "loss": 0.9371, - "step": 10533 - }, - { - "epoch": 0.9499932362357397, - "grad_norm": 1.422804224906399, - "learning_rate": 2.6130665565443633e-08, - "loss": 0.8518, - "step": 10534 - }, - { - "epoch": 0.95008341975921, - "grad_norm": 1.221027398335501, - "learning_rate": 2.603662120222494e-08, - "loss": 0.9401, - "step": 10535 - }, - { - "epoch": 0.9501736032826803, - "grad_norm": 1.5757891297793503, - "learning_rate": 2.59427452684462e-08, - "loss": 0.7829, - "step": 10536 - }, - { - "epoch": 0.9502637868061505, - "grad_norm": 1.3060576958990326, - "learning_rate": 2.5849037772117443e-08, - "loss": 0.9146, - "step": 10537 - }, - { - "epoch": 0.9503539703296208, - "grad_norm": 1.4509326434706304, - "learning_rate": 2.575549872123384e-08, - "loss": 0.9644, - "step": 10538 - }, - { - "epoch": 0.950444153853091, - "grad_norm": 1.3303923606782833, - "learning_rate": 2.5662128123776994e-08, - "loss": 0.9375, - "step": 10539 - }, - { - "epoch": 0.9505343373765613, - "grad_norm": 1.4231680555368496, - "learning_rate": 2.5568925987713875e-08, - "loss": 0.9276, - "step": 10540 - }, - { - "epoch": 0.9506245209000316, - "grad_norm": 1.6574472382722323, - "learning_rate": 2.5475892320996785e-08, - "loss": 0.9135, - "step": 10541 - }, - { - "epoch": 0.9507147044235018, - "grad_norm": 0.7542978959395198, - "learning_rate": 2.5383027131564038e-08, - "loss": 0.8135, - "step": 10542 - }, - { - "epoch": 0.950804887946972, - "grad_norm": 1.664294231201215, - "learning_rate": 2.52903304273393e-08, - "loss": 0.9178, - "step": 10543 - }, - { - "epoch": 0.9508950714704424, - "grad_norm": 1.2451607012157555, - "learning_rate": 2.519780221623202e-08, - "loss": 0.9214, - "step": 10544 - }, - { - "epoch": 0.9509852549939126, - "grad_norm": 1.663281897907122, - "learning_rate": 2.510544250613722e-08, - "loss": 0.8923, - "step": 10545 - }, - { - "epoch": 0.9510754385173829, - "grad_norm": 1.4684947989309682, - "learning_rate": 2.501325130493548e-08, - "loss": 1.029, - "step": 10546 - }, - { - "epoch": 0.9511656220408531, - "grad_norm": 0.7311373827689976, - "learning_rate": 2.4921228620493395e-08, - "loss": 0.811, - "step": 10547 - }, - { - "epoch": 0.9512558055643234, - "grad_norm": 0.7336942073789368, - "learning_rate": 2.4829374460662244e-08, - "loss": 0.839, - "step": 10548 - }, - { - "epoch": 0.9513459890877937, - "grad_norm": 1.2778607643287527, - "learning_rate": 2.473768883327976e-08, - "loss": 0.9495, - "step": 10549 - }, - { - "epoch": 0.9514361726112639, - "grad_norm": 1.4539351307226074, - "learning_rate": 2.464617174616923e-08, - "loss": 0.904, - "step": 10550 - }, - { - "epoch": 0.9515263561347341, - "grad_norm": 1.5173097665316415, - "learning_rate": 2.455482320713953e-08, - "loss": 0.9338, - "step": 10551 - }, - { - "epoch": 0.9516165396582045, - "grad_norm": 1.3965581701027616, - "learning_rate": 2.4463643223984643e-08, - "loss": 0.993, - "step": 10552 - }, - { - "epoch": 0.9517067231816747, - "grad_norm": 1.7681417167361462, - "learning_rate": 2.4372631804484567e-08, - "loss": 1.0115, - "step": 10553 - }, - { - "epoch": 0.9517969067051449, - "grad_norm": 1.516378864117687, - "learning_rate": 2.4281788956405313e-08, - "loss": 1.0152, - "step": 10554 - }, - { - "epoch": 0.9518870902286153, - "grad_norm": 1.5934791334765175, - "learning_rate": 2.4191114687497572e-08, - "loss": 0.8736, - "step": 10555 - }, - { - "epoch": 0.9519772737520855, - "grad_norm": 1.7880605365609454, - "learning_rate": 2.4100609005498706e-08, - "loss": 0.9034, - "step": 10556 - }, - { - "epoch": 0.9520674572755558, - "grad_norm": 1.5164085800884097, - "learning_rate": 2.4010271918130764e-08, - "loss": 0.87, - "step": 10557 - }, - { - "epoch": 0.952157640799026, - "grad_norm": 1.3973289656686105, - "learning_rate": 2.39201034331018e-08, - "loss": 0.9006, - "step": 10558 - }, - { - "epoch": 0.9522478243224963, - "grad_norm": 1.5528003990483348, - "learning_rate": 2.3830103558105663e-08, - "loss": 0.9267, - "step": 10559 - }, - { - "epoch": 0.9523380078459666, - "grad_norm": 1.8935400188599902, - "learning_rate": 2.374027230082154e-08, - "loss": 0.9448, - "step": 10560 - }, - { - "epoch": 0.9524281913694368, - "grad_norm": 0.7286487692900363, - "learning_rate": 2.365060966891441e-08, - "loss": 0.828, - "step": 10561 - }, - { - "epoch": 0.952518374892907, - "grad_norm": 1.4629388656098103, - "learning_rate": 2.3561115670034827e-08, - "loss": 0.9307, - "step": 10562 - }, - { - "epoch": 0.9526085584163774, - "grad_norm": 1.4802458502220532, - "learning_rate": 2.3471790311818675e-08, - "loss": 0.9578, - "step": 10563 - }, - { - "epoch": 0.9526987419398476, - "grad_norm": 1.3709349798453692, - "learning_rate": 2.338263360188808e-08, - "loss": 0.929, - "step": 10564 - }, - { - "epoch": 0.9527889254633178, - "grad_norm": 2.2240431067385145, - "learning_rate": 2.329364554784985e-08, - "loss": 0.8819, - "step": 10565 - }, - { - "epoch": 0.9528791089867881, - "grad_norm": 1.416938176036009, - "learning_rate": 2.3204826157297465e-08, - "loss": 1.0038, - "step": 10566 - }, - { - "epoch": 0.9529692925102584, - "grad_norm": 1.4400024194167262, - "learning_rate": 2.3116175437809082e-08, - "loss": 0.8719, - "step": 10567 - }, - { - "epoch": 0.9530594760337286, - "grad_norm": 0.7209539209685826, - "learning_rate": 2.30276933969491e-08, - "loss": 0.8341, - "step": 10568 - }, - { - "epoch": 0.9531496595571989, - "grad_norm": 1.3110016489986056, - "learning_rate": 2.2939380042267255e-08, - "loss": 0.9913, - "step": 10569 - }, - { - "epoch": 0.9532398430806691, - "grad_norm": 1.4460427578715553, - "learning_rate": 2.2851235381298627e-08, - "loss": 0.8442, - "step": 10570 - }, - { - "epoch": 0.9533300266041395, - "grad_norm": 1.4516967809877013, - "learning_rate": 2.2763259421564986e-08, - "loss": 0.8627, - "step": 10571 - }, - { - "epoch": 0.9534202101276097, - "grad_norm": 1.4147083641082727, - "learning_rate": 2.2675452170571873e-08, - "loss": 0.9105, - "step": 10572 - }, - { - "epoch": 0.9535103936510799, - "grad_norm": 1.3836405370847797, - "learning_rate": 2.2587813635812414e-08, - "loss": 0.932, - "step": 10573 - }, - { - "epoch": 0.9536005771745502, - "grad_norm": 1.4260926625078456, - "learning_rate": 2.2500343824763958e-08, - "loss": 1.0131, - "step": 10574 - }, - { - "epoch": 0.9536907606980205, - "grad_norm": 1.6332924607411383, - "learning_rate": 2.2413042744890088e-08, - "loss": 0.8791, - "step": 10575 - }, - { - "epoch": 0.9537809442214907, - "grad_norm": 1.3468424233305007, - "learning_rate": 2.2325910403639514e-08, - "loss": 0.9291, - "step": 10576 - }, - { - "epoch": 0.953871127744961, - "grad_norm": 1.3884896160117954, - "learning_rate": 2.223894680844718e-08, - "loss": 0.8881, - "step": 10577 - }, - { - "epoch": 0.9539613112684313, - "grad_norm": 1.26806640625, - "learning_rate": 2.2152151966733146e-08, - "loss": 0.7733, - "step": 10578 - }, - { - "epoch": 0.9540514947919015, - "grad_norm": 1.1938950854906354, - "learning_rate": 2.2065525885903267e-08, - "loss": 0.9645, - "step": 10579 - }, - { - "epoch": 0.9541416783153718, - "grad_norm": 1.376179795860951, - "learning_rate": 2.1979068573348747e-08, - "loss": 0.9841, - "step": 10580 - }, - { - "epoch": 0.954231861838842, - "grad_norm": 1.3568955533137173, - "learning_rate": 2.1892780036447013e-08, - "loss": 0.897, - "step": 10581 - }, - { - "epoch": 0.9543220453623124, - "grad_norm": 1.84588719010269, - "learning_rate": 2.1806660282560175e-08, - "loss": 1.0789, - "step": 10582 - }, - { - "epoch": 0.9544122288857826, - "grad_norm": 1.2915545794347738, - "learning_rate": 2.1720709319037024e-08, - "loss": 0.9575, - "step": 10583 - }, - { - "epoch": 0.9545024124092528, - "grad_norm": 1.452355622093757, - "learning_rate": 2.1634927153211023e-08, - "loss": 0.9027, - "step": 10584 - }, - { - "epoch": 0.954592595932723, - "grad_norm": 1.6333749340883823, - "learning_rate": 2.1549313792401437e-08, - "loss": 1.0071, - "step": 10585 - }, - { - "epoch": 0.9546827794561934, - "grad_norm": 1.4110796327701054, - "learning_rate": 2.1463869243913746e-08, - "loss": 1.0256, - "step": 10586 - }, - { - "epoch": 0.9547729629796636, - "grad_norm": 1.6767662727973442, - "learning_rate": 2.1378593515037902e-08, - "loss": 0.9206, - "step": 10587 - }, - { - "epoch": 0.9548631465031339, - "grad_norm": 1.3989664281562135, - "learning_rate": 2.129348661305075e-08, - "loss": 0.9999, - "step": 10588 - }, - { - "epoch": 0.9549533300266041, - "grad_norm": 1.4970518386009537, - "learning_rate": 2.1208548545213813e-08, - "loss": 0.9635, - "step": 10589 - }, - { - "epoch": 0.9550435135500744, - "grad_norm": 1.4843782123731688, - "learning_rate": 2.1123779318774404e-08, - "loss": 0.8951, - "step": 10590 - }, - { - "epoch": 0.9551336970735447, - "grad_norm": 1.2963820750122808, - "learning_rate": 2.1039178940965408e-08, - "loss": 0.8788, - "step": 10591 - }, - { - "epoch": 0.9552238805970149, - "grad_norm": 1.2534228667701095, - "learning_rate": 2.0954747419005712e-08, - "loss": 0.9972, - "step": 10592 - }, - { - "epoch": 0.9553140641204851, - "grad_norm": 1.4429168642275076, - "learning_rate": 2.087048476009934e-08, - "loss": 0.9184, - "step": 10593 - }, - { - "epoch": 0.9554042476439555, - "grad_norm": 1.643292755815009, - "learning_rate": 2.0786390971435862e-08, - "loss": 0.9061, - "step": 10594 - }, - { - "epoch": 0.9554944311674257, - "grad_norm": 1.600139599908951, - "learning_rate": 2.070246606019088e-08, - "loss": 0.8953, - "step": 10595 - }, - { - "epoch": 0.955584614690896, - "grad_norm": 2.480519307740171, - "learning_rate": 2.0618710033525112e-08, - "loss": 0.8538, - "step": 10596 - }, - { - "epoch": 0.9556747982143662, - "grad_norm": 1.459098188462596, - "learning_rate": 2.053512289858528e-08, - "loss": 0.899, - "step": 10597 - }, - { - "epoch": 0.9557649817378365, - "grad_norm": 1.7430617529500119, - "learning_rate": 2.0451704662503456e-08, - "loss": 0.9747, - "step": 10598 - }, - { - "epoch": 0.9558551652613068, - "grad_norm": 1.6568023192567678, - "learning_rate": 2.0368455332397282e-08, - "loss": 1.0126, - "step": 10599 - }, - { - "epoch": 0.955945348784777, - "grad_norm": 1.338220575118539, - "learning_rate": 2.0285374915369967e-08, - "loss": 0.9508, - "step": 10600 - }, - { - "epoch": 0.9560355323082473, - "grad_norm": 1.542525061059094, - "learning_rate": 2.020246341851073e-08, - "loss": 0.9751, - "step": 10601 - }, - { - "epoch": 0.9561257158317176, - "grad_norm": 1.4814630958731994, - "learning_rate": 2.0119720848893463e-08, - "loss": 0.981, - "step": 10602 - }, - { - "epoch": 0.9562158993551878, - "grad_norm": 1.3945766836934264, - "learning_rate": 2.0037147213578964e-08, - "loss": 0.8898, - "step": 10603 - }, - { - "epoch": 0.956306082878658, - "grad_norm": 1.3152743490662957, - "learning_rate": 1.9954742519612265e-08, - "loss": 0.9647, - "step": 10604 - }, - { - "epoch": 0.9563962664021284, - "grad_norm": 1.4672613513303996, - "learning_rate": 1.9872506774024633e-08, - "loss": 0.9061, - "step": 10605 - }, - { - "epoch": 0.9564864499255986, - "grad_norm": 1.4730104379691988, - "learning_rate": 1.979043998383334e-08, - "loss": 0.9459, - "step": 10606 - }, - { - "epoch": 0.9565766334490688, - "grad_norm": 1.1859591174260062, - "learning_rate": 1.970854215604034e-08, - "loss": 0.8861, - "step": 10607 - }, - { - "epoch": 0.9566668169725391, - "grad_norm": 1.48058316636637, - "learning_rate": 1.9626813297633826e-08, - "loss": 0.9145, - "step": 10608 - }, - { - "epoch": 0.9567570004960094, - "grad_norm": 1.6300260870548575, - "learning_rate": 1.954525341558688e-08, - "loss": 0.8333, - "step": 10609 - }, - { - "epoch": 0.9568471840194797, - "grad_norm": 1.6080660635818884, - "learning_rate": 1.9463862516859498e-08, - "loss": 0.8967, - "step": 10610 - }, - { - "epoch": 0.9569373675429499, - "grad_norm": 1.500817632358287, - "learning_rate": 1.938264060839545e-08, - "loss": 1.0174, - "step": 10611 - }, - { - "epoch": 0.9570275510664201, - "grad_norm": 1.5407713876626594, - "learning_rate": 1.9301587697126086e-08, - "loss": 0.9112, - "step": 10612 - }, - { - "epoch": 0.9571177345898905, - "grad_norm": 1.615114588141584, - "learning_rate": 1.9220703789966318e-08, - "loss": 0.8925, - "step": 10613 - }, - { - "epoch": 0.9572079181133607, - "grad_norm": 1.504711777599162, - "learning_rate": 1.913998889381818e-08, - "loss": 0.8972, - "step": 10614 - }, - { - "epoch": 0.9572981016368309, - "grad_norm": 1.4350323436162447, - "learning_rate": 1.9059443015568387e-08, - "loss": 0.9422, - "step": 10615 - }, - { - "epoch": 0.9573882851603012, - "grad_norm": 1.3619200023202653, - "learning_rate": 1.8979066162089884e-08, - "loss": 0.9427, - "step": 10616 - }, - { - "epoch": 0.9574784686837715, - "grad_norm": 1.5382554315092631, - "learning_rate": 1.889885834024052e-08, - "loss": 0.9364, - "step": 10617 - }, - { - "epoch": 0.9575686522072417, - "grad_norm": 1.3825916178527164, - "learning_rate": 1.8818819556864374e-08, - "loss": 0.8947, - "step": 10618 - }, - { - "epoch": 0.957658835730712, - "grad_norm": 1.4666234306262678, - "learning_rate": 1.873894981879065e-08, - "loss": 0.923, - "step": 10619 - }, - { - "epoch": 0.9577490192541822, - "grad_norm": 0.6632252969494623, - "learning_rate": 1.8659249132834342e-08, - "loss": 0.7973, - "step": 10620 - }, - { - "epoch": 0.9578392027776526, - "grad_norm": 1.4015036699244954, - "learning_rate": 1.857971750579579e-08, - "loss": 0.9173, - "step": 10621 - }, - { - "epoch": 0.9579293863011228, - "grad_norm": 1.325887208479775, - "learning_rate": 1.8500354944461116e-08, - "loss": 0.8212, - "step": 10622 - }, - { - "epoch": 0.958019569824593, - "grad_norm": 1.1984689462209053, - "learning_rate": 1.8421161455602242e-08, - "loss": 0.9455, - "step": 10623 - }, - { - "epoch": 0.9581097533480634, - "grad_norm": 1.3285273223065879, - "learning_rate": 1.834213704597598e-08, - "loss": 0.9654, - "step": 10624 - }, - { - "epoch": 0.9581999368715336, - "grad_norm": 1.3289656727292103, - "learning_rate": 1.8263281722325385e-08, - "loss": 0.9452, - "step": 10625 - }, - { - "epoch": 0.9582901203950038, - "grad_norm": 1.438239197787096, - "learning_rate": 1.818459549137885e-08, - "loss": 0.9435, - "step": 10626 - }, - { - "epoch": 0.9583803039184741, - "grad_norm": 1.465549146564138, - "learning_rate": 1.8106078359850117e-08, - "loss": 0.9226, - "step": 10627 - }, - { - "epoch": 0.9584704874419444, - "grad_norm": 1.4462319413675704, - "learning_rate": 1.802773033443894e-08, - "loss": 0.9489, - "step": 10628 - }, - { - "epoch": 0.9585606709654146, - "grad_norm": 1.5181911806182324, - "learning_rate": 1.7949551421830413e-08, - "loss": 0.9058, - "step": 10629 - }, - { - "epoch": 0.9586508544888849, - "grad_norm": 1.4182643109451367, - "learning_rate": 1.7871541628694752e-08, - "loss": 0.9079, - "step": 10630 - }, - { - "epoch": 0.9587410380123551, - "grad_norm": 1.291087369324827, - "learning_rate": 1.779370096168864e-08, - "loss": 0.845, - "step": 10631 - }, - { - "epoch": 0.9588312215358255, - "grad_norm": 1.3675018385154738, - "learning_rate": 1.771602942745387e-08, - "loss": 0.8964, - "step": 10632 - }, - { - "epoch": 0.9589214050592957, - "grad_norm": 1.8342238849737318, - "learning_rate": 1.763852703261759e-08, - "loss": 0.9193, - "step": 10633 - }, - { - "epoch": 0.9590115885827659, - "grad_norm": 1.3661571871127796, - "learning_rate": 1.756119378379295e-08, - "loss": 0.9969, - "step": 10634 - }, - { - "epoch": 0.9591017721062362, - "grad_norm": 1.4801442940711431, - "learning_rate": 1.7484029687578005e-08, - "loss": 1.0277, - "step": 10635 - }, - { - "epoch": 0.9591919556297065, - "grad_norm": 1.3614300507709127, - "learning_rate": 1.740703475055727e-08, - "loss": 0.9123, - "step": 10636 - }, - { - "epoch": 0.9592821391531767, - "grad_norm": 2.092091373118431, - "learning_rate": 1.7330208979300153e-08, - "loss": 0.9734, - "step": 10637 - }, - { - "epoch": 0.959372322676647, - "grad_norm": 1.232764968632658, - "learning_rate": 1.725355238036208e-08, - "loss": 0.9782, - "step": 10638 - }, - { - "epoch": 0.9594625062001172, - "grad_norm": 1.573285604833277, - "learning_rate": 1.7177064960283594e-08, - "loss": 0.9842, - "step": 10639 - }, - { - "epoch": 0.9595526897235875, - "grad_norm": 1.5065102759921427, - "learning_rate": 1.7100746725591253e-08, - "loss": 0.9289, - "step": 10640 - }, - { - "epoch": 0.9596428732470578, - "grad_norm": 2.1496756020599475, - "learning_rate": 1.7024597682796517e-08, - "loss": 0.9254, - "step": 10641 - }, - { - "epoch": 0.959733056770528, - "grad_norm": 1.7422468919423877, - "learning_rate": 1.6948617838397293e-08, - "loss": 1.0027, - "step": 10642 - }, - { - "epoch": 0.9598232402939982, - "grad_norm": 1.712548599110456, - "learning_rate": 1.6872807198876404e-08, - "loss": 0.9649, - "step": 10643 - }, - { - "epoch": 0.9599134238174686, - "grad_norm": 1.4840009519208954, - "learning_rate": 1.679716577070245e-08, - "loss": 1.0287, - "step": 10644 - }, - { - "epoch": 0.9600036073409388, - "grad_norm": 1.4861189385482303, - "learning_rate": 1.6721693560329596e-08, - "loss": 1.028, - "step": 10645 - }, - { - "epoch": 0.960093790864409, - "grad_norm": 1.4457499074182245, - "learning_rate": 1.6646390574197366e-08, - "loss": 0.8427, - "step": 10646 - }, - { - "epoch": 0.9601839743878793, - "grad_norm": 0.7891822572985487, - "learning_rate": 1.6571256818731504e-08, - "loss": 0.8513, - "step": 10647 - }, - { - "epoch": 0.9602741579113496, - "grad_norm": 1.2519445552919453, - "learning_rate": 1.6496292300342218e-08, - "loss": 0.8672, - "step": 10648 - }, - { - "epoch": 0.9603643414348199, - "grad_norm": 1.5542781425089287, - "learning_rate": 1.642149702542639e-08, - "loss": 0.8768, - "step": 10649 - }, - { - "epoch": 0.9604545249582901, - "grad_norm": 1.3715568694987554, - "learning_rate": 1.634687100036558e-08, - "loss": 0.9065, - "step": 10650 - }, - { - "epoch": 0.9605447084817604, - "grad_norm": 1.5469578422172772, - "learning_rate": 1.627241423152781e-08, - "loss": 0.8691, - "step": 10651 - }, - { - "epoch": 0.9606348920052307, - "grad_norm": 1.5649149733288321, - "learning_rate": 1.619812672526555e-08, - "loss": 0.9895, - "step": 10652 - }, - { - "epoch": 0.9607250755287009, - "grad_norm": 1.5070333098164626, - "learning_rate": 1.6124008487917727e-08, - "loss": 0.9202, - "step": 10653 - }, - { - "epoch": 0.9608152590521711, - "grad_norm": 0.7224997273612911, - "learning_rate": 1.6050059525808623e-08, - "loss": 0.8224, - "step": 10654 - }, - { - "epoch": 0.9609054425756415, - "grad_norm": 0.6653472813535884, - "learning_rate": 1.597627984524763e-08, - "loss": 0.8076, - "step": 10655 - }, - { - "epoch": 0.9609956260991117, - "grad_norm": 1.2815042569093107, - "learning_rate": 1.590266945253038e-08, - "loss": 0.972, - "step": 10656 - }, - { - "epoch": 0.9610858096225819, - "grad_norm": 1.2734233176722956, - "learning_rate": 1.582922835393763e-08, - "loss": 0.9451, - "step": 10657 - }, - { - "epoch": 0.9611759931460522, - "grad_norm": 1.3920561038089425, - "learning_rate": 1.5755956555735473e-08, - "loss": 0.9151, - "step": 10658 - }, - { - "epoch": 0.9612661766695225, - "grad_norm": 0.8073818689630838, - "learning_rate": 1.5682854064176244e-08, - "loss": 0.9137, - "step": 10659 - }, - { - "epoch": 0.9613563601929928, - "grad_norm": 1.4933538222752263, - "learning_rate": 1.5609920885497395e-08, - "loss": 0.8677, - "step": 10660 - }, - { - "epoch": 0.961446543716463, - "grad_norm": 1.3816926376392893, - "learning_rate": 1.5537157025921732e-08, - "loss": 0.9497, - "step": 10661 - }, - { - "epoch": 0.9615367272399332, - "grad_norm": 1.7504245379345948, - "learning_rate": 1.5464562491658285e-08, - "loss": 0.915, - "step": 10662 - }, - { - "epoch": 0.9616269107634036, - "grad_norm": 1.3276922530708843, - "learning_rate": 1.5392137288900764e-08, - "loss": 0.9723, - "step": 10663 - }, - { - "epoch": 0.9617170942868738, - "grad_norm": 0.6324518376887284, - "learning_rate": 1.531988142382934e-08, - "loss": 0.8106, - "step": 10664 - }, - { - "epoch": 0.961807277810344, - "grad_norm": 1.4369157350132988, - "learning_rate": 1.5247794902608634e-08, - "loss": 0.8736, - "step": 10665 - }, - { - "epoch": 0.9618974613338143, - "grad_norm": 1.5674254034322357, - "learning_rate": 1.5175877731390398e-08, - "loss": 0.8913, - "step": 10666 - }, - { - "epoch": 0.9619876448572846, - "grad_norm": 1.5402580303917124, - "learning_rate": 1.510412991631016e-08, - "loss": 0.9645, - "step": 10667 - }, - { - "epoch": 0.9620778283807548, - "grad_norm": 1.267582074880691, - "learning_rate": 1.503255146349014e-08, - "loss": 0.9635, - "step": 10668 - }, - { - "epoch": 0.9621680119042251, - "grad_norm": 1.5160392902889117, - "learning_rate": 1.4961142379037893e-08, - "loss": 0.904, - "step": 10669 - }, - { - "epoch": 0.9622581954276953, - "grad_norm": 1.3820124752651965, - "learning_rate": 1.4889902669046327e-08, - "loss": 0.9743, - "step": 10670 - }, - { - "epoch": 0.9623483789511657, - "grad_norm": 1.6917478996028181, - "learning_rate": 1.4818832339594135e-08, - "loss": 0.93, - "step": 10671 - }, - { - "epoch": 0.9624385624746359, - "grad_norm": 1.3085643252580337, - "learning_rate": 1.474793139674535e-08, - "loss": 0.8856, - "step": 10672 - }, - { - "epoch": 0.9625287459981061, - "grad_norm": 1.3535225387770977, - "learning_rate": 1.4677199846549581e-08, - "loss": 0.9276, - "step": 10673 - }, - { - "epoch": 0.9626189295215765, - "grad_norm": 1.4470928052919363, - "learning_rate": 1.4606637695042224e-08, - "loss": 0.9357, - "step": 10674 - }, - { - "epoch": 0.9627091130450467, - "grad_norm": 1.7225920996107102, - "learning_rate": 1.4536244948243793e-08, - "loss": 1.0323, - "step": 10675 - }, - { - "epoch": 0.9627992965685169, - "grad_norm": 1.4300762544321197, - "learning_rate": 1.4466021612160595e-08, - "loss": 0.9004, - "step": 10676 - }, - { - "epoch": 0.9628894800919872, - "grad_norm": 1.4705825087491904, - "learning_rate": 1.4395967692784505e-08, - "loss": 0.9004, - "step": 10677 - }, - { - "epoch": 0.9629796636154575, - "grad_norm": 1.375843049740384, - "learning_rate": 1.4326083196092963e-08, - "loss": 0.866, - "step": 10678 - }, - { - "epoch": 0.9630698471389277, - "grad_norm": 1.337679970300032, - "learning_rate": 1.42563681280492e-08, - "loss": 0.9492, - "step": 10679 - }, - { - "epoch": 0.963160030662398, - "grad_norm": 1.380797388824876, - "learning_rate": 1.4186822494600902e-08, - "loss": 0.9132, - "step": 10680 - }, - { - "epoch": 0.9632502141858682, - "grad_norm": 1.3143668295206823, - "learning_rate": 1.4117446301682877e-08, - "loss": 0.9288, - "step": 10681 - }, - { - "epoch": 0.9633403977093385, - "grad_norm": 1.246438005780973, - "learning_rate": 1.4048239555214392e-08, - "loss": 0.8869, - "step": 10682 - }, - { - "epoch": 0.9634305812328088, - "grad_norm": 1.330982813640354, - "learning_rate": 1.3979202261100497e-08, - "loss": 0.9678, - "step": 10683 - }, - { - "epoch": 0.963520764756279, - "grad_norm": 1.5611034255481533, - "learning_rate": 1.3910334425231817e-08, - "loss": 0.9913, - "step": 10684 - }, - { - "epoch": 0.9636109482797492, - "grad_norm": 1.5072433108904073, - "learning_rate": 1.384163605348454e-08, - "loss": 0.9364, - "step": 10685 - }, - { - "epoch": 0.9637011318032196, - "grad_norm": 1.5994416126781037, - "learning_rate": 1.3773107151720642e-08, - "loss": 0.9955, - "step": 10686 - }, - { - "epoch": 0.9637913153266898, - "grad_norm": 1.5395558142708328, - "learning_rate": 1.3704747725787003e-08, - "loss": 0.9953, - "step": 10687 - }, - { - "epoch": 0.9638814988501601, - "grad_norm": 0.8013198901803328, - "learning_rate": 1.3636557781516512e-08, - "loss": 0.83, - "step": 10688 - }, - { - "epoch": 0.9639716823736303, - "grad_norm": 1.5924209588285936, - "learning_rate": 1.3568537324727847e-08, - "loss": 0.935, - "step": 10689 - }, - { - "epoch": 0.9640618658971006, - "grad_norm": 1.413402930441464, - "learning_rate": 1.3500686361224589e-08, - "loss": 0.9335, - "step": 10690 - }, - { - "epoch": 0.9641520494205709, - "grad_norm": 1.5741643020641742, - "learning_rate": 1.3433004896796108e-08, - "loss": 0.9391, - "step": 10691 - }, - { - "epoch": 0.9642422329440411, - "grad_norm": 1.7006654390674338, - "learning_rate": 1.336549293721756e-08, - "loss": 0.8276, - "step": 10692 - }, - { - "epoch": 0.9643324164675113, - "grad_norm": 0.7018533864009651, - "learning_rate": 1.3298150488249227e-08, - "loss": 0.8094, - "step": 10693 - }, - { - "epoch": 0.9644225999909817, - "grad_norm": 1.6464170394649638, - "learning_rate": 1.3230977555637401e-08, - "loss": 1.0112, - "step": 10694 - }, - { - "epoch": 0.9645127835144519, - "grad_norm": 1.3940612257314784, - "learning_rate": 1.3163974145113499e-08, - "loss": 0.8937, - "step": 10695 - }, - { - "epoch": 0.9646029670379221, - "grad_norm": 1.5431964175143664, - "learning_rate": 1.3097140262394723e-08, - "loss": 0.8366, - "step": 10696 - }, - { - "epoch": 0.9646931505613925, - "grad_norm": 1.6432919578424634, - "learning_rate": 1.303047591318318e-08, - "loss": 0.9713, - "step": 10697 - }, - { - "epoch": 0.9647833340848627, - "grad_norm": 1.6497290013030863, - "learning_rate": 1.2963981103167875e-08, - "loss": 0.8688, - "step": 10698 - }, - { - "epoch": 0.964873517608333, - "grad_norm": 1.4120720611749698, - "learning_rate": 1.2897655838021825e-08, - "loss": 0.9165, - "step": 10699 - }, - { - "epoch": 0.9649637011318032, - "grad_norm": 1.8477002515654453, - "learning_rate": 1.2831500123404726e-08, - "loss": 0.9428, - "step": 10700 - }, - { - "epoch": 0.9650538846552735, - "grad_norm": 1.690784436956056, - "learning_rate": 1.2765513964961172e-08, - "loss": 1.0838, - "step": 10701 - }, - { - "epoch": 0.9651440681787438, - "grad_norm": 1.4027578863405954, - "learning_rate": 1.2699697368321549e-08, - "loss": 0.9269, - "step": 10702 - }, - { - "epoch": 0.965234251702214, - "grad_norm": 1.319237130034914, - "learning_rate": 1.2634050339101366e-08, - "loss": 0.9167, - "step": 10703 - }, - { - "epoch": 0.9653244352256842, - "grad_norm": 1.2956423589092834, - "learning_rate": 1.2568572882902361e-08, - "loss": 0.9104, - "step": 10704 - }, - { - "epoch": 0.9654146187491546, - "grad_norm": 1.316179295567224, - "learning_rate": 1.2503265005311402e-08, - "loss": 0.9063, - "step": 10705 - }, - { - "epoch": 0.9655048022726248, - "grad_norm": 2.7020482912244796, - "learning_rate": 1.2438126711900698e-08, - "loss": 0.9014, - "step": 10706 - }, - { - "epoch": 0.965594985796095, - "grad_norm": 1.4216763703498445, - "learning_rate": 1.2373158008228247e-08, - "loss": 0.783, - "step": 10707 - }, - { - "epoch": 0.9656851693195653, - "grad_norm": 1.2390943679463609, - "learning_rate": 1.2308358899837833e-08, - "loss": 0.9033, - "step": 10708 - }, - { - "epoch": 0.9657753528430356, - "grad_norm": 1.4899373289663684, - "learning_rate": 1.224372939225815e-08, - "loss": 0.9149, - "step": 10709 - }, - { - "epoch": 0.9658655363665059, - "grad_norm": 1.3238737393937707, - "learning_rate": 1.2179269491003674e-08, - "loss": 1.0224, - "step": 10710 - }, - { - "epoch": 0.9659557198899761, - "grad_norm": 1.3121351007300084, - "learning_rate": 1.2114979201574894e-08, - "loss": 0.8114, - "step": 10711 - }, - { - "epoch": 0.9660459034134463, - "grad_norm": 1.816934553862469, - "learning_rate": 1.2050858529456975e-08, - "loss": 0.954, - "step": 10712 - }, - { - "epoch": 0.9661360869369167, - "grad_norm": 1.3997280623087514, - "learning_rate": 1.1986907480121545e-08, - "loss": 0.9347, - "step": 10713 - }, - { - "epoch": 0.9662262704603869, - "grad_norm": 1.4384138271017268, - "learning_rate": 1.192312605902468e-08, - "loss": 0.9516, - "step": 10714 - }, - { - "epoch": 0.9663164539838571, - "grad_norm": 0.8152693356674946, - "learning_rate": 1.1859514271608917e-08, - "loss": 0.8483, - "step": 10715 - }, - { - "epoch": 0.9664066375073274, - "grad_norm": 1.9118617894045788, - "learning_rate": 1.1796072123301914e-08, - "loss": 0.9644, - "step": 10716 - }, - { - "epoch": 0.9664968210307977, - "grad_norm": 0.6702157757738857, - "learning_rate": 1.1732799619516897e-08, - "loss": 0.8265, - "step": 10717 - }, - { - "epoch": 0.9665870045542679, - "grad_norm": 2.0453824724157523, - "learning_rate": 1.1669696765652659e-08, - "loss": 0.9196, - "step": 10718 - }, - { - "epoch": 0.9666771880777382, - "grad_norm": 1.3587340235889493, - "learning_rate": 1.1606763567093336e-08, - "loss": 0.9642, - "step": 10719 - }, - { - "epoch": 0.9667673716012085, - "grad_norm": 1.1074575051794078, - "learning_rate": 1.1544000029208857e-08, - "loss": 0.8056, - "step": 10720 - }, - { - "epoch": 0.9668575551246787, - "grad_norm": 1.4510507853984367, - "learning_rate": 1.148140615735449e-08, - "loss": 0.8004, - "step": 10721 - }, - { - "epoch": 0.966947738648149, - "grad_norm": 1.3431242883253454, - "learning_rate": 1.1418981956871076e-08, - "loss": 0.9107, - "step": 10722 - }, - { - "epoch": 0.9670379221716192, - "grad_norm": 1.8638843384087254, - "learning_rate": 1.1356727433085245e-08, - "loss": 0.925, - "step": 10723 - }, - { - "epoch": 0.9671281056950896, - "grad_norm": 1.2231341515745964, - "learning_rate": 1.1294642591308524e-08, - "loss": 0.9626, - "step": 10724 - }, - { - "epoch": 0.9672182892185598, - "grad_norm": 1.2541825890190434, - "learning_rate": 1.1232727436838452e-08, - "loss": 0.9232, - "step": 10725 - }, - { - "epoch": 0.96730847274203, - "grad_norm": 1.460129050077509, - "learning_rate": 1.1170981974958138e-08, - "loss": 0.9813, - "step": 10726 - }, - { - "epoch": 0.9673986562655003, - "grad_norm": 1.7599844844090864, - "learning_rate": 1.1109406210936035e-08, - "loss": 1.001, - "step": 10727 - }, - { - "epoch": 0.9674888397889706, - "grad_norm": 1.5528321049481932, - "learning_rate": 1.1048000150025939e-08, - "loss": 1.0555, - "step": 10728 - }, - { - "epoch": 0.9675790233124408, - "grad_norm": 1.6388044155930075, - "learning_rate": 1.0986763797467213e-08, - "loss": 0.8311, - "step": 10729 - }, - { - "epoch": 0.9676692068359111, - "grad_norm": 0.6435736970308086, - "learning_rate": 1.0925697158485459e-08, - "loss": 0.7546, - "step": 10730 - }, - { - "epoch": 0.9677593903593813, - "grad_norm": 1.3733999739761762, - "learning_rate": 1.0864800238290727e-08, - "loss": 0.9411, - "step": 10731 - }, - { - "epoch": 0.9678495738828516, - "grad_norm": 2.032186321719862, - "learning_rate": 1.0804073042079309e-08, - "loss": 0.8983, - "step": 10732 - }, - { - "epoch": 0.9679397574063219, - "grad_norm": 0.6947674061220366, - "learning_rate": 1.0743515575032392e-08, - "loss": 0.8097, - "step": 10733 - }, - { - "epoch": 0.9680299409297921, - "grad_norm": 1.4619580136752865, - "learning_rate": 1.0683127842317619e-08, - "loss": 0.9456, - "step": 10734 - }, - { - "epoch": 0.9681201244532623, - "grad_norm": 1.5760249148544008, - "learning_rate": 1.0622909849087314e-08, - "loss": 0.9578, - "step": 10735 - }, - { - "epoch": 0.9682103079767327, - "grad_norm": 1.3488790962581956, - "learning_rate": 1.0562861600479588e-08, - "loss": 0.9954, - "step": 10736 - }, - { - "epoch": 0.9683004915002029, - "grad_norm": 1.8024879692194642, - "learning_rate": 1.0502983101618345e-08, - "loss": 1.0276, - "step": 10737 - }, - { - "epoch": 0.9683906750236732, - "grad_norm": 1.5337494672480547, - "learning_rate": 1.0443274357612386e-08, - "loss": 0.8099, - "step": 10738 - }, - { - "epoch": 0.9684808585471434, - "grad_norm": 1.4448391525873616, - "learning_rate": 1.0383735373556524e-08, - "loss": 0.9531, - "step": 10739 - }, - { - "epoch": 0.9685710420706137, - "grad_norm": 1.3185839652448679, - "learning_rate": 1.0324366154531139e-08, - "loss": 0.9662, - "step": 10740 - }, - { - "epoch": 0.968661225594084, - "grad_norm": 1.5540886880711233, - "learning_rate": 1.0265166705601735e-08, - "loss": 0.8994, - "step": 10741 - }, - { - "epoch": 0.9687514091175542, - "grad_norm": 0.7853462786379349, - "learning_rate": 1.0206137031819606e-08, - "loss": 0.8752, - "step": 10742 - }, - { - "epoch": 0.9688415926410244, - "grad_norm": 1.3426332490740593, - "learning_rate": 1.0147277138221388e-08, - "loss": 0.9454, - "step": 10743 - }, - { - "epoch": 0.9689317761644948, - "grad_norm": 2.1348190091535306, - "learning_rate": 1.0088587029829287e-08, - "loss": 0.9754, - "step": 10744 - }, - { - "epoch": 0.969021959687965, - "grad_norm": 1.3712223786012403, - "learning_rate": 1.003006671165152e-08, - "loss": 1.0343, - "step": 10745 - }, - { - "epoch": 0.9691121432114352, - "grad_norm": 1.3514338525834813, - "learning_rate": 9.971716188680978e-09, - "loss": 0.9248, - "step": 10746 - }, - { - "epoch": 0.9692023267349056, - "grad_norm": 1.832811982267783, - "learning_rate": 9.91353546589635e-09, - "loss": 0.9064, - "step": 10747 - }, - { - "epoch": 0.9692925102583758, - "grad_norm": 1.3649494708842969, - "learning_rate": 9.855524548262106e-09, - "loss": 0.8658, - "step": 10748 - }, - { - "epoch": 0.969382693781846, - "grad_norm": 1.6723247885624826, - "learning_rate": 9.797683440728288e-09, - "loss": 0.8822, - "step": 10749 - }, - { - "epoch": 0.9694728773053163, - "grad_norm": 1.4143943607962288, - "learning_rate": 9.740012148229836e-09, - "loss": 0.8556, - "step": 10750 - }, - { - "epoch": 0.9695630608287866, - "grad_norm": 1.2888636580186368, - "learning_rate": 9.682510675687705e-09, - "loss": 0.8894, - "step": 10751 - }, - { - "epoch": 0.9696532443522569, - "grad_norm": 1.4129066590824968, - "learning_rate": 9.625179028008191e-09, - "loss": 0.8534, - "step": 10752 - }, - { - "epoch": 0.9697434278757271, - "grad_norm": 1.50011276774906, - "learning_rate": 9.568017210083379e-09, - "loss": 0.7856, - "step": 10753 - }, - { - "epoch": 0.9698336113991973, - "grad_norm": 1.4983089769793232, - "learning_rate": 9.511025226790259e-09, - "loss": 0.916, - "step": 10754 - }, - { - "epoch": 0.9699237949226677, - "grad_norm": 1.3388562358516805, - "learning_rate": 9.454203082992052e-09, - "loss": 0.9138, - "step": 10755 - }, - { - "epoch": 0.9700139784461379, - "grad_norm": 1.6005541348512637, - "learning_rate": 9.3975507835371e-09, - "loss": 0.8379, - "step": 10756 - }, - { - "epoch": 0.9701041619696081, - "grad_norm": 0.6660147300557587, - "learning_rate": 9.341068333259094e-09, - "loss": 0.8116, - "step": 10757 - }, - { - "epoch": 0.9701943454930784, - "grad_norm": 1.4117586536604982, - "learning_rate": 9.28475573697729e-09, - "loss": 0.9127, - "step": 10758 - }, - { - "epoch": 0.9702845290165487, - "grad_norm": 1.3554635171486933, - "learning_rate": 9.228612999497177e-09, - "loss": 0.9383, - "step": 10759 - }, - { - "epoch": 0.970374712540019, - "grad_norm": 2.1706345123601025, - "learning_rate": 9.172640125608478e-09, - "loss": 0.8819, - "step": 10760 - }, - { - "epoch": 0.9704648960634892, - "grad_norm": 1.2248829805184434, - "learning_rate": 9.116837120087817e-09, - "loss": 0.9441, - "step": 10761 - }, - { - "epoch": 0.9705550795869594, - "grad_norm": 1.547462120763463, - "learning_rate": 9.061203987695832e-09, - "loss": 1.0062, - "step": 10762 - }, - { - "epoch": 0.9706452631104298, - "grad_norm": 1.5363617257818907, - "learning_rate": 9.005740733180055e-09, - "loss": 0.8834, - "step": 10763 - }, - { - "epoch": 0.9707354466339, - "grad_norm": 1.637153731564185, - "learning_rate": 8.950447361272483e-09, - "loss": 1.0193, - "step": 10764 - }, - { - "epoch": 0.9708256301573702, - "grad_norm": 1.9045793059790583, - "learning_rate": 8.895323876691784e-09, - "loss": 0.9408, - "step": 10765 - }, - { - "epoch": 0.9709158136808405, - "grad_norm": 1.5792693719537791, - "learning_rate": 8.840370284140419e-09, - "loss": 0.9854, - "step": 10766 - }, - { - "epoch": 0.9710059972043108, - "grad_norm": 1.454604841233737, - "learning_rate": 8.78558658830797e-09, - "loss": 0.8494, - "step": 10767 - }, - { - "epoch": 0.971096180727781, - "grad_norm": 0.6766085549552573, - "learning_rate": 8.730972793868696e-09, - "loss": 0.8233, - "step": 10768 - }, - { - "epoch": 0.9711863642512513, - "grad_norm": 1.7084940702912068, - "learning_rate": 8.67652890548265e-09, - "loss": 0.8943, - "step": 10769 - }, - { - "epoch": 0.9712765477747216, - "grad_norm": 1.2914274773581027, - "learning_rate": 8.622254927795004e-09, - "loss": 0.9088, - "step": 10770 - }, - { - "epoch": 0.9713667312981918, - "grad_norm": 1.4711175869040147, - "learning_rate": 8.568150865436941e-09, - "loss": 0.8956, - "step": 10771 - }, - { - "epoch": 0.9714569148216621, - "grad_norm": 1.2739412680372137, - "learning_rate": 8.514216723024991e-09, - "loss": 0.9421, - "step": 10772 - }, - { - "epoch": 0.9715470983451323, - "grad_norm": 3.571405222680199, - "learning_rate": 8.460452505161031e-09, - "loss": 0.8588, - "step": 10773 - }, - { - "epoch": 0.9716372818686027, - "grad_norm": 1.3279081728595468, - "learning_rate": 8.4068582164325e-09, - "loss": 0.9605, - "step": 10774 - }, - { - "epoch": 0.9717274653920729, - "grad_norm": 1.4140062268267066, - "learning_rate": 8.353433861412406e-09, - "loss": 0.9125, - "step": 10775 - }, - { - "epoch": 0.9718176489155431, - "grad_norm": 1.4731197690162796, - "learning_rate": 8.300179444658883e-09, - "loss": 0.8902, - "step": 10776 - }, - { - "epoch": 0.9719078324390134, - "grad_norm": 1.5868891224001733, - "learning_rate": 8.247094970716296e-09, - "loss": 1.0294, - "step": 10777 - }, - { - "epoch": 0.9719980159624837, - "grad_norm": 1.380403176236098, - "learning_rate": 8.19418044411413e-09, - "loss": 0.968, - "step": 10778 - }, - { - "epoch": 0.9720881994859539, - "grad_norm": 1.2309690881159085, - "learning_rate": 8.141435869367219e-09, - "loss": 0.9323, - "step": 10779 - }, - { - "epoch": 0.9721783830094242, - "grad_norm": 1.3253241682269796, - "learning_rate": 8.088861250975742e-09, - "loss": 0.9713, - "step": 10780 - }, - { - "epoch": 0.9722685665328944, - "grad_norm": 1.5476724900111394, - "learning_rate": 8.036456593426111e-09, - "loss": 0.9065, - "step": 10781 - }, - { - "epoch": 0.9723587500563647, - "grad_norm": 1.472190236797797, - "learning_rate": 7.984221901189415e-09, - "loss": 0.9681, - "step": 10782 - }, - { - "epoch": 0.972448933579835, - "grad_norm": 1.3877560027110702, - "learning_rate": 7.932157178722976e-09, - "loss": 0.8586, - "step": 10783 - }, - { - "epoch": 0.9725391171033052, - "grad_norm": 1.5224162273191626, - "learning_rate": 7.880262430468799e-09, - "loss": 0.9029, - "step": 10784 - }, - { - "epoch": 0.9726293006267754, - "grad_norm": 1.7688347327759, - "learning_rate": 7.828537660855339e-09, - "loss": 0.8681, - "step": 10785 - }, - { - "epoch": 0.9727194841502458, - "grad_norm": 2.0518486805348246, - "learning_rate": 7.776982874295512e-09, - "loss": 0.9383, - "step": 10786 - }, - { - "epoch": 0.972809667673716, - "grad_norm": 1.7932001101750568, - "learning_rate": 7.725598075188688e-09, - "loss": 0.8417, - "step": 10787 - }, - { - "epoch": 0.9728998511971862, - "grad_norm": 4.839040324696425, - "learning_rate": 7.674383267918916e-09, - "loss": 0.8857, - "step": 10788 - }, - { - "epoch": 0.9729900347206565, - "grad_norm": 1.4127558366786241, - "learning_rate": 7.623338456856476e-09, - "loss": 0.922, - "step": 10789 - }, - { - "epoch": 0.9730802182441268, - "grad_norm": 1.5163741472433843, - "learning_rate": 7.572463646356554e-09, - "loss": 0.8261, - "step": 10790 - }, - { - "epoch": 0.9731704017675971, - "grad_norm": 1.4966046529183452, - "learning_rate": 7.521758840760339e-09, - "loss": 0.9055, - "step": 10791 - }, - { - "epoch": 0.9732605852910673, - "grad_norm": 1.8301970647798982, - "learning_rate": 7.471224044393931e-09, - "loss": 0.8876, - "step": 10792 - }, - { - "epoch": 0.9733507688145376, - "grad_norm": 1.3215042243569584, - "learning_rate": 7.420859261569434e-09, - "loss": 0.9972, - "step": 10793 - }, - { - "epoch": 0.9734409523380079, - "grad_norm": 1.5413174448470357, - "learning_rate": 7.370664496584078e-09, - "loss": 0.9912, - "step": 10794 - }, - { - "epoch": 0.9735311358614781, - "grad_norm": 1.372306222421001, - "learning_rate": 7.3206397537211026e-09, - "loss": 0.8511, - "step": 10795 - }, - { - "epoch": 0.9736213193849483, - "grad_norm": 2.139535445655849, - "learning_rate": 7.270785037248428e-09, - "loss": 0.8998, - "step": 10796 - }, - { - "epoch": 0.9737115029084187, - "grad_norm": 1.4657648471247529, - "learning_rate": 7.221100351420428e-09, - "loss": 0.8936, - "step": 10797 - }, - { - "epoch": 0.9738016864318889, - "grad_norm": 1.3460493154603155, - "learning_rate": 7.171585700475935e-09, - "loss": 0.8607, - "step": 10798 - }, - { - "epoch": 0.9738918699553591, - "grad_norm": 1.5233878103124368, - "learning_rate": 7.122241088640235e-09, - "loss": 0.9907, - "step": 10799 - }, - { - "epoch": 0.9739820534788294, - "grad_norm": 1.4389890339309608, - "learning_rate": 7.073066520123516e-09, - "loss": 1.0533, - "step": 10800 - }, - { - "epoch": 0.9740722370022997, - "grad_norm": 1.5696406659068054, - "learning_rate": 7.0240619991217555e-09, - "loss": 0.823, - "step": 10801 - }, - { - "epoch": 0.97416242052577, - "grad_norm": 2.0061641116192424, - "learning_rate": 6.975227529816052e-09, - "loss": 0.9934, - "step": 10802 - }, - { - "epoch": 0.9742526040492402, - "grad_norm": 1.516853463892759, - "learning_rate": 6.926563116373296e-09, - "loss": 0.9735, - "step": 10803 - }, - { - "epoch": 0.9743427875727104, - "grad_norm": 1.4994656087729055, - "learning_rate": 6.878068762945943e-09, - "loss": 0.8742, - "step": 10804 - }, - { - "epoch": 0.9744329710961808, - "grad_norm": 1.2971260563159681, - "learning_rate": 6.829744473671794e-09, - "loss": 0.9623, - "step": 10805 - }, - { - "epoch": 0.974523154619651, - "grad_norm": 0.6176457454329506, - "learning_rate": 6.781590252674219e-09, - "loss": 0.7066, - "step": 10806 - }, - { - "epoch": 0.9746133381431212, - "grad_norm": 1.129964471106563, - "learning_rate": 6.733606104061484e-09, - "loss": 0.9378, - "step": 10807 - }, - { - "epoch": 0.9747035216665915, - "grad_norm": 1.3567515522568572, - "learning_rate": 6.6857920319283165e-09, - "loss": 0.884, - "step": 10808 - }, - { - "epoch": 0.9747937051900618, - "grad_norm": 1.3831781022556384, - "learning_rate": 6.638148040354563e-09, - "loss": 0.9092, - "step": 10809 - }, - { - "epoch": 0.974883888713532, - "grad_norm": 1.4442367261712754, - "learning_rate": 6.590674133405194e-09, - "loss": 0.8897, - "step": 10810 - }, - { - "epoch": 0.9749740722370023, - "grad_norm": 1.3501458212899238, - "learning_rate": 6.5433703151311914e-09, - "loss": 0.94, - "step": 10811 - }, - { - "epoch": 0.9750642557604725, - "grad_norm": 1.5769025392429534, - "learning_rate": 6.49623658956866e-09, - "loss": 0.9326, - "step": 10812 - }, - { - "epoch": 0.9751544392839429, - "grad_norm": 1.8580153166074942, - "learning_rate": 6.44927296073905e-09, - "loss": 0.8156, - "step": 10813 - }, - { - "epoch": 0.9752446228074131, - "grad_norm": 1.3857357355657147, - "learning_rate": 6.402479432649821e-09, - "loss": 1.0152, - "step": 10814 - }, - { - "epoch": 0.9753348063308833, - "grad_norm": 1.6026450615509344, - "learning_rate": 6.355856009293781e-09, - "loss": 0.928, - "step": 10815 - }, - { - "epoch": 0.9754249898543537, - "grad_norm": 1.8428092108055596, - "learning_rate": 6.3094026946488575e-09, - "loss": 0.91, - "step": 10816 - }, - { - "epoch": 0.9755151733778239, - "grad_norm": 1.322979327342365, - "learning_rate": 6.2631194926787704e-09, - "loss": 0.9323, - "step": 10817 - }, - { - "epoch": 0.9756053569012941, - "grad_norm": 2.075620705480363, - "learning_rate": 6.217006407332581e-09, - "loss": 0.8345, - "step": 10818 - }, - { - "epoch": 0.9756955404247644, - "grad_norm": 1.3464138330142243, - "learning_rate": 6.1710634425453654e-09, - "loss": 0.8783, - "step": 10819 - }, - { - "epoch": 0.9757857239482347, - "grad_norm": 1.4975335983812315, - "learning_rate": 6.1252906022366544e-09, - "loss": 0.9472, - "step": 10820 - }, - { - "epoch": 0.9758759074717049, - "grad_norm": 1.4944314910409224, - "learning_rate": 6.079687890312213e-09, - "loss": 0.9077, - "step": 10821 - }, - { - "epoch": 0.9759660909951752, - "grad_norm": 1.562483520420908, - "learning_rate": 6.034255310663372e-09, - "loss": 0.9127, - "step": 10822 - }, - { - "epoch": 0.9760562745186454, - "grad_norm": 1.6041386424337423, - "learning_rate": 5.988992867166143e-09, - "loss": 1.0228, - "step": 10823 - }, - { - "epoch": 0.9761464580421157, - "grad_norm": 1.6628353268311673, - "learning_rate": 5.943900563682991e-09, - "loss": 0.9695, - "step": 10824 - }, - { - "epoch": 0.976236641565586, - "grad_norm": 0.7746043795195151, - "learning_rate": 5.898978404061506e-09, - "loss": 0.8775, - "step": 10825 - }, - { - "epoch": 0.9763268250890562, - "grad_norm": 1.9029874554620039, - "learning_rate": 5.85422639213462e-09, - "loss": 0.9296, - "step": 10826 - }, - { - "epoch": 0.9764170086125264, - "grad_norm": 1.3374363037343613, - "learning_rate": 5.809644531720614e-09, - "loss": 0.9417, - "step": 10827 - }, - { - "epoch": 0.9765071921359968, - "grad_norm": 1.333707230515883, - "learning_rate": 5.765232826623556e-09, - "loss": 0.9619, - "step": 10828 - }, - { - "epoch": 0.976597375659467, - "grad_norm": 1.4132814305074988, - "learning_rate": 5.720991280633081e-09, - "loss": 0.8988, - "step": 10829 - }, - { - "epoch": 0.9766875591829373, - "grad_norm": 1.2643940437757075, - "learning_rate": 5.676919897523724e-09, - "loss": 0.9156, - "step": 10830 - }, - { - "epoch": 0.9767777427064075, - "grad_norm": 1.246403814023858, - "learning_rate": 5.633018681056256e-09, - "loss": 0.9392, - "step": 10831 - }, - { - "epoch": 0.9768679262298778, - "grad_norm": 1.554669634078996, - "learning_rate": 5.589287634976569e-09, - "loss": 0.9319, - "step": 10832 - }, - { - "epoch": 0.9769581097533481, - "grad_norm": 1.2791848170000284, - "learning_rate": 5.5457267630159014e-09, - "loss": 0.9015, - "step": 10833 - }, - { - "epoch": 0.9770482932768183, - "grad_norm": 1.311867334380705, - "learning_rate": 5.5023360688910555e-09, - "loss": 0.9687, - "step": 10834 - }, - { - "epoch": 0.9771384768002885, - "grad_norm": 1.3820255863813298, - "learning_rate": 5.459115556304183e-09, - "loss": 0.9762, - "step": 10835 - }, - { - "epoch": 0.9772286603237589, - "grad_norm": 1.7018838262031417, - "learning_rate": 5.416065228943889e-09, - "loss": 0.8753, - "step": 10836 - }, - { - "epoch": 0.9773188438472291, - "grad_norm": 1.5230132465901078, - "learning_rate": 5.373185090482568e-09, - "loss": 0.887, - "step": 10837 - }, - { - "epoch": 0.9774090273706993, - "grad_norm": 1.6347925327506634, - "learning_rate": 5.330475144579516e-09, - "loss": 0.9226, - "step": 10838 - }, - { - "epoch": 0.9774992108941697, - "grad_norm": 1.3782931685832418, - "learning_rate": 5.2879353948787065e-09, - "loss": 1.0137, - "step": 10839 - }, - { - "epoch": 0.9775893944176399, - "grad_norm": 1.5966858843295268, - "learning_rate": 5.245565845010125e-09, - "loss": 0.9869, - "step": 10840 - }, - { - "epoch": 0.9776795779411102, - "grad_norm": 1.287462339498572, - "learning_rate": 5.2033664985886575e-09, - "loss": 0.8611, - "step": 10841 - }, - { - "epoch": 0.9777697614645804, - "grad_norm": 1.575144379673888, - "learning_rate": 5.161337359215201e-09, - "loss": 1.0755, - "step": 10842 - }, - { - "epoch": 0.9778599449880507, - "grad_norm": 1.5091464610075076, - "learning_rate": 5.119478430475999e-09, - "loss": 0.9488, - "step": 10843 - }, - { - "epoch": 0.977950128511521, - "grad_norm": 2.0081854447909353, - "learning_rate": 5.077789715942416e-09, - "loss": 0.8688, - "step": 10844 - }, - { - "epoch": 0.9780403120349912, - "grad_norm": 1.5559128576113594, - "learning_rate": 5.036271219171606e-09, - "loss": 0.9486, - "step": 10845 - }, - { - "epoch": 0.9781304955584614, - "grad_norm": 1.7839207620557531, - "learning_rate": 4.994922943706514e-09, - "loss": 0.9675, - "step": 10846 - }, - { - "epoch": 0.9782206790819318, - "grad_norm": 1.340561388191624, - "learning_rate": 4.953744893074763e-09, - "loss": 0.9124, - "step": 10847 - }, - { - "epoch": 0.978310862605402, - "grad_norm": 3.8953213822821664, - "learning_rate": 4.912737070789985e-09, - "loss": 0.8608, - "step": 10848 - }, - { - "epoch": 0.9784010461288722, - "grad_norm": 1.5623922692353251, - "learning_rate": 4.871899480351604e-09, - "loss": 1.0034, - "step": 10849 - }, - { - "epoch": 0.9784912296523425, - "grad_norm": 1.61142096974465, - "learning_rate": 4.831232125243501e-09, - "loss": 0.9427, - "step": 10850 - }, - { - "epoch": 0.9785814131758128, - "grad_norm": 1.3028623869767688, - "learning_rate": 4.7907350089360086e-09, - "loss": 1.0261, - "step": 10851 - }, - { - "epoch": 0.978671596699283, - "grad_norm": 1.4449239672552607, - "learning_rate": 4.750408134884365e-09, - "loss": 0.8241, - "step": 10852 - }, - { - "epoch": 0.9787617802227533, - "grad_norm": 1.4385981304441342, - "learning_rate": 4.710251506529816e-09, - "loss": 0.8399, - "step": 10853 - }, - { - "epoch": 0.9788519637462235, - "grad_norm": 1.3695199307693686, - "learning_rate": 4.6702651272982894e-09, - "loss": 0.9515, - "step": 10854 - }, - { - "epoch": 0.9789421472696939, - "grad_norm": 1.564661285991435, - "learning_rate": 4.630449000602166e-09, - "loss": 0.9027, - "step": 10855 - }, - { - "epoch": 0.9790323307931641, - "grad_norm": 1.6760595130642013, - "learning_rate": 4.590803129838283e-09, - "loss": 0.9956, - "step": 10856 - }, - { - "epoch": 0.9791225143166343, - "grad_norm": 1.4444249907838256, - "learning_rate": 4.551327518389714e-09, - "loss": 0.931, - "step": 10857 - }, - { - "epoch": 0.9792126978401046, - "grad_norm": 1.8603989162075287, - "learning_rate": 4.512022169624652e-09, - "loss": 0.9249, - "step": 10858 - }, - { - "epoch": 0.9793028813635749, - "grad_norm": 1.2991339072615193, - "learning_rate": 4.472887086896637e-09, - "loss": 0.9076, - "step": 10859 - }, - { - "epoch": 0.9793930648870451, - "grad_norm": 1.54089509712631, - "learning_rate": 4.433922273545443e-09, - "loss": 0.8804, - "step": 10860 - }, - { - "epoch": 0.9794832484105154, - "grad_norm": 0.5893842663243835, - "learning_rate": 4.395127732895299e-09, - "loss": 0.791, - "step": 10861 - }, - { - "epoch": 0.9795734319339856, - "grad_norm": 1.8850587607010494, - "learning_rate": 4.356503468256445e-09, - "loss": 1.0119, - "step": 10862 - }, - { - "epoch": 0.979663615457456, - "grad_norm": 1.8472867116291254, - "learning_rate": 4.318049482924913e-09, - "loss": 0.9442, - "step": 10863 - }, - { - "epoch": 0.9797537989809262, - "grad_norm": 1.6144764608472943, - "learning_rate": 4.279765780181188e-09, - "loss": 0.9045, - "step": 10864 - }, - { - "epoch": 0.9798439825043964, - "grad_norm": 1.7960797747797972, - "learning_rate": 4.241652363291992e-09, - "loss": 0.9473, - "step": 10865 - }, - { - "epoch": 0.9799341660278668, - "grad_norm": 4.837751846263447, - "learning_rate": 4.203709235509834e-09, - "loss": 0.9056, - "step": 10866 - }, - { - "epoch": 0.980024349551337, - "grad_norm": 1.485678411734475, - "learning_rate": 4.165936400071679e-09, - "loss": 0.947, - "step": 10867 - }, - { - "epoch": 0.9801145330748072, - "grad_norm": 1.331890467925811, - "learning_rate": 4.12833386020095e-09, - "loss": 0.8784, - "step": 10868 - }, - { - "epoch": 0.9802047165982775, - "grad_norm": 1.8252215682050144, - "learning_rate": 4.090901619105746e-09, - "loss": 0.9752, - "step": 10869 - }, - { - "epoch": 0.9802949001217478, - "grad_norm": 1.392593961521322, - "learning_rate": 4.053639679980181e-09, - "loss": 0.8314, - "step": 10870 - }, - { - "epoch": 0.980385083645218, - "grad_norm": 1.5565066022625675, - "learning_rate": 4.01654804600371e-09, - "loss": 0.9389, - "step": 10871 - }, - { - "epoch": 0.9804752671686883, - "grad_norm": 1.2948786065192899, - "learning_rate": 3.9796267203409114e-09, - "loss": 0.9414, - "step": 10872 - }, - { - "epoch": 0.9805654506921585, - "grad_norm": 1.2761117202317427, - "learning_rate": 3.942875706142379e-09, - "loss": 0.9163, - "step": 10873 - }, - { - "epoch": 0.9806556342156288, - "grad_norm": 1.6169042661751318, - "learning_rate": 3.906295006543825e-09, - "loss": 0.9895, - "step": 10874 - }, - { - "epoch": 0.9807458177390991, - "grad_norm": 1.4874047128627068, - "learning_rate": 3.8698846246665305e-09, - "loss": 0.9147, - "step": 10875 - }, - { - "epoch": 0.9808360012625693, - "grad_norm": 1.4129440774211464, - "learning_rate": 3.833644563617344e-09, - "loss": 0.9346, - "step": 10876 - }, - { - "epoch": 0.9809261847860395, - "grad_norm": 1.607942554785763, - "learning_rate": 3.797574826488237e-09, - "loss": 0.9416, - "step": 10877 - }, - { - "epoch": 0.9810163683095099, - "grad_norm": 1.4762190969464293, - "learning_rate": 3.761675416356969e-09, - "loss": 0.8605, - "step": 10878 - }, - { - "epoch": 0.9811065518329801, - "grad_norm": 1.2974152416409515, - "learning_rate": 3.725946336286867e-09, - "loss": 0.8396, - "step": 10879 - }, - { - "epoch": 0.9811967353564504, - "grad_norm": 1.2348573442454402, - "learning_rate": 3.6903875893261604e-09, - "loss": 0.9105, - "step": 10880 - }, - { - "epoch": 0.9812869188799206, - "grad_norm": 1.6464654054827317, - "learning_rate": 3.6549991785093105e-09, - "loss": 0.8679, - "step": 10881 - }, - { - "epoch": 0.9813771024033909, - "grad_norm": 1.4287680081945544, - "learning_rate": 3.6197811068554575e-09, - "loss": 0.9194, - "step": 10882 - }, - { - "epoch": 0.9814672859268612, - "grad_norm": 1.2187835982166486, - "learning_rate": 3.584733377369975e-09, - "loss": 1.0099, - "step": 10883 - }, - { - "epoch": 0.9815574694503314, - "grad_norm": 1.6398828190133632, - "learning_rate": 3.549855993043138e-09, - "loss": 0.9058, - "step": 10884 - }, - { - "epoch": 0.9816476529738016, - "grad_norm": 1.384983535214277, - "learning_rate": 3.5151489568507887e-09, - "loss": 0.9202, - "step": 10885 - }, - { - "epoch": 0.981737836497272, - "grad_norm": 1.583214663358329, - "learning_rate": 3.4806122717545572e-09, - "loss": 0.9511, - "step": 10886 - }, - { - "epoch": 0.9818280200207422, - "grad_norm": 1.865345385378681, - "learning_rate": 3.446245940701198e-09, - "loss": 0.8947, - "step": 10887 - }, - { - "epoch": 0.9819182035442124, - "grad_norm": 1.4977689681720086, - "learning_rate": 3.41204996662281e-09, - "loss": 0.8702, - "step": 10888 - }, - { - "epoch": 0.9820083870676828, - "grad_norm": 1.1984583030993958, - "learning_rate": 3.3780243524375028e-09, - "loss": 0.9313, - "step": 10889 - }, - { - "epoch": 0.982098570591153, - "grad_norm": 1.403674118898808, - "learning_rate": 3.3441691010485107e-09, - "loss": 0.9578, - "step": 10890 - }, - { - "epoch": 0.9821887541146233, - "grad_norm": 1.5827087458715905, - "learning_rate": 3.3104842153444113e-09, - "loss": 1.012, - "step": 10891 - }, - { - "epoch": 0.9822789376380935, - "grad_norm": 1.5743779180059767, - "learning_rate": 3.27696969819935e-09, - "loss": 0.928, - "step": 10892 - }, - { - "epoch": 0.9823691211615638, - "grad_norm": 1.301925797151338, - "learning_rate": 3.2436255524732615e-09, - "loss": 0.8894, - "step": 10893 - }, - { - "epoch": 0.9824593046850341, - "grad_norm": 1.260748380827521, - "learning_rate": 3.210451781010759e-09, - "loss": 0.9123, - "step": 10894 - }, - { - "epoch": 0.9825494882085043, - "grad_norm": 1.3896832652764106, - "learning_rate": 3.1774483866426895e-09, - "loss": 0.7995, - "step": 10895 - }, - { - "epoch": 0.9826396717319745, - "grad_norm": 1.28119468569563, - "learning_rate": 3.144615372185244e-09, - "loss": 0.8958, - "step": 10896 - }, - { - "epoch": 0.9827298552554449, - "grad_norm": 1.2000714479475099, - "learning_rate": 3.1119527404399604e-09, - "loss": 1.0142, - "step": 10897 - }, - { - "epoch": 0.9828200387789151, - "grad_norm": 1.7867559364895953, - "learning_rate": 3.0794604941932754e-09, - "loss": 0.9363, - "step": 10898 - }, - { - "epoch": 0.9829102223023853, - "grad_norm": 1.3160847347656806, - "learning_rate": 3.0471386362180827e-09, - "loss": 0.829, - "step": 10899 - }, - { - "epoch": 0.9830004058258556, - "grad_norm": 2.114982554767347, - "learning_rate": 3.0149871692719542e-09, - "loss": 0.984, - "step": 10900 - }, - { - "epoch": 0.9830905893493259, - "grad_norm": 1.3139606703993554, - "learning_rate": 2.9830060960984728e-09, - "loss": 0.9198, - "step": 10901 - }, - { - "epoch": 0.9831807728727961, - "grad_norm": 0.6825743349347111, - "learning_rate": 2.9511954194263442e-09, - "loss": 0.8048, - "step": 10902 - }, - { - "epoch": 0.9832709563962664, - "grad_norm": 1.4041175780879973, - "learning_rate": 2.9195551419698426e-09, - "loss": 0.9362, - "step": 10903 - }, - { - "epoch": 0.9833611399197366, - "grad_norm": 1.6172306220898194, - "learning_rate": 2.888085266428808e-09, - "loss": 0.8936, - "step": 10904 - }, - { - "epoch": 0.983451323443207, - "grad_norm": 1.7147423924968241, - "learning_rate": 2.8567857954882037e-09, - "loss": 0.887, - "step": 10905 - }, - { - "epoch": 0.9835415069666772, - "grad_norm": 1.5343773494166395, - "learning_rate": 2.82565673181856e-09, - "loss": 0.9855, - "step": 10906 - }, - { - "epoch": 0.9836316904901474, - "grad_norm": 1.3441798831047933, - "learning_rate": 2.7946980780764184e-09, - "loss": 0.9565, - "step": 10907 - }, - { - "epoch": 0.9837218740136177, - "grad_norm": 1.6236306069215818, - "learning_rate": 2.763909836903e-09, - "loss": 0.9902, - "step": 10908 - }, - { - "epoch": 0.983812057537088, - "grad_norm": 1.2578275217616435, - "learning_rate": 2.7332920109255364e-09, - "loss": 0.8528, - "step": 10909 - }, - { - "epoch": 0.9839022410605582, - "grad_norm": 1.664910758436358, - "learning_rate": 2.702844602756382e-09, - "loss": 0.9135, - "step": 10910 - }, - { - "epoch": 0.9839924245840285, - "grad_norm": 2.1128325206280287, - "learning_rate": 2.6725676149936814e-09, - "loss": 0.9045, - "step": 10911 - }, - { - "epoch": 0.9840826081074988, - "grad_norm": 1.481136765155719, - "learning_rate": 2.642461050220479e-09, - "loss": 1.0034, - "step": 10912 - }, - { - "epoch": 0.984172791630969, - "grad_norm": 1.3221955273753534, - "learning_rate": 2.612524911005609e-09, - "loss": 0.9072, - "step": 10913 - }, - { - "epoch": 0.9842629751544393, - "grad_norm": 1.4100965238904488, - "learning_rate": 2.582759199903917e-09, - "loss": 0.8127, - "step": 10914 - }, - { - "epoch": 0.9843531586779095, - "grad_norm": 1.4318535244420616, - "learning_rate": 2.553163919454704e-09, - "loss": 0.8903, - "step": 10915 - }, - { - "epoch": 0.9844433422013799, - "grad_norm": 1.3906028402898876, - "learning_rate": 2.523739072183506e-09, - "loss": 0.916, - "step": 10916 - }, - { - "epoch": 0.9845335257248501, - "grad_norm": 1.2824797427564887, - "learning_rate": 2.4944846606007597e-09, - "loss": 0.8972, - "step": 10917 - }, - { - "epoch": 0.9846237092483203, - "grad_norm": 0.6522510028625241, - "learning_rate": 2.46540068720269e-09, - "loss": 0.843, - "step": 10918 - }, - { - "epoch": 0.9847138927717906, - "grad_norm": 1.2842874576036063, - "learning_rate": 2.4364871544708674e-09, - "loss": 0.9678, - "step": 10919 - }, - { - "epoch": 0.9848040762952609, - "grad_norm": 1.5320154729066753, - "learning_rate": 2.4077440648726523e-09, - "loss": 0.879, - "step": 10920 - }, - { - "epoch": 0.9848942598187311, - "grad_norm": 1.7697659435801538, - "learning_rate": 2.379171420860082e-09, - "loss": 0.9969, - "step": 10921 - }, - { - "epoch": 0.9849844433422014, - "grad_norm": 1.4948798052846672, - "learning_rate": 2.3507692248714296e-09, - "loss": 0.8562, - "step": 10922 - }, - { - "epoch": 0.9850746268656716, - "grad_norm": 1.7125389233920174, - "learning_rate": 2.322537479330089e-09, - "loss": 0.9426, - "step": 10923 - }, - { - "epoch": 0.9851648103891419, - "grad_norm": 1.354039914972639, - "learning_rate": 2.2944761866450223e-09, - "loss": 1.0043, - "step": 10924 - }, - { - "epoch": 0.9852549939126122, - "grad_norm": 2.116148521302081, - "learning_rate": 2.266585349210315e-09, - "loss": 0.9178, - "step": 10925 - }, - { - "epoch": 0.9853451774360824, - "grad_norm": 1.7707543598591549, - "learning_rate": 2.2388649694060623e-09, - "loss": 0.885, - "step": 10926 - }, - { - "epoch": 0.9854353609595526, - "grad_norm": 1.3196868768573566, - "learning_rate": 2.211315049597262e-09, - "loss": 0.9947, - "step": 10927 - }, - { - "epoch": 0.985525544483023, - "grad_norm": 1.356086260496578, - "learning_rate": 2.1839355921349224e-09, - "loss": 0.9709, - "step": 10928 - }, - { - "epoch": 0.9856157280064932, - "grad_norm": 1.7275166356435387, - "learning_rate": 2.156726599354952e-09, - "loss": 0.9236, - "step": 10929 - }, - { - "epoch": 0.9857059115299635, - "grad_norm": 1.5934899061809158, - "learning_rate": 2.129688073578828e-09, - "loss": 0.9522, - "step": 10930 - }, - { - "epoch": 0.9857960950534337, - "grad_norm": 1.5515947106159367, - "learning_rate": 2.1028200171142597e-09, - "loss": 1.0609, - "step": 10931 - }, - { - "epoch": 0.985886278576904, - "grad_norm": 1.4986726928320535, - "learning_rate": 2.076122432253191e-09, - "loss": 0.9215, - "step": 10932 - }, - { - "epoch": 0.9859764621003743, - "grad_norm": 1.631787795356025, - "learning_rate": 2.0495953212738005e-09, - "loss": 0.9717, - "step": 10933 - }, - { - "epoch": 0.9860666456238445, - "grad_norm": 1.1989071220554863, - "learning_rate": 2.0232386864396102e-09, - "loss": 1.014, - "step": 10934 - }, - { - "epoch": 0.9861568291473148, - "grad_norm": 1.3397527724983185, - "learning_rate": 1.9970525299992656e-09, - "loss": 0.9522, - "step": 10935 - }, - { - "epoch": 0.9862470126707851, - "grad_norm": 1.934223665840872, - "learning_rate": 1.9710368541874245e-09, - "loss": 0.8972, - "step": 10936 - }, - { - "epoch": 0.9863371961942553, - "grad_norm": 0.5779869584935406, - "learning_rate": 1.945191661223644e-09, - "loss": 0.7962, - "step": 10937 - }, - { - "epoch": 0.9864273797177255, - "grad_norm": 1.3930963137973218, - "learning_rate": 1.9195169533132714e-09, - "loss": 0.9759, - "step": 10938 - }, - { - "epoch": 0.9865175632411959, - "grad_norm": 0.6007703544511697, - "learning_rate": 1.894012732646999e-09, - "loss": 0.8044, - "step": 10939 - }, - { - "epoch": 0.9866077467646661, - "grad_norm": 1.6336944242567317, - "learning_rate": 1.8686790014010854e-09, - "loss": 0.8922, - "step": 10940 - }, - { - "epoch": 0.9866979302881363, - "grad_norm": 0.6041184658247315, - "learning_rate": 1.8435157617369134e-09, - "loss": 0.7645, - "step": 10941 - }, - { - "epoch": 0.9867881138116066, - "grad_norm": 1.6070504298199155, - "learning_rate": 1.818523015801876e-09, - "loss": 1.0773, - "step": 10942 - }, - { - "epoch": 0.9868782973350769, - "grad_norm": 1.5566178802398478, - "learning_rate": 1.7937007657282677e-09, - "loss": 0.8926, - "step": 10943 - }, - { - "epoch": 0.9869684808585472, - "grad_norm": 1.3756990389763504, - "learning_rate": 1.7690490136341718e-09, - "loss": 0.8725, - "step": 10944 - }, - { - "epoch": 0.9870586643820174, - "grad_norm": 1.171071196498606, - "learning_rate": 1.744567761622795e-09, - "loss": 0.93, - "step": 10945 - }, - { - "epoch": 0.9871488479054876, - "grad_norm": 1.3533730700176003, - "learning_rate": 1.7202570117831327e-09, - "loss": 0.9302, - "step": 10946 - }, - { - "epoch": 0.987239031428958, - "grad_norm": 1.680044446765957, - "learning_rate": 1.696116766189526e-09, - "loss": 0.9081, - "step": 10947 - }, - { - "epoch": 0.9873292149524282, - "grad_norm": 1.3475604037050086, - "learning_rate": 1.6721470269021042e-09, - "loss": 0.9746, - "step": 10948 - }, - { - "epoch": 0.9874193984758984, - "grad_norm": 1.6937368779536308, - "learning_rate": 1.6483477959654546e-09, - "loss": 0.9001, - "step": 10949 - }, - { - "epoch": 0.9875095819993687, - "grad_norm": 1.260158554425057, - "learning_rate": 1.6247190754106187e-09, - "loss": 0.9155, - "step": 10950 - }, - { - "epoch": 0.987599765522839, - "grad_norm": 1.4199198600828207, - "learning_rate": 1.6012608672537619e-09, - "loss": 0.8977, - "step": 10951 - }, - { - "epoch": 0.9876899490463092, - "grad_norm": 1.6018485953498591, - "learning_rate": 1.5779731734963942e-09, - "loss": 0.9364, - "step": 10952 - }, - { - "epoch": 0.9877801325697795, - "grad_norm": 1.5275146700939717, - "learning_rate": 1.5548559961253705e-09, - "loss": 0.9851, - "step": 10953 - }, - { - "epoch": 0.9878703160932497, - "grad_norm": 1.5605085270339143, - "learning_rate": 1.5319093371135573e-09, - "loss": 0.8594, - "step": 10954 - }, - { - "epoch": 0.9879604996167201, - "grad_norm": 1.303348196478256, - "learning_rate": 1.5091331984184997e-09, - "loss": 1.0245, - "step": 10955 - }, - { - "epoch": 0.9880506831401903, - "grad_norm": 1.2123518037190504, - "learning_rate": 1.486527581983754e-09, - "loss": 0.9291, - "step": 10956 - }, - { - "epoch": 0.9881408666636605, - "grad_norm": 1.4648576659495245, - "learning_rate": 1.4640924897382223e-09, - "loss": 0.9853, - "step": 10957 - }, - { - "epoch": 0.9882310501871309, - "grad_norm": 1.2888846534809224, - "learning_rate": 1.4418279235961506e-09, - "loss": 0.9686, - "step": 10958 - }, - { - "epoch": 0.9883212337106011, - "grad_norm": 1.451904686875946, - "learning_rate": 1.4197338854573526e-09, - "loss": 0.8349, - "step": 10959 - }, - { - "epoch": 0.9884114172340713, - "grad_norm": 1.5127771895963924, - "learning_rate": 1.3978103772067651e-09, - "loss": 0.895, - "step": 10960 - }, - { - "epoch": 0.9885016007575416, - "grad_norm": 1.5724886718244928, - "learning_rate": 1.3760574007153358e-09, - "loss": 0.8552, - "step": 10961 - }, - { - "epoch": 0.9885917842810119, - "grad_norm": 1.6818113162863624, - "learning_rate": 1.3544749578389137e-09, - "loss": 1.0179, - "step": 10962 - }, - { - "epoch": 0.9886819678044821, - "grad_norm": 1.437690639293918, - "learning_rate": 1.3330630504189143e-09, - "loss": 0.9708, - "step": 10963 - }, - { - "epoch": 0.9887721513279524, - "grad_norm": 1.343307599719662, - "learning_rate": 1.3118216802827652e-09, - "loss": 0.9022, - "step": 10964 - }, - { - "epoch": 0.9888623348514226, - "grad_norm": 1.6156084395543082, - "learning_rate": 1.2907508492425722e-09, - "loss": 0.9101, - "step": 10965 - }, - { - "epoch": 0.988952518374893, - "grad_norm": 1.6994670817999336, - "learning_rate": 1.2698505590962305e-09, - "loss": 0.8969, - "step": 10966 - }, - { - "epoch": 0.9890427018983632, - "grad_norm": 1.398156036523587, - "learning_rate": 1.2491208116272022e-09, - "loss": 0.9289, - "step": 10967 - }, - { - "epoch": 0.9891328854218334, - "grad_norm": 1.4862808841611752, - "learning_rate": 1.2285616086040728e-09, - "loss": 0.9654, - "step": 10968 - }, - { - "epoch": 0.9892230689453037, - "grad_norm": 1.2485580233380853, - "learning_rate": 1.2081729517812167e-09, - "loss": 0.9944, - "step": 10969 - }, - { - "epoch": 0.989313252468774, - "grad_norm": 1.3762682787576124, - "learning_rate": 1.1879548428983533e-09, - "loss": 0.9272, - "step": 10970 - }, - { - "epoch": 0.9894034359922442, - "grad_norm": 1.638063811287724, - "learning_rate": 1.167907283680547e-09, - "loss": 0.8791, - "step": 10971 - }, - { - "epoch": 0.9894936195157145, - "grad_norm": 1.383289448873123, - "learning_rate": 1.1480302758382077e-09, - "loss": 0.8725, - "step": 10972 - }, - { - "epoch": 0.9895838030391847, - "grad_norm": 1.5408240756466278, - "learning_rate": 1.1283238210675338e-09, - "loss": 0.9193, - "step": 10973 - }, - { - "epoch": 0.989673986562655, - "grad_norm": 1.6933179100090154, - "learning_rate": 1.1087879210498474e-09, - "loss": 0.8655, - "step": 10974 - }, - { - "epoch": 0.9897641700861253, - "grad_norm": 1.309462802464976, - "learning_rate": 1.0894225774522592e-09, - "loss": 0.9799, - "step": 10975 - }, - { - "epoch": 0.9898543536095955, - "grad_norm": 0.6336536938854243, - "learning_rate": 1.070227791927003e-09, - "loss": 0.8124, - "step": 10976 - }, - { - "epoch": 0.9899445371330657, - "grad_norm": 1.538465045044644, - "learning_rate": 1.0512035661118802e-09, - "loss": 1.0272, - "step": 10977 - }, - { - "epoch": 0.9900347206565361, - "grad_norm": 1.9906064210800234, - "learning_rate": 1.0323499016300364e-09, - "loss": 0.9526, - "step": 10978 - }, - { - "epoch": 0.9901249041800063, - "grad_norm": 6.952263870920623, - "learning_rate": 1.013666800090407e-09, - "loss": 0.886, - "step": 10979 - }, - { - "epoch": 0.9902150877034765, - "grad_norm": 1.4487196070434587, - "learning_rate": 9.951542630870502e-10, - "loss": 0.9238, - "step": 10980 - }, - { - "epoch": 0.9903052712269468, - "grad_norm": 1.5031732531632103, - "learning_rate": 9.768122921995915e-10, - "loss": 0.9623, - "step": 10981 - }, - { - "epoch": 0.9903954547504171, - "grad_norm": 1.5692077850259536, - "learning_rate": 9.58640888992779e-10, - "loss": 0.866, - "step": 10982 - }, - { - "epoch": 0.9904856382738874, - "grad_norm": 1.501427686278028, - "learning_rate": 9.40640055017594e-10, - "loss": 0.8495, - "step": 10983 - }, - { - "epoch": 0.9905758217973576, - "grad_norm": 1.570394864935114, - "learning_rate": 9.228097918094757e-10, - "loss": 0.9331, - "step": 10984 - }, - { - "epoch": 0.9906660053208279, - "grad_norm": 1.5330957460158927, - "learning_rate": 9.051501008900952e-10, - "loss": 0.9029, - "step": 10985 - }, - { - "epoch": 0.9907561888442982, - "grad_norm": 1.3537921925712266, - "learning_rate": 8.876609837662475e-10, - "loss": 0.9427, - "step": 10986 - }, - { - "epoch": 0.9908463723677684, - "grad_norm": 1.3396284636651785, - "learning_rate": 8.70342441930294e-10, - "loss": 1.0192, - "step": 10987 - }, - { - "epoch": 0.9909365558912386, - "grad_norm": 1.5495633740877883, - "learning_rate": 8.531944768594979e-10, - "loss": 0.9091, - "step": 10988 - }, - { - "epoch": 0.991026739414709, - "grad_norm": 1.2698707478749929, - "learning_rate": 8.362170900175769e-10, - "loss": 1.0105, - "step": 10989 - }, - { - "epoch": 0.9911169229381792, - "grad_norm": 1.399033190480083, - "learning_rate": 8.194102828527061e-10, - "loss": 0.8882, - "step": 10990 - }, - { - "epoch": 0.9912071064616494, - "grad_norm": 1.7942141527174975, - "learning_rate": 8.027740567992936e-10, - "loss": 0.9711, - "step": 10991 - }, - { - "epoch": 0.9912972899851197, - "grad_norm": 1.3789994853975194, - "learning_rate": 7.863084132766484e-10, - "loss": 0.9263, - "step": 10992 - }, - { - "epoch": 0.99138747350859, - "grad_norm": 1.5681954912235425, - "learning_rate": 7.700133536896469e-10, - "loss": 0.9545, - "step": 10993 - }, - { - "epoch": 0.9914776570320603, - "grad_norm": 1.3241894023872776, - "learning_rate": 7.538888794287324e-10, - "loss": 0.8952, - "step": 10994 - }, - { - "epoch": 0.9915678405555305, - "grad_norm": 1.450229754825633, - "learning_rate": 7.379349918696931e-10, - "loss": 0.9582, - "step": 10995 - }, - { - "epoch": 0.9916580240790007, - "grad_norm": 1.5998737643034662, - "learning_rate": 7.221516923738846e-10, - "loss": 0.906, - "step": 10996 - }, - { - "epoch": 0.9917482076024711, - "grad_norm": 2.080711418779583, - "learning_rate": 7.065389822880075e-10, - "loss": 1.0344, - "step": 10997 - }, - { - "epoch": 0.9918383911259413, - "grad_norm": 1.4210887873197517, - "learning_rate": 6.910968629443292e-10, - "loss": 1.0001, - "step": 10998 - }, - { - "epoch": 0.9919285746494115, - "grad_norm": 1.6241234836282097, - "learning_rate": 6.758253356602406e-10, - "loss": 0.93, - "step": 10999 - }, - { - "epoch": 0.9920187581728818, - "grad_norm": 1.4264101914388871, - "learning_rate": 6.607244017389213e-10, - "loss": 1.0242, - "step": 11000 - }, - { - "epoch": 0.9921089416963521, - "grad_norm": 1.7983645029109645, - "learning_rate": 6.457940624686742e-10, - "loss": 0.9554, - "step": 11001 - }, - { - "epoch": 0.9921991252198223, - "grad_norm": 1.6365515138680924, - "learning_rate": 6.310343191238132e-10, - "loss": 1.0125, - "step": 11002 - }, - { - "epoch": 0.9922893087432926, - "grad_norm": 1.926273801385476, - "learning_rate": 6.164451729635534e-10, - "loss": 0.8314, - "step": 11003 - }, - { - "epoch": 0.9923794922667628, - "grad_norm": 1.6490108704124802, - "learning_rate": 6.020266252324546e-10, - "loss": 0.8029, - "step": 11004 - }, - { - "epoch": 0.9924696757902332, - "grad_norm": 1.9517804209685128, - "learning_rate": 5.877786771610882e-10, - "loss": 0.9018, - "step": 11005 - }, - { - "epoch": 0.9925598593137034, - "grad_norm": 0.795345783848714, - "learning_rate": 5.737013299651483e-10, - "loss": 0.9041, - "step": 11006 - }, - { - "epoch": 0.9926500428371736, - "grad_norm": 1.6688151657582093, - "learning_rate": 5.597945848458963e-10, - "loss": 0.9305, - "step": 11007 - }, - { - "epoch": 0.992740226360644, - "grad_norm": 1.5741374181746672, - "learning_rate": 5.460584429894944e-10, - "loss": 0.9102, - "step": 11008 - }, - { - "epoch": 0.9928304098841142, - "grad_norm": 1.693957653196429, - "learning_rate": 5.32492905568338e-10, - "loss": 0.934, - "step": 11009 - }, - { - "epoch": 0.9929205934075844, - "grad_norm": 1.3817074341889213, - "learning_rate": 5.190979737399459e-10, - "loss": 0.9662, - "step": 11010 - }, - { - "epoch": 0.9930107769310547, - "grad_norm": 1.5250928444037477, - "learning_rate": 5.058736486469594e-10, - "loss": 0.8208, - "step": 11011 - }, - { - "epoch": 0.993100960454525, - "grad_norm": 1.4016121573778793, - "learning_rate": 4.928199314180314e-10, - "loss": 0.9666, - "step": 11012 - }, - { - "epoch": 0.9931911439779952, - "grad_norm": 1.9836363731874107, - "learning_rate": 4.799368231669376e-10, - "loss": 0.8453, - "step": 11013 - }, - { - "epoch": 0.9932813275014655, - "grad_norm": 1.596275945824498, - "learning_rate": 4.672243249927988e-10, - "loss": 0.9157, - "step": 11014 - }, - { - "epoch": 0.9933715110249357, - "grad_norm": 1.4540865598254342, - "learning_rate": 4.546824379803027e-10, - "loss": 0.9582, - "step": 11015 - }, - { - "epoch": 0.993461694548406, - "grad_norm": 1.520803355439732, - "learning_rate": 4.4231116319970454e-10, - "loss": 0.9535, - "step": 11016 - }, - { - "epoch": 0.9935518780718763, - "grad_norm": 1.7484743416103232, - "learning_rate": 4.3011050170660423e-10, - "loss": 0.9275, - "step": 11017 - }, - { - "epoch": 0.9936420615953465, - "grad_norm": 1.5443850427329595, - "learning_rate": 4.18080454542169e-10, - "loss": 0.929, - "step": 11018 - }, - { - "epoch": 0.9937322451188167, - "grad_norm": 0.6592299830710905, - "learning_rate": 4.0622102273246694e-10, - "loss": 0.7925, - "step": 11019 - }, - { - "epoch": 0.9938224286422871, - "grad_norm": 1.2365169531604434, - "learning_rate": 3.945322072897994e-10, - "loss": 0.9897, - "step": 11020 - }, - { - "epoch": 0.9939126121657573, - "grad_norm": 1.714326958784584, - "learning_rate": 3.830140092111467e-10, - "loss": 0.9853, - "step": 11021 - }, - { - "epoch": 0.9940027956892276, - "grad_norm": 2.2654907186768902, - "learning_rate": 3.7166642947972225e-10, - "loss": 0.934, - "step": 11022 - }, - { - "epoch": 0.9940929792126978, - "grad_norm": 1.544832673131972, - "learning_rate": 3.604894690634186e-10, - "loss": 0.9082, - "step": 11023 - }, - { - "epoch": 0.9941831627361681, - "grad_norm": 1.396009073534009, - "learning_rate": 3.494831289161393e-10, - "loss": 1.0154, - "step": 11024 - }, - { - "epoch": 0.9942733462596384, - "grad_norm": 1.2858793780887445, - "learning_rate": 3.3864740997668897e-10, - "loss": 0.9688, - "step": 11025 - }, - { - "epoch": 0.9943635297831086, - "grad_norm": 1.4243970867083269, - "learning_rate": 3.279823131701054e-10, - "loss": 0.9579, - "step": 11026 - }, - { - "epoch": 0.9944537133065788, - "grad_norm": 1.2230237707041, - "learning_rate": 3.1748783940610536e-10, - "loss": 0.8872, - "step": 11027 - }, - { - "epoch": 0.9945438968300492, - "grad_norm": 1.441845503870387, - "learning_rate": 3.071639895801947e-10, - "loss": 0.888, - "step": 11028 - }, - { - "epoch": 0.9946340803535194, - "grad_norm": 1.621223462901582, - "learning_rate": 2.9701076457322447e-10, - "loss": 0.9978, - "step": 11029 - }, - { - "epoch": 0.9947242638769896, - "grad_norm": 1.8619592483273206, - "learning_rate": 2.870281652513906e-10, - "loss": 0.9678, - "step": 11030 - }, - { - "epoch": 0.99481444740046, - "grad_norm": 1.5136504534774533, - "learning_rate": 2.772161924669003e-10, - "loss": 0.9165, - "step": 11031 - }, - { - "epoch": 0.9949046309239302, - "grad_norm": 1.3948229743723883, - "learning_rate": 2.6757484705641765e-10, - "loss": 0.9164, - "step": 11032 - }, - { - "epoch": 0.9949948144474005, - "grad_norm": 1.5965829242468372, - "learning_rate": 2.58104129843062e-10, - "loss": 0.9314, - "step": 11033 - }, - { - "epoch": 0.9950849979708707, - "grad_norm": 1.6584315058468864, - "learning_rate": 2.4880404163463154e-10, - "loss": 0.8828, - "step": 11034 - }, - { - "epoch": 0.995175181494341, - "grad_norm": 1.7223154020567741, - "learning_rate": 2.3967458322471377e-10, - "loss": 0.9141, - "step": 11035 - }, - { - "epoch": 0.9952653650178113, - "grad_norm": 1.3767642926537007, - "learning_rate": 2.307157553922412e-10, - "loss": 0.9395, - "step": 11036 - }, - { - "epoch": 0.9953555485412815, - "grad_norm": 1.3548500341921432, - "learning_rate": 2.2192755890193538e-10, - "loss": 0.9696, - "step": 11037 - }, - { - "epoch": 0.9954457320647517, - "grad_norm": 0.6908873857934303, - "learning_rate": 2.133099945034189e-10, - "loss": 0.7673, - "step": 11038 - }, - { - "epoch": 0.9955359155882221, - "grad_norm": 1.6051262631767464, - "learning_rate": 2.048630629318815e-10, - "loss": 0.9553, - "step": 11039 - }, - { - "epoch": 0.9956260991116923, - "grad_norm": 1.29681217087371, - "learning_rate": 1.965867649080799e-10, - "loss": 0.925, - "step": 11040 - }, - { - "epoch": 0.9957162826351625, - "grad_norm": 0.6350347329950843, - "learning_rate": 1.8848110113856008e-10, - "loss": 0.7798, - "step": 11041 - }, - { - "epoch": 0.9958064661586328, - "grad_norm": 1.395876665534024, - "learning_rate": 1.8054607231454687e-10, - "loss": 0.9697, - "step": 11042 - }, - { - "epoch": 0.9958966496821031, - "grad_norm": 1.2846678295592142, - "learning_rate": 1.7278167911327635e-10, - "loss": 0.8846, - "step": 11043 - }, - { - "epoch": 0.9959868332055734, - "grad_norm": 1.40047264124968, - "learning_rate": 1.6518792219710753e-10, - "loss": 0.9641, - "step": 11044 - }, - { - "epoch": 0.9960770167290436, - "grad_norm": 1.42315021383661, - "learning_rate": 1.5776480221418865e-10, - "loss": 0.9369, - "step": 11045 - }, - { - "epoch": 0.9961672002525138, - "grad_norm": 1.7360207936847867, - "learning_rate": 1.505123197977909e-10, - "loss": 0.9335, - "step": 11046 - }, - { - "epoch": 0.9962573837759842, - "grad_norm": 1.5787681553175021, - "learning_rate": 1.4343047556675258e-10, - "loss": 0.8736, - "step": 11047 - }, - { - "epoch": 0.9963475672994544, - "grad_norm": 1.4810639244414336, - "learning_rate": 1.3651927012503506e-10, - "loss": 0.936, - "step": 11048 - }, - { - "epoch": 0.9964377508229246, - "grad_norm": 1.5035566443786628, - "learning_rate": 1.297787040630549e-10, - "loss": 0.9408, - "step": 11049 - }, - { - "epoch": 0.9965279343463949, - "grad_norm": 1.8636409004562202, - "learning_rate": 1.2320877795524153e-10, - "loss": 0.9517, - "step": 11050 - }, - { - "epoch": 0.9966181178698652, - "grad_norm": 1.6727296302660057, - "learning_rate": 1.1680949236247962e-10, - "loss": 0.9229, - "step": 11051 - }, - { - "epoch": 0.9967083013933354, - "grad_norm": 1.3416134016153538, - "learning_rate": 1.1058084783099886e-10, - "loss": 0.9381, - "step": 11052 - }, - { - "epoch": 0.9967984849168057, - "grad_norm": 1.5178877466786975, - "learning_rate": 1.0452284489170793e-10, - "loss": 0.9404, - "step": 11053 - }, - { - "epoch": 0.996888668440276, - "grad_norm": 1.3347199005431378, - "learning_rate": 9.86354840621928e-11, - "loss": 0.9166, - "step": 11054 - }, - { - "epoch": 0.9969788519637462, - "grad_norm": 1.5205720999375196, - "learning_rate": 9.291876584427427e-11, - "loss": 0.8106, - "step": 11055 - }, - { - "epoch": 0.9970690354872165, - "grad_norm": 1.8958947377862905, - "learning_rate": 8.737269072578435e-11, - "loss": 0.9118, - "step": 11056 - }, - { - "epoch": 0.9971592190106867, - "grad_norm": 1.3260510126919753, - "learning_rate": 8.199725918012212e-11, - "loss": 0.9214, - "step": 11057 - }, - { - "epoch": 0.9972494025341571, - "grad_norm": 1.513388251527899, - "learning_rate": 7.679247166603175e-11, - "loss": 0.9364, - "step": 11058 - }, - { - "epoch": 0.9973395860576273, - "grad_norm": 1.34354008654072, - "learning_rate": 7.17583286273804e-11, - "loss": 0.9348, - "step": 11059 - }, - { - "epoch": 0.9974297695810975, - "grad_norm": 1.3424276010608416, - "learning_rate": 6.689483049360233e-11, - "loss": 0.9561, - "step": 11060 - }, - { - "epoch": 0.9975199531045678, - "grad_norm": 1.6806444857408636, - "learning_rate": 6.220197768014302e-11, - "loss": 0.904, - "step": 11061 - }, - { - "epoch": 0.9976101366280381, - "grad_norm": 1.4286286649137119, - "learning_rate": 5.7679770587126806e-11, - "loss": 0.9435, - "step": 11062 - }, - { - "epoch": 0.9977003201515083, - "grad_norm": 0.6370274316234925, - "learning_rate": 5.33282096002452e-11, - "loss": 0.8712, - "step": 11063 - }, - { - "epoch": 0.9977905036749786, - "grad_norm": 1.5663329604357394, - "learning_rate": 4.914729509120086e-11, - "loss": 0.9055, - "step": 11064 - }, - { - "epoch": 0.9978806871984488, - "grad_norm": 1.2389930580566615, - "learning_rate": 4.513702741637537e-11, - "loss": 0.856, - "step": 11065 - }, - { - "epoch": 0.9979708707219191, - "grad_norm": 1.551211511225137, - "learning_rate": 4.129740691816153e-11, - "loss": 0.9908, - "step": 11066 - }, - { - "epoch": 0.9980610542453894, - "grad_norm": 1.3215533413031828, - "learning_rate": 3.762843392429715e-11, - "loss": 0.9566, - "step": 11067 - }, - { - "epoch": 0.9981512377688596, - "grad_norm": 1.7362409983896636, - "learning_rate": 3.413010874742106e-11, - "loss": 0.919, - "step": 11068 - }, - { - "epoch": 0.9982414212923298, - "grad_norm": 1.5595544899058429, - "learning_rate": 3.080243168618324e-11, - "loss": 0.9267, - "step": 11069 - }, - { - "epoch": 0.9983316048158002, - "grad_norm": 1.515486563426341, - "learning_rate": 2.7645403024800783e-11, - "loss": 0.8792, - "step": 11070 - }, - { - "epoch": 0.9984217883392704, - "grad_norm": 1.1970353021666398, - "learning_rate": 2.4659023032391756e-11, - "loss": 1.0054, - "step": 11071 - }, - { - "epoch": 0.9985119718627407, - "grad_norm": 1.1942420105326514, - "learning_rate": 2.1843291963863364e-11, - "loss": 0.9813, - "step": 11072 - }, - { - "epoch": 0.9986021553862109, - "grad_norm": 2.4050374134032477, - "learning_rate": 1.9198210059245822e-11, - "loss": 0.9479, - "step": 11073 - }, - { - "epoch": 0.9986923389096812, - "grad_norm": 1.6574752162501618, - "learning_rate": 1.672377754458054e-11, - "loss": 0.9342, - "step": 11074 - }, - { - "epoch": 0.9987825224331515, - "grad_norm": 1.3915259625161092, - "learning_rate": 1.4419994630809895e-11, - "loss": 0.9206, - "step": 11075 - }, - { - "epoch": 0.9988727059566217, - "grad_norm": 1.410096988859317, - "learning_rate": 1.2286861514443358e-11, - "loss": 0.8833, - "step": 11076 - }, - { - "epoch": 0.998962889480092, - "grad_norm": 1.2392585339555375, - "learning_rate": 1.0324378377779553e-11, - "loss": 0.9747, - "step": 11077 - }, - { - "epoch": 0.9990530730035623, - "grad_norm": 1.36618127033997, - "learning_rate": 8.532545388018064e-12, - "loss": 0.9251, - "step": 11078 - }, - { - "epoch": 0.9991432565270325, - "grad_norm": 1.3225839251478329, - "learning_rate": 6.911362697925582e-12, - "loss": 0.7729, - "step": 11079 - }, - { - "epoch": 0.9992334400505027, - "grad_norm": 1.4803428898358064, - "learning_rate": 5.46083044605794e-12, - "loss": 0.9064, - "step": 11080 - }, - { - "epoch": 0.9993236235739731, - "grad_norm": 1.4422428835466623, - "learning_rate": 4.1809487563160276e-12, - "loss": 0.9783, - "step": 11081 - }, - { - "epoch": 0.9994138070974433, - "grad_norm": 1.2982344798029575, - "learning_rate": 3.0717177375017e-12, - "loss": 0.9354, - "step": 11082 - }, - { - "epoch": 0.9995039906209136, - "grad_norm": 1.5371787844428464, - "learning_rate": 2.1331374846500495e-12, - "loss": 0.9337, - "step": 11083 - }, - { - "epoch": 0.9995941741443838, - "grad_norm": 1.5940980718158901, - "learning_rate": 1.3652080774750885e-12, - "loss": 1.0315, - "step": 11084 - }, - { - "epoch": 0.9996843576678541, - "grad_norm": 1.1117544311888055, - "learning_rate": 7.679295817020204e-13, - "loss": 0.852, - "step": 11085 - }, - { - "epoch": 0.9997745411913244, - "grad_norm": 1.4196586522323007, - "learning_rate": 3.413020484011042e-13, - "loss": 0.9151, - "step": 11086 - }, - { - "epoch": 0.9998647247147946, - "grad_norm": 1.3630249307133806, - "learning_rate": 8.53255139876552e-14, - "loss": 0.9719, - "step": 11087 - }, - { - "epoch": 0.9999549082382648, - "grad_norm": 1.4799297227154524, - "learning_rate": 0.0, - "loss": 0.8842, - "step": 11088 - }, - { - "epoch": 0.9999549082382648, - "step": 11088, - "total_flos": 7.509663169988526e+17, - "train_loss": 0.9493302904381209, - "train_runtime": 163917.1841, - "train_samples_per_second": 4.059, - "train_steps_per_second": 0.068 - } - ], - "logging_steps": 1.0, - "max_steps": 11088, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 100, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 7.509663169988526e+17, - "train_batch_size": 5, - "trial_name": null, - "trial_params": null -} diff --git a/sft_full/hyperrouter/training_args.bin b/sft_full/hyperrouter/training_args.bin deleted file mode 100644 index 695b10af59e57e629f5c3603f5093fcda249cd2d..0000000000000000000000000000000000000000 --- a/sft_full/hyperrouter/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:953a708f11a4229615120840f8e667fc9bcb7ef98e2b43f617c0e181a1c8f93a -size 8184 diff --git a/sft_full/smoe_cosinegating/added_tokens.json b/sft_full/smoe_cosinegating/added_tokens.json deleted file mode 100644 index c9d3d3a1b74d87e381e471f7b33784015d2dc0ea..0000000000000000000000000000000000000000 --- a/sft_full/smoe_cosinegating/added_tokens.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "<|assistant|>": 32001, - "<|endoftext|>": 32000, - "<|end|>": 32007, - "<|placeholder1|>": 32002, - "<|placeholder2|>": 32003, - "<|placeholder3|>": 32004, - "<|placeholder4|>": 32005, - "<|placeholder5|>": 32008, - "<|placeholder6|>": 32009, - "<|system|>": 32006, - "<|user|>": 32010 -} diff --git a/sft_full/smoe_cosinegating/config.json b/sft_full/smoe_cosinegating/config.json deleted file mode 100644 index f49f6397fb1e14e8b81f93c64484d7d9b21dd28f..0000000000000000000000000000000000000000 --- a/sft_full/smoe_cosinegating/config.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "_name_or_path": "/cm/archive/thongdt4/toolkitmoe/checkpoints/phi3mini-siglip224-full/pft", - "architectures": [ - "LlavaPhiForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" - }, - "balance_loss_coef": 0.1, - "bos_token_id": 1, - "clip_smoe": true, - "dropout": false, - "embd_pdrop": 0.0, - "eos_token_id": 32000, - "freeze_mm_mlp_adapter": false, - "hidden_act": "silu", - "hidden_size": 3072, - "image_aspect_ratio": "pad", - "initializer_range": 0.02, - "intermediate_size": 8192, - "local_rank": 0, - "max_position_embeddings": 4096, - "mlp_smoe": true, - "mm_hidden_size": 1152, - "mm_patch_merge_type": "flat", - "mm_projector_lr": null, - "mm_projector_type": "moe", - "mm_use_im_patch_token": false, - "mm_use_im_start_end": false, - "mm_vision_select_feature": "patch", - "mm_vision_select_layer": -2, - "mm_vision_tower": "google/siglip-so400m-patch14-224", - "model_type": "llava_phi", - "moe_name": "smoe_cosinegating", - "num_attention_heads": 32, - "num_experts": 4, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "num_layers": 3, - "num_selected": 2, - "original_max_position_embeddings": 4096, - "pad_token_id": 32000, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "router_z_loss_coef": 0.01, - "scales": [ - 1, - 3 - ], - "sliding_window": 2047, - "tie_word_embeddings": false, - "tokenizer_model_max_length": 2048, - "tokenizer_padding_side": "right", - "torch_dtype": "bfloat16", - "training": true, - "transformers_version": "4.43.2", - "tune_mm_mlp_adapter": false, - "use_cache": true, - "use_mm_proj": true, - "vocab_size": 32064 -} diff --git a/sft_full/smoe_cosinegating/generation_config.json b/sft_full/smoe_cosinegating/generation_config.json deleted file mode 100644 index 3a20824ea777f1ebd11da590160a7209fe3b62c6..0000000000000000000000000000000000000000 --- a/sft_full/smoe_cosinegating/generation_config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "_from_model_config": true, - "bos_token_id": 1, - "do_sample": true, - "eos_token_id": [ - 32000, - 32001, - 32007 - ], - "pad_token_id": 32000, - "transformers_version": "4.43.2" -} diff --git a/sft_full/smoe_cosinegating/model-00001-of-00003.safetensors b/sft_full/smoe_cosinegating/model-00001-of-00003.safetensors deleted file mode 100644 index 41c0a7697e77f4a30ba8d5c89e37a96df2f0325a..0000000000000000000000000000000000000000 --- a/sft_full/smoe_cosinegating/model-00001-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c923725d1cdf98d72a2876aed7d759b645b3b40941c4a0f73a10492e88abe076 -size 4972489328 diff --git a/sft_full/smoe_cosinegating/model-00002-of-00003.safetensors b/sft_full/smoe_cosinegating/model-00002-of-00003.safetensors deleted file mode 100644 index 76df3bf177387f800fa472a28d4168e5ab4d4854..0000000000000000000000000000000000000000 --- a/sft_full/smoe_cosinegating/model-00002-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ae1caa7956750feb617e7a26214ced62a497cc09d90c808e29e5e5bcce83578b -size 4985533608 diff --git a/sft_full/smoe_cosinegating/model-00003-of-00003.safetensors b/sft_full/smoe_cosinegating/model-00003-of-00003.safetensors deleted file mode 100644 index 509f790ecfb800c9ed0ab8be94aedc5c53bfad84..0000000000000000000000000000000000000000 --- a/sft_full/smoe_cosinegating/model-00003-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca48769d22b5f9c3c2e7875e3b602baa79bfa4175b3b44fce38194826466e5ea -size 248943664 diff --git a/sft_full/smoe_cosinegating/model.safetensors.index.json b/sft_full/smoe_cosinegating/model.safetensors.index.json deleted file mode 100644 index f5e0d563e520320e7e1cb47747945b2591e60790..0000000000000000000000000000000000000000 --- a/sft_full/smoe_cosinegating/model.safetensors.index.json +++ /dev/null @@ -1,1033 +0,0 @@ -{ - "metadata": { - "total_size": 10206819680 - }, - "weight_map": { - "lm_head.weight": "model-00003-of-00003.safetensors", - "model.embed_tokens.weight": "model-00001-of-00003.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.21.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.21.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.30.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.2.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.2.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.2.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.2.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.2.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.0.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.0.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.2.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.gate.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.gate.weight": "model-00003-of-00003.safetensors", - "model.norm.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00002-of-00003.safetensors" - } -} diff --git a/sft_full/smoe_cosinegating/special_tokens_map.json b/sft_full/smoe_cosinegating/special_tokens_map.json deleted file mode 100644 index 3e4d5a5bc1cb51753cc9ae0305ece0da60052b10..0000000000000000000000000000000000000000 --- a/sft_full/smoe_cosinegating/special_tokens_map.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "bos_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "", - "unk_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - } -} diff --git a/sft_full/smoe_cosinegating/tokenizer.model b/sft_full/smoe_cosinegating/tokenizer.model deleted file mode 100644 index 6c00c742ce03c627d6cd5b795984876fa49fa899..0000000000000000000000000000000000000000 --- a/sft_full/smoe_cosinegating/tokenizer.model +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 -size 499723 diff --git a/sft_full/smoe_cosinegating/tokenizer_config.json b/sft_full/smoe_cosinegating/tokenizer_config.json deleted file mode 100644 index 3bd56c6314b14d6a33a69cd1802e04dbc1e47840..0000000000000000000000000000000000000000 --- a/sft_full/smoe_cosinegating/tokenizer_config.json +++ /dev/null @@ -1,132 +0,0 @@ -{ - "add_bos_token": true, - "add_eos_token": false, - "add_prefix_space": true, - "added_tokens_decoder": { - "0": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "1": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "2": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": false - }, - "32000": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "32001": { - "content": "<|assistant|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32002": { - "content": "<|placeholder1|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32003": { - "content": "<|placeholder2|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32004": { - "content": "<|placeholder3|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32005": { - "content": "<|placeholder4|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32006": { - "content": "<|system|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32007": { - "content": "<|end|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32008": { - "content": "<|placeholder5|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32009": { - "content": "<|placeholder6|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32010": { - "content": "<|user|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - } - }, - "bos_token": "", - "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", - "clean_up_tokenization_spaces": false, - "eos_token": "<|endoftext|>", - "legacy": false, - "model_max_length": 2048, - "pad_token": "", - "padding_side": "right", - "sp_model_kwargs": {}, - "spaces_between_special_tokens": false, - "tokenizer_class": "LlamaTokenizer", - "unk_token": "", - "use_default_system_prompt": false -} diff --git a/sft_full/smoe_cosinegating/trainer_state.json b/sft_full/smoe_cosinegating/trainer_state.json deleted file mode 100644 index a3688321cad87a6e3443dbd67a8c80021f92c4f1..0000000000000000000000000000000000000000 --- a/sft_full/smoe_cosinegating/trainer_state.json +++ /dev/null @@ -1,77658 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.9999549082382648, - "eval_steps": 500, - "global_step": 11088, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 9.018352347026198e-05, - "grad_norm": 20.419542175002817, - "learning_rate": 0.0, - "loss": 1.6703, - "step": 1 - }, - { - "epoch": 0.00018036704694052397, - "grad_norm": 16.607597248757966, - "learning_rate": 4.773623799730706e-07, - "loss": 1.6371, - "step": 2 - }, - { - "epoch": 0.0002705505704107859, - "grad_norm": 12.11758353338142, - "learning_rate": 7.566014715123208e-07, - "loss": 1.5544, - "step": 3 - }, - { - "epoch": 0.00036073409388104793, - "grad_norm": 15.98588272993623, - "learning_rate": 9.547247599461412e-07, - "loss": 1.7338, - "step": 4 - }, - { - "epoch": 0.0004509176173513099, - "grad_norm": 13.947955259915956, - "learning_rate": 1.108401121501769e-06, - "loss": 1.4375, - "step": 5 - }, - { - "epoch": 0.0005411011408215718, - "grad_norm": 12.931346664694285, - "learning_rate": 1.2339638514853914e-06, - "loss": 1.5997, - "step": 6 - }, - { - "epoch": 0.0006312846642918339, - "grad_norm": 11.676886814084707, - "learning_rate": 1.3401256270225321e-06, - "loss": 1.4071, - "step": 7 - }, - { - "epoch": 0.0007214681877620959, - "grad_norm": 9.112077734102373, - "learning_rate": 1.4320871399192119e-06, - "loss": 1.3708, - "step": 8 - }, - { - "epoch": 0.0008116517112323579, - "grad_norm": 6.000981250633848, - "learning_rate": 1.5132029430246416e-06, - "loss": 1.3375, - "step": 9 - }, - { - "epoch": 0.0009018352347026198, - "grad_norm": 6.755062041883647, - "learning_rate": 1.5857635014748399e-06, - "loss": 1.4231, - "step": 10 - }, - { - "epoch": 0.0009920187581728818, - "grad_norm": 7.0587164272336125, - "learning_rate": 1.6514025108267924e-06, - "loss": 1.4669, - "step": 11 - }, - { - "epoch": 0.0010822022816431437, - "grad_norm": 5.177872145469989, - "learning_rate": 1.711326231458462e-06, - "loss": 1.4223, - "step": 12 - }, - { - "epoch": 0.0011723858051134058, - "grad_norm": 6.120145781532253, - "learning_rate": 1.7664507107987104e-06, - "loss": 1.3054, - "step": 13 - }, - { - "epoch": 0.0012625693285836677, - "grad_norm": 3.4260106287232968, - "learning_rate": 1.8174880069956024e-06, - "loss": 1.3017, - "step": 14 - }, - { - "epoch": 0.0013527528520539298, - "grad_norm": 2.6542138766686807, - "learning_rate": 1.8650025930140899e-06, - "loss": 1.2668, - "step": 15 - }, - { - "epoch": 0.0014429363755241917, - "grad_norm": 3.129267868118978, - "learning_rate": 1.9094495198922823e-06, - "loss": 1.3634, - "step": 16 - }, - { - "epoch": 0.0015331198989944536, - "grad_norm": 2.726550345407409, - "learning_rate": 1.9512009899507514e-06, - "loss": 1.2319, - "step": 17 - }, - { - "epoch": 0.0016233034224647158, - "grad_norm": 2.5231099108003123, - "learning_rate": 1.990565322997712e-06, - "loss": 1.2829, - "step": 18 - }, - { - "epoch": 0.0017134869459349777, - "grad_norm": 2.025502451032381, - "learning_rate": 2.027800787770518e-06, - "loss": 1.1941, - "step": 19 - }, - { - "epoch": 0.0018036704694052396, - "grad_norm": 2.6829114376924967, - "learning_rate": 2.06312588144791e-06, - "loss": 1.2862, - "step": 20 - }, - { - "epoch": 0.0018938539928755017, - "grad_norm": 2.178223367711019, - "learning_rate": 2.0967270985348526e-06, - "loss": 1.2412, - "step": 21 - }, - { - "epoch": 0.0019840375163457636, - "grad_norm": 2.129682542846911, - "learning_rate": 2.128764890799863e-06, - "loss": 1.1416, - "step": 22 - }, - { - "epoch": 0.0020742210398160257, - "grad_norm": 2.7619431119912177, - "learning_rate": 2.1593783012990145e-06, - "loss": 1.1473, - "step": 23 - }, - { - "epoch": 0.0021644045632862874, - "grad_norm": 5.594881593271175, - "learning_rate": 2.188688611431533e-06, - "loss": 1.2958, - "step": 24 - }, - { - "epoch": 0.0022545880867565495, - "grad_norm": 2.243548149869142, - "learning_rate": 2.216802243003538e-06, - "loss": 1.2722, - "step": 25 - }, - { - "epoch": 0.0023447716102268116, - "grad_norm": 3.078319717677279, - "learning_rate": 2.243813090771781e-06, - "loss": 1.1141, - "step": 26 - }, - { - "epoch": 0.0024349551336970737, - "grad_norm": 2.1951794567044973, - "learning_rate": 2.269804414536962e-06, - "loss": 1.288, - "step": 27 - }, - { - "epoch": 0.0025251386571673354, - "grad_norm": 1.7791658891353055, - "learning_rate": 2.2948503869686733e-06, - "loss": 1.1916, - "step": 28 - }, - { - "epoch": 0.0026153221806375975, - "grad_norm": 2.142446703431541, - "learning_rate": 2.3190173696980436e-06, - "loss": 0.9349, - "step": 29 - }, - { - "epoch": 0.0027055057041078597, - "grad_norm": 2.5118632178636533, - "learning_rate": 2.3423649729871604e-06, - "loss": 1.2013, - "step": 30 - }, - { - "epoch": 0.0027956892275781214, - "grad_norm": 1.8387398617463007, - "learning_rate": 2.364946941580084e-06, - "loss": 1.1871, - "step": 31 - }, - { - "epoch": 0.0028858727510483835, - "grad_norm": 2.07700300402509, - "learning_rate": 2.3868118998653532e-06, - "loss": 1.2493, - "step": 32 - }, - { - "epoch": 0.0029760562745186456, - "grad_norm": 1.935107261089994, - "learning_rate": 2.408003982339113e-06, - "loss": 0.9093, - "step": 33 - }, - { - "epoch": 0.0030662397979889073, - "grad_norm": 1.5912473139474257, - "learning_rate": 2.4285633699238223e-06, - "loss": 1.1102, - "step": 34 - }, - { - "epoch": 0.0031564233214591694, - "grad_norm": 2.0904906225425246, - "learning_rate": 2.4485267485243007e-06, - "loss": 1.1502, - "step": 35 - }, - { - "epoch": 0.0032466068449294315, - "grad_norm": 2.044239239944629, - "learning_rate": 2.467927702970783e-06, - "loss": 1.1868, - "step": 36 - }, - { - "epoch": 0.003336790368399693, - "grad_norm": 1.6782272733925065, - "learning_rate": 2.4867970569753584e-06, - "loss": 1.2123, - "step": 37 - }, - { - "epoch": 0.0034269738918699553, - "grad_norm": 1.9643189959988658, - "learning_rate": 2.5051631677435883e-06, - "loss": 1.044, - "step": 38 - }, - { - "epoch": 0.0035171574153402174, - "grad_norm": 1.768955229590004, - "learning_rate": 2.523052182311031e-06, - "loss": 1.0194, - "step": 39 - }, - { - "epoch": 0.003607340938810479, - "grad_norm": 1.7670716292408868, - "learning_rate": 2.540488261420981e-06, - "loss": 1.2224, - "step": 40 - }, - { - "epoch": 0.0036975244622807412, - "grad_norm": 1.8030752932144816, - "learning_rate": 2.557493775753984e-06, - "loss": 1.023, - "step": 41 - }, - { - "epoch": 0.0037877079857510034, - "grad_norm": 1.6139632808474387, - "learning_rate": 2.5740894785079235e-06, - "loss": 1.1367, - "step": 42 - }, - { - "epoch": 0.0038778915092212655, - "grad_norm": 1.7991908532985093, - "learning_rate": 2.5902946576685834e-06, - "loss": 1.1515, - "step": 43 - }, - { - "epoch": 0.003968075032691527, - "grad_norm": 10.639525414114939, - "learning_rate": 2.606127270772933e-06, - "loss": 1.0619, - "step": 44 - }, - { - "epoch": 0.004058258556161789, - "grad_norm": 2.547282461325139, - "learning_rate": 2.62160406452641e-06, - "loss": 1.0389, - "step": 45 - }, - { - "epoch": 0.004148442079632051, - "grad_norm": 1.5509159212227297, - "learning_rate": 2.636740681272085e-06, - "loss": 1.2126, - "step": 46 - }, - { - "epoch": 0.004238625603102313, - "grad_norm": 2.2256851341567274, - "learning_rate": 2.651551754008722e-06, - "loss": 1.2472, - "step": 47 - }, - { - "epoch": 0.004328809126572575, - "grad_norm": 1.7209194709826778, - "learning_rate": 2.6660509914046035e-06, - "loss": 1.1741, - "step": 48 - }, - { - "epoch": 0.004418992650042837, - "grad_norm": 3.4100091472066762, - "learning_rate": 2.6802512540450642e-06, - "loss": 1.1415, - "step": 49 - }, - { - "epoch": 0.004509176173513099, - "grad_norm": 2.2526249937884426, - "learning_rate": 2.694164622976609e-06, - "loss": 1.1363, - "step": 50 - }, - { - "epoch": 0.0045993596969833616, - "grad_norm": 2.8321028075550214, - "learning_rate": 2.707802461463072e-06, - "loss": 1.0649, - "step": 51 - }, - { - "epoch": 0.004689543220453623, - "grad_norm": 1.6816195000397143, - "learning_rate": 2.7211754707448516e-06, - "loss": 1.1064, - "step": 52 - }, - { - "epoch": 0.004779726743923885, - "grad_norm": 1.8476594759568163, - "learning_rate": 2.734293740486721e-06, - "loss": 1.0941, - "step": 53 - }, - { - "epoch": 0.0048699102673941475, - "grad_norm": 1.6668018127325357, - "learning_rate": 2.747166794510033e-06, - "loss": 1.1343, - "step": 54 - }, - { - "epoch": 0.004960093790864409, - "grad_norm": 1.8141926721202208, - "learning_rate": 2.759803632328562e-06, - "loss": 1.1037, - "step": 55 - }, - { - "epoch": 0.005050277314334671, - "grad_norm": 2.6399781828759097, - "learning_rate": 2.772212766941744e-06, - "loss": 1.2244, - "step": 56 - }, - { - "epoch": 0.005140460837804933, - "grad_norm": 1.660466064267164, - "learning_rate": 2.7844022592828385e-06, - "loss": 1.2114, - "step": 57 - }, - { - "epoch": 0.005230644361275195, - "grad_norm": 1.700712192239089, - "learning_rate": 2.7963797496711145e-06, - "loss": 1.0696, - "step": 58 - }, - { - "epoch": 0.005320827884745457, - "grad_norm": 1.5951513225798049, - "learning_rate": 2.80815248657541e-06, - "loss": 1.147, - "step": 59 - }, - { - "epoch": 0.005411011408215719, - "grad_norm": 1.2454342905794686, - "learning_rate": 2.819727352960231e-06, - "loss": 0.8706, - "step": 60 - }, - { - "epoch": 0.005501194931685981, - "grad_norm": 1.94832729832498, - "learning_rate": 2.8311108904541717e-06, - "loss": 1.1111, - "step": 61 - }, - { - "epoch": 0.005591378455156243, - "grad_norm": 1.6710369826310119, - "learning_rate": 2.842309321553155e-06, - "loss": 1.0662, - "step": 62 - }, - { - "epoch": 0.005681561978626505, - "grad_norm": 1.7858600856659939, - "learning_rate": 2.8533285700471737e-06, - "loss": 1.0513, - "step": 63 - }, - { - "epoch": 0.005771745502096767, - "grad_norm": 1.651056315680979, - "learning_rate": 2.8641742798384237e-06, - "loss": 1.1837, - "step": 64 - }, - { - "epoch": 0.005861929025567029, - "grad_norm": 1.7622974062877748, - "learning_rate": 2.874851832300479e-06, - "loss": 1.1454, - "step": 65 - }, - { - "epoch": 0.005952112549037291, - "grad_norm": 3.960431369444505, - "learning_rate": 2.8853663623121834e-06, - "loss": 1.1407, - "step": 66 - }, - { - "epoch": 0.006042296072507553, - "grad_norm": 1.6284537658874618, - "learning_rate": 2.895722773085839e-06, - "loss": 1.178, - "step": 67 - }, - { - "epoch": 0.0061324795959778146, - "grad_norm": 1.652893696343214, - "learning_rate": 2.905925749896893e-06, - "loss": 1.1371, - "step": 68 - }, - { - "epoch": 0.006222663119448077, - "grad_norm": 1.657399624278009, - "learning_rate": 2.915979772811335e-06, - "loss": 1.1287, - "step": 69 - }, - { - "epoch": 0.006312846642918339, - "grad_norm": 1.7191009856692052, - "learning_rate": 2.925889128497372e-06, - "loss": 1.089, - "step": 70 - }, - { - "epoch": 0.0064030301663886005, - "grad_norm": 1.6661672400192176, - "learning_rate": 2.9356579211992906e-06, - "loss": 1.18, - "step": 71 - }, - { - "epoch": 0.006493213689858863, - "grad_norm": 1.6034579342164725, - "learning_rate": 2.9452900829438533e-06, - "loss": 1.0675, - "step": 72 - }, - { - "epoch": 0.006583397213329125, - "grad_norm": 2.165589382654888, - "learning_rate": 2.954789383042727e-06, - "loss": 1.0925, - "step": 73 - }, - { - "epoch": 0.006673580736799386, - "grad_norm": 1.3427759232931535, - "learning_rate": 2.9641594369484293e-06, - "loss": 0.9357, - "step": 74 - }, - { - "epoch": 0.006763764260269649, - "grad_norm": 1.8558416052184163, - "learning_rate": 2.9734037145158586e-06, - "loss": 1.1214, - "step": 75 - }, - { - "epoch": 0.006853947783739911, - "grad_norm": 1.6821279849643704, - "learning_rate": 2.982525547716659e-06, - "loss": 1.1204, - "step": 76 - }, - { - "epoch": 0.006944131307210172, - "grad_norm": 19.13310040233572, - "learning_rate": 2.9915281378493246e-06, - "loss": 1.0924, - "step": 77 - }, - { - "epoch": 0.007034314830680435, - "grad_norm": 28.392826345586396, - "learning_rate": 3.000414562284102e-06, - "loss": 1.1124, - "step": 78 - }, - { - "epoch": 0.0071244983541506966, - "grad_norm": 2.124763924444111, - "learning_rate": 3.009187780778246e-06, - "loss": 1.1089, - "step": 79 - }, - { - "epoch": 0.007214681877620958, - "grad_norm": 1.9812050077221415, - "learning_rate": 3.017850641394051e-06, - "loss": 1.1047, - "step": 80 - }, - { - "epoch": 0.007304865401091221, - "grad_norm": 2.0685765311818765, - "learning_rate": 3.0264058860492832e-06, - "loss": 0.9764, - "step": 81 - }, - { - "epoch": 0.0073950489245614825, - "grad_norm": 1.8583287092188394, - "learning_rate": 3.0348561557270548e-06, - "loss": 1.1595, - "step": 82 - }, - { - "epoch": 0.007485232448031745, - "grad_norm": 0.8314222515869779, - "learning_rate": 3.043203995369939e-06, - "loss": 0.8988, - "step": 83 - }, - { - "epoch": 0.007575415971502007, - "grad_norm": 1.695495033106142, - "learning_rate": 3.051451858480994e-06, - "loss": 1.1988, - "step": 84 - }, - { - "epoch": 0.007665599494972268, - "grad_norm": 1.9235144964653068, - "learning_rate": 3.05960211145252e-06, - "loss": 1.0286, - "step": 85 - }, - { - "epoch": 0.007755783018442531, - "grad_norm": 2.24941034007843, - "learning_rate": 3.0676570376416543e-06, - "loss": 1.0332, - "step": 86 - }, - { - "epoch": 0.007845966541912792, - "grad_norm": 2.065176989380581, - "learning_rate": 3.0756188412103647e-06, - "loss": 1.1299, - "step": 87 - }, - { - "epoch": 0.007936150065383054, - "grad_norm": 1.537642467690137, - "learning_rate": 3.083489650746004e-06, - "loss": 1.1173, - "step": 88 - }, - { - "epoch": 0.008026333588853317, - "grad_norm": 1.6124732554975367, - "learning_rate": 3.0912715226772975e-06, - "loss": 1.1311, - "step": 89 - }, - { - "epoch": 0.008116517112323578, - "grad_norm": 1.3197768891231574, - "learning_rate": 3.098966444499481e-06, - "loss": 1.0422, - "step": 90 - }, - { - "epoch": 0.00820670063579384, - "grad_norm": 4.514575031004722, - "learning_rate": 3.1065763378212426e-06, - "loss": 1.1191, - "step": 91 - }, - { - "epoch": 0.008296884159264103, - "grad_norm": 1.3343020684995344, - "learning_rate": 3.1141030612451554e-06, - "loss": 1.115, - "step": 92 - }, - { - "epoch": 0.008387067682734364, - "grad_norm": 0.9113968020060127, - "learning_rate": 3.1215484130924052e-06, - "loss": 0.9492, - "step": 93 - }, - { - "epoch": 0.008477251206204626, - "grad_norm": 2.4216127315150016, - "learning_rate": 3.128914133981793e-06, - "loss": 1.1242, - "step": 94 - }, - { - "epoch": 0.008567434729674889, - "grad_norm": 2.1226060509669047, - "learning_rate": 3.136201909272287e-06, - "loss": 1.1251, - "step": 95 - }, - { - "epoch": 0.00865761825314515, - "grad_norm": 1.639366030157683, - "learning_rate": 3.1434133713776735e-06, - "loss": 1.0759, - "step": 96 - }, - { - "epoch": 0.008747801776615412, - "grad_norm": 1.6545368187209295, - "learning_rate": 3.15055010196128e-06, - "loss": 1.1104, - "step": 97 - }, - { - "epoch": 0.008837985300085675, - "grad_norm": 1.8582967629134388, - "learning_rate": 3.157613634018135e-06, - "loss": 1.097, - "step": 98 - }, - { - "epoch": 0.008928168823555935, - "grad_norm": 1.4424038048045014, - "learning_rate": 3.1646054538514336e-06, - "loss": 1.1212, - "step": 99 - }, - { - "epoch": 0.009018352347026198, - "grad_norm": 1.7514007956516628, - "learning_rate": 3.1715270029496797e-06, - "loss": 1.1606, - "step": 100 - }, - { - "epoch": 0.00910853587049646, - "grad_norm": 1.362654796171337, - "learning_rate": 3.1783796797704243e-06, - "loss": 1.1316, - "step": 101 - }, - { - "epoch": 0.009198719393966723, - "grad_norm": 1.9069311457157367, - "learning_rate": 3.185164841436142e-06, - "loss": 1.0639, - "step": 102 - }, - { - "epoch": 0.009288902917436984, - "grad_norm": 1.8478421204449405, - "learning_rate": 3.1918838053473723e-06, - "loss": 1.1015, - "step": 103 - }, - { - "epoch": 0.009379086440907246, - "grad_norm": 1.4182810373821984, - "learning_rate": 3.198537850717922e-06, - "loss": 1.0148, - "step": 104 - }, - { - "epoch": 0.009469269964377509, - "grad_norm": 1.6054456574572875, - "learning_rate": 3.205128220036622e-06, - "loss": 1.0617, - "step": 105 - }, - { - "epoch": 0.00955945348784777, - "grad_norm": 1.342626589981642, - "learning_rate": 3.2116561204597917e-06, - "loss": 1.0536, - "step": 106 - }, - { - "epoch": 0.009649637011318032, - "grad_norm": 0.9162703690600154, - "learning_rate": 3.218122725138335e-06, - "loss": 0.9432, - "step": 107 - }, - { - "epoch": 0.009739820534788295, - "grad_norm": 1.918184784883229, - "learning_rate": 3.224529174483104e-06, - "loss": 1.0935, - "step": 108 - }, - { - "epoch": 0.009830004058258556, - "grad_norm": 1.9292985161146594, - "learning_rate": 3.2308765773719435e-06, - "loss": 1.0313, - "step": 109 - }, - { - "epoch": 0.009920187581728818, - "grad_norm": 1.640270085773885, - "learning_rate": 3.2371660123016323e-06, - "loss": 1.0908, - "step": 110 - }, - { - "epoch": 0.010010371105199081, - "grad_norm": 1.5107428187619882, - "learning_rate": 3.2433985284876787e-06, - "loss": 1.0308, - "step": 111 - }, - { - "epoch": 0.010100554628669342, - "grad_norm": 1.36077620101169, - "learning_rate": 3.2495751469148143e-06, - "loss": 1.0891, - "step": 112 - }, - { - "epoch": 0.010190738152139604, - "grad_norm": 2.1087115339333415, - "learning_rate": 3.2556968613407816e-06, - "loss": 1.0634, - "step": 113 - }, - { - "epoch": 0.010280921675609867, - "grad_norm": 1.950677971479695, - "learning_rate": 3.2617646392559094e-06, - "loss": 1.1283, - "step": 114 - }, - { - "epoch": 0.010371105199080128, - "grad_norm": 3.1030116311192466, - "learning_rate": 3.2677794228007836e-06, - "loss": 1.1357, - "step": 115 - }, - { - "epoch": 0.01046128872255039, - "grad_norm": 1.9152380000093168, - "learning_rate": 3.273742129644185e-06, - "loss": 1.073, - "step": 116 - }, - { - "epoch": 0.010551472246020653, - "grad_norm": 1.7556474025189215, - "learning_rate": 3.279653653823352e-06, - "loss": 1.0936, - "step": 117 - }, - { - "epoch": 0.010641655769490914, - "grad_norm": 1.7830944216165745, - "learning_rate": 3.285514866548481e-06, - "loss": 1.0929, - "step": 118 - }, - { - "epoch": 0.010731839292961176, - "grad_norm": 1.5560592657045584, - "learning_rate": 3.2913266169732838e-06, - "loss": 1.1567, - "step": 119 - }, - { - "epoch": 0.010822022816431439, - "grad_norm": 2.1195149034037795, - "learning_rate": 3.2970897329333017e-06, - "loss": 1.0731, - "step": 120 - }, - { - "epoch": 0.0109122063399017, - "grad_norm": 1.6132940333416432, - "learning_rate": 3.302805021653585e-06, - "loss": 1.0695, - "step": 121 - }, - { - "epoch": 0.011002389863371962, - "grad_norm": 3.025183516050908, - "learning_rate": 3.3084732704272426e-06, - "loss": 1.0862, - "step": 122 - }, - { - "epoch": 0.011092573386842225, - "grad_norm": 1.818522543300955, - "learning_rate": 3.314095247266304e-06, - "loss": 1.0774, - "step": 123 - }, - { - "epoch": 0.011182756910312485, - "grad_norm": 2.1245975113215034, - "learning_rate": 3.3196717015262255e-06, - "loss": 1.0512, - "step": 124 - }, - { - "epoch": 0.011272940433782748, - "grad_norm": 1.5988679874836904, - "learning_rate": 3.325203364505307e-06, - "loss": 1.0778, - "step": 125 - }, - { - "epoch": 0.01136312395725301, - "grad_norm": 1.7009192365639687, - "learning_rate": 3.3306909500202442e-06, - "loss": 1.119, - "step": 126 - }, - { - "epoch": 0.011453307480723271, - "grad_norm": 1.6498160722117772, - "learning_rate": 3.3361351549589145e-06, - "loss": 1.1314, - "step": 127 - }, - { - "epoch": 0.011543491004193534, - "grad_norm": 1.771172375105378, - "learning_rate": 3.341536659811494e-06, - "loss": 1.0264, - "step": 128 - }, - { - "epoch": 0.011633674527663796, - "grad_norm": 1.7359880386810635, - "learning_rate": 3.346896129180904e-06, - "loss": 1.1123, - "step": 129 - }, - { - "epoch": 0.011723858051134057, - "grad_norm": 1.9899157567619288, - "learning_rate": 3.35221421227355e-06, - "loss": 1.0876, - "step": 130 - }, - { - "epoch": 0.01181404157460432, - "grad_norm": 2.098571350418916, - "learning_rate": 3.357491543371255e-06, - "loss": 1.1122, - "step": 131 - }, - { - "epoch": 0.011904225098074582, - "grad_norm": 1.6066306705848168, - "learning_rate": 3.3627287422852543e-06, - "loss": 1.0435, - "step": 132 - }, - { - "epoch": 0.011994408621544843, - "grad_norm": 1.3689601632334678, - "learning_rate": 3.3679264147930497e-06, - "loss": 1.1134, - "step": 133 - }, - { - "epoch": 0.012084592145015106, - "grad_norm": 1.803082698015082, - "learning_rate": 3.37308515305891e-06, - "loss": 1.0277, - "step": 134 - }, - { - "epoch": 0.012174775668485368, - "grad_norm": 1.396582113193568, - "learning_rate": 3.3782055360387313e-06, - "loss": 1.1471, - "step": 135 - }, - { - "epoch": 0.012264959191955629, - "grad_norm": 1.492375466855736, - "learning_rate": 3.3832881298699633e-06, - "loss": 1.0504, - "step": 136 - }, - { - "epoch": 0.012355142715425892, - "grad_norm": 1.6982256212353284, - "learning_rate": 3.388333488247249e-06, - "loss": 1.0629, - "step": 137 - }, - { - "epoch": 0.012445326238896154, - "grad_norm": 1.7144944654249124, - "learning_rate": 3.393342152784406e-06, - "loss": 1.139, - "step": 138 - }, - { - "epoch": 0.012535509762366415, - "grad_norm": 1.808766828457384, - "learning_rate": 3.3983146533633376e-06, - "loss": 1.1012, - "step": 139 - }, - { - "epoch": 0.012625693285836678, - "grad_norm": 1.931722764601374, - "learning_rate": 3.403251508470442e-06, - "loss": 1.1445, - "step": 140 - }, - { - "epoch": 0.01271587680930694, - "grad_norm": 1.6283703844247046, - "learning_rate": 3.408153225521043e-06, - "loss": 1.0441, - "step": 141 - }, - { - "epoch": 0.012806060332777201, - "grad_norm": 1.9216304716532213, - "learning_rate": 3.413020301172361e-06, - "loss": 0.9836, - "step": 142 - }, - { - "epoch": 0.012896243856247463, - "grad_norm": 1.928424931820429, - "learning_rate": 3.4178532216255024e-06, - "loss": 1.0686, - "step": 143 - }, - { - "epoch": 0.012986427379717726, - "grad_norm": 1.5425773027484988, - "learning_rate": 3.422652462916924e-06, - "loss": 1.1241, - "step": 144 - }, - { - "epoch": 0.013076610903187987, - "grad_norm": 2.1661641076037053, - "learning_rate": 3.4274184911998124e-06, - "loss": 0.9954, - "step": 145 - }, - { - "epoch": 0.01316679442665825, - "grad_norm": 2.4764966504701835, - "learning_rate": 3.4321517630157976e-06, - "loss": 1.1729, - "step": 146 - }, - { - "epoch": 0.013256977950128512, - "grad_norm": 1.627498539866834, - "learning_rate": 3.4368527255573845e-06, - "loss": 1.1478, - "step": 147 - }, - { - "epoch": 0.013347161473598773, - "grad_norm": 1.9582996636059682, - "learning_rate": 3.4415218169214994e-06, - "loss": 1.0551, - "step": 148 - }, - { - "epoch": 0.013437344997069035, - "grad_norm": 1.5167190692917387, - "learning_rate": 3.4461594663544882e-06, - "loss": 1.0647, - "step": 149 - }, - { - "epoch": 0.013527528520539298, - "grad_norm": 1.519183042978202, - "learning_rate": 3.450766094488929e-06, - "loss": 1.0271, - "step": 150 - }, - { - "epoch": 0.013617712044009559, - "grad_norm": 1.529554245534613, - "learning_rate": 3.4553421135725735e-06, - "loss": 1.1236, - "step": 151 - }, - { - "epoch": 0.013707895567479821, - "grad_norm": 1.7655281107827168, - "learning_rate": 3.45988792768973e-06, - "loss": 1.0933, - "step": 152 - }, - { - "epoch": 0.013798079090950084, - "grad_norm": 1.4040041049315823, - "learning_rate": 3.464403932975393e-06, - "loss": 1.1437, - "step": 153 - }, - { - "epoch": 0.013888262614420345, - "grad_norm": 3.9432106386789956, - "learning_rate": 3.468890517822395e-06, - "loss": 1.1319, - "step": 154 - }, - { - "epoch": 0.013978446137890607, - "grad_norm": 1.9311785786579232, - "learning_rate": 3.473348063081853e-06, - "loss": 1.0796, - "step": 155 - }, - { - "epoch": 0.01406862966136087, - "grad_norm": 2.0046077340744213, - "learning_rate": 3.4777769422571727e-06, - "loss": 1.0301, - "step": 156 - }, - { - "epoch": 0.01415881318483113, - "grad_norm": 1.4903729175648102, - "learning_rate": 3.4821775216918497e-06, - "loss": 1.0738, - "step": 157 - }, - { - "epoch": 0.014248996708301393, - "grad_norm": 1.6665918889754636, - "learning_rate": 3.4865501607513164e-06, - "loss": 1.0187, - "step": 158 - }, - { - "epoch": 0.014339180231771656, - "grad_norm": 2.5289051827801754, - "learning_rate": 3.4908952119990423e-06, - "loss": 1.1517, - "step": 159 - }, - { - "epoch": 0.014429363755241916, - "grad_norm": 1.0125104244307777, - "learning_rate": 3.495213021367122e-06, - "loss": 0.9159, - "step": 160 - }, - { - "epoch": 0.014519547278712179, - "grad_norm": 2.602156379433904, - "learning_rate": 3.4995039283215464e-06, - "loss": 1.1082, - "step": 161 - }, - { - "epoch": 0.014609730802182442, - "grad_norm": 1.6607208369018909, - "learning_rate": 3.5037682660223533e-06, - "loss": 1.0891, - "step": 162 - }, - { - "epoch": 0.014699914325652702, - "grad_norm": 2.0831348578960607, - "learning_rate": 3.508006361478857e-06, - "loss": 1.1034, - "step": 163 - }, - { - "epoch": 0.014790097849122965, - "grad_norm": 1.656293400609686, - "learning_rate": 3.5122185357001253e-06, - "loss": 1.0666, - "step": 164 - }, - { - "epoch": 0.014880281372593228, - "grad_norm": 2.139263861954208, - "learning_rate": 3.5164051038408817e-06, - "loss": 1.1318, - "step": 165 - }, - { - "epoch": 0.01497046489606349, - "grad_norm": 1.7308570305651931, - "learning_rate": 3.5205663753430093e-06, - "loss": 1.1306, - "step": 166 - }, - { - "epoch": 0.015060648419533751, - "grad_norm": 1.6172983619911383, - "learning_rate": 3.5247026540727915e-06, - "loss": 1.1693, - "step": 167 - }, - { - "epoch": 0.015150831943004013, - "grad_norm": 1.7673460412855846, - "learning_rate": 3.5288142384540645e-06, - "loss": 1.0176, - "step": 168 - }, - { - "epoch": 0.015241015466474276, - "grad_norm": 1.711711064177355, - "learning_rate": 3.532901421597421e-06, - "loss": 1.106, - "step": 169 - }, - { - "epoch": 0.015331198989944537, - "grad_norm": 2.944032264346086, - "learning_rate": 3.5369644914255915e-06, - "loss": 1.0207, - "step": 170 - }, - { - "epoch": 0.0154213825134148, - "grad_norm": 3.46358924010835, - "learning_rate": 3.5410037307951596e-06, - "loss": 1.0855, - "step": 171 - }, - { - "epoch": 0.015511566036885062, - "grad_norm": 1.393099779437259, - "learning_rate": 3.545019417614725e-06, - "loss": 1.1397, - "step": 172 - }, - { - "epoch": 0.015601749560355323, - "grad_norm": 1.542960174150416, - "learning_rate": 3.5490118249596387e-06, - "loss": 1.1224, - "step": 173 - }, - { - "epoch": 0.015691933083825584, - "grad_norm": 1.5772907582602327, - "learning_rate": 3.5529812211834352e-06, - "loss": 1.1312, - "step": 174 - }, - { - "epoch": 0.015782116607295848, - "grad_norm": 1.3881704978580087, - "learning_rate": 3.5569278700260707e-06, - "loss": 1.0854, - "step": 175 - }, - { - "epoch": 0.01587230013076611, - "grad_norm": 1.896726237312998, - "learning_rate": 3.5608520307190746e-06, - "loss": 1.0636, - "step": 176 - }, - { - "epoch": 0.01596248365423637, - "grad_norm": 1.4209448318510416, - "learning_rate": 3.564753958087731e-06, - "loss": 1.019, - "step": 177 - }, - { - "epoch": 0.016052667177706634, - "grad_norm": 1.4752806089477788, - "learning_rate": 3.5686339026503684e-06, - "loss": 1.0897, - "step": 178 - }, - { - "epoch": 0.016142850701176895, - "grad_norm": 2.151577960411047, - "learning_rate": 3.5724921107148806e-06, - "loss": 1.1586, - "step": 179 - }, - { - "epoch": 0.016233034224647155, - "grad_norm": 1.510163761757661, - "learning_rate": 3.576328824472552e-06, - "loss": 1.1415, - "step": 180 - }, - { - "epoch": 0.01632321774811742, - "grad_norm": 1.9930722534626242, - "learning_rate": 3.5801442820892838e-06, - "loss": 1.1711, - "step": 181 - }, - { - "epoch": 0.01641340127158768, - "grad_norm": 1.3745845687288667, - "learning_rate": 3.583938717794313e-06, - "loss": 1.1036, - "step": 182 - }, - { - "epoch": 0.01650358479505794, - "grad_norm": 1.7219584256158917, - "learning_rate": 3.5877123619664928e-06, - "loss": 1.1313, - "step": 183 - }, - { - "epoch": 0.016593768318528206, - "grad_norm": 1.6271949762326825, - "learning_rate": 3.5914654412182268e-06, - "loss": 1.0987, - "step": 184 - }, - { - "epoch": 0.016683951841998466, - "grad_norm": 1.5261149036249406, - "learning_rate": 3.595198178477127e-06, - "loss": 1.1106, - "step": 185 - }, - { - "epoch": 0.016774135365468727, - "grad_norm": 1.9185143838660876, - "learning_rate": 3.5989107930654757e-06, - "loss": 1.0166, - "step": 186 - }, - { - "epoch": 0.01686431888893899, - "grad_norm": 1.561026832384165, - "learning_rate": 3.6026035007775437e-06, - "loss": 1.1414, - "step": 187 - }, - { - "epoch": 0.016954502412409252, - "grad_norm": 2.082134537702418, - "learning_rate": 3.6062765139548636e-06, - "loss": 1.0582, - "step": 188 - }, - { - "epoch": 0.017044685935879513, - "grad_norm": 1.575807361452638, - "learning_rate": 3.6099300415594945e-06, - "loss": 0.9485, - "step": 189 - }, - { - "epoch": 0.017134869459349777, - "grad_norm": 1.434114699337439, - "learning_rate": 3.6135642892453575e-06, - "loss": 0.9641, - "step": 190 - }, - { - "epoch": 0.01722505298282004, - "grad_norm": 1.5231967833661328, - "learning_rate": 3.6171794594277004e-06, - "loss": 1.0973, - "step": 191 - }, - { - "epoch": 0.0173152365062903, - "grad_norm": 2.166253808618577, - "learning_rate": 3.620775751350745e-06, - "loss": 1.036, - "step": 192 - }, - { - "epoch": 0.017405420029760563, - "grad_norm": 2.18056631659291, - "learning_rate": 3.6243533611535794e-06, - "loss": 1.1132, - "step": 193 - }, - { - "epoch": 0.017495603553230824, - "grad_norm": 1.6306017643657316, - "learning_rate": 3.627912481934351e-06, - "loss": 1.0727, - "step": 194 - }, - { - "epoch": 0.017585787076701085, - "grad_norm": 1.9307522944123465, - "learning_rate": 3.6314533038128e-06, - "loss": 1.0532, - "step": 195 - }, - { - "epoch": 0.01767597060017135, - "grad_norm": 0.8376572290552499, - "learning_rate": 3.6349760139912048e-06, - "loss": 0.8918, - "step": 196 - }, - { - "epoch": 0.01776615412364161, - "grad_norm": 1.768713486365326, - "learning_rate": 3.638480796813769e-06, - "loss": 1.1421, - "step": 197 - }, - { - "epoch": 0.01785633764711187, - "grad_norm": 1.6346951814739739, - "learning_rate": 3.641967833824504e-06, - "loss": 1.079, - "step": 198 - }, - { - "epoch": 0.017946521170582135, - "grad_norm": 1.4845244483000972, - "learning_rate": 3.645437303823663e-06, - "loss": 1.0746, - "step": 199 - }, - { - "epoch": 0.018036704694052396, - "grad_norm": 2.018938047549689, - "learning_rate": 3.64888938292275e-06, - "loss": 1.0592, - "step": 200 - }, - { - "epoch": 0.01812688821752266, - "grad_norm": 1.6733506010058736, - "learning_rate": 3.6523242445981603e-06, - "loss": 1.0804, - "step": 201 - }, - { - "epoch": 0.01821707174099292, - "grad_norm": 1.6810564716799887, - "learning_rate": 3.655742059743495e-06, - "loss": 1.0505, - "step": 202 - }, - { - "epoch": 0.018307255264463182, - "grad_norm": 1.4908737870697029, - "learning_rate": 3.659142996720576e-06, - "loss": 1.1394, - "step": 203 - }, - { - "epoch": 0.018397438787933446, - "grad_norm": 1.5815792992315278, - "learning_rate": 3.6625272214092135e-06, - "loss": 1.0479, - "step": 204 - }, - { - "epoch": 0.018487622311403707, - "grad_norm": 0.646456184809814, - "learning_rate": 3.6658948972557535e-06, - "loss": 0.8315, - "step": 205 - }, - { - "epoch": 0.018577805834873968, - "grad_norm": 1.5973007440846019, - "learning_rate": 3.6692461853204432e-06, - "loss": 0.9972, - "step": 206 - }, - { - "epoch": 0.018667989358344232, - "grad_norm": 1.839975923297756, - "learning_rate": 3.672581244323656e-06, - "loss": 1.0166, - "step": 207 - }, - { - "epoch": 0.018758172881814493, - "grad_norm": 1.5791916971898594, - "learning_rate": 3.6759002306909926e-06, - "loss": 0.9627, - "step": 208 - }, - { - "epoch": 0.018848356405284754, - "grad_norm": 1.3194533492786118, - "learning_rate": 3.67920329859731e-06, - "loss": 1.0919, - "step": 209 - }, - { - "epoch": 0.018938539928755018, - "grad_norm": 1.6841388713814975, - "learning_rate": 3.6824906000096923e-06, - "loss": 1.0804, - "step": 210 - }, - { - "epoch": 0.01902872345222528, - "grad_norm": 1.6858796004569658, - "learning_rate": 3.6857622847294067e-06, - "loss": 1.085, - "step": 211 - }, - { - "epoch": 0.01911890697569554, - "grad_norm": 1.9158168926863783, - "learning_rate": 3.6890185004328626e-06, - "loss": 1.0604, - "step": 212 - }, - { - "epoch": 0.019209090499165804, - "grad_norm": 2.665606993494786, - "learning_rate": 3.6922593927116113e-06, - "loss": 1.0184, - "step": 213 - }, - { - "epoch": 0.019299274022636065, - "grad_norm": 1.745294716658191, - "learning_rate": 3.695485105111406e-06, - "loss": 1.0712, - "step": 214 - }, - { - "epoch": 0.019389457546106326, - "grad_norm": 1.5867200903086258, - "learning_rate": 3.698695779170352e-06, - "loss": 1.0414, - "step": 215 - }, - { - "epoch": 0.01947964106957659, - "grad_norm": 1.7707470891557784, - "learning_rate": 3.7018915544561744e-06, - "loss": 1.0671, - "step": 216 - }, - { - "epoch": 0.01956982459304685, - "grad_norm": 3.752392069505282, - "learning_rate": 3.7050725686026164e-06, - "loss": 1.0245, - "step": 217 - }, - { - "epoch": 0.01966000811651711, - "grad_norm": 2.301810741827535, - "learning_rate": 3.708238957345014e-06, - "loss": 1.1312, - "step": 218 - }, - { - "epoch": 0.019750191639987376, - "grad_norm": 1.8955895682877575, - "learning_rate": 3.7113908545550482e-06, - "loss": 1.1169, - "step": 219 - }, - { - "epoch": 0.019840375163457637, - "grad_norm": 1.5463963548355832, - "learning_rate": 3.7145283922747028e-06, - "loss": 1.0607, - "step": 220 - }, - { - "epoch": 0.019930558686927898, - "grad_norm": 2.0041914173694275, - "learning_rate": 3.7176517007494612e-06, - "loss": 1.1511, - "step": 221 - }, - { - "epoch": 0.020020742210398162, - "grad_norm": 1.6796892033058224, - "learning_rate": 3.7207609084607496e-06, - "loss": 1.015, - "step": 222 - }, - { - "epoch": 0.020110925733868423, - "grad_norm": 3.131056752590003, - "learning_rate": 3.723856142157645e-06, - "loss": 1.1171, - "step": 223 - }, - { - "epoch": 0.020201109257338683, - "grad_norm": 1.6836780075620734, - "learning_rate": 3.726937526887885e-06, - "loss": 1.0796, - "step": 224 - }, - { - "epoch": 0.020291292780808948, - "grad_norm": 2.231978191296532, - "learning_rate": 3.7300051860281798e-06, - "loss": 1.0292, - "step": 225 - }, - { - "epoch": 0.02038147630427921, - "grad_norm": 1.6890577261519393, - "learning_rate": 3.733059241313852e-06, - "loss": 1.1462, - "step": 226 - }, - { - "epoch": 0.02047165982774947, - "grad_norm": 1.5481515876418146, - "learning_rate": 3.736099812867827e-06, - "loss": 1.0669, - "step": 227 - }, - { - "epoch": 0.020561843351219734, - "grad_norm": 1.819416857244154, - "learning_rate": 3.73912701922898e-06, - "loss": 1.1249, - "step": 228 - }, - { - "epoch": 0.020652026874689994, - "grad_norm": 0.813731031240711, - "learning_rate": 3.742140977379868e-06, - "loss": 0.8662, - "step": 229 - }, - { - "epoch": 0.020742210398160255, - "grad_norm": 1.7228583046539374, - "learning_rate": 3.745141802773854e-06, - "loss": 1.1063, - "step": 230 - }, - { - "epoch": 0.02083239392163052, - "grad_norm": 1.631000663439103, - "learning_rate": 3.748129609361645e-06, - "loss": 1.0722, - "step": 231 - }, - { - "epoch": 0.02092257744510078, - "grad_norm": 1.3093476358617926, - "learning_rate": 3.7511045096172555e-06, - "loss": 1.0487, - "step": 232 - }, - { - "epoch": 0.02101276096857104, - "grad_norm": 1.4990085663197608, - "learning_rate": 3.7540666145634137e-06, - "loss": 1.064, - "step": 233 - }, - { - "epoch": 0.021102944492041305, - "grad_norm": 1.4377684964413189, - "learning_rate": 3.7570160337964225e-06, - "loss": 1.0529, - "step": 234 - }, - { - "epoch": 0.021193128015511566, - "grad_norm": 1.809038869571993, - "learning_rate": 3.7599528755104913e-06, - "loss": 1.0981, - "step": 235 - }, - { - "epoch": 0.021283311538981827, - "grad_norm": 1.8004018255585978, - "learning_rate": 3.7628772465215515e-06, - "loss": 1.0651, - "step": 236 - }, - { - "epoch": 0.02137349506245209, - "grad_norm": 1.5470440704736077, - "learning_rate": 3.7657892522905666e-06, - "loss": 1.0599, - "step": 237 - }, - { - "epoch": 0.021463678585922352, - "grad_norm": 1.933143517066041, - "learning_rate": 3.7686889969463542e-06, - "loss": 0.9874, - "step": 238 - }, - { - "epoch": 0.021553862109392613, - "grad_norm": 2.3542039806368287, - "learning_rate": 3.771576583307928e-06, - "loss": 1.0545, - "step": 239 - }, - { - "epoch": 0.021644045632862877, - "grad_norm": 1.6908882880958824, - "learning_rate": 3.7744521129063722e-06, - "loss": 1.1088, - "step": 240 - }, - { - "epoch": 0.021734229156333138, - "grad_norm": 1.526183250898422, - "learning_rate": 3.7773156860062653e-06, - "loss": 1.0902, - "step": 241 - }, - { - "epoch": 0.0218244126798034, - "grad_norm": 1.8063748880789803, - "learning_rate": 3.7801674016266554e-06, - "loss": 1.1723, - "step": 242 - }, - { - "epoch": 0.021914596203273663, - "grad_norm": 0.6133434725740524, - "learning_rate": 3.7830073575616035e-06, - "loss": 0.8089, - "step": 243 - }, - { - "epoch": 0.022004779726743924, - "grad_norm": 1.4181376375027839, - "learning_rate": 3.785835650400313e-06, - "loss": 1.1043, - "step": 244 - }, - { - "epoch": 0.022094963250214185, - "grad_norm": 1.6120637821140205, - "learning_rate": 3.7886523755468334e-06, - "loss": 0.9792, - "step": 245 - }, - { - "epoch": 0.02218514677368445, - "grad_norm": 1.3589554939589292, - "learning_rate": 3.7914576272393746e-06, - "loss": 1.06, - "step": 246 - }, - { - "epoch": 0.02227533029715471, - "grad_norm": 1.905552595795513, - "learning_rate": 3.7942514985692284e-06, - "loss": 1.1738, - "step": 247 - }, - { - "epoch": 0.02236551382062497, - "grad_norm": 1.340954423102878, - "learning_rate": 3.797034081499296e-06, - "loss": 1.0686, - "step": 248 - }, - { - "epoch": 0.022455697344095235, - "grad_norm": 4.569264035720085, - "learning_rate": 3.7998054668822595e-06, - "loss": 1.0517, - "step": 249 - }, - { - "epoch": 0.022545880867565496, - "grad_norm": 1.5738656757918672, - "learning_rate": 3.8025657444783776e-06, - "loss": 1.0425, - "step": 250 - }, - { - "epoch": 0.022636064391035757, - "grad_norm": 1.931762938296045, - "learning_rate": 3.80531500297293e-06, - "loss": 0.9826, - "step": 251 - }, - { - "epoch": 0.02272624791450602, - "grad_norm": 3.102177095415934, - "learning_rate": 3.8080533299933147e-06, - "loss": 1.0313, - "step": 252 - }, - { - "epoch": 0.022816431437976282, - "grad_norm": 1.6462922080269362, - "learning_rate": 3.8107808121258067e-06, - "loss": 1.0349, - "step": 253 - }, - { - "epoch": 0.022906614961446543, - "grad_norm": 4.059080592191568, - "learning_rate": 3.813497534931985e-06, - "loss": 1.1278, - "step": 254 - }, - { - "epoch": 0.022996798484916807, - "grad_norm": 4.339274344185494, - "learning_rate": 3.816203582964841e-06, - "loss": 1.0914, - "step": 255 - }, - { - "epoch": 0.023086982008387068, - "grad_norm": 1.5359251314831346, - "learning_rate": 3.818899039784565e-06, - "loss": 1.127, - "step": 256 - }, - { - "epoch": 0.02317716553185733, - "grad_norm": 1.9092266418458763, - "learning_rate": 3.821583987974031e-06, - "loss": 1.1212, - "step": 257 - }, - { - "epoch": 0.023267349055327593, - "grad_norm": 1.560314786646898, - "learning_rate": 3.8242585091539755e-06, - "loss": 1.0064, - "step": 258 - }, - { - "epoch": 0.023357532578797854, - "grad_norm": 2.0617809776290823, - "learning_rate": 3.8269226839978895e-06, - "loss": 1.1508, - "step": 259 - }, - { - "epoch": 0.023447716102268115, - "grad_norm": 1.980762226699172, - "learning_rate": 3.82957659224662e-06, - "loss": 1.0277, - "step": 260 - }, - { - "epoch": 0.02353789962573838, - "grad_norm": 1.7201355637865712, - "learning_rate": 3.8322203127226855e-06, - "loss": 0.9543, - "step": 261 - }, - { - "epoch": 0.02362808314920864, - "grad_norm": 1.7027724014800292, - "learning_rate": 3.834853923344326e-06, - "loss": 1.1811, - "step": 262 - }, - { - "epoch": 0.0237182666726789, - "grad_norm": 1.3749698288815186, - "learning_rate": 3.837477501139285e-06, - "loss": 1.0733, - "step": 263 - }, - { - "epoch": 0.023808450196149165, - "grad_norm": 2.2746024276939134, - "learning_rate": 3.840091122258324e-06, - "loss": 1.0881, - "step": 264 - }, - { - "epoch": 0.023898633719619426, - "grad_norm": 2.4874853660881477, - "learning_rate": 3.84269486198849e-06, - "loss": 0.9995, - "step": 265 - }, - { - "epoch": 0.023988817243089686, - "grad_norm": 1.6190487383456595, - "learning_rate": 3.845288794766121e-06, - "loss": 1.0886, - "step": 266 - }, - { - "epoch": 0.02407900076655995, - "grad_norm": 1.5889813007625275, - "learning_rate": 3.847872994189619e-06, - "loss": 1.0716, - "step": 267 - }, - { - "epoch": 0.02416918429003021, - "grad_norm": 1.7659730821331472, - "learning_rate": 3.8504475330319805e-06, - "loss": 1.1252, - "step": 268 - }, - { - "epoch": 0.024259367813500472, - "grad_norm": 1.456328521707179, - "learning_rate": 3.853012483253093e-06, - "loss": 1.0515, - "step": 269 - }, - { - "epoch": 0.024349551336970737, - "grad_norm": 1.766530842628156, - "learning_rate": 3.855567916011802e-06, - "loss": 1.0786, - "step": 270 - }, - { - "epoch": 0.024439734860440997, - "grad_norm": 1.8201847277154104, - "learning_rate": 3.858113901677755e-06, - "loss": 1.0762, - "step": 271 - }, - { - "epoch": 0.024529918383911258, - "grad_norm": 1.618319055642902, - "learning_rate": 3.860650509843034e-06, - "loss": 1.0341, - "step": 272 - }, - { - "epoch": 0.024620101907381522, - "grad_norm": 2.4060604466671984, - "learning_rate": 3.863177809333563e-06, - "loss": 1.0612, - "step": 273 - }, - { - "epoch": 0.024710285430851783, - "grad_norm": 1.5585732518428075, - "learning_rate": 3.86569586822032e-06, - "loss": 1.0561, - "step": 274 - }, - { - "epoch": 0.024800468954322044, - "grad_norm": 1.7747187754324791, - "learning_rate": 3.868204753830331e-06, - "loss": 1.032, - "step": 275 - }, - { - "epoch": 0.02489065247779231, - "grad_norm": 1.5367927661441532, - "learning_rate": 3.870704532757476e-06, - "loss": 0.9915, - "step": 276 - }, - { - "epoch": 0.02498083600126257, - "grad_norm": 1.9891021173192138, - "learning_rate": 3.8731952708730974e-06, - "loss": 1.1004, - "step": 277 - }, - { - "epoch": 0.02507101952473283, - "grad_norm": 1.7113183699353542, - "learning_rate": 3.8756770333364085e-06, - "loss": 1.092, - "step": 278 - }, - { - "epoch": 0.025161203048203094, - "grad_norm": 1.5850167443868133, - "learning_rate": 3.878149884604725e-06, - "loss": 0.9934, - "step": 279 - }, - { - "epoch": 0.025251386571673355, - "grad_norm": 1.9746799475248793, - "learning_rate": 3.8806138884435125e-06, - "loss": 1.0445, - "step": 280 - }, - { - "epoch": 0.025341570095143616, - "grad_norm": 1.536174873023587, - "learning_rate": 3.883069107936248e-06, - "loss": 0.9978, - "step": 281 - }, - { - "epoch": 0.02543175361861388, - "grad_norm": 2.0238733006427165, - "learning_rate": 3.885515605494114e-06, - "loss": 1.1315, - "step": 282 - }, - { - "epoch": 0.02552193714208414, - "grad_norm": 2.072253429438077, - "learning_rate": 3.8879534428655145e-06, - "loss": 1.1167, - "step": 283 - }, - { - "epoch": 0.025612120665554402, - "grad_norm": 2.0265100194390153, - "learning_rate": 3.890382681145432e-06, - "loss": 1.1091, - "step": 284 - }, - { - "epoch": 0.025702304189024666, - "grad_norm": 1.710174808601905, - "learning_rate": 3.892803380784608e-06, - "loss": 1.1026, - "step": 285 - }, - { - "epoch": 0.025792487712494927, - "grad_norm": 0.9914562804912987, - "learning_rate": 3.8952156015985725e-06, - "loss": 0.9444, - "step": 286 - }, - { - "epoch": 0.025882671235965188, - "grad_norm": 1.5590122970722473, - "learning_rate": 3.897619402776516e-06, - "loss": 1.0748, - "step": 287 - }, - { - "epoch": 0.025972854759435452, - "grad_norm": 4.495401363933098, - "learning_rate": 3.900014842889995e-06, - "loss": 1.1423, - "step": 288 - }, - { - "epoch": 0.026063038282905713, - "grad_norm": 1.492231278501291, - "learning_rate": 3.902401979901503e-06, - "loss": 0.9883, - "step": 289 - }, - { - "epoch": 0.026153221806375974, - "grad_norm": 1.6818838973644605, - "learning_rate": 3.904780871172884e-06, - "loss": 1.1127, - "step": 290 - }, - { - "epoch": 0.026243405329846238, - "grad_norm": 2.2454583637638925, - "learning_rate": 3.907151573473601e-06, - "loss": 0.9503, - "step": 291 - }, - { - "epoch": 0.0263335888533165, - "grad_norm": 1.3044828151566668, - "learning_rate": 3.909514142988868e-06, - "loss": 1.079, - "step": 292 - }, - { - "epoch": 0.02642377237678676, - "grad_norm": 1.629641872217129, - "learning_rate": 3.911868635327639e-06, - "loss": 1.0362, - "step": 293 - }, - { - "epoch": 0.026513955900257024, - "grad_norm": 1.3423330687696375, - "learning_rate": 3.914215105530455e-06, - "loss": 1.0394, - "step": 294 - }, - { - "epoch": 0.026604139423727285, - "grad_norm": 1.6169339778304863, - "learning_rate": 3.916553608077179e-06, - "loss": 1.056, - "step": 295 - }, - { - "epoch": 0.026694322947197546, - "grad_norm": 1.8551215881024286, - "learning_rate": 3.91888419689457e-06, - "loss": 1.055, - "step": 296 - }, - { - "epoch": 0.02678450647066781, - "grad_norm": 1.5934362663687707, - "learning_rate": 3.921206925363754e-06, - "loss": 0.9991, - "step": 297 - }, - { - "epoch": 0.02687468999413807, - "grad_norm": 1.6176989939711326, - "learning_rate": 3.923521846327559e-06, - "loss": 0.9782, - "step": 298 - }, - { - "epoch": 0.02696487351760833, - "grad_norm": 1.6768375793217234, - "learning_rate": 3.925829012097725e-06, - "loss": 1.0255, - "step": 299 - }, - { - "epoch": 0.027055057041078596, - "grad_norm": 2.176883986501356, - "learning_rate": 3.928128474462e-06, - "loss": 0.983, - "step": 300 - }, - { - "epoch": 0.027145240564548857, - "grad_norm": 2.148947915221303, - "learning_rate": 3.930420284691115e-06, - "loss": 1.0751, - "step": 301 - }, - { - "epoch": 0.027235424088019117, - "grad_norm": 1.7721090172327207, - "learning_rate": 3.932704493545644e-06, - "loss": 1.1006, - "step": 302 - }, - { - "epoch": 0.02732560761148938, - "grad_norm": 1.8585791287001396, - "learning_rate": 3.934981151282745e-06, - "loss": 1.1487, - "step": 303 - }, - { - "epoch": 0.027415791134959643, - "grad_norm": 1.6230559090593568, - "learning_rate": 3.9372503076628006e-06, - "loss": 1.0569, - "step": 304 - }, - { - "epoch": 0.027505974658429903, - "grad_norm": 2.2681898660390183, - "learning_rate": 3.939512011955941e-06, - "loss": 1.0865, - "step": 305 - }, - { - "epoch": 0.027596158181900168, - "grad_norm": 1.8126360085857232, - "learning_rate": 3.941766312948463e-06, - "loss": 1.0623, - "step": 306 - }, - { - "epoch": 0.02768634170537043, - "grad_norm": 3.4811479481612535, - "learning_rate": 3.944013258949147e-06, - "loss": 1.0296, - "step": 307 - }, - { - "epoch": 0.02777652522884069, - "grad_norm": 1.7138985568487812, - "learning_rate": 3.946252897795465e-06, - "loss": 0.8863, - "step": 308 - }, - { - "epoch": 0.027866708752310954, - "grad_norm": 1.9107736163229, - "learning_rate": 3.9484852768596935e-06, - "loss": 1.0775, - "step": 309 - }, - { - "epoch": 0.027956892275781214, - "grad_norm": 1.5341716841422768, - "learning_rate": 3.950710443054923e-06, - "loss": 0.9668, - "step": 310 - }, - { - "epoch": 0.028047075799251475, - "grad_norm": 1.794091365643787, - "learning_rate": 3.952928442840981e-06, - "loss": 0.9759, - "step": 311 - }, - { - "epoch": 0.02813725932272174, - "grad_norm": 1.635476380572126, - "learning_rate": 3.955139322230243e-06, - "loss": 1.0873, - "step": 312 - }, - { - "epoch": 0.028227442846192, - "grad_norm": 1.8118292126326094, - "learning_rate": 3.957343126793365e-06, - "loss": 0.993, - "step": 313 - }, - { - "epoch": 0.02831762636966226, - "grad_norm": 1.6381600891337254, - "learning_rate": 3.959539901664921e-06, - "loss": 1.0392, - "step": 314 - }, - { - "epoch": 0.028407809893132525, - "grad_norm": 2.135045821080094, - "learning_rate": 3.9617296915489425e-06, - "loss": 1.1632, - "step": 315 - }, - { - "epoch": 0.028497993416602786, - "grad_norm": 1.8253618534823943, - "learning_rate": 3.963912540724387e-06, - "loss": 1.0097, - "step": 316 - }, - { - "epoch": 0.028588176940073047, - "grad_norm": 1.8329558489612707, - "learning_rate": 3.966088493050501e-06, - "loss": 1.0599, - "step": 317 - }, - { - "epoch": 0.02867836046354331, - "grad_norm": 1.55089339996612, - "learning_rate": 3.968257591972113e-06, - "loss": 1.037, - "step": 318 - }, - { - "epoch": 0.028768543987013572, - "grad_norm": 1.9617506066412904, - "learning_rate": 3.970419880524835e-06, - "loss": 1.1316, - "step": 319 - }, - { - "epoch": 0.028858727510483833, - "grad_norm": 1.8040917804614296, - "learning_rate": 3.972575401340192e-06, - "loss": 1.0268, - "step": 320 - }, - { - "epoch": 0.028948911033954097, - "grad_norm": 1.869870162067113, - "learning_rate": 3.974724196650656e-06, - "loss": 1.0959, - "step": 321 - }, - { - "epoch": 0.029039094557424358, - "grad_norm": 1.724950460745397, - "learning_rate": 3.976866308294617e-06, - "loss": 1.0396, - "step": 322 - }, - { - "epoch": 0.02912927808089462, - "grad_norm": 1.2317216579737313, - "learning_rate": 3.979001777721269e-06, - "loss": 1.014, - "step": 323 - }, - { - "epoch": 0.029219461604364883, - "grad_norm": 2.4416882649617566, - "learning_rate": 3.981130645995424e-06, - "loss": 1.0209, - "step": 324 - }, - { - "epoch": 0.029309645127835144, - "grad_norm": 1.7242044079231191, - "learning_rate": 3.983252953802248e-06, - "loss": 1.1271, - "step": 325 - }, - { - "epoch": 0.029399828651305405, - "grad_norm": 0.7330173767262766, - "learning_rate": 3.9853687414519285e-06, - "loss": 0.8965, - "step": 326 - }, - { - "epoch": 0.02949001217477567, - "grad_norm": 1.821134782653713, - "learning_rate": 3.987478048884265e-06, - "loss": 1.0872, - "step": 327 - }, - { - "epoch": 0.02958019569824593, - "grad_norm": 1.660007102100852, - "learning_rate": 3.989580915673196e-06, - "loss": 1.034, - "step": 328 - }, - { - "epoch": 0.02967037922171619, - "grad_norm": 2.317530135830159, - "learning_rate": 3.991677381031255e-06, - "loss": 1.0295, - "step": 329 - }, - { - "epoch": 0.029760562745186455, - "grad_norm": 1.571602666178107, - "learning_rate": 3.993767483813953e-06, - "loss": 1.1294, - "step": 330 - }, - { - "epoch": 0.029850746268656716, - "grad_norm": 1.5996678752705153, - "learning_rate": 3.995851262524104e-06, - "loss": 0.9697, - "step": 331 - }, - { - "epoch": 0.02994092979212698, - "grad_norm": 1.9642118192372477, - "learning_rate": 3.997928755316079e-06, - "loss": 1.0465, - "step": 332 - }, - { - "epoch": 0.03003111331559724, - "grad_norm": 0.9102195423041703, - "learning_rate": 4e-06, - "loss": 0.899, - "step": 333 - }, - { - "epoch": 0.030121296839067502, - "grad_norm": 2.0000743852133898, - "learning_rate": 3.999999914674486e-06, - "loss": 1.0183, - "step": 334 - }, - { - "epoch": 0.030211480362537766, - "grad_norm": 1.8936651708154602, - "learning_rate": 3.999999658697952e-06, - "loss": 1.024, - "step": 335 - }, - { - "epoch": 0.030301663886008027, - "grad_norm": 0.7282072749335269, - "learning_rate": 3.9999992320704185e-06, - "loss": 0.8222, - "step": 336 - }, - { - "epoch": 0.030391847409478288, - "grad_norm": 2.93091445661622, - "learning_rate": 3.999998634791922e-06, - "loss": 1.0669, - "step": 337 - }, - { - "epoch": 0.030482030932948552, - "grad_norm": 1.8001901976446608, - "learning_rate": 3.999997866862515e-06, - "loss": 1.0215, - "step": 338 - }, - { - "epoch": 0.030572214456418813, - "grad_norm": 1.47597980607441, - "learning_rate": 3.999996928282262e-06, - "loss": 1.1304, - "step": 339 - }, - { - "epoch": 0.030662397979889074, - "grad_norm": 2.8976760234142676, - "learning_rate": 3.999995819051244e-06, - "loss": 1.0436, - "step": 340 - }, - { - "epoch": 0.030752581503359338, - "grad_norm": 1.6382967459291884, - "learning_rate": 3.9999945391695536e-06, - "loss": 1.0114, - "step": 341 - }, - { - "epoch": 0.0308427650268296, - "grad_norm": 1.7336948195121797, - "learning_rate": 3.999993088637302e-06, - "loss": 1.1345, - "step": 342 - }, - { - "epoch": 0.03093294855029986, - "grad_norm": 1.7814880345895856, - "learning_rate": 3.999991467454612e-06, - "loss": 1.1083, - "step": 343 - }, - { - "epoch": 0.031023132073770124, - "grad_norm": 1.4247926393853185, - "learning_rate": 3.999989675621622e-06, - "loss": 0.9651, - "step": 344 - }, - { - "epoch": 0.031113315597240385, - "grad_norm": 1.7607525461815716, - "learning_rate": 3.999987713138485e-06, - "loss": 1.0568, - "step": 345 - }, - { - "epoch": 0.031203499120710645, - "grad_norm": 0.8314988488024773, - "learning_rate": 3.999985580005369e-06, - "loss": 0.9357, - "step": 346 - }, - { - "epoch": 0.031293682644180906, - "grad_norm": 2.03892569247414, - "learning_rate": 3.999983276222455e-06, - "loss": 1.1169, - "step": 347 - }, - { - "epoch": 0.03138386616765117, - "grad_norm": 1.8237077213691801, - "learning_rate": 3.999980801789941e-06, - "loss": 1.0602, - "step": 348 - }, - { - "epoch": 0.031474049691121435, - "grad_norm": 1.7114945989641115, - "learning_rate": 3.999978156708036e-06, - "loss": 1.0532, - "step": 349 - }, - { - "epoch": 0.031564233214591696, - "grad_norm": 2.0188954162188884, - "learning_rate": 3.9999753409769675e-06, - "loss": 1.1574, - "step": 350 - }, - { - "epoch": 0.031654416738061956, - "grad_norm": 0.7122231899422855, - "learning_rate": 3.999972354596975e-06, - "loss": 0.8437, - "step": 351 - }, - { - "epoch": 0.03174460026153222, - "grad_norm": 1.61543931318258, - "learning_rate": 3.999969197568314e-06, - "loss": 1.0012, - "step": 352 - }, - { - "epoch": 0.03183478378500248, - "grad_norm": 1.8142903791846086, - "learning_rate": 3.999965869891253e-06, - "loss": 1.101, - "step": 353 - }, - { - "epoch": 0.03192496730847274, - "grad_norm": 1.6927609557750931, - "learning_rate": 3.999962371566075e-06, - "loss": 1.0756, - "step": 354 - }, - { - "epoch": 0.03201515083194301, - "grad_norm": 1.6025159277399643, - "learning_rate": 3.999958702593082e-06, - "loss": 1.1144, - "step": 355 - }, - { - "epoch": 0.03210533435541327, - "grad_norm": 1.3398374329354088, - "learning_rate": 3.999954862972583e-06, - "loss": 1.0724, - "step": 356 - }, - { - "epoch": 0.03219551787888353, - "grad_norm": 1.691516546657276, - "learning_rate": 3.999950852704908e-06, - "loss": 0.9932, - "step": 357 - }, - { - "epoch": 0.03228570140235379, - "grad_norm": 1.6280901211390784, - "learning_rate": 3.9999466717903995e-06, - "loss": 1.1044, - "step": 358 - }, - { - "epoch": 0.03237588492582405, - "grad_norm": 0.6620369480545985, - "learning_rate": 3.999942320229413e-06, - "loss": 0.8594, - "step": 359 - }, - { - "epoch": 0.03246606844929431, - "grad_norm": 4.395103478369001, - "learning_rate": 3.99993779802232e-06, - "loss": 1.1732, - "step": 360 - }, - { - "epoch": 0.03255625197276458, - "grad_norm": 1.44347210806474, - "learning_rate": 3.999933105169506e-06, - "loss": 1.0329, - "step": 361 - }, - { - "epoch": 0.03264643549623484, - "grad_norm": 1.4455955434421228, - "learning_rate": 3.999928241671373e-06, - "loss": 1.0419, - "step": 362 - }, - { - "epoch": 0.0327366190197051, - "grad_norm": 1.9712148707034816, - "learning_rate": 3.999923207528334e-06, - "loss": 0.9485, - "step": 363 - }, - { - "epoch": 0.03282680254317536, - "grad_norm": 1.3295963888185858, - "learning_rate": 3.9999180027408196e-06, - "loss": 1.0468, - "step": 364 - }, - { - "epoch": 0.03291698606664562, - "grad_norm": 1.5529801851913385, - "learning_rate": 3.9999126273092735e-06, - "loss": 1.0176, - "step": 365 - }, - { - "epoch": 0.03300716959011588, - "grad_norm": 1.587889573963751, - "learning_rate": 3.999907081234156e-06, - "loss": 1.1317, - "step": 366 - }, - { - "epoch": 0.03309735311358615, - "grad_norm": 1.7189997404896096, - "learning_rate": 3.999901364515938e-06, - "loss": 1.01, - "step": 367 - }, - { - "epoch": 0.03318753663705641, - "grad_norm": 2.0390712694001443, - "learning_rate": 3.999895477155108e-06, - "loss": 1.0935, - "step": 368 - }, - { - "epoch": 0.03327772016052667, - "grad_norm": 0.7378218902029118, - "learning_rate": 3.999889419152169e-06, - "loss": 0.8361, - "step": 369 - }, - { - "epoch": 0.03336790368399693, - "grad_norm": 4.946934245773521, - "learning_rate": 3.999883190507638e-06, - "loss": 1.0423, - "step": 370 - }, - { - "epoch": 0.033458087207467194, - "grad_norm": 1.7217514187588308, - "learning_rate": 3.999876791222044e-06, - "loss": 1.0351, - "step": 371 - }, - { - "epoch": 0.033548270730937454, - "grad_norm": 1.8528612949523555, - "learning_rate": 3.999870221295936e-06, - "loss": 0.9953, - "step": 372 - }, - { - "epoch": 0.03363845425440772, - "grad_norm": 1.7045380216494248, - "learning_rate": 3.999863480729875e-06, - "loss": 1.0152, - "step": 373 - }, - { - "epoch": 0.03372863777787798, - "grad_norm": 2.674311904109434, - "learning_rate": 3.999856569524433e-06, - "loss": 0.9886, - "step": 374 - }, - { - "epoch": 0.033818821301348244, - "grad_norm": 1.4958745332673975, - "learning_rate": 3.999849487680202e-06, - "loss": 0.9923, - "step": 375 - }, - { - "epoch": 0.033909004824818505, - "grad_norm": 1.4777426872453239, - "learning_rate": 3.999842235197786e-06, - "loss": 0.9794, - "step": 376 - }, - { - "epoch": 0.033999188348288766, - "grad_norm": 2.811458309965545, - "learning_rate": 3.999834812077803e-06, - "loss": 0.9813, - "step": 377 - }, - { - "epoch": 0.034089371871759026, - "grad_norm": 1.7533988643530736, - "learning_rate": 3.999827218320886e-06, - "loss": 1.1032, - "step": 378 - }, - { - "epoch": 0.034179555395229294, - "grad_norm": 1.7858177645537634, - "learning_rate": 3.999819453927685e-06, - "loss": 0.9627, - "step": 379 - }, - { - "epoch": 0.034269738918699555, - "grad_norm": 1.9416516031927953, - "learning_rate": 3.999811518898861e-06, - "loss": 1.0547, - "step": 380 - }, - { - "epoch": 0.034359922442169816, - "grad_norm": 3.0306376989109975, - "learning_rate": 3.999803413235092e-06, - "loss": 1.0568, - "step": 381 - }, - { - "epoch": 0.03445010596564008, - "grad_norm": 1.5410085080832174, - "learning_rate": 3.999795136937068e-06, - "loss": 1.1533, - "step": 382 - }, - { - "epoch": 0.03454028948911034, - "grad_norm": 2.9234259258063573, - "learning_rate": 3.999786690005496e-06, - "loss": 1.0408, - "step": 383 - }, - { - "epoch": 0.0346304730125806, - "grad_norm": 1.4978848485348315, - "learning_rate": 3.999778072441098e-06, - "loss": 1.0267, - "step": 384 - }, - { - "epoch": 0.034720656536050866, - "grad_norm": 3.257087022976841, - "learning_rate": 3.999769284244608e-06, - "loss": 1.0756, - "step": 385 - }, - { - "epoch": 0.03481084005952113, - "grad_norm": 1.393711821713166, - "learning_rate": 3.999760325416775e-06, - "loss": 1.0592, - "step": 386 - }, - { - "epoch": 0.03490102358299139, - "grad_norm": 1.5274840775829013, - "learning_rate": 3.999751195958366e-06, - "loss": 1.13, - "step": 387 - }, - { - "epoch": 0.03499120710646165, - "grad_norm": 0.7720688163420513, - "learning_rate": 3.999741895870157e-06, - "loss": 0.8638, - "step": 388 - }, - { - "epoch": 0.03508139062993191, - "grad_norm": 2.1353955399623423, - "learning_rate": 3.999732425152944e-06, - "loss": 1.09, - "step": 389 - }, - { - "epoch": 0.03517157415340217, - "grad_norm": 1.3235446260550185, - "learning_rate": 3.999722783807533e-06, - "loss": 1.1106, - "step": 390 - }, - { - "epoch": 0.03526175767687244, - "grad_norm": 2.6217727804888447, - "learning_rate": 3.999712971834748e-06, - "loss": 0.9835, - "step": 391 - }, - { - "epoch": 0.0353519412003427, - "grad_norm": 1.4087527997209945, - "learning_rate": 3.999702989235427e-06, - "loss": 1.0545, - "step": 392 - }, - { - "epoch": 0.03544212472381296, - "grad_norm": 1.718157024397316, - "learning_rate": 3.999692836010419e-06, - "loss": 1.1355, - "step": 393 - }, - { - "epoch": 0.03553230824728322, - "grad_norm": 2.118455908065524, - "learning_rate": 3.999682512160593e-06, - "loss": 1.1844, - "step": 394 - }, - { - "epoch": 0.03562249177075348, - "grad_norm": 0.7971283379063583, - "learning_rate": 3.99967201768683e-06, - "loss": 0.8473, - "step": 395 - }, - { - "epoch": 0.03571267529422374, - "grad_norm": 1.7798303669026243, - "learning_rate": 3.999661352590023e-06, - "loss": 1.04, - "step": 396 - }, - { - "epoch": 0.03580285881769401, - "grad_norm": 2.443475490284004, - "learning_rate": 3.999650516871083e-06, - "loss": 1.0491, - "step": 397 - }, - { - "epoch": 0.03589304234116427, - "grad_norm": 2.688672541837597, - "learning_rate": 3.9996395105309365e-06, - "loss": 1.0821, - "step": 398 - }, - { - "epoch": 0.03598322586463453, - "grad_norm": 1.5859540365324754, - "learning_rate": 3.99962833357052e-06, - "loss": 1.0691, - "step": 399 - }, - { - "epoch": 0.03607340938810479, - "grad_norm": 1.9658998112301653, - "learning_rate": 3.999616985990789e-06, - "loss": 1.0315, - "step": 400 - }, - { - "epoch": 0.03616359291157505, - "grad_norm": 1.3503883862773423, - "learning_rate": 3.9996054677927104e-06, - "loss": 0.9926, - "step": 401 - }, - { - "epoch": 0.03625377643504532, - "grad_norm": 3.574357227081814, - "learning_rate": 3.9995937789772675e-06, - "loss": 0.9878, - "step": 402 - }, - { - "epoch": 0.03634395995851558, - "grad_norm": 0.6904189609411961, - "learning_rate": 3.999581919545458e-06, - "loss": 0.8494, - "step": 403 - }, - { - "epoch": 0.03643414348198584, - "grad_norm": 2.0216626959392467, - "learning_rate": 3.9995698894982935e-06, - "loss": 1.0744, - "step": 404 - }, - { - "epoch": 0.0365243270054561, - "grad_norm": 2.109752140944979, - "learning_rate": 3.9995576888368e-06, - "loss": 1.1019, - "step": 405 - }, - { - "epoch": 0.036614510528926364, - "grad_norm": 1.5480467086664842, - "learning_rate": 3.9995453175620194e-06, - "loss": 1.0445, - "step": 406 - }, - { - "epoch": 0.036704694052396625, - "grad_norm": 3.359837699784989, - "learning_rate": 3.999532775675007e-06, - "loss": 0.8984, - "step": 407 - }, - { - "epoch": 0.03679487757586689, - "grad_norm": 2.2411450877104544, - "learning_rate": 3.9995200631768326e-06, - "loss": 1.0282, - "step": 408 - }, - { - "epoch": 0.03688506109933715, - "grad_norm": 1.9657615505026158, - "learning_rate": 3.9995071800685815e-06, - "loss": 1.1781, - "step": 409 - }, - { - "epoch": 0.036975244622807414, - "grad_norm": 2.1381435072565385, - "learning_rate": 3.999494126351352e-06, - "loss": 1.0221, - "step": 410 - }, - { - "epoch": 0.037065428146277675, - "grad_norm": 2.484189488424382, - "learning_rate": 3.99948090202626e-06, - "loss": 1.0529, - "step": 411 - }, - { - "epoch": 0.037155611669747936, - "grad_norm": 1.9226761055439978, - "learning_rate": 3.999467507094431e-06, - "loss": 1.0564, - "step": 412 - }, - { - "epoch": 0.0372457951932182, - "grad_norm": 1.430528070541753, - "learning_rate": 3.999453941557011e-06, - "loss": 0.959, - "step": 413 - }, - { - "epoch": 0.037335978716688464, - "grad_norm": 0.738733183577401, - "learning_rate": 3.999440205415154e-06, - "loss": 0.8189, - "step": 414 - }, - { - "epoch": 0.037426162240158725, - "grad_norm": 1.929902408385705, - "learning_rate": 3.999426298670035e-06, - "loss": 1.0914, - "step": 415 - }, - { - "epoch": 0.037516345763628986, - "grad_norm": 1.3756837878562544, - "learning_rate": 3.9994122213228385e-06, - "loss": 1.0904, - "step": 416 - }, - { - "epoch": 0.03760652928709925, - "grad_norm": 1.9826863831196258, - "learning_rate": 3.9993979733747675e-06, - "loss": 1.0776, - "step": 417 - }, - { - "epoch": 0.03769671281056951, - "grad_norm": 1.9111112301971833, - "learning_rate": 3.999383554827037e-06, - "loss": 1.0495, - "step": 418 - }, - { - "epoch": 0.03778689633403977, - "grad_norm": 1.7251163830403384, - "learning_rate": 3.999368965680876e-06, - "loss": 1.0108, - "step": 419 - }, - { - "epoch": 0.037877079857510036, - "grad_norm": 1.9934562681982753, - "learning_rate": 3.999354205937531e-06, - "loss": 1.0874, - "step": 420 - }, - { - "epoch": 0.0379672633809803, - "grad_norm": 1.7059854407142028, - "learning_rate": 3.999339275598261e-06, - "loss": 1.0934, - "step": 421 - }, - { - "epoch": 0.03805744690445056, - "grad_norm": 1.7113585628962564, - "learning_rate": 3.99932417466434e-06, - "loss": 1.0289, - "step": 422 - }, - { - "epoch": 0.03814763042792082, - "grad_norm": 2.3349309175062047, - "learning_rate": 3.999308903137056e-06, - "loss": 0.9634, - "step": 423 - }, - { - "epoch": 0.03823781395139108, - "grad_norm": 1.8258004040217342, - "learning_rate": 3.999293461017711e-06, - "loss": 1.0964, - "step": 424 - }, - { - "epoch": 0.03832799747486134, - "grad_norm": 3.3626753807418814, - "learning_rate": 3.9992778483076255e-06, - "loss": 1.0375, - "step": 425 - }, - { - "epoch": 0.03841818099833161, - "grad_norm": 1.5317188245895292, - "learning_rate": 3.99926206500813e-06, - "loss": 1.106, - "step": 426 - }, - { - "epoch": 0.03850836452180187, - "grad_norm": 1.572327872045167, - "learning_rate": 3.999246111120571e-06, - "loss": 0.9791, - "step": 427 - }, - { - "epoch": 0.03859854804527213, - "grad_norm": 1.450139085084209, - "learning_rate": 3.999229986646311e-06, - "loss": 1.1166, - "step": 428 - }, - { - "epoch": 0.03868873156874239, - "grad_norm": 2.224757697809998, - "learning_rate": 3.999213691586723e-06, - "loss": 1.0154, - "step": 429 - }, - { - "epoch": 0.03877891509221265, - "grad_norm": 1.4218480872656925, - "learning_rate": 3.9991972259432e-06, - "loss": 1.086, - "step": 430 - }, - { - "epoch": 0.03886909861568291, - "grad_norm": 1.5869818945839245, - "learning_rate": 3.999180589717147e-06, - "loss": 1.1273, - "step": 431 - }, - { - "epoch": 0.03895928213915318, - "grad_norm": 1.9530043297683997, - "learning_rate": 3.999163782909983e-06, - "loss": 0.9653, - "step": 432 - }, - { - "epoch": 0.03904946566262344, - "grad_norm": 2.0220754155584935, - "learning_rate": 3.99914680552314e-06, - "loss": 1.0833, - "step": 433 - }, - { - "epoch": 0.0391396491860937, - "grad_norm": 1.6667369430348695, - "learning_rate": 3.999129657558069e-06, - "loss": 0.9838, - "step": 434 - }, - { - "epoch": 0.03922983270956396, - "grad_norm": 0.7155410200925515, - "learning_rate": 3.999112339016234e-06, - "loss": 0.8253, - "step": 435 - }, - { - "epoch": 0.03932001623303422, - "grad_norm": 1.5252350203974794, - "learning_rate": 3.999094849899109e-06, - "loss": 1.0569, - "step": 436 - }, - { - "epoch": 0.039410199756504484, - "grad_norm": 1.362508166577424, - "learning_rate": 3.99907719020819e-06, - "loss": 0.9671, - "step": 437 - }, - { - "epoch": 0.03950038327997475, - "grad_norm": 1.4740049626643414, - "learning_rate": 3.999059359944982e-06, - "loss": 1.0821, - "step": 438 - }, - { - "epoch": 0.03959056680344501, - "grad_norm": 1.4885736613731488, - "learning_rate": 3.999041359111007e-06, - "loss": 1.1063, - "step": 439 - }, - { - "epoch": 0.03968075032691527, - "grad_norm": 1.9917402176729888, - "learning_rate": 3.999023187707801e-06, - "loss": 1.1298, - "step": 440 - }, - { - "epoch": 0.039770933850385534, - "grad_norm": 1.603796020228106, - "learning_rate": 3.999004845736913e-06, - "loss": 1.1083, - "step": 441 - }, - { - "epoch": 0.039861117373855795, - "grad_norm": 1.96065437127082, - "learning_rate": 3.9989863331999096e-06, - "loss": 1.1226, - "step": 442 - }, - { - "epoch": 0.039951300897326056, - "grad_norm": 3.668114867463695, - "learning_rate": 3.99896765009837e-06, - "loss": 0.983, - "step": 443 - }, - { - "epoch": 0.040041484420796324, - "grad_norm": 1.8345719473811317, - "learning_rate": 3.998948796433888e-06, - "loss": 1.0144, - "step": 444 - }, - { - "epoch": 0.040131667944266584, - "grad_norm": 2.2302777263661255, - "learning_rate": 3.998929772208073e-06, - "loss": 1.0909, - "step": 445 - }, - { - "epoch": 0.040221851467736845, - "grad_norm": 0.7029730738642306, - "learning_rate": 3.998910577422547e-06, - "loss": 0.8691, - "step": 446 - }, - { - "epoch": 0.040312034991207106, - "grad_norm": 1.5506373417957806, - "learning_rate": 3.99889121207895e-06, - "loss": 1.0984, - "step": 447 - }, - { - "epoch": 0.04040221851467737, - "grad_norm": 1.5309711903129455, - "learning_rate": 3.9988716761789324e-06, - "loss": 1.0807, - "step": 448 - }, - { - "epoch": 0.04049240203814763, - "grad_norm": 2.7222111035680014, - "learning_rate": 3.998851969724161e-06, - "loss": 0.9884, - "step": 449 - }, - { - "epoch": 0.040582585561617895, - "grad_norm": 3.383400247096944, - "learning_rate": 3.998832092716319e-06, - "loss": 1.099, - "step": 450 - }, - { - "epoch": 0.040672769085088156, - "grad_norm": 1.290322537575998, - "learning_rate": 3.998812045157102e-06, - "loss": 1.025, - "step": 451 - }, - { - "epoch": 0.04076295260855842, - "grad_norm": 2.9108392720380767, - "learning_rate": 3.998791827048219e-06, - "loss": 1.1051, - "step": 452 - }, - { - "epoch": 0.04085313613202868, - "grad_norm": 2.2922923765522603, - "learning_rate": 3.998771438391396e-06, - "loss": 0.9912, - "step": 453 - }, - { - "epoch": 0.04094331965549894, - "grad_norm": 0.6459922184903605, - "learning_rate": 3.9987508791883725e-06, - "loss": 0.8325, - "step": 454 - }, - { - "epoch": 0.0410335031789692, - "grad_norm": 1.8321734863245016, - "learning_rate": 3.998730149440904e-06, - "loss": 1.1648, - "step": 455 - }, - { - "epoch": 0.04112368670243947, - "grad_norm": 1.6486031688587075, - "learning_rate": 3.998709249150758e-06, - "loss": 1.1041, - "step": 456 - }, - { - "epoch": 0.04121387022590973, - "grad_norm": 1.559732580372283, - "learning_rate": 3.998688178319717e-06, - "loss": 1.0688, - "step": 457 - }, - { - "epoch": 0.04130405374937999, - "grad_norm": 1.5678133079218024, - "learning_rate": 3.9986669369495805e-06, - "loss": 1.0328, - "step": 458 - }, - { - "epoch": 0.04139423727285025, - "grad_norm": 2.340180183385555, - "learning_rate": 3.998645525042161e-06, - "loss": 1.1068, - "step": 459 - }, - { - "epoch": 0.04148442079632051, - "grad_norm": 1.808949577641022, - "learning_rate": 3.998623942599284e-06, - "loss": 1.1234, - "step": 460 - }, - { - "epoch": 0.04157460431979077, - "grad_norm": 1.8951821379081082, - "learning_rate": 3.998602189622793e-06, - "loss": 1.0465, - "step": 461 - }, - { - "epoch": 0.04166478784326104, - "grad_norm": 1.6971057601459625, - "learning_rate": 3.998580266114542e-06, - "loss": 1.0889, - "step": 462 - }, - { - "epoch": 0.0417549713667313, - "grad_norm": 1.8140535602464303, - "learning_rate": 3.998558172076404e-06, - "loss": 0.9519, - "step": 463 - }, - { - "epoch": 0.04184515489020156, - "grad_norm": 1.7935867075744876, - "learning_rate": 3.998535907510262e-06, - "loss": 0.9874, - "step": 464 - }, - { - "epoch": 0.04193533841367182, - "grad_norm": 1.8927371624556715, - "learning_rate": 3.998513472418016e-06, - "loss": 1.0702, - "step": 465 - }, - { - "epoch": 0.04202552193714208, - "grad_norm": 1.6333120941545805, - "learning_rate": 3.998490866801582e-06, - "loss": 1.0864, - "step": 466 - }, - { - "epoch": 0.04211570546061234, - "grad_norm": 1.564059128124097, - "learning_rate": 3.998468090662886e-06, - "loss": 1.0389, - "step": 467 - }, - { - "epoch": 0.04220588898408261, - "grad_norm": 1.6032236555948796, - "learning_rate": 3.998445144003874e-06, - "loss": 0.946, - "step": 468 - }, - { - "epoch": 0.04229607250755287, - "grad_norm": 1.69608461578735, - "learning_rate": 3.998422026826504e-06, - "loss": 1.0726, - "step": 469 - }, - { - "epoch": 0.04238625603102313, - "grad_norm": 1.7225831723445262, - "learning_rate": 3.998398739132746e-06, - "loss": 1.0821, - "step": 470 - }, - { - "epoch": 0.04247643955449339, - "grad_norm": 1.4846202446974701, - "learning_rate": 3.99837528092459e-06, - "loss": 1.0053, - "step": 471 - }, - { - "epoch": 0.042566623077963654, - "grad_norm": 1.582802440916372, - "learning_rate": 3.998351652204034e-06, - "loss": 1.1056, - "step": 472 - }, - { - "epoch": 0.042656806601433915, - "grad_norm": 1.5344531752178152, - "learning_rate": 3.998327852973098e-06, - "loss": 1.0869, - "step": 473 - }, - { - "epoch": 0.04274699012490418, - "grad_norm": 2.0271826296013202, - "learning_rate": 3.99830388323381e-06, - "loss": 1.0027, - "step": 474 - }, - { - "epoch": 0.042837173648374444, - "grad_norm": 1.411842584799357, - "learning_rate": 3.998279742988216e-06, - "loss": 1.0774, - "step": 475 - }, - { - "epoch": 0.042927357171844704, - "grad_norm": 1.355037433698397, - "learning_rate": 3.998255432238377e-06, - "loss": 1.0172, - "step": 476 - }, - { - "epoch": 0.043017540695314965, - "grad_norm": 1.5878144230687359, - "learning_rate": 3.9982309509863656e-06, - "loss": 1.1217, - "step": 477 - }, - { - "epoch": 0.043107724218785226, - "grad_norm": 1.925783664167953, - "learning_rate": 3.998206299234272e-06, - "loss": 1.007, - "step": 478 - }, - { - "epoch": 0.04319790774225549, - "grad_norm": 1.484887687083627, - "learning_rate": 3.998181476984198e-06, - "loss": 1.0699, - "step": 479 - }, - { - "epoch": 0.043288091265725755, - "grad_norm": 1.6475436422214411, - "learning_rate": 3.998156484238263e-06, - "loss": 0.9769, - "step": 480 - }, - { - "epoch": 0.043378274789196015, - "grad_norm": 12.960365324169588, - "learning_rate": 3.998131320998599e-06, - "loss": 0.9856, - "step": 481 - }, - { - "epoch": 0.043468458312666276, - "grad_norm": 2.1132690654974526, - "learning_rate": 3.998105987267353e-06, - "loss": 1.096, - "step": 482 - }, - { - "epoch": 0.04355864183613654, - "grad_norm": 1.7233867869520167, - "learning_rate": 3.998080483046687e-06, - "loss": 0.9785, - "step": 483 - }, - { - "epoch": 0.0436488253596068, - "grad_norm": 1.386776216417664, - "learning_rate": 3.998054808338776e-06, - "loss": 0.9949, - "step": 484 - }, - { - "epoch": 0.04373900888307706, - "grad_norm": 1.481125738669952, - "learning_rate": 3.998028963145812e-06, - "loss": 1.0136, - "step": 485 - }, - { - "epoch": 0.043829192406547327, - "grad_norm": 1.5048212611875034, - "learning_rate": 3.99800294747e-06, - "loss": 1.0686, - "step": 486 - }, - { - "epoch": 0.04391937593001759, - "grad_norm": 1.33127297215946, - "learning_rate": 3.99797676131356e-06, - "loss": 1.0754, - "step": 487 - }, - { - "epoch": 0.04400955945348785, - "grad_norm": 1.8731873969707147, - "learning_rate": 3.997950404678726e-06, - "loss": 0.9033, - "step": 488 - }, - { - "epoch": 0.04409974297695811, - "grad_norm": 1.6346016166935726, - "learning_rate": 3.997923877567746e-06, - "loss": 1.1678, - "step": 489 - }, - { - "epoch": 0.04418992650042837, - "grad_norm": 1.414575304655607, - "learning_rate": 3.9978971799828855e-06, - "loss": 0.8856, - "step": 490 - }, - { - "epoch": 0.04428011002389863, - "grad_norm": 1.8776480096009407, - "learning_rate": 3.997870311926421e-06, - "loss": 1.0412, - "step": 491 - }, - { - "epoch": 0.0443702935473689, - "grad_norm": 1.565741876134334, - "learning_rate": 3.997843273400645e-06, - "loss": 1.0045, - "step": 492 - }, - { - "epoch": 0.04446047707083916, - "grad_norm": 1.7767375876542097, - "learning_rate": 3.997816064407865e-06, - "loss": 1.0273, - "step": 493 - }, - { - "epoch": 0.04455066059430942, - "grad_norm": 2.023926782550265, - "learning_rate": 3.997788684950402e-06, - "loss": 1.0729, - "step": 494 - }, - { - "epoch": 0.04464084411777968, - "grad_norm": 1.5497560739855096, - "learning_rate": 3.997761135030593e-06, - "loss": 1.0916, - "step": 495 - }, - { - "epoch": 0.04473102764124994, - "grad_norm": 1.4700292332655407, - "learning_rate": 3.997733414650789e-06, - "loss": 0.9489, - "step": 496 - }, - { - "epoch": 0.0448212111647202, - "grad_norm": 1.859463921993168, - "learning_rate": 3.9977055238133554e-06, - "loss": 1.0108, - "step": 497 - }, - { - "epoch": 0.04491139468819047, - "grad_norm": 1.6152986562131593, - "learning_rate": 3.99767746252067e-06, - "loss": 0.9253, - "step": 498 - }, - { - "epoch": 0.04500157821166073, - "grad_norm": 1.5264584836790125, - "learning_rate": 3.997649230775129e-06, - "loss": 1.0479, - "step": 499 - }, - { - "epoch": 0.04509176173513099, - "grad_norm": 1.6275197080951238, - "learning_rate": 3.9976208285791395e-06, - "loss": 0.9933, - "step": 500 - }, - { - "epoch": 0.04518194525860125, - "grad_norm": 0.8726612237773187, - "learning_rate": 3.997592255935127e-06, - "loss": 0.972, - "step": 501 - }, - { - "epoch": 0.045272128782071513, - "grad_norm": 1.6533913323909475, - "learning_rate": 3.997563512845529e-06, - "loss": 1.0726, - "step": 502 - }, - { - "epoch": 0.045362312305541774, - "grad_norm": 1.631434173710366, - "learning_rate": 3.9975345993127975e-06, - "loss": 1.0066, - "step": 503 - }, - { - "epoch": 0.04545249582901204, - "grad_norm": 1.619846240858884, - "learning_rate": 3.9975055153393985e-06, - "loss": 1.0622, - "step": 504 - }, - { - "epoch": 0.0455426793524823, - "grad_norm": 2.6386695257553385, - "learning_rate": 3.997476260927816e-06, - "loss": 1.0636, - "step": 505 - }, - { - "epoch": 0.045632862875952564, - "grad_norm": 1.7944934487253807, - "learning_rate": 3.997446836080545e-06, - "loss": 1.0273, - "step": 506 - }, - { - "epoch": 0.045723046399422825, - "grad_norm": 1.9626000032640094, - "learning_rate": 3.997417240800095e-06, - "loss": 1.102, - "step": 507 - }, - { - "epoch": 0.045813229922893085, - "grad_norm": 1.6556922405293566, - "learning_rate": 3.997387475088994e-06, - "loss": 1.1501, - "step": 508 - }, - { - "epoch": 0.045903413446363346, - "grad_norm": 1.862175315359998, - "learning_rate": 3.99735753894978e-06, - "loss": 1.1377, - "step": 509 - }, - { - "epoch": 0.045993596969833614, - "grad_norm": 1.8033343770393742, - "learning_rate": 3.997327432385006e-06, - "loss": 1.0436, - "step": 510 - }, - { - "epoch": 0.046083780493303875, - "grad_norm": 1.6642755363101682, - "learning_rate": 3.997297155397244e-06, - "loss": 0.9783, - "step": 511 - }, - { - "epoch": 0.046173964016774136, - "grad_norm": 2.4005856911942556, - "learning_rate": 3.997266707989074e-06, - "loss": 0.9716, - "step": 512 - }, - { - "epoch": 0.046264147540244396, - "grad_norm": 1.9017046534393724, - "learning_rate": 3.997236090163097e-06, - "loss": 0.9611, - "step": 513 - }, - { - "epoch": 0.04635433106371466, - "grad_norm": 1.9579345080573551, - "learning_rate": 3.9972053019219235e-06, - "loss": 1.1778, - "step": 514 - }, - { - "epoch": 0.04644451458718492, - "grad_norm": 1.8806741054788512, - "learning_rate": 3.997174343268181e-06, - "loss": 1.0657, - "step": 515 - }, - { - "epoch": 0.046534698110655186, - "grad_norm": 1.5164271326214178, - "learning_rate": 3.9971432142045115e-06, - "loss": 1.057, - "step": 516 - }, - { - "epoch": 0.04662488163412545, - "grad_norm": 0.8084949304024235, - "learning_rate": 3.99711191473357e-06, - "loss": 0.9264, - "step": 517 - }, - { - "epoch": 0.04671506515759571, - "grad_norm": 1.9487305910955863, - "learning_rate": 3.99708044485803e-06, - "loss": 1.0684, - "step": 518 - }, - { - "epoch": 0.04680524868106597, - "grad_norm": 1.8925206792752292, - "learning_rate": 3.997048804580574e-06, - "loss": 1.0603, - "step": 519 - }, - { - "epoch": 0.04689543220453623, - "grad_norm": 1.9792778452381345, - "learning_rate": 3.997016993903901e-06, - "loss": 0.9935, - "step": 520 - }, - { - "epoch": 0.04698561572800649, - "grad_norm": 1.422410884941899, - "learning_rate": 3.996985012830728e-06, - "loss": 1.1193, - "step": 521 - }, - { - "epoch": 0.04707579925147676, - "grad_norm": 1.4876250174422792, - "learning_rate": 3.996952861363782e-06, - "loss": 0.9986, - "step": 522 - }, - { - "epoch": 0.04716598277494702, - "grad_norm": 1.4060740466771566, - "learning_rate": 3.9969205395058064e-06, - "loss": 1.0547, - "step": 523 - }, - { - "epoch": 0.04725616629841728, - "grad_norm": 1.5410936768543482, - "learning_rate": 3.99688804725956e-06, - "loss": 1.0397, - "step": 524 - }, - { - "epoch": 0.04734634982188754, - "grad_norm": 1.4003062202842318, - "learning_rate": 3.996855384627815e-06, - "loss": 1.0016, - "step": 525 - }, - { - "epoch": 0.0474365333453578, - "grad_norm": 1.7428567738387457, - "learning_rate": 3.996822551613357e-06, - "loss": 1.0775, - "step": 526 - }, - { - "epoch": 0.04752671686882806, - "grad_norm": 2.012561806624129, - "learning_rate": 3.996789548218989e-06, - "loss": 0.9779, - "step": 527 - }, - { - "epoch": 0.04761690039229833, - "grad_norm": 1.843742758526563, - "learning_rate": 3.996756374447526e-06, - "loss": 0.9385, - "step": 528 - }, - { - "epoch": 0.04770708391576859, - "grad_norm": 4.651995667472783, - "learning_rate": 3.9967230303018005e-06, - "loss": 1.0624, - "step": 529 - }, - { - "epoch": 0.04779726743923885, - "grad_norm": 1.4038821312260708, - "learning_rate": 3.996689515784655e-06, - "loss": 1.0282, - "step": 530 - }, - { - "epoch": 0.04788745096270911, - "grad_norm": 2.409761912824888, - "learning_rate": 3.996655830898951e-06, - "loss": 1.0202, - "step": 531 - }, - { - "epoch": 0.04797763448617937, - "grad_norm": 1.6519891886045615, - "learning_rate": 3.996621975647562e-06, - "loss": 1.0556, - "step": 532 - }, - { - "epoch": 0.04806781800964964, - "grad_norm": 1.2448977288213716, - "learning_rate": 3.996587950033377e-06, - "loss": 0.9892, - "step": 533 - }, - { - "epoch": 0.0481580015331199, - "grad_norm": 1.858706698807965, - "learning_rate": 3.996553754059299e-06, - "loss": 0.9744, - "step": 534 - }, - { - "epoch": 0.04824818505659016, - "grad_norm": 1.695625522387066, - "learning_rate": 3.996519387728245e-06, - "loss": 1.0462, - "step": 535 - }, - { - "epoch": 0.04833836858006042, - "grad_norm": 1.3525220078036893, - "learning_rate": 3.9964848510431495e-06, - "loss": 1.0339, - "step": 536 - }, - { - "epoch": 0.048428552103530684, - "grad_norm": 1.5116510104988234, - "learning_rate": 3.996450144006957e-06, - "loss": 1.1024, - "step": 537 - }, - { - "epoch": 0.048518735627000945, - "grad_norm": 11.622703407429372, - "learning_rate": 3.99641526662263e-06, - "loss": 1.0254, - "step": 538 - }, - { - "epoch": 0.04860891915047121, - "grad_norm": 4.487669900637185, - "learning_rate": 3.996380218893145e-06, - "loss": 1.1294, - "step": 539 - }, - { - "epoch": 0.04869910267394147, - "grad_norm": 1.5542084995943053, - "learning_rate": 3.996345000821491e-06, - "loss": 1.0776, - "step": 540 - }, - { - "epoch": 0.048789286197411734, - "grad_norm": 1.9738416679215163, - "learning_rate": 3.996309612410674e-06, - "loss": 1.1537, - "step": 541 - }, - { - "epoch": 0.048879469720881995, - "grad_norm": 2.268068561203901, - "learning_rate": 3.996274053663713e-06, - "loss": 0.9837, - "step": 542 - }, - { - "epoch": 0.048969653244352256, - "grad_norm": 0.8113114760724254, - "learning_rate": 3.996238324583643e-06, - "loss": 0.8855, - "step": 543 - }, - { - "epoch": 0.049059836767822516, - "grad_norm": 6.285548622871238, - "learning_rate": 3.996202425173512e-06, - "loss": 1.1091, - "step": 544 - }, - { - "epoch": 0.049150020291292784, - "grad_norm": 1.7390654091754993, - "learning_rate": 3.996166355436383e-06, - "loss": 1.0034, - "step": 545 - }, - { - "epoch": 0.049240203814763045, - "grad_norm": 1.917943826113357, - "learning_rate": 3.996130115375333e-06, - "loss": 1.0609, - "step": 546 - }, - { - "epoch": 0.049330387338233306, - "grad_norm": 0.870561616812647, - "learning_rate": 3.996093704993456e-06, - "loss": 0.8863, - "step": 547 - }, - { - "epoch": 0.04942057086170357, - "grad_norm": 1.6609665991027516, - "learning_rate": 3.996057124293857e-06, - "loss": 1.1305, - "step": 548 - }, - { - "epoch": 0.04951075438517383, - "grad_norm": 2.0679576220643527, - "learning_rate": 3.996020373279659e-06, - "loss": 1.088, - "step": 549 - }, - { - "epoch": 0.04960093790864409, - "grad_norm": 1.3842867734880542, - "learning_rate": 3.995983451953996e-06, - "loss": 1.0569, - "step": 550 - }, - { - "epoch": 0.049691121432114356, - "grad_norm": 2.006522628941753, - "learning_rate": 3.99594636032002e-06, - "loss": 1.0716, - "step": 551 - }, - { - "epoch": 0.04978130495558462, - "grad_norm": 1.9379496514085843, - "learning_rate": 3.995909098380894e-06, - "loss": 0.9408, - "step": 552 - }, - { - "epoch": 0.04987148847905488, - "grad_norm": 1.4436128262174914, - "learning_rate": 3.995871666139799e-06, - "loss": 0.9277, - "step": 553 - }, - { - "epoch": 0.04996167200252514, - "grad_norm": 1.7021574719397718, - "learning_rate": 3.995834063599928e-06, - "loss": 1.0237, - "step": 554 - }, - { - "epoch": 0.0500518555259954, - "grad_norm": 1.354792709401024, - "learning_rate": 3.99579629076449e-06, - "loss": 1.0691, - "step": 555 - }, - { - "epoch": 0.05014203904946566, - "grad_norm": 1.7301851819545158, - "learning_rate": 3.9957583476367084e-06, - "loss": 1.0227, - "step": 556 - }, - { - "epoch": 0.05023222257293593, - "grad_norm": 3.858102071160339, - "learning_rate": 3.995720234219819e-06, - "loss": 1.0568, - "step": 557 - }, - { - "epoch": 0.05032240609640619, - "grad_norm": 1.5900662803030683, - "learning_rate": 3.995681950517075e-06, - "loss": 0.9527, - "step": 558 - }, - { - "epoch": 0.05041258961987645, - "grad_norm": 1.6986718010043806, - "learning_rate": 3.995643496531743e-06, - "loss": 1.0687, - "step": 559 - }, - { - "epoch": 0.05050277314334671, - "grad_norm": 1.863518593824604, - "learning_rate": 3.9956048722671044e-06, - "loss": 1.0644, - "step": 560 - }, - { - "epoch": 0.05059295666681697, - "grad_norm": 1.3914893817859726, - "learning_rate": 3.995566077726454e-06, - "loss": 1.0224, - "step": 561 - }, - { - "epoch": 0.05068314019028723, - "grad_norm": 2.3240833403307066, - "learning_rate": 3.995527112913103e-06, - "loss": 1.0092, - "step": 562 - }, - { - "epoch": 0.0507733237137575, - "grad_norm": 1.5490133744777392, - "learning_rate": 3.995487977830375e-06, - "loss": 1.0006, - "step": 563 - }, - { - "epoch": 0.05086350723722776, - "grad_norm": 1.5340682583559202, - "learning_rate": 3.9954486724816105e-06, - "loss": 1.1422, - "step": 564 - }, - { - "epoch": 0.05095369076069802, - "grad_norm": 1.90761461036112, - "learning_rate": 3.995409196870161e-06, - "loss": 1.0901, - "step": 565 - }, - { - "epoch": 0.05104387428416828, - "grad_norm": 0.7660802538665432, - "learning_rate": 3.995369550999398e-06, - "loss": 0.8579, - "step": 566 - }, - { - "epoch": 0.05113405780763854, - "grad_norm": 1.6943594367516663, - "learning_rate": 3.995329734872702e-06, - "loss": 1.0644, - "step": 567 - }, - { - "epoch": 0.051224241331108804, - "grad_norm": 1.3907981079026157, - "learning_rate": 3.9952897484934706e-06, - "loss": 1.161, - "step": 568 - }, - { - "epoch": 0.05131442485457907, - "grad_norm": 2.8809681189751597, - "learning_rate": 3.995249591865115e-06, - "loss": 1.0576, - "step": 569 - }, - { - "epoch": 0.05140460837804933, - "grad_norm": 1.6207635382340313, - "learning_rate": 3.995209264991063e-06, - "loss": 1.0047, - "step": 570 - }, - { - "epoch": 0.05149479190151959, - "grad_norm": 1.5800702786810747, - "learning_rate": 3.995168767874756e-06, - "loss": 1.0946, - "step": 571 - }, - { - "epoch": 0.051584975424989854, - "grad_norm": 1.3653833747291921, - "learning_rate": 3.995128100519648e-06, - "loss": 1.0314, - "step": 572 - }, - { - "epoch": 0.051675158948460115, - "grad_norm": 1.562224859808523, - "learning_rate": 3.995087262929209e-06, - "loss": 1.0673, - "step": 573 - }, - { - "epoch": 0.051765342471930376, - "grad_norm": 2.813256734133669, - "learning_rate": 3.995046255106925e-06, - "loss": 1.0552, - "step": 574 - }, - { - "epoch": 0.05185552599540064, - "grad_norm": 1.701515907511418, - "learning_rate": 3.995005077056293e-06, - "loss": 1.111, - "step": 575 - }, - { - "epoch": 0.051945709518870904, - "grad_norm": 1.9282746490323495, - "learning_rate": 3.9949637287808284e-06, - "loss": 0.9472, - "step": 576 - }, - { - "epoch": 0.052035893042341165, - "grad_norm": 1.5467851400765937, - "learning_rate": 3.994922210284057e-06, - "loss": 0.9951, - "step": 577 - }, - { - "epoch": 0.052126076565811426, - "grad_norm": 1.7040450384608463, - "learning_rate": 3.994880521569524e-06, - "loss": 1.0773, - "step": 578 - }, - { - "epoch": 0.05221626008928169, - "grad_norm": 1.5282408282296325, - "learning_rate": 3.994838662640785e-06, - "loss": 1.061, - "step": 579 - }, - { - "epoch": 0.05230644361275195, - "grad_norm": 3.3435512020160436, - "learning_rate": 3.9947966335014116e-06, - "loss": 1.0876, - "step": 580 - }, - { - "epoch": 0.052396627136222215, - "grad_norm": 2.0170585797965943, - "learning_rate": 3.99475443415499e-06, - "loss": 1.0258, - "step": 581 - }, - { - "epoch": 0.052486810659692476, - "grad_norm": 1.4744944950853454, - "learning_rate": 3.994712064605121e-06, - "loss": 1.0435, - "step": 582 - }, - { - "epoch": 0.05257699418316274, - "grad_norm": 1.6300546819489825, - "learning_rate": 3.99466952485542e-06, - "loss": 1.1925, - "step": 583 - }, - { - "epoch": 0.052667177706633, - "grad_norm": 4.622665434662689, - "learning_rate": 3.994626814909518e-06, - "loss": 1.0651, - "step": 584 - }, - { - "epoch": 0.05275736123010326, - "grad_norm": 1.7912449747082961, - "learning_rate": 3.994583934771056e-06, - "loss": 1.0325, - "step": 585 - }, - { - "epoch": 0.05284754475357352, - "grad_norm": 1.4011206433278414, - "learning_rate": 3.9945408844436955e-06, - "loss": 1.1056, - "step": 586 - }, - { - "epoch": 0.05293772827704379, - "grad_norm": 1.9442980468981907, - "learning_rate": 3.994497663931109e-06, - "loss": 1.0757, - "step": 587 - }, - { - "epoch": 0.05302791180051405, - "grad_norm": 1.5869283352723536, - "learning_rate": 3.994454273236984e-06, - "loss": 1.0677, - "step": 588 - }, - { - "epoch": 0.05311809532398431, - "grad_norm": 1.5448698669244607, - "learning_rate": 3.994410712365023e-06, - "loss": 1.0571, - "step": 589 - }, - { - "epoch": 0.05320827884745457, - "grad_norm": 2.319812964924161, - "learning_rate": 3.994366981318943e-06, - "loss": 1.1551, - "step": 590 - }, - { - "epoch": 0.05329846237092483, - "grad_norm": 1.6154883114749945, - "learning_rate": 3.9943230801024765e-06, - "loss": 1.0543, - "step": 591 - }, - { - "epoch": 0.05338864589439509, - "grad_norm": 1.8360557355752558, - "learning_rate": 3.9942790087193666e-06, - "loss": 1.0825, - "step": 592 - }, - { - "epoch": 0.05347882941786536, - "grad_norm": 1.8398626047128033, - "learning_rate": 3.994234767173376e-06, - "loss": 1.0449, - "step": 593 - }, - { - "epoch": 0.05356901294133562, - "grad_norm": 2.0463357462595617, - "learning_rate": 3.994190355468279e-06, - "loss": 0.9417, - "step": 594 - }, - { - "epoch": 0.05365919646480588, - "grad_norm": 1.5737790234277806, - "learning_rate": 3.994145773607865e-06, - "loss": 1.0469, - "step": 595 - }, - { - "epoch": 0.05374937998827614, - "grad_norm": 1.4463423900058408, - "learning_rate": 3.994101021595938e-06, - "loss": 1.0216, - "step": 596 - }, - { - "epoch": 0.0538395635117464, - "grad_norm": 1.511776945191631, - "learning_rate": 3.9940560994363165e-06, - "loss": 1.07, - "step": 597 - }, - { - "epoch": 0.05392974703521666, - "grad_norm": 1.593122396126524, - "learning_rate": 3.994011007132833e-06, - "loss": 0.9315, - "step": 598 - }, - { - "epoch": 0.05401993055868693, - "grad_norm": 0.7496292071877205, - "learning_rate": 3.993965744689337e-06, - "loss": 0.8999, - "step": 599 - }, - { - "epoch": 0.05411011408215719, - "grad_norm": 1.6280253198608903, - "learning_rate": 3.993920312109687e-06, - "loss": 1.0303, - "step": 600 - }, - { - "epoch": 0.05420029760562745, - "grad_norm": 1.638908141861812, - "learning_rate": 3.993874709397764e-06, - "loss": 1.0478, - "step": 601 - }, - { - "epoch": 0.05429048112909771, - "grad_norm": 1.6996221655414994, - "learning_rate": 3.993828936557454e-06, - "loss": 1.0334, - "step": 602 - }, - { - "epoch": 0.054380664652567974, - "grad_norm": 1.8249687270528965, - "learning_rate": 3.993782993592667e-06, - "loss": 1.0131, - "step": 603 - }, - { - "epoch": 0.054470848176038235, - "grad_norm": 1.570657644725745, - "learning_rate": 3.993736880507321e-06, - "loss": 1.0877, - "step": 604 - }, - { - "epoch": 0.0545610316995085, - "grad_norm": 1.686706921746681, - "learning_rate": 3.99369059730535e-06, - "loss": 0.9918, - "step": 605 - }, - { - "epoch": 0.05465121522297876, - "grad_norm": 1.2673479291442284, - "learning_rate": 3.993644143990706e-06, - "loss": 1.0518, - "step": 606 - }, - { - "epoch": 0.054741398746449024, - "grad_norm": 2.063407467119912, - "learning_rate": 3.99359752056735e-06, - "loss": 0.9695, - "step": 607 - }, - { - "epoch": 0.054831582269919285, - "grad_norm": 1.984974868096442, - "learning_rate": 3.993550727039261e-06, - "loss": 0.9059, - "step": 608 - }, - { - "epoch": 0.054921765793389546, - "grad_norm": 2.126571635073199, - "learning_rate": 3.993503763410431e-06, - "loss": 1.0693, - "step": 609 - }, - { - "epoch": 0.05501194931685981, - "grad_norm": 1.6206559290878713, - "learning_rate": 3.9934566296848686e-06, - "loss": 0.9685, - "step": 610 - }, - { - "epoch": 0.055102132840330074, - "grad_norm": 1.5171783195555715, - "learning_rate": 3.993409325866595e-06, - "loss": 1.1309, - "step": 611 - }, - { - "epoch": 0.055192316363800335, - "grad_norm": 0.6595074552225975, - "learning_rate": 3.993361851959645e-06, - "loss": 0.8751, - "step": 612 - }, - { - "epoch": 0.055282499887270596, - "grad_norm": 1.6855371857295016, - "learning_rate": 3.993314207968071e-06, - "loss": 1.0977, - "step": 613 - }, - { - "epoch": 0.05537268341074086, - "grad_norm": 1.5088323593099986, - "learning_rate": 3.993266393895938e-06, - "loss": 1.0368, - "step": 614 - }, - { - "epoch": 0.05546286693421112, - "grad_norm": 1.4993513612207552, - "learning_rate": 3.993218409747326e-06, - "loss": 1.0606, - "step": 615 - }, - { - "epoch": 0.05555305045768138, - "grad_norm": 1.4359559598614084, - "learning_rate": 3.993170255526328e-06, - "loss": 1.0457, - "step": 616 - }, - { - "epoch": 0.055643233981151646, - "grad_norm": 1.4338538326323544, - "learning_rate": 3.993121931237054e-06, - "loss": 1.0725, - "step": 617 - }, - { - "epoch": 0.05573341750462191, - "grad_norm": 1.6837839961708594, - "learning_rate": 3.993073436883627e-06, - "loss": 1.0633, - "step": 618 - }, - { - "epoch": 0.05582360102809217, - "grad_norm": 1.6552024983646718, - "learning_rate": 3.993024772470184e-06, - "loss": 1.032, - "step": 619 - }, - { - "epoch": 0.05591378455156243, - "grad_norm": 1.6363001176518523, - "learning_rate": 3.992975938000878e-06, - "loss": 1.0297, - "step": 620 - }, - { - "epoch": 0.05600396807503269, - "grad_norm": 0.6854087712220618, - "learning_rate": 3.992926933479876e-06, - "loss": 0.8338, - "step": 621 - }, - { - "epoch": 0.05609415159850295, - "grad_norm": 1.7535255885632504, - "learning_rate": 3.9928777589113595e-06, - "loss": 1.0708, - "step": 622 - }, - { - "epoch": 0.05618433512197322, - "grad_norm": 1.555278416574976, - "learning_rate": 3.992828414299524e-06, - "loss": 1.0047, - "step": 623 - }, - { - "epoch": 0.05627451864544348, - "grad_norm": 1.3768855517966179, - "learning_rate": 3.992778899648579e-06, - "loss": 1.048, - "step": 624 - }, - { - "epoch": 0.05636470216891374, - "grad_norm": 1.5817448863125736, - "learning_rate": 3.992729214962751e-06, - "loss": 1.0719, - "step": 625 - }, - { - "epoch": 0.056454885692384, - "grad_norm": 1.5730610795915498, - "learning_rate": 3.992679360246279e-06, - "loss": 1.0796, - "step": 626 - }, - { - "epoch": 0.05654506921585426, - "grad_norm": 1.6350540034390262, - "learning_rate": 3.992629335503416e-06, - "loss": 1.0647, - "step": 627 - }, - { - "epoch": 0.05663525273932452, - "grad_norm": 1.6350906030825008, - "learning_rate": 3.9925791407384304e-06, - "loss": 1.0363, - "step": 628 - }, - { - "epoch": 0.05672543626279479, - "grad_norm": 2.31607820140337, - "learning_rate": 3.992528775955606e-06, - "loss": 1.0915, - "step": 629 - }, - { - "epoch": 0.05681561978626505, - "grad_norm": 1.6155785559466214, - "learning_rate": 3.992478241159239e-06, - "loss": 1.0243, - "step": 630 - }, - { - "epoch": 0.05690580330973531, - "grad_norm": 1.7229234137935365, - "learning_rate": 3.992427536353643e-06, - "loss": 1.068, - "step": 631 - }, - { - "epoch": 0.05699598683320557, - "grad_norm": 1.6729132022402162, - "learning_rate": 3.992376661543143e-06, - "loss": 1.0297, - "step": 632 - }, - { - "epoch": 0.05708617035667583, - "grad_norm": 1.8661784234600576, - "learning_rate": 3.992325616732081e-06, - "loss": 1.0224, - "step": 633 - }, - { - "epoch": 0.057176353880146094, - "grad_norm": 1.519932162000775, - "learning_rate": 3.992274401924811e-06, - "loss": 1.1501, - "step": 634 - }, - { - "epoch": 0.05726653740361636, - "grad_norm": 1.441542538865025, - "learning_rate": 3.992223017125704e-06, - "loss": 0.9588, - "step": 635 - }, - { - "epoch": 0.05735672092708662, - "grad_norm": 1.5337673436857642, - "learning_rate": 3.992171462339145e-06, - "loss": 1.0789, - "step": 636 - }, - { - "epoch": 0.057446904450556883, - "grad_norm": 3.727986637435344, - "learning_rate": 3.992119737569532e-06, - "loss": 1.1213, - "step": 637 - }, - { - "epoch": 0.057537087974027144, - "grad_norm": 1.7521677895722005, - "learning_rate": 3.992067842821277e-06, - "loss": 1.0438, - "step": 638 - }, - { - "epoch": 0.057627271497497405, - "grad_norm": 1.874205230117126, - "learning_rate": 3.99201577809881e-06, - "loss": 1.1756, - "step": 639 - }, - { - "epoch": 0.057717455020967666, - "grad_norm": 1.9190775041936297, - "learning_rate": 3.991963543406574e-06, - "loss": 1.0639, - "step": 640 - }, - { - "epoch": 0.057807638544437934, - "grad_norm": 1.8891422983736927, - "learning_rate": 3.991911138749024e-06, - "loss": 1.052, - "step": 641 - }, - { - "epoch": 0.057897822067908195, - "grad_norm": 1.5716655462725555, - "learning_rate": 3.991858564130633e-06, - "loss": 1.0746, - "step": 642 - }, - { - "epoch": 0.057988005591378455, - "grad_norm": 1.8111109837270014, - "learning_rate": 3.991805819555885e-06, - "loss": 1.0209, - "step": 643 - }, - { - "epoch": 0.058078189114848716, - "grad_norm": 3.107039451444418, - "learning_rate": 3.991752905029283e-06, - "loss": 1.0519, - "step": 644 - }, - { - "epoch": 0.05816837263831898, - "grad_norm": 1.9105604871266135, - "learning_rate": 3.991699820555341e-06, - "loss": 0.9192, - "step": 645 - }, - { - "epoch": 0.05825855616178924, - "grad_norm": 1.5345285311644628, - "learning_rate": 3.991646566138588e-06, - "loss": 1.047, - "step": 646 - }, - { - "epoch": 0.058348739685259506, - "grad_norm": 1.488703229859749, - "learning_rate": 3.991593141783567e-06, - "loss": 0.9689, - "step": 647 - }, - { - "epoch": 0.058438923208729766, - "grad_norm": 1.5544435702707666, - "learning_rate": 3.991539547494839e-06, - "loss": 1.0636, - "step": 648 - }, - { - "epoch": 0.05852910673220003, - "grad_norm": 1.6495815844750181, - "learning_rate": 3.991485783276974e-06, - "loss": 1.0372, - "step": 649 - }, - { - "epoch": 0.05861929025567029, - "grad_norm": 1.4519418801689934, - "learning_rate": 3.991431849134563e-06, - "loss": 1.1004, - "step": 650 - }, - { - "epoch": 0.05870947377914055, - "grad_norm": 1.7747809745083494, - "learning_rate": 3.991377745072205e-06, - "loss": 0.937, - "step": 651 - }, - { - "epoch": 0.05879965730261081, - "grad_norm": 1.5876367044536257, - "learning_rate": 3.991323471094517e-06, - "loss": 1.0723, - "step": 652 - }, - { - "epoch": 0.05888984082608108, - "grad_norm": 2.025779046801492, - "learning_rate": 3.991269027206131e-06, - "loss": 1.0278, - "step": 653 - }, - { - "epoch": 0.05898002434955134, - "grad_norm": 0.9187080166105801, - "learning_rate": 3.9912144134116916e-06, - "loss": 0.9126, - "step": 654 - }, - { - "epoch": 0.0590702078730216, - "grad_norm": 1.6853097901410046, - "learning_rate": 3.99115962971586e-06, - "loss": 0.9714, - "step": 655 - }, - { - "epoch": 0.05916039139649186, - "grad_norm": 2.2561642659560066, - "learning_rate": 3.991104676123308e-06, - "loss": 1.0804, - "step": 656 - }, - { - "epoch": 0.05925057491996212, - "grad_norm": 0.7044740239343494, - "learning_rate": 3.991049552638727e-06, - "loss": 0.8604, - "step": 657 - }, - { - "epoch": 0.05934075844343238, - "grad_norm": 2.1250281612549773, - "learning_rate": 3.99099425926682e-06, - "loss": 1.0, - "step": 658 - }, - { - "epoch": 0.05943094196690265, - "grad_norm": 1.4632473397386367, - "learning_rate": 3.990938796012304e-06, - "loss": 1.0151, - "step": 659 - }, - { - "epoch": 0.05952112549037291, - "grad_norm": 1.5874140588521108, - "learning_rate": 3.990883162879912e-06, - "loss": 1.1977, - "step": 660 - }, - { - "epoch": 0.05961130901384317, - "grad_norm": 1.6889431751600401, - "learning_rate": 3.990827359874391e-06, - "loss": 1.114, - "step": 661 - }, - { - "epoch": 0.05970149253731343, - "grad_norm": 1.5203848748068707, - "learning_rate": 3.990771387000503e-06, - "loss": 1.0413, - "step": 662 - }, - { - "epoch": 0.05979167606078369, - "grad_norm": 1.823536126379575, - "learning_rate": 3.990715244263023e-06, - "loss": 1.0712, - "step": 663 - }, - { - "epoch": 0.05988185958425396, - "grad_norm": 0.6816031328181347, - "learning_rate": 3.990658931666741e-06, - "loss": 0.8618, - "step": 664 - }, - { - "epoch": 0.05997204310772422, - "grad_norm": 1.6338416693609454, - "learning_rate": 3.990602449216463e-06, - "loss": 0.9682, - "step": 665 - }, - { - "epoch": 0.06006222663119448, - "grad_norm": 1.4463850011436108, - "learning_rate": 3.990545796917008e-06, - "loss": 0.9475, - "step": 666 - }, - { - "epoch": 0.06015241015466474, - "grad_norm": 3.754163084264832, - "learning_rate": 3.99048897477321e-06, - "loss": 1.102, - "step": 667 - }, - { - "epoch": 0.060242593678135004, - "grad_norm": 1.7510084243998758, - "learning_rate": 3.990431982789917e-06, - "loss": 1.0541, - "step": 668 - }, - { - "epoch": 0.060332777201605264, - "grad_norm": 1.5801333499117782, - "learning_rate": 3.990374820971992e-06, - "loss": 0.9801, - "step": 669 - }, - { - "epoch": 0.06042296072507553, - "grad_norm": 1.7495438798763852, - "learning_rate": 3.990317489324312e-06, - "loss": 0.9365, - "step": 670 - }, - { - "epoch": 0.06051314424854579, - "grad_norm": 2.670809706307188, - "learning_rate": 3.99025998785177e-06, - "loss": 1.028, - "step": 671 - }, - { - "epoch": 0.060603327772016054, - "grad_norm": 1.609740947116753, - "learning_rate": 3.990202316559271e-06, - "loss": 0.9841, - "step": 672 - }, - { - "epoch": 0.060693511295486315, - "grad_norm": 2.1070461274082546, - "learning_rate": 3.990144475451738e-06, - "loss": 1.0928, - "step": 673 - }, - { - "epoch": 0.060783694818956575, - "grad_norm": 1.4195770306676372, - "learning_rate": 3.9900864645341036e-06, - "loss": 1.0436, - "step": 674 - }, - { - "epoch": 0.060873878342426836, - "grad_norm": 1.492646790766685, - "learning_rate": 3.990028283811319e-06, - "loss": 1.0475, - "step": 675 - }, - { - "epoch": 0.060964061865897104, - "grad_norm": 1.5098865060911848, - "learning_rate": 3.989969933288348e-06, - "loss": 1.0353, - "step": 676 - }, - { - "epoch": 0.061054245389367365, - "grad_norm": 0.6993851703384983, - "learning_rate": 3.98991141297017e-06, - "loss": 0.821, - "step": 677 - }, - { - "epoch": 0.061144428912837626, - "grad_norm": 1.4137289591740434, - "learning_rate": 3.989852722861778e-06, - "loss": 0.9863, - "step": 678 - }, - { - "epoch": 0.061234612436307886, - "grad_norm": 0.7428196674237041, - "learning_rate": 3.98979386296818e-06, - "loss": 0.8304, - "step": 679 - }, - { - "epoch": 0.06132479595977815, - "grad_norm": 1.4990631197669055, - "learning_rate": 3.989734833294398e-06, - "loss": 1.0303, - "step": 680 - }, - { - "epoch": 0.06141497948324841, - "grad_norm": 1.5304487039169412, - "learning_rate": 3.989675633845469e-06, - "loss": 1.1193, - "step": 681 - }, - { - "epoch": 0.061505163006718676, - "grad_norm": 1.7495745414167156, - "learning_rate": 3.989616264626443e-06, - "loss": 1.0263, - "step": 682 - }, - { - "epoch": 0.06159534653018894, - "grad_norm": 1.359133731872582, - "learning_rate": 3.989556725642388e-06, - "loss": 1.0809, - "step": 683 - }, - { - "epoch": 0.0616855300536592, - "grad_norm": 2.113042059881008, - "learning_rate": 3.989497016898382e-06, - "loss": 1.091, - "step": 684 - }, - { - "epoch": 0.06177571357712946, - "grad_norm": 1.3604495306947726, - "learning_rate": 3.98943713839952e-06, - "loss": 1.029, - "step": 685 - }, - { - "epoch": 0.06186589710059972, - "grad_norm": 0.7074797245935087, - "learning_rate": 3.9893770901509125e-06, - "loss": 0.8688, - "step": 686 - }, - { - "epoch": 0.06195608062406998, - "grad_norm": 1.4794064537865756, - "learning_rate": 3.989316872157682e-06, - "loss": 1.037, - "step": 687 - }, - { - "epoch": 0.06204626414754025, - "grad_norm": 1.5527244711506083, - "learning_rate": 3.989256484424968e-06, - "loss": 1.1162, - "step": 688 - }, - { - "epoch": 0.06213644767101051, - "grad_norm": 1.6656553856116045, - "learning_rate": 3.98919592695792e-06, - "loss": 1.0094, - "step": 689 - }, - { - "epoch": 0.06222663119448077, - "grad_norm": 2.814860561298414, - "learning_rate": 3.9891351997617096e-06, - "loss": 1.074, - "step": 690 - }, - { - "epoch": 0.06231681471795103, - "grad_norm": 1.4691622541032818, - "learning_rate": 3.989074302841514e-06, - "loss": 0.9822, - "step": 691 - }, - { - "epoch": 0.06240699824142129, - "grad_norm": 1.6999339034750975, - "learning_rate": 3.989013236202533e-06, - "loss": 1.1568, - "step": 692 - }, - { - "epoch": 0.06249718176489155, - "grad_norm": 1.36938844361664, - "learning_rate": 3.988951999849974e-06, - "loss": 1.0819, - "step": 693 - }, - { - "epoch": 0.06258736528836181, - "grad_norm": 2.1435392270584255, - "learning_rate": 3.988890593789064e-06, - "loss": 0.959, - "step": 694 - }, - { - "epoch": 0.06267754881183207, - "grad_norm": 1.2788361396750474, - "learning_rate": 3.9888290180250415e-06, - "loss": 1.0251, - "step": 695 - }, - { - "epoch": 0.06276773233530233, - "grad_norm": 1.496902366818064, - "learning_rate": 3.988767272563161e-06, - "loss": 1.0102, - "step": 696 - }, - { - "epoch": 0.06285791585877261, - "grad_norm": 1.8634315925899858, - "learning_rate": 3.988705357408691e-06, - "loss": 0.9846, - "step": 697 - }, - { - "epoch": 0.06294809938224287, - "grad_norm": 1.506155259567908, - "learning_rate": 3.9886432725669146e-06, - "loss": 1.0035, - "step": 698 - }, - { - "epoch": 0.06303828290571313, - "grad_norm": 1.5755716194815914, - "learning_rate": 3.988581018043128e-06, - "loss": 1.0014, - "step": 699 - }, - { - "epoch": 0.06312846642918339, - "grad_norm": 1.7107439323561857, - "learning_rate": 3.988518593842645e-06, - "loss": 1.0647, - "step": 700 - }, - { - "epoch": 0.06321864995265365, - "grad_norm": 1.5559936863886366, - "learning_rate": 3.9884559999707906e-06, - "loss": 1.0384, - "step": 701 - }, - { - "epoch": 0.06330883347612391, - "grad_norm": 1.7377462528184517, - "learning_rate": 3.988393236432906e-06, - "loss": 1.0536, - "step": 702 - }, - { - "epoch": 0.06339901699959417, - "grad_norm": 1.5852193474843386, - "learning_rate": 3.988330303234347e-06, - "loss": 1.1206, - "step": 703 - }, - { - "epoch": 0.06348920052306443, - "grad_norm": 1.6693855361291785, - "learning_rate": 3.988267200380483e-06, - "loss": 0.9973, - "step": 704 - }, - { - "epoch": 0.0635793840465347, - "grad_norm": 1.3074169276234775, - "learning_rate": 3.988203927876698e-06, - "loss": 1.09, - "step": 705 - }, - { - "epoch": 0.06366956757000496, - "grad_norm": 1.4986900331591995, - "learning_rate": 3.988140485728391e-06, - "loss": 0.9271, - "step": 706 - }, - { - "epoch": 0.06375975109347522, - "grad_norm": 1.4422494959705547, - "learning_rate": 3.988076873940975e-06, - "loss": 0.9925, - "step": 707 - }, - { - "epoch": 0.06384993461694548, - "grad_norm": 2.601264730372372, - "learning_rate": 3.9880130925198786e-06, - "loss": 1.0398, - "step": 708 - }, - { - "epoch": 0.06394011814041575, - "grad_norm": 2.1147196558213777, - "learning_rate": 3.987949141470543e-06, - "loss": 1.0608, - "step": 709 - }, - { - "epoch": 0.06403030166388601, - "grad_norm": 2.539156867987924, - "learning_rate": 3.987885020798425e-06, - "loss": 1.042, - "step": 710 - }, - { - "epoch": 0.06412048518735627, - "grad_norm": 2.07429196790648, - "learning_rate": 3.987820730508996e-06, - "loss": 1.0127, - "step": 711 - }, - { - "epoch": 0.06421066871082654, - "grad_norm": 1.5732671166214305, - "learning_rate": 3.987756270607742e-06, - "loss": 1.1398, - "step": 712 - }, - { - "epoch": 0.0643008522342968, - "grad_norm": 0.6879472144969279, - "learning_rate": 3.987691641100162e-06, - "loss": 0.8457, - "step": 713 - }, - { - "epoch": 0.06439103575776706, - "grad_norm": 2.251021577132603, - "learning_rate": 3.987626841991771e-06, - "loss": 1.1263, - "step": 714 - }, - { - "epoch": 0.06448121928123732, - "grad_norm": 1.583032813748438, - "learning_rate": 3.987561873288099e-06, - "loss": 1.0729, - "step": 715 - }, - { - "epoch": 0.06457140280470758, - "grad_norm": 1.3904166333399834, - "learning_rate": 3.987496734994688e-06, - "loss": 0.9896, - "step": 716 - }, - { - "epoch": 0.06466158632817784, - "grad_norm": 1.747410083907831, - "learning_rate": 3.987431427117097e-06, - "loss": 1.0679, - "step": 717 - }, - { - "epoch": 0.0647517698516481, - "grad_norm": 3.3248502711951855, - "learning_rate": 3.9873659496608985e-06, - "loss": 1.0611, - "step": 718 - }, - { - "epoch": 0.06484195337511836, - "grad_norm": 1.9510988025137042, - "learning_rate": 3.987300302631678e-06, - "loss": 0.9944, - "step": 719 - }, - { - "epoch": 0.06493213689858862, - "grad_norm": 1.600072400123953, - "learning_rate": 3.987234486035039e-06, - "loss": 1.0788, - "step": 720 - }, - { - "epoch": 0.0650223204220589, - "grad_norm": 1.6621956539509872, - "learning_rate": 3.987168499876595e-06, - "loss": 1.0908, - "step": 721 - }, - { - "epoch": 0.06511250394552916, - "grad_norm": 1.5724053551797401, - "learning_rate": 3.987102344161978e-06, - "loss": 1.0527, - "step": 722 - }, - { - "epoch": 0.06520268746899942, - "grad_norm": 1.6116603439480544, - "learning_rate": 3.987036018896832e-06, - "loss": 1.08, - "step": 723 - }, - { - "epoch": 0.06529287099246968, - "grad_norm": 1.9626162209083338, - "learning_rate": 3.986969524086817e-06, - "loss": 1.1073, - "step": 724 - }, - { - "epoch": 0.06538305451593994, - "grad_norm": 2.0410304602854983, - "learning_rate": 3.986902859737605e-06, - "loss": 1.029, - "step": 725 - }, - { - "epoch": 0.0654732380394102, - "grad_norm": 1.9570945379261189, - "learning_rate": 3.986836025854886e-06, - "loss": 0.9804, - "step": 726 - }, - { - "epoch": 0.06556342156288046, - "grad_norm": 1.7089514505757046, - "learning_rate": 3.986769022444362e-06, - "loss": 0.9982, - "step": 727 - }, - { - "epoch": 0.06565360508635072, - "grad_norm": 1.743985262077949, - "learning_rate": 3.986701849511751e-06, - "loss": 1.0398, - "step": 728 - }, - { - "epoch": 0.06574378860982098, - "grad_norm": 1.7917287357431628, - "learning_rate": 3.986634507062782e-06, - "loss": 1.023, - "step": 729 - }, - { - "epoch": 0.06583397213329124, - "grad_norm": 1.7881435382926052, - "learning_rate": 3.986566995103204e-06, - "loss": 1.0625, - "step": 730 - }, - { - "epoch": 0.0659241556567615, - "grad_norm": 1.5923616682782618, - "learning_rate": 3.986499313638776e-06, - "loss": 0.9819, - "step": 731 - }, - { - "epoch": 0.06601433918023177, - "grad_norm": 1.4253362091708692, - "learning_rate": 3.986431462675272e-06, - "loss": 0.977, - "step": 732 - }, - { - "epoch": 0.06610452270370204, - "grad_norm": 1.344675898376672, - "learning_rate": 3.9863634422184835e-06, - "loss": 1.0547, - "step": 733 - }, - { - "epoch": 0.0661947062271723, - "grad_norm": 1.6076628827619912, - "learning_rate": 3.986295252274213e-06, - "loss": 1.0772, - "step": 734 - }, - { - "epoch": 0.06628488975064256, - "grad_norm": 1.373855591413466, - "learning_rate": 3.9862268928482796e-06, - "loss": 1.0948, - "step": 735 - }, - { - "epoch": 0.06637507327411282, - "grad_norm": 1.384026420228076, - "learning_rate": 3.986158363946515e-06, - "loss": 0.9536, - "step": 736 - }, - { - "epoch": 0.06646525679758308, - "grad_norm": 2.850828813201553, - "learning_rate": 3.9860896655747685e-06, - "loss": 1.044, - "step": 737 - }, - { - "epoch": 0.06655544032105334, - "grad_norm": 1.6378841211693402, - "learning_rate": 3.9860207977388994e-06, - "loss": 1.1167, - "step": 738 - }, - { - "epoch": 0.0666456238445236, - "grad_norm": 1.4861103554998043, - "learning_rate": 3.9859517604447854e-06, - "loss": 0.9734, - "step": 739 - }, - { - "epoch": 0.06673580736799387, - "grad_norm": 2.364908966183468, - "learning_rate": 3.985882553698317e-06, - "loss": 1.0135, - "step": 740 - }, - { - "epoch": 0.06682599089146413, - "grad_norm": 2.318619235482439, - "learning_rate": 3.985813177505399e-06, - "loss": 1.101, - "step": 741 - }, - { - "epoch": 0.06691617441493439, - "grad_norm": 1.5668573277028055, - "learning_rate": 3.985743631871951e-06, - "loss": 0.9975, - "step": 742 - }, - { - "epoch": 0.06700635793840465, - "grad_norm": 1.6825977751618884, - "learning_rate": 3.985673916803907e-06, - "loss": 1.0491, - "step": 743 - }, - { - "epoch": 0.06709654146187491, - "grad_norm": 1.7977517684102395, - "learning_rate": 3.985604032307215e-06, - "loss": 1.0616, - "step": 744 - }, - { - "epoch": 0.06718672498534518, - "grad_norm": 1.669460513982095, - "learning_rate": 3.985533978387839e-06, - "loss": 1.0533, - "step": 745 - }, - { - "epoch": 0.06727690850881544, - "grad_norm": 1.6016502542412343, - "learning_rate": 3.985463755051756e-06, - "loss": 1.077, - "step": 746 - }, - { - "epoch": 0.0673670920322857, - "grad_norm": 1.4750569251762373, - "learning_rate": 3.9853933623049575e-06, - "loss": 1.0293, - "step": 747 - }, - { - "epoch": 0.06745727555575597, - "grad_norm": 1.8354388269940596, - "learning_rate": 3.98532280015345e-06, - "loss": 1.0391, - "step": 748 - }, - { - "epoch": 0.06754745907922623, - "grad_norm": 2.1203504071259127, - "learning_rate": 3.985252068603254e-06, - "loss": 0.9588, - "step": 749 - }, - { - "epoch": 0.06763764260269649, - "grad_norm": 2.759770894838608, - "learning_rate": 3.985181167660406e-06, - "loss": 1.07, - "step": 750 - }, - { - "epoch": 0.06772782612616675, - "grad_norm": 1.5613423445843604, - "learning_rate": 3.985110097330953e-06, - "loss": 1.0605, - "step": 751 - }, - { - "epoch": 0.06781800964963701, - "grad_norm": 1.5705801512709123, - "learning_rate": 3.985038857620962e-06, - "loss": 1.041, - "step": 752 - }, - { - "epoch": 0.06790819317310727, - "grad_norm": 0.7223522062175923, - "learning_rate": 3.9849674485365094e-06, - "loss": 0.85, - "step": 753 - }, - { - "epoch": 0.06799837669657753, - "grad_norm": 1.8540146440341212, - "learning_rate": 3.98489587008369e-06, - "loss": 0.9694, - "step": 754 - }, - { - "epoch": 0.06808856022004779, - "grad_norm": 2.6827680936236438, - "learning_rate": 3.98482412226861e-06, - "loss": 0.8876, - "step": 755 - }, - { - "epoch": 0.06817874374351805, - "grad_norm": 1.390291924181821, - "learning_rate": 3.984752205097391e-06, - "loss": 1.0715, - "step": 756 - }, - { - "epoch": 0.06826892726698833, - "grad_norm": 1.5153839863541625, - "learning_rate": 3.984680118576171e-06, - "loss": 1.0498, - "step": 757 - }, - { - "epoch": 0.06835911079045859, - "grad_norm": 1.6457570778608925, - "learning_rate": 3.984607862711099e-06, - "loss": 1.0049, - "step": 758 - }, - { - "epoch": 0.06844929431392885, - "grad_norm": 1.9443840047737535, - "learning_rate": 3.984535437508341e-06, - "loss": 1.0734, - "step": 759 - }, - { - "epoch": 0.06853947783739911, - "grad_norm": 0.675224670354541, - "learning_rate": 3.984462842974078e-06, - "loss": 0.8521, - "step": 760 - }, - { - "epoch": 0.06862966136086937, - "grad_norm": 1.491823881094132, - "learning_rate": 3.984390079114502e-06, - "loss": 1.0563, - "step": 761 - }, - { - "epoch": 0.06871984488433963, - "grad_norm": 1.4774558780749851, - "learning_rate": 3.984317145935824e-06, - "loss": 1.1589, - "step": 762 - }, - { - "epoch": 0.06881002840780989, - "grad_norm": 1.5648019237518187, - "learning_rate": 3.984244043444264e-06, - "loss": 1.0184, - "step": 763 - }, - { - "epoch": 0.06890021193128015, - "grad_norm": 1.3503689650259747, - "learning_rate": 3.984170771646062e-06, - "loss": 0.9922, - "step": 764 - }, - { - "epoch": 0.06899039545475041, - "grad_norm": 1.584659305623351, - "learning_rate": 3.9840973305474695e-06, - "loss": 1.0855, - "step": 765 - }, - { - "epoch": 0.06908057897822067, - "grad_norm": 1.5067324550297705, - "learning_rate": 3.984023720154752e-06, - "loss": 1.0901, - "step": 766 - }, - { - "epoch": 0.06917076250169094, - "grad_norm": 1.5684130363737852, - "learning_rate": 3.9839499404741915e-06, - "loss": 1.0572, - "step": 767 - }, - { - "epoch": 0.0692609460251612, - "grad_norm": 0.7392562805724302, - "learning_rate": 3.983875991512082e-06, - "loss": 0.8864, - "step": 768 - }, - { - "epoch": 0.06935112954863147, - "grad_norm": 1.5362429279260834, - "learning_rate": 3.9838018732747345e-06, - "loss": 1.059, - "step": 769 - }, - { - "epoch": 0.06944131307210173, - "grad_norm": 1.331454378656491, - "learning_rate": 3.9837275857684716e-06, - "loss": 1.0244, - "step": 770 - }, - { - "epoch": 0.06953149659557199, - "grad_norm": 1.6395691880323229, - "learning_rate": 3.983653128999634e-06, - "loss": 1.162, - "step": 771 - }, - { - "epoch": 0.06962168011904225, - "grad_norm": 1.3514541406136664, - "learning_rate": 3.983578502974574e-06, - "loss": 1.0743, - "step": 772 - }, - { - "epoch": 0.06971186364251251, - "grad_norm": 1.756393334455185, - "learning_rate": 3.983503707699658e-06, - "loss": 1.0312, - "step": 773 - }, - { - "epoch": 0.06980204716598278, - "grad_norm": 1.4916676519522085, - "learning_rate": 3.983428743181268e-06, - "loss": 1.0702, - "step": 774 - }, - { - "epoch": 0.06989223068945304, - "grad_norm": 4.079335234097393, - "learning_rate": 3.983353609425802e-06, - "loss": 1.1172, - "step": 775 - }, - { - "epoch": 0.0699824142129233, - "grad_norm": 1.486176371503179, - "learning_rate": 3.983278306439671e-06, - "loss": 1.044, - "step": 776 - }, - { - "epoch": 0.07007259773639356, - "grad_norm": 1.372798805020886, - "learning_rate": 3.983202834229297e-06, - "loss": 1.0746, - "step": 777 - }, - { - "epoch": 0.07016278125986382, - "grad_norm": 1.4807162518317682, - "learning_rate": 3.983127192801123e-06, - "loss": 1.1203, - "step": 778 - }, - { - "epoch": 0.07025296478333408, - "grad_norm": 1.5024774278668513, - "learning_rate": 3.983051382161602e-06, - "loss": 1.0771, - "step": 779 - }, - { - "epoch": 0.07034314830680434, - "grad_norm": 1.4400058135604066, - "learning_rate": 3.982975402317203e-06, - "loss": 1.0704, - "step": 780 - }, - { - "epoch": 0.07043333183027461, - "grad_norm": 1.8230043735294748, - "learning_rate": 3.982899253274409e-06, - "loss": 1.0637, - "step": 781 - }, - { - "epoch": 0.07052351535374488, - "grad_norm": 1.7788725932291762, - "learning_rate": 3.982822935039717e-06, - "loss": 1.0282, - "step": 782 - }, - { - "epoch": 0.07061369887721514, - "grad_norm": 1.9025261574148795, - "learning_rate": 3.982746447619638e-06, - "loss": 0.9727, - "step": 783 - }, - { - "epoch": 0.0707038824006854, - "grad_norm": 0.715292284385296, - "learning_rate": 3.9826697910207e-06, - "loss": 0.8615, - "step": 784 - }, - { - "epoch": 0.07079406592415566, - "grad_norm": 1.3754843379009192, - "learning_rate": 3.982592965249442e-06, - "loss": 1.0289, - "step": 785 - }, - { - "epoch": 0.07088424944762592, - "grad_norm": 1.9648646207576932, - "learning_rate": 3.982515970312422e-06, - "loss": 1.09, - "step": 786 - }, - { - "epoch": 0.07097443297109618, - "grad_norm": 3.855837293207463, - "learning_rate": 3.982438806216207e-06, - "loss": 1.015, - "step": 787 - }, - { - "epoch": 0.07106461649456644, - "grad_norm": 1.5533342462272308, - "learning_rate": 3.982361472967382e-06, - "loss": 1.0272, - "step": 788 - }, - { - "epoch": 0.0711548000180367, - "grad_norm": 3.5889907504239154, - "learning_rate": 3.982283970572546e-06, - "loss": 1.1075, - "step": 789 - }, - { - "epoch": 0.07124498354150696, - "grad_norm": 1.8507427863733161, - "learning_rate": 3.982206299038311e-06, - "loss": 1.0186, - "step": 790 - }, - { - "epoch": 0.07133516706497722, - "grad_norm": 1.4518427780457828, - "learning_rate": 3.9821284583713054e-06, - "loss": 1.1163, - "step": 791 - }, - { - "epoch": 0.07142535058844748, - "grad_norm": 1.6058818350677873, - "learning_rate": 3.98205044857817e-06, - "loss": 1.0281, - "step": 792 - }, - { - "epoch": 0.07151553411191776, - "grad_norm": 0.8204088063700617, - "learning_rate": 3.981972269665561e-06, - "loss": 0.8997, - "step": 793 - }, - { - "epoch": 0.07160571763538802, - "grad_norm": 1.3891581396212296, - "learning_rate": 3.98189392164015e-06, - "loss": 1.0329, - "step": 794 - }, - { - "epoch": 0.07169590115885828, - "grad_norm": 1.4363484123650123, - "learning_rate": 3.981815404508621e-06, - "loss": 1.0832, - "step": 795 - }, - { - "epoch": 0.07178608468232854, - "grad_norm": 1.5454073792925287, - "learning_rate": 3.981736718277674e-06, - "loss": 1.0395, - "step": 796 - }, - { - "epoch": 0.0718762682057988, - "grad_norm": 1.6477852204276844, - "learning_rate": 3.9816578629540235e-06, - "loss": 1.0789, - "step": 797 - }, - { - "epoch": 0.07196645172926906, - "grad_norm": 1.7064464274373647, - "learning_rate": 3.981578838544398e-06, - "loss": 1.0533, - "step": 798 - }, - { - "epoch": 0.07205663525273932, - "grad_norm": 0.7012151423203844, - "learning_rate": 3.981499645055539e-06, - "loss": 0.8292, - "step": 799 - }, - { - "epoch": 0.07214681877620958, - "grad_norm": 0.6743389902055162, - "learning_rate": 3.981420282494204e-06, - "loss": 0.8774, - "step": 800 - }, - { - "epoch": 0.07223700229967984, - "grad_norm": 0.7672890465807574, - "learning_rate": 3.981340750867166e-06, - "loss": 0.8761, - "step": 801 - }, - { - "epoch": 0.0723271858231501, - "grad_norm": 1.360187068723717, - "learning_rate": 3.981261050181209e-06, - "loss": 1.0332, - "step": 802 - }, - { - "epoch": 0.07241736934662037, - "grad_norm": 1.6651406534027762, - "learning_rate": 3.9811811804431355e-06, - "loss": 1.0017, - "step": 803 - }, - { - "epoch": 0.07250755287009064, - "grad_norm": 1.5867741074101844, - "learning_rate": 3.981101141659759e-06, - "loss": 1.0768, - "step": 804 - }, - { - "epoch": 0.0725977363935609, - "grad_norm": 1.5176042041477011, - "learning_rate": 3.98102093383791e-06, - "loss": 1.0671, - "step": 805 - }, - { - "epoch": 0.07268791991703116, - "grad_norm": 1.583876826048858, - "learning_rate": 3.9809405569844315e-06, - "loss": 0.9655, - "step": 806 - }, - { - "epoch": 0.07277810344050142, - "grad_norm": 1.4620796675521512, - "learning_rate": 3.980860011106182e-06, - "loss": 1.0075, - "step": 807 - }, - { - "epoch": 0.07286828696397168, - "grad_norm": 1.341119764747467, - "learning_rate": 3.980779296210033e-06, - "loss": 0.9839, - "step": 808 - }, - { - "epoch": 0.07295847048744195, - "grad_norm": 1.9257588414248363, - "learning_rate": 3.980698412302874e-06, - "loss": 1.1202, - "step": 809 - }, - { - "epoch": 0.0730486540109122, - "grad_norm": 1.442711381415534, - "learning_rate": 3.980617359391604e-06, - "loss": 1.0575, - "step": 810 - }, - { - "epoch": 0.07313883753438247, - "grad_norm": 1.5405577546275302, - "learning_rate": 3.98053613748314e-06, - "loss": 0.9998, - "step": 811 - }, - { - "epoch": 0.07322902105785273, - "grad_norm": 1.4969496706374295, - "learning_rate": 3.980454746584413e-06, - "loss": 0.9086, - "step": 812 - }, - { - "epoch": 0.07331920458132299, - "grad_norm": 2.259323089974105, - "learning_rate": 3.9803731867023665e-06, - "loss": 1.1195, - "step": 813 - }, - { - "epoch": 0.07340938810479325, - "grad_norm": 1.7882174032603648, - "learning_rate": 3.9802914578439596e-06, - "loss": 1.0254, - "step": 814 - }, - { - "epoch": 0.07349957162826351, - "grad_norm": 1.457545131030418, - "learning_rate": 3.980209560016167e-06, - "loss": 1.1341, - "step": 815 - }, - { - "epoch": 0.07358975515173379, - "grad_norm": 2.0536138826329724, - "learning_rate": 3.980127493225975e-06, - "loss": 1.1372, - "step": 816 - }, - { - "epoch": 0.07367993867520405, - "grad_norm": 2.297709203250628, - "learning_rate": 3.980045257480387e-06, - "loss": 1.0539, - "step": 817 - }, - { - "epoch": 0.0737701221986743, - "grad_norm": 1.679634838609439, - "learning_rate": 3.9799628527864205e-06, - "loss": 0.9971, - "step": 818 - }, - { - "epoch": 0.07386030572214457, - "grad_norm": 1.4566499349184585, - "learning_rate": 3.979880279151106e-06, - "loss": 1.0232, - "step": 819 - }, - { - "epoch": 0.07395048924561483, - "grad_norm": 1.6921176603633896, - "learning_rate": 3.979797536581489e-06, - "loss": 0.9885, - "step": 820 - }, - { - "epoch": 0.07404067276908509, - "grad_norm": 1.8743602932985532, - "learning_rate": 3.97971462508463e-06, - "loss": 0.8973, - "step": 821 - }, - { - "epoch": 0.07413085629255535, - "grad_norm": 1.4287589971838117, - "learning_rate": 3.979631544667603e-06, - "loss": 1.0264, - "step": 822 - }, - { - "epoch": 0.07422103981602561, - "grad_norm": 1.7696353300698069, - "learning_rate": 3.979548295337496e-06, - "loss": 1.0546, - "step": 823 - }, - { - "epoch": 0.07431122333949587, - "grad_norm": 1.2666114447920669, - "learning_rate": 3.9794648771014146e-06, - "loss": 1.0049, - "step": 824 - }, - { - "epoch": 0.07440140686296613, - "grad_norm": 2.1471638181221824, - "learning_rate": 3.9793812899664745e-06, - "loss": 1.0509, - "step": 825 - }, - { - "epoch": 0.0744915903864364, - "grad_norm": 1.9437933202497466, - "learning_rate": 3.979297533939809e-06, - "loss": 1.0032, - "step": 826 - }, - { - "epoch": 0.07458177390990665, - "grad_norm": 1.6286825122450197, - "learning_rate": 3.979213609028564e-06, - "loss": 1.0179, - "step": 827 - }, - { - "epoch": 0.07467195743337693, - "grad_norm": 1.528906683514845, - "learning_rate": 3.979129515239901e-06, - "loss": 1.0572, - "step": 828 - }, - { - "epoch": 0.07476214095684719, - "grad_norm": 1.40934874008988, - "learning_rate": 3.979045252580994e-06, - "loss": 1.0482, - "step": 829 - }, - { - "epoch": 0.07485232448031745, - "grad_norm": 1.3542711755576913, - "learning_rate": 3.978960821059034e-06, - "loss": 0.9858, - "step": 830 - }, - { - "epoch": 0.07494250800378771, - "grad_norm": 1.5408940140346425, - "learning_rate": 3.978876220681225e-06, - "loss": 1.0718, - "step": 831 - }, - { - "epoch": 0.07503269152725797, - "grad_norm": 1.7215920965337679, - "learning_rate": 3.978791451454786e-06, - "loss": 0.9625, - "step": 832 - }, - { - "epoch": 0.07512287505072823, - "grad_norm": 2.30081295904773, - "learning_rate": 3.978706513386949e-06, - "loss": 1.0076, - "step": 833 - }, - { - "epoch": 0.0752130585741985, - "grad_norm": 1.736824642630527, - "learning_rate": 3.978621406484962e-06, - "loss": 1.0577, - "step": 834 - }, - { - "epoch": 0.07530324209766875, - "grad_norm": 1.5190553679982541, - "learning_rate": 3.978536130756086e-06, - "loss": 1.029, - "step": 835 - }, - { - "epoch": 0.07539342562113902, - "grad_norm": 4.410518661758802, - "learning_rate": 3.978450686207599e-06, - "loss": 1.0963, - "step": 836 - }, - { - "epoch": 0.07548360914460928, - "grad_norm": 1.7555546480222093, - "learning_rate": 3.978365072846789e-06, - "loss": 1.0224, - "step": 837 - }, - { - "epoch": 0.07557379266807954, - "grad_norm": 1.3941169785930898, - "learning_rate": 3.9782792906809625e-06, - "loss": 0.9908, - "step": 838 - }, - { - "epoch": 0.0756639761915498, - "grad_norm": 1.7136113424155504, - "learning_rate": 3.97819333971744e-06, - "loss": 1.0206, - "step": 839 - }, - { - "epoch": 0.07575415971502007, - "grad_norm": 4.178648061829781, - "learning_rate": 3.978107219963553e-06, - "loss": 1.0463, - "step": 840 - }, - { - "epoch": 0.07584434323849033, - "grad_norm": 1.4699193479509653, - "learning_rate": 3.978020931426651e-06, - "loss": 0.9493, - "step": 841 - }, - { - "epoch": 0.0759345267619606, - "grad_norm": 1.3666071940872067, - "learning_rate": 3.977934474114096e-06, - "loss": 0.9859, - "step": 842 - }, - { - "epoch": 0.07602471028543085, - "grad_norm": 0.6596498522187328, - "learning_rate": 3.977847848033267e-06, - "loss": 0.8276, - "step": 843 - }, - { - "epoch": 0.07611489380890112, - "grad_norm": 1.7431404689185876, - "learning_rate": 3.977761053191553e-06, - "loss": 1.0895, - "step": 844 - }, - { - "epoch": 0.07620507733237138, - "grad_norm": 1.3807939354742371, - "learning_rate": 3.977674089596361e-06, - "loss": 1.0198, - "step": 845 - }, - { - "epoch": 0.07629526085584164, - "grad_norm": 0.6860728189033451, - "learning_rate": 3.97758695725511e-06, - "loss": 0.8283, - "step": 846 - }, - { - "epoch": 0.0763854443793119, - "grad_norm": 1.2991576271566434, - "learning_rate": 3.977499656175236e-06, - "loss": 1.0304, - "step": 847 - }, - { - "epoch": 0.07647562790278216, - "grad_norm": 1.579143082468605, - "learning_rate": 3.977412186364187e-06, - "loss": 0.945, - "step": 848 - }, - { - "epoch": 0.07656581142625242, - "grad_norm": 1.6218440881624143, - "learning_rate": 3.977324547829428e-06, - "loss": 1.057, - "step": 849 - }, - { - "epoch": 0.07665599494972268, - "grad_norm": 4.198164384346812, - "learning_rate": 3.977236740578435e-06, - "loss": 1.0563, - "step": 850 - }, - { - "epoch": 0.07674617847319294, - "grad_norm": 1.533683866708824, - "learning_rate": 3.9771487646187015e-06, - "loss": 1.025, - "step": 851 - }, - { - "epoch": 0.07683636199666322, - "grad_norm": 2.1441962531141816, - "learning_rate": 3.9770606199577325e-06, - "loss": 1.0756, - "step": 852 - }, - { - "epoch": 0.07692654552013348, - "grad_norm": 1.934866068539638, - "learning_rate": 3.9769723066030505e-06, - "loss": 0.9697, - "step": 853 - }, - { - "epoch": 0.07701672904360374, - "grad_norm": 1.742271797638274, - "learning_rate": 3.976883824562191e-06, - "loss": 1.1003, - "step": 854 - }, - { - "epoch": 0.077106912567074, - "grad_norm": 1.502507339434224, - "learning_rate": 3.976795173842703e-06, - "loss": 1.0423, - "step": 855 - }, - { - "epoch": 0.07719709609054426, - "grad_norm": 2.114518401052564, - "learning_rate": 3.97670635445215e-06, - "loss": 1.012, - "step": 856 - }, - { - "epoch": 0.07728727961401452, - "grad_norm": 1.6451586616086136, - "learning_rate": 3.976617366398112e-06, - "loss": 1.0153, - "step": 857 - }, - { - "epoch": 0.07737746313748478, - "grad_norm": 1.4494532475111368, - "learning_rate": 3.976528209688181e-06, - "loss": 1.0686, - "step": 858 - }, - { - "epoch": 0.07746764666095504, - "grad_norm": 5.898874350027478, - "learning_rate": 3.976438884329965e-06, - "loss": 1.0365, - "step": 859 - }, - { - "epoch": 0.0775578301844253, - "grad_norm": 1.5017416697296972, - "learning_rate": 3.976349390331085e-06, - "loss": 1.1664, - "step": 860 - }, - { - "epoch": 0.07764801370789556, - "grad_norm": 2.0847701903377733, - "learning_rate": 3.976259727699178e-06, - "loss": 0.9995, - "step": 861 - }, - { - "epoch": 0.07773819723136582, - "grad_norm": 1.9772714542117749, - "learning_rate": 3.976169896441895e-06, - "loss": 0.9963, - "step": 862 - }, - { - "epoch": 0.07782838075483609, - "grad_norm": 2.1500415709269127, - "learning_rate": 3.976079896566898e-06, - "loss": 1.07, - "step": 863 - }, - { - "epoch": 0.07791856427830636, - "grad_norm": 1.7479676979929946, - "learning_rate": 3.97598972808187e-06, - "loss": 1.0977, - "step": 864 - }, - { - "epoch": 0.07800874780177662, - "grad_norm": 1.669937508493159, - "learning_rate": 3.975899390994501e-06, - "loss": 1.0344, - "step": 865 - }, - { - "epoch": 0.07809893132524688, - "grad_norm": 1.7013523781247615, - "learning_rate": 3.975808885312502e-06, - "loss": 1.0967, - "step": 866 - }, - { - "epoch": 0.07818911484871714, - "grad_norm": 1.4535492308539513, - "learning_rate": 3.975718211043594e-06, - "loss": 1.0848, - "step": 867 - }, - { - "epoch": 0.0782792983721874, - "grad_norm": 9.62138477153237, - "learning_rate": 3.975627368195515e-06, - "loss": 0.9551, - "step": 868 - }, - { - "epoch": 0.07836948189565766, - "grad_norm": 1.363024493416505, - "learning_rate": 3.975536356776015e-06, - "loss": 1.0077, - "step": 869 - }, - { - "epoch": 0.07845966541912792, - "grad_norm": 0.718128018152918, - "learning_rate": 3.975445176792861e-06, - "loss": 0.9139, - "step": 870 - }, - { - "epoch": 0.07854984894259819, - "grad_norm": 1.4862729437030042, - "learning_rate": 3.975353828253831e-06, - "loss": 1.0377, - "step": 871 - }, - { - "epoch": 0.07864003246606845, - "grad_norm": 1.5453060940724466, - "learning_rate": 3.97526231116672e-06, - "loss": 1.044, - "step": 872 - }, - { - "epoch": 0.07873021598953871, - "grad_norm": 2.456395101750641, - "learning_rate": 3.975170625539338e-06, - "loss": 0.98, - "step": 873 - }, - { - "epoch": 0.07882039951300897, - "grad_norm": 1.2696358975048598, - "learning_rate": 3.975078771379507e-06, - "loss": 1.0313, - "step": 874 - }, - { - "epoch": 0.07891058303647923, - "grad_norm": 1.6562810211155572, - "learning_rate": 3.974986748695064e-06, - "loss": 0.9479, - "step": 875 - }, - { - "epoch": 0.0790007665599495, - "grad_norm": 0.7595707268295668, - "learning_rate": 3.974894557493862e-06, - "loss": 0.8642, - "step": 876 - }, - { - "epoch": 0.07909095008341976, - "grad_norm": 1.4548083164020387, - "learning_rate": 3.974802197783768e-06, - "loss": 1.0834, - "step": 877 - }, - { - "epoch": 0.07918113360689003, - "grad_norm": 1.6971173501517676, - "learning_rate": 3.974709669572661e-06, - "loss": 1.1027, - "step": 878 - }, - { - "epoch": 0.07927131713036029, - "grad_norm": 1.4494409930563903, - "learning_rate": 3.974616972868436e-06, - "loss": 1.0282, - "step": 879 - }, - { - "epoch": 0.07936150065383055, - "grad_norm": 1.5772367942790522, - "learning_rate": 3.974524107679003e-06, - "loss": 1.0431, - "step": 880 - }, - { - "epoch": 0.07945168417730081, - "grad_norm": 1.2712254414394606, - "learning_rate": 3.974431074012286e-06, - "loss": 1.0322, - "step": 881 - }, - { - "epoch": 0.07954186770077107, - "grad_norm": 1.8529644258093083, - "learning_rate": 3.974337871876223e-06, - "loss": 1.0581, - "step": 882 - }, - { - "epoch": 0.07963205122424133, - "grad_norm": 1.3785031948031823, - "learning_rate": 3.974244501278766e-06, - "loss": 1.0086, - "step": 883 - }, - { - "epoch": 0.07972223474771159, - "grad_norm": 1.598413488481789, - "learning_rate": 3.974150962227883e-06, - "loss": 0.9859, - "step": 884 - }, - { - "epoch": 0.07981241827118185, - "grad_norm": 1.6151399780882676, - "learning_rate": 3.9740572547315535e-06, - "loss": 0.9592, - "step": 885 - }, - { - "epoch": 0.07990260179465211, - "grad_norm": 2.0834462580275765, - "learning_rate": 3.973963378797775e-06, - "loss": 1.0728, - "step": 886 - }, - { - "epoch": 0.07999278531812237, - "grad_norm": 1.517094636976456, - "learning_rate": 3.973869334434556e-06, - "loss": 1.0466, - "step": 887 - }, - { - "epoch": 0.08008296884159265, - "grad_norm": 1.3974554353681647, - "learning_rate": 3.973775121649922e-06, - "loss": 1.0541, - "step": 888 - }, - { - "epoch": 0.08017315236506291, - "grad_norm": 1.5446363491485107, - "learning_rate": 3.973680740451911e-06, - "loss": 0.9454, - "step": 889 - }, - { - "epoch": 0.08026333588853317, - "grad_norm": 1.5745494107161258, - "learning_rate": 3.9735861908485776e-06, - "loss": 1.0009, - "step": 890 - }, - { - "epoch": 0.08035351941200343, - "grad_norm": 1.404610886569, - "learning_rate": 3.973491472847987e-06, - "loss": 1.0547, - "step": 891 - }, - { - "epoch": 0.08044370293547369, - "grad_norm": 1.7153985379533385, - "learning_rate": 3.973396586458222e-06, - "loss": 0.9592, - "step": 892 - }, - { - "epoch": 0.08053388645894395, - "grad_norm": 2.939145540385566, - "learning_rate": 3.97330153168738e-06, - "loss": 1.0204, - "step": 893 - }, - { - "epoch": 0.08062406998241421, - "grad_norm": 1.5954266872508087, - "learning_rate": 3.973206308543571e-06, - "loss": 0.9727, - "step": 894 - }, - { - "epoch": 0.08071425350588447, - "grad_norm": 1.7894054900305478, - "learning_rate": 3.973110917034918e-06, - "loss": 1.003, - "step": 895 - }, - { - "epoch": 0.08080443702935473, - "grad_norm": 1.331143125085573, - "learning_rate": 3.973015357169563e-06, - "loss": 0.9528, - "step": 896 - }, - { - "epoch": 0.080894620552825, - "grad_norm": 1.6615679315116285, - "learning_rate": 3.972919628955659e-06, - "loss": 0.9322, - "step": 897 - }, - { - "epoch": 0.08098480407629526, - "grad_norm": 1.7358197217759532, - "learning_rate": 3.972823732401373e-06, - "loss": 1.0163, - "step": 898 - }, - { - "epoch": 0.08107498759976552, - "grad_norm": 1.4351395009516574, - "learning_rate": 3.972727667514888e-06, - "loss": 1.0229, - "step": 899 - }, - { - "epoch": 0.08116517112323579, - "grad_norm": 1.255437753481072, - "learning_rate": 3.972631434304402e-06, - "loss": 0.9877, - "step": 900 - }, - { - "epoch": 0.08125535464670605, - "grad_norm": 1.641881915405719, - "learning_rate": 3.972535032778124e-06, - "loss": 1.1418, - "step": 901 - }, - { - "epoch": 0.08134553817017631, - "grad_norm": 1.268303521056996, - "learning_rate": 3.97243846294428e-06, - "loss": 1.1236, - "step": 902 - }, - { - "epoch": 0.08143572169364657, - "grad_norm": 1.4819669766884542, - "learning_rate": 3.972341724811111e-06, - "loss": 1.1665, - "step": 903 - }, - { - "epoch": 0.08152590521711683, - "grad_norm": 2.9146081653888682, - "learning_rate": 3.972244818386872e-06, - "loss": 0.9717, - "step": 904 - }, - { - "epoch": 0.0816160887405871, - "grad_norm": 1.6825223199687407, - "learning_rate": 3.972147743679828e-06, - "loss": 0.9828, - "step": 905 - }, - { - "epoch": 0.08170627226405736, - "grad_norm": 1.5873392609447257, - "learning_rate": 3.972050500698265e-06, - "loss": 0.9974, - "step": 906 - }, - { - "epoch": 0.08179645578752762, - "grad_norm": 1.4681794702031745, - "learning_rate": 3.971953089450481e-06, - "loss": 1.0234, - "step": 907 - }, - { - "epoch": 0.08188663931099788, - "grad_norm": 1.4642922952882902, - "learning_rate": 3.971855509944784e-06, - "loss": 1.0981, - "step": 908 - }, - { - "epoch": 0.08197682283446814, - "grad_norm": 1.540537093867719, - "learning_rate": 3.971757762189504e-06, - "loss": 1.0157, - "step": 909 - }, - { - "epoch": 0.0820670063579384, - "grad_norm": 1.644201612621575, - "learning_rate": 3.9716598461929785e-06, - "loss": 1.0202, - "step": 910 - }, - { - "epoch": 0.08215718988140866, - "grad_norm": 1.5152420661339672, - "learning_rate": 3.971561761963563e-06, - "loss": 1.0069, - "step": 911 - }, - { - "epoch": 0.08224737340487893, - "grad_norm": 1.656909379519685, - "learning_rate": 3.971463509509628e-06, - "loss": 1.1032, - "step": 912 - }, - { - "epoch": 0.0823375569283492, - "grad_norm": 1.8382837764853945, - "learning_rate": 3.9713650888395555e-06, - "loss": 1.0283, - "step": 913 - }, - { - "epoch": 0.08242774045181946, - "grad_norm": 1.5604121946195209, - "learning_rate": 3.9712664999617425e-06, - "loss": 1.0301, - "step": 914 - }, - { - "epoch": 0.08251792397528972, - "grad_norm": 1.7091792578615475, - "learning_rate": 3.971167742884603e-06, - "loss": 1.0062, - "step": 915 - }, - { - "epoch": 0.08260810749875998, - "grad_norm": 1.5106059076430811, - "learning_rate": 3.971068817616564e-06, - "loss": 1.0798, - "step": 916 - }, - { - "epoch": 0.08269829102223024, - "grad_norm": 1.3408203125, - "learning_rate": 3.970969724166064e-06, - "loss": 1.0572, - "step": 917 - }, - { - "epoch": 0.0827884745457005, - "grad_norm": 0.7036463605952646, - "learning_rate": 3.970870462541559e-06, - "loss": 0.8523, - "step": 918 - }, - { - "epoch": 0.08287865806917076, - "grad_norm": 1.3589225980879096, - "learning_rate": 3.97077103275152e-06, - "loss": 1.0542, - "step": 919 - }, - { - "epoch": 0.08296884159264102, - "grad_norm": 1.2358090725809108, - "learning_rate": 3.970671434804428e-06, - "loss": 1.0123, - "step": 920 - }, - { - "epoch": 0.08305902511611128, - "grad_norm": 1.9170364009443785, - "learning_rate": 3.970571668708784e-06, - "loss": 1.0287, - "step": 921 - }, - { - "epoch": 0.08314920863958154, - "grad_norm": 0.6692116756987131, - "learning_rate": 3.9704717344731e-06, - "loss": 0.86, - "step": 922 - }, - { - "epoch": 0.0832393921630518, - "grad_norm": 0.7559592011054312, - "learning_rate": 3.9703716321059026e-06, - "loss": 0.8734, - "step": 923 - }, - { - "epoch": 0.08332957568652208, - "grad_norm": 2.1458526042579065, - "learning_rate": 3.9702713616157325e-06, - "loss": 1.1191, - "step": 924 - }, - { - "epoch": 0.08341975920999234, - "grad_norm": 1.8429964756386021, - "learning_rate": 3.9701709230111455e-06, - "loss": 1.0444, - "step": 925 - }, - { - "epoch": 0.0835099427334626, - "grad_norm": 1.574472184522742, - "learning_rate": 3.970070316300713e-06, - "loss": 1.0004, - "step": 926 - }, - { - "epoch": 0.08360012625693286, - "grad_norm": 1.3217410874229516, - "learning_rate": 3.969969541493017e-06, - "loss": 1.0519, - "step": 927 - }, - { - "epoch": 0.08369030978040312, - "grad_norm": 1.3823572476239974, - "learning_rate": 3.969868598596658e-06, - "loss": 1.0799, - "step": 928 - }, - { - "epoch": 0.08378049330387338, - "grad_norm": 1.2103837746874635, - "learning_rate": 3.969767487620249e-06, - "loss": 0.977, - "step": 929 - }, - { - "epoch": 0.08387067682734364, - "grad_norm": 1.2139303815478102, - "learning_rate": 3.969666208572416e-06, - "loss": 1.0755, - "step": 930 - }, - { - "epoch": 0.0839608603508139, - "grad_norm": 1.8286658245121352, - "learning_rate": 3.969564761461802e-06, - "loss": 1.089, - "step": 931 - }, - { - "epoch": 0.08405104387428416, - "grad_norm": 1.5695916034644999, - "learning_rate": 3.969463146297062e-06, - "loss": 1.0093, - "step": 932 - }, - { - "epoch": 0.08414122739775443, - "grad_norm": 2.3438215117671004, - "learning_rate": 3.969361363086867e-06, - "loss": 1.0638, - "step": 933 - }, - { - "epoch": 0.08423141092122469, - "grad_norm": 1.5864598959474472, - "learning_rate": 3.9692594118399014e-06, - "loss": 0.9996, - "step": 934 - }, - { - "epoch": 0.08432159444469496, - "grad_norm": 1.518373573026821, - "learning_rate": 3.969157292564865e-06, - "loss": 1.067, - "step": 935 - }, - { - "epoch": 0.08441177796816522, - "grad_norm": 1.899106006108275, - "learning_rate": 3.96905500527047e-06, - "loss": 1.0229, - "step": 936 - }, - { - "epoch": 0.08450196149163548, - "grad_norm": 1.611883782033378, - "learning_rate": 3.968952549965445e-06, - "loss": 1.0117, - "step": 937 - }, - { - "epoch": 0.08459214501510574, - "grad_norm": 1.5431337678767592, - "learning_rate": 3.968849926658532e-06, - "loss": 1.0666, - "step": 938 - }, - { - "epoch": 0.084682328538576, - "grad_norm": 3.599089396068955, - "learning_rate": 3.9687471353584866e-06, - "loss": 0.9526, - "step": 939 - }, - { - "epoch": 0.08477251206204627, - "grad_norm": 1.3103358728501786, - "learning_rate": 3.9686441760740795e-06, - "loss": 1.0511, - "step": 940 - }, - { - "epoch": 0.08486269558551653, - "grad_norm": 2.0403509636582435, - "learning_rate": 3.968541048814098e-06, - "loss": 0.9724, - "step": 941 - }, - { - "epoch": 0.08495287910898679, - "grad_norm": 1.9020179297186748, - "learning_rate": 3.968437753587339e-06, - "loss": 0.953, - "step": 942 - }, - { - "epoch": 0.08504306263245705, - "grad_norm": 1.5347280123425442, - "learning_rate": 3.968334290402616e-06, - "loss": 1.0702, - "step": 943 - }, - { - "epoch": 0.08513324615592731, - "grad_norm": 1.5502879705874153, - "learning_rate": 3.968230659268759e-06, - "loss": 0.9252, - "step": 944 - }, - { - "epoch": 0.08522342967939757, - "grad_norm": 1.704500185625464, - "learning_rate": 3.968126860194609e-06, - "loss": 1.0955, - "step": 945 - }, - { - "epoch": 0.08531361320286783, - "grad_norm": 1.4092353497864174, - "learning_rate": 3.968022893189025e-06, - "loss": 1.0832, - "step": 946 - }, - { - "epoch": 0.0854037967263381, - "grad_norm": 1.415335572152172, - "learning_rate": 3.967918758260874e-06, - "loss": 1.0685, - "step": 947 - }, - { - "epoch": 0.08549398024980837, - "grad_norm": 2.8292999356042743, - "learning_rate": 3.967814455419044e-06, - "loss": 1.0741, - "step": 948 - }, - { - "epoch": 0.08558416377327863, - "grad_norm": 1.625038366598423, - "learning_rate": 3.967709984672434e-06, - "loss": 1.078, - "step": 949 - }, - { - "epoch": 0.08567434729674889, - "grad_norm": 1.5412376253570252, - "learning_rate": 3.967605346029959e-06, - "loss": 1.0616, - "step": 950 - }, - { - "epoch": 0.08576453082021915, - "grad_norm": 1.3420968199396428, - "learning_rate": 3.9675005395005466e-06, - "loss": 1.1042, - "step": 951 - }, - { - "epoch": 0.08585471434368941, - "grad_norm": 1.810021975578818, - "learning_rate": 3.967395565093139e-06, - "loss": 1.0565, - "step": 952 - }, - { - "epoch": 0.08594489786715967, - "grad_norm": 1.3250897898930043, - "learning_rate": 3.967290422816693e-06, - "loss": 1.0374, - "step": 953 - }, - { - "epoch": 0.08603508139062993, - "grad_norm": 1.708530282863833, - "learning_rate": 3.967185112680183e-06, - "loss": 1.0003, - "step": 954 - }, - { - "epoch": 0.08612526491410019, - "grad_norm": 1.6408103474869629, - "learning_rate": 3.96707963469259e-06, - "loss": 1.1188, - "step": 955 - }, - { - "epoch": 0.08621544843757045, - "grad_norm": 1.3733126951413777, - "learning_rate": 3.966973988862917e-06, - "loss": 1.0203, - "step": 956 - }, - { - "epoch": 0.08630563196104071, - "grad_norm": 1.438850763340326, - "learning_rate": 3.966868175200178e-06, - "loss": 1.0849, - "step": 957 - }, - { - "epoch": 0.08639581548451097, - "grad_norm": 1.6149192050529577, - "learning_rate": 3.9667621937134e-06, - "loss": 1.0883, - "step": 958 - }, - { - "epoch": 0.08648599900798125, - "grad_norm": 1.661884655761213, - "learning_rate": 3.966656044411627e-06, - "loss": 0.9511, - "step": 959 - }, - { - "epoch": 0.08657618253145151, - "grad_norm": 1.2941539207579205, - "learning_rate": 3.966549727303918e-06, - "loss": 1.0112, - "step": 960 - }, - { - "epoch": 0.08666636605492177, - "grad_norm": 2.1998491755717415, - "learning_rate": 3.966443242399341e-06, - "loss": 1.0696, - "step": 961 - }, - { - "epoch": 0.08675654957839203, - "grad_norm": 0.7226860658835447, - "learning_rate": 3.966336589706985e-06, - "loss": 0.8561, - "step": 962 - }, - { - "epoch": 0.08684673310186229, - "grad_norm": 1.5260513183844027, - "learning_rate": 3.966229769235948e-06, - "loss": 1.0501, - "step": 963 - }, - { - "epoch": 0.08693691662533255, - "grad_norm": 1.5421052858648339, - "learning_rate": 3.966122780995345e-06, - "loss": 1.1117, - "step": 964 - }, - { - "epoch": 0.08702710014880281, - "grad_norm": 1.8074647085878406, - "learning_rate": 3.966015624994306e-06, - "loss": 1.0683, - "step": 965 - }, - { - "epoch": 0.08711728367227307, - "grad_norm": 1.453535944758952, - "learning_rate": 3.9659083012419735e-06, - "loss": 1.0085, - "step": 966 - }, - { - "epoch": 0.08720746719574334, - "grad_norm": 1.634561213722025, - "learning_rate": 3.965800809747505e-06, - "loss": 0.9681, - "step": 967 - }, - { - "epoch": 0.0872976507192136, - "grad_norm": 1.437336953909377, - "learning_rate": 3.965693150520071e-06, - "loss": 1.0087, - "step": 968 - }, - { - "epoch": 0.08738783424268386, - "grad_norm": 2.0179714768495054, - "learning_rate": 3.96558532356886e-06, - "loss": 0.9733, - "step": 969 - }, - { - "epoch": 0.08747801776615412, - "grad_norm": 1.9494853171896078, - "learning_rate": 3.9654773289030704e-06, - "loss": 1.0842, - "step": 970 - }, - { - "epoch": 0.08756820128962439, - "grad_norm": 1.6287316244732666, - "learning_rate": 3.9653691665319176e-06, - "loss": 1.056, - "step": 971 - }, - { - "epoch": 0.08765838481309465, - "grad_norm": 1.6198287256382484, - "learning_rate": 3.96526083646463e-06, - "loss": 0.9696, - "step": 972 - }, - { - "epoch": 0.08774856833656491, - "grad_norm": 2.0838689369899233, - "learning_rate": 3.9651523387104526e-06, - "loss": 0.9916, - "step": 973 - }, - { - "epoch": 0.08783875186003517, - "grad_norm": 2.0802192174492724, - "learning_rate": 3.965043673278641e-06, - "loss": 1.0483, - "step": 974 - }, - { - "epoch": 0.08792893538350544, - "grad_norm": 1.385766403492054, - "learning_rate": 3.964934840178469e-06, - "loss": 1.0495, - "step": 975 - }, - { - "epoch": 0.0880191189069757, - "grad_norm": 1.9578683246955892, - "learning_rate": 3.964825839419221e-06, - "loss": 1.0762, - "step": 976 - }, - { - "epoch": 0.08810930243044596, - "grad_norm": 3.0572690860016216, - "learning_rate": 3.964716671010199e-06, - "loss": 1.0267, - "step": 977 - }, - { - "epoch": 0.08819948595391622, - "grad_norm": 1.4066653168176138, - "learning_rate": 3.9646073349607165e-06, - "loss": 1.0017, - "step": 978 - }, - { - "epoch": 0.08828966947738648, - "grad_norm": 2.0271894510164805, - "learning_rate": 3.964497831280105e-06, - "loss": 1.1712, - "step": 979 - }, - { - "epoch": 0.08837985300085674, - "grad_norm": 1.8901907051593305, - "learning_rate": 3.964388159977705e-06, - "loss": 1.0408, - "step": 980 - }, - { - "epoch": 0.088470036524327, - "grad_norm": 1.4176315873577188, - "learning_rate": 3.964278321062876e-06, - "loss": 0.9508, - "step": 981 - }, - { - "epoch": 0.08856022004779726, - "grad_norm": 1.4610042454777314, - "learning_rate": 3.96416831454499e-06, - "loss": 1.0646, - "step": 982 - }, - { - "epoch": 0.08865040357126754, - "grad_norm": 0.7198846816723242, - "learning_rate": 3.964058140433434e-06, - "loss": 0.8941, - "step": 983 - }, - { - "epoch": 0.0887405870947378, - "grad_norm": 1.6191568224272759, - "learning_rate": 3.963947798737606e-06, - "loss": 1.1044, - "step": 984 - }, - { - "epoch": 0.08883077061820806, - "grad_norm": 1.949179180714509, - "learning_rate": 3.963837289466923e-06, - "loss": 1.0612, - "step": 985 - }, - { - "epoch": 0.08892095414167832, - "grad_norm": 1.4437620352371403, - "learning_rate": 3.9637266126308145e-06, - "loss": 1.0049, - "step": 986 - }, - { - "epoch": 0.08901113766514858, - "grad_norm": 0.7346514323608542, - "learning_rate": 3.963615768238724e-06, - "loss": 0.9031, - "step": 987 - }, - { - "epoch": 0.08910132118861884, - "grad_norm": 1.3462131015605339, - "learning_rate": 3.963504756300107e-06, - "loss": 0.9257, - "step": 988 - }, - { - "epoch": 0.0891915047120891, - "grad_norm": 1.4306674787981821, - "learning_rate": 3.96339357682444e-06, - "loss": 1.0817, - "step": 989 - }, - { - "epoch": 0.08928168823555936, - "grad_norm": 2.049827368375889, - "learning_rate": 3.963282229821206e-06, - "loss": 1.0596, - "step": 990 - }, - { - "epoch": 0.08937187175902962, - "grad_norm": 1.392891269482352, - "learning_rate": 3.963170715299906e-06, - "loss": 1.0512, - "step": 991 - }, - { - "epoch": 0.08946205528249988, - "grad_norm": 1.4537124266816066, - "learning_rate": 3.963059033270056e-06, - "loss": 0.9642, - "step": 992 - }, - { - "epoch": 0.08955223880597014, - "grad_norm": 1.2693898753163761, - "learning_rate": 3.9629471837411855e-06, - "loss": 0.9857, - "step": 993 - }, - { - "epoch": 0.0896424223294404, - "grad_norm": 1.4293166549911134, - "learning_rate": 3.962835166722838e-06, - "loss": 0.9869, - "step": 994 - }, - { - "epoch": 0.08973260585291068, - "grad_norm": 1.436424516478512, - "learning_rate": 3.96272298222457e-06, - "loss": 1.0505, - "step": 995 - }, - { - "epoch": 0.08982278937638094, - "grad_norm": 1.4393609481967078, - "learning_rate": 3.962610630255956e-06, - "loss": 1.1664, - "step": 996 - }, - { - "epoch": 0.0899129728998512, - "grad_norm": 1.6329835500969032, - "learning_rate": 3.96249811082658e-06, - "loss": 1.0101, - "step": 997 - }, - { - "epoch": 0.09000315642332146, - "grad_norm": 0.7836609070924698, - "learning_rate": 3.962385423946046e-06, - "loss": 0.9301, - "step": 998 - }, - { - "epoch": 0.09009333994679172, - "grad_norm": 1.2111888009466965, - "learning_rate": 3.962272569623966e-06, - "loss": 1.0235, - "step": 999 - }, - { - "epoch": 0.09018352347026198, - "grad_norm": 1.6014020141581273, - "learning_rate": 3.9621595478699704e-06, - "loss": 0.9231, - "step": 1000 - }, - { - "epoch": 0.09027370699373224, - "grad_norm": 1.4289158474040389, - "learning_rate": 3.962046358693703e-06, - "loss": 1.034, - "step": 1001 - }, - { - "epoch": 0.0903638905172025, - "grad_norm": 1.8906729195567162, - "learning_rate": 3.961933002104822e-06, - "loss": 1.1189, - "step": 1002 - }, - { - "epoch": 0.09045407404067277, - "grad_norm": 1.6105600826990345, - "learning_rate": 3.961819478112999e-06, - "loss": 0.9653, - "step": 1003 - }, - { - "epoch": 0.09054425756414303, - "grad_norm": 1.4474398241728188, - "learning_rate": 3.961705786727921e-06, - "loss": 1.0543, - "step": 1004 - }, - { - "epoch": 0.09063444108761329, - "grad_norm": 1.4404902662049648, - "learning_rate": 3.961591927959288e-06, - "loss": 0.9631, - "step": 1005 - }, - { - "epoch": 0.09072462461108355, - "grad_norm": 0.7488137639299873, - "learning_rate": 3.961477901816816e-06, - "loss": 0.8359, - "step": 1006 - }, - { - "epoch": 0.09081480813455382, - "grad_norm": 1.8314057606147298, - "learning_rate": 3.961363708310233e-06, - "loss": 1.1024, - "step": 1007 - }, - { - "epoch": 0.09090499165802408, - "grad_norm": 1.6074976665663803, - "learning_rate": 3.961249347449286e-06, - "loss": 1.0642, - "step": 1008 - }, - { - "epoch": 0.09099517518149434, - "grad_norm": 1.642319449106232, - "learning_rate": 3.961134819243728e-06, - "loss": 1.0269, - "step": 1009 - }, - { - "epoch": 0.0910853587049646, - "grad_norm": 2.1414603949141435, - "learning_rate": 3.961020123703335e-06, - "loss": 1.0406, - "step": 1010 - }, - { - "epoch": 0.09117554222843487, - "grad_norm": 1.521535772158194, - "learning_rate": 3.960905260837892e-06, - "loss": 1.0569, - "step": 1011 - }, - { - "epoch": 0.09126572575190513, - "grad_norm": 1.5546729312386642, - "learning_rate": 3.960790230657199e-06, - "loss": 1.014, - "step": 1012 - }, - { - "epoch": 0.09135590927537539, - "grad_norm": 1.457988107781909, - "learning_rate": 3.960675033171072e-06, - "loss": 0.9879, - "step": 1013 - }, - { - "epoch": 0.09144609279884565, - "grad_norm": 1.3661595430993916, - "learning_rate": 3.960559668389341e-06, - "loss": 0.9829, - "step": 1014 - }, - { - "epoch": 0.09153627632231591, - "grad_norm": 1.4050855689955848, - "learning_rate": 3.960444136321847e-06, - "loss": 1.0382, - "step": 1015 - }, - { - "epoch": 0.09162645984578617, - "grad_norm": 1.6111075695408155, - "learning_rate": 3.960328436978451e-06, - "loss": 1.125, - "step": 1016 - }, - { - "epoch": 0.09171664336925643, - "grad_norm": 0.7101922793003884, - "learning_rate": 3.960212570369024e-06, - "loss": 0.8416, - "step": 1017 - }, - { - "epoch": 0.09180682689272669, - "grad_norm": 1.5858929256695873, - "learning_rate": 3.9600965365034515e-06, - "loss": 0.967, - "step": 1018 - }, - { - "epoch": 0.09189701041619697, - "grad_norm": 3.2368006064938153, - "learning_rate": 3.959980335391634e-06, - "loss": 1.0938, - "step": 1019 - }, - { - "epoch": 0.09198719393966723, - "grad_norm": 1.4939377191933858, - "learning_rate": 3.959863967043487e-06, - "loss": 0.9851, - "step": 1020 - }, - { - "epoch": 0.09207737746313749, - "grad_norm": 1.4283557797199802, - "learning_rate": 3.9597474314689405e-06, - "loss": 1.0714, - "step": 1021 - }, - { - "epoch": 0.09216756098660775, - "grad_norm": 1.4936659114903559, - "learning_rate": 3.959630728677937e-06, - "loss": 1.0527, - "step": 1022 - }, - { - "epoch": 0.09225774451007801, - "grad_norm": 1.4640619658608618, - "learning_rate": 3.959513858680434e-06, - "loss": 1.0169, - "step": 1023 - }, - { - "epoch": 0.09234792803354827, - "grad_norm": 1.4601621967519103, - "learning_rate": 3.959396821486405e-06, - "loss": 1.1236, - "step": 1024 - }, - { - "epoch": 0.09243811155701853, - "grad_norm": 0.8474869097483596, - "learning_rate": 3.959279617105835e-06, - "loss": 0.9067, - "step": 1025 - }, - { - "epoch": 0.09252829508048879, - "grad_norm": 2.5083897006607367, - "learning_rate": 3.9591622455487235e-06, - "loss": 1.0493, - "step": 1026 - }, - { - "epoch": 0.09261847860395905, - "grad_norm": 1.4769617580352195, - "learning_rate": 3.959044706825087e-06, - "loss": 1.1078, - "step": 1027 - }, - { - "epoch": 0.09270866212742931, - "grad_norm": 1.4267265634946316, - "learning_rate": 3.958927000944954e-06, - "loss": 0.943, - "step": 1028 - }, - { - "epoch": 0.09279884565089958, - "grad_norm": 1.6531947764794328, - "learning_rate": 3.958809127918368e-06, - "loss": 1.1222, - "step": 1029 - }, - { - "epoch": 0.09288902917436984, - "grad_norm": 1.4281396042893004, - "learning_rate": 3.958691087755387e-06, - "loss": 0.9772, - "step": 1030 - }, - { - "epoch": 0.09297921269784011, - "grad_norm": 2.1246814489018964, - "learning_rate": 3.958572880466081e-06, - "loss": 0.909, - "step": 1031 - }, - { - "epoch": 0.09306939622131037, - "grad_norm": 1.468076023731843, - "learning_rate": 3.9584545060605385e-06, - "loss": 1.0223, - "step": 1032 - }, - { - "epoch": 0.09315957974478063, - "grad_norm": 1.570104101216418, - "learning_rate": 3.958335964548859e-06, - "loss": 1.0038, - "step": 1033 - }, - { - "epoch": 0.0932497632682509, - "grad_norm": 1.7501546246473654, - "learning_rate": 3.958217255941156e-06, - "loss": 1.0828, - "step": 1034 - }, - { - "epoch": 0.09333994679172115, - "grad_norm": 1.2278112384404212, - "learning_rate": 3.95809838024756e-06, - "loss": 0.9878, - "step": 1035 - }, - { - "epoch": 0.09343013031519141, - "grad_norm": 1.4500205400261759, - "learning_rate": 3.957979337478212e-06, - "loss": 1.0015, - "step": 1036 - }, - { - "epoch": 0.09352031383866168, - "grad_norm": 1.7240042397033177, - "learning_rate": 3.957860127643272e-06, - "loss": 1.0551, - "step": 1037 - }, - { - "epoch": 0.09361049736213194, - "grad_norm": 1.574365651586384, - "learning_rate": 3.95774075075291e-06, - "loss": 1.0337, - "step": 1038 - }, - { - "epoch": 0.0937006808856022, - "grad_norm": 1.507506502558157, - "learning_rate": 3.957621206817312e-06, - "loss": 1.0898, - "step": 1039 - }, - { - "epoch": 0.09379086440907246, - "grad_norm": 1.519116656466708, - "learning_rate": 3.957501495846679e-06, - "loss": 1.0155, - "step": 1040 - }, - { - "epoch": 0.09388104793254272, - "grad_norm": 0.6791949186319214, - "learning_rate": 3.957381617851225e-06, - "loss": 0.8616, - "step": 1041 - }, - { - "epoch": 0.09397123145601298, - "grad_norm": 1.49981775766195, - "learning_rate": 3.9572615728411776e-06, - "loss": 0.9615, - "step": 1042 - }, - { - "epoch": 0.09406141497948325, - "grad_norm": 8.520835024608889, - "learning_rate": 3.957141360826781e-06, - "loss": 0.9245, - "step": 1043 - }, - { - "epoch": 0.09415159850295352, - "grad_norm": 1.6295526260347795, - "learning_rate": 3.957020981818292e-06, - "loss": 1.0247, - "step": 1044 - }, - { - "epoch": 0.09424178202642378, - "grad_norm": 1.2649692202204619, - "learning_rate": 3.956900435825982e-06, - "loss": 1.0306, - "step": 1045 - }, - { - "epoch": 0.09433196554989404, - "grad_norm": 1.240945446994747, - "learning_rate": 3.9567797228601364e-06, - "loss": 0.9865, - "step": 1046 - }, - { - "epoch": 0.0944221490733643, - "grad_norm": 1.4378795951869987, - "learning_rate": 3.956658842931055e-06, - "loss": 1.1108, - "step": 1047 - }, - { - "epoch": 0.09451233259683456, - "grad_norm": 1.2453419202059934, - "learning_rate": 3.956537796049052e-06, - "loss": 1.0452, - "step": 1048 - }, - { - "epoch": 0.09460251612030482, - "grad_norm": 0.693115062384401, - "learning_rate": 3.956416582224457e-06, - "loss": 0.8976, - "step": 1049 - }, - { - "epoch": 0.09469269964377508, - "grad_norm": 3.1964870243558003, - "learning_rate": 3.956295201467611e-06, - "loss": 1.075, - "step": 1050 - }, - { - "epoch": 0.09478288316724534, - "grad_norm": 1.212857944705537, - "learning_rate": 3.956173653788872e-06, - "loss": 1.0641, - "step": 1051 - }, - { - "epoch": 0.0948730666907156, - "grad_norm": 1.8379918069584231, - "learning_rate": 3.95605193919861e-06, - "loss": 1.0792, - "step": 1052 - }, - { - "epoch": 0.09496325021418586, - "grad_norm": 1.66357937815644, - "learning_rate": 3.955930057707211e-06, - "loss": 1.0202, - "step": 1053 - }, - { - "epoch": 0.09505343373765612, - "grad_norm": 1.7273079484104645, - "learning_rate": 3.955808009325075e-06, - "loss": 1.0531, - "step": 1054 - }, - { - "epoch": 0.0951436172611264, - "grad_norm": 4.473753362304243, - "learning_rate": 3.955685794062615e-06, - "loss": 1.0053, - "step": 1055 - }, - { - "epoch": 0.09523380078459666, - "grad_norm": 1.849020456336062, - "learning_rate": 3.95556341193026e-06, - "loss": 1.1329, - "step": 1056 - }, - { - "epoch": 0.09532398430806692, - "grad_norm": 1.4191826282588456, - "learning_rate": 3.955440862938452e-06, - "loss": 1.1267, - "step": 1057 - }, - { - "epoch": 0.09541416783153718, - "grad_norm": 1.8806052031256304, - "learning_rate": 3.955318147097647e-06, - "loss": 1.0396, - "step": 1058 - }, - { - "epoch": 0.09550435135500744, - "grad_norm": 1.5828456713259285, - "learning_rate": 3.955195264418316e-06, - "loss": 1.0062, - "step": 1059 - }, - { - "epoch": 0.0955945348784777, - "grad_norm": 1.3780907226590147, - "learning_rate": 3.955072214910944e-06, - "loss": 0.8966, - "step": 1060 - }, - { - "epoch": 0.09568471840194796, - "grad_norm": 1.77429615357511, - "learning_rate": 3.954948998586032e-06, - "loss": 0.9637, - "step": 1061 - }, - { - "epoch": 0.09577490192541822, - "grad_norm": 1.3243603925993026, - "learning_rate": 3.954825615454089e-06, - "loss": 1.0418, - "step": 1062 - }, - { - "epoch": 0.09586508544888848, - "grad_norm": 1.6975331331595382, - "learning_rate": 3.954702065525649e-06, - "loss": 1.0372, - "step": 1063 - }, - { - "epoch": 0.09595526897235875, - "grad_norm": 2.1305453928418125, - "learning_rate": 3.954578348811248e-06, - "loss": 1.0655, - "step": 1064 - }, - { - "epoch": 0.096045452495829, - "grad_norm": 1.8498501484983625, - "learning_rate": 3.954454465321447e-06, - "loss": 1.0262, - "step": 1065 - }, - { - "epoch": 0.09613563601929928, - "grad_norm": 1.9001183623539324, - "learning_rate": 3.954330415066813e-06, - "loss": 1.1112, - "step": 1066 - }, - { - "epoch": 0.09622581954276954, - "grad_norm": 1.801637987737279, - "learning_rate": 3.954206198057932e-06, - "loss": 1.0241, - "step": 1067 - }, - { - "epoch": 0.0963160030662398, - "grad_norm": 1.5443897512450055, - "learning_rate": 3.954081814305403e-06, - "loss": 1.0939, - "step": 1068 - }, - { - "epoch": 0.09640618658971006, - "grad_norm": 1.273766504392305, - "learning_rate": 3.953957263819839e-06, - "loss": 0.9718, - "step": 1069 - }, - { - "epoch": 0.09649637011318032, - "grad_norm": 1.4532933291491146, - "learning_rate": 3.953832546611867e-06, - "loss": 0.972, - "step": 1070 - }, - { - "epoch": 0.09658655363665059, - "grad_norm": 1.5742808440956226, - "learning_rate": 3.953707662692129e-06, - "loss": 1.0655, - "step": 1071 - }, - { - "epoch": 0.09667673716012085, - "grad_norm": 1.4704823119219397, - "learning_rate": 3.95358261207128e-06, - "loss": 0.965, - "step": 1072 - }, - { - "epoch": 0.0967669206835911, - "grad_norm": 1.2525934019306915, - "learning_rate": 3.953457394759992e-06, - "loss": 1.0297, - "step": 1073 - }, - { - "epoch": 0.09685710420706137, - "grad_norm": 1.4224383736721748, - "learning_rate": 3.953332010768947e-06, - "loss": 1.0478, - "step": 1074 - }, - { - "epoch": 0.09694728773053163, - "grad_norm": 2.0655219163436036, - "learning_rate": 3.9532064601088436e-06, - "loss": 1.0016, - "step": 1075 - }, - { - "epoch": 0.09703747125400189, - "grad_norm": 2.237082275098594, - "learning_rate": 3.953080742790396e-06, - "loss": 1.1832, - "step": 1076 - }, - { - "epoch": 0.09712765477747215, - "grad_norm": 2.018781216501099, - "learning_rate": 3.95295485882433e-06, - "loss": 0.9503, - "step": 1077 - }, - { - "epoch": 0.09721783830094242, - "grad_norm": 0.6795375921743295, - "learning_rate": 3.952828808221387e-06, - "loss": 0.8855, - "step": 1078 - }, - { - "epoch": 0.09730802182441269, - "grad_norm": 1.8119067174761185, - "learning_rate": 3.9527025909923225e-06, - "loss": 1.0137, - "step": 1079 - }, - { - "epoch": 0.09739820534788295, - "grad_norm": 1.3354871113368387, - "learning_rate": 3.952576207147906e-06, - "loss": 1.1208, - "step": 1080 - }, - { - "epoch": 0.09748838887135321, - "grad_norm": 1.7487389925868655, - "learning_rate": 3.95244965669892e-06, - "loss": 1.0734, - "step": 1081 - }, - { - "epoch": 0.09757857239482347, - "grad_norm": 1.3455190768014706, - "learning_rate": 3.952322939656165e-06, - "loss": 0.9877, - "step": 1082 - }, - { - "epoch": 0.09766875591829373, - "grad_norm": 1.3023605153378273, - "learning_rate": 3.952196056030451e-06, - "loss": 1.0261, - "step": 1083 - }, - { - "epoch": 0.09775893944176399, - "grad_norm": 1.8604280711383707, - "learning_rate": 3.952069005832605e-06, - "loss": 1.0031, - "step": 1084 - }, - { - "epoch": 0.09784912296523425, - "grad_norm": 1.7361417140382738, - "learning_rate": 3.951941789073468e-06, - "loss": 1.0648, - "step": 1085 - }, - { - "epoch": 0.09793930648870451, - "grad_norm": 1.3656203961676678, - "learning_rate": 3.9518144057638955e-06, - "loss": 1.0579, - "step": 1086 - }, - { - "epoch": 0.09802949001217477, - "grad_norm": 1.6116971050071953, - "learning_rate": 3.951686855914755e-06, - "loss": 1.0262, - "step": 1087 - }, - { - "epoch": 0.09811967353564503, - "grad_norm": 1.9006215158146462, - "learning_rate": 3.9515591395369305e-06, - "loss": 1.0577, - "step": 1088 - }, - { - "epoch": 0.0982098570591153, - "grad_norm": 1.3753876573118367, - "learning_rate": 3.95143125664132e-06, - "loss": 1.0174, - "step": 1089 - }, - { - "epoch": 0.09830004058258557, - "grad_norm": 2.262137840327724, - "learning_rate": 3.951303207238833e-06, - "loss": 1.0589, - "step": 1090 - }, - { - "epoch": 0.09839022410605583, - "grad_norm": 1.3725936380348136, - "learning_rate": 3.951174991340399e-06, - "loss": 1.0082, - "step": 1091 - }, - { - "epoch": 0.09848040762952609, - "grad_norm": 1.3566395213405207, - "learning_rate": 3.9510466089569546e-06, - "loss": 0.9928, - "step": 1092 - }, - { - "epoch": 0.09857059115299635, - "grad_norm": 1.4464894219706292, - "learning_rate": 3.950918060099456e-06, - "loss": 1.0004, - "step": 1093 - }, - { - "epoch": 0.09866077467646661, - "grad_norm": 1.4199514267901419, - "learning_rate": 3.950789344778871e-06, - "loss": 1.0888, - "step": 1094 - }, - { - "epoch": 0.09875095819993687, - "grad_norm": 1.6271939505838793, - "learning_rate": 3.950660463006184e-06, - "loss": 0.9748, - "step": 1095 - }, - { - "epoch": 0.09884114172340713, - "grad_norm": 1.3224660251053648, - "learning_rate": 3.950531414792389e-06, - "loss": 1.1104, - "step": 1096 - }, - { - "epoch": 0.0989313252468774, - "grad_norm": 1.4492989489633208, - "learning_rate": 3.950402200148498e-06, - "loss": 1.0405, - "step": 1097 - }, - { - "epoch": 0.09902150877034765, - "grad_norm": 1.3613138074956606, - "learning_rate": 3.950272819085538e-06, - "loss": 1.0108, - "step": 1098 - }, - { - "epoch": 0.09911169229381792, - "grad_norm": 2.1311850574253097, - "learning_rate": 3.9501432716145474e-06, - "loss": 1.1209, - "step": 1099 - }, - { - "epoch": 0.09920187581728818, - "grad_norm": 1.5225859780926014, - "learning_rate": 3.950013557746579e-06, - "loss": 0.9327, - "step": 1100 - }, - { - "epoch": 0.09929205934075844, - "grad_norm": 1.3618073461428852, - "learning_rate": 3.949883677492703e-06, - "loss": 1.0746, - "step": 1101 - }, - { - "epoch": 0.09938224286422871, - "grad_norm": 1.546949596725624, - "learning_rate": 3.9497536308639994e-06, - "loss": 1.049, - "step": 1102 - }, - { - "epoch": 0.09947242638769897, - "grad_norm": 1.5139502484702527, - "learning_rate": 3.949623417871565e-06, - "loss": 1.0803, - "step": 1103 - }, - { - "epoch": 0.09956260991116923, - "grad_norm": 1.3858145331328826, - "learning_rate": 3.949493038526511e-06, - "loss": 1.0205, - "step": 1104 - }, - { - "epoch": 0.0996527934346395, - "grad_norm": 1.4516429110983744, - "learning_rate": 3.949362492839961e-06, - "loss": 0.99, - "step": 1105 - }, - { - "epoch": 0.09974297695810976, - "grad_norm": 1.5061349975243994, - "learning_rate": 3.949231780823054e-06, - "loss": 0.9936, - "step": 1106 - }, - { - "epoch": 0.09983316048158002, - "grad_norm": 1.56207200864423, - "learning_rate": 3.949100902486945e-06, - "loss": 0.9777, - "step": 1107 - }, - { - "epoch": 0.09992334400505028, - "grad_norm": 2.4296939763905847, - "learning_rate": 3.948969857842799e-06, - "loss": 1.0025, - "step": 1108 - }, - { - "epoch": 0.10001352752852054, - "grad_norm": 2.1933717667865666, - "learning_rate": 3.948838646901798e-06, - "loss": 1.0214, - "step": 1109 - }, - { - "epoch": 0.1001037110519908, - "grad_norm": 1.5532733101906158, - "learning_rate": 3.948707269675138e-06, - "loss": 1.0194, - "step": 1110 - }, - { - "epoch": 0.10019389457546106, - "grad_norm": 1.486948212291944, - "learning_rate": 3.948575726174028e-06, - "loss": 1.0202, - "step": 1111 - }, - { - "epoch": 0.10028407809893132, - "grad_norm": 1.663590700131155, - "learning_rate": 3.9484440164096935e-06, - "loss": 1.1032, - "step": 1112 - }, - { - "epoch": 0.10037426162240158, - "grad_norm": 1.9809408434027087, - "learning_rate": 3.948312140393372e-06, - "loss": 1.0522, - "step": 1113 - }, - { - "epoch": 0.10046444514587186, - "grad_norm": 2.019680703866046, - "learning_rate": 3.948180098136316e-06, - "loss": 1.0078, - "step": 1114 - }, - { - "epoch": 0.10055462866934212, - "grad_norm": 2.167680710911833, - "learning_rate": 3.948047889649791e-06, - "loss": 1.0023, - "step": 1115 - }, - { - "epoch": 0.10064481219281238, - "grad_norm": 1.8711034977605234, - "learning_rate": 3.947915514945079e-06, - "loss": 1.0168, - "step": 1116 - }, - { - "epoch": 0.10073499571628264, - "grad_norm": 1.6997532497172396, - "learning_rate": 3.947782974033474e-06, - "loss": 1.0997, - "step": 1117 - }, - { - "epoch": 0.1008251792397529, - "grad_norm": 1.5821361365693762, - "learning_rate": 3.9476502669262866e-06, - "loss": 0.9959, - "step": 1118 - }, - { - "epoch": 0.10091536276322316, - "grad_norm": 1.4855312461943706, - "learning_rate": 3.947517393634839e-06, - "loss": 1.0075, - "step": 1119 - }, - { - "epoch": 0.10100554628669342, - "grad_norm": 1.4669299922430974, - "learning_rate": 3.947384354170469e-06, - "loss": 1.0748, - "step": 1120 - }, - { - "epoch": 0.10109572981016368, - "grad_norm": 1.430491487165053, - "learning_rate": 3.947251148544528e-06, - "loss": 1.0615, - "step": 1121 - }, - { - "epoch": 0.10118591333363394, - "grad_norm": 1.7337575277262705, - "learning_rate": 3.947117776768382e-06, - "loss": 1.066, - "step": 1122 - }, - { - "epoch": 0.1012760968571042, - "grad_norm": 1.3680500906089073, - "learning_rate": 3.9469842388534105e-06, - "loss": 1.0134, - "step": 1123 - }, - { - "epoch": 0.10136628038057446, - "grad_norm": 1.4371433644942537, - "learning_rate": 3.946850534811009e-06, - "loss": 1.0425, - "step": 1124 - }, - { - "epoch": 0.10145646390404472, - "grad_norm": 1.3405909553953455, - "learning_rate": 3.946716664652585e-06, - "loss": 1.0162, - "step": 1125 - }, - { - "epoch": 0.101546647427515, - "grad_norm": 2.5356290637917662, - "learning_rate": 3.94658262838956e-06, - "loss": 0.9393, - "step": 1126 - }, - { - "epoch": 0.10163683095098526, - "grad_norm": 1.3350124167818556, - "learning_rate": 3.946448426033373e-06, - "loss": 0.9521, - "step": 1127 - }, - { - "epoch": 0.10172701447445552, - "grad_norm": 1.327288016080389, - "learning_rate": 3.946314057595473e-06, - "loss": 1.0635, - "step": 1128 - }, - { - "epoch": 0.10181719799792578, - "grad_norm": 1.8530551350435724, - "learning_rate": 3.946179523087326e-06, - "loss": 1.0919, - "step": 1129 - }, - { - "epoch": 0.10190738152139604, - "grad_norm": 1.4927386319620897, - "learning_rate": 3.9460448225204104e-06, - "loss": 0.9492, - "step": 1130 - }, - { - "epoch": 0.1019975650448663, - "grad_norm": 1.6538591222305794, - "learning_rate": 3.945909955906221e-06, - "loss": 1.0298, - "step": 1131 - }, - { - "epoch": 0.10208774856833656, - "grad_norm": 1.6235010863436492, - "learning_rate": 3.945774923256264e-06, - "loss": 1.0295, - "step": 1132 - }, - { - "epoch": 0.10217793209180683, - "grad_norm": 3.216029230184237, - "learning_rate": 3.945639724582062e-06, - "loss": 1.1014, - "step": 1133 - }, - { - "epoch": 0.10226811561527709, - "grad_norm": 1.9670085774105224, - "learning_rate": 3.94550435989515e-06, - "loss": 0.9957, - "step": 1134 - }, - { - "epoch": 0.10235829913874735, - "grad_norm": 1.40900456535948, - "learning_rate": 3.945368829207079e-06, - "loss": 1.0595, - "step": 1135 - }, - { - "epoch": 0.10244848266221761, - "grad_norm": 1.6139249464213863, - "learning_rate": 3.945233132529414e-06, - "loss": 0.9871, - "step": 1136 - }, - { - "epoch": 0.10253866618568787, - "grad_norm": 1.5900619319634213, - "learning_rate": 3.9450972698737304e-06, - "loss": 1.0277, - "step": 1137 - }, - { - "epoch": 0.10262884970915814, - "grad_norm": 1.3320931339151971, - "learning_rate": 3.944961241251623e-06, - "loss": 1.013, - "step": 1138 - }, - { - "epoch": 0.1027190332326284, - "grad_norm": 1.1992188494057878, - "learning_rate": 3.9448250466746985e-06, - "loss": 0.9993, - "step": 1139 - }, - { - "epoch": 0.10280921675609866, - "grad_norm": 1.557577466308773, - "learning_rate": 3.944688686154578e-06, - "loss": 1.059, - "step": 1140 - }, - { - "epoch": 0.10289940027956893, - "grad_norm": 2.3293464068107954, - "learning_rate": 3.944552159702894e-06, - "loss": 0.9927, - "step": 1141 - }, - { - "epoch": 0.10298958380303919, - "grad_norm": 1.4923030149475778, - "learning_rate": 3.944415467331299e-06, - "loss": 1.0216, - "step": 1142 - }, - { - "epoch": 0.10307976732650945, - "grad_norm": 1.2525814580412027, - "learning_rate": 3.944278609051455e-06, - "loss": 0.9815, - "step": 1143 - }, - { - "epoch": 0.10316995084997971, - "grad_norm": 1.3508542801732955, - "learning_rate": 3.944141584875039e-06, - "loss": 0.9621, - "step": 1144 - }, - { - "epoch": 0.10326013437344997, - "grad_norm": 1.3304255139711516, - "learning_rate": 3.944004394813743e-06, - "loss": 1.0336, - "step": 1145 - }, - { - "epoch": 0.10335031789692023, - "grad_norm": 1.3040213797489932, - "learning_rate": 3.943867038879273e-06, - "loss": 1.0582, - "step": 1146 - }, - { - "epoch": 0.10344050142039049, - "grad_norm": 1.5179277996639615, - "learning_rate": 3.943729517083349e-06, - "loss": 1.0653, - "step": 1147 - }, - { - "epoch": 0.10353068494386075, - "grad_norm": 1.822519887703179, - "learning_rate": 3.943591829437705e-06, - "loss": 1.0676, - "step": 1148 - }, - { - "epoch": 0.10362086846733101, - "grad_norm": 1.555428869712383, - "learning_rate": 3.9434539759540895e-06, - "loss": 0.975, - "step": 1149 - }, - { - "epoch": 0.10371105199080129, - "grad_norm": 1.6640917278463119, - "learning_rate": 3.943315956644264e-06, - "loss": 0.9674, - "step": 1150 - }, - { - "epoch": 0.10380123551427155, - "grad_norm": 1.6124174377832816, - "learning_rate": 3.943177771520006e-06, - "loss": 1.1045, - "step": 1151 - }, - { - "epoch": 0.10389141903774181, - "grad_norm": 1.7012352915176379, - "learning_rate": 3.9430394205931065e-06, - "loss": 1.0842, - "step": 1152 - }, - { - "epoch": 0.10398160256121207, - "grad_norm": 1.4211569849072736, - "learning_rate": 3.942900903875369e-06, - "loss": 0.908, - "step": 1153 - }, - { - "epoch": 0.10407178608468233, - "grad_norm": 1.4168309883110535, - "learning_rate": 3.942762221378614e-06, - "loss": 1.0388, - "step": 1154 - }, - { - "epoch": 0.10416196960815259, - "grad_norm": 1.297674644932366, - "learning_rate": 3.942623373114673e-06, - "loss": 1.0579, - "step": 1155 - }, - { - "epoch": 0.10425215313162285, - "grad_norm": 1.4891120414057974, - "learning_rate": 3.942484359095396e-06, - "loss": 1.0177, - "step": 1156 - }, - { - "epoch": 0.10434233665509311, - "grad_norm": 1.5834656041092185, - "learning_rate": 3.942345179332642e-06, - "loss": 0.9558, - "step": 1157 - }, - { - "epoch": 0.10443252017856337, - "grad_norm": 1.2714046330211382, - "learning_rate": 3.942205833838287e-06, - "loss": 0.9831, - "step": 1158 - }, - { - "epoch": 0.10452270370203363, - "grad_norm": 1.7162238543714508, - "learning_rate": 3.9420663226242204e-06, - "loss": 1.1348, - "step": 1159 - }, - { - "epoch": 0.1046128872255039, - "grad_norm": 1.822814924034784, - "learning_rate": 3.941926645702348e-06, - "loss": 1.0738, - "step": 1160 - }, - { - "epoch": 0.10470307074897416, - "grad_norm": 1.8901195638671913, - "learning_rate": 3.941786803084586e-06, - "loss": 1.0224, - "step": 1161 - }, - { - "epoch": 0.10479325427244443, - "grad_norm": 1.4306950588579068, - "learning_rate": 3.941646794782867e-06, - "loss": 0.9816, - "step": 1162 - }, - { - "epoch": 0.10488343779591469, - "grad_norm": 1.506510829898005, - "learning_rate": 3.941506620809137e-06, - "loss": 1.0451, - "step": 1163 - }, - { - "epoch": 0.10497362131938495, - "grad_norm": 1.5585884725151486, - "learning_rate": 3.941366281175357e-06, - "loss": 1.0235, - "step": 1164 - }, - { - "epoch": 0.10506380484285521, - "grad_norm": 1.1790498437513097, - "learning_rate": 3.941225775893502e-06, - "loss": 0.9702, - "step": 1165 - }, - { - "epoch": 0.10515398836632547, - "grad_norm": 1.4300829231121281, - "learning_rate": 3.941085104975559e-06, - "loss": 1.007, - "step": 1166 - }, - { - "epoch": 0.10524417188979573, - "grad_norm": 0.7346029130861811, - "learning_rate": 3.9409442684335325e-06, - "loss": 0.9017, - "step": 1167 - }, - { - "epoch": 0.105334355413266, - "grad_norm": 1.6089642194914968, - "learning_rate": 3.940803266279438e-06, - "loss": 1.027, - "step": 1168 - }, - { - "epoch": 0.10542453893673626, - "grad_norm": 1.5855822235700727, - "learning_rate": 3.9406620985253076e-06, - "loss": 0.9735, - "step": 1169 - }, - { - "epoch": 0.10551472246020652, - "grad_norm": 1.3735487388375927, - "learning_rate": 3.940520765183187e-06, - "loss": 1.0105, - "step": 1170 - }, - { - "epoch": 0.10560490598367678, - "grad_norm": 1.7062660440102841, - "learning_rate": 3.940379266265134e-06, - "loss": 1.0717, - "step": 1171 - }, - { - "epoch": 0.10569508950714704, - "grad_norm": 3.155668299486154, - "learning_rate": 3.940237601783223e-06, - "loss": 0.9974, - "step": 1172 - }, - { - "epoch": 0.1057852730306173, - "grad_norm": 1.4124660437224268, - "learning_rate": 3.940095771749542e-06, - "loss": 1.0284, - "step": 1173 - }, - { - "epoch": 0.10587545655408757, - "grad_norm": 1.5781493232287847, - "learning_rate": 3.939953776176192e-06, - "loss": 1.0803, - "step": 1174 - }, - { - "epoch": 0.10596564007755783, - "grad_norm": 1.0966309343814675, - "learning_rate": 3.939811615075288e-06, - "loss": 1.0546, - "step": 1175 - }, - { - "epoch": 0.1060558236010281, - "grad_norm": 1.8576650370087529, - "learning_rate": 3.9396692884589616e-06, - "loss": 1.0905, - "step": 1176 - }, - { - "epoch": 0.10614600712449836, - "grad_norm": 1.5677197816078003, - "learning_rate": 3.9395267963393565e-06, - "loss": 1.0097, - "step": 1177 - }, - { - "epoch": 0.10623619064796862, - "grad_norm": 1.6827405993899371, - "learning_rate": 3.939384138728631e-06, - "loss": 1.064, - "step": 1178 - }, - { - "epoch": 0.10632637417143888, - "grad_norm": 1.4798622197378202, - "learning_rate": 3.939241315638956e-06, - "loss": 1.0225, - "step": 1179 - }, - { - "epoch": 0.10641655769490914, - "grad_norm": 1.3532089172137127, - "learning_rate": 3.93909832708252e-06, - "loss": 1.02, - "step": 1180 - }, - { - "epoch": 0.1065067412183794, - "grad_norm": 6.81628424855003, - "learning_rate": 3.938955173071523e-06, - "loss": 1.0333, - "step": 1181 - }, - { - "epoch": 0.10659692474184966, - "grad_norm": 0.6395179673713791, - "learning_rate": 3.938811853618179e-06, - "loss": 0.8779, - "step": 1182 - }, - { - "epoch": 0.10668710826531992, - "grad_norm": 2.028225921800148, - "learning_rate": 3.938668368734717e-06, - "loss": 1.0714, - "step": 1183 - }, - { - "epoch": 0.10677729178879018, - "grad_norm": 1.4023148897649407, - "learning_rate": 3.93852471843338e-06, - "loss": 0.9049, - "step": 1184 - }, - { - "epoch": 0.10686747531226044, - "grad_norm": 1.5010354124208498, - "learning_rate": 3.9383809027264254e-06, - "loss": 1.0081, - "step": 1185 - }, - { - "epoch": 0.10695765883573072, - "grad_norm": 1.2940018321235103, - "learning_rate": 3.938236921626124e-06, - "loss": 1.0762, - "step": 1186 - }, - { - "epoch": 0.10704784235920098, - "grad_norm": 2.0268997558813426, - "learning_rate": 3.938092775144761e-06, - "loss": 1.0349, - "step": 1187 - }, - { - "epoch": 0.10713802588267124, - "grad_norm": 1.3490803146688268, - "learning_rate": 3.9379484632946355e-06, - "loss": 0.9502, - "step": 1188 - }, - { - "epoch": 0.1072282094061415, - "grad_norm": 1.4628503689492014, - "learning_rate": 3.937803986088062e-06, - "loss": 1.0859, - "step": 1189 - }, - { - "epoch": 0.10731839292961176, - "grad_norm": 1.648122820717127, - "learning_rate": 3.937659343537367e-06, - "loss": 0.9818, - "step": 1190 - }, - { - "epoch": 0.10740857645308202, - "grad_norm": 1.7885819501863336, - "learning_rate": 3.937514535654893e-06, - "loss": 1.0943, - "step": 1191 - }, - { - "epoch": 0.10749875997655228, - "grad_norm": 2.0078034753262646, - "learning_rate": 3.937369562452996e-06, - "loss": 1.0342, - "step": 1192 - }, - { - "epoch": 0.10758894350002254, - "grad_norm": 1.7971871809742723, - "learning_rate": 3.937224423944044e-06, - "loss": 1.0865, - "step": 1193 - }, - { - "epoch": 0.1076791270234928, - "grad_norm": 1.5770205419311607, - "learning_rate": 3.937079120140423e-06, - "loss": 0.9633, - "step": 1194 - }, - { - "epoch": 0.10776931054696307, - "grad_norm": 1.3992223827968788, - "learning_rate": 3.936933651054531e-06, - "loss": 1.0691, - "step": 1195 - }, - { - "epoch": 0.10785949407043333, - "grad_norm": 1.4073291876153737, - "learning_rate": 3.936788016698779e-06, - "loss": 1.1432, - "step": 1196 - }, - { - "epoch": 0.1079496775939036, - "grad_norm": 1.3863661196810089, - "learning_rate": 3.936642217085594e-06, - "loss": 1.0574, - "step": 1197 - }, - { - "epoch": 0.10803986111737386, - "grad_norm": 1.7072072931294775, - "learning_rate": 3.936496252227417e-06, - "loss": 0.9425, - "step": 1198 - }, - { - "epoch": 0.10813004464084412, - "grad_norm": 1.5170213225292593, - "learning_rate": 3.936350122136703e-06, - "loss": 1.0113, - "step": 1199 - }, - { - "epoch": 0.10822022816431438, - "grad_norm": 0.71143801331068, - "learning_rate": 3.936203826825919e-06, - "loss": 0.9067, - "step": 1200 - }, - { - "epoch": 0.10831041168778464, - "grad_norm": 1.8591121199246385, - "learning_rate": 3.9360573663075475e-06, - "loss": 0.9913, - "step": 1201 - }, - { - "epoch": 0.1084005952112549, - "grad_norm": 2.2199354631476953, - "learning_rate": 3.935910740594087e-06, - "loss": 1.0688, - "step": 1202 - }, - { - "epoch": 0.10849077873472517, - "grad_norm": 1.4704900944443273, - "learning_rate": 3.935763949698047e-06, - "loss": 1.0305, - "step": 1203 - }, - { - "epoch": 0.10858096225819543, - "grad_norm": 1.6403009003723088, - "learning_rate": 3.935616993631954e-06, - "loss": 1.0967, - "step": 1204 - }, - { - "epoch": 0.10867114578166569, - "grad_norm": 2.0024464664613886, - "learning_rate": 3.935469872408345e-06, - "loss": 1.0225, - "step": 1205 - }, - { - "epoch": 0.10876132930513595, - "grad_norm": 1.9212300528767137, - "learning_rate": 3.935322586039776e-06, - "loss": 1.0612, - "step": 1206 - }, - { - "epoch": 0.10885151282860621, - "grad_norm": 1.3486579599874144, - "learning_rate": 3.935175134538811e-06, - "loss": 0.9686, - "step": 1207 - }, - { - "epoch": 0.10894169635207647, - "grad_norm": 2.1947626803282003, - "learning_rate": 3.935027517918034e-06, - "loss": 0.9658, - "step": 1208 - }, - { - "epoch": 0.10903187987554674, - "grad_norm": 1.7338649927796124, - "learning_rate": 3.93487973619004e-06, - "loss": 1.0557, - "step": 1209 - }, - { - "epoch": 0.109122063399017, - "grad_norm": 1.8165882962789037, - "learning_rate": 3.934731789367438e-06, - "loss": 1.0494, - "step": 1210 - }, - { - "epoch": 0.10921224692248727, - "grad_norm": 0.79612539600968, - "learning_rate": 3.9345836774628505e-06, - "loss": 0.8924, - "step": 1211 - }, - { - "epoch": 0.10930243044595753, - "grad_norm": 1.466019628952824, - "learning_rate": 3.934435400488917e-06, - "loss": 1.0171, - "step": 1212 - }, - { - "epoch": 0.10939261396942779, - "grad_norm": 1.6024699547818029, - "learning_rate": 3.934286958458289e-06, - "loss": 1.0618, - "step": 1213 - }, - { - "epoch": 0.10948279749289805, - "grad_norm": 1.8637679961760119, - "learning_rate": 3.934138351383632e-06, - "loss": 1.0923, - "step": 1214 - }, - { - "epoch": 0.10957298101636831, - "grad_norm": 2.008102217971552, - "learning_rate": 3.933989579277626e-06, - "loss": 0.9606, - "step": 1215 - }, - { - "epoch": 0.10966316453983857, - "grad_norm": 1.301089780033414, - "learning_rate": 3.933840642152966e-06, - "loss": 1.0219, - "step": 1216 - }, - { - "epoch": 0.10975334806330883, - "grad_norm": 2.147974692694619, - "learning_rate": 3.933691540022359e-06, - "loss": 1.0633, - "step": 1217 - }, - { - "epoch": 0.10984353158677909, - "grad_norm": 2.2257373016828144, - "learning_rate": 3.933542272898527e-06, - "loss": 0.9968, - "step": 1218 - }, - { - "epoch": 0.10993371511024935, - "grad_norm": 0.7708817844800576, - "learning_rate": 3.933392840794207e-06, - "loss": 0.8236, - "step": 1219 - }, - { - "epoch": 0.11002389863371961, - "grad_norm": 1.6713469197167754, - "learning_rate": 3.93324324372215e-06, - "loss": 1.1319, - "step": 1220 - }, - { - "epoch": 0.11011408215718989, - "grad_norm": 1.5116514836606978, - "learning_rate": 3.9330934816951185e-06, - "loss": 1.0597, - "step": 1221 - }, - { - "epoch": 0.11020426568066015, - "grad_norm": 1.6345951260308167, - "learning_rate": 3.932943554725893e-06, - "loss": 1.05, - "step": 1222 - }, - { - "epoch": 0.11029444920413041, - "grad_norm": 0.870906895324606, - "learning_rate": 3.932793462827265e-06, - "loss": 0.9129, - "step": 1223 - }, - { - "epoch": 0.11038463272760067, - "grad_norm": 1.5250595456364373, - "learning_rate": 3.932643206012041e-06, - "loss": 1.0871, - "step": 1224 - }, - { - "epoch": 0.11047481625107093, - "grad_norm": 1.82362960679507, - "learning_rate": 3.932492784293043e-06, - "loss": 0.9594, - "step": 1225 - }, - { - "epoch": 0.11056499977454119, - "grad_norm": 1.4829444818477446, - "learning_rate": 3.932342197683104e-06, - "loss": 1.0285, - "step": 1226 - }, - { - "epoch": 0.11065518329801145, - "grad_norm": 1.87843656319998, - "learning_rate": 3.932191446195075e-06, - "loss": 0.8969, - "step": 1227 - }, - { - "epoch": 0.11074536682148171, - "grad_norm": 1.6119126987671062, - "learning_rate": 3.9320405298418175e-06, - "loss": 1.0289, - "step": 1228 - }, - { - "epoch": 0.11083555034495197, - "grad_norm": 1.4618532299363176, - "learning_rate": 3.9318894486362076e-06, - "loss": 0.9463, - "step": 1229 - }, - { - "epoch": 0.11092573386842224, - "grad_norm": 1.6173969814977573, - "learning_rate": 3.9317382025911395e-06, - "loss": 1.1076, - "step": 1230 - }, - { - "epoch": 0.1110159173918925, - "grad_norm": 1.5572626457784864, - "learning_rate": 3.9315867917195145e-06, - "loss": 1.1254, - "step": 1231 - }, - { - "epoch": 0.11110610091536276, - "grad_norm": 1.3386974268829768, - "learning_rate": 3.931435216034256e-06, - "loss": 1.037, - "step": 1232 - }, - { - "epoch": 0.11119628443883303, - "grad_norm": 1.812114937755864, - "learning_rate": 3.931283475548293e-06, - "loss": 1.0373, - "step": 1233 - }, - { - "epoch": 0.11128646796230329, - "grad_norm": 1.2432280686555681, - "learning_rate": 3.931131570274576e-06, - "loss": 1.0304, - "step": 1234 - }, - { - "epoch": 0.11137665148577355, - "grad_norm": 1.4115179783604823, - "learning_rate": 3.930979500226065e-06, - "loss": 1.0551, - "step": 1235 - }, - { - "epoch": 0.11146683500924381, - "grad_norm": 1.8468759191417181, - "learning_rate": 3.930827265415736e-06, - "loss": 0.8697, - "step": 1236 - }, - { - "epoch": 0.11155701853271408, - "grad_norm": 1.5617093183758393, - "learning_rate": 3.930674865856578e-06, - "loss": 1.0394, - "step": 1237 - }, - { - "epoch": 0.11164720205618434, - "grad_norm": 2.007115105658687, - "learning_rate": 3.930522301561595e-06, - "loss": 1.0648, - "step": 1238 - }, - { - "epoch": 0.1117373855796546, - "grad_norm": 1.747338519077885, - "learning_rate": 3.930369572543804e-06, - "loss": 1.079, - "step": 1239 - }, - { - "epoch": 0.11182756910312486, - "grad_norm": 1.298498263813631, - "learning_rate": 3.930216678816237e-06, - "loss": 0.9687, - "step": 1240 - }, - { - "epoch": 0.11191775262659512, - "grad_norm": 0.6686787094906129, - "learning_rate": 3.930063620391941e-06, - "loss": 0.8654, - "step": 1241 - }, - { - "epoch": 0.11200793615006538, - "grad_norm": 1.7227704276309075, - "learning_rate": 3.9299103972839735e-06, - "loss": 1.0413, - "step": 1242 - }, - { - "epoch": 0.11209811967353564, - "grad_norm": 0.6608673870152524, - "learning_rate": 3.92975700950541e-06, - "loss": 0.8404, - "step": 1243 - }, - { - "epoch": 0.1121883031970059, - "grad_norm": 1.640041211122171, - "learning_rate": 3.929603457069338e-06, - "loss": 1.0619, - "step": 1244 - }, - { - "epoch": 0.11227848672047618, - "grad_norm": 2.0734458388482113, - "learning_rate": 3.929449739988859e-06, - "loss": 1.0031, - "step": 1245 - }, - { - "epoch": 0.11236867024394644, - "grad_norm": 1.2563998896818893, - "learning_rate": 3.929295858277089e-06, - "loss": 1.0306, - "step": 1246 - }, - { - "epoch": 0.1124588537674167, - "grad_norm": 1.6160443084657172, - "learning_rate": 3.9291418119471585e-06, - "loss": 1.0261, - "step": 1247 - }, - { - "epoch": 0.11254903729088696, - "grad_norm": 1.5806740296083877, - "learning_rate": 3.928987601012212e-06, - "loss": 1.0598, - "step": 1248 - }, - { - "epoch": 0.11263922081435722, - "grad_norm": 1.8277485125606987, - "learning_rate": 3.928833225485407e-06, - "loss": 1.0136, - "step": 1249 - }, - { - "epoch": 0.11272940433782748, - "grad_norm": 1.4767566533078458, - "learning_rate": 3.928678685379915e-06, - "loss": 1.0233, - "step": 1250 - }, - { - "epoch": 0.11281958786129774, - "grad_norm": 1.5368116931512132, - "learning_rate": 3.928523980708924e-06, - "loss": 1.082, - "step": 1251 - }, - { - "epoch": 0.112909771384768, - "grad_norm": 2.1095194449241714, - "learning_rate": 3.928369111485632e-06, - "loss": 0.9919, - "step": 1252 - }, - { - "epoch": 0.11299995490823826, - "grad_norm": 2.0009429616047787, - "learning_rate": 3.928214077723255e-06, - "loss": 1.023, - "step": 1253 - }, - { - "epoch": 0.11309013843170852, - "grad_norm": 1.8231441101600094, - "learning_rate": 3.928058879435021e-06, - "loss": 1.0166, - "step": 1254 - }, - { - "epoch": 0.11318032195517878, - "grad_norm": 2.416316292016669, - "learning_rate": 3.9279035166341725e-06, - "loss": 1.0043, - "step": 1255 - }, - { - "epoch": 0.11327050547864904, - "grad_norm": 1.6301727128529875, - "learning_rate": 3.927747989333965e-06, - "loss": 1.0155, - "step": 1256 - }, - { - "epoch": 0.11336068900211932, - "grad_norm": 2.6659412191997998, - "learning_rate": 3.927592297547669e-06, - "loss": 1.0015, - "step": 1257 - }, - { - "epoch": 0.11345087252558958, - "grad_norm": 1.756466159211153, - "learning_rate": 3.927436441288571e-06, - "loss": 1.0529, - "step": 1258 - }, - { - "epoch": 0.11354105604905984, - "grad_norm": 1.4442124588022593, - "learning_rate": 3.927280420569968e-06, - "loss": 1.0496, - "step": 1259 - }, - { - "epoch": 0.1136312395725301, - "grad_norm": 1.3323027034935668, - "learning_rate": 3.927124235405171e-06, - "loss": 1.0629, - "step": 1260 - }, - { - "epoch": 0.11372142309600036, - "grad_norm": 1.5976646068699532, - "learning_rate": 3.92696788580751e-06, - "loss": 0.8824, - "step": 1261 - }, - { - "epoch": 0.11381160661947062, - "grad_norm": 1.4873361865970516, - "learning_rate": 3.9268113717903225e-06, - "loss": 0.8628, - "step": 1262 - }, - { - "epoch": 0.11390179014294088, - "grad_norm": 1.56723829892589, - "learning_rate": 3.926654693366965e-06, - "loss": 1.0915, - "step": 1263 - }, - { - "epoch": 0.11399197366641114, - "grad_norm": 1.9703429907209997, - "learning_rate": 3.926497850550805e-06, - "loss": 1.0359, - "step": 1264 - }, - { - "epoch": 0.1140821571898814, - "grad_norm": 1.9665118619908781, - "learning_rate": 3.926340843355226e-06, - "loss": 1.0366, - "step": 1265 - }, - { - "epoch": 0.11417234071335167, - "grad_norm": 1.8769158430098385, - "learning_rate": 3.926183671793625e-06, - "loss": 1.0512, - "step": 1266 - }, - { - "epoch": 0.11426252423682193, - "grad_norm": 1.7022387097091152, - "learning_rate": 3.926026335879412e-06, - "loss": 1.0032, - "step": 1267 - }, - { - "epoch": 0.11435270776029219, - "grad_norm": 0.6170924391150888, - "learning_rate": 3.925868835626012e-06, - "loss": 0.798, - "step": 1268 - }, - { - "epoch": 0.11444289128376246, - "grad_norm": 1.7264053480339474, - "learning_rate": 3.925711171046864e-06, - "loss": 1.0392, - "step": 1269 - }, - { - "epoch": 0.11453307480723272, - "grad_norm": 1.5855882382219364, - "learning_rate": 3.925553342155421e-06, - "loss": 0.985, - "step": 1270 - }, - { - "epoch": 0.11462325833070298, - "grad_norm": 0.6937725501648422, - "learning_rate": 3.9253953489651485e-06, - "loss": 0.9035, - "step": 1271 - }, - { - "epoch": 0.11471344185417325, - "grad_norm": 1.1522160863472837, - "learning_rate": 3.925237191489529e-06, - "loss": 0.8557, - "step": 1272 - }, - { - "epoch": 0.1148036253776435, - "grad_norm": 1.4399422341257369, - "learning_rate": 3.925078869742056e-06, - "loss": 0.9991, - "step": 1273 - }, - { - "epoch": 0.11489380890111377, - "grad_norm": 1.4172154466929052, - "learning_rate": 3.92492038373624e-06, - "loss": 0.9684, - "step": 1274 - }, - { - "epoch": 0.11498399242458403, - "grad_norm": 1.4285165963549045, - "learning_rate": 3.924761733485602e-06, - "loss": 1.0748, - "step": 1275 - }, - { - "epoch": 0.11507417594805429, - "grad_norm": 1.4305597364905698, - "learning_rate": 3.92460291900368e-06, - "loss": 1.0241, - "step": 1276 - }, - { - "epoch": 0.11516435947152455, - "grad_norm": 1.3809055174978193, - "learning_rate": 3.924443940304025e-06, - "loss": 1.0475, - "step": 1277 - }, - { - "epoch": 0.11525454299499481, - "grad_norm": 1.5885557914064237, - "learning_rate": 3.924284797400202e-06, - "loss": 1.0726, - "step": 1278 - }, - { - "epoch": 0.11534472651846507, - "grad_norm": 1.869840006792286, - "learning_rate": 3.924125490305789e-06, - "loss": 1.0518, - "step": 1279 - }, - { - "epoch": 0.11543491004193533, - "grad_norm": 1.3635089323438783, - "learning_rate": 3.923966019034381e-06, - "loss": 1.0071, - "step": 1280 - }, - { - "epoch": 0.1155250935654056, - "grad_norm": 2.79481532802132, - "learning_rate": 3.923806383599583e-06, - "loss": 1.0352, - "step": 1281 - }, - { - "epoch": 0.11561527708887587, - "grad_norm": 1.6741628946376073, - "learning_rate": 3.923646584015017e-06, - "loss": 0.9968, - "step": 1282 - }, - { - "epoch": 0.11570546061234613, - "grad_norm": 1.6700241682165466, - "learning_rate": 3.923486620294316e-06, - "loss": 1.0267, - "step": 1283 - }, - { - "epoch": 0.11579564413581639, - "grad_norm": 2.3353223724340326, - "learning_rate": 3.923326492451132e-06, - "loss": 1.0292, - "step": 1284 - }, - { - "epoch": 0.11588582765928665, - "grad_norm": 1.350563782836412, - "learning_rate": 3.923166200499125e-06, - "loss": 1.0066, - "step": 1285 - }, - { - "epoch": 0.11597601118275691, - "grad_norm": 2.502191060267241, - "learning_rate": 3.923005744451975e-06, - "loss": 0.9953, - "step": 1286 - }, - { - "epoch": 0.11606619470622717, - "grad_norm": 2.719454969888627, - "learning_rate": 3.9228451243233715e-06, - "loss": 0.9805, - "step": 1287 - }, - { - "epoch": 0.11615637822969743, - "grad_norm": 1.4848608727578296, - "learning_rate": 3.9226843401270195e-06, - "loss": 1.0549, - "step": 1288 - }, - { - "epoch": 0.1162465617531677, - "grad_norm": 1.5902407288805067, - "learning_rate": 3.9225233918766376e-06, - "loss": 1.0238, - "step": 1289 - }, - { - "epoch": 0.11633674527663795, - "grad_norm": 5.068951110989391, - "learning_rate": 3.92236227958596e-06, - "loss": 1.0419, - "step": 1290 - }, - { - "epoch": 0.11642692880010821, - "grad_norm": 1.7931827591973692, - "learning_rate": 3.922201003268731e-06, - "loss": 1.0416, - "step": 1291 - }, - { - "epoch": 0.11651711232357848, - "grad_norm": 0.6882203836189977, - "learning_rate": 3.922039562938715e-06, - "loss": 0.8884, - "step": 1292 - }, - { - "epoch": 0.11660729584704875, - "grad_norm": 1.4230921639401544, - "learning_rate": 3.921877958609685e-06, - "loss": 1.0807, - "step": 1293 - }, - { - "epoch": 0.11669747937051901, - "grad_norm": 0.8192228073202994, - "learning_rate": 3.921716190295431e-06, - "loss": 0.9884, - "step": 1294 - }, - { - "epoch": 0.11678766289398927, - "grad_norm": 1.676615331835918, - "learning_rate": 3.921554258009755e-06, - "loss": 0.9835, - "step": 1295 - }, - { - "epoch": 0.11687784641745953, - "grad_norm": 1.5321058878597067, - "learning_rate": 3.921392161766474e-06, - "loss": 1.015, - "step": 1296 - }, - { - "epoch": 0.1169680299409298, - "grad_norm": 1.9272112692215662, - "learning_rate": 3.92122990157942e-06, - "loss": 1.0018, - "step": 1297 - }, - { - "epoch": 0.11705821346440005, - "grad_norm": 1.5263232165621494, - "learning_rate": 3.921067477462437e-06, - "loss": 1.0682, - "step": 1298 - }, - { - "epoch": 0.11714839698787032, - "grad_norm": 1.9809898277841873, - "learning_rate": 3.920904889429385e-06, - "loss": 1.0883, - "step": 1299 - }, - { - "epoch": 0.11723858051134058, - "grad_norm": 1.6537285811579088, - "learning_rate": 3.920742137494135e-06, - "loss": 1.0629, - "step": 1300 - }, - { - "epoch": 0.11732876403481084, - "grad_norm": 1.5287716330169652, - "learning_rate": 3.920579221670575e-06, - "loss": 1.0892, - "step": 1301 - }, - { - "epoch": 0.1174189475582811, - "grad_norm": 2.6143724475531687, - "learning_rate": 3.920416141972606e-06, - "loss": 1.0312, - "step": 1302 - }, - { - "epoch": 0.11750913108175136, - "grad_norm": 1.2965060249215103, - "learning_rate": 3.920252898414143e-06, - "loss": 1.0257, - "step": 1303 - }, - { - "epoch": 0.11759931460522162, - "grad_norm": 1.7800283425464198, - "learning_rate": 3.920089491009114e-06, - "loss": 1.0335, - "step": 1304 - }, - { - "epoch": 0.1176894981286919, - "grad_norm": 1.429941206970572, - "learning_rate": 3.919925919771463e-06, - "loss": 1.0975, - "step": 1305 - }, - { - "epoch": 0.11777968165216215, - "grad_norm": 1.5665483291383784, - "learning_rate": 3.919762184715146e-06, - "loss": 0.9434, - "step": 1306 - }, - { - "epoch": 0.11786986517563242, - "grad_norm": 2.250234591652226, - "learning_rate": 3.919598285854134e-06, - "loss": 1.0285, - "step": 1307 - }, - { - "epoch": 0.11796004869910268, - "grad_norm": 1.70967661761232, - "learning_rate": 3.919434223202411e-06, - "loss": 1.0861, - "step": 1308 - }, - { - "epoch": 0.11805023222257294, - "grad_norm": 1.3674015858471256, - "learning_rate": 3.919269996773977e-06, - "loss": 1.0805, - "step": 1309 - }, - { - "epoch": 0.1181404157460432, - "grad_norm": 1.252147070372684, - "learning_rate": 3.919105606582844e-06, - "loss": 1.0408, - "step": 1310 - }, - { - "epoch": 0.11823059926951346, - "grad_norm": 1.7184814243283861, - "learning_rate": 3.918941052643039e-06, - "loss": 1.0014, - "step": 1311 - }, - { - "epoch": 0.11832078279298372, - "grad_norm": 1.2559297105899299, - "learning_rate": 3.918776334968602e-06, - "loss": 1.0369, - "step": 1312 - }, - { - "epoch": 0.11841096631645398, - "grad_norm": 2.031111727189727, - "learning_rate": 3.918611453573589e-06, - "loss": 1.1348, - "step": 1313 - }, - { - "epoch": 0.11850114983992424, - "grad_norm": 1.4554772156137994, - "learning_rate": 3.918446408472066e-06, - "loss": 0.9641, - "step": 1314 - }, - { - "epoch": 0.1185913333633945, - "grad_norm": 1.7763830250290416, - "learning_rate": 3.918281199678119e-06, - "loss": 1.0703, - "step": 1315 - }, - { - "epoch": 0.11868151688686476, - "grad_norm": 0.7142747793041812, - "learning_rate": 3.9181158272058414e-06, - "loss": 0.8568, - "step": 1316 - }, - { - "epoch": 0.11877170041033504, - "grad_norm": 1.4100446577643022, - "learning_rate": 3.9179502910693455e-06, - "loss": 1.0637, - "step": 1317 - }, - { - "epoch": 0.1188618839338053, - "grad_norm": 1.4729698919727046, - "learning_rate": 3.917784591282756e-06, - "loss": 0.9867, - "step": 1318 - }, - { - "epoch": 0.11895206745727556, - "grad_norm": 1.4955829116759196, - "learning_rate": 3.9176187278602105e-06, - "loss": 1.057, - "step": 1319 - }, - { - "epoch": 0.11904225098074582, - "grad_norm": 1.4982745259971804, - "learning_rate": 3.9174527008158606e-06, - "loss": 0.9743, - "step": 1320 - }, - { - "epoch": 0.11913243450421608, - "grad_norm": 1.6844862411086576, - "learning_rate": 3.917286510163874e-06, - "loss": 0.9518, - "step": 1321 - }, - { - "epoch": 0.11922261802768634, - "grad_norm": 1.0484237601777018, - "learning_rate": 3.917120155918431e-06, - "loss": 1.0076, - "step": 1322 - }, - { - "epoch": 0.1193128015511566, - "grad_norm": 0.7130959829031939, - "learning_rate": 3.916953638093725e-06, - "loss": 0.9111, - "step": 1323 - }, - { - "epoch": 0.11940298507462686, - "grad_norm": 1.720316918252231, - "learning_rate": 3.916786956703964e-06, - "loss": 1.027, - "step": 1324 - }, - { - "epoch": 0.11949316859809712, - "grad_norm": 2.792766088855812, - "learning_rate": 3.916620111763372e-06, - "loss": 1.0237, - "step": 1325 - }, - { - "epoch": 0.11958335212156739, - "grad_norm": 1.4402697872053427, - "learning_rate": 3.916453103286183e-06, - "loss": 1.0838, - "step": 1326 - }, - { - "epoch": 0.11967353564503765, - "grad_norm": 1.8352702774862781, - "learning_rate": 3.916285931286648e-06, - "loss": 1.0738, - "step": 1327 - }, - { - "epoch": 0.11976371916850792, - "grad_norm": 1.4006559793569011, - "learning_rate": 3.916118595779031e-06, - "loss": 0.9712, - "step": 1328 - }, - { - "epoch": 0.11985390269197818, - "grad_norm": 1.5574061714186274, - "learning_rate": 3.915951096777611e-06, - "loss": 0.9998, - "step": 1329 - }, - { - "epoch": 0.11994408621544844, - "grad_norm": 1.8740954442425977, - "learning_rate": 3.915783434296678e-06, - "loss": 0.9686, - "step": 1330 - }, - { - "epoch": 0.1200342697389187, - "grad_norm": 2.577463885996885, - "learning_rate": 3.91561560835054e-06, - "loss": 1.005, - "step": 1331 - }, - { - "epoch": 0.12012445326238896, - "grad_norm": 1.4782920275267983, - "learning_rate": 3.915447618953515e-06, - "loss": 1.0457, - "step": 1332 - }, - { - "epoch": 0.12021463678585922, - "grad_norm": 1.7392057210756202, - "learning_rate": 3.915279466119937e-06, - "loss": 0.9839, - "step": 1333 - }, - { - "epoch": 0.12030482030932949, - "grad_norm": 0.7221806791372168, - "learning_rate": 3.9151111498641546e-06, - "loss": 0.8787, - "step": 1334 - }, - { - "epoch": 0.12039500383279975, - "grad_norm": 1.7172065740746056, - "learning_rate": 3.914942670200529e-06, - "loss": 0.9075, - "step": 1335 - }, - { - "epoch": 0.12048518735627001, - "grad_norm": 1.44301129223165, - "learning_rate": 3.914774027143436e-06, - "loss": 1.076, - "step": 1336 - }, - { - "epoch": 0.12057537087974027, - "grad_norm": 1.4932340776433466, - "learning_rate": 3.914605220707265e-06, - "loss": 1.0166, - "step": 1337 - }, - { - "epoch": 0.12066555440321053, - "grad_norm": 1.4972328252108267, - "learning_rate": 3.9144362509064194e-06, - "loss": 1.1804, - "step": 1338 - }, - { - "epoch": 0.12075573792668079, - "grad_norm": 1.5459416974413418, - "learning_rate": 3.914267117755317e-06, - "loss": 1.0975, - "step": 1339 - }, - { - "epoch": 0.12084592145015106, - "grad_norm": 1.9875060483252665, - "learning_rate": 3.914097821268389e-06, - "loss": 0.991, - "step": 1340 - }, - { - "epoch": 0.12093610497362133, - "grad_norm": 1.166257031369453, - "learning_rate": 3.913928361460081e-06, - "loss": 1.0385, - "step": 1341 - }, - { - "epoch": 0.12102628849709159, - "grad_norm": 1.3823999339030832, - "learning_rate": 3.913758738344851e-06, - "loss": 1.0129, - "step": 1342 - }, - { - "epoch": 0.12111647202056185, - "grad_norm": 0.7537582331218834, - "learning_rate": 3.913588951937174e-06, - "loss": 0.8696, - "step": 1343 - }, - { - "epoch": 0.12120665554403211, - "grad_norm": 0.7034724966297408, - "learning_rate": 3.9134190022515355e-06, - "loss": 0.8657, - "step": 1344 - }, - { - "epoch": 0.12129683906750237, - "grad_norm": 1.6124814616475764, - "learning_rate": 3.913248889302438e-06, - "loss": 0.9928, - "step": 1345 - }, - { - "epoch": 0.12138702259097263, - "grad_norm": 1.5478535649139256, - "learning_rate": 3.913078613104395e-06, - "loss": 0.9662, - "step": 1346 - }, - { - "epoch": 0.12147720611444289, - "grad_norm": 1.516206531451918, - "learning_rate": 3.912908173671936e-06, - "loss": 0.9095, - "step": 1347 - }, - { - "epoch": 0.12156738963791315, - "grad_norm": 1.481194149847631, - "learning_rate": 3.9127375710196044e-06, - "loss": 0.9591, - "step": 1348 - }, - { - "epoch": 0.12165757316138341, - "grad_norm": 1.9495361925841947, - "learning_rate": 3.912566805161957e-06, - "loss": 0.9686, - "step": 1349 - }, - { - "epoch": 0.12174775668485367, - "grad_norm": 1.642031765857049, - "learning_rate": 3.912395876113564e-06, - "loss": 1.0451, - "step": 1350 - }, - { - "epoch": 0.12183794020832393, - "grad_norm": 1.576082475246939, - "learning_rate": 3.912224783889009e-06, - "loss": 1.0323, - "step": 1351 - }, - { - "epoch": 0.12192812373179421, - "grad_norm": 1.5644962528680078, - "learning_rate": 3.912053528502892e-06, - "loss": 1.0306, - "step": 1352 - }, - { - "epoch": 0.12201830725526447, - "grad_norm": 1.3724409478413213, - "learning_rate": 3.911882109969825e-06, - "loss": 0.9458, - "step": 1353 - }, - { - "epoch": 0.12210849077873473, - "grad_norm": 1.6065076450672957, - "learning_rate": 3.911710528304435e-06, - "loss": 1.028, - "step": 1354 - }, - { - "epoch": 0.12219867430220499, - "grad_norm": 1.8337244859206339, - "learning_rate": 3.911538783521361e-06, - "loss": 0.9783, - "step": 1355 - }, - { - "epoch": 0.12228885782567525, - "grad_norm": 1.6446571573730684, - "learning_rate": 3.9113668756352575e-06, - "loss": 1.039, - "step": 1356 - }, - { - "epoch": 0.12237904134914551, - "grad_norm": 1.350618109661216, - "learning_rate": 3.911194804660793e-06, - "loss": 0.9566, - "step": 1357 - }, - { - "epoch": 0.12246922487261577, - "grad_norm": 1.5776811485445272, - "learning_rate": 3.91102257061265e-06, - "loss": 1.1057, - "step": 1358 - }, - { - "epoch": 0.12255940839608603, - "grad_norm": 1.3667380765928965, - "learning_rate": 3.910850173505524e-06, - "loss": 1.0697, - "step": 1359 - }, - { - "epoch": 0.1226495919195563, - "grad_norm": 1.4793906601921845, - "learning_rate": 3.9106776133541255e-06, - "loss": 1.1005, - "step": 1360 - }, - { - "epoch": 0.12273977544302656, - "grad_norm": 1.8228930081240047, - "learning_rate": 3.9105048901731766e-06, - "loss": 0.9873, - "step": 1361 - }, - { - "epoch": 0.12282995896649682, - "grad_norm": 1.5335888031283642, - "learning_rate": 3.9103320039774165e-06, - "loss": 1.0065, - "step": 1362 - }, - { - "epoch": 0.12292014248996708, - "grad_norm": 1.5487978765124353, - "learning_rate": 3.9101589547815965e-06, - "loss": 1.09, - "step": 1363 - }, - { - "epoch": 0.12301032601343735, - "grad_norm": 1.3201731320472352, - "learning_rate": 3.909985742600482e-06, - "loss": 1.0136, - "step": 1364 - }, - { - "epoch": 0.12310050953690761, - "grad_norm": 1.4575757193158312, - "learning_rate": 3.909812367448852e-06, - "loss": 0.9947, - "step": 1365 - }, - { - "epoch": 0.12319069306037787, - "grad_norm": 2.1108395613759066, - "learning_rate": 3.909638829341501e-06, - "loss": 1.0022, - "step": 1366 - }, - { - "epoch": 0.12328087658384813, - "grad_norm": 1.5554609052306025, - "learning_rate": 3.909465128293234e-06, - "loss": 1.0821, - "step": 1367 - }, - { - "epoch": 0.1233710601073184, - "grad_norm": 1.478994637098513, - "learning_rate": 3.9092912643188745e-06, - "loss": 1.0838, - "step": 1368 - }, - { - "epoch": 0.12346124363078866, - "grad_norm": 1.467953892298164, - "learning_rate": 3.909117237433256e-06, - "loss": 1.0177, - "step": 1369 - }, - { - "epoch": 0.12355142715425892, - "grad_norm": 1.5025773158944196, - "learning_rate": 3.908943047651229e-06, - "loss": 1.0705, - "step": 1370 - }, - { - "epoch": 0.12364161067772918, - "grad_norm": 1.6007362281198552, - "learning_rate": 3.908768694987655e-06, - "loss": 1.0122, - "step": 1371 - }, - { - "epoch": 0.12373179420119944, - "grad_norm": 2.4687559876188265, - "learning_rate": 3.908594179457411e-06, - "loss": 0.9365, - "step": 1372 - }, - { - "epoch": 0.1238219777246697, - "grad_norm": 1.669277633518248, - "learning_rate": 3.908419501075388e-06, - "loss": 1.0245, - "step": 1373 - }, - { - "epoch": 0.12391216124813996, - "grad_norm": 1.3887884463972313, - "learning_rate": 3.90824465985649e-06, - "loss": 1.1003, - "step": 1374 - }, - { - "epoch": 0.12400234477161022, - "grad_norm": 1.5930156886063986, - "learning_rate": 3.908069655815636e-06, - "loss": 1.0666, - "step": 1375 - }, - { - "epoch": 0.1240925282950805, - "grad_norm": 2.083996959530302, - "learning_rate": 3.907894488967758e-06, - "loss": 0.9757, - "step": 1376 - }, - { - "epoch": 0.12418271181855076, - "grad_norm": 1.7750057059182882, - "learning_rate": 3.9077191593278005e-06, - "loss": 1.0912, - "step": 1377 - }, - { - "epoch": 0.12427289534202102, - "grad_norm": 0.7569925650681256, - "learning_rate": 3.9075436669107265e-06, - "loss": 0.876, - "step": 1378 - }, - { - "epoch": 0.12436307886549128, - "grad_norm": 1.6966368956036768, - "learning_rate": 3.90736801173151e-06, - "loss": 1.0402, - "step": 1379 - }, - { - "epoch": 0.12445326238896154, - "grad_norm": 1.2940118275946453, - "learning_rate": 3.907192193805136e-06, - "loss": 1.0381, - "step": 1380 - }, - { - "epoch": 0.1245434459124318, - "grad_norm": 1.5800155042539492, - "learning_rate": 3.907016213146608e-06, - "loss": 1.0744, - "step": 1381 - }, - { - "epoch": 0.12463362943590206, - "grad_norm": 1.4431517249037016, - "learning_rate": 3.906840069770942e-06, - "loss": 1.1366, - "step": 1382 - }, - { - "epoch": 0.12472381295937232, - "grad_norm": 1.277259007247253, - "learning_rate": 3.906663763693167e-06, - "loss": 1.0596, - "step": 1383 - }, - { - "epoch": 0.12481399648284258, - "grad_norm": 1.4803612501424988, - "learning_rate": 3.906487294928327e-06, - "loss": 1.0039, - "step": 1384 - }, - { - "epoch": 0.12490418000631284, - "grad_norm": 1.694974171151936, - "learning_rate": 3.906310663491478e-06, - "loss": 1.0245, - "step": 1385 - }, - { - "epoch": 0.1249943635297831, - "grad_norm": 1.49721514952402, - "learning_rate": 3.906133869397692e-06, - "loss": 1.0827, - "step": 1386 - }, - { - "epoch": 0.12508454705325336, - "grad_norm": 5.337877225387957, - "learning_rate": 3.905956912662054e-06, - "loss": 0.9827, - "step": 1387 - }, - { - "epoch": 0.12517473057672363, - "grad_norm": 1.8376636587946702, - "learning_rate": 3.905779793299662e-06, - "loss": 1.0892, - "step": 1388 - }, - { - "epoch": 0.12526491410019389, - "grad_norm": 1.553364406378589, - "learning_rate": 3.905602511325631e-06, - "loss": 1.059, - "step": 1389 - }, - { - "epoch": 0.12535509762366415, - "grad_norm": 1.6903395603952645, - "learning_rate": 3.905425066755086e-06, - "loss": 0.9886, - "step": 1390 - }, - { - "epoch": 0.1254452811471344, - "grad_norm": 1.3142874217910812, - "learning_rate": 3.905247459603168e-06, - "loss": 0.9847, - "step": 1391 - }, - { - "epoch": 0.12553546467060467, - "grad_norm": 2.5285148442628276, - "learning_rate": 3.905069689885031e-06, - "loss": 0.9097, - "step": 1392 - }, - { - "epoch": 0.12562564819407493, - "grad_norm": 1.5910230756772972, - "learning_rate": 3.904891757615843e-06, - "loss": 0.9106, - "step": 1393 - }, - { - "epoch": 0.12571583171754522, - "grad_norm": 0.6234071939315813, - "learning_rate": 3.9047136628107874e-06, - "loss": 0.8276, - "step": 1394 - }, - { - "epoch": 0.12580601524101548, - "grad_norm": 1.512219644209992, - "learning_rate": 3.904535405485059e-06, - "loss": 1.1533, - "step": 1395 - }, - { - "epoch": 0.12589619876448574, - "grad_norm": 2.187821391882697, - "learning_rate": 3.90435698565387e-06, - "loss": 0.9816, - "step": 1396 - }, - { - "epoch": 0.125986382287956, - "grad_norm": 1.3909963476296254, - "learning_rate": 3.904178403332441e-06, - "loss": 1.001, - "step": 1397 - }, - { - "epoch": 0.12607656581142626, - "grad_norm": 1.768458632878644, - "learning_rate": 3.903999658536012e-06, - "loss": 1.1026, - "step": 1398 - }, - { - "epoch": 0.12616674933489652, - "grad_norm": 1.4199334607182943, - "learning_rate": 3.903820751279833e-06, - "loss": 1.0239, - "step": 1399 - }, - { - "epoch": 0.12625693285836678, - "grad_norm": 3.0179974804685075, - "learning_rate": 3.90364168157917e-06, - "loss": 1.0612, - "step": 1400 - }, - { - "epoch": 0.12634711638183704, - "grad_norm": 1.5591689652154652, - "learning_rate": 3.903462449449302e-06, - "loss": 1.0537, - "step": 1401 - }, - { - "epoch": 0.1264372999053073, - "grad_norm": 1.323964727562871, - "learning_rate": 3.903283054905522e-06, - "loss": 1.0024, - "step": 1402 - }, - { - "epoch": 0.12652748342877757, - "grad_norm": 2.3650939546919196, - "learning_rate": 3.9031034979631385e-06, - "loss": 0.9397, - "step": 1403 - }, - { - "epoch": 0.12661766695224783, - "grad_norm": 1.4176780886183606, - "learning_rate": 3.902923778637469e-06, - "loss": 1.0611, - "step": 1404 - }, - { - "epoch": 0.1267078504757181, - "grad_norm": 1.3622424695973663, - "learning_rate": 3.902743896943852e-06, - "loss": 0.9621, - "step": 1405 - }, - { - "epoch": 0.12679803399918835, - "grad_norm": 1.085343122766977, - "learning_rate": 3.902563852897633e-06, - "loss": 0.9093, - "step": 1406 - }, - { - "epoch": 0.1268882175226586, - "grad_norm": 0.7045563434083801, - "learning_rate": 3.9023836465141755e-06, - "loss": 0.878, - "step": 1407 - }, - { - "epoch": 0.12697840104612887, - "grad_norm": 2.823967422551399, - "learning_rate": 3.902203277808856e-06, - "loss": 1.0666, - "step": 1408 - }, - { - "epoch": 0.12706858456959913, - "grad_norm": 1.578460204528151, - "learning_rate": 3.902022746797064e-06, - "loss": 1.0826, - "step": 1409 - }, - { - "epoch": 0.1271587680930694, - "grad_norm": 1.59614525112988, - "learning_rate": 3.9018420534942035e-06, - "loss": 1.1401, - "step": 1410 - }, - { - "epoch": 0.12724895161653965, - "grad_norm": 1.6886303259674984, - "learning_rate": 3.9016611979156935e-06, - "loss": 1.013, - "step": 1411 - }, - { - "epoch": 0.1273391351400099, - "grad_norm": 2.1979050718848367, - "learning_rate": 3.9014801800769635e-06, - "loss": 0.9396, - "step": 1412 - }, - { - "epoch": 0.12742931866348017, - "grad_norm": 1.8504090398218824, - "learning_rate": 3.901298999993459e-06, - "loss": 1.0562, - "step": 1413 - }, - { - "epoch": 0.12751950218695043, - "grad_norm": 1.8453678856014668, - "learning_rate": 3.901117657680642e-06, - "loss": 0.958, - "step": 1414 - }, - { - "epoch": 0.1276096857104207, - "grad_norm": 1.37154005128329, - "learning_rate": 3.900936153153982e-06, - "loss": 0.9542, - "step": 1415 - }, - { - "epoch": 0.12769986923389096, - "grad_norm": 0.7840139705619901, - "learning_rate": 3.900754486428968e-06, - "loss": 0.8271, - "step": 1416 - }, - { - "epoch": 0.12779005275736122, - "grad_norm": 1.5051946496396513, - "learning_rate": 3.900572657521102e-06, - "loss": 1.1148, - "step": 1417 - }, - { - "epoch": 0.1278802362808315, - "grad_norm": 1.2441518354883963, - "learning_rate": 3.900390666445896e-06, - "loss": 1.0052, - "step": 1418 - }, - { - "epoch": 0.12797041980430177, - "grad_norm": 1.783234210542476, - "learning_rate": 3.9002085132188795e-06, - "loss": 0.9914, - "step": 1419 - }, - { - "epoch": 0.12806060332777203, - "grad_norm": 1.2272965973963472, - "learning_rate": 3.9000261978555964e-06, - "loss": 1.0673, - "step": 1420 - }, - { - "epoch": 0.1281507868512423, - "grad_norm": 1.4934705241372106, - "learning_rate": 3.8998437203716e-06, - "loss": 1.0919, - "step": 1421 - }, - { - "epoch": 0.12824097037471255, - "grad_norm": 1.373489851056942, - "learning_rate": 3.899661080782462e-06, - "loss": 1.0503, - "step": 1422 - }, - { - "epoch": 0.1283311538981828, - "grad_norm": 1.7715474278375272, - "learning_rate": 3.899478279103767e-06, - "loss": 1.0558, - "step": 1423 - }, - { - "epoch": 0.12842133742165307, - "grad_norm": 1.7228450887895959, - "learning_rate": 3.8992953153511105e-06, - "loss": 1.0587, - "step": 1424 - }, - { - "epoch": 0.12851152094512333, - "grad_norm": 1.4299087770082348, - "learning_rate": 3.899112189540106e-06, - "loss": 1.0844, - "step": 1425 - }, - { - "epoch": 0.1286017044685936, - "grad_norm": 1.75904179292338, - "learning_rate": 3.898928901686377e-06, - "loss": 1.0659, - "step": 1426 - }, - { - "epoch": 0.12869188799206385, - "grad_norm": 0.7867768297472403, - "learning_rate": 3.898745451805564e-06, - "loss": 0.8505, - "step": 1427 - }, - { - "epoch": 0.1287820715155341, - "grad_norm": 1.6294442072935842, - "learning_rate": 3.898561839913319e-06, - "loss": 0.9634, - "step": 1428 - }, - { - "epoch": 0.12887225503900437, - "grad_norm": 2.095563416388178, - "learning_rate": 3.89837806602531e-06, - "loss": 0.8714, - "step": 1429 - }, - { - "epoch": 0.12896243856247463, - "grad_norm": 1.821517021905955, - "learning_rate": 3.898194130157217e-06, - "loss": 1.0329, - "step": 1430 - }, - { - "epoch": 0.1290526220859449, - "grad_norm": 1.8118163167681753, - "learning_rate": 3.8980100323247335e-06, - "loss": 0.9571, - "step": 1431 - }, - { - "epoch": 0.12914280560941516, - "grad_norm": 1.8305008276922152, - "learning_rate": 3.897825772543568e-06, - "loss": 0.9589, - "step": 1432 - }, - { - "epoch": 0.12923298913288542, - "grad_norm": 9.417775136697129, - "learning_rate": 3.897641350829444e-06, - "loss": 1.0836, - "step": 1433 - }, - { - "epoch": 0.12932317265635568, - "grad_norm": 1.3743963216851187, - "learning_rate": 3.897456767198096e-06, - "loss": 1.11, - "step": 1434 - }, - { - "epoch": 0.12941335617982594, - "grad_norm": 1.8953018351708462, - "learning_rate": 3.897272021665275e-06, - "loss": 0.9629, - "step": 1435 - }, - { - "epoch": 0.1295035397032962, - "grad_norm": 1.349935312840606, - "learning_rate": 3.897087114246743e-06, - "loss": 1.1306, - "step": 1436 - }, - { - "epoch": 0.12959372322676646, - "grad_norm": 1.697967068288807, - "learning_rate": 3.896902044958279e-06, - "loss": 1.1509, - "step": 1437 - }, - { - "epoch": 0.12968390675023672, - "grad_norm": 1.8079719545670438, - "learning_rate": 3.896716813815672e-06, - "loss": 1.0789, - "step": 1438 - }, - { - "epoch": 0.12977409027370698, - "grad_norm": 1.5204656321923158, - "learning_rate": 3.896531420834728e-06, - "loss": 0.9715, - "step": 1439 - }, - { - "epoch": 0.12986427379717724, - "grad_norm": 1.729860220317219, - "learning_rate": 3.896345866031266e-06, - "loss": 0.9317, - "step": 1440 - }, - { - "epoch": 0.1299544573206475, - "grad_norm": 1.6731761967004783, - "learning_rate": 3.896160149421119e-06, - "loss": 1.0868, - "step": 1441 - }, - { - "epoch": 0.1300446408441178, - "grad_norm": 1.560031925265993, - "learning_rate": 3.8959742710201314e-06, - "loss": 1.097, - "step": 1442 - }, - { - "epoch": 0.13013482436758805, - "grad_norm": 1.961824558432338, - "learning_rate": 3.895788230844166e-06, - "loss": 0.9224, - "step": 1443 - }, - { - "epoch": 0.13022500789105831, - "grad_norm": 1.5543160306253945, - "learning_rate": 3.895602028909095e-06, - "loss": 1.1727, - "step": 1444 - }, - { - "epoch": 0.13031519141452858, - "grad_norm": 0.6295192881924988, - "learning_rate": 3.895415665230807e-06, - "loss": 0.8526, - "step": 1445 - }, - { - "epoch": 0.13040537493799884, - "grad_norm": 1.2280511751384182, - "learning_rate": 3.895229139825203e-06, - "loss": 1.0463, - "step": 1446 - }, - { - "epoch": 0.1304955584614691, - "grad_norm": 2.114483109026737, - "learning_rate": 3.895042452708198e-06, - "loss": 0.9449, - "step": 1447 - }, - { - "epoch": 0.13058574198493936, - "grad_norm": 2.9982332749464464, - "learning_rate": 3.894855603895723e-06, - "loss": 0.933, - "step": 1448 - }, - { - "epoch": 0.13067592550840962, - "grad_norm": 1.4735231150149626, - "learning_rate": 3.894668593403718e-06, - "loss": 1.0994, - "step": 1449 - }, - { - "epoch": 0.13076610903187988, - "grad_norm": 1.639855776702265, - "learning_rate": 3.8944814212481425e-06, - "loss": 1.0325, - "step": 1450 - }, - { - "epoch": 0.13085629255535014, - "grad_norm": 1.583746764097694, - "learning_rate": 3.894294087444966e-06, - "loss": 1.0127, - "step": 1451 - }, - { - "epoch": 0.1309464760788204, - "grad_norm": 1.4587923508276468, - "learning_rate": 3.894106592010173e-06, - "loss": 0.9156, - "step": 1452 - }, - { - "epoch": 0.13103665960229066, - "grad_norm": 1.570726254740441, - "learning_rate": 3.893918934959762e-06, - "loss": 0.9073, - "step": 1453 - }, - { - "epoch": 0.13112684312576092, - "grad_norm": 1.7647905656654053, - "learning_rate": 3.893731116309743e-06, - "loss": 0.9616, - "step": 1454 - }, - { - "epoch": 0.13121702664923118, - "grad_norm": 2.3129918374438305, - "learning_rate": 3.893543136076145e-06, - "loss": 1.0448, - "step": 1455 - }, - { - "epoch": 0.13130721017270144, - "grad_norm": 1.8959392545964522, - "learning_rate": 3.893354994275006e-06, - "loss": 1.0507, - "step": 1456 - }, - { - "epoch": 0.1313973936961717, - "grad_norm": 1.477798187063935, - "learning_rate": 3.893166690922378e-06, - "loss": 0.9997, - "step": 1457 - }, - { - "epoch": 0.13148757721964197, - "grad_norm": 2.977266483710663, - "learning_rate": 3.892978226034329e-06, - "loss": 1.0567, - "step": 1458 - }, - { - "epoch": 0.13157776074311223, - "grad_norm": 1.7381879117101826, - "learning_rate": 3.89278959962694e-06, - "loss": 1.081, - "step": 1459 - }, - { - "epoch": 0.1316679442665825, - "grad_norm": 1.5610171338789955, - "learning_rate": 3.8926008117163056e-06, - "loss": 1.0004, - "step": 1460 - }, - { - "epoch": 0.13175812779005275, - "grad_norm": 1.5253871863957722, - "learning_rate": 3.892411862318535e-06, - "loss": 1.0081, - "step": 1461 - }, - { - "epoch": 0.131848311313523, - "grad_norm": 1.4070783930224775, - "learning_rate": 3.892222751449749e-06, - "loss": 1.0498, - "step": 1462 - }, - { - "epoch": 0.13193849483699327, - "grad_norm": 1.3967595182811086, - "learning_rate": 3.892033479126084e-06, - "loss": 0.9788, - "step": 1463 - }, - { - "epoch": 0.13202867836046353, - "grad_norm": 2.5413429700335826, - "learning_rate": 3.891844045363691e-06, - "loss": 1.0066, - "step": 1464 - }, - { - "epoch": 0.13211886188393382, - "grad_norm": 1.863330000713464, - "learning_rate": 3.891654450178732e-06, - "loss": 0.9445, - "step": 1465 - }, - { - "epoch": 0.13220904540740408, - "grad_norm": 2.4698512844150082, - "learning_rate": 3.891464693587385e-06, - "loss": 1.0708, - "step": 1466 - }, - { - "epoch": 0.13229922893087434, - "grad_norm": 1.4804141555422698, - "learning_rate": 3.89127477560584e-06, - "loss": 1.0599, - "step": 1467 - }, - { - "epoch": 0.1323894124543446, - "grad_norm": 2.392513252468735, - "learning_rate": 3.891084696250304e-06, - "loss": 1.0658, - "step": 1468 - }, - { - "epoch": 0.13247959597781486, - "grad_norm": 1.6673154601280926, - "learning_rate": 3.890894455536993e-06, - "loss": 1.075, - "step": 1469 - }, - { - "epoch": 0.13256977950128512, - "grad_norm": 3.13693384784558, - "learning_rate": 3.890704053482142e-06, - "loss": 1.0683, - "step": 1470 - }, - { - "epoch": 0.13265996302475538, - "grad_norm": 1.9522029073347498, - "learning_rate": 3.890513490101995e-06, - "loss": 0.972, - "step": 1471 - }, - { - "epoch": 0.13275014654822564, - "grad_norm": 0.6270242335794125, - "learning_rate": 3.890322765412814e-06, - "loss": 0.8578, - "step": 1472 - }, - { - "epoch": 0.1328403300716959, - "grad_norm": 1.508119225805163, - "learning_rate": 3.890131879430871e-06, - "loss": 1.0481, - "step": 1473 - }, - { - "epoch": 0.13293051359516617, - "grad_norm": 1.5038004255837814, - "learning_rate": 3.889940832172454e-06, - "loss": 1.0439, - "step": 1474 - }, - { - "epoch": 0.13302069711863643, - "grad_norm": 1.473174552877095, - "learning_rate": 3.889749623653864e-06, - "loss": 1.0735, - "step": 1475 - }, - { - "epoch": 0.1331108806421067, - "grad_norm": 1.255795727862658, - "learning_rate": 3.889558253891416e-06, - "loss": 0.9676, - "step": 1476 - }, - { - "epoch": 0.13320106416557695, - "grad_norm": 0.6391713000813051, - "learning_rate": 3.8893667229014385e-06, - "loss": 0.852, - "step": 1477 - }, - { - "epoch": 0.1332912476890472, - "grad_norm": 3.6794893571834755, - "learning_rate": 3.8891750307002746e-06, - "loss": 0.9371, - "step": 1478 - }, - { - "epoch": 0.13338143121251747, - "grad_norm": 1.3324326165000324, - "learning_rate": 3.888983177304281e-06, - "loss": 0.96, - "step": 1479 - }, - { - "epoch": 0.13347161473598773, - "grad_norm": 0.7847236846152513, - "learning_rate": 3.888791162729826e-06, - "loss": 0.8567, - "step": 1480 - }, - { - "epoch": 0.133561798259458, - "grad_norm": 1.877345207934502, - "learning_rate": 3.888598986993295e-06, - "loss": 1.0498, - "step": 1481 - }, - { - "epoch": 0.13365198178292825, - "grad_norm": 1.45974862586687, - "learning_rate": 3.888406650111085e-06, - "loss": 1.0591, - "step": 1482 - }, - { - "epoch": 0.1337421653063985, - "grad_norm": 1.7048730541189894, - "learning_rate": 3.888214152099607e-06, - "loss": 1.0325, - "step": 1483 - }, - { - "epoch": 0.13383234882986877, - "grad_norm": 1.69005814672355, - "learning_rate": 3.888021492975285e-06, - "loss": 1.0899, - "step": 1484 - }, - { - "epoch": 0.13392253235333904, - "grad_norm": 1.9610486246059398, - "learning_rate": 3.88782867275456e-06, - "loss": 1.0841, - "step": 1485 - }, - { - "epoch": 0.1340127158768093, - "grad_norm": 1.1866508760812535, - "learning_rate": 3.8876356914538824e-06, - "loss": 0.9888, - "step": 1486 - }, - { - "epoch": 0.13410289940027956, - "grad_norm": 1.9748208435502406, - "learning_rate": 3.88744254908972e-06, - "loss": 0.9874, - "step": 1487 - }, - { - "epoch": 0.13419308292374982, - "grad_norm": 1.6250663156916114, - "learning_rate": 3.887249245678552e-06, - "loss": 1.0017, - "step": 1488 - }, - { - "epoch": 0.1342832664472201, - "grad_norm": 1.429688250432052, - "learning_rate": 3.887055781236872e-06, - "loss": 1.0931, - "step": 1489 - }, - { - "epoch": 0.13437344997069037, - "grad_norm": 2.1034388904513097, - "learning_rate": 3.886862155781186e-06, - "loss": 0.9061, - "step": 1490 - }, - { - "epoch": 0.13446363349416063, - "grad_norm": 0.6904199105829046, - "learning_rate": 3.886668369328019e-06, - "loss": 0.8618, - "step": 1491 - }, - { - "epoch": 0.1345538170176309, - "grad_norm": 1.7962956697077639, - "learning_rate": 3.886474421893904e-06, - "loss": 1.0583, - "step": 1492 - }, - { - "epoch": 0.13464400054110115, - "grad_norm": 1.520802649967949, - "learning_rate": 3.886280313495388e-06, - "loss": 0.9544, - "step": 1493 - }, - { - "epoch": 0.1347341840645714, - "grad_norm": 1.714614887650186, - "learning_rate": 3.886086044149035e-06, - "loss": 0.9945, - "step": 1494 - }, - { - "epoch": 0.13482436758804167, - "grad_norm": 1.326778761778198, - "learning_rate": 3.885891613871421e-06, - "loss": 1.0279, - "step": 1495 - }, - { - "epoch": 0.13491455111151193, - "grad_norm": 1.5774429660407536, - "learning_rate": 3.885697022679136e-06, - "loss": 1.0774, - "step": 1496 - }, - { - "epoch": 0.1350047346349822, - "grad_norm": 2.64979342969065, - "learning_rate": 3.885502270588784e-06, - "loss": 1.0486, - "step": 1497 - }, - { - "epoch": 0.13509491815845245, - "grad_norm": 1.5474050365145642, - "learning_rate": 3.885307357616981e-06, - "loss": 1.0223, - "step": 1498 - }, - { - "epoch": 0.13518510168192271, - "grad_norm": 1.498496494186012, - "learning_rate": 3.885112283780359e-06, - "loss": 1.0173, - "step": 1499 - }, - { - "epoch": 0.13527528520539298, - "grad_norm": 1.4739609663257962, - "learning_rate": 3.8849170490955624e-06, - "loss": 0.9211, - "step": 1500 - }, - { - "epoch": 0.13536546872886324, - "grad_norm": 2.035680545919271, - "learning_rate": 3.88472165357925e-06, - "loss": 1.02, - "step": 1501 - }, - { - "epoch": 0.1354556522523335, - "grad_norm": 2.00595731412554, - "learning_rate": 3.884526097248093e-06, - "loss": 0.9825, - "step": 1502 - }, - { - "epoch": 0.13554583577580376, - "grad_norm": 1.9296125019032242, - "learning_rate": 3.884330380118779e-06, - "loss": 1.04, - "step": 1503 - }, - { - "epoch": 0.13563601929927402, - "grad_norm": 1.563544877684581, - "learning_rate": 3.884134502208007e-06, - "loss": 0.9756, - "step": 1504 - }, - { - "epoch": 0.13572620282274428, - "grad_norm": 1.533942444036151, - "learning_rate": 3.88393846353249e-06, - "loss": 1.1325, - "step": 1505 - }, - { - "epoch": 0.13581638634621454, - "grad_norm": 1.4312224772776319, - "learning_rate": 3.883742264108955e-06, - "loss": 1.0806, - "step": 1506 - }, - { - "epoch": 0.1359065698696848, - "grad_norm": 1.486174446412149, - "learning_rate": 3.883545903954145e-06, - "loss": 1.0367, - "step": 1507 - }, - { - "epoch": 0.13599675339315506, - "grad_norm": 1.6214518323925897, - "learning_rate": 3.883349383084811e-06, - "loss": 1.0161, - "step": 1508 - }, - { - "epoch": 0.13608693691662532, - "grad_norm": 1.5772450325972762, - "learning_rate": 3.883152701517723e-06, - "loss": 0.942, - "step": 1509 - }, - { - "epoch": 0.13617712044009558, - "grad_norm": 1.7166473011047962, - "learning_rate": 3.882955859269664e-06, - "loss": 1.0134, - "step": 1510 - }, - { - "epoch": 0.13626730396356584, - "grad_norm": 1.3115138936210362, - "learning_rate": 3.882758856357428e-06, - "loss": 1.0284, - "step": 1511 - }, - { - "epoch": 0.1363574874870361, - "grad_norm": 1.681422699067142, - "learning_rate": 3.882561692797824e-06, - "loss": 1.0132, - "step": 1512 - }, - { - "epoch": 0.1364476710105064, - "grad_norm": 1.835607620796017, - "learning_rate": 3.882364368607677e-06, - "loss": 1.03, - "step": 1513 - }, - { - "epoch": 0.13653785453397665, - "grad_norm": 1.6205157984711172, - "learning_rate": 3.8821668838038225e-06, - "loss": 1.0384, - "step": 1514 - }, - { - "epoch": 0.13662803805744692, - "grad_norm": 1.8358559448308842, - "learning_rate": 3.881969238403111e-06, - "loss": 0.9324, - "step": 1515 - }, - { - "epoch": 0.13671822158091718, - "grad_norm": 2.1848118204750384, - "learning_rate": 3.881771432422408e-06, - "loss": 1.1476, - "step": 1516 - }, - { - "epoch": 0.13680840510438744, - "grad_norm": 1.4748123534504363, - "learning_rate": 3.88157346587859e-06, - "loss": 1.0803, - "step": 1517 - }, - { - "epoch": 0.1368985886278577, - "grad_norm": 5.824090046679067, - "learning_rate": 3.881375338788549e-06, - "loss": 1.003, - "step": 1518 - }, - { - "epoch": 0.13698877215132796, - "grad_norm": 2.2157690951000046, - "learning_rate": 3.88117705116919e-06, - "loss": 1.0375, - "step": 1519 - }, - { - "epoch": 0.13707895567479822, - "grad_norm": 0.7746318111738878, - "learning_rate": 3.880978603037432e-06, - "loss": 0.9109, - "step": 1520 - }, - { - "epoch": 0.13716913919826848, - "grad_norm": 1.5268032356601695, - "learning_rate": 3.880779994410209e-06, - "loss": 1.0251, - "step": 1521 - }, - { - "epoch": 0.13725932272173874, - "grad_norm": 1.83538374970529, - "learning_rate": 3.880581225304466e-06, - "loss": 0.8977, - "step": 1522 - }, - { - "epoch": 0.137349506245209, - "grad_norm": 1.7125032828640623, - "learning_rate": 3.880382295737163e-06, - "loss": 1.0564, - "step": 1523 - }, - { - "epoch": 0.13743968976867926, - "grad_norm": 2.1209414376497286, - "learning_rate": 3.880183205725274e-06, - "loss": 0.9976, - "step": 1524 - }, - { - "epoch": 0.13752987329214952, - "grad_norm": 2.9027304719013927, - "learning_rate": 3.879983955285788e-06, - "loss": 0.9683, - "step": 1525 - }, - { - "epoch": 0.13762005681561978, - "grad_norm": 1.705245002192304, - "learning_rate": 3.879784544435703e-06, - "loss": 0.9981, - "step": 1526 - }, - { - "epoch": 0.13771024033909005, - "grad_norm": 1.647247897643674, - "learning_rate": 3.879584973192037e-06, - "loss": 0.9581, - "step": 1527 - }, - { - "epoch": 0.1378004238625603, - "grad_norm": 1.957665803106399, - "learning_rate": 3.8793852415718165e-06, - "loss": 0.9565, - "step": 1528 - }, - { - "epoch": 0.13789060738603057, - "grad_norm": 1.845381968173384, - "learning_rate": 3.879185349592085e-06, - "loss": 1.0258, - "step": 1529 - }, - { - "epoch": 0.13798079090950083, - "grad_norm": 1.5071282291800139, - "learning_rate": 3.878985297269897e-06, - "loss": 1.0471, - "step": 1530 - }, - { - "epoch": 0.1380709744329711, - "grad_norm": 1.5382411721018243, - "learning_rate": 3.878785084622323e-06, - "loss": 1.0635, - "step": 1531 - }, - { - "epoch": 0.13816115795644135, - "grad_norm": 1.7446649154248095, - "learning_rate": 3.878584711666447e-06, - "loss": 1.2054, - "step": 1532 - }, - { - "epoch": 0.1382513414799116, - "grad_norm": 1.7062501844468907, - "learning_rate": 3.8783841784193635e-06, - "loss": 1.0451, - "step": 1533 - }, - { - "epoch": 0.13834152500338187, - "grad_norm": 1.4712230882800479, - "learning_rate": 3.8781834848981855e-06, - "loss": 1.0019, - "step": 1534 - }, - { - "epoch": 0.13843170852685213, - "grad_norm": 1.4002785524775074, - "learning_rate": 3.877982631120037e-06, - "loss": 0.9574, - "step": 1535 - }, - { - "epoch": 0.1385218920503224, - "grad_norm": 1.9954065980283513, - "learning_rate": 3.877781617102053e-06, - "loss": 1.0933, - "step": 1536 - }, - { - "epoch": 0.13861207557379268, - "grad_norm": 1.3981681862761963, - "learning_rate": 3.877580442861389e-06, - "loss": 1.0638, - "step": 1537 - }, - { - "epoch": 0.13870225909726294, - "grad_norm": 1.5658544010668987, - "learning_rate": 3.877379108415209e-06, - "loss": 1.0559, - "step": 1538 - }, - { - "epoch": 0.1387924426207332, - "grad_norm": 1.4787114566736062, - "learning_rate": 3.8771776137806915e-06, - "loss": 1.0497, - "step": 1539 - }, - { - "epoch": 0.13888262614420346, - "grad_norm": 1.6302823993404092, - "learning_rate": 3.8769759589750295e-06, - "loss": 0.9661, - "step": 1540 - }, - { - "epoch": 0.13897280966767372, - "grad_norm": 1.523894334424019, - "learning_rate": 3.876774144015429e-06, - "loss": 1.0273, - "step": 1541 - }, - { - "epoch": 0.13906299319114399, - "grad_norm": 2.234320686587195, - "learning_rate": 3.87657216891911e-06, - "loss": 1.059, - "step": 1542 - }, - { - "epoch": 0.13915317671461425, - "grad_norm": 1.9210996847227808, - "learning_rate": 3.876370033703307e-06, - "loss": 1.0238, - "step": 1543 - }, - { - "epoch": 0.1392433602380845, - "grad_norm": 5.768505669045372, - "learning_rate": 3.876167738385265e-06, - "loss": 0.9277, - "step": 1544 - }, - { - "epoch": 0.13933354376155477, - "grad_norm": 2.5476799377444466, - "learning_rate": 3.875965282982247e-06, - "loss": 1.0748, - "step": 1545 - }, - { - "epoch": 0.13942372728502503, - "grad_norm": 1.3940981665270646, - "learning_rate": 3.875762667511528e-06, - "loss": 1.0374, - "step": 1546 - }, - { - "epoch": 0.1395139108084953, - "grad_norm": 1.539803108906063, - "learning_rate": 3.875559891990394e-06, - "loss": 1.0363, - "step": 1547 - }, - { - "epoch": 0.13960409433196555, - "grad_norm": 1.8823498675903303, - "learning_rate": 3.875356956436149e-06, - "loss": 0.9935, - "step": 1548 - }, - { - "epoch": 0.1396942778554358, - "grad_norm": 1.9894826440813889, - "learning_rate": 3.875153860866108e-06, - "loss": 1.0043, - "step": 1549 - }, - { - "epoch": 0.13978446137890607, - "grad_norm": 1.5568290797398279, - "learning_rate": 3.8749506052976e-06, - "loss": 1.0741, - "step": 1550 - }, - { - "epoch": 0.13987464490237633, - "grad_norm": 1.3871103512871459, - "learning_rate": 3.874747189747968e-06, - "loss": 1.0557, - "step": 1551 - }, - { - "epoch": 0.1399648284258466, - "grad_norm": 2.415889998849647, - "learning_rate": 3.874543614234568e-06, - "loss": 1.0401, - "step": 1552 - }, - { - "epoch": 0.14005501194931685, - "grad_norm": 1.4268854751235827, - "learning_rate": 3.874339878774771e-06, - "loss": 1.0049, - "step": 1553 - }, - { - "epoch": 0.14014519547278712, - "grad_norm": 1.6388680632753365, - "learning_rate": 3.874135983385961e-06, - "loss": 1.0328, - "step": 1554 - }, - { - "epoch": 0.14023537899625738, - "grad_norm": 1.633857866971519, - "learning_rate": 3.873931928085535e-06, - "loss": 1.0295, - "step": 1555 - }, - { - "epoch": 0.14032556251972764, - "grad_norm": 1.4920760507555917, - "learning_rate": 3.873727712890904e-06, - "loss": 1.1066, - "step": 1556 - }, - { - "epoch": 0.1404157460431979, - "grad_norm": 1.4652732931412964, - "learning_rate": 3.873523337819493e-06, - "loss": 0.994, - "step": 1557 - }, - { - "epoch": 0.14050592956666816, - "grad_norm": 1.3209386841710244, - "learning_rate": 3.873318802888739e-06, - "loss": 0.9602, - "step": 1558 - }, - { - "epoch": 0.14059611309013842, - "grad_norm": 0.5788262470269988, - "learning_rate": 3.873114108116097e-06, - "loss": 0.7824, - "step": 1559 - }, - { - "epoch": 0.14068629661360868, - "grad_norm": 1.7798323762383084, - "learning_rate": 3.872909253519031e-06, - "loss": 1.1139, - "step": 1560 - }, - { - "epoch": 0.14077648013707897, - "grad_norm": 1.9921347816822188, - "learning_rate": 3.8727042391150195e-06, - "loss": 1.1037, - "step": 1561 - }, - { - "epoch": 0.14086666366054923, - "grad_norm": 1.4215386799887733, - "learning_rate": 3.872499064921556e-06, - "loss": 0.962, - "step": 1562 - }, - { - "epoch": 0.1409568471840195, - "grad_norm": 1.6353034043018115, - "learning_rate": 3.872293730956149e-06, - "loss": 1.0398, - "step": 1563 - }, - { - "epoch": 0.14104703070748975, - "grad_norm": 2.1654996541176454, - "learning_rate": 3.872088237236316e-06, - "loss": 1.0207, - "step": 1564 - }, - { - "epoch": 0.14113721423096, - "grad_norm": 1.7337243861676466, - "learning_rate": 3.871882583779592e-06, - "loss": 0.9415, - "step": 1565 - }, - { - "epoch": 0.14122739775443027, - "grad_norm": 3.665959824720179, - "learning_rate": 3.871676770603525e-06, - "loss": 1.0257, - "step": 1566 - }, - { - "epoch": 0.14131758127790053, - "grad_norm": 1.7639089701686717, - "learning_rate": 3.871470797725676e-06, - "loss": 1.0173, - "step": 1567 - }, - { - "epoch": 0.1414077648013708, - "grad_norm": 1.6428628101014189, - "learning_rate": 3.8712646651636185e-06, - "loss": 1.1044, - "step": 1568 - }, - { - "epoch": 0.14149794832484106, - "grad_norm": 1.3433417210108023, - "learning_rate": 3.871058372934942e-06, - "loss": 1.0184, - "step": 1569 - }, - { - "epoch": 0.14158813184831132, - "grad_norm": 1.5612944720785669, - "learning_rate": 3.8708519210572485e-06, - "loss": 0.8633, - "step": 1570 - }, - { - "epoch": 0.14167831537178158, - "grad_norm": 2.2884016417522974, - "learning_rate": 3.870645309548153e-06, - "loss": 0.9629, - "step": 1571 - }, - { - "epoch": 0.14176849889525184, - "grad_norm": 0.6036946299734443, - "learning_rate": 3.870438538425284e-06, - "loss": 0.8094, - "step": 1572 - }, - { - "epoch": 0.1418586824187221, - "grad_norm": 3.1342280131872124, - "learning_rate": 3.870231607706287e-06, - "loss": 0.9794, - "step": 1573 - }, - { - "epoch": 0.14194886594219236, - "grad_norm": 1.6267533013588924, - "learning_rate": 3.870024517408817e-06, - "loss": 1.033, - "step": 1574 - }, - { - "epoch": 0.14203904946566262, - "grad_norm": 2.0700698422429897, - "learning_rate": 3.8698172675505425e-06, - "loss": 0.9537, - "step": 1575 - }, - { - "epoch": 0.14212923298913288, - "grad_norm": 0.7922871107568452, - "learning_rate": 3.86960985814915e-06, - "loss": 0.8438, - "step": 1576 - }, - { - "epoch": 0.14221941651260314, - "grad_norm": 1.4649846937401996, - "learning_rate": 3.869402289222335e-06, - "loss": 0.9541, - "step": 1577 - }, - { - "epoch": 0.1423096000360734, - "grad_norm": 1.709262044964886, - "learning_rate": 3.869194560787808e-06, - "loss": 1.0104, - "step": 1578 - }, - { - "epoch": 0.14239978355954366, - "grad_norm": 1.8255852557762917, - "learning_rate": 3.868986672863296e-06, - "loss": 0.9538, - "step": 1579 - }, - { - "epoch": 0.14248996708301392, - "grad_norm": 1.396490563865766, - "learning_rate": 3.868778625466535e-06, - "loss": 1.1247, - "step": 1580 - }, - { - "epoch": 0.14258015060648419, - "grad_norm": 1.4391069138224655, - "learning_rate": 3.868570418615278e-06, - "loss": 0.9516, - "step": 1581 - }, - { - "epoch": 0.14267033412995445, - "grad_norm": 0.7143191721437117, - "learning_rate": 3.8683620523272885e-06, - "loss": 0.8279, - "step": 1582 - }, - { - "epoch": 0.1427605176534247, - "grad_norm": 1.589462496996362, - "learning_rate": 3.8681535266203464e-06, - "loss": 1.1307, - "step": 1583 - }, - { - "epoch": 0.14285070117689497, - "grad_norm": 1.7670645457699716, - "learning_rate": 3.867944841512246e-06, - "loss": 1.0558, - "step": 1584 - }, - { - "epoch": 0.14294088470036526, - "grad_norm": 1.5745224576906927, - "learning_rate": 3.867735997020791e-06, - "loss": 1.0348, - "step": 1585 - }, - { - "epoch": 0.14303106822383552, - "grad_norm": 1.7123048100297948, - "learning_rate": 3.867526993163802e-06, - "loss": 0.9279, - "step": 1586 - }, - { - "epoch": 0.14312125174730578, - "grad_norm": 1.3020624947469863, - "learning_rate": 3.867317829959113e-06, - "loss": 0.9002, - "step": 1587 - }, - { - "epoch": 0.14321143527077604, - "grad_norm": 2.548443364008572, - "learning_rate": 3.8671085074245704e-06, - "loss": 0.8249, - "step": 1588 - }, - { - "epoch": 0.1433016187942463, - "grad_norm": 1.882223836594607, - "learning_rate": 3.866899025578035e-06, - "loss": 1.0922, - "step": 1589 - }, - { - "epoch": 0.14339180231771656, - "grad_norm": 1.395937085447299, - "learning_rate": 3.86668938443738e-06, - "loss": 0.9985, - "step": 1590 - }, - { - "epoch": 0.14348198584118682, - "grad_norm": 1.7554070862965079, - "learning_rate": 3.866479584020495e-06, - "loss": 1.0439, - "step": 1591 - }, - { - "epoch": 0.14357216936465708, - "grad_norm": 1.7051453811996826, - "learning_rate": 3.866269624345279e-06, - "loss": 1.059, - "step": 1592 - }, - { - "epoch": 0.14366235288812734, - "grad_norm": 1.6631449287421871, - "learning_rate": 3.866059505429649e-06, - "loss": 1.1028, - "step": 1593 - }, - { - "epoch": 0.1437525364115976, - "grad_norm": 0.5707395078535973, - "learning_rate": 3.865849227291532e-06, - "loss": 0.8304, - "step": 1594 - }, - { - "epoch": 0.14384271993506786, - "grad_norm": 1.7499651905412517, - "learning_rate": 3.865638789948872e-06, - "loss": 1.0187, - "step": 1595 - }, - { - "epoch": 0.14393290345853813, - "grad_norm": 3.850112239638281, - "learning_rate": 3.865428193419622e-06, - "loss": 0.9876, - "step": 1596 - }, - { - "epoch": 0.14402308698200839, - "grad_norm": 1.9714250707200365, - "learning_rate": 3.865217437721753e-06, - "loss": 1.0142, - "step": 1597 - }, - { - "epoch": 0.14411327050547865, - "grad_norm": 1.7528384894234472, - "learning_rate": 3.865006522873249e-06, - "loss": 0.9603, - "step": 1598 - }, - { - "epoch": 0.1442034540289489, - "grad_norm": 1.542287555451895, - "learning_rate": 3.864795448892103e-06, - "loss": 1.0285, - "step": 1599 - }, - { - "epoch": 0.14429363755241917, - "grad_norm": 1.4619438254996757, - "learning_rate": 3.864584215796327e-06, - "loss": 1.0731, - "step": 1600 - }, - { - "epoch": 0.14438382107588943, - "grad_norm": 1.6948660689022343, - "learning_rate": 3.8643728236039455e-06, - "loss": 1.045, - "step": 1601 - }, - { - "epoch": 0.1444740045993597, - "grad_norm": 1.5689027445928465, - "learning_rate": 3.864161272332994e-06, - "loss": 1.0049, - "step": 1602 - }, - { - "epoch": 0.14456418812282995, - "grad_norm": 1.3570544726768097, - "learning_rate": 3.863949562001524e-06, - "loss": 1.0684, - "step": 1603 - }, - { - "epoch": 0.1446543716463002, - "grad_norm": 2.019480012874577, - "learning_rate": 3.8637376926276005e-06, - "loss": 1.0022, - "step": 1604 - }, - { - "epoch": 0.14474455516977047, - "grad_norm": 1.969024033624022, - "learning_rate": 3.8635256642293e-06, - "loss": 1.022, - "step": 1605 - }, - { - "epoch": 0.14483473869324073, - "grad_norm": 1.565593099364295, - "learning_rate": 3.863313476824714e-06, - "loss": 1.0037, - "step": 1606 - }, - { - "epoch": 0.144924922216711, - "grad_norm": 1.8288233148737352, - "learning_rate": 3.863101130431948e-06, - "loss": 0.9998, - "step": 1607 - }, - { - "epoch": 0.14501510574018128, - "grad_norm": 1.337475878061265, - "learning_rate": 3.862888625069121e-06, - "loss": 1.0671, - "step": 1608 - }, - { - "epoch": 0.14510528926365154, - "grad_norm": 1.3578854873759318, - "learning_rate": 3.8626759607543645e-06, - "loss": 0.9996, - "step": 1609 - }, - { - "epoch": 0.1451954727871218, - "grad_norm": 2.3687031240530505, - "learning_rate": 3.862463137505825e-06, - "loss": 0.9756, - "step": 1610 - }, - { - "epoch": 0.14528565631059207, - "grad_norm": 1.4263373975530211, - "learning_rate": 3.862250155341659e-06, - "loss": 0.9822, - "step": 1611 - }, - { - "epoch": 0.14537583983406233, - "grad_norm": 1.5455734480032346, - "learning_rate": 3.862037014280043e-06, - "loss": 1.1534, - "step": 1612 - }, - { - "epoch": 0.1454660233575326, - "grad_norm": 1.8423318743248183, - "learning_rate": 3.861823714339162e-06, - "loss": 1.0126, - "step": 1613 - }, - { - "epoch": 0.14555620688100285, - "grad_norm": 1.6385363407941702, - "learning_rate": 3.861610255537215e-06, - "loss": 1.0282, - "step": 1614 - }, - { - "epoch": 0.1456463904044731, - "grad_norm": 8.45483290928483, - "learning_rate": 3.8613966378924165e-06, - "loss": 1.0585, - "step": 1615 - }, - { - "epoch": 0.14573657392794337, - "grad_norm": 1.6077179757702742, - "learning_rate": 3.861182861422993e-06, - "loss": 0.9763, - "step": 1616 - }, - { - "epoch": 0.14582675745141363, - "grad_norm": 1.442181386552378, - "learning_rate": 3.860968926147185e-06, - "loss": 1.1026, - "step": 1617 - }, - { - "epoch": 0.1459169409748839, - "grad_norm": 1.4736798116521506, - "learning_rate": 3.860754832083247e-06, - "loss": 0.8632, - "step": 1618 - }, - { - "epoch": 0.14600712449835415, - "grad_norm": 1.3924018138327945, - "learning_rate": 3.8605405792494475e-06, - "loss": 1.1028, - "step": 1619 - }, - { - "epoch": 0.1460973080218244, - "grad_norm": 1.6004222998022952, - "learning_rate": 3.860326167664066e-06, - "loss": 1.0389, - "step": 1620 - }, - { - "epoch": 0.14618749154529467, - "grad_norm": 1.4531350084185743, - "learning_rate": 3.860111597345399e-06, - "loss": 1.0182, - "step": 1621 - }, - { - "epoch": 0.14627767506876493, - "grad_norm": 1.6973575615428484, - "learning_rate": 3.859896868311753e-06, - "loss": 0.8816, - "step": 1622 - }, - { - "epoch": 0.1463678585922352, - "grad_norm": 1.453774173893108, - "learning_rate": 3.859681980581452e-06, - "loss": 1.1089, - "step": 1623 - }, - { - "epoch": 0.14645804211570546, - "grad_norm": 1.6195153330836414, - "learning_rate": 3.859466934172829e-06, - "loss": 0.9951, - "step": 1624 - }, - { - "epoch": 0.14654822563917572, - "grad_norm": 1.4917040136481856, - "learning_rate": 3.859251729104235e-06, - "loss": 0.9577, - "step": 1625 - }, - { - "epoch": 0.14663840916264598, - "grad_norm": 1.258381732531986, - "learning_rate": 3.859036365394031e-06, - "loss": 0.9744, - "step": 1626 - }, - { - "epoch": 0.14672859268611624, - "grad_norm": 1.8280147291003264, - "learning_rate": 3.858820843060594e-06, - "loss": 1.0056, - "step": 1627 - }, - { - "epoch": 0.1468187762095865, - "grad_norm": 1.8471393789710238, - "learning_rate": 3.858605162122314e-06, - "loss": 1.0009, - "step": 1628 - }, - { - "epoch": 0.14690895973305676, - "grad_norm": 1.629661476461461, - "learning_rate": 3.858389322597592e-06, - "loss": 0.9725, - "step": 1629 - }, - { - "epoch": 0.14699914325652702, - "grad_norm": 1.3500825132655512, - "learning_rate": 3.858173324504847e-06, - "loss": 1.0373, - "step": 1630 - }, - { - "epoch": 0.14708932677999728, - "grad_norm": 1.381287202290931, - "learning_rate": 3.857957167862508e-06, - "loss": 0.9816, - "step": 1631 - }, - { - "epoch": 0.14717951030346757, - "grad_norm": 0.6722666685549556, - "learning_rate": 3.857740852689018e-06, - "loss": 0.8556, - "step": 1632 - }, - { - "epoch": 0.14726969382693783, - "grad_norm": 1.8171504639306104, - "learning_rate": 3.857524379002835e-06, - "loss": 1.0256, - "step": 1633 - }, - { - "epoch": 0.1473598773504081, - "grad_norm": 1.8409061158923632, - "learning_rate": 3.85730774682243e-06, - "loss": 1.0417, - "step": 1634 - }, - { - "epoch": 0.14745006087387835, - "grad_norm": 1.3889752578371761, - "learning_rate": 3.8570909561662875e-06, - "loss": 1.0916, - "step": 1635 - }, - { - "epoch": 0.1475402443973486, - "grad_norm": 1.342648032141181, - "learning_rate": 3.8568740070529045e-06, - "loss": 1.0971, - "step": 1636 - }, - { - "epoch": 0.14763042792081887, - "grad_norm": 1.6131692990675912, - "learning_rate": 3.856656899500792e-06, - "loss": 0.9852, - "step": 1637 - }, - { - "epoch": 0.14772061144428913, - "grad_norm": 1.5306979371431093, - "learning_rate": 3.856439633528476e-06, - "loss": 0.9579, - "step": 1638 - }, - { - "epoch": 0.1478107949677594, - "grad_norm": 1.6820413109903252, - "learning_rate": 3.856222209154494e-06, - "loss": 1.0768, - "step": 1639 - }, - { - "epoch": 0.14790097849122966, - "grad_norm": 1.4834800029778727, - "learning_rate": 3.856004626397397e-06, - "loss": 1.0447, - "step": 1640 - }, - { - "epoch": 0.14799116201469992, - "grad_norm": 1.416442638335885, - "learning_rate": 3.855786885275753e-06, - "loss": 0.9908, - "step": 1641 - }, - { - "epoch": 0.14808134553817018, - "grad_norm": 2.083102696685928, - "learning_rate": 3.855568985808138e-06, - "loss": 0.935, - "step": 1642 - }, - { - "epoch": 0.14817152906164044, - "grad_norm": 1.4397561156852914, - "learning_rate": 3.855350928013145e-06, - "loss": 0.9865, - "step": 1643 - }, - { - "epoch": 0.1482617125851107, - "grad_norm": 1.9647773746632278, - "learning_rate": 3.8551327119093825e-06, - "loss": 1.0245, - "step": 1644 - }, - { - "epoch": 0.14835189610858096, - "grad_norm": 1.5635894791800262, - "learning_rate": 3.854914337515467e-06, - "loss": 0.9578, - "step": 1645 - }, - { - "epoch": 0.14844207963205122, - "grad_norm": 1.348855278565962, - "learning_rate": 3.8546958048500324e-06, - "loss": 0.9194, - "step": 1646 - }, - { - "epoch": 0.14853226315552148, - "grad_norm": 0.7799092422772169, - "learning_rate": 3.854477113931725e-06, - "loss": 0.8817, - "step": 1647 - }, - { - "epoch": 0.14862244667899174, - "grad_norm": 1.5753401555481195, - "learning_rate": 3.854258264779205e-06, - "loss": 1.0369, - "step": 1648 - }, - { - "epoch": 0.148712630202462, - "grad_norm": 1.73958704904247, - "learning_rate": 3.854039257411145e-06, - "loss": 0.9921, - "step": 1649 - }, - { - "epoch": 0.14880281372593226, - "grad_norm": 1.6663312892444697, - "learning_rate": 3.853820091846232e-06, - "loss": 0.9799, - "step": 1650 - }, - { - "epoch": 0.14889299724940253, - "grad_norm": 4.59801155733346, - "learning_rate": 3.853600768103169e-06, - "loss": 1.0204, - "step": 1651 - }, - { - "epoch": 0.1489831807728728, - "grad_norm": 1.8410228668400692, - "learning_rate": 3.853381286200667e-06, - "loss": 1.0023, - "step": 1652 - }, - { - "epoch": 0.14907336429634305, - "grad_norm": 3.4188333169942124, - "learning_rate": 3.853161646157453e-06, - "loss": 1.1056, - "step": 1653 - }, - { - "epoch": 0.1491635478198133, - "grad_norm": 2.1636466718184812, - "learning_rate": 3.852941847992269e-06, - "loss": 0.9826, - "step": 1654 - }, - { - "epoch": 0.14925373134328357, - "grad_norm": 1.9334853633211195, - "learning_rate": 3.852721891723871e-06, - "loss": 1.023, - "step": 1655 - }, - { - "epoch": 0.14934391486675386, - "grad_norm": 1.9010640251900262, - "learning_rate": 3.852501777371025e-06, - "loss": 0.9995, - "step": 1656 - }, - { - "epoch": 0.14943409839022412, - "grad_norm": 1.6660996982697598, - "learning_rate": 3.8522815049525125e-06, - "loss": 1.0516, - "step": 1657 - }, - { - "epoch": 0.14952428191369438, - "grad_norm": 4.172974173799808, - "learning_rate": 3.852061074487129e-06, - "loss": 1.0747, - "step": 1658 - }, - { - "epoch": 0.14961446543716464, - "grad_norm": 1.5827870011982375, - "learning_rate": 3.851840485993682e-06, - "loss": 0.9279, - "step": 1659 - }, - { - "epoch": 0.1497046489606349, - "grad_norm": 1.4425627248206971, - "learning_rate": 3.851619739490994e-06, - "loss": 1.0217, - "step": 1660 - }, - { - "epoch": 0.14979483248410516, - "grad_norm": 1.4636689625108632, - "learning_rate": 3.8513988349978996e-06, - "loss": 1.0475, - "step": 1661 - }, - { - "epoch": 0.14988501600757542, - "grad_norm": 1.4114100833918046, - "learning_rate": 3.851177772533249e-06, - "loss": 1.0458, - "step": 1662 - }, - { - "epoch": 0.14997519953104568, - "grad_norm": 1.7836713562639201, - "learning_rate": 3.850956552115903e-06, - "loss": 1.1834, - "step": 1663 - }, - { - "epoch": 0.15006538305451594, - "grad_norm": 0.6696062150648536, - "learning_rate": 3.850735173764738e-06, - "loss": 0.8385, - "step": 1664 - }, - { - "epoch": 0.1501555665779862, - "grad_norm": 1.6605794210212552, - "learning_rate": 3.850513637498642e-06, - "loss": 1.0434, - "step": 1665 - }, - { - "epoch": 0.15024575010145647, - "grad_norm": 2.248356112663336, - "learning_rate": 3.850291943336521e-06, - "loss": 1.0403, - "step": 1666 - }, - { - "epoch": 0.15033593362492673, - "grad_norm": 1.6568597354998786, - "learning_rate": 3.850070091297287e-06, - "loss": 1.0663, - "step": 1667 - }, - { - "epoch": 0.150426117148397, - "grad_norm": 0.6703668790158569, - "learning_rate": 3.8498480813998735e-06, - "loss": 0.9011, - "step": 1668 - }, - { - "epoch": 0.15051630067186725, - "grad_norm": 1.4387471966753445, - "learning_rate": 3.84962591366322e-06, - "loss": 1.0139, - "step": 1669 - }, - { - "epoch": 0.1506064841953375, - "grad_norm": 1.6634432934565742, - "learning_rate": 3.8494035881062855e-06, - "loss": 1.0671, - "step": 1670 - }, - { - "epoch": 0.15069666771880777, - "grad_norm": 2.149482278811586, - "learning_rate": 3.84918110474804e-06, - "loss": 0.9967, - "step": 1671 - }, - { - "epoch": 0.15078685124227803, - "grad_norm": 2.056959861041066, - "learning_rate": 3.8489584636074655e-06, - "loss": 1.0028, - "step": 1672 - }, - { - "epoch": 0.1508770347657483, - "grad_norm": 0.6555058256264326, - "learning_rate": 3.848735664703561e-06, - "loss": 0.8257, - "step": 1673 - }, - { - "epoch": 0.15096721828921855, - "grad_norm": 1.4380823697200413, - "learning_rate": 3.8485127080553346e-06, - "loss": 1.0171, - "step": 1674 - }, - { - "epoch": 0.1510574018126888, - "grad_norm": 1.8268512298572328, - "learning_rate": 3.8482895936818115e-06, - "loss": 1.0183, - "step": 1675 - }, - { - "epoch": 0.15114758533615907, - "grad_norm": 1.5484601000879328, - "learning_rate": 3.848066321602029e-06, - "loss": 1.0374, - "step": 1676 - }, - { - "epoch": 0.15123776885962933, - "grad_norm": 1.4944141013000884, - "learning_rate": 3.847842891835038e-06, - "loss": 0.9868, - "step": 1677 - }, - { - "epoch": 0.1513279523830996, - "grad_norm": 0.6382134568480979, - "learning_rate": 3.847619304399902e-06, - "loss": 0.8427, - "step": 1678 - }, - { - "epoch": 0.15141813590656986, - "grad_norm": 1.43066514571738, - "learning_rate": 3.8473955593157e-06, - "loss": 1.0431, - "step": 1679 - }, - { - "epoch": 0.15150831943004014, - "grad_norm": 1.4485906591990372, - "learning_rate": 3.847171656601522e-06, - "loss": 0.9836, - "step": 1680 - }, - { - "epoch": 0.1515985029535104, - "grad_norm": 1.7756393812296072, - "learning_rate": 3.846947596276473e-06, - "loss": 1.0371, - "step": 1681 - }, - { - "epoch": 0.15168868647698067, - "grad_norm": 1.564853574100153, - "learning_rate": 3.846723378359672e-06, - "loss": 1.0609, - "step": 1682 - }, - { - "epoch": 0.15177887000045093, - "grad_norm": 1.9471218779221064, - "learning_rate": 3.846499002870249e-06, - "loss": 1.0256, - "step": 1683 - }, - { - "epoch": 0.1518690535239212, - "grad_norm": 1.5567853565970142, - "learning_rate": 3.846274469827349e-06, - "loss": 1.0276, - "step": 1684 - }, - { - "epoch": 0.15195923704739145, - "grad_norm": 1.73100888883445, - "learning_rate": 3.846049779250132e-06, - "loss": 1.0305, - "step": 1685 - }, - { - "epoch": 0.1520494205708617, - "grad_norm": 0.7510886635716232, - "learning_rate": 3.845824931157769e-06, - "loss": 0.8841, - "step": 1686 - }, - { - "epoch": 0.15213960409433197, - "grad_norm": 1.433488639283576, - "learning_rate": 3.845599925569444e-06, - "loss": 1.0064, - "step": 1687 - }, - { - "epoch": 0.15222978761780223, - "grad_norm": 1.9394573046436585, - "learning_rate": 3.845374762504357e-06, - "loss": 0.9777, - "step": 1688 - }, - { - "epoch": 0.1523199711412725, - "grad_norm": 1.4231183830177763, - "learning_rate": 3.8451494419817204e-06, - "loss": 1.0668, - "step": 1689 - }, - { - "epoch": 0.15241015466474275, - "grad_norm": 1.9127130227723421, - "learning_rate": 3.8449239640207594e-06, - "loss": 0.9508, - "step": 1690 - }, - { - "epoch": 0.152500338188213, - "grad_norm": 1.6205459588397715, - "learning_rate": 3.844698328640713e-06, - "loss": 1.0542, - "step": 1691 - }, - { - "epoch": 0.15259052171168327, - "grad_norm": 1.6752553261263918, - "learning_rate": 3.844472535860833e-06, - "loss": 1.0335, - "step": 1692 - }, - { - "epoch": 0.15268070523515354, - "grad_norm": 1.7105770341018138, - "learning_rate": 3.8442465857003864e-06, - "loss": 1.0758, - "step": 1693 - }, - { - "epoch": 0.1527708887586238, - "grad_norm": 2.0144670102430733, - "learning_rate": 3.844020478178653e-06, - "loss": 0.9591, - "step": 1694 - }, - { - "epoch": 0.15286107228209406, - "grad_norm": 1.5650616818298662, - "learning_rate": 3.843794213314923e-06, - "loss": 1.05, - "step": 1695 - }, - { - "epoch": 0.15295125580556432, - "grad_norm": 1.540266002124245, - "learning_rate": 3.843567791128505e-06, - "loss": 1.0501, - "step": 1696 - }, - { - "epoch": 0.15304143932903458, - "grad_norm": 1.207197554405976, - "learning_rate": 3.843341211638717e-06, - "loss": 1.0059, - "step": 1697 - }, - { - "epoch": 0.15313162285250484, - "grad_norm": 1.6464460013229152, - "learning_rate": 3.843114474864894e-06, - "loss": 0.9818, - "step": 1698 - }, - { - "epoch": 0.1532218063759751, - "grad_norm": 1.8925535596074068, - "learning_rate": 3.84288758082638e-06, - "loss": 1.0185, - "step": 1699 - }, - { - "epoch": 0.15331198989944536, - "grad_norm": 1.326451672349113, - "learning_rate": 3.842660529542536e-06, - "loss": 1.1066, - "step": 1700 - }, - { - "epoch": 0.15340217342291562, - "grad_norm": 1.4599473831415117, - "learning_rate": 3.842433321032736e-06, - "loss": 1.0399, - "step": 1701 - }, - { - "epoch": 0.15349235694638588, - "grad_norm": 1.8871524541348255, - "learning_rate": 3.842205955316365e-06, - "loss": 0.9471, - "step": 1702 - }, - { - "epoch": 0.15358254046985614, - "grad_norm": 1.6212480954149482, - "learning_rate": 3.8419784324128256e-06, - "loss": 0.928, - "step": 1703 - }, - { - "epoch": 0.15367272399332643, - "grad_norm": 2.122135924980829, - "learning_rate": 3.841750752341529e-06, - "loss": 0.9861, - "step": 1704 - }, - { - "epoch": 0.1537629075167967, - "grad_norm": 1.6402044574710006, - "learning_rate": 3.841522915121902e-06, - "loss": 1.0192, - "step": 1705 - }, - { - "epoch": 0.15385309104026695, - "grad_norm": 1.7006395034704327, - "learning_rate": 3.841294920773387e-06, - "loss": 0.9525, - "step": 1706 - }, - { - "epoch": 0.15394327456373721, - "grad_norm": 1.7936958384561121, - "learning_rate": 3.841066769315436e-06, - "loss": 1.0339, - "step": 1707 - }, - { - "epoch": 0.15403345808720748, - "grad_norm": 1.453112694472846, - "learning_rate": 3.840838460767518e-06, - "loss": 1.0299, - "step": 1708 - }, - { - "epoch": 0.15412364161067774, - "grad_norm": 1.5603110430034708, - "learning_rate": 3.840609995149111e-06, - "loss": 0.9947, - "step": 1709 - }, - { - "epoch": 0.154213825134148, - "grad_norm": 1.7997787233612816, - "learning_rate": 3.84038137247971e-06, - "loss": 1.0194, - "step": 1710 - }, - { - "epoch": 0.15430400865761826, - "grad_norm": 2.1728279712553586, - "learning_rate": 3.840152592778823e-06, - "loss": 0.9995, - "step": 1711 - }, - { - "epoch": 0.15439419218108852, - "grad_norm": 1.2005222952193202, - "learning_rate": 3.83992365606597e-06, - "loss": 1.0625, - "step": 1712 - }, - { - "epoch": 0.15448437570455878, - "grad_norm": 1.5936784915215425, - "learning_rate": 3.8396945623606855e-06, - "loss": 0.9778, - "step": 1713 - }, - { - "epoch": 0.15457455922802904, - "grad_norm": 1.398299567383636, - "learning_rate": 3.8394653116825174e-06, - "loss": 0.9827, - "step": 1714 - }, - { - "epoch": 0.1546647427514993, - "grad_norm": 1.5433263436743454, - "learning_rate": 3.839235904051026e-06, - "loss": 1.008, - "step": 1715 - }, - { - "epoch": 0.15475492627496956, - "grad_norm": 1.6221575452301917, - "learning_rate": 3.8390063394857855e-06, - "loss": 0.9976, - "step": 1716 - }, - { - "epoch": 0.15484510979843982, - "grad_norm": 1.4585533067013285, - "learning_rate": 3.838776618006385e-06, - "loss": 1.1017, - "step": 1717 - }, - { - "epoch": 0.15493529332191008, - "grad_norm": 1.9043438245414328, - "learning_rate": 3.838546739632423e-06, - "loss": 1.0255, - "step": 1718 - }, - { - "epoch": 0.15502547684538034, - "grad_norm": 2.1992809767922723, - "learning_rate": 3.838316704383517e-06, - "loss": 1.0715, - "step": 1719 - }, - { - "epoch": 0.1551156603688506, - "grad_norm": 1.5382110254194397, - "learning_rate": 3.838086512279292e-06, - "loss": 1.0171, - "step": 1720 - }, - { - "epoch": 0.15520584389232087, - "grad_norm": 1.9477382993036982, - "learning_rate": 3.837856163339391e-06, - "loss": 1.0134, - "step": 1721 - }, - { - "epoch": 0.15529602741579113, - "grad_norm": 1.3663684244302146, - "learning_rate": 3.837625657583469e-06, - "loss": 0.9477, - "step": 1722 - }, - { - "epoch": 0.1553862109392614, - "grad_norm": 1.4068114961097482, - "learning_rate": 3.837394995031193e-06, - "loss": 0.9728, - "step": 1723 - }, - { - "epoch": 0.15547639446273165, - "grad_norm": 1.4505732291145048, - "learning_rate": 3.837164175702245e-06, - "loss": 1.0264, - "step": 1724 - }, - { - "epoch": 0.1555665779862019, - "grad_norm": 1.8821750685680154, - "learning_rate": 3.836933199616319e-06, - "loss": 0.9302, - "step": 1725 - }, - { - "epoch": 0.15565676150967217, - "grad_norm": 1.3999341915194896, - "learning_rate": 3.836702066793124e-06, - "loss": 1.085, - "step": 1726 - }, - { - "epoch": 0.15574694503314246, - "grad_norm": 1.7213712644280952, - "learning_rate": 3.836470777252381e-06, - "loss": 1.0751, - "step": 1727 - }, - { - "epoch": 0.15583712855661272, - "grad_norm": 1.6792528876546293, - "learning_rate": 3.836239331013825e-06, - "loss": 1.0303, - "step": 1728 - }, - { - "epoch": 0.15592731208008298, - "grad_norm": 1.625188596491764, - "learning_rate": 3.836007728097205e-06, - "loss": 0.9764, - "step": 1729 - }, - { - "epoch": 0.15601749560355324, - "grad_norm": 1.244721760509356, - "learning_rate": 3.835775968522282e-06, - "loss": 0.9783, - "step": 1730 - }, - { - "epoch": 0.1561076791270235, - "grad_norm": 2.028590177491583, - "learning_rate": 3.83554405230883e-06, - "loss": 1.0364, - "step": 1731 - }, - { - "epoch": 0.15619786265049376, - "grad_norm": 1.8215129643077579, - "learning_rate": 3.835311979476639e-06, - "loss": 0.9996, - "step": 1732 - }, - { - "epoch": 0.15628804617396402, - "grad_norm": 1.4195077494228014, - "learning_rate": 3.83507975004551e-06, - "loss": 0.9167, - "step": 1733 - }, - { - "epoch": 0.15637822969743428, - "grad_norm": 1.5828284998063418, - "learning_rate": 3.834847364035258e-06, - "loss": 0.997, - "step": 1734 - }, - { - "epoch": 0.15646841322090455, - "grad_norm": 2.5322875733828263, - "learning_rate": 3.834614821465712e-06, - "loss": 0.9548, - "step": 1735 - }, - { - "epoch": 0.1565585967443748, - "grad_norm": 1.5935256650525822, - "learning_rate": 3.834382122356713e-06, - "loss": 1.048, - "step": 1736 - }, - { - "epoch": 0.15664878026784507, - "grad_norm": 1.3860745505227825, - "learning_rate": 3.834149266728117e-06, - "loss": 0.9827, - "step": 1737 - }, - { - "epoch": 0.15673896379131533, - "grad_norm": 0.7480716551817185, - "learning_rate": 3.833916254599792e-06, - "loss": 0.89, - "step": 1738 - }, - { - "epoch": 0.1568291473147856, - "grad_norm": 0.6810153618352884, - "learning_rate": 3.83368308599162e-06, - "loss": 0.8459, - "step": 1739 - }, - { - "epoch": 0.15691933083825585, - "grad_norm": 0.5765038778384721, - "learning_rate": 3.833449760923498e-06, - "loss": 0.7939, - "step": 1740 - }, - { - "epoch": 0.1570095143617261, - "grad_norm": 1.4265934551177124, - "learning_rate": 3.83321627941533e-06, - "loss": 1.0572, - "step": 1741 - }, - { - "epoch": 0.15709969788519637, - "grad_norm": 1.6496592690042526, - "learning_rate": 3.832982641487043e-06, - "loss": 1.0704, - "step": 1742 - }, - { - "epoch": 0.15718988140866663, - "grad_norm": 1.5208770362462258, - "learning_rate": 3.832748847158568e-06, - "loss": 1.103, - "step": 1743 - }, - { - "epoch": 0.1572800649321369, - "grad_norm": 1.2987433613817687, - "learning_rate": 3.832514896449858e-06, - "loss": 1.0431, - "step": 1744 - }, - { - "epoch": 0.15737024845560715, - "grad_norm": 1.4233928585484914, - "learning_rate": 3.832280789380871e-06, - "loss": 0.9557, - "step": 1745 - }, - { - "epoch": 0.15746043197907741, - "grad_norm": 2.6650581177463235, - "learning_rate": 3.832046525971584e-06, - "loss": 0.922, - "step": 1746 - }, - { - "epoch": 0.15755061550254768, - "grad_norm": 1.4846175949221623, - "learning_rate": 3.831812106241987e-06, - "loss": 1.0706, - "step": 1747 - }, - { - "epoch": 0.15764079902601794, - "grad_norm": 2.0322678583322262, - "learning_rate": 3.8315775302120796e-06, - "loss": 0.9294, - "step": 1748 - }, - { - "epoch": 0.1577309825494882, - "grad_norm": 1.5466236719507915, - "learning_rate": 3.831342797901878e-06, - "loss": 1.0328, - "step": 1749 - }, - { - "epoch": 0.15782116607295846, - "grad_norm": 0.6910886735619728, - "learning_rate": 3.831107909331411e-06, - "loss": 0.8748, - "step": 1750 - }, - { - "epoch": 0.15791134959642875, - "grad_norm": 1.6474594179518407, - "learning_rate": 3.830872864520721e-06, - "loss": 1.0596, - "step": 1751 - }, - { - "epoch": 0.158001533119899, - "grad_norm": 0.7256040506682001, - "learning_rate": 3.830637663489862e-06, - "loss": 0.9331, - "step": 1752 - }, - { - "epoch": 0.15809171664336927, - "grad_norm": 0.7641133156945042, - "learning_rate": 3.830402306258904e-06, - "loss": 0.9652, - "step": 1753 - }, - { - "epoch": 0.15818190016683953, - "grad_norm": 2.8424980006550626, - "learning_rate": 3.830166792847929e-06, - "loss": 1.0028, - "step": 1754 - }, - { - "epoch": 0.1582720836903098, - "grad_norm": 1.5355276963951137, - "learning_rate": 3.829931123277031e-06, - "loss": 0.9577, - "step": 1755 - }, - { - "epoch": 0.15836226721378005, - "grad_norm": 1.4438948818684358, - "learning_rate": 3.8296952975663204e-06, - "loss": 1.0082, - "step": 1756 - }, - { - "epoch": 0.1584524507372503, - "grad_norm": 2.13345550793247, - "learning_rate": 3.829459315735918e-06, - "loss": 1.0824, - "step": 1757 - }, - { - "epoch": 0.15854263426072057, - "grad_norm": 1.4351422420808617, - "learning_rate": 3.829223177805959e-06, - "loss": 0.9407, - "step": 1758 - }, - { - "epoch": 0.15863281778419083, - "grad_norm": 1.5433892944494565, - "learning_rate": 3.828986883796591e-06, - "loss": 1.0542, - "step": 1759 - }, - { - "epoch": 0.1587230013076611, - "grad_norm": 1.7519423060258543, - "learning_rate": 3.828750433727979e-06, - "loss": 1.0036, - "step": 1760 - }, - { - "epoch": 0.15881318483113135, - "grad_norm": 4.444894781079544, - "learning_rate": 3.828513827620296e-06, - "loss": 1.0573, - "step": 1761 - }, - { - "epoch": 0.15890336835460162, - "grad_norm": 3.496969273227946, - "learning_rate": 3.82827706549373e-06, - "loss": 1.0645, - "step": 1762 - }, - { - "epoch": 0.15899355187807188, - "grad_norm": 1.8581781462232858, - "learning_rate": 3.828040147368484e-06, - "loss": 0.9996, - "step": 1763 - }, - { - "epoch": 0.15908373540154214, - "grad_norm": 1.402955668199674, - "learning_rate": 3.827803073264774e-06, - "loss": 1.0079, - "step": 1764 - }, - { - "epoch": 0.1591739189250124, - "grad_norm": 1.4582802989943946, - "learning_rate": 3.827565843202826e-06, - "loss": 1.014, - "step": 1765 - }, - { - "epoch": 0.15926410244848266, - "grad_norm": 1.7233316563660206, - "learning_rate": 3.827328457202884e-06, - "loss": 0.8924, - "step": 1766 - }, - { - "epoch": 0.15935428597195292, - "grad_norm": 1.4472395962782543, - "learning_rate": 3.8270909152852014e-06, - "loss": 0.95, - "step": 1767 - }, - { - "epoch": 0.15944446949542318, - "grad_norm": 1.9143769920605276, - "learning_rate": 3.826853217470048e-06, - "loss": 1.0167, - "step": 1768 - }, - { - "epoch": 0.15953465301889344, - "grad_norm": 1.4940236563182083, - "learning_rate": 3.826615363777705e-06, - "loss": 1.0907, - "step": 1769 - }, - { - "epoch": 0.1596248365423637, - "grad_norm": 1.493740053024053, - "learning_rate": 3.826377354228468e-06, - "loss": 0.9957, - "step": 1770 - }, - { - "epoch": 0.15971502006583396, - "grad_norm": 0.6556378643130224, - "learning_rate": 3.826139188842643e-06, - "loss": 0.8516, - "step": 1771 - }, - { - "epoch": 0.15980520358930422, - "grad_norm": 1.2269375464595957, - "learning_rate": 3.825900867640554e-06, - "loss": 1.0746, - "step": 1772 - }, - { - "epoch": 0.15989538711277448, - "grad_norm": 1.3284681886561998, - "learning_rate": 3.825662390642535e-06, - "loss": 1.0568, - "step": 1773 - }, - { - "epoch": 0.15998557063624474, - "grad_norm": 1.6817724727820704, - "learning_rate": 3.825423757868934e-06, - "loss": 1.1147, - "step": 1774 - }, - { - "epoch": 0.16007575415971503, - "grad_norm": 1.5179796313702245, - "learning_rate": 3.825184969340114e-06, - "loss": 1.0422, - "step": 1775 - }, - { - "epoch": 0.1601659376831853, - "grad_norm": 1.3781448726129495, - "learning_rate": 3.824946025076447e-06, - "loss": 1.0445, - "step": 1776 - }, - { - "epoch": 0.16025612120665556, - "grad_norm": 1.4815217553510205, - "learning_rate": 3.824706925098323e-06, - "loss": 0.9625, - "step": 1777 - }, - { - "epoch": 0.16034630473012582, - "grad_norm": 2.007682825791836, - "learning_rate": 3.824467669426143e-06, - "loss": 1.0691, - "step": 1778 - }, - { - "epoch": 0.16043648825359608, - "grad_norm": 1.7701633158194334, - "learning_rate": 3.824228258080321e-06, - "loss": 0.9772, - "step": 1779 - }, - { - "epoch": 0.16052667177706634, - "grad_norm": 1.519069179699935, - "learning_rate": 3.823988691081285e-06, - "loss": 0.931, - "step": 1780 - }, - { - "epoch": 0.1606168553005366, - "grad_norm": 1.3358659780887827, - "learning_rate": 3.823748968449478e-06, - "loss": 1.0422, - "step": 1781 - }, - { - "epoch": 0.16070703882400686, - "grad_norm": 1.5502552898964508, - "learning_rate": 3.823509090205352e-06, - "loss": 0.9121, - "step": 1782 - }, - { - "epoch": 0.16079722234747712, - "grad_norm": 1.877837767511163, - "learning_rate": 3.823269056369376e-06, - "loss": 0.9697, - "step": 1783 - }, - { - "epoch": 0.16088740587094738, - "grad_norm": 1.3880144540188062, - "learning_rate": 3.8230288669620295e-06, - "loss": 1.0407, - "step": 1784 - }, - { - "epoch": 0.16097758939441764, - "grad_norm": 1.3563687637294015, - "learning_rate": 3.822788522003809e-06, - "loss": 1.0175, - "step": 1785 - }, - { - "epoch": 0.1610677729178879, - "grad_norm": 1.645705793565551, - "learning_rate": 3.822548021515221e-06, - "loss": 1.0878, - "step": 1786 - }, - { - "epoch": 0.16115795644135816, - "grad_norm": 1.6046897316149207, - "learning_rate": 3.822307365516787e-06, - "loss": 0.9858, - "step": 1787 - }, - { - "epoch": 0.16124813996482842, - "grad_norm": 2.581014135636082, - "learning_rate": 3.8220665540290395e-06, - "loss": 1.0296, - "step": 1788 - }, - { - "epoch": 0.16133832348829868, - "grad_norm": 2.359807461142139, - "learning_rate": 3.8218255870725265e-06, - "loss": 1.0751, - "step": 1789 - }, - { - "epoch": 0.16142850701176895, - "grad_norm": 1.5682910414995612, - "learning_rate": 3.82158446466781e-06, - "loss": 0.9635, - "step": 1790 - }, - { - "epoch": 0.1615186905352392, - "grad_norm": 1.5109692669816086, - "learning_rate": 3.821343186835462e-06, - "loss": 1.0255, - "step": 1791 - }, - { - "epoch": 0.16160887405870947, - "grad_norm": 1.3440086648071359, - "learning_rate": 3.821101753596072e-06, - "loss": 1.0838, - "step": 1792 - }, - { - "epoch": 0.16169905758217973, - "grad_norm": 1.7000468023252877, - "learning_rate": 3.820860164970237e-06, - "loss": 1.0024, - "step": 1793 - }, - { - "epoch": 0.16178924110565, - "grad_norm": 1.345609885918003, - "learning_rate": 3.820618420978574e-06, - "loss": 1.0866, - "step": 1794 - }, - { - "epoch": 0.16187942462912025, - "grad_norm": 1.723434445472701, - "learning_rate": 3.820376521641708e-06, - "loss": 1.0064, - "step": 1795 - }, - { - "epoch": 0.1619696081525905, - "grad_norm": 1.9074241899097026, - "learning_rate": 3.82013446698028e-06, - "loss": 1.0263, - "step": 1796 - }, - { - "epoch": 0.16205979167606077, - "grad_norm": 1.4627369289358578, - "learning_rate": 3.819892257014943e-06, - "loss": 0.974, - "step": 1797 - }, - { - "epoch": 0.16214997519953103, - "grad_norm": 1.5085556491680918, - "learning_rate": 3.819649891766364e-06, - "loss": 1.0392, - "step": 1798 - }, - { - "epoch": 0.16224015872300132, - "grad_norm": 1.4366474940856484, - "learning_rate": 3.819407371255222e-06, - "loss": 1.0017, - "step": 1799 - }, - { - "epoch": 0.16233034224647158, - "grad_norm": 1.7744317313675546, - "learning_rate": 3.819164695502212e-06, - "loss": 1.017, - "step": 1800 - }, - { - "epoch": 0.16242052576994184, - "grad_norm": 1.6310065836910963, - "learning_rate": 3.818921864528039e-06, - "loss": 0.9616, - "step": 1801 - }, - { - "epoch": 0.1625107092934121, - "grad_norm": 1.7153888783100921, - "learning_rate": 3.818678878353423e-06, - "loss": 1.0716, - "step": 1802 - }, - { - "epoch": 0.16260089281688236, - "grad_norm": 1.704785718457224, - "learning_rate": 3.818435736999097e-06, - "loss": 0.9616, - "step": 1803 - }, - { - "epoch": 0.16269107634035262, - "grad_norm": 1.8212628155573565, - "learning_rate": 3.818192440485807e-06, - "loss": 1.0122, - "step": 1804 - }, - { - "epoch": 0.16278125986382289, - "grad_norm": 1.5505437020420092, - "learning_rate": 3.817948988834314e-06, - "loss": 0.9458, - "step": 1805 - }, - { - "epoch": 0.16287144338729315, - "grad_norm": 1.3426203747989212, - "learning_rate": 3.817705382065388e-06, - "loss": 1.0118, - "step": 1806 - }, - { - "epoch": 0.1629616269107634, - "grad_norm": 1.2148487544799955, - "learning_rate": 3.8174616201998155e-06, - "loss": 0.9833, - "step": 1807 - }, - { - "epoch": 0.16305181043423367, - "grad_norm": 1.6088414465130834, - "learning_rate": 3.817217703258397e-06, - "loss": 1.0622, - "step": 1808 - }, - { - "epoch": 0.16314199395770393, - "grad_norm": 3.9019271735155994, - "learning_rate": 3.816973631261943e-06, - "loss": 1.006, - "step": 1809 - }, - { - "epoch": 0.1632321774811742, - "grad_norm": 1.5337132473920378, - "learning_rate": 3.816729404231281e-06, - "loss": 1.0425, - "step": 1810 - }, - { - "epoch": 0.16332236100464445, - "grad_norm": 1.8096368471730013, - "learning_rate": 3.816485022187249e-06, - "loss": 1.0796, - "step": 1811 - }, - { - "epoch": 0.1634125445281147, - "grad_norm": 2.4873997730401944, - "learning_rate": 3.816240485150698e-06, - "loss": 0.9621, - "step": 1812 - }, - { - "epoch": 0.16350272805158497, - "grad_norm": 1.3146380312865795, - "learning_rate": 3.815995793142495e-06, - "loss": 1.0085, - "step": 1813 - }, - { - "epoch": 0.16359291157505523, - "grad_norm": 1.705646363400993, - "learning_rate": 3.815750946183518e-06, - "loss": 0.9486, - "step": 1814 - }, - { - "epoch": 0.1636830950985255, - "grad_norm": 1.6074787560893882, - "learning_rate": 3.815505944294658e-06, - "loss": 1.0656, - "step": 1815 - }, - { - "epoch": 0.16377327862199575, - "grad_norm": 1.2132248963031151, - "learning_rate": 3.81526078749682e-06, - "loss": 0.9716, - "step": 1816 - }, - { - "epoch": 0.16386346214546602, - "grad_norm": 1.478685820243646, - "learning_rate": 3.8150154758109225e-06, - "loss": 0.9561, - "step": 1817 - }, - { - "epoch": 0.16395364566893628, - "grad_norm": 1.3878648347012505, - "learning_rate": 3.814770009257896e-06, - "loss": 0.9859, - "step": 1818 - }, - { - "epoch": 0.16404382919240654, - "grad_norm": 1.294769416223029, - "learning_rate": 3.814524387858687e-06, - "loss": 1.0263, - "step": 1819 - }, - { - "epoch": 0.1641340127158768, - "grad_norm": 1.6598129096941552, - "learning_rate": 3.814278611634251e-06, - "loss": 1.0327, - "step": 1820 - }, - { - "epoch": 0.16422419623934706, - "grad_norm": 1.970266938495757, - "learning_rate": 3.8140326806055606e-06, - "loss": 0.9871, - "step": 1821 - }, - { - "epoch": 0.16431437976281732, - "grad_norm": 1.65373059954116, - "learning_rate": 3.8137865947935992e-06, - "loss": 1.071, - "step": 1822 - }, - { - "epoch": 0.1644045632862876, - "grad_norm": 2.4495189428023334, - "learning_rate": 3.8135403542193646e-06, - "loss": 1.0289, - "step": 1823 - }, - { - "epoch": 0.16449474680975787, - "grad_norm": 1.9456261684513474, - "learning_rate": 3.813293958903867e-06, - "loss": 0.9457, - "step": 1824 - }, - { - "epoch": 0.16458493033322813, - "grad_norm": 1.4557932485307286, - "learning_rate": 3.8130474088681306e-06, - "loss": 0.9754, - "step": 1825 - }, - { - "epoch": 0.1646751138566984, - "grad_norm": 2.0484404656267268, - "learning_rate": 3.8128007041331927e-06, - "loss": 0.9589, - "step": 1826 - }, - { - "epoch": 0.16476529738016865, - "grad_norm": 1.5960326210598526, - "learning_rate": 3.812553844720102e-06, - "loss": 0.9286, - "step": 1827 - }, - { - "epoch": 0.1648554809036389, - "grad_norm": 1.7456009615326427, - "learning_rate": 3.8123068306499236e-06, - "loss": 1.0325, - "step": 1828 - }, - { - "epoch": 0.16494566442710917, - "grad_norm": 1.5868953574618767, - "learning_rate": 3.812059661943733e-06, - "loss": 1.017, - "step": 1829 - }, - { - "epoch": 0.16503584795057943, - "grad_norm": 1.306637158125539, - "learning_rate": 3.811812338622621e-06, - "loss": 0.9894, - "step": 1830 - }, - { - "epoch": 0.1651260314740497, - "grad_norm": 1.351872447201474, - "learning_rate": 3.81156486070769e-06, - "loss": 1.0066, - "step": 1831 - }, - { - "epoch": 0.16521621499751996, - "grad_norm": 1.6311037896939318, - "learning_rate": 3.811317228220056e-06, - "loss": 1.0597, - "step": 1832 - }, - { - "epoch": 0.16530639852099022, - "grad_norm": 1.6212061831539222, - "learning_rate": 3.811069441180849e-06, - "loss": 1.0547, - "step": 1833 - }, - { - "epoch": 0.16539658204446048, - "grad_norm": 1.7063832045878962, - "learning_rate": 3.8108214996112107e-06, - "loss": 1.0196, - "step": 1834 - }, - { - "epoch": 0.16548676556793074, - "grad_norm": 1.811558972444599, - "learning_rate": 3.810573403532297e-06, - "loss": 1.0536, - "step": 1835 - }, - { - "epoch": 0.165576949091401, - "grad_norm": 1.5584404662054123, - "learning_rate": 3.8103251529652774e-06, - "loss": 0.9934, - "step": 1836 - }, - { - "epoch": 0.16566713261487126, - "grad_norm": 1.4417366124790523, - "learning_rate": 3.810076747931334e-06, - "loss": 0.9901, - "step": 1837 - }, - { - "epoch": 0.16575731613834152, - "grad_norm": 1.4547252252687877, - "learning_rate": 3.809828188451662e-06, - "loss": 0.9743, - "step": 1838 - }, - { - "epoch": 0.16584749966181178, - "grad_norm": 2.603497767691244, - "learning_rate": 3.809579474547469e-06, - "loss": 1.0092, - "step": 1839 - }, - { - "epoch": 0.16593768318528204, - "grad_norm": 1.4755176702596324, - "learning_rate": 3.809330606239977e-06, - "loss": 1.0571, - "step": 1840 - }, - { - "epoch": 0.1660278667087523, - "grad_norm": 1.4281610563638922, - "learning_rate": 3.809081583550422e-06, - "loss": 1.081, - "step": 1841 - }, - { - "epoch": 0.16611805023222256, - "grad_norm": 1.812642782440939, - "learning_rate": 3.808832406500051e-06, - "loss": 1.0925, - "step": 1842 - }, - { - "epoch": 0.16620823375569282, - "grad_norm": 1.7431356817761423, - "learning_rate": 3.8085830751101253e-06, - "loss": 1.0835, - "step": 1843 - }, - { - "epoch": 0.16629841727916309, - "grad_norm": 1.4147066788229719, - "learning_rate": 3.808333589401919e-06, - "loss": 1.0507, - "step": 1844 - }, - { - "epoch": 0.16638860080263335, - "grad_norm": 1.646962740042969, - "learning_rate": 3.8080839493967194e-06, - "loss": 0.9431, - "step": 1845 - }, - { - "epoch": 0.1664787843261036, - "grad_norm": 0.686263315943645, - "learning_rate": 3.807834155115828e-06, - "loss": 0.8204, - "step": 1846 - }, - { - "epoch": 0.1665689678495739, - "grad_norm": 0.717437457904404, - "learning_rate": 3.8075842065805584e-06, - "loss": 0.93, - "step": 1847 - }, - { - "epoch": 0.16665915137304416, - "grad_norm": 1.9908699016744078, - "learning_rate": 3.8073341038122374e-06, - "loss": 0.9992, - "step": 1848 - }, - { - "epoch": 0.16674933489651442, - "grad_norm": 2.9440855511280306, - "learning_rate": 3.8070838468322048e-06, - "loss": 1.079, - "step": 1849 - }, - { - "epoch": 0.16683951841998468, - "grad_norm": 1.7411057744813003, - "learning_rate": 3.8068334356618143e-06, - "loss": 0.9559, - "step": 1850 - }, - { - "epoch": 0.16692970194345494, - "grad_norm": 2.040643656296322, - "learning_rate": 3.8065828703224324e-06, - "loss": 1.0412, - "step": 1851 - }, - { - "epoch": 0.1670198854669252, - "grad_norm": 7.136686678909877, - "learning_rate": 3.8063321508354386e-06, - "loss": 1.0076, - "step": 1852 - }, - { - "epoch": 0.16711006899039546, - "grad_norm": 1.4337190578484227, - "learning_rate": 3.8060812772222255e-06, - "loss": 1.0004, - "step": 1853 - }, - { - "epoch": 0.16720025251386572, - "grad_norm": 1.2034027472594166, - "learning_rate": 3.8058302495041993e-06, - "loss": 1.0806, - "step": 1854 - }, - { - "epoch": 0.16729043603733598, - "grad_norm": 1.5977316840907718, - "learning_rate": 3.805579067702779e-06, - "loss": 1.0349, - "step": 1855 - }, - { - "epoch": 0.16738061956080624, - "grad_norm": 2.7208938915139953, - "learning_rate": 3.8053277318393967e-06, - "loss": 1.016, - "step": 1856 - }, - { - "epoch": 0.1674708030842765, - "grad_norm": 1.5677002392261028, - "learning_rate": 3.805076241935498e-06, - "loss": 1.0221, - "step": 1857 - }, - { - "epoch": 0.16756098660774676, - "grad_norm": 1.535605173258552, - "learning_rate": 3.804824598012541e-06, - "loss": 0.8834, - "step": 1858 - }, - { - "epoch": 0.16765117013121703, - "grad_norm": 1.3195447777641338, - "learning_rate": 3.8045728000919975e-06, - "loss": 1.0543, - "step": 1859 - }, - { - "epoch": 0.1677413536546873, - "grad_norm": 1.3967223064762682, - "learning_rate": 3.8043208481953524e-06, - "loss": 1.0415, - "step": 1860 - }, - { - "epoch": 0.16783153717815755, - "grad_norm": 1.470377243838049, - "learning_rate": 3.804068742344104e-06, - "loss": 0.9506, - "step": 1861 - }, - { - "epoch": 0.1679217207016278, - "grad_norm": 4.018672514030524, - "learning_rate": 3.8038164825597628e-06, - "loss": 0.979, - "step": 1862 - }, - { - "epoch": 0.16801190422509807, - "grad_norm": 1.5275658643541972, - "learning_rate": 3.8035640688638537e-06, - "loss": 1.0951, - "step": 1863 - }, - { - "epoch": 0.16810208774856833, - "grad_norm": 1.6254448648546191, - "learning_rate": 3.8033115012779125e-06, - "loss": 1.0891, - "step": 1864 - }, - { - "epoch": 0.1681922712720386, - "grad_norm": 1.5639672066453036, - "learning_rate": 3.8030587798234915e-06, - "loss": 1.0552, - "step": 1865 - }, - { - "epoch": 0.16828245479550885, - "grad_norm": 1.7004426604252325, - "learning_rate": 3.802805904522153e-06, - "loss": 0.9513, - "step": 1866 - }, - { - "epoch": 0.1683726383189791, - "grad_norm": 1.2578400318615555, - "learning_rate": 3.8025528753954742e-06, - "loss": 1.0096, - "step": 1867 - }, - { - "epoch": 0.16846282184244937, - "grad_norm": 1.6343102407663395, - "learning_rate": 3.802299692465045e-06, - "loss": 0.9515, - "step": 1868 - }, - { - "epoch": 0.16855300536591963, - "grad_norm": 1.9235289984972133, - "learning_rate": 3.802046355752468e-06, - "loss": 1.0027, - "step": 1869 - }, - { - "epoch": 0.16864318888938992, - "grad_norm": 4.419821326325378, - "learning_rate": 3.80179286527936e-06, - "loss": 1.0714, - "step": 1870 - }, - { - "epoch": 0.16873337241286018, - "grad_norm": 1.4892030599566264, - "learning_rate": 3.801539221067349e-06, - "loss": 0.9285, - "step": 1871 - }, - { - "epoch": 0.16882355593633044, - "grad_norm": 1.4536082786958344, - "learning_rate": 3.801285423138079e-06, - "loss": 0.9706, - "step": 1872 - }, - { - "epoch": 0.1689137394598007, - "grad_norm": 1.4434181789865026, - "learning_rate": 3.8010314715132037e-06, - "loss": 0.9588, - "step": 1873 - }, - { - "epoch": 0.16900392298327097, - "grad_norm": 1.6016829282561629, - "learning_rate": 3.800777366214393e-06, - "loss": 0.9655, - "step": 1874 - }, - { - "epoch": 0.16909410650674123, - "grad_norm": 13.845089677525058, - "learning_rate": 3.800523107263328e-06, - "loss": 0.9906, - "step": 1875 - }, - { - "epoch": 0.1691842900302115, - "grad_norm": 1.552810686263547, - "learning_rate": 3.800268694681703e-06, - "loss": 1.0617, - "step": 1876 - }, - { - "epoch": 0.16927447355368175, - "grad_norm": 0.674722669284666, - "learning_rate": 3.800014128491227e-06, - "loss": 0.8569, - "step": 1877 - }, - { - "epoch": 0.169364657077152, - "grad_norm": 1.3240128978867414, - "learning_rate": 3.79975940871362e-06, - "loss": 1.0664, - "step": 1878 - }, - { - "epoch": 0.16945484060062227, - "grad_norm": 2.1460153693851325, - "learning_rate": 3.799504535370617e-06, - "loss": 1.0224, - "step": 1879 - }, - { - "epoch": 0.16954502412409253, - "grad_norm": 6.367982480087333, - "learning_rate": 3.799249508483964e-06, - "loss": 0.95, - "step": 1880 - }, - { - "epoch": 0.1696352076475628, - "grad_norm": 0.7226468472255682, - "learning_rate": 3.798994328075422e-06, - "loss": 0.9071, - "step": 1881 - }, - { - "epoch": 0.16972539117103305, - "grad_norm": 1.5789147846306475, - "learning_rate": 3.798738994166765e-06, - "loss": 0.945, - "step": 1882 - }, - { - "epoch": 0.1698155746945033, - "grad_norm": 1.4037305090375676, - "learning_rate": 3.7984835067797788e-06, - "loss": 1.0132, - "step": 1883 - }, - { - "epoch": 0.16990575821797357, - "grad_norm": 1.3651410290591155, - "learning_rate": 3.798227865936263e-06, - "loss": 1.1026, - "step": 1884 - }, - { - "epoch": 0.16999594174144383, - "grad_norm": 1.6266817047382913, - "learning_rate": 3.7979720716580297e-06, - "loss": 0.8796, - "step": 1885 - }, - { - "epoch": 0.1700861252649141, - "grad_norm": 3.620289603182746, - "learning_rate": 3.7977161239669057e-06, - "loss": 1.0018, - "step": 1886 - }, - { - "epoch": 0.17017630878838436, - "grad_norm": 2.122831359986156, - "learning_rate": 3.7974600228847294e-06, - "loss": 0.9904, - "step": 1887 - }, - { - "epoch": 0.17026649231185462, - "grad_norm": 2.421523333213186, - "learning_rate": 3.7972037684333534e-06, - "loss": 0.9427, - "step": 1888 - }, - { - "epoch": 0.17035667583532488, - "grad_norm": 1.4419106528740198, - "learning_rate": 3.796947360634642e-06, - "loss": 0.9442, - "step": 1889 - }, - { - "epoch": 0.17044685935879514, - "grad_norm": 1.5706706990744528, - "learning_rate": 3.796690799510473e-06, - "loss": 1.0581, - "step": 1890 - }, - { - "epoch": 0.1705370428822654, - "grad_norm": 1.4925118937569581, - "learning_rate": 3.7964340850827387e-06, - "loss": 0.9767, - "step": 1891 - }, - { - "epoch": 0.17062722640573566, - "grad_norm": 0.6777516563476901, - "learning_rate": 3.7961772173733425e-06, - "loss": 0.8189, - "step": 1892 - }, - { - "epoch": 0.17071740992920592, - "grad_norm": 1.287183652364079, - "learning_rate": 3.7959201964042024e-06, - "loss": 1.0015, - "step": 1893 - }, - { - "epoch": 0.1708075934526762, - "grad_norm": 1.6313024958205893, - "learning_rate": 3.795663022197248e-06, - "loss": 1.0367, - "step": 1894 - }, - { - "epoch": 0.17089777697614647, - "grad_norm": 1.7428112880517208, - "learning_rate": 3.7954056947744242e-06, - "loss": 1.0186, - "step": 1895 - }, - { - "epoch": 0.17098796049961673, - "grad_norm": 2.1361313035019127, - "learning_rate": 3.7951482141576863e-06, - "loss": 0.9894, - "step": 1896 - }, - { - "epoch": 0.171078144023087, - "grad_norm": 1.244149679633088, - "learning_rate": 3.794890580369004e-06, - "loss": 1.0568, - "step": 1897 - }, - { - "epoch": 0.17116832754655725, - "grad_norm": 1.626719445417221, - "learning_rate": 3.7946327934303612e-06, - "loss": 1.0431, - "step": 1898 - }, - { - "epoch": 0.1712585110700275, - "grad_norm": 1.4421394778240193, - "learning_rate": 3.794374853363752e-06, - "loss": 1.106, - "step": 1899 - }, - { - "epoch": 0.17134869459349777, - "grad_norm": 1.29970132257697, - "learning_rate": 3.794116760191187e-06, - "loss": 1.0305, - "step": 1900 - }, - { - "epoch": 0.17143887811696804, - "grad_norm": 1.5171817767647184, - "learning_rate": 3.7938585139346877e-06, - "loss": 1.1062, - "step": 1901 - }, - { - "epoch": 0.1715290616404383, - "grad_norm": 1.9841004429510383, - "learning_rate": 3.793600114616288e-06, - "loss": 0.9853, - "step": 1902 - }, - { - "epoch": 0.17161924516390856, - "grad_norm": 0.7549642935892574, - "learning_rate": 3.793341562258037e-06, - "loss": 0.8258, - "step": 1903 - }, - { - "epoch": 0.17170942868737882, - "grad_norm": 1.2788846582117557, - "learning_rate": 3.7930828568819953e-06, - "loss": 1.0223, - "step": 1904 - }, - { - "epoch": 0.17179961221084908, - "grad_norm": 1.42880263330894, - "learning_rate": 3.7928239985102378e-06, - "loss": 1.0094, - "step": 1905 - }, - { - "epoch": 0.17188979573431934, - "grad_norm": 1.4893576266499677, - "learning_rate": 3.7925649871648505e-06, - "loss": 1.006, - "step": 1906 - }, - { - "epoch": 0.1719799792577896, - "grad_norm": 2.7584889044401084, - "learning_rate": 3.792305822867935e-06, - "loss": 0.9417, - "step": 1907 - }, - { - "epoch": 0.17207016278125986, - "grad_norm": 1.5970996735962109, - "learning_rate": 3.792046505641604e-06, - "loss": 0.9756, - "step": 1908 - }, - { - "epoch": 0.17216034630473012, - "grad_norm": 1.6699672045885348, - "learning_rate": 3.791787035507984e-06, - "loss": 0.973, - "step": 1909 - }, - { - "epoch": 0.17225052982820038, - "grad_norm": 2.366912322212219, - "learning_rate": 3.7915274124892136e-06, - "loss": 0.9637, - "step": 1910 - }, - { - "epoch": 0.17234071335167064, - "grad_norm": 1.3131110721595733, - "learning_rate": 3.7912676366074466e-06, - "loss": 0.9956, - "step": 1911 - }, - { - "epoch": 0.1724308968751409, - "grad_norm": 2.0593731615350648, - "learning_rate": 3.7910077078848478e-06, - "loss": 0.9776, - "step": 1912 - }, - { - "epoch": 0.17252108039861117, - "grad_norm": 1.6136382587729448, - "learning_rate": 3.7907476263435957e-06, - "loss": 0.9725, - "step": 1913 - }, - { - "epoch": 0.17261126392208143, - "grad_norm": 7.024015375281257, - "learning_rate": 3.7904873920058826e-06, - "loss": 0.978, - "step": 1914 - }, - { - "epoch": 0.1727014474455517, - "grad_norm": 1.6340956323342932, - "learning_rate": 3.7902270048939114e-06, - "loss": 0.8799, - "step": 1915 - }, - { - "epoch": 0.17279163096902195, - "grad_norm": 1.6960695044472722, - "learning_rate": 3.7899664650299023e-06, - "loss": 0.9687, - "step": 1916 - }, - { - "epoch": 0.1728818144924922, - "grad_norm": 1.3612944983293074, - "learning_rate": 3.7897057724360836e-06, - "loss": 1.0189, - "step": 1917 - }, - { - "epoch": 0.1729719980159625, - "grad_norm": 1.7584150171817081, - "learning_rate": 3.7894449271347004e-06, - "loss": 1.0217, - "step": 1918 - }, - { - "epoch": 0.17306218153943276, - "grad_norm": 1.5569940833418485, - "learning_rate": 3.789183929148009e-06, - "loss": 1.0665, - "step": 1919 - }, - { - "epoch": 0.17315236506290302, - "grad_norm": 1.3924890947171722, - "learning_rate": 3.7889227784982795e-06, - "loss": 1.0391, - "step": 1920 - }, - { - "epoch": 0.17324254858637328, - "grad_norm": 2.2956986301826636, - "learning_rate": 3.7886614752077945e-06, - "loss": 1.0098, - "step": 1921 - }, - { - "epoch": 0.17333273210984354, - "grad_norm": 1.9831203188692155, - "learning_rate": 3.7884000192988495e-06, - "loss": 1.0123, - "step": 1922 - }, - { - "epoch": 0.1734229156333138, - "grad_norm": 1.5791808269473127, - "learning_rate": 3.7881384107937546e-06, - "loss": 1.0595, - "step": 1923 - }, - { - "epoch": 0.17351309915678406, - "grad_norm": 1.4881395487773086, - "learning_rate": 3.78787664971483e-06, - "loss": 1.0001, - "step": 1924 - }, - { - "epoch": 0.17360328268025432, - "grad_norm": 1.2390133110759942, - "learning_rate": 3.7876147360844115e-06, - "loss": 0.9794, - "step": 1925 - }, - { - "epoch": 0.17369346620372458, - "grad_norm": 1.5540626075141666, - "learning_rate": 3.7873526699248474e-06, - "loss": 1.0782, - "step": 1926 - }, - { - "epoch": 0.17378364972719484, - "grad_norm": 1.2744196749158025, - "learning_rate": 3.7870904512584974e-06, - "loss": 1.0119, - "step": 1927 - }, - { - "epoch": 0.1738738332506651, - "grad_norm": 1.4426977476243477, - "learning_rate": 3.7868280801077368e-06, - "loss": 1.0156, - "step": 1928 - }, - { - "epoch": 0.17396401677413537, - "grad_norm": 1.1902368027825379, - "learning_rate": 3.7865655564949517e-06, - "loss": 1.0004, - "step": 1929 - }, - { - "epoch": 0.17405420029760563, - "grad_norm": 1.360988492481204, - "learning_rate": 3.786302880442542e-06, - "loss": 0.9772, - "step": 1930 - }, - { - "epoch": 0.1741443838210759, - "grad_norm": 1.512445713676844, - "learning_rate": 3.7860400519729215e-06, - "loss": 1.0085, - "step": 1931 - }, - { - "epoch": 0.17423456734454615, - "grad_norm": 1.8393668105279803, - "learning_rate": 3.7857770711085157e-06, - "loss": 1.0272, - "step": 1932 - }, - { - "epoch": 0.1743247508680164, - "grad_norm": 1.3514609326252884, - "learning_rate": 3.785513937871763e-06, - "loss": 1.0566, - "step": 1933 - }, - { - "epoch": 0.17441493439148667, - "grad_norm": 2.6405744378480236, - "learning_rate": 3.785250652285116e-06, - "loss": 0.9799, - "step": 1934 - }, - { - "epoch": 0.17450511791495693, - "grad_norm": 1.48363580870108, - "learning_rate": 3.78498721437104e-06, - "loss": 1.0818, - "step": 1935 - }, - { - "epoch": 0.1745953014384272, - "grad_norm": 1.7497227312871764, - "learning_rate": 3.784723624152012e-06, - "loss": 1.0063, - "step": 1936 - }, - { - "epoch": 0.17468548496189745, - "grad_norm": 1.7362602915778105, - "learning_rate": 3.784459881650524e-06, - "loss": 0.9561, - "step": 1937 - }, - { - "epoch": 0.1747756684853677, - "grad_norm": 1.4789406330607608, - "learning_rate": 3.784195986889079e-06, - "loss": 0.9857, - "step": 1938 - }, - { - "epoch": 0.17486585200883797, - "grad_norm": 1.7651236168255102, - "learning_rate": 3.7839319398901946e-06, - "loss": 0.9333, - "step": 1939 - }, - { - "epoch": 0.17495603553230824, - "grad_norm": 1.5638806918606782, - "learning_rate": 3.7836677406764013e-06, - "loss": 1.0746, - "step": 1940 - }, - { - "epoch": 0.1750462190557785, - "grad_norm": 1.4457029073117744, - "learning_rate": 3.7834033892702407e-06, - "loss": 1.0523, - "step": 1941 - }, - { - "epoch": 0.17513640257924878, - "grad_norm": 1.6436782763247837, - "learning_rate": 3.783138885694269e-06, - "loss": 0.9685, - "step": 1942 - }, - { - "epoch": 0.17522658610271905, - "grad_norm": 1.5867365435565657, - "learning_rate": 3.7828742299710558e-06, - "loss": 1.0887, - "step": 1943 - }, - { - "epoch": 0.1753167696261893, - "grad_norm": 1.6896283891207422, - "learning_rate": 3.782609422123183e-06, - "loss": 0.9885, - "step": 1944 - }, - { - "epoch": 0.17540695314965957, - "grad_norm": 2.2102460555071715, - "learning_rate": 3.7823444621732444e-06, - "loss": 1.0045, - "step": 1945 - }, - { - "epoch": 0.17549713667312983, - "grad_norm": 1.970344321760061, - "learning_rate": 3.782079350143849e-06, - "loss": 1.0264, - "step": 1946 - }, - { - "epoch": 0.1755873201966001, - "grad_norm": 1.7110664632609007, - "learning_rate": 3.781814086057617e-06, - "loss": 1.109, - "step": 1947 - }, - { - "epoch": 0.17567750372007035, - "grad_norm": 1.5332308043878866, - "learning_rate": 3.7815486699371826e-06, - "loss": 0.9969, - "step": 1948 - }, - { - "epoch": 0.1757676872435406, - "grad_norm": 1.3500963317974968, - "learning_rate": 3.7812831018051918e-06, - "loss": 0.9875, - "step": 1949 - }, - { - "epoch": 0.17585787076701087, - "grad_norm": 1.3362370651518327, - "learning_rate": 3.7810173816843058e-06, - "loss": 1.0236, - "step": 1950 - }, - { - "epoch": 0.17594805429048113, - "grad_norm": 1.5532403853171208, - "learning_rate": 3.7807515095971955e-06, - "loss": 1.0739, - "step": 1951 - }, - { - "epoch": 0.1760382378139514, - "grad_norm": 1.4509953305453902, - "learning_rate": 3.7804854855665475e-06, - "loss": 0.9991, - "step": 1952 - }, - { - "epoch": 0.17612842133742165, - "grad_norm": 1.2691428661473412, - "learning_rate": 3.7802193096150606e-06, - "loss": 1.0157, - "step": 1953 - }, - { - "epoch": 0.17621860486089191, - "grad_norm": 1.3961081684992498, - "learning_rate": 3.779952981765446e-06, - "loss": 0.9716, - "step": 1954 - }, - { - "epoch": 0.17630878838436218, - "grad_norm": 1.3308333214935193, - "learning_rate": 3.779686502040429e-06, - "loss": 1.0201, - "step": 1955 - }, - { - "epoch": 0.17639897190783244, - "grad_norm": 0.6914919568405683, - "learning_rate": 3.779419870462746e-06, - "loss": 0.8347, - "step": 1956 - }, - { - "epoch": 0.1764891554313027, - "grad_norm": 2.0367884323672643, - "learning_rate": 3.779153087055148e-06, - "loss": 0.9874, - "step": 1957 - }, - { - "epoch": 0.17657933895477296, - "grad_norm": 2.131444023898859, - "learning_rate": 3.7788861518403988e-06, - "loss": 1.0115, - "step": 1958 - }, - { - "epoch": 0.17666952247824322, - "grad_norm": 2.2985768144346816, - "learning_rate": 3.7786190648412742e-06, - "loss": 0.9042, - "step": 1959 - }, - { - "epoch": 0.17675970600171348, - "grad_norm": 0.6518961118679015, - "learning_rate": 3.778351826080564e-06, - "loss": 0.8476, - "step": 1960 - }, - { - "epoch": 0.17684988952518374, - "grad_norm": 1.4133589453820696, - "learning_rate": 3.7780844355810704e-06, - "loss": 1.1108, - "step": 1961 - }, - { - "epoch": 0.176940073048654, - "grad_norm": 1.6526742158317007, - "learning_rate": 3.777816893365608e-06, - "loss": 1.025, - "step": 1962 - }, - { - "epoch": 0.17703025657212426, - "grad_norm": 1.353212132632887, - "learning_rate": 3.7775491994570057e-06, - "loss": 1.0877, - "step": 1963 - }, - { - "epoch": 0.17712044009559452, - "grad_norm": 1.41416108857319, - "learning_rate": 3.777281353878105e-06, - "loss": 0.9823, - "step": 1964 - }, - { - "epoch": 0.17721062361906478, - "grad_norm": 0.6205117719918377, - "learning_rate": 3.777013356651758e-06, - "loss": 0.8476, - "step": 1965 - }, - { - "epoch": 0.17730080714253507, - "grad_norm": 1.8167139592562362, - "learning_rate": 3.776745207800834e-06, - "loss": 1.0219, - "step": 1966 - }, - { - "epoch": 0.17739099066600533, - "grad_norm": 1.6121800245661486, - "learning_rate": 3.7764769073482122e-06, - "loss": 1.0426, - "step": 1967 - }, - { - "epoch": 0.1774811741894756, - "grad_norm": 1.4115004539070115, - "learning_rate": 3.7762084553167846e-06, - "loss": 1.0764, - "step": 1968 - }, - { - "epoch": 0.17757135771294585, - "grad_norm": 1.5183460938677815, - "learning_rate": 3.775939851729458e-06, - "loss": 1.0964, - "step": 1969 - }, - { - "epoch": 0.17766154123641612, - "grad_norm": 1.97719548761145, - "learning_rate": 3.775671096609151e-06, - "loss": 0.9711, - "step": 1970 - }, - { - "epoch": 0.17775172475988638, - "grad_norm": 1.5638475329432613, - "learning_rate": 3.775402189978795e-06, - "loss": 0.9958, - "step": 1971 - }, - { - "epoch": 0.17784190828335664, - "grad_norm": 1.3584952302806184, - "learning_rate": 3.7751331318613343e-06, - "loss": 0.9934, - "step": 1972 - }, - { - "epoch": 0.1779320918068269, - "grad_norm": 1.5834250256273068, - "learning_rate": 3.774863922279727e-06, - "loss": 1.0308, - "step": 1973 - }, - { - "epoch": 0.17802227533029716, - "grad_norm": 1.3288417901446594, - "learning_rate": 3.7745945612569435e-06, - "loss": 1.0774, - "step": 1974 - }, - { - "epoch": 0.17811245885376742, - "grad_norm": 1.2799744653390008, - "learning_rate": 3.7743250488159674e-06, - "loss": 0.8926, - "step": 1975 - }, - { - "epoch": 0.17820264237723768, - "grad_norm": 1.5790801226706253, - "learning_rate": 3.774055384979794e-06, - "loss": 0.9131, - "step": 1976 - }, - { - "epoch": 0.17829282590070794, - "grad_norm": 1.3444068545261239, - "learning_rate": 3.773785569771433e-06, - "loss": 0.995, - "step": 1977 - }, - { - "epoch": 0.1783830094241782, - "grad_norm": 1.3996202805413898, - "learning_rate": 3.7735156032139066e-06, - "loss": 0.999, - "step": 1978 - }, - { - "epoch": 0.17847319294764846, - "grad_norm": 1.4074730958281374, - "learning_rate": 3.773245485330251e-06, - "loss": 1.1076, - "step": 1979 - }, - { - "epoch": 0.17856337647111872, - "grad_norm": 1.5343917224229169, - "learning_rate": 3.7729752161435115e-06, - "loss": 0.9841, - "step": 1980 - }, - { - "epoch": 0.17865355999458898, - "grad_norm": 1.6151301616736495, - "learning_rate": 3.7727047956767514e-06, - "loss": 1.0758, - "step": 1981 - }, - { - "epoch": 0.17874374351805924, - "grad_norm": 1.307552002698689, - "learning_rate": 3.7724342239530436e-06, - "loss": 1.1353, - "step": 1982 - }, - { - "epoch": 0.1788339270415295, - "grad_norm": 1.471204775987831, - "learning_rate": 3.772163500995474e-06, - "loss": 0.9564, - "step": 1983 - }, - { - "epoch": 0.17892411056499977, - "grad_norm": 2.0376970535193366, - "learning_rate": 3.7718926268271437e-06, - "loss": 1.1006, - "step": 1984 - }, - { - "epoch": 0.17901429408847003, - "grad_norm": 1.8814764070352046, - "learning_rate": 3.771621601471164e-06, - "loss": 1.0254, - "step": 1985 - }, - { - "epoch": 0.1791044776119403, - "grad_norm": 1.3919256749754139, - "learning_rate": 3.771350424950661e-06, - "loss": 1.0505, - "step": 1986 - }, - { - "epoch": 0.17919466113541055, - "grad_norm": 1.5197366217758426, - "learning_rate": 3.771079097288772e-06, - "loss": 0.9602, - "step": 1987 - }, - { - "epoch": 0.1792848446588808, - "grad_norm": 1.3048445098878394, - "learning_rate": 3.770807618508649e-06, - "loss": 1.0226, - "step": 1988 - }, - { - "epoch": 0.17937502818235107, - "grad_norm": 1.5156062016845255, - "learning_rate": 3.7705359886334555e-06, - "loss": 1.0001, - "step": 1989 - }, - { - "epoch": 0.17946521170582136, - "grad_norm": 1.6858606499523439, - "learning_rate": 3.7702642076863694e-06, - "loss": 0.9886, - "step": 1990 - }, - { - "epoch": 0.17955539522929162, - "grad_norm": 2.1348860166618224, - "learning_rate": 3.7699922756905795e-06, - "loss": 1.0685, - "step": 1991 - }, - { - "epoch": 0.17964557875276188, - "grad_norm": 1.489713045176894, - "learning_rate": 3.7697201926692895e-06, - "loss": 0.9723, - "step": 1992 - }, - { - "epoch": 0.17973576227623214, - "grad_norm": 1.7347571836840405, - "learning_rate": 3.7694479586457144e-06, - "loss": 1.0233, - "step": 1993 - }, - { - "epoch": 0.1798259457997024, - "grad_norm": 2.3928335123583064, - "learning_rate": 3.7691755736430827e-06, - "loss": 1.0157, - "step": 1994 - }, - { - "epoch": 0.17991612932317266, - "grad_norm": 1.324786669273847, - "learning_rate": 3.768903037684636e-06, - "loss": 1.0577, - "step": 1995 - }, - { - "epoch": 0.18000631284664292, - "grad_norm": 1.4895577150645016, - "learning_rate": 3.7686303507936284e-06, - "loss": 1.0562, - "step": 1996 - }, - { - "epoch": 0.18009649637011318, - "grad_norm": 1.795103642970098, - "learning_rate": 3.7683575129933272e-06, - "loss": 1.0528, - "step": 1997 - }, - { - "epoch": 0.18018667989358345, - "grad_norm": 1.4695253152032854, - "learning_rate": 3.7680845243070128e-06, - "loss": 1.0154, - "step": 1998 - }, - { - "epoch": 0.1802768634170537, - "grad_norm": 0.6746487030069415, - "learning_rate": 3.7678113847579767e-06, - "loss": 0.8134, - "step": 1999 - }, - { - "epoch": 0.18036704694052397, - "grad_norm": 1.682498088569179, - "learning_rate": 3.7675380943695264e-06, - "loss": 1.085, - "step": 2000 - }, - { - "epoch": 0.18045723046399423, - "grad_norm": 1.764799211876452, - "learning_rate": 3.7672646531649795e-06, - "loss": 0.9756, - "step": 2001 - }, - { - "epoch": 0.1805474139874645, - "grad_norm": 0.5912898192193264, - "learning_rate": 3.7669910611676682e-06, - "loss": 0.8347, - "step": 2002 - }, - { - "epoch": 0.18063759751093475, - "grad_norm": 2.2720632692849443, - "learning_rate": 3.7667173184009356e-06, - "loss": 1.0013, - "step": 2003 - }, - { - "epoch": 0.180727781034405, - "grad_norm": 1.4110643838837007, - "learning_rate": 3.7664434248881403e-06, - "loss": 1.0752, - "step": 2004 - }, - { - "epoch": 0.18081796455787527, - "grad_norm": 1.663990287825778, - "learning_rate": 3.766169380652652e-06, - "loss": 1.0259, - "step": 2005 - }, - { - "epoch": 0.18090814808134553, - "grad_norm": 1.378148073102738, - "learning_rate": 3.7658951857178537e-06, - "loss": 1.0565, - "step": 2006 - }, - { - "epoch": 0.1809983316048158, - "grad_norm": 1.5180484234470835, - "learning_rate": 3.7656208401071414e-06, - "loss": 1.0273, - "step": 2007 - }, - { - "epoch": 0.18108851512828605, - "grad_norm": 1.896046015229342, - "learning_rate": 3.7653463438439225e-06, - "loss": 0.9592, - "step": 2008 - }, - { - "epoch": 0.18117869865175631, - "grad_norm": 2.2281107555645656, - "learning_rate": 3.7650716969516203e-06, - "loss": 1.0232, - "step": 2009 - }, - { - "epoch": 0.18126888217522658, - "grad_norm": 1.614127909100343, - "learning_rate": 3.764796899453668e-06, - "loss": 0.9213, - "step": 2010 - }, - { - "epoch": 0.18135906569869684, - "grad_norm": 1.4206620796489968, - "learning_rate": 3.7645219513735134e-06, - "loss": 1.0578, - "step": 2011 - }, - { - "epoch": 0.1814492492221671, - "grad_norm": 1.6310730206008879, - "learning_rate": 3.764246852734617e-06, - "loss": 1.0489, - "step": 2012 - }, - { - "epoch": 0.18153943274563739, - "grad_norm": 1.7248729189073626, - "learning_rate": 3.7639716035604502e-06, - "loss": 0.9822, - "step": 2013 - }, - { - "epoch": 0.18162961626910765, - "grad_norm": 0.6660000080503858, - "learning_rate": 3.7636962038745e-06, - "loss": 0.8311, - "step": 2014 - }, - { - "epoch": 0.1817197997925779, - "grad_norm": 1.4819454990769725, - "learning_rate": 3.763420653700265e-06, - "loss": 0.9722, - "step": 2015 - }, - { - "epoch": 0.18180998331604817, - "grad_norm": 1.8026029098698877, - "learning_rate": 3.7631449530612565e-06, - "loss": 0.9771, - "step": 2016 - }, - { - "epoch": 0.18190016683951843, - "grad_norm": 1.3060647239887007, - "learning_rate": 3.762869101980999e-06, - "loss": 1.1016, - "step": 2017 - }, - { - "epoch": 0.1819903503629887, - "grad_norm": 1.5966578864146195, - "learning_rate": 3.7625931004830287e-06, - "loss": 1.0103, - "step": 2018 - }, - { - "epoch": 0.18208053388645895, - "grad_norm": 1.5346753481020883, - "learning_rate": 3.7623169485908966e-06, - "loss": 0.9742, - "step": 2019 - }, - { - "epoch": 0.1821707174099292, - "grad_norm": 0.8234430070435956, - "learning_rate": 3.7620406463281647e-06, - "loss": 0.8929, - "step": 2020 - }, - { - "epoch": 0.18226090093339947, - "grad_norm": 2.4478208698980404, - "learning_rate": 3.7617641937184095e-06, - "loss": 1.0264, - "step": 2021 - }, - { - "epoch": 0.18235108445686973, - "grad_norm": 1.7531962497532687, - "learning_rate": 3.761487590785219e-06, - "loss": 1.0388, - "step": 2022 - }, - { - "epoch": 0.18244126798034, - "grad_norm": 1.6291478844590064, - "learning_rate": 3.7612108375521942e-06, - "loss": 0.9883, - "step": 2023 - }, - { - "epoch": 0.18253145150381025, - "grad_norm": 1.7529083335659654, - "learning_rate": 3.76093393404295e-06, - "loss": 1.0606, - "step": 2024 - }, - { - "epoch": 0.18262163502728052, - "grad_norm": 1.907948034722459, - "learning_rate": 3.7606568802811126e-06, - "loss": 1.0091, - "step": 2025 - }, - { - "epoch": 0.18271181855075078, - "grad_norm": 2.2028837680638906, - "learning_rate": 3.760379676290322e-06, - "loss": 1.0288, - "step": 2026 - }, - { - "epoch": 0.18280200207422104, - "grad_norm": 1.7058559535616593, - "learning_rate": 3.760102322094231e-06, - "loss": 0.9498, - "step": 2027 - }, - { - "epoch": 0.1828921855976913, - "grad_norm": 1.3499046698219532, - "learning_rate": 3.759824817716504e-06, - "loss": 0.9733, - "step": 2028 - }, - { - "epoch": 0.18298236912116156, - "grad_norm": 4.608338320259767, - "learning_rate": 3.759547163180821e-06, - "loss": 1.0622, - "step": 2029 - }, - { - "epoch": 0.18307255264463182, - "grad_norm": 1.4646426457006851, - "learning_rate": 3.759269358510871e-06, - "loss": 0.8171, - "step": 2030 - }, - { - "epoch": 0.18316273616810208, - "grad_norm": 1.4765141393421766, - "learning_rate": 3.75899140373036e-06, - "loss": 0.9146, - "step": 2031 - }, - { - "epoch": 0.18325291969157234, - "grad_norm": 1.7364168262459951, - "learning_rate": 3.7587132988630028e-06, - "loss": 1.0862, - "step": 2032 - }, - { - "epoch": 0.1833431032150426, - "grad_norm": 1.8457913493712712, - "learning_rate": 3.7584350439325295e-06, - "loss": 1.0496, - "step": 2033 - }, - { - "epoch": 0.18343328673851286, - "grad_norm": 1.8036281895102697, - "learning_rate": 3.758156638962682e-06, - "loss": 1.0053, - "step": 2034 - }, - { - "epoch": 0.18352347026198312, - "grad_norm": 1.689818379162765, - "learning_rate": 3.757878083977216e-06, - "loss": 0.9708, - "step": 2035 - }, - { - "epoch": 0.18361365378545338, - "grad_norm": 1.4745764722374342, - "learning_rate": 3.7575993789999e-06, - "loss": 1.0041, - "step": 2036 - }, - { - "epoch": 0.18370383730892367, - "grad_norm": 1.4219802985134329, - "learning_rate": 3.757320524054512e-06, - "loss": 1.0413, - "step": 2037 - }, - { - "epoch": 0.18379402083239393, - "grad_norm": 1.4992861638658805, - "learning_rate": 3.757041519164848e-06, - "loss": 0.949, - "step": 2038 - }, - { - "epoch": 0.1838842043558642, - "grad_norm": 1.571430093281492, - "learning_rate": 3.7567623643547133e-06, - "loss": 1.0219, - "step": 2039 - }, - { - "epoch": 0.18397438787933446, - "grad_norm": 1.6118873319416662, - "learning_rate": 3.756483059647927e-06, - "loss": 1.0137, - "step": 2040 - }, - { - "epoch": 0.18406457140280472, - "grad_norm": 1.7643166479771208, - "learning_rate": 3.756203605068321e-06, - "loss": 0.9874, - "step": 2041 - }, - { - "epoch": 0.18415475492627498, - "grad_norm": 1.6681445562903572, - "learning_rate": 3.7559240006397396e-06, - "loss": 0.9979, - "step": 2042 - }, - { - "epoch": 0.18424493844974524, - "grad_norm": 1.5014044226918062, - "learning_rate": 3.7556442463860406e-06, - "loss": 1.1328, - "step": 2043 - }, - { - "epoch": 0.1843351219732155, - "grad_norm": 2.0465237847611295, - "learning_rate": 3.7553643423310934e-06, - "loss": 1.1187, - "step": 2044 - }, - { - "epoch": 0.18442530549668576, - "grad_norm": 1.439568896665196, - "learning_rate": 3.755084288498782e-06, - "loss": 0.8801, - "step": 2045 - }, - { - "epoch": 0.18451548902015602, - "grad_norm": 1.5050052739047797, - "learning_rate": 3.754804084913002e-06, - "loss": 1.0251, - "step": 2046 - }, - { - "epoch": 0.18460567254362628, - "grad_norm": 1.3172182018462333, - "learning_rate": 3.754523731597661e-06, - "loss": 1.0785, - "step": 2047 - }, - { - "epoch": 0.18469585606709654, - "grad_norm": 2.4662839910578707, - "learning_rate": 3.754243228576681e-06, - "loss": 0.9114, - "step": 2048 - }, - { - "epoch": 0.1847860395905668, - "grad_norm": 1.921822772052979, - "learning_rate": 3.753962575873996e-06, - "loss": 1.0651, - "step": 2049 - }, - { - "epoch": 0.18487622311403706, - "grad_norm": 2.293524029902134, - "learning_rate": 3.7536817735135527e-06, - "loss": 0.9734, - "step": 2050 - }, - { - "epoch": 0.18496640663750732, - "grad_norm": 1.588372076602094, - "learning_rate": 3.753400821519311e-06, - "loss": 1.057, - "step": 2051 - }, - { - "epoch": 0.18505659016097759, - "grad_norm": 1.469309842163055, - "learning_rate": 3.7531197199152426e-06, - "loss": 0.94, - "step": 2052 - }, - { - "epoch": 0.18514677368444785, - "grad_norm": 0.7506345607583838, - "learning_rate": 3.7528384687253335e-06, - "loss": 0.8873, - "step": 2053 - }, - { - "epoch": 0.1852369572079181, - "grad_norm": 2.871486673262072, - "learning_rate": 3.7525570679735815e-06, - "loss": 1.1209, - "step": 2054 - }, - { - "epoch": 0.18532714073138837, - "grad_norm": 1.3757920151592065, - "learning_rate": 3.7522755176839965e-06, - "loss": 1.0248, - "step": 2055 - }, - { - "epoch": 0.18541732425485863, - "grad_norm": 1.6260613130127528, - "learning_rate": 3.7519938178806027e-06, - "loss": 1.023, - "step": 2056 - }, - { - "epoch": 0.1855075077783289, - "grad_norm": 1.9556101742917311, - "learning_rate": 3.7517119685874358e-06, - "loss": 0.9558, - "step": 2057 - }, - { - "epoch": 0.18559769130179915, - "grad_norm": 1.840135814381279, - "learning_rate": 3.7514299698285447e-06, - "loss": 1.0011, - "step": 2058 - }, - { - "epoch": 0.1856878748252694, - "grad_norm": 1.246959708259584, - "learning_rate": 3.751147821627991e-06, - "loss": 0.9935, - "step": 2059 - }, - { - "epoch": 0.18577805834873967, - "grad_norm": 1.5097827589634563, - "learning_rate": 3.75086552400985e-06, - "loss": 1.0322, - "step": 2060 - }, - { - "epoch": 0.18586824187220996, - "grad_norm": 1.5072904483532765, - "learning_rate": 3.750583076998208e-06, - "loss": 0.99, - "step": 2061 - }, - { - "epoch": 0.18595842539568022, - "grad_norm": 1.537209881958676, - "learning_rate": 3.7503004806171655e-06, - "loss": 0.9983, - "step": 2062 - }, - { - "epoch": 0.18604860891915048, - "grad_norm": 2.054511581258284, - "learning_rate": 3.7500177348908354e-06, - "loss": 0.9438, - "step": 2063 - }, - { - "epoch": 0.18613879244262074, - "grad_norm": 1.6105148575010693, - "learning_rate": 3.749734839843342e-06, - "loss": 1.03, - "step": 2064 - }, - { - "epoch": 0.186228975966091, - "grad_norm": 1.463306159170496, - "learning_rate": 3.7494517954988245e-06, - "loss": 1.016, - "step": 2065 - }, - { - "epoch": 0.18631915948956126, - "grad_norm": 1.6813271260208715, - "learning_rate": 3.749168601881433e-06, - "loss": 1.0043, - "step": 2066 - }, - { - "epoch": 0.18640934301303153, - "grad_norm": 1.7686628016892032, - "learning_rate": 3.7488852590153315e-06, - "loss": 0.9942, - "step": 2067 - }, - { - "epoch": 0.1864995265365018, - "grad_norm": 1.6009272362199278, - "learning_rate": 3.748601766924697e-06, - "loss": 1.008, - "step": 2068 - }, - { - "epoch": 0.18658971005997205, - "grad_norm": 1.6462735259417849, - "learning_rate": 3.7483181256337176e-06, - "loss": 0.9872, - "step": 2069 - }, - { - "epoch": 0.1866798935834423, - "grad_norm": 1.3522156476766771, - "learning_rate": 3.7480343351665962e-06, - "loss": 1.0132, - "step": 2070 - }, - { - "epoch": 0.18677007710691257, - "grad_norm": 1.3665565124561991, - "learning_rate": 3.747750395547546e-06, - "loss": 0.9997, - "step": 2071 - }, - { - "epoch": 0.18686026063038283, - "grad_norm": 1.1920059945864643, - "learning_rate": 3.7474663068007956e-06, - "loss": 0.9956, - "step": 2072 - }, - { - "epoch": 0.1869504441538531, - "grad_norm": 1.1610469802432766, - "learning_rate": 3.747182068950584e-06, - "loss": 0.9951, - "step": 2073 - }, - { - "epoch": 0.18704062767732335, - "grad_norm": 1.5888858693249042, - "learning_rate": 3.7468976820211643e-06, - "loss": 1.0197, - "step": 2074 - }, - { - "epoch": 0.1871308112007936, - "grad_norm": 1.5083547615229271, - "learning_rate": 3.746613146036803e-06, - "loss": 1.0133, - "step": 2075 - }, - { - "epoch": 0.18722099472426387, - "grad_norm": 1.676644411956846, - "learning_rate": 3.7463284610217766e-06, - "loss": 1.0513, - "step": 2076 - }, - { - "epoch": 0.18731117824773413, - "grad_norm": 1.557570807744057, - "learning_rate": 3.746043627000377e-06, - "loss": 0.9499, - "step": 2077 - }, - { - "epoch": 0.1874013617712044, - "grad_norm": 2.701964818452948, - "learning_rate": 3.7457586439969076e-06, - "loss": 0.9662, - "step": 2078 - }, - { - "epoch": 0.18749154529467466, - "grad_norm": 1.2519933066265356, - "learning_rate": 3.7454735120356842e-06, - "loss": 1.0, - "step": 2079 - }, - { - "epoch": 0.18758172881814492, - "grad_norm": 1.4559688837059068, - "learning_rate": 3.7451882311410373e-06, - "loss": 1.0773, - "step": 2080 - }, - { - "epoch": 0.18767191234161518, - "grad_norm": 1.3940116277535395, - "learning_rate": 3.7449028013373074e-06, - "loss": 1.0425, - "step": 2081 - }, - { - "epoch": 0.18776209586508544, - "grad_norm": 1.4935804326684579, - "learning_rate": 3.7446172226488485e-06, - "loss": 1.008, - "step": 2082 - }, - { - "epoch": 0.1878522793885557, - "grad_norm": 1.7238764517739764, - "learning_rate": 3.7443314951000285e-06, - "loss": 1.0673, - "step": 2083 - }, - { - "epoch": 0.18794246291202596, - "grad_norm": 1.7700836464680911, - "learning_rate": 3.7440456187152276e-06, - "loss": 0.9957, - "step": 2084 - }, - { - "epoch": 0.18803264643549625, - "grad_norm": 2.0937881466251542, - "learning_rate": 3.7437595935188377e-06, - "loss": 1.1003, - "step": 2085 - }, - { - "epoch": 0.1881228299589665, - "grad_norm": 1.9212873227149476, - "learning_rate": 3.7434734195352647e-06, - "loss": 1.0073, - "step": 2086 - }, - { - "epoch": 0.18821301348243677, - "grad_norm": 0.6811650975718516, - "learning_rate": 3.743187096788926e-06, - "loss": 0.8473, - "step": 2087 - }, - { - "epoch": 0.18830319700590703, - "grad_norm": 1.4265046257321101, - "learning_rate": 3.7429006253042524e-06, - "loss": 1.0133, - "step": 2088 - }, - { - "epoch": 0.1883933805293773, - "grad_norm": 1.646131665517779, - "learning_rate": 3.7426140051056867e-06, - "loss": 0.9549, - "step": 2089 - }, - { - "epoch": 0.18848356405284755, - "grad_norm": 1.675806289877027, - "learning_rate": 3.7423272362176856e-06, - "loss": 0.9475, - "step": 2090 - }, - { - "epoch": 0.1885737475763178, - "grad_norm": 1.3451461635995299, - "learning_rate": 3.742040318664718e-06, - "loss": 1.053, - "step": 2091 - }, - { - "epoch": 0.18866393109978807, - "grad_norm": 1.8238677967664727, - "learning_rate": 3.7417532524712643e-06, - "loss": 1.0631, - "step": 2092 - }, - { - "epoch": 0.18875411462325833, - "grad_norm": 2.4518297019428372, - "learning_rate": 3.7414660376618195e-06, - "loss": 1.039, - "step": 2093 - }, - { - "epoch": 0.1888442981467286, - "grad_norm": 0.6032725464994064, - "learning_rate": 3.74117867426089e-06, - "loss": 0.8513, - "step": 2094 - }, - { - "epoch": 0.18893448167019886, - "grad_norm": 1.438544184383074, - "learning_rate": 3.7408911622929954e-06, - "loss": 0.9898, - "step": 2095 - }, - { - "epoch": 0.18902466519366912, - "grad_norm": 1.5193235750647986, - "learning_rate": 3.740603501782668e-06, - "loss": 1.0437, - "step": 2096 - }, - { - "epoch": 0.18911484871713938, - "grad_norm": 1.4536664220740774, - "learning_rate": 3.7403156927544516e-06, - "loss": 0.9519, - "step": 2097 - }, - { - "epoch": 0.18920503224060964, - "grad_norm": 1.7897334277529662, - "learning_rate": 3.740027735232904e-06, - "loss": 0.9353, - "step": 2098 - }, - { - "epoch": 0.1892952157640799, - "grad_norm": 1.987662648197513, - "learning_rate": 3.7397396292425966e-06, - "loss": 0.9914, - "step": 2099 - }, - { - "epoch": 0.18938539928755016, - "grad_norm": 1.6548982538254509, - "learning_rate": 3.7394513748081105e-06, - "loss": 0.99, - "step": 2100 - }, - { - "epoch": 0.18947558281102042, - "grad_norm": 2.444994238474168, - "learning_rate": 3.7391629719540418e-06, - "loss": 1.0368, - "step": 2101 - }, - { - "epoch": 0.18956576633449068, - "grad_norm": 1.6682392410258373, - "learning_rate": 3.7388744207049998e-06, - "loss": 0.9987, - "step": 2102 - }, - { - "epoch": 0.18965594985796094, - "grad_norm": 2.6326074718480306, - "learning_rate": 3.7385857210856027e-06, - "loss": 0.9807, - "step": 2103 - }, - { - "epoch": 0.1897461333814312, - "grad_norm": 1.2908787734175167, - "learning_rate": 3.738296873120486e-06, - "loss": 0.9917, - "step": 2104 - }, - { - "epoch": 0.18983631690490146, - "grad_norm": 1.7031818345412302, - "learning_rate": 3.7380078768342955e-06, - "loss": 0.9909, - "step": 2105 - }, - { - "epoch": 0.18992650042837173, - "grad_norm": 2.0581653011580165, - "learning_rate": 3.7377187322516895e-06, - "loss": 1.036, - "step": 2106 - }, - { - "epoch": 0.19001668395184199, - "grad_norm": 2.623157944816627, - "learning_rate": 3.7374294393973395e-06, - "loss": 0.9701, - "step": 2107 - }, - { - "epoch": 0.19010686747531225, - "grad_norm": 1.5193621779182864, - "learning_rate": 3.7371399982959294e-06, - "loss": 1.0732, - "step": 2108 - }, - { - "epoch": 0.19019705099878254, - "grad_norm": 1.6473728737531197, - "learning_rate": 3.7368504089721565e-06, - "loss": 0.9255, - "step": 2109 - }, - { - "epoch": 0.1902872345222528, - "grad_norm": 1.6616258287656602, - "learning_rate": 3.73656067145073e-06, - "loss": 0.9587, - "step": 2110 - }, - { - "epoch": 0.19037741804572306, - "grad_norm": 1.4395488567428711, - "learning_rate": 3.736270785756371e-06, - "loss": 1.0597, - "step": 2111 - }, - { - "epoch": 0.19046760156919332, - "grad_norm": 2.7328400581588714, - "learning_rate": 3.7359807519138156e-06, - "loss": 1.0123, - "step": 2112 - }, - { - "epoch": 0.19055778509266358, - "grad_norm": 1.2278426954867752, - "learning_rate": 3.73569056994781e-06, - "loss": 1.0093, - "step": 2113 - }, - { - "epoch": 0.19064796861613384, - "grad_norm": 1.9470344490408877, - "learning_rate": 3.7354002398831144e-06, - "loss": 1.048, - "step": 2114 - }, - { - "epoch": 0.1907381521396041, - "grad_norm": 1.9077700198581153, - "learning_rate": 3.7351097617445015e-06, - "loss": 1.0036, - "step": 2115 - }, - { - "epoch": 0.19082833566307436, - "grad_norm": 0.6848185656418962, - "learning_rate": 3.7348191355567567e-06, - "loss": 0.8828, - "step": 2116 - }, - { - "epoch": 0.19091851918654462, - "grad_norm": 2.30531886876196, - "learning_rate": 3.734528361344677e-06, - "loss": 1.032, - "step": 2117 - }, - { - "epoch": 0.19100870271001488, - "grad_norm": 1.8195104837768126, - "learning_rate": 3.734237439133074e-06, - "loss": 1.0038, - "step": 2118 - }, - { - "epoch": 0.19109888623348514, - "grad_norm": 1.5957280767537387, - "learning_rate": 3.7339463689467702e-06, - "loss": 1.0542, - "step": 2119 - }, - { - "epoch": 0.1911890697569554, - "grad_norm": 1.438182585822031, - "learning_rate": 3.733655150810601e-06, - "loss": 0.9743, - "step": 2120 - }, - { - "epoch": 0.19127925328042567, - "grad_norm": 1.4740264751018373, - "learning_rate": 3.7333637847494154e-06, - "loss": 1.1336, - "step": 2121 - }, - { - "epoch": 0.19136943680389593, - "grad_norm": 1.5108548635865862, - "learning_rate": 3.7330722707880734e-06, - "loss": 1.1635, - "step": 2122 - }, - { - "epoch": 0.1914596203273662, - "grad_norm": 1.3621714099512223, - "learning_rate": 3.7327806089514497e-06, - "loss": 0.9196, - "step": 2123 - }, - { - "epoch": 0.19154980385083645, - "grad_norm": 1.6301819268018634, - "learning_rate": 3.7324887992644297e-06, - "loss": 1.1057, - "step": 2124 - }, - { - "epoch": 0.1916399873743067, - "grad_norm": 0.7303686226339983, - "learning_rate": 3.7321968417519123e-06, - "loss": 0.8422, - "step": 2125 - }, - { - "epoch": 0.19173017089777697, - "grad_norm": 1.5081981106555953, - "learning_rate": 3.7319047364388097e-06, - "loss": 0.9978, - "step": 2126 - }, - { - "epoch": 0.19182035442124723, - "grad_norm": 0.6092288991430527, - "learning_rate": 3.7316124833500453e-06, - "loss": 0.8256, - "step": 2127 - }, - { - "epoch": 0.1919105379447175, - "grad_norm": 1.372815738068914, - "learning_rate": 3.731320082510556e-06, - "loss": 0.9963, - "step": 2128 - }, - { - "epoch": 0.19200072146818775, - "grad_norm": 1.3827497823684347, - "learning_rate": 3.7310275339452906e-06, - "loss": 1.0275, - "step": 2129 - }, - { - "epoch": 0.192090904991658, - "grad_norm": 18.217489470490214, - "learning_rate": 3.7307348376792113e-06, - "loss": 1.0003, - "step": 2130 - }, - { - "epoch": 0.19218108851512827, - "grad_norm": 1.8191867998622266, - "learning_rate": 3.730441993737292e-06, - "loss": 1.0005, - "step": 2131 - }, - { - "epoch": 0.19227127203859856, - "grad_norm": 1.876846104011056, - "learning_rate": 3.7301490021445205e-06, - "loss": 0.995, - "step": 2132 - }, - { - "epoch": 0.19236145556206882, - "grad_norm": 1.6987018368976121, - "learning_rate": 3.7298558629258966e-06, - "loss": 0.9448, - "step": 2133 - }, - { - "epoch": 0.19245163908553908, - "grad_norm": 1.4566582823685923, - "learning_rate": 3.7295625761064314e-06, - "loss": 1.048, - "step": 2134 - }, - { - "epoch": 0.19254182260900934, - "grad_norm": 1.783161075092071, - "learning_rate": 3.7292691417111504e-06, - "loss": 0.9707, - "step": 2135 - }, - { - "epoch": 0.1926320061324796, - "grad_norm": 1.5091158121107846, - "learning_rate": 3.728975559765092e-06, - "loss": 0.9673, - "step": 2136 - }, - { - "epoch": 0.19272218965594987, - "grad_norm": 1.8221934019047792, - "learning_rate": 3.728681830293305e-06, - "loss": 1.0389, - "step": 2137 - }, - { - "epoch": 0.19281237317942013, - "grad_norm": 2.1019740499388213, - "learning_rate": 3.7283879533208523e-06, - "loss": 0.9639, - "step": 2138 - }, - { - "epoch": 0.1929025567028904, - "grad_norm": 1.3851996469342462, - "learning_rate": 3.7280939288728094e-06, - "loss": 0.9864, - "step": 2139 - }, - { - "epoch": 0.19299274022636065, - "grad_norm": 1.5398182828627154, - "learning_rate": 3.7277997569742637e-06, - "loss": 0.9763, - "step": 2140 - }, - { - "epoch": 0.1930829237498309, - "grad_norm": 2.4537913395435234, - "learning_rate": 3.7275054376503155e-06, - "loss": 0.9692, - "step": 2141 - }, - { - "epoch": 0.19317310727330117, - "grad_norm": 1.7707128221981394, - "learning_rate": 3.7272109709260783e-06, - "loss": 0.948, - "step": 2142 - }, - { - "epoch": 0.19326329079677143, - "grad_norm": 0.7414520233463167, - "learning_rate": 3.7269163568266774e-06, - "loss": 0.8485, - "step": 2143 - }, - { - "epoch": 0.1933534743202417, - "grad_norm": 1.6225785039827547, - "learning_rate": 3.7266215953772512e-06, - "loss": 0.9908, - "step": 2144 - }, - { - "epoch": 0.19344365784371195, - "grad_norm": 1.578447139078426, - "learning_rate": 3.7263266866029492e-06, - "loss": 1.0379, - "step": 2145 - }, - { - "epoch": 0.1935338413671822, - "grad_norm": 1.3490336138169003, - "learning_rate": 3.726031630528936e-06, - "loss": 0.9687, - "step": 2146 - }, - { - "epoch": 0.19362402489065247, - "grad_norm": 1.388466349011616, - "learning_rate": 3.7257364271803865e-06, - "loss": 0.9, - "step": 2147 - }, - { - "epoch": 0.19371420841412273, - "grad_norm": 1.6447746477299514, - "learning_rate": 3.7254410765824896e-06, - "loss": 0.938, - "step": 2148 - }, - { - "epoch": 0.193804391937593, - "grad_norm": 2.357954494480496, - "learning_rate": 3.725145578760446e-06, - "loss": 0.9174, - "step": 2149 - }, - { - "epoch": 0.19389457546106326, - "grad_norm": 1.8639740045969613, - "learning_rate": 3.7248499337394696e-06, - "loss": 0.9181, - "step": 2150 - }, - { - "epoch": 0.19398475898453352, - "grad_norm": 1.6706290192275643, - "learning_rate": 3.7245541415447848e-06, - "loss": 1.096, - "step": 2151 - }, - { - "epoch": 0.19407494250800378, - "grad_norm": 2.7118541915598113, - "learning_rate": 3.724258202201633e-06, - "loss": 1.0275, - "step": 2152 - }, - { - "epoch": 0.19416512603147404, - "grad_norm": 1.435696839265213, - "learning_rate": 3.7239621157352633e-06, - "loss": 1.0247, - "step": 2153 - }, - { - "epoch": 0.1942553095549443, - "grad_norm": 1.7928009952031734, - "learning_rate": 3.7236658821709403e-06, - "loss": 0.9892, - "step": 2154 - }, - { - "epoch": 0.19434549307841456, - "grad_norm": 1.237834476775684, - "learning_rate": 3.7233695015339404e-06, - "loss": 1.0438, - "step": 2155 - }, - { - "epoch": 0.19443567660188485, - "grad_norm": 1.8677134391628358, - "learning_rate": 3.7230729738495513e-06, - "loss": 0.9913, - "step": 2156 - }, - { - "epoch": 0.1945258601253551, - "grad_norm": 1.2849984115561082, - "learning_rate": 3.722776299143075e-06, - "loss": 1.0018, - "step": 2157 - }, - { - "epoch": 0.19461604364882537, - "grad_norm": 0.6771640191542427, - "learning_rate": 3.722479477439826e-06, - "loss": 0.8526, - "step": 2158 - }, - { - "epoch": 0.19470622717229563, - "grad_norm": 1.3442422275783201, - "learning_rate": 3.7221825087651306e-06, - "loss": 0.9863, - "step": 2159 - }, - { - "epoch": 0.1947964106957659, - "grad_norm": 1.597344850817591, - "learning_rate": 3.7218853931443274e-06, - "loss": 0.9429, - "step": 2160 - }, - { - "epoch": 0.19488659421923615, - "grad_norm": 1.328749521063416, - "learning_rate": 3.721588130602768e-06, - "loss": 0.9437, - "step": 2161 - }, - { - "epoch": 0.19497677774270641, - "grad_norm": 1.5651011369097216, - "learning_rate": 3.7212907211658164e-06, - "loss": 0.9829, - "step": 2162 - }, - { - "epoch": 0.19506696126617667, - "grad_norm": 2.2065932657916836, - "learning_rate": 3.72099316485885e-06, - "loss": 0.9639, - "step": 2163 - }, - { - "epoch": 0.19515714478964694, - "grad_norm": 1.3534999037320878, - "learning_rate": 3.720695461707256e-06, - "loss": 1.0427, - "step": 2164 - }, - { - "epoch": 0.1952473283131172, - "grad_norm": 1.612226829751533, - "learning_rate": 3.7203976117364383e-06, - "loss": 1.0295, - "step": 2165 - }, - { - "epoch": 0.19533751183658746, - "grad_norm": 1.9172736188204982, - "learning_rate": 3.7200996149718105e-06, - "loss": 0.9819, - "step": 2166 - }, - { - "epoch": 0.19542769536005772, - "grad_norm": 1.254294742284225, - "learning_rate": 3.7198014714387985e-06, - "loss": 0.9908, - "step": 2167 - }, - { - "epoch": 0.19551787888352798, - "grad_norm": 1.632103944203073, - "learning_rate": 3.7195031811628422e-06, - "loss": 0.9342, - "step": 2168 - }, - { - "epoch": 0.19560806240699824, - "grad_norm": 1.89198330093176, - "learning_rate": 3.719204744169393e-06, - "loss": 1.0337, - "step": 2169 - }, - { - "epoch": 0.1956982459304685, - "grad_norm": 1.6196924242566353, - "learning_rate": 3.718906160483916e-06, - "loss": 0.9752, - "step": 2170 - }, - { - "epoch": 0.19578842945393876, - "grad_norm": 2.918543584088077, - "learning_rate": 3.7186074301318868e-06, - "loss": 0.9767, - "step": 2171 - }, - { - "epoch": 0.19587861297740902, - "grad_norm": 1.339982273069955, - "learning_rate": 3.7183085531387957e-06, - "loss": 1.0762, - "step": 2172 - }, - { - "epoch": 0.19596879650087928, - "grad_norm": 2.8145282637382754, - "learning_rate": 3.7180095295301443e-06, - "loss": 0.9873, - "step": 2173 - }, - { - "epoch": 0.19605898002434954, - "grad_norm": 1.3528035810691632, - "learning_rate": 3.7177103593314465e-06, - "loss": 1.0644, - "step": 2174 - }, - { - "epoch": 0.1961491635478198, - "grad_norm": 1.4648981923476736, - "learning_rate": 3.7174110425682297e-06, - "loss": 1.0333, - "step": 2175 - }, - { - "epoch": 0.19623934707129007, - "grad_norm": 1.6027320871097412, - "learning_rate": 3.7171115792660333e-06, - "loss": 1.1285, - "step": 2176 - }, - { - "epoch": 0.19632953059476033, - "grad_norm": 4.429167755691428, - "learning_rate": 3.7168119694504083e-06, - "loss": 1.0658, - "step": 2177 - }, - { - "epoch": 0.1964197141182306, - "grad_norm": 1.5887632705290315, - "learning_rate": 3.71651221314692e-06, - "loss": 1.0016, - "step": 2178 - }, - { - "epoch": 0.19650989764170085, - "grad_norm": 1.5268583575234504, - "learning_rate": 3.716212310381145e-06, - "loss": 1.0082, - "step": 2179 - }, - { - "epoch": 0.19660008116517114, - "grad_norm": 1.5364910661044768, - "learning_rate": 3.7159122611786725e-06, - "loss": 1.0457, - "step": 2180 - }, - { - "epoch": 0.1966902646886414, - "grad_norm": 3.141654078477756, - "learning_rate": 3.7156120655651045e-06, - "loss": 0.9878, - "step": 2181 - }, - { - "epoch": 0.19678044821211166, - "grad_norm": 1.279363173302951, - "learning_rate": 3.7153117235660553e-06, - "loss": 0.9885, - "step": 2182 - }, - { - "epoch": 0.19687063173558192, - "grad_norm": 0.7153226155257023, - "learning_rate": 3.7150112352071514e-06, - "loss": 0.8639, - "step": 2183 - }, - { - "epoch": 0.19696081525905218, - "grad_norm": 1.8722039673981523, - "learning_rate": 3.7147106005140326e-06, - "loss": 0.9731, - "step": 2184 - }, - { - "epoch": 0.19705099878252244, - "grad_norm": 0.6499596161401627, - "learning_rate": 3.714409819512351e-06, - "loss": 0.8484, - "step": 2185 - }, - { - "epoch": 0.1971411823059927, - "grad_norm": 1.4227475852912301, - "learning_rate": 3.7141088922277695e-06, - "loss": 1.0293, - "step": 2186 - }, - { - "epoch": 0.19723136582946296, - "grad_norm": 1.7920291740903707, - "learning_rate": 3.7138078186859664e-06, - "loss": 1.0458, - "step": 2187 - }, - { - "epoch": 0.19732154935293322, - "grad_norm": 1.3986638914164988, - "learning_rate": 3.7135065989126303e-06, - "loss": 1.0096, - "step": 2188 - }, - { - "epoch": 0.19741173287640348, - "grad_norm": 0.7528007584951109, - "learning_rate": 3.713205232933463e-06, - "loss": 0.9374, - "step": 2189 - }, - { - "epoch": 0.19750191639987374, - "grad_norm": 1.601892502443902, - "learning_rate": 3.7129037207741792e-06, - "loss": 1.009, - "step": 2190 - }, - { - "epoch": 0.197592099923344, - "grad_norm": 2.24960493222126, - "learning_rate": 3.7126020624605046e-06, - "loss": 1.0156, - "step": 2191 - }, - { - "epoch": 0.19768228344681427, - "grad_norm": 1.3146535372036543, - "learning_rate": 3.7123002580181785e-06, - "loss": 0.9565, - "step": 2192 - }, - { - "epoch": 0.19777246697028453, - "grad_norm": 1.433543357691049, - "learning_rate": 3.7119983074729532e-06, - "loss": 1.0733, - "step": 2193 - }, - { - "epoch": 0.1978626504937548, - "grad_norm": 1.6848249779198237, - "learning_rate": 3.7116962108505926e-06, - "loss": 1.0539, - "step": 2194 - }, - { - "epoch": 0.19795283401722505, - "grad_norm": 1.4296323838579845, - "learning_rate": 3.711393968176873e-06, - "loss": 1.1081, - "step": 2195 - }, - { - "epoch": 0.1980430175406953, - "grad_norm": 1.334982502783363, - "learning_rate": 3.711091579477584e-06, - "loss": 1.0185, - "step": 2196 - }, - { - "epoch": 0.19813320106416557, - "grad_norm": 1.8772954876649552, - "learning_rate": 3.7107890447785255e-06, - "loss": 1.0278, - "step": 2197 - }, - { - "epoch": 0.19822338458763583, - "grad_norm": 2.9582615337703086, - "learning_rate": 3.710486364105513e-06, - "loss": 1.0517, - "step": 2198 - }, - { - "epoch": 0.1983135681111061, - "grad_norm": 1.8442932233840124, - "learning_rate": 3.7101835374843728e-06, - "loss": 1.0031, - "step": 2199 - }, - { - "epoch": 0.19840375163457635, - "grad_norm": 1.5050934304420245, - "learning_rate": 3.7098805649409427e-06, - "loss": 1.0313, - "step": 2200 - }, - { - "epoch": 0.1984939351580466, - "grad_norm": 1.5607966484542206, - "learning_rate": 3.7095774465010748e-06, - "loss": 1.0791, - "step": 2201 - }, - { - "epoch": 0.19858411868151687, - "grad_norm": 4.01002795164493, - "learning_rate": 3.7092741821906328e-06, - "loss": 0.9926, - "step": 2202 - }, - { - "epoch": 0.19867430220498714, - "grad_norm": 2.446631712529569, - "learning_rate": 3.708970772035493e-06, - "loss": 0.9901, - "step": 2203 - }, - { - "epoch": 0.19876448572845742, - "grad_norm": 1.410172354059383, - "learning_rate": 3.7086672160615427e-06, - "loss": 0.969, - "step": 2204 - }, - { - "epoch": 0.19885466925192768, - "grad_norm": 1.5810303335688005, - "learning_rate": 3.7083635142946852e-06, - "loss": 0.9873, - "step": 2205 - }, - { - "epoch": 0.19894485277539795, - "grad_norm": 1.3273150947805177, - "learning_rate": 3.7080596667608327e-06, - "loss": 1.0364, - "step": 2206 - }, - { - "epoch": 0.1990350362988682, - "grad_norm": 1.3933560845389947, - "learning_rate": 3.707755673485911e-06, - "loss": 1.0481, - "step": 2207 - }, - { - "epoch": 0.19912521982233847, - "grad_norm": 1.6766086483138605, - "learning_rate": 3.7074515344958584e-06, - "loss": 0.9181, - "step": 2208 - }, - { - "epoch": 0.19921540334580873, - "grad_norm": 0.6905980100579292, - "learning_rate": 3.707147249816627e-06, - "loss": 0.8712, - "step": 2209 - }, - { - "epoch": 0.199305586869279, - "grad_norm": 1.4700769152598054, - "learning_rate": 3.706842819474178e-06, - "loss": 1.0309, - "step": 2210 - }, - { - "epoch": 0.19939577039274925, - "grad_norm": 2.06601981171436, - "learning_rate": 3.706538243494489e-06, - "loss": 0.9427, - "step": 2211 - }, - { - "epoch": 0.1994859539162195, - "grad_norm": 1.5512256514046974, - "learning_rate": 3.706233521903547e-06, - "loss": 0.9822, - "step": 2212 - }, - { - "epoch": 0.19957613743968977, - "grad_norm": 2.093043079886086, - "learning_rate": 3.705928654727353e-06, - "loss": 1.0852, - "step": 2213 - }, - { - "epoch": 0.19966632096316003, - "grad_norm": 1.2681189560311397, - "learning_rate": 3.7056236419919195e-06, - "loss": 0.9736, - "step": 2214 - }, - { - "epoch": 0.1997565044866303, - "grad_norm": 1.40625105963773, - "learning_rate": 3.705318483723273e-06, - "loss": 1.0575, - "step": 2215 - }, - { - "epoch": 0.19984668801010055, - "grad_norm": 3.479159296145979, - "learning_rate": 3.7050131799474493e-06, - "loss": 0.9824, - "step": 2216 - }, - { - "epoch": 0.19993687153357081, - "grad_norm": 1.4756623607463815, - "learning_rate": 3.7047077306905e-06, - "loss": 1.0032, - "step": 2217 - }, - { - "epoch": 0.20002705505704108, - "grad_norm": 1.8284446241272267, - "learning_rate": 3.704402135978488e-06, - "loss": 1.043, - "step": 2218 - }, - { - "epoch": 0.20011723858051134, - "grad_norm": 2.2505671528101225, - "learning_rate": 3.7040963958374877e-06, - "loss": 1.0297, - "step": 2219 - }, - { - "epoch": 0.2002074221039816, - "grad_norm": 1.2303561204498301, - "learning_rate": 3.7037905102935864e-06, - "loss": 1.033, - "step": 2220 - }, - { - "epoch": 0.20029760562745186, - "grad_norm": 2.078951104965766, - "learning_rate": 3.7034844793728837e-06, - "loss": 1.0091, - "step": 2221 - }, - { - "epoch": 0.20038778915092212, - "grad_norm": 3.3585943690119953, - "learning_rate": 3.7031783031014933e-06, - "loss": 0.8986, - "step": 2222 - }, - { - "epoch": 0.20047797267439238, - "grad_norm": 1.4768845141439753, - "learning_rate": 3.702871981505538e-06, - "loss": 0.9811, - "step": 2223 - }, - { - "epoch": 0.20056815619786264, - "grad_norm": 1.7292531999131273, - "learning_rate": 3.7025655146111563e-06, - "loss": 0.9464, - "step": 2224 - }, - { - "epoch": 0.2006583397213329, - "grad_norm": 0.6756582203266394, - "learning_rate": 3.702258902444497e-06, - "loss": 0.9281, - "step": 2225 - }, - { - "epoch": 0.20074852324480316, - "grad_norm": 1.4515735178222364, - "learning_rate": 3.701952145031722e-06, - "loss": 1.0572, - "step": 2226 - }, - { - "epoch": 0.20083870676827342, - "grad_norm": 1.5240699433424003, - "learning_rate": 3.701645242399005e-06, - "loss": 1.0811, - "step": 2227 - }, - { - "epoch": 0.2009288902917437, - "grad_norm": 1.3074400413580027, - "learning_rate": 3.701338194572533e-06, - "loss": 1.0764, - "step": 2228 - }, - { - "epoch": 0.20101907381521397, - "grad_norm": 1.3795070474022224, - "learning_rate": 3.7010310015785056e-06, - "loss": 0.9953, - "step": 2229 - }, - { - "epoch": 0.20110925733868423, - "grad_norm": 1.4465070582091373, - "learning_rate": 3.700723663443134e-06, - "loss": 0.9467, - "step": 2230 - }, - { - "epoch": 0.2011994408621545, - "grad_norm": 0.676523980151977, - "learning_rate": 3.7004161801926416e-06, - "loss": 0.8506, - "step": 2231 - }, - { - "epoch": 0.20128962438562475, - "grad_norm": 1.425724645039676, - "learning_rate": 3.7001085518532643e-06, - "loss": 1.0321, - "step": 2232 - }, - { - "epoch": 0.20137980790909502, - "grad_norm": 2.9145756083111825, - "learning_rate": 3.6998007784512515e-06, - "loss": 1.0042, - "step": 2233 - }, - { - "epoch": 0.20146999143256528, - "grad_norm": 1.4578099356053276, - "learning_rate": 3.6994928600128637e-06, - "loss": 0.8871, - "step": 2234 - }, - { - "epoch": 0.20156017495603554, - "grad_norm": 1.4053158942536397, - "learning_rate": 3.6991847965643742e-06, - "loss": 0.9761, - "step": 2235 - }, - { - "epoch": 0.2016503584795058, - "grad_norm": 1.4854483488252253, - "learning_rate": 3.698876588132068e-06, - "loss": 0.9677, - "step": 2236 - }, - { - "epoch": 0.20174054200297606, - "grad_norm": 1.3773185082869006, - "learning_rate": 3.6985682347422446e-06, - "loss": 0.9636, - "step": 2237 - }, - { - "epoch": 0.20183072552644632, - "grad_norm": 1.8594571263744775, - "learning_rate": 3.698259736421213e-06, - "loss": 1.0143, - "step": 2238 - }, - { - "epoch": 0.20192090904991658, - "grad_norm": 1.41975311577736, - "learning_rate": 3.697951093195297e-06, - "loss": 0.9824, - "step": 2239 - }, - { - "epoch": 0.20201109257338684, - "grad_norm": 1.3937361130642691, - "learning_rate": 3.6976423050908307e-06, - "loss": 0.9755, - "step": 2240 - }, - { - "epoch": 0.2021012760968571, - "grad_norm": 2.2853515541540252, - "learning_rate": 3.697333372134163e-06, - "loss": 0.9966, - "step": 2241 - }, - { - "epoch": 0.20219145962032736, - "grad_norm": 2.151743173320051, - "learning_rate": 3.697024294351653e-06, - "loss": 1.1036, - "step": 2242 - }, - { - "epoch": 0.20228164314379762, - "grad_norm": 1.375391860957025, - "learning_rate": 3.696715071769672e-06, - "loss": 0.9666, - "step": 2243 - }, - { - "epoch": 0.20237182666726788, - "grad_norm": 6.003966292091609, - "learning_rate": 3.696405704414606e-06, - "loss": 1.0102, - "step": 2244 - }, - { - "epoch": 0.20246201019073815, - "grad_norm": 2.035071900243369, - "learning_rate": 3.6960961923128514e-06, - "loss": 1.1065, - "step": 2245 - }, - { - "epoch": 0.2025521937142084, - "grad_norm": 2.0276052787719574, - "learning_rate": 3.6957865354908177e-06, - "loss": 0.9439, - "step": 2246 - }, - { - "epoch": 0.20264237723767867, - "grad_norm": 1.5368960861490804, - "learning_rate": 3.6954767339749262e-06, - "loss": 1.0231, - "step": 2247 - }, - { - "epoch": 0.20273256076114893, - "grad_norm": 1.735447320136932, - "learning_rate": 3.6951667877916113e-06, - "loss": 0.9849, - "step": 2248 - }, - { - "epoch": 0.2028227442846192, - "grad_norm": 1.3836960636781797, - "learning_rate": 3.694856696967319e-06, - "loss": 1.0124, - "step": 2249 - }, - { - "epoch": 0.20291292780808945, - "grad_norm": 1.6541946902011262, - "learning_rate": 3.6945464615285077e-06, - "loss": 0.9888, - "step": 2250 - }, - { - "epoch": 0.2030031113315597, - "grad_norm": 0.6796331493526864, - "learning_rate": 3.694236081501648e-06, - "loss": 0.9356, - "step": 2251 - }, - { - "epoch": 0.20309329485503, - "grad_norm": 1.3523353175314123, - "learning_rate": 3.6939255569132246e-06, - "loss": 0.9699, - "step": 2252 - }, - { - "epoch": 0.20318347837850026, - "grad_norm": 0.796294168956964, - "learning_rate": 3.693614887789733e-06, - "loss": 0.8909, - "step": 2253 - }, - { - "epoch": 0.20327366190197052, - "grad_norm": 0.7581055064891141, - "learning_rate": 3.69330407415768e-06, - "loss": 0.888, - "step": 2254 - }, - { - "epoch": 0.20336384542544078, - "grad_norm": 5.664845379624127, - "learning_rate": 3.6929931160435867e-06, - "loss": 1.0058, - "step": 2255 - }, - { - "epoch": 0.20345402894891104, - "grad_norm": 1.5142586438311918, - "learning_rate": 3.6926820134739858e-06, - "loss": 1.0608, - "step": 2256 - }, - { - "epoch": 0.2035442124723813, - "grad_norm": 2.8365375314882586, - "learning_rate": 3.692370766475422e-06, - "loss": 0.9507, - "step": 2257 - }, - { - "epoch": 0.20363439599585156, - "grad_norm": 1.605649469232963, - "learning_rate": 3.692059375074453e-06, - "loss": 1.0855, - "step": 2258 - }, - { - "epoch": 0.20372457951932182, - "grad_norm": 5.084558912443286, - "learning_rate": 3.6917478392976475e-06, - "loss": 1.0671, - "step": 2259 - }, - { - "epoch": 0.20381476304279209, - "grad_norm": 1.3402600322903642, - "learning_rate": 3.691436159171589e-06, - "loss": 1.0635, - "step": 2260 - }, - { - "epoch": 0.20390494656626235, - "grad_norm": 1.6284242644181148, - "learning_rate": 3.6911243347228703e-06, - "loss": 0.985, - "step": 2261 - }, - { - "epoch": 0.2039951300897326, - "grad_norm": 0.6353403863823921, - "learning_rate": 3.690812365978099e-06, - "loss": 0.7874, - "step": 2262 - }, - { - "epoch": 0.20408531361320287, - "grad_norm": 1.461563792086412, - "learning_rate": 3.690500252963893e-06, - "loss": 0.9663, - "step": 2263 - }, - { - "epoch": 0.20417549713667313, - "grad_norm": 1.319274539493014, - "learning_rate": 3.6901879957068846e-06, - "loss": 0.9924, - "step": 2264 - }, - { - "epoch": 0.2042656806601434, - "grad_norm": 1.4744940908474649, - "learning_rate": 3.689875594233717e-06, - "loss": 1.0911, - "step": 2265 - }, - { - "epoch": 0.20435586418361365, - "grad_norm": 1.4592355208143402, - "learning_rate": 3.689563048571046e-06, - "loss": 0.9815, - "step": 2266 - }, - { - "epoch": 0.2044460477070839, - "grad_norm": 9.944207381034031, - "learning_rate": 3.6892503587455395e-06, - "loss": 1.0039, - "step": 2267 - }, - { - "epoch": 0.20453623123055417, - "grad_norm": 1.5020951897113104, - "learning_rate": 3.6889375247838766e-06, - "loss": 1.0121, - "step": 2268 - }, - { - "epoch": 0.20462641475402443, - "grad_norm": 1.5402306320837653, - "learning_rate": 3.688624546712753e-06, - "loss": 1.0023, - "step": 2269 - }, - { - "epoch": 0.2047165982774947, - "grad_norm": 1.3947274633687006, - "learning_rate": 3.688311424558871e-06, - "loss": 0.9623, - "step": 2270 - }, - { - "epoch": 0.20480678180096495, - "grad_norm": 2.0123910910002576, - "learning_rate": 3.6879981583489496e-06, - "loss": 0.9096, - "step": 2271 - }, - { - "epoch": 0.20489696532443522, - "grad_norm": 4.145054243995963, - "learning_rate": 3.687684748109718e-06, - "loss": 1.0179, - "step": 2272 - }, - { - "epoch": 0.20498714884790548, - "grad_norm": 1.9997600173022105, - "learning_rate": 3.6873711938679174e-06, - "loss": 1.066, - "step": 2273 - }, - { - "epoch": 0.20507733237137574, - "grad_norm": 1.2888953823075842, - "learning_rate": 3.6870574956503027e-06, - "loss": 0.988, - "step": 2274 - }, - { - "epoch": 0.20516751589484603, - "grad_norm": 2.119497580283679, - "learning_rate": 3.68674365348364e-06, - "loss": 1.002, - "step": 2275 - }, - { - "epoch": 0.2052576994183163, - "grad_norm": 1.5861815466813982, - "learning_rate": 3.6864296673947086e-06, - "loss": 1.0063, - "step": 2276 - }, - { - "epoch": 0.20534788294178655, - "grad_norm": 1.5692751670327683, - "learning_rate": 3.686115537410298e-06, - "loss": 0.9931, - "step": 2277 - }, - { - "epoch": 0.2054380664652568, - "grad_norm": 1.7983483949489538, - "learning_rate": 3.685801263557214e-06, - "loss": 0.9513, - "step": 2278 - }, - { - "epoch": 0.20552824998872707, - "grad_norm": 1.5709928490605285, - "learning_rate": 3.68548684586227e-06, - "loss": 1.0866, - "step": 2279 - }, - { - "epoch": 0.20561843351219733, - "grad_norm": 1.741110567204928, - "learning_rate": 3.685172284352295e-06, - "loss": 1.0079, - "step": 2280 - }, - { - "epoch": 0.2057086170356676, - "grad_norm": 1.1370726883165032, - "learning_rate": 3.684857579054128e-06, - "loss": 0.9414, - "step": 2281 - }, - { - "epoch": 0.20579880055913785, - "grad_norm": 1.70938478847121, - "learning_rate": 3.6845427299946233e-06, - "loss": 1.0666, - "step": 2282 - }, - { - "epoch": 0.2058889840826081, - "grad_norm": 1.3495834061019107, - "learning_rate": 3.6842277372006434e-06, - "loss": 1.0994, - "step": 2283 - }, - { - "epoch": 0.20597916760607837, - "grad_norm": 1.497640104410762, - "learning_rate": 3.6839126006990664e-06, - "loss": 0.979, - "step": 2284 - }, - { - "epoch": 0.20606935112954863, - "grad_norm": 1.2066843466403125, - "learning_rate": 3.6835973205167818e-06, - "loss": 1.0549, - "step": 2285 - }, - { - "epoch": 0.2061595346530189, - "grad_norm": 1.7783394173062526, - "learning_rate": 3.6832818966806904e-06, - "loss": 1.0339, - "step": 2286 - }, - { - "epoch": 0.20624971817648916, - "grad_norm": 1.623854673681103, - "learning_rate": 3.682966329217706e-06, - "loss": 1.0788, - "step": 2287 - }, - { - "epoch": 0.20633990169995942, - "grad_norm": 1.5223153701105527, - "learning_rate": 3.6826506181547543e-06, - "loss": 1.041, - "step": 2288 - }, - { - "epoch": 0.20643008522342968, - "grad_norm": 1.9364121982714269, - "learning_rate": 3.682334763518774e-06, - "loss": 1.0232, - "step": 2289 - }, - { - "epoch": 0.20652026874689994, - "grad_norm": 8.270876739413676, - "learning_rate": 3.6820187653367158e-06, - "loss": 0.9729, - "step": 2290 - }, - { - "epoch": 0.2066104522703702, - "grad_norm": 1.4821705561165432, - "learning_rate": 3.6817026236355412e-06, - "loss": 0.9034, - "step": 2291 - }, - { - "epoch": 0.20670063579384046, - "grad_norm": 1.9163032961287485, - "learning_rate": 3.681386338442227e-06, - "loss": 0.9946, - "step": 2292 - }, - { - "epoch": 0.20679081931731072, - "grad_norm": 1.3581368957644582, - "learning_rate": 3.681069909783758e-06, - "loss": 0.9796, - "step": 2293 - }, - { - "epoch": 0.20688100284078098, - "grad_norm": 1.373224629471805, - "learning_rate": 3.680753337687136e-06, - "loss": 1.0117, - "step": 2294 - }, - { - "epoch": 0.20697118636425124, - "grad_norm": 1.935481773844475, - "learning_rate": 3.680436622179371e-06, - "loss": 0.9371, - "step": 2295 - }, - { - "epoch": 0.2070613698877215, - "grad_norm": 1.7138040297301602, - "learning_rate": 3.680119763287488e-06, - "loss": 0.9888, - "step": 2296 - }, - { - "epoch": 0.20715155341119176, - "grad_norm": 1.615325002653267, - "learning_rate": 3.6798027610385227e-06, - "loss": 0.9461, - "step": 2297 - }, - { - "epoch": 0.20724173693466202, - "grad_norm": 1.672308535828397, - "learning_rate": 3.6794856154595235e-06, - "loss": 1.0484, - "step": 2298 - }, - { - "epoch": 0.2073319204581323, - "grad_norm": 1.7400411205256028, - "learning_rate": 3.6791683265775506e-06, - "loss": 1.0139, - "step": 2299 - }, - { - "epoch": 0.20742210398160257, - "grad_norm": 1.595620871096632, - "learning_rate": 3.6788508944196773e-06, - "loss": 0.9633, - "step": 2300 - }, - { - "epoch": 0.20751228750507283, - "grad_norm": 1.252440073707868, - "learning_rate": 3.678533319012989e-06, - "loss": 1.0749, - "step": 2301 - }, - { - "epoch": 0.2076024710285431, - "grad_norm": 1.4441470834527277, - "learning_rate": 3.6782156003845826e-06, - "loss": 0.9597, - "step": 2302 - }, - { - "epoch": 0.20769265455201336, - "grad_norm": 0.6709823666954629, - "learning_rate": 3.6778977385615676e-06, - "loss": 0.8599, - "step": 2303 - }, - { - "epoch": 0.20778283807548362, - "grad_norm": 1.6355315571011406, - "learning_rate": 3.6775797335710656e-06, - "loss": 1.0459, - "step": 2304 - }, - { - "epoch": 0.20787302159895388, - "grad_norm": 1.7818006367120387, - "learning_rate": 3.6772615854402105e-06, - "loss": 0.9564, - "step": 2305 - }, - { - "epoch": 0.20796320512242414, - "grad_norm": 1.3616265253121334, - "learning_rate": 3.6769432941961487e-06, - "loss": 0.8795, - "step": 2306 - }, - { - "epoch": 0.2080533886458944, - "grad_norm": 1.5243184208115812, - "learning_rate": 3.676624859866038e-06, - "loss": 1.0016, - "step": 2307 - }, - { - "epoch": 0.20814357216936466, - "grad_norm": 2.683232045680734, - "learning_rate": 3.67630628247705e-06, - "loss": 0.929, - "step": 2308 - }, - { - "epoch": 0.20823375569283492, - "grad_norm": 1.5708651353094218, - "learning_rate": 3.675987562056367e-06, - "loss": 1.0641, - "step": 2309 - }, - { - "epoch": 0.20832393921630518, - "grad_norm": 1.3166812869761169, - "learning_rate": 3.675668698631184e-06, - "loss": 1.0341, - "step": 2310 - }, - { - "epoch": 0.20841412273977544, - "grad_norm": 1.3569029769930028, - "learning_rate": 3.675349692228708e-06, - "loss": 1.0731, - "step": 2311 - }, - { - "epoch": 0.2085043062632457, - "grad_norm": 1.4615831223509868, - "learning_rate": 3.6750305428761578e-06, - "loss": 0.9721, - "step": 2312 - }, - { - "epoch": 0.20859448978671596, - "grad_norm": 1.837827188854417, - "learning_rate": 3.674711250600766e-06, - "loss": 1.0144, - "step": 2313 - }, - { - "epoch": 0.20868467331018623, - "grad_norm": 0.6474113387821018, - "learning_rate": 3.6743918154297765e-06, - "loss": 0.8285, - "step": 2314 - }, - { - "epoch": 0.20877485683365649, - "grad_norm": 1.4094102317990809, - "learning_rate": 3.6740722373904446e-06, - "loss": 0.9921, - "step": 2315 - }, - { - "epoch": 0.20886504035712675, - "grad_norm": 1.3096755836725669, - "learning_rate": 3.6737525165100383e-06, - "loss": 0.9835, - "step": 2316 - }, - { - "epoch": 0.208955223880597, - "grad_norm": 1.642504242495279, - "learning_rate": 3.6734326528158385e-06, - "loss": 1.0272, - "step": 2317 - }, - { - "epoch": 0.20904540740406727, - "grad_norm": 1.2691523999025756, - "learning_rate": 3.673112646335138e-06, - "loss": 0.9925, - "step": 2318 - }, - { - "epoch": 0.20913559092753753, - "grad_norm": 1.4503902008935465, - "learning_rate": 3.672792497095241e-06, - "loss": 0.9569, - "step": 2319 - }, - { - "epoch": 0.2092257744510078, - "grad_norm": 1.7540459863947966, - "learning_rate": 3.672472205123464e-06, - "loss": 1.007, - "step": 2320 - }, - { - "epoch": 0.20931595797447805, - "grad_norm": 2.40358497374509, - "learning_rate": 3.6721517704471363e-06, - "loss": 0.9306, - "step": 2321 - }, - { - "epoch": 0.2094061414979483, - "grad_norm": 1.8220174115434598, - "learning_rate": 3.6718311930936e-06, - "loss": 1.0322, - "step": 2322 - }, - { - "epoch": 0.2094963250214186, - "grad_norm": 1.562558668942491, - "learning_rate": 3.6715104730902074e-06, - "loss": 0.9649, - "step": 2323 - }, - { - "epoch": 0.20958650854488886, - "grad_norm": 1.7521413236343295, - "learning_rate": 3.671189610464325e-06, - "loss": 1.0426, - "step": 2324 - }, - { - "epoch": 0.20967669206835912, - "grad_norm": 1.4009149014251923, - "learning_rate": 3.6708686052433303e-06, - "loss": 0.9572, - "step": 2325 - }, - { - "epoch": 0.20976687559182938, - "grad_norm": 1.3865517529149995, - "learning_rate": 3.6705474574546127e-06, - "loss": 1.0162, - "step": 2326 - }, - { - "epoch": 0.20985705911529964, - "grad_norm": 10.703891609749313, - "learning_rate": 3.670226167125575e-06, - "loss": 1.0202, - "step": 2327 - }, - { - "epoch": 0.2099472426387699, - "grad_norm": 1.8723856183349514, - "learning_rate": 3.6699047342836313e-06, - "loss": 1.0011, - "step": 2328 - }, - { - "epoch": 0.21003742616224017, - "grad_norm": 1.4954881363643342, - "learning_rate": 3.669583158956208e-06, - "loss": 1.0452, - "step": 2329 - }, - { - "epoch": 0.21012760968571043, - "grad_norm": 0.6490862944221941, - "learning_rate": 3.669261441170743e-06, - "loss": 0.8646, - "step": 2330 - }, - { - "epoch": 0.2102177932091807, - "grad_norm": 1.6094264790718076, - "learning_rate": 3.668939580954688e-06, - "loss": 1.0158, - "step": 2331 - }, - { - "epoch": 0.21030797673265095, - "grad_norm": 1.345311654652187, - "learning_rate": 3.668617578335506e-06, - "loss": 0.9772, - "step": 2332 - }, - { - "epoch": 0.2103981602561212, - "grad_norm": 1.7187967467453256, - "learning_rate": 3.6682954333406707e-06, - "loss": 1.0351, - "step": 2333 - }, - { - "epoch": 0.21048834377959147, - "grad_norm": 1.6053510562424302, - "learning_rate": 3.6679731459976707e-06, - "loss": 1.1322, - "step": 2334 - }, - { - "epoch": 0.21057852730306173, - "grad_norm": 1.4643017389171622, - "learning_rate": 3.6676507163340046e-06, - "loss": 1.0068, - "step": 2335 - }, - { - "epoch": 0.210668710826532, - "grad_norm": 1.45930291596161, - "learning_rate": 3.6673281443771842e-06, - "loss": 1.0364, - "step": 2336 - }, - { - "epoch": 0.21075889435000225, - "grad_norm": 1.3694807166238783, - "learning_rate": 3.667005430154733e-06, - "loss": 0.8935, - "step": 2337 - }, - { - "epoch": 0.2108490778734725, - "grad_norm": 0.7335537721833469, - "learning_rate": 3.666682573694186e-06, - "loss": 0.9328, - "step": 2338 - }, - { - "epoch": 0.21093926139694277, - "grad_norm": 0.6522229933756715, - "learning_rate": 3.6663595750230924e-06, - "loss": 0.8182, - "step": 2339 - }, - { - "epoch": 0.21102944492041303, - "grad_norm": 3.3553667152887154, - "learning_rate": 3.666036434169012e-06, - "loss": 1.0577, - "step": 2340 - }, - { - "epoch": 0.2111196284438833, - "grad_norm": 1.5119274695171208, - "learning_rate": 3.665713151159516e-06, - "loss": 0.9835, - "step": 2341 - }, - { - "epoch": 0.21120981196735356, - "grad_norm": 1.3642159566827736, - "learning_rate": 3.665389726022189e-06, - "loss": 1.1052, - "step": 2342 - }, - { - "epoch": 0.21129999549082382, - "grad_norm": 1.6976004776129738, - "learning_rate": 3.6650661587846283e-06, - "loss": 0.9798, - "step": 2343 - }, - { - "epoch": 0.21139017901429408, - "grad_norm": 1.7100232612410848, - "learning_rate": 3.6647424494744418e-06, - "loss": 0.9695, - "step": 2344 - }, - { - "epoch": 0.21148036253776434, - "grad_norm": 1.39017569323493, - "learning_rate": 3.6644185981192503e-06, - "loss": 1.0548, - "step": 2345 - }, - { - "epoch": 0.2115705460612346, - "grad_norm": 1.957261184311555, - "learning_rate": 3.6640946047466868e-06, - "loss": 0.9371, - "step": 2346 - }, - { - "epoch": 0.2116607295847049, - "grad_norm": 1.5676038164380945, - "learning_rate": 3.6637704693843953e-06, - "loss": 0.9712, - "step": 2347 - }, - { - "epoch": 0.21175091310817515, - "grad_norm": 1.4655551657886259, - "learning_rate": 3.6634461920600337e-06, - "loss": 0.9421, - "step": 2348 - }, - { - "epoch": 0.2118410966316454, - "grad_norm": 1.7950127280801456, - "learning_rate": 3.66312177280127e-06, - "loss": 1.0304, - "step": 2349 - }, - { - "epoch": 0.21193128015511567, - "grad_norm": 1.284117815391028, - "learning_rate": 3.6627972116357872e-06, - "loss": 1.0571, - "step": 2350 - }, - { - "epoch": 0.21202146367858593, - "grad_norm": 1.6080550919959313, - "learning_rate": 3.662472508591278e-06, - "loss": 1.0696, - "step": 2351 - }, - { - "epoch": 0.2121116472020562, - "grad_norm": 1.7097188014555667, - "learning_rate": 3.662147663695447e-06, - "loss": 0.9972, - "step": 2352 - }, - { - "epoch": 0.21220183072552645, - "grad_norm": 1.3942552822396703, - "learning_rate": 3.6618226769760127e-06, - "loss": 0.9923, - "step": 2353 - }, - { - "epoch": 0.2122920142489967, - "grad_norm": 1.6549781378370985, - "learning_rate": 3.661497548460704e-06, - "loss": 0.881, - "step": 2354 - }, - { - "epoch": 0.21238219777246697, - "grad_norm": 5.174983539647263, - "learning_rate": 3.6611722781772635e-06, - "loss": 1.0189, - "step": 2355 - }, - { - "epoch": 0.21247238129593723, - "grad_norm": 0.7788084024171983, - "learning_rate": 3.6608468661534444e-06, - "loss": 0.9251, - "step": 2356 - }, - { - "epoch": 0.2125625648194075, - "grad_norm": 2.545131718612593, - "learning_rate": 3.660521312417013e-06, - "loss": 0.9271, - "step": 2357 - }, - { - "epoch": 0.21265274834287776, - "grad_norm": 1.6330577173399146, - "learning_rate": 3.660195616995747e-06, - "loss": 0.9689, - "step": 2358 - }, - { - "epoch": 0.21274293186634802, - "grad_norm": 1.3853055388355107, - "learning_rate": 3.6598697799174367e-06, - "loss": 1.0284, - "step": 2359 - }, - { - "epoch": 0.21283311538981828, - "grad_norm": 1.7630506669811425, - "learning_rate": 3.6595438012098844e-06, - "loss": 1.0708, - "step": 2360 - }, - { - "epoch": 0.21292329891328854, - "grad_norm": 1.3737124136450565, - "learning_rate": 3.6592176809009045e-06, - "loss": 0.9782, - "step": 2361 - }, - { - "epoch": 0.2130134824367588, - "grad_norm": 1.4567748143243204, - "learning_rate": 3.6588914190183227e-06, - "loss": 1.038, - "step": 2362 - }, - { - "epoch": 0.21310366596022906, - "grad_norm": 1.6724678485785036, - "learning_rate": 3.658565015589978e-06, - "loss": 0.8778, - "step": 2363 - }, - { - "epoch": 0.21319384948369932, - "grad_norm": 2.1739046780798468, - "learning_rate": 3.6582384706437217e-06, - "loss": 1.0216, - "step": 2364 - }, - { - "epoch": 0.21328403300716958, - "grad_norm": 1.5982470177954244, - "learning_rate": 3.6579117842074156e-06, - "loss": 1.0234, - "step": 2365 - }, - { - "epoch": 0.21337421653063984, - "grad_norm": 1.3925562959862143, - "learning_rate": 3.657584956308934e-06, - "loss": 0.9448, - "step": 2366 - }, - { - "epoch": 0.2134644000541101, - "grad_norm": 1.529657742733977, - "learning_rate": 3.6572579869761648e-06, - "loss": 1.0653, - "step": 2367 - }, - { - "epoch": 0.21355458357758036, - "grad_norm": 1.7476485667019306, - "learning_rate": 3.6569308762370056e-06, - "loss": 1.0573, - "step": 2368 - }, - { - "epoch": 0.21364476710105063, - "grad_norm": 8.5029906453289, - "learning_rate": 3.6566036241193676e-06, - "loss": 1.0417, - "step": 2369 - }, - { - "epoch": 0.2137349506245209, - "grad_norm": 0.6844296385670907, - "learning_rate": 3.656276230651174e-06, - "loss": 0.8952, - "step": 2370 - }, - { - "epoch": 0.21382513414799117, - "grad_norm": 0.711570206685439, - "learning_rate": 3.65594869586036e-06, - "loss": 0.8894, - "step": 2371 - }, - { - "epoch": 0.21391531767146144, - "grad_norm": 1.5455427501619374, - "learning_rate": 3.6556210197748724e-06, - "loss": 0.9291, - "step": 2372 - }, - { - "epoch": 0.2140055011949317, - "grad_norm": 1.484902860245078, - "learning_rate": 3.655293202422671e-06, - "loss": 0.9647, - "step": 2373 - }, - { - "epoch": 0.21409568471840196, - "grad_norm": 1.308463793322526, - "learning_rate": 3.654965243831725e-06, - "loss": 1.0185, - "step": 2374 - }, - { - "epoch": 0.21418586824187222, - "grad_norm": 1.3690627833725062, - "learning_rate": 3.65463714403002e-06, - "loss": 1.0396, - "step": 2375 - }, - { - "epoch": 0.21427605176534248, - "grad_norm": 1.4563431738651014, - "learning_rate": 3.65430890304555e-06, - "loss": 0.9582, - "step": 2376 - }, - { - "epoch": 0.21436623528881274, - "grad_norm": 1.2494202699994548, - "learning_rate": 3.653980520906323e-06, - "loss": 1.0356, - "step": 2377 - }, - { - "epoch": 0.214456418812283, - "grad_norm": 1.2957621936139718, - "learning_rate": 3.653651997640358e-06, - "loss": 0.9859, - "step": 2378 - }, - { - "epoch": 0.21454660233575326, - "grad_norm": 1.4487721868203431, - "learning_rate": 3.653323333275686e-06, - "loss": 1.0025, - "step": 2379 - }, - { - "epoch": 0.21463678585922352, - "grad_norm": 1.384097219114815, - "learning_rate": 3.652994527840351e-06, - "loss": 0.9861, - "step": 2380 - }, - { - "epoch": 0.21472696938269378, - "grad_norm": 1.474944262340482, - "learning_rate": 3.6526655813624087e-06, - "loss": 0.964, - "step": 2381 - }, - { - "epoch": 0.21481715290616404, - "grad_norm": 0.6000012785182682, - "learning_rate": 3.652336493869925e-06, - "loss": 0.8671, - "step": 2382 - }, - { - "epoch": 0.2149073364296343, - "grad_norm": 1.4997672059295775, - "learning_rate": 3.6520072653909823e-06, - "loss": 1.015, - "step": 2383 - }, - { - "epoch": 0.21499751995310457, - "grad_norm": 1.697096558327411, - "learning_rate": 3.6516778959536702e-06, - "loss": 1.0039, - "step": 2384 - }, - { - "epoch": 0.21508770347657483, - "grad_norm": 1.7379711774668314, - "learning_rate": 3.6513483855860923e-06, - "loss": 1.0556, - "step": 2385 - }, - { - "epoch": 0.2151778870000451, - "grad_norm": 1.392718207762509, - "learning_rate": 3.6510187343163654e-06, - "loss": 0.9425, - "step": 2386 - }, - { - "epoch": 0.21526807052351535, - "grad_norm": 1.603515178945203, - "learning_rate": 3.650688942172616e-06, - "loss": 1.0389, - "step": 2387 - }, - { - "epoch": 0.2153582540469856, - "grad_norm": 1.6213164027254465, - "learning_rate": 3.650359009182984e-06, - "loss": 0.9419, - "step": 2388 - }, - { - "epoch": 0.21544843757045587, - "grad_norm": 1.700530289238601, - "learning_rate": 3.650028935375622e-06, - "loss": 1.0949, - "step": 2389 - }, - { - "epoch": 0.21553862109392613, - "grad_norm": 1.2694690395665063, - "learning_rate": 3.6496987207786926e-06, - "loss": 0.9769, - "step": 2390 - }, - { - "epoch": 0.2156288046173964, - "grad_norm": 1.579720474336702, - "learning_rate": 3.6493683654203724e-06, - "loss": 0.9524, - "step": 2391 - }, - { - "epoch": 0.21571898814086665, - "grad_norm": 1.5654248899694498, - "learning_rate": 3.6490378693288484e-06, - "loss": 0.9827, - "step": 2392 - }, - { - "epoch": 0.2158091716643369, - "grad_norm": 1.5307762814548527, - "learning_rate": 3.648707232532321e-06, - "loss": 1.0461, - "step": 2393 - }, - { - "epoch": 0.2158993551878072, - "grad_norm": 1.5321775468677683, - "learning_rate": 3.6483764550590017e-06, - "loss": 1.007, - "step": 2394 - }, - { - "epoch": 0.21598953871127746, - "grad_norm": 2.948827452311488, - "learning_rate": 3.6480455369371133e-06, - "loss": 1.0214, - "step": 2395 - }, - { - "epoch": 0.21607972223474772, - "grad_norm": 1.424580609360923, - "learning_rate": 3.647714478194893e-06, - "loss": 1.0165, - "step": 2396 - }, - { - "epoch": 0.21616990575821798, - "grad_norm": 2.075481942521228, - "learning_rate": 3.647383278860588e-06, - "loss": 0.9492, - "step": 2397 - }, - { - "epoch": 0.21626008928168824, - "grad_norm": 2.0782963961714787, - "learning_rate": 3.6470519389624587e-06, - "loss": 0.9992, - "step": 2398 - }, - { - "epoch": 0.2163502728051585, - "grad_norm": 1.3741212984873434, - "learning_rate": 3.646720458528776e-06, - "loss": 1.0632, - "step": 2399 - }, - { - "epoch": 0.21644045632862877, - "grad_norm": 2.0934448019749685, - "learning_rate": 3.6463888375878235e-06, - "loss": 0.9636, - "step": 2400 - }, - { - "epoch": 0.21653063985209903, - "grad_norm": 1.8061011916376155, - "learning_rate": 3.646057076167897e-06, - "loss": 1.0103, - "step": 2401 - }, - { - "epoch": 0.2166208233755693, - "grad_norm": 1.4276522131369407, - "learning_rate": 3.645725174297305e-06, - "loss": 1.0056, - "step": 2402 - }, - { - "epoch": 0.21671100689903955, - "grad_norm": 1.6082719893278294, - "learning_rate": 3.645393132004367e-06, - "loss": 0.9834, - "step": 2403 - }, - { - "epoch": 0.2168011904225098, - "grad_norm": 1.2910796595403196, - "learning_rate": 3.6450609493174135e-06, - "loss": 0.9581, - "step": 2404 - }, - { - "epoch": 0.21689137394598007, - "grad_norm": 1.3684494298191479, - "learning_rate": 3.6447286262647896e-06, - "loss": 0.9672, - "step": 2405 - }, - { - "epoch": 0.21698155746945033, - "grad_norm": 1.5794675045601367, - "learning_rate": 3.64439616287485e-06, - "loss": 0.9729, - "step": 2406 - }, - { - "epoch": 0.2170717409929206, - "grad_norm": 1.2552719047350656, - "learning_rate": 3.644063559175963e-06, - "loss": 0.9713, - "step": 2407 - }, - { - "epoch": 0.21716192451639085, - "grad_norm": 1.6512796930362188, - "learning_rate": 3.6437308151965074e-06, - "loss": 0.9873, - "step": 2408 - }, - { - "epoch": 0.2172521080398611, - "grad_norm": 1.3759795514433693, - "learning_rate": 3.643397930964876e-06, - "loss": 1.0169, - "step": 2409 - }, - { - "epoch": 0.21734229156333137, - "grad_norm": 1.7434359469341028, - "learning_rate": 3.6430649065094707e-06, - "loss": 1.0629, - "step": 2410 - }, - { - "epoch": 0.21743247508680164, - "grad_norm": 1.8570954911511104, - "learning_rate": 3.6427317418587086e-06, - "loss": 1.0623, - "step": 2411 - }, - { - "epoch": 0.2175226586102719, - "grad_norm": 1.731284403545201, - "learning_rate": 3.6423984370410157e-06, - "loss": 0.9494, - "step": 2412 - }, - { - "epoch": 0.21761284213374216, - "grad_norm": 1.4718874434736826, - "learning_rate": 3.6420649920848324e-06, - "loss": 0.9693, - "step": 2413 - }, - { - "epoch": 0.21770302565721242, - "grad_norm": 5.323152520714493, - "learning_rate": 3.6417314070186096e-06, - "loss": 1.0091, - "step": 2414 - }, - { - "epoch": 0.21779320918068268, - "grad_norm": 1.3497731707004827, - "learning_rate": 3.641397681870811e-06, - "loss": 1.0074, - "step": 2415 - }, - { - "epoch": 0.21788339270415294, - "grad_norm": 1.2837368161602607, - "learning_rate": 3.641063816669911e-06, - "loss": 1.0251, - "step": 2416 - }, - { - "epoch": 0.2179735762276232, - "grad_norm": 1.4632312902586913, - "learning_rate": 3.640729811444398e-06, - "loss": 1.0705, - "step": 2417 - }, - { - "epoch": 0.2180637597510935, - "grad_norm": 1.3228421327915794, - "learning_rate": 3.6403956662227706e-06, - "loss": 1.0013, - "step": 2418 - }, - { - "epoch": 0.21815394327456375, - "grad_norm": 1.7302707533014088, - "learning_rate": 3.6400613810335396e-06, - "loss": 1.0661, - "step": 2419 - }, - { - "epoch": 0.218244126798034, - "grad_norm": 1.6175163045353274, - "learning_rate": 3.639726955905228e-06, - "loss": 0.9191, - "step": 2420 - }, - { - "epoch": 0.21833431032150427, - "grad_norm": 1.3477797299635013, - "learning_rate": 3.639392390866372e-06, - "loss": 1.0372, - "step": 2421 - }, - { - "epoch": 0.21842449384497453, - "grad_norm": 1.457313162604437, - "learning_rate": 3.639057685945517e-06, - "loss": 0.9811, - "step": 2422 - }, - { - "epoch": 0.2185146773684448, - "grad_norm": 1.389794819803063, - "learning_rate": 3.638722841171223e-06, - "loss": 0.8967, - "step": 2423 - }, - { - "epoch": 0.21860486089191505, - "grad_norm": 3.84813334803801, - "learning_rate": 3.638387856572061e-06, - "loss": 1.0462, - "step": 2424 - }, - { - "epoch": 0.21869504441538531, - "grad_norm": 0.6654486606178273, - "learning_rate": 3.638052732176612e-06, - "loss": 0.8736, - "step": 2425 - }, - { - "epoch": 0.21878522793885558, - "grad_norm": 1.5196406857031728, - "learning_rate": 3.637717468013472e-06, - "loss": 1.0077, - "step": 2426 - }, - { - "epoch": 0.21887541146232584, - "grad_norm": 0.6061708870619255, - "learning_rate": 3.6373820641112475e-06, - "loss": 0.8246, - "step": 2427 - }, - { - "epoch": 0.2189655949857961, - "grad_norm": 1.5549664271125025, - "learning_rate": 3.6370465204985567e-06, - "loss": 1.0203, - "step": 2428 - }, - { - "epoch": 0.21905577850926636, - "grad_norm": 0.6437357067632431, - "learning_rate": 3.6367108372040304e-06, - "loss": 0.8949, - "step": 2429 - }, - { - "epoch": 0.21914596203273662, - "grad_norm": 1.5569437801523387, - "learning_rate": 3.6363750142563107e-06, - "loss": 1.0103, - "step": 2430 - }, - { - "epoch": 0.21923614555620688, - "grad_norm": 0.7147085760334072, - "learning_rate": 3.636039051684052e-06, - "loss": 0.9218, - "step": 2431 - }, - { - "epoch": 0.21932632907967714, - "grad_norm": 1.6216044529229383, - "learning_rate": 3.6357029495159203e-06, - "loss": 0.9759, - "step": 2432 - }, - { - "epoch": 0.2194165126031474, - "grad_norm": 1.3966662735282205, - "learning_rate": 3.6353667077805934e-06, - "loss": 0.9805, - "step": 2433 - }, - { - "epoch": 0.21950669612661766, - "grad_norm": 2.0170016060531037, - "learning_rate": 3.6350303265067625e-06, - "loss": 1.047, - "step": 2434 - }, - { - "epoch": 0.21959687965008792, - "grad_norm": 1.3911867185940325, - "learning_rate": 3.6346938057231285e-06, - "loss": 1.0381, - "step": 2435 - }, - { - "epoch": 0.21968706317355818, - "grad_norm": 1.314417709621689, - "learning_rate": 3.6343571454584047e-06, - "loss": 0.9465, - "step": 2436 - }, - { - "epoch": 0.21977724669702844, - "grad_norm": 1.5267932416814787, - "learning_rate": 3.6340203457413176e-06, - "loss": 1.0452, - "step": 2437 - }, - { - "epoch": 0.2198674302204987, - "grad_norm": 1.8199262741373037, - "learning_rate": 3.633683406600605e-06, - "loss": 1.0271, - "step": 2438 - }, - { - "epoch": 0.21995761374396897, - "grad_norm": 1.306919085335086, - "learning_rate": 3.6333463280650165e-06, - "loss": 1.069, - "step": 2439 - }, - { - "epoch": 0.22004779726743923, - "grad_norm": 0.7403550263569718, - "learning_rate": 3.6330091101633126e-06, - "loss": 0.8427, - "step": 2440 - }, - { - "epoch": 0.2201379807909095, - "grad_norm": 2.0410511360547128, - "learning_rate": 3.632671752924267e-06, - "loss": 1.0405, - "step": 2441 - }, - { - "epoch": 0.22022816431437978, - "grad_norm": 1.5840223051485127, - "learning_rate": 3.632334256376665e-06, - "loss": 1.0182, - "step": 2442 - }, - { - "epoch": 0.22031834783785004, - "grad_norm": 1.4635078543923719, - "learning_rate": 3.6319966205493044e-06, - "loss": 1.0637, - "step": 2443 - }, - { - "epoch": 0.2204085313613203, - "grad_norm": 0.7003681602641056, - "learning_rate": 3.6316588454709922e-06, - "loss": 0.9147, - "step": 2444 - }, - { - "epoch": 0.22049871488479056, - "grad_norm": 1.319789670096164, - "learning_rate": 3.6313209311705514e-06, - "loss": 0.9535, - "step": 2445 - }, - { - "epoch": 0.22058889840826082, - "grad_norm": 1.560179245750189, - "learning_rate": 3.6309828776768133e-06, - "loss": 1.0407, - "step": 2446 - }, - { - "epoch": 0.22067908193173108, - "grad_norm": 1.9506221756797029, - "learning_rate": 3.630644685018623e-06, - "loss": 0.9669, - "step": 2447 - }, - { - "epoch": 0.22076926545520134, - "grad_norm": 2.4613687773136657, - "learning_rate": 3.6303063532248367e-06, - "loss": 1.0917, - "step": 2448 - }, - { - "epoch": 0.2208594489786716, - "grad_norm": 3.7378151342929486, - "learning_rate": 3.6299678823243236e-06, - "loss": 1.0834, - "step": 2449 - }, - { - "epoch": 0.22094963250214186, - "grad_norm": 1.8469731880848386, - "learning_rate": 3.629629272345963e-06, - "loss": 0.9092, - "step": 2450 - }, - { - "epoch": 0.22103981602561212, - "grad_norm": 1.818292503889214, - "learning_rate": 3.6292905233186468e-06, - "loss": 1.1236, - "step": 2451 - }, - { - "epoch": 0.22112999954908238, - "grad_norm": 1.5366870343533463, - "learning_rate": 3.6289516352712796e-06, - "loss": 0.9709, - "step": 2452 - }, - { - "epoch": 0.22122018307255265, - "grad_norm": 0.7654662843576134, - "learning_rate": 3.6286126082327764e-06, - "loss": 0.8886, - "step": 2453 - }, - { - "epoch": 0.2213103665960229, - "grad_norm": 1.4446513545362183, - "learning_rate": 3.628273442232066e-06, - "loss": 1.0414, - "step": 2454 - }, - { - "epoch": 0.22140055011949317, - "grad_norm": 1.4770040507845386, - "learning_rate": 3.627934137298087e-06, - "loss": 0.9993, - "step": 2455 - }, - { - "epoch": 0.22149073364296343, - "grad_norm": 1.7386028549492114, - "learning_rate": 3.627594693459792e-06, - "loss": 0.9772, - "step": 2456 - }, - { - "epoch": 0.2215809171664337, - "grad_norm": 1.46089490532617, - "learning_rate": 3.6272551107461424e-06, - "loss": 0.9695, - "step": 2457 - }, - { - "epoch": 0.22167110068990395, - "grad_norm": 1.4960713596511146, - "learning_rate": 3.6269153891861137e-06, - "loss": 0.9966, - "step": 2458 - }, - { - "epoch": 0.2217612842133742, - "grad_norm": 1.273843758962943, - "learning_rate": 3.6265755288086944e-06, - "loss": 0.9829, - "step": 2459 - }, - { - "epoch": 0.22185146773684447, - "grad_norm": 1.8490045317846522, - "learning_rate": 3.626235529642881e-06, - "loss": 0.9883, - "step": 2460 - }, - { - "epoch": 0.22194165126031473, - "grad_norm": 1.5453536133267907, - "learning_rate": 3.625895391717686e-06, - "loss": 0.9323, - "step": 2461 - }, - { - "epoch": 0.222031834783785, - "grad_norm": 1.8046989275417755, - "learning_rate": 3.625555115062131e-06, - "loss": 0.9792, - "step": 2462 - }, - { - "epoch": 0.22212201830725525, - "grad_norm": 1.8096682032598939, - "learning_rate": 3.6252146997052507e-06, - "loss": 1.0752, - "step": 2463 - }, - { - "epoch": 0.22221220183072551, - "grad_norm": 0.6256003357620509, - "learning_rate": 3.6248741456760898e-06, - "loss": 0.8235, - "step": 2464 - }, - { - "epoch": 0.22230238535419578, - "grad_norm": 1.6152202047443727, - "learning_rate": 3.624533453003708e-06, - "loss": 0.989, - "step": 2465 - }, - { - "epoch": 0.22239256887766606, - "grad_norm": 1.5028051531717803, - "learning_rate": 3.6241926217171745e-06, - "loss": 1.0179, - "step": 2466 - }, - { - "epoch": 0.22248275240113632, - "grad_norm": 1.562183348041045, - "learning_rate": 3.6238516518455703e-06, - "loss": 0.9187, - "step": 2467 - }, - { - "epoch": 0.22257293592460659, - "grad_norm": 1.8098500051573922, - "learning_rate": 3.62351054341799e-06, - "loss": 1.12, - "step": 2468 - }, - { - "epoch": 0.22266311944807685, - "grad_norm": 6.357589583521415, - "learning_rate": 3.623169296463538e-06, - "loss": 1.0167, - "step": 2469 - }, - { - "epoch": 0.2227533029715471, - "grad_norm": 1.5216969254665145, - "learning_rate": 3.6228279110113316e-06, - "loss": 1.0095, - "step": 2470 - }, - { - "epoch": 0.22284348649501737, - "grad_norm": 2.3780846891859064, - "learning_rate": 3.6224863870904994e-06, - "loss": 1.0138, - "step": 2471 - }, - { - "epoch": 0.22293367001848763, - "grad_norm": 1.4140669258849288, - "learning_rate": 3.6221447247301827e-06, - "loss": 1.0132, - "step": 2472 - }, - { - "epoch": 0.2230238535419579, - "grad_norm": 1.2481163614477466, - "learning_rate": 3.6218029239595332e-06, - "loss": 0.9576, - "step": 2473 - }, - { - "epoch": 0.22311403706542815, - "grad_norm": 1.8569548427861602, - "learning_rate": 3.621460984807716e-06, - "loss": 1.0335, - "step": 2474 - }, - { - "epoch": 0.2232042205888984, - "grad_norm": 1.8192439401558949, - "learning_rate": 3.621118907303907e-06, - "loss": 0.9807, - "step": 2475 - }, - { - "epoch": 0.22329440411236867, - "grad_norm": 1.355914215851538, - "learning_rate": 3.620776691477294e-06, - "loss": 0.9994, - "step": 2476 - }, - { - "epoch": 0.22338458763583893, - "grad_norm": 0.7541963878767642, - "learning_rate": 3.6204343373570765e-06, - "loss": 0.8205, - "step": 2477 - }, - { - "epoch": 0.2234747711593092, - "grad_norm": 1.381478781761889, - "learning_rate": 3.620091844972467e-06, - "loss": 1.0318, - "step": 2478 - }, - { - "epoch": 0.22356495468277945, - "grad_norm": 6.6780825465865155, - "learning_rate": 3.619749214352688e-06, - "loss": 1.0306, - "step": 2479 - }, - { - "epoch": 0.22365513820624972, - "grad_norm": 1.6935333197812645, - "learning_rate": 3.6194064455269744e-06, - "loss": 1.1082, - "step": 2480 - }, - { - "epoch": 0.22374532172971998, - "grad_norm": 1.5972570093024696, - "learning_rate": 3.6190635385245737e-06, - "loss": 0.9265, - "step": 2481 - }, - { - "epoch": 0.22383550525319024, - "grad_norm": 2.0595939276740016, - "learning_rate": 3.618720493374745e-06, - "loss": 0.9392, - "step": 2482 - }, - { - "epoch": 0.2239256887766605, - "grad_norm": 2.402233387962989, - "learning_rate": 3.6183773101067575e-06, - "loss": 0.8702, - "step": 2483 - }, - { - "epoch": 0.22401587230013076, - "grad_norm": 1.7598646603913288, - "learning_rate": 3.6180339887498948e-06, - "loss": 0.936, - "step": 2484 - }, - { - "epoch": 0.22410605582360102, - "grad_norm": 1.4199650271232647, - "learning_rate": 3.61769052933345e-06, - "loss": 1.0035, - "step": 2485 - }, - { - "epoch": 0.22419623934707128, - "grad_norm": 2.4847684344770706, - "learning_rate": 3.6173469318867297e-06, - "loss": 1.1646, - "step": 2486 - }, - { - "epoch": 0.22428642287054154, - "grad_norm": 1.573925132914646, - "learning_rate": 3.617003196439051e-06, - "loss": 1.036, - "step": 2487 - }, - { - "epoch": 0.2243766063940118, - "grad_norm": 1.2321527008784932, - "learning_rate": 3.616659323019744e-06, - "loss": 1.0139, - "step": 2488 - }, - { - "epoch": 0.22446678991748206, - "grad_norm": 2.0174508511632028, - "learning_rate": 3.616315311658149e-06, - "loss": 1.0357, - "step": 2489 - }, - { - "epoch": 0.22455697344095235, - "grad_norm": 1.4474380946386893, - "learning_rate": 3.6159711623836195e-06, - "loss": 1.0624, - "step": 2490 - }, - { - "epoch": 0.2246471569644226, - "grad_norm": 0.6552501282049619, - "learning_rate": 3.6156268752255203e-06, - "loss": 0.8396, - "step": 2491 - }, - { - "epoch": 0.22473734048789287, - "grad_norm": 1.315015426008594, - "learning_rate": 3.615282450213227e-06, - "loss": 0.9712, - "step": 2492 - }, - { - "epoch": 0.22482752401136313, - "grad_norm": 1.6652820398684272, - "learning_rate": 3.614937887376128e-06, - "loss": 1.0219, - "step": 2493 - }, - { - "epoch": 0.2249177075348334, - "grad_norm": 2.052629956349029, - "learning_rate": 3.614593186743625e-06, - "loss": 1.0207, - "step": 2494 - }, - { - "epoch": 0.22500789105830366, - "grad_norm": 2.528964953938811, - "learning_rate": 3.614248348345128e-06, - "loss": 0.9547, - "step": 2495 - }, - { - "epoch": 0.22509807458177392, - "grad_norm": 3.3203023035229466, - "learning_rate": 3.6139033722100614e-06, - "loss": 1.0567, - "step": 2496 - }, - { - "epoch": 0.22518825810524418, - "grad_norm": 4.113472987503491, - "learning_rate": 3.6135582583678596e-06, - "loss": 1.0856, - "step": 2497 - }, - { - "epoch": 0.22527844162871444, - "grad_norm": 1.5753711808014659, - "learning_rate": 3.61321300684797e-06, - "loss": 1.0243, - "step": 2498 - }, - { - "epoch": 0.2253686251521847, - "grad_norm": 1.10258268906919, - "learning_rate": 3.6128676176798527e-06, - "loss": 1.0316, - "step": 2499 - }, - { - "epoch": 0.22545880867565496, - "grad_norm": 1.3114447665958349, - "learning_rate": 3.612522090892976e-06, - "loss": 1.0236, - "step": 2500 - }, - { - "epoch": 0.22554899219912522, - "grad_norm": 1.3854056575404654, - "learning_rate": 3.6121764265168232e-06, - "loss": 0.9716, - "step": 2501 - }, - { - "epoch": 0.22563917572259548, - "grad_norm": 1.4225192444476717, - "learning_rate": 3.611830624580888e-06, - "loss": 1.0277, - "step": 2502 - }, - { - "epoch": 0.22572935924606574, - "grad_norm": 1.5882476367761729, - "learning_rate": 3.6114846851146767e-06, - "loss": 1.0142, - "step": 2503 - }, - { - "epoch": 0.225819542769536, - "grad_norm": 1.4358353305536344, - "learning_rate": 3.6111386081477068e-06, - "loss": 0.9115, - "step": 2504 - }, - { - "epoch": 0.22590972629300626, - "grad_norm": 1.5123567245341465, - "learning_rate": 3.6107923937095066e-06, - "loss": 1.0131, - "step": 2505 - }, - { - "epoch": 0.22599990981647652, - "grad_norm": 3.6769812608479464, - "learning_rate": 3.6104460418296173e-06, - "loss": 0.9308, - "step": 2506 - }, - { - "epoch": 0.22609009333994678, - "grad_norm": 1.5060356620781095, - "learning_rate": 3.6100995525375924e-06, - "loss": 1.067, - "step": 2507 - }, - { - "epoch": 0.22618027686341705, - "grad_norm": 1.5104734037419743, - "learning_rate": 3.6097529258629952e-06, - "loss": 0.9572, - "step": 2508 - }, - { - "epoch": 0.2262704603868873, - "grad_norm": 1.6370589965554463, - "learning_rate": 3.6094061618354027e-06, - "loss": 1.0375, - "step": 2509 - }, - { - "epoch": 0.22636064391035757, - "grad_norm": 4.4468864778543455, - "learning_rate": 3.609059260484402e-06, - "loss": 1.0113, - "step": 2510 - }, - { - "epoch": 0.22645082743382783, - "grad_norm": 2.2947933599321786, - "learning_rate": 3.6087122218395935e-06, - "loss": 0.9522, - "step": 2511 - }, - { - "epoch": 0.2265410109572981, - "grad_norm": 1.44915072134911, - "learning_rate": 3.608365045930587e-06, - "loss": 0.9696, - "step": 2512 - }, - { - "epoch": 0.22663119448076835, - "grad_norm": 1.8619641141127432, - "learning_rate": 3.608017732787007e-06, - "loss": 0.9943, - "step": 2513 - }, - { - "epoch": 0.22672137800423864, - "grad_norm": 1.3756652003250336, - "learning_rate": 3.6076702824384875e-06, - "loss": 0.9387, - "step": 2514 - }, - { - "epoch": 0.2268115615277089, - "grad_norm": 1.3833162930982554, - "learning_rate": 3.607322694914675e-06, - "loss": 1.0251, - "step": 2515 - }, - { - "epoch": 0.22690174505117916, - "grad_norm": 1.2441503982519393, - "learning_rate": 3.606974970245227e-06, - "loss": 0.9847, - "step": 2516 - }, - { - "epoch": 0.22699192857464942, - "grad_norm": 1.4322415244833344, - "learning_rate": 3.606627108459814e-06, - "loss": 0.9554, - "step": 2517 - }, - { - "epoch": 0.22708211209811968, - "grad_norm": 4.57190748243598, - "learning_rate": 3.6062791095881174e-06, - "loss": 0.9366, - "step": 2518 - }, - { - "epoch": 0.22717229562158994, - "grad_norm": 1.6085846080399826, - "learning_rate": 3.6059309736598303e-06, - "loss": 1.0758, - "step": 2519 - }, - { - "epoch": 0.2272624791450602, - "grad_norm": 1.78088114081501, - "learning_rate": 3.605582700704657e-06, - "loss": 0.9819, - "step": 2520 - }, - { - "epoch": 0.22735266266853046, - "grad_norm": 1.52866589231398, - "learning_rate": 3.6052342907523146e-06, - "loss": 1.0124, - "step": 2521 - }, - { - "epoch": 0.22744284619200072, - "grad_norm": 1.9311012309528526, - "learning_rate": 3.604885743832532e-06, - "loss": 0.936, - "step": 2522 - }, - { - "epoch": 0.22753302971547099, - "grad_norm": 0.7207170603989161, - "learning_rate": 3.6045370599750482e-06, - "loss": 0.8358, - "step": 2523 - }, - { - "epoch": 0.22762321323894125, - "grad_norm": 1.4793260336259306, - "learning_rate": 3.604188239209615e-06, - "loss": 0.9842, - "step": 2524 - }, - { - "epoch": 0.2277133967624115, - "grad_norm": 1.600183851884765, - "learning_rate": 3.603839281565996e-06, - "loss": 0.9324, - "step": 2525 - }, - { - "epoch": 0.22780358028588177, - "grad_norm": 1.8695809574110684, - "learning_rate": 3.603490187073966e-06, - "loss": 1.0234, - "step": 2526 - }, - { - "epoch": 0.22789376380935203, - "grad_norm": 2.5759830709339457, - "learning_rate": 3.6031409557633117e-06, - "loss": 1.0626, - "step": 2527 - }, - { - "epoch": 0.2279839473328223, - "grad_norm": 1.4846935532722891, - "learning_rate": 3.602791587663831e-06, - "loss": 1.0505, - "step": 2528 - }, - { - "epoch": 0.22807413085629255, - "grad_norm": 1.289952982038278, - "learning_rate": 3.6024420828053348e-06, - "loss": 1.0493, - "step": 2529 - }, - { - "epoch": 0.2281643143797628, - "grad_norm": 1.4074596288796224, - "learning_rate": 3.6020924412176445e-06, - "loss": 1.0775, - "step": 2530 - }, - { - "epoch": 0.22825449790323307, - "grad_norm": 1.438169323546114, - "learning_rate": 3.601742662930593e-06, - "loss": 0.9829, - "step": 2531 - }, - { - "epoch": 0.22834468142670333, - "grad_norm": 1.4457372092944911, - "learning_rate": 3.6013927479740248e-06, - "loss": 0.9679, - "step": 2532 - }, - { - "epoch": 0.2284348649501736, - "grad_norm": 1.5605223537996418, - "learning_rate": 3.6010426963777985e-06, - "loss": 1.0912, - "step": 2533 - }, - { - "epoch": 0.22852504847364385, - "grad_norm": 1.599505559698747, - "learning_rate": 3.6006925081717804e-06, - "loss": 1.0512, - "step": 2534 - }, - { - "epoch": 0.22861523199711412, - "grad_norm": 1.3244316357559336, - "learning_rate": 3.600342183385852e-06, - "loss": 0.9734, - "step": 2535 - }, - { - "epoch": 0.22870541552058438, - "grad_norm": 1.9265820925320407, - "learning_rate": 3.5999917220499043e-06, - "loss": 0.9433, - "step": 2536 - }, - { - "epoch": 0.22879559904405466, - "grad_norm": 1.7690195181563957, - "learning_rate": 3.5996411241938404e-06, - "loss": 1.0133, - "step": 2537 - }, - { - "epoch": 0.22888578256752493, - "grad_norm": 1.604698274719258, - "learning_rate": 3.5992903898475752e-06, - "loss": 0.9616, - "step": 2538 - }, - { - "epoch": 0.2289759660909952, - "grad_norm": 1.2718708722471697, - "learning_rate": 3.5989395190410365e-06, - "loss": 1.1074, - "step": 2539 - }, - { - "epoch": 0.22906614961446545, - "grad_norm": 1.405802591994367, - "learning_rate": 3.598588511804161e-06, - "loss": 1.0157, - "step": 2540 - }, - { - "epoch": 0.2291563331379357, - "grad_norm": 1.9523477457348317, - "learning_rate": 3.5982373681668987e-06, - "loss": 1.0195, - "step": 2541 - }, - { - "epoch": 0.22924651666140597, - "grad_norm": 1.7531636796515995, - "learning_rate": 3.597886088159212e-06, - "loss": 1.0139, - "step": 2542 - }, - { - "epoch": 0.22933670018487623, - "grad_norm": 1.466394280296904, - "learning_rate": 3.597534671811074e-06, - "loss": 0.9347, - "step": 2543 - }, - { - "epoch": 0.2294268837083465, - "grad_norm": 1.7735207155844495, - "learning_rate": 3.5971831191524684e-06, - "loss": 0.9828, - "step": 2544 - }, - { - "epoch": 0.22951706723181675, - "grad_norm": 1.4789362804106514, - "learning_rate": 3.5968314302133925e-06, - "loss": 1.0506, - "step": 2545 - }, - { - "epoch": 0.229607250755287, - "grad_norm": 1.6624982045099415, - "learning_rate": 3.596479605023854e-06, - "loss": 1.0054, - "step": 2546 - }, - { - "epoch": 0.22969743427875727, - "grad_norm": 1.4405316436374456, - "learning_rate": 3.596127643613873e-06, - "loss": 1.0173, - "step": 2547 - }, - { - "epoch": 0.22978761780222753, - "grad_norm": 3.940087573244898, - "learning_rate": 3.59577554601348e-06, - "loss": 1.0269, - "step": 2548 - }, - { - "epoch": 0.2298778013256978, - "grad_norm": 1.4576835910597667, - "learning_rate": 3.595423312252719e-06, - "loss": 1.0587, - "step": 2549 - }, - { - "epoch": 0.22996798484916806, - "grad_norm": 1.724692803824167, - "learning_rate": 3.5950709423616436e-06, - "loss": 0.9981, - "step": 2550 - }, - { - "epoch": 0.23005816837263832, - "grad_norm": 1.702033786994166, - "learning_rate": 3.5947184363703203e-06, - "loss": 0.985, - "step": 2551 - }, - { - "epoch": 0.23014835189610858, - "grad_norm": 1.905487158054149, - "learning_rate": 3.5943657943088274e-06, - "loss": 0.9603, - "step": 2552 - }, - { - "epoch": 0.23023853541957884, - "grad_norm": 1.4325561926507207, - "learning_rate": 3.5940130162072525e-06, - "loss": 0.9332, - "step": 2553 - }, - { - "epoch": 0.2303287189430491, - "grad_norm": 2.858879171438052, - "learning_rate": 3.5936601020956985e-06, - "loss": 1.0119, - "step": 2554 - }, - { - "epoch": 0.23041890246651936, - "grad_norm": 4.79018350397028, - "learning_rate": 3.5933070520042772e-06, - "loss": 1.0378, - "step": 2555 - }, - { - "epoch": 0.23050908598998962, - "grad_norm": 0.7515818286083688, - "learning_rate": 3.5929538659631133e-06, - "loss": 0.8848, - "step": 2556 - }, - { - "epoch": 0.23059926951345988, - "grad_norm": 1.5948744154611452, - "learning_rate": 3.592600544002341e-06, - "loss": 1.0388, - "step": 2557 - }, - { - "epoch": 0.23068945303693014, - "grad_norm": 1.5391391793140607, - "learning_rate": 3.5922470861521098e-06, - "loss": 1.0077, - "step": 2558 - }, - { - "epoch": 0.2307796365604004, - "grad_norm": 1.4707520819997308, - "learning_rate": 3.591893492442577e-06, - "loss": 0.977, - "step": 2559 - }, - { - "epoch": 0.23086982008387066, - "grad_norm": 1.9904429857051047, - "learning_rate": 3.591539762903914e-06, - "loss": 1.0694, - "step": 2560 - }, - { - "epoch": 0.23096000360734095, - "grad_norm": 1.7913731475047499, - "learning_rate": 3.591185897566303e-06, - "loss": 0.9419, - "step": 2561 - }, - { - "epoch": 0.2310501871308112, - "grad_norm": 10.818740014347645, - "learning_rate": 3.590831896459937e-06, - "loss": 1.0026, - "step": 2562 - }, - { - "epoch": 0.23114037065428147, - "grad_norm": 2.113014415881352, - "learning_rate": 3.5904777596150222e-06, - "loss": 1.0115, - "step": 2563 - }, - { - "epoch": 0.23123055417775173, - "grad_norm": 1.720722800238676, - "learning_rate": 3.590123487061775e-06, - "loss": 1.0058, - "step": 2564 - }, - { - "epoch": 0.231320737701222, - "grad_norm": 1.6889833359424202, - "learning_rate": 3.589769078830424e-06, - "loss": 1.1034, - "step": 2565 - }, - { - "epoch": 0.23141092122469226, - "grad_norm": 1.7945497145676432, - "learning_rate": 3.58941453495121e-06, - "loss": 0.9842, - "step": 2566 - }, - { - "epoch": 0.23150110474816252, - "grad_norm": 1.4748409670425542, - "learning_rate": 3.5890598554543834e-06, - "loss": 1.0157, - "step": 2567 - }, - { - "epoch": 0.23159128827163278, - "grad_norm": 1.5543948717614835, - "learning_rate": 3.5887050403702073e-06, - "loss": 1.0619, - "step": 2568 - }, - { - "epoch": 0.23168147179510304, - "grad_norm": 1.6716128705319968, - "learning_rate": 3.588350089728958e-06, - "loss": 0.977, - "step": 2569 - }, - { - "epoch": 0.2317716553185733, - "grad_norm": 1.4462717533041698, - "learning_rate": 3.5879950035609204e-06, - "loss": 1.0291, - "step": 2570 - }, - { - "epoch": 0.23186183884204356, - "grad_norm": 1.748751671921842, - "learning_rate": 3.5876397818963933e-06, - "loss": 1.0053, - "step": 2571 - }, - { - "epoch": 0.23195202236551382, - "grad_norm": 1.7141304357344163, - "learning_rate": 3.5872844247656858e-06, - "loss": 1.0566, - "step": 2572 - }, - { - "epoch": 0.23204220588898408, - "grad_norm": 1.3947768222207886, - "learning_rate": 3.5869289321991195e-06, - "loss": 1.0135, - "step": 2573 - }, - { - "epoch": 0.23213238941245434, - "grad_norm": 1.4064134290885122, - "learning_rate": 3.5865733042270263e-06, - "loss": 0.9956, - "step": 2574 - }, - { - "epoch": 0.2322225729359246, - "grad_norm": 1.362925660723627, - "learning_rate": 3.5862175408797498e-06, - "loss": 1.0359, - "step": 2575 - }, - { - "epoch": 0.23231275645939486, - "grad_norm": 1.3971003101082315, - "learning_rate": 3.585861642187647e-06, - "loss": 0.9494, - "step": 2576 - }, - { - "epoch": 0.23240293998286513, - "grad_norm": 3.351341969452807, - "learning_rate": 3.5855056081810845e-06, - "loss": 1.0011, - "step": 2577 - }, - { - "epoch": 0.2324931235063354, - "grad_norm": 1.3523305573858464, - "learning_rate": 3.5851494388904406e-06, - "loss": 1.0091, - "step": 2578 - }, - { - "epoch": 0.23258330702980565, - "grad_norm": 1.4818575743733062, - "learning_rate": 3.5847931343461064e-06, - "loss": 0.9468, - "step": 2579 - }, - { - "epoch": 0.2326734905532759, - "grad_norm": 1.8107801367361036, - "learning_rate": 3.5844366945784835e-06, - "loss": 0.9414, - "step": 2580 - }, - { - "epoch": 0.23276367407674617, - "grad_norm": 3.113180927110456, - "learning_rate": 3.5840801196179856e-06, - "loss": 1.0598, - "step": 2581 - }, - { - "epoch": 0.23285385760021643, - "grad_norm": 1.726616353709749, - "learning_rate": 3.583723409495037e-06, - "loss": 1.0603, - "step": 2582 - }, - { - "epoch": 0.2329440411236867, - "grad_norm": 1.4584076544579094, - "learning_rate": 3.5833665642400747e-06, - "loss": 1.0623, - "step": 2583 - }, - { - "epoch": 0.23303422464715695, - "grad_norm": 1.5590536638851722, - "learning_rate": 3.5830095838835472e-06, - "loss": 0.9302, - "step": 2584 - }, - { - "epoch": 0.23312440817062724, - "grad_norm": 5.129418306145817, - "learning_rate": 3.5826524684559125e-06, - "loss": 0.9778, - "step": 2585 - }, - { - "epoch": 0.2332145916940975, - "grad_norm": 1.6678368116454143, - "learning_rate": 3.5822952179876433e-06, - "loss": 1.0929, - "step": 2586 - }, - { - "epoch": 0.23330477521756776, - "grad_norm": 1.2666523378381562, - "learning_rate": 3.5819378325092205e-06, - "loss": 0.9762, - "step": 2587 - }, - { - "epoch": 0.23339495874103802, - "grad_norm": 1.4509014221609313, - "learning_rate": 3.581580312051139e-06, - "loss": 1.0316, - "step": 2588 - }, - { - "epoch": 0.23348514226450828, - "grad_norm": 1.5545847489796927, - "learning_rate": 3.5812226566439057e-06, - "loss": 0.9381, - "step": 2589 - }, - { - "epoch": 0.23357532578797854, - "grad_norm": 1.5691033258010114, - "learning_rate": 3.580864866318036e-06, - "loss": 0.9844, - "step": 2590 - }, - { - "epoch": 0.2336655093114488, - "grad_norm": 2.504357355339814, - "learning_rate": 3.580506941104059e-06, - "loss": 1.0215, - "step": 2591 - }, - { - "epoch": 0.23375569283491907, - "grad_norm": 1.4739103365610668, - "learning_rate": 3.580148881032515e-06, - "loss": 0.9986, - "step": 2592 - }, - { - "epoch": 0.23384587635838933, - "grad_norm": 1.3132618554911095, - "learning_rate": 3.5797906861339556e-06, - "loss": 1.0151, - "step": 2593 - }, - { - "epoch": 0.2339360598818596, - "grad_norm": 1.6399699765457774, - "learning_rate": 3.5794323564389435e-06, - "loss": 1.0361, - "step": 2594 - }, - { - "epoch": 0.23402624340532985, - "grad_norm": 2.7739939816388315, - "learning_rate": 3.579073891978055e-06, - "loss": 1.0211, - "step": 2595 - }, - { - "epoch": 0.2341164269288001, - "grad_norm": 2.3876411915517637, - "learning_rate": 3.5787152927818746e-06, - "loss": 0.9807, - "step": 2596 - }, - { - "epoch": 0.23420661045227037, - "grad_norm": 1.2577239295624774, - "learning_rate": 3.5783565588810003e-06, - "loss": 1.059, - "step": 2597 - }, - { - "epoch": 0.23429679397574063, - "grad_norm": 1.5753503712477905, - "learning_rate": 3.5779976903060412e-06, - "loss": 0.9802, - "step": 2598 - }, - { - "epoch": 0.2343869774992109, - "grad_norm": 1.4650519871519863, - "learning_rate": 3.577638687087619e-06, - "loss": 1.0327, - "step": 2599 - }, - { - "epoch": 0.23447716102268115, - "grad_norm": 1.3043516720493946, - "learning_rate": 3.577279549256364e-06, - "loss": 1.0147, - "step": 2600 - }, - { - "epoch": 0.2345673445461514, - "grad_norm": 1.3809551977338705, - "learning_rate": 3.5769202768429213e-06, - "loss": 1.0449, - "step": 2601 - }, - { - "epoch": 0.23465752806962167, - "grad_norm": 2.2088675902346355, - "learning_rate": 3.5765608698779454e-06, - "loss": 0.9926, - "step": 2602 - }, - { - "epoch": 0.23474771159309193, - "grad_norm": 1.4851913616558994, - "learning_rate": 3.5762013283921033e-06, - "loss": 1.0196, - "step": 2603 - }, - { - "epoch": 0.2348378951165622, - "grad_norm": 1.739165280604606, - "learning_rate": 3.5758416524160728e-06, - "loss": 1.0097, - "step": 2604 - }, - { - "epoch": 0.23492807864003246, - "grad_norm": 1.367542024695463, - "learning_rate": 3.5754818419805427e-06, - "loss": 0.9999, - "step": 2605 - }, - { - "epoch": 0.23501826216350272, - "grad_norm": 1.24169921875, - "learning_rate": 3.575121897116216e-06, - "loss": 0.9806, - "step": 2606 - }, - { - "epoch": 0.23510844568697298, - "grad_norm": 1.3851178882017998, - "learning_rate": 3.574761817853803e-06, - "loss": 1.0444, - "step": 2607 - }, - { - "epoch": 0.23519862921044324, - "grad_norm": 1.2982586752434389, - "learning_rate": 3.5744016042240287e-06, - "loss": 0.9483, - "step": 2608 - }, - { - "epoch": 0.23528881273391353, - "grad_norm": 1.756219506592933, - "learning_rate": 3.5740412562576286e-06, - "loss": 0.9425, - "step": 2609 - }, - { - "epoch": 0.2353789962573838, - "grad_norm": 1.5156007745124165, - "learning_rate": 3.573680773985349e-06, - "loss": 0.9978, - "step": 2610 - }, - { - "epoch": 0.23546917978085405, - "grad_norm": 1.4277567515987588, - "learning_rate": 3.5733201574379486e-06, - "loss": 1.0009, - "step": 2611 - }, - { - "epoch": 0.2355593633043243, - "grad_norm": 1.791780497757003, - "learning_rate": 3.5729594066461975e-06, - "loss": 0.9535, - "step": 2612 - }, - { - "epoch": 0.23564954682779457, - "grad_norm": 1.2466856408757712, - "learning_rate": 3.572598521640876e-06, - "loss": 1.0257, - "step": 2613 - }, - { - "epoch": 0.23573973035126483, - "grad_norm": 0.7451948572877191, - "learning_rate": 3.5722375024527782e-06, - "loss": 0.888, - "step": 2614 - }, - { - "epoch": 0.2358299138747351, - "grad_norm": 1.2376986796866183, - "learning_rate": 3.571876349112707e-06, - "loss": 1.0309, - "step": 2615 - }, - { - "epoch": 0.23592009739820535, - "grad_norm": 1.378496752226293, - "learning_rate": 3.5715150616514784e-06, - "loss": 1.0081, - "step": 2616 - }, - { - "epoch": 0.2360102809216756, - "grad_norm": 1.8768071049348443, - "learning_rate": 3.5711536400999196e-06, - "loss": 1.0631, - "step": 2617 - }, - { - "epoch": 0.23610046444514587, - "grad_norm": 0.7305998659012132, - "learning_rate": 3.570792084488869e-06, - "loss": 0.9012, - "step": 2618 - }, - { - "epoch": 0.23619064796861614, - "grad_norm": 0.7874094956480204, - "learning_rate": 3.5704303948491764e-06, - "loss": 0.9518, - "step": 2619 - }, - { - "epoch": 0.2362808314920864, - "grad_norm": 1.6279783098617657, - "learning_rate": 3.5700685712117035e-06, - "loss": 1.0255, - "step": 2620 - }, - { - "epoch": 0.23637101501555666, - "grad_norm": 1.4601392554145776, - "learning_rate": 3.5697066136073227e-06, - "loss": 0.9956, - "step": 2621 - }, - { - "epoch": 0.23646119853902692, - "grad_norm": 1.3257747724169155, - "learning_rate": 3.5693445220669184e-06, - "loss": 1.008, - "step": 2622 - }, - { - "epoch": 0.23655138206249718, - "grad_norm": 1.276790253996043, - "learning_rate": 3.568982296621386e-06, - "loss": 1.0001, - "step": 2623 - }, - { - "epoch": 0.23664156558596744, - "grad_norm": 1.5282954302337504, - "learning_rate": 3.5686199373016325e-06, - "loss": 1.0812, - "step": 2624 - }, - { - "epoch": 0.2367317491094377, - "grad_norm": 1.480352553183504, - "learning_rate": 3.568257444138577e-06, - "loss": 0.9552, - "step": 2625 - }, - { - "epoch": 0.23682193263290796, - "grad_norm": 1.7688726079386963, - "learning_rate": 3.5678948171631495e-06, - "loss": 1.0035, - "step": 2626 - }, - { - "epoch": 0.23691211615637822, - "grad_norm": 1.4673422702330547, - "learning_rate": 3.5675320564062908e-06, - "loss": 1.0147, - "step": 2627 - }, - { - "epoch": 0.23700229967984848, - "grad_norm": 1.4462288915466959, - "learning_rate": 3.5671691618989533e-06, - "loss": 0.9238, - "step": 2628 - }, - { - "epoch": 0.23709248320331874, - "grad_norm": 2.066676909923805, - "learning_rate": 3.5668061336721024e-06, - "loss": 1.0166, - "step": 2629 - }, - { - "epoch": 0.237182666726789, - "grad_norm": 1.565334952588086, - "learning_rate": 3.5664429717567117e-06, - "loss": 1.0052, - "step": 2630 - }, - { - "epoch": 0.23727285025025927, - "grad_norm": 3.918794919475966, - "learning_rate": 3.56607967618377e-06, - "loss": 1.0391, - "step": 2631 - }, - { - "epoch": 0.23736303377372953, - "grad_norm": 2.207500468650414, - "learning_rate": 3.5657162469842754e-06, - "loss": 1.03, - "step": 2632 - }, - { - "epoch": 0.23745321729719981, - "grad_norm": 1.773049333078614, - "learning_rate": 3.5653526841892374e-06, - "loss": 1.0097, - "step": 2633 - }, - { - "epoch": 0.23754340082067008, - "grad_norm": 3.167975974868457, - "learning_rate": 3.564988987829676e-06, - "loss": 1.0419, - "step": 2634 - }, - { - "epoch": 0.23763358434414034, - "grad_norm": 1.622068181228845, - "learning_rate": 3.564625157936626e-06, - "loss": 0.9546, - "step": 2635 - }, - { - "epoch": 0.2377237678676106, - "grad_norm": 1.5895173208679525, - "learning_rate": 3.56426119454113e-06, - "loss": 1.0512, - "step": 2636 - }, - { - "epoch": 0.23781395139108086, - "grad_norm": 1.8201216569000953, - "learning_rate": 3.5638970976742436e-06, - "loss": 1.0834, - "step": 2637 - }, - { - "epoch": 0.23790413491455112, - "grad_norm": 1.8518554198266024, - "learning_rate": 3.5635328673670335e-06, - "loss": 1.0297, - "step": 2638 - }, - { - "epoch": 0.23799431843802138, - "grad_norm": 2.639910448896352, - "learning_rate": 3.5631685036505783e-06, - "loss": 1.1007, - "step": 2639 - }, - { - "epoch": 0.23808450196149164, - "grad_norm": 1.7723962356906209, - "learning_rate": 3.562804006555966e-06, - "loss": 1.0198, - "step": 2640 - }, - { - "epoch": 0.2381746854849619, - "grad_norm": 1.4481073523817967, - "learning_rate": 3.5624393761143e-06, - "loss": 0.959, - "step": 2641 - }, - { - "epoch": 0.23826486900843216, - "grad_norm": 1.7226138985546506, - "learning_rate": 3.5620746123566906e-06, - "loss": 0.9254, - "step": 2642 - }, - { - "epoch": 0.23835505253190242, - "grad_norm": 2.3577313287210266, - "learning_rate": 3.5617097153142623e-06, - "loss": 0.9611, - "step": 2643 - }, - { - "epoch": 0.23844523605537268, - "grad_norm": 1.3736619941630723, - "learning_rate": 3.5613446850181497e-06, - "loss": 1.0565, - "step": 2644 - }, - { - "epoch": 0.23853541957884294, - "grad_norm": 0.6085120473137717, - "learning_rate": 3.5609795214994996e-06, - "loss": 0.7971, - "step": 2645 - }, - { - "epoch": 0.2386256031023132, - "grad_norm": 1.39160824422738, - "learning_rate": 3.560614224789469e-06, - "loss": 1.0601, - "step": 2646 - }, - { - "epoch": 0.23871578662578347, - "grad_norm": 1.484106501338453, - "learning_rate": 3.5602487949192285e-06, - "loss": 0.9518, - "step": 2647 - }, - { - "epoch": 0.23880597014925373, - "grad_norm": 1.4958637748036334, - "learning_rate": 3.559883231919957e-06, - "loss": 0.9582, - "step": 2648 - }, - { - "epoch": 0.238896153672724, - "grad_norm": 2.6977079057871127, - "learning_rate": 3.5595175358228473e-06, - "loss": 0.9815, - "step": 2649 - }, - { - "epoch": 0.23898633719619425, - "grad_norm": 1.579981476759676, - "learning_rate": 3.5591517066591027e-06, - "loss": 1.0093, - "step": 2650 - }, - { - "epoch": 0.2390765207196645, - "grad_norm": 1.6474207775419665, - "learning_rate": 3.5587857444599364e-06, - "loss": 1.0035, - "step": 2651 - }, - { - "epoch": 0.23916670424313477, - "grad_norm": 1.7789399409799904, - "learning_rate": 3.5584196492565766e-06, - "loss": 0.9756, - "step": 2652 - }, - { - "epoch": 0.23925688776660503, - "grad_norm": 1.4853026854168543, - "learning_rate": 3.5580534210802587e-06, - "loss": 1.0692, - "step": 2653 - }, - { - "epoch": 0.2393470712900753, - "grad_norm": 6.234439247201851, - "learning_rate": 3.557687059962232e-06, - "loss": 1.0507, - "step": 2654 - }, - { - "epoch": 0.23943725481354555, - "grad_norm": 1.647444222393161, - "learning_rate": 3.5573205659337558e-06, - "loss": 1.0043, - "step": 2655 - }, - { - "epoch": 0.23952743833701584, - "grad_norm": 1.32876490716476, - "learning_rate": 3.5569539390261025e-06, - "loss": 0.991, - "step": 2656 - }, - { - "epoch": 0.2396176218604861, - "grad_norm": 1.275335664134497, - "learning_rate": 3.5565871792705543e-06, - "loss": 1.027, - "step": 2657 - }, - { - "epoch": 0.23970780538395636, - "grad_norm": 1.258503694349284, - "learning_rate": 3.5562202866984045e-06, - "loss": 1.0011, - "step": 2658 - }, - { - "epoch": 0.23979798890742662, - "grad_norm": 1.7340682849660358, - "learning_rate": 3.5558532613409594e-06, - "loss": 1.0153, - "step": 2659 - }, - { - "epoch": 0.23988817243089688, - "grad_norm": 1.4157545388344501, - "learning_rate": 3.555486103229535e-06, - "loss": 0.9463, - "step": 2660 - }, - { - "epoch": 0.23997835595436715, - "grad_norm": 1.9022470563248373, - "learning_rate": 3.5551188123954595e-06, - "loss": 1.0333, - "step": 2661 - }, - { - "epoch": 0.2400685394778374, - "grad_norm": 1.5110405871975932, - "learning_rate": 3.5547513888700715e-06, - "loss": 0.902, - "step": 2662 - }, - { - "epoch": 0.24015872300130767, - "grad_norm": 1.6172847994959527, - "learning_rate": 3.5543838326847224e-06, - "loss": 1.0371, - "step": 2663 - }, - { - "epoch": 0.24024890652477793, - "grad_norm": 1.725839109474044, - "learning_rate": 3.5540161438707744e-06, - "loss": 0.9857, - "step": 2664 - }, - { - "epoch": 0.2403390900482482, - "grad_norm": 1.5935509501302463, - "learning_rate": 3.5536483224596e-06, - "loss": 1.1671, - "step": 2665 - }, - { - "epoch": 0.24042927357171845, - "grad_norm": 1.7687471854790715, - "learning_rate": 3.553280368482584e-06, - "loss": 0.9847, - "step": 2666 - }, - { - "epoch": 0.2405194570951887, - "grad_norm": 1.7567304205258703, - "learning_rate": 3.5529122819711227e-06, - "loss": 0.8369, - "step": 2667 - }, - { - "epoch": 0.24060964061865897, - "grad_norm": 2.188946273081637, - "learning_rate": 3.5525440629566223e-06, - "loss": 0.958, - "step": 2668 - }, - { - "epoch": 0.24069982414212923, - "grad_norm": 1.7050032451744184, - "learning_rate": 3.552175711470502e-06, - "loss": 1.0305, - "step": 2669 - }, - { - "epoch": 0.2407900076655995, - "grad_norm": 1.4426568454777147, - "learning_rate": 3.5518072275441912e-06, - "loss": 1.0525, - "step": 2670 - }, - { - "epoch": 0.24088019118906975, - "grad_norm": 1.3411525639732378, - "learning_rate": 3.551438611209131e-06, - "loss": 1.0384, - "step": 2671 - }, - { - "epoch": 0.24097037471254001, - "grad_norm": 2.028257660134623, - "learning_rate": 3.551069862496774e-06, - "loss": 1.1032, - "step": 2672 - }, - { - "epoch": 0.24106055823601027, - "grad_norm": 0.6769862814574049, - "learning_rate": 3.5507009814385846e-06, - "loss": 0.8632, - "step": 2673 - }, - { - "epoch": 0.24115074175948054, - "grad_norm": 0.6957564276228035, - "learning_rate": 3.550331968066036e-06, - "loss": 0.8563, - "step": 2674 - }, - { - "epoch": 0.2412409252829508, - "grad_norm": 1.657268786637068, - "learning_rate": 3.549962822410616e-06, - "loss": 1.0282, - "step": 2675 - }, - { - "epoch": 0.24133110880642106, - "grad_norm": 0.7636885775327398, - "learning_rate": 3.5495935445038217e-06, - "loss": 0.8401, - "step": 2676 - }, - { - "epoch": 0.24142129232989132, - "grad_norm": 1.4804228521393725, - "learning_rate": 3.5492241343771612e-06, - "loss": 1.0481, - "step": 2677 - }, - { - "epoch": 0.24151147585336158, - "grad_norm": 1.611133836534785, - "learning_rate": 3.548854592062156e-06, - "loss": 1.0126, - "step": 2678 - }, - { - "epoch": 0.24160165937683184, - "grad_norm": 1.5474449417845582, - "learning_rate": 3.548484917590336e-06, - "loss": 0.9841, - "step": 2679 - }, - { - "epoch": 0.24169184290030213, - "grad_norm": 3.81377127042257, - "learning_rate": 3.5481151109932447e-06, - "loss": 0.9302, - "step": 2680 - }, - { - "epoch": 0.2417820264237724, - "grad_norm": 1.3896911142729864, - "learning_rate": 3.5477451723024364e-06, - "loss": 1.0345, - "step": 2681 - }, - { - "epoch": 0.24187220994724265, - "grad_norm": 0.5999297488332113, - "learning_rate": 3.5473751015494757e-06, - "loss": 0.8185, - "step": 2682 - }, - { - "epoch": 0.2419623934707129, - "grad_norm": 1.4660554884002035, - "learning_rate": 3.547004898765939e-06, - "loss": 1.0198, - "step": 2683 - }, - { - "epoch": 0.24205257699418317, - "grad_norm": 1.2053266915001914, - "learning_rate": 3.546634563983414e-06, - "loss": 1.0273, - "step": 2684 - }, - { - "epoch": 0.24214276051765343, - "grad_norm": 1.3036413052711788, - "learning_rate": 3.5462640972335002e-06, - "loss": 0.9738, - "step": 2685 - }, - { - "epoch": 0.2422329440411237, - "grad_norm": 1.651152991102722, - "learning_rate": 3.5458934985478077e-06, - "loss": 1.0314, - "step": 2686 - }, - { - "epoch": 0.24232312756459395, - "grad_norm": 1.8819410911364396, - "learning_rate": 3.5455227679579577e-06, - "loss": 0.9883, - "step": 2687 - }, - { - "epoch": 0.24241331108806422, - "grad_norm": 1.6567629613592392, - "learning_rate": 3.545151905495584e-06, - "loss": 1.1111, - "step": 2688 - }, - { - "epoch": 0.24250349461153448, - "grad_norm": 1.861485861852392, - "learning_rate": 3.544780911192329e-06, - "loss": 0.9943, - "step": 2689 - }, - { - "epoch": 0.24259367813500474, - "grad_norm": 13.878645134575795, - "learning_rate": 3.544409785079849e-06, - "loss": 0.9563, - "step": 2690 - }, - { - "epoch": 0.242683861658475, - "grad_norm": 1.248723045417151, - "learning_rate": 3.5440385271898103e-06, - "loss": 0.9584, - "step": 2691 - }, - { - "epoch": 0.24277404518194526, - "grad_norm": 1.447697007603447, - "learning_rate": 3.5436671375538903e-06, - "loss": 0.9395, - "step": 2692 - }, - { - "epoch": 0.24286422870541552, - "grad_norm": 1.6025560228193003, - "learning_rate": 3.543295616203779e-06, - "loss": 1.0107, - "step": 2693 - }, - { - "epoch": 0.24295441222888578, - "grad_norm": 1.3400210160415107, - "learning_rate": 3.542923963171176e-06, - "loss": 1.1061, - "step": 2694 - }, - { - "epoch": 0.24304459575235604, - "grad_norm": 1.3467051812760946, - "learning_rate": 3.542552178487793e-06, - "loss": 0.9283, - "step": 2695 - }, - { - "epoch": 0.2431347792758263, - "grad_norm": 1.520987629139769, - "learning_rate": 3.5421802621853523e-06, - "loss": 1.0025, - "step": 2696 - }, - { - "epoch": 0.24322496279929656, - "grad_norm": 1.608177925006936, - "learning_rate": 3.5418082142955887e-06, - "loss": 1.0051, - "step": 2697 - }, - { - "epoch": 0.24331514632276682, - "grad_norm": 0.6939213369336112, - "learning_rate": 3.5414360348502463e-06, - "loss": 0.8701, - "step": 2698 - }, - { - "epoch": 0.24340532984623708, - "grad_norm": 1.2508744995506538, - "learning_rate": 3.5410637238810825e-06, - "loss": 0.9119, - "step": 2699 - }, - { - "epoch": 0.24349551336970734, - "grad_norm": 1.4968344346445097, - "learning_rate": 3.5406912814198635e-06, - "loss": 0.9562, - "step": 2700 - }, - { - "epoch": 0.2435856968931776, - "grad_norm": 1.7035001289136975, - "learning_rate": 3.54031870749837e-06, - "loss": 0.9706, - "step": 2701 - }, - { - "epoch": 0.24367588041664787, - "grad_norm": 1.5631777247246448, - "learning_rate": 3.539946002148391e-06, - "loss": 1.0213, - "step": 2702 - }, - { - "epoch": 0.24376606394011813, - "grad_norm": 1.8363023111411207, - "learning_rate": 3.5395731654017277e-06, - "loss": 0.9989, - "step": 2703 - }, - { - "epoch": 0.24385624746358842, - "grad_norm": 1.6498919393815925, - "learning_rate": 3.5392001972901923e-06, - "loss": 1.0523, - "step": 2704 - }, - { - "epoch": 0.24394643098705868, - "grad_norm": 1.265793447705321, - "learning_rate": 3.5388270978456098e-06, - "loss": 0.9627, - "step": 2705 - }, - { - "epoch": 0.24403661451052894, - "grad_norm": 1.584137444678975, - "learning_rate": 3.5384538670998137e-06, - "loss": 0.9743, - "step": 2706 - }, - { - "epoch": 0.2441267980339992, - "grad_norm": 1.626902933250961, - "learning_rate": 3.538080505084651e-06, - "loss": 1.0344, - "step": 2707 - }, - { - "epoch": 0.24421698155746946, - "grad_norm": 1.2780661683661163, - "learning_rate": 3.5377070118319788e-06, - "loss": 1.0217, - "step": 2708 - }, - { - "epoch": 0.24430716508093972, - "grad_norm": 2.0237379403370555, - "learning_rate": 3.5373333873736657e-06, - "loss": 0.9667, - "step": 2709 - }, - { - "epoch": 0.24439734860440998, - "grad_norm": 1.8037340692462995, - "learning_rate": 3.536959631741591e-06, - "loss": 1.0191, - "step": 2710 - }, - { - "epoch": 0.24448753212788024, - "grad_norm": 1.059648895865442, - "learning_rate": 3.536585744967646e-06, - "loss": 1.0321, - "step": 2711 - }, - { - "epoch": 0.2445777156513505, - "grad_norm": 1.6283266788269903, - "learning_rate": 3.5362117270837326e-06, - "loss": 0.9953, - "step": 2712 - }, - { - "epoch": 0.24466789917482076, - "grad_norm": 1.2403551418349028, - "learning_rate": 3.5358375781217634e-06, - "loss": 0.9082, - "step": 2713 - }, - { - "epoch": 0.24475808269829102, - "grad_norm": 1.873709107118812, - "learning_rate": 3.535463298113664e-06, - "loss": 1.0386, - "step": 2714 - }, - { - "epoch": 0.24484826622176128, - "grad_norm": 1.6721499208103368, - "learning_rate": 3.5350888870913697e-06, - "loss": 0.9669, - "step": 2715 - }, - { - "epoch": 0.24493844974523155, - "grad_norm": 1.5994612889567588, - "learning_rate": 3.5347143450868273e-06, - "loss": 1.0196, - "step": 2716 - }, - { - "epoch": 0.2450286332687018, - "grad_norm": 1.5592198082522448, - "learning_rate": 3.534339672131994e-06, - "loss": 1.0205, - "step": 2717 - }, - { - "epoch": 0.24511881679217207, - "grad_norm": 1.2984292701432834, - "learning_rate": 3.5339648682588397e-06, - "loss": 0.9361, - "step": 2718 - }, - { - "epoch": 0.24520900031564233, - "grad_norm": 2.2928785991980476, - "learning_rate": 3.533589933499345e-06, - "loss": 1.0539, - "step": 2719 - }, - { - "epoch": 0.2452991838391126, - "grad_norm": 2.387756122212614, - "learning_rate": 3.533214867885501e-06, - "loss": 0.947, - "step": 2720 - }, - { - "epoch": 0.24538936736258285, - "grad_norm": 1.5428627460620192, - "learning_rate": 3.53283967144931e-06, - "loss": 0.9497, - "step": 2721 - }, - { - "epoch": 0.2454795508860531, - "grad_norm": 1.4123048765730755, - "learning_rate": 3.532464344222787e-06, - "loss": 1.0185, - "step": 2722 - }, - { - "epoch": 0.24556973440952337, - "grad_norm": 1.5148195309053016, - "learning_rate": 3.532088886237956e-06, - "loss": 1.0483, - "step": 2723 - }, - { - "epoch": 0.24565991793299363, - "grad_norm": 0.6714052509871258, - "learning_rate": 3.5317132975268535e-06, - "loss": 0.7884, - "step": 2724 - }, - { - "epoch": 0.2457501014564639, - "grad_norm": 1.3784964063152725, - "learning_rate": 3.531337578121526e-06, - "loss": 1.032, - "step": 2725 - }, - { - "epoch": 0.24584028497993415, - "grad_norm": 1.5608039806489222, - "learning_rate": 3.530961728054033e-06, - "loss": 1.0385, - "step": 2726 - }, - { - "epoch": 0.24593046850340441, - "grad_norm": 1.342267393991699, - "learning_rate": 3.5305857473564435e-06, - "loss": 0.9728, - "step": 2727 - }, - { - "epoch": 0.2460206520268747, - "grad_norm": 1.54658767651954, - "learning_rate": 3.5302096360608385e-06, - "loss": 0.9284, - "step": 2728 - }, - { - "epoch": 0.24611083555034496, - "grad_norm": 1.394829854330862, - "learning_rate": 3.5298333941993105e-06, - "loss": 1.0528, - "step": 2729 - }, - { - "epoch": 0.24620101907381522, - "grad_norm": 2.242635967848895, - "learning_rate": 3.529457021803962e-06, - "loss": 1.0014, - "step": 2730 - }, - { - "epoch": 0.24629120259728549, - "grad_norm": 2.580078125, - "learning_rate": 3.529080518906906e-06, - "loss": 1.0195, - "step": 2731 - }, - { - "epoch": 0.24638138612075575, - "grad_norm": 1.323600287035706, - "learning_rate": 3.5287038855402696e-06, - "loss": 0.9889, - "step": 2732 - }, - { - "epoch": 0.246471569644226, - "grad_norm": 2.0540468791006132, - "learning_rate": 3.528327121736188e-06, - "loss": 1.0416, - "step": 2733 - }, - { - "epoch": 0.24656175316769627, - "grad_norm": 1.5878722317652043, - "learning_rate": 3.52795022752681e-06, - "loss": 1.0899, - "step": 2734 - }, - { - "epoch": 0.24665193669116653, - "grad_norm": 1.8675723138210938, - "learning_rate": 3.5275732029442925e-06, - "loss": 1.0762, - "step": 2735 - }, - { - "epoch": 0.2467421202146368, - "grad_norm": 4.196985234497943, - "learning_rate": 3.5271960480208077e-06, - "loss": 0.9504, - "step": 2736 - }, - { - "epoch": 0.24683230373810705, - "grad_norm": 0.6761276151296572, - "learning_rate": 3.526818762788534e-06, - "loss": 0.8843, - "step": 2737 - }, - { - "epoch": 0.2469224872615773, - "grad_norm": 1.3126373219178258, - "learning_rate": 3.5264413472796653e-06, - "loss": 1.0414, - "step": 2738 - }, - { - "epoch": 0.24701267078504757, - "grad_norm": 1.8311762980449164, - "learning_rate": 3.5260638015264037e-06, - "loss": 0.9749, - "step": 2739 - }, - { - "epoch": 0.24710285430851783, - "grad_norm": 1.2228267173028327, - "learning_rate": 3.5256861255609644e-06, - "loss": 0.984, - "step": 2740 - }, - { - "epoch": 0.2471930378319881, - "grad_norm": 1.5308596054976382, - "learning_rate": 3.5253083194155723e-06, - "loss": 1.0226, - "step": 2741 - }, - { - "epoch": 0.24728322135545835, - "grad_norm": 1.2929409519676307, - "learning_rate": 3.5249303831224637e-06, - "loss": 1.0236, - "step": 2742 - }, - { - "epoch": 0.24737340487892862, - "grad_norm": 1.6437395595900686, - "learning_rate": 3.524552316713887e-06, - "loss": 1.004, - "step": 2743 - }, - { - "epoch": 0.24746358840239888, - "grad_norm": 1.8355088403125714, - "learning_rate": 3.5241741202220995e-06, - "loss": 1.0685, - "step": 2744 - }, - { - "epoch": 0.24755377192586914, - "grad_norm": 1.3164534744259153, - "learning_rate": 3.5237957936793724e-06, - "loss": 0.9943, - "step": 2745 - }, - { - "epoch": 0.2476439554493394, - "grad_norm": 1.3430157806878196, - "learning_rate": 3.523417337117986e-06, - "loss": 0.9661, - "step": 2746 - }, - { - "epoch": 0.24773413897280966, - "grad_norm": 1.9912406316753541, - "learning_rate": 3.523038750570232e-06, - "loss": 0.9532, - "step": 2747 - }, - { - "epoch": 0.24782432249627992, - "grad_norm": 1.7217924759434438, - "learning_rate": 3.522660034068414e-06, - "loss": 1.0378, - "step": 2748 - }, - { - "epoch": 0.24791450601975018, - "grad_norm": 1.5630178737728884, - "learning_rate": 3.5222811876448464e-06, - "loss": 1.0287, - "step": 2749 - }, - { - "epoch": 0.24800468954322044, - "grad_norm": 1.1802809277413122, - "learning_rate": 3.521902211331854e-06, - "loss": 1.028, - "step": 2750 - }, - { - "epoch": 0.2480948730666907, - "grad_norm": 1.3842859123274267, - "learning_rate": 3.5215231051617726e-06, - "loss": 0.9623, - "step": 2751 - }, - { - "epoch": 0.248185056590161, - "grad_norm": 1.6438215813081165, - "learning_rate": 3.521143869166951e-06, - "loss": 0.9116, - "step": 2752 - }, - { - "epoch": 0.24827524011363125, - "grad_norm": 1.6681577052843977, - "learning_rate": 3.5207645033797464e-06, - "loss": 1.0311, - "step": 2753 - }, - { - "epoch": 0.2483654236371015, - "grad_norm": 1.9085352597855898, - "learning_rate": 3.5203850078325293e-06, - "loss": 0.9398, - "step": 2754 - }, - { - "epoch": 0.24845560716057177, - "grad_norm": 1.3826143801731106, - "learning_rate": 3.5200053825576797e-06, - "loss": 0.9222, - "step": 2755 - }, - { - "epoch": 0.24854579068404203, - "grad_norm": 1.3801151412115598, - "learning_rate": 3.51962562758759e-06, - "loss": 1.0076, - "step": 2756 - }, - { - "epoch": 0.2486359742075123, - "grad_norm": 1.4301621113101393, - "learning_rate": 3.5192457429546627e-06, - "loss": 0.98, - "step": 2757 - }, - { - "epoch": 0.24872615773098256, - "grad_norm": 1.3942230909930415, - "learning_rate": 3.5188657286913115e-06, - "loss": 0.9751, - "step": 2758 - }, - { - "epoch": 0.24881634125445282, - "grad_norm": 1.771798742089717, - "learning_rate": 3.518485584829961e-06, - "loss": 0.9664, - "step": 2759 - }, - { - "epoch": 0.24890652477792308, - "grad_norm": 1.4585628692185795, - "learning_rate": 3.5181053114030485e-06, - "loss": 1.0209, - "step": 2760 - }, - { - "epoch": 0.24899670830139334, - "grad_norm": 1.9038878642287458, - "learning_rate": 3.5177249084430198e-06, - "loss": 1.0108, - "step": 2761 - }, - { - "epoch": 0.2490868918248636, - "grad_norm": 1.9365272541172047, - "learning_rate": 3.517344375982333e-06, - "loss": 1.0486, - "step": 2762 - }, - { - "epoch": 0.24917707534833386, - "grad_norm": 1.503838079319391, - "learning_rate": 3.5169637140534565e-06, - "loss": 1.0428, - "step": 2763 - }, - { - "epoch": 0.24926725887180412, - "grad_norm": 1.9258464314724983, - "learning_rate": 3.5165829226888733e-06, - "loss": 0.9775, - "step": 2764 - }, - { - "epoch": 0.24935744239527438, - "grad_norm": 1.3825756236496318, - "learning_rate": 3.516202001921072e-06, - "loss": 1.0282, - "step": 2765 - }, - { - "epoch": 0.24944762591874464, - "grad_norm": 1.4248619983172606, - "learning_rate": 3.515820951782555e-06, - "loss": 1.0354, - "step": 2766 - }, - { - "epoch": 0.2495378094422149, - "grad_norm": 1.5912324056679013, - "learning_rate": 3.5154397723058366e-06, - "loss": 0.9317, - "step": 2767 - }, - { - "epoch": 0.24962799296568516, - "grad_norm": 1.4827193814816044, - "learning_rate": 3.5150584635234416e-06, - "loss": 1.0516, - "step": 2768 - }, - { - "epoch": 0.24971817648915542, - "grad_norm": 1.1979014962382932, - "learning_rate": 3.5146770254679035e-06, - "loss": 1.0451, - "step": 2769 - }, - { - "epoch": 0.24980836001262569, - "grad_norm": 1.3750494601283902, - "learning_rate": 3.51429545817177e-06, - "loss": 0.9261, - "step": 2770 - }, - { - "epoch": 0.24989854353609595, - "grad_norm": 2.4289894205100917, - "learning_rate": 3.5139137616675985e-06, - "loss": 1.0152, - "step": 2771 - }, - { - "epoch": 0.2499887270595662, - "grad_norm": 1.4892955137389765, - "learning_rate": 3.513531935987957e-06, - "loss": 0.953, - "step": 2772 - }, - { - "epoch": 0.2500789105830365, - "grad_norm": 1.5320855021658077, - "learning_rate": 3.5131499811654253e-06, - "loss": 1.0295, - "step": 2773 - }, - { - "epoch": 0.25016909410650673, - "grad_norm": 1.5053246881024431, - "learning_rate": 3.512767897232594e-06, - "loss": 0.9669, - "step": 2774 - }, - { - "epoch": 0.250259277629977, - "grad_norm": 1.6861006268058885, - "learning_rate": 3.512385684222064e-06, - "loss": 0.9965, - "step": 2775 - }, - { - "epoch": 0.25034946115344725, - "grad_norm": 1.488393864413509, - "learning_rate": 3.512003342166449e-06, - "loss": 1.1032, - "step": 2776 - }, - { - "epoch": 0.25043964467691754, - "grad_norm": 1.6175488054556517, - "learning_rate": 3.511620871098371e-06, - "loss": 1.0459, - "step": 2777 - }, - { - "epoch": 0.25052982820038777, - "grad_norm": 1.995432525824516, - "learning_rate": 3.511238271050465e-06, - "loss": 1.03, - "step": 2778 - }, - { - "epoch": 0.25062001172385806, - "grad_norm": 1.4461597330711842, - "learning_rate": 3.5108555420553778e-06, - "loss": 0.9197, - "step": 2779 - }, - { - "epoch": 0.2507101952473283, - "grad_norm": 1.5341573090745344, - "learning_rate": 3.510472684145764e-06, - "loss": 0.8219, - "step": 2780 - }, - { - "epoch": 0.2508003787707986, - "grad_norm": 1.4894280608374593, - "learning_rate": 3.5100896973542926e-06, - "loss": 0.8934, - "step": 2781 - }, - { - "epoch": 0.2508905622942688, - "grad_norm": 1.6198014957174736, - "learning_rate": 3.509706581713642e-06, - "loss": 1.0456, - "step": 2782 - }, - { - "epoch": 0.2509807458177391, - "grad_norm": 1.3996133815442933, - "learning_rate": 3.509323337256501e-06, - "loss": 1.0212, - "step": 2783 - }, - { - "epoch": 0.25107092934120934, - "grad_norm": 1.522266896761, - "learning_rate": 3.5089399640155703e-06, - "loss": 0.9526, - "step": 2784 - }, - { - "epoch": 0.2511611128646796, - "grad_norm": 1.2947914667761053, - "learning_rate": 3.508556462023562e-06, - "loss": 0.9944, - "step": 2785 - }, - { - "epoch": 0.25125129638814986, - "grad_norm": 1.8304785551947425, - "learning_rate": 3.5081728313131984e-06, - "loss": 0.9878, - "step": 2786 - }, - { - "epoch": 0.25134147991162015, - "grad_norm": 1.6630214246332073, - "learning_rate": 3.5077890719172125e-06, - "loss": 1.0004, - "step": 2787 - }, - { - "epoch": 0.25143166343509044, - "grad_norm": 1.8063207724674721, - "learning_rate": 3.5074051838683497e-06, - "loss": 1.0078, - "step": 2788 - }, - { - "epoch": 0.25152184695856067, - "grad_norm": 1.6667448104976728, - "learning_rate": 3.5070211671993643e-06, - "loss": 0.9219, - "step": 2789 - }, - { - "epoch": 0.25161203048203096, - "grad_norm": 1.5153639263757608, - "learning_rate": 3.5066370219430238e-06, - "loss": 1.0239, - "step": 2790 - }, - { - "epoch": 0.2517022140055012, - "grad_norm": 1.5091520693722686, - "learning_rate": 3.5062527481321044e-06, - "loss": 1.0047, - "step": 2791 - }, - { - "epoch": 0.2517923975289715, - "grad_norm": 1.7887258421134942, - "learning_rate": 3.5058683457993954e-06, - "loss": 1.0108, - "step": 2792 - }, - { - "epoch": 0.2518825810524417, - "grad_norm": 0.7841241230168449, - "learning_rate": 3.5054838149776963e-06, - "loss": 0.8206, - "step": 2793 - }, - { - "epoch": 0.251972764575912, - "grad_norm": 1.5891179107937579, - "learning_rate": 3.505099155699816e-06, - "loss": 1.0386, - "step": 2794 - }, - { - "epoch": 0.25206294809938223, - "grad_norm": 6.215484221692419, - "learning_rate": 3.5047143679985775e-06, - "loss": 0.8931, - "step": 2795 - }, - { - "epoch": 0.2521531316228525, - "grad_norm": 1.4221653903758902, - "learning_rate": 3.5043294519068126e-06, - "loss": 1.0157, - "step": 2796 - }, - { - "epoch": 0.25224331514632276, - "grad_norm": 1.7363404143749217, - "learning_rate": 3.503944407457363e-06, - "loss": 0.9248, - "step": 2797 - }, - { - "epoch": 0.25233349866979304, - "grad_norm": 1.761151816756417, - "learning_rate": 3.5035592346830846e-06, - "loss": 1.0165, - "step": 2798 - }, - { - "epoch": 0.2524236821932633, - "grad_norm": 1.7481469152370483, - "learning_rate": 3.503173933616841e-06, - "loss": 1.0672, - "step": 2799 - }, - { - "epoch": 0.25251386571673357, - "grad_norm": 2.684385046978979, - "learning_rate": 3.50278850429151e-06, - "loss": 1.0801, - "step": 2800 - }, - { - "epoch": 0.2526040492402038, - "grad_norm": 1.583299895820236, - "learning_rate": 3.502402946739977e-06, - "loss": 1.0283, - "step": 2801 - }, - { - "epoch": 0.2526942327636741, - "grad_norm": 1.392851001600204, - "learning_rate": 3.5020172609951405e-06, - "loss": 1.0229, - "step": 2802 - }, - { - "epoch": 0.2527844162871443, - "grad_norm": 1.1866482641546947, - "learning_rate": 3.501631447089909e-06, - "loss": 1.0183, - "step": 2803 - }, - { - "epoch": 0.2528745998106146, - "grad_norm": 1.7957970080791632, - "learning_rate": 3.501245505057203e-06, - "loss": 0.9759, - "step": 2804 - }, - { - "epoch": 0.25296478333408484, - "grad_norm": 1.3920887733329583, - "learning_rate": 3.5008594349299526e-06, - "loss": 1.0754, - "step": 2805 - }, - { - "epoch": 0.25305496685755513, - "grad_norm": 1.4715076287733224, - "learning_rate": 3.500473236741099e-06, - "loss": 1.0014, - "step": 2806 - }, - { - "epoch": 0.25314515038102536, - "grad_norm": 1.331889170119736, - "learning_rate": 3.500086910523596e-06, - "loss": 0.9601, - "step": 2807 - }, - { - "epoch": 0.25323533390449565, - "grad_norm": 1.2340854353426483, - "learning_rate": 3.499700456310406e-06, - "loss": 0.9548, - "step": 2808 - }, - { - "epoch": 0.2533255174279659, - "grad_norm": 1.7663711051700912, - "learning_rate": 3.499313874134504e-06, - "loss": 1.0545, - "step": 2809 - }, - { - "epoch": 0.2534157009514362, - "grad_norm": 2.3987693929758005, - "learning_rate": 3.498927164028875e-06, - "loss": 0.8717, - "step": 2810 - }, - { - "epoch": 0.25350588447490646, - "grad_norm": 1.147730811016627, - "learning_rate": 3.498540326026515e-06, - "loss": 1.0417, - "step": 2811 - }, - { - "epoch": 0.2535960679983767, - "grad_norm": 1.2432203497358931, - "learning_rate": 3.4981533601604323e-06, - "loss": 1.0713, - "step": 2812 - }, - { - "epoch": 0.253686251521847, - "grad_norm": 1.623171951854601, - "learning_rate": 3.4977662664636443e-06, - "loss": 1.0139, - "step": 2813 - }, - { - "epoch": 0.2537764350453172, - "grad_norm": 1.6639682223696919, - "learning_rate": 3.497379044969179e-06, - "loss": 0.9674, - "step": 2814 - }, - { - "epoch": 0.2538666185687875, - "grad_norm": 1.5033875995379835, - "learning_rate": 3.4969916957100777e-06, - "loss": 0.9938, - "step": 2815 - }, - { - "epoch": 0.25395680209225774, - "grad_norm": 1.421666056613113, - "learning_rate": 3.4966042187193905e-06, - "loss": 0.9968, - "step": 2816 - }, - { - "epoch": 0.254046985615728, - "grad_norm": 1.3242685764357225, - "learning_rate": 3.496216614030179e-06, - "loss": 1.0087, - "step": 2817 - }, - { - "epoch": 0.25413716913919826, - "grad_norm": 1.7372796419771057, - "learning_rate": 3.495828881675516e-06, - "loss": 0.9114, - "step": 2818 - }, - { - "epoch": 0.25422735266266855, - "grad_norm": 1.7551221816958111, - "learning_rate": 3.4954410216884845e-06, - "loss": 1.0831, - "step": 2819 - }, - { - "epoch": 0.2543175361861388, - "grad_norm": 1.6274242657738234, - "learning_rate": 3.49505303410218e-06, - "loss": 1.0167, - "step": 2820 - }, - { - "epoch": 0.25440771970960907, - "grad_norm": 1.2505779360828433, - "learning_rate": 3.4946649189497067e-06, - "loss": 1.0372, - "step": 2821 - }, - { - "epoch": 0.2544979032330793, - "grad_norm": 3.492140528557149, - "learning_rate": 3.4942766762641805e-06, - "loss": 0.9867, - "step": 2822 - }, - { - "epoch": 0.2545880867565496, - "grad_norm": 1.6704790936885625, - "learning_rate": 3.49388830607873e-06, - "loss": 1.0267, - "step": 2823 - }, - { - "epoch": 0.2546782702800198, - "grad_norm": 1.2170147892030634, - "learning_rate": 3.493499808426491e-06, - "loss": 0.9723, - "step": 2824 - }, - { - "epoch": 0.2547684538034901, - "grad_norm": 1.4744881081138763, - "learning_rate": 3.493111183340614e-06, - "loss": 1.0535, - "step": 2825 - }, - { - "epoch": 0.25485863732696035, - "grad_norm": 1.485312557272626, - "learning_rate": 3.4927224308542576e-06, - "loss": 0.9077, - "step": 2826 - }, - { - "epoch": 0.25494882085043064, - "grad_norm": 1.2485891486388523, - "learning_rate": 3.4923335510005923e-06, - "loss": 1.0838, - "step": 2827 - }, - { - "epoch": 0.25503900437390087, - "grad_norm": 4.482896727298874, - "learning_rate": 3.4919445438128e-06, - "loss": 1.027, - "step": 2828 - }, - { - "epoch": 0.25512918789737116, - "grad_norm": 1.5293222866163798, - "learning_rate": 3.491555409324073e-06, - "loss": 1.0836, - "step": 2829 - }, - { - "epoch": 0.2552193714208414, - "grad_norm": 0.6891700012014118, - "learning_rate": 3.4911661475676136e-06, - "loss": 0.8248, - "step": 2830 - }, - { - "epoch": 0.2553095549443117, - "grad_norm": 1.4171271230333975, - "learning_rate": 3.490776758576637e-06, - "loss": 0.9808, - "step": 2831 - }, - { - "epoch": 0.2553997384677819, - "grad_norm": 4.260809447347587, - "learning_rate": 3.4903872423843668e-06, - "loss": 1.0551, - "step": 2832 - }, - { - "epoch": 0.2554899219912522, - "grad_norm": 1.8262714216058207, - "learning_rate": 3.4899975990240396e-06, - "loss": 0.968, - "step": 2833 - }, - { - "epoch": 0.25558010551472243, - "grad_norm": 1.9295832802740869, - "learning_rate": 3.489607828528901e-06, - "loss": 1.0254, - "step": 2834 - }, - { - "epoch": 0.2556702890381927, - "grad_norm": 0.655897204618492, - "learning_rate": 3.4892179309322093e-06, - "loss": 0.9186, - "step": 2835 - }, - { - "epoch": 0.255760472561663, - "grad_norm": 1.4347497089104524, - "learning_rate": 3.488827906267232e-06, - "loss": 0.9202, - "step": 2836 - }, - { - "epoch": 0.25585065608513324, - "grad_norm": 1.6360714162614254, - "learning_rate": 3.4884377545672485e-06, - "loss": 1.0439, - "step": 2837 - }, - { - "epoch": 0.25594083960860353, - "grad_norm": 1.413408876542929, - "learning_rate": 3.4880474758655485e-06, - "loss": 0.984, - "step": 2838 - }, - { - "epoch": 0.25603102313207377, - "grad_norm": 1.5759842204306058, - "learning_rate": 3.487657070195433e-06, - "loss": 1.0638, - "step": 2839 - }, - { - "epoch": 0.25612120665554405, - "grad_norm": 1.7217832675875422, - "learning_rate": 3.487266537590213e-06, - "loss": 1.0805, - "step": 2840 - }, - { - "epoch": 0.2562113901790143, - "grad_norm": 1.639101247265846, - "learning_rate": 3.4868758780832116e-06, - "loss": 0.9228, - "step": 2841 - }, - { - "epoch": 0.2563015737024846, - "grad_norm": 1.5616469534189867, - "learning_rate": 3.486485091707762e-06, - "loss": 1.0111, - "step": 2842 - }, - { - "epoch": 0.2563917572259548, - "grad_norm": 1.6406114123553779, - "learning_rate": 3.4860941784972077e-06, - "loss": 0.8798, - "step": 2843 - }, - { - "epoch": 0.2564819407494251, - "grad_norm": 1.712837453297439, - "learning_rate": 3.485703138484904e-06, - "loss": 1.1021, - "step": 2844 - }, - { - "epoch": 0.25657212427289533, - "grad_norm": 1.9744026536203416, - "learning_rate": 3.485311971704216e-06, - "loss": 1.0508, - "step": 2845 - }, - { - "epoch": 0.2566623077963656, - "grad_norm": 1.4122930594572132, - "learning_rate": 3.484920678188521e-06, - "loss": 1.0294, - "step": 2846 - }, - { - "epoch": 0.25675249131983585, - "grad_norm": 2.412905023601962, - "learning_rate": 3.4845292579712063e-06, - "loss": 1.0206, - "step": 2847 - }, - { - "epoch": 0.25684267484330614, - "grad_norm": 2.0650875159831443, - "learning_rate": 3.484137711085669e-06, - "loss": 0.8875, - "step": 2848 - }, - { - "epoch": 0.2569328583667764, - "grad_norm": 1.2509594096452843, - "learning_rate": 3.4837460375653198e-06, - "loss": 0.9976, - "step": 2849 - }, - { - "epoch": 0.25702304189024666, - "grad_norm": 1.2496762333233515, - "learning_rate": 3.483354237443576e-06, - "loss": 1.0374, - "step": 2850 - }, - { - "epoch": 0.2571132254137169, - "grad_norm": 7.281174557012591, - "learning_rate": 3.48296231075387e-06, - "loss": 0.9499, - "step": 2851 - }, - { - "epoch": 0.2572034089371872, - "grad_norm": 1.3123189256284316, - "learning_rate": 3.4825702575296433e-06, - "loss": 1.0191, - "step": 2852 - }, - { - "epoch": 0.2572935924606574, - "grad_norm": 0.7537128813225007, - "learning_rate": 3.482178077804347e-06, - "loss": 0.8728, - "step": 2853 - }, - { - "epoch": 0.2573837759841277, - "grad_norm": 2.493435155740973, - "learning_rate": 3.4817857716114443e-06, - "loss": 0.9678, - "step": 2854 - }, - { - "epoch": 0.25747395950759794, - "grad_norm": 1.5226129109775126, - "learning_rate": 3.4813933389844094e-06, - "loss": 0.9587, - "step": 2855 - }, - { - "epoch": 0.2575641430310682, - "grad_norm": 1.4990169959908424, - "learning_rate": 3.4810007799567264e-06, - "loss": 1.0083, - "step": 2856 - }, - { - "epoch": 0.25765432655453846, - "grad_norm": 1.7345296429948893, - "learning_rate": 3.480608094561891e-06, - "loss": 1.0271, - "step": 2857 - }, - { - "epoch": 0.25774451007800875, - "grad_norm": 2.272378681499047, - "learning_rate": 3.4802152828334083e-06, - "loss": 1.0298, - "step": 2858 - }, - { - "epoch": 0.25783469360147904, - "grad_norm": 1.4331730945234658, - "learning_rate": 3.479822344804796e-06, - "loss": 0.9491, - "step": 2859 - }, - { - "epoch": 0.25792487712494927, - "grad_norm": 1.538515410030278, - "learning_rate": 3.479429280509582e-06, - "loss": 1.057, - "step": 2860 - }, - { - "epoch": 0.25801506064841956, - "grad_norm": 1.4488852392207896, - "learning_rate": 3.4790360899813038e-06, - "loss": 1.0484, - "step": 2861 - }, - { - "epoch": 0.2581052441718898, - "grad_norm": 1.4575942846198728, - "learning_rate": 3.4786427732535115e-06, - "loss": 0.9349, - "step": 2862 - }, - { - "epoch": 0.2581954276953601, - "grad_norm": 2.375177878695475, - "learning_rate": 3.478249330359764e-06, - "loss": 1.0473, - "step": 2863 - }, - { - "epoch": 0.2582856112188303, - "grad_norm": 1.4207927231937365, - "learning_rate": 3.4778557613336333e-06, - "loss": 0.9366, - "step": 2864 - }, - { - "epoch": 0.2583757947423006, - "grad_norm": 1.6234940373082418, - "learning_rate": 3.4774620662087004e-06, - "loss": 1.0192, - "step": 2865 - }, - { - "epoch": 0.25846597826577083, - "grad_norm": 2.1154377030333555, - "learning_rate": 3.477068245018557e-06, - "loss": 1.004, - "step": 2866 - }, - { - "epoch": 0.2585561617892411, - "grad_norm": 1.6450833007916588, - "learning_rate": 3.476674297796807e-06, - "loss": 1.0112, - "step": 2867 - }, - { - "epoch": 0.25864634531271136, - "grad_norm": 1.7656378956551675, - "learning_rate": 3.4762802245770627e-06, - "loss": 1.048, - "step": 2868 - }, - { - "epoch": 0.25873652883618165, - "grad_norm": 1.320779210053749, - "learning_rate": 3.4758860253929497e-06, - "loss": 0.958, - "step": 2869 - }, - { - "epoch": 0.2588267123596519, - "grad_norm": 1.7347259853672774, - "learning_rate": 3.4754917002781038e-06, - "loss": 1.0214, - "step": 2870 - }, - { - "epoch": 0.25891689588312217, - "grad_norm": 2.0477516684773946, - "learning_rate": 3.475097249266169e-06, - "loss": 1.0467, - "step": 2871 - }, - { - "epoch": 0.2590070794065924, - "grad_norm": 1.4575265651002807, - "learning_rate": 3.4747026723908044e-06, - "loss": 0.9915, - "step": 2872 - }, - { - "epoch": 0.2590972629300627, - "grad_norm": 1.7340862961781267, - "learning_rate": 3.474307969685676e-06, - "loss": 0.9844, - "step": 2873 - }, - { - "epoch": 0.2591874464535329, - "grad_norm": 1.3229946904439915, - "learning_rate": 3.473913141184462e-06, - "loss": 0.941, - "step": 2874 - }, - { - "epoch": 0.2592776299770032, - "grad_norm": 1.3642558901315196, - "learning_rate": 3.4735181869208523e-06, - "loss": 0.8719, - "step": 2875 - }, - { - "epoch": 0.25936781350047344, - "grad_norm": 4.015604100748145, - "learning_rate": 3.473123106928546e-06, - "loss": 0.8841, - "step": 2876 - }, - { - "epoch": 0.25945799702394373, - "grad_norm": 1.4018471351197783, - "learning_rate": 3.4727279012412533e-06, - "loss": 0.9594, - "step": 2877 - }, - { - "epoch": 0.25954818054741396, - "grad_norm": 0.6963874779444199, - "learning_rate": 3.4723325698926953e-06, - "loss": 0.8434, - "step": 2878 - }, - { - "epoch": 0.25963836407088425, - "grad_norm": 1.712733123393246, - "learning_rate": 3.4719371129166045e-06, - "loss": 1.0141, - "step": 2879 - }, - { - "epoch": 0.2597285475943545, - "grad_norm": 1.5240309121959223, - "learning_rate": 3.471541530346723e-06, - "loss": 0.927, - "step": 2880 - }, - { - "epoch": 0.2598187311178248, - "grad_norm": 2.069908361756171, - "learning_rate": 3.4711458222168037e-06, - "loss": 1.0701, - "step": 2881 - }, - { - "epoch": 0.259908914641295, - "grad_norm": 1.6446249747128798, - "learning_rate": 3.4707499885606114e-06, - "loss": 1.0431, - "step": 2882 - }, - { - "epoch": 0.2599990981647653, - "grad_norm": 1.4705970188888327, - "learning_rate": 3.4703540294119204e-06, - "loss": 1.0386, - "step": 2883 - }, - { - "epoch": 0.2600892816882356, - "grad_norm": 2.1264689361147475, - "learning_rate": 3.4699579448045163e-06, - "loss": 1.0498, - "step": 2884 - }, - { - "epoch": 0.2601794652117058, - "grad_norm": 1.9662989542033087, - "learning_rate": 3.4695617347721947e-06, - "loss": 0.8947, - "step": 2885 - }, - { - "epoch": 0.2602696487351761, - "grad_norm": 1.6614018333976919, - "learning_rate": 3.469165399348763e-06, - "loss": 0.9691, - "step": 2886 - }, - { - "epoch": 0.26035983225864634, - "grad_norm": 1.7676564236154186, - "learning_rate": 3.4687689385680384e-06, - "loss": 0.9834, - "step": 2887 - }, - { - "epoch": 0.26045001578211663, - "grad_norm": 1.2801451687896002, - "learning_rate": 3.4683723524638494e-06, - "loss": 0.981, - "step": 2888 - }, - { - "epoch": 0.26054019930558686, - "grad_norm": 1.4565050744892514, - "learning_rate": 3.4679756410700354e-06, - "loss": 1.0677, - "step": 2889 - }, - { - "epoch": 0.26063038282905715, - "grad_norm": 4.249093127265488, - "learning_rate": 3.4675788044204445e-06, - "loss": 0.9972, - "step": 2890 - }, - { - "epoch": 0.2607205663525274, - "grad_norm": 2.194217938457845, - "learning_rate": 3.467181842548938e-06, - "loss": 0.9392, - "step": 2891 - }, - { - "epoch": 0.26081074987599767, - "grad_norm": 1.9221912527008957, - "learning_rate": 3.466784755489387e-06, - "loss": 0.9972, - "step": 2892 - }, - { - "epoch": 0.2609009333994679, - "grad_norm": 1.4541691136089094, - "learning_rate": 3.4663875432756726e-06, - "loss": 1.118, - "step": 2893 - }, - { - "epoch": 0.2609911169229382, - "grad_norm": 1.5643462145752092, - "learning_rate": 3.465990205941687e-06, - "loss": 1.172, - "step": 2894 - }, - { - "epoch": 0.2610813004464084, - "grad_norm": 1.4439889156941133, - "learning_rate": 3.465592743521335e-06, - "loss": 0.9887, - "step": 2895 - }, - { - "epoch": 0.2611714839698787, - "grad_norm": 1.6960945259419287, - "learning_rate": 3.465195156048528e-06, - "loss": 1.0563, - "step": 2896 - }, - { - "epoch": 0.26126166749334895, - "grad_norm": 0.5677827092251221, - "learning_rate": 3.464797443557191e-06, - "loss": 0.8465, - "step": 2897 - }, - { - "epoch": 0.26135185101681924, - "grad_norm": 1.7379056031740685, - "learning_rate": 3.46439960608126e-06, - "loss": 0.9788, - "step": 2898 - }, - { - "epoch": 0.26144203454028947, - "grad_norm": 3.368766962974744, - "learning_rate": 3.4640016436546797e-06, - "loss": 0.9481, - "step": 2899 - }, - { - "epoch": 0.26153221806375976, - "grad_norm": 1.5934148697892916, - "learning_rate": 3.4636035563114065e-06, - "loss": 0.9843, - "step": 2900 - }, - { - "epoch": 0.26162240158723, - "grad_norm": 1.5801824622487155, - "learning_rate": 3.4632053440854085e-06, - "loss": 0.9384, - "step": 2901 - }, - { - "epoch": 0.2617125851107003, - "grad_norm": 1.4789182248363748, - "learning_rate": 3.462807007010662e-06, - "loss": 1.0549, - "step": 2902 - }, - { - "epoch": 0.2618027686341705, - "grad_norm": 1.78438867139506, - "learning_rate": 3.462408545121155e-06, - "loss": 1.0413, - "step": 2903 - }, - { - "epoch": 0.2618929521576408, - "grad_norm": 2.4229886355635406, - "learning_rate": 3.4620099584508883e-06, - "loss": 1.0588, - "step": 2904 - }, - { - "epoch": 0.26198313568111103, - "grad_norm": 1.65838291375246, - "learning_rate": 3.46161124703387e-06, - "loss": 0.9635, - "step": 2905 - }, - { - "epoch": 0.2620733192045813, - "grad_norm": 1.6502512856073779, - "learning_rate": 3.461212410904122e-06, - "loss": 0.9968, - "step": 2906 - }, - { - "epoch": 0.2621635027280516, - "grad_norm": 1.5498267630703682, - "learning_rate": 3.4608134500956726e-06, - "loss": 1.0574, - "step": 2907 - }, - { - "epoch": 0.26225368625152184, - "grad_norm": 1.3469737660601124, - "learning_rate": 3.4604143646425655e-06, - "loss": 0.9892, - "step": 2908 - }, - { - "epoch": 0.26234386977499213, - "grad_norm": 1.2537145735507718, - "learning_rate": 3.460015154578852e-06, - "loss": 1.0783, - "step": 2909 - }, - { - "epoch": 0.26243405329846237, - "grad_norm": 1.4843976470825824, - "learning_rate": 3.459615819938595e-06, - "loss": 0.9756, - "step": 2910 - }, - { - "epoch": 0.26252423682193265, - "grad_norm": 3.216897672221627, - "learning_rate": 3.4592163607558684e-06, - "loss": 0.9678, - "step": 2911 - }, - { - "epoch": 0.2626144203454029, - "grad_norm": 1.3097758405669877, - "learning_rate": 3.4588167770647553e-06, - "loss": 1.003, - "step": 2912 - }, - { - "epoch": 0.2627046038688732, - "grad_norm": 2.1633393218903416, - "learning_rate": 3.458417068899351e-06, - "loss": 1.1028, - "step": 2913 - }, - { - "epoch": 0.2627947873923434, - "grad_norm": 1.43188074864233, - "learning_rate": 3.4580172362937612e-06, - "loss": 0.9781, - "step": 2914 - }, - { - "epoch": 0.2628849709158137, - "grad_norm": 1.14273701402575, - "learning_rate": 3.457617279282101e-06, - "loss": 0.967, - "step": 2915 - }, - { - "epoch": 0.26297515443928393, - "grad_norm": 2.287502030335239, - "learning_rate": 3.4572171978984975e-06, - "loss": 0.9758, - "step": 2916 - }, - { - "epoch": 0.2630653379627542, - "grad_norm": 4.476891417344776, - "learning_rate": 3.456816992177088e-06, - "loss": 1.0045, - "step": 2917 - }, - { - "epoch": 0.26315552148622445, - "grad_norm": 1.5725301389732174, - "learning_rate": 3.4564166621520193e-06, - "loss": 0.9775, - "step": 2918 - }, - { - "epoch": 0.26324570500969474, - "grad_norm": 1.535604629847271, - "learning_rate": 3.4560162078574507e-06, - "loss": 1.0021, - "step": 2919 - }, - { - "epoch": 0.263335888533165, - "grad_norm": 1.512391958239831, - "learning_rate": 3.455615629327551e-06, - "loss": 1.0103, - "step": 2920 - }, - { - "epoch": 0.26342607205663526, - "grad_norm": 1.7198360133176849, - "learning_rate": 3.4552149265964994e-06, - "loss": 0.9796, - "step": 2921 - }, - { - "epoch": 0.2635162555801055, - "grad_norm": 1.6731381502087141, - "learning_rate": 3.4548140996984866e-06, - "loss": 0.9543, - "step": 2922 - }, - { - "epoch": 0.2636064391035758, - "grad_norm": 1.35688017870083, - "learning_rate": 3.4544131486677124e-06, - "loss": 1.0257, - "step": 2923 - }, - { - "epoch": 0.263696622627046, - "grad_norm": 2.000118728928253, - "learning_rate": 3.454012073538389e-06, - "loss": 0.9452, - "step": 2924 - }, - { - "epoch": 0.2637868061505163, - "grad_norm": 1.2810500849626452, - "learning_rate": 3.453610874344738e-06, - "loss": 1.0053, - "step": 2925 - }, - { - "epoch": 0.26387698967398654, - "grad_norm": 1.3474066461700147, - "learning_rate": 3.453209551120993e-06, - "loss": 1.0434, - "step": 2926 - }, - { - "epoch": 0.26396717319745683, - "grad_norm": 2.63177515076485, - "learning_rate": 3.452808103901395e-06, - "loss": 0.977, - "step": 2927 - }, - { - "epoch": 0.26405735672092706, - "grad_norm": 1.3782391541966088, - "learning_rate": 3.4524065327202e-06, - "loss": 1.0728, - "step": 2928 - }, - { - "epoch": 0.26414754024439735, - "grad_norm": 0.6612255082164789, - "learning_rate": 3.4520048376116702e-06, - "loss": 0.7945, - "step": 2929 - }, - { - "epoch": 0.26423772376786764, - "grad_norm": 1.4556895769862848, - "learning_rate": 3.4516030186100817e-06, - "loss": 1.0431, - "step": 2930 - }, - { - "epoch": 0.26432790729133787, - "grad_norm": 1.302734603767432, - "learning_rate": 3.4512010757497197e-06, - "loss": 0.9188, - "step": 2931 - }, - { - "epoch": 0.26441809081480816, - "grad_norm": 2.768007276128566, - "learning_rate": 3.4507990090648804e-06, - "loss": 0.9188, - "step": 2932 - }, - { - "epoch": 0.2645082743382784, - "grad_norm": 1.913127187972514, - "learning_rate": 3.4503968185898696e-06, - "loss": 1.0264, - "step": 2933 - }, - { - "epoch": 0.2645984578617487, - "grad_norm": 1.43552287594204, - "learning_rate": 3.4499945043590047e-06, - "loss": 1.0109, - "step": 2934 - }, - { - "epoch": 0.2646886413852189, - "grad_norm": 1.5582064575251482, - "learning_rate": 3.4495920664066137e-06, - "loss": 1.0204, - "step": 2935 - }, - { - "epoch": 0.2647788249086892, - "grad_norm": 1.976507073028593, - "learning_rate": 3.449189504767035e-06, - "loss": 0.9992, - "step": 2936 - }, - { - "epoch": 0.26486900843215944, - "grad_norm": 2.0508756488690443, - "learning_rate": 3.4487868194746163e-06, - "loss": 1.018, - "step": 2937 - }, - { - "epoch": 0.2649591919556297, - "grad_norm": 1.5717756751893908, - "learning_rate": 3.4483840105637175e-06, - "loss": 1.0438, - "step": 2938 - }, - { - "epoch": 0.26504937547909996, - "grad_norm": 2.4883811846776145, - "learning_rate": 3.4479810780687097e-06, - "loss": 0.9946, - "step": 2939 - }, - { - "epoch": 0.26513955900257025, - "grad_norm": 1.528118824702542, - "learning_rate": 3.4475780220239714e-06, - "loss": 0.9961, - "step": 2940 - }, - { - "epoch": 0.2652297425260405, - "grad_norm": 1.2736540066202091, - "learning_rate": 3.4471748424638948e-06, - "loss": 0.9879, - "step": 2941 - }, - { - "epoch": 0.26531992604951077, - "grad_norm": 0.6077932222344291, - "learning_rate": 3.4467715394228803e-06, - "loss": 0.8402, - "step": 2942 - }, - { - "epoch": 0.265410109572981, - "grad_norm": 1.271446121976416, - "learning_rate": 3.4463681129353413e-06, - "loss": 1.0076, - "step": 2943 - }, - { - "epoch": 0.2655002930964513, - "grad_norm": 1.521850228548639, - "learning_rate": 3.4459645630357e-06, - "loss": 1.0028, - "step": 2944 - }, - { - "epoch": 0.2655904766199215, - "grad_norm": 1.4742318788074233, - "learning_rate": 3.4455608897583884e-06, - "loss": 1.0432, - "step": 2945 - }, - { - "epoch": 0.2656806601433918, - "grad_norm": 1.6040522134088013, - "learning_rate": 3.4451570931378514e-06, - "loss": 1.0021, - "step": 2946 - }, - { - "epoch": 0.26577084366686204, - "grad_norm": 1.3175031127748145, - "learning_rate": 3.444753173208543e-06, - "loss": 1.0136, - "step": 2947 - }, - { - "epoch": 0.26586102719033233, - "grad_norm": 1.5141151222241254, - "learning_rate": 3.444349130004927e-06, - "loss": 0.9986, - "step": 2948 - }, - { - "epoch": 0.26595121071380257, - "grad_norm": 3.6236627677321254, - "learning_rate": 3.4439449635614794e-06, - "loss": 0.9268, - "step": 2949 - }, - { - "epoch": 0.26604139423727285, - "grad_norm": 1.454593859484848, - "learning_rate": 3.4435406739126854e-06, - "loss": 1.032, - "step": 2950 - }, - { - "epoch": 0.2661315777607431, - "grad_norm": 1.9728421869556516, - "learning_rate": 3.443136261093042e-06, - "loss": 0.8159, - "step": 2951 - }, - { - "epoch": 0.2662217612842134, - "grad_norm": 1.5087680457172958, - "learning_rate": 3.4427317251370553e-06, - "loss": 0.9548, - "step": 2952 - }, - { - "epoch": 0.2663119448076836, - "grad_norm": 1.9521641923842112, - "learning_rate": 3.4423270660792422e-06, - "loss": 0.9226, - "step": 2953 - }, - { - "epoch": 0.2664021283311539, - "grad_norm": 2.1069963395024, - "learning_rate": 3.4419222839541314e-06, - "loss": 0.8972, - "step": 2954 - }, - { - "epoch": 0.2664923118546242, - "grad_norm": 1.4186457759536824, - "learning_rate": 3.4415173787962607e-06, - "loss": 0.9776, - "step": 2955 - }, - { - "epoch": 0.2665824953780944, - "grad_norm": 1.9993728608110723, - "learning_rate": 3.4411123506401783e-06, - "loss": 0.8674, - "step": 2956 - }, - { - "epoch": 0.2666726789015647, - "grad_norm": 1.5932442105611726, - "learning_rate": 3.440707199520444e-06, - "loss": 0.8612, - "step": 2957 - }, - { - "epoch": 0.26676286242503494, - "grad_norm": 1.4847213341084127, - "learning_rate": 3.440301925471628e-06, - "loss": 0.9654, - "step": 2958 - }, - { - "epoch": 0.26685304594850523, - "grad_norm": 1.686326360535908, - "learning_rate": 3.43989652852831e-06, - "loss": 1.0358, - "step": 2959 - }, - { - "epoch": 0.26694322947197546, - "grad_norm": 1.4212195593963455, - "learning_rate": 3.4394910087250804e-06, - "loss": 1.0619, - "step": 2960 - }, - { - "epoch": 0.26703341299544575, - "grad_norm": 1.2555738631792008, - "learning_rate": 3.4390853660965405e-06, - "loss": 1.0169, - "step": 2961 - }, - { - "epoch": 0.267123596518916, - "grad_norm": 1.485197702598901, - "learning_rate": 3.438679600677302e-06, - "loss": 1.0691, - "step": 2962 - }, - { - "epoch": 0.2672137800423863, - "grad_norm": 1.261242047181779, - "learning_rate": 3.4382737125019874e-06, - "loss": 0.9605, - "step": 2963 - }, - { - "epoch": 0.2673039635658565, - "grad_norm": 1.3178925316371848, - "learning_rate": 3.4378677016052294e-06, - "loss": 0.9713, - "step": 2964 - }, - { - "epoch": 0.2673941470893268, - "grad_norm": 2.0553042570357456, - "learning_rate": 3.43746156802167e-06, - "loss": 1.0183, - "step": 2965 - }, - { - "epoch": 0.267484330612797, - "grad_norm": 1.4171525271641732, - "learning_rate": 3.4370553117859643e-06, - "loss": 0.9375, - "step": 2966 - }, - { - "epoch": 0.2675745141362673, - "grad_norm": 1.378825890466156, - "learning_rate": 3.4366489329327754e-06, - "loss": 1.0324, - "step": 2967 - }, - { - "epoch": 0.26766469765973755, - "grad_norm": 2.318082309225924, - "learning_rate": 3.4362424314967777e-06, - "loss": 1.0197, - "step": 2968 - }, - { - "epoch": 0.26775488118320784, - "grad_norm": 1.594050659542534, - "learning_rate": 3.4358358075126567e-06, - "loss": 1.0118, - "step": 2969 - }, - { - "epoch": 0.26784506470667807, - "grad_norm": 1.785002920965802, - "learning_rate": 3.4354290610151077e-06, - "loss": 1.0248, - "step": 2970 - }, - { - "epoch": 0.26793524823014836, - "grad_norm": 1.2660095666671565, - "learning_rate": 3.4350221920388354e-06, - "loss": 1.0042, - "step": 2971 - }, - { - "epoch": 0.2680254317536186, - "grad_norm": 1.4088363599327651, - "learning_rate": 3.4346152006185574e-06, - "loss": 1.0968, - "step": 2972 - }, - { - "epoch": 0.2681156152770889, - "grad_norm": 1.9836559044023518, - "learning_rate": 3.4342080867890006e-06, - "loss": 1.0939, - "step": 2973 - }, - { - "epoch": 0.2682057988005591, - "grad_norm": 1.6652923480829847, - "learning_rate": 3.4338008505849016e-06, - "loss": 0.9207, - "step": 2974 - }, - { - "epoch": 0.2682959823240294, - "grad_norm": 1.3396302434005842, - "learning_rate": 3.433393492041008e-06, - "loss": 1.0093, - "step": 2975 - }, - { - "epoch": 0.26838616584749964, - "grad_norm": 1.307818647253978, - "learning_rate": 3.432986011192078e-06, - "loss": 1.0552, - "step": 2976 - }, - { - "epoch": 0.2684763493709699, - "grad_norm": 1.8633358225638061, - "learning_rate": 3.4325784080728796e-06, - "loss": 1.0659, - "step": 2977 - }, - { - "epoch": 0.2685665328944402, - "grad_norm": 1.4383036813224843, - "learning_rate": 3.4321706827181926e-06, - "loss": 1.0333, - "step": 2978 - }, - { - "epoch": 0.26865671641791045, - "grad_norm": 1.143681754489048, - "learning_rate": 3.4317628351628064e-06, - "loss": 0.9565, - "step": 2979 - }, - { - "epoch": 0.26874689994138073, - "grad_norm": 1.4168995591009421, - "learning_rate": 3.43135486544152e-06, - "loss": 0.9819, - "step": 2980 - }, - { - "epoch": 0.26883708346485097, - "grad_norm": 1.3209158066393465, - "learning_rate": 3.4309467735891442e-06, - "loss": 1.0404, - "step": 2981 - }, - { - "epoch": 0.26892726698832126, - "grad_norm": 1.551011921136471, - "learning_rate": 3.4305385596405e-06, - "loss": 1.0847, - "step": 2982 - }, - { - "epoch": 0.2690174505117915, - "grad_norm": 1.3590189149507257, - "learning_rate": 3.4301302236304174e-06, - "loss": 0.9693, - "step": 2983 - }, - { - "epoch": 0.2691076340352618, - "grad_norm": 0.6186367836841791, - "learning_rate": 3.429721765593739e-06, - "loss": 0.8042, - "step": 2984 - }, - { - "epoch": 0.269197817558732, - "grad_norm": 1.240582707066157, - "learning_rate": 3.4293131855653155e-06, - "loss": 1.0195, - "step": 2985 - }, - { - "epoch": 0.2692880010822023, - "grad_norm": 2.09821606204065, - "learning_rate": 3.4289044835800102e-06, - "loss": 0.9423, - "step": 2986 - }, - { - "epoch": 0.26937818460567253, - "grad_norm": 1.623746241564435, - "learning_rate": 3.4284956596726953e-06, - "loss": 1.0019, - "step": 2987 - }, - { - "epoch": 0.2694683681291428, - "grad_norm": 1.2518352387580156, - "learning_rate": 3.4280867138782544e-06, - "loss": 0.9936, - "step": 2988 - }, - { - "epoch": 0.26955855165261305, - "grad_norm": 1.9222172378127511, - "learning_rate": 3.4276776462315803e-06, - "loss": 0.9961, - "step": 2989 - }, - { - "epoch": 0.26964873517608334, - "grad_norm": 1.380785949567805, - "learning_rate": 3.427268456767578e-06, - "loss": 1.0053, - "step": 2990 - }, - { - "epoch": 0.2697389186995536, - "grad_norm": 1.4281769991366746, - "learning_rate": 3.42685914552116e-06, - "loss": 1.0446, - "step": 2991 - }, - { - "epoch": 0.26982910222302386, - "grad_norm": 1.325577975117556, - "learning_rate": 3.426449712527253e-06, - "loss": 0.9759, - "step": 2992 - }, - { - "epoch": 0.2699192857464941, - "grad_norm": 1.3449816714601432, - "learning_rate": 3.4260401578207904e-06, - "loss": 0.9147, - "step": 2993 - }, - { - "epoch": 0.2700094692699644, - "grad_norm": 4.629185612614291, - "learning_rate": 3.4256304814367185e-06, - "loss": 0.9769, - "step": 2994 - }, - { - "epoch": 0.2700996527934346, - "grad_norm": 1.7812789780786027, - "learning_rate": 3.4252206834099936e-06, - "loss": 0.9634, - "step": 2995 - }, - { - "epoch": 0.2701898363169049, - "grad_norm": 1.4337033429989339, - "learning_rate": 3.424810763775581e-06, - "loss": 1.0048, - "step": 2996 - }, - { - "epoch": 0.27028001984037514, - "grad_norm": 1.4977725497735304, - "learning_rate": 3.4244007225684587e-06, - "loss": 0.9396, - "step": 2997 - }, - { - "epoch": 0.27037020336384543, - "grad_norm": 1.3803911291988669, - "learning_rate": 3.4239905598236115e-06, - "loss": 1.0131, - "step": 2998 - }, - { - "epoch": 0.27046038688731566, - "grad_norm": 1.3562795099660496, - "learning_rate": 3.4235802755760386e-06, - "loss": 1.0303, - "step": 2999 - }, - { - "epoch": 0.27055057041078595, - "grad_norm": 0.8813108314999922, - "learning_rate": 3.4231698698607464e-06, - "loss": 0.9768, - "step": 3000 - }, - { - "epoch": 0.2706407539342562, - "grad_norm": 1.4403091845781502, - "learning_rate": 3.4227593427127543e-06, - "loss": 1.0561, - "step": 3001 - }, - { - "epoch": 0.2707309374577265, - "grad_norm": 1.7241242051400345, - "learning_rate": 3.42234869416709e-06, - "loss": 0.9748, - "step": 3002 - }, - { - "epoch": 0.27082112098119676, - "grad_norm": 2.0047060911367582, - "learning_rate": 3.421937924258792e-06, - "loss": 1.0085, - "step": 3003 - }, - { - "epoch": 0.270911304504667, - "grad_norm": 1.3667522500541034, - "learning_rate": 3.4215270330229096e-06, - "loss": 1.0404, - "step": 3004 - }, - { - "epoch": 0.2710014880281373, - "grad_norm": 1.5329081063197019, - "learning_rate": 3.421116020494503e-06, - "loss": 0.9695, - "step": 3005 - }, - { - "epoch": 0.2710916715516075, - "grad_norm": 1.2690768323594832, - "learning_rate": 3.420704886708642e-06, - "loss": 0.9518, - "step": 3006 - }, - { - "epoch": 0.2711818550750778, - "grad_norm": 2.755874428278785, - "learning_rate": 3.4202936317004056e-06, - "loss": 0.9557, - "step": 3007 - }, - { - "epoch": 0.27127203859854804, - "grad_norm": 1.3445487199143766, - "learning_rate": 3.4198822555048856e-06, - "loss": 1.0155, - "step": 3008 - }, - { - "epoch": 0.2713622221220183, - "grad_norm": 1.9866002739409458, - "learning_rate": 3.419470758157182e-06, - "loss": 1.0816, - "step": 3009 - }, - { - "epoch": 0.27145240564548856, - "grad_norm": 10.987033831501147, - "learning_rate": 3.4190591396924068e-06, - "loss": 0.9993, - "step": 3010 - }, - { - "epoch": 0.27154258916895885, - "grad_norm": 0.6376071652862962, - "learning_rate": 3.418647400145681e-06, - "loss": 0.8558, - "step": 3011 - }, - { - "epoch": 0.2716327726924291, - "grad_norm": 0.7386987303033381, - "learning_rate": 3.4182355395521367e-06, - "loss": 0.8379, - "step": 3012 - }, - { - "epoch": 0.27172295621589937, - "grad_norm": 1.325877543219221, - "learning_rate": 3.417823557946916e-06, - "loss": 1.0015, - "step": 3013 - }, - { - "epoch": 0.2718131397393696, - "grad_norm": 1.2853917888704949, - "learning_rate": 3.417411455365172e-06, - "loss": 0.8962, - "step": 3014 - }, - { - "epoch": 0.2719033232628399, - "grad_norm": 1.4320872860364589, - "learning_rate": 3.416999231842066e-06, - "loss": 0.9088, - "step": 3015 - }, - { - "epoch": 0.2719935067863101, - "grad_norm": 2.0199439329921116, - "learning_rate": 3.416586887412773e-06, - "loss": 1.0514, - "step": 3016 - }, - { - "epoch": 0.2720836903097804, - "grad_norm": 1.6190853316525615, - "learning_rate": 3.416174422112476e-06, - "loss": 0.9643, - "step": 3017 - }, - { - "epoch": 0.27217387383325065, - "grad_norm": 0.6260828412987993, - "learning_rate": 3.4157618359763687e-06, - "loss": 0.8045, - "step": 3018 - }, - { - "epoch": 0.27226405735672093, - "grad_norm": 1.3223033995423608, - "learning_rate": 3.4153491290396542e-06, - "loss": 0.9204, - "step": 3019 - }, - { - "epoch": 0.27235424088019117, - "grad_norm": 1.4863962165384008, - "learning_rate": 3.4149363013375485e-06, - "loss": 1.0167, - "step": 3020 - }, - { - "epoch": 0.27244442440366146, - "grad_norm": 0.6789880420154328, - "learning_rate": 3.414523352905276e-06, - "loss": 0.8192, - "step": 3021 - }, - { - "epoch": 0.2725346079271317, - "grad_norm": 1.6564207168982248, - "learning_rate": 3.414110283778071e-06, - "loss": 0.9711, - "step": 3022 - }, - { - "epoch": 0.272624791450602, - "grad_norm": 1.7906601614437447, - "learning_rate": 3.4136970939911797e-06, - "loss": 0.9487, - "step": 3023 - }, - { - "epoch": 0.2727149749740722, - "grad_norm": 1.4910442506452322, - "learning_rate": 3.413283783579857e-06, - "loss": 0.9726, - "step": 3024 - }, - { - "epoch": 0.2728051584975425, - "grad_norm": 6.552044486783275, - "learning_rate": 3.412870352579369e-06, - "loss": 0.9315, - "step": 3025 - }, - { - "epoch": 0.2728953420210128, - "grad_norm": 0.6407378957872477, - "learning_rate": 3.4124568010249915e-06, - "loss": 0.8455, - "step": 3026 - }, - { - "epoch": 0.272985525544483, - "grad_norm": 1.453021384205888, - "learning_rate": 3.4120431289520124e-06, - "loss": 0.9842, - "step": 3027 - }, - { - "epoch": 0.2730757090679533, - "grad_norm": 1.4116753508710913, - "learning_rate": 3.4116293363957276e-06, - "loss": 1.0138, - "step": 3028 - }, - { - "epoch": 0.27316589259142354, - "grad_norm": 1.7071801301817002, - "learning_rate": 3.4112154233914438e-06, - "loss": 1.0153, - "step": 3029 - }, - { - "epoch": 0.27325607611489383, - "grad_norm": 1.6601767864079071, - "learning_rate": 3.410801389974479e-06, - "loss": 0.9027, - "step": 3030 - }, - { - "epoch": 0.27334625963836406, - "grad_norm": 1.5900142493899039, - "learning_rate": 3.410387236180161e-06, - "loss": 1.0361, - "step": 3031 - }, - { - "epoch": 0.27343644316183435, - "grad_norm": 3.3021735691452863, - "learning_rate": 3.409972962043826e-06, - "loss": 0.8882, - "step": 3032 - }, - { - "epoch": 0.2735266266853046, - "grad_norm": 1.420994412566226, - "learning_rate": 3.4095585676008234e-06, - "loss": 1.0521, - "step": 3033 - }, - { - "epoch": 0.2736168102087749, - "grad_norm": 1.2161845079217872, - "learning_rate": 3.4091440528865125e-06, - "loss": 0.9995, - "step": 3034 - }, - { - "epoch": 0.2737069937322451, - "grad_norm": 1.5495795294904817, - "learning_rate": 3.4087294179362606e-06, - "loss": 1.0018, - "step": 3035 - }, - { - "epoch": 0.2737971772557154, - "grad_norm": 1.5695298555177122, - "learning_rate": 3.4083146627854474e-06, - "loss": 1.0458, - "step": 3036 - }, - { - "epoch": 0.27388736077918563, - "grad_norm": 1.914879075856381, - "learning_rate": 3.4078997874694614e-06, - "loss": 0.9203, - "step": 3037 - }, - { - "epoch": 0.2739775443026559, - "grad_norm": 2.266646372012369, - "learning_rate": 3.407484792023703e-06, - "loss": 0.8933, - "step": 3038 - }, - { - "epoch": 0.27406772782612615, - "grad_norm": 1.7040481865073789, - "learning_rate": 3.407069676483581e-06, - "loss": 0.979, - "step": 3039 - }, - { - "epoch": 0.27415791134959644, - "grad_norm": 1.8894502126731612, - "learning_rate": 3.406654440884516e-06, - "loss": 1.0378, - "step": 3040 - }, - { - "epoch": 0.2742480948730667, - "grad_norm": 1.549891141945099, - "learning_rate": 3.4062390852619372e-06, - "loss": 1.0276, - "step": 3041 - }, - { - "epoch": 0.27433827839653696, - "grad_norm": 1.8680646424228107, - "learning_rate": 3.4058236096512867e-06, - "loss": 1.0009, - "step": 3042 - }, - { - "epoch": 0.2744284619200072, - "grad_norm": 2.4125342608771394, - "learning_rate": 3.405408014088013e-06, - "loss": 0.9042, - "step": 3043 - }, - { - "epoch": 0.2745186454434775, - "grad_norm": 3.7507943901430942, - "learning_rate": 3.404992298607579e-06, - "loss": 0.8925, - "step": 3044 - }, - { - "epoch": 0.2746088289669477, - "grad_norm": 1.984254247640913, - "learning_rate": 3.4045764632454547e-06, - "loss": 0.9741, - "step": 3045 - }, - { - "epoch": 0.274699012490418, - "grad_norm": 1.3255256347968993, - "learning_rate": 3.4041605080371223e-06, - "loss": 0.9584, - "step": 3046 - }, - { - "epoch": 0.27478919601388824, - "grad_norm": 1.3082704514782564, - "learning_rate": 3.4037444330180726e-06, - "loss": 0.9996, - "step": 3047 - }, - { - "epoch": 0.2748793795373585, - "grad_norm": 1.7304424344165124, - "learning_rate": 3.403328238223808e-06, - "loss": 1.0155, - "step": 3048 - }, - { - "epoch": 0.27496956306082876, - "grad_norm": 1.6075945143849752, - "learning_rate": 3.4029119236898395e-06, - "loss": 1.0272, - "step": 3049 - }, - { - "epoch": 0.27505974658429905, - "grad_norm": 0.8160160355044181, - "learning_rate": 3.4024954894516906e-06, - "loss": 0.8934, - "step": 3050 - }, - { - "epoch": 0.27514993010776934, - "grad_norm": 1.669178222689888, - "learning_rate": 3.4020789355448933e-06, - "loss": 1.0015, - "step": 3051 - }, - { - "epoch": 0.27524011363123957, - "grad_norm": 3.2308651958787182, - "learning_rate": 3.40166226200499e-06, - "loss": 1.0673, - "step": 3052 - }, - { - "epoch": 0.27533029715470986, - "grad_norm": 1.6088332958932101, - "learning_rate": 3.401245468867534e-06, - "loss": 0.9777, - "step": 3053 - }, - { - "epoch": 0.2754204806781801, - "grad_norm": 0.710924965884208, - "learning_rate": 3.400828556168088e-06, - "loss": 0.9041, - "step": 3054 - }, - { - "epoch": 0.2755106642016504, - "grad_norm": 1.571998336533887, - "learning_rate": 3.4004115239422255e-06, - "loss": 1.009, - "step": 3055 - }, - { - "epoch": 0.2756008477251206, - "grad_norm": 1.9058170139540889, - "learning_rate": 3.3999943722255305e-06, - "loss": 0.9854, - "step": 3056 - }, - { - "epoch": 0.2756910312485909, - "grad_norm": 1.6055752238587249, - "learning_rate": 3.3995771010535955e-06, - "loss": 0.9029, - "step": 3057 - }, - { - "epoch": 0.27578121477206113, - "grad_norm": 0.7733292166693504, - "learning_rate": 3.3991597104620253e-06, - "loss": 0.826, - "step": 3058 - }, - { - "epoch": 0.2758713982955314, - "grad_norm": 1.4396685124696271, - "learning_rate": 3.398742200486434e-06, - "loss": 0.9775, - "step": 3059 - }, - { - "epoch": 0.27596158181900166, - "grad_norm": 1.7396105537645261, - "learning_rate": 3.3983245711624453e-06, - "loss": 1.0152, - "step": 3060 - }, - { - "epoch": 0.27605176534247194, - "grad_norm": 0.8664497811721719, - "learning_rate": 3.3979068225256946e-06, - "loss": 0.9319, - "step": 3061 - }, - { - "epoch": 0.2761419488659422, - "grad_norm": 1.7189526525149361, - "learning_rate": 3.3974889546118246e-06, - "loss": 0.9662, - "step": 3062 - }, - { - "epoch": 0.27623213238941247, - "grad_norm": 1.7910383882396808, - "learning_rate": 3.3970709674564918e-06, - "loss": 0.9445, - "step": 3063 - }, - { - "epoch": 0.2763223159128827, - "grad_norm": 1.294908526367614, - "learning_rate": 3.3966528610953607e-06, - "loss": 0.9906, - "step": 3064 - }, - { - "epoch": 0.276412499436353, - "grad_norm": 3.4347301073948344, - "learning_rate": 3.3962346355641067e-06, - "loss": 1.0009, - "step": 3065 - }, - { - "epoch": 0.2765026829598232, - "grad_norm": 1.5019103442226935, - "learning_rate": 3.3958162908984146e-06, - "loss": 1.0347, - "step": 3066 - }, - { - "epoch": 0.2765928664832935, - "grad_norm": 1.402342432388069, - "learning_rate": 3.39539782713398e-06, - "loss": 0.9867, - "step": 3067 - }, - { - "epoch": 0.27668305000676374, - "grad_norm": 1.7614593653240673, - "learning_rate": 3.394979244306509e-06, - "loss": 1.0208, - "step": 3068 - }, - { - "epoch": 0.27677323353023403, - "grad_norm": 1.5768326859574493, - "learning_rate": 3.3945605424517166e-06, - "loss": 1.0148, - "step": 3069 - }, - { - "epoch": 0.27686341705370426, - "grad_norm": 1.52759169495316, - "learning_rate": 3.3941417216053294e-06, - "loss": 0.991, - "step": 3070 - }, - { - "epoch": 0.27695360057717455, - "grad_norm": 1.566872620065325, - "learning_rate": 3.3937227818030835e-06, - "loss": 0.965, - "step": 3071 - }, - { - "epoch": 0.2770437841006448, - "grad_norm": 1.5555968572416607, - "learning_rate": 3.393303723080725e-06, - "loss": 0.9664, - "step": 3072 - }, - { - "epoch": 0.2771339676241151, - "grad_norm": 1.4168431882648291, - "learning_rate": 3.3928845454740097e-06, - "loss": 1.0033, - "step": 3073 - }, - { - "epoch": 0.27722415114758536, - "grad_norm": 1.625385385443895, - "learning_rate": 3.392465249018705e-06, - "loss": 0.981, - "step": 3074 - }, - { - "epoch": 0.2773143346710556, - "grad_norm": 1.3734155976477038, - "learning_rate": 3.3920458337505872e-06, - "loss": 0.985, - "step": 3075 - }, - { - "epoch": 0.2774045181945259, - "grad_norm": 1.9087314401459956, - "learning_rate": 3.391626299705443e-06, - "loss": 0.8397, - "step": 3076 - }, - { - "epoch": 0.2774947017179961, - "grad_norm": 1.434919113644186, - "learning_rate": 3.39120664691907e-06, - "loss": 1.0023, - "step": 3077 - }, - { - "epoch": 0.2775848852414664, - "grad_norm": 1.5112154810444192, - "learning_rate": 3.390786875427275e-06, - "loss": 0.9767, - "step": 3078 - }, - { - "epoch": 0.27767506876493664, - "grad_norm": 1.6177894098713135, - "learning_rate": 3.390366985265875e-06, - "loss": 0.9876, - "step": 3079 - }, - { - "epoch": 0.2777652522884069, - "grad_norm": 1.2537469970728015, - "learning_rate": 3.389946976470697e-06, - "loss": 0.9482, - "step": 3080 - }, - { - "epoch": 0.27785543581187716, - "grad_norm": 1.2646722857609018, - "learning_rate": 3.3895268490775787e-06, - "loss": 1.0843, - "step": 3081 - }, - { - "epoch": 0.27794561933534745, - "grad_norm": 1.3751027762409342, - "learning_rate": 3.3891066031223685e-06, - "loss": 0.9551, - "step": 3082 - }, - { - "epoch": 0.2780358028588177, - "grad_norm": 1.5459497940873186, - "learning_rate": 3.3886862386409237e-06, - "loss": 1.0238, - "step": 3083 - }, - { - "epoch": 0.27812598638228797, - "grad_norm": 1.5228851882179202, - "learning_rate": 3.388265755669111e-06, - "loss": 0.9558, - "step": 3084 - }, - { - "epoch": 0.2782161699057582, - "grad_norm": 1.4111119463476745, - "learning_rate": 3.3878451542428093e-06, - "loss": 1.0561, - "step": 3085 - }, - { - "epoch": 0.2783063534292285, - "grad_norm": 1.284322357955228, - "learning_rate": 3.387424434397907e-06, - "loss": 0.9805, - "step": 3086 - }, - { - "epoch": 0.2783965369526987, - "grad_norm": 1.5538618492814031, - "learning_rate": 3.3870035961703013e-06, - "loss": 0.9128, - "step": 3087 - }, - { - "epoch": 0.278486720476169, - "grad_norm": 1.7789564257126567, - "learning_rate": 3.3865826395959014e-06, - "loss": 1.1173, - "step": 3088 - }, - { - "epoch": 0.27857690399963925, - "grad_norm": 1.3489942459962148, - "learning_rate": 3.3861615647106253e-06, - "loss": 1.0821, - "step": 3089 - }, - { - "epoch": 0.27866708752310954, - "grad_norm": 1.6525497127794364, - "learning_rate": 3.3857403715504012e-06, - "loss": 1.0077, - "step": 3090 - }, - { - "epoch": 0.27875727104657977, - "grad_norm": 1.8561767512296397, - "learning_rate": 3.385319060151167e-06, - "loss": 0.9602, - "step": 3091 - }, - { - "epoch": 0.27884745457005006, - "grad_norm": 1.9140726517388633, - "learning_rate": 3.3848976305488728e-06, - "loss": 0.9875, - "step": 3092 - }, - { - "epoch": 0.2789376380935203, - "grad_norm": 1.74493929869435, - "learning_rate": 3.384476082779476e-06, - "loss": 1.0295, - "step": 3093 - }, - { - "epoch": 0.2790278216169906, - "grad_norm": 2.0724852478827565, - "learning_rate": 3.3840544168789463e-06, - "loss": 0.9851, - "step": 3094 - }, - { - "epoch": 0.2791180051404608, - "grad_norm": 1.5821514319464587, - "learning_rate": 3.3836326328832617e-06, - "loss": 1.1079, - "step": 3095 - }, - { - "epoch": 0.2792081886639311, - "grad_norm": 1.7833082788555354, - "learning_rate": 3.383210730828412e-06, - "loss": 0.9467, - "step": 3096 - }, - { - "epoch": 0.2792983721874014, - "grad_norm": 2.1186049105048315, - "learning_rate": 3.3827887107503953e-06, - "loss": 1.0564, - "step": 3097 - }, - { - "epoch": 0.2793885557108716, - "grad_norm": 2.3215672860254775, - "learning_rate": 3.3823665726852216e-06, - "loss": 0.9408, - "step": 3098 - }, - { - "epoch": 0.2794787392343419, - "grad_norm": 1.9121085645227789, - "learning_rate": 3.3819443166689095e-06, - "loss": 1.113, - "step": 3099 - }, - { - "epoch": 0.27956892275781214, - "grad_norm": 1.3012902807474438, - "learning_rate": 3.3815219427374886e-06, - "loss": 0.9809, - "step": 3100 - }, - { - "epoch": 0.27965910628128243, - "grad_norm": 1.3588436886125248, - "learning_rate": 3.3810994509269975e-06, - "loss": 1.0057, - "step": 3101 - }, - { - "epoch": 0.27974928980475267, - "grad_norm": 1.4900108559744223, - "learning_rate": 3.3806768412734864e-06, - "loss": 1.0296, - "step": 3102 - }, - { - "epoch": 0.27983947332822295, - "grad_norm": 1.261319407282025, - "learning_rate": 3.380254113813014e-06, - "loss": 1.0763, - "step": 3103 - }, - { - "epoch": 0.2799296568516932, - "grad_norm": 1.474609375, - "learning_rate": 3.3798312685816496e-06, - "loss": 1.0295, - "step": 3104 - }, - { - "epoch": 0.2800198403751635, - "grad_norm": 1.4587428290052762, - "learning_rate": 3.3794083056154738e-06, - "loss": 0.9908, - "step": 3105 - }, - { - "epoch": 0.2801100238986337, - "grad_norm": 5.588356529314362, - "learning_rate": 3.3789852249505746e-06, - "loss": 1.0026, - "step": 3106 - }, - { - "epoch": 0.280200207422104, - "grad_norm": 1.597647818470837, - "learning_rate": 3.378562026623053e-06, - "loss": 1.0561, - "step": 3107 - }, - { - "epoch": 0.28029039094557423, - "grad_norm": 1.9384565145767467, - "learning_rate": 3.3781387106690175e-06, - "loss": 1.0627, - "step": 3108 - }, - { - "epoch": 0.2803805744690445, - "grad_norm": 1.3145765045440045, - "learning_rate": 3.3777152771245885e-06, - "loss": 1.0237, - "step": 3109 - }, - { - "epoch": 0.28047075799251475, - "grad_norm": 1.5315729695919462, - "learning_rate": 3.377291726025895e-06, - "loss": 1.0284, - "step": 3110 - }, - { - "epoch": 0.28056094151598504, - "grad_norm": 1.6983252971684903, - "learning_rate": 3.3768680574090782e-06, - "loss": 1.1069, - "step": 3111 - }, - { - "epoch": 0.2806511250394553, - "grad_norm": 1.7780233907244463, - "learning_rate": 3.3764442713102857e-06, - "loss": 0.9757, - "step": 3112 - }, - { - "epoch": 0.28074130856292556, - "grad_norm": 1.3297037054165897, - "learning_rate": 3.3760203677656786e-06, - "loss": 0.9967, - "step": 3113 - }, - { - "epoch": 0.2808314920863958, - "grad_norm": 1.759332093279711, - "learning_rate": 3.3755963468114262e-06, - "loss": 1.0496, - "step": 3114 - }, - { - "epoch": 0.2809216756098661, - "grad_norm": 1.4094020274207661, - "learning_rate": 3.3751722084837095e-06, - "loss": 0.9079, - "step": 3115 - }, - { - "epoch": 0.2810118591333363, - "grad_norm": 2.456748764222551, - "learning_rate": 3.3747479528187166e-06, - "loss": 0.9698, - "step": 3116 - }, - { - "epoch": 0.2811020426568066, - "grad_norm": 1.8657668706731005, - "learning_rate": 3.3743235798526485e-06, - "loss": 1.0408, - "step": 3117 - }, - { - "epoch": 0.28119222618027684, - "grad_norm": 1.3807720064961266, - "learning_rate": 3.373899089621714e-06, - "loss": 0.9344, - "step": 3118 - }, - { - "epoch": 0.2812824097037471, - "grad_norm": 2.085632073262092, - "learning_rate": 3.373474482162134e-06, - "loss": 0.9807, - "step": 3119 - }, - { - "epoch": 0.28137259322721736, - "grad_norm": 1.6291814704215928, - "learning_rate": 3.3730497575101376e-06, - "loss": 1.0043, - "step": 3120 - }, - { - "epoch": 0.28146277675068765, - "grad_norm": 3.114127815813387, - "learning_rate": 3.3726249157019654e-06, - "loss": 0.9875, - "step": 3121 - }, - { - "epoch": 0.28155296027415794, - "grad_norm": 1.5083617954132977, - "learning_rate": 3.372199956773866e-06, - "loss": 1.0774, - "step": 3122 - }, - { - "epoch": 0.28164314379762817, - "grad_norm": 1.38168405297955, - "learning_rate": 3.371774880762101e-06, - "loss": 0.95, - "step": 3123 - }, - { - "epoch": 0.28173332732109846, - "grad_norm": 1.9357025668966383, - "learning_rate": 3.3713496877029392e-06, - "loss": 0.9779, - "step": 3124 - }, - { - "epoch": 0.2818235108445687, - "grad_norm": 1.4997712596690689, - "learning_rate": 3.37092437763266e-06, - "loss": 1.0608, - "step": 3125 - }, - { - "epoch": 0.281913694368039, - "grad_norm": 1.999822012610823, - "learning_rate": 3.3704989505875537e-06, - "loss": 1.0271, - "step": 3126 - }, - { - "epoch": 0.2820038778915092, - "grad_norm": 1.3617390213648646, - "learning_rate": 3.3700734066039205e-06, - "loss": 0.9694, - "step": 3127 - }, - { - "epoch": 0.2820940614149795, - "grad_norm": 1.5244480009534873, - "learning_rate": 3.36964774571807e-06, - "loss": 1.0272, - "step": 3128 - }, - { - "epoch": 0.28218424493844974, - "grad_norm": 2.0734460688214873, - "learning_rate": 3.3692219679663206e-06, - "loss": 0.9958, - "step": 3129 - }, - { - "epoch": 0.28227442846192, - "grad_norm": 1.798904997169882, - "learning_rate": 3.3687960733850043e-06, - "loss": 1.0092, - "step": 3130 - }, - { - "epoch": 0.28236461198539026, - "grad_norm": 1.6434281879051695, - "learning_rate": 3.3683700620104586e-06, - "loss": 0.9411, - "step": 3131 - }, - { - "epoch": 0.28245479550886055, - "grad_norm": 1.7248097494502708, - "learning_rate": 3.3679439338790347e-06, - "loss": 1.0185, - "step": 3132 - }, - { - "epoch": 0.2825449790323308, - "grad_norm": 1.5286827364555349, - "learning_rate": 3.3675176890270916e-06, - "loss": 1.1203, - "step": 3133 - }, - { - "epoch": 0.28263516255580107, - "grad_norm": 1.8350108953459126, - "learning_rate": 3.367091327490998e-06, - "loss": 1.02, - "step": 3134 - }, - { - "epoch": 0.2827253460792713, - "grad_norm": 1.808416831671491, - "learning_rate": 3.3666648493071347e-06, - "loss": 0.9808, - "step": 3135 - }, - { - "epoch": 0.2828155296027416, - "grad_norm": 1.477878125726659, - "learning_rate": 3.3662382545118914e-06, - "loss": 0.9809, - "step": 3136 - }, - { - "epoch": 0.2829057131262118, - "grad_norm": 1.6275665848525713, - "learning_rate": 3.3658115431416663e-06, - "loss": 1.0804, - "step": 3137 - }, - { - "epoch": 0.2829958966496821, - "grad_norm": 0.6886368368824163, - "learning_rate": 3.36538471523287e-06, - "loss": 0.8814, - "step": 3138 - }, - { - "epoch": 0.28308608017315234, - "grad_norm": 1.4949479060202993, - "learning_rate": 3.3649577708219204e-06, - "loss": 1.0568, - "step": 3139 - }, - { - "epoch": 0.28317626369662263, - "grad_norm": 2.448493911103056, - "learning_rate": 3.3645307099452477e-06, - "loss": 0.9436, - "step": 3140 - }, - { - "epoch": 0.28326644722009287, - "grad_norm": 1.4205053251033066, - "learning_rate": 3.3641035326392907e-06, - "loss": 1.0007, - "step": 3141 - }, - { - "epoch": 0.28335663074356315, - "grad_norm": 1.6974587632396758, - "learning_rate": 3.363676238940499e-06, - "loss": 1.0038, - "step": 3142 - }, - { - "epoch": 0.2834468142670334, - "grad_norm": 1.8756340862029934, - "learning_rate": 3.363248828885331e-06, - "loss": 1.027, - "step": 3143 - }, - { - "epoch": 0.2835369977905037, - "grad_norm": 1.401119707432257, - "learning_rate": 3.3628213025102562e-06, - "loss": 1.0242, - "step": 3144 - }, - { - "epoch": 0.28362718131397396, - "grad_norm": 1.4719502099780064, - "learning_rate": 3.3623936598517536e-06, - "loss": 1.0369, - "step": 3145 - }, - { - "epoch": 0.2837173648374442, - "grad_norm": 1.776568031805681, - "learning_rate": 3.3619659009463117e-06, - "loss": 0.8469, - "step": 3146 - }, - { - "epoch": 0.2838075483609145, - "grad_norm": 1.3146222984878888, - "learning_rate": 3.3615380258304287e-06, - "loss": 1.0526, - "step": 3147 - }, - { - "epoch": 0.2838977318843847, - "grad_norm": 1.557987716753704, - "learning_rate": 3.3611100345406146e-06, - "loss": 0.9902, - "step": 3148 - }, - { - "epoch": 0.283987915407855, - "grad_norm": 1.797181144839761, - "learning_rate": 3.3606819271133873e-06, - "loss": 0.9838, - "step": 3149 - }, - { - "epoch": 0.28407809893132524, - "grad_norm": 1.3590967617071275, - "learning_rate": 3.360253703585275e-06, - "loss": 0.8638, - "step": 3150 - }, - { - "epoch": 0.28416828245479553, - "grad_norm": 1.5201891575616722, - "learning_rate": 3.3598253639928164e-06, - "loss": 0.9444, - "step": 3151 - }, - { - "epoch": 0.28425846597826576, - "grad_norm": 1.3887109764377887, - "learning_rate": 3.3593969083725596e-06, - "loss": 0.9511, - "step": 3152 - }, - { - "epoch": 0.28434864950173605, - "grad_norm": 1.4219101283167104, - "learning_rate": 3.358968336761063e-06, - "loss": 0.9609, - "step": 3153 - }, - { - "epoch": 0.2844388330252063, - "grad_norm": 1.3526965548818528, - "learning_rate": 3.3585396491948945e-06, - "loss": 0.965, - "step": 3154 - }, - { - "epoch": 0.28452901654867657, - "grad_norm": 1.8160323876000946, - "learning_rate": 3.358110845710633e-06, - "loss": 1.0562, - "step": 3155 - }, - { - "epoch": 0.2846192000721468, - "grad_norm": 5.189967108826244, - "learning_rate": 3.357681926344865e-06, - "loss": 0.9829, - "step": 3156 - }, - { - "epoch": 0.2847093835956171, - "grad_norm": 1.6827205508869616, - "learning_rate": 3.357252891134189e-06, - "loss": 0.9931, - "step": 3157 - }, - { - "epoch": 0.2847995671190873, - "grad_norm": 2.015052536688921, - "learning_rate": 3.356823740115212e-06, - "loss": 1.0416, - "step": 3158 - }, - { - "epoch": 0.2848897506425576, - "grad_norm": 1.1962863221429072, - "learning_rate": 3.3563944733245525e-06, - "loss": 0.9422, - "step": 3159 - }, - { - "epoch": 0.28497993416602785, - "grad_norm": 1.6416127455839342, - "learning_rate": 3.3559650907988375e-06, - "loss": 0.9882, - "step": 3160 - }, - { - "epoch": 0.28507011768949814, - "grad_norm": 1.2666389736277004, - "learning_rate": 3.3555355925747045e-06, - "loss": 0.8531, - "step": 3161 - }, - { - "epoch": 0.28516030121296837, - "grad_norm": 1.7528329806628369, - "learning_rate": 3.3551059786888e-06, - "loss": 0.9828, - "step": 3162 - }, - { - "epoch": 0.28525048473643866, - "grad_norm": 1.3016377513146977, - "learning_rate": 3.3546762491777807e-06, - "loss": 1.0441, - "step": 3163 - }, - { - "epoch": 0.2853406682599089, - "grad_norm": 1.9280845996451172, - "learning_rate": 3.3542464040783156e-06, - "loss": 0.9858, - "step": 3164 - }, - { - "epoch": 0.2854308517833792, - "grad_norm": 1.710764558316813, - "learning_rate": 3.353816443427079e-06, - "loss": 1.0114, - "step": 3165 - }, - { - "epoch": 0.2855210353068494, - "grad_norm": 1.5832627012995328, - "learning_rate": 3.3533863672607597e-06, - "loss": 0.9752, - "step": 3166 - }, - { - "epoch": 0.2856112188303197, - "grad_norm": 1.4203617301441234, - "learning_rate": 3.352956175616052e-06, - "loss": 1.0211, - "step": 3167 - }, - { - "epoch": 0.28570140235378993, - "grad_norm": 2.5010914327449028, - "learning_rate": 3.352525868529664e-06, - "loss": 0.9629, - "step": 3168 - }, - { - "epoch": 0.2857915858772602, - "grad_norm": 1.7299735779481025, - "learning_rate": 3.3520954460383103e-06, - "loss": 0.9363, - "step": 3169 - }, - { - "epoch": 0.2858817694007305, - "grad_norm": 1.9541316985666595, - "learning_rate": 3.3516649081787182e-06, - "loss": 0.9382, - "step": 3170 - }, - { - "epoch": 0.28597195292420075, - "grad_norm": 1.4236625083184402, - "learning_rate": 3.3512342549876236e-06, - "loss": 1.0918, - "step": 3171 - }, - { - "epoch": 0.28606213644767103, - "grad_norm": 1.5734380485283692, - "learning_rate": 3.350803486501771e-06, - "loss": 0.9563, - "step": 3172 - }, - { - "epoch": 0.28615231997114127, - "grad_norm": 1.3942779396435578, - "learning_rate": 3.3503726027579175e-06, - "loss": 1.0221, - "step": 3173 - }, - { - "epoch": 0.28624250349461156, - "grad_norm": 1.5881083997198882, - "learning_rate": 3.349941603792827e-06, - "loss": 1.0595, - "step": 3174 - }, - { - "epoch": 0.2863326870180818, - "grad_norm": 1.4120323824965029, - "learning_rate": 3.3495104896432755e-06, - "loss": 0.9877, - "step": 3175 - }, - { - "epoch": 0.2864228705415521, - "grad_norm": 1.4180018733756061, - "learning_rate": 3.3490792603460477e-06, - "loss": 0.9615, - "step": 3176 - }, - { - "epoch": 0.2865130540650223, - "grad_norm": 1.2337148325506264, - "learning_rate": 3.3486479159379393e-06, - "loss": 0.9347, - "step": 3177 - }, - { - "epoch": 0.2866032375884926, - "grad_norm": 0.6946065512172754, - "learning_rate": 3.3482164564557537e-06, - "loss": 0.8476, - "step": 3178 - }, - { - "epoch": 0.28669342111196283, - "grad_norm": 1.644874301386416, - "learning_rate": 3.3477848819363065e-06, - "loss": 1.057, - "step": 3179 - }, - { - "epoch": 0.2867836046354331, - "grad_norm": 0.6709385489254389, - "learning_rate": 3.3473531924164213e-06, - "loss": 0.879, - "step": 3180 - }, - { - "epoch": 0.28687378815890335, - "grad_norm": 1.6852371443996776, - "learning_rate": 3.3469213879329325e-06, - "loss": 0.9516, - "step": 3181 - }, - { - "epoch": 0.28696397168237364, - "grad_norm": 1.4969079414199324, - "learning_rate": 3.3464894685226837e-06, - "loss": 0.9658, - "step": 3182 - }, - { - "epoch": 0.2870541552058439, - "grad_norm": 1.4293626927028176, - "learning_rate": 3.34605743422253e-06, - "loss": 1.0686, - "step": 3183 - }, - { - "epoch": 0.28714433872931416, - "grad_norm": 2.0784747754351893, - "learning_rate": 3.345625285069333e-06, - "loss": 0.9662, - "step": 3184 - }, - { - "epoch": 0.2872345222527844, - "grad_norm": 1.890693411101685, - "learning_rate": 3.345193021099967e-06, - "loss": 0.9781, - "step": 3185 - }, - { - "epoch": 0.2873247057762547, - "grad_norm": 1.2911297030696105, - "learning_rate": 3.3447606423513157e-06, - "loss": 0.9653, - "step": 3186 - }, - { - "epoch": 0.2874148892997249, - "grad_norm": 1.5199532597031162, - "learning_rate": 3.344328148860271e-06, - "loss": 0.9612, - "step": 3187 - }, - { - "epoch": 0.2875050728231952, - "grad_norm": 1.9246365934893503, - "learning_rate": 3.3438955406637365e-06, - "loss": 1.0059, - "step": 3188 - }, - { - "epoch": 0.28759525634666544, - "grad_norm": 1.311773553263483, - "learning_rate": 3.343462817798624e-06, - "loss": 1.0126, - "step": 3189 - }, - { - "epoch": 0.28768543987013573, - "grad_norm": 1.7802562953439642, - "learning_rate": 3.343029980301856e-06, - "loss": 0.9628, - "step": 3190 - }, - { - "epoch": 0.28777562339360596, - "grad_norm": 1.9863230237267029, - "learning_rate": 3.342597028210365e-06, - "loss": 1.0248, - "step": 3191 - }, - { - "epoch": 0.28786580691707625, - "grad_norm": 1.4500781695900924, - "learning_rate": 3.342163961561092e-06, - "loss": 0.9356, - "step": 3192 - }, - { - "epoch": 0.28795599044054654, - "grad_norm": 1.3148446258161215, - "learning_rate": 3.34173078039099e-06, - "loss": 0.9699, - "step": 3193 - }, - { - "epoch": 0.28804617396401677, - "grad_norm": 1.902281147202889, - "learning_rate": 3.3412974847370193e-06, - "loss": 1.1599, - "step": 3194 - }, - { - "epoch": 0.28813635748748706, - "grad_norm": 1.4461124989815604, - "learning_rate": 3.3408640746361514e-06, - "loss": 1.0152, - "step": 3195 - }, - { - "epoch": 0.2882265410109573, - "grad_norm": 0.7446050521281635, - "learning_rate": 3.3404305501253663e-06, - "loss": 0.8512, - "step": 3196 - }, - { - "epoch": 0.2883167245344276, - "grad_norm": 1.3047901501505408, - "learning_rate": 3.3399969112416565e-06, - "loss": 1.0709, - "step": 3197 - }, - { - "epoch": 0.2884069080578978, - "grad_norm": 1.5543785363292462, - "learning_rate": 3.3395631580220213e-06, - "loss": 0.9328, - "step": 3198 - }, - { - "epoch": 0.2884970915813681, - "grad_norm": 2.0536546323044194, - "learning_rate": 3.3391292905034714e-06, - "loss": 0.9769, - "step": 3199 - }, - { - "epoch": 0.28858727510483834, - "grad_norm": 1.4836810447397857, - "learning_rate": 3.338695308723027e-06, - "loss": 1.065, - "step": 3200 - }, - { - "epoch": 0.2886774586283086, - "grad_norm": 1.5715675462849221, - "learning_rate": 3.338261212717716e-06, - "loss": 1.0467, - "step": 3201 - }, - { - "epoch": 0.28876764215177886, - "grad_norm": 1.5875498275746651, - "learning_rate": 3.33782700252458e-06, - "loss": 1.0292, - "step": 3202 - }, - { - "epoch": 0.28885782567524915, - "grad_norm": 1.5339260462275912, - "learning_rate": 3.337392678180668e-06, - "loss": 1.0364, - "step": 3203 - }, - { - "epoch": 0.2889480091987194, - "grad_norm": 1.6311471285482844, - "learning_rate": 3.3369582397230377e-06, - "loss": 0.9956, - "step": 3204 - }, - { - "epoch": 0.28903819272218967, - "grad_norm": 1.5662628640927534, - "learning_rate": 3.336523687188759e-06, - "loss": 1.0406, - "step": 3205 - }, - { - "epoch": 0.2891283762456599, - "grad_norm": 0.6936018398953491, - "learning_rate": 3.336089020614909e-06, - "loss": 0.8627, - "step": 3206 - }, - { - "epoch": 0.2892185597691302, - "grad_norm": 1.533067442120285, - "learning_rate": 3.3356542400385774e-06, - "loss": 0.8721, - "step": 3207 - }, - { - "epoch": 0.2893087432926004, - "grad_norm": 1.3249044527835723, - "learning_rate": 3.3352193454968607e-06, - "loss": 0.9315, - "step": 3208 - }, - { - "epoch": 0.2893989268160707, - "grad_norm": 1.3246234736680809, - "learning_rate": 3.3347843370268675e-06, - "loss": 1.06, - "step": 3209 - }, - { - "epoch": 0.28948911033954094, - "grad_norm": 1.482654176385559, - "learning_rate": 3.334349214665715e-06, - "loss": 1.015, - "step": 3210 - }, - { - "epoch": 0.28957929386301123, - "grad_norm": 1.5276011374498055, - "learning_rate": 3.3339139784505293e-06, - "loss": 1.0815, - "step": 3211 - }, - { - "epoch": 0.28966947738648147, - "grad_norm": 1.2288279416888552, - "learning_rate": 3.333478628418448e-06, - "loss": 1.0694, - "step": 3212 - }, - { - "epoch": 0.28975966090995176, - "grad_norm": 1.221081728397093, - "learning_rate": 3.333043164606618e-06, - "loss": 0.8836, - "step": 3213 - }, - { - "epoch": 0.289849844433422, - "grad_norm": 1.4525205934413934, - "learning_rate": 3.3326075870521948e-06, - "loss": 1.0816, - "step": 3214 - }, - { - "epoch": 0.2899400279568923, - "grad_norm": 1.621372796366324, - "learning_rate": 3.3321718957923437e-06, - "loss": 1.006, - "step": 3215 - }, - { - "epoch": 0.29003021148036257, - "grad_norm": 1.331549280401364, - "learning_rate": 3.3317360908642413e-06, - "loss": 0.9975, - "step": 3216 - }, - { - "epoch": 0.2901203950038328, - "grad_norm": 1.9642908368725058, - "learning_rate": 3.331300172305072e-06, - "loss": 0.949, - "step": 3217 - }, - { - "epoch": 0.2902105785273031, - "grad_norm": 1.5108498138578101, - "learning_rate": 3.330864140152032e-06, - "loss": 0.987, - "step": 3218 - }, - { - "epoch": 0.2903007620507733, - "grad_norm": 1.4920971428641254, - "learning_rate": 3.330427994442325e-06, - "loss": 1.0142, - "step": 3219 - }, - { - "epoch": 0.2903909455742436, - "grad_norm": 1.3420130126651975, - "learning_rate": 3.3299917352131657e-06, - "loss": 1.0319, - "step": 3220 - }, - { - "epoch": 0.29048112909771384, - "grad_norm": 2.0389511838029963, - "learning_rate": 3.329555362501778e-06, - "loss": 0.9633, - "step": 3221 - }, - { - "epoch": 0.29057131262118413, - "grad_norm": 1.5914212585250869, - "learning_rate": 3.3291188763453954e-06, - "loss": 0.961, - "step": 3222 - }, - { - "epoch": 0.29066149614465436, - "grad_norm": 1.3712848410618153, - "learning_rate": 3.3286822767812618e-06, - "loss": 0.9827, - "step": 3223 - }, - { - "epoch": 0.29075167966812465, - "grad_norm": 1.7169741386061155, - "learning_rate": 3.32824556384663e-06, - "loss": 0.9822, - "step": 3224 - }, - { - "epoch": 0.2908418631915949, - "grad_norm": 1.6348856491090873, - "learning_rate": 3.3278087375787628e-06, - "loss": 1.0028, - "step": 3225 - }, - { - "epoch": 0.2909320467150652, - "grad_norm": 2.935095594366781, - "learning_rate": 3.327371798014933e-06, - "loss": 1.0052, - "step": 3226 - }, - { - "epoch": 0.2910222302385354, - "grad_norm": 1.6662563136697301, - "learning_rate": 3.3269347451924218e-06, - "loss": 0.9952, - "step": 3227 - }, - { - "epoch": 0.2911124137620057, - "grad_norm": 1.3839861097548356, - "learning_rate": 3.326497579148522e-06, - "loss": 1.0591, - "step": 3228 - }, - { - "epoch": 0.29120259728547593, - "grad_norm": 1.854707131928664, - "learning_rate": 3.3260602999205345e-06, - "loss": 1.0037, - "step": 3229 - }, - { - "epoch": 0.2912927808089462, - "grad_norm": 1.5202026452955777, - "learning_rate": 3.32562290754577e-06, - "loss": 1.0119, - "step": 3230 - }, - { - "epoch": 0.29138296433241645, - "grad_norm": 1.3771467356819411, - "learning_rate": 3.3251854020615494e-06, - "loss": 0.9759, - "step": 3231 - }, - { - "epoch": 0.29147314785588674, - "grad_norm": 1.6064439767756604, - "learning_rate": 3.324747783505204e-06, - "loss": 1.0012, - "step": 3232 - }, - { - "epoch": 0.29156333137935697, - "grad_norm": 1.8478969554287086, - "learning_rate": 3.324310051914073e-06, - "loss": 0.9116, - "step": 3233 - }, - { - "epoch": 0.29165351490282726, - "grad_norm": 3.9171340068364584, - "learning_rate": 3.3238722073255056e-06, - "loss": 0.972, - "step": 3234 - }, - { - "epoch": 0.2917436984262975, - "grad_norm": 1.5390841874661692, - "learning_rate": 3.323434249776863e-06, - "loss": 0.968, - "step": 3235 - }, - { - "epoch": 0.2918338819497678, - "grad_norm": 0.7849566457402813, - "learning_rate": 3.3229961793055117e-06, - "loss": 0.8627, - "step": 3236 - }, - { - "epoch": 0.291924065473238, - "grad_norm": 1.884199715681169, - "learning_rate": 3.3225579959488314e-06, - "loss": 0.9355, - "step": 3237 - }, - { - "epoch": 0.2920142489967083, - "grad_norm": 1.958345318480266, - "learning_rate": 3.322119699744211e-06, - "loss": 1.0193, - "step": 3238 - }, - { - "epoch": 0.29210443252017854, - "grad_norm": 1.5498940646979356, - "learning_rate": 3.3216812907290476e-06, - "loss": 1.008, - "step": 3239 - }, - { - "epoch": 0.2921946160436488, - "grad_norm": 1.3421254206945685, - "learning_rate": 3.3212427689407484e-06, - "loss": 1.0167, - "step": 3240 - }, - { - "epoch": 0.2922847995671191, - "grad_norm": 0.6304853529321625, - "learning_rate": 3.3208041344167317e-06, - "loss": 0.8151, - "step": 3241 - }, - { - "epoch": 0.29237498309058935, - "grad_norm": 1.7470036467812726, - "learning_rate": 3.3203653871944224e-06, - "loss": 0.9086, - "step": 3242 - }, - { - "epoch": 0.29246516661405964, - "grad_norm": 1.3003275311955098, - "learning_rate": 3.3199265273112587e-06, - "loss": 0.9628, - "step": 3243 - }, - { - "epoch": 0.29255535013752987, - "grad_norm": 1.5582013317374208, - "learning_rate": 3.3194875548046852e-06, - "loss": 0.9398, - "step": 3244 - }, - { - "epoch": 0.29264553366100016, - "grad_norm": 2.8242645048917265, - "learning_rate": 3.319048469712158e-06, - "loss": 0.9757, - "step": 3245 - }, - { - "epoch": 0.2927357171844704, - "grad_norm": 1.5883243432979737, - "learning_rate": 3.3186092720711423e-06, - "loss": 1.0935, - "step": 3246 - }, - { - "epoch": 0.2928259007079407, - "grad_norm": 1.9518853487833043, - "learning_rate": 3.3181699619191125e-06, - "loss": 0.9241, - "step": 3247 - }, - { - "epoch": 0.2929160842314109, - "grad_norm": 1.4125497218470093, - "learning_rate": 3.3177305392935536e-06, - "loss": 1.0334, - "step": 3248 - }, - { - "epoch": 0.2930062677548812, - "grad_norm": 0.7403517255137323, - "learning_rate": 3.3172910042319595e-06, - "loss": 0.8993, - "step": 3249 - }, - { - "epoch": 0.29309645127835143, - "grad_norm": 1.6747186310571214, - "learning_rate": 3.316851356771833e-06, - "loss": 1.0334, - "step": 3250 - }, - { - "epoch": 0.2931866348018217, - "grad_norm": 1.2800161887426653, - "learning_rate": 3.3164115969506876e-06, - "loss": 1.0268, - "step": 3251 - }, - { - "epoch": 0.29327681832529195, - "grad_norm": 2.431329693505537, - "learning_rate": 3.315971724806046e-06, - "loss": 1.0801, - "step": 3252 - }, - { - "epoch": 0.29336700184876224, - "grad_norm": 1.42286563779929, - "learning_rate": 3.315531740375441e-06, - "loss": 0.9381, - "step": 3253 - }, - { - "epoch": 0.2934571853722325, - "grad_norm": 1.8699046519649567, - "learning_rate": 3.315091643696414e-06, - "loss": 0.9608, - "step": 3254 - }, - { - "epoch": 0.29354736889570276, - "grad_norm": 1.3869323565951692, - "learning_rate": 3.3146514348065164e-06, - "loss": 0.9234, - "step": 3255 - }, - { - "epoch": 0.293637552419173, - "grad_norm": 1.7944052267103057, - "learning_rate": 3.31421111374331e-06, - "loss": 1.0053, - "step": 3256 - }, - { - "epoch": 0.2937277359426433, - "grad_norm": 1.6749658267961942, - "learning_rate": 3.3137706805443647e-06, - "loss": 1.0132, - "step": 3257 - }, - { - "epoch": 0.2938179194661135, - "grad_norm": 1.828679579384764, - "learning_rate": 3.313330135247261e-06, - "loss": 1.0468, - "step": 3258 - }, - { - "epoch": 0.2939081029895838, - "grad_norm": 1.2482659710244948, - "learning_rate": 3.312889477889588e-06, - "loss": 0.9666, - "step": 3259 - }, - { - "epoch": 0.29399828651305404, - "grad_norm": 1.3574318864851131, - "learning_rate": 3.3124487085089464e-06, - "loss": 1.0098, - "step": 3260 - }, - { - "epoch": 0.29408847003652433, - "grad_norm": 1.427406200260028, - "learning_rate": 3.312007827142943e-06, - "loss": 1.032, - "step": 3261 - }, - { - "epoch": 0.29417865355999456, - "grad_norm": 1.2952642434107737, - "learning_rate": 3.3115668338291983e-06, - "loss": 0.9192, - "step": 3262 - }, - { - "epoch": 0.29426883708346485, - "grad_norm": 1.4723770322117706, - "learning_rate": 3.3111257286053394e-06, - "loss": 1.0689, - "step": 3263 - }, - { - "epoch": 0.29435902060693514, - "grad_norm": 1.4993837203185265, - "learning_rate": 3.3106845115090043e-06, - "loss": 0.9967, - "step": 3264 - }, - { - "epoch": 0.2944492041304054, - "grad_norm": 1.5641207871832672, - "learning_rate": 3.310243182577839e-06, - "loss": 0.9483, - "step": 3265 - }, - { - "epoch": 0.29453938765387566, - "grad_norm": 1.5016768143057324, - "learning_rate": 3.3098017418495007e-06, - "loss": 0.8974, - "step": 3266 - }, - { - "epoch": 0.2946295711773459, - "grad_norm": 1.724379527413376, - "learning_rate": 3.309360189361656e-06, - "loss": 0.9569, - "step": 3267 - }, - { - "epoch": 0.2947197547008162, - "grad_norm": 1.9945625060666035, - "learning_rate": 3.3089185251519797e-06, - "loss": 1.078, - "step": 3268 - }, - { - "epoch": 0.2948099382242864, - "grad_norm": 1.4896657516335554, - "learning_rate": 3.3084767492581574e-06, - "loss": 0.978, - "step": 3269 - }, - { - "epoch": 0.2949001217477567, - "grad_norm": 1.9547812801463107, - "learning_rate": 3.3080348617178846e-06, - "loss": 1.0117, - "step": 3270 - }, - { - "epoch": 0.29499030527122694, - "grad_norm": 1.45710185598342, - "learning_rate": 3.307592862568865e-06, - "loss": 0.9678, - "step": 3271 - }, - { - "epoch": 0.2950804887946972, - "grad_norm": 1.2604943348728148, - "learning_rate": 3.307150751848812e-06, - "loss": 1.0559, - "step": 3272 - }, - { - "epoch": 0.29517067231816746, - "grad_norm": 1.3934975861664318, - "learning_rate": 3.3067085295954497e-06, - "loss": 0.8983, - "step": 3273 - }, - { - "epoch": 0.29526085584163775, - "grad_norm": 1.5568144544469489, - "learning_rate": 3.3062661958465098e-06, - "loss": 0.8694, - "step": 3274 - }, - { - "epoch": 0.295351039365108, - "grad_norm": 1.6462804050288067, - "learning_rate": 3.305823750639736e-06, - "loss": 0.8914, - "step": 3275 - }, - { - "epoch": 0.29544122288857827, - "grad_norm": 1.3927949840872738, - "learning_rate": 3.3053811940128795e-06, - "loss": 0.9944, - "step": 3276 - }, - { - "epoch": 0.2955314064120485, - "grad_norm": 1.569384096423866, - "learning_rate": 3.3049385260037016e-06, - "loss": 0.9777, - "step": 3277 - }, - { - "epoch": 0.2956215899355188, - "grad_norm": 1.248771302019557, - "learning_rate": 3.3044957466499736e-06, - "loss": 0.8889, - "step": 3278 - }, - { - "epoch": 0.295711773458989, - "grad_norm": 1.5384903827127374, - "learning_rate": 3.304052855989475e-06, - "loss": 0.9192, - "step": 3279 - }, - { - "epoch": 0.2958019569824593, - "grad_norm": 1.277781466925635, - "learning_rate": 3.3036098540599966e-06, - "loss": 1.0517, - "step": 3280 - }, - { - "epoch": 0.29589214050592955, - "grad_norm": 1.7031248600111037, - "learning_rate": 3.3031667408993373e-06, - "loss": 0.9576, - "step": 3281 - }, - { - "epoch": 0.29598232402939983, - "grad_norm": 1.6110234382861335, - "learning_rate": 3.302723516545306e-06, - "loss": 0.9905, - "step": 3282 - }, - { - "epoch": 0.29607250755287007, - "grad_norm": 1.447652129379971, - "learning_rate": 3.302280181035722e-06, - "loss": 1.0213, - "step": 3283 - }, - { - "epoch": 0.29616269107634036, - "grad_norm": 0.62322366532143, - "learning_rate": 3.3018367344084117e-06, - "loss": 0.8719, - "step": 3284 - }, - { - "epoch": 0.2962528745998106, - "grad_norm": 1.1828035039320264, - "learning_rate": 3.3013931767012125e-06, - "loss": 0.9836, - "step": 3285 - }, - { - "epoch": 0.2963430581232809, - "grad_norm": 1.3766451876829742, - "learning_rate": 3.300949507951972e-06, - "loss": 0.9921, - "step": 3286 - }, - { - "epoch": 0.2964332416467511, - "grad_norm": 1.8319829678222637, - "learning_rate": 3.300505728198546e-06, - "loss": 1.0365, - "step": 3287 - }, - { - "epoch": 0.2965234251702214, - "grad_norm": 2.2014755675870257, - "learning_rate": 3.3000618374788e-06, - "loss": 0.9746, - "step": 3288 - }, - { - "epoch": 0.2966136086936917, - "grad_norm": 1.2700234735377107, - "learning_rate": 3.2996178358306104e-06, - "loss": 1.033, - "step": 3289 - }, - { - "epoch": 0.2967037922171619, - "grad_norm": 1.7475069544139439, - "learning_rate": 3.2991737232918606e-06, - "loss": 0.9579, - "step": 3290 - }, - { - "epoch": 0.2967939757406322, - "grad_norm": 1.7500095367171786, - "learning_rate": 3.298729499900445e-06, - "loss": 0.9395, - "step": 3291 - }, - { - "epoch": 0.29688415926410244, - "grad_norm": 1.6813612294894134, - "learning_rate": 3.2982851656942677e-06, - "loss": 0.9354, - "step": 3292 - }, - { - "epoch": 0.29697434278757273, - "grad_norm": 1.4546907254848103, - "learning_rate": 3.2978407207112416e-06, - "loss": 1.0155, - "step": 3293 - }, - { - "epoch": 0.29706452631104296, - "grad_norm": 2.290006891931549, - "learning_rate": 3.2973961649892888e-06, - "loss": 1.0037, - "step": 3294 - }, - { - "epoch": 0.29715470983451325, - "grad_norm": 1.3330709924502036, - "learning_rate": 3.296951498566341e-06, - "loss": 1.0554, - "step": 3295 - }, - { - "epoch": 0.2972448933579835, - "grad_norm": 2.2187547280704463, - "learning_rate": 3.2965067214803404e-06, - "loss": 0.9859, - "step": 3296 - }, - { - "epoch": 0.2973350768814538, - "grad_norm": 1.1677662345331041, - "learning_rate": 3.2960618337692372e-06, - "loss": 1.022, - "step": 3297 - }, - { - "epoch": 0.297425260404924, - "grad_norm": 2.039917748232919, - "learning_rate": 3.2956168354709927e-06, - "loss": 0.9533, - "step": 3298 - }, - { - "epoch": 0.2975154439283943, - "grad_norm": 1.5116756935789413, - "learning_rate": 3.2951717266235754e-06, - "loss": 1.039, - "step": 3299 - }, - { - "epoch": 0.29760562745186453, - "grad_norm": 1.364235180808389, - "learning_rate": 3.294726507264964e-06, - "loss": 0.8698, - "step": 3300 - }, - { - "epoch": 0.2976958109753348, - "grad_norm": 1.4350196337270542, - "learning_rate": 3.2942811774331487e-06, - "loss": 0.9613, - "step": 3301 - }, - { - "epoch": 0.29778599449880505, - "grad_norm": 1.5786830557480438, - "learning_rate": 3.293835737166127e-06, - "loss": 0.9829, - "step": 3302 - }, - { - "epoch": 0.29787617802227534, - "grad_norm": 1.538827636148511, - "learning_rate": 3.293390186501906e-06, - "loss": 0.9317, - "step": 3303 - }, - { - "epoch": 0.2979663615457456, - "grad_norm": 1.724322561950982, - "learning_rate": 3.2929445254785024e-06, - "loss": 1.0055, - "step": 3304 - }, - { - "epoch": 0.29805654506921586, - "grad_norm": 2.0171475832747543, - "learning_rate": 3.2924987541339423e-06, - "loss": 0.948, - "step": 3305 - }, - { - "epoch": 0.2981467285926861, - "grad_norm": 1.527582798663251, - "learning_rate": 3.292052872506262e-06, - "loss": 1.0107, - "step": 3306 - }, - { - "epoch": 0.2982369121161564, - "grad_norm": 1.3320897780282284, - "learning_rate": 3.291606880633506e-06, - "loss": 1.0283, - "step": 3307 - }, - { - "epoch": 0.2983270956396266, - "grad_norm": 1.806063437149307, - "learning_rate": 3.2911607785537297e-06, - "loss": 1.048, - "step": 3308 - }, - { - "epoch": 0.2984172791630969, - "grad_norm": 1.7323160407973648, - "learning_rate": 3.290714566304997e-06, - "loss": 1.0819, - "step": 3309 - }, - { - "epoch": 0.29850746268656714, - "grad_norm": 2.572882205459505, - "learning_rate": 3.2902682439253794e-06, - "loss": 1.0632, - "step": 3310 - }, - { - "epoch": 0.2985976462100374, - "grad_norm": 1.6515375429575738, - "learning_rate": 3.289821811452961e-06, - "loss": 1.004, - "step": 3311 - }, - { - "epoch": 0.2986878297335077, - "grad_norm": 1.84422457778037, - "learning_rate": 3.289375268925834e-06, - "loss": 1.0424, - "step": 3312 - }, - { - "epoch": 0.29877801325697795, - "grad_norm": 1.9481109345472636, - "learning_rate": 3.288928616382099e-06, - "loss": 0.9759, - "step": 3313 - }, - { - "epoch": 0.29886819678044824, - "grad_norm": 1.302627902276723, - "learning_rate": 3.288481853859868e-06, - "loss": 1.029, - "step": 3314 - }, - { - "epoch": 0.29895838030391847, - "grad_norm": 1.637049093117359, - "learning_rate": 3.2880349813972604e-06, - "loss": 0.9689, - "step": 3315 - }, - { - "epoch": 0.29904856382738876, - "grad_norm": 11.471678648338358, - "learning_rate": 3.2875879990324052e-06, - "loss": 0.9905, - "step": 3316 - }, - { - "epoch": 0.299138747350859, - "grad_norm": 2.4630055291086292, - "learning_rate": 3.287140906803443e-06, - "loss": 1.0476, - "step": 3317 - }, - { - "epoch": 0.2992289308743293, - "grad_norm": 2.2554497945614647, - "learning_rate": 3.2866937047485216e-06, - "loss": 0.9147, - "step": 3318 - }, - { - "epoch": 0.2993191143977995, - "grad_norm": 2.108440842230763, - "learning_rate": 3.2862463929057985e-06, - "loss": 1.061, - "step": 3319 - }, - { - "epoch": 0.2994092979212698, - "grad_norm": 13.407501944748601, - "learning_rate": 3.285798971313441e-06, - "loss": 0.8412, - "step": 3320 - }, - { - "epoch": 0.29949948144474003, - "grad_norm": 1.6831314620535995, - "learning_rate": 3.2853514400096248e-06, - "loss": 1.0132, - "step": 3321 - }, - { - "epoch": 0.2995896649682103, - "grad_norm": 1.3312212139473898, - "learning_rate": 3.2849037990325367e-06, - "loss": 1.0223, - "step": 3322 - }, - { - "epoch": 0.29967984849168056, - "grad_norm": 1.200595127987131, - "learning_rate": 3.2844560484203717e-06, - "loss": 0.947, - "step": 3323 - }, - { - "epoch": 0.29977003201515084, - "grad_norm": 1.3977969956470928, - "learning_rate": 3.2840081882113333e-06, - "loss": 1.0792, - "step": 3324 - }, - { - "epoch": 0.2998602155386211, - "grad_norm": 1.9948953214827354, - "learning_rate": 3.283560218443638e-06, - "loss": 0.9639, - "step": 3325 - }, - { - "epoch": 0.29995039906209137, - "grad_norm": 1.2979143872350738, - "learning_rate": 3.2831121391555064e-06, - "loss": 1.0071, - "step": 3326 - }, - { - "epoch": 0.3000405825855616, - "grad_norm": 1.2771068668323347, - "learning_rate": 3.2826639503851724e-06, - "loss": 0.9411, - "step": 3327 - }, - { - "epoch": 0.3001307661090319, - "grad_norm": 1.4832390705260845, - "learning_rate": 3.282215652170877e-06, - "loss": 1.0309, - "step": 3328 - }, - { - "epoch": 0.3002209496325021, - "grad_norm": 1.7476135739814547, - "learning_rate": 3.281767244550873e-06, - "loss": 1.0266, - "step": 3329 - }, - { - "epoch": 0.3003111331559724, - "grad_norm": 0.627110066483001, - "learning_rate": 3.2813187275634193e-06, - "loss": 0.7984, - "step": 3330 - }, - { - "epoch": 0.30040131667944264, - "grad_norm": 1.5460993045284965, - "learning_rate": 3.280870101246787e-06, - "loss": 1.0168, - "step": 3331 - }, - { - "epoch": 0.30049150020291293, - "grad_norm": 1.5931922086095633, - "learning_rate": 3.280421365639255e-06, - "loss": 0.9108, - "step": 3332 - }, - { - "epoch": 0.30058168372638316, - "grad_norm": 1.648223501479908, - "learning_rate": 3.279972520779112e-06, - "loss": 0.9767, - "step": 3333 - }, - { - "epoch": 0.30067186724985345, - "grad_norm": 1.3513048841567856, - "learning_rate": 3.279523566704656e-06, - "loss": 1.0608, - "step": 3334 - }, - { - "epoch": 0.30076205077332374, - "grad_norm": 2.511174690328791, - "learning_rate": 3.2790745034541935e-06, - "loss": 1.0471, - "step": 3335 - }, - { - "epoch": 0.300852234296794, - "grad_norm": 2.3877282637680275, - "learning_rate": 3.278625331066042e-06, - "loss": 1.0427, - "step": 3336 - }, - { - "epoch": 0.30094241782026426, - "grad_norm": 1.6032327270005622, - "learning_rate": 3.278176049578527e-06, - "loss": 1.0409, - "step": 3337 - }, - { - "epoch": 0.3010326013437345, - "grad_norm": 1.5767812768009377, - "learning_rate": 3.2777266590299835e-06, - "loss": 0.9495, - "step": 3338 - }, - { - "epoch": 0.3011227848672048, - "grad_norm": 4.951282435279395, - "learning_rate": 3.2772771594587562e-06, - "loss": 0.927, - "step": 3339 - }, - { - "epoch": 0.301212968390675, - "grad_norm": 1.9225835967025762, - "learning_rate": 3.2768275509031988e-06, - "loss": 1.0941, - "step": 3340 - }, - { - "epoch": 0.3013031519141453, - "grad_norm": 1.6216405474894369, - "learning_rate": 3.276377833401675e-06, - "loss": 0.9788, - "step": 3341 - }, - { - "epoch": 0.30139333543761554, - "grad_norm": 1.5731716412498167, - "learning_rate": 3.2759280069925557e-06, - "loss": 1.0547, - "step": 3342 - }, - { - "epoch": 0.30148351896108583, - "grad_norm": 1.4131009536715111, - "learning_rate": 3.2754780717142233e-06, - "loss": 1.0365, - "step": 3343 - }, - { - "epoch": 0.30157370248455606, - "grad_norm": 1.6822870055646857, - "learning_rate": 3.27502802760507e-06, - "loss": 0.9775, - "step": 3344 - }, - { - "epoch": 0.30166388600802635, - "grad_norm": 1.6334315682408034, - "learning_rate": 3.2745778747034943e-06, - "loss": 0.908, - "step": 3345 - }, - { - "epoch": 0.3017540695314966, - "grad_norm": 1.7341230054739525, - "learning_rate": 3.274127613047906e-06, - "loss": 0.9826, - "step": 3346 - }, - { - "epoch": 0.30184425305496687, - "grad_norm": 1.7394720563494421, - "learning_rate": 3.273677242676725e-06, - "loss": 1.0116, - "step": 3347 - }, - { - "epoch": 0.3019344365784371, - "grad_norm": 1.7017821171098815, - "learning_rate": 3.2732267636283782e-06, - "loss": 0.9879, - "step": 3348 - }, - { - "epoch": 0.3020246201019074, - "grad_norm": 2.1621100366276087, - "learning_rate": 3.2727761759413034e-06, - "loss": 0.9494, - "step": 3349 - }, - { - "epoch": 0.3021148036253776, - "grad_norm": 1.5445972974564648, - "learning_rate": 3.2723254796539477e-06, - "loss": 1.0074, - "step": 3350 - }, - { - "epoch": 0.3022049871488479, - "grad_norm": 3.5910172396792843, - "learning_rate": 3.271874674804766e-06, - "loss": 0.9768, - "step": 3351 - }, - { - "epoch": 0.30229517067231815, - "grad_norm": 1.4419676145019982, - "learning_rate": 3.2714237614322242e-06, - "loss": 1.0174, - "step": 3352 - }, - { - "epoch": 0.30238535419578844, - "grad_norm": 1.4059868884309548, - "learning_rate": 3.2709727395747974e-06, - "loss": 0.8235, - "step": 3353 - }, - { - "epoch": 0.30247553771925867, - "grad_norm": 0.7123095927214698, - "learning_rate": 3.2705216092709673e-06, - "loss": 0.8395, - "step": 3354 - }, - { - "epoch": 0.30256572124272896, - "grad_norm": 1.693943296994506, - "learning_rate": 3.2700703705592282e-06, - "loss": 1.0783, - "step": 3355 - }, - { - "epoch": 0.3026559047661992, - "grad_norm": 2.1516721477286067, - "learning_rate": 3.269619023478082e-06, - "loss": 0.8973, - "step": 3356 - }, - { - "epoch": 0.3027460882896695, - "grad_norm": 1.4244868843757548, - "learning_rate": 3.26916756806604e-06, - "loss": 1.0923, - "step": 3357 - }, - { - "epoch": 0.3028362718131397, - "grad_norm": 1.5819852092307094, - "learning_rate": 3.268716004361623e-06, - "loss": 1.0057, - "step": 3358 - }, - { - "epoch": 0.30292645533661, - "grad_norm": 0.7684170164625911, - "learning_rate": 3.268264332403361e-06, - "loss": 0.8638, - "step": 3359 - }, - { - "epoch": 0.3030166388600803, - "grad_norm": 1.276295176125873, - "learning_rate": 3.2678125522297933e-06, - "loss": 1.0273, - "step": 3360 - }, - { - "epoch": 0.3031068223835505, - "grad_norm": 1.9977489558830488, - "learning_rate": 3.267360663879468e-06, - "loss": 0.941, - "step": 3361 - }, - { - "epoch": 0.3031970059070208, - "grad_norm": 1.7684847198076423, - "learning_rate": 3.266908667390942e-06, - "loss": 0.9834, - "step": 3362 - }, - { - "epoch": 0.30328718943049104, - "grad_norm": 2.1819512975066977, - "learning_rate": 3.2664565628027833e-06, - "loss": 1.0512, - "step": 3363 - }, - { - "epoch": 0.30337737295396133, - "grad_norm": 2.0695035685628063, - "learning_rate": 3.2660043501535675e-06, - "loss": 0.9922, - "step": 3364 - }, - { - "epoch": 0.30346755647743157, - "grad_norm": 1.59942558825718, - "learning_rate": 3.2655520294818797e-06, - "loss": 1.0593, - "step": 3365 - }, - { - "epoch": 0.30355774000090185, - "grad_norm": 1.3166495531313973, - "learning_rate": 3.2650996008263146e-06, - "loss": 1.0626, - "step": 3366 - }, - { - "epoch": 0.3036479235243721, - "grad_norm": 2.8263654767773914, - "learning_rate": 3.2646470642254756e-06, - "loss": 0.9508, - "step": 3367 - }, - { - "epoch": 0.3037381070478424, - "grad_norm": 5.302139771566434, - "learning_rate": 3.2641944197179767e-06, - "loss": 1.0157, - "step": 3368 - }, - { - "epoch": 0.3038282905713126, - "grad_norm": 0.7236919919421538, - "learning_rate": 3.2637416673424383e-06, - "loss": 0.8219, - "step": 3369 - }, - { - "epoch": 0.3039184740947829, - "grad_norm": 1.452686694912375, - "learning_rate": 3.2632888071374937e-06, - "loss": 1.0109, - "step": 3370 - }, - { - "epoch": 0.30400865761825313, - "grad_norm": 1.6529909133615228, - "learning_rate": 3.2628358391417815e-06, - "loss": 1.0281, - "step": 3371 - }, - { - "epoch": 0.3040988411417234, - "grad_norm": 2.0317670897568556, - "learning_rate": 3.2623827633939526e-06, - "loss": 0.9837, - "step": 3372 - }, - { - "epoch": 0.30418902466519365, - "grad_norm": 1.9640742287175594, - "learning_rate": 3.2619295799326657e-06, - "loss": 0.9936, - "step": 3373 - }, - { - "epoch": 0.30427920818866394, - "grad_norm": 2.9756657408481426, - "learning_rate": 3.2614762887965883e-06, - "loss": 0.8857, - "step": 3374 - }, - { - "epoch": 0.3043693917121342, - "grad_norm": 1.7053347609655405, - "learning_rate": 3.2610228900243984e-06, - "loss": 0.9873, - "step": 3375 - }, - { - "epoch": 0.30445957523560446, - "grad_norm": 0.6115866410018325, - "learning_rate": 3.260569383654783e-06, - "loss": 0.8777, - "step": 3376 - }, - { - "epoch": 0.3045497587590747, - "grad_norm": 0.7619773621362859, - "learning_rate": 3.2601157697264365e-06, - "loss": 0.8824, - "step": 3377 - }, - { - "epoch": 0.304639942282545, - "grad_norm": 1.830967445838327, - "learning_rate": 3.2596620482780647e-06, - "loss": 0.9723, - "step": 3378 - }, - { - "epoch": 0.3047301258060152, - "grad_norm": 0.5751988875532202, - "learning_rate": 3.2592082193483803e-06, - "loss": 0.8423, - "step": 3379 - }, - { - "epoch": 0.3048203093294855, - "grad_norm": 1.4502041081120052, - "learning_rate": 3.258754282976109e-06, - "loss": 0.9757, - "step": 3380 - }, - { - "epoch": 0.30491049285295574, - "grad_norm": 1.56988177868813, - "learning_rate": 3.25830023919998e-06, - "loss": 1.0649, - "step": 3381 - }, - { - "epoch": 0.305000676376426, - "grad_norm": 2.5585043855159495, - "learning_rate": 3.2578460880587374e-06, - "loss": 0.9535, - "step": 3382 - }, - { - "epoch": 0.3050908598998963, - "grad_norm": 2.9398508607574114, - "learning_rate": 3.2573918295911306e-06, - "loss": 1.0514, - "step": 3383 - }, - { - "epoch": 0.30518104342336655, - "grad_norm": 1.7059177284751113, - "learning_rate": 3.2569374638359196e-06, - "loss": 0.935, - "step": 3384 - }, - { - "epoch": 0.30527122694683684, - "grad_norm": 1.2348945103893918, - "learning_rate": 3.2564829908318736e-06, - "loss": 1.0113, - "step": 3385 - }, - { - "epoch": 0.30536141047030707, - "grad_norm": 1.27112711444716, - "learning_rate": 3.2560284106177705e-06, - "loss": 1.0524, - "step": 3386 - }, - { - "epoch": 0.30545159399377736, - "grad_norm": 1.457623808691246, - "learning_rate": 3.2555737232323978e-06, - "loss": 0.9677, - "step": 3387 - }, - { - "epoch": 0.3055417775172476, - "grad_norm": 1.4015496856205714, - "learning_rate": 3.255118928714552e-06, - "loss": 1.0259, - "step": 3388 - }, - { - "epoch": 0.3056319610407179, - "grad_norm": 4.3619705184926705, - "learning_rate": 3.2546640271030386e-06, - "loss": 0.9285, - "step": 3389 - }, - { - "epoch": 0.3057221445641881, - "grad_norm": 0.6430396419855717, - "learning_rate": 3.2542090184366717e-06, - "loss": 0.8814, - "step": 3390 - }, - { - "epoch": 0.3058123280876584, - "grad_norm": 2.221200905656072, - "learning_rate": 3.253753902754276e-06, - "loss": 1.0682, - "step": 3391 - }, - { - "epoch": 0.30590251161112864, - "grad_norm": 9.877757013521038, - "learning_rate": 3.253298680094685e-06, - "loss": 0.933, - "step": 3392 - }, - { - "epoch": 0.3059926951345989, - "grad_norm": 1.7008087786793349, - "learning_rate": 3.2528433504967394e-06, - "loss": 0.9405, - "step": 3393 - }, - { - "epoch": 0.30608287865806916, - "grad_norm": 1.5782039358207676, - "learning_rate": 3.252387913999291e-06, - "loss": 1.0045, - "step": 3394 - }, - { - "epoch": 0.30617306218153945, - "grad_norm": 1.483136272556373, - "learning_rate": 3.2519323706411998e-06, - "loss": 0.9993, - "step": 3395 - }, - { - "epoch": 0.3062632457050097, - "grad_norm": 2.379852056793729, - "learning_rate": 3.251476720461336e-06, - "loss": 0.8848, - "step": 3396 - }, - { - "epoch": 0.30635342922847997, - "grad_norm": 1.5635067557398468, - "learning_rate": 3.251020963498578e-06, - "loss": 0.9193, - "step": 3397 - }, - { - "epoch": 0.3064436127519502, - "grad_norm": 1.4247641920436123, - "learning_rate": 3.250565099791813e-06, - "loss": 1.0407, - "step": 3398 - }, - { - "epoch": 0.3065337962754205, - "grad_norm": 1.2916004512591148, - "learning_rate": 3.2501091293799387e-06, - "loss": 1.0451, - "step": 3399 - }, - { - "epoch": 0.3066239797988907, - "grad_norm": 1.5074679915033369, - "learning_rate": 3.24965305230186e-06, - "loss": 1.0124, - "step": 3400 - }, - { - "epoch": 0.306714163322361, - "grad_norm": 2.5428400197531795, - "learning_rate": 3.249196868596492e-06, - "loss": 1.0686, - "step": 3401 - }, - { - "epoch": 0.30680434684583124, - "grad_norm": 1.5893916206525878, - "learning_rate": 3.24874057830276e-06, - "loss": 0.9235, - "step": 3402 - }, - { - "epoch": 0.30689453036930153, - "grad_norm": 1.8112461751893534, - "learning_rate": 3.2482841814595954e-06, - "loss": 0.9256, - "step": 3403 - }, - { - "epoch": 0.30698471389277177, - "grad_norm": 2.1085700865891543, - "learning_rate": 3.247827678105943e-06, - "loss": 0.9332, - "step": 3404 - }, - { - "epoch": 0.30707489741624205, - "grad_norm": 2.4785725712528164, - "learning_rate": 3.247371068280751e-06, - "loss": 0.9878, - "step": 3405 - }, - { - "epoch": 0.3071650809397123, - "grad_norm": 1.252951522957992, - "learning_rate": 3.2469143520229823e-06, - "loss": 0.8571, - "step": 3406 - }, - { - "epoch": 0.3072552644631826, - "grad_norm": 1.433936803034372, - "learning_rate": 3.2464575293716054e-06, - "loss": 0.9553, - "step": 3407 - }, - { - "epoch": 0.30734544798665286, - "grad_norm": 1.8251797025859036, - "learning_rate": 3.2460006003655997e-06, - "loss": 0.9312, - "step": 3408 - }, - { - "epoch": 0.3074356315101231, - "grad_norm": 1.8837014153688405, - "learning_rate": 3.245543565043952e-06, - "loss": 1.0534, - "step": 3409 - }, - { - "epoch": 0.3075258150335934, - "grad_norm": 1.4248346400103034, - "learning_rate": 3.2450864234456592e-06, - "loss": 0.9816, - "step": 3410 - }, - { - "epoch": 0.3076159985570636, - "grad_norm": 2.2081120098216713, - "learning_rate": 3.244629175609728e-06, - "loss": 0.9875, - "step": 3411 - }, - { - "epoch": 0.3077061820805339, - "grad_norm": 1.4392271860927575, - "learning_rate": 3.2441718215751726e-06, - "loss": 0.9799, - "step": 3412 - }, - { - "epoch": 0.30779636560400414, - "grad_norm": 1.774217341702573, - "learning_rate": 3.2437143613810173e-06, - "loss": 0.9545, - "step": 3413 - }, - { - "epoch": 0.30788654912747443, - "grad_norm": 1.430318140906824, - "learning_rate": 3.2432567950662947e-06, - "loss": 1.0327, - "step": 3414 - }, - { - "epoch": 0.30797673265094466, - "grad_norm": 1.3400737242558858, - "learning_rate": 3.2427991226700468e-06, - "loss": 0.9836, - "step": 3415 - }, - { - "epoch": 0.30806691617441495, - "grad_norm": 1.4753458167259172, - "learning_rate": 3.2423413442313246e-06, - "loss": 0.9216, - "step": 3416 - }, - { - "epoch": 0.3081570996978852, - "grad_norm": 1.4616570154362225, - "learning_rate": 3.2418834597891904e-06, - "loss": 0.912, - "step": 3417 - }, - { - "epoch": 0.3082472832213555, - "grad_norm": 1.807963976370355, - "learning_rate": 3.2414254693827098e-06, - "loss": 1.0747, - "step": 3418 - }, - { - "epoch": 0.3083374667448257, - "grad_norm": 1.3824778868577814, - "learning_rate": 3.2409673730509644e-06, - "loss": 0.9807, - "step": 3419 - }, - { - "epoch": 0.308427650268296, - "grad_norm": 1.6577384306587792, - "learning_rate": 3.2405091708330393e-06, - "loss": 0.9698, - "step": 3420 - }, - { - "epoch": 0.3085178337917662, - "grad_norm": 1.5093306572444454, - "learning_rate": 3.2400508627680323e-06, - "loss": 0.9844, - "step": 3421 - }, - { - "epoch": 0.3086080173152365, - "grad_norm": 1.602410812871658, - "learning_rate": 3.2395924488950474e-06, - "loss": 1.0835, - "step": 3422 - }, - { - "epoch": 0.30869820083870675, - "grad_norm": 2.1610098009571272, - "learning_rate": 3.2391339292532004e-06, - "loss": 1.1176, - "step": 3423 - }, - { - "epoch": 0.30878838436217704, - "grad_norm": 1.9129869825894374, - "learning_rate": 3.238675303881614e-06, - "loss": 0.9392, - "step": 3424 - }, - { - "epoch": 0.30887856788564727, - "grad_norm": 1.484296455312967, - "learning_rate": 3.2382165728194203e-06, - "loss": 0.9596, - "step": 3425 - }, - { - "epoch": 0.30896875140911756, - "grad_norm": 1.9381326442396765, - "learning_rate": 3.237757736105761e-06, - "loss": 0.9795, - "step": 3426 - }, - { - "epoch": 0.3090589349325878, - "grad_norm": 1.3562779278670445, - "learning_rate": 3.2372987937797867e-06, - "loss": 1.0653, - "step": 3427 - }, - { - "epoch": 0.3091491184560581, - "grad_norm": 1.2897877820243537, - "learning_rate": 3.2368397458806573e-06, - "loss": 0.8984, - "step": 3428 - }, - { - "epoch": 0.3092393019795283, - "grad_norm": 3.142050803771162, - "learning_rate": 3.2363805924475412e-06, - "loss": 1.0212, - "step": 3429 - }, - { - "epoch": 0.3093294855029986, - "grad_norm": 1.7689034735748226, - "learning_rate": 3.2359213335196153e-06, - "loss": 0.941, - "step": 3430 - }, - { - "epoch": 0.3094196690264689, - "grad_norm": 1.472128047370202, - "learning_rate": 3.2354619691360663e-06, - "loss": 0.9451, - "step": 3431 - }, - { - "epoch": 0.3095098525499391, - "grad_norm": 1.2952475390003166, - "learning_rate": 3.2350024993360898e-06, - "loss": 1.0312, - "step": 3432 - }, - { - "epoch": 0.3096000360734094, - "grad_norm": 1.412064421005472, - "learning_rate": 3.2345429241588902e-06, - "loss": 0.9928, - "step": 3433 - }, - { - "epoch": 0.30969021959687965, - "grad_norm": 1.8336454328121086, - "learning_rate": 3.234083243643681e-06, - "loss": 1.0163, - "step": 3434 - }, - { - "epoch": 0.30978040312034993, - "grad_norm": 1.3002301672603984, - "learning_rate": 3.233623457829686e-06, - "loss": 1.0667, - "step": 3435 - }, - { - "epoch": 0.30987058664382017, - "grad_norm": 2.7670962628272773, - "learning_rate": 3.2331635667561344e-06, - "loss": 0.963, - "step": 3436 - }, - { - "epoch": 0.30996077016729046, - "grad_norm": 1.6418235397261043, - "learning_rate": 3.2327035704622674e-06, - "loss": 1.0113, - "step": 3437 - }, - { - "epoch": 0.3100509536907607, - "grad_norm": 1.306033006011503, - "learning_rate": 3.2322434689873353e-06, - "loss": 0.89, - "step": 3438 - }, - { - "epoch": 0.310141137214231, - "grad_norm": 2.0696473402653957, - "learning_rate": 3.2317832623705957e-06, - "loss": 1.0736, - "step": 3439 - }, - { - "epoch": 0.3102313207377012, - "grad_norm": 1.3893034581183565, - "learning_rate": 3.231322950651316e-06, - "loss": 0.9362, - "step": 3440 - }, - { - "epoch": 0.3103215042611715, - "grad_norm": 3.0841048194794904, - "learning_rate": 3.2308625338687735e-06, - "loss": 1.0112, - "step": 3441 - }, - { - "epoch": 0.31041168778464173, - "grad_norm": 3.1188362369578537, - "learning_rate": 3.230402012062252e-06, - "loss": 1.0999, - "step": 3442 - }, - { - "epoch": 0.310501871308112, - "grad_norm": 0.637995837493998, - "learning_rate": 3.2299413852710466e-06, - "loss": 0.8761, - "step": 3443 - }, - { - "epoch": 0.31059205483158225, - "grad_norm": 1.4142723981497387, - "learning_rate": 3.2294806535344606e-06, - "loss": 0.9532, - "step": 3444 - }, - { - "epoch": 0.31068223835505254, - "grad_norm": 1.9388491947418633, - "learning_rate": 3.2290198168918056e-06, - "loss": 1.0316, - "step": 3445 - }, - { - "epoch": 0.3107724218785228, - "grad_norm": 1.6718613098154442, - "learning_rate": 3.2285588753824035e-06, - "loss": 1.0327, - "step": 3446 - }, - { - "epoch": 0.31086260540199306, - "grad_norm": 1.4850232766635902, - "learning_rate": 3.228097829045584e-06, - "loss": 1.0055, - "step": 3447 - }, - { - "epoch": 0.3109527889254633, - "grad_norm": 1.2993994811349554, - "learning_rate": 3.227636677920685e-06, - "loss": 1.0251, - "step": 3448 - }, - { - "epoch": 0.3110429724489336, - "grad_norm": 1.4383733004030128, - "learning_rate": 3.2271754220470567e-06, - "loss": 1.0364, - "step": 3449 - }, - { - "epoch": 0.3111331559724038, - "grad_norm": 1.757445436935711, - "learning_rate": 3.2267140614640547e-06, - "loss": 1.0782, - "step": 3450 - }, - { - "epoch": 0.3112233394958741, - "grad_norm": 1.7538038194137782, - "learning_rate": 3.2262525962110445e-06, - "loss": 1.028, - "step": 3451 - }, - { - "epoch": 0.31131352301934434, - "grad_norm": 1.4911227596770429, - "learning_rate": 3.2257910263274015e-06, - "loss": 0.9167, - "step": 3452 - }, - { - "epoch": 0.31140370654281463, - "grad_norm": 1.6515900175604559, - "learning_rate": 3.225329351852509e-06, - "loss": 0.9365, - "step": 3453 - }, - { - "epoch": 0.3114938900662849, - "grad_norm": 1.6765961344136842, - "learning_rate": 3.2248675728257596e-06, - "loss": 1.0027, - "step": 3454 - }, - { - "epoch": 0.31158407358975515, - "grad_norm": 2.079060508856658, - "learning_rate": 3.2244056892865557e-06, - "loss": 1.0632, - "step": 3455 - }, - { - "epoch": 0.31167425711322544, - "grad_norm": 1.8067291732945325, - "learning_rate": 3.2239437012743063e-06, - "loss": 0.9792, - "step": 3456 - }, - { - "epoch": 0.31176444063669567, - "grad_norm": 1.2767087425406485, - "learning_rate": 3.223481608828432e-06, - "loss": 1.0125, - "step": 3457 - }, - { - "epoch": 0.31185462416016596, - "grad_norm": 6.763182589055996, - "learning_rate": 3.223019411988361e-06, - "loss": 1.0153, - "step": 3458 - }, - { - "epoch": 0.3119448076836362, - "grad_norm": 1.9464747622147494, - "learning_rate": 3.22255711079353e-06, - "loss": 0.9952, - "step": 3459 - }, - { - "epoch": 0.3120349912071065, - "grad_norm": 1.7743660264764134, - "learning_rate": 3.222094705283385e-06, - "loss": 0.9568, - "step": 3460 - }, - { - "epoch": 0.3121251747305767, - "grad_norm": 1.519086208727437, - "learning_rate": 3.2216321954973805e-06, - "loss": 0.9877, - "step": 3461 - }, - { - "epoch": 0.312215358254047, - "grad_norm": 1.9776508817730223, - "learning_rate": 3.2211695814749816e-06, - "loss": 0.9854, - "step": 3462 - }, - { - "epoch": 0.31230554177751724, - "grad_norm": 2.5357551513848215, - "learning_rate": 3.220706863255661e-06, - "loss": 1.0902, - "step": 3463 - }, - { - "epoch": 0.3123957253009875, - "grad_norm": 1.60909513243722, - "learning_rate": 3.2202440408788994e-06, - "loss": 0.9863, - "step": 3464 - }, - { - "epoch": 0.31248590882445776, - "grad_norm": 1.7819788596058466, - "learning_rate": 3.2197811143841883e-06, - "loss": 1.0609, - "step": 3465 - }, - { - "epoch": 0.31257609234792805, - "grad_norm": 1.24724078824206, - "learning_rate": 3.2193180838110267e-06, - "loss": 0.9872, - "step": 3466 - }, - { - "epoch": 0.3126662758713983, - "grad_norm": 1.8416335679729268, - "learning_rate": 3.2188549491989225e-06, - "loss": 0.8859, - "step": 3467 - }, - { - "epoch": 0.31275645939486857, - "grad_norm": 1.710837095615408, - "learning_rate": 3.2183917105873934e-06, - "loss": 1.0223, - "step": 3468 - }, - { - "epoch": 0.3128466429183388, - "grad_norm": 1.6087764626959329, - "learning_rate": 3.217928368015966e-06, - "loss": 1.0514, - "step": 3469 - }, - { - "epoch": 0.3129368264418091, - "grad_norm": 1.375145904429282, - "learning_rate": 3.217464921524174e-06, - "loss": 1.0177, - "step": 3470 - }, - { - "epoch": 0.3130270099652793, - "grad_norm": 1.4677433053970028, - "learning_rate": 3.2170013711515616e-06, - "loss": 1.0082, - "step": 3471 - }, - { - "epoch": 0.3131171934887496, - "grad_norm": 6.285876643573484, - "learning_rate": 3.216537716937682e-06, - "loss": 0.9117, - "step": 3472 - }, - { - "epoch": 0.31320737701221985, - "grad_norm": 2.036145926966159, - "learning_rate": 3.2160739589220968e-06, - "loss": 0.9651, - "step": 3473 - }, - { - "epoch": 0.31329756053569013, - "grad_norm": 1.3673365048545785, - "learning_rate": 3.215610097144376e-06, - "loss": 1.0426, - "step": 3474 - }, - { - "epoch": 0.31338774405916037, - "grad_norm": 2.6446866732161873, - "learning_rate": 3.215146131644099e-06, - "loss": 0.9438, - "step": 3475 - }, - { - "epoch": 0.31347792758263066, - "grad_norm": 1.309097056603206, - "learning_rate": 3.214682062460854e-06, - "loss": 1.0705, - "step": 3476 - }, - { - "epoch": 0.3135681111061009, - "grad_norm": 1.7196026161147326, - "learning_rate": 3.2142178896342367e-06, - "loss": 0.9127, - "step": 3477 - }, - { - "epoch": 0.3136582946295712, - "grad_norm": 1.337546110695285, - "learning_rate": 3.2137536132038552e-06, - "loss": 0.9768, - "step": 3478 - }, - { - "epoch": 0.31374847815304147, - "grad_norm": 2.275572727758629, - "learning_rate": 3.2132892332093226e-06, - "loss": 0.9906, - "step": 3479 - }, - { - "epoch": 0.3138386616765117, - "grad_norm": 2.718022830224452, - "learning_rate": 3.2128247496902623e-06, - "loss": 1.0952, - "step": 3480 - }, - { - "epoch": 0.313928845199982, - "grad_norm": 1.2306959638772872, - "learning_rate": 3.2123601626863064e-06, - "loss": 1.0131, - "step": 3481 - }, - { - "epoch": 0.3140190287234522, - "grad_norm": 0.618842910765262, - "learning_rate": 3.2118954722370974e-06, - "loss": 0.8083, - "step": 3482 - }, - { - "epoch": 0.3141092122469225, - "grad_norm": 2.359169957428037, - "learning_rate": 3.2114306783822837e-06, - "loss": 0.9422, - "step": 3483 - }, - { - "epoch": 0.31419939577039274, - "grad_norm": 1.3655090492309065, - "learning_rate": 3.210965781161525e-06, - "loss": 0.9686, - "step": 3484 - }, - { - "epoch": 0.31428957929386303, - "grad_norm": 1.3865331391240825, - "learning_rate": 3.2105007806144892e-06, - "loss": 1.1131, - "step": 3485 - }, - { - "epoch": 0.31437976281733326, - "grad_norm": 1.6737730713807562, - "learning_rate": 3.2100356767808513e-06, - "loss": 0.9958, - "step": 3486 - }, - { - "epoch": 0.31446994634080355, - "grad_norm": 1.7794062793315186, - "learning_rate": 3.2095704697002977e-06, - "loss": 1.0491, - "step": 3487 - }, - { - "epoch": 0.3145601298642738, - "grad_norm": 1.8570240449103625, - "learning_rate": 3.209105159412522e-06, - "loss": 0.9974, - "step": 3488 - }, - { - "epoch": 0.3146503133877441, - "grad_norm": 1.85636555732646, - "learning_rate": 3.208639745957228e-06, - "loss": 0.955, - "step": 3489 - }, - { - "epoch": 0.3147404969112143, - "grad_norm": 1.3483456382535781, - "learning_rate": 3.2081742293741256e-06, - "loss": 1.014, - "step": 3490 - }, - { - "epoch": 0.3148306804346846, - "grad_norm": 1.2690635876156586, - "learning_rate": 3.2077086097029366e-06, - "loss": 1.0502, - "step": 3491 - }, - { - "epoch": 0.31492086395815483, - "grad_norm": 1.4441051492210795, - "learning_rate": 3.2072428869833895e-06, - "loss": 0.9958, - "step": 3492 - }, - { - "epoch": 0.3150110474816251, - "grad_norm": 1.3932168359894128, - "learning_rate": 3.206777061255223e-06, - "loss": 1.007, - "step": 3493 - }, - { - "epoch": 0.31510123100509535, - "grad_norm": 1.7864566799263675, - "learning_rate": 3.206311132558183e-06, - "loss": 1.0347, - "step": 3494 - }, - { - "epoch": 0.31519141452856564, - "grad_norm": 2.259920607834939, - "learning_rate": 3.205845100932026e-06, - "loss": 1.0042, - "step": 3495 - }, - { - "epoch": 0.31528159805203587, - "grad_norm": 1.5551707987802839, - "learning_rate": 3.205378966416516e-06, - "loss": 1.0233, - "step": 3496 - }, - { - "epoch": 0.31537178157550616, - "grad_norm": 2.73494788571767, - "learning_rate": 3.204912729051426e-06, - "loss": 0.9941, - "step": 3497 - }, - { - "epoch": 0.3154619650989764, - "grad_norm": 1.630717416398309, - "learning_rate": 3.2044463888765384e-06, - "loss": 0.9769, - "step": 3498 - }, - { - "epoch": 0.3155521486224467, - "grad_norm": 2.0043294062006853, - "learning_rate": 3.2039799459316436e-06, - "loss": 0.8982, - "step": 3499 - }, - { - "epoch": 0.3156423321459169, - "grad_norm": 1.5558763875182033, - "learning_rate": 3.2035134002565407e-06, - "loss": 1.0316, - "step": 3500 - }, - { - "epoch": 0.3157325156693872, - "grad_norm": 1.6051656246589228, - "learning_rate": 3.203046751891039e-06, - "loss": 1.0023, - "step": 3501 - }, - { - "epoch": 0.3158226991928575, - "grad_norm": 3.266059226046275, - "learning_rate": 3.2025800008749545e-06, - "loss": 1.0132, - "step": 3502 - }, - { - "epoch": 0.3159128827163277, - "grad_norm": 1.6773773630627171, - "learning_rate": 3.202113147248114e-06, - "loss": 0.9763, - "step": 3503 - }, - { - "epoch": 0.316003066239798, - "grad_norm": 1.8662288231743656, - "learning_rate": 3.20164619105035e-06, - "loss": 0.9718, - "step": 3504 - }, - { - "epoch": 0.31609324976326825, - "grad_norm": 1.5096422073433509, - "learning_rate": 3.201179132321508e-06, - "loss": 0.8484, - "step": 3505 - }, - { - "epoch": 0.31618343328673854, - "grad_norm": 1.5463965090124645, - "learning_rate": 3.200711971101439e-06, - "loss": 0.9373, - "step": 3506 - }, - { - "epoch": 0.31627361681020877, - "grad_norm": 1.7624816595976511, - "learning_rate": 3.2002447074300047e-06, - "loss": 1.0006, - "step": 3507 - }, - { - "epoch": 0.31636380033367906, - "grad_norm": 1.2415817991264688, - "learning_rate": 3.1997773413470736e-06, - "loss": 0.9942, - "step": 3508 - }, - { - "epoch": 0.3164539838571493, - "grad_norm": 1.8466012535605756, - "learning_rate": 3.199309872892524e-06, - "loss": 0.9864, - "step": 3509 - }, - { - "epoch": 0.3165441673806196, - "grad_norm": 1.7129959891785067, - "learning_rate": 3.198842302106243e-06, - "loss": 1.0434, - "step": 3510 - }, - { - "epoch": 0.3166343509040898, - "grad_norm": 1.6894260294069967, - "learning_rate": 3.1983746290281265e-06, - "loss": 0.9877, - "step": 3511 - }, - { - "epoch": 0.3167245344275601, - "grad_norm": 1.884939298385986, - "learning_rate": 3.197906853698079e-06, - "loss": 1.0005, - "step": 3512 - }, - { - "epoch": 0.31681471795103033, - "grad_norm": 15.300554372679708, - "learning_rate": 3.1974389761560137e-06, - "loss": 0.9427, - "step": 3513 - }, - { - "epoch": 0.3169049014745006, - "grad_norm": 1.750732064396709, - "learning_rate": 3.1969709964418525e-06, - "loss": 0.9476, - "step": 3514 - }, - { - "epoch": 0.31699508499797086, - "grad_norm": 1.6267771906253305, - "learning_rate": 3.196502914595525e-06, - "loss": 1.0076, - "step": 3515 - }, - { - "epoch": 0.31708526852144114, - "grad_norm": 1.6430194638307276, - "learning_rate": 3.1960347306569723e-06, - "loss": 1.02, - "step": 3516 - }, - { - "epoch": 0.3171754520449114, - "grad_norm": 1.6149003814821539, - "learning_rate": 3.195566444666141e-06, - "loss": 0.9459, - "step": 3517 - }, - { - "epoch": 0.31726563556838167, - "grad_norm": 1.3940949598995784, - "learning_rate": 3.1950980566629886e-06, - "loss": 1.1031, - "step": 3518 - }, - { - "epoch": 0.3173558190918519, - "grad_norm": 1.3853796712577533, - "learning_rate": 3.1946295666874797e-06, - "loss": 1.011, - "step": 3519 - }, - { - "epoch": 0.3174460026153222, - "grad_norm": 1.2934954167016826, - "learning_rate": 3.19416097477959e-06, - "loss": 1.0089, - "step": 3520 - }, - { - "epoch": 0.3175361861387924, - "grad_norm": 1.4142900989789273, - "learning_rate": 3.1936922809793005e-06, - "loss": 1.0587, - "step": 3521 - }, - { - "epoch": 0.3176263696622627, - "grad_norm": 1.3208868368886806, - "learning_rate": 3.193223485326604e-06, - "loss": 1.1195, - "step": 3522 - }, - { - "epoch": 0.31771655318573294, - "grad_norm": 1.9648188140075542, - "learning_rate": 3.1927545878615005e-06, - "loss": 1.0798, - "step": 3523 - }, - { - "epoch": 0.31780673670920323, - "grad_norm": 1.3943010669157259, - "learning_rate": 3.192285588623999e-06, - "loss": 0.9472, - "step": 3524 - }, - { - "epoch": 0.31789692023267346, - "grad_norm": 1.237153656013161, - "learning_rate": 3.191816487654117e-06, - "loss": 0.9294, - "step": 3525 - }, - { - "epoch": 0.31798710375614375, - "grad_norm": 1.7528454263566258, - "learning_rate": 3.19134728499188e-06, - "loss": 0.9654, - "step": 3526 - }, - { - "epoch": 0.31807728727961404, - "grad_norm": 1.9240464139919262, - "learning_rate": 3.1908779806773235e-06, - "loss": 0.8888, - "step": 3527 - }, - { - "epoch": 0.3181674708030843, - "grad_norm": 1.3818666919750087, - "learning_rate": 3.190408574750492e-06, - "loss": 1.0545, - "step": 3528 - }, - { - "epoch": 0.31825765432655456, - "grad_norm": 1.3369221767182082, - "learning_rate": 3.1899390672514367e-06, - "loss": 1.063, - "step": 3529 - }, - { - "epoch": 0.3183478378500248, - "grad_norm": 2.6626393384694698, - "learning_rate": 3.189469458220219e-06, - "loss": 0.9109, - "step": 3530 - }, - { - "epoch": 0.3184380213734951, - "grad_norm": 1.3329925648023317, - "learning_rate": 3.1889997476969086e-06, - "loss": 0.9916, - "step": 3531 - }, - { - "epoch": 0.3185282048969653, - "grad_norm": 1.5635611173507227, - "learning_rate": 3.188529935721583e-06, - "loss": 0.9818, - "step": 3532 - }, - { - "epoch": 0.3186183884204356, - "grad_norm": 1.3263191062763078, - "learning_rate": 3.18806002233433e-06, - "loss": 1.0552, - "step": 3533 - }, - { - "epoch": 0.31870857194390584, - "grad_norm": 1.7343227619174244, - "learning_rate": 3.187590007575245e-06, - "loss": 1.0623, - "step": 3534 - }, - { - "epoch": 0.3187987554673761, - "grad_norm": 1.458170018815355, - "learning_rate": 3.1871198914844327e-06, - "loss": 0.9307, - "step": 3535 - }, - { - "epoch": 0.31888893899084636, - "grad_norm": 1.8393589036958569, - "learning_rate": 3.1866496741020057e-06, - "loss": 0.9268, - "step": 3536 - }, - { - "epoch": 0.31897912251431665, - "grad_norm": 1.1030985086086007, - "learning_rate": 3.186179355468085e-06, - "loss": 0.9595, - "step": 3537 - }, - { - "epoch": 0.3190693060377869, - "grad_norm": 1.519465192521026, - "learning_rate": 3.1857089356228015e-06, - "loss": 0.9957, - "step": 3538 - }, - { - "epoch": 0.31915948956125717, - "grad_norm": 1.4411735863788406, - "learning_rate": 3.1852384146062933e-06, - "loss": 0.9387, - "step": 3539 - }, - { - "epoch": 0.3192496730847274, - "grad_norm": 0.5910125923961382, - "learning_rate": 3.184767792458708e-06, - "loss": 0.7987, - "step": 3540 - }, - { - "epoch": 0.3193398566081977, - "grad_norm": 1.5342785993387231, - "learning_rate": 3.1842970692202023e-06, - "loss": 1.1242, - "step": 3541 - }, - { - "epoch": 0.3194300401316679, - "grad_norm": 1.9022558924382813, - "learning_rate": 3.1838262449309403e-06, - "loss": 0.978, - "step": 3542 - }, - { - "epoch": 0.3195202236551382, - "grad_norm": 1.661947993367997, - "learning_rate": 3.1833553196310956e-06, - "loss": 0.9131, - "step": 3543 - }, - { - "epoch": 0.31961040717860845, - "grad_norm": 1.768305676190353, - "learning_rate": 3.18288429336085e-06, - "loss": 0.8599, - "step": 3544 - }, - { - "epoch": 0.31970059070207874, - "grad_norm": 1.675393795580542, - "learning_rate": 3.182413166160394e-06, - "loss": 1.1041, - "step": 3545 - }, - { - "epoch": 0.31979077422554897, - "grad_norm": 1.6665599153027608, - "learning_rate": 3.1819419380699275e-06, - "loss": 1.0401, - "step": 3546 - }, - { - "epoch": 0.31988095774901926, - "grad_norm": 1.3326578316604794, - "learning_rate": 3.181470609129658e-06, - "loss": 0.9941, - "step": 3547 - }, - { - "epoch": 0.3199711412724895, - "grad_norm": 1.9458669462166573, - "learning_rate": 3.1809991793798e-06, - "loss": 1.0514, - "step": 3548 - }, - { - "epoch": 0.3200613247959598, - "grad_norm": 1.6607749593875625, - "learning_rate": 3.1805276488605806e-06, - "loss": 0.954, - "step": 3549 - }, - { - "epoch": 0.32015150831943007, - "grad_norm": 1.3844124110830835, - "learning_rate": 3.1800560176122336e-06, - "loss": 0.9898, - "step": 3550 - }, - { - "epoch": 0.3202416918429003, - "grad_norm": 1.411071438098172, - "learning_rate": 3.179584285675e-06, - "loss": 1.0041, - "step": 3551 - }, - { - "epoch": 0.3203318753663706, - "grad_norm": 2.0353464927277085, - "learning_rate": 3.1791124530891315e-06, - "loss": 0.9036, - "step": 3552 - }, - { - "epoch": 0.3204220588898408, - "grad_norm": 1.7428500707447123, - "learning_rate": 3.178640519894886e-06, - "loss": 0.9914, - "step": 3553 - }, - { - "epoch": 0.3205122424133111, - "grad_norm": 1.6363043431174573, - "learning_rate": 3.1781684861325324e-06, - "loss": 0.9543, - "step": 3554 - }, - { - "epoch": 0.32060242593678134, - "grad_norm": 1.6312073479115072, - "learning_rate": 3.177696351842348e-06, - "loss": 0.9966, - "step": 3555 - }, - { - "epoch": 0.32069260946025163, - "grad_norm": 0.7169345650708032, - "learning_rate": 3.1772241170646167e-06, - "loss": 0.8498, - "step": 3556 - }, - { - "epoch": 0.32078279298372187, - "grad_norm": 2.236443475805099, - "learning_rate": 3.1767517818396334e-06, - "loss": 1.0969, - "step": 3557 - }, - { - "epoch": 0.32087297650719215, - "grad_norm": 1.416783954402632, - "learning_rate": 3.1762793462076986e-06, - "loss": 1.1456, - "step": 3558 - }, - { - "epoch": 0.3209631600306624, - "grad_norm": 2.0814196634873414, - "learning_rate": 3.1758068102091236e-06, - "loss": 0.8824, - "step": 3559 - }, - { - "epoch": 0.3210533435541327, - "grad_norm": 1.498645568339961, - "learning_rate": 3.175334173884229e-06, - "loss": 0.9589, - "step": 3560 - }, - { - "epoch": 0.3211435270776029, - "grad_norm": 1.811289415898114, - "learning_rate": 3.174861437273342e-06, - "loss": 0.9934, - "step": 3561 - }, - { - "epoch": 0.3212337106010732, - "grad_norm": 1.254469652848042, - "learning_rate": 3.174388600416799e-06, - "loss": 0.96, - "step": 3562 - }, - { - "epoch": 0.32132389412454343, - "grad_norm": 0.7150431703992899, - "learning_rate": 3.1739156633549445e-06, - "loss": 0.8478, - "step": 3563 - }, - { - "epoch": 0.3214140776480137, - "grad_norm": 1.6282576406268277, - "learning_rate": 3.173442626128133e-06, - "loss": 1.0524, - "step": 3564 - }, - { - "epoch": 0.32150426117148395, - "grad_norm": 1.965179111383829, - "learning_rate": 3.1729694887767265e-06, - "loss": 1.1051, - "step": 3565 - }, - { - "epoch": 0.32159444469495424, - "grad_norm": 1.50824687808343, - "learning_rate": 3.172496251341096e-06, - "loss": 0.9936, - "step": 3566 - }, - { - "epoch": 0.3216846282184245, - "grad_norm": 1.9505237193949951, - "learning_rate": 3.172022913861619e-06, - "loss": 0.8553, - "step": 3567 - }, - { - "epoch": 0.32177481174189476, - "grad_norm": 3.6955408908840788, - "learning_rate": 3.171549476378686e-06, - "loss": 0.9525, - "step": 3568 - }, - { - "epoch": 0.321864995265365, - "grad_norm": 1.8491880748736411, - "learning_rate": 3.1710759389326906e-06, - "loss": 0.962, - "step": 3569 - }, - { - "epoch": 0.3219551787888353, - "grad_norm": 2.0841620386502653, - "learning_rate": 3.1706023015640396e-06, - "loss": 0.9841, - "step": 3570 - }, - { - "epoch": 0.3220453623123055, - "grad_norm": 1.5497231513090088, - "learning_rate": 3.1701285643131453e-06, - "loss": 1.0197, - "step": 3571 - }, - { - "epoch": 0.3221355458357758, - "grad_norm": 1.6741393967200786, - "learning_rate": 3.16965472722043e-06, - "loss": 1.0048, - "step": 3572 - }, - { - "epoch": 0.32222572935924604, - "grad_norm": 2.617816450925015, - "learning_rate": 3.169180790326324e-06, - "loss": 0.9197, - "step": 3573 - }, - { - "epoch": 0.3223159128827163, - "grad_norm": 1.4370517861227323, - "learning_rate": 3.168706753671266e-06, - "loss": 1.0045, - "step": 3574 - }, - { - "epoch": 0.3224060964061866, - "grad_norm": 1.496976985125069, - "learning_rate": 3.168232617295704e-06, - "loss": 0.9296, - "step": 3575 - }, - { - "epoch": 0.32249627992965685, - "grad_norm": 1.3908974027114238, - "learning_rate": 3.167758381240093e-06, - "loss": 0.9944, - "step": 3576 - }, - { - "epoch": 0.32258646345312714, - "grad_norm": 1.4793681782032144, - "learning_rate": 3.1672840455448978e-06, - "loss": 1.036, - "step": 3577 - }, - { - "epoch": 0.32267664697659737, - "grad_norm": 1.6655785903047526, - "learning_rate": 3.166809610250592e-06, - "loss": 0.9376, - "step": 3578 - }, - { - "epoch": 0.32276683050006766, - "grad_norm": 1.4287097693625608, - "learning_rate": 3.166335075397656e-06, - "loss": 0.9251, - "step": 3579 - }, - { - "epoch": 0.3228570140235379, - "grad_norm": 1.4456272916532396, - "learning_rate": 3.1658604410265808e-06, - "loss": 0.9603, - "step": 3580 - }, - { - "epoch": 0.3229471975470082, - "grad_norm": 0.8689123303907713, - "learning_rate": 3.1653857071778644e-06, - "loss": 0.8649, - "step": 3581 - }, - { - "epoch": 0.3230373810704784, - "grad_norm": 1.583037407308375, - "learning_rate": 3.1649108738920133e-06, - "loss": 0.9893, - "step": 3582 - }, - { - "epoch": 0.3231275645939487, - "grad_norm": 1.9238407657421006, - "learning_rate": 3.1644359412095432e-06, - "loss": 1.0003, - "step": 3583 - }, - { - "epoch": 0.32321774811741893, - "grad_norm": 0.7690431625123764, - "learning_rate": 3.163960909170978e-06, - "loss": 0.8659, - "step": 3584 - }, - { - "epoch": 0.3233079316408892, - "grad_norm": 1.702970375251362, - "learning_rate": 3.1634857778168496e-06, - "loss": 0.8947, - "step": 3585 - }, - { - "epoch": 0.32339811516435946, - "grad_norm": 2.0097169860945043, - "learning_rate": 3.1630105471877002e-06, - "loss": 1.0317, - "step": 3586 - }, - { - "epoch": 0.32348829868782975, - "grad_norm": 1.7095589854308504, - "learning_rate": 3.162535217324077e-06, - "loss": 0.9758, - "step": 3587 - }, - { - "epoch": 0.3235784822113, - "grad_norm": 1.8765107108790569, - "learning_rate": 3.1620597882665393e-06, - "loss": 1.0252, - "step": 3588 - }, - { - "epoch": 0.32366866573477027, - "grad_norm": 2.0733206146111796, - "learning_rate": 3.1615842600556535e-06, - "loss": 1.0532, - "step": 3589 - }, - { - "epoch": 0.3237588492582405, - "grad_norm": 2.4732804075612354, - "learning_rate": 3.1611086327319932e-06, - "loss": 0.9885, - "step": 3590 - }, - { - "epoch": 0.3238490327817108, - "grad_norm": 1.7478207234121481, - "learning_rate": 3.160632906336142e-06, - "loss": 1.0538, - "step": 3591 - }, - { - "epoch": 0.323939216305181, - "grad_norm": 3.021114274915328, - "learning_rate": 3.160157080908692e-06, - "loss": 1.0173, - "step": 3592 - }, - { - "epoch": 0.3240293998286513, - "grad_norm": 1.2378854690689949, - "learning_rate": 3.1596811564902426e-06, - "loss": 1.0446, - "step": 3593 - }, - { - "epoch": 0.32411958335212154, - "grad_norm": 1.8898284747486866, - "learning_rate": 3.1592051331214023e-06, - "loss": 0.9318, - "step": 3594 - }, - { - "epoch": 0.32420976687559183, - "grad_norm": 1.6842872978973986, - "learning_rate": 3.158729010842789e-06, - "loss": 1.0196, - "step": 3595 - }, - { - "epoch": 0.32429995039906206, - "grad_norm": 1.3988063475003651, - "learning_rate": 3.1582527896950266e-06, - "loss": 1.0455, - "step": 3596 - }, - { - "epoch": 0.32439013392253235, - "grad_norm": 1.3674524976859084, - "learning_rate": 3.157776469718749e-06, - "loss": 1.0126, - "step": 3597 - }, - { - "epoch": 0.32448031744600264, - "grad_norm": 1.640757891404511, - "learning_rate": 3.1573000509546004e-06, - "loss": 0.9849, - "step": 3598 - }, - { - "epoch": 0.3245705009694729, - "grad_norm": 1.517522823155826, - "learning_rate": 3.1568235334432296e-06, - "loss": 1.0424, - "step": 3599 - }, - { - "epoch": 0.32466068449294316, - "grad_norm": 1.5015565425504542, - "learning_rate": 3.1563469172252964e-06, - "loss": 1.0587, - "step": 3600 - }, - { - "epoch": 0.3247508680164134, - "grad_norm": 1.837340123493915, - "learning_rate": 3.155870202341468e-06, - "loss": 1.0617, - "step": 3601 - }, - { - "epoch": 0.3248410515398837, - "grad_norm": 1.7543287554205276, - "learning_rate": 3.155393388832421e-06, - "loss": 0.989, - "step": 3602 - }, - { - "epoch": 0.3249312350633539, - "grad_norm": 1.4365290804401403, - "learning_rate": 3.1549164767388386e-06, - "loss": 1.0104, - "step": 3603 - }, - { - "epoch": 0.3250214185868242, - "grad_norm": 1.561154816461812, - "learning_rate": 3.1544394661014145e-06, - "loss": 0.9963, - "step": 3604 - }, - { - "epoch": 0.32511160211029444, - "grad_norm": 1.4666576497205355, - "learning_rate": 3.15396235696085e-06, - "loss": 0.9157, - "step": 3605 - }, - { - "epoch": 0.32520178563376473, - "grad_norm": 1.224060276196008, - "learning_rate": 3.153485149357854e-06, - "loss": 0.981, - "step": 3606 - }, - { - "epoch": 0.32529196915723496, - "grad_norm": 2.942168401736176, - "learning_rate": 3.153007843333145e-06, - "loss": 0.9823, - "step": 3607 - }, - { - "epoch": 0.32538215268070525, - "grad_norm": 2.226787569148165, - "learning_rate": 3.152530438927449e-06, - "loss": 1.0193, - "step": 3608 - }, - { - "epoch": 0.3254723362041755, - "grad_norm": 2.778025182726578, - "learning_rate": 3.1520529361815008e-06, - "loss": 1.0477, - "step": 3609 - }, - { - "epoch": 0.32556251972764577, - "grad_norm": 1.939839458115155, - "learning_rate": 3.151575335136044e-06, - "loss": 1.0311, - "step": 3610 - }, - { - "epoch": 0.325652703251116, - "grad_norm": 1.9099221427387696, - "learning_rate": 3.1510976358318298e-06, - "loss": 0.9782, - "step": 3611 - }, - { - "epoch": 0.3257428867745863, - "grad_norm": 1.430458819668947, - "learning_rate": 3.1506198383096186e-06, - "loss": 0.9446, - "step": 3612 - }, - { - "epoch": 0.3258330702980565, - "grad_norm": 1.5221012613093667, - "learning_rate": 3.150141942610178e-06, - "loss": 1.0092, - "step": 3613 - }, - { - "epoch": 0.3259232538215268, - "grad_norm": 0.661950534008245, - "learning_rate": 3.1496639487742853e-06, - "loss": 0.8454, - "step": 3614 - }, - { - "epoch": 0.32601343734499705, - "grad_norm": 1.6248107946725665, - "learning_rate": 3.1491858568427247e-06, - "loss": 1.0365, - "step": 3615 - }, - { - "epoch": 0.32610362086846734, - "grad_norm": 1.8986766648837416, - "learning_rate": 3.1487076668562903e-06, - "loss": 1.0425, - "step": 3616 - }, - { - "epoch": 0.32619380439193757, - "grad_norm": 1.6702956110472456, - "learning_rate": 3.1482293788557847e-06, - "loss": 1.0405, - "step": 3617 - }, - { - "epoch": 0.32628398791540786, - "grad_norm": 1.5530144205007252, - "learning_rate": 3.1477509928820165e-06, - "loss": 0.9702, - "step": 3618 - }, - { - "epoch": 0.3263741714388781, - "grad_norm": 1.4328610577358005, - "learning_rate": 3.147272508975805e-06, - "loss": 0.9972, - "step": 3619 - }, - { - "epoch": 0.3264643549623484, - "grad_norm": 0.6924549920642485, - "learning_rate": 3.1467939271779775e-06, - "loss": 0.8617, - "step": 3620 - }, - { - "epoch": 0.32655453848581867, - "grad_norm": 2.0820656034630667, - "learning_rate": 3.146315247529368e-06, - "loss": 0.9647, - "step": 3621 - }, - { - "epoch": 0.3266447220092889, - "grad_norm": 2.3118319190745686, - "learning_rate": 3.1458364700708212e-06, - "loss": 0.9561, - "step": 3622 - }, - { - "epoch": 0.3267349055327592, - "grad_norm": 1.3420818975646802, - "learning_rate": 3.1453575948431892e-06, - "loss": 1.0246, - "step": 3623 - }, - { - "epoch": 0.3268250890562294, - "grad_norm": 1.841112157216015, - "learning_rate": 3.144878621887331e-06, - "loss": 1.0015, - "step": 3624 - }, - { - "epoch": 0.3269152725796997, - "grad_norm": 1.8430302395008606, - "learning_rate": 3.1443995512441167e-06, - "loss": 1.0442, - "step": 3625 - }, - { - "epoch": 0.32700545610316994, - "grad_norm": 1.4078320399208009, - "learning_rate": 3.1439203829544224e-06, - "loss": 1.0703, - "step": 3626 - }, - { - "epoch": 0.32709563962664023, - "grad_norm": 1.4530018579952912, - "learning_rate": 3.143441117059133e-06, - "loss": 1.0819, - "step": 3627 - }, - { - "epoch": 0.32718582315011047, - "grad_norm": 1.6662636826018522, - "learning_rate": 3.142961753599143e-06, - "loss": 0.9511, - "step": 3628 - }, - { - "epoch": 0.32727600667358075, - "grad_norm": 1.396834835012691, - "learning_rate": 3.1424822926153543e-06, - "loss": 0.9684, - "step": 3629 - }, - { - "epoch": 0.327366190197051, - "grad_norm": 2.132661359972173, - "learning_rate": 3.142002734148676e-06, - "loss": 1.041, - "step": 3630 - }, - { - "epoch": 0.3274563737205213, - "grad_norm": 1.6331787428715825, - "learning_rate": 3.141523078240028e-06, - "loss": 1.0253, - "step": 3631 - }, - { - "epoch": 0.3275465572439915, - "grad_norm": 1.8241822853773204, - "learning_rate": 3.1410433249303366e-06, - "loss": 1.0518, - "step": 3632 - }, - { - "epoch": 0.3276367407674618, - "grad_norm": 1.6672434603479476, - "learning_rate": 3.1405634742605366e-06, - "loss": 1.0797, - "step": 3633 - }, - { - "epoch": 0.32772692429093203, - "grad_norm": 1.707856282251468, - "learning_rate": 3.1400835262715727e-06, - "loss": 1.0314, - "step": 3634 - }, - { - "epoch": 0.3278171078144023, - "grad_norm": 1.5001651355125452, - "learning_rate": 3.139603481004396e-06, - "loss": 0.9876, - "step": 3635 - }, - { - "epoch": 0.32790729133787255, - "grad_norm": 1.2995266749604317, - "learning_rate": 3.139123338499966e-06, - "loss": 1.0123, - "step": 3636 - }, - { - "epoch": 0.32799747486134284, - "grad_norm": 1.5966852870458004, - "learning_rate": 3.1386430987992524e-06, - "loss": 1.0358, - "step": 3637 - }, - { - "epoch": 0.3280876583848131, - "grad_norm": 1.783620695628253, - "learning_rate": 3.1381627619432307e-06, - "loss": 0.9468, - "step": 3638 - }, - { - "epoch": 0.32817784190828336, - "grad_norm": 2.2205772934611736, - "learning_rate": 3.1376823279728864e-06, - "loss": 1.0117, - "step": 3639 - }, - { - "epoch": 0.3282680254317536, - "grad_norm": 1.6073216051274775, - "learning_rate": 3.1372017969292125e-06, - "loss": 1.0829, - "step": 3640 - }, - { - "epoch": 0.3283582089552239, - "grad_norm": 1.5960788541258375, - "learning_rate": 3.136721168853211e-06, - "loss": 0.988, - "step": 3641 - }, - { - "epoch": 0.3284483924786941, - "grad_norm": 1.665615592772523, - "learning_rate": 3.1362404437858924e-06, - "loss": 0.9653, - "step": 3642 - }, - { - "epoch": 0.3285385760021644, - "grad_norm": 1.5264131096138986, - "learning_rate": 3.135759621768273e-06, - "loss": 1.0625, - "step": 3643 - }, - { - "epoch": 0.32862875952563464, - "grad_norm": 1.545007908254587, - "learning_rate": 3.13527870284138e-06, - "loss": 1.0478, - "step": 3644 - }, - { - "epoch": 0.32871894304910493, - "grad_norm": 1.7280715630600734, - "learning_rate": 3.134797687046249e-06, - "loss": 1.058, - "step": 3645 - }, - { - "epoch": 0.3288091265725752, - "grad_norm": 2.0410000887192776, - "learning_rate": 3.1343165744239218e-06, - "loss": 0.935, - "step": 3646 - }, - { - "epoch": 0.32889931009604545, - "grad_norm": 1.5801000793968825, - "learning_rate": 3.13383536501545e-06, - "loss": 1.016, - "step": 3647 - }, - { - "epoch": 0.32898949361951574, - "grad_norm": 1.2670112835412515, - "learning_rate": 3.133354058861893e-06, - "loss": 1.0275, - "step": 3648 - }, - { - "epoch": 0.32907967714298597, - "grad_norm": 1.8003624948433983, - "learning_rate": 3.132872656004318e-06, - "loss": 0.9672, - "step": 3649 - }, - { - "epoch": 0.32916986066645626, - "grad_norm": 1.4830232589039847, - "learning_rate": 3.132391156483802e-06, - "loss": 0.9008, - "step": 3650 - }, - { - "epoch": 0.3292600441899265, - "grad_norm": 1.4784556367866737, - "learning_rate": 3.131909560341428e-06, - "loss": 0.9562, - "step": 3651 - }, - { - "epoch": 0.3293502277133968, - "grad_norm": 1.791257153514732, - "learning_rate": 3.1314278676182893e-06, - "loss": 0.9473, - "step": 3652 - }, - { - "epoch": 0.329440411236867, - "grad_norm": 1.4190095806860274, - "learning_rate": 3.130946078355486e-06, - "loss": 1.0744, - "step": 3653 - }, - { - "epoch": 0.3295305947603373, - "grad_norm": 1.456297006789777, - "learning_rate": 3.130464192594128e-06, - "loss": 1.0361, - "step": 3654 - }, - { - "epoch": 0.32962077828380754, - "grad_norm": 1.40131295660801, - "learning_rate": 3.1299822103753315e-06, - "loss": 0.9213, - "step": 3655 - }, - { - "epoch": 0.3297109618072778, - "grad_norm": 1.5956843735749908, - "learning_rate": 3.1295001317402217e-06, - "loss": 1.0002, - "step": 3656 - }, - { - "epoch": 0.32980114533074806, - "grad_norm": 1.863047587140673, - "learning_rate": 3.1290179567299335e-06, - "loss": 0.9403, - "step": 3657 - }, - { - "epoch": 0.32989132885421835, - "grad_norm": 1.2913547826389424, - "learning_rate": 3.128535685385607e-06, - "loss": 1.0292, - "step": 3658 - }, - { - "epoch": 0.3299815123776886, - "grad_norm": 2.163994743305527, - "learning_rate": 3.1280533177483935e-06, - "loss": 1.0844, - "step": 3659 - }, - { - "epoch": 0.33007169590115887, - "grad_norm": 1.4454066890233987, - "learning_rate": 3.127570853859451e-06, - "loss": 1.0156, - "step": 3660 - }, - { - "epoch": 0.3301618794246291, - "grad_norm": 1.5458840943645253, - "learning_rate": 3.1270882937599456e-06, - "loss": 0.8894, - "step": 3661 - }, - { - "epoch": 0.3302520629480994, - "grad_norm": 1.8583714958799866, - "learning_rate": 3.1266056374910532e-06, - "loss": 1.0243, - "step": 3662 - }, - { - "epoch": 0.3303422464715696, - "grad_norm": 3.2112398399706077, - "learning_rate": 3.126122885093955e-06, - "loss": 0.8969, - "step": 3663 - }, - { - "epoch": 0.3304324299950399, - "grad_norm": 1.8664746060793755, - "learning_rate": 3.1256400366098427e-06, - "loss": 0.9913, - "step": 3664 - }, - { - "epoch": 0.33052261351851014, - "grad_norm": 2.371538651532821, - "learning_rate": 3.125157092079916e-06, - "loss": 0.9616, - "step": 3665 - }, - { - "epoch": 0.33061279704198043, - "grad_norm": 1.4582162900913376, - "learning_rate": 3.1246740515453824e-06, - "loss": 0.9725, - "step": 3666 - }, - { - "epoch": 0.33070298056545067, - "grad_norm": 1.3560916228046385, - "learning_rate": 3.124190915047457e-06, - "loss": 0.9378, - "step": 3667 - }, - { - "epoch": 0.33079316408892095, - "grad_norm": 1.548711514983884, - "learning_rate": 3.123707682627364e-06, - "loss": 0.972, - "step": 3668 - }, - { - "epoch": 0.33088334761239124, - "grad_norm": 1.5042671860405288, - "learning_rate": 3.1232243543263356e-06, - "loss": 0.9787, - "step": 3669 - }, - { - "epoch": 0.3309735311358615, - "grad_norm": 1.729952354105799, - "learning_rate": 3.1227409301856122e-06, - "loss": 0.9097, - "step": 3670 - }, - { - "epoch": 0.33106371465933176, - "grad_norm": 1.3554612305182503, - "learning_rate": 3.1222574102464413e-06, - "loss": 0.9812, - "step": 3671 - }, - { - "epoch": 0.331153898182802, - "grad_norm": 1.7248654548032034, - "learning_rate": 3.12177379455008e-06, - "loss": 1.0318, - "step": 3672 - }, - { - "epoch": 0.3312440817062723, - "grad_norm": 1.395785795892128, - "learning_rate": 3.121290083137794e-06, - "loss": 0.9928, - "step": 3673 - }, - { - "epoch": 0.3313342652297425, - "grad_norm": 1.8611241295898502, - "learning_rate": 3.1208062760508547e-06, - "loss": 0.9738, - "step": 3674 - }, - { - "epoch": 0.3314244487532128, - "grad_norm": 1.3092449788289922, - "learning_rate": 3.1203223733305438e-06, - "loss": 0.9398, - "step": 3675 - }, - { - "epoch": 0.33151463227668304, - "grad_norm": 0.6533053500482078, - "learning_rate": 3.1198383750181512e-06, - "loss": 0.8219, - "step": 3676 - }, - { - "epoch": 0.33160481580015333, - "grad_norm": 1.4007474266517446, - "learning_rate": 3.1193542811549734e-06, - "loss": 0.9256, - "step": 3677 - }, - { - "epoch": 0.33169499932362356, - "grad_norm": 8.528765753371287, - "learning_rate": 3.1188700917823166e-06, - "loss": 0.9209, - "step": 3678 - }, - { - "epoch": 0.33178518284709385, - "grad_norm": 0.6782457912116225, - "learning_rate": 3.1183858069414937e-06, - "loss": 0.8797, - "step": 3679 - }, - { - "epoch": 0.3318753663705641, - "grad_norm": 1.780401127976056, - "learning_rate": 3.117901426673827e-06, - "loss": 1.0559, - "step": 3680 - }, - { - "epoch": 0.3319655498940344, - "grad_norm": 1.808195593491282, - "learning_rate": 3.1174169510206466e-06, - "loss": 0.9607, - "step": 3681 - }, - { - "epoch": 0.3320557334175046, - "grad_norm": 1.60227206259049, - "learning_rate": 3.1169323800232908e-06, - "loss": 1.0024, - "step": 3682 - }, - { - "epoch": 0.3321459169409749, - "grad_norm": 1.5780178071432702, - "learning_rate": 3.1164477137231054e-06, - "loss": 1.029, - "step": 3683 - }, - { - "epoch": 0.3322361004644451, - "grad_norm": 1.6914393045133667, - "learning_rate": 3.115962952161445e-06, - "loss": 1.0008, - "step": 3684 - }, - { - "epoch": 0.3323262839879154, - "grad_norm": 1.8611131766135425, - "learning_rate": 3.1154780953796727e-06, - "loss": 1.033, - "step": 3685 - }, - { - "epoch": 0.33241646751138565, - "grad_norm": 1.5145429701151418, - "learning_rate": 3.114993143419158e-06, - "loss": 0.906, - "step": 3686 - }, - { - "epoch": 0.33250665103485594, - "grad_norm": 1.2147360014892985, - "learning_rate": 3.1145080963212806e-06, - "loss": 0.9038, - "step": 3687 - }, - { - "epoch": 0.33259683455832617, - "grad_norm": 1.2927220964992692, - "learning_rate": 3.114022954127427e-06, - "loss": 1.0029, - "step": 3688 - }, - { - "epoch": 0.33268701808179646, - "grad_norm": 1.687445180497019, - "learning_rate": 3.1135377168789923e-06, - "loss": 1.1982, - "step": 3689 - }, - { - "epoch": 0.3327772016052667, - "grad_norm": 1.584893921512383, - "learning_rate": 3.1130523846173803e-06, - "loss": 1.0573, - "step": 3690 - }, - { - "epoch": 0.332867385128737, - "grad_norm": 1.9565987784887122, - "learning_rate": 3.1125669573840006e-06, - "loss": 0.9472, - "step": 3691 - }, - { - "epoch": 0.3329575686522072, - "grad_norm": 1.7244241167838898, - "learning_rate": 3.112081435220274e-06, - "loss": 0.9427, - "step": 3692 - }, - { - "epoch": 0.3330477521756775, - "grad_norm": 1.4170202861079495, - "learning_rate": 3.111595818167627e-06, - "loss": 0.9829, - "step": 3693 - }, - { - "epoch": 0.3331379356991478, - "grad_norm": 1.8803285540947343, - "learning_rate": 3.1111101062674953e-06, - "loss": 1.0488, - "step": 3694 - }, - { - "epoch": 0.333228119222618, - "grad_norm": 1.3296710272193366, - "learning_rate": 3.1106242995613233e-06, - "loss": 0.9808, - "step": 3695 - }, - { - "epoch": 0.3333183027460883, - "grad_norm": 1.7281354410639036, - "learning_rate": 3.1101383980905616e-06, - "loss": 0.9916, - "step": 3696 - }, - { - "epoch": 0.33340848626955855, - "grad_norm": 0.6517541928143652, - "learning_rate": 3.109652401896671e-06, - "loss": 0.8171, - "step": 3697 - }, - { - "epoch": 0.33349866979302883, - "grad_norm": 1.422067650120688, - "learning_rate": 3.109166311021119e-06, - "loss": 1.0439, - "step": 3698 - }, - { - "epoch": 0.33358885331649907, - "grad_norm": 1.8412660580185398, - "learning_rate": 3.1086801255053807e-06, - "loss": 1.0433, - "step": 3699 - }, - { - "epoch": 0.33367903683996936, - "grad_norm": 1.521104248512218, - "learning_rate": 3.108193845390942e-06, - "loss": 1.0371, - "step": 3700 - }, - { - "epoch": 0.3337692203634396, - "grad_norm": 1.4512349624577563, - "learning_rate": 3.1077074707192933e-06, - "loss": 0.9704, - "step": 3701 - }, - { - "epoch": 0.3338594038869099, - "grad_norm": 1.3218944034099243, - "learning_rate": 3.1072210015319353e-06, - "loss": 1.018, - "step": 3702 - }, - { - "epoch": 0.3339495874103801, - "grad_norm": 1.4618306413291633, - "learning_rate": 3.106734437870376e-06, - "loss": 0.951, - "step": 3703 - }, - { - "epoch": 0.3340397709338504, - "grad_norm": 2.744838031374792, - "learning_rate": 3.1062477797761327e-06, - "loss": 0.9333, - "step": 3704 - }, - { - "epoch": 0.33412995445732063, - "grad_norm": 1.5419325814011935, - "learning_rate": 3.105761027290729e-06, - "loss": 1.0375, - "step": 3705 - }, - { - "epoch": 0.3342201379807909, - "grad_norm": 5.947972428238053, - "learning_rate": 3.105274180455697e-06, - "loss": 0.958, - "step": 3706 - }, - { - "epoch": 0.33431032150426115, - "grad_norm": 1.4396710793693333, - "learning_rate": 3.1047872393125775e-06, - "loss": 1.0169, - "step": 3707 - }, - { - "epoch": 0.33440050502773144, - "grad_norm": 0.6740407414786556, - "learning_rate": 3.1043002039029186e-06, - "loss": 0.8532, - "step": 3708 - }, - { - "epoch": 0.3344906885512017, - "grad_norm": 1.3279525196049957, - "learning_rate": 3.1038130742682782e-06, - "loss": 0.9838, - "step": 3709 - }, - { - "epoch": 0.33458087207467196, - "grad_norm": 1.6714488404364307, - "learning_rate": 3.103325850450219e-06, - "loss": 0.9722, - "step": 3710 - }, - { - "epoch": 0.3346710555981422, - "grad_norm": 6.121911652140312, - "learning_rate": 3.1028385324903154e-06, - "loss": 1.0371, - "step": 3711 - }, - { - "epoch": 0.3347612391216125, - "grad_norm": 1.4028459675060458, - "learning_rate": 3.1023511204301465e-06, - "loss": 0.998, - "step": 3712 - }, - { - "epoch": 0.3348514226450827, - "grad_norm": 1.3349424526941245, - "learning_rate": 3.1018636143113022e-06, - "loss": 0.9613, - "step": 3713 - }, - { - "epoch": 0.334941606168553, - "grad_norm": 1.4791078779689464, - "learning_rate": 3.1013760141753787e-06, - "loss": 0.9003, - "step": 3714 - }, - { - "epoch": 0.33503178969202324, - "grad_norm": 1.5687609546780426, - "learning_rate": 3.100888320063981e-06, - "loss": 0.9649, - "step": 3715 - }, - { - "epoch": 0.33512197321549353, - "grad_norm": 1.3679957589228737, - "learning_rate": 3.100400532018721e-06, - "loss": 0.997, - "step": 3716 - }, - { - "epoch": 0.3352121567389638, - "grad_norm": 1.7676633698367175, - "learning_rate": 3.0999126500812204e-06, - "loss": 1.0636, - "step": 3717 - }, - { - "epoch": 0.33530234026243405, - "grad_norm": 0.7813866305086014, - "learning_rate": 3.0994246742931076e-06, - "loss": 0.9028, - "step": 3718 - }, - { - "epoch": 0.33539252378590434, - "grad_norm": 4.251605011300925, - "learning_rate": 3.098936604696019e-06, - "loss": 0.9373, - "step": 3719 - }, - { - "epoch": 0.3354827073093746, - "grad_norm": 1.69437590010509, - "learning_rate": 3.0984484413316e-06, - "loss": 1.0287, - "step": 3720 - }, - { - "epoch": 0.33557289083284486, - "grad_norm": 5.225512480357601, - "learning_rate": 3.0979601842415033e-06, - "loss": 0.9753, - "step": 3721 - }, - { - "epoch": 0.3356630743563151, - "grad_norm": 1.3158647006264035, - "learning_rate": 3.0974718334673896e-06, - "loss": 0.9138, - "step": 3722 - }, - { - "epoch": 0.3357532578797854, - "grad_norm": 1.446104173092037, - "learning_rate": 3.0969833890509282e-06, - "loss": 0.9926, - "step": 3723 - }, - { - "epoch": 0.3358434414032556, - "grad_norm": 1.625947969202418, - "learning_rate": 3.096494851033795e-06, - "loss": 0.9895, - "step": 3724 - }, - { - "epoch": 0.3359336249267259, - "grad_norm": 1.66991366557284, - "learning_rate": 3.0960062194576747e-06, - "loss": 0.9172, - "step": 3725 - }, - { - "epoch": 0.33602380845019614, - "grad_norm": 1.5485838881505665, - "learning_rate": 3.0955174943642606e-06, - "loss": 0.9445, - "step": 3726 - }, - { - "epoch": 0.3361139919736664, - "grad_norm": 1.4166080892831427, - "learning_rate": 3.0950286757952534e-06, - "loss": 0.9158, - "step": 3727 - }, - { - "epoch": 0.33620417549713666, - "grad_norm": 1.5125545586448104, - "learning_rate": 3.0945397637923617e-06, - "loss": 1.0621, - "step": 3728 - }, - { - "epoch": 0.33629435902060695, - "grad_norm": 1.7852169502339108, - "learning_rate": 3.0940507583973025e-06, - "loss": 0.996, - "step": 3729 - }, - { - "epoch": 0.3363845425440772, - "grad_norm": 1.4034383428176527, - "learning_rate": 3.093561659651799e-06, - "loss": 0.965, - "step": 3730 - }, - { - "epoch": 0.33647472606754747, - "grad_norm": 1.3799920062510849, - "learning_rate": 3.093072467597586e-06, - "loss": 0.9249, - "step": 3731 - }, - { - "epoch": 0.3365649095910177, - "grad_norm": 0.6534997211899342, - "learning_rate": 3.092583182276402e-06, - "loss": 0.8647, - "step": 3732 - }, - { - "epoch": 0.336655093114488, - "grad_norm": 0.6715887258245085, - "learning_rate": 3.092093803729997e-06, - "loss": 0.8626, - "step": 3733 - }, - { - "epoch": 0.3367452766379582, - "grad_norm": 2.891409112928365, - "learning_rate": 3.0916043320001264e-06, - "loss": 1.0114, - "step": 3734 - }, - { - "epoch": 0.3368354601614285, - "grad_norm": 1.625438190947768, - "learning_rate": 3.0911147671285557e-06, - "loss": 1.0049, - "step": 3735 - }, - { - "epoch": 0.33692564368489875, - "grad_norm": 1.466386232153093, - "learning_rate": 3.0906251091570565e-06, - "loss": 1.0221, - "step": 3736 - }, - { - "epoch": 0.33701582720836903, - "grad_norm": 2.8219788085498694, - "learning_rate": 3.0901353581274094e-06, - "loss": 1.0143, - "step": 3737 - }, - { - "epoch": 0.33710601073183927, - "grad_norm": 1.4120547968657242, - "learning_rate": 3.089645514081402e-06, - "loss": 0.8756, - "step": 3738 - }, - { - "epoch": 0.33719619425530956, - "grad_norm": 5.201439386837675, - "learning_rate": 3.0891555770608323e-06, - "loss": 0.9216, - "step": 3739 - }, - { - "epoch": 0.33728637777877984, - "grad_norm": 2.043437254888622, - "learning_rate": 3.088665547107503e-06, - "loss": 1.0224, - "step": 3740 - }, - { - "epoch": 0.3373765613022501, - "grad_norm": 1.4316177262248249, - "learning_rate": 3.0881754242632254e-06, - "loss": 1.0735, - "step": 3741 - }, - { - "epoch": 0.33746674482572037, - "grad_norm": 1.3501664376975393, - "learning_rate": 3.0876852085698213e-06, - "loss": 1.0427, - "step": 3742 - }, - { - "epoch": 0.3375569283491906, - "grad_norm": 1.5479242638683468, - "learning_rate": 3.087194900069117e-06, - "loss": 1.0215, - "step": 3743 - }, - { - "epoch": 0.3376471118726609, - "grad_norm": 1.586745483834364, - "learning_rate": 3.08670449880295e-06, - "loss": 1.0489, - "step": 3744 - }, - { - "epoch": 0.3377372953961311, - "grad_norm": 4.029627747109333, - "learning_rate": 3.086214004813163e-06, - "loss": 1.0182, - "step": 3745 - }, - { - "epoch": 0.3378274789196014, - "grad_norm": 1.8914155804649364, - "learning_rate": 3.0857234181416074e-06, - "loss": 1.0619, - "step": 3746 - }, - { - "epoch": 0.33791766244307164, - "grad_norm": 2.7771544573052154, - "learning_rate": 3.085232738830143e-06, - "loss": 1.0436, - "step": 3747 - }, - { - "epoch": 0.33800784596654193, - "grad_norm": 1.7358164939973149, - "learning_rate": 3.084741966920638e-06, - "loss": 1.0579, - "step": 3748 - }, - { - "epoch": 0.33809802949001216, - "grad_norm": 1.7184169099735822, - "learning_rate": 3.084251102454966e-06, - "loss": 0.9541, - "step": 3749 - }, - { - "epoch": 0.33818821301348245, - "grad_norm": 0.7546402120682479, - "learning_rate": 3.083760145475013e-06, - "loss": 0.8714, - "step": 3750 - }, - { - "epoch": 0.3382783965369527, - "grad_norm": 1.468589043421488, - "learning_rate": 3.0832690960226678e-06, - "loss": 0.9412, - "step": 3751 - }, - { - "epoch": 0.338368580060423, - "grad_norm": 2.05355618157265, - "learning_rate": 3.08277795413983e-06, - "loss": 0.9651, - "step": 3752 - }, - { - "epoch": 0.3384587635838932, - "grad_norm": 1.5764471520380712, - "learning_rate": 3.0822867198684073e-06, - "loss": 1.0401, - "step": 3753 - }, - { - "epoch": 0.3385489471073635, - "grad_norm": 1.4723126645568039, - "learning_rate": 3.081795393250314e-06, - "loss": 1.0007, - "step": 3754 - }, - { - "epoch": 0.33863913063083373, - "grad_norm": 1.96042611190461, - "learning_rate": 3.081303974327473e-06, - "loss": 0.9657, - "step": 3755 - }, - { - "epoch": 0.338729314154304, - "grad_norm": 1.3280601485631587, - "learning_rate": 3.080812463141814e-06, - "loss": 0.9897, - "step": 3756 - }, - { - "epoch": 0.33881949767777425, - "grad_norm": 1.9554070530423502, - "learning_rate": 3.080320859735276e-06, - "loss": 0.8643, - "step": 3757 - }, - { - "epoch": 0.33890968120124454, - "grad_norm": 1.6792132750574813, - "learning_rate": 3.079829164149806e-06, - "loss": 1.046, - "step": 3758 - }, - { - "epoch": 0.3389998647247148, - "grad_norm": 1.5910424814409054, - "learning_rate": 3.0793373764273573e-06, - "loss": 0.9829, - "step": 3759 - }, - { - "epoch": 0.33909004824818506, - "grad_norm": 1.529185090053632, - "learning_rate": 3.078845496609892e-06, - "loss": 1.023, - "step": 3760 - }, - { - "epoch": 0.3391802317716553, - "grad_norm": 1.3976952911434095, - "learning_rate": 3.078353524739381e-06, - "loss": 1.0786, - "step": 3761 - }, - { - "epoch": 0.3392704152951256, - "grad_norm": 1.6032881952991698, - "learning_rate": 3.077861460857801e-06, - "loss": 1.0821, - "step": 3762 - }, - { - "epoch": 0.3393605988185958, - "grad_norm": 1.4288693781496764, - "learning_rate": 3.077369305007138e-06, - "loss": 1.019, - "step": 3763 - }, - { - "epoch": 0.3394507823420661, - "grad_norm": 1.7774536769534135, - "learning_rate": 3.0768770572293852e-06, - "loss": 1.0317, - "step": 3764 - }, - { - "epoch": 0.3395409658655364, - "grad_norm": 1.919054955220886, - "learning_rate": 3.0763847175665437e-06, - "loss": 1.0724, - "step": 3765 - }, - { - "epoch": 0.3396311493890066, - "grad_norm": 1.6184355853876522, - "learning_rate": 3.0758922860606237e-06, - "loss": 0.9388, - "step": 3766 - }, - { - "epoch": 0.3397213329124769, - "grad_norm": 1.4780402483847783, - "learning_rate": 3.0753997627536404e-06, - "loss": 0.9321, - "step": 3767 - }, - { - "epoch": 0.33981151643594715, - "grad_norm": 2.6183974472571916, - "learning_rate": 3.0749071476876203e-06, - "loss": 0.9517, - "step": 3768 - }, - { - "epoch": 0.33990169995941744, - "grad_norm": 3.2321309583967297, - "learning_rate": 3.0744144409045952e-06, - "loss": 0.9633, - "step": 3769 - }, - { - "epoch": 0.33999188348288767, - "grad_norm": 1.7618813450250115, - "learning_rate": 3.0739216424466056e-06, - "loss": 1.0142, - "step": 3770 - }, - { - "epoch": 0.34008206700635796, - "grad_norm": 1.878505798936046, - "learning_rate": 3.0734287523557002e-06, - "loss": 0.971, - "step": 3771 - }, - { - "epoch": 0.3401722505298282, - "grad_norm": 2.3253660303574533, - "learning_rate": 3.0729357706739348e-06, - "loss": 0.9521, - "step": 3772 - }, - { - "epoch": 0.3402624340532985, - "grad_norm": 1.4692300864105543, - "learning_rate": 3.0724426974433737e-06, - "loss": 0.9866, - "step": 3773 - }, - { - "epoch": 0.3403526175767687, - "grad_norm": 1.4673279716350989, - "learning_rate": 3.0719495327060874e-06, - "loss": 0.9667, - "step": 3774 - }, - { - "epoch": 0.340442801100239, - "grad_norm": 1.4225633233696777, - "learning_rate": 3.071456276504157e-06, - "loss": 1.002, - "step": 3775 - }, - { - "epoch": 0.34053298462370923, - "grad_norm": 1.352793094727586, - "learning_rate": 3.070962928879669e-06, - "loss": 0.9965, - "step": 3776 - }, - { - "epoch": 0.3406231681471795, - "grad_norm": 1.6627801960182187, - "learning_rate": 3.0704694898747185e-06, - "loss": 1.0158, - "step": 3777 - }, - { - "epoch": 0.34071335167064976, - "grad_norm": 1.7478145167889716, - "learning_rate": 3.069975959531408e-06, - "loss": 1.1225, - "step": 3778 - }, - { - "epoch": 0.34080353519412004, - "grad_norm": 0.7724508677910152, - "learning_rate": 3.06948233789185e-06, - "loss": 0.9042, - "step": 3779 - }, - { - "epoch": 0.3408937187175903, - "grad_norm": 1.6920533385064882, - "learning_rate": 3.0689886249981614e-06, - "loss": 1.0152, - "step": 3780 - }, - { - "epoch": 0.34098390224106057, - "grad_norm": 1.3445934044116294, - "learning_rate": 3.0684948208924693e-06, - "loss": 1.0223, - "step": 3781 - }, - { - "epoch": 0.3410740857645308, - "grad_norm": 1.5676936236564842, - "learning_rate": 3.068000925616907e-06, - "loss": 1.0177, - "step": 3782 - }, - { - "epoch": 0.3411642692880011, - "grad_norm": 1.7042339809090712, - "learning_rate": 3.067506939213617e-06, - "loss": 0.9949, - "step": 3783 - }, - { - "epoch": 0.3412544528114713, - "grad_norm": 1.2950652950719341, - "learning_rate": 3.0670128617247493e-06, - "loss": 1.0015, - "step": 3784 - }, - { - "epoch": 0.3413446363349416, - "grad_norm": 2.1090104529825817, - "learning_rate": 3.06651869319246e-06, - "loss": 0.9093, - "step": 3785 - }, - { - "epoch": 0.34143481985841184, - "grad_norm": 1.60015464869798, - "learning_rate": 3.0660244336589154e-06, - "loss": 0.997, - "step": 3786 - }, - { - "epoch": 0.34152500338188213, - "grad_norm": 1.3821512570201604, - "learning_rate": 3.065530083166288e-06, - "loss": 1.009, - "step": 3787 - }, - { - "epoch": 0.3416151869053524, - "grad_norm": 1.4275439092293134, - "learning_rate": 3.0650356417567586e-06, - "loss": 0.9576, - "step": 3788 - }, - { - "epoch": 0.34170537042882265, - "grad_norm": 1.4549472007478867, - "learning_rate": 3.0645411094725156e-06, - "loss": 1.0429, - "step": 3789 - }, - { - "epoch": 0.34179555395229294, - "grad_norm": 1.6093437414003944, - "learning_rate": 3.0640464863557556e-06, - "loss": 0.9937, - "step": 3790 - }, - { - "epoch": 0.3418857374757632, - "grad_norm": 1.2329193417046445, - "learning_rate": 3.063551772448682e-06, - "loss": 1.0301, - "step": 3791 - }, - { - "epoch": 0.34197592099923346, - "grad_norm": 1.9402711186271588, - "learning_rate": 3.0630569677935075e-06, - "loss": 0.9299, - "step": 3792 - }, - { - "epoch": 0.3420661045227037, - "grad_norm": 2.1075577431367134, - "learning_rate": 3.06256207243245e-06, - "loss": 1.0766, - "step": 3793 - }, - { - "epoch": 0.342156288046174, - "grad_norm": 0.6566464271266377, - "learning_rate": 3.0620670864077385e-06, - "loss": 0.8454, - "step": 3794 - }, - { - "epoch": 0.3422464715696442, - "grad_norm": 0.6302132619449634, - "learning_rate": 3.0615720097616063e-06, - "loss": 0.8387, - "step": 3795 - }, - { - "epoch": 0.3423366550931145, - "grad_norm": 1.4906789933828364, - "learning_rate": 3.0610768425362967e-06, - "loss": 0.9535, - "step": 3796 - }, - { - "epoch": 0.34242683861658474, - "grad_norm": 1.5762419844566515, - "learning_rate": 3.0605815847740603e-06, - "loss": 0.981, - "step": 3797 - }, - { - "epoch": 0.342517022140055, - "grad_norm": 1.576574942896032, - "learning_rate": 3.0600862365171553e-06, - "loss": 0.9626, - "step": 3798 - }, - { - "epoch": 0.34260720566352526, - "grad_norm": 1.48422384998326, - "learning_rate": 3.0595907978078474e-06, - "loss": 0.9729, - "step": 3799 - }, - { - "epoch": 0.34269738918699555, - "grad_norm": 1.4241832400198946, - "learning_rate": 3.05909526868841e-06, - "loss": 0.9712, - "step": 3800 - }, - { - "epoch": 0.3427875727104658, - "grad_norm": 1.4234267769335316, - "learning_rate": 3.0585996492011243e-06, - "loss": 0.9992, - "step": 3801 - }, - { - "epoch": 0.34287775623393607, - "grad_norm": 1.6946973959274936, - "learning_rate": 3.05810393938828e-06, - "loss": 1.0388, - "step": 3802 - }, - { - "epoch": 0.3429679397574063, - "grad_norm": 1.8926415526289364, - "learning_rate": 3.0576081392921723e-06, - "loss": 0.9102, - "step": 3803 - }, - { - "epoch": 0.3430581232808766, - "grad_norm": 1.5089453360109146, - "learning_rate": 3.057112248955107e-06, - "loss": 1.0458, - "step": 3804 - }, - { - "epoch": 0.3431483068043468, - "grad_norm": 2.4225077141008096, - "learning_rate": 3.0566162684193963e-06, - "loss": 1.019, - "step": 3805 - }, - { - "epoch": 0.3432384903278171, - "grad_norm": 1.3615328006862948, - "learning_rate": 3.056120197727359e-06, - "loss": 0.9464, - "step": 3806 - }, - { - "epoch": 0.34332867385128735, - "grad_norm": 1.3981051343070101, - "learning_rate": 3.0556240369213236e-06, - "loss": 0.989, - "step": 3807 - }, - { - "epoch": 0.34341885737475764, - "grad_norm": 1.8653965105619827, - "learning_rate": 3.055127786043624e-06, - "loss": 0.9059, - "step": 3808 - }, - { - "epoch": 0.34350904089822787, - "grad_norm": 1.2646145967057687, - "learning_rate": 3.054631445136604e-06, - "loss": 1.0841, - "step": 3809 - }, - { - "epoch": 0.34359922442169816, - "grad_norm": 2.0481725176553995, - "learning_rate": 3.0541350142426147e-06, - "loss": 1.0125, - "step": 3810 - }, - { - "epoch": 0.3436894079451684, - "grad_norm": 1.4014803212717135, - "learning_rate": 3.053638493404012e-06, - "loss": 0.9527, - "step": 3811 - }, - { - "epoch": 0.3437795914686387, - "grad_norm": 1.4009759549562804, - "learning_rate": 3.0531418826631643e-06, - "loss": 0.9353, - "step": 3812 - }, - { - "epoch": 0.34386977499210897, - "grad_norm": 1.33424663060238, - "learning_rate": 3.052645182062444e-06, - "loss": 1.0808, - "step": 3813 - }, - { - "epoch": 0.3439599585155792, - "grad_norm": 2.418796970591502, - "learning_rate": 3.0521483916442324e-06, - "loss": 1.0311, - "step": 3814 - }, - { - "epoch": 0.3440501420390495, - "grad_norm": 1.4343082819695494, - "learning_rate": 3.0516515114509183e-06, - "loss": 0.9889, - "step": 3815 - }, - { - "epoch": 0.3441403255625197, - "grad_norm": 1.348747143605401, - "learning_rate": 3.0511545415249e-06, - "loss": 0.9482, - "step": 3816 - }, - { - "epoch": 0.34423050908599, - "grad_norm": 1.7509210070313765, - "learning_rate": 3.050657481908579e-06, - "loss": 1.0215, - "step": 3817 - }, - { - "epoch": 0.34432069260946024, - "grad_norm": 1.5710823866757135, - "learning_rate": 3.0501603326443677e-06, - "loss": 1.1038, - "step": 3818 - }, - { - "epoch": 0.34441087613293053, - "grad_norm": 1.372313258698722, - "learning_rate": 3.049663093774687e-06, - "loss": 0.9857, - "step": 3819 - }, - { - "epoch": 0.34450105965640077, - "grad_norm": 1.8863252659892717, - "learning_rate": 3.0491657653419643e-06, - "loss": 0.8986, - "step": 3820 - }, - { - "epoch": 0.34459124317987105, - "grad_norm": 1.671610017972212, - "learning_rate": 3.0486683473886325e-06, - "loss": 1.0457, - "step": 3821 - }, - { - "epoch": 0.3446814267033413, - "grad_norm": 2.2873866485694347, - "learning_rate": 3.0481708399571355e-06, - "loss": 1.0454, - "step": 3822 - }, - { - "epoch": 0.3447716102268116, - "grad_norm": 1.5274677665293808, - "learning_rate": 3.047673243089922e-06, - "loss": 1.0168, - "step": 3823 - }, - { - "epoch": 0.3448617937502818, - "grad_norm": 1.5230818113168554, - "learning_rate": 3.047175556829451e-06, - "loss": 1.0257, - "step": 3824 - }, - { - "epoch": 0.3449519772737521, - "grad_norm": 14.928536634737092, - "learning_rate": 3.046677781218188e-06, - "loss": 1.0421, - "step": 3825 - }, - { - "epoch": 0.34504216079722233, - "grad_norm": 1.7198832157320554, - "learning_rate": 3.0461799162986043e-06, - "loss": 0.9718, - "step": 3826 - }, - { - "epoch": 0.3451323443206926, - "grad_norm": 1.586148478187899, - "learning_rate": 3.045681962113183e-06, - "loss": 0.9222, - "step": 3827 - }, - { - "epoch": 0.34522252784416285, - "grad_norm": 1.5454855950189283, - "learning_rate": 3.0451839187044095e-06, - "loss": 0.9466, - "step": 3828 - }, - { - "epoch": 0.34531271136763314, - "grad_norm": 1.5935849122955208, - "learning_rate": 3.0446857861147816e-06, - "loss": 1.0133, - "step": 3829 - }, - { - "epoch": 0.3454028948911034, - "grad_norm": 1.6477747303457297, - "learning_rate": 3.044187564386802e-06, - "loss": 0.9686, - "step": 3830 - }, - { - "epoch": 0.34549307841457366, - "grad_norm": 2.3200416278392995, - "learning_rate": 3.0436892535629818e-06, - "loss": 1.0621, - "step": 3831 - }, - { - "epoch": 0.3455832619380439, - "grad_norm": 1.57913794914981, - "learning_rate": 3.0431908536858393e-06, - "loss": 1.0745, - "step": 3832 - }, - { - "epoch": 0.3456734454615142, - "grad_norm": 1.6330847992239408, - "learning_rate": 3.0426923647979016e-06, - "loss": 0.9859, - "step": 3833 - }, - { - "epoch": 0.3457636289849844, - "grad_norm": 1.5647453197159977, - "learning_rate": 3.0421937869417016e-06, - "loss": 0.9379, - "step": 3834 - }, - { - "epoch": 0.3458538125084547, - "grad_norm": 1.6462261680783914, - "learning_rate": 3.041695120159782e-06, - "loss": 1.063, - "step": 3835 - }, - { - "epoch": 0.345943996031925, - "grad_norm": 1.6959715937583764, - "learning_rate": 3.04119636449469e-06, - "loss": 0.9408, - "step": 3836 - }, - { - "epoch": 0.3460341795553952, - "grad_norm": 1.6915834963919172, - "learning_rate": 3.040697519988983e-06, - "loss": 1.0258, - "step": 3837 - }, - { - "epoch": 0.3461243630788655, - "grad_norm": 0.6390407092196335, - "learning_rate": 3.040198586685226e-06, - "loss": 0.8439, - "step": 3838 - }, - { - "epoch": 0.34621454660233575, - "grad_norm": 1.5947834478955951, - "learning_rate": 3.039699564625989e-06, - "loss": 0.9549, - "step": 3839 - }, - { - "epoch": 0.34630473012580604, - "grad_norm": 0.7220741608248605, - "learning_rate": 3.039200453853853e-06, - "loss": 0.8567, - "step": 3840 - }, - { - "epoch": 0.34639491364927627, - "grad_norm": 1.7399328036870951, - "learning_rate": 3.038701254411404e-06, - "loss": 1.0451, - "step": 3841 - }, - { - "epoch": 0.34648509717274656, - "grad_norm": 1.6571464271929766, - "learning_rate": 3.0382019663412367e-06, - "loss": 1.0134, - "step": 3842 - }, - { - "epoch": 0.3465752806962168, - "grad_norm": 2.5603308451966194, - "learning_rate": 3.0377025896859532e-06, - "loss": 1.0371, - "step": 3843 - }, - { - "epoch": 0.3466654642196871, - "grad_norm": 1.3846969695080606, - "learning_rate": 3.0372031244881627e-06, - "loss": 1.1026, - "step": 3844 - }, - { - "epoch": 0.3467556477431573, - "grad_norm": 1.460364489455416, - "learning_rate": 3.0367035707904826e-06, - "loss": 0.9637, - "step": 3845 - }, - { - "epoch": 0.3468458312666276, - "grad_norm": 1.5020628096632924, - "learning_rate": 3.036203928635537e-06, - "loss": 0.9166, - "step": 3846 - }, - { - "epoch": 0.34693601479009784, - "grad_norm": 1.5862531676685399, - "learning_rate": 3.035704198065959e-06, - "loss": 1.0672, - "step": 3847 - }, - { - "epoch": 0.3470261983135681, - "grad_norm": 1.618174449861991, - "learning_rate": 3.0352043791243886e-06, - "loss": 0.9784, - "step": 3848 - }, - { - "epoch": 0.34711638183703836, - "grad_norm": 0.6667465494894047, - "learning_rate": 3.034704471853472e-06, - "loss": 0.8475, - "step": 3849 - }, - { - "epoch": 0.34720656536050865, - "grad_norm": 4.615998658879183, - "learning_rate": 3.0342044762958646e-06, - "loss": 1.0454, - "step": 3850 - }, - { - "epoch": 0.3472967488839789, - "grad_norm": 1.577071792110138, - "learning_rate": 3.0337043924942286e-06, - "loss": 1.0403, - "step": 3851 - }, - { - "epoch": 0.34738693240744917, - "grad_norm": 1.3621696159097183, - "learning_rate": 3.0332042204912343e-06, - "loss": 0.9843, - "step": 3852 - }, - { - "epoch": 0.3474771159309194, - "grad_norm": 0.7133130637821519, - "learning_rate": 3.0327039603295587e-06, - "loss": 0.8879, - "step": 3853 - }, - { - "epoch": 0.3475672994543897, - "grad_norm": 1.7253451651624914, - "learning_rate": 3.032203612051887e-06, - "loss": 0.9634, - "step": 3854 - }, - { - "epoch": 0.3476574829778599, - "grad_norm": 1.7640095975662338, - "learning_rate": 3.0317031757009116e-06, - "loss": 0.9881, - "step": 3855 - }, - { - "epoch": 0.3477476665013302, - "grad_norm": 1.4321577899672122, - "learning_rate": 3.0312026513193326e-06, - "loss": 0.9626, - "step": 3856 - }, - { - "epoch": 0.34783785002480044, - "grad_norm": 1.3883704007128286, - "learning_rate": 3.0307020389498573e-06, - "loss": 0.9717, - "step": 3857 - }, - { - "epoch": 0.34792803354827073, - "grad_norm": 1.4467461975638316, - "learning_rate": 3.0302013386352004e-06, - "loss": 0.9324, - "step": 3858 - }, - { - "epoch": 0.348018217071741, - "grad_norm": 1.4006242756669343, - "learning_rate": 3.0297005504180854e-06, - "loss": 0.9898, - "step": 3859 - }, - { - "epoch": 0.34810840059521125, - "grad_norm": 0.6502779155592365, - "learning_rate": 3.0291996743412417e-06, - "loss": 0.7817, - "step": 3860 - }, - { - "epoch": 0.34819858411868154, - "grad_norm": 1.5848757943666552, - "learning_rate": 3.0286987104474063e-06, - "loss": 1.0858, - "step": 3861 - }, - { - "epoch": 0.3482887676421518, - "grad_norm": 1.094285615836, - "learning_rate": 3.028197658779325e-06, - "loss": 0.9717, - "step": 3862 - }, - { - "epoch": 0.34837895116562206, - "grad_norm": 1.5149677857351243, - "learning_rate": 3.0276965193797503e-06, - "loss": 1.0222, - "step": 3863 - }, - { - "epoch": 0.3484691346890923, - "grad_norm": 1.319343798407142, - "learning_rate": 3.0271952922914423e-06, - "loss": 0.9653, - "step": 3864 - }, - { - "epoch": 0.3485593182125626, - "grad_norm": 1.4457688719408086, - "learning_rate": 3.0266939775571675e-06, - "loss": 0.98, - "step": 3865 - }, - { - "epoch": 0.3486495017360328, - "grad_norm": 2.007782457178871, - "learning_rate": 3.026192575219701e-06, - "loss": 0.9834, - "step": 3866 - }, - { - "epoch": 0.3487396852595031, - "grad_norm": 1.315395703120998, - "learning_rate": 3.025691085321826e-06, - "loss": 0.8928, - "step": 3867 - }, - { - "epoch": 0.34882986878297334, - "grad_norm": 1.4240673895009632, - "learning_rate": 3.025189507906332e-06, - "loss": 0.9779, - "step": 3868 - }, - { - "epoch": 0.34892005230644363, - "grad_norm": 1.3704067584930577, - "learning_rate": 3.0246878430160166e-06, - "loss": 1.0185, - "step": 3869 - }, - { - "epoch": 0.34901023582991386, - "grad_norm": 1.4185099764215257, - "learning_rate": 3.024186090693684e-06, - "loss": 0.9361, - "step": 3870 - }, - { - "epoch": 0.34910041935338415, - "grad_norm": 1.428713941277601, - "learning_rate": 3.023684250982147e-06, - "loss": 1.0221, - "step": 3871 - }, - { - "epoch": 0.3491906028768544, - "grad_norm": 2.1933286126307245, - "learning_rate": 3.0231823239242252e-06, - "loss": 0.9159, - "step": 3872 - }, - { - "epoch": 0.34928078640032467, - "grad_norm": 1.4318752538913084, - "learning_rate": 3.0226803095627457e-06, - "loss": 0.9556, - "step": 3873 - }, - { - "epoch": 0.3493709699237949, - "grad_norm": 1.7015677515934944, - "learning_rate": 3.022178207940543e-06, - "loss": 0.9686, - "step": 3874 - }, - { - "epoch": 0.3494611534472652, - "grad_norm": 5.884242153845952, - "learning_rate": 3.02167601910046e-06, - "loss": 1.0076, - "step": 3875 - }, - { - "epoch": 0.3495513369707354, - "grad_norm": 1.5362315985847825, - "learning_rate": 3.021173743085345e-06, - "loss": 1.016, - "step": 3876 - }, - { - "epoch": 0.3496415204942057, - "grad_norm": 0.6389983856198689, - "learning_rate": 3.0206713799380557e-06, - "loss": 0.8327, - "step": 3877 - }, - { - "epoch": 0.34973170401767595, - "grad_norm": 1.5527654680701708, - "learning_rate": 3.0201689297014565e-06, - "loss": 0.9936, - "step": 3878 - }, - { - "epoch": 0.34982188754114624, - "grad_norm": 1.2535620956865297, - "learning_rate": 3.0196663924184187e-06, - "loss": 1.0407, - "step": 3879 - }, - { - "epoch": 0.34991207106461647, - "grad_norm": 2.257598866846191, - "learning_rate": 3.019163768131822e-06, - "loss": 1.022, - "step": 3880 - }, - { - "epoch": 0.35000225458808676, - "grad_norm": 1.6716948795441677, - "learning_rate": 3.0186610568845533e-06, - "loss": 0.9139, - "step": 3881 - }, - { - "epoch": 0.350092438111557, - "grad_norm": 1.5226111102471622, - "learning_rate": 3.018158258719507e-06, - "loss": 0.9265, - "step": 3882 - }, - { - "epoch": 0.3501826216350273, - "grad_norm": 5.828124672733737, - "learning_rate": 3.0176553736795827e-06, - "loss": 1.0206, - "step": 3883 - }, - { - "epoch": 0.35027280515849757, - "grad_norm": 0.5276076327703059, - "learning_rate": 3.017152401807691e-06, - "loss": 0.7636, - "step": 3884 - }, - { - "epoch": 0.3503629886819678, - "grad_norm": 1.558389750732659, - "learning_rate": 3.0166493431467476e-06, - "loss": 0.9672, - "step": 3885 - }, - { - "epoch": 0.3504531722054381, - "grad_norm": 1.2801133207733217, - "learning_rate": 3.016146197739677e-06, - "loss": 0.9663, - "step": 3886 - }, - { - "epoch": 0.3505433557289083, - "grad_norm": 1.6154366566061815, - "learning_rate": 3.0156429656294097e-06, - "loss": 0.925, - "step": 3887 - }, - { - "epoch": 0.3506335392523786, - "grad_norm": 1.366309532550737, - "learning_rate": 3.0151396468588844e-06, - "loss": 0.9305, - "step": 3888 - }, - { - "epoch": 0.35072372277584885, - "grad_norm": 1.7323093657316768, - "learning_rate": 3.014636241471047e-06, - "loss": 0.9514, - "step": 3889 - }, - { - "epoch": 0.35081390629931913, - "grad_norm": 3.817393055275335, - "learning_rate": 3.0141327495088514e-06, - "loss": 0.9935, - "step": 3890 - }, - { - "epoch": 0.35090408982278937, - "grad_norm": 2.5961592182419015, - "learning_rate": 3.0136291710152566e-06, - "loss": 0.9598, - "step": 3891 - }, - { - "epoch": 0.35099427334625966, - "grad_norm": 1.3972567469757657, - "learning_rate": 3.0131255060332325e-06, - "loss": 1.0268, - "step": 3892 - }, - { - "epoch": 0.3510844568697299, - "grad_norm": 1.6263160511533838, - "learning_rate": 3.012621754605754e-06, - "loss": 1.011, - "step": 3893 - }, - { - "epoch": 0.3511746403932002, - "grad_norm": 2.2936646739254574, - "learning_rate": 3.0121179167758035e-06, - "loss": 1.0268, - "step": 3894 - }, - { - "epoch": 0.3512648239166704, - "grad_norm": 1.755936634710492, - "learning_rate": 3.0116139925863717e-06, - "loss": 1.0705, - "step": 3895 - }, - { - "epoch": 0.3513550074401407, - "grad_norm": 1.6527175660834996, - "learning_rate": 3.011109982080456e-06, - "loss": 1.0284, - "step": 3896 - }, - { - "epoch": 0.35144519096361093, - "grad_norm": 1.6301284705476782, - "learning_rate": 3.0106058853010614e-06, - "loss": 0.9442, - "step": 3897 - }, - { - "epoch": 0.3515353744870812, - "grad_norm": 1.438572773561405, - "learning_rate": 3.010101702291201e-06, - "loss": 0.9805, - "step": 3898 - }, - { - "epoch": 0.35162555801055145, - "grad_norm": 1.4524001087500706, - "learning_rate": 3.009597433093893e-06, - "loss": 0.9837, - "step": 3899 - }, - { - "epoch": 0.35171574153402174, - "grad_norm": 1.9118721398852512, - "learning_rate": 3.009093077752165e-06, - "loss": 1.001, - "step": 3900 - }, - { - "epoch": 0.351805925057492, - "grad_norm": 1.5417644968008508, - "learning_rate": 3.008588636309052e-06, - "loss": 1.0241, - "step": 3901 - }, - { - "epoch": 0.35189610858096226, - "grad_norm": 1.4695503813630406, - "learning_rate": 3.0080841088075947e-06, - "loss": 0.962, - "step": 3902 - }, - { - "epoch": 0.3519862921044325, - "grad_norm": 3.110054900245966, - "learning_rate": 3.0075794952908436e-06, - "loss": 1.0322, - "step": 3903 - }, - { - "epoch": 0.3520764756279028, - "grad_norm": 1.3112537734538314, - "learning_rate": 3.0070747958018528e-06, - "loss": 1.0042, - "step": 3904 - }, - { - "epoch": 0.352166659151373, - "grad_norm": 2.711911952533389, - "learning_rate": 3.0065700103836894e-06, - "loss": 1.0109, - "step": 3905 - }, - { - "epoch": 0.3522568426748433, - "grad_norm": 1.7273466651094804, - "learning_rate": 3.0060651390794214e-06, - "loss": 1.0118, - "step": 3906 - }, - { - "epoch": 0.3523470261983136, - "grad_norm": 1.5448918586833984, - "learning_rate": 3.005560181932128e-06, - "loss": 0.9695, - "step": 3907 - }, - { - "epoch": 0.35243720972178383, - "grad_norm": 1.4267807892731939, - "learning_rate": 3.005055138984896e-06, - "loss": 0.9394, - "step": 3908 - }, - { - "epoch": 0.3525273932452541, - "grad_norm": 2.1947946175179895, - "learning_rate": 3.0045500102808174e-06, - "loss": 0.9669, - "step": 3909 - }, - { - "epoch": 0.35261757676872435, - "grad_norm": 1.8478560551362058, - "learning_rate": 3.0040447958629927e-06, - "loss": 1.0763, - "step": 3910 - }, - { - "epoch": 0.35270776029219464, - "grad_norm": 1.5126113030961228, - "learning_rate": 3.00353949577453e-06, - "loss": 1.0206, - "step": 3911 - }, - { - "epoch": 0.35279794381566487, - "grad_norm": 2.1108248778709857, - "learning_rate": 3.003034110058544e-06, - "loss": 0.8287, - "step": 3912 - }, - { - "epoch": 0.35288812733913516, - "grad_norm": 1.9352053313023783, - "learning_rate": 3.002528638758157e-06, - "loss": 0.9779, - "step": 3913 - }, - { - "epoch": 0.3529783108626054, - "grad_norm": 1.8136680391013509, - "learning_rate": 3.0020230819164985e-06, - "loss": 1.0021, - "step": 3914 - }, - { - "epoch": 0.3530684943860757, - "grad_norm": 4.647899538855269, - "learning_rate": 3.0015174395767064e-06, - "loss": 0.9972, - "step": 3915 - }, - { - "epoch": 0.3531586779095459, - "grad_norm": 1.983217818104517, - "learning_rate": 3.001011711781923e-06, - "loss": 1.0201, - "step": 3916 - }, - { - "epoch": 0.3532488614330162, - "grad_norm": 1.4224707225340967, - "learning_rate": 3.0005058985753017e-06, - "loss": 0.944, - "step": 3917 - }, - { - "epoch": 0.35333904495648644, - "grad_norm": 1.4249054191388322, - "learning_rate": 3e-06, - "loss": 0.9795, - "step": 3918 - }, - { - "epoch": 0.3534292284799567, - "grad_norm": 0.6507306055189744, - "learning_rate": 2.9994940160991843e-06, - "loss": 0.8572, - "step": 3919 - }, - { - "epoch": 0.35351941200342696, - "grad_norm": 0.6117905400464192, - "learning_rate": 2.9989879469160285e-06, - "loss": 0.814, - "step": 3920 - }, - { - "epoch": 0.35360959552689725, - "grad_norm": 1.4571870204099306, - "learning_rate": 2.9984817924937124e-06, - "loss": 1.0118, - "step": 3921 - }, - { - "epoch": 0.3536997790503675, - "grad_norm": 14.211738484161165, - "learning_rate": 2.997975552875424e-06, - "loss": 0.936, - "step": 3922 - }, - { - "epoch": 0.35378996257383777, - "grad_norm": 1.74987996915892, - "learning_rate": 2.997469228104358e-06, - "loss": 0.9749, - "step": 3923 - }, - { - "epoch": 0.353880146097308, - "grad_norm": 1.3525339505205742, - "learning_rate": 2.996962818223718e-06, - "loss": 0.8799, - "step": 3924 - }, - { - "epoch": 0.3539703296207783, - "grad_norm": 1.5629347387150558, - "learning_rate": 2.9964563232767135e-06, - "loss": 0.9343, - "step": 3925 - }, - { - "epoch": 0.3540605131442485, - "grad_norm": 1.723437696441335, - "learning_rate": 2.9959497433065617e-06, - "loss": 0.9914, - "step": 3926 - }, - { - "epoch": 0.3541506966677188, - "grad_norm": 1.4948443980557988, - "learning_rate": 2.9954430783564848e-06, - "loss": 0.9107, - "step": 3927 - }, - { - "epoch": 0.35424088019118904, - "grad_norm": 1.408204564108358, - "learning_rate": 2.994936328469716e-06, - "loss": 1.0164, - "step": 3928 - }, - { - "epoch": 0.35433106371465933, - "grad_norm": 1.355241915936435, - "learning_rate": 2.994429493689494e-06, - "loss": 0.9889, - "step": 3929 - }, - { - "epoch": 0.35442124723812957, - "grad_norm": 3.648303272465613, - "learning_rate": 2.9939225740590642e-06, - "loss": 1.0153, - "step": 3930 - }, - { - "epoch": 0.35451143076159986, - "grad_norm": 2.273439387684461, - "learning_rate": 2.99341556962168e-06, - "loss": 0.9577, - "step": 3931 - }, - { - "epoch": 0.35460161428507014, - "grad_norm": 1.3888092187707346, - "learning_rate": 2.992908480420602e-06, - "loss": 1.0355, - "step": 3932 - }, - { - "epoch": 0.3546917978085404, - "grad_norm": 1.6352952397778924, - "learning_rate": 2.9924013064990974e-06, - "loss": 0.9801, - "step": 3933 - }, - { - "epoch": 0.35478198133201067, - "grad_norm": 0.6548094151132848, - "learning_rate": 2.991894047900441e-06, - "loss": 0.8664, - "step": 3934 - }, - { - "epoch": 0.3548721648554809, - "grad_norm": 2.1831170996635763, - "learning_rate": 2.991386704667916e-06, - "loss": 0.9959, - "step": 3935 - }, - { - "epoch": 0.3549623483789512, - "grad_norm": 1.6400815517804224, - "learning_rate": 2.9908792768448097e-06, - "loss": 1.0507, - "step": 3936 - }, - { - "epoch": 0.3550525319024214, - "grad_norm": 1.5959061641690477, - "learning_rate": 2.990371764474421e-06, - "loss": 0.9891, - "step": 3937 - }, - { - "epoch": 0.3551427154258917, - "grad_norm": 2.6039116594708758, - "learning_rate": 2.9898641676000518e-06, - "loss": 0.9635, - "step": 3938 - }, - { - "epoch": 0.35523289894936194, - "grad_norm": 1.4722168770379174, - "learning_rate": 2.9893564862650138e-06, - "loss": 0.9881, - "step": 3939 - }, - { - "epoch": 0.35532308247283223, - "grad_norm": 1.735042340534275, - "learning_rate": 2.9888487205126254e-06, - "loss": 1.046, - "step": 3940 - }, - { - "epoch": 0.35541326599630246, - "grad_norm": 1.715481511353561, - "learning_rate": 2.9883408703862115e-06, - "loss": 0.979, - "step": 3941 - }, - { - "epoch": 0.35550344951977275, - "grad_norm": 1.7738760519367998, - "learning_rate": 2.987832935929105e-06, - "loss": 1.0722, - "step": 3942 - }, - { - "epoch": 0.355593633043243, - "grad_norm": 1.4440600768190521, - "learning_rate": 2.9873249171846454e-06, - "loss": 1.0493, - "step": 3943 - }, - { - "epoch": 0.3556838165667133, - "grad_norm": 4.111038161538397, - "learning_rate": 2.98681681419618e-06, - "loss": 0.9775, - "step": 3944 - }, - { - "epoch": 0.3557740000901835, - "grad_norm": 2.9108669565266347, - "learning_rate": 2.9863086270070627e-06, - "loss": 1.0373, - "step": 3945 - }, - { - "epoch": 0.3558641836136538, - "grad_norm": 1.5721220910560354, - "learning_rate": 2.985800355660655e-06, - "loss": 0.9969, - "step": 3946 - }, - { - "epoch": 0.35595436713712403, - "grad_norm": 0.6731654350015748, - "learning_rate": 2.9852920002003252e-06, - "loss": 0.8608, - "step": 3947 - }, - { - "epoch": 0.3560445506605943, - "grad_norm": 1.633191224488005, - "learning_rate": 2.9847835606694494e-06, - "loss": 1.0283, - "step": 3948 - }, - { - "epoch": 0.35613473418406455, - "grad_norm": 1.918705691841802, - "learning_rate": 2.98427503711141e-06, - "loss": 0.9167, - "step": 3949 - }, - { - "epoch": 0.35622491770753484, - "grad_norm": 1.8456772252440419, - "learning_rate": 2.9837664295695973e-06, - "loss": 1.0249, - "step": 3950 - }, - { - "epoch": 0.35631510123100507, - "grad_norm": 1.9011019623134526, - "learning_rate": 2.983257738087408e-06, - "loss": 0.9864, - "step": 3951 - }, - { - "epoch": 0.35640528475447536, - "grad_norm": 1.5579663689476262, - "learning_rate": 2.982748962708247e-06, - "loss": 1.0179, - "step": 3952 - }, - { - "epoch": 0.3564954682779456, - "grad_norm": 1.5833777454906839, - "learning_rate": 2.982240103475526e-06, - "loss": 1.0414, - "step": 3953 - }, - { - "epoch": 0.3565856518014159, - "grad_norm": 1.6961075988232257, - "learning_rate": 2.981731160432663e-06, - "loss": 0.9888, - "step": 3954 - }, - { - "epoch": 0.35667583532488617, - "grad_norm": 1.4822835543587674, - "learning_rate": 2.981222133623084e-06, - "loss": 0.8992, - "step": 3955 - }, - { - "epoch": 0.3567660188483564, - "grad_norm": 1.7815400272297635, - "learning_rate": 2.980713023090222e-06, - "loss": 1.0039, - "step": 3956 - }, - { - "epoch": 0.3568562023718267, - "grad_norm": 1.4230911587262856, - "learning_rate": 2.980203828877518e-06, - "loss": 0.9655, - "step": 3957 - }, - { - "epoch": 0.3569463858952969, - "grad_norm": 1.8222469153062155, - "learning_rate": 2.9796945510284182e-06, - "loss": 1.0275, - "step": 3958 - }, - { - "epoch": 0.3570365694187672, - "grad_norm": 1.431862349468879, - "learning_rate": 2.9791851895863774e-06, - "loss": 1.0476, - "step": 3959 - }, - { - "epoch": 0.35712675294223745, - "grad_norm": 1.469665566632961, - "learning_rate": 2.978675744594857e-06, - "loss": 1.024, - "step": 3960 - }, - { - "epoch": 0.35721693646570774, - "grad_norm": 1.266608103644805, - "learning_rate": 2.978166216097326e-06, - "loss": 0.9456, - "step": 3961 - }, - { - "epoch": 0.35730711998917797, - "grad_norm": 1.5780717444168162, - "learning_rate": 2.9776566041372596e-06, - "loss": 0.9796, - "step": 3962 - }, - { - "epoch": 0.35739730351264826, - "grad_norm": 1.5711246496924476, - "learning_rate": 2.977146908758141e-06, - "loss": 1.0557, - "step": 3963 - }, - { - "epoch": 0.3574874870361185, - "grad_norm": 1.5708415340678346, - "learning_rate": 2.9766371300034604e-06, - "loss": 1.0079, - "step": 3964 - }, - { - "epoch": 0.3575776705595888, - "grad_norm": 1.3051542258204123, - "learning_rate": 2.9761272679167142e-06, - "loss": 1.0359, - "step": 3965 - }, - { - "epoch": 0.357667854083059, - "grad_norm": 1.5652729605632867, - "learning_rate": 2.9756173225414072e-06, - "loss": 0.8408, - "step": 3966 - }, - { - "epoch": 0.3577580376065293, - "grad_norm": 2.0288515455930174, - "learning_rate": 2.975107293921051e-06, - "loss": 1.164, - "step": 3967 - }, - { - "epoch": 0.35784822112999953, - "grad_norm": 1.9279226039214918, - "learning_rate": 2.9745971820991643e-06, - "loss": 1.0365, - "step": 3968 - }, - { - "epoch": 0.3579384046534698, - "grad_norm": 1.398841373367017, - "learning_rate": 2.9740869871192715e-06, - "loss": 1.0244, - "step": 3969 - }, - { - "epoch": 0.35802858817694005, - "grad_norm": 1.5965007903410415, - "learning_rate": 2.9735767090249065e-06, - "loss": 1.0523, - "step": 3970 - }, - { - "epoch": 0.35811877170041034, - "grad_norm": 1.5015886953492326, - "learning_rate": 2.973066347859608e-06, - "loss": 0.9085, - "step": 3971 - }, - { - "epoch": 0.3582089552238806, - "grad_norm": 1.337750905113078, - "learning_rate": 2.972555903666923e-06, - "loss": 1.006, - "step": 3972 - }, - { - "epoch": 0.35829913874735086, - "grad_norm": 2.530051901514894, - "learning_rate": 2.972045376490406e-06, - "loss": 0.9004, - "step": 3973 - }, - { - "epoch": 0.3583893222708211, - "grad_norm": 1.6165665618473792, - "learning_rate": 2.9715347663736177e-06, - "loss": 0.9198, - "step": 3974 - }, - { - "epoch": 0.3584795057942914, - "grad_norm": 1.411458858851186, - "learning_rate": 2.9710240733601266e-06, - "loss": 0.9868, - "step": 3975 - }, - { - "epoch": 0.3585696893177616, - "grad_norm": 1.9375679250316176, - "learning_rate": 2.970513297493507e-06, - "loss": 1.0676, - "step": 3976 - }, - { - "epoch": 0.3586598728412319, - "grad_norm": 1.3015163051655152, - "learning_rate": 2.9700024388173416e-06, - "loss": 1.0391, - "step": 3977 - }, - { - "epoch": 0.35875005636470214, - "grad_norm": 1.476872245149953, - "learning_rate": 2.969491497375219e-06, - "loss": 1.0404, - "step": 3978 - }, - { - "epoch": 0.35884023988817243, - "grad_norm": 1.4654514527207554, - "learning_rate": 2.9689804732107364e-06, - "loss": 1.0144, - "step": 3979 - }, - { - "epoch": 0.3589304234116427, - "grad_norm": 1.5502955831670868, - "learning_rate": 2.9684693663674968e-06, - "loss": 0.9526, - "step": 3980 - }, - { - "epoch": 0.35902060693511295, - "grad_norm": 1.3988915670581525, - "learning_rate": 2.9679581768891115e-06, - "loss": 1.0821, - "step": 3981 - }, - { - "epoch": 0.35911079045858324, - "grad_norm": 1.4794956521978821, - "learning_rate": 2.967446904819197e-06, - "loss": 0.9097, - "step": 3982 - }, - { - "epoch": 0.3592009739820535, - "grad_norm": 1.4176759864243067, - "learning_rate": 2.966935550201378e-06, - "loss": 1.0155, - "step": 3983 - }, - { - "epoch": 0.35929115750552376, - "grad_norm": 4.438373103682825, - "learning_rate": 2.966424113079286e-06, - "loss": 0.9927, - "step": 3984 - }, - { - "epoch": 0.359381341028994, - "grad_norm": 1.4693695547245562, - "learning_rate": 2.9659125934965596e-06, - "loss": 1.0404, - "step": 3985 - }, - { - "epoch": 0.3594715245524643, - "grad_norm": 1.6412214375761194, - "learning_rate": 2.9654009914968457e-06, - "loss": 1.0587, - "step": 3986 - }, - { - "epoch": 0.3595617080759345, - "grad_norm": 2.0326726773003108, - "learning_rate": 2.9648893071237956e-06, - "loss": 0.8824, - "step": 3987 - }, - { - "epoch": 0.3596518915994048, - "grad_norm": 1.557719968077428, - "learning_rate": 2.964377540421069e-06, - "loss": 0.9694, - "step": 3988 - }, - { - "epoch": 0.35974207512287504, - "grad_norm": 1.5535985305376498, - "learning_rate": 2.963865691432334e-06, - "loss": 1.0163, - "step": 3989 - }, - { - "epoch": 0.3598322586463453, - "grad_norm": 1.3564272083587559, - "learning_rate": 2.963353760201263e-06, - "loss": 0.9124, - "step": 3990 - }, - { - "epoch": 0.35992244216981556, - "grad_norm": 1.5374495676344668, - "learning_rate": 2.962841746771537e-06, - "loss": 0.979, - "step": 3991 - }, - { - "epoch": 0.36001262569328585, - "grad_norm": 1.3441709258376058, - "learning_rate": 2.9623296511868445e-06, - "loss": 0.9966, - "step": 3992 - }, - { - "epoch": 0.3601028092167561, - "grad_norm": 1.3240195605599263, - "learning_rate": 2.96181747349088e-06, - "loss": 0.9179, - "step": 3993 - }, - { - "epoch": 0.36019299274022637, - "grad_norm": 1.5637521685517397, - "learning_rate": 2.961305213727345e-06, - "loss": 1.013, - "step": 3994 - }, - { - "epoch": 0.3602831762636966, - "grad_norm": 0.6641688990904441, - "learning_rate": 2.960792871939949e-06, - "loss": 0.858, - "step": 3995 - }, - { - "epoch": 0.3603733597871669, - "grad_norm": 1.4798647974737016, - "learning_rate": 2.9602804481724064e-06, - "loss": 1.0837, - "step": 3996 - }, - { - "epoch": 0.3604635433106371, - "grad_norm": 2.1333236361322943, - "learning_rate": 2.9597679424684427e-06, - "loss": 0.9887, - "step": 3997 - }, - { - "epoch": 0.3605537268341074, - "grad_norm": 2.26537632070704, - "learning_rate": 2.9592553548717848e-06, - "loss": 1.0042, - "step": 3998 - }, - { - "epoch": 0.36064391035757765, - "grad_norm": 2.7846927551418577, - "learning_rate": 2.958742685426171e-06, - "loss": 1.0702, - "step": 3999 - }, - { - "epoch": 0.36073409388104793, - "grad_norm": 1.4824483313731511, - "learning_rate": 2.9582299341753446e-06, - "loss": 1.0575, - "step": 4000 - }, - { - "epoch": 0.36082427740451817, - "grad_norm": 1.6342807720730772, - "learning_rate": 2.957717101163057e-06, - "loss": 0.9398, - "step": 4001 - }, - { - "epoch": 0.36091446092798846, - "grad_norm": 1.5619192951663787, - "learning_rate": 2.9572041864330655e-06, - "loss": 0.9939, - "step": 4002 - }, - { - "epoch": 0.36100464445145874, - "grad_norm": 1.4679480453257312, - "learning_rate": 2.9566911900291346e-06, - "loss": 1.0129, - "step": 4003 - }, - { - "epoch": 0.361094827974929, - "grad_norm": 1.5042297015450719, - "learning_rate": 2.9561781119950368e-06, - "loss": 0.9104, - "step": 4004 - }, - { - "epoch": 0.36118501149839927, - "grad_norm": 1.3161562900205819, - "learning_rate": 2.9556649523745493e-06, - "loss": 0.9461, - "step": 4005 - }, - { - "epoch": 0.3612751950218695, - "grad_norm": 1.526188406111479, - "learning_rate": 2.955151711211459e-06, - "loss": 1.0526, - "step": 4006 - }, - { - "epoch": 0.3613653785453398, - "grad_norm": 1.2632112444729273, - "learning_rate": 2.9546383885495583e-06, - "loss": 0.961, - "step": 4007 - }, - { - "epoch": 0.36145556206881, - "grad_norm": 1.8687458000008326, - "learning_rate": 2.9541249844326464e-06, - "loss": 0.9445, - "step": 4008 - }, - { - "epoch": 0.3615457455922803, - "grad_norm": 1.4643986140067724, - "learning_rate": 2.9536114989045295e-06, - "loss": 0.9886, - "step": 4009 - }, - { - "epoch": 0.36163592911575054, - "grad_norm": 1.8094981756103627, - "learning_rate": 2.9530979320090216e-06, - "loss": 1.0576, - "step": 4010 - }, - { - "epoch": 0.36172611263922083, - "grad_norm": 1.7239892347933468, - "learning_rate": 2.9525842837899422e-06, - "loss": 1.0738, - "step": 4011 - }, - { - "epoch": 0.36181629616269106, - "grad_norm": 2.2648405624425116, - "learning_rate": 2.95207055429112e-06, - "loss": 1.0751, - "step": 4012 - }, - { - "epoch": 0.36190647968616135, - "grad_norm": 2.5882778122795895, - "learning_rate": 2.951556743556388e-06, - "loss": 0.9887, - "step": 4013 - }, - { - "epoch": 0.3619966632096316, - "grad_norm": 2.3919308156562527, - "learning_rate": 2.951042851629588e-06, - "loss": 1.0434, - "step": 4014 - }, - { - "epoch": 0.3620868467331019, - "grad_norm": 1.6912838228597715, - "learning_rate": 2.950528878554568e-06, - "loss": 0.9488, - "step": 4015 - }, - { - "epoch": 0.3621770302565721, - "grad_norm": 1.2662481022149312, - "learning_rate": 2.950014824375183e-06, - "loss": 1.0289, - "step": 4016 - }, - { - "epoch": 0.3622672137800424, - "grad_norm": 0.6540728740716932, - "learning_rate": 2.949500689135295e-06, - "loss": 0.7984, - "step": 4017 - }, - { - "epoch": 0.36235739730351263, - "grad_norm": 1.2794269524756743, - "learning_rate": 2.9489864728787722e-06, - "loss": 1.0738, - "step": 4018 - }, - { - "epoch": 0.3624475808269829, - "grad_norm": 2.0616137594569874, - "learning_rate": 2.9484721756494915e-06, - "loss": 1.0498, - "step": 4019 - }, - { - "epoch": 0.36253776435045315, - "grad_norm": 1.3528407672723102, - "learning_rate": 2.9479577974913343e-06, - "loss": 0.9947, - "step": 4020 - }, - { - "epoch": 0.36262794787392344, - "grad_norm": 1.5047856917780458, - "learning_rate": 2.9474433384481908e-06, - "loss": 0.9908, - "step": 4021 - }, - { - "epoch": 0.3627181313973937, - "grad_norm": 1.33613442202135, - "learning_rate": 2.9469287985639577e-06, - "loss": 0.9867, - "step": 4022 - }, - { - "epoch": 0.36280831492086396, - "grad_norm": 1.473626260003449, - "learning_rate": 2.9464141778825384e-06, - "loss": 1.0455, - "step": 4023 - }, - { - "epoch": 0.3628984984443342, - "grad_norm": 1.836322695316937, - "learning_rate": 2.9458994764478427e-06, - "loss": 1.0285, - "step": 4024 - }, - { - "epoch": 0.3629886819678045, - "grad_norm": 1.4188661704206669, - "learning_rate": 2.9453846943037883e-06, - "loss": 1.0371, - "step": 4025 - }, - { - "epoch": 0.36307886549127477, - "grad_norm": 1.4168340172748974, - "learning_rate": 2.9448698314942987e-06, - "loss": 0.9488, - "step": 4026 - }, - { - "epoch": 0.363169049014745, - "grad_norm": 17.395629930354005, - "learning_rate": 2.944354888063305e-06, - "loss": 0.9066, - "step": 4027 - }, - { - "epoch": 0.3632592325382153, - "grad_norm": 1.8267468205266764, - "learning_rate": 2.9438398640547453e-06, - "loss": 0.9665, - "step": 4028 - }, - { - "epoch": 0.3633494160616855, - "grad_norm": 1.439947863668125, - "learning_rate": 2.943324759512564e-06, - "loss": 0.9335, - "step": 4029 - }, - { - "epoch": 0.3634395995851558, - "grad_norm": 1.2943254711303007, - "learning_rate": 2.9428095744807134e-06, - "loss": 1.0222, - "step": 4030 - }, - { - "epoch": 0.36352978310862605, - "grad_norm": 1.3870992648874287, - "learning_rate": 2.942294309003151e-06, - "loss": 0.9489, - "step": 4031 - }, - { - "epoch": 0.36361996663209634, - "grad_norm": 1.6338459011846096, - "learning_rate": 2.941778963123843e-06, - "loss": 1.0131, - "step": 4032 - }, - { - "epoch": 0.36371015015556657, - "grad_norm": 1.4769330241024223, - "learning_rate": 2.94126353688676e-06, - "loss": 0.9948, - "step": 4033 - }, - { - "epoch": 0.36380033367903686, - "grad_norm": 1.8787718028861158, - "learning_rate": 2.9407480303358825e-06, - "loss": 0.9474, - "step": 4034 - }, - { - "epoch": 0.3638905172025071, - "grad_norm": 1.3573020827312618, - "learning_rate": 2.940232443515195e-06, - "loss": 1.0027, - "step": 4035 - }, - { - "epoch": 0.3639807007259774, - "grad_norm": 1.2713359505883655, - "learning_rate": 2.9397167764686916e-06, - "loss": 1.0129, - "step": 4036 - }, - { - "epoch": 0.3640708842494476, - "grad_norm": 1.5764246930478323, - "learning_rate": 2.9392010292403714e-06, - "loss": 0.9689, - "step": 4037 - }, - { - "epoch": 0.3641610677729179, - "grad_norm": 1.5242201139834044, - "learning_rate": 2.9386852018742404e-06, - "loss": 0.9854, - "step": 4038 - }, - { - "epoch": 0.36425125129638813, - "grad_norm": 1.5064102530804606, - "learning_rate": 2.938169294414312e-06, - "loss": 0.9704, - "step": 4039 - }, - { - "epoch": 0.3643414348198584, - "grad_norm": 1.3403174894617573, - "learning_rate": 2.9376533069046067e-06, - "loss": 1.027, - "step": 4040 - }, - { - "epoch": 0.36443161834332866, - "grad_norm": 0.7918288583416937, - "learning_rate": 2.9371372393891514e-06, - "loss": 0.912, - "step": 4041 - }, - { - "epoch": 0.36452180186679894, - "grad_norm": 4.02836091362115, - "learning_rate": 2.936621091911979e-06, - "loss": 1.0698, - "step": 4042 - }, - { - "epoch": 0.3646119853902692, - "grad_norm": 1.8857927597115667, - "learning_rate": 2.936104864517131e-06, - "loss": 0.9195, - "step": 4043 - }, - { - "epoch": 0.36470216891373947, - "grad_norm": 1.7040905096794134, - "learning_rate": 2.9355885572486535e-06, - "loss": 0.9541, - "step": 4044 - }, - { - "epoch": 0.3647923524372097, - "grad_norm": 1.948514455638394, - "learning_rate": 2.9350721701506026e-06, - "loss": 1.0305, - "step": 4045 - }, - { - "epoch": 0.36488253596068, - "grad_norm": 1.750131942679671, - "learning_rate": 2.9345557032670375e-06, - "loss": 0.9875, - "step": 4046 - }, - { - "epoch": 0.3649727194841502, - "grad_norm": 2.014671277477476, - "learning_rate": 2.934039156642027e-06, - "loss": 1.0448, - "step": 4047 - }, - { - "epoch": 0.3650629030076205, - "grad_norm": 1.742668119773366, - "learning_rate": 2.9335225303196454e-06, - "loss": 0.8559, - "step": 4048 - }, - { - "epoch": 0.36515308653109074, - "grad_norm": 1.5063200368466727, - "learning_rate": 2.933005824343974e-06, - "loss": 0.9639, - "step": 4049 - }, - { - "epoch": 0.36524327005456103, - "grad_norm": 2.878462820698583, - "learning_rate": 2.932489038759101e-06, - "loss": 0.9657, - "step": 4050 - }, - { - "epoch": 0.3653334535780313, - "grad_norm": 1.2874859503562774, - "learning_rate": 2.9319721736091215e-06, - "loss": 0.8812, - "step": 4051 - }, - { - "epoch": 0.36542363710150155, - "grad_norm": 1.1919429383221087, - "learning_rate": 2.9314552289381377e-06, - "loss": 0.9018, - "step": 4052 - }, - { - "epoch": 0.36551382062497184, - "grad_norm": 2.0157079162103075, - "learning_rate": 2.9309382047902574e-06, - "loss": 0.9605, - "step": 4053 - }, - { - "epoch": 0.3656040041484421, - "grad_norm": 1.7127613118858447, - "learning_rate": 2.9304211012095963e-06, - "loss": 1.0421, - "step": 4054 - }, - { - "epoch": 0.36569418767191236, - "grad_norm": 1.5416662972252206, - "learning_rate": 2.929903918240277e-06, - "loss": 1.0561, - "step": 4055 - }, - { - "epoch": 0.3657843711953826, - "grad_norm": 0.6857936447598344, - "learning_rate": 2.9293866559264273e-06, - "loss": 0.8333, - "step": 4056 - }, - { - "epoch": 0.3658745547188529, - "grad_norm": 1.925481064867037, - "learning_rate": 2.928869314312184e-06, - "loss": 0.9955, - "step": 4057 - }, - { - "epoch": 0.3659647382423231, - "grad_norm": 1.4521616541986386, - "learning_rate": 2.9283518934416892e-06, - "loss": 1.0008, - "step": 4058 - }, - { - "epoch": 0.3660549217657934, - "grad_norm": 1.4848659305868752, - "learning_rate": 2.927834393359092e-06, - "loss": 1.0075, - "step": 4059 - }, - { - "epoch": 0.36614510528926364, - "grad_norm": 1.4838907355366826, - "learning_rate": 2.927316814108548e-06, - "loss": 1.0613, - "step": 4060 - }, - { - "epoch": 0.36623528881273393, - "grad_norm": 1.7160272706850381, - "learning_rate": 2.92679915573422e-06, - "loss": 0.8959, - "step": 4061 - }, - { - "epoch": 0.36632547233620416, - "grad_norm": 1.754503110333325, - "learning_rate": 2.926281418280278e-06, - "loss": 0.9338, - "step": 4062 - }, - { - "epoch": 0.36641565585967445, - "grad_norm": 2.1628212794850636, - "learning_rate": 2.925763601790899e-06, - "loss": 0.9945, - "step": 4063 - }, - { - "epoch": 0.3665058393831447, - "grad_norm": 1.3408598758558994, - "learning_rate": 2.9252457063102635e-06, - "loss": 0.9526, - "step": 4064 - }, - { - "epoch": 0.36659602290661497, - "grad_norm": 1.4436324794217812, - "learning_rate": 2.9247277318825626e-06, - "loss": 0.958, - "step": 4065 - }, - { - "epoch": 0.3666862064300852, - "grad_norm": 1.3380283257005032, - "learning_rate": 2.924209678551993e-06, - "loss": 1.0845, - "step": 4066 - }, - { - "epoch": 0.3667763899535555, - "grad_norm": 1.4212648528840277, - "learning_rate": 2.923691546362757e-06, - "loss": 1.0002, - "step": 4067 - }, - { - "epoch": 0.3668665734770257, - "grad_norm": 1.6618467811578779, - "learning_rate": 2.9231733353590663e-06, - "loss": 0.9606, - "step": 4068 - }, - { - "epoch": 0.366956757000496, - "grad_norm": 1.9283741794613367, - "learning_rate": 2.922655045585136e-06, - "loss": 1.0569, - "step": 4069 - }, - { - "epoch": 0.36704694052396625, - "grad_norm": 0.7262011624196929, - "learning_rate": 2.92213667708519e-06, - "loss": 0.9186, - "step": 4070 - }, - { - "epoch": 0.36713712404743654, - "grad_norm": 1.3538885222175223, - "learning_rate": 2.921618229903457e-06, - "loss": 1.0, - "step": 4071 - }, - { - "epoch": 0.36722730757090677, - "grad_norm": 1.5472746823996053, - "learning_rate": 2.9210997040841752e-06, - "loss": 0.9146, - "step": 4072 - }, - { - "epoch": 0.36731749109437706, - "grad_norm": 1.62339923949403, - "learning_rate": 2.9205810996715885e-06, - "loss": 0.9579, - "step": 4073 - }, - { - "epoch": 0.36740767461784735, - "grad_norm": 1.4129574499128705, - "learning_rate": 2.9200624167099456e-06, - "loss": 0.9591, - "step": 4074 - }, - { - "epoch": 0.3674978581413176, - "grad_norm": 1.5773403369853414, - "learning_rate": 2.919543655243505e-06, - "loss": 0.9085, - "step": 4075 - }, - { - "epoch": 0.36758804166478787, - "grad_norm": 1.8554954767811935, - "learning_rate": 2.919024815316529e-06, - "loss": 1.0239, - "step": 4076 - }, - { - "epoch": 0.3676782251882581, - "grad_norm": 1.7645926369065619, - "learning_rate": 2.9185058969732877e-06, - "loss": 1.1157, - "step": 4077 - }, - { - "epoch": 0.3677684087117284, - "grad_norm": 1.8187863742822186, - "learning_rate": 2.917986900258059e-06, - "loss": 1.0296, - "step": 4078 - }, - { - "epoch": 0.3678585922351986, - "grad_norm": 2.2162914355299415, - "learning_rate": 2.917467825215126e-06, - "loss": 0.9872, - "step": 4079 - }, - { - "epoch": 0.3679487757586689, - "grad_norm": 1.3614532982349565, - "learning_rate": 2.9169486718887803e-06, - "loss": 0.9453, - "step": 4080 - }, - { - "epoch": 0.36803895928213914, - "grad_norm": 1.2708219100355358, - "learning_rate": 2.9164294403233173e-06, - "loss": 1.0407, - "step": 4081 - }, - { - "epoch": 0.36812914280560943, - "grad_norm": 1.571405666048143, - "learning_rate": 2.915910130563041e-06, - "loss": 0.9291, - "step": 4082 - }, - { - "epoch": 0.36821932632907967, - "grad_norm": 1.7116980408217102, - "learning_rate": 2.915390742652262e-06, - "loss": 0.9346, - "step": 4083 - }, - { - "epoch": 0.36830950985254995, - "grad_norm": 1.6849736977400585, - "learning_rate": 2.914871276635298e-06, - "loss": 0.9266, - "step": 4084 - }, - { - "epoch": 0.3683996933760202, - "grad_norm": 1.916081200895858, - "learning_rate": 2.914351732556472e-06, - "loss": 0.9695, - "step": 4085 - }, - { - "epoch": 0.3684898768994905, - "grad_norm": 1.4143841625259035, - "learning_rate": 2.9138321104601144e-06, - "loss": 0.9131, - "step": 4086 - }, - { - "epoch": 0.3685800604229607, - "grad_norm": 1.77838835148791, - "learning_rate": 2.9133124103905623e-06, - "loss": 0.9287, - "step": 4087 - }, - { - "epoch": 0.368670243946431, - "grad_norm": 1.9318981404000009, - "learning_rate": 2.9127926323921596e-06, - "loss": 0.9735, - "step": 4088 - }, - { - "epoch": 0.36876042746990123, - "grad_norm": 1.4447212453272185, - "learning_rate": 2.912272776509256e-06, - "loss": 1.0131, - "step": 4089 - }, - { - "epoch": 0.3688506109933715, - "grad_norm": 1.8315141350661266, - "learning_rate": 2.911752842786209e-06, - "loss": 0.9548, - "step": 4090 - }, - { - "epoch": 0.36894079451684175, - "grad_norm": 1.890862000798536, - "learning_rate": 2.911232831267383e-06, - "loss": 0.9329, - "step": 4091 - }, - { - "epoch": 0.36903097804031204, - "grad_norm": 1.6852696125523023, - "learning_rate": 2.910712741997146e-06, - "loss": 1.0191, - "step": 4092 - }, - { - "epoch": 0.3691211615637823, - "grad_norm": 2.233619188642192, - "learning_rate": 2.910192575019877e-06, - "loss": 0.9781, - "step": 4093 - }, - { - "epoch": 0.36921134508725256, - "grad_norm": 1.724972160778137, - "learning_rate": 2.9096723303799583e-06, - "loss": 1.0231, - "step": 4094 - }, - { - "epoch": 0.3693015286107228, - "grad_norm": 3.674852568075994, - "learning_rate": 2.9091520081217805e-06, - "loss": 0.9984, - "step": 4095 - }, - { - "epoch": 0.3693917121341931, - "grad_norm": 3.1891310763426968, - "learning_rate": 2.908631608289741e-06, - "loss": 0.938, - "step": 4096 - }, - { - "epoch": 0.3694818956576633, - "grad_norm": 1.5266886132896393, - "learning_rate": 2.9081111309282423e-06, - "loss": 0.9839, - "step": 4097 - }, - { - "epoch": 0.3695720791811336, - "grad_norm": 0.6673874807710847, - "learning_rate": 2.9075905760816942e-06, - "loss": 0.8896, - "step": 4098 - }, - { - "epoch": 0.3696622627046039, - "grad_norm": 2.028093790951553, - "learning_rate": 2.907069943794514e-06, - "loss": 1.0201, - "step": 4099 - }, - { - "epoch": 0.3697524462280741, - "grad_norm": 1.5735525988820216, - "learning_rate": 2.906549234111125e-06, - "loss": 0.9687, - "step": 4100 - }, - { - "epoch": 0.3698426297515444, - "grad_norm": 1.4001367144588845, - "learning_rate": 2.906028447075956e-06, - "loss": 1.0463, - "step": 4101 - }, - { - "epoch": 0.36993281327501465, - "grad_norm": 1.575477343082497, - "learning_rate": 2.905507582733445e-06, - "loss": 1.076, - "step": 4102 - }, - { - "epoch": 0.37002299679848494, - "grad_norm": 1.4174735725805379, - "learning_rate": 2.904986641128033e-06, - "loss": 1.1054, - "step": 4103 - }, - { - "epoch": 0.37011318032195517, - "grad_norm": 1.3047108448247107, - "learning_rate": 2.9044656223041716e-06, - "loss": 0.9732, - "step": 4104 - }, - { - "epoch": 0.37020336384542546, - "grad_norm": 1.3061079869804697, - "learning_rate": 2.9039445263063157e-06, - "loss": 0.9392, - "step": 4105 - }, - { - "epoch": 0.3702935473688957, - "grad_norm": 2.017238119304304, - "learning_rate": 2.903423353178929e-06, - "loss": 0.8952, - "step": 4106 - }, - { - "epoch": 0.370383730892366, - "grad_norm": 1.6722742477440622, - "learning_rate": 2.9029021029664802e-06, - "loss": 0.9899, - "step": 4107 - }, - { - "epoch": 0.3704739144158362, - "grad_norm": 1.2989786849560048, - "learning_rate": 2.9023807757134455e-06, - "loss": 1.04, - "step": 4108 - }, - { - "epoch": 0.3705640979393065, - "grad_norm": 2.0987205149775883, - "learning_rate": 2.901859371464307e-06, - "loss": 1.0552, - "step": 4109 - }, - { - "epoch": 0.37065428146277674, - "grad_norm": 1.6872256727116703, - "learning_rate": 2.9013378902635535e-06, - "loss": 1.024, - "step": 4110 - }, - { - "epoch": 0.370744464986247, - "grad_norm": 2.0568702620269446, - "learning_rate": 2.9008163321556823e-06, - "loss": 0.9637, - "step": 4111 - }, - { - "epoch": 0.37083464850971726, - "grad_norm": 1.4602984494382938, - "learning_rate": 2.900294697185194e-06, - "loss": 0.9968, - "step": 4112 - }, - { - "epoch": 0.37092483203318755, - "grad_norm": 1.6699964718581524, - "learning_rate": 2.899772985396599e-06, - "loss": 1.0084, - "step": 4113 - }, - { - "epoch": 0.3710150155566578, - "grad_norm": 1.3362005766675642, - "learning_rate": 2.8992511968344104e-06, - "loss": 0.9829, - "step": 4114 - }, - { - "epoch": 0.37110519908012807, - "grad_norm": 1.4926197165010813, - "learning_rate": 2.8987293315431523e-06, - "loss": 0.9328, - "step": 4115 - }, - { - "epoch": 0.3711953826035983, - "grad_norm": 1.4078158244137142, - "learning_rate": 2.898207389567351e-06, - "loss": 0.9256, - "step": 4116 - }, - { - "epoch": 0.3712855661270686, - "grad_norm": 1.412158506179787, - "learning_rate": 2.897685370951543e-06, - "loss": 0.9815, - "step": 4117 - }, - { - "epoch": 0.3713757496505388, - "grad_norm": 2.404026540003563, - "learning_rate": 2.89716327574027e-06, - "loss": 0.8901, - "step": 4118 - }, - { - "epoch": 0.3714659331740091, - "grad_norm": 1.8116416871993375, - "learning_rate": 2.8966411039780787e-06, - "loss": 1.0732, - "step": 4119 - }, - { - "epoch": 0.37155611669747934, - "grad_norm": 1.8482486992811684, - "learning_rate": 2.8961188557095248e-06, - "loss": 0.9998, - "step": 4120 - }, - { - "epoch": 0.37164630022094963, - "grad_norm": 1.6315666446804848, - "learning_rate": 2.895596530979168e-06, - "loss": 0.9321, - "step": 4121 - }, - { - "epoch": 0.3717364837444199, - "grad_norm": 1.4257608490620934, - "learning_rate": 2.895074129831578e-06, - "loss": 1.0061, - "step": 4122 - }, - { - "epoch": 0.37182666726789015, - "grad_norm": 1.4593899123677678, - "learning_rate": 2.8945516523113275e-06, - "loss": 0.8987, - "step": 4123 - }, - { - "epoch": 0.37191685079136044, - "grad_norm": 0.6847960879333334, - "learning_rate": 2.894029098462998e-06, - "loss": 0.8151, - "step": 4124 - }, - { - "epoch": 0.3720070343148307, - "grad_norm": 2.024431021546186, - "learning_rate": 2.8935064683311756e-06, - "loss": 1.0124, - "step": 4125 - }, - { - "epoch": 0.37209721783830096, - "grad_norm": 0.7721457049159884, - "learning_rate": 2.8929837619604544e-06, - "loss": 0.9196, - "step": 4126 - }, - { - "epoch": 0.3721874013617712, - "grad_norm": 1.7766373456690938, - "learning_rate": 2.8924609793954346e-06, - "loss": 1.0378, - "step": 4127 - }, - { - "epoch": 0.3722775848852415, - "grad_norm": 2.334369145683495, - "learning_rate": 2.891938120680724e-06, - "loss": 0.945, - "step": 4128 - }, - { - "epoch": 0.3723677684087117, - "grad_norm": 4.985697316762721, - "learning_rate": 2.8914151858609343e-06, - "loss": 0.9788, - "step": 4129 - }, - { - "epoch": 0.372457951932182, - "grad_norm": 1.4992346400481849, - "learning_rate": 2.8908921749806858e-06, - "loss": 1.0152, - "step": 4130 - }, - { - "epoch": 0.37254813545565224, - "grad_norm": 1.8517526134638873, - "learning_rate": 2.890369088084605e-06, - "loss": 0.9993, - "step": 4131 - }, - { - "epoch": 0.37263831897912253, - "grad_norm": 1.4778734472964032, - "learning_rate": 2.889845925217323e-06, - "loss": 1.0169, - "step": 4132 - }, - { - "epoch": 0.37272850250259276, - "grad_norm": 1.511811877001726, - "learning_rate": 2.8893226864234813e-06, - "loss": 1.0038, - "step": 4133 - }, - { - "epoch": 0.37281868602606305, - "grad_norm": 1.6939814391847658, - "learning_rate": 2.8887993717477236e-06, - "loss": 1.0071, - "step": 4134 - }, - { - "epoch": 0.3729088695495333, - "grad_norm": 1.5798616579308686, - "learning_rate": 2.8882759812347035e-06, - "loss": 0.8805, - "step": 4135 - }, - { - "epoch": 0.3729990530730036, - "grad_norm": 1.5271943548558782, - "learning_rate": 2.887752514929078e-06, - "loss": 0.9897, - "step": 4136 - }, - { - "epoch": 0.3730892365964738, - "grad_norm": 1.5871128931928995, - "learning_rate": 2.887228972875513e-06, - "loss": 0.9398, - "step": 4137 - }, - { - "epoch": 0.3731794201199441, - "grad_norm": 2.458672052960209, - "learning_rate": 2.88670535511868e-06, - "loss": 0.9963, - "step": 4138 - }, - { - "epoch": 0.3732696036434143, - "grad_norm": 1.7749719913045872, - "learning_rate": 2.886181661703257e-06, - "loss": 0.9312, - "step": 4139 - }, - { - "epoch": 0.3733597871668846, - "grad_norm": 1.9684386234186373, - "learning_rate": 2.8856578926739285e-06, - "loss": 0.955, - "step": 4140 - }, - { - "epoch": 0.37344997069035485, - "grad_norm": 1.508353734097993, - "learning_rate": 2.8851340480753846e-06, - "loss": 0.9523, - "step": 4141 - }, - { - "epoch": 0.37354015421382514, - "grad_norm": 1.8779838343931028, - "learning_rate": 2.8846101279523232e-06, - "loss": 0.9989, - "step": 4142 - }, - { - "epoch": 0.37363033773729537, - "grad_norm": 1.8148202842873629, - "learning_rate": 2.8840861323494487e-06, - "loss": 0.9706, - "step": 4143 - }, - { - "epoch": 0.37372052126076566, - "grad_norm": 1.460887316489572, - "learning_rate": 2.88356206131147e-06, - "loss": 1.0504, - "step": 4144 - }, - { - "epoch": 0.37381070478423595, - "grad_norm": 1.6397303003592572, - "learning_rate": 2.883037914883104e-06, - "loss": 0.9784, - "step": 4145 - }, - { - "epoch": 0.3739008883077062, - "grad_norm": 1.385280023973363, - "learning_rate": 2.882513693109075e-06, - "loss": 0.9526, - "step": 4146 - }, - { - "epoch": 0.37399107183117647, - "grad_norm": 1.408743026025632, - "learning_rate": 2.8819893960341106e-06, - "loss": 0.971, - "step": 4147 - }, - { - "epoch": 0.3740812553546467, - "grad_norm": 1.7361177503670793, - "learning_rate": 2.881465023702948e-06, - "loss": 1.0417, - "step": 4148 - }, - { - "epoch": 0.374171438878117, - "grad_norm": 1.579944883181949, - "learning_rate": 2.8809405761603294e-06, - "loss": 1.0721, - "step": 4149 - }, - { - "epoch": 0.3742616224015872, - "grad_norm": 1.9285784789365235, - "learning_rate": 2.880416053451003e-06, - "loss": 0.9118, - "step": 4150 - }, - { - "epoch": 0.3743518059250575, - "grad_norm": 1.6560763142043389, - "learning_rate": 2.879891455619725e-06, - "loss": 1.0842, - "step": 4151 - }, - { - "epoch": 0.37444198944852775, - "grad_norm": 1.5455361940112893, - "learning_rate": 2.879366782711256e-06, - "loss": 0.9929, - "step": 4152 - }, - { - "epoch": 0.37453217297199803, - "grad_norm": 2.8281560232241816, - "learning_rate": 2.8788420347703643e-06, - "loss": 1.0156, - "step": 4153 - }, - { - "epoch": 0.37462235649546827, - "grad_norm": 0.6972832771011599, - "learning_rate": 2.8783172118418244e-06, - "loss": 0.8632, - "step": 4154 - }, - { - "epoch": 0.37471254001893856, - "grad_norm": 2.4183261588700247, - "learning_rate": 2.877792313970417e-06, - "loss": 0.9939, - "step": 4155 - }, - { - "epoch": 0.3748027235424088, - "grad_norm": 1.3793574559453299, - "learning_rate": 2.8772673412009293e-06, - "loss": 1.0484, - "step": 4156 - }, - { - "epoch": 0.3748929070658791, - "grad_norm": 1.6434236906073143, - "learning_rate": 2.8767422935781545e-06, - "loss": 1.0425, - "step": 4157 - }, - { - "epoch": 0.3749830905893493, - "grad_norm": 1.3866319656080195, - "learning_rate": 2.8762171711468935e-06, - "loss": 1.0264, - "step": 4158 - }, - { - "epoch": 0.3750732741128196, - "grad_norm": 1.3563370795355627, - "learning_rate": 2.875691973951952e-06, - "loss": 0.993, - "step": 4159 - }, - { - "epoch": 0.37516345763628983, - "grad_norm": 0.6761374444421067, - "learning_rate": 2.8751667020381425e-06, - "loss": 0.9112, - "step": 4160 - }, - { - "epoch": 0.3752536411597601, - "grad_norm": 1.3018439823142094, - "learning_rate": 2.8746413554502837e-06, - "loss": 1.0536, - "step": 4161 - }, - { - "epoch": 0.37534382468323035, - "grad_norm": 1.557719968077428, - "learning_rate": 2.8741159342332027e-06, - "loss": 0.9366, - "step": 4162 - }, - { - "epoch": 0.37543400820670064, - "grad_norm": 1.7665388729850968, - "learning_rate": 2.87359043843173e-06, - "loss": 1.0452, - "step": 4163 - }, - { - "epoch": 0.3755241917301709, - "grad_norm": 1.3283500480717036, - "learning_rate": 2.873064868090704e-06, - "loss": 1.0334, - "step": 4164 - }, - { - "epoch": 0.37561437525364116, - "grad_norm": 1.5548113289597227, - "learning_rate": 2.8725392232549697e-06, - "loss": 1.0832, - "step": 4165 - }, - { - "epoch": 0.3757045587771114, - "grad_norm": 1.5900051775434154, - "learning_rate": 2.872013503969378e-06, - "loss": 1.0645, - "step": 4166 - }, - { - "epoch": 0.3757947423005817, - "grad_norm": 1.2524045704561146, - "learning_rate": 2.8714877102787853e-06, - "loss": 1.0076, - "step": 4167 - }, - { - "epoch": 0.3758849258240519, - "grad_norm": 1.514851716971338, - "learning_rate": 2.8709618422280564e-06, - "loss": 0.9988, - "step": 4168 - }, - { - "epoch": 0.3759751093475222, - "grad_norm": 1.3916573710711722, - "learning_rate": 2.8704358998620605e-06, - "loss": 0.9752, - "step": 4169 - }, - { - "epoch": 0.3760652928709925, - "grad_norm": 1.5531326263056286, - "learning_rate": 2.8699098832256735e-06, - "loss": 1.0186, - "step": 4170 - }, - { - "epoch": 0.37615547639446273, - "grad_norm": 2.379520931914128, - "learning_rate": 2.86938379236378e-06, - "loss": 0.9108, - "step": 4171 - }, - { - "epoch": 0.376245659917933, - "grad_norm": 1.3975162561798435, - "learning_rate": 2.868857627321266e-06, - "loss": 0.9214, - "step": 4172 - }, - { - "epoch": 0.37633584344140325, - "grad_norm": 1.3060168044453533, - "learning_rate": 2.8683313881430296e-06, - "loss": 0.9965, - "step": 4173 - }, - { - "epoch": 0.37642602696487354, - "grad_norm": 1.61634561490421, - "learning_rate": 2.8678050748739706e-06, - "loss": 0.9848, - "step": 4174 - }, - { - "epoch": 0.37651621048834377, - "grad_norm": 1.3093036604864279, - "learning_rate": 2.8672786875589976e-06, - "loss": 0.9609, - "step": 4175 - }, - { - "epoch": 0.37660639401181406, - "grad_norm": 1.4715039832436834, - "learning_rate": 2.866752226243025e-06, - "loss": 0.9497, - "step": 4176 - }, - { - "epoch": 0.3766965775352843, - "grad_norm": 2.1660135336230324, - "learning_rate": 2.8662256909709733e-06, - "loss": 1.0366, - "step": 4177 - }, - { - "epoch": 0.3767867610587546, - "grad_norm": 1.7148394399949396, - "learning_rate": 2.865699081787769e-06, - "loss": 1.031, - "step": 4178 - }, - { - "epoch": 0.3768769445822248, - "grad_norm": 1.3032109934291032, - "learning_rate": 2.8651723987383465e-06, - "loss": 0.9761, - "step": 4179 - }, - { - "epoch": 0.3769671281056951, - "grad_norm": 2.448249588640588, - "learning_rate": 2.8646456418676437e-06, - "loss": 0.9643, - "step": 4180 - }, - { - "epoch": 0.37705731162916534, - "grad_norm": 0.6807568129802012, - "learning_rate": 2.8641188112206067e-06, - "loss": 0.8624, - "step": 4181 - }, - { - "epoch": 0.3771474951526356, - "grad_norm": 1.9558382033536807, - "learning_rate": 2.863591906842189e-06, - "loss": 1.0236, - "step": 4182 - }, - { - "epoch": 0.37723767867610586, - "grad_norm": 1.3680331857125008, - "learning_rate": 2.863064928777347e-06, - "loss": 1.0259, - "step": 4183 - }, - { - "epoch": 0.37732786219957615, - "grad_norm": 1.7376059602770437, - "learning_rate": 2.862537877071047e-06, - "loss": 0.9756, - "step": 4184 - }, - { - "epoch": 0.3774180457230464, - "grad_norm": 0.6350237042966316, - "learning_rate": 2.8620107517682597e-06, - "loss": 0.8068, - "step": 4185 - }, - { - "epoch": 0.37750822924651667, - "grad_norm": 1.3534628237158821, - "learning_rate": 2.8614835529139618e-06, - "loss": 0.9298, - "step": 4186 - }, - { - "epoch": 0.3775984127699869, - "grad_norm": 1.4473820071974473, - "learning_rate": 2.8609562805531367e-06, - "loss": 0.9493, - "step": 4187 - }, - { - "epoch": 0.3776885962934572, - "grad_norm": 1.4523080970200655, - "learning_rate": 2.8604289347307746e-06, - "loss": 1.029, - "step": 4188 - }, - { - "epoch": 0.3777787798169274, - "grad_norm": 1.34982563058301, - "learning_rate": 2.859901515491871e-06, - "loss": 0.9894, - "step": 4189 - }, - { - "epoch": 0.3778689633403977, - "grad_norm": 1.341643567607107, - "learning_rate": 2.8593740228814298e-06, - "loss": 0.8826, - "step": 4190 - }, - { - "epoch": 0.37795914686386795, - "grad_norm": 2.0064659501548636, - "learning_rate": 2.8588464569444574e-06, - "loss": 0.9801, - "step": 4191 - }, - { - "epoch": 0.37804933038733823, - "grad_norm": 2.0216517282402107, - "learning_rate": 2.8583188177259697e-06, - "loss": 0.9781, - "step": 4192 - }, - { - "epoch": 0.3781395139108085, - "grad_norm": 1.5736747918856853, - "learning_rate": 2.857791105270988e-06, - "loss": 0.9779, - "step": 4193 - }, - { - "epoch": 0.37822969743427876, - "grad_norm": 1.356291639330444, - "learning_rate": 2.857263319624539e-06, - "loss": 1.0152, - "step": 4194 - }, - { - "epoch": 0.37831988095774904, - "grad_norm": 1.3221758723260477, - "learning_rate": 2.856735460831657e-06, - "loss": 0.9761, - "step": 4195 - }, - { - "epoch": 0.3784100644812193, - "grad_norm": 1.3024458672958439, - "learning_rate": 2.856207528937382e-06, - "loss": 1.0061, - "step": 4196 - }, - { - "epoch": 0.37850024800468957, - "grad_norm": 4.230108272167829, - "learning_rate": 2.855679523986759e-06, - "loss": 0.9804, - "step": 4197 - }, - { - "epoch": 0.3785904315281598, - "grad_norm": 1.553957130209024, - "learning_rate": 2.8551514460248406e-06, - "loss": 1.0423, - "step": 4198 - }, - { - "epoch": 0.3786806150516301, - "grad_norm": 1.2814288130867955, - "learning_rate": 2.8546232950966868e-06, - "loss": 0.9986, - "step": 4199 - }, - { - "epoch": 0.3787707985751003, - "grad_norm": 1.9120869932335698, - "learning_rate": 2.85409507124736e-06, - "loss": 0.9449, - "step": 4200 - }, - { - "epoch": 0.3788609820985706, - "grad_norm": 1.38115705116957, - "learning_rate": 2.8535667745219324e-06, - "loss": 0.9879, - "step": 4201 - }, - { - "epoch": 0.37895116562204084, - "grad_norm": 1.477225180174192, - "learning_rate": 2.853038404965481e-06, - "loss": 0.9809, - "step": 4202 - }, - { - "epoch": 0.37904134914551113, - "grad_norm": 2.0933785180157796, - "learning_rate": 2.8525099626230894e-06, - "loss": 1.0702, - "step": 4203 - }, - { - "epoch": 0.37913153266898136, - "grad_norm": 1.668713655683232, - "learning_rate": 2.8519814475398472e-06, - "loss": 0.9242, - "step": 4204 - }, - { - "epoch": 0.37922171619245165, - "grad_norm": 1.1786635040877995, - "learning_rate": 2.8514528597608502e-06, - "loss": 0.9686, - "step": 4205 - }, - { - "epoch": 0.3793118997159219, - "grad_norm": 1.199357512457394, - "learning_rate": 2.8509241993312004e-06, - "loss": 1.0272, - "step": 4206 - }, - { - "epoch": 0.3794020832393922, - "grad_norm": 1.5669270931389925, - "learning_rate": 2.850395466296006e-06, - "loss": 1.0366, - "step": 4207 - }, - { - "epoch": 0.3794922667628624, - "grad_norm": 2.962390231259748, - "learning_rate": 2.849866660700381e-06, - "loss": 1.021, - "step": 4208 - }, - { - "epoch": 0.3795824502863327, - "grad_norm": 1.6060999907865834, - "learning_rate": 2.8493377825894464e-06, - "loss": 1.049, - "step": 4209 - }, - { - "epoch": 0.37967263380980293, - "grad_norm": 1.931668766383735, - "learning_rate": 2.848808832008329e-06, - "loss": 1.0592, - "step": 4210 - }, - { - "epoch": 0.3797628173332732, - "grad_norm": 1.4229319909063218, - "learning_rate": 2.848279809002162e-06, - "loss": 1.026, - "step": 4211 - }, - { - "epoch": 0.37985300085674345, - "grad_norm": 1.6816471467462097, - "learning_rate": 2.8477507136160842e-06, - "loss": 1.0098, - "step": 4212 - }, - { - "epoch": 0.37994318438021374, - "grad_norm": 0.6768534098780862, - "learning_rate": 2.847221545895241e-06, - "loss": 0.8535, - "step": 4213 - }, - { - "epoch": 0.38003336790368397, - "grad_norm": 1.263448468466642, - "learning_rate": 2.846692305884785e-06, - "loss": 0.9939, - "step": 4214 - }, - { - "epoch": 0.38012355142715426, - "grad_norm": 1.391210539910269, - "learning_rate": 2.8461629936298718e-06, - "loss": 1.0393, - "step": 4215 - }, - { - "epoch": 0.3802137349506245, - "grad_norm": 3.102623899630929, - "learning_rate": 2.845633609175666e-06, - "loss": 1.0485, - "step": 4216 - }, - { - "epoch": 0.3803039184740948, - "grad_norm": 1.4971340934768362, - "learning_rate": 2.8451041525673383e-06, - "loss": 1.0276, - "step": 4217 - }, - { - "epoch": 0.38039410199756507, - "grad_norm": 1.6303151576068224, - "learning_rate": 2.8445746238500647e-06, - "loss": 1.0271, - "step": 4218 - }, - { - "epoch": 0.3804842855210353, - "grad_norm": 2.4972405463274705, - "learning_rate": 2.844045023069027e-06, - "loss": 0.9832, - "step": 4219 - }, - { - "epoch": 0.3805744690445056, - "grad_norm": 1.548049788913877, - "learning_rate": 2.8435153502694136e-06, - "loss": 1.0369, - "step": 4220 - }, - { - "epoch": 0.3806646525679758, - "grad_norm": 1.5746923448927548, - "learning_rate": 2.84298560549642e-06, - "loss": 0.9366, - "step": 4221 - }, - { - "epoch": 0.3807548360914461, - "grad_norm": 1.4757924973567649, - "learning_rate": 2.8424557887952462e-06, - "loss": 1.0157, - "step": 4222 - }, - { - "epoch": 0.38084501961491635, - "grad_norm": 1.4400998529940998, - "learning_rate": 2.841925900211099e-06, - "loss": 0.9195, - "step": 4223 - }, - { - "epoch": 0.38093520313838664, - "grad_norm": 1.3715776421003776, - "learning_rate": 2.841395939789192e-06, - "loss": 1.0484, - "step": 4224 - }, - { - "epoch": 0.38102538666185687, - "grad_norm": 1.8322252118014488, - "learning_rate": 2.8408659075747435e-06, - "loss": 1.0142, - "step": 4225 - }, - { - "epoch": 0.38111557018532716, - "grad_norm": 1.5158279912238266, - "learning_rate": 2.8403358036129796e-06, - "loss": 1.0021, - "step": 4226 - }, - { - "epoch": 0.3812057537087974, - "grad_norm": 1.652007084450298, - "learning_rate": 2.839805627949132e-06, - "loss": 1.0496, - "step": 4227 - }, - { - "epoch": 0.3812959372322677, - "grad_norm": 1.6694928605303152, - "learning_rate": 2.8392753806284367e-06, - "loss": 1.0537, - "step": 4228 - }, - { - "epoch": 0.3813861207557379, - "grad_norm": 1.504455941546216, - "learning_rate": 2.838745061696139e-06, - "loss": 0.9462, - "step": 4229 - }, - { - "epoch": 0.3814763042792082, - "grad_norm": 1.572110792793598, - "learning_rate": 2.838214671197487e-06, - "loss": 0.9331, - "step": 4230 - }, - { - "epoch": 0.38156648780267843, - "grad_norm": 1.3347107011604562, - "learning_rate": 2.8376842091777377e-06, - "loss": 1.098, - "step": 4231 - }, - { - "epoch": 0.3816566713261487, - "grad_norm": 1.6676158665746552, - "learning_rate": 2.8371536756821524e-06, - "loss": 0.9049, - "step": 4232 - }, - { - "epoch": 0.38174685484961896, - "grad_norm": 1.8284003547594674, - "learning_rate": 2.836623070756e-06, - "loss": 0.9799, - "step": 4233 - }, - { - "epoch": 0.38183703837308924, - "grad_norm": 1.401144253214181, - "learning_rate": 2.8360923944445542e-06, - "loss": 1.0238, - "step": 4234 - }, - { - "epoch": 0.3819272218965595, - "grad_norm": 1.3017973725703078, - "learning_rate": 2.8355616467930947e-06, - "loss": 0.9825, - "step": 4235 - }, - { - "epoch": 0.38201740542002977, - "grad_norm": 1.5530339174024161, - "learning_rate": 2.8350308278469085e-06, - "loss": 0.968, - "step": 4236 - }, - { - "epoch": 0.3821075889435, - "grad_norm": 1.4934655752694035, - "learning_rate": 2.8344999376512877e-06, - "loss": 1.0307, - "step": 4237 - }, - { - "epoch": 0.3821977724669703, - "grad_norm": 1.2775241362798067, - "learning_rate": 2.8339689762515307e-06, - "loss": 0.9843, - "step": 4238 - }, - { - "epoch": 0.3822879559904405, - "grad_norm": 1.594313951831635, - "learning_rate": 2.8334379436929424e-06, - "loss": 0.9277, - "step": 4239 - }, - { - "epoch": 0.3823781395139108, - "grad_norm": 1.647323231801321, - "learning_rate": 2.832906840020833e-06, - "loss": 0.9659, - "step": 4240 - }, - { - "epoch": 0.3824683230373811, - "grad_norm": 1.3238068786879233, - "learning_rate": 2.83237566528052e-06, - "loss": 1.0093, - "step": 4241 - }, - { - "epoch": 0.38255850656085133, - "grad_norm": 1.688618324731392, - "learning_rate": 2.831844419517325e-06, - "loss": 0.9847, - "step": 4242 - }, - { - "epoch": 0.3826486900843216, - "grad_norm": 2.0033252014951435, - "learning_rate": 2.8313131027765774e-06, - "loss": 1.034, - "step": 4243 - }, - { - "epoch": 0.38273887360779185, - "grad_norm": 1.3154666615070947, - "learning_rate": 2.8307817151036124e-06, - "loss": 0.9712, - "step": 4244 - }, - { - "epoch": 0.38282905713126214, - "grad_norm": 1.4146615124290067, - "learning_rate": 2.8302502565437704e-06, - "loss": 0.9692, - "step": 4245 - }, - { - "epoch": 0.3829192406547324, - "grad_norm": 1.8673065019936324, - "learning_rate": 2.829718727142398e-06, - "loss": 0.9268, - "step": 4246 - }, - { - "epoch": 0.38300942417820266, - "grad_norm": 1.5296451176976285, - "learning_rate": 2.829187126944849e-06, - "loss": 1.0142, - "step": 4247 - }, - { - "epoch": 0.3830996077016729, - "grad_norm": 1.849680205361544, - "learning_rate": 2.8286554559964826e-06, - "loss": 0.9476, - "step": 4248 - }, - { - "epoch": 0.3831897912251432, - "grad_norm": 1.4679624190912164, - "learning_rate": 2.8281237143426637e-06, - "loss": 1.019, - "step": 4249 - }, - { - "epoch": 0.3832799747486134, - "grad_norm": 1.3684120144140166, - "learning_rate": 2.8275919020287626e-06, - "loss": 0.9078, - "step": 4250 - }, - { - "epoch": 0.3833701582720837, - "grad_norm": 1.5993798991430512, - "learning_rate": 2.827060019100158e-06, - "loss": 1.0223, - "step": 4251 - }, - { - "epoch": 0.38346034179555394, - "grad_norm": 1.352436554807982, - "learning_rate": 2.8265280656022315e-06, - "loss": 0.9916, - "step": 4252 - }, - { - "epoch": 0.3835505253190242, - "grad_norm": 1.5415448149218216, - "learning_rate": 2.825996041580373e-06, - "loss": 0.8856, - "step": 4253 - }, - { - "epoch": 0.38364070884249446, - "grad_norm": 1.534939893856657, - "learning_rate": 2.825463947079978e-06, - "loss": 1.0136, - "step": 4254 - }, - { - "epoch": 0.38373089236596475, - "grad_norm": 1.5621548843718165, - "learning_rate": 2.8249317821464483e-06, - "loss": 0.9993, - "step": 4255 - }, - { - "epoch": 0.383821075889435, - "grad_norm": 1.4177106301847076, - "learning_rate": 2.824399546825189e-06, - "loss": 0.9324, - "step": 4256 - }, - { - "epoch": 0.38391125941290527, - "grad_norm": 1.7057317680358464, - "learning_rate": 2.823867241161616e-06, - "loss": 0.9532, - "step": 4257 - }, - { - "epoch": 0.3840014429363755, - "grad_norm": 1.6726157428053954, - "learning_rate": 2.8233348652011456e-06, - "loss": 1.0437, - "step": 4258 - }, - { - "epoch": 0.3840916264598458, - "grad_norm": 1.959875237036257, - "learning_rate": 2.8228024189892057e-06, - "loss": 0.9115, - "step": 4259 - }, - { - "epoch": 0.384181809983316, - "grad_norm": 1.1889078428167772, - "learning_rate": 2.822269902571226e-06, - "loss": 0.9995, - "step": 4260 - }, - { - "epoch": 0.3842719935067863, - "grad_norm": 1.692194096630761, - "learning_rate": 2.8217373159926446e-06, - "loss": 0.9789, - "step": 4261 - }, - { - "epoch": 0.38436217703025655, - "grad_norm": 1.1481660794637634, - "learning_rate": 2.8212046592989046e-06, - "loss": 0.9321, - "step": 4262 - }, - { - "epoch": 0.38445236055372684, - "grad_norm": 1.4963934774930163, - "learning_rate": 2.820671932535455e-06, - "loss": 1.0043, - "step": 4263 - }, - { - "epoch": 0.3845425440771971, - "grad_norm": 1.7928011281898077, - "learning_rate": 2.8201391357477506e-06, - "loss": 0.9828, - "step": 4264 - }, - { - "epoch": 0.38463272760066736, - "grad_norm": 1.4206181934762563, - "learning_rate": 2.8196062689812525e-06, - "loss": 0.968, - "step": 4265 - }, - { - "epoch": 0.38472291112413765, - "grad_norm": 1.6942676189621568, - "learning_rate": 2.819073332281429e-06, - "loss": 0.9433, - "step": 4266 - }, - { - "epoch": 0.3848130946476079, - "grad_norm": 0.6215907333735782, - "learning_rate": 2.8185403256937524e-06, - "loss": 0.8059, - "step": 4267 - }, - { - "epoch": 0.38490327817107817, - "grad_norm": 1.627387054204157, - "learning_rate": 2.8180072492637016e-06, - "loss": 0.9526, - "step": 4268 - }, - { - "epoch": 0.3849934616945484, - "grad_norm": 2.169429802231241, - "learning_rate": 2.817474103036762e-06, - "loss": 0.9865, - "step": 4269 - }, - { - "epoch": 0.3850836452180187, - "grad_norm": 1.5429388502097061, - "learning_rate": 2.816940887058425e-06, - "loss": 1.0148, - "step": 4270 - }, - { - "epoch": 0.3851738287414889, - "grad_norm": 4.420038172008993, - "learning_rate": 2.816407601374186e-06, - "loss": 1.0395, - "step": 4271 - }, - { - "epoch": 0.3852640122649592, - "grad_norm": 1.4334808221977169, - "learning_rate": 2.815874246029549e-06, - "loss": 0.9369, - "step": 4272 - }, - { - "epoch": 0.38535419578842944, - "grad_norm": 1.3328724104194203, - "learning_rate": 2.815340821070023e-06, - "loss": 0.9497, - "step": 4273 - }, - { - "epoch": 0.38544437931189973, - "grad_norm": 1.4249850623703098, - "learning_rate": 2.814807326541122e-06, - "loss": 0.974, - "step": 4274 - }, - { - "epoch": 0.38553456283536996, - "grad_norm": 1.4373831494149518, - "learning_rate": 2.8142737624883676e-06, - "loss": 0.9281, - "step": 4275 - }, - { - "epoch": 0.38562474635884025, - "grad_norm": 1.5941433888537362, - "learning_rate": 2.8137401289572854e-06, - "loss": 1.0022, - "step": 4276 - }, - { - "epoch": 0.3857149298823105, - "grad_norm": 1.5398213021525797, - "learning_rate": 2.8132064259934086e-06, - "loss": 1.0382, - "step": 4277 - }, - { - "epoch": 0.3858051134057808, - "grad_norm": 1.5279404823285665, - "learning_rate": 2.812672653642276e-06, - "loss": 0.9886, - "step": 4278 - }, - { - "epoch": 0.385895296929251, - "grad_norm": 1.4077076880319126, - "learning_rate": 2.812138811949431e-06, - "loss": 1.0101, - "step": 4279 - }, - { - "epoch": 0.3859854804527213, - "grad_norm": 1.3493385319201276, - "learning_rate": 2.8116049009604247e-06, - "loss": 1.0611, - "step": 4280 - }, - { - "epoch": 0.38607566397619153, - "grad_norm": 1.7186264687274604, - "learning_rate": 2.8110709207208132e-06, - "loss": 0.876, - "step": 4281 - }, - { - "epoch": 0.3861658474996618, - "grad_norm": 7.111507825249437, - "learning_rate": 2.810536871276158e-06, - "loss": 1.0258, - "step": 4282 - }, - { - "epoch": 0.38625603102313205, - "grad_norm": 0.7456995137178672, - "learning_rate": 2.8100027526720283e-06, - "loss": 0.9125, - "step": 4283 - }, - { - "epoch": 0.38634621454660234, - "grad_norm": 1.387182840406924, - "learning_rate": 2.8094685649539974e-06, - "loss": 0.9255, - "step": 4284 - }, - { - "epoch": 0.3864363980700726, - "grad_norm": 0.7827589340309293, - "learning_rate": 2.8089343081676455e-06, - "loss": 0.8909, - "step": 4285 - }, - { - "epoch": 0.38652658159354286, - "grad_norm": 1.559637270106944, - "learning_rate": 2.8083999823585577e-06, - "loss": 0.9924, - "step": 4286 - }, - { - "epoch": 0.3866167651170131, - "grad_norm": 1.3025742736990096, - "learning_rate": 2.8078655875723254e-06, - "loss": 1.024, - "step": 4287 - }, - { - "epoch": 0.3867069486404834, - "grad_norm": 3.1765514157801267, - "learning_rate": 2.807331123854547e-06, - "loss": 1.0699, - "step": 4288 - }, - { - "epoch": 0.38679713216395367, - "grad_norm": 1.6803321488465868, - "learning_rate": 2.806796591250826e-06, - "loss": 0.8716, - "step": 4289 - }, - { - "epoch": 0.3868873156874239, - "grad_norm": 1.6237855921743571, - "learning_rate": 2.8062619898067707e-06, - "loss": 1.0706, - "step": 4290 - }, - { - "epoch": 0.3869774992108942, - "grad_norm": 1.4300112331722787, - "learning_rate": 2.8057273195679963e-06, - "loss": 1.011, - "step": 4291 - }, - { - "epoch": 0.3870676827343644, - "grad_norm": 1.5914408092463623, - "learning_rate": 2.8051925805801253e-06, - "loss": 1.0037, - "step": 4292 - }, - { - "epoch": 0.3871578662578347, - "grad_norm": 1.3746699023654225, - "learning_rate": 2.804657772888783e-06, - "loss": 1.0272, - "step": 4293 - }, - { - "epoch": 0.38724804978130495, - "grad_norm": 2.8533699055879933, - "learning_rate": 2.804122896539602e-06, - "loss": 0.9963, - "step": 4294 - }, - { - "epoch": 0.38733823330477524, - "grad_norm": 1.3250410739362395, - "learning_rate": 2.8035879515782225e-06, - "loss": 0.9454, - "step": 4295 - }, - { - "epoch": 0.38742841682824547, - "grad_norm": 1.8720063152328976, - "learning_rate": 2.803052938050288e-06, - "loss": 1.043, - "step": 4296 - }, - { - "epoch": 0.38751860035171576, - "grad_norm": 1.6234485849804272, - "learning_rate": 2.802517856001449e-06, - "loss": 1.0388, - "step": 4297 - }, - { - "epoch": 0.387608783875186, - "grad_norm": 1.81809915387196, - "learning_rate": 2.801982705477361e-06, - "loss": 0.9546, - "step": 4298 - }, - { - "epoch": 0.3876989673986563, - "grad_norm": 1.2347850842678836, - "learning_rate": 2.8014474865236867e-06, - "loss": 0.9757, - "step": 4299 - }, - { - "epoch": 0.3877891509221265, - "grad_norm": 1.5467923845520455, - "learning_rate": 2.800912199186094e-06, - "loss": 0.9855, - "step": 4300 - }, - { - "epoch": 0.3878793344455968, - "grad_norm": 1.4999291085339472, - "learning_rate": 2.800376843510256e-06, - "loss": 1.0049, - "step": 4301 - }, - { - "epoch": 0.38796951796906703, - "grad_norm": 1.3536531893139014, - "learning_rate": 2.799841419541852e-06, - "loss": 1.0133, - "step": 4302 - }, - { - "epoch": 0.3880597014925373, - "grad_norm": 0.6832011376030851, - "learning_rate": 2.799305927326568e-06, - "loss": 0.8625, - "step": 4303 - }, - { - "epoch": 0.38814988501600756, - "grad_norm": 0.8342089741432891, - "learning_rate": 2.7987703669100955e-06, - "loss": 0.8554, - "step": 4304 - }, - { - "epoch": 0.38824006853947785, - "grad_norm": 1.9145699587393898, - "learning_rate": 2.79823473833813e-06, - "loss": 0.9991, - "step": 4305 - }, - { - "epoch": 0.3883302520629481, - "grad_norm": 1.6956830300997046, - "learning_rate": 2.797699041656376e-06, - "loss": 0.9465, - "step": 4306 - }, - { - "epoch": 0.38842043558641837, - "grad_norm": 1.7235580471346625, - "learning_rate": 2.7971632769105412e-06, - "loss": 1.0088, - "step": 4307 - }, - { - "epoch": 0.3885106191098886, - "grad_norm": 1.8050213129365065, - "learning_rate": 2.79662744414634e-06, - "loss": 0.9606, - "step": 4308 - }, - { - "epoch": 0.3886008026333589, - "grad_norm": 1.4809210496525314, - "learning_rate": 2.7960915434094923e-06, - "loss": 0.941, - "step": 4309 - }, - { - "epoch": 0.3886909861568291, - "grad_norm": 1.5963067135336133, - "learning_rate": 2.7955555747457256e-06, - "loss": 0.9658, - "step": 4310 - }, - { - "epoch": 0.3887811696802994, - "grad_norm": 1.6053311551201788, - "learning_rate": 2.79501953820077e-06, - "loss": 0.991, - "step": 4311 - }, - { - "epoch": 0.3888713532037697, - "grad_norm": 1.5353733520747423, - "learning_rate": 2.7944834338203637e-06, - "loss": 1.0095, - "step": 4312 - }, - { - "epoch": 0.38896153672723993, - "grad_norm": 3.778664327515296, - "learning_rate": 2.79394726165025e-06, - "loss": 0.9532, - "step": 4313 - }, - { - "epoch": 0.3890517202507102, - "grad_norm": 1.7710792894282086, - "learning_rate": 2.793411021736178e-06, - "loss": 0.9708, - "step": 4314 - }, - { - "epoch": 0.38914190377418045, - "grad_norm": 1.4733920497902973, - "learning_rate": 2.7928747141239027e-06, - "loss": 1.0915, - "step": 4315 - }, - { - "epoch": 0.38923208729765074, - "grad_norm": 0.9494347032329145, - "learning_rate": 2.7923383388591856e-06, - "loss": 0.8685, - "step": 4316 - }, - { - "epoch": 0.389322270821121, - "grad_norm": 1.491798390090305, - "learning_rate": 2.7918018959877923e-06, - "loss": 0.9561, - "step": 4317 - }, - { - "epoch": 0.38941245434459126, - "grad_norm": 1.3060781867821307, - "learning_rate": 2.791265385555495e-06, - "loss": 1.0192, - "step": 4318 - }, - { - "epoch": 0.3895026378680615, - "grad_norm": 1.4099800654956844, - "learning_rate": 2.790728807608072e-06, - "loss": 1.0512, - "step": 4319 - }, - { - "epoch": 0.3895928213915318, - "grad_norm": 1.5230027581062815, - "learning_rate": 2.790192162191307e-06, - "loss": 0.9811, - "step": 4320 - }, - { - "epoch": 0.389683004915002, - "grad_norm": 1.633220785830613, - "learning_rate": 2.78965544935099e-06, - "loss": 1.0152, - "step": 4321 - }, - { - "epoch": 0.3897731884384723, - "grad_norm": 1.6927680684822406, - "learning_rate": 2.789118669132916e-06, - "loss": 0.9715, - "step": 4322 - }, - { - "epoch": 0.38986337196194254, - "grad_norm": 3.3794047963582177, - "learning_rate": 2.7885818215828856e-06, - "loss": 1.0556, - "step": 4323 - }, - { - "epoch": 0.38995355548541283, - "grad_norm": 1.7194278420773341, - "learning_rate": 2.7880449067467064e-06, - "loss": 0.9511, - "step": 4324 - }, - { - "epoch": 0.39004373900888306, - "grad_norm": 1.5946517338106732, - "learning_rate": 2.78750792467019e-06, - "loss": 0.9401, - "step": 4325 - }, - { - "epoch": 0.39013392253235335, - "grad_norm": 1.4376286158866576, - "learning_rate": 2.786970875399156e-06, - "loss": 0.8552, - "step": 4326 - }, - { - "epoch": 0.3902241060558236, - "grad_norm": 1.2589930331375998, - "learning_rate": 2.7864337589794267e-06, - "loss": 0.9157, - "step": 4327 - }, - { - "epoch": 0.39031428957929387, - "grad_norm": 1.4691224944769161, - "learning_rate": 2.7858965754568335e-06, - "loss": 1.0098, - "step": 4328 - }, - { - "epoch": 0.3904044731027641, - "grad_norm": 1.45458984375, - "learning_rate": 2.785359324877211e-06, - "loss": 0.932, - "step": 4329 - }, - { - "epoch": 0.3904946566262344, - "grad_norm": 1.5308870936495635, - "learning_rate": 2.7848220072864e-06, - "loss": 1.0118, - "step": 4330 - }, - { - "epoch": 0.3905848401497046, - "grad_norm": 1.5604238831676194, - "learning_rate": 2.784284622730248e-06, - "loss": 0.9805, - "step": 4331 - }, - { - "epoch": 0.3906750236731749, - "grad_norm": 3.0135343746479912, - "learning_rate": 2.7837471712546073e-06, - "loss": 1.0041, - "step": 4332 - }, - { - "epoch": 0.39076520719664515, - "grad_norm": 1.3442703392897946, - "learning_rate": 2.783209652905337e-06, - "loss": 0.968, - "step": 4333 - }, - { - "epoch": 0.39085539072011544, - "grad_norm": 1.3285060111653901, - "learning_rate": 2.7826720677283e-06, - "loss": 0.9593, - "step": 4334 - }, - { - "epoch": 0.39094557424358567, - "grad_norm": 1.4269586587543261, - "learning_rate": 2.782134415769367e-06, - "loss": 0.9584, - "step": 4335 - }, - { - "epoch": 0.39103575776705596, - "grad_norm": 1.2611981430961956, - "learning_rate": 2.7815966970744126e-06, - "loss": 1.038, - "step": 4336 - }, - { - "epoch": 0.39112594129052625, - "grad_norm": 1.7429137490971813, - "learning_rate": 2.7810589116893184e-06, - "loss": 1.056, - "step": 4337 - }, - { - "epoch": 0.3912161248139965, - "grad_norm": 2.025132459852923, - "learning_rate": 2.780521059659972e-06, - "loss": 0.9887, - "step": 4338 - }, - { - "epoch": 0.39130630833746677, - "grad_norm": 3.7091517598979555, - "learning_rate": 2.7799831410322637e-06, - "loss": 0.9759, - "step": 4339 - }, - { - "epoch": 0.391396491860937, - "grad_norm": 1.3633760348272623, - "learning_rate": 2.779445155852094e-06, - "loss": 0.946, - "step": 4340 - }, - { - "epoch": 0.3914866753844073, - "grad_norm": 0.7892647285790885, - "learning_rate": 2.7789071041653655e-06, - "loss": 0.9231, - "step": 4341 - }, - { - "epoch": 0.3915768589078775, - "grad_norm": 1.8983048129709328, - "learning_rate": 2.7783689860179875e-06, - "loss": 1.0423, - "step": 4342 - }, - { - "epoch": 0.3916670424313478, - "grad_norm": 1.674759773564125, - "learning_rate": 2.7778308014558767e-06, - "loss": 0.9347, - "step": 4343 - }, - { - "epoch": 0.39175722595481804, - "grad_norm": 1.4612676074139517, - "learning_rate": 2.7772925505249524e-06, - "loss": 1.061, - "step": 4344 - }, - { - "epoch": 0.39184740947828833, - "grad_norm": 3.885733106611656, - "learning_rate": 2.7767542332711417e-06, - "loss": 0.9613, - "step": 4345 - }, - { - "epoch": 0.39193759300175857, - "grad_norm": 2.1201067291240783, - "learning_rate": 2.776215849740377e-06, - "loss": 0.9671, - "step": 4346 - }, - { - "epoch": 0.39202777652522885, - "grad_norm": 1.3814073740515889, - "learning_rate": 2.775677399978596e-06, - "loss": 1.0075, - "step": 4347 - }, - { - "epoch": 0.3921179600486991, - "grad_norm": 0.59807532085239, - "learning_rate": 2.775138884031742e-06, - "loss": 0.8338, - "step": 4348 - }, - { - "epoch": 0.3922081435721694, - "grad_norm": 1.3878785347297855, - "learning_rate": 2.774600301945764e-06, - "loss": 0.949, - "step": 4349 - }, - { - "epoch": 0.3922983270956396, - "grad_norm": 3.098160807248914, - "learning_rate": 2.774061653766618e-06, - "loss": 1.0137, - "step": 4350 - }, - { - "epoch": 0.3923885106191099, - "grad_norm": 1.6917716461956986, - "learning_rate": 2.773522939540263e-06, - "loss": 0.9768, - "step": 4351 - }, - { - "epoch": 0.39247869414258013, - "grad_norm": 1.5017201574086974, - "learning_rate": 2.7729841593126663e-06, - "loss": 0.9175, - "step": 4352 - }, - { - "epoch": 0.3925688776660504, - "grad_norm": 1.2437552389676731, - "learning_rate": 2.7724453131297988e-06, - "loss": 1.0536, - "step": 4353 - }, - { - "epoch": 0.39265906118952065, - "grad_norm": 1.3935557567910448, - "learning_rate": 2.771906401037637e-06, - "loss": 1.0222, - "step": 4354 - }, - { - "epoch": 0.39274924471299094, - "grad_norm": 1.9127798336974664, - "learning_rate": 2.7713674230821664e-06, - "loss": 0.9528, - "step": 4355 - }, - { - "epoch": 0.3928394282364612, - "grad_norm": 1.3837260014521395, - "learning_rate": 2.7708283793093724e-06, - "loss": 0.9416, - "step": 4356 - }, - { - "epoch": 0.39292961175993146, - "grad_norm": 1.477070554490582, - "learning_rate": 2.7702892697652514e-06, - "loss": 0.9697, - "step": 4357 - }, - { - "epoch": 0.3930197952834017, - "grad_norm": 1.3481065080883885, - "learning_rate": 2.7697500944958024e-06, - "loss": 0.9672, - "step": 4358 - }, - { - "epoch": 0.393109978806872, - "grad_norm": 1.6871102907390718, - "learning_rate": 2.7692108535470312e-06, - "loss": 1.0762, - "step": 4359 - }, - { - "epoch": 0.3932001623303423, - "grad_norm": 2.3408236736325994, - "learning_rate": 2.768671546964948e-06, - "loss": 1.0311, - "step": 4360 - }, - { - "epoch": 0.3932903458538125, - "grad_norm": 1.599636799597021, - "learning_rate": 2.7681321747955713e-06, - "loss": 1.014, - "step": 4361 - }, - { - "epoch": 0.3933805293772828, - "grad_norm": 1.276740535428365, - "learning_rate": 2.767592737084921e-06, - "loss": 1.0076, - "step": 4362 - }, - { - "epoch": 0.39347071290075303, - "grad_norm": 2.2004590509232473, - "learning_rate": 2.767053233879026e-06, - "loss": 1.0778, - "step": 4363 - }, - { - "epoch": 0.3935608964242233, - "grad_norm": 1.6911821106721534, - "learning_rate": 2.76651366522392e-06, - "loss": 1.0643, - "step": 4364 - }, - { - "epoch": 0.39365107994769355, - "grad_norm": 1.488834228310949, - "learning_rate": 2.7659740311656413e-06, - "loss": 0.964, - "step": 4365 - }, - { - "epoch": 0.39374126347116384, - "grad_norm": 1.7164914637419193, - "learning_rate": 2.7654343317502352e-06, - "loss": 1.0137, - "step": 4366 - }, - { - "epoch": 0.39383144699463407, - "grad_norm": 1.4692804717791585, - "learning_rate": 2.7648945670237502e-06, - "loss": 0.9686, - "step": 4367 - }, - { - "epoch": 0.39392163051810436, - "grad_norm": 1.6447720385349265, - "learning_rate": 2.7643547370322446e-06, - "loss": 1.0323, - "step": 4368 - }, - { - "epoch": 0.3940118140415746, - "grad_norm": 1.559365294279117, - "learning_rate": 2.7638148418217775e-06, - "loss": 1.0251, - "step": 4369 - }, - { - "epoch": 0.3941019975650449, - "grad_norm": 1.5648856452136768, - "learning_rate": 2.7632748814384163e-06, - "loss": 1.0316, - "step": 4370 - }, - { - "epoch": 0.3941921810885151, - "grad_norm": 1.4195476390373307, - "learning_rate": 2.7627348559282335e-06, - "loss": 1.0202, - "step": 4371 - }, - { - "epoch": 0.3942823646119854, - "grad_norm": 1.2582604694533452, - "learning_rate": 2.7621947653373075e-06, - "loss": 0.9307, - "step": 4372 - }, - { - "epoch": 0.39437254813545564, - "grad_norm": 0.6959581270830991, - "learning_rate": 2.7616546097117213e-06, - "loss": 0.8537, - "step": 4373 - }, - { - "epoch": 0.3944627316589259, - "grad_norm": 1.4123194368120071, - "learning_rate": 2.761114389097564e-06, - "loss": 0.9887, - "step": 4374 - }, - { - "epoch": 0.39455291518239616, - "grad_norm": 1.4417313206605142, - "learning_rate": 2.7605741035409305e-06, - "loss": 1.0264, - "step": 4375 - }, - { - "epoch": 0.39464309870586645, - "grad_norm": 1.3515001403887552, - "learning_rate": 2.76003375308792e-06, - "loss": 0.9727, - "step": 4376 - }, - { - "epoch": 0.3947332822293367, - "grad_norm": 1.3575928065855154, - "learning_rate": 2.75949333778464e-06, - "loss": 0.9591, - "step": 4377 - }, - { - "epoch": 0.39482346575280697, - "grad_norm": 1.7634334635413735, - "learning_rate": 2.7589528576772e-06, - "loss": 0.8699, - "step": 4378 - }, - { - "epoch": 0.3949136492762772, - "grad_norm": 2.8972004103781823, - "learning_rate": 2.758412312811717e-06, - "loss": 0.9805, - "step": 4379 - }, - { - "epoch": 0.3950038327997475, - "grad_norm": 0.7157739309400125, - "learning_rate": 2.7578717032343146e-06, - "loss": 0.8786, - "step": 4380 - }, - { - "epoch": 0.3950940163232177, - "grad_norm": 1.3346036531964023, - "learning_rate": 2.757331028991119e-06, - "loss": 1.0195, - "step": 4381 - }, - { - "epoch": 0.395184199846688, - "grad_norm": 1.894995186228413, - "learning_rate": 2.7567902901282642e-06, - "loss": 1.0092, - "step": 4382 - }, - { - "epoch": 0.3952743833701583, - "grad_norm": 1.9307197558908837, - "learning_rate": 2.7562494866918892e-06, - "loss": 0.9818, - "step": 4383 - }, - { - "epoch": 0.39536456689362853, - "grad_norm": 1.24357139225089, - "learning_rate": 2.7557086187281378e-06, - "loss": 0.9639, - "step": 4384 - }, - { - "epoch": 0.3954547504170988, - "grad_norm": 26.839662678129567, - "learning_rate": 2.75516768628316e-06, - "loss": 1.0847, - "step": 4385 - }, - { - "epoch": 0.39554493394056905, - "grad_norm": 1.5569296153384455, - "learning_rate": 2.7546266894031114e-06, - "loss": 1.0638, - "step": 4386 - }, - { - "epoch": 0.39563511746403934, - "grad_norm": 1.3483507661120813, - "learning_rate": 2.7540856281341526e-06, - "loss": 0.9361, - "step": 4387 - }, - { - "epoch": 0.3957253009875096, - "grad_norm": 6.872742993335442, - "learning_rate": 2.7535445025224506e-06, - "loss": 0.9535, - "step": 4388 - }, - { - "epoch": 0.39581548451097986, - "grad_norm": 1.983815812748074, - "learning_rate": 2.753003312614176e-06, - "loss": 0.9896, - "step": 4389 - }, - { - "epoch": 0.3959056680344501, - "grad_norm": 1.568190778171439, - "learning_rate": 2.7524620584555065e-06, - "loss": 0.9899, - "step": 4390 - }, - { - "epoch": 0.3959958515579204, - "grad_norm": 1.5450985659172873, - "learning_rate": 2.7519207400926253e-06, - "loss": 1.0237, - "step": 4391 - }, - { - "epoch": 0.3960860350813906, - "grad_norm": 1.7211001887398, - "learning_rate": 2.751379357571721e-06, - "loss": 0.9692, - "step": 4392 - }, - { - "epoch": 0.3961762186048609, - "grad_norm": 1.3389704224166523, - "learning_rate": 2.7508379109389865e-06, - "loss": 1.0561, - "step": 4393 - }, - { - "epoch": 0.39626640212833114, - "grad_norm": 1.7040046729932654, - "learning_rate": 2.750296400240622e-06, - "loss": 1.0001, - "step": 4394 - }, - { - "epoch": 0.39635658565180143, - "grad_norm": 1.2380560540803651, - "learning_rate": 2.7497548255228305e-06, - "loss": 1.0204, - "step": 4395 - }, - { - "epoch": 0.39644676917527166, - "grad_norm": 1.451971437073453, - "learning_rate": 2.749213186831824e-06, - "loss": 0.9401, - "step": 4396 - }, - { - "epoch": 0.39653695269874195, - "grad_norm": 1.3618850772759683, - "learning_rate": 2.7486714842138173e-06, - "loss": 1.0286, - "step": 4397 - }, - { - "epoch": 0.3966271362222122, - "grad_norm": 1.76807847502181, - "learning_rate": 2.748129717715031e-06, - "loss": 0.9437, - "step": 4398 - }, - { - "epoch": 0.3967173197456825, - "grad_norm": 1.4498427042652613, - "learning_rate": 2.747587887381692e-06, - "loss": 1.0112, - "step": 4399 - }, - { - "epoch": 0.3968075032691527, - "grad_norm": 1.3556558001735954, - "learning_rate": 2.7470459932600328e-06, - "loss": 0.9411, - "step": 4400 - }, - { - "epoch": 0.396897686792623, - "grad_norm": 1.438173882467255, - "learning_rate": 2.7465040353962897e-06, - "loss": 0.9863, - "step": 4401 - }, - { - "epoch": 0.3969878703160932, - "grad_norm": 1.6221938479495346, - "learning_rate": 2.745962013836706e-06, - "loss": 0.9772, - "step": 4402 - }, - { - "epoch": 0.3970780538395635, - "grad_norm": 1.531166463636343, - "learning_rate": 2.74541992862753e-06, - "loss": 0.962, - "step": 4403 - }, - { - "epoch": 0.39716823736303375, - "grad_norm": 1.6937947313095922, - "learning_rate": 2.744877779815016e-06, - "loss": 0.9875, - "step": 4404 - }, - { - "epoch": 0.39725842088650404, - "grad_norm": 1.1817997596712073, - "learning_rate": 2.7443355674454234e-06, - "loss": 0.9813, - "step": 4405 - }, - { - "epoch": 0.39734860440997427, - "grad_norm": 1.9688936211409769, - "learning_rate": 2.743793291565015e-06, - "loss": 1.0299, - "step": 4406 - }, - { - "epoch": 0.39743878793344456, - "grad_norm": 1.6938557496788842, - "learning_rate": 2.7432509522200617e-06, - "loss": 0.9168, - "step": 4407 - }, - { - "epoch": 0.39752897145691485, - "grad_norm": 1.6073468956777948, - "learning_rate": 2.7427085494568383e-06, - "loss": 0.9311, - "step": 4408 - }, - { - "epoch": 0.3976191549803851, - "grad_norm": 0.6553263067301799, - "learning_rate": 2.742166083321628e-06, - "loss": 0.87, - "step": 4409 - }, - { - "epoch": 0.39770933850385537, - "grad_norm": 1.543114918412826, - "learning_rate": 2.7416235538607137e-06, - "loss": 1.0346, - "step": 4410 - }, - { - "epoch": 0.3977995220273256, - "grad_norm": 1.5875221190826476, - "learning_rate": 2.7410809611203894e-06, - "loss": 0.9678, - "step": 4411 - }, - { - "epoch": 0.3978897055507959, - "grad_norm": 1.4329050681060842, - "learning_rate": 2.7405383051469507e-06, - "loss": 0.9441, - "step": 4412 - }, - { - "epoch": 0.3979798890742661, - "grad_norm": 1.975666551741319, - "learning_rate": 2.7399955859867e-06, - "loss": 0.8013, - "step": 4413 - }, - { - "epoch": 0.3980700725977364, - "grad_norm": 0.6535111677402003, - "learning_rate": 2.7394528036859465e-06, - "loss": 0.8111, - "step": 4414 - }, - { - "epoch": 0.39816025612120665, - "grad_norm": 1.400812870143194, - "learning_rate": 2.738909958291002e-06, - "loss": 0.9849, - "step": 4415 - }, - { - "epoch": 0.39825043964467693, - "grad_norm": 1.3148169729551376, - "learning_rate": 2.7383670498481863e-06, - "loss": 0.897, - "step": 4416 - }, - { - "epoch": 0.39834062316814717, - "grad_norm": 0.6058910988742053, - "learning_rate": 2.737824078403822e-06, - "loss": 0.7735, - "step": 4417 - }, - { - "epoch": 0.39843080669161746, - "grad_norm": 1.4276630681257003, - "learning_rate": 2.737281044004239e-06, - "loss": 0.9603, - "step": 4418 - }, - { - "epoch": 0.3985209902150877, - "grad_norm": 1.5710037000771602, - "learning_rate": 2.736737946695772e-06, - "loss": 0.9739, - "step": 4419 - }, - { - "epoch": 0.398611173738558, - "grad_norm": 1.4070535695086528, - "learning_rate": 2.736194786524761e-06, - "loss": 0.9766, - "step": 4420 - }, - { - "epoch": 0.3987013572620282, - "grad_norm": 1.4332460402735598, - "learning_rate": 2.7356515635375517e-06, - "loss": 0.9873, - "step": 4421 - }, - { - "epoch": 0.3987915407854985, - "grad_norm": 1.7620709864316946, - "learning_rate": 2.735108277780495e-06, - "loss": 0.9089, - "step": 4422 - }, - { - "epoch": 0.39888172430896873, - "grad_norm": 1.3656550511036971, - "learning_rate": 2.7345649292999456e-06, - "loss": 0.975, - "step": 4423 - }, - { - "epoch": 0.398971907832439, - "grad_norm": 1.487225495983484, - "learning_rate": 2.734021518142267e-06, - "loss": 0.9899, - "step": 4424 - }, - { - "epoch": 0.39906209135590925, - "grad_norm": 1.6088089919815416, - "learning_rate": 2.733478044353825e-06, - "loss": 0.8221, - "step": 4425 - }, - { - "epoch": 0.39915227487937954, - "grad_norm": 1.6334927250817182, - "learning_rate": 2.7329345079809917e-06, - "loss": 0.9932, - "step": 4426 - }, - { - "epoch": 0.3992424584028498, - "grad_norm": 1.4728739043866812, - "learning_rate": 2.7323909090701447e-06, - "loss": 0.9175, - "step": 4427 - }, - { - "epoch": 0.39933264192632006, - "grad_norm": 1.6218263740085772, - "learning_rate": 2.731847247667667e-06, - "loss": 1.032, - "step": 4428 - }, - { - "epoch": 0.3994228254497903, - "grad_norm": 1.451306014709058, - "learning_rate": 2.731303523819947e-06, - "loss": 0.9611, - "step": 4429 - }, - { - "epoch": 0.3995130089732606, - "grad_norm": 1.8308281760938212, - "learning_rate": 2.7307597375733783e-06, - "loss": 1.0448, - "step": 4430 - }, - { - "epoch": 0.3996031924967309, - "grad_norm": 1.545369349900633, - "learning_rate": 2.7302158889743587e-06, - "loss": 0.986, - "step": 4431 - }, - { - "epoch": 0.3996933760202011, - "grad_norm": 1.4326654489783333, - "learning_rate": 2.7296719780692937e-06, - "loss": 0.9447, - "step": 4432 - }, - { - "epoch": 0.3997835595436714, - "grad_norm": 1.4199280876433036, - "learning_rate": 2.7291280049045916e-06, - "loss": 0.8954, - "step": 4433 - }, - { - "epoch": 0.39987374306714163, - "grad_norm": 1.3673437846640066, - "learning_rate": 2.7285839695266683e-06, - "loss": 0.9251, - "step": 4434 - }, - { - "epoch": 0.3999639265906119, - "grad_norm": 1.526327512180291, - "learning_rate": 2.7280398719819423e-06, - "loss": 0.9374, - "step": 4435 - }, - { - "epoch": 0.40005411011408215, - "grad_norm": 1.6087126618908152, - "learning_rate": 2.727495712316841e-06, - "loss": 1.0521, - "step": 4436 - }, - { - "epoch": 0.40014429363755244, - "grad_norm": 1.6943903229969326, - "learning_rate": 2.7269514905777945e-06, - "loss": 0.9271, - "step": 4437 - }, - { - "epoch": 0.4002344771610227, - "grad_norm": 1.4506280426821834, - "learning_rate": 2.7264072068112377e-06, - "loss": 1.1081, - "step": 4438 - }, - { - "epoch": 0.40032466068449296, - "grad_norm": 1.7707025891077461, - "learning_rate": 2.7258628610636133e-06, - "loss": 0.9485, - "step": 4439 - }, - { - "epoch": 0.4004148442079632, - "grad_norm": 1.8182999778413145, - "learning_rate": 2.7253184533813667e-06, - "loss": 1.0616, - "step": 4440 - }, - { - "epoch": 0.4005050277314335, - "grad_norm": 1.461040228021576, - "learning_rate": 2.72477398381095e-06, - "loss": 0.993, - "step": 4441 - }, - { - "epoch": 0.4005952112549037, - "grad_norm": 1.3683508582779849, - "learning_rate": 2.724229452398821e-06, - "loss": 0.9943, - "step": 4442 - }, - { - "epoch": 0.400685394778374, - "grad_norm": 1.618844404376673, - "learning_rate": 2.7236848591914422e-06, - "loss": 0.998, - "step": 4443 - }, - { - "epoch": 0.40077557830184424, - "grad_norm": 1.9167859689928175, - "learning_rate": 2.7231402042352803e-06, - "loss": 0.9725, - "step": 4444 - }, - { - "epoch": 0.4008657618253145, - "grad_norm": 1.127231133877358, - "learning_rate": 2.722595487576809e-06, - "loss": 1.0187, - "step": 4445 - }, - { - "epoch": 0.40095594534878476, - "grad_norm": 1.5980275662303987, - "learning_rate": 2.722050709262506e-06, - "loss": 1.0269, - "step": 4446 - }, - { - "epoch": 0.40104612887225505, - "grad_norm": 1.2534898678613946, - "learning_rate": 2.7215058693388557e-06, - "loss": 0.9663, - "step": 4447 - }, - { - "epoch": 0.4011363123957253, - "grad_norm": 1.741702093405954, - "learning_rate": 2.720960967852346e-06, - "loss": 0.8991, - "step": 4448 - }, - { - "epoch": 0.40122649591919557, - "grad_norm": 1.4815125824171564, - "learning_rate": 2.720416004849471e-06, - "loss": 0.9228, - "step": 4449 - }, - { - "epoch": 0.4013166794426658, - "grad_norm": 1.4660550005224513, - "learning_rate": 2.7198709803767304e-06, - "loss": 0.9386, - "step": 4450 - }, - { - "epoch": 0.4014068629661361, - "grad_norm": 1.2532492844742977, - "learning_rate": 2.7193258944806286e-06, - "loss": 0.9491, - "step": 4451 - }, - { - "epoch": 0.4014970464896063, - "grad_norm": 1.5658170966740708, - "learning_rate": 2.718780747207675e-06, - "loss": 0.9865, - "step": 4452 - }, - { - "epoch": 0.4015872300130766, - "grad_norm": 1.7275109081227549, - "learning_rate": 2.7182355386043847e-06, - "loss": 1.0487, - "step": 4453 - }, - { - "epoch": 0.40167741353654685, - "grad_norm": 1.2674273149916433, - "learning_rate": 2.717690268717278e-06, - "loss": 1.0461, - "step": 4454 - }, - { - "epoch": 0.40176759706001713, - "grad_norm": 1.3120512876517167, - "learning_rate": 2.7171449375928803e-06, - "loss": 1.0099, - "step": 4455 - }, - { - "epoch": 0.4018577805834874, - "grad_norm": 1.9984332146497985, - "learning_rate": 2.716599545277722e-06, - "loss": 1.0098, - "step": 4456 - }, - { - "epoch": 0.40194796410695766, - "grad_norm": 1.5536288390177013, - "learning_rate": 2.7160540918183394e-06, - "loss": 0.9474, - "step": 4457 - }, - { - "epoch": 0.40203814763042794, - "grad_norm": 1.677059157613886, - "learning_rate": 2.715508577261273e-06, - "loss": 0.9071, - "step": 4458 - }, - { - "epoch": 0.4021283311538982, - "grad_norm": 1.779684884971191, - "learning_rate": 2.7149630016530702e-06, - "loss": 0.9654, - "step": 4459 - }, - { - "epoch": 0.40221851467736847, - "grad_norm": 1.2713732692851873, - "learning_rate": 2.7144173650402815e-06, - "loss": 1.0019, - "step": 4460 - }, - { - "epoch": 0.4023086982008387, - "grad_norm": 1.3011883622667821, - "learning_rate": 2.7138716674694636e-06, - "loss": 1.0103, - "step": 4461 - }, - { - "epoch": 0.402398881724309, - "grad_norm": 1.6933055899943037, - "learning_rate": 2.7133259089871795e-06, - "loss": 0.9397, - "step": 4462 - }, - { - "epoch": 0.4024890652477792, - "grad_norm": 1.4658007941322087, - "learning_rate": 2.712780089639995e-06, - "loss": 0.985, - "step": 4463 - }, - { - "epoch": 0.4025792487712495, - "grad_norm": 1.4196084370514743, - "learning_rate": 2.712234209474483e-06, - "loss": 0.8549, - "step": 4464 - }, - { - "epoch": 0.40266943229471974, - "grad_norm": 1.3344806513178677, - "learning_rate": 2.7116882685372218e-06, - "loss": 1.0231, - "step": 4465 - }, - { - "epoch": 0.40275961581819003, - "grad_norm": 1.3707021387661429, - "learning_rate": 2.7111422668747927e-06, - "loss": 0.957, - "step": 4466 - }, - { - "epoch": 0.40284979934166026, - "grad_norm": 1.8443915980337753, - "learning_rate": 2.7105962045337846e-06, - "loss": 0.9841, - "step": 4467 - }, - { - "epoch": 0.40293998286513055, - "grad_norm": 1.4865840335655192, - "learning_rate": 2.7100500815607898e-06, - "loss": 0.9774, - "step": 4468 - }, - { - "epoch": 0.4030301663886008, - "grad_norm": 1.5805125541103942, - "learning_rate": 2.709503898002407e-06, - "loss": 1.0092, - "step": 4469 - }, - { - "epoch": 0.4031203499120711, - "grad_norm": 1.4914618520228498, - "learning_rate": 2.708957653905239e-06, - "loss": 0.9822, - "step": 4470 - }, - { - "epoch": 0.4032105334355413, - "grad_norm": 1.4410364353727838, - "learning_rate": 2.7084113493158956e-06, - "loss": 0.9984, - "step": 4471 - }, - { - "epoch": 0.4033007169590116, - "grad_norm": 1.4914544986455844, - "learning_rate": 2.7078649842809888e-06, - "loss": 1.0027, - "step": 4472 - }, - { - "epoch": 0.40339090048248183, - "grad_norm": 1.3828598272985504, - "learning_rate": 2.707318558847139e-06, - "loss": 1.0325, - "step": 4473 - }, - { - "epoch": 0.4034810840059521, - "grad_norm": 0.7343404234193099, - "learning_rate": 2.7067720730609697e-06, - "loss": 0.854, - "step": 4474 - }, - { - "epoch": 0.40357126752942235, - "grad_norm": 1.5105294371851967, - "learning_rate": 2.70622552696911e-06, - "loss": 0.9628, - "step": 4475 - }, - { - "epoch": 0.40366145105289264, - "grad_norm": 1.3335040946728804, - "learning_rate": 2.7056789206181943e-06, - "loss": 0.9354, - "step": 4476 - }, - { - "epoch": 0.40375163457636287, - "grad_norm": 1.376600720917406, - "learning_rate": 2.7051322540548615e-06, - "loss": 1.0351, - "step": 4477 - }, - { - "epoch": 0.40384181809983316, - "grad_norm": 2.2186954115478836, - "learning_rate": 2.704585527325757e-06, - "loss": 0.9516, - "step": 4478 - }, - { - "epoch": 0.40393200162330345, - "grad_norm": 3.963428684517467, - "learning_rate": 2.7040387404775303e-06, - "loss": 0.9448, - "step": 4479 - }, - { - "epoch": 0.4040221851467737, - "grad_norm": 1.4146708660357028, - "learning_rate": 2.703491893556837e-06, - "loss": 1.0039, - "step": 4480 - }, - { - "epoch": 0.40411236867024397, - "grad_norm": 1.1953473054892274, - "learning_rate": 2.702944986610335e-06, - "loss": 0.9588, - "step": 4481 - }, - { - "epoch": 0.4042025521937142, - "grad_norm": 2.2778105216528637, - "learning_rate": 2.7023980196846917e-06, - "loss": 0.9979, - "step": 4482 - }, - { - "epoch": 0.4042927357171845, - "grad_norm": 0.7297584221495366, - "learning_rate": 2.7018509928265763e-06, - "loss": 0.8751, - "step": 4483 - }, - { - "epoch": 0.4043829192406547, - "grad_norm": 1.3681059014253807, - "learning_rate": 2.7013039060826635e-06, - "loss": 1.0076, - "step": 4484 - }, - { - "epoch": 0.404473102764125, - "grad_norm": 2.0756686041162182, - "learning_rate": 2.7007567594996347e-06, - "loss": 1.0509, - "step": 4485 - }, - { - "epoch": 0.40456328628759525, - "grad_norm": 1.5158861859408166, - "learning_rate": 2.7002095531241757e-06, - "loss": 0.8821, - "step": 4486 - }, - { - "epoch": 0.40465346981106554, - "grad_norm": 1.4619328988797764, - "learning_rate": 2.6996622870029767e-06, - "loss": 0.9711, - "step": 4487 - }, - { - "epoch": 0.40474365333453577, - "grad_norm": 2.3534333653933524, - "learning_rate": 2.6991149611827335e-06, - "loss": 1.0977, - "step": 4488 - }, - { - "epoch": 0.40483383685800606, - "grad_norm": 1.8013956672595175, - "learning_rate": 2.6985675757101466e-06, - "loss": 1.0245, - "step": 4489 - }, - { - "epoch": 0.4049240203814763, - "grad_norm": 1.955600603926174, - "learning_rate": 2.698020130631922e-06, - "loss": 0.9417, - "step": 4490 - }, - { - "epoch": 0.4050142039049466, - "grad_norm": 1.4888281430636132, - "learning_rate": 2.6974726259947713e-06, - "loss": 0.9229, - "step": 4491 - }, - { - "epoch": 0.4051043874284168, - "grad_norm": 2.265753696338038, - "learning_rate": 2.6969250618454106e-06, - "loss": 0.9706, - "step": 4492 - }, - { - "epoch": 0.4051945709518871, - "grad_norm": 1.6591819866449637, - "learning_rate": 2.696377438230561e-06, - "loss": 1.0588, - "step": 4493 - }, - { - "epoch": 0.40528475447535733, - "grad_norm": 1.5528558263724226, - "learning_rate": 2.6958297551969484e-06, - "loss": 0.9201, - "step": 4494 - }, - { - "epoch": 0.4053749379988276, - "grad_norm": 3.0936471941271773, - "learning_rate": 2.695282012791304e-06, - "loss": 0.9483, - "step": 4495 - }, - { - "epoch": 0.40546512152229786, - "grad_norm": 1.5821837551879567, - "learning_rate": 2.6947342110603646e-06, - "loss": 1.0343, - "step": 4496 - }, - { - "epoch": 0.40555530504576814, - "grad_norm": 1.650952847214661, - "learning_rate": 2.6941863500508717e-06, - "loss": 1.0382, - "step": 4497 - }, - { - "epoch": 0.4056454885692384, - "grad_norm": 1.7746347427654239, - "learning_rate": 2.693638429809572e-06, - "loss": 1.0325, - "step": 4498 - }, - { - "epoch": 0.40573567209270867, - "grad_norm": 1.8194783800501644, - "learning_rate": 2.6930904503832167e-06, - "loss": 0.9749, - "step": 4499 - }, - { - "epoch": 0.4058258556161789, - "grad_norm": 2.000280360598507, - "learning_rate": 2.692542411818562e-06, - "loss": 0.9625, - "step": 4500 - }, - { - "epoch": 0.4059160391396492, - "grad_norm": 2.026334948915236, - "learning_rate": 2.69199431416237e-06, - "loss": 0.9961, - "step": 4501 - }, - { - "epoch": 0.4060062226631194, - "grad_norm": 1.437051039535939, - "learning_rate": 2.691446157461408e-06, - "loss": 0.9441, - "step": 4502 - }, - { - "epoch": 0.4060964061865897, - "grad_norm": 1.381861688488528, - "learning_rate": 2.690897941762447e-06, - "loss": 1.0473, - "step": 4503 - }, - { - "epoch": 0.40618658971006, - "grad_norm": 1.3099376097957154, - "learning_rate": 2.6903496671122642e-06, - "loss": 0.9604, - "step": 4504 - }, - { - "epoch": 0.40627677323353023, - "grad_norm": 0.6885726969811267, - "learning_rate": 2.689801333557641e-06, - "loss": 0.8003, - "step": 4505 - }, - { - "epoch": 0.4063669567570005, - "grad_norm": 1.8708074107707025, - "learning_rate": 2.689252941145365e-06, - "loss": 0.9889, - "step": 4506 - }, - { - "epoch": 0.40645714028047075, - "grad_norm": 1.3252589548524798, - "learning_rate": 2.6887044899222277e-06, - "loss": 0.9804, - "step": 4507 - }, - { - "epoch": 0.40654732380394104, - "grad_norm": 1.5166517103247232, - "learning_rate": 2.688155979935025e-06, - "loss": 1.0318, - "step": 4508 - }, - { - "epoch": 0.4066375073274113, - "grad_norm": 1.8227056399315433, - "learning_rate": 2.68760741123056e-06, - "loss": 0.9492, - "step": 4509 - }, - { - "epoch": 0.40672769085088156, - "grad_norm": 2.7481651253598094, - "learning_rate": 2.6870587838556394e-06, - "loss": 1.0023, - "step": 4510 - }, - { - "epoch": 0.4068178743743518, - "grad_norm": 1.9245213223091562, - "learning_rate": 2.686510097857075e-06, - "loss": 1.0007, - "step": 4511 - }, - { - "epoch": 0.4069080578978221, - "grad_norm": 1.7120359892003616, - "learning_rate": 2.685961353281683e-06, - "loss": 0.9827, - "step": 4512 - }, - { - "epoch": 0.4069982414212923, - "grad_norm": 1.461851517454862, - "learning_rate": 2.6854125501762863e-06, - "loss": 0.964, - "step": 4513 - }, - { - "epoch": 0.4070884249447626, - "grad_norm": 1.6804634608456397, - "learning_rate": 2.684863688587712e-06, - "loss": 0.9903, - "step": 4514 - }, - { - "epoch": 0.40717860846823284, - "grad_norm": 1.417055450646699, - "learning_rate": 2.6843147685627916e-06, - "loss": 0.9574, - "step": 4515 - }, - { - "epoch": 0.4072687919917031, - "grad_norm": 1.737155095179193, - "learning_rate": 2.683765790148361e-06, - "loss": 0.955, - "step": 4516 - }, - { - "epoch": 0.40735897551517336, - "grad_norm": 1.8081734418132325, - "learning_rate": 2.6832167533912637e-06, - "loss": 1.0351, - "step": 4517 - }, - { - "epoch": 0.40744915903864365, - "grad_norm": 2.3973111667195237, - "learning_rate": 2.682667658338345e-06, - "loss": 1.0581, - "step": 4518 - }, - { - "epoch": 0.4075393425621139, - "grad_norm": 1.3539087734650144, - "learning_rate": 2.682118505036458e-06, - "loss": 0.9568, - "step": 4519 - }, - { - "epoch": 0.40762952608558417, - "grad_norm": 0.6211832809696454, - "learning_rate": 2.681569293532459e-06, - "loss": 0.817, - "step": 4520 - }, - { - "epoch": 0.4077197096090544, - "grad_norm": 0.6679053248582574, - "learning_rate": 2.6810200238732102e-06, - "loss": 0.8485, - "step": 4521 - }, - { - "epoch": 0.4078098931325247, - "grad_norm": 1.6227295493043647, - "learning_rate": 2.6804706961055776e-06, - "loss": 1.0255, - "step": 4522 - }, - { - "epoch": 0.4079000766559949, - "grad_norm": 1.4557522229965927, - "learning_rate": 2.6799213102764326e-06, - "loss": 1.0167, - "step": 4523 - }, - { - "epoch": 0.4079902601794652, - "grad_norm": 2.6025033343584174, - "learning_rate": 2.679371866432653e-06, - "loss": 1.0781, - "step": 4524 - }, - { - "epoch": 0.40808044370293545, - "grad_norm": 1.4028969951157226, - "learning_rate": 2.6788223646211194e-06, - "loss": 0.9562, - "step": 4525 - }, - { - "epoch": 0.40817062722640574, - "grad_norm": 1.593578553795419, - "learning_rate": 2.6782728048887183e-06, - "loss": 0.8757, - "step": 4526 - }, - { - "epoch": 0.408260810749876, - "grad_norm": 1.1545934925345955, - "learning_rate": 2.6777231872823416e-06, - "loss": 0.9666, - "step": 4527 - }, - { - "epoch": 0.40835099427334626, - "grad_norm": 1.7962278444626736, - "learning_rate": 2.6771735118488864e-06, - "loss": 0.9859, - "step": 4528 - }, - { - "epoch": 0.40844117779681655, - "grad_norm": 1.833944334460404, - "learning_rate": 2.6766237786352523e-06, - "loss": 1.026, - "step": 4529 - }, - { - "epoch": 0.4085313613202868, - "grad_norm": 1.2718220861426939, - "learning_rate": 2.676073987688347e-06, - "loss": 0.9121, - "step": 4530 - }, - { - "epoch": 0.40862154484375707, - "grad_norm": 1.400077327227606, - "learning_rate": 2.6755241390550818e-06, - "loss": 0.9927, - "step": 4531 - }, - { - "epoch": 0.4087117283672273, - "grad_norm": 1.300976758589513, - "learning_rate": 2.6749742327823716e-06, - "loss": 0.9852, - "step": 4532 - }, - { - "epoch": 0.4088019118906976, - "grad_norm": 1.7150465165438813, - "learning_rate": 2.674424268917138e-06, - "loss": 1.0029, - "step": 4533 - }, - { - "epoch": 0.4088920954141678, - "grad_norm": 1.748925355922599, - "learning_rate": 2.6738742475063074e-06, - "loss": 1.0545, - "step": 4534 - }, - { - "epoch": 0.4089822789376381, - "grad_norm": 1.794012293761795, - "learning_rate": 2.6733241685968104e-06, - "loss": 1.0268, - "step": 4535 - }, - { - "epoch": 0.40907246246110834, - "grad_norm": 1.7092222909169703, - "learning_rate": 2.6727740322355826e-06, - "loss": 0.9911, - "step": 4536 - }, - { - "epoch": 0.40916264598457863, - "grad_norm": 1.6622335923214566, - "learning_rate": 2.6722238384695644e-06, - "loss": 0.9772, - "step": 4537 - }, - { - "epoch": 0.40925282950804887, - "grad_norm": 1.3658096782194509, - "learning_rate": 2.671673587345702e-06, - "loss": 0.9965, - "step": 4538 - }, - { - "epoch": 0.40934301303151915, - "grad_norm": 0.7024170808625328, - "learning_rate": 2.6711232789109455e-06, - "loss": 0.8337, - "step": 4539 - }, - { - "epoch": 0.4094331965549894, - "grad_norm": 1.2911307648577972, - "learning_rate": 2.6705729132122497e-06, - "loss": 0.9875, - "step": 4540 - }, - { - "epoch": 0.4095233800784597, - "grad_norm": 1.6369398600460907, - "learning_rate": 2.670022490296576e-06, - "loss": 1.0592, - "step": 4541 - }, - { - "epoch": 0.4096135636019299, - "grad_norm": 2.187082741315106, - "learning_rate": 2.669472010210889e-06, - "loss": 1.0444, - "step": 4542 - }, - { - "epoch": 0.4097037471254002, - "grad_norm": 1.747632741620779, - "learning_rate": 2.668921473002159e-06, - "loss": 0.9651, - "step": 4543 - }, - { - "epoch": 0.40979393064887043, - "grad_norm": 1.4256152748803044, - "learning_rate": 2.6683708787173596e-06, - "loss": 1.0438, - "step": 4544 - }, - { - "epoch": 0.4098841141723407, - "grad_norm": 1.5933105760542599, - "learning_rate": 2.6678202274034718e-06, - "loss": 1.0061, - "step": 4545 - }, - { - "epoch": 0.40997429769581095, - "grad_norm": 1.5867356420131513, - "learning_rate": 2.66726951910748e-06, - "loss": 1.0689, - "step": 4546 - }, - { - "epoch": 0.41006448121928124, - "grad_norm": 1.8770408332012931, - "learning_rate": 2.6667187538763737e-06, - "loss": 0.9872, - "step": 4547 - }, - { - "epoch": 0.4101546647427515, - "grad_norm": 1.4265159072871518, - "learning_rate": 2.6661679317571473e-06, - "loss": 0.9703, - "step": 4548 - }, - { - "epoch": 0.41024484826622176, - "grad_norm": 1.3020724283397518, - "learning_rate": 2.665617052796799e-06, - "loss": 0.9808, - "step": 4549 - }, - { - "epoch": 0.41033503178969205, - "grad_norm": 1.4685185838561872, - "learning_rate": 2.6650661170423346e-06, - "loss": 0.8753, - "step": 4550 - }, - { - "epoch": 0.4104252153131623, - "grad_norm": 2.517452263478945, - "learning_rate": 2.6645151245407614e-06, - "loss": 1.0614, - "step": 4551 - }, - { - "epoch": 0.4105153988366326, - "grad_norm": 1.4254910944590569, - "learning_rate": 2.6639640753390936e-06, - "loss": 0.9864, - "step": 4552 - }, - { - "epoch": 0.4106055823601028, - "grad_norm": 1.3864216660891726, - "learning_rate": 2.66341296948435e-06, - "loss": 0.8986, - "step": 4553 - }, - { - "epoch": 0.4106957658835731, - "grad_norm": 1.2282374905617544, - "learning_rate": 2.6628618070235534e-06, - "loss": 0.9942, - "step": 4554 - }, - { - "epoch": 0.4107859494070433, - "grad_norm": 1.525009986969812, - "learning_rate": 2.662310588003733e-06, - "loss": 0.9952, - "step": 4555 - }, - { - "epoch": 0.4108761329305136, - "grad_norm": 1.9429076623459145, - "learning_rate": 2.6617593124719205e-06, - "loss": 0.9658, - "step": 4556 - }, - { - "epoch": 0.41096631645398385, - "grad_norm": 1.1647333861683122, - "learning_rate": 2.661207980475155e-06, - "loss": 0.9917, - "step": 4557 - }, - { - "epoch": 0.41105649997745414, - "grad_norm": 1.4519284151574758, - "learning_rate": 2.6606565920604793e-06, - "loss": 0.9429, - "step": 4558 - }, - { - "epoch": 0.41114668350092437, - "grad_norm": 1.3628228846143182, - "learning_rate": 2.66010514727494e-06, - "loss": 1.0623, - "step": 4559 - }, - { - "epoch": 0.41123686702439466, - "grad_norm": 1.465611371137617, - "learning_rate": 2.659553646165589e-06, - "loss": 0.8809, - "step": 4560 - }, - { - "epoch": 0.4113270505478649, - "grad_norm": 1.722084556054285, - "learning_rate": 2.659002088779485e-06, - "loss": 0.9581, - "step": 4561 - }, - { - "epoch": 0.4114172340713352, - "grad_norm": 0.6084107815636774, - "learning_rate": 2.6584504751636888e-06, - "loss": 0.7871, - "step": 4562 - }, - { - "epoch": 0.4115074175948054, - "grad_norm": 1.5151911635702127, - "learning_rate": 2.657898805365268e-06, - "loss": 0.9505, - "step": 4563 - }, - { - "epoch": 0.4115976011182757, - "grad_norm": 1.985934148105821, - "learning_rate": 2.657347079431293e-06, - "loss": 0.9501, - "step": 4564 - }, - { - "epoch": 0.41168778464174594, - "grad_norm": 1.2648393052454814, - "learning_rate": 2.6567952974088403e-06, - "loss": 0.9536, - "step": 4565 - }, - { - "epoch": 0.4117779681652162, - "grad_norm": 1.265878440064986, - "learning_rate": 2.6562434593449917e-06, - "loss": 1.0427, - "step": 4566 - }, - { - "epoch": 0.41186815168868646, - "grad_norm": 1.5651216257383584, - "learning_rate": 2.6556915652868325e-06, - "loss": 1.0044, - "step": 4567 - }, - { - "epoch": 0.41195833521215675, - "grad_norm": 2.013581060217262, - "learning_rate": 2.6551396152814534e-06, - "loss": 0.9905, - "step": 4568 - }, - { - "epoch": 0.412048518735627, - "grad_norm": 1.5397709798818946, - "learning_rate": 2.65458760937595e-06, - "loss": 0.9785, - "step": 4569 - }, - { - "epoch": 0.41213870225909727, - "grad_norm": 1.2902851202164685, - "learning_rate": 2.654035547617423e-06, - "loss": 0.9394, - "step": 4570 - }, - { - "epoch": 0.4122288857825675, - "grad_norm": 2.008002483715938, - "learning_rate": 2.653483430052976e-06, - "loss": 0.8966, - "step": 4571 - }, - { - "epoch": 0.4123190693060378, - "grad_norm": 1.6596745050115849, - "learning_rate": 2.6529312567297197e-06, - "loss": 0.97, - "step": 4572 - }, - { - "epoch": 0.412409252829508, - "grad_norm": 1.5588951488664342, - "learning_rate": 2.652379027694768e-06, - "loss": 1.0595, - "step": 4573 - }, - { - "epoch": 0.4124994363529783, - "grad_norm": 1.6584785151868273, - "learning_rate": 2.651826742995241e-06, - "loss": 1.0419, - "step": 4574 - }, - { - "epoch": 0.4125896198764486, - "grad_norm": 1.3060382543630724, - "learning_rate": 2.651274402678262e-06, - "loss": 0.9779, - "step": 4575 - }, - { - "epoch": 0.41267980339991883, - "grad_norm": 1.2753574898637634, - "learning_rate": 2.6507220067909597e-06, - "loss": 1.0122, - "step": 4576 - }, - { - "epoch": 0.4127699869233891, - "grad_norm": 1.6240108120187227, - "learning_rate": 2.650169555380468e-06, - "loss": 0.9418, - "step": 4577 - }, - { - "epoch": 0.41286017044685935, - "grad_norm": 2.2365088244067293, - "learning_rate": 2.6496170484939253e-06, - "loss": 0.9801, - "step": 4578 - }, - { - "epoch": 0.41295035397032964, - "grad_norm": 1.4330616309042576, - "learning_rate": 2.6490644861784735e-06, - "loss": 0.9962, - "step": 4579 - }, - { - "epoch": 0.4130405374937999, - "grad_norm": 1.7356963064775193, - "learning_rate": 2.648511868481261e-06, - "loss": 1.0064, - "step": 4580 - }, - { - "epoch": 0.41313072101727016, - "grad_norm": 1.5081687862080493, - "learning_rate": 2.6479591954494397e-06, - "loss": 0.9844, - "step": 4581 - }, - { - "epoch": 0.4132209045407404, - "grad_norm": 1.338227612455498, - "learning_rate": 2.647406467130167e-06, - "loss": 0.8843, - "step": 4582 - }, - { - "epoch": 0.4133110880642107, - "grad_norm": 1.4092158090214124, - "learning_rate": 2.646853683570605e-06, - "loss": 1.0073, - "step": 4583 - }, - { - "epoch": 0.4134012715876809, - "grad_norm": 1.8007410669704982, - "learning_rate": 2.6463008448179196e-06, - "loss": 0.9243, - "step": 4584 - }, - { - "epoch": 0.4134914551111512, - "grad_norm": 1.4403336832413487, - "learning_rate": 2.6457479509192828e-06, - "loss": 0.9384, - "step": 4585 - }, - { - "epoch": 0.41358163863462144, - "grad_norm": 1.992689002249543, - "learning_rate": 2.645195001921871e-06, - "loss": 0.9749, - "step": 4586 - }, - { - "epoch": 0.41367182215809173, - "grad_norm": 1.4817551632696755, - "learning_rate": 2.644641997872863e-06, - "loss": 0.958, - "step": 4587 - }, - { - "epoch": 0.41376200568156196, - "grad_norm": 1.968518258639675, - "learning_rate": 2.644088938819445e-06, - "loss": 0.9921, - "step": 4588 - }, - { - "epoch": 0.41385218920503225, - "grad_norm": 1.568565041630547, - "learning_rate": 2.6435358248088077e-06, - "loss": 1.0594, - "step": 4589 - }, - { - "epoch": 0.4139423727285025, - "grad_norm": 1.4343318026817666, - "learning_rate": 2.642982655888146e-06, - "loss": 1.0301, - "step": 4590 - }, - { - "epoch": 0.41403255625197277, - "grad_norm": 1.425597881910461, - "learning_rate": 2.6424294321046585e-06, - "loss": 0.9458, - "step": 4591 - }, - { - "epoch": 0.414122739775443, - "grad_norm": 0.7560284644752685, - "learning_rate": 2.641876153505549e-06, - "loss": 0.8304, - "step": 4592 - }, - { - "epoch": 0.4142129232989133, - "grad_norm": 1.6513609792997586, - "learning_rate": 2.641322820138027e-06, - "loss": 0.9909, - "step": 4593 - }, - { - "epoch": 0.4143031068223835, - "grad_norm": 1.4267299056669294, - "learning_rate": 2.640769432049306e-06, - "loss": 0.9409, - "step": 4594 - }, - { - "epoch": 0.4143932903458538, - "grad_norm": 1.4715578551511759, - "learning_rate": 2.6402159892866038e-06, - "loss": 0.9109, - "step": 4595 - }, - { - "epoch": 0.41448347386932405, - "grad_norm": 1.308182381265861, - "learning_rate": 2.639662491897143e-06, - "loss": 0.9752, - "step": 4596 - }, - { - "epoch": 0.41457365739279434, - "grad_norm": 1.3208934250809405, - "learning_rate": 2.639108939928152e-06, - "loss": 0.9994, - "step": 4597 - }, - { - "epoch": 0.4146638409162646, - "grad_norm": 1.7594151629510155, - "learning_rate": 2.638555333426862e-06, - "loss": 0.9941, - "step": 4598 - }, - { - "epoch": 0.41475402443973486, - "grad_norm": 1.3688036276706266, - "learning_rate": 2.6380016724405093e-06, - "loss": 1.0219, - "step": 4599 - }, - { - "epoch": 0.41484420796320515, - "grad_norm": 1.517726188908962, - "learning_rate": 2.637447957016336e-06, - "loss": 0.9446, - "step": 4600 - }, - { - "epoch": 0.4149343914866754, - "grad_norm": 1.7947026265161317, - "learning_rate": 2.636894187201589e-06, - "loss": 1.0446, - "step": 4601 - }, - { - "epoch": 0.41502457501014567, - "grad_norm": 1.5048478782802355, - "learning_rate": 2.6363403630435176e-06, - "loss": 0.8903, - "step": 4602 - }, - { - "epoch": 0.4151147585336159, - "grad_norm": 1.4158875716614392, - "learning_rate": 2.635786484589378e-06, - "loss": 0.9723, - "step": 4603 - }, - { - "epoch": 0.4152049420570862, - "grad_norm": 1.6443085508357496, - "learning_rate": 2.63523255188643e-06, - "loss": 0.8873, - "step": 4604 - }, - { - "epoch": 0.4152951255805564, - "grad_norm": 1.270407787845388, - "learning_rate": 2.6346785649819375e-06, - "loss": 1.0094, - "step": 4605 - }, - { - "epoch": 0.4153853091040267, - "grad_norm": 1.418041216887488, - "learning_rate": 2.6341245239231706e-06, - "loss": 1.0138, - "step": 4606 - }, - { - "epoch": 0.41547549262749695, - "grad_norm": 1.2984551145090986, - "learning_rate": 2.6335704287574024e-06, - "loss": 0.9913, - "step": 4607 - }, - { - "epoch": 0.41556567615096723, - "grad_norm": 1.4640983617862666, - "learning_rate": 2.6330162795319124e-06, - "loss": 1.0447, - "step": 4608 - }, - { - "epoch": 0.41565585967443747, - "grad_norm": 1.5566410844866794, - "learning_rate": 2.632462076293983e-06, - "loss": 1.0381, - "step": 4609 - }, - { - "epoch": 0.41574604319790776, - "grad_norm": 1.3901625732332947, - "learning_rate": 2.6319078190909017e-06, - "loss": 0.9837, - "step": 4610 - }, - { - "epoch": 0.415836226721378, - "grad_norm": 2.0662074435098425, - "learning_rate": 2.6313535079699606e-06, - "loss": 0.9012, - "step": 4611 - }, - { - "epoch": 0.4159264102448483, - "grad_norm": 1.5925101150838217, - "learning_rate": 2.6307991429784572e-06, - "loss": 0.9705, - "step": 4612 - }, - { - "epoch": 0.4160165937683185, - "grad_norm": 1.4174480060009913, - "learning_rate": 2.6302447241636924e-06, - "loss": 1.0352, - "step": 4613 - }, - { - "epoch": 0.4161067772917888, - "grad_norm": 1.7300898910056908, - "learning_rate": 2.629690251572973e-06, - "loss": 1.0638, - "step": 4614 - }, - { - "epoch": 0.41619696081525903, - "grad_norm": 2.1619401019512656, - "learning_rate": 2.629135725253609e-06, - "loss": 0.9608, - "step": 4615 - }, - { - "epoch": 0.4162871443387293, - "grad_norm": 1.3792378834587549, - "learning_rate": 2.6285811452529162e-06, - "loss": 0.9761, - "step": 4616 - }, - { - "epoch": 0.41637732786219955, - "grad_norm": 1.5790527185212224, - "learning_rate": 2.6280265116182136e-06, - "loss": 0.9981, - "step": 4617 - }, - { - "epoch": 0.41646751138566984, - "grad_norm": 1.3649656716246548, - "learning_rate": 2.6274718243968266e-06, - "loss": 0.9869, - "step": 4618 - }, - { - "epoch": 0.4165576949091401, - "grad_norm": 2.911401756069007, - "learning_rate": 2.626917083636084e-06, - "loss": 0.9027, - "step": 4619 - }, - { - "epoch": 0.41664787843261036, - "grad_norm": 1.2338359476625422, - "learning_rate": 2.6263622893833183e-06, - "loss": 0.9149, - "step": 4620 - }, - { - "epoch": 0.4167380619560806, - "grad_norm": 1.5802936571991957, - "learning_rate": 2.625807441685869e-06, - "loss": 0.9228, - "step": 4621 - }, - { - "epoch": 0.4168282454795509, - "grad_norm": 0.6597467766481028, - "learning_rate": 2.625252540591078e-06, - "loss": 0.8093, - "step": 4622 - }, - { - "epoch": 0.4169184290030212, - "grad_norm": 1.756179661547073, - "learning_rate": 2.6246975861462927e-06, - "loss": 1.0552, - "step": 4623 - }, - { - "epoch": 0.4170086125264914, - "grad_norm": 1.779201534325638, - "learning_rate": 2.624142578398864e-06, - "loss": 1.0285, - "step": 4624 - }, - { - "epoch": 0.4170987960499617, - "grad_norm": 1.3407645217390753, - "learning_rate": 2.6235875173961498e-06, - "loss": 0.9695, - "step": 4625 - }, - { - "epoch": 0.41718897957343193, - "grad_norm": 1.2196872237545076, - "learning_rate": 2.62303240318551e-06, - "loss": 0.9146, - "step": 4626 - }, - { - "epoch": 0.4172791630969022, - "grad_norm": 1.779303440789546, - "learning_rate": 2.62247723581431e-06, - "loss": 1.0532, - "step": 4627 - }, - { - "epoch": 0.41736934662037245, - "grad_norm": 2.017287049618339, - "learning_rate": 2.62192201532992e-06, - "loss": 0.9889, - "step": 4628 - }, - { - "epoch": 0.41745953014384274, - "grad_norm": 1.545118239856526, - "learning_rate": 2.6213667417797145e-06, - "loss": 0.9996, - "step": 4629 - }, - { - "epoch": 0.41754971366731297, - "grad_norm": 1.3859820490991612, - "learning_rate": 2.6208114152110725e-06, - "loss": 1.0145, - "step": 4630 - }, - { - "epoch": 0.41763989719078326, - "grad_norm": 1.663923732362313, - "learning_rate": 2.6202560356713774e-06, - "loss": 1.0009, - "step": 4631 - }, - { - "epoch": 0.4177300807142535, - "grad_norm": 0.6634575725147699, - "learning_rate": 2.619700603208017e-06, - "loss": 0.9047, - "step": 4632 - }, - { - "epoch": 0.4178202642377238, - "grad_norm": 1.252067096539339, - "learning_rate": 2.6191451178683842e-06, - "loss": 0.9019, - "step": 4633 - }, - { - "epoch": 0.417910447761194, - "grad_norm": 1.4232099365835273, - "learning_rate": 2.6185895796998764e-06, - "loss": 1.0099, - "step": 4634 - }, - { - "epoch": 0.4180006312846643, - "grad_norm": 1.5506810845376922, - "learning_rate": 2.6180339887498946e-06, - "loss": 1.0732, - "step": 4635 - }, - { - "epoch": 0.41809081480813454, - "grad_norm": 1.9710336791692717, - "learning_rate": 2.617478345065846e-06, - "loss": 0.9279, - "step": 4636 - }, - { - "epoch": 0.4181809983316048, - "grad_norm": 1.5500067403093034, - "learning_rate": 2.616922648695139e-06, - "loss": 0.9954, - "step": 4637 - }, - { - "epoch": 0.41827118185507506, - "grad_norm": 1.571734567364093, - "learning_rate": 2.61636689968519e-06, - "loss": 0.8821, - "step": 4638 - }, - { - "epoch": 0.41836136537854535, - "grad_norm": 2.0272908286535682, - "learning_rate": 2.6158110980834186e-06, - "loss": 1.0531, - "step": 4639 - }, - { - "epoch": 0.4184515489020156, - "grad_norm": 1.6104725918881768, - "learning_rate": 2.615255243937249e-06, - "loss": 1.0396, - "step": 4640 - }, - { - "epoch": 0.41854173242548587, - "grad_norm": 1.7125994827444357, - "learning_rate": 2.61469933729411e-06, - "loss": 1.0083, - "step": 4641 - }, - { - "epoch": 0.4186319159489561, - "grad_norm": 1.8153197617926522, - "learning_rate": 2.614143378201433e-06, - "loss": 0.9626, - "step": 4642 - }, - { - "epoch": 0.4187220994724264, - "grad_norm": 1.4043805730120877, - "learning_rate": 2.6135873667066567e-06, - "loss": 0.9633, - "step": 4643 - }, - { - "epoch": 0.4188122829958966, - "grad_norm": 1.4957899776257118, - "learning_rate": 2.613031302857224e-06, - "loss": 0.9405, - "step": 4644 - }, - { - "epoch": 0.4189024665193669, - "grad_norm": 1.6304438444245104, - "learning_rate": 2.6124751867005792e-06, - "loss": 1.0073, - "step": 4645 - }, - { - "epoch": 0.4189926500428372, - "grad_norm": 2.59358003358186, - "learning_rate": 2.611919018284175e-06, - "loss": 1.035, - "step": 4646 - }, - { - "epoch": 0.41908283356630743, - "grad_norm": 1.9809485461866898, - "learning_rate": 2.611362797655466e-06, - "loss": 1.0273, - "step": 4647 - }, - { - "epoch": 0.4191730170897777, - "grad_norm": 1.3056824140036516, - "learning_rate": 2.6108065248619124e-06, - "loss": 1.0474, - "step": 4648 - }, - { - "epoch": 0.41926320061324795, - "grad_norm": 1.6830037581231265, - "learning_rate": 2.610250199950978e-06, - "loss": 0.9222, - "step": 4649 - }, - { - "epoch": 0.41935338413671824, - "grad_norm": 1.693082476617363, - "learning_rate": 2.609693822970131e-06, - "loss": 0.9524, - "step": 4650 - }, - { - "epoch": 0.4194435676601885, - "grad_norm": 1.7198344190872412, - "learning_rate": 2.609137393966846e-06, - "loss": 0.947, - "step": 4651 - }, - { - "epoch": 0.41953375118365877, - "grad_norm": 1.579071742939882, - "learning_rate": 2.6085809129886e-06, - "loss": 0.941, - "step": 4652 - }, - { - "epoch": 0.419623934707129, - "grad_norm": 1.759518012134296, - "learning_rate": 2.608024380082874e-06, - "loss": 1.0038, - "step": 4653 - }, - { - "epoch": 0.4197141182305993, - "grad_norm": 1.5377210008531599, - "learning_rate": 2.6074677952971554e-06, - "loss": 0.99, - "step": 4654 - }, - { - "epoch": 0.4198043017540695, - "grad_norm": 1.4662690006465662, - "learning_rate": 2.606911158678935e-06, - "loss": 0.909, - "step": 4655 - }, - { - "epoch": 0.4198944852775398, - "grad_norm": 1.9861916464221518, - "learning_rate": 2.606354470275708e-06, - "loss": 0.9813, - "step": 4656 - }, - { - "epoch": 0.41998466880101004, - "grad_norm": 1.5239167071081172, - "learning_rate": 2.6057977301349744e-06, - "loss": 0.9576, - "step": 4657 - }, - { - "epoch": 0.42007485232448033, - "grad_norm": 1.4119573698632755, - "learning_rate": 2.6052409383042383e-06, - "loss": 0.9523, - "step": 4658 - }, - { - "epoch": 0.42016503584795056, - "grad_norm": 1.3380412887053041, - "learning_rate": 2.6046840948310074e-06, - "loss": 0.9591, - "step": 4659 - }, - { - "epoch": 0.42025521937142085, - "grad_norm": 1.7559240751471217, - "learning_rate": 2.6041271997627962e-06, - "loss": 0.9843, - "step": 4660 - }, - { - "epoch": 0.4203454028948911, - "grad_norm": 1.6411795269376481, - "learning_rate": 2.6035702531471202e-06, - "loss": 0.9748, - "step": 4661 - }, - { - "epoch": 0.4204355864183614, - "grad_norm": 1.4543618301693664, - "learning_rate": 2.6030132550315035e-06, - "loss": 0.9885, - "step": 4662 - }, - { - "epoch": 0.4205257699418316, - "grad_norm": 0.6887589112413218, - "learning_rate": 2.60245620546347e-06, - "loss": 0.8694, - "step": 4663 - }, - { - "epoch": 0.4206159534653019, - "grad_norm": 4.1118945386253944, - "learning_rate": 2.6018991044905517e-06, - "loss": 0.957, - "step": 4664 - }, - { - "epoch": 0.42070613698877213, - "grad_norm": 1.3048038544994938, - "learning_rate": 2.6013419521602825e-06, - "loss": 1.0243, - "step": 4665 - }, - { - "epoch": 0.4207963205122424, - "grad_norm": 1.5769033708110476, - "learning_rate": 2.600784748520202e-06, - "loss": 1.111, - "step": 4666 - }, - { - "epoch": 0.42088650403571265, - "grad_norm": 1.5832601413206246, - "learning_rate": 2.6002274936178544e-06, - "loss": 0.9117, - "step": 4667 - }, - { - "epoch": 0.42097668755918294, - "grad_norm": 1.5114218250214746, - "learning_rate": 2.5996701875007873e-06, - "loss": 0.9925, - "step": 4668 - }, - { - "epoch": 0.4210668710826532, - "grad_norm": 1.9391259169986121, - "learning_rate": 2.5991128302165533e-06, - "loss": 0.9735, - "step": 4669 - }, - { - "epoch": 0.42115705460612346, - "grad_norm": 1.8359256825168206, - "learning_rate": 2.5985554218127094e-06, - "loss": 1.0555, - "step": 4670 - }, - { - "epoch": 0.42124723812959375, - "grad_norm": 1.783572707049987, - "learning_rate": 2.597997962336816e-06, - "loss": 0.9159, - "step": 4671 - }, - { - "epoch": 0.421337421653064, - "grad_norm": 1.8922674437817681, - "learning_rate": 2.5974404518364393e-06, - "loss": 0.9714, - "step": 4672 - }, - { - "epoch": 0.42142760517653427, - "grad_norm": 1.6539369662265646, - "learning_rate": 2.596882890359149e-06, - "loss": 1.0919, - "step": 4673 - }, - { - "epoch": 0.4215177887000045, - "grad_norm": 1.6964961547583128, - "learning_rate": 2.5963252779525196e-06, - "loss": 0.894, - "step": 4674 - }, - { - "epoch": 0.4216079722234748, - "grad_norm": 1.778987317422279, - "learning_rate": 2.595767614664129e-06, - "loss": 0.9834, - "step": 4675 - }, - { - "epoch": 0.421698155746945, - "grad_norm": 1.4527165649011622, - "learning_rate": 2.5952099005415607e-06, - "loss": 0.9556, - "step": 4676 - }, - { - "epoch": 0.4217883392704153, - "grad_norm": 3.277340694603388, - "learning_rate": 2.594652135632402e-06, - "loss": 0.9382, - "step": 4677 - }, - { - "epoch": 0.42187852279388555, - "grad_norm": 1.2858971312479839, - "learning_rate": 2.594094319984244e-06, - "loss": 0.9397, - "step": 4678 - }, - { - "epoch": 0.42196870631735584, - "grad_norm": 1.5563053167232592, - "learning_rate": 2.5935364536446825e-06, - "loss": 1.0369, - "step": 4679 - }, - { - "epoch": 0.42205888984082607, - "grad_norm": 3.376334597598616, - "learning_rate": 2.5929785366613185e-06, - "loss": 0.9825, - "step": 4680 - }, - { - "epoch": 0.42214907336429636, - "grad_norm": 1.523973732499157, - "learning_rate": 2.592420569081756e-06, - "loss": 1.0209, - "step": 4681 - }, - { - "epoch": 0.4222392568877666, - "grad_norm": 1.456621618701844, - "learning_rate": 2.5918625509536037e-06, - "loss": 0.9428, - "step": 4682 - }, - { - "epoch": 0.4223294404112369, - "grad_norm": 0.642494497683821, - "learning_rate": 2.591304482324475e-06, - "loss": 0.8317, - "step": 4683 - }, - { - "epoch": 0.4224196239347071, - "grad_norm": 1.5323885363419605, - "learning_rate": 2.5907463632419878e-06, - "loss": 0.9983, - "step": 4684 - }, - { - "epoch": 0.4225098074581774, - "grad_norm": 1.5268220522706393, - "learning_rate": 2.5901881937537632e-06, - "loss": 0.9493, - "step": 4685 - }, - { - "epoch": 0.42259999098164763, - "grad_norm": 0.7553594547649897, - "learning_rate": 2.589629973907428e-06, - "loss": 0.8926, - "step": 4686 - }, - { - "epoch": 0.4226901745051179, - "grad_norm": 1.7276035812172996, - "learning_rate": 2.589071703750612e-06, - "loss": 0.9371, - "step": 4687 - }, - { - "epoch": 0.42278035802858815, - "grad_norm": 1.3414890874702476, - "learning_rate": 2.5885133833309504e-06, - "loss": 1.0233, - "step": 4688 - }, - { - "epoch": 0.42287054155205844, - "grad_norm": 1.4480653681884927, - "learning_rate": 2.5879550126960814e-06, - "loss": 1.063, - "step": 4689 - }, - { - "epoch": 0.4229607250755287, - "grad_norm": 1.3563476703123873, - "learning_rate": 2.5873965918936494e-06, - "loss": 1.0348, - "step": 4690 - }, - { - "epoch": 0.42305090859899896, - "grad_norm": 1.6234903659235123, - "learning_rate": 2.586838120971301e-06, - "loss": 1.0053, - "step": 4691 - }, - { - "epoch": 0.4231410921224692, - "grad_norm": 1.9580795921810217, - "learning_rate": 2.586279599976689e-06, - "loss": 1.0372, - "step": 4692 - }, - { - "epoch": 0.4232312756459395, - "grad_norm": 1.8023617112315173, - "learning_rate": 2.585721028957468e-06, - "loss": 0.9809, - "step": 4693 - }, - { - "epoch": 0.4233214591694098, - "grad_norm": 1.358256438873549, - "learning_rate": 2.585162407961299e-06, - "loss": 0.9995, - "step": 4694 - }, - { - "epoch": 0.42341164269288, - "grad_norm": 1.5998230210414448, - "learning_rate": 2.584603737035847e-06, - "loss": 0.926, - "step": 4695 - }, - { - "epoch": 0.4235018262163503, - "grad_norm": 1.7084314814339967, - "learning_rate": 2.5840450162287806e-06, - "loss": 0.9745, - "step": 4696 - }, - { - "epoch": 0.42359200973982053, - "grad_norm": 1.7269568359776912, - "learning_rate": 2.583486245587774e-06, - "loss": 0.9047, - "step": 4697 - }, - { - "epoch": 0.4236821932632908, - "grad_norm": 1.5362830455382566, - "learning_rate": 2.5829274251605023e-06, - "loss": 1.0842, - "step": 4698 - }, - { - "epoch": 0.42377237678676105, - "grad_norm": 2.2247619844493864, - "learning_rate": 2.582368554994649e-06, - "loss": 0.9603, - "step": 4699 - }, - { - "epoch": 0.42386256031023134, - "grad_norm": 0.7808525600393582, - "learning_rate": 2.5818096351378994e-06, - "loss": 0.8907, - "step": 4700 - }, - { - "epoch": 0.4239527438337016, - "grad_norm": 1.8526988345169022, - "learning_rate": 2.5812506656379435e-06, - "loss": 1.0374, - "step": 4701 - }, - { - "epoch": 0.42404292735717186, - "grad_norm": 1.4855504250816278, - "learning_rate": 2.580691646542476e-06, - "loss": 0.9333, - "step": 4702 - }, - { - "epoch": 0.4241331108806421, - "grad_norm": 1.540752741409975, - "learning_rate": 2.5801325778991958e-06, - "loss": 1.0247, - "step": 4703 - }, - { - "epoch": 0.4242232944041124, - "grad_norm": 1.7059341501628287, - "learning_rate": 2.5795734597558043e-06, - "loss": 0.8331, - "step": 4704 - }, - { - "epoch": 0.4243134779275826, - "grad_norm": 2.0464745049658197, - "learning_rate": 2.579014292160011e-06, - "loss": 0.9834, - "step": 4705 - }, - { - "epoch": 0.4244036614510529, - "grad_norm": 1.1960531695039296, - "learning_rate": 2.5784550751595236e-06, - "loss": 0.9699, - "step": 4706 - }, - { - "epoch": 0.42449384497452314, - "grad_norm": 1.3532648115631873, - "learning_rate": 2.577895808802061e-06, - "loss": 1.0445, - "step": 4707 - }, - { - "epoch": 0.4245840284979934, - "grad_norm": 1.674210103093192, - "learning_rate": 2.577336493135341e-06, - "loss": 0.8979, - "step": 4708 - }, - { - "epoch": 0.42467421202146366, - "grad_norm": 1.2149722408530148, - "learning_rate": 2.576777128207088e-06, - "loss": 1.0717, - "step": 4709 - }, - { - "epoch": 0.42476439554493395, - "grad_norm": 1.3502787408226054, - "learning_rate": 2.5762177140650306e-06, - "loss": 0.9462, - "step": 4710 - }, - { - "epoch": 0.4248545790684042, - "grad_norm": 1.8819376072245226, - "learning_rate": 2.5756582507569003e-06, - "loss": 0.9972, - "step": 4711 - }, - { - "epoch": 0.42494476259187447, - "grad_norm": 1.6621498972595952, - "learning_rate": 2.5750987383304335e-06, - "loss": 1.008, - "step": 4712 - }, - { - "epoch": 0.4250349461153447, - "grad_norm": 1.8819716225976786, - "learning_rate": 2.574539176833372e-06, - "loss": 0.9661, - "step": 4713 - }, - { - "epoch": 0.425125129638815, - "grad_norm": 1.6056587496633534, - "learning_rate": 2.5739795663134594e-06, - "loss": 0.9222, - "step": 4714 - }, - { - "epoch": 0.4252153131622852, - "grad_norm": 1.3008161660396906, - "learning_rate": 2.5734199068184454e-06, - "loss": 1.0711, - "step": 4715 - }, - { - "epoch": 0.4253054966857555, - "grad_norm": 1.3956590894814263, - "learning_rate": 2.572860198396083e-06, - "loss": 0.9061, - "step": 4716 - }, - { - "epoch": 0.4253956802092258, - "grad_norm": 1.4394601643155982, - "learning_rate": 2.57230044109413e-06, - "loss": 1.0099, - "step": 4717 - }, - { - "epoch": 0.42548586373269603, - "grad_norm": 1.6865370086671694, - "learning_rate": 2.5717406349603483e-06, - "loss": 0.9644, - "step": 4718 - }, - { - "epoch": 0.4255760472561663, - "grad_norm": 1.3020899149410483, - "learning_rate": 2.5711807800425026e-06, - "loss": 1.0092, - "step": 4719 - }, - { - "epoch": 0.42566623077963656, - "grad_norm": 2.04684401627015, - "learning_rate": 2.5706208763883633e-06, - "loss": 0.8219, - "step": 4720 - }, - { - "epoch": 0.42575641430310684, - "grad_norm": 1.4929516019290494, - "learning_rate": 2.570060924045704e-06, - "loss": 0.935, - "step": 4721 - }, - { - "epoch": 0.4258465978265771, - "grad_norm": 1.50305024593627, - "learning_rate": 2.569500923062304e-06, - "loss": 1.0125, - "step": 4722 - }, - { - "epoch": 0.42593678135004737, - "grad_norm": 1.3746467916952447, - "learning_rate": 2.5689408734859445e-06, - "loss": 0.9, - "step": 4723 - }, - { - "epoch": 0.4260269648735176, - "grad_norm": 1.6017143363620172, - "learning_rate": 2.5683807753644127e-06, - "loss": 0.9373, - "step": 4724 - }, - { - "epoch": 0.4261171483969879, - "grad_norm": 1.3787833529162112, - "learning_rate": 2.5678206287454996e-06, - "loss": 0.9613, - "step": 4725 - }, - { - "epoch": 0.4262073319204581, - "grad_norm": 2.9251605650040284, - "learning_rate": 2.567260433676999e-06, - "loss": 1.0117, - "step": 4726 - }, - { - "epoch": 0.4262975154439284, - "grad_norm": 1.4458043266853269, - "learning_rate": 2.5667001902067107e-06, - "loss": 0.9606, - "step": 4727 - }, - { - "epoch": 0.42638769896739864, - "grad_norm": 1.4807293745540115, - "learning_rate": 2.566139898382437e-06, - "loss": 1.0288, - "step": 4728 - }, - { - "epoch": 0.42647788249086893, - "grad_norm": 1.4568421596522592, - "learning_rate": 2.5655795582519853e-06, - "loss": 0.997, - "step": 4729 - }, - { - "epoch": 0.42656806601433916, - "grad_norm": 1.3024356162034127, - "learning_rate": 2.565019169863168e-06, - "loss": 1.0498, - "step": 4730 - }, - { - "epoch": 0.42665824953780945, - "grad_norm": 0.623013606124958, - "learning_rate": 2.5644587332637994e-06, - "loss": 0.8024, - "step": 4731 - }, - { - "epoch": 0.4267484330612797, - "grad_norm": 1.9724791192326259, - "learning_rate": 2.5638982485016994e-06, - "loss": 1.017, - "step": 4732 - }, - { - "epoch": 0.42683861658475, - "grad_norm": 0.7953345050278203, - "learning_rate": 2.5633377156246917e-06, - "loss": 0.8312, - "step": 4733 - }, - { - "epoch": 0.4269288001082202, - "grad_norm": 1.8469329127802432, - "learning_rate": 2.562777134680603e-06, - "loss": 0.959, - "step": 4734 - }, - { - "epoch": 0.4270189836316905, - "grad_norm": 1.5013970386112592, - "learning_rate": 2.562216505717267e-06, - "loss": 0.9888, - "step": 4735 - }, - { - "epoch": 0.42710916715516073, - "grad_norm": 1.5159497257290924, - "learning_rate": 2.561655828782518e-06, - "loss": 1.012, - "step": 4736 - }, - { - "epoch": 0.427199350678631, - "grad_norm": 2.129892103480137, - "learning_rate": 2.561095103924197e-06, - "loss": 0.9645, - "step": 4737 - }, - { - "epoch": 0.42728953420210125, - "grad_norm": 1.6540463020227787, - "learning_rate": 2.560534331190148e-06, - "loss": 0.9352, - "step": 4738 - }, - { - "epoch": 0.42737971772557154, - "grad_norm": 0.6131648542417655, - "learning_rate": 2.559973510628218e-06, - "loss": 0.8054, - "step": 4739 - }, - { - "epoch": 0.4274699012490418, - "grad_norm": 1.6072353472662537, - "learning_rate": 2.5594126422862615e-06, - "loss": 0.9305, - "step": 4740 - }, - { - "epoch": 0.42756008477251206, - "grad_norm": 1.7887574314733463, - "learning_rate": 2.558851726212134e-06, - "loss": 0.938, - "step": 4741 - }, - { - "epoch": 0.42765026829598235, - "grad_norm": 1.5741529427302556, - "learning_rate": 2.5582907624536953e-06, - "loss": 0.9559, - "step": 4742 - }, - { - "epoch": 0.4277404518194526, - "grad_norm": 1.6415179048061939, - "learning_rate": 2.557729751058811e-06, - "loss": 0.998, - "step": 4743 - }, - { - "epoch": 0.42783063534292287, - "grad_norm": 1.452204505027862, - "learning_rate": 2.557168692075348e-06, - "loss": 0.9561, - "step": 4744 - }, - { - "epoch": 0.4279208188663931, - "grad_norm": 1.4661704603964743, - "learning_rate": 2.556607585551181e-06, - "loss": 0.9276, - "step": 4745 - }, - { - "epoch": 0.4280110023898634, - "grad_norm": 0.7039965420307501, - "learning_rate": 2.5560464315341844e-06, - "loss": 0.9371, - "step": 4746 - }, - { - "epoch": 0.4281011859133336, - "grad_norm": 1.9203610543676624, - "learning_rate": 2.555485230072242e-06, - "loss": 0.9741, - "step": 4747 - }, - { - "epoch": 0.4281913694368039, - "grad_norm": 1.7268706862250454, - "learning_rate": 2.5549239812132354e-06, - "loss": 1.0328, - "step": 4748 - }, - { - "epoch": 0.42828155296027415, - "grad_norm": 2.3865370361280362, - "learning_rate": 2.5543626850050556e-06, - "loss": 1.0441, - "step": 4749 - }, - { - "epoch": 0.42837173648374444, - "grad_norm": 1.8698164177605001, - "learning_rate": 2.5538013414955944e-06, - "loss": 1.0378, - "step": 4750 - }, - { - "epoch": 0.42846192000721467, - "grad_norm": 2.9646274251511766, - "learning_rate": 2.5532399507327494e-06, - "loss": 0.9333, - "step": 4751 - }, - { - "epoch": 0.42855210353068496, - "grad_norm": 0.6162699385506767, - "learning_rate": 2.552678512764421e-06, - "loss": 0.8474, - "step": 4752 - }, - { - "epoch": 0.4286422870541552, - "grad_norm": 1.6073822721142832, - "learning_rate": 2.5521170276385147e-06, - "loss": 1.0381, - "step": 4753 - }, - { - "epoch": 0.4287324705776255, - "grad_norm": 1.4722380917171662, - "learning_rate": 2.5515554954029394e-06, - "loss": 1.0331, - "step": 4754 - }, - { - "epoch": 0.4288226541010957, - "grad_norm": 1.6528856908294816, - "learning_rate": 2.550993916105608e-06, - "loss": 1.0039, - "step": 4755 - }, - { - "epoch": 0.428912837624566, - "grad_norm": 1.2764366732233572, - "learning_rate": 2.550432289794437e-06, - "loss": 0.944, - "step": 4756 - }, - { - "epoch": 0.42900302114803623, - "grad_norm": 1.8346126240022211, - "learning_rate": 2.5498706165173483e-06, - "loss": 0.9935, - "step": 4757 - }, - { - "epoch": 0.4290932046715065, - "grad_norm": 1.3432521785063112, - "learning_rate": 2.5493088963222668e-06, - "loss": 0.9615, - "step": 4758 - }, - { - "epoch": 0.42918338819497676, - "grad_norm": 1.5824231074546624, - "learning_rate": 2.548747129257121e-06, - "loss": 1.0291, - "step": 4759 - }, - { - "epoch": 0.42927357171844704, - "grad_norm": 1.3243390144042582, - "learning_rate": 2.548185315369845e-06, - "loss": 0.9106, - "step": 4760 - }, - { - "epoch": 0.4293637552419173, - "grad_norm": 1.3735721283461635, - "learning_rate": 2.5476234547083746e-06, - "loss": 1.0647, - "step": 4761 - }, - { - "epoch": 0.42945393876538757, - "grad_norm": 1.5187748691108247, - "learning_rate": 2.547061547320652e-06, - "loss": 1.0472, - "step": 4762 - }, - { - "epoch": 0.4295441222888578, - "grad_norm": 1.2628933190017033, - "learning_rate": 2.5464995932546217e-06, - "loss": 0.9689, - "step": 4763 - }, - { - "epoch": 0.4296343058123281, - "grad_norm": 2.555087836859027, - "learning_rate": 2.545937592558232e-06, - "loss": 0.9438, - "step": 4764 - }, - { - "epoch": 0.4297244893357984, - "grad_norm": 1.331947570141221, - "learning_rate": 2.5453755452794374e-06, - "loss": 0.9297, - "step": 4765 - }, - { - "epoch": 0.4298146728592686, - "grad_norm": 1.4730211205609554, - "learning_rate": 2.5448134514661938e-06, - "loss": 1.0209, - "step": 4766 - }, - { - "epoch": 0.4299048563827389, - "grad_norm": 1.3026195744342082, - "learning_rate": 2.5442513111664623e-06, - "loss": 0.9848, - "step": 4767 - }, - { - "epoch": 0.42999503990620913, - "grad_norm": 1.631976483979685, - "learning_rate": 2.5436891244282084e-06, - "loss": 0.9752, - "step": 4768 - }, - { - "epoch": 0.4300852234296794, - "grad_norm": 1.5118091171815353, - "learning_rate": 2.5431268912994004e-06, - "loss": 1.0799, - "step": 4769 - }, - { - "epoch": 0.43017540695314965, - "grad_norm": 1.5609027327903233, - "learning_rate": 2.5425646118280108e-06, - "loss": 0.9555, - "step": 4770 - }, - { - "epoch": 0.43026559047661994, - "grad_norm": 0.5832092090654712, - "learning_rate": 2.5420022860620172e-06, - "loss": 0.8374, - "step": 4771 - }, - { - "epoch": 0.4303557740000902, - "grad_norm": 1.574909900934076, - "learning_rate": 2.5414399140493995e-06, - "loss": 0.9105, - "step": 4772 - }, - { - "epoch": 0.43044595752356046, - "grad_norm": 1.7599471630488321, - "learning_rate": 2.5408774958381436e-06, - "loss": 0.9143, - "step": 4773 - }, - { - "epoch": 0.4305361410470307, - "grad_norm": 1.378530045755765, - "learning_rate": 2.540315031476237e-06, - "loss": 1.0235, - "step": 4774 - }, - { - "epoch": 0.430626324570501, - "grad_norm": 1.4490273238686677, - "learning_rate": 2.5397525210116737e-06, - "loss": 0.9891, - "step": 4775 - }, - { - "epoch": 0.4307165080939712, - "grad_norm": 2.2571625615548094, - "learning_rate": 2.539189964492448e-06, - "loss": 0.9317, - "step": 4776 - }, - { - "epoch": 0.4308066916174415, - "grad_norm": 1.4179223423449179, - "learning_rate": 2.5386273619665613e-06, - "loss": 1.0297, - "step": 4777 - }, - { - "epoch": 0.43089687514091174, - "grad_norm": 0.699133926446471, - "learning_rate": 2.5380647134820186e-06, - "loss": 0.8415, - "step": 4778 - }, - { - "epoch": 0.43098705866438203, - "grad_norm": 1.3646035326367838, - "learning_rate": 2.5375020190868277e-06, - "loss": 1.039, - "step": 4779 - }, - { - "epoch": 0.43107724218785226, - "grad_norm": 1.370293147125513, - "learning_rate": 2.536939278829001e-06, - "loss": 0.9738, - "step": 4780 - }, - { - "epoch": 0.43116742571132255, - "grad_norm": 1.5889989309433352, - "learning_rate": 2.5363764927565536e-06, - "loss": 1.04, - "step": 4781 - }, - { - "epoch": 0.4312576092347928, - "grad_norm": 1.6601953838448271, - "learning_rate": 2.5358136609175064e-06, - "loss": 0.9292, - "step": 4782 - }, - { - "epoch": 0.43134779275826307, - "grad_norm": 1.507946107311177, - "learning_rate": 2.535250783359884e-06, - "loss": 0.9847, - "step": 4783 - }, - { - "epoch": 0.4314379762817333, - "grad_norm": 1.470084943198561, - "learning_rate": 2.5346878601317124e-06, - "loss": 0.8779, - "step": 4784 - }, - { - "epoch": 0.4315281598052036, - "grad_norm": 0.5790504317202061, - "learning_rate": 2.534124891281025e-06, - "loss": 0.8156, - "step": 4785 - }, - { - "epoch": 0.4316183433286738, - "grad_norm": 1.5098283169895335, - "learning_rate": 2.533561876855857e-06, - "loss": 0.9934, - "step": 4786 - }, - { - "epoch": 0.4317085268521441, - "grad_norm": 1.6353892022355707, - "learning_rate": 2.532998816904247e-06, - "loss": 0.9365, - "step": 4787 - }, - { - "epoch": 0.4317987103756144, - "grad_norm": 1.4208073223041318, - "learning_rate": 2.53243571147424e-06, - "loss": 0.877, - "step": 4788 - }, - { - "epoch": 0.43188889389908464, - "grad_norm": 1.6964330529748906, - "learning_rate": 2.5318725606138815e-06, - "loss": 0.8932, - "step": 4789 - }, - { - "epoch": 0.4319790774225549, - "grad_norm": 11.331330365476179, - "learning_rate": 2.5313093643712235e-06, - "loss": 0.9924, - "step": 4790 - }, - { - "epoch": 0.43206926094602516, - "grad_norm": 1.4226291040117156, - "learning_rate": 2.530746122794321e-06, - "loss": 1.0323, - "step": 4791 - }, - { - "epoch": 0.43215944446949545, - "grad_norm": 1.5579512952309413, - "learning_rate": 2.5301828359312323e-06, - "loss": 0.9137, - "step": 4792 - }, - { - "epoch": 0.4322496279929657, - "grad_norm": 1.735407341622543, - "learning_rate": 2.529619503830021e-06, - "loss": 0.9402, - "step": 4793 - }, - { - "epoch": 0.43233981151643597, - "grad_norm": 1.9597438507664886, - "learning_rate": 2.529056126538753e-06, - "loss": 0.9134, - "step": 4794 - }, - { - "epoch": 0.4324299950399062, - "grad_norm": 1.382674000156553, - "learning_rate": 2.5284927041054995e-06, - "loss": 1.0059, - "step": 4795 - }, - { - "epoch": 0.4325201785633765, - "grad_norm": 0.7083197807436661, - "learning_rate": 2.5279292365783348e-06, - "loss": 0.8055, - "step": 4796 - }, - { - "epoch": 0.4326103620868467, - "grad_norm": 1.4672924682638107, - "learning_rate": 2.527365724005336e-06, - "loss": 1.0771, - "step": 4797 - }, - { - "epoch": 0.432700545610317, - "grad_norm": 1.606805323926649, - "learning_rate": 2.526802166434586e-06, - "loss": 1.0353, - "step": 4798 - }, - { - "epoch": 0.43279072913378724, - "grad_norm": 1.3379132568227219, - "learning_rate": 2.5262385639141708e-06, - "loss": 0.9504, - "step": 4799 - }, - { - "epoch": 0.43288091265725753, - "grad_norm": 1.9286285460052142, - "learning_rate": 2.525674916492179e-06, - "loss": 1.0503, - "step": 4800 - }, - { - "epoch": 0.43297109618072777, - "grad_norm": 1.5684918529382819, - "learning_rate": 2.5251112242167056e-06, - "loss": 0.9901, - "step": 4801 - }, - { - "epoch": 0.43306127970419805, - "grad_norm": 1.683230048809965, - "learning_rate": 2.5245474871358464e-06, - "loss": 0.9424, - "step": 4802 - }, - { - "epoch": 0.4331514632276683, - "grad_norm": 1.5928186425413735, - "learning_rate": 2.5239837052977037e-06, - "loss": 0.9033, - "step": 4803 - }, - { - "epoch": 0.4332416467511386, - "grad_norm": 1.3368161975827828, - "learning_rate": 2.523419878750381e-06, - "loss": 1.0932, - "step": 4804 - }, - { - "epoch": 0.4333318302746088, - "grad_norm": 1.3866373387469246, - "learning_rate": 2.522856007541989e-06, - "loss": 0.9346, - "step": 4805 - }, - { - "epoch": 0.4334220137980791, - "grad_norm": 1.653389313593534, - "learning_rate": 2.5222920917206397e-06, - "loss": 0.9008, - "step": 4806 - }, - { - "epoch": 0.43351219732154933, - "grad_norm": 0.619677843065758, - "learning_rate": 2.5217281313344493e-06, - "loss": 0.8145, - "step": 4807 - }, - { - "epoch": 0.4336023808450196, - "grad_norm": 1.3295562213144603, - "learning_rate": 2.5211641264315372e-06, - "loss": 1.0252, - "step": 4808 - }, - { - "epoch": 0.43369256436848985, - "grad_norm": 1.7179434531146143, - "learning_rate": 2.5206000770600286e-06, - "loss": 0.9603, - "step": 4809 - }, - { - "epoch": 0.43378274789196014, - "grad_norm": 1.6611016666961809, - "learning_rate": 2.520035983268051e-06, - "loss": 0.9403, - "step": 4810 - }, - { - "epoch": 0.4338729314154304, - "grad_norm": 1.394718189692476, - "learning_rate": 2.5194718451037357e-06, - "loss": 0.985, - "step": 4811 - }, - { - "epoch": 0.43396311493890066, - "grad_norm": 1.4888351090683714, - "learning_rate": 2.518907662615218e-06, - "loss": 1.0091, - "step": 4812 - }, - { - "epoch": 0.43405329846237095, - "grad_norm": 1.6166829229632131, - "learning_rate": 2.5183434358506373e-06, - "loss": 0.9419, - "step": 4813 - }, - { - "epoch": 0.4341434819858412, - "grad_norm": 3.2408054577455916, - "learning_rate": 2.5177791648581368e-06, - "loss": 0.9673, - "step": 4814 - }, - { - "epoch": 0.4342336655093115, - "grad_norm": 1.8930467590304287, - "learning_rate": 2.517214849685863e-06, - "loss": 0.9862, - "step": 4815 - }, - { - "epoch": 0.4343238490327817, - "grad_norm": 1.618682980714962, - "learning_rate": 2.5166504903819663e-06, - "loss": 0.9994, - "step": 4816 - }, - { - "epoch": 0.434414032556252, - "grad_norm": 1.696382035785949, - "learning_rate": 2.5160860869946014e-06, - "loss": 1.0435, - "step": 4817 - }, - { - "epoch": 0.4345042160797222, - "grad_norm": 1.2651101466236883, - "learning_rate": 2.5155216395719253e-06, - "loss": 1.0595, - "step": 4818 - }, - { - "epoch": 0.4345943996031925, - "grad_norm": 1.1430501891904135, - "learning_rate": 2.5149571481621e-06, - "loss": 0.9408, - "step": 4819 - }, - { - "epoch": 0.43468458312666275, - "grad_norm": 0.7824205022410379, - "learning_rate": 2.514392612813292e-06, - "loss": 0.8072, - "step": 4820 - }, - { - "epoch": 0.43477476665013304, - "grad_norm": 1.495763916629279, - "learning_rate": 2.5138280335736695e-06, - "loss": 1.0329, - "step": 4821 - }, - { - "epoch": 0.43486495017360327, - "grad_norm": 1.3914235854528116, - "learning_rate": 2.5132634104914064e-06, - "loss": 1.0626, - "step": 4822 - }, - { - "epoch": 0.43495513369707356, - "grad_norm": 1.2532883782559185, - "learning_rate": 2.5126987436146794e-06, - "loss": 0.9995, - "step": 4823 - }, - { - "epoch": 0.4350453172205438, - "grad_norm": 1.5179246582915475, - "learning_rate": 2.5121340329916675e-06, - "loss": 0.9667, - "step": 4824 - }, - { - "epoch": 0.4351355007440141, - "grad_norm": 1.3051171423415466, - "learning_rate": 2.5115692786705566e-06, - "loss": 0.9022, - "step": 4825 - }, - { - "epoch": 0.4352256842674843, - "grad_norm": 1.7948243754973339, - "learning_rate": 2.511004480699534e-06, - "loss": 1.0145, - "step": 4826 - }, - { - "epoch": 0.4353158677909546, - "grad_norm": 0.7485209822634713, - "learning_rate": 2.510439639126791e-06, - "loss": 0.8983, - "step": 4827 - }, - { - "epoch": 0.43540605131442484, - "grad_norm": 1.4310873713821315, - "learning_rate": 2.509874754000524e-06, - "loss": 0.9811, - "step": 4828 - }, - { - "epoch": 0.4354962348378951, - "grad_norm": 2.562884604271525, - "learning_rate": 2.509309825368932e-06, - "loss": 0.975, - "step": 4829 - }, - { - "epoch": 0.43558641836136536, - "grad_norm": 1.3914868973455388, - "learning_rate": 2.5087448532802173e-06, - "loss": 0.9825, - "step": 4830 - }, - { - "epoch": 0.43567660188483565, - "grad_norm": 1.633432662953786, - "learning_rate": 2.508179837782586e-06, - "loss": 0.9618, - "step": 4831 - }, - { - "epoch": 0.4357667854083059, - "grad_norm": 1.3319775970482353, - "learning_rate": 2.5076147789242493e-06, - "loss": 0.9531, - "step": 4832 - }, - { - "epoch": 0.43585696893177617, - "grad_norm": 1.3192580939198773, - "learning_rate": 2.5070496767534202e-06, - "loss": 0.9932, - "step": 4833 - }, - { - "epoch": 0.4359471524552464, - "grad_norm": 1.5562811116858317, - "learning_rate": 2.506484531318317e-06, - "loss": 1.0153, - "step": 4834 - }, - { - "epoch": 0.4360373359787167, - "grad_norm": 0.9628040824207689, - "learning_rate": 2.5059193426671613e-06, - "loss": 0.8941, - "step": 4835 - }, - { - "epoch": 0.436127519502187, - "grad_norm": 2.110243554906151, - "learning_rate": 2.5053541108481772e-06, - "loss": 1.1136, - "step": 4836 - }, - { - "epoch": 0.4362177030256572, - "grad_norm": 1.9905874970405153, - "learning_rate": 2.5047888359095935e-06, - "loss": 0.9632, - "step": 4837 - }, - { - "epoch": 0.4363078865491275, - "grad_norm": 6.070491910858206, - "learning_rate": 2.5042235178996436e-06, - "loss": 0.9693, - "step": 4838 - }, - { - "epoch": 0.43639807007259773, - "grad_norm": 1.5781009030154778, - "learning_rate": 2.5036581568665627e-06, - "loss": 1.0421, - "step": 4839 - }, - { - "epoch": 0.436488253596068, - "grad_norm": 1.3817117048836915, - "learning_rate": 2.503092752858591e-06, - "loss": 1.0461, - "step": 4840 - }, - { - "epoch": 0.43657843711953825, - "grad_norm": 0.9395959313359358, - "learning_rate": 2.502527305923971e-06, - "loss": 0.8952, - "step": 4841 - }, - { - "epoch": 0.43666862064300854, - "grad_norm": 1.8389740202304434, - "learning_rate": 2.5019618161109506e-06, - "loss": 0.9699, - "step": 4842 - }, - { - "epoch": 0.4367588041664788, - "grad_norm": 1.372264872701967, - "learning_rate": 2.5013962834677804e-06, - "loss": 0.9933, - "step": 4843 - }, - { - "epoch": 0.43684898768994906, - "grad_norm": 2.141043629051656, - "learning_rate": 2.500830708042715e-06, - "loss": 0.9651, - "step": 4844 - }, - { - "epoch": 0.4369391712134193, - "grad_norm": 1.3624520389619885, - "learning_rate": 2.500265089884011e-06, - "loss": 1.043, - "step": 4845 - }, - { - "epoch": 0.4370293547368896, - "grad_norm": 1.4669467326285672, - "learning_rate": 2.499699429039932e-06, - "loss": 0.9856, - "step": 4846 - }, - { - "epoch": 0.4371195382603598, - "grad_norm": 2.0454132451598483, - "learning_rate": 2.4991337255587425e-06, - "loss": 0.9337, - "step": 4847 - }, - { - "epoch": 0.4372097217838301, - "grad_norm": 1.5300947229820798, - "learning_rate": 2.4985679794887106e-06, - "loss": 0.8507, - "step": 4848 - }, - { - "epoch": 0.43729990530730034, - "grad_norm": 1.4582215220866843, - "learning_rate": 2.49800219087811e-06, - "loss": 1.0131, - "step": 4849 - }, - { - "epoch": 0.43739008883077063, - "grad_norm": 5.287801797969952, - "learning_rate": 2.4974363597752163e-06, - "loss": 1.009, - "step": 4850 - }, - { - "epoch": 0.43748027235424086, - "grad_norm": 1.6397153239782096, - "learning_rate": 2.4968704862283097e-06, - "loss": 1.0324, - "step": 4851 - }, - { - "epoch": 0.43757045587771115, - "grad_norm": 2.3357038604350344, - "learning_rate": 2.4963045702856737e-06, - "loss": 1.0483, - "step": 4852 - }, - { - "epoch": 0.4376606394011814, - "grad_norm": 1.4551090928437003, - "learning_rate": 2.4957386119955946e-06, - "loss": 0.9286, - "step": 4853 - }, - { - "epoch": 0.4377508229246517, - "grad_norm": 1.0051169609219897, - "learning_rate": 2.495172611406364e-06, - "loss": 0.8176, - "step": 4854 - }, - { - "epoch": 0.4378410064481219, - "grad_norm": 1.8962256965261646, - "learning_rate": 2.4946065685662757e-06, - "loss": 1.0105, - "step": 4855 - }, - { - "epoch": 0.4379311899715922, - "grad_norm": 1.5665600479992545, - "learning_rate": 2.4940404835236283e-06, - "loss": 0.9901, - "step": 4856 - }, - { - "epoch": 0.4380213734950624, - "grad_norm": 1.3744112401523751, - "learning_rate": 2.4934743563267223e-06, - "loss": 0.9741, - "step": 4857 - }, - { - "epoch": 0.4381115570185327, - "grad_norm": 1.5764763406852886, - "learning_rate": 2.4929081870238635e-06, - "loss": 0.9415, - "step": 4858 - }, - { - "epoch": 0.43820174054200295, - "grad_norm": 1.5333830320900927, - "learning_rate": 2.49234197566336e-06, - "loss": 1.0212, - "step": 4859 - }, - { - "epoch": 0.43829192406547324, - "grad_norm": 1.429345595537319, - "learning_rate": 2.4917757222935247e-06, - "loss": 0.991, - "step": 4860 - }, - { - "epoch": 0.4383821075889435, - "grad_norm": 1.61310352889045, - "learning_rate": 2.4912094269626725e-06, - "loss": 1.0004, - "step": 4861 - }, - { - "epoch": 0.43847229111241376, - "grad_norm": 1.7759188463210422, - "learning_rate": 2.4906430897191245e-06, - "loss": 1.019, - "step": 4862 - }, - { - "epoch": 0.43856247463588405, - "grad_norm": 1.3818731188405455, - "learning_rate": 2.490076710611202e-06, - "loss": 1.0209, - "step": 4863 - }, - { - "epoch": 0.4386526581593543, - "grad_norm": 2.3685467031398884, - "learning_rate": 2.4895102896872326e-06, - "loss": 1.031, - "step": 4864 - }, - { - "epoch": 0.43874284168282457, - "grad_norm": 1.4858541246643644, - "learning_rate": 2.4889438269955457e-06, - "loss": 0.9157, - "step": 4865 - }, - { - "epoch": 0.4388330252062948, - "grad_norm": 1.3499252899124963, - "learning_rate": 2.4883773225844755e-06, - "loss": 0.9627, - "step": 4866 - }, - { - "epoch": 0.4389232087297651, - "grad_norm": 2.149260318288942, - "learning_rate": 2.48781077650236e-06, - "loss": 0.9728, - "step": 4867 - }, - { - "epoch": 0.4390133922532353, - "grad_norm": 1.6745050012030287, - "learning_rate": 2.4872441887975386e-06, - "loss": 0.9476, - "step": 4868 - }, - { - "epoch": 0.4391035757767056, - "grad_norm": 1.3227189834958262, - "learning_rate": 2.486677559518356e-06, - "loss": 0.9262, - "step": 4869 - }, - { - "epoch": 0.43919375930017585, - "grad_norm": 1.1468809829264819, - "learning_rate": 2.4861108887131614e-06, - "loss": 0.9659, - "step": 4870 - }, - { - "epoch": 0.43928394282364613, - "grad_norm": 1.450447404830343, - "learning_rate": 2.485544176430305e-06, - "loss": 0.9511, - "step": 4871 - }, - { - "epoch": 0.43937412634711637, - "grad_norm": 1.6584136793267095, - "learning_rate": 2.4849774227181425e-06, - "loss": 1.0594, - "step": 4872 - }, - { - "epoch": 0.43946430987058666, - "grad_norm": 1.2089224015283924, - "learning_rate": 2.484410627625032e-06, - "loss": 0.978, - "step": 4873 - }, - { - "epoch": 0.4395544933940569, - "grad_norm": 1.987019316408043, - "learning_rate": 2.4838437911993356e-06, - "loss": 1.025, - "step": 4874 - }, - { - "epoch": 0.4396446769175272, - "grad_norm": 2.2087240293365205, - "learning_rate": 2.483276913489419e-06, - "loss": 1.0182, - "step": 4875 - }, - { - "epoch": 0.4397348604409974, - "grad_norm": 2.2256472128466913, - "learning_rate": 2.4827099945436516e-06, - "loss": 0.8897, - "step": 4876 - }, - { - "epoch": 0.4398250439644677, - "grad_norm": 1.4863182598894444, - "learning_rate": 2.482143034410405e-06, - "loss": 1.0629, - "step": 4877 - }, - { - "epoch": 0.43991522748793793, - "grad_norm": 1.4069116095681904, - "learning_rate": 2.4815760331380573e-06, - "loss": 1.0174, - "step": 4878 - }, - { - "epoch": 0.4400054110114082, - "grad_norm": 1.2317256744472733, - "learning_rate": 2.481008990774987e-06, - "loss": 0.9438, - "step": 4879 - }, - { - "epoch": 0.44009559453487845, - "grad_norm": 1.7179023039398829, - "learning_rate": 2.480441907369577e-06, - "loss": 0.9859, - "step": 4880 - }, - { - "epoch": 0.44018577805834874, - "grad_norm": 1.6165695115355296, - "learning_rate": 2.479874782970214e-06, - "loss": 0.9246, - "step": 4881 - }, - { - "epoch": 0.440275961581819, - "grad_norm": 1.5734767632382385, - "learning_rate": 2.4793076176252887e-06, - "loss": 1.0068, - "step": 4882 - }, - { - "epoch": 0.44036614510528926, - "grad_norm": 1.3300364035450096, - "learning_rate": 2.478740411383195e-06, - "loss": 0.8668, - "step": 4883 - }, - { - "epoch": 0.44045632862875955, - "grad_norm": 1.2519195123644502, - "learning_rate": 2.4781731642923296e-06, - "loss": 0.974, - "step": 4884 - }, - { - "epoch": 0.4405465121522298, - "grad_norm": 1.5844679163786963, - "learning_rate": 2.477605876401093e-06, - "loss": 0.9657, - "step": 4885 - }, - { - "epoch": 0.4406366956757001, - "grad_norm": 1.28707135481533, - "learning_rate": 2.4770385477578894e-06, - "loss": 0.9296, - "step": 4886 - }, - { - "epoch": 0.4407268791991703, - "grad_norm": 1.2409748900406836, - "learning_rate": 2.476471178411127e-06, - "loss": 0.9339, - "step": 4887 - }, - { - "epoch": 0.4408170627226406, - "grad_norm": 1.2802429428367925, - "learning_rate": 2.475903768409216e-06, - "loss": 1.0085, - "step": 4888 - }, - { - "epoch": 0.44090724624611083, - "grad_norm": 1.4641017000696954, - "learning_rate": 2.475336317800572e-06, - "loss": 0.9484, - "step": 4889 - }, - { - "epoch": 0.4409974297695811, - "grad_norm": 1.3960118488011135, - "learning_rate": 2.4747688266336118e-06, - "loss": 0.9693, - "step": 4890 - }, - { - "epoch": 0.44108761329305135, - "grad_norm": 1.40069363985473, - "learning_rate": 2.4742012949567574e-06, - "loss": 1.0821, - "step": 4891 - }, - { - "epoch": 0.44117779681652164, - "grad_norm": 2.101907921521059, - "learning_rate": 2.4736337228184338e-06, - "loss": 0.9538, - "step": 4892 - }, - { - "epoch": 0.44126798033999187, - "grad_norm": 1.6513191816231523, - "learning_rate": 2.4730661102670692e-06, - "loss": 1.075, - "step": 4893 - }, - { - "epoch": 0.44135816386346216, - "grad_norm": 1.5867381963848282, - "learning_rate": 2.472498457351096e-06, - "loss": 0.9805, - "step": 4894 - }, - { - "epoch": 0.4414483473869324, - "grad_norm": 2.3087442158625033, - "learning_rate": 2.4719307641189495e-06, - "loss": 0.9657, - "step": 4895 - }, - { - "epoch": 0.4415385309104027, - "grad_norm": 1.5956575534247672, - "learning_rate": 2.4713630306190673e-06, - "loss": 0.9389, - "step": 4896 - }, - { - "epoch": 0.4416287144338729, - "grad_norm": 1.899542090800865, - "learning_rate": 2.4707952568998923e-06, - "loss": 1.0559, - "step": 4897 - }, - { - "epoch": 0.4417188979573432, - "grad_norm": 1.6951691175730703, - "learning_rate": 2.4702274430098703e-06, - "loss": 0.9482, - "step": 4898 - }, - { - "epoch": 0.44180908148081344, - "grad_norm": 1.8635146916503513, - "learning_rate": 2.4696595889974497e-06, - "loss": 0.9328, - "step": 4899 - }, - { - "epoch": 0.4418992650042837, - "grad_norm": 1.3478286411761442, - "learning_rate": 2.469091694911084e-06, - "loss": 1.0422, - "step": 4900 - }, - { - "epoch": 0.44198944852775396, - "grad_norm": 1.4778869985709475, - "learning_rate": 2.4685237607992276e-06, - "loss": 0.9798, - "step": 4901 - }, - { - "epoch": 0.44207963205122425, - "grad_norm": 1.4139404244181262, - "learning_rate": 2.4679557867103416e-06, - "loss": 1.041, - "step": 4902 - }, - { - "epoch": 0.4421698155746945, - "grad_norm": 1.2506606740214887, - "learning_rate": 2.4673877726928865e-06, - "loss": 0.9832, - "step": 4903 - }, - { - "epoch": 0.44225999909816477, - "grad_norm": 1.8396928405425008, - "learning_rate": 2.46681971879533e-06, - "loss": 0.871, - "step": 4904 - }, - { - "epoch": 0.442350182621635, - "grad_norm": 1.570115186146916, - "learning_rate": 2.4662516250661407e-06, - "loss": 0.9023, - "step": 4905 - }, - { - "epoch": 0.4424403661451053, - "grad_norm": 1.9317355388160795, - "learning_rate": 2.465683491553792e-06, - "loss": 0.9537, - "step": 4906 - }, - { - "epoch": 0.4425305496685755, - "grad_norm": 1.395396458405523, - "learning_rate": 2.4651153183067604e-06, - "loss": 0.9859, - "step": 4907 - }, - { - "epoch": 0.4426207331920458, - "grad_norm": 1.5129043700610523, - "learning_rate": 2.4645471053735245e-06, - "loss": 0.9186, - "step": 4908 - }, - { - "epoch": 0.4427109167155161, - "grad_norm": 1.490120859684843, - "learning_rate": 2.4639788528025684e-06, - "loss": 0.915, - "step": 4909 - }, - { - "epoch": 0.44280110023898633, - "grad_norm": 1.1271015248032208, - "learning_rate": 2.463410560642378e-06, - "loss": 0.986, - "step": 4910 - }, - { - "epoch": 0.4428912837624566, - "grad_norm": 0.7013299492872508, - "learning_rate": 2.4628422289414448e-06, - "loss": 0.8561, - "step": 4911 - }, - { - "epoch": 0.44298146728592686, - "grad_norm": 0.7195292685084856, - "learning_rate": 2.4622738577482592e-06, - "loss": 0.8377, - "step": 4912 - }, - { - "epoch": 0.44307165080939714, - "grad_norm": 1.4336479654896583, - "learning_rate": 2.461705447111319e-06, - "loss": 0.9686, - "step": 4913 - }, - { - "epoch": 0.4431618343328674, - "grad_norm": 1.394957405178367, - "learning_rate": 2.4611369970791246e-06, - "loss": 0.9434, - "step": 4914 - }, - { - "epoch": 0.44325201785633767, - "grad_norm": 3.032419195521649, - "learning_rate": 2.460568507700179e-06, - "loss": 0.9813, - "step": 4915 - }, - { - "epoch": 0.4433422013798079, - "grad_norm": 1.7181371116280673, - "learning_rate": 2.4599999790229887e-06, - "loss": 1.0155, - "step": 4916 - }, - { - "epoch": 0.4434323849032782, - "grad_norm": 1.6596212085894047, - "learning_rate": 2.459431411096064e-06, - "loss": 0.9325, - "step": 4917 - }, - { - "epoch": 0.4435225684267484, - "grad_norm": 1.517113338272573, - "learning_rate": 2.458862803967918e-06, - "loss": 0.9435, - "step": 4918 - }, - { - "epoch": 0.4436127519502187, - "grad_norm": 1.9848978750444768, - "learning_rate": 2.4582941576870667e-06, - "loss": 0.9964, - "step": 4919 - }, - { - "epoch": 0.44370293547368894, - "grad_norm": 1.820122966804257, - "learning_rate": 2.4577254723020315e-06, - "loss": 0.9598, - "step": 4920 - }, - { - "epoch": 0.44379311899715923, - "grad_norm": 1.5718056331670944, - "learning_rate": 2.457156747861335e-06, - "loss": 1.0069, - "step": 4921 - }, - { - "epoch": 0.44388330252062946, - "grad_norm": 1.794748524128676, - "learning_rate": 2.456587984413504e-06, - "loss": 1.0401, - "step": 4922 - }, - { - "epoch": 0.44397348604409975, - "grad_norm": 1.3582007938306857, - "learning_rate": 2.4560191820070683e-06, - "loss": 0.9094, - "step": 4923 - }, - { - "epoch": 0.44406366956757, - "grad_norm": 1.6383404766068035, - "learning_rate": 2.4554503406905617e-06, - "loss": 0.9701, - "step": 4924 - }, - { - "epoch": 0.4441538530910403, - "grad_norm": 2.476610153172994, - "learning_rate": 2.454881460512521e-06, - "loss": 1.0186, - "step": 4925 - }, - { - "epoch": 0.4442440366145105, - "grad_norm": 1.3336405946429595, - "learning_rate": 2.4543125415214856e-06, - "loss": 0.9535, - "step": 4926 - }, - { - "epoch": 0.4443342201379808, - "grad_norm": 1.5982061433089976, - "learning_rate": 2.4537435837659996e-06, - "loss": 1.0035, - "step": 4927 - }, - { - "epoch": 0.44442440366145103, - "grad_norm": 1.868736039942011, - "learning_rate": 2.4531745872946085e-06, - "loss": 0.9326, - "step": 4928 - }, - { - "epoch": 0.4445145871849213, - "grad_norm": 1.768006937690863, - "learning_rate": 2.4526055521558632e-06, - "loss": 1.0401, - "step": 4929 - }, - { - "epoch": 0.44460477070839155, - "grad_norm": 1.3147878688241001, - "learning_rate": 2.4520364783983164e-06, - "loss": 0.9883, - "step": 4930 - }, - { - "epoch": 0.44469495423186184, - "grad_norm": 1.297584753116609, - "learning_rate": 2.451467366070525e-06, - "loss": 0.9683, - "step": 4931 - }, - { - "epoch": 0.4447851377553321, - "grad_norm": 1.9181238177790871, - "learning_rate": 2.450898215221048e-06, - "loss": 1.0517, - "step": 4932 - }, - { - "epoch": 0.44487532127880236, - "grad_norm": 1.611067983262002, - "learning_rate": 2.4503290258984498e-06, - "loss": 1.1227, - "step": 4933 - }, - { - "epoch": 0.44496550480227265, - "grad_norm": 2.070070763636157, - "learning_rate": 2.4497597981512952e-06, - "loss": 0.8529, - "step": 4934 - }, - { - "epoch": 0.4450556883257429, - "grad_norm": 0.6387047248814882, - "learning_rate": 2.4491905320281555e-06, - "loss": 0.86, - "step": 4935 - }, - { - "epoch": 0.44514587184921317, - "grad_norm": 2.624792817431909, - "learning_rate": 2.448621227577602e-06, - "loss": 0.9607, - "step": 4936 - }, - { - "epoch": 0.4452360553726834, - "grad_norm": 1.56630563769575, - "learning_rate": 2.4480518848482123e-06, - "loss": 0.928, - "step": 4937 - }, - { - "epoch": 0.4453262388961537, - "grad_norm": 1.7444374370164235, - "learning_rate": 2.447482503888565e-06, - "loss": 0.9934, - "step": 4938 - }, - { - "epoch": 0.4454164224196239, - "grad_norm": 1.7733708843877978, - "learning_rate": 2.4469130847472434e-06, - "loss": 0.9573, - "step": 4939 - }, - { - "epoch": 0.4455066059430942, - "grad_norm": 1.6156568424473674, - "learning_rate": 2.4463436274728326e-06, - "loss": 1.0147, - "step": 4940 - }, - { - "epoch": 0.44559678946656445, - "grad_norm": 1.3231669612207808, - "learning_rate": 2.4457741321139227e-06, - "loss": 0.8288, - "step": 4941 - }, - { - "epoch": 0.44568697299003474, - "grad_norm": 1.4251354688533089, - "learning_rate": 2.4452045987191063e-06, - "loss": 1.0045, - "step": 4942 - }, - { - "epoch": 0.44577715651350497, - "grad_norm": 1.2737344500909322, - "learning_rate": 2.4446350273369776e-06, - "loss": 0.9537, - "step": 4943 - }, - { - "epoch": 0.44586734003697526, - "grad_norm": 1.6942509434733832, - "learning_rate": 2.4440654180161374e-06, - "loss": 1.0289, - "step": 4944 - }, - { - "epoch": 0.4459575235604455, - "grad_norm": 1.4725729516550843, - "learning_rate": 2.4434957708051875e-06, - "loss": 0.9297, - "step": 4945 - }, - { - "epoch": 0.4460477070839158, - "grad_norm": 1.5333743248864404, - "learning_rate": 2.4429260857527324e-06, - "loss": 0.923, - "step": 4946 - }, - { - "epoch": 0.446137890607386, - "grad_norm": 1.7181885235679908, - "learning_rate": 2.4423563629073815e-06, - "loss": 0.8808, - "step": 4947 - }, - { - "epoch": 0.4462280741308563, - "grad_norm": 1.2369564924337029, - "learning_rate": 2.4417866023177466e-06, - "loss": 0.9765, - "step": 4948 - }, - { - "epoch": 0.44631825765432653, - "grad_norm": 1.628632812968454, - "learning_rate": 2.441216804032443e-06, - "loss": 1.0058, - "step": 4949 - }, - { - "epoch": 0.4464084411777968, - "grad_norm": 1.4090207671861177, - "learning_rate": 2.440646968100089e-06, - "loss": 0.9956, - "step": 4950 - }, - { - "epoch": 0.44649862470126706, - "grad_norm": 0.7394087321179837, - "learning_rate": 2.4400770945693055e-06, - "loss": 0.861, - "step": 4951 - }, - { - "epoch": 0.44658880822473734, - "grad_norm": 1.6714543321391941, - "learning_rate": 2.4395071834887177e-06, - "loss": 1.0096, - "step": 4952 - }, - { - "epoch": 0.4466789917482076, - "grad_norm": 0.7134424869264476, - "learning_rate": 2.438937234906954e-06, - "loss": 0.9151, - "step": 4953 - }, - { - "epoch": 0.44676917527167787, - "grad_norm": 1.402920702566005, - "learning_rate": 2.4383672488726447e-06, - "loss": 0.9778, - "step": 4954 - }, - { - "epoch": 0.44685935879514815, - "grad_norm": 1.7515399152131705, - "learning_rate": 2.4377972254344256e-06, - "loss": 0.8988, - "step": 4955 - }, - { - "epoch": 0.4469495423186184, - "grad_norm": 3.4572573140686464, - "learning_rate": 2.437227164640932e-06, - "loss": 0.8781, - "step": 4956 - }, - { - "epoch": 0.4470397258420887, - "grad_norm": 1.40227280971279, - "learning_rate": 2.436657066540807e-06, - "loss": 0.9209, - "step": 4957 - }, - { - "epoch": 0.4471299093655589, - "grad_norm": 1.8246923971265594, - "learning_rate": 2.4360869311826927e-06, - "loss": 0.8511, - "step": 4958 - }, - { - "epoch": 0.4472200928890292, - "grad_norm": 1.6602006255558235, - "learning_rate": 2.4355167586152367e-06, - "loss": 1.0676, - "step": 4959 - }, - { - "epoch": 0.44731027641249943, - "grad_norm": 1.810310751652215, - "learning_rate": 2.4349465488870896e-06, - "loss": 0.947, - "step": 4960 - }, - { - "epoch": 0.4474004599359697, - "grad_norm": 1.4112374765196916, - "learning_rate": 2.434376302046905e-06, - "loss": 1.0583, - "step": 4961 - }, - { - "epoch": 0.44749064345943995, - "grad_norm": 1.7396758582203768, - "learning_rate": 2.433806018143339e-06, - "loss": 1.0023, - "step": 4962 - }, - { - "epoch": 0.44758082698291024, - "grad_norm": 0.666542471304438, - "learning_rate": 2.433235697225051e-06, - "loss": 0.8618, - "step": 4963 - }, - { - "epoch": 0.4476710105063805, - "grad_norm": 1.7207105378804215, - "learning_rate": 2.4326653393407048e-06, - "loss": 0.9944, - "step": 4964 - }, - { - "epoch": 0.44776119402985076, - "grad_norm": 1.7569185823824762, - "learning_rate": 2.432094944538966e-06, - "loss": 0.9444, - "step": 4965 - }, - { - "epoch": 0.447851377553321, - "grad_norm": 1.3472365015896486, - "learning_rate": 2.4315245128685047e-06, - "loss": 0.9539, - "step": 4966 - }, - { - "epoch": 0.4479415610767913, - "grad_norm": 1.7028833620333312, - "learning_rate": 2.4309540443779925e-06, - "loss": 0.9869, - "step": 4967 - }, - { - "epoch": 0.4480317446002615, - "grad_norm": 2.8795051107121146, - "learning_rate": 2.4303835391161047e-06, - "loss": 0.9122, - "step": 4968 - }, - { - "epoch": 0.4481219281237318, - "grad_norm": 1.4463937375080784, - "learning_rate": 2.42981299713152e-06, - "loss": 0.9285, - "step": 4969 - }, - { - "epoch": 0.44821211164720204, - "grad_norm": 1.7114594242636543, - "learning_rate": 2.4292424184729204e-06, - "loss": 1.0177, - "step": 4970 - }, - { - "epoch": 0.4483022951706723, - "grad_norm": 1.2433351217739412, - "learning_rate": 2.4286718031889913e-06, - "loss": 0.9662, - "step": 4971 - }, - { - "epoch": 0.44839247869414256, - "grad_norm": 1.3170885983371623, - "learning_rate": 2.4281011513284202e-06, - "loss": 0.9568, - "step": 4972 - }, - { - "epoch": 0.44848266221761285, - "grad_norm": 1.7396232996326055, - "learning_rate": 2.4275304629398985e-06, - "loss": 0.9208, - "step": 4973 - }, - { - "epoch": 0.4485728457410831, - "grad_norm": 1.6812643057629337, - "learning_rate": 2.4269597380721194e-06, - "loss": 1.0583, - "step": 4974 - }, - { - "epoch": 0.44866302926455337, - "grad_norm": 1.269463311358222, - "learning_rate": 2.426388976773782e-06, - "loss": 0.977, - "step": 4975 - }, - { - "epoch": 0.4487532127880236, - "grad_norm": 2.8818634042235636, - "learning_rate": 2.425818179093586e-06, - "loss": 1.0779, - "step": 4976 - }, - { - "epoch": 0.4488433963114939, - "grad_norm": 1.4397163719758241, - "learning_rate": 2.4252473450802346e-06, - "loss": 0.889, - "step": 4977 - }, - { - "epoch": 0.4489335798349641, - "grad_norm": 1.5926290571776214, - "learning_rate": 2.4246764747824355e-06, - "loss": 1.008, - "step": 4978 - }, - { - "epoch": 0.4490237633584344, - "grad_norm": 1.3001702967387967, - "learning_rate": 2.424105568248897e-06, - "loss": 0.9858, - "step": 4979 - }, - { - "epoch": 0.4491139468819047, - "grad_norm": 5.603674412016954, - "learning_rate": 2.4235346255283337e-06, - "loss": 0.924, - "step": 4980 - }, - { - "epoch": 0.44920413040537494, - "grad_norm": 1.9816084071269398, - "learning_rate": 2.42296364666946e-06, - "loss": 0.988, - "step": 4981 - }, - { - "epoch": 0.4492943139288452, - "grad_norm": 3.27716973334944, - "learning_rate": 2.4223926317209965e-06, - "loss": 0.928, - "step": 4982 - }, - { - "epoch": 0.44938449745231546, - "grad_norm": 1.34388328600928, - "learning_rate": 2.4218215807316647e-06, - "loss": 0.9888, - "step": 4983 - }, - { - "epoch": 0.44947468097578575, - "grad_norm": 1.469554031738989, - "learning_rate": 2.4212504937501894e-06, - "loss": 1.0602, - "step": 4984 - }, - { - "epoch": 0.449564864499256, - "grad_norm": 1.2249937290887503, - "learning_rate": 2.4206793708253e-06, - "loss": 1.0337, - "step": 4985 - }, - { - "epoch": 0.44965504802272627, - "grad_norm": 1.761470667252389, - "learning_rate": 2.420108212005726e-06, - "loss": 0.9386, - "step": 4986 - }, - { - "epoch": 0.4497452315461965, - "grad_norm": 1.630773996642392, - "learning_rate": 2.4195370173402034e-06, - "loss": 0.9812, - "step": 4987 - }, - { - "epoch": 0.4498354150696668, - "grad_norm": 1.5767717508025785, - "learning_rate": 2.4189657868774696e-06, - "loss": 1.1006, - "step": 4988 - }, - { - "epoch": 0.449925598593137, - "grad_norm": 1.6528724203429308, - "learning_rate": 2.418394520666264e-06, - "loss": 0.9111, - "step": 4989 - }, - { - "epoch": 0.4500157821166073, - "grad_norm": 1.5566989021448838, - "learning_rate": 2.4178232187553307e-06, - "loss": 0.9584, - "step": 4990 - }, - { - "epoch": 0.45010596564007754, - "grad_norm": 1.2628307343667218, - "learning_rate": 2.417251881193417e-06, - "loss": 1.0052, - "step": 4991 - }, - { - "epoch": 0.45019614916354783, - "grad_norm": 1.5840481182024237, - "learning_rate": 2.4166805080292723e-06, - "loss": 1.0385, - "step": 4992 - }, - { - "epoch": 0.45028633268701806, - "grad_norm": 1.33237719028936, - "learning_rate": 2.4161090993116485e-06, - "loss": 0.9366, - "step": 4993 - }, - { - "epoch": 0.45037651621048835, - "grad_norm": 1.9756515273304747, - "learning_rate": 2.4155376550893026e-06, - "loss": 0.8622, - "step": 4994 - }, - { - "epoch": 0.4504666997339586, - "grad_norm": 2.0943216710599493, - "learning_rate": 2.4149661754109926e-06, - "loss": 0.9926, - "step": 4995 - }, - { - "epoch": 0.4505568832574289, - "grad_norm": 1.5631729202826008, - "learning_rate": 2.41439466032548e-06, - "loss": 1.0328, - "step": 4996 - }, - { - "epoch": 0.4506470667808991, - "grad_norm": 1.6174294111128502, - "learning_rate": 2.41382310988153e-06, - "loss": 0.9213, - "step": 4997 - }, - { - "epoch": 0.4507372503043694, - "grad_norm": 1.9861899658886921, - "learning_rate": 2.413251524127911e-06, - "loss": 1.0398, - "step": 4998 - }, - { - "epoch": 0.45082743382783963, - "grad_norm": 1.7614885336258816, - "learning_rate": 2.412679903113393e-06, - "loss": 0.9444, - "step": 4999 - }, - { - "epoch": 0.4509176173513099, - "grad_norm": 21.384492963515424, - "learning_rate": 2.4121082468867505e-06, - "loss": 0.9915, - "step": 5000 - }, - { - "epoch": 0.45100780087478015, - "grad_norm": 1.7984128259313448, - "learning_rate": 2.4115365554967597e-06, - "loss": 0.9209, - "step": 5001 - }, - { - "epoch": 0.45109798439825044, - "grad_norm": 1.4582246285750398, - "learning_rate": 2.4109648289922006e-06, - "loss": 0.9904, - "step": 5002 - }, - { - "epoch": 0.45118816792172073, - "grad_norm": 1.4034351150604554, - "learning_rate": 2.4103930674218565e-06, - "loss": 0.9863, - "step": 5003 - }, - { - "epoch": 0.45127835144519096, - "grad_norm": 1.5289582989721229, - "learning_rate": 2.409821270834513e-06, - "loss": 0.957, - "step": 5004 - }, - { - "epoch": 0.45136853496866125, - "grad_norm": 1.9187347684870746, - "learning_rate": 2.409249439278959e-06, - "loss": 1.04, - "step": 5005 - }, - { - "epoch": 0.4514587184921315, - "grad_norm": 0.6151216676199682, - "learning_rate": 2.408677572803986e-06, - "loss": 0.8131, - "step": 5006 - }, - { - "epoch": 0.45154890201560177, - "grad_norm": 1.629431477494483, - "learning_rate": 2.408105671458389e-06, - "loss": 0.9881, - "step": 5007 - }, - { - "epoch": 0.451639085539072, - "grad_norm": 1.3678618593572158, - "learning_rate": 2.4075337352909663e-06, - "loss": 0.92, - "step": 5008 - }, - { - "epoch": 0.4517292690625423, - "grad_norm": 1.420945670794621, - "learning_rate": 2.4069617643505177e-06, - "loss": 1.1071, - "step": 5009 - }, - { - "epoch": 0.4518194525860125, - "grad_norm": 1.2699061852444866, - "learning_rate": 2.406389758685848e-06, - "loss": 1.0645, - "step": 5010 - }, - { - "epoch": 0.4519096361094828, - "grad_norm": 1.2528510005798497, - "learning_rate": 2.405817718345763e-06, - "loss": 1.0206, - "step": 5011 - }, - { - "epoch": 0.45199981963295305, - "grad_norm": 1.3668927992231783, - "learning_rate": 2.4052456433790726e-06, - "loss": 0.931, - "step": 5012 - }, - { - "epoch": 0.45209000315642334, - "grad_norm": 1.5303930103202208, - "learning_rate": 2.4046735338345897e-06, - "loss": 1.0341, - "step": 5013 - }, - { - "epoch": 0.45218018667989357, - "grad_norm": 1.4123210827426167, - "learning_rate": 2.404101389761129e-06, - "loss": 0.9995, - "step": 5014 - }, - { - "epoch": 0.45227037020336386, - "grad_norm": 0.812771861737179, - "learning_rate": 2.4035292112075097e-06, - "loss": 0.8121, - "step": 5015 - }, - { - "epoch": 0.4523605537268341, - "grad_norm": 1.4254045380707576, - "learning_rate": 2.4029569982225534e-06, - "loss": 1.0394, - "step": 5016 - }, - { - "epoch": 0.4524507372503044, - "grad_norm": 0.7335175723497855, - "learning_rate": 2.402384750855084e-06, - "loss": 0.8968, - "step": 5017 - }, - { - "epoch": 0.4525409207737746, - "grad_norm": 1.4067149347382581, - "learning_rate": 2.4018124691539286e-06, - "loss": 0.985, - "step": 5018 - }, - { - "epoch": 0.4526311042972449, - "grad_norm": 6.401138871905676, - "learning_rate": 2.4012401531679178e-06, - "loss": 0.968, - "step": 5019 - }, - { - "epoch": 0.45272128782071513, - "grad_norm": 1.7885396268281615, - "learning_rate": 2.4006678029458847e-06, - "loss": 0.983, - "step": 5020 - }, - { - "epoch": 0.4528114713441854, - "grad_norm": 3.874827842579764, - "learning_rate": 2.400095418536666e-06, - "loss": 0.886, - "step": 5021 - }, - { - "epoch": 0.45290165486765566, - "grad_norm": 1.5662474896653646, - "learning_rate": 2.3995229999890996e-06, - "loss": 0.959, - "step": 5022 - }, - { - "epoch": 0.45299183839112594, - "grad_norm": 1.6786812540551181, - "learning_rate": 2.398950547352028e-06, - "loss": 0.9245, - "step": 5023 - }, - { - "epoch": 0.4530820219145962, - "grad_norm": 2.136450044518853, - "learning_rate": 2.398378060674295e-06, - "loss": 0.8957, - "step": 5024 - }, - { - "epoch": 0.45317220543806647, - "grad_norm": 1.4295248970993832, - "learning_rate": 2.39780554000475e-06, - "loss": 0.9817, - "step": 5025 - }, - { - "epoch": 0.4532623889615367, - "grad_norm": 1.4131252913409358, - "learning_rate": 2.3972329853922434e-06, - "loss": 1.0098, - "step": 5026 - }, - { - "epoch": 0.453352572485007, - "grad_norm": 0.6381483119326864, - "learning_rate": 2.3966603968856278e-06, - "loss": 0.8394, - "step": 5027 - }, - { - "epoch": 0.4534427560084773, - "grad_norm": 1.4155812406197519, - "learning_rate": 2.39608777453376e-06, - "loss": 1.0582, - "step": 5028 - }, - { - "epoch": 0.4535329395319475, - "grad_norm": 3.0054946489757675, - "learning_rate": 2.3955151183854993e-06, - "loss": 1.0072, - "step": 5029 - }, - { - "epoch": 0.4536231230554178, - "grad_norm": 1.4620374322996776, - "learning_rate": 2.3949424284897073e-06, - "loss": 0.9395, - "step": 5030 - }, - { - "epoch": 0.45371330657888803, - "grad_norm": 4.639116754982291, - "learning_rate": 2.39436970489525e-06, - "loss": 0.8467, - "step": 5031 - }, - { - "epoch": 0.4538034901023583, - "grad_norm": 1.5051707314763147, - "learning_rate": 2.3937969476509955e-06, - "loss": 0.9863, - "step": 5032 - }, - { - "epoch": 0.45389367362582855, - "grad_norm": 1.3652864590861782, - "learning_rate": 2.393224156805813e-06, - "loss": 1.0439, - "step": 5033 - }, - { - "epoch": 0.45398385714929884, - "grad_norm": 1.7062369796759178, - "learning_rate": 2.392651332408578e-06, - "loss": 0.9516, - "step": 5034 - }, - { - "epoch": 0.4540740406727691, - "grad_norm": 1.5444740389009421, - "learning_rate": 2.3920784745081655e-06, - "loss": 0.9652, - "step": 5035 - }, - { - "epoch": 0.45416422419623936, - "grad_norm": 2.2148241583817145, - "learning_rate": 2.391505583153456e-06, - "loss": 1.0154, - "step": 5036 - }, - { - "epoch": 0.4542544077197096, - "grad_norm": 1.358241079665209, - "learning_rate": 2.3909326583933315e-06, - "loss": 0.9901, - "step": 5037 - }, - { - "epoch": 0.4543445912431799, - "grad_norm": 1.3896239889089232, - "learning_rate": 2.3903597002766777e-06, - "loss": 0.9445, - "step": 5038 - }, - { - "epoch": 0.4544347747666501, - "grad_norm": 1.2979434564233077, - "learning_rate": 2.389786708852381e-06, - "loss": 0.9457, - "step": 5039 - }, - { - "epoch": 0.4545249582901204, - "grad_norm": 0.8324496153170313, - "learning_rate": 2.389213684169333e-06, - "loss": 0.9937, - "step": 5040 - }, - { - "epoch": 0.45461514181359064, - "grad_norm": 1.3447499214309921, - "learning_rate": 2.388640626276428e-06, - "loss": 0.9419, - "step": 5041 - }, - { - "epoch": 0.45470532533706093, - "grad_norm": 0.74657041491995, - "learning_rate": 2.388067535222561e-06, - "loss": 0.8466, - "step": 5042 - }, - { - "epoch": 0.45479550886053116, - "grad_norm": 1.6349440537659696, - "learning_rate": 2.3874944110566332e-06, - "loss": 0.9941, - "step": 5043 - }, - { - "epoch": 0.45488569238400145, - "grad_norm": 0.7754917799454353, - "learning_rate": 2.3869212538275447e-06, - "loss": 0.968, - "step": 5044 - }, - { - "epoch": 0.4549758759074717, - "grad_norm": 2.2333624619629315, - "learning_rate": 2.386348063584202e-06, - "loss": 0.9969, - "step": 5045 - }, - { - "epoch": 0.45506605943094197, - "grad_norm": 1.5721325551461356, - "learning_rate": 2.385774840375511e-06, - "loss": 1.0306, - "step": 5046 - }, - { - "epoch": 0.4551562429544122, - "grad_norm": 1.432913553882149, - "learning_rate": 2.385201584250385e-06, - "loss": 0.9397, - "step": 5047 - }, - { - "epoch": 0.4552464264778825, - "grad_norm": 0.7006936102493441, - "learning_rate": 2.3846282952577346e-06, - "loss": 0.9045, - "step": 5048 - }, - { - "epoch": 0.4553366100013527, - "grad_norm": 1.2905492820419402, - "learning_rate": 2.3840549734464785e-06, - "loss": 1.0627, - "step": 5049 - }, - { - "epoch": 0.455426793524823, - "grad_norm": 1.6491612296507145, - "learning_rate": 2.3834816188655336e-06, - "loss": 0.9216, - "step": 5050 - }, - { - "epoch": 0.4555169770482933, - "grad_norm": 1.389495262126937, - "learning_rate": 2.3829082315638224e-06, - "loss": 0.9297, - "step": 5051 - }, - { - "epoch": 0.45560716057176354, - "grad_norm": 10.17937384098028, - "learning_rate": 2.3823348115902695e-06, - "loss": 0.8445, - "step": 5052 - }, - { - "epoch": 0.4556973440952338, - "grad_norm": 1.4878711686147903, - "learning_rate": 2.3817613589938026e-06, - "loss": 0.9943, - "step": 5053 - }, - { - "epoch": 0.45578752761870406, - "grad_norm": 1.4661599718115703, - "learning_rate": 2.3811878738233517e-06, - "loss": 1.0187, - "step": 5054 - }, - { - "epoch": 0.45587771114217435, - "grad_norm": 1.7147805586796403, - "learning_rate": 2.380614356127849e-06, - "loss": 0.926, - "step": 5055 - }, - { - "epoch": 0.4559678946656446, - "grad_norm": 1.6504043488242268, - "learning_rate": 2.3800408059562318e-06, - "loss": 0.9757, - "step": 5056 - }, - { - "epoch": 0.45605807818911487, - "grad_norm": 2.5111298768102834, - "learning_rate": 2.3794672233574365e-06, - "loss": 0.9884, - "step": 5057 - }, - { - "epoch": 0.4561482617125851, - "grad_norm": 1.8284581850740282, - "learning_rate": 2.3788936083804058e-06, - "loss": 0.9944, - "step": 5058 - }, - { - "epoch": 0.4562384452360554, - "grad_norm": 1.6678155833189554, - "learning_rate": 2.378319961074083e-06, - "loss": 0.9822, - "step": 5059 - }, - { - "epoch": 0.4563286287595256, - "grad_norm": 1.4368909291848346, - "learning_rate": 2.377746281487415e-06, - "loss": 1.0235, - "step": 5060 - }, - { - "epoch": 0.4564188122829959, - "grad_norm": 2.9325428793553576, - "learning_rate": 2.377172569669352e-06, - "loss": 1.0663, - "step": 5061 - }, - { - "epoch": 0.45650899580646614, - "grad_norm": 1.5960784806815582, - "learning_rate": 2.376598825668845e-06, - "loss": 0.8396, - "step": 5062 - }, - { - "epoch": 0.45659917932993643, - "grad_norm": 1.3995877442324014, - "learning_rate": 2.3760250495348495e-06, - "loss": 0.9702, - "step": 5063 - }, - { - "epoch": 0.45668936285340667, - "grad_norm": 1.6323946915640932, - "learning_rate": 2.3754512413163236e-06, - "loss": 1.0236, - "step": 5064 - }, - { - "epoch": 0.45677954637687695, - "grad_norm": 1.2682304408427998, - "learning_rate": 2.3748774010622285e-06, - "loss": 0.962, - "step": 5065 - }, - { - "epoch": 0.4568697299003472, - "grad_norm": 1.3673939576258052, - "learning_rate": 2.3743035288215254e-06, - "loss": 0.9929, - "step": 5066 - }, - { - "epoch": 0.4569599134238175, - "grad_norm": 1.7762065896642965, - "learning_rate": 2.3737296246431815e-06, - "loss": 0.9881, - "step": 5067 - }, - { - "epoch": 0.4570500969472877, - "grad_norm": 1.4286075536383749, - "learning_rate": 2.3731556885761656e-06, - "loss": 0.9527, - "step": 5068 - }, - { - "epoch": 0.457140280470758, - "grad_norm": 0.6377203382923818, - "learning_rate": 2.372581720669449e-06, - "loss": 0.8243, - "step": 5069 - }, - { - "epoch": 0.45723046399422823, - "grad_norm": 1.61344860769819, - "learning_rate": 2.3720077209720046e-06, - "loss": 1.01, - "step": 5070 - }, - { - "epoch": 0.4573206475176985, - "grad_norm": 0.6731346210440343, - "learning_rate": 2.3714336895328112e-06, - "loss": 0.8425, - "step": 5071 - }, - { - "epoch": 0.45741083104116875, - "grad_norm": 1.5659263427434342, - "learning_rate": 2.370859626400847e-06, - "loss": 0.94, - "step": 5072 - }, - { - "epoch": 0.45750101456463904, - "grad_norm": 1.894802679694328, - "learning_rate": 2.3702855316250943e-06, - "loss": 0.9596, - "step": 5073 - }, - { - "epoch": 0.45759119808810933, - "grad_norm": 1.5098303698318245, - "learning_rate": 2.369711405254539e-06, - "loss": 0.9821, - "step": 5074 - }, - { - "epoch": 0.45768138161157956, - "grad_norm": 1.4392478102865651, - "learning_rate": 2.3691372473381673e-06, - "loss": 1.0329, - "step": 5075 - }, - { - "epoch": 0.45777156513504985, - "grad_norm": 1.4196973620539688, - "learning_rate": 2.3685630579249708e-06, - "loss": 1.039, - "step": 5076 - }, - { - "epoch": 0.4578617486585201, - "grad_norm": 1.4199124720287009, - "learning_rate": 2.367988837063942e-06, - "loss": 0.9573, - "step": 5077 - }, - { - "epoch": 0.4579519321819904, - "grad_norm": 2.605257268784401, - "learning_rate": 2.367414584804076e-06, - "loss": 1.0536, - "step": 5078 - }, - { - "epoch": 0.4580421157054606, - "grad_norm": 1.3542304195166668, - "learning_rate": 2.366840301194372e-06, - "loss": 1.0152, - "step": 5079 - }, - { - "epoch": 0.4581322992289309, - "grad_norm": 1.5443930703514284, - "learning_rate": 2.3662659862838308e-06, - "loss": 0.9733, - "step": 5080 - }, - { - "epoch": 0.45822248275240113, - "grad_norm": 1.4374814239628564, - "learning_rate": 2.365691640121456e-06, - "loss": 1.0345, - "step": 5081 - }, - { - "epoch": 0.4583126662758714, - "grad_norm": 1.4881301763156936, - "learning_rate": 2.365117262756254e-06, - "loss": 0.9189, - "step": 5082 - }, - { - "epoch": 0.45840284979934165, - "grad_norm": 2.225780684423191, - "learning_rate": 2.3645428542372342e-06, - "loss": 0.979, - "step": 5083 - }, - { - "epoch": 0.45849303332281194, - "grad_norm": 0.6823051077608336, - "learning_rate": 2.3639684146134083e-06, - "loss": 0.7932, - "step": 5084 - }, - { - "epoch": 0.45858321684628217, - "grad_norm": 1.8591913084445812, - "learning_rate": 2.3633939439337897e-06, - "loss": 0.9467, - "step": 5085 - }, - { - "epoch": 0.45867340036975246, - "grad_norm": 1.4928995400859832, - "learning_rate": 2.362819442247396e-06, - "loss": 0.9516, - "step": 5086 - }, - { - "epoch": 0.4587635838932227, - "grad_norm": 0.634731467868552, - "learning_rate": 2.3622449096032477e-06, - "loss": 0.7952, - "step": 5087 - }, - { - "epoch": 0.458853767416693, - "grad_norm": 1.3860541671968258, - "learning_rate": 2.361670346050366e-06, - "loss": 1.0162, - "step": 5088 - }, - { - "epoch": 0.4589439509401632, - "grad_norm": 1.586151033505157, - "learning_rate": 2.3610957516377757e-06, - "loss": 0.9077, - "step": 5089 - }, - { - "epoch": 0.4590341344636335, - "grad_norm": 1.5812312611310144, - "learning_rate": 2.3605211264145048e-06, - "loss": 0.978, - "step": 5090 - }, - { - "epoch": 0.45912431798710374, - "grad_norm": 1.2031730543175676, - "learning_rate": 2.3599464704295836e-06, - "loss": 0.9509, - "step": 5091 - }, - { - "epoch": 0.459214501510574, - "grad_norm": 1.5963595101379608, - "learning_rate": 2.359371783732045e-06, - "loss": 0.8648, - "step": 5092 - }, - { - "epoch": 0.45930468503404426, - "grad_norm": 1.908629886068063, - "learning_rate": 2.358797066370924e-06, - "loss": 0.969, - "step": 5093 - }, - { - "epoch": 0.45939486855751455, - "grad_norm": 1.8951632674448438, - "learning_rate": 2.3582223183952594e-06, - "loss": 0.9868, - "step": 5094 - }, - { - "epoch": 0.4594850520809848, - "grad_norm": 3.9829716623319644, - "learning_rate": 2.357647539854091e-06, - "loss": 1.0144, - "step": 5095 - }, - { - "epoch": 0.45957523560445507, - "grad_norm": 1.5764420855598043, - "learning_rate": 2.3570727307964624e-06, - "loss": 0.9879, - "step": 5096 - }, - { - "epoch": 0.4596654191279253, - "grad_norm": 1.446270022373193, - "learning_rate": 2.35649789127142e-06, - "loss": 1.0265, - "step": 5097 - }, - { - "epoch": 0.4597556026513956, - "grad_norm": 1.5411425636139073, - "learning_rate": 2.3559230213280115e-06, - "loss": 0.9507, - "step": 5098 - }, - { - "epoch": 0.4598457861748659, - "grad_norm": 1.182930234746369, - "learning_rate": 2.3553481210152886e-06, - "loss": 0.8965, - "step": 5099 - }, - { - "epoch": 0.4599359696983361, - "grad_norm": 0.7734749910149755, - "learning_rate": 2.3547731903823043e-06, - "loss": 0.8895, - "step": 5100 - }, - { - "epoch": 0.4600261532218064, - "grad_norm": 1.2256314576954908, - "learning_rate": 2.3541982294781155e-06, - "loss": 0.952, - "step": 5101 - }, - { - "epoch": 0.46011633674527663, - "grad_norm": 1.3260062876993874, - "learning_rate": 2.3536232383517804e-06, - "loss": 0.9777, - "step": 5102 - }, - { - "epoch": 0.4602065202687469, - "grad_norm": 1.2632574849438263, - "learning_rate": 2.3530482170523602e-06, - "loss": 0.9798, - "step": 5103 - }, - { - "epoch": 0.46029670379221715, - "grad_norm": 1.370356782830572, - "learning_rate": 2.3524731656289206e-06, - "loss": 1.0709, - "step": 5104 - }, - { - "epoch": 0.46038688731568744, - "grad_norm": 1.5301735656126263, - "learning_rate": 2.351898084130526e-06, - "loss": 1.0663, - "step": 5105 - }, - { - "epoch": 0.4604770708391577, - "grad_norm": 1.8196155703814498, - "learning_rate": 2.351322972606247e-06, - "loss": 1.0433, - "step": 5106 - }, - { - "epoch": 0.46056725436262796, - "grad_norm": 1.453437771515215, - "learning_rate": 2.350747831105155e-06, - "loss": 1.1259, - "step": 5107 - }, - { - "epoch": 0.4606574378860982, - "grad_norm": 1.703315584210951, - "learning_rate": 2.350172659676323e-06, - "loss": 1.0588, - "step": 5108 - }, - { - "epoch": 0.4607476214095685, - "grad_norm": 1.5705083468706342, - "learning_rate": 2.3495974583688306e-06, - "loss": 1.0171, - "step": 5109 - }, - { - "epoch": 0.4608378049330387, - "grad_norm": 1.4108456434932186, - "learning_rate": 2.3490222272317543e-06, - "loss": 1.0508, - "step": 5110 - }, - { - "epoch": 0.460927988456509, - "grad_norm": 1.5091193667827778, - "learning_rate": 2.348446966314177e-06, - "loss": 1.0262, - "step": 5111 - }, - { - "epoch": 0.46101817197997924, - "grad_norm": 1.8190964332801018, - "learning_rate": 2.3478716756651837e-06, - "loss": 0.9625, - "step": 5112 - }, - { - "epoch": 0.46110835550344953, - "grad_norm": 1.7973882854269658, - "learning_rate": 2.347296355333861e-06, - "loss": 0.9734, - "step": 5113 - }, - { - "epoch": 0.46119853902691976, - "grad_norm": 1.388818016893119, - "learning_rate": 2.3467210053692972e-06, - "loss": 0.9869, - "step": 5114 - }, - { - "epoch": 0.46128872255039005, - "grad_norm": 1.806614891075168, - "learning_rate": 2.3461456258205866e-06, - "loss": 0.9863, - "step": 5115 - }, - { - "epoch": 0.4613789060738603, - "grad_norm": 1.6631485125898438, - "learning_rate": 2.345570216736822e-06, - "loss": 0.922, - "step": 5116 - }, - { - "epoch": 0.4614690895973306, - "grad_norm": 1.626371538457162, - "learning_rate": 2.3449947781671013e-06, - "loss": 0.9883, - "step": 5117 - }, - { - "epoch": 0.4615592731208008, - "grad_norm": 1.525934061503164, - "learning_rate": 2.3444193101605237e-06, - "loss": 1.0064, - "step": 5118 - }, - { - "epoch": 0.4616494566442711, - "grad_norm": 1.71535406144674, - "learning_rate": 2.3438438127661913e-06, - "loss": 0.9944, - "step": 5119 - }, - { - "epoch": 0.4617396401677413, - "grad_norm": 1.533905140710348, - "learning_rate": 2.3432682860332096e-06, - "loss": 0.8227, - "step": 5120 - }, - { - "epoch": 0.4618298236912116, - "grad_norm": 2.223792779434878, - "learning_rate": 2.342692730010684e-06, - "loss": 0.9305, - "step": 5121 - }, - { - "epoch": 0.4619200072146819, - "grad_norm": 2.1657826992377105, - "learning_rate": 2.342117144747726e-06, - "loss": 0.9938, - "step": 5122 - }, - { - "epoch": 0.46201019073815214, - "grad_norm": 1.6202424905158856, - "learning_rate": 2.3415415302934457e-06, - "loss": 0.974, - "step": 5123 - }, - { - "epoch": 0.4621003742616224, - "grad_norm": 1.4517928549443968, - "learning_rate": 2.340965886696959e-06, - "loss": 1.0022, - "step": 5124 - }, - { - "epoch": 0.46219055778509266, - "grad_norm": 1.466030687742154, - "learning_rate": 2.340390214007384e-06, - "loss": 0.9382, - "step": 5125 - }, - { - "epoch": 0.46228074130856295, - "grad_norm": 1.4924477554966415, - "learning_rate": 2.339814512273838e-06, - "loss": 0.867, - "step": 5126 - }, - { - "epoch": 0.4623709248320332, - "grad_norm": 1.6113025270220507, - "learning_rate": 2.3392387815454447e-06, - "loss": 1.0839, - "step": 5127 - }, - { - "epoch": 0.46246110835550347, - "grad_norm": 1.4558569545536375, - "learning_rate": 2.3386630218713273e-06, - "loss": 0.9853, - "step": 5128 - }, - { - "epoch": 0.4625512918789737, - "grad_norm": 1.512179124702142, - "learning_rate": 2.3380872333006135e-06, - "loss": 0.9993, - "step": 5129 - }, - { - "epoch": 0.462641475402444, - "grad_norm": 1.9968974367412486, - "learning_rate": 2.3375114158824335e-06, - "loss": 1.0225, - "step": 5130 - }, - { - "epoch": 0.4627316589259142, - "grad_norm": 1.486260912624943, - "learning_rate": 2.3369355696659184e-06, - "loss": 1.0194, - "step": 5131 - }, - { - "epoch": 0.4628218424493845, - "grad_norm": 1.5288286332675358, - "learning_rate": 2.336359694700202e-06, - "loss": 0.9695, - "step": 5132 - }, - { - "epoch": 0.46291202597285475, - "grad_norm": 1.473869653557013, - "learning_rate": 2.335783791034422e-06, - "loss": 0.9874, - "step": 5133 - }, - { - "epoch": 0.46300220949632503, - "grad_norm": 1.6616477818437854, - "learning_rate": 2.3352078587177173e-06, - "loss": 0.9628, - "step": 5134 - }, - { - "epoch": 0.46309239301979527, - "grad_norm": 1.4578789502665752, - "learning_rate": 2.33463189779923e-06, - "loss": 0.9861, - "step": 5135 - }, - { - "epoch": 0.46318257654326556, - "grad_norm": 1.3921375406979222, - "learning_rate": 2.334055908328104e-06, - "loss": 1.0552, - "step": 5136 - }, - { - "epoch": 0.4632727600667358, - "grad_norm": 1.4081580463024717, - "learning_rate": 2.3334798903534866e-06, - "loss": 0.9002, - "step": 5137 - }, - { - "epoch": 0.4633629435902061, - "grad_norm": 1.3337528591965522, - "learning_rate": 2.3329038439245252e-06, - "loss": 0.8814, - "step": 5138 - }, - { - "epoch": 0.4634531271136763, - "grad_norm": 1.8141977974366184, - "learning_rate": 2.3323277690903724e-06, - "loss": 0.9843, - "step": 5139 - }, - { - "epoch": 0.4635433106371466, - "grad_norm": 1.2901263384350543, - "learning_rate": 2.3317516659001827e-06, - "loss": 0.9565, - "step": 5140 - }, - { - "epoch": 0.46363349416061683, - "grad_norm": 2.1117084171269584, - "learning_rate": 2.331175534403111e-06, - "loss": 0.9936, - "step": 5141 - }, - { - "epoch": 0.4637236776840871, - "grad_norm": 1.4848339776632082, - "learning_rate": 2.3305993746483167e-06, - "loss": 1.0632, - "step": 5142 - }, - { - "epoch": 0.46381386120755735, - "grad_norm": 1.6712160327001717, - "learning_rate": 2.3300231866849606e-06, - "loss": 0.952, - "step": 5143 - }, - { - "epoch": 0.46390404473102764, - "grad_norm": 1.4567257149176016, - "learning_rate": 2.3294469705622067e-06, - "loss": 1.0168, - "step": 5144 - }, - { - "epoch": 0.4639942282544979, - "grad_norm": 1.6130250445631082, - "learning_rate": 2.3288707263292203e-06, - "loss": 0.959, - "step": 5145 - }, - { - "epoch": 0.46408441177796816, - "grad_norm": 1.2961069153524625, - "learning_rate": 2.3282944540351707e-06, - "loss": 1.0024, - "step": 5146 - }, - { - "epoch": 0.46417459530143845, - "grad_norm": 1.8967217121058715, - "learning_rate": 2.327718153729228e-06, - "loss": 0.9532, - "step": 5147 - }, - { - "epoch": 0.4642647788249087, - "grad_norm": 2.998088863712481, - "learning_rate": 2.327141825460566e-06, - "loss": 1.0269, - "step": 5148 - }, - { - "epoch": 0.464354962348379, - "grad_norm": 0.7785345187193795, - "learning_rate": 2.326565469278358e-06, - "loss": 0.8589, - "step": 5149 - }, - { - "epoch": 0.4644451458718492, - "grad_norm": 0.7454971804421602, - "learning_rate": 2.3259890852317846e-06, - "loss": 0.8516, - "step": 5150 - }, - { - "epoch": 0.4645353293953195, - "grad_norm": 1.5801452697897482, - "learning_rate": 2.3254126733700246e-06, - "loss": 0.981, - "step": 5151 - }, - { - "epoch": 0.46462551291878973, - "grad_norm": 1.7350290113546545, - "learning_rate": 2.324836233742262e-06, - "loss": 1.0078, - "step": 5152 - }, - { - "epoch": 0.46471569644226, - "grad_norm": 1.850171818358038, - "learning_rate": 2.3242597663976793e-06, - "loss": 1.0161, - "step": 5153 - }, - { - "epoch": 0.46480587996573025, - "grad_norm": 1.5428301398876203, - "learning_rate": 2.3236832713854663e-06, - "loss": 0.9585, - "step": 5154 - }, - { - "epoch": 0.46489606348920054, - "grad_norm": 1.4641648817549051, - "learning_rate": 2.323106748754812e-06, - "loss": 0.9764, - "step": 5155 - }, - { - "epoch": 0.4649862470126708, - "grad_norm": 1.2598910483681673, - "learning_rate": 2.3225301985549077e-06, - "loss": 1.0756, - "step": 5156 - }, - { - "epoch": 0.46507643053614106, - "grad_norm": 1.5959412713395633, - "learning_rate": 2.321953620834948e-06, - "loss": 0.9154, - "step": 5157 - }, - { - "epoch": 0.4651666140596113, - "grad_norm": 1.721794483773905, - "learning_rate": 2.3213770156441314e-06, - "loss": 0.9524, - "step": 5158 - }, - { - "epoch": 0.4652567975830816, - "grad_norm": 1.7709016524954961, - "learning_rate": 2.3208003830316554e-06, - "loss": 1.0282, - "step": 5159 - }, - { - "epoch": 0.4653469811065518, - "grad_norm": 1.577874569804355, - "learning_rate": 2.3202237230467215e-06, - "loss": 1.0703, - "step": 5160 - }, - { - "epoch": 0.4654371646300221, - "grad_norm": 1.3466332132288545, - "learning_rate": 2.3196470357385338e-06, - "loss": 0.9385, - "step": 5161 - }, - { - "epoch": 0.46552734815349234, - "grad_norm": 1.3809311563484667, - "learning_rate": 2.319070321156299e-06, - "loss": 0.9692, - "step": 5162 - }, - { - "epoch": 0.4656175316769626, - "grad_norm": 1.6064860515588517, - "learning_rate": 2.318493579349224e-06, - "loss": 1.1146, - "step": 5163 - }, - { - "epoch": 0.46570771520043286, - "grad_norm": 1.853062018470865, - "learning_rate": 2.317916810366522e-06, - "loss": 0.9741, - "step": 5164 - }, - { - "epoch": 0.46579789872390315, - "grad_norm": 1.4102379096313766, - "learning_rate": 2.317340014257404e-06, - "loss": 0.9235, - "step": 5165 - }, - { - "epoch": 0.4658880822473734, - "grad_norm": 1.4627063670831346, - "learning_rate": 2.316763191071086e-06, - "loss": 0.907, - "step": 5166 - }, - { - "epoch": 0.46597826577084367, - "grad_norm": 2.991973470896645, - "learning_rate": 2.316186340856787e-06, - "loss": 0.9392, - "step": 5167 - }, - { - "epoch": 0.4660684492943139, - "grad_norm": 1.8163177796835095, - "learning_rate": 2.315609463663725e-06, - "loss": 0.9254, - "step": 5168 - }, - { - "epoch": 0.4661586328177842, - "grad_norm": 1.5116107123357498, - "learning_rate": 2.315032559541123e-06, - "loss": 0.9525, - "step": 5169 - }, - { - "epoch": 0.4662488163412545, - "grad_norm": 1.806779515881614, - "learning_rate": 2.314455628538207e-06, - "loss": 1.0019, - "step": 5170 - }, - { - "epoch": 0.4663389998647247, - "grad_norm": 1.2183207220493717, - "learning_rate": 2.3138786707042023e-06, - "loss": 0.9495, - "step": 5171 - }, - { - "epoch": 0.466429183388195, - "grad_norm": 1.2762153144031148, - "learning_rate": 2.3133016860883387e-06, - "loss": 0.9907, - "step": 5172 - }, - { - "epoch": 0.46651936691166523, - "grad_norm": 1.7568450978386463, - "learning_rate": 2.3127246747398475e-06, - "loss": 0.9642, - "step": 5173 - }, - { - "epoch": 0.4666095504351355, - "grad_norm": 1.5441346218340808, - "learning_rate": 2.312147636707963e-06, - "loss": 0.9767, - "step": 5174 - }, - { - "epoch": 0.46669973395860576, - "grad_norm": 1.7592713808270442, - "learning_rate": 2.3115705720419214e-06, - "loss": 0.9695, - "step": 5175 - }, - { - "epoch": 0.46678991748207604, - "grad_norm": 1.6314370965174196, - "learning_rate": 2.31099348079096e-06, - "loss": 1.0021, - "step": 5176 - }, - { - "epoch": 0.4668801010055463, - "grad_norm": 1.1700749623501119, - "learning_rate": 2.31041636300432e-06, - "loss": 1.0164, - "step": 5177 - }, - { - "epoch": 0.46697028452901657, - "grad_norm": 1.9325547017871914, - "learning_rate": 2.3098392187312445e-06, - "loss": 1.0627, - "step": 5178 - }, - { - "epoch": 0.4670604680524868, - "grad_norm": 1.9652955766223081, - "learning_rate": 2.309262048020978e-06, - "loss": 0.9609, - "step": 5179 - }, - { - "epoch": 0.4671506515759571, - "grad_norm": 2.1026024496584736, - "learning_rate": 2.308684850922769e-06, - "loss": 1.0145, - "step": 5180 - }, - { - "epoch": 0.4672408350994273, - "grad_norm": 1.5917935049571303, - "learning_rate": 2.3081076274858664e-06, - "loss": 0.9837, - "step": 5181 - }, - { - "epoch": 0.4673310186228976, - "grad_norm": 1.6361943317499332, - "learning_rate": 2.307530377759522e-06, - "loss": 1.0158, - "step": 5182 - }, - { - "epoch": 0.46742120214636784, - "grad_norm": 3.0685298491094213, - "learning_rate": 2.30695310179299e-06, - "loss": 0.9203, - "step": 5183 - }, - { - "epoch": 0.46751138566983813, - "grad_norm": 1.192550409958956, - "learning_rate": 2.3063757996355267e-06, - "loss": 0.9707, - "step": 5184 - }, - { - "epoch": 0.46760156919330836, - "grad_norm": 1.475311556740855, - "learning_rate": 2.3057984713363903e-06, - "loss": 1.0139, - "step": 5185 - }, - { - "epoch": 0.46769175271677865, - "grad_norm": 0.6318202068369382, - "learning_rate": 2.3052211169448436e-06, - "loss": 0.833, - "step": 5186 - }, - { - "epoch": 0.4677819362402489, - "grad_norm": 1.6308098151620132, - "learning_rate": 2.3046437365101474e-06, - "loss": 0.9303, - "step": 5187 - }, - { - "epoch": 0.4678721197637192, - "grad_norm": 2.3635640124938453, - "learning_rate": 2.3040663300815673e-06, - "loss": 0.9675, - "step": 5188 - }, - { - "epoch": 0.4679623032871894, - "grad_norm": 0.6466196154981008, - "learning_rate": 2.3034888977083723e-06, - "loss": 0.8192, - "step": 5189 - }, - { - "epoch": 0.4680524868106597, - "grad_norm": 1.4455998315529233, - "learning_rate": 2.30291143943983e-06, - "loss": 0.9061, - "step": 5190 - }, - { - "epoch": 0.46814267033412993, - "grad_norm": 1.3740879415135256, - "learning_rate": 2.3023339553252145e-06, - "loss": 0.943, - "step": 5191 - }, - { - "epoch": 0.4682328538576002, - "grad_norm": 2.829360944756411, - "learning_rate": 2.301756445413799e-06, - "loss": 1.0159, - "step": 5192 - }, - { - "epoch": 0.4683230373810705, - "grad_norm": 1.3789995718437345, - "learning_rate": 2.3011789097548585e-06, - "loss": 0.9791, - "step": 5193 - }, - { - "epoch": 0.46841322090454074, - "grad_norm": 1.3639109121997455, - "learning_rate": 2.3006013483976738e-06, - "loss": 0.9741, - "step": 5194 - }, - { - "epoch": 0.468503404428011, - "grad_norm": 1.5367383109138877, - "learning_rate": 2.300023761391524e-06, - "loss": 0.9812, - "step": 5195 - }, - { - "epoch": 0.46859358795148126, - "grad_norm": 1.8732990178713238, - "learning_rate": 2.299446148785693e-06, - "loss": 1.0292, - "step": 5196 - }, - { - "epoch": 0.46868377147495155, - "grad_norm": 2.2527355942211025, - "learning_rate": 2.2988685106294654e-06, - "loss": 0.9624, - "step": 5197 - }, - { - "epoch": 0.4687739549984218, - "grad_norm": 1.6875875238397287, - "learning_rate": 2.2982908469721284e-06, - "loss": 0.8814, - "step": 5198 - }, - { - "epoch": 0.46886413852189207, - "grad_norm": 4.303428064116716, - "learning_rate": 2.2977131578629714e-06, - "loss": 1.0495, - "step": 5199 - }, - { - "epoch": 0.4689543220453623, - "grad_norm": 1.4104859872721662, - "learning_rate": 2.297135443351286e-06, - "loss": 1.007, - "step": 5200 - }, - { - "epoch": 0.4690445055688326, - "grad_norm": 1.2828685028081137, - "learning_rate": 2.296557703486367e-06, - "loss": 0.996, - "step": 5201 - }, - { - "epoch": 0.4691346890923028, - "grad_norm": 1.8581717949808827, - "learning_rate": 2.295979938317509e-06, - "loss": 1.0075, - "step": 5202 - }, - { - "epoch": 0.4692248726157731, - "grad_norm": 1.8083627112444316, - "learning_rate": 2.295402147894011e-06, - "loss": 0.9794, - "step": 5203 - }, - { - "epoch": 0.46931505613924335, - "grad_norm": 2.115578916602798, - "learning_rate": 2.2948243322651723e-06, - "loss": 0.9018, - "step": 5204 - }, - { - "epoch": 0.46940523966271364, - "grad_norm": 1.546026131807588, - "learning_rate": 2.2942464914802962e-06, - "loss": 0.9815, - "step": 5205 - }, - { - "epoch": 0.46949542318618387, - "grad_norm": 2.0124363480447376, - "learning_rate": 2.293668625588687e-06, - "loss": 0.9572, - "step": 5206 - }, - { - "epoch": 0.46958560670965416, - "grad_norm": 2.174275670075817, - "learning_rate": 2.293090734639651e-06, - "loss": 1.0177, - "step": 5207 - }, - { - "epoch": 0.4696757902331244, - "grad_norm": 1.4640756449725063, - "learning_rate": 2.2925128186824983e-06, - "loss": 1.1043, - "step": 5208 - }, - { - "epoch": 0.4697659737565947, - "grad_norm": 1.6974627662361783, - "learning_rate": 2.2919348777665384e-06, - "loss": 1.0812, - "step": 5209 - }, - { - "epoch": 0.4698561572800649, - "grad_norm": 1.6114936882271818, - "learning_rate": 2.2913569119410856e-06, - "loss": 0.9842, - "step": 5210 - }, - { - "epoch": 0.4699463408035352, - "grad_norm": 1.8658459045386473, - "learning_rate": 2.290778921255454e-06, - "loss": 1.0115, - "step": 5211 - }, - { - "epoch": 0.47003652432700543, - "grad_norm": 1.3802229346696577, - "learning_rate": 2.2902009057589613e-06, - "loss": 1.0182, - "step": 5212 - }, - { - "epoch": 0.4701267078504757, - "grad_norm": 1.2143760835855768, - "learning_rate": 2.2896228655009276e-06, - "loss": 0.942, - "step": 5213 - }, - { - "epoch": 0.47021689137394596, - "grad_norm": 1.5588090408511344, - "learning_rate": 2.289044800530674e-06, - "loss": 0.947, - "step": 5214 - }, - { - "epoch": 0.47030707489741624, - "grad_norm": 2.0334880530271624, - "learning_rate": 2.2884667108975245e-06, - "loss": 0.9525, - "step": 5215 - }, - { - "epoch": 0.4703972584208865, - "grad_norm": 2.281127926421317, - "learning_rate": 2.287888596650804e-06, - "loss": 1.0217, - "step": 5216 - }, - { - "epoch": 0.47048744194435677, - "grad_norm": 1.4983760467115752, - "learning_rate": 2.287310457839841e-06, - "loss": 0.9853, - "step": 5217 - }, - { - "epoch": 0.47057762546782705, - "grad_norm": 1.5689050240688056, - "learning_rate": 2.286732294513966e-06, - "loss": 1.0546, - "step": 5218 - }, - { - "epoch": 0.4706678089912973, - "grad_norm": 1.5579360683335264, - "learning_rate": 2.2861541067225106e-06, - "loss": 0.9911, - "step": 5219 - }, - { - "epoch": 0.4707579925147676, - "grad_norm": 1.8259878463633126, - "learning_rate": 2.2855758945148095e-06, - "loss": 0.9664, - "step": 5220 - }, - { - "epoch": 0.4708481760382378, - "grad_norm": 1.323543905509177, - "learning_rate": 2.2849976579401977e-06, - "loss": 1.0406, - "step": 5221 - }, - { - "epoch": 0.4709383595617081, - "grad_norm": 3.0398450299968394, - "learning_rate": 2.284419397048014e-06, - "loss": 0.9776, - "step": 5222 - }, - { - "epoch": 0.47102854308517833, - "grad_norm": 1.5158071506805209, - "learning_rate": 2.2838411118875997e-06, - "loss": 0.9931, - "step": 5223 - }, - { - "epoch": 0.4711187266086486, - "grad_norm": 1.511889071073808, - "learning_rate": 2.283262802508296e-06, - "loss": 0.9128, - "step": 5224 - }, - { - "epoch": 0.47120891013211885, - "grad_norm": 1.232361176895852, - "learning_rate": 2.2826844689594492e-06, - "loss": 0.9593, - "step": 5225 - }, - { - "epoch": 0.47129909365558914, - "grad_norm": 1.2861043105736356, - "learning_rate": 2.282106111290404e-06, - "loss": 1.0602, - "step": 5226 - }, - { - "epoch": 0.4713892771790594, - "grad_norm": 0.66971540466786, - "learning_rate": 2.2815277295505098e-06, - "loss": 0.8591, - "step": 5227 - }, - { - "epoch": 0.47147946070252966, - "grad_norm": 1.67110047269934, - "learning_rate": 2.2809493237891174e-06, - "loss": 0.9323, - "step": 5228 - }, - { - "epoch": 0.4715696442259999, - "grad_norm": 1.368924372813996, - "learning_rate": 2.2803708940555796e-06, - "loss": 1.0048, - "step": 5229 - }, - { - "epoch": 0.4716598277494702, - "grad_norm": 1.2205521879341186, - "learning_rate": 2.2797924403992514e-06, - "loss": 1.0573, - "step": 5230 - }, - { - "epoch": 0.4717500112729404, - "grad_norm": 1.722777693224052, - "learning_rate": 2.2792139628694892e-06, - "loss": 1.0038, - "step": 5231 - }, - { - "epoch": 0.4718401947964107, - "grad_norm": 1.486091344272117, - "learning_rate": 2.2786354615156524e-06, - "loss": 0.9485, - "step": 5232 - }, - { - "epoch": 0.47193037831988094, - "grad_norm": 3.201412133725269, - "learning_rate": 2.2780569363871016e-06, - "loss": 1.0451, - "step": 5233 - }, - { - "epoch": 0.4720205618433512, - "grad_norm": 0.6280917467263486, - "learning_rate": 2.277478387533199e-06, - "loss": 0.8246, - "step": 5234 - }, - { - "epoch": 0.47211074536682146, - "grad_norm": 1.560632810744973, - "learning_rate": 2.276899815003311e-06, - "loss": 1.0257, - "step": 5235 - }, - { - "epoch": 0.47220092889029175, - "grad_norm": 1.3951772267553328, - "learning_rate": 2.2763212188468045e-06, - "loss": 0.9717, - "step": 5236 - }, - { - "epoch": 0.472291112413762, - "grad_norm": 1.1776194350298703, - "learning_rate": 2.2757425991130473e-06, - "loss": 0.9975, - "step": 5237 - }, - { - "epoch": 0.47238129593723227, - "grad_norm": 0.8235365227183908, - "learning_rate": 2.2751639558514117e-06, - "loss": 0.8658, - "step": 5238 - }, - { - "epoch": 0.4724714794607025, - "grad_norm": 2.3546462006671325, - "learning_rate": 2.2745852891112697e-06, - "loss": 1.0235, - "step": 5239 - }, - { - "epoch": 0.4725616629841728, - "grad_norm": 1.7974900229738215, - "learning_rate": 2.274006598941997e-06, - "loss": 0.9926, - "step": 5240 - }, - { - "epoch": 0.4726518465076431, - "grad_norm": 1.7023808664772198, - "learning_rate": 2.27342788539297e-06, - "loss": 0.9668, - "step": 5241 - }, - { - "epoch": 0.4727420300311133, - "grad_norm": 1.3851754640644025, - "learning_rate": 2.2728491485135684e-06, - "loss": 0.944, - "step": 5242 - }, - { - "epoch": 0.4728322135545836, - "grad_norm": 1.332454312169297, - "learning_rate": 2.272270388353173e-06, - "loss": 1.0372, - "step": 5243 - }, - { - "epoch": 0.47292239707805384, - "grad_norm": 1.513042727742588, - "learning_rate": 2.2716916049611666e-06, - "loss": 0.9628, - "step": 5244 - }, - { - "epoch": 0.4730125806015241, - "grad_norm": 2.3458829202600087, - "learning_rate": 2.2711127983869346e-06, - "loss": 0.9688, - "step": 5245 - }, - { - "epoch": 0.47310276412499436, - "grad_norm": 1.2009881600170331, - "learning_rate": 2.270533968679864e-06, - "loss": 0.9922, - "step": 5246 - }, - { - "epoch": 0.47319294764846465, - "grad_norm": 1.4830557331254117, - "learning_rate": 2.269955115889343e-06, - "loss": 0.9897, - "step": 5247 - }, - { - "epoch": 0.4732831311719349, - "grad_norm": 1.4711865444970282, - "learning_rate": 2.269376240064763e-06, - "loss": 0.9447, - "step": 5248 - }, - { - "epoch": 0.47337331469540517, - "grad_norm": 1.5709939872825749, - "learning_rate": 2.268797341255517e-06, - "loss": 1.0008, - "step": 5249 - }, - { - "epoch": 0.4734634982188754, - "grad_norm": 1.7073134269824952, - "learning_rate": 2.268218419511e-06, - "loss": 0.9, - "step": 5250 - }, - { - "epoch": 0.4735536817423457, - "grad_norm": 1.6589027609262517, - "learning_rate": 2.267639474880608e-06, - "loss": 0.9067, - "step": 5251 - }, - { - "epoch": 0.4736438652658159, - "grad_norm": 1.410177933375644, - "learning_rate": 2.2670605074137407e-06, - "loss": 0.9591, - "step": 5252 - }, - { - "epoch": 0.4737340487892862, - "grad_norm": 1.4922621144119188, - "learning_rate": 2.2664815171597983e-06, - "loss": 0.9683, - "step": 5253 - }, - { - "epoch": 0.47382423231275644, - "grad_norm": 1.5075440637275583, - "learning_rate": 2.265902504168183e-06, - "loss": 0.9342, - "step": 5254 - }, - { - "epoch": 0.47391441583622673, - "grad_norm": 1.712453510182432, - "learning_rate": 2.2653234684883007e-06, - "loss": 0.9465, - "step": 5255 - }, - { - "epoch": 0.47400459935969697, - "grad_norm": 2.0215837979399978, - "learning_rate": 2.264744410169556e-06, - "loss": 0.9063, - "step": 5256 - }, - { - "epoch": 0.47409478288316725, - "grad_norm": 1.507288312963179, - "learning_rate": 2.264165329261359e-06, - "loss": 0.9649, - "step": 5257 - }, - { - "epoch": 0.4741849664066375, - "grad_norm": 0.6328738383011815, - "learning_rate": 2.26358622581312e-06, - "loss": 0.8371, - "step": 5258 - }, - { - "epoch": 0.4742751499301078, - "grad_norm": 1.4407571297581188, - "learning_rate": 2.2630070998742504e-06, - "loss": 1.0469, - "step": 5259 - }, - { - "epoch": 0.474365333453578, - "grad_norm": 2.740093942688517, - "learning_rate": 2.262427951494165e-06, - "loss": 1.0039, - "step": 5260 - }, - { - "epoch": 0.4744555169770483, - "grad_norm": 0.6814872109982668, - "learning_rate": 2.2618487807222794e-06, - "loss": 0.8734, - "step": 5261 - }, - { - "epoch": 0.47454570050051853, - "grad_norm": 1.4595698518572386, - "learning_rate": 2.261269587608012e-06, - "loss": 1.0643, - "step": 5262 - }, - { - "epoch": 0.4746358840239888, - "grad_norm": 1.917403563221726, - "learning_rate": 2.260690372200783e-06, - "loss": 0.9828, - "step": 5263 - }, - { - "epoch": 0.47472606754745905, - "grad_norm": 1.2541637215732877, - "learning_rate": 2.2601111345500138e-06, - "loss": 1.0664, - "step": 5264 - }, - { - "epoch": 0.47481625107092934, - "grad_norm": 1.739123331190197, - "learning_rate": 2.2595318747051286e-06, - "loss": 0.9718, - "step": 5265 - }, - { - "epoch": 0.47490643459439963, - "grad_norm": 1.9103225604403482, - "learning_rate": 2.258952592715553e-06, - "loss": 1.0116, - "step": 5266 - }, - { - "epoch": 0.47499661811786986, - "grad_norm": 0.7380415915991292, - "learning_rate": 2.2583732886307142e-06, - "loss": 0.888, - "step": 5267 - }, - { - "epoch": 0.47508680164134015, - "grad_norm": 1.4915212372084083, - "learning_rate": 2.2577939625000414e-06, - "loss": 0.9369, - "step": 5268 - }, - { - "epoch": 0.4751769851648104, - "grad_norm": 0.7208862485555373, - "learning_rate": 2.257214614372967e-06, - "loss": 0.8448, - "step": 5269 - }, - { - "epoch": 0.4752671686882807, - "grad_norm": 4.938999576041062, - "learning_rate": 2.2566352442989227e-06, - "loss": 1.0085, - "step": 5270 - }, - { - "epoch": 0.4753573522117509, - "grad_norm": 1.4544931349502168, - "learning_rate": 2.256055852327344e-06, - "loss": 0.9823, - "step": 5271 - }, - { - "epoch": 0.4754475357352212, - "grad_norm": 1.5037034727843521, - "learning_rate": 2.2554764385076685e-06, - "loss": 0.9236, - "step": 5272 - }, - { - "epoch": 0.4755377192586914, - "grad_norm": 1.3521126305570756, - "learning_rate": 2.2548970028893348e-06, - "loss": 0.9979, - "step": 5273 - }, - { - "epoch": 0.4756279027821617, - "grad_norm": 1.665658606213471, - "learning_rate": 2.254317545521783e-06, - "loss": 1.0031, - "step": 5274 - }, - { - "epoch": 0.47571808630563195, - "grad_norm": 1.4704706380611285, - "learning_rate": 2.253738066454457e-06, - "loss": 1.0136, - "step": 5275 - }, - { - "epoch": 0.47580826982910224, - "grad_norm": 1.3298075172419532, - "learning_rate": 2.2531585657367986e-06, - "loss": 0.9537, - "step": 5276 - }, - { - "epoch": 0.47589845335257247, - "grad_norm": 1.6753883167908223, - "learning_rate": 2.252579043418256e-06, - "loss": 0.9318, - "step": 5277 - }, - { - "epoch": 0.47598863687604276, - "grad_norm": 1.5892876629641421, - "learning_rate": 2.251999499548277e-06, - "loss": 0.9593, - "step": 5278 - }, - { - "epoch": 0.476078820399513, - "grad_norm": 1.687975145486928, - "learning_rate": 2.251419934176311e-06, - "loss": 0.9444, - "step": 5279 - }, - { - "epoch": 0.4761690039229833, - "grad_norm": 1.8521008574003945, - "learning_rate": 2.25084034735181e-06, - "loss": 1.0345, - "step": 5280 - }, - { - "epoch": 0.4762591874464535, - "grad_norm": 1.3972151118216185, - "learning_rate": 2.2502607391242274e-06, - "loss": 1.0336, - "step": 5281 - }, - { - "epoch": 0.4763493709699238, - "grad_norm": 0.6570482167923349, - "learning_rate": 2.2496811095430182e-06, - "loss": 0.8577, - "step": 5282 - }, - { - "epoch": 0.47643955449339404, - "grad_norm": 1.7331165181174935, - "learning_rate": 2.249101458657641e-06, - "loss": 0.9785, - "step": 5283 - }, - { - "epoch": 0.4765297380168643, - "grad_norm": 1.8193219809373093, - "learning_rate": 2.2485217865175526e-06, - "loss": 1.0541, - "step": 5284 - }, - { - "epoch": 0.47661992154033456, - "grad_norm": 1.4704622879510563, - "learning_rate": 2.2479420931722156e-06, - "loss": 1.1161, - "step": 5285 - }, - { - "epoch": 0.47671010506380485, - "grad_norm": 0.6146594312937431, - "learning_rate": 2.2473623786710923e-06, - "loss": 0.8709, - "step": 5286 - }, - { - "epoch": 0.4768002885872751, - "grad_norm": 1.622221699080245, - "learning_rate": 2.2467826430636465e-06, - "loss": 0.9035, - "step": 5287 - }, - { - "epoch": 0.47689047211074537, - "grad_norm": 1.2605958080930764, - "learning_rate": 2.246202886399345e-06, - "loss": 1.0083, - "step": 5288 - }, - { - "epoch": 0.47698065563421566, - "grad_norm": 1.4928676792589084, - "learning_rate": 2.2456231087276556e-06, - "loss": 0.9875, - "step": 5289 - }, - { - "epoch": 0.4770708391576859, - "grad_norm": 1.3162363548332299, - "learning_rate": 2.245043310098048e-06, - "loss": 0.9112, - "step": 5290 - }, - { - "epoch": 0.4771610226811562, - "grad_norm": 1.8016365982261437, - "learning_rate": 2.244463490559995e-06, - "loss": 1.0236, - "step": 5291 - }, - { - "epoch": 0.4772512062046264, - "grad_norm": 1.849387650054953, - "learning_rate": 2.2438836501629683e-06, - "loss": 0.9944, - "step": 5292 - }, - { - "epoch": 0.4773413897280967, - "grad_norm": 1.1927523653434307, - "learning_rate": 2.2433037889564437e-06, - "loss": 0.9889, - "step": 5293 - }, - { - "epoch": 0.47743157325156693, - "grad_norm": 8.540653174835754, - "learning_rate": 2.242723906989899e-06, - "loss": 0.9193, - "step": 5294 - }, - { - "epoch": 0.4775217567750372, - "grad_norm": 1.3426642356163627, - "learning_rate": 2.2421440043128114e-06, - "loss": 0.9109, - "step": 5295 - }, - { - "epoch": 0.47761194029850745, - "grad_norm": 1.5295052222031489, - "learning_rate": 2.241564080974662e-06, - "loss": 0.8946, - "step": 5296 - }, - { - "epoch": 0.47770212382197774, - "grad_norm": 1.8688805218792919, - "learning_rate": 2.2409841370249343e-06, - "loss": 0.9341, - "step": 5297 - }, - { - "epoch": 0.477792307345448, - "grad_norm": 2.294771957370542, - "learning_rate": 2.2404041725131106e-06, - "loss": 1.0774, - "step": 5298 - }, - { - "epoch": 0.47788249086891826, - "grad_norm": 1.6334903168007415, - "learning_rate": 2.239824187488677e-06, - "loss": 0.9636, - "step": 5299 - }, - { - "epoch": 0.4779726743923885, - "grad_norm": 0.7409919344865922, - "learning_rate": 2.239244182001122e-06, - "loss": 0.9026, - "step": 5300 - }, - { - "epoch": 0.4780628579158588, - "grad_norm": 1.3516876515563776, - "learning_rate": 2.2386641560999336e-06, - "loss": 0.999, - "step": 5301 - }, - { - "epoch": 0.478153041439329, - "grad_norm": 2.145117952175954, - "learning_rate": 2.238084109834604e-06, - "loss": 1.0163, - "step": 5302 - }, - { - "epoch": 0.4782432249627993, - "grad_norm": 1.693737722542384, - "learning_rate": 2.237504043254625e-06, - "loss": 0.9827, - "step": 5303 - }, - { - "epoch": 0.47833340848626954, - "grad_norm": 1.3737444780844452, - "learning_rate": 2.2369239564094915e-06, - "loss": 0.9566, - "step": 5304 - }, - { - "epoch": 0.47842359200973983, - "grad_norm": 1.8523889942347191, - "learning_rate": 2.2363438493486995e-06, - "loss": 0.9061, - "step": 5305 - }, - { - "epoch": 0.47851377553321006, - "grad_norm": 1.5654265652992492, - "learning_rate": 2.235763722121747e-06, - "loss": 0.8657, - "step": 5306 - }, - { - "epoch": 0.47860395905668035, - "grad_norm": 1.3962145991605945, - "learning_rate": 2.2351835747781346e-06, - "loss": 1.006, - "step": 5307 - }, - { - "epoch": 0.4786941425801506, - "grad_norm": 1.5547657855849262, - "learning_rate": 2.234603407367362e-06, - "loss": 1.0223, - "step": 5308 - }, - { - "epoch": 0.47878432610362087, - "grad_norm": 1.5204392885069105, - "learning_rate": 2.2340232199389337e-06, - "loss": 1.0015, - "step": 5309 - }, - { - "epoch": 0.4788745096270911, - "grad_norm": 1.3834241818083612, - "learning_rate": 2.2334430125423538e-06, - "loss": 0.8638, - "step": 5310 - }, - { - "epoch": 0.4789646931505614, - "grad_norm": 9.355017767052876, - "learning_rate": 2.232862785227128e-06, - "loss": 1.0318, - "step": 5311 - }, - { - "epoch": 0.4790548766740317, - "grad_norm": 2.198068863599586, - "learning_rate": 2.232282538042766e-06, - "loss": 1.013, - "step": 5312 - }, - { - "epoch": 0.4791450601975019, - "grad_norm": 1.7107719445800789, - "learning_rate": 2.231702271038777e-06, - "loss": 1.0212, - "step": 5313 - }, - { - "epoch": 0.4792352437209722, - "grad_norm": 1.6665269395749291, - "learning_rate": 2.231121984264673e-06, - "loss": 0.964, - "step": 5314 - }, - { - "epoch": 0.47932542724444244, - "grad_norm": 1.5632424688604691, - "learning_rate": 2.2305416777699665e-06, - "loss": 1.007, - "step": 5315 - }, - { - "epoch": 0.4794156107679127, - "grad_norm": 1.473834065117734, - "learning_rate": 2.229961351604173e-06, - "loss": 0.9877, - "step": 5316 - }, - { - "epoch": 0.47950579429138296, - "grad_norm": 1.5373305436699318, - "learning_rate": 2.2293810058168085e-06, - "loss": 0.9396, - "step": 5317 - }, - { - "epoch": 0.47959597781485325, - "grad_norm": 1.4018043181874398, - "learning_rate": 2.2288006404573922e-06, - "loss": 1.0005, - "step": 5318 - }, - { - "epoch": 0.4796861613383235, - "grad_norm": 1.5662075306705137, - "learning_rate": 2.228220255575444e-06, - "loss": 0.9711, - "step": 5319 - }, - { - "epoch": 0.47977634486179377, - "grad_norm": 1.3406644926500897, - "learning_rate": 2.2276398512204847e-06, - "loss": 0.9333, - "step": 5320 - }, - { - "epoch": 0.479866528385264, - "grad_norm": 1.6315007393426415, - "learning_rate": 2.2270594274420382e-06, - "loss": 0.9875, - "step": 5321 - }, - { - "epoch": 0.4799567119087343, - "grad_norm": 0.6858364265375231, - "learning_rate": 2.22647898428963e-06, - "loss": 0.8695, - "step": 5322 - }, - { - "epoch": 0.4800468954322045, - "grad_norm": 1.4315660152796013, - "learning_rate": 2.225898521812785e-06, - "loss": 1.0296, - "step": 5323 - }, - { - "epoch": 0.4801370789556748, - "grad_norm": 1.5843063760977913, - "learning_rate": 2.2253180400610337e-06, - "loss": 1.032, - "step": 5324 - }, - { - "epoch": 0.48022726247914505, - "grad_norm": 2.0145377842021905, - "learning_rate": 2.2247375390839037e-06, - "loss": 0.9563, - "step": 5325 - }, - { - "epoch": 0.48031744600261533, - "grad_norm": 1.8865197750079303, - "learning_rate": 2.224157018930928e-06, - "loss": 0.9504, - "step": 5326 - }, - { - "epoch": 0.48040762952608557, - "grad_norm": 1.4428707632719588, - "learning_rate": 2.2235764796516395e-06, - "loss": 0.9846, - "step": 5327 - }, - { - "epoch": 0.48049781304955586, - "grad_norm": 1.6462809843190331, - "learning_rate": 2.222995921295573e-06, - "loss": 0.9858, - "step": 5328 - }, - { - "epoch": 0.4805879965730261, - "grad_norm": 1.5853429722879258, - "learning_rate": 2.222415343912265e-06, - "loss": 0.9547, - "step": 5329 - }, - { - "epoch": 0.4806781800964964, - "grad_norm": 1.9614485724380684, - "learning_rate": 2.221834747551254e-06, - "loss": 1.0469, - "step": 5330 - }, - { - "epoch": 0.4807683636199666, - "grad_norm": 1.7542350477842916, - "learning_rate": 2.221254132262078e-06, - "loss": 0.9556, - "step": 5331 - }, - { - "epoch": 0.4808585471434369, - "grad_norm": 1.6455386014599152, - "learning_rate": 2.2206734980942802e-06, - "loss": 1.0261, - "step": 5332 - }, - { - "epoch": 0.48094873066690713, - "grad_norm": 1.5925535311003414, - "learning_rate": 2.2200928450974024e-06, - "loss": 0.9595, - "step": 5333 - }, - { - "epoch": 0.4810389141903774, - "grad_norm": 1.4519899098332076, - "learning_rate": 2.21951217332099e-06, - "loss": 1.0027, - "step": 5334 - }, - { - "epoch": 0.48112909771384765, - "grad_norm": 1.9413891797303477, - "learning_rate": 2.2189314828145883e-06, - "loss": 0.9027, - "step": 5335 - }, - { - "epoch": 0.48121928123731794, - "grad_norm": 2.1207170520774157, - "learning_rate": 2.2183507736277453e-06, - "loss": 0.9705, - "step": 5336 - }, - { - "epoch": 0.48130946476078823, - "grad_norm": 1.3611771925414171, - "learning_rate": 2.2177700458100107e-06, - "loss": 0.9942, - "step": 5337 - }, - { - "epoch": 0.48139964828425846, - "grad_norm": 1.5956130264974553, - "learning_rate": 2.2171892994109346e-06, - "loss": 0.9653, - "step": 5338 - }, - { - "epoch": 0.48148983180772875, - "grad_norm": 1.7544268067106648, - "learning_rate": 2.21660853448007e-06, - "loss": 1.0429, - "step": 5339 - }, - { - "epoch": 0.481580015331199, - "grad_norm": 1.6325462160680986, - "learning_rate": 2.2160277510669703e-06, - "loss": 0.942, - "step": 5340 - }, - { - "epoch": 0.4816701988546693, - "grad_norm": 1.242039317763378, - "learning_rate": 2.215446949221193e-06, - "loss": 0.98, - "step": 5341 - }, - { - "epoch": 0.4817603823781395, - "grad_norm": 1.8044378595135453, - "learning_rate": 2.2148661289922924e-06, - "loss": 0.9894, - "step": 5342 - }, - { - "epoch": 0.4818505659016098, - "grad_norm": 1.6817319271454274, - "learning_rate": 2.21428529042983e-06, - "loss": 1.0357, - "step": 5343 - }, - { - "epoch": 0.48194074942508003, - "grad_norm": 1.76173032092843, - "learning_rate": 2.2137044335833647e-06, - "loss": 1.037, - "step": 5344 - }, - { - "epoch": 0.4820309329485503, - "grad_norm": 1.5131041021845957, - "learning_rate": 2.213123558502459e-06, - "loss": 0.9686, - "step": 5345 - }, - { - "epoch": 0.48212111647202055, - "grad_norm": 1.573119127325494, - "learning_rate": 2.2125426652366763e-06, - "loss": 0.9973, - "step": 5346 - }, - { - "epoch": 0.48221129999549084, - "grad_norm": 1.444066433268649, - "learning_rate": 2.211961753835581e-06, - "loss": 1.0783, - "step": 5347 - }, - { - "epoch": 0.48230148351896107, - "grad_norm": 1.5061763919557007, - "learning_rate": 2.21138082434874e-06, - "loss": 0.976, - "step": 5348 - }, - { - "epoch": 0.48239166704243136, - "grad_norm": 2.0936996681654403, - "learning_rate": 2.210799876825722e-06, - "loss": 1.0188, - "step": 5349 - }, - { - "epoch": 0.4824818505659016, - "grad_norm": 1.6112644992173037, - "learning_rate": 2.210218911316096e-06, - "loss": 0.9526, - "step": 5350 - }, - { - "epoch": 0.4825720340893719, - "grad_norm": 2.9161935331696336, - "learning_rate": 2.2096379278694336e-06, - "loss": 0.9382, - "step": 5351 - }, - { - "epoch": 0.4826622176128421, - "grad_norm": 1.5691648626224293, - "learning_rate": 2.2090569265353074e-06, - "loss": 0.9241, - "step": 5352 - }, - { - "epoch": 0.4827524011363124, - "grad_norm": 1.8261468732621755, - "learning_rate": 2.2084759073632912e-06, - "loss": 0.9777, - "step": 5353 - }, - { - "epoch": 0.48284258465978264, - "grad_norm": 1.577998316750638, - "learning_rate": 2.2078948704029606e-06, - "loss": 0.9453, - "step": 5354 - }, - { - "epoch": 0.4829327681832529, - "grad_norm": 1.5870726333034468, - "learning_rate": 2.2073138157038935e-06, - "loss": 0.9016, - "step": 5355 - }, - { - "epoch": 0.48302295170672316, - "grad_norm": 1.4365520668937157, - "learning_rate": 2.2067327433156687e-06, - "loss": 0.9735, - "step": 5356 - }, - { - "epoch": 0.48311313523019345, - "grad_norm": 1.8902142291287336, - "learning_rate": 2.2061516532878667e-06, - "loss": 1.0041, - "step": 5357 - }, - { - "epoch": 0.4832033187536637, - "grad_norm": 1.3028213952324263, - "learning_rate": 2.2055705456700686e-06, - "loss": 1.0281, - "step": 5358 - }, - { - "epoch": 0.48329350227713397, - "grad_norm": 1.5344021330569002, - "learning_rate": 2.204989420511858e-06, - "loss": 0.8829, - "step": 5359 - }, - { - "epoch": 0.48338368580060426, - "grad_norm": 1.3916846534952414, - "learning_rate": 2.20440827786282e-06, - "loss": 0.9276, - "step": 5360 - }, - { - "epoch": 0.4834738693240745, - "grad_norm": 1.5471706685795892, - "learning_rate": 2.20382711777254e-06, - "loss": 0.9844, - "step": 5361 - }, - { - "epoch": 0.4835640528475448, - "grad_norm": 1.720414067041058, - "learning_rate": 2.203245940290607e-06, - "loss": 0.9916, - "step": 5362 - }, - { - "epoch": 0.483654236371015, - "grad_norm": 2.1876535634226704, - "learning_rate": 2.2026647454666097e-06, - "loss": 0.9797, - "step": 5363 - }, - { - "epoch": 0.4837444198944853, - "grad_norm": 1.9241671655565322, - "learning_rate": 2.2020835333501384e-06, - "loss": 1.0158, - "step": 5364 - }, - { - "epoch": 0.48383460341795553, - "grad_norm": 1.396221856471386, - "learning_rate": 2.2015023039907863e-06, - "loss": 1.0368, - "step": 5365 - }, - { - "epoch": 0.4839247869414258, - "grad_norm": 1.3886186077133214, - "learning_rate": 2.2009210574381464e-06, - "loss": 0.9656, - "step": 5366 - }, - { - "epoch": 0.48401497046489605, - "grad_norm": 1.4454325033320798, - "learning_rate": 2.2003397937418134e-06, - "loss": 0.9328, - "step": 5367 - }, - { - "epoch": 0.48410515398836634, - "grad_norm": 0.6805993901838809, - "learning_rate": 2.1997585129513852e-06, - "loss": 0.87, - "step": 5368 - }, - { - "epoch": 0.4841953375118366, - "grad_norm": 1.4414322186536903, - "learning_rate": 2.1991772151164595e-06, - "loss": 1.0108, - "step": 5369 - }, - { - "epoch": 0.48428552103530687, - "grad_norm": 1.2870556092211876, - "learning_rate": 2.1985959002866346e-06, - "loss": 1.0269, - "step": 5370 - }, - { - "epoch": 0.4843757045587771, - "grad_norm": 1.8576068966528052, - "learning_rate": 2.198014568511513e-06, - "loss": 1.0485, - "step": 5371 - }, - { - "epoch": 0.4844658880822474, - "grad_norm": 0.6810763408958964, - "learning_rate": 2.1974332198406965e-06, - "loss": 0.8489, - "step": 5372 - }, - { - "epoch": 0.4845560716057176, - "grad_norm": 2.3908929612320775, - "learning_rate": 2.196851854323789e-06, - "loss": 0.9612, - "step": 5373 - }, - { - "epoch": 0.4846462551291879, - "grad_norm": 1.7158649853452805, - "learning_rate": 2.196270472010396e-06, - "loss": 0.9614, - "step": 5374 - }, - { - "epoch": 0.48473643865265814, - "grad_norm": 1.7730473160559321, - "learning_rate": 2.195689072950124e-06, - "loss": 0.9807, - "step": 5375 - }, - { - "epoch": 0.48482662217612843, - "grad_norm": 0.5898736605102567, - "learning_rate": 2.195107657192581e-06, - "loss": 0.8167, - "step": 5376 - }, - { - "epoch": 0.48491680569959866, - "grad_norm": 0.6155601244596033, - "learning_rate": 2.194526224787378e-06, - "loss": 0.7867, - "step": 5377 - }, - { - "epoch": 0.48500698922306895, - "grad_norm": 1.5274954718040366, - "learning_rate": 2.1939447757841236e-06, - "loss": 0.9499, - "step": 5378 - }, - { - "epoch": 0.4850971727465392, - "grad_norm": 1.6400974697130966, - "learning_rate": 2.193363310232432e-06, - "loss": 1.0446, - "step": 5379 - }, - { - "epoch": 0.4851873562700095, - "grad_norm": 1.220956321397443, - "learning_rate": 2.192781828181917e-06, - "loss": 1.0862, - "step": 5380 - }, - { - "epoch": 0.4852775397934797, - "grad_norm": 1.2494897754765286, - "learning_rate": 2.192200329682193e-06, - "loss": 1.0045, - "step": 5381 - }, - { - "epoch": 0.48536772331695, - "grad_norm": 1.382389887654993, - "learning_rate": 2.1916188147828767e-06, - "loss": 0.9847, - "step": 5382 - }, - { - "epoch": 0.48545790684042023, - "grad_norm": 1.400227684852535, - "learning_rate": 2.191037283533587e-06, - "loss": 0.9955, - "step": 5383 - }, - { - "epoch": 0.4855480903638905, - "grad_norm": 2.3958180192098593, - "learning_rate": 2.1904557359839428e-06, - "loss": 0.9846, - "step": 5384 - }, - { - "epoch": 0.4856382738873608, - "grad_norm": 1.7036026451104636, - "learning_rate": 2.189874172183565e-06, - "loss": 0.9652, - "step": 5385 - }, - { - "epoch": 0.48572845741083104, - "grad_norm": 1.613517910130044, - "learning_rate": 2.1892925921820763e-06, - "loss": 0.9486, - "step": 5386 - }, - { - "epoch": 0.4858186409343013, - "grad_norm": 1.7259304218744547, - "learning_rate": 2.1887109960290994e-06, - "loss": 0.9765, - "step": 5387 - }, - { - "epoch": 0.48590882445777156, - "grad_norm": 1.414395372190931, - "learning_rate": 2.18812938377426e-06, - "loss": 1.0758, - "step": 5388 - }, - { - "epoch": 0.48599900798124185, - "grad_norm": 1.3483668568518112, - "learning_rate": 2.187547755467184e-06, - "loss": 1.0256, - "step": 5389 - }, - { - "epoch": 0.4860891915047121, - "grad_norm": 1.427011873261338, - "learning_rate": 2.1869661111574994e-06, - "loss": 0.8774, - "step": 5390 - }, - { - "epoch": 0.48617937502818237, - "grad_norm": 6.290014121520379, - "learning_rate": 2.1863844508948353e-06, - "loss": 1.0322, - "step": 5391 - }, - { - "epoch": 0.4862695585516526, - "grad_norm": 1.2415080101409253, - "learning_rate": 2.185802774728823e-06, - "loss": 0.9955, - "step": 5392 - }, - { - "epoch": 0.4863597420751229, - "grad_norm": 1.2782989570110286, - "learning_rate": 2.1852210827090927e-06, - "loss": 0.9357, - "step": 5393 - }, - { - "epoch": 0.4864499255985931, - "grad_norm": 1.3111177613786549, - "learning_rate": 2.184639374885278e-06, - "loss": 0.9223, - "step": 5394 - }, - { - "epoch": 0.4865401091220634, - "grad_norm": 11.236380577130747, - "learning_rate": 2.184057651307014e-06, - "loss": 1.0829, - "step": 5395 - }, - { - "epoch": 0.48663029264553365, - "grad_norm": 1.7271386475519073, - "learning_rate": 2.183475912023937e-06, - "loss": 1.0509, - "step": 5396 - }, - { - "epoch": 0.48672047616900393, - "grad_norm": 1.3760850266645415, - "learning_rate": 2.1828941570856826e-06, - "loss": 0.9914, - "step": 5397 - }, - { - "epoch": 0.48681065969247417, - "grad_norm": 0.6805277486773175, - "learning_rate": 2.1823123865418903e-06, - "loss": 0.8593, - "step": 5398 - }, - { - "epoch": 0.48690084321594446, - "grad_norm": 1.7096309463275958, - "learning_rate": 2.1817306004422e-06, - "loss": 0.9509, - "step": 5399 - }, - { - "epoch": 0.4869910267394147, - "grad_norm": 1.457519613039152, - "learning_rate": 2.1811487988362527e-06, - "loss": 1.0132, - "step": 5400 - }, - { - "epoch": 0.487081210262885, - "grad_norm": 1.803948056879395, - "learning_rate": 2.1805669817736917e-06, - "loss": 1.0513, - "step": 5401 - }, - { - "epoch": 0.4871713937863552, - "grad_norm": 1.5629694424190739, - "learning_rate": 2.17998514930416e-06, - "loss": 0.7998, - "step": 5402 - }, - { - "epoch": 0.4872615773098255, - "grad_norm": 1.690499924071614, - "learning_rate": 2.1794033014773025e-06, - "loss": 0.9835, - "step": 5403 - }, - { - "epoch": 0.48735176083329573, - "grad_norm": 1.2995715315514174, - "learning_rate": 2.178821438342766e-06, - "loss": 1.0507, - "step": 5404 - }, - { - "epoch": 0.487441944356766, - "grad_norm": 3.277793444220681, - "learning_rate": 2.1782395599501996e-06, - "loss": 0.973, - "step": 5405 - }, - { - "epoch": 0.48753212788023625, - "grad_norm": 0.6939790777081329, - "learning_rate": 2.1776576663492498e-06, - "loss": 0.8544, - "step": 5406 - }, - { - "epoch": 0.48762231140370654, - "grad_norm": 2.556775840639884, - "learning_rate": 2.177075757589569e-06, - "loss": 1.0654, - "step": 5407 - }, - { - "epoch": 0.48771249492717683, - "grad_norm": 1.2915920061663286, - "learning_rate": 2.176493833720808e-06, - "loss": 0.9032, - "step": 5408 - }, - { - "epoch": 0.48780267845064706, - "grad_norm": 1.4925947182917771, - "learning_rate": 2.1759118947926195e-06, - "loss": 0.9482, - "step": 5409 - }, - { - "epoch": 0.48789286197411735, - "grad_norm": 1.353854534660505, - "learning_rate": 2.1753299408546587e-06, - "loss": 0.9922, - "step": 5410 - }, - { - "epoch": 0.4879830454975876, - "grad_norm": 1.1629773400740429, - "learning_rate": 2.1747479719565803e-06, - "loss": 0.9902, - "step": 5411 - }, - { - "epoch": 0.4880732290210579, - "grad_norm": 1.5008770444958848, - "learning_rate": 2.174165988148042e-06, - "loss": 1.0433, - "step": 5412 - }, - { - "epoch": 0.4881634125445281, - "grad_norm": 1.4471915737054402, - "learning_rate": 2.1735839894787003e-06, - "loss": 0.9543, - "step": 5413 - }, - { - "epoch": 0.4882535960679984, - "grad_norm": 0.7730766379326296, - "learning_rate": 2.1730019759982163e-06, - "loss": 0.8984, - "step": 5414 - }, - { - "epoch": 0.48834377959146863, - "grad_norm": 1.3512096495695418, - "learning_rate": 2.172419947756249e-06, - "loss": 1.0357, - "step": 5415 - }, - { - "epoch": 0.4884339631149389, - "grad_norm": 1.454245842615395, - "learning_rate": 2.171837904802461e-06, - "loss": 1.0265, - "step": 5416 - }, - { - "epoch": 0.48852414663840915, - "grad_norm": 1.5045224203186796, - "learning_rate": 2.171255847186516e-06, - "loss": 0.9705, - "step": 5417 - }, - { - "epoch": 0.48861433016187944, - "grad_norm": 1.5087777640404227, - "learning_rate": 2.1706737749580783e-06, - "loss": 0.9644, - "step": 5418 - }, - { - "epoch": 0.4887045136853497, - "grad_norm": 2.1916609291417357, - "learning_rate": 2.1700916881668127e-06, - "loss": 1.0547, - "step": 5419 - }, - { - "epoch": 0.48879469720881996, - "grad_norm": 1.3939601038473677, - "learning_rate": 2.1695095868623862e-06, - "loss": 1.0653, - "step": 5420 - }, - { - "epoch": 0.4888848807322902, - "grad_norm": 1.280192753092, - "learning_rate": 2.168927471094467e-06, - "loss": 0.9233, - "step": 5421 - }, - { - "epoch": 0.4889750642557605, - "grad_norm": 1.892895556777285, - "learning_rate": 2.168345340912725e-06, - "loss": 0.9998, - "step": 5422 - }, - { - "epoch": 0.4890652477792307, - "grad_norm": 1.340279822383676, - "learning_rate": 2.1677631963668298e-06, - "loss": 0.9971, - "step": 5423 - }, - { - "epoch": 0.489155431302701, - "grad_norm": 1.424593245025605, - "learning_rate": 2.167181037506453e-06, - "loss": 0.9361, - "step": 5424 - }, - { - "epoch": 0.48924561482617124, - "grad_norm": 1.3152448018733929, - "learning_rate": 2.1665988643812693e-06, - "loss": 0.9202, - "step": 5425 - }, - { - "epoch": 0.4893357983496415, - "grad_norm": 1.4842949293525312, - "learning_rate": 2.166016677040951e-06, - "loss": 0.9766, - "step": 5426 - }, - { - "epoch": 0.48942598187311176, - "grad_norm": 1.307103142183403, - "learning_rate": 2.165434475535175e-06, - "loss": 0.9723, - "step": 5427 - }, - { - "epoch": 0.48951616539658205, - "grad_norm": 1.3942990149736294, - "learning_rate": 2.1648522599136173e-06, - "loss": 1.0218, - "step": 5428 - }, - { - "epoch": 0.4896063489200523, - "grad_norm": 1.286666029664773, - "learning_rate": 2.164270030225956e-06, - "loss": 0.9592, - "step": 5429 - }, - { - "epoch": 0.48969653244352257, - "grad_norm": 1.2431961379067316, - "learning_rate": 2.16368778652187e-06, - "loss": 0.9008, - "step": 5430 - }, - { - "epoch": 0.4897867159669928, - "grad_norm": 1.4981754650644543, - "learning_rate": 2.163105528851039e-06, - "loss": 1.0191, - "step": 5431 - }, - { - "epoch": 0.4898768994904631, - "grad_norm": 1.361406452656598, - "learning_rate": 2.1625232572631448e-06, - "loss": 0.9867, - "step": 5432 - }, - { - "epoch": 0.4899670830139334, - "grad_norm": 0.7317566151895015, - "learning_rate": 2.161940971807871e-06, - "loss": 0.8781, - "step": 5433 - }, - { - "epoch": 0.4900572665374036, - "grad_norm": 2.455284577325377, - "learning_rate": 2.1613586725348994e-06, - "loss": 0.9731, - "step": 5434 - }, - { - "epoch": 0.4901474500608739, - "grad_norm": 1.505069035439032, - "learning_rate": 2.1607763594939176e-06, - "loss": 1.0699, - "step": 5435 - }, - { - "epoch": 0.49023763358434413, - "grad_norm": 1.456828821760424, - "learning_rate": 2.1601940327346093e-06, - "loss": 0.9019, - "step": 5436 - }, - { - "epoch": 0.4903278171078144, - "grad_norm": 1.4347866822236426, - "learning_rate": 2.159611692306663e-06, - "loss": 1.0499, - "step": 5437 - }, - { - "epoch": 0.49041800063128466, - "grad_norm": 1.8236877844946326, - "learning_rate": 2.1590293382597667e-06, - "loss": 0.9477, - "step": 5438 - }, - { - "epoch": 0.49050818415475494, - "grad_norm": 2.517716385482982, - "learning_rate": 2.1584469706436102e-06, - "loss": 0.9234, - "step": 5439 - }, - { - "epoch": 0.4905983676782252, - "grad_norm": 1.3648121280424221, - "learning_rate": 2.1578645895078855e-06, - "loss": 0.8969, - "step": 5440 - }, - { - "epoch": 0.49068855120169547, - "grad_norm": 1.4891818468074065, - "learning_rate": 2.157282194902283e-06, - "loss": 1.0461, - "step": 5441 - }, - { - "epoch": 0.4907787347251657, - "grad_norm": 1.7399334888242788, - "learning_rate": 2.1566997868764965e-06, - "loss": 0.9148, - "step": 5442 - }, - { - "epoch": 0.490868918248636, - "grad_norm": 1.789033048295723, - "learning_rate": 2.15611736548022e-06, - "loss": 0.9435, - "step": 5443 - }, - { - "epoch": 0.4909591017721062, - "grad_norm": 1.581867802971693, - "learning_rate": 2.155534930763149e-06, - "loss": 0.9978, - "step": 5444 - }, - { - "epoch": 0.4910492852955765, - "grad_norm": 1.5873690002938423, - "learning_rate": 2.1549524827749804e-06, - "loss": 0.8737, - "step": 5445 - }, - { - "epoch": 0.49113946881904674, - "grad_norm": 1.1618751097417968, - "learning_rate": 2.1543700215654115e-06, - "loss": 0.8387, - "step": 5446 - }, - { - "epoch": 0.49122965234251703, - "grad_norm": 2.116034049266753, - "learning_rate": 2.153787547184141e-06, - "loss": 0.9688, - "step": 5447 - }, - { - "epoch": 0.49131983586598726, - "grad_norm": 1.446139784481561, - "learning_rate": 2.1532050596808695e-06, - "loss": 0.91, - "step": 5448 - }, - { - "epoch": 0.49141001938945755, - "grad_norm": 1.585289132013488, - "learning_rate": 2.152622559105297e-06, - "loss": 1.0271, - "step": 5449 - }, - { - "epoch": 0.4915002029129278, - "grad_norm": 1.6250553121689866, - "learning_rate": 2.152040045507126e-06, - "loss": 1.0278, - "step": 5450 - }, - { - "epoch": 0.4915903864363981, - "grad_norm": 1.9235112737767017, - "learning_rate": 2.1514575189360607e-06, - "loss": 1.0017, - "step": 5451 - }, - { - "epoch": 0.4916805699598683, - "grad_norm": 1.3362386263728188, - "learning_rate": 2.1508749794418043e-06, - "loss": 0.9404, - "step": 5452 - }, - { - "epoch": 0.4917707534833386, - "grad_norm": 1.366362317242162, - "learning_rate": 2.1502924270740626e-06, - "loss": 0.9551, - "step": 5453 - }, - { - "epoch": 0.49186093700680883, - "grad_norm": 1.655859343323344, - "learning_rate": 2.1497098618825427e-06, - "loss": 0.9819, - "step": 5454 - }, - { - "epoch": 0.4919511205302791, - "grad_norm": 2.0920136497008492, - "learning_rate": 2.1491272839169516e-06, - "loss": 1.0249, - "step": 5455 - }, - { - "epoch": 0.4920413040537494, - "grad_norm": 2.3165853344296075, - "learning_rate": 2.1485446932269986e-06, - "loss": 1.0925, - "step": 5456 - }, - { - "epoch": 0.49213148757721964, - "grad_norm": 1.480042972585206, - "learning_rate": 2.147962089862393e-06, - "loss": 1.016, - "step": 5457 - }, - { - "epoch": 0.49222167110068993, - "grad_norm": 2.7911868086280394, - "learning_rate": 2.1473794738728462e-06, - "loss": 0.8858, - "step": 5458 - }, - { - "epoch": 0.49231185462416016, - "grad_norm": 1.4138656816449493, - "learning_rate": 2.14679684530807e-06, - "loss": 1.0124, - "step": 5459 - }, - { - "epoch": 0.49240203814763045, - "grad_norm": 1.5159961993086788, - "learning_rate": 2.1462142042177774e-06, - "loss": 0.969, - "step": 5460 - }, - { - "epoch": 0.4924922216711007, - "grad_norm": 1.3905729219779148, - "learning_rate": 2.145631550651683e-06, - "loss": 0.9811, - "step": 5461 - }, - { - "epoch": 0.49258240519457097, - "grad_norm": 1.5664209379425535, - "learning_rate": 2.1450488846595016e-06, - "loss": 1.0809, - "step": 5462 - }, - { - "epoch": 0.4926725887180412, - "grad_norm": 1.5902383300612126, - "learning_rate": 2.14446620629095e-06, - "loss": 0.9572, - "step": 5463 - }, - { - "epoch": 0.4927627722415115, - "grad_norm": 1.3206559586585909, - "learning_rate": 2.1438835155957445e-06, - "loss": 1.0488, - "step": 5464 - }, - { - "epoch": 0.4928529557649817, - "grad_norm": 1.3284385311075146, - "learning_rate": 2.143300812623604e-06, - "loss": 1.0192, - "step": 5465 - }, - { - "epoch": 0.492943139288452, - "grad_norm": 1.582420094116384, - "learning_rate": 2.1427180974242485e-06, - "loss": 0.961, - "step": 5466 - }, - { - "epoch": 0.49303332281192225, - "grad_norm": 1.464693758596922, - "learning_rate": 2.142135370047398e-06, - "loss": 1.0074, - "step": 5467 - }, - { - "epoch": 0.49312350633539254, - "grad_norm": 1.726560981024855, - "learning_rate": 2.1415526305427735e-06, - "loss": 1.0159, - "step": 5468 - }, - { - "epoch": 0.49321368985886277, - "grad_norm": 1.4756666422699527, - "learning_rate": 2.140969878960098e-06, - "loss": 0.907, - "step": 5469 - }, - { - "epoch": 0.49330387338233306, - "grad_norm": 1.4387679934598616, - "learning_rate": 2.1403871153490956e-06, - "loss": 1.0345, - "step": 5470 - }, - { - "epoch": 0.4933940569058033, - "grad_norm": 1.7531826506194685, - "learning_rate": 2.13980433975949e-06, - "loss": 1.105, - "step": 5471 - }, - { - "epoch": 0.4934842404292736, - "grad_norm": 1.2753903913278644, - "learning_rate": 2.1392215522410076e-06, - "loss": 1.0251, - "step": 5472 - }, - { - "epoch": 0.4935744239527438, - "grad_norm": 1.725042787657001, - "learning_rate": 2.1386387528433743e-06, - "loss": 1.0966, - "step": 5473 - }, - { - "epoch": 0.4936646074762141, - "grad_norm": 1.6347729171170995, - "learning_rate": 2.1380559416163186e-06, - "loss": 1.0108, - "step": 5474 - }, - { - "epoch": 0.49375479099968433, - "grad_norm": 1.2350720236313442, - "learning_rate": 2.1374731186095685e-06, - "loss": 1.0322, - "step": 5475 - }, - { - "epoch": 0.4938449745231546, - "grad_norm": 1.2259404737294013, - "learning_rate": 2.136890283872854e-06, - "loss": 0.9297, - "step": 5476 - }, - { - "epoch": 0.49393515804662486, - "grad_norm": 1.4327995740667292, - "learning_rate": 2.136307437455906e-06, - "loss": 0.9636, - "step": 5477 - }, - { - "epoch": 0.49402534157009514, - "grad_norm": 1.1894935137526574, - "learning_rate": 2.135724579408456e-06, - "loss": 0.9319, - "step": 5478 - }, - { - "epoch": 0.49411552509356543, - "grad_norm": 1.6234093730645116, - "learning_rate": 2.1351417097802356e-06, - "loss": 1.0799, - "step": 5479 - }, - { - "epoch": 0.49420570861703567, - "grad_norm": 1.699194686543873, - "learning_rate": 2.1345588286209798e-06, - "loss": 0.8609, - "step": 5480 - }, - { - "epoch": 0.49429589214050595, - "grad_norm": 1.6200112278867094, - "learning_rate": 2.1339759359804227e-06, - "loss": 0.8269, - "step": 5481 - }, - { - "epoch": 0.4943860756639762, - "grad_norm": 1.449600457359604, - "learning_rate": 2.1333930319082997e-06, - "loss": 0.9607, - "step": 5482 - }, - { - "epoch": 0.4944762591874465, - "grad_norm": 1.328944189212966, - "learning_rate": 2.132810116454348e-06, - "loss": 0.9904, - "step": 5483 - }, - { - "epoch": 0.4945664427109167, - "grad_norm": 1.6017483487109536, - "learning_rate": 2.132227189668305e-06, - "loss": 0.9792, - "step": 5484 - }, - { - "epoch": 0.494656626234387, - "grad_norm": 1.444249850190749, - "learning_rate": 2.1316442515999096e-06, - "loss": 0.9271, - "step": 5485 - }, - { - "epoch": 0.49474680975785723, - "grad_norm": 1.881056794038737, - "learning_rate": 2.1310613022989e-06, - "loss": 1.0139, - "step": 5486 - }, - { - "epoch": 0.4948369932813275, - "grad_norm": 1.4718659807645638, - "learning_rate": 2.130478341815017e-06, - "loss": 0.8361, - "step": 5487 - }, - { - "epoch": 0.49492717680479775, - "grad_norm": 1.4736421153771553, - "learning_rate": 2.1298953701980033e-06, - "loss": 0.8811, - "step": 5488 - }, - { - "epoch": 0.49501736032826804, - "grad_norm": 2.826475642500339, - "learning_rate": 2.1293123874976003e-06, - "loss": 0.996, - "step": 5489 - }, - { - "epoch": 0.4951075438517383, - "grad_norm": 2.0259203903132432, - "learning_rate": 2.1287293937635513e-06, - "loss": 0.9316, - "step": 5490 - }, - { - "epoch": 0.49519772737520856, - "grad_norm": 1.7044156984590604, - "learning_rate": 2.1281463890456005e-06, - "loss": 1.0294, - "step": 5491 - }, - { - "epoch": 0.4952879108986788, - "grad_norm": 1.390327207419351, - "learning_rate": 2.127563373393493e-06, - "loss": 0.9742, - "step": 5492 - }, - { - "epoch": 0.4953780944221491, - "grad_norm": 1.5336786589628644, - "learning_rate": 2.1269803468569756e-06, - "loss": 0.9703, - "step": 5493 - }, - { - "epoch": 0.4954682779456193, - "grad_norm": 1.4391119667854013, - "learning_rate": 2.126397309485794e-06, - "loss": 1.0186, - "step": 5494 - }, - { - "epoch": 0.4955584614690896, - "grad_norm": 1.3710988797257584, - "learning_rate": 2.1258142613296983e-06, - "loss": 0.934, - "step": 5495 - }, - { - "epoch": 0.49564864499255984, - "grad_norm": 1.1876442470277744, - "learning_rate": 2.125231202438435e-06, - "loss": 0.9591, - "step": 5496 - }, - { - "epoch": 0.49573882851603013, - "grad_norm": 1.2800255018167446, - "learning_rate": 2.1246481328617553e-06, - "loss": 1.0147, - "step": 5497 - }, - { - "epoch": 0.49582901203950036, - "grad_norm": 1.9538780286627702, - "learning_rate": 2.1240650526494096e-06, - "loss": 0.9649, - "step": 5498 - }, - { - "epoch": 0.49591919556297065, - "grad_norm": 1.4578360209146783, - "learning_rate": 2.1234819618511493e-06, - "loss": 0.9437, - "step": 5499 - }, - { - "epoch": 0.4960093790864409, - "grad_norm": 0.6714850114483172, - "learning_rate": 2.122898860516728e-06, - "loss": 0.8702, - "step": 5500 - }, - { - "epoch": 0.49609956260991117, - "grad_norm": 1.3557499750479545, - "learning_rate": 2.1223157486958976e-06, - "loss": 0.9693, - "step": 5501 - }, - { - "epoch": 0.4961897461333814, - "grad_norm": 1.1951519228716894, - "learning_rate": 2.1217326264384127e-06, - "loss": 0.8701, - "step": 5502 - }, - { - "epoch": 0.4962799296568517, - "grad_norm": 2.6985988760859683, - "learning_rate": 2.1211494937940296e-06, - "loss": 1.0051, - "step": 5503 - }, - { - "epoch": 0.496370113180322, - "grad_norm": 1.216957583767315, - "learning_rate": 2.1205663508125034e-06, - "loss": 0.9906, - "step": 5504 - }, - { - "epoch": 0.4964602967037922, - "grad_norm": 1.470317815432533, - "learning_rate": 2.1199831975435914e-06, - "loss": 0.9476, - "step": 5505 - }, - { - "epoch": 0.4965504802272625, - "grad_norm": 1.2653247335779514, - "learning_rate": 2.1194000340370517e-06, - "loss": 1.0282, - "step": 5506 - }, - { - "epoch": 0.49664066375073274, - "grad_norm": 1.6961059822896, - "learning_rate": 2.1188168603426423e-06, - "loss": 0.9305, - "step": 5507 - }, - { - "epoch": 0.496730847274203, - "grad_norm": 1.7555522713784726, - "learning_rate": 2.118233676510123e-06, - "loss": 0.9521, - "step": 5508 - }, - { - "epoch": 0.49682103079767326, - "grad_norm": 1.4853461050401056, - "learning_rate": 2.117650482589255e-06, - "loss": 0.9941, - "step": 5509 - }, - { - "epoch": 0.49691121432114355, - "grad_norm": 1.4493821044326642, - "learning_rate": 2.1170672786297988e-06, - "loss": 1.0165, - "step": 5510 - }, - { - "epoch": 0.4970013978446138, - "grad_norm": 1.3154845137806466, - "learning_rate": 2.1164840646815174e-06, - "loss": 0.9458, - "step": 5511 - }, - { - "epoch": 0.49709158136808407, - "grad_norm": 1.4239403951203824, - "learning_rate": 2.1159008407941726e-06, - "loss": 1.0654, - "step": 5512 - }, - { - "epoch": 0.4971817648915543, - "grad_norm": 1.4565628567040234, - "learning_rate": 2.1153176070175293e-06, - "loss": 0.9787, - "step": 5513 - }, - { - "epoch": 0.4972719484150246, - "grad_norm": 1.339893796114008, - "learning_rate": 2.114734363401352e-06, - "loss": 0.8665, - "step": 5514 - }, - { - "epoch": 0.4973621319384948, - "grad_norm": 1.7296725609331507, - "learning_rate": 2.1141511099954056e-06, - "loss": 0.9747, - "step": 5515 - }, - { - "epoch": 0.4974523154619651, - "grad_norm": 1.2183204774313097, - "learning_rate": 2.1135678468494576e-06, - "loss": 0.9699, - "step": 5516 - }, - { - "epoch": 0.49754249898543534, - "grad_norm": 1.6635226954662503, - "learning_rate": 2.112984574013275e-06, - "loss": 1.0202, - "step": 5517 - }, - { - "epoch": 0.49763268250890563, - "grad_norm": 1.3994245402335859, - "learning_rate": 2.112401291536625e-06, - "loss": 0.9454, - "step": 5518 - }, - { - "epoch": 0.49772286603237587, - "grad_norm": 1.5315935956082325, - "learning_rate": 2.111817999469278e-06, - "loss": 1.0206, - "step": 5519 - }, - { - "epoch": 0.49781304955584615, - "grad_norm": 1.3555916502364778, - "learning_rate": 2.1112346978610016e-06, - "loss": 0.9269, - "step": 5520 - }, - { - "epoch": 0.4979032330793164, - "grad_norm": 1.6006164734064765, - "learning_rate": 2.1106513867615678e-06, - "loss": 0.9376, - "step": 5521 - }, - { - "epoch": 0.4979934166027867, - "grad_norm": 1.2419369042935378, - "learning_rate": 2.110068066220748e-06, - "loss": 0.9775, - "step": 5522 - }, - { - "epoch": 0.4980836001262569, - "grad_norm": 1.1834758536160441, - "learning_rate": 2.109484736288313e-06, - "loss": 1.0505, - "step": 5523 - }, - { - "epoch": 0.4981737836497272, - "grad_norm": 1.5068851445606946, - "learning_rate": 2.108901397014037e-06, - "loss": 0.994, - "step": 5524 - }, - { - "epoch": 0.49826396717319743, - "grad_norm": 4.91510920452757, - "learning_rate": 2.1083180484476934e-06, - "loss": 0.9853, - "step": 5525 - }, - { - "epoch": 0.4983541506966677, - "grad_norm": 1.4366166261571482, - "learning_rate": 2.1077346906390567e-06, - "loss": 0.9973, - "step": 5526 - }, - { - "epoch": 0.498444334220138, - "grad_norm": 1.8825295896598344, - "learning_rate": 2.107151323637902e-06, - "loss": 1.0042, - "step": 5527 - }, - { - "epoch": 0.49853451774360824, - "grad_norm": 1.5059183507470852, - "learning_rate": 2.106567947494006e-06, - "loss": 0.9614, - "step": 5528 - }, - { - "epoch": 0.49862470126707853, - "grad_norm": 3.83144312709418, - "learning_rate": 2.1059845622571447e-06, - "loss": 1.0452, - "step": 5529 - }, - { - "epoch": 0.49871488479054876, - "grad_norm": 1.3624050527265017, - "learning_rate": 2.1054011679770956e-06, - "loss": 1.0197, - "step": 5530 - }, - { - "epoch": 0.49880506831401905, - "grad_norm": 1.4671215200975263, - "learning_rate": 2.104817764703638e-06, - "loss": 0.9853, - "step": 5531 - }, - { - "epoch": 0.4988952518374893, - "grad_norm": 1.6101148071582425, - "learning_rate": 2.1042343524865516e-06, - "loss": 0.9921, - "step": 5532 - }, - { - "epoch": 0.4989854353609596, - "grad_norm": 1.36582743979566, - "learning_rate": 2.103650931375615e-06, - "loss": 1.0923, - "step": 5533 - }, - { - "epoch": 0.4990756188844298, - "grad_norm": 1.7575461630910632, - "learning_rate": 2.1030675014206094e-06, - "loss": 0.9209, - "step": 5534 - }, - { - "epoch": 0.4991658024079001, - "grad_norm": 1.8328152993937754, - "learning_rate": 2.1024840626713166e-06, - "loss": 0.9174, - "step": 5535 - }, - { - "epoch": 0.4992559859313703, - "grad_norm": 1.3311589760622458, - "learning_rate": 2.1019006151775177e-06, - "loss": 1.0802, - "step": 5536 - }, - { - "epoch": 0.4993461694548406, - "grad_norm": 1.7994703043283786, - "learning_rate": 2.101317158988997e-06, - "loss": 0.9739, - "step": 5537 - }, - { - "epoch": 0.49943635297831085, - "grad_norm": 4.748930961372139, - "learning_rate": 2.1007336941555374e-06, - "loss": 0.9548, - "step": 5538 - }, - { - "epoch": 0.49952653650178114, - "grad_norm": 1.3823586705212034, - "learning_rate": 2.1001502207269238e-06, - "loss": 0.9438, - "step": 5539 - }, - { - "epoch": 0.49961672002525137, - "grad_norm": 1.6886993667161307, - "learning_rate": 2.0995667387529407e-06, - "loss": 0.9385, - "step": 5540 - }, - { - "epoch": 0.49970690354872166, - "grad_norm": 1.39778484266804, - "learning_rate": 2.098983248283375e-06, - "loss": 0.9326, - "step": 5541 - }, - { - "epoch": 0.4997970870721919, - "grad_norm": 1.4974203179610521, - "learning_rate": 2.098399749368012e-06, - "loss": 0.9858, - "step": 5542 - }, - { - "epoch": 0.4998872705956622, - "grad_norm": 0.6587361065378462, - "learning_rate": 2.09781624205664e-06, - "loss": 0.8385, - "step": 5543 - }, - { - "epoch": 0.4999774541191324, - "grad_norm": 1.5049238453097502, - "learning_rate": 2.0972327263990477e-06, - "loss": 1.0308, - "step": 5544 - }, - { - "epoch": 0.5000676376426026, - "grad_norm": 1.6235884624707517, - "learning_rate": 2.0966492024450226e-06, - "loss": 0.9169, - "step": 5545 - }, - { - "epoch": 0.500157821166073, - "grad_norm": 1.6256574621189626, - "learning_rate": 2.0960656702443545e-06, - "loss": 1.1209, - "step": 5546 - }, - { - "epoch": 0.5002480046895432, - "grad_norm": 1.4105699096252498, - "learning_rate": 2.0954821298468343e-06, - "loss": 0.9805, - "step": 5547 - }, - { - "epoch": 0.5003381882130135, - "grad_norm": 1.3003975242094834, - "learning_rate": 2.0948985813022513e-06, - "loss": 1.0388, - "step": 5548 - }, - { - "epoch": 0.5004283717364837, - "grad_norm": 2.13386392352948, - "learning_rate": 2.094315024660399e-06, - "loss": 1.0189, - "step": 5549 - }, - { - "epoch": 0.500518555259954, - "grad_norm": 1.965852755161845, - "learning_rate": 2.0937314599710676e-06, - "loss": 1.007, - "step": 5550 - }, - { - "epoch": 0.5006087387834243, - "grad_norm": 1.2595503275630493, - "learning_rate": 2.0931478872840526e-06, - "loss": 0.9726, - "step": 5551 - }, - { - "epoch": 0.5006989223068945, - "grad_norm": 1.3887473299153614, - "learning_rate": 2.092564306649145e-06, - "loss": 0.9631, - "step": 5552 - }, - { - "epoch": 0.5007891058303648, - "grad_norm": 1.4192357143525092, - "learning_rate": 2.091980718116141e-06, - "loss": 1.0158, - "step": 5553 - }, - { - "epoch": 0.5008792893538351, - "grad_norm": 1.3215084189883977, - "learning_rate": 2.091397121734835e-06, - "loss": 1.023, - "step": 5554 - }, - { - "epoch": 0.5009694728773053, - "grad_norm": 1.4615068601172558, - "learning_rate": 2.090813517555022e-06, - "loss": 1.0035, - "step": 5555 - }, - { - "epoch": 0.5010596564007755, - "grad_norm": 1.515000676825344, - "learning_rate": 2.0902299056265e-06, - "loss": 0.932, - "step": 5556 - }, - { - "epoch": 0.5011498399242459, - "grad_norm": 1.677469821095642, - "learning_rate": 2.0896462859990643e-06, - "loss": 0.9666, - "step": 5557 - }, - { - "epoch": 0.5012400234477161, - "grad_norm": 1.272646652569537, - "learning_rate": 2.089062658722513e-06, - "loss": 1.0498, - "step": 5558 - }, - { - "epoch": 0.5013302069711864, - "grad_norm": 1.5890649486061519, - "learning_rate": 2.0884790238466452e-06, - "loss": 1.0285, - "step": 5559 - }, - { - "epoch": 0.5014203904946566, - "grad_norm": 1.8716003751465369, - "learning_rate": 2.087895381421259e-06, - "loss": 1.0194, - "step": 5560 - }, - { - "epoch": 0.5015105740181269, - "grad_norm": 1.786540890520104, - "learning_rate": 2.087311731496154e-06, - "loss": 0.9717, - "step": 5561 - }, - { - "epoch": 0.5016007575415972, - "grad_norm": 2.095462611067619, - "learning_rate": 2.08672807412113e-06, - "loss": 0.9721, - "step": 5562 - }, - { - "epoch": 0.5016909410650674, - "grad_norm": 1.7076685084781598, - "learning_rate": 2.08614440934599e-06, - "loss": 1.0748, - "step": 5563 - }, - { - "epoch": 0.5017811245885376, - "grad_norm": 1.5772492651110075, - "learning_rate": 2.0855607372205337e-06, - "loss": 0.9006, - "step": 5564 - }, - { - "epoch": 0.501871308112008, - "grad_norm": 1.5339136117535284, - "learning_rate": 2.0849770577945623e-06, - "loss": 0.9926, - "step": 5565 - }, - { - "epoch": 0.5019614916354782, - "grad_norm": 1.206831684095793, - "learning_rate": 2.084393371117881e-06, - "loss": 1.0138, - "step": 5566 - }, - { - "epoch": 0.5020516751589484, - "grad_norm": 1.179100143029883, - "learning_rate": 2.0838096772402902e-06, - "loss": 0.9129, - "step": 5567 - }, - { - "epoch": 0.5021418586824187, - "grad_norm": 1.7978599171027363, - "learning_rate": 2.0832259762115973e-06, - "loss": 0.8708, - "step": 5568 - }, - { - "epoch": 0.502232042205889, - "grad_norm": 1.562479400498977, - "learning_rate": 2.082642268081605e-06, - "loss": 1.0079, - "step": 5569 - }, - { - "epoch": 0.5023222257293593, - "grad_norm": 1.5953495749892739, - "learning_rate": 2.082058552900118e-06, - "loss": 0.9328, - "step": 5570 - }, - { - "epoch": 0.5024124092528295, - "grad_norm": 1.5907031086927828, - "learning_rate": 2.081474830716944e-06, - "loss": 0.9568, - "step": 5571 - }, - { - "epoch": 0.5025025927762997, - "grad_norm": 1.9079296654067304, - "learning_rate": 2.080891101581887e-06, - "loss": 1.0044, - "step": 5572 - }, - { - "epoch": 0.5025927762997701, - "grad_norm": 2.166605752920089, - "learning_rate": 2.080307365544755e-06, - "loss": 1.0272, - "step": 5573 - }, - { - "epoch": 0.5026829598232403, - "grad_norm": 1.4575159325227767, - "learning_rate": 2.0797236226553567e-06, - "loss": 1.0196, - "step": 5574 - }, - { - "epoch": 0.5027731433467105, - "grad_norm": 1.7562357294446351, - "learning_rate": 2.079139872963499e-06, - "loss": 0.9117, - "step": 5575 - }, - { - "epoch": 0.5028633268701809, - "grad_norm": 1.7585072818547849, - "learning_rate": 2.078556116518991e-06, - "loss": 0.8835, - "step": 5576 - }, - { - "epoch": 0.5029535103936511, - "grad_norm": 1.2154445818307678, - "learning_rate": 2.077972353371642e-06, - "loss": 0.958, - "step": 5577 - }, - { - "epoch": 0.5030436939171213, - "grad_norm": 1.27899278127797, - "learning_rate": 2.077388583571262e-06, - "loss": 0.9246, - "step": 5578 - }, - { - "epoch": 0.5031338774405916, - "grad_norm": 0.6923751292589283, - "learning_rate": 2.0768048071676608e-06, - "loss": 0.8004, - "step": 5579 - }, - { - "epoch": 0.5032240609640619, - "grad_norm": 1.3316785060052307, - "learning_rate": 2.0762210242106505e-06, - "loss": 0.9413, - "step": 5580 - }, - { - "epoch": 0.5033142444875321, - "grad_norm": 1.5937933074452597, - "learning_rate": 2.0756372347500424e-06, - "loss": 0.9911, - "step": 5581 - }, - { - "epoch": 0.5034044280110024, - "grad_norm": 1.5793224361897709, - "learning_rate": 2.0750534388356473e-06, - "loss": 1.0875, - "step": 5582 - }, - { - "epoch": 0.5034946115344726, - "grad_norm": 1.6735286912193865, - "learning_rate": 2.07446963651728e-06, - "loss": 0.9303, - "step": 5583 - }, - { - "epoch": 0.503584795057943, - "grad_norm": 1.3925140922559267, - "learning_rate": 2.0738858278447516e-06, - "loss": 0.9211, - "step": 5584 - }, - { - "epoch": 0.5036749785814132, - "grad_norm": 1.3586926556429784, - "learning_rate": 2.073302012867878e-06, - "loss": 1.0694, - "step": 5585 - }, - { - "epoch": 0.5037651621048834, - "grad_norm": 1.2924636822436533, - "learning_rate": 2.0727181916364725e-06, - "loss": 1.0184, - "step": 5586 - }, - { - "epoch": 0.5038553456283537, - "grad_norm": 1.237618687265207, - "learning_rate": 2.0721343642003493e-06, - "loss": 1.0127, - "step": 5587 - }, - { - "epoch": 0.503945529151824, - "grad_norm": 1.6407103743365081, - "learning_rate": 2.0715505306093247e-06, - "loss": 1.0139, - "step": 5588 - }, - { - "epoch": 0.5040357126752942, - "grad_norm": 1.6866233102530888, - "learning_rate": 2.070966690913214e-06, - "loss": 1.0103, - "step": 5589 - }, - { - "epoch": 0.5041258961987645, - "grad_norm": 1.8972953845889275, - "learning_rate": 2.0703828451618346e-06, - "loss": 0.8681, - "step": 5590 - }, - { - "epoch": 0.5042160797222347, - "grad_norm": 1.3874386490326003, - "learning_rate": 2.069798993405002e-06, - "loss": 0.9729, - "step": 5591 - }, - { - "epoch": 0.504306263245705, - "grad_norm": 1.4221370581288117, - "learning_rate": 2.0692151356925345e-06, - "loss": 1.0189, - "step": 5592 - }, - { - "epoch": 0.5043964467691753, - "grad_norm": 1.4956524150677517, - "learning_rate": 2.068631272074251e-06, - "loss": 1.0034, - "step": 5593 - }, - { - "epoch": 0.5044866302926455, - "grad_norm": 1.3132710235760328, - "learning_rate": 2.0680474025999676e-06, - "loss": 1.1101, - "step": 5594 - }, - { - "epoch": 0.5045768138161157, - "grad_norm": 0.8671599718828037, - "learning_rate": 2.0674635273195055e-06, - "loss": 0.8805, - "step": 5595 - }, - { - "epoch": 0.5046669973395861, - "grad_norm": 5.249852132985547, - "learning_rate": 2.066879646282682e-06, - "loss": 0.9782, - "step": 5596 - }, - { - "epoch": 0.5047571808630563, - "grad_norm": 1.6128957070667618, - "learning_rate": 2.0662957595393194e-06, - "loss": 0.9535, - "step": 5597 - }, - { - "epoch": 0.5048473643865266, - "grad_norm": 1.596441651312356, - "learning_rate": 2.0657118671392373e-06, - "loss": 1.0043, - "step": 5598 - }, - { - "epoch": 0.5049375479099969, - "grad_norm": 1.5553177366040174, - "learning_rate": 2.0651279691322558e-06, - "loss": 1.0367, - "step": 5599 - }, - { - "epoch": 0.5050277314334671, - "grad_norm": 1.269626367380259, - "learning_rate": 2.0645440655681973e-06, - "loss": 0.9381, - "step": 5600 - }, - { - "epoch": 0.5051179149569374, - "grad_norm": 1.878680495183952, - "learning_rate": 2.0639601564968826e-06, - "loss": 1.0522, - "step": 5601 - }, - { - "epoch": 0.5052080984804076, - "grad_norm": 0.6921503621077905, - "learning_rate": 2.0633762419681355e-06, - "loss": 0.9288, - "step": 5602 - }, - { - "epoch": 0.5052982820038779, - "grad_norm": 1.1174445223425213, - "learning_rate": 2.062792322031777e-06, - "loss": 0.995, - "step": 5603 - }, - { - "epoch": 0.5053884655273482, - "grad_norm": 1.7770203683959882, - "learning_rate": 2.062208396737632e-06, - "loss": 0.9445, - "step": 5604 - }, - { - "epoch": 0.5054786490508184, - "grad_norm": 1.312445276118624, - "learning_rate": 2.0616244661355235e-06, - "loss": 1.003, - "step": 5605 - }, - { - "epoch": 0.5055688325742886, - "grad_norm": 1.6439437722988286, - "learning_rate": 2.0610405302752752e-06, - "loss": 1.0106, - "step": 5606 - }, - { - "epoch": 0.505659016097759, - "grad_norm": 1.4159910423512725, - "learning_rate": 2.060456589206713e-06, - "loss": 0.976, - "step": 5607 - }, - { - "epoch": 0.5057491996212292, - "grad_norm": 1.5135546829633584, - "learning_rate": 2.0598726429796614e-06, - "loss": 0.9294, - "step": 5608 - }, - { - "epoch": 0.5058393831446995, - "grad_norm": 1.4481922225924535, - "learning_rate": 2.059288691643945e-06, - "loss": 0.9429, - "step": 5609 - }, - { - "epoch": 0.5059295666681697, - "grad_norm": 1.6151317116378188, - "learning_rate": 2.0587047352493913e-06, - "loss": 1.0661, - "step": 5610 - }, - { - "epoch": 0.50601975019164, - "grad_norm": 1.793744553082533, - "learning_rate": 2.0581207738458248e-06, - "loss": 0.9612, - "step": 5611 - }, - { - "epoch": 0.5061099337151103, - "grad_norm": 1.9126010219845289, - "learning_rate": 2.0575368074830743e-06, - "loss": 0.9381, - "step": 5612 - }, - { - "epoch": 0.5062001172385805, - "grad_norm": 2.4744355608106856, - "learning_rate": 2.0569528362109667e-06, - "loss": 0.896, - "step": 5613 - }, - { - "epoch": 0.5062903007620507, - "grad_norm": 1.3449050020327822, - "learning_rate": 2.056368860079327e-06, - "loss": 0.9794, - "step": 5614 - }, - { - "epoch": 0.5063804842855211, - "grad_norm": 1.8881404093442202, - "learning_rate": 2.0557848791379874e-06, - "loss": 0.9235, - "step": 5615 - }, - { - "epoch": 0.5064706678089913, - "grad_norm": 1.4532523971004985, - "learning_rate": 2.0552008934367734e-06, - "loss": 0.8853, - "step": 5616 - }, - { - "epoch": 0.5065608513324615, - "grad_norm": 2.081016078324098, - "learning_rate": 2.0546169030255154e-06, - "loss": 1.0478, - "step": 5617 - }, - { - "epoch": 0.5066510348559318, - "grad_norm": 1.5736014620709668, - "learning_rate": 2.054032907954041e-06, - "loss": 1.0633, - "step": 5618 - }, - { - "epoch": 0.5067412183794021, - "grad_norm": 1.411314976955886, - "learning_rate": 2.053448908272182e-06, - "loss": 0.9428, - "step": 5619 - }, - { - "epoch": 0.5068314019028723, - "grad_norm": 1.4300208198258857, - "learning_rate": 2.0528649040297673e-06, - "loss": 1.0116, - "step": 5620 - }, - { - "epoch": 0.5069215854263426, - "grad_norm": 1.3689552431947816, - "learning_rate": 2.0522808952766266e-06, - "loss": 0.9909, - "step": 5621 - }, - { - "epoch": 0.5070117689498129, - "grad_norm": 1.3868201850005293, - "learning_rate": 2.0516968820625925e-06, - "loss": 1.0296, - "step": 5622 - }, - { - "epoch": 0.5071019524732832, - "grad_norm": 1.7784728770875684, - "learning_rate": 2.051112864437495e-06, - "loss": 0.9705, - "step": 5623 - }, - { - "epoch": 0.5071921359967534, - "grad_norm": 1.6089657753955002, - "learning_rate": 2.050528842451166e-06, - "loss": 0.9525, - "step": 5624 - }, - { - "epoch": 0.5072823195202236, - "grad_norm": 1.3056266282848907, - "learning_rate": 2.049944816153438e-06, - "loss": 0.8651, - "step": 5625 - }, - { - "epoch": 0.507372503043694, - "grad_norm": 1.275920997156008, - "learning_rate": 2.049360785594142e-06, - "loss": 1.0316, - "step": 5626 - }, - { - "epoch": 0.5074626865671642, - "grad_norm": 0.677951327291484, - "learning_rate": 2.048776750823113e-06, - "loss": 0.7877, - "step": 5627 - }, - { - "epoch": 0.5075528700906344, - "grad_norm": 0.6685355386267905, - "learning_rate": 2.0481927118901817e-06, - "loss": 0.8321, - "step": 5628 - }, - { - "epoch": 0.5076430536141047, - "grad_norm": 1.6189426351684566, - "learning_rate": 2.0476086688451824e-06, - "loss": 0.9301, - "step": 5629 - }, - { - "epoch": 0.507733237137575, - "grad_norm": 1.6225660141974092, - "learning_rate": 2.04702462173795e-06, - "loss": 0.9732, - "step": 5630 - }, - { - "epoch": 0.5078234206610452, - "grad_norm": 2.5823862636812978, - "learning_rate": 2.0464405706183167e-06, - "loss": 0.9598, - "step": 5631 - }, - { - "epoch": 0.5079136041845155, - "grad_norm": 1.9720622461952493, - "learning_rate": 2.045856515536118e-06, - "loss": 0.9988, - "step": 5632 - }, - { - "epoch": 0.5080037877079857, - "grad_norm": 1.568266033278555, - "learning_rate": 2.045272456541188e-06, - "loss": 0.9992, - "step": 5633 - }, - { - "epoch": 0.508093971231456, - "grad_norm": 1.3431664907686716, - "learning_rate": 2.0446883936833635e-06, - "loss": 1.0462, - "step": 5634 - }, - { - "epoch": 0.5081841547549263, - "grad_norm": 1.7563625883348435, - "learning_rate": 2.0441043270124782e-06, - "loss": 0.9493, - "step": 5635 - }, - { - "epoch": 0.5082743382783965, - "grad_norm": 1.4676711807620526, - "learning_rate": 2.0435202565783683e-06, - "loss": 1.0087, - "step": 5636 - }, - { - "epoch": 0.5083645218018668, - "grad_norm": 1.365360281490825, - "learning_rate": 2.042936182430871e-06, - "loss": 0.9436, - "step": 5637 - }, - { - "epoch": 0.5084547053253371, - "grad_norm": 1.447064961101591, - "learning_rate": 2.0423521046198206e-06, - "loss": 0.9846, - "step": 5638 - }, - { - "epoch": 0.5085448888488073, - "grad_norm": 1.4316597764216135, - "learning_rate": 2.041768023195056e-06, - "loss": 0.9001, - "step": 5639 - }, - { - "epoch": 0.5086350723722776, - "grad_norm": 1.7910160909171626, - "learning_rate": 2.0411839382064126e-06, - "loss": 1.0506, - "step": 5640 - }, - { - "epoch": 0.5087252558957478, - "grad_norm": 1.6219604381482586, - "learning_rate": 2.040599849703729e-06, - "loss": 0.8748, - "step": 5641 - }, - { - "epoch": 0.5088154394192181, - "grad_norm": 1.5960890117767255, - "learning_rate": 2.040015757736843e-06, - "loss": 0.9148, - "step": 5642 - }, - { - "epoch": 0.5089056229426884, - "grad_norm": 1.4442761804110509, - "learning_rate": 2.039431662355591e-06, - "loss": 0.9099, - "step": 5643 - }, - { - "epoch": 0.5089958064661586, - "grad_norm": 1.395564788872518, - "learning_rate": 2.0388475636098126e-06, - "loss": 0.9922, - "step": 5644 - }, - { - "epoch": 0.5090859899896288, - "grad_norm": 1.634290838171618, - "learning_rate": 2.038263461549346e-06, - "loss": 1.0173, - "step": 5645 - }, - { - "epoch": 0.5091761735130992, - "grad_norm": 1.711114743601809, - "learning_rate": 2.0376793562240297e-06, - "loss": 0.9853, - "step": 5646 - }, - { - "epoch": 0.5092663570365694, - "grad_norm": 1.2907822206071253, - "learning_rate": 2.037095247683703e-06, - "loss": 0.9851, - "step": 5647 - }, - { - "epoch": 0.5093565405600397, - "grad_norm": 1.6091975142006256, - "learning_rate": 2.0365111359782046e-06, - "loss": 0.9419, - "step": 5648 - }, - { - "epoch": 0.50944672408351, - "grad_norm": 1.4115326311702245, - "learning_rate": 2.0359270211573757e-06, - "loss": 1.0116, - "step": 5649 - }, - { - "epoch": 0.5095369076069802, - "grad_norm": 2.142546410964866, - "learning_rate": 2.0353429032710545e-06, - "loss": 1.0448, - "step": 5650 - }, - { - "epoch": 0.5096270911304505, - "grad_norm": 1.338731175921657, - "learning_rate": 2.0347587823690825e-06, - "loss": 0.9642, - "step": 5651 - }, - { - "epoch": 0.5097172746539207, - "grad_norm": 1.4038271907735567, - "learning_rate": 2.034174658501299e-06, - "loss": 0.9768, - "step": 5652 - }, - { - "epoch": 0.509807458177391, - "grad_norm": 2.364016281452687, - "learning_rate": 2.0335905317175453e-06, - "loss": 1.0225, - "step": 5653 - }, - { - "epoch": 0.5098976417008613, - "grad_norm": 1.6894192554408443, - "learning_rate": 2.033006402067663e-06, - "loss": 0.9807, - "step": 5654 - }, - { - "epoch": 0.5099878252243315, - "grad_norm": 1.3499172980090033, - "learning_rate": 2.0324222696014912e-06, - "loss": 0.8899, - "step": 5655 - }, - { - "epoch": 0.5100780087478017, - "grad_norm": 1.6458126340441657, - "learning_rate": 2.0318381343688733e-06, - "loss": 1.0126, - "step": 5656 - }, - { - "epoch": 0.5101681922712721, - "grad_norm": 2.8946750620587425, - "learning_rate": 2.0312539964196505e-06, - "loss": 0.9591, - "step": 5657 - }, - { - "epoch": 0.5102583757947423, - "grad_norm": 1.5336485003251032, - "learning_rate": 2.030669855803664e-06, - "loss": 0.9643, - "step": 5658 - }, - { - "epoch": 0.5103485593182125, - "grad_norm": 1.2948000291025927, - "learning_rate": 2.0300857125707563e-06, - "loss": 1.0266, - "step": 5659 - }, - { - "epoch": 0.5104387428416828, - "grad_norm": 2.061071884019621, - "learning_rate": 2.0295015667707697e-06, - "loss": 1.0061, - "step": 5660 - }, - { - "epoch": 0.5105289263651531, - "grad_norm": 1.3509307002033872, - "learning_rate": 2.0289174184535472e-06, - "loss": 0.9859, - "step": 5661 - }, - { - "epoch": 0.5106191098886234, - "grad_norm": 1.405630950304517, - "learning_rate": 2.02833326766893e-06, - "loss": 0.9511, - "step": 5662 - }, - { - "epoch": 0.5107092934120936, - "grad_norm": 1.4623583244105318, - "learning_rate": 2.027749114466763e-06, - "loss": 1.0477, - "step": 5663 - }, - { - "epoch": 0.5107994769355638, - "grad_norm": 1.3828941795234908, - "learning_rate": 2.027164958896889e-06, - "loss": 1.0262, - "step": 5664 - }, - { - "epoch": 0.5108896604590342, - "grad_norm": 1.6679139318101495, - "learning_rate": 2.02658080100915e-06, - "loss": 1.0961, - "step": 5665 - }, - { - "epoch": 0.5109798439825044, - "grad_norm": 1.1928256725356474, - "learning_rate": 2.0259966408533915e-06, - "loss": 1.0519, - "step": 5666 - }, - { - "epoch": 0.5110700275059746, - "grad_norm": 2.4677076432847205, - "learning_rate": 2.025412478479455e-06, - "loss": 1.0713, - "step": 5667 - }, - { - "epoch": 0.5111602110294449, - "grad_norm": 1.5868031812223908, - "learning_rate": 2.0248283139371862e-06, - "loss": 1.0287, - "step": 5668 - }, - { - "epoch": 0.5112503945529152, - "grad_norm": 1.3856281128720638, - "learning_rate": 2.024244147276429e-06, - "loss": 0.9568, - "step": 5669 - }, - { - "epoch": 0.5113405780763854, - "grad_norm": 1.2514271695125438, - "learning_rate": 2.023659978547027e-06, - "loss": 1.0501, - "step": 5670 - }, - { - "epoch": 0.5114307615998557, - "grad_norm": 1.5298725082535505, - "learning_rate": 2.023075807798826e-06, - "loss": 0.9756, - "step": 5671 - }, - { - "epoch": 0.511520945123326, - "grad_norm": 1.5424157032739043, - "learning_rate": 2.0224916350816696e-06, - "loss": 0.9114, - "step": 5672 - }, - { - "epoch": 0.5116111286467963, - "grad_norm": 1.655769854350958, - "learning_rate": 2.0219074604454026e-06, - "loss": 0.9975, - "step": 5673 - }, - { - "epoch": 0.5117013121702665, - "grad_norm": 1.7614484017075414, - "learning_rate": 2.02132328393987e-06, - "loss": 0.9828, - "step": 5674 - }, - { - "epoch": 0.5117914956937367, - "grad_norm": 1.5676806205589746, - "learning_rate": 2.0207391056149174e-06, - "loss": 0.9335, - "step": 5675 - }, - { - "epoch": 0.5118816792172071, - "grad_norm": 2.0212283059809653, - "learning_rate": 2.020154925520391e-06, - "loss": 0.956, - "step": 5676 - }, - { - "epoch": 0.5119718627406773, - "grad_norm": 1.3302228627597574, - "learning_rate": 2.0195707437061332e-06, - "loss": 1.0864, - "step": 5677 - }, - { - "epoch": 0.5120620462641475, - "grad_norm": 1.7008248291600474, - "learning_rate": 2.0189865602219934e-06, - "loss": 0.9898, - "step": 5678 - }, - { - "epoch": 0.5121522297876178, - "grad_norm": 1.5716920932049303, - "learning_rate": 2.0184023751178154e-06, - "loss": 1.0597, - "step": 5679 - }, - { - "epoch": 0.5122424133110881, - "grad_norm": 1.2641047071051956, - "learning_rate": 2.017818188443444e-06, - "loss": 0.9361, - "step": 5680 - }, - { - "epoch": 0.5123325968345583, - "grad_norm": 1.508414113883118, - "learning_rate": 2.017234000248728e-06, - "loss": 0.9963, - "step": 5681 - }, - { - "epoch": 0.5124227803580286, - "grad_norm": 1.4405940385582066, - "learning_rate": 2.0166498105835108e-06, - "loss": 0.9225, - "step": 5682 - }, - { - "epoch": 0.5125129638814988, - "grad_norm": 1.1446170709611763, - "learning_rate": 2.0160656194976407e-06, - "loss": 0.9207, - "step": 5683 - }, - { - "epoch": 0.5126031474049692, - "grad_norm": 1.5559962146144215, - "learning_rate": 2.0154814270409634e-06, - "loss": 0.9353, - "step": 5684 - }, - { - "epoch": 0.5126933309284394, - "grad_norm": 2.0317980687137216, - "learning_rate": 2.0148972332633247e-06, - "loss": 0.9421, - "step": 5685 - }, - { - "epoch": 0.5127835144519096, - "grad_norm": 2.0738728552740473, - "learning_rate": 2.0143130382145733e-06, - "loss": 1.0436, - "step": 5686 - }, - { - "epoch": 0.5128736979753798, - "grad_norm": 2.2813017329790704, - "learning_rate": 2.0137288419445533e-06, - "loss": 0.9603, - "step": 5687 - }, - { - "epoch": 0.5129638814988502, - "grad_norm": 1.9208254545743915, - "learning_rate": 2.0131446445031134e-06, - "loss": 1.0248, - "step": 5688 - }, - { - "epoch": 0.5130540650223204, - "grad_norm": 1.5519927935039701, - "learning_rate": 2.0125604459400994e-06, - "loss": 1.0603, - "step": 5689 - }, - { - "epoch": 0.5131442485457907, - "grad_norm": 1.8211928436798914, - "learning_rate": 2.0119762463053596e-06, - "loss": 1.0495, - "step": 5690 - }, - { - "epoch": 0.5132344320692609, - "grad_norm": 2.245628348522413, - "learning_rate": 2.0113920456487406e-06, - "loss": 0.9774, - "step": 5691 - }, - { - "epoch": 0.5133246155927312, - "grad_norm": 1.6161333416984547, - "learning_rate": 2.010807844020088e-06, - "loss": 0.9803, - "step": 5692 - }, - { - "epoch": 0.5134147991162015, - "grad_norm": 1.4339005560344649, - "learning_rate": 2.0102236414692524e-06, - "loss": 1.034, - "step": 5693 - }, - { - "epoch": 0.5135049826396717, - "grad_norm": 1.4213321194372188, - "learning_rate": 2.0096394380460777e-06, - "loss": 0.9737, - "step": 5694 - }, - { - "epoch": 0.513595166163142, - "grad_norm": 1.448255933720796, - "learning_rate": 2.0090552338004136e-06, - "loss": 1.0074, - "step": 5695 - }, - { - "epoch": 0.5136853496866123, - "grad_norm": 1.280752876417008, - "learning_rate": 2.0084710287821077e-06, - "loss": 0.9803, - "step": 5696 - }, - { - "epoch": 0.5137755332100825, - "grad_norm": 1.8815057422267483, - "learning_rate": 2.007886823041006e-06, - "loss": 1.0286, - "step": 5697 - }, - { - "epoch": 0.5138657167335527, - "grad_norm": 1.625668388236467, - "learning_rate": 2.0073026166269577e-06, - "loss": 1.0085, - "step": 5698 - }, - { - "epoch": 0.5139559002570231, - "grad_norm": 1.2707471922566316, - "learning_rate": 2.0067184095898093e-06, - "loss": 1.0447, - "step": 5699 - }, - { - "epoch": 0.5140460837804933, - "grad_norm": 1.5581091411837793, - "learning_rate": 2.0061342019794094e-06, - "loss": 1.0012, - "step": 5700 - }, - { - "epoch": 0.5141362673039636, - "grad_norm": 1.371223552243638, - "learning_rate": 2.0055499938456058e-06, - "loss": 1.0625, - "step": 5701 - }, - { - "epoch": 0.5142264508274338, - "grad_norm": 1.4111306583163705, - "learning_rate": 2.0049657852382464e-06, - "loss": 0.9539, - "step": 5702 - }, - { - "epoch": 0.5143166343509041, - "grad_norm": 1.9099526638136959, - "learning_rate": 2.0043815762071782e-06, - "loss": 0.917, - "step": 5703 - }, - { - "epoch": 0.5144068178743744, - "grad_norm": 1.603368643228455, - "learning_rate": 2.0037973668022492e-06, - "loss": 0.9533, - "step": 5704 - }, - { - "epoch": 0.5144970013978446, - "grad_norm": 1.6055151568386083, - "learning_rate": 2.003213157073309e-06, - "loss": 0.872, - "step": 5705 - }, - { - "epoch": 0.5145871849213148, - "grad_norm": 1.357710465964203, - "learning_rate": 2.002628947070204e-06, - "loss": 0.9805, - "step": 5706 - }, - { - "epoch": 0.5146773684447852, - "grad_norm": 1.956437133028599, - "learning_rate": 2.002044736842783e-06, - "loss": 1.0469, - "step": 5707 - }, - { - "epoch": 0.5147675519682554, - "grad_norm": 1.3571201345864752, - "learning_rate": 2.001460526440894e-06, - "loss": 0.9709, - "step": 5708 - }, - { - "epoch": 0.5148577354917256, - "grad_norm": 0.6407891505139677, - "learning_rate": 2.0008763159143843e-06, - "loss": 0.8282, - "step": 5709 - }, - { - "epoch": 0.5149479190151959, - "grad_norm": 1.3362107471520908, - "learning_rate": 2.000292105313103e-06, - "loss": 1.01, - "step": 5710 - }, - { - "epoch": 0.5150381025386662, - "grad_norm": 1.259404188248575, - "learning_rate": 1.999707894686897e-06, - "loss": 1.0244, - "step": 5711 - }, - { - "epoch": 0.5151282860621365, - "grad_norm": 1.2719264045843588, - "learning_rate": 1.9991236840856155e-06, - "loss": 0.9822, - "step": 5712 - }, - { - "epoch": 0.5152184695856067, - "grad_norm": 1.2998241305598777, - "learning_rate": 1.9985394735591065e-06, - "loss": 0.9565, - "step": 5713 - }, - { - "epoch": 0.5153086531090769, - "grad_norm": 1.8619358795754135, - "learning_rate": 1.997955263157217e-06, - "loss": 1.0302, - "step": 5714 - }, - { - "epoch": 0.5153988366325473, - "grad_norm": 1.604636466190754, - "learning_rate": 1.997371052929796e-06, - "loss": 1.0148, - "step": 5715 - }, - { - "epoch": 0.5154890201560175, - "grad_norm": 1.578448045355886, - "learning_rate": 1.996786842926691e-06, - "loss": 0.9379, - "step": 5716 - }, - { - "epoch": 0.5155792036794877, - "grad_norm": 1.5349010614574266, - "learning_rate": 1.9962026331977506e-06, - "loss": 0.9959, - "step": 5717 - }, - { - "epoch": 0.5156693872029581, - "grad_norm": 1.6065742788753035, - "learning_rate": 1.9956184237928224e-06, - "loss": 0.9935, - "step": 5718 - }, - { - "epoch": 0.5157595707264283, - "grad_norm": 3.2921286311089197, - "learning_rate": 1.995034214761754e-06, - "loss": 0.9698, - "step": 5719 - }, - { - "epoch": 0.5158497542498985, - "grad_norm": 2.286075348520908, - "learning_rate": 1.9944500061543945e-06, - "loss": 0.9605, - "step": 5720 - }, - { - "epoch": 0.5159399377733688, - "grad_norm": 1.4559338402441124, - "learning_rate": 1.99386579802059e-06, - "loss": 1.004, - "step": 5721 - }, - { - "epoch": 0.5160301212968391, - "grad_norm": 1.307918681912522, - "learning_rate": 1.993281590410191e-06, - "loss": 0.9631, - "step": 5722 - }, - { - "epoch": 0.5161203048203094, - "grad_norm": 2.193226104519193, - "learning_rate": 1.992697383373043e-06, - "loss": 0.9359, - "step": 5723 - }, - { - "epoch": 0.5162104883437796, - "grad_norm": 0.8258810771558615, - "learning_rate": 1.9921131769589937e-06, - "loss": 0.9294, - "step": 5724 - }, - { - "epoch": 0.5163006718672498, - "grad_norm": 1.466316073230591, - "learning_rate": 1.991528971217893e-06, - "loss": 0.9871, - "step": 5725 - }, - { - "epoch": 0.5163908553907202, - "grad_norm": 1.2960480500868208, - "learning_rate": 1.9909447661995858e-06, - "loss": 0.9812, - "step": 5726 - }, - { - "epoch": 0.5164810389141904, - "grad_norm": 1.7385517038551195, - "learning_rate": 1.990360561953922e-06, - "loss": 1.0091, - "step": 5727 - }, - { - "epoch": 0.5165712224376606, - "grad_norm": 1.2619393926385902, - "learning_rate": 1.9897763585307483e-06, - "loss": 1.0077, - "step": 5728 - }, - { - "epoch": 0.5166614059611309, - "grad_norm": 1.3581466825451103, - "learning_rate": 1.989192155979912e-06, - "loss": 1.0201, - "step": 5729 - }, - { - "epoch": 0.5167515894846012, - "grad_norm": 1.1246445412154427, - "learning_rate": 1.98860795435126e-06, - "loss": 1.0156, - "step": 5730 - }, - { - "epoch": 0.5168417730080714, - "grad_norm": 1.4810161937212074, - "learning_rate": 1.9880237536946406e-06, - "loss": 0.9928, - "step": 5731 - }, - { - "epoch": 0.5169319565315417, - "grad_norm": 1.7455651765454756, - "learning_rate": 1.987439554059901e-06, - "loss": 0.9462, - "step": 5732 - }, - { - "epoch": 0.5170221400550119, - "grad_norm": 1.5095197272564271, - "learning_rate": 1.9868553554968864e-06, - "loss": 0.9595, - "step": 5733 - }, - { - "epoch": 0.5171123235784822, - "grad_norm": 1.5406104499176245, - "learning_rate": 1.986271158055447e-06, - "loss": 0.9355, - "step": 5734 - }, - { - "epoch": 0.5172025071019525, - "grad_norm": 1.8409169948292607, - "learning_rate": 1.9856869617854273e-06, - "loss": 0.9773, - "step": 5735 - }, - { - "epoch": 0.5172926906254227, - "grad_norm": 1.866371966271071, - "learning_rate": 1.9851027667366746e-06, - "loss": 0.9606, - "step": 5736 - }, - { - "epoch": 0.517382874148893, - "grad_norm": 1.4242564788022831, - "learning_rate": 1.984518572959037e-06, - "loss": 1.0818, - "step": 5737 - }, - { - "epoch": 0.5174730576723633, - "grad_norm": 1.8183753712541564, - "learning_rate": 1.9839343805023587e-06, - "loss": 0.9013, - "step": 5738 - }, - { - "epoch": 0.5175632411958335, - "grad_norm": 1.6610375791383205, - "learning_rate": 1.9833501894164886e-06, - "loss": 1.0758, - "step": 5739 - }, - { - "epoch": 0.5176534247193038, - "grad_norm": 1.4959867354483225, - "learning_rate": 1.982765999751273e-06, - "loss": 1.0231, - "step": 5740 - }, - { - "epoch": 0.5177436082427741, - "grad_norm": 1.8775360758194284, - "learning_rate": 1.9821818115565553e-06, - "loss": 1.0156, - "step": 5741 - }, - { - "epoch": 0.5178337917662443, - "grad_norm": 1.460273632640074, - "learning_rate": 1.9815976248821853e-06, - "loss": 0.8954, - "step": 5742 - }, - { - "epoch": 0.5179239752897146, - "grad_norm": 1.307826531812349, - "learning_rate": 1.981013439778007e-06, - "loss": 1.0001, - "step": 5743 - }, - { - "epoch": 0.5180141588131848, - "grad_norm": 1.7881590048560976, - "learning_rate": 1.9804292562938666e-06, - "loss": 0.9918, - "step": 5744 - }, - { - "epoch": 0.5181043423366551, - "grad_norm": 1.7670783753772643, - "learning_rate": 1.97984507447961e-06, - "loss": 1.0259, - "step": 5745 - }, - { - "epoch": 0.5181945258601254, - "grad_norm": 1.6067624413896109, - "learning_rate": 1.9792608943850824e-06, - "loss": 0.925, - "step": 5746 - }, - { - "epoch": 0.5182847093835956, - "grad_norm": 1.4218625078595728, - "learning_rate": 1.9786767160601305e-06, - "loss": 0.9648, - "step": 5747 - }, - { - "epoch": 0.5183748929070658, - "grad_norm": 1.5233453184751211, - "learning_rate": 1.9780925395545977e-06, - "loss": 0.9133, - "step": 5748 - }, - { - "epoch": 0.5184650764305362, - "grad_norm": 0.6974940206414982, - "learning_rate": 1.9775083649183306e-06, - "loss": 0.8379, - "step": 5749 - }, - { - "epoch": 0.5185552599540064, - "grad_norm": 1.4360603089060813, - "learning_rate": 1.976924192201174e-06, - "loss": 1.0235, - "step": 5750 - }, - { - "epoch": 0.5186454434774767, - "grad_norm": 1.320505252288923, - "learning_rate": 1.9763400214529723e-06, - "loss": 0.9167, - "step": 5751 - }, - { - "epoch": 0.5187356270009469, - "grad_norm": 1.3059154830504345, - "learning_rate": 1.9757558527235713e-06, - "loss": 0.8563, - "step": 5752 - }, - { - "epoch": 0.5188258105244172, - "grad_norm": 1.4716475292760132, - "learning_rate": 1.9751716860628136e-06, - "loss": 0.9747, - "step": 5753 - }, - { - "epoch": 0.5189159940478875, - "grad_norm": 1.6661978618594828, - "learning_rate": 1.974587521520545e-06, - "loss": 1.0098, - "step": 5754 - }, - { - "epoch": 0.5190061775713577, - "grad_norm": 1.4173680233614208, - "learning_rate": 1.9740033591466088e-06, - "loss": 1.0212, - "step": 5755 - }, - { - "epoch": 0.5190963610948279, - "grad_norm": 1.4389202731051172, - "learning_rate": 1.97341919899085e-06, - "loss": 1.0866, - "step": 5756 - }, - { - "epoch": 0.5191865446182983, - "grad_norm": 1.4629365025088836, - "learning_rate": 1.9728350411031114e-06, - "loss": 1.0114, - "step": 5757 - }, - { - "epoch": 0.5192767281417685, - "grad_norm": 1.5618580834738467, - "learning_rate": 1.9722508855332367e-06, - "loss": 1.0583, - "step": 5758 - }, - { - "epoch": 0.5193669116652387, - "grad_norm": 1.245691647122348, - "learning_rate": 1.97166673233107e-06, - "loss": 0.9167, - "step": 5759 - }, - { - "epoch": 0.519457095188709, - "grad_norm": 1.3126573468305784, - "learning_rate": 1.971082581546453e-06, - "loss": 1.0274, - "step": 5760 - }, - { - "epoch": 0.5195472787121793, - "grad_norm": 1.4612906941930413, - "learning_rate": 1.9704984332292306e-06, - "loss": 1.0115, - "step": 5761 - }, - { - "epoch": 0.5196374622356495, - "grad_norm": 1.5620959713230353, - "learning_rate": 1.9699142874292444e-06, - "loss": 1.0732, - "step": 5762 - }, - { - "epoch": 0.5197276457591198, - "grad_norm": 1.3657625892864742, - "learning_rate": 1.969330144196336e-06, - "loss": 1.0788, - "step": 5763 - }, - { - "epoch": 0.51981782928259, - "grad_norm": 1.6498913613592017, - "learning_rate": 1.9687460035803497e-06, - "loss": 0.9367, - "step": 5764 - }, - { - "epoch": 0.5199080128060604, - "grad_norm": 1.7134768658321073, - "learning_rate": 1.9681618656311265e-06, - "loss": 0.9602, - "step": 5765 - }, - { - "epoch": 0.5199981963295306, - "grad_norm": 1.3865229078808503, - "learning_rate": 1.9675777303985086e-06, - "loss": 0.8977, - "step": 5766 - }, - { - "epoch": 0.5200883798530008, - "grad_norm": 1.415937750417889, - "learning_rate": 1.9669935979323376e-06, - "loss": 1.0133, - "step": 5767 - }, - { - "epoch": 0.5201785633764712, - "grad_norm": 1.4327750297813562, - "learning_rate": 1.9664094682824545e-06, - "loss": 1.0439, - "step": 5768 - }, - { - "epoch": 0.5202687468999414, - "grad_norm": 1.2855775368719038, - "learning_rate": 1.965825341498701e-06, - "loss": 0.9493, - "step": 5769 - }, - { - "epoch": 0.5203589304234116, - "grad_norm": 1.5423565772845524, - "learning_rate": 1.9652412176309177e-06, - "loss": 1.0556, - "step": 5770 - }, - { - "epoch": 0.5204491139468819, - "grad_norm": 1.1892728621967927, - "learning_rate": 1.9646570967289453e-06, - "loss": 0.8478, - "step": 5771 - }, - { - "epoch": 0.5205392974703522, - "grad_norm": 1.5150497760716013, - "learning_rate": 1.9640729788426246e-06, - "loss": 1.0212, - "step": 5772 - }, - { - "epoch": 0.5206294809938224, - "grad_norm": 1.5868066369889366, - "learning_rate": 1.963488864021795e-06, - "loss": 1.0214, - "step": 5773 - }, - { - "epoch": 0.5207196645172927, - "grad_norm": 1.770536091111039, - "learning_rate": 1.962904752316298e-06, - "loss": 0.8744, - "step": 5774 - }, - { - "epoch": 0.5208098480407629, - "grad_norm": 1.7044803930639258, - "learning_rate": 1.9623206437759706e-06, - "loss": 0.9122, - "step": 5775 - }, - { - "epoch": 0.5209000315642333, - "grad_norm": 1.4933471966565455, - "learning_rate": 1.9617365384506545e-06, - "loss": 0.9429, - "step": 5776 - }, - { - "epoch": 0.5209902150877035, - "grad_norm": 1.4860331058331373, - "learning_rate": 1.9611524363901872e-06, - "loss": 0.9705, - "step": 5777 - }, - { - "epoch": 0.5210803986111737, - "grad_norm": 2.163201336333466, - "learning_rate": 1.960568337644409e-06, - "loss": 0.9796, - "step": 5778 - }, - { - "epoch": 0.521170582134644, - "grad_norm": 1.2949495384518022, - "learning_rate": 1.9599842422631576e-06, - "loss": 0.9669, - "step": 5779 - }, - { - "epoch": 0.5212607656581143, - "grad_norm": 2.1856100364742255, - "learning_rate": 1.9594001502962703e-06, - "loss": 0.9759, - "step": 5780 - }, - { - "epoch": 0.5213509491815845, - "grad_norm": 1.1505930387335765, - "learning_rate": 1.9588160617935868e-06, - "loss": 1.0452, - "step": 5781 - }, - { - "epoch": 0.5214411327050548, - "grad_norm": 1.9345298889958917, - "learning_rate": 1.958231976804944e-06, - "loss": 1.0714, - "step": 5782 - }, - { - "epoch": 0.521531316228525, - "grad_norm": 2.091197493419006, - "learning_rate": 1.957647895380179e-06, - "loss": 0.9055, - "step": 5783 - }, - { - "epoch": 0.5216214997519953, - "grad_norm": 1.5374031997849837, - "learning_rate": 1.9570638175691297e-06, - "loss": 0.9092, - "step": 5784 - }, - { - "epoch": 0.5217116832754656, - "grad_norm": 1.7184381375447764, - "learning_rate": 1.956479743421632e-06, - "loss": 1.022, - "step": 5785 - }, - { - "epoch": 0.5218018667989358, - "grad_norm": 1.4631934062702825, - "learning_rate": 1.955895672987522e-06, - "loss": 0.9224, - "step": 5786 - }, - { - "epoch": 0.521892050322406, - "grad_norm": 1.4022663913312192, - "learning_rate": 1.9553116063166367e-06, - "loss": 0.8868, - "step": 5787 - }, - { - "epoch": 0.5219822338458764, - "grad_norm": 1.68875767500663, - "learning_rate": 1.954727543458812e-06, - "loss": 0.8957, - "step": 5788 - }, - { - "epoch": 0.5220724173693466, - "grad_norm": 1.6151046977636339, - "learning_rate": 1.954143484463883e-06, - "loss": 1.0081, - "step": 5789 - }, - { - "epoch": 0.5221626008928169, - "grad_norm": 1.5459137829653318, - "learning_rate": 1.9535594293816836e-06, - "loss": 0.9749, - "step": 5790 - }, - { - "epoch": 0.5222527844162872, - "grad_norm": 1.9313028964110075, - "learning_rate": 1.952975378262051e-06, - "loss": 0.9257, - "step": 5791 - }, - { - "epoch": 0.5223429679397574, - "grad_norm": 1.7800601532233757, - "learning_rate": 1.952391331154817e-06, - "loss": 0.9375, - "step": 5792 - }, - { - "epoch": 0.5224331514632277, - "grad_norm": 1.405912146779549, - "learning_rate": 1.9518072881098185e-06, - "loss": 1.0357, - "step": 5793 - }, - { - "epoch": 0.5225233349866979, - "grad_norm": 1.448542023048849, - "learning_rate": 1.9512232491768867e-06, - "loss": 0.9758, - "step": 5794 - }, - { - "epoch": 0.5226135185101682, - "grad_norm": 1.2760925230933735, - "learning_rate": 1.9506392144058573e-06, - "loss": 0.9787, - "step": 5795 - }, - { - "epoch": 0.5227037020336385, - "grad_norm": 1.3303872532056695, - "learning_rate": 1.9500551838465623e-06, - "loss": 0.9752, - "step": 5796 - }, - { - "epoch": 0.5227938855571087, - "grad_norm": 1.4378761960243251, - "learning_rate": 1.9494711575488337e-06, - "loss": 0.9919, - "step": 5797 - }, - { - "epoch": 0.5228840690805789, - "grad_norm": 1.6067760185104452, - "learning_rate": 1.948887135562505e-06, - "loss": 0.9324, - "step": 5798 - }, - { - "epoch": 0.5229742526040493, - "grad_norm": 1.284443387997628, - "learning_rate": 1.9483031179374074e-06, - "loss": 1.0333, - "step": 5799 - }, - { - "epoch": 0.5230644361275195, - "grad_norm": 1.301524502679271, - "learning_rate": 1.9477191047233736e-06, - "loss": 1.0397, - "step": 5800 - }, - { - "epoch": 0.5231546196509897, - "grad_norm": 1.5984096849096567, - "learning_rate": 1.9471350959702334e-06, - "loss": 0.8662, - "step": 5801 - }, - { - "epoch": 0.52324480317446, - "grad_norm": 1.5731714139205888, - "learning_rate": 1.9465510917278184e-06, - "loss": 0.9822, - "step": 5802 - }, - { - "epoch": 0.5233349866979303, - "grad_norm": 1.5484471664357948, - "learning_rate": 1.9459670920459593e-06, - "loss": 0.9765, - "step": 5803 - }, - { - "epoch": 0.5234251702214006, - "grad_norm": 1.5705749899647528, - "learning_rate": 1.945383096974485e-06, - "loss": 0.8986, - "step": 5804 - }, - { - "epoch": 0.5235153537448708, - "grad_norm": 1.8239908014701325, - "learning_rate": 1.944799106563227e-06, - "loss": 0.9026, - "step": 5805 - }, - { - "epoch": 0.523605537268341, - "grad_norm": 1.774226076353695, - "learning_rate": 1.9442151208620133e-06, - "loss": 1.0115, - "step": 5806 - }, - { - "epoch": 0.5236957207918114, - "grad_norm": 1.8346308177261341, - "learning_rate": 1.943631139920672e-06, - "loss": 1.0303, - "step": 5807 - }, - { - "epoch": 0.5237859043152816, - "grad_norm": 1.7243194509188657, - "learning_rate": 1.943047163789034e-06, - "loss": 0.9312, - "step": 5808 - }, - { - "epoch": 0.5238760878387518, - "grad_norm": 1.2339073454171021, - "learning_rate": 1.942463192516925e-06, - "loss": 0.9428, - "step": 5809 - }, - { - "epoch": 0.5239662713622221, - "grad_norm": 1.3417053191420243, - "learning_rate": 1.9418792261541746e-06, - "loss": 0.997, - "step": 5810 - }, - { - "epoch": 0.5240564548856924, - "grad_norm": 1.3631281725791031, - "learning_rate": 1.9412952647506094e-06, - "loss": 0.9334, - "step": 5811 - }, - { - "epoch": 0.5241466384091626, - "grad_norm": 1.460388570053605, - "learning_rate": 1.9407113083560552e-06, - "loss": 0.9548, - "step": 5812 - }, - { - "epoch": 0.5242368219326329, - "grad_norm": 1.6655297057068783, - "learning_rate": 1.940127357020339e-06, - "loss": 0.9637, - "step": 5813 - }, - { - "epoch": 0.5243270054561032, - "grad_norm": 1.6607912532122198, - "learning_rate": 1.939543410793287e-06, - "loss": 0.9886, - "step": 5814 - }, - { - "epoch": 0.5244171889795735, - "grad_norm": 3.0467095745857624, - "learning_rate": 1.9389594697247246e-06, - "loss": 1.0159, - "step": 5815 - }, - { - "epoch": 0.5245073725030437, - "grad_norm": 2.6123514160540258, - "learning_rate": 1.9383755338644763e-06, - "loss": 1.0553, - "step": 5816 - }, - { - "epoch": 0.5245975560265139, - "grad_norm": 3.4961275749062035, - "learning_rate": 1.937791603262368e-06, - "loss": 1.0067, - "step": 5817 - }, - { - "epoch": 0.5246877395499843, - "grad_norm": 1.5011064104690073, - "learning_rate": 1.9372076779682235e-06, - "loss": 0.9283, - "step": 5818 - }, - { - "epoch": 0.5247779230734545, - "grad_norm": 1.4164632576412997, - "learning_rate": 1.9366237580318648e-06, - "loss": 0.9271, - "step": 5819 - }, - { - "epoch": 0.5248681065969247, - "grad_norm": 1.1689401624880027, - "learning_rate": 1.9360398435031176e-06, - "loss": 0.9852, - "step": 5820 - }, - { - "epoch": 0.524958290120395, - "grad_norm": 2.1009578473139783, - "learning_rate": 1.9354559344318025e-06, - "loss": 0.9937, - "step": 5821 - }, - { - "epoch": 0.5250484736438653, - "grad_norm": 5.513837746298388, - "learning_rate": 1.934872030867744e-06, - "loss": 1.0357, - "step": 5822 - }, - { - "epoch": 0.5251386571673355, - "grad_norm": 1.6114399079360788, - "learning_rate": 1.934288132860763e-06, - "loss": 0.9894, - "step": 5823 - }, - { - "epoch": 0.5252288406908058, - "grad_norm": 1.2810477120345805, - "learning_rate": 1.93370424046068e-06, - "loss": 0.981, - "step": 5824 - }, - { - "epoch": 0.525319024214276, - "grad_norm": 1.504369174009357, - "learning_rate": 1.9331203537173177e-06, - "loss": 1.0427, - "step": 5825 - }, - { - "epoch": 0.5254092077377464, - "grad_norm": 1.1699322682525617, - "learning_rate": 1.9325364726804947e-06, - "loss": 1.0929, - "step": 5826 - }, - { - "epoch": 0.5254993912612166, - "grad_norm": 1.3346753320695308, - "learning_rate": 1.9319525974000327e-06, - "loss": 0.9092, - "step": 5827 - }, - { - "epoch": 0.5255895747846868, - "grad_norm": 1.8624464040123687, - "learning_rate": 1.93136872792575e-06, - "loss": 1.0376, - "step": 5828 - }, - { - "epoch": 0.525679758308157, - "grad_norm": 1.9569956153777628, - "learning_rate": 1.9307848643074653e-06, - "loss": 0.931, - "step": 5829 - }, - { - "epoch": 0.5257699418316274, - "grad_norm": 1.3182816546101417, - "learning_rate": 1.9302010065949985e-06, - "loss": 1.0138, - "step": 5830 - }, - { - "epoch": 0.5258601253550976, - "grad_norm": 1.4394701849290108, - "learning_rate": 1.9296171548381657e-06, - "loss": 0.9434, - "step": 5831 - }, - { - "epoch": 0.5259503088785679, - "grad_norm": 1.313653484468707, - "learning_rate": 1.9290333090867862e-06, - "loss": 0.9679, - "step": 5832 - }, - { - "epoch": 0.5260404924020381, - "grad_norm": 2.0724773101116774, - "learning_rate": 1.928449469390676e-06, - "loss": 0.9828, - "step": 5833 - }, - { - "epoch": 0.5261306759255084, - "grad_norm": 1.2890275892674443, - "learning_rate": 1.927865635799651e-06, - "loss": 0.954, - "step": 5834 - }, - { - "epoch": 0.5262208594489787, - "grad_norm": 1.8746231972053062, - "learning_rate": 1.927281808363528e-06, - "loss": 0.9123, - "step": 5835 - }, - { - "epoch": 0.5263110429724489, - "grad_norm": 1.4304132339313378, - "learning_rate": 1.9266979871321216e-06, - "loss": 1.0556, - "step": 5836 - }, - { - "epoch": 0.5264012264959192, - "grad_norm": 1.5078503697833017, - "learning_rate": 1.9261141721552482e-06, - "loss": 0.9339, - "step": 5837 - }, - { - "epoch": 0.5264914100193895, - "grad_norm": 1.3956344045210278, - "learning_rate": 1.9255303634827204e-06, - "loss": 1.0452, - "step": 5838 - }, - { - "epoch": 0.5265815935428597, - "grad_norm": 1.969526289222822, - "learning_rate": 1.924946561164352e-06, - "loss": 1.0453, - "step": 5839 - }, - { - "epoch": 0.52667177706633, - "grad_norm": 1.586203717217126, - "learning_rate": 1.9243627652499582e-06, - "loss": 0.9277, - "step": 5840 - }, - { - "epoch": 0.5267619605898003, - "grad_norm": 2.940834101216263, - "learning_rate": 1.9237789757893493e-06, - "loss": 1.044, - "step": 5841 - }, - { - "epoch": 0.5268521441132705, - "grad_norm": 1.4240552514372882, - "learning_rate": 1.9231951928323395e-06, - "loss": 1.0202, - "step": 5842 - }, - { - "epoch": 0.5269423276367408, - "grad_norm": 1.4988349522685431, - "learning_rate": 1.922611416428738e-06, - "loss": 0.938, - "step": 5843 - }, - { - "epoch": 0.527032511160211, - "grad_norm": 1.486769341286076, - "learning_rate": 1.922027646628358e-06, - "loss": 0.9977, - "step": 5844 - }, - { - "epoch": 0.5271226946836813, - "grad_norm": 0.7905467586477665, - "learning_rate": 1.9214438834810092e-06, - "loss": 0.8839, - "step": 5845 - }, - { - "epoch": 0.5272128782071516, - "grad_norm": 1.7150390096737622, - "learning_rate": 1.9208601270365008e-06, - "loss": 0.9147, - "step": 5846 - }, - { - "epoch": 0.5273030617306218, - "grad_norm": 1.452740772271181, - "learning_rate": 1.9202763773446435e-06, - "loss": 1.0687, - "step": 5847 - }, - { - "epoch": 0.527393245254092, - "grad_norm": 1.5034804496902205, - "learning_rate": 1.9196926344552444e-06, - "loss": 0.8842, - "step": 5848 - }, - { - "epoch": 0.5274834287775624, - "grad_norm": 1.732370747840728, - "learning_rate": 1.919108898418113e-06, - "loss": 1.0084, - "step": 5849 - }, - { - "epoch": 0.5275736123010326, - "grad_norm": 1.2879278564625227, - "learning_rate": 1.918525169283057e-06, - "loss": 0.8726, - "step": 5850 - }, - { - "epoch": 0.5276637958245028, - "grad_norm": 1.5045962645697681, - "learning_rate": 1.9179414470998817e-06, - "loss": 1.0114, - "step": 5851 - }, - { - "epoch": 0.5277539793479731, - "grad_norm": 1.2705637327960686, - "learning_rate": 1.917357731918395e-06, - "loss": 0.9391, - "step": 5852 - }, - { - "epoch": 0.5278441628714434, - "grad_norm": 1.2509161452891902, - "learning_rate": 1.9167740237884025e-06, - "loss": 1.11, - "step": 5853 - }, - { - "epoch": 0.5279343463949137, - "grad_norm": 1.4546011533422367, - "learning_rate": 1.916190322759709e-06, - "loss": 1.0179, - "step": 5854 - }, - { - "epoch": 0.5280245299183839, - "grad_norm": 1.5160296971474723, - "learning_rate": 1.91560662888212e-06, - "loss": 1.0797, - "step": 5855 - }, - { - "epoch": 0.5281147134418541, - "grad_norm": 1.2952126109004682, - "learning_rate": 1.915022942205438e-06, - "loss": 1.0207, - "step": 5856 - }, - { - "epoch": 0.5282048969653245, - "grad_norm": 1.8013214823551595, - "learning_rate": 1.914439262779468e-06, - "loss": 0.9798, - "step": 5857 - }, - { - "epoch": 0.5282950804887947, - "grad_norm": 1.7809131370708067, - "learning_rate": 1.9138555906540103e-06, - "loss": 0.9223, - "step": 5858 - }, - { - "epoch": 0.5283852640122649, - "grad_norm": 1.5027221774386368, - "learning_rate": 1.91327192587887e-06, - "loss": 0.8911, - "step": 5859 - }, - { - "epoch": 0.5284754475357353, - "grad_norm": 1.7181860258574753, - "learning_rate": 1.912688268503846e-06, - "loss": 0.9563, - "step": 5860 - }, - { - "epoch": 0.5285656310592055, - "grad_norm": 1.6265380623014127, - "learning_rate": 1.912104618578741e-06, - "loss": 0.9343, - "step": 5861 - }, - { - "epoch": 0.5286558145826757, - "grad_norm": 1.2826625201963893, - "learning_rate": 1.9115209761533554e-06, - "loss": 0.9582, - "step": 5862 - }, - { - "epoch": 0.528745998106146, - "grad_norm": 1.5700170134576208, - "learning_rate": 1.9109373412774863e-06, - "loss": 0.8916, - "step": 5863 - }, - { - "epoch": 0.5288361816296163, - "grad_norm": 3.057967588363941, - "learning_rate": 1.910353714000936e-06, - "loss": 0.9823, - "step": 5864 - }, - { - "epoch": 0.5289263651530866, - "grad_norm": 1.6057362577232492, - "learning_rate": 1.9097700943734997e-06, - "loss": 0.9399, - "step": 5865 - }, - { - "epoch": 0.5290165486765568, - "grad_norm": 1.555459295806433, - "learning_rate": 1.909186482444977e-06, - "loss": 0.8798, - "step": 5866 - }, - { - "epoch": 0.529106732200027, - "grad_norm": 1.4751282846300937, - "learning_rate": 1.9086028782651652e-06, - "loss": 0.8855, - "step": 5867 - }, - { - "epoch": 0.5291969157234974, - "grad_norm": 1.3620463464724557, - "learning_rate": 1.908019281883859e-06, - "loss": 0.9795, - "step": 5868 - }, - { - "epoch": 0.5292870992469676, - "grad_norm": 1.4056991345916015, - "learning_rate": 1.9074356933508545e-06, - "loss": 0.9754, - "step": 5869 - }, - { - "epoch": 0.5293772827704378, - "grad_norm": 1.9946737296065233, - "learning_rate": 1.9068521127159477e-06, - "loss": 1.016, - "step": 5870 - }, - { - "epoch": 0.5294674662939081, - "grad_norm": 1.7165879956832473, - "learning_rate": 1.9062685400289322e-06, - "loss": 0.9275, - "step": 5871 - }, - { - "epoch": 0.5295576498173784, - "grad_norm": 1.5250297637286228, - "learning_rate": 1.9056849753396018e-06, - "loss": 0.9505, - "step": 5872 - }, - { - "epoch": 0.5296478333408486, - "grad_norm": 1.7370829706091748, - "learning_rate": 1.9051014186977485e-06, - "loss": 1.0133, - "step": 5873 - }, - { - "epoch": 0.5297380168643189, - "grad_norm": 1.3506061941353067, - "learning_rate": 1.9045178701531664e-06, - "loss": 0.9792, - "step": 5874 - }, - { - "epoch": 0.5298282003877891, - "grad_norm": 1.301522029691733, - "learning_rate": 1.903934329755645e-06, - "loss": 0.9703, - "step": 5875 - }, - { - "epoch": 0.5299183839112594, - "grad_norm": 1.33691455292118, - "learning_rate": 1.9033507975549775e-06, - "loss": 1.0167, - "step": 5876 - }, - { - "epoch": 0.5300085674347297, - "grad_norm": 1.3558139856072127, - "learning_rate": 1.9027672736009525e-06, - "loss": 0.9713, - "step": 5877 - }, - { - "epoch": 0.5300987509581999, - "grad_norm": 1.3416948349090665, - "learning_rate": 1.9021837579433593e-06, - "loss": 0.9261, - "step": 5878 - }, - { - "epoch": 0.5301889344816701, - "grad_norm": 1.5076711321560867, - "learning_rate": 1.901600250631988e-06, - "loss": 0.9582, - "step": 5879 - }, - { - "epoch": 0.5302791180051405, - "grad_norm": 1.5212605890863098, - "learning_rate": 1.901016751716625e-06, - "loss": 0.9682, - "step": 5880 - }, - { - "epoch": 0.5303693015286107, - "grad_norm": 1.377557976085953, - "learning_rate": 1.9004332612470593e-06, - "loss": 0.9493, - "step": 5881 - }, - { - "epoch": 0.530459485052081, - "grad_norm": 1.1454204451210828, - "learning_rate": 1.8998497792730763e-06, - "loss": 1.0063, - "step": 5882 - }, - { - "epoch": 0.5305496685755512, - "grad_norm": 1.3581540554992033, - "learning_rate": 1.8992663058444629e-06, - "loss": 1.026, - "step": 5883 - }, - { - "epoch": 0.5306398520990215, - "grad_norm": 1.387676241567143, - "learning_rate": 1.8986828410110032e-06, - "loss": 1.0158, - "step": 5884 - }, - { - "epoch": 0.5307300356224918, - "grad_norm": 3.756282375913405, - "learning_rate": 1.8980993848224823e-06, - "loss": 0.9688, - "step": 5885 - }, - { - "epoch": 0.530820219145962, - "grad_norm": 2.223030796237539, - "learning_rate": 1.8975159373286843e-06, - "loss": 0.9389, - "step": 5886 - }, - { - "epoch": 0.5309104026694323, - "grad_norm": 1.3729997738048259, - "learning_rate": 1.8969324985793904e-06, - "loss": 1.0117, - "step": 5887 - }, - { - "epoch": 0.5310005861929026, - "grad_norm": 1.470742841896069, - "learning_rate": 1.8963490686243851e-06, - "loss": 0.967, - "step": 5888 - }, - { - "epoch": 0.5310907697163728, - "grad_norm": 1.5622044856046022, - "learning_rate": 1.8957656475134486e-06, - "loss": 0.913, - "step": 5889 - }, - { - "epoch": 0.531180953239843, - "grad_norm": 1.5393857664846131, - "learning_rate": 1.895182235296361e-06, - "loss": 0.9586, - "step": 5890 - }, - { - "epoch": 0.5312711367633134, - "grad_norm": 1.8236320254555105, - "learning_rate": 1.8945988320229042e-06, - "loss": 0.9039, - "step": 5891 - }, - { - "epoch": 0.5313613202867836, - "grad_norm": 1.4958142051784287, - "learning_rate": 1.8940154377428553e-06, - "loss": 1.0643, - "step": 5892 - }, - { - "epoch": 0.5314515038102539, - "grad_norm": 1.2586070329378412, - "learning_rate": 1.8934320525059944e-06, - "loss": 1.0059, - "step": 5893 - }, - { - "epoch": 0.5315416873337241, - "grad_norm": 1.9084645524644428, - "learning_rate": 1.8928486763620984e-06, - "loss": 0.8598, - "step": 5894 - }, - { - "epoch": 0.5316318708571944, - "grad_norm": 1.437307593696104, - "learning_rate": 1.892265309360943e-06, - "loss": 0.9907, - "step": 5895 - }, - { - "epoch": 0.5317220543806647, - "grad_norm": 1.506237413029294, - "learning_rate": 1.8916819515523067e-06, - "loss": 0.9622, - "step": 5896 - }, - { - "epoch": 0.5318122379041349, - "grad_norm": 1.5498252247132878, - "learning_rate": 1.891098602985963e-06, - "loss": 0.902, - "step": 5897 - }, - { - "epoch": 0.5319024214276051, - "grad_norm": 1.4047752700898004, - "learning_rate": 1.8905152637116872e-06, - "loss": 0.9956, - "step": 5898 - }, - { - "epoch": 0.5319926049510755, - "grad_norm": 2.149545390836878, - "learning_rate": 1.8899319337792527e-06, - "loss": 1.0162, - "step": 5899 - }, - { - "epoch": 0.5320827884745457, - "grad_norm": 1.2208318779755511, - "learning_rate": 1.8893486132384325e-06, - "loss": 1.0157, - "step": 5900 - }, - { - "epoch": 0.5321729719980159, - "grad_norm": 1.4810875880055563, - "learning_rate": 1.888765302138999e-06, - "loss": 0.9497, - "step": 5901 - }, - { - "epoch": 0.5322631555214862, - "grad_norm": 2.0071084535924864, - "learning_rate": 1.8881820005307224e-06, - "loss": 0.9223, - "step": 5902 - }, - { - "epoch": 0.5323533390449565, - "grad_norm": 1.2558605615304708, - "learning_rate": 1.8875987084633748e-06, - "loss": 0.9162, - "step": 5903 - }, - { - "epoch": 0.5324435225684268, - "grad_norm": 2.862670833370811, - "learning_rate": 1.8870154259867246e-06, - "loss": 1.0099, - "step": 5904 - }, - { - "epoch": 0.532533706091897, - "grad_norm": 1.4697028782352468, - "learning_rate": 1.886432153150542e-06, - "loss": 0.9108, - "step": 5905 - }, - { - "epoch": 0.5326238896153672, - "grad_norm": 2.1518580724719043, - "learning_rate": 1.8858488900045944e-06, - "loss": 0.9586, - "step": 5906 - }, - { - "epoch": 0.5327140731388376, - "grad_norm": 1.8299727272127662, - "learning_rate": 1.885265636598648e-06, - "loss": 0.9293, - "step": 5907 - }, - { - "epoch": 0.5328042566623078, - "grad_norm": 1.442269167752381, - "learning_rate": 1.884682392982471e-06, - "loss": 1.0117, - "step": 5908 - }, - { - "epoch": 0.532894440185778, - "grad_norm": 1.7860104042852292, - "learning_rate": 1.8840991592058274e-06, - "loss": 1.034, - "step": 5909 - }, - { - "epoch": 0.5329846237092484, - "grad_norm": 1.2061131019191242, - "learning_rate": 1.8835159353184828e-06, - "loss": 0.9965, - "step": 5910 - }, - { - "epoch": 0.5330748072327186, - "grad_norm": 1.2998255520929407, - "learning_rate": 1.8829327213702013e-06, - "loss": 1.0456, - "step": 5911 - }, - { - "epoch": 0.5331649907561888, - "grad_norm": 1.5290296377100103, - "learning_rate": 1.8823495174107452e-06, - "loss": 1.0436, - "step": 5912 - }, - { - "epoch": 0.5332551742796591, - "grad_norm": 1.4321607032781056, - "learning_rate": 1.8817663234898773e-06, - "loss": 0.9487, - "step": 5913 - }, - { - "epoch": 0.5333453578031294, - "grad_norm": 1.4066431555583536, - "learning_rate": 1.881183139657358e-06, - "loss": 0.9826, - "step": 5914 - }, - { - "epoch": 0.5334355413265996, - "grad_norm": 1.3500581870078359, - "learning_rate": 1.8805999659629488e-06, - "loss": 0.9854, - "step": 5915 - }, - { - "epoch": 0.5335257248500699, - "grad_norm": 0.6425187567559434, - "learning_rate": 1.880016802456409e-06, - "loss": 0.8001, - "step": 5916 - }, - { - "epoch": 0.5336159083735401, - "grad_norm": 1.5723995175423242, - "learning_rate": 1.8794336491874964e-06, - "loss": 0.9171, - "step": 5917 - }, - { - "epoch": 0.5337060918970105, - "grad_norm": 2.2341940212982148, - "learning_rate": 1.8788505062059708e-06, - "loss": 0.94, - "step": 5918 - }, - { - "epoch": 0.5337962754204807, - "grad_norm": 1.3808941654640912, - "learning_rate": 1.8782673735615869e-06, - "loss": 0.9642, - "step": 5919 - }, - { - "epoch": 0.5338864589439509, - "grad_norm": 1.4594109867731602, - "learning_rate": 1.8776842513041026e-06, - "loss": 0.938, - "step": 5920 - }, - { - "epoch": 0.5339766424674212, - "grad_norm": 1.556908942135526, - "learning_rate": 1.8771011394832727e-06, - "loss": 0.9412, - "step": 5921 - }, - { - "epoch": 0.5340668259908915, - "grad_norm": 1.4898555569113385, - "learning_rate": 1.8765180381488501e-06, - "loss": 0.9596, - "step": 5922 - }, - { - "epoch": 0.5341570095143617, - "grad_norm": 0.7281179288594456, - "learning_rate": 1.8759349473505905e-06, - "loss": 0.938, - "step": 5923 - }, - { - "epoch": 0.534247193037832, - "grad_norm": 1.9444193800567204, - "learning_rate": 1.8753518671382447e-06, - "loss": 0.8984, - "step": 5924 - }, - { - "epoch": 0.5343373765613022, - "grad_norm": 1.2671867436454953, - "learning_rate": 1.8747687975615649e-06, - "loss": 0.9577, - "step": 5925 - }, - { - "epoch": 0.5344275600847725, - "grad_norm": 1.536848925927421, - "learning_rate": 1.874185738670302e-06, - "loss": 0.8753, - "step": 5926 - }, - { - "epoch": 0.5345177436082428, - "grad_norm": 1.36057815800554, - "learning_rate": 1.8736026905142057e-06, - "loss": 0.9583, - "step": 5927 - }, - { - "epoch": 0.534607927131713, - "grad_norm": 1.521327508912456, - "learning_rate": 1.873019653143025e-06, - "loss": 0.9467, - "step": 5928 - }, - { - "epoch": 0.5346981106551832, - "grad_norm": 1.434874334249268, - "learning_rate": 1.8724366266065069e-06, - "loss": 0.9847, - "step": 5929 - }, - { - "epoch": 0.5347882941786536, - "grad_norm": 1.4228107601015214, - "learning_rate": 1.8718536109543998e-06, - "loss": 1.0445, - "step": 5930 - }, - { - "epoch": 0.5348784777021238, - "grad_norm": 1.527809715900471, - "learning_rate": 1.8712706062364485e-06, - "loss": 0.9909, - "step": 5931 - }, - { - "epoch": 0.534968661225594, - "grad_norm": 1.417169939666262, - "learning_rate": 1.8706876125024e-06, - "loss": 0.8708, - "step": 5932 - }, - { - "epoch": 0.5350588447490644, - "grad_norm": 1.4811241289571337, - "learning_rate": 1.870104629801997e-06, - "loss": 1.0116, - "step": 5933 - }, - { - "epoch": 0.5351490282725346, - "grad_norm": 1.916105651295904, - "learning_rate": 1.8695216581849823e-06, - "loss": 0.9896, - "step": 5934 - }, - { - "epoch": 0.5352392117960049, - "grad_norm": 1.3608745328261709, - "learning_rate": 1.8689386977011003e-06, - "loss": 0.9483, - "step": 5935 - }, - { - "epoch": 0.5353293953194751, - "grad_norm": 0.7124872608049826, - "learning_rate": 1.8683557484000903e-06, - "loss": 0.8332, - "step": 5936 - }, - { - "epoch": 0.5354195788429454, - "grad_norm": 1.4070694549135172, - "learning_rate": 1.8677728103316947e-06, - "loss": 1.0077, - "step": 5937 - }, - { - "epoch": 0.5355097623664157, - "grad_norm": 1.214124313214482, - "learning_rate": 1.8671898835456518e-06, - "loss": 0.9333, - "step": 5938 - }, - { - "epoch": 0.5355999458898859, - "grad_norm": 1.3774611814121305, - "learning_rate": 1.8666069680917003e-06, - "loss": 1.0793, - "step": 5939 - }, - { - "epoch": 0.5356901294133561, - "grad_norm": 1.7213601147370483, - "learning_rate": 1.8660240640195775e-06, - "loss": 0.9308, - "step": 5940 - }, - { - "epoch": 0.5357803129368265, - "grad_norm": 1.1763560768804857, - "learning_rate": 1.8654411713790203e-06, - "loss": 1.0115, - "step": 5941 - }, - { - "epoch": 0.5358704964602967, - "grad_norm": 1.7034411355684709, - "learning_rate": 1.8648582902197648e-06, - "loss": 0.9969, - "step": 5942 - }, - { - "epoch": 0.535960679983767, - "grad_norm": 1.7740579603633349, - "learning_rate": 1.8642754205915452e-06, - "loss": 0.984, - "step": 5943 - }, - { - "epoch": 0.5360508635072372, - "grad_norm": 1.5747806123797292, - "learning_rate": 1.8636925625440943e-06, - "loss": 0.9256, - "step": 5944 - }, - { - "epoch": 0.5361410470307075, - "grad_norm": 1.5074078110678637, - "learning_rate": 1.863109716127146e-06, - "loss": 0.8916, - "step": 5945 - }, - { - "epoch": 0.5362312305541778, - "grad_norm": 1.2847033691168472, - "learning_rate": 1.8625268813904311e-06, - "loss": 1.0182, - "step": 5946 - }, - { - "epoch": 0.536321414077648, - "grad_norm": 1.2955788259195742, - "learning_rate": 1.8619440583836814e-06, - "loss": 1.0003, - "step": 5947 - }, - { - "epoch": 0.5364115976011182, - "grad_norm": 1.6352122071702613, - "learning_rate": 1.8613612471566249e-06, - "loss": 0.9828, - "step": 5948 - }, - { - "epoch": 0.5365017811245886, - "grad_norm": 2.7029323619490526, - "learning_rate": 1.8607784477589922e-06, - "loss": 0.9234, - "step": 5949 - }, - { - "epoch": 0.5365919646480588, - "grad_norm": 1.4917144824556638, - "learning_rate": 1.8601956602405103e-06, - "loss": 0.9941, - "step": 5950 - }, - { - "epoch": 0.536682148171529, - "grad_norm": 1.7072032431543966, - "learning_rate": 1.8596128846509043e-06, - "loss": 1.0188, - "step": 5951 - }, - { - "epoch": 0.5367723316949993, - "grad_norm": 1.4307393024957176, - "learning_rate": 1.859030121039902e-06, - "loss": 0.9285, - "step": 5952 - }, - { - "epoch": 0.5368625152184696, - "grad_norm": 1.6745121202642475, - "learning_rate": 1.8584473694572268e-06, - "loss": 0.9271, - "step": 5953 - }, - { - "epoch": 0.5369526987419398, - "grad_norm": 1.6514641334595777, - "learning_rate": 1.8578646299526026e-06, - "loss": 0.9986, - "step": 5954 - }, - { - "epoch": 0.5370428822654101, - "grad_norm": 1.4946434222179776, - "learning_rate": 1.8572819025757518e-06, - "loss": 0.9463, - "step": 5955 - }, - { - "epoch": 0.5371330657888804, - "grad_norm": 1.555031743108936, - "learning_rate": 1.8566991873763959e-06, - "loss": 0.995, - "step": 5956 - }, - { - "epoch": 0.5372232493123507, - "grad_norm": 1.943235521826725, - "learning_rate": 1.856116484404256e-06, - "loss": 0.9258, - "step": 5957 - }, - { - "epoch": 0.5373134328358209, - "grad_norm": 1.5330840046281162, - "learning_rate": 1.8555337937090502e-06, - "loss": 0.9319, - "step": 5958 - }, - { - "epoch": 0.5374036163592911, - "grad_norm": 1.8082584210303496, - "learning_rate": 1.8549511153404984e-06, - "loss": 0.9528, - "step": 5959 - }, - { - "epoch": 0.5374937998827615, - "grad_norm": 1.4039930245696242, - "learning_rate": 1.854368449348317e-06, - "loss": 1.0857, - "step": 5960 - }, - { - "epoch": 0.5375839834062317, - "grad_norm": 1.8867192554670649, - "learning_rate": 1.853785795782222e-06, - "loss": 0.9401, - "step": 5961 - }, - { - "epoch": 0.5376741669297019, - "grad_norm": 1.7172863363436022, - "learning_rate": 1.85320315469193e-06, - "loss": 0.8429, - "step": 5962 - }, - { - "epoch": 0.5377643504531722, - "grad_norm": 0.7356455748345677, - "learning_rate": 1.8526205261271534e-06, - "loss": 0.8751, - "step": 5963 - }, - { - "epoch": 0.5378545339766425, - "grad_norm": 1.6580714880069045, - "learning_rate": 1.852037910137607e-06, - "loss": 0.8857, - "step": 5964 - }, - { - "epoch": 0.5379447175001127, - "grad_norm": 1.253186123087395, - "learning_rate": 1.851455306773002e-06, - "loss": 1.0653, - "step": 5965 - }, - { - "epoch": 0.538034901023583, - "grad_norm": 1.1967878524088558, - "learning_rate": 1.8508727160830483e-06, - "loss": 0.9521, - "step": 5966 - }, - { - "epoch": 0.5381250845470532, - "grad_norm": 1.4156626715289722, - "learning_rate": 1.8502901381174575e-06, - "loss": 1.0446, - "step": 5967 - }, - { - "epoch": 0.5382152680705236, - "grad_norm": 1.8156974476002117, - "learning_rate": 1.8497075729259372e-06, - "loss": 0.8773, - "step": 5968 - }, - { - "epoch": 0.5383054515939938, - "grad_norm": 0.7636416690247851, - "learning_rate": 1.8491250205581963e-06, - "loss": 0.9205, - "step": 5969 - }, - { - "epoch": 0.538395635117464, - "grad_norm": 1.220111135752035, - "learning_rate": 1.8485424810639393e-06, - "loss": 0.979, - "step": 5970 - }, - { - "epoch": 0.5384858186409343, - "grad_norm": 1.806639305320464, - "learning_rate": 1.847959954492874e-06, - "loss": 1.0305, - "step": 5971 - }, - { - "epoch": 0.5385760021644046, - "grad_norm": 1.7341128314398422, - "learning_rate": 1.8473774408947035e-06, - "loss": 1.0691, - "step": 5972 - }, - { - "epoch": 0.5386661856878748, - "grad_norm": 0.7406448796175565, - "learning_rate": 1.8467949403191308e-06, - "loss": 0.8295, - "step": 5973 - }, - { - "epoch": 0.5387563692113451, - "grad_norm": 1.5744609788388242, - "learning_rate": 1.8462124528158592e-06, - "loss": 1.0255, - "step": 5974 - }, - { - "epoch": 0.5388465527348153, - "grad_norm": 1.2030866047070752, - "learning_rate": 1.8456299784345881e-06, - "loss": 0.9621, - "step": 5975 - }, - { - "epoch": 0.5389367362582856, - "grad_norm": 1.4176323441716392, - "learning_rate": 1.8450475172250194e-06, - "loss": 1.0004, - "step": 5976 - }, - { - "epoch": 0.5390269197817559, - "grad_norm": 1.3545962557043787, - "learning_rate": 1.844465069236851e-06, - "loss": 0.9573, - "step": 5977 - }, - { - "epoch": 0.5391171033052261, - "grad_norm": 4.265856837864725, - "learning_rate": 1.8438826345197796e-06, - "loss": 0.8921, - "step": 5978 - }, - { - "epoch": 0.5392072868286965, - "grad_norm": 1.7546474870041282, - "learning_rate": 1.8433002131235036e-06, - "loss": 0.9721, - "step": 5979 - }, - { - "epoch": 0.5392974703521667, - "grad_norm": 1.3807040589864241, - "learning_rate": 1.8427178050977167e-06, - "loss": 0.9968, - "step": 5980 - }, - { - "epoch": 0.5393876538756369, - "grad_norm": 1.314149365179185, - "learning_rate": 1.8421354104921143e-06, - "loss": 1.0536, - "step": 5981 - }, - { - "epoch": 0.5394778373991072, - "grad_norm": 0.630946811722565, - "learning_rate": 1.8415530293563894e-06, - "loss": 0.8188, - "step": 5982 - }, - { - "epoch": 0.5395680209225775, - "grad_norm": 2.8828776695928173, - "learning_rate": 1.8409706617402333e-06, - "loss": 0.9497, - "step": 5983 - }, - { - "epoch": 0.5396582044460477, - "grad_norm": 1.4443592123821936, - "learning_rate": 1.8403883076933378e-06, - "loss": 0.9966, - "step": 5984 - }, - { - "epoch": 0.539748387969518, - "grad_norm": 1.272672411662051, - "learning_rate": 1.839805967265391e-06, - "loss": 0.9677, - "step": 5985 - }, - { - "epoch": 0.5398385714929882, - "grad_norm": 1.4753334541343734, - "learning_rate": 1.839223640506083e-06, - "loss": 1.0313, - "step": 5986 - }, - { - "epoch": 0.5399287550164585, - "grad_norm": 1.4184394664628912, - "learning_rate": 1.8386413274650998e-06, - "loss": 0.9725, - "step": 5987 - }, - { - "epoch": 0.5400189385399288, - "grad_norm": 1.3947930183812438, - "learning_rate": 1.8380590281921294e-06, - "loss": 1.0203, - "step": 5988 - }, - { - "epoch": 0.540109122063399, - "grad_norm": 1.3418203737736054, - "learning_rate": 1.8374767427368552e-06, - "loss": 0.8861, - "step": 5989 - }, - { - "epoch": 0.5401993055868692, - "grad_norm": 2.494435316105457, - "learning_rate": 1.8368944711489608e-06, - "loss": 1.0027, - "step": 5990 - }, - { - "epoch": 0.5402894891103396, - "grad_norm": 1.8399716472514485, - "learning_rate": 1.8363122134781304e-06, - "loss": 1.0007, - "step": 5991 - }, - { - "epoch": 0.5403796726338098, - "grad_norm": 2.3045499081677185, - "learning_rate": 1.835729969774044e-06, - "loss": 0.9982, - "step": 5992 - }, - { - "epoch": 0.54046985615728, - "grad_norm": 1.8980902208967356, - "learning_rate": 1.8351477400863823e-06, - "loss": 0.979, - "step": 5993 - }, - { - "epoch": 0.5405600396807503, - "grad_norm": 1.4888109280844697, - "learning_rate": 1.8345655244648249e-06, - "loss": 0.9556, - "step": 5994 - }, - { - "epoch": 0.5406502232042206, - "grad_norm": 1.1836872126877391, - "learning_rate": 1.8339833229590486e-06, - "loss": 0.97, - "step": 5995 - }, - { - "epoch": 0.5407404067276909, - "grad_norm": 1.465530275321708, - "learning_rate": 1.833401135618731e-06, - "loss": 1.0574, - "step": 5996 - }, - { - "epoch": 0.5408305902511611, - "grad_norm": 1.2344776545464826, - "learning_rate": 1.8328189624935466e-06, - "loss": 0.9063, - "step": 5997 - }, - { - "epoch": 0.5409207737746313, - "grad_norm": 1.3947830186632277, - "learning_rate": 1.832236803633171e-06, - "loss": 1.0028, - "step": 5998 - }, - { - "epoch": 0.5410109572981017, - "grad_norm": 1.682607623054832, - "learning_rate": 1.831654659087276e-06, - "loss": 0.9417, - "step": 5999 - }, - { - "epoch": 0.5411011408215719, - "grad_norm": 1.3878342990413008, - "learning_rate": 1.831072528905533e-06, - "loss": 0.9945, - "step": 6000 - }, - { - "epoch": 0.5411913243450421, - "grad_norm": 1.315856320665766, - "learning_rate": 1.8304904131376142e-06, - "loss": 1.032, - "step": 6001 - }, - { - "epoch": 0.5412815078685124, - "grad_norm": 1.6211625050131533, - "learning_rate": 1.8299083118331874e-06, - "loss": 0.9873, - "step": 6002 - }, - { - "epoch": 0.5413716913919827, - "grad_norm": 1.559863116176508, - "learning_rate": 1.8293262250419217e-06, - "loss": 1.1182, - "step": 6003 - }, - { - "epoch": 0.541461874915453, - "grad_norm": 1.6893708490171817, - "learning_rate": 1.828744152813484e-06, - "loss": 1.088, - "step": 6004 - }, - { - "epoch": 0.5415520584389232, - "grad_norm": 1.5107671221479773, - "learning_rate": 1.8281620951975382e-06, - "loss": 0.9289, - "step": 6005 - }, - { - "epoch": 0.5416422419623935, - "grad_norm": 2.534659361967062, - "learning_rate": 1.827580052243751e-06, - "loss": 0.9455, - "step": 6006 - }, - { - "epoch": 0.5417324254858638, - "grad_norm": 1.5319844255776238, - "learning_rate": 1.826998024001784e-06, - "loss": 1.0082, - "step": 6007 - }, - { - "epoch": 0.541822609009334, - "grad_norm": 2.3780281438164694, - "learning_rate": 1.8264160105212995e-06, - "loss": 1.0012, - "step": 6008 - }, - { - "epoch": 0.5419127925328042, - "grad_norm": 1.6780141612695474, - "learning_rate": 1.8258340118519582e-06, - "loss": 0.9437, - "step": 6009 - }, - { - "epoch": 0.5420029760562746, - "grad_norm": 1.4556269282798722, - "learning_rate": 1.82525202804342e-06, - "loss": 0.9626, - "step": 6010 - }, - { - "epoch": 0.5420931595797448, - "grad_norm": 1.5160350441518282, - "learning_rate": 1.8246700591453415e-06, - "loss": 0.9556, - "step": 6011 - }, - { - "epoch": 0.542183343103215, - "grad_norm": 1.2951095237959662, - "learning_rate": 1.8240881052073801e-06, - "loss": 0.9775, - "step": 6012 - }, - { - "epoch": 0.5422735266266853, - "grad_norm": 1.337409117988959, - "learning_rate": 1.8235061662791923e-06, - "loss": 0.9506, - "step": 6013 - }, - { - "epoch": 0.5423637101501556, - "grad_norm": 1.431244216307569, - "learning_rate": 1.8229242424104309e-06, - "loss": 1.0448, - "step": 6014 - }, - { - "epoch": 0.5424538936736258, - "grad_norm": 0.6400435530476196, - "learning_rate": 1.8223423336507503e-06, - "loss": 0.8215, - "step": 6015 - }, - { - "epoch": 0.5425440771970961, - "grad_norm": 1.776166454715483, - "learning_rate": 1.8217604400498012e-06, - "loss": 0.9622, - "step": 6016 - }, - { - "epoch": 0.5426342607205663, - "grad_norm": 1.2113584217562439, - "learning_rate": 1.8211785616572333e-06, - "loss": 1.0189, - "step": 6017 - }, - { - "epoch": 0.5427244442440367, - "grad_norm": 1.6155371607028093, - "learning_rate": 1.8205966985226975e-06, - "loss": 1.0476, - "step": 6018 - }, - { - "epoch": 0.5428146277675069, - "grad_norm": 1.1739420653025292, - "learning_rate": 1.8200148506958397e-06, - "loss": 0.9822, - "step": 6019 - }, - { - "epoch": 0.5429048112909771, - "grad_norm": 0.6541402373067303, - "learning_rate": 1.819433018226308e-06, - "loss": 0.8703, - "step": 6020 - }, - { - "epoch": 0.5429949948144474, - "grad_norm": 0.60649037462476, - "learning_rate": 1.8188512011637471e-06, - "loss": 0.8028, - "step": 6021 - }, - { - "epoch": 0.5430851783379177, - "grad_norm": 1.4918468945416417, - "learning_rate": 1.8182693995578e-06, - "loss": 0.9554, - "step": 6022 - }, - { - "epoch": 0.5431753618613879, - "grad_norm": 1.0861444619097158, - "learning_rate": 1.8176876134581098e-06, - "loss": 0.8553, - "step": 6023 - }, - { - "epoch": 0.5432655453848582, - "grad_norm": 1.3905015097661002, - "learning_rate": 1.8171058429143176e-06, - "loss": 0.9231, - "step": 6024 - }, - { - "epoch": 0.5433557289083284, - "grad_norm": 1.3872750899025352, - "learning_rate": 1.8165240879760637e-06, - "loss": 1.01, - "step": 6025 - }, - { - "epoch": 0.5434459124317987, - "grad_norm": 1.243560799628159, - "learning_rate": 1.8159423486929862e-06, - "loss": 1.003, - "step": 6026 - }, - { - "epoch": 0.543536095955269, - "grad_norm": 1.2718813228215697, - "learning_rate": 1.815360625114722e-06, - "loss": 0.976, - "step": 6027 - }, - { - "epoch": 0.5436262794787392, - "grad_norm": 1.5649204580062446, - "learning_rate": 1.814778917290908e-06, - "loss": 0.9668, - "step": 6028 - }, - { - "epoch": 0.5437164630022095, - "grad_norm": 1.6514094892663702, - "learning_rate": 1.8141972252711773e-06, - "loss": 0.9926, - "step": 6029 - }, - { - "epoch": 0.5438066465256798, - "grad_norm": 1.496390370578165, - "learning_rate": 1.8136155491051645e-06, - "loss": 0.9725, - "step": 6030 - }, - { - "epoch": 0.54389683004915, - "grad_norm": 0.7219842208871057, - "learning_rate": 1.8130338888424998e-06, - "loss": 0.8842, - "step": 6031 - }, - { - "epoch": 0.5439870135726202, - "grad_norm": 1.679257998889422, - "learning_rate": 1.812452244532816e-06, - "loss": 0.9928, - "step": 6032 - }, - { - "epoch": 0.5440771970960906, - "grad_norm": 1.9811227534975362, - "learning_rate": 1.8118706162257405e-06, - "loss": 0.921, - "step": 6033 - }, - { - "epoch": 0.5441673806195608, - "grad_norm": 1.302282343596986, - "learning_rate": 1.8112890039709002e-06, - "loss": 1.0605, - "step": 6034 - }, - { - "epoch": 0.5442575641430311, - "grad_norm": 1.5230128552300684, - "learning_rate": 1.8107074078179238e-06, - "loss": 0.9832, - "step": 6035 - }, - { - "epoch": 0.5443477476665013, - "grad_norm": 1.2607346703669438, - "learning_rate": 1.8101258278164348e-06, - "loss": 0.9325, - "step": 6036 - }, - { - "epoch": 0.5444379311899716, - "grad_norm": 2.2361202226641392, - "learning_rate": 1.8095442640160575e-06, - "loss": 1.0847, - "step": 6037 - }, - { - "epoch": 0.5445281147134419, - "grad_norm": 1.6500278412753049, - "learning_rate": 1.8089627164664132e-06, - "loss": 0.9421, - "step": 6038 - }, - { - "epoch": 0.5446182982369121, - "grad_norm": 1.356435293733249, - "learning_rate": 1.8083811852171233e-06, - "loss": 0.9002, - "step": 6039 - }, - { - "epoch": 0.5447084817603823, - "grad_norm": 1.734083752621351, - "learning_rate": 1.8077996703178078e-06, - "loss": 0.9792, - "step": 6040 - }, - { - "epoch": 0.5447986652838527, - "grad_norm": 1.5658001952076313, - "learning_rate": 1.8072181718180833e-06, - "loss": 0.9358, - "step": 6041 - }, - { - "epoch": 0.5448888488073229, - "grad_norm": 1.4604913366858092, - "learning_rate": 1.806636689767568e-06, - "loss": 1.0663, - "step": 6042 - }, - { - "epoch": 0.5449790323307931, - "grad_norm": 1.2256448313632298, - "learning_rate": 1.8060552242158769e-06, - "loss": 0.9188, - "step": 6043 - }, - { - "epoch": 0.5450692158542634, - "grad_norm": 1.2999803981770384, - "learning_rate": 1.8054737752126224e-06, - "loss": 0.9534, - "step": 6044 - }, - { - "epoch": 0.5451593993777337, - "grad_norm": 1.4022394423091602, - "learning_rate": 1.804892342807419e-06, - "loss": 0.9684, - "step": 6045 - }, - { - "epoch": 0.545249582901204, - "grad_norm": 2.8964893137811907, - "learning_rate": 1.8043109270498756e-06, - "loss": 1.0421, - "step": 6046 - }, - { - "epoch": 0.5453397664246742, - "grad_norm": 1.242098199285546, - "learning_rate": 1.803729527989604e-06, - "loss": 0.9908, - "step": 6047 - }, - { - "epoch": 0.5454299499481444, - "grad_norm": 2.0200063711009255, - "learning_rate": 1.8031481456762112e-06, - "loss": 0.9708, - "step": 6048 - }, - { - "epoch": 0.5455201334716148, - "grad_norm": 1.6151440374904091, - "learning_rate": 1.8025667801593033e-06, - "loss": 0.9063, - "step": 6049 - }, - { - "epoch": 0.545610316995085, - "grad_norm": 1.3356157328061464, - "learning_rate": 1.8019854314884871e-06, - "loss": 0.9185, - "step": 6050 - }, - { - "epoch": 0.5457005005185552, - "grad_norm": 2.2161965519397944, - "learning_rate": 1.8014040997133652e-06, - "loss": 1.0298, - "step": 6051 - }, - { - "epoch": 0.5457906840420256, - "grad_norm": 0.7528145747870725, - "learning_rate": 1.8008227848835414e-06, - "loss": 0.8798, - "step": 6052 - }, - { - "epoch": 0.5458808675654958, - "grad_norm": 1.8237791001297132, - "learning_rate": 1.8002414870486144e-06, - "loss": 0.924, - "step": 6053 - }, - { - "epoch": 0.545971051088966, - "grad_norm": 1.6403910152156689, - "learning_rate": 1.7996602062581864e-06, - "loss": 0.9363, - "step": 6054 - }, - { - "epoch": 0.5460612346124363, - "grad_norm": 1.410085195510657, - "learning_rate": 1.7990789425618544e-06, - "loss": 1.0274, - "step": 6055 - }, - { - "epoch": 0.5461514181359066, - "grad_norm": 1.1867309891130509, - "learning_rate": 1.7984976960092137e-06, - "loss": 0.9551, - "step": 6056 - }, - { - "epoch": 0.5462416016593769, - "grad_norm": 1.5518887118326412, - "learning_rate": 1.7979164666498617e-06, - "loss": 0.9388, - "step": 6057 - }, - { - "epoch": 0.5463317851828471, - "grad_norm": 0.6232576402394043, - "learning_rate": 1.7973352545333901e-06, - "loss": 0.8003, - "step": 6058 - }, - { - "epoch": 0.5464219687063173, - "grad_norm": 1.711525106336735, - "learning_rate": 1.796754059709393e-06, - "loss": 0.8941, - "step": 6059 - }, - { - "epoch": 0.5465121522297877, - "grad_norm": 1.650363321473484, - "learning_rate": 1.7961728822274603e-06, - "loss": 1.034, - "step": 6060 - }, - { - "epoch": 0.5466023357532579, - "grad_norm": 1.4535996677016203, - "learning_rate": 1.7955917221371802e-06, - "loss": 0.9279, - "step": 6061 - }, - { - "epoch": 0.5466925192767281, - "grad_norm": 1.761243464266236, - "learning_rate": 1.7950105794881422e-06, - "loss": 0.8931, - "step": 6062 - }, - { - "epoch": 0.5467827028001984, - "grad_norm": 2.8886408414069984, - "learning_rate": 1.7944294543299317e-06, - "loss": 0.9651, - "step": 6063 - }, - { - "epoch": 0.5468728863236687, - "grad_norm": 1.4686659930939472, - "learning_rate": 1.7938483467121333e-06, - "loss": 1.0302, - "step": 6064 - }, - { - "epoch": 0.5469630698471389, - "grad_norm": 1.6495930025235344, - "learning_rate": 1.7932672566843313e-06, - "loss": 0.9556, - "step": 6065 - }, - { - "epoch": 0.5470532533706092, - "grad_norm": 1.5969033554157515, - "learning_rate": 1.7926861842961065e-06, - "loss": 0.9002, - "step": 6066 - }, - { - "epoch": 0.5471434368940794, - "grad_norm": 1.3930559234813045, - "learning_rate": 1.7921051295970399e-06, - "loss": 0.896, - "step": 6067 - }, - { - "epoch": 0.5472336204175497, - "grad_norm": 1.3869378574974771, - "learning_rate": 1.7915240926367092e-06, - "loss": 0.9395, - "step": 6068 - }, - { - "epoch": 0.54732380394102, - "grad_norm": 1.205823423630058, - "learning_rate": 1.7909430734646932e-06, - "loss": 1.0237, - "step": 6069 - }, - { - "epoch": 0.5474139874644902, - "grad_norm": 0.7415018227079577, - "learning_rate": 1.790362072130567e-06, - "loss": 0.8705, - "step": 6070 - }, - { - "epoch": 0.5475041709879604, - "grad_norm": 2.517389567130311, - "learning_rate": 1.7897810886839037e-06, - "loss": 0.9707, - "step": 6071 - }, - { - "epoch": 0.5475943545114308, - "grad_norm": 1.3460178311769548, - "learning_rate": 1.7892001231742782e-06, - "loss": 0.9877, - "step": 6072 - }, - { - "epoch": 0.547684538034901, - "grad_norm": 1.6777429724796085, - "learning_rate": 1.7886191756512594e-06, - "loss": 0.9928, - "step": 6073 - }, - { - "epoch": 0.5477747215583713, - "grad_norm": 1.2763229630972937, - "learning_rate": 1.7880382461644192e-06, - "loss": 1.0549, - "step": 6074 - }, - { - "epoch": 0.5478649050818416, - "grad_norm": 1.6667572553175483, - "learning_rate": 1.7874573347633235e-06, - "loss": 0.9567, - "step": 6075 - }, - { - "epoch": 0.5479550886053118, - "grad_norm": 1.557303523227971, - "learning_rate": 1.7868764414975408e-06, - "loss": 1.0504, - "step": 6076 - }, - { - "epoch": 0.5480452721287821, - "grad_norm": 2.667685989431291, - "learning_rate": 1.7862955664166353e-06, - "loss": 0.9226, - "step": 6077 - }, - { - "epoch": 0.5481354556522523, - "grad_norm": 1.2741186742349555, - "learning_rate": 1.78571470957017e-06, - "loss": 1.0606, - "step": 6078 - }, - { - "epoch": 0.5482256391757226, - "grad_norm": 1.5605381665804423, - "learning_rate": 1.7851338710077074e-06, - "loss": 1.0382, - "step": 6079 - }, - { - "epoch": 0.5483158226991929, - "grad_norm": 3.883143942366318, - "learning_rate": 1.7845530507788076e-06, - "loss": 1.079, - "step": 6080 - }, - { - "epoch": 0.5484060062226631, - "grad_norm": 1.501393703853302, - "learning_rate": 1.7839722489330298e-06, - "loss": 0.9494, - "step": 6081 - }, - { - "epoch": 0.5484961897461333, - "grad_norm": 1.3226568863084753, - "learning_rate": 1.7833914655199308e-06, - "loss": 0.9511, - "step": 6082 - }, - { - "epoch": 0.5485863732696037, - "grad_norm": 1.7428560214519244, - "learning_rate": 1.7828107005890658e-06, - "loss": 1.0336, - "step": 6083 - }, - { - "epoch": 0.5486765567930739, - "grad_norm": 1.5559785169476592, - "learning_rate": 1.7822299541899898e-06, - "loss": 0.8969, - "step": 6084 - }, - { - "epoch": 0.5487667403165442, - "grad_norm": 1.2920093491986613, - "learning_rate": 1.7816492263722545e-06, - "loss": 0.9796, - "step": 6085 - }, - { - "epoch": 0.5488569238400144, - "grad_norm": 1.4215149475986308, - "learning_rate": 1.781068517185412e-06, - "loss": 1.005, - "step": 6086 - }, - { - "epoch": 0.5489471073634847, - "grad_norm": 1.3936554111149981, - "learning_rate": 1.7804878266790104e-06, - "loss": 1.0422, - "step": 6087 - }, - { - "epoch": 0.549037290886955, - "grad_norm": 1.2760856101845042, - "learning_rate": 1.779907154902597e-06, - "loss": 0.9097, - "step": 6088 - }, - { - "epoch": 0.5491274744104252, - "grad_norm": 1.2081490683094611, - "learning_rate": 1.7793265019057198e-06, - "loss": 0.9041, - "step": 6089 - }, - { - "epoch": 0.5492176579338954, - "grad_norm": 0.7408021546000875, - "learning_rate": 1.7787458677379212e-06, - "loss": 0.8593, - "step": 6090 - }, - { - "epoch": 0.5493078414573658, - "grad_norm": 1.4184082863289698, - "learning_rate": 1.7781652524487463e-06, - "loss": 1.0413, - "step": 6091 - }, - { - "epoch": 0.549398024980836, - "grad_norm": 1.499499475894339, - "learning_rate": 1.777584656087735e-06, - "loss": 0.9295, - "step": 6092 - }, - { - "epoch": 0.5494882085043062, - "grad_norm": 1.4079548144621903, - "learning_rate": 1.777004078704427e-06, - "loss": 0.8919, - "step": 6093 - }, - { - "epoch": 0.5495783920277765, - "grad_norm": 1.4029629331247875, - "learning_rate": 1.7764235203483603e-06, - "loss": 0.9996, - "step": 6094 - }, - { - "epoch": 0.5496685755512468, - "grad_norm": 1.577219864022116, - "learning_rate": 1.775842981069072e-06, - "loss": 0.947, - "step": 6095 - }, - { - "epoch": 0.549758759074717, - "grad_norm": 1.3590184325062198, - "learning_rate": 1.7752624609160966e-06, - "loss": 0.9817, - "step": 6096 - }, - { - "epoch": 0.5498489425981873, - "grad_norm": 1.8687101404538464, - "learning_rate": 1.7746819599389665e-06, - "loss": 1.0139, - "step": 6097 - }, - { - "epoch": 0.5499391261216575, - "grad_norm": 1.2956836237804117, - "learning_rate": 1.774101478187215e-06, - "loss": 0.9766, - "step": 6098 - }, - { - "epoch": 0.5500293096451279, - "grad_norm": 1.547977247460306, - "learning_rate": 1.773521015710371e-06, - "loss": 1.0464, - "step": 6099 - }, - { - "epoch": 0.5501194931685981, - "grad_norm": 1.5610575311922807, - "learning_rate": 1.7729405725579614e-06, - "loss": 1.0558, - "step": 6100 - }, - { - "epoch": 0.5502096766920683, - "grad_norm": 2.264820350612944, - "learning_rate": 1.7723601487795151e-06, - "loss": 0.9536, - "step": 6101 - }, - { - "epoch": 0.5502998602155387, - "grad_norm": 1.276046747729507, - "learning_rate": 1.7717797444245557e-06, - "loss": 0.9286, - "step": 6102 - }, - { - "epoch": 0.5503900437390089, - "grad_norm": 1.2625625198294008, - "learning_rate": 1.7711993595426076e-06, - "loss": 0.9084, - "step": 6103 - }, - { - "epoch": 0.5504802272624791, - "grad_norm": 1.5603690302681983, - "learning_rate": 1.7706189941831915e-06, - "loss": 1.087, - "step": 6104 - }, - { - "epoch": 0.5505704107859494, - "grad_norm": 1.753869750868657, - "learning_rate": 1.770038648395827e-06, - "loss": 0.9396, - "step": 6105 - }, - { - "epoch": 0.5506605943094197, - "grad_norm": 0.6638277199784236, - "learning_rate": 1.7694583222300336e-06, - "loss": 0.8595, - "step": 6106 - }, - { - "epoch": 0.55075077783289, - "grad_norm": 1.9740241718300306, - "learning_rate": 1.7688780157353272e-06, - "loss": 0.9585, - "step": 6107 - }, - { - "epoch": 0.5508409613563602, - "grad_norm": 1.4556451908609582, - "learning_rate": 1.768297728961223e-06, - "loss": 0.9201, - "step": 6108 - }, - { - "epoch": 0.5509311448798304, - "grad_norm": 2.0114290788111107, - "learning_rate": 1.7677174619572342e-06, - "loss": 0.9789, - "step": 6109 - }, - { - "epoch": 0.5510213284033008, - "grad_norm": 1.5244182070981855, - "learning_rate": 1.7671372147728717e-06, - "loss": 0.9928, - "step": 6110 - }, - { - "epoch": 0.551111511926771, - "grad_norm": 2.2634432054274205, - "learning_rate": 1.7665569874576471e-06, - "loss": 1.0449, - "step": 6111 - }, - { - "epoch": 0.5512016954502412, - "grad_norm": 1.5712317058557366, - "learning_rate": 1.7659767800610664e-06, - "loss": 0.979, - "step": 6112 - }, - { - "epoch": 0.5512918789737115, - "grad_norm": 1.4061929055385591, - "learning_rate": 1.7653965926326379e-06, - "loss": 0.8841, - "step": 6113 - }, - { - "epoch": 0.5513820624971818, - "grad_norm": 1.2384167906891999, - "learning_rate": 1.764816425221866e-06, - "loss": 0.9402, - "step": 6114 - }, - { - "epoch": 0.551472246020652, - "grad_norm": 1.2540522695352705, - "learning_rate": 1.7642362778782524e-06, - "loss": 0.8967, - "step": 6115 - }, - { - "epoch": 0.5515624295441223, - "grad_norm": 1.9630333388340044, - "learning_rate": 1.7636561506513005e-06, - "loss": 0.9472, - "step": 6116 - }, - { - "epoch": 0.5516526130675925, - "grad_norm": 1.1107202888960863, - "learning_rate": 1.7630760435905083e-06, - "loss": 0.9503, - "step": 6117 - }, - { - "epoch": 0.5517427965910628, - "grad_norm": 2.556608172182303, - "learning_rate": 1.762495956745375e-06, - "loss": 0.9343, - "step": 6118 - }, - { - "epoch": 0.5518329801145331, - "grad_norm": 1.7296031569512957, - "learning_rate": 1.7619158901653962e-06, - "loss": 1.0264, - "step": 6119 - }, - { - "epoch": 0.5519231636380033, - "grad_norm": 1.6059465648649895, - "learning_rate": 1.761335843900066e-06, - "loss": 0.9762, - "step": 6120 - }, - { - "epoch": 0.5520133471614735, - "grad_norm": 1.4647246859163796, - "learning_rate": 1.7607558179988785e-06, - "loss": 0.9627, - "step": 6121 - }, - { - "epoch": 0.5521035306849439, - "grad_norm": 1.3871103512871459, - "learning_rate": 1.760175812511323e-06, - "loss": 0.9112, - "step": 6122 - }, - { - "epoch": 0.5521937142084141, - "grad_norm": 1.474319611431114, - "learning_rate": 1.75959582748689e-06, - "loss": 0.9315, - "step": 6123 - }, - { - "epoch": 0.5522838977318844, - "grad_norm": 1.3535878876391048, - "learning_rate": 1.7590158629750657e-06, - "loss": 1.0087, - "step": 6124 - }, - { - "epoch": 0.5523740812553547, - "grad_norm": 1.2196872237545076, - "learning_rate": 1.7584359190253376e-06, - "loss": 1.032, - "step": 6125 - }, - { - "epoch": 0.5524642647788249, - "grad_norm": 1.8130205656506404, - "learning_rate": 1.7578559956871892e-06, - "loss": 1.0408, - "step": 6126 - }, - { - "epoch": 0.5525544483022952, - "grad_norm": 1.3179311551360886, - "learning_rate": 1.7572760930101012e-06, - "loss": 0.975, - "step": 6127 - }, - { - "epoch": 0.5526446318257654, - "grad_norm": 0.7120183755775087, - "learning_rate": 1.7566962110435563e-06, - "loss": 0.9137, - "step": 6128 - }, - { - "epoch": 0.5527348153492357, - "grad_norm": 1.2239092661444109, - "learning_rate": 1.7561163498370313e-06, - "loss": 0.9918, - "step": 6129 - }, - { - "epoch": 0.552824998872706, - "grad_norm": 1.4985236213764674, - "learning_rate": 1.755536509440005e-06, - "loss": 0.9403, - "step": 6130 - }, - { - "epoch": 0.5529151823961762, - "grad_norm": 1.8935366822006907, - "learning_rate": 1.7549566899019519e-06, - "loss": 0.9742, - "step": 6131 - }, - { - "epoch": 0.5530053659196464, - "grad_norm": 1.5544226339332663, - "learning_rate": 1.754376891272344e-06, - "loss": 0.9926, - "step": 6132 - }, - { - "epoch": 0.5530955494431168, - "grad_norm": 1.2541624383865677, - "learning_rate": 1.753797113600655e-06, - "loss": 0.9294, - "step": 6133 - }, - { - "epoch": 0.553185732966587, - "grad_norm": 1.6367234842236498, - "learning_rate": 1.7532173569363535e-06, - "loss": 1.1154, - "step": 6134 - }, - { - "epoch": 0.5532759164900573, - "grad_norm": 1.6557164322406477, - "learning_rate": 1.7526376213289077e-06, - "loss": 0.7996, - "step": 6135 - }, - { - "epoch": 0.5533661000135275, - "grad_norm": 1.3862503765164165, - "learning_rate": 1.7520579068277844e-06, - "loss": 0.9582, - "step": 6136 - }, - { - "epoch": 0.5534562835369978, - "grad_norm": 1.486986853940994, - "learning_rate": 1.7514782134824472e-06, - "loss": 1.0076, - "step": 6137 - }, - { - "epoch": 0.5535464670604681, - "grad_norm": 1.3204345646699784, - "learning_rate": 1.7508985413423599e-06, - "loss": 0.9722, - "step": 6138 - }, - { - "epoch": 0.5536366505839383, - "grad_norm": 2.284657480958109, - "learning_rate": 1.7503188904569814e-06, - "loss": 1.0441, - "step": 6139 - }, - { - "epoch": 0.5537268341074085, - "grad_norm": 2.039937500276761, - "learning_rate": 1.7497392608757728e-06, - "loss": 0.9453, - "step": 6140 - }, - { - "epoch": 0.5538170176308789, - "grad_norm": 1.1618378137605991, - "learning_rate": 1.7491596526481897e-06, - "loss": 0.9992, - "step": 6141 - }, - { - "epoch": 0.5539072011543491, - "grad_norm": 1.4089185191163154, - "learning_rate": 1.7485800658236888e-06, - "loss": 0.907, - "step": 6142 - }, - { - "epoch": 0.5539973846778193, - "grad_norm": 1.4268843890368708, - "learning_rate": 1.7480005004517232e-06, - "loss": 1.0412, - "step": 6143 - }, - { - "epoch": 0.5540875682012896, - "grad_norm": 1.3307166002530004, - "learning_rate": 1.7474209565817435e-06, - "loss": 1.0398, - "step": 6144 - }, - { - "epoch": 0.5541777517247599, - "grad_norm": 1.2713855523352338, - "learning_rate": 1.7468414342632014e-06, - "loss": 0.9803, - "step": 6145 - }, - { - "epoch": 0.5542679352482301, - "grad_norm": 1.696263411376829, - "learning_rate": 1.746261933545543e-06, - "loss": 1.0328, - "step": 6146 - }, - { - "epoch": 0.5543581187717004, - "grad_norm": 1.4559614329470152, - "learning_rate": 1.7456824544782165e-06, - "loss": 0.9789, - "step": 6147 - }, - { - "epoch": 0.5544483022951707, - "grad_norm": 1.3463722636066027, - "learning_rate": 1.7451029971106653e-06, - "loss": 0.9803, - "step": 6148 - }, - { - "epoch": 0.554538485818641, - "grad_norm": 1.2658262210585032, - "learning_rate": 1.7445235614923313e-06, - "loss": 1.0077, - "step": 6149 - }, - { - "epoch": 0.5546286693421112, - "grad_norm": 1.4834842619387378, - "learning_rate": 1.7439441476726556e-06, - "loss": 0.9698, - "step": 6150 - }, - { - "epoch": 0.5547188528655814, - "grad_norm": 1.488739984256034, - "learning_rate": 1.7433647557010776e-06, - "loss": 0.9874, - "step": 6151 - }, - { - "epoch": 0.5548090363890518, - "grad_norm": 1.401503074517418, - "learning_rate": 1.7427853856270338e-06, - "loss": 0.9459, - "step": 6152 - }, - { - "epoch": 0.554899219912522, - "grad_norm": 1.3137240378793509, - "learning_rate": 1.7422060374999593e-06, - "loss": 0.9357, - "step": 6153 - }, - { - "epoch": 0.5549894034359922, - "grad_norm": 1.4671242014725225, - "learning_rate": 1.7416267113692862e-06, - "loss": 0.9368, - "step": 6154 - }, - { - "epoch": 0.5550795869594625, - "grad_norm": 1.7389176136131936, - "learning_rate": 1.7410474072844475e-06, - "loss": 1.0103, - "step": 6155 - }, - { - "epoch": 0.5551697704829328, - "grad_norm": 1.3205879872050852, - "learning_rate": 1.740468125294871e-06, - "loss": 0.9675, - "step": 6156 - }, - { - "epoch": 0.555259954006403, - "grad_norm": 1.7471606517483096, - "learning_rate": 1.739888865449986e-06, - "loss": 0.8864, - "step": 6157 - }, - { - "epoch": 0.5553501375298733, - "grad_norm": 1.6464148673050796, - "learning_rate": 1.7393096277992174e-06, - "loss": 1.0081, - "step": 6158 - }, - { - "epoch": 0.5554403210533435, - "grad_norm": 1.46441920931319, - "learning_rate": 1.738730412391988e-06, - "loss": 0.9898, - "step": 6159 - }, - { - "epoch": 0.5555305045768139, - "grad_norm": 4.208868590707338, - "learning_rate": 1.738151219277721e-06, - "loss": 1.0272, - "step": 6160 - }, - { - "epoch": 0.5556206881002841, - "grad_norm": 2.507653727511446, - "learning_rate": 1.7375720485058349e-06, - "loss": 0.9348, - "step": 6161 - }, - { - "epoch": 0.5557108716237543, - "grad_norm": 1.6992534063904767, - "learning_rate": 1.7369929001257498e-06, - "loss": 0.9969, - "step": 6162 - }, - { - "epoch": 0.5558010551472246, - "grad_norm": 2.726588732748984, - "learning_rate": 1.73641377418688e-06, - "loss": 0.893, - "step": 6163 - }, - { - "epoch": 0.5558912386706949, - "grad_norm": 1.3765745251202872, - "learning_rate": 1.7358346707386408e-06, - "loss": 0.9141, - "step": 6164 - }, - { - "epoch": 0.5559814221941651, - "grad_norm": 1.425387477061088, - "learning_rate": 1.7352555898304439e-06, - "loss": 1.0217, - "step": 6165 - }, - { - "epoch": 0.5560716057176354, - "grad_norm": 1.5957911267521658, - "learning_rate": 1.7346765315116996e-06, - "loss": 0.8916, - "step": 6166 - }, - { - "epoch": 0.5561617892411056, - "grad_norm": 1.4104922837347535, - "learning_rate": 1.734097495831817e-06, - "loss": 0.9417, - "step": 6167 - }, - { - "epoch": 0.5562519727645759, - "grad_norm": 2.5724801891516833, - "learning_rate": 1.7335184828402015e-06, - "loss": 0.9908, - "step": 6168 - }, - { - "epoch": 0.5563421562880462, - "grad_norm": 1.4129547079296143, - "learning_rate": 1.7329394925862595e-06, - "loss": 0.9504, - "step": 6169 - }, - { - "epoch": 0.5564323398115164, - "grad_norm": 1.4075917095166741, - "learning_rate": 1.7323605251193922e-06, - "loss": 1.0221, - "step": 6170 - }, - { - "epoch": 0.5565225233349868, - "grad_norm": 1.6666225030134025, - "learning_rate": 1.7317815804890001e-06, - "loss": 0.9993, - "step": 6171 - }, - { - "epoch": 0.556612706858457, - "grad_norm": 2.3860602586464252, - "learning_rate": 1.731202658744483e-06, - "loss": 0.9357, - "step": 6172 - }, - { - "epoch": 0.5567028903819272, - "grad_norm": 2.220329474666816, - "learning_rate": 1.7306237599352365e-06, - "loss": 1.0352, - "step": 6173 - }, - { - "epoch": 0.5567930739053975, - "grad_norm": 1.384163148398218, - "learning_rate": 1.730044884110657e-06, - "loss": 0.9646, - "step": 6174 - }, - { - "epoch": 0.5568832574288678, - "grad_norm": 1.5857425062449644, - "learning_rate": 1.7294660313201366e-06, - "loss": 0.9386, - "step": 6175 - }, - { - "epoch": 0.556973440952338, - "grad_norm": 2.0287869118313444, - "learning_rate": 1.7288872016130652e-06, - "loss": 0.9496, - "step": 6176 - }, - { - "epoch": 0.5570636244758083, - "grad_norm": 3.6620154935883003, - "learning_rate": 1.7283083950388334e-06, - "loss": 0.8909, - "step": 6177 - }, - { - "epoch": 0.5571538079992785, - "grad_norm": 1.5014620649110464, - "learning_rate": 1.727729611646827e-06, - "loss": 0.8799, - "step": 6178 - }, - { - "epoch": 0.5572439915227488, - "grad_norm": 1.3236751734808379, - "learning_rate": 1.7271508514864318e-06, - "loss": 1.0333, - "step": 6179 - }, - { - "epoch": 0.5573341750462191, - "grad_norm": 1.5296516640257793, - "learning_rate": 1.7265721146070302e-06, - "loss": 0.9821, - "step": 6180 - }, - { - "epoch": 0.5574243585696893, - "grad_norm": 1.434422723645252, - "learning_rate": 1.7259934010580035e-06, - "loss": 0.977, - "step": 6181 - }, - { - "epoch": 0.5575145420931595, - "grad_norm": 1.3830189523839498, - "learning_rate": 1.725414710888731e-06, - "loss": 0.926, - "step": 6182 - }, - { - "epoch": 0.5576047256166299, - "grad_norm": 1.342779873919233, - "learning_rate": 1.7248360441485885e-06, - "loss": 0.9612, - "step": 6183 - }, - { - "epoch": 0.5576949091401001, - "grad_norm": 1.4788329415104615, - "learning_rate": 1.7242574008869528e-06, - "loss": 1.0481, - "step": 6184 - }, - { - "epoch": 0.5577850926635703, - "grad_norm": 0.7217258959934868, - "learning_rate": 1.7236787811531951e-06, - "loss": 0.9063, - "step": 6185 - }, - { - "epoch": 0.5578752761870406, - "grad_norm": 1.439152389850202, - "learning_rate": 1.7231001849966887e-06, - "loss": 0.9447, - "step": 6186 - }, - { - "epoch": 0.5579654597105109, - "grad_norm": 0.6548453011081378, - "learning_rate": 1.722521612466801e-06, - "loss": 0.8719, - "step": 6187 - }, - { - "epoch": 0.5580556432339812, - "grad_norm": 1.3880007123890348, - "learning_rate": 1.7219430636128984e-06, - "loss": 0.9218, - "step": 6188 - }, - { - "epoch": 0.5581458267574514, - "grad_norm": 1.300467513456333, - "learning_rate": 1.7213645384843479e-06, - "loss": 0.9219, - "step": 6189 - }, - { - "epoch": 0.5582360102809216, - "grad_norm": 1.325673207616428, - "learning_rate": 1.7207860371305108e-06, - "loss": 0.9642, - "step": 6190 - }, - { - "epoch": 0.558326193804392, - "grad_norm": 1.6121366194910947, - "learning_rate": 1.7202075596007487e-06, - "loss": 0.9433, - "step": 6191 - }, - { - "epoch": 0.5584163773278622, - "grad_norm": 1.4249943482136327, - "learning_rate": 1.7196291059444206e-06, - "loss": 0.9378, - "step": 6192 - }, - { - "epoch": 0.5585065608513324, - "grad_norm": 1.4676187907137048, - "learning_rate": 1.7190506762108828e-06, - "loss": 0.9619, - "step": 6193 - }, - { - "epoch": 0.5585967443748028, - "grad_norm": 1.8169472165710197, - "learning_rate": 1.7184722704494907e-06, - "loss": 0.9756, - "step": 6194 - }, - { - "epoch": 0.558686927898273, - "grad_norm": 1.4163491323309167, - "learning_rate": 1.717893888709596e-06, - "loss": 0.987, - "step": 6195 - }, - { - "epoch": 0.5587771114217432, - "grad_norm": 1.5054258917511714, - "learning_rate": 1.7173155310405515e-06, - "loss": 0.9981, - "step": 6196 - }, - { - "epoch": 0.5588672949452135, - "grad_norm": 1.232334962128882, - "learning_rate": 1.7167371974917043e-06, - "loss": 1.0533, - "step": 6197 - }, - { - "epoch": 0.5589574784686838, - "grad_norm": 1.4274332587411576, - "learning_rate": 1.7161588881124003e-06, - "loss": 0.998, - "step": 6198 - }, - { - "epoch": 0.559047661992154, - "grad_norm": 5.992655709686147, - "learning_rate": 1.7155806029519861e-06, - "loss": 0.9666, - "step": 6199 - }, - { - "epoch": 0.5591378455156243, - "grad_norm": 1.6238064417378977, - "learning_rate": 1.7150023420598023e-06, - "loss": 1.0324, - "step": 6200 - }, - { - "epoch": 0.5592280290390945, - "grad_norm": 1.2138139094647076, - "learning_rate": 1.714424105485191e-06, - "loss": 1.0259, - "step": 6201 - }, - { - "epoch": 0.5593182125625649, - "grad_norm": 1.6922639781968019, - "learning_rate": 1.7138458932774896e-06, - "loss": 0.9354, - "step": 6202 - }, - { - "epoch": 0.5594083960860351, - "grad_norm": 0.6763596457474828, - "learning_rate": 1.7132677054860335e-06, - "loss": 0.8727, - "step": 6203 - }, - { - "epoch": 0.5594985796095053, - "grad_norm": 1.3271080162511624, - "learning_rate": 1.7126895421601586e-06, - "loss": 1.0016, - "step": 6204 - }, - { - "epoch": 0.5595887631329756, - "grad_norm": 1.4767732823003956, - "learning_rate": 1.712111403349196e-06, - "loss": 0.9203, - "step": 6205 - }, - { - "epoch": 0.5596789466564459, - "grad_norm": 1.1881308636628511, - "learning_rate": 1.7115332891024757e-06, - "loss": 1.0039, - "step": 6206 - }, - { - "epoch": 0.5597691301799161, - "grad_norm": 0.7557082702846458, - "learning_rate": 1.7109551994693257e-06, - "loss": 0.9293, - "step": 6207 - }, - { - "epoch": 0.5598593137033864, - "grad_norm": 1.5814062320289728, - "learning_rate": 1.7103771344990725e-06, - "loss": 0.9171, - "step": 6208 - }, - { - "epoch": 0.5599494972268566, - "grad_norm": 1.4050935440553705, - "learning_rate": 1.709799094241039e-06, - "loss": 0.8949, - "step": 6209 - }, - { - "epoch": 0.560039680750327, - "grad_norm": 1.4503955433107425, - "learning_rate": 1.709221078744546e-06, - "loss": 1.0067, - "step": 6210 - }, - { - "epoch": 0.5601298642737972, - "grad_norm": 1.2787389572427452, - "learning_rate": 1.7086430880589148e-06, - "loss": 0.9858, - "step": 6211 - }, - { - "epoch": 0.5602200477972674, - "grad_norm": 1.8319704741135108, - "learning_rate": 1.7080651222334612e-06, - "loss": 0.9835, - "step": 6212 - }, - { - "epoch": 0.5603102313207377, - "grad_norm": 2.9041630624961288, - "learning_rate": 1.7074871813175018e-06, - "loss": 1.0264, - "step": 6213 - }, - { - "epoch": 0.560400414844208, - "grad_norm": 1.261231555709341, - "learning_rate": 1.706909265360349e-06, - "loss": 0.8892, - "step": 6214 - }, - { - "epoch": 0.5604905983676782, - "grad_norm": 0.7252778211567885, - "learning_rate": 1.7063313744113128e-06, - "loss": 0.8443, - "step": 6215 - }, - { - "epoch": 0.5605807818911485, - "grad_norm": 0.657736298594192, - "learning_rate": 1.7057535085197042e-06, - "loss": 0.8248, - "step": 6216 - }, - { - "epoch": 0.5606709654146187, - "grad_norm": 1.7273879343607639, - "learning_rate": 1.705175667734828e-06, - "loss": 0.8917, - "step": 6217 - }, - { - "epoch": 0.560761148938089, - "grad_norm": 3.059663354145067, - "learning_rate": 1.7045978521059894e-06, - "loss": 0.9423, - "step": 6218 - }, - { - "epoch": 0.5608513324615593, - "grad_norm": 1.25994502719667, - "learning_rate": 1.7040200616824914e-06, - "loss": 0.9719, - "step": 6219 - }, - { - "epoch": 0.5609415159850295, - "grad_norm": 1.470385513357584, - "learning_rate": 1.7034422965136333e-06, - "loss": 0.9287, - "step": 6220 - }, - { - "epoch": 0.5610316995084998, - "grad_norm": 1.4056004189546554, - "learning_rate": 1.7028645566487137e-06, - "loss": 1.0192, - "step": 6221 - }, - { - "epoch": 0.5611218830319701, - "grad_norm": 1.4157063745011182, - "learning_rate": 1.7022868421370284e-06, - "loss": 0.9535, - "step": 6222 - }, - { - "epoch": 0.5612120665554403, - "grad_norm": 1.5762393374464343, - "learning_rate": 1.701709153027872e-06, - "loss": 1.0192, - "step": 6223 - }, - { - "epoch": 0.5613022500789105, - "grad_norm": 3.851614992352741, - "learning_rate": 1.7011314893705353e-06, - "loss": 0.9425, - "step": 6224 - }, - { - "epoch": 0.5613924336023809, - "grad_norm": 2.896200874843851, - "learning_rate": 1.700553851214307e-06, - "loss": 0.9778, - "step": 6225 - }, - { - "epoch": 0.5614826171258511, - "grad_norm": 1.3428683382657534, - "learning_rate": 1.699976238608476e-06, - "loss": 0.8428, - "step": 6226 - }, - { - "epoch": 0.5615728006493214, - "grad_norm": 1.8439758938270807, - "learning_rate": 1.699398651602326e-06, - "loss": 0.9234, - "step": 6227 - }, - { - "epoch": 0.5616629841727916, - "grad_norm": 1.4129355138978268, - "learning_rate": 1.6988210902451413e-06, - "loss": 1.0214, - "step": 6228 - }, - { - "epoch": 0.5617531676962619, - "grad_norm": 1.4561483454116944, - "learning_rate": 1.6982435545862011e-06, - "loss": 0.9502, - "step": 6229 - }, - { - "epoch": 0.5618433512197322, - "grad_norm": 1.5752403408929503, - "learning_rate": 1.6976660446747853e-06, - "loss": 0.9724, - "step": 6230 - }, - { - "epoch": 0.5619335347432024, - "grad_norm": 7.299889540163193, - "learning_rate": 1.6970885605601696e-06, - "loss": 0.8994, - "step": 6231 - }, - { - "epoch": 0.5620237182666726, - "grad_norm": 13.298381070149631, - "learning_rate": 1.6965111022916282e-06, - "loss": 1.0855, - "step": 6232 - }, - { - "epoch": 0.562113901790143, - "grad_norm": 2.0313098605212065, - "learning_rate": 1.6959336699184323e-06, - "loss": 0.9463, - "step": 6233 - }, - { - "epoch": 0.5622040853136132, - "grad_norm": 1.2379281295627413, - "learning_rate": 1.6953562634898529e-06, - "loss": 0.9634, - "step": 6234 - }, - { - "epoch": 0.5622942688370834, - "grad_norm": 1.836409942225965, - "learning_rate": 1.6947788830551569e-06, - "loss": 0.9442, - "step": 6235 - }, - { - "epoch": 0.5623844523605537, - "grad_norm": 0.7136177013195701, - "learning_rate": 1.6942015286636093e-06, - "loss": 0.8983, - "step": 6236 - }, - { - "epoch": 0.562474635884024, - "grad_norm": 1.4437821818146221, - "learning_rate": 1.6936242003644735e-06, - "loss": 0.9543, - "step": 6237 - }, - { - "epoch": 0.5625648194074943, - "grad_norm": 1.6083925081478443, - "learning_rate": 1.6930468982070106e-06, - "loss": 1.0445, - "step": 6238 - }, - { - "epoch": 0.5626550029309645, - "grad_norm": 0.6446269802324209, - "learning_rate": 1.692469622240478e-06, - "loss": 0.7967, - "step": 6239 - }, - { - "epoch": 0.5627451864544347, - "grad_norm": 3.106204769795355, - "learning_rate": 1.6918923725141339e-06, - "loss": 0.931, - "step": 6240 - }, - { - "epoch": 0.5628353699779051, - "grad_norm": 1.2652380551940967, - "learning_rate": 1.6913151490772312e-06, - "loss": 1.0241, - "step": 6241 - }, - { - "epoch": 0.5629255535013753, - "grad_norm": 1.2415780065659365, - "learning_rate": 1.6907379519790215e-06, - "loss": 1.0171, - "step": 6242 - }, - { - "epoch": 0.5630157370248455, - "grad_norm": 1.24158597573036, - "learning_rate": 1.6901607812687558e-06, - "loss": 0.8804, - "step": 6243 - }, - { - "epoch": 0.5631059205483159, - "grad_norm": 1.3222905527128908, - "learning_rate": 1.6895836369956794e-06, - "loss": 0.9317, - "step": 6244 - }, - { - "epoch": 0.5631961040717861, - "grad_norm": 0.7081356801290716, - "learning_rate": 1.6890065192090402e-06, - "loss": 0.8177, - "step": 6245 - }, - { - "epoch": 0.5632862875952563, - "grad_norm": 1.6862736943915275, - "learning_rate": 1.6884294279580793e-06, - "loss": 0.8779, - "step": 6246 - }, - { - "epoch": 0.5633764711187266, - "grad_norm": 2.5007638717948493, - "learning_rate": 1.6878523632920371e-06, - "loss": 0.995, - "step": 6247 - }, - { - "epoch": 0.5634666546421969, - "grad_norm": 1.7195936733286525, - "learning_rate": 1.6872753252601525e-06, - "loss": 1.0386, - "step": 6248 - }, - { - "epoch": 0.5635568381656672, - "grad_norm": 1.5085329696446017, - "learning_rate": 1.6866983139116616e-06, - "loss": 0.9178, - "step": 6249 - }, - { - "epoch": 0.5636470216891374, - "grad_norm": 1.405421373163586, - "learning_rate": 1.6861213292957981e-06, - "loss": 0.9456, - "step": 6250 - }, - { - "epoch": 0.5637372052126076, - "grad_norm": 1.3402225859659158, - "learning_rate": 1.685544371461793e-06, - "loss": 1.0057, - "step": 6251 - }, - { - "epoch": 0.563827388736078, - "grad_norm": 1.5810710489087414, - "learning_rate": 1.6849674404588767e-06, - "loss": 0.9922, - "step": 6252 - }, - { - "epoch": 0.5639175722595482, - "grad_norm": 1.644469053854676, - "learning_rate": 1.6843905363362758e-06, - "loss": 0.9984, - "step": 6253 - }, - { - "epoch": 0.5640077557830184, - "grad_norm": 1.742428202743955, - "learning_rate": 1.6838136591432136e-06, - "loss": 0.9184, - "step": 6254 - }, - { - "epoch": 0.5640979393064887, - "grad_norm": 1.5120976094069276, - "learning_rate": 1.6832368089289139e-06, - "loss": 0.9943, - "step": 6255 - }, - { - "epoch": 0.564188122829959, - "grad_norm": 1.3110691172713802, - "learning_rate": 1.682659985742596e-06, - "loss": 0.9874, - "step": 6256 - }, - { - "epoch": 0.5642783063534292, - "grad_norm": 1.4626225020789358, - "learning_rate": 1.6820831896334782e-06, - "loss": 1.0178, - "step": 6257 - }, - { - "epoch": 0.5643684898768995, - "grad_norm": 1.426934097543292, - "learning_rate": 1.681506420650776e-06, - "loss": 0.9961, - "step": 6258 - }, - { - "epoch": 0.5644586734003697, - "grad_norm": 1.1994568032440813, - "learning_rate": 1.680929678843701e-06, - "loss": 0.9639, - "step": 6259 - }, - { - "epoch": 0.56454885692384, - "grad_norm": 1.5113652725367317, - "learning_rate": 1.6803529642614662e-06, - "loss": 1.0249, - "step": 6260 - }, - { - "epoch": 0.5646390404473103, - "grad_norm": 1.5284455756873172, - "learning_rate": 1.6797762769532785e-06, - "loss": 0.9803, - "step": 6261 - }, - { - "epoch": 0.5647292239707805, - "grad_norm": 1.3732870876679937, - "learning_rate": 1.679199616968345e-06, - "loss": 1.007, - "step": 6262 - }, - { - "epoch": 0.5648194074942507, - "grad_norm": 2.184949641574895, - "learning_rate": 1.6786229843558689e-06, - "loss": 1.0028, - "step": 6263 - }, - { - "epoch": 0.5649095910177211, - "grad_norm": 5.844251315395326, - "learning_rate": 1.6780463791650514e-06, - "loss": 0.9596, - "step": 6264 - }, - { - "epoch": 0.5649997745411913, - "grad_norm": 1.634098550380967, - "learning_rate": 1.6774698014450928e-06, - "loss": 0.9225, - "step": 6265 - }, - { - "epoch": 0.5650899580646616, - "grad_norm": 0.6922345776188344, - "learning_rate": 1.6768932512451883e-06, - "loss": 0.8405, - "step": 6266 - }, - { - "epoch": 0.5651801415881319, - "grad_norm": 1.2480660737042992, - "learning_rate": 1.676316728614534e-06, - "loss": 0.9269, - "step": 6267 - }, - { - "epoch": 0.5652703251116021, - "grad_norm": 0.5760296662260791, - "learning_rate": 1.675740233602321e-06, - "loss": 0.8386, - "step": 6268 - }, - { - "epoch": 0.5653605086350724, - "grad_norm": 1.3084111328237977, - "learning_rate": 1.6751637662577385e-06, - "loss": 1.0182, - "step": 6269 - }, - { - "epoch": 0.5654506921585426, - "grad_norm": 0.6720204306828912, - "learning_rate": 1.6745873266299753e-06, - "loss": 0.774, - "step": 6270 - }, - { - "epoch": 0.565540875682013, - "grad_norm": 1.474551329828194, - "learning_rate": 1.6740109147682148e-06, - "loss": 0.9397, - "step": 6271 - }, - { - "epoch": 0.5656310592054832, - "grad_norm": 1.5047517059666347, - "learning_rate": 1.6734345307216418e-06, - "loss": 0.9913, - "step": 6272 - }, - { - "epoch": 0.5657212427289534, - "grad_norm": 1.7671474543316843, - "learning_rate": 1.6728581745394346e-06, - "loss": 0.9456, - "step": 6273 - }, - { - "epoch": 0.5658114262524236, - "grad_norm": 1.4199608295034827, - "learning_rate": 1.672281846270772e-06, - "loss": 1.0487, - "step": 6274 - }, - { - "epoch": 0.565901609775894, - "grad_norm": 1.5466615163509077, - "learning_rate": 1.6717055459648295e-06, - "loss": 0.9754, - "step": 6275 - }, - { - "epoch": 0.5659917932993642, - "grad_norm": 1.6633551441870011, - "learning_rate": 1.6711292736707793e-06, - "loss": 0.927, - "step": 6276 - }, - { - "epoch": 0.5660819768228345, - "grad_norm": 1.5803731635807987, - "learning_rate": 1.6705530294377938e-06, - "loss": 1.0431, - "step": 6277 - }, - { - "epoch": 0.5661721603463047, - "grad_norm": 1.354293841723623, - "learning_rate": 1.6699768133150395e-06, - "loss": 0.9691, - "step": 6278 - }, - { - "epoch": 0.566262343869775, - "grad_norm": 1.5636856920907385, - "learning_rate": 1.6694006253516837e-06, - "loss": 0.9643, - "step": 6279 - }, - { - "epoch": 0.5663525273932453, - "grad_norm": 1.6426131781104831, - "learning_rate": 1.6688244655968896e-06, - "loss": 0.9773, - "step": 6280 - }, - { - "epoch": 0.5664427109167155, - "grad_norm": 1.6305089151418883, - "learning_rate": 1.6682483340998175e-06, - "loss": 0.8927, - "step": 6281 - }, - { - "epoch": 0.5665328944401857, - "grad_norm": 1.4474218698607522, - "learning_rate": 1.6676722309096276e-06, - "loss": 0.9344, - "step": 6282 - }, - { - "epoch": 0.5666230779636561, - "grad_norm": 0.752535270168512, - "learning_rate": 1.6670961560754744e-06, - "loss": 0.8976, - "step": 6283 - }, - { - "epoch": 0.5667132614871263, - "grad_norm": 1.7651772395504588, - "learning_rate": 1.6665201096465138e-06, - "loss": 0.9683, - "step": 6284 - }, - { - "epoch": 0.5668034450105965, - "grad_norm": 1.6092227752691504, - "learning_rate": 1.6659440916718961e-06, - "loss": 1.0223, - "step": 6285 - }, - { - "epoch": 0.5668936285340668, - "grad_norm": 1.5972293945741174, - "learning_rate": 1.6653681022007696e-06, - "loss": 0.989, - "step": 6286 - }, - { - "epoch": 0.5669838120575371, - "grad_norm": 1.8548987868425413, - "learning_rate": 1.6647921412822825e-06, - "loss": 0.9043, - "step": 6287 - }, - { - "epoch": 0.5670739955810074, - "grad_norm": 2.210935882459932, - "learning_rate": 1.6642162089655782e-06, - "loss": 0.9751, - "step": 6288 - }, - { - "epoch": 0.5671641791044776, - "grad_norm": 1.7976559476093699, - "learning_rate": 1.663640305299798e-06, - "loss": 0.9344, - "step": 6289 - }, - { - "epoch": 0.5672543626279479, - "grad_norm": 0.6946138236324173, - "learning_rate": 1.6630644303340824e-06, - "loss": 0.8721, - "step": 6290 - }, - { - "epoch": 0.5673445461514182, - "grad_norm": 1.4340249226531188, - "learning_rate": 1.662488584117567e-06, - "loss": 0.9857, - "step": 6291 - }, - { - "epoch": 0.5674347296748884, - "grad_norm": 1.3012139686044604, - "learning_rate": 1.6619127666993867e-06, - "loss": 1.0249, - "step": 6292 - }, - { - "epoch": 0.5675249131983586, - "grad_norm": 1.829026089815381, - "learning_rate": 1.6613369781286727e-06, - "loss": 0.9803, - "step": 6293 - }, - { - "epoch": 0.567615096721829, - "grad_norm": 1.5466929627243935, - "learning_rate": 1.6607612184545562e-06, - "loss": 0.9068, - "step": 6294 - }, - { - "epoch": 0.5677052802452992, - "grad_norm": 1.564721245238948, - "learning_rate": 1.6601854877261617e-06, - "loss": 1.0318, - "step": 6295 - }, - { - "epoch": 0.5677954637687694, - "grad_norm": 1.3919683675707015, - "learning_rate": 1.6596097859926163e-06, - "loss": 0.9661, - "step": 6296 - }, - { - "epoch": 0.5678856472922397, - "grad_norm": 1.497950106881093, - "learning_rate": 1.6590341133030407e-06, - "loss": 1.0535, - "step": 6297 - }, - { - "epoch": 0.56797583081571, - "grad_norm": 2.1628153267874874, - "learning_rate": 1.658458469706554e-06, - "loss": 1.0522, - "step": 6298 - }, - { - "epoch": 0.5680660143391802, - "grad_norm": 1.349285125425052, - "learning_rate": 1.6578828552522746e-06, - "loss": 0.9348, - "step": 6299 - }, - { - "epoch": 0.5681561978626505, - "grad_norm": 1.4893296121267916, - "learning_rate": 1.6573072699893156e-06, - "loss": 1.0209, - "step": 6300 - }, - { - "epoch": 0.5682463813861207, - "grad_norm": 1.3871602819582196, - "learning_rate": 1.6567317139667906e-06, - "loss": 0.9417, - "step": 6301 - }, - { - "epoch": 0.5683365649095911, - "grad_norm": 1.5188914229241564, - "learning_rate": 1.6561561872338087e-06, - "loss": 0.9877, - "step": 6302 - }, - { - "epoch": 0.5684267484330613, - "grad_norm": 1.5661515863649307, - "learning_rate": 1.6555806898394764e-06, - "loss": 0.8898, - "step": 6303 - }, - { - "epoch": 0.5685169319565315, - "grad_norm": 1.5492967086775744, - "learning_rate": 1.6550052218328987e-06, - "loss": 0.944, - "step": 6304 - }, - { - "epoch": 0.5686071154800018, - "grad_norm": 1.3201405791360405, - "learning_rate": 1.6544297832631777e-06, - "loss": 0.9502, - "step": 6305 - }, - { - "epoch": 0.5686972990034721, - "grad_norm": 1.944004768283318, - "learning_rate": 1.6538543741794135e-06, - "loss": 0.9769, - "step": 6306 - }, - { - "epoch": 0.5687874825269423, - "grad_norm": 1.3189123731691788, - "learning_rate": 1.6532789946307028e-06, - "loss": 0.944, - "step": 6307 - }, - { - "epoch": 0.5688776660504126, - "grad_norm": 0.6604627315408568, - "learning_rate": 1.6527036446661393e-06, - "loss": 0.9007, - "step": 6308 - }, - { - "epoch": 0.5689678495738828, - "grad_norm": 1.4802124284673746, - "learning_rate": 1.6521283243348165e-06, - "loss": 1.01, - "step": 6309 - }, - { - "epoch": 0.5690580330973531, - "grad_norm": 1.7919703677145695, - "learning_rate": 1.6515530336858227e-06, - "loss": 0.9711, - "step": 6310 - }, - { - "epoch": 0.5691482166208234, - "grad_norm": 1.3773387612410983, - "learning_rate": 1.6509777727682457e-06, - "loss": 1.0177, - "step": 6311 - }, - { - "epoch": 0.5692384001442936, - "grad_norm": 1.2208141062831452, - "learning_rate": 1.65040254163117e-06, - "loss": 1.0101, - "step": 6312 - }, - { - "epoch": 0.569328583667764, - "grad_norm": 6.526333213031613, - "learning_rate": 1.649827340323676e-06, - "loss": 0.8767, - "step": 6313 - }, - { - "epoch": 0.5694187671912342, - "grad_norm": 2.0232066608266517, - "learning_rate": 1.6492521688948454e-06, - "loss": 0.9452, - "step": 6314 - }, - { - "epoch": 0.5695089507147044, - "grad_norm": 1.9126043253848812, - "learning_rate": 1.6486770273937526e-06, - "loss": 0.994, - "step": 6315 - }, - { - "epoch": 0.5695991342381747, - "grad_norm": 1.26556716598426, - "learning_rate": 1.6481019158694738e-06, - "loss": 0.9476, - "step": 6316 - }, - { - "epoch": 0.569689317761645, - "grad_norm": 0.7607706674281264, - "learning_rate": 1.6475268343710792e-06, - "loss": 0.9291, - "step": 6317 - }, - { - "epoch": 0.5697795012851152, - "grad_norm": 1.483615399805831, - "learning_rate": 1.6469517829476396e-06, - "loss": 0.9089, - "step": 6318 - }, - { - "epoch": 0.5698696848085855, - "grad_norm": 1.494460127833691, - "learning_rate": 1.64637676164822e-06, - "loss": 0.9494, - "step": 6319 - }, - { - "epoch": 0.5699598683320557, - "grad_norm": 1.1910549458850026, - "learning_rate": 1.6458017705218848e-06, - "loss": 1.0084, - "step": 6320 - }, - { - "epoch": 0.570050051855526, - "grad_norm": 1.6288769030957628, - "learning_rate": 1.645226809617696e-06, - "loss": 0.9914, - "step": 6321 - }, - { - "epoch": 0.5701402353789963, - "grad_norm": 2.444308528830301, - "learning_rate": 1.6446518789847112e-06, - "loss": 1.0068, - "step": 6322 - }, - { - "epoch": 0.5702304189024665, - "grad_norm": 1.6534708566280392, - "learning_rate": 1.6440769786719883e-06, - "loss": 0.9955, - "step": 6323 - }, - { - "epoch": 0.5703206024259367, - "grad_norm": 1.3330898161875933, - "learning_rate": 1.6435021087285803e-06, - "loss": 1.0135, - "step": 6324 - }, - { - "epoch": 0.5704107859494071, - "grad_norm": 1.635664716565623, - "learning_rate": 1.642927269203537e-06, - "loss": 1.0029, - "step": 6325 - }, - { - "epoch": 0.5705009694728773, - "grad_norm": 1.8746422744283706, - "learning_rate": 1.642352460145909e-06, - "loss": 1.0247, - "step": 6326 - }, - { - "epoch": 0.5705911529963476, - "grad_norm": 1.6769528150613722, - "learning_rate": 1.6417776816047402e-06, - "loss": 0.9777, - "step": 6327 - }, - { - "epoch": 0.5706813365198178, - "grad_norm": 1.3627967738744575, - "learning_rate": 1.6412029336290755e-06, - "loss": 0.9587, - "step": 6328 - }, - { - "epoch": 0.5707715200432881, - "grad_norm": 2.118734097491425, - "learning_rate": 1.6406282162679551e-06, - "loss": 0.9667, - "step": 6329 - }, - { - "epoch": 0.5708617035667584, - "grad_norm": 1.1918161156225575, - "learning_rate": 1.6400535295704162e-06, - "loss": 0.9476, - "step": 6330 - }, - { - "epoch": 0.5709518870902286, - "grad_norm": 1.6945311683946715, - "learning_rate": 1.6394788735854955e-06, - "loss": 0.9119, - "step": 6331 - }, - { - "epoch": 0.5710420706136988, - "grad_norm": 1.5226515870196606, - "learning_rate": 1.6389042483622246e-06, - "loss": 1.0334, - "step": 6332 - }, - { - "epoch": 0.5711322541371692, - "grad_norm": 1.9256434517050351, - "learning_rate": 1.638329653949635e-06, - "loss": 0.972, - "step": 6333 - }, - { - "epoch": 0.5712224376606394, - "grad_norm": 1.3591415818731767, - "learning_rate": 1.637755090396753e-06, - "loss": 0.938, - "step": 6334 - }, - { - "epoch": 0.5713126211841096, - "grad_norm": 0.7014506431394458, - "learning_rate": 1.6371805577526039e-06, - "loss": 0.8904, - "step": 6335 - }, - { - "epoch": 0.5714028047075799, - "grad_norm": 1.7234860452297855, - "learning_rate": 1.636606056066211e-06, - "loss": 0.9221, - "step": 6336 - }, - { - "epoch": 0.5714929882310502, - "grad_norm": 1.4713840807909253, - "learning_rate": 1.636031585386592e-06, - "loss": 0.8987, - "step": 6337 - }, - { - "epoch": 0.5715831717545204, - "grad_norm": 1.2512047207439616, - "learning_rate": 1.635457145762766e-06, - "loss": 1.0321, - "step": 6338 - }, - { - "epoch": 0.5716733552779907, - "grad_norm": 1.8600788226909997, - "learning_rate": 1.6348827372437456e-06, - "loss": 1.0037, - "step": 6339 - }, - { - "epoch": 0.571763538801461, - "grad_norm": 1.1992428059603715, - "learning_rate": 1.634308359878544e-06, - "loss": 0.9705, - "step": 6340 - }, - { - "epoch": 0.5718537223249313, - "grad_norm": 1.6110483747449773, - "learning_rate": 1.6337340137161695e-06, - "loss": 1.0542, - "step": 6341 - }, - { - "epoch": 0.5719439058484015, - "grad_norm": 1.498546452384925, - "learning_rate": 1.6331596988056277e-06, - "loss": 0.9757, - "step": 6342 - }, - { - "epoch": 0.5720340893718717, - "grad_norm": 1.2947969448302212, - "learning_rate": 1.632585415195924e-06, - "loss": 1.0064, - "step": 6343 - }, - { - "epoch": 0.5721242728953421, - "grad_norm": 1.4962838552623625, - "learning_rate": 1.6320111629360583e-06, - "loss": 1.0053, - "step": 6344 - }, - { - "epoch": 0.5722144564188123, - "grad_norm": 1.568303203408204, - "learning_rate": 1.631436942075029e-06, - "loss": 0.9494, - "step": 6345 - }, - { - "epoch": 0.5723046399422825, - "grad_norm": 2.3898613401914757, - "learning_rate": 1.630862752661833e-06, - "loss": 0.9479, - "step": 6346 - }, - { - "epoch": 0.5723948234657528, - "grad_norm": 1.495648350164725, - "learning_rate": 1.6302885947454612e-06, - "loss": 0.9048, - "step": 6347 - }, - { - "epoch": 0.5724850069892231, - "grad_norm": 1.8175877164009233, - "learning_rate": 1.6297144683749057e-06, - "loss": 0.991, - "step": 6348 - }, - { - "epoch": 0.5725751905126933, - "grad_norm": 1.3719137842348534, - "learning_rate": 1.629140373599153e-06, - "loss": 0.9784, - "step": 6349 - }, - { - "epoch": 0.5726653740361636, - "grad_norm": 1.4793929164302775, - "learning_rate": 1.628566310467189e-06, - "loss": 0.9878, - "step": 6350 - }, - { - "epoch": 0.5727555575596338, - "grad_norm": 1.563345413378234, - "learning_rate": 1.6279922790279957e-06, - "loss": 0.9422, - "step": 6351 - }, - { - "epoch": 0.5728457410831042, - "grad_norm": 1.377288950556451, - "learning_rate": 1.6274182793305512e-06, - "loss": 0.9904, - "step": 6352 - }, - { - "epoch": 0.5729359246065744, - "grad_norm": 2.0955932247074913, - "learning_rate": 1.626844311423835e-06, - "loss": 1.0009, - "step": 6353 - }, - { - "epoch": 0.5730261081300446, - "grad_norm": 1.5387477649762356, - "learning_rate": 1.6262703753568181e-06, - "loss": 1.056, - "step": 6354 - }, - { - "epoch": 0.5731162916535149, - "grad_norm": 2.1433870636695116, - "learning_rate": 1.6256964711784747e-06, - "loss": 1.0476, - "step": 6355 - }, - { - "epoch": 0.5732064751769852, - "grad_norm": 1.663672817970684, - "learning_rate": 1.6251225989377723e-06, - "loss": 0.9928, - "step": 6356 - }, - { - "epoch": 0.5732966587004554, - "grad_norm": 3.947268041037113, - "learning_rate": 1.624548758683676e-06, - "loss": 0.9852, - "step": 6357 - }, - { - "epoch": 0.5733868422239257, - "grad_norm": 1.7470086962696891, - "learning_rate": 1.6239749504651505e-06, - "loss": 1.0703, - "step": 6358 - }, - { - "epoch": 0.5734770257473959, - "grad_norm": 1.4249028256394394, - "learning_rate": 1.6234011743311552e-06, - "loss": 0.9506, - "step": 6359 - }, - { - "epoch": 0.5735672092708662, - "grad_norm": 1.7817659132851524, - "learning_rate": 1.6228274303306483e-06, - "loss": 0.911, - "step": 6360 - }, - { - "epoch": 0.5736573927943365, - "grad_norm": 1.9556291320113421, - "learning_rate": 1.6222537185125847e-06, - "loss": 1.059, - "step": 6361 - }, - { - "epoch": 0.5737475763178067, - "grad_norm": 1.3228683562981967, - "learning_rate": 1.6216800389259172e-06, - "loss": 1.0035, - "step": 6362 - }, - { - "epoch": 0.573837759841277, - "grad_norm": 1.1758565783529193, - "learning_rate": 1.6211063916195949e-06, - "loss": 0.9729, - "step": 6363 - }, - { - "epoch": 0.5739279433647473, - "grad_norm": 1.4177051645991416, - "learning_rate": 1.6205327766425633e-06, - "loss": 0.9804, - "step": 6364 - }, - { - "epoch": 0.5740181268882175, - "grad_norm": 1.800821432132869, - "learning_rate": 1.6199591940437689e-06, - "loss": 0.9885, - "step": 6365 - }, - { - "epoch": 0.5741083104116878, - "grad_norm": 1.3310944963353029, - "learning_rate": 1.6193856438721505e-06, - "loss": 0.9426, - "step": 6366 - }, - { - "epoch": 0.5741984939351581, - "grad_norm": 2.299188810847697, - "learning_rate": 1.6188121261766483e-06, - "loss": 0.9821, - "step": 6367 - }, - { - "epoch": 0.5742886774586283, - "grad_norm": 1.358952774597118, - "learning_rate": 1.6182386410061976e-06, - "loss": 0.9082, - "step": 6368 - }, - { - "epoch": 0.5743788609820986, - "grad_norm": 1.5550282933890163, - "learning_rate": 1.61766518840973e-06, - "loss": 0.8908, - "step": 6369 - }, - { - "epoch": 0.5744690445055688, - "grad_norm": 1.4674278151842732, - "learning_rate": 1.6170917684361779e-06, - "loss": 0.8603, - "step": 6370 - }, - { - "epoch": 0.5745592280290391, - "grad_norm": 1.4905391996309916, - "learning_rate": 1.6165183811344662e-06, - "loss": 1.0554, - "step": 6371 - }, - { - "epoch": 0.5746494115525094, - "grad_norm": 1.5323390591171222, - "learning_rate": 1.6159450265535218e-06, - "loss": 0.9692, - "step": 6372 - }, - { - "epoch": 0.5747395950759796, - "grad_norm": 1.485856451317185, - "learning_rate": 1.6153717047422652e-06, - "loss": 1.0587, - "step": 6373 - }, - { - "epoch": 0.5748297785994498, - "grad_norm": 2.051601166081148, - "learning_rate": 1.6147984157496155e-06, - "loss": 0.8661, - "step": 6374 - }, - { - "epoch": 0.5749199621229202, - "grad_norm": 1.4045134948973017, - "learning_rate": 1.6142251596244886e-06, - "loss": 0.9109, - "step": 6375 - }, - { - "epoch": 0.5750101456463904, - "grad_norm": 1.415260439787235, - "learning_rate": 1.6136519364157983e-06, - "loss": 1.0308, - "step": 6376 - }, - { - "epoch": 0.5751003291698606, - "grad_norm": 2.395373844921413, - "learning_rate": 1.6130787461724555e-06, - "loss": 0.9208, - "step": 6377 - }, - { - "epoch": 0.5751905126933309, - "grad_norm": 1.4591840533330485, - "learning_rate": 1.6125055889433679e-06, - "loss": 1.0534, - "step": 6378 - }, - { - "epoch": 0.5752806962168012, - "grad_norm": 1.2291615157369673, - "learning_rate": 1.6119324647774386e-06, - "loss": 1.0072, - "step": 6379 - }, - { - "epoch": 0.5753708797402715, - "grad_norm": 1.5122929549571933, - "learning_rate": 1.6113593737235724e-06, - "loss": 0.9912, - "step": 6380 - }, - { - "epoch": 0.5754610632637417, - "grad_norm": 1.7226835149220614, - "learning_rate": 1.6107863158306665e-06, - "loss": 0.9283, - "step": 6381 - }, - { - "epoch": 0.5755512467872119, - "grad_norm": 1.6114556649335365, - "learning_rate": 1.610213291147619e-06, - "loss": 0.8735, - "step": 6382 - }, - { - "epoch": 0.5756414303106823, - "grad_norm": 1.2691995979807373, - "learning_rate": 1.609640299723322e-06, - "loss": 1.0026, - "step": 6383 - }, - { - "epoch": 0.5757316138341525, - "grad_norm": 1.2637311163905443, - "learning_rate": 1.609067341606668e-06, - "loss": 0.9629, - "step": 6384 - }, - { - "epoch": 0.5758217973576227, - "grad_norm": 1.7368102975758823, - "learning_rate": 1.6084944168465438e-06, - "loss": 0.9722, - "step": 6385 - }, - { - "epoch": 0.5759119808810931, - "grad_norm": 1.266510500608724, - "learning_rate": 1.6079215254918339e-06, - "loss": 1.0044, - "step": 6386 - }, - { - "epoch": 0.5760021644045633, - "grad_norm": 1.867266154527978, - "learning_rate": 1.6073486675914222e-06, - "loss": 0.8825, - "step": 6387 - }, - { - "epoch": 0.5760923479280335, - "grad_norm": 1.8312146414510166, - "learning_rate": 1.606775843194187e-06, - "loss": 1.0038, - "step": 6388 - }, - { - "epoch": 0.5761825314515038, - "grad_norm": 1.6856656170832272, - "learning_rate": 1.6062030523490053e-06, - "loss": 0.9706, - "step": 6389 - }, - { - "epoch": 0.5762727149749741, - "grad_norm": 1.4476169669374175, - "learning_rate": 1.60563029510475e-06, - "loss": 0.9029, - "step": 6390 - }, - { - "epoch": 0.5763628984984444, - "grad_norm": 0.6346235849944399, - "learning_rate": 1.6050575715102927e-06, - "loss": 0.824, - "step": 6391 - }, - { - "epoch": 0.5764530820219146, - "grad_norm": 2.2243190237635244, - "learning_rate": 1.6044848816145014e-06, - "loss": 1.0279, - "step": 6392 - }, - { - "epoch": 0.5765432655453848, - "grad_norm": 1.5837765709048865, - "learning_rate": 1.60391222546624e-06, - "loss": 1.0021, - "step": 6393 - }, - { - "epoch": 0.5766334490688552, - "grad_norm": 1.3668829006390593, - "learning_rate": 1.6033396031143725e-06, - "loss": 0.9684, - "step": 6394 - }, - { - "epoch": 0.5767236325923254, - "grad_norm": 1.5118763765074135, - "learning_rate": 1.602767014607757e-06, - "loss": 0.9794, - "step": 6395 - }, - { - "epoch": 0.5768138161157956, - "grad_norm": 1.3649868938669216, - "learning_rate": 1.6021944599952493e-06, - "loss": 0.9933, - "step": 6396 - }, - { - "epoch": 0.5769039996392659, - "grad_norm": 1.5793796499687311, - "learning_rate": 1.6016219393257048e-06, - "loss": 0.9931, - "step": 6397 - }, - { - "epoch": 0.5769941831627362, - "grad_norm": 1.589420796588471, - "learning_rate": 1.6010494526479722e-06, - "loss": 0.9328, - "step": 6398 - }, - { - "epoch": 0.5770843666862064, - "grad_norm": 2.0388667571725767, - "learning_rate": 1.6004770000109006e-06, - "loss": 0.9349, - "step": 6399 - }, - { - "epoch": 0.5771745502096767, - "grad_norm": 1.3204202551575797, - "learning_rate": 1.5999045814633348e-06, - "loss": 0.9464, - "step": 6400 - }, - { - "epoch": 0.5772647337331469, - "grad_norm": 1.594158793361994, - "learning_rate": 1.5993321970541151e-06, - "loss": 0.9905, - "step": 6401 - }, - { - "epoch": 0.5773549172566173, - "grad_norm": 1.6640118515115285, - "learning_rate": 1.5987598468320825e-06, - "loss": 0.992, - "step": 6402 - }, - { - "epoch": 0.5774451007800875, - "grad_norm": 1.2962870816377579, - "learning_rate": 1.5981875308460717e-06, - "loss": 0.9718, - "step": 6403 - }, - { - "epoch": 0.5775352843035577, - "grad_norm": 1.672388586145232, - "learning_rate": 1.5976152491449169e-06, - "loss": 0.9693, - "step": 6404 - }, - { - "epoch": 0.577625467827028, - "grad_norm": 1.5129719746547516, - "learning_rate": 1.5970430017774468e-06, - "loss": 1.0424, - "step": 6405 - }, - { - "epoch": 0.5777156513504983, - "grad_norm": 1.5484902012052773, - "learning_rate": 1.5964707887924904e-06, - "loss": 0.9945, - "step": 6406 - }, - { - "epoch": 0.5778058348739685, - "grad_norm": 1.3594067010800504, - "learning_rate": 1.5958986102388714e-06, - "loss": 0.8634, - "step": 6407 - }, - { - "epoch": 0.5778960183974388, - "grad_norm": 1.4117517295409332, - "learning_rate": 1.5953264661654104e-06, - "loss": 0.9371, - "step": 6408 - }, - { - "epoch": 0.5779862019209091, - "grad_norm": 1.628152576879278, - "learning_rate": 1.5947543566209276e-06, - "loss": 1.0191, - "step": 6409 - }, - { - "epoch": 0.5780763854443793, - "grad_norm": 1.4947953528736488, - "learning_rate": 1.5941822816542367e-06, - "loss": 0.9561, - "step": 6410 - }, - { - "epoch": 0.5781665689678496, - "grad_norm": 1.4460702095609377, - "learning_rate": 1.5936102413141519e-06, - "loss": 0.9372, - "step": 6411 - }, - { - "epoch": 0.5782567524913198, - "grad_norm": 1.838385131339062, - "learning_rate": 1.5930382356494823e-06, - "loss": 0.9064, - "step": 6412 - }, - { - "epoch": 0.5783469360147901, - "grad_norm": 1.3723648569663578, - "learning_rate": 1.5924662647090335e-06, - "loss": 1.0284, - "step": 6413 - }, - { - "epoch": 0.5784371195382604, - "grad_norm": 1.434358481185306, - "learning_rate": 1.5918943285416108e-06, - "loss": 0.9952, - "step": 6414 - }, - { - "epoch": 0.5785273030617306, - "grad_norm": 1.2727030408862339, - "learning_rate": 1.5913224271960139e-06, - "loss": 0.9154, - "step": 6415 - }, - { - "epoch": 0.5786174865852008, - "grad_norm": 1.7533200650142202, - "learning_rate": 1.590750560721041e-06, - "loss": 0.988, - "step": 6416 - }, - { - "epoch": 0.5787076701086712, - "grad_norm": 1.168322557425948, - "learning_rate": 1.5901787291654874e-06, - "loss": 0.9501, - "step": 6417 - }, - { - "epoch": 0.5787978536321414, - "grad_norm": 1.3619987773273599, - "learning_rate": 1.5896069325781435e-06, - "loss": 0.8557, - "step": 6418 - }, - { - "epoch": 0.5788880371556117, - "grad_norm": 1.3047520056214115, - "learning_rate": 1.5890351710077998e-06, - "loss": 1.0127, - "step": 6419 - }, - { - "epoch": 0.5789782206790819, - "grad_norm": 1.279841649199643, - "learning_rate": 1.5884634445032406e-06, - "loss": 1.0343, - "step": 6420 - }, - { - "epoch": 0.5790684042025522, - "grad_norm": 0.6177855141611363, - "learning_rate": 1.5878917531132501e-06, - "loss": 0.8552, - "step": 6421 - }, - { - "epoch": 0.5791585877260225, - "grad_norm": 2.73500254378122, - "learning_rate": 1.5873200968866077e-06, - "loss": 0.8497, - "step": 6422 - }, - { - "epoch": 0.5792487712494927, - "grad_norm": 1.9184700180705445, - "learning_rate": 1.586748475872089e-06, - "loss": 0.952, - "step": 6423 - }, - { - "epoch": 0.5793389547729629, - "grad_norm": 3.6039194599259305, - "learning_rate": 1.58617689011847e-06, - "loss": 0.8455, - "step": 6424 - }, - { - "epoch": 0.5794291382964333, - "grad_norm": 1.340434441883894, - "learning_rate": 1.5856053396745198e-06, - "loss": 1.0143, - "step": 6425 - }, - { - "epoch": 0.5795193218199035, - "grad_norm": 1.808274242941639, - "learning_rate": 1.5850338245890078e-06, - "loss": 1.0502, - "step": 6426 - }, - { - "epoch": 0.5796095053433737, - "grad_norm": 1.4940624341264561, - "learning_rate": 1.5844623449106974e-06, - "loss": 0.9606, - "step": 6427 - }, - { - "epoch": 0.579699688866844, - "grad_norm": 1.5698122964409245, - "learning_rate": 1.583890900688351e-06, - "loss": 1.0235, - "step": 6428 - }, - { - "epoch": 0.5797898723903143, - "grad_norm": 1.893912428635035, - "learning_rate": 1.583319491970728e-06, - "loss": 1.0484, - "step": 6429 - }, - { - "epoch": 0.5798800559137846, - "grad_norm": 1.2760839286604682, - "learning_rate": 1.5827481188065828e-06, - "loss": 0.9928, - "step": 6430 - }, - { - "epoch": 0.5799702394372548, - "grad_norm": 1.2777432158306286, - "learning_rate": 1.5821767812446689e-06, - "loss": 0.9459, - "step": 6431 - }, - { - "epoch": 0.5800604229607251, - "grad_norm": 2.0907285154343316, - "learning_rate": 1.581605479333736e-06, - "loss": 1.0973, - "step": 6432 - }, - { - "epoch": 0.5801506064841954, - "grad_norm": 1.6342961629667874, - "learning_rate": 1.5810342131225308e-06, - "loss": 0.9831, - "step": 6433 - }, - { - "epoch": 0.5802407900076656, - "grad_norm": 1.3353503983947135, - "learning_rate": 1.580462982659797e-06, - "loss": 1.0145, - "step": 6434 - }, - { - "epoch": 0.5803309735311358, - "grad_norm": 1.7792991529297302, - "learning_rate": 1.5798917879942736e-06, - "loss": 1.0406, - "step": 6435 - }, - { - "epoch": 0.5804211570546062, - "grad_norm": 1.1581687986488132, - "learning_rate": 1.5793206291747006e-06, - "loss": 0.9234, - "step": 6436 - }, - { - "epoch": 0.5805113405780764, - "grad_norm": 1.2002979604033635, - "learning_rate": 1.57874950624981e-06, - "loss": 0.9211, - "step": 6437 - }, - { - "epoch": 0.5806015241015466, - "grad_norm": 1.3865660247517204, - "learning_rate": 1.5781784192683351e-06, - "loss": 0.9759, - "step": 6438 - }, - { - "epoch": 0.5806917076250169, - "grad_norm": 1.160951848923328, - "learning_rate": 1.5776073682790033e-06, - "loss": 0.9435, - "step": 6439 - }, - { - "epoch": 0.5807818911484872, - "grad_norm": 1.503487268518929, - "learning_rate": 1.5770363533305393e-06, - "loss": 0.9958, - "step": 6440 - }, - { - "epoch": 0.5808720746719575, - "grad_norm": 1.5369551119531948, - "learning_rate": 1.5764653744716665e-06, - "loss": 0.9714, - "step": 6441 - }, - { - "epoch": 0.5809622581954277, - "grad_norm": 1.7910773912468292, - "learning_rate": 1.575894431751103e-06, - "loss": 1.0016, - "step": 6442 - }, - { - "epoch": 0.5810524417188979, - "grad_norm": 1.7163866614090815, - "learning_rate": 1.575323525217565e-06, - "loss": 0.9981, - "step": 6443 - }, - { - "epoch": 0.5811426252423683, - "grad_norm": 13.058898137156925, - "learning_rate": 1.574752654919766e-06, - "loss": 1.0564, - "step": 6444 - }, - { - "epoch": 0.5812328087658385, - "grad_norm": 1.3813099861065186, - "learning_rate": 1.5741818209064146e-06, - "loss": 0.9528, - "step": 6445 - }, - { - "epoch": 0.5813229922893087, - "grad_norm": 1.4513789525926475, - "learning_rate": 1.5736110232262183e-06, - "loss": 0.9876, - "step": 6446 - }, - { - "epoch": 0.581413175812779, - "grad_norm": 1.7186430464325702, - "learning_rate": 1.5730402619278804e-06, - "loss": 1.0213, - "step": 6447 - }, - { - "epoch": 0.5815033593362493, - "grad_norm": 0.6484090729882305, - "learning_rate": 1.5724695370601024e-06, - "loss": 0.8179, - "step": 6448 - }, - { - "epoch": 0.5815935428597195, - "grad_norm": 1.328177462270944, - "learning_rate": 1.5718988486715798e-06, - "loss": 0.9065, - "step": 6449 - }, - { - "epoch": 0.5816837263831898, - "grad_norm": 1.5284012745628044, - "learning_rate": 1.5713281968110087e-06, - "loss": 0.9674, - "step": 6450 - }, - { - "epoch": 0.58177390990666, - "grad_norm": 1.5545323740182215, - "learning_rate": 1.5707575815270796e-06, - "loss": 0.9686, - "step": 6451 - }, - { - "epoch": 0.5818640934301303, - "grad_norm": 1.661655888626105, - "learning_rate": 1.57018700286848e-06, - "loss": 0.9765, - "step": 6452 - }, - { - "epoch": 0.5819542769536006, - "grad_norm": 1.8839521322668933, - "learning_rate": 1.5696164608838956e-06, - "loss": 1.0011, - "step": 6453 - }, - { - "epoch": 0.5820444604770708, - "grad_norm": 1.5488277401629014, - "learning_rate": 1.5690459556220073e-06, - "loss": 0.9919, - "step": 6454 - }, - { - "epoch": 0.582134644000541, - "grad_norm": 1.498835111337803, - "learning_rate": 1.5684754871314949e-06, - "loss": 1.037, - "step": 6455 - }, - { - "epoch": 0.5822248275240114, - "grad_norm": 1.5124924525553394, - "learning_rate": 1.5679050554610335e-06, - "loss": 0.9788, - "step": 6456 - }, - { - "epoch": 0.5823150110474816, - "grad_norm": 1.1261595472282306, - "learning_rate": 1.567334660659295e-06, - "loss": 0.9816, - "step": 6457 - }, - { - "epoch": 0.5824051945709519, - "grad_norm": 4.506588880518468, - "learning_rate": 1.5667643027749488e-06, - "loss": 0.9905, - "step": 6458 - }, - { - "epoch": 0.5824953780944222, - "grad_norm": 1.7429299590044458, - "learning_rate": 1.5661939818566614e-06, - "loss": 1.0462, - "step": 6459 - }, - { - "epoch": 0.5825855616178924, - "grad_norm": 1.718094856938628, - "learning_rate": 1.5656236979530956e-06, - "loss": 0.8465, - "step": 6460 - }, - { - "epoch": 0.5826757451413627, - "grad_norm": 1.5427532576726093, - "learning_rate": 1.5650534511129106e-06, - "loss": 0.9218, - "step": 6461 - }, - { - "epoch": 0.5827659286648329, - "grad_norm": 1.691720629365391, - "learning_rate": 1.5644832413847635e-06, - "loss": 0.9577, - "step": 6462 - }, - { - "epoch": 0.5828561121883032, - "grad_norm": 1.1986051598837404, - "learning_rate": 1.5639130688173082e-06, - "loss": 1.0242, - "step": 6463 - }, - { - "epoch": 0.5829462957117735, - "grad_norm": 1.3687435341599563, - "learning_rate": 1.5633429334591932e-06, - "loss": 0.9553, - "step": 6464 - }, - { - "epoch": 0.5830364792352437, - "grad_norm": 1.640365216713861, - "learning_rate": 1.562772835359068e-06, - "loss": 1.0039, - "step": 6465 - }, - { - "epoch": 0.5831266627587139, - "grad_norm": 1.814153903232313, - "learning_rate": 1.5622027745655753e-06, - "loss": 0.9459, - "step": 6466 - }, - { - "epoch": 0.5832168462821843, - "grad_norm": 1.754753265099559, - "learning_rate": 1.561632751127355e-06, - "loss": 0.9335, - "step": 6467 - }, - { - "epoch": 0.5833070298056545, - "grad_norm": 1.5501383258255101, - "learning_rate": 1.561062765093046e-06, - "loss": 1.0261, - "step": 6468 - }, - { - "epoch": 0.5833972133291248, - "grad_norm": 2.0090971521761523, - "learning_rate": 1.5604928165112817e-06, - "loss": 0.979, - "step": 6469 - }, - { - "epoch": 0.583487396852595, - "grad_norm": 1.4480025542695083, - "learning_rate": 1.5599229054306945e-06, - "loss": 0.9708, - "step": 6470 - }, - { - "epoch": 0.5835775803760653, - "grad_norm": 1.7876103933964753, - "learning_rate": 1.5593530318999111e-06, - "loss": 0.9771, - "step": 6471 - }, - { - "epoch": 0.5836677638995356, - "grad_norm": 1.2653341076777842, - "learning_rate": 1.5587831959675572e-06, - "loss": 1.0069, - "step": 6472 - }, - { - "epoch": 0.5837579474230058, - "grad_norm": 1.5208849528013593, - "learning_rate": 1.5582133976822534e-06, - "loss": 0.9615, - "step": 6473 - }, - { - "epoch": 0.583848130946476, - "grad_norm": 1.358883604484057, - "learning_rate": 1.5576436370926185e-06, - "loss": 0.9336, - "step": 6474 - }, - { - "epoch": 0.5839383144699464, - "grad_norm": 1.1749939674871894, - "learning_rate": 1.5570739142472679e-06, - "loss": 0.9623, - "step": 6475 - }, - { - "epoch": 0.5840284979934166, - "grad_norm": 1.2570123437534864, - "learning_rate": 1.5565042291948127e-06, - "loss": 1.0463, - "step": 6476 - }, - { - "epoch": 0.5841186815168868, - "grad_norm": 1.3880621192426965, - "learning_rate": 1.5559345819838624e-06, - "loss": 1.0181, - "step": 6477 - }, - { - "epoch": 0.5842088650403571, - "grad_norm": 1.2051447474334942, - "learning_rate": 1.5553649726630226e-06, - "loss": 0.9559, - "step": 6478 - }, - { - "epoch": 0.5842990485638274, - "grad_norm": 1.3250482262525924, - "learning_rate": 1.5547954012808942e-06, - "loss": 0.9098, - "step": 6479 - }, - { - "epoch": 0.5843892320872976, - "grad_norm": 1.9219348556998057, - "learning_rate": 1.5542258678860776e-06, - "loss": 0.9515, - "step": 6480 - }, - { - "epoch": 0.5844794156107679, - "grad_norm": 1.4539051217838852, - "learning_rate": 1.553656372527167e-06, - "loss": 1.0947, - "step": 6481 - }, - { - "epoch": 0.5845695991342382, - "grad_norm": 1.9239073761238317, - "learning_rate": 1.5530869152527568e-06, - "loss": 1.0804, - "step": 6482 - }, - { - "epoch": 0.5846597826577085, - "grad_norm": 1.2138209314962414, - "learning_rate": 1.5525174961114353e-06, - "loss": 0.9973, - "step": 6483 - }, - { - "epoch": 0.5847499661811787, - "grad_norm": 1.3657723650650404, - "learning_rate": 1.5519481151517875e-06, - "loss": 1.0198, - "step": 6484 - }, - { - "epoch": 0.5848401497046489, - "grad_norm": 6.728960378533887, - "learning_rate": 1.551378772422398e-06, - "loss": 0.9812, - "step": 6485 - }, - { - "epoch": 0.5849303332281193, - "grad_norm": 1.6868961101396904, - "learning_rate": 1.5508094679718447e-06, - "loss": 0.9908, - "step": 6486 - }, - { - "epoch": 0.5850205167515895, - "grad_norm": 1.4394857540081567, - "learning_rate": 1.5502402018487048e-06, - "loss": 0.884, - "step": 6487 - }, - { - "epoch": 0.5851107002750597, - "grad_norm": 2.4336640912662233, - "learning_rate": 1.54967097410155e-06, - "loss": 0.9772, - "step": 6488 - }, - { - "epoch": 0.58520088379853, - "grad_norm": 1.3695594049182966, - "learning_rate": 1.5491017847789519e-06, - "loss": 0.905, - "step": 6489 - }, - { - "epoch": 0.5852910673220003, - "grad_norm": 1.5941799556233465, - "learning_rate": 1.5485326339294755e-06, - "loss": 0.9199, - "step": 6490 - }, - { - "epoch": 0.5853812508454705, - "grad_norm": 1.2520751893619144, - "learning_rate": 1.5479635216016832e-06, - "loss": 0.9433, - "step": 6491 - }, - { - "epoch": 0.5854714343689408, - "grad_norm": 1.8978132940699302, - "learning_rate": 1.547394447844137e-06, - "loss": 0.8951, - "step": 6492 - }, - { - "epoch": 0.585561617892411, - "grad_norm": 1.5761430588821492, - "learning_rate": 1.546825412705391e-06, - "loss": 0.9003, - "step": 6493 - }, - { - "epoch": 0.5856518014158814, - "grad_norm": 2.922200393917555, - "learning_rate": 1.5462564162340007e-06, - "loss": 1.0094, - "step": 6494 - }, - { - "epoch": 0.5857419849393516, - "grad_norm": 1.6062016728474766, - "learning_rate": 1.5456874584785144e-06, - "loss": 0.978, - "step": 6495 - }, - { - "epoch": 0.5858321684628218, - "grad_norm": 2.931772044855815, - "learning_rate": 1.5451185394874785e-06, - "loss": 0.951, - "step": 6496 - }, - { - "epoch": 0.5859223519862921, - "grad_norm": 1.496215257615759, - "learning_rate": 1.5445496593094381e-06, - "loss": 0.9906, - "step": 6497 - }, - { - "epoch": 0.5860125355097624, - "grad_norm": 1.5231105355760506, - "learning_rate": 1.5439808179929316e-06, - "loss": 0.9886, - "step": 6498 - }, - { - "epoch": 0.5861027190332326, - "grad_norm": 2.3971002186945745, - "learning_rate": 1.543412015586496e-06, - "loss": 0.9423, - "step": 6499 - }, - { - "epoch": 0.5861929025567029, - "grad_norm": 1.594480010746196, - "learning_rate": 1.5428432521386655e-06, - "loss": 0.9477, - "step": 6500 - }, - { - "epoch": 0.5862830860801731, - "grad_norm": 1.591797099669271, - "learning_rate": 1.5422745276979688e-06, - "loss": 1.0138, - "step": 6501 - }, - { - "epoch": 0.5863732696036434, - "grad_norm": 1.4301120982215394, - "learning_rate": 1.5417058423129336e-06, - "loss": 0.9627, - "step": 6502 - }, - { - "epoch": 0.5864634531271137, - "grad_norm": 1.4992527690007598, - "learning_rate": 1.5411371960320822e-06, - "loss": 1.0181, - "step": 6503 - }, - { - "epoch": 0.5865536366505839, - "grad_norm": 1.2545614461891053, - "learning_rate": 1.5405685889039363e-06, - "loss": 1.0124, - "step": 6504 - }, - { - "epoch": 0.5866438201740543, - "grad_norm": 1.42252603235543, - "learning_rate": 1.5400000209770118e-06, - "loss": 0.9727, - "step": 6505 - }, - { - "epoch": 0.5867340036975245, - "grad_norm": 1.9613152858159664, - "learning_rate": 1.5394314922998208e-06, - "loss": 0.9884, - "step": 6506 - }, - { - "epoch": 0.5868241872209947, - "grad_norm": 1.8108459686503222, - "learning_rate": 1.5388630029208756e-06, - "loss": 1.0303, - "step": 6507 - }, - { - "epoch": 0.586914370744465, - "grad_norm": 1.6775474222134599, - "learning_rate": 1.5382945528886806e-06, - "loss": 1.0015, - "step": 6508 - }, - { - "epoch": 0.5870045542679353, - "grad_norm": 2.1757001604896047, - "learning_rate": 1.5377261422517412e-06, - "loss": 0.967, - "step": 6509 - }, - { - "epoch": 0.5870947377914055, - "grad_norm": 1.537996804212963, - "learning_rate": 1.5371577710585553e-06, - "loss": 0.9469, - "step": 6510 - }, - { - "epoch": 0.5871849213148758, - "grad_norm": 0.7403777695719467, - "learning_rate": 1.536589439357621e-06, - "loss": 0.8502, - "step": 6511 - }, - { - "epoch": 0.587275104838346, - "grad_norm": 1.2676525123528866, - "learning_rate": 1.5360211471974315e-06, - "loss": 0.9833, - "step": 6512 - }, - { - "epoch": 0.5873652883618163, - "grad_norm": 1.2829740136860532, - "learning_rate": 1.5354528946264753e-06, - "loss": 1.0308, - "step": 6513 - }, - { - "epoch": 0.5874554718852866, - "grad_norm": 1.3591018928578085, - "learning_rate": 1.5348846816932399e-06, - "loss": 0.9156, - "step": 6514 - }, - { - "epoch": 0.5875456554087568, - "grad_norm": 0.6381673190574403, - "learning_rate": 1.5343165084462077e-06, - "loss": 0.8258, - "step": 6515 - }, - { - "epoch": 0.587635838932227, - "grad_norm": 1.9450079131762203, - "learning_rate": 1.5337483749338595e-06, - "loss": 0.9293, - "step": 6516 - }, - { - "epoch": 0.5877260224556974, - "grad_norm": 1.8828190846941393, - "learning_rate": 1.5331802812046708e-06, - "loss": 0.9512, - "step": 6517 - }, - { - "epoch": 0.5878162059791676, - "grad_norm": 1.4694213955919815, - "learning_rate": 1.5326122273071133e-06, - "loss": 0.8337, - "step": 6518 - }, - { - "epoch": 0.5879063895026378, - "grad_norm": 1.6818884335778428, - "learning_rate": 1.532044213289659e-06, - "loss": 1.0221, - "step": 6519 - }, - { - "epoch": 0.5879965730261081, - "grad_norm": 1.6243674440931046, - "learning_rate": 1.5314762392007718e-06, - "loss": 0.8678, - "step": 6520 - }, - { - "epoch": 0.5880867565495784, - "grad_norm": 1.4041010225310635, - "learning_rate": 1.530908305088916e-06, - "loss": 1.0344, - "step": 6521 - }, - { - "epoch": 0.5881769400730487, - "grad_norm": 1.740868586646942, - "learning_rate": 1.5303404110025501e-06, - "loss": 0.971, - "step": 6522 - }, - { - "epoch": 0.5882671235965189, - "grad_norm": 1.2570023385771951, - "learning_rate": 1.5297725569901293e-06, - "loss": 0.9425, - "step": 6523 - }, - { - "epoch": 0.5883573071199891, - "grad_norm": 1.9512994398052237, - "learning_rate": 1.5292047431001077e-06, - "loss": 0.8715, - "step": 6524 - }, - { - "epoch": 0.5884474906434595, - "grad_norm": 0.7497704075029452, - "learning_rate": 1.5286369693809321e-06, - "loss": 0.8659, - "step": 6525 - }, - { - "epoch": 0.5885376741669297, - "grad_norm": 1.534188234729135, - "learning_rate": 1.5280692358810506e-06, - "loss": 0.9866, - "step": 6526 - }, - { - "epoch": 0.5886278576903999, - "grad_norm": 1.4292827929741092, - "learning_rate": 1.527501542648904e-06, - "loss": 0.9307, - "step": 6527 - }, - { - "epoch": 0.5887180412138703, - "grad_norm": 2.182183262234872, - "learning_rate": 1.5269338897329308e-06, - "loss": 0.8882, - "step": 6528 - }, - { - "epoch": 0.5888082247373405, - "grad_norm": 1.4207170403433345, - "learning_rate": 1.5263662771815662e-06, - "loss": 0.9992, - "step": 6529 - }, - { - "epoch": 0.5888984082608107, - "grad_norm": 1.5138038626931132, - "learning_rate": 1.5257987050432429e-06, - "loss": 1.0528, - "step": 6530 - }, - { - "epoch": 0.588988591784281, - "grad_norm": 1.6230717736272728, - "learning_rate": 1.5252311733663887e-06, - "loss": 0.8824, - "step": 6531 - }, - { - "epoch": 0.5890787753077513, - "grad_norm": 1.452044587824743, - "learning_rate": 1.5246636821994281e-06, - "loss": 0.9258, - "step": 6532 - }, - { - "epoch": 0.5891689588312216, - "grad_norm": 1.2607270586330845, - "learning_rate": 1.524096231590784e-06, - "loss": 1.0268, - "step": 6533 - }, - { - "epoch": 0.5892591423546918, - "grad_norm": 1.5602522895452677, - "learning_rate": 1.5235288215888736e-06, - "loss": 1.0167, - "step": 6534 - }, - { - "epoch": 0.589349325878162, - "grad_norm": 1.463060845325523, - "learning_rate": 1.5229614522421102e-06, - "loss": 0.982, - "step": 6535 - }, - { - "epoch": 0.5894395094016324, - "grad_norm": 1.4896584694148076, - "learning_rate": 1.5223941235989071e-06, - "loss": 0.808, - "step": 6536 - }, - { - "epoch": 0.5895296929251026, - "grad_norm": 1.2828678988018865, - "learning_rate": 1.52182683570767e-06, - "loss": 0.948, - "step": 6537 - }, - { - "epoch": 0.5896198764485728, - "grad_norm": 1.7292231431799951, - "learning_rate": 1.5212595886168046e-06, - "loss": 0.9949, - "step": 6538 - }, - { - "epoch": 0.5897100599720431, - "grad_norm": 1.4571739311003733, - "learning_rate": 1.520692382374711e-06, - "loss": 0.9894, - "step": 6539 - }, - { - "epoch": 0.5898002434955134, - "grad_norm": 1.8638292702307058, - "learning_rate": 1.5201252170297854e-06, - "loss": 1.0047, - "step": 6540 - }, - { - "epoch": 0.5898904270189836, - "grad_norm": 1.6400667239781257, - "learning_rate": 1.5195580926304232e-06, - "loss": 0.8698, - "step": 6541 - }, - { - "epoch": 0.5899806105424539, - "grad_norm": 1.7314088218359345, - "learning_rate": 1.5189910092250131e-06, - "loss": 0.9891, - "step": 6542 - }, - { - "epoch": 0.5900707940659241, - "grad_norm": 1.6540625179888235, - "learning_rate": 1.5184239668619427e-06, - "loss": 0.981, - "step": 6543 - }, - { - "epoch": 0.5901609775893945, - "grad_norm": 1.6492568596683805, - "learning_rate": 1.5178569655895946e-06, - "loss": 0.9785, - "step": 6544 - }, - { - "epoch": 0.5902511611128647, - "grad_norm": 1.4416778225917302, - "learning_rate": 1.5172900054563487e-06, - "loss": 1.0088, - "step": 6545 - }, - { - "epoch": 0.5903413446363349, - "grad_norm": 1.4192230309885168, - "learning_rate": 1.5167230865105814e-06, - "loss": 1.0022, - "step": 6546 - }, - { - "epoch": 0.5904315281598052, - "grad_norm": 1.5312193264127232, - "learning_rate": 1.5161562088006644e-06, - "loss": 0.8952, - "step": 6547 - }, - { - "epoch": 0.5905217116832755, - "grad_norm": 5.8640173666620115, - "learning_rate": 1.5155893723749685e-06, - "loss": 1.0078, - "step": 6548 - }, - { - "epoch": 0.5906118952067457, - "grad_norm": 1.4813861671666113, - "learning_rate": 1.5150225772818582e-06, - "loss": 0.9856, - "step": 6549 - }, - { - "epoch": 0.590702078730216, - "grad_norm": 1.4977070449071481, - "learning_rate": 1.5144558235696949e-06, - "loss": 1.0351, - "step": 6550 - }, - { - "epoch": 0.5907922622536863, - "grad_norm": 1.420238936966869, - "learning_rate": 1.5138891112868388e-06, - "loss": 0.8923, - "step": 6551 - }, - { - "epoch": 0.5908824457771565, - "grad_norm": 0.7213513113761791, - "learning_rate": 1.5133224404816433e-06, - "loss": 0.8531, - "step": 6552 - }, - { - "epoch": 0.5909726293006268, - "grad_norm": 1.7081882911455024, - "learning_rate": 1.5127558112024617e-06, - "loss": 0.9099, - "step": 6553 - }, - { - "epoch": 0.591062812824097, - "grad_norm": 0.6603238496829518, - "learning_rate": 1.5121892234976404e-06, - "loss": 0.8556, - "step": 6554 - }, - { - "epoch": 0.5911529963475673, - "grad_norm": 1.4905248036488217, - "learning_rate": 1.5116226774155243e-06, - "loss": 0.9671, - "step": 6555 - }, - { - "epoch": 0.5912431798710376, - "grad_norm": 1.5096429969957452, - "learning_rate": 1.5110561730044547e-06, - "loss": 0.9444, - "step": 6556 - }, - { - "epoch": 0.5913333633945078, - "grad_norm": 1.4915382610583778, - "learning_rate": 1.510489710312768e-06, - "loss": 0.9733, - "step": 6557 - }, - { - "epoch": 0.591423546917978, - "grad_norm": 1.7968642524729534, - "learning_rate": 1.5099232893887987e-06, - "loss": 0.972, - "step": 6558 - }, - { - "epoch": 0.5915137304414484, - "grad_norm": 1.447181441809837, - "learning_rate": 1.5093569102808758e-06, - "loss": 0.947, - "step": 6559 - }, - { - "epoch": 0.5916039139649186, - "grad_norm": 1.4009879951643427, - "learning_rate": 1.5087905730373275e-06, - "loss": 0.9778, - "step": 6560 - }, - { - "epoch": 0.5916940974883889, - "grad_norm": 0.6473900021090687, - "learning_rate": 1.508224277706476e-06, - "loss": 0.8672, - "step": 6561 - }, - { - "epoch": 0.5917842810118591, - "grad_norm": 1.6020771223261185, - "learning_rate": 1.5076580243366399e-06, - "loss": 0.9441, - "step": 6562 - }, - { - "epoch": 0.5918744645353294, - "grad_norm": 1.887619024907017, - "learning_rate": 1.507091812976137e-06, - "loss": 0.8842, - "step": 6563 - }, - { - "epoch": 0.5919646480587997, - "grad_norm": 1.3355945347697267, - "learning_rate": 1.5065256436732773e-06, - "loss": 0.9292, - "step": 6564 - }, - { - "epoch": 0.5920548315822699, - "grad_norm": 1.5603409918993947, - "learning_rate": 1.5059595164763717e-06, - "loss": 0.9078, - "step": 6565 - }, - { - "epoch": 0.5921450151057401, - "grad_norm": 0.6085599681797272, - "learning_rate": 1.5053934314337243e-06, - "loss": 0.8611, - "step": 6566 - }, - { - "epoch": 0.5922351986292105, - "grad_norm": 1.433041416796894, - "learning_rate": 1.5048273885936356e-06, - "loss": 0.9674, - "step": 6567 - }, - { - "epoch": 0.5923253821526807, - "grad_norm": 1.2647831318533338, - "learning_rate": 1.5042613880044053e-06, - "loss": 0.8951, - "step": 6568 - }, - { - "epoch": 0.592415565676151, - "grad_norm": 1.467899482071348, - "learning_rate": 1.5036954297143265e-06, - "loss": 1.0042, - "step": 6569 - }, - { - "epoch": 0.5925057491996212, - "grad_norm": 1.1139149786181366, - "learning_rate": 1.50312951377169e-06, - "loss": 0.9309, - "step": 6570 - }, - { - "epoch": 0.5925959327230915, - "grad_norm": 0.5882890181516159, - "learning_rate": 1.502563640224784e-06, - "loss": 0.8332, - "step": 6571 - }, - { - "epoch": 0.5926861162465618, - "grad_norm": 1.5823545525891116, - "learning_rate": 1.5019978091218903e-06, - "loss": 0.9706, - "step": 6572 - }, - { - "epoch": 0.592776299770032, - "grad_norm": 1.2103313282458241, - "learning_rate": 1.50143202051129e-06, - "loss": 1.0347, - "step": 6573 - }, - { - "epoch": 0.5928664832935022, - "grad_norm": 1.4276871993050326, - "learning_rate": 1.500866274441258e-06, - "loss": 1.0282, - "step": 6574 - }, - { - "epoch": 0.5929566668169726, - "grad_norm": 4.372955716585608, - "learning_rate": 1.5003005709600682e-06, - "loss": 0.9592, - "step": 6575 - }, - { - "epoch": 0.5930468503404428, - "grad_norm": 1.3641841488981987, - "learning_rate": 1.4997349101159885e-06, - "loss": 1.048, - "step": 6576 - }, - { - "epoch": 0.593137033863913, - "grad_norm": 1.6542687712064554, - "learning_rate": 1.4991692919572854e-06, - "loss": 0.9275, - "step": 6577 - }, - { - "epoch": 0.5932272173873834, - "grad_norm": 1.4740783139339482, - "learning_rate": 1.4986037165322199e-06, - "loss": 0.9806, - "step": 6578 - }, - { - "epoch": 0.5933174009108536, - "grad_norm": 0.733773533055433, - "learning_rate": 1.498038183889049e-06, - "loss": 0.8753, - "step": 6579 - }, - { - "epoch": 0.5934075844343238, - "grad_norm": 1.531646287981075, - "learning_rate": 1.4974726940760292e-06, - "loss": 0.8971, - "step": 6580 - }, - { - "epoch": 0.5934977679577941, - "grad_norm": 1.3744443724481565, - "learning_rate": 1.496907247141409e-06, - "loss": 1.0409, - "step": 6581 - }, - { - "epoch": 0.5935879514812644, - "grad_norm": 0.7721521119515515, - "learning_rate": 1.4963418431334372e-06, - "loss": 0.9611, - "step": 6582 - }, - { - "epoch": 0.5936781350047347, - "grad_norm": 1.5208715495006693, - "learning_rate": 1.4957764821003566e-06, - "loss": 0.9568, - "step": 6583 - }, - { - "epoch": 0.5937683185282049, - "grad_norm": 1.5615714556651166, - "learning_rate": 1.4952111640904063e-06, - "loss": 0.9788, - "step": 6584 - }, - { - "epoch": 0.5938585020516751, - "grad_norm": 1.3100354807176544, - "learning_rate": 1.494645889151823e-06, - "loss": 0.9548, - "step": 6585 - }, - { - "epoch": 0.5939486855751455, - "grad_norm": 0.6580811792259261, - "learning_rate": 1.494080657332839e-06, - "loss": 0.8127, - "step": 6586 - }, - { - "epoch": 0.5940388690986157, - "grad_norm": 2.1919765988165842, - "learning_rate": 1.4935154686816832e-06, - "loss": 0.9385, - "step": 6587 - }, - { - "epoch": 0.5941290526220859, - "grad_norm": 1.5817378772881807, - "learning_rate": 1.4929503232465802e-06, - "loss": 1.0395, - "step": 6588 - }, - { - "epoch": 0.5942192361455562, - "grad_norm": 1.8407315908244821, - "learning_rate": 1.492385221075751e-06, - "loss": 0.8898, - "step": 6589 - }, - { - "epoch": 0.5943094196690265, - "grad_norm": 1.4744565770898066, - "learning_rate": 1.4918201622174142e-06, - "loss": 1.0038, - "step": 6590 - }, - { - "epoch": 0.5943996031924967, - "grad_norm": 1.4291373273027044, - "learning_rate": 1.4912551467197827e-06, - "loss": 1.0008, - "step": 6591 - }, - { - "epoch": 0.594489786715967, - "grad_norm": 1.6625530578636556, - "learning_rate": 1.4906901746310678e-06, - "loss": 1.0491, - "step": 6592 - }, - { - "epoch": 0.5945799702394372, - "grad_norm": 1.2162175399039916, - "learning_rate": 1.4901252459994757e-06, - "loss": 0.9395, - "step": 6593 - }, - { - "epoch": 0.5946701537629075, - "grad_norm": 3.30650824131408, - "learning_rate": 1.489560360873208e-06, - "loss": 0.9594, - "step": 6594 - }, - { - "epoch": 0.5947603372863778, - "grad_norm": 2.762155026261039, - "learning_rate": 1.4889955193004659e-06, - "loss": 0.9515, - "step": 6595 - }, - { - "epoch": 0.594850520809848, - "grad_norm": 0.6460236351041575, - "learning_rate": 1.4884307213294428e-06, - "loss": 0.8369, - "step": 6596 - }, - { - "epoch": 0.5949407043333182, - "grad_norm": 1.5477484346806087, - "learning_rate": 1.4878659670083321e-06, - "loss": 0.9595, - "step": 6597 - }, - { - "epoch": 0.5950308878567886, - "grad_norm": 1.4210601819453141, - "learning_rate": 1.4873012563853208e-06, - "loss": 0.8993, - "step": 6598 - }, - { - "epoch": 0.5951210713802588, - "grad_norm": 1.836188181543537, - "learning_rate": 1.4867365895085935e-06, - "loss": 0.982, - "step": 6599 - }, - { - "epoch": 0.5952112549037291, - "grad_norm": 1.2409542847850075, - "learning_rate": 1.4861719664263301e-06, - "loss": 0.9528, - "step": 6600 - }, - { - "epoch": 0.5953014384271994, - "grad_norm": 1.4411150216835733, - "learning_rate": 1.485607387186708e-06, - "loss": 1.0697, - "step": 6601 - }, - { - "epoch": 0.5953916219506696, - "grad_norm": 1.6474071735864533, - "learning_rate": 1.4850428518379001e-06, - "loss": 0.9691, - "step": 6602 - }, - { - "epoch": 0.5954818054741399, - "grad_norm": 1.214209191531744, - "learning_rate": 1.4844783604280746e-06, - "loss": 0.9683, - "step": 6603 - }, - { - "epoch": 0.5955719889976101, - "grad_norm": 2.117430950962105, - "learning_rate": 1.483913913005399e-06, - "loss": 0.9142, - "step": 6604 - }, - { - "epoch": 0.5956621725210804, - "grad_norm": 1.5884317412151, - "learning_rate": 1.483349509618034e-06, - "loss": 1.0316, - "step": 6605 - }, - { - "epoch": 0.5957523560445507, - "grad_norm": 1.5601214042520053, - "learning_rate": 1.4827851503141367e-06, - "loss": 0.9113, - "step": 6606 - }, - { - "epoch": 0.5958425395680209, - "grad_norm": 1.5554471101122556, - "learning_rate": 1.482220835141863e-06, - "loss": 0.9035, - "step": 6607 - }, - { - "epoch": 0.5959327230914911, - "grad_norm": 1.4868808675989353, - "learning_rate": 1.481656564149362e-06, - "loss": 1.0538, - "step": 6608 - }, - { - "epoch": 0.5960229066149615, - "grad_norm": 1.4760737341836783, - "learning_rate": 1.4810923373847818e-06, - "loss": 0.9712, - "step": 6609 - }, - { - "epoch": 0.5961130901384317, - "grad_norm": 1.5481635227577324, - "learning_rate": 1.4805281548962647e-06, - "loss": 0.9811, - "step": 6610 - }, - { - "epoch": 0.596203273661902, - "grad_norm": 1.5140289869821653, - "learning_rate": 1.4799640167319488e-06, - "loss": 0.9217, - "step": 6611 - }, - { - "epoch": 0.5962934571853722, - "grad_norm": 1.55023452768785, - "learning_rate": 1.4793999229399714e-06, - "loss": 0.962, - "step": 6612 - }, - { - "epoch": 0.5963836407088425, - "grad_norm": 2.2257540121718247, - "learning_rate": 1.4788358735684626e-06, - "loss": 1.0359, - "step": 6613 - }, - { - "epoch": 0.5964738242323128, - "grad_norm": 1.319337834964633, - "learning_rate": 1.4782718686655514e-06, - "loss": 1.0414, - "step": 6614 - }, - { - "epoch": 0.596564007755783, - "grad_norm": 2.922110971316203, - "learning_rate": 1.4777079082793605e-06, - "loss": 1.0178, - "step": 6615 - }, - { - "epoch": 0.5966541912792532, - "grad_norm": 2.4025227797874176, - "learning_rate": 1.4771439924580108e-06, - "loss": 0.9407, - "step": 6616 - }, - { - "epoch": 0.5967443748027236, - "grad_norm": 1.3708673708708554, - "learning_rate": 1.4765801212496189e-06, - "loss": 1.0157, - "step": 6617 - }, - { - "epoch": 0.5968345583261938, - "grad_norm": 1.644138098905349, - "learning_rate": 1.4760162947022968e-06, - "loss": 0.8933, - "step": 6618 - }, - { - "epoch": 0.596924741849664, - "grad_norm": 1.2941770410869036, - "learning_rate": 1.475452512864154e-06, - "loss": 0.9631, - "step": 6619 - }, - { - "epoch": 0.5970149253731343, - "grad_norm": 1.2763873144662856, - "learning_rate": 1.4748887757832945e-06, - "loss": 0.9859, - "step": 6620 - }, - { - "epoch": 0.5971051088966046, - "grad_norm": 4.693807198776608, - "learning_rate": 1.4743250835078209e-06, - "loss": 1.0053, - "step": 6621 - }, - { - "epoch": 0.5971952924200749, - "grad_norm": 1.3737534160762448, - "learning_rate": 1.4737614360858297e-06, - "loss": 0.9426, - "step": 6622 - }, - { - "epoch": 0.5972854759435451, - "grad_norm": 1.7356450697247097, - "learning_rate": 1.4731978335654133e-06, - "loss": 0.9282, - "step": 6623 - }, - { - "epoch": 0.5973756594670154, - "grad_norm": 1.744057169559045, - "learning_rate": 1.4726342759946638e-06, - "loss": 0.9122, - "step": 6624 - }, - { - "epoch": 0.5974658429904857, - "grad_norm": 1.4406351647670055, - "learning_rate": 1.4720707634216653e-06, - "loss": 0.9732, - "step": 6625 - }, - { - "epoch": 0.5975560265139559, - "grad_norm": 1.8036741904587836, - "learning_rate": 1.4715072958945e-06, - "loss": 0.9803, - "step": 6626 - }, - { - "epoch": 0.5976462100374261, - "grad_norm": 1.677566679810362, - "learning_rate": 1.470943873461247e-06, - "loss": 0.978, - "step": 6627 - }, - { - "epoch": 0.5977363935608965, - "grad_norm": 2.278225186712958, - "learning_rate": 1.470380496169979e-06, - "loss": 0.9449, - "step": 6628 - }, - { - "epoch": 0.5978265770843667, - "grad_norm": 1.4343115233626986, - "learning_rate": 1.4698171640687682e-06, - "loss": 0.9712, - "step": 6629 - }, - { - "epoch": 0.5979167606078369, - "grad_norm": 1.2724495079950042, - "learning_rate": 1.4692538772056792e-06, - "loss": 0.9145, - "step": 6630 - }, - { - "epoch": 0.5980069441313072, - "grad_norm": 1.2042629755614704, - "learning_rate": 1.4686906356287772e-06, - "loss": 0.8895, - "step": 6631 - }, - { - "epoch": 0.5980971276547775, - "grad_norm": 1.4201213376495196, - "learning_rate": 1.4681274393861194e-06, - "loss": 1.0117, - "step": 6632 - }, - { - "epoch": 0.5981873111782477, - "grad_norm": 1.627841372688829, - "learning_rate": 1.4675642885257603e-06, - "loss": 1.0703, - "step": 6633 - }, - { - "epoch": 0.598277494701718, - "grad_norm": 1.5581528271800302, - "learning_rate": 1.4670011830957529e-06, - "loss": 1.0651, - "step": 6634 - }, - { - "epoch": 0.5983676782251882, - "grad_norm": 1.5383685721221807, - "learning_rate": 1.4664381231441427e-06, - "loss": 0.9366, - "step": 6635 - }, - { - "epoch": 0.5984578617486586, - "grad_norm": 1.5391146268585667, - "learning_rate": 1.4658751087189746e-06, - "loss": 0.9792, - "step": 6636 - }, - { - "epoch": 0.5985480452721288, - "grad_norm": 1.54316961212377, - "learning_rate": 1.4653121398682874e-06, - "loss": 1.0151, - "step": 6637 - }, - { - "epoch": 0.598638228795599, - "grad_norm": 1.249613272925281, - "learning_rate": 1.4647492166401159e-06, - "loss": 0.9133, - "step": 6638 - }, - { - "epoch": 0.5987284123190693, - "grad_norm": 1.473287674698398, - "learning_rate": 1.4641863390824934e-06, - "loss": 0.9647, - "step": 6639 - }, - { - "epoch": 0.5988185958425396, - "grad_norm": 1.3078964880952075, - "learning_rate": 1.4636235072434465e-06, - "loss": 0.9686, - "step": 6640 - }, - { - "epoch": 0.5989087793660098, - "grad_norm": 1.3233869072297457, - "learning_rate": 1.4630607211709994e-06, - "loss": 0.9984, - "step": 6641 - }, - { - "epoch": 0.5989989628894801, - "grad_norm": 1.6930010107299889, - "learning_rate": 1.4624979809131723e-06, - "loss": 0.9807, - "step": 6642 - }, - { - "epoch": 0.5990891464129503, - "grad_norm": 1.4043761590427426, - "learning_rate": 1.4619352865179814e-06, - "loss": 0.8851, - "step": 6643 - }, - { - "epoch": 0.5991793299364206, - "grad_norm": 1.4496414925243222, - "learning_rate": 1.4613726380334391e-06, - "loss": 0.9447, - "step": 6644 - }, - { - "epoch": 0.5992695134598909, - "grad_norm": 1.6429082332391085, - "learning_rate": 1.4608100355075522e-06, - "loss": 0.83, - "step": 6645 - }, - { - "epoch": 0.5993596969833611, - "grad_norm": 0.6789241538194679, - "learning_rate": 1.460247478988327e-06, - "loss": 0.8384, - "step": 6646 - }, - { - "epoch": 0.5994498805068315, - "grad_norm": 1.3398380112594621, - "learning_rate": 1.4596849685237623e-06, - "loss": 0.9869, - "step": 6647 - }, - { - "epoch": 0.5995400640303017, - "grad_norm": 1.3219251546520905, - "learning_rate": 1.459122504161856e-06, - "loss": 0.8794, - "step": 6648 - }, - { - "epoch": 0.5996302475537719, - "grad_norm": 1.42582154935556, - "learning_rate": 1.4585600859506001e-06, - "loss": 0.9465, - "step": 6649 - }, - { - "epoch": 0.5997204310772422, - "grad_norm": 1.7027028811842122, - "learning_rate": 1.4579977139379826e-06, - "loss": 1.032, - "step": 6650 - }, - { - "epoch": 0.5998106146007125, - "grad_norm": 1.7723150524286893, - "learning_rate": 1.4574353881719895e-06, - "loss": 1.0408, - "step": 6651 - }, - { - "epoch": 0.5999007981241827, - "grad_norm": 1.851739866879707, - "learning_rate": 1.4568731087005998e-06, - "loss": 0.9767, - "step": 6652 - }, - { - "epoch": 0.599990981647653, - "grad_norm": 1.2487995582346407, - "learning_rate": 1.4563108755717916e-06, - "loss": 0.9953, - "step": 6653 - }, - { - "epoch": 0.6000811651711232, - "grad_norm": 1.8669496648139972, - "learning_rate": 1.455748688833538e-06, - "loss": 0.9962, - "step": 6654 - }, - { - "epoch": 0.6001713486945935, - "grad_norm": 1.4345310068166643, - "learning_rate": 1.4551865485338065e-06, - "loss": 0.8827, - "step": 6655 - }, - { - "epoch": 0.6002615322180638, - "grad_norm": 1.4641840148459633, - "learning_rate": 1.4546244547205629e-06, - "loss": 1.0364, - "step": 6656 - }, - { - "epoch": 0.600351715741534, - "grad_norm": 1.304635281003198, - "learning_rate": 1.4540624074417678e-06, - "loss": 0.9949, - "step": 6657 - }, - { - "epoch": 0.6004418992650042, - "grad_norm": 1.7884797725584112, - "learning_rate": 1.453500406745379e-06, - "loss": 1.0261, - "step": 6658 - }, - { - "epoch": 0.6005320827884746, - "grad_norm": 1.810743006480878, - "learning_rate": 1.4529384526793486e-06, - "loss": 1.0485, - "step": 6659 - }, - { - "epoch": 0.6006222663119448, - "grad_norm": 1.3978685041943737, - "learning_rate": 1.4523765452916252e-06, - "loss": 1.0277, - "step": 6660 - }, - { - "epoch": 0.600712449835415, - "grad_norm": 1.4904639390987164, - "learning_rate": 1.4518146846301554e-06, - "loss": 1.0056, - "step": 6661 - }, - { - "epoch": 0.6008026333588853, - "grad_norm": 2.240613750605096, - "learning_rate": 1.4512528707428787e-06, - "loss": 0.954, - "step": 6662 - }, - { - "epoch": 0.6008928168823556, - "grad_norm": 1.3228589393255716, - "learning_rate": 1.4506911036777335e-06, - "loss": 0.9495, - "step": 6663 - }, - { - "epoch": 0.6009830004058259, - "grad_norm": 0.7015521577612266, - "learning_rate": 1.450129383482651e-06, - "loss": 0.8259, - "step": 6664 - }, - { - "epoch": 0.6010731839292961, - "grad_norm": 1.2715723619620032, - "learning_rate": 1.4495677102055629e-06, - "loss": 0.9321, - "step": 6665 - }, - { - "epoch": 0.6011633674527663, - "grad_norm": 1.4731261619203992, - "learning_rate": 1.4490060838943924e-06, - "loss": 1.0124, - "step": 6666 - }, - { - "epoch": 0.6012535509762367, - "grad_norm": 1.4232748497703778, - "learning_rate": 1.4484445045970609e-06, - "loss": 0.9744, - "step": 6667 - }, - { - "epoch": 0.6013437344997069, - "grad_norm": 1.6109202105596132, - "learning_rate": 1.447882972361485e-06, - "loss": 0.9745, - "step": 6668 - }, - { - "epoch": 0.6014339180231771, - "grad_norm": 1.8302631101478728, - "learning_rate": 1.4473214872355785e-06, - "loss": 0.9611, - "step": 6669 - }, - { - "epoch": 0.6015241015466475, - "grad_norm": 1.6039098145918667, - "learning_rate": 1.4467600492672508e-06, - "loss": 1.0174, - "step": 6670 - }, - { - "epoch": 0.6016142850701177, - "grad_norm": 1.4710226129885238, - "learning_rate": 1.4461986585044054e-06, - "loss": 0.9553, - "step": 6671 - }, - { - "epoch": 0.601704468593588, - "grad_norm": 1.4521829155918733, - "learning_rate": 1.4456373149949446e-06, - "loss": 0.9519, - "step": 6672 - }, - { - "epoch": 0.6017946521170582, - "grad_norm": 1.4509429956513242, - "learning_rate": 1.4450760187867648e-06, - "loss": 0.9816, - "step": 6673 - }, - { - "epoch": 0.6018848356405285, - "grad_norm": 1.4092698203887821, - "learning_rate": 1.4445147699277581e-06, - "loss": 0.9666, - "step": 6674 - }, - { - "epoch": 0.6019750191639988, - "grad_norm": 1.947632413581325, - "learning_rate": 1.4439535684658154e-06, - "loss": 1.0248, - "step": 6675 - }, - { - "epoch": 0.602065202687469, - "grad_norm": 0.7940644459886362, - "learning_rate": 1.44339241444882e-06, - "loss": 0.8939, - "step": 6676 - }, - { - "epoch": 0.6021553862109392, - "grad_norm": 1.7552047714363277, - "learning_rate": 1.4428313079246518e-06, - "loss": 0.9203, - "step": 6677 - }, - { - "epoch": 0.6022455697344096, - "grad_norm": 0.6777901090631918, - "learning_rate": 1.4422702489411896e-06, - "loss": 0.8538, - "step": 6678 - }, - { - "epoch": 0.6023357532578798, - "grad_norm": 1.5782327898435573, - "learning_rate": 1.4417092375463043e-06, - "loss": 0.9276, - "step": 6679 - }, - { - "epoch": 0.60242593678135, - "grad_norm": 1.730244985885421, - "learning_rate": 1.441148273787866e-06, - "loss": 1.0074, - "step": 6680 - }, - { - "epoch": 0.6025161203048203, - "grad_norm": 0.7924128872018414, - "learning_rate": 1.4405873577137383e-06, - "loss": 0.8988, - "step": 6681 - }, - { - "epoch": 0.6026063038282906, - "grad_norm": 1.5604439750424286, - "learning_rate": 1.4400264893717816e-06, - "loss": 0.9884, - "step": 6682 - }, - { - "epoch": 0.6026964873517608, - "grad_norm": 2.0789735825453355, - "learning_rate": 1.4394656688098526e-06, - "loss": 1.0233, - "step": 6683 - }, - { - "epoch": 0.6027866708752311, - "grad_norm": 1.6353211182626535, - "learning_rate": 1.4389048960758032e-06, - "loss": 1.0025, - "step": 6684 - }, - { - "epoch": 0.6028768543987013, - "grad_norm": 1.7975231162801222, - "learning_rate": 1.4383441712174826e-06, - "loss": 1.0048, - "step": 6685 - }, - { - "epoch": 0.6029670379221717, - "grad_norm": 1.5655173350336602, - "learning_rate": 1.4377834942827333e-06, - "loss": 1.0367, - "step": 6686 - }, - { - "epoch": 0.6030572214456419, - "grad_norm": 1.4988158638348013, - "learning_rate": 1.437222865319397e-06, - "loss": 0.9145, - "step": 6687 - }, - { - "epoch": 0.6031474049691121, - "grad_norm": 1.3502603774283786, - "learning_rate": 1.4366622843753092e-06, - "loss": 0.9074, - "step": 6688 - }, - { - "epoch": 0.6032375884925824, - "grad_norm": 1.2248602067241123, - "learning_rate": 1.4361017514983006e-06, - "loss": 0.996, - "step": 6689 - }, - { - "epoch": 0.6033277720160527, - "grad_norm": 1.3604283252973985, - "learning_rate": 1.4355412667362006e-06, - "loss": 1.033, - "step": 6690 - }, - { - "epoch": 0.6034179555395229, - "grad_norm": 1.6577976840954423, - "learning_rate": 1.4349808301368311e-06, - "loss": 0.9314, - "step": 6691 - }, - { - "epoch": 0.6035081390629932, - "grad_norm": 1.5016601435484, - "learning_rate": 1.4344204417480139e-06, - "loss": 1.0168, - "step": 6692 - }, - { - "epoch": 0.6035983225864634, - "grad_norm": 1.4753256971612974, - "learning_rate": 1.4338601016175628e-06, - "loss": 0.921, - "step": 6693 - }, - { - "epoch": 0.6036885061099337, - "grad_norm": 1.205383410495562, - "learning_rate": 1.433299809793289e-06, - "loss": 0.9736, - "step": 6694 - }, - { - "epoch": 0.603778689633404, - "grad_norm": 2.0409910939763845, - "learning_rate": 1.432739566323001e-06, - "loss": 0.8359, - "step": 6695 - }, - { - "epoch": 0.6038688731568742, - "grad_norm": 1.7110571971810722, - "learning_rate": 1.4321793712545004e-06, - "loss": 0.9424, - "step": 6696 - }, - { - "epoch": 0.6039590566803446, - "grad_norm": 1.6062971145310436, - "learning_rate": 1.4316192246355873e-06, - "loss": 1.0093, - "step": 6697 - }, - { - "epoch": 0.6040492402038148, - "grad_norm": 0.640449755820434, - "learning_rate": 1.4310591265140555e-06, - "loss": 0.8429, - "step": 6698 - }, - { - "epoch": 0.604139423727285, - "grad_norm": 1.2883481907614989, - "learning_rate": 1.4304990769376963e-06, - "loss": 0.9836, - "step": 6699 - }, - { - "epoch": 0.6042296072507553, - "grad_norm": 1.4675959659372295, - "learning_rate": 1.4299390759542962e-06, - "loss": 0.9639, - "step": 6700 - }, - { - "epoch": 0.6043197907742256, - "grad_norm": 1.4178097638412148, - "learning_rate": 1.4293791236116368e-06, - "loss": 0.9548, - "step": 6701 - }, - { - "epoch": 0.6044099742976958, - "grad_norm": 1.3564247036405757, - "learning_rate": 1.4288192199574978e-06, - "loss": 0.9457, - "step": 6702 - }, - { - "epoch": 0.6045001578211661, - "grad_norm": 0.6112480232971307, - "learning_rate": 1.4282593650396524e-06, - "loss": 0.7981, - "step": 6703 - }, - { - "epoch": 0.6045903413446363, - "grad_norm": 1.323992594533995, - "learning_rate": 1.4276995589058695e-06, - "loss": 1.0747, - "step": 6704 - }, - { - "epoch": 0.6046805248681066, - "grad_norm": 0.6269445448433886, - "learning_rate": 1.4271398016039168e-06, - "loss": 0.8235, - "step": 6705 - }, - { - "epoch": 0.6047707083915769, - "grad_norm": 1.656108490278788, - "learning_rate": 1.4265800931815542e-06, - "loss": 1.0079, - "step": 6706 - }, - { - "epoch": 0.6048608919150471, - "grad_norm": 1.4075134959005804, - "learning_rate": 1.4260204336865406e-06, - "loss": 0.984, - "step": 6707 - }, - { - "epoch": 0.6049510754385173, - "grad_norm": 1.603270870956227, - "learning_rate": 1.4254608231666281e-06, - "loss": 0.9691, - "step": 6708 - }, - { - "epoch": 0.6050412589619877, - "grad_norm": 1.2819447378367406, - "learning_rate": 1.4249012616695661e-06, - "loss": 0.9792, - "step": 6709 - }, - { - "epoch": 0.6051314424854579, - "grad_norm": 1.5704751001695905, - "learning_rate": 1.4243417492431e-06, - "loss": 0.9815, - "step": 6710 - }, - { - "epoch": 0.6052216260089281, - "grad_norm": 1.5704838293943948, - "learning_rate": 1.4237822859349696e-06, - "loss": 0.9612, - "step": 6711 - }, - { - "epoch": 0.6053118095323984, - "grad_norm": 1.4591124043087267, - "learning_rate": 1.423222871792912e-06, - "loss": 0.9468, - "step": 6712 - }, - { - "epoch": 0.6054019930558687, - "grad_norm": 1.3712741048497228, - "learning_rate": 1.4226635068646586e-06, - "loss": 1.0384, - "step": 6713 - }, - { - "epoch": 0.605492176579339, - "grad_norm": 0.6543938408355419, - "learning_rate": 1.4221041911979393e-06, - "loss": 0.839, - "step": 6714 - }, - { - "epoch": 0.6055823601028092, - "grad_norm": 1.8998926810021564, - "learning_rate": 1.4215449248404765e-06, - "loss": 0.9923, - "step": 6715 - }, - { - "epoch": 0.6056725436262794, - "grad_norm": 1.6532672438692055, - "learning_rate": 1.4209857078399896e-06, - "loss": 0.9878, - "step": 6716 - }, - { - "epoch": 0.6057627271497498, - "grad_norm": 1.6599115168877179, - "learning_rate": 1.4204265402441955e-06, - "loss": 0.942, - "step": 6717 - }, - { - "epoch": 0.60585291067322, - "grad_norm": 1.1742130606812333, - "learning_rate": 1.419867422100804e-06, - "loss": 0.8522, - "step": 6718 - }, - { - "epoch": 0.6059430941966902, - "grad_norm": 1.324421644843083, - "learning_rate": 1.4193083534575236e-06, - "loss": 0.9604, - "step": 6719 - }, - { - "epoch": 0.6060332777201606, - "grad_norm": 1.6579082033539405, - "learning_rate": 1.4187493343620567e-06, - "loss": 1.0024, - "step": 6720 - }, - { - "epoch": 0.6061234612436308, - "grad_norm": 1.2965298848383915, - "learning_rate": 1.4181903648621006e-06, - "loss": 1.0076, - "step": 6721 - }, - { - "epoch": 0.606213644767101, - "grad_norm": 1.5619152500786797, - "learning_rate": 1.4176314450053512e-06, - "loss": 1.0086, - "step": 6722 - }, - { - "epoch": 0.6063038282905713, - "grad_norm": 1.5692365005979692, - "learning_rate": 1.4170725748394977e-06, - "loss": 0.9655, - "step": 6723 - }, - { - "epoch": 0.6063940118140416, - "grad_norm": 1.2620726759217025, - "learning_rate": 1.4165137544122266e-06, - "loss": 0.8842, - "step": 6724 - }, - { - "epoch": 0.6064841953375119, - "grad_norm": 2.0496547547818995, - "learning_rate": 1.4159549837712194e-06, - "loss": 1.0361, - "step": 6725 - }, - { - "epoch": 0.6065743788609821, - "grad_norm": 1.4907311327756056, - "learning_rate": 1.415396262964153e-06, - "loss": 0.8962, - "step": 6726 - }, - { - "epoch": 0.6066645623844523, - "grad_norm": 1.497419840302346, - "learning_rate": 1.4148375920387016e-06, - "loss": 1.0516, - "step": 6727 - }, - { - "epoch": 0.6067547459079227, - "grad_norm": 0.7774770804458426, - "learning_rate": 1.4142789710425325e-06, - "loss": 0.8467, - "step": 6728 - }, - { - "epoch": 0.6068449294313929, - "grad_norm": 1.7312340690898902, - "learning_rate": 1.4137204000233118e-06, - "loss": 0.9032, - "step": 6729 - }, - { - "epoch": 0.6069351129548631, - "grad_norm": 1.6687314436091545, - "learning_rate": 1.4131618790286987e-06, - "loss": 0.9646, - "step": 6730 - }, - { - "epoch": 0.6070252964783334, - "grad_norm": 1.650306329323765, - "learning_rate": 1.4126034081063506e-06, - "loss": 0.9673, - "step": 6731 - }, - { - "epoch": 0.6071154800018037, - "grad_norm": 1.8078882806534524, - "learning_rate": 1.4120449873039186e-06, - "loss": 0.9573, - "step": 6732 - }, - { - "epoch": 0.6072056635252739, - "grad_norm": 1.451740302456668, - "learning_rate": 1.4114866166690494e-06, - "loss": 1.0121, - "step": 6733 - }, - { - "epoch": 0.6072958470487442, - "grad_norm": 1.245518566585855, - "learning_rate": 1.4109282962493877e-06, - "loss": 0.93, - "step": 6734 - }, - { - "epoch": 0.6073860305722144, - "grad_norm": 1.8605585258482704, - "learning_rate": 1.4103700260925716e-06, - "loss": 0.9616, - "step": 6735 - }, - { - "epoch": 0.6074762140956848, - "grad_norm": 2.0837960555874315, - "learning_rate": 1.4098118062462364e-06, - "loss": 1.0123, - "step": 6736 - }, - { - "epoch": 0.607566397619155, - "grad_norm": 1.182383658349489, - "learning_rate": 1.4092536367580123e-06, - "loss": 0.9688, - "step": 6737 - }, - { - "epoch": 0.6076565811426252, - "grad_norm": 1.774972662916689, - "learning_rate": 1.4086955176755248e-06, - "loss": 0.8889, - "step": 6738 - }, - { - "epoch": 0.6077467646660955, - "grad_norm": 2.1202919359552252, - "learning_rate": 1.4081374490463964e-06, - "loss": 0.9114, - "step": 6739 - }, - { - "epoch": 0.6078369481895658, - "grad_norm": 1.647676482060578, - "learning_rate": 1.4075794309182443e-06, - "loss": 0.9884, - "step": 6740 - }, - { - "epoch": 0.607927131713036, - "grad_norm": 1.900966890162129, - "learning_rate": 1.407021463338682e-06, - "loss": 0.9796, - "step": 6741 - }, - { - "epoch": 0.6080173152365063, - "grad_norm": 0.6452563513096254, - "learning_rate": 1.4064635463553177e-06, - "loss": 0.802, - "step": 6742 - }, - { - "epoch": 0.6081074987599766, - "grad_norm": 1.516069563329025, - "learning_rate": 1.4059056800157563e-06, - "loss": 1.0007, - "step": 6743 - }, - { - "epoch": 0.6081976822834468, - "grad_norm": 1.4572595003335047, - "learning_rate": 1.4053478643675982e-06, - "loss": 0.907, - "step": 6744 - }, - { - "epoch": 0.6082878658069171, - "grad_norm": 1.704740055997852, - "learning_rate": 1.4047900994584389e-06, - "loss": 0.8735, - "step": 6745 - }, - { - "epoch": 0.6083780493303873, - "grad_norm": 1.76513860974138, - "learning_rate": 1.404232385335871e-06, - "loss": 0.8788, - "step": 6746 - }, - { - "epoch": 0.6084682328538576, - "grad_norm": 1.6733278040902226, - "learning_rate": 1.4036747220474806e-06, - "loss": 0.9789, - "step": 6747 - }, - { - "epoch": 0.6085584163773279, - "grad_norm": 1.4963792174973507, - "learning_rate": 1.4031171096408506e-06, - "loss": 0.9336, - "step": 6748 - }, - { - "epoch": 0.6086485999007981, - "grad_norm": 1.6826692596366197, - "learning_rate": 1.4025595481635607e-06, - "loss": 1.0009, - "step": 6749 - }, - { - "epoch": 0.6087387834242683, - "grad_norm": 1.4363112302631535, - "learning_rate": 1.4020020376631836e-06, - "loss": 0.9937, - "step": 6750 - }, - { - "epoch": 0.6088289669477387, - "grad_norm": 2.2587737574803035, - "learning_rate": 1.4014445781872908e-06, - "loss": 0.9873, - "step": 6751 - }, - { - "epoch": 0.6089191504712089, - "grad_norm": 1.4008698434892237, - "learning_rate": 1.4008871697834465e-06, - "loss": 1.0347, - "step": 6752 - }, - { - "epoch": 0.6090093339946792, - "grad_norm": 2.2603328589681873, - "learning_rate": 1.400329812499213e-06, - "loss": 1.008, - "step": 6753 - }, - { - "epoch": 0.6090995175181494, - "grad_norm": 1.2798326142210508, - "learning_rate": 1.3997725063821458e-06, - "loss": 0.9832, - "step": 6754 - }, - { - "epoch": 0.6091897010416197, - "grad_norm": 1.3450168581618684, - "learning_rate": 1.3992152514797978e-06, - "loss": 0.9411, - "step": 6755 - }, - { - "epoch": 0.60927988456509, - "grad_norm": 0.6077480850801542, - "learning_rate": 1.398658047839718e-06, - "loss": 0.8601, - "step": 6756 - }, - { - "epoch": 0.6093700680885602, - "grad_norm": 0.6237919337715201, - "learning_rate": 1.3981008955094481e-06, - "loss": 0.8276, - "step": 6757 - }, - { - "epoch": 0.6094602516120304, - "grad_norm": 1.551211511225137, - "learning_rate": 1.39754379453653e-06, - "loss": 0.9498, - "step": 6758 - }, - { - "epoch": 0.6095504351355008, - "grad_norm": 1.599062946223349, - "learning_rate": 1.3969867449684972e-06, - "loss": 0.9816, - "step": 6759 - }, - { - "epoch": 0.609640618658971, - "grad_norm": 1.8423634504135713, - "learning_rate": 1.396429746852879e-06, - "loss": 0.9545, - "step": 6760 - }, - { - "epoch": 0.6097308021824412, - "grad_norm": 1.5751733654217346, - "learning_rate": 1.395872800237204e-06, - "loss": 0.9553, - "step": 6761 - }, - { - "epoch": 0.6098209857059115, - "grad_norm": 0.7439755634658112, - "learning_rate": 1.3953159051689918e-06, - "loss": 0.8766, - "step": 6762 - }, - { - "epoch": 0.6099111692293818, - "grad_norm": 1.868294615104734, - "learning_rate": 1.3947590616957618e-06, - "loss": 1.0134, - "step": 6763 - }, - { - "epoch": 0.610001352752852, - "grad_norm": 1.7678273736558314, - "learning_rate": 1.3942022698650258e-06, - "loss": 1.0081, - "step": 6764 - }, - { - "epoch": 0.6100915362763223, - "grad_norm": 1.4517844795190966, - "learning_rate": 1.3936455297242917e-06, - "loss": 1.0039, - "step": 6765 - }, - { - "epoch": 0.6101817197997926, - "grad_norm": 1.5126809697613295, - "learning_rate": 1.3930888413210652e-06, - "loss": 0.9861, - "step": 6766 - }, - { - "epoch": 0.6102719033232629, - "grad_norm": 2.7117087726414963, - "learning_rate": 1.392532204702845e-06, - "loss": 0.9455, - "step": 6767 - }, - { - "epoch": 0.6103620868467331, - "grad_norm": 1.7867800883450782, - "learning_rate": 1.3919756199171266e-06, - "loss": 0.8965, - "step": 6768 - }, - { - "epoch": 0.6104522703702033, - "grad_norm": 1.2720563454654115, - "learning_rate": 1.3914190870114009e-06, - "loss": 1.0519, - "step": 6769 - }, - { - "epoch": 0.6105424538936737, - "grad_norm": 1.339356670316518, - "learning_rate": 1.3908626060331541e-06, - "loss": 1.0217, - "step": 6770 - }, - { - "epoch": 0.6106326374171439, - "grad_norm": 1.4777788268988254, - "learning_rate": 1.3903061770298693e-06, - "loss": 1.0036, - "step": 6771 - }, - { - "epoch": 0.6107228209406141, - "grad_norm": 1.4408641095438635, - "learning_rate": 1.3897498000490223e-06, - "loss": 0.9442, - "step": 6772 - }, - { - "epoch": 0.6108130044640844, - "grad_norm": 5.517231045499529, - "learning_rate": 1.3891934751380879e-06, - "loss": 1.0406, - "step": 6773 - }, - { - "epoch": 0.6109031879875547, - "grad_norm": 1.3687437954418002, - "learning_rate": 1.3886372023445334e-06, - "loss": 0.9874, - "step": 6774 - }, - { - "epoch": 0.610993371511025, - "grad_norm": 1.3576287201188657, - "learning_rate": 1.3880809817158246e-06, - "loss": 1.0447, - "step": 6775 - }, - { - "epoch": 0.6110835550344952, - "grad_norm": 1.6066423938759105, - "learning_rate": 1.3875248132994206e-06, - "loss": 0.9717, - "step": 6776 - }, - { - "epoch": 0.6111737385579654, - "grad_norm": 1.3903367676077838, - "learning_rate": 1.386968697142776e-06, - "loss": 0.9005, - "step": 6777 - }, - { - "epoch": 0.6112639220814358, - "grad_norm": 1.4042384701867214, - "learning_rate": 1.386412633293343e-06, - "loss": 1.0203, - "step": 6778 - }, - { - "epoch": 0.611354105604906, - "grad_norm": 1.591578106971257, - "learning_rate": 1.3858566217985672e-06, - "loss": 0.949, - "step": 6779 - }, - { - "epoch": 0.6114442891283762, - "grad_norm": 1.713695028412548, - "learning_rate": 1.3853006627058905e-06, - "loss": 1.0607, - "step": 6780 - }, - { - "epoch": 0.6115344726518465, - "grad_norm": 1.7382353701591, - "learning_rate": 1.3847447560627512e-06, - "loss": 0.8579, - "step": 6781 - }, - { - "epoch": 0.6116246561753168, - "grad_norm": 1.6055699523066187, - "learning_rate": 1.3841889019165812e-06, - "loss": 1.0186, - "step": 6782 - }, - { - "epoch": 0.611714839698787, - "grad_norm": 1.7759242163461226, - "learning_rate": 1.3836331003148106e-06, - "loss": 0.9283, - "step": 6783 - }, - { - "epoch": 0.6118050232222573, - "grad_norm": 1.477407465972358, - "learning_rate": 1.3830773513048612e-06, - "loss": 1.0172, - "step": 6784 - }, - { - "epoch": 0.6118952067457275, - "grad_norm": 1.6235426456594173, - "learning_rate": 1.382521654934155e-06, - "loss": 0.9952, - "step": 6785 - }, - { - "epoch": 0.6119853902691978, - "grad_norm": 1.5188382880525881, - "learning_rate": 1.3819660112501057e-06, - "loss": 0.9232, - "step": 6786 - }, - { - "epoch": 0.6120755737926681, - "grad_norm": 1.2270209555107243, - "learning_rate": 1.3814104203001234e-06, - "loss": 0.9435, - "step": 6787 - }, - { - "epoch": 0.6121657573161383, - "grad_norm": 1.289379242794362, - "learning_rate": 1.3808548821316156e-06, - "loss": 1.0167, - "step": 6788 - }, - { - "epoch": 0.6122559408396087, - "grad_norm": 1.3078539223075858, - "learning_rate": 1.3802993967919824e-06, - "loss": 0.9063, - "step": 6789 - }, - { - "epoch": 0.6123461243630789, - "grad_norm": 1.7327817867711433, - "learning_rate": 1.3797439643286227e-06, - "loss": 0.9397, - "step": 6790 - }, - { - "epoch": 0.6124363078865491, - "grad_norm": 1.3932178199757603, - "learning_rate": 1.3791885847889277e-06, - "loss": 0.9621, - "step": 6791 - }, - { - "epoch": 0.6125264914100194, - "grad_norm": 1.7287038237279135, - "learning_rate": 1.3786332582202853e-06, - "loss": 1.009, - "step": 6792 - }, - { - "epoch": 0.6126166749334897, - "grad_norm": 1.4234111159524818, - "learning_rate": 1.3780779846700799e-06, - "loss": 0.9793, - "step": 6793 - }, - { - "epoch": 0.6127068584569599, - "grad_norm": 1.2894937891934453, - "learning_rate": 1.3775227641856899e-06, - "loss": 0.9856, - "step": 6794 - }, - { - "epoch": 0.6127970419804302, - "grad_norm": 1.7684375338664762, - "learning_rate": 1.37696759681449e-06, - "loss": 0.9744, - "step": 6795 - }, - { - "epoch": 0.6128872255039004, - "grad_norm": 0.7204092196654563, - "learning_rate": 1.37641248260385e-06, - "loss": 0.8811, - "step": 6796 - }, - { - "epoch": 0.6129774090273707, - "grad_norm": 1.4137171539619409, - "learning_rate": 1.375857421601136e-06, - "loss": 1.0282, - "step": 6797 - }, - { - "epoch": 0.613067592550841, - "grad_norm": 1.6984672199867106, - "learning_rate": 1.3753024138537082e-06, - "loss": 0.9816, - "step": 6798 - }, - { - "epoch": 0.6131577760743112, - "grad_norm": 1.2331218877599375, - "learning_rate": 1.3747474594089221e-06, - "loss": 1.0389, - "step": 6799 - }, - { - "epoch": 0.6132479595977814, - "grad_norm": 1.3936225216989178, - "learning_rate": 1.374192558314131e-06, - "loss": 0.9547, - "step": 6800 - }, - { - "epoch": 0.6133381431212518, - "grad_norm": 1.5711421767500828, - "learning_rate": 1.373637710616681e-06, - "loss": 1.0354, - "step": 6801 - }, - { - "epoch": 0.613428326644722, - "grad_norm": 1.660083078006025, - "learning_rate": 1.373082916363916e-06, - "loss": 1.0682, - "step": 6802 - }, - { - "epoch": 0.6135185101681923, - "grad_norm": 1.2823903661198268, - "learning_rate": 1.3725281756031732e-06, - "loss": 1.0168, - "step": 6803 - }, - { - "epoch": 0.6136086936916625, - "grad_norm": 1.2683080326235305, - "learning_rate": 1.3719734883817858e-06, - "loss": 0.9761, - "step": 6804 - }, - { - "epoch": 0.6136988772151328, - "grad_norm": 1.8687144783241803, - "learning_rate": 1.371418854747084e-06, - "loss": 1.0925, - "step": 6805 - }, - { - "epoch": 0.6137890607386031, - "grad_norm": 1.6452447426257948, - "learning_rate": 1.3708642747463905e-06, - "loss": 1.0137, - "step": 6806 - }, - { - "epoch": 0.6138792442620733, - "grad_norm": 1.443875067111722, - "learning_rate": 1.370309748427027e-06, - "loss": 1.0097, - "step": 6807 - }, - { - "epoch": 0.6139694277855435, - "grad_norm": 1.4492810999369727, - "learning_rate": 1.3697552758363079e-06, - "loss": 1.054, - "step": 6808 - }, - { - "epoch": 0.6140596113090139, - "grad_norm": 2.7973897449628624, - "learning_rate": 1.3692008570215432e-06, - "loss": 0.9762, - "step": 6809 - }, - { - "epoch": 0.6141497948324841, - "grad_norm": 1.4205947813687663, - "learning_rate": 1.3686464920300398e-06, - "loss": 0.9305, - "step": 6810 - }, - { - "epoch": 0.6142399783559543, - "grad_norm": 2.05652689823114, - "learning_rate": 1.3680921809090985e-06, - "loss": 0.9535, - "step": 6811 - }, - { - "epoch": 0.6143301618794246, - "grad_norm": 1.8247656972272417, - "learning_rate": 1.3675379237060175e-06, - "loss": 1.0121, - "step": 6812 - }, - { - "epoch": 0.6144203454028949, - "grad_norm": 1.498212305357027, - "learning_rate": 1.366983720468088e-06, - "loss": 0.9709, - "step": 6813 - }, - { - "epoch": 0.6145105289263652, - "grad_norm": 1.2631335283583252, - "learning_rate": 1.3664295712425972e-06, - "loss": 0.9836, - "step": 6814 - }, - { - "epoch": 0.6146007124498354, - "grad_norm": 1.6821129608505856, - "learning_rate": 1.3658754760768296e-06, - "loss": 0.9156, - "step": 6815 - }, - { - "epoch": 0.6146908959733057, - "grad_norm": 1.727290418639227, - "learning_rate": 1.3653214350180621e-06, - "loss": 1.0179, - "step": 6816 - }, - { - "epoch": 0.614781079496776, - "grad_norm": 3.0595986772544097, - "learning_rate": 1.3647674481135703e-06, - "loss": 1.0477, - "step": 6817 - }, - { - "epoch": 0.6148712630202462, - "grad_norm": 1.2881863014852664, - "learning_rate": 1.3642135154106217e-06, - "loss": 0.8786, - "step": 6818 - }, - { - "epoch": 0.6149614465437164, - "grad_norm": 1.6795641476781324, - "learning_rate": 1.363659636956482e-06, - "loss": 0.9293, - "step": 6819 - }, - { - "epoch": 0.6150516300671868, - "grad_norm": 1.3867322034586174, - "learning_rate": 1.3631058127984112e-06, - "loss": 0.9481, - "step": 6820 - }, - { - "epoch": 0.615141813590657, - "grad_norm": 1.5573362091056409, - "learning_rate": 1.3625520429836632e-06, - "loss": 0.9581, - "step": 6821 - }, - { - "epoch": 0.6152319971141272, - "grad_norm": 1.4329391772931372, - "learning_rate": 1.361998327559491e-06, - "loss": 0.8851, - "step": 6822 - }, - { - "epoch": 0.6153221806375975, - "grad_norm": 0.6317526808162157, - "learning_rate": 1.3614446665731385e-06, - "loss": 0.8423, - "step": 6823 - }, - { - "epoch": 0.6154123641610678, - "grad_norm": 1.4961823519374313, - "learning_rate": 1.3608910600718484e-06, - "loss": 0.9858, - "step": 6824 - }, - { - "epoch": 0.615502547684538, - "grad_norm": 1.4201015269560107, - "learning_rate": 1.360337508102857e-06, - "loss": 0.9863, - "step": 6825 - }, - { - "epoch": 0.6155927312080083, - "grad_norm": 3.867168881149939, - "learning_rate": 1.3597840107133962e-06, - "loss": 1.0766, - "step": 6826 - }, - { - "epoch": 0.6156829147314785, - "grad_norm": 1.5052118356349375, - "learning_rate": 1.3592305679506944e-06, - "loss": 0.9891, - "step": 6827 - }, - { - "epoch": 0.6157730982549489, - "grad_norm": 6.173888059263024, - "learning_rate": 1.3586771798619726e-06, - "loss": 0.8918, - "step": 6828 - }, - { - "epoch": 0.6158632817784191, - "grad_norm": 1.3097009785640568, - "learning_rate": 1.358123846494451e-06, - "loss": 1.0397, - "step": 6829 - }, - { - "epoch": 0.6159534653018893, - "grad_norm": 1.9681118733273286, - "learning_rate": 1.3575705678953422e-06, - "loss": 0.986, - "step": 6830 - }, - { - "epoch": 0.6160436488253596, - "grad_norm": 2.3713531603616587, - "learning_rate": 1.357017344111854e-06, - "loss": 1.0128, - "step": 6831 - }, - { - "epoch": 0.6161338323488299, - "grad_norm": 1.2833190122441087, - "learning_rate": 1.356464175191192e-06, - "loss": 0.9857, - "step": 6832 - }, - { - "epoch": 0.6162240158723001, - "grad_norm": 2.0660547776027567, - "learning_rate": 1.3559110611805542e-06, - "loss": 1.0034, - "step": 6833 - }, - { - "epoch": 0.6163141993957704, - "grad_norm": 1.4749626898491852, - "learning_rate": 1.3553580021271372e-06, - "loss": 1.0234, - "step": 6834 - }, - { - "epoch": 0.6164043829192406, - "grad_norm": 1.3444264062392994, - "learning_rate": 1.3548049980781297e-06, - "loss": 0.9234, - "step": 6835 - }, - { - "epoch": 0.616494566442711, - "grad_norm": 1.6512890779778906, - "learning_rate": 1.3542520490807166e-06, - "loss": 0.9753, - "step": 6836 - }, - { - "epoch": 0.6165847499661812, - "grad_norm": 1.8502334138178107, - "learning_rate": 1.3536991551820802e-06, - "loss": 0.9783, - "step": 6837 - }, - { - "epoch": 0.6166749334896514, - "grad_norm": 1.207157116101971, - "learning_rate": 1.3531463164293952e-06, - "loss": 0.9565, - "step": 6838 - }, - { - "epoch": 0.6167651170131218, - "grad_norm": 2.0551669065538074, - "learning_rate": 1.3525935328698332e-06, - "loss": 0.8914, - "step": 6839 - }, - { - "epoch": 0.616855300536592, - "grad_norm": 1.6569673676071788, - "learning_rate": 1.3520408045505605e-06, - "loss": 0.9167, - "step": 6840 - }, - { - "epoch": 0.6169454840600622, - "grad_norm": 1.5214506054951464, - "learning_rate": 1.3514881315187396e-06, - "loss": 0.9383, - "step": 6841 - }, - { - "epoch": 0.6170356675835325, - "grad_norm": 1.2889766780143246, - "learning_rate": 1.3509355138215273e-06, - "loss": 1.0332, - "step": 6842 - }, - { - "epoch": 0.6171258511070028, - "grad_norm": 1.410205956425718, - "learning_rate": 1.350382951506075e-06, - "loss": 1.0561, - "step": 6843 - }, - { - "epoch": 0.617216034630473, - "grad_norm": 0.6430027495024349, - "learning_rate": 1.349830444619532e-06, - "loss": 0.8242, - "step": 6844 - }, - { - "epoch": 0.6173062181539433, - "grad_norm": 1.3936426660237655, - "learning_rate": 1.3492779932090397e-06, - "loss": 0.9964, - "step": 6845 - }, - { - "epoch": 0.6173964016774135, - "grad_norm": 1.5487581600439149, - "learning_rate": 1.3487255973217377e-06, - "loss": 1.0029, - "step": 6846 - }, - { - "epoch": 0.6174865852008838, - "grad_norm": 1.5435941177307488, - "learning_rate": 1.3481732570047592e-06, - "loss": 1.0532, - "step": 6847 - }, - { - "epoch": 0.6175767687243541, - "grad_norm": 2.1534984558184957, - "learning_rate": 1.3476209723052314e-06, - "loss": 0.9042, - "step": 6848 - }, - { - "epoch": 0.6176669522478243, - "grad_norm": 1.5750725562927792, - "learning_rate": 1.3470687432702806e-06, - "loss": 0.9924, - "step": 6849 - }, - { - "epoch": 0.6177571357712945, - "grad_norm": 1.5137659055975143, - "learning_rate": 1.346516569947024e-06, - "loss": 0.9091, - "step": 6850 - }, - { - "epoch": 0.6178473192947649, - "grad_norm": 1.65702089327139, - "learning_rate": 1.3459644523825774e-06, - "loss": 0.9703, - "step": 6851 - }, - { - "epoch": 0.6179375028182351, - "grad_norm": 1.4264335916694022, - "learning_rate": 1.34541239062405e-06, - "loss": 1.0243, - "step": 6852 - }, - { - "epoch": 0.6180276863417054, - "grad_norm": 1.3958104781751646, - "learning_rate": 1.3448603847185464e-06, - "loss": 0.9793, - "step": 6853 - }, - { - "epoch": 0.6181178698651756, - "grad_norm": 1.8417010155951254, - "learning_rate": 1.344308434713168e-06, - "loss": 0.9202, - "step": 6854 - }, - { - "epoch": 0.6182080533886459, - "grad_norm": 1.7620523141202513, - "learning_rate": 1.3437565406550083e-06, - "loss": 0.9153, - "step": 6855 - }, - { - "epoch": 0.6182982369121162, - "grad_norm": 2.276114968196841, - "learning_rate": 1.34320470259116e-06, - "loss": 0.9638, - "step": 6856 - }, - { - "epoch": 0.6183884204355864, - "grad_norm": 1.4758821565007552, - "learning_rate": 1.3426529205687078e-06, - "loss": 0.8813, - "step": 6857 - }, - { - "epoch": 0.6184786039590566, - "grad_norm": 1.529865651184786, - "learning_rate": 1.3421011946347323e-06, - "loss": 1.0353, - "step": 6858 - }, - { - "epoch": 0.618568787482527, - "grad_norm": 1.3946318603467243, - "learning_rate": 1.3415495248363113e-06, - "loss": 0.9401, - "step": 6859 - }, - { - "epoch": 0.6186589710059972, - "grad_norm": 1.7767177946715302, - "learning_rate": 1.3409979112205148e-06, - "loss": 1.005, - "step": 6860 - }, - { - "epoch": 0.6187491545294674, - "grad_norm": 1.5122059276109954, - "learning_rate": 1.3404463538344107e-06, - "loss": 1.0104, - "step": 6861 - }, - { - "epoch": 0.6188393380529378, - "grad_norm": 1.6461787364368283, - "learning_rate": 1.3398948527250602e-06, - "loss": 1.0724, - "step": 6862 - }, - { - "epoch": 0.618929521576408, - "grad_norm": 0.7661126977396798, - "learning_rate": 1.3393434079395207e-06, - "loss": 0.8518, - "step": 6863 - }, - { - "epoch": 0.6190197050998782, - "grad_norm": 1.4950085082816735, - "learning_rate": 1.3387920195248449e-06, - "loss": 0.9653, - "step": 6864 - }, - { - "epoch": 0.6191098886233485, - "grad_norm": 1.396613951320198, - "learning_rate": 1.3382406875280791e-06, - "loss": 0.9848, - "step": 6865 - }, - { - "epoch": 0.6192000721468188, - "grad_norm": 1.353201649457291, - "learning_rate": 1.3376894119962672e-06, - "loss": 0.999, - "step": 6866 - }, - { - "epoch": 0.6192902556702891, - "grad_norm": 1.820655561217487, - "learning_rate": 1.3371381929764464e-06, - "loss": 0.9988, - "step": 6867 - }, - { - "epoch": 0.6193804391937593, - "grad_norm": 0.6778274823964816, - "learning_rate": 1.3365870305156506e-06, - "loss": 0.8292, - "step": 6868 - }, - { - "epoch": 0.6194706227172295, - "grad_norm": 1.8139285015917024, - "learning_rate": 1.3360359246609073e-06, - "loss": 0.9882, - "step": 6869 - }, - { - "epoch": 0.6195608062406999, - "grad_norm": 1.550949894649763, - "learning_rate": 1.3354848754592387e-06, - "loss": 0.977, - "step": 6870 - }, - { - "epoch": 0.6196509897641701, - "grad_norm": 1.5042108400714806, - "learning_rate": 1.334933882957666e-06, - "loss": 1.0667, - "step": 6871 - }, - { - "epoch": 0.6197411732876403, - "grad_norm": 1.3415434263425388, - "learning_rate": 1.3343829472032004e-06, - "loss": 0.944, - "step": 6872 - }, - { - "epoch": 0.6198313568111106, - "grad_norm": 1.5707370317049143, - "learning_rate": 1.3338320682428527e-06, - "loss": 0.9609, - "step": 6873 - }, - { - "epoch": 0.6199215403345809, - "grad_norm": 1.2880823744116516, - "learning_rate": 1.3332812461236263e-06, - "loss": 0.9435, - "step": 6874 - }, - { - "epoch": 0.6200117238580511, - "grad_norm": 1.2865103686641706, - "learning_rate": 1.3327304808925192e-06, - "loss": 0.9432, - "step": 6875 - }, - { - "epoch": 0.6201019073815214, - "grad_norm": 1.6021585239314724, - "learning_rate": 1.332179772596528e-06, - "loss": 0.8419, - "step": 6876 - }, - { - "epoch": 0.6201920909049916, - "grad_norm": 1.6929459468641952, - "learning_rate": 1.3316291212826402e-06, - "loss": 0.8999, - "step": 6877 - }, - { - "epoch": 0.620282274428462, - "grad_norm": 2.196519838712018, - "learning_rate": 1.3310785269978413e-06, - "loss": 1.0237, - "step": 6878 - }, - { - "epoch": 0.6203724579519322, - "grad_norm": 1.550981868960117, - "learning_rate": 1.3305279897891111e-06, - "loss": 1.0192, - "step": 6879 - }, - { - "epoch": 0.6204626414754024, - "grad_norm": 1.7717116103956554, - "learning_rate": 1.329977509703424e-06, - "loss": 0.8389, - "step": 6880 - }, - { - "epoch": 0.6205528249988727, - "grad_norm": 2.5317623420535793, - "learning_rate": 1.32942708678775e-06, - "loss": 1.0618, - "step": 6881 - }, - { - "epoch": 0.620643008522343, - "grad_norm": 1.3967119364962217, - "learning_rate": 1.3288767210890548e-06, - "loss": 0.989, - "step": 6882 - }, - { - "epoch": 0.6207331920458132, - "grad_norm": 4.220136174540466, - "learning_rate": 1.3283264126542986e-06, - "loss": 0.9439, - "step": 6883 - }, - { - "epoch": 0.6208233755692835, - "grad_norm": 1.3037048568298035, - "learning_rate": 1.3277761615304356e-06, - "loss": 0.9747, - "step": 6884 - }, - { - "epoch": 0.6209135590927538, - "grad_norm": 1.2880325364155645, - "learning_rate": 1.3272259677644178e-06, - "loss": 1.0731, - "step": 6885 - }, - { - "epoch": 0.621003742616224, - "grad_norm": 1.7327667202762675, - "learning_rate": 1.32667583140319e-06, - "loss": 0.8484, - "step": 6886 - }, - { - "epoch": 0.6210939261396943, - "grad_norm": 1.541311644151293, - "learning_rate": 1.3261257524936924e-06, - "loss": 1.0162, - "step": 6887 - }, - { - "epoch": 0.6211841096631645, - "grad_norm": 1.3944536289015632, - "learning_rate": 1.3255757310828619e-06, - "loss": 1.0131, - "step": 6888 - }, - { - "epoch": 0.6212742931866349, - "grad_norm": 1.7162688640049544, - "learning_rate": 1.3250257672176282e-06, - "loss": 0.9982, - "step": 6889 - }, - { - "epoch": 0.6213644767101051, - "grad_norm": 2.0774848353524473, - "learning_rate": 1.3244758609449183e-06, - "loss": 0.8844, - "step": 6890 - }, - { - "epoch": 0.6214546602335753, - "grad_norm": 1.6775992253572334, - "learning_rate": 1.323926012311653e-06, - "loss": 0.9988, - "step": 6891 - }, - { - "epoch": 0.6215448437570456, - "grad_norm": 1.5466115707965045, - "learning_rate": 1.3233762213647476e-06, - "loss": 1.001, - "step": 6892 - }, - { - "epoch": 0.6216350272805159, - "grad_norm": 1.6017137409534152, - "learning_rate": 1.3228264881511137e-06, - "loss": 0.9481, - "step": 6893 - }, - { - "epoch": 0.6217252108039861, - "grad_norm": 1.5281384052086489, - "learning_rate": 1.322276812717658e-06, - "loss": 1.0634, - "step": 6894 - }, - { - "epoch": 0.6218153943274564, - "grad_norm": 1.4360392238711093, - "learning_rate": 1.3217271951112818e-06, - "loss": 0.8859, - "step": 6895 - }, - { - "epoch": 0.6219055778509266, - "grad_norm": 2.1783478150862896, - "learning_rate": 1.321177635378881e-06, - "loss": 0.955, - "step": 6896 - }, - { - "epoch": 0.6219957613743969, - "grad_norm": 1.641198194373279, - "learning_rate": 1.3206281335673475e-06, - "loss": 0.8721, - "step": 6897 - }, - { - "epoch": 0.6220859448978672, - "grad_norm": 1.8006845973756964, - "learning_rate": 1.3200786897235677e-06, - "loss": 0.9471, - "step": 6898 - }, - { - "epoch": 0.6221761284213374, - "grad_norm": 1.6529039375745238, - "learning_rate": 1.3195293038944227e-06, - "loss": 0.998, - "step": 6899 - }, - { - "epoch": 0.6222663119448076, - "grad_norm": 1.330726633486293, - "learning_rate": 1.3189799761267902e-06, - "loss": 0.9727, - "step": 6900 - }, - { - "epoch": 0.622356495468278, - "grad_norm": 1.788422582531801, - "learning_rate": 1.3184307064675412e-06, - "loss": 0.974, - "step": 6901 - }, - { - "epoch": 0.6224466789917482, - "grad_norm": 0.7158396719569391, - "learning_rate": 1.3178814949635416e-06, - "loss": 0.8826, - "step": 6902 - }, - { - "epoch": 0.6225368625152184, - "grad_norm": 1.6586153665967627, - "learning_rate": 1.3173323416616549e-06, - "loss": 0.9282, - "step": 6903 - }, - { - "epoch": 0.6226270460386887, - "grad_norm": 1.7073481285281351, - "learning_rate": 1.3167832466087361e-06, - "loss": 1.0227, - "step": 6904 - }, - { - "epoch": 0.622717229562159, - "grad_norm": 1.3491043934735911, - "learning_rate": 1.3162342098516388e-06, - "loss": 0.9291, - "step": 6905 - }, - { - "epoch": 0.6228074130856293, - "grad_norm": 1.3066252520787327, - "learning_rate": 1.3156852314372086e-06, - "loss": 0.8939, - "step": 6906 - }, - { - "epoch": 0.6228975966090995, - "grad_norm": 0.6883341107544961, - "learning_rate": 1.3151363114122882e-06, - "loss": 0.8394, - "step": 6907 - }, - { - "epoch": 0.6229877801325698, - "grad_norm": 1.428343678099043, - "learning_rate": 1.3145874498237133e-06, - "loss": 0.9818, - "step": 6908 - }, - { - "epoch": 0.6230779636560401, - "grad_norm": 1.45104971739906, - "learning_rate": 1.3140386467183166e-06, - "loss": 1.0273, - "step": 6909 - }, - { - "epoch": 0.6231681471795103, - "grad_norm": 1.4198824996421067, - "learning_rate": 1.3134899021429258e-06, - "loss": 1.0163, - "step": 6910 - }, - { - "epoch": 0.6232583307029805, - "grad_norm": 1.5093876018748402, - "learning_rate": 1.3129412161443604e-06, - "loss": 0.9616, - "step": 6911 - }, - { - "epoch": 0.6233485142264509, - "grad_norm": 3.3655545906487268, - "learning_rate": 1.3123925887694402e-06, - "loss": 0.9841, - "step": 6912 - }, - { - "epoch": 0.6234386977499211, - "grad_norm": 1.3579920607209754, - "learning_rate": 1.3118440200649756e-06, - "loss": 0.9868, - "step": 6913 - }, - { - "epoch": 0.6235288812733913, - "grad_norm": 1.3456244591031048, - "learning_rate": 1.3112955100777727e-06, - "loss": 0.9392, - "step": 6914 - }, - { - "epoch": 0.6236190647968616, - "grad_norm": 1.2212021440951346, - "learning_rate": 1.3107470588546353e-06, - "loss": 1.0064, - "step": 6915 - }, - { - "epoch": 0.6237092483203319, - "grad_norm": 1.4478971722431708, - "learning_rate": 1.3101986664423583e-06, - "loss": 0.9541, - "step": 6916 - }, - { - "epoch": 0.6237994318438022, - "grad_norm": 1.4629089598786347, - "learning_rate": 1.3096503328877358e-06, - "loss": 0.9453, - "step": 6917 - }, - { - "epoch": 0.6238896153672724, - "grad_norm": 1.4596072581896098, - "learning_rate": 1.309102058237553e-06, - "loss": 1.0011, - "step": 6918 - }, - { - "epoch": 0.6239797988907426, - "grad_norm": 1.8186011394393542, - "learning_rate": 1.3085538425385917e-06, - "loss": 1.0211, - "step": 6919 - }, - { - "epoch": 0.624069982414213, - "grad_norm": 1.925055376717817, - "learning_rate": 1.3080056858376298e-06, - "loss": 1.0056, - "step": 6920 - }, - { - "epoch": 0.6241601659376832, - "grad_norm": 1.5704158919728832, - "learning_rate": 1.3074575881814383e-06, - "loss": 0.9617, - "step": 6921 - }, - { - "epoch": 0.6242503494611534, - "grad_norm": 1.5090345263263187, - "learning_rate": 1.3069095496167841e-06, - "loss": 0.9043, - "step": 6922 - }, - { - "epoch": 0.6243405329846237, - "grad_norm": 1.2733989779958792, - "learning_rate": 1.3063615701904285e-06, - "loss": 0.8867, - "step": 6923 - }, - { - "epoch": 0.624430716508094, - "grad_norm": 1.7095455970228377, - "learning_rate": 1.3058136499491283e-06, - "loss": 1.0075, - "step": 6924 - }, - { - "epoch": 0.6245209000315642, - "grad_norm": 2.1951150499922862, - "learning_rate": 1.3052657889396357e-06, - "loss": 0.9539, - "step": 6925 - }, - { - "epoch": 0.6246110835550345, - "grad_norm": 1.471673450317531, - "learning_rate": 1.304717987208696e-06, - "loss": 0.9633, - "step": 6926 - }, - { - "epoch": 0.6247012670785047, - "grad_norm": 1.4871576828922346, - "learning_rate": 1.304170244803052e-06, - "loss": 0.966, - "step": 6927 - }, - { - "epoch": 0.624791450601975, - "grad_norm": 1.5378732491834948, - "learning_rate": 1.3036225617694387e-06, - "loss": 0.9475, - "step": 6928 - }, - { - "epoch": 0.6248816341254453, - "grad_norm": 1.3888943221727696, - "learning_rate": 1.3030749381545892e-06, - "loss": 1.0083, - "step": 6929 - }, - { - "epoch": 0.6249718176489155, - "grad_norm": 1.6770161522466742, - "learning_rate": 1.3025273740052285e-06, - "loss": 0.9145, - "step": 6930 - }, - { - "epoch": 0.6250620011723858, - "grad_norm": 1.256906502972419, - "learning_rate": 1.3019798693680774e-06, - "loss": 1.021, - "step": 6931 - }, - { - "epoch": 0.6251521846958561, - "grad_norm": 1.3171076957447108, - "learning_rate": 1.3014324242898536e-06, - "loss": 1.0895, - "step": 6932 - }, - { - "epoch": 0.6252423682193263, - "grad_norm": 1.6900953185834895, - "learning_rate": 1.3008850388172668e-06, - "loss": 1.0317, - "step": 6933 - }, - { - "epoch": 0.6253325517427966, - "grad_norm": 1.2925468747697983, - "learning_rate": 1.3003377129970233e-06, - "loss": 0.9353, - "step": 6934 - }, - { - "epoch": 0.6254227352662669, - "grad_norm": 1.2363016568645584, - "learning_rate": 1.2997904468758243e-06, - "loss": 0.9016, - "step": 6935 - }, - { - "epoch": 0.6255129187897371, - "grad_norm": 1.3819450200567958, - "learning_rate": 1.299243240500365e-06, - "loss": 0.9909, - "step": 6936 - }, - { - "epoch": 0.6256031023132074, - "grad_norm": 1.8319637717242212, - "learning_rate": 1.2986960939173368e-06, - "loss": 0.9911, - "step": 6937 - }, - { - "epoch": 0.6256932858366776, - "grad_norm": 1.5337582500456615, - "learning_rate": 1.298149007173424e-06, - "loss": 1.0189, - "step": 6938 - }, - { - "epoch": 0.625783469360148, - "grad_norm": 2.6160843531281412, - "learning_rate": 1.2976019803153087e-06, - "loss": 1.0047, - "step": 6939 - }, - { - "epoch": 0.6258736528836182, - "grad_norm": 1.6655335707199999, - "learning_rate": 1.2970550133896652e-06, - "loss": 0.9255, - "step": 6940 - }, - { - "epoch": 0.6259638364070884, - "grad_norm": 3.6230342402070796, - "learning_rate": 1.2965081064431634e-06, - "loss": 1.0585, - "step": 6941 - }, - { - "epoch": 0.6260540199305586, - "grad_norm": 1.7238867553726147, - "learning_rate": 1.2959612595224698e-06, - "loss": 0.9998, - "step": 6942 - }, - { - "epoch": 0.626144203454029, - "grad_norm": 1.752785577402362, - "learning_rate": 1.2954144726742424e-06, - "loss": 0.9913, - "step": 6943 - }, - { - "epoch": 0.6262343869774992, - "grad_norm": 1.4921549848439188, - "learning_rate": 1.2948677459451385e-06, - "loss": 0.9747, - "step": 6944 - }, - { - "epoch": 0.6263245705009695, - "grad_norm": 1.283568633583156, - "learning_rate": 1.2943210793818064e-06, - "loss": 1.0257, - "step": 6945 - }, - { - "epoch": 0.6264147540244397, - "grad_norm": 1.6274591325354748, - "learning_rate": 1.2937744730308899e-06, - "loss": 0.9535, - "step": 6946 - }, - { - "epoch": 0.62650493754791, - "grad_norm": 2.0899748288696776, - "learning_rate": 1.2932279269390305e-06, - "loss": 0.9725, - "step": 6947 - }, - { - "epoch": 0.6265951210713803, - "grad_norm": 1.3483276463888894, - "learning_rate": 1.292681441152861e-06, - "loss": 0.8829, - "step": 6948 - }, - { - "epoch": 0.6266853045948505, - "grad_norm": 1.5339167980971122, - "learning_rate": 1.292135015719011e-06, - "loss": 0.9854, - "step": 6949 - }, - { - "epoch": 0.6267754881183207, - "grad_norm": 1.5374661604716173, - "learning_rate": 1.2915886506841046e-06, - "loss": 0.938, - "step": 6950 - }, - { - "epoch": 0.6268656716417911, - "grad_norm": 1.402614344255267, - "learning_rate": 1.2910423460947613e-06, - "loss": 0.9082, - "step": 6951 - }, - { - "epoch": 0.6269558551652613, - "grad_norm": 2.6962742955329504, - "learning_rate": 1.290496101997594e-06, - "loss": 0.9462, - "step": 6952 - }, - { - "epoch": 0.6270460386887315, - "grad_norm": 0.7282001538436547, - "learning_rate": 1.2899499184392105e-06, - "loss": 0.858, - "step": 6953 - }, - { - "epoch": 0.6271362222122018, - "grad_norm": 1.4599731853060673, - "learning_rate": 1.289403795466216e-06, - "loss": 0.948, - "step": 6954 - }, - { - "epoch": 0.6272264057356721, - "grad_norm": 1.651184180173017, - "learning_rate": 1.288857733125207e-06, - "loss": 0.9875, - "step": 6955 - }, - { - "epoch": 0.6273165892591424, - "grad_norm": 1.4617477862656285, - "learning_rate": 1.2883117314627785e-06, - "loss": 0.9703, - "step": 6956 - }, - { - "epoch": 0.6274067727826126, - "grad_norm": 3.26980921430802, - "learning_rate": 1.2877657905255168e-06, - "loss": 0.9125, - "step": 6957 - }, - { - "epoch": 0.6274969563060829, - "grad_norm": 1.7354319333263233, - "learning_rate": 1.2872199103600046e-06, - "loss": 1.0365, - "step": 6958 - }, - { - "epoch": 0.6275871398295532, - "grad_norm": 1.3570333899182987, - "learning_rate": 1.286674091012821e-06, - "loss": 1.0452, - "step": 6959 - }, - { - "epoch": 0.6276773233530234, - "grad_norm": 1.7392808418955414, - "learning_rate": 1.2861283325305356e-06, - "loss": 1.0344, - "step": 6960 - }, - { - "epoch": 0.6277675068764936, - "grad_norm": 1.7268100750895878, - "learning_rate": 1.2855826349597185e-06, - "loss": 0.9827, - "step": 6961 - }, - { - "epoch": 0.627857690399964, - "grad_norm": 1.3190158594087587, - "learning_rate": 1.2850369983469302e-06, - "loss": 0.8362, - "step": 6962 - }, - { - "epoch": 0.6279478739234342, - "grad_norm": 1.4616944499113806, - "learning_rate": 1.2844914227387266e-06, - "loss": 1.0053, - "step": 6963 - }, - { - "epoch": 0.6280380574469044, - "grad_norm": 1.6553286022795166, - "learning_rate": 1.2839459081816606e-06, - "loss": 0.9948, - "step": 6964 - }, - { - "epoch": 0.6281282409703747, - "grad_norm": 1.4364314876460944, - "learning_rate": 1.283400454722278e-06, - "loss": 1.0428, - "step": 6965 - }, - { - "epoch": 0.628218424493845, - "grad_norm": 1.3880057366881975, - "learning_rate": 1.28285506240712e-06, - "loss": 1.0395, - "step": 6966 - }, - { - "epoch": 0.6283086080173153, - "grad_norm": 1.3272339019316477, - "learning_rate": 1.2823097312827225e-06, - "loss": 0.997, - "step": 6967 - }, - { - "epoch": 0.6283987915407855, - "grad_norm": 5.841047835211797, - "learning_rate": 1.2817644613956153e-06, - "loss": 0.9866, - "step": 6968 - }, - { - "epoch": 0.6284889750642557, - "grad_norm": 1.4835822950165, - "learning_rate": 1.2812192527923253e-06, - "loss": 1.0355, - "step": 6969 - }, - { - "epoch": 0.6285791585877261, - "grad_norm": 8.135037369785863, - "learning_rate": 1.2806741055193712e-06, - "loss": 1.0182, - "step": 6970 - }, - { - "epoch": 0.6286693421111963, - "grad_norm": 1.7196754045042015, - "learning_rate": 1.2801290196232695e-06, - "loss": 0.9307, - "step": 6971 - }, - { - "epoch": 0.6287595256346665, - "grad_norm": 1.6112151505139616, - "learning_rate": 1.2795839951505282e-06, - "loss": 0.9688, - "step": 6972 - }, - { - "epoch": 0.6288497091581368, - "grad_norm": 2.18819351102109, - "learning_rate": 1.2790390321476538e-06, - "loss": 0.9931, - "step": 6973 - }, - { - "epoch": 0.6289398926816071, - "grad_norm": 1.3387373646372278, - "learning_rate": 1.2784941306611446e-06, - "loss": 0.9566, - "step": 6974 - }, - { - "epoch": 0.6290300762050773, - "grad_norm": 1.8550715292329178, - "learning_rate": 1.2779492907374935e-06, - "loss": 1.0389, - "step": 6975 - }, - { - "epoch": 0.6291202597285476, - "grad_norm": 1.5556582386700468, - "learning_rate": 1.2774045124231911e-06, - "loss": 0.9667, - "step": 6976 - }, - { - "epoch": 0.6292104432520178, - "grad_norm": 2.3985004230409035, - "learning_rate": 1.2768597957647197e-06, - "loss": 0.8632, - "step": 6977 - }, - { - "epoch": 0.6293006267754881, - "grad_norm": 1.6619540185638582, - "learning_rate": 1.2763151408085582e-06, - "loss": 0.9818, - "step": 6978 - }, - { - "epoch": 0.6293908102989584, - "grad_norm": 1.7524833769832977, - "learning_rate": 1.2757705476011788e-06, - "loss": 0.9477, - "step": 6979 - }, - { - "epoch": 0.6294809938224286, - "grad_norm": 1.4928350192090976, - "learning_rate": 1.27522601618905e-06, - "loss": 0.9245, - "step": 6980 - }, - { - "epoch": 0.629571177345899, - "grad_norm": 0.5690213572645524, - "learning_rate": 1.2746815466186337e-06, - "loss": 0.856, - "step": 6981 - }, - { - "epoch": 0.6296613608693692, - "grad_norm": 1.394828315958, - "learning_rate": 1.274137138936387e-06, - "loss": 0.9786, - "step": 6982 - }, - { - "epoch": 0.6297515443928394, - "grad_norm": 1.401617515615425, - "learning_rate": 1.2735927931887625e-06, - "loss": 0.9775, - "step": 6983 - }, - { - "epoch": 0.6298417279163097, - "grad_norm": 0.7009298177732665, - "learning_rate": 1.2730485094222061e-06, - "loss": 0.8522, - "step": 6984 - }, - { - "epoch": 0.62993191143978, - "grad_norm": 1.5078052263627995, - "learning_rate": 1.2725042876831586e-06, - "loss": 1.0102, - "step": 6985 - }, - { - "epoch": 0.6300220949632502, - "grad_norm": 1.6717440830204153, - "learning_rate": 1.2719601280180573e-06, - "loss": 0.8725, - "step": 6986 - }, - { - "epoch": 0.6301122784867205, - "grad_norm": 1.5959707757047703, - "learning_rate": 1.2714160304733317e-06, - "loss": 1.0398, - "step": 6987 - }, - { - "epoch": 0.6302024620101907, - "grad_norm": 1.5056882926840973, - "learning_rate": 1.2708719950954082e-06, - "loss": 0.997, - "step": 6988 - }, - { - "epoch": 0.630292645533661, - "grad_norm": 1.492828630851746, - "learning_rate": 1.2703280219307065e-06, - "loss": 0.991, - "step": 6989 - }, - { - "epoch": 0.6303828290571313, - "grad_norm": 1.4600493643528105, - "learning_rate": 1.2697841110256411e-06, - "loss": 0.9111, - "step": 6990 - }, - { - "epoch": 0.6304730125806015, - "grad_norm": 1.404805437511096, - "learning_rate": 1.2692402624266221e-06, - "loss": 1.0175, - "step": 6991 - }, - { - "epoch": 0.6305631961040717, - "grad_norm": 2.2174615476902004, - "learning_rate": 1.2686964761800529e-06, - "loss": 0.8933, - "step": 6992 - }, - { - "epoch": 0.6306533796275421, - "grad_norm": 0.7133008220949707, - "learning_rate": 1.268152752332333e-06, - "loss": 0.8391, - "step": 6993 - }, - { - "epoch": 0.6307435631510123, - "grad_norm": 2.068949700711621, - "learning_rate": 1.2676090909298549e-06, - "loss": 0.8903, - "step": 6994 - }, - { - "epoch": 0.6308337466744826, - "grad_norm": 1.3420926452443156, - "learning_rate": 1.2670654920190086e-06, - "loss": 1.0237, - "step": 6995 - }, - { - "epoch": 0.6309239301979528, - "grad_norm": 1.6777097902284148, - "learning_rate": 1.2665219556461754e-06, - "loss": 1.0228, - "step": 6996 - }, - { - "epoch": 0.6310141137214231, - "grad_norm": 1.5407906526410762, - "learning_rate": 1.2659784818577329e-06, - "loss": 1.0701, - "step": 6997 - }, - { - "epoch": 0.6311042972448934, - "grad_norm": 1.3217671974900334, - "learning_rate": 1.2654350707000542e-06, - "loss": 0.8604, - "step": 6998 - }, - { - "epoch": 0.6311944807683636, - "grad_norm": 1.6085673407396452, - "learning_rate": 1.264891722219505e-06, - "loss": 0.9617, - "step": 6999 - }, - { - "epoch": 0.6312846642918338, - "grad_norm": 1.9370320893378385, - "learning_rate": 1.2643484364624483e-06, - "loss": 0.9719, - "step": 7000 - }, - { - "epoch": 0.6313748478153042, - "grad_norm": 1.4740828426688852, - "learning_rate": 1.2638052134752393e-06, - "loss": 1.0281, - "step": 7001 - }, - { - "epoch": 0.6314650313387744, - "grad_norm": 1.5823657777100002, - "learning_rate": 1.2632620533042277e-06, - "loss": 1.0473, - "step": 7002 - }, - { - "epoch": 0.6315552148622446, - "grad_norm": 1.8102782872221912, - "learning_rate": 1.2627189559957612e-06, - "loss": 0.9422, - "step": 7003 - }, - { - "epoch": 0.631645398385715, - "grad_norm": 11.65982516280362, - "learning_rate": 1.262175921596178e-06, - "loss": 1.0356, - "step": 7004 - }, - { - "epoch": 0.6317355819091852, - "grad_norm": 1.6946798805488992, - "learning_rate": 1.2616329501518137e-06, - "loss": 0.8412, - "step": 7005 - }, - { - "epoch": 0.6318257654326555, - "grad_norm": 1.4623229449439163, - "learning_rate": 1.2610900417089978e-06, - "loss": 0.8956, - "step": 7006 - }, - { - "epoch": 0.6319159489561257, - "grad_norm": 1.5101957313045835, - "learning_rate": 1.2605471963140535e-06, - "loss": 0.9849, - "step": 7007 - }, - { - "epoch": 0.632006132479596, - "grad_norm": 1.418199084361325, - "learning_rate": 1.2600044140133e-06, - "loss": 1.0493, - "step": 7008 - }, - { - "epoch": 0.6320963160030663, - "grad_norm": 1.5883803322167362, - "learning_rate": 1.2594616948530493e-06, - "loss": 0.9324, - "step": 7009 - }, - { - "epoch": 0.6321864995265365, - "grad_norm": 1.4991543293322132, - "learning_rate": 1.258919038879611e-06, - "loss": 1.0029, - "step": 7010 - }, - { - "epoch": 0.6322766830500067, - "grad_norm": 1.7047506850409484, - "learning_rate": 1.2583764461392859e-06, - "loss": 0.9802, - "step": 7011 - }, - { - "epoch": 0.6323668665734771, - "grad_norm": 1.3950682816150424, - "learning_rate": 1.2578339166783724e-06, - "loss": 0.9411, - "step": 7012 - }, - { - "epoch": 0.6324570500969473, - "grad_norm": 1.511951359637575, - "learning_rate": 1.2572914505431613e-06, - "loss": 0.912, - "step": 7013 - }, - { - "epoch": 0.6325472336204175, - "grad_norm": 2.520306324581709, - "learning_rate": 1.2567490477799383e-06, - "loss": 0.8791, - "step": 7014 - }, - { - "epoch": 0.6326374171438878, - "grad_norm": 1.3044665029515679, - "learning_rate": 1.2562067084349852e-06, - "loss": 1.0649, - "step": 7015 - }, - { - "epoch": 0.6327276006673581, - "grad_norm": 1.4341868491177756, - "learning_rate": 1.2556644325545764e-06, - "loss": 0.9593, - "step": 7016 - }, - { - "epoch": 0.6328177841908283, - "grad_norm": 1.4592016995311343, - "learning_rate": 1.255122220184983e-06, - "loss": 0.9554, - "step": 7017 - }, - { - "epoch": 0.6329079677142986, - "grad_norm": 1.5034749787471522, - "learning_rate": 1.2545800713724694e-06, - "loss": 0.8903, - "step": 7018 - }, - { - "epoch": 0.6329981512377688, - "grad_norm": 1.4044177516648693, - "learning_rate": 1.254037986163294e-06, - "loss": 0.8431, - "step": 7019 - }, - { - "epoch": 0.6330883347612392, - "grad_norm": 1.4997898590392107, - "learning_rate": 1.2534959646037104e-06, - "loss": 0.9384, - "step": 7020 - }, - { - "epoch": 0.6331785182847094, - "grad_norm": 1.3255373261130772, - "learning_rate": 1.2529540067399675e-06, - "loss": 0.9824, - "step": 7021 - }, - { - "epoch": 0.6332687018081796, - "grad_norm": 1.3955060186034218, - "learning_rate": 1.252412112618308e-06, - "loss": 0.9134, - "step": 7022 - }, - { - "epoch": 0.6333588853316499, - "grad_norm": 3.8441681673179446, - "learning_rate": 1.2518702822849696e-06, - "loss": 0.8643, - "step": 7023 - }, - { - "epoch": 0.6334490688551202, - "grad_norm": 1.7540261412426548, - "learning_rate": 1.2513285157861831e-06, - "loss": 0.912, - "step": 7024 - }, - { - "epoch": 0.6335392523785904, - "grad_norm": 1.3626530027662493, - "learning_rate": 1.2507868131681764e-06, - "loss": 0.9654, - "step": 7025 - }, - { - "epoch": 0.6336294359020607, - "grad_norm": 1.4289563086299268, - "learning_rate": 1.250245174477169e-06, - "loss": 0.9548, - "step": 7026 - }, - { - "epoch": 0.6337196194255309, - "grad_norm": 0.7197273906563247, - "learning_rate": 1.2497035997593783e-06, - "loss": 0.8034, - "step": 7027 - }, - { - "epoch": 0.6338098029490012, - "grad_norm": 1.3917827715762725, - "learning_rate": 1.2491620890610135e-06, - "loss": 0.9718, - "step": 7028 - }, - { - "epoch": 0.6338999864724715, - "grad_norm": 1.6264359658574161, - "learning_rate": 1.2486206424282788e-06, - "loss": 0.959, - "step": 7029 - }, - { - "epoch": 0.6339901699959417, - "grad_norm": 1.5604943181856972, - "learning_rate": 1.2480792599073743e-06, - "loss": 1.0263, - "step": 7030 - }, - { - "epoch": 0.634080353519412, - "grad_norm": 1.3384364887084987, - "learning_rate": 1.247537941544493e-06, - "loss": 1.0157, - "step": 7031 - }, - { - "epoch": 0.6341705370428823, - "grad_norm": 2.1534340203467046, - "learning_rate": 1.2469966873858242e-06, - "loss": 0.9999, - "step": 7032 - }, - { - "epoch": 0.6342607205663525, - "grad_norm": 1.6185960762750335, - "learning_rate": 1.2464554974775496e-06, - "loss": 0.9006, - "step": 7033 - }, - { - "epoch": 0.6343509040898228, - "grad_norm": 2.1271255185100344, - "learning_rate": 1.2459143718658474e-06, - "loss": 0.9659, - "step": 7034 - }, - { - "epoch": 0.6344410876132931, - "grad_norm": 1.627050133568722, - "learning_rate": 1.2453733105968886e-06, - "loss": 0.9042, - "step": 7035 - }, - { - "epoch": 0.6345312711367633, - "grad_norm": 0.6308102662426688, - "learning_rate": 1.2448323137168399e-06, - "loss": 0.838, - "step": 7036 - }, - { - "epoch": 0.6346214546602336, - "grad_norm": 1.3813159409054643, - "learning_rate": 1.2442913812718625e-06, - "loss": 0.8908, - "step": 7037 - }, - { - "epoch": 0.6347116381837038, - "grad_norm": 1.280095813339154, - "learning_rate": 1.2437505133081108e-06, - "loss": 0.9548, - "step": 7038 - }, - { - "epoch": 0.6348018217071741, - "grad_norm": 2.07520530788034, - "learning_rate": 1.2432097098717358e-06, - "loss": 0.9863, - "step": 7039 - }, - { - "epoch": 0.6348920052306444, - "grad_norm": 1.8183276443089418, - "learning_rate": 1.2426689710088813e-06, - "loss": 0.9778, - "step": 7040 - }, - { - "epoch": 0.6349821887541146, - "grad_norm": 1.6963976362462432, - "learning_rate": 1.2421282967656855e-06, - "loss": 1.0268, - "step": 7041 - }, - { - "epoch": 0.6350723722775848, - "grad_norm": 1.4132880097370428, - "learning_rate": 1.2415876871882827e-06, - "loss": 0.9962, - "step": 7042 - }, - { - "epoch": 0.6351625558010552, - "grad_norm": 0.6797177099770906, - "learning_rate": 1.2410471423227998e-06, - "loss": 0.8911, - "step": 7043 - }, - { - "epoch": 0.6352527393245254, - "grad_norm": 1.7737800166552085, - "learning_rate": 1.24050666221536e-06, - "loss": 0.9636, - "step": 7044 - }, - { - "epoch": 0.6353429228479957, - "grad_norm": 1.3160403957283309, - "learning_rate": 1.23996624691208e-06, - "loss": 1.0403, - "step": 7045 - }, - { - "epoch": 0.6354331063714659, - "grad_norm": 1.327056050681389, - "learning_rate": 1.2394258964590693e-06, - "loss": 0.9591, - "step": 7046 - }, - { - "epoch": 0.6355232898949362, - "grad_norm": 1.7959563851147657, - "learning_rate": 1.238885610902436e-06, - "loss": 0.9249, - "step": 7047 - }, - { - "epoch": 0.6356134734184065, - "grad_norm": 1.4487846937789697, - "learning_rate": 1.2383453902882787e-06, - "loss": 0.9875, - "step": 7048 - }, - { - "epoch": 0.6357036569418767, - "grad_norm": 1.2153765132698071, - "learning_rate": 1.2378052346626927e-06, - "loss": 1.0353, - "step": 7049 - }, - { - "epoch": 0.6357938404653469, - "grad_norm": 1.6871284499683197, - "learning_rate": 1.2372651440717665e-06, - "loss": 0.9041, - "step": 7050 - }, - { - "epoch": 0.6358840239888173, - "grad_norm": 1.4567688406515922, - "learning_rate": 1.236725118561584e-06, - "loss": 1.0075, - "step": 7051 - }, - { - "epoch": 0.6359742075122875, - "grad_norm": 1.5224975033443633, - "learning_rate": 1.2361851581782232e-06, - "loss": 0.9763, - "step": 7052 - }, - { - "epoch": 0.6360643910357577, - "grad_norm": 1.3331944820505033, - "learning_rate": 1.2356452629677554e-06, - "loss": 1.0404, - "step": 7053 - }, - { - "epoch": 0.6361545745592281, - "grad_norm": 1.3308286635894795, - "learning_rate": 1.2351054329762494e-06, - "loss": 0.9167, - "step": 7054 - }, - { - "epoch": 0.6362447580826983, - "grad_norm": 1.4108567122842972, - "learning_rate": 1.2345656682497648e-06, - "loss": 1.0017, - "step": 7055 - }, - { - "epoch": 0.6363349416061685, - "grad_norm": 1.3514140935134988, - "learning_rate": 1.2340259688343583e-06, - "loss": 0.9531, - "step": 7056 - }, - { - "epoch": 0.6364251251296388, - "grad_norm": 1.2343900172611775, - "learning_rate": 1.2334863347760803e-06, - "loss": 0.8734, - "step": 7057 - }, - { - "epoch": 0.6365153086531091, - "grad_norm": 0.6516249116787393, - "learning_rate": 1.2329467661209734e-06, - "loss": 0.7898, - "step": 7058 - }, - { - "epoch": 0.6366054921765794, - "grad_norm": 1.6120782759051322, - "learning_rate": 1.2324072629150788e-06, - "loss": 1.014, - "step": 7059 - }, - { - "epoch": 0.6366956757000496, - "grad_norm": 1.7014642019751773, - "learning_rate": 1.2318678252044287e-06, - "loss": 1.0216, - "step": 7060 - }, - { - "epoch": 0.6367858592235198, - "grad_norm": 2.0742273707668666, - "learning_rate": 1.2313284530350512e-06, - "loss": 1.0281, - "step": 7061 - }, - { - "epoch": 0.6368760427469902, - "grad_norm": 1.7790666550601675, - "learning_rate": 1.230789146452969e-06, - "loss": 0.91, - "step": 7062 - }, - { - "epoch": 0.6369662262704604, - "grad_norm": 1.773380900416672, - "learning_rate": 1.2302499055041974e-06, - "loss": 0.9776, - "step": 7063 - }, - { - "epoch": 0.6370564097939306, - "grad_norm": 0.6647309024846431, - "learning_rate": 1.2297107302347488e-06, - "loss": 0.8512, - "step": 7064 - }, - { - "epoch": 0.6371465933174009, - "grad_norm": 1.3343871640939635, - "learning_rate": 1.2291716206906275e-06, - "loss": 1.0377, - "step": 7065 - }, - { - "epoch": 0.6372367768408712, - "grad_norm": 1.716282964002269, - "learning_rate": 1.2286325769178345e-06, - "loss": 1.0384, - "step": 7066 - }, - { - "epoch": 0.6373269603643414, - "grad_norm": 1.4767377638367212, - "learning_rate": 1.2280935989623633e-06, - "loss": 0.9929, - "step": 7067 - }, - { - "epoch": 0.6374171438878117, - "grad_norm": 1.6428556264550456, - "learning_rate": 1.2275546868702017e-06, - "loss": 0.9119, - "step": 7068 - }, - { - "epoch": 0.6375073274112819, - "grad_norm": 1.4873441213789063, - "learning_rate": 1.2270158406873341e-06, - "loss": 0.9158, - "step": 7069 - }, - { - "epoch": 0.6375975109347523, - "grad_norm": 1.2439794510864486, - "learning_rate": 1.2264770604597363e-06, - "loss": 1.0183, - "step": 7070 - }, - { - "epoch": 0.6376876944582225, - "grad_norm": 1.5174338960782723, - "learning_rate": 1.2259383462333819e-06, - "loss": 1.0734, - "step": 7071 - }, - { - "epoch": 0.6377778779816927, - "grad_norm": 1.5212019730496207, - "learning_rate": 1.2253996980542359e-06, - "loss": 0.9313, - "step": 7072 - }, - { - "epoch": 0.637868061505163, - "grad_norm": 1.407300726096441, - "learning_rate": 1.2248611159682578e-06, - "loss": 0.9579, - "step": 7073 - }, - { - "epoch": 0.6379582450286333, - "grad_norm": 0.7381336528842617, - "learning_rate": 1.2243226000214044e-06, - "loss": 0.8293, - "step": 7074 - }, - { - "epoch": 0.6380484285521035, - "grad_norm": 1.5510245259680515, - "learning_rate": 1.2237841502596232e-06, - "loss": 0.9744, - "step": 7075 - }, - { - "epoch": 0.6381386120755738, - "grad_norm": 1.4003021765619748, - "learning_rate": 1.2232457667288583e-06, - "loss": 0.9687, - "step": 7076 - }, - { - "epoch": 0.6382287955990441, - "grad_norm": 1.6075620346795385, - "learning_rate": 1.2227074494750476e-06, - "loss": 0.9736, - "step": 7077 - }, - { - "epoch": 0.6383189791225143, - "grad_norm": 1.430151441997997, - "learning_rate": 1.2221691985441238e-06, - "loss": 1.0063, - "step": 7078 - }, - { - "epoch": 0.6384091626459846, - "grad_norm": 1.3602812366148636, - "learning_rate": 1.2216310139820128e-06, - "loss": 0.9177, - "step": 7079 - }, - { - "epoch": 0.6384993461694548, - "grad_norm": 1.495045506349321, - "learning_rate": 1.2210928958346347e-06, - "loss": 1.0179, - "step": 7080 - }, - { - "epoch": 0.6385895296929252, - "grad_norm": 1.2662681546656158, - "learning_rate": 1.2205548441479065e-06, - "loss": 0.9008, - "step": 7081 - }, - { - "epoch": 0.6386797132163954, - "grad_norm": 5.973357175086897, - "learning_rate": 1.2200168589677357e-06, - "loss": 0.9073, - "step": 7082 - }, - { - "epoch": 0.6387698967398656, - "grad_norm": 1.3217526769291679, - "learning_rate": 1.2194789403400284e-06, - "loss": 0.9383, - "step": 7083 - }, - { - "epoch": 0.6388600802633358, - "grad_norm": 2.1777853921294366, - "learning_rate": 1.2189410883106816e-06, - "loss": 0.9516, - "step": 7084 - }, - { - "epoch": 0.6389502637868062, - "grad_norm": 1.5583155486508085, - "learning_rate": 1.2184033029255872e-06, - "loss": 1.0436, - "step": 7085 - }, - { - "epoch": 0.6390404473102764, - "grad_norm": 0.6974549236908958, - "learning_rate": 1.2178655842306334e-06, - "loss": 0.8546, - "step": 7086 - }, - { - "epoch": 0.6391306308337467, - "grad_norm": 1.7264525779842088, - "learning_rate": 1.2173279322716999e-06, - "loss": 0.9713, - "step": 7087 - }, - { - "epoch": 0.6392208143572169, - "grad_norm": 1.5638870186547043, - "learning_rate": 1.216790347094663e-06, - "loss": 0.9752, - "step": 7088 - }, - { - "epoch": 0.6393109978806872, - "grad_norm": 1.971881857979482, - "learning_rate": 1.2162528287453927e-06, - "loss": 1.0061, - "step": 7089 - }, - { - "epoch": 0.6394011814041575, - "grad_norm": 1.649594303308857, - "learning_rate": 1.215715377269752e-06, - "loss": 0.9752, - "step": 7090 - }, - { - "epoch": 0.6394913649276277, - "grad_norm": 1.3471826135683471, - "learning_rate": 1.2151779927136003e-06, - "loss": 0.9556, - "step": 7091 - }, - { - "epoch": 0.6395815484510979, - "grad_norm": 1.8532191723830351, - "learning_rate": 1.2146406751227893e-06, - "loss": 1.0457, - "step": 7092 - }, - { - "epoch": 0.6396717319745683, - "grad_norm": 2.8418284201427872, - "learning_rate": 1.214103424543167e-06, - "loss": 1.0122, - "step": 7093 - }, - { - "epoch": 0.6397619154980385, - "grad_norm": 1.4608974349296089, - "learning_rate": 1.2135662410205735e-06, - "loss": 0.8822, - "step": 7094 - }, - { - "epoch": 0.6398520990215087, - "grad_norm": 1.7993570848522489, - "learning_rate": 1.2130291246008444e-06, - "loss": 0.8895, - "step": 7095 - }, - { - "epoch": 0.639942282544979, - "grad_norm": 1.3247325429347478, - "learning_rate": 1.21249207532981e-06, - "loss": 0.897, - "step": 7096 - }, - { - "epoch": 0.6400324660684493, - "grad_norm": 1.4762028654812087, - "learning_rate": 1.2119550932532936e-06, - "loss": 0.8895, - "step": 7097 - }, - { - "epoch": 0.6401226495919196, - "grad_norm": 1.495804641717655, - "learning_rate": 1.2114181784171144e-06, - "loss": 0.9543, - "step": 7098 - }, - { - "epoch": 0.6402128331153898, - "grad_norm": 1.6391657561296586, - "learning_rate": 1.2108813308670837e-06, - "loss": 0.8689, - "step": 7099 - }, - { - "epoch": 0.6403030166388601, - "grad_norm": 1.6369900353419125, - "learning_rate": 1.2103445506490099e-06, - "loss": 0.9795, - "step": 7100 - }, - { - "epoch": 0.6403932001623304, - "grad_norm": 0.693228480918845, - "learning_rate": 1.209807837808693e-06, - "loss": 0.8654, - "step": 7101 - }, - { - "epoch": 0.6404833836858006, - "grad_norm": 1.8533770848102542, - "learning_rate": 1.2092711923919282e-06, - "loss": 1.0115, - "step": 7102 - }, - { - "epoch": 0.6405735672092708, - "grad_norm": 1.2032136760581205, - "learning_rate": 1.2087346144445053e-06, - "loss": 0.9336, - "step": 7103 - }, - { - "epoch": 0.6406637507327412, - "grad_norm": 3.8887069447658957, - "learning_rate": 1.2081981040122081e-06, - "loss": 0.9831, - "step": 7104 - }, - { - "epoch": 0.6407539342562114, - "grad_norm": 1.5906423301726984, - "learning_rate": 1.2076616611408148e-06, - "loss": 0.9258, - "step": 7105 - }, - { - "epoch": 0.6408441177796816, - "grad_norm": 1.6269314364925929, - "learning_rate": 1.2071252858760972e-06, - "loss": 1.003, - "step": 7106 - }, - { - "epoch": 0.6409343013031519, - "grad_norm": 1.5185699167745186, - "learning_rate": 1.2065889782638218e-06, - "loss": 0.9536, - "step": 7107 - }, - { - "epoch": 0.6410244848266222, - "grad_norm": 1.3714410064976488, - "learning_rate": 1.2060527383497506e-06, - "loss": 0.952, - "step": 7108 - }, - { - "epoch": 0.6411146683500925, - "grad_norm": 1.303898464245919, - "learning_rate": 1.2055165661796363e-06, - "loss": 1.09, - "step": 7109 - }, - { - "epoch": 0.6412048518735627, - "grad_norm": 1.9020319062293496, - "learning_rate": 1.2049804617992303e-06, - "loss": 1.0389, - "step": 7110 - }, - { - "epoch": 0.6412950353970329, - "grad_norm": 0.628248166586724, - "learning_rate": 1.204444425254275e-06, - "loss": 0.8548, - "step": 7111 - }, - { - "epoch": 0.6413852189205033, - "grad_norm": 1.6148409566130744, - "learning_rate": 1.203908456590507e-06, - "loss": 0.955, - "step": 7112 - }, - { - "epoch": 0.6414754024439735, - "grad_norm": 1.51666577971436, - "learning_rate": 1.20337255585366e-06, - "loss": 0.9367, - "step": 7113 - }, - { - "epoch": 0.6415655859674437, - "grad_norm": 0.6262419758356861, - "learning_rate": 1.2028367230894582e-06, - "loss": 0.7893, - "step": 7114 - }, - { - "epoch": 0.641655769490914, - "grad_norm": 1.5149393005297442, - "learning_rate": 1.2023009583436237e-06, - "loss": 0.8957, - "step": 7115 - }, - { - "epoch": 0.6417459530143843, - "grad_norm": 2.5502915197050977, - "learning_rate": 1.2017652616618698e-06, - "loss": 1.0334, - "step": 7116 - }, - { - "epoch": 0.6418361365378545, - "grad_norm": 2.370875340666701, - "learning_rate": 1.2012296330899048e-06, - "loss": 0.9664, - "step": 7117 - }, - { - "epoch": 0.6419263200613248, - "grad_norm": 1.51375708555521, - "learning_rate": 1.200694072673432e-06, - "loss": 1.0031, - "step": 7118 - }, - { - "epoch": 0.642016503584795, - "grad_norm": 1.4446810605984308, - "learning_rate": 1.200158580458148e-06, - "loss": 1.0024, - "step": 7119 - }, - { - "epoch": 0.6421066871082654, - "grad_norm": 1.7524119513101726, - "learning_rate": 1.1996231564897448e-06, - "loss": 0.9521, - "step": 7120 - }, - { - "epoch": 0.6421968706317356, - "grad_norm": 2.006735546264834, - "learning_rate": 1.1990878008139062e-06, - "loss": 0.9305, - "step": 7121 - }, - { - "epoch": 0.6422870541552058, - "grad_norm": 1.466172899591512, - "learning_rate": 1.1985525134763132e-06, - "loss": 1.0031, - "step": 7122 - }, - { - "epoch": 0.6423772376786762, - "grad_norm": 1.5556883537661323, - "learning_rate": 1.1980172945226393e-06, - "loss": 1.0203, - "step": 7123 - }, - { - "epoch": 0.6424674212021464, - "grad_norm": 2.103740258650459, - "learning_rate": 1.197482143998551e-06, - "loss": 0.9787, - "step": 7124 - }, - { - "epoch": 0.6425576047256166, - "grad_norm": 1.4515215322833661, - "learning_rate": 1.196947061949712e-06, - "loss": 0.9538, - "step": 7125 - }, - { - "epoch": 0.6426477882490869, - "grad_norm": 1.4501210819859298, - "learning_rate": 1.1964120484217768e-06, - "loss": 1.0291, - "step": 7126 - }, - { - "epoch": 0.6427379717725572, - "grad_norm": 0.7502322234806448, - "learning_rate": 1.1958771034603975e-06, - "loss": 0.8527, - "step": 7127 - }, - { - "epoch": 0.6428281552960274, - "grad_norm": 1.4301948689352308, - "learning_rate": 1.1953422271112175e-06, - "loss": 0.9138, - "step": 7128 - }, - { - "epoch": 0.6429183388194977, - "grad_norm": 1.6043579268280812, - "learning_rate": 1.1948074194198748e-06, - "loss": 0.9582, - "step": 7129 - }, - { - "epoch": 0.6430085223429679, - "grad_norm": 1.8757812779627951, - "learning_rate": 1.1942726804320033e-06, - "loss": 0.8796, - "step": 7130 - }, - { - "epoch": 0.6430987058664382, - "grad_norm": 1.991545809453331, - "learning_rate": 1.1937380101932295e-06, - "loss": 0.9125, - "step": 7131 - }, - { - "epoch": 0.6431888893899085, - "grad_norm": 1.277671328662422, - "learning_rate": 1.1932034087491745e-06, - "loss": 0.9675, - "step": 7132 - }, - { - "epoch": 0.6432790729133787, - "grad_norm": 1.9942518720690972, - "learning_rate": 1.1926688761454531e-06, - "loss": 0.8936, - "step": 7133 - }, - { - "epoch": 0.643369256436849, - "grad_norm": 1.020415175034313, - "learning_rate": 1.1921344124276746e-06, - "loss": 0.9899, - "step": 7134 - }, - { - "epoch": 0.6434594399603193, - "grad_norm": 1.4784759556476468, - "learning_rate": 1.1916000176414431e-06, - "loss": 0.9994, - "step": 7135 - }, - { - "epoch": 0.6435496234837895, - "grad_norm": 1.4905365603780023, - "learning_rate": 1.1910656918323546e-06, - "loss": 1.0076, - "step": 7136 - }, - { - "epoch": 0.6436398070072598, - "grad_norm": 1.4115648076999423, - "learning_rate": 1.1905314350460024e-06, - "loss": 0.9987, - "step": 7137 - }, - { - "epoch": 0.64372999053073, - "grad_norm": 1.3667535583661894, - "learning_rate": 1.1899972473279717e-06, - "loss": 1.0263, - "step": 7138 - }, - { - "epoch": 0.6438201740542003, - "grad_norm": 3.6014842523444766, - "learning_rate": 1.1894631287238414e-06, - "loss": 0.9678, - "step": 7139 - }, - { - "epoch": 0.6439103575776706, - "grad_norm": 1.3013842219039808, - "learning_rate": 1.188929079279187e-06, - "loss": 0.9827, - "step": 7140 - }, - { - "epoch": 0.6440005411011408, - "grad_norm": 1.4528373515595852, - "learning_rate": 1.1883950990395751e-06, - "loss": 0.9165, - "step": 7141 - }, - { - "epoch": 0.644090724624611, - "grad_norm": 1.431271285519302, - "learning_rate": 1.187861188050569e-06, - "loss": 0.9147, - "step": 7142 - }, - { - "epoch": 0.6441809081480814, - "grad_norm": 1.3513489923907647, - "learning_rate": 1.187327346357724e-06, - "loss": 0.9638, - "step": 7143 - }, - { - "epoch": 0.6442710916715516, - "grad_norm": 1.493314946237399, - "learning_rate": 1.1867935740065912e-06, - "loss": 1.0, - "step": 7144 - }, - { - "epoch": 0.6443612751950218, - "grad_norm": 1.3320833347015493, - "learning_rate": 1.1862598710427148e-06, - "loss": 0.8576, - "step": 7145 - }, - { - "epoch": 0.6444514587184921, - "grad_norm": 1.8872501738479095, - "learning_rate": 1.1857262375116328e-06, - "loss": 0.9377, - "step": 7146 - }, - { - "epoch": 0.6445416422419624, - "grad_norm": 2.0364475363780943, - "learning_rate": 1.1851926734588783e-06, - "loss": 0.9833, - "step": 7147 - }, - { - "epoch": 0.6446318257654327, - "grad_norm": 1.4048555029690464, - "learning_rate": 1.184659178929977e-06, - "loss": 0.9984, - "step": 7148 - }, - { - "epoch": 0.6447220092889029, - "grad_norm": 1.4922077916548158, - "learning_rate": 1.1841257539704513e-06, - "loss": 0.9701, - "step": 7149 - }, - { - "epoch": 0.6448121928123732, - "grad_norm": 1.4396633786564845, - "learning_rate": 1.1835923986258146e-06, - "loss": 0.9931, - "step": 7150 - }, - { - "epoch": 0.6449023763358435, - "grad_norm": 1.4247579168182414, - "learning_rate": 1.1830591129415754e-06, - "loss": 0.9309, - "step": 7151 - }, - { - "epoch": 0.6449925598593137, - "grad_norm": 1.6475875615887026, - "learning_rate": 1.182525896963238e-06, - "loss": 0.9199, - "step": 7152 - }, - { - "epoch": 0.6450827433827839, - "grad_norm": 1.9854251155560088, - "learning_rate": 1.181992750736298e-06, - "loss": 0.9019, - "step": 7153 - }, - { - "epoch": 0.6451729269062543, - "grad_norm": 1.847492686606916, - "learning_rate": 1.1814596743062474e-06, - "loss": 0.9679, - "step": 7154 - }, - { - "epoch": 0.6452631104297245, - "grad_norm": 1.3731692871397172, - "learning_rate": 1.1809266677185711e-06, - "loss": 0.9988, - "step": 7155 - }, - { - "epoch": 0.6453532939531947, - "grad_norm": 0.6885179872427927, - "learning_rate": 1.180393731018747e-06, - "loss": 0.8769, - "step": 7156 - }, - { - "epoch": 0.645443477476665, - "grad_norm": 0.5727555713157809, - "learning_rate": 1.1798608642522498e-06, - "loss": 0.8023, - "step": 7157 - }, - { - "epoch": 0.6455336610001353, - "grad_norm": 1.786903644664407, - "learning_rate": 1.1793280674645454e-06, - "loss": 0.9581, - "step": 7158 - }, - { - "epoch": 0.6456238445236056, - "grad_norm": 1.4284832160826255, - "learning_rate": 1.1787953407010954e-06, - "loss": 1.0494, - "step": 7159 - }, - { - "epoch": 0.6457140280470758, - "grad_norm": 1.7799390687050534, - "learning_rate": 1.1782626840073554e-06, - "loss": 0.9797, - "step": 7160 - }, - { - "epoch": 0.645804211570546, - "grad_norm": 1.6743495133565145, - "learning_rate": 1.1777300974287738e-06, - "loss": 0.8711, - "step": 7161 - }, - { - "epoch": 0.6458943950940164, - "grad_norm": 1.327760085294385, - "learning_rate": 1.1771975810107947e-06, - "loss": 1.0009, - "step": 7162 - }, - { - "epoch": 0.6459845786174866, - "grad_norm": 1.681237078195144, - "learning_rate": 1.1766651347988542e-06, - "loss": 1.0352, - "step": 7163 - }, - { - "epoch": 0.6460747621409568, - "grad_norm": 2.823935762317774, - "learning_rate": 1.1761327588383848e-06, - "loss": 1.0884, - "step": 7164 - }, - { - "epoch": 0.6461649456644271, - "grad_norm": 1.7763094732755982, - "learning_rate": 1.1756004531748105e-06, - "loss": 0.9868, - "step": 7165 - }, - { - "epoch": 0.6462551291878974, - "grad_norm": 1.5150326229901339, - "learning_rate": 1.1750682178535521e-06, - "loss": 1.005, - "step": 7166 - }, - { - "epoch": 0.6463453127113676, - "grad_norm": 1.4886864138203308, - "learning_rate": 1.1745360529200218e-06, - "loss": 0.9298, - "step": 7167 - }, - { - "epoch": 0.6464354962348379, - "grad_norm": 1.3513848954303245, - "learning_rate": 1.1740039584196261e-06, - "loss": 0.9991, - "step": 7168 - }, - { - "epoch": 0.6465256797583081, - "grad_norm": 2.037543070519294, - "learning_rate": 1.1734719343977683e-06, - "loss": 1.0037, - "step": 7169 - }, - { - "epoch": 0.6466158632817784, - "grad_norm": 1.508052352249498, - "learning_rate": 1.1729399808998416e-06, - "loss": 0.9205, - "step": 7170 - }, - { - "epoch": 0.6467060468052487, - "grad_norm": 1.3681786132735791, - "learning_rate": 1.1724080979712368e-06, - "loss": 0.8619, - "step": 7171 - }, - { - "epoch": 0.6467962303287189, - "grad_norm": 1.3115855619099472, - "learning_rate": 1.1718762856573365e-06, - "loss": 1.0471, - "step": 7172 - }, - { - "epoch": 0.6468864138521893, - "grad_norm": 1.4587992151060052, - "learning_rate": 1.1713445440035172e-06, - "loss": 0.9643, - "step": 7173 - }, - { - "epoch": 0.6469765973756595, - "grad_norm": 1.6195669314717955, - "learning_rate": 1.1708128730551506e-06, - "loss": 1.0144, - "step": 7174 - }, - { - "epoch": 0.6470667808991297, - "grad_norm": 1.5275050709791655, - "learning_rate": 1.1702812728576019e-06, - "loss": 0.963, - "step": 7175 - }, - { - "epoch": 0.6471569644226, - "grad_norm": 1.4360990746783002, - "learning_rate": 1.1697497434562303e-06, - "loss": 1.0022, - "step": 7176 - }, - { - "epoch": 0.6472471479460703, - "grad_norm": 1.4877644440057634, - "learning_rate": 1.1692182848963885e-06, - "loss": 1.0098, - "step": 7177 - }, - { - "epoch": 0.6473373314695405, - "grad_norm": 1.5407713876626594, - "learning_rate": 1.1686868972234227e-06, - "loss": 0.9406, - "step": 7178 - }, - { - "epoch": 0.6474275149930108, - "grad_norm": 1.28275145967438, - "learning_rate": 1.1681555804826755e-06, - "loss": 0.9841, - "step": 7179 - }, - { - "epoch": 0.647517698516481, - "grad_norm": 1.4356292493372842, - "learning_rate": 1.1676243347194806e-06, - "loss": 0.9155, - "step": 7180 - }, - { - "epoch": 0.6476078820399513, - "grad_norm": 1.2288510784901416, - "learning_rate": 1.167093159979167e-06, - "loss": 0.9912, - "step": 7181 - }, - { - "epoch": 0.6476980655634216, - "grad_norm": 1.3795933726982454, - "learning_rate": 1.1665620563070575e-06, - "loss": 1.0021, - "step": 7182 - }, - { - "epoch": 0.6477882490868918, - "grad_norm": 1.7436833139882946, - "learning_rate": 1.1660310237484691e-06, - "loss": 1.0719, - "step": 7183 - }, - { - "epoch": 0.647878432610362, - "grad_norm": 1.2439470125517067, - "learning_rate": 1.165500062348713e-06, - "loss": 0.9241, - "step": 7184 - }, - { - "epoch": 0.6479686161338324, - "grad_norm": 1.5268639008301708, - "learning_rate": 1.164969172153091e-06, - "loss": 1.0231, - "step": 7185 - }, - { - "epoch": 0.6480587996573026, - "grad_norm": 1.856940976373209, - "learning_rate": 1.1644383532069055e-06, - "loss": 0.9922, - "step": 7186 - }, - { - "epoch": 0.6481489831807729, - "grad_norm": 1.2635836681021781, - "learning_rate": 1.1639076055554454e-06, - "loss": 1.012, - "step": 7187 - }, - { - "epoch": 0.6482391667042431, - "grad_norm": 3.5204636569699717, - "learning_rate": 1.163376929244e-06, - "loss": 0.954, - "step": 7188 - }, - { - "epoch": 0.6483293502277134, - "grad_norm": 1.2346987722196567, - "learning_rate": 1.1628463243178472e-06, - "loss": 0.9359, - "step": 7189 - }, - { - "epoch": 0.6484195337511837, - "grad_norm": 1.8213839672407974, - "learning_rate": 1.1623157908222623e-06, - "loss": 1.0351, - "step": 7190 - }, - { - "epoch": 0.6485097172746539, - "grad_norm": 1.4208285494786308, - "learning_rate": 1.1617853288025129e-06, - "loss": 1.0185, - "step": 7191 - }, - { - "epoch": 0.6485999007981241, - "grad_norm": 1.2913391815880915, - "learning_rate": 1.1612549383038612e-06, - "loss": 0.9524, - "step": 7192 - }, - { - "epoch": 0.6486900843215945, - "grad_norm": 1.1809923647925258, - "learning_rate": 1.1607246193715629e-06, - "loss": 0.9575, - "step": 7193 - }, - { - "epoch": 0.6487802678450647, - "grad_norm": 1.9406740584753055, - "learning_rate": 1.1601943720508684e-06, - "loss": 0.9867, - "step": 7194 - }, - { - "epoch": 0.6488704513685349, - "grad_norm": 1.3304618472392316, - "learning_rate": 1.1596641963870202e-06, - "loss": 1.0164, - "step": 7195 - }, - { - "epoch": 0.6489606348920053, - "grad_norm": 1.2689904102627436, - "learning_rate": 1.1591340924252561e-06, - "loss": 0.9867, - "step": 7196 - }, - { - "epoch": 0.6490508184154755, - "grad_norm": 0.7970844442343528, - "learning_rate": 1.158604060210808e-06, - "loss": 0.9066, - "step": 7197 - }, - { - "epoch": 0.6491410019389457, - "grad_norm": 1.6464955973214448, - "learning_rate": 1.1580740997889008e-06, - "loss": 1.0376, - "step": 7198 - }, - { - "epoch": 0.649231185462416, - "grad_norm": 1.4029725771485468, - "learning_rate": 1.1575442112047544e-06, - "loss": 0.9688, - "step": 7199 - }, - { - "epoch": 0.6493213689858863, - "grad_norm": 1.6082936329792492, - "learning_rate": 1.1570143945035797e-06, - "loss": 0.9121, - "step": 7200 - }, - { - "epoch": 0.6494115525093566, - "grad_norm": 1.8427758310339848, - "learning_rate": 1.1564846497305864e-06, - "loss": 0.9307, - "step": 7201 - }, - { - "epoch": 0.6495017360328268, - "grad_norm": 1.6002080633064475, - "learning_rate": 1.1559549769309726e-06, - "loss": 0.9483, - "step": 7202 - }, - { - "epoch": 0.649591919556297, - "grad_norm": 1.3333500572983625, - "learning_rate": 1.1554253761499358e-06, - "loss": 0.9605, - "step": 7203 - }, - { - "epoch": 0.6496821030797674, - "grad_norm": 1.5105741835267072, - "learning_rate": 1.1548958474326617e-06, - "loss": 1.0039, - "step": 7204 - }, - { - "epoch": 0.6497722866032376, - "grad_norm": 1.2581562970319826, - "learning_rate": 1.154366390824334e-06, - "loss": 0.9023, - "step": 7205 - }, - { - "epoch": 0.6498624701267078, - "grad_norm": 0.8129107464083183, - "learning_rate": 1.1538370063701287e-06, - "loss": 0.8378, - "step": 7206 - }, - { - "epoch": 0.6499526536501781, - "grad_norm": 1.3118767848191244, - "learning_rate": 1.1533076941152153e-06, - "loss": 0.9366, - "step": 7207 - }, - { - "epoch": 0.6500428371736484, - "grad_norm": 2.8749411203740634, - "learning_rate": 1.1527784541047583e-06, - "loss": 0.89, - "step": 7208 - }, - { - "epoch": 0.6501330206971186, - "grad_norm": 2.2301480518682952, - "learning_rate": 1.1522492863839152e-06, - "loss": 0.9315, - "step": 7209 - }, - { - "epoch": 0.6502232042205889, - "grad_norm": 1.6692166451056405, - "learning_rate": 1.1517201909978382e-06, - "loss": 0.9312, - "step": 7210 - }, - { - "epoch": 0.6503133877440591, - "grad_norm": 0.6220123886855584, - "learning_rate": 1.151191167991671e-06, - "loss": 0.841, - "step": 7211 - }, - { - "epoch": 0.6504035712675295, - "grad_norm": 1.5515622879443067, - "learning_rate": 1.1506622174105536e-06, - "loss": 0.9817, - "step": 7212 - }, - { - "epoch": 0.6504937547909997, - "grad_norm": 1.555963500587701, - "learning_rate": 1.1501333392996194e-06, - "loss": 1.001, - "step": 7213 - }, - { - "epoch": 0.6505839383144699, - "grad_norm": 0.6668737035379437, - "learning_rate": 1.1496045337039943e-06, - "loss": 0.8414, - "step": 7214 - }, - { - "epoch": 0.6506741218379402, - "grad_norm": 1.3318022589866452, - "learning_rate": 1.1490758006687995e-06, - "loss": 0.9304, - "step": 7215 - }, - { - "epoch": 0.6507643053614105, - "grad_norm": 1.595535773813166, - "learning_rate": 1.1485471402391502e-06, - "loss": 0.9563, - "step": 7216 - }, - { - "epoch": 0.6508544888848807, - "grad_norm": 2.018803064930288, - "learning_rate": 1.1480185524601522e-06, - "loss": 0.9462, - "step": 7217 - }, - { - "epoch": 0.650944672408351, - "grad_norm": 2.0934081296658507, - "learning_rate": 1.1474900373769108e-06, - "loss": 0.9808, - "step": 7218 - }, - { - "epoch": 0.6510348559318213, - "grad_norm": 2.0102419867913874, - "learning_rate": 1.1469615950345184e-06, - "loss": 0.7707, - "step": 7219 - }, - { - "epoch": 0.6511250394552915, - "grad_norm": 0.703881979469919, - "learning_rate": 1.1464332254780678e-06, - "loss": 0.8182, - "step": 7220 - }, - { - "epoch": 0.6512152229787618, - "grad_norm": 3.177230147841594, - "learning_rate": 1.1459049287526404e-06, - "loss": 0.9608, - "step": 7221 - }, - { - "epoch": 0.651305406502232, - "grad_norm": 1.4907703160883639, - "learning_rate": 1.1453767049033137e-06, - "loss": 0.9385, - "step": 7222 - }, - { - "epoch": 0.6513955900257024, - "grad_norm": 1.2946729694115477, - "learning_rate": 1.1448485539751586e-06, - "loss": 1.0185, - "step": 7223 - }, - { - "epoch": 0.6514857735491726, - "grad_norm": 1.5303725239139272, - "learning_rate": 1.1443204760132408e-06, - "loss": 0.9466, - "step": 7224 - }, - { - "epoch": 0.6515759570726428, - "grad_norm": 3.730416342482007, - "learning_rate": 1.1437924710626185e-06, - "loss": 0.9552, - "step": 7225 - }, - { - "epoch": 0.651666140596113, - "grad_norm": 0.6725598548643591, - "learning_rate": 1.1432645391683429e-06, - "loss": 0.7988, - "step": 7226 - }, - { - "epoch": 0.6517563241195834, - "grad_norm": 1.4903893146488136, - "learning_rate": 1.1427366803754609e-06, - "loss": 1.0014, - "step": 7227 - }, - { - "epoch": 0.6518465076430536, - "grad_norm": 1.298516762491229, - "learning_rate": 1.142208894729012e-06, - "loss": 0.9907, - "step": 7228 - }, - { - "epoch": 0.6519366911665239, - "grad_norm": 1.3164164828573444, - "learning_rate": 1.1416811822740301e-06, - "loss": 1.0126, - "step": 7229 - }, - { - "epoch": 0.6520268746899941, - "grad_norm": 1.3869635997113021, - "learning_rate": 1.1411535430555428e-06, - "loss": 1.0203, - "step": 7230 - }, - { - "epoch": 0.6521170582134644, - "grad_norm": 1.383937701181982, - "learning_rate": 1.1406259771185705e-06, - "loss": 1.0271, - "step": 7231 - }, - { - "epoch": 0.6522072417369347, - "grad_norm": 1.7924341140194209, - "learning_rate": 1.1400984845081282e-06, - "loss": 0.9271, - "step": 7232 - }, - { - "epoch": 0.6522974252604049, - "grad_norm": 1.9110808523811254, - "learning_rate": 1.139571065269226e-06, - "loss": 1.0565, - "step": 7233 - }, - { - "epoch": 0.6523876087838751, - "grad_norm": 1.500132316475518, - "learning_rate": 1.139043719446863e-06, - "loss": 1.0156, - "step": 7234 - }, - { - "epoch": 0.6524777923073455, - "grad_norm": 1.331732036735103, - "learning_rate": 1.1385164470860385e-06, - "loss": 1.0378, - "step": 7235 - }, - { - "epoch": 0.6525679758308157, - "grad_norm": 1.6519273454193037, - "learning_rate": 1.1379892482317403e-06, - "loss": 0.9784, - "step": 7236 - }, - { - "epoch": 0.652658159354286, - "grad_norm": 1.603452655703511, - "learning_rate": 1.1374621229289524e-06, - "loss": 0.9649, - "step": 7237 - }, - { - "epoch": 0.6527483428777562, - "grad_norm": 1.4410582745249347, - "learning_rate": 1.1369350712226525e-06, - "loss": 0.959, - "step": 7238 - }, - { - "epoch": 0.6528385264012265, - "grad_norm": 1.5084801021144898, - "learning_rate": 1.136408093157811e-06, - "loss": 0.9351, - "step": 7239 - }, - { - "epoch": 0.6529287099246968, - "grad_norm": 1.6350840414537038, - "learning_rate": 1.1358811887793935e-06, - "loss": 0.938, - "step": 7240 - }, - { - "epoch": 0.653018893448167, - "grad_norm": 1.3510271456410385, - "learning_rate": 1.135354358132356e-06, - "loss": 1.0109, - "step": 7241 - }, - { - "epoch": 0.6531090769716373, - "grad_norm": 1.4704498032320155, - "learning_rate": 1.1348276012616542e-06, - "loss": 1.0149, - "step": 7242 - }, - { - "epoch": 0.6531992604951076, - "grad_norm": 1.63847180714942, - "learning_rate": 1.134300918212231e-06, - "loss": 0.9362, - "step": 7243 - }, - { - "epoch": 0.6532894440185778, - "grad_norm": 1.5088372577759608, - "learning_rate": 1.133774309029027e-06, - "loss": 0.9783, - "step": 7244 - }, - { - "epoch": 0.653379627542048, - "grad_norm": 1.8606614863438502, - "learning_rate": 1.133247773756975e-06, - "loss": 1.0145, - "step": 7245 - }, - { - "epoch": 0.6534698110655184, - "grad_norm": 0.795565295560122, - "learning_rate": 1.1327213124410024e-06, - "loss": 0.8653, - "step": 7246 - }, - { - "epoch": 0.6535599945889886, - "grad_norm": 1.2831381399575355, - "learning_rate": 1.1321949251260292e-06, - "loss": 1.0109, - "step": 7247 - }, - { - "epoch": 0.6536501781124588, - "grad_norm": 1.4845511884025921, - "learning_rate": 1.1316686118569712e-06, - "loss": 0.8609, - "step": 7248 - }, - { - "epoch": 0.6537403616359291, - "grad_norm": 1.5669475580920986, - "learning_rate": 1.1311423726787335e-06, - "loss": 0.8822, - "step": 7249 - }, - { - "epoch": 0.6538305451593994, - "grad_norm": 1.5231957659524284, - "learning_rate": 1.130616207636221e-06, - "loss": 0.9487, - "step": 7250 - }, - { - "epoch": 0.6539207286828697, - "grad_norm": 1.7477835515484617, - "learning_rate": 1.1300901167743263e-06, - "loss": 0.9774, - "step": 7251 - }, - { - "epoch": 0.6540109122063399, - "grad_norm": 1.1779884077061988, - "learning_rate": 1.12956410013794e-06, - "loss": 0.9714, - "step": 7252 - }, - { - "epoch": 0.6541010957298101, - "grad_norm": 1.5835428433923506, - "learning_rate": 1.1290381577719436e-06, - "loss": 0.9569, - "step": 7253 - }, - { - "epoch": 0.6541912792532805, - "grad_norm": 1.9231195529467604, - "learning_rate": 1.1285122897212143e-06, - "loss": 0.8842, - "step": 7254 - }, - { - "epoch": 0.6542814627767507, - "grad_norm": 0.7867398210902842, - "learning_rate": 1.1279864960306228e-06, - "loss": 0.8945, - "step": 7255 - }, - { - "epoch": 0.6543716463002209, - "grad_norm": 1.574493081395929, - "learning_rate": 1.1274607767450297e-06, - "loss": 1.0445, - "step": 7256 - }, - { - "epoch": 0.6544618298236912, - "grad_norm": 1.3354273932031628, - "learning_rate": 1.126935131909296e-06, - "loss": 0.9523, - "step": 7257 - }, - { - "epoch": 0.6545520133471615, - "grad_norm": 1.2707178761716322, - "learning_rate": 1.1264095615682693e-06, - "loss": 0.9177, - "step": 7258 - }, - { - "epoch": 0.6546421968706317, - "grad_norm": 1.5388377069040442, - "learning_rate": 1.1258840657667973e-06, - "loss": 0.8971, - "step": 7259 - }, - { - "epoch": 0.654732380394102, - "grad_norm": 1.3382520646867337, - "learning_rate": 1.125358644549716e-06, - "loss": 0.9018, - "step": 7260 - }, - { - "epoch": 0.6548225639175722, - "grad_norm": 1.3158504773102813, - "learning_rate": 1.1248332979618578e-06, - "loss": 0.8393, - "step": 7261 - }, - { - "epoch": 0.6549127474410426, - "grad_norm": 2.1774733587971826, - "learning_rate": 1.1243080260480482e-06, - "loss": 0.8717, - "step": 7262 - }, - { - "epoch": 0.6550029309645128, - "grad_norm": 1.6493092622771075, - "learning_rate": 1.1237828288531063e-06, - "loss": 1.0258, - "step": 7263 - }, - { - "epoch": 0.655093114487983, - "grad_norm": 1.6464349958769078, - "learning_rate": 1.1232577064218449e-06, - "loss": 0.8959, - "step": 7264 - }, - { - "epoch": 0.6551832980114533, - "grad_norm": 1.4089791835332865, - "learning_rate": 1.1227326587990711e-06, - "loss": 1.0415, - "step": 7265 - }, - { - "epoch": 0.6552734815349236, - "grad_norm": 1.2621310950924791, - "learning_rate": 1.1222076860295832e-06, - "loss": 0.9586, - "step": 7266 - }, - { - "epoch": 0.6553636650583938, - "grad_norm": 1.5539678700438244, - "learning_rate": 1.1216827881581756e-06, - "loss": 0.9873, - "step": 7267 - }, - { - "epoch": 0.6554538485818641, - "grad_norm": 1.7467850718271563, - "learning_rate": 1.1211579652296355e-06, - "loss": 0.9626, - "step": 7268 - }, - { - "epoch": 0.6555440321053344, - "grad_norm": 2.299002148986051, - "learning_rate": 1.1206332172887438e-06, - "loss": 1.0503, - "step": 7269 - }, - { - "epoch": 0.6556342156288046, - "grad_norm": 1.5581047801677028, - "learning_rate": 1.1201085443802756e-06, - "loss": 0.9873, - "step": 7270 - }, - { - "epoch": 0.6557243991522749, - "grad_norm": 1.3730500873728868, - "learning_rate": 1.1195839465489964e-06, - "loss": 0.9437, - "step": 7271 - }, - { - "epoch": 0.6558145826757451, - "grad_norm": 1.5708551181055008, - "learning_rate": 1.1190594238396708e-06, - "loss": 1.016, - "step": 7272 - }, - { - "epoch": 0.6559047661992154, - "grad_norm": 1.1227932054712935, - "learning_rate": 1.1185349762970515e-06, - "loss": 0.9443, - "step": 7273 - }, - { - "epoch": 0.6559949497226857, - "grad_norm": 1.2968250402915802, - "learning_rate": 1.1180106039658896e-06, - "loss": 0.982, - "step": 7274 - }, - { - "epoch": 0.6560851332461559, - "grad_norm": 0.6237905004879998, - "learning_rate": 1.117486306890925e-06, - "loss": 0.8251, - "step": 7275 - }, - { - "epoch": 0.6561753167696261, - "grad_norm": 1.7556401371532468, - "learning_rate": 1.116962085116896e-06, - "loss": 0.925, - "step": 7276 - }, - { - "epoch": 0.6562655002930965, - "grad_norm": 0.6750859559042773, - "learning_rate": 1.1164379386885302e-06, - "loss": 0.7812, - "step": 7277 - }, - { - "epoch": 0.6563556838165667, - "grad_norm": 1.6062807132144665, - "learning_rate": 1.1159138676505516e-06, - "loss": 1.0117, - "step": 7278 - }, - { - "epoch": 0.656445867340037, - "grad_norm": 1.410491480831692, - "learning_rate": 1.1153898720476761e-06, - "loss": 1.0341, - "step": 7279 - }, - { - "epoch": 0.6565360508635072, - "grad_norm": 1.5555276565471934, - "learning_rate": 1.114865951924615e-06, - "loss": 0.9579, - "step": 7280 - }, - { - "epoch": 0.6566262343869775, - "grad_norm": 1.5182150506944099, - "learning_rate": 1.1143421073260721e-06, - "loss": 0.9245, - "step": 7281 - }, - { - "epoch": 0.6567164179104478, - "grad_norm": 1.4275506732451277, - "learning_rate": 1.1138183382967432e-06, - "loss": 0.912, - "step": 7282 - }, - { - "epoch": 0.656806601433918, - "grad_norm": 1.640348211282349, - "learning_rate": 1.11329464488132e-06, - "loss": 0.8858, - "step": 7283 - }, - { - "epoch": 0.6568967849573882, - "grad_norm": 1.4886250737044722, - "learning_rate": 1.112771027124487e-06, - "loss": 0.9504, - "step": 7284 - }, - { - "epoch": 0.6569869684808586, - "grad_norm": 1.6648610665948171, - "learning_rate": 1.112247485070922e-06, - "loss": 0.987, - "step": 7285 - }, - { - "epoch": 0.6570771520043288, - "grad_norm": 1.4532181904761787, - "learning_rate": 1.1117240187652968e-06, - "loss": 0.9806, - "step": 7286 - }, - { - "epoch": 0.657167335527799, - "grad_norm": 1.6804224579844749, - "learning_rate": 1.1112006282522767e-06, - "loss": 1.0526, - "step": 7287 - }, - { - "epoch": 0.6572575190512693, - "grad_norm": 1.4523603006438701, - "learning_rate": 1.1106773135765183e-06, - "loss": 0.9597, - "step": 7288 - }, - { - "epoch": 0.6573477025747396, - "grad_norm": 1.3787258992029767, - "learning_rate": 1.110154074782677e-06, - "loss": 0.9448, - "step": 7289 - }, - { - "epoch": 0.6574378860982099, - "grad_norm": 1.4940108897413604, - "learning_rate": 1.1096309119153948e-06, - "loss": 0.9656, - "step": 7290 - }, - { - "epoch": 0.6575280696216801, - "grad_norm": 0.6915541883918093, - "learning_rate": 1.1091078250193145e-06, - "loss": 0.877, - "step": 7291 - }, - { - "epoch": 0.6576182531451504, - "grad_norm": 1.6247075624689205, - "learning_rate": 1.108584814139066e-06, - "loss": 0.9808, - "step": 7292 - }, - { - "epoch": 0.6577084366686207, - "grad_norm": 1.4228197249998444, - "learning_rate": 1.108061879319276e-06, - "loss": 0.9686, - "step": 7293 - }, - { - "epoch": 0.6577986201920909, - "grad_norm": 1.8536519039690966, - "learning_rate": 1.1075390206045648e-06, - "loss": 1.0339, - "step": 7294 - }, - { - "epoch": 0.6578888037155611, - "grad_norm": 1.6700156023815693, - "learning_rate": 1.1070162380395454e-06, - "loss": 1.0171, - "step": 7295 - }, - { - "epoch": 0.6579789872390315, - "grad_norm": 1.507805937915813, - "learning_rate": 1.1064935316688253e-06, - "loss": 0.9211, - "step": 7296 - }, - { - "epoch": 0.6580691707625017, - "grad_norm": 2.139320365794343, - "learning_rate": 1.105970901537002e-06, - "loss": 1.0635, - "step": 7297 - }, - { - "epoch": 0.6581593542859719, - "grad_norm": 1.3533203512580725, - "learning_rate": 1.1054483476886727e-06, - "loss": 1.021, - "step": 7298 - }, - { - "epoch": 0.6582495378094422, - "grad_norm": 1.2722307349748025, - "learning_rate": 1.1049258701684222e-06, - "loss": 0.8899, - "step": 7299 - }, - { - "epoch": 0.6583397213329125, - "grad_norm": 1.4484938790584094, - "learning_rate": 1.1044034690208315e-06, - "loss": 0.9216, - "step": 7300 - }, - { - "epoch": 0.6584299048563828, - "grad_norm": 1.542060990637671, - "learning_rate": 1.1038811442904755e-06, - "loss": 1.0125, - "step": 7301 - }, - { - "epoch": 0.658520088379853, - "grad_norm": 1.5506704756907823, - "learning_rate": 1.103358896021921e-06, - "loss": 0.906, - "step": 7302 - }, - { - "epoch": 0.6586102719033232, - "grad_norm": 2.1860442631071475, - "learning_rate": 1.1028367242597298e-06, - "loss": 0.9504, - "step": 7303 - }, - { - "epoch": 0.6587004554267936, - "grad_norm": 1.5541183733489343, - "learning_rate": 1.102314629048457e-06, - "loss": 1.0042, - "step": 7304 - }, - { - "epoch": 0.6587906389502638, - "grad_norm": 1.9365830251170195, - "learning_rate": 1.1017926104326484e-06, - "loss": 1.0256, - "step": 7305 - }, - { - "epoch": 0.658880822473734, - "grad_norm": 2.0829371902218807, - "learning_rate": 1.1012706684568483e-06, - "loss": 1.0268, - "step": 7306 - }, - { - "epoch": 0.6589710059972043, - "grad_norm": 1.3251100764345103, - "learning_rate": 1.1007488031655894e-06, - "loss": 0.9897, - "step": 7307 - }, - { - "epoch": 0.6590611895206746, - "grad_norm": 1.5993646194580275, - "learning_rate": 1.1002270146034013e-06, - "loss": 0.9343, - "step": 7308 - }, - { - "epoch": 0.6591513730441448, - "grad_norm": 1.107494587336073, - "learning_rate": 1.0997053028148052e-06, - "loss": 1.002, - "step": 7309 - }, - { - "epoch": 0.6592415565676151, - "grad_norm": 1.1856517965277824, - "learning_rate": 1.0991836678443173e-06, - "loss": 0.9722, - "step": 7310 - }, - { - "epoch": 0.6593317400910853, - "grad_norm": 1.7736367269186515, - "learning_rate": 1.0986621097364465e-06, - "loss": 0.9182, - "step": 7311 - }, - { - "epoch": 0.6594219236145556, - "grad_norm": 1.5886519180143641, - "learning_rate": 1.0981406285356932e-06, - "loss": 0.8884, - "step": 7312 - }, - { - "epoch": 0.6595121071380259, - "grad_norm": 2.479160041693376, - "learning_rate": 1.0976192242865554e-06, - "loss": 0.8961, - "step": 7313 - }, - { - "epoch": 0.6596022906614961, - "grad_norm": 1.5435965118090027, - "learning_rate": 1.0970978970335202e-06, - "loss": 1.0347, - "step": 7314 - }, - { - "epoch": 0.6596924741849665, - "grad_norm": 1.5142686418126166, - "learning_rate": 1.0965766468210714e-06, - "loss": 0.9968, - "step": 7315 - }, - { - "epoch": 0.6597826577084367, - "grad_norm": 9.361469321058586, - "learning_rate": 1.0960554736936843e-06, - "loss": 0.9852, - "step": 7316 - }, - { - "epoch": 0.6598728412319069, - "grad_norm": 1.5228129353748672, - "learning_rate": 1.0955343776958283e-06, - "loss": 0.8797, - "step": 7317 - }, - { - "epoch": 0.6599630247553772, - "grad_norm": 1.6197133265089847, - "learning_rate": 1.0950133588719665e-06, - "loss": 0.995, - "step": 7318 - }, - { - "epoch": 0.6600532082788475, - "grad_norm": 1.4297643755480067, - "learning_rate": 1.0944924172665551e-06, - "loss": 0.9903, - "step": 7319 - }, - { - "epoch": 0.6601433918023177, - "grad_norm": 1.567980424792851, - "learning_rate": 1.0939715529240437e-06, - "loss": 1.0069, - "step": 7320 - }, - { - "epoch": 0.660233575325788, - "grad_norm": 1.4582919886949912, - "learning_rate": 1.0934507658888755e-06, - "loss": 1.0054, - "step": 7321 - }, - { - "epoch": 0.6603237588492582, - "grad_norm": 1.2169275106533657, - "learning_rate": 1.092930056205486e-06, - "loss": 0.9436, - "step": 7322 - }, - { - "epoch": 0.6604139423727285, - "grad_norm": 1.845566969597695, - "learning_rate": 1.092409423918306e-06, - "loss": 0.934, - "step": 7323 - }, - { - "epoch": 0.6605041258961988, - "grad_norm": 1.2329468493147497, - "learning_rate": 1.0918888690717581e-06, - "loss": 0.9023, - "step": 7324 - }, - { - "epoch": 0.660594309419669, - "grad_norm": 1.6353721450609298, - "learning_rate": 1.091368391710259e-06, - "loss": 1.0388, - "step": 7325 - }, - { - "epoch": 0.6606844929431392, - "grad_norm": 1.4976086628357665, - "learning_rate": 1.0908479918782198e-06, - "loss": 0.921, - "step": 7326 - }, - { - "epoch": 0.6607746764666096, - "grad_norm": 1.3101546813555103, - "learning_rate": 1.0903276696200413e-06, - "loss": 1.0004, - "step": 7327 - }, - { - "epoch": 0.6608648599900798, - "grad_norm": 1.4969197276526776, - "learning_rate": 1.0898074249801234e-06, - "loss": 0.981, - "step": 7328 - }, - { - "epoch": 0.6609550435135501, - "grad_norm": 1.8947909147720006, - "learning_rate": 1.0892872580028533e-06, - "loss": 0.9854, - "step": 7329 - }, - { - "epoch": 0.6610452270370203, - "grad_norm": 1.2648508506420084, - "learning_rate": 1.0887671687326178e-06, - "loss": 1.0022, - "step": 7330 - }, - { - "epoch": 0.6611354105604906, - "grad_norm": 1.4301475243399655, - "learning_rate": 1.0882471572137908e-06, - "loss": 0.9396, - "step": 7331 - }, - { - "epoch": 0.6612255940839609, - "grad_norm": 1.9870113371864957, - "learning_rate": 1.087727223490744e-06, - "loss": 1.0153, - "step": 7332 - }, - { - "epoch": 0.6613157776074311, - "grad_norm": 2.4802613179137314, - "learning_rate": 1.0872073676078405e-06, - "loss": 1.0297, - "step": 7333 - }, - { - "epoch": 0.6614059611309013, - "grad_norm": 1.7851781530726865, - "learning_rate": 1.0866875896094375e-06, - "loss": 0.9207, - "step": 7334 - }, - { - "epoch": 0.6614961446543717, - "grad_norm": 1.3666086333831875, - "learning_rate": 1.0861678895398854e-06, - "loss": 0.9902, - "step": 7335 - }, - { - "epoch": 0.6615863281778419, - "grad_norm": 1.5773924836579545, - "learning_rate": 1.0856482674435286e-06, - "loss": 0.9276, - "step": 7336 - }, - { - "epoch": 0.6616765117013121, - "grad_norm": 0.6752958814843636, - "learning_rate": 1.0851287233647024e-06, - "loss": 0.8353, - "step": 7337 - }, - { - "epoch": 0.6617666952247825, - "grad_norm": 1.4983602939481366, - "learning_rate": 1.084609257347738e-06, - "loss": 1.0324, - "step": 7338 - }, - { - "epoch": 0.6618568787482527, - "grad_norm": 1.423870656407807, - "learning_rate": 1.0840898694369594e-06, - "loss": 0.9546, - "step": 7339 - }, - { - "epoch": 0.661947062271723, - "grad_norm": 1.3626909262198514, - "learning_rate": 1.083570559676683e-06, - "loss": 0.9674, - "step": 7340 - }, - { - "epoch": 0.6620372457951932, - "grad_norm": 2.0158000544744366, - "learning_rate": 1.08305132811122e-06, - "loss": 0.9477, - "step": 7341 - }, - { - "epoch": 0.6621274293186635, - "grad_norm": 1.3598278650572135, - "learning_rate": 1.0825321747848735e-06, - "loss": 0.9743, - "step": 7342 - }, - { - "epoch": 0.6622176128421338, - "grad_norm": 0.7523488378303229, - "learning_rate": 1.0820130997419417e-06, - "loss": 0.8936, - "step": 7343 - }, - { - "epoch": 0.662307796365604, - "grad_norm": 2.0307190421211514, - "learning_rate": 1.0814941030267123e-06, - "loss": 0.965, - "step": 7344 - }, - { - "epoch": 0.6623979798890742, - "grad_norm": 0.5734872115620856, - "learning_rate": 1.080975184683472e-06, - "loss": 0.819, - "step": 7345 - }, - { - "epoch": 0.6624881634125446, - "grad_norm": 1.5147156494496727, - "learning_rate": 1.0804563447564948e-06, - "loss": 1.0392, - "step": 7346 - }, - { - "epoch": 0.6625783469360148, - "grad_norm": 1.4695108756032516, - "learning_rate": 1.0799375832900545e-06, - "loss": 0.9205, - "step": 7347 - }, - { - "epoch": 0.662668530459485, - "grad_norm": 1.5880996923070316, - "learning_rate": 1.0794189003284118e-06, - "loss": 1.0052, - "step": 7348 - }, - { - "epoch": 0.6627587139829553, - "grad_norm": 1.3697240351043452, - "learning_rate": 1.0789002959158242e-06, - "loss": 0.9931, - "step": 7349 - }, - { - "epoch": 0.6628488975064256, - "grad_norm": 1.8562797621065363, - "learning_rate": 1.0783817700965428e-06, - "loss": 0.9311, - "step": 7350 - }, - { - "epoch": 0.6629390810298958, - "grad_norm": 1.348552240151196, - "learning_rate": 1.0778633229148102e-06, - "loss": 1.0207, - "step": 7351 - }, - { - "epoch": 0.6630292645533661, - "grad_norm": 2.57643626998651, - "learning_rate": 1.0773449544148645e-06, - "loss": 0.9807, - "step": 7352 - }, - { - "epoch": 0.6631194480768363, - "grad_norm": 1.3563725868761194, - "learning_rate": 1.076826664640934e-06, - "loss": 1.0167, - "step": 7353 - }, - { - "epoch": 0.6632096316003067, - "grad_norm": 0.7759171243021047, - "learning_rate": 1.0763084536372424e-06, - "loss": 0.9073, - "step": 7354 - }, - { - "epoch": 0.6632998151237769, - "grad_norm": 1.7646944412627246, - "learning_rate": 1.0757903214480068e-06, - "loss": 0.9913, - "step": 7355 - }, - { - "epoch": 0.6633899986472471, - "grad_norm": 1.4229132246848049, - "learning_rate": 1.0752722681174376e-06, - "loss": 0.9174, - "step": 7356 - }, - { - "epoch": 0.6634801821707174, - "grad_norm": 1.4841053768027916, - "learning_rate": 1.074754293689737e-06, - "loss": 0.8924, - "step": 7357 - }, - { - "epoch": 0.6635703656941877, - "grad_norm": 1.6073675134859589, - "learning_rate": 1.0742363982091023e-06, - "loss": 0.9693, - "step": 7358 - }, - { - "epoch": 0.6636605492176579, - "grad_norm": 2.2796792671415766, - "learning_rate": 1.0737185817197215e-06, - "loss": 0.9432, - "step": 7359 - }, - { - "epoch": 0.6637507327411282, - "grad_norm": 1.5197799204563516, - "learning_rate": 1.0732008442657803e-06, - "loss": 0.933, - "step": 7360 - }, - { - "epoch": 0.6638409162645985, - "grad_norm": 1.3140128816905092, - "learning_rate": 1.0726831858914516e-06, - "loss": 0.9554, - "step": 7361 - }, - { - "epoch": 0.6639310997880687, - "grad_norm": 1.6815359900482763, - "learning_rate": 1.0721656066409084e-06, - "loss": 0.9929, - "step": 7362 - }, - { - "epoch": 0.664021283311539, - "grad_norm": 1.725009685986733, - "learning_rate": 1.0716481065583108e-06, - "loss": 0.9782, - "step": 7363 - }, - { - "epoch": 0.6641114668350092, - "grad_norm": 0.6346890447345156, - "learning_rate": 1.071130685687816e-06, - "loss": 0.8309, - "step": 7364 - }, - { - "epoch": 0.6642016503584796, - "grad_norm": 1.1819439963581349, - "learning_rate": 1.0706133440735723e-06, - "loss": 0.9097, - "step": 7365 - }, - { - "epoch": 0.6642918338819498, - "grad_norm": 1.811834805198345, - "learning_rate": 1.070096081759723e-06, - "loss": 0.9171, - "step": 7366 - }, - { - "epoch": 0.66438201740542, - "grad_norm": 2.5797853614179473, - "learning_rate": 1.069578898790404e-06, - "loss": 1.0765, - "step": 7367 - }, - { - "epoch": 0.6644722009288903, - "grad_norm": 1.481122760699863, - "learning_rate": 1.0690617952097424e-06, - "loss": 0.9625, - "step": 7368 - }, - { - "epoch": 0.6645623844523606, - "grad_norm": 1.4859286559731224, - "learning_rate": 1.068544771061863e-06, - "loss": 1.0183, - "step": 7369 - }, - { - "epoch": 0.6646525679758308, - "grad_norm": 2.494250552772781, - "learning_rate": 1.0680278263908787e-06, - "loss": 1.0154, - "step": 7370 - }, - { - "epoch": 0.6647427514993011, - "grad_norm": 0.7797408213758579, - "learning_rate": 1.0675109612408991e-06, - "loss": 0.8813, - "step": 7371 - }, - { - "epoch": 0.6648329350227713, - "grad_norm": 1.6810008038048578, - "learning_rate": 1.0669941756560264e-06, - "loss": 0.9455, - "step": 7372 - }, - { - "epoch": 0.6649231185462416, - "grad_norm": 1.8676878446301588, - "learning_rate": 1.0664774696803548e-06, - "loss": 0.9291, - "step": 7373 - }, - { - "epoch": 0.6650133020697119, - "grad_norm": 1.6611844099083444, - "learning_rate": 1.065960843357973e-06, - "loss": 0.9446, - "step": 7374 - }, - { - "epoch": 0.6651034855931821, - "grad_norm": 1.9018874355464952, - "learning_rate": 1.065444296732963e-06, - "loss": 0.9938, - "step": 7375 - }, - { - "epoch": 0.6651936691166523, - "grad_norm": 0.7125428488044026, - "learning_rate": 1.064927829849397e-06, - "loss": 0.8375, - "step": 7376 - }, - { - "epoch": 0.6652838526401227, - "grad_norm": 2.0625341007997102, - "learning_rate": 1.0644114427513465e-06, - "loss": 0.9745, - "step": 7377 - }, - { - "epoch": 0.6653740361635929, - "grad_norm": 1.303650266685776, - "learning_rate": 1.0638951354828693e-06, - "loss": 0.9018, - "step": 7378 - }, - { - "epoch": 0.6654642196870632, - "grad_norm": 1.479101027344169, - "learning_rate": 1.063378908088021e-06, - "loss": 1.0282, - "step": 7379 - }, - { - "epoch": 0.6655544032105334, - "grad_norm": 1.3543366203274476, - "learning_rate": 1.0628627606108486e-06, - "loss": 0.9664, - "step": 7380 - }, - { - "epoch": 0.6656445867340037, - "grad_norm": 1.976786001483317, - "learning_rate": 1.062346693095393e-06, - "loss": 1.0331, - "step": 7381 - }, - { - "epoch": 0.665734770257474, - "grad_norm": 1.4517165711004394, - "learning_rate": 1.0618307055856882e-06, - "loss": 0.9193, - "step": 7382 - }, - { - "epoch": 0.6658249537809442, - "grad_norm": 1.4920528810502878, - "learning_rate": 1.061314798125759e-06, - "loss": 1.0286, - "step": 7383 - }, - { - "epoch": 0.6659151373044144, - "grad_norm": 1.463743320387952, - "learning_rate": 1.0607989707596293e-06, - "loss": 0.9328, - "step": 7384 - }, - { - "epoch": 0.6660053208278848, - "grad_norm": 1.188806768479852, - "learning_rate": 1.0602832235313078e-06, - "loss": 1.0914, - "step": 7385 - }, - { - "epoch": 0.666095504351355, - "grad_norm": 2.1234287736164625, - "learning_rate": 1.0597675564848053e-06, - "loss": 0.9543, - "step": 7386 - }, - { - "epoch": 0.6661856878748252, - "grad_norm": 1.5877516569580312, - "learning_rate": 1.059251969664118e-06, - "loss": 1.0441, - "step": 7387 - }, - { - "epoch": 0.6662758713982956, - "grad_norm": 1.879402967371135, - "learning_rate": 1.0587364631132402e-06, - "loss": 0.9848, - "step": 7388 - }, - { - "epoch": 0.6663660549217658, - "grad_norm": 1.4631599208961477, - "learning_rate": 1.0582210368761573e-06, - "loss": 1.0143, - "step": 7389 - }, - { - "epoch": 0.666456238445236, - "grad_norm": 4.015086097031688, - "learning_rate": 1.0577056909968485e-06, - "loss": 0.9747, - "step": 7390 - }, - { - "epoch": 0.6665464219687063, - "grad_norm": 2.290545194934835, - "learning_rate": 1.0571904255192857e-06, - "loss": 0.9931, - "step": 7391 - }, - { - "epoch": 0.6666366054921766, - "grad_norm": 1.9928267107943358, - "learning_rate": 1.0566752404874354e-06, - "loss": 1.0068, - "step": 7392 - }, - { - "epoch": 0.6667267890156469, - "grad_norm": 1.403557764615684, - "learning_rate": 1.0561601359452543e-06, - "loss": 0.9664, - "step": 7393 - }, - { - "epoch": 0.6668169725391171, - "grad_norm": 1.7039696235187867, - "learning_rate": 1.0556451119366947e-06, - "loss": 0.912, - "step": 7394 - }, - { - "epoch": 0.6669071560625873, - "grad_norm": 1.2832397269917775, - "learning_rate": 1.0551301685057011e-06, - "loss": 0.906, - "step": 7395 - }, - { - "epoch": 0.6669973395860577, - "grad_norm": 1.5181413977386402, - "learning_rate": 1.0546153056962117e-06, - "loss": 1.0588, - "step": 7396 - }, - { - "epoch": 0.6670875231095279, - "grad_norm": 1.8225297644425598, - "learning_rate": 1.0541005235521578e-06, - "loss": 0.9681, - "step": 7397 - }, - { - "epoch": 0.6671777066329981, - "grad_norm": 1.5948715003936662, - "learning_rate": 1.0535858221174614e-06, - "loss": 0.9849, - "step": 7398 - }, - { - "epoch": 0.6672678901564684, - "grad_norm": 1.3065440508587576, - "learning_rate": 1.0530712014360426e-06, - "loss": 1.0184, - "step": 7399 - }, - { - "epoch": 0.6673580736799387, - "grad_norm": 1.4377219816557911, - "learning_rate": 1.0525566615518088e-06, - "loss": 1.0196, - "step": 7400 - }, - { - "epoch": 0.667448257203409, - "grad_norm": 1.5556268968949911, - "learning_rate": 1.0520422025086662e-06, - "loss": 0.9868, - "step": 7401 - }, - { - "epoch": 0.6675384407268792, - "grad_norm": 1.4835979636375047, - "learning_rate": 1.0515278243505092e-06, - "loss": 1.0215, - "step": 7402 - }, - { - "epoch": 0.6676286242503494, - "grad_norm": 1.2390738757310005, - "learning_rate": 1.0510135271212278e-06, - "loss": 0.9989, - "step": 7403 - }, - { - "epoch": 0.6677188077738198, - "grad_norm": 1.1439390245173295, - "learning_rate": 1.0504993108647052e-06, - "loss": 1.0294, - "step": 7404 - }, - { - "epoch": 0.66780899129729, - "grad_norm": 1.6846204136571394, - "learning_rate": 1.0499851756248168e-06, - "loss": 0.946, - "step": 7405 - }, - { - "epoch": 0.6678991748207602, - "grad_norm": 1.6582472652675577, - "learning_rate": 1.0494711214454316e-06, - "loss": 0.9577, - "step": 7406 - }, - { - "epoch": 0.6679893583442305, - "grad_norm": 1.5099510878415732, - "learning_rate": 1.0489571483704111e-06, - "loss": 0.8872, - "step": 7407 - }, - { - "epoch": 0.6680795418677008, - "grad_norm": 1.4908208530444536, - "learning_rate": 1.048443256443612e-06, - "loss": 0.8915, - "step": 7408 - }, - { - "epoch": 0.668169725391171, - "grad_norm": 1.437256087466274, - "learning_rate": 1.0479294457088801e-06, - "loss": 0.9942, - "step": 7409 - }, - { - "epoch": 0.6682599089146413, - "grad_norm": 1.2517282935733325, - "learning_rate": 1.0474157162100574e-06, - "loss": 1.0887, - "step": 7410 - }, - { - "epoch": 0.6683500924381116, - "grad_norm": 1.6589938054581344, - "learning_rate": 1.0469020679909786e-06, - "loss": 0.9069, - "step": 7411 - }, - { - "epoch": 0.6684402759615818, - "grad_norm": 1.5239149079183942, - "learning_rate": 1.0463885010954705e-06, - "loss": 0.8934, - "step": 7412 - }, - { - "epoch": 0.6685304594850521, - "grad_norm": 2.654850220702941, - "learning_rate": 1.0458750155673536e-06, - "loss": 1.0393, - "step": 7413 - }, - { - "epoch": 0.6686206430085223, - "grad_norm": 1.7124808679333383, - "learning_rate": 1.0453616114504421e-06, - "loss": 0.9298, - "step": 7414 - }, - { - "epoch": 0.6687108265319927, - "grad_norm": 1.4426713885960423, - "learning_rate": 1.0448482887885406e-06, - "loss": 1.0051, - "step": 7415 - }, - { - "epoch": 0.6688010100554629, - "grad_norm": 1.4102062100254387, - "learning_rate": 1.044335047625451e-06, - "loss": 0.9805, - "step": 7416 - }, - { - "epoch": 0.6688911935789331, - "grad_norm": 1.433141985400394, - "learning_rate": 1.0438218880049637e-06, - "loss": 0.9785, - "step": 7417 - }, - { - "epoch": 0.6689813771024034, - "grad_norm": 1.6775514016611992, - "learning_rate": 1.0433088099708653e-06, - "loss": 1.0319, - "step": 7418 - }, - { - "epoch": 0.6690715606258737, - "grad_norm": 0.6760993605703358, - "learning_rate": 1.0427958135669346e-06, - "loss": 0.7891, - "step": 7419 - }, - { - "epoch": 0.6691617441493439, - "grad_norm": 2.0430779801702728, - "learning_rate": 1.0422828988369428e-06, - "loss": 0.9111, - "step": 7420 - }, - { - "epoch": 0.6692519276728142, - "grad_norm": 1.892071950520232, - "learning_rate": 1.041770065824655e-06, - "loss": 1.0414, - "step": 7421 - }, - { - "epoch": 0.6693421111962844, - "grad_norm": 1.5307531523883113, - "learning_rate": 1.0412573145738287e-06, - "loss": 0.8311, - "step": 7422 - }, - { - "epoch": 0.6694322947197547, - "grad_norm": 1.5677526305098586, - "learning_rate": 1.040744645128216e-06, - "loss": 0.9494, - "step": 7423 - }, - { - "epoch": 0.669522478243225, - "grad_norm": 1.4168551357039572, - "learning_rate": 1.040232057531558e-06, - "loss": 1.0337, - "step": 7424 - }, - { - "epoch": 0.6696126617666952, - "grad_norm": 3.238499467259255, - "learning_rate": 1.0397195518275932e-06, - "loss": 0.9471, - "step": 7425 - }, - { - "epoch": 0.6697028452901654, - "grad_norm": 2.1161906580708063, - "learning_rate": 1.0392071280600512e-06, - "loss": 0.9465, - "step": 7426 - }, - { - "epoch": 0.6697930288136358, - "grad_norm": 1.4805572402798195, - "learning_rate": 1.0386947862726549e-06, - "loss": 0.9773, - "step": 7427 - }, - { - "epoch": 0.669883212337106, - "grad_norm": 1.6857525997218192, - "learning_rate": 1.0381825265091197e-06, - "loss": 0.8924, - "step": 7428 - }, - { - "epoch": 0.6699733958605762, - "grad_norm": 1.5665001591336254, - "learning_rate": 1.037670348813155e-06, - "loss": 0.8961, - "step": 7429 - }, - { - "epoch": 0.6700635793840465, - "grad_norm": 1.3904148328748267, - "learning_rate": 1.0371582532284624e-06, - "loss": 1.0009, - "step": 7430 - }, - { - "epoch": 0.6701537629075168, - "grad_norm": 1.8482172237396581, - "learning_rate": 1.0366462397987375e-06, - "loss": 0.9111, - "step": 7431 - }, - { - "epoch": 0.6702439464309871, - "grad_norm": 1.696936115548277, - "learning_rate": 1.0361343085676665e-06, - "loss": 0.9704, - "step": 7432 - }, - { - "epoch": 0.6703341299544573, - "grad_norm": 1.6774750088281256, - "learning_rate": 1.0356224595789309e-06, - "loss": 1.0851, - "step": 7433 - }, - { - "epoch": 0.6704243134779276, - "grad_norm": 1.290433397456639, - "learning_rate": 1.0351106928762046e-06, - "loss": 1.0335, - "step": 7434 - }, - { - "epoch": 0.6705144970013979, - "grad_norm": 1.479075397666536, - "learning_rate": 1.034599008503154e-06, - "loss": 1.0447, - "step": 7435 - }, - { - "epoch": 0.6706046805248681, - "grad_norm": 1.5311285476786727, - "learning_rate": 1.0340874065034406e-06, - "loss": 0.9652, - "step": 7436 - }, - { - "epoch": 0.6706948640483383, - "grad_norm": 2.3145146228973608, - "learning_rate": 1.0335758869207137e-06, - "loss": 0.9027, - "step": 7437 - }, - { - "epoch": 0.6707850475718087, - "grad_norm": 2.3016367230227375, - "learning_rate": 1.0330644497986227e-06, - "loss": 0.9925, - "step": 7438 - }, - { - "epoch": 0.6708752310952789, - "grad_norm": 1.3991341587142798, - "learning_rate": 1.0325530951808029e-06, - "loss": 1.0173, - "step": 7439 - }, - { - "epoch": 0.6709654146187491, - "grad_norm": 1.465378971231467, - "learning_rate": 1.0320418231108887e-06, - "loss": 0.991, - "step": 7440 - }, - { - "epoch": 0.6710555981422194, - "grad_norm": 6.934275109567403, - "learning_rate": 1.0315306336325028e-06, - "loss": 1.0118, - "step": 7441 - }, - { - "epoch": 0.6711457816656897, - "grad_norm": 1.5064790195456348, - "learning_rate": 1.0310195267892635e-06, - "loss": 0.9377, - "step": 7442 - }, - { - "epoch": 0.67123596518916, - "grad_norm": 1.6446266418481308, - "learning_rate": 1.030508502624781e-06, - "loss": 0.9159, - "step": 7443 - }, - { - "epoch": 0.6713261487126302, - "grad_norm": 1.5128206874639252, - "learning_rate": 1.0299975611826587e-06, - "loss": 0.9287, - "step": 7444 - }, - { - "epoch": 0.6714163322361004, - "grad_norm": 1.3452740831018288, - "learning_rate": 1.0294867025064928e-06, - "loss": 0.9854, - "step": 7445 - }, - { - "epoch": 0.6715065157595708, - "grad_norm": 2.676785513098871, - "learning_rate": 1.028975926639874e-06, - "loss": 0.9862, - "step": 7446 - }, - { - "epoch": 0.671596699283041, - "grad_norm": 1.9798347014969369, - "learning_rate": 1.0284652336263823e-06, - "loss": 0.8946, - "step": 7447 - }, - { - "epoch": 0.6716868828065112, - "grad_norm": 1.7101264320168454, - "learning_rate": 1.0279546235095938e-06, - "loss": 0.9088, - "step": 7448 - }, - { - "epoch": 0.6717770663299815, - "grad_norm": 0.7298013422705114, - "learning_rate": 1.0274440963330768e-06, - "loss": 0.8706, - "step": 7449 - }, - { - "epoch": 0.6718672498534518, - "grad_norm": 1.3424775508516746, - "learning_rate": 1.0269336521403919e-06, - "loss": 0.9764, - "step": 7450 - }, - { - "epoch": 0.671957433376922, - "grad_norm": 1.470777775667051, - "learning_rate": 1.0264232909750936e-06, - "loss": 1.1042, - "step": 7451 - }, - { - "epoch": 0.6720476169003923, - "grad_norm": 1.2415909684338169, - "learning_rate": 1.025913012880728e-06, - "loss": 1.0362, - "step": 7452 - }, - { - "epoch": 0.6721378004238625, - "grad_norm": 1.5973294770353976, - "learning_rate": 1.0254028179008362e-06, - "loss": 0.8852, - "step": 7453 - }, - { - "epoch": 0.6722279839473329, - "grad_norm": 1.7021844349051456, - "learning_rate": 1.0248927060789483e-06, - "loss": 0.8753, - "step": 7454 - }, - { - "epoch": 0.6723181674708031, - "grad_norm": 1.4243125562383687, - "learning_rate": 1.0243826774585928e-06, - "loss": 0.9295, - "step": 7455 - }, - { - "epoch": 0.6724083509942733, - "grad_norm": 1.7027032312431645, - "learning_rate": 1.0238727320832854e-06, - "loss": 0.9677, - "step": 7456 - }, - { - "epoch": 0.6724985345177437, - "grad_norm": 1.8881181222688734, - "learning_rate": 1.0233628699965403e-06, - "loss": 0.9743, - "step": 7457 - }, - { - "epoch": 0.6725887180412139, - "grad_norm": 1.4707266310476277, - "learning_rate": 1.0228530912418594e-06, - "loss": 0.9976, - "step": 7458 - }, - { - "epoch": 0.6726789015646841, - "grad_norm": 1.3708395871809176, - "learning_rate": 1.0223433958627404e-06, - "loss": 0.9475, - "step": 7459 - }, - { - "epoch": 0.6727690850881544, - "grad_norm": 1.7406389684476198, - "learning_rate": 1.021833783902674e-06, - "loss": 1.0033, - "step": 7460 - }, - { - "epoch": 0.6728592686116247, - "grad_norm": 1.4562768696283013, - "learning_rate": 1.0213242554051427e-06, - "loss": 0.9765, - "step": 7461 - }, - { - "epoch": 0.6729494521350949, - "grad_norm": 1.828885564153427, - "learning_rate": 1.0208148104136229e-06, - "loss": 0.921, - "step": 7462 - }, - { - "epoch": 0.6730396356585652, - "grad_norm": 1.4912203705394997, - "learning_rate": 1.020305448971582e-06, - "loss": 0.9457, - "step": 7463 - }, - { - "epoch": 0.6731298191820354, - "grad_norm": 1.3104447212713455, - "learning_rate": 1.0197961711224824e-06, - "loss": 0.9363, - "step": 7464 - }, - { - "epoch": 0.6732200027055057, - "grad_norm": 1.4776918643621875, - "learning_rate": 1.0192869769097777e-06, - "loss": 0.9869, - "step": 7465 - }, - { - "epoch": 0.673310186228976, - "grad_norm": 1.496543796666976, - "learning_rate": 1.018777866376916e-06, - "loss": 0.9874, - "step": 7466 - }, - { - "epoch": 0.6734003697524462, - "grad_norm": 2.0955610271533898, - "learning_rate": 1.0182688395673374e-06, - "loss": 0.8519, - "step": 7467 - }, - { - "epoch": 0.6734905532759164, - "grad_norm": 1.710568810718886, - "learning_rate": 1.017759896524475e-06, - "loss": 0.939, - "step": 7468 - }, - { - "epoch": 0.6735807367993868, - "grad_norm": 3.004233393322692, - "learning_rate": 1.0172510372917528e-06, - "loss": 1.0116, - "step": 7469 - }, - { - "epoch": 0.673670920322857, - "grad_norm": 2.1967348534281212, - "learning_rate": 1.0167422619125925e-06, - "loss": 0.8702, - "step": 7470 - }, - { - "epoch": 0.6737611038463273, - "grad_norm": 1.468862732152454, - "learning_rate": 1.0162335704304026e-06, - "loss": 0.9877, - "step": 7471 - }, - { - "epoch": 0.6738512873697975, - "grad_norm": 0.702210424920754, - "learning_rate": 1.0157249628885903e-06, - "loss": 0.8379, - "step": 7472 - }, - { - "epoch": 0.6739414708932678, - "grad_norm": 1.2432172333882827, - "learning_rate": 1.0152164393305506e-06, - "loss": 0.8881, - "step": 7473 - }, - { - "epoch": 0.6740316544167381, - "grad_norm": 1.5821622817794703, - "learning_rate": 1.0147079997996746e-06, - "loss": 0.9617, - "step": 7474 - }, - { - "epoch": 0.6741218379402083, - "grad_norm": 1.5007925324063076, - "learning_rate": 1.0141996443393446e-06, - "loss": 1.0056, - "step": 7475 - }, - { - "epoch": 0.6742120214636785, - "grad_norm": 1.4824508241985608, - "learning_rate": 1.0136913729929369e-06, - "loss": 0.9587, - "step": 7476 - }, - { - "epoch": 0.6743022049871489, - "grad_norm": 1.4743331953705987, - "learning_rate": 1.0131831858038203e-06, - "loss": 0.9394, - "step": 7477 - }, - { - "epoch": 0.6743923885106191, - "grad_norm": 1.3141943122715147, - "learning_rate": 1.0126750828153538e-06, - "loss": 1.0358, - "step": 7478 - }, - { - "epoch": 0.6744825720340893, - "grad_norm": 1.7144692952768275, - "learning_rate": 1.012167064070895e-06, - "loss": 1.0139, - "step": 7479 - }, - { - "epoch": 0.6745727555575597, - "grad_norm": 1.4958192259708583, - "learning_rate": 1.0116591296137885e-06, - "loss": 0.9997, - "step": 7480 - }, - { - "epoch": 0.6746629390810299, - "grad_norm": 1.7316493706425327, - "learning_rate": 1.0111512794873746e-06, - "loss": 0.9997, - "step": 7481 - }, - { - "epoch": 0.6747531226045002, - "grad_norm": 1.4238089519617445, - "learning_rate": 1.010643513734986e-06, - "loss": 0.974, - "step": 7482 - }, - { - "epoch": 0.6748433061279704, - "grad_norm": 1.4902448541315292, - "learning_rate": 1.010135832399948e-06, - "loss": 1.0148, - "step": 7483 - }, - { - "epoch": 0.6749334896514407, - "grad_norm": 1.1447909844870556, - "learning_rate": 1.0096282355255792e-06, - "loss": 0.9806, - "step": 7484 - }, - { - "epoch": 0.675023673174911, - "grad_norm": 2.3063170798039057, - "learning_rate": 1.0091207231551905e-06, - "loss": 1.0288, - "step": 7485 - }, - { - "epoch": 0.6751138566983812, - "grad_norm": 1.667229088619377, - "learning_rate": 1.0086132953320842e-06, - "loss": 0.934, - "step": 7486 - }, - { - "epoch": 0.6752040402218514, - "grad_norm": 2.291097836267391, - "learning_rate": 1.0081059520995591e-06, - "loss": 0.9997, - "step": 7487 - }, - { - "epoch": 0.6752942237453218, - "grad_norm": 5.072264684912793, - "learning_rate": 1.0075986935009028e-06, - "loss": 1.0372, - "step": 7488 - }, - { - "epoch": 0.675384407268792, - "grad_norm": 1.4403270620221165, - "learning_rate": 1.0070915195793982e-06, - "loss": 0.9815, - "step": 7489 - }, - { - "epoch": 0.6754745907922622, - "grad_norm": 1.9875284204910182, - "learning_rate": 1.0065844303783197e-06, - "loss": 0.9522, - "step": 7490 - }, - { - "epoch": 0.6755647743157325, - "grad_norm": 1.3161191995228245, - "learning_rate": 1.0060774259409356e-06, - "loss": 0.8948, - "step": 7491 - }, - { - "epoch": 0.6756549578392028, - "grad_norm": 1.26808581890043, - "learning_rate": 1.0055705063105065e-06, - "loss": 1.0089, - "step": 7492 - }, - { - "epoch": 0.675745141362673, - "grad_norm": 2.0904257275959877, - "learning_rate": 1.0050636715302837e-06, - "loss": 0.9656, - "step": 7493 - }, - { - "epoch": 0.6758353248861433, - "grad_norm": 1.4827582941307313, - "learning_rate": 1.0045569216435157e-06, - "loss": 0.9892, - "step": 7494 - }, - { - "epoch": 0.6759255084096135, - "grad_norm": 1.3480484543788063, - "learning_rate": 1.0040502566934384e-06, - "loss": 1.0596, - "step": 7495 - }, - { - "epoch": 0.6760156919330839, - "grad_norm": 1.6531971560561045, - "learning_rate": 1.0035436767232866e-06, - "loss": 0.9118, - "step": 7496 - }, - { - "epoch": 0.6761058754565541, - "grad_norm": 1.2871203501671917, - "learning_rate": 1.0030371817762816e-06, - "loss": 0.9904, - "step": 7497 - }, - { - "epoch": 0.6761960589800243, - "grad_norm": 1.353551646712453, - "learning_rate": 1.0025307718956417e-06, - "loss": 0.983, - "step": 7498 - }, - { - "epoch": 0.6762862425034946, - "grad_norm": 1.5931885422208643, - "learning_rate": 1.0020244471245765e-06, - "loss": 0.9946, - "step": 7499 - }, - { - "epoch": 0.6763764260269649, - "grad_norm": 1.5459757030694892, - "learning_rate": 1.001518207506288e-06, - "loss": 0.9276, - "step": 7500 - }, - { - "epoch": 0.6764666095504351, - "grad_norm": 1.4196489956047007, - "learning_rate": 1.0010120530839717e-06, - "loss": 1.0353, - "step": 7501 - }, - { - "epoch": 0.6765567930739054, - "grad_norm": 1.0957225451048889, - "learning_rate": 1.0005059839008161e-06, - "loss": 0.9954, - "step": 7502 - }, - { - "epoch": 0.6766469765973756, - "grad_norm": 1.6032040254921163, - "learning_rate": 1.0000000000000004e-06, - "loss": 0.9263, - "step": 7503 - }, - { - "epoch": 0.676737160120846, - "grad_norm": 1.402867806360529, - "learning_rate": 9.994941014246985e-07, - "loss": 1.0254, - "step": 7504 - }, - { - "epoch": 0.6768273436443162, - "grad_norm": 1.4837733605465253, - "learning_rate": 9.989882882180766e-07, - "loss": 0.8648, - "step": 7505 - }, - { - "epoch": 0.6769175271677864, - "grad_norm": 1.419301481236849, - "learning_rate": 9.984825604232938e-07, - "loss": 0.9904, - "step": 7506 - }, - { - "epoch": 0.6770077106912568, - "grad_norm": 1.4619737509733737, - "learning_rate": 9.97976918083502e-07, - "loss": 0.9158, - "step": 7507 - }, - { - "epoch": 0.677097894214727, - "grad_norm": 1.1964913831624509, - "learning_rate": 9.974713612418427e-07, - "loss": 1.0467, - "step": 7508 - }, - { - "epoch": 0.6771880777381972, - "grad_norm": 1.4183408811094198, - "learning_rate": 9.969658899414563e-07, - "loss": 0.9854, - "step": 7509 - }, - { - "epoch": 0.6772782612616675, - "grad_norm": 1.7549096446673986, - "learning_rate": 9.964605042254696e-07, - "loss": 0.8975, - "step": 7510 - }, - { - "epoch": 0.6773684447851378, - "grad_norm": 7.352482704679245, - "learning_rate": 9.959552041370076e-07, - "loss": 0.9164, - "step": 7511 - }, - { - "epoch": 0.677458628308608, - "grad_norm": 1.364109388946019, - "learning_rate": 9.954499897191824e-07, - "loss": 0.8952, - "step": 7512 - }, - { - "epoch": 0.6775488118320783, - "grad_norm": 1.830122484058344, - "learning_rate": 9.949448610151043e-07, - "loss": 0.9356, - "step": 7513 - }, - { - "epoch": 0.6776389953555485, - "grad_norm": 1.6363374179367671, - "learning_rate": 9.944398180678719e-07, - "loss": 0.9549, - "step": 7514 - }, - { - "epoch": 0.6777291788790188, - "grad_norm": 1.3058177141441418, - "learning_rate": 9.939348609205789e-07, - "loss": 0.9284, - "step": 7515 - }, - { - "epoch": 0.6778193624024891, - "grad_norm": 1.311269046656025, - "learning_rate": 9.93429989616311e-07, - "loss": 0.9801, - "step": 7516 - }, - { - "epoch": 0.6779095459259593, - "grad_norm": 1.5097604137098772, - "learning_rate": 9.929252041981464e-07, - "loss": 0.9049, - "step": 7517 - }, - { - "epoch": 0.6779997294494295, - "grad_norm": 1.3907886794555795, - "learning_rate": 9.924205047091572e-07, - "loss": 0.943, - "step": 7518 - }, - { - "epoch": 0.6780899129728999, - "grad_norm": 1.4648387044183937, - "learning_rate": 9.919158911924056e-07, - "loss": 1.0915, - "step": 7519 - }, - { - "epoch": 0.6781800964963701, - "grad_norm": 1.5139860750009, - "learning_rate": 9.914113636909483e-07, - "loss": 1.0064, - "step": 7520 - }, - { - "epoch": 0.6782702800198404, - "grad_norm": 0.7810479475046775, - "learning_rate": 9.90906922247835e-07, - "loss": 0.9194, - "step": 7521 - }, - { - "epoch": 0.6783604635433106, - "grad_norm": 1.2300207382097614, - "learning_rate": 9.904025669061072e-07, - "loss": 1.0082, - "step": 7522 - }, - { - "epoch": 0.6784506470667809, - "grad_norm": 1.3063193526406067, - "learning_rate": 9.89898297708799e-07, - "loss": 0.9668, - "step": 7523 - }, - { - "epoch": 0.6785408305902512, - "grad_norm": 1.6679739672827143, - "learning_rate": 9.893941146989388e-07, - "loss": 0.9344, - "step": 7524 - }, - { - "epoch": 0.6786310141137214, - "grad_norm": 1.6644831739075385, - "learning_rate": 9.888900179195437e-07, - "loss": 0.9361, - "step": 7525 - }, - { - "epoch": 0.6787211976371916, - "grad_norm": 2.533373095384389, - "learning_rate": 9.883860074136285e-07, - "loss": 1.0507, - "step": 7526 - }, - { - "epoch": 0.678811381160662, - "grad_norm": 2.1645496247071407, - "learning_rate": 9.87882083224196e-07, - "loss": 0.9766, - "step": 7527 - }, - { - "epoch": 0.6789015646841322, - "grad_norm": 0.7192540681741839, - "learning_rate": 9.873782453942462e-07, - "loss": 0.8714, - "step": 7528 - }, - { - "epoch": 0.6789917482076024, - "grad_norm": 1.63563928081151, - "learning_rate": 9.868744939667676e-07, - "loss": 0.9066, - "step": 7529 - }, - { - "epoch": 0.6790819317310728, - "grad_norm": 2.032242283171415, - "learning_rate": 9.863708289847432e-07, - "loss": 0.8607, - "step": 7530 - }, - { - "epoch": 0.679172115254543, - "grad_norm": 1.5901727359438533, - "learning_rate": 9.85867250491149e-07, - "loss": 0.8805, - "step": 7531 - }, - { - "epoch": 0.6792622987780133, - "grad_norm": 1.426217877145778, - "learning_rate": 9.853637585289528e-07, - "loss": 1.0827, - "step": 7532 - }, - { - "epoch": 0.6793524823014835, - "grad_norm": 2.2477088495118918, - "learning_rate": 9.848603531411159e-07, - "loss": 0.9708, - "step": 7533 - }, - { - "epoch": 0.6794426658249538, - "grad_norm": 1.69478729128688, - "learning_rate": 9.843570343705899e-07, - "loss": 0.9862, - "step": 7534 - }, - { - "epoch": 0.6795328493484241, - "grad_norm": 1.2262849919015217, - "learning_rate": 9.83853802260323e-07, - "loss": 1.0369, - "step": 7535 - }, - { - "epoch": 0.6796230328718943, - "grad_norm": 89.09755066867018, - "learning_rate": 9.833506568532524e-07, - "loss": 0.9178, - "step": 7536 - }, - { - "epoch": 0.6797132163953645, - "grad_norm": 1.4525201010174913, - "learning_rate": 9.828475981923093e-07, - "loss": 0.8282, - "step": 7537 - }, - { - "epoch": 0.6798033999188349, - "grad_norm": 1.7317221345483778, - "learning_rate": 9.823446263204175e-07, - "loss": 0.9572, - "step": 7538 - }, - { - "epoch": 0.6798935834423051, - "grad_norm": 1.4451018308774037, - "learning_rate": 9.818417412804937e-07, - "loss": 0.9761, - "step": 7539 - }, - { - "epoch": 0.6799837669657753, - "grad_norm": 1.5380584229929866, - "learning_rate": 9.813389431154463e-07, - "loss": 1.056, - "step": 7540 - }, - { - "epoch": 0.6800739504892456, - "grad_norm": 1.599737849217259, - "learning_rate": 9.808362318681783e-07, - "loss": 1.0047, - "step": 7541 - }, - { - "epoch": 0.6801641340127159, - "grad_norm": 1.871410876578372, - "learning_rate": 9.803336075815807e-07, - "loss": 0.9965, - "step": 7542 - }, - { - "epoch": 0.6802543175361861, - "grad_norm": 1.7512959041746434, - "learning_rate": 9.79831070298544e-07, - "loss": 0.9032, - "step": 7543 - }, - { - "epoch": 0.6803445010596564, - "grad_norm": 1.5580425004659073, - "learning_rate": 9.793286200619443e-07, - "loss": 0.9482, - "step": 7544 - }, - { - "epoch": 0.6804346845831266, - "grad_norm": 1.582142390362112, - "learning_rate": 9.78826256914655e-07, - "loss": 1.0292, - "step": 7545 - }, - { - "epoch": 0.680524868106597, - "grad_norm": 1.5015778190474423, - "learning_rate": 9.7832398089954e-07, - "loss": 1.0245, - "step": 7546 - }, - { - "epoch": 0.6806150516300672, - "grad_norm": 1.4632375634266315, - "learning_rate": 9.778217920594565e-07, - "loss": 1.0754, - "step": 7547 - }, - { - "epoch": 0.6807052351535374, - "grad_norm": 2.0198293204008664, - "learning_rate": 9.773196904372547e-07, - "loss": 1.0185, - "step": 7548 - }, - { - "epoch": 0.6807954186770077, - "grad_norm": 1.6513701472151552, - "learning_rate": 9.768176760757742e-07, - "loss": 1.0636, - "step": 7549 - }, - { - "epoch": 0.680885602200478, - "grad_norm": 1.4009567669903573, - "learning_rate": 9.76315749017853e-07, - "loss": 0.9511, - "step": 7550 - }, - { - "epoch": 0.6809757857239482, - "grad_norm": 1.7073579034840936, - "learning_rate": 9.758139093063161e-07, - "loss": 0.9798, - "step": 7551 - }, - { - "epoch": 0.6810659692474185, - "grad_norm": 1.5261005307801998, - "learning_rate": 9.753121569839834e-07, - "loss": 1.0447, - "step": 7552 - }, - { - "epoch": 0.6811561527708888, - "grad_norm": 0.780982620260793, - "learning_rate": 9.748104920936678e-07, - "loss": 0.8111, - "step": 7553 - }, - { - "epoch": 0.681246336294359, - "grad_norm": 4.153638435125987, - "learning_rate": 9.743089146781738e-07, - "loss": 0.9548, - "step": 7554 - }, - { - "epoch": 0.6813365198178293, - "grad_norm": 1.325276000589095, - "learning_rate": 9.738074247802988e-07, - "loss": 0.9054, - "step": 7555 - }, - { - "epoch": 0.6814267033412995, - "grad_norm": 1.5452459986372142, - "learning_rate": 9.733060224428325e-07, - "loss": 0.9932, - "step": 7556 - }, - { - "epoch": 0.6815168868647699, - "grad_norm": 1.531271019616174, - "learning_rate": 9.728047077085577e-07, - "loss": 0.9882, - "step": 7557 - }, - { - "epoch": 0.6816070703882401, - "grad_norm": 2.4105611056147462, - "learning_rate": 9.723034806202497e-07, - "loss": 0.8737, - "step": 7558 - }, - { - "epoch": 0.6816972539117103, - "grad_norm": 1.545008679831607, - "learning_rate": 9.718023412206748e-07, - "loss": 0.9862, - "step": 7559 - }, - { - "epoch": 0.6817874374351806, - "grad_norm": 1.2188617948462874, - "learning_rate": 9.713012895525935e-07, - "loss": 0.9977, - "step": 7560 - }, - { - "epoch": 0.6818776209586509, - "grad_norm": 1.6339055103637037, - "learning_rate": 9.708003256587584e-07, - "loss": 0.9596, - "step": 7561 - }, - { - "epoch": 0.6819678044821211, - "grad_norm": 1.5334060437474546, - "learning_rate": 9.702994495819147e-07, - "loss": 1.026, - "step": 7562 - }, - { - "epoch": 0.6820579880055914, - "grad_norm": 1.6547807618569934, - "learning_rate": 9.697986613647999e-07, - "loss": 0.8835, - "step": 7563 - }, - { - "epoch": 0.6821481715290616, - "grad_norm": 0.7251376054335927, - "learning_rate": 9.692979610501425e-07, - "loss": 0.873, - "step": 7564 - }, - { - "epoch": 0.6822383550525319, - "grad_norm": 1.4174019177124175, - "learning_rate": 9.68797348680668e-07, - "loss": 1.0528, - "step": 7565 - }, - { - "epoch": 0.6823285385760022, - "grad_norm": 1.2932026354797392, - "learning_rate": 9.682968242990878e-07, - "loss": 0.9416, - "step": 7566 - }, - { - "epoch": 0.6824187220994724, - "grad_norm": 1.34108580914835, - "learning_rate": 9.677963879481132e-07, - "loss": 1.0239, - "step": 7567 - }, - { - "epoch": 0.6825089056229426, - "grad_norm": 1.893542159355313, - "learning_rate": 9.672960396704416e-07, - "loss": 0.9614, - "step": 7568 - }, - { - "epoch": 0.682599089146413, - "grad_norm": 1.548410212548688, - "learning_rate": 9.667957795087657e-07, - "loss": 0.9791, - "step": 7569 - }, - { - "epoch": 0.6826892726698832, - "grad_norm": 2.1479756916666832, - "learning_rate": 9.662956075057712e-07, - "loss": 1.0299, - "step": 7570 - }, - { - "epoch": 0.6827794561933535, - "grad_norm": 2.134556096047338, - "learning_rate": 9.657955237041354e-07, - "loss": 1.028, - "step": 7571 - }, - { - "epoch": 0.6828696397168237, - "grad_norm": 1.2603164769645994, - "learning_rate": 9.652955281465278e-07, - "loss": 0.9554, - "step": 7572 - }, - { - "epoch": 0.682959823240294, - "grad_norm": 1.3795281755640179, - "learning_rate": 9.64795620875612e-07, - "loss": 0.9816, - "step": 7573 - }, - { - "epoch": 0.6830500067637643, - "grad_norm": 1.4017893085659276, - "learning_rate": 9.64295801934041e-07, - "loss": 0.9188, - "step": 7574 - }, - { - "epoch": 0.6831401902872345, - "grad_norm": 1.2203525375460489, - "learning_rate": 9.63796071364463e-07, - "loss": 0.8427, - "step": 7575 - }, - { - "epoch": 0.6832303738107048, - "grad_norm": 1.4390909264622074, - "learning_rate": 9.632964292095179e-07, - "loss": 1.0598, - "step": 7576 - }, - { - "epoch": 0.6833205573341751, - "grad_norm": 1.3498656806528728, - "learning_rate": 9.627968755118374e-07, - "loss": 0.9604, - "step": 7577 - }, - { - "epoch": 0.6834107408576453, - "grad_norm": 0.737155035127967, - "learning_rate": 9.622974103140468e-07, - "loss": 0.8602, - "step": 7578 - }, - { - "epoch": 0.6835009243811155, - "grad_norm": 1.5657896887987033, - "learning_rate": 9.617980336587632e-07, - "loss": 1.0399, - "step": 7579 - }, - { - "epoch": 0.6835911079045859, - "grad_norm": 1.482109107192986, - "learning_rate": 9.612987455885964e-07, - "loss": 0.8579, - "step": 7580 - }, - { - "epoch": 0.6836812914280561, - "grad_norm": 1.5646646382844638, - "learning_rate": 9.607995461461467e-07, - "loss": 0.9758, - "step": 7581 - }, - { - "epoch": 0.6837714749515263, - "grad_norm": 1.5564377483806895, - "learning_rate": 9.603004353740111e-07, - "loss": 0.8758, - "step": 7582 - }, - { - "epoch": 0.6838616584749966, - "grad_norm": 0.7664384996509853, - "learning_rate": 9.598014133147738e-07, - "loss": 0.8836, - "step": 7583 - }, - { - "epoch": 0.6839518419984669, - "grad_norm": 1.2805476240591447, - "learning_rate": 9.59302480011017e-07, - "loss": 0.942, - "step": 7584 - }, - { - "epoch": 0.6840420255219372, - "grad_norm": 0.7061110773427525, - "learning_rate": 9.588036355053102e-07, - "loss": 0.8487, - "step": 7585 - }, - { - "epoch": 0.6841322090454074, - "grad_norm": 1.7561320092148718, - "learning_rate": 9.583048798402182e-07, - "loss": 0.8421, - "step": 7586 - }, - { - "epoch": 0.6842223925688776, - "grad_norm": 1.2957556156431993, - "learning_rate": 9.57806213058298e-07, - "loss": 0.9731, - "step": 7587 - }, - { - "epoch": 0.684312576092348, - "grad_norm": 0.7125532214046878, - "learning_rate": 9.57307635202098e-07, - "loss": 0.8796, - "step": 7588 - }, - { - "epoch": 0.6844027596158182, - "grad_norm": 1.3522647510640122, - "learning_rate": 9.568091463141607e-07, - "loss": 1.0744, - "step": 7589 - }, - { - "epoch": 0.6844929431392884, - "grad_norm": 1.5503191896158552, - "learning_rate": 9.563107464370187e-07, - "loss": 0.9483, - "step": 7590 - }, - { - "epoch": 0.6845831266627587, - "grad_norm": 1.4213293516742649, - "learning_rate": 9.558124356131982e-07, - "loss": 0.9942, - "step": 7591 - }, - { - "epoch": 0.684673310186229, - "grad_norm": 2.657197222395213, - "learning_rate": 9.553142138852187e-07, - "loss": 0.9147, - "step": 7592 - }, - { - "epoch": 0.6847634937096992, - "grad_norm": 1.65979574440561, - "learning_rate": 9.548160812955905e-07, - "loss": 0.8875, - "step": 7593 - }, - { - "epoch": 0.6848536772331695, - "grad_norm": 1.3281861683840326, - "learning_rate": 9.543180378868175e-07, - "loss": 0.8046, - "step": 7594 - }, - { - "epoch": 0.6849438607566397, - "grad_norm": 4.063168984130923, - "learning_rate": 9.538200837013962e-07, - "loss": 1.0104, - "step": 7595 - }, - { - "epoch": 0.68503404428011, - "grad_norm": 0.6801610590429323, - "learning_rate": 9.533222187818122e-07, - "loss": 0.8901, - "step": 7596 - }, - { - "epoch": 0.6851242278035803, - "grad_norm": 1.6352926154579757, - "learning_rate": 9.528244431705492e-07, - "loss": 0.987, - "step": 7597 - }, - { - "epoch": 0.6852144113270505, - "grad_norm": 1.331029296991072, - "learning_rate": 9.523267569100774e-07, - "loss": 0.9839, - "step": 7598 - }, - { - "epoch": 0.6853045948505209, - "grad_norm": 1.2992031387795036, - "learning_rate": 9.518291600428652e-07, - "loss": 0.9321, - "step": 7599 - }, - { - "epoch": 0.6853947783739911, - "grad_norm": 1.987152318713529, - "learning_rate": 9.513316526113677e-07, - "loss": 0.9908, - "step": 7600 - }, - { - "epoch": 0.6854849618974613, - "grad_norm": 1.4819613458840888, - "learning_rate": 9.50834234658036e-07, - "loss": 0.9977, - "step": 7601 - }, - { - "epoch": 0.6855751454209316, - "grad_norm": 2.0592226519114596, - "learning_rate": 9.503369062253123e-07, - "loss": 0.9888, - "step": 7602 - }, - { - "epoch": 0.6856653289444019, - "grad_norm": 0.6692985326079192, - "learning_rate": 9.498396673556317e-07, - "loss": 0.8482, - "step": 7603 - }, - { - "epoch": 0.6857555124678721, - "grad_norm": 1.4280850962066114, - "learning_rate": 9.493425180914219e-07, - "loss": 0.9973, - "step": 7604 - }, - { - "epoch": 0.6858456959913424, - "grad_norm": 1.4953896083103895, - "learning_rate": 9.488454584751e-07, - "loss": 0.9066, - "step": 7605 - }, - { - "epoch": 0.6859358795148126, - "grad_norm": 1.3732462015257434, - "learning_rate": 9.483484885490813e-07, - "loss": 0.9835, - "step": 7606 - }, - { - "epoch": 0.686026063038283, - "grad_norm": 1.5277018797931616, - "learning_rate": 9.478516083557675e-07, - "loss": 0.9973, - "step": 7607 - }, - { - "epoch": 0.6861162465617532, - "grad_norm": 2.2031622268020623, - "learning_rate": 9.473548179375561e-07, - "loss": 0.9934, - "step": 7608 - }, - { - "epoch": 0.6862064300852234, - "grad_norm": 0.679003535807846, - "learning_rate": 9.468581173368358e-07, - "loss": 0.8637, - "step": 7609 - }, - { - "epoch": 0.6862966136086937, - "grad_norm": 1.3472572510000882, - "learning_rate": 9.463615065959878e-07, - "loss": 0.9854, - "step": 7610 - }, - { - "epoch": 0.686386797132164, - "grad_norm": 1.6551921273124943, - "learning_rate": 9.458649857573857e-07, - "loss": 1.0314, - "step": 7611 - }, - { - "epoch": 0.6864769806556342, - "grad_norm": 12.49337470906455, - "learning_rate": 9.453685548633963e-07, - "loss": 0.9895, - "step": 7612 - }, - { - "epoch": 0.6865671641791045, - "grad_norm": 1.882922980521716, - "learning_rate": 9.448722139563756e-07, - "loss": 0.9314, - "step": 7613 - }, - { - "epoch": 0.6866573477025747, - "grad_norm": 1.8173098705808124, - "learning_rate": 9.443759630786769e-07, - "loss": 0.8717, - "step": 7614 - }, - { - "epoch": 0.686747531226045, - "grad_norm": 1.517531542756561, - "learning_rate": 9.438798022726408e-07, - "loss": 1.0383, - "step": 7615 - }, - { - "epoch": 0.6868377147495153, - "grad_norm": 1.6048495915607146, - "learning_rate": 9.433837315806037e-07, - "loss": 0.9315, - "step": 7616 - }, - { - "epoch": 0.6869278982729855, - "grad_norm": 1.675516601152404, - "learning_rate": 9.428877510448925e-07, - "loss": 0.9306, - "step": 7617 - }, - { - "epoch": 0.6870180817964557, - "grad_norm": 1.2472307524687922, - "learning_rate": 9.423918607078272e-07, - "loss": 0.9032, - "step": 7618 - }, - { - "epoch": 0.6871082653199261, - "grad_norm": 1.4877455340424015, - "learning_rate": 9.418960606117208e-07, - "loss": 0.9891, - "step": 7619 - }, - { - "epoch": 0.6871984488433963, - "grad_norm": 1.5262012159594387, - "learning_rate": 9.414003507988752e-07, - "loss": 0.9227, - "step": 7620 - }, - { - "epoch": 0.6872886323668665, - "grad_norm": 1.5586784167704792, - "learning_rate": 9.409047313115904e-07, - "loss": 0.9057, - "step": 7621 - }, - { - "epoch": 0.6873788158903368, - "grad_norm": 1.578644770772189, - "learning_rate": 9.404092021921521e-07, - "loss": 0.9179, - "step": 7622 - }, - { - "epoch": 0.6874689994138071, - "grad_norm": 3.5014464250600663, - "learning_rate": 9.399137634828447e-07, - "loss": 0.9527, - "step": 7623 - }, - { - "epoch": 0.6875591829372774, - "grad_norm": 1.387303231867043, - "learning_rate": 9.394184152259396e-07, - "loss": 0.9929, - "step": 7624 - }, - { - "epoch": 0.6876493664607476, - "grad_norm": 1.2524175630497252, - "learning_rate": 9.389231574637033e-07, - "loss": 1.074, - "step": 7625 - }, - { - "epoch": 0.6877395499842179, - "grad_norm": 2.5295724843805822, - "learning_rate": 9.384279902383938e-07, - "loss": 1.0664, - "step": 7626 - }, - { - "epoch": 0.6878297335076882, - "grad_norm": 1.4938224902579667, - "learning_rate": 9.379329135922615e-07, - "loss": 0.9872, - "step": 7627 - }, - { - "epoch": 0.6879199170311584, - "grad_norm": 1.4669192652937286, - "learning_rate": 9.374379275675495e-07, - "loss": 0.933, - "step": 7628 - }, - { - "epoch": 0.6880101005546286, - "grad_norm": 1.5799980001799083, - "learning_rate": 9.369430322064931e-07, - "loss": 0.9727, - "step": 7629 - }, - { - "epoch": 0.688100284078099, - "grad_norm": 1.6730981078353961, - "learning_rate": 9.364482275513179e-07, - "loss": 1.0508, - "step": 7630 - }, - { - "epoch": 0.6881904676015692, - "grad_norm": 1.5105572952925188, - "learning_rate": 9.359535136442444e-07, - "loss": 1.0411, - "step": 7631 - }, - { - "epoch": 0.6882806511250394, - "grad_norm": 1.4335026932988115, - "learning_rate": 9.354588905274843e-07, - "loss": 0.9051, - "step": 7632 - }, - { - "epoch": 0.6883708346485097, - "grad_norm": 1.6529562245622882, - "learning_rate": 9.349643582432414e-07, - "loss": 1.0107, - "step": 7633 - }, - { - "epoch": 0.68846101817198, - "grad_norm": 1.4741996145295262, - "learning_rate": 9.344699168337127e-07, - "loss": 0.9671, - "step": 7634 - }, - { - "epoch": 0.6885512016954503, - "grad_norm": 1.9137516314239824, - "learning_rate": 9.339755663410845e-07, - "loss": 0.9401, - "step": 7635 - }, - { - "epoch": 0.6886413852189205, - "grad_norm": 1.4731824020964959, - "learning_rate": 9.334813068075405e-07, - "loss": 1.0006, - "step": 7636 - }, - { - "epoch": 0.6887315687423907, - "grad_norm": 1.4640047239060612, - "learning_rate": 9.329871382752506e-07, - "loss": 0.9798, - "step": 7637 - }, - { - "epoch": 0.6888217522658611, - "grad_norm": 1.3594391467865021, - "learning_rate": 9.32493060786383e-07, - "loss": 0.963, - "step": 7638 - }, - { - "epoch": 0.6889119357893313, - "grad_norm": 1.2170320776455272, - "learning_rate": 9.31999074383093e-07, - "loss": 1.0418, - "step": 7639 - }, - { - "epoch": 0.6890021193128015, - "grad_norm": 1.6382426084983006, - "learning_rate": 9.315051791075308e-07, - "loss": 0.9719, - "step": 7640 - }, - { - "epoch": 0.6890923028362718, - "grad_norm": 1.2907262988665782, - "learning_rate": 9.310113750018382e-07, - "loss": 0.9656, - "step": 7641 - }, - { - "epoch": 0.6891824863597421, - "grad_norm": 2.1924219616493454, - "learning_rate": 9.305176621081496e-07, - "loss": 1.0423, - "step": 7642 - }, - { - "epoch": 0.6892726698832123, - "grad_norm": 1.412542210851245, - "learning_rate": 9.300240404685911e-07, - "loss": 1.0056, - "step": 7643 - }, - { - "epoch": 0.6893628534066826, - "grad_norm": 1.422831454688048, - "learning_rate": 9.295305101252812e-07, - "loss": 0.9824, - "step": 7644 - }, - { - "epoch": 0.6894530369301528, - "grad_norm": 1.5761773961382932, - "learning_rate": 9.290370711203314e-07, - "loss": 0.9566, - "step": 7645 - }, - { - "epoch": 0.6895432204536232, - "grad_norm": 1.4130280225125404, - "learning_rate": 9.285437234958433e-07, - "loss": 0.9768, - "step": 7646 - }, - { - "epoch": 0.6896334039770934, - "grad_norm": 1.6090474211993295, - "learning_rate": 9.280504672939124e-07, - "loss": 0.9647, - "step": 7647 - }, - { - "epoch": 0.6897235875005636, - "grad_norm": 1.4446682705631926, - "learning_rate": 9.275573025566266e-07, - "loss": 1.0128, - "step": 7648 - }, - { - "epoch": 0.689813771024034, - "grad_norm": 1.6768355176601977, - "learning_rate": 9.27064229326065e-07, - "loss": 1.0122, - "step": 7649 - }, - { - "epoch": 0.6899039545475042, - "grad_norm": 1.5354132594877798, - "learning_rate": 9.265712476442995e-07, - "loss": 0.9406, - "step": 7650 - }, - { - "epoch": 0.6899941380709744, - "grad_norm": 1.3510035422933067, - "learning_rate": 9.260783575533949e-07, - "loss": 0.9043, - "step": 7651 - }, - { - "epoch": 0.6900843215944447, - "grad_norm": 1.4869919045387632, - "learning_rate": 9.255855590954045e-07, - "loss": 0.9344, - "step": 7652 - }, - { - "epoch": 0.690174505117915, - "grad_norm": 1.3742823461896922, - "learning_rate": 9.250928523123802e-07, - "loss": 1.0405, - "step": 7653 - }, - { - "epoch": 0.6902646886413852, - "grad_norm": 1.4576321505642782, - "learning_rate": 9.24600237246359e-07, - "loss": 0.9852, - "step": 7654 - }, - { - "epoch": 0.6903548721648555, - "grad_norm": 1.4156129040738141, - "learning_rate": 9.241077139393769e-07, - "loss": 0.9838, - "step": 7655 - }, - { - "epoch": 0.6904450556883257, - "grad_norm": 1.531999676975784, - "learning_rate": 9.236152824334564e-07, - "loss": 1.0006, - "step": 7656 - }, - { - "epoch": 0.690535239211796, - "grad_norm": 1.549932444546168, - "learning_rate": 9.231229427706151e-07, - "loss": 0.8751, - "step": 7657 - }, - { - "epoch": 0.6906254227352663, - "grad_norm": 1.7241222000226555, - "learning_rate": 9.226306949928622e-07, - "loss": 0.983, - "step": 7658 - }, - { - "epoch": 0.6907156062587365, - "grad_norm": 1.5100067459359965, - "learning_rate": 9.221385391421988e-07, - "loss": 1.0495, - "step": 7659 - }, - { - "epoch": 0.6908057897822067, - "grad_norm": 1.387195000332817, - "learning_rate": 9.216464752606192e-07, - "loss": 0.9907, - "step": 7660 - }, - { - "epoch": 0.6908959733056771, - "grad_norm": 1.3723077860413901, - "learning_rate": 9.211545033901078e-07, - "loss": 1.0532, - "step": 7661 - }, - { - "epoch": 0.6909861568291473, - "grad_norm": 1.399318225930494, - "learning_rate": 9.206626235726426e-07, - "loss": 1.0071, - "step": 7662 - }, - { - "epoch": 0.6910763403526176, - "grad_norm": 1.688549774859582, - "learning_rate": 9.20170835850194e-07, - "loss": 0.9122, - "step": 7663 - }, - { - "epoch": 0.6911665238760878, - "grad_norm": 0.7113925609281179, - "learning_rate": 9.196791402647237e-07, - "loss": 0.8606, - "step": 7664 - }, - { - "epoch": 0.6912567073995581, - "grad_norm": 1.3934877910209207, - "learning_rate": 9.191875368581861e-07, - "loss": 0.8986, - "step": 7665 - }, - { - "epoch": 0.6913468909230284, - "grad_norm": 1.5680623040374582, - "learning_rate": 9.186960256725271e-07, - "loss": 0.987, - "step": 7666 - }, - { - "epoch": 0.6914370744464986, - "grad_norm": 1.4755398877555517, - "learning_rate": 9.182046067496856e-07, - "loss": 1.0385, - "step": 7667 - }, - { - "epoch": 0.6915272579699688, - "grad_norm": 1.554044274154802, - "learning_rate": 9.177132801315927e-07, - "loss": 1.0571, - "step": 7668 - }, - { - "epoch": 0.6916174414934392, - "grad_norm": 1.3635014134879877, - "learning_rate": 9.172220458601692e-07, - "loss": 0.9504, - "step": 7669 - }, - { - "epoch": 0.6917076250169094, - "grad_norm": 1.6697310489242203, - "learning_rate": 9.167309039773324e-07, - "loss": 0.9476, - "step": 7670 - }, - { - "epoch": 0.6917978085403796, - "grad_norm": 1.188825820847418, - "learning_rate": 9.162398545249872e-07, - "loss": 0.8759, - "step": 7671 - }, - { - "epoch": 0.69188799206385, - "grad_norm": 1.180762754595552, - "learning_rate": 9.157488975450334e-07, - "loss": 1.0274, - "step": 7672 - }, - { - "epoch": 0.6919781755873202, - "grad_norm": 1.5041409396069845, - "learning_rate": 9.15258033079362e-07, - "loss": 0.8114, - "step": 7673 - }, - { - "epoch": 0.6920683591107905, - "grad_norm": 1.6458080708299225, - "learning_rate": 9.147672611698567e-07, - "loss": 0.9683, - "step": 7674 - }, - { - "epoch": 0.6921585426342607, - "grad_norm": 1.6008470379336945, - "learning_rate": 9.142765818583933e-07, - "loss": 0.9651, - "step": 7675 - }, - { - "epoch": 0.692248726157731, - "grad_norm": 1.2773203716413504, - "learning_rate": 9.13785995186837e-07, - "loss": 0.9532, - "step": 7676 - }, - { - "epoch": 0.6923389096812013, - "grad_norm": 1.4662155035437427, - "learning_rate": 9.132955011970502e-07, - "loss": 0.9374, - "step": 7677 - }, - { - "epoch": 0.6924290932046715, - "grad_norm": 1.2605135331356097, - "learning_rate": 9.128050999308827e-07, - "loss": 0.9671, - "step": 7678 - }, - { - "epoch": 0.6925192767281417, - "grad_norm": 1.8193686334412758, - "learning_rate": 9.123147914301789e-07, - "loss": 1.0725, - "step": 7679 - }, - { - "epoch": 0.6926094602516121, - "grad_norm": 1.4758392661844366, - "learning_rate": 9.118245757367745e-07, - "loss": 1.0216, - "step": 7680 - }, - { - "epoch": 0.6926996437750823, - "grad_norm": 0.6731365912315184, - "learning_rate": 9.113344528924973e-07, - "loss": 0.8511, - "step": 7681 - }, - { - "epoch": 0.6927898272985525, - "grad_norm": 1.2848552594880747, - "learning_rate": 9.108444229391676e-07, - "loss": 0.9538, - "step": 7682 - }, - { - "epoch": 0.6928800108220228, - "grad_norm": 1.5704375259958705, - "learning_rate": 9.103544859185979e-07, - "loss": 0.9447, - "step": 7683 - }, - { - "epoch": 0.6929701943454931, - "grad_norm": 1.4268330078362277, - "learning_rate": 9.098646418725902e-07, - "loss": 1.0633, - "step": 7684 - }, - { - "epoch": 0.6930603778689634, - "grad_norm": 0.6302741675761285, - "learning_rate": 9.093748908429437e-07, - "loss": 0.8655, - "step": 7685 - }, - { - "epoch": 0.6931505613924336, - "grad_norm": 1.3739529871553415, - "learning_rate": 9.088852328714444e-07, - "loss": 0.965, - "step": 7686 - }, - { - "epoch": 0.6932407449159038, - "grad_norm": 1.3240195605599263, - "learning_rate": 9.083956679998735e-07, - "loss": 0.8869, - "step": 7687 - }, - { - "epoch": 0.6933309284393742, - "grad_norm": 1.2594380270660723, - "learning_rate": 9.079061962700032e-07, - "loss": 0.9468, - "step": 7688 - }, - { - "epoch": 0.6934211119628444, - "grad_norm": 1.2997736880577466, - "learning_rate": 9.074168177235979e-07, - "loss": 0.9966, - "step": 7689 - }, - { - "epoch": 0.6935112954863146, - "grad_norm": 1.494623881459007, - "learning_rate": 9.069275324024151e-07, - "loss": 0.9881, - "step": 7690 - }, - { - "epoch": 0.6936014790097849, - "grad_norm": 1.5588656310239726, - "learning_rate": 9.064383403482005e-07, - "loss": 0.9075, - "step": 7691 - }, - { - "epoch": 0.6936916625332552, - "grad_norm": 1.179728172561094, - "learning_rate": 9.059492416026983e-07, - "loss": 0.9397, - "step": 7692 - }, - { - "epoch": 0.6937818460567254, - "grad_norm": 1.3753384260365928, - "learning_rate": 9.054602362076378e-07, - "loss": 0.971, - "step": 7693 - }, - { - "epoch": 0.6938720295801957, - "grad_norm": 3.0214338736083386, - "learning_rate": 9.049713242047468e-07, - "loss": 0.9619, - "step": 7694 - }, - { - "epoch": 0.693962213103666, - "grad_norm": 1.7636057016456583, - "learning_rate": 9.044825056357395e-07, - "loss": 0.952, - "step": 7695 - }, - { - "epoch": 0.6940523966271362, - "grad_norm": 2.0610076822936367, - "learning_rate": 9.039937805423255e-07, - "loss": 0.9305, - "step": 7696 - }, - { - "epoch": 0.6941425801506065, - "grad_norm": 0.6376838624636056, - "learning_rate": 9.035051489662051e-07, - "loss": 0.7935, - "step": 7697 - }, - { - "epoch": 0.6942327636740767, - "grad_norm": 1.329445844088849, - "learning_rate": 9.030166109490718e-07, - "loss": 0.9277, - "step": 7698 - }, - { - "epoch": 0.6943229471975471, - "grad_norm": 2.0507238180202156, - "learning_rate": 9.025281665326099e-07, - "loss": 0.9935, - "step": 7699 - }, - { - "epoch": 0.6944131307210173, - "grad_norm": 1.6563629255844294, - "learning_rate": 9.020398157584967e-07, - "loss": 0.9782, - "step": 7700 - }, - { - "epoch": 0.6945033142444875, - "grad_norm": 1.79305830588484, - "learning_rate": 9.015515586684002e-07, - "loss": 0.878, - "step": 7701 - }, - { - "epoch": 0.6945934977679578, - "grad_norm": 0.6517423953163954, - "learning_rate": 9.010633953039812e-07, - "loss": 0.8443, - "step": 7702 - }, - { - "epoch": 0.6946836812914281, - "grad_norm": 1.3388421677492208, - "learning_rate": 9.005753257068929e-07, - "loss": 0.9889, - "step": 7703 - }, - { - "epoch": 0.6947738648148983, - "grad_norm": 1.4477436136069626, - "learning_rate": 9.000873499187797e-07, - "loss": 0.9892, - "step": 7704 - }, - { - "epoch": 0.6948640483383686, - "grad_norm": 1.4648347167690225, - "learning_rate": 8.995994679812797e-07, - "loss": 1.0092, - "step": 7705 - }, - { - "epoch": 0.6949542318618388, - "grad_norm": 1.2919305511327595, - "learning_rate": 8.991116799360192e-07, - "loss": 1.0055, - "step": 7706 - }, - { - "epoch": 0.6950444153853091, - "grad_norm": 1.281390298744799, - "learning_rate": 8.986239858246217e-07, - "loss": 0.9687, - "step": 7707 - }, - { - "epoch": 0.6951345989087794, - "grad_norm": 1.5704052646243618, - "learning_rate": 8.981363856886972e-07, - "loss": 1.0508, - "step": 7708 - }, - { - "epoch": 0.6952247824322496, - "grad_norm": 2.39075684028053, - "learning_rate": 8.976488795698533e-07, - "loss": 0.9582, - "step": 7709 - }, - { - "epoch": 0.6953149659557198, - "grad_norm": 1.3024780844896648, - "learning_rate": 8.971614675096841e-07, - "loss": 0.9917, - "step": 7710 - }, - { - "epoch": 0.6954051494791902, - "grad_norm": 1.6117650035114268, - "learning_rate": 8.966741495497807e-07, - "loss": 1.037, - "step": 7711 - }, - { - "epoch": 0.6954953330026604, - "grad_norm": 1.2577351137606096, - "learning_rate": 8.961869257317218e-07, - "loss": 0.9478, - "step": 7712 - }, - { - "epoch": 0.6955855165261307, - "grad_norm": 1.9225292177795223, - "learning_rate": 8.956997960970809e-07, - "loss": 0.964, - "step": 7713 - }, - { - "epoch": 0.6956757000496009, - "grad_norm": 1.5788154224845747, - "learning_rate": 8.952127606874224e-07, - "loss": 0.8837, - "step": 7714 - }, - { - "epoch": 0.6957658835730712, - "grad_norm": 1.4846174343295675, - "learning_rate": 8.947258195443028e-07, - "loss": 0.9724, - "step": 7715 - }, - { - "epoch": 0.6958560670965415, - "grad_norm": 1.5597610118175815, - "learning_rate": 8.942389727092716e-07, - "loss": 0.9444, - "step": 7716 - }, - { - "epoch": 0.6959462506200117, - "grad_norm": 0.7066573529559389, - "learning_rate": 8.937522202238677e-07, - "loss": 0.8451, - "step": 7717 - }, - { - "epoch": 0.696036434143482, - "grad_norm": 1.522417871674815, - "learning_rate": 8.932655621296239e-07, - "loss": 0.9942, - "step": 7718 - }, - { - "epoch": 0.6961266176669523, - "grad_norm": 1.2247482780760577, - "learning_rate": 8.927789984680649e-07, - "loss": 0.9694, - "step": 7719 - }, - { - "epoch": 0.6962168011904225, - "grad_norm": 1.6018697304402694, - "learning_rate": 8.922925292807068e-07, - "loss": 0.8432, - "step": 7720 - }, - { - "epoch": 0.6963069847138927, - "grad_norm": 5.238521015730583, - "learning_rate": 8.91806154609058e-07, - "loss": 1.0536, - "step": 7721 - }, - { - "epoch": 0.6963971682373631, - "grad_norm": 1.3316490542345603, - "learning_rate": 8.913198744946195e-07, - "loss": 0.9462, - "step": 7722 - }, - { - "epoch": 0.6964873517608333, - "grad_norm": 1.5294236170840259, - "learning_rate": 8.908336889788807e-07, - "loss": 0.9937, - "step": 7723 - }, - { - "epoch": 0.6965775352843036, - "grad_norm": 2.035518211348872, - "learning_rate": 8.903475981033293e-07, - "loss": 0.8575, - "step": 7724 - }, - { - "epoch": 0.6966677188077738, - "grad_norm": 0.8123604214427421, - "learning_rate": 8.898616019094376e-07, - "loss": 0.9227, - "step": 7725 - }, - { - "epoch": 0.6967579023312441, - "grad_norm": 1.691398356289317, - "learning_rate": 8.89375700438677e-07, - "loss": 1.0108, - "step": 7726 - }, - { - "epoch": 0.6968480858547144, - "grad_norm": 2.215027386084867, - "learning_rate": 8.888898937325047e-07, - "loss": 0.9453, - "step": 7727 - }, - { - "epoch": 0.6969382693781846, - "grad_norm": 1.9573498007881125, - "learning_rate": 8.884041818323733e-07, - "loss": 0.9489, - "step": 7728 - }, - { - "epoch": 0.6970284529016548, - "grad_norm": 1.5928429659152066, - "learning_rate": 8.879185647797262e-07, - "loss": 1.0048, - "step": 7729 - }, - { - "epoch": 0.6971186364251252, - "grad_norm": 1.7235870268794944, - "learning_rate": 8.874330426159993e-07, - "loss": 0.9458, - "step": 7730 - }, - { - "epoch": 0.6972088199485954, - "grad_norm": 1.4146138164998083, - "learning_rate": 8.869476153826205e-07, - "loss": 1.0012, - "step": 7731 - }, - { - "epoch": 0.6972990034720656, - "grad_norm": 1.3584636395600413, - "learning_rate": 8.864622831210071e-07, - "loss": 0.9741, - "step": 7732 - }, - { - "epoch": 0.6973891869955359, - "grad_norm": 2.0994493716286615, - "learning_rate": 8.85977045872573e-07, - "loss": 0.9805, - "step": 7733 - }, - { - "epoch": 0.6974793705190062, - "grad_norm": 3.590120821649055, - "learning_rate": 8.854919036787194e-07, - "loss": 0.9587, - "step": 7734 - }, - { - "epoch": 0.6975695540424764, - "grad_norm": 1.613115648540363, - "learning_rate": 8.850068565808417e-07, - "loss": 0.9448, - "step": 7735 - }, - { - "epoch": 0.6976597375659467, - "grad_norm": 1.7944826869347321, - "learning_rate": 8.845219046203271e-07, - "loss": 1.0268, - "step": 7736 - }, - { - "epoch": 0.6977499210894169, - "grad_norm": 1.550956965948246, - "learning_rate": 8.840370478385544e-07, - "loss": 0.9991, - "step": 7737 - }, - { - "epoch": 0.6978401046128873, - "grad_norm": 2.875709280343072, - "learning_rate": 8.83552286276894e-07, - "loss": 0.9916, - "step": 7738 - }, - { - "epoch": 0.6979302881363575, - "grad_norm": 1.6713904988956303, - "learning_rate": 8.830676199767095e-07, - "loss": 0.9898, - "step": 7739 - }, - { - "epoch": 0.6980204716598277, - "grad_norm": 1.4407328037795892, - "learning_rate": 8.825830489793527e-07, - "loss": 0.9581, - "step": 7740 - }, - { - "epoch": 0.698110655183298, - "grad_norm": 1.3966835573505811, - "learning_rate": 8.820985733261732e-07, - "loss": 0.9337, - "step": 7741 - }, - { - "epoch": 0.6982008387067683, - "grad_norm": 1.7570498019842329, - "learning_rate": 8.816141930585066e-07, - "loss": 0.8769, - "step": 7742 - }, - { - "epoch": 0.6982910222302385, - "grad_norm": 0.6752739032693663, - "learning_rate": 8.811299082176837e-07, - "loss": 0.8467, - "step": 7743 - }, - { - "epoch": 0.6983812057537088, - "grad_norm": 1.382729522456892, - "learning_rate": 8.806457188450265e-07, - "loss": 0.9443, - "step": 7744 - }, - { - "epoch": 0.6984713892771791, - "grad_norm": 1.4086797280711776, - "learning_rate": 8.801616249818487e-07, - "loss": 1.0083, - "step": 7745 - }, - { - "epoch": 0.6985615728006493, - "grad_norm": 1.5524947455374092, - "learning_rate": 8.796776266694564e-07, - "loss": 0.943, - "step": 7746 - }, - { - "epoch": 0.6986517563241196, - "grad_norm": 1.218523835936383, - "learning_rate": 8.79193723949145e-07, - "loss": 0.9214, - "step": 7747 - }, - { - "epoch": 0.6987419398475898, - "grad_norm": 3.818995211605997, - "learning_rate": 8.787099168622063e-07, - "loss": 0.8705, - "step": 7748 - }, - { - "epoch": 0.6988321233710602, - "grad_norm": 1.680746693771449, - "learning_rate": 8.782262054499199e-07, - "loss": 0.9064, - "step": 7749 - }, - { - "epoch": 0.6989223068945304, - "grad_norm": 1.260963190093563, - "learning_rate": 8.777425897535588e-07, - "loss": 1.0011, - "step": 7750 - }, - { - "epoch": 0.6990124904180006, - "grad_norm": 1.5321038648640384, - "learning_rate": 8.77259069814388e-07, - "loss": 0.9782, - "step": 7751 - }, - { - "epoch": 0.6991026739414709, - "grad_norm": 1.3364174410162892, - "learning_rate": 8.767756456736641e-07, - "loss": 0.9821, - "step": 7752 - }, - { - "epoch": 0.6991928574649412, - "grad_norm": 2.026757421950249, - "learning_rate": 8.762923173726358e-07, - "loss": 0.9016, - "step": 7753 - }, - { - "epoch": 0.6992830409884114, - "grad_norm": 1.5715392525820047, - "learning_rate": 8.758090849525428e-07, - "loss": 1.0724, - "step": 7754 - }, - { - "epoch": 0.6993732245118817, - "grad_norm": 1.7387040556288764, - "learning_rate": 8.753259484546174e-07, - "loss": 0.9143, - "step": 7755 - }, - { - "epoch": 0.6994634080353519, - "grad_norm": 1.4538265708641573, - "learning_rate": 8.748429079200841e-07, - "loss": 0.9409, - "step": 7756 - }, - { - "epoch": 0.6995535915588222, - "grad_norm": 1.5423327716710673, - "learning_rate": 8.743599633901575e-07, - "loss": 1.0311, - "step": 7757 - }, - { - "epoch": 0.6996437750822925, - "grad_norm": 1.933870916056887, - "learning_rate": 8.738771149060453e-07, - "loss": 0.9854, - "step": 7758 - }, - { - "epoch": 0.6997339586057627, - "grad_norm": 1.4030854540309265, - "learning_rate": 8.73394362508947e-07, - "loss": 0.9385, - "step": 7759 - }, - { - "epoch": 0.6998241421292329, - "grad_norm": 1.3473455983241818, - "learning_rate": 8.72911706240054e-07, - "loss": 0.933, - "step": 7760 - }, - { - "epoch": 0.6999143256527033, - "grad_norm": 2.2438182682288246, - "learning_rate": 8.724291461405493e-07, - "loss": 0.8529, - "step": 7761 - }, - { - "epoch": 0.7000045091761735, - "grad_norm": 1.423817743124873, - "learning_rate": 8.71946682251606e-07, - "loss": 0.9377, - "step": 7762 - }, - { - "epoch": 0.7000946926996438, - "grad_norm": 1.5319476192822283, - "learning_rate": 8.714643146143932e-07, - "loss": 0.9697, - "step": 7763 - }, - { - "epoch": 0.700184876223114, - "grad_norm": 1.3326989343520568, - "learning_rate": 8.709820432700663e-07, - "loss": 0.9304, - "step": 7764 - }, - { - "epoch": 0.7002750597465843, - "grad_norm": 1.7975702018982633, - "learning_rate": 8.704998682597784e-07, - "loss": 0.8909, - "step": 7765 - }, - { - "epoch": 0.7003652432700546, - "grad_norm": 1.337132817073188, - "learning_rate": 8.700177896246688e-07, - "loss": 0.9916, - "step": 7766 - }, - { - "epoch": 0.7004554267935248, - "grad_norm": 1.868017291591427, - "learning_rate": 8.695358074058721e-07, - "loss": 0.8983, - "step": 7767 - }, - { - "epoch": 0.7005456103169951, - "grad_norm": 1.9640696158997175, - "learning_rate": 8.690539216445136e-07, - "loss": 1.0108, - "step": 7768 - }, - { - "epoch": 0.7006357938404654, - "grad_norm": 2.498098890346114, - "learning_rate": 8.685721323817106e-07, - "loss": 0.8774, - "step": 7769 - }, - { - "epoch": 0.7007259773639356, - "grad_norm": 1.3979107168253884, - "learning_rate": 8.680904396585718e-07, - "loss": 1.0111, - "step": 7770 - }, - { - "epoch": 0.7008161608874058, - "grad_norm": 1.7493256223196563, - "learning_rate": 8.676088435161988e-07, - "loss": 0.9239, - "step": 7771 - }, - { - "epoch": 0.7009063444108762, - "grad_norm": 1.6084787780670864, - "learning_rate": 8.671273439956824e-07, - "loss": 0.9877, - "step": 7772 - }, - { - "epoch": 0.7009965279343464, - "grad_norm": 1.8056053040652609, - "learning_rate": 8.666459411381075e-07, - "loss": 0.9747, - "step": 7773 - }, - { - "epoch": 0.7010867114578166, - "grad_norm": 1.3007748348776684, - "learning_rate": 8.661646349845501e-07, - "loss": 0.9969, - "step": 7774 - }, - { - "epoch": 0.7011768949812869, - "grad_norm": 1.8299290811106672, - "learning_rate": 8.656834255760783e-07, - "loss": 1.0263, - "step": 7775 - }, - { - "epoch": 0.7012670785047572, - "grad_norm": 1.4023791550441194, - "learning_rate": 8.652023129537509e-07, - "loss": 1.0267, - "step": 7776 - }, - { - "epoch": 0.7013572620282275, - "grad_norm": 1.4094492230885456, - "learning_rate": 8.647212971586195e-07, - "loss": 1.0012, - "step": 7777 - }, - { - "epoch": 0.7014474455516977, - "grad_norm": 1.6361168822248078, - "learning_rate": 8.642403782317275e-07, - "loss": 0.931, - "step": 7778 - }, - { - "epoch": 0.7015376290751679, - "grad_norm": 1.5741558204359343, - "learning_rate": 8.637595562141075e-07, - "loss": 0.9784, - "step": 7779 - }, - { - "epoch": 0.7016278125986383, - "grad_norm": 1.2646408021909954, - "learning_rate": 8.632788311467889e-07, - "loss": 0.9413, - "step": 7780 - }, - { - "epoch": 0.7017179961221085, - "grad_norm": 1.221999897146767, - "learning_rate": 8.627982030707867e-07, - "loss": 0.9589, - "step": 7781 - }, - { - "epoch": 0.7018081796455787, - "grad_norm": 1.5216502342854938, - "learning_rate": 8.623176720271139e-07, - "loss": 1.0398, - "step": 7782 - }, - { - "epoch": 0.701898363169049, - "grad_norm": 1.7503665812602585, - "learning_rate": 8.618372380567696e-07, - "loss": 1.0452, - "step": 7783 - }, - { - "epoch": 0.7019885466925193, - "grad_norm": 1.6670124251618286, - "learning_rate": 8.613569012007478e-07, - "loss": 0.9272, - "step": 7784 - }, - { - "epoch": 0.7020787302159895, - "grad_norm": 2.546036752002533, - "learning_rate": 8.608766615000338e-07, - "loss": 1.0094, - "step": 7785 - }, - { - "epoch": 0.7021689137394598, - "grad_norm": 1.7867836910800694, - "learning_rate": 8.603965189956039e-07, - "loss": 0.9963, - "step": 7786 - }, - { - "epoch": 0.70225909726293, - "grad_norm": 2.3636772895817795, - "learning_rate": 8.599164737284276e-07, - "loss": 0.9723, - "step": 7787 - }, - { - "epoch": 0.7023492807864004, - "grad_norm": 4.08438390075601, - "learning_rate": 8.594365257394634e-07, - "loss": 0.9304, - "step": 7788 - }, - { - "epoch": 0.7024394643098706, - "grad_norm": 1.8930121869855152, - "learning_rate": 8.589566750696637e-07, - "loss": 0.9197, - "step": 7789 - }, - { - "epoch": 0.7025296478333408, - "grad_norm": 2.6608071224149437, - "learning_rate": 8.584769217599721e-07, - "loss": 0.913, - "step": 7790 - }, - { - "epoch": 0.7026198313568112, - "grad_norm": 1.6078363859813887, - "learning_rate": 8.579972658513239e-07, - "loss": 0.9229, - "step": 7791 - }, - { - "epoch": 0.7027100148802814, - "grad_norm": 2.677476154011059, - "learning_rate": 8.57517707384646e-07, - "loss": 0.9776, - "step": 7792 - }, - { - "epoch": 0.7028001984037516, - "grad_norm": 1.4467887968085738, - "learning_rate": 8.570382464008574e-07, - "loss": 1.0056, - "step": 7793 - }, - { - "epoch": 0.7028903819272219, - "grad_norm": 1.5488111151089516, - "learning_rate": 8.565588829408665e-07, - "loss": 0.9399, - "step": 7794 - }, - { - "epoch": 0.7029805654506922, - "grad_norm": 1.44986613740933, - "learning_rate": 8.560796170455782e-07, - "loss": 0.9525, - "step": 7795 - }, - { - "epoch": 0.7030707489741624, - "grad_norm": 1.5871878519825808, - "learning_rate": 8.556004487558828e-07, - "loss": 1.0043, - "step": 7796 - }, - { - "epoch": 0.7031609324976327, - "grad_norm": 1.3336546282289674, - "learning_rate": 8.55121378112669e-07, - "loss": 0.8848, - "step": 7797 - }, - { - "epoch": 0.7032511160211029, - "grad_norm": 1.4847856455524229, - "learning_rate": 8.546424051568111e-07, - "loss": 0.9509, - "step": 7798 - }, - { - "epoch": 0.7033412995445733, - "grad_norm": 1.8865714005770091, - "learning_rate": 8.541635299291785e-07, - "loss": 1.0196, - "step": 7799 - }, - { - "epoch": 0.7034314830680435, - "grad_norm": 0.5852411328262807, - "learning_rate": 8.536847524706317e-07, - "loss": 0.8527, - "step": 7800 - }, - { - "epoch": 0.7035216665915137, - "grad_norm": 1.7565922549567246, - "learning_rate": 8.532060728220225e-07, - "loss": 0.9099, - "step": 7801 - }, - { - "epoch": 0.703611850114984, - "grad_norm": 2.1093911629516757, - "learning_rate": 8.527274910241955e-07, - "loss": 0.988, - "step": 7802 - }, - { - "epoch": 0.7037020336384543, - "grad_norm": 1.7671678941686384, - "learning_rate": 8.522490071179833e-07, - "loss": 0.9704, - "step": 7803 - }, - { - "epoch": 0.7037922171619245, - "grad_norm": 1.2172445144326014, - "learning_rate": 8.517706211442159e-07, - "loss": 0.8453, - "step": 7804 - }, - { - "epoch": 0.7038824006853948, - "grad_norm": 1.4463031569778706, - "learning_rate": 8.512923331437097e-07, - "loss": 1.0532, - "step": 7805 - }, - { - "epoch": 0.703972584208865, - "grad_norm": 1.3065282205949236, - "learning_rate": 8.508141431572755e-07, - "loss": 0.9403, - "step": 7806 - }, - { - "epoch": 0.7040627677323353, - "grad_norm": 2.211802714326219, - "learning_rate": 8.503360512257152e-07, - "loss": 0.9647, - "step": 7807 - }, - { - "epoch": 0.7041529512558056, - "grad_norm": 1.647224305258612, - "learning_rate": 8.498580573898219e-07, - "loss": 0.9344, - "step": 7808 - }, - { - "epoch": 0.7042431347792758, - "grad_norm": 2.020373878815249, - "learning_rate": 8.493801616903813e-07, - "loss": 0.9798, - "step": 7809 - }, - { - "epoch": 0.704333318302746, - "grad_norm": 5.808999597035567, - "learning_rate": 8.489023641681705e-07, - "loss": 0.9879, - "step": 7810 - }, - { - "epoch": 0.7044235018262164, - "grad_norm": 1.328764682878848, - "learning_rate": 8.484246648639555e-07, - "loss": 1.0207, - "step": 7811 - }, - { - "epoch": 0.7045136853496866, - "grad_norm": 1.2947443729682735, - "learning_rate": 8.479470638184994e-07, - "loss": 0.8461, - "step": 7812 - }, - { - "epoch": 0.7046038688731568, - "grad_norm": 2.1126186721243836, - "learning_rate": 8.474695610725513e-07, - "loss": 0.8972, - "step": 7813 - }, - { - "epoch": 0.7046940523966272, - "grad_norm": 1.2735399076321823, - "learning_rate": 8.469921566668552e-07, - "loss": 1.0453, - "step": 7814 - }, - { - "epoch": 0.7047842359200974, - "grad_norm": 1.3544355516756357, - "learning_rate": 8.46514850642146e-07, - "loss": 1.0255, - "step": 7815 - }, - { - "epoch": 0.7048744194435677, - "grad_norm": 1.565292380933424, - "learning_rate": 8.460376430391499e-07, - "loss": 0.9623, - "step": 7816 - }, - { - "epoch": 0.7049646029670379, - "grad_norm": 1.1581854730454229, - "learning_rate": 8.455605338985858e-07, - "loss": 0.9948, - "step": 7817 - }, - { - "epoch": 0.7050547864905082, - "grad_norm": 1.6869274156984233, - "learning_rate": 8.45083523261161e-07, - "loss": 0.833, - "step": 7818 - }, - { - "epoch": 0.7051449700139785, - "grad_norm": 2.470554127177241, - "learning_rate": 8.446066111675796e-07, - "loss": 0.9806, - "step": 7819 - }, - { - "epoch": 0.7052351535374487, - "grad_norm": 1.8261008508915801, - "learning_rate": 8.441297976585314e-07, - "loss": 0.8677, - "step": 7820 - }, - { - "epoch": 0.7053253370609189, - "grad_norm": 1.3672659279262949, - "learning_rate": 8.436530827747037e-07, - "loss": 1.0506, - "step": 7821 - }, - { - "epoch": 0.7054155205843893, - "grad_norm": 1.6874431317955054, - "learning_rate": 8.431764665567704e-07, - "loss": 1.1038, - "step": 7822 - }, - { - "epoch": 0.7055057041078595, - "grad_norm": 1.518935530519325, - "learning_rate": 8.426999490453996e-07, - "loss": 0.9116, - "step": 7823 - }, - { - "epoch": 0.7055958876313297, - "grad_norm": 1.5054883685414913, - "learning_rate": 8.422235302812504e-07, - "loss": 0.9822, - "step": 7824 - }, - { - "epoch": 0.7056860711548, - "grad_norm": 1.6676747690604177, - "learning_rate": 8.417472103049734e-07, - "loss": 1.0416, - "step": 7825 - }, - { - "epoch": 0.7057762546782703, - "grad_norm": 1.472419699534045, - "learning_rate": 8.412709891572112e-07, - "loss": 1.0032, - "step": 7826 - }, - { - "epoch": 0.7058664382017406, - "grad_norm": 1.664736186025065, - "learning_rate": 8.407948668785978e-07, - "loss": 1.0031, - "step": 7827 - }, - { - "epoch": 0.7059566217252108, - "grad_norm": 1.2767002456289518, - "learning_rate": 8.403188435097576e-07, - "loss": 0.9621, - "step": 7828 - }, - { - "epoch": 0.706046805248681, - "grad_norm": 1.390350443278754, - "learning_rate": 8.398429190913081e-07, - "loss": 1.0362, - "step": 7829 - }, - { - "epoch": 0.7061369887721514, - "grad_norm": 1.483553287542908, - "learning_rate": 8.393670936638578e-07, - "loss": 0.9748, - "step": 7830 - }, - { - "epoch": 0.7062271722956216, - "grad_norm": 1.578269876307508, - "learning_rate": 8.388913672680067e-07, - "loss": 0.9667, - "step": 7831 - }, - { - "epoch": 0.7063173558190918, - "grad_norm": 1.8094330852455083, - "learning_rate": 8.384157399443472e-07, - "loss": 0.921, - "step": 7832 - }, - { - "epoch": 0.7064075393425621, - "grad_norm": 1.707768331307305, - "learning_rate": 8.379402117334601e-07, - "loss": 1.0006, - "step": 7833 - }, - { - "epoch": 0.7064977228660324, - "grad_norm": 2.0598275180938996, - "learning_rate": 8.374647826759232e-07, - "loss": 0.9685, - "step": 7834 - }, - { - "epoch": 0.7065879063895026, - "grad_norm": 1.4283209768507874, - "learning_rate": 8.369894528122998e-07, - "loss": 0.9697, - "step": 7835 - }, - { - "epoch": 0.7066780899129729, - "grad_norm": 1.4989771534487373, - "learning_rate": 8.365142221831505e-07, - "loss": 0.8389, - "step": 7836 - }, - { - "epoch": 0.7067682734364432, - "grad_norm": 1.7222684739509564, - "learning_rate": 8.360390908290222e-07, - "loss": 0.9365, - "step": 7837 - }, - { - "epoch": 0.7068584569599135, - "grad_norm": 1.7364162083737307, - "learning_rate": 8.355640587904569e-07, - "loss": 0.9537, - "step": 7838 - }, - { - "epoch": 0.7069486404833837, - "grad_norm": 1.4888641737710135, - "learning_rate": 8.350891261079866e-07, - "loss": 0.9827, - "step": 7839 - }, - { - "epoch": 0.7070388240068539, - "grad_norm": 1.651922366112315, - "learning_rate": 8.346142928221356e-07, - "loss": 1.0152, - "step": 7840 - }, - { - "epoch": 0.7071290075303243, - "grad_norm": 2.4478047013886006, - "learning_rate": 8.341395589734189e-07, - "loss": 0.8201, - "step": 7841 - }, - { - "epoch": 0.7072191910537945, - "grad_norm": 2.089989202553633, - "learning_rate": 8.336649246023433e-07, - "loss": 0.9206, - "step": 7842 - }, - { - "epoch": 0.7073093745772647, - "grad_norm": 1.2996217067214848, - "learning_rate": 8.331903897494084e-07, - "loss": 0.8745, - "step": 7843 - }, - { - "epoch": 0.707399558100735, - "grad_norm": 1.6722652657180326, - "learning_rate": 8.327159544551024e-07, - "loss": 0.9805, - "step": 7844 - }, - { - "epoch": 0.7074897416242053, - "grad_norm": 1.703855655274061, - "learning_rate": 8.322416187599073e-07, - "loss": 0.9257, - "step": 7845 - }, - { - "epoch": 0.7075799251476755, - "grad_norm": 1.5852644671429852, - "learning_rate": 8.317673827042963e-07, - "loss": 0.9957, - "step": 7846 - }, - { - "epoch": 0.7076701086711458, - "grad_norm": 1.747128719677677, - "learning_rate": 8.312932463287339e-07, - "loss": 0.9609, - "step": 7847 - }, - { - "epoch": 0.707760292194616, - "grad_norm": 1.6579745688932632, - "learning_rate": 8.308192096736759e-07, - "loss": 0.9341, - "step": 7848 - }, - { - "epoch": 0.7078504757180863, - "grad_norm": 1.8225391178616344, - "learning_rate": 8.303452727795703e-07, - "loss": 0.9494, - "step": 7849 - }, - { - "epoch": 0.7079406592415566, - "grad_norm": 1.685900459907931, - "learning_rate": 8.298714356868542e-07, - "loss": 1.0437, - "step": 7850 - }, - { - "epoch": 0.7080308427650268, - "grad_norm": 1.6965720424359145, - "learning_rate": 8.293976984359605e-07, - "loss": 0.9859, - "step": 7851 - }, - { - "epoch": 0.708121026288497, - "grad_norm": 1.6328912013310994, - "learning_rate": 8.289240610673092e-07, - "loss": 0.9701, - "step": 7852 - }, - { - "epoch": 0.7082112098119674, - "grad_norm": 1.2914874762660027, - "learning_rate": 8.284505236213144e-07, - "loss": 0.9994, - "step": 7853 - }, - { - "epoch": 0.7083013933354376, - "grad_norm": 1.5092881645671221, - "learning_rate": 8.279770861383806e-07, - "loss": 0.9613, - "step": 7854 - }, - { - "epoch": 0.7083915768589079, - "grad_norm": 1.6908433783721473, - "learning_rate": 8.275037486589042e-07, - "loss": 0.9622, - "step": 7855 - }, - { - "epoch": 0.7084817603823781, - "grad_norm": 0.7057354470474425, - "learning_rate": 8.270305112232739e-07, - "loss": 0.8229, - "step": 7856 - }, - { - "epoch": 0.7085719439058484, - "grad_norm": 1.5151476550775544, - "learning_rate": 8.265573738718665e-07, - "loss": 0.963, - "step": 7857 - }, - { - "epoch": 0.7086621274293187, - "grad_norm": 1.4916100308341598, - "learning_rate": 8.260843366450559e-07, - "loss": 0.9317, - "step": 7858 - }, - { - "epoch": 0.7087523109527889, - "grad_norm": 1.7663404652401806, - "learning_rate": 8.256113995832017e-07, - "loss": 0.971, - "step": 7859 - }, - { - "epoch": 0.7088424944762591, - "grad_norm": 1.267753130524324, - "learning_rate": 8.251385627266583e-07, - "loss": 1.014, - "step": 7860 - }, - { - "epoch": 0.7089326779997295, - "grad_norm": 1.6003089278720766, - "learning_rate": 8.24665826115771e-07, - "loss": 0.9712, - "step": 7861 - }, - { - "epoch": 0.7090228615231997, - "grad_norm": 1.3698912996481007, - "learning_rate": 8.241931897908763e-07, - "loss": 0.986, - "step": 7862 - }, - { - "epoch": 0.7091130450466699, - "grad_norm": 1.5590483879571606, - "learning_rate": 8.237206537923016e-07, - "loss": 0.9954, - "step": 7863 - }, - { - "epoch": 0.7092032285701403, - "grad_norm": 1.8526007079473867, - "learning_rate": 8.232482181603671e-07, - "loss": 0.9585, - "step": 7864 - }, - { - "epoch": 0.7092934120936105, - "grad_norm": 1.407703411520306, - "learning_rate": 8.227758829353828e-07, - "loss": 0.9425, - "step": 7865 - }, - { - "epoch": 0.7093835956170808, - "grad_norm": 1.4498813481948734, - "learning_rate": 8.223036481576522e-07, - "loss": 0.9421, - "step": 7866 - }, - { - "epoch": 0.709473779140551, - "grad_norm": 3.8755212248682493, - "learning_rate": 8.218315138674672e-07, - "loss": 0.9743, - "step": 7867 - }, - { - "epoch": 0.7095639626640213, - "grad_norm": 0.6489611200565697, - "learning_rate": 8.21359480105114e-07, - "loss": 0.8267, - "step": 7868 - }, - { - "epoch": 0.7096541461874916, - "grad_norm": 1.4905136866563364, - "learning_rate": 8.208875469108689e-07, - "loss": 1.0012, - "step": 7869 - }, - { - "epoch": 0.7097443297109618, - "grad_norm": 2.0290534247015204, - "learning_rate": 8.204157143249997e-07, - "loss": 0.9578, - "step": 7870 - }, - { - "epoch": 0.709834513234432, - "grad_norm": 1.3793499370557099, - "learning_rate": 8.199439823877668e-07, - "loss": 0.9454, - "step": 7871 - }, - { - "epoch": 0.7099246967579024, - "grad_norm": 1.5119427655453066, - "learning_rate": 8.194723511394186e-07, - "loss": 0.959, - "step": 7872 - }, - { - "epoch": 0.7100148802813726, - "grad_norm": 1.5142802929202814, - "learning_rate": 8.190008206202002e-07, - "loss": 0.9228, - "step": 7873 - }, - { - "epoch": 0.7101050638048428, - "grad_norm": 1.6169392860588225, - "learning_rate": 8.185293908703423e-07, - "loss": 1.0121, - "step": 7874 - }, - { - "epoch": 0.7101952473283131, - "grad_norm": 1.3237352416861112, - "learning_rate": 8.180580619300727e-07, - "loss": 0.936, - "step": 7875 - }, - { - "epoch": 0.7102854308517834, - "grad_norm": 1.186914450184161, - "learning_rate": 8.175868338396057e-07, - "loss": 0.93, - "step": 7876 - }, - { - "epoch": 0.7103756143752537, - "grad_norm": 1.690652868919047, - "learning_rate": 8.171157066391499e-07, - "loss": 0.9477, - "step": 7877 - }, - { - "epoch": 0.7104657978987239, - "grad_norm": 1.554798218124935, - "learning_rate": 8.166446803689045e-07, - "loss": 0.9937, - "step": 7878 - }, - { - "epoch": 0.7105559814221941, - "grad_norm": 0.6521362157306498, - "learning_rate": 8.161737550690595e-07, - "loss": 0.8393, - "step": 7879 - }, - { - "epoch": 0.7106461649456645, - "grad_norm": 1.5632552801031134, - "learning_rate": 8.157029307797976e-07, - "loss": 1.0017, - "step": 7880 - }, - { - "epoch": 0.7107363484691347, - "grad_norm": 1.2701666547659056, - "learning_rate": 8.152322075412925e-07, - "loss": 1.0216, - "step": 7881 - }, - { - "epoch": 0.7108265319926049, - "grad_norm": 1.399711028998054, - "learning_rate": 8.147615853937073e-07, - "loss": 0.9239, - "step": 7882 - }, - { - "epoch": 0.7109167155160752, - "grad_norm": 1.3999775765530602, - "learning_rate": 8.142910643771992e-07, - "loss": 0.896, - "step": 7883 - }, - { - "epoch": 0.7110068990395455, - "grad_norm": 1.648147557508924, - "learning_rate": 8.138206445319152e-07, - "loss": 0.9287, - "step": 7884 - }, - { - "epoch": 0.7110970825630157, - "grad_norm": 1.6385905412715467, - "learning_rate": 8.133503258979944e-07, - "loss": 0.8702, - "step": 7885 - }, - { - "epoch": 0.711187266086486, - "grad_norm": 1.4516558860410622, - "learning_rate": 8.12880108515567e-07, - "loss": 0.8912, - "step": 7886 - }, - { - "epoch": 0.7112774496099563, - "grad_norm": 1.4030804837308135, - "learning_rate": 8.124099924247543e-07, - "loss": 0.9371, - "step": 7887 - }, - { - "epoch": 0.7113676331334265, - "grad_norm": 1.4401904098136347, - "learning_rate": 8.119399776656701e-07, - "loss": 0.8829, - "step": 7888 - }, - { - "epoch": 0.7114578166568968, - "grad_norm": 1.4018759624666677, - "learning_rate": 8.114700642784167e-07, - "loss": 0.9701, - "step": 7889 - }, - { - "epoch": 0.711548000180367, - "grad_norm": 1.2510408359665643, - "learning_rate": 8.110002523030921e-07, - "loss": 1.0229, - "step": 7890 - }, - { - "epoch": 0.7116381837038374, - "grad_norm": 1.2966279863179546, - "learning_rate": 8.105305417797808e-07, - "loss": 0.9974, - "step": 7891 - }, - { - "epoch": 0.7117283672273076, - "grad_norm": 1.4664532986685608, - "learning_rate": 8.100609327485635e-07, - "loss": 1.0049, - "step": 7892 - }, - { - "epoch": 0.7118185507507778, - "grad_norm": 1.666668923694354, - "learning_rate": 8.095914252495082e-07, - "loss": 1.0262, - "step": 7893 - }, - { - "epoch": 0.7119087342742481, - "grad_norm": 1.4991521028352335, - "learning_rate": 8.091220193226762e-07, - "loss": 0.9387, - "step": 7894 - }, - { - "epoch": 0.7119989177977184, - "grad_norm": 1.2457563846431414, - "learning_rate": 8.0865271500812e-07, - "loss": 0.9533, - "step": 7895 - }, - { - "epoch": 0.7120891013211886, - "grad_norm": 1.4170207908682673, - "learning_rate": 8.081835123458831e-07, - "loss": 1.0145, - "step": 7896 - }, - { - "epoch": 0.7121792848446589, - "grad_norm": 1.4446561405541867, - "learning_rate": 8.077144113760013e-07, - "loss": 0.9572, - "step": 7897 - }, - { - "epoch": 0.7122694683681291, - "grad_norm": 1.2933278118258198, - "learning_rate": 8.072454121384995e-07, - "loss": 0.9469, - "step": 7898 - }, - { - "epoch": 0.7123596518915994, - "grad_norm": 1.2359260278837287, - "learning_rate": 8.067765146733958e-07, - "loss": 1.0116, - "step": 7899 - }, - { - "epoch": 0.7124498354150697, - "grad_norm": 1.560709805153248, - "learning_rate": 8.063077190206993e-07, - "loss": 1.0437, - "step": 7900 - }, - { - "epoch": 0.7125400189385399, - "grad_norm": 0.6239880953288953, - "learning_rate": 8.058390252204101e-07, - "loss": 0.7468, - "step": 7901 - }, - { - "epoch": 0.7126302024620101, - "grad_norm": 1.2863209093750465, - "learning_rate": 8.0537043331252e-07, - "loss": 0.9643, - "step": 7902 - }, - { - "epoch": 0.7127203859854805, - "grad_norm": 2.183578027112446, - "learning_rate": 8.049019433370121e-07, - "loss": 0.9052, - "step": 7903 - }, - { - "epoch": 0.7128105695089507, - "grad_norm": 1.4401503469870358, - "learning_rate": 8.044335553338588e-07, - "loss": 0.9705, - "step": 7904 - }, - { - "epoch": 0.712900753032421, - "grad_norm": 1.4603382044170135, - "learning_rate": 8.039652693430281e-07, - "loss": 0.9774, - "step": 7905 - }, - { - "epoch": 0.7129909365558912, - "grad_norm": 1.4573155348238556, - "learning_rate": 8.034970854044742e-07, - "loss": 0.9477, - "step": 7906 - }, - { - "epoch": 0.7130811200793615, - "grad_norm": 1.4688039729164346, - "learning_rate": 8.03029003558148e-07, - "loss": 1.0691, - "step": 7907 - }, - { - "epoch": 0.7131713036028318, - "grad_norm": 1.7671013795086647, - "learning_rate": 8.025610238439864e-07, - "loss": 1.0324, - "step": 7908 - }, - { - "epoch": 0.713261487126302, - "grad_norm": 2.2869574857162056, - "learning_rate": 8.020931463019207e-07, - "loss": 1.0439, - "step": 7909 - }, - { - "epoch": 0.7133516706497723, - "grad_norm": 1.8968147280825554, - "learning_rate": 8.016253709718732e-07, - "loss": 0.9133, - "step": 7910 - }, - { - "epoch": 0.7134418541732426, - "grad_norm": 5.561455274906045, - "learning_rate": 8.011576978937567e-07, - "loss": 0.8747, - "step": 7911 - }, - { - "epoch": 0.7135320376967128, - "grad_norm": 1.7821238198124918, - "learning_rate": 8.006901271074764e-07, - "loss": 0.9734, - "step": 7912 - }, - { - "epoch": 0.713622221220183, - "grad_norm": 1.539882693095839, - "learning_rate": 8.002226586529261e-07, - "loss": 1.0089, - "step": 7913 - }, - { - "epoch": 0.7137124047436534, - "grad_norm": 1.3984217722770216, - "learning_rate": 7.997552925699956e-07, - "loss": 1.051, - "step": 7914 - }, - { - "epoch": 0.7138025882671236, - "grad_norm": 2.3559933105205393, - "learning_rate": 7.992880288985606e-07, - "loss": 1.0578, - "step": 7915 - }, - { - "epoch": 0.7138927717905938, - "grad_norm": 1.4139830003172447, - "learning_rate": 7.988208676784918e-07, - "loss": 0.9763, - "step": 7916 - }, - { - "epoch": 0.7139829553140641, - "grad_norm": 1.6612927664252852, - "learning_rate": 7.983538089496497e-07, - "loss": 0.9675, - "step": 7917 - }, - { - "epoch": 0.7140731388375344, - "grad_norm": 1.7001808687371112, - "learning_rate": 7.978868527518864e-07, - "loss": 0.9297, - "step": 7918 - }, - { - "epoch": 0.7141633223610047, - "grad_norm": 1.6519572931085096, - "learning_rate": 7.974199991250455e-07, - "loss": 0.9446, - "step": 7919 - }, - { - "epoch": 0.7142535058844749, - "grad_norm": 1.4450184290941597, - "learning_rate": 7.969532481089616e-07, - "loss": 1.0666, - "step": 7920 - }, - { - "epoch": 0.7143436894079451, - "grad_norm": 1.605433108637228, - "learning_rate": 7.964865997434589e-07, - "loss": 0.9808, - "step": 7921 - }, - { - "epoch": 0.7144338729314155, - "grad_norm": 1.4814461171821316, - "learning_rate": 7.96020054068357e-07, - "loss": 1.0452, - "step": 7922 - }, - { - "epoch": 0.7145240564548857, - "grad_norm": 1.1118476605283987, - "learning_rate": 7.95553611123462e-07, - "loss": 0.93, - "step": 7923 - }, - { - "epoch": 0.7146142399783559, - "grad_norm": 1.6983059941874987, - "learning_rate": 7.950872709485741e-07, - "loss": 1.1449, - "step": 7924 - }, - { - "epoch": 0.7147044235018262, - "grad_norm": 2.0692403068246317, - "learning_rate": 7.946210335834842e-07, - "loss": 1.026, - "step": 7925 - }, - { - "epoch": 0.7147946070252965, - "grad_norm": 1.2553039556568149, - "learning_rate": 7.94154899067974e-07, - "loss": 0.8929, - "step": 7926 - }, - { - "epoch": 0.7148847905487667, - "grad_norm": 1.532904062443094, - "learning_rate": 7.936888674418177e-07, - "loss": 0.8942, - "step": 7927 - }, - { - "epoch": 0.714974974072237, - "grad_norm": 1.65648534290336, - "learning_rate": 7.932229387447771e-07, - "loss": 0.976, - "step": 7928 - }, - { - "epoch": 0.7150651575957072, - "grad_norm": 1.7081912919826905, - "learning_rate": 7.927571130166109e-07, - "loss": 0.9557, - "step": 7929 - }, - { - "epoch": 0.7151553411191776, - "grad_norm": 0.6453917567833449, - "learning_rate": 7.922913902970632e-07, - "loss": 0.8255, - "step": 7930 - }, - { - "epoch": 0.7152455246426478, - "grad_norm": 3.0822087032544503, - "learning_rate": 7.918257706258744e-07, - "loss": 0.9689, - "step": 7931 - }, - { - "epoch": 0.715335708166118, - "grad_norm": 1.6238258227822266, - "learning_rate": 7.913602540427724e-07, - "loss": 0.9783, - "step": 7932 - }, - { - "epoch": 0.7154258916895884, - "grad_norm": 1.4150675368619743, - "learning_rate": 7.908948405874775e-07, - "loss": 0.9623, - "step": 7933 - }, - { - "epoch": 0.7155160752130586, - "grad_norm": 1.607579683550661, - "learning_rate": 7.904295302997019e-07, - "loss": 0.9021, - "step": 7934 - }, - { - "epoch": 0.7156062587365288, - "grad_norm": 1.293300159772832, - "learning_rate": 7.899643232191484e-07, - "loss": 0.9872, - "step": 7935 - }, - { - "epoch": 0.7156964422599991, - "grad_norm": 1.3337717179994366, - "learning_rate": 7.894992193855108e-07, - "loss": 0.9482, - "step": 7936 - }, - { - "epoch": 0.7157866257834694, - "grad_norm": 1.3507677508904716, - "learning_rate": 7.890342188384751e-07, - "loss": 0.9138, - "step": 7937 - }, - { - "epoch": 0.7158768093069396, - "grad_norm": 1.4070417930127541, - "learning_rate": 7.885693216177165e-07, - "loss": 0.9129, - "step": 7938 - }, - { - "epoch": 0.7159669928304099, - "grad_norm": 1.3237528923946837, - "learning_rate": 7.88104527762903e-07, - "loss": 0.9628, - "step": 7939 - }, - { - "epoch": 0.7160571763538801, - "grad_norm": 1.5905702322953663, - "learning_rate": 7.876398373136936e-07, - "loss": 0.9284, - "step": 7940 - }, - { - "epoch": 0.7161473598773505, - "grad_norm": 1.3661359394203387, - "learning_rate": 7.87175250309738e-07, - "loss": 0.9702, - "step": 7941 - }, - { - "epoch": 0.7162375434008207, - "grad_norm": 2.722501512287095, - "learning_rate": 7.867107667906785e-07, - "loss": 1.0386, - "step": 7942 - }, - { - "epoch": 0.7163277269242909, - "grad_norm": 1.619029373746001, - "learning_rate": 7.862463867961446e-07, - "loss": 0.9666, - "step": 7943 - }, - { - "epoch": 0.7164179104477612, - "grad_norm": 1.2945412469085371, - "learning_rate": 7.857821103657632e-07, - "loss": 1.0318, - "step": 7944 - }, - { - "epoch": 0.7165080939712315, - "grad_norm": 1.3051680176829052, - "learning_rate": 7.853179375391459e-07, - "loss": 0.9934, - "step": 7945 - }, - { - "epoch": 0.7165982774947017, - "grad_norm": 1.3951941872530338, - "learning_rate": 7.848538683559012e-07, - "loss": 0.9554, - "step": 7946 - }, - { - "epoch": 0.716688461018172, - "grad_norm": 1.417871393061431, - "learning_rate": 7.843899028556238e-07, - "loss": 1.0093, - "step": 7947 - }, - { - "epoch": 0.7167786445416422, - "grad_norm": 1.2835203385445038, - "learning_rate": 7.839260410779029e-07, - "loss": 0.9656, - "step": 7948 - }, - { - "epoch": 0.7168688280651125, - "grad_norm": 1.2333400105390138, - "learning_rate": 7.834622830623175e-07, - "loss": 0.988, - "step": 7949 - }, - { - "epoch": 0.7169590115885828, - "grad_norm": 1.7466966241106918, - "learning_rate": 7.82998628848438e-07, - "loss": 0.941, - "step": 7950 - }, - { - "epoch": 0.717049195112053, - "grad_norm": 1.26146437971376, - "learning_rate": 7.825350784758261e-07, - "loss": 0.9649, - "step": 7951 - }, - { - "epoch": 0.7171393786355232, - "grad_norm": 1.7458038430769356, - "learning_rate": 7.820716319840342e-07, - "loss": 0.9279, - "step": 7952 - }, - { - "epoch": 0.7172295621589936, - "grad_norm": 1.4854805292826316, - "learning_rate": 7.816082894126069e-07, - "loss": 1.0441, - "step": 7953 - }, - { - "epoch": 0.7173197456824638, - "grad_norm": 1.5752611519001511, - "learning_rate": 7.811450508010778e-07, - "loss": 0.9383, - "step": 7954 - }, - { - "epoch": 0.717409929205934, - "grad_norm": 1.358487727550999, - "learning_rate": 7.806819161889737e-07, - "loss": 0.9435, - "step": 7955 - }, - { - "epoch": 0.7175001127294043, - "grad_norm": 1.37684273784587, - "learning_rate": 7.802188856158119e-07, - "loss": 0.9969, - "step": 7956 - }, - { - "epoch": 0.7175902962528746, - "grad_norm": 1.7028912025055507, - "learning_rate": 7.797559591211002e-07, - "loss": 0.9529, - "step": 7957 - }, - { - "epoch": 0.7176804797763449, - "grad_norm": 1.3306091861672085, - "learning_rate": 7.79293136744339e-07, - "loss": 0.9291, - "step": 7958 - }, - { - "epoch": 0.7177706632998151, - "grad_norm": 1.7021284074857772, - "learning_rate": 7.788304185250185e-07, - "loss": 0.9691, - "step": 7959 - }, - { - "epoch": 0.7178608468232854, - "grad_norm": 1.4991818422005674, - "learning_rate": 7.78367804502619e-07, - "loss": 0.9985, - "step": 7960 - }, - { - "epoch": 0.7179510303467557, - "grad_norm": 1.7067057214058228, - "learning_rate": 7.779052947166156e-07, - "loss": 0.979, - "step": 7961 - }, - { - "epoch": 0.7180412138702259, - "grad_norm": 1.6186405601475307, - "learning_rate": 7.774428892064697e-07, - "loss": 0.9231, - "step": 7962 - }, - { - "epoch": 0.7181313973936961, - "grad_norm": 1.2606655484639546, - "learning_rate": 7.769805880116391e-07, - "loss": 0.9562, - "step": 7963 - }, - { - "epoch": 0.7182215809171665, - "grad_norm": 2.117163175886563, - "learning_rate": 7.765183911715678e-07, - "loss": 0.9444, - "step": 7964 - }, - { - "epoch": 0.7183117644406367, - "grad_norm": 1.9709194884982522, - "learning_rate": 7.760562987256933e-07, - "loss": 0.8673, - "step": 7965 - }, - { - "epoch": 0.718401947964107, - "grad_norm": 1.7134133458039544, - "learning_rate": 7.755943107134444e-07, - "loss": 0.9721, - "step": 7966 - }, - { - "epoch": 0.7184921314875772, - "grad_norm": 0.6864466617462213, - "learning_rate": 7.751324271742401e-07, - "loss": 0.8604, - "step": 7967 - }, - { - "epoch": 0.7185823150110475, - "grad_norm": 1.3846000281831352, - "learning_rate": 7.746706481474916e-07, - "loss": 0.9856, - "step": 7968 - }, - { - "epoch": 0.7186724985345178, - "grad_norm": 0.6199610235668327, - "learning_rate": 7.742089736725992e-07, - "loss": 0.7935, - "step": 7969 - }, - { - "epoch": 0.718762682057988, - "grad_norm": 1.658278608457524, - "learning_rate": 7.737474037889559e-07, - "loss": 0.8889, - "step": 7970 - }, - { - "epoch": 0.7188528655814582, - "grad_norm": 1.3634844959106371, - "learning_rate": 7.732859385359458e-07, - "loss": 0.9375, - "step": 7971 - }, - { - "epoch": 0.7189430491049286, - "grad_norm": 1.7072822159355652, - "learning_rate": 7.728245779529434e-07, - "loss": 0.888, - "step": 7972 - }, - { - "epoch": 0.7190332326283988, - "grad_norm": 1.513864970042564, - "learning_rate": 7.723633220793146e-07, - "loss": 0.9949, - "step": 7973 - }, - { - "epoch": 0.719123416151869, - "grad_norm": 2.0040891091718502, - "learning_rate": 7.719021709544162e-07, - "loss": 0.8552, - "step": 7974 - }, - { - "epoch": 0.7192135996753393, - "grad_norm": 1.425360463378132, - "learning_rate": 7.714411246175964e-07, - "loss": 1.0539, - "step": 7975 - }, - { - "epoch": 0.7193037831988096, - "grad_norm": 1.4164055228297223, - "learning_rate": 7.709801831081946e-07, - "loss": 0.8832, - "step": 7976 - }, - { - "epoch": 0.7193939667222798, - "grad_norm": 1.8371953023922307, - "learning_rate": 7.705193464655391e-07, - "loss": 0.9291, - "step": 7977 - }, - { - "epoch": 0.7194841502457501, - "grad_norm": 1.1675627139807891, - "learning_rate": 7.700586147289534e-07, - "loss": 0.9429, - "step": 7978 - }, - { - "epoch": 0.7195743337692203, - "grad_norm": 1.3321870505761488, - "learning_rate": 7.695979879377481e-07, - "loss": 0.9247, - "step": 7979 - }, - { - "epoch": 0.7196645172926907, - "grad_norm": 1.326559288281618, - "learning_rate": 7.691374661312266e-07, - "loss": 0.984, - "step": 7980 - }, - { - "epoch": 0.7197547008161609, - "grad_norm": 1.3525906659689926, - "learning_rate": 7.686770493486834e-07, - "loss": 0.9442, - "step": 7981 - }, - { - "epoch": 0.7198448843396311, - "grad_norm": 1.3958793983639548, - "learning_rate": 7.68216737629404e-07, - "loss": 0.9066, - "step": 7982 - }, - { - "epoch": 0.7199350678631015, - "grad_norm": 1.2974984841898332, - "learning_rate": 7.67756531012665e-07, - "loss": 1.0329, - "step": 7983 - }, - { - "epoch": 0.7200252513865717, - "grad_norm": 1.5541060237285942, - "learning_rate": 7.67296429537732e-07, - "loss": 1.0176, - "step": 7984 - }, - { - "epoch": 0.7201154349100419, - "grad_norm": 1.4066337485702816, - "learning_rate": 7.668364332438661e-07, - "loss": 1.0298, - "step": 7985 - }, - { - "epoch": 0.7202056184335122, - "grad_norm": 1.3741167873184688, - "learning_rate": 7.663765421703145e-07, - "loss": 0.9733, - "step": 7986 - }, - { - "epoch": 0.7202958019569825, - "grad_norm": 1.5180816404229174, - "learning_rate": 7.659167563563187e-07, - "loss": 0.9015, - "step": 7987 - }, - { - "epoch": 0.7203859854804527, - "grad_norm": 1.344670667851033, - "learning_rate": 7.654570758411096e-07, - "loss": 0.9867, - "step": 7988 - }, - { - "epoch": 0.720476169003923, - "grad_norm": 1.6224123084965136, - "learning_rate": 7.649975006639103e-07, - "loss": 1.032, - "step": 7989 - }, - { - "epoch": 0.7205663525273932, - "grad_norm": 0.644870091004976, - "learning_rate": 7.645380308639337e-07, - "loss": 0.798, - "step": 7990 - }, - { - "epoch": 0.7206565360508635, - "grad_norm": 1.552058004219531, - "learning_rate": 7.640786664803853e-07, - "loss": 1.0165, - "step": 7991 - }, - { - "epoch": 0.7207467195743338, - "grad_norm": 1.2665724798788653, - "learning_rate": 7.636194075524587e-07, - "loss": 0.8931, - "step": 7992 - }, - { - "epoch": 0.720836903097804, - "grad_norm": 1.3204434121142592, - "learning_rate": 7.631602541193429e-07, - "loss": 0.926, - "step": 7993 - }, - { - "epoch": 0.7209270866212742, - "grad_norm": 1.595024197912072, - "learning_rate": 7.627012062202132e-07, - "loss": 1.0289, - "step": 7994 - }, - { - "epoch": 0.7210172701447446, - "grad_norm": 2.209987153063411, - "learning_rate": 7.622422638942391e-07, - "loss": 0.9633, - "step": 7995 - }, - { - "epoch": 0.7211074536682148, - "grad_norm": 1.4686932654278797, - "learning_rate": 7.617834271805801e-07, - "loss": 1.0307, - "step": 7996 - }, - { - "epoch": 0.7211976371916851, - "grad_norm": 0.6485534931905487, - "learning_rate": 7.613246961183863e-07, - "loss": 0.8063, - "step": 7997 - }, - { - "epoch": 0.7212878207151553, - "grad_norm": 1.4918810944258913, - "learning_rate": 7.608660707468002e-07, - "loss": 0.928, - "step": 7998 - }, - { - "epoch": 0.7213780042386256, - "grad_norm": 1.4921068898741592, - "learning_rate": 7.604075511049522e-07, - "loss": 0.8596, - "step": 7999 - }, - { - "epoch": 0.7214681877620959, - "grad_norm": 1.446640229591546, - "learning_rate": 7.599491372319682e-07, - "loss": 0.9097, - "step": 8000 - }, - { - "epoch": 0.7215583712855661, - "grad_norm": 1.2993510864171067, - "learning_rate": 7.594908291669601e-07, - "loss": 0.9654, - "step": 8001 - }, - { - "epoch": 0.7216485548090363, - "grad_norm": 2.1001329062048346, - "learning_rate": 7.590326269490359e-07, - "loss": 1.0052, - "step": 8002 - }, - { - "epoch": 0.7217387383325067, - "grad_norm": 1.253229737126172, - "learning_rate": 7.585745306172899e-07, - "loss": 0.9237, - "step": 8003 - }, - { - "epoch": 0.7218289218559769, - "grad_norm": 1.476106280534815, - "learning_rate": 7.5811654021081e-07, - "loss": 0.9521, - "step": 8004 - }, - { - "epoch": 0.7219191053794471, - "grad_norm": 3.114734879871352, - "learning_rate": 7.576586557686748e-07, - "loss": 0.9955, - "step": 8005 - }, - { - "epoch": 0.7220092889029175, - "grad_norm": 1.579128588349049, - "learning_rate": 7.572008773299531e-07, - "loss": 0.939, - "step": 8006 - }, - { - "epoch": 0.7220994724263877, - "grad_norm": 1.3705632849148348, - "learning_rate": 7.567432049337055e-07, - "loss": 0.9996, - "step": 8007 - }, - { - "epoch": 0.722189655949858, - "grad_norm": 1.5200809376901305, - "learning_rate": 7.562856386189834e-07, - "loss": 0.9058, - "step": 8008 - }, - { - "epoch": 0.7222798394733282, - "grad_norm": 3.1387624199369717, - "learning_rate": 7.558281784248275e-07, - "loss": 0.9776, - "step": 8009 - }, - { - "epoch": 0.7223700229967985, - "grad_norm": 3.77772572113772, - "learning_rate": 7.553708243902721e-07, - "loss": 0.9567, - "step": 8010 - }, - { - "epoch": 0.7224602065202688, - "grad_norm": 1.4439996478748685, - "learning_rate": 7.549135765543404e-07, - "loss": 0.8593, - "step": 8011 - }, - { - "epoch": 0.722550390043739, - "grad_norm": 1.7584165086403412, - "learning_rate": 7.544564349560481e-07, - "loss": 0.9908, - "step": 8012 - }, - { - "epoch": 0.7226405735672092, - "grad_norm": 1.2030173414538987, - "learning_rate": 7.539993996344009e-07, - "loss": 0.9515, - "step": 8013 - }, - { - "epoch": 0.7227307570906796, - "grad_norm": 1.4497390183350103, - "learning_rate": 7.535424706283941e-07, - "loss": 0.9486, - "step": 8014 - }, - { - "epoch": 0.7228209406141498, - "grad_norm": 2.129645375026171, - "learning_rate": 7.530856479770181e-07, - "loss": 0.928, - "step": 8015 - }, - { - "epoch": 0.72291112413762, - "grad_norm": 1.4008566109022265, - "learning_rate": 7.526289317192484e-07, - "loss": 1.0462, - "step": 8016 - }, - { - "epoch": 0.7230013076610903, - "grad_norm": 1.4722626258696072, - "learning_rate": 7.521723218940579e-07, - "loss": 0.9672, - "step": 8017 - }, - { - "epoch": 0.7230914911845606, - "grad_norm": 1.707406009416214, - "learning_rate": 7.517158185404038e-07, - "loss": 1.0311, - "step": 8018 - }, - { - "epoch": 0.7231816747080309, - "grad_norm": 1.5559690934325807, - "learning_rate": 7.512594216972403e-07, - "loss": 0.9882, - "step": 8019 - }, - { - "epoch": 0.7232718582315011, - "grad_norm": 1.9280041599584947, - "learning_rate": 7.508031314035078e-07, - "loss": 1.0008, - "step": 8020 - }, - { - "epoch": 0.7233620417549713, - "grad_norm": 1.5946449310284367, - "learning_rate": 7.503469476981401e-07, - "loss": 0.9867, - "step": 8021 - }, - { - "epoch": 0.7234522252784417, - "grad_norm": 2.973928171907903, - "learning_rate": 7.498908706200613e-07, - "loss": 0.8382, - "step": 8022 - }, - { - "epoch": 0.7235424088019119, - "grad_norm": 1.4248267754456523, - "learning_rate": 7.494349002081866e-07, - "loss": 1.0252, - "step": 8023 - }, - { - "epoch": 0.7236325923253821, - "grad_norm": 1.4372214171771855, - "learning_rate": 7.489790365014224e-07, - "loss": 0.988, - "step": 8024 - }, - { - "epoch": 0.7237227758488524, - "grad_norm": 1.5680486197816137, - "learning_rate": 7.485232795386642e-07, - "loss": 0.9839, - "step": 8025 - }, - { - "epoch": 0.7238129593723227, - "grad_norm": 1.411255722237027, - "learning_rate": 7.480676293588002e-07, - "loss": 0.9868, - "step": 8026 - }, - { - "epoch": 0.7239031428957929, - "grad_norm": 0.7373165306504337, - "learning_rate": 7.476120860007093e-07, - "loss": 0.863, - "step": 8027 - }, - { - "epoch": 0.7239933264192632, - "grad_norm": 1.6187184776271015, - "learning_rate": 7.471566495032608e-07, - "loss": 1.0118, - "step": 8028 - }, - { - "epoch": 0.7240835099427335, - "grad_norm": 1.6861588835647694, - "learning_rate": 7.467013199053152e-07, - "loss": 0.9045, - "step": 8029 - }, - { - "epoch": 0.7241736934662037, - "grad_norm": 1.4899431696557124, - "learning_rate": 7.46246097245724e-07, - "loss": 0.9644, - "step": 8030 - }, - { - "epoch": 0.724263876989674, - "grad_norm": 1.8536319676054185, - "learning_rate": 7.457909815633276e-07, - "loss": 0.9687, - "step": 8031 - }, - { - "epoch": 0.7243540605131442, - "grad_norm": 1.5132378725088684, - "learning_rate": 7.453359728969618e-07, - "loss": 0.9664, - "step": 8032 - }, - { - "epoch": 0.7244442440366146, - "grad_norm": 1.2832887292929864, - "learning_rate": 7.448810712854475e-07, - "loss": 0.8896, - "step": 8033 - }, - { - "epoch": 0.7245344275600848, - "grad_norm": 1.4281873493232062, - "learning_rate": 7.444262767676022e-07, - "loss": 1.0022, - "step": 8034 - }, - { - "epoch": 0.724624611083555, - "grad_norm": 1.3190281506846562, - "learning_rate": 7.439715893822296e-07, - "loss": 1.0433, - "step": 8035 - }, - { - "epoch": 0.7247147946070253, - "grad_norm": 1.3421107651043638, - "learning_rate": 7.435170091681264e-07, - "loss": 0.9889, - "step": 8036 - }, - { - "epoch": 0.7248049781304956, - "grad_norm": 1.6132276031958834, - "learning_rate": 7.430625361640803e-07, - "loss": 0.9287, - "step": 8037 - }, - { - "epoch": 0.7248951616539658, - "grad_norm": 1.5009262880680534, - "learning_rate": 7.426081704088694e-07, - "loss": 1.0086, - "step": 8038 - }, - { - "epoch": 0.7249853451774361, - "grad_norm": 1.618995650810446, - "learning_rate": 7.42153911941263e-07, - "loss": 1.0368, - "step": 8039 - }, - { - "epoch": 0.7250755287009063, - "grad_norm": 1.3413767594221147, - "learning_rate": 7.416997608000192e-07, - "loss": 1.0117, - "step": 8040 - }, - { - "epoch": 0.7251657122243766, - "grad_norm": 1.6412938525696086, - "learning_rate": 7.412457170238918e-07, - "loss": 0.948, - "step": 8041 - }, - { - "epoch": 0.7252558957478469, - "grad_norm": 1.7002074423614189, - "learning_rate": 7.407917806516193e-07, - "loss": 0.9082, - "step": 8042 - }, - { - "epoch": 0.7253460792713171, - "grad_norm": 1.7367826366814723, - "learning_rate": 7.403379517219354e-07, - "loss": 0.9587, - "step": 8043 - }, - { - "epoch": 0.7254362627947873, - "grad_norm": 1.6280975163752165, - "learning_rate": 7.398842302735636e-07, - "loss": 0.9941, - "step": 8044 - }, - { - "epoch": 0.7255264463182577, - "grad_norm": 0.6843737266911893, - "learning_rate": 7.394306163452171e-07, - "loss": 0.8062, - "step": 8045 - }, - { - "epoch": 0.7256166298417279, - "grad_norm": 1.2065089802603428, - "learning_rate": 7.38977109975601e-07, - "loss": 0.9414, - "step": 8046 - }, - { - "epoch": 0.7257068133651982, - "grad_norm": 1.831061588528662, - "learning_rate": 7.385237112034119e-07, - "loss": 0.8782, - "step": 8047 - }, - { - "epoch": 0.7257969968886684, - "grad_norm": 2.4029135423744368, - "learning_rate": 7.380704200673342e-07, - "loss": 1.0278, - "step": 8048 - }, - { - "epoch": 0.7258871804121387, - "grad_norm": 1.263510456417822, - "learning_rate": 7.376172366060478e-07, - "loss": 0.9912, - "step": 8049 - }, - { - "epoch": 0.725977363935609, - "grad_norm": 0.6455280110348616, - "learning_rate": 7.371641608582187e-07, - "loss": 0.7807, - "step": 8050 - }, - { - "epoch": 0.7260675474590792, - "grad_norm": 1.7130225031856086, - "learning_rate": 7.367111928625067e-07, - "loss": 0.8323, - "step": 8051 - }, - { - "epoch": 0.7261577309825495, - "grad_norm": 1.3619888869376962, - "learning_rate": 7.362583326575613e-07, - "loss": 0.8768, - "step": 8052 - }, - { - "epoch": 0.7262479145060198, - "grad_norm": 1.978643897098224, - "learning_rate": 7.358055802820234e-07, - "loss": 0.9531, - "step": 8053 - }, - { - "epoch": 0.72633809802949, - "grad_norm": 2.3498780563856525, - "learning_rate": 7.353529357745245e-07, - "loss": 0.9409, - "step": 8054 - }, - { - "epoch": 0.7264282815529602, - "grad_norm": 2.0422110966995226, - "learning_rate": 7.349003991736851e-07, - "loss": 1.0064, - "step": 8055 - }, - { - "epoch": 0.7265184650764306, - "grad_norm": 1.6056115302756688, - "learning_rate": 7.344479705181206e-07, - "loss": 0.8149, - "step": 8056 - }, - { - "epoch": 0.7266086485999008, - "grad_norm": 1.361518748002656, - "learning_rate": 7.339956498464322e-07, - "loss": 0.9175, - "step": 8057 - }, - { - "epoch": 0.726698832123371, - "grad_norm": 1.4132489978321787, - "learning_rate": 7.335434371972169e-07, - "loss": 1.0187, - "step": 8058 - }, - { - "epoch": 0.7267890156468413, - "grad_norm": 1.5507166005839166, - "learning_rate": 7.33091332609058e-07, - "loss": 0.9592, - "step": 8059 - }, - { - "epoch": 0.7268791991703116, - "grad_norm": 1.5926708232819544, - "learning_rate": 7.326393361205323e-07, - "loss": 0.9809, - "step": 8060 - }, - { - "epoch": 0.7269693826937819, - "grad_norm": 1.4200259751643987, - "learning_rate": 7.321874477702068e-07, - "loss": 1.0509, - "step": 8061 - }, - { - "epoch": 0.7270595662172521, - "grad_norm": 1.2942694261672456, - "learning_rate": 7.317356675966386e-07, - "loss": 0.8821, - "step": 8062 - }, - { - "epoch": 0.7271497497407223, - "grad_norm": 1.426046017662676, - "learning_rate": 7.312839956383765e-07, - "loss": 0.9959, - "step": 8063 - }, - { - "epoch": 0.7272399332641927, - "grad_norm": 2.107004826160438, - "learning_rate": 7.308324319339603e-07, - "loss": 0.9428, - "step": 8064 - }, - { - "epoch": 0.7273301167876629, - "grad_norm": 1.4527839342420037, - "learning_rate": 7.303809765219182e-07, - "loss": 0.9672, - "step": 8065 - }, - { - "epoch": 0.7274203003111331, - "grad_norm": 1.5183495484179623, - "learning_rate": 7.299296294407719e-07, - "loss": 1.0117, - "step": 8066 - }, - { - "epoch": 0.7275104838346034, - "grad_norm": 1.4102046461597686, - "learning_rate": 7.294783907290327e-07, - "loss": 1.0076, - "step": 8067 - }, - { - "epoch": 0.7276006673580737, - "grad_norm": 1.6867848753059511, - "learning_rate": 7.290272604252028e-07, - "loss": 0.8321, - "step": 8068 - }, - { - "epoch": 0.727690850881544, - "grad_norm": 2.0646614539811763, - "learning_rate": 7.285762385677758e-07, - "loss": 0.8744, - "step": 8069 - }, - { - "epoch": 0.7277810344050142, - "grad_norm": 1.3296723271903959, - "learning_rate": 7.281253251952335e-07, - "loss": 0.9964, - "step": 8070 - }, - { - "epoch": 0.7278712179284844, - "grad_norm": 1.323188313305964, - "learning_rate": 7.276745203460526e-07, - "loss": 0.9517, - "step": 8071 - }, - { - "epoch": 0.7279614014519548, - "grad_norm": 0.7575685560722231, - "learning_rate": 7.272238240586959e-07, - "loss": 0.9115, - "step": 8072 - }, - { - "epoch": 0.728051584975425, - "grad_norm": 1.1970906215294936, - "learning_rate": 7.267732363716219e-07, - "loss": 0.9055, - "step": 8073 - }, - { - "epoch": 0.7281417684988952, - "grad_norm": 1.3631757460378602, - "learning_rate": 7.263227573232753e-07, - "loss": 0.9258, - "step": 8074 - }, - { - "epoch": 0.7282319520223655, - "grad_norm": 1.6804421792086672, - "learning_rate": 7.258723869520937e-07, - "loss": 1.0524, - "step": 8075 - }, - { - "epoch": 0.7283221355458358, - "grad_norm": 1.6207858241292599, - "learning_rate": 7.254221252965059e-07, - "loss": 1.0157, - "step": 8076 - }, - { - "epoch": 0.728412319069306, - "grad_norm": 1.527838975470596, - "learning_rate": 7.249719723949301e-07, - "loss": 0.8799, - "step": 8077 - }, - { - "epoch": 0.7285025025927763, - "grad_norm": 1.4891166845272163, - "learning_rate": 7.245219282857761e-07, - "loss": 0.9872, - "step": 8078 - }, - { - "epoch": 0.7285926861162466, - "grad_norm": 1.4367552776993118, - "learning_rate": 7.240719930074442e-07, - "loss": 0.9994, - "step": 8079 - }, - { - "epoch": 0.7286828696397168, - "grad_norm": 1.700605366014155, - "learning_rate": 7.236221665983257e-07, - "loss": 0.9967, - "step": 8080 - }, - { - "epoch": 0.7287730531631871, - "grad_norm": 0.8521098556370306, - "learning_rate": 7.231724490968012e-07, - "loss": 0.8506, - "step": 8081 - }, - { - "epoch": 0.7288632366866573, - "grad_norm": 1.624401789374964, - "learning_rate": 7.227228405412438e-07, - "loss": 0.9364, - "step": 8082 - }, - { - "epoch": 0.7289534202101277, - "grad_norm": 1.6804121716426046, - "learning_rate": 7.222733409700165e-07, - "loss": 0.9305, - "step": 8083 - }, - { - "epoch": 0.7290436037335979, - "grad_norm": 1.4398134937305302, - "learning_rate": 7.21823950421473e-07, - "loss": 0.9823, - "step": 8084 - }, - { - "epoch": 0.7291337872570681, - "grad_norm": 1.662580591413892, - "learning_rate": 7.213746689339577e-07, - "loss": 0.9378, - "step": 8085 - }, - { - "epoch": 0.7292239707805384, - "grad_norm": 1.731120105373722, - "learning_rate": 7.20925496545807e-07, - "loss": 0.9694, - "step": 8086 - }, - { - "epoch": 0.7293141543040087, - "grad_norm": 3.7761578281118044, - "learning_rate": 7.20476433295344e-07, - "loss": 0.8764, - "step": 8087 - }, - { - "epoch": 0.7294043378274789, - "grad_norm": 1.4336757376610851, - "learning_rate": 7.200274792208882e-07, - "loss": 1.0024, - "step": 8088 - }, - { - "epoch": 0.7294945213509492, - "grad_norm": 1.8921251256791123, - "learning_rate": 7.195786343607444e-07, - "loss": 0.8639, - "step": 8089 - }, - { - "epoch": 0.7295847048744194, - "grad_norm": 1.7168740178158524, - "learning_rate": 7.191298987532131e-07, - "loss": 0.9615, - "step": 8090 - }, - { - "epoch": 0.7296748883978897, - "grad_norm": 1.2585574485236803, - "learning_rate": 7.186812724365805e-07, - "loss": 0.9747, - "step": 8091 - }, - { - "epoch": 0.72976507192136, - "grad_norm": 1.5559972871938166, - "learning_rate": 7.182327554491272e-07, - "loss": 0.9727, - "step": 8092 - }, - { - "epoch": 0.7298552554448302, - "grad_norm": 0.7116838666488645, - "learning_rate": 7.177843478291225e-07, - "loss": 0.8566, - "step": 8093 - }, - { - "epoch": 0.7299454389683004, - "grad_norm": 1.8850531324188813, - "learning_rate": 7.173360496148276e-07, - "loss": 0.9964, - "step": 8094 - }, - { - "epoch": 0.7300356224917708, - "grad_norm": 1.5702882072125923, - "learning_rate": 7.168878608444939e-07, - "loss": 0.978, - "step": 8095 - }, - { - "epoch": 0.730125806015241, - "grad_norm": 1.6518525099044221, - "learning_rate": 7.164397815563623e-07, - "loss": 0.9656, - "step": 8096 - }, - { - "epoch": 0.7302159895387113, - "grad_norm": 1.817383271657485, - "learning_rate": 7.159918117886661e-07, - "loss": 0.9663, - "step": 8097 - }, - { - "epoch": 0.7303061730621815, - "grad_norm": 1.6053393235211018, - "learning_rate": 7.155439515796284e-07, - "loss": 0.9871, - "step": 8098 - }, - { - "epoch": 0.7303963565856518, - "grad_norm": 2.087539229338481, - "learning_rate": 7.150962009674633e-07, - "loss": 0.9571, - "step": 8099 - }, - { - "epoch": 0.7304865401091221, - "grad_norm": 1.278674957295915, - "learning_rate": 7.146485599903751e-07, - "loss": 1.0164, - "step": 8100 - }, - { - "epoch": 0.7305767236325923, - "grad_norm": 1.3503351095858527, - "learning_rate": 7.142010286865592e-07, - "loss": 1.0374, - "step": 8101 - }, - { - "epoch": 0.7306669071560626, - "grad_norm": 0.7591607988705144, - "learning_rate": 7.137536070942012e-07, - "loss": 0.7984, - "step": 8102 - }, - { - "epoch": 0.7307570906795329, - "grad_norm": 1.3204661624130627, - "learning_rate": 7.133062952514786e-07, - "loss": 0.9719, - "step": 8103 - }, - { - "epoch": 0.7308472742030031, - "grad_norm": 0.8735476770671103, - "learning_rate": 7.128590931965562e-07, - "loss": 0.8403, - "step": 8104 - }, - { - "epoch": 0.7309374577264733, - "grad_norm": 1.2801878643753315, - "learning_rate": 7.124120009675945e-07, - "loss": 1.0342, - "step": 8105 - }, - { - "epoch": 0.7310276412499437, - "grad_norm": 1.4595673199529404, - "learning_rate": 7.119650186027399e-07, - "loss": 0.9937, - "step": 8106 - }, - { - "epoch": 0.7311178247734139, - "grad_norm": 1.5854220750727328, - "learning_rate": 7.11518146140132e-07, - "loss": 1.0899, - "step": 8107 - }, - { - "epoch": 0.7312080082968841, - "grad_norm": 1.537239040155157, - "learning_rate": 7.110713836179007e-07, - "loss": 0.9247, - "step": 8108 - }, - { - "epoch": 0.7312981918203544, - "grad_norm": 1.5407083299242428, - "learning_rate": 7.106247310741659e-07, - "loss": 0.8965, - "step": 8109 - }, - { - "epoch": 0.7313883753438247, - "grad_norm": 1.7897227039474988, - "learning_rate": 7.101781885470393e-07, - "loss": 1.03, - "step": 8110 - }, - { - "epoch": 0.731478558867295, - "grad_norm": 1.5070821940352979, - "learning_rate": 7.097317560746203e-07, - "loss": 1.0093, - "step": 8111 - }, - { - "epoch": 0.7315687423907652, - "grad_norm": 1.3256420936903217, - "learning_rate": 7.092854336950036e-07, - "loss": 1.027, - "step": 8112 - }, - { - "epoch": 0.7316589259142354, - "grad_norm": 1.3530263745880236, - "learning_rate": 7.0883922144627e-07, - "loss": 1.0233, - "step": 8113 - }, - { - "epoch": 0.7317491094377058, - "grad_norm": 1.3940835014904263, - "learning_rate": 7.083931193664934e-07, - "loss": 0.9978, - "step": 8114 - }, - { - "epoch": 0.731839292961176, - "grad_norm": 1.961673978220954, - "learning_rate": 7.079471274937378e-07, - "loss": 1.014, - "step": 8115 - }, - { - "epoch": 0.7319294764846462, - "grad_norm": 1.3937767830229701, - "learning_rate": 7.075012458660574e-07, - "loss": 0.9571, - "step": 8116 - }, - { - "epoch": 0.7320196600081165, - "grad_norm": 0.6755328741792158, - "learning_rate": 7.070554745214976e-07, - "loss": 0.8537, - "step": 8117 - }, - { - "epoch": 0.7321098435315868, - "grad_norm": 2.1807131524716947, - "learning_rate": 7.066098134980947e-07, - "loss": 0.9291, - "step": 8118 - }, - { - "epoch": 0.732200027055057, - "grad_norm": 1.3790875712799127, - "learning_rate": 7.061642628338727e-07, - "loss": 0.9694, - "step": 8119 - }, - { - "epoch": 0.7322902105785273, - "grad_norm": 1.2948540256958379, - "learning_rate": 7.057188225668513e-07, - "loss": 0.9674, - "step": 8120 - }, - { - "epoch": 0.7323803941019975, - "grad_norm": 1.5113356940345406, - "learning_rate": 7.052734927350358e-07, - "loss": 1.1179, - "step": 8121 - }, - { - "epoch": 0.7324705776254679, - "grad_norm": 1.318609098165893, - "learning_rate": 7.048282733764252e-07, - "loss": 0.9914, - "step": 8122 - }, - { - "epoch": 0.7325607611489381, - "grad_norm": 1.4125224204520521, - "learning_rate": 7.043831645290077e-07, - "loss": 1.0458, - "step": 8123 - }, - { - "epoch": 0.7326509446724083, - "grad_norm": 1.4567424089015648, - "learning_rate": 7.039381662307624e-07, - "loss": 0.9406, - "step": 8124 - }, - { - "epoch": 0.7327411281958787, - "grad_norm": 1.3162462720247605, - "learning_rate": 7.034932785196601e-07, - "loss": 0.9493, - "step": 8125 - }, - { - "epoch": 0.7328313117193489, - "grad_norm": 1.6422459904991538, - "learning_rate": 7.030485014336585e-07, - "loss": 0.9553, - "step": 8126 - }, - { - "epoch": 0.7329214952428191, - "grad_norm": 1.724976998327842, - "learning_rate": 7.026038350107118e-07, - "loss": 0.9275, - "step": 8127 - }, - { - "epoch": 0.7330116787662894, - "grad_norm": 1.4002649737692994, - "learning_rate": 7.021592792887579e-07, - "loss": 1.1, - "step": 8128 - }, - { - "epoch": 0.7331018622897597, - "grad_norm": 1.5899479712364848, - "learning_rate": 7.01714834305732e-07, - "loss": 1.0269, - "step": 8129 - }, - { - "epoch": 0.7331920458132299, - "grad_norm": 1.6809384677796948, - "learning_rate": 7.012705000995544e-07, - "loss": 0.9305, - "step": 8130 - }, - { - "epoch": 0.7332822293367002, - "grad_norm": 1.3687598641793486, - "learning_rate": 7.008262767081392e-07, - "loss": 0.9669, - "step": 8131 - }, - { - "epoch": 0.7333724128601704, - "grad_norm": 1.669036166339226, - "learning_rate": 7.003821641693892e-07, - "loss": 1.0244, - "step": 8132 - }, - { - "epoch": 0.7334625963836408, - "grad_norm": 1.3745115886572985, - "learning_rate": 6.999381625211993e-07, - "loss": 0.9575, - "step": 8133 - }, - { - "epoch": 0.733552779907111, - "grad_norm": 1.468839196276581, - "learning_rate": 6.994942718014536e-07, - "loss": 0.8944, - "step": 8134 - }, - { - "epoch": 0.7336429634305812, - "grad_norm": 1.6125205696627405, - "learning_rate": 6.990504920480282e-07, - "loss": 0.8808, - "step": 8135 - }, - { - "epoch": 0.7337331469540515, - "grad_norm": 1.8435797531924747, - "learning_rate": 6.986068232987879e-07, - "loss": 0.9702, - "step": 8136 - }, - { - "epoch": 0.7338233304775218, - "grad_norm": 1.2631378224518102, - "learning_rate": 6.981632655915888e-07, - "loss": 0.9292, - "step": 8137 - }, - { - "epoch": 0.733913514000992, - "grad_norm": 1.326058159561055, - "learning_rate": 6.977198189642783e-07, - "loss": 1.0347, - "step": 8138 - }, - { - "epoch": 0.7340036975244623, - "grad_norm": 1.49302777503534, - "learning_rate": 6.972764834546935e-07, - "loss": 1.0006, - "step": 8139 - }, - { - "epoch": 0.7340938810479325, - "grad_norm": 1.4659704325922196, - "learning_rate": 6.96833259100663e-07, - "loss": 0.9909, - "step": 8140 - }, - { - "epoch": 0.7341840645714028, - "grad_norm": 1.6250325713194556, - "learning_rate": 6.96390145940003e-07, - "loss": 0.9399, - "step": 8141 - }, - { - "epoch": 0.7342742480948731, - "grad_norm": 2.482052563223677, - "learning_rate": 6.959471440105253e-07, - "loss": 0.9237, - "step": 8142 - }, - { - "epoch": 0.7343644316183433, - "grad_norm": 1.4562388865820406, - "learning_rate": 6.955042533500261e-07, - "loss": 1.0055, - "step": 8143 - }, - { - "epoch": 0.7344546151418135, - "grad_norm": 1.6812538827617296, - "learning_rate": 6.950614739962986e-07, - "loss": 0.9298, - "step": 8144 - }, - { - "epoch": 0.7345447986652839, - "grad_norm": 1.3742281307834978, - "learning_rate": 6.946188059871198e-07, - "loss": 0.8626, - "step": 8145 - }, - { - "epoch": 0.7346349821887541, - "grad_norm": 1.651209087687997, - "learning_rate": 6.941762493602638e-07, - "loss": 1.0861, - "step": 8146 - }, - { - "epoch": 0.7347251657122243, - "grad_norm": 1.3238287157207667, - "learning_rate": 6.937338041534899e-07, - "loss": 0.9043, - "step": 8147 - }, - { - "epoch": 0.7348153492356947, - "grad_norm": 1.3198717724767604, - "learning_rate": 6.932914704045505e-07, - "loss": 0.9646, - "step": 8148 - }, - { - "epoch": 0.7349055327591649, - "grad_norm": 1.416531762076372, - "learning_rate": 6.928492481511878e-07, - "loss": 0.9977, - "step": 8149 - }, - { - "epoch": 0.7349957162826352, - "grad_norm": 2.9635798357409, - "learning_rate": 6.924071374311349e-07, - "loss": 0.9231, - "step": 8150 - }, - { - "epoch": 0.7350858998061054, - "grad_norm": 0.62502319769724, - "learning_rate": 6.919651382821157e-07, - "loss": 0.8286, - "step": 8151 - }, - { - "epoch": 0.7351760833295757, - "grad_norm": 1.3622193231123096, - "learning_rate": 6.915232507418425e-07, - "loss": 0.9432, - "step": 8152 - }, - { - "epoch": 0.735266266853046, - "grad_norm": 1.4606814236530254, - "learning_rate": 6.910814748480204e-07, - "loss": 0.867, - "step": 8153 - }, - { - "epoch": 0.7353564503765162, - "grad_norm": 1.6840547076646866, - "learning_rate": 6.906398106383445e-07, - "loss": 0.9866, - "step": 8154 - }, - { - "epoch": 0.7354466338999864, - "grad_norm": 1.4031450537796908, - "learning_rate": 6.901982581504994e-07, - "loss": 0.9674, - "step": 8155 - }, - { - "epoch": 0.7355368174234568, - "grad_norm": 6.3824712318880765, - "learning_rate": 6.897568174221611e-07, - "loss": 0.8562, - "step": 8156 - }, - { - "epoch": 0.735627000946927, - "grad_norm": 1.5371732783333407, - "learning_rate": 6.893154884909966e-07, - "loss": 1.0458, - "step": 8157 - }, - { - "epoch": 0.7357171844703972, - "grad_norm": 0.6018102804387268, - "learning_rate": 6.888742713946602e-07, - "loss": 0.8255, - "step": 8158 - }, - { - "epoch": 0.7358073679938675, - "grad_norm": 2.011748022959048, - "learning_rate": 6.884331661708018e-07, - "loss": 0.9711, - "step": 8159 - }, - { - "epoch": 0.7358975515173378, - "grad_norm": 1.3259328815021045, - "learning_rate": 6.879921728570561e-07, - "loss": 0.9671, - "step": 8160 - }, - { - "epoch": 0.735987735040808, - "grad_norm": 1.3499187109456605, - "learning_rate": 6.875512914910539e-07, - "loss": 1.052, - "step": 8161 - }, - { - "epoch": 0.7360779185642783, - "grad_norm": 1.4675290328733022, - "learning_rate": 6.871105221104119e-07, - "loss": 0.9273, - "step": 8162 - }, - { - "epoch": 0.7361681020877485, - "grad_norm": 1.4385255389606628, - "learning_rate": 6.866698647527391e-07, - "loss": 0.9489, - "step": 8163 - }, - { - "epoch": 0.7362582856112189, - "grad_norm": 1.5907280639354182, - "learning_rate": 6.862293194556353e-07, - "loss": 0.9998, - "step": 8164 - }, - { - "epoch": 0.7363484691346891, - "grad_norm": 1.3873571081728884, - "learning_rate": 6.857888862566896e-07, - "loss": 0.9074, - "step": 8165 - }, - { - "epoch": 0.7364386526581593, - "grad_norm": 1.4286841535371761, - "learning_rate": 6.853485651934836e-07, - "loss": 0.9279, - "step": 8166 - }, - { - "epoch": 0.7365288361816296, - "grad_norm": 1.5015180376756085, - "learning_rate": 6.849083563035855e-07, - "loss": 0.834, - "step": 8167 - }, - { - "epoch": 0.7366190197050999, - "grad_norm": 0.7317798699921739, - "learning_rate": 6.844682596245592e-07, - "loss": 0.9285, - "step": 8168 - }, - { - "epoch": 0.7367092032285701, - "grad_norm": 1.5691088718110255, - "learning_rate": 6.840282751939539e-07, - "loss": 1.0248, - "step": 8169 - }, - { - "epoch": 0.7367993867520404, - "grad_norm": 1.2772594739077623, - "learning_rate": 6.835884030493126e-07, - "loss": 1.0126, - "step": 8170 - }, - { - "epoch": 0.7368895702755107, - "grad_norm": 1.6626996114113564, - "learning_rate": 6.831486432281672e-07, - "loss": 0.9881, - "step": 8171 - }, - { - "epoch": 0.736979753798981, - "grad_norm": 1.7445641971932881, - "learning_rate": 6.827089957680407e-07, - "loss": 0.9846, - "step": 8172 - }, - { - "epoch": 0.7370699373224512, - "grad_norm": 1.391749281153794, - "learning_rate": 6.822694607064461e-07, - "loss": 1.0786, - "step": 8173 - }, - { - "epoch": 0.7371601208459214, - "grad_norm": 1.5809651868438879, - "learning_rate": 6.818300380808877e-07, - "loss": 0.9996, - "step": 8174 - }, - { - "epoch": 0.7372503043693918, - "grad_norm": 1.4388225110935557, - "learning_rate": 6.813907279288574e-07, - "loss": 0.9629, - "step": 8175 - }, - { - "epoch": 0.737340487892862, - "grad_norm": 1.4456423821169109, - "learning_rate": 6.809515302878422e-07, - "loss": 0.9876, - "step": 8176 - }, - { - "epoch": 0.7374306714163322, - "grad_norm": 1.5118077766956248, - "learning_rate": 6.80512445195315e-07, - "loss": 0.9588, - "step": 8177 - }, - { - "epoch": 0.7375208549398025, - "grad_norm": 1.5505394735124571, - "learning_rate": 6.800734726887416e-07, - "loss": 0.9923, - "step": 8178 - }, - { - "epoch": 0.7376110384632728, - "grad_norm": 1.7120721268567556, - "learning_rate": 6.796346128055775e-07, - "loss": 0.9714, - "step": 8179 - }, - { - "epoch": 0.737701221986743, - "grad_norm": 1.7779737760199732, - "learning_rate": 6.791958655832684e-07, - "loss": 0.9401, - "step": 8180 - }, - { - "epoch": 0.7377914055102133, - "grad_norm": 1.5266855680275677, - "learning_rate": 6.787572310592518e-07, - "loss": 0.9352, - "step": 8181 - }, - { - "epoch": 0.7378815890336835, - "grad_norm": 1.5924133230482307, - "learning_rate": 6.783187092709521e-07, - "loss": 1.0472, - "step": 8182 - }, - { - "epoch": 0.7379717725571538, - "grad_norm": 1.5389958865858682, - "learning_rate": 6.778803002557891e-07, - "loss": 0.9798, - "step": 8183 - }, - { - "epoch": 0.7380619560806241, - "grad_norm": 1.2350888180165778, - "learning_rate": 6.774420040511686e-07, - "loss": 1.0053, - "step": 8184 - }, - { - "epoch": 0.7381521396040943, - "grad_norm": 2.5627851560148174, - "learning_rate": 6.770038206944886e-07, - "loss": 0.9714, - "step": 8185 - }, - { - "epoch": 0.7382423231275645, - "grad_norm": 1.2349275245713445, - "learning_rate": 6.765657502231375e-07, - "loss": 0.9869, - "step": 8186 - }, - { - "epoch": 0.7383325066510349, - "grad_norm": 1.534590444569921, - "learning_rate": 6.761277926744939e-07, - "loss": 1.0384, - "step": 8187 - }, - { - "epoch": 0.7384226901745051, - "grad_norm": 1.369647140517547, - "learning_rate": 6.756899480859268e-07, - "loss": 0.9541, - "step": 8188 - }, - { - "epoch": 0.7385128736979754, - "grad_norm": 1.6486592075895417, - "learning_rate": 6.752522164947956e-07, - "loss": 1.0177, - "step": 8189 - }, - { - "epoch": 0.7386030572214456, - "grad_norm": 1.1107405196838576, - "learning_rate": 6.748145979384498e-07, - "loss": 1.0215, - "step": 8190 - }, - { - "epoch": 0.7386932407449159, - "grad_norm": 1.5260709254179925, - "learning_rate": 6.743770924542303e-07, - "loss": 0.9145, - "step": 8191 - }, - { - "epoch": 0.7387834242683862, - "grad_norm": 0.654060890571229, - "learning_rate": 6.739397000794658e-07, - "loss": 0.8422, - "step": 8192 - }, - { - "epoch": 0.7388736077918564, - "grad_norm": 1.9833222848386793, - "learning_rate": 6.735024208514782e-07, - "loss": 0.9445, - "step": 8193 - }, - { - "epoch": 0.7389637913153266, - "grad_norm": 1.3270000405674507, - "learning_rate": 6.73065254807578e-07, - "loss": 0.8869, - "step": 8194 - }, - { - "epoch": 0.739053974838797, - "grad_norm": 1.5226527613780425, - "learning_rate": 6.726282019850669e-07, - "loss": 1.0297, - "step": 8195 - }, - { - "epoch": 0.7391441583622672, - "grad_norm": 1.1178380499253462, - "learning_rate": 6.721912624212376e-07, - "loss": 1.0355, - "step": 8196 - }, - { - "epoch": 0.7392343418857374, - "grad_norm": 1.446608503611024, - "learning_rate": 6.717544361533696e-07, - "loss": 0.8687, - "step": 8197 - }, - { - "epoch": 0.7393245254092078, - "grad_norm": 1.4136583794031061, - "learning_rate": 6.713177232187386e-07, - "loss": 0.9527, - "step": 8198 - }, - { - "epoch": 0.739414708932678, - "grad_norm": 1.5517547396130682, - "learning_rate": 6.708811236546041e-07, - "loss": 0.9657, - "step": 8199 - }, - { - "epoch": 0.7395048924561483, - "grad_norm": 3.6907289236813567, - "learning_rate": 6.704446374982224e-07, - "loss": 0.9351, - "step": 8200 - }, - { - "epoch": 0.7395950759796185, - "grad_norm": 1.6885442681579819, - "learning_rate": 6.700082647868346e-07, - "loss": 1.0107, - "step": 8201 - }, - { - "epoch": 0.7396852595030888, - "grad_norm": 1.4173694531643137, - "learning_rate": 6.695720055576751e-07, - "loss": 1.0274, - "step": 8202 - }, - { - "epoch": 0.7397754430265591, - "grad_norm": 1.493531425796769, - "learning_rate": 6.691358598479679e-07, - "loss": 0.9857, - "step": 8203 - }, - { - "epoch": 0.7398656265500293, - "grad_norm": 6.687669413194389, - "learning_rate": 6.686998276949276e-07, - "loss": 0.9721, - "step": 8204 - }, - { - "epoch": 0.7399558100734995, - "grad_norm": 0.7236311239410756, - "learning_rate": 6.682639091357587e-07, - "loss": 0.8826, - "step": 8205 - }, - { - "epoch": 0.7400459935969699, - "grad_norm": 1.4875092193574264, - "learning_rate": 6.678281042076568e-07, - "loss": 0.9577, - "step": 8206 - }, - { - "epoch": 0.7401361771204401, - "grad_norm": 1.3529009944163501, - "learning_rate": 6.673924129478059e-07, - "loss": 0.9733, - "step": 8207 - }, - { - "epoch": 0.7402263606439103, - "grad_norm": 1.5786068624453866, - "learning_rate": 6.669568353933824e-07, - "loss": 0.8919, - "step": 8208 - }, - { - "epoch": 0.7403165441673806, - "grad_norm": 1.2275949494257672, - "learning_rate": 6.665213715815519e-07, - "loss": 0.9082, - "step": 8209 - }, - { - "epoch": 0.7404067276908509, - "grad_norm": 1.555461748261646, - "learning_rate": 6.660860215494706e-07, - "loss": 0.9774, - "step": 8210 - }, - { - "epoch": 0.7404969112143212, - "grad_norm": 1.4569753682156181, - "learning_rate": 6.656507853342852e-07, - "loss": 0.9269, - "step": 8211 - }, - { - "epoch": 0.7405870947377914, - "grad_norm": 1.228220942197982, - "learning_rate": 6.652156629731323e-07, - "loss": 0.976, - "step": 8212 - }, - { - "epoch": 0.7406772782612616, - "grad_norm": 1.4891430219592252, - "learning_rate": 6.647806545031396e-07, - "loss": 1.0329, - "step": 8213 - }, - { - "epoch": 0.740767461784732, - "grad_norm": 1.5661909378740462, - "learning_rate": 6.643457599614224e-07, - "loss": 0.8978, - "step": 8214 - }, - { - "epoch": 0.7408576453082022, - "grad_norm": 1.701131441356274, - "learning_rate": 6.63910979385091e-07, - "loss": 1.0283, - "step": 8215 - }, - { - "epoch": 0.7409478288316724, - "grad_norm": 1.416790601507685, - "learning_rate": 6.634763128112409e-07, - "loss": 1.0537, - "step": 8216 - }, - { - "epoch": 0.7410380123551427, - "grad_norm": 1.704375621542347, - "learning_rate": 6.630417602769622e-07, - "loss": 0.9582, - "step": 8217 - }, - { - "epoch": 0.741128195878613, - "grad_norm": 1.4418997397943938, - "learning_rate": 6.62607321819332e-07, - "loss": 0.9856, - "step": 8218 - }, - { - "epoch": 0.7412183794020832, - "grad_norm": 1.1821823013518074, - "learning_rate": 6.621729974754196e-07, - "loss": 0.9738, - "step": 8219 - }, - { - "epoch": 0.7413085629255535, - "grad_norm": 1.7850357115135775, - "learning_rate": 6.617387872822835e-07, - "loss": 0.9221, - "step": 8220 - }, - { - "epoch": 0.7413987464490238, - "grad_norm": 2.00999741473435, - "learning_rate": 6.613046912769731e-07, - "loss": 0.9436, - "step": 8221 - }, - { - "epoch": 0.741488929972494, - "grad_norm": 1.787768233120791, - "learning_rate": 6.608707094965289e-07, - "loss": 1.0381, - "step": 8222 - }, - { - "epoch": 0.7415791134959643, - "grad_norm": 2.975627602165637, - "learning_rate": 6.604368419779787e-07, - "loss": 0.9038, - "step": 8223 - }, - { - "epoch": 0.7416692970194345, - "grad_norm": 1.7972811695961317, - "learning_rate": 6.600030887583434e-07, - "loss": 0.9795, - "step": 8224 - }, - { - "epoch": 0.7417594805429049, - "grad_norm": 1.6072398716543217, - "learning_rate": 6.595694498746336e-07, - "loss": 1.069, - "step": 8225 - }, - { - "epoch": 0.7418496640663751, - "grad_norm": 1.5655907389228985, - "learning_rate": 6.591359253638491e-07, - "loss": 0.9421, - "step": 8226 - }, - { - "epoch": 0.7419398475898453, - "grad_norm": 1.7517972981058976, - "learning_rate": 6.587025152629808e-07, - "loss": 0.8687, - "step": 8227 - }, - { - "epoch": 0.7420300311133156, - "grad_norm": 1.8823069293345076, - "learning_rate": 6.582692196090107e-07, - "loss": 1.0086, - "step": 8228 - }, - { - "epoch": 0.7421202146367859, - "grad_norm": 1.277264607162112, - "learning_rate": 6.578360384389074e-07, - "loss": 0.9795, - "step": 8229 - }, - { - "epoch": 0.7422103981602561, - "grad_norm": 1.5451447799894158, - "learning_rate": 6.574029717896355e-07, - "loss": 0.9528, - "step": 8230 - }, - { - "epoch": 0.7423005816837264, - "grad_norm": 1.5277091367310127, - "learning_rate": 6.569700196981436e-07, - "loss": 0.9637, - "step": 8231 - }, - { - "epoch": 0.7423907652071966, - "grad_norm": 2.017204789280392, - "learning_rate": 6.565371822013763e-07, - "loss": 0.871, - "step": 8232 - }, - { - "epoch": 0.742480948730667, - "grad_norm": 1.501110698834173, - "learning_rate": 6.561044593362636e-07, - "loss": 1.0248, - "step": 8233 - }, - { - "epoch": 0.7425711322541372, - "grad_norm": 1.321111809569305, - "learning_rate": 6.556718511397288e-07, - "loss": 0.9561, - "step": 8234 - }, - { - "epoch": 0.7426613157776074, - "grad_norm": 1.2405018915933057, - "learning_rate": 6.552393576486843e-07, - "loss": 1.0779, - "step": 8235 - }, - { - "epoch": 0.7427514993010776, - "grad_norm": 1.580412764413275, - "learning_rate": 6.548069789000325e-07, - "loss": 0.9821, - "step": 8236 - }, - { - "epoch": 0.742841682824548, - "grad_norm": 1.5259618727444346, - "learning_rate": 6.543747149306673e-07, - "loss": 0.9543, - "step": 8237 - }, - { - "epoch": 0.7429318663480182, - "grad_norm": 1.4478863866187528, - "learning_rate": 6.5394256577747e-07, - "loss": 1.0226, - "step": 8238 - }, - { - "epoch": 0.7430220498714885, - "grad_norm": 1.6203841161397166, - "learning_rate": 6.535105314773161e-07, - "loss": 1.0015, - "step": 8239 - }, - { - "epoch": 0.7431122333949587, - "grad_norm": 1.6423958803152154, - "learning_rate": 6.530786120670677e-07, - "loss": 0.904, - "step": 8240 - }, - { - "epoch": 0.743202416918429, - "grad_norm": 1.4966137333510134, - "learning_rate": 6.526468075835787e-07, - "loss": 1.0185, - "step": 8241 - }, - { - "epoch": 0.7432926004418993, - "grad_norm": 1.4541213198844534, - "learning_rate": 6.522151180636937e-07, - "loss": 1.012, - "step": 8242 - }, - { - "epoch": 0.7433827839653695, - "grad_norm": 1.3591762704391053, - "learning_rate": 6.517835435442461e-07, - "loss": 0.9364, - "step": 8243 - }, - { - "epoch": 0.7434729674888398, - "grad_norm": 2.4053745411150493, - "learning_rate": 6.513520840620606e-07, - "loss": 0.9965, - "step": 8244 - }, - { - "epoch": 0.7435631510123101, - "grad_norm": 1.641044998904901, - "learning_rate": 6.509207396539525e-07, - "loss": 0.9228, - "step": 8245 - }, - { - "epoch": 0.7436533345357803, - "grad_norm": 0.6593533661534039, - "learning_rate": 6.50489510356724e-07, - "loss": 0.8121, - "step": 8246 - }, - { - "epoch": 0.7437435180592505, - "grad_norm": 1.5884056241508673, - "learning_rate": 6.500583962071732e-07, - "loss": 0.8956, - "step": 8247 - }, - { - "epoch": 0.7438337015827209, - "grad_norm": 1.4516098162573803, - "learning_rate": 6.496273972420827e-07, - "loss": 0.9982, - "step": 8248 - }, - { - "epoch": 0.7439238851061911, - "grad_norm": 2.2339704454084104, - "learning_rate": 6.491965134982287e-07, - "loss": 1.0276, - "step": 8249 - }, - { - "epoch": 0.7440140686296614, - "grad_norm": 1.5124092200913528, - "learning_rate": 6.487657450123765e-07, - "loss": 0.9391, - "step": 8250 - }, - { - "epoch": 0.7441042521531316, - "grad_norm": 1.305116183272081, - "learning_rate": 6.483350918212814e-07, - "loss": 0.982, - "step": 8251 - }, - { - "epoch": 0.7441944356766019, - "grad_norm": 0.7509248911687632, - "learning_rate": 6.479045539616898e-07, - "loss": 0.8508, - "step": 8252 - }, - { - "epoch": 0.7442846192000722, - "grad_norm": 1.9876611488317302, - "learning_rate": 6.474741314703358e-07, - "loss": 0.9748, - "step": 8253 - }, - { - "epoch": 0.7443748027235424, - "grad_norm": 2.289473981620737, - "learning_rate": 6.47043824383948e-07, - "loss": 1.0303, - "step": 8254 - }, - { - "epoch": 0.7444649862470126, - "grad_norm": 1.839563757452773, - "learning_rate": 6.466136327392399e-07, - "loss": 1.0131, - "step": 8255 - }, - { - "epoch": 0.744555169770483, - "grad_norm": 2.900412161904466, - "learning_rate": 6.461835565729206e-07, - "loss": 1.0058, - "step": 8256 - }, - { - "epoch": 0.7446453532939532, - "grad_norm": 6.914517386607489, - "learning_rate": 6.457535959216844e-07, - "loss": 0.9987, - "step": 8257 - }, - { - "epoch": 0.7447355368174234, - "grad_norm": 1.5243352347892571, - "learning_rate": 6.453237508222186e-07, - "loss": 0.9345, - "step": 8258 - }, - { - "epoch": 0.7448257203408937, - "grad_norm": 2.3088405625653685, - "learning_rate": 6.448940213112e-07, - "loss": 0.9897, - "step": 8259 - }, - { - "epoch": 0.744915903864364, - "grad_norm": 1.2457383943614644, - "learning_rate": 6.444644074252954e-07, - "loss": 1.0568, - "step": 8260 - }, - { - "epoch": 0.7450060873878342, - "grad_norm": 1.8713113104816885, - "learning_rate": 6.440349092011628e-07, - "loss": 1.0424, - "step": 8261 - }, - { - "epoch": 0.7450962709113045, - "grad_norm": 1.3893158568965893, - "learning_rate": 6.436055266754475e-07, - "loss": 0.9694, - "step": 8262 - }, - { - "epoch": 0.7451864544347747, - "grad_norm": 2.6508776920672066, - "learning_rate": 6.431762598847879e-07, - "loss": 0.9546, - "step": 8263 - }, - { - "epoch": 0.7452766379582451, - "grad_norm": 1.4771961285666158, - "learning_rate": 6.427471088658111e-07, - "loss": 0.9522, - "step": 8264 - }, - { - "epoch": 0.7453668214817153, - "grad_norm": 1.6493924525162598, - "learning_rate": 6.42318073655135e-07, - "loss": 0.9709, - "step": 8265 - }, - { - "epoch": 0.7454570050051855, - "grad_norm": 2.4604225225376233, - "learning_rate": 6.41889154289367e-07, - "loss": 0.938, - "step": 8266 - }, - { - "epoch": 0.7455471885286559, - "grad_norm": 1.5756604429659677, - "learning_rate": 6.414603508051055e-07, - "loss": 1.032, - "step": 8267 - }, - { - "epoch": 0.7456373720521261, - "grad_norm": 0.6997331400097724, - "learning_rate": 6.410316632389365e-07, - "loss": 0.8865, - "step": 8268 - }, - { - "epoch": 0.7457275555755963, - "grad_norm": 1.858476630132961, - "learning_rate": 6.406030916274406e-07, - "loss": 0.976, - "step": 8269 - }, - { - "epoch": 0.7458177390990666, - "grad_norm": 2.9174201173804892, - "learning_rate": 6.401746360071831e-07, - "loss": 0.9331, - "step": 8270 - }, - { - "epoch": 0.7459079226225369, - "grad_norm": 2.1150186275446465, - "learning_rate": 6.397462964147251e-07, - "loss": 0.9384, - "step": 8271 - }, - { - "epoch": 0.7459981061460071, - "grad_norm": 1.7634563124209401, - "learning_rate": 6.393180728866128e-07, - "loss": 0.9669, - "step": 8272 - }, - { - "epoch": 0.7460882896694774, - "grad_norm": 1.3514311180865557, - "learning_rate": 6.388899654593853e-07, - "loss": 1.0061, - "step": 8273 - }, - { - "epoch": 0.7461784731929476, - "grad_norm": 2.8557110434460395, - "learning_rate": 6.384619741695709e-07, - "loss": 1.0062, - "step": 8274 - }, - { - "epoch": 0.746268656716418, - "grad_norm": 1.5721975371029582, - "learning_rate": 6.380340990536883e-07, - "loss": 0.9787, - "step": 8275 - }, - { - "epoch": 0.7463588402398882, - "grad_norm": 1.6947373500092449, - "learning_rate": 6.37606340148247e-07, - "loss": 0.9746, - "step": 8276 - }, - { - "epoch": 0.7464490237633584, - "grad_norm": 1.6208092865063113, - "learning_rate": 6.371786974897433e-07, - "loss": 0.8763, - "step": 8277 - }, - { - "epoch": 0.7465392072868287, - "grad_norm": 1.2822443197492337, - "learning_rate": 6.367511711146691e-07, - "loss": 0.9996, - "step": 8278 - }, - { - "epoch": 0.746629390810299, - "grad_norm": 1.3656622089406032, - "learning_rate": 6.363237610595014e-07, - "loss": 0.9171, - "step": 8279 - }, - { - "epoch": 0.7467195743337692, - "grad_norm": 1.7680877119591059, - "learning_rate": 6.358964673607094e-07, - "loss": 0.9486, - "step": 8280 - }, - { - "epoch": 0.7468097578572395, - "grad_norm": 1.2414399302710932, - "learning_rate": 6.354692900547525e-07, - "loss": 1.0124, - "step": 8281 - }, - { - "epoch": 0.7468999413807097, - "grad_norm": 1.5006643254705125, - "learning_rate": 6.350422291780797e-07, - "loss": 0.9432, - "step": 8282 - }, - { - "epoch": 0.74699012490418, - "grad_norm": 1.752982867186016, - "learning_rate": 6.346152847671302e-07, - "loss": 1.0686, - "step": 8283 - }, - { - "epoch": 0.7470803084276503, - "grad_norm": 1.8168495867736196, - "learning_rate": 6.34188456858334e-07, - "loss": 0.9693, - "step": 8284 - }, - { - "epoch": 0.7471704919511205, - "grad_norm": 1.7483136362445701, - "learning_rate": 6.337617454881081e-07, - "loss": 0.9324, - "step": 8285 - }, - { - "epoch": 0.7472606754745907, - "grad_norm": 1.245489135238445, - "learning_rate": 6.333351506928651e-07, - "loss": 0.8905, - "step": 8286 - }, - { - "epoch": 0.7473508589980611, - "grad_norm": 1.8637714500907072, - "learning_rate": 6.329086725090018e-07, - "loss": 0.9445, - "step": 8287 - }, - { - "epoch": 0.7474410425215313, - "grad_norm": 1.5159695420489305, - "learning_rate": 6.324823109729087e-07, - "loss": 0.9402, - "step": 8288 - }, - { - "epoch": 0.7475312260450016, - "grad_norm": 1.5694308106872064, - "learning_rate": 6.320560661209653e-07, - "loss": 0.9039, - "step": 8289 - }, - { - "epoch": 0.7476214095684719, - "grad_norm": 1.4979425466085352, - "learning_rate": 6.316299379895411e-07, - "loss": 0.8606, - "step": 8290 - }, - { - "epoch": 0.7477115930919421, - "grad_norm": 1.5703273032330474, - "learning_rate": 6.312039266149965e-07, - "loss": 1.0044, - "step": 8291 - }, - { - "epoch": 0.7478017766154124, - "grad_norm": 1.3467759062741584, - "learning_rate": 6.307780320336789e-07, - "loss": 1.0073, - "step": 8292 - }, - { - "epoch": 0.7478919601388826, - "grad_norm": 2.0565077692576534, - "learning_rate": 6.303522542819306e-07, - "loss": 1.0294, - "step": 8293 - }, - { - "epoch": 0.7479821436623529, - "grad_norm": 2.09706286343501, - "learning_rate": 6.299265933960796e-07, - "loss": 0.9467, - "step": 8294 - }, - { - "epoch": 0.7480723271858232, - "grad_norm": 1.935016886713828, - "learning_rate": 6.295010494124462e-07, - "loss": 0.9261, - "step": 8295 - }, - { - "epoch": 0.7481625107092934, - "grad_norm": 1.3575777911102223, - "learning_rate": 6.290756223673399e-07, - "loss": 0.9461, - "step": 8296 - }, - { - "epoch": 0.7482526942327636, - "grad_norm": 1.3310588520345938, - "learning_rate": 6.28650312297061e-07, - "loss": 1.0011, - "step": 8297 - }, - { - "epoch": 0.748342877756234, - "grad_norm": 1.6383975939535564, - "learning_rate": 6.282251192378987e-07, - "loss": 0.8869, - "step": 8298 - }, - { - "epoch": 0.7484330612797042, - "grad_norm": 1.710979722429575, - "learning_rate": 6.278000432261334e-07, - "loss": 0.9018, - "step": 8299 - }, - { - "epoch": 0.7485232448031744, - "grad_norm": 2.1352508356476605, - "learning_rate": 6.273750842980345e-07, - "loss": 0.9978, - "step": 8300 - }, - { - "epoch": 0.7486134283266447, - "grad_norm": 1.417902585001808, - "learning_rate": 6.269502424898625e-07, - "loss": 1.0023, - "step": 8301 - }, - { - "epoch": 0.748703611850115, - "grad_norm": 1.6926443312845183, - "learning_rate": 6.265255178378663e-07, - "loss": 1.0171, - "step": 8302 - }, - { - "epoch": 0.7487937953735853, - "grad_norm": 1.4478758479157607, - "learning_rate": 6.261009103782861e-07, - "loss": 0.8681, - "step": 8303 - }, - { - "epoch": 0.7488839788970555, - "grad_norm": 1.808952345427583, - "learning_rate": 6.256764201473519e-07, - "loss": 1.0267, - "step": 8304 - }, - { - "epoch": 0.7489741624205257, - "grad_norm": 0.6223076527801259, - "learning_rate": 6.252520471812835e-07, - "loss": 0.8285, - "step": 8305 - }, - { - "epoch": 0.7490643459439961, - "grad_norm": 1.5299943719398854, - "learning_rate": 6.248277915162912e-07, - "loss": 0.9863, - "step": 8306 - }, - { - "epoch": 0.7491545294674663, - "grad_norm": 1.556221976276849, - "learning_rate": 6.244036531885731e-07, - "loss": 0.8289, - "step": 8307 - }, - { - "epoch": 0.7492447129909365, - "grad_norm": 1.6418839483521872, - "learning_rate": 6.239796322343216e-07, - "loss": 0.9042, - "step": 8308 - }, - { - "epoch": 0.7493348965144068, - "grad_norm": 1.4482835903924594, - "learning_rate": 6.235557286897137e-07, - "loss": 0.9117, - "step": 8309 - }, - { - "epoch": 0.7494250800378771, - "grad_norm": 1.9015344547156392, - "learning_rate": 6.231319425909223e-07, - "loss": 0.97, - "step": 8310 - }, - { - "epoch": 0.7495152635613473, - "grad_norm": 1.5018951047646245, - "learning_rate": 6.227082739741045e-07, - "loss": 1.0084, - "step": 8311 - }, - { - "epoch": 0.7496054470848176, - "grad_norm": 1.4226690736933196, - "learning_rate": 6.222847228754113e-07, - "loss": 0.9946, - "step": 8312 - }, - { - "epoch": 0.7496956306082878, - "grad_norm": 1.5629571627349725, - "learning_rate": 6.218612893309823e-07, - "loss": 1.0699, - "step": 8313 - }, - { - "epoch": 0.7497858141317582, - "grad_norm": 1.538893482202927, - "learning_rate": 6.214379733769468e-07, - "loss": 0.9181, - "step": 8314 - }, - { - "epoch": 0.7498759976552284, - "grad_norm": 1.3953119651733603, - "learning_rate": 6.21014775049425e-07, - "loss": 1.0025, - "step": 8315 - }, - { - "epoch": 0.7499661811786986, - "grad_norm": 0.5728234706562734, - "learning_rate": 6.205916943845267e-07, - "loss": 0.8124, - "step": 8316 - }, - { - "epoch": 0.750056364702169, - "grad_norm": 1.8217673320592296, - "learning_rate": 6.201687314183504e-07, - "loss": 1.0166, - "step": 8317 - }, - { - "epoch": 0.7501465482256392, - "grad_norm": 0.9093276414632483, - "learning_rate": 6.197458861869862e-07, - "loss": 0.8785, - "step": 8318 - }, - { - "epoch": 0.7502367317491094, - "grad_norm": 1.5375578055331236, - "learning_rate": 6.193231587265138e-07, - "loss": 1.0105, - "step": 8319 - }, - { - "epoch": 0.7503269152725797, - "grad_norm": 1.8500765861948465, - "learning_rate": 6.189005490730024e-07, - "loss": 0.951, - "step": 8320 - }, - { - "epoch": 0.75041709879605, - "grad_norm": 1.3371128912480241, - "learning_rate": 6.184780572625115e-07, - "loss": 0.9923, - "step": 8321 - }, - { - "epoch": 0.7505072823195202, - "grad_norm": 1.6988520785686956, - "learning_rate": 6.180556833310902e-07, - "loss": 0.875, - "step": 8322 - }, - { - "epoch": 0.7505974658429905, - "grad_norm": 1.480585018212323, - "learning_rate": 6.176334273147788e-07, - "loss": 1.0117, - "step": 8323 - }, - { - "epoch": 0.7506876493664607, - "grad_norm": 2.0322175289248405, - "learning_rate": 6.172112892496042e-07, - "loss": 0.8788, - "step": 8324 - }, - { - "epoch": 0.750777832889931, - "grad_norm": 1.3848845910970196, - "learning_rate": 6.167892691715883e-07, - "loss": 1.01, - "step": 8325 - }, - { - "epoch": 0.7508680164134013, - "grad_norm": 1.3856571916102238, - "learning_rate": 6.163673671167378e-07, - "loss": 0.9539, - "step": 8326 - }, - { - "epoch": 0.7509581999368715, - "grad_norm": 2.027183570487535, - "learning_rate": 6.15945583121054e-07, - "loss": 1.0177, - "step": 8327 - }, - { - "epoch": 0.7510483834603418, - "grad_norm": 4.998292059062819, - "learning_rate": 6.15523917220524e-07, - "loss": 0.9218, - "step": 8328 - }, - { - "epoch": 0.7511385669838121, - "grad_norm": 2.503585056884316, - "learning_rate": 6.151023694511273e-07, - "loss": 1.0366, - "step": 8329 - }, - { - "epoch": 0.7512287505072823, - "grad_norm": 1.502624123725607, - "learning_rate": 6.146809398488328e-07, - "loss": 0.914, - "step": 8330 - }, - { - "epoch": 0.7513189340307526, - "grad_norm": 2.0622618566646915, - "learning_rate": 6.142596284495989e-07, - "loss": 1.0161, - "step": 8331 - }, - { - "epoch": 0.7514091175542228, - "grad_norm": 1.5640831365757792, - "learning_rate": 6.138384352893751e-07, - "loss": 0.9307, - "step": 8332 - }, - { - "epoch": 0.7514993010776931, - "grad_norm": 1.943253373365322, - "learning_rate": 6.134173604040987e-07, - "loss": 0.9637, - "step": 8333 - }, - { - "epoch": 0.7515894846011634, - "grad_norm": 1.5088956431055254, - "learning_rate": 6.129964038296984e-07, - "loss": 0.9788, - "step": 8334 - }, - { - "epoch": 0.7516796681246336, - "grad_norm": 1.6852085662488256, - "learning_rate": 6.12575565602093e-07, - "loss": 1.0432, - "step": 8335 - }, - { - "epoch": 0.7517698516481038, - "grad_norm": 1.1297159426156917, - "learning_rate": 6.121548457571905e-07, - "loss": 1.04, - "step": 8336 - }, - { - "epoch": 0.7518600351715742, - "grad_norm": 1.8737491249861147, - "learning_rate": 6.11734244330889e-07, - "loss": 0.9193, - "step": 8337 - }, - { - "epoch": 0.7519502186950444, - "grad_norm": 1.3514801176685394, - "learning_rate": 6.113137613590773e-07, - "loss": 0.9811, - "step": 8338 - }, - { - "epoch": 0.7520404022185146, - "grad_norm": 1.3738128999675052, - "learning_rate": 6.108933968776313e-07, - "loss": 1.0242, - "step": 8339 - }, - { - "epoch": 0.752130585741985, - "grad_norm": 1.3575546529035645, - "learning_rate": 6.104731509224212e-07, - "loss": 0.971, - "step": 8340 - }, - { - "epoch": 0.7522207692654552, - "grad_norm": 1.7249651117528513, - "learning_rate": 6.100530235293027e-07, - "loss": 0.8997, - "step": 8341 - }, - { - "epoch": 0.7523109527889255, - "grad_norm": 1.5248613013058647, - "learning_rate": 6.096330147341253e-07, - "loss": 0.9948, - "step": 8342 - }, - { - "epoch": 0.7524011363123957, - "grad_norm": 1.7796994203029306, - "learning_rate": 6.09213124572725e-07, - "loss": 1.0321, - "step": 8343 - }, - { - "epoch": 0.752491319835866, - "grad_norm": 1.3811772909920803, - "learning_rate": 6.087933530809297e-07, - "loss": 1.0466, - "step": 8344 - }, - { - "epoch": 0.7525815033593363, - "grad_norm": 2.4920256272126435, - "learning_rate": 6.083737002945566e-07, - "loss": 0.9613, - "step": 8345 - }, - { - "epoch": 0.7526716868828065, - "grad_norm": 1.5032725398563973, - "learning_rate": 6.079541662494126e-07, - "loss": 0.9244, - "step": 8346 - }, - { - "epoch": 0.7527618704062767, - "grad_norm": 1.8366608838142056, - "learning_rate": 6.075347509812954e-07, - "loss": 0.9803, - "step": 8347 - }, - { - "epoch": 0.7528520539297471, - "grad_norm": 1.569205885829384, - "learning_rate": 6.0711545452599e-07, - "loss": 0.939, - "step": 8348 - }, - { - "epoch": 0.7529422374532173, - "grad_norm": 1.4186157768044476, - "learning_rate": 6.066962769192756e-07, - "loss": 0.8996, - "step": 8349 - }, - { - "epoch": 0.7530324209766875, - "grad_norm": 1.7078562124509271, - "learning_rate": 6.062772181969167e-07, - "loss": 0.9104, - "step": 8350 - }, - { - "epoch": 0.7531226045001578, - "grad_norm": 1.3415430709032647, - "learning_rate": 6.058582783946706e-07, - "loss": 1.0424, - "step": 8351 - }, - { - "epoch": 0.7532127880236281, - "grad_norm": 1.5706610601181543, - "learning_rate": 6.054394575482833e-07, - "loss": 0.9583, - "step": 8352 - }, - { - "epoch": 0.7533029715470984, - "grad_norm": 1.3281103245541452, - "learning_rate": 6.05020755693491e-07, - "loss": 0.8873, - "step": 8353 - }, - { - "epoch": 0.7533931550705686, - "grad_norm": 1.409547330889127, - "learning_rate": 6.046021728660198e-07, - "loss": 0.8862, - "step": 8354 - }, - { - "epoch": 0.7534833385940388, - "grad_norm": 1.4153787798785156, - "learning_rate": 6.041837091015858e-07, - "loss": 1.012, - "step": 8355 - }, - { - "epoch": 0.7535735221175092, - "grad_norm": 1.3293708343512332, - "learning_rate": 6.037653644358931e-07, - "loss": 1.0218, - "step": 8356 - }, - { - "epoch": 0.7536637056409794, - "grad_norm": 1.4188989367851603, - "learning_rate": 6.033471389046393e-07, - "loss": 0.9812, - "step": 8357 - }, - { - "epoch": 0.7537538891644496, - "grad_norm": 1.7589094342137441, - "learning_rate": 6.029290325435084e-07, - "loss": 0.9242, - "step": 8358 - }, - { - "epoch": 0.7538440726879199, - "grad_norm": 1.67661881578898, - "learning_rate": 6.025110453881756e-07, - "loss": 1.0371, - "step": 8359 - }, - { - "epoch": 0.7539342562113902, - "grad_norm": 2.0446450702848016, - "learning_rate": 6.020931774743061e-07, - "loss": 0.9091, - "step": 8360 - }, - { - "epoch": 0.7540244397348604, - "grad_norm": 1.3602378562184656, - "learning_rate": 6.016754288375546e-07, - "loss": 0.8939, - "step": 8361 - }, - { - "epoch": 0.7541146232583307, - "grad_norm": 1.534320322216248, - "learning_rate": 6.012577995135665e-07, - "loss": 0.967, - "step": 8362 - }, - { - "epoch": 0.754204806781801, - "grad_norm": 1.8344347708483162, - "learning_rate": 6.008402895379743e-07, - "loss": 0.9275, - "step": 8363 - }, - { - "epoch": 0.7542949903052713, - "grad_norm": 1.7709933339558384, - "learning_rate": 6.004228989464047e-07, - "loss": 1.0615, - "step": 8364 - }, - { - "epoch": 0.7543851738287415, - "grad_norm": 1.9774394144453893, - "learning_rate": 6.000056277744692e-07, - "loss": 0.9873, - "step": 8365 - }, - { - "epoch": 0.7544753573522117, - "grad_norm": 1.273309010769355, - "learning_rate": 5.995884760577745e-07, - "loss": 0.9418, - "step": 8366 - }, - { - "epoch": 0.7545655408756821, - "grad_norm": 0.6803833917804273, - "learning_rate": 5.99171443831912e-07, - "loss": 0.8324, - "step": 8367 - }, - { - "epoch": 0.7546557243991523, - "grad_norm": 1.2758939488864, - "learning_rate": 5.98754531132466e-07, - "loss": 0.9634, - "step": 8368 - }, - { - "epoch": 0.7547459079226225, - "grad_norm": 1.6167823173344975, - "learning_rate": 5.983377379950099e-07, - "loss": 0.9788, - "step": 8369 - }, - { - "epoch": 0.7548360914460928, - "grad_norm": 1.3983607351022729, - "learning_rate": 5.979210644551067e-07, - "loss": 0.9141, - "step": 8370 - }, - { - "epoch": 0.7549262749695631, - "grad_norm": 2.0655762821056176, - "learning_rate": 5.975045105483091e-07, - "loss": 0.9683, - "step": 8371 - }, - { - "epoch": 0.7550164584930333, - "grad_norm": 0.6255887119923645, - "learning_rate": 5.970880763101607e-07, - "loss": 0.8443, - "step": 8372 - }, - { - "epoch": 0.7551066420165036, - "grad_norm": 1.3832476087611916, - "learning_rate": 5.966717617761925e-07, - "loss": 0.9958, - "step": 8373 - }, - { - "epoch": 0.7551968255399738, - "grad_norm": 1.4959200367287755, - "learning_rate": 5.962555669819276e-07, - "loss": 0.9586, - "step": 8374 - }, - { - "epoch": 0.7552870090634441, - "grad_norm": 1.8217925902060685, - "learning_rate": 5.958394919628777e-07, - "loss": 0.9851, - "step": 8375 - }, - { - "epoch": 0.7553771925869144, - "grad_norm": 1.5215874026163134, - "learning_rate": 5.954235367545451e-07, - "loss": 0.9138, - "step": 8376 - }, - { - "epoch": 0.7554673761103846, - "grad_norm": 1.3384449499592241, - "learning_rate": 5.950077013924213e-07, - "loss": 0.9594, - "step": 8377 - }, - { - "epoch": 0.7555575596338548, - "grad_norm": 1.5051723946721536, - "learning_rate": 5.945919859119865e-07, - "loss": 1.0545, - "step": 8378 - }, - { - "epoch": 0.7556477431573252, - "grad_norm": 1.6099502868559206, - "learning_rate": 5.94176390348714e-07, - "loss": 0.8709, - "step": 8379 - }, - { - "epoch": 0.7557379266807954, - "grad_norm": 2.0200402450329387, - "learning_rate": 5.937609147380622e-07, - "loss": 1.0738, - "step": 8380 - }, - { - "epoch": 0.7558281102042657, - "grad_norm": 1.4737100650466164, - "learning_rate": 5.933455591154844e-07, - "loss": 0.9162, - "step": 8381 - }, - { - "epoch": 0.7559182937277359, - "grad_norm": 1.7357675958505532, - "learning_rate": 5.929303235164191e-07, - "loss": 0.9452, - "step": 8382 - }, - { - "epoch": 0.7560084772512062, - "grad_norm": 1.5839102430200158, - "learning_rate": 5.92515207976297e-07, - "loss": 0.9458, - "step": 8383 - }, - { - "epoch": 0.7560986607746765, - "grad_norm": 1.7514488489608033, - "learning_rate": 5.921002125305383e-07, - "loss": 0.9553, - "step": 8384 - }, - { - "epoch": 0.7561888442981467, - "grad_norm": 1.7430462965734133, - "learning_rate": 5.916853372145525e-07, - "loss": 0.9013, - "step": 8385 - }, - { - "epoch": 0.756279027821617, - "grad_norm": 1.4836210243325458, - "learning_rate": 5.912705820637389e-07, - "loss": 0.9076, - "step": 8386 - }, - { - "epoch": 0.7563692113450873, - "grad_norm": 1.606652484741512, - "learning_rate": 5.908559471134871e-07, - "loss": 1.0365, - "step": 8387 - }, - { - "epoch": 0.7564593948685575, - "grad_norm": 1.9802737168205493, - "learning_rate": 5.904414323991764e-07, - "loss": 0.9089, - "step": 8388 - }, - { - "epoch": 0.7565495783920277, - "grad_norm": 1.446980436697674, - "learning_rate": 5.900270379561743e-07, - "loss": 0.9782, - "step": 8389 - }, - { - "epoch": 0.7566397619154981, - "grad_norm": 1.4171308243258258, - "learning_rate": 5.896127638198399e-07, - "loss": 0.9861, - "step": 8390 - }, - { - "epoch": 0.7567299454389683, - "grad_norm": 1.6713121126169557, - "learning_rate": 5.89198610025521e-07, - "loss": 0.9791, - "step": 8391 - }, - { - "epoch": 0.7568201289624386, - "grad_norm": 1.5502072288052298, - "learning_rate": 5.887845766085559e-07, - "loss": 1.0427, - "step": 8392 - }, - { - "epoch": 0.7569103124859088, - "grad_norm": 2.6261246406452887, - "learning_rate": 5.883706636042722e-07, - "loss": 0.9288, - "step": 8393 - }, - { - "epoch": 0.7570004960093791, - "grad_norm": 2.306027607933662, - "learning_rate": 5.879568710479879e-07, - "loss": 0.9142, - "step": 8394 - }, - { - "epoch": 0.7570906795328494, - "grad_norm": 1.4261287735982637, - "learning_rate": 5.875431989750078e-07, - "loss": 1.0646, - "step": 8395 - }, - { - "epoch": 0.7571808630563196, - "grad_norm": 1.3384111046353775, - "learning_rate": 5.871296474206313e-07, - "loss": 0.9034, - "step": 8396 - }, - { - "epoch": 0.7572710465797898, - "grad_norm": 1.4244765073018004, - "learning_rate": 5.867162164201427e-07, - "loss": 0.9777, - "step": 8397 - }, - { - "epoch": 0.7573612301032602, - "grad_norm": 1.5786233247389676, - "learning_rate": 5.863029060088205e-07, - "loss": 0.9994, - "step": 8398 - }, - { - "epoch": 0.7574514136267304, - "grad_norm": 1.4896667919476134, - "learning_rate": 5.858897162219289e-07, - "loss": 0.8662, - "step": 8399 - }, - { - "epoch": 0.7575415971502006, - "grad_norm": 1.4867015875719283, - "learning_rate": 5.854766470947238e-07, - "loss": 1.0172, - "step": 8400 - }, - { - "epoch": 0.7576317806736709, - "grad_norm": 1.3826101984870458, - "learning_rate": 5.850636986624511e-07, - "loss": 0.9597, - "step": 8401 - }, - { - "epoch": 0.7577219641971412, - "grad_norm": 1.617344355693177, - "learning_rate": 5.846508709603453e-07, - "loss": 0.9772, - "step": 8402 - }, - { - "epoch": 0.7578121477206115, - "grad_norm": 1.3650079847807923, - "learning_rate": 5.842381640236318e-07, - "loss": 0.9533, - "step": 8403 - }, - { - "epoch": 0.7579023312440817, - "grad_norm": 1.3736705855653313, - "learning_rate": 5.838255778875242e-07, - "loss": 0.9704, - "step": 8404 - }, - { - "epoch": 0.7579925147675519, - "grad_norm": 1.6187866707554919, - "learning_rate": 5.83413112587227e-07, - "loss": 0.9945, - "step": 8405 - }, - { - "epoch": 0.7580826982910223, - "grad_norm": 1.4337906455387324, - "learning_rate": 5.830007681579338e-07, - "loss": 0.9561, - "step": 8406 - }, - { - "epoch": 0.7581728818144925, - "grad_norm": 1.5067988333642635, - "learning_rate": 5.825885446348284e-07, - "loss": 0.9859, - "step": 8407 - }, - { - "epoch": 0.7582630653379627, - "grad_norm": 1.312704206657048, - "learning_rate": 5.821764420530842e-07, - "loss": 0.9161, - "step": 8408 - }, - { - "epoch": 0.7583532488614331, - "grad_norm": 1.3891394320318142, - "learning_rate": 5.817644604478633e-07, - "loss": 0.9862, - "step": 8409 - }, - { - "epoch": 0.7584434323849033, - "grad_norm": 1.6972117530692714, - "learning_rate": 5.81352599854319e-07, - "loss": 0.91, - "step": 8410 - }, - { - "epoch": 0.7585336159083735, - "grad_norm": 1.6711956319475223, - "learning_rate": 5.809408603075938e-07, - "loss": 1.0077, - "step": 8411 - }, - { - "epoch": 0.7586237994318438, - "grad_norm": 1.5952294158970557, - "learning_rate": 5.805292418428176e-07, - "loss": 0.9559, - "step": 8412 - }, - { - "epoch": 0.7587139829553141, - "grad_norm": 2.8332086049469614, - "learning_rate": 5.801177444951148e-07, - "loss": 0.956, - "step": 8413 - }, - { - "epoch": 0.7588041664787843, - "grad_norm": 1.2298881977721248, - "learning_rate": 5.797063682995944e-07, - "loss": 1.0085, - "step": 8414 - }, - { - "epoch": 0.7588943500022546, - "grad_norm": 2.2850499315878974, - "learning_rate": 5.792951132913584e-07, - "loss": 1.0371, - "step": 8415 - }, - { - "epoch": 0.7589845335257248, - "grad_norm": 1.757359764288675, - "learning_rate": 5.788839795054968e-07, - "loss": 1.0169, - "step": 8416 - }, - { - "epoch": 0.7590747170491952, - "grad_norm": 1.2999821863421144, - "learning_rate": 5.784729669770898e-07, - "loss": 0.8854, - "step": 8417 - }, - { - "epoch": 0.7591649005726654, - "grad_norm": 1.2270090541583265, - "learning_rate": 5.780620757412084e-07, - "loss": 0.9711, - "step": 8418 - }, - { - "epoch": 0.7592550840961356, - "grad_norm": 2.031531563831222, - "learning_rate": 5.776513058329098e-07, - "loss": 0.9917, - "step": 8419 - }, - { - "epoch": 0.7593452676196059, - "grad_norm": 2.0770277984551435, - "learning_rate": 5.772406572872459e-07, - "loss": 0.9979, - "step": 8420 - }, - { - "epoch": 0.7594354511430762, - "grad_norm": 3.1898975705051944, - "learning_rate": 5.768301301392535e-07, - "loss": 1.0023, - "step": 8421 - }, - { - "epoch": 0.7595256346665464, - "grad_norm": 1.5479271133227785, - "learning_rate": 5.764197244239615e-07, - "loss": 0.9833, - "step": 8422 - }, - { - "epoch": 0.7596158181900167, - "grad_norm": 4.084817940846725, - "learning_rate": 5.760094401763884e-07, - "loss": 1.0151, - "step": 8423 - }, - { - "epoch": 0.7597060017134869, - "grad_norm": 1.462288950502589, - "learning_rate": 5.755992774315414e-07, - "loss": 0.952, - "step": 8424 - }, - { - "epoch": 0.7597961852369572, - "grad_norm": 1.6892338781373635, - "learning_rate": 5.751892362244183e-07, - "loss": 0.9677, - "step": 8425 - }, - { - "epoch": 0.7598863687604275, - "grad_norm": 1.4920022260276, - "learning_rate": 5.747793165900065e-07, - "loss": 1.0237, - "step": 8426 - }, - { - "epoch": 0.7599765522838977, - "grad_norm": 1.9676747489016597, - "learning_rate": 5.743695185632806e-07, - "loss": 1.0302, - "step": 8427 - }, - { - "epoch": 0.7600667358073679, - "grad_norm": 2.3067727176270694, - "learning_rate": 5.739598421792098e-07, - "loss": 1.0166, - "step": 8428 - }, - { - "epoch": 0.7601569193308383, - "grad_norm": 1.644824511550581, - "learning_rate": 5.735502874727474e-07, - "loss": 0.953, - "step": 8429 - }, - { - "epoch": 0.7602471028543085, - "grad_norm": 1.361648324884142, - "learning_rate": 5.731408544788398e-07, - "loss": 1.0991, - "step": 8430 - }, - { - "epoch": 0.7603372863777788, - "grad_norm": 1.6593201450397512, - "learning_rate": 5.727315432324225e-07, - "loss": 0.8874, - "step": 8431 - }, - { - "epoch": 0.760427469901249, - "grad_norm": 1.9873556144618734, - "learning_rate": 5.723223537684196e-07, - "loss": 0.9638, - "step": 8432 - }, - { - "epoch": 0.7605176534247193, - "grad_norm": 1.4051360062445635, - "learning_rate": 5.719132861217462e-07, - "loss": 1.0439, - "step": 8433 - }, - { - "epoch": 0.7606078369481896, - "grad_norm": 1.4683734329902418, - "learning_rate": 5.715043403273044e-07, - "loss": 0.9371, - "step": 8434 - }, - { - "epoch": 0.7606980204716598, - "grad_norm": 1.989544060870393, - "learning_rate": 5.710955164199902e-07, - "loss": 0.9309, - "step": 8435 - }, - { - "epoch": 0.7607882039951301, - "grad_norm": 1.5425868853515652, - "learning_rate": 5.706868144346841e-07, - "loss": 0.9887, - "step": 8436 - }, - { - "epoch": 0.7608783875186004, - "grad_norm": 2.633372352005204, - "learning_rate": 5.702782344062613e-07, - "loss": 0.9665, - "step": 8437 - }, - { - "epoch": 0.7609685710420706, - "grad_norm": 1.4207782917417722, - "learning_rate": 5.698697763695826e-07, - "loss": 0.9447, - "step": 8438 - }, - { - "epoch": 0.7610587545655408, - "grad_norm": 1.3846335623843944, - "learning_rate": 5.694614403595002e-07, - "loss": 0.9834, - "step": 8439 - }, - { - "epoch": 0.7611489380890112, - "grad_norm": 1.5287859807494026, - "learning_rate": 5.690532264108554e-07, - "loss": 0.8893, - "step": 8440 - }, - { - "epoch": 0.7612391216124814, - "grad_norm": 1.3907514793227669, - "learning_rate": 5.686451345584795e-07, - "loss": 1.01, - "step": 8441 - }, - { - "epoch": 0.7613293051359517, - "grad_norm": 1.7459250417701506, - "learning_rate": 5.682371648371933e-07, - "loss": 1.0639, - "step": 8442 - }, - { - "epoch": 0.7614194886594219, - "grad_norm": 1.4884083610980545, - "learning_rate": 5.678293172818074e-07, - "loss": 0.9328, - "step": 8443 - }, - { - "epoch": 0.7615096721828922, - "grad_norm": 1.7931803659458918, - "learning_rate": 5.674215919271204e-07, - "loss": 0.9881, - "step": 8444 - }, - { - "epoch": 0.7615998557063625, - "grad_norm": 2.750396959958219, - "learning_rate": 5.670139888079224e-07, - "loss": 0.9033, - "step": 8445 - }, - { - "epoch": 0.7616900392298327, - "grad_norm": 1.2848122086956772, - "learning_rate": 5.666065079589924e-07, - "loss": 1.0317, - "step": 8446 - }, - { - "epoch": 0.7617802227533029, - "grad_norm": 1.830595515343545, - "learning_rate": 5.661991494150986e-07, - "loss": 0.9213, - "step": 8447 - }, - { - "epoch": 0.7618704062767733, - "grad_norm": 1.5273052708298251, - "learning_rate": 5.657919132109999e-07, - "loss": 1.0171, - "step": 8448 - }, - { - "epoch": 0.7619605898002435, - "grad_norm": 1.4676199278818969, - "learning_rate": 5.653847993814421e-07, - "loss": 1.0009, - "step": 8449 - }, - { - "epoch": 0.7620507733237137, - "grad_norm": 1.4724882723174493, - "learning_rate": 5.649778079611647e-07, - "loss": 0.9865, - "step": 8450 - }, - { - "epoch": 0.762140956847184, - "grad_norm": 1.3362957212014648, - "learning_rate": 5.645709389848923e-07, - "loss": 0.951, - "step": 8451 - }, - { - "epoch": 0.7622311403706543, - "grad_norm": 1.7172074071200583, - "learning_rate": 5.641641924873435e-07, - "loss": 0.9269, - "step": 8452 - }, - { - "epoch": 0.7623213238941245, - "grad_norm": 2.2667834247174947, - "learning_rate": 5.637575685032217e-07, - "loss": 0.9711, - "step": 8453 - }, - { - "epoch": 0.7624115074175948, - "grad_norm": 1.6371950171280412, - "learning_rate": 5.633510670672246e-07, - "loss": 1.0502, - "step": 8454 - }, - { - "epoch": 0.762501690941065, - "grad_norm": 1.0910318113246968, - "learning_rate": 5.629446882140354e-07, - "loss": 0.8577, - "step": 8455 - }, - { - "epoch": 0.7625918744645354, - "grad_norm": 1.3841836456935521, - "learning_rate": 5.625384319783295e-07, - "loss": 0.9758, - "step": 8456 - }, - { - "epoch": 0.7626820579880056, - "grad_norm": 1.5809307273633717, - "learning_rate": 5.621322983947705e-07, - "loss": 0.9189, - "step": 8457 - }, - { - "epoch": 0.7627722415114758, - "grad_norm": 2.754780515592288, - "learning_rate": 5.617262874980122e-07, - "loss": 1.0101, - "step": 8458 - }, - { - "epoch": 0.7628624250349462, - "grad_norm": 1.3523402098857837, - "learning_rate": 5.613203993226981e-07, - "loss": 1.017, - "step": 8459 - }, - { - "epoch": 0.7629526085584164, - "grad_norm": 2.15412643463914, - "learning_rate": 5.609146339034599e-07, - "loss": 1.023, - "step": 8460 - }, - { - "epoch": 0.7630427920818866, - "grad_norm": 2.125172327571343, - "learning_rate": 5.605089912749199e-07, - "loss": 0.9176, - "step": 8461 - }, - { - "epoch": 0.7631329756053569, - "grad_norm": 1.9796405091162037, - "learning_rate": 5.601034714716901e-07, - "loss": 0.9143, - "step": 8462 - }, - { - "epoch": 0.7632231591288272, - "grad_norm": 1.2271352997652922, - "learning_rate": 5.59698074528372e-07, - "loss": 0.9419, - "step": 8463 - }, - { - "epoch": 0.7633133426522974, - "grad_norm": 1.3299763062166394, - "learning_rate": 5.592928004795555e-07, - "loss": 0.9479, - "step": 8464 - }, - { - "epoch": 0.7634035261757677, - "grad_norm": 1.5222171688614723, - "learning_rate": 5.58887649359822e-07, - "loss": 0.9729, - "step": 8465 - }, - { - "epoch": 0.7634937096992379, - "grad_norm": 1.5495859146746294, - "learning_rate": 5.584826212037393e-07, - "loss": 0.8312, - "step": 8466 - }, - { - "epoch": 0.7635838932227083, - "grad_norm": 1.2462000786550023, - "learning_rate": 5.580777160458689e-07, - "loss": 0.887, - "step": 8467 - }, - { - "epoch": 0.7636740767461785, - "grad_norm": 1.5012779355209371, - "learning_rate": 5.576729339207574e-07, - "loss": 0.9921, - "step": 8468 - }, - { - "epoch": 0.7637642602696487, - "grad_norm": 1.173533677224561, - "learning_rate": 5.572682748629449e-07, - "loss": 1.0285, - "step": 8469 - }, - { - "epoch": 0.763854443793119, - "grad_norm": 1.2628774607450437, - "learning_rate": 5.568637389069582e-07, - "loss": 0.9569, - "step": 8470 - }, - { - "epoch": 0.7639446273165893, - "grad_norm": 1.4164985201659923, - "learning_rate": 5.564593260873145e-07, - "loss": 0.9914, - "step": 8471 - }, - { - "epoch": 0.7640348108400595, - "grad_norm": 1.3424750645060397, - "learning_rate": 5.560550364385206e-07, - "loss": 1.0085, - "step": 8472 - }, - { - "epoch": 0.7641249943635298, - "grad_norm": 1.46256993118419, - "learning_rate": 5.556508699950728e-07, - "loss": 1.0448, - "step": 8473 - }, - { - "epoch": 0.764215177887, - "grad_norm": 1.6713586170220098, - "learning_rate": 5.552468267914577e-07, - "loss": 0.9278, - "step": 8474 - }, - { - "epoch": 0.7643053614104703, - "grad_norm": 2.6393735730517154, - "learning_rate": 5.548429068621481e-07, - "loss": 0.8872, - "step": 8475 - }, - { - "epoch": 0.7643955449339406, - "grad_norm": 0.6387415623340849, - "learning_rate": 5.544391102416115e-07, - "loss": 0.8431, - "step": 8476 - }, - { - "epoch": 0.7644857284574108, - "grad_norm": 1.730707156462015, - "learning_rate": 5.540354369643003e-07, - "loss": 1.0277, - "step": 8477 - }, - { - "epoch": 0.764575911980881, - "grad_norm": 3.4233304764817105, - "learning_rate": 5.536318870646586e-07, - "loss": 0.837, - "step": 8478 - }, - { - "epoch": 0.7646660955043514, - "grad_norm": 1.899250500067228, - "learning_rate": 5.532284605771194e-07, - "loss": 0.8763, - "step": 8479 - }, - { - "epoch": 0.7647562790278216, - "grad_norm": 1.2636015458265693, - "learning_rate": 5.528251575361052e-07, - "loss": 0.9386, - "step": 8480 - }, - { - "epoch": 0.7648464625512919, - "grad_norm": 1.3718778970813792, - "learning_rate": 5.524219779760284e-07, - "loss": 0.9648, - "step": 8481 - }, - { - "epoch": 0.7649366460747622, - "grad_norm": 1.4114477525527964, - "learning_rate": 5.520189219312907e-07, - "loss": 0.9974, - "step": 8482 - }, - { - "epoch": 0.7650268295982324, - "grad_norm": 2.3063318625707008, - "learning_rate": 5.516159894362817e-07, - "loss": 0.8512, - "step": 8483 - }, - { - "epoch": 0.7651170131217027, - "grad_norm": 1.333249114277501, - "learning_rate": 5.512131805253839e-07, - "loss": 0.9123, - "step": 8484 - }, - { - "epoch": 0.7652071966451729, - "grad_norm": 1.2918969174737251, - "learning_rate": 5.508104952329653e-07, - "loss": 0.9879, - "step": 8485 - }, - { - "epoch": 0.7652973801686432, - "grad_norm": 3.063221904760603, - "learning_rate": 5.504079335933862e-07, - "loss": 0.9949, - "step": 8486 - }, - { - "epoch": 0.7653875636921135, - "grad_norm": 1.6547187348354417, - "learning_rate": 5.500054956409952e-07, - "loss": 1.0022, - "step": 8487 - }, - { - "epoch": 0.7654777472155837, - "grad_norm": 1.4491417548195633, - "learning_rate": 5.496031814101303e-07, - "loss": 0.9295, - "step": 8488 - }, - { - "epoch": 0.7655679307390539, - "grad_norm": 1.2934852789980094, - "learning_rate": 5.492009909351203e-07, - "loss": 0.9813, - "step": 8489 - }, - { - "epoch": 0.7656581142625243, - "grad_norm": 3.664744884984695, - "learning_rate": 5.4879892425028e-07, - "loss": 0.8957, - "step": 8490 - }, - { - "epoch": 0.7657482977859945, - "grad_norm": 1.2870030453282082, - "learning_rate": 5.483969813899184e-07, - "loss": 1.1044, - "step": 8491 - }, - { - "epoch": 0.7658384813094647, - "grad_norm": 1.2607413365060547, - "learning_rate": 5.479951623883299e-07, - "loss": 1.0495, - "step": 8492 - }, - { - "epoch": 0.765928664832935, - "grad_norm": 1.932030926683941, - "learning_rate": 5.475934672798004e-07, - "loss": 0.9046, - "step": 8493 - }, - { - "epoch": 0.7660188483564053, - "grad_norm": 1.5261420866785929, - "learning_rate": 5.471918960986047e-07, - "loss": 0.9367, - "step": 8494 - }, - { - "epoch": 0.7661090318798756, - "grad_norm": 1.578552641383832, - "learning_rate": 5.467904488790071e-07, - "loss": 1.0629, - "step": 8495 - }, - { - "epoch": 0.7661992154033458, - "grad_norm": 1.4311243560147175, - "learning_rate": 5.463891256552615e-07, - "loss": 0.9792, - "step": 8496 - }, - { - "epoch": 0.766289398926816, - "grad_norm": 1.4835646977525458, - "learning_rate": 5.459879264616107e-07, - "loss": 0.8819, - "step": 8497 - }, - { - "epoch": 0.7663795824502864, - "grad_norm": 2.4375703263530157, - "learning_rate": 5.455868513322874e-07, - "loss": 0.85, - "step": 8498 - }, - { - "epoch": 0.7664697659737566, - "grad_norm": 1.4538888051884067, - "learning_rate": 5.451859003015143e-07, - "loss": 0.865, - "step": 8499 - }, - { - "epoch": 0.7665599494972268, - "grad_norm": 1.998708009642591, - "learning_rate": 5.447850734035009e-07, - "loss": 1.0107, - "step": 8500 - }, - { - "epoch": 0.7666501330206971, - "grad_norm": 1.986127244962357, - "learning_rate": 5.443843706724494e-07, - "loss": 0.9599, - "step": 8501 - }, - { - "epoch": 0.7667403165441674, - "grad_norm": 3.5796827898198496, - "learning_rate": 5.439837921425494e-07, - "loss": 0.9016, - "step": 8502 - }, - { - "epoch": 0.7668305000676376, - "grad_norm": 1.2654959059445199, - "learning_rate": 5.435833378479807e-07, - "loss": 0.9587, - "step": 8503 - }, - { - "epoch": 0.7669206835911079, - "grad_norm": 1.32548345526835, - "learning_rate": 5.431830078229128e-07, - "loss": 0.8315, - "step": 8504 - }, - { - "epoch": 0.7670108671145782, - "grad_norm": 1.2800040350790796, - "learning_rate": 5.427828021015022e-07, - "loss": 0.9862, - "step": 8505 - }, - { - "epoch": 0.7671010506380485, - "grad_norm": 1.5077087682906367, - "learning_rate": 5.42382720717899e-07, - "loss": 1.001, - "step": 8506 - }, - { - "epoch": 0.7671912341615187, - "grad_norm": 1.4203522461553575, - "learning_rate": 5.419827637062384e-07, - "loss": 0.9976, - "step": 8507 - }, - { - "epoch": 0.7672814176849889, - "grad_norm": 1.428455843677296, - "learning_rate": 5.415829311006487e-07, - "loss": 1.1174, - "step": 8508 - }, - { - "epoch": 0.7673716012084593, - "grad_norm": 1.3923896565712572, - "learning_rate": 5.411832229352447e-07, - "loss": 1.0459, - "step": 8509 - }, - { - "epoch": 0.7674617847319295, - "grad_norm": 1.5687233394469873, - "learning_rate": 5.407836392441319e-07, - "loss": 1.0593, - "step": 8510 - }, - { - "epoch": 0.7675519682553997, - "grad_norm": 1.3706885714558719, - "learning_rate": 5.403841800614049e-07, - "loss": 0.9897, - "step": 8511 - }, - { - "epoch": 0.76764215177887, - "grad_norm": 1.27896402702715, - "learning_rate": 5.39984845421148e-07, - "loss": 0.9232, - "step": 8512 - }, - { - "epoch": 0.7677323353023403, - "grad_norm": 1.4765496631851476, - "learning_rate": 5.395856353574344e-07, - "loss": 0.9511, - "step": 8513 - }, - { - "epoch": 0.7678225188258105, - "grad_norm": 0.6125431600272706, - "learning_rate": 5.391865499043275e-07, - "loss": 0.8433, - "step": 8514 - }, - { - "epoch": 0.7679127023492808, - "grad_norm": 1.187456381147274, - "learning_rate": 5.387875890958788e-07, - "loss": 0.9725, - "step": 8515 - }, - { - "epoch": 0.768002885872751, - "grad_norm": 1.31574035465011, - "learning_rate": 5.383887529661298e-07, - "loss": 0.9952, - "step": 8516 - }, - { - "epoch": 0.7680930693962214, - "grad_norm": 2.1355898034843532, - "learning_rate": 5.379900415491116e-07, - "loss": 0.9264, - "step": 8517 - }, - { - "epoch": 0.7681832529196916, - "grad_norm": 1.5133183023291659, - "learning_rate": 5.375914548788447e-07, - "loss": 0.9252, - "step": 8518 - }, - { - "epoch": 0.7682734364431618, - "grad_norm": 1.4519131437128001, - "learning_rate": 5.371929929893384e-07, - "loss": 0.8524, - "step": 8519 - }, - { - "epoch": 0.768363619966632, - "grad_norm": 1.586519933027722, - "learning_rate": 5.367946559145917e-07, - "loss": 0.9358, - "step": 8520 - }, - { - "epoch": 0.7684538034901024, - "grad_norm": 1.2384615986414391, - "learning_rate": 5.363964436885935e-07, - "loss": 0.9932, - "step": 8521 - }, - { - "epoch": 0.7685439870135726, - "grad_norm": 1.265495058147257, - "learning_rate": 5.359983563453199e-07, - "loss": 1.0547, - "step": 8522 - }, - { - "epoch": 0.7686341705370429, - "grad_norm": 1.4000822230444523, - "learning_rate": 5.356003939187402e-07, - "loss": 1.0873, - "step": 8523 - }, - { - "epoch": 0.7687243540605131, - "grad_norm": 1.820947888202145, - "learning_rate": 5.352025564428082e-07, - "loss": 0.9446, - "step": 8524 - }, - { - "epoch": 0.7688145375839834, - "grad_norm": 1.4389043665183998, - "learning_rate": 5.348048439514723e-07, - "loss": 1.0133, - "step": 8525 - }, - { - "epoch": 0.7689047211074537, - "grad_norm": 1.7588087183538659, - "learning_rate": 5.344072564786653e-07, - "loss": 0.8572, - "step": 8526 - }, - { - "epoch": 0.7689949046309239, - "grad_norm": 1.564720483382362, - "learning_rate": 5.340097940583123e-07, - "loss": 0.9832, - "step": 8527 - }, - { - "epoch": 0.7690850881543942, - "grad_norm": 1.8999146417216346, - "learning_rate": 5.336124567243275e-07, - "loss": 0.9317, - "step": 8528 - }, - { - "epoch": 0.7691752716778645, - "grad_norm": 1.667808650103295, - "learning_rate": 5.33215244510613e-07, - "loss": 0.9799, - "step": 8529 - }, - { - "epoch": 0.7692654552013347, - "grad_norm": 2.209888654294574, - "learning_rate": 5.328181574510624e-07, - "loss": 0.9736, - "step": 8530 - }, - { - "epoch": 0.769355638724805, - "grad_norm": 1.4263018767996511, - "learning_rate": 5.324211955795559e-07, - "loss": 1.0245, - "step": 8531 - }, - { - "epoch": 0.7694458222482753, - "grad_norm": 1.6918993929177986, - "learning_rate": 5.320243589299651e-07, - "loss": 0.8963, - "step": 8532 - }, - { - "epoch": 0.7695360057717455, - "grad_norm": 1.5626402219795792, - "learning_rate": 5.316276475361505e-07, - "loss": 0.9363, - "step": 8533 - }, - { - "epoch": 0.7696261892952158, - "grad_norm": 1.938976893967529, - "learning_rate": 5.312310614319613e-07, - "loss": 1.01, - "step": 8534 - }, - { - "epoch": 0.769716372818686, - "grad_norm": 2.3391271140144516, - "learning_rate": 5.308346006512367e-07, - "loss": 0.9379, - "step": 8535 - }, - { - "epoch": 0.7698065563421563, - "grad_norm": 1.5191182259212652, - "learning_rate": 5.30438265227805e-07, - "loss": 0.903, - "step": 8536 - }, - { - "epoch": 0.7698967398656266, - "grad_norm": 0.6478436059839836, - "learning_rate": 5.300420551954837e-07, - "loss": 0.832, - "step": 8537 - }, - { - "epoch": 0.7699869233890968, - "grad_norm": 1.5416166537996205, - "learning_rate": 5.296459705880798e-07, - "loss": 0.8984, - "step": 8538 - }, - { - "epoch": 0.770077106912567, - "grad_norm": 1.6311162871882963, - "learning_rate": 5.292500114393881e-07, - "loss": 1.0239, - "step": 8539 - }, - { - "epoch": 0.7701672904360374, - "grad_norm": 1.7266820805768657, - "learning_rate": 5.288541777831963e-07, - "loss": 0.9942, - "step": 8540 - }, - { - "epoch": 0.7702574739595076, - "grad_norm": 1.2810860970960978, - "learning_rate": 5.284584696532772e-07, - "loss": 1.0255, - "step": 8541 - }, - { - "epoch": 0.7703476574829778, - "grad_norm": 1.464946366848394, - "learning_rate": 5.280628870833954e-07, - "loss": 0.976, - "step": 8542 - }, - { - "epoch": 0.7704378410064481, - "grad_norm": 1.860834462526659, - "learning_rate": 5.276674301073045e-07, - "loss": 1.004, - "step": 8543 - }, - { - "epoch": 0.7705280245299184, - "grad_norm": 1.3172825463372562, - "learning_rate": 5.272720987587467e-07, - "loss": 0.9801, - "step": 8544 - }, - { - "epoch": 0.7706182080533887, - "grad_norm": 2.478991259185166, - "learning_rate": 5.268768930714545e-07, - "loss": 0.9291, - "step": 8545 - }, - { - "epoch": 0.7707083915768589, - "grad_norm": 1.392081837010581, - "learning_rate": 5.264818130791473e-07, - "loss": 1.0046, - "step": 8546 - }, - { - "epoch": 0.7707985751003291, - "grad_norm": 1.7226354204159837, - "learning_rate": 5.260868588155378e-07, - "loss": 0.9762, - "step": 8547 - }, - { - "epoch": 0.7708887586237995, - "grad_norm": 1.2062267103565767, - "learning_rate": 5.256920303143242e-07, - "loss": 0.9999, - "step": 8548 - }, - { - "epoch": 0.7709789421472697, - "grad_norm": 1.2894303692806393, - "learning_rate": 5.252973276091956e-07, - "loss": 1.0801, - "step": 8549 - }, - { - "epoch": 0.7710691256707399, - "grad_norm": 1.2815675574544494, - "learning_rate": 5.249027507338307e-07, - "loss": 1.0344, - "step": 8550 - }, - { - "epoch": 0.7711593091942102, - "grad_norm": 1.3515706144761832, - "learning_rate": 5.245082997218966e-07, - "loss": 0.9465, - "step": 8551 - }, - { - "epoch": 0.7712494927176805, - "grad_norm": 1.6565103866016626, - "learning_rate": 5.241139746070499e-07, - "loss": 0.9975, - "step": 8552 - }, - { - "epoch": 0.7713396762411507, - "grad_norm": 1.503887543004594, - "learning_rate": 5.237197754229376e-07, - "loss": 0.8553, - "step": 8553 - }, - { - "epoch": 0.771429859764621, - "grad_norm": 2.5232229230133374, - "learning_rate": 5.233257022031931e-07, - "loss": 0.8772, - "step": 8554 - }, - { - "epoch": 0.7715200432880913, - "grad_norm": 1.533869468556508, - "learning_rate": 5.229317549814432e-07, - "loss": 0.9789, - "step": 8555 - }, - { - "epoch": 0.7716102268115616, - "grad_norm": 1.698086767120766, - "learning_rate": 5.225379337912998e-07, - "loss": 1.0541, - "step": 8556 - }, - { - "epoch": 0.7717004103350318, - "grad_norm": 1.4474020210272034, - "learning_rate": 5.221442386663663e-07, - "loss": 0.982, - "step": 8557 - }, - { - "epoch": 0.771790593858502, - "grad_norm": 1.6604746793650236, - "learning_rate": 5.217506696402354e-07, - "loss": 1.0159, - "step": 8558 - }, - { - "epoch": 0.7718807773819724, - "grad_norm": 1.9723743199485624, - "learning_rate": 5.213572267464883e-07, - "loss": 0.9813, - "step": 8559 - }, - { - "epoch": 0.7719709609054426, - "grad_norm": 1.3954136298119908, - "learning_rate": 5.209639100186965e-07, - "loss": 0.9544, - "step": 8560 - }, - { - "epoch": 0.7720611444289128, - "grad_norm": 0.6745268806066814, - "learning_rate": 5.205707194904179e-07, - "loss": 0.907, - "step": 8561 - }, - { - "epoch": 0.7721513279523831, - "grad_norm": 1.3186786180313983, - "learning_rate": 5.201776551952042e-07, - "loss": 0.8986, - "step": 8562 - }, - { - "epoch": 0.7722415114758534, - "grad_norm": 1.7558366309401885, - "learning_rate": 5.197847171665914e-07, - "loss": 0.9343, - "step": 8563 - }, - { - "epoch": 0.7723316949993236, - "grad_norm": 1.2367317782189935, - "learning_rate": 5.193919054381095e-07, - "loss": 0.9548, - "step": 8564 - }, - { - "epoch": 0.7724218785227939, - "grad_norm": 1.7917392479441006, - "learning_rate": 5.189992200432738e-07, - "loss": 0.942, - "step": 8565 - }, - { - "epoch": 0.7725120620462641, - "grad_norm": 1.681820389092424, - "learning_rate": 5.186066610155906e-07, - "loss": 0.98, - "step": 8566 - }, - { - "epoch": 0.7726022455697344, - "grad_norm": 1.971765056271611, - "learning_rate": 5.182142283885555e-07, - "loss": 1.0221, - "step": 8567 - }, - { - "epoch": 0.7726924290932047, - "grad_norm": 0.7042400842875186, - "learning_rate": 5.178219221956528e-07, - "loss": 0.8141, - "step": 8568 - }, - { - "epoch": 0.7727826126166749, - "grad_norm": 1.4650747701943856, - "learning_rate": 5.174297424703565e-07, - "loss": 0.9912, - "step": 8569 - }, - { - "epoch": 0.7728727961401451, - "grad_norm": 1.4947739000848517, - "learning_rate": 5.170376892461299e-07, - "loss": 0.9101, - "step": 8570 - }, - { - "epoch": 0.7729629796636155, - "grad_norm": 0.7252332772473985, - "learning_rate": 5.16645762556424e-07, - "loss": 0.8922, - "step": 8571 - }, - { - "epoch": 0.7730531631870857, - "grad_norm": 1.4162425734080297, - "learning_rate": 5.162539624346809e-07, - "loss": 1.0216, - "step": 8572 - }, - { - "epoch": 0.773143346710556, - "grad_norm": 1.5571240065872272, - "learning_rate": 5.158622889143309e-07, - "loss": 1.0323, - "step": 8573 - }, - { - "epoch": 0.7732335302340262, - "grad_norm": 1.621559168175733, - "learning_rate": 5.154707420287939e-07, - "loss": 0.933, - "step": 8574 - }, - { - "epoch": 0.7733237137574965, - "grad_norm": 1.4605956468093397, - "learning_rate": 5.150793218114793e-07, - "loss": 0.9, - "step": 8575 - }, - { - "epoch": 0.7734138972809668, - "grad_norm": 1.3193075655604973, - "learning_rate": 5.146880282957837e-07, - "loss": 0.9628, - "step": 8576 - }, - { - "epoch": 0.773504080804437, - "grad_norm": 1.4684826221989962, - "learning_rate": 5.142968615150964e-07, - "loss": 0.9667, - "step": 8577 - }, - { - "epoch": 0.7735942643279073, - "grad_norm": 1.6111857773297977, - "learning_rate": 5.139058215027921e-07, - "loss": 0.9048, - "step": 8578 - }, - { - "epoch": 0.7736844478513776, - "grad_norm": 1.179540763975283, - "learning_rate": 5.135149082922383e-07, - "loss": 0.9611, - "step": 8579 - }, - { - "epoch": 0.7737746313748478, - "grad_norm": 1.5332492311190549, - "learning_rate": 5.131241219167879e-07, - "loss": 0.9096, - "step": 8580 - }, - { - "epoch": 0.773864814898318, - "grad_norm": 0.7790114183507241, - "learning_rate": 5.127334624097869e-07, - "loss": 0.8756, - "step": 8581 - }, - { - "epoch": 0.7739549984217884, - "grad_norm": 1.1860274168965788, - "learning_rate": 5.123429298045672e-07, - "loss": 1.0285, - "step": 8582 - }, - { - "epoch": 0.7740451819452586, - "grad_norm": 2.0659661500415916, - "learning_rate": 5.119525241344515e-07, - "loss": 0.8618, - "step": 8583 - }, - { - "epoch": 0.7741353654687289, - "grad_norm": 1.4881644616147882, - "learning_rate": 5.115622454327515e-07, - "loss": 0.9807, - "step": 8584 - }, - { - "epoch": 0.7742255489921991, - "grad_norm": 1.5748464691171942, - "learning_rate": 5.11172093732768e-07, - "loss": 0.9552, - "step": 8585 - }, - { - "epoch": 0.7743157325156694, - "grad_norm": 2.461404326227536, - "learning_rate": 5.107820690677911e-07, - "loss": 0.9982, - "step": 8586 - }, - { - "epoch": 0.7744059160391397, - "grad_norm": 1.6431327908441142, - "learning_rate": 5.103921714710991e-07, - "loss": 0.9164, - "step": 8587 - }, - { - "epoch": 0.7744960995626099, - "grad_norm": 1.5147897050649237, - "learning_rate": 5.100024009759605e-07, - "loss": 0.9877, - "step": 8588 - }, - { - "epoch": 0.7745862830860801, - "grad_norm": 2.286965825815079, - "learning_rate": 5.09612757615633e-07, - "loss": 1.0001, - "step": 8589 - }, - { - "epoch": 0.7746764666095505, - "grad_norm": 1.359566905189062, - "learning_rate": 5.092232414233628e-07, - "loss": 0.9251, - "step": 8590 - }, - { - "epoch": 0.7747666501330207, - "grad_norm": 1.5016761792326534, - "learning_rate": 5.088338524323858e-07, - "loss": 0.9408, - "step": 8591 - }, - { - "epoch": 0.7748568336564909, - "grad_norm": 1.6106657019058606, - "learning_rate": 5.084445906759271e-07, - "loss": 1.0303, - "step": 8592 - }, - { - "epoch": 0.7749470171799612, - "grad_norm": 2.1488382971321225, - "learning_rate": 5.080554561871995e-07, - "loss": 1.0382, - "step": 8593 - }, - { - "epoch": 0.7750372007034315, - "grad_norm": 1.3269122701649245, - "learning_rate": 5.076664489994078e-07, - "loss": 0.9944, - "step": 8594 - }, - { - "epoch": 0.7751273842269017, - "grad_norm": 2.0940446788608473, - "learning_rate": 5.07277569145742e-07, - "loss": 0.9464, - "step": 8595 - }, - { - "epoch": 0.775217567750372, - "grad_norm": 1.362938999182373, - "learning_rate": 5.068888166593861e-07, - "loss": 0.8871, - "step": 8596 - }, - { - "epoch": 0.7753077512738422, - "grad_norm": 1.563296763406518, - "learning_rate": 5.065001915735087e-07, - "loss": 0.9606, - "step": 8597 - }, - { - "epoch": 0.7753979347973126, - "grad_norm": 1.310935268468401, - "learning_rate": 5.061116939212702e-07, - "loss": 0.9399, - "step": 8598 - }, - { - "epoch": 0.7754881183207828, - "grad_norm": 1.744988213092159, - "learning_rate": 5.05723323735819e-07, - "loss": 0.9654, - "step": 8599 - }, - { - "epoch": 0.775578301844253, - "grad_norm": 0.6652594157874688, - "learning_rate": 5.053350810502932e-07, - "loss": 0.8453, - "step": 8600 - }, - { - "epoch": 0.7756684853677234, - "grad_norm": 1.9403626613243072, - "learning_rate": 5.049469658978202e-07, - "loss": 0.9659, - "step": 8601 - }, - { - "epoch": 0.7757586688911936, - "grad_norm": 0.6249558433193004, - "learning_rate": 5.045589783115147e-07, - "loss": 0.7917, - "step": 8602 - }, - { - "epoch": 0.7758488524146638, - "grad_norm": 2.33001325186043, - "learning_rate": 5.041711183244842e-07, - "loss": 1.0021, - "step": 8603 - }, - { - "epoch": 0.7759390359381341, - "grad_norm": 2.8661274210575134, - "learning_rate": 5.037833859698211e-07, - "loss": 0.9968, - "step": 8604 - }, - { - "epoch": 0.7760292194616044, - "grad_norm": 1.780549765028773, - "learning_rate": 5.033957812806096e-07, - "loss": 1.0411, - "step": 8605 - }, - { - "epoch": 0.7761194029850746, - "grad_norm": 2.5754987807658853, - "learning_rate": 5.030083042899223e-07, - "loss": 1.0213, - "step": 8606 - }, - { - "epoch": 0.7762095865085449, - "grad_norm": 2.2271144249806265, - "learning_rate": 5.026209550308207e-07, - "loss": 0.9723, - "step": 8607 - }, - { - "epoch": 0.7762997700320151, - "grad_norm": 1.721791645116361, - "learning_rate": 5.022337335363558e-07, - "loss": 1.0217, - "step": 8608 - }, - { - "epoch": 0.7763899535554855, - "grad_norm": 7.676779863078146, - "learning_rate": 5.018466398395677e-07, - "loss": 0.9947, - "step": 8609 - }, - { - "epoch": 0.7764801370789557, - "grad_norm": 1.353878748681013, - "learning_rate": 5.01459673973484e-07, - "loss": 0.9048, - "step": 8610 - }, - { - "epoch": 0.7765703206024259, - "grad_norm": 1.9295905702779643, - "learning_rate": 5.01072835971125e-07, - "loss": 1.0321, - "step": 8611 - }, - { - "epoch": 0.7766605041258962, - "grad_norm": 1.4675596567534728, - "learning_rate": 5.006861258654959e-07, - "loss": 0.8752, - "step": 8612 - }, - { - "epoch": 0.7767506876493665, - "grad_norm": 1.4063600285188973, - "learning_rate": 5.002995436895938e-07, - "loss": 1.0009, - "step": 8613 - }, - { - "epoch": 0.7768408711728367, - "grad_norm": 1.6407346416349184, - "learning_rate": 4.999130894764039e-07, - "loss": 0.9277, - "step": 8614 - }, - { - "epoch": 0.776931054696307, - "grad_norm": 1.6298164002160138, - "learning_rate": 4.995267632589006e-07, - "loss": 0.9065, - "step": 8615 - }, - { - "epoch": 0.7770212382197772, - "grad_norm": 0.7823314810694235, - "learning_rate": 4.99140565070048e-07, - "loss": 0.8716, - "step": 8616 - }, - { - "epoch": 0.7771114217432475, - "grad_norm": 1.768848885511705, - "learning_rate": 4.987544949427969e-07, - "loss": 0.9983, - "step": 8617 - }, - { - "epoch": 0.7772016052667178, - "grad_norm": 0.7936196498020578, - "learning_rate": 4.98368552910091e-07, - "loss": 0.8422, - "step": 8618 - }, - { - "epoch": 0.777291788790188, - "grad_norm": 1.714483131950928, - "learning_rate": 4.979827390048596e-07, - "loss": 1.0796, - "step": 8619 - }, - { - "epoch": 0.7773819723136582, - "grad_norm": 1.3261585263014446, - "learning_rate": 4.975970532600231e-07, - "loss": 0.9351, - "step": 8620 - }, - { - "epoch": 0.7774721558371286, - "grad_norm": 1.500336768175143, - "learning_rate": 4.972114957084901e-07, - "loss": 0.9893, - "step": 8621 - }, - { - "epoch": 0.7775623393605988, - "grad_norm": 1.6030398370382033, - "learning_rate": 4.968260663831585e-07, - "loss": 0.9513, - "step": 8622 - }, - { - "epoch": 0.777652522884069, - "grad_norm": 1.1800576444058946, - "learning_rate": 4.964407653169154e-07, - "loss": 1.079, - "step": 8623 - }, - { - "epoch": 0.7777427064075394, - "grad_norm": 1.3254282331316758, - "learning_rate": 4.960555925426366e-07, - "loss": 0.95, - "step": 8624 - }, - { - "epoch": 0.7778328899310096, - "grad_norm": 1.6620418117969766, - "learning_rate": 4.956705480931876e-07, - "loss": 1.0265, - "step": 8625 - }, - { - "epoch": 0.7779230734544799, - "grad_norm": 1.3162091840626768, - "learning_rate": 4.952856320014225e-07, - "loss": 0.9509, - "step": 8626 - }, - { - "epoch": 0.7780132569779501, - "grad_norm": 1.3818399921263393, - "learning_rate": 4.949008443001838e-07, - "loss": 0.9108, - "step": 8627 - }, - { - "epoch": 0.7781034405014204, - "grad_norm": 1.4682619623222695, - "learning_rate": 4.945161850223041e-07, - "loss": 0.9777, - "step": 8628 - }, - { - "epoch": 0.7781936240248907, - "grad_norm": 1.7032561645396256, - "learning_rate": 4.941316542006044e-07, - "loss": 0.9054, - "step": 8629 - }, - { - "epoch": 0.7782838075483609, - "grad_norm": 1.1987650258352687, - "learning_rate": 4.937472518678956e-07, - "loss": 0.9976, - "step": 8630 - }, - { - "epoch": 0.7783739910718311, - "grad_norm": 1.5250984722925955, - "learning_rate": 4.93362978056977e-07, - "loss": 0.9671, - "step": 8631 - }, - { - "epoch": 0.7784641745953015, - "grad_norm": 1.3682264031986267, - "learning_rate": 4.929788328006355e-07, - "loss": 0.9146, - "step": 8632 - }, - { - "epoch": 0.7785543581187717, - "grad_norm": 1.7855814421150835, - "learning_rate": 4.925948161316506e-07, - "loss": 1.0349, - "step": 8633 - }, - { - "epoch": 0.778644541642242, - "grad_norm": 1.7342578057911782, - "learning_rate": 4.922109280827868e-07, - "loss": 0.9636, - "step": 8634 - }, - { - "epoch": 0.7787347251657122, - "grad_norm": 1.5647282543021332, - "learning_rate": 4.918271686868016e-07, - "loss": 0.967, - "step": 8635 - }, - { - "epoch": 0.7788249086891825, - "grad_norm": 1.7861546368566543, - "learning_rate": 4.914435379764379e-07, - "loss": 1.015, - "step": 8636 - }, - { - "epoch": 0.7789150922126528, - "grad_norm": 1.4721138762346686, - "learning_rate": 4.910600359844294e-07, - "loss": 0.9579, - "step": 8637 - }, - { - "epoch": 0.779005275736123, - "grad_norm": 1.5709681873803518, - "learning_rate": 4.90676662743499e-07, - "loss": 0.8886, - "step": 8638 - }, - { - "epoch": 0.7790954592595932, - "grad_norm": 1.5423190910061562, - "learning_rate": 4.902934182863581e-07, - "loss": 0.9598, - "step": 8639 - }, - { - "epoch": 0.7791856427830636, - "grad_norm": 1.659122207840434, - "learning_rate": 4.899103026457069e-07, - "loss": 0.8426, - "step": 8640 - }, - { - "epoch": 0.7792758263065338, - "grad_norm": 0.6207595981957397, - "learning_rate": 4.895273158542361e-07, - "loss": 0.8111, - "step": 8641 - }, - { - "epoch": 0.779366009830004, - "grad_norm": 1.7287920887003037, - "learning_rate": 4.891444579446227e-07, - "loss": 0.9833, - "step": 8642 - }, - { - "epoch": 0.7794561933534743, - "grad_norm": 3.0349622195561676, - "learning_rate": 4.887617289495349e-07, - "loss": 0.9309, - "step": 8643 - }, - { - "epoch": 0.7795463768769446, - "grad_norm": 0.5935391754503369, - "learning_rate": 4.883791289016292e-07, - "loss": 0.7636, - "step": 8644 - }, - { - "epoch": 0.7796365604004148, - "grad_norm": 1.617967279379198, - "learning_rate": 4.879966578335514e-07, - "loss": 0.9831, - "step": 8645 - }, - { - "epoch": 0.7797267439238851, - "grad_norm": 2.134346769936502, - "learning_rate": 4.876143157779358e-07, - "loss": 0.862, - "step": 8646 - }, - { - "epoch": 0.7798169274473554, - "grad_norm": 1.9010179979775688, - "learning_rate": 4.872321027674058e-07, - "loss": 0.8631, - "step": 8647 - }, - { - "epoch": 0.7799071109708257, - "grad_norm": 1.4339201760839462, - "learning_rate": 4.868500188345748e-07, - "loss": 1.0476, - "step": 8648 - }, - { - "epoch": 0.7799972944942959, - "grad_norm": 1.4428189599031156, - "learning_rate": 4.864680640120425e-07, - "loss": 1.0373, - "step": 8649 - }, - { - "epoch": 0.7800874780177661, - "grad_norm": 3.1475021449559715, - "learning_rate": 4.860862383324016e-07, - "loss": 0.9743, - "step": 8650 - }, - { - "epoch": 0.7801776615412365, - "grad_norm": 3.6067311565911444, - "learning_rate": 4.857045418282295e-07, - "loss": 0.9619, - "step": 8651 - }, - { - "epoch": 0.7802678450647067, - "grad_norm": 1.6412884052138295, - "learning_rate": 4.853229745320966e-07, - "loss": 0.9196, - "step": 8652 - }, - { - "epoch": 0.7803580285881769, - "grad_norm": 1.3703446474730256, - "learning_rate": 4.849415364765587e-07, - "loss": 0.9539, - "step": 8653 - }, - { - "epoch": 0.7804482121116472, - "grad_norm": 1.538665410435217, - "learning_rate": 4.845602276941631e-07, - "loss": 1.0341, - "step": 8654 - }, - { - "epoch": 0.7805383956351175, - "grad_norm": 1.4652933066671665, - "learning_rate": 4.841790482174449e-07, - "loss": 1.0023, - "step": 8655 - }, - { - "epoch": 0.7806285791585877, - "grad_norm": 1.6721297453272368, - "learning_rate": 4.837979980789282e-07, - "loss": 0.9606, - "step": 8656 - }, - { - "epoch": 0.780718762682058, - "grad_norm": 1.4793931581698692, - "learning_rate": 4.834170773111273e-07, - "loss": 0.9125, - "step": 8657 - }, - { - "epoch": 0.7808089462055282, - "grad_norm": 1.6340813338302687, - "learning_rate": 4.830362859465431e-07, - "loss": 0.9906, - "step": 8658 - }, - { - "epoch": 0.7808991297289986, - "grad_norm": 1.5633216223586501, - "learning_rate": 4.826556240176675e-07, - "loss": 1.0408, - "step": 8659 - }, - { - "epoch": 0.7809893132524688, - "grad_norm": 1.7419253438594067, - "learning_rate": 4.822750915569807e-07, - "loss": 0.9868, - "step": 8660 - }, - { - "epoch": 0.781079496775939, - "grad_norm": 1.8053122755951534, - "learning_rate": 4.818946885969514e-07, - "loss": 1.0435, - "step": 8661 - }, - { - "epoch": 0.7811696802994093, - "grad_norm": 1.6676594002871843, - "learning_rate": 4.815144151700383e-07, - "loss": 1.0202, - "step": 8662 - }, - { - "epoch": 0.7812598638228796, - "grad_norm": 1.415713868709578, - "learning_rate": 4.811342713086885e-07, - "loss": 0.9664, - "step": 8663 - }, - { - "epoch": 0.7813500473463498, - "grad_norm": 1.6296993675787335, - "learning_rate": 4.807542570453367e-07, - "loss": 1.0092, - "step": 8664 - }, - { - "epoch": 0.7814402308698201, - "grad_norm": 1.4436346263942743, - "learning_rate": 4.803743724124098e-07, - "loss": 0.9492, - "step": 8665 - }, - { - "epoch": 0.7815304143932903, - "grad_norm": 1.4069856203126603, - "learning_rate": 4.799946174423192e-07, - "loss": 0.9768, - "step": 8666 - }, - { - "epoch": 0.7816205979167606, - "grad_norm": 0.621634074367533, - "learning_rate": 4.796149921674706e-07, - "loss": 0.785, - "step": 8667 - }, - { - "epoch": 0.7817107814402309, - "grad_norm": 1.9938398502397154, - "learning_rate": 4.792354966202534e-07, - "loss": 0.9316, - "step": 8668 - }, - { - "epoch": 0.7818009649637011, - "grad_norm": 1.3756831812741181, - "learning_rate": 4.788561308330489e-07, - "loss": 0.7952, - "step": 8669 - }, - { - "epoch": 0.7818911484871713, - "grad_norm": 2.00643065878063, - "learning_rate": 4.784768948382272e-07, - "loss": 0.9169, - "step": 8670 - }, - { - "epoch": 0.7819813320106417, - "grad_norm": 4.883110928380307, - "learning_rate": 4.780977886681461e-07, - "loss": 0.975, - "step": 8671 - }, - { - "epoch": 0.7820715155341119, - "grad_norm": 1.4149999725018287, - "learning_rate": 4.777188123551541e-07, - "loss": 0.9184, - "step": 8672 - }, - { - "epoch": 0.7821616990575821, - "grad_norm": 2.506558398823747, - "learning_rate": 4.773399659315856e-07, - "loss": 0.9957, - "step": 8673 - }, - { - "epoch": 0.7822518825810525, - "grad_norm": 1.7211532437036998, - "learning_rate": 4.769612494297681e-07, - "loss": 0.9536, - "step": 8674 - }, - { - "epoch": 0.7823420661045227, - "grad_norm": 0.6930770944150408, - "learning_rate": 4.765826628820142e-07, - "loss": 0.8354, - "step": 8675 - }, - { - "epoch": 0.782432249627993, - "grad_norm": 1.6423259818272122, - "learning_rate": 4.7620420632062775e-07, - "loss": 0.8835, - "step": 8676 - }, - { - "epoch": 0.7825224331514632, - "grad_norm": 1.631463620752054, - "learning_rate": 4.758258797779002e-07, - "loss": 0.9314, - "step": 8677 - }, - { - "epoch": 0.7826126166749335, - "grad_norm": 0.6710749231613087, - "learning_rate": 4.7544768328611317e-07, - "loss": 0.8431, - "step": 8678 - }, - { - "epoch": 0.7827028001984038, - "grad_norm": 1.5208729603799886, - "learning_rate": 4.750696168775359e-07, - "loss": 0.9519, - "step": 8679 - }, - { - "epoch": 0.782792983721874, - "grad_norm": 0.619820952057513, - "learning_rate": 4.746916805844279e-07, - "loss": 0.8148, - "step": 8680 - }, - { - "epoch": 0.7828831672453442, - "grad_norm": 1.4302798852250374, - "learning_rate": 4.743138744390356e-07, - "loss": 1.0031, - "step": 8681 - }, - { - "epoch": 0.7829733507688146, - "grad_norm": 1.4049316584436253, - "learning_rate": 4.739361984735959e-07, - "loss": 1.0361, - "step": 8682 - }, - { - "epoch": 0.7830635342922848, - "grad_norm": 1.7162946328779634, - "learning_rate": 4.7355865272033455e-07, - "loss": 0.9307, - "step": 8683 - }, - { - "epoch": 0.783153717815755, - "grad_norm": 1.3559082813723802, - "learning_rate": 4.7318123721146563e-07, - "loss": 0.9586, - "step": 8684 - }, - { - "epoch": 0.7832439013392253, - "grad_norm": 1.7544658082645512, - "learning_rate": 4.728039519791924e-07, - "loss": 0.9604, - "step": 8685 - }, - { - "epoch": 0.7833340848626956, - "grad_norm": 0.6851487701947271, - "learning_rate": 4.72426797055707e-07, - "loss": 0.8641, - "step": 8686 - }, - { - "epoch": 0.7834242683861659, - "grad_norm": 1.5548643845423424, - "learning_rate": 4.720497724731904e-07, - "loss": 0.9429, - "step": 8687 - }, - { - "epoch": 0.7835144519096361, - "grad_norm": 1.6793316841289019, - "learning_rate": 4.7167287826381153e-07, - "loss": 1.0151, - "step": 8688 - }, - { - "epoch": 0.7836046354331063, - "grad_norm": 1.2548389234178088, - "learning_rate": 4.712961144597307e-07, - "loss": 0.978, - "step": 8689 - }, - { - "epoch": 0.7836948189565767, - "grad_norm": 1.6799124633599223, - "learning_rate": 4.7091948109309343e-07, - "loss": 1.0144, - "step": 8690 - }, - { - "epoch": 0.7837850024800469, - "grad_norm": 2.6101963783429025, - "learning_rate": 4.705429781960384e-07, - "loss": 0.9866, - "step": 8691 - }, - { - "epoch": 0.7838751860035171, - "grad_norm": 1.6339484101162394, - "learning_rate": 4.7016660580068923e-07, - "loss": 0.9743, - "step": 8692 - }, - { - "epoch": 0.7839653695269874, - "grad_norm": 1.6262514723962427, - "learning_rate": 4.6979036393916093e-07, - "loss": 1.0025, - "step": 8693 - }, - { - "epoch": 0.7840555530504577, - "grad_norm": 1.6204901984982678, - "learning_rate": 4.6941425264355603e-07, - "loss": 1.0439, - "step": 8694 - }, - { - "epoch": 0.7841457365739279, - "grad_norm": 1.3642200199864476, - "learning_rate": 4.6903827194596666e-07, - "loss": 0.983, - "step": 8695 - }, - { - "epoch": 0.7842359200973982, - "grad_norm": 1.4428164812264015, - "learning_rate": 4.686624218784743e-07, - "loss": 1.025, - "step": 8696 - }, - { - "epoch": 0.7843261036208685, - "grad_norm": 1.7320352529325502, - "learning_rate": 4.6828670247314696e-07, - "loss": 0.9634, - "step": 8697 - }, - { - "epoch": 0.7844162871443388, - "grad_norm": 1.5439043902210392, - "learning_rate": 4.679111137620442e-07, - "loss": 0.992, - "step": 8698 - }, - { - "epoch": 0.784506470667809, - "grad_norm": 2.132807693196564, - "learning_rate": 4.67535655777213e-07, - "loss": 0.936, - "step": 8699 - }, - { - "epoch": 0.7845966541912792, - "grad_norm": 1.4469759055229428, - "learning_rate": 4.6716032855068956e-07, - "loss": 0.9834, - "step": 8700 - }, - { - "epoch": 0.7846868377147496, - "grad_norm": 1.76313281771858, - "learning_rate": 4.6678513211449867e-07, - "loss": 1.0216, - "step": 8701 - }, - { - "epoch": 0.7847770212382198, - "grad_norm": 2.5393025915632377, - "learning_rate": 4.6641006650065516e-07, - "loss": 0.938, - "step": 8702 - }, - { - "epoch": 0.78486720476169, - "grad_norm": 1.366453878577004, - "learning_rate": 4.6603513174115973e-07, - "loss": 0.9602, - "step": 8703 - }, - { - "epoch": 0.7849573882851603, - "grad_norm": 1.622135866093345, - "learning_rate": 4.6566032786800625e-07, - "loss": 0.9861, - "step": 8704 - }, - { - "epoch": 0.7850475718086306, - "grad_norm": 1.3892638585111787, - "learning_rate": 4.6528565491317274e-07, - "loss": 0.9501, - "step": 8705 - }, - { - "epoch": 0.7851377553321008, - "grad_norm": 1.5270299564763208, - "learning_rate": 4.649111129086305e-07, - "loss": 1.0424, - "step": 8706 - }, - { - "epoch": 0.7852279388555711, - "grad_norm": 1.404391522991528, - "learning_rate": 4.6453670188633596e-07, - "loss": 0.9898, - "step": 8707 - }, - { - "epoch": 0.7853181223790413, - "grad_norm": 2.213613555339231, - "learning_rate": 4.641624218782365e-07, - "loss": 0.9806, - "step": 8708 - }, - { - "epoch": 0.7854083059025116, - "grad_norm": 1.644452598303836, - "learning_rate": 4.6378827291626765e-07, - "loss": 0.9389, - "step": 8709 - }, - { - "epoch": 0.7854984894259819, - "grad_norm": 1.636538985037735, - "learning_rate": 4.634142550323541e-07, - "loss": 1.0297, - "step": 8710 - }, - { - "epoch": 0.7855886729494521, - "grad_norm": 1.812246107537458, - "learning_rate": 4.6304036825840943e-07, - "loss": 1.024, - "step": 8711 - }, - { - "epoch": 0.7856788564729223, - "grad_norm": 1.379361733915958, - "learning_rate": 4.626666126263341e-07, - "loss": 0.8816, - "step": 8712 - }, - { - "epoch": 0.7857690399963927, - "grad_norm": 1.8516301009959593, - "learning_rate": 4.622929881680213e-07, - "loss": 0.9971, - "step": 8713 - }, - { - "epoch": 0.7858592235198629, - "grad_norm": 1.5844028357594304, - "learning_rate": 4.6191949491534887e-07, - "loss": 0.8865, - "step": 8714 - }, - { - "epoch": 0.7859494070433332, - "grad_norm": 1.5790376951080114, - "learning_rate": 4.6154613290018617e-07, - "loss": 0.9814, - "step": 8715 - }, - { - "epoch": 0.7860395905668034, - "grad_norm": 1.8308329292798136, - "learning_rate": 4.6117290215439043e-07, - "loss": 0.9228, - "step": 8716 - }, - { - "epoch": 0.7861297740902737, - "grad_norm": 1.7586835949256328, - "learning_rate": 4.6079980270980744e-07, - "loss": 0.8457, - "step": 8717 - }, - { - "epoch": 0.786219957613744, - "grad_norm": 1.5151945466375452, - "learning_rate": 4.6042683459827245e-07, - "loss": 0.944, - "step": 8718 - }, - { - "epoch": 0.7863101411372142, - "grad_norm": 0.7594429252605907, - "learning_rate": 4.600539978516098e-07, - "loss": 0.8451, - "step": 8719 - }, - { - "epoch": 0.7864003246606845, - "grad_norm": 1.4108671050396226, - "learning_rate": 4.5968129250163004e-07, - "loss": 0.9352, - "step": 8720 - }, - { - "epoch": 0.7864905081841548, - "grad_norm": 1.7171553410025742, - "learning_rate": 4.5930871858013653e-07, - "loss": 0.9011, - "step": 8721 - }, - { - "epoch": 0.786580691707625, - "grad_norm": 0.6740694581346031, - "learning_rate": 4.589362761189182e-07, - "loss": 0.882, - "step": 8722 - }, - { - "epoch": 0.7866708752310952, - "grad_norm": 1.4387157109314903, - "learning_rate": 4.585639651497539e-07, - "loss": 1.0356, - "step": 8723 - }, - { - "epoch": 0.7867610587545656, - "grad_norm": 2.1800658195078593, - "learning_rate": 4.581917857044115e-07, - "loss": 0.9308, - "step": 8724 - }, - { - "epoch": 0.7868512422780358, - "grad_norm": 1.3469200002848567, - "learning_rate": 4.5781973781464734e-07, - "loss": 1.0088, - "step": 8725 - }, - { - "epoch": 0.7869414258015061, - "grad_norm": 1.5867854515191497, - "learning_rate": 4.574478215122073e-07, - "loss": 1.01, - "step": 8726 - }, - { - "epoch": 0.7870316093249763, - "grad_norm": 2.149709429068275, - "learning_rate": 4.5707603682882357e-07, - "loss": 0.9579, - "step": 8727 - }, - { - "epoch": 0.7871217928484466, - "grad_norm": 2.02593015806841, - "learning_rate": 4.56704383796221e-07, - "loss": 0.8406, - "step": 8728 - }, - { - "epoch": 0.7872119763719169, - "grad_norm": 1.4619737509733737, - "learning_rate": 4.5633286244610956e-07, - "loss": 1.008, - "step": 8729 - }, - { - "epoch": 0.7873021598953871, - "grad_norm": 1.2481831221020232, - "learning_rate": 4.5596147281018993e-07, - "loss": 1.0352, - "step": 8730 - }, - { - "epoch": 0.7873923434188573, - "grad_norm": 1.5758570624948174, - "learning_rate": 4.5559021492015137e-07, - "loss": 1.0768, - "step": 8731 - }, - { - "epoch": 0.7874825269423277, - "grad_norm": 1.414491030004221, - "learning_rate": 4.552190888076712e-07, - "loss": 0.8836, - "step": 8732 - }, - { - "epoch": 0.7875727104657979, - "grad_norm": 1.5315797411826373, - "learning_rate": 4.548480945044164e-07, - "loss": 0.9209, - "step": 8733 - }, - { - "epoch": 0.7876628939892681, - "grad_norm": 1.3884557456552684, - "learning_rate": 4.54477232042042e-07, - "loss": 0.9665, - "step": 8734 - }, - { - "epoch": 0.7877530775127384, - "grad_norm": 1.4326061204146063, - "learning_rate": 4.541065014521921e-07, - "loss": 0.9616, - "step": 8735 - }, - { - "epoch": 0.7878432610362087, - "grad_norm": 2.1279075029677683, - "learning_rate": 4.5373590276649996e-07, - "loss": 0.9214, - "step": 8736 - }, - { - "epoch": 0.787933444559679, - "grad_norm": 1.2018687637968735, - "learning_rate": 4.533654360165862e-07, - "loss": 0.9387, - "step": 8737 - }, - { - "epoch": 0.7880236280831492, - "grad_norm": 2.0829525281649177, - "learning_rate": 4.5299510123406115e-07, - "loss": 1.0295, - "step": 8738 - }, - { - "epoch": 0.7881138116066194, - "grad_norm": 10.645194216739748, - "learning_rate": 4.5262489845052456e-07, - "loss": 0.9369, - "step": 8739 - }, - { - "epoch": 0.7882039951300898, - "grad_norm": 1.5265624250335832, - "learning_rate": 4.5225482769756353e-07, - "loss": 0.94, - "step": 8740 - }, - { - "epoch": 0.78829417865356, - "grad_norm": 2.244323244956114, - "learning_rate": 4.5188488900675545e-07, - "loss": 0.8969, - "step": 8741 - }, - { - "epoch": 0.7883843621770302, - "grad_norm": 1.5885948880978438, - "learning_rate": 4.5151508240966363e-07, - "loss": 0.9942, - "step": 8742 - }, - { - "epoch": 0.7884745457005006, - "grad_norm": 1.3426947775255857, - "learning_rate": 4.511454079378445e-07, - "loss": 1.0173, - "step": 8743 - }, - { - "epoch": 0.7885647292239708, - "grad_norm": 1.6523099133789834, - "learning_rate": 4.507758656228382e-07, - "loss": 0.8688, - "step": 8744 - }, - { - "epoch": 0.788654912747441, - "grad_norm": 1.4208413024112063, - "learning_rate": 4.5040645549617864e-07, - "loss": 1.0274, - "step": 8745 - }, - { - "epoch": 0.7887450962709113, - "grad_norm": 1.7628684341924024, - "learning_rate": 4.5003717758938384e-07, - "loss": 0.884, - "step": 8746 - }, - { - "epoch": 0.7888352797943816, - "grad_norm": 1.4652837880730967, - "learning_rate": 4.4966803193396365e-07, - "loss": 0.9076, - "step": 8747 - }, - { - "epoch": 0.7889254633178518, - "grad_norm": 1.4847599534245302, - "learning_rate": 4.492990185614154e-07, - "loss": 0.9793, - "step": 8748 - }, - { - "epoch": 0.7890156468413221, - "grad_norm": 1.8952054741211555, - "learning_rate": 4.489301375032255e-07, - "loss": 0.964, - "step": 8749 - }, - { - "epoch": 0.7891058303647923, - "grad_norm": 1.4781007373584736, - "learning_rate": 4.4856138879086857e-07, - "loss": 0.9781, - "step": 8750 - }, - { - "epoch": 0.7891960138882627, - "grad_norm": 1.1793933021568697, - "learning_rate": 4.481927724558092e-07, - "loss": 0.9254, - "step": 8751 - }, - { - "epoch": 0.7892861974117329, - "grad_norm": 1.5924863854848845, - "learning_rate": 4.478242885294985e-07, - "loss": 0.9442, - "step": 8752 - }, - { - "epoch": 0.7893763809352031, - "grad_norm": 1.5479109406739713, - "learning_rate": 4.474559370433779e-07, - "loss": 0.9271, - "step": 8753 - }, - { - "epoch": 0.7894665644586734, - "grad_norm": 1.5382027330407264, - "learning_rate": 4.470877180288777e-07, - "loss": 0.9152, - "step": 8754 - }, - { - "epoch": 0.7895567479821437, - "grad_norm": 1.4174750863767085, - "learning_rate": 4.4671963151741574e-07, - "loss": 0.9432, - "step": 8755 - }, - { - "epoch": 0.7896469315056139, - "grad_norm": 1.3621258580682178, - "learning_rate": 4.4635167754039973e-07, - "loss": 0.9507, - "step": 8756 - }, - { - "epoch": 0.7897371150290842, - "grad_norm": 1.2246471441793272, - "learning_rate": 4.459838561292253e-07, - "loss": 0.9951, - "step": 8757 - }, - { - "epoch": 0.7898272985525544, - "grad_norm": 2.2531380492955484, - "learning_rate": 4.456161673152774e-07, - "loss": 0.8931, - "step": 8758 - }, - { - "epoch": 0.7899174820760247, - "grad_norm": 1.2779314748036465, - "learning_rate": 4.4524861112992806e-07, - "loss": 0.9603, - "step": 8759 - }, - { - "epoch": 0.790007665599495, - "grad_norm": 1.4347762134721085, - "learning_rate": 4.448811876045411e-07, - "loss": 0.9654, - "step": 8760 - }, - { - "epoch": 0.7900978491229652, - "grad_norm": 1.713771615257259, - "learning_rate": 4.445138967704647e-07, - "loss": 1.0403, - "step": 8761 - }, - { - "epoch": 0.7901880326464354, - "grad_norm": 1.3974909642563784, - "learning_rate": 4.4414673865904075e-07, - "loss": 0.9424, - "step": 8762 - }, - { - "epoch": 0.7902782161699058, - "grad_norm": 1.3215981718901628, - "learning_rate": 4.437797133015955e-07, - "loss": 0.9807, - "step": 8763 - }, - { - "epoch": 0.790368399693376, - "grad_norm": 1.6336850841791417, - "learning_rate": 4.4341282072944586e-07, - "loss": 0.8891, - "step": 8764 - }, - { - "epoch": 0.7904585832168463, - "grad_norm": 1.3248720161238108, - "learning_rate": 4.430460609738973e-07, - "loss": 0.9938, - "step": 8765 - }, - { - "epoch": 0.7905487667403166, - "grad_norm": 0.7424690917032873, - "learning_rate": 4.4267943406624386e-07, - "loss": 0.8823, - "step": 8766 - }, - { - "epoch": 0.7906389502637868, - "grad_norm": 1.7216199322487913, - "learning_rate": 4.4231294003776853e-07, - "loss": 1.0733, - "step": 8767 - }, - { - "epoch": 0.7907291337872571, - "grad_norm": 4.298656857106239, - "learning_rate": 4.419465789197416e-07, - "loss": 1.0063, - "step": 8768 - }, - { - "epoch": 0.7908193173107273, - "grad_norm": 1.5549229582563475, - "learning_rate": 4.415803507434237e-07, - "loss": 0.9669, - "step": 8769 - }, - { - "epoch": 0.7909095008341976, - "grad_norm": 1.5764733915978073, - "learning_rate": 4.4121425554006307e-07, - "loss": 0.9964, - "step": 8770 - }, - { - "epoch": 0.7909996843576679, - "grad_norm": 1.4857397932703036, - "learning_rate": 4.4084829334089744e-07, - "loss": 0.9542, - "step": 8771 - }, - { - "epoch": 0.7910898678811381, - "grad_norm": 1.6489823873584457, - "learning_rate": 4.404824641771525e-07, - "loss": 0.9181, - "step": 8772 - }, - { - "epoch": 0.7911800514046083, - "grad_norm": 1.319177489574678, - "learning_rate": 4.4011676808004327e-07, - "loss": 1.0445, - "step": 8773 - }, - { - "epoch": 0.7912702349280787, - "grad_norm": 1.555862519464325, - "learning_rate": 4.3975120508077145e-07, - "loss": 0.974, - "step": 8774 - }, - { - "epoch": 0.7913604184515489, - "grad_norm": 1.3968483190527272, - "learning_rate": 4.39385775210531e-07, - "loss": 1.0167, - "step": 8775 - }, - { - "epoch": 0.7914506019750192, - "grad_norm": 1.9275883674403573, - "learning_rate": 4.390204785005003e-07, - "loss": 0.991, - "step": 8776 - }, - { - "epoch": 0.7915407854984894, - "grad_norm": 1.2187377244380033, - "learning_rate": 4.386553149818504e-07, - "loss": 0.9771, - "step": 8777 - }, - { - "epoch": 0.7916309690219597, - "grad_norm": 1.5128025635044482, - "learning_rate": 4.3829028468573793e-07, - "loss": 0.8376, - "step": 8778 - }, - { - "epoch": 0.79172115254543, - "grad_norm": 1.842570428627993, - "learning_rate": 4.3792538764330935e-07, - "loss": 0.9066, - "step": 8779 - }, - { - "epoch": 0.7918113360689002, - "grad_norm": 1.5801586229639786, - "learning_rate": 4.3756062388569994e-07, - "loss": 0.9394, - "step": 8780 - }, - { - "epoch": 0.7919015195923704, - "grad_norm": 0.6429792734724132, - "learning_rate": 4.3719599344403346e-07, - "loss": 0.8403, - "step": 8781 - }, - { - "epoch": 0.7919917031158408, - "grad_norm": 1.2750605437919735, - "learning_rate": 4.3683149634942243e-07, - "loss": 0.9522, - "step": 8782 - }, - { - "epoch": 0.792081886639311, - "grad_norm": 1.8390200445546214, - "learning_rate": 4.364671326329663e-07, - "loss": 0.859, - "step": 8783 - }, - { - "epoch": 0.7921720701627812, - "grad_norm": 1.841189724310445, - "learning_rate": 4.3610290232575673e-07, - "loss": 0.983, - "step": 8784 - }, - { - "epoch": 0.7922622536862515, - "grad_norm": 1.4544615802641043, - "learning_rate": 4.357388054588702e-07, - "loss": 0.9699, - "step": 8785 - }, - { - "epoch": 0.7923524372097218, - "grad_norm": 1.2603322255666876, - "learning_rate": 4.3537484206337405e-07, - "loss": 0.9191, - "step": 8786 - }, - { - "epoch": 0.792442620733192, - "grad_norm": 1.5438670187394747, - "learning_rate": 4.3501101217032366e-07, - "loss": 0.9997, - "step": 8787 - }, - { - "epoch": 0.7925328042566623, - "grad_norm": 2.0014892040593337, - "learning_rate": 4.346473158107629e-07, - "loss": 0.8983, - "step": 8788 - }, - { - "epoch": 0.7926229877801325, - "grad_norm": 1.6800905675544837, - "learning_rate": 4.342837530157244e-07, - "loss": 0.9724, - "step": 8789 - }, - { - "epoch": 0.7927131713036029, - "grad_norm": 1.601510321534738, - "learning_rate": 4.3392032381622987e-07, - "loss": 0.8773, - "step": 8790 - }, - { - "epoch": 0.7928033548270731, - "grad_norm": 1.2446094629647944, - "learning_rate": 4.3355702824328765e-07, - "loss": 0.9366, - "step": 8791 - }, - { - "epoch": 0.7928935383505433, - "grad_norm": 1.3705504990424158, - "learning_rate": 4.3319386632789823e-07, - "loss": 1.0137, - "step": 8792 - }, - { - "epoch": 0.7929837218740137, - "grad_norm": 1.4815070303506472, - "learning_rate": 4.328308381010466e-07, - "loss": 0.9625, - "step": 8793 - }, - { - "epoch": 0.7930739053974839, - "grad_norm": 1.3867726490130454, - "learning_rate": 4.3246794359370933e-07, - "loss": 0.9765, - "step": 8794 - }, - { - "epoch": 0.7931640889209541, - "grad_norm": 1.4784087894597457, - "learning_rate": 4.3210518283685025e-07, - "loss": 0.9543, - "step": 8795 - }, - { - "epoch": 0.7932542724444244, - "grad_norm": 4.163218457419012, - "learning_rate": 4.317425558614225e-07, - "loss": 0.972, - "step": 8796 - }, - { - "epoch": 0.7933444559678947, - "grad_norm": 1.2811211311846917, - "learning_rate": 4.3138006269836744e-07, - "loss": 0.9327, - "step": 8797 - }, - { - "epoch": 0.793434639491365, - "grad_norm": 1.4905348808509398, - "learning_rate": 4.3101770337861376e-07, - "loss": 0.954, - "step": 8798 - }, - { - "epoch": 0.7935248230148352, - "grad_norm": 0.6360721698605817, - "learning_rate": 4.30655477933082e-07, - "loss": 0.8614, - "step": 8799 - }, - { - "epoch": 0.7936150065383054, - "grad_norm": 1.3532601427776023, - "learning_rate": 4.30293386392677e-07, - "loss": 0.9912, - "step": 8800 - }, - { - "epoch": 0.7937051900617758, - "grad_norm": 1.1806244823804037, - "learning_rate": 4.299314287882967e-07, - "loss": 0.9845, - "step": 8801 - }, - { - "epoch": 0.793795373585246, - "grad_norm": 2.1106343218931065, - "learning_rate": 4.2956960515082353e-07, - "loss": 1.0067, - "step": 8802 - }, - { - "epoch": 0.7938855571087162, - "grad_norm": 1.5100421923622642, - "learning_rate": 4.29207915511131e-07, - "loss": 0.9254, - "step": 8803 - }, - { - "epoch": 0.7939757406321865, - "grad_norm": 1.5546713209987697, - "learning_rate": 4.2884635990008024e-07, - "loss": 1.0219, - "step": 8804 - }, - { - "epoch": 0.7940659241556568, - "grad_norm": 1.5417180267576627, - "learning_rate": 4.284849383485214e-07, - "loss": 0.9323, - "step": 8805 - }, - { - "epoch": 0.794156107679127, - "grad_norm": 1.8970080358870953, - "learning_rate": 4.2812365088729296e-07, - "loss": 0.994, - "step": 8806 - }, - { - "epoch": 0.7942462912025973, - "grad_norm": 1.3608793068848573, - "learning_rate": 4.2776249754722227e-07, - "loss": 0.9948, - "step": 8807 - }, - { - "epoch": 0.7943364747260675, - "grad_norm": 4.475760982319052, - "learning_rate": 4.27401478359124e-07, - "loss": 1.032, - "step": 8808 - }, - { - "epoch": 0.7944266582495378, - "grad_norm": 1.3845760931345552, - "learning_rate": 4.2704059335380283e-07, - "loss": 1.0644, - "step": 8809 - }, - { - "epoch": 0.7945168417730081, - "grad_norm": 1.304308396500171, - "learning_rate": 4.266798425620515e-07, - "loss": 1.0718, - "step": 8810 - }, - { - "epoch": 0.7946070252964783, - "grad_norm": 1.3910448533511524, - "learning_rate": 4.263192260146511e-07, - "loss": 0.9512, - "step": 8811 - }, - { - "epoch": 0.7946972088199485, - "grad_norm": 1.4205955366040626, - "learning_rate": 4.2595874374237216e-07, - "loss": 1.0181, - "step": 8812 - }, - { - "epoch": 0.7947873923434189, - "grad_norm": 1.3430918479012535, - "learning_rate": 4.255983957759712e-07, - "loss": 0.9699, - "step": 8813 - }, - { - "epoch": 0.7948775758668891, - "grad_norm": 1.2599799868341541, - "learning_rate": 4.2523818214619745e-07, - "loss": 0.9105, - "step": 8814 - }, - { - "epoch": 0.7949677593903594, - "grad_norm": 1.4119217406325457, - "learning_rate": 4.24878102883784e-07, - "loss": 1.0174, - "step": 8815 - }, - { - "epoch": 0.7950579429138297, - "grad_norm": 1.3753099525669032, - "learning_rate": 4.24518158019457e-07, - "loss": 0.9755, - "step": 8816 - }, - { - "epoch": 0.7951481264372999, - "grad_norm": 1.9556683269710633, - "learning_rate": 4.241583475839274e-07, - "loss": 0.9137, - "step": 8817 - }, - { - "epoch": 0.7952383099607702, - "grad_norm": 1.5156728992561928, - "learning_rate": 4.237986716078965e-07, - "loss": 1.0185, - "step": 8818 - }, - { - "epoch": 0.7953284934842404, - "grad_norm": 1.387324198333628, - "learning_rate": 4.2343913012205433e-07, - "loss": 0.9571, - "step": 8819 - }, - { - "epoch": 0.7954186770077107, - "grad_norm": 1.3855860422565665, - "learning_rate": 4.230797231570784e-07, - "loss": 0.9421, - "step": 8820 - }, - { - "epoch": 0.795508860531181, - "grad_norm": 1.7741101035578086, - "learning_rate": 4.227204507436357e-07, - "loss": 0.9698, - "step": 8821 - }, - { - "epoch": 0.7955990440546512, - "grad_norm": 1.3296932162062245, - "learning_rate": 4.223613129123811e-07, - "loss": 0.894, - "step": 8822 - }, - { - "epoch": 0.7956892275781214, - "grad_norm": 1.2346764691322805, - "learning_rate": 4.220023096939589e-07, - "loss": 0.8812, - "step": 8823 - }, - { - "epoch": 0.7957794111015918, - "grad_norm": 1.61776863013611, - "learning_rate": 4.21643441119e-07, - "loss": 0.9581, - "step": 8824 - }, - { - "epoch": 0.795869594625062, - "grad_norm": 0.6876245732586093, - "learning_rate": 4.212847072181256e-07, - "loss": 0.9097, - "step": 8825 - }, - { - "epoch": 0.7959597781485322, - "grad_norm": 1.613727941857242, - "learning_rate": 4.2092610802194505e-07, - "loss": 0.9417, - "step": 8826 - }, - { - "epoch": 0.7960499616720025, - "grad_norm": 1.3280942576721193, - "learning_rate": 4.2056764356105587e-07, - "loss": 0.8803, - "step": 8827 - }, - { - "epoch": 0.7961401451954728, - "grad_norm": 1.4193365892582273, - "learning_rate": 4.202093138660443e-07, - "loss": 1.0161, - "step": 8828 - }, - { - "epoch": 0.7962303287189431, - "grad_norm": 1.259426952644069, - "learning_rate": 4.198511189674854e-07, - "loss": 0.9998, - "step": 8829 - }, - { - "epoch": 0.7963205122424133, - "grad_norm": 1.5321945858129624, - "learning_rate": 4.1949305889594066e-07, - "loss": 0.9857, - "step": 8830 - }, - { - "epoch": 0.7964106957658835, - "grad_norm": 1.499565458934038, - "learning_rate": 4.191351336819642e-07, - "loss": 0.9665, - "step": 8831 - }, - { - "epoch": 0.7965008792893539, - "grad_norm": 1.5645242929642054, - "learning_rate": 4.187773433560939e-07, - "loss": 0.906, - "step": 8832 - }, - { - "epoch": 0.7965910628128241, - "grad_norm": 1.7390749373081202, - "learning_rate": 4.184196879488604e-07, - "loss": 0.9581, - "step": 8833 - }, - { - "epoch": 0.7966812463362943, - "grad_norm": 1.451466588197204, - "learning_rate": 4.1806216749077936e-07, - "loss": 1.0331, - "step": 8834 - }, - { - "epoch": 0.7967714298597646, - "grad_norm": 1.3115494328562074, - "learning_rate": 4.177047820123569e-07, - "loss": 1.0112, - "step": 8835 - }, - { - "epoch": 0.7968616133832349, - "grad_norm": 1.487060045847529, - "learning_rate": 4.1734753154408733e-07, - "loss": 0.8424, - "step": 8836 - }, - { - "epoch": 0.7969517969067051, - "grad_norm": 1.6406719382701405, - "learning_rate": 4.169904161164528e-07, - "loss": 0.9561, - "step": 8837 - }, - { - "epoch": 0.7970419804301754, - "grad_norm": 1.9932317652177447, - "learning_rate": 4.1663343575992526e-07, - "loss": 1.0798, - "step": 8838 - }, - { - "epoch": 0.7971321639536457, - "grad_norm": 1.5855165871985888, - "learning_rate": 4.1627659050496275e-07, - "loss": 0.9784, - "step": 8839 - }, - { - "epoch": 0.797222347477116, - "grad_norm": 1.4276232382627625, - "learning_rate": 4.1591988038201453e-07, - "loss": 0.8888, - "step": 8840 - }, - { - "epoch": 0.7973125310005862, - "grad_norm": 1.4345408956629107, - "learning_rate": 4.155633054215164e-07, - "loss": 1.0179, - "step": 8841 - }, - { - "epoch": 0.7974027145240564, - "grad_norm": 1.6313003035349163, - "learning_rate": 4.152068656538934e-07, - "loss": 0.8826, - "step": 8842 - }, - { - "epoch": 0.7974928980475268, - "grad_norm": 1.59693478287621, - "learning_rate": 4.148505611095594e-07, - "loss": 1.0337, - "step": 8843 - }, - { - "epoch": 0.797583081570997, - "grad_norm": 1.4946696622638704, - "learning_rate": 4.1449439181891563e-07, - "loss": 0.8501, - "step": 8844 - }, - { - "epoch": 0.7976732650944672, - "grad_norm": 1.5204984043543233, - "learning_rate": 4.14138357812353e-07, - "loss": 0.9858, - "step": 8845 - }, - { - "epoch": 0.7977634486179375, - "grad_norm": 1.2445896841388848, - "learning_rate": 4.137824591202506e-07, - "loss": 1.0256, - "step": 8846 - }, - { - "epoch": 0.7978536321414078, - "grad_norm": 1.4849582530103644, - "learning_rate": 4.134266957729737e-07, - "loss": 0.9542, - "step": 8847 - }, - { - "epoch": 0.797943815664878, - "grad_norm": 1.2778484968757886, - "learning_rate": 4.1307106780088065e-07, - "loss": 1.0638, - "step": 8848 - }, - { - "epoch": 0.7980339991883483, - "grad_norm": 1.2735792209890242, - "learning_rate": 4.1271557523431387e-07, - "loss": 0.9708, - "step": 8849 - }, - { - "epoch": 0.7981241827118185, - "grad_norm": 1.4557927572138065, - "learning_rate": 4.1236021810360634e-07, - "loss": 0.9559, - "step": 8850 - }, - { - "epoch": 0.7982143662352889, - "grad_norm": 2.2163243533774066, - "learning_rate": 4.120049964390793e-07, - "loss": 0.9186, - "step": 8851 - }, - { - "epoch": 0.7983045497587591, - "grad_norm": 1.5898975110161278, - "learning_rate": 4.116499102710418e-07, - "loss": 0.9215, - "step": 8852 - }, - { - "epoch": 0.7983947332822293, - "grad_norm": 1.787062080466235, - "learning_rate": 4.112949596297928e-07, - "loss": 0.9434, - "step": 8853 - }, - { - "epoch": 0.7984849168056996, - "grad_norm": 1.4805344539344936, - "learning_rate": 4.1094014454561664e-07, - "loss": 0.9137, - "step": 8854 - }, - { - "epoch": 0.7985751003291699, - "grad_norm": 1.9579583140369015, - "learning_rate": 4.1058546504879057e-07, - "loss": 0.8704, - "step": 8855 - }, - { - "epoch": 0.7986652838526401, - "grad_norm": 4.019186496315279, - "learning_rate": 4.1023092116957583e-07, - "loss": 1.0274, - "step": 8856 - }, - { - "epoch": 0.7987554673761104, - "grad_norm": 1.4418312004590084, - "learning_rate": 4.098765129382249e-07, - "loss": 1.0045, - "step": 8857 - }, - { - "epoch": 0.7988456508995806, - "grad_norm": 1.8078996220254326, - "learning_rate": 4.0952224038497764e-07, - "loss": 1.0411, - "step": 8858 - }, - { - "epoch": 0.7989358344230509, - "grad_norm": 1.405529388595452, - "learning_rate": 4.091681035400627e-07, - "loss": 0.9589, - "step": 8859 - }, - { - "epoch": 0.7990260179465212, - "grad_norm": 1.4465712555978611, - "learning_rate": 4.088141024336971e-07, - "loss": 0.996, - "step": 8860 - }, - { - "epoch": 0.7991162014699914, - "grad_norm": 1.280972706631284, - "learning_rate": 4.0846023709608636e-07, - "loss": 1.0442, - "step": 8861 - }, - { - "epoch": 0.7992063849934617, - "grad_norm": 1.575366943241915, - "learning_rate": 4.081065075574226e-07, - "loss": 1.039, - "step": 8862 - }, - { - "epoch": 0.799296568516932, - "grad_norm": 1.347915536014293, - "learning_rate": 4.077529138478906e-07, - "loss": 0.8931, - "step": 8863 - }, - { - "epoch": 0.7993867520404022, - "grad_norm": 0.7024167202223772, - "learning_rate": 4.073994559976588e-07, - "loss": 0.8077, - "step": 8864 - }, - { - "epoch": 0.7994769355638724, - "grad_norm": 1.6644558866917067, - "learning_rate": 4.0704613403688716e-07, - "loss": 0.9304, - "step": 8865 - }, - { - "epoch": 0.7995671190873428, - "grad_norm": 1.2773275578490384, - "learning_rate": 4.0669294799572264e-07, - "loss": 1.0224, - "step": 8866 - }, - { - "epoch": 0.799657302610813, - "grad_norm": 2.063083450560013, - "learning_rate": 4.0633989790430113e-07, - "loss": 1.0689, - "step": 8867 - }, - { - "epoch": 0.7997474861342833, - "grad_norm": 1.835337504290132, - "learning_rate": 4.059869837927477e-07, - "loss": 0.9797, - "step": 8868 - }, - { - "epoch": 0.7998376696577535, - "grad_norm": 1.576240925653098, - "learning_rate": 4.056342056911728e-07, - "loss": 0.9569, - "step": 8869 - }, - { - "epoch": 0.7999278531812238, - "grad_norm": 1.4931553602744698, - "learning_rate": 4.052815636296798e-07, - "loss": 0.9879, - "step": 8870 - }, - { - "epoch": 0.8000180367046941, - "grad_norm": 1.355787651958696, - "learning_rate": 4.0492905763835593e-07, - "loss": 1.0001, - "step": 8871 - }, - { - "epoch": 0.8001082202281643, - "grad_norm": 1.3999078362646218, - "learning_rate": 4.0457668774728115e-07, - "loss": 1.0002, - "step": 8872 - }, - { - "epoch": 0.8001984037516345, - "grad_norm": 1.5347020688275221, - "learning_rate": 4.0422445398651985e-07, - "loss": 0.9886, - "step": 8873 - }, - { - "epoch": 0.8002885872751049, - "grad_norm": 1.3532152159043715, - "learning_rate": 4.0387235638612706e-07, - "loss": 0.8889, - "step": 8874 - }, - { - "epoch": 0.8003787707985751, - "grad_norm": 1.5946124865908198, - "learning_rate": 4.0352039497614586e-07, - "loss": 0.977, - "step": 8875 - }, - { - "epoch": 0.8004689543220453, - "grad_norm": 2.2759003293449713, - "learning_rate": 4.031685697866074e-07, - "loss": 0.9518, - "step": 8876 - }, - { - "epoch": 0.8005591378455156, - "grad_norm": 2.3151779134452153, - "learning_rate": 4.0281688084753165e-07, - "loss": 1.0074, - "step": 8877 - }, - { - "epoch": 0.8006493213689859, - "grad_norm": 1.4139530708586392, - "learning_rate": 4.0246532818892675e-07, - "loss": 1.0179, - "step": 8878 - }, - { - "epoch": 0.8007395048924562, - "grad_norm": 1.5683257787011051, - "learning_rate": 4.0211391184078814e-07, - "loss": 1.0035, - "step": 8879 - }, - { - "epoch": 0.8008296884159264, - "grad_norm": 1.3876289067228411, - "learning_rate": 4.0176263183310135e-07, - "loss": 0.9976, - "step": 8880 - }, - { - "epoch": 0.8009198719393966, - "grad_norm": 1.285463800704774, - "learning_rate": 4.0141148819583925e-07, - "loss": 1.0261, - "step": 8881 - }, - { - "epoch": 0.801010055462867, - "grad_norm": 0.6488036994539333, - "learning_rate": 4.010604809589637e-07, - "loss": 0.8393, - "step": 8882 - }, - { - "epoch": 0.8011002389863372, - "grad_norm": 2.7948081621860257, - "learning_rate": 4.0070961015242475e-07, - "loss": 0.9814, - "step": 8883 - }, - { - "epoch": 0.8011904225098074, - "grad_norm": 1.2805841157715991, - "learning_rate": 4.0035887580615933e-07, - "loss": 1.0414, - "step": 8884 - }, - { - "epoch": 0.8012806060332777, - "grad_norm": 1.483018194790513, - "learning_rate": 4.0000827795009594e-07, - "loss": 1.0254, - "step": 8885 - }, - { - "epoch": 0.801370789556748, - "grad_norm": 1.6233361602669814, - "learning_rate": 3.996578166141475e-07, - "loss": 0.9328, - "step": 8886 - }, - { - "epoch": 0.8014609730802182, - "grad_norm": 1.5609714660920266, - "learning_rate": 3.9930749182821955e-07, - "loss": 0.979, - "step": 8887 - }, - { - "epoch": 0.8015511566036885, - "grad_norm": 0.8571982819939156, - "learning_rate": 3.9895730362220116e-07, - "loss": 0.8421, - "step": 8888 - }, - { - "epoch": 0.8016413401271588, - "grad_norm": 1.4571502064271125, - "learning_rate": 3.986072520259749e-07, - "loss": 1.003, - "step": 8889 - }, - { - "epoch": 0.801731523650629, - "grad_norm": 1.5048485912316023, - "learning_rate": 3.9825733706940736e-07, - "loss": 0.9588, - "step": 8890 - }, - { - "epoch": 0.8018217071740993, - "grad_norm": 1.4623170754565349, - "learning_rate": 3.979075587823557e-07, - "loss": 0.9874, - "step": 8891 - }, - { - "epoch": 0.8019118906975695, - "grad_norm": 1.9497411482740028, - "learning_rate": 3.9755791719466504e-07, - "loss": 0.8504, - "step": 8892 - }, - { - "epoch": 0.8020020742210399, - "grad_norm": 1.7676470495453434, - "learning_rate": 3.9720841233616875e-07, - "loss": 0.9267, - "step": 8893 - }, - { - "epoch": 0.8020922577445101, - "grad_norm": 1.4347886762628936, - "learning_rate": 3.968590442366888e-07, - "loss": 0.9434, - "step": 8894 - }, - { - "epoch": 0.8021824412679803, - "grad_norm": 1.4942736996498773, - "learning_rate": 3.9650981292603423e-07, - "loss": 0.9532, - "step": 8895 - }, - { - "epoch": 0.8022726247914506, - "grad_norm": 2.3778710328687507, - "learning_rate": 3.961607184340041e-07, - "loss": 0.9446, - "step": 8896 - }, - { - "epoch": 0.8023628083149209, - "grad_norm": 1.2541177161328492, - "learning_rate": 3.9581176079038505e-07, - "loss": 0.8701, - "step": 8897 - }, - { - "epoch": 0.8024529918383911, - "grad_norm": 1.3179012604660303, - "learning_rate": 3.954629400249516e-07, - "loss": 0.9249, - "step": 8898 - }, - { - "epoch": 0.8025431753618614, - "grad_norm": 1.4753301412654451, - "learning_rate": 3.9511425616746787e-07, - "loss": 1.0414, - "step": 8899 - }, - { - "epoch": 0.8026333588853316, - "grad_norm": 1.4686869343954954, - "learning_rate": 3.947657092476853e-07, - "loss": 1.0259, - "step": 8900 - }, - { - "epoch": 0.802723542408802, - "grad_norm": 1.3319196906036461, - "learning_rate": 3.944172992953425e-07, - "loss": 0.9859, - "step": 8901 - }, - { - "epoch": 0.8028137259322722, - "grad_norm": 1.3064249770226701, - "learning_rate": 3.9406902634017e-07, - "loss": 0.9217, - "step": 8902 - }, - { - "epoch": 0.8029039094557424, - "grad_norm": 1.3022135504141643, - "learning_rate": 3.9372089041188227e-07, - "loss": 1.0081, - "step": 8903 - }, - { - "epoch": 0.8029940929792126, - "grad_norm": 1.5804331301693113, - "learning_rate": 3.9337289154018593e-07, - "loss": 0.8895, - "step": 8904 - }, - { - "epoch": 0.803084276502683, - "grad_norm": 1.79334296682434, - "learning_rate": 3.930250297547728e-07, - "loss": 0.9736, - "step": 8905 - }, - { - "epoch": 0.8031744600261532, - "grad_norm": 1.3462024753296735, - "learning_rate": 3.9267730508532513e-07, - "loss": 0.8437, - "step": 8906 - }, - { - "epoch": 0.8032646435496235, - "grad_norm": 1.4801711133035271, - "learning_rate": 3.923297175615121e-07, - "loss": 0.9638, - "step": 8907 - }, - { - "epoch": 0.8033548270730937, - "grad_norm": 1.3220136168466843, - "learning_rate": 3.9198226721299243e-07, - "loss": 1.0118, - "step": 8908 - }, - { - "epoch": 0.803445010596564, - "grad_norm": 1.630601545043086, - "learning_rate": 3.916349540694128e-07, - "loss": 0.9491, - "step": 8909 - }, - { - "epoch": 0.8035351941200343, - "grad_norm": 1.657389914307646, - "learning_rate": 3.912877781604063e-07, - "loss": 0.9749, - "step": 8910 - }, - { - "epoch": 0.8036253776435045, - "grad_norm": 1.5005948158924027, - "learning_rate": 3.909407395155977e-07, - "loss": 1.0059, - "step": 8911 - }, - { - "epoch": 0.8037155611669748, - "grad_norm": 2.017682348847819, - "learning_rate": 3.9059383816459725e-07, - "loss": 0.9115, - "step": 8912 - }, - { - "epoch": 0.8038057446904451, - "grad_norm": 1.4234735913521892, - "learning_rate": 3.902470741370045e-07, - "loss": 1.0222, - "step": 8913 - }, - { - "epoch": 0.8038959282139153, - "grad_norm": 1.3638904161481384, - "learning_rate": 3.8990044746240746e-07, - "loss": 0.9638, - "step": 8914 - }, - { - "epoch": 0.8039861117373855, - "grad_norm": 1.856149905810238, - "learning_rate": 3.8955395817038237e-07, - "loss": 1.0753, - "step": 8915 - }, - { - "epoch": 0.8040762952608559, - "grad_norm": 1.2575795686548457, - "learning_rate": 3.892076062904934e-07, - "loss": 0.939, - "step": 8916 - }, - { - "epoch": 0.8041664787843261, - "grad_norm": 1.5776348297003933, - "learning_rate": 3.8886139185229384e-07, - "loss": 0.9353, - "step": 8917 - }, - { - "epoch": 0.8042566623077964, - "grad_norm": 1.4412254488889469, - "learning_rate": 3.8851531488532284e-07, - "loss": 0.8872, - "step": 8918 - }, - { - "epoch": 0.8043468458312666, - "grad_norm": 1.3223779086921075, - "learning_rate": 3.88169375419112e-07, - "loss": 0.9892, - "step": 8919 - }, - { - "epoch": 0.8044370293547369, - "grad_norm": 1.713674924668189, - "learning_rate": 3.8782357348317717e-07, - "loss": 0.9464, - "step": 8920 - }, - { - "epoch": 0.8045272128782072, - "grad_norm": 1.7920980229943857, - "learning_rate": 3.8747790910702437e-07, - "loss": 1.0209, - "step": 8921 - }, - { - "epoch": 0.8046173964016774, - "grad_norm": 2.475773827158573, - "learning_rate": 3.8713238232014776e-07, - "loss": 0.9784, - "step": 8922 - }, - { - "epoch": 0.8047075799251476, - "grad_norm": 1.4213826930658382, - "learning_rate": 3.867869931520296e-07, - "loss": 1.0288, - "step": 8923 - }, - { - "epoch": 0.804797763448618, - "grad_norm": 1.3849879679518735, - "learning_rate": 3.864417416321406e-07, - "loss": 0.9837, - "step": 8924 - }, - { - "epoch": 0.8048879469720882, - "grad_norm": 1.4447875848156457, - "learning_rate": 3.8609662778993847e-07, - "loss": 0.9549, - "step": 8925 - }, - { - "epoch": 0.8049781304955584, - "grad_norm": 0.6011499749173576, - "learning_rate": 3.85751651654872e-07, - "loss": 0.8084, - "step": 8926 - }, - { - "epoch": 0.8050683140190287, - "grad_norm": 1.5556134864071463, - "learning_rate": 3.8540681325637505e-07, - "loss": 1.0817, - "step": 8927 - }, - { - "epoch": 0.805158497542499, - "grad_norm": 1.4136118724070768, - "learning_rate": 3.8506211262387155e-07, - "loss": 0.9829, - "step": 8928 - }, - { - "epoch": 0.8052486810659693, - "grad_norm": 2.302855924418714, - "learning_rate": 3.847175497867732e-07, - "loss": 0.9835, - "step": 8929 - }, - { - "epoch": 0.8053388645894395, - "grad_norm": 1.8874508402908856, - "learning_rate": 3.843731247744801e-07, - "loss": 0.9223, - "step": 8930 - }, - { - "epoch": 0.8054290481129097, - "grad_norm": 1.6928601084425083, - "learning_rate": 3.8402883761638047e-07, - "loss": 1.0182, - "step": 8931 - }, - { - "epoch": 0.8055192316363801, - "grad_norm": 0.7062177667604623, - "learning_rate": 3.8368468834185076e-07, - "loss": 0.9111, - "step": 8932 - }, - { - "epoch": 0.8056094151598503, - "grad_norm": 1.4891321348097675, - "learning_rate": 3.8334067698025583e-07, - "loss": 1.0136, - "step": 8933 - }, - { - "epoch": 0.8056995986833205, - "grad_norm": 1.3783218831359727, - "learning_rate": 3.8299680356094897e-07, - "loss": 0.914, - "step": 8934 - }, - { - "epoch": 0.8057897822067909, - "grad_norm": 1.8502625356434323, - "learning_rate": 3.8265306811327024e-07, - "loss": 0.8652, - "step": 8935 - }, - { - "epoch": 0.8058799657302611, - "grad_norm": 1.6614151075075565, - "learning_rate": 3.8230947066654994e-07, - "loss": 0.9398, - "step": 8936 - }, - { - "epoch": 0.8059701492537313, - "grad_norm": 1.7303698233750415, - "learning_rate": 3.819660112501053e-07, - "loss": 1.0326, - "step": 8937 - }, - { - "epoch": 0.8060603327772016, - "grad_norm": 0.7080271751123907, - "learning_rate": 3.816226898932422e-07, - "loss": 0.8649, - "step": 8938 - }, - { - "epoch": 0.8061505163006719, - "grad_norm": 1.5460058525586602, - "learning_rate": 3.812795066252557e-07, - "loss": 0.9646, - "step": 8939 - }, - { - "epoch": 0.8062406998241421, - "grad_norm": 1.7590579897466438, - "learning_rate": 3.8093646147542577e-07, - "loss": 0.9898, - "step": 8940 - }, - { - "epoch": 0.8063308833476124, - "grad_norm": 1.5120143552078513, - "learning_rate": 3.805935544730259e-07, - "loss": 0.9431, - "step": 8941 - }, - { - "epoch": 0.8064210668710826, - "grad_norm": 1.3667653767286136, - "learning_rate": 3.802507856473118e-07, - "loss": 0.9075, - "step": 8942 - }, - { - "epoch": 0.806511250394553, - "grad_norm": 2.467356228026558, - "learning_rate": 3.7990815502753317e-07, - "loss": 0.9757, - "step": 8943 - }, - { - "epoch": 0.8066014339180232, - "grad_norm": 1.2482385622490288, - "learning_rate": 3.795656626429231e-07, - "loss": 0.9736, - "step": 8944 - }, - { - "epoch": 0.8066916174414934, - "grad_norm": 1.8271745346546213, - "learning_rate": 3.792233085227059e-07, - "loss": 0.8928, - "step": 8945 - }, - { - "epoch": 0.8067818009649637, - "grad_norm": 1.7916445693789909, - "learning_rate": 3.788810926960928e-07, - "loss": 0.9824, - "step": 8946 - }, - { - "epoch": 0.806871984488434, - "grad_norm": 1.3962800416643006, - "learning_rate": 3.785390151922836e-07, - "loss": 0.9602, - "step": 8947 - }, - { - "epoch": 0.8069621680119042, - "grad_norm": 1.630669021918737, - "learning_rate": 3.781970760404665e-07, - "loss": 0.9501, - "step": 8948 - }, - { - "epoch": 0.8070523515353745, - "grad_norm": 1.3559352280948733, - "learning_rate": 3.778552752698176e-07, - "loss": 1.0451, - "step": 8949 - }, - { - "epoch": 0.8071425350588447, - "grad_norm": 5.0609697454543525, - "learning_rate": 3.775136129095007e-07, - "loss": 0.8873, - "step": 8950 - }, - { - "epoch": 0.807232718582315, - "grad_norm": 1.439463476922792, - "learning_rate": 3.771720889886685e-07, - "loss": 0.9461, - "step": 8951 - }, - { - "epoch": 0.8073229021057853, - "grad_norm": 0.7653360405413279, - "learning_rate": 3.7683070353646194e-07, - "loss": 0.8851, - "step": 8952 - }, - { - "epoch": 0.8074130856292555, - "grad_norm": 1.5055828310586532, - "learning_rate": 3.7648945658200983e-07, - "loss": 0.9359, - "step": 8953 - }, - { - "epoch": 0.8075032691527257, - "grad_norm": 1.7205646298948325, - "learning_rate": 3.761483481544292e-07, - "loss": 0.8585, - "step": 8954 - }, - { - "epoch": 0.8075934526761961, - "grad_norm": 1.711337595802193, - "learning_rate": 3.7580737828282525e-07, - "loss": 0.9418, - "step": 8955 - }, - { - "epoch": 0.8076836361996663, - "grad_norm": 1.4926498256095064, - "learning_rate": 3.754665469962921e-07, - "loss": 0.9554, - "step": 8956 - }, - { - "epoch": 0.8077738197231366, - "grad_norm": 1.4032412661360996, - "learning_rate": 3.7512585432390973e-07, - "loss": 0.9311, - "step": 8957 - }, - { - "epoch": 0.8078640032466069, - "grad_norm": 1.4655625677740356, - "learning_rate": 3.7478530029474987e-07, - "loss": 0.9118, - "step": 8958 - }, - { - "epoch": 0.8079541867700771, - "grad_norm": 0.6196580042819267, - "learning_rate": 3.7444488493786854e-07, - "loss": 0.8784, - "step": 8959 - }, - { - "epoch": 0.8080443702935474, - "grad_norm": 1.9942587463485513, - "learning_rate": 3.7410460828231405e-07, - "loss": 1.0302, - "step": 8960 - }, - { - "epoch": 0.8081345538170176, - "grad_norm": 1.262958779400082, - "learning_rate": 3.737644703571188e-07, - "loss": 0.9046, - "step": 8961 - }, - { - "epoch": 0.8082247373404879, - "grad_norm": 2.26208714464529, - "learning_rate": 3.734244711913059e-07, - "loss": 1.0486, - "step": 8962 - }, - { - "epoch": 0.8083149208639582, - "grad_norm": 1.3880511692685145, - "learning_rate": 3.7308461081388584e-07, - "loss": 0.9413, - "step": 8963 - }, - { - "epoch": 0.8084051043874284, - "grad_norm": 2.2846608203597563, - "learning_rate": 3.727448892538576e-07, - "loss": 0.9013, - "step": 8964 - }, - { - "epoch": 0.8084952879108986, - "grad_norm": 1.3415981628668119, - "learning_rate": 3.724053065402086e-07, - "loss": 1.051, - "step": 8965 - }, - { - "epoch": 0.808585471434369, - "grad_norm": 3.1928886789951005, - "learning_rate": 3.7206586270191285e-07, - "loss": 0.9388, - "step": 8966 - }, - { - "epoch": 0.8086756549578392, - "grad_norm": 1.4017516349836012, - "learning_rate": 3.7172655776793385e-07, - "loss": 1.0183, - "step": 8967 - }, - { - "epoch": 0.8087658384813095, - "grad_norm": 1.4486204490632928, - "learning_rate": 3.7138739176722323e-07, - "loss": 0.8645, - "step": 8968 - }, - { - "epoch": 0.8088560220047797, - "grad_norm": 1.5112617058060027, - "learning_rate": 3.710483647287206e-07, - "loss": 0.944, - "step": 8969 - }, - { - "epoch": 0.80894620552825, - "grad_norm": 1.3807808126630412, - "learning_rate": 3.707094766813532e-07, - "loss": 0.875, - "step": 8970 - }, - { - "epoch": 0.8090363890517203, - "grad_norm": 1.5721695580365749, - "learning_rate": 3.7037072765403754e-07, - "loss": 0.9426, - "step": 8971 - }, - { - "epoch": 0.8091265725751905, - "grad_norm": 1.6538666184785062, - "learning_rate": 3.700321176756762e-07, - "loss": 0.928, - "step": 8972 - }, - { - "epoch": 0.8092167560986607, - "grad_norm": 1.4573759024185617, - "learning_rate": 3.69693646775163e-07, - "loss": 1.0209, - "step": 8973 - }, - { - "epoch": 0.8093069396221311, - "grad_norm": 1.9252667155593401, - "learning_rate": 3.693553149813764e-07, - "loss": 0.9482, - "step": 8974 - }, - { - "epoch": 0.8093971231456013, - "grad_norm": 0.5830598996407327, - "learning_rate": 3.690171223231866e-07, - "loss": 0.805, - "step": 8975 - }, - { - "epoch": 0.8094873066690715, - "grad_norm": 1.229864353515747, - "learning_rate": 3.6867906882944854e-07, - "loss": 1.066, - "step": 8976 - }, - { - "epoch": 0.8095774901925418, - "grad_norm": 1.2967115552008308, - "learning_rate": 3.6834115452900737e-07, - "loss": 0.9738, - "step": 8977 - }, - { - "epoch": 0.8096676737160121, - "grad_norm": 1.4753694912035902, - "learning_rate": 3.680033794506958e-07, - "loss": 0.8708, - "step": 8978 - }, - { - "epoch": 0.8097578572394823, - "grad_norm": 2.554511761234885, - "learning_rate": 3.676657436233346e-07, - "loss": 0.8043, - "step": 8979 - }, - { - "epoch": 0.8098480407629526, - "grad_norm": 1.5023943546875669, - "learning_rate": 3.6732824707573305e-07, - "loss": 0.9658, - "step": 8980 - }, - { - "epoch": 0.8099382242864229, - "grad_norm": 1.520351080880305, - "learning_rate": 3.6699088983668716e-07, - "loss": 0.9983, - "step": 8981 - }, - { - "epoch": 0.8100284078098932, - "grad_norm": 1.9467982244890658, - "learning_rate": 3.6665367193498376e-07, - "loss": 0.9236, - "step": 8982 - }, - { - "epoch": 0.8101185913333634, - "grad_norm": 1.7973547253642121, - "learning_rate": 3.663165933993948e-07, - "loss": 0.9679, - "step": 8983 - }, - { - "epoch": 0.8102087748568336, - "grad_norm": 2.1910264065090077, - "learning_rate": 3.659796542586822e-07, - "loss": 0.9909, - "step": 8984 - }, - { - "epoch": 0.810298958380304, - "grad_norm": 1.830764560348284, - "learning_rate": 3.6564285454159526e-07, - "loss": 0.93, - "step": 8985 - }, - { - "epoch": 0.8103891419037742, - "grad_norm": 1.8947513413153347, - "learning_rate": 3.653061942768718e-07, - "loss": 1.0077, - "step": 8986 - }, - { - "epoch": 0.8104793254272444, - "grad_norm": 1.620772952775009, - "learning_rate": 3.649696734932375e-07, - "loss": 0.9311, - "step": 8987 - }, - { - "epoch": 0.8105695089507147, - "grad_norm": 1.5549119950196086, - "learning_rate": 3.646332922194064e-07, - "loss": 0.9033, - "step": 8988 - }, - { - "epoch": 0.810659692474185, - "grad_norm": 1.452031698453656, - "learning_rate": 3.6429705048407943e-07, - "loss": 0.9566, - "step": 8989 - }, - { - "epoch": 0.8107498759976552, - "grad_norm": 3.247795384271192, - "learning_rate": 3.6396094831594804e-07, - "loss": 0.9619, - "step": 8990 - }, - { - "epoch": 0.8108400595211255, - "grad_norm": 2.0331223294990797, - "learning_rate": 3.6362498574368926e-07, - "loss": 0.9229, - "step": 8991 - }, - { - "epoch": 0.8109302430445957, - "grad_norm": 0.7030910271808427, - "learning_rate": 3.6328916279596935e-07, - "loss": 0.8912, - "step": 8992 - }, - { - "epoch": 0.811020426568066, - "grad_norm": 2.0609002122260596, - "learning_rate": 3.6295347950144305e-07, - "loss": 1.0165, - "step": 8993 - }, - { - "epoch": 0.8111106100915363, - "grad_norm": 1.841963985948424, - "learning_rate": 3.626179358887522e-07, - "loss": 0.8279, - "step": 8994 - }, - { - "epoch": 0.8112007936150065, - "grad_norm": 1.3782936442810065, - "learning_rate": 3.6228253198652816e-07, - "loss": 1.0299, - "step": 8995 - }, - { - "epoch": 0.8112909771384768, - "grad_norm": 2.086079110471428, - "learning_rate": 3.6194726782338767e-07, - "loss": 0.92, - "step": 8996 - }, - { - "epoch": 0.8113811606619471, - "grad_norm": 1.6362534910999704, - "learning_rate": 3.6161214342793953e-07, - "loss": 0.9413, - "step": 8997 - }, - { - "epoch": 0.8114713441854173, - "grad_norm": 1.4224791867521873, - "learning_rate": 3.612771588287764e-07, - "loss": 1.0001, - "step": 8998 - }, - { - "epoch": 0.8115615277088876, - "grad_norm": 1.7192857080844823, - "learning_rate": 3.609423140544827e-07, - "loss": 1.0323, - "step": 8999 - }, - { - "epoch": 0.8116517112323578, - "grad_norm": 2.23061518681132, - "learning_rate": 3.6060760913362787e-07, - "loss": 0.9848, - "step": 9000 - }, - { - "epoch": 0.8117418947558281, - "grad_norm": 1.3534854593810006, - "learning_rate": 3.6027304409477146e-07, - "loss": 0.9723, - "step": 9001 - }, - { - "epoch": 0.8118320782792984, - "grad_norm": 1.3247295733459163, - "learning_rate": 3.599386189664604e-07, - "loss": 0.945, - "step": 9002 - }, - { - "epoch": 0.8119222618027686, - "grad_norm": 6.020523573707066, - "learning_rate": 3.5960433377722945e-07, - "loss": 0.9596, - "step": 9003 - }, - { - "epoch": 0.8120124453262388, - "grad_norm": 1.8128069913609794, - "learning_rate": 3.5927018855560174e-07, - "loss": 1.0202, - "step": 9004 - }, - { - "epoch": 0.8121026288497092, - "grad_norm": 1.3666433068736905, - "learning_rate": 3.5893618333008904e-07, - "loss": 0.9647, - "step": 9005 - }, - { - "epoch": 0.8121928123731794, - "grad_norm": 1.6481474851796647, - "learning_rate": 3.586023181291893e-07, - "loss": 1.0579, - "step": 9006 - }, - { - "epoch": 0.8122829958966497, - "grad_norm": 0.7863045262046968, - "learning_rate": 3.5826859298139044e-07, - "loss": 0.8926, - "step": 9007 - }, - { - "epoch": 0.81237317942012, - "grad_norm": 1.383976807171197, - "learning_rate": 3.5793500791516773e-07, - "loss": 0.9575, - "step": 9008 - }, - { - "epoch": 0.8124633629435902, - "grad_norm": 1.4774122265661103, - "learning_rate": 3.5760156295898415e-07, - "loss": 0.9414, - "step": 9009 - }, - { - "epoch": 0.8125535464670605, - "grad_norm": 1.3760505910849203, - "learning_rate": 3.5726825814129203e-07, - "loss": 0.9736, - "step": 9010 - }, - { - "epoch": 0.8126437299905307, - "grad_norm": 0.7284820794367043, - "learning_rate": 3.5693509349052886e-07, - "loss": 0.8928, - "step": 9011 - }, - { - "epoch": 0.812733913514001, - "grad_norm": 1.2561204321549784, - "learning_rate": 3.5660206903512433e-07, - "loss": 1.0434, - "step": 9012 - }, - { - "epoch": 0.8128240970374713, - "grad_norm": 1.7777309121802478, - "learning_rate": 3.56269184803492e-07, - "loss": 1.0207, - "step": 9013 - }, - { - "epoch": 0.8129142805609415, - "grad_norm": 1.2165275753328015, - "learning_rate": 3.5593644082403727e-07, - "loss": 0.9902, - "step": 9014 - }, - { - "epoch": 0.8130044640844117, - "grad_norm": 1.8262565389172767, - "learning_rate": 3.5560383712514994e-07, - "loss": 1.0785, - "step": 9015 - }, - { - "epoch": 0.8130946476078821, - "grad_norm": 1.4265836783066521, - "learning_rate": 3.5527137373521066e-07, - "loss": 0.9721, - "step": 9016 - }, - { - "epoch": 0.8131848311313523, - "grad_norm": 1.435611811615913, - "learning_rate": 3.5493905068258645e-07, - "loss": 0.93, - "step": 9017 - }, - { - "epoch": 0.8132750146548225, - "grad_norm": 1.363562087723781, - "learning_rate": 3.546068679956333e-07, - "loss": 0.9884, - "step": 9018 - }, - { - "epoch": 0.8133651981782928, - "grad_norm": 1.3102596781241655, - "learning_rate": 3.5427482570269487e-07, - "loss": 1.0308, - "step": 9019 - }, - { - "epoch": 0.8134553817017631, - "grad_norm": 2.024537836717562, - "learning_rate": 3.539429238321026e-07, - "loss": 0.8171, - "step": 9020 - }, - { - "epoch": 0.8135455652252334, - "grad_norm": 1.3808382672068442, - "learning_rate": 3.536111624121769e-07, - "loss": 0.8227, - "step": 9021 - }, - { - "epoch": 0.8136357487487036, - "grad_norm": 1.4364852640874635, - "learning_rate": 3.532795414712244e-07, - "loss": 0.9278, - "step": 9022 - }, - { - "epoch": 0.8137259322721738, - "grad_norm": 1.260623562858795, - "learning_rate": 3.5294806103754124e-07, - "loss": 0.9873, - "step": 9023 - }, - { - "epoch": 0.8138161157956442, - "grad_norm": 1.5753430310850964, - "learning_rate": 3.526167211394115e-07, - "loss": 0.9828, - "step": 9024 - }, - { - "epoch": 0.8139062993191144, - "grad_norm": 1.332705374702452, - "learning_rate": 3.522855218051066e-07, - "loss": 0.9039, - "step": 9025 - }, - { - "epoch": 0.8139964828425846, - "grad_norm": 2.1371755164064665, - "learning_rate": 3.5195446306288633e-07, - "loss": 0.9715, - "step": 9026 - }, - { - "epoch": 0.8140866663660549, - "grad_norm": 1.295994885096052, - "learning_rate": 3.51623544940999e-07, - "loss": 0.9189, - "step": 9027 - }, - { - "epoch": 0.8141768498895252, - "grad_norm": 1.3389211876105274, - "learning_rate": 3.5129276746767886e-07, - "loss": 0.9524, - "step": 9028 - }, - { - "epoch": 0.8142670334129954, - "grad_norm": 1.831135284684518, - "learning_rate": 3.5096213067115165e-07, - "loss": 0.943, - "step": 9029 - }, - { - "epoch": 0.8143572169364657, - "grad_norm": 1.7322159121112082, - "learning_rate": 3.506316345796272e-07, - "loss": 0.9837, - "step": 9030 - }, - { - "epoch": 0.814447400459936, - "grad_norm": 0.5900367017014977, - "learning_rate": 3.5030127922130714e-07, - "loss": 0.7842, - "step": 9031 - }, - { - "epoch": 0.8145375839834063, - "grad_norm": 1.4094673227999408, - "learning_rate": 3.4997106462437784e-07, - "loss": 0.8592, - "step": 9032 - }, - { - "epoch": 0.8146277675068765, - "grad_norm": 0.6867904686393056, - "learning_rate": 3.496409908170157e-07, - "loss": 0.8171, - "step": 9033 - }, - { - "epoch": 0.8147179510303467, - "grad_norm": 1.6747298065439564, - "learning_rate": 3.493110578273839e-07, - "loss": 0.9441, - "step": 9034 - }, - { - "epoch": 0.8148081345538171, - "grad_norm": 1.4900630987491048, - "learning_rate": 3.489812656836346e-07, - "loss": 1.0283, - "step": 9035 - }, - { - "epoch": 0.8148983180772873, - "grad_norm": 1.3115163932150966, - "learning_rate": 3.486516144139078e-07, - "loss": 0.8906, - "step": 9036 - }, - { - "epoch": 0.8149885016007575, - "grad_norm": 1.4296541470665587, - "learning_rate": 3.4832210404632957e-07, - "loss": 0.9328, - "step": 9037 - }, - { - "epoch": 0.8150786851242278, - "grad_norm": 1.3104144283855752, - "learning_rate": 3.479927346090179e-07, - "loss": 0.9775, - "step": 9038 - }, - { - "epoch": 0.8151688686476981, - "grad_norm": 1.5153743890642941, - "learning_rate": 3.4766350613007455e-07, - "loss": 0.95, - "step": 9039 - }, - { - "epoch": 0.8152590521711683, - "grad_norm": 1.431665938128877, - "learning_rate": 3.4733441863759173e-07, - "loss": 0.9553, - "step": 9040 - }, - { - "epoch": 0.8153492356946386, - "grad_norm": 2.0139570801981175, - "learning_rate": 3.4700547215964916e-07, - "loss": 0.8591, - "step": 9041 - }, - { - "epoch": 0.8154394192181088, - "grad_norm": 1.2225767359608988, - "learning_rate": 3.46676666724314e-07, - "loss": 0.9452, - "step": 9042 - }, - { - "epoch": 0.8155296027415792, - "grad_norm": 2.525937097226835, - "learning_rate": 3.463480023596421e-07, - "loss": 0.9635, - "step": 9043 - }, - { - "epoch": 0.8156197862650494, - "grad_norm": 1.6306454820907392, - "learning_rate": 3.460194790936772e-07, - "loss": 1.0439, - "step": 9044 - }, - { - "epoch": 0.8157099697885196, - "grad_norm": 1.5291828293253689, - "learning_rate": 3.456910969544495e-07, - "loss": 0.9624, - "step": 9045 - }, - { - "epoch": 0.8158001533119899, - "grad_norm": 1.7059726531597326, - "learning_rate": 3.4536285596997994e-07, - "loss": 0.995, - "step": 9046 - }, - { - "epoch": 0.8158903368354602, - "grad_norm": 1.6154995277425346, - "learning_rate": 3.450347561682747e-07, - "loss": 0.9427, - "step": 9047 - }, - { - "epoch": 0.8159805203589304, - "grad_norm": 1.7142985874214933, - "learning_rate": 3.4470679757732945e-07, - "loss": 0.9134, - "step": 9048 - }, - { - "epoch": 0.8160707038824007, - "grad_norm": 1.3322876714189686, - "learning_rate": 3.4437898022512735e-07, - "loss": 1.0553, - "step": 9049 - }, - { - "epoch": 0.8161608874058709, - "grad_norm": 1.863544693402952, - "learning_rate": 3.4405130413963977e-07, - "loss": 0.973, - "step": 9050 - }, - { - "epoch": 0.8162510709293412, - "grad_norm": 1.247387779172673, - "learning_rate": 3.437237693488262e-07, - "loss": 1.0375, - "step": 9051 - }, - { - "epoch": 0.8163412544528115, - "grad_norm": 1.2277622065604374, - "learning_rate": 3.433963758806322e-07, - "loss": 0.9838, - "step": 9052 - }, - { - "epoch": 0.8164314379762817, - "grad_norm": 1.4704926886093035, - "learning_rate": 3.430691237629948e-07, - "loss": 0.8351, - "step": 9053 - }, - { - "epoch": 0.816521621499752, - "grad_norm": 1.6590648698960488, - "learning_rate": 3.427420130238354e-07, - "loss": 0.913, - "step": 9054 - }, - { - "epoch": 0.8166118050232223, - "grad_norm": 1.3393259187749151, - "learning_rate": 3.424150436910658e-07, - "loss": 1.0518, - "step": 9055 - }, - { - "epoch": 0.8167019885466925, - "grad_norm": 1.7949833740059442, - "learning_rate": 3.420882157925842e-07, - "loss": 1.0292, - "step": 9056 - }, - { - "epoch": 0.8167921720701627, - "grad_norm": 1.8926196334900849, - "learning_rate": 3.417615293562777e-07, - "loss": 0.9491, - "step": 9057 - }, - { - "epoch": 0.8168823555936331, - "grad_norm": 1.4966701263835203, - "learning_rate": 3.4143498441002105e-07, - "loss": 0.8828, - "step": 9058 - }, - { - "epoch": 0.8169725391171033, - "grad_norm": 1.2900811534313101, - "learning_rate": 3.411085809816767e-07, - "loss": 0.9763, - "step": 9059 - }, - { - "epoch": 0.8170627226405736, - "grad_norm": 1.875902149605141, - "learning_rate": 3.407823190990953e-07, - "loss": 1.0858, - "step": 9060 - }, - { - "epoch": 0.8171529061640438, - "grad_norm": 1.6594413384864455, - "learning_rate": 3.4045619879011577e-07, - "loss": 1.045, - "step": 9061 - }, - { - "epoch": 0.8172430896875141, - "grad_norm": 1.4857312080315608, - "learning_rate": 3.4013022008256334e-07, - "loss": 0.8961, - "step": 9062 - }, - { - "epoch": 0.8173332732109844, - "grad_norm": 1.6613892049568908, - "learning_rate": 3.398043830042532e-07, - "loss": 0.9016, - "step": 9063 - }, - { - "epoch": 0.8174234567344546, - "grad_norm": 5.1153684222007545, - "learning_rate": 3.394786875829871e-07, - "loss": 0.9634, - "step": 9064 - }, - { - "epoch": 0.8175136402579248, - "grad_norm": 1.4108341943851253, - "learning_rate": 3.3915313384655564e-07, - "loss": 0.8524, - "step": 9065 - }, - { - "epoch": 0.8176038237813952, - "grad_norm": 1.7826198196266314, - "learning_rate": 3.388277218227369e-07, - "loss": 0.9854, - "step": 9066 - }, - { - "epoch": 0.8176940073048654, - "grad_norm": 1.7706916153990457, - "learning_rate": 3.3850245153929557e-07, - "loss": 0.9894, - "step": 9067 - }, - { - "epoch": 0.8177841908283356, - "grad_norm": 1.4343733577677316, - "learning_rate": 3.381773230239875e-07, - "loss": 0.9232, - "step": 9068 - }, - { - "epoch": 0.8178743743518059, - "grad_norm": 1.7244266054589699, - "learning_rate": 3.3785233630455247e-07, - "loss": 1.0024, - "step": 9069 - }, - { - "epoch": 0.8179645578752762, - "grad_norm": 1.7260505987598886, - "learning_rate": 3.375274914087221e-07, - "loss": 0.9814, - "step": 9070 - }, - { - "epoch": 0.8180547413987465, - "grad_norm": 1.3976614306435435, - "learning_rate": 3.3720278836421234e-07, - "loss": 0.9433, - "step": 9071 - }, - { - "epoch": 0.8181449249222167, - "grad_norm": 1.5166364617885733, - "learning_rate": 3.368782271987294e-07, - "loss": 0.9884, - "step": 9072 - }, - { - "epoch": 0.8182351084456869, - "grad_norm": 1.6447102863777996, - "learning_rate": 3.3655380793996636e-07, - "loss": 0.9781, - "step": 9073 - }, - { - "epoch": 0.8183252919691573, - "grad_norm": 1.304945000918218, - "learning_rate": 3.362295306156047e-07, - "loss": 0.8926, - "step": 9074 - }, - { - "epoch": 0.8184154754926275, - "grad_norm": 1.380270955326563, - "learning_rate": 3.3590539525331327e-07, - "loss": 0.9813, - "step": 9075 - }, - { - "epoch": 0.8185056590160977, - "grad_norm": 1.6230553214797894, - "learning_rate": 3.3558140188074967e-07, - "loss": 0.925, - "step": 9076 - }, - { - "epoch": 0.8185958425395681, - "grad_norm": 2.0918803051464416, - "learning_rate": 3.3525755052555817e-07, - "loss": 0.9956, - "step": 9077 - }, - { - "epoch": 0.8186860260630383, - "grad_norm": 0.7514816986236201, - "learning_rate": 3.3493384121537147e-07, - "loss": 0.9164, - "step": 9078 - }, - { - "epoch": 0.8187762095865085, - "grad_norm": 1.823229699278502, - "learning_rate": 3.3461027397781075e-07, - "loss": 0.9711, - "step": 9079 - }, - { - "epoch": 0.8188663931099788, - "grad_norm": 1.4406249867602698, - "learning_rate": 3.3428684884048397e-07, - "loss": 0.9563, - "step": 9080 - }, - { - "epoch": 0.8189565766334491, - "grad_norm": 1.2396863793035566, - "learning_rate": 3.3396356583098826e-07, - "loss": 0.9692, - "step": 9081 - }, - { - "epoch": 0.8190467601569194, - "grad_norm": 1.4334193651122085, - "learning_rate": 3.3364042497690736e-07, - "loss": 1.0001, - "step": 9082 - }, - { - "epoch": 0.8191369436803896, - "grad_norm": 1.2430590565002886, - "learning_rate": 3.3331742630581405e-07, - "loss": 1.0227, - "step": 9083 - }, - { - "epoch": 0.8192271272038598, - "grad_norm": 1.4097542652106871, - "learning_rate": 3.3299456984526717e-07, - "loss": 0.9163, - "step": 9084 - }, - { - "epoch": 0.8193173107273302, - "grad_norm": 1.2772010466863142, - "learning_rate": 3.3267185562281605e-07, - "loss": 1.0097, - "step": 9085 - }, - { - "epoch": 0.8194074942508004, - "grad_norm": 1.5456117037561024, - "learning_rate": 3.3234928366599514e-07, - "loss": 0.9632, - "step": 9086 - }, - { - "epoch": 0.8194976777742706, - "grad_norm": 1.9885152443005802, - "learning_rate": 3.3202685400232946e-07, - "loss": 1.0081, - "step": 9087 - }, - { - "epoch": 0.8195878612977409, - "grad_norm": 1.8054311301893888, - "learning_rate": 3.317045666593292e-07, - "loss": 0.997, - "step": 9088 - }, - { - "epoch": 0.8196780448212112, - "grad_norm": 2.21106290967065, - "learning_rate": 3.3138242166449426e-07, - "loss": 0.9704, - "step": 9089 - }, - { - "epoch": 0.8197682283446814, - "grad_norm": 1.2875081256498935, - "learning_rate": 3.310604190453117e-07, - "loss": 1.0322, - "step": 9090 - }, - { - "epoch": 0.8198584118681517, - "grad_norm": 1.3273922413482997, - "learning_rate": 3.307385588292566e-07, - "loss": 0.9203, - "step": 9091 - }, - { - "epoch": 0.8199485953916219, - "grad_norm": 1.8247438773624647, - "learning_rate": 3.304168410437924e-07, - "loss": 0.96, - "step": 9092 - }, - { - "epoch": 0.8200387789150922, - "grad_norm": 0.6605688983622876, - "learning_rate": 3.300952657163687e-07, - "loss": 0.8747, - "step": 9093 - }, - { - "epoch": 0.8201289624385625, - "grad_norm": 1.4294422543828458, - "learning_rate": 3.297738328744248e-07, - "loss": 0.9494, - "step": 9094 - }, - { - "epoch": 0.8202191459620327, - "grad_norm": 1.5211681189085862, - "learning_rate": 3.2945254254538714e-07, - "loss": 0.9584, - "step": 9095 - }, - { - "epoch": 0.820309329485503, - "grad_norm": 1.2761191000766998, - "learning_rate": 3.2913139475666963e-07, - "loss": 0.9376, - "step": 9096 - }, - { - "epoch": 0.8203995130089733, - "grad_norm": 1.9098296403055492, - "learning_rate": 3.288103895356749e-07, - "loss": 0.9248, - "step": 9097 - }, - { - "epoch": 0.8204896965324435, - "grad_norm": 3.5216344038687093, - "learning_rate": 3.284895269097927e-07, - "loss": 0.9039, - "step": 9098 - }, - { - "epoch": 0.8205798800559138, - "grad_norm": 0.7291870295996443, - "learning_rate": 3.281688069063999e-07, - "loss": 0.8493, - "step": 9099 - }, - { - "epoch": 0.8206700635793841, - "grad_norm": 1.7792584847455246, - "learning_rate": 3.2784822955286396e-07, - "loss": 0.7952, - "step": 9100 - }, - { - "epoch": 0.8207602471028543, - "grad_norm": 1.3199533278710553, - "learning_rate": 3.275277948765365e-07, - "loss": 1.0359, - "step": 9101 - }, - { - "epoch": 0.8208504306263246, - "grad_norm": 1.3952830876478826, - "learning_rate": 3.2720750290475964e-07, - "loss": 1.0199, - "step": 9102 - }, - { - "epoch": 0.8209406141497948, - "grad_norm": 1.8297313577427037, - "learning_rate": 3.268873536648622e-07, - "loss": 0.9718, - "step": 9103 - }, - { - "epoch": 0.8210307976732651, - "grad_norm": 1.519345858132422, - "learning_rate": 3.265673471841612e-07, - "loss": 0.9421, - "step": 9104 - }, - { - "epoch": 0.8211209811967354, - "grad_norm": 0.6451225804869499, - "learning_rate": 3.262474834899616e-07, - "loss": 0.7973, - "step": 9105 - }, - { - "epoch": 0.8212111647202056, - "grad_norm": 0.6559178329143673, - "learning_rate": 3.2592776260955534e-07, - "loss": 0.8797, - "step": 9106 - }, - { - "epoch": 0.8213013482436758, - "grad_norm": 2.2823546361020504, - "learning_rate": 3.256081845702239e-07, - "loss": 1.0148, - "step": 9107 - }, - { - "epoch": 0.8213915317671462, - "grad_norm": 1.5497746119137918, - "learning_rate": 3.2528874939923335e-07, - "loss": 1.0176, - "step": 9108 - }, - { - "epoch": 0.8214817152906164, - "grad_norm": 1.395688770646365, - "learning_rate": 3.2496945712384217e-07, - "loss": 0.9284, - "step": 9109 - }, - { - "epoch": 0.8215718988140867, - "grad_norm": 1.8884628795665035, - "learning_rate": 3.246503077712923e-07, - "loss": 0.9553, - "step": 9110 - }, - { - "epoch": 0.8216620823375569, - "grad_norm": 1.270583294936, - "learning_rate": 3.2433130136881625e-07, - "loss": 1.0615, - "step": 9111 - }, - { - "epoch": 0.8217522658610272, - "grad_norm": 2.12469199697291, - "learning_rate": 3.2401243794363287e-07, - "loss": 0.9673, - "step": 9112 - }, - { - "epoch": 0.8218424493844975, - "grad_norm": 1.512824391028926, - "learning_rate": 3.236937175229495e-07, - "loss": 1.0277, - "step": 9113 - }, - { - "epoch": 0.8219326329079677, - "grad_norm": 1.9414085219310462, - "learning_rate": 3.233751401339615e-07, - "loss": 0.9363, - "step": 9114 - }, - { - "epoch": 0.8220228164314379, - "grad_norm": 1.5404653594156443, - "learning_rate": 3.2305670580385157e-07, - "loss": 1.0484, - "step": 9115 - }, - { - "epoch": 0.8221129999549083, - "grad_norm": 1.345726599855345, - "learning_rate": 3.227384145597898e-07, - "loss": 0.9658, - "step": 9116 - }, - { - "epoch": 0.8222031834783785, - "grad_norm": 1.8449104019573987, - "learning_rate": 3.224202664289346e-07, - "loss": 0.9536, - "step": 9117 - }, - { - "epoch": 0.8222933670018487, - "grad_norm": 1.2568762475628308, - "learning_rate": 3.2210226143843257e-07, - "loss": 0.9122, - "step": 9118 - }, - { - "epoch": 0.822383550525319, - "grad_norm": 0.689677539644624, - "learning_rate": 3.217843996154173e-07, - "loss": 0.8734, - "step": 9119 - }, - { - "epoch": 0.8224737340487893, - "grad_norm": 0.6472980415178476, - "learning_rate": 3.2146668098701055e-07, - "loss": 0.7879, - "step": 9120 - }, - { - "epoch": 0.8225639175722596, - "grad_norm": 1.3756366902902781, - "learning_rate": 3.2114910558032215e-07, - "loss": 1.0954, - "step": 9121 - }, - { - "epoch": 0.8226541010957298, - "grad_norm": 1.807431071801961, - "learning_rate": 3.2083167342244945e-07, - "loss": 0.9938, - "step": 9122 - }, - { - "epoch": 0.8227442846192, - "grad_norm": 1.723809027376934, - "learning_rate": 3.205143845404763e-07, - "loss": 1.0657, - "step": 9123 - }, - { - "epoch": 0.8228344681426704, - "grad_norm": 1.7161092411784515, - "learning_rate": 3.201972389614773e-07, - "loss": 0.948, - "step": 9124 - }, - { - "epoch": 0.8229246516661406, - "grad_norm": 2.247794659980101, - "learning_rate": 3.198802367125115e-07, - "loss": 0.9734, - "step": 9125 - }, - { - "epoch": 0.8230148351896108, - "grad_norm": 1.4552386099365202, - "learning_rate": 3.195633778206288e-07, - "loss": 0.9798, - "step": 9126 - }, - { - "epoch": 0.8231050187130812, - "grad_norm": 1.7168316626034992, - "learning_rate": 3.19246662312864e-07, - "loss": 1.0453, - "step": 9127 - }, - { - "epoch": 0.8231952022365514, - "grad_norm": 0.6614996203856125, - "learning_rate": 3.189300902162417e-07, - "loss": 0.8388, - "step": 9128 - }, - { - "epoch": 0.8232853857600216, - "grad_norm": 1.5466005486436436, - "learning_rate": 3.1861366155777327e-07, - "loss": 0.9779, - "step": 9129 - }, - { - "epoch": 0.8233755692834919, - "grad_norm": 1.2317032691324352, - "learning_rate": 3.182973763644583e-07, - "loss": 0.9818, - "step": 9130 - }, - { - "epoch": 0.8234657528069622, - "grad_norm": 1.4097579858604117, - "learning_rate": 3.1798123466328463e-07, - "loss": 0.8988, - "step": 9131 - }, - { - "epoch": 0.8235559363304324, - "grad_norm": 1.502516780892854, - "learning_rate": 3.17665236481226e-07, - "loss": 0.9713, - "step": 9132 - }, - { - "epoch": 0.8236461198539027, - "grad_norm": 2.261560727727959, - "learning_rate": 3.1734938184524576e-07, - "loss": 0.9307, - "step": 9133 - }, - { - "epoch": 0.8237363033773729, - "grad_norm": 1.5870313959026805, - "learning_rate": 3.1703367078229427e-07, - "loss": 0.9827, - "step": 9134 - }, - { - "epoch": 0.8238264869008433, - "grad_norm": 1.74984481668427, - "learning_rate": 3.167181033193096e-07, - "loss": 0.9304, - "step": 9135 - }, - { - "epoch": 0.8239166704243135, - "grad_norm": 1.5379090610103525, - "learning_rate": 3.16402679483218e-07, - "loss": 0.9838, - "step": 9136 - }, - { - "epoch": 0.8240068539477837, - "grad_norm": 1.2013235104999331, - "learning_rate": 3.1608739930093366e-07, - "loss": 0.9063, - "step": 9137 - }, - { - "epoch": 0.824097037471254, - "grad_norm": 1.4964917004753735, - "learning_rate": 3.157722627993562e-07, - "loss": 0.8849, - "step": 9138 - }, - { - "epoch": 0.8241872209947243, - "grad_norm": 1.9504481778705929, - "learning_rate": 3.1545727000537727e-07, - "loss": 0.9341, - "step": 9139 - }, - { - "epoch": 0.8242774045181945, - "grad_norm": 1.4655441847523902, - "learning_rate": 3.151424209458713e-07, - "loss": 0.9726, - "step": 9140 - }, - { - "epoch": 0.8243675880416648, - "grad_norm": 1.5403659161658392, - "learning_rate": 3.148277156477053e-07, - "loss": 0.9928, - "step": 9141 - }, - { - "epoch": 0.824457771565135, - "grad_norm": 1.6584108759452132, - "learning_rate": 3.145131541377299e-07, - "loss": 0.9769, - "step": 9142 - }, - { - "epoch": 0.8245479550886053, - "grad_norm": 1.6100929658716572, - "learning_rate": 3.1419873644278606e-07, - "loss": 0.9332, - "step": 9143 - }, - { - "epoch": 0.8246381386120756, - "grad_norm": 1.4798308837769532, - "learning_rate": 3.1388446258970147e-07, - "loss": 0.9853, - "step": 9144 - }, - { - "epoch": 0.8247283221355458, - "grad_norm": 1.5787042744373339, - "learning_rate": 3.1357033260529145e-07, - "loss": 0.958, - "step": 9145 - }, - { - "epoch": 0.824818505659016, - "grad_norm": 1.2986523961096463, - "learning_rate": 3.1325634651636025e-07, - "loss": 1.0487, - "step": 9146 - }, - { - "epoch": 0.8249086891824864, - "grad_norm": 1.2792739982597927, - "learning_rate": 3.1294250434969694e-07, - "loss": 1.0076, - "step": 9147 - }, - { - "epoch": 0.8249988727059566, - "grad_norm": 1.7632281483440517, - "learning_rate": 3.1262880613208274e-07, - "loss": 1.0103, - "step": 9148 - }, - { - "epoch": 0.8250890562294269, - "grad_norm": 1.3168200286848453, - "learning_rate": 3.123152518902823e-07, - "loss": 1.0056, - "step": 9149 - }, - { - "epoch": 0.8251792397528972, - "grad_norm": 1.2674248225021396, - "learning_rate": 3.1200184165105017e-07, - "loss": 0.931, - "step": 9150 - }, - { - "epoch": 0.8252694232763674, - "grad_norm": 3.7581954411982306, - "learning_rate": 3.116885754411287e-07, - "loss": 0.977, - "step": 9151 - }, - { - "epoch": 0.8253596067998377, - "grad_norm": 1.4945148473099743, - "learning_rate": 3.1137545328724703e-07, - "loss": 0.9027, - "step": 9152 - }, - { - "epoch": 0.8254497903233079, - "grad_norm": 2.4092456946204384, - "learning_rate": 3.1106247521612285e-07, - "loss": 0.9707, - "step": 9153 - }, - { - "epoch": 0.8255399738467782, - "grad_norm": 1.555671802029751, - "learning_rate": 3.107496412544612e-07, - "loss": 0.9818, - "step": 9154 - }, - { - "epoch": 0.8256301573702485, - "grad_norm": 1.3034439099194688, - "learning_rate": 3.1043695142895397e-07, - "loss": 0.995, - "step": 9155 - }, - { - "epoch": 0.8257203408937187, - "grad_norm": 1.4977528110631224, - "learning_rate": 3.101244057662828e-07, - "loss": 0.8895, - "step": 9156 - }, - { - "epoch": 0.8258105244171889, - "grad_norm": 1.65553995423074, - "learning_rate": 3.098120042931152e-07, - "loss": 0.9585, - "step": 9157 - }, - { - "epoch": 0.8259007079406593, - "grad_norm": 1.6319296608473943, - "learning_rate": 3.0949974703610647e-07, - "loss": 1.0163, - "step": 9158 - }, - { - "epoch": 0.8259908914641295, - "grad_norm": 2.342594421340213, - "learning_rate": 3.0918763402190107e-07, - "loss": 0.9607, - "step": 9159 - }, - { - "epoch": 0.8260810749875998, - "grad_norm": 1.4974429269655132, - "learning_rate": 3.088756652771296e-07, - "loss": 0.9318, - "step": 9160 - }, - { - "epoch": 0.82617125851107, - "grad_norm": 1.9268895300936928, - "learning_rate": 3.0856384082841147e-07, - "loss": 1.0675, - "step": 9161 - }, - { - "epoch": 0.8262614420345403, - "grad_norm": 1.4447710827399636, - "learning_rate": 3.0825216070235207e-07, - "loss": 0.9211, - "step": 9162 - }, - { - "epoch": 0.8263516255580106, - "grad_norm": 1.7860439105301407, - "learning_rate": 3.0794062492554764e-07, - "loss": 1.0837, - "step": 9163 - }, - { - "epoch": 0.8264418090814808, - "grad_norm": 3.372312252624187, - "learning_rate": 3.076292335245783e-07, - "loss": 0.9859, - "step": 9164 - }, - { - "epoch": 0.826531992604951, - "grad_norm": 1.5982033834976053, - "learning_rate": 3.073179865260145e-07, - "loss": 1.0381, - "step": 9165 - }, - { - "epoch": 0.8266221761284214, - "grad_norm": 1.3940653730686514, - "learning_rate": 3.070068839564135e-07, - "loss": 0.979, - "step": 9166 - }, - { - "epoch": 0.8267123596518916, - "grad_norm": 1.6868065716212683, - "learning_rate": 3.0669592584232006e-07, - "loss": 1.0007, - "step": 9167 - }, - { - "epoch": 0.8268025431753618, - "grad_norm": 2.70367602426575, - "learning_rate": 3.063851122102672e-07, - "loss": 0.8857, - "step": 9168 - }, - { - "epoch": 0.8268927266988321, - "grad_norm": 5.670695200556735, - "learning_rate": 3.06074443086775e-07, - "loss": 0.9009, - "step": 9169 - }, - { - "epoch": 0.8269829102223024, - "grad_norm": 0.6217778355204221, - "learning_rate": 3.057639184983514e-07, - "loss": 0.8019, - "step": 9170 - }, - { - "epoch": 0.8270730937457726, - "grad_norm": 1.2293369961843998, - "learning_rate": 3.054535384714927e-07, - "loss": 0.9697, - "step": 9171 - }, - { - "epoch": 0.8271632772692429, - "grad_norm": 1.5232888173639156, - "learning_rate": 3.0514330303268135e-07, - "loss": 1.1317, - "step": 9172 - }, - { - "epoch": 0.8272534607927132, - "grad_norm": 1.5196410779316623, - "learning_rate": 3.0483321220838876e-07, - "loss": 0.9229, - "step": 9173 - }, - { - "epoch": 0.8273436443161835, - "grad_norm": 1.2471110815973188, - "learning_rate": 3.045232660250734e-07, - "loss": 0.8273, - "step": 9174 - }, - { - "epoch": 0.8274338278396537, - "grad_norm": 3.123058631593334, - "learning_rate": 3.0421346450918185e-07, - "loss": 0.9982, - "step": 9175 - }, - { - "epoch": 0.8275240113631239, - "grad_norm": 1.3419581598135333, - "learning_rate": 3.039038076871485e-07, - "loss": 0.9198, - "step": 9176 - }, - { - "epoch": 0.8276141948865943, - "grad_norm": 1.3136041175140034, - "learning_rate": 3.035942955853934e-07, - "loss": 0.9906, - "step": 9177 - }, - { - "epoch": 0.8277043784100645, - "grad_norm": 1.6932921434759416, - "learning_rate": 3.0328492823032804e-07, - "loss": 0.9445, - "step": 9178 - }, - { - "epoch": 0.8277945619335347, - "grad_norm": 1.6895387131725381, - "learning_rate": 3.029757056483471e-07, - "loss": 0.9095, - "step": 9179 - }, - { - "epoch": 0.827884745457005, - "grad_norm": 1.334976564551686, - "learning_rate": 3.026666278658372e-07, - "loss": 0.8594, - "step": 9180 - }, - { - "epoch": 0.8279749289804753, - "grad_norm": 1.1890212404007787, - "learning_rate": 3.023576949091691e-07, - "loss": 0.9735, - "step": 9181 - }, - { - "epoch": 0.8280651125039455, - "grad_norm": 1.6171605941819378, - "learning_rate": 3.020489068047032e-07, - "loss": 0.8929, - "step": 9182 - }, - { - "epoch": 0.8281552960274158, - "grad_norm": 1.5184909426934334, - "learning_rate": 3.017402635787869e-07, - "loss": 0.9401, - "step": 9183 - }, - { - "epoch": 0.828245479550886, - "grad_norm": 1.3853476179694126, - "learning_rate": 3.0143176525775537e-07, - "loss": 0.9414, - "step": 9184 - }, - { - "epoch": 0.8283356630743564, - "grad_norm": 1.4427124555875457, - "learning_rate": 3.0112341186793155e-07, - "loss": 0.9121, - "step": 9185 - }, - { - "epoch": 0.8284258465978266, - "grad_norm": 1.4784433808307527, - "learning_rate": 3.008152034356264e-07, - "loss": 0.9109, - "step": 9186 - }, - { - "epoch": 0.8285160301212968, - "grad_norm": 0.6339305608054249, - "learning_rate": 3.005071399871366e-07, - "loss": 0.8171, - "step": 9187 - }, - { - "epoch": 0.828606213644767, - "grad_norm": 1.4098621600662449, - "learning_rate": 3.0019922154874853e-07, - "loss": 1.033, - "step": 9188 - }, - { - "epoch": 0.8286963971682374, - "grad_norm": 1.7224943815895435, - "learning_rate": 2.998914481467356e-07, - "loss": 0.927, - "step": 9189 - }, - { - "epoch": 0.8287865806917076, - "grad_norm": 1.688778287141002, - "learning_rate": 2.9958381980735837e-07, - "loss": 0.9165, - "step": 9190 - }, - { - "epoch": 0.8288767642151779, - "grad_norm": 1.5762864535636771, - "learning_rate": 2.992763365568658e-07, - "loss": 0.9001, - "step": 9191 - }, - { - "epoch": 0.8289669477386481, - "grad_norm": 1.6512180398624636, - "learning_rate": 2.98968998421494e-07, - "loss": 0.9702, - "step": 9192 - }, - { - "epoch": 0.8290571312621184, - "grad_norm": 1.5156619667185616, - "learning_rate": 2.98661805427467e-07, - "loss": 0.9614, - "step": 9193 - }, - { - "epoch": 0.8291473147855887, - "grad_norm": 1.455250405987013, - "learning_rate": 2.9835475760099483e-07, - "loss": 0.9869, - "step": 9194 - }, - { - "epoch": 0.8292374983090589, - "grad_norm": 1.2761008839263617, - "learning_rate": 2.9804785496827856e-07, - "loss": 0.9839, - "step": 9195 - }, - { - "epoch": 0.8293276818325293, - "grad_norm": 1.717122573249892, - "learning_rate": 2.977410975555028e-07, - "loss": 0.9638, - "step": 9196 - }, - { - "epoch": 0.8294178653559995, - "grad_norm": 1.6039422939317933, - "learning_rate": 2.9743448538884376e-07, - "loss": 1.0239, - "step": 9197 - }, - { - "epoch": 0.8295080488794697, - "grad_norm": 1.5899383741708006, - "learning_rate": 2.9712801849446154e-07, - "loss": 0.9573, - "step": 9198 - }, - { - "epoch": 0.82959823240294, - "grad_norm": 1.4219057687637147, - "learning_rate": 2.9682169689850665e-07, - "loss": 0.9144, - "step": 9199 - }, - { - "epoch": 0.8296884159264103, - "grad_norm": 0.6842002793680874, - "learning_rate": 2.9651552062711573e-07, - "loss": 0.8052, - "step": 9200 - }, - { - "epoch": 0.8297785994498805, - "grad_norm": 1.2938080797657552, - "learning_rate": 2.9620948970641333e-07, - "loss": 1.0425, - "step": 9201 - }, - { - "epoch": 0.8298687829733508, - "grad_norm": 1.7262743541151444, - "learning_rate": 2.959036041625125e-07, - "loss": 0.9853, - "step": 9202 - }, - { - "epoch": 0.829958966496821, - "grad_norm": 1.8667114800228364, - "learning_rate": 2.95597864021512e-07, - "loss": 0.8795, - "step": 9203 - }, - { - "epoch": 0.8300491500202913, - "grad_norm": 1.984616062210254, - "learning_rate": 2.9529226930949966e-07, - "loss": 1.0091, - "step": 9204 - }, - { - "epoch": 0.8301393335437616, - "grad_norm": 1.561828697956227, - "learning_rate": 2.949868200525505e-07, - "loss": 0.9752, - "step": 9205 - }, - { - "epoch": 0.8302295170672318, - "grad_norm": 1.6477662658860897, - "learning_rate": 2.9468151627672734e-07, - "loss": 0.9679, - "step": 9206 - }, - { - "epoch": 0.830319700590702, - "grad_norm": 1.5866015317247086, - "learning_rate": 2.9437635800808026e-07, - "loss": 0.9271, - "step": 9207 - }, - { - "epoch": 0.8304098841141724, - "grad_norm": 1.6038669289638634, - "learning_rate": 2.940713452726473e-07, - "loss": 0.9535, - "step": 9208 - }, - { - "epoch": 0.8305000676376426, - "grad_norm": 1.355423104784631, - "learning_rate": 2.937664780964526e-07, - "loss": 0.9569, - "step": 9209 - }, - { - "epoch": 0.8305902511611128, - "grad_norm": 1.8096763056898109, - "learning_rate": 2.9346175650551133e-07, - "loss": 0.9809, - "step": 9210 - }, - { - "epoch": 0.8306804346845831, - "grad_norm": 4.353155697113926, - "learning_rate": 2.931571805258215e-07, - "loss": 0.9886, - "step": 9211 - }, - { - "epoch": 0.8307706182080534, - "grad_norm": 1.576366162054593, - "learning_rate": 2.9285275018337353e-07, - "loss": 1.0192, - "step": 9212 - }, - { - "epoch": 0.8308608017315237, - "grad_norm": 1.6428952449621337, - "learning_rate": 2.9254846550414146e-07, - "loss": 1.068, - "step": 9213 - }, - { - "epoch": 0.8309509852549939, - "grad_norm": 1.6117232884293682, - "learning_rate": 2.922443265140893e-07, - "loss": 1.0154, - "step": 9214 - }, - { - "epoch": 0.8310411687784641, - "grad_norm": 1.7207613879936376, - "learning_rate": 2.919403332391674e-07, - "loss": 0.9652, - "step": 9215 - }, - { - "epoch": 0.8311313523019345, - "grad_norm": 1.5331071762871487, - "learning_rate": 2.9163648570531464e-07, - "loss": 0.9894, - "step": 9216 - }, - { - "epoch": 0.8312215358254047, - "grad_norm": 1.6774078513246768, - "learning_rate": 2.9133278393845717e-07, - "loss": 0.9187, - "step": 9217 - }, - { - "epoch": 0.8313117193488749, - "grad_norm": 1.3217377503478442, - "learning_rate": 2.9102922796450703e-07, - "loss": 1.038, - "step": 9218 - }, - { - "epoch": 0.8314019028723453, - "grad_norm": 2.0973775383683666, - "learning_rate": 2.907258178093672e-07, - "loss": 0.927, - "step": 9219 - }, - { - "epoch": 0.8314920863958155, - "grad_norm": 1.5578926823373218, - "learning_rate": 2.904225534989251e-07, - "loss": 0.8832, - "step": 9220 - }, - { - "epoch": 0.8315822699192857, - "grad_norm": 4.095516144173115, - "learning_rate": 2.901194350590572e-07, - "loss": 0.8756, - "step": 9221 - }, - { - "epoch": 0.831672453442756, - "grad_norm": 1.4624873625380481, - "learning_rate": 2.898164625156274e-07, - "loss": 0.853, - "step": 9222 - }, - { - "epoch": 0.8317626369662263, - "grad_norm": 1.4208359327693365, - "learning_rate": 2.8951363589448676e-07, - "loss": 0.9262, - "step": 9223 - }, - { - "epoch": 0.8318528204896966, - "grad_norm": 1.6062292817557082, - "learning_rate": 2.8921095522147434e-07, - "loss": 0.9589, - "step": 9224 - }, - { - "epoch": 0.8319430040131668, - "grad_norm": 1.5122342278138425, - "learning_rate": 2.8890842052241683e-07, - "loss": 0.9997, - "step": 9225 - }, - { - "epoch": 0.832033187536637, - "grad_norm": 0.6260337862945868, - "learning_rate": 2.886060318231267e-07, - "loss": 0.7761, - "step": 9226 - }, - { - "epoch": 0.8321233710601074, - "grad_norm": 1.574917545885076, - "learning_rate": 2.8830378914940755e-07, - "loss": 0.9907, - "step": 9227 - }, - { - "epoch": 0.8322135545835776, - "grad_norm": 1.7665979860858976, - "learning_rate": 2.8800169252704675e-07, - "loss": 0.9683, - "step": 9228 - }, - { - "epoch": 0.8323037381070478, - "grad_norm": 1.3814362396067421, - "learning_rate": 2.8769974198182143e-07, - "loss": 0.9909, - "step": 9229 - }, - { - "epoch": 0.8323939216305181, - "grad_norm": 1.3228007688842514, - "learning_rate": 2.873979375394955e-07, - "loss": 0.9692, - "step": 9230 - }, - { - "epoch": 0.8324841051539884, - "grad_norm": 1.3925482063236159, - "learning_rate": 2.870962792258209e-07, - "loss": 1.0009, - "step": 9231 - }, - { - "epoch": 0.8325742886774586, - "grad_norm": 1.2726982638997872, - "learning_rate": 2.8679476706653716e-07, - "loss": 0.9832, - "step": 9232 - }, - { - "epoch": 0.8326644722009289, - "grad_norm": 1.626229994443366, - "learning_rate": 2.864934010873692e-07, - "loss": 0.9893, - "step": 9233 - }, - { - "epoch": 0.8327546557243991, - "grad_norm": 1.3073975974804466, - "learning_rate": 2.8619218131403357e-07, - "loss": 0.9438, - "step": 9234 - }, - { - "epoch": 0.8328448392478695, - "grad_norm": 1.5039273347243554, - "learning_rate": 2.858911077722299e-07, - "loss": 0.9959, - "step": 9235 - }, - { - "epoch": 0.8329350227713397, - "grad_norm": 1.685839860720906, - "learning_rate": 2.855901804876493e-07, - "loss": 0.8246, - "step": 9236 - }, - { - "epoch": 0.8330252062948099, - "grad_norm": 1.2635207402738964, - "learning_rate": 2.852893994859673e-07, - "loss": 1.007, - "step": 9237 - }, - { - "epoch": 0.8331153898182801, - "grad_norm": 1.2367018485827144, - "learning_rate": 2.849887647928484e-07, - "loss": 0.9171, - "step": 9238 - }, - { - "epoch": 0.8332055733417505, - "grad_norm": 1.9711344372650839, - "learning_rate": 2.8468827643394465e-07, - "loss": 0.8689, - "step": 9239 - }, - { - "epoch": 0.8332957568652207, - "grad_norm": 1.5032047369540757, - "learning_rate": 2.843879344348954e-07, - "loss": 0.9862, - "step": 9240 - }, - { - "epoch": 0.833385940388691, - "grad_norm": 1.9811314183317192, - "learning_rate": 2.840877388213272e-07, - "loss": 0.9423, - "step": 9241 - }, - { - "epoch": 0.8334761239121612, - "grad_norm": 1.6767691165851852, - "learning_rate": 2.8378768961885515e-07, - "loss": 0.9752, - "step": 9242 - }, - { - "epoch": 0.8335663074356315, - "grad_norm": 2.9505203305429477, - "learning_rate": 2.8348778685307983e-07, - "loss": 1.0008, - "step": 9243 - }, - { - "epoch": 0.8336564909591018, - "grad_norm": 1.498832486692857, - "learning_rate": 2.831880305495915e-07, - "loss": 1.0426, - "step": 9244 - }, - { - "epoch": 0.833746674482572, - "grad_norm": 0.6409003201120654, - "learning_rate": 2.828884207339668e-07, - "loss": 0.8595, - "step": 9245 - }, - { - "epoch": 0.8338368580060423, - "grad_norm": 1.9476034623209886, - "learning_rate": 2.8258895743177014e-07, - "loss": 1.0486, - "step": 9246 - }, - { - "epoch": 0.8339270415295126, - "grad_norm": 1.4324970259976764, - "learning_rate": 2.8228964066855356e-07, - "loss": 1.0409, - "step": 9247 - }, - { - "epoch": 0.8340172250529828, - "grad_norm": 1.2936246193839076, - "learning_rate": 2.819904704698555e-07, - "loss": 0.9484, - "step": 9248 - }, - { - "epoch": 0.834107408576453, - "grad_norm": 0.6586414541613446, - "learning_rate": 2.8169144686120437e-07, - "loss": 0.7919, - "step": 9249 - }, - { - "epoch": 0.8341975920999234, - "grad_norm": 1.7703023076921403, - "learning_rate": 2.8139256986811254e-07, - "loss": 0.8856, - "step": 9250 - }, - { - "epoch": 0.8342877756233936, - "grad_norm": 1.4937511460048478, - "learning_rate": 2.8109383951608424e-07, - "loss": 1.0237, - "step": 9251 - }, - { - "epoch": 0.8343779591468639, - "grad_norm": 1.966883910298052, - "learning_rate": 2.8079525583060683e-07, - "loss": 1.041, - "step": 9252 - }, - { - "epoch": 0.8344681426703341, - "grad_norm": 1.3451464294649376, - "learning_rate": 2.804968188371577e-07, - "loss": 0.975, - "step": 9253 - }, - { - "epoch": 0.8345583261938044, - "grad_norm": 1.3278689418116958, - "learning_rate": 2.801985285612014e-07, - "loss": 0.9663, - "step": 9254 - }, - { - "epoch": 0.8346485097172747, - "grad_norm": 1.475726097615169, - "learning_rate": 2.7990038502818934e-07, - "loss": 0.9446, - "step": 9255 - }, - { - "epoch": 0.8347386932407449, - "grad_norm": 1.3399973967512664, - "learning_rate": 2.796023882635612e-07, - "loss": 0.9467, - "step": 9256 - }, - { - "epoch": 0.8348288767642151, - "grad_norm": 1.3562904527669748, - "learning_rate": 2.7930453829274323e-07, - "loss": 1.0147, - "step": 9257 - }, - { - "epoch": 0.8349190602876855, - "grad_norm": 1.8086030436662202, - "learning_rate": 2.7900683514115054e-07, - "loss": 1.0132, - "step": 9258 - }, - { - "epoch": 0.8350092438111557, - "grad_norm": 1.6480319713019003, - "learning_rate": 2.787092788341836e-07, - "loss": 0.9078, - "step": 9259 - }, - { - "epoch": 0.8350994273346259, - "grad_norm": 1.4353933239253647, - "learning_rate": 2.7841186939723195e-07, - "loss": 0.9617, - "step": 9260 - }, - { - "epoch": 0.8351896108580962, - "grad_norm": 1.585579892886342, - "learning_rate": 2.7811460685567255e-07, - "loss": 0.9409, - "step": 9261 - }, - { - "epoch": 0.8352797943815665, - "grad_norm": 2.069233854475331, - "learning_rate": 2.778174912348692e-07, - "loss": 0.961, - "step": 9262 - }, - { - "epoch": 0.8353699779050368, - "grad_norm": 1.3124816529944414, - "learning_rate": 2.7752052256017354e-07, - "loss": 0.9346, - "step": 9263 - }, - { - "epoch": 0.835460161428507, - "grad_norm": 1.5816638661243552, - "learning_rate": 2.7722370085692493e-07, - "loss": 0.9444, - "step": 9264 - }, - { - "epoch": 0.8355503449519772, - "grad_norm": 1.9511549512646618, - "learning_rate": 2.769270261504486e-07, - "loss": 0.946, - "step": 9265 - }, - { - "epoch": 0.8356405284754476, - "grad_norm": 1.605497633773975, - "learning_rate": 2.7663049846606015e-07, - "loss": 0.9322, - "step": 9266 - }, - { - "epoch": 0.8357307119989178, - "grad_norm": 1.3624141088588444, - "learning_rate": 2.763341178290592e-07, - "loss": 0.9835, - "step": 9267 - }, - { - "epoch": 0.835820895522388, - "grad_norm": 1.242304814395083, - "learning_rate": 2.7603788426473663e-07, - "loss": 0.9922, - "step": 9268 - }, - { - "epoch": 0.8359110790458584, - "grad_norm": 2.359554561830501, - "learning_rate": 2.7574179779836695e-07, - "loss": 0.9079, - "step": 9269 - }, - { - "epoch": 0.8360012625693286, - "grad_norm": 1.4031698614820334, - "learning_rate": 2.754458584552146e-07, - "loss": 0.9276, - "step": 9270 - }, - { - "epoch": 0.8360914460927988, - "grad_norm": 1.8659226989925715, - "learning_rate": 2.751500662605308e-07, - "loss": 0.9531, - "step": 9271 - }, - { - "epoch": 0.8361816296162691, - "grad_norm": 1.3488235504488204, - "learning_rate": 2.7485442123955383e-07, - "loss": 0.9278, - "step": 9272 - }, - { - "epoch": 0.8362718131397394, - "grad_norm": 1.5486767227042326, - "learning_rate": 2.7455892341751075e-07, - "loss": 0.8521, - "step": 9273 - }, - { - "epoch": 0.8363619966632097, - "grad_norm": 1.6214813136818773, - "learning_rate": 2.7426357281961365e-07, - "loss": 0.8454, - "step": 9274 - }, - { - "epoch": 0.8364521801866799, - "grad_norm": 4.363133058276875, - "learning_rate": 2.7396836947106416e-07, - "loss": 0.9706, - "step": 9275 - }, - { - "epoch": 0.8365423637101501, - "grad_norm": 2.1132748192990767, - "learning_rate": 2.736733133970506e-07, - "loss": 1.0722, - "step": 9276 - }, - { - "epoch": 0.8366325472336205, - "grad_norm": 1.1751241780669663, - "learning_rate": 2.7337840462274896e-07, - "loss": 1.0063, - "step": 9277 - }, - { - "epoch": 0.8367227307570907, - "grad_norm": 1.4240867264783632, - "learning_rate": 2.730836431733221e-07, - "loss": 0.939, - "step": 9278 - }, - { - "epoch": 0.8368129142805609, - "grad_norm": 1.6109489225913154, - "learning_rate": 2.727890290739212e-07, - "loss": 1.0251, - "step": 9279 - }, - { - "epoch": 0.8369030978040312, - "grad_norm": 1.385357513694614, - "learning_rate": 2.7249456234968395e-07, - "loss": 1.0071, - "step": 9280 - }, - { - "epoch": 0.8369932813275015, - "grad_norm": 1.99378586022346, - "learning_rate": 2.722002430257364e-07, - "loss": 1.034, - "step": 9281 - }, - { - "epoch": 0.8370834648509717, - "grad_norm": 1.606255554308361, - "learning_rate": 2.7190607112719035e-07, - "loss": 0.9284, - "step": 9282 - }, - { - "epoch": 0.837173648374442, - "grad_norm": 2.263763188914021, - "learning_rate": 2.716120466791476e-07, - "loss": 0.8962, - "step": 9283 - }, - { - "epoch": 0.8372638318979122, - "grad_norm": 1.8743051512990507, - "learning_rate": 2.7131816970669483e-07, - "loss": 0.942, - "step": 9284 - }, - { - "epoch": 0.8373540154213825, - "grad_norm": 1.7807973319975343, - "learning_rate": 2.7102444023490777e-07, - "loss": 0.9759, - "step": 9285 - }, - { - "epoch": 0.8374441989448528, - "grad_norm": 1.2408172919622256, - "learning_rate": 2.70730858288849e-07, - "loss": 1.0237, - "step": 9286 - }, - { - "epoch": 0.837534382468323, - "grad_norm": 1.465506279209914, - "learning_rate": 2.704374238935685e-07, - "loss": 0.9039, - "step": 9287 - }, - { - "epoch": 0.8376245659917932, - "grad_norm": 0.6828781960524473, - "learning_rate": 2.70144137074104e-07, - "loss": 0.8342, - "step": 9288 - }, - { - "epoch": 0.8377147495152636, - "grad_norm": 1.3636413798095457, - "learning_rate": 2.6985099785547926e-07, - "loss": 0.9825, - "step": 9289 - }, - { - "epoch": 0.8378049330387338, - "grad_norm": 1.3011579454971056, - "learning_rate": 2.695580062627083e-07, - "loss": 0.9911, - "step": 9290 - }, - { - "epoch": 0.8378951165622041, - "grad_norm": 2.163863630831595, - "learning_rate": 2.692651623207891e-07, - "loss": 0.9691, - "step": 9291 - }, - { - "epoch": 0.8379853000856744, - "grad_norm": 1.81048597050256, - "learning_rate": 2.689724660547097e-07, - "loss": 0.954, - "step": 9292 - }, - { - "epoch": 0.8380754836091446, - "grad_norm": 0.6167535101037249, - "learning_rate": 2.686799174894441e-07, - "loss": 0.8319, - "step": 9293 - }, - { - "epoch": 0.8381656671326149, - "grad_norm": 0.6753773005656574, - "learning_rate": 2.683875166499545e-07, - "loss": 0.8534, - "step": 9294 - }, - { - "epoch": 0.8382558506560851, - "grad_norm": 1.475086180545398, - "learning_rate": 2.680952635611899e-07, - "loss": 0.981, - "step": 9295 - }, - { - "epoch": 0.8383460341795554, - "grad_norm": 1.4773842275996054, - "learning_rate": 2.678031582480875e-07, - "loss": 0.8893, - "step": 9296 - }, - { - "epoch": 0.8384362177030257, - "grad_norm": 1.3051806677067852, - "learning_rate": 2.6751120073557e-07, - "loss": 1.0058, - "step": 9297 - }, - { - "epoch": 0.8385264012264959, - "grad_norm": 1.461116351435313, - "learning_rate": 2.672193910485505e-07, - "loss": 0.937, - "step": 9298 - }, - { - "epoch": 0.8386165847499661, - "grad_norm": 1.3688745607457777, - "learning_rate": 2.669277292119265e-07, - "loss": 0.9663, - "step": 9299 - }, - { - "epoch": 0.8387067682734365, - "grad_norm": 1.480070196368601, - "learning_rate": 2.666362152505848e-07, - "loss": 0.9759, - "step": 9300 - }, - { - "epoch": 0.8387969517969067, - "grad_norm": 1.6768476743174288, - "learning_rate": 2.663448491893989e-07, - "loss": 0.9033, - "step": 9301 - }, - { - "epoch": 0.838887135320377, - "grad_norm": 1.5897338229329776, - "learning_rate": 2.6605363105322974e-07, - "loss": 0.9496, - "step": 9302 - }, - { - "epoch": 0.8389773188438472, - "grad_norm": 1.4487387795462245, - "learning_rate": 2.657625608669263e-07, - "loss": 0.968, - "step": 9303 - }, - { - "epoch": 0.8390675023673175, - "grad_norm": 1.2843683952662923, - "learning_rate": 2.654716386553224e-07, - "loss": 0.8734, - "step": 9304 - }, - { - "epoch": 0.8391576858907878, - "grad_norm": 1.5924946946275809, - "learning_rate": 2.651808644432436e-07, - "loss": 0.9696, - "step": 9305 - }, - { - "epoch": 0.839247869414258, - "grad_norm": 1.5930143416214728, - "learning_rate": 2.6489023825549807e-07, - "loss": 1.0949, - "step": 9306 - }, - { - "epoch": 0.8393380529377282, - "grad_norm": 2.610168701799841, - "learning_rate": 2.6459976011688547e-07, - "loss": 0.8945, - "step": 9307 - }, - { - "epoch": 0.8394282364611986, - "grad_norm": 1.6555349857892956, - "learning_rate": 2.6430943005219e-07, - "loss": 0.964, - "step": 9308 - }, - { - "epoch": 0.8395184199846688, - "grad_norm": 1.1744538478561082, - "learning_rate": 2.6401924808618447e-07, - "loss": 1.002, - "step": 9309 - }, - { - "epoch": 0.839608603508139, - "grad_norm": 1.4295074683064688, - "learning_rate": 2.637292142436287e-07, - "loss": 0.9616, - "step": 9310 - }, - { - "epoch": 0.8396987870316093, - "grad_norm": 3.570738785752077, - "learning_rate": 2.6343932854927e-07, - "loss": 0.9414, - "step": 9311 - }, - { - "epoch": 0.8397889705550796, - "grad_norm": 0.7571303059714949, - "learning_rate": 2.6314959102784316e-07, - "loss": 0.8481, - "step": 9312 - }, - { - "epoch": 0.8398791540785498, - "grad_norm": 1.6931872428867658, - "learning_rate": 2.6286000170407074e-07, - "loss": 0.9639, - "step": 9313 - }, - { - "epoch": 0.8399693376020201, - "grad_norm": 2.880238942724631, - "learning_rate": 2.625705606026607e-07, - "loss": 0.9466, - "step": 9314 - }, - { - "epoch": 0.8400595211254904, - "grad_norm": 1.2896721525321253, - "learning_rate": 2.622812677483106e-07, - "loss": 1.0403, - "step": 9315 - }, - { - "epoch": 0.8401497046489607, - "grad_norm": 1.8639911442902382, - "learning_rate": 2.6199212316570453e-07, - "loss": 1.0131, - "step": 9316 - }, - { - "epoch": 0.8402398881724309, - "grad_norm": 1.4454742341148348, - "learning_rate": 2.617031268795138e-07, - "loss": 0.947, - "step": 9317 - }, - { - "epoch": 0.8403300716959011, - "grad_norm": 1.2346482276318902, - "learning_rate": 2.614142789143976e-07, - "loss": 0.9973, - "step": 9318 - }, - { - "epoch": 0.8404202552193715, - "grad_norm": 1.345549110933896, - "learning_rate": 2.6112557929500047e-07, - "loss": 0.9543, - "step": 9319 - }, - { - "epoch": 0.8405104387428417, - "grad_norm": 1.4501907913702672, - "learning_rate": 2.6083702804595817e-07, - "loss": 0.9914, - "step": 9320 - }, - { - "epoch": 0.8406006222663119, - "grad_norm": 1.375225915556109, - "learning_rate": 2.6054862519188915e-07, - "loss": 0.957, - "step": 9321 - }, - { - "epoch": 0.8406908057897822, - "grad_norm": 1.4576033627221086, - "learning_rate": 2.6026037075740357e-07, - "loss": 0.9008, - "step": 9322 - }, - { - "epoch": 0.8407809893132525, - "grad_norm": 1.8359495121989013, - "learning_rate": 2.5997226476709524e-07, - "loss": 0.9377, - "step": 9323 - }, - { - "epoch": 0.8408711728367227, - "grad_norm": 1.5684260333991726, - "learning_rate": 2.5968430724554856e-07, - "loss": 0.9711, - "step": 9324 - }, - { - "epoch": 0.840961356360193, - "grad_norm": 1.6244108892852183, - "learning_rate": 2.5939649821733225e-07, - "loss": 0.9055, - "step": 9325 - }, - { - "epoch": 0.8410515398836632, - "grad_norm": 1.6691446559151173, - "learning_rate": 2.5910883770700433e-07, - "loss": 0.9374, - "step": 9326 - }, - { - "epoch": 0.8411417234071336, - "grad_norm": 3.2238351596028454, - "learning_rate": 2.5882132573910965e-07, - "loss": 0.8089, - "step": 9327 - }, - { - "epoch": 0.8412319069306038, - "grad_norm": 1.1594396876465183, - "learning_rate": 2.585339623381801e-07, - "loss": 1.0034, - "step": 9328 - }, - { - "epoch": 0.841322090454074, - "grad_norm": 1.3587154235230592, - "learning_rate": 2.582467475287358e-07, - "loss": 0.9423, - "step": 9329 - }, - { - "epoch": 0.8414122739775443, - "grad_norm": 1.553142450798749, - "learning_rate": 2.5795968133528224e-07, - "loss": 0.9767, - "step": 9330 - }, - { - "epoch": 0.8415024575010146, - "grad_norm": 1.5655809925465443, - "learning_rate": 2.576727637823144e-07, - "loss": 0.9683, - "step": 9331 - }, - { - "epoch": 0.8415926410244848, - "grad_norm": 1.6434805587579613, - "learning_rate": 2.5738599489431335e-07, - "loss": 1.0116, - "step": 9332 - }, - { - "epoch": 0.8416828245479551, - "grad_norm": 1.189268351517727, - "learning_rate": 2.5709937469574794e-07, - "loss": 1.0287, - "step": 9333 - }, - { - "epoch": 0.8417730080714253, - "grad_norm": 1.2865926026405454, - "learning_rate": 2.568129032110742e-07, - "loss": 1.008, - "step": 9334 - }, - { - "epoch": 0.8418631915948956, - "grad_norm": 1.5267566225599416, - "learning_rate": 2.5652658046473565e-07, - "loss": 1.0413, - "step": 9335 - }, - { - "epoch": 0.8419533751183659, - "grad_norm": 1.8975572468360846, - "learning_rate": 2.5624040648116184e-07, - "loss": 0.9647, - "step": 9336 - }, - { - "epoch": 0.8420435586418361, - "grad_norm": 1.5163280783329005, - "learning_rate": 2.5595438128477245e-07, - "loss": 0.909, - "step": 9337 - }, - { - "epoch": 0.8421337421653065, - "grad_norm": 1.4614000047321667, - "learning_rate": 2.5566850489997096e-07, - "loss": 0.9771, - "step": 9338 - }, - { - "epoch": 0.8422239256887767, - "grad_norm": 1.4419972105118308, - "learning_rate": 2.5538277735115166e-07, - "loss": 0.9962, - "step": 9339 - }, - { - "epoch": 0.8423141092122469, - "grad_norm": 1.2102704087277878, - "learning_rate": 2.5509719866269306e-07, - "loss": 1.0168, - "step": 9340 - }, - { - "epoch": 0.8424042927357172, - "grad_norm": 1.7084187121881993, - "learning_rate": 2.548117688589628e-07, - "loss": 1.0223, - "step": 9341 - }, - { - "epoch": 0.8424944762591875, - "grad_norm": 1.531263078906198, - "learning_rate": 2.545264879643152e-07, - "loss": 0.957, - "step": 9342 - }, - { - "epoch": 0.8425846597826577, - "grad_norm": 2.5380122903176066, - "learning_rate": 2.542413560030923e-07, - "loss": 0.92, - "step": 9343 - }, - { - "epoch": 0.842674843306128, - "grad_norm": 1.4268712723859167, - "learning_rate": 2.53956372999623e-07, - "loss": 0.8592, - "step": 9344 - }, - { - "epoch": 0.8427650268295982, - "grad_norm": 0.7318637196154455, - "learning_rate": 2.5367153897822293e-07, - "loss": 0.8789, - "step": 9345 - }, - { - "epoch": 0.8428552103530685, - "grad_norm": 1.459159871159446, - "learning_rate": 2.5338685396319715e-07, - "loss": 0.9609, - "step": 9346 - }, - { - "epoch": 0.8429453938765388, - "grad_norm": 1.9296398079727686, - "learning_rate": 2.531023179788352e-07, - "loss": 0.9042, - "step": 9347 - }, - { - "epoch": 0.843035577400009, - "grad_norm": 1.3757736890369543, - "learning_rate": 2.528179310494158e-07, - "loss": 0.9459, - "step": 9348 - }, - { - "epoch": 0.8431257609234792, - "grad_norm": 1.2166318827494857, - "learning_rate": 2.5253369319920436e-07, - "loss": 0.9845, - "step": 9349 - }, - { - "epoch": 0.8432159444469496, - "grad_norm": 1.668540338496734, - "learning_rate": 2.522496044524538e-07, - "loss": 0.9648, - "step": 9350 - }, - { - "epoch": 0.8433061279704198, - "grad_norm": 1.7217843753622866, - "learning_rate": 2.5196566483340386e-07, - "loss": 1.0617, - "step": 9351 - }, - { - "epoch": 0.84339631149389, - "grad_norm": 1.5966820766419434, - "learning_rate": 2.516818743662825e-07, - "loss": 0.9747, - "step": 9352 - }, - { - "epoch": 0.8434864950173603, - "grad_norm": 1.4578999647455926, - "learning_rate": 2.5139823307530285e-07, - "loss": 0.9674, - "step": 9353 - }, - { - "epoch": 0.8435766785408306, - "grad_norm": 1.5904003171985701, - "learning_rate": 2.5111474098466836e-07, - "loss": 1.0037, - "step": 9354 - }, - { - "epoch": 0.8436668620643009, - "grad_norm": 1.3187301001471061, - "learning_rate": 2.50831398118567e-07, - "loss": 1.0056, - "step": 9355 - }, - { - "epoch": 0.8437570455877711, - "grad_norm": 1.4853109520972927, - "learning_rate": 2.5054820450117576e-07, - "loss": 0.9182, - "step": 9356 - }, - { - "epoch": 0.8438472291112413, - "grad_norm": 1.6549998189459296, - "learning_rate": 2.502651601566579e-07, - "loss": 0.9309, - "step": 9357 - }, - { - "epoch": 0.8439374126347117, - "grad_norm": 1.4492911349004975, - "learning_rate": 2.499822651091645e-07, - "loss": 0.9301, - "step": 9358 - }, - { - "epoch": 0.8440275961581819, - "grad_norm": 2.0074336663067056, - "learning_rate": 2.496995193828344e-07, - "loss": 0.9897, - "step": 9359 - }, - { - "epoch": 0.8441177796816521, - "grad_norm": 3.0589892447146303, - "learning_rate": 2.494169230017913e-07, - "loss": 0.9546, - "step": 9360 - }, - { - "epoch": 0.8442079632051224, - "grad_norm": 1.2494627274765784, - "learning_rate": 2.491344759901499e-07, - "loss": 0.9247, - "step": 9361 - }, - { - "epoch": 0.8442981467285927, - "grad_norm": 1.6289060304490635, - "learning_rate": 2.488521783720088e-07, - "loss": 0.9611, - "step": 9362 - }, - { - "epoch": 0.844388330252063, - "grad_norm": 1.7313082275514178, - "learning_rate": 2.4857003017145526e-07, - "loss": 1.0063, - "step": 9363 - }, - { - "epoch": 0.8444785137755332, - "grad_norm": 2.0221831804682413, - "learning_rate": 2.482880314125644e-07, - "loss": 1.006, - "step": 9364 - }, - { - "epoch": 0.8445686972990035, - "grad_norm": 1.604964945282587, - "learning_rate": 2.4800618211939726e-07, - "loss": 0.96, - "step": 9365 - }, - { - "epoch": 0.8446588808224738, - "grad_norm": 2.079513315520016, - "learning_rate": 2.477244823160034e-07, - "loss": 0.9762, - "step": 9366 - }, - { - "epoch": 0.844749064345944, - "grad_norm": 1.563776867801125, - "learning_rate": 2.474429320264184e-07, - "loss": 0.9858, - "step": 9367 - }, - { - "epoch": 0.8448392478694142, - "grad_norm": 1.5557923344892208, - "learning_rate": 2.47161531274666e-07, - "loss": 0.9303, - "step": 9368 - }, - { - "epoch": 0.8449294313928846, - "grad_norm": 1.5170130714854164, - "learning_rate": 2.4688028008475714e-07, - "loss": 0.9028, - "step": 9369 - }, - { - "epoch": 0.8450196149163548, - "grad_norm": 2.4880878846448145, - "learning_rate": 2.465991784806891e-07, - "loss": 0.9599, - "step": 9370 - }, - { - "epoch": 0.845109798439825, - "grad_norm": 1.3773151760415472, - "learning_rate": 2.463182264864472e-07, - "loss": 1.0557, - "step": 9371 - }, - { - "epoch": 0.8451999819632953, - "grad_norm": 1.6641265426991922, - "learning_rate": 2.460374241260039e-07, - "loss": 0.8884, - "step": 9372 - }, - { - "epoch": 0.8452901654867656, - "grad_norm": 1.5910533455681501, - "learning_rate": 2.4575677142331884e-07, - "loss": 1.0285, - "step": 9373 - }, - { - "epoch": 0.8453803490102358, - "grad_norm": 1.387211800620431, - "learning_rate": 2.454762684023395e-07, - "loss": 0.9093, - "step": 9374 - }, - { - "epoch": 0.8454705325337061, - "grad_norm": 3.5649970154395323, - "learning_rate": 2.4519591508699823e-07, - "loss": 0.909, - "step": 9375 - }, - { - "epoch": 0.8455607160571763, - "grad_norm": 2.7864096283741104, - "learning_rate": 2.4491571150121815e-07, - "loss": 1.0126, - "step": 9376 - }, - { - "epoch": 0.8456508995806467, - "grad_norm": 2.1496160430499773, - "learning_rate": 2.446356576689062e-07, - "loss": 0.9599, - "step": 9377 - }, - { - "epoch": 0.8457410831041169, - "grad_norm": 1.5189965883860848, - "learning_rate": 2.4435575361395976e-07, - "loss": 0.9817, - "step": 9378 - }, - { - "epoch": 0.8458312666275871, - "grad_norm": 1.6717000138876248, - "learning_rate": 2.440759993602606e-07, - "loss": 0.9442, - "step": 9379 - }, - { - "epoch": 0.8459214501510574, - "grad_norm": 1.477310879087973, - "learning_rate": 2.437963949316793e-07, - "loss": 0.9119, - "step": 9380 - }, - { - "epoch": 0.8460116336745277, - "grad_norm": 1.4885421083777564, - "learning_rate": 2.435169403520729e-07, - "loss": 0.9664, - "step": 9381 - }, - { - "epoch": 0.8461018171979979, - "grad_norm": 1.5536381232692384, - "learning_rate": 2.4323763564528653e-07, - "loss": 1.0116, - "step": 9382 - }, - { - "epoch": 0.8461920007214682, - "grad_norm": 1.4484543751018564, - "learning_rate": 2.429584808351517e-07, - "loss": 0.9821, - "step": 9383 - }, - { - "epoch": 0.8462821842449384, - "grad_norm": 1.6411459686011287, - "learning_rate": 2.42679475945488e-07, - "loss": 1.0864, - "step": 9384 - }, - { - "epoch": 0.8463723677684087, - "grad_norm": 1.2151423636181722, - "learning_rate": 2.424006210001008e-07, - "loss": 0.9653, - "step": 9385 - }, - { - "epoch": 0.846462551291879, - "grad_norm": 1.3793724936016145, - "learning_rate": 2.421219160227839e-07, - "loss": 0.9872, - "step": 9386 - }, - { - "epoch": 0.8465527348153492, - "grad_norm": 1.4446182645644179, - "learning_rate": 2.4184336103731785e-07, - "loss": 0.9573, - "step": 9387 - }, - { - "epoch": 0.8466429183388196, - "grad_norm": 1.4286250768533477, - "learning_rate": 2.4156495606747065e-07, - "loss": 0.9878, - "step": 9388 - }, - { - "epoch": 0.8467331018622898, - "grad_norm": 1.6263666275039943, - "learning_rate": 2.412867011369972e-07, - "loss": 1.0839, - "step": 9389 - }, - { - "epoch": 0.84682328538576, - "grad_norm": 1.443088449285399, - "learning_rate": 2.4100859626963997e-07, - "loss": 0.9802, - "step": 9390 - }, - { - "epoch": 0.8469134689092302, - "grad_norm": 1.2105956056381173, - "learning_rate": 2.407306414891288e-07, - "loss": 0.9633, - "step": 9391 - }, - { - "epoch": 0.8470036524327006, - "grad_norm": 1.160621780859407, - "learning_rate": 2.4045283681917893e-07, - "loss": 1.0719, - "step": 9392 - }, - { - "epoch": 0.8470938359561708, - "grad_norm": 1.5296048259896398, - "learning_rate": 2.4017518228349586e-07, - "loss": 1.0784, - "step": 9393 - }, - { - "epoch": 0.8471840194796411, - "grad_norm": 0.6244106852249484, - "learning_rate": 2.3989767790576887e-07, - "loss": 0.8558, - "step": 9394 - }, - { - "epoch": 0.8472742030031113, - "grad_norm": 2.1216535584713334, - "learning_rate": 2.396203237096781e-07, - "loss": 0.9331, - "step": 9395 - }, - { - "epoch": 0.8473643865265816, - "grad_norm": 1.8396502674521766, - "learning_rate": 2.393431197188873e-07, - "loss": 0.959, - "step": 9396 - }, - { - "epoch": 0.8474545700500519, - "grad_norm": 1.9594674242236405, - "learning_rate": 2.3906606595705004e-07, - "loss": 1.0316, - "step": 9397 - }, - { - "epoch": 0.8475447535735221, - "grad_norm": 1.4695508680803575, - "learning_rate": 2.387891624478056e-07, - "loss": 1.0164, - "step": 9398 - }, - { - "epoch": 0.8476349370969923, - "grad_norm": 1.5688533551337902, - "learning_rate": 2.3851240921478075e-07, - "loss": 0.9831, - "step": 9399 - }, - { - "epoch": 0.8477251206204627, - "grad_norm": 1.5020048730316555, - "learning_rate": 2.3823580628159057e-07, - "loss": 0.9477, - "step": 9400 - }, - { - "epoch": 0.8478153041439329, - "grad_norm": 1.533401690217143, - "learning_rate": 2.3795935367183517e-07, - "loss": 0.9528, - "step": 9401 - }, - { - "epoch": 0.8479054876674031, - "grad_norm": 1.272305365630744, - "learning_rate": 2.376830514091035e-07, - "loss": 0.9811, - "step": 9402 - }, - { - "epoch": 0.8479956711908734, - "grad_norm": 1.3340154730773057, - "learning_rate": 2.3740689951697135e-07, - "loss": 1.0452, - "step": 9403 - }, - { - "epoch": 0.8480858547143437, - "grad_norm": 1.4286320026361927, - "learning_rate": 2.371308980190012e-07, - "loss": 0.9677, - "step": 9404 - }, - { - "epoch": 0.848176038237814, - "grad_norm": 0.6536905240489449, - "learning_rate": 2.3685504693874337e-07, - "loss": 0.8588, - "step": 9405 - }, - { - "epoch": 0.8482662217612842, - "grad_norm": 1.5588978253271029, - "learning_rate": 2.3657934629973497e-07, - "loss": 1.0044, - "step": 9406 - }, - { - "epoch": 0.8483564052847544, - "grad_norm": 1.705420041404434, - "learning_rate": 2.3630379612549944e-07, - "loss": 0.9718, - "step": 9407 - }, - { - "epoch": 0.8484465888082248, - "grad_norm": 0.7594310739794895, - "learning_rate": 2.3602839643954997e-07, - "loss": 0.9042, - "step": 9408 - }, - { - "epoch": 0.848536772331695, - "grad_norm": 1.2645988543006765, - "learning_rate": 2.3575314726538308e-07, - "loss": 0.9738, - "step": 9409 - }, - { - "epoch": 0.8486269558551652, - "grad_norm": 1.5566370256829858, - "learning_rate": 2.3547804862648645e-07, - "loss": 1.0068, - "step": 9410 - }, - { - "epoch": 0.8487171393786356, - "grad_norm": 1.4553876918759276, - "learning_rate": 2.3520310054633174e-07, - "loss": 1.0086, - "step": 9411 - }, - { - "epoch": 0.8488073229021058, - "grad_norm": 1.7153209118003532, - "learning_rate": 2.3492830304837973e-07, - "loss": 0.9274, - "step": 9412 - }, - { - "epoch": 0.848897506425576, - "grad_norm": 1.3187230943583708, - "learning_rate": 2.3465365615607723e-07, - "loss": 0.9073, - "step": 9413 - }, - { - "epoch": 0.8489876899490463, - "grad_norm": 1.4480060119872924, - "learning_rate": 2.3437915989285884e-07, - "loss": 0.9548, - "step": 9414 - }, - { - "epoch": 0.8490778734725166, - "grad_norm": 1.7641725220340554, - "learning_rate": 2.3410481428214647e-07, - "loss": 1.0418, - "step": 9415 - }, - { - "epoch": 0.8491680569959869, - "grad_norm": 1.5044944504660542, - "learning_rate": 2.338306193473476e-07, - "loss": 0.9855, - "step": 9416 - }, - { - "epoch": 0.8492582405194571, - "grad_norm": 1.3748943981886952, - "learning_rate": 2.3355657511185957e-07, - "loss": 0.9664, - "step": 9417 - }, - { - "epoch": 0.8493484240429273, - "grad_norm": 0.6602495342008889, - "learning_rate": 2.3328268159906428e-07, - "loss": 0.8213, - "step": 9418 - }, - { - "epoch": 0.8494386075663977, - "grad_norm": 1.6033486431498543, - "learning_rate": 2.330089388323322e-07, - "loss": 1.0149, - "step": 9419 - }, - { - "epoch": 0.8495287910898679, - "grad_norm": 1.474043458383354, - "learning_rate": 2.327353468350204e-07, - "loss": 1.006, - "step": 9420 - }, - { - "epoch": 0.8496189746133381, - "grad_norm": 1.4965349547805848, - "learning_rate": 2.3246190563047352e-07, - "loss": 0.956, - "step": 9421 - }, - { - "epoch": 0.8497091581368084, - "grad_norm": 1.6306270593796135, - "learning_rate": 2.3218861524202293e-07, - "loss": 0.9521, - "step": 9422 - }, - { - "epoch": 0.8497993416602787, - "grad_norm": 1.4850557071499448, - "learning_rate": 2.3191547569298775e-07, - "loss": 0.9729, - "step": 9423 - }, - { - "epoch": 0.8498895251837489, - "grad_norm": 2.0136687965998536, - "learning_rate": 2.3164248700667245e-07, - "loss": 0.9545, - "step": 9424 - }, - { - "epoch": 0.8499797087072192, - "grad_norm": 1.4959288025605773, - "learning_rate": 2.313696492063717e-07, - "loss": 0.9935, - "step": 9425 - }, - { - "epoch": 0.8500698922306894, - "grad_norm": 1.8001094069609822, - "learning_rate": 2.3109696231536401e-07, - "loss": 0.958, - "step": 9426 - }, - { - "epoch": 0.8501600757541597, - "grad_norm": 1.6719820220062869, - "learning_rate": 2.3082442635691722e-07, - "loss": 0.9292, - "step": 9427 - }, - { - "epoch": 0.85025025927763, - "grad_norm": 1.5121650924015726, - "learning_rate": 2.305520413542854e-07, - "loss": 0.9826, - "step": 9428 - }, - { - "epoch": 0.8503404428011002, - "grad_norm": 1.4268194729681685, - "learning_rate": 2.3027980733071018e-07, - "loss": 0.8988, - "step": 9429 - }, - { - "epoch": 0.8504306263245704, - "grad_norm": 0.6101363878521843, - "learning_rate": 2.3000772430942027e-07, - "loss": 0.7967, - "step": 9430 - }, - { - "epoch": 0.8505208098480408, - "grad_norm": 1.4427502989833947, - "learning_rate": 2.2973579231363028e-07, - "loss": 0.9921, - "step": 9431 - }, - { - "epoch": 0.850610993371511, - "grad_norm": 1.6944508274493175, - "learning_rate": 2.2946401136654446e-07, - "loss": 0.9904, - "step": 9432 - }, - { - "epoch": 0.8507011768949813, - "grad_norm": 1.6291480308045718, - "learning_rate": 2.2919238149135077e-07, - "loss": 0.9987, - "step": 9433 - }, - { - "epoch": 0.8507913604184516, - "grad_norm": 1.2960432671665467, - "learning_rate": 2.289209027112282e-07, - "loss": 0.9723, - "step": 9434 - }, - { - "epoch": 0.8508815439419218, - "grad_norm": 1.8352858015755926, - "learning_rate": 2.2864957504933934e-07, - "loss": 1.0262, - "step": 9435 - }, - { - "epoch": 0.8509717274653921, - "grad_norm": 2.005797566294546, - "learning_rate": 2.2837839852883589e-07, - "loss": 0.9462, - "step": 9436 - }, - { - "epoch": 0.8510619109888623, - "grad_norm": 1.4885736613731488, - "learning_rate": 2.2810737317285623e-07, - "loss": 0.9741, - "step": 9437 - }, - { - "epoch": 0.8511520945123326, - "grad_norm": 1.3364096805381742, - "learning_rate": 2.278364990045254e-07, - "loss": 0.9081, - "step": 9438 - }, - { - "epoch": 0.8512422780358029, - "grad_norm": 1.4489958146945068, - "learning_rate": 2.2756577604695625e-07, - "loss": 0.9566, - "step": 9439 - }, - { - "epoch": 0.8513324615592731, - "grad_norm": 1.2545854861923964, - "learning_rate": 2.2729520432324855e-07, - "loss": 0.9836, - "step": 9440 - }, - { - "epoch": 0.8514226450827433, - "grad_norm": 1.4951770653373517, - "learning_rate": 2.2702478385648826e-07, - "loss": 0.8903, - "step": 9441 - }, - { - "epoch": 0.8515128286062137, - "grad_norm": 1.4815091224361225, - "learning_rate": 2.2675451466974938e-07, - "loss": 0.9661, - "step": 9442 - }, - { - "epoch": 0.8516030121296839, - "grad_norm": 1.4569055743386605, - "learning_rate": 2.26484396786093e-07, - "loss": 0.8985, - "step": 9443 - }, - { - "epoch": 0.8516931956531542, - "grad_norm": 0.833521587247885, - "learning_rate": 2.2621443022856667e-07, - "loss": 0.8666, - "step": 9444 - }, - { - "epoch": 0.8517833791766244, - "grad_norm": 2.6655494813406833, - "learning_rate": 2.2594461502020646e-07, - "loss": 1.0067, - "step": 9445 - }, - { - "epoch": 0.8518735627000947, - "grad_norm": 1.6386728207172852, - "learning_rate": 2.2567495118403278e-07, - "loss": 0.8495, - "step": 9446 - }, - { - "epoch": 0.851963746223565, - "grad_norm": 1.4605454515530305, - "learning_rate": 2.254054387430566e-07, - "loss": 0.9832, - "step": 9447 - }, - { - "epoch": 0.8520539297470352, - "grad_norm": 2.7988674802602045, - "learning_rate": 2.2513607772027243e-07, - "loss": 0.9556, - "step": 9448 - }, - { - "epoch": 0.8521441132705054, - "grad_norm": 1.4255445309965333, - "learning_rate": 2.2486686813866562e-07, - "loss": 0.9306, - "step": 9449 - }, - { - "epoch": 0.8522342967939758, - "grad_norm": 1.485519209109372, - "learning_rate": 2.2459781002120514e-07, - "loss": 1.0123, - "step": 9450 - }, - { - "epoch": 0.852324480317446, - "grad_norm": 1.2818412812037452, - "learning_rate": 2.243289033908491e-07, - "loss": 1.0343, - "step": 9451 - }, - { - "epoch": 0.8524146638409162, - "grad_norm": 1.507696829198438, - "learning_rate": 2.2406014827054176e-07, - "loss": 0.9755, - "step": 9452 - }, - { - "epoch": 0.8525048473643865, - "grad_norm": 1.432349722870493, - "learning_rate": 2.2379154468321525e-07, - "loss": 1.0312, - "step": 9453 - }, - { - "epoch": 0.8525950308878568, - "grad_norm": 1.248304838883283, - "learning_rate": 2.2352309265178793e-07, - "loss": 0.9986, - "step": 9454 - }, - { - "epoch": 0.852685214411327, - "grad_norm": 1.7295972295705395, - "learning_rate": 2.2325479219916565e-07, - "loss": 1.0314, - "step": 9455 - }, - { - "epoch": 0.8527753979347973, - "grad_norm": 1.4001288388777489, - "learning_rate": 2.229866433482419e-07, - "loss": 0.9938, - "step": 9456 - }, - { - "epoch": 0.8528655814582676, - "grad_norm": 1.528392928960757, - "learning_rate": 2.2271864612189552e-07, - "loss": 1.0125, - "step": 9457 - }, - { - "epoch": 0.8529557649817379, - "grad_norm": 1.3674634818408848, - "learning_rate": 2.2245080054299415e-07, - "loss": 1.0012, - "step": 9458 - }, - { - "epoch": 0.8530459485052081, - "grad_norm": 1.4333905068165262, - "learning_rate": 2.2218310663439198e-07, - "loss": 0.9261, - "step": 9459 - }, - { - "epoch": 0.8531361320286783, - "grad_norm": 1.490351641079582, - "learning_rate": 2.2191556441892968e-07, - "loss": 0.9859, - "step": 9460 - }, - { - "epoch": 0.8532263155521487, - "grad_norm": 1.7467764729401516, - "learning_rate": 2.216481739194358e-07, - "loss": 0.9737, - "step": 9461 - }, - { - "epoch": 0.8533164990756189, - "grad_norm": 1.4500299943912882, - "learning_rate": 2.2138093515872592e-07, - "loss": 0.9138, - "step": 9462 - }, - { - "epoch": 0.8534066825990891, - "grad_norm": 1.1929879117674627, - "learning_rate": 2.2111384815960087e-07, - "loss": 1.0449, - "step": 9463 - }, - { - "epoch": 0.8534968661225594, - "grad_norm": 1.6854303878898367, - "learning_rate": 2.208469129448518e-07, - "loss": 0.8916, - "step": 9464 - }, - { - "epoch": 0.8535870496460297, - "grad_norm": 0.6290477569792966, - "learning_rate": 2.2058012953725357e-07, - "loss": 0.8409, - "step": 9465 - }, - { - "epoch": 0.8536772331695, - "grad_norm": 1.656636031166261, - "learning_rate": 2.203134979595711e-07, - "loss": 0.9358, - "step": 9466 - }, - { - "epoch": 0.8537674166929702, - "grad_norm": 1.4566041049281895, - "learning_rate": 2.2004701823455374e-07, - "loss": 0.9208, - "step": 9467 - }, - { - "epoch": 0.8538576002164404, - "grad_norm": 1.6050654366484445, - "learning_rate": 2.1978069038493906e-07, - "loss": 1.0, - "step": 9468 - }, - { - "epoch": 0.8539477837399108, - "grad_norm": 1.2887346717818313, - "learning_rate": 2.1951451443345225e-07, - "loss": 0.9737, - "step": 9469 - }, - { - "epoch": 0.854037967263381, - "grad_norm": 1.5725766081714796, - "learning_rate": 2.1924849040280425e-07, - "loss": 0.9609, - "step": 9470 - }, - { - "epoch": 0.8541281507868512, - "grad_norm": 2.0225136315790544, - "learning_rate": 2.1898261831569465e-07, - "loss": 1.0466, - "step": 9471 - }, - { - "epoch": 0.8542183343103215, - "grad_norm": 1.5534973956854543, - "learning_rate": 2.1871689819480798e-07, - "loss": 0.9792, - "step": 9472 - }, - { - "epoch": 0.8543085178337918, - "grad_norm": 1.2288481197201393, - "learning_rate": 2.1845133006281745e-07, - "loss": 0.9504, - "step": 9473 - }, - { - "epoch": 0.854398701357262, - "grad_norm": 1.5429937819369821, - "learning_rate": 2.1818591394238294e-07, - "loss": 0.8297, - "step": 9474 - }, - { - "epoch": 0.8544888848807323, - "grad_norm": 1.7935150577454373, - "learning_rate": 2.1792064985615076e-07, - "loss": 0.9561, - "step": 9475 - }, - { - "epoch": 0.8545790684042025, - "grad_norm": 1.5194735871580403, - "learning_rate": 2.1765553782675528e-07, - "loss": 0.9411, - "step": 9476 - }, - { - "epoch": 0.8546692519276728, - "grad_norm": 1.3709083714378645, - "learning_rate": 2.1739057787681703e-07, - "loss": 1.0393, - "step": 9477 - }, - { - "epoch": 0.8547594354511431, - "grad_norm": 1.4646296230216178, - "learning_rate": 2.1712577002894372e-07, - "loss": 0.9913, - "step": 9478 - }, - { - "epoch": 0.8548496189746133, - "grad_norm": 1.4171616119745643, - "learning_rate": 2.1686111430573105e-07, - "loss": 0.9444, - "step": 9479 - }, - { - "epoch": 0.8549398024980835, - "grad_norm": 1.536002433079541, - "learning_rate": 2.165966107297592e-07, - "loss": 1.0206, - "step": 9480 - }, - { - "epoch": 0.8550299860215539, - "grad_norm": 1.749594437062089, - "learning_rate": 2.16332259323599e-07, - "loss": 1.0043, - "step": 9481 - }, - { - "epoch": 0.8551201695450241, - "grad_norm": 1.4829974558596868, - "learning_rate": 2.1606806010980504e-07, - "loss": 0.9225, - "step": 9482 - }, - { - "epoch": 0.8552103530684944, - "grad_norm": 1.4846412018446482, - "learning_rate": 2.1580401311092067e-07, - "loss": 0.9682, - "step": 9483 - }, - { - "epoch": 0.8553005365919647, - "grad_norm": 1.9535041746197928, - "learning_rate": 2.1554011834947604e-07, - "loss": 0.9648, - "step": 9484 - }, - { - "epoch": 0.8553907201154349, - "grad_norm": 10.791969167429185, - "learning_rate": 2.1527637584798764e-07, - "loss": 1.0034, - "step": 9485 - }, - { - "epoch": 0.8554809036389052, - "grad_norm": 1.4215827893150756, - "learning_rate": 2.150127856289603e-07, - "loss": 1.0366, - "step": 9486 - }, - { - "epoch": 0.8555710871623754, - "grad_norm": 0.6314467302961919, - "learning_rate": 2.1474934771488363e-07, - "loss": 0.8209, - "step": 9487 - }, - { - "epoch": 0.8556612706858457, - "grad_norm": 1.5428583419507529, - "learning_rate": 2.1448606212823715e-07, - "loss": 1.0145, - "step": 9488 - }, - { - "epoch": 0.855751454209316, - "grad_norm": 1.3679804654929606, - "learning_rate": 2.1422292889148452e-07, - "loss": 0.9501, - "step": 9489 - }, - { - "epoch": 0.8558416377327862, - "grad_norm": 1.3045446808818448, - "learning_rate": 2.139599480270784e-07, - "loss": 0.9884, - "step": 9490 - }, - { - "epoch": 0.8559318212562564, - "grad_norm": 1.6211797116939357, - "learning_rate": 2.1369711955745773e-07, - "loss": 0.9563, - "step": 9491 - }, - { - "epoch": 0.8560220047797268, - "grad_norm": 1.5974404485405085, - "learning_rate": 2.1343444350504813e-07, - "loss": 0.9204, - "step": 9492 - }, - { - "epoch": 0.856112188303197, - "grad_norm": 1.3086323746631832, - "learning_rate": 2.1317191989226302e-07, - "loss": 0.9737, - "step": 9493 - }, - { - "epoch": 0.8562023718266673, - "grad_norm": 1.4293568546692992, - "learning_rate": 2.129095487415027e-07, - "loss": 0.9553, - "step": 9494 - }, - { - "epoch": 0.8562925553501375, - "grad_norm": 1.8709294957833495, - "learning_rate": 2.1264733007515257e-07, - "loss": 0.9796, - "step": 9495 - }, - { - "epoch": 0.8563827388736078, - "grad_norm": 1.7403102049056387, - "learning_rate": 2.1238526391558852e-07, - "loss": 0.9586, - "step": 9496 - }, - { - "epoch": 0.8564729223970781, - "grad_norm": 1.931084378248534, - "learning_rate": 2.1212335028517003e-07, - "loss": 1.1053, - "step": 9497 - }, - { - "epoch": 0.8565631059205483, - "grad_norm": 1.3969040033457178, - "learning_rate": 2.1186158920624563e-07, - "loss": 0.9798, - "step": 9498 - }, - { - "epoch": 0.8566532894440185, - "grad_norm": 1.4079130723193687, - "learning_rate": 2.1159998070115015e-07, - "loss": 0.9698, - "step": 9499 - }, - { - "epoch": 0.8567434729674889, - "grad_norm": 1.5420379535244466, - "learning_rate": 2.113385247922055e-07, - "loss": 0.9572, - "step": 9500 - }, - { - "epoch": 0.8568336564909591, - "grad_norm": 1.327590341098172, - "learning_rate": 2.1107722150172068e-07, - "loss": 0.9711, - "step": 9501 - }, - { - "epoch": 0.8569238400144293, - "grad_norm": 1.285415159540926, - "learning_rate": 2.108160708519906e-07, - "loss": 1.0106, - "step": 9502 - }, - { - "epoch": 0.8570140235378996, - "grad_norm": 1.6861371082157068, - "learning_rate": 2.1055507286529984e-07, - "loss": 0.9515, - "step": 9503 - }, - { - "epoch": 0.8571042070613699, - "grad_norm": 1.4711055939491848, - "learning_rate": 2.1029422756391612e-07, - "loss": 0.9927, - "step": 9504 - }, - { - "epoch": 0.8571943905848401, - "grad_norm": 1.618782841411885, - "learning_rate": 2.1003353497009812e-07, - "loss": 0.8883, - "step": 9505 - }, - { - "epoch": 0.8572845741083104, - "grad_norm": 1.431111361522995, - "learning_rate": 2.0977299510608825e-07, - "loss": 0.8973, - "step": 9506 - }, - { - "epoch": 0.8573747576317807, - "grad_norm": 1.5710757094703283, - "learning_rate": 2.0951260799411784e-07, - "loss": 1.0062, - "step": 9507 - }, - { - "epoch": 0.857464941155251, - "grad_norm": 1.40196154813237, - "learning_rate": 2.0925237365640424e-07, - "loss": 0.9629, - "step": 9508 - }, - { - "epoch": 0.8575551246787212, - "grad_norm": 0.6375917873501, - "learning_rate": 2.0899229211515211e-07, - "loss": 0.8467, - "step": 9509 - }, - { - "epoch": 0.8576453082021914, - "grad_norm": 1.483808389236298, - "learning_rate": 2.0873236339255306e-07, - "loss": 0.8906, - "step": 9510 - }, - { - "epoch": 0.8577354917256618, - "grad_norm": 1.7307287154386997, - "learning_rate": 2.0847258751078644e-07, - "loss": 0.9169, - "step": 9511 - }, - { - "epoch": 0.857825675249132, - "grad_norm": 1.666407620643592, - "learning_rate": 2.082129644920163e-07, - "loss": 0.9477, - "step": 9512 - }, - { - "epoch": 0.8579158587726022, - "grad_norm": 1.3391316915683351, - "learning_rate": 2.0795349435839605e-07, - "loss": 0.976, - "step": 9513 - }, - { - "epoch": 0.8580060422960725, - "grad_norm": 1.8698185854136755, - "learning_rate": 2.0769417713206484e-07, - "loss": 0.943, - "step": 9514 - }, - { - "epoch": 0.8580962258195428, - "grad_norm": 1.1999652539627026, - "learning_rate": 2.074350128351492e-07, - "loss": 1.0317, - "step": 9515 - }, - { - "epoch": 0.858186409343013, - "grad_norm": 1.4526317948971172, - "learning_rate": 2.0717600148976256e-07, - "loss": 0.8878, - "step": 9516 - }, - { - "epoch": 0.8582765928664833, - "grad_norm": 1.5812164845891516, - "learning_rate": 2.0691714311800436e-07, - "loss": 0.9038, - "step": 9517 - }, - { - "epoch": 0.8583667763899535, - "grad_norm": 1.4614575118454327, - "learning_rate": 2.066584377419631e-07, - "loss": 1.0328, - "step": 9518 - }, - { - "epoch": 0.8584569599134239, - "grad_norm": 1.6767298007906997, - "learning_rate": 2.0639988538371167e-07, - "loss": 0.9912, - "step": 9519 - }, - { - "epoch": 0.8585471434368941, - "grad_norm": 1.428010801595018, - "learning_rate": 2.0614148606531258e-07, - "loss": 0.9334, - "step": 9520 - }, - { - "epoch": 0.8586373269603643, - "grad_norm": 1.5479078601502787, - "learning_rate": 2.0588323980881285e-07, - "loss": 0.99, - "step": 9521 - }, - { - "epoch": 0.8587275104838346, - "grad_norm": 1.4034525278731267, - "learning_rate": 2.0562514663624752e-07, - "loss": 0.9529, - "step": 9522 - }, - { - "epoch": 0.8588176940073049, - "grad_norm": 1.6783672733278185, - "learning_rate": 2.0536720656963902e-07, - "loss": 1.03, - "step": 9523 - }, - { - "epoch": 0.8589078775307751, - "grad_norm": 5.869283614106773, - "learning_rate": 2.051094196309957e-07, - "loss": 0.9909, - "step": 9524 - }, - { - "epoch": 0.8589980610542454, - "grad_norm": 1.7306929673404055, - "learning_rate": 2.0485178584231378e-07, - "loss": 0.9318, - "step": 9525 - }, - { - "epoch": 0.8590882445777156, - "grad_norm": 0.6977245839860421, - "learning_rate": 2.0459430522557587e-07, - "loss": 0.8394, - "step": 9526 - }, - { - "epoch": 0.8591784281011859, - "grad_norm": 1.4011878985511557, - "learning_rate": 2.0433697780275195e-07, - "loss": 0.9714, - "step": 9527 - }, - { - "epoch": 0.8592686116246562, - "grad_norm": 1.3523831384345002, - "learning_rate": 2.040798035957978e-07, - "loss": 0.9909, - "step": 9528 - }, - { - "epoch": 0.8593587951481264, - "grad_norm": 0.7128732339488305, - "learning_rate": 2.038227826266574e-07, - "loss": 0.865, - "step": 9529 - }, - { - "epoch": 0.8594489786715968, - "grad_norm": 1.45279287829229, - "learning_rate": 2.0356591491726126e-07, - "loss": 0.9744, - "step": 9530 - }, - { - "epoch": 0.859539162195067, - "grad_norm": 1.364714604050282, - "learning_rate": 2.033092004895267e-07, - "loss": 1.0119, - "step": 9531 - }, - { - "epoch": 0.8596293457185372, - "grad_norm": 1.6224577163805751, - "learning_rate": 2.03052639365358e-07, - "loss": 0.9876, - "step": 9532 - }, - { - "epoch": 0.8597195292420075, - "grad_norm": 1.3061679504032568, - "learning_rate": 2.0279623156664694e-07, - "loss": 0.9828, - "step": 9533 - }, - { - "epoch": 0.8598097127654778, - "grad_norm": 1.4565768517681488, - "learning_rate": 2.0253997711527005e-07, - "loss": 1.0304, - "step": 9534 - }, - { - "epoch": 0.859899896288948, - "grad_norm": 1.7628231266420278, - "learning_rate": 2.0228387603309428e-07, - "loss": 1.0277, - "step": 9535 - }, - { - "epoch": 0.8599900798124183, - "grad_norm": 1.4341851867212934, - "learning_rate": 2.0202792834197035e-07, - "loss": 0.9882, - "step": 9536 - }, - { - "epoch": 0.8600802633358885, - "grad_norm": 1.5950942260987804, - "learning_rate": 2.017721340637375e-07, - "loss": 0.9025, - "step": 9537 - }, - { - "epoch": 0.8601704468593588, - "grad_norm": 1.675946847696463, - "learning_rate": 2.0151649322022134e-07, - "loss": 0.9577, - "step": 9538 - }, - { - "epoch": 0.8602606303828291, - "grad_norm": 1.8133031446851755, - "learning_rate": 2.012610058332349e-07, - "loss": 0.7768, - "step": 9539 - }, - { - "epoch": 0.8603508139062993, - "grad_norm": 0.6887341173144211, - "learning_rate": 2.010056719245774e-07, - "loss": 0.8377, - "step": 9540 - }, - { - "epoch": 0.8604409974297695, - "grad_norm": 1.5751810090940415, - "learning_rate": 2.0075049151603563e-07, - "loss": 0.9874, - "step": 9541 - }, - { - "epoch": 0.8605311809532399, - "grad_norm": 2.4033844965207827, - "learning_rate": 2.0049546462938326e-07, - "loss": 0.9796, - "step": 9542 - }, - { - "epoch": 0.8606213644767101, - "grad_norm": 1.3887965579602222, - "learning_rate": 2.0024059128637961e-07, - "loss": 0.9895, - "step": 9543 - }, - { - "epoch": 0.8607115480001803, - "grad_norm": 1.3981214624393405, - "learning_rate": 1.99985871508773e-07, - "loss": 0.8377, - "step": 9544 - }, - { - "epoch": 0.8608017315236506, - "grad_norm": 1.393792392103123, - "learning_rate": 1.9973130531829674e-07, - "loss": 0.9979, - "step": 9545 - }, - { - "epoch": 0.8608919150471209, - "grad_norm": 1.496998884135654, - "learning_rate": 1.994768927366721e-07, - "loss": 0.922, - "step": 9546 - }, - { - "epoch": 0.8609820985705912, - "grad_norm": 1.5556945606218742, - "learning_rate": 1.992226337856069e-07, - "loss": 0.9373, - "step": 9547 - }, - { - "epoch": 0.8610722820940614, - "grad_norm": 1.4006427447494203, - "learning_rate": 1.9896852848679592e-07, - "loss": 0.9251, - "step": 9548 - }, - { - "epoch": 0.8611624656175316, - "grad_norm": 1.4640468209810293, - "learning_rate": 1.9871457686192094e-07, - "loss": 0.9827, - "step": 9549 - }, - { - "epoch": 0.861252649141002, - "grad_norm": 1.6476954376350161, - "learning_rate": 1.984607789326509e-07, - "loss": 0.963, - "step": 9550 - }, - { - "epoch": 0.8613428326644722, - "grad_norm": 1.2851111685315864, - "learning_rate": 1.982071347206402e-07, - "loss": 0.9219, - "step": 9551 - }, - { - "epoch": 0.8614330161879424, - "grad_norm": 1.3322055288728398, - "learning_rate": 1.9795364424753202e-07, - "loss": 0.9514, - "step": 9552 - }, - { - "epoch": 0.8615231997114128, - "grad_norm": 1.5950415370877964, - "learning_rate": 1.9770030753495505e-07, - "loss": 0.9353, - "step": 9553 - }, - { - "epoch": 0.861613383234883, - "grad_norm": 1.465059717152512, - "learning_rate": 1.9744712460452573e-07, - "loss": 0.9241, - "step": 9554 - }, - { - "epoch": 0.8617035667583532, - "grad_norm": 1.3802694007271477, - "learning_rate": 1.9719409547784703e-07, - "loss": 0.9499, - "step": 9555 - }, - { - "epoch": 0.8617937502818235, - "grad_norm": 1.7230896696468105, - "learning_rate": 1.9694122017650837e-07, - "loss": 0.9906, - "step": 9556 - }, - { - "epoch": 0.8618839338052938, - "grad_norm": 1.4507029868171097, - "learning_rate": 1.9668849872208738e-07, - "loss": 0.9826, - "step": 9557 - }, - { - "epoch": 0.8619741173287641, - "grad_norm": 1.3112133622483035, - "learning_rate": 1.9643593113614632e-07, - "loss": 0.8185, - "step": 9558 - }, - { - "epoch": 0.8620643008522343, - "grad_norm": 1.360491633820247, - "learning_rate": 1.961835174402371e-07, - "loss": 1.007, - "step": 9559 - }, - { - "epoch": 0.8621544843757045, - "grad_norm": 1.7099681878058741, - "learning_rate": 1.9593125765589535e-07, - "loss": 0.9396, - "step": 9560 - }, - { - "epoch": 0.8622446678991749, - "grad_norm": 1.389521343024731, - "learning_rate": 1.9567915180464721e-07, - "loss": 0.9125, - "step": 9561 - }, - { - "epoch": 0.8623348514226451, - "grad_norm": 2.4338145638058895, - "learning_rate": 1.9542719990800217e-07, - "loss": 0.9589, - "step": 9562 - }, - { - "epoch": 0.8624250349461153, - "grad_norm": 1.5075356026615876, - "learning_rate": 1.9517540198745896e-07, - "loss": 1.03, - "step": 9563 - }, - { - "epoch": 0.8625152184695856, - "grad_norm": 3.009417851064742, - "learning_rate": 1.94923758064502e-07, - "loss": 0.9603, - "step": 9564 - }, - { - "epoch": 0.8626054019930559, - "grad_norm": 1.2681050432420025, - "learning_rate": 1.9467226816060322e-07, - "loss": 0.9335, - "step": 9565 - }, - { - "epoch": 0.8626955855165261, - "grad_norm": 3.218867031303089, - "learning_rate": 1.9442093229722122e-07, - "loss": 0.9117, - "step": 9566 - }, - { - "epoch": 0.8627857690399964, - "grad_norm": 1.8944884619845996, - "learning_rate": 1.9416975049580085e-07, - "loss": 1.0459, - "step": 9567 - }, - { - "epoch": 0.8628759525634666, - "grad_norm": 1.573913014447663, - "learning_rate": 1.9391872277777456e-07, - "loss": 0.9535, - "step": 9568 - }, - { - "epoch": 0.862966136086937, - "grad_norm": 1.9157103004568539, - "learning_rate": 1.9366784916456158e-07, - "loss": 0.9606, - "step": 9569 - }, - { - "epoch": 0.8630563196104072, - "grad_norm": 1.9448410136890937, - "learning_rate": 1.9341712967756774e-07, - "loss": 0.9233, - "step": 9570 - }, - { - "epoch": 0.8631465031338774, - "grad_norm": 1.417302839638959, - "learning_rate": 1.9316656433818566e-07, - "loss": 0.887, - "step": 9571 - }, - { - "epoch": 0.8632366866573477, - "grad_norm": 1.4366723041173044, - "learning_rate": 1.929161531677954e-07, - "loss": 0.8706, - "step": 9572 - }, - { - "epoch": 0.863326870180818, - "grad_norm": 2.6358006213997833, - "learning_rate": 1.9266589618776251e-07, - "loss": 0.9208, - "step": 9573 - }, - { - "epoch": 0.8634170537042882, - "grad_norm": 1.526153569029938, - "learning_rate": 1.924157934194417e-07, - "loss": 1.0182, - "step": 9574 - }, - { - "epoch": 0.8635072372277585, - "grad_norm": 1.7721166859649895, - "learning_rate": 1.9216584488417142e-07, - "loss": 0.856, - "step": 9575 - }, - { - "epoch": 0.8635974207512288, - "grad_norm": 1.8890869568155846, - "learning_rate": 1.919160506032802e-07, - "loss": 1.0238, - "step": 9576 - }, - { - "epoch": 0.863687604274699, - "grad_norm": 1.921624640308233, - "learning_rate": 1.916664105980812e-07, - "loss": 1.0261, - "step": 9577 - }, - { - "epoch": 0.8637777877981693, - "grad_norm": 1.585978841007898, - "learning_rate": 1.914169248898747e-07, - "loss": 1.0067, - "step": 9578 - }, - { - "epoch": 0.8638679713216395, - "grad_norm": 1.4350933162905442, - "learning_rate": 1.9116759349994882e-07, - "loss": 0.8815, - "step": 9579 - }, - { - "epoch": 0.8639581548451098, - "grad_norm": 1.6795024681198096, - "learning_rate": 1.9091841644957763e-07, - "loss": 0.9945, - "step": 9580 - }, - { - "epoch": 0.8640483383685801, - "grad_norm": 1.34172762007695, - "learning_rate": 1.9066939376002278e-07, - "loss": 1.0498, - "step": 9581 - }, - { - "epoch": 0.8641385218920503, - "grad_norm": 1.7351126949197435, - "learning_rate": 1.9042052545253085e-07, - "loss": 0.8877, - "step": 9582 - }, - { - "epoch": 0.8642287054155205, - "grad_norm": 1.8465230745247125, - "learning_rate": 1.901718115483384e-07, - "loss": 0.9478, - "step": 9583 - }, - { - "epoch": 0.8643188889389909, - "grad_norm": 1.4173342123079729, - "learning_rate": 1.8992325206866598e-07, - "loss": 0.9507, - "step": 9584 - }, - { - "epoch": 0.8644090724624611, - "grad_norm": 1.300245294903833, - "learning_rate": 1.8967484703472225e-07, - "loss": 0.9412, - "step": 9585 - }, - { - "epoch": 0.8644992559859314, - "grad_norm": 1.2001803143451106, - "learning_rate": 1.8942659646770288e-07, - "loss": 1.0127, - "step": 9586 - }, - { - "epoch": 0.8645894395094016, - "grad_norm": 1.5861394593882014, - "learning_rate": 1.8917850038878936e-07, - "loss": 1.0357, - "step": 9587 - }, - { - "epoch": 0.8646796230328719, - "grad_norm": 1.6075193948005835, - "learning_rate": 1.8893055881915121e-07, - "loss": 0.8774, - "step": 9588 - }, - { - "epoch": 0.8647698065563422, - "grad_norm": 1.7003462635145168, - "learning_rate": 1.886827717799442e-07, - "loss": 0.9212, - "step": 9589 - }, - { - "epoch": 0.8648599900798124, - "grad_norm": 3.0833248619444347, - "learning_rate": 1.884351392923096e-07, - "loss": 0.9853, - "step": 9590 - }, - { - "epoch": 0.8649501736032826, - "grad_norm": 2.0595903391110855, - "learning_rate": 1.8818766137737896e-07, - "loss": 1.0102, - "step": 9591 - }, - { - "epoch": 0.865040357126753, - "grad_norm": 1.7574339056270951, - "learning_rate": 1.8794033805626653e-07, - "loss": 0.9402, - "step": 9592 - }, - { - "epoch": 0.8651305406502232, - "grad_norm": 1.3419356406252583, - "learning_rate": 1.876931693500763e-07, - "loss": 0.9645, - "step": 9593 - }, - { - "epoch": 0.8652207241736934, - "grad_norm": 1.6483149189140627, - "learning_rate": 1.8744615527989783e-07, - "loss": 0.8226, - "step": 9594 - }, - { - "epoch": 0.8653109076971637, - "grad_norm": 1.4334114644838278, - "learning_rate": 1.871992958668076e-07, - "loss": 1.0527, - "step": 9595 - }, - { - "epoch": 0.865401091220634, - "grad_norm": 1.584479352234503, - "learning_rate": 1.8695259113186944e-07, - "loss": 1.005, - "step": 9596 - }, - { - "epoch": 0.8654912747441043, - "grad_norm": 1.3733837425291844, - "learning_rate": 1.8670604109613252e-07, - "loss": 0.9316, - "step": 9597 - }, - { - "epoch": 0.8655814582675745, - "grad_norm": 1.3758598586786617, - "learning_rate": 1.8645964578063533e-07, - "loss": 1.0154, - "step": 9598 - }, - { - "epoch": 0.8656716417910447, - "grad_norm": 1.2961382784101196, - "learning_rate": 1.862134052064006e-07, - "loss": 1.0221, - "step": 9599 - }, - { - "epoch": 0.8657618253145151, - "grad_norm": 1.9256782426812475, - "learning_rate": 1.8596731939443932e-07, - "loss": 1.0088, - "step": 9600 - }, - { - "epoch": 0.8658520088379853, - "grad_norm": 1.505301563935154, - "learning_rate": 1.857213883657487e-07, - "loss": 0.9917, - "step": 9601 - }, - { - "epoch": 0.8659421923614555, - "grad_norm": 0.6456711370595332, - "learning_rate": 1.8547561214131303e-07, - "loss": 0.8089, - "step": 9602 - }, - { - "epoch": 0.8660323758849259, - "grad_norm": 2.0566942981407417, - "learning_rate": 1.8522999074210355e-07, - "loss": 1.051, - "step": 9603 - }, - { - "epoch": 0.8661225594083961, - "grad_norm": 1.51926692438609, - "learning_rate": 1.849845241890775e-07, - "loss": 1.0082, - "step": 9604 - }, - { - "epoch": 0.8662127429318663, - "grad_norm": 1.527876348879853, - "learning_rate": 1.8473921250317992e-07, - "loss": 0.9742, - "step": 9605 - }, - { - "epoch": 0.8663029264553366, - "grad_norm": 1.6209432141372633, - "learning_rate": 1.8449405570534225e-07, - "loss": 0.9033, - "step": 9606 - }, - { - "epoch": 0.8663931099788069, - "grad_norm": 1.3716757210347303, - "learning_rate": 1.8424905381648204e-07, - "loss": 1.0071, - "step": 9607 - }, - { - "epoch": 0.8664832935022772, - "grad_norm": 1.4486453832024606, - "learning_rate": 1.8400420685750452e-07, - "loss": 0.9243, - "step": 9608 - }, - { - "epoch": 0.8665734770257474, - "grad_norm": 1.4351889235380608, - "learning_rate": 1.8375951484930142e-07, - "loss": 0.9029, - "step": 9609 - }, - { - "epoch": 0.8666636605492176, - "grad_norm": 1.3860608326634034, - "learning_rate": 1.8351497781275094e-07, - "loss": 0.9685, - "step": 9610 - }, - { - "epoch": 0.866753844072688, - "grad_norm": 1.5262483146589443, - "learning_rate": 1.8327059576871907e-07, - "loss": 0.9553, - "step": 9611 - }, - { - "epoch": 0.8668440275961582, - "grad_norm": 1.2375278527564386, - "learning_rate": 1.8302636873805665e-07, - "loss": 1.0141, - "step": 9612 - }, - { - "epoch": 0.8669342111196284, - "grad_norm": 1.6336462639094569, - "learning_rate": 1.8278229674160373e-07, - "loss": 1.0457, - "step": 9613 - }, - { - "epoch": 0.8670243946430987, - "grad_norm": 1.3670275349358871, - "learning_rate": 1.825383798001845e-07, - "loss": 0.9003, - "step": 9614 - }, - { - "epoch": 0.867114578166569, - "grad_norm": 1.3720360802557934, - "learning_rate": 1.8229461793461297e-07, - "loss": 0.9095, - "step": 9615 - }, - { - "epoch": 0.8672047616900392, - "grad_norm": 14.34277925551603, - "learning_rate": 1.8205101116568698e-07, - "loss": 0.9831, - "step": 9616 - }, - { - "epoch": 0.8672949452135095, - "grad_norm": 1.6833729610248063, - "learning_rate": 1.818075595141928e-07, - "loss": 1.015, - "step": 9617 - }, - { - "epoch": 0.8673851287369797, - "grad_norm": 1.4813718431796599, - "learning_rate": 1.8156426300090288e-07, - "loss": 0.9722, - "step": 9618 - }, - { - "epoch": 0.86747531226045, - "grad_norm": 1.490364279028487, - "learning_rate": 1.8132112164657686e-07, - "loss": 0.9625, - "step": 9619 - }, - { - "epoch": 0.8675654957839203, - "grad_norm": 1.4763000900982732, - "learning_rate": 1.8107813547196106e-07, - "loss": 0.8611, - "step": 9620 - }, - { - "epoch": 0.8676556793073905, - "grad_norm": 1.5776594627297034, - "learning_rate": 1.8083530449778817e-07, - "loss": 1.0057, - "step": 9621 - }, - { - "epoch": 0.8677458628308607, - "grad_norm": 1.6788698557250226, - "learning_rate": 1.8059262874477787e-07, - "loss": 0.8871, - "step": 9622 - }, - { - "epoch": 0.8678360463543311, - "grad_norm": 1.3726620738601243, - "learning_rate": 1.8035010823363627e-07, - "loss": 0.9587, - "step": 9623 - }, - { - "epoch": 0.8679262298778013, - "grad_norm": 1.5467104583465576, - "learning_rate": 1.8010774298505705e-07, - "loss": 1.0054, - "step": 9624 - }, - { - "epoch": 0.8680164134012716, - "grad_norm": 1.5124085895249013, - "learning_rate": 1.7986553301972007e-07, - "loss": 1.0622, - "step": 9625 - }, - { - "epoch": 0.8681065969247419, - "grad_norm": 1.588026653248686, - "learning_rate": 1.7962347835829171e-07, - "loss": 0.966, - "step": 9626 - }, - { - "epoch": 0.8681967804482121, - "grad_norm": 1.2657389648366337, - "learning_rate": 1.793815790214257e-07, - "loss": 0.9792, - "step": 9627 - }, - { - "epoch": 0.8682869639716824, - "grad_norm": 1.6421449432520985, - "learning_rate": 1.791398350297626e-07, - "loss": 0.9242, - "step": 9628 - }, - { - "epoch": 0.8683771474951526, - "grad_norm": 1.393220771930633, - "learning_rate": 1.7889824640392813e-07, - "loss": 0.9748, - "step": 9629 - }, - { - "epoch": 0.868467331018623, - "grad_norm": 1.5845278784675867, - "learning_rate": 1.7865681316453741e-07, - "loss": 0.9506, - "step": 9630 - }, - { - "epoch": 0.8685575145420932, - "grad_norm": 1.8978524268033434, - "learning_rate": 1.7841553533218968e-07, - "loss": 0.9577, - "step": 9631 - }, - { - "epoch": 0.8686476980655634, - "grad_norm": 3.9373958513321834, - "learning_rate": 1.7817441292747292e-07, - "loss": 1.0659, - "step": 9632 - }, - { - "epoch": 0.8687378815890336, - "grad_norm": 4.061814705195305, - "learning_rate": 1.779334459709607e-07, - "loss": 0.957, - "step": 9633 - }, - { - "epoch": 0.868828065112504, - "grad_norm": 2.143511086544039, - "learning_rate": 1.7769263448321347e-07, - "loss": 0.9882, - "step": 9634 - }, - { - "epoch": 0.8689182486359742, - "grad_norm": 1.394524924213156, - "learning_rate": 1.7745197848477879e-07, - "loss": 0.9567, - "step": 9635 - }, - { - "epoch": 0.8690084321594445, - "grad_norm": 1.8521431443033085, - "learning_rate": 1.7721147799619063e-07, - "loss": 1.066, - "step": 9636 - }, - { - "epoch": 0.8690986156829147, - "grad_norm": 0.6047779942104915, - "learning_rate": 1.769711330379704e-07, - "loss": 0.8029, - "step": 9637 - }, - { - "epoch": 0.869188799206385, - "grad_norm": 1.9134451357090891, - "learning_rate": 1.767309436306248e-07, - "loss": 1.0135, - "step": 9638 - }, - { - "epoch": 0.8692789827298553, - "grad_norm": 1.5307053356342575, - "learning_rate": 1.764909097946483e-07, - "loss": 1.0336, - "step": 9639 - }, - { - "epoch": 0.8693691662533255, - "grad_norm": 1.6093096673537484, - "learning_rate": 1.7625103155052236e-07, - "loss": 1.0385, - "step": 9640 - }, - { - "epoch": 0.8694593497767957, - "grad_norm": 1.6088479669792517, - "learning_rate": 1.760113089187143e-07, - "loss": 0.9789, - "step": 9641 - }, - { - "epoch": 0.8695495333002661, - "grad_norm": 1.449293108983504, - "learning_rate": 1.7577174191967868e-07, - "loss": 0.9929, - "step": 9642 - }, - { - "epoch": 0.8696397168237363, - "grad_norm": 1.5452367411095391, - "learning_rate": 1.755323305738574e-07, - "loss": 0.9797, - "step": 9643 - }, - { - "epoch": 0.8697299003472065, - "grad_norm": 1.4746011291677563, - "learning_rate": 1.7529307490167677e-07, - "loss": 1.0034, - "step": 9644 - }, - { - "epoch": 0.8698200838706768, - "grad_norm": 1.7165167431346744, - "learning_rate": 1.7505397492355288e-07, - "loss": 0.9498, - "step": 9645 - }, - { - "epoch": 0.8699102673941471, - "grad_norm": 1.461373901465968, - "learning_rate": 1.7481503065988589e-07, - "loss": 1.0022, - "step": 9646 - }, - { - "epoch": 0.8700004509176174, - "grad_norm": 1.2205839297274776, - "learning_rate": 1.7457624213106526e-07, - "loss": 0.9363, - "step": 9647 - }, - { - "epoch": 0.8700906344410876, - "grad_norm": 1.5432958328656494, - "learning_rate": 1.7433760935746465e-07, - "loss": 0.9744, - "step": 9648 - }, - { - "epoch": 0.8701808179645579, - "grad_norm": 1.4886022506924412, - "learning_rate": 1.740991323594456e-07, - "loss": 0.9126, - "step": 9649 - }, - { - "epoch": 0.8702710014880282, - "grad_norm": 1.571053021935461, - "learning_rate": 1.7386081115735651e-07, - "loss": 0.9455, - "step": 9650 - }, - { - "epoch": 0.8703611850114984, - "grad_norm": 1.709184558588148, - "learning_rate": 1.736226457715324e-07, - "loss": 0.9312, - "step": 9651 - }, - { - "epoch": 0.8704513685349686, - "grad_norm": 1.3630657298946984, - "learning_rate": 1.7338463622229505e-07, - "loss": 0.9259, - "step": 9652 - }, - { - "epoch": 0.870541552058439, - "grad_norm": 1.3920352514314147, - "learning_rate": 1.7314678252995152e-07, - "loss": 0.92, - "step": 9653 - }, - { - "epoch": 0.8706317355819092, - "grad_norm": 1.491008192667977, - "learning_rate": 1.7290908471479847e-07, - "loss": 0.908, - "step": 9654 - }, - { - "epoch": 0.8707219191053794, - "grad_norm": 1.565335181055381, - "learning_rate": 1.7267154279711637e-07, - "loss": 0.9674, - "step": 9655 - }, - { - "epoch": 0.8708121026288497, - "grad_norm": 1.2455705363400882, - "learning_rate": 1.724341567971741e-07, - "loss": 1.0162, - "step": 9656 - }, - { - "epoch": 0.87090228615232, - "grad_norm": 1.4161811258347763, - "learning_rate": 1.7219692673522657e-07, - "loss": 1.0613, - "step": 9657 - }, - { - "epoch": 0.8709924696757902, - "grad_norm": 1.3902230271135931, - "learning_rate": 1.7195985263151558e-07, - "loss": 0.9909, - "step": 9658 - }, - { - "epoch": 0.8710826531992605, - "grad_norm": 1.490569510716651, - "learning_rate": 1.7172293450626985e-07, - "loss": 1.0156, - "step": 9659 - }, - { - "epoch": 0.8711728367227307, - "grad_norm": 1.4689252221308886, - "learning_rate": 1.7148617237970475e-07, - "loss": 0.9668, - "step": 9660 - }, - { - "epoch": 0.8712630202462011, - "grad_norm": 1.2220332596868064, - "learning_rate": 1.7124956627202102e-07, - "loss": 0.927, - "step": 9661 - }, - { - "epoch": 0.8713532037696713, - "grad_norm": 1.784383794492304, - "learning_rate": 1.7101311620340852e-07, - "loss": 0.9885, - "step": 9662 - }, - { - "epoch": 0.8714433872931415, - "grad_norm": 2.64873788756139, - "learning_rate": 1.7077682219404155e-07, - "loss": 0.906, - "step": 9663 - }, - { - "epoch": 0.8715335708166118, - "grad_norm": 1.7207144175060425, - "learning_rate": 1.705406842640824e-07, - "loss": 1.0979, - "step": 9664 - }, - { - "epoch": 0.8716237543400821, - "grad_norm": 1.3011914313892905, - "learning_rate": 1.7030470243367946e-07, - "loss": 0.9566, - "step": 9665 - }, - { - "epoch": 0.8717139378635523, - "grad_norm": 1.3409138846201505, - "learning_rate": 1.7006887672296834e-07, - "loss": 0.892, - "step": 9666 - }, - { - "epoch": 0.8718041213870226, - "grad_norm": 1.551612227796426, - "learning_rate": 1.6983320715207094e-07, - "loss": 0.9205, - "step": 9667 - }, - { - "epoch": 0.8718943049104928, - "grad_norm": 2.128069623991293, - "learning_rate": 1.6959769374109523e-07, - "loss": 0.9895, - "step": 9668 - }, - { - "epoch": 0.8719844884339631, - "grad_norm": 1.8216075956161952, - "learning_rate": 1.6936233651013754e-07, - "loss": 0.903, - "step": 9669 - }, - { - "epoch": 0.8720746719574334, - "grad_norm": 1.3988835992495114, - "learning_rate": 1.691271354792787e-07, - "loss": 0.9963, - "step": 9670 - }, - { - "epoch": 0.8721648554809036, - "grad_norm": 1.384813531173389, - "learning_rate": 1.6889209066858866e-07, - "loss": 0.8506, - "step": 9671 - }, - { - "epoch": 0.872255039004374, - "grad_norm": 1.8601663011634226, - "learning_rate": 1.6865720209812185e-07, - "loss": 1.0128, - "step": 9672 - }, - { - "epoch": 0.8723452225278442, - "grad_norm": 1.6606028954553382, - "learning_rate": 1.684224697879204e-07, - "loss": 0.9388, - "step": 9673 - }, - { - "epoch": 0.8724354060513144, - "grad_norm": 1.690780065620867, - "learning_rate": 1.6818789375801302e-07, - "loss": 1.035, - "step": 9674 - }, - { - "epoch": 0.8725255895747847, - "grad_norm": 1.4256688739960799, - "learning_rate": 1.679534740284152e-07, - "loss": 0.9324, - "step": 9675 - }, - { - "epoch": 0.872615773098255, - "grad_norm": 1.8412078530433549, - "learning_rate": 1.6771921061912853e-07, - "loss": 0.918, - "step": 9676 - }, - { - "epoch": 0.8727059566217252, - "grad_norm": 1.6598458033493975, - "learning_rate": 1.6748510355014234e-07, - "loss": 0.9135, - "step": 9677 - }, - { - "epoch": 0.8727961401451955, - "grad_norm": 1.301424755121049, - "learning_rate": 1.6725115284143132e-07, - "loss": 0.9785, - "step": 9678 - }, - { - "epoch": 0.8728863236686657, - "grad_norm": 1.636408446401628, - "learning_rate": 1.670173585129575e-07, - "loss": 0.9427, - "step": 9679 - }, - { - "epoch": 0.872976507192136, - "grad_norm": 1.2867645125829728, - "learning_rate": 1.667837205846696e-07, - "loss": 1.0244, - "step": 9680 - }, - { - "epoch": 0.8730666907156063, - "grad_norm": 1.5517659556209402, - "learning_rate": 1.6655023907650278e-07, - "loss": 0.968, - "step": 9681 - }, - { - "epoch": 0.8731568742390765, - "grad_norm": 1.7690259199297507, - "learning_rate": 1.6631691400837954e-07, - "loss": 1.0071, - "step": 9682 - }, - { - "epoch": 0.8732470577625467, - "grad_norm": 1.3637930010651667, - "learning_rate": 1.6608374540020752e-07, - "loss": 1.0197, - "step": 9683 - }, - { - "epoch": 0.8733372412860171, - "grad_norm": 0.6792068974645694, - "learning_rate": 1.658507332718828e-07, - "loss": 0.7926, - "step": 9684 - }, - { - "epoch": 0.8734274248094873, - "grad_norm": 1.7174953389555694, - "learning_rate": 1.656178776432864e-07, - "loss": 0.8995, - "step": 9685 - }, - { - "epoch": 0.8735176083329576, - "grad_norm": 1.7855146119335177, - "learning_rate": 1.6538517853428814e-07, - "loss": 0.9427, - "step": 9686 - }, - { - "epoch": 0.8736077918564278, - "grad_norm": 1.237496834567867, - "learning_rate": 1.6515263596474194e-07, - "loss": 0.947, - "step": 9687 - }, - { - "epoch": 0.8736979753798981, - "grad_norm": 1.5744107037077666, - "learning_rate": 1.6492024995449017e-07, - "loss": 0.9517, - "step": 9688 - }, - { - "epoch": 0.8737881589033684, - "grad_norm": 1.5749222388069115, - "learning_rate": 1.6468802052336116e-07, - "loss": 0.9315, - "step": 9689 - }, - { - "epoch": 0.8738783424268386, - "grad_norm": 2.315937477269325, - "learning_rate": 1.6445594769116998e-07, - "loss": 0.9933, - "step": 9690 - }, - { - "epoch": 0.8739685259503088, - "grad_norm": 5.1819838644915315, - "learning_rate": 1.6422403147771836e-07, - "loss": 1.042, - "step": 9691 - }, - { - "epoch": 0.8740587094737792, - "grad_norm": 1.3677313458651064, - "learning_rate": 1.6399227190279485e-07, - "loss": 0.9366, - "step": 9692 - }, - { - "epoch": 0.8741488929972494, - "grad_norm": 1.5013892574978374, - "learning_rate": 1.637606689861748e-07, - "loss": 0.8972, - "step": 9693 - }, - { - "epoch": 0.8742390765207196, - "grad_norm": 1.974725706633663, - "learning_rate": 1.6352922274761883e-07, - "loss": 0.93, - "step": 9694 - }, - { - "epoch": 0.87432926004419, - "grad_norm": 1.6493227782660824, - "learning_rate": 1.6329793320687602e-07, - "loss": 0.8362, - "step": 9695 - }, - { - "epoch": 0.8744194435676602, - "grad_norm": 2.2950045696537176, - "learning_rate": 1.630668003836808e-07, - "loss": 0.9959, - "step": 9696 - }, - { - "epoch": 0.8745096270911304, - "grad_norm": 1.7298372033317233, - "learning_rate": 1.62835824297755e-07, - "loss": 0.9662, - "step": 9697 - }, - { - "epoch": 0.8745998106146007, - "grad_norm": 1.4309799939135965, - "learning_rate": 1.626050049688066e-07, - "loss": 0.9765, - "step": 9698 - }, - { - "epoch": 0.874689994138071, - "grad_norm": 1.3333172896532564, - "learning_rate": 1.623743424165309e-07, - "loss": 0.9459, - "step": 9699 - }, - { - "epoch": 0.8747801776615413, - "grad_norm": 1.8769026956918835, - "learning_rate": 1.6214383666060826e-07, - "loss": 0.9117, - "step": 9700 - }, - { - "epoch": 0.8748703611850115, - "grad_norm": 1.7146851068684748, - "learning_rate": 1.619134877207078e-07, - "loss": 0.9923, - "step": 9701 - }, - { - "epoch": 0.8749605447084817, - "grad_norm": 1.4739272403021086, - "learning_rate": 1.616832956164831e-07, - "loss": 0.9713, - "step": 9702 - }, - { - "epoch": 0.8750507282319521, - "grad_norm": 3.475312220317266, - "learning_rate": 1.6145326036757667e-07, - "loss": 0.9371, - "step": 9703 - }, - { - "epoch": 0.8751409117554223, - "grad_norm": 1.6921075860171717, - "learning_rate": 1.612233819936155e-07, - "loss": 0.9313, - "step": 9704 - }, - { - "epoch": 0.8752310952788925, - "grad_norm": 1.7439583301859385, - "learning_rate": 1.6099366051421414e-07, - "loss": 0.9664, - "step": 9705 - }, - { - "epoch": 0.8753212788023628, - "grad_norm": 1.4387083365392597, - "learning_rate": 1.6076409594897378e-07, - "loss": 1.0349, - "step": 9706 - }, - { - "epoch": 0.8754114623258331, - "grad_norm": 0.6807082830921238, - "learning_rate": 1.605346883174823e-07, - "loss": 0.7968, - "step": 9707 - }, - { - "epoch": 0.8755016458493033, - "grad_norm": 2.246169856896734, - "learning_rate": 1.6030543763931427e-07, - "loss": 0.9487, - "step": 9708 - }, - { - "epoch": 0.8755918293727736, - "grad_norm": 1.6289706503306534, - "learning_rate": 1.600763439340298e-07, - "loss": 0.9855, - "step": 9709 - }, - { - "epoch": 0.8756820128962438, - "grad_norm": 1.5063277133505626, - "learning_rate": 1.5984740722117707e-07, - "loss": 1.0236, - "step": 9710 - }, - { - "epoch": 0.8757721964197142, - "grad_norm": 1.16252839043518, - "learning_rate": 1.5961862752028998e-07, - "loss": 0.9784, - "step": 9711 - }, - { - "epoch": 0.8758623799431844, - "grad_norm": 1.3796214985424144, - "learning_rate": 1.5939000485088937e-07, - "loss": 0.8798, - "step": 9712 - }, - { - "epoch": 0.8759525634666546, - "grad_norm": 1.43428309858862, - "learning_rate": 1.5916153923248254e-07, - "loss": 1.0573, - "step": 9713 - }, - { - "epoch": 0.8760427469901249, - "grad_norm": 1.4104690416191445, - "learning_rate": 1.5893323068456342e-07, - "loss": 1.0149, - "step": 9714 - }, - { - "epoch": 0.8761329305135952, - "grad_norm": 1.3756226516973964, - "learning_rate": 1.5870507922661248e-07, - "loss": 1.006, - "step": 9715 - }, - { - "epoch": 0.8762231140370654, - "grad_norm": 0.6292087940823763, - "learning_rate": 1.5847708487809763e-07, - "loss": 0.8219, - "step": 9716 - }, - { - "epoch": 0.8763132975605357, - "grad_norm": 1.8685512271154068, - "learning_rate": 1.5824924765847113e-07, - "loss": 1.0514, - "step": 9717 - }, - { - "epoch": 0.8764034810840059, - "grad_norm": 1.2727294076554858, - "learning_rate": 1.5802156758717478e-07, - "loss": 0.9421, - "step": 9718 - }, - { - "epoch": 0.8764936646074762, - "grad_norm": 1.6175079028306492, - "learning_rate": 1.5779404468363433e-07, - "loss": 0.9661, - "step": 9719 - }, - { - "epoch": 0.8765838481309465, - "grad_norm": 1.421119237569507, - "learning_rate": 1.5756667896726405e-07, - "loss": 1.0237, - "step": 9720 - }, - { - "epoch": 0.8766740316544167, - "grad_norm": 2.9613513506335947, - "learning_rate": 1.5733947045746377e-07, - "loss": 0.9864, - "step": 9721 - }, - { - "epoch": 0.876764215177887, - "grad_norm": 1.4129778247910103, - "learning_rate": 1.5711241917362018e-07, - "loss": 1.0335, - "step": 9722 - }, - { - "epoch": 0.8768543987013573, - "grad_norm": 1.7109501110988208, - "learning_rate": 1.5688552513510688e-07, - "loss": 0.9158, - "step": 9723 - }, - { - "epoch": 0.8769445822248275, - "grad_norm": 1.7320938229818668, - "learning_rate": 1.5665878836128266e-07, - "loss": 0.9956, - "step": 9724 - }, - { - "epoch": 0.8770347657482978, - "grad_norm": 1.6277104294738722, - "learning_rate": 1.5643220887149554e-07, - "loss": 1.0101, - "step": 9725 - }, - { - "epoch": 0.8771249492717681, - "grad_norm": 0.7165215818531585, - "learning_rate": 1.562057866850772e-07, - "loss": 0.8692, - "step": 9726 - }, - { - "epoch": 0.8772151327952383, - "grad_norm": 1.7875821848295932, - "learning_rate": 1.5597952182134777e-07, - "loss": 0.8413, - "step": 9727 - }, - { - "epoch": 0.8773053163187086, - "grad_norm": 1.32536468885225, - "learning_rate": 1.557534142996133e-07, - "loss": 1.0207, - "step": 9728 - }, - { - "epoch": 0.8773954998421788, - "grad_norm": 1.8261865625453806, - "learning_rate": 1.5552746413916662e-07, - "loss": 1.0001, - "step": 9729 - }, - { - "epoch": 0.8774856833656491, - "grad_norm": 1.713918101540754, - "learning_rate": 1.5530167135928697e-07, - "loss": 0.9574, - "step": 9730 - }, - { - "epoch": 0.8775758668891194, - "grad_norm": 1.6756090195721547, - "learning_rate": 1.5507603597924068e-07, - "loss": 0.9155, - "step": 9731 - }, - { - "epoch": 0.8776660504125896, - "grad_norm": 2.4164762312298116, - "learning_rate": 1.548505580182793e-07, - "loss": 0.9247, - "step": 9732 - }, - { - "epoch": 0.8777562339360598, - "grad_norm": 6.273708693935923, - "learning_rate": 1.5462523749564271e-07, - "loss": 0.9717, - "step": 9733 - }, - { - "epoch": 0.8778464174595302, - "grad_norm": 1.798595699242636, - "learning_rate": 1.5440007443055602e-07, - "loss": 0.9644, - "step": 9734 - }, - { - "epoch": 0.8779366009830004, - "grad_norm": 1.671710710385846, - "learning_rate": 1.541750688422314e-07, - "loss": 0.9081, - "step": 9735 - }, - { - "epoch": 0.8780267845064706, - "grad_norm": 1.530315503270741, - "learning_rate": 1.5395022074986797e-07, - "loss": 0.9777, - "step": 9736 - }, - { - "epoch": 0.8781169680299409, - "grad_norm": 1.3822981314183347, - "learning_rate": 1.5372553017265033e-07, - "loss": 0.9179, - "step": 9737 - }, - { - "epoch": 0.8782071515534112, - "grad_norm": 2.2074481941615813, - "learning_rate": 1.5350099712975116e-07, - "loss": 1.0245, - "step": 9738 - }, - { - "epoch": 0.8782973350768815, - "grad_norm": 1.543932572661504, - "learning_rate": 1.5327662164032785e-07, - "loss": 0.9585, - "step": 9739 - }, - { - "epoch": 0.8783875186003517, - "grad_norm": 1.5925236640022353, - "learning_rate": 1.5305240372352656e-07, - "loss": 0.8228, - "step": 9740 - }, - { - "epoch": 0.8784777021238219, - "grad_norm": 1.586007854235372, - "learning_rate": 1.5282834339847738e-07, - "loss": 0.9763, - "step": 9741 - }, - { - "epoch": 0.8785678856472923, - "grad_norm": 1.4138292151804885, - "learning_rate": 1.526044406842999e-07, - "loss": 0.95, - "step": 9742 - }, - { - "epoch": 0.8786580691707625, - "grad_norm": 1.4706316318840684, - "learning_rate": 1.523806956000977e-07, - "loss": 0.898, - "step": 9743 - }, - { - "epoch": 0.8787482526942327, - "grad_norm": 1.2854232742576648, - "learning_rate": 1.5215710816496197e-07, - "loss": 1.0148, - "step": 9744 - }, - { - "epoch": 0.8788384362177031, - "grad_norm": 1.5474308440943778, - "learning_rate": 1.5193367839797077e-07, - "loss": 0.917, - "step": 9745 - }, - { - "epoch": 0.8789286197411733, - "grad_norm": 1.4638159643045656, - "learning_rate": 1.5171040631818842e-07, - "loss": 0.9728, - "step": 9746 - }, - { - "epoch": 0.8790188032646435, - "grad_norm": 1.5125889996282085, - "learning_rate": 1.5148729194466547e-07, - "loss": 0.8993, - "step": 9747 - }, - { - "epoch": 0.8791089867881138, - "grad_norm": 1.8070593766176994, - "learning_rate": 1.5126433529643956e-07, - "loss": 1.0357, - "step": 9748 - }, - { - "epoch": 0.8791991703115841, - "grad_norm": 1.467751183661554, - "learning_rate": 1.5104153639253436e-07, - "loss": 0.9688, - "step": 9749 - }, - { - "epoch": 0.8792893538350544, - "grad_norm": 0.7069087686407403, - "learning_rate": 1.5081889525196002e-07, - "loss": 0.8839, - "step": 9750 - }, - { - "epoch": 0.8793795373585246, - "grad_norm": 2.0802419105485277, - "learning_rate": 1.5059641189371398e-07, - "loss": 1.0244, - "step": 9751 - }, - { - "epoch": 0.8794697208819948, - "grad_norm": 1.3748768404509497, - "learning_rate": 1.503740863367795e-07, - "loss": 0.983, - "step": 9752 - }, - { - "epoch": 0.8795599044054652, - "grad_norm": 1.8137666945737456, - "learning_rate": 1.50151918600127e-07, - "loss": 0.996, - "step": 9753 - }, - { - "epoch": 0.8796500879289354, - "grad_norm": 1.5784068092046575, - "learning_rate": 1.4992990870271217e-07, - "loss": 1.0182, - "step": 9754 - }, - { - "epoch": 0.8797402714524056, - "grad_norm": 1.815411564006497, - "learning_rate": 1.497080566634794e-07, - "loss": 0.9729, - "step": 9755 - }, - { - "epoch": 0.8798304549758759, - "grad_norm": 1.442255364471367, - "learning_rate": 1.4948636250135693e-07, - "loss": 0.98, - "step": 9756 - }, - { - "epoch": 0.8799206384993462, - "grad_norm": 1.6101874365602047, - "learning_rate": 1.4926482623526249e-07, - "loss": 1.0353, - "step": 9757 - }, - { - "epoch": 0.8800108220228164, - "grad_norm": 1.7110593569485246, - "learning_rate": 1.4904344788409694e-07, - "loss": 0.9692, - "step": 9758 - }, - { - "epoch": 0.8801010055462867, - "grad_norm": 1.6375088844349497, - "learning_rate": 1.4882222746675143e-07, - "loss": 0.9238, - "step": 9759 - }, - { - "epoch": 0.8801911890697569, - "grad_norm": 1.3528674666882197, - "learning_rate": 1.4860116500210018e-07, - "loss": 1.0105, - "step": 9760 - }, - { - "epoch": 0.8802813725932273, - "grad_norm": 1.9967375013983075, - "learning_rate": 1.4838026050900632e-07, - "loss": 1.0302, - "step": 9761 - }, - { - "epoch": 0.8803715561166975, - "grad_norm": 1.2865587367775755, - "learning_rate": 1.481595140063181e-07, - "loss": 1.0204, - "step": 9762 - }, - { - "epoch": 0.8804617396401677, - "grad_norm": 1.4368964047557076, - "learning_rate": 1.4793892551287136e-07, - "loss": 1.0579, - "step": 9763 - }, - { - "epoch": 0.880551923163638, - "grad_norm": 1.6791448381896716, - "learning_rate": 1.4771849504748768e-07, - "loss": 0.8997, - "step": 9764 - }, - { - "epoch": 0.8806421066871083, - "grad_norm": 1.3397922339141852, - "learning_rate": 1.4749822262897517e-07, - "loss": 1.0515, - "step": 9765 - }, - { - "epoch": 0.8807322902105785, - "grad_norm": 1.4369903988023023, - "learning_rate": 1.4727810827612895e-07, - "loss": 0.9129, - "step": 9766 - }, - { - "epoch": 0.8808224737340488, - "grad_norm": 1.7934057829213041, - "learning_rate": 1.470581520077303e-07, - "loss": 0.9747, - "step": 9767 - }, - { - "epoch": 0.8809126572575191, - "grad_norm": 2.046388641116223, - "learning_rate": 1.4683835384254705e-07, - "loss": 0.9659, - "step": 9768 - }, - { - "epoch": 0.8810028407809893, - "grad_norm": 1.6240967661164278, - "learning_rate": 1.4661871379933376e-07, - "loss": 1.055, - "step": 9769 - }, - { - "epoch": 0.8810930243044596, - "grad_norm": 1.4652790694308904, - "learning_rate": 1.4639923189683169e-07, - "loss": 1.0227, - "step": 9770 - }, - { - "epoch": 0.8811832078279298, - "grad_norm": 1.396935108777561, - "learning_rate": 1.461799081537669e-07, - "loss": 1.0192, - "step": 9771 - }, - { - "epoch": 0.8812733913514001, - "grad_norm": 1.499044034238589, - "learning_rate": 1.4596074258885514e-07, - "loss": 1.0234, - "step": 9772 - }, - { - "epoch": 0.8813635748748704, - "grad_norm": 1.5282997983098061, - "learning_rate": 1.4574173522079502e-07, - "loss": 0.939, - "step": 9773 - }, - { - "epoch": 0.8814537583983406, - "grad_norm": 1.4591471263391198, - "learning_rate": 1.4552288606827513e-07, - "loss": 0.9443, - "step": 9774 - }, - { - "epoch": 0.8815439419218108, - "grad_norm": 1.5303200213773487, - "learning_rate": 1.4530419514996761e-07, - "loss": 1.0487, - "step": 9775 - }, - { - "epoch": 0.8816341254452812, - "grad_norm": 0.7330576260983851, - "learning_rate": 1.4508566248453291e-07, - "loss": 0.8754, - "step": 9776 - }, - { - "epoch": 0.8817243089687514, - "grad_norm": 1.7631848783443465, - "learning_rate": 1.448672880906172e-07, - "loss": 0.9683, - "step": 9777 - }, - { - "epoch": 0.8818144924922217, - "grad_norm": 1.6323136293077467, - "learning_rate": 1.4464907198685382e-07, - "loss": 0.9295, - "step": 9778 - }, - { - "epoch": 0.8819046760156919, - "grad_norm": 1.342556356742488, - "learning_rate": 1.444310141918621e-07, - "loss": 0.9713, - "step": 9779 - }, - { - "epoch": 0.8819948595391622, - "grad_norm": 1.9238454130528744, - "learning_rate": 1.4421311472424735e-07, - "loss": 0.9974, - "step": 9780 - }, - { - "epoch": 0.8820850430626325, - "grad_norm": 1.5980044407550091, - "learning_rate": 1.4399537360260273e-07, - "loss": 1.0016, - "step": 9781 - }, - { - "epoch": 0.8821752265861027, - "grad_norm": 1.5696395267033998, - "learning_rate": 1.4377779084550645e-07, - "loss": 0.9949, - "step": 9782 - }, - { - "epoch": 0.8822654101095729, - "grad_norm": 1.2825948123946538, - "learning_rate": 1.4356036647152413e-07, - "loss": 0.9692, - "step": 9783 - }, - { - "epoch": 0.8823555936330433, - "grad_norm": 1.3640110281445317, - "learning_rate": 1.4334310049920785e-07, - "loss": 0.9567, - "step": 9784 - }, - { - "epoch": 0.8824457771565135, - "grad_norm": 0.6155763190526417, - "learning_rate": 1.431259929470956e-07, - "loss": 0.791, - "step": 9785 - }, - { - "epoch": 0.8825359606799837, - "grad_norm": 1.6367970450163016, - "learning_rate": 1.4290904383371237e-07, - "loss": 1.0431, - "step": 9786 - }, - { - "epoch": 0.882626144203454, - "grad_norm": 1.6336274372059008, - "learning_rate": 1.4269225317756961e-07, - "loss": 0.9523, - "step": 9787 - }, - { - "epoch": 0.8827163277269243, - "grad_norm": 1.5995280672492405, - "learning_rate": 1.424756209971645e-07, - "loss": 0.9312, - "step": 9788 - }, - { - "epoch": 0.8828065112503946, - "grad_norm": 1.72697913204854, - "learning_rate": 1.4225914731098199e-07, - "loss": 0.9642, - "step": 9789 - }, - { - "epoch": 0.8828966947738648, - "grad_norm": 1.5035242958046775, - "learning_rate": 1.4204283213749248e-07, - "loss": 0.9109, - "step": 9790 - }, - { - "epoch": 0.8829868782973351, - "grad_norm": 1.3782092269797843, - "learning_rate": 1.4182667549515315e-07, - "loss": 1.0256, - "step": 9791 - }, - { - "epoch": 0.8830770618208054, - "grad_norm": 1.4500563018654524, - "learning_rate": 1.4161067740240752e-07, - "loss": 0.9504, - "step": 9792 - }, - { - "epoch": 0.8831672453442756, - "grad_norm": 1.468297685817575, - "learning_rate": 1.4139483787768614e-07, - "loss": 0.9668, - "step": 9793 - }, - { - "epoch": 0.8832574288677458, - "grad_norm": 1.360863451689622, - "learning_rate": 1.4117915693940584e-07, - "loss": 0.8801, - "step": 9794 - }, - { - "epoch": 0.8833476123912162, - "grad_norm": 1.3828240518033557, - "learning_rate": 1.409636346059684e-07, - "loss": 0.9625, - "step": 9795 - }, - { - "epoch": 0.8834377959146864, - "grad_norm": 1.9993419756819684, - "learning_rate": 1.4074827089576501e-07, - "loss": 0.994, - "step": 9796 - }, - { - "epoch": 0.8835279794381566, - "grad_norm": 1.6675408772686986, - "learning_rate": 1.4053306582717085e-07, - "loss": 0.9906, - "step": 9797 - }, - { - "epoch": 0.8836181629616269, - "grad_norm": 1.5246767139151027, - "learning_rate": 1.4031801941854827e-07, - "loss": 0.9995, - "step": 9798 - }, - { - "epoch": 0.8837083464850972, - "grad_norm": 1.457868483798263, - "learning_rate": 1.401031316882466e-07, - "loss": 0.9347, - "step": 9799 - }, - { - "epoch": 0.8837985300085675, - "grad_norm": 2.4697868970301755, - "learning_rate": 1.39888402654601e-07, - "loss": 0.9887, - "step": 9800 - }, - { - "epoch": 0.8838887135320377, - "grad_norm": 0.6414034463540064, - "learning_rate": 1.3967383233593344e-07, - "loss": 0.86, - "step": 9801 - }, - { - "epoch": 0.8839788970555079, - "grad_norm": 1.5310192323519463, - "learning_rate": 1.3945942075055218e-07, - "loss": 0.9253, - "step": 9802 - }, - { - "epoch": 0.8840690805789783, - "grad_norm": 1.531680610988985, - "learning_rate": 1.3924516791675212e-07, - "loss": 1.0004, - "step": 9803 - }, - { - "epoch": 0.8841592641024485, - "grad_norm": 1.2903833269232874, - "learning_rate": 1.3903107385281487e-07, - "loss": 0.8795, - "step": 9804 - }, - { - "epoch": 0.8842494476259187, - "grad_norm": 1.361199612346104, - "learning_rate": 1.3881713857700717e-07, - "loss": 0.9809, - "step": 9805 - }, - { - "epoch": 0.884339631149389, - "grad_norm": 1.57908586011828, - "learning_rate": 1.3860336210758372e-07, - "loss": 0.9481, - "step": 9806 - }, - { - "epoch": 0.8844298146728593, - "grad_norm": 1.4100602981173473, - "learning_rate": 1.3838974446278506e-07, - "loss": 0.9264, - "step": 9807 - }, - { - "epoch": 0.8845199981963295, - "grad_norm": 1.4745718641752215, - "learning_rate": 1.3817628566083817e-07, - "loss": 0.9487, - "step": 9808 - }, - { - "epoch": 0.8846101817197998, - "grad_norm": 1.260635477834798, - "learning_rate": 1.3796298571995712e-07, - "loss": 0.9833, - "step": 9809 - }, - { - "epoch": 0.88470036524327, - "grad_norm": 1.5143794811527267, - "learning_rate": 1.377498446583405e-07, - "loss": 0.8737, - "step": 9810 - }, - { - "epoch": 0.8847905487667403, - "grad_norm": 1.6330215831295056, - "learning_rate": 1.3753686249417596e-07, - "loss": 0.9472, - "step": 9811 - }, - { - "epoch": 0.8848807322902106, - "grad_norm": 1.7263342244721154, - "learning_rate": 1.373240392456354e-07, - "loss": 0.9211, - "step": 9812 - }, - { - "epoch": 0.8849709158136808, - "grad_norm": 1.6421164137011879, - "learning_rate": 1.37111374930879e-07, - "loss": 0.9119, - "step": 9813 - }, - { - "epoch": 0.885061099337151, - "grad_norm": 1.3458275813559488, - "learning_rate": 1.3689886956805176e-07, - "loss": 0.9034, - "step": 9814 - }, - { - "epoch": 0.8851512828606214, - "grad_norm": 1.4251758701147892, - "learning_rate": 1.3668652317528585e-07, - "loss": 0.9594, - "step": 9815 - }, - { - "epoch": 0.8852414663840916, - "grad_norm": 1.443869205194073, - "learning_rate": 1.3647433577070012e-07, - "loss": 0.9801, - "step": 9816 - }, - { - "epoch": 0.8853316499075619, - "grad_norm": 0.71501545329167, - "learning_rate": 1.3626230737239942e-07, - "loss": 0.8307, - "step": 9817 - }, - { - "epoch": 0.8854218334310322, - "grad_norm": 0.7445554603250912, - "learning_rate": 1.3605043799847527e-07, - "loss": 0.8423, - "step": 9818 - }, - { - "epoch": 0.8855120169545024, - "grad_norm": 1.2489470338456092, - "learning_rate": 1.3583872766700567e-07, - "loss": 0.9032, - "step": 9819 - }, - { - "epoch": 0.8856022004779727, - "grad_norm": 1.5321094669993456, - "learning_rate": 1.3562717639605437e-07, - "loss": 0.9556, - "step": 9820 - }, - { - "epoch": 0.8856923840014429, - "grad_norm": 1.517104380537652, - "learning_rate": 1.3541578420367229e-07, - "loss": 0.9413, - "step": 9821 - }, - { - "epoch": 0.8857825675249132, - "grad_norm": 1.5699063815007046, - "learning_rate": 1.3520455110789697e-07, - "loss": 0.9865, - "step": 9822 - }, - { - "epoch": 0.8858727510483835, - "grad_norm": 1.4351342678722745, - "learning_rate": 1.3499347712675158e-07, - "loss": 0.9472, - "step": 9823 - }, - { - "epoch": 0.8859629345718537, - "grad_norm": 2.3492491232681707, - "learning_rate": 1.3478256227824635e-07, - "loss": 0.941, - "step": 9824 - }, - { - "epoch": 0.8860531180953239, - "grad_norm": 1.5979169339109724, - "learning_rate": 1.3457180658037759e-07, - "loss": 0.8863, - "step": 9825 - }, - { - "epoch": 0.8861433016187943, - "grad_norm": 1.3030502645527489, - "learning_rate": 1.3436121005112843e-07, - "loss": 0.9978, - "step": 9826 - }, - { - "epoch": 0.8862334851422645, - "grad_norm": 1.3495790779036472, - "learning_rate": 1.3415077270846719e-07, - "loss": 0.9441, - "step": 9827 - }, - { - "epoch": 0.8863236686657348, - "grad_norm": 1.8888206383897224, - "learning_rate": 1.3394049457035105e-07, - "loss": 1.0762, - "step": 9828 - }, - { - "epoch": 0.886413852189205, - "grad_norm": 1.4186228354848693, - "learning_rate": 1.3373037565472034e-07, - "loss": 0.9797, - "step": 9829 - }, - { - "epoch": 0.8865040357126753, - "grad_norm": 1.7633150907893682, - "learning_rate": 1.3352041597950537e-07, - "loss": 0.9471, - "step": 9830 - }, - { - "epoch": 0.8865942192361456, - "grad_norm": 1.4002070393059942, - "learning_rate": 1.333106155626196e-07, - "loss": 0.961, - "step": 9831 - }, - { - "epoch": 0.8866844027596158, - "grad_norm": 2.6938674370110487, - "learning_rate": 1.331009744219651e-07, - "loss": 0.9118, - "step": 9832 - }, - { - "epoch": 0.886774586283086, - "grad_norm": 0.7014662568275805, - "learning_rate": 1.3289149257542943e-07, - "loss": 0.8703, - "step": 9833 - }, - { - "epoch": 0.8868647698065564, - "grad_norm": 1.4827429382279547, - "learning_rate": 1.3268217004088666e-07, - "loss": 0.8727, - "step": 9834 - }, - { - "epoch": 0.8869549533300266, - "grad_norm": 1.4614704812220878, - "learning_rate": 1.3247300683619788e-07, - "loss": 0.8615, - "step": 9835 - }, - { - "epoch": 0.8870451368534968, - "grad_norm": 2.049516444046972, - "learning_rate": 1.3226400297920903e-07, - "loss": 0.946, - "step": 9836 - }, - { - "epoch": 0.8871353203769671, - "grad_norm": 1.5167222917577698, - "learning_rate": 1.3205515848775428e-07, - "loss": 0.9484, - "step": 9837 - }, - { - "epoch": 0.8872255039004374, - "grad_norm": 1.7651735251824754, - "learning_rate": 1.3184647337965316e-07, - "loss": 0.9068, - "step": 9838 - }, - { - "epoch": 0.8873156874239077, - "grad_norm": 1.2466177959518947, - "learning_rate": 1.3163794767271163e-07, - "loss": 0.9183, - "step": 9839 - }, - { - "epoch": 0.8874058709473779, - "grad_norm": 1.334508522017915, - "learning_rate": 1.314295813847226e-07, - "loss": 0.9369, - "step": 9840 - }, - { - "epoch": 0.8874960544708482, - "grad_norm": 2.0015411399636633, - "learning_rate": 1.3122137453346515e-07, - "loss": 1.071, - "step": 9841 - }, - { - "epoch": 0.8875862379943185, - "grad_norm": 1.5910361877056691, - "learning_rate": 1.3101332713670376e-07, - "loss": 1.0236, - "step": 9842 - }, - { - "epoch": 0.8876764215177887, - "grad_norm": 0.6535336954248643, - "learning_rate": 1.3080543921219133e-07, - "loss": 0.8574, - "step": 9843 - }, - { - "epoch": 0.8877666050412589, - "grad_norm": 1.5931427490634456, - "learning_rate": 1.3059771077766478e-07, - "loss": 0.9683, - "step": 9844 - }, - { - "epoch": 0.8878567885647293, - "grad_norm": 1.7140495529311761, - "learning_rate": 1.3039014185085018e-07, - "loss": 1.0113, - "step": 9845 - }, - { - "epoch": 0.8879469720881995, - "grad_norm": 1.5418807044385667, - "learning_rate": 1.301827324494571e-07, - "loss": 0.9799, - "step": 9846 - }, - { - "epoch": 0.8880371556116697, - "grad_norm": 1.7583191544140377, - "learning_rate": 1.2997548259118342e-07, - "loss": 1.0029, - "step": 9847 - }, - { - "epoch": 0.88812733913514, - "grad_norm": 1.6412997356935433, - "learning_rate": 1.2976839229371272e-07, - "loss": 0.9965, - "step": 9848 - }, - { - "epoch": 0.8882175226586103, - "grad_norm": 0.6220715102059947, - "learning_rate": 1.2956146157471515e-07, - "loss": 0.8504, - "step": 9849 - }, - { - "epoch": 0.8883077061820805, - "grad_norm": 1.7245672788914956, - "learning_rate": 1.2935469045184745e-07, - "loss": 0.9895, - "step": 9850 - }, - { - "epoch": 0.8883978897055508, - "grad_norm": 1.5716221600349634, - "learning_rate": 1.291480789427517e-07, - "loss": 0.9441, - "step": 9851 - }, - { - "epoch": 0.888488073229021, - "grad_norm": 1.7668182931766117, - "learning_rate": 1.2894162706505807e-07, - "loss": 1.0086, - "step": 9852 - }, - { - "epoch": 0.8885782567524914, - "grad_norm": 1.3710511899249394, - "learning_rate": 1.2873533483638155e-07, - "loss": 0.9654, - "step": 9853 - }, - { - "epoch": 0.8886684402759616, - "grad_norm": 1.2948537034722072, - "learning_rate": 1.285292022743243e-07, - "loss": 0.9146, - "step": 9854 - }, - { - "epoch": 0.8887586237994318, - "grad_norm": 1.5724865491622313, - "learning_rate": 1.2832322939647467e-07, - "loss": 0.9187, - "step": 9855 - }, - { - "epoch": 0.8888488073229021, - "grad_norm": 1.5565504098210918, - "learning_rate": 1.281174162204075e-07, - "loss": 0.9553, - "step": 9856 - }, - { - "epoch": 0.8889389908463724, - "grad_norm": 1.3709658484292029, - "learning_rate": 1.2791176276368366e-07, - "loss": 0.8456, - "step": 9857 - }, - { - "epoch": 0.8890291743698426, - "grad_norm": 1.5995402152326412, - "learning_rate": 1.2770626904385128e-07, - "loss": 0.9817, - "step": 9858 - }, - { - "epoch": 0.8891193578933129, - "grad_norm": 1.3971198923386712, - "learning_rate": 1.2750093507844306e-07, - "loss": 0.8315, - "step": 9859 - }, - { - "epoch": 0.8892095414167831, - "grad_norm": 1.2500553118865896, - "learning_rate": 1.272957608849805e-07, - "loss": 1.0098, - "step": 9860 - }, - { - "epoch": 0.8892997249402534, - "grad_norm": 1.3216361008068493, - "learning_rate": 1.270907464809694e-07, - "loss": 0.947, - "step": 9861 - }, - { - "epoch": 0.8893899084637237, - "grad_norm": 1.24544702088134, - "learning_rate": 1.2688589188390285e-07, - "loss": 0.9691, - "step": 9862 - }, - { - "epoch": 0.8894800919871939, - "grad_norm": 1.7587621539576423, - "learning_rate": 1.2668119711126023e-07, - "loss": 0.9805, - "step": 9863 - }, - { - "epoch": 0.8895702755106643, - "grad_norm": 1.338340516567945, - "learning_rate": 1.2647666218050735e-07, - "loss": 0.9643, - "step": 9864 - }, - { - "epoch": 0.8896604590341345, - "grad_norm": 1.5539405600325837, - "learning_rate": 1.2627228710909643e-07, - "loss": 0.9313, - "step": 9865 - }, - { - "epoch": 0.8897506425576047, - "grad_norm": 1.3191393544126293, - "learning_rate": 1.260680719144649e-07, - "loss": 0.9445, - "step": 9866 - }, - { - "epoch": 0.889840826081075, - "grad_norm": 2.007551480555983, - "learning_rate": 1.2586401661403877e-07, - "loss": 0.8953, - "step": 9867 - }, - { - "epoch": 0.8899310096045453, - "grad_norm": 1.5530963979501349, - "learning_rate": 1.2566012122522817e-07, - "loss": 0.9421, - "step": 9868 - }, - { - "epoch": 0.8900211931280155, - "grad_norm": 1.8603181770913488, - "learning_rate": 1.254563857654316e-07, - "loss": 0.9492, - "step": 9869 - }, - { - "epoch": 0.8901113766514858, - "grad_norm": 1.7164445153104728, - "learning_rate": 1.2525281025203205e-07, - "loss": 0.8603, - "step": 9870 - }, - { - "epoch": 0.890201560174956, - "grad_norm": 2.4764103887878295, - "learning_rate": 1.2504939470240006e-07, - "loss": 0.9487, - "step": 9871 - }, - { - "epoch": 0.8902917436984263, - "grad_norm": 1.4919308746319904, - "learning_rate": 1.2484613913389196e-07, - "loss": 1.0352, - "step": 9872 - }, - { - "epoch": 0.8903819272218966, - "grad_norm": 1.5947558650859617, - "learning_rate": 1.2464304356385057e-07, - "loss": 0.8823, - "step": 9873 - }, - { - "epoch": 0.8904721107453668, - "grad_norm": 3.9872379563666525, - "learning_rate": 1.2444010800960558e-07, - "loss": 1.0342, - "step": 9874 - }, - { - "epoch": 0.890562294268837, - "grad_norm": 1.3143644260447345, - "learning_rate": 1.2423733248847267e-07, - "loss": 0.9493, - "step": 9875 - }, - { - "epoch": 0.8906524777923074, - "grad_norm": 1.426563121716133, - "learning_rate": 1.2403471701775293e-07, - "loss": 0.9081, - "step": 9876 - }, - { - "epoch": 0.8907426613157776, - "grad_norm": 1.5103109739308902, - "learning_rate": 1.2383226161473515e-07, - "loss": 1.0045, - "step": 9877 - }, - { - "epoch": 0.8908328448392479, - "grad_norm": 1.7839569804688262, - "learning_rate": 1.2362996629669376e-07, - "loss": 1.0464, - "step": 9878 - }, - { - "epoch": 0.8909230283627181, - "grad_norm": 1.5576486422911364, - "learning_rate": 1.2342783108089007e-07, - "loss": 0.9778, - "step": 9879 - }, - { - "epoch": 0.8910132118861884, - "grad_norm": 1.5807652812353676, - "learning_rate": 1.2322585598457135e-07, - "loss": 1.0244, - "step": 9880 - }, - { - "epoch": 0.8911033954096587, - "grad_norm": 1.1587413547197478, - "learning_rate": 1.2302404102497034e-07, - "loss": 0.9525, - "step": 9881 - }, - { - "epoch": 0.8911935789331289, - "grad_norm": 1.402272937230007, - "learning_rate": 1.228223862193083e-07, - "loss": 0.9004, - "step": 9882 - }, - { - "epoch": 0.8912837624565991, - "grad_norm": 7.323805717520658, - "learning_rate": 1.2262089158479038e-07, - "loss": 0.9755, - "step": 9883 - }, - { - "epoch": 0.8913739459800695, - "grad_norm": 1.627973696657869, - "learning_rate": 1.2241955713861042e-07, - "loss": 1.0064, - "step": 9884 - }, - { - "epoch": 0.8914641295035397, - "grad_norm": 1.4128339288339864, - "learning_rate": 1.222183828979464e-07, - "loss": 0.9105, - "step": 9885 - }, - { - "epoch": 0.8915543130270099, - "grad_norm": 1.411716348448644, - "learning_rate": 1.2201736887996372e-07, - "loss": 0.9637, - "step": 9886 - }, - { - "epoch": 0.8916444965504803, - "grad_norm": 1.8260600499477049, - "learning_rate": 1.2181651510181444e-07, - "loss": 1.0408, - "step": 9887 - }, - { - "epoch": 0.8917346800739505, - "grad_norm": 1.441238683042404, - "learning_rate": 1.2161582158063622e-07, - "loss": 0.9953, - "step": 9888 - }, - { - "epoch": 0.8918248635974207, - "grad_norm": 7.430322597888426, - "learning_rate": 1.214152883335533e-07, - "loss": 0.8808, - "step": 9889 - }, - { - "epoch": 0.891915047120891, - "grad_norm": 1.577178822511097, - "learning_rate": 1.2121491537767648e-07, - "loss": 0.9164, - "step": 9890 - }, - { - "epoch": 0.8920052306443613, - "grad_norm": 2.02254581319368, - "learning_rate": 1.2101470273010294e-07, - "loss": 1.0018, - "step": 9891 - }, - { - "epoch": 0.8920954141678316, - "grad_norm": 1.8487606175693594, - "learning_rate": 1.2081465040791528e-07, - "loss": 0.9482, - "step": 9892 - }, - { - "epoch": 0.8921855976913018, - "grad_norm": 1.7069579224364337, - "learning_rate": 1.2061475842818335e-07, - "loss": 0.9343, - "step": 9893 - }, - { - "epoch": 0.892275781214772, - "grad_norm": 1.415742834715349, - "learning_rate": 1.2041502680796313e-07, - "loss": 0.9382, - "step": 9894 - }, - { - "epoch": 0.8923659647382424, - "grad_norm": 1.3286908907585746, - "learning_rate": 1.2021545556429648e-07, - "loss": 0.9757, - "step": 9895 - }, - { - "epoch": 0.8924561482617126, - "grad_norm": 1.189001940514703, - "learning_rate": 1.2001604471421245e-07, - "loss": 0.9664, - "step": 9896 - }, - { - "epoch": 0.8925463317851828, - "grad_norm": 1.541695216490181, - "learning_rate": 1.1981679427472567e-07, - "loss": 1.0203, - "step": 9897 - }, - { - "epoch": 0.8926365153086531, - "grad_norm": 1.551627900895548, - "learning_rate": 1.196177042628368e-07, - "loss": 1.019, - "step": 9898 - }, - { - "epoch": 0.8927266988321234, - "grad_norm": 1.8500267129979913, - "learning_rate": 1.194187746955344e-07, - "loss": 0.9745, - "step": 9899 - }, - { - "epoch": 0.8928168823555936, - "grad_norm": 1.419062253224936, - "learning_rate": 1.1922000558979094e-07, - "loss": 0.9425, - "step": 9900 - }, - { - "epoch": 0.8929070658790639, - "grad_norm": 1.447336460289959, - "learning_rate": 1.1902139696256752e-07, - "loss": 1.0031, - "step": 9901 - }, - { - "epoch": 0.8929972494025341, - "grad_norm": 1.4667755003487963, - "learning_rate": 1.188229488308099e-07, - "loss": 0.9024, - "step": 9902 - }, - { - "epoch": 0.8930874329260045, - "grad_norm": 0.5582868359785669, - "learning_rate": 1.1862466121145098e-07, - "loss": 0.8059, - "step": 9903 - }, - { - "epoch": 0.8931776164494747, - "grad_norm": 1.911416790447549, - "learning_rate": 1.184265341214099e-07, - "loss": 0.9071, - "step": 9904 - }, - { - "epoch": 0.8932677999729449, - "grad_norm": 1.3908347925247453, - "learning_rate": 1.182285675775918e-07, - "loss": 0.9923, - "step": 9905 - }, - { - "epoch": 0.8933579834964152, - "grad_norm": 2.17925167853967, - "learning_rate": 1.1803076159688851e-07, - "loss": 0.9516, - "step": 9906 - }, - { - "epoch": 0.8934481670198855, - "grad_norm": 1.4210463404283822, - "learning_rate": 1.1783311619617741e-07, - "loss": 1.0011, - "step": 9907 - }, - { - "epoch": 0.8935383505433557, - "grad_norm": 1.5024846479548761, - "learning_rate": 1.1763563139232302e-07, - "loss": 0.9362, - "step": 9908 - }, - { - "epoch": 0.893628534066826, - "grad_norm": 1.6672729184292499, - "learning_rate": 1.1743830720217562e-07, - "loss": 0.9062, - "step": 9909 - }, - { - "epoch": 0.8937187175902963, - "grad_norm": 2.0900635790850766, - "learning_rate": 1.1724114364257243e-07, - "loss": 0.947, - "step": 9910 - }, - { - "epoch": 0.8938089011137665, - "grad_norm": 1.4709249584991637, - "learning_rate": 1.1704414073033619e-07, - "loss": 0.9727, - "step": 9911 - }, - { - "epoch": 0.8938990846372368, - "grad_norm": 1.4979546430263124, - "learning_rate": 1.1684729848227636e-07, - "loss": 1.0127, - "step": 9912 - }, - { - "epoch": 0.893989268160707, - "grad_norm": 1.5335522685414118, - "learning_rate": 1.1665061691518884e-07, - "loss": 1.0218, - "step": 9913 - }, - { - "epoch": 0.8940794516841774, - "grad_norm": 1.5264935480825481, - "learning_rate": 1.1645409604585532e-07, - "loss": 0.9677, - "step": 9914 - }, - { - "epoch": 0.8941696352076476, - "grad_norm": 1.69664076001537, - "learning_rate": 1.162577358910437e-07, - "loss": 0.9773, - "step": 9915 - }, - { - "epoch": 0.8942598187311178, - "grad_norm": 1.5206538664950684, - "learning_rate": 1.160615364675095e-07, - "loss": 0.9479, - "step": 9916 - }, - { - "epoch": 0.894350002254588, - "grad_norm": 0.6265582924033272, - "learning_rate": 1.1586549779199262e-07, - "loss": 0.8249, - "step": 9917 - }, - { - "epoch": 0.8944401857780584, - "grad_norm": 1.3104579571322361, - "learning_rate": 1.1566961988122037e-07, - "loss": 1.0296, - "step": 9918 - }, - { - "epoch": 0.8945303693015286, - "grad_norm": 1.506672640819577, - "learning_rate": 1.1547390275190627e-07, - "loss": 0.931, - "step": 9919 - }, - { - "epoch": 0.8946205528249989, - "grad_norm": 2.0976710256841105, - "learning_rate": 1.1527834642075007e-07, - "loss": 0.9115, - "step": 9920 - }, - { - "epoch": 0.8947107363484691, - "grad_norm": 2.274333764080059, - "learning_rate": 1.1508295090443797e-07, - "loss": 0.9533, - "step": 9921 - }, - { - "epoch": 0.8948009198719394, - "grad_norm": 3.087709637524886, - "learning_rate": 1.148877162196411e-07, - "loss": 0.9156, - "step": 9922 - }, - { - "epoch": 0.8948911033954097, - "grad_norm": 2.2045707558178362, - "learning_rate": 1.1469264238301924e-07, - "loss": 0.8968, - "step": 9923 - }, - { - "epoch": 0.8949812869188799, - "grad_norm": 1.840135231335306, - "learning_rate": 1.1449772941121638e-07, - "loss": 0.9909, - "step": 9924 - }, - { - "epoch": 0.8950714704423501, - "grad_norm": 1.6578230675046823, - "learning_rate": 1.1430297732086369e-07, - "loss": 0.9353, - "step": 9925 - }, - { - "epoch": 0.8951616539658205, - "grad_norm": 1.3423837325116428, - "learning_rate": 1.1410838612857876e-07, - "loss": 0.962, - "step": 9926 - }, - { - "epoch": 0.8952518374892907, - "grad_norm": 1.8289680167436944, - "learning_rate": 1.1391395585096497e-07, - "loss": 1.0616, - "step": 9927 - }, - { - "epoch": 0.895342021012761, - "grad_norm": 1.484512563655492, - "learning_rate": 1.1371968650461216e-07, - "loss": 0.9058, - "step": 9928 - }, - { - "epoch": 0.8954322045362312, - "grad_norm": 0.9192903784960562, - "learning_rate": 1.1352557810609687e-07, - "loss": 0.8344, - "step": 9929 - }, - { - "epoch": 0.8955223880597015, - "grad_norm": 1.9668772433849688, - "learning_rate": 1.1333163067198048e-07, - "loss": 0.9547, - "step": 9930 - }, - { - "epoch": 0.8956125715831718, - "grad_norm": 1.2818223093736207, - "learning_rate": 1.1313784421881311e-07, - "loss": 1.0315, - "step": 9931 - }, - { - "epoch": 0.895702755106642, - "grad_norm": 1.3741838894272356, - "learning_rate": 1.1294421876312865e-07, - "loss": 0.9978, - "step": 9932 - }, - { - "epoch": 0.8957929386301122, - "grad_norm": 1.846043082117846, - "learning_rate": 1.1275075432144831e-07, - "loss": 0.9392, - "step": 9933 - }, - { - "epoch": 0.8958831221535826, - "grad_norm": 1.8932621746167952, - "learning_rate": 1.1255745091028002e-07, - "loss": 0.9849, - "step": 9934 - }, - { - "epoch": 0.8959733056770528, - "grad_norm": 1.6657849204639588, - "learning_rate": 1.1236430854611723e-07, - "loss": 0.9843, - "step": 9935 - }, - { - "epoch": 0.896063489200523, - "grad_norm": 1.5432380537827954, - "learning_rate": 1.1217132724544032e-07, - "loss": 0.9392, - "step": 9936 - }, - { - "epoch": 0.8961536727239934, - "grad_norm": 1.72095036543579, - "learning_rate": 1.1197850702471434e-07, - "loss": 1.0164, - "step": 9937 - }, - { - "epoch": 0.8962438562474636, - "grad_norm": 1.6783203948935523, - "learning_rate": 1.1178584790039348e-07, - "loss": 1.037, - "step": 9938 - }, - { - "epoch": 0.8963340397709338, - "grad_norm": 1.4220336993177198, - "learning_rate": 1.1159334988891478e-07, - "loss": 0.9176, - "step": 9939 - }, - { - "epoch": 0.8964242232944041, - "grad_norm": 1.9258049582362589, - "learning_rate": 1.1140101300670446e-07, - "loss": 0.9484, - "step": 9940 - }, - { - "epoch": 0.8965144068178744, - "grad_norm": 1.6373964786566695, - "learning_rate": 1.1120883727017338e-07, - "loss": 0.8358, - "step": 9941 - }, - { - "epoch": 0.8966045903413447, - "grad_norm": 1.4842160591561318, - "learning_rate": 1.1101682269571889e-07, - "loss": 1.0176, - "step": 9942 - }, - { - "epoch": 0.8966947738648149, - "grad_norm": 1.595718290333956, - "learning_rate": 1.1082496929972473e-07, - "loss": 0.9515, - "step": 9943 - }, - { - "epoch": 0.8967849573882851, - "grad_norm": 2.449094534953213, - "learning_rate": 1.1063327709856096e-07, - "loss": 0.9357, - "step": 9944 - }, - { - "epoch": 0.8968751409117555, - "grad_norm": 1.807288867021164, - "learning_rate": 1.1044174610858403e-07, - "loss": 0.8562, - "step": 9945 - }, - { - "epoch": 0.8969653244352257, - "grad_norm": 1.6427808856443775, - "learning_rate": 1.1025037634613643e-07, - "loss": 0.9209, - "step": 9946 - }, - { - "epoch": 0.8970555079586959, - "grad_norm": 2.7562740022128893, - "learning_rate": 1.1005916782754643e-07, - "loss": 0.9962, - "step": 9947 - }, - { - "epoch": 0.8971456914821662, - "grad_norm": 2.1161789409786924, - "learning_rate": 1.0986812056912919e-07, - "loss": 0.9483, - "step": 9948 - }, - { - "epoch": 0.8972358750056365, - "grad_norm": 1.5672559455080937, - "learning_rate": 1.0967723458718613e-07, - "loss": 0.8489, - "step": 9949 - }, - { - "epoch": 0.8973260585291067, - "grad_norm": 1.846193085079242, - "learning_rate": 1.0948650989800445e-07, - "loss": 0.9369, - "step": 9950 - }, - { - "epoch": 0.897416242052577, - "grad_norm": 1.3023941533724845, - "learning_rate": 1.0929594651785823e-07, - "loss": 0.949, - "step": 9951 - }, - { - "epoch": 0.8975064255760472, - "grad_norm": 2.8956670553280985, - "learning_rate": 1.0910554446300646e-07, - "loss": 1.0177, - "step": 9952 - }, - { - "epoch": 0.8975966090995176, - "grad_norm": 1.3684157167988968, - "learning_rate": 1.089153037496966e-07, - "loss": 0.8665, - "step": 9953 - }, - { - "epoch": 0.8976867926229878, - "grad_norm": 1.5818928222375728, - "learning_rate": 1.0872522439415966e-07, - "loss": 0.9923, - "step": 9954 - }, - { - "epoch": 0.897776976146458, - "grad_norm": 2.4770800895017318, - "learning_rate": 1.0853530641261554e-07, - "loss": 0.9144, - "step": 9955 - }, - { - "epoch": 0.8978671596699282, - "grad_norm": 1.4788342312760243, - "learning_rate": 1.083455498212682e-07, - "loss": 0.9804, - "step": 9956 - }, - { - "epoch": 0.8979573431933986, - "grad_norm": 1.5143054841979693, - "learning_rate": 1.0815595463630911e-07, - "loss": 1.0094, - "step": 9957 - }, - { - "epoch": 0.8980475267168688, - "grad_norm": 1.4934864082956751, - "learning_rate": 1.0796652087391556e-07, - "loss": 0.8374, - "step": 9958 - }, - { - "epoch": 0.8981377102403391, - "grad_norm": 1.5268653842468656, - "learning_rate": 1.0777724855025083e-07, - "loss": 1.0612, - "step": 9959 - }, - { - "epoch": 0.8982278937638094, - "grad_norm": 1.4423348761711101, - "learning_rate": 1.075881376814649e-07, - "loss": 0.9647, - "step": 9960 - }, - { - "epoch": 0.8983180772872796, - "grad_norm": 1.4300512466051243, - "learning_rate": 1.0739918828369377e-07, - "loss": 1.0225, - "step": 9961 - }, - { - "epoch": 0.8984082608107499, - "grad_norm": 1.7342532003438345, - "learning_rate": 1.0721040037305983e-07, - "loss": 0.9697, - "step": 9962 - }, - { - "epoch": 0.8984984443342201, - "grad_norm": 1.8431251242848765, - "learning_rate": 1.0702177396567114e-07, - "loss": 0.9213, - "step": 9963 - }, - { - "epoch": 0.8985886278576904, - "grad_norm": 0.6840555647441404, - "learning_rate": 1.0683330907762233e-07, - "loss": 0.8165, - "step": 9964 - }, - { - "epoch": 0.8986788113811607, - "grad_norm": 1.2754008597975393, - "learning_rate": 1.0664500572499435e-07, - "loss": 0.9011, - "step": 9965 - }, - { - "epoch": 0.8987689949046309, - "grad_norm": 1.6950057494929653, - "learning_rate": 1.0645686392385455e-07, - "loss": 0.918, - "step": 9966 - }, - { - "epoch": 0.8988591784281011, - "grad_norm": 1.5044576847696485, - "learning_rate": 1.0626888369025588e-07, - "loss": 0.9039, - "step": 9967 - }, - { - "epoch": 0.8989493619515715, - "grad_norm": 2.2325656207138587, - "learning_rate": 1.0608106504023817e-07, - "loss": 0.9999, - "step": 9968 - }, - { - "epoch": 0.8990395454750417, - "grad_norm": 1.2240413340028236, - "learning_rate": 1.0589340798982637e-07, - "loss": 0.9506, - "step": 9969 - }, - { - "epoch": 0.899129728998512, - "grad_norm": 1.565832399195781, - "learning_rate": 1.057059125550337e-07, - "loss": 0.9112, - "step": 9970 - }, - { - "epoch": 0.8992199125219822, - "grad_norm": 1.2771379964226393, - "learning_rate": 1.0551857875185732e-07, - "loss": 0.9743, - "step": 9971 - }, - { - "epoch": 0.8993100960454525, - "grad_norm": 1.1995345206276342, - "learning_rate": 1.0533140659628181e-07, - "loss": 0.8486, - "step": 9972 - }, - { - "epoch": 0.8994002795689228, - "grad_norm": 1.309438358858843, - "learning_rate": 1.0514439610427772e-07, - "loss": 1.0224, - "step": 9973 - }, - { - "epoch": 0.899490463092393, - "grad_norm": 2.0666825627156578, - "learning_rate": 1.0495754729180206e-07, - "loss": 0.9937, - "step": 9974 - }, - { - "epoch": 0.8995806466158632, - "grad_norm": 1.3983495247729079, - "learning_rate": 1.0477086017479741e-07, - "loss": 1.0662, - "step": 9975 - }, - { - "epoch": 0.8996708301393336, - "grad_norm": 1.5691656982910496, - "learning_rate": 1.0458433476919327e-07, - "loss": 0.9222, - "step": 9976 - }, - { - "epoch": 0.8997610136628038, - "grad_norm": 1.4086963990650254, - "learning_rate": 1.0439797109090509e-07, - "loss": 0.9956, - "step": 9977 - }, - { - "epoch": 0.899851197186274, - "grad_norm": 1.6650298982519065, - "learning_rate": 1.0421176915583396e-07, - "loss": 0.9359, - "step": 9978 - }, - { - "epoch": 0.8999413807097443, - "grad_norm": 1.6962821753872135, - "learning_rate": 1.0402572897986828e-07, - "loss": 0.9951, - "step": 9979 - }, - { - "epoch": 0.9000315642332146, - "grad_norm": 0.5970693711163279, - "learning_rate": 1.0383985057888134e-07, - "loss": 0.8295, - "step": 9980 - }, - { - "epoch": 0.9001217477566849, - "grad_norm": 1.6035400834814404, - "learning_rate": 1.036541339687338e-07, - "loss": 0.879, - "step": 9981 - }, - { - "epoch": 0.9002119312801551, - "grad_norm": 0.6671227792263682, - "learning_rate": 1.0346857916527186e-07, - "loss": 0.8108, - "step": 9982 - }, - { - "epoch": 0.9003021148036254, - "grad_norm": 1.5128915264226432, - "learning_rate": 1.0328318618432819e-07, - "loss": 0.9685, - "step": 9983 - }, - { - "epoch": 0.9003922983270957, - "grad_norm": 1.564787144429889, - "learning_rate": 1.0309795504172148e-07, - "loss": 1.0278, - "step": 9984 - }, - { - "epoch": 0.9004824818505659, - "grad_norm": 1.487414490622181, - "learning_rate": 1.0291288575325685e-07, - "loss": 0.944, - "step": 9985 - }, - { - "epoch": 0.9005726653740361, - "grad_norm": 2.068495042654344, - "learning_rate": 1.0272797833472502e-07, - "loss": 0.9226, - "step": 9986 - }, - { - "epoch": 0.9006628488975065, - "grad_norm": 0.6863825994073313, - "learning_rate": 1.0254323280190335e-07, - "loss": 0.8151, - "step": 9987 - }, - { - "epoch": 0.9007530324209767, - "grad_norm": 1.8170123001883187, - "learning_rate": 1.023586491705557e-07, - "loss": 0.9437, - "step": 9988 - }, - { - "epoch": 0.9008432159444469, - "grad_norm": 2.4130496767624146, - "learning_rate": 1.0217422745643145e-07, - "loss": 0.9, - "step": 9989 - }, - { - "epoch": 0.9009333994679172, - "grad_norm": 1.6077176050295563, - "learning_rate": 1.0198996767526691e-07, - "loss": 0.8893, - "step": 9990 - }, - { - "epoch": 0.9010235829913875, - "grad_norm": 1.5389198198358305, - "learning_rate": 1.018058698427835e-07, - "loss": 0.9468, - "step": 9991 - }, - { - "epoch": 0.9011137665148578, - "grad_norm": 1.3072320030346547, - "learning_rate": 1.0162193397469021e-07, - "loss": 1.0086, - "step": 9992 - }, - { - "epoch": 0.901203950038328, - "grad_norm": 1.5391149366711707, - "learning_rate": 1.0143816008668049e-07, - "loss": 0.9024, - "step": 9993 - }, - { - "epoch": 0.9012941335617982, - "grad_norm": 0.6460057587307866, - "learning_rate": 1.0125454819443624e-07, - "loss": 0.8455, - "step": 9994 - }, - { - "epoch": 0.9013843170852686, - "grad_norm": 1.2277957037992697, - "learning_rate": 1.0107109831362315e-07, - "loss": 0.8716, - "step": 9995 - }, - { - "epoch": 0.9014745006087388, - "grad_norm": 1.4578352032004687, - "learning_rate": 1.0088781045989447e-07, - "loss": 0.9739, - "step": 9996 - }, - { - "epoch": 0.901564684132209, - "grad_norm": 1.324673284880474, - "learning_rate": 1.0070468464888926e-07, - "loss": 1.0126, - "step": 9997 - }, - { - "epoch": 0.9016548676556793, - "grad_norm": 1.4846968452482274, - "learning_rate": 1.0052172089623324e-07, - "loss": 0.9826, - "step": 9998 - }, - { - "epoch": 0.9017450511791496, - "grad_norm": 1.9595362911732894, - "learning_rate": 1.0033891921753746e-07, - "loss": 1.0065, - "step": 9999 - }, - { - "epoch": 0.9018352347026198, - "grad_norm": 1.9905847422602698, - "learning_rate": 1.0015627962839968e-07, - "loss": 0.8861, - "step": 10000 - }, - { - "epoch": 0.9019254182260901, - "grad_norm": 1.525313176375921, - "learning_rate": 9.99738021444041e-08, - "loss": 0.9411, - "step": 10001 - }, - { - "epoch": 0.9020156017495603, - "grad_norm": 1.2799312969485694, - "learning_rate": 9.979148678112003e-08, - "loss": 0.8966, - "step": 10002 - }, - { - "epoch": 0.9021057852730306, - "grad_norm": 1.9527071696624279, - "learning_rate": 9.960933355410417e-08, - "loss": 1.0654, - "step": 10003 - }, - { - "epoch": 0.9021959687965009, - "grad_norm": 1.4438816720608403, - "learning_rate": 9.942734247889828e-08, - "loss": 0.8845, - "step": 10004 - }, - { - "epoch": 0.9022861523199711, - "grad_norm": 1.3830885961734745, - "learning_rate": 9.92455135710315e-08, - "loss": 0.9759, - "step": 10005 - }, - { - "epoch": 0.9023763358434415, - "grad_norm": 1.524149332269478, - "learning_rate": 9.906384684601787e-08, - "loss": 0.9087, - "step": 10006 - }, - { - "epoch": 0.9024665193669117, - "grad_norm": 1.3261705715976428, - "learning_rate": 9.8882342319359e-08, - "loss": 1.0113, - "step": 10007 - }, - { - "epoch": 0.9025567028903819, - "grad_norm": 1.2864315581012828, - "learning_rate": 9.870100000654048e-08, - "loss": 0.9138, - "step": 10008 - }, - { - "epoch": 0.9026468864138522, - "grad_norm": 1.175003656422222, - "learning_rate": 9.851981992303704e-08, - "loss": 0.938, - "step": 10009 - }, - { - "epoch": 0.9027370699373225, - "grad_norm": 1.2388892380942482, - "learning_rate": 9.833880208430678e-08, - "loss": 0.9674, - "step": 10010 - }, - { - "epoch": 0.9028272534607927, - "grad_norm": 1.6062532536217655, - "learning_rate": 9.815794650579601e-08, - "loss": 0.9587, - "step": 10011 - }, - { - "epoch": 0.902917436984263, - "grad_norm": 1.436814103066019, - "learning_rate": 9.797725320293548e-08, - "loss": 0.9453, - "step": 10012 - }, - { - "epoch": 0.9030076205077332, - "grad_norm": 2.428045275936903, - "learning_rate": 9.779672219114354e-08, - "loss": 1.0382, - "step": 10013 - }, - { - "epoch": 0.9030978040312035, - "grad_norm": 1.4372813846374997, - "learning_rate": 9.761635348582386e-08, - "loss": 0.9458, - "step": 10014 - }, - { - "epoch": 0.9031879875546738, - "grad_norm": 1.5930788459542358, - "learning_rate": 9.743614710236658e-08, - "loss": 0.9957, - "step": 10015 - }, - { - "epoch": 0.903278171078144, - "grad_norm": 1.4188886028595864, - "learning_rate": 9.725610305614806e-08, - "loss": 0.9261, - "step": 10016 - }, - { - "epoch": 0.9033683546016142, - "grad_norm": 1.462061729949186, - "learning_rate": 9.707622136253002e-08, - "loss": 0.9648, - "step": 10017 - }, - { - "epoch": 0.9034585381250846, - "grad_norm": 1.3250241151450575, - "learning_rate": 9.689650203686173e-08, - "loss": 0.9883, - "step": 10018 - }, - { - "epoch": 0.9035487216485548, - "grad_norm": 1.811293364769848, - "learning_rate": 9.671694509447715e-08, - "loss": 0.9421, - "step": 10019 - }, - { - "epoch": 0.903638905172025, - "grad_norm": 1.4006856822971905, - "learning_rate": 9.653755055069757e-08, - "loss": 0.9697, - "step": 10020 - }, - { - "epoch": 0.9037290886954953, - "grad_norm": 1.2044893430821926, - "learning_rate": 9.635831842082987e-08, - "loss": 0.9379, - "step": 10021 - }, - { - "epoch": 0.9038192722189656, - "grad_norm": 1.5167482284301652, - "learning_rate": 9.617924872016691e-08, - "loss": 0.9753, - "step": 10022 - }, - { - "epoch": 0.9039094557424359, - "grad_norm": 1.657138226413364, - "learning_rate": 9.600034146398806e-08, - "loss": 1.0033, - "step": 10023 - }, - { - "epoch": 0.9039996392659061, - "grad_norm": 0.680174642042971, - "learning_rate": 9.582159666755863e-08, - "loss": 0.8335, - "step": 10024 - }, - { - "epoch": 0.9040898227893763, - "grad_norm": 1.2260148593475046, - "learning_rate": 9.564301434612976e-08, - "loss": 0.9357, - "step": 10025 - }, - { - "epoch": 0.9041800063128467, - "grad_norm": 1.5120618169702036, - "learning_rate": 9.546459451494015e-08, - "loss": 1.0043, - "step": 10026 - }, - { - "epoch": 0.9042701898363169, - "grad_norm": 0.7459004336458013, - "learning_rate": 9.528633718921231e-08, - "loss": 0.8667, - "step": 10027 - }, - { - "epoch": 0.9043603733597871, - "grad_norm": 1.9198738161981612, - "learning_rate": 9.510824238415672e-08, - "loss": 1.0124, - "step": 10028 - }, - { - "epoch": 0.9044505568832575, - "grad_norm": 1.1824727304874973, - "learning_rate": 9.493031011496944e-08, - "loss": 0.9964, - "step": 10029 - }, - { - "epoch": 0.9045407404067277, - "grad_norm": 1.6116709951259132, - "learning_rate": 9.475254039683234e-08, - "loss": 0.8273, - "step": 10030 - }, - { - "epoch": 0.904630923930198, - "grad_norm": 1.3787931228354413, - "learning_rate": 9.45749332449144e-08, - "loss": 0.9323, - "step": 10031 - }, - { - "epoch": 0.9047211074536682, - "grad_norm": 0.6385403185566232, - "learning_rate": 9.439748867436903e-08, - "loss": 0.8664, - "step": 10032 - }, - { - "epoch": 0.9048112909771385, - "grad_norm": 1.4351638386732886, - "learning_rate": 9.42202067003377e-08, - "loss": 0.927, - "step": 10033 - }, - { - "epoch": 0.9049014745006088, - "grad_norm": 1.5928856993124831, - "learning_rate": 9.404308733794652e-08, - "loss": 1.1016, - "step": 10034 - }, - { - "epoch": 0.904991658024079, - "grad_norm": 1.3630421163694866, - "learning_rate": 9.38661306023083e-08, - "loss": 0.9851, - "step": 10035 - }, - { - "epoch": 0.9050818415475492, - "grad_norm": 3.2929602065150885, - "learning_rate": 9.368933650852229e-08, - "loss": 0.9583, - "step": 10036 - }, - { - "epoch": 0.9051720250710196, - "grad_norm": 0.7611414751514795, - "learning_rate": 9.351270507167352e-08, - "loss": 0.8763, - "step": 10037 - }, - { - "epoch": 0.9052622085944898, - "grad_norm": 1.5502512143737337, - "learning_rate": 9.333623630683285e-08, - "loss": 0.8994, - "step": 10038 - }, - { - "epoch": 0.90535239211796, - "grad_norm": 1.6703484955094867, - "learning_rate": 9.315993022905799e-08, - "loss": 0.8209, - "step": 10039 - }, - { - "epoch": 0.9054425756414303, - "grad_norm": 1.5705449326095255, - "learning_rate": 9.298378685339158e-08, - "loss": 0.9422, - "step": 10040 - }, - { - "epoch": 0.9055327591649006, - "grad_norm": 1.6503162254402688, - "learning_rate": 9.280780619486406e-08, - "loss": 1.0317, - "step": 10041 - }, - { - "epoch": 0.9056229426883708, - "grad_norm": 1.3601922834028557, - "learning_rate": 9.26319882684905e-08, - "loss": 1.046, - "step": 10042 - }, - { - "epoch": 0.9057131262118411, - "grad_norm": 1.2712756570123023, - "learning_rate": 9.245633308927293e-08, - "loss": 0.9396, - "step": 10043 - }, - { - "epoch": 0.9058033097353113, - "grad_norm": 1.756110558455266, - "learning_rate": 9.228084067219888e-08, - "loss": 0.9513, - "step": 10044 - }, - { - "epoch": 0.9058934932587817, - "grad_norm": 1.5538652248730267, - "learning_rate": 9.210551103224284e-08, - "loss": 0.9116, - "step": 10045 - }, - { - "epoch": 0.9059836767822519, - "grad_norm": 1.306699286770584, - "learning_rate": 9.193034418436463e-08, - "loss": 1.0152, - "step": 10046 - }, - { - "epoch": 0.9060738603057221, - "grad_norm": 0.6497928518553755, - "learning_rate": 9.175534014351005e-08, - "loss": 0.8634, - "step": 10047 - }, - { - "epoch": 0.9061640438291924, - "grad_norm": 2.8494417011798734, - "learning_rate": 9.158049892461228e-08, - "loss": 0.9065, - "step": 10048 - }, - { - "epoch": 0.9062542273526627, - "grad_norm": 1.3433481547072037, - "learning_rate": 9.140582054258871e-08, - "loss": 0.9072, - "step": 10049 - }, - { - "epoch": 0.9063444108761329, - "grad_norm": 1.4541002507969234, - "learning_rate": 9.123130501234499e-08, - "loss": 0.971, - "step": 10050 - }, - { - "epoch": 0.9064345943996032, - "grad_norm": 1.6488529784555588, - "learning_rate": 9.105695234877098e-08, - "loss": 1.019, - "step": 10051 - }, - { - "epoch": 0.9065247779230734, - "grad_norm": 1.3049503449972053, - "learning_rate": 9.088276256674344e-08, - "loss": 0.9851, - "step": 10052 - }, - { - "epoch": 0.9066149614465437, - "grad_norm": 1.2794575597589888, - "learning_rate": 9.070873568112536e-08, - "loss": 0.9423, - "step": 10053 - }, - { - "epoch": 0.906705144970014, - "grad_norm": 1.5097473854058725, - "learning_rate": 9.053487170676577e-08, - "loss": 1.0185, - "step": 10054 - }, - { - "epoch": 0.9067953284934842, - "grad_norm": 1.5280685071117583, - "learning_rate": 9.036117065849968e-08, - "loss": 1.0233, - "step": 10055 - }, - { - "epoch": 0.9068855120169546, - "grad_norm": 2.5744728808075785, - "learning_rate": 9.018763255114837e-08, - "loss": 0.9446, - "step": 10056 - }, - { - "epoch": 0.9069756955404248, - "grad_norm": 1.4342700496001628, - "learning_rate": 9.00142573995184e-08, - "loss": 0.9309, - "step": 10057 - }, - { - "epoch": 0.907065879063895, - "grad_norm": 1.3069007511967765, - "learning_rate": 8.984104521840375e-08, - "loss": 1.0456, - "step": 10058 - }, - { - "epoch": 0.9071560625873653, - "grad_norm": 1.4741992910745438, - "learning_rate": 8.966799602258346e-08, - "loss": 0.9921, - "step": 10059 - }, - { - "epoch": 0.9072462461108356, - "grad_norm": 1.3420431696746289, - "learning_rate": 8.949510982682329e-08, - "loss": 1.0078, - "step": 10060 - }, - { - "epoch": 0.9073364296343058, - "grad_norm": 1.6860153590370444, - "learning_rate": 8.932238664587499e-08, - "loss": 1.0369, - "step": 10061 - }, - { - "epoch": 0.9074266131577761, - "grad_norm": 1.971722009590974, - "learning_rate": 8.914982649447567e-08, - "loss": 1.0535, - "step": 10062 - }, - { - "epoch": 0.9075167966812463, - "grad_norm": 1.2901588632659484, - "learning_rate": 8.897742938734975e-08, - "loss": 1.0319, - "step": 10063 - }, - { - "epoch": 0.9076069802047166, - "grad_norm": 1.460283836966713, - "learning_rate": 8.880519533920661e-08, - "loss": 0.9932, - "step": 10064 - }, - { - "epoch": 0.9076971637281869, - "grad_norm": 1.4376919659685867, - "learning_rate": 8.863312436474268e-08, - "loss": 0.9624, - "step": 10065 - }, - { - "epoch": 0.9077873472516571, - "grad_norm": 1.9641707919302298, - "learning_rate": 8.846121647863936e-08, - "loss": 0.9771, - "step": 10066 - }, - { - "epoch": 0.9078775307751273, - "grad_norm": 1.2601497094208702, - "learning_rate": 8.828947169556555e-08, - "loss": 0.967, - "step": 10067 - }, - { - "epoch": 0.9079677142985977, - "grad_norm": 1.4220668118390163, - "learning_rate": 8.81178900301749e-08, - "loss": 0.9271, - "step": 10068 - }, - { - "epoch": 0.9080578978220679, - "grad_norm": 1.2829638857616292, - "learning_rate": 8.794647149710787e-08, - "loss": 0.8915, - "step": 10069 - }, - { - "epoch": 0.9081480813455381, - "grad_norm": 1.6389254531804562, - "learning_rate": 8.777521611099081e-08, - "loss": 0.9612, - "step": 10070 - }, - { - "epoch": 0.9082382648690084, - "grad_norm": 1.4629335689989744, - "learning_rate": 8.760412388643624e-08, - "loss": 0.9075, - "step": 10071 - }, - { - "epoch": 0.9083284483924787, - "grad_norm": 1.4821742558395292, - "learning_rate": 8.74331948380429e-08, - "loss": 0.9705, - "step": 10072 - }, - { - "epoch": 0.908418631915949, - "grad_norm": 1.607909192439166, - "learning_rate": 8.726242898039516e-08, - "loss": 0.9647, - "step": 10073 - }, - { - "epoch": 0.9085088154394192, - "grad_norm": 1.5430367370306424, - "learning_rate": 8.709182632806334e-08, - "loss": 0.9701, - "step": 10074 - }, - { - "epoch": 0.9085989989628894, - "grad_norm": 1.4737511568978956, - "learning_rate": 8.692138689560469e-08, - "loss": 0.9748, - "step": 10075 - }, - { - "epoch": 0.9086891824863598, - "grad_norm": 1.4083322156341151, - "learning_rate": 8.675111069756203e-08, - "loss": 0.9509, - "step": 10076 - }, - { - "epoch": 0.90877936600983, - "grad_norm": 1.4559546371666463, - "learning_rate": 8.658099774846395e-08, - "loss": 0.9325, - "step": 10077 - }, - { - "epoch": 0.9088695495333002, - "grad_norm": 1.2367970811107436, - "learning_rate": 8.641104806282595e-08, - "loss": 0.9091, - "step": 10078 - }, - { - "epoch": 0.9089597330567706, - "grad_norm": 1.5522856441809836, - "learning_rate": 8.624126165514845e-08, - "loss": 0.9804, - "step": 10079 - }, - { - "epoch": 0.9090499165802408, - "grad_norm": 1.5979216338888234, - "learning_rate": 8.607163853991917e-08, - "loss": 0.9287, - "step": 10080 - }, - { - "epoch": 0.909140100103711, - "grad_norm": 1.3840739644054458, - "learning_rate": 8.590217873161054e-08, - "loss": 0.8113, - "step": 10081 - }, - { - "epoch": 0.9092302836271813, - "grad_norm": 1.4476871262791842, - "learning_rate": 8.573288224468255e-08, - "loss": 0.9828, - "step": 10082 - }, - { - "epoch": 0.9093204671506516, - "grad_norm": 2.4450991601475187, - "learning_rate": 8.556374909358011e-08, - "loss": 0.8861, - "step": 10083 - }, - { - "epoch": 0.9094106506741219, - "grad_norm": 1.79552362460574, - "learning_rate": 8.539477929273476e-08, - "loss": 0.942, - "step": 10084 - }, - { - "epoch": 0.9095008341975921, - "grad_norm": 1.278536318603655, - "learning_rate": 8.522597285656386e-08, - "loss": 0.9983, - "step": 10085 - }, - { - "epoch": 0.9095910177210623, - "grad_norm": 0.6878913069228302, - "learning_rate": 8.505732979947078e-08, - "loss": 0.8478, - "step": 10086 - }, - { - "epoch": 0.9096812012445327, - "grad_norm": 1.1710692623904335, - "learning_rate": 8.488885013584557e-08, - "loss": 0.9838, - "step": 10087 - }, - { - "epoch": 0.9097713847680029, - "grad_norm": 1.5567652940784515, - "learning_rate": 8.472053388006295e-08, - "loss": 0.9085, - "step": 10088 - }, - { - "epoch": 0.9098615682914731, - "grad_norm": 1.441223711897284, - "learning_rate": 8.455238104648565e-08, - "loss": 1.0493, - "step": 10089 - }, - { - "epoch": 0.9099517518149434, - "grad_norm": 1.4010843131040143, - "learning_rate": 8.438439164946043e-08, - "loss": 0.8398, - "step": 10090 - }, - { - "epoch": 0.9100419353384137, - "grad_norm": 1.4999103519353327, - "learning_rate": 8.42165657033218e-08, - "loss": 0.9814, - "step": 10091 - }, - { - "epoch": 0.9101321188618839, - "grad_norm": 1.4759653488526734, - "learning_rate": 8.4048903222389e-08, - "loss": 1.0052, - "step": 10092 - }, - { - "epoch": 0.9102223023853542, - "grad_norm": 1.3513164406532787, - "learning_rate": 8.388140422096856e-08, - "loss": 1.0148, - "step": 10093 - }, - { - "epoch": 0.9103124859088244, - "grad_norm": 1.5997116931800546, - "learning_rate": 8.371406871335173e-08, - "loss": 0.9674, - "step": 10094 - }, - { - "epoch": 0.9104026694322948, - "grad_norm": 1.464838297515894, - "learning_rate": 8.354689671381732e-08, - "loss": 0.9576, - "step": 10095 - }, - { - "epoch": 0.910492852955765, - "grad_norm": 1.5892071773417864, - "learning_rate": 8.337988823662834e-08, - "loss": 0.9585, - "step": 10096 - }, - { - "epoch": 0.9105830364792352, - "grad_norm": 1.5793050753984168, - "learning_rate": 8.321304329603607e-08, - "loss": 0.9202, - "step": 10097 - }, - { - "epoch": 0.9106732200027055, - "grad_norm": 0.6327119441259621, - "learning_rate": 8.304636190627557e-08, - "loss": 0.8528, - "step": 10098 - }, - { - "epoch": 0.9107634035261758, - "grad_norm": 1.7029564450350818, - "learning_rate": 8.287984408156945e-08, - "loss": 0.9797, - "step": 10099 - }, - { - "epoch": 0.910853587049646, - "grad_norm": 1.5330182200902447, - "learning_rate": 8.271348983612591e-08, - "loss": 0.864, - "step": 10100 - }, - { - "epoch": 0.9109437705731163, - "grad_norm": 1.23562944626575, - "learning_rate": 8.254729918413938e-08, - "loss": 0.9238, - "step": 10101 - }, - { - "epoch": 0.9110339540965866, - "grad_norm": 1.8999136378085666, - "learning_rate": 8.238127213979006e-08, - "loss": 0.9348, - "step": 10102 - }, - { - "epoch": 0.9111241376200568, - "grad_norm": 1.6922253041786544, - "learning_rate": 8.221540871724398e-08, - "loss": 1.0305, - "step": 10103 - }, - { - "epoch": 0.9112143211435271, - "grad_norm": 1.6134759448338531, - "learning_rate": 8.2049708930654e-08, - "loss": 0.8502, - "step": 10104 - }, - { - "epoch": 0.9113045046669973, - "grad_norm": 1.5294847239055382, - "learning_rate": 8.188417279415793e-08, - "loss": 0.9366, - "step": 10105 - }, - { - "epoch": 0.9113946881904677, - "grad_norm": 2.214559762230571, - "learning_rate": 8.171880032188117e-08, - "loss": 1.0676, - "step": 10106 - }, - { - "epoch": 0.9114848717139379, - "grad_norm": 1.544342897051395, - "learning_rate": 8.155359152793351e-08, - "loss": 0.9451, - "step": 10107 - }, - { - "epoch": 0.9115750552374081, - "grad_norm": 1.4801220651498748, - "learning_rate": 8.138854642641147e-08, - "loss": 0.9534, - "step": 10108 - }, - { - "epoch": 0.9116652387608783, - "grad_norm": 1.528957363360005, - "learning_rate": 8.122366503139777e-08, - "loss": 0.8901, - "step": 10109 - }, - { - "epoch": 0.9117554222843487, - "grad_norm": 1.317324400243767, - "learning_rate": 8.105894735696117e-08, - "loss": 0.9071, - "step": 10110 - }, - { - "epoch": 0.9118456058078189, - "grad_norm": 1.9146340898573344, - "learning_rate": 8.089439341715576e-08, - "loss": 0.9411, - "step": 10111 - }, - { - "epoch": 0.9119357893312892, - "grad_norm": 1.8411800771619504, - "learning_rate": 8.073000322602319e-08, - "loss": 0.9961, - "step": 10112 - }, - { - "epoch": 0.9120259728547594, - "grad_norm": 1.4324778025506766, - "learning_rate": 8.056577679758891e-08, - "loss": 0.9825, - "step": 10113 - }, - { - "epoch": 0.9121161563782297, - "grad_norm": 1.734886300266826, - "learning_rate": 8.040171414586638e-08, - "loss": 0.9161, - "step": 10114 - }, - { - "epoch": 0.9122063399017, - "grad_norm": 1.3186411916671836, - "learning_rate": 8.023781528485419e-08, - "loss": 0.9295, - "step": 10115 - }, - { - "epoch": 0.9122965234251702, - "grad_norm": 2.6335523341251315, - "learning_rate": 8.00740802285369e-08, - "loss": 0.9815, - "step": 10116 - }, - { - "epoch": 0.9123867069486404, - "grad_norm": 1.3803145697631933, - "learning_rate": 7.99105089908858e-08, - "loss": 0.837, - "step": 10117 - }, - { - "epoch": 0.9124768904721108, - "grad_norm": 1.171008692649678, - "learning_rate": 7.974710158585685e-08, - "loss": 0.9376, - "step": 10118 - }, - { - "epoch": 0.912567073995581, - "grad_norm": 2.0166782909510172, - "learning_rate": 7.958385802739375e-08, - "loss": 0.9955, - "step": 10119 - }, - { - "epoch": 0.9126572575190512, - "grad_norm": 1.77267560683501, - "learning_rate": 7.942077832942452e-08, - "loss": 1.0217, - "step": 10120 - }, - { - "epoch": 0.9127474410425215, - "grad_norm": 0.7504830394355816, - "learning_rate": 7.925786250586508e-08, - "loss": 0.8563, - "step": 10121 - }, - { - "epoch": 0.9128376245659918, - "grad_norm": 0.6695852295738515, - "learning_rate": 7.909511057061524e-08, - "loss": 0.8442, - "step": 10122 - }, - { - "epoch": 0.9129278080894621, - "grad_norm": 1.5643519298576067, - "learning_rate": 7.893252253756234e-08, - "loss": 0.9499, - "step": 10123 - }, - { - "epoch": 0.9130179916129323, - "grad_norm": 1.817423808246813, - "learning_rate": 7.877009842057925e-08, - "loss": 0.8943, - "step": 10124 - }, - { - "epoch": 0.9131081751364026, - "grad_norm": 1.4236813483964208, - "learning_rate": 7.860783823352512e-08, - "loss": 0.994, - "step": 10125 - }, - { - "epoch": 0.9131983586598729, - "grad_norm": 1.5948140949023477, - "learning_rate": 7.844574199024445e-08, - "loss": 0.972, - "step": 10126 - }, - { - "epoch": 0.9132885421833431, - "grad_norm": 1.8556859581951104, - "learning_rate": 7.82838097045686e-08, - "loss": 1.0103, - "step": 10127 - }, - { - "epoch": 0.9133787257068133, - "grad_norm": 1.5126124852495555, - "learning_rate": 7.812204139031454e-08, - "loss": 0.9605, - "step": 10128 - }, - { - "epoch": 0.9134689092302837, - "grad_norm": 2.4383077994639555, - "learning_rate": 7.796043706128474e-08, - "loss": 0.986, - "step": 10129 - }, - { - "epoch": 0.9135590927537539, - "grad_norm": 1.4625481687259076, - "learning_rate": 7.779899673126844e-08, - "loss": 0.9926, - "step": 10130 - }, - { - "epoch": 0.9136492762772241, - "grad_norm": 1.3006759921723259, - "learning_rate": 7.76377204140406e-08, - "loss": 1.0119, - "step": 10131 - }, - { - "epoch": 0.9137394598006944, - "grad_norm": 1.4445461406528266, - "learning_rate": 7.74766081233622e-08, - "loss": 0.9588, - "step": 10132 - }, - { - "epoch": 0.9138296433241647, - "grad_norm": 1.7737517225540755, - "learning_rate": 7.73156598729805e-08, - "loss": 1.0321, - "step": 10133 - }, - { - "epoch": 0.913919826847635, - "grad_norm": 1.5176719135774908, - "learning_rate": 7.715487567662849e-08, - "loss": 1.0102, - "step": 10134 - }, - { - "epoch": 0.9140100103711052, - "grad_norm": 1.494609684339195, - "learning_rate": 7.69942555480243e-08, - "loss": 0.9349, - "step": 10135 - }, - { - "epoch": 0.9141001938945754, - "grad_norm": 1.8318023864035833, - "learning_rate": 7.68337995008741e-08, - "loss": 1.0617, - "step": 10136 - }, - { - "epoch": 0.9141903774180458, - "grad_norm": 1.2538973133564912, - "learning_rate": 7.667350754886803e-08, - "loss": 0.8184, - "step": 10137 - }, - { - "epoch": 0.914280560941516, - "grad_norm": 1.658243886493816, - "learning_rate": 7.651337970568361e-08, - "loss": 0.9569, - "step": 10138 - }, - { - "epoch": 0.9143707444649862, - "grad_norm": 1.4452848689556848, - "learning_rate": 7.635341598498368e-08, - "loss": 0.9286, - "step": 10139 - }, - { - "epoch": 0.9144609279884565, - "grad_norm": 1.399237377355857, - "learning_rate": 7.61936164004171e-08, - "loss": 0.9613, - "step": 10140 - }, - { - "epoch": 0.9145511115119268, - "grad_norm": 1.8398406399257152, - "learning_rate": 7.603398096561875e-08, - "loss": 1.0069, - "step": 10141 - }, - { - "epoch": 0.914641295035397, - "grad_norm": 1.406093461542898, - "learning_rate": 7.587450969420994e-08, - "loss": 0.973, - "step": 10142 - }, - { - "epoch": 0.9147314785588673, - "grad_norm": 0.6993590059744125, - "learning_rate": 7.571520259979757e-08, - "loss": 0.85, - "step": 10143 - }, - { - "epoch": 0.9148216620823375, - "grad_norm": 1.4809704738148979, - "learning_rate": 7.555605969597455e-08, - "loss": 0.8057, - "step": 10144 - }, - { - "epoch": 0.9149118456058078, - "grad_norm": 2.3139798352443526, - "learning_rate": 7.539708099631959e-08, - "loss": 1.054, - "step": 10145 - }, - { - "epoch": 0.9150020291292781, - "grad_norm": 1.386112822202664, - "learning_rate": 7.52382665143978e-08, - "loss": 0.9692, - "step": 10146 - }, - { - "epoch": 0.9150922126527483, - "grad_norm": 1.7161389718813382, - "learning_rate": 7.507961626376014e-08, - "loss": 1.0234, - "step": 10147 - }, - { - "epoch": 0.9151823961762187, - "grad_norm": 1.5151247595100852, - "learning_rate": 7.492113025794378e-08, - "loss": 1.004, - "step": 10148 - }, - { - "epoch": 0.9152725796996889, - "grad_norm": 1.5203314784905086, - "learning_rate": 7.476280851047101e-08, - "loss": 0.9121, - "step": 10149 - }, - { - "epoch": 0.9153627632231591, - "grad_norm": 1.1368450081566972, - "learning_rate": 7.460465103485125e-08, - "loss": 0.9911, - "step": 10150 - }, - { - "epoch": 0.9154529467466294, - "grad_norm": 1.8205307596358804, - "learning_rate": 7.444665784457948e-08, - "loss": 0.9561, - "step": 10151 - }, - { - "epoch": 0.9155431302700997, - "grad_norm": 2.3794932776458557, - "learning_rate": 7.42888289531356e-08, - "loss": 0.9258, - "step": 10152 - }, - { - "epoch": 0.9156333137935699, - "grad_norm": 1.45916036134262, - "learning_rate": 7.41311643739877e-08, - "loss": 0.894, - "step": 10153 - }, - { - "epoch": 0.9157234973170402, - "grad_norm": 1.4190923269397064, - "learning_rate": 7.39736641205877e-08, - "loss": 1.0024, - "step": 10154 - }, - { - "epoch": 0.9158136808405104, - "grad_norm": 1.6183397546490852, - "learning_rate": 7.381632820637462e-08, - "loss": 0.9891, - "step": 10155 - }, - { - "epoch": 0.9159038643639807, - "grad_norm": 1.530757435575148, - "learning_rate": 7.365915664477352e-08, - "loss": 0.9561, - "step": 10156 - }, - { - "epoch": 0.915994047887451, - "grad_norm": 1.691510344863547, - "learning_rate": 7.350214944919474e-08, - "loss": 0.9733, - "step": 10157 - }, - { - "epoch": 0.9160842314109212, - "grad_norm": 1.5689409633686842, - "learning_rate": 7.334530663303539e-08, - "loss": 0.9911, - "step": 10158 - }, - { - "epoch": 0.9161744149343914, - "grad_norm": 1.5305553339738602, - "learning_rate": 7.318862820967742e-08, - "loss": 0.9677, - "step": 10159 - }, - { - "epoch": 0.9162645984578618, - "grad_norm": 1.696533536890392, - "learning_rate": 7.303211419249056e-08, - "loss": 0.9927, - "step": 10160 - }, - { - "epoch": 0.916354781981332, - "grad_norm": 1.341515435211411, - "learning_rate": 7.287576459482858e-08, - "loss": 0.9802, - "step": 10161 - }, - { - "epoch": 0.9164449655048023, - "grad_norm": 1.4779255544938164, - "learning_rate": 7.271957943003259e-08, - "loss": 1.0077, - "step": 10162 - }, - { - "epoch": 0.9165351490282725, - "grad_norm": 1.5861481775620687, - "learning_rate": 7.256355871142883e-08, - "loss": 0.966, - "step": 10163 - }, - { - "epoch": 0.9166253325517428, - "grad_norm": 1.4889002836718173, - "learning_rate": 7.240770245233019e-08, - "loss": 1.0199, - "step": 10164 - }, - { - "epoch": 0.9167155160752131, - "grad_norm": 2.3181231409694347, - "learning_rate": 7.225201066603492e-08, - "loss": 0.8823, - "step": 10165 - }, - { - "epoch": 0.9168056995986833, - "grad_norm": 1.4901414994762423, - "learning_rate": 7.209648336582774e-08, - "loss": 1.0735, - "step": 10166 - }, - { - "epoch": 0.9168958831221535, - "grad_norm": 1.5537044925424153, - "learning_rate": 7.19411205649787e-08, - "loss": 0.9535, - "step": 10167 - }, - { - "epoch": 0.9169860666456239, - "grad_norm": 2.7688918135515133, - "learning_rate": 7.178592227674474e-08, - "loss": 0.987, - "step": 10168 - }, - { - "epoch": 0.9170762501690941, - "grad_norm": 1.7251993727530974, - "learning_rate": 7.163088851436771e-08, - "loss": 0.8895, - "step": 10169 - }, - { - "epoch": 0.9171664336925643, - "grad_norm": 1.4753111527268508, - "learning_rate": 7.147601929107639e-08, - "loss": 0.9293, - "step": 10170 - }, - { - "epoch": 0.9172566172160346, - "grad_norm": 1.2551188562351288, - "learning_rate": 7.132131462008461e-08, - "loss": 1.0072, - "step": 10171 - }, - { - "epoch": 0.9173468007395049, - "grad_norm": 1.5537746951042684, - "learning_rate": 7.116677451459297e-08, - "loss": 0.9621, - "step": 10172 - }, - { - "epoch": 0.9174369842629752, - "grad_norm": 1.1280115931774155, - "learning_rate": 7.101239898778799e-08, - "loss": 0.9903, - "step": 10173 - }, - { - "epoch": 0.9175271677864454, - "grad_norm": 1.240981566262261, - "learning_rate": 7.085818805284094e-08, - "loss": 0.9996, - "step": 10174 - }, - { - "epoch": 0.9176173513099157, - "grad_norm": 1.467129076687233, - "learning_rate": 7.070414172291083e-08, - "loss": 1.0428, - "step": 10175 - }, - { - "epoch": 0.917707534833386, - "grad_norm": 1.3687971829856365, - "learning_rate": 7.055026001114095e-08, - "loss": 0.9008, - "step": 10176 - }, - { - "epoch": 0.9177977183568562, - "grad_norm": 0.6727208424390902, - "learning_rate": 7.039654293066211e-08, - "loss": 0.8281, - "step": 10177 - }, - { - "epoch": 0.9178879018803264, - "grad_norm": 1.581690169877637, - "learning_rate": 7.024299049459003e-08, - "loss": 0.8894, - "step": 10178 - }, - { - "epoch": 0.9179780854037968, - "grad_norm": 2.046395048990591, - "learning_rate": 7.008960271602627e-08, - "loss": 0.8823, - "step": 10179 - }, - { - "epoch": 0.918068268927267, - "grad_norm": 1.5074150866226879, - "learning_rate": 6.993637960805921e-08, - "loss": 0.9307, - "step": 10180 - }, - { - "epoch": 0.9181584524507372, - "grad_norm": 2.297438364291788, - "learning_rate": 6.97833211837624e-08, - "loss": 0.994, - "step": 10181 - }, - { - "epoch": 0.9182486359742075, - "grad_norm": 2.027419836714986, - "learning_rate": 6.963042745619562e-08, - "loss": 0.9162, - "step": 10182 - }, - { - "epoch": 0.9183388194976778, - "grad_norm": 1.2923785473189782, - "learning_rate": 6.947769843840511e-08, - "loss": 0.9334, - "step": 10183 - }, - { - "epoch": 0.918429003021148, - "grad_norm": 1.5358074641554411, - "learning_rate": 6.9325134143422e-08, - "loss": 0.9981, - "step": 10184 - }, - { - "epoch": 0.9185191865446183, - "grad_norm": 1.8375603179373747, - "learning_rate": 6.917273458426387e-08, - "loss": 0.9469, - "step": 10185 - }, - { - "epoch": 0.9186093700680885, - "grad_norm": 0.715853702031852, - "learning_rate": 6.902049977393476e-08, - "loss": 0.8163, - "step": 10186 - }, - { - "epoch": 0.9186995535915589, - "grad_norm": 1.2924846192261938, - "learning_rate": 6.886842972542362e-08, - "loss": 0.9681, - "step": 10187 - }, - { - "epoch": 0.9187897371150291, - "grad_norm": 2.2110672228603923, - "learning_rate": 6.871652445170672e-08, - "loss": 0.8508, - "step": 10188 - }, - { - "epoch": 0.9188799206384993, - "grad_norm": 1.4840232030694585, - "learning_rate": 6.856478396574416e-08, - "loss": 1.0448, - "step": 10189 - }, - { - "epoch": 0.9189701041619696, - "grad_norm": 1.8707383999034748, - "learning_rate": 6.841320828048491e-08, - "loss": 1.0241, - "step": 10190 - }, - { - "epoch": 0.9190602876854399, - "grad_norm": 1.7220220458007367, - "learning_rate": 6.826179740886062e-08, - "loss": 0.9615, - "step": 10191 - }, - { - "epoch": 0.9191504712089101, - "grad_norm": 1.7724959777226832, - "learning_rate": 6.811055136379184e-08, - "loss": 1.0283, - "step": 10192 - }, - { - "epoch": 0.9192406547323804, - "grad_norm": 1.6140114376414445, - "learning_rate": 6.79594701581827e-08, - "loss": 0.9409, - "step": 10193 - }, - { - "epoch": 0.9193308382558506, - "grad_norm": 1.919797317019127, - "learning_rate": 6.780855380492511e-08, - "loss": 0.9724, - "step": 10194 - }, - { - "epoch": 0.919421021779321, - "grad_norm": 1.254406077746464, - "learning_rate": 6.765780231689544e-08, - "loss": 0.9797, - "step": 10195 - }, - { - "epoch": 0.9195112053027912, - "grad_norm": 1.5350389120283043, - "learning_rate": 6.750721570695695e-08, - "loss": 1.0302, - "step": 10196 - }, - { - "epoch": 0.9196013888262614, - "grad_norm": 2.6754746310122592, - "learning_rate": 6.735679398795868e-08, - "loss": 0.9322, - "step": 10197 - }, - { - "epoch": 0.9196915723497318, - "grad_norm": 2.1959641327769055, - "learning_rate": 6.720653717273506e-08, - "loss": 0.8982, - "step": 10198 - }, - { - "epoch": 0.919781755873202, - "grad_norm": 1.3417367713493595, - "learning_rate": 6.705644527410714e-08, - "loss": 0.9667, - "step": 10199 - }, - { - "epoch": 0.9198719393966722, - "grad_norm": 1.417259606458534, - "learning_rate": 6.690651830488136e-08, - "loss": 0.9373, - "step": 10200 - }, - { - "epoch": 0.9199621229201425, - "grad_norm": 1.6654818217448732, - "learning_rate": 6.675675627785037e-08, - "loss": 0.9566, - "step": 10201 - }, - { - "epoch": 0.9200523064436128, - "grad_norm": 1.8109978337477688, - "learning_rate": 6.660715920579263e-08, - "loss": 0.9859, - "step": 10202 - }, - { - "epoch": 0.920142489967083, - "grad_norm": 0.6594031513225007, - "learning_rate": 6.645772710147279e-08, - "loss": 0.8347, - "step": 10203 - }, - { - "epoch": 0.9202326734905533, - "grad_norm": 1.1998624782439415, - "learning_rate": 6.630845997764112e-08, - "loss": 0.9347, - "step": 10204 - }, - { - "epoch": 0.9203228570140235, - "grad_norm": 1.3406831208548289, - "learning_rate": 6.615935784703409e-08, - "loss": 0.9094, - "step": 10205 - }, - { - "epoch": 0.9204130405374938, - "grad_norm": 19.856389767518397, - "learning_rate": 6.601042072237328e-08, - "loss": 0.8917, - "step": 10206 - }, - { - "epoch": 0.9205032240609641, - "grad_norm": 1.3542402785418644, - "learning_rate": 6.586164861636767e-08, - "loss": 1.0014, - "step": 10207 - }, - { - "epoch": 0.9205934075844343, - "grad_norm": 1.843929993200452, - "learning_rate": 6.571304154171065e-08, - "loss": 0.8751, - "step": 10208 - }, - { - "epoch": 0.9206835911079045, - "grad_norm": 1.3935822321756595, - "learning_rate": 6.556459951108273e-08, - "loss": 0.9927, - "step": 10209 - }, - { - "epoch": 0.9207737746313749, - "grad_norm": 1.7474287762405216, - "learning_rate": 6.541632253714957e-08, - "loss": 0.9464, - "step": 10210 - }, - { - "epoch": 0.9208639581548451, - "grad_norm": 1.4273598488921322, - "learning_rate": 6.526821063256261e-08, - "loss": 0.8951, - "step": 10211 - }, - { - "epoch": 0.9209541416783154, - "grad_norm": 1.7152007476491864, - "learning_rate": 6.512026380996016e-08, - "loss": 0.8715, - "step": 10212 - }, - { - "epoch": 0.9210443252017856, - "grad_norm": 1.6518907579874835, - "learning_rate": 6.49724820819657e-08, - "loss": 0.9356, - "step": 10213 - }, - { - "epoch": 0.9211345087252559, - "grad_norm": 1.5221419082733776, - "learning_rate": 6.48248654611887e-08, - "loss": 0.9977, - "step": 10214 - }, - { - "epoch": 0.9212246922487262, - "grad_norm": 1.1847313679447335, - "learning_rate": 6.467741396022419e-08, - "loss": 1.0114, - "step": 10215 - }, - { - "epoch": 0.9213148757721964, - "grad_norm": 1.5319453626306956, - "learning_rate": 6.453012759165455e-08, - "loss": 0.9704, - "step": 10216 - }, - { - "epoch": 0.9214050592956666, - "grad_norm": 1.4377554790861387, - "learning_rate": 6.438300636804639e-08, - "loss": 0.9343, - "step": 10217 - }, - { - "epoch": 0.921495242819137, - "grad_norm": 1.316226482851468, - "learning_rate": 6.423605030195278e-08, - "loss": 1.0018, - "step": 10218 - }, - { - "epoch": 0.9215854263426072, - "grad_norm": 1.8977314456318604, - "learning_rate": 6.408925940591304e-08, - "loss": 1.0444, - "step": 10219 - }, - { - "epoch": 0.9216756098660774, - "grad_norm": 1.8878211676593262, - "learning_rate": 6.394263369245222e-08, - "loss": 0.9011, - "step": 10220 - }, - { - "epoch": 0.9217657933895478, - "grad_norm": 1.4631468035455588, - "learning_rate": 6.379617317408126e-08, - "loss": 0.9684, - "step": 10221 - }, - { - "epoch": 0.921855976913018, - "grad_norm": 1.7690539526855045, - "learning_rate": 6.364987786329723e-08, - "loss": 0.9593, - "step": 10222 - }, - { - "epoch": 0.9219461604364882, - "grad_norm": 2.33977486790826, - "learning_rate": 6.350374777258193e-08, - "loss": 0.9563, - "step": 10223 - }, - { - "epoch": 0.9220363439599585, - "grad_norm": 1.5506095887309703, - "learning_rate": 6.335778291440519e-08, - "loss": 0.9545, - "step": 10224 - }, - { - "epoch": 0.9221265274834288, - "grad_norm": 1.3287610942991075, - "learning_rate": 6.321198330122057e-08, - "loss": 0.8791, - "step": 10225 - }, - { - "epoch": 0.9222167110068991, - "grad_norm": 1.4429636246239594, - "learning_rate": 6.306634894546902e-08, - "loss": 0.9126, - "step": 10226 - }, - { - "epoch": 0.9223068945303693, - "grad_norm": 1.1656979330936776, - "learning_rate": 6.292087985957661e-08, - "loss": 0.9192, - "step": 10227 - }, - { - "epoch": 0.9223970780538395, - "grad_norm": 1.3561291583673711, - "learning_rate": 6.277557605595585e-08, - "loss": 1.0063, - "step": 10228 - }, - { - "epoch": 0.9224872615773099, - "grad_norm": 1.2713831613691446, - "learning_rate": 6.263043754700481e-08, - "loss": 0.8895, - "step": 10229 - }, - { - "epoch": 0.9225774451007801, - "grad_norm": 1.9426834541207243, - "learning_rate": 6.248546434510671e-08, - "loss": 0.9622, - "step": 10230 - }, - { - "epoch": 0.9226676286242503, - "grad_norm": 1.636656912498128, - "learning_rate": 6.234065646263298e-08, - "loss": 0.8984, - "step": 10231 - }, - { - "epoch": 0.9227578121477206, - "grad_norm": 1.6105849523158033, - "learning_rate": 6.219601391193796e-08, - "loss": 0.8785, - "step": 10232 - }, - { - "epoch": 0.9228479956711909, - "grad_norm": 1.3672549857786127, - "learning_rate": 6.205153670536423e-08, - "loss": 0.8968, - "step": 10233 - }, - { - "epoch": 0.9229381791946611, - "grad_norm": 1.3126979860205628, - "learning_rate": 6.190722485523902e-08, - "loss": 0.9945, - "step": 10234 - }, - { - "epoch": 0.9230283627181314, - "grad_norm": 1.3503838841030318, - "learning_rate": 6.176307837387607e-08, - "loss": 0.9385, - "step": 10235 - }, - { - "epoch": 0.9231185462416016, - "grad_norm": 1.2860265874769112, - "learning_rate": 6.16190972735744e-08, - "loss": 0.921, - "step": 10236 - }, - { - "epoch": 0.923208729765072, - "grad_norm": 1.6252469095301354, - "learning_rate": 6.147528156661974e-08, - "loss": 0.986, - "step": 10237 - }, - { - "epoch": 0.9232989132885422, - "grad_norm": 1.2400141963607547, - "learning_rate": 6.133163126528273e-08, - "loss": 0.8842, - "step": 10238 - }, - { - "epoch": 0.9233890968120124, - "grad_norm": 1.530930622036324, - "learning_rate": 6.11881463818209e-08, - "loss": 0.9764, - "step": 10239 - }, - { - "epoch": 0.9234792803354827, - "grad_norm": 1.6416213870057055, - "learning_rate": 6.104482692847668e-08, - "loss": 0.9538, - "step": 10240 - }, - { - "epoch": 0.923569463858953, - "grad_norm": 1.3711571746676299, - "learning_rate": 6.090167291747917e-08, - "loss": 0.9382, - "step": 10241 - }, - { - "epoch": 0.9236596473824232, - "grad_norm": 1.4918804551829086, - "learning_rate": 6.075868436104303e-08, - "loss": 0.8611, - "step": 10242 - }, - { - "epoch": 0.9237498309058935, - "grad_norm": 1.6659454295138367, - "learning_rate": 6.061586127136875e-08, - "loss": 0.9524, - "step": 10243 - }, - { - "epoch": 0.9238400144293638, - "grad_norm": 1.3944901318725833, - "learning_rate": 6.047320366064324e-08, - "loss": 0.9384, - "step": 10244 - }, - { - "epoch": 0.923930197952834, - "grad_norm": 1.324184406026455, - "learning_rate": 6.033071154103786e-08, - "loss": 0.9721, - "step": 10245 - }, - { - "epoch": 0.9240203814763043, - "grad_norm": 1.4963515734315365, - "learning_rate": 6.018838492471178e-08, - "loss": 0.9872, - "step": 10246 - }, - { - "epoch": 0.9241105649997745, - "grad_norm": 0.6265158153084316, - "learning_rate": 6.00462238238082e-08, - "loss": 0.8452, - "step": 10247 - }, - { - "epoch": 0.9242007485232449, - "grad_norm": 1.4330578043872941, - "learning_rate": 5.990422825045827e-08, - "loss": 0.9342, - "step": 10248 - }, - { - "epoch": 0.9242909320467151, - "grad_norm": 1.4646327159183807, - "learning_rate": 5.976239821677675e-08, - "loss": 0.9383, - "step": 10249 - }, - { - "epoch": 0.9243811155701853, - "grad_norm": 1.41796336948015, - "learning_rate": 5.962073373486598e-08, - "loss": 0.9094, - "step": 10250 - }, - { - "epoch": 0.9244712990936556, - "grad_norm": 1.514134175244691, - "learning_rate": 5.947923481681316e-08, - "loss": 0.9236, - "step": 10251 - }, - { - "epoch": 0.9245614826171259, - "grad_norm": 1.5505283255156348, - "learning_rate": 5.933790147469198e-08, - "loss": 0.9441, - "step": 10252 - }, - { - "epoch": 0.9246516661405961, - "grad_norm": 1.7228168577031504, - "learning_rate": 5.9196733720561665e-08, - "loss": 0.9165, - "step": 10253 - }, - { - "epoch": 0.9247418496640664, - "grad_norm": 1.4649361136245302, - "learning_rate": 5.905573156646793e-08, - "loss": 1.0431, - "step": 10254 - }, - { - "epoch": 0.9248320331875366, - "grad_norm": 1.8551733805815651, - "learning_rate": 5.8914895024441134e-08, - "loss": 1.0416, - "step": 10255 - }, - { - "epoch": 0.9249222167110069, - "grad_norm": 1.9069584639996227, - "learning_rate": 5.877422410649857e-08, - "loss": 1.0027, - "step": 10256 - }, - { - "epoch": 0.9250124002344772, - "grad_norm": 1.8130765195332996, - "learning_rate": 5.863371882464285e-08, - "loss": 0.9741, - "step": 10257 - }, - { - "epoch": 0.9251025837579474, - "grad_norm": 1.5573439403253275, - "learning_rate": 5.849337919086283e-08, - "loss": 0.9458, - "step": 10258 - }, - { - "epoch": 0.9251927672814176, - "grad_norm": 1.9130459947296223, - "learning_rate": 5.835320521713316e-08, - "loss": 0.9431, - "step": 10259 - }, - { - "epoch": 0.925282950804888, - "grad_norm": 1.4910021962457314, - "learning_rate": 5.8213196915414264e-08, - "loss": 0.9542, - "step": 10260 - }, - { - "epoch": 0.9253731343283582, - "grad_norm": 1.8377755561829536, - "learning_rate": 5.807335429765237e-08, - "loss": 1.022, - "step": 10261 - }, - { - "epoch": 0.9254633178518284, - "grad_norm": 1.5125392688035744, - "learning_rate": 5.7933677375779034e-08, - "loss": 1.0072, - "step": 10262 - }, - { - "epoch": 0.9255535013752987, - "grad_norm": 1.4229360959842874, - "learning_rate": 5.77941661617134e-08, - "loss": 1.0131, - "step": 10263 - }, - { - "epoch": 0.925643684898769, - "grad_norm": 1.8691872458388736, - "learning_rate": 5.765482066735816e-08, - "loss": 0.8959, - "step": 10264 - }, - { - "epoch": 0.9257338684222393, - "grad_norm": 1.7009938055976332, - "learning_rate": 5.7515640904604256e-08, - "loss": 0.9665, - "step": 10265 - }, - { - "epoch": 0.9258240519457095, - "grad_norm": 2.0272457856193165, - "learning_rate": 5.7376626885326187e-08, - "loss": 0.9473, - "step": 10266 - }, - { - "epoch": 0.9259142354691798, - "grad_norm": 1.4688605408971733, - "learning_rate": 5.723777862138601e-08, - "loss": 1.0289, - "step": 10267 - }, - { - "epoch": 0.9260044189926501, - "grad_norm": 1.6115819372780624, - "learning_rate": 5.7099096124630705e-08, - "loss": 1.0486, - "step": 10268 - }, - { - "epoch": 0.9260946025161203, - "grad_norm": 1.1458763056702164, - "learning_rate": 5.696057940689347e-08, - "loss": 0.9786, - "step": 10269 - }, - { - "epoch": 0.9261847860395905, - "grad_norm": 1.6599134559362183, - "learning_rate": 5.6822228479993736e-08, - "loss": 0.9839, - "step": 10270 - }, - { - "epoch": 0.9262749695630609, - "grad_norm": 1.6405754445175793, - "learning_rate": 5.668404335573584e-08, - "loss": 1.0043, - "step": 10271 - }, - { - "epoch": 0.9263651530865311, - "grad_norm": 1.4999098750695985, - "learning_rate": 5.654602404591058e-08, - "loss": 0.9135, - "step": 10272 - }, - { - "epoch": 0.9264553366100013, - "grad_norm": 1.3349650898487222, - "learning_rate": 5.640817056229474e-08, - "loss": 1.013, - "step": 10273 - }, - { - "epoch": 0.9265455201334716, - "grad_norm": 1.3774197268073056, - "learning_rate": 5.6270482916650706e-08, - "loss": 1.0058, - "step": 10274 - }, - { - "epoch": 0.9266357036569419, - "grad_norm": 1.6363377821931697, - "learning_rate": 5.613296112072663e-08, - "loss": 1.0395, - "step": 10275 - }, - { - "epoch": 0.9267258871804122, - "grad_norm": 2.2529318040193784, - "learning_rate": 5.59956051862569e-08, - "loss": 0.9749, - "step": 10276 - }, - { - "epoch": 0.9268160707038824, - "grad_norm": 1.4497831741539617, - "learning_rate": 5.585841512496081e-08, - "loss": 1.0279, - "step": 10277 - }, - { - "epoch": 0.9269062542273526, - "grad_norm": 1.5681391617221774, - "learning_rate": 5.5721390948545e-08, - "loss": 1.0696, - "step": 10278 - }, - { - "epoch": 0.926996437750823, - "grad_norm": 1.2665631620046305, - "learning_rate": 5.558453266870056e-08, - "loss": 1.0127, - "step": 10279 - }, - { - "epoch": 0.9270866212742932, - "grad_norm": 1.5658135184509037, - "learning_rate": 5.544784029710525e-08, - "loss": 0.9077, - "step": 10280 - }, - { - "epoch": 0.9271768047977634, - "grad_norm": 1.5982250889126075, - "learning_rate": 5.531131384542242e-08, - "loss": 1.0381, - "step": 10281 - }, - { - "epoch": 0.9272669883212337, - "grad_norm": 1.917095537164098, - "learning_rate": 5.51749533253012e-08, - "loss": 0.9223, - "step": 10282 - }, - { - "epoch": 0.927357171844704, - "grad_norm": 1.5829030590002993, - "learning_rate": 5.503875874837649e-08, - "loss": 0.9082, - "step": 10283 - }, - { - "epoch": 0.9274473553681742, - "grad_norm": 1.4829194813153461, - "learning_rate": 5.4902730126269225e-08, - "loss": 0.9284, - "step": 10284 - }, - { - "epoch": 0.9275375388916445, - "grad_norm": 1.514835033814653, - "learning_rate": 5.476686747058656e-08, - "loss": 0.9927, - "step": 10285 - }, - { - "epoch": 0.9276277224151147, - "grad_norm": 2.097397090310597, - "learning_rate": 5.4631170792920124e-08, - "loss": 0.9377, - "step": 10286 - }, - { - "epoch": 0.927717905938585, - "grad_norm": 1.6545564882394521, - "learning_rate": 5.449564010484953e-08, - "loss": 0.9433, - "step": 10287 - }, - { - "epoch": 0.9278080894620553, - "grad_norm": 1.6573449599998087, - "learning_rate": 5.436027541793775e-08, - "loss": 0.9934, - "step": 10288 - }, - { - "epoch": 0.9278982729855255, - "grad_norm": 1.9866074147231894, - "learning_rate": 5.4225076743735554e-08, - "loss": 0.948, - "step": 10289 - }, - { - "epoch": 0.9279884565089958, - "grad_norm": 1.972626094776476, - "learning_rate": 5.409004409377882e-08, - "loss": 0.8853, - "step": 10290 - }, - { - "epoch": 0.9280786400324661, - "grad_norm": 1.553475755962153, - "learning_rate": 5.3955177479589e-08, - "loss": 0.8935, - "step": 10291 - }, - { - "epoch": 0.9281688235559363, - "grad_norm": 1.6375200954689304, - "learning_rate": 5.3820476912674e-08, - "loss": 0.9702, - "step": 10292 - }, - { - "epoch": 0.9282590070794066, - "grad_norm": 1.2353061592292511, - "learning_rate": 5.3685942404527063e-08, - "loss": 0.9902, - "step": 10293 - }, - { - "epoch": 0.9283491906028769, - "grad_norm": 1.4231884098794716, - "learning_rate": 5.355157396662702e-08, - "loss": 0.9504, - "step": 10294 - }, - { - "epoch": 0.9284393741263471, - "grad_norm": 1.3149634811639215, - "learning_rate": 5.34173716104398e-08, - "loss": 0.9487, - "step": 10295 - }, - { - "epoch": 0.9285295576498174, - "grad_norm": 4.030000241565934, - "learning_rate": 5.328333534741536e-08, - "loss": 0.973, - "step": 10296 - }, - { - "epoch": 0.9286197411732876, - "grad_norm": 1.5487743238458673, - "learning_rate": 5.314946518899099e-08, - "loss": 0.9256, - "step": 10297 - }, - { - "epoch": 0.928709924696758, - "grad_norm": 1.6817512785939448, - "learning_rate": 5.301576114658912e-08, - "loss": 1.0423, - "step": 10298 - }, - { - "epoch": 0.9288001082202282, - "grad_norm": 1.5768315519500165, - "learning_rate": 5.288222323161795e-08, - "loss": 0.9975, - "step": 10299 - }, - { - "epoch": 0.9288902917436984, - "grad_norm": 1.524321940031558, - "learning_rate": 5.274885145547214e-08, - "loss": 1.0085, - "step": 10300 - }, - { - "epoch": 0.9289804752671686, - "grad_norm": 1.7243750338469692, - "learning_rate": 5.261564582953082e-08, - "loss": 0.965, - "step": 10301 - }, - { - "epoch": 0.929070658790639, - "grad_norm": 2.10491562409183, - "learning_rate": 5.248260636516066e-08, - "loss": 0.9444, - "step": 10302 - }, - { - "epoch": 0.9291608423141092, - "grad_norm": 1.631111536690024, - "learning_rate": 5.2349733073712824e-08, - "loss": 0.9098, - "step": 10303 - }, - { - "epoch": 0.9292510258375795, - "grad_norm": 1.3522173226869483, - "learning_rate": 5.221702596652533e-08, - "loss": 1.0555, - "step": 10304 - }, - { - "epoch": 0.9293412093610497, - "grad_norm": 1.8020387838806993, - "learning_rate": 5.208448505492091e-08, - "loss": 0.9141, - "step": 10305 - }, - { - "epoch": 0.92943139288452, - "grad_norm": 1.3719491925521805, - "learning_rate": 5.1952110350208965e-08, - "loss": 0.9511, - "step": 10306 - }, - { - "epoch": 0.9295215764079903, - "grad_norm": 1.4956037949151995, - "learning_rate": 5.181990186368446e-08, - "loss": 0.9249, - "step": 10307 - }, - { - "epoch": 0.9296117599314605, - "grad_norm": 1.384970064769274, - "learning_rate": 5.1687859606627915e-08, - "loss": 1.0629, - "step": 10308 - }, - { - "epoch": 0.9297019434549307, - "grad_norm": 1.6231558679371036, - "learning_rate": 5.1555983590306327e-08, - "loss": 0.8807, - "step": 10309 - }, - { - "epoch": 0.9297921269784011, - "grad_norm": 0.7059268862132007, - "learning_rate": 5.1424273825971806e-08, - "loss": 0.8536, - "step": 10310 - }, - { - "epoch": 0.9298823105018713, - "grad_norm": 1.8249269862789095, - "learning_rate": 5.1292730324862475e-08, - "loss": 0.9318, - "step": 10311 - }, - { - "epoch": 0.9299724940253415, - "grad_norm": 1.39573728411234, - "learning_rate": 5.116135309820224e-08, - "loss": 0.9803, - "step": 10312 - }, - { - "epoch": 0.9300626775488118, - "grad_norm": 2.7573187280664078, - "learning_rate": 5.103014215720147e-08, - "loss": 1.0005, - "step": 10313 - }, - { - "epoch": 0.9301528610722821, - "grad_norm": 1.400108915588228, - "learning_rate": 5.0899097513055214e-08, - "loss": 1.0231, - "step": 10314 - }, - { - "epoch": 0.9302430445957524, - "grad_norm": 1.6015970365940844, - "learning_rate": 5.076821917694563e-08, - "loss": 0.958, - "step": 10315 - }, - { - "epoch": 0.9303332281192226, - "grad_norm": 2.06545900725348, - "learning_rate": 5.063750716003889e-08, - "loss": 1.021, - "step": 10316 - }, - { - "epoch": 0.9304234116426929, - "grad_norm": 2.1390223385793345, - "learning_rate": 5.050696147348921e-08, - "loss": 0.9546, - "step": 10317 - }, - { - "epoch": 0.9305135951661632, - "grad_norm": 1.9083724166443852, - "learning_rate": 5.037658212843454e-08, - "loss": 0.9415, - "step": 10318 - }, - { - "epoch": 0.9306037786896334, - "grad_norm": 1.3797690742198576, - "learning_rate": 5.0246369136000444e-08, - "loss": 0.8581, - "step": 10319 - }, - { - "epoch": 0.9306939622131036, - "grad_norm": 1.4163589797887737, - "learning_rate": 5.011632250729691e-08, - "loss": 1.0417, - "step": 10320 - }, - { - "epoch": 0.930784145736574, - "grad_norm": 1.4970569348717364, - "learning_rate": 4.998644225342019e-08, - "loss": 1.0549, - "step": 10321 - }, - { - "epoch": 0.9308743292600442, - "grad_norm": 1.4423327272638844, - "learning_rate": 4.9856728385452296e-08, - "loss": 0.9719, - "step": 10322 - }, - { - "epoch": 0.9309645127835144, - "grad_norm": 1.5778960260143071, - "learning_rate": 4.9727180914461485e-08, - "loss": 1.0556, - "step": 10323 - }, - { - "epoch": 0.9310546963069847, - "grad_norm": 1.5576105291050868, - "learning_rate": 4.959779985150137e-08, - "loss": 0.9662, - "step": 10324 - }, - { - "epoch": 0.931144879830455, - "grad_norm": 1.855592229649498, - "learning_rate": 4.9468585207611105e-08, - "loss": 0.9167, - "step": 10325 - }, - { - "epoch": 0.9312350633539253, - "grad_norm": 1.6864485820687056, - "learning_rate": 4.9339536993816764e-08, - "loss": 0.9382, - "step": 10326 - }, - { - "epoch": 0.9313252468773955, - "grad_norm": 1.7189146483193363, - "learning_rate": 4.921065522112844e-08, - "loss": 0.9099, - "step": 10327 - }, - { - "epoch": 0.9314154304008657, - "grad_norm": 1.4687283291131072, - "learning_rate": 4.908193990054377e-08, - "loss": 1.0129, - "step": 10328 - }, - { - "epoch": 0.9315056139243361, - "grad_norm": 1.2557363494368055, - "learning_rate": 4.89533910430453e-08, - "loss": 0.9898, - "step": 10329 - }, - { - "epoch": 0.9315957974478063, - "grad_norm": 2.7630064901302624, - "learning_rate": 4.8825008659601376e-08, - "loss": 0.9564, - "step": 10330 - }, - { - "epoch": 0.9316859809712765, - "grad_norm": 1.7381930553908096, - "learning_rate": 4.869679276116634e-08, - "loss": 0.9773, - "step": 10331 - }, - { - "epoch": 0.9317761644947468, - "grad_norm": 1.2877341622304577, - "learning_rate": 4.856874335868055e-08, - "loss": 0.9864, - "step": 10332 - }, - { - "epoch": 0.9318663480182171, - "grad_norm": 1.553561468985067, - "learning_rate": 4.844086046306928e-08, - "loss": 1.0733, - "step": 10333 - }, - { - "epoch": 0.9319565315416873, - "grad_norm": 1.3666603598022702, - "learning_rate": 4.8313144085244896e-08, - "loss": 0.9691, - "step": 10334 - }, - { - "epoch": 0.9320467150651576, - "grad_norm": 0.6747668375753214, - "learning_rate": 4.818559423610424e-08, - "loss": 0.8392, - "step": 10335 - }, - { - "epoch": 0.9321368985886278, - "grad_norm": 2.2457001183609635, - "learning_rate": 4.8058210926531284e-08, - "loss": 0.9188, - "step": 10336 - }, - { - "epoch": 0.9322270821120981, - "grad_norm": 2.017531447041415, - "learning_rate": 4.7930994167394435e-08, - "loss": 0.9973, - "step": 10337 - }, - { - "epoch": 0.9323172656355684, - "grad_norm": 1.2895591472685128, - "learning_rate": 4.7803943969548786e-08, - "loss": 1.0083, - "step": 10338 - }, - { - "epoch": 0.9324074491590386, - "grad_norm": 1.8604339661457796, - "learning_rate": 4.7677060343834784e-08, - "loss": 0.9173, - "step": 10339 - }, - { - "epoch": 0.932497632682509, - "grad_norm": 1.478313235662725, - "learning_rate": 4.75503433010791e-08, - "loss": 0.9614, - "step": 10340 - }, - { - "epoch": 0.9325878162059792, - "grad_norm": 1.9725889893197524, - "learning_rate": 4.742379285209419e-08, - "loss": 0.948, - "step": 10341 - }, - { - "epoch": 0.9326779997294494, - "grad_norm": 1.4440723769364652, - "learning_rate": 4.72974090076772e-08, - "loss": 1.0665, - "step": 10342 - }, - { - "epoch": 0.9327681832529197, - "grad_norm": 1.5514710859431373, - "learning_rate": 4.717119177861262e-08, - "loss": 0.9232, - "step": 10343 - }, - { - "epoch": 0.93285836677639, - "grad_norm": 2.323619605023089, - "learning_rate": 4.70451411756696e-08, - "loss": 0.9828, - "step": 10344 - }, - { - "epoch": 0.9329485502998602, - "grad_norm": 1.3092948288147275, - "learning_rate": 4.691925720960355e-08, - "loss": 1.0067, - "step": 10345 - }, - { - "epoch": 0.9330387338233305, - "grad_norm": 1.599211814578345, - "learning_rate": 4.6793539891155645e-08, - "loss": 0.9938, - "step": 10346 - }, - { - "epoch": 0.9331289173468007, - "grad_norm": 1.6421956128356483, - "learning_rate": 4.6667989231052864e-08, - "loss": 0.9419, - "step": 10347 - }, - { - "epoch": 0.933219100870271, - "grad_norm": 1.712323050162818, - "learning_rate": 4.654260524000797e-08, - "loss": 0.9733, - "step": 10348 - }, - { - "epoch": 0.9333092843937413, - "grad_norm": 1.2492250424453828, - "learning_rate": 4.6417387928719076e-08, - "loss": 0.9606, - "step": 10349 - }, - { - "epoch": 0.9333994679172115, - "grad_norm": 1.4020442802276885, - "learning_rate": 4.629233730787052e-08, - "loss": 0.8402, - "step": 10350 - }, - { - "epoch": 0.9334896514406817, - "grad_norm": 1.7676486680933674, - "learning_rate": 4.616745338813266e-08, - "loss": 0.9306, - "step": 10351 - }, - { - "epoch": 0.9335798349641521, - "grad_norm": 1.2321119205863866, - "learning_rate": 4.6042736180160744e-08, - "loss": 0.9833, - "step": 10352 - }, - { - "epoch": 0.9336700184876223, - "grad_norm": 4.198611193330336, - "learning_rate": 4.591818569459671e-08, - "loss": 1.0866, - "step": 10353 - }, - { - "epoch": 0.9337602020110926, - "grad_norm": 1.7287209254183065, - "learning_rate": 4.5793801942067614e-08, - "loss": 0.991, - "step": 10354 - }, - { - "epoch": 0.9338503855345628, - "grad_norm": 1.5426483980634107, - "learning_rate": 4.566958493318673e-08, - "loss": 0.8304, - "step": 10355 - }, - { - "epoch": 0.9339405690580331, - "grad_norm": 0.632115957951688, - "learning_rate": 4.554553467855316e-08, - "loss": 0.8633, - "step": 10356 - }, - { - "epoch": 0.9340307525815034, - "grad_norm": 1.5884338425694187, - "learning_rate": 4.5421651188751074e-08, - "loss": 0.9839, - "step": 10357 - }, - { - "epoch": 0.9341209361049736, - "grad_norm": 1.614197994723142, - "learning_rate": 4.529793447435137e-08, - "loss": 0.9804, - "step": 10358 - }, - { - "epoch": 0.9342111196284438, - "grad_norm": 1.447270484741124, - "learning_rate": 4.5174384545909824e-08, - "loss": 0.9717, - "step": 10359 - }, - { - "epoch": 0.9343013031519142, - "grad_norm": 1.7555347520196534, - "learning_rate": 4.505100141396867e-08, - "loss": 1.0084, - "step": 10360 - }, - { - "epoch": 0.9343914866753844, - "grad_norm": 1.6739914234675444, - "learning_rate": 4.492778508905548e-08, - "loss": 0.9968, - "step": 10361 - }, - { - "epoch": 0.9344816701988546, - "grad_norm": 1.4507112863132252, - "learning_rate": 4.480473558168385e-08, - "loss": 0.9095, - "step": 10362 - }, - { - "epoch": 0.934571853722325, - "grad_norm": 1.7307994517923875, - "learning_rate": 4.4681852902352936e-08, - "loss": 0.8765, - "step": 10363 - }, - { - "epoch": 0.9346620372457952, - "grad_norm": 1.4913230908780661, - "learning_rate": 4.455913706154812e-08, - "loss": 0.8786, - "step": 10364 - }, - { - "epoch": 0.9347522207692655, - "grad_norm": 2.139935013327996, - "learning_rate": 4.443658806973949e-08, - "loss": 0.9937, - "step": 10365 - }, - { - "epoch": 0.9348424042927357, - "grad_norm": 0.6589281520591107, - "learning_rate": 4.431420593738444e-08, - "loss": 0.8429, - "step": 10366 - }, - { - "epoch": 0.934932587816206, - "grad_norm": 1.4809736935728086, - "learning_rate": 4.419199067492485e-08, - "loss": 0.8741, - "step": 10367 - }, - { - "epoch": 0.9350227713396763, - "grad_norm": 1.6027128972644287, - "learning_rate": 4.4069942292788596e-08, - "loss": 0.9744, - "step": 10368 - }, - { - "epoch": 0.9351129548631465, - "grad_norm": 1.8156401956943307, - "learning_rate": 4.39480608013898e-08, - "loss": 0.9597, - "step": 10369 - }, - { - "epoch": 0.9352031383866167, - "grad_norm": 1.3379486739659243, - "learning_rate": 4.3826346211128126e-08, - "loss": 1.0532, - "step": 10370 - }, - { - "epoch": 0.9352933219100871, - "grad_norm": 1.2878953215698838, - "learning_rate": 4.370479853238884e-08, - "loss": 0.9679, - "step": 10371 - }, - { - "epoch": 0.9353835054335573, - "grad_norm": 1.4660441045436816, - "learning_rate": 4.3583417775542756e-08, - "loss": 0.9347, - "step": 10372 - }, - { - "epoch": 0.9354736889570275, - "grad_norm": 1.2273595370631663, - "learning_rate": 4.3462203950947575e-08, - "loss": 0.9221, - "step": 10373 - }, - { - "epoch": 0.9355638724804978, - "grad_norm": 1.5842252611657992, - "learning_rate": 4.3341157068944814e-08, - "loss": 0.9525, - "step": 10374 - }, - { - "epoch": 0.9356540560039681, - "grad_norm": 1.5343039284462454, - "learning_rate": 4.322027713986376e-08, - "loss": 0.9834, - "step": 10375 - }, - { - "epoch": 0.9357442395274383, - "grad_norm": 2.0256810067498727, - "learning_rate": 4.309956417401816e-08, - "loss": 1.0394, - "step": 10376 - }, - { - "epoch": 0.9358344230509086, - "grad_norm": 1.4573379480167303, - "learning_rate": 4.297901818170801e-08, - "loss": 0.9343, - "step": 10377 - }, - { - "epoch": 0.9359246065743788, - "grad_norm": 2.1119635628822726, - "learning_rate": 4.285863917321886e-08, - "loss": 0.895, - "step": 10378 - }, - { - "epoch": 0.9360147900978492, - "grad_norm": 1.9919584853239358, - "learning_rate": 4.2738427158822253e-08, - "loss": 0.8975, - "step": 10379 - }, - { - "epoch": 0.9361049736213194, - "grad_norm": 1.4843903390192246, - "learning_rate": 4.261838214877511e-08, - "loss": 0.979, - "step": 10380 - }, - { - "epoch": 0.9361951571447896, - "grad_norm": 1.470697370013848, - "learning_rate": 4.249850415332079e-08, - "loss": 0.9724, - "step": 10381 - }, - { - "epoch": 0.9362853406682599, - "grad_norm": 1.458623675553769, - "learning_rate": 4.237879318268756e-08, - "loss": 1.0284, - "step": 10382 - }, - { - "epoch": 0.9363755241917302, - "grad_norm": 1.4052509149651229, - "learning_rate": 4.225924924708968e-08, - "loss": 0.8973, - "step": 10383 - }, - { - "epoch": 0.9364657077152004, - "grad_norm": 1.5516489517697611, - "learning_rate": 4.2139872356727665e-08, - "loss": 0.9786, - "step": 10384 - }, - { - "epoch": 0.9365558912386707, - "grad_norm": 1.5609370234491577, - "learning_rate": 4.202066252178738e-08, - "loss": 0.9349, - "step": 10385 - }, - { - "epoch": 0.936646074762141, - "grad_norm": 1.3895177826678649, - "learning_rate": 4.1901619752440445e-08, - "loss": 1.0616, - "step": 10386 - }, - { - "epoch": 0.9367362582856112, - "grad_norm": 1.741824604095164, - "learning_rate": 4.178274405884363e-08, - "loss": 0.9655, - "step": 10387 - }, - { - "epoch": 0.9368264418090815, - "grad_norm": 1.478979564528148, - "learning_rate": 4.166403545114105e-08, - "loss": 0.976, - "step": 10388 - }, - { - "epoch": 0.9369166253325517, - "grad_norm": 1.8100384406755075, - "learning_rate": 4.154549393946083e-08, - "loss": 1.0191, - "step": 10389 - }, - { - "epoch": 0.937006808856022, - "grad_norm": 1.2918259102067484, - "learning_rate": 4.14271195339182e-08, - "loss": 0.9934, - "step": 10390 - }, - { - "epoch": 0.9370969923794923, - "grad_norm": 3.588701500214472, - "learning_rate": 4.1308912244613084e-08, - "loss": 0.9335, - "step": 10391 - }, - { - "epoch": 0.9371871759029625, - "grad_norm": 1.3447387074217814, - "learning_rate": 4.1190872081631636e-08, - "loss": 0.9197, - "step": 10392 - }, - { - "epoch": 0.9372773594264328, - "grad_norm": 1.6002108941564477, - "learning_rate": 4.107299905504558e-08, - "loss": 1.0287, - "step": 10393 - }, - { - "epoch": 0.9373675429499031, - "grad_norm": 1.430634815320716, - "learning_rate": 4.095529317491286e-08, - "loss": 0.9606, - "step": 10394 - }, - { - "epoch": 0.9374577264733733, - "grad_norm": 1.4470731167039488, - "learning_rate": 4.0837754451276575e-08, - "loss": 0.9767, - "step": 10395 - }, - { - "epoch": 0.9375479099968436, - "grad_norm": 1.2837840351915724, - "learning_rate": 4.072038289416557e-08, - "loss": 0.9583, - "step": 10396 - }, - { - "epoch": 0.9376380935203138, - "grad_norm": 1.3961781412756409, - "learning_rate": 4.0603178513595185e-08, - "loss": 0.9735, - "step": 10397 - }, - { - "epoch": 0.9377282770437841, - "grad_norm": 1.4940002774413232, - "learning_rate": 4.0486141319565624e-08, - "loss": 0.9758, - "step": 10398 - }, - { - "epoch": 0.9378184605672544, - "grad_norm": 1.5825009249282231, - "learning_rate": 4.0369271322062916e-08, - "loss": 0.9124, - "step": 10399 - }, - { - "epoch": 0.9379086440907246, - "grad_norm": 1.6267555730526968, - "learning_rate": 4.0252568531059295e-08, - "loss": 0.9903, - "step": 10400 - }, - { - "epoch": 0.9379988276141948, - "grad_norm": 1.7519910249044728, - "learning_rate": 4.013603295651235e-08, - "loss": 0.9651, - "step": 10401 - }, - { - "epoch": 0.9380890111376652, - "grad_norm": 1.4762520438494249, - "learning_rate": 4.001966460836592e-08, - "loss": 0.9944, - "step": 10402 - }, - { - "epoch": 0.9381791946611354, - "grad_norm": 2.5069572915698766, - "learning_rate": 3.990346349654894e-08, - "loss": 1.0418, - "step": 10403 - }, - { - "epoch": 0.9382693781846057, - "grad_norm": 1.3196411231615999, - "learning_rate": 3.9787429630975924e-08, - "loss": 0.9455, - "step": 10404 - }, - { - "epoch": 0.9383595617080759, - "grad_norm": 1.6188235645442586, - "learning_rate": 3.967156302154828e-08, - "loss": 0.9644, - "step": 10405 - }, - { - "epoch": 0.9384497452315462, - "grad_norm": 1.5541245097697498, - "learning_rate": 3.955586367815189e-08, - "loss": 0.9241, - "step": 10406 - }, - { - "epoch": 0.9385399287550165, - "grad_norm": 1.4896696728135206, - "learning_rate": 3.944033161065907e-08, - "loss": 1.0345, - "step": 10407 - }, - { - "epoch": 0.9386301122784867, - "grad_norm": 1.4838694464433637, - "learning_rate": 3.93249668289275e-08, - "loss": 0.8606, - "step": 10408 - }, - { - "epoch": 0.9387202958019569, - "grad_norm": 1.3030958231854561, - "learning_rate": 3.920976934280063e-08, - "loss": 0.8965, - "step": 10409 - }, - { - "epoch": 0.9388104793254273, - "grad_norm": 1.3757654573659477, - "learning_rate": 3.909473916210815e-08, - "loss": 1.0459, - "step": 10410 - }, - { - "epoch": 0.9389006628488975, - "grad_norm": 1.6770086173084668, - "learning_rate": 3.897987629666488e-08, - "loss": 0.9327, - "step": 10411 - }, - { - "epoch": 0.9389908463723677, - "grad_norm": 1.4453732039616858, - "learning_rate": 3.886518075627143e-08, - "loss": 1.0398, - "step": 10412 - }, - { - "epoch": 0.9390810298958381, - "grad_norm": 2.068810259814389, - "learning_rate": 3.875065255071419e-08, - "loss": 0.9571, - "step": 10413 - }, - { - "epoch": 0.9391712134193083, - "grad_norm": 1.5365002987405454, - "learning_rate": 3.863629168976579e-08, - "loss": 0.941, - "step": 10414 - }, - { - "epoch": 0.9392613969427785, - "grad_norm": 1.4732824962067357, - "learning_rate": 3.852209818318375e-08, - "loss": 1.0211, - "step": 10415 - }, - { - "epoch": 0.9393515804662488, - "grad_norm": 1.4965435576977604, - "learning_rate": 3.840807204071161e-08, - "loss": 0.873, - "step": 10416 - }, - { - "epoch": 0.9394417639897191, - "grad_norm": 1.360854253839766, - "learning_rate": 3.829421327207894e-08, - "loss": 0.9463, - "step": 10417 - }, - { - "epoch": 0.9395319475131894, - "grad_norm": 3.419403995536705, - "learning_rate": 3.8180521887000825e-08, - "loss": 0.9269, - "step": 10418 - }, - { - "epoch": 0.9396221310366596, - "grad_norm": 0.6259203571670753, - "learning_rate": 3.806699789517775e-08, - "loss": 0.8786, - "step": 10419 - }, - { - "epoch": 0.9397123145601298, - "grad_norm": 1.241142745286966, - "learning_rate": 3.7953641306296635e-08, - "loss": 0.9531, - "step": 10420 - }, - { - "epoch": 0.9398024980836002, - "grad_norm": 1.9767706237504465, - "learning_rate": 3.784045213002951e-08, - "loss": 0.8165, - "step": 10421 - }, - { - "epoch": 0.9398926816070704, - "grad_norm": 1.4322398598289197, - "learning_rate": 3.7727430376033986e-08, - "loss": 0.9855, - "step": 10422 - }, - { - "epoch": 0.9399828651305406, - "grad_norm": 1.7100050662897026, - "learning_rate": 3.7614576053954126e-08, - "loss": 0.9744, - "step": 10423 - }, - { - "epoch": 0.9400730486540109, - "grad_norm": 1.6462462989574356, - "learning_rate": 3.75018891734189e-08, - "loss": 0.9677, - "step": 10424 - }, - { - "epoch": 0.9401632321774812, - "grad_norm": 1.8478525714732408, - "learning_rate": 3.738936974404372e-08, - "loss": 1.0087, - "step": 10425 - }, - { - "epoch": 0.9402534157009514, - "grad_norm": 1.1593265327954652, - "learning_rate": 3.7277017775429354e-08, - "loss": 1.0293, - "step": 10426 - }, - { - "epoch": 0.9403435992244217, - "grad_norm": 1.2775897801124834, - "learning_rate": 3.7164833277162136e-08, - "loss": 0.996, - "step": 10427 - }, - { - "epoch": 0.9404337827478919, - "grad_norm": 1.2153500302347122, - "learning_rate": 3.705281625881418e-08, - "loss": 0.9503, - "step": 10428 - }, - { - "epoch": 0.9405239662713623, - "grad_norm": 1.3479921668642925, - "learning_rate": 3.694096672994362e-08, - "loss": 0.8794, - "step": 10429 - }, - { - "epoch": 0.9406141497948325, - "grad_norm": 1.3114535837892918, - "learning_rate": 3.682928470009394e-08, - "loss": 1.025, - "step": 10430 - }, - { - "epoch": 0.9407043333183027, - "grad_norm": 1.4435282648409256, - "learning_rate": 3.6717770178794406e-08, - "loss": 0.9328, - "step": 10431 - }, - { - "epoch": 0.940794516841773, - "grad_norm": 1.3120040864938969, - "learning_rate": 3.6606423175560287e-08, - "loss": 1.0635, - "step": 10432 - }, - { - "epoch": 0.9408847003652433, - "grad_norm": 0.6684156116613129, - "learning_rate": 3.649524369989221e-08, - "loss": 0.8041, - "step": 10433 - }, - { - "epoch": 0.9409748838887135, - "grad_norm": 1.6252316529875988, - "learning_rate": 3.638423176127636e-08, - "loss": 0.9998, - "step": 10434 - }, - { - "epoch": 0.9410650674121838, - "grad_norm": 1.3392422051401403, - "learning_rate": 3.6273387369185396e-08, - "loss": 1.0043, - "step": 10435 - }, - { - "epoch": 0.9411552509356541, - "grad_norm": 1.5839933308114191, - "learning_rate": 3.616271053307685e-08, - "loss": 0.9169, - "step": 10436 - }, - { - "epoch": 0.9412454344591243, - "grad_norm": 1.5430486344530276, - "learning_rate": 3.6052201262394275e-08, - "loss": 0.9847, - "step": 10437 - }, - { - "epoch": 0.9413356179825946, - "grad_norm": 1.524030442877458, - "learning_rate": 3.5941859566566816e-08, - "loss": 1.0207, - "step": 10438 - }, - { - "epoch": 0.9414258015060648, - "grad_norm": 0.7023058039817972, - "learning_rate": 3.583168545500981e-08, - "loss": 0.8502, - "step": 10439 - }, - { - "epoch": 0.9415159850295352, - "grad_norm": 1.32997536507357, - "learning_rate": 3.5721678937123746e-08, - "loss": 0.9145, - "step": 10440 - }, - { - "epoch": 0.9416061685530054, - "grad_norm": 1.307912119508835, - "learning_rate": 3.561184002229467e-08, - "loss": 1.047, - "step": 10441 - }, - { - "epoch": 0.9416963520764756, - "grad_norm": 1.4448239712265334, - "learning_rate": 3.550216871989531e-08, - "loss": 0.9106, - "step": 10442 - }, - { - "epoch": 0.9417865355999459, - "grad_norm": 0.620468088426816, - "learning_rate": 3.539266503928262e-08, - "loss": 0.8969, - "step": 10443 - }, - { - "epoch": 0.9418767191234162, - "grad_norm": 1.3540230992824691, - "learning_rate": 3.528332898980091e-08, - "loss": 0.9806, - "step": 10444 - }, - { - "epoch": 0.9419669026468864, - "grad_norm": 1.6929547487532837, - "learning_rate": 3.517416058077849e-08, - "loss": 0.9342, - "step": 10445 - }, - { - "epoch": 0.9420570861703567, - "grad_norm": 2.5790820570601394, - "learning_rate": 3.506515982153102e-08, - "loss": 0.8757, - "step": 10446 - }, - { - "epoch": 0.9421472696938269, - "grad_norm": 1.4127769316896048, - "learning_rate": 3.495632672135862e-08, - "loss": 0.9755, - "step": 10447 - }, - { - "epoch": 0.9422374532172972, - "grad_norm": 1.3912817443626058, - "learning_rate": 3.4847661289547417e-08, - "loss": 0.9953, - "step": 10448 - }, - { - "epoch": 0.9423276367407675, - "grad_norm": 2.0463924858432514, - "learning_rate": 3.473916353536932e-08, - "loss": 0.9239, - "step": 10449 - }, - { - "epoch": 0.9424178202642377, - "grad_norm": 3.106555983518622, - "learning_rate": 3.463083346808249e-08, - "loss": 0.944, - "step": 10450 - }, - { - "epoch": 0.9425080037877079, - "grad_norm": 1.604972744172543, - "learning_rate": 3.452267109692975e-08, - "loss": 0.8728, - "step": 10451 - }, - { - "epoch": 0.9425981873111783, - "grad_norm": 1.555512099378094, - "learning_rate": 3.441467643114016e-08, - "loss": 0.9637, - "step": 10452 - }, - { - "epoch": 0.9426883708346485, - "grad_norm": 0.5909024520397714, - "learning_rate": 3.430684947992857e-08, - "loss": 0.8126, - "step": 10453 - }, - { - "epoch": 0.9427785543581187, - "grad_norm": 1.4434081031926005, - "learning_rate": 3.419919025249518e-08, - "loss": 0.9836, - "step": 10454 - }, - { - "epoch": 0.942868737881589, - "grad_norm": 1.3441123917310336, - "learning_rate": 3.40916987580262e-08, - "loss": 0.9862, - "step": 10455 - }, - { - "epoch": 0.9429589214050593, - "grad_norm": 1.316381210858747, - "learning_rate": 3.398437500569362e-08, - "loss": 0.9444, - "step": 10456 - }, - { - "epoch": 0.9430491049285296, - "grad_norm": 1.5118868633307874, - "learning_rate": 3.3877219004654347e-08, - "loss": 0.8589, - "step": 10457 - }, - { - "epoch": 0.9431392884519998, - "grad_norm": 5.826356839483495, - "learning_rate": 3.3770230764051946e-08, - "loss": 1.006, - "step": 10458 - }, - { - "epoch": 0.9432294719754701, - "grad_norm": 1.7848798341667602, - "learning_rate": 3.366341029301534e-08, - "loss": 0.9842, - "step": 10459 - }, - { - "epoch": 0.9433196554989404, - "grad_norm": 1.722196833447323, - "learning_rate": 3.355675760065857e-08, - "loss": 0.972, - "step": 10460 - }, - { - "epoch": 0.9434098390224106, - "grad_norm": 0.576149710536011, - "learning_rate": 3.345027269608236e-08, - "loss": 0.7808, - "step": 10461 - }, - { - "epoch": 0.9435000225458808, - "grad_norm": 0.6502156294275121, - "learning_rate": 3.334395558837211e-08, - "loss": 0.8469, - "step": 10462 - }, - { - "epoch": 0.9435902060693512, - "grad_norm": 1.323001268139705, - "learning_rate": 3.3237806286599667e-08, - "loss": 0.8779, - "step": 10463 - }, - { - "epoch": 0.9436803895928214, - "grad_norm": 1.8611721681982845, - "learning_rate": 3.313182479982224e-08, - "loss": 0.9602, - "step": 10464 - }, - { - "epoch": 0.9437705731162916, - "grad_norm": 1.7057699960573416, - "learning_rate": 3.302601113708259e-08, - "loss": 1.0296, - "step": 10465 - }, - { - "epoch": 0.9438607566397619, - "grad_norm": 1.3302674459866586, - "learning_rate": 3.292036530740972e-08, - "loss": 1.0174, - "step": 10466 - }, - { - "epoch": 0.9439509401632322, - "grad_norm": 1.4066942996584761, - "learning_rate": 3.2814887319817294e-08, - "loss": 0.9637, - "step": 10467 - }, - { - "epoch": 0.9440411236867025, - "grad_norm": 1.5807322502629864, - "learning_rate": 3.270957718330591e-08, - "loss": 1.0168, - "step": 10468 - }, - { - "epoch": 0.9441313072101727, - "grad_norm": 1.5049983035360008, - "learning_rate": 3.260443490686082e-08, - "loss": 1.0114, - "step": 10469 - }, - { - "epoch": 0.9442214907336429, - "grad_norm": 1.5263757784767031, - "learning_rate": 3.249946049945351e-08, - "loss": 0.98, - "step": 10470 - }, - { - "epoch": 0.9443116742571133, - "grad_norm": 1.6601869827118625, - "learning_rate": 3.239465397004082e-08, - "loss": 0.983, - "step": 10471 - }, - { - "epoch": 0.9444018577805835, - "grad_norm": 2.3326959988426177, - "learning_rate": 3.229001532756559e-08, - "loss": 0.9336, - "step": 10472 - }, - { - "epoch": 0.9444920413040537, - "grad_norm": 1.9292121950493735, - "learning_rate": 3.218554458095602e-08, - "loss": 0.9697, - "step": 10473 - }, - { - "epoch": 0.944582224827524, - "grad_norm": 1.5899801359795185, - "learning_rate": 3.20812417391263e-08, - "loss": 0.9808, - "step": 10474 - }, - { - "epoch": 0.9446724083509943, - "grad_norm": 1.1884466212943494, - "learning_rate": 3.1977106810975764e-08, - "loss": 0.9619, - "step": 10475 - }, - { - "epoch": 0.9447625918744645, - "grad_norm": 0.8314579882740926, - "learning_rate": 3.187313980539042e-08, - "loss": 0.906, - "step": 10476 - }, - { - "epoch": 0.9448527753979348, - "grad_norm": 1.2967178984934677, - "learning_rate": 3.176934073124071e-08, - "loss": 1.0528, - "step": 10477 - }, - { - "epoch": 0.944942958921405, - "grad_norm": 1.8934606299567058, - "learning_rate": 3.166570959738357e-08, - "loss": 0.9687, - "step": 10478 - }, - { - "epoch": 0.9450331424448754, - "grad_norm": 1.4660465439489492, - "learning_rate": 3.1562246412661476e-08, - "loss": 0.9099, - "step": 10479 - }, - { - "epoch": 0.9451233259683456, - "grad_norm": 1.5137774030755022, - "learning_rate": 3.145895118590225e-08, - "loss": 1.0089, - "step": 10480 - }, - { - "epoch": 0.9452135094918158, - "grad_norm": 1.7308925686603933, - "learning_rate": 3.135582392591996e-08, - "loss": 0.8838, - "step": 10481 - }, - { - "epoch": 0.9453036930152862, - "grad_norm": 0.5995737866703575, - "learning_rate": 3.125286464151333e-08, - "loss": 0.7949, - "step": 10482 - }, - { - "epoch": 0.9453938765387564, - "grad_norm": 2.350087866256489, - "learning_rate": 3.115007334146824e-08, - "loss": 0.9394, - "step": 10483 - }, - { - "epoch": 0.9454840600622266, - "grad_norm": 0.6520267790095573, - "learning_rate": 3.104745003455478e-08, - "loss": 0.8409, - "step": 10484 - }, - { - "epoch": 0.9455742435856969, - "grad_norm": 1.4798689862849324, - "learning_rate": 3.094499472952972e-08, - "loss": 0.9874, - "step": 10485 - }, - { - "epoch": 0.9456644271091672, - "grad_norm": 3.0349106854992485, - "learning_rate": 3.084270743513495e-08, - "loss": 0.9371, - "step": 10486 - }, - { - "epoch": 0.9457546106326374, - "grad_norm": 1.3985720947962073, - "learning_rate": 3.074058816009817e-08, - "loss": 0.8606, - "step": 10487 - }, - { - "epoch": 0.9458447941561077, - "grad_norm": 1.5657934954767487, - "learning_rate": 3.063863691313284e-08, - "loss": 0.9212, - "step": 10488 - }, - { - "epoch": 0.9459349776795779, - "grad_norm": 1.5672184462834056, - "learning_rate": 3.0536853702937794e-08, - "loss": 0.9135, - "step": 10489 - }, - { - "epoch": 0.9460251612030482, - "grad_norm": 2.056535477228122, - "learning_rate": 3.043523853819807e-08, - "loss": 0.9708, - "step": 10490 - }, - { - "epoch": 0.9461153447265185, - "grad_norm": 1.6702262377967856, - "learning_rate": 3.0333791427583855e-08, - "loss": 0.985, - "step": 10491 - }, - { - "epoch": 0.9462055282499887, - "grad_norm": 1.3378071779832705, - "learning_rate": 3.023251237975111e-08, - "loss": 1.0906, - "step": 10492 - }, - { - "epoch": 0.946295711773459, - "grad_norm": 0.6642386931999374, - "learning_rate": 3.0131401403341584e-08, - "loss": 0.8586, - "step": 10493 - }, - { - "epoch": 0.9463858952969293, - "grad_norm": 1.2936270153174818, - "learning_rate": 3.00304585069826e-08, - "loss": 0.9517, - "step": 10494 - }, - { - "epoch": 0.9464760788203995, - "grad_norm": 1.7852497368505071, - "learning_rate": 2.992968369928728e-08, - "loss": 0.9585, - "step": 10495 - }, - { - "epoch": 0.9465662623438698, - "grad_norm": 2.519626633368927, - "learning_rate": 2.982907698885429e-08, - "loss": 0.9833, - "step": 10496 - }, - { - "epoch": 0.94665644586734, - "grad_norm": 2.55128910915036, - "learning_rate": 2.9728638384267645e-08, - "loss": 0.9923, - "step": 10497 - }, - { - "epoch": 0.9467466293908103, - "grad_norm": 1.5456248153890029, - "learning_rate": 2.962836789409784e-08, - "loss": 0.9742, - "step": 10498 - }, - { - "epoch": 0.9468368129142806, - "grad_norm": 1.4079612492463252, - "learning_rate": 2.95282655268998e-08, - "loss": 1.0623, - "step": 10499 - }, - { - "epoch": 0.9469269964377508, - "grad_norm": 1.6291618604011893, - "learning_rate": 2.942833129121558e-08, - "loss": 0.9682, - "step": 10500 - }, - { - "epoch": 0.947017179961221, - "grad_norm": 1.7074312836879366, - "learning_rate": 2.9328565195571475e-08, - "loss": 0.8967, - "step": 10501 - }, - { - "epoch": 0.9471073634846914, - "grad_norm": 1.3579503190274813, - "learning_rate": 2.9228967248480675e-08, - "loss": 1.0368, - "step": 10502 - }, - { - "epoch": 0.9471975470081616, - "grad_norm": 1.7453783861255974, - "learning_rate": 2.912953745844082e-08, - "loss": 1.0126, - "step": 10503 - }, - { - "epoch": 0.9472877305316318, - "grad_norm": 1.4217660044719622, - "learning_rate": 2.9030275833936247e-08, - "loss": 0.9617, - "step": 10504 - }, - { - "epoch": 0.9473779140551022, - "grad_norm": 1.5780101772120374, - "learning_rate": 2.893118238343617e-08, - "loss": 0.8767, - "step": 10505 - }, - { - "epoch": 0.9474680975785724, - "grad_norm": 1.7631263945478342, - "learning_rate": 2.8832257115396052e-08, - "loss": 0.8807, - "step": 10506 - }, - { - "epoch": 0.9475582811020427, - "grad_norm": 1.5808937787270592, - "learning_rate": 2.873350003825692e-08, - "loss": 0.9562, - "step": 10507 - }, - { - "epoch": 0.9476484646255129, - "grad_norm": 0.5878060365571988, - "learning_rate": 2.8634911160444696e-08, - "loss": 0.7351, - "step": 10508 - }, - { - "epoch": 0.9477386481489832, - "grad_norm": 1.4289327828606775, - "learning_rate": 2.853649049037199e-08, - "loss": 1.035, - "step": 10509 - }, - { - "epoch": 0.9478288316724535, - "grad_norm": 1.6901303736492166, - "learning_rate": 2.8438238036436525e-08, - "loss": 1.0268, - "step": 10510 - }, - { - "epoch": 0.9479190151959237, - "grad_norm": 1.4792383561648763, - "learning_rate": 2.834015380702137e-08, - "loss": 0.9683, - "step": 10511 - }, - { - "epoch": 0.9480091987193939, - "grad_norm": 0.6066183436146645, - "learning_rate": 2.824223781049606e-08, - "loss": 0.7746, - "step": 10512 - }, - { - "epoch": 0.9480993822428643, - "grad_norm": 1.773027750816841, - "learning_rate": 2.8144490055215465e-08, - "loss": 1.0338, - "step": 10513 - }, - { - "epoch": 0.9481895657663345, - "grad_norm": 1.4263211834835727, - "learning_rate": 2.8046910549519355e-08, - "loss": 1.0073, - "step": 10514 - }, - { - "epoch": 0.9482797492898047, - "grad_norm": 1.404647931824174, - "learning_rate": 2.794949930173418e-08, - "loss": 0.9757, - "step": 10515 - }, - { - "epoch": 0.948369932813275, - "grad_norm": 1.4978990941968935, - "learning_rate": 2.7852256320171296e-08, - "loss": 1.0052, - "step": 10516 - }, - { - "epoch": 0.9484601163367453, - "grad_norm": 1.8044648136292651, - "learning_rate": 2.775518161312851e-08, - "loss": 0.8232, - "step": 10517 - }, - { - "epoch": 0.9485502998602156, - "grad_norm": 1.2998089980147816, - "learning_rate": 2.76582751888883e-08, - "loss": 1.0013, - "step": 10518 - }, - { - "epoch": 0.9486404833836858, - "grad_norm": 2.1256529982802697, - "learning_rate": 2.756153705571962e-08, - "loss": 0.9918, - "step": 10519 - }, - { - "epoch": 0.948730666907156, - "grad_norm": 1.3287264191481023, - "learning_rate": 2.74649672218763e-08, - "loss": 0.9591, - "step": 10520 - }, - { - "epoch": 0.9488208504306264, - "grad_norm": 2.617895594217638, - "learning_rate": 2.7368565695598424e-08, - "loss": 0.9573, - "step": 10521 - }, - { - "epoch": 0.9489110339540966, - "grad_norm": 1.36845086717752, - "learning_rate": 2.727233248511185e-08, - "loss": 0.9012, - "step": 10522 - }, - { - "epoch": 0.9490012174775668, - "grad_norm": 1.5950966923502716, - "learning_rate": 2.71762675986269e-08, - "loss": 0.9253, - "step": 10523 - }, - { - "epoch": 0.9490914010010371, - "grad_norm": 1.505059134756834, - "learning_rate": 2.7080371044341242e-08, - "loss": 1.0119, - "step": 10524 - }, - { - "epoch": 0.9491815845245074, - "grad_norm": 1.6759563790246184, - "learning_rate": 2.6984642830436556e-08, - "loss": 0.9977, - "step": 10525 - }, - { - "epoch": 0.9492717680479776, - "grad_norm": 1.8364974446350315, - "learning_rate": 2.688908296508141e-08, - "loss": 0.955, - "step": 10526 - }, - { - "epoch": 0.9493619515714479, - "grad_norm": 1.8284948904014588, - "learning_rate": 2.679369145642929e-08, - "loss": 1.0214, - "step": 10527 - }, - { - "epoch": 0.9494521350949181, - "grad_norm": 1.5302842656515037, - "learning_rate": 2.669846831261946e-08, - "loss": 0.9919, - "step": 10528 - }, - { - "epoch": 0.9495423186183884, - "grad_norm": 1.55659528831407, - "learning_rate": 2.6603413541776976e-08, - "loss": 0.962, - "step": 10529 - }, - { - "epoch": 0.9496325021418587, - "grad_norm": 1.1934835865664444, - "learning_rate": 2.6508527152012683e-08, - "loss": 0.9291, - "step": 10530 - }, - { - "epoch": 0.9497226856653289, - "grad_norm": 1.3926843546493874, - "learning_rate": 2.641380915142233e-08, - "loss": 0.87, - "step": 10531 - }, - { - "epoch": 0.9498128691887993, - "grad_norm": 2.4387407323992605, - "learning_rate": 2.6319259548088334e-08, - "loss": 0.9896, - "step": 10532 - }, - { - "epoch": 0.9499030527122695, - "grad_norm": 1.7614446794765373, - "learning_rate": 2.6224878350077585e-08, - "loss": 0.9692, - "step": 10533 - }, - { - "epoch": 0.9499932362357397, - "grad_norm": 1.46815624816041, - "learning_rate": 2.6130665565443633e-08, - "loss": 0.8829, - "step": 10534 - }, - { - "epoch": 0.95008341975921, - "grad_norm": 1.2217135466618376, - "learning_rate": 2.603662120222494e-08, - "loss": 0.9707, - "step": 10535 - }, - { - "epoch": 0.9501736032826803, - "grad_norm": 2.0007131020507485, - "learning_rate": 2.59427452684462e-08, - "loss": 0.8184, - "step": 10536 - }, - { - "epoch": 0.9502637868061505, - "grad_norm": 1.5329580317635696, - "learning_rate": 2.5849037772117443e-08, - "loss": 0.9467, - "step": 10537 - }, - { - "epoch": 0.9503539703296208, - "grad_norm": 1.4585104789318213, - "learning_rate": 2.575549872123384e-08, - "loss": 0.9962, - "step": 10538 - }, - { - "epoch": 0.950444153853091, - "grad_norm": 1.265275223989151, - "learning_rate": 2.5662128123776994e-08, - "loss": 0.9702, - "step": 10539 - }, - { - "epoch": 0.9505343373765613, - "grad_norm": 1.4722286180241329, - "learning_rate": 2.5568925987713875e-08, - "loss": 0.9647, - "step": 10540 - }, - { - "epoch": 0.9506245209000316, - "grad_norm": 1.547314436946032, - "learning_rate": 2.5475892320996785e-08, - "loss": 0.9476, - "step": 10541 - }, - { - "epoch": 0.9507147044235018, - "grad_norm": 0.7457632561124746, - "learning_rate": 2.5383027131564038e-08, - "loss": 0.8498, - "step": 10542 - }, - { - "epoch": 0.950804887946972, - "grad_norm": 1.7780444430320337, - "learning_rate": 2.52903304273393e-08, - "loss": 0.9473, - "step": 10543 - }, - { - "epoch": 0.9508950714704424, - "grad_norm": 1.271337732159093, - "learning_rate": 2.519780221623202e-08, - "loss": 0.9522, - "step": 10544 - }, - { - "epoch": 0.9509852549939126, - "grad_norm": 2.6200633176767414, - "learning_rate": 2.510544250613722e-08, - "loss": 0.9256, - "step": 10545 - }, - { - "epoch": 0.9510754385173829, - "grad_norm": 1.5282792058419532, - "learning_rate": 2.501325130493548e-08, - "loss": 1.0644, - "step": 10546 - }, - { - "epoch": 0.9511656220408531, - "grad_norm": 0.7366045526566316, - "learning_rate": 2.4921228620493395e-08, - "loss": 0.8475, - "step": 10547 - }, - { - "epoch": 0.9512558055643234, - "grad_norm": 0.7594094115572475, - "learning_rate": 2.4829374460662244e-08, - "loss": 0.8755, - "step": 10548 - }, - { - "epoch": 0.9513459890877937, - "grad_norm": 1.2919645529953963, - "learning_rate": 2.473768883327976e-08, - "loss": 0.981, - "step": 10549 - }, - { - "epoch": 0.9514361726112639, - "grad_norm": 1.6228457623501025, - "learning_rate": 2.464617174616923e-08, - "loss": 0.9339, - "step": 10550 - }, - { - "epoch": 0.9515263561347341, - "grad_norm": 1.731727710464234, - "learning_rate": 2.455482320713953e-08, - "loss": 0.9673, - "step": 10551 - }, - { - "epoch": 0.9516165396582045, - "grad_norm": 1.6285512706637209, - "learning_rate": 2.4463643223984643e-08, - "loss": 1.0259, - "step": 10552 - }, - { - "epoch": 0.9517067231816747, - "grad_norm": 1.7658561622736557, - "learning_rate": 2.4372631804484567e-08, - "loss": 1.0447, - "step": 10553 - }, - { - "epoch": 0.9517969067051449, - "grad_norm": 1.4506956733603857, - "learning_rate": 2.4281788956405313e-08, - "loss": 1.0493, - "step": 10554 - }, - { - "epoch": 0.9518870902286153, - "grad_norm": 1.8229366265067044, - "learning_rate": 2.4191114687497572e-08, - "loss": 0.9079, - "step": 10555 - }, - { - "epoch": 0.9519772737520855, - "grad_norm": 2.240808255457354, - "learning_rate": 2.4100609005498706e-08, - "loss": 0.9353, - "step": 10556 - }, - { - "epoch": 0.9520674572755558, - "grad_norm": 1.6309818061969257, - "learning_rate": 2.4010271918130764e-08, - "loss": 0.9016, - "step": 10557 - }, - { - "epoch": 0.952157640799026, - "grad_norm": 1.4685343320247324, - "learning_rate": 2.39201034331018e-08, - "loss": 0.9321, - "step": 10558 - }, - { - "epoch": 0.9522478243224963, - "grad_norm": 1.8074263230288228, - "learning_rate": 2.3830103558105663e-08, - "loss": 0.9596, - "step": 10559 - }, - { - "epoch": 0.9523380078459666, - "grad_norm": 2.443119516042047, - "learning_rate": 2.374027230082154e-08, - "loss": 0.9773, - "step": 10560 - }, - { - "epoch": 0.9524281913694368, - "grad_norm": 0.7272806207813706, - "learning_rate": 2.365060966891441e-08, - "loss": 0.864, - "step": 10561 - }, - { - "epoch": 0.952518374892907, - "grad_norm": 1.4259582409676381, - "learning_rate": 2.3561115670034827e-08, - "loss": 0.9659, - "step": 10562 - }, - { - "epoch": 0.9526085584163774, - "grad_norm": 1.4272913630350696, - "learning_rate": 2.3471790311818675e-08, - "loss": 0.9888, - "step": 10563 - }, - { - "epoch": 0.9526987419398476, - "grad_norm": 1.7068586109426356, - "learning_rate": 2.338263360188808e-08, - "loss": 0.9616, - "step": 10564 - }, - { - "epoch": 0.9527889254633178, - "grad_norm": 1.5266054520279624, - "learning_rate": 2.329364554784985e-08, - "loss": 0.9102, - "step": 10565 - }, - { - "epoch": 0.9528791089867881, - "grad_norm": 1.4791837968864159, - "learning_rate": 2.3204826157297465e-08, - "loss": 1.0337, - "step": 10566 - }, - { - "epoch": 0.9529692925102584, - "grad_norm": 1.7968007611404737, - "learning_rate": 2.3116175437809082e-08, - "loss": 0.904, - "step": 10567 - }, - { - "epoch": 0.9530594760337286, - "grad_norm": 0.7150188294178826, - "learning_rate": 2.30276933969491e-08, - "loss": 0.8705, - "step": 10568 - }, - { - "epoch": 0.9531496595571989, - "grad_norm": 1.6209963849713953, - "learning_rate": 2.2939380042267255e-08, - "loss": 1.0263, - "step": 10569 - }, - { - "epoch": 0.9532398430806691, - "grad_norm": 1.6711828635175803, - "learning_rate": 2.2851235381298627e-08, - "loss": 0.8768, - "step": 10570 - }, - { - "epoch": 0.9533300266041395, - "grad_norm": 1.384796271368392, - "learning_rate": 2.2763259421564986e-08, - "loss": 0.8983, - "step": 10571 - }, - { - "epoch": 0.9534202101276097, - "grad_norm": 1.6255877972268737, - "learning_rate": 2.2675452170571873e-08, - "loss": 0.9431, - "step": 10572 - }, - { - "epoch": 0.9535103936510799, - "grad_norm": 1.4847365892597564, - "learning_rate": 2.2587813635812414e-08, - "loss": 0.9639, - "step": 10573 - }, - { - "epoch": 0.9536005771745502, - "grad_norm": 1.9008985979425714, - "learning_rate": 2.2500343824763958e-08, - "loss": 1.0461, - "step": 10574 - }, - { - "epoch": 0.9536907606980205, - "grad_norm": 1.449426024387205, - "learning_rate": 2.2413042744890088e-08, - "loss": 0.9145, - "step": 10575 - }, - { - "epoch": 0.9537809442214907, - "grad_norm": 1.4129322656512662, - "learning_rate": 2.2325910403639514e-08, - "loss": 0.9589, - "step": 10576 - }, - { - "epoch": 0.953871127744961, - "grad_norm": 1.4356525823378934, - "learning_rate": 2.223894680844718e-08, - "loss": 0.9227, - "step": 10577 - }, - { - "epoch": 0.9539613112684313, - "grad_norm": 1.7896786757248802, - "learning_rate": 2.2152151966733146e-08, - "loss": 0.8066, - "step": 10578 - }, - { - "epoch": 0.9540514947919015, - "grad_norm": 1.2029484706030973, - "learning_rate": 2.2065525885903267e-08, - "loss": 0.997, - "step": 10579 - }, - { - "epoch": 0.9541416783153718, - "grad_norm": 1.5298060401443172, - "learning_rate": 2.1979068573348747e-08, - "loss": 1.0192, - "step": 10580 - }, - { - "epoch": 0.954231861838842, - "grad_norm": 1.440579805472032, - "learning_rate": 2.1892780036447013e-08, - "loss": 0.93, - "step": 10581 - }, - { - "epoch": 0.9543220453623124, - "grad_norm": 1.541967371232979, - "learning_rate": 2.1806660282560175e-08, - "loss": 1.1128, - "step": 10582 - }, - { - "epoch": 0.9544122288857826, - "grad_norm": 1.319764514494257, - "learning_rate": 2.1720709319037024e-08, - "loss": 0.9913, - "step": 10583 - }, - { - "epoch": 0.9545024124092528, - "grad_norm": 1.2828550287554559, - "learning_rate": 2.1634927153211023e-08, - "loss": 0.9379, - "step": 10584 - }, - { - "epoch": 0.954592595932723, - "grad_norm": 1.6041193207858957, - "learning_rate": 2.1549313792401437e-08, - "loss": 1.0395, - "step": 10585 - }, - { - "epoch": 0.9546827794561934, - "grad_norm": 1.3768194904817488, - "learning_rate": 2.1463869243913746e-08, - "loss": 1.0574, - "step": 10586 - }, - { - "epoch": 0.9547729629796636, - "grad_norm": 1.71548012154741, - "learning_rate": 2.1378593515037902e-08, - "loss": 0.9494, - "step": 10587 - }, - { - "epoch": 0.9548631465031339, - "grad_norm": 1.5041551260184043, - "learning_rate": 2.129348661305075e-08, - "loss": 1.0356, - "step": 10588 - }, - { - "epoch": 0.9549533300266041, - "grad_norm": 1.472402778485865, - "learning_rate": 2.1208548545213813e-08, - "loss": 0.997, - "step": 10589 - }, - { - "epoch": 0.9550435135500744, - "grad_norm": 2.5963414127160314, - "learning_rate": 2.1123779318774404e-08, - "loss": 0.9284, - "step": 10590 - }, - { - "epoch": 0.9551336970735447, - "grad_norm": 1.32245052064808, - "learning_rate": 2.1039178940965408e-08, - "loss": 0.9098, - "step": 10591 - }, - { - "epoch": 0.9552238805970149, - "grad_norm": 1.9981782484073956, - "learning_rate": 2.0954747419005712e-08, - "loss": 1.0282, - "step": 10592 - }, - { - "epoch": 0.9553140641204851, - "grad_norm": 1.4640758078183036, - "learning_rate": 2.087048476009934e-08, - "loss": 0.9528, - "step": 10593 - }, - { - "epoch": 0.9554042476439555, - "grad_norm": 1.437171898591492, - "learning_rate": 2.0786390971435862e-08, - "loss": 0.9427, - "step": 10594 - }, - { - "epoch": 0.9554944311674257, - "grad_norm": 1.5935231215610224, - "learning_rate": 2.070246606019088e-08, - "loss": 0.9294, - "step": 10595 - }, - { - "epoch": 0.955584614690896, - "grad_norm": 1.9681886145061729, - "learning_rate": 2.0618710033525112e-08, - "loss": 0.885, - "step": 10596 - }, - { - "epoch": 0.9556747982143662, - "grad_norm": 1.5061042081942908, - "learning_rate": 2.053512289858528e-08, - "loss": 0.9303, - "step": 10597 - }, - { - "epoch": 0.9557649817378365, - "grad_norm": 1.909955472477066, - "learning_rate": 2.0451704662503456e-08, - "loss": 1.0058, - "step": 10598 - }, - { - "epoch": 0.9558551652613068, - "grad_norm": 1.5166073005012588, - "learning_rate": 2.0368455332397282e-08, - "loss": 1.0428, - "step": 10599 - }, - { - "epoch": 0.955945348784777, - "grad_norm": 1.4131327992375469, - "learning_rate": 2.0285374915369967e-08, - "loss": 0.9817, - "step": 10600 - }, - { - "epoch": 0.9560355323082473, - "grad_norm": 2.580436271627089, - "learning_rate": 2.020246341851073e-08, - "loss": 1.0038, - "step": 10601 - }, - { - "epoch": 0.9561257158317176, - "grad_norm": 1.4189442203357376, - "learning_rate": 2.0119720848893463e-08, - "loss": 1.0119, - "step": 10602 - }, - { - "epoch": 0.9562158993551878, - "grad_norm": 1.4555551041577313, - "learning_rate": 2.0037147213578964e-08, - "loss": 0.9236, - "step": 10603 - }, - { - "epoch": 0.956306082878658, - "grad_norm": 1.3928833101420601, - "learning_rate": 1.9954742519612265e-08, - "loss": 0.997, - "step": 10604 - }, - { - "epoch": 0.9563962664021284, - "grad_norm": 1.4527788467759883, - "learning_rate": 1.9872506774024633e-08, - "loss": 0.9402, - "step": 10605 - }, - { - "epoch": 0.9564864499255986, - "grad_norm": 1.4325384678760218, - "learning_rate": 1.979043998383334e-08, - "loss": 0.9747, - "step": 10606 - }, - { - "epoch": 0.9565766334490688, - "grad_norm": 1.2352315128180584, - "learning_rate": 1.970854215604034e-08, - "loss": 0.9172, - "step": 10607 - }, - { - "epoch": 0.9566668169725391, - "grad_norm": 1.796363956922082, - "learning_rate": 1.9626813297633826e-08, - "loss": 0.9442, - "step": 10608 - }, - { - "epoch": 0.9567570004960094, - "grad_norm": 1.650756217597045, - "learning_rate": 1.954525341558688e-08, - "loss": 0.8657, - "step": 10609 - }, - { - "epoch": 0.9568471840194797, - "grad_norm": 1.9046189881818416, - "learning_rate": 1.9463862516859498e-08, - "loss": 0.931, - "step": 10610 - }, - { - "epoch": 0.9569373675429499, - "grad_norm": 1.5592739370617887, - "learning_rate": 1.938264060839545e-08, - "loss": 1.0518, - "step": 10611 - }, - { - "epoch": 0.9570275510664201, - "grad_norm": 1.750271912294708, - "learning_rate": 1.9301587697126086e-08, - "loss": 0.9468, - "step": 10612 - }, - { - "epoch": 0.9571177345898905, - "grad_norm": 2.492819105132152, - "learning_rate": 1.9220703789966318e-08, - "loss": 0.9251, - "step": 10613 - }, - { - "epoch": 0.9572079181133607, - "grad_norm": 1.4764149102618993, - "learning_rate": 1.913998889381818e-08, - "loss": 0.9268, - "step": 10614 - }, - { - "epoch": 0.9572981016368309, - "grad_norm": 1.3950020131742655, - "learning_rate": 1.9059443015568387e-08, - "loss": 0.9753, - "step": 10615 - }, - { - "epoch": 0.9573882851603012, - "grad_norm": 1.7941694374021737, - "learning_rate": 1.8979066162089884e-08, - "loss": 0.9781, - "step": 10616 - }, - { - "epoch": 0.9574784686837715, - "grad_norm": 1.530346896046174, - "learning_rate": 1.889885834024052e-08, - "loss": 0.9699, - "step": 10617 - }, - { - "epoch": 0.9575686522072417, - "grad_norm": 1.4066702320782882, - "learning_rate": 1.8818819556864374e-08, - "loss": 0.9265, - "step": 10618 - }, - { - "epoch": 0.957658835730712, - "grad_norm": 1.588373652677294, - "learning_rate": 1.873894981879065e-08, - "loss": 0.9562, - "step": 10619 - }, - { - "epoch": 0.9577490192541822, - "grad_norm": 0.6191557632665406, - "learning_rate": 1.8659249132834342e-08, - "loss": 0.8329, - "step": 10620 - }, - { - "epoch": 0.9578392027776526, - "grad_norm": 1.2511880474016015, - "learning_rate": 1.857971750579579e-08, - "loss": 0.9495, - "step": 10621 - }, - { - "epoch": 0.9579293863011228, - "grad_norm": 1.2882458035952036, - "learning_rate": 1.8500354944461116e-08, - "loss": 0.8541, - "step": 10622 - }, - { - "epoch": 0.958019569824593, - "grad_norm": 1.2559341242304205, - "learning_rate": 1.8421161455602242e-08, - "loss": 0.9774, - "step": 10623 - }, - { - "epoch": 0.9581097533480634, - "grad_norm": 1.3063490560837374, - "learning_rate": 1.834213704597598e-08, - "loss": 0.9999, - "step": 10624 - }, - { - "epoch": 0.9581999368715336, - "grad_norm": 1.3737193559416911, - "learning_rate": 1.8263281722325385e-08, - "loss": 0.9788, - "step": 10625 - }, - { - "epoch": 0.9582901203950038, - "grad_norm": 1.5108038132075798, - "learning_rate": 1.818459549137885e-08, - "loss": 0.9798, - "step": 10626 - }, - { - "epoch": 0.9583803039184741, - "grad_norm": 1.6670395275113905, - "learning_rate": 1.8106078359850117e-08, - "loss": 0.9559, - "step": 10627 - }, - { - "epoch": 0.9584704874419444, - "grad_norm": 1.6193408727196836, - "learning_rate": 1.802773033443894e-08, - "loss": 0.983, - "step": 10628 - }, - { - "epoch": 0.9585606709654146, - "grad_norm": 1.5777780132487544, - "learning_rate": 1.7949551421830413e-08, - "loss": 0.9394, - "step": 10629 - }, - { - "epoch": 0.9586508544888849, - "grad_norm": 1.4351997215236705, - "learning_rate": 1.7871541628694752e-08, - "loss": 0.9393, - "step": 10630 - }, - { - "epoch": 0.9587410380123551, - "grad_norm": 1.3443236346432132, - "learning_rate": 1.779370096168864e-08, - "loss": 0.8786, - "step": 10631 - }, - { - "epoch": 0.9588312215358255, - "grad_norm": 1.4198946733768272, - "learning_rate": 1.771602942745387e-08, - "loss": 0.9308, - "step": 10632 - }, - { - "epoch": 0.9589214050592957, - "grad_norm": 1.8287478958920873, - "learning_rate": 1.763852703261759e-08, - "loss": 0.9501, - "step": 10633 - }, - { - "epoch": 0.9590115885827659, - "grad_norm": 1.3944872680908975, - "learning_rate": 1.756119378379295e-08, - "loss": 1.0265, - "step": 10634 - }, - { - "epoch": 0.9591017721062362, - "grad_norm": 1.5325391364273202, - "learning_rate": 1.7484029687578005e-08, - "loss": 1.063, - "step": 10635 - }, - { - "epoch": 0.9591919556297065, - "grad_norm": 1.2992870925297426, - "learning_rate": 1.740703475055727e-08, - "loss": 0.944, - "step": 10636 - }, - { - "epoch": 0.9592821391531767, - "grad_norm": 1.3003039243195766, - "learning_rate": 1.7330208979300153e-08, - "loss": 1.0094, - "step": 10637 - }, - { - "epoch": 0.959372322676647, - "grad_norm": 1.2276191776325172, - "learning_rate": 1.725355238036208e-08, - "loss": 1.0105, - "step": 10638 - }, - { - "epoch": 0.9594625062001172, - "grad_norm": 1.5798627143078543, - "learning_rate": 1.7177064960283594e-08, - "loss": 1.0181, - "step": 10639 - }, - { - "epoch": 0.9595526897235875, - "grad_norm": 2.481456746056648, - "learning_rate": 1.7100746725591253e-08, - "loss": 0.9629, - "step": 10640 - }, - { - "epoch": 0.9596428732470578, - "grad_norm": 1.8864471682367847, - "learning_rate": 1.7024597682796517e-08, - "loss": 0.9632, - "step": 10641 - }, - { - "epoch": 0.959733056770528, - "grad_norm": 1.5757405613878759, - "learning_rate": 1.6948617838397293e-08, - "loss": 1.038, - "step": 10642 - }, - { - "epoch": 0.9598232402939982, - "grad_norm": 1.1762996810653217, - "learning_rate": 1.6872807198876404e-08, - "loss": 0.9973, - "step": 10643 - }, - { - "epoch": 0.9599134238174686, - "grad_norm": 1.499453286038928, - "learning_rate": 1.679716577070245e-08, - "loss": 1.0598, - "step": 10644 - }, - { - "epoch": 0.9600036073409388, - "grad_norm": 1.3791657546732827, - "learning_rate": 1.6721693560329596e-08, - "loss": 1.0609, - "step": 10645 - }, - { - "epoch": 0.960093790864409, - "grad_norm": 1.7520344352778263, - "learning_rate": 1.6646390574197366e-08, - "loss": 0.8802, - "step": 10646 - }, - { - "epoch": 0.9601839743878793, - "grad_norm": 0.8282631093146959, - "learning_rate": 1.6571256818731504e-08, - "loss": 0.8878, - "step": 10647 - }, - { - "epoch": 0.9602741579113496, - "grad_norm": 1.4293677801125835, - "learning_rate": 1.6496292300342218e-08, - "loss": 0.9042, - "step": 10648 - }, - { - "epoch": 0.9603643414348199, - "grad_norm": 2.315291395365361, - "learning_rate": 1.642149702542639e-08, - "loss": 0.9102, - "step": 10649 - }, - { - "epoch": 0.9604545249582901, - "grad_norm": 1.3588747880023324, - "learning_rate": 1.634687100036558e-08, - "loss": 0.9386, - "step": 10650 - }, - { - "epoch": 0.9605447084817604, - "grad_norm": 1.4858547664999893, - "learning_rate": 1.627241423152781e-08, - "loss": 0.9021, - "step": 10651 - }, - { - "epoch": 0.9606348920052307, - "grad_norm": 1.7560041153670556, - "learning_rate": 1.619812672526555e-08, - "loss": 1.0235, - "step": 10652 - }, - { - "epoch": 0.9607250755287009, - "grad_norm": 1.6837191435458145, - "learning_rate": 1.6124008487917727e-08, - "loss": 0.9535, - "step": 10653 - }, - { - "epoch": 0.9608152590521711, - "grad_norm": 0.6622617878093394, - "learning_rate": 1.6050059525808623e-08, - "loss": 0.8588, - "step": 10654 - }, - { - "epoch": 0.9609054425756415, - "grad_norm": 0.669964424761676, - "learning_rate": 1.597627984524763e-08, - "loss": 0.8434, - "step": 10655 - }, - { - "epoch": 0.9609956260991117, - "grad_norm": 1.3132897680257107, - "learning_rate": 1.590266945253038e-08, - "loss": 1.0071, - "step": 10656 - }, - { - "epoch": 0.9610858096225819, - "grad_norm": 2.066853754230263, - "learning_rate": 1.582922835393763e-08, - "loss": 0.9775, - "step": 10657 - }, - { - "epoch": 0.9611759931460522, - "grad_norm": 1.923534266302245, - "learning_rate": 1.5755956555735473e-08, - "loss": 0.9477, - "step": 10658 - }, - { - "epoch": 0.9612661766695225, - "grad_norm": 0.8053869522252246, - "learning_rate": 1.5682854064176244e-08, - "loss": 0.9496, - "step": 10659 - }, - { - "epoch": 0.9613563601929928, - "grad_norm": 1.3390482773798906, - "learning_rate": 1.5609920885497395e-08, - "loss": 0.9018, - "step": 10660 - }, - { - "epoch": 0.961446543716463, - "grad_norm": 1.5586238083773292, - "learning_rate": 1.5537157025921732e-08, - "loss": 0.9874, - "step": 10661 - }, - { - "epoch": 0.9615367272399332, - "grad_norm": 1.724909271397372, - "learning_rate": 1.5464562491658285e-08, - "loss": 0.9457, - "step": 10662 - }, - { - "epoch": 0.9616269107634036, - "grad_norm": 1.3489768371883977, - "learning_rate": 1.5392137288900764e-08, - "loss": 1.0064, - "step": 10663 - }, - { - "epoch": 0.9617170942868738, - "grad_norm": 0.6125960440168262, - "learning_rate": 1.531988142382934e-08, - "loss": 0.8464, - "step": 10664 - }, - { - "epoch": 0.961807277810344, - "grad_norm": 1.353887069398992, - "learning_rate": 1.5247794902608634e-08, - "loss": 0.9053, - "step": 10665 - }, - { - "epoch": 0.9618974613338143, - "grad_norm": 1.5842525758413835, - "learning_rate": 1.5175877731390398e-08, - "loss": 0.9254, - "step": 10666 - }, - { - "epoch": 0.9619876448572846, - "grad_norm": 1.5189013119269912, - "learning_rate": 1.510412991631016e-08, - "loss": 0.9986, - "step": 10667 - }, - { - "epoch": 0.9620778283807548, - "grad_norm": 1.1658431393636093, - "learning_rate": 1.503255146349014e-08, - "loss": 0.9954, - "step": 10668 - }, - { - "epoch": 0.9621680119042251, - "grad_norm": 1.853964941073947, - "learning_rate": 1.4961142379037893e-08, - "loss": 0.9365, - "step": 10669 - }, - { - "epoch": 0.9622581954276953, - "grad_norm": 1.2839182540445664, - "learning_rate": 1.4889902669046327e-08, - "loss": 1.0085, - "step": 10670 - }, - { - "epoch": 0.9623483789511657, - "grad_norm": 1.48453842066599, - "learning_rate": 1.4818832339594135e-08, - "loss": 0.9638, - "step": 10671 - }, - { - "epoch": 0.9624385624746359, - "grad_norm": 1.4088497291067283, - "learning_rate": 1.474793139674535e-08, - "loss": 0.9188, - "step": 10672 - }, - { - "epoch": 0.9625287459981061, - "grad_norm": 1.6380839697026752, - "learning_rate": 1.4677199846549581e-08, - "loss": 0.9634, - "step": 10673 - }, - { - "epoch": 0.9626189295215765, - "grad_norm": 1.8986349120931452, - "learning_rate": 1.4606637695042224e-08, - "loss": 0.9709, - "step": 10674 - }, - { - "epoch": 0.9627091130450467, - "grad_norm": 1.6375249729727406, - "learning_rate": 1.4536244948243793e-08, - "loss": 1.067, - "step": 10675 - }, - { - "epoch": 0.9627992965685169, - "grad_norm": 1.6773504988201882, - "learning_rate": 1.4466021612160595e-08, - "loss": 0.9414, - "step": 10676 - }, - { - "epoch": 0.9628894800919872, - "grad_norm": 1.7887858214347017, - "learning_rate": 1.4395967692784505e-08, - "loss": 0.9323, - "step": 10677 - }, - { - "epoch": 0.9629796636154575, - "grad_norm": 1.3174089184571665, - "learning_rate": 1.4326083196092963e-08, - "loss": 0.9042, - "step": 10678 - }, - { - "epoch": 0.9630698471389277, - "grad_norm": 1.359428536248795, - "learning_rate": 1.42563681280492e-08, - "loss": 0.9823, - "step": 10679 - }, - { - "epoch": 0.963160030662398, - "grad_norm": 1.3303968857034278, - "learning_rate": 1.4186822494600902e-08, - "loss": 0.9436, - "step": 10680 - }, - { - "epoch": 0.9632502141858682, - "grad_norm": 1.3729588791064888, - "learning_rate": 1.4117446301682877e-08, - "loss": 0.9582, - "step": 10681 - }, - { - "epoch": 0.9633403977093385, - "grad_norm": 1.3265354742345834, - "learning_rate": 1.4048239555214392e-08, - "loss": 0.9213, - "step": 10682 - }, - { - "epoch": 0.9634305812328088, - "grad_norm": 1.5870603147750133, - "learning_rate": 1.3979202261100497e-08, - "loss": 1.0001, - "step": 10683 - }, - { - "epoch": 0.963520764756279, - "grad_norm": 1.7302335488879845, - "learning_rate": 1.3910334425231817e-08, - "loss": 1.0273, - "step": 10684 - }, - { - "epoch": 0.9636109482797492, - "grad_norm": 1.519974121827135, - "learning_rate": 1.384163605348454e-08, - "loss": 0.9686, - "step": 10685 - }, - { - "epoch": 0.9637011318032196, - "grad_norm": 1.538905179266122, - "learning_rate": 1.3773107151720642e-08, - "loss": 1.0282, - "step": 10686 - }, - { - "epoch": 0.9637913153266898, - "grad_norm": 1.5228024455113736, - "learning_rate": 1.3704747725787003e-08, - "loss": 1.0239, - "step": 10687 - }, - { - "epoch": 0.9638814988501601, - "grad_norm": 0.7222836745437704, - "learning_rate": 1.3636557781516512e-08, - "loss": 0.8664, - "step": 10688 - }, - { - "epoch": 0.9639716823736303, - "grad_norm": 1.7204064450091592, - "learning_rate": 1.3568537324727847e-08, - "loss": 0.9737, - "step": 10689 - }, - { - "epoch": 0.9640618658971006, - "grad_norm": 1.5210029124295392, - "learning_rate": 1.3500686361224589e-08, - "loss": 0.9656, - "step": 10690 - }, - { - "epoch": 0.9641520494205709, - "grad_norm": 1.5987415311706863, - "learning_rate": 1.3433004896796108e-08, - "loss": 0.9746, - "step": 10691 - }, - { - "epoch": 0.9642422329440411, - "grad_norm": 1.457632641261205, - "learning_rate": 1.336549293721756e-08, - "loss": 0.8577, - "step": 10692 - }, - { - "epoch": 0.9643324164675113, - "grad_norm": 0.7060410958613563, - "learning_rate": 1.3298150488249227e-08, - "loss": 0.8454, - "step": 10693 - }, - { - "epoch": 0.9644225999909817, - "grad_norm": 1.6283092548278137, - "learning_rate": 1.3230977555637401e-08, - "loss": 1.0448, - "step": 10694 - }, - { - "epoch": 0.9645127835144519, - "grad_norm": 1.2461966349540854, - "learning_rate": 1.3163974145113499e-08, - "loss": 0.9245, - "step": 10695 - }, - { - "epoch": 0.9646029670379221, - "grad_norm": 1.727237966292647, - "learning_rate": 1.3097140262394723e-08, - "loss": 0.8701, - "step": 10696 - }, - { - "epoch": 0.9646931505613925, - "grad_norm": 1.9116807092795511, - "learning_rate": 1.303047591318318e-08, - "loss": 1.0055, - "step": 10697 - }, - { - "epoch": 0.9647833340848627, - "grad_norm": 1.688432365267054, - "learning_rate": 1.2963981103167875e-08, - "loss": 0.9024, - "step": 10698 - }, - { - "epoch": 0.964873517608333, - "grad_norm": 1.3404136757920542, - "learning_rate": 1.2897655838021825e-08, - "loss": 0.9491, - "step": 10699 - }, - { - "epoch": 0.9649637011318032, - "grad_norm": 2.543638267218916, - "learning_rate": 1.2831500123404726e-08, - "loss": 0.977, - "step": 10700 - }, - { - "epoch": 0.9650538846552735, - "grad_norm": 1.9263507240889122, - "learning_rate": 1.2765513964961172e-08, - "loss": 1.1215, - "step": 10701 - }, - { - "epoch": 0.9651440681787438, - "grad_norm": 1.4693239592104885, - "learning_rate": 1.2699697368321549e-08, - "loss": 0.9596, - "step": 10702 - }, - { - "epoch": 0.965234251702214, - "grad_norm": 2.3673048336548375, - "learning_rate": 1.2634050339101366e-08, - "loss": 0.9512, - "step": 10703 - }, - { - "epoch": 0.9653244352256842, - "grad_norm": 1.2100597522894139, - "learning_rate": 1.2568572882902361e-08, - "loss": 0.9433, - "step": 10704 - }, - { - "epoch": 0.9654146187491546, - "grad_norm": 2.2819840804739147, - "learning_rate": 1.2503265005311402e-08, - "loss": 0.9412, - "step": 10705 - }, - { - "epoch": 0.9655048022726248, - "grad_norm": 1.675903813786267, - "learning_rate": 1.2438126711900698e-08, - "loss": 0.9375, - "step": 10706 - }, - { - "epoch": 0.965594985796095, - "grad_norm": 1.323759916590792, - "learning_rate": 1.2373158008228247e-08, - "loss": 0.8187, - "step": 10707 - }, - { - "epoch": 0.9656851693195653, - "grad_norm": 1.3227113679598803, - "learning_rate": 1.2308358899837833e-08, - "loss": 0.9388, - "step": 10708 - }, - { - "epoch": 0.9657753528430356, - "grad_norm": 1.4632368302005978, - "learning_rate": 1.224372939225815e-08, - "loss": 0.9464, - "step": 10709 - }, - { - "epoch": 0.9658655363665059, - "grad_norm": 1.3975291365450564, - "learning_rate": 1.2179269491003674e-08, - "loss": 1.0545, - "step": 10710 - }, - { - "epoch": 0.9659557198899761, - "grad_norm": 1.3742498172026372, - "learning_rate": 1.2114979201574894e-08, - "loss": 0.8426, - "step": 10711 - }, - { - "epoch": 0.9660459034134463, - "grad_norm": 1.4561701216503709, - "learning_rate": 1.2050858529456975e-08, - "loss": 0.9871, - "step": 10712 - }, - { - "epoch": 0.9661360869369167, - "grad_norm": 1.3954132880945198, - "learning_rate": 1.1986907480121545e-08, - "loss": 0.9681, - "step": 10713 - }, - { - "epoch": 0.9662262704603869, - "grad_norm": 1.8424218776916237, - "learning_rate": 1.192312605902468e-08, - "loss": 0.9876, - "step": 10714 - }, - { - "epoch": 0.9663164539838571, - "grad_norm": 0.8167553388049197, - "learning_rate": 1.1859514271608917e-08, - "loss": 0.8842, - "step": 10715 - }, - { - "epoch": 0.9664066375073274, - "grad_norm": 1.967505470374911, - "learning_rate": 1.1796072123301914e-08, - "loss": 0.9983, - "step": 10716 - }, - { - "epoch": 0.9664968210307977, - "grad_norm": 0.6708716063707698, - "learning_rate": 1.1732799619516897e-08, - "loss": 0.8626, - "step": 10717 - }, - { - "epoch": 0.9665870045542679, - "grad_norm": 1.551927349607211, - "learning_rate": 1.1669696765652659e-08, - "loss": 0.9541, - "step": 10718 - }, - { - "epoch": 0.9666771880777382, - "grad_norm": 1.4014834259419562, - "learning_rate": 1.1606763567093336e-08, - "loss": 0.9999, - "step": 10719 - }, - { - "epoch": 0.9667673716012085, - "grad_norm": 0.6865494833030477, - "learning_rate": 1.1544000029208857e-08, - "loss": 0.8414, - "step": 10720 - }, - { - "epoch": 0.9668575551246787, - "grad_norm": 1.8507124482647512, - "learning_rate": 1.148140615735449e-08, - "loss": 0.8346, - "step": 10721 - }, - { - "epoch": 0.966947738648149, - "grad_norm": 1.397811536441081, - "learning_rate": 1.1418981956871076e-08, - "loss": 0.9449, - "step": 10722 - }, - { - "epoch": 0.9670379221716192, - "grad_norm": 2.158188321852513, - "learning_rate": 1.1356727433085245e-08, - "loss": 0.9602, - "step": 10723 - }, - { - "epoch": 0.9671281056950896, - "grad_norm": 1.4555256200256825, - "learning_rate": 1.1294642591308524e-08, - "loss": 0.993, - "step": 10724 - }, - { - "epoch": 0.9672182892185598, - "grad_norm": 1.2508423351308717, - "learning_rate": 1.1232727436838452e-08, - "loss": 0.9567, - "step": 10725 - }, - { - "epoch": 0.96730847274203, - "grad_norm": 1.4239546270910595, - "learning_rate": 1.1170981974958138e-08, - "loss": 1.0153, - "step": 10726 - }, - { - "epoch": 0.9673986562655003, - "grad_norm": 1.44686278632593, - "learning_rate": 1.1109406210936035e-08, - "loss": 1.0323, - "step": 10727 - }, - { - "epoch": 0.9674888397889706, - "grad_norm": 1.4883662322204132, - "learning_rate": 1.1048000150025939e-08, - "loss": 1.0882, - "step": 10728 - }, - { - "epoch": 0.9675790233124408, - "grad_norm": 2.638560554646626, - "learning_rate": 1.0986763797467213e-08, - "loss": 0.8683, - "step": 10729 - }, - { - "epoch": 0.9676692068359111, - "grad_norm": 0.5993814438999555, - "learning_rate": 1.0925697158485459e-08, - "loss": 0.7908, - "step": 10730 - }, - { - "epoch": 0.9677593903593813, - "grad_norm": 1.3396685071399275, - "learning_rate": 1.0864800238290727e-08, - "loss": 0.972, - "step": 10731 - }, - { - "epoch": 0.9678495738828516, - "grad_norm": 1.4854348665311108, - "learning_rate": 1.0804073042079309e-08, - "loss": 0.9326, - "step": 10732 - }, - { - "epoch": 0.9679397574063219, - "grad_norm": 0.6651481281310669, - "learning_rate": 1.0743515575032392e-08, - "loss": 0.8455, - "step": 10733 - }, - { - "epoch": 0.9680299409297921, - "grad_norm": 1.4217384188941522, - "learning_rate": 1.0683127842317619e-08, - "loss": 0.9771, - "step": 10734 - }, - { - "epoch": 0.9681201244532623, - "grad_norm": 9.316402564857762, - "learning_rate": 1.0622909849087314e-08, - "loss": 0.9922, - "step": 10735 - }, - { - "epoch": 0.9682103079767327, - "grad_norm": 1.7048459239040692, - "learning_rate": 1.0562861600479588e-08, - "loss": 1.0279, - "step": 10736 - }, - { - "epoch": 0.9683004915002029, - "grad_norm": 1.4637329772949712, - "learning_rate": 1.0502983101618345e-08, - "loss": 1.0574, - "step": 10737 - }, - { - "epoch": 0.9683906750236732, - "grad_norm": 1.4569011558564262, - "learning_rate": 1.0443274357612386e-08, - "loss": 0.8412, - "step": 10738 - }, - { - "epoch": 0.9684808585471434, - "grad_norm": 1.4053208990611623, - "learning_rate": 1.0383735373556524e-08, - "loss": 0.9848, - "step": 10739 - }, - { - "epoch": 0.9685710420706137, - "grad_norm": 1.2791969784470134, - "learning_rate": 1.0324366154531139e-08, - "loss": 1.0003, - "step": 10740 - }, - { - "epoch": 0.968661225594084, - "grad_norm": 1.5362696989827975, - "learning_rate": 1.0265166705601735e-08, - "loss": 0.933, - "step": 10741 - }, - { - "epoch": 0.9687514091175542, - "grad_norm": 0.7265825166303552, - "learning_rate": 1.0206137031819606e-08, - "loss": 0.9116, - "step": 10742 - }, - { - "epoch": 0.9688415926410244, - "grad_norm": 1.468729059597648, - "learning_rate": 1.0147277138221388e-08, - "loss": 0.9821, - "step": 10743 - }, - { - "epoch": 0.9689317761644948, - "grad_norm": 1.6562564058000018, - "learning_rate": 1.0088587029829287e-08, - "loss": 1.0079, - "step": 10744 - }, - { - "epoch": 0.969021959687965, - "grad_norm": 1.3354748376484626, - "learning_rate": 1.003006671165152e-08, - "loss": 1.064, - "step": 10745 - }, - { - "epoch": 0.9691121432114352, - "grad_norm": 1.4317516164182713, - "learning_rate": 9.971716188680978e-09, - "loss": 0.9609, - "step": 10746 - }, - { - "epoch": 0.9692023267349056, - "grad_norm": 1.5553252479194812, - "learning_rate": 9.91353546589635e-09, - "loss": 0.9372, - "step": 10747 - }, - { - "epoch": 0.9692925102583758, - "grad_norm": 1.4101961928017988, - "learning_rate": 9.855524548262106e-09, - "loss": 0.9058, - "step": 10748 - }, - { - "epoch": 0.969382693781846, - "grad_norm": 1.5834746381380616, - "learning_rate": 9.797683440728288e-09, - "loss": 0.9107, - "step": 10749 - }, - { - "epoch": 0.9694728773053163, - "grad_norm": 1.6077136010243558, - "learning_rate": 9.740012148229836e-09, - "loss": 0.8869, - "step": 10750 - }, - { - "epoch": 0.9695630608287866, - "grad_norm": 1.697241323141253, - "learning_rate": 9.682510675687705e-09, - "loss": 0.9231, - "step": 10751 - }, - { - "epoch": 0.9696532443522569, - "grad_norm": 1.4704573427178802, - "learning_rate": 9.625179028008191e-09, - "loss": 0.8885, - "step": 10752 - }, - { - "epoch": 0.9697434278757271, - "grad_norm": 1.4359339600894037, - "learning_rate": 9.568017210083379e-09, - "loss": 0.8166, - "step": 10753 - }, - { - "epoch": 0.9698336113991973, - "grad_norm": 1.662795036351823, - "learning_rate": 9.511025226790259e-09, - "loss": 0.9476, - "step": 10754 - }, - { - "epoch": 0.9699237949226677, - "grad_norm": 1.4198024022607703, - "learning_rate": 9.454203082992052e-09, - "loss": 0.9434, - "step": 10755 - }, - { - "epoch": 0.9700139784461379, - "grad_norm": 1.5229965745633152, - "learning_rate": 9.3975507835371e-09, - "loss": 0.8719, - "step": 10756 - }, - { - "epoch": 0.9701041619696081, - "grad_norm": 0.646870385489844, - "learning_rate": 9.341068333259094e-09, - "loss": 0.8478, - "step": 10757 - }, - { - "epoch": 0.9701943454930784, - "grad_norm": 1.5216130213283616, - "learning_rate": 9.28475573697729e-09, - "loss": 0.9459, - "step": 10758 - }, - { - "epoch": 0.9702845290165487, - "grad_norm": 2.2484059515165273, - "learning_rate": 9.228612999497177e-09, - "loss": 0.9704, - "step": 10759 - }, - { - "epoch": 0.970374712540019, - "grad_norm": 1.9985318159405874, - "learning_rate": 9.172640125608478e-09, - "loss": 0.9163, - "step": 10760 - }, - { - "epoch": 0.9704648960634892, - "grad_norm": 1.3710446253830946, - "learning_rate": 9.116837120087817e-09, - "loss": 0.9793, - "step": 10761 - }, - { - "epoch": 0.9705550795869594, - "grad_norm": 1.7978276919925376, - "learning_rate": 9.061203987695832e-09, - "loss": 1.044, - "step": 10762 - }, - { - "epoch": 0.9706452631104298, - "grad_norm": 1.7185718790870712, - "learning_rate": 9.005740733180055e-09, - "loss": 0.9147, - "step": 10763 - }, - { - "epoch": 0.9707354466339, - "grad_norm": 1.433645387805745, - "learning_rate": 8.950447361272483e-09, - "loss": 1.0542, - "step": 10764 - }, - { - "epoch": 0.9708256301573702, - "grad_norm": 1.7034587708039004, - "learning_rate": 8.895323876691784e-09, - "loss": 0.9737, - "step": 10765 - }, - { - "epoch": 0.9709158136808405, - "grad_norm": 1.8189413776189252, - "learning_rate": 8.840370284140419e-09, - "loss": 1.0209, - "step": 10766 - }, - { - "epoch": 0.9710059972043108, - "grad_norm": 2.23523690199185, - "learning_rate": 8.78558658830797e-09, - "loss": 0.8794, - "step": 10767 - }, - { - "epoch": 0.971096180727781, - "grad_norm": 0.6977520055724966, - "learning_rate": 8.730972793868696e-09, - "loss": 0.8594, - "step": 10768 - }, - { - "epoch": 0.9711863642512513, - "grad_norm": 1.9552746958810006, - "learning_rate": 8.67652890548265e-09, - "loss": 0.9268, - "step": 10769 - }, - { - "epoch": 0.9712765477747216, - "grad_norm": 1.32465897616026, - "learning_rate": 8.622254927795004e-09, - "loss": 0.942, - "step": 10770 - }, - { - "epoch": 0.9713667312981918, - "grad_norm": 1.7474420108369242, - "learning_rate": 8.568150865436941e-09, - "loss": 0.927, - "step": 10771 - }, - { - "epoch": 0.9714569148216621, - "grad_norm": 1.289872625849125, - "learning_rate": 8.514216723024991e-09, - "loss": 0.9736, - "step": 10772 - }, - { - "epoch": 0.9715470983451323, - "grad_norm": 1.8673647872203265, - "learning_rate": 8.460452505161031e-09, - "loss": 0.8871, - "step": 10773 - }, - { - "epoch": 0.9716372818686027, - "grad_norm": 1.660576693083846, - "learning_rate": 8.4068582164325e-09, - "loss": 0.9934, - "step": 10774 - }, - { - "epoch": 0.9717274653920729, - "grad_norm": 1.6059757369596985, - "learning_rate": 8.353433861412406e-09, - "loss": 0.9467, - "step": 10775 - }, - { - "epoch": 0.9718176489155431, - "grad_norm": 1.376513341920088, - "learning_rate": 8.300179444658883e-09, - "loss": 0.9216, - "step": 10776 - }, - { - "epoch": 0.9719078324390134, - "grad_norm": 1.7195154739619827, - "learning_rate": 8.247094970716296e-09, - "loss": 1.0601, - "step": 10777 - }, - { - "epoch": 0.9719980159624837, - "grad_norm": 1.3760249479299946, - "learning_rate": 8.19418044411413e-09, - "loss": 0.9973, - "step": 10778 - }, - { - "epoch": 0.9720881994859539, - "grad_norm": 1.2826948160699765, - "learning_rate": 8.141435869367219e-09, - "loss": 0.9665, - "step": 10779 - }, - { - "epoch": 0.9721783830094242, - "grad_norm": 1.4009649782806697, - "learning_rate": 8.088861250975742e-09, - "loss": 1.0025, - "step": 10780 - }, - { - "epoch": 0.9722685665328944, - "grad_norm": 1.7669493847880051, - "learning_rate": 8.036456593426111e-09, - "loss": 0.9411, - "step": 10781 - }, - { - "epoch": 0.9723587500563647, - "grad_norm": 2.0185272849797, - "learning_rate": 7.984221901189415e-09, - "loss": 1.0001, - "step": 10782 - }, - { - "epoch": 0.972448933579835, - "grad_norm": 3.520730207087881, - "learning_rate": 7.932157178722976e-09, - "loss": 0.8902, - "step": 10783 - }, - { - "epoch": 0.9725391171033052, - "grad_norm": 1.537790382689767, - "learning_rate": 7.880262430468799e-09, - "loss": 0.9393, - "step": 10784 - }, - { - "epoch": 0.9726293006267754, - "grad_norm": 1.584103430472985, - "learning_rate": 7.828537660855339e-09, - "loss": 0.903, - "step": 10785 - }, - { - "epoch": 0.9727194841502458, - "grad_norm": 1.7524228354102735, - "learning_rate": 7.776982874295512e-09, - "loss": 0.9743, - "step": 10786 - }, - { - "epoch": 0.972809667673716, - "grad_norm": 1.8452127117983517, - "learning_rate": 7.725598075188688e-09, - "loss": 0.8711, - "step": 10787 - }, - { - "epoch": 0.9728998511971862, - "grad_norm": 1.2323492787496977, - "learning_rate": 7.674383267918916e-09, - "loss": 0.9215, - "step": 10788 - }, - { - "epoch": 0.9729900347206565, - "grad_norm": 1.4322511794407857, - "learning_rate": 7.623338456856476e-09, - "loss": 0.9559, - "step": 10789 - }, - { - "epoch": 0.9730802182441268, - "grad_norm": 1.3759924168211437, - "learning_rate": 7.572463646356554e-09, - "loss": 0.8616, - "step": 10790 - }, - { - "epoch": 0.9731704017675971, - "grad_norm": 1.5516858285252744, - "learning_rate": 7.521758840760339e-09, - "loss": 0.9362, - "step": 10791 - }, - { - "epoch": 0.9732605852910673, - "grad_norm": 1.7338651302866102, - "learning_rate": 7.471224044393931e-09, - "loss": 0.9188, - "step": 10792 - }, - { - "epoch": 0.9733507688145376, - "grad_norm": 1.3403191348707895, - "learning_rate": 7.420859261569434e-09, - "loss": 1.0323, - "step": 10793 - }, - { - "epoch": 0.9734409523380079, - "grad_norm": 1.522350921473016, - "learning_rate": 7.370664496584078e-09, - "loss": 1.0246, - "step": 10794 - }, - { - "epoch": 0.9735311358614781, - "grad_norm": 1.506538366675573, - "learning_rate": 7.3206397537211026e-09, - "loss": 0.8871, - "step": 10795 - }, - { - "epoch": 0.9736213193849483, - "grad_norm": 2.0838093277438055, - "learning_rate": 7.270785037248428e-09, - "loss": 0.9339, - "step": 10796 - }, - { - "epoch": 0.9737115029084187, - "grad_norm": 1.6876089272840953, - "learning_rate": 7.221100351420428e-09, - "loss": 0.9281, - "step": 10797 - }, - { - "epoch": 0.9738016864318889, - "grad_norm": 1.4193349094683734, - "learning_rate": 7.171585700475935e-09, - "loss": 0.8936, - "step": 10798 - }, - { - "epoch": 0.9738918699553591, - "grad_norm": 1.5489347981569948, - "learning_rate": 7.122241088640235e-09, - "loss": 1.0231, - "step": 10799 - }, - { - "epoch": 0.9739820534788294, - "grad_norm": 1.403632886422298, - "learning_rate": 7.073066520123516e-09, - "loss": 1.0898, - "step": 10800 - }, - { - "epoch": 0.9740722370022997, - "grad_norm": 1.5263644539972725, - "learning_rate": 7.0240619991217555e-09, - "loss": 0.8582, - "step": 10801 - }, - { - "epoch": 0.97416242052577, - "grad_norm": 1.5008421758927475, - "learning_rate": 6.975227529816052e-09, - "loss": 1.0282, - "step": 10802 - }, - { - "epoch": 0.9742526040492402, - "grad_norm": 1.5617753446551945, - "learning_rate": 6.926563116373296e-09, - "loss": 1.005, - "step": 10803 - }, - { - "epoch": 0.9743427875727104, - "grad_norm": 1.7152756687963016, - "learning_rate": 6.878068762945943e-09, - "loss": 0.9057, - "step": 10804 - }, - { - "epoch": 0.9744329710961808, - "grad_norm": 1.402004487804571, - "learning_rate": 6.829744473671794e-09, - "loss": 0.998, - "step": 10805 - }, - { - "epoch": 0.974523154619651, - "grad_norm": 0.6093314228615697, - "learning_rate": 6.781590252674219e-09, - "loss": 0.7421, - "step": 10806 - }, - { - "epoch": 0.9746133381431212, - "grad_norm": 1.161286955821291, - "learning_rate": 6.733606104061484e-09, - "loss": 0.9741, - "step": 10807 - }, - { - "epoch": 0.9747035216665915, - "grad_norm": 1.2054398301438733, - "learning_rate": 6.6857920319283165e-09, - "loss": 0.9177, - "step": 10808 - }, - { - "epoch": 0.9747937051900618, - "grad_norm": 1.692759547314687, - "learning_rate": 6.638148040354563e-09, - "loss": 0.9425, - "step": 10809 - }, - { - "epoch": 0.974883888713532, - "grad_norm": 1.590198374195249, - "learning_rate": 6.590674133405194e-09, - "loss": 0.9233, - "step": 10810 - }, - { - "epoch": 0.9749740722370023, - "grad_norm": 1.3264374726847326, - "learning_rate": 6.5433703151311914e-09, - "loss": 0.9765, - "step": 10811 - }, - { - "epoch": 0.9750642557604725, - "grad_norm": 1.5309617686017394, - "learning_rate": 6.49623658956866e-09, - "loss": 0.9653, - "step": 10812 - }, - { - "epoch": 0.9751544392839429, - "grad_norm": 2.229340757423037, - "learning_rate": 6.44927296073905e-09, - "loss": 0.8464, - "step": 10813 - }, - { - "epoch": 0.9752446228074131, - "grad_norm": 1.3423673924426707, - "learning_rate": 6.402479432649821e-09, - "loss": 1.0489, - "step": 10814 - }, - { - "epoch": 0.9753348063308833, - "grad_norm": 1.3562598214914534, - "learning_rate": 6.355856009293781e-09, - "loss": 0.9587, - "step": 10815 - }, - { - "epoch": 0.9754249898543537, - "grad_norm": 1.6011832928070124, - "learning_rate": 6.3094026946488575e-09, - "loss": 0.9386, - "step": 10816 - }, - { - "epoch": 0.9755151733778239, - "grad_norm": 1.2264798859394368, - "learning_rate": 6.2631194926787704e-09, - "loss": 0.9657, - "step": 10817 - }, - { - "epoch": 0.9756053569012941, - "grad_norm": 1.3828352586845378, - "learning_rate": 6.217006407332581e-09, - "loss": 0.8659, - "step": 10818 - }, - { - "epoch": 0.9756955404247644, - "grad_norm": 1.4326857515971847, - "learning_rate": 6.1710634425453654e-09, - "loss": 0.9152, - "step": 10819 - }, - { - "epoch": 0.9757857239482347, - "grad_norm": 1.8422550024036373, - "learning_rate": 6.1252906022366544e-09, - "loss": 0.9866, - "step": 10820 - }, - { - "epoch": 0.9758759074717049, - "grad_norm": 1.447994980192177, - "learning_rate": 6.079687890312213e-09, - "loss": 0.9425, - "step": 10821 - }, - { - "epoch": 0.9759660909951752, - "grad_norm": 1.3951201491075462, - "learning_rate": 6.034255310663372e-09, - "loss": 0.9482, - "step": 10822 - }, - { - "epoch": 0.9760562745186454, - "grad_norm": 1.6758740806210024, - "learning_rate": 5.988992867166143e-09, - "loss": 1.0538, - "step": 10823 - }, - { - "epoch": 0.9761464580421157, - "grad_norm": 3.0917580096657034, - "learning_rate": 5.943900563682991e-09, - "loss": 1.0056, - "step": 10824 - }, - { - "epoch": 0.976236641565586, - "grad_norm": 0.7385882592042531, - "learning_rate": 5.898978404061506e-09, - "loss": 0.9127, - "step": 10825 - }, - { - "epoch": 0.9763268250890562, - "grad_norm": 1.6177217642215727, - "learning_rate": 5.85422639213462e-09, - "loss": 0.9597, - "step": 10826 - }, - { - "epoch": 0.9764170086125264, - "grad_norm": 1.3954600171819784, - "learning_rate": 5.809644531720614e-09, - "loss": 0.9722, - "step": 10827 - }, - { - "epoch": 0.9765071921359968, - "grad_norm": 1.7615160772633196, - "learning_rate": 5.765232826623556e-09, - "loss": 0.9912, - "step": 10828 - }, - { - "epoch": 0.976597375659467, - "grad_norm": 1.615624893749241, - "learning_rate": 5.720991280633081e-09, - "loss": 0.9318, - "step": 10829 - }, - { - "epoch": 0.9766875591829373, - "grad_norm": 1.2715743775749329, - "learning_rate": 5.676919897523724e-09, - "loss": 0.9468, - "step": 10830 - }, - { - "epoch": 0.9767777427064075, - "grad_norm": 1.3424051785503297, - "learning_rate": 5.633018681056256e-09, - "loss": 0.975, - "step": 10831 - }, - { - "epoch": 0.9768679262298778, - "grad_norm": 1.5030295454713911, - "learning_rate": 5.589287634976569e-09, - "loss": 0.9656, - "step": 10832 - }, - { - "epoch": 0.9769581097533481, - "grad_norm": 1.314647416468762, - "learning_rate": 5.5457267630159014e-09, - "loss": 0.9378, - "step": 10833 - }, - { - "epoch": 0.9770482932768183, - "grad_norm": 5.1679558324904695, - "learning_rate": 5.5023360688910555e-09, - "loss": 1.0004, - "step": 10834 - }, - { - "epoch": 0.9771384768002885, - "grad_norm": 1.352017981894956, - "learning_rate": 5.459115556304183e-09, - "loss": 1.0096, - "step": 10835 - }, - { - "epoch": 0.9772286603237589, - "grad_norm": 1.5095243076061764, - "learning_rate": 5.416065228943889e-09, - "loss": 0.9032, - "step": 10836 - }, - { - "epoch": 0.9773188438472291, - "grad_norm": 2.032330739010521, - "learning_rate": 5.373185090482568e-09, - "loss": 0.9275, - "step": 10837 - }, - { - "epoch": 0.9774090273706993, - "grad_norm": 1.510970450419738, - "learning_rate": 5.330475144579516e-09, - "loss": 0.9513, - "step": 10838 - }, - { - "epoch": 0.9774992108941697, - "grad_norm": 1.3949377498615376, - "learning_rate": 5.2879353948787065e-09, - "loss": 1.0468, - "step": 10839 - }, - { - "epoch": 0.9775893944176399, - "grad_norm": 1.7705610028465626, - "learning_rate": 5.245565845010125e-09, - "loss": 1.0197, - "step": 10840 - }, - { - "epoch": 0.9776795779411102, - "grad_norm": 1.366243963271867, - "learning_rate": 5.2033664985886575e-09, - "loss": 0.8961, - "step": 10841 - }, - { - "epoch": 0.9777697614645804, - "grad_norm": 1.4193626257467467, - "learning_rate": 5.161337359215201e-09, - "loss": 1.1074, - "step": 10842 - }, - { - "epoch": 0.9778599449880507, - "grad_norm": 1.5717821218910364, - "learning_rate": 5.119478430475999e-09, - "loss": 0.983, - "step": 10843 - }, - { - "epoch": 0.977950128511521, - "grad_norm": 2.268527782476054, - "learning_rate": 5.077789715942416e-09, - "loss": 0.904, - "step": 10844 - }, - { - "epoch": 0.9780403120349912, - "grad_norm": 1.7161008359097112, - "learning_rate": 5.036271219171606e-09, - "loss": 0.9836, - "step": 10845 - }, - { - "epoch": 0.9781304955584614, - "grad_norm": 2.097659546123167, - "learning_rate": 4.994922943706514e-09, - "loss": 1.0004, - "step": 10846 - }, - { - "epoch": 0.9782206790819318, - "grad_norm": 1.3305165916707162, - "learning_rate": 4.953744893074763e-09, - "loss": 0.9488, - "step": 10847 - }, - { - "epoch": 0.978310862605402, - "grad_norm": 1.9968030531552705, - "learning_rate": 4.912737070789985e-09, - "loss": 0.8945, - "step": 10848 - }, - { - "epoch": 0.9784010461288722, - "grad_norm": 1.6380694876895894, - "learning_rate": 4.871899480351604e-09, - "loss": 1.0388, - "step": 10849 - }, - { - "epoch": 0.9784912296523425, - "grad_norm": 1.388049966913145, - "learning_rate": 4.831232125243501e-09, - "loss": 0.9775, - "step": 10850 - }, - { - "epoch": 0.9785814131758128, - "grad_norm": 1.3077610839799914, - "learning_rate": 4.7907350089360086e-09, - "loss": 1.0604, - "step": 10851 - }, - { - "epoch": 0.978671596699283, - "grad_norm": 1.4737108739523819, - "learning_rate": 4.750408134884365e-09, - "loss": 0.8547, - "step": 10852 - }, - { - "epoch": 0.9787617802227533, - "grad_norm": 1.5833449195973293, - "learning_rate": 4.710251506529816e-09, - "loss": 0.8727, - "step": 10853 - }, - { - "epoch": 0.9788519637462235, - "grad_norm": 1.3537910038170557, - "learning_rate": 4.6702651272982894e-09, - "loss": 0.9858, - "step": 10854 - }, - { - "epoch": 0.9789421472696939, - "grad_norm": 1.4792313449670351, - "learning_rate": 4.630449000602166e-09, - "loss": 0.9382, - "step": 10855 - }, - { - "epoch": 0.9790323307931641, - "grad_norm": 2.3472921435032124, - "learning_rate": 4.590803129838283e-09, - "loss": 1.0298, - "step": 10856 - }, - { - "epoch": 0.9791225143166343, - "grad_norm": 1.6264117050995617, - "learning_rate": 4.551327518389714e-09, - "loss": 0.9631, - "step": 10857 - }, - { - "epoch": 0.9792126978401046, - "grad_norm": 1.7071764292838965, - "learning_rate": 4.512022169624652e-09, - "loss": 0.9535, - "step": 10858 - }, - { - "epoch": 0.9793028813635749, - "grad_norm": 1.4141947226063216, - "learning_rate": 4.472887086896637e-09, - "loss": 0.9388, - "step": 10859 - }, - { - "epoch": 0.9793930648870451, - "grad_norm": 1.6123409163195705, - "learning_rate": 4.433922273545443e-09, - "loss": 0.9139, - "step": 10860 - }, - { - "epoch": 0.9794832484105154, - "grad_norm": 0.5797966168057865, - "learning_rate": 4.395127732895299e-09, - "loss": 0.8272, - "step": 10861 - }, - { - "epoch": 0.9795734319339856, - "grad_norm": 1.9760112973944304, - "learning_rate": 4.356503468256445e-09, - "loss": 1.0433, - "step": 10862 - }, - { - "epoch": 0.979663615457456, - "grad_norm": 2.1669762341331977, - "learning_rate": 4.318049482924913e-09, - "loss": 0.9751, - "step": 10863 - }, - { - "epoch": 0.9797537989809262, - "grad_norm": 1.5507126031474772, - "learning_rate": 4.279765780181188e-09, - "loss": 0.9318, - "step": 10864 - }, - { - "epoch": 0.9798439825043964, - "grad_norm": 1.8755326468164428, - "learning_rate": 4.241652363291992e-09, - "loss": 0.9777, - "step": 10865 - }, - { - "epoch": 0.9799341660278668, - "grad_norm": 2.0226250272943815, - "learning_rate": 4.203709235509834e-09, - "loss": 0.9394, - "step": 10866 - }, - { - "epoch": 0.980024349551337, - "grad_norm": 1.8086090416807648, - "learning_rate": 4.165936400071679e-09, - "loss": 0.9805, - "step": 10867 - }, - { - "epoch": 0.9801145330748072, - "grad_norm": 1.2508777397722721, - "learning_rate": 4.12833386020095e-09, - "loss": 0.9142, - "step": 10868 - }, - { - "epoch": 0.9802047165982775, - "grad_norm": 3.3239361545385075, - "learning_rate": 4.090901619105746e-09, - "loss": 1.0087, - "step": 10869 - }, - { - "epoch": 0.9802949001217478, - "grad_norm": 1.41364897693347, - "learning_rate": 4.053639679980181e-09, - "loss": 0.8604, - "step": 10870 - }, - { - "epoch": 0.980385083645218, - "grad_norm": 1.6844480254527476, - "learning_rate": 4.01654804600371e-09, - "loss": 0.9727, - "step": 10871 - }, - { - "epoch": 0.9804752671686883, - "grad_norm": 1.3489098509492226, - "learning_rate": 3.9796267203409114e-09, - "loss": 0.9732, - "step": 10872 - }, - { - "epoch": 0.9805654506921585, - "grad_norm": 1.5853450025418285, - "learning_rate": 3.942875706142379e-09, - "loss": 0.9469, - "step": 10873 - }, - { - "epoch": 0.9806556342156288, - "grad_norm": 1.7381973074886354, - "learning_rate": 3.906295006543825e-09, - "loss": 1.0213, - "step": 10874 - }, - { - "epoch": 0.9807458177390991, - "grad_norm": 1.5036473435316264, - "learning_rate": 3.8698846246665305e-09, - "loss": 0.9457, - "step": 10875 - }, - { - "epoch": 0.9808360012625693, - "grad_norm": 2.3148191009764374, - "learning_rate": 3.833644563617344e-09, - "loss": 0.9678, - "step": 10876 - }, - { - "epoch": 0.9809261847860395, - "grad_norm": 1.6809783943054064, - "learning_rate": 3.797574826488237e-09, - "loss": 0.9688, - "step": 10877 - }, - { - "epoch": 0.9810163683095099, - "grad_norm": 1.420530584900994, - "learning_rate": 3.761675416356969e-09, - "loss": 0.8948, - "step": 10878 - }, - { - "epoch": 0.9811065518329801, - "grad_norm": 1.3848362139316346, - "learning_rate": 3.725946336286867e-09, - "loss": 0.8729, - "step": 10879 - }, - { - "epoch": 0.9811967353564504, - "grad_norm": 1.3191658322606166, - "learning_rate": 3.6903875893261604e-09, - "loss": 0.9426, - "step": 10880 - }, - { - "epoch": 0.9812869188799206, - "grad_norm": 1.566379765693344, - "learning_rate": 3.6549991785093105e-09, - "loss": 0.9022, - "step": 10881 - }, - { - "epoch": 0.9813771024033909, - "grad_norm": 1.6423012299359832, - "learning_rate": 3.6197811068554575e-09, - "loss": 0.9515, - "step": 10882 - }, - { - "epoch": 0.9814672859268612, - "grad_norm": 1.2326643474021317, - "learning_rate": 3.584733377369975e-09, - "loss": 1.0453, - "step": 10883 - }, - { - "epoch": 0.9815574694503314, - "grad_norm": 1.6564608745565854, - "learning_rate": 3.549855993043138e-09, - "loss": 0.9349, - "step": 10884 - }, - { - "epoch": 0.9816476529738016, - "grad_norm": 1.7148908117696924, - "learning_rate": 3.5151489568507887e-09, - "loss": 0.9549, - "step": 10885 - }, - { - "epoch": 0.981737836497272, - "grad_norm": 1.6844228309535287, - "learning_rate": 3.4806122717545572e-09, - "loss": 0.9811, - "step": 10886 - }, - { - "epoch": 0.9818280200207422, - "grad_norm": 1.6405173856772597, - "learning_rate": 3.446245940701198e-09, - "loss": 0.9272, - "step": 10887 - }, - { - "epoch": 0.9819182035442124, - "grad_norm": 1.453868962649671, - "learning_rate": 3.41204996662281e-09, - "loss": 0.9034, - "step": 10888 - }, - { - "epoch": 0.9820083870676828, - "grad_norm": 1.429858004716032, - "learning_rate": 3.3780243524375028e-09, - "loss": 0.9651, - "step": 10889 - }, - { - "epoch": 0.982098570591153, - "grad_norm": 1.4603715912324369, - "learning_rate": 3.3441691010485107e-09, - "loss": 0.9917, - "step": 10890 - }, - { - "epoch": 0.9821887541146233, - "grad_norm": 1.488789949561729, - "learning_rate": 3.3104842153444113e-09, - "loss": 1.0451, - "step": 10891 - }, - { - "epoch": 0.9822789376380935, - "grad_norm": 1.5249973234559633, - "learning_rate": 3.27696969819935e-09, - "loss": 0.9615, - "step": 10892 - }, - { - "epoch": 0.9823691211615638, - "grad_norm": 1.2924557961997425, - "learning_rate": 3.2436255524732615e-09, - "loss": 0.9229, - "step": 10893 - }, - { - "epoch": 0.9824593046850341, - "grad_norm": 1.3079234214058184, - "learning_rate": 3.210451781010759e-09, - "loss": 0.9482, - "step": 10894 - }, - { - "epoch": 0.9825494882085043, - "grad_norm": 1.4989319813815267, - "learning_rate": 3.1774483866426895e-09, - "loss": 0.8303, - "step": 10895 - }, - { - "epoch": 0.9826396717319745, - "grad_norm": 1.2770471724368553, - "learning_rate": 3.144615372185244e-09, - "loss": 0.9286, - "step": 10896 - }, - { - "epoch": 0.9827298552554449, - "grad_norm": 1.2562321277315, - "learning_rate": 3.1119527404399604e-09, - "loss": 1.0485, - "step": 10897 - }, - { - "epoch": 0.9828200387789151, - "grad_norm": 1.9034694341154885, - "learning_rate": 3.0794604941932754e-09, - "loss": 0.9685, - "step": 10898 - }, - { - "epoch": 0.9829102223023853, - "grad_norm": 1.4965435576977604, - "learning_rate": 3.0471386362180827e-09, - "loss": 0.8661, - "step": 10899 - }, - { - "epoch": 0.9830004058258556, - "grad_norm": 1.6587844746161386, - "learning_rate": 3.0149871692719542e-09, - "loss": 1.0183, - "step": 10900 - }, - { - "epoch": 0.9830905893493259, - "grad_norm": 1.4741952478812745, - "learning_rate": 2.9830060960984728e-09, - "loss": 0.9502, - "step": 10901 - }, - { - "epoch": 0.9831807728727961, - "grad_norm": 0.6771503097468686, - "learning_rate": 2.9511954194263442e-09, - "loss": 0.8403, - "step": 10902 - }, - { - "epoch": 0.9832709563962664, - "grad_norm": 1.5057417332553567, - "learning_rate": 2.9195551419698426e-09, - "loss": 0.9693, - "step": 10903 - }, - { - "epoch": 0.9833611399197366, - "grad_norm": 1.755939961282499, - "learning_rate": 2.888085266428808e-09, - "loss": 0.9255, - "step": 10904 - }, - { - "epoch": 0.983451323443207, - "grad_norm": 1.7175131075163173, - "learning_rate": 2.8567857954882037e-09, - "loss": 0.919, - "step": 10905 - }, - { - "epoch": 0.9835415069666772, - "grad_norm": 1.6661449887495254, - "learning_rate": 2.82565673181856e-09, - "loss": 1.0181, - "step": 10906 - }, - { - "epoch": 0.9836316904901474, - "grad_norm": 1.390680204860176, - "learning_rate": 2.7946980780764184e-09, - "loss": 0.9898, - "step": 10907 - }, - { - "epoch": 0.9837218740136177, - "grad_norm": 1.9068041761981163, - "learning_rate": 2.763909836903e-09, - "loss": 1.0231, - "step": 10908 - }, - { - "epoch": 0.983812057537088, - "grad_norm": 1.288130544681082, - "learning_rate": 2.7332920109255364e-09, - "loss": 0.882, - "step": 10909 - }, - { - "epoch": 0.9839022410605582, - "grad_norm": 1.4499573997290585, - "learning_rate": 2.702844602756382e-09, - "loss": 0.9468, - "step": 10910 - }, - { - "epoch": 0.9839924245840285, - "grad_norm": 2.4538774247588178, - "learning_rate": 2.6725676149936814e-09, - "loss": 0.9389, - "step": 10911 - }, - { - "epoch": 0.9840826081074988, - "grad_norm": 1.4478198597698522, - "learning_rate": 2.642461050220479e-09, - "loss": 1.0386, - "step": 10912 - }, - { - "epoch": 0.984172791630969, - "grad_norm": 1.4011161340070035, - "learning_rate": 2.612524911005609e-09, - "loss": 0.9423, - "step": 10913 - }, - { - "epoch": 0.9842629751544393, - "grad_norm": 3.407830667775248, - "learning_rate": 2.582759199903917e-09, - "loss": 0.8448, - "step": 10914 - }, - { - "epoch": 0.9843531586779095, - "grad_norm": 1.4045050072857421, - "learning_rate": 2.553163919454704e-09, - "loss": 0.9219, - "step": 10915 - }, - { - "epoch": 0.9844433422013799, - "grad_norm": 1.4162174054777914, - "learning_rate": 2.523739072183506e-09, - "loss": 0.95, - "step": 10916 - }, - { - "epoch": 0.9845335257248501, - "grad_norm": 1.7000126529671515, - "learning_rate": 2.4944846606007597e-09, - "loss": 0.9339, - "step": 10917 - }, - { - "epoch": 0.9846237092483203, - "grad_norm": 0.6427101996676542, - "learning_rate": 2.46540068720269e-09, - "loss": 0.8791, - "step": 10918 - }, - { - "epoch": 0.9847138927717906, - "grad_norm": 1.3121807527561695, - "learning_rate": 2.4364871544708674e-09, - "loss": 1.0043, - "step": 10919 - }, - { - "epoch": 0.9848040762952609, - "grad_norm": 1.49500085339474, - "learning_rate": 2.4077440648726523e-09, - "loss": 0.9092, - "step": 10920 - }, - { - "epoch": 0.9848942598187311, - "grad_norm": 1.4376030760588345, - "learning_rate": 2.379171420860082e-09, - "loss": 1.0302, - "step": 10921 - }, - { - "epoch": 0.9849844433422014, - "grad_norm": 1.5948993801798121, - "learning_rate": 2.3507692248714296e-09, - "loss": 0.8917, - "step": 10922 - }, - { - "epoch": 0.9850746268656716, - "grad_norm": 1.6099434746717611, - "learning_rate": 2.322537479330089e-09, - "loss": 0.9769, - "step": 10923 - }, - { - "epoch": 0.9851648103891419, - "grad_norm": 1.7362536316758823, - "learning_rate": 2.2944761866450223e-09, - "loss": 1.0385, - "step": 10924 - }, - { - "epoch": 0.9852549939126122, - "grad_norm": 1.8501079012359847, - "learning_rate": 2.266585349210315e-09, - "loss": 0.9508, - "step": 10925 - }, - { - "epoch": 0.9853451774360824, - "grad_norm": 1.6900604743972185, - "learning_rate": 2.2388649694060623e-09, - "loss": 0.9226, - "step": 10926 - }, - { - "epoch": 0.9854353609595526, - "grad_norm": 2.3009960547077255, - "learning_rate": 2.211315049597262e-09, - "loss": 1.028, - "step": 10927 - }, - { - "epoch": 0.985525544483023, - "grad_norm": 1.6862324793352885, - "learning_rate": 2.1839355921349224e-09, - "loss": 1.0078, - "step": 10928 - }, - { - "epoch": 0.9856157280064932, - "grad_norm": 1.9844955047302772, - "learning_rate": 2.156726599354952e-09, - "loss": 0.9573, - "step": 10929 - }, - { - "epoch": 0.9857059115299635, - "grad_norm": 1.4646347507153208, - "learning_rate": 2.129688073578828e-09, - "loss": 0.9896, - "step": 10930 - }, - { - "epoch": 0.9857960950534337, - "grad_norm": 1.9301218015156227, - "learning_rate": 2.1028200171142597e-09, - "loss": 1.0962, - "step": 10931 - }, - { - "epoch": 0.985886278576904, - "grad_norm": 2.0204591961889147, - "learning_rate": 2.076122432253191e-09, - "loss": 0.9498, - "step": 10932 - }, - { - "epoch": 0.9859764621003743, - "grad_norm": 1.7898007664541473, - "learning_rate": 2.0495953212738005e-09, - "loss": 1.0065, - "step": 10933 - }, - { - "epoch": 0.9860666456238445, - "grad_norm": 1.2440092535441376, - "learning_rate": 2.0232386864396102e-09, - "loss": 1.047, - "step": 10934 - }, - { - "epoch": 0.9861568291473148, - "grad_norm": 1.3806545855701062, - "learning_rate": 1.9970525299992656e-09, - "loss": 0.9868, - "step": 10935 - }, - { - "epoch": 0.9862470126707851, - "grad_norm": 3.199189387409134, - "learning_rate": 1.9710368541874245e-09, - "loss": 0.9282, - "step": 10936 - }, - { - "epoch": 0.9863371961942553, - "grad_norm": 0.5757080280013898, - "learning_rate": 1.945191661223644e-09, - "loss": 0.832, - "step": 10937 - }, - { - "epoch": 0.9864273797177255, - "grad_norm": 1.613616317424247, - "learning_rate": 1.9195169533132714e-09, - "loss": 1.0096, - "step": 10938 - }, - { - "epoch": 0.9865175632411959, - "grad_norm": 0.6022172065142216, - "learning_rate": 1.894012732646999e-09, - "loss": 0.84, - "step": 10939 - }, - { - "epoch": 0.9866077467646661, - "grad_norm": 1.5249303301546473, - "learning_rate": 1.8686790014010854e-09, - "loss": 0.9209, - "step": 10940 - }, - { - "epoch": 0.9866979302881363, - "grad_norm": 0.5997645273679324, - "learning_rate": 1.8435157617369134e-09, - "loss": 0.8005, - "step": 10941 - }, - { - "epoch": 0.9867881138116066, - "grad_norm": 2.1456164895524217, - "learning_rate": 1.818523015801876e-09, - "loss": 1.1085, - "step": 10942 - }, - { - "epoch": 0.9868782973350769, - "grad_norm": 1.5903081941911752, - "learning_rate": 1.7937007657282677e-09, - "loss": 0.9224, - "step": 10943 - }, - { - "epoch": 0.9869684808585472, - "grad_norm": 1.5317992179114535, - "learning_rate": 1.7690490136341718e-09, - "loss": 0.9054, - "step": 10944 - }, - { - "epoch": 0.9870586643820174, - "grad_norm": 1.2817780871795084, - "learning_rate": 1.744567761622795e-09, - "loss": 0.9625, - "step": 10945 - }, - { - "epoch": 0.9871488479054876, - "grad_norm": 1.4725485845817983, - "learning_rate": 1.7202570117831327e-09, - "loss": 0.9634, - "step": 10946 - }, - { - "epoch": 0.987239031428958, - "grad_norm": 1.899366112424964, - "learning_rate": 1.696116766189526e-09, - "loss": 0.9445, - "step": 10947 - }, - { - "epoch": 0.9873292149524282, - "grad_norm": 1.3597746952042586, - "learning_rate": 1.6721470269021042e-09, - "loss": 1.0071, - "step": 10948 - }, - { - "epoch": 0.9874193984758984, - "grad_norm": 2.034273919144812, - "learning_rate": 1.6483477959654546e-09, - "loss": 0.9322, - "step": 10949 - }, - { - "epoch": 0.9875095819993687, - "grad_norm": 1.3272205639145702, - "learning_rate": 1.6247190754106187e-09, - "loss": 0.9463, - "step": 10950 - }, - { - "epoch": 0.987599765522839, - "grad_norm": 1.3551210072809308, - "learning_rate": 1.6012608672537619e-09, - "loss": 0.9292, - "step": 10951 - }, - { - "epoch": 0.9876899490463092, - "grad_norm": 1.7117765973134775, - "learning_rate": 1.5779731734963942e-09, - "loss": 0.9643, - "step": 10952 - }, - { - "epoch": 0.9877801325697795, - "grad_norm": 1.5000696166096068, - "learning_rate": 1.5548559961253705e-09, - "loss": 1.0168, - "step": 10953 - }, - { - "epoch": 0.9878703160932497, - "grad_norm": 1.5942254946606291, - "learning_rate": 1.5319093371135573e-09, - "loss": 0.8899, - "step": 10954 - }, - { - "epoch": 0.9879604996167201, - "grad_norm": 1.4014766211762772, - "learning_rate": 1.5091331984184997e-09, - "loss": 1.0601, - "step": 10955 - }, - { - "epoch": 0.9880506831401903, - "grad_norm": 1.1979622984225224, - "learning_rate": 1.486527581983754e-09, - "loss": 0.9609, - "step": 10956 - }, - { - "epoch": 0.9881408666636605, - "grad_norm": 1.4045023761157438, - "learning_rate": 1.4640924897382223e-09, - "loss": 1.022, - "step": 10957 - }, - { - "epoch": 0.9882310501871309, - "grad_norm": 1.2370325284457155, - "learning_rate": 1.4418279235961506e-09, - "loss": 1.0023, - "step": 10958 - }, - { - "epoch": 0.9883212337106011, - "grad_norm": 2.353783151450863, - "learning_rate": 1.4197338854573526e-09, - "loss": 0.8703, - "step": 10959 - }, - { - "epoch": 0.9884114172340713, - "grad_norm": 1.6045040750092197, - "learning_rate": 1.3978103772067651e-09, - "loss": 0.926, - "step": 10960 - }, - { - "epoch": 0.9885016007575416, - "grad_norm": 1.5402759086858204, - "learning_rate": 1.3760574007153358e-09, - "loss": 0.8887, - "step": 10961 - }, - { - "epoch": 0.9885917842810119, - "grad_norm": 1.9114373714395962, - "learning_rate": 1.3544749578389137e-09, - "loss": 1.0537, - "step": 10962 - }, - { - "epoch": 0.9886819678044821, - "grad_norm": 1.4333137425850726, - "learning_rate": 1.3330630504189143e-09, - "loss": 1.0057, - "step": 10963 - }, - { - "epoch": 0.9887721513279524, - "grad_norm": 1.6489579522651945, - "learning_rate": 1.3118216802827652e-09, - "loss": 0.9383, - "step": 10964 - }, - { - "epoch": 0.9888623348514226, - "grad_norm": 1.4286291655725811, - "learning_rate": 1.2907508492425722e-09, - "loss": 0.9404, - "step": 10965 - }, - { - "epoch": 0.988952518374893, - "grad_norm": 1.5592240132412076, - "learning_rate": 1.2698505590962305e-09, - "loss": 0.93, - "step": 10966 - }, - { - "epoch": 0.9890427018983632, - "grad_norm": 1.3640969795335114, - "learning_rate": 1.2491208116272022e-09, - "loss": 0.961, - "step": 10967 - }, - { - "epoch": 0.9891328854218334, - "grad_norm": 1.4677439551523803, - "learning_rate": 1.2285616086040728e-09, - "loss": 0.9967, - "step": 10968 - }, - { - "epoch": 0.9892230689453037, - "grad_norm": 1.2442712640369042, - "learning_rate": 1.2081729517812167e-09, - "loss": 1.0291, - "step": 10969 - }, - { - "epoch": 0.989313252468774, - "grad_norm": 1.4031729624139877, - "learning_rate": 1.1879548428983533e-09, - "loss": 0.9581, - "step": 10970 - }, - { - "epoch": 0.9894034359922442, - "grad_norm": 1.6615256013875206, - "learning_rate": 1.167907283680547e-09, - "loss": 0.914, - "step": 10971 - }, - { - "epoch": 0.9894936195157145, - "grad_norm": 1.6881897540986042, - "learning_rate": 1.1480302758382077e-09, - "loss": 0.9054, - "step": 10972 - }, - { - "epoch": 0.9895838030391847, - "grad_norm": 1.3469157077806584, - "learning_rate": 1.1283238210675338e-09, - "loss": 0.9551, - "step": 10973 - }, - { - "epoch": 0.989673986562655, - "grad_norm": 1.9322630339469282, - "learning_rate": 1.1087879210498474e-09, - "loss": 0.8961, - "step": 10974 - }, - { - "epoch": 0.9897641700861253, - "grad_norm": 1.4420881441718163, - "learning_rate": 1.0894225774522592e-09, - "loss": 1.0119, - "step": 10975 - }, - { - "epoch": 0.9898543536095955, - "grad_norm": 0.6308157465821066, - "learning_rate": 1.070227791927003e-09, - "loss": 0.8478, - "step": 10976 - }, - { - "epoch": 0.9899445371330657, - "grad_norm": 1.8239783183741654, - "learning_rate": 1.0512035661118802e-09, - "loss": 1.0613, - "step": 10977 - }, - { - "epoch": 0.9900347206565361, - "grad_norm": 2.099057431086397, - "learning_rate": 1.0323499016300364e-09, - "loss": 0.9856, - "step": 10978 - }, - { - "epoch": 0.9901249041800063, - "grad_norm": 1.646504068313424, - "learning_rate": 1.013666800090407e-09, - "loss": 0.9192, - "step": 10979 - }, - { - "epoch": 0.9902150877034765, - "grad_norm": 1.9112354810423966, - "learning_rate": 9.951542630870502e-10, - "loss": 0.9551, - "step": 10980 - }, - { - "epoch": 0.9903052712269468, - "grad_norm": 1.59136245509971, - "learning_rate": 9.768122921995915e-10, - "loss": 0.998, - "step": 10981 - }, - { - "epoch": 0.9903954547504171, - "grad_norm": 1.7356984355859904, - "learning_rate": 9.58640888992779e-10, - "loss": 0.8996, - "step": 10982 - }, - { - "epoch": 0.9904856382738874, - "grad_norm": 1.5403533788957684, - "learning_rate": 9.40640055017594e-10, - "loss": 0.8769, - "step": 10983 - }, - { - "epoch": 0.9905758217973576, - "grad_norm": 1.4913666549550177, - "learning_rate": 9.228097918094757e-10, - "loss": 0.9673, - "step": 10984 - }, - { - "epoch": 0.9906660053208279, - "grad_norm": 1.5774980564982053, - "learning_rate": 9.051501008900952e-10, - "loss": 0.935, - "step": 10985 - }, - { - "epoch": 0.9907561888442982, - "grad_norm": 1.372726424740593, - "learning_rate": 8.876609837662475e-10, - "loss": 0.9729, - "step": 10986 - }, - { - "epoch": 0.9908463723677684, - "grad_norm": 1.285915625792892, - "learning_rate": 8.70342441930294e-10, - "loss": 1.052, - "step": 10987 - }, - { - "epoch": 0.9909365558912386, - "grad_norm": 1.6332661851717147, - "learning_rate": 8.531944768594979e-10, - "loss": 0.9438, - "step": 10988 - }, - { - "epoch": 0.991026739414709, - "grad_norm": 1.586289390273289, - "learning_rate": 8.362170900175769e-10, - "loss": 1.0448, - "step": 10989 - }, - { - "epoch": 0.9911169229381792, - "grad_norm": 1.496602263321644, - "learning_rate": 8.194102828527061e-10, - "loss": 0.92, - "step": 10990 - }, - { - "epoch": 0.9912071064616494, - "grad_norm": 1.5114862623028935, - "learning_rate": 8.027740567992936e-10, - "loss": 1.0047, - "step": 10991 - }, - { - "epoch": 0.9912972899851197, - "grad_norm": 1.3609751349061991, - "learning_rate": 7.863084132766484e-10, - "loss": 0.9581, - "step": 10992 - }, - { - "epoch": 0.99138747350859, - "grad_norm": 1.4901969375007522, - "learning_rate": 7.700133536896469e-10, - "loss": 0.981, - "step": 10993 - }, - { - "epoch": 0.9914776570320603, - "grad_norm": 1.365956782490938, - "learning_rate": 7.538888794287324e-10, - "loss": 0.9278, - "step": 10994 - }, - { - "epoch": 0.9915678405555305, - "grad_norm": 1.6096157153206103, - "learning_rate": 7.379349918696931e-10, - "loss": 0.9904, - "step": 10995 - }, - { - "epoch": 0.9916580240790007, - "grad_norm": 1.4802241060336951, - "learning_rate": 7.221516923738846e-10, - "loss": 0.9387, - "step": 10996 - }, - { - "epoch": 0.9917482076024711, - "grad_norm": 4.843799615421293, - "learning_rate": 7.065389822880075e-10, - "loss": 1.0732, - "step": 10997 - }, - { - "epoch": 0.9918383911259413, - "grad_norm": 1.9871925116144518, - "learning_rate": 6.910968629443292e-10, - "loss": 1.0356, - "step": 10998 - }, - { - "epoch": 0.9919285746494115, - "grad_norm": 1.4835994099634087, - "learning_rate": 6.758253356602406e-10, - "loss": 0.9645, - "step": 10999 - }, - { - "epoch": 0.9920187581728818, - "grad_norm": 1.5406351332882549, - "learning_rate": 6.607244017389213e-10, - "loss": 1.0593, - "step": 11000 - }, - { - "epoch": 0.9921089416963521, - "grad_norm": 1.9112086605094556, - "learning_rate": 6.457940624686742e-10, - "loss": 0.9858, - "step": 11001 - }, - { - "epoch": 0.9921991252198223, - "grad_norm": 1.9019209060870181, - "learning_rate": 6.310343191238132e-10, - "loss": 1.042, - "step": 11002 - }, - { - "epoch": 0.9922893087432926, - "grad_norm": 1.798203019036368, - "learning_rate": 6.164451729635534e-10, - "loss": 0.8633, - "step": 11003 - }, - { - "epoch": 0.9923794922667628, - "grad_norm": 7.433585539275549, - "learning_rate": 6.020266252324546e-10, - "loss": 0.8372, - "step": 11004 - }, - { - "epoch": 0.9924696757902332, - "grad_norm": 1.614042975131294, - "learning_rate": 5.877786771610882e-10, - "loss": 0.9328, - "step": 11005 - }, - { - "epoch": 0.9925598593137034, - "grad_norm": 0.7803966821154205, - "learning_rate": 5.737013299651483e-10, - "loss": 0.94, - "step": 11006 - }, - { - "epoch": 0.9926500428371736, - "grad_norm": 1.3795152999538285, - "learning_rate": 5.597945848458963e-10, - "loss": 0.9656, - "step": 11007 - }, - { - "epoch": 0.992740226360644, - "grad_norm": 1.4530462428254176, - "learning_rate": 5.460584429894944e-10, - "loss": 0.9423, - "step": 11008 - }, - { - "epoch": 0.9928304098841142, - "grad_norm": 4.656024492166603, - "learning_rate": 5.32492905568338e-10, - "loss": 0.9691, - "step": 11009 - }, - { - "epoch": 0.9929205934075844, - "grad_norm": 1.4154136483099715, - "learning_rate": 5.190979737399459e-10, - "loss": 0.9995, - "step": 11010 - }, - { - "epoch": 0.9930107769310547, - "grad_norm": 1.6983956283559791, - "learning_rate": 5.058736486469594e-10, - "loss": 0.8537, - "step": 11011 - }, - { - "epoch": 0.993100960454525, - "grad_norm": 1.5764131231401082, - "learning_rate": 4.928199314180314e-10, - "loss": 0.9974, - "step": 11012 - }, - { - "epoch": 0.9931911439779952, - "grad_norm": 1.5841623528488853, - "learning_rate": 4.799368231669376e-10, - "loss": 0.8801, - "step": 11013 - }, - { - "epoch": 0.9932813275014655, - "grad_norm": 1.7747707648743776, - "learning_rate": 4.672243249927988e-10, - "loss": 0.9486, - "step": 11014 - }, - { - "epoch": 0.9933715110249357, - "grad_norm": 2.005757865082993, - "learning_rate": 4.546824379803027e-10, - "loss": 0.9937, - "step": 11015 - }, - { - "epoch": 0.993461694548406, - "grad_norm": 1.6786159202221607, - "learning_rate": 4.4231116319970454e-10, - "loss": 0.989, - "step": 11016 - }, - { - "epoch": 0.9935518780718763, - "grad_norm": 1.3830948880757916, - "learning_rate": 4.3011050170660423e-10, - "loss": 0.9623, - "step": 11017 - }, - { - "epoch": 0.9936420615953465, - "grad_norm": 1.8000215264198791, - "learning_rate": 4.18080454542169e-10, - "loss": 0.9599, - "step": 11018 - }, - { - "epoch": 0.9937322451188167, - "grad_norm": 0.6710885790446928, - "learning_rate": 4.0622102273246694e-10, - "loss": 0.8288, - "step": 11019 - }, - { - "epoch": 0.9938224286422871, - "grad_norm": 1.3554974643623052, - "learning_rate": 3.945322072897994e-10, - "loss": 1.0233, - "step": 11020 - }, - { - "epoch": 0.9939126121657573, - "grad_norm": 1.960738639322949, - "learning_rate": 3.830140092111467e-10, - "loss": 1.0164, - "step": 11021 - }, - { - "epoch": 0.9940027956892276, - "grad_norm": 1.5478537189456587, - "learning_rate": 3.7166642947972225e-10, - "loss": 0.9697, - "step": 11022 - }, - { - "epoch": 0.9940929792126978, - "grad_norm": 1.4811071463803143, - "learning_rate": 3.604894690634186e-10, - "loss": 0.9403, - "step": 11023 - }, - { - "epoch": 0.9941831627361681, - "grad_norm": 1.9942625720242453, - "learning_rate": 3.494831289161393e-10, - "loss": 1.0489, - "step": 11024 - }, - { - "epoch": 0.9942733462596384, - "grad_norm": 1.5112000987709704, - "learning_rate": 3.3864740997668897e-10, - "loss": 1.0034, - "step": 11025 - }, - { - "epoch": 0.9943635297831086, - "grad_norm": 1.4831874714975515, - "learning_rate": 3.279823131701054e-10, - "loss": 0.9917, - "step": 11026 - }, - { - "epoch": 0.9944537133065788, - "grad_norm": 1.6647403393138724, - "learning_rate": 3.1748783940610536e-10, - "loss": 0.9225, - "step": 11027 - }, - { - "epoch": 0.9945438968300492, - "grad_norm": 1.5179388729498724, - "learning_rate": 3.071639895801947e-10, - "loss": 0.9198, - "step": 11028 - }, - { - "epoch": 0.9946340803535194, - "grad_norm": 1.8382375391778139, - "learning_rate": 2.9701076457322447e-10, - "loss": 1.0283, - "step": 11029 - }, - { - "epoch": 0.9947242638769896, - "grad_norm": 1.5997251065858895, - "learning_rate": 2.870281652513906e-10, - "loss": 1.0011, - "step": 11030 - }, - { - "epoch": 0.99481444740046, - "grad_norm": 1.5737402404046223, - "learning_rate": 2.772161924669003e-10, - "loss": 0.9486, - "step": 11031 - }, - { - "epoch": 0.9949046309239302, - "grad_norm": 1.7213119139994615, - "learning_rate": 2.6757484705641765e-10, - "loss": 0.9518, - "step": 11032 - }, - { - "epoch": 0.9949948144474005, - "grad_norm": 1.6248242943478695, - "learning_rate": 2.58104129843062e-10, - "loss": 0.9622, - "step": 11033 - }, - { - "epoch": 0.9950849979708707, - "grad_norm": 1.6271236188381124, - "learning_rate": 2.4880404163463154e-10, - "loss": 0.9131, - "step": 11034 - }, - { - "epoch": 0.995175181494341, - "grad_norm": 1.7231459840556003, - "learning_rate": 2.3967458322471377e-10, - "loss": 0.9461, - "step": 11035 - }, - { - "epoch": 0.9952653650178113, - "grad_norm": 1.3979833280963603, - "learning_rate": 2.307157553922412e-10, - "loss": 0.9766, - "step": 11036 - }, - { - "epoch": 0.9953555485412815, - "grad_norm": 1.2914261850432665, - "learning_rate": 2.2192755890193538e-10, - "loss": 1.0029, - "step": 11037 - }, - { - "epoch": 0.9954457320647517, - "grad_norm": 0.7344400093036396, - "learning_rate": 2.133099945034189e-10, - "loss": 0.8032, - "step": 11038 - }, - { - "epoch": 0.9955359155882221, - "grad_norm": 1.7205498028447808, - "learning_rate": 2.048630629318815e-10, - "loss": 0.9889, - "step": 11039 - }, - { - "epoch": 0.9956260991116923, - "grad_norm": 1.439556475258607, - "learning_rate": 1.965867649080799e-10, - "loss": 0.963, - "step": 11040 - }, - { - "epoch": 0.9957162826351625, - "grad_norm": 0.6076860987776488, - "learning_rate": 1.8848110113856008e-10, - "loss": 0.8164, - "step": 11041 - }, - { - "epoch": 0.9958064661586328, - "grad_norm": 3.6413269675986166, - "learning_rate": 1.8054607231454687e-10, - "loss": 1.0051, - "step": 11042 - }, - { - "epoch": 0.9958966496821031, - "grad_norm": 1.603377342069549, - "learning_rate": 1.7278167911327635e-10, - "loss": 0.9175, - "step": 11043 - }, - { - "epoch": 0.9959868332055734, - "grad_norm": 1.5592682031667093, - "learning_rate": 1.6518792219710753e-10, - "loss": 0.9994, - "step": 11044 - }, - { - "epoch": 0.9960770167290436, - "grad_norm": 1.3991309210274463, - "learning_rate": 1.5776480221418865e-10, - "loss": 0.9685, - "step": 11045 - }, - { - "epoch": 0.9961672002525138, - "grad_norm": 3.838294025992311, - "learning_rate": 1.505123197977909e-10, - "loss": 0.9671, - "step": 11046 - }, - { - "epoch": 0.9962573837759842, - "grad_norm": 1.6479455295097594, - "learning_rate": 1.4343047556675258e-10, - "loss": 0.904, - "step": 11047 - }, - { - "epoch": 0.9963475672994544, - "grad_norm": 1.6738016311168984, - "learning_rate": 1.3651927012503506e-10, - "loss": 0.9681, - "step": 11048 - }, - { - "epoch": 0.9964377508229246, - "grad_norm": 1.6132616683843017, - "learning_rate": 1.297787040630549e-10, - "loss": 0.9721, - "step": 11049 - }, - { - "epoch": 0.9965279343463949, - "grad_norm": 1.9462858162722239, - "learning_rate": 1.2320877795524153e-10, - "loss": 0.983, - "step": 11050 - }, - { - "epoch": 0.9966181178698652, - "grad_norm": 1.7795127963757973, - "learning_rate": 1.1680949236247962e-10, - "loss": 0.9563, - "step": 11051 - }, - { - "epoch": 0.9967083013933354, - "grad_norm": 1.3710529723447828, - "learning_rate": 1.1058084783099886e-10, - "loss": 0.9728, - "step": 11052 - }, - { - "epoch": 0.9967984849168057, - "grad_norm": 1.956010929404774, - "learning_rate": 1.0452284489170793e-10, - "loss": 0.9723, - "step": 11053 - }, - { - "epoch": 0.996888668440276, - "grad_norm": 1.4949159293633398, - "learning_rate": 9.86354840621928e-11, - "loss": 0.9511, - "step": 11054 - }, - { - "epoch": 0.9969788519637462, - "grad_norm": 1.7027464979754907, - "learning_rate": 9.291876584427427e-11, - "loss": 0.8434, - "step": 11055 - }, - { - "epoch": 0.9970690354872165, - "grad_norm": 1.7524507936325613, - "learning_rate": 8.737269072578435e-11, - "loss": 0.9431, - "step": 11056 - }, - { - "epoch": 0.9971592190106867, - "grad_norm": 1.4789452275108679, - "learning_rate": 8.199725918012212e-11, - "loss": 0.9573, - "step": 11057 - }, - { - "epoch": 0.9972494025341571, - "grad_norm": 1.3624825310243418, - "learning_rate": 7.679247166603175e-11, - "loss": 0.9694, - "step": 11058 - }, - { - "epoch": 0.9973395860576273, - "grad_norm": 1.332395173864634, - "learning_rate": 7.17583286273804e-11, - "loss": 0.968, - "step": 11059 - }, - { - "epoch": 0.9974297695810975, - "grad_norm": 1.4646693418307928, - "learning_rate": 6.689483049360233e-11, - "loss": 0.9879, - "step": 11060 - }, - { - "epoch": 0.9975199531045678, - "grad_norm": 3.935557173722567, - "learning_rate": 6.220197768014302e-11, - "loss": 0.9362, - "step": 11061 - }, - { - "epoch": 0.9976101366280381, - "grad_norm": 1.5486684093889755, - "learning_rate": 5.7679770587126806e-11, - "loss": 0.9768, - "step": 11062 - }, - { - "epoch": 0.9977003201515083, - "grad_norm": 0.6474788197253051, - "learning_rate": 5.33282096002452e-11, - "loss": 0.907, - "step": 11063 - }, - { - "epoch": 0.9977905036749786, - "grad_norm": 1.651292326599119, - "learning_rate": 4.914729509120086e-11, - "loss": 0.9376, - "step": 11064 - }, - { - "epoch": 0.9978806871984488, - "grad_norm": 1.2393376991249112, - "learning_rate": 4.513702741637537e-11, - "loss": 0.8911, - "step": 11065 - }, - { - "epoch": 0.9979708707219191, - "grad_norm": 2.347167003767094, - "learning_rate": 4.129740691816153e-11, - "loss": 1.0253, - "step": 11066 - }, - { - "epoch": 0.9980610542453894, - "grad_norm": 1.2061576768400335, - "learning_rate": 3.762843392429715e-11, - "loss": 0.9882, - "step": 11067 - }, - { - "epoch": 0.9981512377688596, - "grad_norm": 2.6824536518455107, - "learning_rate": 3.413010874742106e-11, - "loss": 0.9552, - "step": 11068 - }, - { - "epoch": 0.9982414212923298, - "grad_norm": 1.7767873710630078, - "learning_rate": 3.080243168618324e-11, - "loss": 0.9627, - "step": 11069 - }, - { - "epoch": 0.9983316048158002, - "grad_norm": 2.0498006165469937, - "learning_rate": 2.7645403024800783e-11, - "loss": 0.9106, - "step": 11070 - }, - { - "epoch": 0.9984217883392704, - "grad_norm": 1.1878947053768347, - "learning_rate": 2.4659023032391756e-11, - "loss": 1.0364, - "step": 11071 - }, - { - "epoch": 0.9985119718627407, - "grad_norm": 1.3047580814114952, - "learning_rate": 2.1843291963863364e-11, - "loss": 1.0164, - "step": 11072 - }, - { - "epoch": 0.9986021553862109, - "grad_norm": 1.4878819047507572, - "learning_rate": 1.9198210059245822e-11, - "loss": 0.9792, - "step": 11073 - }, - { - "epoch": 0.9986923389096812, - "grad_norm": 1.7956353596947132, - "learning_rate": 1.672377754458054e-11, - "loss": 0.9693, - "step": 11074 - }, - { - "epoch": 0.9987825224331515, - "grad_norm": 1.3967230319452417, - "learning_rate": 1.4419994630809895e-11, - "loss": 0.955, - "step": 11075 - }, - { - "epoch": 0.9988727059566217, - "grad_norm": 1.8752077623336845, - "learning_rate": 1.2286861514443358e-11, - "loss": 0.9132, - "step": 11076 - }, - { - "epoch": 0.998962889480092, - "grad_norm": 1.1732137027265408, - "learning_rate": 1.0324378377779553e-11, - "loss": 1.0108, - "step": 11077 - }, - { - "epoch": 0.9990530730035623, - "grad_norm": 1.2734242538044056, - "learning_rate": 8.532545388018064e-12, - "loss": 0.9569, - "step": 11078 - }, - { - "epoch": 0.9991432565270325, - "grad_norm": 1.2314728530357366, - "learning_rate": 6.911362697925582e-12, - "loss": 0.8068, - "step": 11079 - }, - { - "epoch": 0.9992334400505027, - "grad_norm": 1.7829540868464933, - "learning_rate": 5.46083044605794e-12, - "loss": 0.9419, - "step": 11080 - }, - { - "epoch": 0.9993236235739731, - "grad_norm": 1.6162576997105935, - "learning_rate": 4.1809487563160276e-12, - "loss": 1.0124, - "step": 11081 - }, - { - "epoch": 0.9994138070974433, - "grad_norm": 1.2538823871183884, - "learning_rate": 3.0717177375017e-12, - "loss": 0.9679, - "step": 11082 - }, - { - "epoch": 0.9995039906209136, - "grad_norm": 3.5742617078190353, - "learning_rate": 2.1331374846500495e-12, - "loss": 0.9665, - "step": 11083 - }, - { - "epoch": 0.9995941741443838, - "grad_norm": 1.761836079752519, - "learning_rate": 1.3652080774750885e-12, - "loss": 1.0611, - "step": 11084 - }, - { - "epoch": 0.9996843576678541, - "grad_norm": 1.1290673269701332, - "learning_rate": 7.679295817020204e-13, - "loss": 0.8842, - "step": 11085 - }, - { - "epoch": 0.9997745411913244, - "grad_norm": 1.4505354254472018, - "learning_rate": 3.413020484011042e-13, - "loss": 0.9478, - "step": 11086 - }, - { - "epoch": 0.9998647247147946, - "grad_norm": 1.4774905724394112, - "learning_rate": 8.53255139876552e-14, - "loss": 1.008, - "step": 11087 - }, - { - "epoch": 0.9999549082382648, - "grad_norm": 1.6932020279269981, - "learning_rate": 0.0, - "loss": 0.9163, - "step": 11088 - }, - { - "epoch": 0.9999549082382648, - "step": 11088, - "total_flos": 1.337419456107774e+18, - "train_loss": 0.980954664663921, - "train_runtime": 182177.1192, - "train_samples_per_second": 3.652, - "train_steps_per_second": 0.061 - } - ], - "logging_steps": 1.0, - "max_steps": 11088, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 100, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 1.337419456107774e+18, - "train_batch_size": 5, - "trial_name": null, - "trial_params": null -} diff --git a/sft_full/smoe_cosinegating/training_args.bin b/sft_full/smoe_cosinegating/training_args.bin deleted file mode 100644 index 726ff8f7d8b1bbfcf79f326b16b1e9c3dfcb6b94..0000000000000000000000000000000000000000 --- a/sft_full/smoe_cosinegating/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2a0214971739c105c5dac6932f7f8a3c889163f3275c80e75467eea56f809608 -size 8184 diff --git a/sft_full/smoe_perturbed/added_tokens.json b/sft_full/smoe_perturbed/added_tokens.json deleted file mode 100644 index c9d3d3a1b74d87e381e471f7b33784015d2dc0ea..0000000000000000000000000000000000000000 --- a/sft_full/smoe_perturbed/added_tokens.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "<|assistant|>": 32001, - "<|endoftext|>": 32000, - "<|end|>": 32007, - "<|placeholder1|>": 32002, - "<|placeholder2|>": 32003, - "<|placeholder3|>": 32004, - "<|placeholder4|>": 32005, - "<|placeholder5|>": 32008, - "<|placeholder6|>": 32009, - "<|system|>": 32006, - "<|user|>": 32010 -} diff --git a/sft_full/smoe_perturbed/config.json b/sft_full/smoe_perturbed/config.json deleted file mode 100644 index a26ea167de11d01860d4c2f390e5e06ac049ab82..0000000000000000000000000000000000000000 --- a/sft_full/smoe_perturbed/config.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "_name_or_path": "/cm/archive/thongdt4/toolkitmoe/checkpoints/phi3mini-siglip224-full/pft", - "architectures": [ - "LlavaPhiForCausalLM" - ], - "attention_bias": false, - "attention_dropout": 0.0, - "auto_map": { - "AutoConfig": "configuration_phi3.Phi3Config", - "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM" - }, - "balance_loss_coef": 0.1, - "bos_token_id": 1, - "clip_smoe": true, - "dropout": false, - "embd_pdrop": 0.0, - "eos_token_id": 32000, - "freeze_mm_mlp_adapter": false, - "hidden_act": "silu", - "hidden_size": 3072, - "image_aspect_ratio": "pad", - "initializer_range": 0.02, - "intermediate_size": 8192, - "local_rank": 0, - "max_position_embeddings": 4096, - "mlp_smoe": true, - "mm_hidden_size": 1152, - "mm_patch_merge_type": "flat", - "mm_projector_lr": null, - "mm_projector_type": "moe", - "mm_use_im_patch_token": false, - "mm_use_im_start_end": false, - "mm_vision_select_feature": "patch", - "mm_vision_select_layer": -2, - "mm_vision_tower": "google/siglip-so400m-patch14-224", - "model_type": "llava_phi", - "moe_name": "smoe_perturbed", - "num_attention_heads": 32, - "num_experts": 4, - "num_hidden_layers": 32, - "num_key_value_heads": 32, - "num_layers": 3, - "num_selected": 2, - "original_max_position_embeddings": 4096, - "pad_token_id": 32000, - "resid_pdrop": 0.0, - "rms_norm_eps": 1e-05, - "rope_scaling": null, - "rope_theta": 10000.0, - "router_z_loss_coef": 0.01, - "scales": [ - 1, - 3 - ], - "sliding_window": 2047, - "tie_word_embeddings": false, - "tokenizer_model_max_length": 2048, - "tokenizer_padding_side": "right", - "torch_dtype": "bfloat16", - "training": true, - "transformers_version": "4.43.2", - "tune_mm_mlp_adapter": false, - "use_cache": true, - "use_mm_proj": true, - "vocab_size": 32064 -} diff --git a/sft_full/smoe_perturbed/generation_config.json b/sft_full/smoe_perturbed/generation_config.json deleted file mode 100644 index 3a20824ea777f1ebd11da590160a7209fe3b62c6..0000000000000000000000000000000000000000 --- a/sft_full/smoe_perturbed/generation_config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "_from_model_config": true, - "bos_token_id": 1, - "do_sample": true, - "eos_token_id": [ - 32000, - 32001, - 32007 - ], - "pad_token_id": 32000, - "transformers_version": "4.43.2" -} diff --git a/sft_full/smoe_perturbed/model-00001-of-00003.safetensors b/sft_full/smoe_perturbed/model-00001-of-00003.safetensors deleted file mode 100644 index f1bd5e5d3b2f59c9a81967ee8e0dffe2698e7250..0000000000000000000000000000000000000000 --- a/sft_full/smoe_perturbed/model-00001-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bb586ea3bea1734186c683ad6efae405f4be9464d2ff1b5a0f22baef9f3d7d38 -size 4972489328 diff --git a/sft_full/smoe_perturbed/model-00002-of-00003.safetensors b/sft_full/smoe_perturbed/model-00002-of-00003.safetensors deleted file mode 100644 index 9b9473e1b5a35d49bf224df075296f84c277a5db..0000000000000000000000000000000000000000 --- a/sft_full/smoe_perturbed/model-00002-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2cf1305d64ab35453affb81f156e79911662284fd96eaf1b53126fec609c3d49 -size 4985533608 diff --git a/sft_full/smoe_perturbed/model-00003-of-00003.safetensors b/sft_full/smoe_perturbed/model-00003-of-00003.safetensors deleted file mode 100644 index 926fe026a8f6ca9441f1d48e26324155c7d2aa04..0000000000000000000000000000000000000000 --- a/sft_full/smoe_perturbed/model-00003-of-00003.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6dbbedc953f1a7b0616be21494c215f49435fd4bbb9143f963a4c4caa5055281 -size 248943664 diff --git a/sft_full/smoe_perturbed/model.safetensors.index.json b/sft_full/smoe_perturbed/model.safetensors.index.json deleted file mode 100644 index f5e0d563e520320e7e1cb47747945b2591e60790..0000000000000000000000000000000000000000 --- a/sft_full/smoe_perturbed/model.safetensors.index.json +++ /dev/null @@ -1,1033 +0,0 @@ -{ - "metadata": { - "total_size": 10206819680 - }, - "weight_map": { - "lm_head.weight": "model-00003-of-00003.safetensors", - "model.embed_tokens.weight": "model-00001-of-00003.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.0.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.1.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.10.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.11.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.12.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.13.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.14.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.15.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.16.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.17.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.18.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.19.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.2.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.20.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.21.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.21.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.22.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.23.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.24.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.25.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.26.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.27.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.28.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.29.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.3.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.30.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.30.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.input_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.mlp.gate_up_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.31.self_attn.qkv_proj.weight": "model-00002-of-00003.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.4.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.5.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.6.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.7.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.8.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.mlp.gate_up_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", - "model.layers.9.self_attn.qkv_proj.weight": "model-00001-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.2.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.0.2.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.2.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.1.2.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.0.bias": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.0.weight": "model-00002-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.2.2.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.0.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.0.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.2.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.experts.3.2.weight": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.gate.bias": "model-00003-of-00003.safetensors", - "model.mm_projector.moelayer.gate.weight": "model-00003-of-00003.safetensors", - "model.norm.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.0.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.1.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.2.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc1.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc1.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc2.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.experts.3.fc2.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.gate.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.moelayer.gate.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00002-of-00003.safetensors", - "model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00002-of-00003.safetensors" - } -} diff --git a/sft_full/smoe_perturbed/special_tokens_map.json b/sft_full/smoe_perturbed/special_tokens_map.json deleted file mode 100644 index 3e4d5a5bc1cb51753cc9ae0305ece0da60052b10..0000000000000000000000000000000000000000 --- a/sft_full/smoe_perturbed/special_tokens_map.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "bos_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "eos_token": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - }, - "pad_token": "", - "unk_token": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false - } -} diff --git a/sft_full/smoe_perturbed/tokenizer.model b/sft_full/smoe_perturbed/tokenizer.model deleted file mode 100644 index 6c00c742ce03c627d6cd5b795984876fa49fa899..0000000000000000000000000000000000000000 --- a/sft_full/smoe_perturbed/tokenizer.model +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 -size 499723 diff --git a/sft_full/smoe_perturbed/tokenizer_config.json b/sft_full/smoe_perturbed/tokenizer_config.json deleted file mode 100644 index 3bd56c6314b14d6a33a69cd1802e04dbc1e47840..0000000000000000000000000000000000000000 --- a/sft_full/smoe_perturbed/tokenizer_config.json +++ /dev/null @@ -1,132 +0,0 @@ -{ - "add_bos_token": true, - "add_eos_token": false, - "add_prefix_space": true, - "added_tokens_decoder": { - "0": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "1": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "2": { - "content": "", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": false - }, - "32000": { - "content": "<|endoftext|>", - "lstrip": false, - "normalized": false, - "rstrip": false, - "single_word": false, - "special": true - }, - "32001": { - "content": "<|assistant|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32002": { - "content": "<|placeholder1|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32003": { - "content": "<|placeholder2|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32004": { - "content": "<|placeholder3|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32005": { - "content": "<|placeholder4|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32006": { - "content": "<|system|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32007": { - "content": "<|end|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32008": { - "content": "<|placeholder5|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32009": { - "content": "<|placeholder6|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - }, - "32010": { - "content": "<|user|>", - "lstrip": false, - "normalized": false, - "rstrip": true, - "single_word": false, - "special": true - } - }, - "bos_token": "", - "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", - "clean_up_tokenization_spaces": false, - "eos_token": "<|endoftext|>", - "legacy": false, - "model_max_length": 2048, - "pad_token": "", - "padding_side": "right", - "sp_model_kwargs": {}, - "spaces_between_special_tokens": false, - "tokenizer_class": "LlamaTokenizer", - "unk_token": "", - "use_default_system_prompt": false -} diff --git a/sft_full/smoe_perturbed/trainer_state.json b/sft_full/smoe_perturbed/trainer_state.json deleted file mode 100644 index 08c116985ceed1ff757958c319a09364c74aea84..0000000000000000000000000000000000000000 --- a/sft_full/smoe_perturbed/trainer_state.json +++ /dev/null @@ -1,93184 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.0, - "eval_steps": 500, - "global_step": 13306, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 7.515406583496167e-05, - "grad_norm": 20.468459265587903, - "learning_rate": 0.0, - "loss": 1.5843, - "step": 1 - }, - { - "epoch": 0.00015030813166992335, - "grad_norm": 13.441950571113182, - "learning_rate": 4.6275642631951835e-07, - "loss": 1.692, - "step": 2 - }, - { - "epoch": 0.000225462197504885, - "grad_norm": 12.547891654016523, - "learning_rate": 7.334515826841693e-07, - "loss": 1.6105, - "step": 3 - }, - { - "epoch": 0.0003006162633398467, - "grad_norm": 14.08007257096055, - "learning_rate": 9.255128526390367e-07, - "loss": 1.5526, - "step": 4 - }, - { - "epoch": 0.00037577032917480833, - "grad_norm": 12.388105754495733, - "learning_rate": 1.0744871473609632e-06, - "loss": 1.5925, - "step": 5 - }, - { - "epoch": 0.00045092439500977, - "grad_norm": 13.68767725163484, - "learning_rate": 1.1962080090036876e-06, - "loss": 1.5714, - "step": 6 - }, - { - "epoch": 0.0005260784608447317, - "grad_norm": 13.22384941994748, - "learning_rate": 1.2991215311418868e-06, - "loss": 1.4503, - "step": 7 - }, - { - "epoch": 0.0006012325266796934, - "grad_norm": 9.580226848042845, - "learning_rate": 1.3882692789585548e-06, - "loss": 1.37, - "step": 8 - }, - { - "epoch": 0.000676386592514655, - "grad_norm": 8.412817902814608, - "learning_rate": 1.4669031653683387e-06, - "loss": 1.5099, - "step": 9 - }, - { - "epoch": 0.0007515406583496167, - "grad_norm": 12.99951170957971, - "learning_rate": 1.5372435736804818e-06, - "loss": 1.3597, - "step": 10 - }, - { - "epoch": 0.0008266947241845784, - "grad_norm": 2.609535075320156, - "learning_rate": 1.6008742129373428e-06, - "loss": 0.9571, - "step": 11 - }, - { - "epoch": 0.00090184879001954, - "grad_norm": 6.224454937094173, - "learning_rate": 1.658964435323206e-06, - "loss": 1.4301, - "step": 12 - }, - { - "epoch": 0.0009770028558545017, - "grad_norm": 5.127906161421229, - "learning_rate": 1.7124022597777776e-06, - "loss": 1.4212, - "step": 13 - }, - { - "epoch": 0.0010521569216894633, - "grad_norm": 3.508685506751856, - "learning_rate": 1.761877957461405e-06, - "loss": 1.2688, - "step": 14 - }, - { - "epoch": 0.0011273109875244252, - "grad_norm": 5.135429934264629, - "learning_rate": 1.8079387300451324e-06, - "loss": 1.3217, - "step": 15 - }, - { - "epoch": 0.0012024650533593868, - "grad_norm": 3.239898388293572, - "learning_rate": 1.8510257052780734e-06, - "loss": 1.2745, - "step": 16 - }, - { - "epoch": 0.0012776191191943484, - "grad_norm": 3.0077286349408037, - "learning_rate": 1.891499697130832e-06, - "loss": 1.3359, - "step": 17 - }, - { - "epoch": 0.00135277318502931, - "grad_norm": 3.4676082897260128, - "learning_rate": 1.929659591687857e-06, - "loss": 1.3346, - "step": 18 - }, - { - "epoch": 0.0014279272508642717, - "grad_norm": 2.5391230649327126, - "learning_rate": 1.9657557553855114e-06, - "loss": 1.2042, - "step": 19 - }, - { - "epoch": 0.0015030813166992333, - "grad_norm": 2.590340268969583, - "learning_rate": 2e-06, - "loss": 1.1651, - "step": 20 - }, - { - "epoch": 0.0015782353825341952, - "grad_norm": 3.3271219237368137, - "learning_rate": 2.032573113826056e-06, - "loss": 1.2231, - "step": 21 - }, - { - "epoch": 0.0016533894483691568, - "grad_norm": 3.259270577298688, - "learning_rate": 2.063630639256861e-06, - "loss": 1.1083, - "step": 22 - }, - { - "epoch": 0.0017285435142041184, - "grad_norm": 2.005480766780503, - "learning_rate": 2.093307365019873e-06, - "loss": 1.1884, - "step": 23 - }, - { - "epoch": 0.00180369758003908, - "grad_norm": 2.282999178063758, - "learning_rate": 2.1217208616427245e-06, - "loss": 1.2749, - "step": 24 - }, - { - "epoch": 0.0018788516458740417, - "grad_norm": 2.2316498042925605, - "learning_rate": 2.1489742947219264e-06, - "loss": 1.2457, - "step": 25 - }, - { - "epoch": 0.0019540057117090033, - "grad_norm": 2.125927666525449, - "learning_rate": 2.175158686097296e-06, - "loss": 1.0765, - "step": 26 - }, - { - "epoch": 0.002029159777543965, - "grad_norm": 1.8548124096144114, - "learning_rate": 2.200354748052508e-06, - "loss": 1.1195, - "step": 27 - }, - { - "epoch": 0.0021043138433789266, - "grad_norm": 2.5055325325441222, - "learning_rate": 2.2246343837809235e-06, - "loss": 1.1665, - "step": 28 - }, - { - "epoch": 0.0021794679092138887, - "grad_norm": 2.4832967661612466, - "learning_rate": 2.2480619244333726e-06, - "loss": 1.1458, - "step": 29 - }, - { - "epoch": 0.0022546219750488503, - "grad_norm": 1.7619944016349265, - "learning_rate": 2.270695156364651e-06, - "loss": 1.1224, - "step": 30 - }, - { - "epoch": 0.002329776040883812, - "grad_norm": 2.1492364680933718, - "learning_rate": 2.2925861798799734e-06, - "loss": 1.2799, - "step": 31 - }, - { - "epoch": 0.0024049301067187736, - "grad_norm": 2.178665194849819, - "learning_rate": 2.3137821315975918e-06, - "loss": 1.1449, - "step": 32 - }, - { - "epoch": 0.002480084172553735, - "grad_norm": 1.890218580715651, - "learning_rate": 2.334325795621512e-06, - "loss": 1.1097, - "step": 33 - }, - { - "epoch": 0.002555238238388697, - "grad_norm": 2.3427088141423185, - "learning_rate": 2.3542561234503503e-06, - "loss": 1.1166, - "step": 34 - }, - { - "epoch": 0.0026303923042236585, - "grad_norm": 1.9846185850090332, - "learning_rate": 2.3736086785028504e-06, - "loss": 1.249, - "step": 35 - }, - { - "epoch": 0.00270554637005862, - "grad_norm": 1.94615265501745, - "learning_rate": 2.392416018007375e-06, - "loss": 1.1841, - "step": 36 - }, - { - "epoch": 0.0027807004358935817, - "grad_norm": 1.592436080560082, - "learning_rate": 2.41070802255664e-06, - "loss": 0.9196, - "step": 37 - }, - { - "epoch": 0.0028558545017285434, - "grad_norm": 2.257039079556827, - "learning_rate": 2.4285121817050297e-06, - "loss": 1.2075, - "step": 38 - }, - { - "epoch": 0.002931008567563505, - "grad_norm": 3.2092478220849707, - "learning_rate": 2.445853842461947e-06, - "loss": 1.1348, - "step": 39 - }, - { - "epoch": 0.0030061626333984666, - "grad_norm": 2.618876717717686, - "learning_rate": 2.4627564263195183e-06, - "loss": 1.1768, - "step": 40 - }, - { - "epoch": 0.0030813166992334287, - "grad_norm": 2.440488401685906, - "learning_rate": 2.4792416194780364e-06, - "loss": 1.0871, - "step": 41 - }, - { - "epoch": 0.0031564707650683903, - "grad_norm": 1.8359232151206237, - "learning_rate": 2.4953295401455745e-06, - "loss": 1.1649, - "step": 42 - }, - { - "epoch": 0.003231624830903352, - "grad_norm": 1.744591324724666, - "learning_rate": 2.511038886149501e-06, - "loss": 1.1869, - "step": 43 - }, - { - "epoch": 0.0033067788967383136, - "grad_norm": 2.756026859722828, - "learning_rate": 2.526387065576379e-06, - "loss": 1.1328, - "step": 44 - }, - { - "epoch": 0.0033819329625732752, - "grad_norm": 1.8548527709032043, - "learning_rate": 2.5413903127293017e-06, - "loss": 1.041, - "step": 45 - }, - { - "epoch": 0.003457087028408237, - "grad_norm": 1.7665248367070878, - "learning_rate": 2.5560637913393917e-06, - "loss": 1.2099, - "step": 46 - }, - { - "epoch": 0.0035322410942431985, - "grad_norm": 1.7849356684058084, - "learning_rate": 2.5704216866765804e-06, - "loss": 1.1076, - "step": 47 - }, - { - "epoch": 0.00360739516007816, - "grad_norm": 1.9044692680381092, - "learning_rate": 2.584477287962243e-06, - "loss": 1.2239, - "step": 48 - }, - { - "epoch": 0.0036825492259131218, - "grad_norm": 1.9276399444362924, - "learning_rate": 2.5982430622837735e-06, - "loss": 1.1172, - "step": 49 - }, - { - "epoch": 0.0037577032917480834, - "grad_norm": 1.6286821462765082, - "learning_rate": 2.6117307210414448e-06, - "loss": 1.0843, - "step": 50 - }, - { - "epoch": 0.003832857357583045, - "grad_norm": 4.589773104509514, - "learning_rate": 2.624951279815001e-06, - "loss": 1.0887, - "step": 51 - }, - { - "epoch": 0.003908011423418007, - "grad_norm": 1.8095845419030396, - "learning_rate": 2.6379151124168143e-06, - "loss": 1.135, - "step": 52 - }, - { - "epoch": 0.003983165489252968, - "grad_norm": 2.0249456139313673, - "learning_rate": 2.650631999796137e-06, - "loss": 1.1699, - "step": 53 - }, - { - "epoch": 0.00405831955508793, - "grad_norm": 3.3810679670541015, - "learning_rate": 2.6631111743720262e-06, - "loss": 0.9961, - "step": 54 - }, - { - "epoch": 0.004133473620922892, - "grad_norm": 2.883414022239092, - "learning_rate": 2.675361360298306e-06, - "loss": 1.1241, - "step": 55 - }, - { - "epoch": 0.004208627686757853, - "grad_norm": 1.9871589775935983, - "learning_rate": 2.6873908101004422e-06, - "loss": 1.1867, - "step": 56 - }, - { - "epoch": 0.004283781752592815, - "grad_norm": 1.7754708619236523, - "learning_rate": 2.6992073380696804e-06, - "loss": 1.1486, - "step": 57 - }, - { - "epoch": 0.004358935818427777, - "grad_norm": 1.8453019932408203, - "learning_rate": 2.710818350752891e-06, - "loss": 1.0997, - "step": 58 - }, - { - "epoch": 0.004434089884262739, - "grad_norm": 1.6491509651771799, - "learning_rate": 2.7222308748360397e-06, - "loss": 1.0583, - "step": 59 - }, - { - "epoch": 0.004509243950097701, - "grad_norm": 1.8118517143803337, - "learning_rate": 2.733451582684169e-06, - "loss": 1.1229, - "step": 60 - }, - { - "epoch": 0.004584398015932662, - "grad_norm": 1.8392388064028415, - "learning_rate": 2.744486815770336e-06, - "loss": 1.1301, - "step": 61 - }, - { - "epoch": 0.004659552081767624, - "grad_norm": 1.8364817360526235, - "learning_rate": 2.755342606199492e-06, - "loss": 1.135, - "step": 62 - }, - { - "epoch": 0.0047347061476025855, - "grad_norm": 2.551877310604906, - "learning_rate": 2.766024696510225e-06, - "loss": 1.0844, - "step": 63 - }, - { - "epoch": 0.004809860213437547, - "grad_norm": 3.4208916074100384, - "learning_rate": 2.7765385579171097e-06, - "loss": 1.1576, - "step": 64 - }, - { - "epoch": 0.004885014279272509, - "grad_norm": 2.858376917678217, - "learning_rate": 2.7868894071387408e-06, - "loss": 1.1018, - "step": 65 - }, - { - "epoch": 0.00496016834510747, - "grad_norm": 1.638967857864937, - "learning_rate": 2.79708222194103e-06, - "loss": 1.1346, - "step": 66 - }, - { - "epoch": 0.005035322410942432, - "grad_norm": 4.873225940785661, - "learning_rate": 2.807121755511699e-06, - "loss": 1.1601, - "step": 67 - }, - { - "epoch": 0.005110476476777394, - "grad_norm": 1.790649110322178, - "learning_rate": 2.8170125497698686e-06, - "loss": 1.1186, - "step": 68 - }, - { - "epoch": 0.005185630542612355, - "grad_norm": 1.6694744380697435, - "learning_rate": 2.826758947704043e-06, - "loss": 1.105, - "step": 69 - }, - { - "epoch": 0.005260784608447317, - "grad_norm": 1.9640036392799736, - "learning_rate": 2.8363651048223687e-06, - "loss": 1.1251, - "step": 70 - }, - { - "epoch": 0.005335938674282279, - "grad_norm": 2.9608295994727993, - "learning_rate": 2.8458349997907386e-06, - "loss": 1.1333, - "step": 71 - }, - { - "epoch": 0.00541109274011724, - "grad_norm": 1.4894132539268274, - "learning_rate": 2.8551724443268935e-06, - "loss": 0.9023, - "step": 72 - }, - { - "epoch": 0.005486246805952202, - "grad_norm": 1.7825984201104776, - "learning_rate": 2.8643810924121057e-06, - "loss": 1.1308, - "step": 73 - }, - { - "epoch": 0.0055614008717871635, - "grad_norm": 1.6616757608366988, - "learning_rate": 2.8734644488761585e-06, - "loss": 1.2039, - "step": 74 - }, - { - "epoch": 0.005636554937622125, - "grad_norm": 3.9615134999782917, - "learning_rate": 2.882425877406096e-06, - "loss": 1.1325, - "step": 75 - }, - { - "epoch": 0.005711709003457087, - "grad_norm": 2.0729672091918196, - "learning_rate": 2.891268608024548e-06, - "loss": 1.2151, - "step": 76 - }, - { - "epoch": 0.005786863069292048, - "grad_norm": 1.349684319851223, - "learning_rate": 2.8999957440792298e-06, - "loss": 0.8944, - "step": 77 - }, - { - "epoch": 0.00586201713512701, - "grad_norm": 1.7327859145550963, - "learning_rate": 2.9086102687814654e-06, - "loss": 1.0001, - "step": 78 - }, - { - "epoch": 0.005937171200961972, - "grad_norm": 1.585240478684354, - "learning_rate": 2.9171150513282156e-06, - "loss": 1.116, - "step": 79 - }, - { - "epoch": 0.006012325266796933, - "grad_norm": 1.4778775610893107, - "learning_rate": 2.9255128526390366e-06, - "loss": 1.1466, - "step": 80 - }, - { - "epoch": 0.006087479332631896, - "grad_norm": 1.7168056934811555, - "learning_rate": 2.9338063307366773e-06, - "loss": 1.1567, - "step": 81 - }, - { - "epoch": 0.006162633398466857, - "grad_norm": 2.002222495216034, - "learning_rate": 2.9419980457975543e-06, - "loss": 1.0501, - "step": 82 - }, - { - "epoch": 0.006237787464301819, - "grad_norm": 2.1227045564065343, - "learning_rate": 2.9500904648961173e-06, - "loss": 1.0303, - "step": 83 - }, - { - "epoch": 0.006312941530136781, - "grad_norm": 2.069755508063496, - "learning_rate": 2.958085966465093e-06, - "loss": 1.1318, - "step": 84 - }, - { - "epoch": 0.006388095595971742, - "grad_norm": 2.602645419525882, - "learning_rate": 2.965986844491795e-06, - "loss": 1.1557, - "step": 85 - }, - { - "epoch": 0.006463249661806704, - "grad_norm": 2.144488113608742, - "learning_rate": 2.973795312469019e-06, - "loss": 1.1513, - "step": 86 - }, - { - "epoch": 0.006538403727641666, - "grad_norm": 2.0798511649948113, - "learning_rate": 2.981513507117542e-06, - "loss": 1.1615, - "step": 87 - }, - { - "epoch": 0.006613557793476627, - "grad_norm": 1.7025601911617716, - "learning_rate": 2.989143491895898e-06, - "loss": 1.1137, - "step": 88 - }, - { - "epoch": 0.006688711859311589, - "grad_norm": 2.023122402091681, - "learning_rate": 2.9966872603118436e-06, - "loss": 1.0988, - "step": 89 - }, - { - "epoch": 0.0067638659251465505, - "grad_norm": 1.7300883751275085, - "learning_rate": 3.00414673904882e-06, - "loss": 1.0485, - "step": 90 - }, - { - "epoch": 0.006839019990981512, - "grad_norm": 2.1158601886516846, - "learning_rate": 3.0115237909196643e-06, - "loss": 1.1396, - "step": 91 - }, - { - "epoch": 0.006914174056816474, - "grad_norm": 1.9068741323843563, - "learning_rate": 3.01882021765891e-06, - "loss": 1.1073, - "step": 92 - }, - { - "epoch": 0.006989328122651435, - "grad_norm": 1.8025197142053335, - "learning_rate": 3.0260377625641433e-06, - "loss": 1.1589, - "step": 93 - }, - { - "epoch": 0.007064482188486397, - "grad_norm": 3.9397388935224247, - "learning_rate": 3.033178112996099e-06, - "loss": 1.069, - "step": 94 - }, - { - "epoch": 0.007139636254321359, - "grad_norm": 2.0568157820637305, - "learning_rate": 3.0402429027464746e-06, - "loss": 1.1373, - "step": 95 - }, - { - "epoch": 0.00721479032015632, - "grad_norm": 1.696315205163402, - "learning_rate": 3.047233714281761e-06, - "loss": 1.1262, - "step": 96 - }, - { - "epoch": 0.007289944385991282, - "grad_norm": 2.0463563684257635, - "learning_rate": 3.0541520808708106e-06, - "loss": 1.122, - "step": 97 - }, - { - "epoch": 0.0073650984518262436, - "grad_norm": 1.982433901595293, - "learning_rate": 3.0609994886032923e-06, - "loss": 1.0103, - "step": 98 - }, - { - "epoch": 0.007440252517661205, - "grad_norm": 1.4020132881492582, - "learning_rate": 3.067777378305681e-06, - "loss": 1.0623, - "step": 99 - }, - { - "epoch": 0.007515406583496167, - "grad_norm": 1.6424301389239444, - "learning_rate": 3.0744871473609635e-06, - "loss": 1.0666, - "step": 100 - }, - { - "epoch": 0.0075905606493311285, - "grad_norm": 0.8801821454608585, - "learning_rate": 3.081130151437802e-06, - "loss": 0.8185, - "step": 101 - }, - { - "epoch": 0.00766571471516609, - "grad_norm": 2.073701898294034, - "learning_rate": 3.0877077061345193e-06, - "loss": 1.009, - "step": 102 - }, - { - "epoch": 0.007740868781001052, - "grad_norm": 0.8145082304171625, - "learning_rate": 3.0942210885428814e-06, - "loss": 0.8506, - "step": 103 - }, - { - "epoch": 0.007816022846836013, - "grad_norm": 0.9120029820343254, - "learning_rate": 3.1006715387363326e-06, - "loss": 0.8558, - "step": 104 - }, - { - "epoch": 0.007891176912670975, - "grad_norm": 1.9612439284707808, - "learning_rate": 3.107060261187019e-06, - "loss": 1.0502, - "step": 105 - }, - { - "epoch": 0.007966330978505937, - "grad_norm": 2.225289070982099, - "learning_rate": 3.113388426115655e-06, - "loss": 1.0749, - "step": 106 - }, - { - "epoch": 0.008041485044340898, - "grad_norm": 2.421513684300461, - "learning_rate": 3.119657170778007e-06, - "loss": 1.0466, - "step": 107 - }, - { - "epoch": 0.00811663911017586, - "grad_norm": 1.5443069256515152, - "learning_rate": 3.1258676006915446e-06, - "loss": 1.105, - "step": 108 - }, - { - "epoch": 0.008191793176010822, - "grad_norm": 7.529310807785423, - "learning_rate": 3.1320207908055525e-06, - "loss": 1.0512, - "step": 109 - }, - { - "epoch": 0.008266947241845783, - "grad_norm": 2.6601886047098566, - "learning_rate": 3.138117786617824e-06, - "loss": 1.1679, - "step": 110 - }, - { - "epoch": 0.008342101307680745, - "grad_norm": 1.512736369807768, - "learning_rate": 3.144159605240809e-06, - "loss": 1.0661, - "step": 111 - }, - { - "epoch": 0.008417255373515706, - "grad_norm": 0.7882095107675242, - "learning_rate": 3.1501472364199597e-06, - "loss": 0.8391, - "step": 112 - }, - { - "epoch": 0.008492409439350668, - "grad_norm": 2.3470170710771345, - "learning_rate": 3.156081643506813e-06, - "loss": 1.1089, - "step": 113 - }, - { - "epoch": 0.00856756350518563, - "grad_norm": 2.459852191237184, - "learning_rate": 3.161963764389199e-06, - "loss": 1.1838, - "step": 114 - }, - { - "epoch": 0.008642717571020593, - "grad_norm": 1.5488105763310698, - "learning_rate": 3.167794512380837e-06, - "loss": 1.0867, - "step": 115 - }, - { - "epoch": 0.008717871636855555, - "grad_norm": 0.8319760872907396, - "learning_rate": 3.1735747770724093e-06, - "loss": 0.8672, - "step": 116 - }, - { - "epoch": 0.008793025702690516, - "grad_norm": 1.626620804825464, - "learning_rate": 3.179305425146116e-06, - "loss": 1.0725, - "step": 117 - }, - { - "epoch": 0.008868179768525478, - "grad_norm": 1.6596168988308746, - "learning_rate": 3.184987301155558e-06, - "loss": 1.0662, - "step": 118 - }, - { - "epoch": 0.00894333383436044, - "grad_norm": 1.7667404972099925, - "learning_rate": 3.190621228272719e-06, - "loss": 0.9914, - "step": 119 - }, - { - "epoch": 0.009018487900195401, - "grad_norm": 1.755701518362099, - "learning_rate": 3.1962080090036873e-06, - "loss": 1.2045, - "step": 120 - }, - { - "epoch": 0.009093641966030363, - "grad_norm": 1.9425464252437559, - "learning_rate": 3.2017484258746856e-06, - "loss": 1.1037, - "step": 121 - }, - { - "epoch": 0.009168796031865325, - "grad_norm": 1.9174702107131072, - "learning_rate": 3.207243242089855e-06, - "loss": 1.1712, - "step": 122 - }, - { - "epoch": 0.009243950097700286, - "grad_norm": 5.11744672977692, - "learning_rate": 3.212693202162205e-06, - "loss": 1.1213, - "step": 123 - }, - { - "epoch": 0.009319104163535248, - "grad_norm": 1.5719950757131544, - "learning_rate": 3.2180990325190106e-06, - "loss": 1.06, - "step": 124 - }, - { - "epoch": 0.00939425822937021, - "grad_norm": 11.597724007254074, - "learning_rate": 3.22346144208289e-06, - "loss": 1.0653, - "step": 125 - }, - { - "epoch": 0.009469412295205171, - "grad_norm": 1.8650912085108473, - "learning_rate": 3.2287811228297436e-06, - "loss": 1.0932, - "step": 126 - }, - { - "epoch": 0.009544566361040133, - "grad_norm": 2.931751226261455, - "learning_rate": 3.2340587503246298e-06, - "loss": 1.1588, - "step": 127 - }, - { - "epoch": 0.009619720426875094, - "grad_norm": 1.812469482164954, - "learning_rate": 3.239294984236628e-06, - "loss": 1.1456, - "step": 128 - }, - { - "epoch": 0.009694874492710056, - "grad_norm": 2.098074134389625, - "learning_rate": 3.24449046883367e-06, - "loss": 1.0444, - "step": 129 - }, - { - "epoch": 0.009770028558545018, - "grad_norm": 2.2246254509268795, - "learning_rate": 3.249645833458259e-06, - "loss": 0.9896, - "step": 130 - }, - { - "epoch": 0.00984518262437998, - "grad_norm": 3.401477515123196, - "learning_rate": 3.2547616929849703e-06, - "loss": 1.1776, - "step": 131 - }, - { - "epoch": 0.00992033669021494, - "grad_norm": 1.5961480144998754, - "learning_rate": 3.2598386482605483e-06, - "loss": 1.1009, - "step": 132 - }, - { - "epoch": 0.009995490756049902, - "grad_norm": 2.3196854177489015, - "learning_rate": 3.2648772865273986e-06, - "loss": 0.9942, - "step": 133 - }, - { - "epoch": 0.010070644821884864, - "grad_norm": 2.1204445198355084, - "learning_rate": 3.269878181831217e-06, - "loss": 1.0595, - "step": 134 - }, - { - "epoch": 0.010145798887719826, - "grad_norm": 2.3289766738005198, - "learning_rate": 3.274841895413471e-06, - "loss": 1.1791, - "step": 135 - }, - { - "epoch": 0.010220952953554787, - "grad_norm": 1.9112773951680895, - "learning_rate": 3.279768976089387e-06, - "loss": 1.1193, - "step": 136 - }, - { - "epoch": 0.010296107019389749, - "grad_norm": 1.7642749587481872, - "learning_rate": 3.2846599606121004e-06, - "loss": 1.0484, - "step": 137 - }, - { - "epoch": 0.01037126108522471, - "grad_norm": 1.0019792400920182, - "learning_rate": 3.289515374023561e-06, - "loss": 0.8875, - "step": 138 - }, - { - "epoch": 0.010446415151059672, - "grad_norm": 1.690720134885921, - "learning_rate": 3.2943357299927686e-06, - "loss": 1.0966, - "step": 139 - }, - { - "epoch": 0.010521569216894634, - "grad_norm": 4.139089853981904, - "learning_rate": 3.2991215311418867e-06, - "loss": 1.0456, - "step": 140 - }, - { - "epoch": 0.010596723282729596, - "grad_norm": 3.1268753528156346, - "learning_rate": 3.30387326936075e-06, - "loss": 1.0806, - "step": 141 - }, - { - "epoch": 0.010671877348564557, - "grad_norm": 1.4805493496416264, - "learning_rate": 3.308591426110257e-06, - "loss": 1.0486, - "step": 142 - }, - { - "epoch": 0.010747031414399519, - "grad_norm": 1.8277566000623977, - "learning_rate": 3.3132764727151197e-06, - "loss": 1.0203, - "step": 143 - }, - { - "epoch": 0.01082218548023448, - "grad_norm": 2.878248121380431, - "learning_rate": 3.317928870646412e-06, - "loss": 1.1111, - "step": 144 - }, - { - "epoch": 0.010897339546069442, - "grad_norm": 1.9476065839363546, - "learning_rate": 3.3225490717943362e-06, - "loss": 1.1308, - "step": 145 - }, - { - "epoch": 0.010972493611904404, - "grad_norm": 1.625847228808962, - "learning_rate": 3.327137518731624e-06, - "loss": 1.1357, - "step": 146 - }, - { - "epoch": 0.011047647677739365, - "grad_norm": 2.4147742743665668, - "learning_rate": 3.3316946449679425e-06, - "loss": 1.1238, - "step": 147 - }, - { - "epoch": 0.011122801743574327, - "grad_norm": 1.9731345626024168, - "learning_rate": 3.336220875195677e-06, - "loss": 1.1835, - "step": 148 - }, - { - "epoch": 0.011197955809409289, - "grad_norm": 1.8663156940116952, - "learning_rate": 3.3407166255274344e-06, - "loss": 1.1057, - "step": 149 - }, - { - "epoch": 0.01127310987524425, - "grad_norm": 1.6230514287597821, - "learning_rate": 3.345182303725614e-06, - "loss": 1.0232, - "step": 150 - }, - { - "epoch": 0.011348263941079212, - "grad_norm": 1.4955580427052444, - "learning_rate": 3.3496183094243384e-06, - "loss": 1.1147, - "step": 151 - }, - { - "epoch": 0.011423418006914174, - "grad_norm": 2.199853293989325, - "learning_rate": 3.3540250343440664e-06, - "loss": 1.1623, - "step": 152 - }, - { - "epoch": 0.011498572072749135, - "grad_norm": 2.597445325964439, - "learning_rate": 3.35840286249917e-06, - "loss": 1.19, - "step": 153 - }, - { - "epoch": 0.011573726138584097, - "grad_norm": 1.5146992796008358, - "learning_rate": 3.3627521703987477e-06, - "loss": 1.138, - "step": 154 - }, - { - "epoch": 0.011648880204419058, - "grad_norm": 1.9387680334239983, - "learning_rate": 3.367073327240937e-06, - "loss": 1.0596, - "step": 155 - }, - { - "epoch": 0.01172403427025402, - "grad_norm": 2.0242404596481522, - "learning_rate": 3.3713666951009833e-06, - "loss": 1.1196, - "step": 156 - }, - { - "epoch": 0.011799188336088982, - "grad_norm": 1.8826434427554382, - "learning_rate": 3.375632629113298e-06, - "loss": 1.0953, - "step": 157 - }, - { - "epoch": 0.011874342401923943, - "grad_norm": 1.8235559995403137, - "learning_rate": 3.3798714776477344e-06, - "loss": 1.1057, - "step": 158 - }, - { - "epoch": 0.011949496467758905, - "grad_norm": 1.8712484339603657, - "learning_rate": 3.3840835824803065e-06, - "loss": 1.1486, - "step": 159 - }, - { - "epoch": 0.012024650533593867, - "grad_norm": 1.922684909772807, - "learning_rate": 3.388269278958555e-06, - "loss": 0.9706, - "step": 160 - }, - { - "epoch": 0.01209980459942883, - "grad_norm": 0.9190038486043147, - "learning_rate": 3.3924288961617605e-06, - "loss": 0.8694, - "step": 161 - }, - { - "epoch": 0.012174958665263792, - "grad_norm": 2.6246098728011975, - "learning_rate": 3.3965627570561953e-06, - "loss": 1.0006, - "step": 162 - }, - { - "epoch": 0.012250112731098753, - "grad_norm": 2.480187203355865, - "learning_rate": 3.4006711786456036e-06, - "loss": 1.054, - "step": 163 - }, - { - "epoch": 0.012325266796933715, - "grad_norm": 1.9069619647125187, - "learning_rate": 3.404754472117073e-06, - "loss": 1.0605, - "step": 164 - }, - { - "epoch": 0.012400420862768676, - "grad_norm": 2.4368619817409027, - "learning_rate": 3.408812942982475e-06, - "loss": 1.1122, - "step": 165 - }, - { - "epoch": 0.012475574928603638, - "grad_norm": 5.812584332910557, - "learning_rate": 3.4128468912156357e-06, - "loss": 1.1156, - "step": 166 - }, - { - "epoch": 0.0125507289944386, - "grad_norm": 2.1306374886265083, - "learning_rate": 3.4168566113853806e-06, - "loss": 1.019, - "step": 167 - }, - { - "epoch": 0.012625883060273561, - "grad_norm": 2.4426221581544953, - "learning_rate": 3.420842392784611e-06, - "loss": 1.0799, - "step": 168 - }, - { - "epoch": 0.012701037126108523, - "grad_norm": 1.96109257423038, - "learning_rate": 3.424804519555555e-06, - "loss": 1.0574, - "step": 169 - }, - { - "epoch": 0.012776191191943485, - "grad_norm": 1.7035925687064901, - "learning_rate": 3.4287432708113135e-06, - "loss": 1.1223, - "step": 170 - }, - { - "epoch": 0.012851345257778446, - "grad_norm": 1.5397348242505946, - "learning_rate": 3.4326589207538503e-06, - "loss": 1.0507, - "step": 171 - }, - { - "epoch": 0.012926499323613408, - "grad_norm": 2.0619491072626315, - "learning_rate": 3.436551738788537e-06, - "loss": 0.9284, - "step": 172 - }, - { - "epoch": 0.01300165338944837, - "grad_norm": 2.8835367259647993, - "learning_rate": 3.440421989635386e-06, - "loss": 1.1311, - "step": 173 - }, - { - "epoch": 0.013076807455283331, - "grad_norm": 1.5360621140805468, - "learning_rate": 3.44426993343706e-06, - "loss": 1.1177, - "step": 174 - }, - { - "epoch": 0.013151961521118293, - "grad_norm": 2.7945557253829287, - "learning_rate": 3.4480958258638136e-06, - "loss": 1.1016, - "step": 175 - }, - { - "epoch": 0.013227115586953254, - "grad_norm": 3.970424509101099, - "learning_rate": 3.4518999182154156e-06, - "loss": 1.0868, - "step": 176 - }, - { - "epoch": 0.013302269652788216, - "grad_norm": 1.4478811172768453, - "learning_rate": 3.4556824575202087e-06, - "loss": 1.0982, - "step": 177 - }, - { - "epoch": 0.013377423718623178, - "grad_norm": 2.116032922542716, - "learning_rate": 3.4594436866313616e-06, - "loss": 1.0881, - "step": 178 - }, - { - "epoch": 0.01345257778445814, - "grad_norm": 2.6088159384845673, - "learning_rate": 3.463183844320436e-06, - "loss": 1.1314, - "step": 179 - }, - { - "epoch": 0.013527731850293101, - "grad_norm": 2.665228803735887, - "learning_rate": 3.4669031653683388e-06, - "loss": 0.9712, - "step": 180 - }, - { - "epoch": 0.013602885916128063, - "grad_norm": 1.542136902447741, - "learning_rate": 3.4706018806537624e-06, - "loss": 1.1424, - "step": 181 - }, - { - "epoch": 0.013678039981963024, - "grad_norm": 1.7602656899744276, - "learning_rate": 3.4742802172391827e-06, - "loss": 1.0283, - "step": 182 - }, - { - "epoch": 0.013753194047797986, - "grad_norm": 1.7757619672971097, - "learning_rate": 3.4779383984545055e-06, - "loss": 1.1934, - "step": 183 - }, - { - "epoch": 0.013828348113632948, - "grad_norm": 2.127716459924063, - "learning_rate": 3.481576643978429e-06, - "loss": 1.0334, - "step": 184 - }, - { - "epoch": 0.01390350217946791, - "grad_norm": 1.6072015993796747, - "learning_rate": 3.485195169917603e-06, - "loss": 1.0246, - "step": 185 - }, - { - "epoch": 0.01397865624530287, - "grad_norm": 1.9194440897186633, - "learning_rate": 3.4887941888836612e-06, - "loss": 1.0706, - "step": 186 - }, - { - "epoch": 0.014053810311137832, - "grad_norm": 1.845285196772569, - "learning_rate": 3.4923739100681745e-06, - "loss": 1.0426, - "step": 187 - }, - { - "epoch": 0.014128964376972794, - "grad_norm": 1.7967772084038145, - "learning_rate": 3.4959345393156175e-06, - "loss": 1.1329, - "step": 188 - }, - { - "epoch": 0.014204118442807756, - "grad_norm": 4.783087283905041, - "learning_rate": 3.4994762791943946e-06, - "loss": 1.0861, - "step": 189 - }, - { - "epoch": 0.014279272508642717, - "grad_norm": 2.318848118929597, - "learning_rate": 3.502999329065993e-06, - "loss": 1.0442, - "step": 190 - }, - { - "epoch": 0.014354426574477679, - "grad_norm": 2.3145783853234785, - "learning_rate": 3.506503885152319e-06, - "loss": 0.9882, - "step": 191 - }, - { - "epoch": 0.01442958064031264, - "grad_norm": 2.1885187229247185, - "learning_rate": 3.5099901406012796e-06, - "loss": 1.15, - "step": 192 - }, - { - "epoch": 0.014504734706147602, - "grad_norm": 2.1770312897760147, - "learning_rate": 3.513458285550655e-06, - "loss": 1.1009, - "step": 193 - }, - { - "epoch": 0.014579888771982564, - "grad_norm": 1.6757743497408564, - "learning_rate": 3.516908507190329e-06, - "loss": 1.023, - "step": 194 - }, - { - "epoch": 0.014655042837817525, - "grad_norm": 2.518687782749711, - "learning_rate": 3.5203409898229102e-06, - "loss": 1.0276, - "step": 195 - }, - { - "epoch": 0.014730196903652487, - "grad_norm": 2.30422958738795, - "learning_rate": 3.52375591492281e-06, - "loss": 1.043, - "step": 196 - }, - { - "epoch": 0.014805350969487449, - "grad_norm": 0.8203522808783303, - "learning_rate": 3.527153461193815e-06, - "loss": 0.8988, - "step": 197 - }, - { - "epoch": 0.01488050503532241, - "grad_norm": 1.8823869786270535, - "learning_rate": 3.5305338046251994e-06, - "loss": 1.0904, - "step": 198 - }, - { - "epoch": 0.014955659101157372, - "grad_norm": 1.7506284947549406, - "learning_rate": 3.533897118546427e-06, - "loss": 1.1824, - "step": 199 - }, - { - "epoch": 0.015030813166992334, - "grad_norm": 1.898903557982585, - "learning_rate": 3.5372435736804815e-06, - "loss": 1.1426, - "step": 200 - }, - { - "epoch": 0.015105967232827295, - "grad_norm": 1.8938003231198575, - "learning_rate": 3.5405733381958684e-06, - "loss": 1.0372, - "step": 201 - }, - { - "epoch": 0.015181121298662257, - "grad_norm": 1.6407011468563977, - "learning_rate": 3.5438865777573207e-06, - "loss": 1.1145, - "step": 202 - }, - { - "epoch": 0.015256275364497219, - "grad_norm": 1.820066443582367, - "learning_rate": 3.5471834555752594e-06, - "loss": 1.1626, - "step": 203 - }, - { - "epoch": 0.01533142943033218, - "grad_norm": 2.332461012405062, - "learning_rate": 3.5504641324540377e-06, - "loss": 1.1489, - "step": 204 - }, - { - "epoch": 0.015406583496167142, - "grad_norm": 1.5759012398830679, - "learning_rate": 3.5537287668389996e-06, - "loss": 1.0808, - "step": 205 - }, - { - "epoch": 0.015481737562002103, - "grad_norm": 1.664092945662101, - "learning_rate": 3.5569775148623998e-06, - "loss": 1.0845, - "step": 206 - }, - { - "epoch": 0.015556891627837067, - "grad_norm": 3.246811696703037, - "learning_rate": 3.5602105303882114e-06, - "loss": 1.0256, - "step": 207 - }, - { - "epoch": 0.015632045693672027, - "grad_norm": 2.4430756987415463, - "learning_rate": 3.563427965055851e-06, - "loss": 1.0377, - "step": 208 - }, - { - "epoch": 0.01570719975950699, - "grad_norm": 1.539659800542594, - "learning_rate": 3.566629968322854e-06, - "loss": 1.1123, - "step": 209 - }, - { - "epoch": 0.01578235382534195, - "grad_norm": 1.469864361661973, - "learning_rate": 3.5698166875065377e-06, - "loss": 1.0127, - "step": 210 - }, - { - "epoch": 0.015857507891176913, - "grad_norm": 1.6128327344287736, - "learning_rate": 3.5729882678246694e-06, - "loss": 1.0982, - "step": 211 - }, - { - "epoch": 0.015932661957011873, - "grad_norm": 1.884057610702055, - "learning_rate": 3.5761448524351738e-06, - "loss": 0.914, - "step": 212 - }, - { - "epoch": 0.016007816022846837, - "grad_norm": 1.5767800671535424, - "learning_rate": 3.579286582474908e-06, - "loss": 1.1374, - "step": 213 - }, - { - "epoch": 0.016082970088681797, - "grad_norm": 2.0846416434378297, - "learning_rate": 3.582413597097526e-06, - "loss": 1.0611, - "step": 214 - }, - { - "epoch": 0.01615812415451676, - "grad_norm": 2.462951126989078, - "learning_rate": 3.5855260335104637e-06, - "loss": 1.1144, - "step": 215 - }, - { - "epoch": 0.01623327822035172, - "grad_norm": 1.938130429975917, - "learning_rate": 3.588624027011063e-06, - "loss": 1.0811, - "step": 216 - }, - { - "epoch": 0.016308432286186683, - "grad_norm": 1.1173601884023459, - "learning_rate": 3.5917077110218606e-06, - "loss": 0.9977, - "step": 217 - }, - { - "epoch": 0.016383586352021643, - "grad_norm": 2.1389928010852404, - "learning_rate": 3.5947772171250713e-06, - "loss": 1.0804, - "step": 218 - }, - { - "epoch": 0.016458740417856606, - "grad_norm": 2.0652826207245334, - "learning_rate": 3.597832675096275e-06, - "loss": 1.0882, - "step": 219 - }, - { - "epoch": 0.016533894483691566, - "grad_norm": 1.673353308118977, - "learning_rate": 3.600874212937343e-06, - "loss": 1.0719, - "step": 220 - }, - { - "epoch": 0.01660904854952653, - "grad_norm": 1.5832968088588528, - "learning_rate": 3.603901956908609e-06, - "loss": 1.0512, - "step": 221 - }, - { - "epoch": 0.01668420261536149, - "grad_norm": 1.7140381469665698, - "learning_rate": 3.6069160315603275e-06, - "loss": 1.1442, - "step": 222 - }, - { - "epoch": 0.016759356681196453, - "grad_norm": 1.5425412901757194, - "learning_rate": 3.6099165597634083e-06, - "loss": 1.1317, - "step": 223 - }, - { - "epoch": 0.016834510747031413, - "grad_norm": 1.8009195336725363, - "learning_rate": 3.6129036627394785e-06, - "loss": 1.1171, - "step": 224 - }, - { - "epoch": 0.016909664812866376, - "grad_norm": 2.1454093538941157, - "learning_rate": 3.615877460090265e-06, - "loss": 1.0289, - "step": 225 - }, - { - "epoch": 0.016984818878701336, - "grad_norm": 2.1745164574872864, - "learning_rate": 3.6188380698263314e-06, - "loss": 1.1263, - "step": 226 - }, - { - "epoch": 0.0170599729445363, - "grad_norm": 1.5778331675604778, - "learning_rate": 3.6217856083951765e-06, - "loss": 1.1357, - "step": 227 - }, - { - "epoch": 0.01713512701037126, - "grad_norm": 1.7975520972924453, - "learning_rate": 3.6247201907087175e-06, - "loss": 1.0921, - "step": 228 - }, - { - "epoch": 0.017210281076206223, - "grad_norm": 1.6556261435159527, - "learning_rate": 3.627641930170173e-06, - "loss": 1.1342, - "step": 229 - }, - { - "epoch": 0.017285435142041186, - "grad_norm": 1.5340833336228876, - "learning_rate": 3.630550938700355e-06, - "loss": 1.0676, - "step": 230 - }, - { - "epoch": 0.017360589207876146, - "grad_norm": 1.631924474427164, - "learning_rate": 3.6334473267633984e-06, - "loss": 1.0003, - "step": 231 - }, - { - "epoch": 0.01743574327371111, - "grad_norm": 1.9177551150551497, - "learning_rate": 3.6363312033919277e-06, - "loss": 1.0893, - "step": 232 - }, - { - "epoch": 0.01751089733954607, - "grad_norm": 1.9107235805147886, - "learning_rate": 3.639202676211685e-06, - "loss": 1.2072, - "step": 233 - }, - { - "epoch": 0.017586051405381033, - "grad_norm": 1.7493272578194028, - "learning_rate": 3.642061851465635e-06, - "loss": 1.1226, - "step": 234 - }, - { - "epoch": 0.017661205471215993, - "grad_norm": 1.503098783839403, - "learning_rate": 3.644908834037544e-06, - "loss": 1.0557, - "step": 235 - }, - { - "epoch": 0.017736359537050956, - "grad_norm": 1.7921441203473851, - "learning_rate": 3.647743727475077e-06, - "loss": 1.1443, - "step": 236 - }, - { - "epoch": 0.017811513602885916, - "grad_norm": 1.7693531207770308, - "learning_rate": 3.650566634012385e-06, - "loss": 1.127, - "step": 237 - }, - { - "epoch": 0.01788666766872088, - "grad_norm": 1.618221965667775, - "learning_rate": 3.653377654592237e-06, - "loss": 1.0812, - "step": 238 - }, - { - "epoch": 0.01796182173455584, - "grad_norm": 5.33917504499832, - "learning_rate": 3.6561768888876717e-06, - "loss": 1.0664, - "step": 239 - }, - { - "epoch": 0.018036975800390802, - "grad_norm": 1.6831991702458473, - "learning_rate": 3.658964435323206e-06, - "loss": 1.0212, - "step": 240 - }, - { - "epoch": 0.018112129866225762, - "grad_norm": 1.7707726038074854, - "learning_rate": 3.6617403910956026e-06, - "loss": 1.0889, - "step": 241 - }, - { - "epoch": 0.018187283932060726, - "grad_norm": 1.4644424092126371, - "learning_rate": 3.6645048521942035e-06, - "loss": 1.1205, - "step": 242 - }, - { - "epoch": 0.018262437997895686, - "grad_norm": 1.4313908836623437, - "learning_rate": 3.667257913420846e-06, - "loss": 1.1053, - "step": 243 - }, - { - "epoch": 0.01833759206373065, - "grad_norm": 1.69512411028893, - "learning_rate": 3.6699996684093732e-06, - "loss": 1.0573, - "step": 244 - }, - { - "epoch": 0.01841274612956561, - "grad_norm": 2.512253106735515, - "learning_rate": 3.6727302096447376e-06, - "loss": 1.1433, - "step": 245 - }, - { - "epoch": 0.018487900195400572, - "grad_norm": 1.7529892595204744, - "learning_rate": 3.6754496284817233e-06, - "loss": 1.1632, - "step": 246 - }, - { - "epoch": 0.018563054261235532, - "grad_norm": 1.7925694505120457, - "learning_rate": 3.678158015163289e-06, - "loss": 1.0594, - "step": 247 - }, - { - "epoch": 0.018638208327070496, - "grad_norm": 2.06973281518147, - "learning_rate": 3.680855458838529e-06, - "loss": 1.0693, - "step": 248 - }, - { - "epoch": 0.018713362392905455, - "grad_norm": 2.0418626989596014, - "learning_rate": 3.6835420475802863e-06, - "loss": 1.0838, - "step": 249 - }, - { - "epoch": 0.01878851645874042, - "grad_norm": 1.8332928378516722, - "learning_rate": 3.686217868402408e-06, - "loss": 1.0107, - "step": 250 - }, - { - "epoch": 0.01886367052457538, - "grad_norm": 2.2569529867418137, - "learning_rate": 3.688883007276652e-06, - "loss": 1.0969, - "step": 251 - }, - { - "epoch": 0.018938824590410342, - "grad_norm": 1.6379579209545865, - "learning_rate": 3.6915375491492623e-06, - "loss": 1.0278, - "step": 252 - }, - { - "epoch": 0.019013978656245302, - "grad_norm": 1.4033598977215214, - "learning_rate": 3.694181577957216e-06, - "loss": 1.1124, - "step": 253 - }, - { - "epoch": 0.019089132722080265, - "grad_norm": 1.8221425038373842, - "learning_rate": 3.6968151766441486e-06, - "loss": 1.0547, - "step": 254 - }, - { - "epoch": 0.019164286787915225, - "grad_norm": 1.6932879898177928, - "learning_rate": 3.699438427175964e-06, - "loss": 1.1252, - "step": 255 - }, - { - "epoch": 0.01923944085375019, - "grad_norm": 1.7764710683788547, - "learning_rate": 3.702051410556147e-06, - "loss": 1.094, - "step": 256 - }, - { - "epoch": 0.01931459491958515, - "grad_norm": 1.8403693418295997, - "learning_rate": 3.7046542068407645e-06, - "loss": 0.9172, - "step": 257 - }, - { - "epoch": 0.019389748985420112, - "grad_norm": 1.808784820811994, - "learning_rate": 3.7072468951531883e-06, - "loss": 1.1524, - "step": 258 - }, - { - "epoch": 0.019464903051255072, - "grad_norm": 2.2239950803866964, - "learning_rate": 3.7098295536985265e-06, - "loss": 1.0448, - "step": 259 - }, - { - "epoch": 0.019540057117090035, - "grad_norm": 2.098464554351672, - "learning_rate": 3.7124022597777775e-06, - "loss": 1.0072, - "step": 260 - }, - { - "epoch": 0.019615211182924995, - "grad_norm": 2.119153113262454, - "learning_rate": 3.7149650898017115e-06, - "loss": 1.0923, - "step": 261 - }, - { - "epoch": 0.01969036524875996, - "grad_norm": 2.32352664163386, - "learning_rate": 3.717518119304489e-06, - "loss": 1.157, - "step": 262 - }, - { - "epoch": 0.01976551931459492, - "grad_norm": 1.888046587187407, - "learning_rate": 3.7200614229570204e-06, - "loss": 1.0065, - "step": 263 - }, - { - "epoch": 0.01984067338042988, - "grad_norm": 1.8736492377755878, - "learning_rate": 3.7225950745800667e-06, - "loss": 1.0922, - "step": 264 - }, - { - "epoch": 0.01991582744626484, - "grad_norm": 2.1684983165154796, - "learning_rate": 3.7251191471571003e-06, - "loss": 1.0736, - "step": 265 - }, - { - "epoch": 0.019990981512099805, - "grad_norm": 1.5576550709293973, - "learning_rate": 3.7276337128469165e-06, - "loss": 1.0505, - "step": 266 - }, - { - "epoch": 0.020066135577934765, - "grad_norm": 3.045228170731449, - "learning_rate": 3.730138842996013e-06, - "loss": 1.0912, - "step": 267 - }, - { - "epoch": 0.020141289643769728, - "grad_norm": 1.845708227402835, - "learning_rate": 3.7326346081507353e-06, - "loss": 1.086, - "step": 268 - }, - { - "epoch": 0.020216443709604688, - "grad_norm": 1.4238962751072861, - "learning_rate": 3.7351210780691997e-06, - "loss": 1.0822, - "step": 269 - }, - { - "epoch": 0.02029159777543965, - "grad_norm": 2.5242676683253182, - "learning_rate": 3.7375983217329894e-06, - "loss": 1.0147, - "step": 270 - }, - { - "epoch": 0.02036675184127461, - "grad_norm": 2.850309917263504, - "learning_rate": 3.7400664073586386e-06, - "loss": 1.0003, - "step": 271 - }, - { - "epoch": 0.020441905907109575, - "grad_norm": 2.087332269931272, - "learning_rate": 3.7425254024089058e-06, - "loss": 1.059, - "step": 272 - }, - { - "epoch": 0.020517059972944535, - "grad_norm": 1.9919275450800495, - "learning_rate": 3.7449753736038338e-06, - "loss": 1.1257, - "step": 273 - }, - { - "epoch": 0.020592214038779498, - "grad_norm": 1.7502994281190092, - "learning_rate": 3.7474163869316188e-06, - "loss": 1.179, - "step": 274 - }, - { - "epoch": 0.020667368104614458, - "grad_norm": 1.865551283512572, - "learning_rate": 3.749848507659269e-06, - "loss": 1.161, - "step": 275 - }, - { - "epoch": 0.02074252217044942, - "grad_norm": 2.3705834934775742, - "learning_rate": 3.752271800343079e-06, - "loss": 1.0816, - "step": 276 - }, - { - "epoch": 0.020817676236284385, - "grad_norm": 2.1703154089450223, - "learning_rate": 3.7546863288389105e-06, - "loss": 1.0451, - "step": 277 - }, - { - "epoch": 0.020892830302119345, - "grad_norm": 2.511810633282434, - "learning_rate": 3.757092156312287e-06, - "loss": 0.9654, - "step": 278 - }, - { - "epoch": 0.020967984367954308, - "grad_norm": 1.5777371373710807, - "learning_rate": 3.759489345248312e-06, - "loss": 1.0618, - "step": 279 - }, - { - "epoch": 0.021043138433789268, - "grad_norm": 1.9113249840506905, - "learning_rate": 3.7618779574614046e-06, - "loss": 1.035, - "step": 280 - }, - { - "epoch": 0.02111829249962423, - "grad_norm": 1.9540106024439838, - "learning_rate": 3.7642580541048654e-06, - "loss": 1.0515, - "step": 281 - }, - { - "epoch": 0.02119344656545919, - "grad_norm": 2.132877670242596, - "learning_rate": 3.766629695680268e-06, - "loss": 1.143, - "step": 282 - }, - { - "epoch": 0.021268600631294154, - "grad_norm": 0.8878687656596707, - "learning_rate": 3.7689929420466896e-06, - "loss": 0.8472, - "step": 283 - }, - { - "epoch": 0.021343754697129114, - "grad_norm": 0.7525011319883836, - "learning_rate": 3.7713478524297754e-06, - "loss": 0.832, - "step": 284 - }, - { - "epoch": 0.021418908762964078, - "grad_norm": 2.813795511902081, - "learning_rate": 3.7736944854306444e-06, - "loss": 1.165, - "step": 285 - }, - { - "epoch": 0.021494062828799038, - "grad_norm": 2.1353027561478486, - "learning_rate": 3.7760328990346385e-06, - "loss": 1.0944, - "step": 286 - }, - { - "epoch": 0.021569216894634, - "grad_norm": 1.888228292357857, - "learning_rate": 3.778363150619923e-06, - "loss": 1.0381, - "step": 287 - }, - { - "epoch": 0.02164437096046896, - "grad_norm": 2.0204735924029436, - "learning_rate": 3.78068529696593e-06, - "loss": 0.9284, - "step": 288 - }, - { - "epoch": 0.021719525026303924, - "grad_norm": 2.0831512244422545, - "learning_rate": 3.782999394261664e-06, - "loss": 1.1171, - "step": 289 - }, - { - "epoch": 0.021794679092138884, - "grad_norm": 2.3713965937416646, - "learning_rate": 3.785305498113854e-06, - "loss": 1.0919, - "step": 290 - }, - { - "epoch": 0.021869833157973848, - "grad_norm": 1.6095954966186334, - "learning_rate": 3.78760366355498e-06, - "loss": 1.1292, - "step": 291 - }, - { - "epoch": 0.021944987223808807, - "grad_norm": 6.667481595140193, - "learning_rate": 3.789893945051143e-06, - "loss": 1.0208, - "step": 292 - }, - { - "epoch": 0.02202014128964377, - "grad_norm": 1.9728911911455251, - "learning_rate": 3.792176396509817e-06, - "loss": 1.0671, - "step": 293 - }, - { - "epoch": 0.02209529535547873, - "grad_norm": 1.7076033761719145, - "learning_rate": 3.7944510712874613e-06, - "loss": 1.0913, - "step": 294 - }, - { - "epoch": 0.022170449421313694, - "grad_norm": 1.8692119907638634, - "learning_rate": 3.796718022197003e-06, - "loss": 1.0335, - "step": 295 - }, - { - "epoch": 0.022245603487148654, - "grad_norm": 2.144765038592867, - "learning_rate": 3.7989773015151948e-06, - "loss": 1.1111, - "step": 296 - }, - { - "epoch": 0.022320757552983617, - "grad_norm": 1.9290921918474002, - "learning_rate": 3.80122896098985e-06, - "loss": 1.0608, - "step": 297 - }, - { - "epoch": 0.022395911618818577, - "grad_norm": 2.8573512852802465, - "learning_rate": 3.803473051846953e-06, - "loss": 1.0055, - "step": 298 - }, - { - "epoch": 0.02247106568465354, - "grad_norm": 2.0862809373539917, - "learning_rate": 3.805709624797651e-06, - "loss": 1.1869, - "step": 299 - }, - { - "epoch": 0.0225462197504885, - "grad_norm": 2.258026218529017, - "learning_rate": 3.8079387300451326e-06, - "loss": 1.0919, - "step": 300 - }, - { - "epoch": 0.022621373816323464, - "grad_norm": 0.8182743000975866, - "learning_rate": 3.8101604172913872e-06, - "loss": 0.8455, - "step": 301 - }, - { - "epoch": 0.022696527882158424, - "grad_norm": 3.179683750909568, - "learning_rate": 3.8123747357438563e-06, - "loss": 1.0116, - "step": 302 - }, - { - "epoch": 0.022771681947993387, - "grad_norm": 2.2006594883199706, - "learning_rate": 3.814581734121971e-06, - "loss": 1.0272, - "step": 303 - }, - { - "epoch": 0.022846836013828347, - "grad_norm": 1.4374686527980705, - "learning_rate": 3.816781460663585e-06, - "loss": 1.1034, - "step": 304 - }, - { - "epoch": 0.02292199007966331, - "grad_norm": 2.0245484354900634, - "learning_rate": 3.818973963131299e-06, - "loss": 1.1101, - "step": 305 - }, - { - "epoch": 0.02299714414549827, - "grad_norm": 2.824360402038491, - "learning_rate": 3.821159288818688e-06, - "loss": 1.1283, - "step": 306 - }, - { - "epoch": 0.023072298211333234, - "grad_norm": 3.789859648549097, - "learning_rate": 3.823337484556417e-06, - "loss": 1.0466, - "step": 307 - }, - { - "epoch": 0.023147452277168194, - "grad_norm": 2.1733573410642824, - "learning_rate": 3.825508596718266e-06, - "loss": 1.1509, - "step": 308 - }, - { - "epoch": 0.023222606343003157, - "grad_norm": 4.6616094071044065, - "learning_rate": 3.827672671227052e-06, - "loss": 1.0466, - "step": 309 - }, - { - "epoch": 0.023297760408838117, - "grad_norm": 1.979391915068699, - "learning_rate": 3.8298297535604554e-06, - "loss": 0.9601, - "step": 310 - }, - { - "epoch": 0.02337291447467308, - "grad_norm": 2.819531361618737, - "learning_rate": 3.831979888756763e-06, - "loss": 1.0705, - "step": 311 - }, - { - "epoch": 0.02344806854050804, - "grad_norm": 2.3447624562863316, - "learning_rate": 3.834123121420502e-06, - "loss": 1.0899, - "step": 312 - }, - { - "epoch": 0.023523222606343003, - "grad_norm": 35.08170395161041, - "learning_rate": 3.836259495727992e-06, - "loss": 1.0394, - "step": 313 - }, - { - "epoch": 0.023598376672177963, - "grad_norm": 1.9333709896739344, - "learning_rate": 3.838389055432816e-06, - "loss": 1.0813, - "step": 314 - }, - { - "epoch": 0.023673530738012927, - "grad_norm": 2.0426104433789813, - "learning_rate": 3.840511843871188e-06, - "loss": 1.1522, - "step": 315 - }, - { - "epoch": 0.023748684803847887, - "grad_norm": 2.1848029812954843, - "learning_rate": 3.842627903967253e-06, - "loss": 1.0053, - "step": 316 - }, - { - "epoch": 0.02382383886968285, - "grad_norm": 1.709146895174153, - "learning_rate": 3.844737278238285e-06, - "loss": 0.9798, - "step": 317 - }, - { - "epoch": 0.02389899293551781, - "grad_norm": 2.0117531190176994, - "learning_rate": 3.846840008799825e-06, - "loss": 1.1551, - "step": 318 - }, - { - "epoch": 0.023974147001352773, - "grad_norm": 2.414743963060061, - "learning_rate": 3.848936137370715e-06, - "loss": 0.99, - "step": 319 - }, - { - "epoch": 0.024049301067187733, - "grad_norm": 0.7161579246656221, - "learning_rate": 3.851025705278073e-06, - "loss": 0.8301, - "step": 320 - }, - { - "epoch": 0.024124455133022697, - "grad_norm": 2.1717757607160975, - "learning_rate": 3.853108753462177e-06, - "loss": 1.0185, - "step": 321 - }, - { - "epoch": 0.02419960919885766, - "grad_norm": 0.755491379060669, - "learning_rate": 3.855185322481279e-06, - "loss": 0.8408, - "step": 322 - }, - { - "epoch": 0.02427476326469262, - "grad_norm": 2.0153778870974826, - "learning_rate": 3.857255452516343e-06, - "loss": 1.0784, - "step": 323 - }, - { - "epoch": 0.024349917330527583, - "grad_norm": 3.3009603779057857, - "learning_rate": 3.859319183375714e-06, - "loss": 1.0637, - "step": 324 - }, - { - "epoch": 0.024425071396362543, - "grad_norm": 1.9804681480752906, - "learning_rate": 3.861376554499704e-06, - "loss": 1.1505, - "step": 325 - }, - { - "epoch": 0.024500225462197506, - "grad_norm": 2.0540262180724005, - "learning_rate": 3.863427604965122e-06, - "loss": 1.0752, - "step": 326 - }, - { - "epoch": 0.024575379528032466, - "grad_norm": 2.0410000887192776, - "learning_rate": 3.865472373489722e-06, - "loss": 1.0325, - "step": 327 - }, - { - "epoch": 0.02465053359386743, - "grad_norm": 2.306684036556987, - "learning_rate": 3.8675108984365914e-06, - "loss": 1.0982, - "step": 328 - }, - { - "epoch": 0.02472568765970239, - "grad_norm": 1.7637471027742486, - "learning_rate": 3.869543217818467e-06, - "loss": 1.0414, - "step": 329 - }, - { - "epoch": 0.024800841725537353, - "grad_norm": 1.809552723222158, - "learning_rate": 3.871569369301993e-06, - "loss": 1.1076, - "step": 330 - }, - { - "epoch": 0.024875995791372313, - "grad_norm": 2.0602942723203177, - "learning_rate": 3.873589390211904e-06, - "loss": 1.0828, - "step": 331 - }, - { - "epoch": 0.024951149857207276, - "grad_norm": 2.133504343132567, - "learning_rate": 3.875603317535154e-06, - "loss": 1.0379, - "step": 332 - }, - { - "epoch": 0.025026303923042236, - "grad_norm": 1.832040880255206, - "learning_rate": 3.877611187924979e-06, - "loss": 1.1357, - "step": 333 - }, - { - "epoch": 0.0251014579888772, - "grad_norm": 1.8635860809554736, - "learning_rate": 3.879613037704899e-06, - "loss": 0.9661, - "step": 334 - }, - { - "epoch": 0.02517661205471216, - "grad_norm": 1.8999328375540507, - "learning_rate": 3.881608902872662e-06, - "loss": 0.9705, - "step": 335 - }, - { - "epoch": 0.025251766120547123, - "grad_norm": 12.524496870157536, - "learning_rate": 3.88359881910413e-06, - "loss": 1.0964, - "step": 336 - }, - { - "epoch": 0.025326920186382083, - "grad_norm": 3.575935593470937, - "learning_rate": 3.885582821757098e-06, - "loss": 0.9215, - "step": 337 - }, - { - "epoch": 0.025402074252217046, - "grad_norm": 2.356236068724876, - "learning_rate": 3.887560945875073e-06, - "loss": 1.0772, - "step": 338 - }, - { - "epoch": 0.025477228318052006, - "grad_norm": 0.7343214705417113, - "learning_rate": 3.889533226190982e-06, - "loss": 0.8098, - "step": 339 - }, - { - "epoch": 0.02555238238388697, - "grad_norm": 1.8927919564243096, - "learning_rate": 3.891499697130832e-06, - "loss": 1.0101, - "step": 340 - }, - { - "epoch": 0.02562753644972193, - "grad_norm": 2.4518995200474043, - "learning_rate": 3.893460392817316e-06, - "loss": 1.1322, - "step": 341 - }, - { - "epoch": 0.025702690515556893, - "grad_norm": 1.7664362301279497, - "learning_rate": 3.895415347073368e-06, - "loss": 1.0687, - "step": 342 - }, - { - "epoch": 0.025777844581391852, - "grad_norm": 3.8737877210791245, - "learning_rate": 3.89736459342566e-06, - "loss": 1.0427, - "step": 343 - }, - { - "epoch": 0.025852998647226816, - "grad_norm": 2.0484452376230435, - "learning_rate": 3.899308165108055e-06, - "loss": 1.1685, - "step": 344 - }, - { - "epoch": 0.025928152713061776, - "grad_norm": 1.9582557121747755, - "learning_rate": 3.9012460950650064e-06, - "loss": 1.0596, - "step": 345 - }, - { - "epoch": 0.02600330677889674, - "grad_norm": 1.7852003229823452, - "learning_rate": 3.903178415954904e-06, - "loss": 1.0826, - "step": 346 - }, - { - "epoch": 0.0260784608447317, - "grad_norm": 1.6947914412704845, - "learning_rate": 3.905105160153384e-06, - "loss": 1.143, - "step": 347 - }, - { - "epoch": 0.026153614910566662, - "grad_norm": 0.9170345016320448, - "learning_rate": 3.907026359756579e-06, - "loss": 0.8668, - "step": 348 - }, - { - "epoch": 0.026228768976401622, - "grad_norm": 1.8150133924804586, - "learning_rate": 3.908942046584326e-06, - "loss": 1.0783, - "step": 349 - }, - { - "epoch": 0.026303923042236586, - "grad_norm": 2.6798034287002075, - "learning_rate": 3.910852252183332e-06, - "loss": 1.1384, - "step": 350 - }, - { - "epoch": 0.026379077108071546, - "grad_norm": 0.7855185816149596, - "learning_rate": 3.912757007830285e-06, - "loss": 0.9036, - "step": 351 - }, - { - "epoch": 0.02645423117390651, - "grad_norm": 3.1390069997465773, - "learning_rate": 3.914656344534934e-06, - "loss": 1.0858, - "step": 352 - }, - { - "epoch": 0.02652938523974147, - "grad_norm": 2.3117165140371285, - "learning_rate": 3.916550293043113e-06, - "loss": 1.0427, - "step": 353 - }, - { - "epoch": 0.026604539305576432, - "grad_norm": 1.7621221313150135, - "learning_rate": 3.9184388838397275e-06, - "loss": 1.0097, - "step": 354 - }, - { - "epoch": 0.026679693371411392, - "grad_norm": 1.4241156058907753, - "learning_rate": 3.9203221471517014e-06, - "loss": 1.0091, - "step": 355 - }, - { - "epoch": 0.026754847437246355, - "grad_norm": 1.3537462266497802, - "learning_rate": 3.92220011295088e-06, - "loss": 1.1168, - "step": 356 - }, - { - "epoch": 0.026830001503081315, - "grad_norm": 2.076568938762709, - "learning_rate": 3.924072810956888e-06, - "loss": 1.0436, - "step": 357 - }, - { - "epoch": 0.02690515556891628, - "grad_norm": 1.8065933798721154, - "learning_rate": 3.925940270639954e-06, - "loss": 1.0311, - "step": 358 - }, - { - "epoch": 0.02698030963475124, - "grad_norm": 1.6859860163116676, - "learning_rate": 3.927802521223695e-06, - "loss": 1.0363, - "step": 359 - }, - { - "epoch": 0.027055463700586202, - "grad_norm": 2.1707636782355713, - "learning_rate": 3.929659591687857e-06, - "loss": 1.0611, - "step": 360 - }, - { - "epoch": 0.027130617766421162, - "grad_norm": 2.1721628121883056, - "learning_rate": 3.931511510771023e-06, - "loss": 1.0498, - "step": 361 - }, - { - "epoch": 0.027205771832256125, - "grad_norm": 1.565956641000098, - "learning_rate": 3.93335830697328e-06, - "loss": 1.1629, - "step": 362 - }, - { - "epoch": 0.027280925898091085, - "grad_norm": 1.6272657444389014, - "learning_rate": 3.935200008558854e-06, - "loss": 1.0477, - "step": 363 - }, - { - "epoch": 0.02735607996392605, - "grad_norm": 1.720579595385877, - "learning_rate": 3.937036643558701e-06, - "loss": 0.9706, - "step": 364 - }, - { - "epoch": 0.02743123402976101, - "grad_norm": 1.7476367661162109, - "learning_rate": 3.938868239773069e-06, - "loss": 1.0311, - "step": 365 - }, - { - "epoch": 0.027506388095595972, - "grad_norm": 1.75596154990277, - "learning_rate": 3.940694824774024e-06, - "loss": 1.109, - "step": 366 - }, - { - "epoch": 0.02758154216143093, - "grad_norm": 1.7640446029309351, - "learning_rate": 3.942516425907938e-06, - "loss": 1.0313, - "step": 367 - }, - { - "epoch": 0.027656696227265895, - "grad_norm": 4.853317361078774, - "learning_rate": 3.944333070297947e-06, - "loss": 1.0717, - "step": 368 - }, - { - "epoch": 0.02773185029310086, - "grad_norm": 2.1499718198481945, - "learning_rate": 3.946144784846375e-06, - "loss": 1.021, - "step": 369 - }, - { - "epoch": 0.02780700435893582, - "grad_norm": 3.3720527778830234, - "learning_rate": 3.947951596237121e-06, - "loss": 0.9826, - "step": 370 - }, - { - "epoch": 0.02788215842477078, - "grad_norm": 2.7471517204321354, - "learning_rate": 3.949753530938024e-06, - "loss": 1.0053, - "step": 371 - }, - { - "epoch": 0.02795731249060574, - "grad_norm": 1.8008542656318172, - "learning_rate": 3.951550615203179e-06, - "loss": 1.0599, - "step": 372 - }, - { - "epoch": 0.028032466556440705, - "grad_norm": 1.8766451929375945, - "learning_rate": 3.953342875075244e-06, - "loss": 1.023, - "step": 373 - }, - { - "epoch": 0.028107620622275665, - "grad_norm": 2.593703487852447, - "learning_rate": 3.955130336387693e-06, - "loss": 0.9555, - "step": 374 - }, - { - "epoch": 0.028182774688110628, - "grad_norm": 1.9521234614733871, - "learning_rate": 3.956913024767059e-06, - "loss": 1.0562, - "step": 375 - }, - { - "epoch": 0.028257928753945588, - "grad_norm": 1.6488319395121047, - "learning_rate": 3.958690965635136e-06, - "loss": 1.0764, - "step": 376 - }, - { - "epoch": 0.02833308281978055, - "grad_norm": 2.5344388678578236, - "learning_rate": 3.96046418421115e-06, - "loss": 0.9704, - "step": 377 - }, - { - "epoch": 0.02840823688561551, - "grad_norm": 21.114226172772707, - "learning_rate": 3.962232705513913e-06, - "loss": 1.0296, - "step": 378 - }, - { - "epoch": 0.028483390951450475, - "grad_norm": 2.8280159945055905, - "learning_rate": 3.963996554363933e-06, - "loss": 1.1149, - "step": 379 - }, - { - "epoch": 0.028558545017285435, - "grad_norm": 1.759513269550556, - "learning_rate": 3.965755755385512e-06, - "loss": 1.0724, - "step": 380 - }, - { - "epoch": 0.028633699083120398, - "grad_norm": 4.408798454760352, - "learning_rate": 3.9675103330088e-06, - "loss": 0.9764, - "step": 381 - }, - { - "epoch": 0.028708853148955358, - "grad_norm": 1.6818235078582036, - "learning_rate": 3.969260311471838e-06, - "loss": 1.0771, - "step": 382 - }, - { - "epoch": 0.02878400721479032, - "grad_norm": 2.252948841927948, - "learning_rate": 3.971005714822564e-06, - "loss": 1.0507, - "step": 383 - }, - { - "epoch": 0.02885916128062528, - "grad_norm": 1.7037481165516575, - "learning_rate": 3.9727465669207975e-06, - "loss": 1.0732, - "step": 384 - }, - { - "epoch": 0.028934315346460245, - "grad_norm": 2.632977941731432, - "learning_rate": 3.974482891440193e-06, - "loss": 0.992, - "step": 385 - }, - { - "epoch": 0.029009469412295204, - "grad_norm": 1.7888378018863307, - "learning_rate": 3.976214711870174e-06, - "loss": 1.0114, - "step": 386 - }, - { - "epoch": 0.029084623478130168, - "grad_norm": 1.5815751536793439, - "learning_rate": 3.977942051517839e-06, - "loss": 1.0528, - "step": 387 - }, - { - "epoch": 0.029159777543965128, - "grad_norm": 1.5997865832569649, - "learning_rate": 3.979664933509847e-06, - "loss": 1.0546, - "step": 388 - }, - { - "epoch": 0.02923493160980009, - "grad_norm": 2.6177353015203204, - "learning_rate": 3.9813833807942695e-06, - "loss": 0.9888, - "step": 389 - }, - { - "epoch": 0.02931008567563505, - "grad_norm": 1.6384122913435988, - "learning_rate": 3.9830974161424286e-06, - "loss": 1.0634, - "step": 390 - }, - { - "epoch": 0.029385239741470014, - "grad_norm": 1.9989502058978807, - "learning_rate": 3.984807062150705e-06, - "loss": 1.0959, - "step": 391 - }, - { - "epoch": 0.029460393807304974, - "grad_norm": 2.243372906207832, - "learning_rate": 3.986512341242329e-06, - "loss": 1.0899, - "step": 392 - }, - { - "epoch": 0.029535547873139938, - "grad_norm": 8.312349475845789, - "learning_rate": 3.98821327566914e-06, - "loss": 1.0251, - "step": 393 - }, - { - "epoch": 0.029610701938974898, - "grad_norm": 2.3297226307687184, - "learning_rate": 3.989909887513334e-06, - "loss": 1.131, - "step": 394 - }, - { - "epoch": 0.02968585600480986, - "grad_norm": 3.911243756229224, - "learning_rate": 3.991602198689179e-06, - "loss": 1.0661, - "step": 395 - }, - { - "epoch": 0.02976101007064482, - "grad_norm": 1.8706030470977537, - "learning_rate": 3.993290230944718e-06, - "loss": 1.1196, - "step": 396 - }, - { - "epoch": 0.029836164136479784, - "grad_norm": 2.5960731673135036, - "learning_rate": 3.994974005863441e-06, - "loss": 1.0438, - "step": 397 - }, - { - "epoch": 0.029911318202314744, - "grad_norm": 2.671596936485749, - "learning_rate": 3.996653544865945e-06, - "loss": 0.9666, - "step": 398 - }, - { - "epoch": 0.029986472268149707, - "grad_norm": 1.7985768096022652, - "learning_rate": 3.9983288692115676e-06, - "loss": 1.0664, - "step": 399 - }, - { - "epoch": 0.030061626333984667, - "grad_norm": 1.9783487202475274, - "learning_rate": 4e-06, - "loss": 1.0866, - "step": 400 - }, - { - "epoch": 0.03013678039981963, - "grad_norm": 2.9276504962655108, - "learning_rate": 3.999999940746171e-06, - "loss": 1.1527, - "step": 401 - }, - { - "epoch": 0.03021193446565459, - "grad_norm": 1.983214752541869, - "learning_rate": 3.999999762984686e-06, - "loss": 1.1123, - "step": 402 - }, - { - "epoch": 0.030287088531489554, - "grad_norm": 2.2298795920739094, - "learning_rate": 3.9999994667155576e-06, - "loss": 1.0241, - "step": 403 - }, - { - "epoch": 0.030362242597324514, - "grad_norm": 1.6962963009659533, - "learning_rate": 3.999999051938802e-06, - "loss": 0.9726, - "step": 404 - }, - { - "epoch": 0.030437396663159477, - "grad_norm": 2.139587819343279, - "learning_rate": 3.9999985186544445e-06, - "loss": 0.9459, - "step": 405 - }, - { - "epoch": 0.030512550728994437, - "grad_norm": 2.581077411044977, - "learning_rate": 3.999997866862515e-06, - "loss": 1.067, - "step": 406 - }, - { - "epoch": 0.0305877047948294, - "grad_norm": 2.0182505222954483, - "learning_rate": 3.9999970965630544e-06, - "loss": 1.1405, - "step": 407 - }, - { - "epoch": 0.03066285886066436, - "grad_norm": 1.606056124493915, - "learning_rate": 3.9999962077561075e-06, - "loss": 1.0719, - "step": 408 - }, - { - "epoch": 0.030738012926499324, - "grad_norm": 1.8819279789253196, - "learning_rate": 3.999995200441726e-06, - "loss": 1.0107, - "step": 409 - }, - { - "epoch": 0.030813166992334284, - "grad_norm": 1.7008472575256772, - "learning_rate": 3.999994074619971e-06, - "loss": 1.1123, - "step": 410 - }, - { - "epoch": 0.030888321058169247, - "grad_norm": 1.8070608938962924, - "learning_rate": 3.999992830290909e-06, - "loss": 1.0413, - "step": 411 - }, - { - "epoch": 0.030963475124004207, - "grad_norm": 1.7970040979864046, - "learning_rate": 3.999991467454612e-06, - "loss": 1.0841, - "step": 412 - }, - { - "epoch": 0.03103862918983917, - "grad_norm": 2.128835022460981, - "learning_rate": 3.999989986111163e-06, - "loss": 1.0114, - "step": 413 - }, - { - "epoch": 0.031113783255674134, - "grad_norm": 2.255129477258013, - "learning_rate": 3.999988386260648e-06, - "loss": 1.0688, - "step": 414 - }, - { - "epoch": 0.031188937321509094, - "grad_norm": 1.8651701431798773, - "learning_rate": 3.999986667903163e-06, - "loss": 1.0058, - "step": 415 - }, - { - "epoch": 0.03126409138734405, - "grad_norm": 1.6638788829383022, - "learning_rate": 3.999984831038811e-06, - "loss": 1.0779, - "step": 416 - }, - { - "epoch": 0.03133924545317902, - "grad_norm": 2.4076994456609375, - "learning_rate": 3.999982875667697e-06, - "loss": 1.0092, - "step": 417 - }, - { - "epoch": 0.03141439951901398, - "grad_norm": 2.3431826095438444, - "learning_rate": 3.999980801789941e-06, - "loss": 1.0237, - "step": 418 - }, - { - "epoch": 0.031489553584848944, - "grad_norm": 1.6269072563968567, - "learning_rate": 3.999978609405662e-06, - "loss": 1.0302, - "step": 419 - }, - { - "epoch": 0.0315647076506839, - "grad_norm": 1.6653555560867737, - "learning_rate": 3.999976298514994e-06, - "loss": 1.0911, - "step": 420 - }, - { - "epoch": 0.03163986171651886, - "grad_norm": 1.7436369609768458, - "learning_rate": 3.999973869118071e-06, - "loss": 1.2069, - "step": 421 - }, - { - "epoch": 0.03171501578235383, - "grad_norm": 1.803909398237664, - "learning_rate": 3.999971321215038e-06, - "loss": 1.1312, - "step": 422 - }, - { - "epoch": 0.03179016984818879, - "grad_norm": 1.804501478507984, - "learning_rate": 3.999968654806046e-06, - "loss": 1.0534, - "step": 423 - }, - { - "epoch": 0.03186532391402375, - "grad_norm": 1.598017793892918, - "learning_rate": 3.999965869891253e-06, - "loss": 0.8723, - "step": 424 - }, - { - "epoch": 0.03194047797985871, - "grad_norm": 0.860175055108587, - "learning_rate": 3.999962966470823e-06, - "loss": 0.9177, - "step": 425 - }, - { - "epoch": 0.03201563204569367, - "grad_norm": 1.731327300291208, - "learning_rate": 3.9999599445449295e-06, - "loss": 1.0804, - "step": 426 - }, - { - "epoch": 0.03209078611152864, - "grad_norm": 1.482079990452187, - "learning_rate": 3.999956804113751e-06, - "loss": 1.0199, - "step": 427 - }, - { - "epoch": 0.03216594017736359, - "grad_norm": 1.6416009815584933, - "learning_rate": 3.999953545177472e-06, - "loss": 1.1257, - "step": 428 - }, - { - "epoch": 0.032241094243198556, - "grad_norm": 1.494598278690196, - "learning_rate": 3.9999501677362885e-06, - "loss": 1.0186, - "step": 429 - }, - { - "epoch": 0.03231624830903352, - "grad_norm": 1.586637821509653, - "learning_rate": 3.9999466717903995e-06, - "loss": 1.0143, - "step": 430 - }, - { - "epoch": 0.03239140237486848, - "grad_norm": 1.6769098069669413, - "learning_rate": 3.999943057340012e-06, - "loss": 1.0786, - "step": 431 - }, - { - "epoch": 0.03246655644070344, - "grad_norm": 1.7311685150470575, - "learning_rate": 3.999939324385339e-06, - "loss": 1.0612, - "step": 432 - }, - { - "epoch": 0.0325417105065384, - "grad_norm": 2.084785171711637, - "learning_rate": 3.999935472926604e-06, - "loss": 1.0353, - "step": 433 - }, - { - "epoch": 0.032616864572373366, - "grad_norm": 1.4666759375076903, - "learning_rate": 3.9999315029640325e-06, - "loss": 1.1322, - "step": 434 - }, - { - "epoch": 0.03269201863820833, - "grad_norm": 2.0517250432443848, - "learning_rate": 3.999927414497862e-06, - "loss": 1.0213, - "step": 435 - }, - { - "epoch": 0.032767172704043286, - "grad_norm": 2.1715591530328733, - "learning_rate": 3.999923207528334e-06, - "loss": 0.9936, - "step": 436 - }, - { - "epoch": 0.03284232676987825, - "grad_norm": 2.315161127534702, - "learning_rate": 3.999918882055698e-06, - "loss": 1.0506, - "step": 437 - }, - { - "epoch": 0.03291748083571321, - "grad_norm": 1.615365370174669, - "learning_rate": 3.9999144380802095e-06, - "loss": 1.1167, - "step": 438 - }, - { - "epoch": 0.032992634901548176, - "grad_norm": 1.9608743969008169, - "learning_rate": 3.999909875602132e-06, - "loss": 1.0349, - "step": 439 - }, - { - "epoch": 0.03306778896738313, - "grad_norm": 0.7695580492339399, - "learning_rate": 3.999905194621737e-06, - "loss": 0.8507, - "step": 440 - }, - { - "epoch": 0.033142943033218096, - "grad_norm": 1.9250318450275354, - "learning_rate": 3.999900395139301e-06, - "loss": 1.0649, - "step": 441 - }, - { - "epoch": 0.03321809709905306, - "grad_norm": 2.427960533355512, - "learning_rate": 3.999895477155108e-06, - "loss": 1.0183, - "step": 442 - }, - { - "epoch": 0.03329325116488802, - "grad_norm": 1.76382313836905, - "learning_rate": 3.9998904406694504e-06, - "loss": 1.1105, - "step": 443 - }, - { - "epoch": 0.03336840523072298, - "grad_norm": 1.8817090485121666, - "learning_rate": 3.999885285682626e-06, - "loss": 1.1427, - "step": 444 - }, - { - "epoch": 0.03344355929655794, - "grad_norm": 1.8454614874687434, - "learning_rate": 3.99988001219494e-06, - "loss": 1.1199, - "step": 445 - }, - { - "epoch": 0.033518713362392906, - "grad_norm": 2.06994187978338, - "learning_rate": 3.999874620206705e-06, - "loss": 1.0664, - "step": 446 - }, - { - "epoch": 0.03359386742822787, - "grad_norm": 1.6453467587402943, - "learning_rate": 3.999869109718242e-06, - "loss": 1.0442, - "step": 447 - }, - { - "epoch": 0.033669021494062826, - "grad_norm": 3.065970342474006, - "learning_rate": 3.999863480729875e-06, - "loss": 1.0487, - "step": 448 - }, - { - "epoch": 0.03374417555989779, - "grad_norm": 1.9100978975022727, - "learning_rate": 3.999857733241938e-06, - "loss": 1.0149, - "step": 449 - }, - { - "epoch": 0.03381932962573275, - "grad_norm": 1.517685737869301, - "learning_rate": 3.999851867254774e-06, - "loss": 1.0403, - "step": 450 - }, - { - "epoch": 0.033894483691567716, - "grad_norm": 1.829284756555358, - "learning_rate": 3.9998458827687286e-06, - "loss": 1.0053, - "step": 451 - }, - { - "epoch": 0.03396963775740267, - "grad_norm": 0.6734855776306719, - "learning_rate": 3.999839779784157e-06, - "loss": 0.8368, - "step": 452 - }, - { - "epoch": 0.034044791823237636, - "grad_norm": 1.7819642759696135, - "learning_rate": 3.999833558301419e-06, - "loss": 0.9961, - "step": 453 - }, - { - "epoch": 0.0341199458890726, - "grad_norm": 1.7984726810411367, - "learning_rate": 3.999827218320886e-06, - "loss": 1.0314, - "step": 454 - }, - { - "epoch": 0.03419509995490756, - "grad_norm": 0.8285597613645415, - "learning_rate": 3.999820759842933e-06, - "loss": 0.8669, - "step": 455 - }, - { - "epoch": 0.03427025402074252, - "grad_norm": 3.023148081724685, - "learning_rate": 3.999814182867941e-06, - "loss": 1.1258, - "step": 456 - }, - { - "epoch": 0.03434540808657748, - "grad_norm": 1.8617805499514082, - "learning_rate": 3.999807487396301e-06, - "loss": 1.1455, - "step": 457 - }, - { - "epoch": 0.034420562152412446, - "grad_norm": 2.5439799886672176, - "learning_rate": 3.999800673428411e-06, - "loss": 1.0029, - "step": 458 - }, - { - "epoch": 0.03449571621824741, - "grad_norm": 2.64001069456882, - "learning_rate": 3.999793740964672e-06, - "loss": 1.0619, - "step": 459 - }, - { - "epoch": 0.03457087028408237, - "grad_norm": 1.7850662308915695, - "learning_rate": 3.999786690005496e-06, - "loss": 1.0764, - "step": 460 - }, - { - "epoch": 0.03464602434991733, - "grad_norm": 2.13161840303179, - "learning_rate": 3.999779520551302e-06, - "loss": 0.9212, - "step": 461 - }, - { - "epoch": 0.03472117841575229, - "grad_norm": 0.8598070619087697, - "learning_rate": 3.9997722326025135e-06, - "loss": 0.8467, - "step": 462 - }, - { - "epoch": 0.034796332481587255, - "grad_norm": 2.76280844852188, - "learning_rate": 3.999764826159562e-06, - "loss": 1.002, - "step": 463 - }, - { - "epoch": 0.03487148654742222, - "grad_norm": 1.720997745089076, - "learning_rate": 3.999757301222887e-06, - "loss": 1.0444, - "step": 464 - }, - { - "epoch": 0.034946640613257175, - "grad_norm": 1.2797056054777927, - "learning_rate": 3.999749657792934e-06, - "loss": 1.0195, - "step": 465 - }, - { - "epoch": 0.03502179467909214, - "grad_norm": 1.6087834280197177, - "learning_rate": 3.999741895870157e-06, - "loss": 1.0178, - "step": 466 - }, - { - "epoch": 0.0350969487449271, - "grad_norm": 1.7020763003306334, - "learning_rate": 3.9997340154550145e-06, - "loss": 1.0402, - "step": 467 - }, - { - "epoch": 0.035172102810762065, - "grad_norm": 1.4332551894214582, - "learning_rate": 3.999726016547974e-06, - "loss": 1.0842, - "step": 468 - }, - { - "epoch": 0.03524725687659702, - "grad_norm": 3.2655839689314514, - "learning_rate": 3.9997178991495105e-06, - "loss": 1.0554, - "step": 469 - }, - { - "epoch": 0.035322410942431985, - "grad_norm": 1.570502426276751, - "learning_rate": 3.9997096632601035e-06, - "loss": 1.0694, - "step": 470 - }, - { - "epoch": 0.03539756500826695, - "grad_norm": 1.3980337657670914, - "learning_rate": 3.999701308880242e-06, - "loss": 1.0105, - "step": 471 - }, - { - "epoch": 0.03547271907410191, - "grad_norm": 1.8206469838379709, - "learning_rate": 3.999692836010419e-06, - "loss": 1.0825, - "step": 472 - }, - { - "epoch": 0.03554787313993687, - "grad_norm": 1.6228087396282467, - "learning_rate": 3.99968424465114e-06, - "loss": 1.1883, - "step": 473 - }, - { - "epoch": 0.03562302720577183, - "grad_norm": 1.916826704503317, - "learning_rate": 3.999675534802911e-06, - "loss": 1.0205, - "step": 474 - }, - { - "epoch": 0.035698181271606795, - "grad_norm": 1.5288337795603884, - "learning_rate": 3.99966670646625e-06, - "loss": 0.9448, - "step": 475 - }, - { - "epoch": 0.03577333533744176, - "grad_norm": 2.116129931284438, - "learning_rate": 3.999657759641679e-06, - "loss": 1.0704, - "step": 476 - }, - { - "epoch": 0.035848489403276715, - "grad_norm": 2.5877860569970483, - "learning_rate": 3.999648694329729e-06, - "loss": 1.1318, - "step": 477 - }, - { - "epoch": 0.03592364346911168, - "grad_norm": 1.2778995249746676, - "learning_rate": 3.9996395105309365e-06, - "loss": 1.0261, - "step": 478 - }, - { - "epoch": 0.03599879753494664, - "grad_norm": 1.8749457669362137, - "learning_rate": 3.999630208245846e-06, - "loss": 0.9784, - "step": 479 - }, - { - "epoch": 0.036073951600781605, - "grad_norm": 1.5970124904022298, - "learning_rate": 3.9996207874750075e-06, - "loss": 1.0804, - "step": 480 - }, - { - "epoch": 0.03614910566661656, - "grad_norm": 1.5168440330811765, - "learning_rate": 3.999611248218982e-06, - "loss": 1.0996, - "step": 481 - }, - { - "epoch": 0.036224259732451525, - "grad_norm": 2.335761124194183, - "learning_rate": 3.999601590478332e-06, - "loss": 0.9153, - "step": 482 - }, - { - "epoch": 0.03629941379828649, - "grad_norm": 1.650007178551057, - "learning_rate": 3.99959181425363e-06, - "loss": 1.1364, - "step": 483 - }, - { - "epoch": 0.03637456786412145, - "grad_norm": 2.1178864736416765, - "learning_rate": 3.999581919545458e-06, - "loss": 1.0375, - "step": 484 - }, - { - "epoch": 0.03644972192995641, - "grad_norm": 1.9517021795028555, - "learning_rate": 3.999571906354399e-06, - "loss": 1.1544, - "step": 485 - }, - { - "epoch": 0.03652487599579137, - "grad_norm": 1.6843260126297668, - "learning_rate": 3.999561774681048e-06, - "loss": 1.1042, - "step": 486 - }, - { - "epoch": 0.036600030061626335, - "grad_norm": 0.8830852256073788, - "learning_rate": 3.999551524526005e-06, - "loss": 0.948, - "step": 487 - }, - { - "epoch": 0.0366751841274613, - "grad_norm": 5.710612526556744, - "learning_rate": 3.9995411558898775e-06, - "loss": 1.015, - "step": 488 - }, - { - "epoch": 0.036750338193296254, - "grad_norm": 4.986764365036457, - "learning_rate": 3.9995306687732795e-06, - "loss": 1.032, - "step": 489 - }, - { - "epoch": 0.03682549225913122, - "grad_norm": 0.9138801588461509, - "learning_rate": 3.9995200631768326e-06, - "loss": 0.9222, - "step": 490 - }, - { - "epoch": 0.03690064632496618, - "grad_norm": 2.85562538312282, - "learning_rate": 3.999509339101166e-06, - "loss": 1.0558, - "step": 491 - }, - { - "epoch": 0.036975800390801145, - "grad_norm": 1.640532790726269, - "learning_rate": 3.999498496546914e-06, - "loss": 1.0358, - "step": 492 - }, - { - "epoch": 0.0370509544566361, - "grad_norm": 1.6885238649660284, - "learning_rate": 3.99948753551472e-06, - "loss": 1.0174, - "step": 493 - }, - { - "epoch": 0.037126108522471064, - "grad_norm": 1.6195910739476533, - "learning_rate": 3.999476456005232e-06, - "loss": 1.0921, - "step": 494 - }, - { - "epoch": 0.03720126258830603, - "grad_norm": 1.6800113099716592, - "learning_rate": 3.999465258019108e-06, - "loss": 0.9464, - "step": 495 - }, - { - "epoch": 0.03727641665414099, - "grad_norm": 1.6862768049209274, - "learning_rate": 3.999453941557011e-06, - "loss": 0.9696, - "step": 496 - }, - { - "epoch": 0.03735157071997595, - "grad_norm": 1.9239055792228714, - "learning_rate": 3.9994425066196105e-06, - "loss": 1.0623, - "step": 497 - }, - { - "epoch": 0.03742672478581091, - "grad_norm": 1.7057553898394784, - "learning_rate": 3.999430953207586e-06, - "loss": 1.0849, - "step": 498 - }, - { - "epoch": 0.037501878851645874, - "grad_norm": 1.6993926562184372, - "learning_rate": 3.999419281321621e-06, - "loss": 1.0632, - "step": 499 - }, - { - "epoch": 0.03757703291748084, - "grad_norm": 8.760651127553873, - "learning_rate": 3.999407490962408e-06, - "loss": 0.9825, - "step": 500 - }, - { - "epoch": 0.037652186983315794, - "grad_norm": 1.582825487239838, - "learning_rate": 3.999395582130644e-06, - "loss": 0.9624, - "step": 501 - }, - { - "epoch": 0.03772734104915076, - "grad_norm": 2.1971916220176944, - "learning_rate": 3.999383554827037e-06, - "loss": 1.0307, - "step": 502 - }, - { - "epoch": 0.03780249511498572, - "grad_norm": 1.8370276278607467, - "learning_rate": 3.999371409052297e-06, - "loss": 1.118, - "step": 503 - }, - { - "epoch": 0.037877649180820684, - "grad_norm": 1.9895336351295723, - "learning_rate": 3.999359144807145e-06, - "loss": 1.0908, - "step": 504 - }, - { - "epoch": 0.03795280324665565, - "grad_norm": 1.666017700050913, - "learning_rate": 3.999346762092307e-06, - "loss": 1.1459, - "step": 505 - }, - { - "epoch": 0.038027957312490604, - "grad_norm": 2.655124560521552, - "learning_rate": 3.999334260908518e-06, - "loss": 1.0297, - "step": 506 - }, - { - "epoch": 0.03810311137832557, - "grad_norm": 1.7541108893484292, - "learning_rate": 3.999321641256519e-06, - "loss": 1.0456, - "step": 507 - }, - { - "epoch": 0.03817826544416053, - "grad_norm": 1.7024516603214321, - "learning_rate": 3.999308903137056e-06, - "loss": 1.0174, - "step": 508 - }, - { - "epoch": 0.038253419509995494, - "grad_norm": 3.7204320613613637, - "learning_rate": 3.999296046550884e-06, - "loss": 1.0, - "step": 509 - }, - { - "epoch": 0.03832857357583045, - "grad_norm": 2.268500877133007, - "learning_rate": 3.999283071498766e-06, - "loss": 1.0646, - "step": 510 - }, - { - "epoch": 0.038403727641665414, - "grad_norm": 1.7283560296643454, - "learning_rate": 3.9992699779814704e-06, - "loss": 1.0535, - "step": 511 - }, - { - "epoch": 0.03847888170750038, - "grad_norm": 1.897468413742187, - "learning_rate": 3.999256765999773e-06, - "loss": 1.0229, - "step": 512 - }, - { - "epoch": 0.03855403577333534, - "grad_norm": 1.5883491107574692, - "learning_rate": 3.999243435554456e-06, - "loss": 1.0383, - "step": 513 - }, - { - "epoch": 0.0386291898391703, - "grad_norm": 2.3967677961544087, - "learning_rate": 3.999229986646311e-06, - "loss": 1.0559, - "step": 514 - }, - { - "epoch": 0.03870434390500526, - "grad_norm": 1.6534664587420382, - "learning_rate": 3.999216419276132e-06, - "loss": 1.1257, - "step": 515 - }, - { - "epoch": 0.038779497970840224, - "grad_norm": 2.2526525121068968, - "learning_rate": 3.999202733444726e-06, - "loss": 1.009, - "step": 516 - }, - { - "epoch": 0.03885465203667519, - "grad_norm": 3.1518484716832673, - "learning_rate": 3.999188929152902e-06, - "loss": 1.0832, - "step": 517 - }, - { - "epoch": 0.038929806102510144, - "grad_norm": 2.5391872903528605, - "learning_rate": 3.999175006401478e-06, - "loss": 1.0092, - "step": 518 - }, - { - "epoch": 0.03900496016834511, - "grad_norm": 1.910634673259133, - "learning_rate": 3.999160965191281e-06, - "loss": 1.039, - "step": 519 - }, - { - "epoch": 0.03908011423418007, - "grad_norm": 1.915145318615641, - "learning_rate": 3.99914680552314e-06, - "loss": 0.9903, - "step": 520 - }, - { - "epoch": 0.039155268300015034, - "grad_norm": 2.3332923022704364, - "learning_rate": 3.999132527397897e-06, - "loss": 1.1208, - "step": 521 - }, - { - "epoch": 0.03923042236584999, - "grad_norm": 1.4620393076359999, - "learning_rate": 3.999118130816395e-06, - "loss": 0.9926, - "step": 522 - }, - { - "epoch": 0.03930557643168495, - "grad_norm": 1.723938549039631, - "learning_rate": 3.999103615779489e-06, - "loss": 1.0306, - "step": 523 - }, - { - "epoch": 0.03938073049751992, - "grad_norm": 0.7049838819419838, - "learning_rate": 3.99908898228804e-06, - "loss": 0.8644, - "step": 524 - }, - { - "epoch": 0.03945588456335488, - "grad_norm": 1.714226057513222, - "learning_rate": 3.999074230342913e-06, - "loss": 1.0493, - "step": 525 - }, - { - "epoch": 0.03953103862918984, - "grad_norm": 1.67055965982253, - "learning_rate": 3.999059359944982e-06, - "loss": 0.998, - "step": 526 - }, - { - "epoch": 0.0396061926950248, - "grad_norm": 2.865002873071425, - "learning_rate": 3.99904437109513e-06, - "loss": 1.0787, - "step": 527 - }, - { - "epoch": 0.03968134676085976, - "grad_norm": 1.5565075979025902, - "learning_rate": 3.999029263794244e-06, - "loss": 1.0664, - "step": 528 - }, - { - "epoch": 0.03975650082669473, - "grad_norm": 1.8301124528822545, - "learning_rate": 3.999014038043219e-06, - "loss": 1.0517, - "step": 529 - }, - { - "epoch": 0.03983165489252968, - "grad_norm": 1.658261283510611, - "learning_rate": 3.9989986938429574e-06, - "loss": 1.1147, - "step": 530 - }, - { - "epoch": 0.03990680895836465, - "grad_norm": 2.2045221971037607, - "learning_rate": 3.9989832311943695e-06, - "loss": 1.0495, - "step": 531 - }, - { - "epoch": 0.03998196302419961, - "grad_norm": 1.5664158390350977, - "learning_rate": 3.99896765009837e-06, - "loss": 1.0066, - "step": 532 - }, - { - "epoch": 0.04005711709003457, - "grad_norm": 1.8319320163814574, - "learning_rate": 3.998951950555883e-06, - "loss": 0.9982, - "step": 533 - }, - { - "epoch": 0.04013227115586953, - "grad_norm": 1.013046983448363, - "learning_rate": 3.998936132567837e-06, - "loss": 0.8546, - "step": 534 - }, - { - "epoch": 0.04020742522170449, - "grad_norm": 2.453464848977306, - "learning_rate": 3.998920196135172e-06, - "loss": 1.0341, - "step": 535 - }, - { - "epoch": 0.040282579287539456, - "grad_norm": 1.4644402113431865, - "learning_rate": 3.998904141258831e-06, - "loss": 1.0673, - "step": 536 - }, - { - "epoch": 0.04035773335337442, - "grad_norm": 2.931532459436528, - "learning_rate": 3.9988879679397644e-06, - "loss": 1.0962, - "step": 537 - }, - { - "epoch": 0.040432887419209376, - "grad_norm": 2.7606381213528604, - "learning_rate": 3.9988716761789324e-06, - "loss": 1.0959, - "step": 538 - }, - { - "epoch": 0.04050804148504434, - "grad_norm": 2.0845261274508866, - "learning_rate": 3.998855265977299e-06, - "loss": 1.063, - "step": 539 - }, - { - "epoch": 0.0405831955508793, - "grad_norm": 1.7601529964140965, - "learning_rate": 3.998838737335837e-06, - "loss": 1.0373, - "step": 540 - }, - { - "epoch": 0.040658349616714266, - "grad_norm": 1.9902911566513652, - "learning_rate": 3.998822090255526e-06, - "loss": 1.0718, - "step": 541 - }, - { - "epoch": 0.04073350368254922, - "grad_norm": 1.9025331751453605, - "learning_rate": 3.9988053247373515e-06, - "loss": 1.0265, - "step": 542 - }, - { - "epoch": 0.040808657748384186, - "grad_norm": 1.9857765114493806, - "learning_rate": 3.998788440782309e-06, - "loss": 1.0205, - "step": 543 - }, - { - "epoch": 0.04088381181421915, - "grad_norm": 1.5973273127532075, - "learning_rate": 3.998771438391396e-06, - "loss": 1.0198, - "step": 544 - }, - { - "epoch": 0.04095896588005411, - "grad_norm": 1.9854255358513562, - "learning_rate": 3.9987543175656214e-06, - "loss": 1.012, - "step": 545 - }, - { - "epoch": 0.04103411994588907, - "grad_norm": 1.6846714332195116, - "learning_rate": 3.998737078306001e-06, - "loss": 1.1143, - "step": 546 - }, - { - "epoch": 0.04110927401172403, - "grad_norm": 1.8874572824861067, - "learning_rate": 3.998719720613554e-06, - "loss": 1.0785, - "step": 547 - }, - { - "epoch": 0.041184428077558996, - "grad_norm": 0.8897120163075101, - "learning_rate": 3.99870224448931e-06, - "loss": 0.9363, - "step": 548 - }, - { - "epoch": 0.04125958214339396, - "grad_norm": 1.995964270048332, - "learning_rate": 3.998684649934305e-06, - "loss": 1.0614, - "step": 549 - }, - { - "epoch": 0.041334736209228916, - "grad_norm": 1.5383137852069755, - "learning_rate": 3.9986669369495805e-06, - "loss": 1.036, - "step": 550 - }, - { - "epoch": 0.04140989027506388, - "grad_norm": 1.8250202648135117, - "learning_rate": 3.998649105536187e-06, - "loss": 1.065, - "step": 551 - }, - { - "epoch": 0.04148504434089884, - "grad_norm": 2.9517357197342844, - "learning_rate": 3.998631155695181e-06, - "loss": 1.0759, - "step": 552 - }, - { - "epoch": 0.041560198406733806, - "grad_norm": 2.34626675583024, - "learning_rate": 3.9986130874276244e-06, - "loss": 1.0545, - "step": 553 - }, - { - "epoch": 0.04163535247256877, - "grad_norm": 2.2645625286907176, - "learning_rate": 3.998594900734591e-06, - "loss": 1.0776, - "step": 554 - }, - { - "epoch": 0.041710506538403726, - "grad_norm": 2.7056940472367152, - "learning_rate": 3.998576595617155e-06, - "loss": 1.0034, - "step": 555 - }, - { - "epoch": 0.04178566060423869, - "grad_norm": 1.7415053738585893, - "learning_rate": 3.998558172076404e-06, - "loss": 1.1527, - "step": 556 - }, - { - "epoch": 0.04186081467007365, - "grad_norm": 2.4055252965541767, - "learning_rate": 3.998539630113427e-06, - "loss": 1.0337, - "step": 557 - }, - { - "epoch": 0.041935968735908616, - "grad_norm": 2.003328414800935, - "learning_rate": 3.998520969729325e-06, - "loss": 1.0919, - "step": 558 - }, - { - "epoch": 0.04201112280174357, - "grad_norm": 1.8251631781380684, - "learning_rate": 3.998502190925202e-06, - "loss": 1.09, - "step": 559 - }, - { - "epoch": 0.042086276867578536, - "grad_norm": 1.4761626494402773, - "learning_rate": 3.998483293702172e-06, - "loss": 1.0175, - "step": 560 - }, - { - "epoch": 0.0421614309334135, - "grad_norm": 0.7164283241523649, - "learning_rate": 3.998464278061353e-06, - "loss": 0.8515, - "step": 561 - }, - { - "epoch": 0.04223658499924846, - "grad_norm": 2.3598036218825706, - "learning_rate": 3.998445144003874e-06, - "loss": 1.0324, - "step": 562 - }, - { - "epoch": 0.04231173906508342, - "grad_norm": 1.534398481573919, - "learning_rate": 3.9984258915308674e-06, - "loss": 1.1015, - "step": 563 - }, - { - "epoch": 0.04238689313091838, - "grad_norm": 0.7129678344720685, - "learning_rate": 3.998406520643475e-06, - "loss": 0.798, - "step": 564 - }, - { - "epoch": 0.042462047196753346, - "grad_norm": 2.3103593764728054, - "learning_rate": 3.998387031342843e-06, - "loss": 1.0358, - "step": 565 - }, - { - "epoch": 0.04253720126258831, - "grad_norm": 1.707489999543558, - "learning_rate": 3.998367423630127e-06, - "loss": 1.049, - "step": 566 - }, - { - "epoch": 0.042612355328423265, - "grad_norm": 1.8548356753052961, - "learning_rate": 3.9983476975064885e-06, - "loss": 1.0801, - "step": 567 - }, - { - "epoch": 0.04268750939425823, - "grad_norm": 1.8031864281410168, - "learning_rate": 3.998327852973098e-06, - "loss": 1.0362, - "step": 568 - }, - { - "epoch": 0.04276266346009319, - "grad_norm": 1.6041268265305872, - "learning_rate": 3.998307890031129e-06, - "loss": 1.0706, - "step": 569 - }, - { - "epoch": 0.042837817525928155, - "grad_norm": 1.774745039023067, - "learning_rate": 3.998287808681766e-06, - "loss": 1.0784, - "step": 570 - }, - { - "epoch": 0.04291297159176311, - "grad_norm": 0.7832670875363225, - "learning_rate": 3.998267608926198e-06, - "loss": 0.9036, - "step": 571 - }, - { - "epoch": 0.042988125657598075, - "grad_norm": 1.8441855999020236, - "learning_rate": 3.998247290765623e-06, - "loss": 1.1272, - "step": 572 - }, - { - "epoch": 0.04306327972343304, - "grad_norm": 2.034602875259904, - "learning_rate": 3.9982268542012435e-06, - "loss": 1.0015, - "step": 573 - }, - { - "epoch": 0.043138433789268, - "grad_norm": 2.0601635038431145, - "learning_rate": 3.998206299234272e-06, - "loss": 0.9341, - "step": 574 - }, - { - "epoch": 0.04321358785510296, - "grad_norm": 1.8530707674630438, - "learning_rate": 3.998185625865924e-06, - "loss": 1.0967, - "step": 575 - }, - { - "epoch": 0.04328874192093792, - "grad_norm": 2.2200950521448086, - "learning_rate": 3.998164834097428e-06, - "loss": 1.0235, - "step": 576 - }, - { - "epoch": 0.043363895986772885, - "grad_norm": 0.794170915332756, - "learning_rate": 3.998143923930013e-06, - "loss": 0.8758, - "step": 577 - }, - { - "epoch": 0.04343905005260785, - "grad_norm": 1.574736025048003, - "learning_rate": 3.998122895364919e-06, - "loss": 1.082, - "step": 578 - }, - { - "epoch": 0.043514204118442805, - "grad_norm": 2.791090369641866, - "learning_rate": 3.998101748403393e-06, - "loss": 1.037, - "step": 579 - }, - { - "epoch": 0.04358935818427777, - "grad_norm": 1.6992718567715828, - "learning_rate": 3.998080483046687e-06, - "loss": 0.9981, - "step": 580 - }, - { - "epoch": 0.04366451225011273, - "grad_norm": 2.4536920365936545, - "learning_rate": 3.998059099296061e-06, - "loss": 1.0596, - "step": 581 - }, - { - "epoch": 0.043739666315947695, - "grad_norm": 1.8401832348352039, - "learning_rate": 3.9980375971527814e-06, - "loss": 1.1084, - "step": 582 - }, - { - "epoch": 0.04381482038178265, - "grad_norm": 1.7735744877141357, - "learning_rate": 3.998015976618124e-06, - "loss": 1.0867, - "step": 583 - }, - { - "epoch": 0.043889974447617615, - "grad_norm": 1.5481999435426765, - "learning_rate": 3.997994237693369e-06, - "loss": 1.1147, - "step": 584 - }, - { - "epoch": 0.04396512851345258, - "grad_norm": 0.6692551834908902, - "learning_rate": 3.997972380379804e-06, - "loss": 0.8446, - "step": 585 - }, - { - "epoch": 0.04404028257928754, - "grad_norm": 1.646750793985002, - "learning_rate": 3.997950404678726e-06, - "loss": 1.1159, - "step": 586 - }, - { - "epoch": 0.0441154366451225, - "grad_norm": 1.1492779828644473, - "learning_rate": 3.997928310591435e-06, - "loss": 0.9804, - "step": 587 - }, - { - "epoch": 0.04419059071095746, - "grad_norm": 3.1480220397507304, - "learning_rate": 3.997906098119241e-06, - "loss": 1.0552, - "step": 588 - }, - { - "epoch": 0.044265744776792425, - "grad_norm": 1.5844843930131562, - "learning_rate": 3.997883767263461e-06, - "loss": 1.0435, - "step": 589 - }, - { - "epoch": 0.04434089884262739, - "grad_norm": 2.4823645368267466, - "learning_rate": 3.997861318025417e-06, - "loss": 0.9818, - "step": 590 - }, - { - "epoch": 0.044416052908462345, - "grad_norm": 2.055819819477736, - "learning_rate": 3.997838750406439e-06, - "loss": 0.9434, - "step": 591 - }, - { - "epoch": 0.04449120697429731, - "grad_norm": 1.8719195811164309, - "learning_rate": 3.997816064407865e-06, - "loss": 1.05, - "step": 592 - }, - { - "epoch": 0.04456636104013227, - "grad_norm": 1.865204847811118, - "learning_rate": 3.997793260031039e-06, - "loss": 1.0406, - "step": 593 - }, - { - "epoch": 0.044641515105967235, - "grad_norm": 1.8101135854751704, - "learning_rate": 3.997770337277313e-06, - "loss": 1.0658, - "step": 594 - }, - { - "epoch": 0.04471666917180219, - "grad_norm": 0.7876794988935172, - "learning_rate": 3.997747296148044e-06, - "loss": 0.8982, - "step": 595 - }, - { - "epoch": 0.044791823237637154, - "grad_norm": 1.8183911706949236, - "learning_rate": 3.997724136644597e-06, - "loss": 1.0622, - "step": 596 - }, - { - "epoch": 0.04486697730347212, - "grad_norm": 2.160920544252328, - "learning_rate": 3.997700858768346e-06, - "loss": 1.0226, - "step": 597 - }, - { - "epoch": 0.04494213136930708, - "grad_norm": 2.8498898133510684, - "learning_rate": 3.99767746252067e-06, - "loss": 1.0359, - "step": 598 - }, - { - "epoch": 0.045017285435142045, - "grad_norm": 1.7835835346692996, - "learning_rate": 3.997653947902954e-06, - "loss": 1.0765, - "step": 599 - }, - { - "epoch": 0.045092439500977, - "grad_norm": 1.5134502420907083, - "learning_rate": 3.997630314916592e-06, - "loss": 1.1247, - "step": 600 - }, - { - "epoch": 0.045167593566811964, - "grad_norm": 1.6461950299029124, - "learning_rate": 3.9976065635629845e-06, - "loss": 1.0185, - "step": 601 - }, - { - "epoch": 0.04524274763264693, - "grad_norm": 0.739691865148263, - "learning_rate": 3.997582693843539e-06, - "loss": 0.8357, - "step": 602 - }, - { - "epoch": 0.04531790169848189, - "grad_norm": 1.4374198061923347, - "learning_rate": 3.997558705759669e-06, - "loss": 1.0665, - "step": 603 - }, - { - "epoch": 0.04539305576431685, - "grad_norm": 1.4920355434837331, - "learning_rate": 3.9975345993127975e-06, - "loss": 1.0632, - "step": 604 - }, - { - "epoch": 0.04546820983015181, - "grad_norm": 2.1928302673602933, - "learning_rate": 3.997510374504351e-06, - "loss": 1.0824, - "step": 605 - }, - { - "epoch": 0.045543363895986774, - "grad_norm": 2.476035558435986, - "learning_rate": 3.9974860313357665e-06, - "loss": 1.0874, - "step": 606 - }, - { - "epoch": 0.04561851796182174, - "grad_norm": 1.673809394152964, - "learning_rate": 3.997461569808485e-06, - "loss": 0.9814, - "step": 607 - }, - { - "epoch": 0.045693672027656694, - "grad_norm": 0.7403896843225087, - "learning_rate": 3.997436989923957e-06, - "loss": 0.842, - "step": 608 - }, - { - "epoch": 0.04576882609349166, - "grad_norm": 1.657022260167593, - "learning_rate": 3.997412291683639e-06, - "loss": 1.0363, - "step": 609 - }, - { - "epoch": 0.04584398015932662, - "grad_norm": 1.5309912015237275, - "learning_rate": 3.997387475088994e-06, - "loss": 1.0507, - "step": 610 - }, - { - "epoch": 0.045919134225161584, - "grad_norm": 3.256051811293418, - "learning_rate": 3.997362540141493e-06, - "loss": 1.0375, - "step": 611 - }, - { - "epoch": 0.04599428829099654, - "grad_norm": 1.549138734321839, - "learning_rate": 3.997337486842612e-06, - "loss": 0.998, - "step": 612 - }, - { - "epoch": 0.046069442356831504, - "grad_norm": 1.8238392992753771, - "learning_rate": 3.997312315193837e-06, - "loss": 0.9444, - "step": 613 - }, - { - "epoch": 0.04614459642266647, - "grad_norm": 1.9604765817673215, - "learning_rate": 3.9972870251966595e-06, - "loss": 1.0052, - "step": 614 - }, - { - "epoch": 0.04621975048850143, - "grad_norm": 1.6678609701528715, - "learning_rate": 3.997261616852578e-06, - "loss": 0.9724, - "step": 615 - }, - { - "epoch": 0.04629490455433639, - "grad_norm": 2.6225176381698954, - "learning_rate": 3.997236090163097e-06, - "loss": 1.0277, - "step": 616 - }, - { - "epoch": 0.04637005862017135, - "grad_norm": 0.6890371002221573, - "learning_rate": 3.997210445129729e-06, - "loss": 0.8828, - "step": 617 - }, - { - "epoch": 0.046445212686006314, - "grad_norm": 1.303216527564973, - "learning_rate": 3.997184681753996e-06, - "loss": 1.0243, - "step": 618 - }, - { - "epoch": 0.04652036675184128, - "grad_norm": 5.270394448967276, - "learning_rate": 3.997158800037422e-06, - "loss": 1.0271, - "step": 619 - }, - { - "epoch": 0.046595520817676234, - "grad_norm": 1.9768891196601452, - "learning_rate": 3.997132799981541e-06, - "loss": 0.9923, - "step": 620 - }, - { - "epoch": 0.0466706748835112, - "grad_norm": 1.4124315667243215, - "learning_rate": 3.997106681587895e-06, - "loss": 0.8973, - "step": 621 - }, - { - "epoch": 0.04674582894934616, - "grad_norm": 1.618528758951053, - "learning_rate": 3.99708044485803e-06, - "loss": 1.0444, - "step": 622 - }, - { - "epoch": 0.046820983015181124, - "grad_norm": 3.34338548045948, - "learning_rate": 3.997054089793501e-06, - "loss": 0.9621, - "step": 623 - }, - { - "epoch": 0.04689613708101608, - "grad_norm": 2.2942661338755137, - "learning_rate": 3.997027616395871e-06, - "loss": 1.0779, - "step": 624 - }, - { - "epoch": 0.046971291146851044, - "grad_norm": 1.7594407064252746, - "learning_rate": 3.997001024666707e-06, - "loss": 0.9767, - "step": 625 - }, - { - "epoch": 0.04704644521268601, - "grad_norm": 33.42609691660282, - "learning_rate": 3.996974314607585e-06, - "loss": 1.0687, - "step": 626 - }, - { - "epoch": 0.04712159927852097, - "grad_norm": 1.649799814507202, - "learning_rate": 3.996947486220088e-06, - "loss": 1.0364, - "step": 627 - }, - { - "epoch": 0.04719675334435593, - "grad_norm": 0.9220418617401981, - "learning_rate": 3.9969205395058064e-06, - "loss": 0.905, - "step": 628 - }, - { - "epoch": 0.04727190741019089, - "grad_norm": 1.9630251406606083, - "learning_rate": 3.996893474466336e-06, - "loss": 1.027, - "step": 629 - }, - { - "epoch": 0.04734706147602585, - "grad_norm": 2.172890953821311, - "learning_rate": 3.99686629110328e-06, - "loss": 1.06, - "step": 630 - }, - { - "epoch": 0.04742221554186082, - "grad_norm": 1.7501412743356053, - "learning_rate": 3.99683898941825e-06, - "loss": 1.0733, - "step": 631 - }, - { - "epoch": 0.04749736960769577, - "grad_norm": 2.4793096762904114, - "learning_rate": 3.996811569412864e-06, - "loss": 1.0377, - "step": 632 - }, - { - "epoch": 0.04757252367353074, - "grad_norm": 1.697356508058339, - "learning_rate": 3.996784031088745e-06, - "loss": 1.0798, - "step": 633 - }, - { - "epoch": 0.0476476777393657, - "grad_norm": 1.5822904398019575, - "learning_rate": 3.996756374447526e-06, - "loss": 1.0607, - "step": 634 - }, - { - "epoch": 0.04772283180520066, - "grad_norm": 1.5703160679714985, - "learning_rate": 3.996728599490847e-06, - "loss": 1.0714, - "step": 635 - }, - { - "epoch": 0.04779798587103562, - "grad_norm": 2.1737817312055725, - "learning_rate": 3.996700706220352e-06, - "loss": 1.0478, - "step": 636 - }, - { - "epoch": 0.04787313993687058, - "grad_norm": 3.604534916816972, - "learning_rate": 3.996672694637694e-06, - "loss": 1.0225, - "step": 637 - }, - { - "epoch": 0.04794829400270555, - "grad_norm": 1.858179878376537, - "learning_rate": 3.996644564744534e-06, - "loss": 1.0485, - "step": 638 - }, - { - "epoch": 0.04802344806854051, - "grad_norm": 1.7761497427385933, - "learning_rate": 3.996616316542537e-06, - "loss": 1.0137, - "step": 639 - }, - { - "epoch": 0.048098602134375466, - "grad_norm": 2.571235004208852, - "learning_rate": 3.996587950033377e-06, - "loss": 1.045, - "step": 640 - }, - { - "epoch": 0.04817375620021043, - "grad_norm": 1.8864342137323178, - "learning_rate": 3.996559465218736e-06, - "loss": 0.9832, - "step": 641 - }, - { - "epoch": 0.04824891026604539, - "grad_norm": 1.976369976573657, - "learning_rate": 3.996530862100302e-06, - "loss": 1.1212, - "step": 642 - }, - { - "epoch": 0.048324064331880356, - "grad_norm": 1.7728174949130482, - "learning_rate": 3.996502140679769e-06, - "loss": 0.9638, - "step": 643 - }, - { - "epoch": 0.04839921839771532, - "grad_norm": 1.6121964398095747, - "learning_rate": 3.996473300958839e-06, - "loss": 1.1478, - "step": 644 - }, - { - "epoch": 0.048474372463550276, - "grad_norm": 1.79291609144486, - "learning_rate": 3.99644434293922e-06, - "loss": 1.0735, - "step": 645 - }, - { - "epoch": 0.04854952652938524, - "grad_norm": 1.6238278783339661, - "learning_rate": 3.99641526662263e-06, - "loss": 1.1133, - "step": 646 - }, - { - "epoch": 0.0486246805952202, - "grad_norm": 1.5815399537347767, - "learning_rate": 3.99638607201079e-06, - "loss": 1.0232, - "step": 647 - }, - { - "epoch": 0.048699834661055166, - "grad_norm": 1.6543672044524966, - "learning_rate": 3.996356759105431e-06, - "loss": 1.0023, - "step": 648 - }, - { - "epoch": 0.04877498872689012, - "grad_norm": 1.8340777128638062, - "learning_rate": 3.996327327908289e-06, - "loss": 0.9552, - "step": 649 - }, - { - "epoch": 0.048850142792725086, - "grad_norm": 1.9565723360816973, - "learning_rate": 3.996297778421109e-06, - "loss": 0.9786, - "step": 650 - }, - { - "epoch": 0.04892529685856005, - "grad_norm": 1.790772066850834, - "learning_rate": 3.996268110645641e-06, - "loss": 0.9707, - "step": 651 - }, - { - "epoch": 0.04900045092439501, - "grad_norm": 1.6496662784981144, - "learning_rate": 3.996238324583643e-06, - "loss": 0.9852, - "step": 652 - }, - { - "epoch": 0.04907560499022997, - "grad_norm": 1.9798844959785866, - "learning_rate": 3.99620842023688e-06, - "loss": 1.0748, - "step": 653 - }, - { - "epoch": 0.04915075905606493, - "grad_norm": 0.7292966772159907, - "learning_rate": 3.996178397607125e-06, - "loss": 0.8444, - "step": 654 - }, - { - "epoch": 0.049225913121899896, - "grad_norm": 1.4860454596321278, - "learning_rate": 3.996148256696155e-06, - "loss": 1.1137, - "step": 655 - }, - { - "epoch": 0.04930106718773486, - "grad_norm": 1.8193977906487837, - "learning_rate": 3.996117997505758e-06, - "loss": 1.1378, - "step": 656 - }, - { - "epoch": 0.049376221253569816, - "grad_norm": 1.8032931270923838, - "learning_rate": 3.996087620037725e-06, - "loss": 1.0236, - "step": 657 - }, - { - "epoch": 0.04945137531940478, - "grad_norm": 0.7804615619927928, - "learning_rate": 3.996057124293857e-06, - "loss": 0.8072, - "step": 658 - }, - { - "epoch": 0.04952652938523974, - "grad_norm": 1.621391250704193, - "learning_rate": 3.996026510275962e-06, - "loss": 1.0427, - "step": 659 - }, - { - "epoch": 0.049601683451074706, - "grad_norm": 1.807486868951779, - "learning_rate": 3.995995777985852e-06, - "loss": 0.9802, - "step": 660 - }, - { - "epoch": 0.04967683751690966, - "grad_norm": 1.5352283878218407, - "learning_rate": 3.995964927425349e-06, - "loss": 1.0216, - "step": 661 - }, - { - "epoch": 0.049751991582744626, - "grad_norm": 2.6561816038413957, - "learning_rate": 3.995933958596282e-06, - "loss": 0.9578, - "step": 662 - }, - { - "epoch": 0.04982714564857959, - "grad_norm": 3.134876665186785, - "learning_rate": 3.995902871500485e-06, - "loss": 1.1779, - "step": 663 - }, - { - "epoch": 0.04990229971441455, - "grad_norm": 1.8852736346821284, - "learning_rate": 3.995871666139799e-06, - "loss": 1.0751, - "step": 664 - }, - { - "epoch": 0.04997745378024951, - "grad_norm": 1.57776418659165, - "learning_rate": 3.995840342516074e-06, - "loss": 1.0926, - "step": 665 - }, - { - "epoch": 0.05005260784608447, - "grad_norm": 1.607742666909636, - "learning_rate": 3.995808900631167e-06, - "loss": 1.0659, - "step": 666 - }, - { - "epoch": 0.050127761911919436, - "grad_norm": 2.2431961608636195, - "learning_rate": 3.99577734048694e-06, - "loss": 1.05, - "step": 667 - }, - { - "epoch": 0.0502029159777544, - "grad_norm": 2.1815566932153954, - "learning_rate": 3.9957456620852636e-06, - "loss": 0.9536, - "step": 668 - }, - { - "epoch": 0.050278070043589355, - "grad_norm": 1.7251363534525437, - "learning_rate": 3.995713865428014e-06, - "loss": 1.0122, - "step": 669 - }, - { - "epoch": 0.05035322410942432, - "grad_norm": 1.5565768316160944, - "learning_rate": 3.995681950517075e-06, - "loss": 1.0397, - "step": 670 - }, - { - "epoch": 0.05042837817525928, - "grad_norm": 1.7518231568029867, - "learning_rate": 3.995649917354339e-06, - "loss": 1.0442, - "step": 671 - }, - { - "epoch": 0.050503532241094246, - "grad_norm": 1.7284335531804542, - "learning_rate": 3.9956177659417036e-06, - "loss": 1.0133, - "step": 672 - }, - { - "epoch": 0.0505786863069292, - "grad_norm": 1.7141275843849304, - "learning_rate": 3.995585496281074e-06, - "loss": 1.0915, - "step": 673 - }, - { - "epoch": 0.050653840372764165, - "grad_norm": 2.4306878977274, - "learning_rate": 3.995553108374362e-06, - "loss": 1.0845, - "step": 674 - }, - { - "epoch": 0.05072899443859913, - "grad_norm": 1.7100273742492436, - "learning_rate": 3.995520602223487e-06, - "loss": 0.9152, - "step": 675 - }, - { - "epoch": 0.05080414850443409, - "grad_norm": 1.5990269384256135, - "learning_rate": 3.995487977830375e-06, - "loss": 1.1148, - "step": 676 - }, - { - "epoch": 0.05087930257026905, - "grad_norm": 1.6110744207231014, - "learning_rate": 3.995455235196959e-06, - "loss": 1.0063, - "step": 677 - }, - { - "epoch": 0.05095445663610401, - "grad_norm": 1.6153590236110804, - "learning_rate": 3.995422374325179e-06, - "loss": 1.0368, - "step": 678 - }, - { - "epoch": 0.051029610701938975, - "grad_norm": 1.825497948634161, - "learning_rate": 3.995389395216983e-06, - "loss": 1.0304, - "step": 679 - }, - { - "epoch": 0.05110476476777394, - "grad_norm": 1.5566926227150948, - "learning_rate": 3.9953562978743244e-06, - "loss": 0.9698, - "step": 680 - }, - { - "epoch": 0.051179918833608895, - "grad_norm": 1.6706197429307397, - "learning_rate": 3.995323082299164e-06, - "loss": 1.0481, - "step": 681 - }, - { - "epoch": 0.05125507289944386, - "grad_norm": 1.528449163398238, - "learning_rate": 3.9952897484934706e-06, - "loss": 0.9705, - "step": 682 - }, - { - "epoch": 0.05133022696527882, - "grad_norm": 1.8357152926117286, - "learning_rate": 3.9952562964592184e-06, - "loss": 1.0556, - "step": 683 - }, - { - "epoch": 0.051405381031113785, - "grad_norm": 1.6830296821674058, - "learning_rate": 3.995222726198391e-06, - "loss": 1.0953, - "step": 684 - }, - { - "epoch": 0.05148053509694874, - "grad_norm": 1.6460911834508498, - "learning_rate": 3.995189037712977e-06, - "loss": 1.0148, - "step": 685 - }, - { - "epoch": 0.051555689162783705, - "grad_norm": 1.685351947285273, - "learning_rate": 3.9951552310049715e-06, - "loss": 1.0712, - "step": 686 - }, - { - "epoch": 0.05163084322861867, - "grad_norm": 1.6139112078505315, - "learning_rate": 3.99512130607638e-06, - "loss": 1.0176, - "step": 687 - }, - { - "epoch": 0.05170599729445363, - "grad_norm": 2.0763608859019227, - "learning_rate": 3.995087262929209e-06, - "loss": 0.9557, - "step": 688 - }, - { - "epoch": 0.051781151360288595, - "grad_norm": 13.831072580446751, - "learning_rate": 3.99505310156548e-06, - "loss": 1.0436, - "step": 689 - }, - { - "epoch": 0.05185630542612355, - "grad_norm": 1.7348555165575255, - "learning_rate": 3.995018821987215e-06, - "loss": 1.0012, - "step": 690 - }, - { - "epoch": 0.051931459491958515, - "grad_norm": 1.968160147330382, - "learning_rate": 3.994984424196445e-06, - "loss": 1.0937, - "step": 691 - }, - { - "epoch": 0.05200661355779348, - "grad_norm": 1.617407668601836, - "learning_rate": 3.994949908195208e-06, - "loss": 1.0259, - "step": 692 - }, - { - "epoch": 0.05208176762362844, - "grad_norm": 0.7159151896510094, - "learning_rate": 3.994915273985551e-06, - "loss": 0.8302, - "step": 693 - }, - { - "epoch": 0.0521569216894634, - "grad_norm": 1.5606564899853126, - "learning_rate": 3.994880521569524e-06, - "loss": 0.9716, - "step": 694 - }, - { - "epoch": 0.05223207575529836, - "grad_norm": 1.6101022947286165, - "learning_rate": 3.994845650949187e-06, - "loss": 1.0116, - "step": 695 - }, - { - "epoch": 0.052307229821133325, - "grad_norm": 0.884282062081904, - "learning_rate": 3.994810662126607e-06, - "loss": 0.9341, - "step": 696 - }, - { - "epoch": 0.05238238388696829, - "grad_norm": 1.7162650437886935, - "learning_rate": 3.994775555103857e-06, - "loss": 1.0294, - "step": 697 - }, - { - "epoch": 0.052457537952803245, - "grad_norm": 1.6065069772311777, - "learning_rate": 3.994740329883016e-06, - "loss": 1.014, - "step": 698 - }, - { - "epoch": 0.05253269201863821, - "grad_norm": 1.771569768325826, - "learning_rate": 3.994704986466172e-06, - "loss": 0.9792, - "step": 699 - }, - { - "epoch": 0.05260784608447317, - "grad_norm": 4.028603564852126, - "learning_rate": 3.99466952485542e-06, - "loss": 1.0327, - "step": 700 - }, - { - "epoch": 0.052683000150308135, - "grad_norm": 1.9427277578519693, - "learning_rate": 3.994633945052861e-06, - "loss": 0.9432, - "step": 701 - }, - { - "epoch": 0.05275815421614309, - "grad_norm": 4.010136159359376, - "learning_rate": 3.994598247060602e-06, - "loss": 1.0356, - "step": 702 - }, - { - "epoch": 0.052833308281978054, - "grad_norm": 2.2158451675762776, - "learning_rate": 3.9945624308807585e-06, - "loss": 1.0839, - "step": 703 - }, - { - "epoch": 0.05290846234781302, - "grad_norm": 3.609515298866182, - "learning_rate": 3.994526496515454e-06, - "loss": 1.0751, - "step": 704 - }, - { - "epoch": 0.05298361641364798, - "grad_norm": 2.631265247871636, - "learning_rate": 3.994490443966818e-06, - "loss": 1.051, - "step": 705 - }, - { - "epoch": 0.05305877047948294, - "grad_norm": 0.7902456798489685, - "learning_rate": 3.994454273236984e-06, - "loss": 0.8146, - "step": 706 - }, - { - "epoch": 0.0531339245453179, - "grad_norm": 2.334990344466641, - "learning_rate": 3.994417984328098e-06, - "loss": 1.0737, - "step": 707 - }, - { - "epoch": 0.053209078611152864, - "grad_norm": 4.814861164250311, - "learning_rate": 3.994381577242309e-06, - "loss": 1.1017, - "step": 708 - }, - { - "epoch": 0.05328423267698783, - "grad_norm": 1.5787479191009024, - "learning_rate": 3.994345051981774e-06, - "loss": 1.0991, - "step": 709 - }, - { - "epoch": 0.053359386742822784, - "grad_norm": 2.5293944351056648, - "learning_rate": 3.994308408548659e-06, - "loss": 1.1393, - "step": 710 - }, - { - "epoch": 0.05343454080865775, - "grad_norm": 1.7826121292050852, - "learning_rate": 3.994271646945133e-06, - "loss": 1.1634, - "step": 711 - }, - { - "epoch": 0.05350969487449271, - "grad_norm": 2.827071441497942, - "learning_rate": 3.994234767173376e-06, - "loss": 0.9906, - "step": 712 - }, - { - "epoch": 0.053584848940327674, - "grad_norm": 1.8144822954968138, - "learning_rate": 3.994197769235572e-06, - "loss": 1.0715, - "step": 713 - }, - { - "epoch": 0.05366000300616263, - "grad_norm": 1.4838177889962678, - "learning_rate": 3.994160653133915e-06, - "loss": 1.0274, - "step": 714 - }, - { - "epoch": 0.053735157071997594, - "grad_norm": 1.316394432309135, - "learning_rate": 3.994123418870603e-06, - "loss": 1.0914, - "step": 715 - }, - { - "epoch": 0.05381031113783256, - "grad_norm": 1.5992689131345468, - "learning_rate": 3.994086066447841e-06, - "loss": 1.0123, - "step": 716 - }, - { - "epoch": 0.05388546520366752, - "grad_norm": 1.0269345240832788, - "learning_rate": 3.994048595867845e-06, - "loss": 0.8607, - "step": 717 - }, - { - "epoch": 0.05396061926950248, - "grad_norm": 1.793446861464666, - "learning_rate": 3.994011007132833e-06, - "loss": 1.0527, - "step": 718 - }, - { - "epoch": 0.05403577333533744, - "grad_norm": 0.697112550147894, - "learning_rate": 3.993973300245034e-06, - "loss": 0.8237, - "step": 719 - }, - { - "epoch": 0.054110927401172404, - "grad_norm": 2.2172724142340607, - "learning_rate": 3.993935475206682e-06, - "loss": 1.0212, - "step": 720 - }, - { - "epoch": 0.05418608146700737, - "grad_norm": 1.8149967754745213, - "learning_rate": 3.993897532020017e-06, - "loss": 0.9841, - "step": 721 - }, - { - "epoch": 0.054261235532842324, - "grad_norm": 2.4226967678722895, - "learning_rate": 3.993859470687288e-06, - "loss": 1.0427, - "step": 722 - }, - { - "epoch": 0.05433638959867729, - "grad_norm": 1.787943594249794, - "learning_rate": 3.993821291210751e-06, - "loss": 1.0483, - "step": 723 - }, - { - "epoch": 0.05441154366451225, - "grad_norm": 1.4393285648278147, - "learning_rate": 3.993782993592667e-06, - "loss": 0.9811, - "step": 724 - }, - { - "epoch": 0.054486697730347214, - "grad_norm": 3.8818551001885506, - "learning_rate": 3.993744577835306e-06, - "loss": 0.9897, - "step": 725 - }, - { - "epoch": 0.05456185179618217, - "grad_norm": 1.584598520978348, - "learning_rate": 3.993706043940945e-06, - "loss": 1.1294, - "step": 726 - }, - { - "epoch": 0.054637005862017134, - "grad_norm": 2.440420601817571, - "learning_rate": 3.993667391911866e-06, - "loss": 1.0389, - "step": 727 - }, - { - "epoch": 0.0547121599278521, - "grad_norm": 2.031227933323648, - "learning_rate": 3.993628621750359e-06, - "loss": 1.034, - "step": 728 - }, - { - "epoch": 0.05478731399368706, - "grad_norm": 1.8615033446514238, - "learning_rate": 3.993589733458723e-06, - "loss": 0.9922, - "step": 729 - }, - { - "epoch": 0.05486246805952202, - "grad_norm": 3.2909995421064737, - "learning_rate": 3.993550727039261e-06, - "loss": 0.9552, - "step": 730 - }, - { - "epoch": 0.05493762212535698, - "grad_norm": 1.8157510211431718, - "learning_rate": 3.993511602494285e-06, - "loss": 0.958, - "step": 731 - }, - { - "epoch": 0.055012776191191944, - "grad_norm": 1.8118921115536992, - "learning_rate": 3.993472359826112e-06, - "loss": 1.0096, - "step": 732 - }, - { - "epoch": 0.05508793025702691, - "grad_norm": 2.2857791410339043, - "learning_rate": 3.993432999037068e-06, - "loss": 1.0021, - "step": 733 - }, - { - "epoch": 0.05516308432286186, - "grad_norm": 1.66981443539154, - "learning_rate": 3.993393520129487e-06, - "loss": 1.0623, - "step": 734 - }, - { - "epoch": 0.05523823838869683, - "grad_norm": 2.2658612358317898, - "learning_rate": 3.993353923105705e-06, - "loss": 1.0556, - "step": 735 - }, - { - "epoch": 0.05531339245453179, - "grad_norm": 0.8172037345484406, - "learning_rate": 3.993314207968071e-06, - "loss": 0.9285, - "step": 736 - }, - { - "epoch": 0.05538854652036675, - "grad_norm": 0.7047317584025806, - "learning_rate": 3.993274374718938e-06, - "loss": 0.801, - "step": 737 - }, - { - "epoch": 0.05546370058620172, - "grad_norm": 1.7940093700287567, - "learning_rate": 3.9932344233606634e-06, - "loss": 1.0538, - "step": 738 - }, - { - "epoch": 0.05553885465203667, - "grad_norm": 1.907236625617493, - "learning_rate": 3.993194353895618e-06, - "loss": 1.0374, - "step": 739 - }, - { - "epoch": 0.05561400871787164, - "grad_norm": 2.280779228233073, - "learning_rate": 3.9931541663261756e-06, - "loss": 1.0456, - "step": 740 - }, - { - "epoch": 0.0556891627837066, - "grad_norm": 1.7798181099058283, - "learning_rate": 3.993113860654715e-06, - "loss": 1.0255, - "step": 741 - }, - { - "epoch": 0.05576431684954156, - "grad_norm": 1.937521657515044, - "learning_rate": 3.993073436883627e-06, - "loss": 1.0125, - "step": 742 - }, - { - "epoch": 0.05583947091537652, - "grad_norm": 1.6273453733057943, - "learning_rate": 3.993032895015304e-06, - "loss": 1.0181, - "step": 743 - }, - { - "epoch": 0.05591462498121148, - "grad_norm": 1.9309718996568552, - "learning_rate": 3.992992235052152e-06, - "loss": 1.0849, - "step": 744 - }, - { - "epoch": 0.05598977904704645, - "grad_norm": 1.5087030972678377, - "learning_rate": 3.992951456996578e-06, - "loss": 1.1061, - "step": 745 - }, - { - "epoch": 0.05606493311288141, - "grad_norm": 1.417425382557547, - "learning_rate": 3.9929105608509984e-06, - "loss": 1.0949, - "step": 746 - }, - { - "epoch": 0.056140087178716366, - "grad_norm": 1.5947366540125818, - "learning_rate": 3.9928695466178375e-06, - "loss": 0.9512, - "step": 747 - }, - { - "epoch": 0.05621524124455133, - "grad_norm": 1.5960296334166288, - "learning_rate": 3.992828414299524e-06, - "loss": 0.9732, - "step": 748 - }, - { - "epoch": 0.05629039531038629, - "grad_norm": 1.73882609197444, - "learning_rate": 3.9927871638984955e-06, - "loss": 1.0708, - "step": 749 - }, - { - "epoch": 0.056365549376221256, - "grad_norm": 2.2377176956744718, - "learning_rate": 3.992745795417198e-06, - "loss": 1.0495, - "step": 750 - }, - { - "epoch": 0.05644070344205621, - "grad_norm": 3.2532052593609415, - "learning_rate": 3.99270430885808e-06, - "loss": 1.0651, - "step": 751 - }, - { - "epoch": 0.056515857507891176, - "grad_norm": 1.6454927438108264, - "learning_rate": 3.992662704223602e-06, - "loss": 1.051, - "step": 752 - }, - { - "epoch": 0.05659101157372614, - "grad_norm": 1.769959118042283, - "learning_rate": 3.992620981516228e-06, - "loss": 1.0471, - "step": 753 - }, - { - "epoch": 0.0566661656395611, - "grad_norm": 1.4238562561521417, - "learning_rate": 3.9925791407384304e-06, - "loss": 1.0921, - "step": 754 - }, - { - "epoch": 0.05674131970539606, - "grad_norm": 1.4821500466151032, - "learning_rate": 3.9925371818926884e-06, - "loss": 1.0799, - "step": 755 - }, - { - "epoch": 0.05681647377123102, - "grad_norm": 0.7255871286292587, - "learning_rate": 3.992495104981489e-06, - "loss": 0.8795, - "step": 756 - }, - { - "epoch": 0.056891627837065986, - "grad_norm": 2.010857555160787, - "learning_rate": 3.992452910007325e-06, - "loss": 0.9975, - "step": 757 - }, - { - "epoch": 0.05696678190290095, - "grad_norm": 2.3282968342495476, - "learning_rate": 3.992410596972696e-06, - "loss": 1.1599, - "step": 758 - }, - { - "epoch": 0.057041935968735906, - "grad_norm": 1.4826923671411454, - "learning_rate": 3.99236816588011e-06, - "loss": 1.0747, - "step": 759 - }, - { - "epoch": 0.05711709003457087, - "grad_norm": 1.4320325952368544, - "learning_rate": 3.992325616732081e-06, - "loss": 0.9893, - "step": 760 - }, - { - "epoch": 0.05719224410040583, - "grad_norm": 1.56122834910643, - "learning_rate": 3.992282949531129e-06, - "loss": 1.0189, - "step": 761 - }, - { - "epoch": 0.057267398166240796, - "grad_norm": 1.778778168059171, - "learning_rate": 3.992240164279785e-06, - "loss": 1.1249, - "step": 762 - }, - { - "epoch": 0.05734255223207575, - "grad_norm": 2.0290900851086744, - "learning_rate": 3.9921972609805815e-06, - "loss": 1.0836, - "step": 763 - }, - { - "epoch": 0.057417706297910716, - "grad_norm": 1.6040333366306896, - "learning_rate": 3.992154239636062e-06, - "loss": 1.0904, - "step": 764 - }, - { - "epoch": 0.05749286036374568, - "grad_norm": 0.7691642544489183, - "learning_rate": 3.992111100248775e-06, - "loss": 0.8586, - "step": 765 - }, - { - "epoch": 0.05756801442958064, - "grad_norm": 1.8080974252116524, - "learning_rate": 3.992067842821277e-06, - "loss": 1.0507, - "step": 766 - }, - { - "epoch": 0.0576431684954156, - "grad_norm": 1.6544697388607716, - "learning_rate": 3.992024467356132e-06, - "loss": 0.9736, - "step": 767 - }, - { - "epoch": 0.05771832256125056, - "grad_norm": 1.7411586991121097, - "learning_rate": 3.991980973855908e-06, - "loss": 1.0943, - "step": 768 - }, - { - "epoch": 0.057793476627085526, - "grad_norm": 2.272688909553649, - "learning_rate": 3.991937362323183e-06, - "loss": 1.1295, - "step": 769 - }, - { - "epoch": 0.05786863069292049, - "grad_norm": 2.2212199043463383, - "learning_rate": 3.991893632760544e-06, - "loss": 1.0401, - "step": 770 - }, - { - "epoch": 0.057943784758755446, - "grad_norm": 0.7825176635552324, - "learning_rate": 3.991849785170578e-06, - "loss": 0.8489, - "step": 771 - }, - { - "epoch": 0.05801893882459041, - "grad_norm": 1.8858452901603036, - "learning_rate": 3.991805819555885e-06, - "loss": 0.9718, - "step": 772 - }, - { - "epoch": 0.05809409289042537, - "grad_norm": 1.76189162934931, - "learning_rate": 3.991761735919071e-06, - "loss": 0.9857, - "step": 773 - }, - { - "epoch": 0.058169246956260336, - "grad_norm": 1.955346941688041, - "learning_rate": 3.991717534262747e-06, - "loss": 1.094, - "step": 774 - }, - { - "epoch": 0.05824440102209529, - "grad_norm": 1.4119689787193785, - "learning_rate": 3.991673214589532e-06, - "loss": 1.1182, - "step": 775 - }, - { - "epoch": 0.058319555087930255, - "grad_norm": 2.510696986417314, - "learning_rate": 3.991628776902052e-06, - "loss": 1.0205, - "step": 776 - }, - { - "epoch": 0.05839470915376522, - "grad_norm": 1.873848497933274, - "learning_rate": 3.991584221202942e-06, - "loss": 1.0783, - "step": 777 - }, - { - "epoch": 0.05846986321960018, - "grad_norm": 2.2339600931426475, - "learning_rate": 3.991539547494839e-06, - "loss": 1.0972, - "step": 778 - }, - { - "epoch": 0.05854501728543514, - "grad_norm": 1.8332488806375322, - "learning_rate": 3.991494755780392e-06, - "loss": 0.9598, - "step": 779 - }, - { - "epoch": 0.0586201713512701, - "grad_norm": 1.4841158188868682, - "learning_rate": 3.991449846062255e-06, - "loss": 1.1333, - "step": 780 - }, - { - "epoch": 0.058695325417105065, - "grad_norm": 1.6101696682018758, - "learning_rate": 3.991404818343089e-06, - "loss": 1.1102, - "step": 781 - }, - { - "epoch": 0.05877047948294003, - "grad_norm": 1.9581591007832082, - "learning_rate": 3.991359672625562e-06, - "loss": 1.0076, - "step": 782 - }, - { - "epoch": 0.05884563354877499, - "grad_norm": 1.4816983634868122, - "learning_rate": 3.9913144089123485e-06, - "loss": 1.0734, - "step": 783 - }, - { - "epoch": 0.05892078761460995, - "grad_norm": 3.9942543964924075, - "learning_rate": 3.991269027206131e-06, - "loss": 1.02, - "step": 784 - }, - { - "epoch": 0.05899594168044491, - "grad_norm": 1.8270550066304567, - "learning_rate": 3.991223527509599e-06, - "loss": 0.9748, - "step": 785 - }, - { - "epoch": 0.059071095746279875, - "grad_norm": 1.9862274774579225, - "learning_rate": 3.991177909825448e-06, - "loss": 0.9903, - "step": 786 - }, - { - "epoch": 0.05914624981211484, - "grad_norm": 1.6519867351267286, - "learning_rate": 3.991132174156381e-06, - "loss": 1.0609, - "step": 787 - }, - { - "epoch": 0.059221403877949795, - "grad_norm": 1.7434386819745555, - "learning_rate": 3.991086320505108e-06, - "loss": 1.0949, - "step": 788 - }, - { - "epoch": 0.05929655794378476, - "grad_norm": 2.0713554778145666, - "learning_rate": 3.991040348874346e-06, - "loss": 1.0978, - "step": 789 - }, - { - "epoch": 0.05937171200961972, - "grad_norm": 3.0918039694130544, - "learning_rate": 3.99099425926682e-06, - "loss": 1.1164, - "step": 790 - }, - { - "epoch": 0.059446866075454685, - "grad_norm": 4.54644458739249, - "learning_rate": 3.990948051685259e-06, - "loss": 1.0351, - "step": 791 - }, - { - "epoch": 0.05952202014128964, - "grad_norm": 1.582416402769173, - "learning_rate": 3.990901726132403e-06, - "loss": 1.1208, - "step": 792 - }, - { - "epoch": 0.059597174207124605, - "grad_norm": 1.9548511659208483, - "learning_rate": 3.990855282610996e-06, - "loss": 0.9477, - "step": 793 - }, - { - "epoch": 0.05967232827295957, - "grad_norm": 2.28468659620198, - "learning_rate": 3.990808721123789e-06, - "loss": 0.9702, - "step": 794 - }, - { - "epoch": 0.05974748233879453, - "grad_norm": 3.250221244910743, - "learning_rate": 3.990762041673543e-06, - "loss": 1.0517, - "step": 795 - }, - { - "epoch": 0.05982263640462949, - "grad_norm": 2.0963711629300983, - "learning_rate": 3.990715244263023e-06, - "loss": 0.9966, - "step": 796 - }, - { - "epoch": 0.05989779047046445, - "grad_norm": 2.357562853399046, - "learning_rate": 3.9906683288950005e-06, - "loss": 1.0497, - "step": 797 - }, - { - "epoch": 0.059972944536299415, - "grad_norm": 1.3131410304910769, - "learning_rate": 3.990621295572258e-06, - "loss": 1.0636, - "step": 798 - }, - { - "epoch": 0.06004809860213438, - "grad_norm": 1.81091383884827, - "learning_rate": 3.99057414429758e-06, - "loss": 1.1145, - "step": 799 - }, - { - "epoch": 0.060123252667969335, - "grad_norm": 1.7400685926049624, - "learning_rate": 3.9905268750737625e-06, - "loss": 1.0317, - "step": 800 - }, - { - "epoch": 0.0601984067338043, - "grad_norm": 2.5804659301483706, - "learning_rate": 3.990479487903605e-06, - "loss": 0.9973, - "step": 801 - }, - { - "epoch": 0.06027356079963926, - "grad_norm": 1.4298937706070851, - "learning_rate": 3.990431982789917e-06, - "loss": 1.0208, - "step": 802 - }, - { - "epoch": 0.060348714865474225, - "grad_norm": 1.977590783467158, - "learning_rate": 3.9903843597355105e-06, - "loss": 1.1151, - "step": 803 - }, - { - "epoch": 0.06042386893130918, - "grad_norm": 1.9448493498093828, - "learning_rate": 3.99033661874321e-06, - "loss": 1.1663, - "step": 804 - }, - { - "epoch": 0.060499022997144145, - "grad_norm": 1.7044896249586845, - "learning_rate": 3.990288759815843e-06, - "loss": 0.8283, - "step": 805 - }, - { - "epoch": 0.06057417706297911, - "grad_norm": 1.8751043926424162, - "learning_rate": 3.990240782956245e-06, - "loss": 1.1136, - "step": 806 - }, - { - "epoch": 0.06064933112881407, - "grad_norm": 1.787276730231739, - "learning_rate": 3.99019268816726e-06, - "loss": 1.0396, - "step": 807 - }, - { - "epoch": 0.06072448519464903, - "grad_norm": 1.7327480761674607, - "learning_rate": 3.990144475451738e-06, - "loss": 1.0858, - "step": 808 - }, - { - "epoch": 0.06079963926048399, - "grad_norm": 1.4558847943567124, - "learning_rate": 3.990096144812534e-06, - "loss": 1.1287, - "step": 809 - }, - { - "epoch": 0.060874793326318954, - "grad_norm": 1.6359819378870077, - "learning_rate": 3.9900476962525125e-06, - "loss": 1.0769, - "step": 810 - }, - { - "epoch": 0.06094994739215392, - "grad_norm": 1.7278091969892282, - "learning_rate": 3.989999129774546e-06, - "loss": 0.9594, - "step": 811 - }, - { - "epoch": 0.061025101457988874, - "grad_norm": 2.1582785752860585, - "learning_rate": 3.989950445381511e-06, - "loss": 0.9949, - "step": 812 - }, - { - "epoch": 0.06110025552382384, - "grad_norm": 1.4953771722693778, - "learning_rate": 3.98990164307629e-06, - "loss": 1.0309, - "step": 813 - }, - { - "epoch": 0.0611754095896588, - "grad_norm": 2.0827593076170796, - "learning_rate": 3.989852722861778e-06, - "loss": 1.1513, - "step": 814 - }, - { - "epoch": 0.061250563655493764, - "grad_norm": 1.8471694530831004, - "learning_rate": 3.989803684740873e-06, - "loss": 1.0567, - "step": 815 - }, - { - "epoch": 0.06132571772132872, - "grad_norm": 1.9378121647544586, - "learning_rate": 3.9897545287164795e-06, - "loss": 0.9805, - "step": 816 - }, - { - "epoch": 0.061400871787163684, - "grad_norm": 1.473460820034754, - "learning_rate": 3.9897052547915115e-06, - "loss": 1.0755, - "step": 817 - }, - { - "epoch": 0.06147602585299865, - "grad_norm": 1.9649835313102022, - "learning_rate": 3.989655862968887e-06, - "loss": 1.0504, - "step": 818 - }, - { - "epoch": 0.06155117991883361, - "grad_norm": 1.2204870902585545, - "learning_rate": 3.989606353251535e-06, - "loss": 0.9997, - "step": 819 - }, - { - "epoch": 0.06162633398466857, - "grad_norm": 1.33941421074018, - "learning_rate": 3.989556725642388e-06, - "loss": 0.9799, - "step": 820 - }, - { - "epoch": 0.06170148805050353, - "grad_norm": 2.023266759318705, - "learning_rate": 3.989506980144385e-06, - "loss": 1.0231, - "step": 821 - }, - { - "epoch": 0.061776642116338494, - "grad_norm": 1.5002271162390954, - "learning_rate": 3.989457116760477e-06, - "loss": 1.0052, - "step": 822 - }, - { - "epoch": 0.06185179618217346, - "grad_norm": 0.7347081828643911, - "learning_rate": 3.989407135493615e-06, - "loss": 0.9061, - "step": 823 - }, - { - "epoch": 0.061926950248008414, - "grad_norm": 1.5777877598358692, - "learning_rate": 3.9893570363467625e-06, - "loss": 0.8758, - "step": 824 - }, - { - "epoch": 0.06200210431384338, - "grad_norm": 1.551547305675224, - "learning_rate": 3.9893068193228885e-06, - "loss": 1.0588, - "step": 825 - }, - { - "epoch": 0.06207725837967834, - "grad_norm": 2.6228039956429354, - "learning_rate": 3.989256484424968e-06, - "loss": 1.0046, - "step": 826 - }, - { - "epoch": 0.062152412445513304, - "grad_norm": 1.7048966878203344, - "learning_rate": 3.989206031655982e-06, - "loss": 0.9876, - "step": 827 - }, - { - "epoch": 0.06222756651134827, - "grad_norm": 1.78939269904185, - "learning_rate": 3.989155461018923e-06, - "loss": 1.0915, - "step": 828 - }, - { - "epoch": 0.062302720577183224, - "grad_norm": 1.482350223302353, - "learning_rate": 3.989104772516785e-06, - "loss": 1.052, - "step": 829 - }, - { - "epoch": 0.06237787464301819, - "grad_norm": 1.9461573715560776, - "learning_rate": 3.989053966152573e-06, - "loss": 1.0104, - "step": 830 - }, - { - "epoch": 0.06245302870885315, - "grad_norm": 1.8799320204598604, - "learning_rate": 3.9890030419292965e-06, - "loss": 1.0373, - "step": 831 - }, - { - "epoch": 0.0625281827746881, - "grad_norm": 2.529427425594353, - "learning_rate": 3.988951999849974e-06, - "loss": 0.9435, - "step": 832 - }, - { - "epoch": 0.06260333684052308, - "grad_norm": 1.8557767913500143, - "learning_rate": 3.988900839917628e-06, - "loss": 1.0504, - "step": 833 - }, - { - "epoch": 0.06267849090635803, - "grad_norm": 1.481969470323552, - "learning_rate": 3.988849562135293e-06, - "loss": 1.0204, - "step": 834 - }, - { - "epoch": 0.06275364497219299, - "grad_norm": 1.6252706742437966, - "learning_rate": 3.988798166506005e-06, - "loss": 1.0365, - "step": 835 - }, - { - "epoch": 0.06282879903802796, - "grad_norm": 0.8450163063191453, - "learning_rate": 3.98874665303281e-06, - "loss": 0.8773, - "step": 836 - }, - { - "epoch": 0.06290395310386292, - "grad_norm": 1.9229934043354584, - "learning_rate": 3.98869502171876e-06, - "loss": 1.1612, - "step": 837 - }, - { - "epoch": 0.06297910716969789, - "grad_norm": 1.6406797854040998, - "learning_rate": 3.9886432725669146e-06, - "loss": 1.0663, - "step": 838 - }, - { - "epoch": 0.06305426123553284, - "grad_norm": 1.7392321782308713, - "learning_rate": 3.988591405580341e-06, - "loss": 1.0741, - "step": 839 - }, - { - "epoch": 0.0631294153013678, - "grad_norm": 1.3725047445302092, - "learning_rate": 3.988539420762111e-06, - "loss": 1.0356, - "step": 840 - }, - { - "epoch": 0.06320456936720277, - "grad_norm": 1.8096090478348796, - "learning_rate": 3.988487318115306e-06, - "loss": 1.0485, - "step": 841 - }, - { - "epoch": 0.06327972343303773, - "grad_norm": 1.7628081816573218, - "learning_rate": 3.9884350976430136e-06, - "loss": 1.0749, - "step": 842 - }, - { - "epoch": 0.06335487749887268, - "grad_norm": 2.0158738566700696, - "learning_rate": 3.988382759348327e-06, - "loss": 1.0264, - "step": 843 - }, - { - "epoch": 0.06343003156470765, - "grad_norm": 1.655625207482653, - "learning_rate": 3.988330303234347e-06, - "loss": 1.1575, - "step": 844 - }, - { - "epoch": 0.06350518563054261, - "grad_norm": 1.440551090620145, - "learning_rate": 3.988277729304184e-06, - "loss": 1.0618, - "step": 845 - }, - { - "epoch": 0.06358033969637758, - "grad_norm": 2.37026012667064, - "learning_rate": 3.988225037560951e-06, - "loss": 0.9921, - "step": 846 - }, - { - "epoch": 0.06365549376221254, - "grad_norm": 1.839573931498999, - "learning_rate": 3.988172228007771e-06, - "loss": 1.0629, - "step": 847 - }, - { - "epoch": 0.0637306478280475, - "grad_norm": 1.906189776860721, - "learning_rate": 3.9881193006477745e-06, - "loss": 1.0026, - "step": 848 - }, - { - "epoch": 0.06380580189388246, - "grad_norm": 1.5759188651050284, - "learning_rate": 3.9880662554840955e-06, - "loss": 1.0194, - "step": 849 - }, - { - "epoch": 0.06388095595971742, - "grad_norm": 2.0748970373874647, - "learning_rate": 3.9880130925198786e-06, - "loss": 1.1169, - "step": 850 - }, - { - "epoch": 0.06395611002555238, - "grad_norm": 2.6657315839989755, - "learning_rate": 3.987959811758273e-06, - "loss": 0.9808, - "step": 851 - }, - { - "epoch": 0.06403126409138735, - "grad_norm": 1.5937872489615397, - "learning_rate": 3.9879064132024365e-06, - "loss": 0.9743, - "step": 852 - }, - { - "epoch": 0.0641064181572223, - "grad_norm": 1.5135963470320142, - "learning_rate": 3.987852896855532e-06, - "loss": 1.0975, - "step": 853 - }, - { - "epoch": 0.06418157222305727, - "grad_norm": 1.3648724819886449, - "learning_rate": 3.987799262720732e-06, - "loss": 1.0826, - "step": 854 - }, - { - "epoch": 0.06425672628889223, - "grad_norm": 2.8401610815008485, - "learning_rate": 3.987745510801214e-06, - "loss": 1.0387, - "step": 855 - }, - { - "epoch": 0.06433188035472719, - "grad_norm": 2.173470329733295, - "learning_rate": 3.987691641100162e-06, - "loss": 1.0355, - "step": 856 - }, - { - "epoch": 0.06440703442056216, - "grad_norm": 1.691067210129031, - "learning_rate": 3.98763765362077e-06, - "loss": 1.1088, - "step": 857 - }, - { - "epoch": 0.06448218848639711, - "grad_norm": 1.5606629062337327, - "learning_rate": 3.987583548366235e-06, - "loss": 0.9203, - "step": 858 - }, - { - "epoch": 0.06455734255223207, - "grad_norm": 2.3451560317064866, - "learning_rate": 3.987529325339764e-06, - "loss": 0.9419, - "step": 859 - }, - { - "epoch": 0.06463249661806704, - "grad_norm": 1.563014060334892, - "learning_rate": 3.98747498454457e-06, - "loss": 0.95, - "step": 860 - }, - { - "epoch": 0.064707650683902, - "grad_norm": 1.4880000772476176, - "learning_rate": 3.987420525983873e-06, - "loss": 1.1052, - "step": 861 - }, - { - "epoch": 0.06478280474973697, - "grad_norm": 1.3700615021203557, - "learning_rate": 3.9873659496608985e-06, - "loss": 0.9659, - "step": 862 - }, - { - "epoch": 0.06485795881557192, - "grad_norm": 2.1035841963420085, - "learning_rate": 3.9873112555788816e-06, - "loss": 1.0882, - "step": 863 - }, - { - "epoch": 0.06493311288140688, - "grad_norm": 1.8888455047798705, - "learning_rate": 3.987256443741063e-06, - "loss": 0.9819, - "step": 864 - }, - { - "epoch": 0.06500826694724185, - "grad_norm": 1.874195371277666, - "learning_rate": 3.9872015141506905e-06, - "loss": 1.121, - "step": 865 - }, - { - "epoch": 0.0650834210130768, - "grad_norm": 2.0104403980284165, - "learning_rate": 3.987146466811019e-06, - "loss": 1.0643, - "step": 866 - }, - { - "epoch": 0.06515857507891176, - "grad_norm": 2.037579929211619, - "learning_rate": 3.98709130172531e-06, - "loss": 1.0908, - "step": 867 - }, - { - "epoch": 0.06523372914474673, - "grad_norm": 1.7893767101773994, - "learning_rate": 3.987036018896832e-06, - "loss": 0.9586, - "step": 868 - }, - { - "epoch": 0.06530888321058169, - "grad_norm": 2.734874570714625, - "learning_rate": 3.986980618328861e-06, - "loss": 0.9957, - "step": 869 - }, - { - "epoch": 0.06538403727641666, - "grad_norm": 1.6262414298392507, - "learning_rate": 3.98692510002468e-06, - "loss": 0.9921, - "step": 870 - }, - { - "epoch": 0.06545919134225162, - "grad_norm": 2.372135141051388, - "learning_rate": 3.986869463987578e-06, - "loss": 0.9984, - "step": 871 - }, - { - "epoch": 0.06553434540808657, - "grad_norm": 1.9685981934163692, - "learning_rate": 3.9868137102208525e-06, - "loss": 0.9818, - "step": 872 - }, - { - "epoch": 0.06560949947392154, - "grad_norm": 2.0723639922385506, - "learning_rate": 3.9867578387278065e-06, - "loss": 1.1122, - "step": 873 - }, - { - "epoch": 0.0656846535397565, - "grad_norm": 1.5350098673599646, - "learning_rate": 3.986701849511751e-06, - "loss": 1.0319, - "step": 874 - }, - { - "epoch": 0.06575980760559147, - "grad_norm": 1.7299777124325346, - "learning_rate": 3.986645742576002e-06, - "loss": 1.0278, - "step": 875 - }, - { - "epoch": 0.06583496167142643, - "grad_norm": 1.6096576331390633, - "learning_rate": 3.986589517923887e-06, - "loss": 1.0463, - "step": 876 - }, - { - "epoch": 0.06591011573726138, - "grad_norm": 2.308729448541977, - "learning_rate": 3.986533175558735e-06, - "loss": 1.0182, - "step": 877 - }, - { - "epoch": 0.06598526980309635, - "grad_norm": 1.7327033570076615, - "learning_rate": 3.9864767154838856e-06, - "loss": 1.0144, - "step": 878 - }, - { - "epoch": 0.06606042386893131, - "grad_norm": 1.7879009890782056, - "learning_rate": 3.986420137702684e-06, - "loss": 0.9614, - "step": 879 - }, - { - "epoch": 0.06613557793476627, - "grad_norm": 0.7985309150529988, - "learning_rate": 3.9863634422184835e-06, - "loss": 0.8322, - "step": 880 - }, - { - "epoch": 0.06621073200060124, - "grad_norm": 1.8658082728245557, - "learning_rate": 3.986306629034642e-06, - "loss": 1.1231, - "step": 881 - }, - { - "epoch": 0.06628588606643619, - "grad_norm": 1.2453006145527548, - "learning_rate": 3.9862496981545265e-06, - "loss": 0.9615, - "step": 882 - }, - { - "epoch": 0.06636104013227116, - "grad_norm": 1.777600012110376, - "learning_rate": 3.986192649581511e-06, - "loss": 0.9944, - "step": 883 - }, - { - "epoch": 0.06643619419810612, - "grad_norm": 2.007631880061338, - "learning_rate": 3.986135483318975e-06, - "loss": 1.0315, - "step": 884 - }, - { - "epoch": 0.06651134826394108, - "grad_norm": 1.917974778921437, - "learning_rate": 3.986078199370307e-06, - "loss": 0.9739, - "step": 885 - }, - { - "epoch": 0.06658650232977605, - "grad_norm": 2.0106398568925155, - "learning_rate": 3.9860207977388994e-06, - "loss": 0.922, - "step": 886 - }, - { - "epoch": 0.066661656395611, - "grad_norm": 1.9273119679131807, - "learning_rate": 3.985963278428155e-06, - "loss": 1.0825, - "step": 887 - }, - { - "epoch": 0.06673681046144596, - "grad_norm": 0.7684949684449915, - "learning_rate": 3.985905641441482e-06, - "loss": 0.8602, - "step": 888 - }, - { - "epoch": 0.06681196452728093, - "grad_norm": 0.8365190212392364, - "learning_rate": 3.9858478867822945e-06, - "loss": 0.852, - "step": 889 - }, - { - "epoch": 0.06688711859311589, - "grad_norm": 1.469766954646368, - "learning_rate": 3.985790014454016e-06, - "loss": 1.0386, - "step": 890 - }, - { - "epoch": 0.06696227265895086, - "grad_norm": 5.065540860399215, - "learning_rate": 3.985732024460074e-06, - "loss": 1.0985, - "step": 891 - }, - { - "epoch": 0.06703742672478581, - "grad_norm": 1.836602403078415, - "learning_rate": 3.985673916803907e-06, - "loss": 1.0666, - "step": 892 - }, - { - "epoch": 0.06711258079062077, - "grad_norm": 1.6924417686869224, - "learning_rate": 3.9856156914889556e-06, - "loss": 1.1097, - "step": 893 - }, - { - "epoch": 0.06718773485645574, - "grad_norm": 1.6168836225199368, - "learning_rate": 3.985557348518672e-06, - "loss": 1.0, - "step": 894 - }, - { - "epoch": 0.0672628889222907, - "grad_norm": 1.5472324613365156, - "learning_rate": 3.9854988878965125e-06, - "loss": 1.0148, - "step": 895 - }, - { - "epoch": 0.06733804298812565, - "grad_norm": 4.731967225632791, - "learning_rate": 3.98544030962594e-06, - "loss": 1.1051, - "step": 896 - }, - { - "epoch": 0.06741319705396062, - "grad_norm": 1.8925979030587436, - "learning_rate": 3.985381613710427e-06, - "loss": 1.0516, - "step": 897 - }, - { - "epoch": 0.06748835111979558, - "grad_norm": 2.0682255428772067, - "learning_rate": 3.98532280015345e-06, - "loss": 1.0031, - "step": 898 - }, - { - "epoch": 0.06756350518563055, - "grad_norm": 1.905572989890391, - "learning_rate": 3.985263868958496e-06, - "loss": 1.0037, - "step": 899 - }, - { - "epoch": 0.0676386592514655, - "grad_norm": 1.4631689644668866, - "learning_rate": 3.9852048201290545e-06, - "loss": 0.9786, - "step": 900 - }, - { - "epoch": 0.06771381331730046, - "grad_norm": 1.7240781559941167, - "learning_rate": 3.985145653668626e-06, - "loss": 1.0472, - "step": 901 - }, - { - "epoch": 0.06778896738313543, - "grad_norm": 1.5668815215210532, - "learning_rate": 3.985086369580716e-06, - "loss": 1.0327, - "step": 902 - }, - { - "epoch": 0.06786412144897039, - "grad_norm": 1.6420987730438534, - "learning_rate": 3.985026967868837e-06, - "loss": 0.9632, - "step": 903 - }, - { - "epoch": 0.06793927551480534, - "grad_norm": 1.9019131966327052, - "learning_rate": 3.9849674485365094e-06, - "loss": 1.0143, - "step": 904 - }, - { - "epoch": 0.06801442958064031, - "grad_norm": 1.894669360602976, - "learning_rate": 3.98490781158726e-06, - "loss": 1.094, - "step": 905 - }, - { - "epoch": 0.06808958364647527, - "grad_norm": 1.4465271665551787, - "learning_rate": 3.98484805702462e-06, - "loss": 1.0715, - "step": 906 - }, - { - "epoch": 0.06816473771231024, - "grad_norm": 1.399576799256878, - "learning_rate": 3.9847881848521345e-06, - "loss": 0.9923, - "step": 907 - }, - { - "epoch": 0.0682398917781452, - "grad_norm": 1.5375648608909263, - "learning_rate": 3.984728195073347e-06, - "loss": 0.993, - "step": 908 - }, - { - "epoch": 0.06831504584398015, - "grad_norm": 1.9089464596393617, - "learning_rate": 3.984668087691815e-06, - "loss": 1.0715, - "step": 909 - }, - { - "epoch": 0.06839019990981512, - "grad_norm": 1.8294518375192643, - "learning_rate": 3.984607862711099e-06, - "loss": 0.9086, - "step": 910 - }, - { - "epoch": 0.06846535397565008, - "grad_norm": 2.1535845881974844, - "learning_rate": 3.984547520134767e-06, - "loss": 0.9658, - "step": 911 - }, - { - "epoch": 0.06854050804148504, - "grad_norm": 4.777657948452579, - "learning_rate": 3.9844870599663954e-06, - "loss": 0.9794, - "step": 912 - }, - { - "epoch": 0.06861566210732001, - "grad_norm": 1.612232818942595, - "learning_rate": 3.984426482209567e-06, - "loss": 1.0611, - "step": 913 - }, - { - "epoch": 0.06869081617315496, - "grad_norm": 1.697478918582311, - "learning_rate": 3.98436578686787e-06, - "loss": 1.0273, - "step": 914 - }, - { - "epoch": 0.06876597023898993, - "grad_norm": 1.5109711604821707, - "learning_rate": 3.984304973944901e-06, - "loss": 1.0714, - "step": 915 - }, - { - "epoch": 0.06884112430482489, - "grad_norm": 3.065069716440098, - "learning_rate": 3.984244043444264e-06, - "loss": 0.9842, - "step": 916 - }, - { - "epoch": 0.06891627837065985, - "grad_norm": 1.930620346384809, - "learning_rate": 3.98418299536957e-06, - "loss": 1.083, - "step": 917 - }, - { - "epoch": 0.06899143243649482, - "grad_norm": 1.5808863134895528, - "learning_rate": 3.984121829724435e-06, - "loss": 1.0087, - "step": 918 - }, - { - "epoch": 0.06906658650232977, - "grad_norm": 10.073584475598134, - "learning_rate": 3.984060546512484e-06, - "loss": 1.1426, - "step": 919 - }, - { - "epoch": 0.06914174056816474, - "grad_norm": 1.3708143683206742, - "learning_rate": 3.983999145737348e-06, - "loss": 1.0237, - "step": 920 - }, - { - "epoch": 0.0692168946339997, - "grad_norm": 2.1404667259789845, - "learning_rate": 3.983937627402665e-06, - "loss": 0.9778, - "step": 921 - }, - { - "epoch": 0.06929204869983466, - "grad_norm": 1.8327792659780588, - "learning_rate": 3.983875991512082e-06, - "loss": 1.0061, - "step": 922 - }, - { - "epoch": 0.06936720276566963, - "grad_norm": 1.6802465883438036, - "learning_rate": 3.983814238069249e-06, - "loss": 1.0734, - "step": 923 - }, - { - "epoch": 0.06944235683150458, - "grad_norm": 1.5613386797554176, - "learning_rate": 3.983752367077826e-06, - "loss": 0.9938, - "step": 924 - }, - { - "epoch": 0.06951751089733954, - "grad_norm": 1.6364119431079653, - "learning_rate": 3.983690378541478e-06, - "loss": 0.9734, - "step": 925 - }, - { - "epoch": 0.06959266496317451, - "grad_norm": 1.4587192932924193, - "learning_rate": 3.9836282724638805e-06, - "loss": 1.018, - "step": 926 - }, - { - "epoch": 0.06966781902900947, - "grad_norm": 1.6792609094467272, - "learning_rate": 3.983566048848711e-06, - "loss": 1.1045, - "step": 927 - }, - { - "epoch": 0.06974297309484444, - "grad_norm": 1.6869151196791725, - "learning_rate": 3.983503707699658e-06, - "loss": 0.9128, - "step": 928 - }, - { - "epoch": 0.0698181271606794, - "grad_norm": 1.659368853369067, - "learning_rate": 3.983441249020414e-06, - "loss": 1.0923, - "step": 929 - }, - { - "epoch": 0.06989328122651435, - "grad_norm": 1.9181591801784672, - "learning_rate": 3.983378672814682e-06, - "loss": 1.0303, - "step": 930 - }, - { - "epoch": 0.06996843529234932, - "grad_norm": 1.5663726879148776, - "learning_rate": 3.983315979086169e-06, - "loss": 1.0961, - "step": 931 - }, - { - "epoch": 0.07004358935818428, - "grad_norm": 1.5158612568558578, - "learning_rate": 3.9832531678385885e-06, - "loss": 1.0392, - "step": 932 - }, - { - "epoch": 0.07011874342401923, - "grad_norm": 2.3567809945971097, - "learning_rate": 3.983190239075664e-06, - "loss": 1.1237, - "step": 933 - }, - { - "epoch": 0.0701938974898542, - "grad_norm": 2.054237113296628, - "learning_rate": 3.983127192801123e-06, - "loss": 0.9804, - "step": 934 - }, - { - "epoch": 0.07026905155568916, - "grad_norm": 1.729562009482217, - "learning_rate": 3.983064029018703e-06, - "loss": 1.0981, - "step": 935 - }, - { - "epoch": 0.07034420562152413, - "grad_norm": 1.3187922014471571, - "learning_rate": 3.983000747732145e-06, - "loss": 0.9889, - "step": 936 - }, - { - "epoch": 0.07041935968735909, - "grad_norm": 2.416023223544286, - "learning_rate": 3.9829373489452e-06, - "loss": 1.14, - "step": 937 - }, - { - "epoch": 0.07049451375319404, - "grad_norm": 1.7092561167883298, - "learning_rate": 3.982873832661623e-06, - "loss": 1.0596, - "step": 938 - }, - { - "epoch": 0.07056966781902901, - "grad_norm": 1.6556874885445323, - "learning_rate": 3.982810198885179e-06, - "loss": 1.1045, - "step": 939 - }, - { - "epoch": 0.07064482188486397, - "grad_norm": 1.989595589591684, - "learning_rate": 3.982746447619638e-06, - "loss": 1.1131, - "step": 940 - }, - { - "epoch": 0.07071997595069893, - "grad_norm": 1.7864131050575582, - "learning_rate": 3.982682578868777e-06, - "loss": 1.0566, - "step": 941 - }, - { - "epoch": 0.0707951300165339, - "grad_norm": 2.1740590921353062, - "learning_rate": 3.982618592636381e-06, - "loss": 1.1013, - "step": 942 - }, - { - "epoch": 0.07087028408236885, - "grad_norm": 1.8556848018745211, - "learning_rate": 3.982554488926242e-06, - "loss": 1.1172, - "step": 943 - }, - { - "epoch": 0.07094543814820382, - "grad_norm": 1.472099300067228, - "learning_rate": 3.982490267742158e-06, - "loss": 1.0276, - "step": 944 - }, - { - "epoch": 0.07102059221403878, - "grad_norm": 1.3550307915767876, - "learning_rate": 3.9824259290879336e-06, - "loss": 1.0667, - "step": 945 - }, - { - "epoch": 0.07109574627987374, - "grad_norm": 2.045159822481707, - "learning_rate": 3.982361472967382e-06, - "loss": 0.9866, - "step": 946 - }, - { - "epoch": 0.0711709003457087, - "grad_norm": 1.817984799526108, - "learning_rate": 3.982296899384322e-06, - "loss": 1.0301, - "step": 947 - }, - { - "epoch": 0.07124605441154366, - "grad_norm": 1.605055707160845, - "learning_rate": 3.9822322083425805e-06, - "loss": 1.0498, - "step": 948 - }, - { - "epoch": 0.07132120847737862, - "grad_norm": 2.382541128245891, - "learning_rate": 3.982167399845989e-06, - "loss": 1.0041, - "step": 949 - }, - { - "epoch": 0.07139636254321359, - "grad_norm": 1.3532731360560122, - "learning_rate": 3.982102473898391e-06, - "loss": 1.0138, - "step": 950 - }, - { - "epoch": 0.07147151660904855, - "grad_norm": 1.8492642718160595, - "learning_rate": 3.9820374305036295e-06, - "loss": 1.0525, - "step": 951 - }, - { - "epoch": 0.07154667067488352, - "grad_norm": 1.8451641284749556, - "learning_rate": 3.981972269665561e-06, - "loss": 0.9739, - "step": 952 - }, - { - "epoch": 0.07162182474071847, - "grad_norm": 0.7673573261319154, - "learning_rate": 3.981906991388046e-06, - "loss": 0.7838, - "step": 953 - }, - { - "epoch": 0.07169697880655343, - "grad_norm": 1.9032266123036325, - "learning_rate": 3.981841595674952e-06, - "loss": 1.0975, - "step": 954 - }, - { - "epoch": 0.0717721328723884, - "grad_norm": 1.597421195126662, - "learning_rate": 3.981776082530156e-06, - "loss": 1.044, - "step": 955 - }, - { - "epoch": 0.07184728693822336, - "grad_norm": 1.8504647006378925, - "learning_rate": 3.981710451957537e-06, - "loss": 1.0519, - "step": 956 - }, - { - "epoch": 0.07192244100405831, - "grad_norm": 1.5819827978932641, - "learning_rate": 3.981644703960986e-06, - "loss": 1.0915, - "step": 957 - }, - { - "epoch": 0.07199759506989328, - "grad_norm": 1.519196461175952, - "learning_rate": 3.981578838544398e-06, - "loss": 1.1032, - "step": 958 - }, - { - "epoch": 0.07207274913572824, - "grad_norm": 1.9225076393997542, - "learning_rate": 3.981512855711675e-06, - "loss": 0.9816, - "step": 959 - }, - { - "epoch": 0.07214790320156321, - "grad_norm": 2.279636805534804, - "learning_rate": 3.981446755466729e-06, - "loss": 1.0361, - "step": 960 - }, - { - "epoch": 0.07222305726739817, - "grad_norm": 1.8543624631678426, - "learning_rate": 3.981380537813474e-06, - "loss": 1.0843, - "step": 961 - }, - { - "epoch": 0.07229821133323312, - "grad_norm": 2.0197836388016417, - "learning_rate": 3.981314202755835e-06, - "loss": 0.9798, - "step": 962 - }, - { - "epoch": 0.0723733653990681, - "grad_norm": 1.4739999484420623, - "learning_rate": 3.981247750297744e-06, - "loss": 1.0328, - "step": 963 - }, - { - "epoch": 0.07244851946490305, - "grad_norm": 0.763760027468421, - "learning_rate": 3.9811811804431355e-06, - "loss": 0.8613, - "step": 964 - }, - { - "epoch": 0.07252367353073802, - "grad_norm": 2.0575178834826273, - "learning_rate": 3.981114493195956e-06, - "loss": 0.9866, - "step": 965 - }, - { - "epoch": 0.07259882759657298, - "grad_norm": 1.854718701197378, - "learning_rate": 3.981047688560156e-06, - "loss": 1.0038, - "step": 966 - }, - { - "epoch": 0.07267398166240793, - "grad_norm": 2.0079114125152615, - "learning_rate": 3.980980766539696e-06, - "loss": 1.0391, - "step": 967 - }, - { - "epoch": 0.0727491357282429, - "grad_norm": 1.7574902048285672, - "learning_rate": 3.980913727138539e-06, - "loss": 1.0157, - "step": 968 - }, - { - "epoch": 0.07282428979407786, - "grad_norm": 2.2045080294377404, - "learning_rate": 3.980846570360658e-06, - "loss": 0.9507, - "step": 969 - }, - { - "epoch": 0.07289944385991282, - "grad_norm": 1.520508361306701, - "learning_rate": 3.980779296210033e-06, - "loss": 1.0535, - "step": 970 - }, - { - "epoch": 0.07297459792574779, - "grad_norm": 1.6088805689137016, - "learning_rate": 3.98071190469065e-06, - "loss": 1.0292, - "step": 971 - }, - { - "epoch": 0.07304975199158274, - "grad_norm": 1.7794084901274212, - "learning_rate": 3.980644395806502e-06, - "loss": 0.9927, - "step": 972 - }, - { - "epoch": 0.07312490605741771, - "grad_norm": 1.8097676696041225, - "learning_rate": 3.980576769561588e-06, - "loss": 0.9589, - "step": 973 - }, - { - "epoch": 0.07320006012325267, - "grad_norm": 5.1817125875492, - "learning_rate": 3.980509025959918e-06, - "loss": 1.0144, - "step": 974 - }, - { - "epoch": 0.07327521418908763, - "grad_norm": 2.196907957000749, - "learning_rate": 3.980441165005503e-06, - "loss": 1.0747, - "step": 975 - }, - { - "epoch": 0.0733503682549226, - "grad_norm": 1.7999657627664332, - "learning_rate": 3.9803731867023665e-06, - "loss": 1.1237, - "step": 976 - }, - { - "epoch": 0.07342552232075755, - "grad_norm": 2.279661592332384, - "learning_rate": 3.980305091054534e-06, - "loss": 1.0221, - "step": 977 - }, - { - "epoch": 0.07350067638659251, - "grad_norm": 1.6848863211298901, - "learning_rate": 3.980236878066042e-06, - "loss": 1.0547, - "step": 978 - }, - { - "epoch": 0.07357583045242748, - "grad_norm": 1.922357948224124, - "learning_rate": 3.9801685477409336e-06, - "loss": 1.0245, - "step": 979 - }, - { - "epoch": 0.07365098451826244, - "grad_norm": 1.885075898400569, - "learning_rate": 3.980100100083254e-06, - "loss": 0.9309, - "step": 980 - }, - { - "epoch": 0.0737261385840974, - "grad_norm": 1.6674087620700213, - "learning_rate": 3.980031535097063e-06, - "loss": 1.0914, - "step": 981 - }, - { - "epoch": 0.07380129264993236, - "grad_norm": 0.7275615509449085, - "learning_rate": 3.9799628527864205e-06, - "loss": 0.8906, - "step": 982 - }, - { - "epoch": 0.07387644671576732, - "grad_norm": 2.3506663696575933, - "learning_rate": 3.979894053155398e-06, - "loss": 1.0527, - "step": 983 - }, - { - "epoch": 0.07395160078160229, - "grad_norm": 1.5412617572667018, - "learning_rate": 3.979825136208071e-06, - "loss": 0.9946, - "step": 984 - }, - { - "epoch": 0.07402675484743725, - "grad_norm": 1.7477570192209453, - "learning_rate": 3.979756101948523e-06, - "loss": 1.0847, - "step": 985 - }, - { - "epoch": 0.0741019089132722, - "grad_norm": 2.1300732135278855, - "learning_rate": 3.979686950380845e-06, - "loss": 1.0038, - "step": 986 - }, - { - "epoch": 0.07417706297910717, - "grad_norm": 1.970155849778949, - "learning_rate": 3.979617681509135e-06, - "loss": 0.9926, - "step": 987 - }, - { - "epoch": 0.07425221704494213, - "grad_norm": 0.7459951205746, - "learning_rate": 3.979548295337496e-06, - "loss": 0.8231, - "step": 988 - }, - { - "epoch": 0.0743273711107771, - "grad_norm": 1.7149260551148318, - "learning_rate": 3.979478791870041e-06, - "loss": 1.078, - "step": 989 - }, - { - "epoch": 0.07440252517661206, - "grad_norm": 1.5720684046288818, - "learning_rate": 3.9794091711108875e-06, - "loss": 0.975, - "step": 990 - }, - { - "epoch": 0.07447767924244701, - "grad_norm": 1.5505511596023884, - "learning_rate": 3.9793394330641614e-06, - "loss": 1.0118, - "step": 991 - }, - { - "epoch": 0.07455283330828198, - "grad_norm": 0.7548501228469372, - "learning_rate": 3.979269577733994e-06, - "loss": 0.8497, - "step": 992 - }, - { - "epoch": 0.07462798737411694, - "grad_norm": 0.8299283599384694, - "learning_rate": 3.979199605124525e-06, - "loss": 0.9418, - "step": 993 - }, - { - "epoch": 0.0747031414399519, - "grad_norm": 2.4048811747169663, - "learning_rate": 3.979129515239901e-06, - "loss": 0.9307, - "step": 994 - }, - { - "epoch": 0.07477829550578687, - "grad_norm": 1.9937360541833542, - "learning_rate": 3.979059308084274e-06, - "loss": 1.0706, - "step": 995 - }, - { - "epoch": 0.07485344957162182, - "grad_norm": 1.897181154082456, - "learning_rate": 3.9789889836618045e-06, - "loss": 1.0099, - "step": 996 - }, - { - "epoch": 0.07492860363745679, - "grad_norm": 1.679118712176691, - "learning_rate": 3.97891854197666e-06, - "loss": 1.0633, - "step": 997 - }, - { - "epoch": 0.07500375770329175, - "grad_norm": 1.5293812149975405, - "learning_rate": 3.978847983033014e-06, - "loss": 0.9728, - "step": 998 - }, - { - "epoch": 0.0750789117691267, - "grad_norm": 0.6700418437161032, - "learning_rate": 3.978777306835048e-06, - "loss": 0.8583, - "step": 999 - }, - { - "epoch": 0.07515406583496168, - "grad_norm": 1.6627213352263641, - "learning_rate": 3.978706513386949e-06, - "loss": 1.0019, - "step": 1000 - }, - { - "epoch": 0.07522921990079663, - "grad_norm": 3.348530996693241, - "learning_rate": 3.978635602692912e-06, - "loss": 1.0741, - "step": 1001 - }, - { - "epoch": 0.07530437396663159, - "grad_norm": 2.3073987627397585, - "learning_rate": 3.978564574757139e-06, - "loss": 1.0757, - "step": 1002 - }, - { - "epoch": 0.07537952803246656, - "grad_norm": 1.5832541931183557, - "learning_rate": 3.9784934295838385e-06, - "loss": 0.9716, - "step": 1003 - }, - { - "epoch": 0.07545468209830151, - "grad_norm": 1.9127589555340496, - "learning_rate": 3.978422167177226e-06, - "loss": 1.0264, - "step": 1004 - }, - { - "epoch": 0.07552983616413649, - "grad_norm": 1.7812736911202056, - "learning_rate": 3.9783507875415245e-06, - "loss": 1.1806, - "step": 1005 - }, - { - "epoch": 0.07560499022997144, - "grad_norm": 1.3724949732759628, - "learning_rate": 3.9782792906809625e-06, - "loss": 1.0495, - "step": 1006 - }, - { - "epoch": 0.0756801442958064, - "grad_norm": 2.100799081541559, - "learning_rate": 3.978207676599778e-06, - "loss": 1.068, - "step": 1007 - }, - { - "epoch": 0.07575529836164137, - "grad_norm": 1.6172764702985156, - "learning_rate": 3.978135945302213e-06, - "loss": 1.0228, - "step": 1008 - }, - { - "epoch": 0.07583045242747632, - "grad_norm": 1.8155807752360842, - "learning_rate": 3.978064096792519e-06, - "loss": 0.9698, - "step": 1009 - }, - { - "epoch": 0.0759056064933113, - "grad_norm": 1.507178929490619, - "learning_rate": 3.977992131074953e-06, - "loss": 1.0375, - "step": 1010 - }, - { - "epoch": 0.07598076055914625, - "grad_norm": 1.449459498065604, - "learning_rate": 3.9779200481537775e-06, - "loss": 1.0422, - "step": 1011 - }, - { - "epoch": 0.07605591462498121, - "grad_norm": 1.7792444148113347, - "learning_rate": 3.977847848033267e-06, - "loss": 1.0382, - "step": 1012 - }, - { - "epoch": 0.07613106869081618, - "grad_norm": 1.569360245027991, - "learning_rate": 3.977775530717696e-06, - "loss": 1.0282, - "step": 1013 - }, - { - "epoch": 0.07620622275665113, - "grad_norm": 3.1106399186880105, - "learning_rate": 3.977703096211354e-06, - "loss": 1.006, - "step": 1014 - }, - { - "epoch": 0.07628137682248609, - "grad_norm": 1.7513792188785677, - "learning_rate": 3.977630544518529e-06, - "loss": 1.0075, - "step": 1015 - }, - { - "epoch": 0.07635653088832106, - "grad_norm": 2.0543919339829744, - "learning_rate": 3.97755787564352e-06, - "loss": 1.1329, - "step": 1016 - }, - { - "epoch": 0.07643168495415602, - "grad_norm": 1.8762955322433634, - "learning_rate": 3.977485089590636e-06, - "loss": 1.012, - "step": 1017 - }, - { - "epoch": 0.07650683901999099, - "grad_norm": 1.6406056720866715, - "learning_rate": 3.977412186364187e-06, - "loss": 1.0624, - "step": 1018 - }, - { - "epoch": 0.07658199308582594, - "grad_norm": 2.0969649726223443, - "learning_rate": 3.977339165968495e-06, - "loss": 1.1206, - "step": 1019 - }, - { - "epoch": 0.0766571471516609, - "grad_norm": 1.6369055593579485, - "learning_rate": 3.977266028407885e-06, - "loss": 1.018, - "step": 1020 - }, - { - "epoch": 0.07673230121749587, - "grad_norm": 1.8288940377135254, - "learning_rate": 3.977192773686692e-06, - "loss": 1.0408, - "step": 1021 - }, - { - "epoch": 0.07680745528333083, - "grad_norm": 1.2349951429696269, - "learning_rate": 3.977119401809255e-06, - "loss": 0.9921, - "step": 1022 - }, - { - "epoch": 0.07688260934916578, - "grad_norm": 1.3720664027691183, - "learning_rate": 3.977045912779924e-06, - "loss": 1.0215, - "step": 1023 - }, - { - "epoch": 0.07695776341500075, - "grad_norm": 1.2636026779166958, - "learning_rate": 3.9769723066030505e-06, - "loss": 0.9794, - "step": 1024 - }, - { - "epoch": 0.07703291748083571, - "grad_norm": 2.0203177066272695, - "learning_rate": 3.976898583282998e-06, - "loss": 0.9069, - "step": 1025 - }, - { - "epoch": 0.07710807154667068, - "grad_norm": 1.511539576752737, - "learning_rate": 3.976824742824135e-06, - "loss": 1.0089, - "step": 1026 - }, - { - "epoch": 0.07718322561250564, - "grad_norm": 1.7740717354541309, - "learning_rate": 3.976750785230835e-06, - "loss": 1.0395, - "step": 1027 - }, - { - "epoch": 0.0772583796783406, - "grad_norm": 1.2255194049421039, - "learning_rate": 3.976676710507483e-06, - "loss": 1.0119, - "step": 1028 - }, - { - "epoch": 0.07733353374417556, - "grad_norm": 2.6461421956486553, - "learning_rate": 3.976602518658466e-06, - "loss": 1.101, - "step": 1029 - }, - { - "epoch": 0.07740868781001052, - "grad_norm": 1.9209305838613524, - "learning_rate": 3.976528209688181e-06, - "loss": 1.017, - "step": 1030 - }, - { - "epoch": 0.07748384187584548, - "grad_norm": 1.469541782663859, - "learning_rate": 3.976453783601031e-06, - "loss": 1.0274, - "step": 1031 - }, - { - "epoch": 0.07755899594168045, - "grad_norm": 1.8271569191161372, - "learning_rate": 3.976379240401426e-06, - "loss": 1.0198, - "step": 1032 - }, - { - "epoch": 0.0776341500075154, - "grad_norm": 1.671145484983091, - "learning_rate": 3.976304580093782e-06, - "loss": 0.9454, - "step": 1033 - }, - { - "epoch": 0.07770930407335037, - "grad_norm": 1.9152748472421695, - "learning_rate": 3.976229802682524e-06, - "loss": 1.081, - "step": 1034 - }, - { - "epoch": 0.07778445813918533, - "grad_norm": 1.7909136527847893, - "learning_rate": 3.9761549081720845e-06, - "loss": 0.9703, - "step": 1035 - }, - { - "epoch": 0.07785961220502029, - "grad_norm": 1.6726617120816645, - "learning_rate": 3.976079896566898e-06, - "loss": 0.9495, - "step": 1036 - }, - { - "epoch": 0.07793476627085526, - "grad_norm": 1.8234181250267716, - "learning_rate": 3.976004767871411e-06, - "loss": 1.0115, - "step": 1037 - }, - { - "epoch": 0.07800992033669021, - "grad_norm": 1.6501560021837234, - "learning_rate": 3.975929522090075e-06, - "loss": 0.9789, - "step": 1038 - }, - { - "epoch": 0.07808507440252517, - "grad_norm": 2.1938141294490543, - "learning_rate": 3.9758541592273485e-06, - "loss": 1.0611, - "step": 1039 - }, - { - "epoch": 0.07816022846836014, - "grad_norm": 1.9001378109795382, - "learning_rate": 3.975778679287697e-06, - "loss": 1.0416, - "step": 1040 - }, - { - "epoch": 0.0782353825341951, - "grad_norm": 0.7184772388540309, - "learning_rate": 3.975703082275592e-06, - "loss": 0.8472, - "step": 1041 - }, - { - "epoch": 0.07831053660003007, - "grad_norm": 2.2757366915949295, - "learning_rate": 3.975627368195515e-06, - "loss": 0.9483, - "step": 1042 - }, - { - "epoch": 0.07838569066586502, - "grad_norm": 2.3467144338485677, - "learning_rate": 3.9755515370519515e-06, - "loss": 1.0148, - "step": 1043 - }, - { - "epoch": 0.07846084473169998, - "grad_norm": 1.5941898262640168, - "learning_rate": 3.975475588849394e-06, - "loss": 1.0791, - "step": 1044 - }, - { - "epoch": 0.07853599879753495, - "grad_norm": 3.4302149477857355, - "learning_rate": 3.975399523592343e-06, - "loss": 1.071, - "step": 1045 - }, - { - "epoch": 0.0786111528633699, - "grad_norm": 1.4973979474481227, - "learning_rate": 3.975323341285306e-06, - "loss": 1.0555, - "step": 1046 - }, - { - "epoch": 0.07868630692920486, - "grad_norm": 1.6836374370102276, - "learning_rate": 3.975247041932797e-06, - "loss": 1.0343, - "step": 1047 - }, - { - "epoch": 0.07876146099503983, - "grad_norm": 1.801729891594299, - "learning_rate": 3.975170625539338e-06, - "loss": 0.9868, - "step": 1048 - }, - { - "epoch": 0.07883661506087479, - "grad_norm": 1.6146123904002876, - "learning_rate": 3.975094092109455e-06, - "loss": 1.0807, - "step": 1049 - }, - { - "epoch": 0.07891176912670976, - "grad_norm": 1.571849317701087, - "learning_rate": 3.975017441647684e-06, - "loss": 0.9906, - "step": 1050 - }, - { - "epoch": 0.07898692319254472, - "grad_norm": 1.5487653183198766, - "learning_rate": 3.974940674158567e-06, - "loss": 0.9733, - "step": 1051 - }, - { - "epoch": 0.07906207725837967, - "grad_norm": 0.5942310091449803, - "learning_rate": 3.9748637896466526e-06, - "loss": 0.8044, - "step": 1052 - }, - { - "epoch": 0.07913723132421464, - "grad_norm": 1.4732583026998352, - "learning_rate": 3.974786788116496e-06, - "loss": 0.9857, - "step": 1053 - }, - { - "epoch": 0.0792123853900496, - "grad_norm": 1.6117232144654978, - "learning_rate": 3.974709669572661e-06, - "loss": 1.0127, - "step": 1054 - }, - { - "epoch": 0.07928753945588456, - "grad_norm": 1.6516228584512984, - "learning_rate": 3.974632434019716e-06, - "loss": 0.9545, - "step": 1055 - }, - { - "epoch": 0.07936269352171953, - "grad_norm": 1.6096102348184254, - "learning_rate": 3.974555081462237e-06, - "loss": 1.0068, - "step": 1056 - }, - { - "epoch": 0.07943784758755448, - "grad_norm": 18.554731907841596, - "learning_rate": 3.97447761190481e-06, - "loss": 0.9838, - "step": 1057 - }, - { - "epoch": 0.07951300165338945, - "grad_norm": 1.8590664006440933, - "learning_rate": 3.974400025352022e-06, - "loss": 1.1348, - "step": 1058 - }, - { - "epoch": 0.07958815571922441, - "grad_norm": 1.597271562818544, - "learning_rate": 3.974322321808473e-06, - "loss": 1.066, - "step": 1059 - }, - { - "epoch": 0.07966330978505937, - "grad_norm": 2.2598857930114153, - "learning_rate": 3.974244501278766e-06, - "loss": 1.0449, - "step": 1060 - }, - { - "epoch": 0.07973846385089434, - "grad_norm": 1.6879185757908335, - "learning_rate": 3.974166563767513e-06, - "loss": 0.9615, - "step": 1061 - }, - { - "epoch": 0.0798136179167293, - "grad_norm": 1.8247779789525336, - "learning_rate": 3.974088509279331e-06, - "loss": 1.0799, - "step": 1062 - }, - { - "epoch": 0.07988877198256426, - "grad_norm": 1.7136107163203704, - "learning_rate": 3.9740103378188455e-06, - "loss": 0.9645, - "step": 1063 - }, - { - "epoch": 0.07996392604839922, - "grad_norm": 2.256633516722728, - "learning_rate": 3.973932049390688e-06, - "loss": 1.2079, - "step": 1064 - }, - { - "epoch": 0.08003908011423418, - "grad_norm": 1.5470015348834936, - "learning_rate": 3.973853643999499e-06, - "loss": 1.0076, - "step": 1065 - }, - { - "epoch": 0.08011423418006915, - "grad_norm": 1.994789249694263, - "learning_rate": 3.973775121649922e-06, - "loss": 1.0755, - "step": 1066 - }, - { - "epoch": 0.0801893882459041, - "grad_norm": 1.463539214036002, - "learning_rate": 3.973696482346611e-06, - "loss": 0.9488, - "step": 1067 - }, - { - "epoch": 0.08026454231173906, - "grad_norm": 1.7461540331237704, - "learning_rate": 3.973617726094227e-06, - "loss": 1.0696, - "step": 1068 - }, - { - "epoch": 0.08033969637757403, - "grad_norm": 1.4487480777072743, - "learning_rate": 3.973538852897435e-06, - "loss": 1.0702, - "step": 1069 - }, - { - "epoch": 0.08041485044340899, - "grad_norm": 2.017714371193783, - "learning_rate": 3.973459862760908e-06, - "loss": 1.1417, - "step": 1070 - }, - { - "epoch": 0.08049000450924396, - "grad_norm": 2.459041385732083, - "learning_rate": 3.973380755689328e-06, - "loss": 1.0172, - "step": 1071 - }, - { - "epoch": 0.08056515857507891, - "grad_norm": 1.6907772453986183, - "learning_rate": 3.97330153168738e-06, - "loss": 1.0778, - "step": 1072 - }, - { - "epoch": 0.08064031264091387, - "grad_norm": 2.1476382346731557, - "learning_rate": 3.973222190759761e-06, - "loss": 1.0879, - "step": 1073 - }, - { - "epoch": 0.08071546670674884, - "grad_norm": 5.795463287292524, - "learning_rate": 3.973142732911172e-06, - "loss": 1.0887, - "step": 1074 - }, - { - "epoch": 0.0807906207725838, - "grad_norm": 1.6519075002659243, - "learning_rate": 3.9730631581463195e-06, - "loss": 1.0392, - "step": 1075 - }, - { - "epoch": 0.08086577483841875, - "grad_norm": 3.1665228426211924, - "learning_rate": 3.972983466469919e-06, - "loss": 1.0789, - "step": 1076 - }, - { - "epoch": 0.08094092890425372, - "grad_norm": 3.2081194389350056, - "learning_rate": 3.972903657886695e-06, - "loss": 1.041, - "step": 1077 - }, - { - "epoch": 0.08101608297008868, - "grad_norm": 1.9545148253309195, - "learning_rate": 3.972823732401373e-06, - "loss": 1.0824, - "step": 1078 - }, - { - "epoch": 0.08109123703592365, - "grad_norm": 1.6766474692507722, - "learning_rate": 3.972743690018691e-06, - "loss": 1.0356, - "step": 1079 - }, - { - "epoch": 0.0811663911017586, - "grad_norm": 3.1915740362392326, - "learning_rate": 3.9726635307433906e-06, - "loss": 1.1499, - "step": 1080 - }, - { - "epoch": 0.08124154516759356, - "grad_norm": 1.5992451347407068, - "learning_rate": 3.972583254580223e-06, - "loss": 0.986, - "step": 1081 - }, - { - "epoch": 0.08131669923342853, - "grad_norm": 1.3888430805069498, - "learning_rate": 3.972502861533943e-06, - "loss": 1.0568, - "step": 1082 - }, - { - "epoch": 0.08139185329926349, - "grad_norm": 1.9595269224902192, - "learning_rate": 3.9724223516093154e-06, - "loss": 1.0742, - "step": 1083 - }, - { - "epoch": 0.08146700736509845, - "grad_norm": 0.801687035358021, - "learning_rate": 3.972341724811111e-06, - "loss": 0.8536, - "step": 1084 - }, - { - "epoch": 0.08154216143093342, - "grad_norm": 1.5300249142581739, - "learning_rate": 3.972260981144107e-06, - "loss": 1.0866, - "step": 1085 - }, - { - "epoch": 0.08161731549676837, - "grad_norm": 1.9464383831251835, - "learning_rate": 3.972180120613087e-06, - "loss": 1.1012, - "step": 1086 - }, - { - "epoch": 0.08169246956260334, - "grad_norm": 2.4763108375312473, - "learning_rate": 3.972099143222844e-06, - "loss": 1.0831, - "step": 1087 - }, - { - "epoch": 0.0817676236284383, - "grad_norm": 1.468662502847992, - "learning_rate": 3.972018048978174e-06, - "loss": 1.0747, - "step": 1088 - }, - { - "epoch": 0.08184277769427326, - "grad_norm": 1.8235890774131376, - "learning_rate": 3.971936837883884e-06, - "loss": 1.0737, - "step": 1089 - }, - { - "epoch": 0.08191793176010823, - "grad_norm": 1.773836536299322, - "learning_rate": 3.971855509944784e-06, - "loss": 1.0517, - "step": 1090 - }, - { - "epoch": 0.08199308582594318, - "grad_norm": 2.669027763402386, - "learning_rate": 3.971774065165696e-06, - "loss": 0.9564, - "step": 1091 - }, - { - "epoch": 0.08206823989177814, - "grad_norm": 1.3225926230144835, - "learning_rate": 3.971692503551443e-06, - "loss": 1.112, - "step": 1092 - }, - { - "epoch": 0.08214339395761311, - "grad_norm": 2.1108079352383844, - "learning_rate": 3.971610825106859e-06, - "loss": 0.9964, - "step": 1093 - }, - { - "epoch": 0.08221854802344807, - "grad_norm": 1.7058361068476928, - "learning_rate": 3.971529029836785e-06, - "loss": 0.8877, - "step": 1094 - }, - { - "epoch": 0.08229370208928304, - "grad_norm": 1.7191020951723892, - "learning_rate": 3.971447117746065e-06, - "loss": 0.8752, - "step": 1095 - }, - { - "epoch": 0.08236885615511799, - "grad_norm": 1.8697642020614322, - "learning_rate": 3.9713650888395555e-06, - "loss": 0.9825, - "step": 1096 - }, - { - "epoch": 0.08244401022095295, - "grad_norm": 1.4184824115629535, - "learning_rate": 3.971282943122115e-06, - "loss": 1.0117, - "step": 1097 - }, - { - "epoch": 0.08251916428678792, - "grad_norm": 1.8767252931646732, - "learning_rate": 3.971200680598611e-06, - "loss": 0.9685, - "step": 1098 - }, - { - "epoch": 0.08259431835262288, - "grad_norm": 1.4873482089772807, - "learning_rate": 3.971118301273919e-06, - "loss": 0.9709, - "step": 1099 - }, - { - "epoch": 0.08266947241845783, - "grad_norm": 1.5396891446159968, - "learning_rate": 3.97103580515292e-06, - "loss": 0.9991, - "step": 1100 - }, - { - "epoch": 0.0827446264842928, - "grad_norm": 1.6027546237277293, - "learning_rate": 3.970953192240502e-06, - "loss": 1.0855, - "step": 1101 - }, - { - "epoch": 0.08281978055012776, - "grad_norm": 1.5940209700666064, - "learning_rate": 3.970870462541559e-06, - "loss": 1.0382, - "step": 1102 - }, - { - "epoch": 0.08289493461596273, - "grad_norm": 1.8563148898204387, - "learning_rate": 3.970787616060995e-06, - "loss": 1.0176, - "step": 1103 - }, - { - "epoch": 0.08297008868179769, - "grad_norm": 2.1325509047162403, - "learning_rate": 3.970704652803718e-06, - "loss": 1.0039, - "step": 1104 - }, - { - "epoch": 0.08304524274763264, - "grad_norm": 1.392996234376614, - "learning_rate": 3.970621572774644e-06, - "loss": 1.024, - "step": 1105 - }, - { - "epoch": 0.08312039681346761, - "grad_norm": 1.8398768590065977, - "learning_rate": 3.970538375978695e-06, - "loss": 0.9457, - "step": 1106 - }, - { - "epoch": 0.08319555087930257, - "grad_norm": 1.5398506431483365, - "learning_rate": 3.970455062420803e-06, - "loss": 1.0042, - "step": 1107 - }, - { - "epoch": 0.08327070494513754, - "grad_norm": 1.7850794535672903, - "learning_rate": 3.9703716321059026e-06, - "loss": 1.0951, - "step": 1108 - }, - { - "epoch": 0.0833458590109725, - "grad_norm": 0.8478829217017961, - "learning_rate": 3.970288085038938e-06, - "loss": 0.8749, - "step": 1109 - }, - { - "epoch": 0.08342101307680745, - "grad_norm": 1.4187845032651443, - "learning_rate": 3.970204421224859e-06, - "loss": 1.0104, - "step": 1110 - }, - { - "epoch": 0.08349616714264242, - "grad_norm": 1.3834789846717739, - "learning_rate": 3.970120640668623e-06, - "loss": 1.1269, - "step": 1111 - }, - { - "epoch": 0.08357132120847738, - "grad_norm": 1.6222155997969971, - "learning_rate": 3.970036743375197e-06, - "loss": 1.0278, - "step": 1112 - }, - { - "epoch": 0.08364647527431233, - "grad_norm": 1.594932790434383, - "learning_rate": 3.9699527293495485e-06, - "loss": 0.9464, - "step": 1113 - }, - { - "epoch": 0.0837216293401473, - "grad_norm": 1.7562355936891758, - "learning_rate": 3.969868598596658e-06, - "loss": 1.0668, - "step": 1114 - }, - { - "epoch": 0.08379678340598226, - "grad_norm": 1.7130333591960865, - "learning_rate": 3.9697843511215104e-06, - "loss": 1.0299, - "step": 1115 - }, - { - "epoch": 0.08387193747181723, - "grad_norm": 2.3740904220740706, - "learning_rate": 3.969699986929096e-06, - "loss": 0.9982, - "step": 1116 - }, - { - "epoch": 0.08394709153765219, - "grad_norm": 3.78552269515063, - "learning_rate": 3.969615506024416e-06, - "loss": 0.9832, - "step": 1117 - }, - { - "epoch": 0.08402224560348714, - "grad_norm": 1.8103152953089596, - "learning_rate": 3.969530908412474e-06, - "loss": 1.0456, - "step": 1118 - }, - { - "epoch": 0.08409739966932211, - "grad_norm": 3.772442862328883, - "learning_rate": 3.969446194098286e-06, - "loss": 1.0963, - "step": 1119 - }, - { - "epoch": 0.08417255373515707, - "grad_norm": 1.7058553945024546, - "learning_rate": 3.969361363086867e-06, - "loss": 1.083, - "step": 1120 - }, - { - "epoch": 0.08424770780099203, - "grad_norm": 3.0439059928383636, - "learning_rate": 3.969276415383248e-06, - "loss": 1.0498, - "step": 1121 - }, - { - "epoch": 0.084322861866827, - "grad_norm": 1.7195021630773002, - "learning_rate": 3.9691913509924586e-06, - "loss": 1.0895, - "step": 1122 - }, - { - "epoch": 0.08439801593266195, - "grad_norm": 1.8948021134697932, - "learning_rate": 3.969106169919542e-06, - "loss": 1.1148, - "step": 1123 - }, - { - "epoch": 0.08447316999849692, - "grad_norm": 1.81691578927197, - "learning_rate": 3.969020872169545e-06, - "loss": 1.1269, - "step": 1124 - }, - { - "epoch": 0.08454832406433188, - "grad_norm": 1.93170844750672, - "learning_rate": 3.9689354577475206e-06, - "loss": 1.0233, - "step": 1125 - }, - { - "epoch": 0.08462347813016684, - "grad_norm": 11.723521814929043, - "learning_rate": 3.968849926658532e-06, - "loss": 0.9372, - "step": 1126 - }, - { - "epoch": 0.08469863219600181, - "grad_norm": 1.7036484780014567, - "learning_rate": 3.968764278907645e-06, - "loss": 0.9738, - "step": 1127 - }, - { - "epoch": 0.08477378626183676, - "grad_norm": 1.7733294080150632, - "learning_rate": 3.968678514499936e-06, - "loss": 1.0948, - "step": 1128 - }, - { - "epoch": 0.08484894032767172, - "grad_norm": 1.5782080146937008, - "learning_rate": 3.968592633440486e-06, - "loss": 0.9772, - "step": 1129 - }, - { - "epoch": 0.08492409439350669, - "grad_norm": 2.3717444342460157, - "learning_rate": 3.968506635734385e-06, - "loss": 1.0134, - "step": 1130 - }, - { - "epoch": 0.08499924845934165, - "grad_norm": 1.9329886057873595, - "learning_rate": 3.9684205213867275e-06, - "loss": 1.0134, - "step": 1131 - }, - { - "epoch": 0.08507440252517662, - "grad_norm": 1.6229447058368118, - "learning_rate": 3.968334290402616e-06, - "loss": 1.0198, - "step": 1132 - }, - { - "epoch": 0.08514955659101157, - "grad_norm": 1.8124527760634772, - "learning_rate": 3.968247942787161e-06, - "loss": 1.0748, - "step": 1133 - }, - { - "epoch": 0.08522471065684653, - "grad_norm": 1.8372826376307894, - "learning_rate": 3.968161478545479e-06, - "loss": 1.0596, - "step": 1134 - }, - { - "epoch": 0.0852998647226815, - "grad_norm": 2.5647254212407695, - "learning_rate": 3.968074897682692e-06, - "loss": 1.0244, - "step": 1135 - }, - { - "epoch": 0.08537501878851646, - "grad_norm": 1.484324886078395, - "learning_rate": 3.967988200203931e-06, - "loss": 1.0075, - "step": 1136 - }, - { - "epoch": 0.08545017285435141, - "grad_norm": 1.7277879465600512, - "learning_rate": 3.967901386114334e-06, - "loss": 0.952, - "step": 1137 - }, - { - "epoch": 0.08552532692018638, - "grad_norm": 3.309990706185838, - "learning_rate": 3.967814455419044e-06, - "loss": 1.0204, - "step": 1138 - }, - { - "epoch": 0.08560048098602134, - "grad_norm": 3.5101401713495677, - "learning_rate": 3.9677274081232116e-06, - "loss": 0.9648, - "step": 1139 - }, - { - "epoch": 0.08567563505185631, - "grad_norm": 1.727212705778535, - "learning_rate": 3.967640244231996e-06, - "loss": 1.1616, - "step": 1140 - }, - { - "epoch": 0.08575078911769127, - "grad_norm": 1.9921933641534404, - "learning_rate": 3.9675529637505615e-06, - "loss": 1.0702, - "step": 1141 - }, - { - "epoch": 0.08582594318352622, - "grad_norm": 2.133954423655475, - "learning_rate": 3.967465566684079e-06, - "loss": 1.0297, - "step": 1142 - }, - { - "epoch": 0.0859010972493612, - "grad_norm": 1.6787147721651228, - "learning_rate": 3.967378053037728e-06, - "loss": 1.0733, - "step": 1143 - }, - { - "epoch": 0.08597625131519615, - "grad_norm": 1.8849916628949184, - "learning_rate": 3.967290422816693e-06, - "loss": 1.0737, - "step": 1144 - }, - { - "epoch": 0.0860514053810311, - "grad_norm": 1.6704265701838985, - "learning_rate": 3.967202676026169e-06, - "loss": 1.0014, - "step": 1145 - }, - { - "epoch": 0.08612655944686608, - "grad_norm": 2.260034859987645, - "learning_rate": 3.967114812671353e-06, - "loss": 1.0712, - "step": 1146 - }, - { - "epoch": 0.08620171351270103, - "grad_norm": 1.8001042415316533, - "learning_rate": 3.967026832757451e-06, - "loss": 1.1263, - "step": 1147 - }, - { - "epoch": 0.086276867578536, - "grad_norm": 1.4051320188408292, - "learning_rate": 3.966938736289677e-06, - "loss": 1.0215, - "step": 1148 - }, - { - "epoch": 0.08635202164437096, - "grad_norm": 1.650982234940125, - "learning_rate": 3.966850523273251e-06, - "loss": 1.1252, - "step": 1149 - }, - { - "epoch": 0.08642717571020592, - "grad_norm": 1.7550313692468307, - "learning_rate": 3.9667621937134e-06, - "loss": 1.0713, - "step": 1150 - }, - { - "epoch": 0.08650232977604089, - "grad_norm": 1.932639701600322, - "learning_rate": 3.966673747615358e-06, - "loss": 1.0375, - "step": 1151 - }, - { - "epoch": 0.08657748384187584, - "grad_norm": 1.7780027404480832, - "learning_rate": 3.966585184984366e-06, - "loss": 1.0038, - "step": 1152 - }, - { - "epoch": 0.08665263790771081, - "grad_norm": 1.718657681846858, - "learning_rate": 3.96649650582567e-06, - "loss": 0.9863, - "step": 1153 - }, - { - "epoch": 0.08672779197354577, - "grad_norm": 2.4765507550367203, - "learning_rate": 3.966407710144527e-06, - "loss": 1.0176, - "step": 1154 - }, - { - "epoch": 0.08680294603938073, - "grad_norm": 0.8600349666450975, - "learning_rate": 3.966318797946196e-06, - "loss": 0.8402, - "step": 1155 - }, - { - "epoch": 0.0868781001052157, - "grad_norm": 1.5724932203768331, - "learning_rate": 3.966229769235948e-06, - "loss": 1.1191, - "step": 1156 - }, - { - "epoch": 0.08695325417105065, - "grad_norm": 2.5331112647184213, - "learning_rate": 3.966140624019056e-06, - "loss": 0.9953, - "step": 1157 - }, - { - "epoch": 0.08702840823688561, - "grad_norm": 1.8993541874801263, - "learning_rate": 3.966051362300804e-06, - "loss": 0.9843, - "step": 1158 - }, - { - "epoch": 0.08710356230272058, - "grad_norm": 1.9764927787697097, - "learning_rate": 3.9659619840864795e-06, - "loss": 0.9667, - "step": 1159 - }, - { - "epoch": 0.08717871636855554, - "grad_norm": 2.122228947418546, - "learning_rate": 3.96587248938138e-06, - "loss": 1.1028, - "step": 1160 - }, - { - "epoch": 0.0872538704343905, - "grad_norm": 0.6381892442635514, - "learning_rate": 3.965782878190807e-06, - "loss": 0.8145, - "step": 1161 - }, - { - "epoch": 0.08732902450022546, - "grad_norm": 1.7981694741793621, - "learning_rate": 3.965693150520071e-06, - "loss": 0.9536, - "step": 1162 - }, - { - "epoch": 0.08740417856606042, - "grad_norm": 3.1317493033016657, - "learning_rate": 3.965603306374489e-06, - "loss": 0.9736, - "step": 1163 - }, - { - "epoch": 0.08747933263189539, - "grad_norm": 1.2730060150621678, - "learning_rate": 3.9655133457593845e-06, - "loss": 1.0533, - "step": 1164 - }, - { - "epoch": 0.08755448669773035, - "grad_norm": 0.7467641963726775, - "learning_rate": 3.965423268680087e-06, - "loss": 0.8654, - "step": 1165 - }, - { - "epoch": 0.0876296407635653, - "grad_norm": 2.086458633989099, - "learning_rate": 3.965333075141936e-06, - "loss": 1.1056, - "step": 1166 - }, - { - "epoch": 0.08770479482940027, - "grad_norm": 1.851710575154277, - "learning_rate": 3.965242765150274e-06, - "loss": 1.0349, - "step": 1167 - }, - { - "epoch": 0.08777994889523523, - "grad_norm": 2.4076947915567604, - "learning_rate": 3.9651523387104526e-06, - "loss": 1.0006, - "step": 1168 - }, - { - "epoch": 0.0878551029610702, - "grad_norm": 1.8186263104619629, - "learning_rate": 3.96506179582783e-06, - "loss": 1.0145, - "step": 1169 - }, - { - "epoch": 0.08793025702690516, - "grad_norm": 2.0429899896571584, - "learning_rate": 3.964971136507771e-06, - "loss": 1.1343, - "step": 1170 - }, - { - "epoch": 0.08800541109274011, - "grad_norm": 1.9998661234871042, - "learning_rate": 3.964880360755648e-06, - "loss": 1.0655, - "step": 1171 - }, - { - "epoch": 0.08808056515857508, - "grad_norm": 2.195629599262636, - "learning_rate": 3.96478946857684e-06, - "loss": 1.0689, - "step": 1172 - }, - { - "epoch": 0.08815571922441004, - "grad_norm": 3.5358695812166965, - "learning_rate": 3.964698459976732e-06, - "loss": 1.0151, - "step": 1173 - }, - { - "epoch": 0.088230873290245, - "grad_norm": 2.918286246270804, - "learning_rate": 3.9646073349607165e-06, - "loss": 1.1846, - "step": 1174 - }, - { - "epoch": 0.08830602735607997, - "grad_norm": 3.7170832128274967, - "learning_rate": 3.964516093534194e-06, - "loss": 1.0566, - "step": 1175 - }, - { - "epoch": 0.08838118142191492, - "grad_norm": 1.7063247300994469, - "learning_rate": 3.964424735702571e-06, - "loss": 0.9289, - "step": 1176 - }, - { - "epoch": 0.0884563354877499, - "grad_norm": 1.4780485556838059, - "learning_rate": 3.964333261471258e-06, - "loss": 0.9951, - "step": 1177 - }, - { - "epoch": 0.08853148955358485, - "grad_norm": 1.816378226115306, - "learning_rate": 3.964241670845679e-06, - "loss": 0.9953, - "step": 1178 - }, - { - "epoch": 0.0886066436194198, - "grad_norm": 3.0679054045834215, - "learning_rate": 3.9641499638312595e-06, - "loss": 1.0601, - "step": 1179 - }, - { - "epoch": 0.08868179768525478, - "grad_norm": 1.5390983616092813, - "learning_rate": 3.964058140433434e-06, - "loss": 1.0144, - "step": 1180 - }, - { - "epoch": 0.08875695175108973, - "grad_norm": 2.0916274970086515, - "learning_rate": 3.9639662006576405e-06, - "loss": 1.0569, - "step": 1181 - }, - { - "epoch": 0.08883210581692469, - "grad_norm": 1.611769884993019, - "learning_rate": 3.963874144509331e-06, - "loss": 1.0674, - "step": 1182 - }, - { - "epoch": 0.08890725988275966, - "grad_norm": 0.84681477068586, - "learning_rate": 3.963781971993957e-06, - "loss": 0.8649, - "step": 1183 - }, - { - "epoch": 0.08898241394859462, - "grad_norm": 2.14962502691694, - "learning_rate": 3.963689683116982e-06, - "loss": 0.9595, - "step": 1184 - }, - { - "epoch": 0.08905756801442959, - "grad_norm": 1.6328958736470565, - "learning_rate": 3.963597277883874e-06, - "loss": 1.0544, - "step": 1185 - }, - { - "epoch": 0.08913272208026454, - "grad_norm": 1.7222765722551971, - "learning_rate": 3.963504756300107e-06, - "loss": 1.054, - "step": 1186 - }, - { - "epoch": 0.0892078761460995, - "grad_norm": 0.7034972582955472, - "learning_rate": 3.963412118371166e-06, - "loss": 0.8831, - "step": 1187 - }, - { - "epoch": 0.08928303021193447, - "grad_norm": 2.4710150842148324, - "learning_rate": 3.963319364102538e-06, - "loss": 1.0128, - "step": 1188 - }, - { - "epoch": 0.08935818427776943, - "grad_norm": 1.777082486914567, - "learning_rate": 3.96322649349972e-06, - "loss": 1.0458, - "step": 1189 - }, - { - "epoch": 0.08943333834360438, - "grad_norm": 1.9627260357379288, - "learning_rate": 3.963133506568214e-06, - "loss": 1.1253, - "step": 1190 - }, - { - "epoch": 0.08950849240943935, - "grad_norm": 2.9396547571521996, - "learning_rate": 3.96304040331353e-06, - "loss": 1.0092, - "step": 1191 - }, - { - "epoch": 0.08958364647527431, - "grad_norm": 3.3106615254852203, - "learning_rate": 3.9629471837411855e-06, - "loss": 1.1018, - "step": 1192 - }, - { - "epoch": 0.08965880054110928, - "grad_norm": 1.8388948038370376, - "learning_rate": 3.962853847856704e-06, - "loss": 1.1014, - "step": 1193 - }, - { - "epoch": 0.08973395460694424, - "grad_norm": 2.4889886590094648, - "learning_rate": 3.962760395665616e-06, - "loss": 1.0086, - "step": 1194 - }, - { - "epoch": 0.08980910867277919, - "grad_norm": 1.6807097407178746, - "learning_rate": 3.962666827173458e-06, - "loss": 1.0848, - "step": 1195 - }, - { - "epoch": 0.08988426273861416, - "grad_norm": 0.6977853627785631, - "learning_rate": 3.9625731423857745e-06, - "loss": 0.8452, - "step": 1196 - }, - { - "epoch": 0.08995941680444912, - "grad_norm": 6.931583940667073, - "learning_rate": 3.9624793413081185e-06, - "loss": 1.0568, - "step": 1197 - }, - { - "epoch": 0.09003457087028409, - "grad_norm": 1.994340040378088, - "learning_rate": 3.962385423946046e-06, - "loss": 1.0909, - "step": 1198 - }, - { - "epoch": 0.09010972493611905, - "grad_norm": 1.810891457104838, - "learning_rate": 3.962291390305123e-06, - "loss": 1.016, - "step": 1199 - }, - { - "epoch": 0.090184879001954, - "grad_norm": 1.8876679680467436, - "learning_rate": 3.96219724039092e-06, - "loss": 1.0161, - "step": 1200 - }, - { - "epoch": 0.09026003306778897, - "grad_norm": 1.7349352919634826, - "learning_rate": 3.962102974209018e-06, - "loss": 1.0888, - "step": 1201 - }, - { - "epoch": 0.09033518713362393, - "grad_norm": 1.723570704219188, - "learning_rate": 3.962008591765e-06, - "loss": 1.0624, - "step": 1202 - }, - { - "epoch": 0.09041034119945889, - "grad_norm": 2.030428793771988, - "learning_rate": 3.961914093064461e-06, - "loss": 1.0052, - "step": 1203 - }, - { - "epoch": 0.09048549526529386, - "grad_norm": 2.130893612594444, - "learning_rate": 3.961819478112999e-06, - "loss": 1.1097, - "step": 1204 - }, - { - "epoch": 0.09056064933112881, - "grad_norm": 2.6020400150339054, - "learning_rate": 3.961724746916221e-06, - "loss": 1.0194, - "step": 1205 - }, - { - "epoch": 0.09063580339696378, - "grad_norm": 2.4184239564509746, - "learning_rate": 3.961629899479739e-06, - "loss": 1.0872, - "step": 1206 - }, - { - "epoch": 0.09071095746279874, - "grad_norm": 1.4855538756501248, - "learning_rate": 3.961534935809174e-06, - "loss": 1.0634, - "step": 1207 - }, - { - "epoch": 0.0907861115286337, - "grad_norm": 1.486168590911604, - "learning_rate": 3.961439855910154e-06, - "loss": 0.9948, - "step": 1208 - }, - { - "epoch": 0.09086126559446867, - "grad_norm": 2.1111955486388005, - "learning_rate": 3.961344659788311e-06, - "loss": 1.0677, - "step": 1209 - }, - { - "epoch": 0.09093641966030362, - "grad_norm": 3.0509083192667066, - "learning_rate": 3.961249347449286e-06, - "loss": 1.0928, - "step": 1210 - }, - { - "epoch": 0.09101157372613858, - "grad_norm": 1.7923379424191201, - "learning_rate": 3.961153918898727e-06, - "loss": 1.0163, - "step": 1211 - }, - { - "epoch": 0.09108672779197355, - "grad_norm": 1.7622408547858857, - "learning_rate": 3.961058374142289e-06, - "loss": 1.1491, - "step": 1212 - }, - { - "epoch": 0.0911618818578085, - "grad_norm": 1.7633246907032607, - "learning_rate": 3.960962713185633e-06, - "loss": 1.006, - "step": 1213 - }, - { - "epoch": 0.09123703592364348, - "grad_norm": 1.7014246861158486, - "learning_rate": 3.960866936034426e-06, - "loss": 1.0579, - "step": 1214 - }, - { - "epoch": 0.09131218998947843, - "grad_norm": 1.445194218924854, - "learning_rate": 3.960771042694346e-06, - "loss": 1.0553, - "step": 1215 - }, - { - "epoch": 0.09138734405531339, - "grad_norm": 19.085429025154735, - "learning_rate": 3.960675033171072e-06, - "loss": 0.9841, - "step": 1216 - }, - { - "epoch": 0.09146249812114836, - "grad_norm": 1.8972605758012324, - "learning_rate": 3.960578907470295e-06, - "loss": 1.0764, - "step": 1217 - }, - { - "epoch": 0.09153765218698331, - "grad_norm": 2.241371777316925, - "learning_rate": 3.960482665597711e-06, - "loss": 0.9438, - "step": 1218 - }, - { - "epoch": 0.09161280625281827, - "grad_norm": 1.8357093182253248, - "learning_rate": 3.960386307559021e-06, - "loss": 1.0487, - "step": 1219 - }, - { - "epoch": 0.09168796031865324, - "grad_norm": 2.676911899010402, - "learning_rate": 3.960289833359936e-06, - "loss": 1.0105, - "step": 1220 - }, - { - "epoch": 0.0917631143844882, - "grad_norm": 1.3223694347838186, - "learning_rate": 3.9601932430061714e-06, - "loss": 1.0259, - "step": 1221 - }, - { - "epoch": 0.09183826845032317, - "grad_norm": 1.8596027138660474, - "learning_rate": 3.9600965365034515e-06, - "loss": 1.105, - "step": 1222 - }, - { - "epoch": 0.09191342251615812, - "grad_norm": 0.6158692490200987, - "learning_rate": 3.959999713857505e-06, - "loss": 0.7762, - "step": 1223 - }, - { - "epoch": 0.09198857658199308, - "grad_norm": 1.615089788273451, - "learning_rate": 3.959902775074072e-06, - "loss": 1.0987, - "step": 1224 - }, - { - "epoch": 0.09206373064782805, - "grad_norm": 2.5936450247605842, - "learning_rate": 3.9598057201588926e-06, - "loss": 1.0836, - "step": 1225 - }, - { - "epoch": 0.09213888471366301, - "grad_norm": 1.619405064541149, - "learning_rate": 3.959708549117721e-06, - "loss": 1.0194, - "step": 1226 - }, - { - "epoch": 0.09221403877949796, - "grad_norm": 1.8251740202827744, - "learning_rate": 3.959611261956313e-06, - "loss": 1.0482, - "step": 1227 - }, - { - "epoch": 0.09228919284533293, - "grad_norm": 2.083706695161577, - "learning_rate": 3.959513858680434e-06, - "loss": 0.9899, - "step": 1228 - }, - { - "epoch": 0.09236434691116789, - "grad_norm": 1.7691975462068312, - "learning_rate": 3.9594163392958566e-06, - "loss": 1.1058, - "step": 1229 - }, - { - "epoch": 0.09243950097700286, - "grad_norm": 2.8111887100207964, - "learning_rate": 3.959318703808356e-06, - "loss": 0.936, - "step": 1230 - }, - { - "epoch": 0.09251465504283782, - "grad_norm": 1.5845942328646028, - "learning_rate": 3.95922095222372e-06, - "loss": 1.0741, - "step": 1231 - }, - { - "epoch": 0.09258980910867277, - "grad_norm": 1.6626355854474961, - "learning_rate": 3.959123084547741e-06, - "loss": 1.099, - "step": 1232 - }, - { - "epoch": 0.09266496317450774, - "grad_norm": 2.04435504974735, - "learning_rate": 3.959025100786217e-06, - "loss": 1.0863, - "step": 1233 - }, - { - "epoch": 0.0927401172403427, - "grad_norm": 1.9153322329059383, - "learning_rate": 3.958927000944954e-06, - "loss": 1.073, - "step": 1234 - }, - { - "epoch": 0.09281527130617766, - "grad_norm": 1.9070203506552075, - "learning_rate": 3.958828785029765e-06, - "loss": 1.0524, - "step": 1235 - }, - { - "epoch": 0.09289042537201263, - "grad_norm": 3.4514705785596793, - "learning_rate": 3.958730453046469e-06, - "loss": 1.1097, - "step": 1236 - }, - { - "epoch": 0.09296557943784758, - "grad_norm": 1.7354583106324148, - "learning_rate": 3.958632005000895e-06, - "loss": 1.0265, - "step": 1237 - }, - { - "epoch": 0.09304073350368255, - "grad_norm": 1.8455010197506683, - "learning_rate": 3.958533440898873e-06, - "loss": 1.0211, - "step": 1238 - }, - { - "epoch": 0.09311588756951751, - "grad_norm": 1.5350986304887282, - "learning_rate": 3.958434760746245e-06, - "loss": 1.0264, - "step": 1239 - }, - { - "epoch": 0.09319104163535247, - "grad_norm": 0.8316415223223861, - "learning_rate": 3.958335964548859e-06, - "loss": 0.8816, - "step": 1240 - }, - { - "epoch": 0.09326619570118744, - "grad_norm": 1.5732241534212341, - "learning_rate": 3.958237052312568e-06, - "loss": 1.0576, - "step": 1241 - }, - { - "epoch": 0.0933413497670224, - "grad_norm": 2.1251180279434903, - "learning_rate": 3.958138024043232e-06, - "loss": 1.1219, - "step": 1242 - }, - { - "epoch": 0.09341650383285736, - "grad_norm": 0.6865830158194788, - "learning_rate": 3.958038879746721e-06, - "loss": 0.8239, - "step": 1243 - }, - { - "epoch": 0.09349165789869232, - "grad_norm": 2.0923152961474116, - "learning_rate": 3.9579396194289075e-06, - "loss": 0.9196, - "step": 1244 - }, - { - "epoch": 0.09356681196452728, - "grad_norm": 2.4974729163426357, - "learning_rate": 3.957840243095675e-06, - "loss": 0.9892, - "step": 1245 - }, - { - "epoch": 0.09364196603036225, - "grad_norm": 2.1808678497198986, - "learning_rate": 3.95774075075291e-06, - "loss": 0.966, - "step": 1246 - }, - { - "epoch": 0.0937171200961972, - "grad_norm": 1.6577417385520794, - "learning_rate": 3.957641142406509e-06, - "loss": 1.037, - "step": 1247 - }, - { - "epoch": 0.09379227416203216, - "grad_norm": 1.8318999351176186, - "learning_rate": 3.9575414180623746e-06, - "loss": 0.9609, - "step": 1248 - }, - { - "epoch": 0.09386742822786713, - "grad_norm": 2.4806717436623265, - "learning_rate": 3.957441577726415e-06, - "loss": 1.0338, - "step": 1249 - }, - { - "epoch": 0.09394258229370209, - "grad_norm": 1.5726214083333485, - "learning_rate": 3.957341621404547e-06, - "loss": 1.0412, - "step": 1250 - }, - { - "epoch": 0.09401773635953706, - "grad_norm": 2.0963761670081666, - "learning_rate": 3.957241549102692e-06, - "loss": 1.0739, - "step": 1251 - }, - { - "epoch": 0.09409289042537201, - "grad_norm": 1.942060456298405, - "learning_rate": 3.957141360826781e-06, - "loss": 1.0439, - "step": 1252 - }, - { - "epoch": 0.09416804449120697, - "grad_norm": 1.526781764140871, - "learning_rate": 3.95704105658275e-06, - "loss": 0.9302, - "step": 1253 - }, - { - "epoch": 0.09424319855704194, - "grad_norm": 0.9559653749919195, - "learning_rate": 3.9569406363765415e-06, - "loss": 0.878, - "step": 1254 - }, - { - "epoch": 0.0943183526228769, - "grad_norm": 2.1295941002810617, - "learning_rate": 3.956840100214107e-06, - "loss": 0.9799, - "step": 1255 - }, - { - "epoch": 0.09439350668871185, - "grad_norm": 1.7462913179527013, - "learning_rate": 3.956739448101404e-06, - "loss": 1.043, - "step": 1256 - }, - { - "epoch": 0.09446866075454682, - "grad_norm": 1.8293978832444069, - "learning_rate": 3.956638680044396e-06, - "loss": 1.08, - "step": 1257 - }, - { - "epoch": 0.09454381482038178, - "grad_norm": 2.3862567960263688, - "learning_rate": 3.956537796049052e-06, - "loss": 0.9955, - "step": 1258 - }, - { - "epoch": 0.09461896888621675, - "grad_norm": 1.5775455884195138, - "learning_rate": 3.9564367961213536e-06, - "loss": 0.9856, - "step": 1259 - }, - { - "epoch": 0.0946941229520517, - "grad_norm": 1.5201385774936922, - "learning_rate": 3.956335680267282e-06, - "loss": 1.1021, - "step": 1260 - }, - { - "epoch": 0.09476927701788666, - "grad_norm": 1.8338989627924438, - "learning_rate": 3.956234448492831e-06, - "loss": 1.0013, - "step": 1261 - }, - { - "epoch": 0.09484443108372163, - "grad_norm": 1.6576901778126258, - "learning_rate": 3.956133100803996e-06, - "loss": 0.9951, - "step": 1262 - }, - { - "epoch": 0.09491958514955659, - "grad_norm": 3.481310262107106, - "learning_rate": 3.956031637206786e-06, - "loss": 1.0511, - "step": 1263 - }, - { - "epoch": 0.09499473921539155, - "grad_norm": 1.872349391426731, - "learning_rate": 3.955930057707211e-06, - "loss": 0.9468, - "step": 1264 - }, - { - "epoch": 0.09506989328122652, - "grad_norm": 1.4693151157885986, - "learning_rate": 3.95582836231129e-06, - "loss": 1.0316, - "step": 1265 - }, - { - "epoch": 0.09514504734706147, - "grad_norm": 1.9509788012895104, - "learning_rate": 3.9557265510250505e-06, - "loss": 1.0054, - "step": 1266 - }, - { - "epoch": 0.09522020141289644, - "grad_norm": 1.8863868184456607, - "learning_rate": 3.955624623854522e-06, - "loss": 0.975, - "step": 1267 - }, - { - "epoch": 0.0952953554787314, - "grad_norm": 1.6386947175571944, - "learning_rate": 3.9555225808057475e-06, - "loss": 0.9161, - "step": 1268 - }, - { - "epoch": 0.09537050954456636, - "grad_norm": 2.10410945801974, - "learning_rate": 3.9554204218847705e-06, - "loss": 1.0161, - "step": 1269 - }, - { - "epoch": 0.09544566361040133, - "grad_norm": 1.8767446666157477, - "learning_rate": 3.955318147097647e-06, - "loss": 1.0491, - "step": 1270 - }, - { - "epoch": 0.09552081767623628, - "grad_norm": 1.7293521904139075, - "learning_rate": 3.955215756450435e-06, - "loss": 1.0146, - "step": 1271 - }, - { - "epoch": 0.09559597174207124, - "grad_norm": 2.7012064746066766, - "learning_rate": 3.955113249949203e-06, - "loss": 0.9163, - "step": 1272 - }, - { - "epoch": 0.09567112580790621, - "grad_norm": 3.3250701208536566, - "learning_rate": 3.955010627600024e-06, - "loss": 1.0303, - "step": 1273 - }, - { - "epoch": 0.09574627987374117, - "grad_norm": 1.476455361652286, - "learning_rate": 3.954907889408979e-06, - "loss": 1.0998, - "step": 1274 - }, - { - "epoch": 0.09582143393957614, - "grad_norm": 1.8621614238041888, - "learning_rate": 3.954805035382155e-06, - "loss": 1.1093, - "step": 1275 - }, - { - "epoch": 0.0958965880054111, - "grad_norm": 1.7833294693141444, - "learning_rate": 3.954702065525649e-06, - "loss": 0.9856, - "step": 1276 - }, - { - "epoch": 0.09597174207124605, - "grad_norm": 1.694400805899813, - "learning_rate": 3.954598979845559e-06, - "loss": 1.0678, - "step": 1277 - }, - { - "epoch": 0.09604689613708102, - "grad_norm": 2.187200689274666, - "learning_rate": 3.954495778347996e-06, - "loss": 0.9062, - "step": 1278 - }, - { - "epoch": 0.09612205020291598, - "grad_norm": 1.789350328238707, - "learning_rate": 3.954392461039073e-06, - "loss": 1.0907, - "step": 1279 - }, - { - "epoch": 0.09619720426875093, - "grad_norm": 2.0198938866872593, - "learning_rate": 3.954289027924912e-06, - "loss": 0.9733, - "step": 1280 - }, - { - "epoch": 0.0962723583345859, - "grad_norm": 1.6432567015777764, - "learning_rate": 3.954185479011644e-06, - "loss": 1.076, - "step": 1281 - }, - { - "epoch": 0.09634751240042086, - "grad_norm": 0.7786102712073778, - "learning_rate": 3.954081814305403e-06, - "loss": 0.8209, - "step": 1282 - }, - { - "epoch": 0.09642266646625583, - "grad_norm": 1.7806827913642265, - "learning_rate": 3.953978033812332e-06, - "loss": 1.0307, - "step": 1283 - }, - { - "epoch": 0.09649782053209079, - "grad_norm": 2.163068301987929, - "learning_rate": 3.95387413753858e-06, - "loss": 0.9619, - "step": 1284 - }, - { - "epoch": 0.09657297459792574, - "grad_norm": 1.7522320817664485, - "learning_rate": 3.9537701254903034e-06, - "loss": 0.9141, - "step": 1285 - }, - { - "epoch": 0.09664812866376071, - "grad_norm": 2.15576785681813, - "learning_rate": 3.953665997673665e-06, - "loss": 1.0153, - "step": 1286 - }, - { - "epoch": 0.09672328272959567, - "grad_norm": 2.1317726367004903, - "learning_rate": 3.953561754094836e-06, - "loss": 1.0183, - "step": 1287 - }, - { - "epoch": 0.09679843679543064, - "grad_norm": 1.643945295096605, - "learning_rate": 3.953457394759992e-06, - "loss": 1.1083, - "step": 1288 - }, - { - "epoch": 0.0968735908612656, - "grad_norm": 2.2913106352779073, - "learning_rate": 3.953352919675317e-06, - "loss": 1.0863, - "step": 1289 - }, - { - "epoch": 0.09694874492710055, - "grad_norm": 3.062775112951829, - "learning_rate": 3.953248328847001e-06, - "loss": 0.9645, - "step": 1290 - }, - { - "epoch": 0.09702389899293552, - "grad_norm": 1.4904135500084625, - "learning_rate": 3.953143622281243e-06, - "loss": 1.0806, - "step": 1291 - }, - { - "epoch": 0.09709905305877048, - "grad_norm": 1.7924625787593005, - "learning_rate": 3.953038799984246e-06, - "loss": 1.0275, - "step": 1292 - }, - { - "epoch": 0.09717420712460544, - "grad_norm": 1.708654544064466, - "learning_rate": 3.952933861962222e-06, - "loss": 1.1184, - "step": 1293 - }, - { - "epoch": 0.0972493611904404, - "grad_norm": 1.9518344124694469, - "learning_rate": 3.952828808221387e-06, - "loss": 1.0284, - "step": 1294 - }, - { - "epoch": 0.09732451525627536, - "grad_norm": 1.7431961355811636, - "learning_rate": 3.952723638767968e-06, - "loss": 1.1232, - "step": 1295 - }, - { - "epoch": 0.09739966932211033, - "grad_norm": 1.9791804530684172, - "learning_rate": 3.952618353608196e-06, - "loss": 1.0085, - "step": 1296 - }, - { - "epoch": 0.09747482338794529, - "grad_norm": 1.7093236967822887, - "learning_rate": 3.95251295274831e-06, - "loss": 1.0396, - "step": 1297 - }, - { - "epoch": 0.09754997745378025, - "grad_norm": 1.9936644819807132, - "learning_rate": 3.952407436194554e-06, - "loss": 1.0159, - "step": 1298 - }, - { - "epoch": 0.09762513151961522, - "grad_norm": 3.0534137694690195, - "learning_rate": 3.9523018039531816e-06, - "loss": 0.9461, - "step": 1299 - }, - { - "epoch": 0.09770028558545017, - "grad_norm": 2.4942460601674585, - "learning_rate": 3.952196056030451e-06, - "loss": 1.0066, - "step": 1300 - }, - { - "epoch": 0.09777543965128513, - "grad_norm": 0.8127644915337531, - "learning_rate": 3.952090192432629e-06, - "loss": 0.8386, - "step": 1301 - }, - { - "epoch": 0.0978505937171201, - "grad_norm": 2.133646148853949, - "learning_rate": 3.951984213165988e-06, - "loss": 1.0374, - "step": 1302 - }, - { - "epoch": 0.09792574778295506, - "grad_norm": 1.909733512922106, - "learning_rate": 3.951878118236807e-06, - "loss": 0.8682, - "step": 1303 - }, - { - "epoch": 0.09800090184879003, - "grad_norm": 2.1826404544691065, - "learning_rate": 3.951771907651374e-06, - "loss": 0.9847, - "step": 1304 - }, - { - "epoch": 0.09807605591462498, - "grad_norm": 1.684279866265614, - "learning_rate": 3.951665581415982e-06, - "loss": 1.0426, - "step": 1305 - }, - { - "epoch": 0.09815120998045994, - "grad_norm": 1.5504760441853107, - "learning_rate": 3.9515591395369305e-06, - "loss": 1.0246, - "step": 1306 - }, - { - "epoch": 0.09822636404629491, - "grad_norm": 2.7918131965404567, - "learning_rate": 3.9514525820205265e-06, - "loss": 1.0236, - "step": 1307 - }, - { - "epoch": 0.09830151811212987, - "grad_norm": 2.043843477777007, - "learning_rate": 3.951345908873085e-06, - "loss": 0.998, - "step": 1308 - }, - { - "epoch": 0.09837667217796482, - "grad_norm": 3.1854971782987374, - "learning_rate": 3.9512391201009265e-06, - "loss": 1.0093, - "step": 1309 - }, - { - "epoch": 0.09845182624379979, - "grad_norm": 1.9633493978601981, - "learning_rate": 3.9511322157103776e-06, - "loss": 1.0444, - "step": 1310 - }, - { - "epoch": 0.09852698030963475, - "grad_norm": 1.8405759613810149, - "learning_rate": 3.951025195707774e-06, - "loss": 1.0898, - "step": 1311 - }, - { - "epoch": 0.09860213437546972, - "grad_norm": 1.8956204459025299, - "learning_rate": 3.950918060099456e-06, - "loss": 1.1204, - "step": 1312 - }, - { - "epoch": 0.09867728844130468, - "grad_norm": 1.622695976729744, - "learning_rate": 3.950810808891773e-06, - "loss": 0.9725, - "step": 1313 - }, - { - "epoch": 0.09875244250713963, - "grad_norm": 3.541340278096839, - "learning_rate": 3.950703442091079e-06, - "loss": 1.0477, - "step": 1314 - }, - { - "epoch": 0.0988275965729746, - "grad_norm": 2.1299528856394794, - "learning_rate": 3.950595959703736e-06, - "loss": 1.0265, - "step": 1315 - }, - { - "epoch": 0.09890275063880956, - "grad_norm": 1.6718331447602859, - "learning_rate": 3.950488361736114e-06, - "loss": 1.0164, - "step": 1316 - }, - { - "epoch": 0.09897790470464451, - "grad_norm": 1.7151596021762814, - "learning_rate": 3.950380648194587e-06, - "loss": 1.0752, - "step": 1317 - }, - { - "epoch": 0.09905305877047949, - "grad_norm": 1.6356167599999247, - "learning_rate": 3.950272819085538e-06, - "loss": 1.0072, - "step": 1318 - }, - { - "epoch": 0.09912821283631444, - "grad_norm": 3.1697037504859544, - "learning_rate": 3.950164874415357e-06, - "loss": 1.0066, - "step": 1319 - }, - { - "epoch": 0.09920336690214941, - "grad_norm": 2.70308689685934, - "learning_rate": 3.950056814190439e-06, - "loss": 0.9398, - "step": 1320 - }, - { - "epoch": 0.09927852096798437, - "grad_norm": 2.0337776303187525, - "learning_rate": 3.949948638417188e-06, - "loss": 1.0094, - "step": 1321 - }, - { - "epoch": 0.09935367503381932, - "grad_norm": 1.9761527017363394, - "learning_rate": 3.949840347102013e-06, - "loss": 1.0218, - "step": 1322 - }, - { - "epoch": 0.0994288290996543, - "grad_norm": 1.7880149339696554, - "learning_rate": 3.949731940251331e-06, - "loss": 0.9991, - "step": 1323 - }, - { - "epoch": 0.09950398316548925, - "grad_norm": 1.8425647999562902, - "learning_rate": 3.949623417871565e-06, - "loss": 1.0154, - "step": 1324 - }, - { - "epoch": 0.09957913723132421, - "grad_norm": 0.7067863082222359, - "learning_rate": 3.949514779969147e-06, - "loss": 0.8131, - "step": 1325 - }, - { - "epoch": 0.09965429129715918, - "grad_norm": 2.0804796001568246, - "learning_rate": 3.949406026550512e-06, - "loss": 1.1182, - "step": 1326 - }, - { - "epoch": 0.09972944536299413, - "grad_norm": 1.8309680318036519, - "learning_rate": 3.949297157622105e-06, - "loss": 1.0754, - "step": 1327 - }, - { - "epoch": 0.0998045994288291, - "grad_norm": 1.8384707890177459, - "learning_rate": 3.949188173190378e-06, - "loss": 0.989, - "step": 1328 - }, - { - "epoch": 0.09987975349466406, - "grad_norm": 7.876129553787626, - "learning_rate": 3.949079073261788e-06, - "loss": 0.9893, - "step": 1329 - }, - { - "epoch": 0.09995490756049902, - "grad_norm": 2.788650012034124, - "learning_rate": 3.948969857842799e-06, - "loss": 0.9812, - "step": 1330 - }, - { - "epoch": 0.10003006162633399, - "grad_norm": 2.720098665215039, - "learning_rate": 3.948860526939882e-06, - "loss": 1.0277, - "step": 1331 - }, - { - "epoch": 0.10010521569216894, - "grad_norm": 2.202837877975655, - "learning_rate": 3.948751080559517e-06, - "loss": 1.0595, - "step": 1332 - }, - { - "epoch": 0.10018036975800391, - "grad_norm": 7.249545642164223, - "learning_rate": 3.948641518708188e-06, - "loss": 1.0994, - "step": 1333 - }, - { - "epoch": 0.10025552382383887, - "grad_norm": 4.158957381172965, - "learning_rate": 3.9485318413923865e-06, - "loss": 0.9783, - "step": 1334 - }, - { - "epoch": 0.10033067788967383, - "grad_norm": 1.6685219055191898, - "learning_rate": 3.948422048618612e-06, - "loss": 1.0166, - "step": 1335 - }, - { - "epoch": 0.1004058319555088, - "grad_norm": 2.0879653046601026, - "learning_rate": 3.948312140393372e-06, - "loss": 1.0319, - "step": 1336 - }, - { - "epoch": 0.10048098602134375, - "grad_norm": 0.752242748416978, - "learning_rate": 3.948202116723176e-06, - "loss": 0.8168, - "step": 1337 - }, - { - "epoch": 0.10055614008717871, - "grad_norm": 1.3877870125374656, - "learning_rate": 3.948091977614544e-06, - "loss": 0.9849, - "step": 1338 - }, - { - "epoch": 0.10063129415301368, - "grad_norm": 1.6314254783284192, - "learning_rate": 3.947981723074003e-06, - "loss": 0.9718, - "step": 1339 - }, - { - "epoch": 0.10070644821884864, - "grad_norm": 1.9097008659632895, - "learning_rate": 3.947871353108085e-06, - "loss": 0.9526, - "step": 1340 - }, - { - "epoch": 0.10078160228468361, - "grad_norm": 2.1508918198924203, - "learning_rate": 3.947760867723331e-06, - "loss": 1.1198, - "step": 1341 - }, - { - "epoch": 0.10085675635051856, - "grad_norm": 2.1654527515866624, - "learning_rate": 3.9476502669262866e-06, - "loss": 1.0292, - "step": 1342 - }, - { - "epoch": 0.10093191041635352, - "grad_norm": 2.038678129368939, - "learning_rate": 3.947539550723506e-06, - "loss": 1.0049, - "step": 1343 - }, - { - "epoch": 0.10100706448218849, - "grad_norm": 0.7000235502504926, - "learning_rate": 3.94742871912155e-06, - "loss": 0.8391, - "step": 1344 - }, - { - "epoch": 0.10108221854802345, - "grad_norm": 1.644502181931159, - "learning_rate": 3.947317772126985e-06, - "loss": 1.037, - "step": 1345 - }, - { - "epoch": 0.1011573726138584, - "grad_norm": 2.3124017436830115, - "learning_rate": 3.947206709746385e-06, - "loss": 1.1396, - "step": 1346 - }, - { - "epoch": 0.10123252667969337, - "grad_norm": 2.335226812027226, - "learning_rate": 3.947095531986331e-06, - "loss": 1.1121, - "step": 1347 - }, - { - "epoch": 0.10130768074552833, - "grad_norm": 1.970697136407681, - "learning_rate": 3.9469842388534105e-06, - "loss": 1.0467, - "step": 1348 - }, - { - "epoch": 0.1013828348113633, - "grad_norm": 1.9710783738221855, - "learning_rate": 3.946872830354219e-06, - "loss": 1.0112, - "step": 1349 - }, - { - "epoch": 0.10145798887719826, - "grad_norm": 1.596683868496055, - "learning_rate": 3.946761306495357e-06, - "loss": 1.1577, - "step": 1350 - }, - { - "epoch": 0.10153314294303321, - "grad_norm": 2.112788962741031, - "learning_rate": 3.946649667283433e-06, - "loss": 1.025, - "step": 1351 - }, - { - "epoch": 0.10160829700886818, - "grad_norm": 1.8204886551786126, - "learning_rate": 3.946537912725062e-06, - "loss": 1.0021, - "step": 1352 - }, - { - "epoch": 0.10168345107470314, - "grad_norm": 1.713349753844148, - "learning_rate": 3.946426042826865e-06, - "loss": 0.95, - "step": 1353 - }, - { - "epoch": 0.1017586051405381, - "grad_norm": 3.1876548280699213, - "learning_rate": 3.946314057595473e-06, - "loss": 1.0058, - "step": 1354 - }, - { - "epoch": 0.10183375920637307, - "grad_norm": 3.4955838817276774, - "learning_rate": 3.94620195703752e-06, - "loss": 0.9749, - "step": 1355 - }, - { - "epoch": 0.10190891327220802, - "grad_norm": 1.76104060144534, - "learning_rate": 3.946089741159647e-06, - "loss": 0.9045, - "step": 1356 - }, - { - "epoch": 0.101984067338043, - "grad_norm": 0.8132703137280838, - "learning_rate": 3.9459774099685065e-06, - "loss": 0.8759, - "step": 1357 - }, - { - "epoch": 0.10205922140387795, - "grad_norm": 2.034648927215465, - "learning_rate": 3.945864963470752e-06, - "loss": 1.0461, - "step": 1358 - }, - { - "epoch": 0.1021343754697129, - "grad_norm": 7.46211183902684, - "learning_rate": 3.945752401673047e-06, - "loss": 0.9175, - "step": 1359 - }, - { - "epoch": 0.10220952953554788, - "grad_norm": 1.6404992918264714, - "learning_rate": 3.945639724582062e-06, - "loss": 1.1165, - "step": 1360 - }, - { - "epoch": 0.10228468360138283, - "grad_norm": 2.3344123479876067, - "learning_rate": 3.9455269322044725e-06, - "loss": 1.0832, - "step": 1361 - }, - { - "epoch": 0.10235983766721779, - "grad_norm": 1.8830380126633157, - "learning_rate": 3.945414024546963e-06, - "loss": 1.0205, - "step": 1362 - }, - { - "epoch": 0.10243499173305276, - "grad_norm": 1.8934363908098657, - "learning_rate": 3.945301001616222e-06, - "loss": 1.0555, - "step": 1363 - }, - { - "epoch": 0.10251014579888772, - "grad_norm": 3.3180199461297755, - "learning_rate": 3.945187863418949e-06, - "loss": 0.9636, - "step": 1364 - }, - { - "epoch": 0.10258529986472269, - "grad_norm": 1.682866198179746, - "learning_rate": 3.945074609961845e-06, - "loss": 1.0612, - "step": 1365 - }, - { - "epoch": 0.10266045393055764, - "grad_norm": 1.9503064990642773, - "learning_rate": 3.944961241251623e-06, - "loss": 1.0487, - "step": 1366 - }, - { - "epoch": 0.1027356079963926, - "grad_norm": 1.856541116922338, - "learning_rate": 3.944847757295e-06, - "loss": 0.9653, - "step": 1367 - }, - { - "epoch": 0.10281076206222757, - "grad_norm": 2.0411080225422364, - "learning_rate": 3.9447341580987e-06, - "loss": 1.0142, - "step": 1368 - }, - { - "epoch": 0.10288591612806253, - "grad_norm": 2.2170856304571416, - "learning_rate": 3.944620443669453e-06, - "loss": 1.0615, - "step": 1369 - }, - { - "epoch": 0.10296107019389748, - "grad_norm": 1.6515472873358845, - "learning_rate": 3.9445066140139995e-06, - "loss": 0.9391, - "step": 1370 - }, - { - "epoch": 0.10303622425973245, - "grad_norm": 1.8847711908920408, - "learning_rate": 3.944392669139083e-06, - "loss": 1.037, - "step": 1371 - }, - { - "epoch": 0.10311137832556741, - "grad_norm": 1.605169932083285, - "learning_rate": 3.944278609051455e-06, - "loss": 0.9123, - "step": 1372 - }, - { - "epoch": 0.10318653239140238, - "grad_norm": 1.9549587339060728, - "learning_rate": 3.944164433757874e-06, - "loss": 1.0388, - "step": 1373 - }, - { - "epoch": 0.10326168645723734, - "grad_norm": 2.322682823647598, - "learning_rate": 3.944050143265106e-06, - "loss": 0.9495, - "step": 1374 - }, - { - "epoch": 0.1033368405230723, - "grad_norm": 2.358390956574574, - "learning_rate": 3.943935737579923e-06, - "loss": 1.0616, - "step": 1375 - }, - { - "epoch": 0.10341199458890726, - "grad_norm": 2.0992122398761053, - "learning_rate": 3.943821216709103e-06, - "loss": 0.9516, - "step": 1376 - }, - { - "epoch": 0.10348714865474222, - "grad_norm": 2.0027794597946116, - "learning_rate": 3.943706580659433e-06, - "loss": 1.0112, - "step": 1377 - }, - { - "epoch": 0.10356230272057719, - "grad_norm": 1.506935378407261, - "learning_rate": 3.943591829437705e-06, - "loss": 1.1192, - "step": 1378 - }, - { - "epoch": 0.10363745678641215, - "grad_norm": 2.0897839689171613, - "learning_rate": 3.943476963050719e-06, - "loss": 1.0079, - "step": 1379 - }, - { - "epoch": 0.1037126108522471, - "grad_norm": 0.8917316788892004, - "learning_rate": 3.94336198150528e-06, - "loss": 0.9117, - "step": 1380 - }, - { - "epoch": 0.10378776491808207, - "grad_norm": 1.857516730581027, - "learning_rate": 3.9432468848082024e-06, - "loss": 0.9966, - "step": 1381 - }, - { - "epoch": 0.10386291898391703, - "grad_norm": 2.455672186914899, - "learning_rate": 3.9431316729663055e-06, - "loss": 1.0359, - "step": 1382 - }, - { - "epoch": 0.10393807304975199, - "grad_norm": 2.488126022311827, - "learning_rate": 3.943016345986417e-06, - "loss": 0.9484, - "step": 1383 - }, - { - "epoch": 0.10401322711558696, - "grad_norm": 1.5127650540986124, - "learning_rate": 3.942900903875369e-06, - "loss": 1.0545, - "step": 1384 - }, - { - "epoch": 0.10408838118142191, - "grad_norm": 3.354383441878177, - "learning_rate": 3.942785346640003e-06, - "loss": 1.1047, - "step": 1385 - }, - { - "epoch": 0.10416353524725688, - "grad_norm": 3.5152910540178617, - "learning_rate": 3.942669674287166e-06, - "loss": 1.0572, - "step": 1386 - }, - { - "epoch": 0.10423868931309184, - "grad_norm": 1.7569590891687186, - "learning_rate": 3.942553886823711e-06, - "loss": 1.0762, - "step": 1387 - }, - { - "epoch": 0.1043138433789268, - "grad_norm": 2.1054224352440194, - "learning_rate": 3.9424379842565005e-06, - "loss": 1.0513, - "step": 1388 - }, - { - "epoch": 0.10438899744476177, - "grad_norm": 1.9535790487854634, - "learning_rate": 3.9423219665924e-06, - "loss": 1.0743, - "step": 1389 - }, - { - "epoch": 0.10446415151059672, - "grad_norm": 1.81747575672882, - "learning_rate": 3.942205833838287e-06, - "loss": 1.0012, - "step": 1390 - }, - { - "epoch": 0.10453930557643168, - "grad_norm": 2.4366498589446977, - "learning_rate": 3.942089586001039e-06, - "loss": 1.1546, - "step": 1391 - }, - { - "epoch": 0.10461445964226665, - "grad_norm": 1.6840938524565179, - "learning_rate": 3.941973223087548e-06, - "loss": 0.9515, - "step": 1392 - }, - { - "epoch": 0.1046896137081016, - "grad_norm": 1.6402796790599794, - "learning_rate": 3.941856745104707e-06, - "loss": 1.0116, - "step": 1393 - }, - { - "epoch": 0.10476476777393658, - "grad_norm": 2.1230456396974904, - "learning_rate": 3.941740152059418e-06, - "loss": 1.0247, - "step": 1394 - }, - { - "epoch": 0.10483992183977153, - "grad_norm": 1.3294142804352858, - "learning_rate": 3.94162344395859e-06, - "loss": 1.0023, - "step": 1395 - }, - { - "epoch": 0.10491507590560649, - "grad_norm": 2.248753096200403, - "learning_rate": 3.941506620809137e-06, - "loss": 1.055, - "step": 1396 - }, - { - "epoch": 0.10499022997144146, - "grad_norm": 1.8149177605148534, - "learning_rate": 3.941389682617982e-06, - "loss": 0.9779, - "step": 1397 - }, - { - "epoch": 0.10506538403727642, - "grad_norm": 1.9200579729864684, - "learning_rate": 3.9412726293920555e-06, - "loss": 0.9808, - "step": 1398 - }, - { - "epoch": 0.10514053810311137, - "grad_norm": 1.9561598126277089, - "learning_rate": 3.9411554611382914e-06, - "loss": 1.1205, - "step": 1399 - }, - { - "epoch": 0.10521569216894634, - "grad_norm": 2.149652532844062, - "learning_rate": 3.941038177863633e-06, - "loss": 1.0358, - "step": 1400 - }, - { - "epoch": 0.1052908462347813, - "grad_norm": 0.8133505990457489, - "learning_rate": 3.940920779575029e-06, - "loss": 0.8729, - "step": 1401 - }, - { - "epoch": 0.10536600030061627, - "grad_norm": 4.3879475422369, - "learning_rate": 3.940803266279438e-06, - "loss": 1.0888, - "step": 1402 - }, - { - "epoch": 0.10544115436645123, - "grad_norm": 1.8341793001402198, - "learning_rate": 3.940685637983822e-06, - "loss": 1.0237, - "step": 1403 - }, - { - "epoch": 0.10551630843228618, - "grad_norm": 1.7890812902005198, - "learning_rate": 3.940567894695149e-06, - "loss": 1.0787, - "step": 1404 - }, - { - "epoch": 0.10559146249812115, - "grad_norm": 2.178537372975093, - "learning_rate": 3.940450036420397e-06, - "loss": 0.9679, - "step": 1405 - }, - { - "epoch": 0.10566661656395611, - "grad_norm": 0.7493413973950765, - "learning_rate": 3.940332063166551e-06, - "loss": 0.833, - "step": 1406 - }, - { - "epoch": 0.10574177062979107, - "grad_norm": 2.08532511547954, - "learning_rate": 3.9402139749406e-06, - "loss": 1.078, - "step": 1407 - }, - { - "epoch": 0.10581692469562604, - "grad_norm": 1.915829088500327, - "learning_rate": 3.940095771749542e-06, - "loss": 1.0517, - "step": 1408 - }, - { - "epoch": 0.10589207876146099, - "grad_norm": 11.753229225059677, - "learning_rate": 3.939977453600379e-06, - "loss": 0.9967, - "step": 1409 - }, - { - "epoch": 0.10596723282729596, - "grad_norm": 1.7696910390224028, - "learning_rate": 3.939859020500124e-06, - "loss": 1.0102, - "step": 1410 - }, - { - "epoch": 0.10604238689313092, - "grad_norm": 1.897174996247022, - "learning_rate": 3.939740472455794e-06, - "loss": 0.9425, - "step": 1411 - }, - { - "epoch": 0.10611754095896588, - "grad_norm": 1.6893407177877875, - "learning_rate": 3.939621809474413e-06, - "loss": 1.0616, - "step": 1412 - }, - { - "epoch": 0.10619269502480085, - "grad_norm": 1.961141568086377, - "learning_rate": 3.9395030315630124e-06, - "loss": 1.0804, - "step": 1413 - }, - { - "epoch": 0.1062678490906358, - "grad_norm": 1.7666008202259704, - "learning_rate": 3.939384138728631e-06, - "loss": 1.0435, - "step": 1414 - }, - { - "epoch": 0.10634300315647076, - "grad_norm": 1.9216897766673482, - "learning_rate": 3.939265130978312e-06, - "loss": 1.0157, - "step": 1415 - }, - { - "epoch": 0.10641815722230573, - "grad_norm": 1.791346928063045, - "learning_rate": 3.939146008319109e-06, - "loss": 1.0246, - "step": 1416 - }, - { - "epoch": 0.10649331128814069, - "grad_norm": 2.0741174820427086, - "learning_rate": 3.939026770758079e-06, - "loss": 1.0092, - "step": 1417 - }, - { - "epoch": 0.10656846535397566, - "grad_norm": 1.4896578292180507, - "learning_rate": 3.938907418302288e-06, - "loss": 1.0304, - "step": 1418 - }, - { - "epoch": 0.10664361941981061, - "grad_norm": 1.8178072212959702, - "learning_rate": 3.938787950958807e-06, - "loss": 1.0268, - "step": 1419 - }, - { - "epoch": 0.10671877348564557, - "grad_norm": 2.142180385897765, - "learning_rate": 3.938668368734717e-06, - "loss": 0.9848, - "step": 1420 - }, - { - "epoch": 0.10679392755148054, - "grad_norm": 1.7134439581596785, - "learning_rate": 3.938548671637102e-06, - "loss": 1.0059, - "step": 1421 - }, - { - "epoch": 0.1068690816173155, - "grad_norm": 1.9513067708624576, - "learning_rate": 3.938428859673055e-06, - "loss": 1.0558, - "step": 1422 - }, - { - "epoch": 0.10694423568315047, - "grad_norm": 2.1068247885253006, - "learning_rate": 3.9383089328496755e-06, - "loss": 1.0084, - "step": 1423 - }, - { - "epoch": 0.10701938974898542, - "grad_norm": 1.7698177417303402, - "learning_rate": 3.938188891174069e-06, - "loss": 0.9798, - "step": 1424 - }, - { - "epoch": 0.10709454381482038, - "grad_norm": 2.156915423783516, - "learning_rate": 3.9380687346533495e-06, - "loss": 0.9972, - "step": 1425 - }, - { - "epoch": 0.10716969788065535, - "grad_norm": 3.359065267789032, - "learning_rate": 3.9379484632946355e-06, - "loss": 1.0489, - "step": 1426 - }, - { - "epoch": 0.1072448519464903, - "grad_norm": 1.919152603407083, - "learning_rate": 3.937828077105054e-06, - "loss": 0.8877, - "step": 1427 - }, - { - "epoch": 0.10732000601232526, - "grad_norm": 1.85714767790787, - "learning_rate": 3.9377075760917396e-06, - "loss": 1.0833, - "step": 1428 - }, - { - "epoch": 0.10739516007816023, - "grad_norm": 1.602355388597229, - "learning_rate": 3.93758696026183e-06, - "loss": 1.0767, - "step": 1429 - }, - { - "epoch": 0.10747031414399519, - "grad_norm": 2.466900289486594, - "learning_rate": 3.9374662296224746e-06, - "loss": 1.08, - "step": 1430 - }, - { - "epoch": 0.10754546820983016, - "grad_norm": 1.7570052264140714, - "learning_rate": 3.937345384180826e-06, - "loss": 1.027, - "step": 1431 - }, - { - "epoch": 0.10762062227566511, - "grad_norm": 2.6075599433833543, - "learning_rate": 3.937224423944044e-06, - "loss": 1.0519, - "step": 1432 - }, - { - "epoch": 0.10769577634150007, - "grad_norm": 1.6158105267782066, - "learning_rate": 3.937103348919297e-06, - "loss": 0.9453, - "step": 1433 - }, - { - "epoch": 0.10777093040733504, - "grad_norm": 3.018849758608611, - "learning_rate": 3.936982159113759e-06, - "loss": 0.9655, - "step": 1434 - }, - { - "epoch": 0.10784608447317, - "grad_norm": 1.775593526763357, - "learning_rate": 3.936860854534611e-06, - "loss": 0.9608, - "step": 1435 - }, - { - "epoch": 0.10792123853900495, - "grad_norm": 2.536148699922561, - "learning_rate": 3.936739435189041e-06, - "loss": 1.0137, - "step": 1436 - }, - { - "epoch": 0.10799639260483992, - "grad_norm": 1.6783323277101405, - "learning_rate": 3.936617901084243e-06, - "loss": 0.9721, - "step": 1437 - }, - { - "epoch": 0.10807154667067488, - "grad_norm": 2.2789746786521126, - "learning_rate": 3.936496252227417e-06, - "loss": 1.0197, - "step": 1438 - }, - { - "epoch": 0.10814670073650985, - "grad_norm": 2.3097505462840724, - "learning_rate": 3.936374488625775e-06, - "loss": 1.0375, - "step": 1439 - }, - { - "epoch": 0.10822185480234481, - "grad_norm": 1.6193508844556528, - "learning_rate": 3.936252610286528e-06, - "loss": 1.0664, - "step": 1440 - }, - { - "epoch": 0.10829700886817976, - "grad_norm": 1.7779228859861116, - "learning_rate": 3.9361306172169005e-06, - "loss": 0.9514, - "step": 1441 - }, - { - "epoch": 0.10837216293401473, - "grad_norm": 1.929717337806228, - "learning_rate": 3.93600850942412e-06, - "loss": 0.8864, - "step": 1442 - }, - { - "epoch": 0.10844731699984969, - "grad_norm": 1.7326280886156968, - "learning_rate": 3.935886286915421e-06, - "loss": 0.9327, - "step": 1443 - }, - { - "epoch": 0.10852247106568465, - "grad_norm": 1.7284390707358874, - "learning_rate": 3.935763949698047e-06, - "loss": 0.9536, - "step": 1444 - }, - { - "epoch": 0.10859762513151962, - "grad_norm": 2.5253720264384265, - "learning_rate": 3.935641497779247e-06, - "loss": 1.0551, - "step": 1445 - }, - { - "epoch": 0.10867277919735457, - "grad_norm": 6.148165433843197, - "learning_rate": 3.935518931166275e-06, - "loss": 1.0178, - "step": 1446 - }, - { - "epoch": 0.10874793326318954, - "grad_norm": 1.612015419235507, - "learning_rate": 3.935396249866396e-06, - "loss": 0.9901, - "step": 1447 - }, - { - "epoch": 0.1088230873290245, - "grad_norm": 1.7417049680571635, - "learning_rate": 3.935273453886877e-06, - "loss": 1.0045, - "step": 1448 - }, - { - "epoch": 0.10889824139485946, - "grad_norm": 1.8411582575859493, - "learning_rate": 3.935150543234996e-06, - "loss": 0.9541, - "step": 1449 - }, - { - "epoch": 0.10897339546069443, - "grad_norm": 1.9349225035138984, - "learning_rate": 3.935027517918034e-06, - "loss": 1.0166, - "step": 1450 - }, - { - "epoch": 0.10904854952652938, - "grad_norm": 1.8364823851702605, - "learning_rate": 3.9349043779432825e-06, - "loss": 1.0292, - "step": 1451 - }, - { - "epoch": 0.10912370359236434, - "grad_norm": 2.0477999861247698, - "learning_rate": 3.934781123318037e-06, - "loss": 1.054, - "step": 1452 - }, - { - "epoch": 0.10919885765819931, - "grad_norm": 4.462916481393917, - "learning_rate": 3.934657754049602e-06, - "loss": 1.0071, - "step": 1453 - }, - { - "epoch": 0.10927401172403427, - "grad_norm": 4.461406302328673, - "learning_rate": 3.934534270145287e-06, - "loss": 1.0661, - "step": 1454 - }, - { - "epoch": 0.10934916578986924, - "grad_norm": 2.227663216481992, - "learning_rate": 3.934410671612408e-06, - "loss": 1.0604, - "step": 1455 - }, - { - "epoch": 0.1094243198557042, - "grad_norm": 1.9209712935392362, - "learning_rate": 3.934286958458289e-06, - "loss": 1.0633, - "step": 1456 - }, - { - "epoch": 0.10949947392153915, - "grad_norm": 1.7822167968482447, - "learning_rate": 3.934163130690262e-06, - "loss": 1.0314, - "step": 1457 - }, - { - "epoch": 0.10957462798737412, - "grad_norm": 2.0162872882381957, - "learning_rate": 3.9340391883156614e-06, - "loss": 0.95, - "step": 1458 - }, - { - "epoch": 0.10964978205320908, - "grad_norm": 2.0195767012006427, - "learning_rate": 3.933915131341834e-06, - "loss": 1.0553, - "step": 1459 - }, - { - "epoch": 0.10972493611904403, - "grad_norm": 3.8318448424813063, - "learning_rate": 3.93379095977613e-06, - "loss": 0.9467, - "step": 1460 - }, - { - "epoch": 0.109800090184879, - "grad_norm": 1.7803699260099155, - "learning_rate": 3.9336666736259055e-06, - "loss": 1.0461, - "step": 1461 - }, - { - "epoch": 0.10987524425071396, - "grad_norm": 1.7228648779518532, - "learning_rate": 3.933542272898527e-06, - "loss": 0.9949, - "step": 1462 - }, - { - "epoch": 0.10995039831654893, - "grad_norm": 2.478697617554925, - "learning_rate": 3.933417757601365e-06, - "loss": 0.9418, - "step": 1463 - }, - { - "epoch": 0.11002555238238389, - "grad_norm": 1.6806122828946182, - "learning_rate": 3.933293127741796e-06, - "loss": 0.9931, - "step": 1464 - }, - { - "epoch": 0.11010070644821884, - "grad_norm": 2.2302798643820503, - "learning_rate": 3.933168383327207e-06, - "loss": 1.0642, - "step": 1465 - }, - { - "epoch": 0.11017586051405381, - "grad_norm": 2.1407129868293837, - "learning_rate": 3.933043524364989e-06, - "loss": 1.0157, - "step": 1466 - }, - { - "epoch": 0.11025101457988877, - "grad_norm": 1.37841027176938, - "learning_rate": 3.932918550862539e-06, - "loss": 1.0023, - "step": 1467 - }, - { - "epoch": 0.11032616864572373, - "grad_norm": 1.9842193422317371, - "learning_rate": 3.932793462827265e-06, - "loss": 1.0403, - "step": 1468 - }, - { - "epoch": 0.1104013227115587, - "grad_norm": 1.7480305079541036, - "learning_rate": 3.932668260266576e-06, - "loss": 0.9944, - "step": 1469 - }, - { - "epoch": 0.11047647677739365, - "grad_norm": 6.650698643747976, - "learning_rate": 3.932542943187892e-06, - "loss": 1.0972, - "step": 1470 - }, - { - "epoch": 0.11055163084322862, - "grad_norm": 1.8507163130195983, - "learning_rate": 3.932417511598638e-06, - "loss": 1.067, - "step": 1471 - }, - { - "epoch": 0.11062678490906358, - "grad_norm": 2.1207007505973077, - "learning_rate": 3.932291965506247e-06, - "loss": 0.9699, - "step": 1472 - }, - { - "epoch": 0.11070193897489854, - "grad_norm": 2.084760240816251, - "learning_rate": 3.932166304918158e-06, - "loss": 0.9999, - "step": 1473 - }, - { - "epoch": 0.1107770930407335, - "grad_norm": 1.732979152635495, - "learning_rate": 3.9320405298418175e-06, - "loss": 1.0437, - "step": 1474 - }, - { - "epoch": 0.11085224710656846, - "grad_norm": 1.788589414989703, - "learning_rate": 3.931914640284676e-06, - "loss": 1.0056, - "step": 1475 - }, - { - "epoch": 0.11092740117240343, - "grad_norm": 1.7518878699220186, - "learning_rate": 3.931788636254195e-06, - "loss": 1.0545, - "step": 1476 - }, - { - "epoch": 0.11100255523823839, - "grad_norm": 1.6538315155630294, - "learning_rate": 3.931662517757839e-06, - "loss": 0.9664, - "step": 1477 - }, - { - "epoch": 0.11107770930407335, - "grad_norm": 1.8144542418970258, - "learning_rate": 3.931536284803083e-06, - "loss": 0.9763, - "step": 1478 - }, - { - "epoch": 0.11115286336990832, - "grad_norm": 2.593342025108122, - "learning_rate": 3.931409937397406e-06, - "loss": 1.101, - "step": 1479 - }, - { - "epoch": 0.11122801743574327, - "grad_norm": 0.7889992716922468, - "learning_rate": 3.931283475548293e-06, - "loss": 0.9056, - "step": 1480 - }, - { - "epoch": 0.11130317150157823, - "grad_norm": 1.7531608917859745, - "learning_rate": 3.93115689926324e-06, - "loss": 0.9781, - "step": 1481 - }, - { - "epoch": 0.1113783255674132, - "grad_norm": 1.4552786669690299, - "learning_rate": 3.931030208549745e-06, - "loss": 1.0575, - "step": 1482 - }, - { - "epoch": 0.11145347963324816, - "grad_norm": 1.849928895769531, - "learning_rate": 3.930903403415316e-06, - "loss": 1.0217, - "step": 1483 - }, - { - "epoch": 0.11152863369908313, - "grad_norm": 1.965562753343722, - "learning_rate": 3.930776483867467e-06, - "loss": 1.065, - "step": 1484 - }, - { - "epoch": 0.11160378776491808, - "grad_norm": 1.5596579071459864, - "learning_rate": 3.9306494499137175e-06, - "loss": 1.041, - "step": 1485 - }, - { - "epoch": 0.11167894183075304, - "grad_norm": 2.499932097466521, - "learning_rate": 3.930522301561595e-06, - "loss": 1.0114, - "step": 1486 - }, - { - "epoch": 0.11175409589658801, - "grad_norm": 1.5905980375811428, - "learning_rate": 3.930395038818633e-06, - "loss": 1.0202, - "step": 1487 - }, - { - "epoch": 0.11182924996242297, - "grad_norm": 2.195063675323977, - "learning_rate": 3.930267661692374e-06, - "loss": 1.0965, - "step": 1488 - }, - { - "epoch": 0.11190440402825792, - "grad_norm": 3.266210914141452, - "learning_rate": 3.930140170190364e-06, - "loss": 1.0193, - "step": 1489 - }, - { - "epoch": 0.1119795580940929, - "grad_norm": 0.9393370114460452, - "learning_rate": 3.930012564320159e-06, - "loss": 0.9141, - "step": 1490 - }, - { - "epoch": 0.11205471215992785, - "grad_norm": 1.4911395482410394, - "learning_rate": 3.929884844089318e-06, - "loss": 1.0193, - "step": 1491 - }, - { - "epoch": 0.11212986622576282, - "grad_norm": 1.4957078082334376, - "learning_rate": 3.92975700950541e-06, - "loss": 1.0652, - "step": 1492 - }, - { - "epoch": 0.11220502029159778, - "grad_norm": 1.5042506232391126, - "learning_rate": 3.92962906057601e-06, - "loss": 0.9211, - "step": 1493 - }, - { - "epoch": 0.11228017435743273, - "grad_norm": 2.635382329141628, - "learning_rate": 3.929500997308698e-06, - "loss": 1.0409, - "step": 1494 - }, - { - "epoch": 0.1123553284232677, - "grad_norm": 1.3338043851051886, - "learning_rate": 3.929372819711065e-06, - "loss": 1.0025, - "step": 1495 - }, - { - "epoch": 0.11243048248910266, - "grad_norm": 3.8636970566251096, - "learning_rate": 3.929244527790703e-06, - "loss": 0.9864, - "step": 1496 - }, - { - "epoch": 0.11250563655493762, - "grad_norm": 2.025358017720588, - "learning_rate": 3.929116121555216e-06, - "loss": 1.0348, - "step": 1497 - }, - { - "epoch": 0.11258079062077259, - "grad_norm": 1.9575149029862347, - "learning_rate": 3.928987601012212e-06, - "loss": 1.0957, - "step": 1498 - }, - { - "epoch": 0.11265594468660754, - "grad_norm": 1.8887597963999299, - "learning_rate": 3.928858966169306e-06, - "loss": 0.979, - "step": 1499 - }, - { - "epoch": 0.11273109875244251, - "grad_norm": 1.6381650375020198, - "learning_rate": 3.928730217034119e-06, - "loss": 1.0114, - "step": 1500 - }, - { - "epoch": 0.11280625281827747, - "grad_norm": 1.500763063256053, - "learning_rate": 3.928601353614282e-06, - "loss": 0.9391, - "step": 1501 - }, - { - "epoch": 0.11288140688411243, - "grad_norm": 3.8633733262062058, - "learning_rate": 3.92847237591743e-06, - "loss": 1.0018, - "step": 1502 - }, - { - "epoch": 0.1129565609499474, - "grad_norm": 1.4470510388055016, - "learning_rate": 3.928343283951204e-06, - "loss": 1.0291, - "step": 1503 - }, - { - "epoch": 0.11303171501578235, - "grad_norm": 1.9503700052031268, - "learning_rate": 3.928214077723255e-06, - "loss": 1.0215, - "step": 1504 - }, - { - "epoch": 0.11310686908161731, - "grad_norm": 2.028032424765358, - "learning_rate": 3.928084757241239e-06, - "loss": 1.0201, - "step": 1505 - }, - { - "epoch": 0.11318202314745228, - "grad_norm": 4.249841799316048, - "learning_rate": 3.9279553225128165e-06, - "loss": 0.8249, - "step": 1506 - }, - { - "epoch": 0.11325717721328724, - "grad_norm": 2.2299081395135865, - "learning_rate": 3.92782577354566e-06, - "loss": 0.9698, - "step": 1507 - }, - { - "epoch": 0.1133323312791222, - "grad_norm": 1.9783022013236409, - "learning_rate": 3.927696110347443e-06, - "loss": 1.0256, - "step": 1508 - }, - { - "epoch": 0.11340748534495716, - "grad_norm": 2.5203363123015374, - "learning_rate": 3.92756633292585e-06, - "loss": 1.0264, - "step": 1509 - }, - { - "epoch": 0.11348263941079212, - "grad_norm": 19.199527512140843, - "learning_rate": 3.927436441288571e-06, - "loss": 1.0856, - "step": 1510 - }, - { - "epoch": 0.11355779347662709, - "grad_norm": 1.7573071919109486, - "learning_rate": 3.9273064354433025e-06, - "loss": 0.9556, - "step": 1511 - }, - { - "epoch": 0.11363294754246205, - "grad_norm": 1.560176113042334, - "learning_rate": 3.927176315397747e-06, - "loss": 1.0593, - "step": 1512 - }, - { - "epoch": 0.113708101608297, - "grad_norm": 0.6891485087029505, - "learning_rate": 3.927046081159615e-06, - "loss": 0.7971, - "step": 1513 - }, - { - "epoch": 0.11378325567413197, - "grad_norm": 2.9522217676138998, - "learning_rate": 3.926915732736624e-06, - "loss": 0.8889, - "step": 1514 - }, - { - "epoch": 0.11385840973996693, - "grad_norm": 2.623563009852956, - "learning_rate": 3.926785270136497e-06, - "loss": 1.0611, - "step": 1515 - }, - { - "epoch": 0.1139335638058019, - "grad_norm": 2.125249399247694, - "learning_rate": 3.926654693366965e-06, - "loss": 0.9284, - "step": 1516 - }, - { - "epoch": 0.11400871787163686, - "grad_norm": 1.8435095290787953, - "learning_rate": 3.926524002435764e-06, - "loss": 1.0466, - "step": 1517 - }, - { - "epoch": 0.11408387193747181, - "grad_norm": 2.0190135063411145, - "learning_rate": 3.9263931973506395e-06, - "loss": 0.9329, - "step": 1518 - }, - { - "epoch": 0.11415902600330678, - "grad_norm": 2.336336235793844, - "learning_rate": 3.926262278119341e-06, - "loss": 1.0736, - "step": 1519 - }, - { - "epoch": 0.11423418006914174, - "grad_norm": 2.250425298549816, - "learning_rate": 3.9261312447496265e-06, - "loss": 1.0985, - "step": 1520 - }, - { - "epoch": 0.11430933413497671, - "grad_norm": 2.2613573592697023, - "learning_rate": 3.92600009724926e-06, - "loss": 1.0303, - "step": 1521 - }, - { - "epoch": 0.11438448820081167, - "grad_norm": 2.1630336918775135, - "learning_rate": 3.925868835626012e-06, - "loss": 1.073, - "step": 1522 - }, - { - "epoch": 0.11445964226664662, - "grad_norm": 1.693364162064312, - "learning_rate": 3.925737459887662e-06, - "loss": 1.0012, - "step": 1523 - }, - { - "epoch": 0.11453479633248159, - "grad_norm": 1.990587796472921, - "learning_rate": 3.925605970041992e-06, - "loss": 1.0724, - "step": 1524 - }, - { - "epoch": 0.11460995039831655, - "grad_norm": 1.3002775207816637, - "learning_rate": 3.925474366096796e-06, - "loss": 1.007, - "step": 1525 - }, - { - "epoch": 0.1146851044641515, - "grad_norm": 1.5746563097451425, - "learning_rate": 3.92534264805987e-06, - "loss": 1.1212, - "step": 1526 - }, - { - "epoch": 0.11476025852998648, - "grad_norm": 1.5777961463858918, - "learning_rate": 3.92521081593902e-06, - "loss": 1.0753, - "step": 1527 - }, - { - "epoch": 0.11483541259582143, - "grad_norm": 1.5270630561485055, - "learning_rate": 3.925078869742056e-06, - "loss": 0.9918, - "step": 1528 - }, - { - "epoch": 0.1149105666616564, - "grad_norm": 1.5051811066203318, - "learning_rate": 3.924946809476798e-06, - "loss": 0.9554, - "step": 1529 - }, - { - "epoch": 0.11498572072749136, - "grad_norm": 2.4073031214055405, - "learning_rate": 3.924814635151071e-06, - "loss": 1.0314, - "step": 1530 - }, - { - "epoch": 0.11506087479332631, - "grad_norm": 2.1211891785119916, - "learning_rate": 3.924682346772705e-06, - "loss": 1.0918, - "step": 1531 - }, - { - "epoch": 0.11513602885916129, - "grad_norm": 1.7518099553053226, - "learning_rate": 3.92454994434954e-06, - "loss": 1.1059, - "step": 1532 - }, - { - "epoch": 0.11521118292499624, - "grad_norm": 2.1973392999801056, - "learning_rate": 3.9244174278894226e-06, - "loss": 0.9988, - "step": 1533 - }, - { - "epoch": 0.1152863369908312, - "grad_norm": 1.5346866889319182, - "learning_rate": 3.924284797400202e-06, - "loss": 1.04, - "step": 1534 - }, - { - "epoch": 0.11536149105666617, - "grad_norm": 1.762021463780865, - "learning_rate": 3.92415205288974e-06, - "loss": 1.0372, - "step": 1535 - }, - { - "epoch": 0.11543664512250112, - "grad_norm": 2.031233215262683, - "learning_rate": 3.9240191943659e-06, - "loss": 1.0197, - "step": 1536 - }, - { - "epoch": 0.1155117991883361, - "grad_norm": 2.0255750757907727, - "learning_rate": 3.923886221836555e-06, - "loss": 0.991, - "step": 1537 - }, - { - "epoch": 0.11558695325417105, - "grad_norm": 1.366094359304272, - "learning_rate": 3.923753135309584e-06, - "loss": 1.1158, - "step": 1538 - }, - { - "epoch": 0.11566210732000601, - "grad_norm": 1.7932503007553233, - "learning_rate": 3.923619934792873e-06, - "loss": 1.1092, - "step": 1539 - }, - { - "epoch": 0.11573726138584098, - "grad_norm": 1.974354471870977, - "learning_rate": 3.923486620294316e-06, - "loss": 1.0212, - "step": 1540 - }, - { - "epoch": 0.11581241545167593, - "grad_norm": 2.073558292736832, - "learning_rate": 3.923353191821811e-06, - "loss": 1.0006, - "step": 1541 - }, - { - "epoch": 0.11588756951751089, - "grad_norm": 2.0714025542781678, - "learning_rate": 3.923219649383264e-06, - "loss": 1.0196, - "step": 1542 - }, - { - "epoch": 0.11596272358334586, - "grad_norm": 1.8766544672070908, - "learning_rate": 3.923085992986588e-06, - "loss": 1.033, - "step": 1543 - }, - { - "epoch": 0.11603787764918082, - "grad_norm": 2.612857618906171, - "learning_rate": 3.922952222639703e-06, - "loss": 1.0461, - "step": 1544 - }, - { - "epoch": 0.11611303171501579, - "grad_norm": 1.834532959357868, - "learning_rate": 3.922818338350536e-06, - "loss": 1.1124, - "step": 1545 - }, - { - "epoch": 0.11618818578085074, - "grad_norm": 2.098474666127147, - "learning_rate": 3.9226843401270195e-06, - "loss": 1.0155, - "step": 1546 - }, - { - "epoch": 0.1162633398466857, - "grad_norm": 1.4984555240378445, - "learning_rate": 3.922550227977093e-06, - "loss": 1.0466, - "step": 1547 - }, - { - "epoch": 0.11633849391252067, - "grad_norm": 0.7817277973146349, - "learning_rate": 3.9224160019087036e-06, - "loss": 0.8202, - "step": 1548 - }, - { - "epoch": 0.11641364797835563, - "grad_norm": 1.920121051628067, - "learning_rate": 3.922281661929804e-06, - "loss": 0.9532, - "step": 1549 - }, - { - "epoch": 0.11648880204419058, - "grad_norm": 1.7043614230368815, - "learning_rate": 3.922147208048356e-06, - "loss": 0.9816, - "step": 1550 - }, - { - "epoch": 0.11656395611002555, - "grad_norm": 4.255201410531522, - "learning_rate": 3.922012640272325e-06, - "loss": 1.0814, - "step": 1551 - }, - { - "epoch": 0.11663911017586051, - "grad_norm": 1.7879844649235241, - "learning_rate": 3.921877958609685e-06, - "loss": 1.041, - "step": 1552 - }, - { - "epoch": 0.11671426424169548, - "grad_norm": 4.833420500572195, - "learning_rate": 3.9217431630684174e-06, - "loss": 1.1378, - "step": 1553 - }, - { - "epoch": 0.11678941830753044, - "grad_norm": 2.4950805900263657, - "learning_rate": 3.921608253656508e-06, - "loss": 1.0143, - "step": 1554 - }, - { - "epoch": 0.1168645723733654, - "grad_norm": 1.9735216112251048, - "learning_rate": 3.921473230381951e-06, - "loss": 0.9609, - "step": 1555 - }, - { - "epoch": 0.11693972643920036, - "grad_norm": 1.8451634178051473, - "learning_rate": 3.921338093252748e-06, - "loss": 0.9754, - "step": 1556 - }, - { - "epoch": 0.11701488050503532, - "grad_norm": 1.5446185985017942, - "learning_rate": 3.921202842276906e-06, - "loss": 0.999, - "step": 1557 - }, - { - "epoch": 0.11709003457087028, - "grad_norm": 2.1405474794927795, - "learning_rate": 3.921067477462437e-06, - "loss": 0.9512, - "step": 1558 - }, - { - "epoch": 0.11716518863670525, - "grad_norm": 0.815409732049048, - "learning_rate": 3.920931998817365e-06, - "loss": 0.8928, - "step": 1559 - }, - { - "epoch": 0.1172403427025402, - "grad_norm": 1.3889247057852918, - "learning_rate": 3.920796406349717e-06, - "loss": 1.059, - "step": 1560 - }, - { - "epoch": 0.11731549676837517, - "grad_norm": 1.8028881796777168, - "learning_rate": 3.920660700067525e-06, - "loss": 1.0452, - "step": 1561 - }, - { - "epoch": 0.11739065083421013, - "grad_norm": 2.8625985407630274, - "learning_rate": 3.920524879978833e-06, - "loss": 1.0195, - "step": 1562 - }, - { - "epoch": 0.11746580490004509, - "grad_norm": 2.0300197690555497, - "learning_rate": 3.920388946091687e-06, - "loss": 0.9345, - "step": 1563 - }, - { - "epoch": 0.11754095896588006, - "grad_norm": 1.5632545937892048, - "learning_rate": 3.920252898414143e-06, - "loss": 1.0962, - "step": 1564 - }, - { - "epoch": 0.11761611303171501, - "grad_norm": 3.0799293457943886, - "learning_rate": 3.920116736954261e-06, - "loss": 0.9442, - "step": 1565 - }, - { - "epoch": 0.11769126709754998, - "grad_norm": 1.6592547673060212, - "learning_rate": 3.91998046172011e-06, - "loss": 0.9614, - "step": 1566 - }, - { - "epoch": 0.11776642116338494, - "grad_norm": 3.1014130601986416, - "learning_rate": 3.9198440727197645e-06, - "loss": 1.065, - "step": 1567 - }, - { - "epoch": 0.1178415752292199, - "grad_norm": 2.0272821258998865, - "learning_rate": 3.919707569961306e-06, - "loss": 1.0242, - "step": 1568 - }, - { - "epoch": 0.11791672929505487, - "grad_norm": 1.6141140245483818, - "learning_rate": 3.9195709534528235e-06, - "loss": 1.0901, - "step": 1569 - }, - { - "epoch": 0.11799188336088982, - "grad_norm": 2.280611967978404, - "learning_rate": 3.919434223202411e-06, - "loss": 0.9718, - "step": 1570 - }, - { - "epoch": 0.11806703742672478, - "grad_norm": 1.7504192939901426, - "learning_rate": 3.919297379218171e-06, - "loss": 1.1077, - "step": 1571 - }, - { - "epoch": 0.11814219149255975, - "grad_norm": 2.7419843027846693, - "learning_rate": 3.919160421508211e-06, - "loss": 1.0183, - "step": 1572 - }, - { - "epoch": 0.1182173455583947, - "grad_norm": 2.0278506665070473, - "learning_rate": 3.919023350080648e-06, - "loss": 1.075, - "step": 1573 - }, - { - "epoch": 0.11829249962422968, - "grad_norm": 1.7040553920142192, - "learning_rate": 3.918886164943603e-06, - "loss": 1.0759, - "step": 1574 - }, - { - "epoch": 0.11836765369006463, - "grad_norm": 1.8207515465403636, - "learning_rate": 3.918748866105204e-06, - "loss": 1.0874, - "step": 1575 - }, - { - "epoch": 0.11844280775589959, - "grad_norm": 2.536619918867028, - "learning_rate": 3.918611453573589e-06, - "loss": 0.9622, - "step": 1576 - }, - { - "epoch": 0.11851796182173456, - "grad_norm": 1.8036931588990823, - "learning_rate": 3.918473927356896e-06, - "loss": 0.9792, - "step": 1577 - }, - { - "epoch": 0.11859311588756952, - "grad_norm": 1.8505071537690723, - "learning_rate": 3.918336287463279e-06, - "loss": 0.9664, - "step": 1578 - }, - { - "epoch": 0.11866826995340447, - "grad_norm": 2.118013227974394, - "learning_rate": 3.9181985339008895e-06, - "loss": 1.1197, - "step": 1579 - }, - { - "epoch": 0.11874342401923944, - "grad_norm": 2.648676768609706, - "learning_rate": 3.918060666677892e-06, - "loss": 1.0673, - "step": 1580 - }, - { - "epoch": 0.1188185780850744, - "grad_norm": 0.8073935700776395, - "learning_rate": 3.9179226858024555e-06, - "loss": 0.885, - "step": 1581 - }, - { - "epoch": 0.11889373215090937, - "grad_norm": 1.5593052819822046, - "learning_rate": 3.917784591282756e-06, - "loss": 0.9423, - "step": 1582 - }, - { - "epoch": 0.11896888621674433, - "grad_norm": 2.5623404290362517, - "learning_rate": 3.917646383126975e-06, - "loss": 0.9956, - "step": 1583 - }, - { - "epoch": 0.11904404028257928, - "grad_norm": 2.045896456405664, - "learning_rate": 3.917508061343303e-06, - "loss": 1.1131, - "step": 1584 - }, - { - "epoch": 0.11911919434841425, - "grad_norm": 2.4753510312921065, - "learning_rate": 3.917369625939936e-06, - "loss": 1.0729, - "step": 1585 - }, - { - "epoch": 0.11919434841424921, - "grad_norm": 1.7473119799927046, - "learning_rate": 3.917231076925076e-06, - "loss": 1.0212, - "step": 1586 - }, - { - "epoch": 0.11926950248008417, - "grad_norm": 1.8696887130516588, - "learning_rate": 3.917092414306933e-06, - "loss": 1.042, - "step": 1587 - }, - { - "epoch": 0.11934465654591914, - "grad_norm": 2.0034001772874097, - "learning_rate": 3.916953638093725e-06, - "loss": 0.9492, - "step": 1588 - }, - { - "epoch": 0.1194198106117541, - "grad_norm": 6.318449030952001, - "learning_rate": 3.9168147482936715e-06, - "loss": 1.0109, - "step": 1589 - }, - { - "epoch": 0.11949496467758906, - "grad_norm": 2.1080420909856006, - "learning_rate": 3.916675744915005e-06, - "loss": 0.9599, - "step": 1590 - }, - { - "epoch": 0.11957011874342402, - "grad_norm": 1.8162245792886158, - "learning_rate": 3.916536627965961e-06, - "loss": 0.9304, - "step": 1591 - }, - { - "epoch": 0.11964527280925898, - "grad_norm": 1.8633067131301935, - "learning_rate": 3.916397397454783e-06, - "loss": 1.0036, - "step": 1592 - }, - { - "epoch": 0.11972042687509395, - "grad_norm": 1.6207742031396188, - "learning_rate": 3.916258053389721e-06, - "loss": 0.9791, - "step": 1593 - }, - { - "epoch": 0.1197955809409289, - "grad_norm": 3.6308294318628147, - "learning_rate": 3.916118595779031e-06, - "loss": 1.0042, - "step": 1594 - }, - { - "epoch": 0.11987073500676386, - "grad_norm": 1.7580994096365623, - "learning_rate": 3.915979024630977e-06, - "loss": 1.0208, - "step": 1595 - }, - { - "epoch": 0.11994588907259883, - "grad_norm": 2.242936278466572, - "learning_rate": 3.91583933995383e-06, - "loss": 1.0314, - "step": 1596 - }, - { - "epoch": 0.12002104313843379, - "grad_norm": 1.7404761012715142, - "learning_rate": 3.915699541755865e-06, - "loss": 1.0088, - "step": 1597 - }, - { - "epoch": 0.12009619720426876, - "grad_norm": 2.3226843633660064, - "learning_rate": 3.915559630045367e-06, - "loss": 0.981, - "step": 1598 - }, - { - "epoch": 0.12017135127010371, - "grad_norm": 2.232142687116344, - "learning_rate": 3.9154196048306244e-06, - "loss": 1.0765, - "step": 1599 - }, - { - "epoch": 0.12024650533593867, - "grad_norm": 1.827768731247848, - "learning_rate": 3.915279466119937e-06, - "loss": 1.0457, - "step": 1600 - }, - { - "epoch": 0.12032165940177364, - "grad_norm": 1.6400751554899908, - "learning_rate": 3.915139213921606e-06, - "loss": 0.9866, - "step": 1601 - }, - { - "epoch": 0.1203968134676086, - "grad_norm": 1.953320851996757, - "learning_rate": 3.914998848243944e-06, - "loss": 0.9318, - "step": 1602 - }, - { - "epoch": 0.12047196753344355, - "grad_norm": 2.2668513694942978, - "learning_rate": 3.914858369095267e-06, - "loss": 1.0299, - "step": 1603 - }, - { - "epoch": 0.12054712159927852, - "grad_norm": 2.235870607709705, - "learning_rate": 3.914717776483899e-06, - "loss": 1.0066, - "step": 1604 - }, - { - "epoch": 0.12062227566511348, - "grad_norm": 2.4317687709394087, - "learning_rate": 3.9145770704181715e-06, - "loss": 1.0672, - "step": 1605 - }, - { - "epoch": 0.12069742973094845, - "grad_norm": 2.0285790121869307, - "learning_rate": 3.9144362509064194e-06, - "loss": 0.9968, - "step": 1606 - }, - { - "epoch": 0.1207725837967834, - "grad_norm": 1.4447663796138799, - "learning_rate": 3.91429531795699e-06, - "loss": 0.9496, - "step": 1607 - }, - { - "epoch": 0.12084773786261836, - "grad_norm": 2.0639381018901313, - "learning_rate": 3.9141542715782325e-06, - "loss": 0.9242, - "step": 1608 - }, - { - "epoch": 0.12092289192845333, - "grad_norm": 1.7781841596741161, - "learning_rate": 3.9140131117785045e-06, - "loss": 0.9779, - "step": 1609 - }, - { - "epoch": 0.12099804599428829, - "grad_norm": 1.8303257663709103, - "learning_rate": 3.91387183856617e-06, - "loss": 1.0182, - "step": 1610 - }, - { - "epoch": 0.12107320006012326, - "grad_norm": 1.6683494258540195, - "learning_rate": 3.913730451949601e-06, - "loss": 0.9682, - "step": 1611 - }, - { - "epoch": 0.12114835412595822, - "grad_norm": 2.0246399359187763, - "learning_rate": 3.913588951937174e-06, - "loss": 0.9375, - "step": 1612 - }, - { - "epoch": 0.12122350819179317, - "grad_norm": 0.7449704646970067, - "learning_rate": 3.913447338537274e-06, - "loss": 0.946, - "step": 1613 - }, - { - "epoch": 0.12129866225762814, - "grad_norm": 2.044251019489136, - "learning_rate": 3.913305611758292e-06, - "loss": 0.9448, - "step": 1614 - }, - { - "epoch": 0.1213738163234631, - "grad_norm": 1.7229758591114772, - "learning_rate": 3.913163771608627e-06, - "loss": 0.9335, - "step": 1615 - }, - { - "epoch": 0.12144897038929806, - "grad_norm": 1.5149799035882843, - "learning_rate": 3.913021818096682e-06, - "loss": 1.0425, - "step": 1616 - }, - { - "epoch": 0.12152412445513303, - "grad_norm": 1.4170228099077404, - "learning_rate": 3.912879751230868e-06, - "loss": 0.9321, - "step": 1617 - }, - { - "epoch": 0.12159927852096798, - "grad_norm": 2.0123936974545233, - "learning_rate": 3.9127375710196044e-06, - "loss": 1.0297, - "step": 1618 - }, - { - "epoch": 0.12167443258680295, - "grad_norm": 1.9812583177073038, - "learning_rate": 3.912595277471316e-06, - "loss": 0.9247, - "step": 1619 - }, - { - "epoch": 0.12174958665263791, - "grad_norm": 4.952706207958295, - "learning_rate": 3.912452870594433e-06, - "loss": 1.0416, - "step": 1620 - }, - { - "epoch": 0.12182474071847287, - "grad_norm": 1.5705810620878857, - "learning_rate": 3.912310350397394e-06, - "loss": 1.0291, - "step": 1621 - }, - { - "epoch": 0.12189989478430784, - "grad_norm": 1.8614215006828372, - "learning_rate": 3.912167716888644e-06, - "loss": 1.1328, - "step": 1622 - }, - { - "epoch": 0.12197504885014279, - "grad_norm": 1.8224128099471917, - "learning_rate": 3.912024970076636e-06, - "loss": 1.0264, - "step": 1623 - }, - { - "epoch": 0.12205020291597775, - "grad_norm": 1.5353021528103823, - "learning_rate": 3.911882109969825e-06, - "loss": 1.0499, - "step": 1624 - }, - { - "epoch": 0.12212535698181272, - "grad_norm": 2.02971426789779, - "learning_rate": 3.9117391365766785e-06, - "loss": 1.0066, - "step": 1625 - }, - { - "epoch": 0.12220051104764768, - "grad_norm": 1.8120563227129447, - "learning_rate": 3.9115960499056674e-06, - "loss": 1.0321, - "step": 1626 - }, - { - "epoch": 0.12227566511348265, - "grad_norm": 1.823704126211189, - "learning_rate": 3.911452849965271e-06, - "loss": 1.0181, - "step": 1627 - }, - { - "epoch": 0.1223508191793176, - "grad_norm": 5.292729829132548, - "learning_rate": 3.911309536763974e-06, - "loss": 0.9705, - "step": 1628 - }, - { - "epoch": 0.12242597324515256, - "grad_norm": 4.357080436813317, - "learning_rate": 3.911166110310267e-06, - "loss": 1.0174, - "step": 1629 - }, - { - "epoch": 0.12250112731098753, - "grad_norm": 1.7094402293812874, - "learning_rate": 3.91102257061265e-06, - "loss": 1.0963, - "step": 1630 - }, - { - "epoch": 0.12257628137682249, - "grad_norm": 1.7832850158539337, - "learning_rate": 3.9108789176796285e-06, - "loss": 1.0179, - "step": 1631 - }, - { - "epoch": 0.12265143544265744, - "grad_norm": 2.15198515243088, - "learning_rate": 3.910735151519713e-06, - "loss": 1.0614, - "step": 1632 - }, - { - "epoch": 0.12272658950849241, - "grad_norm": 1.8205927031481328, - "learning_rate": 3.910591272141424e-06, - "loss": 0.9567, - "step": 1633 - }, - { - "epoch": 0.12280174357432737, - "grad_norm": 1.8329600113015, - "learning_rate": 3.910447279553285e-06, - "loss": 1.0269, - "step": 1634 - }, - { - "epoch": 0.12287689764016234, - "grad_norm": 1.8845944657953848, - "learning_rate": 3.91030317376383e-06, - "loss": 1.0348, - "step": 1635 - }, - { - "epoch": 0.1229520517059973, - "grad_norm": 0.6901600917012282, - "learning_rate": 3.9101589547815965e-06, - "loss": 0.8229, - "step": 1636 - }, - { - "epoch": 0.12302720577183225, - "grad_norm": 1.6562648268701763, - "learning_rate": 3.91001462261513e-06, - "loss": 1.0513, - "step": 1637 - }, - { - "epoch": 0.12310235983766722, - "grad_norm": 0.7558168699945264, - "learning_rate": 3.909870177272984e-06, - "loss": 0.845, - "step": 1638 - }, - { - "epoch": 0.12317751390350218, - "grad_norm": 7.04051202039151, - "learning_rate": 3.909725618763716e-06, - "loss": 0.918, - "step": 1639 - }, - { - "epoch": 0.12325266796933713, - "grad_norm": 1.6045268096253165, - "learning_rate": 3.909580947095892e-06, - "loss": 0.9708, - "step": 1640 - }, - { - "epoch": 0.1233278220351721, - "grad_norm": 1.4819048757771367, - "learning_rate": 3.909436162278085e-06, - "loss": 1.0244, - "step": 1641 - }, - { - "epoch": 0.12340297610100706, - "grad_norm": 2.4597716461200028, - "learning_rate": 3.9092912643188745e-06, - "loss": 1.0277, - "step": 1642 - }, - { - "epoch": 0.12347813016684203, - "grad_norm": 2.913889371009046, - "learning_rate": 3.909146253226844e-06, - "loss": 1.0809, - "step": 1643 - }, - { - "epoch": 0.12355328423267699, - "grad_norm": 2.0376728335602943, - "learning_rate": 3.909001129010588e-06, - "loss": 1.069, - "step": 1644 - }, - { - "epoch": 0.12362843829851194, - "grad_norm": 1.6767209848259819, - "learning_rate": 3.908855891678706e-06, - "loss": 1.0601, - "step": 1645 - }, - { - "epoch": 0.12370359236434691, - "grad_norm": 2.4920176863739227, - "learning_rate": 3.908710541239802e-06, - "loss": 1.1196, - "step": 1646 - }, - { - "epoch": 0.12377874643018187, - "grad_norm": 2.279494040884507, - "learning_rate": 3.90856507770249e-06, - "loss": 0.9601, - "step": 1647 - }, - { - "epoch": 0.12385390049601683, - "grad_norm": 1.4189968112912417, - "learning_rate": 3.908419501075388e-06, - "loss": 0.9205, - "step": 1648 - }, - { - "epoch": 0.1239290545618518, - "grad_norm": 1.7257506934690512, - "learning_rate": 3.908273811367123e-06, - "loss": 0.9742, - "step": 1649 - }, - { - "epoch": 0.12400420862768675, - "grad_norm": 1.5769271837113028, - "learning_rate": 3.908128008586328e-06, - "loss": 0.9334, - "step": 1650 - }, - { - "epoch": 0.12407936269352172, - "grad_norm": 2.2558038878119113, - "learning_rate": 3.90798209274164e-06, - "loss": 0.8855, - "step": 1651 - }, - { - "epoch": 0.12415451675935668, - "grad_norm": 1.9390916746835742, - "learning_rate": 3.907836063841709e-06, - "loss": 0.9773, - "step": 1652 - }, - { - "epoch": 0.12422967082519164, - "grad_norm": 1.6741836152548772, - "learning_rate": 3.907689921895184e-06, - "loss": 0.9933, - "step": 1653 - }, - { - "epoch": 0.12430482489102661, - "grad_norm": 6.5920711748484315, - "learning_rate": 3.9075436669107265e-06, - "loss": 0.9789, - "step": 1654 - }, - { - "epoch": 0.12437997895686156, - "grad_norm": 5.997675445394742, - "learning_rate": 3.907397298897003e-06, - "loss": 0.9628, - "step": 1655 - }, - { - "epoch": 0.12445513302269653, - "grad_norm": 1.8502117654380354, - "learning_rate": 3.907250817862685e-06, - "loss": 0.9535, - "step": 1656 - }, - { - "epoch": 0.12453028708853149, - "grad_norm": 1.5560349802287334, - "learning_rate": 3.907104223816453e-06, - "loss": 0.9796, - "step": 1657 - }, - { - "epoch": 0.12460544115436645, - "grad_norm": 1.6580741481697547, - "learning_rate": 3.906957516766993e-06, - "loss": 1.0003, - "step": 1658 - }, - { - "epoch": 0.12468059522020142, - "grad_norm": 1.4873058096166976, - "learning_rate": 3.906810696722997e-06, - "loss": 1.0454, - "step": 1659 - }, - { - "epoch": 0.12475574928603637, - "grad_norm": 2.330842755211927, - "learning_rate": 3.906663763693167e-06, - "loss": 0.9602, - "step": 1660 - }, - { - "epoch": 0.12483090335187133, - "grad_norm": 1.5720114555815081, - "learning_rate": 3.906516717686207e-06, - "loss": 1.0715, - "step": 1661 - }, - { - "epoch": 0.1249060574177063, - "grad_norm": 5.890825101251535, - "learning_rate": 3.906369558710831e-06, - "loss": 0.9676, - "step": 1662 - }, - { - "epoch": 0.12498121148354126, - "grad_norm": 1.9640783559664452, - "learning_rate": 3.906222286775759e-06, - "loss": 0.9607, - "step": 1663 - }, - { - "epoch": 0.1250563655493762, - "grad_norm": 1.6792474924454701, - "learning_rate": 3.906074901889717e-06, - "loss": 0.9961, - "step": 1664 - }, - { - "epoch": 0.12513151961521118, - "grad_norm": 1.5826110530895858, - "learning_rate": 3.905927404061439e-06, - "loss": 1.0631, - "step": 1665 - }, - { - "epoch": 0.12520667368104615, - "grad_norm": 2.2439575651851813, - "learning_rate": 3.905779793299662e-06, - "loss": 0.999, - "step": 1666 - }, - { - "epoch": 0.1252818277468811, - "grad_norm": 0.9657344953559841, - "learning_rate": 3.905632069613136e-06, - "loss": 0.8753, - "step": 1667 - }, - { - "epoch": 0.12535698181271607, - "grad_norm": 2.2022158864047032, - "learning_rate": 3.9054842330106125e-06, - "loss": 1.0438, - "step": 1668 - }, - { - "epoch": 0.12543213587855104, - "grad_norm": 1.9124427637249501, - "learning_rate": 3.9053362835008516e-06, - "loss": 1.0499, - "step": 1669 - }, - { - "epoch": 0.12550728994438598, - "grad_norm": 2.2346372650521746, - "learning_rate": 3.9051882210926195e-06, - "loss": 0.8982, - "step": 1670 - }, - { - "epoch": 0.12558244401022095, - "grad_norm": 3.574930155678703, - "learning_rate": 3.90504004579469e-06, - "loss": 1.0022, - "step": 1671 - }, - { - "epoch": 0.12565759807605592, - "grad_norm": 1.7569819543961545, - "learning_rate": 3.904891757615843e-06, - "loss": 1.0118, - "step": 1672 - }, - { - "epoch": 0.12573275214189086, - "grad_norm": 2.2885619778282353, - "learning_rate": 3.904743356564865e-06, - "loss": 1.0073, - "step": 1673 - }, - { - "epoch": 0.12580790620772583, - "grad_norm": 1.7805788214531106, - "learning_rate": 3.90459484265055e-06, - "loss": 1.039, - "step": 1674 - }, - { - "epoch": 0.1258830602735608, - "grad_norm": 1.8271012659472383, - "learning_rate": 3.904446215881697e-06, - "loss": 1.015, - "step": 1675 - }, - { - "epoch": 0.12595821433939577, - "grad_norm": 1.2873472882414452, - "learning_rate": 3.9042974762671125e-06, - "loss": 1.0278, - "step": 1676 - }, - { - "epoch": 0.12603336840523072, - "grad_norm": 1.8683495516701236, - "learning_rate": 3.904148623815611e-06, - "loss": 1.0874, - "step": 1677 - }, - { - "epoch": 0.1261085224710657, - "grad_norm": 1.8177258364063353, - "learning_rate": 3.903999658536012e-06, - "loss": 1.0307, - "step": 1678 - }, - { - "epoch": 0.12618367653690066, - "grad_norm": 1.376417123346926, - "learning_rate": 3.903850580437142e-06, - "loss": 1.1225, - "step": 1679 - }, - { - "epoch": 0.1262588306027356, - "grad_norm": 1.8776337246060735, - "learning_rate": 3.903701389527836e-06, - "loss": 1.0524, - "step": 1680 - }, - { - "epoch": 0.12633398466857057, - "grad_norm": 2.653158026356022, - "learning_rate": 3.903552085816932e-06, - "loss": 1.0063, - "step": 1681 - }, - { - "epoch": 0.12640913873440554, - "grad_norm": 2.4538974396244755, - "learning_rate": 3.903402669313278e-06, - "loss": 1.1229, - "step": 1682 - }, - { - "epoch": 0.12648429280024048, - "grad_norm": 1.847558049179151, - "learning_rate": 3.903253140025726e-06, - "loss": 0.9623, - "step": 1683 - }, - { - "epoch": 0.12655944686607545, - "grad_norm": 1.711617182369511, - "learning_rate": 3.9031034979631385e-06, - "loss": 0.9882, - "step": 1684 - }, - { - "epoch": 0.12663460093191042, - "grad_norm": 1.8815094803716492, - "learning_rate": 3.902953743134381e-06, - "loss": 1.0196, - "step": 1685 - }, - { - "epoch": 0.12670975499774537, - "grad_norm": 2.391441754211848, - "learning_rate": 3.9028038755483275e-06, - "loss": 1.0192, - "step": 1686 - }, - { - "epoch": 0.12678490906358034, - "grad_norm": 1.6558835325933385, - "learning_rate": 3.902653895213858e-06, - "loss": 1.1272, - "step": 1687 - }, - { - "epoch": 0.1268600631294153, - "grad_norm": 2.284617199041184, - "learning_rate": 3.90250380213986e-06, - "loss": 1.0004, - "step": 1688 - }, - { - "epoch": 0.12693521719525025, - "grad_norm": 2.0164460868107543, - "learning_rate": 3.902353596335225e-06, - "loss": 1.0105, - "step": 1689 - }, - { - "epoch": 0.12701037126108522, - "grad_norm": 2.110040969270097, - "learning_rate": 3.902203277808856e-06, - "loss": 1.0483, - "step": 1690 - }, - { - "epoch": 0.1270855253269202, - "grad_norm": 1.8809505135548195, - "learning_rate": 3.902052846569659e-06, - "loss": 1.0805, - "step": 1691 - }, - { - "epoch": 0.12716067939275516, - "grad_norm": 2.083003577520015, - "learning_rate": 3.901902302626547e-06, - "loss": 0.9701, - "step": 1692 - }, - { - "epoch": 0.1272358334585901, - "grad_norm": 1.7575818397925957, - "learning_rate": 3.901751645988441e-06, - "loss": 0.9959, - "step": 1693 - }, - { - "epoch": 0.12731098752442507, - "grad_norm": 3.735370838389807, - "learning_rate": 3.901600876664267e-06, - "loss": 1.0419, - "step": 1694 - }, - { - "epoch": 0.12738614159026004, - "grad_norm": 1.992698633782217, - "learning_rate": 3.9014499946629595e-06, - "loss": 1.0648, - "step": 1695 - }, - { - "epoch": 0.127461295656095, - "grad_norm": 2.336913858454337, - "learning_rate": 3.901298999993459e-06, - "loss": 0.982, - "step": 1696 - }, - { - "epoch": 0.12753644972192996, - "grad_norm": 1.6641943794251848, - "learning_rate": 3.901147892664713e-06, - "loss": 1.0511, - "step": 1697 - }, - { - "epoch": 0.12761160378776493, - "grad_norm": 2.4237117570335918, - "learning_rate": 3.9009966726856725e-06, - "loss": 1.01, - "step": 1698 - }, - { - "epoch": 0.12768675785359987, - "grad_norm": 4.6168954308268075, - "learning_rate": 3.900845340065301e-06, - "loss": 0.9983, - "step": 1699 - }, - { - "epoch": 0.12776191191943484, - "grad_norm": 2.1282949147332917, - "learning_rate": 3.900693894812564e-06, - "loss": 1.0771, - "step": 1700 - }, - { - "epoch": 0.1278370659852698, - "grad_norm": 2.057496098529737, - "learning_rate": 3.900542336936436e-06, - "loss": 0.9968, - "step": 1701 - }, - { - "epoch": 0.12791222005110475, - "grad_norm": 2.9489322346339004, - "learning_rate": 3.900390666445896e-06, - "loss": 1.0376, - "step": 1702 - }, - { - "epoch": 0.12798737411693972, - "grad_norm": 5.2138983737536595, - "learning_rate": 3.900238883349932e-06, - "loss": 1.101, - "step": 1703 - }, - { - "epoch": 0.1280625281827747, - "grad_norm": 1.874595089742924, - "learning_rate": 3.900086987657539e-06, - "loss": 1.0992, - "step": 1704 - }, - { - "epoch": 0.12813768224860966, - "grad_norm": 1.7890155236352712, - "learning_rate": 3.899934979377714e-06, - "loss": 1.0381, - "step": 1705 - }, - { - "epoch": 0.1282128363144446, - "grad_norm": 1.4680427309308361, - "learning_rate": 3.899782858519467e-06, - "loss": 1.0903, - "step": 1706 - }, - { - "epoch": 0.12828799038027958, - "grad_norm": 1.7735150021998556, - "learning_rate": 3.899630625091811e-06, - "loss": 1.0758, - "step": 1707 - }, - { - "epoch": 0.12836314444611455, - "grad_norm": 1.6694523736924465, - "learning_rate": 3.899478279103767e-06, - "loss": 0.9597, - "step": 1708 - }, - { - "epoch": 0.1284382985119495, - "grad_norm": 1.7913750108007227, - "learning_rate": 3.89932582056436e-06, - "loss": 1.0812, - "step": 1709 - }, - { - "epoch": 0.12851345257778446, - "grad_norm": 1.509541760190783, - "learning_rate": 3.899173249482626e-06, - "loss": 1.0452, - "step": 1710 - }, - { - "epoch": 0.12858860664361943, - "grad_norm": 1.6749383544965246, - "learning_rate": 3.899020565867604e-06, - "loss": 1.0781, - "step": 1711 - }, - { - "epoch": 0.12866376070945437, - "grad_norm": 1.6694712962344713, - "learning_rate": 3.898867769728342e-06, - "loss": 0.9945, - "step": 1712 - }, - { - "epoch": 0.12873891477528934, - "grad_norm": 1.8742568768673677, - "learning_rate": 3.8987148610738935e-06, - "loss": 1.0023, - "step": 1713 - }, - { - "epoch": 0.1288140688411243, - "grad_norm": 2.164403564899126, - "learning_rate": 3.898561839913319e-06, - "loss": 1.0268, - "step": 1714 - }, - { - "epoch": 0.12888922290695926, - "grad_norm": 2.5034029688422037, - "learning_rate": 3.898408706255685e-06, - "loss": 1.1103, - "step": 1715 - }, - { - "epoch": 0.12896437697279423, - "grad_norm": 2.279336937141229, - "learning_rate": 3.898255460110066e-06, - "loss": 0.9734, - "step": 1716 - }, - { - "epoch": 0.1290395310386292, - "grad_norm": 1.9825110504689054, - "learning_rate": 3.898102101485542e-06, - "loss": 1.0036, - "step": 1717 - }, - { - "epoch": 0.12911468510446414, - "grad_norm": 2.8614676067219156, - "learning_rate": 3.8979486303912e-06, - "loss": 0.9495, - "step": 1718 - }, - { - "epoch": 0.1291898391702991, - "grad_norm": 2.6872538964011468, - "learning_rate": 3.8977950468361335e-06, - "loss": 1.0774, - "step": 1719 - }, - { - "epoch": 0.12926499323613408, - "grad_norm": 1.6911104221080815, - "learning_rate": 3.897641350829444e-06, - "loss": 0.924, - "step": 1720 - }, - { - "epoch": 0.12934014730196905, - "grad_norm": 2.2679771052622857, - "learning_rate": 3.8974875423802385e-06, - "loss": 1.1826, - "step": 1721 - }, - { - "epoch": 0.129415301367804, - "grad_norm": 2.1405127280092335, - "learning_rate": 3.897333621497629e-06, - "loss": 1.108, - "step": 1722 - }, - { - "epoch": 0.12949045543363896, - "grad_norm": 1.9012685631642714, - "learning_rate": 3.897179588190737e-06, - "loss": 1.0344, - "step": 1723 - }, - { - "epoch": 0.12956560949947393, - "grad_norm": 1.740726148861328, - "learning_rate": 3.89702544246869e-06, - "loss": 0.9575, - "step": 1724 - }, - { - "epoch": 0.12964076356530888, - "grad_norm": 1.743027967595211, - "learning_rate": 3.896871184340622e-06, - "loss": 1.0447, - "step": 1725 - }, - { - "epoch": 0.12971591763114385, - "grad_norm": 2.1519258789287607, - "learning_rate": 3.896716813815672e-06, - "loss": 1.0734, - "step": 1726 - }, - { - "epoch": 0.12979107169697882, - "grad_norm": 2.3668086689895844, - "learning_rate": 3.8965623309029876e-06, - "loss": 1.0839, - "step": 1727 - }, - { - "epoch": 0.12986622576281376, - "grad_norm": 1.6531114170259185, - "learning_rate": 3.896407735611722e-06, - "loss": 1.0337, - "step": 1728 - }, - { - "epoch": 0.12994137982864873, - "grad_norm": 2.0127204253042725, - "learning_rate": 3.896253027951038e-06, - "loss": 1.0136, - "step": 1729 - }, - { - "epoch": 0.1300165338944837, - "grad_norm": 2.954866733289172, - "learning_rate": 3.8960982079301e-06, - "loss": 0.9778, - "step": 1730 - }, - { - "epoch": 0.13009168796031864, - "grad_norm": 1.503711717582844, - "learning_rate": 3.895943275558083e-06, - "loss": 1.0625, - "step": 1731 - }, - { - "epoch": 0.1301668420261536, - "grad_norm": 2.1384418799346974, - "learning_rate": 3.895788230844166e-06, - "loss": 0.9532, - "step": 1732 - }, - { - "epoch": 0.13024199609198858, - "grad_norm": 1.717850883433962, - "learning_rate": 3.895633073797537e-06, - "loss": 0.965, - "step": 1733 - }, - { - "epoch": 0.13031715015782352, - "grad_norm": 1.8042346997314644, - "learning_rate": 3.89547780442739e-06, - "loss": 1.0842, - "step": 1734 - }, - { - "epoch": 0.1303923042236585, - "grad_norm": 1.7495602327647652, - "learning_rate": 3.895322422742924e-06, - "loss": 0.9621, - "step": 1735 - }, - { - "epoch": 0.13046745828949347, - "grad_norm": 2.9192829568539147, - "learning_rate": 3.895166928753348e-06, - "loss": 1.056, - "step": 1736 - }, - { - "epoch": 0.13054261235532844, - "grad_norm": 2.8915365895267136, - "learning_rate": 3.895011322467874e-06, - "loss": 1.0442, - "step": 1737 - }, - { - "epoch": 0.13061776642116338, - "grad_norm": 10.437303735406799, - "learning_rate": 3.894855603895723e-06, - "loss": 0.986, - "step": 1738 - }, - { - "epoch": 0.13069292048699835, - "grad_norm": 2.9008548660450804, - "learning_rate": 3.89469977304612e-06, - "loss": 1.0206, - "step": 1739 - }, - { - "epoch": 0.13076807455283332, - "grad_norm": 2.1146205529255657, - "learning_rate": 3.894543829928302e-06, - "loss": 0.9753, - "step": 1740 - }, - { - "epoch": 0.13084322861866826, - "grad_norm": 2.490035033381712, - "learning_rate": 3.894387774551506e-06, - "loss": 1.0159, - "step": 1741 - }, - { - "epoch": 0.13091838268450323, - "grad_norm": 1.943727207356851, - "learning_rate": 3.894231606924981e-06, - "loss": 1.0186, - "step": 1742 - }, - { - "epoch": 0.1309935367503382, - "grad_norm": 1.8572834978193213, - "learning_rate": 3.89407532705798e-06, - "loss": 0.938, - "step": 1743 - }, - { - "epoch": 0.13106869081617314, - "grad_norm": 0.7763937123883732, - "learning_rate": 3.893918934959762e-06, - "loss": 0.8799, - "step": 1744 - }, - { - "epoch": 0.13114384488200811, - "grad_norm": 5.821004245281392, - "learning_rate": 3.893762430639596e-06, - "loss": 0.9595, - "step": 1745 - }, - { - "epoch": 0.13121899894784309, - "grad_norm": 1.648109584211136, - "learning_rate": 3.893605814106753e-06, - "loss": 1.0353, - "step": 1746 - }, - { - "epoch": 0.13129415301367803, - "grad_norm": 2.9903989381562743, - "learning_rate": 3.893449085370515e-06, - "loss": 1.05, - "step": 1747 - }, - { - "epoch": 0.131369307079513, - "grad_norm": 1.708962541713212, - "learning_rate": 3.893292244440168e-06, - "loss": 1.0753, - "step": 1748 - }, - { - "epoch": 0.13144446114534797, - "grad_norm": 1.8932961124580732, - "learning_rate": 3.893135291325006e-06, - "loss": 1.0784, - "step": 1749 - }, - { - "epoch": 0.13151961521118294, - "grad_norm": 1.8599028278932537, - "learning_rate": 3.892978226034329e-06, - "loss": 1.0291, - "step": 1750 - }, - { - "epoch": 0.13159476927701788, - "grad_norm": 2.062629695629224, - "learning_rate": 3.892821048577443e-06, - "loss": 0.9827, - "step": 1751 - }, - { - "epoch": 0.13166992334285285, - "grad_norm": 2.59978857647983, - "learning_rate": 3.892663758963661e-06, - "loss": 1.0704, - "step": 1752 - }, - { - "epoch": 0.13174507740868782, - "grad_norm": 1.769164596870255, - "learning_rate": 3.892506357202305e-06, - "loss": 0.9173, - "step": 1753 - }, - { - "epoch": 0.13182023147452276, - "grad_norm": 1.9451670764661944, - "learning_rate": 3.8923488433027e-06, - "loss": 1.0249, - "step": 1754 - }, - { - "epoch": 0.13189538554035773, - "grad_norm": 1.655972871447615, - "learning_rate": 3.89219121727418e-06, - "loss": 1.057, - "step": 1755 - }, - { - "epoch": 0.1319705396061927, - "grad_norm": 1.564305673572689, - "learning_rate": 3.892033479126084e-06, - "loss": 1.0059, - "step": 1756 - }, - { - "epoch": 0.13204569367202765, - "grad_norm": 1.6704705301863512, - "learning_rate": 3.89187562886776e-06, - "loss": 1.0631, - "step": 1757 - }, - { - "epoch": 0.13212084773786262, - "grad_norm": 1.5853640266465185, - "learning_rate": 3.89171766650856e-06, - "loss": 1.0627, - "step": 1758 - }, - { - "epoch": 0.1321960018036976, - "grad_norm": 2.0885876454709105, - "learning_rate": 3.891559592057845e-06, - "loss": 1.1199, - "step": 1759 - }, - { - "epoch": 0.13227115586953253, - "grad_norm": 2.314392861656101, - "learning_rate": 3.8914014055249805e-06, - "loss": 0.9749, - "step": 1760 - }, - { - "epoch": 0.1323463099353675, - "grad_norm": 1.6181972134221154, - "learning_rate": 3.89124310691934e-06, - "loss": 0.9303, - "step": 1761 - }, - { - "epoch": 0.13242146400120247, - "grad_norm": 1.9916930538754491, - "learning_rate": 3.891084696250304e-06, - "loss": 1.0804, - "step": 1762 - }, - { - "epoch": 0.1324966180670374, - "grad_norm": 2.7730637849280604, - "learning_rate": 3.890926173527258e-06, - "loss": 1.0245, - "step": 1763 - }, - { - "epoch": 0.13257177213287238, - "grad_norm": 1.6928638406383054, - "learning_rate": 3.8907675387595944e-06, - "loss": 0.9615, - "step": 1764 - }, - { - "epoch": 0.13264692619870735, - "grad_norm": 2.774947322311103, - "learning_rate": 3.890608791956714e-06, - "loss": 0.9711, - "step": 1765 - }, - { - "epoch": 0.13272208026454232, - "grad_norm": 2.0314624381849513, - "learning_rate": 3.890449933128025e-06, - "loss": 1.048, - "step": 1766 - }, - { - "epoch": 0.13279723433037727, - "grad_norm": 1.6057499919988165, - "learning_rate": 3.890290962282937e-06, - "loss": 0.9966, - "step": 1767 - }, - { - "epoch": 0.13287238839621224, - "grad_norm": 1.8960140128201328, - "learning_rate": 3.890131879430871e-06, - "loss": 0.9719, - "step": 1768 - }, - { - "epoch": 0.1329475424620472, - "grad_norm": 2.1500371353136063, - "learning_rate": 3.889972684581253e-06, - "loss": 0.9902, - "step": 1769 - }, - { - "epoch": 0.13302269652788215, - "grad_norm": 3.2729469969712173, - "learning_rate": 3.889813377743517e-06, - "loss": 1.0529, - "step": 1770 - }, - { - "epoch": 0.13309785059371712, - "grad_norm": 1.5775440015263311, - "learning_rate": 3.8896539589271016e-06, - "loss": 1.0699, - "step": 1771 - }, - { - "epoch": 0.1331730046595521, - "grad_norm": 2.619692204573679, - "learning_rate": 3.889494428141453e-06, - "loss": 1.0372, - "step": 1772 - }, - { - "epoch": 0.13324815872538703, - "grad_norm": 1.318537947252227, - "learning_rate": 3.889334785396024e-06, - "loss": 0.9713, - "step": 1773 - }, - { - "epoch": 0.133323312791222, - "grad_norm": 3.370618024125989, - "learning_rate": 3.8891750307002746e-06, - "loss": 1.064, - "step": 1774 - }, - { - "epoch": 0.13339846685705697, - "grad_norm": 1.7114959920066735, - "learning_rate": 3.889015164063671e-06, - "loss": 1.0327, - "step": 1775 - }, - { - "epoch": 0.13347362092289192, - "grad_norm": 2.1956978999240113, - "learning_rate": 3.888855185495685e-06, - "loss": 1.0335, - "step": 1776 - }, - { - "epoch": 0.1335487749887269, - "grad_norm": 2.3130581156500396, - "learning_rate": 3.8886950950057965e-06, - "loss": 0.9077, - "step": 1777 - }, - { - "epoch": 0.13362392905456186, - "grad_norm": 1.792191679865678, - "learning_rate": 3.888534892603491e-06, - "loss": 0.8907, - "step": 1778 - }, - { - "epoch": 0.1336990831203968, - "grad_norm": 1.8466781382198683, - "learning_rate": 3.888374578298261e-06, - "loss": 1.0615, - "step": 1779 - }, - { - "epoch": 0.13377423718623177, - "grad_norm": 1.9998260660832936, - "learning_rate": 3.888214152099607e-06, - "loss": 1.0551, - "step": 1780 - }, - { - "epoch": 0.13384939125206674, - "grad_norm": 1.6871191230734313, - "learning_rate": 3.888053614017034e-06, - "loss": 1.0049, - "step": 1781 - }, - { - "epoch": 0.1339245453179017, - "grad_norm": 1.4735843556945691, - "learning_rate": 3.887892964060054e-06, - "loss": 1.0189, - "step": 1782 - }, - { - "epoch": 0.13399969938373665, - "grad_norm": 2.145013362424973, - "learning_rate": 3.887732202238186e-06, - "loss": 0.9658, - "step": 1783 - }, - { - "epoch": 0.13407485344957162, - "grad_norm": 4.085189138117041, - "learning_rate": 3.887571328560958e-06, - "loss": 0.9908, - "step": 1784 - }, - { - "epoch": 0.1341500075154066, - "grad_norm": 1.9138904104917274, - "learning_rate": 3.8874103430379e-06, - "loss": 1.0157, - "step": 1785 - }, - { - "epoch": 0.13422516158124154, - "grad_norm": 2.3050728168694707, - "learning_rate": 3.887249245678552e-06, - "loss": 1.127, - "step": 1786 - }, - { - "epoch": 0.1343003156470765, - "grad_norm": 1.428783693892503, - "learning_rate": 3.887088036492459e-06, - "loss": 0.9983, - "step": 1787 - }, - { - "epoch": 0.13437546971291148, - "grad_norm": 1.8457850846757988, - "learning_rate": 3.886926715489173e-06, - "loss": 1.0267, - "step": 1788 - }, - { - "epoch": 0.13445062377874642, - "grad_norm": 2.075283545918358, - "learning_rate": 3.8867652826782555e-06, - "loss": 1.0579, - "step": 1789 - }, - { - "epoch": 0.1345257778445814, - "grad_norm": 1.9624711077075851, - "learning_rate": 3.886603738069269e-06, - "loss": 1.0209, - "step": 1790 - }, - { - "epoch": 0.13460093191041636, - "grad_norm": 2.472016599821632, - "learning_rate": 3.886442081671787e-06, - "loss": 1.0079, - "step": 1791 - }, - { - "epoch": 0.1346760859762513, - "grad_norm": 2.9396761686038353, - "learning_rate": 3.886280313495388e-06, - "loss": 0.9486, - "step": 1792 - }, - { - "epoch": 0.13475124004208627, - "grad_norm": 1.9539641751897103, - "learning_rate": 3.886118433549657e-06, - "loss": 1.0563, - "step": 1793 - }, - { - "epoch": 0.13482639410792124, - "grad_norm": 1.3923965485529763, - "learning_rate": 3.8859564418441865e-06, - "loss": 0.9969, - "step": 1794 - }, - { - "epoch": 0.1349015481737562, - "grad_norm": 1.5896790065279771, - "learning_rate": 3.885794338388575e-06, - "loss": 1.0008, - "step": 1795 - }, - { - "epoch": 0.13497670223959116, - "grad_norm": 4.229444723274715, - "learning_rate": 3.8856321231924275e-06, - "loss": 1.0788, - "step": 1796 - }, - { - "epoch": 0.13505185630542613, - "grad_norm": 1.6914832822248114, - "learning_rate": 3.885469796265357e-06, - "loss": 1.0686, - "step": 1797 - }, - { - "epoch": 0.1351270103712611, - "grad_norm": 1.8717264845685329, - "learning_rate": 3.885307357616981e-06, - "loss": 1.0797, - "step": 1798 - }, - { - "epoch": 0.13520216443709604, - "grad_norm": 2.149421161534165, - "learning_rate": 3.8851448072569245e-06, - "loss": 1.0057, - "step": 1799 - }, - { - "epoch": 0.135277318502931, - "grad_norm": 3.343252554995565, - "learning_rate": 3.884982145194819e-06, - "loss": 1.0123, - "step": 1800 - }, - { - "epoch": 0.13535247256876598, - "grad_norm": 1.3132529142773337, - "learning_rate": 3.8848193714403035e-06, - "loss": 1.0721, - "step": 1801 - }, - { - "epoch": 0.13542762663460092, - "grad_norm": 1.3552668088494402, - "learning_rate": 3.884656486003023e-06, - "loss": 1.0205, - "step": 1802 - }, - { - "epoch": 0.1355027807004359, - "grad_norm": 1.982726065312652, - "learning_rate": 3.8844934888926295e-06, - "loss": 1.0213, - "step": 1803 - }, - { - "epoch": 0.13557793476627086, - "grad_norm": 8.297663017250795, - "learning_rate": 3.884330380118779e-06, - "loss": 0.9759, - "step": 1804 - }, - { - "epoch": 0.1356530888321058, - "grad_norm": 1.8912644999289248, - "learning_rate": 3.884167159691139e-06, - "loss": 1.0042, - "step": 1805 - }, - { - "epoch": 0.13572824289794078, - "grad_norm": 5.386050729914262, - "learning_rate": 3.88400382761938e-06, - "loss": 0.9338, - "step": 1806 - }, - { - "epoch": 0.13580339696377575, - "grad_norm": 1.7931329656126853, - "learning_rate": 3.883840383913179e-06, - "loss": 1.0174, - "step": 1807 - }, - { - "epoch": 0.1358785510296107, - "grad_norm": 3.7045021554572752, - "learning_rate": 3.8836768285822225e-06, - "loss": 0.9232, - "step": 1808 - }, - { - "epoch": 0.13595370509544566, - "grad_norm": 1.7412987054173208, - "learning_rate": 3.8835131616362005e-06, - "loss": 1.0445, - "step": 1809 - }, - { - "epoch": 0.13602885916128063, - "grad_norm": 1.8291050167020575, - "learning_rate": 3.883349383084811e-06, - "loss": 1.1091, - "step": 1810 - }, - { - "epoch": 0.1361040132271156, - "grad_norm": 1.3772760107354352, - "learning_rate": 3.883185492937759e-06, - "loss": 1.0813, - "step": 1811 - }, - { - "epoch": 0.13617916729295054, - "grad_norm": 2.17505418556505, - "learning_rate": 3.883021491204755e-06, - "loss": 1.066, - "step": 1812 - }, - { - "epoch": 0.1362543213587855, - "grad_norm": 2.511593449197687, - "learning_rate": 3.8828573778955175e-06, - "loss": 1.1007, - "step": 1813 - }, - { - "epoch": 0.13632947542462048, - "grad_norm": 0.7598400324691899, - "learning_rate": 3.88269315301977e-06, - "loss": 0.8393, - "step": 1814 - }, - { - "epoch": 0.13640462949045543, - "grad_norm": 1.644970398030718, - "learning_rate": 3.882528816587244e-06, - "loss": 0.9759, - "step": 1815 - }, - { - "epoch": 0.1364797835562904, - "grad_norm": 1.7502937070426454, - "learning_rate": 3.882364368607677e-06, - "loss": 0.9708, - "step": 1816 - }, - { - "epoch": 0.13655493762212537, - "grad_norm": 2.0772414085769704, - "learning_rate": 3.882199809090813e-06, - "loss": 0.9782, - "step": 1817 - }, - { - "epoch": 0.1366300916879603, - "grad_norm": 2.1484535910263896, - "learning_rate": 3.8820351380464035e-06, - "loss": 1.1334, - "step": 1818 - }, - { - "epoch": 0.13670524575379528, - "grad_norm": 1.9418239949983287, - "learning_rate": 3.881870355484204e-06, - "loss": 1.0416, - "step": 1819 - }, - { - "epoch": 0.13678039981963025, - "grad_norm": 1.538216140322336, - "learning_rate": 3.88170546141398e-06, - "loss": 1.021, - "step": 1820 - }, - { - "epoch": 0.1368555538854652, - "grad_norm": 4.7179105908895655, - "learning_rate": 3.881540455845503e-06, - "loss": 1.0049, - "step": 1821 - }, - { - "epoch": 0.13693070795130016, - "grad_norm": 1.945378865408334, - "learning_rate": 3.881375338788549e-06, - "loss": 0.9565, - "step": 1822 - }, - { - "epoch": 0.13700586201713513, - "grad_norm": 2.3883904878530733, - "learning_rate": 3.881210110252901e-06, - "loss": 0.9935, - "step": 1823 - }, - { - "epoch": 0.13708101608297008, - "grad_norm": 1.6806202981981677, - "learning_rate": 3.881044770248351e-06, - "loss": 1.0452, - "step": 1824 - }, - { - "epoch": 0.13715617014880505, - "grad_norm": 1.7522926979604638, - "learning_rate": 3.880879318784695e-06, - "loss": 1.0686, - "step": 1825 - }, - { - "epoch": 0.13723132421464002, - "grad_norm": 1.8468633970874024, - "learning_rate": 3.8807137558717375e-06, - "loss": 1.0989, - "step": 1826 - }, - { - "epoch": 0.137306478280475, - "grad_norm": 2.059212694733028, - "learning_rate": 3.880548081519287e-06, - "loss": 0.943, - "step": 1827 - }, - { - "epoch": 0.13738163234630993, - "grad_norm": 2.297284874708129, - "learning_rate": 3.880382295737163e-06, - "loss": 0.914, - "step": 1828 - }, - { - "epoch": 0.1374567864121449, - "grad_norm": 1.9970174246129562, - "learning_rate": 3.880216398535187e-06, - "loss": 0.9837, - "step": 1829 - }, - { - "epoch": 0.13753194047797987, - "grad_norm": 1.6849701603134848, - "learning_rate": 3.8800503899231895e-06, - "loss": 1.0295, - "step": 1830 - }, - { - "epoch": 0.1376070945438148, - "grad_norm": 1.7901273665054585, - "learning_rate": 3.879884269911007e-06, - "loss": 0.8997, - "step": 1831 - }, - { - "epoch": 0.13768224860964978, - "grad_norm": 1.9937053209040076, - "learning_rate": 3.879718038508483e-06, - "loss": 0.9235, - "step": 1832 - }, - { - "epoch": 0.13775740267548475, - "grad_norm": 1.778544060479565, - "learning_rate": 3.8795516957254675e-06, - "loss": 1.0485, - "step": 1833 - }, - { - "epoch": 0.1378325567413197, - "grad_norm": 5.91775989730476, - "learning_rate": 3.8793852415718165e-06, - "loss": 0.9922, - "step": 1834 - }, - { - "epoch": 0.13790771080715467, - "grad_norm": 2.5646460316046613, - "learning_rate": 3.879218676057394e-06, - "loss": 1.0328, - "step": 1835 - }, - { - "epoch": 0.13798286487298964, - "grad_norm": 1.9228890697721093, - "learning_rate": 3.879051999192068e-06, - "loss": 1.0655, - "step": 1836 - }, - { - "epoch": 0.13805801893882458, - "grad_norm": 5.194937097280596, - "learning_rate": 3.8788852109857166e-06, - "loss": 1.0319, - "step": 1837 - }, - { - "epoch": 0.13813317300465955, - "grad_norm": 1.853950987998242, - "learning_rate": 3.878718311448221e-06, - "loss": 1.0423, - "step": 1838 - }, - { - "epoch": 0.13820832707049452, - "grad_norm": 1.6541232723946877, - "learning_rate": 3.878551300589471e-06, - "loss": 0.9604, - "step": 1839 - }, - { - "epoch": 0.1382834811363295, - "grad_norm": 3.601193754636795, - "learning_rate": 3.8783841784193635e-06, - "loss": 1.0453, - "step": 1840 - }, - { - "epoch": 0.13835863520216443, - "grad_norm": 4.355280659125308, - "learning_rate": 3.878216944947801e-06, - "loss": 0.9283, - "step": 1841 - }, - { - "epoch": 0.1384337892679994, - "grad_norm": 1.7379270728540603, - "learning_rate": 3.878049600184692e-06, - "loss": 1.0016, - "step": 1842 - }, - { - "epoch": 0.13850894333383437, - "grad_norm": 1.9128887703878594, - "learning_rate": 3.877882144139952e-06, - "loss": 0.9174, - "step": 1843 - }, - { - "epoch": 0.13858409739966931, - "grad_norm": 3.4602427592587963, - "learning_rate": 3.8777145768235054e-06, - "loss": 0.9947, - "step": 1844 - }, - { - "epoch": 0.13865925146550429, - "grad_norm": 2.7869947445987227, - "learning_rate": 3.877546898245279e-06, - "loss": 1.033, - "step": 1845 - }, - { - "epoch": 0.13873440553133926, - "grad_norm": 2.2711287373412254, - "learning_rate": 3.877379108415209e-06, - "loss": 0.9597, - "step": 1846 - }, - { - "epoch": 0.1388095595971742, - "grad_norm": 2.167101547417954, - "learning_rate": 3.8772112073432385e-06, - "loss": 1.0235, - "step": 1847 - }, - { - "epoch": 0.13888471366300917, - "grad_norm": 2.0600852702371797, - "learning_rate": 3.8770431950393154e-06, - "loss": 1.0345, - "step": 1848 - }, - { - "epoch": 0.13895986772884414, - "grad_norm": 1.5930052868599096, - "learning_rate": 3.876875071513395e-06, - "loss": 1.0205, - "step": 1849 - }, - { - "epoch": 0.13903502179467908, - "grad_norm": 1.8713206748892137, - "learning_rate": 3.87670683677544e-06, - "loss": 0.9268, - "step": 1850 - }, - { - "epoch": 0.13911017586051405, - "grad_norm": 2.0202941044016414, - "learning_rate": 3.876538490835419e-06, - "loss": 0.9982, - "step": 1851 - }, - { - "epoch": 0.13918532992634902, - "grad_norm": 1.6286798772699014, - "learning_rate": 3.876370033703307e-06, - "loss": 1.0927, - "step": 1852 - }, - { - "epoch": 0.13926048399218396, - "grad_norm": 0.8101334586105832, - "learning_rate": 3.876201465389084e-06, - "loss": 0.8637, - "step": 1853 - }, - { - "epoch": 0.13933563805801893, - "grad_norm": 1.8535467533114522, - "learning_rate": 3.87603278590274e-06, - "loss": 1.0893, - "step": 1854 - }, - { - "epoch": 0.1394107921238539, - "grad_norm": 1.5504159953645509, - "learning_rate": 3.8758639952542695e-06, - "loss": 1.0241, - "step": 1855 - }, - { - "epoch": 0.13948594618968888, - "grad_norm": 4.520844818428792, - "learning_rate": 3.875695093453675e-06, - "loss": 1.0031, - "step": 1856 - }, - { - "epoch": 0.13956110025552382, - "grad_norm": 0.6269961428516027, - "learning_rate": 3.875526080510963e-06, - "loss": 0.8092, - "step": 1857 - }, - { - "epoch": 0.1396362543213588, - "grad_norm": 1.8624796872663434, - "learning_rate": 3.8753569564361495e-06, - "loss": 1.1551, - "step": 1858 - }, - { - "epoch": 0.13971140838719376, - "grad_norm": 1.9062973391799034, - "learning_rate": 3.875187721239254e-06, - "loss": 0.9291, - "step": 1859 - }, - { - "epoch": 0.1397865624530287, - "grad_norm": 1.4166027035952509, - "learning_rate": 3.8750183749303066e-06, - "loss": 0.9986, - "step": 1860 - }, - { - "epoch": 0.13986171651886367, - "grad_norm": 2.6338581306699753, - "learning_rate": 3.87484891751934e-06, - "loss": 1.0348, - "step": 1861 - }, - { - "epoch": 0.13993687058469864, - "grad_norm": 3.9544857532853763, - "learning_rate": 3.874679349016396e-06, - "loss": 0.9213, - "step": 1862 - }, - { - "epoch": 0.14001202465053358, - "grad_norm": 1.4650480814520002, - "learning_rate": 3.874509669431521e-06, - "loss": 1.0959, - "step": 1863 - }, - { - "epoch": 0.14008717871636855, - "grad_norm": 2.538883425926751, - "learning_rate": 3.874339878774771e-06, - "loss": 1.0577, - "step": 1864 - }, - { - "epoch": 0.14016233278220352, - "grad_norm": 1.9729880478811597, - "learning_rate": 3.8741699770562065e-06, - "loss": 1.0412, - "step": 1865 - }, - { - "epoch": 0.14023748684803847, - "grad_norm": 1.8382047248991282, - "learning_rate": 3.873999964285893e-06, - "loss": 0.8264, - "step": 1866 - }, - { - "epoch": 0.14031264091387344, - "grad_norm": 2.2301165140616046, - "learning_rate": 3.873829840473906e-06, - "loss": 1.0213, - "step": 1867 - }, - { - "epoch": 0.1403877949797084, - "grad_norm": 2.170205771538799, - "learning_rate": 3.873659605630325e-06, - "loss": 1.0669, - "step": 1868 - }, - { - "epoch": 0.14046294904554335, - "grad_norm": 1.8486176584371283, - "learning_rate": 3.873489259765239e-06, - "loss": 1.0827, - "step": 1869 - }, - { - "epoch": 0.14053810311137832, - "grad_norm": 1.9504391322491992, - "learning_rate": 3.873318802888739e-06, - "loss": 1.0554, - "step": 1870 - }, - { - "epoch": 0.1406132571772133, - "grad_norm": 2.0482755339465117, - "learning_rate": 3.8731482350109276e-06, - "loss": 0.9611, - "step": 1871 - }, - { - "epoch": 0.14068841124304826, - "grad_norm": 5.386250631173452, - "learning_rate": 3.87297755614191e-06, - "loss": 1.0842, - "step": 1872 - }, - { - "epoch": 0.1407635653088832, - "grad_norm": 1.9210321082192579, - "learning_rate": 3.8728067662918e-06, - "loss": 1.0881, - "step": 1873 - }, - { - "epoch": 0.14083871937471817, - "grad_norm": 1.9837112518160664, - "learning_rate": 3.872635865470718e-06, - "loss": 1.0914, - "step": 1874 - }, - { - "epoch": 0.14091387344055314, - "grad_norm": 1.772650523069301, - "learning_rate": 3.8724648536887895e-06, - "loss": 1.0646, - "step": 1875 - }, - { - "epoch": 0.1409890275063881, - "grad_norm": 2.4306637682364727, - "learning_rate": 3.872293730956149e-06, - "loss": 0.9728, - "step": 1876 - }, - { - "epoch": 0.14106418157222306, - "grad_norm": 2.093033853154129, - "learning_rate": 3.872122497282935e-06, - "loss": 0.9886, - "step": 1877 - }, - { - "epoch": 0.14113933563805803, - "grad_norm": 2.5344680298688895, - "learning_rate": 3.871951152679294e-06, - "loss": 1.0348, - "step": 1878 - }, - { - "epoch": 0.14121448970389297, - "grad_norm": 1.6642579872749952, - "learning_rate": 3.871779697155379e-06, - "loss": 1.0709, - "step": 1879 - }, - { - "epoch": 0.14128964376972794, - "grad_norm": 2.170181602227221, - "learning_rate": 3.87160813072135e-06, - "loss": 0.9758, - "step": 1880 - }, - { - "epoch": 0.1413647978355629, - "grad_norm": 1.5490343839253562, - "learning_rate": 3.871436453387372e-06, - "loss": 1.0674, - "step": 1881 - }, - { - "epoch": 0.14143995190139785, - "grad_norm": 1.7421227002601858, - "learning_rate": 3.8712646651636185e-06, - "loss": 1.0479, - "step": 1882 - }, - { - "epoch": 0.14151510596723282, - "grad_norm": 1.5885271248907498, - "learning_rate": 3.8710927660602676e-06, - "loss": 0.9204, - "step": 1883 - }, - { - "epoch": 0.1415902600330678, - "grad_norm": 1.805975714320639, - "learning_rate": 3.870920756087505e-06, - "loss": 1.0228, - "step": 1884 - }, - { - "epoch": 0.14166541409890276, - "grad_norm": 1.8357660091740566, - "learning_rate": 3.870748635255524e-06, - "loss": 0.9981, - "step": 1885 - }, - { - "epoch": 0.1417405681647377, - "grad_norm": 1.662908306032308, - "learning_rate": 3.870576403574523e-06, - "loss": 1.0298, - "step": 1886 - }, - { - "epoch": 0.14181572223057268, - "grad_norm": 3.9267127606763395, - "learning_rate": 3.870404061054706e-06, - "loss": 1.0086, - "step": 1887 - }, - { - "epoch": 0.14189087629640765, - "grad_norm": 1.9480252023198497, - "learning_rate": 3.870231607706287e-06, - "loss": 0.9648, - "step": 1888 - }, - { - "epoch": 0.1419660303622426, - "grad_norm": 2.152159196588044, - "learning_rate": 3.870059043539484e-06, - "loss": 0.9611, - "step": 1889 - }, - { - "epoch": 0.14204118442807756, - "grad_norm": 2.0558945044158645, - "learning_rate": 3.869886368564521e-06, - "loss": 1.2128, - "step": 1890 - }, - { - "epoch": 0.14211633849391253, - "grad_norm": 3.0715377731174023, - "learning_rate": 3.869713582791631e-06, - "loss": 1.0612, - "step": 1891 - }, - { - "epoch": 0.14219149255974747, - "grad_norm": 1.695798672346083, - "learning_rate": 3.869540686231051e-06, - "loss": 1.0231, - "step": 1892 - }, - { - "epoch": 0.14226664662558244, - "grad_norm": 1.947575183930304, - "learning_rate": 3.8693676788930264e-06, - "loss": 1.0038, - "step": 1893 - }, - { - "epoch": 0.1423418006914174, - "grad_norm": 2.105432626842544, - "learning_rate": 3.869194560787808e-06, - "loss": 1.0102, - "step": 1894 - }, - { - "epoch": 0.14241695475725236, - "grad_norm": 1.879967023276195, - "learning_rate": 3.8690213319256555e-06, - "loss": 0.9845, - "step": 1895 - }, - { - "epoch": 0.14249210882308733, - "grad_norm": 0.7457873928760963, - "learning_rate": 3.8688479923168316e-06, - "loss": 0.9271, - "step": 1896 - }, - { - "epoch": 0.1425672628889223, - "grad_norm": 2.2912244774728325, - "learning_rate": 3.868674541971608e-06, - "loss": 0.9509, - "step": 1897 - }, - { - "epoch": 0.14264241695475724, - "grad_norm": 1.668722442513711, - "learning_rate": 3.868500980900262e-06, - "loss": 0.9726, - "step": 1898 - }, - { - "epoch": 0.1427175710205922, - "grad_norm": 2.5292905593852133, - "learning_rate": 3.868327309113079e-06, - "loss": 1.0967, - "step": 1899 - }, - { - "epoch": 0.14279272508642718, - "grad_norm": 2.5460450862179735, - "learning_rate": 3.8681535266203464e-06, - "loss": 1.0182, - "step": 1900 - }, - { - "epoch": 0.14286787915226215, - "grad_norm": 2.7247815805794846, - "learning_rate": 3.867979633432365e-06, - "loss": 1.0028, - "step": 1901 - }, - { - "epoch": 0.1429430332180971, - "grad_norm": 0.7812400054292336, - "learning_rate": 3.867805629559438e-06, - "loss": 0.8412, - "step": 1902 - }, - { - "epoch": 0.14301818728393206, - "grad_norm": 1.9504810595733229, - "learning_rate": 3.867631515011874e-06, - "loss": 1.0483, - "step": 1903 - }, - { - "epoch": 0.14309334134976703, - "grad_norm": 2.0841666144617994, - "learning_rate": 3.8674572897999915e-06, - "loss": 1.0533, - "step": 1904 - }, - { - "epoch": 0.14316849541560198, - "grad_norm": 1.7572095048650394, - "learning_rate": 3.8672829539341136e-06, - "loss": 1.0412, - "step": 1905 - }, - { - "epoch": 0.14324364948143695, - "grad_norm": 1.5645354936230351, - "learning_rate": 3.8671085074245704e-06, - "loss": 1.073, - "step": 1906 - }, - { - "epoch": 0.14331880354727192, - "grad_norm": 1.4428004523475135, - "learning_rate": 3.8669339502816985e-06, - "loss": 0.902, - "step": 1907 - }, - { - "epoch": 0.14339395761310686, - "grad_norm": 3.3691738845731525, - "learning_rate": 3.866759282515841e-06, - "loss": 0.985, - "step": 1908 - }, - { - "epoch": 0.14346911167894183, - "grad_norm": 1.6168515506042243, - "learning_rate": 3.866584504137347e-06, - "loss": 0.9702, - "step": 1909 - }, - { - "epoch": 0.1435442657447768, - "grad_norm": 1.6505608645787144, - "learning_rate": 3.8664096151565755e-06, - "loss": 0.9281, - "step": 1910 - }, - { - "epoch": 0.14361941981061174, - "grad_norm": 2.026969507621331, - "learning_rate": 3.8662346155838855e-06, - "loss": 1.1031, - "step": 1911 - }, - { - "epoch": 0.1436945738764467, - "grad_norm": 1.6058418972629676, - "learning_rate": 3.866059505429649e-06, - "loss": 1.0346, - "step": 1912 - }, - { - "epoch": 0.14376972794228168, - "grad_norm": 2.0540271466623983, - "learning_rate": 3.865884284704241e-06, - "loss": 1.0867, - "step": 1913 - }, - { - "epoch": 0.14384488200811663, - "grad_norm": 1.7023693123062862, - "learning_rate": 3.8657089534180445e-06, - "loss": 1.0112, - "step": 1914 - }, - { - "epoch": 0.1439200360739516, - "grad_norm": 3.3040528465829198, - "learning_rate": 3.865533511581448e-06, - "loss": 1.0167, - "step": 1915 - }, - { - "epoch": 0.14399519013978657, - "grad_norm": 1.923637574339376, - "learning_rate": 3.865357959204847e-06, - "loss": 1.0084, - "step": 1916 - }, - { - "epoch": 0.14407034420562154, - "grad_norm": 1.6667524633576778, - "learning_rate": 3.865182296298644e-06, - "loss": 1.0312, - "step": 1917 - }, - { - "epoch": 0.14414549827145648, - "grad_norm": 1.577372985538678, - "learning_rate": 3.865006522873249e-06, - "loss": 1.1025, - "step": 1918 - }, - { - "epoch": 0.14422065233729145, - "grad_norm": 1.9981173953143403, - "learning_rate": 3.864830638939074e-06, - "loss": 1.0273, - "step": 1919 - }, - { - "epoch": 0.14429580640312642, - "grad_norm": 0.8305722908650295, - "learning_rate": 3.864654644506544e-06, - "loss": 0.9079, - "step": 1920 - }, - { - "epoch": 0.14437096046896136, - "grad_norm": 1.7914016956482732, - "learning_rate": 3.864478539586085e-06, - "loss": 1.0992, - "step": 1921 - }, - { - "epoch": 0.14444611453479633, - "grad_norm": 3.1522091218734936, - "learning_rate": 3.8643023241881344e-06, - "loss": 1.1421, - "step": 1922 - }, - { - "epoch": 0.1445212686006313, - "grad_norm": 1.924529188966, - "learning_rate": 3.864125998323131e-06, - "loss": 1.0333, - "step": 1923 - }, - { - "epoch": 0.14459642266646625, - "grad_norm": 1.7321913435699088, - "learning_rate": 3.863949562001524e-06, - "loss": 0.965, - "step": 1924 - }, - { - "epoch": 0.14467157673230122, - "grad_norm": 1.7253456488134888, - "learning_rate": 3.863773015233769e-06, - "loss": 1.0574, - "step": 1925 - }, - { - "epoch": 0.1447467307981362, - "grad_norm": 1.4286946669282432, - "learning_rate": 3.863596358030326e-06, - "loss": 0.9203, - "step": 1926 - }, - { - "epoch": 0.14482188486397113, - "grad_norm": 1.8131269488740038, - "learning_rate": 3.863419590401661e-06, - "loss": 1.0372, - "step": 1927 - }, - { - "epoch": 0.1448970389298061, - "grad_norm": 1.637619535436613, - "learning_rate": 3.8632427123582505e-06, - "loss": 0.9891, - "step": 1928 - }, - { - "epoch": 0.14497219299564107, - "grad_norm": 1.7871118430865813, - "learning_rate": 3.8630657239105754e-06, - "loss": 1.034, - "step": 1929 - }, - { - "epoch": 0.14504734706147604, - "grad_norm": 2.2173855306675403, - "learning_rate": 3.862888625069121e-06, - "loss": 0.9467, - "step": 1930 - }, - { - "epoch": 0.14512250112731098, - "grad_norm": 1.9807775132602232, - "learning_rate": 3.8627114158443825e-06, - "loss": 1.1192, - "step": 1931 - }, - { - "epoch": 0.14519765519314595, - "grad_norm": 1.8460868638370695, - "learning_rate": 3.862534096246859e-06, - "loss": 1.0585, - "step": 1932 - }, - { - "epoch": 0.14527280925898092, - "grad_norm": 2.2120135489147157, - "learning_rate": 3.862356666287059e-06, - "loss": 0.9642, - "step": 1933 - }, - { - "epoch": 0.14534796332481587, - "grad_norm": 0.8019662416540272, - "learning_rate": 3.862179125975495e-06, - "loss": 0.8389, - "step": 1934 - }, - { - "epoch": 0.14542311739065084, - "grad_norm": 1.6789447650423495, - "learning_rate": 3.862001475322687e-06, - "loss": 1.0931, - "step": 1935 - }, - { - "epoch": 0.1454982714564858, - "grad_norm": 1.741893384252685, - "learning_rate": 3.861823714339162e-06, - "loss": 0.9422, - "step": 1936 - }, - { - "epoch": 0.14557342552232075, - "grad_norm": 19.28617491802742, - "learning_rate": 3.861645843035452e-06, - "loss": 0.9718, - "step": 1937 - }, - { - "epoch": 0.14564857958815572, - "grad_norm": 2.7876238705606395, - "learning_rate": 3.861467861422096e-06, - "loss": 1.061, - "step": 1938 - }, - { - "epoch": 0.1457237336539907, - "grad_norm": 2.3566455337066086, - "learning_rate": 3.861289769509643e-06, - "loss": 0.9561, - "step": 1939 - }, - { - "epoch": 0.14579888771982563, - "grad_norm": 287.26985353496457, - "learning_rate": 3.861111567308643e-06, - "loss": 1.007, - "step": 1940 - }, - { - "epoch": 0.1458740417856606, - "grad_norm": 1.8274022817513322, - "learning_rate": 3.860933254829656e-06, - "loss": 1.1175, - "step": 1941 - }, - { - "epoch": 0.14594919585149557, - "grad_norm": 2.07611950927611, - "learning_rate": 3.860754832083247e-06, - "loss": 0.9302, - "step": 1942 - }, - { - "epoch": 0.14602434991733051, - "grad_norm": 1.5085973722788757, - "learning_rate": 3.86057629907999e-06, - "loss": 0.9298, - "step": 1943 - }, - { - "epoch": 0.14609950398316549, - "grad_norm": 2.3580214299788502, - "learning_rate": 3.8603976558304624e-06, - "loss": 0.9163, - "step": 1944 - }, - { - "epoch": 0.14617465804900046, - "grad_norm": 1.7435695486459886, - "learning_rate": 3.86021890234525e-06, - "loss": 0.9775, - "step": 1945 - }, - { - "epoch": 0.14624981211483543, - "grad_norm": 2.1259536847069467, - "learning_rate": 3.860040038634944e-06, - "loss": 0.9155, - "step": 1946 - }, - { - "epoch": 0.14632496618067037, - "grad_norm": 1.6635667662457814, - "learning_rate": 3.8598610647101426e-06, - "loss": 1.0672, - "step": 1947 - }, - { - "epoch": 0.14640012024650534, - "grad_norm": 1.8052141486278037, - "learning_rate": 3.859681980581452e-06, - "loss": 0.9456, - "step": 1948 - }, - { - "epoch": 0.1464752743123403, - "grad_norm": 2.167466553954687, - "learning_rate": 3.859502786259482e-06, - "loss": 1.0112, - "step": 1949 - }, - { - "epoch": 0.14655042837817525, - "grad_norm": 1.468979594364836, - "learning_rate": 3.8593234817548525e-06, - "loss": 1.015, - "step": 1950 - }, - { - "epoch": 0.14662558244401022, - "grad_norm": 1.7888961114928483, - "learning_rate": 3.859144067078186e-06, - "loss": 0.9961, - "step": 1951 - }, - { - "epoch": 0.1467007365098452, - "grad_norm": 0.7946985479345192, - "learning_rate": 3.858964542240115e-06, - "loss": 0.9081, - "step": 1952 - }, - { - "epoch": 0.14677589057568013, - "grad_norm": 2.3049661758820776, - "learning_rate": 3.8587849072512755e-06, - "loss": 1.0356, - "step": 1953 - }, - { - "epoch": 0.1468510446415151, - "grad_norm": 1.7794520355474504, - "learning_rate": 3.858605162122314e-06, - "loss": 1.0291, - "step": 1954 - }, - { - "epoch": 0.14692619870735008, - "grad_norm": 2.6517907803603125, - "learning_rate": 3.858425306863878e-06, - "loss": 0.8917, - "step": 1955 - }, - { - "epoch": 0.14700135277318502, - "grad_norm": 2.494610412851666, - "learning_rate": 3.858245341486627e-06, - "loss": 1.0379, - "step": 1956 - }, - { - "epoch": 0.14707650683902, - "grad_norm": 4.5611593288568395, - "learning_rate": 3.858065266001224e-06, - "loss": 1.0096, - "step": 1957 - }, - { - "epoch": 0.14715166090485496, - "grad_norm": 2.7477858906809662, - "learning_rate": 3.857885080418339e-06, - "loss": 1.0039, - "step": 1958 - }, - { - "epoch": 0.1472268149706899, - "grad_norm": 2.2666685660672177, - "learning_rate": 3.857704784748648e-06, - "loss": 1.0445, - "step": 1959 - }, - { - "epoch": 0.14730196903652487, - "grad_norm": 2.377010598659357, - "learning_rate": 3.857524379002835e-06, - "loss": 0.9438, - "step": 1960 - }, - { - "epoch": 0.14737712310235984, - "grad_norm": 1.5813423823333055, - "learning_rate": 3.85734386319159e-06, - "loss": 1.0889, - "step": 1961 - }, - { - "epoch": 0.1474522771681948, - "grad_norm": 1.7417259802445357, - "learning_rate": 3.857163237325608e-06, - "loss": 0.9968, - "step": 1962 - }, - { - "epoch": 0.14752743123402975, - "grad_norm": 2.1150710447026135, - "learning_rate": 3.856982501415595e-06, - "loss": 1.0639, - "step": 1963 - }, - { - "epoch": 0.14760258529986472, - "grad_norm": 2.4034389573518, - "learning_rate": 3.8568016554722554e-06, - "loss": 1.0715, - "step": 1964 - }, - { - "epoch": 0.1476777393656997, - "grad_norm": 1.9364435330448297, - "learning_rate": 3.856620699506308e-06, - "loss": 1.0597, - "step": 1965 - }, - { - "epoch": 0.14775289343153464, - "grad_norm": 2.0446413388785034, - "learning_rate": 3.856439633528476e-06, - "loss": 1.0696, - "step": 1966 - }, - { - "epoch": 0.1478280474973696, - "grad_norm": 1.5884859252488657, - "learning_rate": 3.856258457549486e-06, - "loss": 0.9272, - "step": 1967 - }, - { - "epoch": 0.14790320156320458, - "grad_norm": 2.9749443882464726, - "learning_rate": 3.856077171580074e-06, - "loss": 1.0449, - "step": 1968 - }, - { - "epoch": 0.14797835562903952, - "grad_norm": 1.8753051509500227, - "learning_rate": 3.855895775630983e-06, - "loss": 0.9157, - "step": 1969 - }, - { - "epoch": 0.1480535096948745, - "grad_norm": 2.678889920509478, - "learning_rate": 3.85571426971296e-06, - "loss": 1.0926, - "step": 1970 - }, - { - "epoch": 0.14812866376070946, - "grad_norm": 4.306822868522131, - "learning_rate": 3.8555326538367605e-06, - "loss": 1.1039, - "step": 1971 - }, - { - "epoch": 0.1482038178265444, - "grad_norm": 1.6536102130888237, - "learning_rate": 3.855350928013145e-06, - "loss": 1.0805, - "step": 1972 - }, - { - "epoch": 0.14827897189237937, - "grad_norm": 1.8705692073660087, - "learning_rate": 3.855169092252884e-06, - "loss": 1.0291, - "step": 1973 - }, - { - "epoch": 0.14835412595821434, - "grad_norm": 1.7105234420227127, - "learning_rate": 3.85498714656675e-06, - "loss": 0.9935, - "step": 1974 - }, - { - "epoch": 0.14842928002404931, - "grad_norm": 2.008408157516533, - "learning_rate": 3.854805090965525e-06, - "loss": 0.9931, - "step": 1975 - }, - { - "epoch": 0.14850443408988426, - "grad_norm": 1.6793243015531536, - "learning_rate": 3.854622925459994e-06, - "loss": 1.033, - "step": 1976 - }, - { - "epoch": 0.14857958815571923, - "grad_norm": 2.681658139765499, - "learning_rate": 3.854440650060955e-06, - "loss": 0.9975, - "step": 1977 - }, - { - "epoch": 0.1486547422215542, - "grad_norm": 1.5962428624076455, - "learning_rate": 3.854258264779204e-06, - "loss": 1.035, - "step": 1978 - }, - { - "epoch": 0.14872989628738914, - "grad_norm": 1.881894342835547, - "learning_rate": 3.854075769625552e-06, - "loss": 1.0314, - "step": 1979 - }, - { - "epoch": 0.1488050503532241, - "grad_norm": 1.9403050943738023, - "learning_rate": 3.8538931646108105e-06, - "loss": 0.9648, - "step": 1980 - }, - { - "epoch": 0.14888020441905908, - "grad_norm": 2.1834537686432123, - "learning_rate": 3.853710449745801e-06, - "loss": 1.0132, - "step": 1981 - }, - { - "epoch": 0.14895535848489402, - "grad_norm": 1.6497129595223279, - "learning_rate": 3.853527625041347e-06, - "loss": 1.0069, - "step": 1982 - }, - { - "epoch": 0.149030512550729, - "grad_norm": 1.7913935771082965, - "learning_rate": 3.853344690508285e-06, - "loss": 0.9348, - "step": 1983 - }, - { - "epoch": 0.14910566661656396, - "grad_norm": 1.979214423423273, - "learning_rate": 3.853161646157453e-06, - "loss": 1.0573, - "step": 1984 - }, - { - "epoch": 0.1491808206823989, - "grad_norm": 1.743884367876018, - "learning_rate": 3.852978491999697e-06, - "loss": 1.0784, - "step": 1985 - }, - { - "epoch": 0.14925597474823388, - "grad_norm": 2.196901879614186, - "learning_rate": 3.852795228045869e-06, - "loss": 1.0755, - "step": 1986 - }, - { - "epoch": 0.14933112881406885, - "grad_norm": 4.503692383663645, - "learning_rate": 3.85261185430683e-06, - "loss": 1.0279, - "step": 1987 - }, - { - "epoch": 0.1494062828799038, - "grad_norm": 1.7462634659113467, - "learning_rate": 3.8524283707934445e-06, - "loss": 1.1328, - "step": 1988 - }, - { - "epoch": 0.14948143694573876, - "grad_norm": 1.8357405536610654, - "learning_rate": 3.8522447775165845e-06, - "loss": 0.9856, - "step": 1989 - }, - { - "epoch": 0.14955659101157373, - "grad_norm": 1.7091440355106717, - "learning_rate": 3.852061074487129e-06, - "loss": 1.0197, - "step": 1990 - }, - { - "epoch": 0.1496317450774087, - "grad_norm": 1.6495307081589672, - "learning_rate": 3.851877261715961e-06, - "loss": 1.0632, - "step": 1991 - }, - { - "epoch": 0.14970689914324364, - "grad_norm": 3.2459129163773888, - "learning_rate": 3.851693339213976e-06, - "loss": 1.0911, - "step": 1992 - }, - { - "epoch": 0.1497820532090786, - "grad_norm": 1.6868142041348952, - "learning_rate": 3.8515093069920695e-06, - "loss": 1.0111, - "step": 1993 - }, - { - "epoch": 0.14985720727491358, - "grad_norm": 1.556048923355828, - "learning_rate": 3.851325165061147e-06, - "loss": 1.0609, - "step": 1994 - }, - { - "epoch": 0.14993236134074853, - "grad_norm": 1.7390353849341595, - "learning_rate": 3.851140913432118e-06, - "loss": 1.0287, - "step": 1995 - }, - { - "epoch": 0.1500075154065835, - "grad_norm": 1.781772001642065, - "learning_rate": 3.850956552115903e-06, - "loss": 0.9803, - "step": 1996 - }, - { - "epoch": 0.15008266947241847, - "grad_norm": 1.8406959714305682, - "learning_rate": 3.850772081123423e-06, - "loss": 1.0977, - "step": 1997 - }, - { - "epoch": 0.1501578235382534, - "grad_norm": 1.4768129973890405, - "learning_rate": 3.850587500465611e-06, - "loss": 1.0094, - "step": 1998 - }, - { - "epoch": 0.15023297760408838, - "grad_norm": 1.3826824924540575, - "learning_rate": 3.850402810153403e-06, - "loss": 0.9665, - "step": 1999 - }, - { - "epoch": 0.15030813166992335, - "grad_norm": 1.9073969407944948, - "learning_rate": 3.850218010197743e-06, - "loss": 0.9295, - "step": 2000 - }, - { - "epoch": 0.1503832857357583, - "grad_norm": 1.63772537485918, - "learning_rate": 3.850033100609581e-06, - "loss": 1.1044, - "step": 2001 - }, - { - "epoch": 0.15045843980159326, - "grad_norm": 1.7041627714428147, - "learning_rate": 3.8498480813998735e-06, - "loss": 0.9964, - "step": 2002 - }, - { - "epoch": 0.15053359386742823, - "grad_norm": 2.1271173362990656, - "learning_rate": 3.849662952579583e-06, - "loss": 1.0177, - "step": 2003 - }, - { - "epoch": 0.15060874793326318, - "grad_norm": 1.7644292782938584, - "learning_rate": 3.8494777141596805e-06, - "loss": 1.0423, - "step": 2004 - }, - { - "epoch": 0.15068390199909815, - "grad_norm": 0.8338797009998465, - "learning_rate": 3.8492923661511405e-06, - "loss": 0.8841, - "step": 2005 - }, - { - "epoch": 0.15075905606493312, - "grad_norm": 3.371204361145211, - "learning_rate": 3.8491069085649475e-06, - "loss": 1.0755, - "step": 2006 - }, - { - "epoch": 0.1508342101307681, - "grad_norm": 1.6363860818739586, - "learning_rate": 3.848921341412088e-06, - "loss": 1.0404, - "step": 2007 - }, - { - "epoch": 0.15090936419660303, - "grad_norm": 1.7487310849980764, - "learning_rate": 3.848735664703561e-06, - "loss": 1.0787, - "step": 2008 - }, - { - "epoch": 0.150984518262438, - "grad_norm": 1.5047312665775536, - "learning_rate": 3.848549878450365e-06, - "loss": 1.0755, - "step": 2009 - }, - { - "epoch": 0.15105967232827297, - "grad_norm": 1.8589016608400972, - "learning_rate": 3.84836398266351e-06, - "loss": 1.0786, - "step": 2010 - }, - { - "epoch": 0.1511348263941079, - "grad_norm": 2.2274684200766943, - "learning_rate": 3.848177977354012e-06, - "loss": 1.0867, - "step": 2011 - }, - { - "epoch": 0.15120998045994288, - "grad_norm": 0.8384795708894465, - "learning_rate": 3.847991862532892e-06, - "loss": 0.8965, - "step": 2012 - }, - { - "epoch": 0.15128513452577785, - "grad_norm": 1.8026062825861118, - "learning_rate": 3.847805638211177e-06, - "loss": 1.0698, - "step": 2013 - }, - { - "epoch": 0.1513602885916128, - "grad_norm": 1.7774302702584717, - "learning_rate": 3.847619304399902e-06, - "loss": 1.0314, - "step": 2014 - }, - { - "epoch": 0.15143544265744777, - "grad_norm": 2.0729268391677484, - "learning_rate": 3.847432861110109e-06, - "loss": 1.008, - "step": 2015 - }, - { - "epoch": 0.15151059672328274, - "grad_norm": 2.5336897595117747, - "learning_rate": 3.847246308352844e-06, - "loss": 0.955, - "step": 2016 - }, - { - "epoch": 0.15158575078911768, - "grad_norm": 2.0355849738866434, - "learning_rate": 3.847059646139162e-06, - "loss": 1.0226, - "step": 2017 - }, - { - "epoch": 0.15166090485495265, - "grad_norm": 2.9635008332579735, - "learning_rate": 3.846872874480123e-06, - "loss": 1.0444, - "step": 2018 - }, - { - "epoch": 0.15173605892078762, - "grad_norm": 1.6069732818756226, - "learning_rate": 3.8466859933867945e-06, - "loss": 1.0185, - "step": 2019 - }, - { - "epoch": 0.1518112129866226, - "grad_norm": 0.9213233525694313, - "learning_rate": 3.846499002870249e-06, - "loss": 1.0035, - "step": 2020 - }, - { - "epoch": 0.15188636705245753, - "grad_norm": 2.2819906626183517, - "learning_rate": 3.846311902941567e-06, - "loss": 0.9088, - "step": 2021 - }, - { - "epoch": 0.1519615211182925, - "grad_norm": 2.658954544987249, - "learning_rate": 3.846124693611835e-06, - "loss": 0.9821, - "step": 2022 - }, - { - "epoch": 0.15203667518412747, - "grad_norm": 2.420184523743445, - "learning_rate": 3.845937374892145e-06, - "loss": 0.8334, - "step": 2023 - }, - { - "epoch": 0.15211182924996242, - "grad_norm": 2.521325328724765, - "learning_rate": 3.845749946793597e-06, - "loss": 0.8189, - "step": 2024 - }, - { - "epoch": 0.1521869833157974, - "grad_norm": 1.741960108675826, - "learning_rate": 3.845562409327297e-06, - "loss": 1.0419, - "step": 2025 - }, - { - "epoch": 0.15226213738163236, - "grad_norm": 1.6141742885464139, - "learning_rate": 3.8453747625043575e-06, - "loss": 1.0059, - "step": 2026 - }, - { - "epoch": 0.1523372914474673, - "grad_norm": 1.7523026984104915, - "learning_rate": 3.8451870063358966e-06, - "loss": 1.1312, - "step": 2027 - }, - { - "epoch": 0.15241244551330227, - "grad_norm": 2.0480417900888144, - "learning_rate": 3.844999140833039e-06, - "loss": 0.944, - "step": 2028 - }, - { - "epoch": 0.15248759957913724, - "grad_norm": 1.788868123118921, - "learning_rate": 3.844811166006919e-06, - "loss": 1.0314, - "step": 2029 - }, - { - "epoch": 0.15256275364497218, - "grad_norm": 3.1886313431516817, - "learning_rate": 3.844623081868672e-06, - "loss": 1.0326, - "step": 2030 - }, - { - "epoch": 0.15263790771080715, - "grad_norm": 2.060510080018042, - "learning_rate": 3.844434888429444e-06, - "loss": 0.9877, - "step": 2031 - }, - { - "epoch": 0.15271306177664212, - "grad_norm": 1.9570710869120966, - "learning_rate": 3.8442465857003864e-06, - "loss": 1.031, - "step": 2032 - }, - { - "epoch": 0.15278821584247707, - "grad_norm": 1.4934096998176352, - "learning_rate": 3.844058173692657e-06, - "loss": 1.0806, - "step": 2033 - }, - { - "epoch": 0.15286336990831204, - "grad_norm": 1.849622007365125, - "learning_rate": 3.843869652417418e-06, - "loss": 1.0487, - "step": 2034 - }, - { - "epoch": 0.152938523974147, - "grad_norm": 1.456316406962481, - "learning_rate": 3.843681021885842e-06, - "loss": 1.0417, - "step": 2035 - }, - { - "epoch": 0.15301367803998198, - "grad_norm": 16.902426352496335, - "learning_rate": 3.843492282109107e-06, - "loss": 0.8502, - "step": 2036 - }, - { - "epoch": 0.15308883210581692, - "grad_norm": 1.5270279267560949, - "learning_rate": 3.843303433098393e-06, - "loss": 1.0177, - "step": 2037 - }, - { - "epoch": 0.1531639861716519, - "grad_norm": 1.6954347768538038, - "learning_rate": 3.843114474864894e-06, - "loss": 1.1269, - "step": 2038 - }, - { - "epoch": 0.15323914023748686, - "grad_norm": 0.6887941750623499, - "learning_rate": 3.842925407419803e-06, - "loss": 0.8295, - "step": 2039 - }, - { - "epoch": 0.1533142943033218, - "grad_norm": 1.8622738976176207, - "learning_rate": 3.842736230774325e-06, - "loss": 0.9278, - "step": 2040 - }, - { - "epoch": 0.15338944836915677, - "grad_norm": 8.146227528691963, - "learning_rate": 3.842546944939669e-06, - "loss": 0.9424, - "step": 2041 - }, - { - "epoch": 0.15346460243499174, - "grad_norm": 1.2410994268789637, - "learning_rate": 3.842357549927051e-06, - "loss": 1.025, - "step": 2042 - }, - { - "epoch": 0.15353975650082669, - "grad_norm": 1.8867573546572949, - "learning_rate": 3.842168045747693e-06, - "loss": 1.0396, - "step": 2043 - }, - { - "epoch": 0.15361491056666166, - "grad_norm": 1.8694357643977695, - "learning_rate": 3.8419784324128256e-06, - "loss": 1.0442, - "step": 2044 - }, - { - "epoch": 0.15369006463249663, - "grad_norm": 1.7381157614123952, - "learning_rate": 3.841788709933682e-06, - "loss": 0.8597, - "step": 2045 - }, - { - "epoch": 0.15376521869833157, - "grad_norm": 1.7972094017343252, - "learning_rate": 3.841598878321503e-06, - "loss": 1.0635, - "step": 2046 - }, - { - "epoch": 0.15384037276416654, - "grad_norm": 1.753805654652759, - "learning_rate": 3.84140893758754e-06, - "loss": 1.1841, - "step": 2047 - }, - { - "epoch": 0.1539155268300015, - "grad_norm": 3.1053408422459836, - "learning_rate": 3.841218887743046e-06, - "loss": 1.0406, - "step": 2048 - }, - { - "epoch": 0.15399068089583645, - "grad_norm": 1.643113274777302, - "learning_rate": 3.8410287287992825e-06, - "loss": 1.0048, - "step": 2049 - }, - { - "epoch": 0.15406583496167142, - "grad_norm": 1.5139151297734164, - "learning_rate": 3.840838460767517e-06, - "loss": 0.9205, - "step": 2050 - }, - { - "epoch": 0.1541409890275064, - "grad_norm": 1.8324424428604802, - "learning_rate": 3.840648083659024e-06, - "loss": 1.0155, - "step": 2051 - }, - { - "epoch": 0.15421614309334136, - "grad_norm": 2.1463812958643502, - "learning_rate": 3.840457597485083e-06, - "loss": 1.0225, - "step": 2052 - }, - { - "epoch": 0.1542912971591763, - "grad_norm": 0.9411593861422989, - "learning_rate": 3.840267002256983e-06, - "loss": 0.8956, - "step": 2053 - }, - { - "epoch": 0.15436645122501128, - "grad_norm": 2.19864724357744, - "learning_rate": 3.840076297986015e-06, - "loss": 1.0074, - "step": 2054 - }, - { - "epoch": 0.15444160529084625, - "grad_norm": 2.2125854820505486, - "learning_rate": 3.839885484683481e-06, - "loss": 1.0034, - "step": 2055 - }, - { - "epoch": 0.1545167593566812, - "grad_norm": 1.7289711562089998, - "learning_rate": 3.839694562360686e-06, - "loss": 0.994, - "step": 2056 - }, - { - "epoch": 0.15459191342251616, - "grad_norm": 1.7113141903705036, - "learning_rate": 3.839503531028944e-06, - "loss": 0.9721, - "step": 2057 - }, - { - "epoch": 0.15466706748835113, - "grad_norm": 1.9595404888154593, - "learning_rate": 3.839312390699573e-06, - "loss": 1.0382, - "step": 2058 - }, - { - "epoch": 0.15474222155418607, - "grad_norm": 1.463760178658451, - "learning_rate": 3.8391211413839005e-06, - "loss": 1.0417, - "step": 2059 - }, - { - "epoch": 0.15481737562002104, - "grad_norm": 2.009628009440584, - "learning_rate": 3.838929783093258e-06, - "loss": 0.9864, - "step": 2060 - }, - { - "epoch": 0.154892529685856, - "grad_norm": 2.107215963208467, - "learning_rate": 3.838738315838983e-06, - "loss": 1.0417, - "step": 2061 - }, - { - "epoch": 0.15496768375169095, - "grad_norm": 12.89893908019304, - "learning_rate": 3.838546739632423e-06, - "loss": 1.0421, - "step": 2062 - }, - { - "epoch": 0.15504283781752592, - "grad_norm": 2.082274231321674, - "learning_rate": 3.838355054484928e-06, - "loss": 1.0883, - "step": 2063 - }, - { - "epoch": 0.1551179918833609, - "grad_norm": 2.199445533239349, - "learning_rate": 3.838163260407857e-06, - "loss": 1.0073, - "step": 2064 - }, - { - "epoch": 0.15519314594919587, - "grad_norm": 2.1829461790641833, - "learning_rate": 3.837971357412573e-06, - "loss": 0.9306, - "step": 2065 - }, - { - "epoch": 0.1552683000150308, - "grad_norm": 1.9607158398826807, - "learning_rate": 3.837779345510449e-06, - "loss": 1.0056, - "step": 2066 - }, - { - "epoch": 0.15534345408086578, - "grad_norm": 1.6522319927253055, - "learning_rate": 3.837587224712861e-06, - "loss": 0.9581, - "step": 2067 - }, - { - "epoch": 0.15541860814670075, - "grad_norm": 1.4099249399105047, - "learning_rate": 3.837394995031193e-06, - "loss": 0.8579, - "step": 2068 - }, - { - "epoch": 0.1554937622125357, - "grad_norm": 1.8314634960305598, - "learning_rate": 3.837202656476836e-06, - "loss": 1.0655, - "step": 2069 - }, - { - "epoch": 0.15556891627837066, - "grad_norm": 1.7266678583786632, - "learning_rate": 3.837010209061187e-06, - "loss": 0.938, - "step": 2070 - }, - { - "epoch": 0.15564407034420563, - "grad_norm": 2.2047102615403475, - "learning_rate": 3.836817652795648e-06, - "loss": 0.9877, - "step": 2071 - }, - { - "epoch": 0.15571922441004057, - "grad_norm": 2.127659872034742, - "learning_rate": 3.8366249876916294e-06, - "loss": 1.0249, - "step": 2072 - }, - { - "epoch": 0.15579437847587554, - "grad_norm": 2.0556661501622417, - "learning_rate": 3.8364322137605484e-06, - "loss": 0.9632, - "step": 2073 - }, - { - "epoch": 0.15586953254171051, - "grad_norm": 1.8188275350187366, - "learning_rate": 3.836239331013825e-06, - "loss": 1.0702, - "step": 2074 - }, - { - "epoch": 0.15594468660754546, - "grad_norm": 0.6979544306734334, - "learning_rate": 3.836046339462891e-06, - "loss": 0.8511, - "step": 2075 - }, - { - "epoch": 0.15601984067338043, - "grad_norm": 1.637619389848102, - "learning_rate": 3.83585323911918e-06, - "loss": 1.0711, - "step": 2076 - }, - { - "epoch": 0.1560949947392154, - "grad_norm": 2.04978456528207, - "learning_rate": 3.835660029994135e-06, - "loss": 1.0289, - "step": 2077 - }, - { - "epoch": 0.15617014880505034, - "grad_norm": 1.6134837764684464, - "learning_rate": 3.835466712099204e-06, - "loss": 0.9673, - "step": 2078 - }, - { - "epoch": 0.1562453028708853, - "grad_norm": 1.6355497059092634, - "learning_rate": 3.835273285445842e-06, - "loss": 1.0296, - "step": 2079 - }, - { - "epoch": 0.15632045693672028, - "grad_norm": 1.860559807283354, - "learning_rate": 3.83507975004551e-06, - "loss": 1.0189, - "step": 2080 - }, - { - "epoch": 0.15639561100255525, - "grad_norm": 2.3265787404597082, - "learning_rate": 3.8348861059096755e-06, - "loss": 1.0269, - "step": 2081 - }, - { - "epoch": 0.1564707650683902, - "grad_norm": 2.2450758775492132, - "learning_rate": 3.834692353049814e-06, - "loss": 1.0159, - "step": 2082 - }, - { - "epoch": 0.15654591913422516, - "grad_norm": 1.502998613210966, - "learning_rate": 3.834498491477403e-06, - "loss": 1.0599, - "step": 2083 - }, - { - "epoch": 0.15662107320006013, - "grad_norm": 1.7734973241991798, - "learning_rate": 3.834304521203934e-06, - "loss": 1.0526, - "step": 2084 - }, - { - "epoch": 0.15669622726589508, - "grad_norm": 1.7278243757071967, - "learning_rate": 3.834110442240896e-06, - "loss": 1.0318, - "step": 2085 - }, - { - "epoch": 0.15677138133173005, - "grad_norm": 2.316031877650797, - "learning_rate": 3.833916254599792e-06, - "loss": 0.9821, - "step": 2086 - }, - { - "epoch": 0.15684653539756502, - "grad_norm": 1.6640458083770768, - "learning_rate": 3.833721958292128e-06, - "loss": 1.0122, - "step": 2087 - }, - { - "epoch": 0.15692168946339996, - "grad_norm": 1.9531862783355862, - "learning_rate": 3.8335275533294155e-06, - "loss": 1.0242, - "step": 2088 - }, - { - "epoch": 0.15699684352923493, - "grad_norm": 1.8968492936681618, - "learning_rate": 3.833333039723174e-06, - "loss": 0.9728, - "step": 2089 - }, - { - "epoch": 0.1570719975950699, - "grad_norm": 5.856530885260469, - "learning_rate": 3.83313841748493e-06, - "loss": 1.0873, - "step": 2090 - }, - { - "epoch": 0.15714715166090484, - "grad_norm": 1.5832172234107726, - "learning_rate": 3.832943686626215e-06, - "loss": 1.0926, - "step": 2091 - }, - { - "epoch": 0.1572223057267398, - "grad_norm": 1.8587004135074732, - "learning_rate": 3.832748847158568e-06, - "loss": 0.9988, - "step": 2092 - }, - { - "epoch": 0.15729745979257478, - "grad_norm": 2.116654783000277, - "learning_rate": 3.8325538990935346e-06, - "loss": 1.0068, - "step": 2093 - }, - { - "epoch": 0.15737261385840973, - "grad_norm": 17.221870818022538, - "learning_rate": 3.832358842442665e-06, - "loss": 1.0813, - "step": 2094 - }, - { - "epoch": 0.1574477679242447, - "grad_norm": 1.901915265025954, - "learning_rate": 3.832163677217516e-06, - "loss": 1.0119, - "step": 2095 - }, - { - "epoch": 0.15752292199007967, - "grad_norm": 1.6448527042336885, - "learning_rate": 3.831968403429655e-06, - "loss": 0.9607, - "step": 2096 - }, - { - "epoch": 0.15759807605591464, - "grad_norm": 1.9000214926859267, - "learning_rate": 3.83177302109065e-06, - "loss": 1.0073, - "step": 2097 - }, - { - "epoch": 0.15767323012174958, - "grad_norm": 2.451546325046724, - "learning_rate": 3.8315775302120796e-06, - "loss": 1.0395, - "step": 2098 - }, - { - "epoch": 0.15774838418758455, - "grad_norm": 1.896729756911077, - "learning_rate": 3.831381930805526e-06, - "loss": 1.0409, - "step": 2099 - }, - { - "epoch": 0.15782353825341952, - "grad_norm": 1.5579909303765995, - "learning_rate": 3.831186222882582e-06, - "loss": 1.0204, - "step": 2100 - }, - { - "epoch": 0.15789869231925446, - "grad_norm": 1.6730974665796285, - "learning_rate": 3.830990406454841e-06, - "loss": 0.9779, - "step": 2101 - }, - { - "epoch": 0.15797384638508943, - "grad_norm": 1.7473834776879755, - "learning_rate": 3.8307944815339065e-06, - "loss": 1.0068, - "step": 2102 - }, - { - "epoch": 0.1580490004509244, - "grad_norm": 2.002704936957554, - "learning_rate": 3.83059844813139e-06, - "loss": 1.051, - "step": 2103 - }, - { - "epoch": 0.15812415451675935, - "grad_norm": 2.037320383029908, - "learning_rate": 3.830402306258904e-06, - "loss": 0.9827, - "step": 2104 - }, - { - "epoch": 0.15819930858259432, - "grad_norm": 2.216571221651101, - "learning_rate": 3.8302060559280735e-06, - "loss": 0.9806, - "step": 2105 - }, - { - "epoch": 0.1582744626484293, - "grad_norm": 1.7593233524418601, - "learning_rate": 3.830009697150526e-06, - "loss": 0.9412, - "step": 2106 - }, - { - "epoch": 0.15834961671426423, - "grad_norm": 1.589010334190148, - "learning_rate": 3.829813229937896e-06, - "loss": 0.935, - "step": 2107 - }, - { - "epoch": 0.1584247707800992, - "grad_norm": 1.5730950294401067, - "learning_rate": 3.829616654301824e-06, - "loss": 1.0501, - "step": 2108 - }, - { - "epoch": 0.15849992484593417, - "grad_norm": 1.3497672092211845, - "learning_rate": 3.829419970253961e-06, - "loss": 0.9542, - "step": 2109 - }, - { - "epoch": 0.1585750789117691, - "grad_norm": 1.940037757368464, - "learning_rate": 3.829223177805959e-06, - "loss": 1.0864, - "step": 2110 - }, - { - "epoch": 0.15865023297760408, - "grad_norm": 2.1446953382408585, - "learning_rate": 3.8290262769694785e-06, - "loss": 0.9563, - "step": 2111 - }, - { - "epoch": 0.15872538704343905, - "grad_norm": 1.5440381171210777, - "learning_rate": 3.828829267756188e-06, - "loss": 1.0597, - "step": 2112 - }, - { - "epoch": 0.15880054110927402, - "grad_norm": 1.6953506113857755, - "learning_rate": 3.82863215017776e-06, - "loss": 1.0296, - "step": 2113 - }, - { - "epoch": 0.15887569517510897, - "grad_norm": 2.6528700922891986, - "learning_rate": 3.828434924245874e-06, - "loss": 1.0052, - "step": 2114 - }, - { - "epoch": 0.15895084924094394, - "grad_norm": 1.4841194334373156, - "learning_rate": 3.828237589972218e-06, - "loss": 1.0046, - "step": 2115 - }, - { - "epoch": 0.1590260033067789, - "grad_norm": 1.963481635703091, - "learning_rate": 3.828040147368484e-06, - "loss": 1.0331, - "step": 2116 - }, - { - "epoch": 0.15910115737261385, - "grad_norm": 1.4809981635041733, - "learning_rate": 3.827842596446372e-06, - "loss": 1.0063, - "step": 2117 - }, - { - "epoch": 0.15917631143844882, - "grad_norm": 1.5565735384892991, - "learning_rate": 3.827644937217585e-06, - "loss": 0.9939, - "step": 2118 - }, - { - "epoch": 0.1592514655042838, - "grad_norm": 1.609985235869198, - "learning_rate": 3.827447169693839e-06, - "loss": 1.0958, - "step": 2119 - }, - { - "epoch": 0.15932661957011873, - "grad_norm": 1.9600952284898137, - "learning_rate": 3.827249293886849e-06, - "loss": 1.0717, - "step": 2120 - }, - { - "epoch": 0.1594017736359537, - "grad_norm": 2.6157838615931723, - "learning_rate": 3.827051309808342e-06, - "loss": 1.0423, - "step": 2121 - }, - { - "epoch": 0.15947692770178867, - "grad_norm": 1.479538114231473, - "learning_rate": 3.826853217470048e-06, - "loss": 0.9915, - "step": 2122 - }, - { - "epoch": 0.15955208176762362, - "grad_norm": 1.2584640047717504, - "learning_rate": 3.8266550168837065e-06, - "loss": 1.0067, - "step": 2123 - }, - { - "epoch": 0.1596272358334586, - "grad_norm": 1.9486315498298254, - "learning_rate": 3.82645670806106e-06, - "loss": 0.953, - "step": 2124 - }, - { - "epoch": 0.15970238989929356, - "grad_norm": 2.0075911462978113, - "learning_rate": 3.826258291013859e-06, - "loss": 1.0684, - "step": 2125 - }, - { - "epoch": 0.15977754396512853, - "grad_norm": 1.5806643008183976, - "learning_rate": 3.826059765753861e-06, - "loss": 0.9858, - "step": 2126 - }, - { - "epoch": 0.15985269803096347, - "grad_norm": 3.8734765288189785, - "learning_rate": 3.82586113229283e-06, - "loss": 1.0621, - "step": 2127 - }, - { - "epoch": 0.15992785209679844, - "grad_norm": 2.0474822330286218, - "learning_rate": 3.825662390642535e-06, - "loss": 0.9453, - "step": 2128 - }, - { - "epoch": 0.1600030061626334, - "grad_norm": 1.5536434942985147, - "learning_rate": 3.825463540814753e-06, - "loss": 0.9946, - "step": 2129 - }, - { - "epoch": 0.16007816022846835, - "grad_norm": 1.6958687568482713, - "learning_rate": 3.8252645828212655e-06, - "loss": 1.0112, - "step": 2130 - }, - { - "epoch": 0.16015331429430332, - "grad_norm": 2.1004052361548937, - "learning_rate": 3.825065516673862e-06, - "loss": 0.994, - "step": 2131 - }, - { - "epoch": 0.1602284683601383, - "grad_norm": 2.013198338060699, - "learning_rate": 3.824866342384338e-06, - "loss": 1.0801, - "step": 2132 - }, - { - "epoch": 0.16030362242597324, - "grad_norm": 1.7720551333622945, - "learning_rate": 3.824667059964496e-06, - "loss": 0.952, - "step": 2133 - }, - { - "epoch": 0.1603787764918082, - "grad_norm": 1.7600119161202443, - "learning_rate": 3.824467669426143e-06, - "loss": 0.9996, - "step": 2134 - }, - { - "epoch": 0.16045393055764318, - "grad_norm": 1.5624765012881432, - "learning_rate": 3.824268170781094e-06, - "loss": 0.9394, - "step": 2135 - }, - { - "epoch": 0.16052908462347812, - "grad_norm": 0.939930086934684, - "learning_rate": 3.82406856404117e-06, - "loss": 0.8952, - "step": 2136 - }, - { - "epoch": 0.1606042386893131, - "grad_norm": 2.2573337773395665, - "learning_rate": 3.8238688492182e-06, - "loss": 1.0898, - "step": 2137 - }, - { - "epoch": 0.16067939275514806, - "grad_norm": 1.9599625188785972, - "learning_rate": 3.823669026324016e-06, - "loss": 1.029, - "step": 2138 - }, - { - "epoch": 0.160754546820983, - "grad_norm": 2.3581939166301393, - "learning_rate": 3.823469095370459e-06, - "loss": 1.0851, - "step": 2139 - }, - { - "epoch": 0.16082970088681797, - "grad_norm": 1.4797578982209936, - "learning_rate": 3.823269056369376e-06, - "loss": 1.0205, - "step": 2140 - }, - { - "epoch": 0.16090485495265294, - "grad_norm": 1.8789814321342555, - "learning_rate": 3.8230689093326185e-06, - "loss": 1.1048, - "step": 2141 - }, - { - "epoch": 0.1609800090184879, - "grad_norm": 1.6558800050134725, - "learning_rate": 3.822868654272048e-06, - "loss": 0.9507, - "step": 2142 - }, - { - "epoch": 0.16105516308432286, - "grad_norm": 1.9245696986424943, - "learning_rate": 3.822668291199529e-06, - "loss": 1.1099, - "step": 2143 - }, - { - "epoch": 0.16113031715015783, - "grad_norm": 1.7636168546321582, - "learning_rate": 3.822467820126935e-06, - "loss": 0.9498, - "step": 2144 - }, - { - "epoch": 0.1612054712159928, - "grad_norm": 1.77083830739239, - "learning_rate": 3.822267241066143e-06, - "loss": 0.9647, - "step": 2145 - }, - { - "epoch": 0.16128062528182774, - "grad_norm": 2.493049783325125, - "learning_rate": 3.8220665540290395e-06, - "loss": 1.0404, - "step": 2146 - }, - { - "epoch": 0.1613557793476627, - "grad_norm": 1.4506049505620802, - "learning_rate": 3.821865759027515e-06, - "loss": 0.9878, - "step": 2147 - }, - { - "epoch": 0.16143093341349768, - "grad_norm": 1.9661056682871472, - "learning_rate": 3.821664856073469e-06, - "loss": 0.9816, - "step": 2148 - }, - { - "epoch": 0.16150608747933262, - "grad_norm": 2.4373387014377226, - "learning_rate": 3.821463845178803e-06, - "loss": 1.0477, - "step": 2149 - }, - { - "epoch": 0.1615812415451676, - "grad_norm": 2.318673425114357, - "learning_rate": 3.821262726355431e-06, - "loss": 1.0553, - "step": 2150 - }, - { - "epoch": 0.16165639561100256, - "grad_norm": 2.7093642009093104, - "learning_rate": 3.821061499615268e-06, - "loss": 0.961, - "step": 2151 - }, - { - "epoch": 0.1617315496768375, - "grad_norm": 0.831654494686408, - "learning_rate": 3.820860164970237e-06, - "loss": 0.8318, - "step": 2152 - }, - { - "epoch": 0.16180670374267248, - "grad_norm": 2.462011968594425, - "learning_rate": 3.820658722432269e-06, - "loss": 1.0421, - "step": 2153 - }, - { - "epoch": 0.16188185780850745, - "grad_norm": 2.150441514746101, - "learning_rate": 3.820457172013301e-06, - "loss": 1.0431, - "step": 2154 - }, - { - "epoch": 0.1619570118743424, - "grad_norm": 1.5982194201728686, - "learning_rate": 3.820255513725274e-06, - "loss": 1.056, - "step": 2155 - }, - { - "epoch": 0.16203216594017736, - "grad_norm": 1.8022496655681213, - "learning_rate": 3.820053747580137e-06, - "loss": 1.0616, - "step": 2156 - }, - { - "epoch": 0.16210732000601233, - "grad_norm": 2.085771075406864, - "learning_rate": 3.8198518735898465e-06, - "loss": 1.0286, - "step": 2157 - }, - { - "epoch": 0.1621824740718473, - "grad_norm": 1.4878107563754535, - "learning_rate": 3.819649891766364e-06, - "loss": 0.969, - "step": 2158 - }, - { - "epoch": 0.16225762813768224, - "grad_norm": 2.9099827247447685, - "learning_rate": 3.8194478021216566e-06, - "loss": 0.9921, - "step": 2159 - }, - { - "epoch": 0.1623327822035172, - "grad_norm": 1.7010835784526719, - "learning_rate": 3.8192456046677004e-06, - "loss": 1.1711, - "step": 2160 - }, - { - "epoch": 0.16240793626935218, - "grad_norm": 1.784474582866271, - "learning_rate": 3.819043299416476e-06, - "loss": 1.0696, - "step": 2161 - }, - { - "epoch": 0.16248309033518712, - "grad_norm": 1.649882401986248, - "learning_rate": 3.8188408863799706e-06, - "loss": 1.0046, - "step": 2162 - }, - { - "epoch": 0.1625582444010221, - "grad_norm": 2.182610414908396, - "learning_rate": 3.818638365570177e-06, - "loss": 0.9474, - "step": 2163 - }, - { - "epoch": 0.16263339846685707, - "grad_norm": 1.4594815593102068, - "learning_rate": 3.818435736999097e-06, - "loss": 1.0529, - "step": 2164 - }, - { - "epoch": 0.162708552532692, - "grad_norm": 3.290642075888298, - "learning_rate": 3.818233000678736e-06, - "loss": 0.982, - "step": 2165 - }, - { - "epoch": 0.16278370659852698, - "grad_norm": 1.9575937037737339, - "learning_rate": 3.8180301566211075e-06, - "loss": 1.0159, - "step": 2166 - }, - { - "epoch": 0.16285886066436195, - "grad_norm": 2.0971884889675647, - "learning_rate": 3.81782720483823e-06, - "loss": 1.0991, - "step": 2167 - }, - { - "epoch": 0.1629340147301969, - "grad_norm": 2.0918330056768193, - "learning_rate": 3.8176241453421305e-06, - "loss": 1.0614, - "step": 2168 - }, - { - "epoch": 0.16300916879603186, - "grad_norm": 1.6351695592842546, - "learning_rate": 3.81742097814484e-06, - "loss": 0.9711, - "step": 2169 - }, - { - "epoch": 0.16308432286186683, - "grad_norm": 0.8376709621214063, - "learning_rate": 3.817217703258397e-06, - "loss": 0.908, - "step": 2170 - }, - { - "epoch": 0.1631594769277018, - "grad_norm": 0.7442835472806005, - "learning_rate": 3.817014320694846e-06, - "loss": 0.8323, - "step": 2171 - }, - { - "epoch": 0.16323463099353674, - "grad_norm": 2.6777579698142184, - "learning_rate": 3.816810830466239e-06, - "loss": 1.0588, - "step": 2172 - }, - { - "epoch": 0.16330978505937171, - "grad_norm": 1.8553739824688682, - "learning_rate": 3.816607232584633e-06, - "loss": 0.9537, - "step": 2173 - }, - { - "epoch": 0.16338493912520669, - "grad_norm": 1.3785283594794484, - "learning_rate": 3.816403527062093e-06, - "loss": 1.0101, - "step": 2174 - }, - { - "epoch": 0.16346009319104163, - "grad_norm": 2.465686780883192, - "learning_rate": 3.816199713910688e-06, - "loss": 1.0816, - "step": 2175 - }, - { - "epoch": 0.1635352472568766, - "grad_norm": 1.3630063892027235, - "learning_rate": 3.815995793142495e-06, - "loss": 1.0037, - "step": 2176 - }, - { - "epoch": 0.16361040132271157, - "grad_norm": 2.5027082079571374, - "learning_rate": 3.815791764769598e-06, - "loss": 0.9357, - "step": 2177 - }, - { - "epoch": 0.1636855553885465, - "grad_norm": 0.8117540309607374, - "learning_rate": 3.815587628804086e-06, - "loss": 0.8951, - "step": 2178 - }, - { - "epoch": 0.16376070945438148, - "grad_norm": 2.110453351208155, - "learning_rate": 3.815383385258054e-06, - "loss": 1.0153, - "step": 2179 - }, - { - "epoch": 0.16383586352021645, - "grad_norm": 1.9356898804464882, - "learning_rate": 3.8151790341436046e-06, - "loss": 1.0062, - "step": 2180 - }, - { - "epoch": 0.1639110175860514, - "grad_norm": 1.643207805835269, - "learning_rate": 3.814974575472847e-06, - "loss": 1.0608, - "step": 2181 - }, - { - "epoch": 0.16398617165188636, - "grad_norm": 2.2906764463135243, - "learning_rate": 3.814770009257896e-06, - "loss": 1.0105, - "step": 2182 - }, - { - "epoch": 0.16406132571772133, - "grad_norm": 1.6589192887183914, - "learning_rate": 3.814565335510873e-06, - "loss": 1.0541, - "step": 2183 - }, - { - "epoch": 0.16413647978355628, - "grad_norm": 0.6651778335420899, - "learning_rate": 3.814360554243905e-06, - "loss": 0.8298, - "step": 2184 - }, - { - "epoch": 0.16421163384939125, - "grad_norm": 2.5240698810340536, - "learning_rate": 3.814155665469126e-06, - "loss": 0.9389, - "step": 2185 - }, - { - "epoch": 0.16428678791522622, - "grad_norm": 2.922424591109693, - "learning_rate": 3.813950669198678e-06, - "loss": 1.1085, - "step": 2186 - }, - { - "epoch": 0.1643619419810612, - "grad_norm": 0.7659088698428761, - "learning_rate": 3.8137455654447063e-06, - "loss": 0.8553, - "step": 2187 - }, - { - "epoch": 0.16443709604689613, - "grad_norm": 3.2478378146038485, - "learning_rate": 3.8135403542193646e-06, - "loss": 0.9768, - "step": 2188 - }, - { - "epoch": 0.1645122501127311, - "grad_norm": 5.2650669605891345, - "learning_rate": 3.8133350355348125e-06, - "loss": 1.0487, - "step": 2189 - }, - { - "epoch": 0.16458740417856607, - "grad_norm": 1.6729709206315233, - "learning_rate": 3.8131296094032158e-06, - "loss": 0.9556, - "step": 2190 - }, - { - "epoch": 0.164662558244401, - "grad_norm": 1.9841691758952908, - "learning_rate": 3.8129240758367463e-06, - "loss": 1.0244, - "step": 2191 - }, - { - "epoch": 0.16473771231023598, - "grad_norm": 2.1188704779925898, - "learning_rate": 3.8127184348475836e-06, - "loss": 1.0164, - "step": 2192 - }, - { - "epoch": 0.16481286637607095, - "grad_norm": 1.6424116306541598, - "learning_rate": 3.8125126864479123e-06, - "loss": 1.0878, - "step": 2193 - }, - { - "epoch": 0.1648880204419059, - "grad_norm": 1.473636857236414, - "learning_rate": 3.8123068306499236e-06, - "loss": 0.9779, - "step": 2194 - }, - { - "epoch": 0.16496317450774087, - "grad_norm": 2.163943841759038, - "learning_rate": 3.8121008674658154e-06, - "loss": 1.0447, - "step": 2195 - }, - { - "epoch": 0.16503832857357584, - "grad_norm": 1.8976024784967727, - "learning_rate": 3.8118947969077915e-06, - "loss": 1.0475, - "step": 2196 - }, - { - "epoch": 0.16511348263941078, - "grad_norm": 2.0271028879077804, - "learning_rate": 3.8116886189880634e-06, - "loss": 1.0345, - "step": 2197 - }, - { - "epoch": 0.16518863670524575, - "grad_norm": 1.708430713886399, - "learning_rate": 3.811482333718847e-06, - "loss": 0.9272, - "step": 2198 - }, - { - "epoch": 0.16526379077108072, - "grad_norm": 1.8414442871722727, - "learning_rate": 3.811275941112366e-06, - "loss": 1.0424, - "step": 2199 - }, - { - "epoch": 0.16533894483691566, - "grad_norm": 1.8293997078110982, - "learning_rate": 3.811069441180849e-06, - "loss": 1.0723, - "step": 2200 - }, - { - "epoch": 0.16541409890275063, - "grad_norm": 2.6714009539267267, - "learning_rate": 3.810862833936532e-06, - "loss": 1.0288, - "step": 2201 - }, - { - "epoch": 0.1654892529685856, - "grad_norm": 1.906386698839959, - "learning_rate": 3.8106561193916587e-06, - "loss": 0.9597, - "step": 2202 - }, - { - "epoch": 0.16556440703442057, - "grad_norm": 1.9552790246079088, - "learning_rate": 3.810449297558477e-06, - "loss": 0.9429, - "step": 2203 - }, - { - "epoch": 0.16563956110025552, - "grad_norm": 2.305456001042245, - "learning_rate": 3.810242368449241e-06, - "loss": 0.9245, - "step": 2204 - }, - { - "epoch": 0.1657147151660905, - "grad_norm": 2.3038922505747315, - "learning_rate": 3.810035332076214e-06, - "loss": 1.0082, - "step": 2205 - }, - { - "epoch": 0.16578986923192546, - "grad_norm": 1.0469996250046256, - "learning_rate": 3.809828188451662e-06, - "loss": 0.8797, - "step": 2206 - }, - { - "epoch": 0.1658650232977604, - "grad_norm": 1.8000635798139688, - "learning_rate": 3.809620937587859e-06, - "loss": 1.0281, - "step": 2207 - }, - { - "epoch": 0.16594017736359537, - "grad_norm": 2.2224913738757017, - "learning_rate": 3.8094135794970857e-06, - "loss": 1.016, - "step": 2208 - }, - { - "epoch": 0.16601533142943034, - "grad_norm": 3.1702595623140777, - "learning_rate": 3.80920611419163e-06, - "loss": 1.0483, - "step": 2209 - }, - { - "epoch": 0.16609048549526528, - "grad_norm": 1.7349180453949578, - "learning_rate": 3.808998541683784e-06, - "loss": 1.0471, - "step": 2210 - }, - { - "epoch": 0.16616563956110025, - "grad_norm": 1.8499889528099347, - "learning_rate": 3.8087908619858473e-06, - "loss": 1.0188, - "step": 2211 - }, - { - "epoch": 0.16624079362693522, - "grad_norm": 2.3690451204150285, - "learning_rate": 3.8085830751101253e-06, - "loss": 1.0369, - "step": 2212 - }, - { - "epoch": 0.16631594769277017, - "grad_norm": 2.5481461237353944, - "learning_rate": 3.8083751810689306e-06, - "loss": 0.9913, - "step": 2213 - }, - { - "epoch": 0.16639110175860514, - "grad_norm": 1.400117046708577, - "learning_rate": 3.8081671798745817e-06, - "loss": 1.0941, - "step": 2214 - }, - { - "epoch": 0.1664662558244401, - "grad_norm": 1.8933063125808953, - "learning_rate": 3.807959071539404e-06, - "loss": 1.0638, - "step": 2215 - }, - { - "epoch": 0.16654140989027508, - "grad_norm": 5.542792222677962, - "learning_rate": 3.8077508560757275e-06, - "loss": 1.1338, - "step": 2216 - }, - { - "epoch": 0.16661656395611002, - "grad_norm": 0.7731803697820506, - "learning_rate": 3.8075425334958908e-06, - "loss": 0.9155, - "step": 2217 - }, - { - "epoch": 0.166691718021945, - "grad_norm": 1.8495220417484102, - "learning_rate": 3.8073341038122374e-06, - "loss": 0.9659, - "step": 2218 - }, - { - "epoch": 0.16676687208777996, - "grad_norm": 1.687437197611567, - "learning_rate": 3.8071255670371174e-06, - "loss": 0.962, - "step": 2219 - }, - { - "epoch": 0.1668420261536149, - "grad_norm": 2.2958706067539048, - "learning_rate": 3.8069169231828875e-06, - "loss": 1.0255, - "step": 2220 - }, - { - "epoch": 0.16691718021944987, - "grad_norm": 2.2965012849670843, - "learning_rate": 3.8067081722619114e-06, - "loss": 1.0176, - "step": 2221 - }, - { - "epoch": 0.16699233428528484, - "grad_norm": 2.775616544673267, - "learning_rate": 3.8064993142865573e-06, - "loss": 0.9847, - "step": 2222 - }, - { - "epoch": 0.1670674883511198, - "grad_norm": 0.7455556237048128, - "learning_rate": 3.8062903492692014e-06, - "loss": 0.8322, - "step": 2223 - }, - { - "epoch": 0.16714264241695476, - "grad_norm": 1.7443644517766201, - "learning_rate": 3.8060812772222255e-06, - "loss": 0.9398, - "step": 2224 - }, - { - "epoch": 0.16721779648278973, - "grad_norm": 2.42366788397468, - "learning_rate": 3.805872098158018e-06, - "loss": 1.0999, - "step": 2225 - }, - { - "epoch": 0.16729295054862467, - "grad_norm": 2.2092379990204583, - "learning_rate": 3.8056628120889736e-06, - "loss": 1.0651, - "step": 2226 - }, - { - "epoch": 0.16736810461445964, - "grad_norm": 2.0591704340794412, - "learning_rate": 3.805453419027493e-06, - "loss": 1.0412, - "step": 2227 - }, - { - "epoch": 0.1674432586802946, - "grad_norm": 1.4877559505773765, - "learning_rate": 3.805243918985984e-06, - "loss": 1.053, - "step": 2228 - }, - { - "epoch": 0.16751841274612955, - "grad_norm": 2.0280896764186003, - "learning_rate": 3.80503431197686e-06, - "loss": 1.0409, - "step": 2229 - }, - { - "epoch": 0.16759356681196452, - "grad_norm": 2.284085640799849, - "learning_rate": 3.804824598012541e-06, - "loss": 1.0023, - "step": 2230 - }, - { - "epoch": 0.1676687208777995, - "grad_norm": 2.2569387256517257, - "learning_rate": 3.8046147771054536e-06, - "loss": 1.1093, - "step": 2231 - }, - { - "epoch": 0.16774387494363446, - "grad_norm": 3.867006363142612, - "learning_rate": 3.8044048492680297e-06, - "loss": 0.9533, - "step": 2232 - }, - { - "epoch": 0.1678190290094694, - "grad_norm": 2.522841439253031, - "learning_rate": 3.80419481451271e-06, - "loss": 1.1033, - "step": 2233 - }, - { - "epoch": 0.16789418307530438, - "grad_norm": 4.500070359421739, - "learning_rate": 3.8039846728519383e-06, - "loss": 0.9468, - "step": 2234 - }, - { - "epoch": 0.16796933714113935, - "grad_norm": 2.564571380793972, - "learning_rate": 3.803774424298167e-06, - "loss": 1.0155, - "step": 2235 - }, - { - "epoch": 0.1680444912069743, - "grad_norm": 0.7900172585400895, - "learning_rate": 3.8035640688638537e-06, - "loss": 0.8401, - "step": 2236 - }, - { - "epoch": 0.16811964527280926, - "grad_norm": 1.5280702233979622, - "learning_rate": 3.8033536065614625e-06, - "loss": 0.996, - "step": 2237 - }, - { - "epoch": 0.16819479933864423, - "grad_norm": 1.3967297318467764, - "learning_rate": 3.8031430374034653e-06, - "loss": 1.0529, - "step": 2238 - }, - { - "epoch": 0.16826995340447917, - "grad_norm": 1.9368640101940522, - "learning_rate": 3.802932361402338e-06, - "loss": 1.1498, - "step": 2239 - }, - { - "epoch": 0.16834510747031414, - "grad_norm": 2.4073842335533864, - "learning_rate": 3.8027215785705654e-06, - "loss": 0.8969, - "step": 2240 - }, - { - "epoch": 0.1684202615361491, - "grad_norm": 1.9019413391236994, - "learning_rate": 3.8025106889206353e-06, - "loss": 1.008, - "step": 2241 - }, - { - "epoch": 0.16849541560198406, - "grad_norm": 0.7007728652707655, - "learning_rate": 3.802299692465045e-06, - "loss": 0.8613, - "step": 2242 - }, - { - "epoch": 0.16857056966781903, - "grad_norm": 3.6374062647327143, - "learning_rate": 3.802088589216296e-06, - "loss": 0.9349, - "step": 2243 - }, - { - "epoch": 0.168645723733654, - "grad_norm": 1.9370491980044033, - "learning_rate": 3.801877379186898e-06, - "loss": 0.9529, - "step": 2244 - }, - { - "epoch": 0.16872087779948894, - "grad_norm": 3.2729300968340596, - "learning_rate": 3.8016660623893653e-06, - "loss": 1.0522, - "step": 2245 - }, - { - "epoch": 0.1687960318653239, - "grad_norm": 2.154066666643747, - "learning_rate": 3.801454638836219e-06, - "loss": 0.9551, - "step": 2246 - }, - { - "epoch": 0.16887118593115888, - "grad_norm": 1.4461071407413255, - "learning_rate": 3.801243108539987e-06, - "loss": 0.9486, - "step": 2247 - }, - { - "epoch": 0.16894633999699385, - "grad_norm": 3.202713656181078, - "learning_rate": 3.8010314715132037e-06, - "loss": 0.9991, - "step": 2248 - }, - { - "epoch": 0.1690214940628288, - "grad_norm": 1.9800795674953748, - "learning_rate": 3.8008197277684094e-06, - "loss": 1.0273, - "step": 2249 - }, - { - "epoch": 0.16909664812866376, - "grad_norm": 1.7657708301916926, - "learning_rate": 3.80060787731815e-06, - "loss": 1.0426, - "step": 2250 - }, - { - "epoch": 0.16917180219449873, - "grad_norm": 2.9355299816546516, - "learning_rate": 3.8003959201749793e-06, - "loss": 1.0156, - "step": 2251 - }, - { - "epoch": 0.16924695626033368, - "grad_norm": 1.53036637014897, - "learning_rate": 3.800183856351456e-06, - "loss": 1.0752, - "step": 2252 - }, - { - "epoch": 0.16932211032616865, - "grad_norm": 2.0065153808004244, - "learning_rate": 3.7999716858601456e-06, - "loss": 1.0166, - "step": 2253 - }, - { - "epoch": 0.16939726439200362, - "grad_norm": 1.6841090004681298, - "learning_rate": 3.79975940871362e-06, - "loss": 0.9231, - "step": 2254 - }, - { - "epoch": 0.16947241845783856, - "grad_norm": 1.6522970711537874, - "learning_rate": 3.7995470249244582e-06, - "loss": 1.0348, - "step": 2255 - }, - { - "epoch": 0.16954757252367353, - "grad_norm": 2.428277787319675, - "learning_rate": 3.7993345345052445e-06, - "loss": 1.06, - "step": 2256 - }, - { - "epoch": 0.1696227265895085, - "grad_norm": 2.20577821523154, - "learning_rate": 3.799121937468569e-06, - "loss": 1.0591, - "step": 2257 - }, - { - "epoch": 0.16969788065534344, - "grad_norm": 1.9126257038567658, - "learning_rate": 3.7989092338270295e-06, - "loss": 0.9405, - "step": 2258 - }, - { - "epoch": 0.1697730347211784, - "grad_norm": 2.4617611430157984, - "learning_rate": 3.7986964235932293e-06, - "loss": 1.0591, - "step": 2259 - }, - { - "epoch": 0.16984818878701338, - "grad_norm": 1.6345252586133143, - "learning_rate": 3.7984835067797788e-06, - "loss": 1.0045, - "step": 2260 - }, - { - "epoch": 0.16992334285284835, - "grad_norm": 3.604104293254127, - "learning_rate": 3.7982704833992933e-06, - "loss": 1.0394, - "step": 2261 - }, - { - "epoch": 0.1699984969186833, - "grad_norm": 1.7589726667291947, - "learning_rate": 3.7980573534643954e-06, - "loss": 1.063, - "step": 2262 - }, - { - "epoch": 0.17007365098451827, - "grad_norm": 2.2411726406041717, - "learning_rate": 3.7978441169877143e-06, - "loss": 0.9028, - "step": 2263 - }, - { - "epoch": 0.17014880505035324, - "grad_norm": 2.145672381639339, - "learning_rate": 3.7976307739818852e-06, - "loss": 0.971, - "step": 2264 - }, - { - "epoch": 0.17022395911618818, - "grad_norm": 1.5382839499275998, - "learning_rate": 3.7974173244595493e-06, - "loss": 1.0733, - "step": 2265 - }, - { - "epoch": 0.17029911318202315, - "grad_norm": 1.4688719029261232, - "learning_rate": 3.7972037684333534e-06, - "loss": 1.0226, - "step": 2266 - }, - { - "epoch": 0.17037426724785812, - "grad_norm": 1.637522789017607, - "learning_rate": 3.7969901059159524e-06, - "loss": 0.9127, - "step": 2267 - }, - { - "epoch": 0.17044942131369306, - "grad_norm": 2.7689889395710923, - "learning_rate": 3.796776336920007e-06, - "loss": 0.9134, - "step": 2268 - }, - { - "epoch": 0.17052457537952803, - "grad_norm": 2.020702502023633, - "learning_rate": 3.796562461458183e-06, - "loss": 1.0197, - "step": 2269 - }, - { - "epoch": 0.170599729445363, - "grad_norm": 2.7178929172780224, - "learning_rate": 3.7963484795431537e-06, - "loss": 0.9867, - "step": 2270 - }, - { - "epoch": 0.17067488351119794, - "grad_norm": 0.6983618787069192, - "learning_rate": 3.796134391187598e-06, - "loss": 0.8218, - "step": 2271 - }, - { - "epoch": 0.17075003757703291, - "grad_norm": 1.8907707095153827, - "learning_rate": 3.7959201964042024e-06, - "loss": 0.8847, - "step": 2272 - }, - { - "epoch": 0.17082519164286789, - "grad_norm": 1.4997602907016274, - "learning_rate": 3.7957058952056577e-06, - "loss": 0.9423, - "step": 2273 - }, - { - "epoch": 0.17090034570870283, - "grad_norm": 2.2254818083911583, - "learning_rate": 3.7954914876046626e-06, - "loss": 0.977, - "step": 2274 - }, - { - "epoch": 0.1709754997745378, - "grad_norm": 2.1722610459865086, - "learning_rate": 3.795276973613921e-06, - "loss": 0.8297, - "step": 2275 - }, - { - "epoch": 0.17105065384037277, - "grad_norm": 2.1239647306901253, - "learning_rate": 3.795062353246145e-06, - "loss": 1.016, - "step": 2276 - }, - { - "epoch": 0.17112580790620774, - "grad_norm": 2.0219638716491435, - "learning_rate": 3.79484762651405e-06, - "loss": 0.9945, - "step": 2277 - }, - { - "epoch": 0.17120096197204268, - "grad_norm": 2.0321775226092913, - "learning_rate": 3.7946327934303612e-06, - "loss": 1.0234, - "step": 2278 - }, - { - "epoch": 0.17127611603787765, - "grad_norm": 2.3400815610446806, - "learning_rate": 3.7944178540078065e-06, - "loss": 1.0693, - "step": 2279 - }, - { - "epoch": 0.17135127010371262, - "grad_norm": 3.9472351828056693, - "learning_rate": 3.7942028082591227e-06, - "loss": 0.971, - "step": 2280 - }, - { - "epoch": 0.17142642416954756, - "grad_norm": 2.666275671268857, - "learning_rate": 3.7939876561970526e-06, - "loss": 1.1121, - "step": 2281 - }, - { - "epoch": 0.17150157823538253, - "grad_norm": 3.765960425162373, - "learning_rate": 3.7937723978343437e-06, - "loss": 1.0141, - "step": 2282 - }, - { - "epoch": 0.1715767323012175, - "grad_norm": 1.5154152951231274, - "learning_rate": 3.7935570331837514e-06, - "loss": 1.0088, - "step": 2283 - }, - { - "epoch": 0.17165188636705245, - "grad_norm": 3.7542601229049875, - "learning_rate": 3.793341562258037e-06, - "loss": 1.0764, - "step": 2284 - }, - { - "epoch": 0.17172704043288742, - "grad_norm": 1.9108122340868967, - "learning_rate": 3.7931259850699678e-06, - "loss": 1.0432, - "step": 2285 - }, - { - "epoch": 0.1718021944987224, - "grad_norm": 0.8357347973494355, - "learning_rate": 3.7929103016323183e-06, - "loss": 0.8587, - "step": 2286 - }, - { - "epoch": 0.17187734856455733, - "grad_norm": 2.3775839552311004, - "learning_rate": 3.792694511957867e-06, - "loss": 1.0451, - "step": 2287 - }, - { - "epoch": 0.1719525026303923, - "grad_norm": 1.6090351227316582, - "learning_rate": 3.7924786160594016e-06, - "loss": 1.1035, - "step": 2288 - }, - { - "epoch": 0.17202765669622727, - "grad_norm": 3.541960012217729, - "learning_rate": 3.792262613949714e-06, - "loss": 1.048, - "step": 2289 - }, - { - "epoch": 0.1721028107620622, - "grad_norm": 1.8019603915164157, - "learning_rate": 3.792046505641604e-06, - "loss": 1.0283, - "step": 2290 - }, - { - "epoch": 0.17217796482789718, - "grad_norm": 3.8132154700153205, - "learning_rate": 3.7918302911478764e-06, - "loss": 1.03, - "step": 2291 - }, - { - "epoch": 0.17225311889373215, - "grad_norm": 2.240888265681446, - "learning_rate": 3.791613970481342e-06, - "loss": 0.9911, - "step": 2292 - }, - { - "epoch": 0.17232827295956712, - "grad_norm": 2.0561032367495846, - "learning_rate": 3.7913975436548195e-06, - "loss": 0.94, - "step": 2293 - }, - { - "epoch": 0.17240342702540207, - "grad_norm": 1.8684258604093869, - "learning_rate": 3.7911810106811332e-06, - "loss": 1.0367, - "step": 2294 - }, - { - "epoch": 0.17247858109123704, - "grad_norm": 2.6741490195898563, - "learning_rate": 3.7909643715731133e-06, - "loss": 1.0326, - "step": 2295 - }, - { - "epoch": 0.172553735157072, - "grad_norm": 1.86474916228636, - "learning_rate": 3.790747626343596e-06, - "loss": 1.0495, - "step": 2296 - }, - { - "epoch": 0.17262888922290695, - "grad_norm": 1.7680974882270366, - "learning_rate": 3.7905307750054247e-06, - "loss": 1.0788, - "step": 2297 - }, - { - "epoch": 0.17270404328874192, - "grad_norm": 2.0216849849507326, - "learning_rate": 3.790313817571448e-06, - "loss": 1.0792, - "step": 2298 - }, - { - "epoch": 0.1727791973545769, - "grad_norm": 1.8162841755853993, - "learning_rate": 3.790096754054523e-06, - "loss": 0.9524, - "step": 2299 - }, - { - "epoch": 0.17285435142041183, - "grad_norm": 2.042586281680441, - "learning_rate": 3.7898795844675096e-06, - "loss": 1.0247, - "step": 2300 - }, - { - "epoch": 0.1729295054862468, - "grad_norm": 0.8731150760509905, - "learning_rate": 3.789662308823278e-06, - "loss": 0.8794, - "step": 2301 - }, - { - "epoch": 0.17300465955208177, - "grad_norm": 1.9168757725413395, - "learning_rate": 3.7894449271347e-06, - "loss": 1.016, - "step": 2302 - }, - { - "epoch": 0.17307981361791672, - "grad_norm": 4.298088207633783, - "learning_rate": 3.7892274394146592e-06, - "loss": 1.1583, - "step": 2303 - }, - { - "epoch": 0.1731549676837517, - "grad_norm": 1.5566545627022674, - "learning_rate": 3.789009845676041e-06, - "loss": 0.9946, - "step": 2304 - }, - { - "epoch": 0.17323012174958666, - "grad_norm": 2.9592378514059745, - "learning_rate": 3.7887921459317386e-06, - "loss": 1.0398, - "step": 2305 - }, - { - "epoch": 0.17330527581542163, - "grad_norm": 1.9826813927270104, - "learning_rate": 3.7885743401946517e-06, - "loss": 0.914, - "step": 2306 - }, - { - "epoch": 0.17338042988125657, - "grad_norm": 3.0213333417629276, - "learning_rate": 3.7883564284776863e-06, - "loss": 1.06, - "step": 2307 - }, - { - "epoch": 0.17345558394709154, - "grad_norm": 1.8295792884002964, - "learning_rate": 3.7881384107937546e-06, - "loss": 0.9986, - "step": 2308 - }, - { - "epoch": 0.1735307380129265, - "grad_norm": 1.924032783272621, - "learning_rate": 3.7879202871557742e-06, - "loss": 1.0202, - "step": 2309 - }, - { - "epoch": 0.17360589207876145, - "grad_norm": 0.7387442776737532, - "learning_rate": 3.7877020575766714e-06, - "loss": 0.8184, - "step": 2310 - }, - { - "epoch": 0.17368104614459642, - "grad_norm": 2.702109261777815, - "learning_rate": 3.7874837220693756e-06, - "loss": 0.9934, - "step": 2311 - }, - { - "epoch": 0.1737562002104314, - "grad_norm": 0.7589317393837853, - "learning_rate": 3.7872652806468244e-06, - "loss": 0.8606, - "step": 2312 - }, - { - "epoch": 0.17383135427626634, - "grad_norm": 2.48595699587981, - "learning_rate": 3.7870467333219614e-06, - "loss": 0.9871, - "step": 2313 - }, - { - "epoch": 0.1739065083421013, - "grad_norm": 8.297927358666563, - "learning_rate": 3.7868280801077368e-06, - "loss": 1.03, - "step": 2314 - }, - { - "epoch": 0.17398166240793628, - "grad_norm": 1.8433721122778577, - "learning_rate": 3.786609321017106e-06, - "loss": 1.0774, - "step": 2315 - }, - { - "epoch": 0.17405681647377122, - "grad_norm": 2.998573440881328, - "learning_rate": 3.7863904560630315e-06, - "loss": 1.0598, - "step": 2316 - }, - { - "epoch": 0.1741319705396062, - "grad_norm": 0.7137655245801188, - "learning_rate": 3.786171485258482e-06, - "loss": 0.8749, - "step": 2317 - }, - { - "epoch": 0.17420712460544116, - "grad_norm": 2.6144339124884715, - "learning_rate": 3.785952408616432e-06, - "loss": 1.0908, - "step": 2318 - }, - { - "epoch": 0.1742822786712761, - "grad_norm": 1.981795429038677, - "learning_rate": 3.7857332261498635e-06, - "loss": 1.0403, - "step": 2319 - }, - { - "epoch": 0.17435743273711107, - "grad_norm": 1.4851488204521393, - "learning_rate": 3.785513937871763e-06, - "loss": 1.0288, - "step": 2320 - }, - { - "epoch": 0.17443258680294604, - "grad_norm": 5.767611525787979, - "learning_rate": 3.785294543795125e-06, - "loss": 1.0715, - "step": 2321 - }, - { - "epoch": 0.174507740868781, - "grad_norm": 1.5851871613430435, - "learning_rate": 3.7850750439329477e-06, - "loss": 1.0035, - "step": 2322 - }, - { - "epoch": 0.17458289493461596, - "grad_norm": 1.8609971095354283, - "learning_rate": 3.7848554382982398e-06, - "loss": 1.0051, - "step": 2323 - }, - { - "epoch": 0.17465804900045093, - "grad_norm": 1.7416527444870147, - "learning_rate": 3.7846357269040115e-06, - "loss": 1.0313, - "step": 2324 - }, - { - "epoch": 0.1747332030662859, - "grad_norm": 1.5980100356587759, - "learning_rate": 3.784415909763283e-06, - "loss": 0.9463, - "step": 2325 - }, - { - "epoch": 0.17480835713212084, - "grad_norm": 2.3307874164370586, - "learning_rate": 3.784195986889079e-06, - "loss": 1.0721, - "step": 2326 - }, - { - "epoch": 0.1748835111979558, - "grad_norm": 3.0746672907771027, - "learning_rate": 3.7839759582944307e-06, - "loss": 1.093, - "step": 2327 - }, - { - "epoch": 0.17495866526379078, - "grad_norm": 3.7135803448917475, - "learning_rate": 3.783755823992376e-06, - "loss": 0.9232, - "step": 2328 - }, - { - "epoch": 0.17503381932962572, - "grad_norm": 1.5068463802902887, - "learning_rate": 3.783535583995957e-06, - "loss": 1.0516, - "step": 2329 - }, - { - "epoch": 0.1751089733954607, - "grad_norm": 2.0248674325759786, - "learning_rate": 3.783315238318226e-06, - "loss": 1.0397, - "step": 2330 - }, - { - "epoch": 0.17518412746129566, - "grad_norm": 2.1910304326947694, - "learning_rate": 3.7830947869722377e-06, - "loss": 0.9591, - "step": 2331 - }, - { - "epoch": 0.1752592815271306, - "grad_norm": 0.9929618216932424, - "learning_rate": 3.7828742299710558e-06, - "loss": 0.8567, - "step": 2332 - }, - { - "epoch": 0.17533443559296558, - "grad_norm": 2.864739727028143, - "learning_rate": 3.782653567327749e-06, - "loss": 1.1126, - "step": 2333 - }, - { - "epoch": 0.17540958965880055, - "grad_norm": 1.6009686369197733, - "learning_rate": 3.7824327990553914e-06, - "loss": 1.0276, - "step": 2334 - }, - { - "epoch": 0.1754847437246355, - "grad_norm": 2.4654858413317595, - "learning_rate": 3.7822119251670657e-06, - "loss": 0.9927, - "step": 2335 - }, - { - "epoch": 0.17555989779047046, - "grad_norm": 1.524716354006397, - "learning_rate": 3.7819909456758582e-06, - "loss": 1.0082, - "step": 2336 - }, - { - "epoch": 0.17563505185630543, - "grad_norm": 1.4823049466504865, - "learning_rate": 3.7817698605948643e-06, - "loss": 1.1028, - "step": 2337 - }, - { - "epoch": 0.1757102059221404, - "grad_norm": 1.727117388872299, - "learning_rate": 3.7815486699371826e-06, - "loss": 1.0651, - "step": 2338 - }, - { - "epoch": 0.17578535998797534, - "grad_norm": 1.573296136955184, - "learning_rate": 3.7813273737159205e-06, - "loss": 1.0215, - "step": 2339 - }, - { - "epoch": 0.1758605140538103, - "grad_norm": 1.909773400131506, - "learning_rate": 3.78110597194419e-06, - "loss": 1.0524, - "step": 2340 - }, - { - "epoch": 0.17593566811964528, - "grad_norm": 1.7909776190632516, - "learning_rate": 3.780884464635111e-06, - "loss": 0.9949, - "step": 2341 - }, - { - "epoch": 0.17601082218548023, - "grad_norm": 2.0170994769687964, - "learning_rate": 3.7806628518018074e-06, - "loss": 1.0253, - "step": 2342 - }, - { - "epoch": 0.1760859762513152, - "grad_norm": 2.2600698834982733, - "learning_rate": 3.7804411334574116e-06, - "loss": 0.9453, - "step": 2343 - }, - { - "epoch": 0.17616113031715017, - "grad_norm": 2.025707959409847, - "learning_rate": 3.7802193096150606e-06, - "loss": 1.01, - "step": 2344 - }, - { - "epoch": 0.1762362843829851, - "grad_norm": 2.081022837845789, - "learning_rate": 3.7799973802878985e-06, - "loss": 0.9821, - "step": 2345 - }, - { - "epoch": 0.17631143844882008, - "grad_norm": 1.573211650682262, - "learning_rate": 3.779775345489076e-06, - "loss": 0.9665, - "step": 2346 - }, - { - "epoch": 0.17638659251465505, - "grad_norm": 2.3965376993354264, - "learning_rate": 3.779553205231749e-06, - "loss": 1.0024, - "step": 2347 - }, - { - "epoch": 0.17646174658049, - "grad_norm": 1.8131408873752541, - "learning_rate": 3.77933095952908e-06, - "loss": 1.0297, - "step": 2348 - }, - { - "epoch": 0.17653690064632496, - "grad_norm": 2.1083036739791226, - "learning_rate": 3.779108608394238e-06, - "loss": 0.9654, - "step": 2349 - }, - { - "epoch": 0.17661205471215993, - "grad_norm": 3.4617462935371033, - "learning_rate": 3.7788861518403988e-06, - "loss": 0.9436, - "step": 2350 - }, - { - "epoch": 0.1766872087779949, - "grad_norm": 1.668309625818564, - "learning_rate": 3.778663589880743e-06, - "loss": 1.084, - "step": 2351 - }, - { - "epoch": 0.17676236284382985, - "grad_norm": 1.7259813944718843, - "learning_rate": 3.7784409225284585e-06, - "loss": 0.9594, - "step": 2352 - }, - { - "epoch": 0.17683751690966482, - "grad_norm": 2.0762759135062825, - "learning_rate": 3.7782181497967393e-06, - "loss": 0.977, - "step": 2353 - }, - { - "epoch": 0.1769126709754998, - "grad_norm": 2.2577032042980116, - "learning_rate": 3.7779952716987856e-06, - "loss": 0.9665, - "step": 2354 - }, - { - "epoch": 0.17698782504133473, - "grad_norm": 1.8402288402392357, - "learning_rate": 3.7777722882478032e-06, - "loss": 0.9709, - "step": 2355 - }, - { - "epoch": 0.1770629791071697, - "grad_norm": 1.9491377146662194, - "learning_rate": 3.7775491994570057e-06, - "loss": 1.0166, - "step": 2356 - }, - { - "epoch": 0.17713813317300467, - "grad_norm": 1.5295642214732548, - "learning_rate": 3.777326005339611e-06, - "loss": 0.9648, - "step": 2357 - }, - { - "epoch": 0.1772132872388396, - "grad_norm": 0.7048850389521003, - "learning_rate": 3.7771027059088454e-06, - "loss": 0.854, - "step": 2358 - }, - { - "epoch": 0.17728844130467458, - "grad_norm": 2.686603929041143, - "learning_rate": 3.7768793011779383e-06, - "loss": 1.1185, - "step": 2359 - }, - { - "epoch": 0.17736359537050955, - "grad_norm": 1.9020632433239448, - "learning_rate": 3.7766557911601295e-06, - "loss": 1.0281, - "step": 2360 - }, - { - "epoch": 0.1774387494363445, - "grad_norm": 1.804437000674808, - "learning_rate": 3.7764321758686614e-06, - "loss": 1.0541, - "step": 2361 - }, - { - "epoch": 0.17751390350217947, - "grad_norm": 7.346779056573262, - "learning_rate": 3.7762084553167846e-06, - "loss": 1.0168, - "step": 2362 - }, - { - "epoch": 0.17758905756801444, - "grad_norm": 2.8644912057569467, - "learning_rate": 3.7759846295177552e-06, - "loss": 1.0298, - "step": 2363 - }, - { - "epoch": 0.17766421163384938, - "grad_norm": 13.988931367537123, - "learning_rate": 3.775760698484836e-06, - "loss": 1.0488, - "step": 2364 - }, - { - "epoch": 0.17773936569968435, - "grad_norm": 1.6610860218094023, - "learning_rate": 3.7755366622312954e-06, - "loss": 1.0407, - "step": 2365 - }, - { - "epoch": 0.17781451976551932, - "grad_norm": 1.584377780889294, - "learning_rate": 3.7753125207704084e-06, - "loss": 1.0501, - "step": 2366 - }, - { - "epoch": 0.1778896738313543, - "grad_norm": 1.6638419851415505, - "learning_rate": 3.7750882741154566e-06, - "loss": 1.0566, - "step": 2367 - }, - { - "epoch": 0.17796482789718923, - "grad_norm": 1.8164267261950706, - "learning_rate": 3.774863922279727e-06, - "loss": 1.0906, - "step": 2368 - }, - { - "epoch": 0.1780399819630242, - "grad_norm": 1.9696309979490232, - "learning_rate": 3.7746394652765136e-06, - "loss": 0.9003, - "step": 2369 - }, - { - "epoch": 0.17811513602885917, - "grad_norm": 0.8473561542383163, - "learning_rate": 3.774414903119117e-06, - "loss": 0.8891, - "step": 2370 - }, - { - "epoch": 0.17819029009469411, - "grad_norm": 2.4009747830313506, - "learning_rate": 3.7741902358208427e-06, - "loss": 1.0044, - "step": 2371 - }, - { - "epoch": 0.17826544416052909, - "grad_norm": 1.884795098741373, - "learning_rate": 3.7739654633950023e-06, - "loss": 1.0968, - "step": 2372 - }, - { - "epoch": 0.17834059822636406, - "grad_norm": 1.7125444225480515, - "learning_rate": 3.7737405858549156e-06, - "loss": 1.018, - "step": 2373 - }, - { - "epoch": 0.178415752292199, - "grad_norm": 1.63032897730305, - "learning_rate": 3.7735156032139066e-06, - "loss": 1.0749, - "step": 2374 - }, - { - "epoch": 0.17849090635803397, - "grad_norm": 2.5461171900889603, - "learning_rate": 3.773290515485308e-06, - "loss": 0.9016, - "step": 2375 - }, - { - "epoch": 0.17856606042386894, - "grad_norm": 3.0709032262114446, - "learning_rate": 3.773065322682455e-06, - "loss": 1.0052, - "step": 2376 - }, - { - "epoch": 0.17864121448970388, - "grad_norm": 1.5833680333133093, - "learning_rate": 3.772840024818692e-06, - "loss": 0.921, - "step": 2377 - }, - { - "epoch": 0.17871636855553885, - "grad_norm": 2.8030336226897457, - "learning_rate": 3.7726146219073697e-06, - "loss": 1.08, - "step": 2378 - }, - { - "epoch": 0.17879152262137382, - "grad_norm": 1.628098834334566, - "learning_rate": 3.772389113961843e-06, - "loss": 1.034, - "step": 2379 - }, - { - "epoch": 0.17886667668720876, - "grad_norm": 2.618044949034217, - "learning_rate": 3.772163500995474e-06, - "loss": 1.0309, - "step": 2380 - }, - { - "epoch": 0.17894183075304373, - "grad_norm": 1.6735224227656802, - "learning_rate": 3.771937783021632e-06, - "loss": 1.0399, - "step": 2381 - }, - { - "epoch": 0.1790169848188787, - "grad_norm": 2.181570244920046, - "learning_rate": 3.771711960053691e-06, - "loss": 1.0232, - "step": 2382 - }, - { - "epoch": 0.17909213888471368, - "grad_norm": 2.954511367494513, - "learning_rate": 3.7714860321050316e-06, - "loss": 0.903, - "step": 2383 - }, - { - "epoch": 0.17916729295054862, - "grad_norm": 1.9324167079174022, - "learning_rate": 3.771259999189042e-06, - "loss": 1.0053, - "step": 2384 - }, - { - "epoch": 0.1792424470163836, - "grad_norm": 1.5785422198522145, - "learning_rate": 3.7710338613191145e-06, - "loss": 1.0112, - "step": 2385 - }, - { - "epoch": 0.17931760108221856, - "grad_norm": 1.9361017780939487, - "learning_rate": 3.770807618508649e-06, - "loss": 1.0056, - "step": 2386 - }, - { - "epoch": 0.1793927551480535, - "grad_norm": 1.90128969290266, - "learning_rate": 3.770581270771051e-06, - "loss": 1.0526, - "step": 2387 - }, - { - "epoch": 0.17946790921388847, - "grad_norm": 1.8391384060475495, - "learning_rate": 3.770354818119733e-06, - "loss": 1.0896, - "step": 2388 - }, - { - "epoch": 0.17954306327972344, - "grad_norm": 0.777793217592789, - "learning_rate": 3.7701282605681123e-06, - "loss": 0.8539, - "step": 2389 - }, - { - "epoch": 0.17961821734555838, - "grad_norm": 17.551756263953152, - "learning_rate": 3.769901598129615e-06, - "loss": 1.0826, - "step": 2390 - }, - { - "epoch": 0.17969337141139335, - "grad_norm": 2.2968984589708694, - "learning_rate": 3.7696748308176698e-06, - "loss": 1.0457, - "step": 2391 - }, - { - "epoch": 0.17976852547722832, - "grad_norm": 1.968576575012883, - "learning_rate": 3.7694479586457144e-06, - "loss": 1.1158, - "step": 2392 - }, - { - "epoch": 0.17984367954306327, - "grad_norm": 1.6067402577595271, - "learning_rate": 3.7692209816271915e-06, - "loss": 1.0287, - "step": 2393 - }, - { - "epoch": 0.17991883360889824, - "grad_norm": 2.515213547697539, - "learning_rate": 3.7689938997755512e-06, - "loss": 1.0728, - "step": 2394 - }, - { - "epoch": 0.1799939876747332, - "grad_norm": 1.7877534299872773, - "learning_rate": 3.7687667131042487e-06, - "loss": 0.9649, - "step": 2395 - }, - { - "epoch": 0.18006914174056818, - "grad_norm": 3.074382695209561, - "learning_rate": 3.7685394216267444e-06, - "loss": 1.0566, - "step": 2396 - }, - { - "epoch": 0.18014429580640312, - "grad_norm": 2.592804242254622, - "learning_rate": 3.7683120253565076e-06, - "loss": 0.9965, - "step": 2397 - }, - { - "epoch": 0.1802194498722381, - "grad_norm": 2.3444576975170137, - "learning_rate": 3.7680845243070128e-06, - "loss": 1.004, - "step": 2398 - }, - { - "epoch": 0.18029460393807306, - "grad_norm": 3.420204207586769, - "learning_rate": 3.767856918491739e-06, - "loss": 1.0075, - "step": 2399 - }, - { - "epoch": 0.180369758003908, - "grad_norm": 1.5432767536483663, - "learning_rate": 3.767629207924172e-06, - "loss": 1.1136, - "step": 2400 - }, - { - "epoch": 0.18044491206974297, - "grad_norm": 1.89894907145736, - "learning_rate": 3.767401392617807e-06, - "loss": 0.9924, - "step": 2401 - }, - { - "epoch": 0.18052006613557794, - "grad_norm": 1.5410916656569775, - "learning_rate": 3.7671734725861413e-06, - "loss": 0.9964, - "step": 2402 - }, - { - "epoch": 0.1805952202014129, - "grad_norm": 2.888462226693328, - "learning_rate": 3.7669454478426806e-06, - "loss": 1.0152, - "step": 2403 - }, - { - "epoch": 0.18067037426724786, - "grad_norm": 4.031904300553072, - "learning_rate": 3.7667173184009356e-06, - "loss": 0.9587, - "step": 2404 - }, - { - "epoch": 0.18074552833308283, - "grad_norm": 1.9401070067792412, - "learning_rate": 3.7664890842744248e-06, - "loss": 0.9591, - "step": 2405 - }, - { - "epoch": 0.18082068239891777, - "grad_norm": 3.356257353762166, - "learning_rate": 3.7662607454766712e-06, - "loss": 0.9184, - "step": 2406 - }, - { - "epoch": 0.18089583646475274, - "grad_norm": 1.4666716297383577, - "learning_rate": 3.7660323020212047e-06, - "loss": 1.1235, - "step": 2407 - }, - { - "epoch": 0.1809709905305877, - "grad_norm": 12.868604534525709, - "learning_rate": 3.765803753921562e-06, - "loss": 1.0381, - "step": 2408 - }, - { - "epoch": 0.18104614459642265, - "grad_norm": 2.123132895207092, - "learning_rate": 3.7655751011912852e-06, - "loss": 0.9865, - "step": 2409 - }, - { - "epoch": 0.18112129866225762, - "grad_norm": 1.6126703394143234, - "learning_rate": 3.7653463438439225e-06, - "loss": 0.9644, - "step": 2410 - }, - { - "epoch": 0.1811964527280926, - "grad_norm": 2.0542416397041596, - "learning_rate": 3.7651174818930293e-06, - "loss": 1.1213, - "step": 2411 - }, - { - "epoch": 0.18127160679392756, - "grad_norm": 6.236959356755683, - "learning_rate": 3.764888515352166e-06, - "loss": 0.9643, - "step": 2412 - }, - { - "epoch": 0.1813467608597625, - "grad_norm": 1.9708454545705485, - "learning_rate": 3.7646594442349004e-06, - "loss": 1.0468, - "step": 2413 - }, - { - "epoch": 0.18142191492559748, - "grad_norm": 0.9180165379848404, - "learning_rate": 3.764430268554805e-06, - "loss": 0.8808, - "step": 2414 - }, - { - "epoch": 0.18149706899143245, - "grad_norm": 0.8398251553628292, - "learning_rate": 3.7642009883254594e-06, - "loss": 0.8627, - "step": 2415 - }, - { - "epoch": 0.1815722230572674, - "grad_norm": 1.764540752931511, - "learning_rate": 3.7639716035604502e-06, - "loss": 1.0622, - "step": 2416 - }, - { - "epoch": 0.18164737712310236, - "grad_norm": 1.7845881453378045, - "learning_rate": 3.763742114273369e-06, - "loss": 1.016, - "step": 2417 - }, - { - "epoch": 0.18172253118893733, - "grad_norm": 1.5563637596130728, - "learning_rate": 3.763512520477813e-06, - "loss": 1.026, - "step": 2418 - }, - { - "epoch": 0.18179768525477227, - "grad_norm": 1.3869318408844593, - "learning_rate": 3.7632828221873876e-06, - "loss": 0.9905, - "step": 2419 - }, - { - "epoch": 0.18187283932060724, - "grad_norm": 1.8090205502584331, - "learning_rate": 3.763053019415703e-06, - "loss": 0.9828, - "step": 2420 - }, - { - "epoch": 0.1819479933864422, - "grad_norm": 1.7052856179233697, - "learning_rate": 3.7628231121763757e-06, - "loss": 1.0451, - "step": 2421 - }, - { - "epoch": 0.18202314745227716, - "grad_norm": 6.443256952050453, - "learning_rate": 3.7625931004830287e-06, - "loss": 1.1064, - "step": 2422 - }, - { - "epoch": 0.18209830151811213, - "grad_norm": 1.9541495115697047, - "learning_rate": 3.762362984349291e-06, - "loss": 0.9386, - "step": 2423 - }, - { - "epoch": 0.1821734555839471, - "grad_norm": 1.9204441109261468, - "learning_rate": 3.762132763788798e-06, - "loss": 1.0227, - "step": 2424 - }, - { - "epoch": 0.18224860964978204, - "grad_norm": 2.280139600595886, - "learning_rate": 3.7619024388151914e-06, - "loss": 0.9942, - "step": 2425 - }, - { - "epoch": 0.182323763715617, - "grad_norm": 1.954312383211629, - "learning_rate": 3.761672009442118e-06, - "loss": 1.0116, - "step": 2426 - }, - { - "epoch": 0.18239891778145198, - "grad_norm": 2.3163557127510415, - "learning_rate": 3.7614414756832328e-06, - "loss": 0.9974, - "step": 2427 - }, - { - "epoch": 0.18247407184728695, - "grad_norm": 2.130856354009373, - "learning_rate": 3.7612108375521942e-06, - "loss": 1.0243, - "step": 2428 - }, - { - "epoch": 0.1825492259131219, - "grad_norm": 1.5337232740043212, - "learning_rate": 3.76098009506267e-06, - "loss": 0.988, - "step": 2429 - }, - { - "epoch": 0.18262437997895686, - "grad_norm": 2.950700359937576, - "learning_rate": 3.7607492482283315e-06, - "loss": 0.9227, - "step": 2430 - }, - { - "epoch": 0.18269953404479183, - "grad_norm": 1.8421534074859336, - "learning_rate": 3.7605182970628583e-06, - "loss": 0.9445, - "step": 2431 - }, - { - "epoch": 0.18277468811062678, - "grad_norm": 2.1155751976112898, - "learning_rate": 3.7602872415799347e-06, - "loss": 0.9984, - "step": 2432 - }, - { - "epoch": 0.18284984217646175, - "grad_norm": 7.0052636656058995, - "learning_rate": 3.7600560817932506e-06, - "loss": 1.0213, - "step": 2433 - }, - { - "epoch": 0.18292499624229672, - "grad_norm": 1.8224817538296487, - "learning_rate": 3.759824817716504e-06, - "loss": 0.9468, - "step": 2434 - }, - { - "epoch": 0.18300015030813166, - "grad_norm": 2.5010704609776884, - "learning_rate": 3.7595934493633986e-06, - "loss": 1.0102, - "step": 2435 - }, - { - "epoch": 0.18307530437396663, - "grad_norm": 1.782992263811394, - "learning_rate": 3.7593619767476435e-06, - "loss": 1.1201, - "step": 2436 - }, - { - "epoch": 0.1831504584398016, - "grad_norm": 0.9742203664269149, - "learning_rate": 3.759130399882954e-06, - "loss": 0.8269, - "step": 2437 - }, - { - "epoch": 0.18322561250563654, - "grad_norm": 1.9084425027374765, - "learning_rate": 3.758898718783052e-06, - "loss": 0.9381, - "step": 2438 - }, - { - "epoch": 0.1833007665714715, - "grad_norm": 1.8748156774838385, - "learning_rate": 3.758666933461666e-06, - "loss": 1.0764, - "step": 2439 - }, - { - "epoch": 0.18337592063730648, - "grad_norm": 1.5257174914623848, - "learning_rate": 3.7584350439325295e-06, - "loss": 0.9479, - "step": 2440 - }, - { - "epoch": 0.18345107470314145, - "grad_norm": 1.692649402081633, - "learning_rate": 3.7582030502093833e-06, - "loss": 0.8764, - "step": 2441 - }, - { - "epoch": 0.1835262287689764, - "grad_norm": 1.6070360390420713, - "learning_rate": 3.7579709523059736e-06, - "loss": 0.9457, - "step": 2442 - }, - { - "epoch": 0.18360138283481137, - "grad_norm": 1.9639335935984947, - "learning_rate": 3.7577387502360535e-06, - "loss": 0.9953, - "step": 2443 - }, - { - "epoch": 0.18367653690064634, - "grad_norm": 2.5470961024257948, - "learning_rate": 3.757506444013381e-06, - "loss": 1.0629, - "step": 2444 - }, - { - "epoch": 0.18375169096648128, - "grad_norm": 1.4653805168898608, - "learning_rate": 3.7572740336517225e-06, - "loss": 1.0039, - "step": 2445 - }, - { - "epoch": 0.18382684503231625, - "grad_norm": 2.719272607143846, - "learning_rate": 3.757041519164848e-06, - "loss": 1.1087, - "step": 2446 - }, - { - "epoch": 0.18390199909815122, - "grad_norm": 0.8069829329939538, - "learning_rate": 3.7568089005665353e-06, - "loss": 0.8451, - "step": 2447 - }, - { - "epoch": 0.18397715316398616, - "grad_norm": 2.112713016401625, - "learning_rate": 3.7565761778705682e-06, - "loss": 1.0885, - "step": 2448 - }, - { - "epoch": 0.18405230722982113, - "grad_norm": 1.519023349479539, - "learning_rate": 3.756343351090736e-06, - "loss": 1.0041, - "step": 2449 - }, - { - "epoch": 0.1841274612956561, - "grad_norm": 2.7761682955052027, - "learning_rate": 3.756110420240835e-06, - "loss": 1.0022, - "step": 2450 - }, - { - "epoch": 0.18420261536149105, - "grad_norm": 1.6546525268893197, - "learning_rate": 3.755877385334667e-06, - "loss": 1.0455, - "step": 2451 - }, - { - "epoch": 0.18427776942732602, - "grad_norm": 1.4381809280442295, - "learning_rate": 3.7556442463860406e-06, - "loss": 1.0352, - "step": 2452 - }, - { - "epoch": 0.184352923493161, - "grad_norm": 1.9150930938801745, - "learning_rate": 3.7554110034087686e-06, - "loss": 1.0142, - "step": 2453 - }, - { - "epoch": 0.18442807755899593, - "grad_norm": 2.6568438595400803, - "learning_rate": 3.7551776564166736e-06, - "loss": 0.997, - "step": 2454 - }, - { - "epoch": 0.1845032316248309, - "grad_norm": 2.678571374075753, - "learning_rate": 3.7549442054235813e-06, - "loss": 1.0196, - "step": 2455 - }, - { - "epoch": 0.18457838569066587, - "grad_norm": 2.588234886447344, - "learning_rate": 3.754710650443325e-06, - "loss": 0.9053, - "step": 2456 - }, - { - "epoch": 0.18465353975650084, - "grad_norm": 2.471842313953062, - "learning_rate": 3.754476991489743e-06, - "loss": 1.0456, - "step": 2457 - }, - { - "epoch": 0.18472869382233578, - "grad_norm": 1.9502113276680293, - "learning_rate": 3.754243228576681e-06, - "loss": 1.0654, - "step": 2458 - }, - { - "epoch": 0.18480384788817075, - "grad_norm": 1.3545907994728217, - "learning_rate": 3.7540093617179904e-06, - "loss": 1.0767, - "step": 2459 - }, - { - "epoch": 0.18487900195400572, - "grad_norm": 1.716310469082368, - "learning_rate": 3.7537753909275284e-06, - "loss": 1.0515, - "step": 2460 - }, - { - "epoch": 0.18495415601984067, - "grad_norm": 1.7077882253696393, - "learning_rate": 3.7535413162191584e-06, - "loss": 1.0606, - "step": 2461 - }, - { - "epoch": 0.18502931008567564, - "grad_norm": 2.0583344211137913, - "learning_rate": 3.7533071376067514e-06, - "loss": 0.9844, - "step": 2462 - }, - { - "epoch": 0.1851044641515106, - "grad_norm": 1.7670883596118387, - "learning_rate": 3.7530728551041825e-06, - "loss": 1.0099, - "step": 2463 - }, - { - "epoch": 0.18517961821734555, - "grad_norm": 5.95577695555224, - "learning_rate": 3.7528384687253335e-06, - "loss": 1.0171, - "step": 2464 - }, - { - "epoch": 0.18525477228318052, - "grad_norm": 1.9198764861631574, - "learning_rate": 3.752603978484094e-06, - "loss": 0.9781, - "step": 2465 - }, - { - "epoch": 0.1853299263490155, - "grad_norm": 2.9385091793117497, - "learning_rate": 3.752369384394357e-06, - "loss": 1.0445, - "step": 2466 - }, - { - "epoch": 0.18540508041485043, - "grad_norm": 2.096998285572113, - "learning_rate": 3.7521346864700235e-06, - "loss": 1.0463, - "step": 2467 - }, - { - "epoch": 0.1854802344806854, - "grad_norm": 2.0942192121239254, - "learning_rate": 3.751899884725001e-06, - "loss": 0.989, - "step": 2468 - }, - { - "epoch": 0.18555538854652037, - "grad_norm": 0.821986135068848, - "learning_rate": 3.751664979173202e-06, - "loss": 0.8849, - "step": 2469 - }, - { - "epoch": 0.18563054261235531, - "grad_norm": 1.4997815926809572, - "learning_rate": 3.7514299698285447e-06, - "loss": 0.9681, - "step": 2470 - }, - { - "epoch": 0.18570569667819029, - "grad_norm": 0.7206722345942838, - "learning_rate": 3.751194856704955e-06, - "loss": 0.8257, - "step": 2471 - }, - { - "epoch": 0.18578085074402526, - "grad_norm": 1.7568592793057487, - "learning_rate": 3.750959639816365e-06, - "loss": 1.0395, - "step": 2472 - }, - { - "epoch": 0.18585600480986023, - "grad_norm": 3.8366167822764643, - "learning_rate": 3.750724319176711e-06, - "loss": 0.8816, - "step": 2473 - }, - { - "epoch": 0.18593115887569517, - "grad_norm": 0.7997769268281094, - "learning_rate": 3.7504888947999367e-06, - "loss": 0.9107, - "step": 2474 - }, - { - "epoch": 0.18600631294153014, - "grad_norm": 1.6280989807744277, - "learning_rate": 3.7502533666999935e-06, - "loss": 1.0053, - "step": 2475 - }, - { - "epoch": 0.1860814670073651, - "grad_norm": 1.7189497398143474, - "learning_rate": 3.7500177348908354e-06, - "loss": 0.9629, - "step": 2476 - }, - { - "epoch": 0.18615662107320005, - "grad_norm": 1.5745553918024757, - "learning_rate": 3.749781999386425e-06, - "loss": 1.0143, - "step": 2477 - }, - { - "epoch": 0.18623177513903502, - "grad_norm": 2.8884150124557393, - "learning_rate": 3.749546160200731e-06, - "loss": 0.9442, - "step": 2478 - }, - { - "epoch": 0.18630692920487, - "grad_norm": 1.6869135650036895, - "learning_rate": 3.7493102173477277e-06, - "loss": 1.1328, - "step": 2479 - }, - { - "epoch": 0.18638208327070493, - "grad_norm": 2.4897786998839377, - "learning_rate": 3.7490741708413954e-06, - "loss": 1.1279, - "step": 2480 - }, - { - "epoch": 0.1864572373365399, - "grad_norm": 2.0578519291673856, - "learning_rate": 3.748838020695721e-06, - "loss": 0.9912, - "step": 2481 - }, - { - "epoch": 0.18653239140237488, - "grad_norm": 2.332070406228799, - "learning_rate": 3.748601766924697e-06, - "loss": 1.0162, - "step": 2482 - }, - { - "epoch": 0.18660754546820982, - "grad_norm": 1.0157734689031623, - "learning_rate": 3.7483654095423223e-06, - "loss": 0.8194, - "step": 2483 - }, - { - "epoch": 0.1866826995340448, - "grad_norm": 2.1911006177174226, - "learning_rate": 3.7481289485626024e-06, - "loss": 1.0079, - "step": 2484 - }, - { - "epoch": 0.18675785359987976, - "grad_norm": 1.9417152082190379, - "learning_rate": 3.7478923839995477e-06, - "loss": 1.0007, - "step": 2485 - }, - { - "epoch": 0.18683300766571473, - "grad_norm": 1.6655653493767315, - "learning_rate": 3.7476557158671768e-06, - "loss": 0.9902, - "step": 2486 - }, - { - "epoch": 0.18690816173154967, - "grad_norm": 2.4699377751717178, - "learning_rate": 3.747418944179512e-06, - "loss": 1.0411, - "step": 2487 - }, - { - "epoch": 0.18698331579738464, - "grad_norm": 6.617194417794495, - "learning_rate": 3.747182068950584e-06, - "loss": 1.008, - "step": 2488 - }, - { - "epoch": 0.1870584698632196, - "grad_norm": 1.6372689935942883, - "learning_rate": 3.746945090194428e-06, - "loss": 1.0889, - "step": 2489 - }, - { - "epoch": 0.18713362392905455, - "grad_norm": 1.582226927158835, - "learning_rate": 3.7467080079250853e-06, - "loss": 1.0992, - "step": 2490 - }, - { - "epoch": 0.18720877799488952, - "grad_norm": 0.9652433919080207, - "learning_rate": 3.7464708221566052e-06, - "loss": 0.8863, - "step": 2491 - }, - { - "epoch": 0.1872839320607245, - "grad_norm": 1.8372434475914654, - "learning_rate": 3.7462335329030408e-06, - "loss": 1.1562, - "step": 2492 - }, - { - "epoch": 0.18735908612655944, - "grad_norm": 2.147471706750225, - "learning_rate": 3.7459961401784527e-06, - "loss": 1.0568, - "step": 2493 - }, - { - "epoch": 0.1874342401923944, - "grad_norm": 1.837574590090293, - "learning_rate": 3.7457586439969076e-06, - "loss": 1.0478, - "step": 2494 - }, - { - "epoch": 0.18750939425822938, - "grad_norm": 2.215158914415235, - "learning_rate": 3.745521044372478e-06, - "loss": 1.1185, - "step": 2495 - }, - { - "epoch": 0.18758454832406432, - "grad_norm": 2.689774637389733, - "learning_rate": 3.745283341319242e-06, - "loss": 1.0758, - "step": 2496 - }, - { - "epoch": 0.1876597023898993, - "grad_norm": 2.0475628112176065, - "learning_rate": 3.7450455348512854e-06, - "loss": 0.8519, - "step": 2497 - }, - { - "epoch": 0.18773485645573426, - "grad_norm": 2.325878108746373, - "learning_rate": 3.7448076249826987e-06, - "loss": 0.9534, - "step": 2498 - }, - { - "epoch": 0.1878100105215692, - "grad_norm": 1.9325084376181787, - "learning_rate": 3.7445696117275785e-06, - "loss": 0.979, - "step": 2499 - }, - { - "epoch": 0.18788516458740417, - "grad_norm": 2.2288754947437814, - "learning_rate": 3.7443314951000285e-06, - "loss": 0.9404, - "step": 2500 - }, - { - "epoch": 0.18796031865323914, - "grad_norm": 2.8581640462170625, - "learning_rate": 3.744093275114158e-06, - "loss": 1.012, - "step": 2501 - }, - { - "epoch": 0.18803547271907411, - "grad_norm": 0.7935851759502801, - "learning_rate": 3.7438549517840823e-06, - "loss": 0.8821, - "step": 2502 - }, - { - "epoch": 0.18811062678490906, - "grad_norm": 0.9237133704886757, - "learning_rate": 3.743616525123923e-06, - "loss": 0.8996, - "step": 2503 - }, - { - "epoch": 0.18818578085074403, - "grad_norm": 1.4269548158716543, - "learning_rate": 3.743377995147808e-06, - "loss": 0.9783, - "step": 2504 - }, - { - "epoch": 0.188260934916579, - "grad_norm": 2.0907352435549025, - "learning_rate": 3.743139361869871e-06, - "loss": 0.9811, - "step": 2505 - }, - { - "epoch": 0.18833608898241394, - "grad_norm": 2.11567414309788, - "learning_rate": 3.7429006253042524e-06, - "loss": 1.0475, - "step": 2506 - }, - { - "epoch": 0.1884112430482489, - "grad_norm": 2.4944643723112634, - "learning_rate": 3.742661785465097e-06, - "loss": 0.9325, - "step": 2507 - }, - { - "epoch": 0.18848639711408388, - "grad_norm": 2.0136523389192367, - "learning_rate": 3.7424228423665578e-06, - "loss": 1.0269, - "step": 2508 - }, - { - "epoch": 0.18856155117991882, - "grad_norm": 1.8441820446648254, - "learning_rate": 3.7421837960227933e-06, - "loss": 0.9846, - "step": 2509 - }, - { - "epoch": 0.1886367052457538, - "grad_norm": 1.6279881952546675, - "learning_rate": 3.741944646447967e-06, - "loss": 0.9686, - "step": 2510 - }, - { - "epoch": 0.18871185931158876, - "grad_norm": 1.5572850749238845, - "learning_rate": 3.7417053936562503e-06, - "loss": 0.9917, - "step": 2511 - }, - { - "epoch": 0.1887870133774237, - "grad_norm": 1.5104301539728733, - "learning_rate": 3.7414660376618195e-06, - "loss": 0.9138, - "step": 2512 - }, - { - "epoch": 0.18886216744325868, - "grad_norm": 1.55924312668448, - "learning_rate": 3.7412265784788577e-06, - "loss": 1.1101, - "step": 2513 - }, - { - "epoch": 0.18893732150909365, - "grad_norm": 4.002091814489213, - "learning_rate": 3.7409870161215532e-06, - "loss": 0.9373, - "step": 2514 - }, - { - "epoch": 0.1890124755749286, - "grad_norm": 1.5032940299626152, - "learning_rate": 3.740747350604102e-06, - "loss": 1.0729, - "step": 2515 - }, - { - "epoch": 0.18908762964076356, - "grad_norm": 1.7522735812951225, - "learning_rate": 3.7405075819407045e-06, - "loss": 1.0046, - "step": 2516 - }, - { - "epoch": 0.18916278370659853, - "grad_norm": 0.7585528188833983, - "learning_rate": 3.7402677101455672e-06, - "loss": 0.8478, - "step": 2517 - }, - { - "epoch": 0.1892379377724335, - "grad_norm": 9.905195980376561, - "learning_rate": 3.740027735232904e-06, - "loss": 1.0713, - "step": 2518 - }, - { - "epoch": 0.18931309183826844, - "grad_norm": 2.6573429327525186, - "learning_rate": 3.7397876572169355e-06, - "loss": 1.1001, - "step": 2519 - }, - { - "epoch": 0.1893882459041034, - "grad_norm": 2.1570371213927695, - "learning_rate": 3.7395474761118856e-06, - "loss": 0.9147, - "step": 2520 - }, - { - "epoch": 0.18946339996993838, - "grad_norm": 1.6908490185913303, - "learning_rate": 3.7393071919319864e-06, - "loss": 1.0854, - "step": 2521 - }, - { - "epoch": 0.18953855403577333, - "grad_norm": 1.502813958167124, - "learning_rate": 3.739066804691476e-06, - "loss": 1.074, - "step": 2522 - }, - { - "epoch": 0.1896137081016083, - "grad_norm": 2.814242353098371, - "learning_rate": 3.738826314404598e-06, - "loss": 1.0156, - "step": 2523 - }, - { - "epoch": 0.18968886216744327, - "grad_norm": 1.5375943224747521, - "learning_rate": 3.738585721085603e-06, - "loss": 1.1416, - "step": 2524 - }, - { - "epoch": 0.1897640162332782, - "grad_norm": 4.214325796126061, - "learning_rate": 3.738345024748746e-06, - "loss": 0.9887, - "step": 2525 - }, - { - "epoch": 0.18983917029911318, - "grad_norm": 2.042630286106072, - "learning_rate": 3.73810422540829e-06, - "loss": 1.0418, - "step": 2526 - }, - { - "epoch": 0.18991432436494815, - "grad_norm": 2.3112183189387046, - "learning_rate": 3.7378633230785025e-06, - "loss": 1.077, - "step": 2527 - }, - { - "epoch": 0.1899894784307831, - "grad_norm": 1.503613331990403, - "learning_rate": 3.7376223177736587e-06, - "loss": 1.0198, - "step": 2528 - }, - { - "epoch": 0.19006463249661806, - "grad_norm": 1.7064620057466515, - "learning_rate": 3.737381209508039e-06, - "loss": 0.8485, - "step": 2529 - }, - { - "epoch": 0.19013978656245303, - "grad_norm": 3.308271768479157, - "learning_rate": 3.7371399982959294e-06, - "loss": 0.9816, - "step": 2530 - }, - { - "epoch": 0.190214940628288, - "grad_norm": 1.787716688346951, - "learning_rate": 3.736898684151623e-06, - "loss": 1.0499, - "step": 2531 - }, - { - "epoch": 0.19029009469412295, - "grad_norm": 2.088745741414649, - "learning_rate": 3.736657267089419e-06, - "loss": 0.9859, - "step": 2532 - }, - { - "epoch": 0.19036524875995792, - "grad_norm": 1.4524642098198135, - "learning_rate": 3.7364157471236215e-06, - "loss": 0.9961, - "step": 2533 - }, - { - "epoch": 0.1904404028257929, - "grad_norm": 2.173164808003609, - "learning_rate": 3.7361741242685417e-06, - "loss": 1.0634, - "step": 2534 - }, - { - "epoch": 0.19051555689162783, - "grad_norm": 1.9562407398918773, - "learning_rate": 3.7359323985384966e-06, - "loss": 0.9469, - "step": 2535 - }, - { - "epoch": 0.1905907109574628, - "grad_norm": 1.8199798541997636, - "learning_rate": 3.7356905699478096e-06, - "loss": 1.0059, - "step": 2536 - }, - { - "epoch": 0.19066586502329777, - "grad_norm": 2.478341122760174, - "learning_rate": 3.7354486385108103e-06, - "loss": 0.9645, - "step": 2537 - }, - { - "epoch": 0.1907410190891327, - "grad_norm": 2.5874738940704964, - "learning_rate": 3.735206604241834e-06, - "loss": 1.0643, - "step": 2538 - }, - { - "epoch": 0.19081617315496768, - "grad_norm": 3.06491336320461, - "learning_rate": 3.734964467155221e-06, - "loss": 1.0088, - "step": 2539 - }, - { - "epoch": 0.19089132722080265, - "grad_norm": 2.447361291962426, - "learning_rate": 3.73472222726532e-06, - "loss": 0.9237, - "step": 2540 - }, - { - "epoch": 0.1909664812866376, - "grad_norm": 2.313006268319205, - "learning_rate": 3.7344798845864846e-06, - "loss": 0.9371, - "step": 2541 - }, - { - "epoch": 0.19104163535247257, - "grad_norm": 2.4968099268142994, - "learning_rate": 3.734237439133074e-06, - "loss": 1.0578, - "step": 2542 - }, - { - "epoch": 0.19111678941830754, - "grad_norm": 1.900744821722284, - "learning_rate": 3.7339948909194543e-06, - "loss": 1.0484, - "step": 2543 - }, - { - "epoch": 0.19119194348414248, - "grad_norm": 2.1839542534896306, - "learning_rate": 3.7337522399599973e-06, - "loss": 0.9601, - "step": 2544 - }, - { - "epoch": 0.19126709754997745, - "grad_norm": 1.6725516687916795, - "learning_rate": 3.7335094862690814e-06, - "loss": 0.9641, - "step": 2545 - }, - { - "epoch": 0.19134225161581242, - "grad_norm": 1.5276747244940698, - "learning_rate": 3.7332666298610906e-06, - "loss": 1.0492, - "step": 2546 - }, - { - "epoch": 0.1914174056816474, - "grad_norm": 0.7365513066188614, - "learning_rate": 3.733023670750414e-06, - "loss": 0.8291, - "step": 2547 - }, - { - "epoch": 0.19149255974748233, - "grad_norm": 2.0931233849096347, - "learning_rate": 3.7327806089514497e-06, - "loss": 1.0106, - "step": 2548 - }, - { - "epoch": 0.1915677138133173, - "grad_norm": 1.829201080010131, - "learning_rate": 3.7325374444785983e-06, - "loss": 0.9974, - "step": 2549 - }, - { - "epoch": 0.19164286787915227, - "grad_norm": 1.633771769722253, - "learning_rate": 3.7322941773462694e-06, - "loss": 1.0999, - "step": 2550 - }, - { - "epoch": 0.19171802194498722, - "grad_norm": 1.9492457202582096, - "learning_rate": 3.732050807568877e-06, - "loss": 1.0665, - "step": 2551 - }, - { - "epoch": 0.1917931760108222, - "grad_norm": 1.5900443885413194, - "learning_rate": 3.731807335160842e-06, - "loss": 1.0312, - "step": 2552 - }, - { - "epoch": 0.19186833007665716, - "grad_norm": 1.990138058114674, - "learning_rate": 3.7315637601365902e-06, - "loss": 1.0404, - "step": 2553 - }, - { - "epoch": 0.1919434841424921, - "grad_norm": 1.3328570270144018, - "learning_rate": 3.731320082510556e-06, - "loss": 1.0157, - "step": 2554 - }, - { - "epoch": 0.19201863820832707, - "grad_norm": 3.330617195416607, - "learning_rate": 3.7310763022971764e-06, - "loss": 1.0347, - "step": 2555 - }, - { - "epoch": 0.19209379227416204, - "grad_norm": 2.6467003991053675, - "learning_rate": 3.730832419510897e-06, - "loss": 1.115, - "step": 2556 - }, - { - "epoch": 0.19216894633999698, - "grad_norm": 0.810216997420216, - "learning_rate": 3.73058843416617e-06, - "loss": 0.9182, - "step": 2557 - }, - { - "epoch": 0.19224410040583195, - "grad_norm": 2.101723363455948, - "learning_rate": 3.7303443462774505e-06, - "loss": 1.0561, - "step": 2558 - }, - { - "epoch": 0.19231925447166692, - "grad_norm": 1.4155536187303575, - "learning_rate": 3.730100155859203e-06, - "loss": 0.9144, - "step": 2559 - }, - { - "epoch": 0.19239440853750187, - "grad_norm": 2.8099733659561283, - "learning_rate": 3.7298558629258966e-06, - "loss": 1.0258, - "step": 2560 - }, - { - "epoch": 0.19246956260333684, - "grad_norm": 0.6911440853354555, - "learning_rate": 3.729611467492005e-06, - "loss": 0.7957, - "step": 2561 - }, - { - "epoch": 0.1925447166691718, - "grad_norm": 2.410482078550509, - "learning_rate": 3.7293669695720117e-06, - "loss": 0.9934, - "step": 2562 - }, - { - "epoch": 0.19261987073500678, - "grad_norm": 1.8984189130713052, - "learning_rate": 3.7291223691804038e-06, - "loss": 1.0201, - "step": 2563 - }, - { - "epoch": 0.19269502480084172, - "grad_norm": 1.4554068584459297, - "learning_rate": 3.728877666331673e-06, - "loss": 1.0291, - "step": 2564 - }, - { - "epoch": 0.1927701788666767, - "grad_norm": 1.1520435006750005, - "learning_rate": 3.7286328610403207e-06, - "loss": 1.0479, - "step": 2565 - }, - { - "epoch": 0.19284533293251166, - "grad_norm": 2.7287827335758643, - "learning_rate": 3.7283879533208523e-06, - "loss": 0.8676, - "step": 2566 - }, - { - "epoch": 0.1929204869983466, - "grad_norm": 3.8992549673671135, - "learning_rate": 3.7281429431877795e-06, - "loss": 1.0666, - "step": 2567 - }, - { - "epoch": 0.19299564106418157, - "grad_norm": 2.050272444322282, - "learning_rate": 3.727897830655619e-06, - "loss": 0.9934, - "step": 2568 - }, - { - "epoch": 0.19307079513001654, - "grad_norm": 1.743260348356171, - "learning_rate": 3.727652615738896e-06, - "loss": 0.9561, - "step": 2569 - }, - { - "epoch": 0.19314594919585149, - "grad_norm": 1.630656447891402, - "learning_rate": 3.7274072984521395e-06, - "loss": 1.0357, - "step": 2570 - }, - { - "epoch": 0.19322110326168646, - "grad_norm": 1.5789594804566938, - "learning_rate": 3.7271618788098864e-06, - "loss": 0.9939, - "step": 2571 - }, - { - "epoch": 0.19329625732752143, - "grad_norm": 1.7230319696826824, - "learning_rate": 3.7269163568266774e-06, - "loss": 1.0531, - "step": 2572 - }, - { - "epoch": 0.19337141139335637, - "grad_norm": 2.5061792301831227, - "learning_rate": 3.7266707325170623e-06, - "loss": 1.1085, - "step": 2573 - }, - { - "epoch": 0.19344656545919134, - "grad_norm": 1.6870394890758829, - "learning_rate": 3.7264250058955938e-06, - "loss": 0.9645, - "step": 2574 - }, - { - "epoch": 0.1935217195250263, - "grad_norm": 2.330355094570519, - "learning_rate": 3.726179176976833e-06, - "loss": 1.0518, - "step": 2575 - }, - { - "epoch": 0.19359687359086128, - "grad_norm": 1.9347174567810264, - "learning_rate": 3.7259332457753464e-06, - "loss": 1.0035, - "step": 2576 - }, - { - "epoch": 0.19367202765669622, - "grad_norm": 0.6775175071931744, - "learning_rate": 3.725687212305706e-06, - "loss": 0.8329, - "step": 2577 - }, - { - "epoch": 0.1937471817225312, - "grad_norm": 1.98579800263657, - "learning_rate": 3.7254410765824896e-06, - "loss": 1.0665, - "step": 2578 - }, - { - "epoch": 0.19382233578836616, - "grad_norm": 1.9778968016507703, - "learning_rate": 3.7251948386202827e-06, - "loss": 1.0414, - "step": 2579 - }, - { - "epoch": 0.1938974898542011, - "grad_norm": 4.09497890518214, - "learning_rate": 3.724948498433675e-06, - "loss": 0.9803, - "step": 2580 - }, - { - "epoch": 0.19397264392003608, - "grad_norm": 1.802942728915533, - "learning_rate": 3.7247020560372635e-06, - "loss": 1.0642, - "step": 2581 - }, - { - "epoch": 0.19404779798587105, - "grad_norm": 2.004308589528595, - "learning_rate": 3.724455511445651e-06, - "loss": 0.9182, - "step": 2582 - }, - { - "epoch": 0.194122952051706, - "grad_norm": 1.8101414429211706, - "learning_rate": 3.724208864673446e-06, - "loss": 0.9411, - "step": 2583 - }, - { - "epoch": 0.19419810611754096, - "grad_norm": 2.031073107719805, - "learning_rate": 3.7239621157352633e-06, - "loss": 0.99, - "step": 2584 - }, - { - "epoch": 0.19427326018337593, - "grad_norm": 1.950279360102864, - "learning_rate": 3.723715264645724e-06, - "loss": 1.0195, - "step": 2585 - }, - { - "epoch": 0.19434841424921087, - "grad_norm": 2.8145360570438864, - "learning_rate": 3.723468311419455e-06, - "loss": 1.0331, - "step": 2586 - }, - { - "epoch": 0.19442356831504584, - "grad_norm": 1.5512141240941117, - "learning_rate": 3.7232212560710883e-06, - "loss": 1.0101, - "step": 2587 - }, - { - "epoch": 0.1944987223808808, - "grad_norm": 2.72551368281377, - "learning_rate": 3.7229740986152636e-06, - "loss": 0.9447, - "step": 2588 - }, - { - "epoch": 0.19457387644671575, - "grad_norm": 3.0148228976993305, - "learning_rate": 3.722726839066626e-06, - "loss": 0.9141, - "step": 2589 - }, - { - "epoch": 0.19464903051255072, - "grad_norm": 1.4878535419548993, - "learning_rate": 3.722479477439826e-06, - "loss": 1.0849, - "step": 2590 - }, - { - "epoch": 0.1947241845783857, - "grad_norm": 1.743748055733858, - "learning_rate": 3.722232013749522e-06, - "loss": 0.9948, - "step": 2591 - }, - { - "epoch": 0.19479933864422067, - "grad_norm": 1.6567419509105026, - "learning_rate": 3.721984448010376e-06, - "loss": 1.0388, - "step": 2592 - }, - { - "epoch": 0.1948744927100556, - "grad_norm": 1.7103557555689723, - "learning_rate": 3.7217367802370573e-06, - "loss": 1.012, - "step": 2593 - }, - { - "epoch": 0.19494964677589058, - "grad_norm": 1.9700552231653727, - "learning_rate": 3.7214890104442413e-06, - "loss": 1.0048, - "step": 2594 - }, - { - "epoch": 0.19502480084172555, - "grad_norm": 2.433307563523352, - "learning_rate": 3.7212411386466097e-06, - "loss": 1.0844, - "step": 2595 - }, - { - "epoch": 0.1950999549075605, - "grad_norm": 2.8076874991195875, - "learning_rate": 3.72099316485885e-06, - "loss": 0.9608, - "step": 2596 - }, - { - "epoch": 0.19517510897339546, - "grad_norm": 5.59116273855832, - "learning_rate": 3.7207450890956544e-06, - "loss": 1.0493, - "step": 2597 - }, - { - "epoch": 0.19525026303923043, - "grad_norm": 2.1405571697097123, - "learning_rate": 3.720496911371723e-06, - "loss": 0.9562, - "step": 2598 - }, - { - "epoch": 0.19532541710506537, - "grad_norm": 2.1300202700944384, - "learning_rate": 3.720248631701762e-06, - "loss": 0.895, - "step": 2599 - }, - { - "epoch": 0.19540057117090034, - "grad_norm": 2.9618929727962495, - "learning_rate": 3.720000250100482e-06, - "loss": 0.9914, - "step": 2600 - }, - { - "epoch": 0.19547572523673531, - "grad_norm": 2.8687162974918947, - "learning_rate": 3.719751766582601e-06, - "loss": 0.93, - "step": 2601 - }, - { - "epoch": 0.19555087930257026, - "grad_norm": 1.6214621251729444, - "learning_rate": 3.7195031811628422e-06, - "loss": 0.9815, - "step": 2602 - }, - { - "epoch": 0.19562603336840523, - "grad_norm": 2.2357074525903684, - "learning_rate": 3.719254493855936e-06, - "loss": 1.0777, - "step": 2603 - }, - { - "epoch": 0.1957011874342402, - "grad_norm": 0.8896786530741191, - "learning_rate": 3.719005704676617e-06, - "loss": 0.9444, - "step": 2604 - }, - { - "epoch": 0.19577634150007514, - "grad_norm": 1.6130387167780416, - "learning_rate": 3.7187568136396274e-06, - "loss": 1.0548, - "step": 2605 - }, - { - "epoch": 0.1958514955659101, - "grad_norm": 1.7586930845550115, - "learning_rate": 3.7185078207597158e-06, - "loss": 0.9495, - "step": 2606 - }, - { - "epoch": 0.19592664963174508, - "grad_norm": 6.167138708794835, - "learning_rate": 3.7182587260516343e-06, - "loss": 0.9759, - "step": 2607 - }, - { - "epoch": 0.19600180369758005, - "grad_norm": 1.5465850558344407, - "learning_rate": 3.7180095295301443e-06, - "loss": 0.9941, - "step": 2608 - }, - { - "epoch": 0.196076957763415, - "grad_norm": 1.7057705551445352, - "learning_rate": 3.717760231210011e-06, - "loss": 1.0893, - "step": 2609 - }, - { - "epoch": 0.19615211182924996, - "grad_norm": 1.5531986334249226, - "learning_rate": 3.7175108311060057e-06, - "loss": 0.93, - "step": 2610 - }, - { - "epoch": 0.19622726589508493, - "grad_norm": 2.3074799770094034, - "learning_rate": 3.717261329232907e-06, - "loss": 1.0319, - "step": 2611 - }, - { - "epoch": 0.19630241996091988, - "grad_norm": 0.7230375778779344, - "learning_rate": 3.717011725605499e-06, - "loss": 0.7597, - "step": 2612 - }, - { - "epoch": 0.19637757402675485, - "grad_norm": 1.8488889296136346, - "learning_rate": 3.7167620202385715e-06, - "loss": 0.9993, - "step": 2613 - }, - { - "epoch": 0.19645272809258982, - "grad_norm": 1.799364372452585, - "learning_rate": 3.71651221314692e-06, - "loss": 0.905, - "step": 2614 - }, - { - "epoch": 0.19652788215842476, - "grad_norm": 2.1376563879436423, - "learning_rate": 3.7162623043453476e-06, - "loss": 1.1163, - "step": 2615 - }, - { - "epoch": 0.19660303622425973, - "grad_norm": 1.6318502556785068, - "learning_rate": 3.716012293848661e-06, - "loss": 1.0385, - "step": 2616 - }, - { - "epoch": 0.1966781902900947, - "grad_norm": 2.2541586273482475, - "learning_rate": 3.7157621816716747e-06, - "loss": 1.0799, - "step": 2617 - }, - { - "epoch": 0.19675334435592964, - "grad_norm": 1.6657044096548548, - "learning_rate": 3.71551196782921e-06, - "loss": 1.0815, - "step": 2618 - }, - { - "epoch": 0.1968284984217646, - "grad_norm": 2.852111763507111, - "learning_rate": 3.7152616523360913e-06, - "loss": 1.0747, - "step": 2619 - }, - { - "epoch": 0.19690365248759958, - "grad_norm": 1.618224691338352, - "learning_rate": 3.7150112352071514e-06, - "loss": 1.0692, - "step": 2620 - }, - { - "epoch": 0.19697880655343455, - "grad_norm": 0.7057370095092861, - "learning_rate": 3.714760716457229e-06, - "loss": 0.8605, - "step": 2621 - }, - { - "epoch": 0.1970539606192695, - "grad_norm": 2.0360501425813338, - "learning_rate": 3.7145100961011675e-06, - "loss": 0.9808, - "step": 2622 - }, - { - "epoch": 0.19712911468510447, - "grad_norm": 1.7380298218357981, - "learning_rate": 3.714259374153818e-06, - "loss": 0.9872, - "step": 2623 - }, - { - "epoch": 0.19720426875093944, - "grad_norm": 1.434911968973572, - "learning_rate": 3.714008550630036e-06, - "loss": 0.8197, - "step": 2624 - }, - { - "epoch": 0.19727942281677438, - "grad_norm": 1.5831301793052508, - "learning_rate": 3.713757625544684e-06, - "loss": 1.0158, - "step": 2625 - }, - { - "epoch": 0.19735457688260935, - "grad_norm": 1.6518204674323471, - "learning_rate": 3.7135065989126303e-06, - "loss": 0.9556, - "step": 2626 - }, - { - "epoch": 0.19742973094844432, - "grad_norm": 1.8061724741840808, - "learning_rate": 3.7132554707487493e-06, - "loss": 1.0136, - "step": 2627 - }, - { - "epoch": 0.19750488501427926, - "grad_norm": 17.2360987281123, - "learning_rate": 3.713004241067921e-06, - "loss": 0.9439, - "step": 2628 - }, - { - "epoch": 0.19758003908011423, - "grad_norm": 1.6329953762019351, - "learning_rate": 3.712752909885032e-06, - "loss": 0.985, - "step": 2629 - }, - { - "epoch": 0.1976551931459492, - "grad_norm": 1.7336431110329151, - "learning_rate": 3.7125014772149746e-06, - "loss": 1.0747, - "step": 2630 - }, - { - "epoch": 0.19773034721178415, - "grad_norm": 1.7018386462612503, - "learning_rate": 3.712249943072647e-06, - "loss": 1.0117, - "step": 2631 - }, - { - "epoch": 0.19780550127761912, - "grad_norm": 1.4949520525691087, - "learning_rate": 3.7119983074729532e-06, - "loss": 1.0523, - "step": 2632 - }, - { - "epoch": 0.1978806553434541, - "grad_norm": 1.6168319384986316, - "learning_rate": 3.7117465704308045e-06, - "loss": 1.0718, - "step": 2633 - }, - { - "epoch": 0.19795580940928903, - "grad_norm": 1.697637203441497, - "learning_rate": 3.7114947319611164e-06, - "loss": 0.8898, - "step": 2634 - }, - { - "epoch": 0.198030963475124, - "grad_norm": 1.8234874231455491, - "learning_rate": 3.711242792078812e-06, - "loss": 0.9323, - "step": 2635 - }, - { - "epoch": 0.19810611754095897, - "grad_norm": 1.5794169359334564, - "learning_rate": 3.7109907507988192e-06, - "loss": 1.0036, - "step": 2636 - }, - { - "epoch": 0.19818127160679394, - "grad_norm": 1.9651605490987576, - "learning_rate": 3.710738608136073e-06, - "loss": 0.9973, - "step": 2637 - }, - { - "epoch": 0.19825642567262888, - "grad_norm": 1.7022123778913167, - "learning_rate": 3.710486364105513e-06, - "loss": 0.9847, - "step": 2638 - }, - { - "epoch": 0.19833157973846385, - "grad_norm": 2.5353399130854295, - "learning_rate": 3.7102340187220863e-06, - "loss": 1.0143, - "step": 2639 - }, - { - "epoch": 0.19840673380429882, - "grad_norm": 1.8549979485965706, - "learning_rate": 3.7099815720007447e-06, - "loss": 0.9976, - "step": 2640 - }, - { - "epoch": 0.19848188787013377, - "grad_norm": 1.7313045093755055, - "learning_rate": 3.7097290239564478e-06, - "loss": 1.0906, - "step": 2641 - }, - { - "epoch": 0.19855704193596874, - "grad_norm": 2.222112659561492, - "learning_rate": 3.7094763746041584e-06, - "loss": 0.9877, - "step": 2642 - }, - { - "epoch": 0.1986321960018037, - "grad_norm": 2.5680990689143575, - "learning_rate": 3.709223623958848e-06, - "loss": 0.9375, - "step": 2643 - }, - { - "epoch": 0.19870735006763865, - "grad_norm": 1.781181601834063, - "learning_rate": 3.708970772035493e-06, - "loss": 0.9631, - "step": 2644 - }, - { - "epoch": 0.19878250413347362, - "grad_norm": 1.7678747781751918, - "learning_rate": 3.7087178188490754e-06, - "loss": 0.9683, - "step": 2645 - }, - { - "epoch": 0.1988576581993086, - "grad_norm": 2.2685216867622287, - "learning_rate": 3.708464764414584e-06, - "loss": 0.9028, - "step": 2646 - }, - { - "epoch": 0.19893281226514353, - "grad_norm": 2.130278930489102, - "learning_rate": 3.708211608747013e-06, - "loss": 1.039, - "step": 2647 - }, - { - "epoch": 0.1990079663309785, - "grad_norm": 2.996516748680386, - "learning_rate": 3.7079583518613636e-06, - "loss": 1.0146, - "step": 2648 - }, - { - "epoch": 0.19908312039681347, - "grad_norm": 1.6765914416770376, - "learning_rate": 3.707704993772641e-06, - "loss": 1.0353, - "step": 2649 - }, - { - "epoch": 0.19915827446264842, - "grad_norm": 2.126977673433577, - "learning_rate": 3.7074515344958584e-06, - "loss": 1.0198, - "step": 2650 - }, - { - "epoch": 0.1992334285284834, - "grad_norm": 1.6357771684692428, - "learning_rate": 3.7071979740460345e-06, - "loss": 0.9729, - "step": 2651 - }, - { - "epoch": 0.19930858259431836, - "grad_norm": 1.7595457898680715, - "learning_rate": 3.706944312438193e-06, - "loss": 1.0596, - "step": 2652 - }, - { - "epoch": 0.19938373666015333, - "grad_norm": 2.5819590409839672, - "learning_rate": 3.7066905496873646e-06, - "loss": 0.9841, - "step": 2653 - }, - { - "epoch": 0.19945889072598827, - "grad_norm": 1.6603910392337478, - "learning_rate": 3.706436685808586e-06, - "loss": 0.9811, - "step": 2654 - }, - { - "epoch": 0.19953404479182324, - "grad_norm": 2.1470263476378606, - "learning_rate": 3.7061827208168995e-06, - "loss": 1.0331, - "step": 2655 - }, - { - "epoch": 0.1996091988576582, - "grad_norm": 1.9148008829685754, - "learning_rate": 3.705928654727353e-06, - "loss": 0.9535, - "step": 2656 - }, - { - "epoch": 0.19968435292349315, - "grad_norm": 0.8332433691736653, - "learning_rate": 3.7056744875550016e-06, - "loss": 0.9147, - "step": 2657 - }, - { - "epoch": 0.19975950698932812, - "grad_norm": 1.536735285566738, - "learning_rate": 3.7054202193149047e-06, - "loss": 1.0316, - "step": 2658 - }, - { - "epoch": 0.1998346610551631, - "grad_norm": 2.0941371275152663, - "learning_rate": 3.7051658500221297e-06, - "loss": 0.9937, - "step": 2659 - }, - { - "epoch": 0.19990981512099804, - "grad_norm": 1.78967567830272, - "learning_rate": 3.704911379691749e-06, - "loss": 0.9897, - "step": 2660 - }, - { - "epoch": 0.199984969186833, - "grad_norm": 1.8156268016392605, - "learning_rate": 3.70465680833884e-06, - "loss": 1.0403, - "step": 2661 - }, - { - "epoch": 0.20006012325266798, - "grad_norm": 1.7006264654263286, - "learning_rate": 3.704402135978488e-06, - "loss": 1.0035, - "step": 2662 - }, - { - "epoch": 0.20013527731850292, - "grad_norm": 1.619368772918024, - "learning_rate": 3.7041473626257823e-06, - "loss": 0.9747, - "step": 2663 - }, - { - "epoch": 0.2002104313843379, - "grad_norm": 1.5408171899478358, - "learning_rate": 3.7038924882958204e-06, - "loss": 0.9215, - "step": 2664 - }, - { - "epoch": 0.20028558545017286, - "grad_norm": 1.8252867486460078, - "learning_rate": 3.7036375130037037e-06, - "loss": 1.0017, - "step": 2665 - }, - { - "epoch": 0.20036073951600783, - "grad_norm": 1.5967724881049652, - "learning_rate": 3.70338243676454e-06, - "loss": 1.0805, - "step": 2666 - }, - { - "epoch": 0.20043589358184277, - "grad_norm": 3.157179893764655, - "learning_rate": 3.7031272595934453e-06, - "loss": 0.986, - "step": 2667 - }, - { - "epoch": 0.20051104764767774, - "grad_norm": 2.4091426752198446, - "learning_rate": 3.702871981505538e-06, - "loss": 1.0544, - "step": 2668 - }, - { - "epoch": 0.2005862017135127, - "grad_norm": 1.5667596356558366, - "learning_rate": 3.7026166025159454e-06, - "loss": 0.9803, - "step": 2669 - }, - { - "epoch": 0.20066135577934766, - "grad_norm": 1.9172780955205075, - "learning_rate": 3.7023611226397993e-06, - "loss": 0.8935, - "step": 2670 - }, - { - "epoch": 0.20073650984518263, - "grad_norm": 1.8838204499393947, - "learning_rate": 3.702105541892238e-06, - "loss": 1.0206, - "step": 2671 - }, - { - "epoch": 0.2008116639110176, - "grad_norm": 4.611444144625306, - "learning_rate": 3.7018498602884053e-06, - "loss": 0.952, - "step": 2672 - }, - { - "epoch": 0.20088681797685254, - "grad_norm": 2.3932428923425317, - "learning_rate": 3.701594077843452e-06, - "loss": 1.0766, - "step": 2673 - }, - { - "epoch": 0.2009619720426875, - "grad_norm": 1.6760763695417507, - "learning_rate": 3.701338194572533e-06, - "loss": 1.015, - "step": 2674 - }, - { - "epoch": 0.20103712610852248, - "grad_norm": 2.080323855790416, - "learning_rate": 3.7010822104908116e-06, - "loss": 1.0505, - "step": 2675 - }, - { - "epoch": 0.20111228017435742, - "grad_norm": 4.168910655710656, - "learning_rate": 3.7008261256134556e-06, - "loss": 1.0371, - "step": 2676 - }, - { - "epoch": 0.2011874342401924, - "grad_norm": 2.668089387771732, - "learning_rate": 3.7005699399556383e-06, - "loss": 1.0842, - "step": 2677 - }, - { - "epoch": 0.20126258830602736, - "grad_norm": 1.7921369364223148, - "learning_rate": 3.7003136535325405e-06, - "loss": 1.03, - "step": 2678 - }, - { - "epoch": 0.2013377423718623, - "grad_norm": 1.7909334885660968, - "learning_rate": 3.7000572663593475e-06, - "loss": 0.9469, - "step": 2679 - }, - { - "epoch": 0.20141289643769728, - "grad_norm": 0.756539603024332, - "learning_rate": 3.6998007784512515e-06, - "loss": 0.8118, - "step": 2680 - }, - { - "epoch": 0.20148805050353225, - "grad_norm": 1.4859165418813554, - "learning_rate": 3.6995441898234507e-06, - "loss": 0.9714, - "step": 2681 - }, - { - "epoch": 0.20156320456936722, - "grad_norm": 2.1372654300944367, - "learning_rate": 3.6992875004911485e-06, - "loss": 0.9705, - "step": 2682 - }, - { - "epoch": 0.20163835863520216, - "grad_norm": 1.7846788565203109, - "learning_rate": 3.6990307104695547e-06, - "loss": 1.0194, - "step": 2683 - }, - { - "epoch": 0.20171351270103713, - "grad_norm": 1.8046205846757983, - "learning_rate": 3.6987738197738858e-06, - "loss": 1.0035, - "step": 2684 - }, - { - "epoch": 0.2017886667668721, - "grad_norm": 2.1964896633322653, - "learning_rate": 3.698516828419362e-06, - "loss": 1.051, - "step": 2685 - }, - { - "epoch": 0.20186382083270704, - "grad_norm": 1.496108251755353, - "learning_rate": 3.698259736421213e-06, - "loss": 1.0423, - "step": 2686 - }, - { - "epoch": 0.201938974898542, - "grad_norm": 2.3125812413147053, - "learning_rate": 3.698002543794671e-06, - "loss": 0.9918, - "step": 2687 - }, - { - "epoch": 0.20201412896437698, - "grad_norm": 1.277661718520341, - "learning_rate": 3.697745250554977e-06, - "loss": 0.9582, - "step": 2688 - }, - { - "epoch": 0.20208928303021192, - "grad_norm": 1.6317167849274545, - "learning_rate": 3.697487856717375e-06, - "loss": 0.9558, - "step": 2689 - }, - { - "epoch": 0.2021644370960469, - "grad_norm": 1.3931932201085815, - "learning_rate": 3.6972303622971177e-06, - "loss": 1.1123, - "step": 2690 - }, - { - "epoch": 0.20223959116188187, - "grad_norm": 1.9895960689223613, - "learning_rate": 3.6969727673094626e-06, - "loss": 1.0014, - "step": 2691 - }, - { - "epoch": 0.2023147452277168, - "grad_norm": 2.9241979819927764, - "learning_rate": 3.696715071769672e-06, - "loss": 1.0311, - "step": 2692 - }, - { - "epoch": 0.20238989929355178, - "grad_norm": 1.4814817641620013, - "learning_rate": 3.696457275693017e-06, - "loss": 0.9862, - "step": 2693 - }, - { - "epoch": 0.20246505335938675, - "grad_norm": 1.8100124916151425, - "learning_rate": 3.6961993790947722e-06, - "loss": 1.0534, - "step": 2694 - }, - { - "epoch": 0.2025402074252217, - "grad_norm": 1.534270984985391, - "learning_rate": 3.695941381990219e-06, - "loss": 0.9969, - "step": 2695 - }, - { - "epoch": 0.20261536149105666, - "grad_norm": 3.7570268281532644, - "learning_rate": 3.6956832843946445e-06, - "loss": 0.987, - "step": 2696 - }, - { - "epoch": 0.20269051555689163, - "grad_norm": 1.5163294934397242, - "learning_rate": 3.695425086323342e-06, - "loss": 1.0081, - "step": 2697 - }, - { - "epoch": 0.2027656696227266, - "grad_norm": 1.3958544183913053, - "learning_rate": 3.6951667877916113e-06, - "loss": 1.0011, - "step": 2698 - }, - { - "epoch": 0.20284082368856154, - "grad_norm": 1.5634462924292964, - "learning_rate": 3.694908388814757e-06, - "loss": 1.0028, - "step": 2699 - }, - { - "epoch": 0.20291597775439651, - "grad_norm": 1.8792099419775379, - "learning_rate": 3.6946498894080905e-06, - "loss": 0.9681, - "step": 2700 - }, - { - "epoch": 0.20299113182023149, - "grad_norm": 1.3752436422018337, - "learning_rate": 3.694391289586929e-06, - "loss": 1.0812, - "step": 2701 - }, - { - "epoch": 0.20306628588606643, - "grad_norm": 1.6707515328694365, - "learning_rate": 3.6941325893665953e-06, - "loss": 1.0238, - "step": 2702 - }, - { - "epoch": 0.2031414399519014, - "grad_norm": 2.11767381101116, - "learning_rate": 3.693873788762418e-06, - "loss": 0.926, - "step": 2703 - }, - { - "epoch": 0.20321659401773637, - "grad_norm": 2.1132823781911267, - "learning_rate": 3.6936148877897324e-06, - "loss": 1.1019, - "step": 2704 - }, - { - "epoch": 0.2032917480835713, - "grad_norm": 1.7281610329925796, - "learning_rate": 3.6933558864638805e-06, - "loss": 1.0215, - "step": 2705 - }, - { - "epoch": 0.20336690214940628, - "grad_norm": 3.4405117366209725, - "learning_rate": 3.6930967848002065e-06, - "loss": 1.079, - "step": 2706 - }, - { - "epoch": 0.20344205621524125, - "grad_norm": 2.4248529645251997, - "learning_rate": 3.6928375828140658e-06, - "loss": 0.9285, - "step": 2707 - }, - { - "epoch": 0.2035172102810762, - "grad_norm": 1.6468532958943982, - "learning_rate": 3.6925782805208156e-06, - "loss": 0.9473, - "step": 2708 - }, - { - "epoch": 0.20359236434691116, - "grad_norm": 1.6840881188277912, - "learning_rate": 3.692318877935821e-06, - "loss": 1.0202, - "step": 2709 - }, - { - "epoch": 0.20366751841274613, - "grad_norm": 1.9479796115544588, - "learning_rate": 3.692059375074453e-06, - "loss": 1.0239, - "step": 2710 - }, - { - "epoch": 0.2037426724785811, - "grad_norm": 1.7019415427096858, - "learning_rate": 3.6917997719520867e-06, - "loss": 0.9274, - "step": 2711 - }, - { - "epoch": 0.20381782654441605, - "grad_norm": 1.9827935232181748, - "learning_rate": 3.691540068584106e-06, - "loss": 0.9863, - "step": 2712 - }, - { - "epoch": 0.20389298061025102, - "grad_norm": 0.7394163095344476, - "learning_rate": 3.6912802649858995e-06, - "loss": 0.87, - "step": 2713 - }, - { - "epoch": 0.203968134676086, - "grad_norm": 2.6476737302381523, - "learning_rate": 3.6910203611728603e-06, - "loss": 1.0119, - "step": 2714 - }, - { - "epoch": 0.20404328874192093, - "grad_norm": 1.6706519956794488, - "learning_rate": 3.6907603571603895e-06, - "loss": 0.9847, - "step": 2715 - }, - { - "epoch": 0.2041184428077559, - "grad_norm": 1.709546782459028, - "learning_rate": 3.690500252963893e-06, - "loss": 1.0258, - "step": 2716 - }, - { - "epoch": 0.20419359687359087, - "grad_norm": 1.5618593046781977, - "learning_rate": 3.6902400485987835e-06, - "loss": 0.9826, - "step": 2717 - }, - { - "epoch": 0.2042687509394258, - "grad_norm": 1.6168366572224622, - "learning_rate": 3.6899797440804788e-06, - "loss": 1.1086, - "step": 2718 - }, - { - "epoch": 0.20434390500526078, - "grad_norm": 2.1280966242888435, - "learning_rate": 3.689719339424403e-06, - "loss": 0.9851, - "step": 2719 - }, - { - "epoch": 0.20441905907109575, - "grad_norm": 3.1781900614902763, - "learning_rate": 3.689458834645986e-06, - "loss": 0.9231, - "step": 2720 - }, - { - "epoch": 0.2044942131369307, - "grad_norm": 0.838968148778374, - "learning_rate": 3.689198229760663e-06, - "loss": 0.8715, - "step": 2721 - }, - { - "epoch": 0.20456936720276567, - "grad_norm": 2.0887999592895388, - "learning_rate": 3.6889375247838766e-06, - "loss": 0.9541, - "step": 2722 - }, - { - "epoch": 0.20464452126860064, - "grad_norm": 1.7010837186095533, - "learning_rate": 3.6886767197310757e-06, - "loss": 1.0585, - "step": 2723 - }, - { - "epoch": 0.20471967533443558, - "grad_norm": 1.6989203530874082, - "learning_rate": 3.688415814617711e-06, - "loss": 0.972, - "step": 2724 - }, - { - "epoch": 0.20479482940027055, - "grad_norm": 0.7087646227880591, - "learning_rate": 3.688154809459245e-06, - "loss": 0.828, - "step": 2725 - }, - { - "epoch": 0.20486998346610552, - "grad_norm": 1.4559617604537018, - "learning_rate": 3.6878937042711424e-06, - "loss": 1.0632, - "step": 2726 - }, - { - "epoch": 0.2049451375319405, - "grad_norm": 1.6616962784654254, - "learning_rate": 3.687632499068874e-06, - "loss": 0.89, - "step": 2727 - }, - { - "epoch": 0.20502029159777543, - "grad_norm": 1.7343155446902034, - "learning_rate": 3.6873711938679174e-06, - "loss": 1.0406, - "step": 2728 - }, - { - "epoch": 0.2050954456636104, - "grad_norm": 1.576983501679422, - "learning_rate": 3.6871097886837565e-06, - "loss": 0.9371, - "step": 2729 - }, - { - "epoch": 0.20517059972944537, - "grad_norm": 1.5509653438579827, - "learning_rate": 3.68684828353188e-06, - "loss": 0.9795, - "step": 2730 - }, - { - "epoch": 0.20524575379528032, - "grad_norm": 1.6489426259272497, - "learning_rate": 3.6865866784277836e-06, - "loss": 1.113, - "step": 2731 - }, - { - "epoch": 0.2053209078611153, - "grad_norm": 2.4711424425690365, - "learning_rate": 3.6863249733869683e-06, - "loss": 1.0645, - "step": 2732 - }, - { - "epoch": 0.20539606192695026, - "grad_norm": 2.2562665563594093, - "learning_rate": 3.6860631684249403e-06, - "loss": 0.9643, - "step": 2733 - }, - { - "epoch": 0.2054712159927852, - "grad_norm": 1.8531405006004862, - "learning_rate": 3.685801263557214e-06, - "loss": 0.9548, - "step": 2734 - }, - { - "epoch": 0.20554637005862017, - "grad_norm": 1.7467000365284941, - "learning_rate": 3.6855392587993065e-06, - "loss": 1.0408, - "step": 2735 - }, - { - "epoch": 0.20562152412445514, - "grad_norm": 2.323525615527133, - "learning_rate": 3.6852771541667444e-06, - "loss": 0.9682, - "step": 2736 - }, - { - "epoch": 0.20569667819029008, - "grad_norm": 1.5921858797823194, - "learning_rate": 3.6850149496750575e-06, - "loss": 1.088, - "step": 2737 - }, - { - "epoch": 0.20577183225612505, - "grad_norm": 0.7618586093932617, - "learning_rate": 3.684752645339782e-06, - "loss": 0.8781, - "step": 2738 - }, - { - "epoch": 0.20584698632196002, - "grad_norm": 1.6164234955108763, - "learning_rate": 3.6844902411764612e-06, - "loss": 0.9697, - "step": 2739 - }, - { - "epoch": 0.20592214038779497, - "grad_norm": 1.4796932071305842, - "learning_rate": 3.6842277372006434e-06, - "loss": 1.0892, - "step": 2740 - }, - { - "epoch": 0.20599729445362994, - "grad_norm": 1.9391853016984013, - "learning_rate": 3.6839651334278823e-06, - "loss": 1.0731, - "step": 2741 - }, - { - "epoch": 0.2060724485194649, - "grad_norm": 2.8769759975541906, - "learning_rate": 3.6837024298737393e-06, - "loss": 1.0243, - "step": 2742 - }, - { - "epoch": 0.20614760258529988, - "grad_norm": 2.1315653861260766, - "learning_rate": 3.68343962655378e-06, - "loss": 1.0259, - "step": 2743 - }, - { - "epoch": 0.20622275665113482, - "grad_norm": 1.4139201477224366, - "learning_rate": 3.6831767234835763e-06, - "loss": 1.062, - "step": 2744 - }, - { - "epoch": 0.2062979107169698, - "grad_norm": 1.5832644330476238, - "learning_rate": 3.6829137206787065e-06, - "loss": 1.0216, - "step": 2745 - }, - { - "epoch": 0.20637306478280476, - "grad_norm": 1.467494184232297, - "learning_rate": 3.6826506181547543e-06, - "loss": 1.0134, - "step": 2746 - }, - { - "epoch": 0.2064482188486397, - "grad_norm": 1.5256545928872942, - "learning_rate": 3.6823874159273095e-06, - "loss": 0.9392, - "step": 2747 - }, - { - "epoch": 0.20652337291447467, - "grad_norm": 1.977982866384667, - "learning_rate": 3.6821241140119685e-06, - "loss": 0.9746, - "step": 2748 - }, - { - "epoch": 0.20659852698030964, - "grad_norm": 1.8298638705917034, - "learning_rate": 3.6818607124243322e-06, - "loss": 1.0269, - "step": 2749 - }, - { - "epoch": 0.2066736810461446, - "grad_norm": 1.6992789422259171, - "learning_rate": 3.6815972111800082e-06, - "loss": 0.9905, - "step": 2750 - }, - { - "epoch": 0.20674883511197956, - "grad_norm": 2.4307858845401196, - "learning_rate": 3.6813336102946107e-06, - "loss": 0.9592, - "step": 2751 - }, - { - "epoch": 0.20682398917781453, - "grad_norm": 2.2915531708327035, - "learning_rate": 3.681069909783758e-06, - "loss": 0.9665, - "step": 2752 - }, - { - "epoch": 0.20689914324364947, - "grad_norm": 0.7283689131291753, - "learning_rate": 3.6808061096630765e-06, - "loss": 0.8501, - "step": 2753 - }, - { - "epoch": 0.20697429730948444, - "grad_norm": 0.7574257404451068, - "learning_rate": 3.6805422099481965e-06, - "loss": 0.8628, - "step": 2754 - }, - { - "epoch": 0.2070494513753194, - "grad_norm": 1.6784021471924406, - "learning_rate": 3.6802782106547553e-06, - "loss": 1.0081, - "step": 2755 - }, - { - "epoch": 0.20712460544115438, - "grad_norm": 1.9023429980266737, - "learning_rate": 3.6800141117983954e-06, - "loss": 0.9119, - "step": 2756 - }, - { - "epoch": 0.20719975950698932, - "grad_norm": 1.952231912512272, - "learning_rate": 3.679749913394767e-06, - "loss": 0.9426, - "step": 2757 - }, - { - "epoch": 0.2072749135728243, - "grad_norm": 1.5238618699801163, - "learning_rate": 3.6794856154595235e-06, - "loss": 1.0058, - "step": 2758 - }, - { - "epoch": 0.20735006763865926, - "grad_norm": 0.7476245295413574, - "learning_rate": 3.679221218008326e-06, - "loss": 0.8703, - "step": 2759 - }, - { - "epoch": 0.2074252217044942, - "grad_norm": 2.395846679208045, - "learning_rate": 3.6789567210568417e-06, - "loss": 1.0181, - "step": 2760 - }, - { - "epoch": 0.20750037577032918, - "grad_norm": 1.8122735375301802, - "learning_rate": 3.678692124620742e-06, - "loss": 1.0795, - "step": 2761 - }, - { - "epoch": 0.20757552983616415, - "grad_norm": 1.8103024545104722, - "learning_rate": 3.6784274287157066e-06, - "loss": 0.9315, - "step": 2762 - }, - { - "epoch": 0.2076506839019991, - "grad_norm": 2.1988344009101124, - "learning_rate": 3.678162633357418e-06, - "loss": 1.0327, - "step": 2763 - }, - { - "epoch": 0.20772583796783406, - "grad_norm": 2.0496046196729902, - "learning_rate": 3.6778977385615676e-06, - "loss": 1.0102, - "step": 2764 - }, - { - "epoch": 0.20780099203366903, - "grad_norm": 1.5845577458132898, - "learning_rate": 3.677632744343851e-06, - "loss": 1.018, - "step": 2765 - }, - { - "epoch": 0.20787614609950397, - "grad_norm": 1.6172542834659354, - "learning_rate": 3.6773676507199703e-06, - "loss": 0.9385, - "step": 2766 - }, - { - "epoch": 0.20795130016533894, - "grad_norm": 1.4821855962372308, - "learning_rate": 3.6771024577056333e-06, - "loss": 1.0243, - "step": 2767 - }, - { - "epoch": 0.2080264542311739, - "grad_norm": 1.8727121382208667, - "learning_rate": 3.6768371653165537e-06, - "loss": 0.9797, - "step": 2768 - }, - { - "epoch": 0.20810160829700886, - "grad_norm": 2.620203358715069, - "learning_rate": 3.676571773568451e-06, - "loss": 0.9661, - "step": 2769 - }, - { - "epoch": 0.20817676236284383, - "grad_norm": 2.591041529615247, - "learning_rate": 3.67630628247705e-06, - "loss": 1.0431, - "step": 2770 - }, - { - "epoch": 0.2082519164286788, - "grad_norm": 2.926934416072347, - "learning_rate": 3.6760406920580834e-06, - "loss": 1.1713, - "step": 2771 - }, - { - "epoch": 0.20832707049451377, - "grad_norm": 2.237365428777706, - "learning_rate": 3.6757750023272882e-06, - "loss": 1.0322, - "step": 2772 - }, - { - "epoch": 0.2084022245603487, - "grad_norm": 1.5278630069117467, - "learning_rate": 3.6755092133004062e-06, - "loss": 1.0806, - "step": 2773 - }, - { - "epoch": 0.20847737862618368, - "grad_norm": 1.7767883103592002, - "learning_rate": 3.6752433249931876e-06, - "loss": 0.9295, - "step": 2774 - }, - { - "epoch": 0.20855253269201865, - "grad_norm": 2.9783721789228492, - "learning_rate": 3.6749773374213877e-06, - "loss": 0.9498, - "step": 2775 - }, - { - "epoch": 0.2086276867578536, - "grad_norm": 1.4966338853424945, - "learning_rate": 3.674711250600766e-06, - "loss": 0.9532, - "step": 2776 - }, - { - "epoch": 0.20870284082368856, - "grad_norm": 2.5656358093703093, - "learning_rate": 3.6744450645470904e-06, - "loss": 0.9936, - "step": 2777 - }, - { - "epoch": 0.20877799488952353, - "grad_norm": 2.0072575258585945, - "learning_rate": 3.6741787792761324e-06, - "loss": 0.9617, - "step": 2778 - }, - { - "epoch": 0.20885314895535848, - "grad_norm": 2.2263233792582744, - "learning_rate": 3.673912394803671e-06, - "loss": 0.9805, - "step": 2779 - }, - { - "epoch": 0.20892830302119345, - "grad_norm": 1.4516684502737751, - "learning_rate": 3.6736459111454903e-06, - "loss": 1.0132, - "step": 2780 - }, - { - "epoch": 0.20900345708702842, - "grad_norm": 2.222560887226809, - "learning_rate": 3.6733793283173805e-06, - "loss": 1.0212, - "step": 2781 - }, - { - "epoch": 0.20907861115286336, - "grad_norm": 1.3888694740633865, - "learning_rate": 3.673112646335138e-06, - "loss": 1.0844, - "step": 2782 - }, - { - "epoch": 0.20915376521869833, - "grad_norm": 1.5032327308037152, - "learning_rate": 3.672845865214564e-06, - "loss": 1.0411, - "step": 2783 - }, - { - "epoch": 0.2092289192845333, - "grad_norm": 1.6301454363339682, - "learning_rate": 3.6725789849714665e-06, - "loss": 0.9495, - "step": 2784 - }, - { - "epoch": 0.20930407335036824, - "grad_norm": 1.6266219774084898, - "learning_rate": 3.67231200562166e-06, - "loss": 0.975, - "step": 2785 - }, - { - "epoch": 0.2093792274162032, - "grad_norm": 1.9342991014421016, - "learning_rate": 3.6720449271809633e-06, - "loss": 1.0519, - "step": 2786 - }, - { - "epoch": 0.20945438148203818, - "grad_norm": 2.268569716339316, - "learning_rate": 3.6717777496652023e-06, - "loss": 0.9639, - "step": 2787 - }, - { - "epoch": 0.20952953554787315, - "grad_norm": 1.522602576322191, - "learning_rate": 3.6715104730902074e-06, - "loss": 1.0759, - "step": 2788 - }, - { - "epoch": 0.2096046896137081, - "grad_norm": 1.317442081778065, - "learning_rate": 3.671243097471817e-06, - "loss": 0.9872, - "step": 2789 - }, - { - "epoch": 0.20967984367954307, - "grad_norm": 2.057329575072684, - "learning_rate": 3.6709756228258728e-06, - "loss": 1.0815, - "step": 2790 - }, - { - "epoch": 0.20975499774537804, - "grad_norm": 2.1602796412402108, - "learning_rate": 3.6707080491682243e-06, - "loss": 1.0281, - "step": 2791 - }, - { - "epoch": 0.20983015181121298, - "grad_norm": 2.0191506003090796, - "learning_rate": 3.670440376514727e-06, - "loss": 0.9401, - "step": 2792 - }, - { - "epoch": 0.20990530587704795, - "grad_norm": 1.5833062955990524, - "learning_rate": 3.67017260488124e-06, - "loss": 0.9841, - "step": 2793 - }, - { - "epoch": 0.20998045994288292, - "grad_norm": 0.8031029516377463, - "learning_rate": 3.6699047342836313e-06, - "loss": 0.8971, - "step": 2794 - }, - { - "epoch": 0.21005561400871786, - "grad_norm": 1.6865503676781948, - "learning_rate": 3.669636764737772e-06, - "loss": 0.973, - "step": 2795 - }, - { - "epoch": 0.21013076807455283, - "grad_norm": 1.5147062840289636, - "learning_rate": 3.669368696259542e-06, - "loss": 1.101, - "step": 2796 - }, - { - "epoch": 0.2102059221403878, - "grad_norm": 2.5369669086491955, - "learning_rate": 3.669100528864823e-06, - "loss": 1.0651, - "step": 2797 - }, - { - "epoch": 0.21028107620622274, - "grad_norm": 2.0125283991545473, - "learning_rate": 3.6688322625695075e-06, - "loss": 0.9338, - "step": 2798 - }, - { - "epoch": 0.21035623027205771, - "grad_norm": 0.8051778910130978, - "learning_rate": 3.6685638973894896e-06, - "loss": 0.9212, - "step": 2799 - }, - { - "epoch": 0.21043138433789269, - "grad_norm": 1.7602900021221062, - "learning_rate": 3.6682954333406707e-06, - "loss": 1.114, - "step": 2800 - }, - { - "epoch": 0.21050653840372766, - "grad_norm": 1.7859722918361363, - "learning_rate": 3.6680268704389597e-06, - "loss": 0.9906, - "step": 2801 - }, - { - "epoch": 0.2105816924695626, - "grad_norm": 1.3609727261492133, - "learning_rate": 3.6677582087002695e-06, - "loss": 1.0216, - "step": 2802 - }, - { - "epoch": 0.21065684653539757, - "grad_norm": 1.6759637053071759, - "learning_rate": 3.6674894481405184e-06, - "loss": 1.0534, - "step": 2803 - }, - { - "epoch": 0.21073200060123254, - "grad_norm": 1.7772706404520504, - "learning_rate": 3.667220588775633e-06, - "loss": 0.868, - "step": 2804 - }, - { - "epoch": 0.21080715466706748, - "grad_norm": 1.7163112331060788, - "learning_rate": 3.6669516306215433e-06, - "loss": 0.9276, - "step": 2805 - }, - { - "epoch": 0.21088230873290245, - "grad_norm": 1.5688264562205563, - "learning_rate": 3.666682573694186e-06, - "loss": 0.9629, - "step": 2806 - }, - { - "epoch": 0.21095746279873742, - "grad_norm": 2.3936968247207036, - "learning_rate": 3.6664134180095045e-06, - "loss": 1.0315, - "step": 2807 - }, - { - "epoch": 0.21103261686457236, - "grad_norm": 2.507116108162837, - "learning_rate": 3.666144163583446e-06, - "loss": 1.0342, - "step": 2808 - }, - { - "epoch": 0.21110777093040733, - "grad_norm": 1.474688112291523, - "learning_rate": 3.6658748104319667e-06, - "loss": 1.0166, - "step": 2809 - }, - { - "epoch": 0.2111829249962423, - "grad_norm": 1.8994029537552723, - "learning_rate": 3.665605358571026e-06, - "loss": 1.017, - "step": 2810 - }, - { - "epoch": 0.21125807906207725, - "grad_norm": 4.540120669538391, - "learning_rate": 3.6653358080165893e-06, - "loss": 0.9422, - "step": 2811 - }, - { - "epoch": 0.21133323312791222, - "grad_norm": 0.8700860185959572, - "learning_rate": 3.6650661587846283e-06, - "loss": 0.9195, - "step": 2812 - }, - { - "epoch": 0.2114083871937472, - "grad_norm": 1.6506985890553065, - "learning_rate": 3.6647964108911226e-06, - "loss": 1.0855, - "step": 2813 - }, - { - "epoch": 0.21148354125958213, - "grad_norm": 1.4769180919222271, - "learning_rate": 3.6645265643520536e-06, - "loss": 0.9022, - "step": 2814 - }, - { - "epoch": 0.2115586953254171, - "grad_norm": 1.5933841958699708, - "learning_rate": 3.664256619183413e-06, - "loss": 1.0349, - "step": 2815 - }, - { - "epoch": 0.21163384939125207, - "grad_norm": 0.735152948516016, - "learning_rate": 3.6639865754011934e-06, - "loss": 0.8103, - "step": 2816 - }, - { - "epoch": 0.21170900345708704, - "grad_norm": 1.515561131970192, - "learning_rate": 3.663716433021398e-06, - "loss": 0.9918, - "step": 2817 - }, - { - "epoch": 0.21178415752292198, - "grad_norm": 1.6564382769976458, - "learning_rate": 3.6634461920600337e-06, - "loss": 1.0112, - "step": 2818 - }, - { - "epoch": 0.21185931158875695, - "grad_norm": 1.6129405698759964, - "learning_rate": 3.6631758525331124e-06, - "loss": 0.983, - "step": 2819 - }, - { - "epoch": 0.21193446565459192, - "grad_norm": 2.395249624602863, - "learning_rate": 3.662905414456653e-06, - "loss": 0.9658, - "step": 2820 - }, - { - "epoch": 0.21200961972042687, - "grad_norm": 1.7304245919476984, - "learning_rate": 3.66263487784668e-06, - "loss": 1.0312, - "step": 2821 - }, - { - "epoch": 0.21208477378626184, - "grad_norm": 2.5914887832319367, - "learning_rate": 3.6623642427192237e-06, - "loss": 0.9215, - "step": 2822 - }, - { - "epoch": 0.2121599278520968, - "grad_norm": 2.544404124292618, - "learning_rate": 3.6620935090903205e-06, - "loss": 1.0408, - "step": 2823 - }, - { - "epoch": 0.21223508191793175, - "grad_norm": 2.0244743606854483, - "learning_rate": 3.6618226769760127e-06, - "loss": 1.0262, - "step": 2824 - }, - { - "epoch": 0.21231023598376672, - "grad_norm": 1.8376515929302, - "learning_rate": 3.6615517463923477e-06, - "loss": 1.0081, - "step": 2825 - }, - { - "epoch": 0.2123853900496017, - "grad_norm": 1.5069382262602147, - "learning_rate": 3.661280717355379e-06, - "loss": 1.035, - "step": 2826 - }, - { - "epoch": 0.21246054411543663, - "grad_norm": 1.5396553872694538, - "learning_rate": 3.661009589881166e-06, - "loss": 1.011, - "step": 2827 - }, - { - "epoch": 0.2125356981812716, - "grad_norm": 1.7177986286304865, - "learning_rate": 3.660738363985775e-06, - "loss": 0.9666, - "step": 2828 - }, - { - "epoch": 0.21261085224710657, - "grad_norm": 0.7009411913178893, - "learning_rate": 3.660467039685276e-06, - "loss": 0.8089, - "step": 2829 - }, - { - "epoch": 0.21268600631294152, - "grad_norm": 1.5578436325161045, - "learning_rate": 3.660195616995747e-06, - "loss": 1.0662, - "step": 2830 - }, - { - "epoch": 0.2127611603787765, - "grad_norm": 1.628626371714083, - "learning_rate": 3.6599240959332704e-06, - "loss": 1.089, - "step": 2831 - }, - { - "epoch": 0.21283631444461146, - "grad_norm": 1.5217535640197901, - "learning_rate": 3.659652476513934e-06, - "loss": 1.0567, - "step": 2832 - }, - { - "epoch": 0.21291146851044643, - "grad_norm": 1.473118231478239, - "learning_rate": 3.6593807587538343e-06, - "loss": 1.0273, - "step": 2833 - }, - { - "epoch": 0.21298662257628137, - "grad_norm": 0.895002515831933, - "learning_rate": 3.6591089426690695e-06, - "loss": 0.9602, - "step": 2834 - }, - { - "epoch": 0.21306177664211634, - "grad_norm": 3.005949954724805, - "learning_rate": 3.658837028275747e-06, - "loss": 1.043, - "step": 2835 - }, - { - "epoch": 0.2131369307079513, - "grad_norm": 1.597397613109803, - "learning_rate": 3.6585650155899786e-06, - "loss": 1.0428, - "step": 2836 - }, - { - "epoch": 0.21321208477378625, - "grad_norm": 1.659755667434166, - "learning_rate": 3.658292904627882e-06, - "loss": 0.9935, - "step": 2837 - }, - { - "epoch": 0.21328723883962122, - "grad_norm": 2.0496818574739617, - "learning_rate": 3.6580206954055807e-06, - "loss": 1.1096, - "step": 2838 - }, - { - "epoch": 0.2133623929054562, - "grad_norm": 2.2886418812172047, - "learning_rate": 3.657748387939204e-06, - "loss": 0.9769, - "step": 2839 - }, - { - "epoch": 0.21343754697129114, - "grad_norm": 1.3181632793252924, - "learning_rate": 3.657475982244888e-06, - "loss": 0.8632, - "step": 2840 - }, - { - "epoch": 0.2135127010371261, - "grad_norm": 4.41906109568619, - "learning_rate": 3.6572034783387725e-06, - "loss": 0.9164, - "step": 2841 - }, - { - "epoch": 0.21358785510296108, - "grad_norm": 0.8155046805429302, - "learning_rate": 3.6569308762370056e-06, - "loss": 0.8033, - "step": 2842 - }, - { - "epoch": 0.21366300916879602, - "grad_norm": 2.5882298200467813, - "learning_rate": 3.6566581759557387e-06, - "loss": 1.074, - "step": 2843 - }, - { - "epoch": 0.213738163234631, - "grad_norm": 1.549928752738352, - "learning_rate": 3.656385377511132e-06, - "loss": 0.9934, - "step": 2844 - }, - { - "epoch": 0.21381331730046596, - "grad_norm": 1.5657762130840533, - "learning_rate": 3.656112480919348e-06, - "loss": 0.9727, - "step": 2845 - }, - { - "epoch": 0.21388847136630093, - "grad_norm": 1.3870117737559835, - "learning_rate": 3.6558394861965587e-06, - "loss": 0.9553, - "step": 2846 - }, - { - "epoch": 0.21396362543213587, - "grad_norm": 1.7151391680779855, - "learning_rate": 3.6555663933589384e-06, - "loss": 0.9418, - "step": 2847 - }, - { - "epoch": 0.21403877949797084, - "grad_norm": 0.7132955576870779, - "learning_rate": 3.655293202422671e-06, - "loss": 0.8412, - "step": 2848 - }, - { - "epoch": 0.2141139335638058, - "grad_norm": 1.5863170451491444, - "learning_rate": 3.6550199134039414e-06, - "loss": 1.0317, - "step": 2849 - }, - { - "epoch": 0.21418908762964076, - "grad_norm": 4.907763205265213, - "learning_rate": 3.654746526318945e-06, - "loss": 0.9793, - "step": 2850 - }, - { - "epoch": 0.21426424169547573, - "grad_norm": 1.5316920518207038, - "learning_rate": 3.6544730411838805e-06, - "loss": 0.9554, - "step": 2851 - }, - { - "epoch": 0.2143393957613107, - "grad_norm": 1.4645776939373956, - "learning_rate": 3.654199458014953e-06, - "loss": 1.0242, - "step": 2852 - }, - { - "epoch": 0.21441454982714564, - "grad_norm": 1.4693685000400383, - "learning_rate": 3.653925776828373e-06, - "loss": 1.0376, - "step": 2853 - }, - { - "epoch": 0.2144897038929806, - "grad_norm": 1.9601337259761402, - "learning_rate": 3.653651997640358e-06, - "loss": 0.8885, - "step": 2854 - }, - { - "epoch": 0.21456485795881558, - "grad_norm": 0.7122850746444065, - "learning_rate": 3.6533781204671296e-06, - "loss": 0.8673, - "step": 2855 - }, - { - "epoch": 0.21464001202465052, - "grad_norm": 1.6803203011761412, - "learning_rate": 3.6531041453249154e-06, - "loss": 1.0725, - "step": 2856 - }, - { - "epoch": 0.2147151660904855, - "grad_norm": 1.7265705091195742, - "learning_rate": 3.6528300722299515e-06, - "loss": 1.0211, - "step": 2857 - }, - { - "epoch": 0.21479032015632046, - "grad_norm": 1.5469347239052473, - "learning_rate": 3.6525559011984768e-06, - "loss": 0.9489, - "step": 2858 - }, - { - "epoch": 0.2148654742221554, - "grad_norm": 0.8739554778375607, - "learning_rate": 3.652281632246736e-06, - "loss": 0.8552, - "step": 2859 - }, - { - "epoch": 0.21494062828799038, - "grad_norm": 1.4244524054189929, - "learning_rate": 3.6520072653909823e-06, - "loss": 1.0391, - "step": 2860 - }, - { - "epoch": 0.21501578235382535, - "grad_norm": 1.6276392411585348, - "learning_rate": 3.6517328006474717e-06, - "loss": 1.0802, - "step": 2861 - }, - { - "epoch": 0.21509093641966032, - "grad_norm": 2.274002476914801, - "learning_rate": 3.651458238032468e-06, - "loss": 1.047, - "step": 2862 - }, - { - "epoch": 0.21516609048549526, - "grad_norm": 1.3028499431907137, - "learning_rate": 3.65118357756224e-06, - "loss": 1.0569, - "step": 2863 - }, - { - "epoch": 0.21524124455133023, - "grad_norm": 2.845510587547552, - "learning_rate": 3.650908819253062e-06, - "loss": 0.9836, - "step": 2864 - }, - { - "epoch": 0.2153163986171652, - "grad_norm": 1.9186018697114962, - "learning_rate": 3.6506339631212145e-06, - "loss": 1.0923, - "step": 2865 - }, - { - "epoch": 0.21539155268300014, - "grad_norm": 1.9288108780475437, - "learning_rate": 3.650359009182984e-06, - "loss": 0.89, - "step": 2866 - }, - { - "epoch": 0.2154667067488351, - "grad_norm": 3.034984215533517, - "learning_rate": 3.650083957454663e-06, - "loss": 1.0622, - "step": 2867 - }, - { - "epoch": 0.21554186081467008, - "grad_norm": 1.356326796295533, - "learning_rate": 3.6498088079525487e-06, - "loss": 0.9706, - "step": 2868 - }, - { - "epoch": 0.21561701488050503, - "grad_norm": 2.764700869888708, - "learning_rate": 3.649533560692945e-06, - "loss": 1.0208, - "step": 2869 - }, - { - "epoch": 0.21569216894634, - "grad_norm": 1.6308094496708878, - "learning_rate": 3.6492582156921615e-06, - "loss": 1.0258, - "step": 2870 - }, - { - "epoch": 0.21576732301217497, - "grad_norm": 1.6097035489312603, - "learning_rate": 3.648982772966513e-06, - "loss": 1.047, - "step": 2871 - }, - { - "epoch": 0.2158424770780099, - "grad_norm": 1.5631065717157662, - "learning_rate": 3.648707232532321e-06, - "loss": 1.1055, - "step": 2872 - }, - { - "epoch": 0.21591763114384488, - "grad_norm": 1.8753787611824677, - "learning_rate": 3.648431594405912e-06, - "loss": 0.9849, - "step": 2873 - }, - { - "epoch": 0.21599278520967985, - "grad_norm": 1.6035096775925028, - "learning_rate": 3.648155858603619e-06, - "loss": 0.9692, - "step": 2874 - }, - { - "epoch": 0.2160679392755148, - "grad_norm": 1.6777120639796337, - "learning_rate": 3.64788002514178e-06, - "loss": 1.0388, - "step": 2875 - }, - { - "epoch": 0.21614309334134976, - "grad_norm": 1.4978789592271338, - "learning_rate": 3.6476040940367395e-06, - "loss": 1.0043, - "step": 2876 - }, - { - "epoch": 0.21621824740718473, - "grad_norm": 2.079283541934579, - "learning_rate": 3.647328065304847e-06, - "loss": 0.8909, - "step": 2877 - }, - { - "epoch": 0.2162934014730197, - "grad_norm": 1.3326051879503582, - "learning_rate": 3.6470519389624587e-06, - "loss": 1.0255, - "step": 2878 - }, - { - "epoch": 0.21636855553885465, - "grad_norm": 10.840633769071655, - "learning_rate": 3.646775715025936e-06, - "loss": 1.0555, - "step": 2879 - }, - { - "epoch": 0.21644370960468962, - "grad_norm": 1.8316383184449783, - "learning_rate": 3.6464993935116464e-06, - "loss": 1.0012, - "step": 2880 - }, - { - "epoch": 0.2165188636705246, - "grad_norm": 4.804246835737253, - "learning_rate": 3.646222974435963e-06, - "loss": 1.0359, - "step": 2881 - }, - { - "epoch": 0.21659401773635953, - "grad_norm": 0.7731620220920034, - "learning_rate": 3.645946457815264e-06, - "loss": 0.8977, - "step": 2882 - }, - { - "epoch": 0.2166691718021945, - "grad_norm": 2.0012579776335664, - "learning_rate": 3.6456698436659353e-06, - "loss": 0.9607, - "step": 2883 - }, - { - "epoch": 0.21674432586802947, - "grad_norm": 1.3103819059644655, - "learning_rate": 3.645393132004367e-06, - "loss": 0.9764, - "step": 2884 - }, - { - "epoch": 0.2168194799338644, - "grad_norm": 1.8946742053024002, - "learning_rate": 3.6451163228469543e-06, - "loss": 0.8927, - "step": 2885 - }, - { - "epoch": 0.21689463399969938, - "grad_norm": 1.5191063764992654, - "learning_rate": 3.6448394162100994e-06, - "loss": 1.021, - "step": 2886 - }, - { - "epoch": 0.21696978806553435, - "grad_norm": 2.9589452155515237, - "learning_rate": 3.644562412110211e-06, - "loss": 0.9998, - "step": 2887 - }, - { - "epoch": 0.2170449421313693, - "grad_norm": 1.6347560722773837, - "learning_rate": 3.6442853105637024e-06, - "loss": 0.9717, - "step": 2888 - }, - { - "epoch": 0.21712009619720427, - "grad_norm": 1.6608752320640359, - "learning_rate": 3.644008111586993e-06, - "loss": 1.0641, - "step": 2889 - }, - { - "epoch": 0.21719525026303924, - "grad_norm": 1.716104795421602, - "learning_rate": 3.6437308151965074e-06, - "loss": 0.9237, - "step": 2890 - }, - { - "epoch": 0.2172704043288742, - "grad_norm": 2.5096435517187197, - "learning_rate": 3.6434534214086767e-06, - "loss": 1.0455, - "step": 2891 - }, - { - "epoch": 0.21734555839470915, - "grad_norm": 1.8878350598370675, - "learning_rate": 3.643175930239938e-06, - "loss": 1.141, - "step": 2892 - }, - { - "epoch": 0.21742071246054412, - "grad_norm": 1.5001420112778838, - "learning_rate": 3.6428983417067326e-06, - "loss": 0.9609, - "step": 2893 - }, - { - "epoch": 0.2174958665263791, - "grad_norm": 1.5167813952862244, - "learning_rate": 3.64262065582551e-06, - "loss": 0.996, - "step": 2894 - }, - { - "epoch": 0.21757102059221403, - "grad_norm": 1.6841523201979605, - "learning_rate": 3.6423428726127232e-06, - "loss": 0.9697, - "step": 2895 - }, - { - "epoch": 0.217646174658049, - "grad_norm": 2.1916873635981613, - "learning_rate": 3.6420649920848324e-06, - "loss": 1.0232, - "step": 2896 - }, - { - "epoch": 0.21772132872388397, - "grad_norm": 2.03059599691609, - "learning_rate": 3.641787014258303e-06, - "loss": 0.9559, - "step": 2897 - }, - { - "epoch": 0.21779648278971891, - "grad_norm": 2.3830430216361496, - "learning_rate": 3.641508939149606e-06, - "loss": 0.955, - "step": 2898 - }, - { - "epoch": 0.21787163685555389, - "grad_norm": 1.635693795925986, - "learning_rate": 3.6412307667752185e-06, - "loss": 0.9223, - "step": 2899 - }, - { - "epoch": 0.21794679092138886, - "grad_norm": 1.7739824306473855, - "learning_rate": 3.640952497151623e-06, - "loss": 1.0084, - "step": 2900 - }, - { - "epoch": 0.2180219449872238, - "grad_norm": 3.3106875949535612, - "learning_rate": 3.6406741302953093e-06, - "loss": 1.0716, - "step": 2901 - }, - { - "epoch": 0.21809709905305877, - "grad_norm": 1.6173830513043614, - "learning_rate": 3.6403956662227706e-06, - "loss": 1.0167, - "step": 2902 - }, - { - "epoch": 0.21817225311889374, - "grad_norm": 1.5688135384994297, - "learning_rate": 3.640117104950507e-06, - "loss": 0.9252, - "step": 2903 - }, - { - "epoch": 0.21824740718472868, - "grad_norm": 1.7796385318761176, - "learning_rate": 3.639838446495024e-06, - "loss": 1.0394, - "step": 2904 - }, - { - "epoch": 0.21832256125056365, - "grad_norm": 1.7399940538852863, - "learning_rate": 3.6395596908728344e-06, - "loss": 0.9971, - "step": 2905 - }, - { - "epoch": 0.21839771531639862, - "grad_norm": 1.618245686214596, - "learning_rate": 3.639280838100455e-06, - "loss": 0.9717, - "step": 2906 - }, - { - "epoch": 0.2184728693822336, - "grad_norm": 1.555002075267523, - "learning_rate": 3.639001888194408e-06, - "loss": 1.0361, - "step": 2907 - }, - { - "epoch": 0.21854802344806853, - "grad_norm": 1.3844031974728, - "learning_rate": 3.638722841171223e-06, - "loss": 0.9338, - "step": 2908 - }, - { - "epoch": 0.2186231775139035, - "grad_norm": 0.7108380174724745, - "learning_rate": 3.6384436970474353e-06, - "loss": 0.8222, - "step": 2909 - }, - { - "epoch": 0.21869833157973848, - "grad_norm": 1.3351609945910479, - "learning_rate": 3.638164455839584e-06, - "loss": 1.0761, - "step": 2910 - }, - { - "epoch": 0.21877348564557342, - "grad_norm": 1.556803121664793, - "learning_rate": 3.637885117564216e-06, - "loss": 0.9915, - "step": 2911 - }, - { - "epoch": 0.2188486397114084, - "grad_norm": 2.116590239679447, - "learning_rate": 3.6376056822378826e-06, - "loss": 1.108, - "step": 2912 - }, - { - "epoch": 0.21892379377724336, - "grad_norm": 1.9575202011293753, - "learning_rate": 3.6373261498771418e-06, - "loss": 0.9169, - "step": 2913 - }, - { - "epoch": 0.2189989478430783, - "grad_norm": 1.6353519532182592, - "learning_rate": 3.6370465204985567e-06, - "loss": 1.0182, - "step": 2914 - }, - { - "epoch": 0.21907410190891327, - "grad_norm": 1.6450232995189105, - "learning_rate": 3.636766794118697e-06, - "loss": 1.088, - "step": 2915 - }, - { - "epoch": 0.21914925597474824, - "grad_norm": 1.6054801847695737, - "learning_rate": 3.636486970754137e-06, - "loss": 0.9364, - "step": 2916 - }, - { - "epoch": 0.21922441004058318, - "grad_norm": 2.7784871297678606, - "learning_rate": 3.6362070504214577e-06, - "loss": 0.9652, - "step": 2917 - }, - { - "epoch": 0.21929956410641815, - "grad_norm": 2.2882901604069645, - "learning_rate": 3.6359270331372447e-06, - "loss": 1.0333, - "step": 2918 - }, - { - "epoch": 0.21937471817225312, - "grad_norm": 2.0459433029463083, - "learning_rate": 3.6356469189180907e-06, - "loss": 0.8791, - "step": 2919 - }, - { - "epoch": 0.21944987223808807, - "grad_norm": 2.570850721781664, - "learning_rate": 3.6353667077805934e-06, - "loss": 1.0393, - "step": 2920 - }, - { - "epoch": 0.21952502630392304, - "grad_norm": 1.3801058989126305, - "learning_rate": 3.635086399741357e-06, - "loss": 0.9941, - "step": 2921 - }, - { - "epoch": 0.219600180369758, - "grad_norm": 1.6963716353993818, - "learning_rate": 3.6348059948169894e-06, - "loss": 1.0071, - "step": 2922 - }, - { - "epoch": 0.21967533443559298, - "grad_norm": 1.7578082275338702, - "learning_rate": 3.6345254930241075e-06, - "loss": 1.0201, - "step": 2923 - }, - { - "epoch": 0.21975048850142792, - "grad_norm": 1.639631424634415, - "learning_rate": 3.63424489437933e-06, - "loss": 1.0333, - "step": 2924 - }, - { - "epoch": 0.2198256425672629, - "grad_norm": 1.6128196517765072, - "learning_rate": 3.6339641988992853e-06, - "loss": 1.0392, - "step": 2925 - }, - { - "epoch": 0.21990079663309786, - "grad_norm": 1.6461353588231653, - "learning_rate": 3.633683406600605e-06, - "loss": 0.9905, - "step": 2926 - }, - { - "epoch": 0.2199759506989328, - "grad_norm": 1.5976457292354926, - "learning_rate": 3.633402517499927e-06, - "loss": 0.9773, - "step": 2927 - }, - { - "epoch": 0.22005110476476777, - "grad_norm": 2.2759471556524034, - "learning_rate": 3.633121531613895e-06, - "loss": 1.0648, - "step": 2928 - }, - { - "epoch": 0.22012625883060274, - "grad_norm": 1.9544424267767422, - "learning_rate": 3.6328404489591585e-06, - "loss": 1.0894, - "step": 2929 - }, - { - "epoch": 0.2202014128964377, - "grad_norm": 1.4241413038899737, - "learning_rate": 3.6325592695523727e-06, - "loss": 1.0911, - "step": 2930 - }, - { - "epoch": 0.22027656696227266, - "grad_norm": 1.5043085526236237, - "learning_rate": 3.6322779934101995e-06, - "loss": 1.0707, - "step": 2931 - }, - { - "epoch": 0.22035172102810763, - "grad_norm": 1.7850795203482321, - "learning_rate": 3.6319966205493044e-06, - "loss": 1.1135, - "step": 2932 - }, - { - "epoch": 0.22042687509394257, - "grad_norm": 1.6830968986627257, - "learning_rate": 3.63171515098636e-06, - "loss": 1.0425, - "step": 2933 - }, - { - "epoch": 0.22050202915977754, - "grad_norm": 1.917025767424306, - "learning_rate": 3.6314335847380443e-06, - "loss": 0.9814, - "step": 2934 - }, - { - "epoch": 0.2205771832256125, - "grad_norm": 1.573358418997441, - "learning_rate": 3.631151921821042e-06, - "loss": 1.0243, - "step": 2935 - }, - { - "epoch": 0.22065233729144745, - "grad_norm": 2.039183748228518, - "learning_rate": 3.630870162252042e-06, - "loss": 1.0201, - "step": 2936 - }, - { - "epoch": 0.22072749135728242, - "grad_norm": 0.8172511788934728, - "learning_rate": 3.63058830604774e-06, - "loss": 0.8934, - "step": 2937 - }, - { - "epoch": 0.2208026454231174, - "grad_norm": 1.6894469156316527, - "learning_rate": 3.6303063532248367e-06, - "loss": 1.0288, - "step": 2938 - }, - { - "epoch": 0.22087779948895236, - "grad_norm": 2.045078100377956, - "learning_rate": 3.6300243038000397e-06, - "loss": 0.9798, - "step": 2939 - }, - { - "epoch": 0.2209529535547873, - "grad_norm": 2.011953988256341, - "learning_rate": 3.6297421577900608e-06, - "loss": 0.9632, - "step": 2940 - }, - { - "epoch": 0.22102810762062228, - "grad_norm": 1.6103247645492373, - "learning_rate": 3.629459915211618e-06, - "loss": 0.9966, - "step": 2941 - }, - { - "epoch": 0.22110326168645725, - "grad_norm": 1.9359247510488709, - "learning_rate": 3.6291775760814358e-06, - "loss": 1.0137, - "step": 2942 - }, - { - "epoch": 0.2211784157522922, - "grad_norm": 3.2737007126333335, - "learning_rate": 3.6288951404162433e-06, - "loss": 1.1605, - "step": 2943 - }, - { - "epoch": 0.22125356981812716, - "grad_norm": 2.032128833508787, - "learning_rate": 3.6286126082327764e-06, - "loss": 1.0161, - "step": 2944 - }, - { - "epoch": 0.22132872388396213, - "grad_norm": 1.4432776070483044, - "learning_rate": 3.6283299795477767e-06, - "loss": 1.0271, - "step": 2945 - }, - { - "epoch": 0.22140387794979707, - "grad_norm": 4.377638974600734, - "learning_rate": 3.62804725437799e-06, - "loss": 1.0203, - "step": 2946 - }, - { - "epoch": 0.22147903201563204, - "grad_norm": 2.286276726835026, - "learning_rate": 3.6277644327401687e-06, - "loss": 1.0568, - "step": 2947 - }, - { - "epoch": 0.221554186081467, - "grad_norm": 1.4346870598182675, - "learning_rate": 3.627481514651073e-06, - "loss": 0.9958, - "step": 2948 - }, - { - "epoch": 0.22162934014730196, - "grad_norm": 1.438439352534475, - "learning_rate": 3.6271985001274647e-06, - "loss": 0.9025, - "step": 2949 - }, - { - "epoch": 0.22170449421313693, - "grad_norm": 1.7908422288397103, - "learning_rate": 3.6269153891861137e-06, - "loss": 1.0288, - "step": 2950 - }, - { - "epoch": 0.2217796482789719, - "grad_norm": 2.3979714403655006, - "learning_rate": 3.6266321818437967e-06, - "loss": 0.9762, - "step": 2951 - }, - { - "epoch": 0.22185480234480687, - "grad_norm": 1.8732195984376598, - "learning_rate": 3.6263488781172946e-06, - "loss": 0.92, - "step": 2952 - }, - { - "epoch": 0.2219299564106418, - "grad_norm": 1.7074673093512074, - "learning_rate": 3.6260654780233927e-06, - "loss": 1.0763, - "step": 2953 - }, - { - "epoch": 0.22200511047647678, - "grad_norm": 1.8023276484993416, - "learning_rate": 3.6257819815788854e-06, - "loss": 0.9787, - "step": 2954 - }, - { - "epoch": 0.22208026454231175, - "grad_norm": 0.748776033168629, - "learning_rate": 3.6254983888005697e-06, - "loss": 0.9126, - "step": 2955 - }, - { - "epoch": 0.2221554186081467, - "grad_norm": 1.6416234928915125, - "learning_rate": 3.6252146997052507e-06, - "loss": 1.0551, - "step": 2956 - }, - { - "epoch": 0.22223057267398166, - "grad_norm": 1.8461908251164452, - "learning_rate": 3.624930914309736e-06, - "loss": 1.0427, - "step": 2957 - }, - { - "epoch": 0.22230572673981663, - "grad_norm": 1.770883005978901, - "learning_rate": 3.624647032630844e-06, - "loss": 1.05, - "step": 2958 - }, - { - "epoch": 0.22238088080565158, - "grad_norm": 1.555892553988509, - "learning_rate": 3.6243630546853932e-06, - "loss": 0.9099, - "step": 2959 - }, - { - "epoch": 0.22245603487148655, - "grad_norm": 2.304441794536264, - "learning_rate": 3.6240789804902116e-06, - "loss": 1.0451, - "step": 2960 - }, - { - "epoch": 0.22253118893732152, - "grad_norm": 1.6879702725165684, - "learning_rate": 3.623794810062131e-06, - "loss": 1.1132, - "step": 2961 - }, - { - "epoch": 0.22260634300315646, - "grad_norm": 1.6404923884974718, - "learning_rate": 3.62351054341799e-06, - "loss": 0.9327, - "step": 2962 - }, - { - "epoch": 0.22268149706899143, - "grad_norm": 0.8054875960860138, - "learning_rate": 3.623226180574633e-06, - "loss": 0.8056, - "step": 2963 - }, - { - "epoch": 0.2227566511348264, - "grad_norm": 1.502206530481191, - "learning_rate": 3.6229417215489084e-06, - "loss": 1.0412, - "step": 2964 - }, - { - "epoch": 0.22283180520066134, - "grad_norm": 1.7281519965327463, - "learning_rate": 3.6226571663576727e-06, - "loss": 0.9873, - "step": 2965 - }, - { - "epoch": 0.2229069592664963, - "grad_norm": 1.8166932238232942, - "learning_rate": 3.6223725150177858e-06, - "loss": 0.9051, - "step": 2966 - }, - { - "epoch": 0.22298211333233128, - "grad_norm": 1.600098922770898, - "learning_rate": 3.622087767546116e-06, - "loss": 1.0803, - "step": 2967 - }, - { - "epoch": 0.22305726739816625, - "grad_norm": 1.7664150394725484, - "learning_rate": 3.6218029239595332e-06, - "loss": 0.9026, - "step": 2968 - }, - { - "epoch": 0.2231324214640012, - "grad_norm": 1.4998272160515802, - "learning_rate": 3.6215179842749172e-06, - "loss": 1.0719, - "step": 2969 - }, - { - "epoch": 0.22320757552983617, - "grad_norm": 2.151676801583286, - "learning_rate": 3.6212329485091518e-06, - "loss": 0.9619, - "step": 2970 - }, - { - "epoch": 0.22328272959567114, - "grad_norm": 1.7593542500927253, - "learning_rate": 3.620947816679126e-06, - "loss": 1.1373, - "step": 2971 - }, - { - "epoch": 0.22335788366150608, - "grad_norm": 3.755728097923521, - "learning_rate": 3.6206625888017355e-06, - "loss": 0.9935, - "step": 2972 - }, - { - "epoch": 0.22343303772734105, - "grad_norm": 1.7885520240100181, - "learning_rate": 3.62037726489388e-06, - "loss": 0.9333, - "step": 2973 - }, - { - "epoch": 0.22350819179317602, - "grad_norm": 1.4128460789271662, - "learning_rate": 3.620091844972467e-06, - "loss": 1.0283, - "step": 2974 - }, - { - "epoch": 0.22358334585901096, - "grad_norm": 1.622874704127787, - "learning_rate": 3.619806329054408e-06, - "loss": 1.0598, - "step": 2975 - }, - { - "epoch": 0.22365849992484593, - "grad_norm": 1.6512477838355533, - "learning_rate": 3.619520717156622e-06, - "loss": 1.028, - "step": 2976 - }, - { - "epoch": 0.2237336539906809, - "grad_norm": 1.5624179055582046, - "learning_rate": 3.6192350092960315e-06, - "loss": 1.0785, - "step": 2977 - }, - { - "epoch": 0.22380880805651585, - "grad_norm": 1.4898808411180375, - "learning_rate": 3.6189492054895667e-06, - "loss": 0.9912, - "step": 2978 - }, - { - "epoch": 0.22388396212235082, - "grad_norm": 1.4074448489670603, - "learning_rate": 3.6186633057541617e-06, - "loss": 0.9986, - "step": 2979 - }, - { - "epoch": 0.2239591161881858, - "grad_norm": 1.7030566840512964, - "learning_rate": 3.6183773101067575e-06, - "loss": 1.0895, - "step": 2980 - }, - { - "epoch": 0.22403427025402073, - "grad_norm": 0.7525741192760483, - "learning_rate": 3.618091218564301e-06, - "loss": 0.8674, - "step": 2981 - }, - { - "epoch": 0.2241094243198557, - "grad_norm": 1.7455211955182028, - "learning_rate": 3.6178050311437432e-06, - "loss": 0.9977, - "step": 2982 - }, - { - "epoch": 0.22418457838569067, - "grad_norm": 1.5707964554698364, - "learning_rate": 3.6175187478620424e-06, - "loss": 1.0776, - "step": 2983 - }, - { - "epoch": 0.22425973245152564, - "grad_norm": 1.4377419641277145, - "learning_rate": 3.617232368736162e-06, - "loss": 0.9853, - "step": 2984 - }, - { - "epoch": 0.22433488651736058, - "grad_norm": 1.602517043572069, - "learning_rate": 3.616945893783071e-06, - "loss": 0.9448, - "step": 2985 - }, - { - "epoch": 0.22441004058319555, - "grad_norm": 1.740107230905276, - "learning_rate": 3.616659323019744e-06, - "loss": 1.0056, - "step": 2986 - }, - { - "epoch": 0.22448519464903052, - "grad_norm": 2.382056744902921, - "learning_rate": 3.616372656463161e-06, - "loss": 1.0399, - "step": 2987 - }, - { - "epoch": 0.22456034871486547, - "grad_norm": 1.5741258314553526, - "learning_rate": 3.6160858941303095e-06, - "loss": 1.0913, - "step": 2988 - }, - { - "epoch": 0.22463550278070044, - "grad_norm": 74.01250950008384, - "learning_rate": 3.61579903603818e-06, - "loss": 1.0092, - "step": 2989 - }, - { - "epoch": 0.2247106568465354, - "grad_norm": 1.9902027490740422, - "learning_rate": 3.6155120822037707e-06, - "loss": 1.0131, - "step": 2990 - }, - { - "epoch": 0.22478581091237035, - "grad_norm": 2.4219780684658643, - "learning_rate": 3.6152250326440833e-06, - "loss": 1.0545, - "step": 2991 - }, - { - "epoch": 0.22486096497820532, - "grad_norm": 1.4736038518629564, - "learning_rate": 3.614937887376128e-06, - "loss": 0.8862, - "step": 2992 - }, - { - "epoch": 0.2249361190440403, - "grad_norm": 1.6783961100172307, - "learning_rate": 3.61465064641692e-06, - "loss": 0.987, - "step": 2993 - }, - { - "epoch": 0.22501127310987523, - "grad_norm": 1.852452897001962, - "learning_rate": 3.614363309783477e-06, - "loss": 1.0269, - "step": 2994 - }, - { - "epoch": 0.2250864271757102, - "grad_norm": 1.7620459546646614, - "learning_rate": 3.6140758774928265e-06, - "loss": 1.0218, - "step": 2995 - }, - { - "epoch": 0.22516158124154517, - "grad_norm": 1.1098775128493898, - "learning_rate": 3.613788349561999e-06, - "loss": 0.953, - "step": 2996 - }, - { - "epoch": 0.22523673530738014, - "grad_norm": 1.7683956721418774, - "learning_rate": 3.6135007260080334e-06, - "loss": 0.9939, - "step": 2997 - }, - { - "epoch": 0.22531188937321509, - "grad_norm": 2.008257271155984, - "learning_rate": 3.61321300684797e-06, - "loss": 1.0202, - "step": 2998 - }, - { - "epoch": 0.22538704343905006, - "grad_norm": 2.1325196005668405, - "learning_rate": 3.6129251920988594e-06, - "loss": 1.0521, - "step": 2999 - }, - { - "epoch": 0.22546219750488503, - "grad_norm": 1.8289354924109245, - "learning_rate": 3.612637281777755e-06, - "loss": 1.0529, - "step": 3000 - }, - { - "epoch": 0.22553735157071997, - "grad_norm": 2.9492991051242696, - "learning_rate": 3.6123492759017155e-06, - "loss": 0.9704, - "step": 3001 - }, - { - "epoch": 0.22561250563655494, - "grad_norm": 2.0432599007924273, - "learning_rate": 3.6120611744878076e-06, - "loss": 1.0814, - "step": 3002 - }, - { - "epoch": 0.2256876597023899, - "grad_norm": 2.002585885138502, - "learning_rate": 3.6117729775531028e-06, - "loss": 1.0179, - "step": 3003 - }, - { - "epoch": 0.22576281376822485, - "grad_norm": 1.830646569101178, - "learning_rate": 3.6114846851146767e-06, - "loss": 0.8992, - "step": 3004 - }, - { - "epoch": 0.22583796783405982, - "grad_norm": 1.6538377144893357, - "learning_rate": 3.611196297189612e-06, - "loss": 1.1149, - "step": 3005 - }, - { - "epoch": 0.2259131218998948, - "grad_norm": 1.9341313395081026, - "learning_rate": 3.6109078137949975e-06, - "loss": 0.9409, - "step": 3006 - }, - { - "epoch": 0.22598827596572973, - "grad_norm": 1.8760020439486562, - "learning_rate": 3.6106192349479263e-06, - "loss": 1.1266, - "step": 3007 - }, - { - "epoch": 0.2260634300315647, - "grad_norm": 1.8739687308814117, - "learning_rate": 3.610330560665498e-06, - "loss": 1.0433, - "step": 3008 - }, - { - "epoch": 0.22613858409739968, - "grad_norm": 1.6317838504132844, - "learning_rate": 3.6100417909648175e-06, - "loss": 0.8843, - "step": 3009 - }, - { - "epoch": 0.22621373816323462, - "grad_norm": 1.6474161464207868, - "learning_rate": 3.6097529258629952e-06, - "loss": 1.0478, - "step": 3010 - }, - { - "epoch": 0.2262888922290696, - "grad_norm": 3.9286491584519325, - "learning_rate": 3.6094639653771486e-06, - "loss": 0.9477, - "step": 3011 - }, - { - "epoch": 0.22636404629490456, - "grad_norm": 2.0765734164924803, - "learning_rate": 3.6091749095243986e-06, - "loss": 1.0004, - "step": 3012 - }, - { - "epoch": 0.22643920036073953, - "grad_norm": 1.9832436047086288, - "learning_rate": 3.6088857583218735e-06, - "loss": 1.0661, - "step": 3013 - }, - { - "epoch": 0.22651435442657447, - "grad_norm": 1.5648138842348989, - "learning_rate": 3.6085965117867066e-06, - "loss": 0.949, - "step": 3014 - }, - { - "epoch": 0.22658950849240944, - "grad_norm": 1.9153514648098895, - "learning_rate": 3.608307169936036e-06, - "loss": 0.9568, - "step": 3015 - }, - { - "epoch": 0.2266646625582444, - "grad_norm": 2.197681058817544, - "learning_rate": 3.608017732787007e-06, - "loss": 0.9261, - "step": 3016 - }, - { - "epoch": 0.22673981662407935, - "grad_norm": 4.713239066455105, - "learning_rate": 3.60772820035677e-06, - "loss": 0.9777, - "step": 3017 - }, - { - "epoch": 0.22681497068991432, - "grad_norm": 2.0602225242720316, - "learning_rate": 3.607438572662481e-06, - "loss": 1.0513, - "step": 3018 - }, - { - "epoch": 0.2268901247557493, - "grad_norm": 1.5860367166086238, - "learning_rate": 3.6071488497213017e-06, - "loss": 0.9465, - "step": 3019 - }, - { - "epoch": 0.22696527882158424, - "grad_norm": 1.681478707452379, - "learning_rate": 3.6068590315503976e-06, - "loss": 0.9153, - "step": 3020 - }, - { - "epoch": 0.2270404328874192, - "grad_norm": 1.9889992127759784, - "learning_rate": 3.606569118166944e-06, - "loss": 0.9931, - "step": 3021 - }, - { - "epoch": 0.22711558695325418, - "grad_norm": 1.8075808496187176, - "learning_rate": 3.6062791095881174e-06, - "loss": 1.0245, - "step": 3022 - }, - { - "epoch": 0.22719074101908912, - "grad_norm": 1.3380811569536055, - "learning_rate": 3.6059890058311025e-06, - "loss": 1.0064, - "step": 3023 - }, - { - "epoch": 0.2272658950849241, - "grad_norm": 1.3279779240571516, - "learning_rate": 3.6056988069130903e-06, - "loss": 0.9242, - "step": 3024 - }, - { - "epoch": 0.22734104915075906, - "grad_norm": 9.059711737688984, - "learning_rate": 3.6054085128512747e-06, - "loss": 0.9856, - "step": 3025 - }, - { - "epoch": 0.227416203216594, - "grad_norm": 1.5398878798603792, - "learning_rate": 3.605118123662857e-06, - "loss": 0.9525, - "step": 3026 - }, - { - "epoch": 0.22749135728242897, - "grad_norm": 1.4442568661270836, - "learning_rate": 3.6048276393650434e-06, - "loss": 1.0129, - "step": 3027 - }, - { - "epoch": 0.22756651134826394, - "grad_norm": 1.7929174212253882, - "learning_rate": 3.6045370599750482e-06, - "loss": 0.9955, - "step": 3028 - }, - { - "epoch": 0.22764166541409891, - "grad_norm": 2.173587371024859, - "learning_rate": 3.6042463855100876e-06, - "loss": 0.9684, - "step": 3029 - }, - { - "epoch": 0.22771681947993386, - "grad_norm": 2.096600883292875, - "learning_rate": 3.603955615987385e-06, - "loss": 0.9746, - "step": 3030 - }, - { - "epoch": 0.22779197354576883, - "grad_norm": 4.763682685642714, - "learning_rate": 3.603664751424171e-06, - "loss": 1.0027, - "step": 3031 - }, - { - "epoch": 0.2278671276116038, - "grad_norm": 1.6627186824984908, - "learning_rate": 3.603373791837679e-06, - "loss": 1.0592, - "step": 3032 - }, - { - "epoch": 0.22794228167743874, - "grad_norm": 1.5294928297290664, - "learning_rate": 3.6030827372451506e-06, - "loss": 0.8963, - "step": 3033 - }, - { - "epoch": 0.2280174357432737, - "grad_norm": 1.626122380496975, - "learning_rate": 3.602791587663831e-06, - "loss": 0.9104, - "step": 3034 - }, - { - "epoch": 0.22809258980910868, - "grad_norm": 1.4375131855235546, - "learning_rate": 3.6025003431109722e-06, - "loss": 1.0114, - "step": 3035 - }, - { - "epoch": 0.22816774387494362, - "grad_norm": 1.6320181196459749, - "learning_rate": 3.6022090036038326e-06, - "loss": 1.1181, - "step": 3036 - }, - { - "epoch": 0.2282428979407786, - "grad_norm": 1.3553534027182987, - "learning_rate": 3.601917569159673e-06, - "loss": 1.1101, - "step": 3037 - }, - { - "epoch": 0.22831805200661356, - "grad_norm": 2.1965728074408717, - "learning_rate": 3.6016260397957642e-06, - "loss": 1.0332, - "step": 3038 - }, - { - "epoch": 0.2283932060724485, - "grad_norm": 0.6727652307499431, - "learning_rate": 3.6013344155293792e-06, - "loss": 0.8098, - "step": 3039 - }, - { - "epoch": 0.22846836013828348, - "grad_norm": 2.025014373351526, - "learning_rate": 3.6010426963777985e-06, - "loss": 0.9813, - "step": 3040 - }, - { - "epoch": 0.22854351420411845, - "grad_norm": 1.4747297427332822, - "learning_rate": 3.600750882358307e-06, - "loss": 1.0938, - "step": 3041 - }, - { - "epoch": 0.22861866826995342, - "grad_norm": 1.736744679451762, - "learning_rate": 3.6004589734881953e-06, - "loss": 1.0112, - "step": 3042 - }, - { - "epoch": 0.22869382233578836, - "grad_norm": 1.6592766799103713, - "learning_rate": 3.600166969784762e-06, - "loss": 1.0265, - "step": 3043 - }, - { - "epoch": 0.22876897640162333, - "grad_norm": 1.714968179346385, - "learning_rate": 3.5998748712653077e-06, - "loss": 1.0065, - "step": 3044 - }, - { - "epoch": 0.2288441304674583, - "grad_norm": 2.7406067381090544, - "learning_rate": 3.5995826779471408e-06, - "loss": 1.0254, - "step": 3045 - }, - { - "epoch": 0.22891928453329324, - "grad_norm": 2.282632696132179, - "learning_rate": 3.5992903898475757e-06, - "loss": 0.9754, - "step": 3046 - }, - { - "epoch": 0.2289944385991282, - "grad_norm": 1.3356733005128236, - "learning_rate": 3.5989980069839304e-06, - "loss": 0.9305, - "step": 3047 - }, - { - "epoch": 0.22906959266496318, - "grad_norm": 4.945894083594271, - "learning_rate": 3.5987055293735305e-06, - "loss": 1.0565, - "step": 3048 - }, - { - "epoch": 0.22914474673079813, - "grad_norm": 1.7090835629698224, - "learning_rate": 3.5984129570337056e-06, - "loss": 1.0338, - "step": 3049 - }, - { - "epoch": 0.2292199007966331, - "grad_norm": 1.8561943482556167, - "learning_rate": 3.598120289981793e-06, - "loss": 1.0256, - "step": 3050 - }, - { - "epoch": 0.22929505486246807, - "grad_norm": 1.8955805124356961, - "learning_rate": 3.597827528235133e-06, - "loss": 1.0453, - "step": 3051 - }, - { - "epoch": 0.229370208928303, - "grad_norm": 2.38343787819867, - "learning_rate": 3.597534671811074e-06, - "loss": 0.9508, - "step": 3052 - }, - { - "epoch": 0.22944536299413798, - "grad_norm": 1.9300151346038208, - "learning_rate": 3.5972417207269675e-06, - "loss": 1.0588, - "step": 3053 - }, - { - "epoch": 0.22952051705997295, - "grad_norm": 2.54795524295561, - "learning_rate": 3.596948675000173e-06, - "loss": 1.0519, - "step": 3054 - }, - { - "epoch": 0.2295956711258079, - "grad_norm": 3.614636174382802, - "learning_rate": 3.596655534648055e-06, - "loss": 0.9653, - "step": 3055 - }, - { - "epoch": 0.22967082519164286, - "grad_norm": 2.090091298495681, - "learning_rate": 3.596362299687982e-06, - "loss": 1.0367, - "step": 3056 - }, - { - "epoch": 0.22974597925747783, - "grad_norm": 2.417080175166135, - "learning_rate": 3.59606897013733e-06, - "loss": 0.9547, - "step": 3057 - }, - { - "epoch": 0.2298211333233128, - "grad_norm": 1.5029458682730341, - "learning_rate": 3.59577554601348e-06, - "loss": 0.874, - "step": 3058 - }, - { - "epoch": 0.22989628738914775, - "grad_norm": 1.8112281414684865, - "learning_rate": 3.595482027333818e-06, - "loss": 0.9263, - "step": 3059 - }, - { - "epoch": 0.22997144145498272, - "grad_norm": 1.460329306579439, - "learning_rate": 3.5951884141157365e-06, - "loss": 0.9073, - "step": 3060 - }, - { - "epoch": 0.2300465955208177, - "grad_norm": 1.414132638108626, - "learning_rate": 3.5948947063766334e-06, - "loss": 1.0422, - "step": 3061 - }, - { - "epoch": 0.23012174958665263, - "grad_norm": 3.846459155069271, - "learning_rate": 3.5946009041339114e-06, - "loss": 1.0555, - "step": 3062 - }, - { - "epoch": 0.2301969036524876, - "grad_norm": 3.447376005226458, - "learning_rate": 3.5943070074049797e-06, - "loss": 0.9462, - "step": 3063 - }, - { - "epoch": 0.23027205771832257, - "grad_norm": 1.8324177218463114, - "learning_rate": 3.5940130162072525e-06, - "loss": 1.0372, - "step": 3064 - }, - { - "epoch": 0.2303472117841575, - "grad_norm": 1.6518130340673458, - "learning_rate": 3.593718930558151e-06, - "loss": 1.0171, - "step": 3065 - }, - { - "epoch": 0.23042236584999248, - "grad_norm": 1.9146027717253873, - "learning_rate": 3.5934247504750995e-06, - "loss": 0.9399, - "step": 3066 - }, - { - "epoch": 0.23049751991582745, - "grad_norm": 1.8847150252275169, - "learning_rate": 3.59313047597553e-06, - "loss": 0.9205, - "step": 3067 - }, - { - "epoch": 0.2305726739816624, - "grad_norm": 2.079524207350104, - "learning_rate": 3.5928361070768788e-06, - "loss": 0.924, - "step": 3068 - }, - { - "epoch": 0.23064782804749737, - "grad_norm": 1.637080914981124, - "learning_rate": 3.59254164379659e-06, - "loss": 1.0589, - "step": 3069 - }, - { - "epoch": 0.23072298211333234, - "grad_norm": 0.8066576939732719, - "learning_rate": 3.5922470861521098e-06, - "loss": 0.8417, - "step": 3070 - }, - { - "epoch": 0.23079813617916728, - "grad_norm": 2.2117579794543922, - "learning_rate": 3.5919524341608923e-06, - "loss": 1.0335, - "step": 3071 - }, - { - "epoch": 0.23087329024500225, - "grad_norm": 0.7586097456309332, - "learning_rate": 3.5916576878403975e-06, - "loss": 0.8548, - "step": 3072 - }, - { - "epoch": 0.23094844431083722, - "grad_norm": 3.067436599138422, - "learning_rate": 3.59136284720809e-06, - "loss": 0.9882, - "step": 3073 - }, - { - "epoch": 0.2310235983766722, - "grad_norm": 2.0245756387521827, - "learning_rate": 3.59106791228144e-06, - "loss": 1.0652, - "step": 3074 - }, - { - "epoch": 0.23109875244250713, - "grad_norm": 0.7199191660009904, - "learning_rate": 3.5907728830779236e-06, - "loss": 0.8853, - "step": 3075 - }, - { - "epoch": 0.2311739065083421, - "grad_norm": 1.5811964304900368, - "learning_rate": 3.5904777596150222e-06, - "loss": 1.013, - "step": 3076 - }, - { - "epoch": 0.23124906057417707, - "grad_norm": 1.669586040791975, - "learning_rate": 3.5901825419102238e-06, - "loss": 0.9691, - "step": 3077 - }, - { - "epoch": 0.23132421464001202, - "grad_norm": 1.8629515414157858, - "learning_rate": 3.58988722998102e-06, - "loss": 0.9613, - "step": 3078 - }, - { - "epoch": 0.231399368705847, - "grad_norm": 1.8697182331877957, - "learning_rate": 3.58959182384491e-06, - "loss": 0.9754, - "step": 3079 - }, - { - "epoch": 0.23147452277168196, - "grad_norm": 1.4820439556825962, - "learning_rate": 3.5892963235193968e-06, - "loss": 1.007, - "step": 3080 - }, - { - "epoch": 0.2315496768375169, - "grad_norm": 2.8670880458951795, - "learning_rate": 3.589000729021991e-06, - "loss": 0.8725, - "step": 3081 - }, - { - "epoch": 0.23162483090335187, - "grad_norm": 1.6883383893498705, - "learning_rate": 3.5887050403702073e-06, - "loss": 0.9828, - "step": 3082 - }, - { - "epoch": 0.23169998496918684, - "grad_norm": 1.9214963462411694, - "learning_rate": 3.588409257581567e-06, - "loss": 0.9375, - "step": 3083 - }, - { - "epoch": 0.23177513903502178, - "grad_norm": 1.9622533874093673, - "learning_rate": 3.5881133806735956e-06, - "loss": 1.0766, - "step": 3084 - }, - { - "epoch": 0.23185029310085675, - "grad_norm": 1.4617736382019075, - "learning_rate": 3.587817409663824e-06, - "loss": 1.0272, - "step": 3085 - }, - { - "epoch": 0.23192544716669172, - "grad_norm": 0.6291773194133311, - "learning_rate": 3.5875213445697917e-06, - "loss": 0.8415, - "step": 3086 - }, - { - "epoch": 0.2320006012325267, - "grad_norm": 2.4818969459687352, - "learning_rate": 3.587225185409041e-06, - "loss": 0.9947, - "step": 3087 - }, - { - "epoch": 0.23207575529836164, - "grad_norm": 1.829138645950998, - "learning_rate": 3.5869289321991195e-06, - "loss": 0.9709, - "step": 3088 - }, - { - "epoch": 0.2321509093641966, - "grad_norm": 1.7116415587047509, - "learning_rate": 3.586632584957582e-06, - "loss": 0.9564, - "step": 3089 - }, - { - "epoch": 0.23222606343003158, - "grad_norm": 1.4524511600234336, - "learning_rate": 3.5863361437019885e-06, - "loss": 1.0468, - "step": 3090 - }, - { - "epoch": 0.23230121749586652, - "grad_norm": 1.3349834850447035, - "learning_rate": 3.5860396084499043e-06, - "loss": 1.0603, - "step": 3091 - }, - { - "epoch": 0.2323763715617015, - "grad_norm": 1.7861862049496524, - "learning_rate": 3.5857429792188996e-06, - "loss": 1.1211, - "step": 3092 - }, - { - "epoch": 0.23245152562753646, - "grad_norm": 0.7668726350705866, - "learning_rate": 3.585446256026551e-06, - "loss": 0.8289, - "step": 3093 - }, - { - "epoch": 0.2325266796933714, - "grad_norm": 1.6069310714808525, - "learning_rate": 3.5851494388904406e-06, - "loss": 0.8865, - "step": 3094 - }, - { - "epoch": 0.23260183375920637, - "grad_norm": 1.4962091227136671, - "learning_rate": 3.5848525278281564e-06, - "loss": 0.9598, - "step": 3095 - }, - { - "epoch": 0.23267698782504134, - "grad_norm": 2.107726859013528, - "learning_rate": 3.5845555228572907e-06, - "loss": 0.8926, - "step": 3096 - }, - { - "epoch": 0.23275214189087629, - "grad_norm": 1.7566193324642114, - "learning_rate": 3.5842584239954426e-06, - "loss": 1.0281, - "step": 3097 - }, - { - "epoch": 0.23282729595671126, - "grad_norm": 1.7586770877216071, - "learning_rate": 3.5839612312602166e-06, - "loss": 0.9663, - "step": 3098 - }, - { - "epoch": 0.23290245002254623, - "grad_norm": 2.1065111852965073, - "learning_rate": 3.5836639446692223e-06, - "loss": 1.0323, - "step": 3099 - }, - { - "epoch": 0.23297760408838117, - "grad_norm": 0.8657478548310306, - "learning_rate": 3.5833665642400747e-06, - "loss": 0.849, - "step": 3100 - }, - { - "epoch": 0.23305275815421614, - "grad_norm": 1.6002929866281197, - "learning_rate": 3.5830690899903954e-06, - "loss": 0.9325, - "step": 3101 - }, - { - "epoch": 0.2331279122200511, - "grad_norm": 1.8078301879756042, - "learning_rate": 3.582771521937811e-06, - "loss": 1.0037, - "step": 3102 - }, - { - "epoch": 0.23320306628588608, - "grad_norm": 1.5906913428720117, - "learning_rate": 3.582473860099952e-06, - "loss": 0.9924, - "step": 3103 - }, - { - "epoch": 0.23327822035172102, - "grad_norm": 1.8888940374765475, - "learning_rate": 3.582176104494458e-06, - "loss": 0.9945, - "step": 3104 - }, - { - "epoch": 0.233353374417556, - "grad_norm": 4.167175846777271, - "learning_rate": 3.581878255138971e-06, - "loss": 0.9956, - "step": 3105 - }, - { - "epoch": 0.23342852848339096, - "grad_norm": 2.0349000267947, - "learning_rate": 3.5815803120511395e-06, - "loss": 0.9703, - "step": 3106 - }, - { - "epoch": 0.2335036825492259, - "grad_norm": 2.0897243002150843, - "learning_rate": 3.5812822752486187e-06, - "loss": 1.0305, - "step": 3107 - }, - { - "epoch": 0.23357883661506088, - "grad_norm": 1.6572098739583996, - "learning_rate": 3.5809841447490674e-06, - "loss": 1.0501, - "step": 3108 - }, - { - "epoch": 0.23365399068089585, - "grad_norm": 1.6620073836308333, - "learning_rate": 3.5806859205701523e-06, - "loss": 1.0759, - "step": 3109 - }, - { - "epoch": 0.2337291447467308, - "grad_norm": 1.8191986605524384, - "learning_rate": 3.5803876027295433e-06, - "loss": 1.022, - "step": 3110 - }, - { - "epoch": 0.23380429881256576, - "grad_norm": 0.7301091823840092, - "learning_rate": 3.580089191244917e-06, - "loss": 0.865, - "step": 3111 - }, - { - "epoch": 0.23387945287840073, - "grad_norm": 4.487592121336937, - "learning_rate": 3.5797906861339556e-06, - "loss": 1.0938, - "step": 3112 - }, - { - "epoch": 0.23395460694423567, - "grad_norm": 1.7340255935214528, - "learning_rate": 3.579492087414347e-06, - "loss": 1.0716, - "step": 3113 - }, - { - "epoch": 0.23402976101007064, - "grad_norm": 2.8249171489618123, - "learning_rate": 3.5791933951037834e-06, - "loss": 0.9836, - "step": 3114 - }, - { - "epoch": 0.2341049150759056, - "grad_norm": 2.116474327099317, - "learning_rate": 3.5788946092199643e-06, - "loss": 0.9961, - "step": 3115 - }, - { - "epoch": 0.23418006914174055, - "grad_norm": 1.9234913177770256, - "learning_rate": 3.578595729780593e-06, - "loss": 0.9462, - "step": 3116 - }, - { - "epoch": 0.23425522320757552, - "grad_norm": 2.086257167138736, - "learning_rate": 3.5782967568033805e-06, - "loss": 0.9961, - "step": 3117 - }, - { - "epoch": 0.2343303772734105, - "grad_norm": 1.4427336909777477, - "learning_rate": 3.5779976903060412e-06, - "loss": 0.9734, - "step": 3118 - }, - { - "epoch": 0.23440553133924547, - "grad_norm": 1.8779465728651303, - "learning_rate": 3.5776985303062965e-06, - "loss": 1.0257, - "step": 3119 - }, - { - "epoch": 0.2344806854050804, - "grad_norm": 2.123891204576391, - "learning_rate": 3.5773992768218724e-06, - "loss": 1.0747, - "step": 3120 - }, - { - "epoch": 0.23455583947091538, - "grad_norm": 2.921672467234878, - "learning_rate": 3.577099929870501e-06, - "loss": 1.0207, - "step": 3121 - }, - { - "epoch": 0.23463099353675035, - "grad_norm": 1.3806624427232241, - "learning_rate": 3.5768004894699192e-06, - "loss": 0.8652, - "step": 3122 - }, - { - "epoch": 0.2347061476025853, - "grad_norm": 1.8321097871583838, - "learning_rate": 3.57650095563787e-06, - "loss": 0.9796, - "step": 3123 - }, - { - "epoch": 0.23478130166842026, - "grad_norm": 0.714048766957785, - "learning_rate": 3.5762013283921033e-06, - "loss": 0.8218, - "step": 3124 - }, - { - "epoch": 0.23485645573425523, - "grad_norm": 2.3743138576988594, - "learning_rate": 3.5759016077503716e-06, - "loss": 1.0565, - "step": 3125 - }, - { - "epoch": 0.23493160980009017, - "grad_norm": 2.224418491282414, - "learning_rate": 3.5756017937304356e-06, - "loss": 0.97, - "step": 3126 - }, - { - "epoch": 0.23500676386592514, - "grad_norm": 2.275979210689912, - "learning_rate": 3.57530188635006e-06, - "loss": 1.0518, - "step": 3127 - }, - { - "epoch": 0.23508191793176011, - "grad_norm": 1.4935244816962818, - "learning_rate": 3.5750018856270153e-06, - "loss": 1.0388, - "step": 3128 - }, - { - "epoch": 0.23515707199759506, - "grad_norm": 2.0856943738886464, - "learning_rate": 3.5747017915790776e-06, - "loss": 1.0783, - "step": 3129 - }, - { - "epoch": 0.23523222606343003, - "grad_norm": 1.6980447155307108, - "learning_rate": 3.5744016042240287e-06, - "loss": 0.9992, - "step": 3130 - }, - { - "epoch": 0.235307380129265, - "grad_norm": 1.7690633193005563, - "learning_rate": 3.574101323579656e-06, - "loss": 1.0435, - "step": 3131 - }, - { - "epoch": 0.23538253419509997, - "grad_norm": 2.21522424514276, - "learning_rate": 3.5738009496637523e-06, - "loss": 1.0282, - "step": 3132 - }, - { - "epoch": 0.2354576882609349, - "grad_norm": 5.0286376995203295, - "learning_rate": 3.573500482494116e-06, - "loss": 1.0291, - "step": 3133 - }, - { - "epoch": 0.23553284232676988, - "grad_norm": 2.3538807945761535, - "learning_rate": 3.573199922088551e-06, - "loss": 0.999, - "step": 3134 - }, - { - "epoch": 0.23560799639260485, - "grad_norm": 1.5857578420118872, - "learning_rate": 3.5728992684648657e-06, - "loss": 0.938, - "step": 3135 - }, - { - "epoch": 0.2356831504584398, - "grad_norm": 1.8892313336697946, - "learning_rate": 3.572598521640876e-06, - "loss": 0.9941, - "step": 3136 - }, - { - "epoch": 0.23575830452427476, - "grad_norm": 2.005909058309354, - "learning_rate": 3.572297681634402e-06, - "loss": 1.0236, - "step": 3137 - }, - { - "epoch": 0.23583345859010973, - "grad_norm": 1.5511416537727847, - "learning_rate": 3.57199674846327e-06, - "loss": 1.0301, - "step": 3138 - }, - { - "epoch": 0.23590861265594468, - "grad_norm": 2.5772520494755415, - "learning_rate": 3.5716957221453106e-06, - "loss": 0.9714, - "step": 3139 - }, - { - "epoch": 0.23598376672177965, - "grad_norm": 2.7477289705954644, - "learning_rate": 3.571394602698362e-06, - "loss": 1.0134, - "step": 3140 - }, - { - "epoch": 0.23605892078761462, - "grad_norm": 1.5617606893521376, - "learning_rate": 3.5710933901402652e-06, - "loss": 1.0357, - "step": 3141 - }, - { - "epoch": 0.23613407485344956, - "grad_norm": 2.3292402630019313, - "learning_rate": 3.570792084488869e-06, - "loss": 1.0032, - "step": 3142 - }, - { - "epoch": 0.23620922891928453, - "grad_norm": 2.7437248593788315, - "learning_rate": 3.5704906857620268e-06, - "loss": 1.0207, - "step": 3143 - }, - { - "epoch": 0.2362843829851195, - "grad_norm": 1.384373704896756, - "learning_rate": 3.5701891939775974e-06, - "loss": 1.0225, - "step": 3144 - }, - { - "epoch": 0.23635953705095444, - "grad_norm": 1.3901409635489064, - "learning_rate": 3.5698876091534465e-06, - "loss": 0.9742, - "step": 3145 - }, - { - "epoch": 0.2364346911167894, - "grad_norm": 1.6854709152840077, - "learning_rate": 3.5695859313074425e-06, - "loss": 1.0245, - "step": 3146 - }, - { - "epoch": 0.23650984518262438, - "grad_norm": 1.861513270709666, - "learning_rate": 3.5692841604574617e-06, - "loss": 0.9759, - "step": 3147 - }, - { - "epoch": 0.23658499924845935, - "grad_norm": 1.9712036827901398, - "learning_rate": 3.568982296621386e-06, - "loss": 0.9978, - "step": 3148 - }, - { - "epoch": 0.2366601533142943, - "grad_norm": 2.1557576819929647, - "learning_rate": 3.5686803398171007e-06, - "loss": 1.0149, - "step": 3149 - }, - { - "epoch": 0.23673530738012927, - "grad_norm": 1.7269473790433632, - "learning_rate": 3.5683782900624986e-06, - "loss": 1.0556, - "step": 3150 - }, - { - "epoch": 0.23681046144596424, - "grad_norm": 1.6024665327936665, - "learning_rate": 3.5680761473754767e-06, - "loss": 0.9712, - "step": 3151 - }, - { - "epoch": 0.23688561551179918, - "grad_norm": 1.5596467478921145, - "learning_rate": 3.5677739117739385e-06, - "loss": 1.0125, - "step": 3152 - }, - { - "epoch": 0.23696076957763415, - "grad_norm": 2.077760363260458, - "learning_rate": 3.5674715832757927e-06, - "loss": 1.0294, - "step": 3153 - }, - { - "epoch": 0.23703592364346912, - "grad_norm": 2.4125434515800372, - "learning_rate": 3.5671691618989533e-06, - "loss": 1.0409, - "step": 3154 - }, - { - "epoch": 0.23711107770930406, - "grad_norm": 1.8870554874269527, - "learning_rate": 3.56686664766134e-06, - "loss": 1.055, - "step": 3155 - }, - { - "epoch": 0.23718623177513903, - "grad_norm": 2.1967744676749623, - "learning_rate": 3.5665640405808785e-06, - "loss": 0.9821, - "step": 3156 - }, - { - "epoch": 0.237261385840974, - "grad_norm": 1.585569968646298, - "learning_rate": 3.566261340675498e-06, - "loss": 1.0366, - "step": 3157 - }, - { - "epoch": 0.23733653990680895, - "grad_norm": 1.622681136989618, - "learning_rate": 3.5659585479631357e-06, - "loss": 1.0006, - "step": 3158 - }, - { - "epoch": 0.23741169397264392, - "grad_norm": 1.6902375798010985, - "learning_rate": 3.565655662461733e-06, - "loss": 1.1107, - "step": 3159 - }, - { - "epoch": 0.2374868480384789, - "grad_norm": 1.8530209748531854, - "learning_rate": 3.565352684189237e-06, - "loss": 0.9773, - "step": 3160 - }, - { - "epoch": 0.23756200210431383, - "grad_norm": 2.0122681099115476, - "learning_rate": 3.5650496131636006e-06, - "loss": 1.0341, - "step": 3161 - }, - { - "epoch": 0.2376371561701488, - "grad_norm": 1.2199526013093822, - "learning_rate": 3.564746449402781e-06, - "loss": 0.9484, - "step": 3162 - }, - { - "epoch": 0.23771231023598377, - "grad_norm": 1.9434965919089853, - "learning_rate": 3.5644431929247432e-06, - "loss": 1.0403, - "step": 3163 - }, - { - "epoch": 0.23778746430181874, - "grad_norm": 1.5511430371192056, - "learning_rate": 3.5641398437474546e-06, - "loss": 1.0926, - "step": 3164 - }, - { - "epoch": 0.23786261836765368, - "grad_norm": 2.315544186725071, - "learning_rate": 3.563836401888892e-06, - "loss": 0.9013, - "step": 3165 - }, - { - "epoch": 0.23793777243348865, - "grad_norm": 1.8438621745495896, - "learning_rate": 3.5635328673670335e-06, - "loss": 1.0043, - "step": 3166 - }, - { - "epoch": 0.23801292649932362, - "grad_norm": 1.5922927177785422, - "learning_rate": 3.5632292401998657e-06, - "loss": 1.0148, - "step": 3167 - }, - { - "epoch": 0.23808808056515857, - "grad_norm": 0.6092235425871632, - "learning_rate": 3.562925520405379e-06, - "loss": 0.8311, - "step": 3168 - }, - { - "epoch": 0.23816323463099354, - "grad_norm": 2.097883443054764, - "learning_rate": 3.562621708001571e-06, - "loss": 1.0985, - "step": 3169 - }, - { - "epoch": 0.2382383886968285, - "grad_norm": 1.5159303809765312, - "learning_rate": 3.5623178030064426e-06, - "loss": 0.9846, - "step": 3170 - }, - { - "epoch": 0.23831354276266345, - "grad_norm": 1.4942719445457684, - "learning_rate": 3.562013805438002e-06, - "loss": 1.0745, - "step": 3171 - }, - { - "epoch": 0.23838869682849842, - "grad_norm": 1.8091639367634569, - "learning_rate": 3.5617097153142623e-06, - "loss": 1.0613, - "step": 3172 - }, - { - "epoch": 0.2384638508943334, - "grad_norm": 1.6517994662857534, - "learning_rate": 3.5614055326532416e-06, - "loss": 0.9396, - "step": 3173 - }, - { - "epoch": 0.23853900496016833, - "grad_norm": 2.1071567881310695, - "learning_rate": 3.561101257472964e-06, - "loss": 0.9986, - "step": 3174 - }, - { - "epoch": 0.2386141590260033, - "grad_norm": 4.3865656974635705, - "learning_rate": 3.560796889791459e-06, - "loss": 1.0958, - "step": 3175 - }, - { - "epoch": 0.23868931309183827, - "grad_norm": 1.8283258311495336, - "learning_rate": 3.5604924296267616e-06, - "loss": 1.039, - "step": 3176 - }, - { - "epoch": 0.23876446715767324, - "grad_norm": 2.065376818546168, - "learning_rate": 3.5601878769969123e-06, - "loss": 1.101, - "step": 3177 - }, - { - "epoch": 0.2388396212235082, - "grad_norm": 2.0895448274826673, - "learning_rate": 3.559883231919957e-06, - "loss": 0.959, - "step": 3178 - }, - { - "epoch": 0.23891477528934316, - "grad_norm": 1.8036929606237346, - "learning_rate": 3.559578494413947e-06, - "loss": 1.0538, - "step": 3179 - }, - { - "epoch": 0.23898992935517813, - "grad_norm": 0.7986277956302401, - "learning_rate": 3.559273664496939e-06, - "loss": 0.8496, - "step": 3180 - }, - { - "epoch": 0.23906508342101307, - "grad_norm": 1.503454997742472, - "learning_rate": 3.5589687421869957e-06, - "loss": 0.8437, - "step": 3181 - }, - { - "epoch": 0.23914023748684804, - "grad_norm": 1.8045948880034959, - "learning_rate": 3.558663727502185e-06, - "loss": 0.9759, - "step": 3182 - }, - { - "epoch": 0.239215391552683, - "grad_norm": 1.6574922617275676, - "learning_rate": 3.5583586204605796e-06, - "loss": 1.0176, - "step": 3183 - }, - { - "epoch": 0.23929054561851795, - "grad_norm": 2.8491568673445005, - "learning_rate": 3.5580534210802587e-06, - "loss": 1.0073, - "step": 3184 - }, - { - "epoch": 0.23936569968435292, - "grad_norm": 1.7084866741800564, - "learning_rate": 3.5577481293793063e-06, - "loss": 1.0333, - "step": 3185 - }, - { - "epoch": 0.2394408537501879, - "grad_norm": 1.624744541922689, - "learning_rate": 3.5574427453758124e-06, - "loss": 0.9336, - "step": 3186 - }, - { - "epoch": 0.23951600781602284, - "grad_norm": 2.644342187521466, - "learning_rate": 3.557137269087872e-06, - "loss": 1.0293, - "step": 3187 - }, - { - "epoch": 0.2395911618818578, - "grad_norm": 1.5800898944053328, - "learning_rate": 3.5568317005335852e-06, - "loss": 1.0725, - "step": 3188 - }, - { - "epoch": 0.23966631594769278, - "grad_norm": 1.8100533249940178, - "learning_rate": 3.556526039731059e-06, - "loss": 0.9214, - "step": 3189 - }, - { - "epoch": 0.23974147001352772, - "grad_norm": 1.7619065144492114, - "learning_rate": 3.5562202866984045e-06, - "loss": 0.9692, - "step": 3190 - }, - { - "epoch": 0.2398166240793627, - "grad_norm": 1.6536920335778804, - "learning_rate": 3.555914441453739e-06, - "loss": 1.0195, - "step": 3191 - }, - { - "epoch": 0.23989177814519766, - "grad_norm": 2.543163942501533, - "learning_rate": 3.555608504015185e-06, - "loss": 0.9865, - "step": 3192 - }, - { - "epoch": 0.23996693221103263, - "grad_norm": 1.7575088577806977, - "learning_rate": 3.5553024744008697e-06, - "loss": 1.0006, - "step": 3193 - }, - { - "epoch": 0.24004208627686757, - "grad_norm": 2.0212603901045787, - "learning_rate": 3.5549963526289276e-06, - "loss": 0.9372, - "step": 3194 - }, - { - "epoch": 0.24011724034270254, - "grad_norm": 1.7924648399575738, - "learning_rate": 3.5546901387174975e-06, - "loss": 1.1093, - "step": 3195 - }, - { - "epoch": 0.2401923944085375, - "grad_norm": 1.9329056565479, - "learning_rate": 3.554383832684723e-06, - "loss": 1.0667, - "step": 3196 - }, - { - "epoch": 0.24026754847437246, - "grad_norm": 1.5619850072934898, - "learning_rate": 3.554077434548754e-06, - "loss": 0.9854, - "step": 3197 - }, - { - "epoch": 0.24034270254020743, - "grad_norm": 1.9759942244279294, - "learning_rate": 3.5537709443277465e-06, - "loss": 0.8965, - "step": 3198 - }, - { - "epoch": 0.2404178566060424, - "grad_norm": 1.5749896034018684, - "learning_rate": 3.55346436203986e-06, - "loss": 1.0724, - "step": 3199 - }, - { - "epoch": 0.24049301067187734, - "grad_norm": 1.8489563704645904, - "learning_rate": 3.5531576877032627e-06, - "loss": 1.057, - "step": 3200 - }, - { - "epoch": 0.2405681647377123, - "grad_norm": 2.786170977930414, - "learning_rate": 3.552850921336124e-06, - "loss": 0.9113, - "step": 3201 - }, - { - "epoch": 0.24064331880354728, - "grad_norm": 2.0900196607477195, - "learning_rate": 3.5525440629566223e-06, - "loss": 1.0069, - "step": 3202 - }, - { - "epoch": 0.24071847286938222, - "grad_norm": 8.157482770222956, - "learning_rate": 3.5522371125829395e-06, - "loss": 1.0979, - "step": 3203 - }, - { - "epoch": 0.2407936269352172, - "grad_norm": 0.8256323746056587, - "learning_rate": 3.551930070233264e-06, - "loss": 0.8994, - "step": 3204 - }, - { - "epoch": 0.24086878100105216, - "grad_norm": 1.6777784987876865, - "learning_rate": 3.551622935925789e-06, - "loss": 0.9675, - "step": 3205 - }, - { - "epoch": 0.2409439350668871, - "grad_norm": 4.5245290437378065, - "learning_rate": 3.5513157096787143e-06, - "loss": 0.9316, - "step": 3206 - }, - { - "epoch": 0.24101908913272208, - "grad_norm": 1.2789520031779145, - "learning_rate": 3.551008391510242e-06, - "loss": 0.9609, - "step": 3207 - }, - { - "epoch": 0.24109424319855705, - "grad_norm": 4.711288514985634, - "learning_rate": 3.5507009814385846e-06, - "loss": 1.0715, - "step": 3208 - }, - { - "epoch": 0.24116939726439202, - "grad_norm": 1.669473081368874, - "learning_rate": 3.550393479481955e-06, - "loss": 1.0437, - "step": 3209 - }, - { - "epoch": 0.24124455133022696, - "grad_norm": 1.6468663977266218, - "learning_rate": 3.550085885658576e-06, - "loss": 0.9822, - "step": 3210 - }, - { - "epoch": 0.24131970539606193, - "grad_norm": 1.791384992710432, - "learning_rate": 3.5497781999866715e-06, - "loss": 0.9971, - "step": 3211 - }, - { - "epoch": 0.2413948594618969, - "grad_norm": 1.4898173897486104, - "learning_rate": 3.5494704224844746e-06, - "loss": 0.8941, - "step": 3212 - }, - { - "epoch": 0.24147001352773184, - "grad_norm": 2.83091556013781, - "learning_rate": 3.549162553170222e-06, - "loss": 0.9272, - "step": 3213 - }, - { - "epoch": 0.2415451675935668, - "grad_norm": 1.6117608616366634, - "learning_rate": 3.548854592062156e-06, - "loss": 0.961, - "step": 3214 - }, - { - "epoch": 0.24162032165940178, - "grad_norm": 1.2831994090033714, - "learning_rate": 3.548546539178524e-06, - "loss": 1.0385, - "step": 3215 - }, - { - "epoch": 0.24169547572523672, - "grad_norm": 1.5597096514098545, - "learning_rate": 3.548238394537581e-06, - "loss": 1.0436, - "step": 3216 - }, - { - "epoch": 0.2417706297910717, - "grad_norm": 3.7722198859613143, - "learning_rate": 3.5479301581575827e-06, - "loss": 1.0254, - "step": 3217 - }, - { - "epoch": 0.24184578385690667, - "grad_norm": 1.6949933714160321, - "learning_rate": 3.547621830056796e-06, - "loss": 1.0774, - "step": 3218 - }, - { - "epoch": 0.2419209379227416, - "grad_norm": 1.4947535635138012, - "learning_rate": 3.5473134102534895e-06, - "loss": 0.9937, - "step": 3219 - }, - { - "epoch": 0.24199609198857658, - "grad_norm": 2.1287226886531667, - "learning_rate": 3.5470048987659387e-06, - "loss": 0.9537, - "step": 3220 - }, - { - "epoch": 0.24207124605441155, - "grad_norm": 1.636574240338832, - "learning_rate": 3.5466962956124235e-06, - "loss": 0.977, - "step": 3221 - }, - { - "epoch": 0.24214640012024652, - "grad_norm": 2.3874602469285353, - "learning_rate": 3.54638760081123e-06, - "loss": 1.0095, - "step": 3222 - }, - { - "epoch": 0.24222155418608146, - "grad_norm": 1.7168288157402742, - "learning_rate": 3.5460788143806505e-06, - "loss": 1.0429, - "step": 3223 - }, - { - "epoch": 0.24229670825191643, - "grad_norm": 2.7009730069316844, - "learning_rate": 3.54576993633898e-06, - "loss": 1.0182, - "step": 3224 - }, - { - "epoch": 0.2423718623177514, - "grad_norm": 1.583412979866649, - "learning_rate": 3.545460966704522e-06, - "loss": 0.9983, - "step": 3225 - }, - { - "epoch": 0.24244701638358634, - "grad_norm": 1.5300458727922215, - "learning_rate": 3.5451519054955836e-06, - "loss": 1.006, - "step": 3226 - }, - { - "epoch": 0.24252217044942131, - "grad_norm": 2.055091615105826, - "learning_rate": 3.544842752730478e-06, - "loss": 1.0591, - "step": 3227 - }, - { - "epoch": 0.24259732451525629, - "grad_norm": 1.5563535724782842, - "learning_rate": 3.5445335084275235e-06, - "loss": 0.8987, - "step": 3228 - }, - { - "epoch": 0.24267247858109123, - "grad_norm": 1.8497992380855615, - "learning_rate": 3.5442241726050444e-06, - "loss": 0.9926, - "step": 3229 - }, - { - "epoch": 0.2427476326469262, - "grad_norm": 2.0047842262582174, - "learning_rate": 3.5439147452813696e-06, - "loss": 1.011, - "step": 3230 - }, - { - "epoch": 0.24282278671276117, - "grad_norm": 1.7754787847035094, - "learning_rate": 3.5436052264748348e-06, - "loss": 0.9171, - "step": 3231 - }, - { - "epoch": 0.2428979407785961, - "grad_norm": 1.9744162988645337, - "learning_rate": 3.543295616203779e-06, - "loss": 1.0464, - "step": 3232 - }, - { - "epoch": 0.24297309484443108, - "grad_norm": 1.5650374598788281, - "learning_rate": 3.5429859144865486e-06, - "loss": 0.994, - "step": 3233 - }, - { - "epoch": 0.24304824891026605, - "grad_norm": 3.0894966557477392, - "learning_rate": 3.542676121341494e-06, - "loss": 0.8425, - "step": 3234 - }, - { - "epoch": 0.243123402976101, - "grad_norm": 1.5189430647847262, - "learning_rate": 3.5423662367869716e-06, - "loss": 0.95, - "step": 3235 - }, - { - "epoch": 0.24319855704193596, - "grad_norm": 2.4113483664786064, - "learning_rate": 3.542056260841344e-06, - "loss": 0.9319, - "step": 3236 - }, - { - "epoch": 0.24327371110777093, - "grad_norm": 1.6044874324716516, - "learning_rate": 3.5417461935229777e-06, - "loss": 0.945, - "step": 3237 - }, - { - "epoch": 0.2433488651736059, - "grad_norm": 1.4267792853502654, - "learning_rate": 3.5414360348502463e-06, - "loss": 0.8534, - "step": 3238 - }, - { - "epoch": 0.24342401923944085, - "grad_norm": 1.7291999798365496, - "learning_rate": 3.5411257848415266e-06, - "loss": 0.9513, - "step": 3239 - }, - { - "epoch": 0.24349917330527582, - "grad_norm": 1.748211423492204, - "learning_rate": 3.5408154435152034e-06, - "loss": 1.0189, - "step": 3240 - }, - { - "epoch": 0.2435743273711108, - "grad_norm": 2.180288252540535, - "learning_rate": 3.5405050108896645e-06, - "loss": 0.8636, - "step": 3241 - }, - { - "epoch": 0.24364948143694573, - "grad_norm": 2.2323278906288153, - "learning_rate": 3.5401944869833046e-06, - "loss": 0.9479, - "step": 3242 - }, - { - "epoch": 0.2437246355027807, - "grad_norm": 1.840795898275601, - "learning_rate": 3.539883871814524e-06, - "loss": 1.0537, - "step": 3243 - }, - { - "epoch": 0.24379978956861567, - "grad_norm": 2.319057240656883, - "learning_rate": 3.5395731654017277e-06, - "loss": 1.0098, - "step": 3244 - }, - { - "epoch": 0.2438749436344506, - "grad_norm": 16.20827067672792, - "learning_rate": 3.539262367763325e-06, - "loss": 0.9554, - "step": 3245 - }, - { - "epoch": 0.24395009770028558, - "grad_norm": 1.3350830915216811, - "learning_rate": 3.5389514789177334e-06, - "loss": 0.9713, - "step": 3246 - }, - { - "epoch": 0.24402525176612055, - "grad_norm": 1.7064720652071266, - "learning_rate": 3.5386404988833732e-06, - "loss": 1.0318, - "step": 3247 - }, - { - "epoch": 0.2441004058319555, - "grad_norm": 12.823114529448436, - "learning_rate": 3.538329427678672e-06, - "loss": 0.9483, - "step": 3248 - }, - { - "epoch": 0.24417555989779047, - "grad_norm": 0.8947590034097963, - "learning_rate": 3.5380182653220613e-06, - "loss": 0.8584, - "step": 3249 - }, - { - "epoch": 0.24425071396362544, - "grad_norm": 1.7899150566106738, - "learning_rate": 3.5377070118319788e-06, - "loss": 0.9824, - "step": 3250 - }, - { - "epoch": 0.24432586802946038, - "grad_norm": 2.481296671469271, - "learning_rate": 3.5373956672268683e-06, - "loss": 1.0206, - "step": 3251 - }, - { - "epoch": 0.24440102209529535, - "grad_norm": 1.7946597832570887, - "learning_rate": 3.5370842315251766e-06, - "loss": 1.0569, - "step": 3252 - }, - { - "epoch": 0.24447617616113032, - "grad_norm": 2.151948037438395, - "learning_rate": 3.5367727047453583e-06, - "loss": 1.0704, - "step": 3253 - }, - { - "epoch": 0.2445513302269653, - "grad_norm": 1.847508624189635, - "learning_rate": 3.536461086905873e-06, - "loss": 1.1121, - "step": 3254 - }, - { - "epoch": 0.24462648429280023, - "grad_norm": 2.1612249286669027, - "learning_rate": 3.536149378025185e-06, - "loss": 0.9801, - "step": 3255 - }, - { - "epoch": 0.2447016383586352, - "grad_norm": 0.7370939446205338, - "learning_rate": 3.5358375781217634e-06, - "loss": 0.8739, - "step": 3256 - }, - { - "epoch": 0.24477679242447017, - "grad_norm": 1.4424266149937521, - "learning_rate": 3.5355256872140846e-06, - "loss": 0.9848, - "step": 3257 - }, - { - "epoch": 0.24485194649030512, - "grad_norm": 4.214187528560981, - "learning_rate": 3.535213705320629e-06, - "loss": 0.8999, - "step": 3258 - }, - { - "epoch": 0.2449271005561401, - "grad_norm": 1.6750759249887537, - "learning_rate": 3.534901632459882e-06, - "loss": 0.949, - "step": 3259 - }, - { - "epoch": 0.24500225462197506, - "grad_norm": 3.1344163551530886, - "learning_rate": 3.5345894686503366e-06, - "loss": 0.9221, - "step": 3260 - }, - { - "epoch": 0.24507740868781, - "grad_norm": 1.7787250223771838, - "learning_rate": 3.5342772139104884e-06, - "loss": 1.0481, - "step": 3261 - }, - { - "epoch": 0.24515256275364497, - "grad_norm": 1.5983496469348661, - "learning_rate": 3.5339648682588397e-06, - "loss": 0.913, - "step": 3262 - }, - { - "epoch": 0.24522771681947994, - "grad_norm": 2.284400541107963, - "learning_rate": 3.5336524317138993e-06, - "loss": 0.9636, - "step": 3263 - }, - { - "epoch": 0.24530287088531488, - "grad_norm": 1.9626297658793543, - "learning_rate": 3.5333399042941797e-06, - "loss": 1.0004, - "step": 3264 - }, - { - "epoch": 0.24537802495114985, - "grad_norm": 1.6786698918320446, - "learning_rate": 3.5330272860181985e-06, - "loss": 0.9645, - "step": 3265 - }, - { - "epoch": 0.24545317901698482, - "grad_norm": 1.4146707817692519, - "learning_rate": 3.532714576904481e-06, - "loss": 0.9861, - "step": 3266 - }, - { - "epoch": 0.2455283330828198, - "grad_norm": 0.7309701596025867, - "learning_rate": 3.5324017769715548e-06, - "loss": 0.8925, - "step": 3267 - }, - { - "epoch": 0.24560348714865474, - "grad_norm": 1.690285890877209, - "learning_rate": 3.532088886237956e-06, - "loss": 0.8783, - "step": 3268 - }, - { - "epoch": 0.2456786412144897, - "grad_norm": 2.2311788253758738, - "learning_rate": 3.5317759047222235e-06, - "loss": 0.9977, - "step": 3269 - }, - { - "epoch": 0.24575379528032468, - "grad_norm": 1.4239395579412069, - "learning_rate": 3.531462832442903e-06, - "loss": 1.0345, - "step": 3270 - }, - { - "epoch": 0.24582894934615962, - "grad_norm": 3.246212145761285, - "learning_rate": 3.531149669418546e-06, - "loss": 0.9661, - "step": 3271 - }, - { - "epoch": 0.2459041034119946, - "grad_norm": 2.295184804585663, - "learning_rate": 3.530836415667708e-06, - "loss": 0.8819, - "step": 3272 - }, - { - "epoch": 0.24597925747782956, - "grad_norm": 2.5224678373223965, - "learning_rate": 3.53052307120895e-06, - "loss": 1.0947, - "step": 3273 - }, - { - "epoch": 0.2460544115436645, - "grad_norm": 2.166763107892198, - "learning_rate": 3.5302096360608385e-06, - "loss": 1.0083, - "step": 3274 - }, - { - "epoch": 0.24612956560949947, - "grad_norm": 2.2263647158933884, - "learning_rate": 3.5298961102419477e-06, - "loss": 0.9401, - "step": 3275 - }, - { - "epoch": 0.24620471967533444, - "grad_norm": 1.5308672368187701, - "learning_rate": 3.5295824937708537e-06, - "loss": 1.0649, - "step": 3276 - }, - { - "epoch": 0.2462798737411694, - "grad_norm": 1.9756439849124765, - "learning_rate": 3.5292687866661396e-06, - "loss": 1.1082, - "step": 3277 - }, - { - "epoch": 0.24635502780700436, - "grad_norm": 4.681669551042262, - "learning_rate": 3.528954988946394e-06, - "loss": 0.9151, - "step": 3278 - }, - { - "epoch": 0.24643018187283933, - "grad_norm": 1.6681472003898754, - "learning_rate": 3.5286411006302107e-06, - "loss": 1.0005, - "step": 3279 - }, - { - "epoch": 0.24650533593867427, - "grad_norm": 1.6176617060727467, - "learning_rate": 3.528327121736188e-06, - "loss": 0.969, - "step": 3280 - }, - { - "epoch": 0.24658049000450924, - "grad_norm": 1.6167503171430577, - "learning_rate": 3.5280130522829317e-06, - "loss": 0.9153, - "step": 3281 - }, - { - "epoch": 0.2466556440703442, - "grad_norm": 1.8016405020880382, - "learning_rate": 3.5276988922890503e-06, - "loss": 0.9584, - "step": 3282 - }, - { - "epoch": 0.24673079813617918, - "grad_norm": 2.3419916296271017, - "learning_rate": 3.52738464177316e-06, - "loss": 1.0355, - "step": 3283 - }, - { - "epoch": 0.24680595220201412, - "grad_norm": 1.7894226111587765, - "learning_rate": 3.527070300753881e-06, - "loss": 1.0385, - "step": 3284 - }, - { - "epoch": 0.2468811062678491, - "grad_norm": 1.8213631540727049, - "learning_rate": 3.526755869249839e-06, - "loss": 0.9622, - "step": 3285 - }, - { - "epoch": 0.24695626033368406, - "grad_norm": 1.803238191803677, - "learning_rate": 3.5264413472796653e-06, - "loss": 0.9087, - "step": 3286 - }, - { - "epoch": 0.247031414399519, - "grad_norm": 13.542714114638185, - "learning_rate": 3.5261267348619964e-06, - "loss": 1.0311, - "step": 3287 - }, - { - "epoch": 0.24710656846535398, - "grad_norm": 2.7650733871792292, - "learning_rate": 3.5258120320154755e-06, - "loss": 0.9719, - "step": 3288 - }, - { - "epoch": 0.24718172253118895, - "grad_norm": 2.09932183714249, - "learning_rate": 3.5254972387587483e-06, - "loss": 0.9144, - "step": 3289 - }, - { - "epoch": 0.2472568765970239, - "grad_norm": 4.3522328488804485, - "learning_rate": 3.525182355110468e-06, - "loss": 0.9271, - "step": 3290 - }, - { - "epoch": 0.24733203066285886, - "grad_norm": 2.7558322961938146, - "learning_rate": 3.524867381089293e-06, - "loss": 0.9623, - "step": 3291 - }, - { - "epoch": 0.24740718472869383, - "grad_norm": 1.7572874514249013, - "learning_rate": 3.524552316713887e-06, - "loss": 1.0007, - "step": 3292 - }, - { - "epoch": 0.24748233879452877, - "grad_norm": 1.5980236125438068, - "learning_rate": 3.5242371620029176e-06, - "loss": 1.0016, - "step": 3293 - }, - { - "epoch": 0.24755749286036374, - "grad_norm": 2.0516657783677847, - "learning_rate": 3.5239219169750604e-06, - "loss": 0.9927, - "step": 3294 - }, - { - "epoch": 0.2476326469261987, - "grad_norm": 0.6650494814153224, - "learning_rate": 3.5236065816489938e-06, - "loss": 0.7684, - "step": 3295 - }, - { - "epoch": 0.24770780099203366, - "grad_norm": 0.8081506445666594, - "learning_rate": 3.5232911560434023e-06, - "loss": 0.8313, - "step": 3296 - }, - { - "epoch": 0.24778295505786863, - "grad_norm": 2.285939139563076, - "learning_rate": 3.5229756401769775e-06, - "loss": 1.0674, - "step": 3297 - }, - { - "epoch": 0.2478581091237036, - "grad_norm": 1.942693088120547, - "learning_rate": 3.522660034068414e-06, - "loss": 0.9318, - "step": 3298 - }, - { - "epoch": 0.24793326318953857, - "grad_norm": 1.742567422949827, - "learning_rate": 3.5223443377364133e-06, - "loss": 0.9855, - "step": 3299 - }, - { - "epoch": 0.2480084172553735, - "grad_norm": 2.607688770135982, - "learning_rate": 3.5220285511996802e-06, - "loss": 0.9534, - "step": 3300 - }, - { - "epoch": 0.24808357132120848, - "grad_norm": 2.253159953149154, - "learning_rate": 3.521712674476928e-06, - "loss": 1.0245, - "step": 3301 - }, - { - "epoch": 0.24815872538704345, - "grad_norm": 0.7392499915733505, - "learning_rate": 3.521396707586872e-06, - "loss": 0.8003, - "step": 3302 - }, - { - "epoch": 0.2482338794528784, - "grad_norm": 0.7940868894276089, - "learning_rate": 3.521080650548236e-06, - "loss": 0.8273, - "step": 3303 - }, - { - "epoch": 0.24830903351871336, - "grad_norm": 1.5800871783964985, - "learning_rate": 3.5207645033797464e-06, - "loss": 1.0158, - "step": 3304 - }, - { - "epoch": 0.24838418758454833, - "grad_norm": 1.5868707175570769, - "learning_rate": 3.5204482661001373e-06, - "loss": 0.9527, - "step": 3305 - }, - { - "epoch": 0.24845934165038328, - "grad_norm": 3.454417267387737, - "learning_rate": 3.5201319387281455e-06, - "loss": 0.9585, - "step": 3306 - }, - { - "epoch": 0.24853449571621825, - "grad_norm": 1.5057138652237356, - "learning_rate": 3.519815521282515e-06, - "loss": 1.0415, - "step": 3307 - }, - { - "epoch": 0.24860964978205322, - "grad_norm": 1.5138437874907023, - "learning_rate": 3.519499013781996e-06, - "loss": 1.0478, - "step": 3308 - }, - { - "epoch": 0.24868480384788816, - "grad_norm": 1.5381386399700916, - "learning_rate": 3.5191824162453417e-06, - "loss": 1.0589, - "step": 3309 - }, - { - "epoch": 0.24875995791372313, - "grad_norm": 4.1785757136992485, - "learning_rate": 3.5188657286913115e-06, - "loss": 1.0103, - "step": 3310 - }, - { - "epoch": 0.2488351119795581, - "grad_norm": 3.1930695288471633, - "learning_rate": 3.5185489511386712e-06, - "loss": 1.0064, - "step": 3311 - }, - { - "epoch": 0.24891026604539307, - "grad_norm": 6.184980708025093, - "learning_rate": 3.5182320836061906e-06, - "loss": 1.0032, - "step": 3312 - }, - { - "epoch": 0.248985420111228, - "grad_norm": 1.6779814816840966, - "learning_rate": 3.517915126112645e-06, - "loss": 0.9408, - "step": 3313 - }, - { - "epoch": 0.24906057417706298, - "grad_norm": 2.1488580465530616, - "learning_rate": 3.517598078676816e-06, - "loss": 0.9568, - "step": 3314 - }, - { - "epoch": 0.24913572824289795, - "grad_norm": 1.810866441887757, - "learning_rate": 3.517280941317489e-06, - "loss": 1.0195, - "step": 3315 - }, - { - "epoch": 0.2492108823087329, - "grad_norm": 3.753354289017262, - "learning_rate": 3.5169637140534573e-06, - "loss": 1.0706, - "step": 3316 - }, - { - "epoch": 0.24928603637456787, - "grad_norm": 1.8284129380613976, - "learning_rate": 3.5166463969035157e-06, - "loss": 0.8607, - "step": 3317 - }, - { - "epoch": 0.24936119044040284, - "grad_norm": 1.717998270888785, - "learning_rate": 3.5163289898864675e-06, - "loss": 0.9025, - "step": 3318 - }, - { - "epoch": 0.24943634450623778, - "grad_norm": 4.561756439236216, - "learning_rate": 3.5160114930211203e-06, - "loss": 0.8111, - "step": 3319 - }, - { - "epoch": 0.24951149857207275, - "grad_norm": 1.4605625099718527, - "learning_rate": 3.5156939063262875e-06, - "loss": 0.954, - "step": 3320 - }, - { - "epoch": 0.24958665263790772, - "grad_norm": 1.742525281791135, - "learning_rate": 3.515376229820787e-06, - "loss": 0.981, - "step": 3321 - }, - { - "epoch": 0.24966180670374266, - "grad_norm": 5.048711956170785, - "learning_rate": 3.5150584635234416e-06, - "loss": 0.9398, - "step": 3322 - }, - { - "epoch": 0.24973696076957763, - "grad_norm": 1.9632439290638601, - "learning_rate": 3.5147406074530805e-06, - "loss": 0.9662, - "step": 3323 - }, - { - "epoch": 0.2498121148354126, - "grad_norm": 0.703638842698805, - "learning_rate": 3.5144226616285384e-06, - "loss": 0.8764, - "step": 3324 - }, - { - "epoch": 0.24988726890124754, - "grad_norm": 2.571550992670387, - "learning_rate": 3.5141046260686537e-06, - "loss": 0.984, - "step": 3325 - }, - { - "epoch": 0.24996242296708251, - "grad_norm": 3.9207207550415615, - "learning_rate": 3.5137865007922726e-06, - "loss": 1.0179, - "step": 3326 - }, - { - "epoch": 0.25003757703291746, - "grad_norm": 1.4242301132744881, - "learning_rate": 3.5134682858182448e-06, - "loss": 0.9866, - "step": 3327 - }, - { - "epoch": 0.2501127310987524, - "grad_norm": 0.7489864733153196, - "learning_rate": 3.5131499811654253e-06, - "loss": 0.8493, - "step": 3328 - }, - { - "epoch": 0.2501878851645874, - "grad_norm": 1.50554110358615, - "learning_rate": 3.5128315868526755e-06, - "loss": 0.94, - "step": 3329 - }, - { - "epoch": 0.25026303923042237, - "grad_norm": 1.5545674953439481, - "learning_rate": 3.512513102898861e-06, - "loss": 0.9503, - "step": 3330 - }, - { - "epoch": 0.25033819329625734, - "grad_norm": 3.28538311006047, - "learning_rate": 3.512194529322853e-06, - "loss": 0.9147, - "step": 3331 - }, - { - "epoch": 0.2504133473620923, - "grad_norm": 1.9698011226282515, - "learning_rate": 3.511875866143529e-06, - "loss": 1.0106, - "step": 3332 - }, - { - "epoch": 0.2504885014279272, - "grad_norm": 1.2188483467524935, - "learning_rate": 3.511557113379771e-06, - "loss": 1.0078, - "step": 3333 - }, - { - "epoch": 0.2505636554937622, - "grad_norm": 2.0216610448915833, - "learning_rate": 3.511238271050465e-06, - "loss": 0.9904, - "step": 3334 - }, - { - "epoch": 0.25063880955959716, - "grad_norm": 4.0779213105411705, - "learning_rate": 3.510919339174505e-06, - "loss": 1.0758, - "step": 3335 - }, - { - "epoch": 0.25071396362543213, - "grad_norm": 1.7611574348724461, - "learning_rate": 3.5106003177707882e-06, - "loss": 0.9562, - "step": 3336 - }, - { - "epoch": 0.2507891176912671, - "grad_norm": 1.6392888032021555, - "learning_rate": 3.5102812068582183e-06, - "loss": 0.9875, - "step": 3337 - }, - { - "epoch": 0.2508642717571021, - "grad_norm": 1.8028095597514118, - "learning_rate": 3.509962006455704e-06, - "loss": 1.0636, - "step": 3338 - }, - { - "epoch": 0.25093942582293705, - "grad_norm": 1.8415004778946054, - "learning_rate": 3.5096427165821583e-06, - "loss": 1.0856, - "step": 3339 - }, - { - "epoch": 0.25101457988877196, - "grad_norm": 1.3357857065799132, - "learning_rate": 3.509323337256501e-06, - "loss": 0.9501, - "step": 3340 - }, - { - "epoch": 0.25108973395460693, - "grad_norm": 0.7911537709108969, - "learning_rate": 3.5090038684976563e-06, - "loss": 0.883, - "step": 3341 - }, - { - "epoch": 0.2511648880204419, - "grad_norm": 2.169373093507765, - "learning_rate": 3.5086843103245542e-06, - "loss": 0.9738, - "step": 3342 - }, - { - "epoch": 0.25124004208627687, - "grad_norm": 1.6608742272139867, - "learning_rate": 3.508364662756129e-06, - "loss": 1.1192, - "step": 3343 - }, - { - "epoch": 0.25131519615211184, - "grad_norm": 2.1760436743883287, - "learning_rate": 3.5080449258113224e-06, - "loss": 0.9826, - "step": 3344 - }, - { - "epoch": 0.2513903502179468, - "grad_norm": 1.682838571500577, - "learning_rate": 3.5077250995090786e-06, - "loss": 1.0194, - "step": 3345 - }, - { - "epoch": 0.2514655042837817, - "grad_norm": 1.6759395924700395, - "learning_rate": 3.5074051838683497e-06, - "loss": 1.0535, - "step": 3346 - }, - { - "epoch": 0.2515406583496167, - "grad_norm": 1.7222002251905446, - "learning_rate": 3.507085178908091e-06, - "loss": 1.0933, - "step": 3347 - }, - { - "epoch": 0.25161581241545167, - "grad_norm": 1.663291788493358, - "learning_rate": 3.506765084647265e-06, - "loss": 1.0423, - "step": 3348 - }, - { - "epoch": 0.25169096648128664, - "grad_norm": 1.728046797626438, - "learning_rate": 3.506444901104837e-06, - "loss": 1.0218, - "step": 3349 - }, - { - "epoch": 0.2517661205471216, - "grad_norm": 1.6508574598623598, - "learning_rate": 3.506124628299781e-06, - "loss": 0.8303, - "step": 3350 - }, - { - "epoch": 0.2518412746129566, - "grad_norm": 6.070376912564466, - "learning_rate": 3.505804266251073e-06, - "loss": 1.0322, - "step": 3351 - }, - { - "epoch": 0.25191642867879155, - "grad_norm": 0.7090673056616575, - "learning_rate": 3.505483814977696e-06, - "loss": 0.8064, - "step": 3352 - }, - { - "epoch": 0.25199158274462646, - "grad_norm": 1.7056135143258142, - "learning_rate": 3.5051632744986384e-06, - "loss": 1.0292, - "step": 3353 - }, - { - "epoch": 0.25206673681046143, - "grad_norm": 1.881110470689, - "learning_rate": 3.5048426448328926e-06, - "loss": 1.0321, - "step": 3354 - }, - { - "epoch": 0.2521418908762964, - "grad_norm": 1.6780042864184368, - "learning_rate": 3.504521925999458e-06, - "loss": 1.0115, - "step": 3355 - }, - { - "epoch": 0.2522170449421314, - "grad_norm": 0.6392426579605388, - "learning_rate": 3.5042011180173386e-06, - "loss": 0.7667, - "step": 3356 - }, - { - "epoch": 0.25229219900796634, - "grad_norm": 3.5000464572548027, - "learning_rate": 3.5038802209055424e-06, - "loss": 0.9909, - "step": 3357 - }, - { - "epoch": 0.2523673530738013, - "grad_norm": 1.8159203976817246, - "learning_rate": 3.5035592346830846e-06, - "loss": 1.0463, - "step": 3358 - }, - { - "epoch": 0.25244250713963623, - "grad_norm": 0.6720323378397357, - "learning_rate": 3.5032381593689843e-06, - "loss": 0.8804, - "step": 3359 - }, - { - "epoch": 0.2525176612054712, - "grad_norm": 1.7031380189170555, - "learning_rate": 3.502916994982267e-06, - "loss": 0.8779, - "step": 3360 - }, - { - "epoch": 0.25259281527130617, - "grad_norm": 2.0730278202587864, - "learning_rate": 3.502595741541963e-06, - "loss": 1.0073, - "step": 3361 - }, - { - "epoch": 0.25266796933714114, - "grad_norm": 3.386134254502346, - "learning_rate": 3.502274399067107e-06, - "loss": 0.9879, - "step": 3362 - }, - { - "epoch": 0.2527431234029761, - "grad_norm": 2.0087184420695943, - "learning_rate": 3.5019529675767403e-06, - "loss": 0.9212, - "step": 3363 - }, - { - "epoch": 0.2528182774688111, - "grad_norm": 1.7209463478028568, - "learning_rate": 3.501631447089909e-06, - "loss": 1.0248, - "step": 3364 - }, - { - "epoch": 0.25289343153464605, - "grad_norm": 1.8176069331555709, - "learning_rate": 3.5013098376256645e-06, - "loss": 1.0836, - "step": 3365 - }, - { - "epoch": 0.25296858560048097, - "grad_norm": 1.3758015463792483, - "learning_rate": 3.5009881392030633e-06, - "loss": 1.0183, - "step": 3366 - }, - { - "epoch": 0.25304373966631594, - "grad_norm": 2.1454710299603605, - "learning_rate": 3.5006663518411666e-06, - "loss": 1.0346, - "step": 3367 - }, - { - "epoch": 0.2531188937321509, - "grad_norm": 2.1098233346969106, - "learning_rate": 3.500344475559043e-06, - "loss": 1.0458, - "step": 3368 - }, - { - "epoch": 0.2531940477979859, - "grad_norm": 1.5938419240733115, - "learning_rate": 3.5000225103757634e-06, - "loss": 1.0584, - "step": 3369 - }, - { - "epoch": 0.25326920186382085, - "grad_norm": 1.747541062468819, - "learning_rate": 3.499700456310406e-06, - "loss": 1.0186, - "step": 3370 - }, - { - "epoch": 0.2533443559296558, - "grad_norm": 1.8541400850369414, - "learning_rate": 3.499378313382054e-06, - "loss": 1.0047, - "step": 3371 - }, - { - "epoch": 0.25341950999549073, - "grad_norm": 0.8348889414660892, - "learning_rate": 3.4990560816097954e-06, - "loss": 0.9159, - "step": 3372 - }, - { - "epoch": 0.2534946640613257, - "grad_norm": 1.6350771152614063, - "learning_rate": 3.4987337610127237e-06, - "loss": 1.0351, - "step": 3373 - }, - { - "epoch": 0.2535698181271607, - "grad_norm": 1.683732312512403, - "learning_rate": 3.498411351609938e-06, - "loss": 1.0638, - "step": 3374 - }, - { - "epoch": 0.25364497219299564, - "grad_norm": 1.5561932503644458, - "learning_rate": 3.4980888534205414e-06, - "loss": 1.033, - "step": 3375 - }, - { - "epoch": 0.2537201262588306, - "grad_norm": 2.247426999002354, - "learning_rate": 3.4977662664636443e-06, - "loss": 0.9533, - "step": 3376 - }, - { - "epoch": 0.2537952803246656, - "grad_norm": 1.8305562472627548, - "learning_rate": 3.4974435907583597e-06, - "loss": 0.9045, - "step": 3377 - }, - { - "epoch": 0.2538704343905005, - "grad_norm": 1.611437022835349, - "learning_rate": 3.497120826323809e-06, - "loss": 1.0186, - "step": 3378 - }, - { - "epoch": 0.25394558845633547, - "grad_norm": 1.892646780432032, - "learning_rate": 3.496797973179116e-06, - "loss": 0.9848, - "step": 3379 - }, - { - "epoch": 0.25402074252217044, - "grad_norm": 1.6145281464364056, - "learning_rate": 3.4964750313434114e-06, - "loss": 1.0616, - "step": 3380 - }, - { - "epoch": 0.2540958965880054, - "grad_norm": 0.6685153219639826, - "learning_rate": 3.496152000835831e-06, - "loss": 0.7496, - "step": 3381 - }, - { - "epoch": 0.2541710506538404, - "grad_norm": 1.4839497810462579, - "learning_rate": 3.495828881675516e-06, - "loss": 0.9745, - "step": 3382 - }, - { - "epoch": 0.25424620471967535, - "grad_norm": 1.5877015023492715, - "learning_rate": 3.4955056738816113e-06, - "loss": 0.9498, - "step": 3383 - }, - { - "epoch": 0.2543213587855103, - "grad_norm": 0.7259605128844095, - "learning_rate": 3.4951823774732686e-06, - "loss": 0.8374, - "step": 3384 - }, - { - "epoch": 0.25439651285134524, - "grad_norm": 1.6500704663575168, - "learning_rate": 3.4948589924696447e-06, - "loss": 0.9784, - "step": 3385 - }, - { - "epoch": 0.2544716669171802, - "grad_norm": 1.8810324584293905, - "learning_rate": 3.4945355188899013e-06, - "loss": 1.0969, - "step": 3386 - }, - { - "epoch": 0.2545468209830152, - "grad_norm": 1.9431068141278878, - "learning_rate": 3.494211956753206e-06, - "loss": 0.9135, - "step": 3387 - }, - { - "epoch": 0.25462197504885015, - "grad_norm": 1.667351995073648, - "learning_rate": 3.49388830607873e-06, - "loss": 1.0348, - "step": 3388 - }, - { - "epoch": 0.2546971291146851, - "grad_norm": 2.3096396824941463, - "learning_rate": 3.493564566885651e-06, - "loss": 1.0198, - "step": 3389 - }, - { - "epoch": 0.2547722831805201, - "grad_norm": 1.5003980267458097, - "learning_rate": 3.4932407391931527e-06, - "loss": 0.9889, - "step": 3390 - }, - { - "epoch": 0.254847437246355, - "grad_norm": 2.0617878002125893, - "learning_rate": 3.4929168230204226e-06, - "loss": 1.0258, - "step": 3391 - }, - { - "epoch": 0.25492259131219, - "grad_norm": 1.6598970098604027, - "learning_rate": 3.4925928183866534e-06, - "loss": 1.0351, - "step": 3392 - }, - { - "epoch": 0.25499774537802494, - "grad_norm": 1.6221447582489295, - "learning_rate": 3.492268725311045e-06, - "loss": 1.0262, - "step": 3393 - }, - { - "epoch": 0.2550728994438599, - "grad_norm": 2.405632038775142, - "learning_rate": 3.4919445438128e-06, - "loss": 1.0734, - "step": 3394 - }, - { - "epoch": 0.2551480535096949, - "grad_norm": 1.3710947933356072, - "learning_rate": 3.491620273911128e-06, - "loss": 0.8662, - "step": 3395 - }, - { - "epoch": 0.25522320757552985, - "grad_norm": 1.3964384058075368, - "learning_rate": 3.491295915625243e-06, - "loss": 1.0759, - "step": 3396 - }, - { - "epoch": 0.2552983616413648, - "grad_norm": 1.963260687869529, - "learning_rate": 3.490971468974364e-06, - "loss": 1.0987, - "step": 3397 - }, - { - "epoch": 0.25537351570719974, - "grad_norm": 1.6222291210686874, - "learning_rate": 3.490646933977716e-06, - "loss": 1.0573, - "step": 3398 - }, - { - "epoch": 0.2554486697730347, - "grad_norm": 1.5672457531098527, - "learning_rate": 3.49032231065453e-06, - "loss": 0.9226, - "step": 3399 - }, - { - "epoch": 0.2555238238388697, - "grad_norm": 2.21937219196852, - "learning_rate": 3.48999759902404e-06, - "loss": 0.977, - "step": 3400 - }, - { - "epoch": 0.25559897790470465, - "grad_norm": 1.4530212201211778, - "learning_rate": 3.4896727991054856e-06, - "loss": 1.0102, - "step": 3401 - }, - { - "epoch": 0.2556741319705396, - "grad_norm": 4.126083000661127, - "learning_rate": 3.4893479109181144e-06, - "loss": 0.8525, - "step": 3402 - }, - { - "epoch": 0.2557492860363746, - "grad_norm": 1.83403546441919, - "learning_rate": 3.489022934481176e-06, - "loss": 0.9813, - "step": 3403 - }, - { - "epoch": 0.2558244401022095, - "grad_norm": 1.842150560159119, - "learning_rate": 3.4886978698139275e-06, - "loss": 1.036, - "step": 3404 - }, - { - "epoch": 0.2558995941680445, - "grad_norm": 1.930651342941472, - "learning_rate": 3.4883727169356293e-06, - "loss": 1.0109, - "step": 3405 - }, - { - "epoch": 0.25597474823387945, - "grad_norm": 8.946780208467894, - "learning_rate": 3.4880474758655485e-06, - "loss": 1.0616, - "step": 3406 - }, - { - "epoch": 0.2560499022997144, - "grad_norm": 0.7958597934445069, - "learning_rate": 3.487722146622956e-06, - "loss": 0.9297, - "step": 3407 - }, - { - "epoch": 0.2561250563655494, - "grad_norm": 1.8111148671668382, - "learning_rate": 3.48739672922713e-06, - "loss": 1.0713, - "step": 3408 - }, - { - "epoch": 0.25620021043138436, - "grad_norm": 1.6264158096640886, - "learning_rate": 3.4870712236973524e-06, - "loss": 1.0806, - "step": 3409 - }, - { - "epoch": 0.2562753644972193, - "grad_norm": 1.6383867527249225, - "learning_rate": 3.4867456300529096e-06, - "loss": 1.031, - "step": 3410 - }, - { - "epoch": 0.25635051856305424, - "grad_norm": 3.158123253618577, - "learning_rate": 3.4864199483130957e-06, - "loss": 0.8683, - "step": 3411 - }, - { - "epoch": 0.2564256726288892, - "grad_norm": 1.6049526155316258, - "learning_rate": 3.4860941784972077e-06, - "loss": 1.0194, - "step": 3412 - }, - { - "epoch": 0.2565008266947242, - "grad_norm": 2.2087309377438165, - "learning_rate": 3.485768320624549e-06, - "loss": 1.0826, - "step": 3413 - }, - { - "epoch": 0.25657598076055915, - "grad_norm": 4.319546236649373, - "learning_rate": 3.485442374714428e-06, - "loss": 0.9874, - "step": 3414 - }, - { - "epoch": 0.2566511348263941, - "grad_norm": 1.7542318538897697, - "learning_rate": 3.485116340786158e-06, - "loss": 1.0623, - "step": 3415 - }, - { - "epoch": 0.2567262888922291, - "grad_norm": 2.227329055272028, - "learning_rate": 3.4847902188590582e-06, - "loss": 0.97, - "step": 3416 - }, - { - "epoch": 0.256801442958064, - "grad_norm": 1.6099126713922334, - "learning_rate": 3.484464008952452e-06, - "loss": 0.9776, - "step": 3417 - }, - { - "epoch": 0.256876597023899, - "grad_norm": 14.816781628351874, - "learning_rate": 3.484137711085669e-06, - "loss": 1.0425, - "step": 3418 - }, - { - "epoch": 0.25695175108973395, - "grad_norm": 1.5504405226286446, - "learning_rate": 3.4838113252780435e-06, - "loss": 1.1198, - "step": 3419 - }, - { - "epoch": 0.2570269051555689, - "grad_norm": 1.4703225989810749, - "learning_rate": 3.4834848515489154e-06, - "loss": 0.9563, - "step": 3420 - }, - { - "epoch": 0.2571020592214039, - "grad_norm": 1.5325011766438532, - "learning_rate": 3.4831582899176286e-06, - "loss": 1.0201, - "step": 3421 - }, - { - "epoch": 0.25717721328723886, - "grad_norm": 1.8814978224036931, - "learning_rate": 3.4828316404035345e-06, - "loss": 1.0607, - "step": 3422 - }, - { - "epoch": 0.2572523673530738, - "grad_norm": 1.9306767202299093, - "learning_rate": 3.4825049030259868e-06, - "loss": 1.0041, - "step": 3423 - }, - { - "epoch": 0.25732752141890874, - "grad_norm": 4.1521873408777115, - "learning_rate": 3.482178077804347e-06, - "loss": 0.9327, - "step": 3424 - }, - { - "epoch": 0.2574026754847437, - "grad_norm": 2.681535622915823, - "learning_rate": 3.48185116475798e-06, - "loss": 1.0188, - "step": 3425 - }, - { - "epoch": 0.2574778295505787, - "grad_norm": 2.122630761997037, - "learning_rate": 3.481524163906258e-06, - "loss": 0.9647, - "step": 3426 - }, - { - "epoch": 0.25755298361641366, - "grad_norm": 1.4418558386168978, - "learning_rate": 3.4811970752685555e-06, - "loss": 0.9474, - "step": 3427 - }, - { - "epoch": 0.2576281376822486, - "grad_norm": 0.7293406460605794, - "learning_rate": 3.4808698988642547e-06, - "loss": 0.8339, - "step": 3428 - }, - { - "epoch": 0.2577032917480836, - "grad_norm": 1.6793349494885923, - "learning_rate": 3.4805426347127416e-06, - "loss": 1.0443, - "step": 3429 - }, - { - "epoch": 0.2577784458139185, - "grad_norm": 1.5602982839879909, - "learning_rate": 3.4802152828334083e-06, - "loss": 0.9983, - "step": 3430 - }, - { - "epoch": 0.2578535998797535, - "grad_norm": 1.6120880369563273, - "learning_rate": 3.479887843245651e-06, - "loss": 1.0408, - "step": 3431 - }, - { - "epoch": 0.25792875394558845, - "grad_norm": 2.0894514910052733, - "learning_rate": 3.4795603159688725e-06, - "loss": 1.0169, - "step": 3432 - }, - { - "epoch": 0.2580039080114234, - "grad_norm": 1.8168616595597, - "learning_rate": 3.4792327010224794e-06, - "loss": 0.7988, - "step": 3433 - }, - { - "epoch": 0.2580790620772584, - "grad_norm": 1.723693189563101, - "learning_rate": 3.478904998425884e-06, - "loss": 0.8807, - "step": 3434 - }, - { - "epoch": 0.25815421614309336, - "grad_norm": 2.422216772759672, - "learning_rate": 3.478577208198505e-06, - "loss": 0.9894, - "step": 3435 - }, - { - "epoch": 0.2582293702089283, - "grad_norm": 1.7244459616981027, - "learning_rate": 3.478249330359764e-06, - "loss": 1.0346, - "step": 3436 - }, - { - "epoch": 0.25830452427476325, - "grad_norm": 1.8525480713572464, - "learning_rate": 3.4779213649290907e-06, - "loss": 0.9474, - "step": 3437 - }, - { - "epoch": 0.2583796783405982, - "grad_norm": 2.477223208889372, - "learning_rate": 3.4775933119259162e-06, - "loss": 0.9875, - "step": 3438 - }, - { - "epoch": 0.2584548324064332, - "grad_norm": 1.519260882568553, - "learning_rate": 3.47726517136968e-06, - "loss": 0.9686, - "step": 3439 - }, - { - "epoch": 0.25852998647226816, - "grad_norm": 0.719108906574286, - "learning_rate": 3.4769369432798258e-06, - "loss": 0.8554, - "step": 3440 - }, - { - "epoch": 0.25860514053810313, - "grad_norm": 2.1319473244491935, - "learning_rate": 3.4766086276758014e-06, - "loss": 1.0094, - "step": 3441 - }, - { - "epoch": 0.2586802946039381, - "grad_norm": 1.3341222108196853, - "learning_rate": 3.4762802245770627e-06, - "loss": 0.8692, - "step": 3442 - }, - { - "epoch": 0.258755448669773, - "grad_norm": 0.7078406406223311, - "learning_rate": 3.4759517340030674e-06, - "loss": 0.8328, - "step": 3443 - }, - { - "epoch": 0.258830602735608, - "grad_norm": 1.580551321802054, - "learning_rate": 3.475623155973279e-06, - "loss": 0.8776, - "step": 3444 - }, - { - "epoch": 0.25890575680144295, - "grad_norm": 1.8287014826650674, - "learning_rate": 3.4752944905071687e-06, - "loss": 0.9236, - "step": 3445 - }, - { - "epoch": 0.2589809108672779, - "grad_norm": 1.820723785957548, - "learning_rate": 3.474965737624211e-06, - "loss": 0.8616, - "step": 3446 - }, - { - "epoch": 0.2590560649331129, - "grad_norm": 2.0834931757325186, - "learning_rate": 3.474636897343885e-06, - "loss": 0.8828, - "step": 3447 - }, - { - "epoch": 0.25913121899894787, - "grad_norm": 1.325410469821777, - "learning_rate": 3.474307969685676e-06, - "loss": 0.9506, - "step": 3448 - }, - { - "epoch": 0.2592063730647828, - "grad_norm": 1.2533132987162765, - "learning_rate": 3.473978954669074e-06, - "loss": 1.0099, - "step": 3449 - }, - { - "epoch": 0.25928152713061775, - "grad_norm": 0.7091980499184827, - "learning_rate": 3.473649852313575e-06, - "loss": 0.8905, - "step": 3450 - }, - { - "epoch": 0.2593566811964527, - "grad_norm": 1.980863513013173, - "learning_rate": 3.4733206626386794e-06, - "loss": 1.1009, - "step": 3451 - }, - { - "epoch": 0.2594318352622877, - "grad_norm": 1.3952028169485404, - "learning_rate": 3.472991385663893e-06, - "loss": 0.9889, - "step": 3452 - }, - { - "epoch": 0.25950698932812266, - "grad_norm": 1.6219020804494593, - "learning_rate": 3.4726620214087264e-06, - "loss": 1.0658, - "step": 3453 - }, - { - "epoch": 0.25958214339395763, - "grad_norm": 2.157773902984311, - "learning_rate": 3.4723325698926953e-06, - "loss": 0.8354, - "step": 3454 - }, - { - "epoch": 0.2596572974597926, - "grad_norm": 1.5180013059225985, - "learning_rate": 3.4720030311353216e-06, - "loss": 1.0014, - "step": 3455 - }, - { - "epoch": 0.2597324515256275, - "grad_norm": 1.7628270488342193, - "learning_rate": 3.4716734051561324e-06, - "loss": 1.0284, - "step": 3456 - }, - { - "epoch": 0.2598076055914625, - "grad_norm": 2.2034474671197506, - "learning_rate": 3.471343691974658e-06, - "loss": 0.9941, - "step": 3457 - }, - { - "epoch": 0.25988275965729746, - "grad_norm": 1.4090601499617847, - "learning_rate": 3.471013891610436e-06, - "loss": 1.1123, - "step": 3458 - }, - { - "epoch": 0.2599579137231324, - "grad_norm": 1.9396703316107122, - "learning_rate": 3.4706840040830076e-06, - "loss": 1.0457, - "step": 3459 - }, - { - "epoch": 0.2600330677889674, - "grad_norm": 2.005076997757756, - "learning_rate": 3.4703540294119204e-06, - "loss": 1.0697, - "step": 3460 - }, - { - "epoch": 0.26010822185480237, - "grad_norm": 1.9811991712145243, - "learning_rate": 3.4700239676167264e-06, - "loss": 0.9357, - "step": 3461 - }, - { - "epoch": 0.2601833759206373, - "grad_norm": 1.7954171942852264, - "learning_rate": 3.4696938187169836e-06, - "loss": 1.052, - "step": 3462 - }, - { - "epoch": 0.26025852998647225, - "grad_norm": 2.2716742425551093, - "learning_rate": 3.469363582732254e-06, - "loss": 0.9901, - "step": 3463 - }, - { - "epoch": 0.2603336840523072, - "grad_norm": 1.9142346363126144, - "learning_rate": 3.4690332596821065e-06, - "loss": 0.9312, - "step": 3464 - }, - { - "epoch": 0.2604088381181422, - "grad_norm": 2.0124740219808923, - "learning_rate": 3.468702849586112e-06, - "loss": 0.9722, - "step": 3465 - }, - { - "epoch": 0.26048399218397716, - "grad_norm": 0.7802200394598119, - "learning_rate": 3.4683723524638494e-06, - "loss": 0.7813, - "step": 3466 - }, - { - "epoch": 0.26055914624981213, - "grad_norm": 1.5200787418448727, - "learning_rate": 3.4680417683349024e-06, - "loss": 1.0829, - "step": 3467 - }, - { - "epoch": 0.26063430031564705, - "grad_norm": 0.8138473050586534, - "learning_rate": 3.46771109721886e-06, - "loss": 0.937, - "step": 3468 - }, - { - "epoch": 0.260709454381482, - "grad_norm": 3.3276279418560883, - "learning_rate": 3.467380339135314e-06, - "loss": 0.9948, - "step": 3469 - }, - { - "epoch": 0.260784608447317, - "grad_norm": 2.017593014450746, - "learning_rate": 3.4670494941038642e-06, - "loss": 0.9832, - "step": 3470 - }, - { - "epoch": 0.26085976251315196, - "grad_norm": 0.6927324711506835, - "learning_rate": 3.466718562144114e-06, - "loss": 0.8416, - "step": 3471 - }, - { - "epoch": 0.26093491657898693, - "grad_norm": 0.6795928713817007, - "learning_rate": 3.4663875432756726e-06, - "loss": 0.845, - "step": 3472 - }, - { - "epoch": 0.2610100706448219, - "grad_norm": 1.7068632204647343, - "learning_rate": 3.466056437518154e-06, - "loss": 1.024, - "step": 3473 - }, - { - "epoch": 0.26108522471065687, - "grad_norm": 1.6412508542827815, - "learning_rate": 3.465725244891178e-06, - "loss": 1.0364, - "step": 3474 - }, - { - "epoch": 0.2611603787764918, - "grad_norm": 1.5431592606379378, - "learning_rate": 3.465393965414368e-06, - "loss": 1.0062, - "step": 3475 - }, - { - "epoch": 0.26123553284232676, - "grad_norm": 1.7089166420729343, - "learning_rate": 3.4650625991073543e-06, - "loss": 0.9804, - "step": 3476 - }, - { - "epoch": 0.2613106869081617, - "grad_norm": 1.60792320470905, - "learning_rate": 3.464731145989772e-06, - "loss": 0.967, - "step": 3477 - }, - { - "epoch": 0.2613858409739967, - "grad_norm": 7.72971791017934, - "learning_rate": 3.46439960608126e-06, - "loss": 1.0962, - "step": 3478 - }, - { - "epoch": 0.26146099503983167, - "grad_norm": 2.7489786852485203, - "learning_rate": 3.464067979401464e-06, - "loss": 1.0219, - "step": 3479 - }, - { - "epoch": 0.26153614910566664, - "grad_norm": 6.566727502737939, - "learning_rate": 3.4637362659700337e-06, - "loss": 0.983, - "step": 3480 - }, - { - "epoch": 0.26161130317150155, - "grad_norm": 2.2893997308518474, - "learning_rate": 3.463404465806625e-06, - "loss": 0.9747, - "step": 3481 - }, - { - "epoch": 0.2616864572373365, - "grad_norm": 1.8934966418304764, - "learning_rate": 3.4630725789308974e-06, - "loss": 1.0405, - "step": 3482 - }, - { - "epoch": 0.2617616113031715, - "grad_norm": 2.3915199150581774, - "learning_rate": 3.4627406053625175e-06, - "loss": 1.0225, - "step": 3483 - }, - { - "epoch": 0.26183676536900646, - "grad_norm": 2.047718835112218, - "learning_rate": 3.462408545121155e-06, - "loss": 0.9574, - "step": 3484 - }, - { - "epoch": 0.26191191943484143, - "grad_norm": 1.7255753677122165, - "learning_rate": 3.462076398226487e-06, - "loss": 0.9648, - "step": 3485 - }, - { - "epoch": 0.2619870735006764, - "grad_norm": 1.8703505407813266, - "learning_rate": 3.4617441646981935e-06, - "loss": 1.0424, - "step": 3486 - }, - { - "epoch": 0.2620622275665114, - "grad_norm": 1.5253539722058915, - "learning_rate": 3.461411844555961e-06, - "loss": 1.0551, - "step": 3487 - }, - { - "epoch": 0.2621373816323463, - "grad_norm": 1.724748858538691, - "learning_rate": 3.46107943781948e-06, - "loss": 1.024, - "step": 3488 - }, - { - "epoch": 0.26221253569818126, - "grad_norm": 8.95936226073824, - "learning_rate": 3.460746944508448e-06, - "loss": 0.9505, - "step": 3489 - }, - { - "epoch": 0.26228768976401623, - "grad_norm": 1.5824382493926483, - "learning_rate": 3.4604143646425655e-06, - "loss": 0.9468, - "step": 3490 - }, - { - "epoch": 0.2623628438298512, - "grad_norm": 2.451279352878988, - "learning_rate": 3.46008169824154e-06, - "loss": 1.0116, - "step": 3491 - }, - { - "epoch": 0.26243799789568617, - "grad_norm": 1.8327568260381086, - "learning_rate": 3.4597489453250824e-06, - "loss": 1.0257, - "step": 3492 - }, - { - "epoch": 0.26251315196152114, - "grad_norm": 2.1224676638814577, - "learning_rate": 3.4594161059129102e-06, - "loss": 0.8755, - "step": 3493 - }, - { - "epoch": 0.26258830602735606, - "grad_norm": 1.4993381629607112, - "learning_rate": 3.4590831800247457e-06, - "loss": 0.9419, - "step": 3494 - }, - { - "epoch": 0.262663460093191, - "grad_norm": 2.4471150052786768, - "learning_rate": 3.458750167680315e-06, - "loss": 0.8524, - "step": 3495 - }, - { - "epoch": 0.262738614159026, - "grad_norm": 1.3152131240075065, - "learning_rate": 3.458417068899351e-06, - "loss": 0.9772, - "step": 3496 - }, - { - "epoch": 0.26281376822486097, - "grad_norm": 1.4816014126517563, - "learning_rate": 3.4580838837015915e-06, - "loss": 0.9327, - "step": 3497 - }, - { - "epoch": 0.26288892229069594, - "grad_norm": 2.1940274533177258, - "learning_rate": 3.4577506121067784e-06, - "loss": 0.9358, - "step": 3498 - }, - { - "epoch": 0.2629640763565309, - "grad_norm": 1.8818959898143925, - "learning_rate": 3.457417254134659e-06, - "loss": 0.9355, - "step": 3499 - }, - { - "epoch": 0.2630392304223659, - "grad_norm": 2.9237371212666265, - "learning_rate": 3.457083809804986e-06, - "loss": 0.9476, - "step": 3500 - }, - { - "epoch": 0.2631143844882008, - "grad_norm": 1.7485539047314111, - "learning_rate": 3.456750279137519e-06, - "loss": 0.9998, - "step": 3501 - }, - { - "epoch": 0.26318953855403576, - "grad_norm": 2.39574975130545, - "learning_rate": 3.4564166621520193e-06, - "loss": 0.8916, - "step": 3502 - }, - { - "epoch": 0.26326469261987073, - "grad_norm": 1.8845202034190895, - "learning_rate": 3.456082958868255e-06, - "loss": 1.0332, - "step": 3503 - }, - { - "epoch": 0.2633398466857057, - "grad_norm": 2.0203551155900343, - "learning_rate": 3.455749169306e-06, - "loss": 0.902, - "step": 3504 - }, - { - "epoch": 0.2634150007515407, - "grad_norm": 1.6697044900311406, - "learning_rate": 3.455415293485032e-06, - "loss": 1.0581, - "step": 3505 - }, - { - "epoch": 0.26349015481737564, - "grad_norm": 1.5873454942382939, - "learning_rate": 3.455081331425135e-06, - "loss": 0.9232, - "step": 3506 - }, - { - "epoch": 0.26356530888321056, - "grad_norm": 4.934706078523917, - "learning_rate": 3.4547472831460973e-06, - "loss": 0.9221, - "step": 3507 - }, - { - "epoch": 0.26364046294904553, - "grad_norm": 1.6906702144873385, - "learning_rate": 3.4544131486677124e-06, - "loss": 1.0852, - "step": 3508 - }, - { - "epoch": 0.2637156170148805, - "grad_norm": 4.32659157811757, - "learning_rate": 3.454078928009779e-06, - "loss": 1.0303, - "step": 3509 - }, - { - "epoch": 0.26379077108071547, - "grad_norm": 1.6508778231057892, - "learning_rate": 3.4537446211921008e-06, - "loss": 1.0068, - "step": 3510 - }, - { - "epoch": 0.26386592514655044, - "grad_norm": 1.7147306435373062, - "learning_rate": 3.4534102282344876e-06, - "loss": 1.069, - "step": 3511 - }, - { - "epoch": 0.2639410792123854, - "grad_norm": 1.6507292811982521, - "learning_rate": 3.453075749156753e-06, - "loss": 1.053, - "step": 3512 - }, - { - "epoch": 0.2640162332782203, - "grad_norm": 4.183578704892415, - "learning_rate": 3.4527411839787152e-06, - "loss": 1.0054, - "step": 3513 - }, - { - "epoch": 0.2640913873440553, - "grad_norm": 1.3143656958061603, - "learning_rate": 3.4524065327202e-06, - "loss": 0.9153, - "step": 3514 - }, - { - "epoch": 0.26416654140989027, - "grad_norm": 1.6733449018060773, - "learning_rate": 3.4520717954010356e-06, - "loss": 0.96, - "step": 3515 - }, - { - "epoch": 0.26424169547572524, - "grad_norm": 1.6183542658941348, - "learning_rate": 3.4517369720410576e-06, - "loss": 1.0226, - "step": 3516 - }, - { - "epoch": 0.2643168495415602, - "grad_norm": 4.086084541898873, - "learning_rate": 3.4514020626601044e-06, - "loss": 1.0811, - "step": 3517 - }, - { - "epoch": 0.2643920036073952, - "grad_norm": 1.8873034216975149, - "learning_rate": 3.451067067278021e-06, - "loss": 0.9875, - "step": 3518 - }, - { - "epoch": 0.26446715767323015, - "grad_norm": 2.0361728581943295, - "learning_rate": 3.4507319859146585e-06, - "loss": 1.0733, - "step": 3519 - }, - { - "epoch": 0.26454231173906506, - "grad_norm": 7.5941536250596, - "learning_rate": 3.4503968185898696e-06, - "loss": 0.9719, - "step": 3520 - }, - { - "epoch": 0.26461746580490003, - "grad_norm": 1.6287921527083322, - "learning_rate": 3.450061565323516e-06, - "loss": 0.9768, - "step": 3521 - }, - { - "epoch": 0.264692619870735, - "grad_norm": 1.8488649443202783, - "learning_rate": 3.449726226135461e-06, - "loss": 1.0715, - "step": 3522 - }, - { - "epoch": 0.26476777393657, - "grad_norm": 3.0567602749645046, - "learning_rate": 3.4493908010455762e-06, - "loss": 0.9905, - "step": 3523 - }, - { - "epoch": 0.26484292800240494, - "grad_norm": 6.062145419418908, - "learning_rate": 3.4490552900737363e-06, - "loss": 1.0255, - "step": 3524 - }, - { - "epoch": 0.2649180820682399, - "grad_norm": 2.4327799751395904, - "learning_rate": 3.448719693239822e-06, - "loss": 1.0398, - "step": 3525 - }, - { - "epoch": 0.2649932361340748, - "grad_norm": 2.1540668880097527, - "learning_rate": 3.448384010563718e-06, - "loss": 0.9017, - "step": 3526 - }, - { - "epoch": 0.2650683901999098, - "grad_norm": 1.3999050261435542, - "learning_rate": 3.4480482420653153e-06, - "loss": 0.9669, - "step": 3527 - }, - { - "epoch": 0.26514354426574477, - "grad_norm": 1.3951779530277217, - "learning_rate": 3.4477123877645093e-06, - "loss": 1.0079, - "step": 3528 - }, - { - "epoch": 0.26521869833157974, - "grad_norm": 2.354368341860307, - "learning_rate": 3.4473764476812004e-06, - "loss": 1.086, - "step": 3529 - }, - { - "epoch": 0.2652938523974147, - "grad_norm": 1.7078283618075223, - "learning_rate": 3.447040421835295e-06, - "loss": 0.9457, - "step": 3530 - }, - { - "epoch": 0.2653690064632497, - "grad_norm": 1.7166525787713693, - "learning_rate": 3.446704310246703e-06, - "loss": 0.8822, - "step": 3531 - }, - { - "epoch": 0.26544416052908465, - "grad_norm": 4.457023119104108, - "learning_rate": 3.4463681129353413e-06, - "loss": 1.0269, - "step": 3532 - }, - { - "epoch": 0.26551931459491956, - "grad_norm": 2.1032642154047707, - "learning_rate": 3.4460318299211304e-06, - "loss": 1.0028, - "step": 3533 - }, - { - "epoch": 0.26559446866075453, - "grad_norm": 2.705618177137895, - "learning_rate": 3.4456954612239964e-06, - "loss": 1.011, - "step": 3534 - }, - { - "epoch": 0.2656696227265895, - "grad_norm": 1.6727227174184238, - "learning_rate": 3.44535900686387e-06, - "loss": 0.9387, - "step": 3535 - }, - { - "epoch": 0.2657447767924245, - "grad_norm": 2.7019947312965105, - "learning_rate": 3.4450224668606884e-06, - "loss": 1.0567, - "step": 3536 - }, - { - "epoch": 0.26581993085825945, - "grad_norm": 2.732084612042984, - "learning_rate": 3.444685841234392e-06, - "loss": 1.0524, - "step": 3537 - }, - { - "epoch": 0.2658950849240944, - "grad_norm": 2.1932680649404213, - "learning_rate": 3.444349130004927e-06, - "loss": 1.0515, - "step": 3538 - }, - { - "epoch": 0.26597023898992933, - "grad_norm": 2.7049419492743447, - "learning_rate": 3.4440123331922457e-06, - "loss": 1.0466, - "step": 3539 - }, - { - "epoch": 0.2660453930557643, - "grad_norm": 1.7881217382108612, - "learning_rate": 3.443675450816304e-06, - "loss": 0.9735, - "step": 3540 - }, - { - "epoch": 0.26612054712159927, - "grad_norm": 1.8815983066468502, - "learning_rate": 3.4433384828970636e-06, - "loss": 0.9777, - "step": 3541 - }, - { - "epoch": 0.26619570118743424, - "grad_norm": 1.7216388353509071, - "learning_rate": 3.443001429454491e-06, - "loss": 1.0204, - "step": 3542 - }, - { - "epoch": 0.2662708552532692, - "grad_norm": 2.1214955548220313, - "learning_rate": 3.4426642905085585e-06, - "loss": 0.9637, - "step": 3543 - }, - { - "epoch": 0.2663460093191042, - "grad_norm": 2.2792794064307573, - "learning_rate": 3.4423270660792422e-06, - "loss": 0.9754, - "step": 3544 - }, - { - "epoch": 0.26642116338493915, - "grad_norm": 1.970192577515134, - "learning_rate": 3.4419897561865242e-06, - "loss": 1.0313, - "step": 3545 - }, - { - "epoch": 0.26649631745077407, - "grad_norm": 2.1036514054221693, - "learning_rate": 3.4416523608503914e-06, - "loss": 0.9639, - "step": 3546 - }, - { - "epoch": 0.26657147151660904, - "grad_norm": 1.6561973131544736, - "learning_rate": 3.4413148800908364e-06, - "loss": 1.0906, - "step": 3547 - }, - { - "epoch": 0.266646625582444, - "grad_norm": 2.9516263519698875, - "learning_rate": 3.4409773139278546e-06, - "loss": 1.0594, - "step": 3548 - }, - { - "epoch": 0.266721779648279, - "grad_norm": 2.844473945775957, - "learning_rate": 3.44063966238145e-06, - "loss": 1.0309, - "step": 3549 - }, - { - "epoch": 0.26679693371411395, - "grad_norm": 1.8402062968000938, - "learning_rate": 3.440301925471628e-06, - "loss": 1.015, - "step": 3550 - }, - { - "epoch": 0.2668720877799489, - "grad_norm": 1.5509280656554147, - "learning_rate": 3.439964103218402e-06, - "loss": 0.9195, - "step": 3551 - }, - { - "epoch": 0.26694724184578383, - "grad_norm": 1.7071121860058702, - "learning_rate": 3.439626195641789e-06, - "loss": 1.0453, - "step": 3552 - }, - { - "epoch": 0.2670223959116188, - "grad_norm": 2.0327129084522677, - "learning_rate": 3.4392882027618113e-06, - "loss": 0.9744, - "step": 3553 - }, - { - "epoch": 0.2670975499774538, - "grad_norm": 3.460171927000781, - "learning_rate": 3.438950124598496e-06, - "loss": 0.798, - "step": 3554 - }, - { - "epoch": 0.26717270404328874, - "grad_norm": 1.4486424207529462, - "learning_rate": 3.438611961171875e-06, - "loss": 0.921, - "step": 3555 - }, - { - "epoch": 0.2672478581091237, - "grad_norm": 2.3716929651099345, - "learning_rate": 3.4382737125019874e-06, - "loss": 1.0689, - "step": 3556 - }, - { - "epoch": 0.2673230121749587, - "grad_norm": 2.2719032380291924, - "learning_rate": 3.4379353786088748e-06, - "loss": 1.1205, - "step": 3557 - }, - { - "epoch": 0.2673981662407936, - "grad_norm": 2.0895364981131634, - "learning_rate": 3.437596959512585e-06, - "loss": 1.0369, - "step": 3558 - }, - { - "epoch": 0.26747332030662857, - "grad_norm": 1.9378603323299122, - "learning_rate": 3.4372584552331694e-06, - "loss": 0.9839, - "step": 3559 - }, - { - "epoch": 0.26754847437246354, - "grad_norm": 1.6052540730720417, - "learning_rate": 3.4369198657906875e-06, - "loss": 0.9689, - "step": 3560 - }, - { - "epoch": 0.2676236284382985, - "grad_norm": 1.724024430435982, - "learning_rate": 3.4365811912052013e-06, - "loss": 1.0201, - "step": 3561 - }, - { - "epoch": 0.2676987825041335, - "grad_norm": 1.7773821816114528, - "learning_rate": 3.4362424314967777e-06, - "loss": 1.0069, - "step": 3562 - }, - { - "epoch": 0.26777393656996845, - "grad_norm": 0.7965277869052623, - "learning_rate": 3.4359035866854907e-06, - "loss": 0.8854, - "step": 3563 - }, - { - "epoch": 0.2678490906358034, - "grad_norm": 1.7616996679520196, - "learning_rate": 3.435564656791418e-06, - "loss": 0.9644, - "step": 3564 - }, - { - "epoch": 0.26792424470163834, - "grad_norm": 2.0755927928159115, - "learning_rate": 3.435225641834642e-06, - "loss": 1.0991, - "step": 3565 - }, - { - "epoch": 0.2679993987674733, - "grad_norm": 1.4974337719520174, - "learning_rate": 3.434886541835251e-06, - "loss": 1.004, - "step": 3566 - }, - { - "epoch": 0.2680745528333083, - "grad_norm": 2.238782789883722, - "learning_rate": 3.434547356813338e-06, - "loss": 1.0247, - "step": 3567 - }, - { - "epoch": 0.26814970689914325, - "grad_norm": 1.6193795206234316, - "learning_rate": 3.4342080867890006e-06, - "loss": 0.9877, - "step": 3568 - }, - { - "epoch": 0.2682248609649782, - "grad_norm": 2.1721187976152154, - "learning_rate": 3.4338687317823425e-06, - "loss": 1.0479, - "step": 3569 - }, - { - "epoch": 0.2683000150308132, - "grad_norm": 1.980244279589034, - "learning_rate": 3.4335292918134713e-06, - "loss": 1.0134, - "step": 3570 - }, - { - "epoch": 0.2683751690966481, - "grad_norm": 1.7799678002348938, - "learning_rate": 3.4331897669024996e-06, - "loss": 1.0691, - "step": 3571 - }, - { - "epoch": 0.2684503231624831, - "grad_norm": 1.8502839257189492, - "learning_rate": 3.432850157069546e-06, - "loss": 1.078, - "step": 3572 - }, - { - "epoch": 0.26852547722831804, - "grad_norm": 1.7289505406018804, - "learning_rate": 3.4325104623347345e-06, - "loss": 1.0432, - "step": 3573 - }, - { - "epoch": 0.268600631294153, - "grad_norm": 1.4419866287680259, - "learning_rate": 3.432170682718193e-06, - "loss": 0.9787, - "step": 3574 - }, - { - "epoch": 0.268675785359988, - "grad_norm": 1.6586484996152375, - "learning_rate": 3.431830818240054e-06, - "loss": 0.9051, - "step": 3575 - }, - { - "epoch": 0.26875093942582295, - "grad_norm": 1.6476924713190577, - "learning_rate": 3.431490868920456e-06, - "loss": 0.9867, - "step": 3576 - }, - { - "epoch": 0.2688260934916579, - "grad_norm": 1.6947569046566378, - "learning_rate": 3.4311508347795427e-06, - "loss": 1.0194, - "step": 3577 - }, - { - "epoch": 0.26890124755749284, - "grad_norm": 3.9886813957796345, - "learning_rate": 3.430810715837462e-06, - "loss": 0.9464, - "step": 3578 - }, - { - "epoch": 0.2689764016233278, - "grad_norm": 1.6686001371034056, - "learning_rate": 3.4304705121143674e-06, - "loss": 1.0059, - "step": 3579 - }, - { - "epoch": 0.2690515556891628, - "grad_norm": 3.422716847161729, - "learning_rate": 3.4301302236304174e-06, - "loss": 1.0024, - "step": 3580 - }, - { - "epoch": 0.26912670975499775, - "grad_norm": 1.9944571338215653, - "learning_rate": 3.4297898504057754e-06, - "loss": 0.9904, - "step": 3581 - }, - { - "epoch": 0.2692018638208327, - "grad_norm": 1.4760166350050135, - "learning_rate": 3.4294493924606095e-06, - "loss": 1.0645, - "step": 3582 - }, - { - "epoch": 0.2692770178866677, - "grad_norm": 2.8315387165993466, - "learning_rate": 3.429108849815094e-06, - "loss": 1.1052, - "step": 3583 - }, - { - "epoch": 0.2693521719525026, - "grad_norm": 1.649902488254655, - "learning_rate": 3.428768222489406e-06, - "loss": 0.9223, - "step": 3584 - }, - { - "epoch": 0.2694273260183376, - "grad_norm": 1.862700237800844, - "learning_rate": 3.4284275105037298e-06, - "loss": 1.0173, - "step": 3585 - }, - { - "epoch": 0.26950248008417255, - "grad_norm": 1.8107243752326094, - "learning_rate": 3.4280867138782544e-06, - "loss": 0.9932, - "step": 3586 - }, - { - "epoch": 0.2695776341500075, - "grad_norm": 1.6617283457204137, - "learning_rate": 3.427745832633172e-06, - "loss": 1.0232, - "step": 3587 - }, - { - "epoch": 0.2696527882158425, - "grad_norm": 1.5929607606306746, - "learning_rate": 3.4274048667886826e-06, - "loss": 0.9915, - "step": 3588 - }, - { - "epoch": 0.26972794228167746, - "grad_norm": 1.6150556878329725, - "learning_rate": 3.4270638163649884e-06, - "loss": 1.0638, - "step": 3589 - }, - { - "epoch": 0.2698030963475124, - "grad_norm": 2.711614079105186, - "learning_rate": 3.4267226813822983e-06, - "loss": 0.9683, - "step": 3590 - }, - { - "epoch": 0.26987825041334734, - "grad_norm": 1.722039975330347, - "learning_rate": 3.426381461860826e-06, - "loss": 1.033, - "step": 3591 - }, - { - "epoch": 0.2699534044791823, - "grad_norm": 1.8882705909134598, - "learning_rate": 3.4260401578207904e-06, - "loss": 1.0023, - "step": 3592 - }, - { - "epoch": 0.2700285585450173, - "grad_norm": 1.586496414392112, - "learning_rate": 3.425698769282415e-06, - "loss": 1.0187, - "step": 3593 - }, - { - "epoch": 0.27010371261085225, - "grad_norm": 1.776167864152127, - "learning_rate": 3.4253572962659276e-06, - "loss": 1.0163, - "step": 3594 - }, - { - "epoch": 0.2701788666766872, - "grad_norm": 2.416773585442678, - "learning_rate": 3.425015738791563e-06, - "loss": 0.9286, - "step": 3595 - }, - { - "epoch": 0.2702540207425222, - "grad_norm": 1.9122598060918037, - "learning_rate": 3.424674096879559e-06, - "loss": 0.9498, - "step": 3596 - }, - { - "epoch": 0.2703291748083571, - "grad_norm": 2.21824780005745, - "learning_rate": 3.424332370550159e-06, - "loss": 1.0106, - "step": 3597 - }, - { - "epoch": 0.2704043288741921, - "grad_norm": 1.5726405863543256, - "learning_rate": 3.4239905598236115e-06, - "loss": 1.0124, - "step": 3598 - }, - { - "epoch": 0.27047948294002705, - "grad_norm": 2.5127219752095393, - "learning_rate": 3.423648664720171e-06, - "loss": 1.0422, - "step": 3599 - }, - { - "epoch": 0.270554637005862, - "grad_norm": 1.640032924810066, - "learning_rate": 3.4233066852600958e-06, - "loss": 1.0729, - "step": 3600 - }, - { - "epoch": 0.270629791071697, - "grad_norm": 1.408326713651791, - "learning_rate": 3.422964621463649e-06, - "loss": 0.9997, - "step": 3601 - }, - { - "epoch": 0.27070494513753196, - "grad_norm": 3.545170983843727, - "learning_rate": 3.4226224733511e-06, - "loss": 1.063, - "step": 3602 - }, - { - "epoch": 0.2707800992033669, - "grad_norm": 1.9089563887956504, - "learning_rate": 3.4222802409427216e-06, - "loss": 0.9404, - "step": 3603 - }, - { - "epoch": 0.27085525326920185, - "grad_norm": 1.5196328411121218, - "learning_rate": 3.421937924258792e-06, - "loss": 1.0492, - "step": 3604 - }, - { - "epoch": 0.2709304073350368, - "grad_norm": 1.7110907081045394, - "learning_rate": 3.421595523319596e-06, - "loss": 1.0901, - "step": 3605 - }, - { - "epoch": 0.2710055614008718, - "grad_norm": 3.19500701115515, - "learning_rate": 3.421253038145421e-06, - "loss": 1.0352, - "step": 3606 - }, - { - "epoch": 0.27108071546670676, - "grad_norm": 1.6295835701417112, - "learning_rate": 3.420910468756562e-06, - "loss": 0.9145, - "step": 3607 - }, - { - "epoch": 0.2711558695325417, - "grad_norm": 2.3277879509391215, - "learning_rate": 3.4205678151733162e-06, - "loss": 0.9655, - "step": 3608 - }, - { - "epoch": 0.2712310235983767, - "grad_norm": 2.0453349136278463, - "learning_rate": 3.420225077415988e-06, - "loss": 0.9945, - "step": 3609 - }, - { - "epoch": 0.2713061776642116, - "grad_norm": 2.031056204134146, - "learning_rate": 3.4198822555048856e-06, - "loss": 1.1081, - "step": 3610 - }, - { - "epoch": 0.2713813317300466, - "grad_norm": 1.602800960398825, - "learning_rate": 3.419539349460322e-06, - "loss": 1.0929, - "step": 3611 - }, - { - "epoch": 0.27145648579588155, - "grad_norm": 3.4015083557501784, - "learning_rate": 3.4191963593026163e-06, - "loss": 1.0791, - "step": 3612 - }, - { - "epoch": 0.2715316398617165, - "grad_norm": 2.622881897647152, - "learning_rate": 3.4188532850520924e-06, - "loss": 1.0285, - "step": 3613 - }, - { - "epoch": 0.2716067939275515, - "grad_norm": 2.4207312067214963, - "learning_rate": 3.4185101267290773e-06, - "loss": 1.0055, - "step": 3614 - }, - { - "epoch": 0.27168194799338646, - "grad_norm": 3.010430957447878, - "learning_rate": 3.418166884353906e-06, - "loss": 1.0078, - "step": 3615 - }, - { - "epoch": 0.2717571020592214, - "grad_norm": 1.7526881824627158, - "learning_rate": 3.4178235579469154e-06, - "loss": 1.0276, - "step": 3616 - }, - { - "epoch": 0.27183225612505635, - "grad_norm": 1.6190522725441785, - "learning_rate": 3.417480147528451e-06, - "loss": 1.0142, - "step": 3617 - }, - { - "epoch": 0.2719074101908913, - "grad_norm": 2.1785417505605777, - "learning_rate": 3.4171366531188596e-06, - "loss": 1.0239, - "step": 3618 - }, - { - "epoch": 0.2719825642567263, - "grad_norm": 2.079198574313137, - "learning_rate": 3.4167930747384947e-06, - "loss": 0.9904, - "step": 3619 - }, - { - "epoch": 0.27205771832256126, - "grad_norm": 1.8683963198541238, - "learning_rate": 3.416449412407715e-06, - "loss": 1.0036, - "step": 3620 - }, - { - "epoch": 0.27213287238839623, - "grad_norm": 2.157841523546381, - "learning_rate": 3.4161056661468834e-06, - "loss": 1.139, - "step": 3621 - }, - { - "epoch": 0.2722080264542312, - "grad_norm": 2.3532362142880636, - "learning_rate": 3.4157618359763687e-06, - "loss": 0.9519, - "step": 3622 - }, - { - "epoch": 0.2722831805200661, - "grad_norm": 1.5452135970477021, - "learning_rate": 3.4154179219165435e-06, - "loss": 1.0311, - "step": 3623 - }, - { - "epoch": 0.2723583345859011, - "grad_norm": 2.038433109356498, - "learning_rate": 3.415073923987787e-06, - "loss": 0.9844, - "step": 3624 - }, - { - "epoch": 0.27243348865173606, - "grad_norm": 2.6870148686635407, - "learning_rate": 3.4147298422104815e-06, - "loss": 0.9819, - "step": 3625 - }, - { - "epoch": 0.272508642717571, - "grad_norm": 2.8398165485071045, - "learning_rate": 3.4143856766050157e-06, - "loss": 0.934, - "step": 3626 - }, - { - "epoch": 0.272583796783406, - "grad_norm": 34.216670170811845, - "learning_rate": 3.4140414271917825e-06, - "loss": 1.0676, - "step": 3627 - }, - { - "epoch": 0.27265895084924097, - "grad_norm": 3.4996993071953613, - "learning_rate": 3.4136970939911793e-06, - "loss": 1.0325, - "step": 3628 - }, - { - "epoch": 0.2727341049150759, - "grad_norm": 0.9259417109115281, - "learning_rate": 3.413352677023611e-06, - "loss": 0.8396, - "step": 3629 - }, - { - "epoch": 0.27280925898091085, - "grad_norm": 1.6277270542801314, - "learning_rate": 3.4130081763094836e-06, - "loss": 0.9812, - "step": 3630 - }, - { - "epoch": 0.2728844130467458, - "grad_norm": 1.7087277988199892, - "learning_rate": 3.4126635918692114e-06, - "loss": 1.0061, - "step": 3631 - }, - { - "epoch": 0.2729595671125808, - "grad_norm": 2.102160173819498, - "learning_rate": 3.412318923723212e-06, - "loss": 0.9968, - "step": 3632 - }, - { - "epoch": 0.27303472117841576, - "grad_norm": 2.1954226211676575, - "learning_rate": 3.411974171891908e-06, - "loss": 1.0437, - "step": 3633 - }, - { - "epoch": 0.27310987524425073, - "grad_norm": 1.536958602238563, - "learning_rate": 3.4116293363957276e-06, - "loss": 1.0037, - "step": 3634 - }, - { - "epoch": 0.2731850293100857, - "grad_norm": 2.2990688304909894, - "learning_rate": 3.4112844172551034e-06, - "loss": 0.9027, - "step": 3635 - }, - { - "epoch": 0.2732601833759206, - "grad_norm": 1.5879863414561364, - "learning_rate": 3.410939414490474e-06, - "loss": 0.9757, - "step": 3636 - }, - { - "epoch": 0.2733353374417556, - "grad_norm": 2.024113486941881, - "learning_rate": 3.4105943281222804e-06, - "loss": 1.0725, - "step": 3637 - }, - { - "epoch": 0.27341049150759056, - "grad_norm": 1.6457978578760084, - "learning_rate": 3.4102491581709717e-06, - "loss": 1.0228, - "step": 3638 - }, - { - "epoch": 0.27348564557342553, - "grad_norm": 2.2190984532922493, - "learning_rate": 3.4099039046570006e-06, - "loss": 1.1434, - "step": 3639 - }, - { - "epoch": 0.2735607996392605, - "grad_norm": 2.1765044566765446, - "learning_rate": 3.4095585676008234e-06, - "loss": 0.9199, - "step": 3640 - }, - { - "epoch": 0.27363595370509547, - "grad_norm": 0.7321653603541323, - "learning_rate": 3.4092131470229045e-06, - "loss": 0.8697, - "step": 3641 - }, - { - "epoch": 0.2737111077709304, - "grad_norm": 2.2446771815255473, - "learning_rate": 3.40886764294371e-06, - "loss": 1.0503, - "step": 3642 - }, - { - "epoch": 0.27378626183676535, - "grad_norm": 0.656900764417385, - "learning_rate": 3.4085220553837133e-06, - "loss": 0.8497, - "step": 3643 - }, - { - "epoch": 0.2738614159026003, - "grad_norm": 3.181691730760899, - "learning_rate": 3.40817638436339e-06, - "loss": 0.9597, - "step": 3644 - }, - { - "epoch": 0.2739365699684353, - "grad_norm": 2.2670365770348235, - "learning_rate": 3.407830629903224e-06, - "loss": 1.0255, - "step": 3645 - }, - { - "epoch": 0.27401172403427027, - "grad_norm": 1.4743089382480015, - "learning_rate": 3.4074847920237032e-06, - "loss": 1.0042, - "step": 3646 - }, - { - "epoch": 0.27408687810010524, - "grad_norm": 2.6688879616162526, - "learning_rate": 3.407138870745318e-06, - "loss": 1.0137, - "step": 3647 - }, - { - "epoch": 0.27416203216594015, - "grad_norm": 3.097020895838492, - "learning_rate": 3.4067928660885665e-06, - "loss": 1.0349, - "step": 3648 - }, - { - "epoch": 0.2742371862317751, - "grad_norm": 2.062861208641867, - "learning_rate": 3.406446778073951e-06, - "loss": 0.9934, - "step": 3649 - }, - { - "epoch": 0.2743123402976101, - "grad_norm": 1.5483382269238246, - "learning_rate": 3.4061006067219776e-06, - "loss": 0.9723, - "step": 3650 - }, - { - "epoch": 0.27438749436344506, - "grad_norm": 1.8276827024463336, - "learning_rate": 3.40575435205316e-06, - "loss": 1.0019, - "step": 3651 - }, - { - "epoch": 0.27446264842928003, - "grad_norm": 1.7147681843090568, - "learning_rate": 3.405408014088013e-06, - "loss": 1.0546, - "step": 3652 - }, - { - "epoch": 0.274537802495115, - "grad_norm": 1.5230814982429262, - "learning_rate": 3.40506159284706e-06, - "loss": 1.0298, - "step": 3653 - }, - { - "epoch": 0.27461295656095, - "grad_norm": 2.11598886716254, - "learning_rate": 3.4047150883508274e-06, - "loss": 1.0169, - "step": 3654 - }, - { - "epoch": 0.2746881106267849, - "grad_norm": 4.13726993163156, - "learning_rate": 3.4043685006198465e-06, - "loss": 0.9279, - "step": 3655 - }, - { - "epoch": 0.27476326469261986, - "grad_norm": 2.4804774006369823, - "learning_rate": 3.4040218296746544e-06, - "loss": 1.017, - "step": 3656 - }, - { - "epoch": 0.2748384187584548, - "grad_norm": 2.287836260652016, - "learning_rate": 3.403675075535793e-06, - "loss": 1.0216, - "step": 3657 - }, - { - "epoch": 0.2749135728242898, - "grad_norm": 2.4697837114038395, - "learning_rate": 3.403328238223808e-06, - "loss": 0.913, - "step": 3658 - }, - { - "epoch": 0.27498872689012477, - "grad_norm": 1.8003707715804356, - "learning_rate": 3.4029813177592504e-06, - "loss": 0.9738, - "step": 3659 - }, - { - "epoch": 0.27506388095595974, - "grad_norm": 1.4757714145508345, - "learning_rate": 3.402634314162678e-06, - "loss": 0.971, - "step": 3660 - }, - { - "epoch": 0.27513903502179465, - "grad_norm": 2.3244494411839467, - "learning_rate": 3.4022872274546517e-06, - "loss": 0.9893, - "step": 3661 - }, - { - "epoch": 0.2752141890876296, - "grad_norm": 1.8332114901657641, - "learning_rate": 3.4019400576557377e-06, - "loss": 1.0958, - "step": 3662 - }, - { - "epoch": 0.2752893431534646, - "grad_norm": 1.8067551035309495, - "learning_rate": 3.4015928047865056e-06, - "loss": 1.0663, - "step": 3663 - }, - { - "epoch": 0.27536449721929956, - "grad_norm": 1.6810011583831446, - "learning_rate": 3.401245468867534e-06, - "loss": 0.9979, - "step": 3664 - }, - { - "epoch": 0.27543965128513453, - "grad_norm": 1.421870137300665, - "learning_rate": 3.4008980499194025e-06, - "loss": 1.0454, - "step": 3665 - }, - { - "epoch": 0.2755148053509695, - "grad_norm": 1.7541685863976686, - "learning_rate": 3.4005505479626965e-06, - "loss": 0.9476, - "step": 3666 - }, - { - "epoch": 0.2755899594168045, - "grad_norm": 1.718280381288072, - "learning_rate": 3.4002029630180074e-06, - "loss": 0.9705, - "step": 3667 - }, - { - "epoch": 0.2756651134826394, - "grad_norm": 2.601258131213405, - "learning_rate": 3.399855295105932e-06, - "loss": 1.0545, - "step": 3668 - }, - { - "epoch": 0.27574026754847436, - "grad_norm": 5.4408750035112, - "learning_rate": 3.3995075442470694e-06, - "loss": 0.9824, - "step": 3669 - }, - { - "epoch": 0.27581542161430933, - "grad_norm": 1.9226872658271996, - "learning_rate": 3.3991597104620253e-06, - "loss": 0.9916, - "step": 3670 - }, - { - "epoch": 0.2758905756801443, - "grad_norm": 1.5231233713096706, - "learning_rate": 3.3988117937714114e-06, - "loss": 1.0714, - "step": 3671 - }, - { - "epoch": 0.27596572974597927, - "grad_norm": 3.3367828798841246, - "learning_rate": 3.398463794195842e-06, - "loss": 0.9774, - "step": 3672 - }, - { - "epoch": 0.27604088381181424, - "grad_norm": 0.6790781468547453, - "learning_rate": 3.3981157117559376e-06, - "loss": 0.8503, - "step": 3673 - }, - { - "epoch": 0.27611603787764916, - "grad_norm": 1.8121280946317675, - "learning_rate": 3.397767546472323e-06, - "loss": 0.9689, - "step": 3674 - }, - { - "epoch": 0.2761911919434841, - "grad_norm": 0.7167919688875442, - "learning_rate": 3.39741929836563e-06, - "loss": 0.7949, - "step": 3675 - }, - { - "epoch": 0.2762663460093191, - "grad_norm": 2.563511811513607, - "learning_rate": 3.3970709674564918e-06, - "loss": 1.0388, - "step": 3676 - }, - { - "epoch": 0.27634150007515407, - "grad_norm": 1.777887550415379, - "learning_rate": 3.3967225537655492e-06, - "loss": 1.0378, - "step": 3677 - }, - { - "epoch": 0.27641665414098904, - "grad_norm": 2.58015842587117, - "learning_rate": 3.396374057313447e-06, - "loss": 0.983, - "step": 3678 - }, - { - "epoch": 0.276491808206824, - "grad_norm": 1.9953442265580383, - "learning_rate": 3.396025478120835e-06, - "loss": 0.9573, - "step": 3679 - }, - { - "epoch": 0.276566962272659, - "grad_norm": 1.5095648983742185, - "learning_rate": 3.395676816208367e-06, - "loss": 0.9925, - "step": 3680 - }, - { - "epoch": 0.2766421163384939, - "grad_norm": 5.081571284010246, - "learning_rate": 3.3953280715967036e-06, - "loss": 1.0245, - "step": 3681 - }, - { - "epoch": 0.27671727040432886, - "grad_norm": 1.98512175919236, - "learning_rate": 3.394979244306509e-06, - "loss": 0.9049, - "step": 3682 - }, - { - "epoch": 0.27679242447016383, - "grad_norm": 1.6194668982933762, - "learning_rate": 3.3946303343584523e-06, - "loss": 0.9205, - "step": 3683 - }, - { - "epoch": 0.2768675785359988, - "grad_norm": 2.2713219936383746, - "learning_rate": 3.3942813417732083e-06, - "loss": 1.0262, - "step": 3684 - }, - { - "epoch": 0.2769427326018338, - "grad_norm": 1.4577986509068024, - "learning_rate": 3.3939322665714548e-06, - "loss": 1.029, - "step": 3685 - }, - { - "epoch": 0.27701788666766874, - "grad_norm": 1.7917669918744392, - "learning_rate": 3.3935831087738774e-06, - "loss": 0.9814, - "step": 3686 - }, - { - "epoch": 0.27709304073350366, - "grad_norm": 1.6498285002156743, - "learning_rate": 3.3932338684011646e-06, - "loss": 0.9885, - "step": 3687 - }, - { - "epoch": 0.27716819479933863, - "grad_norm": 1.6421567759738505, - "learning_rate": 3.3928845454740097e-06, - "loss": 1.0768, - "step": 3688 - }, - { - "epoch": 0.2772433488651736, - "grad_norm": 1.7134312958425002, - "learning_rate": 3.3925351400131118e-06, - "loss": 1.0747, - "step": 3689 - }, - { - "epoch": 0.27731850293100857, - "grad_norm": 2.036249317522678, - "learning_rate": 3.392185652039175e-06, - "loss": 0.9692, - "step": 3690 - }, - { - "epoch": 0.27739365699684354, - "grad_norm": 2.2556936492155417, - "learning_rate": 3.3918360815729066e-06, - "loss": 1.0572, - "step": 3691 - }, - { - "epoch": 0.2774688110626785, - "grad_norm": 1.7453741515223864, - "learning_rate": 3.391486428635021e-06, - "loss": 1.0323, - "step": 3692 - }, - { - "epoch": 0.2775439651285134, - "grad_norm": 3.036483651127075, - "learning_rate": 3.391136693246236e-06, - "loss": 0.9022, - "step": 3693 - }, - { - "epoch": 0.2776191191943484, - "grad_norm": 1.7236418034910685, - "learning_rate": 3.390786875427275e-06, - "loss": 1.0682, - "step": 3694 - }, - { - "epoch": 0.27769427326018337, - "grad_norm": 1.806626372409997, - "learning_rate": 3.3904369751988657e-06, - "loss": 1.0359, - "step": 3695 - }, - { - "epoch": 0.27776942732601834, - "grad_norm": 1.6298589687534695, - "learning_rate": 3.3900869925817416e-06, - "loss": 1.018, - "step": 3696 - }, - { - "epoch": 0.2778445813918533, - "grad_norm": 2.428624745593116, - "learning_rate": 3.3897369275966404e-06, - "loss": 0.942, - "step": 3697 - }, - { - "epoch": 0.2779197354576883, - "grad_norm": 1.8291136847652893, - "learning_rate": 3.389386780264304e-06, - "loss": 0.9878, - "step": 3698 - }, - { - "epoch": 0.27799488952352325, - "grad_norm": 2.7573370591248527, - "learning_rate": 3.389036550605481e-06, - "loss": 1.0275, - "step": 3699 - }, - { - "epoch": 0.27807004358935816, - "grad_norm": 1.750229071202258, - "learning_rate": 3.3886862386409233e-06, - "loss": 0.9951, - "step": 3700 - }, - { - "epoch": 0.27814519765519313, - "grad_norm": 8.672888894135536, - "learning_rate": 3.3883358443913883e-06, - "loss": 1.0645, - "step": 3701 - }, - { - "epoch": 0.2782203517210281, - "grad_norm": 1.571951016053601, - "learning_rate": 3.387985367877638e-06, - "loss": 1.0176, - "step": 3702 - }, - { - "epoch": 0.2782955057868631, - "grad_norm": 1.6079233529863852, - "learning_rate": 3.38763480912044e-06, - "loss": 0.9546, - "step": 3703 - }, - { - "epoch": 0.27837065985269804, - "grad_norm": 1.9252322886630249, - "learning_rate": 3.3872841681405654e-06, - "loss": 1.0439, - "step": 3704 - }, - { - "epoch": 0.278445813918533, - "grad_norm": 4.601931597636072, - "learning_rate": 3.3869334449587925e-06, - "loss": 1.008, - "step": 3705 - }, - { - "epoch": 0.27852096798436793, - "grad_norm": 1.6280386464361192, - "learning_rate": 3.3865826395959018e-06, - "loss": 0.945, - "step": 3706 - }, - { - "epoch": 0.2785961220502029, - "grad_norm": 7.6072676793583485, - "learning_rate": 3.38623175207268e-06, - "loss": 1.0402, - "step": 3707 - }, - { - "epoch": 0.27867127611603787, - "grad_norm": 2.075767499241421, - "learning_rate": 3.3858807824099182e-06, - "loss": 1.0623, - "step": 3708 - }, - { - "epoch": 0.27874643018187284, - "grad_norm": 1.3695693276786598, - "learning_rate": 3.385529730628414e-06, - "loss": 0.9647, - "step": 3709 - }, - { - "epoch": 0.2788215842477078, - "grad_norm": 1.8412362112032685, - "learning_rate": 3.385178596748967e-06, - "loss": 1.1065, - "step": 3710 - }, - { - "epoch": 0.2788967383135428, - "grad_norm": 1.6273724037464392, - "learning_rate": 3.3848273807923836e-06, - "loss": 1.0165, - "step": 3711 - }, - { - "epoch": 0.27897189237937775, - "grad_norm": 3.7823313278238198, - "learning_rate": 3.384476082779476e-06, - "loss": 1.0313, - "step": 3712 - }, - { - "epoch": 0.27904704644521267, - "grad_norm": 1.7086474974890693, - "learning_rate": 3.3841247027310584e-06, - "loss": 1.064, - "step": 3713 - }, - { - "epoch": 0.27912220051104764, - "grad_norm": 2.00943746751474, - "learning_rate": 3.3837732406679524e-06, - "loss": 1.0034, - "step": 3714 - }, - { - "epoch": 0.2791973545768826, - "grad_norm": 1.600489335930002, - "learning_rate": 3.3834216966109827e-06, - "loss": 0.9856, - "step": 3715 - }, - { - "epoch": 0.2792725086427176, - "grad_norm": 0.6909479680642149, - "learning_rate": 3.3830700705809802e-06, - "loss": 0.8467, - "step": 3716 - }, - { - "epoch": 0.27934766270855255, - "grad_norm": 1.950246902876078, - "learning_rate": 3.38271836259878e-06, - "loss": 1.0705, - "step": 3717 - }, - { - "epoch": 0.2794228167743875, - "grad_norm": 1.4792459314447348, - "learning_rate": 3.382366572685222e-06, - "loss": 0.9518, - "step": 3718 - }, - { - "epoch": 0.27949797084022243, - "grad_norm": 2.81271827698445, - "learning_rate": 3.3820147008611512e-06, - "loss": 1.0434, - "step": 3719 - }, - { - "epoch": 0.2795731249060574, - "grad_norm": 1.7171151448800301, - "learning_rate": 3.3816627471474166e-06, - "loss": 1.0017, - "step": 3720 - }, - { - "epoch": 0.2796482789718924, - "grad_norm": 6.208816646051574, - "learning_rate": 3.381310711564874e-06, - "loss": 0.9187, - "step": 3721 - }, - { - "epoch": 0.27972343303772734, - "grad_norm": 1.7079286638769002, - "learning_rate": 3.380958594134382e-06, - "loss": 1.085, - "step": 3722 - }, - { - "epoch": 0.2797985871035623, - "grad_norm": 2.161111410097931, - "learning_rate": 3.380606394876806e-06, - "loss": 0.9642, - "step": 3723 - }, - { - "epoch": 0.2798737411693973, - "grad_norm": 1.4482981593196114, - "learning_rate": 3.380254113813014e-06, - "loss": 1.0441, - "step": 3724 - }, - { - "epoch": 0.27994889523523225, - "grad_norm": 1.9106162673805536, - "learning_rate": 3.3799017509638805e-06, - "loss": 1.0649, - "step": 3725 - }, - { - "epoch": 0.28002404930106717, - "grad_norm": 1.6394468891351504, - "learning_rate": 3.3795493063502836e-06, - "loss": 0.8628, - "step": 3726 - }, - { - "epoch": 0.28009920336690214, - "grad_norm": 2.430508882589143, - "learning_rate": 3.3791967799931085e-06, - "loss": 1.0017, - "step": 3727 - }, - { - "epoch": 0.2801743574327371, - "grad_norm": 5.26541055757704, - "learning_rate": 3.3788441719132425e-06, - "loss": 0.9573, - "step": 3728 - }, - { - "epoch": 0.2802495114985721, - "grad_norm": 2.0401526566028467, - "learning_rate": 3.37849148213158e-06, - "loss": 1.0091, - "step": 3729 - }, - { - "epoch": 0.28032466556440705, - "grad_norm": 1.6764005925906789, - "learning_rate": 3.3781387106690175e-06, - "loss": 1.0086, - "step": 3730 - }, - { - "epoch": 0.280399819630242, - "grad_norm": 1.9913676650633145, - "learning_rate": 3.37778585754646e-06, - "loss": 1.0184, - "step": 3731 - }, - { - "epoch": 0.28047497369607693, - "grad_norm": 2.6840486765567833, - "learning_rate": 3.3774329227848144e-06, - "loss": 0.8827, - "step": 3732 - }, - { - "epoch": 0.2805501277619119, - "grad_norm": 1.5553366681153256, - "learning_rate": 3.3770799064049927e-06, - "loss": 1.0933, - "step": 3733 - }, - { - "epoch": 0.2806252818277469, - "grad_norm": 1.721584202741625, - "learning_rate": 3.3767268084279143e-06, - "loss": 1.0033, - "step": 3734 - }, - { - "epoch": 0.28070043589358185, - "grad_norm": 2.394936755944074, - "learning_rate": 3.376373628874501e-06, - "loss": 0.918, - "step": 3735 - }, - { - "epoch": 0.2807755899594168, - "grad_norm": 1.8319085248822815, - "learning_rate": 3.3760203677656786e-06, - "loss": 0.9829, - "step": 3736 - }, - { - "epoch": 0.2808507440252518, - "grad_norm": 1.5273218958235077, - "learning_rate": 3.3756670251223813e-06, - "loss": 0.9771, - "step": 3737 - }, - { - "epoch": 0.2809258980910867, - "grad_norm": 2.5234728358956864, - "learning_rate": 3.375313600965544e-06, - "loss": 1.0091, - "step": 3738 - }, - { - "epoch": 0.28100105215692167, - "grad_norm": 1.6053043476214819, - "learning_rate": 3.3749600953161102e-06, - "loss": 1.04, - "step": 3739 - }, - { - "epoch": 0.28107620622275664, - "grad_norm": 1.5239404875031355, - "learning_rate": 3.3746065081950253e-06, - "loss": 1.0698, - "step": 3740 - }, - { - "epoch": 0.2811513602885916, - "grad_norm": 1.9437153705811703, - "learning_rate": 3.374252839623241e-06, - "loss": 1.0233, - "step": 3741 - }, - { - "epoch": 0.2812265143544266, - "grad_norm": 1.6103930910399074, - "learning_rate": 3.373899089621714e-06, - "loss": 1.0631, - "step": 3742 - }, - { - "epoch": 0.28130166842026155, - "grad_norm": 1.5272154302913585, - "learning_rate": 3.3735452582114046e-06, - "loss": 0.9284, - "step": 3743 - }, - { - "epoch": 0.2813768224860965, - "grad_norm": 0.7304986141852277, - "learning_rate": 3.373191345413279e-06, - "loss": 0.8316, - "step": 3744 - }, - { - "epoch": 0.28145197655193144, - "grad_norm": 2.1952270273966525, - "learning_rate": 3.3728373512483083e-06, - "loss": 1.0553, - "step": 3745 - }, - { - "epoch": 0.2815271306177664, - "grad_norm": 2.444918957324709, - "learning_rate": 3.3724832757374674e-06, - "loss": 1.011, - "step": 3746 - }, - { - "epoch": 0.2816022846836014, - "grad_norm": 1.6668389310821308, - "learning_rate": 3.3721291189017363e-06, - "loss": 1.0732, - "step": 3747 - }, - { - "epoch": 0.28167743874943635, - "grad_norm": 1.501739764591935, - "learning_rate": 3.371774880762101e-06, - "loss": 1.061, - "step": 3748 - }, - { - "epoch": 0.2817525928152713, - "grad_norm": 1.605000894136269, - "learning_rate": 3.3714205613395513e-06, - "loss": 1.0554, - "step": 3749 - }, - { - "epoch": 0.2818277468811063, - "grad_norm": 1.5224808256698903, - "learning_rate": 3.371066160655082e-06, - "loss": 0.9762, - "step": 3750 - }, - { - "epoch": 0.2819029009469412, - "grad_norm": 2.0698588324062315, - "learning_rate": 3.3707116787296918e-06, - "loss": 0.9947, - "step": 3751 - }, - { - "epoch": 0.2819780550127762, - "grad_norm": 1.7880584698019983, - "learning_rate": 3.3703571155843866e-06, - "loss": 1.0929, - "step": 3752 - }, - { - "epoch": 0.28205320907861114, - "grad_norm": 1.9979142399502343, - "learning_rate": 3.370002471240174e-06, - "loss": 0.9394, - "step": 3753 - }, - { - "epoch": 0.2821283631444461, - "grad_norm": 1.4399270839244058, - "learning_rate": 3.36964774571807e-06, - "loss": 0.9955, - "step": 3754 - }, - { - "epoch": 0.2822035172102811, - "grad_norm": 2.7647327772701495, - "learning_rate": 3.3692929390390914e-06, - "loss": 0.9758, - "step": 3755 - }, - { - "epoch": 0.28227867127611606, - "grad_norm": 1.69076525940158, - "learning_rate": 3.3689380512242627e-06, - "loss": 0.9851, - "step": 3756 - }, - { - "epoch": 0.282353825341951, - "grad_norm": 1.6262704577907496, - "learning_rate": 3.3685830822946134e-06, - "loss": 0.8576, - "step": 3757 - }, - { - "epoch": 0.28242897940778594, - "grad_norm": 1.5361637759862135, - "learning_rate": 3.3682280322711753e-06, - "loss": 1.0772, - "step": 3758 - }, - { - "epoch": 0.2825041334736209, - "grad_norm": 1.8772117920788656, - "learning_rate": 3.367872901174987e-06, - "loss": 1.0095, - "step": 3759 - }, - { - "epoch": 0.2825792875394559, - "grad_norm": 1.4354007153485213, - "learning_rate": 3.367517689027091e-06, - "loss": 0.9204, - "step": 3760 - }, - { - "epoch": 0.28265444160529085, - "grad_norm": 1.623018083190775, - "learning_rate": 3.3671623958485354e-06, - "loss": 0.9962, - "step": 3761 - }, - { - "epoch": 0.2827295956711258, - "grad_norm": 2.041034782356265, - "learning_rate": 3.3668070216603736e-06, - "loss": 1.0901, - "step": 3762 - }, - { - "epoch": 0.2828047497369608, - "grad_norm": 1.7351553596174831, - "learning_rate": 3.366451566483661e-06, - "loss": 0.9366, - "step": 3763 - }, - { - "epoch": 0.2828799038027957, - "grad_norm": 1.59922426313187, - "learning_rate": 3.366096030339461e-06, - "loss": 1.04, - "step": 3764 - }, - { - "epoch": 0.2829550578686307, - "grad_norm": 1.647638570257468, - "learning_rate": 3.3657404132488403e-06, - "loss": 1.0135, - "step": 3765 - }, - { - "epoch": 0.28303021193446565, - "grad_norm": 1.6361821644755514, - "learning_rate": 3.3653847152328694e-06, - "loss": 1.0689, - "step": 3766 - }, - { - "epoch": 0.2831053660003006, - "grad_norm": 2.4274304077670283, - "learning_rate": 3.3650289363126266e-06, - "loss": 1.0459, - "step": 3767 - }, - { - "epoch": 0.2831805200661356, - "grad_norm": 1.567379846302684, - "learning_rate": 3.3646730765091916e-06, - "loss": 0.987, - "step": 3768 - }, - { - "epoch": 0.28325567413197056, - "grad_norm": 0.679764250006522, - "learning_rate": 3.3643171358436513e-06, - "loss": 0.8566, - "step": 3769 - }, - { - "epoch": 0.28333082819780553, - "grad_norm": 1.4833207251337959, - "learning_rate": 3.3639611143370967e-06, - "loss": 0.9058, - "step": 3770 - }, - { - "epoch": 0.28340598226364044, - "grad_norm": 1.4943511615097838, - "learning_rate": 3.3636050120106233e-06, - "loss": 1.0671, - "step": 3771 - }, - { - "epoch": 0.2834811363294754, - "grad_norm": 2.167964571012301, - "learning_rate": 3.363248828885331e-06, - "loss": 0.9766, - "step": 3772 - }, - { - "epoch": 0.2835562903953104, - "grad_norm": 2.0556728770594868, - "learning_rate": 3.362892564982325e-06, - "loss": 0.9418, - "step": 3773 - }, - { - "epoch": 0.28363144446114535, - "grad_norm": 1.8507666185089717, - "learning_rate": 3.3625362203227167e-06, - "loss": 1.0086, - "step": 3774 - }, - { - "epoch": 0.2837065985269803, - "grad_norm": 1.2609783161274388, - "learning_rate": 3.3621797949276188e-06, - "loss": 1.0016, - "step": 3775 - }, - { - "epoch": 0.2837817525928153, - "grad_norm": 1.6025930671030644, - "learning_rate": 3.3618232888181524e-06, - "loss": 0.9979, - "step": 3776 - }, - { - "epoch": 0.2838569066586502, - "grad_norm": 2.7429568523778034, - "learning_rate": 3.3614667020154415e-06, - "loss": 1.1041, - "step": 3777 - }, - { - "epoch": 0.2839320607244852, - "grad_norm": 1.6606225649091149, - "learning_rate": 3.3611100345406146e-06, - "loss": 0.9378, - "step": 3778 - }, - { - "epoch": 0.28400721479032015, - "grad_norm": 1.7720296371771493, - "learning_rate": 3.3607532864148063e-06, - "loss": 1.0093, - "step": 3779 - }, - { - "epoch": 0.2840823688561551, - "grad_norm": 2.1159650926504976, - "learning_rate": 3.3603964576591553e-06, - "loss": 0.9018, - "step": 3780 - }, - { - "epoch": 0.2841575229219901, - "grad_norm": 2.4247338925680206, - "learning_rate": 3.360039548294805e-06, - "loss": 1.0815, - "step": 3781 - }, - { - "epoch": 0.28423267698782506, - "grad_norm": 1.696229466965496, - "learning_rate": 3.3596825583429033e-06, - "loss": 0.9474, - "step": 3782 - }, - { - "epoch": 0.28430783105366, - "grad_norm": 2.25216157714356, - "learning_rate": 3.3593254878246035e-06, - "loss": 0.9914, - "step": 3783 - }, - { - "epoch": 0.28438298511949495, - "grad_norm": 1.7224482888707755, - "learning_rate": 3.358968336761063e-06, - "loss": 0.9975, - "step": 3784 - }, - { - "epoch": 0.2844581391853299, - "grad_norm": 2.1646271667839705, - "learning_rate": 3.3586111051734455e-06, - "loss": 1.0009, - "step": 3785 - }, - { - "epoch": 0.2845332932511649, - "grad_norm": 1.511690913296478, - "learning_rate": 3.358253793082917e-06, - "loss": 0.9935, - "step": 3786 - }, - { - "epoch": 0.28460844731699986, - "grad_norm": 5.325520077233545, - "learning_rate": 3.3578964005106496e-06, - "loss": 0.9497, - "step": 3787 - }, - { - "epoch": 0.2846836013828348, - "grad_norm": 1.5830564591441882, - "learning_rate": 3.3575389274778214e-06, - "loss": 1.0642, - "step": 3788 - }, - { - "epoch": 0.2847587554486698, - "grad_norm": 2.0910391267666872, - "learning_rate": 3.3571813740056135e-06, - "loss": 1.0725, - "step": 3789 - }, - { - "epoch": 0.2848339095145047, - "grad_norm": 2.0000319478344335, - "learning_rate": 3.356823740115212e-06, - "loss": 0.9867, - "step": 3790 - }, - { - "epoch": 0.2849090635803397, - "grad_norm": 1.8876998593452743, - "learning_rate": 3.3564660258278085e-06, - "loss": 0.9286, - "step": 3791 - }, - { - "epoch": 0.28498421764617465, - "grad_norm": 1.6364434131287762, - "learning_rate": 3.3561082311645982e-06, - "loss": 1.0622, - "step": 3792 - }, - { - "epoch": 0.2850593717120096, - "grad_norm": 2.404776481186764, - "learning_rate": 3.3557503561467832e-06, - "loss": 1.0142, - "step": 3793 - }, - { - "epoch": 0.2851345257778446, - "grad_norm": 1.6879848207631942, - "learning_rate": 3.3553924007955673e-06, - "loss": 0.8955, - "step": 3794 - }, - { - "epoch": 0.28520967984367956, - "grad_norm": 1.8051444792052114, - "learning_rate": 3.355034365132162e-06, - "loss": 0.8917, - "step": 3795 - }, - { - "epoch": 0.2852848339095145, - "grad_norm": 1.3898609076690465, - "learning_rate": 3.354676249177781e-06, - "loss": 1.0656, - "step": 3796 - }, - { - "epoch": 0.28535998797534945, - "grad_norm": 1.7539014242738804, - "learning_rate": 3.354318052953646e-06, - "loss": 0.9934, - "step": 3797 - }, - { - "epoch": 0.2854351420411844, - "grad_norm": 1.7592065327348276, - "learning_rate": 3.3539597764809794e-06, - "loss": 0.9082, - "step": 3798 - }, - { - "epoch": 0.2855102961070194, - "grad_norm": 2.6473312543164234, - "learning_rate": 3.3536014197810115e-06, - "loss": 0.9956, - "step": 3799 - }, - { - "epoch": 0.28558545017285436, - "grad_norm": 1.7358954011634795, - "learning_rate": 3.3532429828749768e-06, - "loss": 0.9336, - "step": 3800 - }, - { - "epoch": 0.28566060423868933, - "grad_norm": 1.6504101272423919, - "learning_rate": 3.3528844657841128e-06, - "loss": 1.0402, - "step": 3801 - }, - { - "epoch": 0.2857357583045243, - "grad_norm": 1.6958019762943888, - "learning_rate": 3.352525868529664e-06, - "loss": 1.0033, - "step": 3802 - }, - { - "epoch": 0.2858109123703592, - "grad_norm": 1.5165252371580018, - "learning_rate": 3.352167191132878e-06, - "loss": 1.0272, - "step": 3803 - }, - { - "epoch": 0.2858860664361942, - "grad_norm": 1.8305095542693801, - "learning_rate": 3.3518084336150084e-06, - "loss": 1.0626, - "step": 3804 - }, - { - "epoch": 0.28596122050202916, - "grad_norm": 2.4701896043352556, - "learning_rate": 3.3514495959973125e-06, - "loss": 0.9613, - "step": 3805 - }, - { - "epoch": 0.2860363745678641, - "grad_norm": 1.7807837428160609, - "learning_rate": 3.3510906783010536e-06, - "loss": 1.0023, - "step": 3806 - }, - { - "epoch": 0.2861115286336991, - "grad_norm": 4.335656179288769, - "learning_rate": 3.3507316805474976e-06, - "loss": 0.8959, - "step": 3807 - }, - { - "epoch": 0.28618668269953407, - "grad_norm": 1.4180644189597242, - "learning_rate": 3.3503726027579175e-06, - "loss": 0.9985, - "step": 3808 - }, - { - "epoch": 0.286261836765369, - "grad_norm": 1.4017426629089607, - "learning_rate": 3.3500134449535894e-06, - "loss": 0.9866, - "step": 3809 - }, - { - "epoch": 0.28633699083120395, - "grad_norm": 2.7457955037020936, - "learning_rate": 3.3496542071557955e-06, - "loss": 0.8859, - "step": 3810 - }, - { - "epoch": 0.2864121448970389, - "grad_norm": 1.437246383187346, - "learning_rate": 3.3492948893858217e-06, - "loss": 0.9656, - "step": 3811 - }, - { - "epoch": 0.2864872989628739, - "grad_norm": 1.9829184525651364, - "learning_rate": 3.3489354916649593e-06, - "loss": 0.9288, - "step": 3812 - }, - { - "epoch": 0.28656245302870886, - "grad_norm": 2.5526234165584816, - "learning_rate": 3.348576014014503e-06, - "loss": 1.0204, - "step": 3813 - }, - { - "epoch": 0.28663760709454383, - "grad_norm": 5.577064049960193, - "learning_rate": 3.3482164564557537e-06, - "loss": 0.9921, - "step": 3814 - }, - { - "epoch": 0.2867127611603788, - "grad_norm": 1.7378611539201183, - "learning_rate": 3.3478568190100173e-06, - "loss": 1.0101, - "step": 3815 - }, - { - "epoch": 0.2867879152262137, - "grad_norm": 11.598440367985972, - "learning_rate": 3.3474971016986024e-06, - "loss": 0.9088, - "step": 3816 - }, - { - "epoch": 0.2868630692920487, - "grad_norm": 1.4467815459618774, - "learning_rate": 3.3471373045428248e-06, - "loss": 1.0776, - "step": 3817 - }, - { - "epoch": 0.28693822335788366, - "grad_norm": 2.2772882628183493, - "learning_rate": 3.346777427564003e-06, - "loss": 0.9164, - "step": 3818 - }, - { - "epoch": 0.28701337742371863, - "grad_norm": 1.8242354790453486, - "learning_rate": 3.3464174707834618e-06, - "loss": 1.0632, - "step": 3819 - }, - { - "epoch": 0.2870885314895536, - "grad_norm": 1.677970540985024, - "learning_rate": 3.34605743422253e-06, - "loss": 0.9836, - "step": 3820 - }, - { - "epoch": 0.28716368555538857, - "grad_norm": 2.216451609607558, - "learning_rate": 3.34569731790254e-06, - "loss": 0.9164, - "step": 3821 - }, - { - "epoch": 0.2872388396212235, - "grad_norm": 1.8752697432719188, - "learning_rate": 3.3453371218448318e-06, - "loss": 1.0545, - "step": 3822 - }, - { - "epoch": 0.28731399368705846, - "grad_norm": 1.8784370708956917, - "learning_rate": 3.3449768460707465e-06, - "loss": 0.9913, - "step": 3823 - }, - { - "epoch": 0.2873891477528934, - "grad_norm": 0.8183428050255087, - "learning_rate": 3.344616490601633e-06, - "loss": 0.8252, - "step": 3824 - }, - { - "epoch": 0.2874643018187284, - "grad_norm": 1.734406479558419, - "learning_rate": 3.3442560554588444e-06, - "loss": 1.0112, - "step": 3825 - }, - { - "epoch": 0.28753945588456337, - "grad_norm": 1.8088890147749357, - "learning_rate": 3.3438955406637365e-06, - "loss": 1.006, - "step": 3826 - }, - { - "epoch": 0.28761460995039834, - "grad_norm": 2.559201979232552, - "learning_rate": 3.3435349462376713e-06, - "loss": 0.9584, - "step": 3827 - }, - { - "epoch": 0.28768976401623325, - "grad_norm": 1.8625501561932036, - "learning_rate": 3.343174272202017e-06, - "loss": 0.9729, - "step": 3828 - }, - { - "epoch": 0.2877649180820682, - "grad_norm": 2.376090652605282, - "learning_rate": 3.3428135185781425e-06, - "loss": 0.9568, - "step": 3829 - }, - { - "epoch": 0.2878400721479032, - "grad_norm": 1.6779153390971824, - "learning_rate": 3.3424526853874252e-06, - "loss": 1.0363, - "step": 3830 - }, - { - "epoch": 0.28791522621373816, - "grad_norm": 1.7877586977830715, - "learning_rate": 3.342091772651246e-06, - "loss": 0.8685, - "step": 3831 - }, - { - "epoch": 0.28799038027957313, - "grad_norm": 1.7846642281510048, - "learning_rate": 3.34173078039099e-06, - "loss": 1.0127, - "step": 3832 - }, - { - "epoch": 0.2880655343454081, - "grad_norm": 1.6447683421682227, - "learning_rate": 3.341369708628047e-06, - "loss": 1.1322, - "step": 3833 - }, - { - "epoch": 0.2881406884112431, - "grad_norm": 0.7101421308635154, - "learning_rate": 3.341008557383813e-06, - "loss": 0.8093, - "step": 3834 - }, - { - "epoch": 0.288215842477078, - "grad_norm": 0.8075901382141747, - "learning_rate": 3.3406473266796865e-06, - "loss": 0.8197, - "step": 3835 - }, - { - "epoch": 0.28829099654291296, - "grad_norm": 1.7195464629661976, - "learning_rate": 3.3402860165370724e-06, - "loss": 1.0062, - "step": 3836 - }, - { - "epoch": 0.28836615060874793, - "grad_norm": 1.6508817946310876, - "learning_rate": 3.3399246269773796e-06, - "loss": 1.0362, - "step": 3837 - }, - { - "epoch": 0.2884413046745829, - "grad_norm": 2.0461747231033427, - "learning_rate": 3.3395631580220213e-06, - "loss": 0.9522, - "step": 3838 - }, - { - "epoch": 0.28851645874041787, - "grad_norm": 1.5166587843358266, - "learning_rate": 3.3392016096924168e-06, - "loss": 1.0, - "step": 3839 - }, - { - "epoch": 0.28859161280625284, - "grad_norm": 2.119466083338932, - "learning_rate": 3.3388399820099887e-06, - "loss": 1.0097, - "step": 3840 - }, - { - "epoch": 0.28866676687208775, - "grad_norm": 1.7832318707893204, - "learning_rate": 3.3384782749961646e-06, - "loss": 1.0334, - "step": 3841 - }, - { - "epoch": 0.2887419209379227, - "grad_norm": 2.0641664650641927, - "learning_rate": 3.3381164886723777e-06, - "loss": 0.98, - "step": 3842 - }, - { - "epoch": 0.2888170750037577, - "grad_norm": 1.9425437250702529, - "learning_rate": 3.337754623060065e-06, - "loss": 1.014, - "step": 3843 - }, - { - "epoch": 0.28889222906959267, - "grad_norm": 2.360946435109871, - "learning_rate": 3.337392678180668e-06, - "loss": 1.0667, - "step": 3844 - }, - { - "epoch": 0.28896738313542764, - "grad_norm": 1.6513029387172602, - "learning_rate": 3.3370306540556336e-06, - "loss": 1.0718, - "step": 3845 - }, - { - "epoch": 0.2890425372012626, - "grad_norm": 1.4647534151833406, - "learning_rate": 3.336668550706413e-06, - "loss": 1.0367, - "step": 3846 - }, - { - "epoch": 0.2891176912670976, - "grad_norm": 2.303634765193626, - "learning_rate": 3.3363063681544628e-06, - "loss": 0.9851, - "step": 3847 - }, - { - "epoch": 0.2891928453329325, - "grad_norm": 2.253880227928748, - "learning_rate": 3.335944106421243e-06, - "loss": 1.0013, - "step": 3848 - }, - { - "epoch": 0.28926799939876746, - "grad_norm": 1.8570599930480278, - "learning_rate": 3.3355817655282188e-06, - "loss": 1.0049, - "step": 3849 - }, - { - "epoch": 0.28934315346460243, - "grad_norm": 6.23978688242914, - "learning_rate": 3.3352193454968607e-06, - "loss": 1.0006, - "step": 3850 - }, - { - "epoch": 0.2894183075304374, - "grad_norm": 1.4660083261334094, - "learning_rate": 3.334856846348644e-06, - "loss": 0.9833, - "step": 3851 - }, - { - "epoch": 0.2894934615962724, - "grad_norm": 1.7423700485044193, - "learning_rate": 3.3344942681050477e-06, - "loss": 0.8087, - "step": 3852 - }, - { - "epoch": 0.28956861566210734, - "grad_norm": 2.3231421277120643, - "learning_rate": 3.3341316107875552e-06, - "loss": 1.0152, - "step": 3853 - }, - { - "epoch": 0.28964376972794226, - "grad_norm": 1.5695555271608541, - "learning_rate": 3.3337688744176564e-06, - "loss": 1.0394, - "step": 3854 - }, - { - "epoch": 0.2897189237937772, - "grad_norm": 2.075967457451926, - "learning_rate": 3.3334060590168447e-06, - "loss": 0.9859, - "step": 3855 - }, - { - "epoch": 0.2897940778596122, - "grad_norm": 1.7537663665247065, - "learning_rate": 3.333043164606618e-06, - "loss": 1.0712, - "step": 3856 - }, - { - "epoch": 0.28986923192544717, - "grad_norm": 1.8445814165453376, - "learning_rate": 3.332680191208479e-06, - "loss": 0.925, - "step": 3857 - }, - { - "epoch": 0.28994438599128214, - "grad_norm": 2.6956717873644913, - "learning_rate": 3.3323171388439353e-06, - "loss": 1.0629, - "step": 3858 - }, - { - "epoch": 0.2900195400571171, - "grad_norm": 1.5467657956194962, - "learning_rate": 3.3319540075344996e-06, - "loss": 0.9069, - "step": 3859 - }, - { - "epoch": 0.2900946941229521, - "grad_norm": 2.448497319175432, - "learning_rate": 3.331590797301689e-06, - "loss": 0.9316, - "step": 3860 - }, - { - "epoch": 0.290169848188787, - "grad_norm": 2.024661014582072, - "learning_rate": 3.331227508167024e-06, - "loss": 0.9956, - "step": 3861 - }, - { - "epoch": 0.29024500225462196, - "grad_norm": 1.8751349718469421, - "learning_rate": 3.330864140152032e-06, - "loss": 1.1142, - "step": 3862 - }, - { - "epoch": 0.29032015632045693, - "grad_norm": 1.4406451772075344, - "learning_rate": 3.3305006932782435e-06, - "loss": 1.0496, - "step": 3863 - }, - { - "epoch": 0.2903953103862919, - "grad_norm": 2.2482916386355214, - "learning_rate": 3.3301371675671935e-06, - "loss": 0.9331, - "step": 3864 - }, - { - "epoch": 0.2904704644521269, - "grad_norm": 0.7402575647752823, - "learning_rate": 3.329773563040423e-06, - "loss": 0.8431, - "step": 3865 - }, - { - "epoch": 0.29054561851796185, - "grad_norm": 1.8559078299351834, - "learning_rate": 3.3294098797194776e-06, - "loss": 0.9601, - "step": 3866 - }, - { - "epoch": 0.29062077258379676, - "grad_norm": 1.6559299663625013, - "learning_rate": 3.3290461176259054e-06, - "loss": 0.9783, - "step": 3867 - }, - { - "epoch": 0.29069592664963173, - "grad_norm": 0.7588111353821797, - "learning_rate": 3.3286822767812618e-06, - "loss": 0.8517, - "step": 3868 - }, - { - "epoch": 0.2907710807154667, - "grad_norm": 2.05228251411964, - "learning_rate": 3.3283183572071054e-06, - "loss": 0.9957, - "step": 3869 - }, - { - "epoch": 0.29084623478130167, - "grad_norm": 2.1650103204494404, - "learning_rate": 3.3279543589249998e-06, - "loss": 1.0549, - "step": 3870 - }, - { - "epoch": 0.29092138884713664, - "grad_norm": 5.291110612784771, - "learning_rate": 3.3275902819565127e-06, - "loss": 0.9525, - "step": 3871 - }, - { - "epoch": 0.2909965429129716, - "grad_norm": 2.530582198602823, - "learning_rate": 3.3272261263232195e-06, - "loss": 0.9101, - "step": 3872 - }, - { - "epoch": 0.2910716969788065, - "grad_norm": 2.42067152085479, - "learning_rate": 3.326861892046694e-06, - "loss": 0.9705, - "step": 3873 - }, - { - "epoch": 0.2911468510446415, - "grad_norm": 1.7285583145990777, - "learning_rate": 3.3264975791485218e-06, - "loss": 1.0272, - "step": 3874 - }, - { - "epoch": 0.29122200511047647, - "grad_norm": 2.176111055920114, - "learning_rate": 3.3261331876502884e-06, - "loss": 0.9421, - "step": 3875 - }, - { - "epoch": 0.29129715917631144, - "grad_norm": 2.448495079585548, - "learning_rate": 3.325768717573585e-06, - "loss": 1.0344, - "step": 3876 - }, - { - "epoch": 0.2913723132421464, - "grad_norm": 1.8125904981448082, - "learning_rate": 3.325404168940009e-06, - "loss": 1.0144, - "step": 3877 - }, - { - "epoch": 0.2914474673079814, - "grad_norm": 1.4962714266537536, - "learning_rate": 3.3250395417711605e-06, - "loss": 1.0534, - "step": 3878 - }, - { - "epoch": 0.29152262137381635, - "grad_norm": 2.131151607624999, - "learning_rate": 3.3246748360886453e-06, - "loss": 1.0656, - "step": 3879 - }, - { - "epoch": 0.29159777543965126, - "grad_norm": 1.4349305782738522, - "learning_rate": 3.324310051914073e-06, - "loss": 0.9974, - "step": 3880 - }, - { - "epoch": 0.29167292950548623, - "grad_norm": 1.3536774949992814, - "learning_rate": 3.323945189269059e-06, - "loss": 1.0013, - "step": 3881 - }, - { - "epoch": 0.2917480835713212, - "grad_norm": 2.074594697090505, - "learning_rate": 3.323580248175223e-06, - "loss": 0.9368, - "step": 3882 - }, - { - "epoch": 0.2918232376371562, - "grad_norm": 2.0045429132996295, - "learning_rate": 3.3232152286541898e-06, - "loss": 0.9823, - "step": 3883 - }, - { - "epoch": 0.29189839170299114, - "grad_norm": 3.134377714024716, - "learning_rate": 3.3228501307275866e-06, - "loss": 0.9491, - "step": 3884 - }, - { - "epoch": 0.2919735457688261, - "grad_norm": 1.613951019839108, - "learning_rate": 3.3224849544170475e-06, - "loss": 0.9298, - "step": 3885 - }, - { - "epoch": 0.29204869983466103, - "grad_norm": 2.742591762902728, - "learning_rate": 3.3221196997442107e-06, - "loss": 0.9214, - "step": 3886 - }, - { - "epoch": 0.292123853900496, - "grad_norm": 1.8794341743472545, - "learning_rate": 3.3217543667307196e-06, - "loss": 0.9755, - "step": 3887 - }, - { - "epoch": 0.29219900796633097, - "grad_norm": 3.4098558850752654, - "learning_rate": 3.3213889553982206e-06, - "loss": 1.0771, - "step": 3888 - }, - { - "epoch": 0.29227416203216594, - "grad_norm": 0.8014414972953647, - "learning_rate": 3.321023465768366e-06, - "loss": 0.882, - "step": 3889 - }, - { - "epoch": 0.2923493160980009, - "grad_norm": 1.842618109904259, - "learning_rate": 3.320657897862812e-06, - "loss": 1.0186, - "step": 3890 - }, - { - "epoch": 0.2924244701638359, - "grad_norm": 4.124140794454097, - "learning_rate": 3.320292251703221e-06, - "loss": 0.8758, - "step": 3891 - }, - { - "epoch": 0.29249962422967085, - "grad_norm": 1.7664313036606796, - "learning_rate": 3.3199265273112583e-06, - "loss": 0.8938, - "step": 3892 - }, - { - "epoch": 0.29257477829550577, - "grad_norm": 2.3504825786439145, - "learning_rate": 3.3195607247085945e-06, - "loss": 0.873, - "step": 3893 - }, - { - "epoch": 0.29264993236134074, - "grad_norm": 1.5273282179567107, - "learning_rate": 3.319194843916905e-06, - "loss": 0.8679, - "step": 3894 - }, - { - "epoch": 0.2927250864271757, - "grad_norm": 3.0458898272060893, - "learning_rate": 3.3188288849578694e-06, - "loss": 0.9825, - "step": 3895 - }, - { - "epoch": 0.2928002404930107, - "grad_norm": 1.923375915979326, - "learning_rate": 3.318462847853172e-06, - "loss": 1.053, - "step": 3896 - }, - { - "epoch": 0.29287539455884565, - "grad_norm": 0.6907143875893501, - "learning_rate": 3.3180967326245018e-06, - "loss": 0.8178, - "step": 3897 - }, - { - "epoch": 0.2929505486246806, - "grad_norm": 1.70971949870043, - "learning_rate": 3.3177305392935536e-06, - "loss": 1.0306, - "step": 3898 - }, - { - "epoch": 0.29302570269051553, - "grad_norm": 1.7632086769755904, - "learning_rate": 3.317364267882025e-06, - "loss": 1.0179, - "step": 3899 - }, - { - "epoch": 0.2931008567563505, - "grad_norm": 1.854132434086056, - "learning_rate": 3.3169979184116182e-06, - "loss": 1.1048, - "step": 3900 - }, - { - "epoch": 0.2931760108221855, - "grad_norm": 1.96834396534933, - "learning_rate": 3.3166314909040427e-06, - "loss": 0.9902, - "step": 3901 - }, - { - "epoch": 0.29325116488802044, - "grad_norm": 2.320390077058315, - "learning_rate": 3.316264985381009e-06, - "loss": 0.9826, - "step": 3902 - }, - { - "epoch": 0.2933263189538554, - "grad_norm": 1.748619079706351, - "learning_rate": 3.315898401864235e-06, - "loss": 0.9834, - "step": 3903 - }, - { - "epoch": 0.2934014730196904, - "grad_norm": 1.637835210603773, - "learning_rate": 3.315531740375441e-06, - "loss": 0.9368, - "step": 3904 - }, - { - "epoch": 0.29347662708552535, - "grad_norm": 2.107231690148375, - "learning_rate": 3.3151650009363544e-06, - "loss": 0.992, - "step": 3905 - }, - { - "epoch": 0.29355178115136027, - "grad_norm": 1.4001079364448956, - "learning_rate": 3.3147981835687054e-06, - "loss": 0.9743, - "step": 3906 - }, - { - "epoch": 0.29362693521719524, - "grad_norm": 1.3089838613647438, - "learning_rate": 3.314431288294229e-06, - "loss": 1.0415, - "step": 3907 - }, - { - "epoch": 0.2937020892830302, - "grad_norm": 2.146876430372181, - "learning_rate": 3.314064315134666e-06, - "loss": 0.9736, - "step": 3908 - }, - { - "epoch": 0.2937772433488652, - "grad_norm": 1.722927772766014, - "learning_rate": 3.31369726411176e-06, - "loss": 0.9302, - "step": 3909 - }, - { - "epoch": 0.29385239741470015, - "grad_norm": 1.7530582136464692, - "learning_rate": 3.313330135247261e-06, - "loss": 1.0435, - "step": 3910 - }, - { - "epoch": 0.2939275514805351, - "grad_norm": 1.452954928382337, - "learning_rate": 3.312962928562922e-06, - "loss": 0.9183, - "step": 3911 - }, - { - "epoch": 0.29400270554637004, - "grad_norm": 1.6950454853222674, - "learning_rate": 3.312595644080502e-06, - "loss": 0.862, - "step": 3912 - }, - { - "epoch": 0.294077859612205, - "grad_norm": 1.8255866270593542, - "learning_rate": 3.312228281821764e-06, - "loss": 0.9577, - "step": 3913 - }, - { - "epoch": 0.29415301367804, - "grad_norm": 2.5527340948079895, - "learning_rate": 3.311860841808475e-06, - "loss": 1.013, - "step": 3914 - }, - { - "epoch": 0.29422816774387495, - "grad_norm": 1.6864530353176252, - "learning_rate": 3.311493324062408e-06, - "loss": 0.93, - "step": 3915 - }, - { - "epoch": 0.2943033218097099, - "grad_norm": 2.123126606641627, - "learning_rate": 3.3111257286053394e-06, - "loss": 0.9448, - "step": 3916 - }, - { - "epoch": 0.2943784758755449, - "grad_norm": 2.788962055182064, - "learning_rate": 3.310758055459051e-06, - "loss": 0.9434, - "step": 3917 - }, - { - "epoch": 0.2944536299413798, - "grad_norm": 2.015500323557467, - "learning_rate": 3.3103903046453282e-06, - "loss": 1.0387, - "step": 3918 - }, - { - "epoch": 0.2945287840072148, - "grad_norm": 1.783301995201109, - "learning_rate": 3.3100224761859626e-06, - "loss": 1.0203, - "step": 3919 - }, - { - "epoch": 0.29460393807304974, - "grad_norm": 1.6945830854733526, - "learning_rate": 3.309654570102748e-06, - "loss": 1.0235, - "step": 3920 - }, - { - "epoch": 0.2946790921388847, - "grad_norm": 1.6566000514513906, - "learning_rate": 3.309286586417486e-06, - "loss": 0.9415, - "step": 3921 - }, - { - "epoch": 0.2947542462047197, - "grad_norm": 7.3814544881973045, - "learning_rate": 3.3089185251519797e-06, - "loss": 0.9309, - "step": 3922 - }, - { - "epoch": 0.29482940027055465, - "grad_norm": 1.6160750686375691, - "learning_rate": 3.3085503863280387e-06, - "loss": 0.9836, - "step": 3923 - }, - { - "epoch": 0.2949045543363896, - "grad_norm": 2.916089373177859, - "learning_rate": 3.3081821699674763e-06, - "loss": 0.9819, - "step": 3924 - }, - { - "epoch": 0.29497970840222454, - "grad_norm": 2.220409256439624, - "learning_rate": 3.307813876092111e-06, - "loss": 0.9832, - "step": 3925 - }, - { - "epoch": 0.2950548624680595, - "grad_norm": 1.904707925888829, - "learning_rate": 3.307445504723766e-06, - "loss": 1.0683, - "step": 3926 - }, - { - "epoch": 0.2951300165338945, - "grad_norm": 0.7347280181394954, - "learning_rate": 3.307077055884268e-06, - "loss": 0.8807, - "step": 3927 - }, - { - "epoch": 0.29520517059972945, - "grad_norm": 1.6672882192660978, - "learning_rate": 3.3067085295954497e-06, - "loss": 1.0584, - "step": 3928 - }, - { - "epoch": 0.2952803246655644, - "grad_norm": 2.6307266983527158, - "learning_rate": 3.306339925879147e-06, - "loss": 0.9448, - "step": 3929 - }, - { - "epoch": 0.2953554787313994, - "grad_norm": 1.9145120520980754, - "learning_rate": 3.305971244757201e-06, - "loss": 1.0078, - "step": 3930 - }, - { - "epoch": 0.2954306327972343, - "grad_norm": 1.9367349406216148, - "learning_rate": 3.305602486251458e-06, - "loss": 1.0138, - "step": 3931 - }, - { - "epoch": 0.2955057868630693, - "grad_norm": 1.707082717084723, - "learning_rate": 3.3052336503837686e-06, - "loss": 0.9268, - "step": 3932 - }, - { - "epoch": 0.29558094092890425, - "grad_norm": 1.9829361873252882, - "learning_rate": 3.304864737175987e-06, - "loss": 1.0059, - "step": 3933 - }, - { - "epoch": 0.2956560949947392, - "grad_norm": 1.251585098424525, - "learning_rate": 3.3044957466499736e-06, - "loss": 0.9068, - "step": 3934 - }, - { - "epoch": 0.2957312490605742, - "grad_norm": 1.6904547219541044, - "learning_rate": 3.3041266788275913e-06, - "loss": 1.0093, - "step": 3935 - }, - { - "epoch": 0.29580640312640916, - "grad_norm": 2.235858451464254, - "learning_rate": 3.303757533730709e-06, - "loss": 1.0508, - "step": 3936 - }, - { - "epoch": 0.2958815571922441, - "grad_norm": 1.4535000224866363, - "learning_rate": 3.3033883113812017e-06, - "loss": 1.0132, - "step": 3937 - }, - { - "epoch": 0.29595671125807904, - "grad_norm": 1.699068610913722, - "learning_rate": 3.303019011800946e-06, - "loss": 1.0095, - "step": 3938 - }, - { - "epoch": 0.296031865323914, - "grad_norm": 1.8018917871302556, - "learning_rate": 3.302649635011823e-06, - "loss": 1.0525, - "step": 3939 - }, - { - "epoch": 0.296107019389749, - "grad_norm": 2.7090052333653096, - "learning_rate": 3.302280181035722e-06, - "loss": 1.0422, - "step": 3940 - }, - { - "epoch": 0.29618217345558395, - "grad_norm": 2.482808416902175, - "learning_rate": 3.301910649894533e-06, - "loss": 0.9917, - "step": 3941 - }, - { - "epoch": 0.2962573275214189, - "grad_norm": 1.7329071982179247, - "learning_rate": 3.3015410416101527e-06, - "loss": 0.9814, - "step": 3942 - }, - { - "epoch": 0.2963324815872539, - "grad_norm": 1.7063709090434402, - "learning_rate": 3.301171356204482e-06, - "loss": 1.0792, - "step": 3943 - }, - { - "epoch": 0.2964076356530888, - "grad_norm": 1.74578083144507, - "learning_rate": 3.300801593699425e-06, - "loss": 0.9182, - "step": 3944 - }, - { - "epoch": 0.2964827897189238, - "grad_norm": 0.6196523290705659, - "learning_rate": 3.300431754116894e-06, - "loss": 0.803, - "step": 3945 - }, - { - "epoch": 0.29655794378475875, - "grad_norm": 2.1932412147115565, - "learning_rate": 3.3000618374788e-06, - "loss": 0.8426, - "step": 3946 - }, - { - "epoch": 0.2966330978505937, - "grad_norm": 1.5169233286624222, - "learning_rate": 3.299691843807065e-06, - "loss": 1.0497, - "step": 3947 - }, - { - "epoch": 0.2967082519164287, - "grad_norm": 4.640256719393335, - "learning_rate": 3.2993217731236118e-06, - "loss": 1.0174, - "step": 3948 - }, - { - "epoch": 0.29678340598226366, - "grad_norm": 2.3146799483394305, - "learning_rate": 3.2989516254503677e-06, - "loss": 0.8307, - "step": 3949 - }, - { - "epoch": 0.29685856004809863, - "grad_norm": 2.2862892406969126, - "learning_rate": 3.298581400809266e-06, - "loss": 0.9089, - "step": 3950 - }, - { - "epoch": 0.29693371411393354, - "grad_norm": 1.845012297343802, - "learning_rate": 3.298211099222243e-06, - "loss": 0.9649, - "step": 3951 - }, - { - "epoch": 0.2970088681797685, - "grad_norm": 1.5954726390064597, - "learning_rate": 3.2978407207112416e-06, - "loss": 0.988, - "step": 3952 - }, - { - "epoch": 0.2970840222456035, - "grad_norm": 2.1422284657284902, - "learning_rate": 3.297470265298208e-06, - "loss": 0.8943, - "step": 3953 - }, - { - "epoch": 0.29715917631143846, - "grad_norm": 2.1123210537753043, - "learning_rate": 3.2970997330050923e-06, - "loss": 0.978, - "step": 3954 - }, - { - "epoch": 0.2972343303772734, - "grad_norm": 2.1669031772151355, - "learning_rate": 3.2967291238538507e-06, - "loss": 0.9416, - "step": 3955 - }, - { - "epoch": 0.2973094844431084, - "grad_norm": 1.9552910962182377, - "learning_rate": 3.296358437866443e-06, - "loss": 0.9745, - "step": 3956 - }, - { - "epoch": 0.2973846385089433, - "grad_norm": 2.312724540737735, - "learning_rate": 3.2959876750648338e-06, - "loss": 1.0085, - "step": 3957 - }, - { - "epoch": 0.2974597925747783, - "grad_norm": 1.7371545461928246, - "learning_rate": 3.2956168354709923e-06, - "loss": 1.0031, - "step": 3958 - }, - { - "epoch": 0.29753494664061325, - "grad_norm": 2.1462784339474297, - "learning_rate": 3.295245919106892e-06, - "loss": 0.932, - "step": 3959 - }, - { - "epoch": 0.2976101007064482, - "grad_norm": 4.220169619726522, - "learning_rate": 3.294874925994511e-06, - "loss": 1.042, - "step": 3960 - }, - { - "epoch": 0.2976852547722832, - "grad_norm": 1.8324214950753044, - "learning_rate": 3.2945038561558324e-06, - "loss": 0.9067, - "step": 3961 - }, - { - "epoch": 0.29776040883811816, - "grad_norm": 1.9016040405650978, - "learning_rate": 3.2941327096128435e-06, - "loss": 1.0212, - "step": 3962 - }, - { - "epoch": 0.2978355629039531, - "grad_norm": 0.7624119660884469, - "learning_rate": 3.2937614863875353e-06, - "loss": 0.9228, - "step": 3963 - }, - { - "epoch": 0.29791071696978805, - "grad_norm": 2.297493883691394, - "learning_rate": 3.293390186501906e-06, - "loss": 0.9358, - "step": 3964 - }, - { - "epoch": 0.297985871035623, - "grad_norm": 2.150305695203523, - "learning_rate": 3.2930188099779546e-06, - "loss": 0.9122, - "step": 3965 - }, - { - "epoch": 0.298061025101458, - "grad_norm": 1.6176224275743485, - "learning_rate": 3.292647356837688e-06, - "loss": 1.0446, - "step": 3966 - }, - { - "epoch": 0.29813617916729296, - "grad_norm": 2.547898724529966, - "learning_rate": 3.2922758271031147e-06, - "loss": 1.0421, - "step": 3967 - }, - { - "epoch": 0.29821133323312793, - "grad_norm": 1.991517496574712, - "learning_rate": 3.2919042207962506e-06, - "loss": 0.9048, - "step": 3968 - }, - { - "epoch": 0.2982864872989629, - "grad_norm": 1.6368561097548397, - "learning_rate": 3.2915325379391147e-06, - "loss": 0.9724, - "step": 3969 - }, - { - "epoch": 0.2983616413647978, - "grad_norm": 1.3201473516524862, - "learning_rate": 3.2911607785537297e-06, - "loss": 1.0234, - "step": 3970 - }, - { - "epoch": 0.2984367954306328, - "grad_norm": 1.4373964189445798, - "learning_rate": 3.290788942662125e-06, - "loss": 1.0608, - "step": 3971 - }, - { - "epoch": 0.29851194949646775, - "grad_norm": 1.6027336490637827, - "learning_rate": 3.290417030286333e-06, - "loss": 1.0267, - "step": 3972 - }, - { - "epoch": 0.2985871035623027, - "grad_norm": 1.860833245343031, - "learning_rate": 3.2900450414483897e-06, - "loss": 0.9534, - "step": 3973 - }, - { - "epoch": 0.2986622576281377, - "grad_norm": 1.6751157065894422, - "learning_rate": 3.2896729761703386e-06, - "loss": 0.9617, - "step": 3974 - }, - { - "epoch": 0.29873741169397267, - "grad_norm": 1.6326785968102167, - "learning_rate": 3.2893008344742244e-06, - "loss": 0.9114, - "step": 3975 - }, - { - "epoch": 0.2988125657598076, - "grad_norm": 1.5905860461428443, - "learning_rate": 3.288928616382099e-06, - "loss": 1.0107, - "step": 3976 - }, - { - "epoch": 0.29888771982564255, - "grad_norm": 3.9130003929069743, - "learning_rate": 3.288556321916018e-06, - "loss": 0.9781, - "step": 3977 - }, - { - "epoch": 0.2989628738914775, - "grad_norm": 2.2808553275888905, - "learning_rate": 3.2881839510980403e-06, - "loss": 1.079, - "step": 3978 - }, - { - "epoch": 0.2990380279573125, - "grad_norm": 1.5049275683085557, - "learning_rate": 3.2878115039502304e-06, - "loss": 0.9384, - "step": 3979 - }, - { - "epoch": 0.29911318202314746, - "grad_norm": 6.401022960448239, - "learning_rate": 3.2874389804946575e-06, - "loss": 1.0056, - "step": 3980 - }, - { - "epoch": 0.29918833608898243, - "grad_norm": 1.4628056296497112, - "learning_rate": 3.287066380753395e-06, - "loss": 1.0612, - "step": 3981 - }, - { - "epoch": 0.2992634901548174, - "grad_norm": 5.366002025477325, - "learning_rate": 3.2866937047485216e-06, - "loss": 1.0597, - "step": 3982 - }, - { - "epoch": 0.2993386442206523, - "grad_norm": 1.7183839581589677, - "learning_rate": 3.2863209525021186e-06, - "loss": 0.976, - "step": 3983 - }, - { - "epoch": 0.2994137982864873, - "grad_norm": 1.5312162123085917, - "learning_rate": 3.285948124036274e-06, - "loss": 1.0061, - "step": 3984 - }, - { - "epoch": 0.29948895235232226, - "grad_norm": 1.7719821419860116, - "learning_rate": 3.2855752193730786e-06, - "loss": 1.0317, - "step": 3985 - }, - { - "epoch": 0.2995641064181572, - "grad_norm": 1.915384700072189, - "learning_rate": 3.2852022385346283e-06, - "loss": 1.0243, - "step": 3986 - }, - { - "epoch": 0.2996392604839922, - "grad_norm": 4.5935517352649535, - "learning_rate": 3.2848291815430245e-06, - "loss": 0.9968, - "step": 3987 - }, - { - "epoch": 0.29971441454982717, - "grad_norm": 1.5172559477179914, - "learning_rate": 3.2844560484203717e-06, - "loss": 1.0473, - "step": 3988 - }, - { - "epoch": 0.2997895686156621, - "grad_norm": 1.5291699664975076, - "learning_rate": 3.2840828391887792e-06, - "loss": 0.8902, - "step": 3989 - }, - { - "epoch": 0.29986472268149705, - "grad_norm": 1.7539004047525788, - "learning_rate": 3.2837095538703613e-06, - "loss": 0.9121, - "step": 3990 - }, - { - "epoch": 0.299939876747332, - "grad_norm": 1.8292833228773109, - "learning_rate": 3.283336192487237e-06, - "loss": 0.8946, - "step": 3991 - }, - { - "epoch": 0.300015030813167, - "grad_norm": 1.4291188928091096, - "learning_rate": 3.282962755061529e-06, - "loss": 0.988, - "step": 3992 - }, - { - "epoch": 0.30009018487900196, - "grad_norm": 1.726797924993248, - "learning_rate": 3.2825892416153656e-06, - "loss": 1.0842, - "step": 3993 - }, - { - "epoch": 0.30016533894483693, - "grad_norm": 1.3647702455516113, - "learning_rate": 3.282215652170877e-06, - "loss": 1.0627, - "step": 3994 - }, - { - "epoch": 0.3002404930106719, - "grad_norm": 1.854759450158009, - "learning_rate": 3.2818419867502024e-06, - "loss": 1.0224, - "step": 3995 - }, - { - "epoch": 0.3003156470765068, - "grad_norm": 1.8478883109628381, - "learning_rate": 3.2814682453754805e-06, - "loss": 1.0119, - "step": 3996 - }, - { - "epoch": 0.3003908011423418, - "grad_norm": 33.13987726054142, - "learning_rate": 3.281094428068858e-06, - "loss": 1.0049, - "step": 3997 - }, - { - "epoch": 0.30046595520817676, - "grad_norm": 1.7988725257057583, - "learning_rate": 3.280720534852486e-06, - "loss": 1.0409, - "step": 3998 - }, - { - "epoch": 0.30054110927401173, - "grad_norm": 0.7873597232071186, - "learning_rate": 3.2803465657485175e-06, - "loss": 0.8571, - "step": 3999 - }, - { - "epoch": 0.3006162633398467, - "grad_norm": 1.5256439662881172, - "learning_rate": 3.279972520779112e-06, - "loss": 0.9943, - "step": 4000 - }, - { - "epoch": 0.30069141740568167, - "grad_norm": 1.783866165751919, - "learning_rate": 3.279598399966433e-06, - "loss": 0.9327, - "step": 4001 - }, - { - "epoch": 0.3007665714715166, - "grad_norm": 2.0312815737104386, - "learning_rate": 3.27922420333265e-06, - "loss": 1.0168, - "step": 4002 - }, - { - "epoch": 0.30084172553735156, - "grad_norm": 1.8039732341134065, - "learning_rate": 3.278849930899934e-06, - "loss": 0.9368, - "step": 4003 - }, - { - "epoch": 0.3009168796031865, - "grad_norm": 1.5695507422414552, - "learning_rate": 3.278475582690462e-06, - "loss": 1.0301, - "step": 4004 - }, - { - "epoch": 0.3009920336690215, - "grad_norm": 4.746801604556742, - "learning_rate": 3.2781011587264173e-06, - "loss": 0.9777, - "step": 4005 - }, - { - "epoch": 0.30106718773485647, - "grad_norm": 0.7761067271429258, - "learning_rate": 3.2777266590299835e-06, - "loss": 0.8784, - "step": 4006 - }, - { - "epoch": 0.30114234180069144, - "grad_norm": 2.7672830557627583, - "learning_rate": 3.277352083623353e-06, - "loss": 0.9491, - "step": 4007 - }, - { - "epoch": 0.30121749586652635, - "grad_norm": 2.2929072981078114, - "learning_rate": 3.2769774325287197e-06, - "loss": 0.9884, - "step": 4008 - }, - { - "epoch": 0.3012926499323613, - "grad_norm": 2.953819717959865, - "learning_rate": 3.2766027057682844e-06, - "loss": 1.068, - "step": 4009 - }, - { - "epoch": 0.3013678039981963, - "grad_norm": 1.6319414215420622, - "learning_rate": 3.27622790336425e-06, - "loss": 0.9703, - "step": 4010 - }, - { - "epoch": 0.30144295806403126, - "grad_norm": 1.7191397485121884, - "learning_rate": 3.2758530253388255e-06, - "loss": 0.96, - "step": 4011 - }, - { - "epoch": 0.30151811212986623, - "grad_norm": 0.7112641474920758, - "learning_rate": 3.2754780717142233e-06, - "loss": 0.8262, - "step": 4012 - }, - { - "epoch": 0.3015932661957012, - "grad_norm": 1.9006076543757215, - "learning_rate": 3.2751030425126616e-06, - "loss": 1.0694, - "step": 4013 - }, - { - "epoch": 0.3016684202615362, - "grad_norm": 1.840815649861469, - "learning_rate": 3.2747279377563616e-06, - "loss": 1.0203, - "step": 4014 - }, - { - "epoch": 0.3017435743273711, - "grad_norm": 3.866208347474821, - "learning_rate": 3.2743527574675507e-06, - "loss": 1.0446, - "step": 4015 - }, - { - "epoch": 0.30181872839320606, - "grad_norm": 1.31615837321656, - "learning_rate": 3.2739775016684584e-06, - "loss": 1.0281, - "step": 4016 - }, - { - "epoch": 0.30189388245904103, - "grad_norm": 2.020446923925472, - "learning_rate": 3.2736021703813214e-06, - "loss": 1.0275, - "step": 4017 - }, - { - "epoch": 0.301969036524876, - "grad_norm": 1.7263275953306623, - "learning_rate": 3.2732267636283782e-06, - "loss": 1.0083, - "step": 4018 - }, - { - "epoch": 0.30204419059071097, - "grad_norm": 0.7339800726422893, - "learning_rate": 3.2728512814318744e-06, - "loss": 0.8945, - "step": 4019 - }, - { - "epoch": 0.30211934465654594, - "grad_norm": 1.5003335899709331, - "learning_rate": 3.2724757238140572e-06, - "loss": 0.9667, - "step": 4020 - }, - { - "epoch": 0.30219449872238086, - "grad_norm": 1.7881066046644913, - "learning_rate": 3.2721000907971813e-06, - "loss": 1.0546, - "step": 4021 - }, - { - "epoch": 0.3022696527882158, - "grad_norm": 1.9211263051169756, - "learning_rate": 3.2717243824035037e-06, - "loss": 0.8966, - "step": 4022 - }, - { - "epoch": 0.3023448068540508, - "grad_norm": 1.9454201051909463, - "learning_rate": 3.2713485986552865e-06, - "loss": 0.9634, - "step": 4023 - }, - { - "epoch": 0.30241996091988577, - "grad_norm": 3.024558478046272, - "learning_rate": 3.2709727395747974e-06, - "loss": 1.01, - "step": 4024 - }, - { - "epoch": 0.30249511498572074, - "grad_norm": 0.6561249205054542, - "learning_rate": 3.2705968051843053e-06, - "loss": 0.7712, - "step": 4025 - }, - { - "epoch": 0.3025702690515557, - "grad_norm": 1.4203650872931128, - "learning_rate": 3.270220795506088e-06, - "loss": 1.0069, - "step": 4026 - }, - { - "epoch": 0.3026454231173907, - "grad_norm": 2.0810383044653693, - "learning_rate": 3.269844710562424e-06, - "loss": 1.047, - "step": 4027 - }, - { - "epoch": 0.3027205771832256, - "grad_norm": 2.5845460403837426, - "learning_rate": 3.269468550375599e-06, - "loss": 0.9497, - "step": 4028 - }, - { - "epoch": 0.30279573124906056, - "grad_norm": 1.6088641939337498, - "learning_rate": 3.2690923149679008e-06, - "loss": 0.961, - "step": 4029 - }, - { - "epoch": 0.30287088531489553, - "grad_norm": 1.5621387827099529, - "learning_rate": 3.268716004361623e-06, - "loss": 0.967, - "step": 4030 - }, - { - "epoch": 0.3029460393807305, - "grad_norm": 2.0657079775212117, - "learning_rate": 3.2683396185790644e-06, - "loss": 1.0353, - "step": 4031 - }, - { - "epoch": 0.3030211934465655, - "grad_norm": 1.4347586822964937, - "learning_rate": 3.2679631576425265e-06, - "loss": 1.0085, - "step": 4032 - }, - { - "epoch": 0.30309634751240044, - "grad_norm": 2.308012242025241, - "learning_rate": 3.267586621574315e-06, - "loss": 0.983, - "step": 4033 - }, - { - "epoch": 0.30317150157823536, - "grad_norm": 1.6816638763273521, - "learning_rate": 3.2672100103967434e-06, - "loss": 0.9335, - "step": 4034 - }, - { - "epoch": 0.30324665564407033, - "grad_norm": 1.733841685185914, - "learning_rate": 3.266833324132126e-06, - "loss": 1.0411, - "step": 4035 - }, - { - "epoch": 0.3033218097099053, - "grad_norm": 2.011681891538859, - "learning_rate": 3.2664565628027833e-06, - "loss": 1.0905, - "step": 4036 - }, - { - "epoch": 0.30339696377574027, - "grad_norm": 4.129252005169021, - "learning_rate": 3.2660797264310393e-06, - "loss": 0.9528, - "step": 4037 - }, - { - "epoch": 0.30347211784157524, - "grad_norm": 1.6501999965027396, - "learning_rate": 3.2657028150392236e-06, - "loss": 1.026, - "step": 4038 - }, - { - "epoch": 0.3035472719074102, - "grad_norm": 4.5691446491336905, - "learning_rate": 3.2653258286496696e-06, - "loss": 1.025, - "step": 4039 - }, - { - "epoch": 0.3036224259732452, - "grad_norm": 2.839706900341335, - "learning_rate": 3.2649487672847143e-06, - "loss": 0.9917, - "step": 4040 - }, - { - "epoch": 0.3036975800390801, - "grad_norm": 1.560602791062533, - "learning_rate": 3.264571630966701e-06, - "loss": 1.0413, - "step": 4041 - }, - { - "epoch": 0.30377273410491507, - "grad_norm": 1.8666107691615317, - "learning_rate": 3.2641944197179767e-06, - "loss": 0.992, - "step": 4042 - }, - { - "epoch": 0.30384788817075004, - "grad_norm": 1.8999682875546577, - "learning_rate": 3.2638171335608914e-06, - "loss": 0.9352, - "step": 4043 - }, - { - "epoch": 0.303923042236585, - "grad_norm": 2.5523619726748956, - "learning_rate": 3.263439772517802e-06, - "loss": 1.0263, - "step": 4044 - }, - { - "epoch": 0.30399819630242, - "grad_norm": 2.755275088524927, - "learning_rate": 3.2630623366110673e-06, - "loss": 0.9314, - "step": 4045 - }, - { - "epoch": 0.30407335036825495, - "grad_norm": 1.777976122692594, - "learning_rate": 3.2626848258630533e-06, - "loss": 0.8741, - "step": 4046 - }, - { - "epoch": 0.30414850443408986, - "grad_norm": 1.6451583717659308, - "learning_rate": 3.2623072402961283e-06, - "loss": 0.8816, - "step": 4047 - }, - { - "epoch": 0.30422365849992483, - "grad_norm": 1.8863159128735012, - "learning_rate": 3.2619295799326657e-06, - "loss": 0.9428, - "step": 4048 - }, - { - "epoch": 0.3042988125657598, - "grad_norm": 2.6897671030753982, - "learning_rate": 3.2615518447950425e-06, - "loss": 1.0373, - "step": 4049 - }, - { - "epoch": 0.3043739666315948, - "grad_norm": 1.6580770959127928, - "learning_rate": 3.2611740349056424e-06, - "loss": 0.9812, - "step": 4050 - }, - { - "epoch": 0.30444912069742974, - "grad_norm": 1.3308162125736465, - "learning_rate": 3.2607961502868507e-06, - "loss": 1.0426, - "step": 4051 - }, - { - "epoch": 0.3045242747632647, - "grad_norm": 1.3976001470232724, - "learning_rate": 3.2604181909610595e-06, - "loss": 1.0026, - "step": 4052 - }, - { - "epoch": 0.3045994288290996, - "grad_norm": 1.461092935544977, - "learning_rate": 3.2600401569506646e-06, - "loss": 0.9565, - "step": 4053 - }, - { - "epoch": 0.3046745828949346, - "grad_norm": 2.1305258093852766, - "learning_rate": 3.2596620482780647e-06, - "loss": 0.9288, - "step": 4054 - }, - { - "epoch": 0.30474973696076957, - "grad_norm": 2.0191305268386244, - "learning_rate": 3.2592838649656648e-06, - "loss": 1.0132, - "step": 4055 - }, - { - "epoch": 0.30482489102660454, - "grad_norm": 1.6529803841979531, - "learning_rate": 3.2589056070358743e-06, - "loss": 1.021, - "step": 4056 - }, - { - "epoch": 0.3049000450924395, - "grad_norm": 1.6583725625898145, - "learning_rate": 3.258527274511105e-06, - "loss": 0.9902, - "step": 4057 - }, - { - "epoch": 0.3049751991582745, - "grad_norm": 1.4771281778534298, - "learning_rate": 3.2581488674137766e-06, - "loss": 1.0726, - "step": 4058 - }, - { - "epoch": 0.30505035322410945, - "grad_norm": 1.6929712257666412, - "learning_rate": 3.2577703857663094e-06, - "loss": 0.9309, - "step": 4059 - }, - { - "epoch": 0.30512550728994436, - "grad_norm": 1.7680374813163198, - "learning_rate": 3.2573918295911306e-06, - "loss": 0.9949, - "step": 4060 - }, - { - "epoch": 0.30520066135577933, - "grad_norm": 1.9035011859285886, - "learning_rate": 3.257013198910671e-06, - "loss": 0.9535, - "step": 4061 - }, - { - "epoch": 0.3052758154216143, - "grad_norm": 2.102387787390646, - "learning_rate": 3.256634493747366e-06, - "loss": 1.0407, - "step": 4062 - }, - { - "epoch": 0.3053509694874493, - "grad_norm": 2.1958772740561034, - "learning_rate": 3.256255714123655e-06, - "loss": 0.9739, - "step": 4063 - }, - { - "epoch": 0.30542612355328425, - "grad_norm": 1.9354506082778578, - "learning_rate": 3.255876860061983e-06, - "loss": 0.978, - "step": 4064 - }, - { - "epoch": 0.3055012776191192, - "grad_norm": 1.5040598605093354, - "learning_rate": 3.2554979315847977e-06, - "loss": 1.0039, - "step": 4065 - }, - { - "epoch": 0.30557643168495413, - "grad_norm": 2.2393312770739047, - "learning_rate": 3.255118928714552e-06, - "loss": 0.8915, - "step": 4066 - }, - { - "epoch": 0.3056515857507891, - "grad_norm": 2.273505350784908, - "learning_rate": 3.2547398514737038e-06, - "loss": 0.9247, - "step": 4067 - }, - { - "epoch": 0.30572673981662407, - "grad_norm": 1.6882760418092677, - "learning_rate": 3.2543606998847145e-06, - "loss": 0.9531, - "step": 4068 - }, - { - "epoch": 0.30580189388245904, - "grad_norm": 1.9210362038325948, - "learning_rate": 3.253981473970051e-06, - "loss": 0.9764, - "step": 4069 - }, - { - "epoch": 0.305877047948294, - "grad_norm": 1.6632343075973974, - "learning_rate": 3.253602173752183e-06, - "loss": 0.9331, - "step": 4070 - }, - { - "epoch": 0.305952202014129, - "grad_norm": 1.9506912937895586, - "learning_rate": 3.253222799253586e-06, - "loss": 0.9562, - "step": 4071 - }, - { - "epoch": 0.30602735607996395, - "grad_norm": 1.8263058210402037, - "learning_rate": 3.2528433504967394e-06, - "loss": 0.8989, - "step": 4072 - }, - { - "epoch": 0.30610251014579887, - "grad_norm": 2.2447525929696126, - "learning_rate": 3.252463827504126e-06, - "loss": 1.0498, - "step": 4073 - }, - { - "epoch": 0.30617766421163384, - "grad_norm": 1.6247882705081031, - "learning_rate": 3.2520842302982356e-06, - "loss": 0.975, - "step": 4074 - }, - { - "epoch": 0.3062528182774688, - "grad_norm": 1.9962991806593449, - "learning_rate": 3.2517045589015602e-06, - "loss": 0.9541, - "step": 4075 - }, - { - "epoch": 0.3063279723433038, - "grad_norm": 1.5293878404010466, - "learning_rate": 3.251324813336596e-06, - "loss": 0.9382, - "step": 4076 - }, - { - "epoch": 0.30640312640913875, - "grad_norm": 1.3144830527448306, - "learning_rate": 3.2509449936258452e-06, - "loss": 0.9631, - "step": 4077 - }, - { - "epoch": 0.3064782804749737, - "grad_norm": 1.7416143458172002, - "learning_rate": 3.2505650997918127e-06, - "loss": 1.0781, - "step": 4078 - }, - { - "epoch": 0.30655343454080863, - "grad_norm": 1.4110391235929416, - "learning_rate": 3.2501851318570103e-06, - "loss": 0.9194, - "step": 4079 - }, - { - "epoch": 0.3066285886066436, - "grad_norm": 1.6632858398144825, - "learning_rate": 3.249805089843951e-06, - "loss": 1.0318, - "step": 4080 - }, - { - "epoch": 0.3067037426724786, - "grad_norm": 1.4852330990112081, - "learning_rate": 3.249424973775155e-06, - "loss": 0.9549, - "step": 4081 - }, - { - "epoch": 0.30677889673831354, - "grad_norm": 1.473868440328913, - "learning_rate": 3.249044783673144e-06, - "loss": 0.9191, - "step": 4082 - }, - { - "epoch": 0.3068540508041485, - "grad_norm": 1.96495835442128, - "learning_rate": 3.2486645195604466e-06, - "loss": 1.0182, - "step": 4083 - }, - { - "epoch": 0.3069292048699835, - "grad_norm": 1.9388800597452853, - "learning_rate": 3.2482841814595954e-06, - "loss": 0.9863, - "step": 4084 - }, - { - "epoch": 0.30700435893581846, - "grad_norm": 1.8876467489832793, - "learning_rate": 3.247903769393127e-06, - "loss": 0.9927, - "step": 4085 - }, - { - "epoch": 0.30707951300165337, - "grad_norm": 5.558336735866864, - "learning_rate": 3.247523283383581e-06, - "loss": 1.0372, - "step": 4086 - }, - { - "epoch": 0.30715466706748834, - "grad_norm": 1.4050604133242608, - "learning_rate": 3.2471427234535034e-06, - "loss": 1.0531, - "step": 4087 - }, - { - "epoch": 0.3072298211333233, - "grad_norm": 2.488122093584193, - "learning_rate": 3.246762089625444e-06, - "loss": 0.8962, - "step": 4088 - }, - { - "epoch": 0.3073049751991583, - "grad_norm": 1.9079281658629446, - "learning_rate": 3.2463813819219565e-06, - "loss": 1.0918, - "step": 4089 - }, - { - "epoch": 0.30738012926499325, - "grad_norm": 1.6976860061333905, - "learning_rate": 3.2460006003655993e-06, - "loss": 1.1196, - "step": 4090 - }, - { - "epoch": 0.3074552833308282, - "grad_norm": 1.7197156101031845, - "learning_rate": 3.245619744978936e-06, - "loss": 0.9133, - "step": 4091 - }, - { - "epoch": 0.30753043739666314, - "grad_norm": 1.6011719018024593, - "learning_rate": 3.2452388157845322e-06, - "loss": 0.886, - "step": 4092 - }, - { - "epoch": 0.3076055914624981, - "grad_norm": 1.6390077885459564, - "learning_rate": 3.2448578128049607e-06, - "loss": 1.1377, - "step": 4093 - }, - { - "epoch": 0.3076807455283331, - "grad_norm": 0.874822632660275, - "learning_rate": 3.2444767360627964e-06, - "loss": 0.9185, - "step": 4094 - }, - { - "epoch": 0.30775589959416805, - "grad_norm": 2.2811197740147007, - "learning_rate": 3.2440955855806203e-06, - "loss": 0.953, - "step": 4095 - }, - { - "epoch": 0.307831053660003, - "grad_norm": 1.5406999736130997, - "learning_rate": 3.2437143613810173e-06, - "loss": 0.9833, - "step": 4096 - }, - { - "epoch": 0.307906207725838, - "grad_norm": 2.08418857821732, - "learning_rate": 3.2433330634865757e-06, - "loss": 1.0714, - "step": 4097 - }, - { - "epoch": 0.3079813617916729, - "grad_norm": 3.139327507168633, - "learning_rate": 3.242951691919888e-06, - "loss": 0.8725, - "step": 4098 - }, - { - "epoch": 0.3080565158575079, - "grad_norm": 1.9177196830428849, - "learning_rate": 3.242570246703554e-06, - "loss": 0.8861, - "step": 4099 - }, - { - "epoch": 0.30813166992334284, - "grad_norm": 1.998657849103849, - "learning_rate": 3.242188727860174e-06, - "loss": 0.9634, - "step": 4100 - }, - { - "epoch": 0.3082068239891778, - "grad_norm": 1.5932075474912033, - "learning_rate": 3.2418071354123566e-06, - "loss": 0.9304, - "step": 4101 - }, - { - "epoch": 0.3082819780550128, - "grad_norm": 1.6614427316664573, - "learning_rate": 3.2414254693827098e-06, - "loss": 1.0025, - "step": 4102 - }, - { - "epoch": 0.30835713212084775, - "grad_norm": 1.508500253633725, - "learning_rate": 3.2410437297938512e-06, - "loss": 0.9554, - "step": 4103 - }, - { - "epoch": 0.3084322861866827, - "grad_norm": 1.6364922196028175, - "learning_rate": 3.240661916668399e-06, - "loss": 0.9558, - "step": 4104 - }, - { - "epoch": 0.30850744025251764, - "grad_norm": 0.8636502474143432, - "learning_rate": 3.2402800300289773e-06, - "loss": 0.9502, - "step": 4105 - }, - { - "epoch": 0.3085825943183526, - "grad_norm": 6.036553931500979, - "learning_rate": 3.2398980698982143e-06, - "loss": 0.8533, - "step": 4106 - }, - { - "epoch": 0.3086577483841876, - "grad_norm": 2.0829518413937644, - "learning_rate": 3.2395160362987432e-06, - "loss": 1.0253, - "step": 4107 - }, - { - "epoch": 0.30873290245002255, - "grad_norm": 1.6328060752340705, - "learning_rate": 3.2391339292532004e-06, - "loss": 1.027, - "step": 4108 - }, - { - "epoch": 0.3088080565158575, - "grad_norm": 1.9389550682634151, - "learning_rate": 3.2387517487842273e-06, - "loss": 1.0934, - "step": 4109 - }, - { - "epoch": 0.3088832105816925, - "grad_norm": 1.1937311720237083, - "learning_rate": 3.2383694949144693e-06, - "loss": 1.0038, - "step": 4110 - }, - { - "epoch": 0.3089583646475274, - "grad_norm": 1.5825541821163995, - "learning_rate": 3.2379871676665767e-06, - "loss": 1.0744, - "step": 4111 - }, - { - "epoch": 0.3090335187133624, - "grad_norm": 1.5647444055021162, - "learning_rate": 3.237604767063204e-06, - "loss": 1.0186, - "step": 4112 - }, - { - "epoch": 0.30910867277919735, - "grad_norm": 1.5960911777380382, - "learning_rate": 3.23722229312701e-06, - "loss": 0.9921, - "step": 4113 - }, - { - "epoch": 0.3091838268450323, - "grad_norm": 1.5660443351627804, - "learning_rate": 3.2368397458806573e-06, - "loss": 0.9582, - "step": 4114 - }, - { - "epoch": 0.3092589809108673, - "grad_norm": 1.6874931476595179, - "learning_rate": 3.236457125346814e-06, - "loss": 0.9442, - "step": 4115 - }, - { - "epoch": 0.30933413497670226, - "grad_norm": 1.757621246105993, - "learning_rate": 3.23607443154815e-06, - "loss": 0.978, - "step": 4116 - }, - { - "epoch": 0.3094092890425372, - "grad_norm": 2.059058352310049, - "learning_rate": 3.2356916645073444e-06, - "loss": 1.0197, - "step": 4117 - }, - { - "epoch": 0.30948444310837214, - "grad_norm": 1.4262965277154482, - "learning_rate": 3.2353088242470744e-06, - "loss": 0.8437, - "step": 4118 - }, - { - "epoch": 0.3095595971742071, - "grad_norm": 1.9476982715050426, - "learning_rate": 3.2349259107900267e-06, - "loss": 0.9753, - "step": 4119 - }, - { - "epoch": 0.3096347512400421, - "grad_norm": 5.046447546841003, - "learning_rate": 3.2345429241588902e-06, - "loss": 0.966, - "step": 4120 - }, - { - "epoch": 0.30970990530587705, - "grad_norm": 2.3003480564971626, - "learning_rate": 3.234159864376358e-06, - "loss": 0.9796, - "step": 4121 - }, - { - "epoch": 0.309785059371712, - "grad_norm": 2.51080884811595, - "learning_rate": 3.233776731465128e-06, - "loss": 0.9072, - "step": 4122 - }, - { - "epoch": 0.309860213437547, - "grad_norm": 2.397143583406416, - "learning_rate": 3.233393525447902e-06, - "loss": 0.9256, - "step": 4123 - }, - { - "epoch": 0.3099353675033819, - "grad_norm": 1.7070110677752508, - "learning_rate": 3.2330102463473867e-06, - "loss": 1.0082, - "step": 4124 - }, - { - "epoch": 0.3100105215692169, - "grad_norm": 2.442020625821673, - "learning_rate": 3.2326268941862927e-06, - "loss": 1.0162, - "step": 4125 - }, - { - "epoch": 0.31008567563505185, - "grad_norm": 1.616889004641156, - "learning_rate": 3.2322434689873353e-06, - "loss": 1.0718, - "step": 4126 - }, - { - "epoch": 0.3101608297008868, - "grad_norm": 1.5070877309827666, - "learning_rate": 3.231859970773234e-06, - "loss": 1.0279, - "step": 4127 - }, - { - "epoch": 0.3102359837667218, - "grad_norm": 1.9287379472990094, - "learning_rate": 3.231476399566712e-06, - "loss": 0.9454, - "step": 4128 - }, - { - "epoch": 0.31031113783255676, - "grad_norm": 1.9302971992547058, - "learning_rate": 3.231092755390498e-06, - "loss": 1.0924, - "step": 4129 - }, - { - "epoch": 0.31038629189839173, - "grad_norm": 0.7208886463427433, - "learning_rate": 3.230709038267324e-06, - "loss": 0.7892, - "step": 4130 - }, - { - "epoch": 0.31046144596422665, - "grad_norm": 1.9479890357733132, - "learning_rate": 3.2303252482199265e-06, - "loss": 0.9639, - "step": 4131 - }, - { - "epoch": 0.3105366000300616, - "grad_norm": 1.7535393210016912, - "learning_rate": 3.2299413852710466e-06, - "loss": 0.9072, - "step": 4132 - }, - { - "epoch": 0.3106117540958966, - "grad_norm": 1.814290116361635, - "learning_rate": 3.2295574494434307e-06, - "loss": 0.9934, - "step": 4133 - }, - { - "epoch": 0.31068690816173156, - "grad_norm": 1.864499507561289, - "learning_rate": 3.229173440759827e-06, - "loss": 1.0445, - "step": 4134 - }, - { - "epoch": 0.3107620622275665, - "grad_norm": 2.246046301299095, - "learning_rate": 3.22878935924299e-06, - "loss": 1.0283, - "step": 4135 - }, - { - "epoch": 0.3108372162934015, - "grad_norm": 2.560513540555806, - "learning_rate": 3.2284052049156783e-06, - "loss": 0.9539, - "step": 4136 - }, - { - "epoch": 0.3109123703592364, - "grad_norm": 2.9607511678157232, - "learning_rate": 3.2280209778006545e-06, - "loss": 1.0205, - "step": 4137 - }, - { - "epoch": 0.3109875244250714, - "grad_norm": 3.118416527175754, - "learning_rate": 3.227636677920685e-06, - "loss": 1.0458, - "step": 4138 - }, - { - "epoch": 0.31106267849090635, - "grad_norm": 1.0316174748288502, - "learning_rate": 3.227252305298542e-06, - "loss": 0.9704, - "step": 4139 - }, - { - "epoch": 0.3111378325567413, - "grad_norm": 1.7126141698049504, - "learning_rate": 3.2268678599570002e-06, - "loss": 1.0619, - "step": 4140 - }, - { - "epoch": 0.3112129866225763, - "grad_norm": 3.853765449604317, - "learning_rate": 3.2264833419188397e-06, - "loss": 1.008, - "step": 4141 - }, - { - "epoch": 0.31128814068841126, - "grad_norm": 3.0176271107472177, - "learning_rate": 3.2260987512068443e-06, - "loss": 0.8468, - "step": 4142 - }, - { - "epoch": 0.3113632947542462, - "grad_norm": 1.581797490687158, - "learning_rate": 3.225714087843803e-06, - "loss": 1.0441, - "step": 4143 - }, - { - "epoch": 0.31143844882008115, - "grad_norm": 1.6814307814050062, - "learning_rate": 3.225329351852509e-06, - "loss": 1.0211, - "step": 4144 - }, - { - "epoch": 0.3115136028859161, - "grad_norm": 2.4112073686706097, - "learning_rate": 3.2249445432557584e-06, - "loss": 0.9946, - "step": 4145 - }, - { - "epoch": 0.3115887569517511, - "grad_norm": 1.8368113933711636, - "learning_rate": 3.224559662076353e-06, - "loss": 0.9703, - "step": 4146 - }, - { - "epoch": 0.31166391101758606, - "grad_norm": 1.6209364481522293, - "learning_rate": 3.224174708337098e-06, - "loss": 0.987, - "step": 4147 - }, - { - "epoch": 0.31173906508342103, - "grad_norm": 1.654619529992313, - "learning_rate": 3.2237896820608047e-06, - "loss": 1.0137, - "step": 4148 - }, - { - "epoch": 0.311814219149256, - "grad_norm": 1.4939156157167393, - "learning_rate": 3.223404583270286e-06, - "loss": 1.1012, - "step": 4149 - }, - { - "epoch": 0.3118893732150909, - "grad_norm": 3.201786412851505, - "learning_rate": 3.223019411988361e-06, - "loss": 1.0697, - "step": 4150 - }, - { - "epoch": 0.3119645272809259, - "grad_norm": 2.8849011636659396, - "learning_rate": 3.2226341682378525e-06, - "loss": 1.0108, - "step": 4151 - }, - { - "epoch": 0.31203968134676086, - "grad_norm": 1.5832393601621875, - "learning_rate": 3.2222488520415876e-06, - "loss": 1.021, - "step": 4152 - }, - { - "epoch": 0.3121148354125958, - "grad_norm": 1.738072895035836, - "learning_rate": 3.221863463422399e-06, - "loss": 1.0678, - "step": 4153 - }, - { - "epoch": 0.3121899894784308, - "grad_norm": 1.822416800129143, - "learning_rate": 3.2214780024031204e-06, - "loss": 1.0321, - "step": 4154 - }, - { - "epoch": 0.31226514354426577, - "grad_norm": 1.763528980705958, - "learning_rate": 3.221092469006593e-06, - "loss": 0.9299, - "step": 4155 - }, - { - "epoch": 0.3123402976101007, - "grad_norm": 2.061437043910058, - "learning_rate": 3.220706863255661e-06, - "loss": 1.032, - "step": 4156 - }, - { - "epoch": 0.31241545167593565, - "grad_norm": 1.6267305842463693, - "learning_rate": 3.220321185173173e-06, - "loss": 1.0228, - "step": 4157 - }, - { - "epoch": 0.3124906057417706, - "grad_norm": 1.5558240560382037, - "learning_rate": 3.219935434781982e-06, - "loss": 0.9622, - "step": 4158 - }, - { - "epoch": 0.3125657598076056, - "grad_norm": 2.267936947580215, - "learning_rate": 3.2195496121049447e-06, - "loss": 1.0927, - "step": 4159 - }, - { - "epoch": 0.31264091387344056, - "grad_norm": 1.7625975859119605, - "learning_rate": 3.219163717164923e-06, - "loss": 0.9837, - "step": 4160 - }, - { - "epoch": 0.31271606793927553, - "grad_norm": 1.961303615967752, - "learning_rate": 3.218777749984782e-06, - "loss": 1.0603, - "step": 4161 - }, - { - "epoch": 0.3127912220051105, - "grad_norm": 1.967288489018181, - "learning_rate": 3.2183917105873934e-06, - "loss": 0.8512, - "step": 4162 - }, - { - "epoch": 0.3128663760709454, - "grad_norm": 2.0105419086982694, - "learning_rate": 3.21800559899563e-06, - "loss": 0.9429, - "step": 4163 - }, - { - "epoch": 0.3129415301367804, - "grad_norm": 2.464447908612129, - "learning_rate": 3.217619415232371e-06, - "loss": 1.0787, - "step": 4164 - }, - { - "epoch": 0.31301668420261536, - "grad_norm": 2.3826907267487982, - "learning_rate": 3.217233159320498e-06, - "loss": 1.0486, - "step": 4165 - }, - { - "epoch": 0.31309183826845033, - "grad_norm": 2.1664963679755322, - "learning_rate": 3.2168468312829005e-06, - "loss": 0.9362, - "step": 4166 - }, - { - "epoch": 0.3131669923342853, - "grad_norm": 2.457802655975747, - "learning_rate": 3.2164604311424677e-06, - "loss": 0.9825, - "step": 4167 - }, - { - "epoch": 0.31324214640012027, - "grad_norm": 0.7692836227507469, - "learning_rate": 3.2160739589220968e-06, - "loss": 0.8611, - "step": 4168 - }, - { - "epoch": 0.3133173004659552, - "grad_norm": 2.631174908289119, - "learning_rate": 3.2156874146446872e-06, - "loss": 0.9847, - "step": 4169 - }, - { - "epoch": 0.31339245453179015, - "grad_norm": 2.233735639956126, - "learning_rate": 3.2153007983331437e-06, - "loss": 1.081, - "step": 4170 - }, - { - "epoch": 0.3134676085976251, - "grad_norm": 2.582543026620173, - "learning_rate": 3.214914110010373e-06, - "loss": 1.0056, - "step": 4171 - }, - { - "epoch": 0.3135427626634601, - "grad_norm": 1.8748833302121357, - "learning_rate": 3.214527349699289e-06, - "loss": 1.0403, - "step": 4172 - }, - { - "epoch": 0.31361791672929507, - "grad_norm": 2.4288037032758805, - "learning_rate": 3.21414051742281e-06, - "loss": 1.0011, - "step": 4173 - }, - { - "epoch": 0.31369307079513004, - "grad_norm": 2.9340377857606885, - "learning_rate": 3.2137536132038552e-06, - "loss": 1.0308, - "step": 4174 - }, - { - "epoch": 0.31376822486096495, - "grad_norm": 2.144624301580111, - "learning_rate": 3.213366637065351e-06, - "loss": 1.0267, - "step": 4175 - }, - { - "epoch": 0.3138433789267999, - "grad_norm": 2.4320752349120913, - "learning_rate": 3.212979589030228e-06, - "loss": 1.0668, - "step": 4176 - }, - { - "epoch": 0.3139185329926349, - "grad_norm": 3.072339192241413, - "learning_rate": 3.212592469121419e-06, - "loss": 1.0859, - "step": 4177 - }, - { - "epoch": 0.31399368705846986, - "grad_norm": 1.7676570305678827, - "learning_rate": 3.2122052773618625e-06, - "loss": 1.1038, - "step": 4178 - }, - { - "epoch": 0.31406884112430483, - "grad_norm": 3.080363432757351, - "learning_rate": 3.211818013774502e-06, - "loss": 1.0469, - "step": 4179 - }, - { - "epoch": 0.3141439951901398, - "grad_norm": 1.5488221984980797, - "learning_rate": 3.211430678382284e-06, - "loss": 1.0439, - "step": 4180 - }, - { - "epoch": 0.3142191492559748, - "grad_norm": 1.3453232626312783, - "learning_rate": 3.211043271208159e-06, - "loss": 0.9307, - "step": 4181 - }, - { - "epoch": 0.3142943033218097, - "grad_norm": 0.7319995527213976, - "learning_rate": 3.2106557922750826e-06, - "loss": 0.8346, - "step": 4182 - }, - { - "epoch": 0.31436945738764466, - "grad_norm": 4.658450022937988, - "learning_rate": 3.210268241606015e-06, - "loss": 0.9355, - "step": 4183 - }, - { - "epoch": 0.3144446114534796, - "grad_norm": 0.7212902458505558, - "learning_rate": 3.2098806192239196e-06, - "loss": 0.8921, - "step": 4184 - }, - { - "epoch": 0.3145197655193146, - "grad_norm": 3.2879449753531014, - "learning_rate": 3.209492925151765e-06, - "loss": 0.8698, - "step": 4185 - }, - { - "epoch": 0.31459491958514957, - "grad_norm": 2.0180162543104387, - "learning_rate": 3.209105159412522e-06, - "loss": 0.9546, - "step": 4186 - }, - { - "epoch": 0.31467007365098454, - "grad_norm": 1.9666590410289448, - "learning_rate": 3.2087173220291695e-06, - "loss": 1.077, - "step": 4187 - }, - { - "epoch": 0.31474522771681945, - "grad_norm": 2.248232040932494, - "learning_rate": 3.2083294130246865e-06, - "loss": 0.9699, - "step": 4188 - }, - { - "epoch": 0.3148203817826544, - "grad_norm": 1.602116930757726, - "learning_rate": 3.207941432422059e-06, - "loss": 1.025, - "step": 4189 - }, - { - "epoch": 0.3148955358484894, - "grad_norm": 1.7430672242009189, - "learning_rate": 3.2075533802442757e-06, - "loss": 0.9413, - "step": 4190 - }, - { - "epoch": 0.31497068991432436, - "grad_norm": 0.7548477539764887, - "learning_rate": 3.2071652565143313e-06, - "loss": 0.9146, - "step": 4191 - }, - { - "epoch": 0.31504584398015933, - "grad_norm": 2.730135874179066, - "learning_rate": 3.206777061255223e-06, - "loss": 1.0083, - "step": 4192 - }, - { - "epoch": 0.3151209980459943, - "grad_norm": 1.9729991652558965, - "learning_rate": 3.206388794489952e-06, - "loss": 0.9994, - "step": 4193 - }, - { - "epoch": 0.3151961521118293, - "grad_norm": 2.040277345728138, - "learning_rate": 3.2060004562415258e-06, - "loss": 1.0331, - "step": 4194 - }, - { - "epoch": 0.3152713061776642, - "grad_norm": 1.4944469661447142, - "learning_rate": 3.205612046532954e-06, - "loss": 0.8911, - "step": 4195 - }, - { - "epoch": 0.31534646024349916, - "grad_norm": 2.4256478732140576, - "learning_rate": 3.2052235653872525e-06, - "loss": 1.1022, - "step": 4196 - }, - { - "epoch": 0.31542161430933413, - "grad_norm": 3.3623764469179496, - "learning_rate": 3.2048350128274395e-06, - "loss": 1.0185, - "step": 4197 - }, - { - "epoch": 0.3154967683751691, - "grad_norm": 2.5207905305953933, - "learning_rate": 3.2044463888765384e-06, - "loss": 1.048, - "step": 4198 - }, - { - "epoch": 0.31557192244100407, - "grad_norm": 1.5507119881563411, - "learning_rate": 3.204057693557576e-06, - "loss": 1.0606, - "step": 4199 - }, - { - "epoch": 0.31564707650683904, - "grad_norm": 1.521928480019369, - "learning_rate": 3.203668926893585e-06, - "loss": 1.0139, - "step": 4200 - }, - { - "epoch": 0.31572223057267396, - "grad_norm": 0.7138982471227066, - "learning_rate": 3.2032800889076014e-06, - "loss": 0.8671, - "step": 4201 - }, - { - "epoch": 0.3157973846385089, - "grad_norm": 1.5645015866206637, - "learning_rate": 3.2028911796226642e-06, - "loss": 0.932, - "step": 4202 - }, - { - "epoch": 0.3158725387043439, - "grad_norm": 0.7990726758825902, - "learning_rate": 3.2025021990618193e-06, - "loss": 0.9041, - "step": 4203 - }, - { - "epoch": 0.31594769277017887, - "grad_norm": 1.6992406383288514, - "learning_rate": 3.2021131472481135e-06, - "loss": 0.9604, - "step": 4204 - }, - { - "epoch": 0.31602284683601384, - "grad_norm": 1.3980240024039672, - "learning_rate": 3.2017240242046005e-06, - "loss": 0.9777, - "step": 4205 - }, - { - "epoch": 0.3160980009018488, - "grad_norm": 1.9388082455925058, - "learning_rate": 3.2013348299543382e-06, - "loss": 1.0541, - "step": 4206 - }, - { - "epoch": 0.3161731549676838, - "grad_norm": 1.8106004034234267, - "learning_rate": 3.2009455645203864e-06, - "loss": 0.9426, - "step": 4207 - }, - { - "epoch": 0.3162483090335187, - "grad_norm": 3.3619006221863597, - "learning_rate": 3.2005562279258113e-06, - "loss": 1.0231, - "step": 4208 - }, - { - "epoch": 0.31632346309935366, - "grad_norm": 2.1652320978798576, - "learning_rate": 3.200166820193682e-06, - "loss": 0.9175, - "step": 4209 - }, - { - "epoch": 0.31639861716518863, - "grad_norm": 1.500017324983363, - "learning_rate": 3.1997773413470736e-06, - "loss": 1.0348, - "step": 4210 - }, - { - "epoch": 0.3164737712310236, - "grad_norm": 1.725537509378977, - "learning_rate": 3.1993877914090632e-06, - "loss": 1.0118, - "step": 4211 - }, - { - "epoch": 0.3165489252968586, - "grad_norm": 2.08255622357403, - "learning_rate": 3.198998170402733e-06, - "loss": 1.038, - "step": 4212 - }, - { - "epoch": 0.31662407936269354, - "grad_norm": 3.9017719693797583, - "learning_rate": 3.1986084783511708e-06, - "loss": 0.9538, - "step": 4213 - }, - { - "epoch": 0.31669923342852846, - "grad_norm": 1.6517372551367586, - "learning_rate": 3.198218715277466e-06, - "loss": 1.0223, - "step": 4214 - }, - { - "epoch": 0.31677438749436343, - "grad_norm": 2.14120753917026, - "learning_rate": 3.1978288812047136e-06, - "loss": 0.9692, - "step": 4215 - }, - { - "epoch": 0.3168495415601984, - "grad_norm": 1.4866352741422406, - "learning_rate": 3.1974389761560137e-06, - "loss": 1.0242, - "step": 4216 - }, - { - "epoch": 0.31692469562603337, - "grad_norm": 2.083416962534531, - "learning_rate": 3.197049000154469e-06, - "loss": 0.9827, - "step": 4217 - }, - { - "epoch": 0.31699984969186834, - "grad_norm": 1.8805294519951468, - "learning_rate": 3.1966589532231876e-06, - "loss": 0.8765, - "step": 4218 - }, - { - "epoch": 0.3170750037577033, - "grad_norm": 1.842844207164866, - "learning_rate": 3.1962688353852805e-06, - "loss": 1.1216, - "step": 4219 - }, - { - "epoch": 0.3171501578235382, - "grad_norm": 1.655872661863934, - "learning_rate": 3.195878646663864e-06, - "loss": 0.9555, - "step": 4220 - }, - { - "epoch": 0.3172253118893732, - "grad_norm": 6.550789402573937, - "learning_rate": 3.195488387082059e-06, - "loss": 0.9841, - "step": 4221 - }, - { - "epoch": 0.31730046595520817, - "grad_norm": 1.7283096794243626, - "learning_rate": 3.1950980566629886e-06, - "loss": 0.8887, - "step": 4222 - }, - { - "epoch": 0.31737562002104314, - "grad_norm": 0.7920750434705228, - "learning_rate": 3.194707655429782e-06, - "loss": 0.8928, - "step": 4223 - }, - { - "epoch": 0.3174507740868781, - "grad_norm": 2.668124237668073, - "learning_rate": 3.1943171834055723e-06, - "loss": 1.077, - "step": 4224 - }, - { - "epoch": 0.3175259281527131, - "grad_norm": 1.6874516091649312, - "learning_rate": 3.1939266406134963e-06, - "loss": 1.0199, - "step": 4225 - }, - { - "epoch": 0.31760108221854805, - "grad_norm": 2.0230393183644955, - "learning_rate": 3.1935360270766945e-06, - "loss": 1.0059, - "step": 4226 - }, - { - "epoch": 0.31767623628438296, - "grad_norm": 1.7086853812109797, - "learning_rate": 3.1931453428183125e-06, - "loss": 1.0636, - "step": 4227 - }, - { - "epoch": 0.31775139035021793, - "grad_norm": 1.5274006474426505, - "learning_rate": 3.1927545878615005e-06, - "loss": 1.0542, - "step": 4228 - }, - { - "epoch": 0.3178265444160529, - "grad_norm": 2.608502636209669, - "learning_rate": 3.1923637622294123e-06, - "loss": 1.0475, - "step": 4229 - }, - { - "epoch": 0.3179016984818879, - "grad_norm": 1.5202745517177836, - "learning_rate": 3.191972865945205e-06, - "loss": 0.9618, - "step": 4230 - }, - { - "epoch": 0.31797685254772284, - "grad_norm": 1.5225511368806317, - "learning_rate": 3.191581899032041e-06, - "loss": 1.0088, - "step": 4231 - }, - { - "epoch": 0.3180520066135578, - "grad_norm": 1.6281536019241982, - "learning_rate": 3.1911908615130862e-06, - "loss": 0.9586, - "step": 4232 - }, - { - "epoch": 0.31812716067939273, - "grad_norm": 1.8405830210123013, - "learning_rate": 3.1907997534115118e-06, - "loss": 1.0147, - "step": 4233 - }, - { - "epoch": 0.3182023147452277, - "grad_norm": 1.5859937986707253, - "learning_rate": 3.190408574750492e-06, - "loss": 1.0469, - "step": 4234 - }, - { - "epoch": 0.31827746881106267, - "grad_norm": 1.9070676082034719, - "learning_rate": 3.1900173255532057e-06, - "loss": 1.0528, - "step": 4235 - }, - { - "epoch": 0.31835262287689764, - "grad_norm": 1.3745471902339081, - "learning_rate": 3.1896260058428364e-06, - "loss": 0.9535, - "step": 4236 - }, - { - "epoch": 0.3184277769427326, - "grad_norm": 1.496616202581948, - "learning_rate": 3.1892346156425704e-06, - "loss": 1.0323, - "step": 4237 - }, - { - "epoch": 0.3185029310085676, - "grad_norm": 2.5665993813782015, - "learning_rate": 3.1888431549755998e-06, - "loss": 1.0399, - "step": 4238 - }, - { - "epoch": 0.31857808507440255, - "grad_norm": 1.6841943647525246, - "learning_rate": 3.18845162386512e-06, - "loss": 0.9717, - "step": 4239 - }, - { - "epoch": 0.31865323914023747, - "grad_norm": 1.476506469309347, - "learning_rate": 3.1880600223343303e-06, - "loss": 1.0152, - "step": 4240 - }, - { - "epoch": 0.31872839320607244, - "grad_norm": 1.9854808939752042, - "learning_rate": 3.187668350406435e-06, - "loss": 1.0512, - "step": 4241 - }, - { - "epoch": 0.3188035472719074, - "grad_norm": 2.1061563306859084, - "learning_rate": 3.187276608104642e-06, - "loss": 1.0507, - "step": 4242 - }, - { - "epoch": 0.3188787013377424, - "grad_norm": 2.020827683378626, - "learning_rate": 3.1868847954521635e-06, - "loss": 1.0104, - "step": 4243 - }, - { - "epoch": 0.31895385540357735, - "grad_norm": 2.70749403835778, - "learning_rate": 3.1864929124722162e-06, - "loss": 0.9888, - "step": 4244 - }, - { - "epoch": 0.3190290094694123, - "grad_norm": 2.028646120619029, - "learning_rate": 3.1861009591880206e-06, - "loss": 1.0389, - "step": 4245 - }, - { - "epoch": 0.31910416353524723, - "grad_norm": 1.8310194007016556, - "learning_rate": 3.1857089356228015e-06, - "loss": 0.9496, - "step": 4246 - }, - { - "epoch": 0.3191793176010822, - "grad_norm": 1.6699467171946065, - "learning_rate": 3.185316841799787e-06, - "loss": 0.9938, - "step": 4247 - }, - { - "epoch": 0.3192544716669172, - "grad_norm": 2.0117143649451603, - "learning_rate": 3.1849246777422108e-06, - "loss": 1.0229, - "step": 4248 - }, - { - "epoch": 0.31932962573275214, - "grad_norm": 1.7543129905991648, - "learning_rate": 3.1845324434733104e-06, - "loss": 0.9986, - "step": 4249 - }, - { - "epoch": 0.3194047797985871, - "grad_norm": 1.5700943069318836, - "learning_rate": 3.1841401390163263e-06, - "loss": 1.0045, - "step": 4250 - }, - { - "epoch": 0.3194799338644221, - "grad_norm": 1.8343393715512932, - "learning_rate": 3.183747764394505e-06, - "loss": 1.0081, - "step": 4251 - }, - { - "epoch": 0.31955508793025705, - "grad_norm": 4.0039019626501435, - "learning_rate": 3.1833553196310956e-06, - "loss": 1.0312, - "step": 4252 - }, - { - "epoch": 0.31963024199609197, - "grad_norm": 1.4903370833094125, - "learning_rate": 3.1829628047493523e-06, - "loss": 0.9191, - "step": 4253 - }, - { - "epoch": 0.31970539606192694, - "grad_norm": 1.8830002180677745, - "learning_rate": 3.1825702197725325e-06, - "loss": 0.9676, - "step": 4254 - }, - { - "epoch": 0.3197805501277619, - "grad_norm": 1.6004529133156271, - "learning_rate": 3.1821775647239e-06, - "loss": 0.9461, - "step": 4255 - }, - { - "epoch": 0.3198557041935969, - "grad_norm": 2.1439386045718147, - "learning_rate": 3.1817848396267188e-06, - "loss": 0.9746, - "step": 4256 - }, - { - "epoch": 0.31993085825943185, - "grad_norm": 1.7333770492738563, - "learning_rate": 3.181392044504261e-06, - "loss": 1.0954, - "step": 4257 - }, - { - "epoch": 0.3200060123252668, - "grad_norm": 1.9148740955128773, - "learning_rate": 3.1809991793798e-06, - "loss": 0.9169, - "step": 4258 - }, - { - "epoch": 0.32008116639110173, - "grad_norm": 1.4958154006067261, - "learning_rate": 3.180606244276616e-06, - "loss": 0.9824, - "step": 4259 - }, - { - "epoch": 0.3201563204569367, - "grad_norm": 1.6622404053586561, - "learning_rate": 3.180213239217991e-06, - "loss": 0.938, - "step": 4260 - }, - { - "epoch": 0.3202314745227717, - "grad_norm": 2.0786724086647097, - "learning_rate": 3.1798201642272123e-06, - "loss": 0.9515, - "step": 4261 - }, - { - "epoch": 0.32030662858860665, - "grad_norm": 0.7661841549982761, - "learning_rate": 3.179427019327571e-06, - "loss": 0.8304, - "step": 4262 - }, - { - "epoch": 0.3203817826544416, - "grad_norm": 1.6084421657971142, - "learning_rate": 3.179033804542363e-06, - "loss": 1.0276, - "step": 4263 - }, - { - "epoch": 0.3204569367202766, - "grad_norm": 1.5086836595970368, - "learning_rate": 3.178640519894886e-06, - "loss": 1.0241, - "step": 4264 - }, - { - "epoch": 0.3205320907861115, - "grad_norm": 1.828435366115391, - "learning_rate": 3.178247165408446e-06, - "loss": 1.0708, - "step": 4265 - }, - { - "epoch": 0.32060724485194647, - "grad_norm": 1.6830043247731719, - "learning_rate": 3.1778537411063487e-06, - "loss": 0.9922, - "step": 4266 - }, - { - "epoch": 0.32068239891778144, - "grad_norm": 1.874044810814804, - "learning_rate": 3.1774602470119076e-06, - "loss": 1.0776, - "step": 4267 - }, - { - "epoch": 0.3207575529836164, - "grad_norm": 2.1106122944556454, - "learning_rate": 3.177066683148438e-06, - "loss": 1.0443, - "step": 4268 - }, - { - "epoch": 0.3208327070494514, - "grad_norm": 2.056580806199153, - "learning_rate": 3.17667304953926e-06, - "loss": 1.0539, - "step": 4269 - }, - { - "epoch": 0.32090786111528635, - "grad_norm": 1.4518261098327645, - "learning_rate": 3.176279346207698e-06, - "loss": 1.0176, - "step": 4270 - }, - { - "epoch": 0.3209830151811213, - "grad_norm": 1.7711695482804868, - "learning_rate": 3.1758855731770807e-06, - "loss": 1.0012, - "step": 4271 - }, - { - "epoch": 0.32105816924695624, - "grad_norm": 0.8185562588879833, - "learning_rate": 3.1754917304707405e-06, - "loss": 0.8818, - "step": 4272 - }, - { - "epoch": 0.3211333233127912, - "grad_norm": 0.59958303188646, - "learning_rate": 3.1750978181120136e-06, - "loss": 0.8188, - "step": 4273 - }, - { - "epoch": 0.3212084773786262, - "grad_norm": 2.7234403994310084, - "learning_rate": 3.1747038361242417e-06, - "loss": 0.9804, - "step": 4274 - }, - { - "epoch": 0.32128363144446115, - "grad_norm": 6.225565027538414, - "learning_rate": 3.1743097845307692e-06, - "loss": 0.9834, - "step": 4275 - }, - { - "epoch": 0.3213587855102961, - "grad_norm": 1.5851231631251879, - "learning_rate": 3.1739156633549445e-06, - "loss": 0.8858, - "step": 4276 - }, - { - "epoch": 0.3214339395761311, - "grad_norm": 1.730580896743443, - "learning_rate": 3.1735214726201223e-06, - "loss": 0.9721, - "step": 4277 - }, - { - "epoch": 0.321509093641966, - "grad_norm": 2.1532135744520535, - "learning_rate": 3.1731272123496588e-06, - "loss": 0.834, - "step": 4278 - }, - { - "epoch": 0.321584247707801, - "grad_norm": 1.6436646413940834, - "learning_rate": 3.1727328825669164e-06, - "loss": 0.9621, - "step": 4279 - }, - { - "epoch": 0.32165940177363594, - "grad_norm": 2.0814557452073275, - "learning_rate": 3.172338483295259e-06, - "loss": 1.0449, - "step": 4280 - }, - { - "epoch": 0.3217345558394709, - "grad_norm": 1.6636450158750289, - "learning_rate": 3.1719440145580578e-06, - "loss": 0.9588, - "step": 4281 - }, - { - "epoch": 0.3218097099053059, - "grad_norm": 1.4566594280933087, - "learning_rate": 3.1715494763786855e-06, - "loss": 1.1039, - "step": 4282 - }, - { - "epoch": 0.32188486397114086, - "grad_norm": 2.428741761653512, - "learning_rate": 3.1711548687805214e-06, - "loss": 0.8882, - "step": 4283 - }, - { - "epoch": 0.3219600180369758, - "grad_norm": 1.8025542362428844, - "learning_rate": 3.170760191786946e-06, - "loss": 1.0816, - "step": 4284 - }, - { - "epoch": 0.32203517210281074, - "grad_norm": 1.4827213914575013, - "learning_rate": 3.1703654454213473e-06, - "loss": 1.0266, - "step": 4285 - }, - { - "epoch": 0.3221103261686457, - "grad_norm": 1.6662443659144142, - "learning_rate": 3.169970629707113e-06, - "loss": 0.9698, - "step": 4286 - }, - { - "epoch": 0.3221854802344807, - "grad_norm": 1.8375567498818246, - "learning_rate": 3.1695757446676396e-06, - "loss": 1.1174, - "step": 4287 - }, - { - "epoch": 0.32226063430031565, - "grad_norm": 2.2467462537273084, - "learning_rate": 3.169180790326324e-06, - "loss": 0.9112, - "step": 4288 - }, - { - "epoch": 0.3223357883661506, - "grad_norm": 1.8206539897922023, - "learning_rate": 3.1687857667065698e-06, - "loss": 1.0091, - "step": 4289 - }, - { - "epoch": 0.3224109424319856, - "grad_norm": 2.2281910077305205, - "learning_rate": 3.1683906738317838e-06, - "loss": 0.9813, - "step": 4290 - }, - { - "epoch": 0.3224860964978205, - "grad_norm": 2.169057981406417, - "learning_rate": 3.167995511725375e-06, - "loss": 1.0711, - "step": 4291 - }, - { - "epoch": 0.3225612505636555, - "grad_norm": 1.9629156462180133, - "learning_rate": 3.1676002804107607e-06, - "loss": 0.9598, - "step": 4292 - }, - { - "epoch": 0.32263640462949045, - "grad_norm": 1.6950531510653355, - "learning_rate": 3.167204979911359e-06, - "loss": 1.0377, - "step": 4293 - }, - { - "epoch": 0.3227115586953254, - "grad_norm": 1.9140595728014926, - "learning_rate": 3.166809610250592e-06, - "loss": 0.9482, - "step": 4294 - }, - { - "epoch": 0.3227867127611604, - "grad_norm": 2.1828056100505857, - "learning_rate": 3.1664141714518876e-06, - "loss": 0.9996, - "step": 4295 - }, - { - "epoch": 0.32286186682699536, - "grad_norm": 1.445508046711363, - "learning_rate": 3.1660186635386773e-06, - "loss": 0.8227, - "step": 4296 - }, - { - "epoch": 0.32293702089283033, - "grad_norm": 2.034247548788543, - "learning_rate": 3.1656230865343964e-06, - "loss": 0.8519, - "step": 4297 - }, - { - "epoch": 0.32301217495866524, - "grad_norm": 1.3582488909414403, - "learning_rate": 3.165227440462484e-06, - "loss": 1.0269, - "step": 4298 - }, - { - "epoch": 0.3230873290245002, - "grad_norm": 0.8678251791533654, - "learning_rate": 3.164831725346383e-06, - "loss": 0.9347, - "step": 4299 - }, - { - "epoch": 0.3231624830903352, - "grad_norm": 2.989437263633114, - "learning_rate": 3.1644359412095432e-06, - "loss": 1.0709, - "step": 4300 - }, - { - "epoch": 0.32323763715617015, - "grad_norm": 2.0254842061538425, - "learning_rate": 3.164040088075414e-06, - "loss": 0.9882, - "step": 4301 - }, - { - "epoch": 0.3233127912220051, - "grad_norm": 1.6369118952079675, - "learning_rate": 3.1636441659674528e-06, - "loss": 1.0687, - "step": 4302 - }, - { - "epoch": 0.3233879452878401, - "grad_norm": 1.5798650534258085, - "learning_rate": 3.1632481749091185e-06, - "loss": 1.0458, - "step": 4303 - }, - { - "epoch": 0.323463099353675, - "grad_norm": 1.5241235215448883, - "learning_rate": 3.1628521149238757e-06, - "loss": 0.9926, - "step": 4304 - }, - { - "epoch": 0.32353825341951, - "grad_norm": 1.7028905724689385, - "learning_rate": 3.1624559860351917e-06, - "loss": 1.0657, - "step": 4305 - }, - { - "epoch": 0.32361340748534495, - "grad_norm": 1.7622156901370114, - "learning_rate": 3.1620597882665393e-06, - "loss": 0.9395, - "step": 4306 - }, - { - "epoch": 0.3236885615511799, - "grad_norm": 1.441426843015371, - "learning_rate": 3.1616635216413952e-06, - "loss": 1.0638, - "step": 4307 - }, - { - "epoch": 0.3237637156170149, - "grad_norm": 1.333701822899823, - "learning_rate": 3.161267186183239e-06, - "loss": 1.1002, - "step": 4308 - }, - { - "epoch": 0.32383886968284986, - "grad_norm": 1.544682885745174, - "learning_rate": 3.160870781915555e-06, - "loss": 0.9761, - "step": 4309 - }, - { - "epoch": 0.3239140237486848, - "grad_norm": 4.3439225635967995, - "learning_rate": 3.160474308861832e-06, - "loss": 1.1043, - "step": 4310 - }, - { - "epoch": 0.32398917781451975, - "grad_norm": 2.0529947596658236, - "learning_rate": 3.160077767045562e-06, - "loss": 1.0925, - "step": 4311 - }, - { - "epoch": 0.3240643318803547, - "grad_norm": 2.0423427811699777, - "learning_rate": 3.1596811564902426e-06, - "loss": 1.047, - "step": 4312 - }, - { - "epoch": 0.3241394859461897, - "grad_norm": 1.617028417456709, - "learning_rate": 3.159284477219374e-06, - "loss": 1.0012, - "step": 4313 - }, - { - "epoch": 0.32421464001202466, - "grad_norm": 3.574192468098386, - "learning_rate": 3.1588877292564606e-06, - "loss": 0.8688, - "step": 4314 - }, - { - "epoch": 0.3242897940778596, - "grad_norm": 1.4221503860904967, - "learning_rate": 3.1584909126250116e-06, - "loss": 1.0119, - "step": 4315 - }, - { - "epoch": 0.3243649481436946, - "grad_norm": 1.5943766839090665, - "learning_rate": 3.15809402734854e-06, - "loss": 0.9316, - "step": 4316 - }, - { - "epoch": 0.3244401022095295, - "grad_norm": 2.100056615702075, - "learning_rate": 3.1576970734505624e-06, - "loss": 1.0177, - "step": 4317 - }, - { - "epoch": 0.3245152562753645, - "grad_norm": 3.1122185821160846, - "learning_rate": 3.1573000509546004e-06, - "loss": 1.0475, - "step": 4318 - }, - { - "epoch": 0.32459041034119945, - "grad_norm": 2.1512097038294185, - "learning_rate": 3.1569029598841788e-06, - "loss": 0.9125, - "step": 4319 - }, - { - "epoch": 0.3246655644070344, - "grad_norm": 1.5192235326176744, - "learning_rate": 3.1565058002628268e-06, - "loss": 0.9374, - "step": 4320 - }, - { - "epoch": 0.3247407184728694, - "grad_norm": 4.603496151714222, - "learning_rate": 3.156108572114077e-06, - "loss": 0.9989, - "step": 4321 - }, - { - "epoch": 0.32481587253870436, - "grad_norm": 1.5637836524031825, - "learning_rate": 3.155711275461468e-06, - "loss": 0.9267, - "step": 4322 - }, - { - "epoch": 0.3248910266045393, - "grad_norm": 3.604605425696879, - "learning_rate": 3.15531391032854e-06, - "loss": 0.8976, - "step": 4323 - }, - { - "epoch": 0.32496618067037425, - "grad_norm": 1.850270718020801, - "learning_rate": 3.1549164767388386e-06, - "loss": 1.1168, - "step": 4324 - }, - { - "epoch": 0.3250413347362092, - "grad_norm": 2.3582135303742295, - "learning_rate": 3.1545189747159136e-06, - "loss": 1.0585, - "step": 4325 - }, - { - "epoch": 0.3251164888020442, - "grad_norm": 1.9360730238454615, - "learning_rate": 3.1541214042833187e-06, - "loss": 0.9616, - "step": 4326 - }, - { - "epoch": 0.32519164286787916, - "grad_norm": 1.8220758369943517, - "learning_rate": 3.153723765464611e-06, - "loss": 1.0134, - "step": 4327 - }, - { - "epoch": 0.32526679693371413, - "grad_norm": 2.0838047511477082, - "learning_rate": 3.1533260582833527e-06, - "loss": 0.9993, - "step": 4328 - }, - { - "epoch": 0.3253419509995491, - "grad_norm": 2.1107516847228966, - "learning_rate": 3.152928282763109e-06, - "loss": 0.8919, - "step": 4329 - }, - { - "epoch": 0.325417105065384, - "grad_norm": 1.8337521797068408, - "learning_rate": 3.152530438927449e-06, - "loss": 1.0751, - "step": 4330 - }, - { - "epoch": 0.325492259131219, - "grad_norm": 1.809458449684334, - "learning_rate": 3.1521325267999476e-06, - "loss": 0.9615, - "step": 4331 - }, - { - "epoch": 0.32556741319705396, - "grad_norm": 1.8356719129293568, - "learning_rate": 3.1517345464041817e-06, - "loss": 1.0918, - "step": 4332 - }, - { - "epoch": 0.3256425672628889, - "grad_norm": 3.2081033863712194, - "learning_rate": 3.1513364977637344e-06, - "loss": 1.0805, - "step": 4333 - }, - { - "epoch": 0.3257177213287239, - "grad_norm": 1.9026968939575948, - "learning_rate": 3.1509383809021905e-06, - "loss": 1.0082, - "step": 4334 - }, - { - "epoch": 0.32579287539455887, - "grad_norm": 2.075241497647821, - "learning_rate": 3.15054019584314e-06, - "loss": 1.0536, - "step": 4335 - }, - { - "epoch": 0.3258680294603938, - "grad_norm": 1.9873116457955577, - "learning_rate": 3.150141942610178e-06, - "loss": 1.004, - "step": 4336 - }, - { - "epoch": 0.32594318352622875, - "grad_norm": 2.126406148282212, - "learning_rate": 3.149743621226901e-06, - "loss": 1.0261, - "step": 4337 - }, - { - "epoch": 0.3260183375920637, - "grad_norm": 1.5489352599295831, - "learning_rate": 3.149345231716912e-06, - "loss": 1.0045, - "step": 4338 - }, - { - "epoch": 0.3260934916578987, - "grad_norm": 1.9855351098173775, - "learning_rate": 3.148946774103817e-06, - "loss": 1.0291, - "step": 4339 - }, - { - "epoch": 0.32616864572373366, - "grad_norm": 1.6925022723634868, - "learning_rate": 3.1485482484112257e-06, - "loss": 0.9539, - "step": 4340 - }, - { - "epoch": 0.32624379978956863, - "grad_norm": 2.024783596202368, - "learning_rate": 3.148149654662753e-06, - "loss": 0.8389, - "step": 4341 - }, - { - "epoch": 0.3263189538554036, - "grad_norm": 1.9924543613390604, - "learning_rate": 3.1477509928820165e-06, - "loss": 1.0321, - "step": 4342 - }, - { - "epoch": 0.3263941079212385, - "grad_norm": 2.540556482796447, - "learning_rate": 3.147352263092638e-06, - "loss": 0.9773, - "step": 4343 - }, - { - "epoch": 0.3264692619870735, - "grad_norm": 1.6720474011669848, - "learning_rate": 3.1469534653182453e-06, - "loss": 1.0465, - "step": 4344 - }, - { - "epoch": 0.32654441605290846, - "grad_norm": 1.540463811709963, - "learning_rate": 3.146554599582468e-06, - "loss": 1.0173, - "step": 4345 - }, - { - "epoch": 0.32661957011874343, - "grad_norm": 1.7241811080163116, - "learning_rate": 3.1461556659089397e-06, - "loss": 1.0041, - "step": 4346 - }, - { - "epoch": 0.3266947241845784, - "grad_norm": 2.353377139500568, - "learning_rate": 3.145756664321299e-06, - "loss": 1.0754, - "step": 4347 - }, - { - "epoch": 0.32676987825041337, - "grad_norm": 1.814704343817127, - "learning_rate": 3.145357594843189e-06, - "loss": 1.0263, - "step": 4348 - }, - { - "epoch": 0.3268450323162483, - "grad_norm": 1.6701335928928354, - "learning_rate": 3.1449584574982556e-06, - "loss": 0.9823, - "step": 4349 - }, - { - "epoch": 0.32692018638208326, - "grad_norm": 1.4862564209975002, - "learning_rate": 3.144559252310149e-06, - "loss": 0.9649, - "step": 4350 - }, - { - "epoch": 0.3269953404479182, - "grad_norm": 1.9275953557712315, - "learning_rate": 3.1441599793025243e-06, - "loss": 0.9987, - "step": 4351 - }, - { - "epoch": 0.3270704945137532, - "grad_norm": 2.229578057472214, - "learning_rate": 3.1437606384990396e-06, - "loss": 0.9285, - "step": 4352 - }, - { - "epoch": 0.32714564857958817, - "grad_norm": 1.4863003741968333, - "learning_rate": 3.1433612299233567e-06, - "loss": 0.9704, - "step": 4353 - }, - { - "epoch": 0.32722080264542314, - "grad_norm": 1.811485006330889, - "learning_rate": 3.1429617535991427e-06, - "loss": 0.8713, - "step": 4354 - }, - { - "epoch": 0.32729595671125805, - "grad_norm": 2.0204473959369826, - "learning_rate": 3.1425622095500685e-06, - "loss": 0.8815, - "step": 4355 - }, - { - "epoch": 0.327371110777093, - "grad_norm": 3.76915958133706, - "learning_rate": 3.1421625977998087e-06, - "loss": 1.0298, - "step": 4356 - }, - { - "epoch": 0.327446264842928, - "grad_norm": 1.4124890419864111, - "learning_rate": 3.1417629183720403e-06, - "loss": 0.9666, - "step": 4357 - }, - { - "epoch": 0.32752141890876296, - "grad_norm": 3.2920488224233173, - "learning_rate": 3.1413631712904476e-06, - "loss": 0.9582, - "step": 4358 - }, - { - "epoch": 0.32759657297459793, - "grad_norm": 2.1080932113553357, - "learning_rate": 3.140963356578716e-06, - "loss": 1.1105, - "step": 4359 - }, - { - "epoch": 0.3276717270404329, - "grad_norm": 2.1914311643873945, - "learning_rate": 3.1405634742605366e-06, - "loss": 1.1239, - "step": 4360 - }, - { - "epoch": 0.3277468811062679, - "grad_norm": 1.6821936075863981, - "learning_rate": 3.1401635243596043e-06, - "loss": 1.0799, - "step": 4361 - }, - { - "epoch": 0.3278220351721028, - "grad_norm": 2.920378276151545, - "learning_rate": 3.1397635068996167e-06, - "loss": 0.9556, - "step": 4362 - }, - { - "epoch": 0.32789718923793776, - "grad_norm": 1.4768188092636474, - "learning_rate": 3.139363421904277e-06, - "loss": 0.9239, - "step": 4363 - }, - { - "epoch": 0.32797234330377273, - "grad_norm": 0.7234196934661042, - "learning_rate": 3.138963269397292e-06, - "loss": 0.8058, - "step": 4364 - }, - { - "epoch": 0.3280474973696077, - "grad_norm": 1.8572010827630934, - "learning_rate": 3.1385630494023716e-06, - "loss": 1.0475, - "step": 4365 - }, - { - "epoch": 0.32812265143544267, - "grad_norm": 1.4773394442878325, - "learning_rate": 3.1381627619432307e-06, - "loss": 1.0191, - "step": 4366 - }, - { - "epoch": 0.32819780550127764, - "grad_norm": 1.9180113871199582, - "learning_rate": 3.1377624070435874e-06, - "loss": 1.0158, - "step": 4367 - }, - { - "epoch": 0.32827295956711255, - "grad_norm": 1.8646634970824318, - "learning_rate": 3.137361984727165e-06, - "loss": 1.0299, - "step": 4368 - }, - { - "epoch": 0.3283481136329475, - "grad_norm": 1.3613619697555968, - "learning_rate": 3.1369614950176903e-06, - "loss": 0.9452, - "step": 4369 - }, - { - "epoch": 0.3284232676987825, - "grad_norm": 1.6984357761911244, - "learning_rate": 3.1365609379388922e-06, - "loss": 1.0244, - "step": 4370 - }, - { - "epoch": 0.32849842176461747, - "grad_norm": 8.034220936387516, - "learning_rate": 3.1361603135145074e-06, - "loss": 0.9706, - "step": 4371 - }, - { - "epoch": 0.32857357583045244, - "grad_norm": 2.584910208878418, - "learning_rate": 3.135759621768273e-06, - "loss": 1.0403, - "step": 4372 - }, - { - "epoch": 0.3286487298962874, - "grad_norm": 1.808054569691023, - "learning_rate": 3.1353588627239317e-06, - "loss": 1.0597, - "step": 4373 - }, - { - "epoch": 0.3287238839621224, - "grad_norm": 1.804270047873906, - "learning_rate": 3.13495803640523e-06, - "loss": 0.9979, - "step": 4374 - }, - { - "epoch": 0.3287990380279573, - "grad_norm": 1.9494079620323967, - "learning_rate": 3.134557142835919e-06, - "loss": 0.8929, - "step": 4375 - }, - { - "epoch": 0.32887419209379226, - "grad_norm": 4.28513354953536, - "learning_rate": 3.134156182039753e-06, - "loss": 0.9364, - "step": 4376 - }, - { - "epoch": 0.32894934615962723, - "grad_norm": 1.6160190802618593, - "learning_rate": 3.13375515404049e-06, - "loss": 0.98, - "step": 4377 - }, - { - "epoch": 0.3290245002254622, - "grad_norm": 1.7301931743045793, - "learning_rate": 3.133354058861893e-06, - "loss": 1.0523, - "step": 4378 - }, - { - "epoch": 0.3290996542912972, - "grad_norm": 1.257628670284708, - "learning_rate": 3.1329528965277275e-06, - "loss": 1.0104, - "step": 4379 - }, - { - "epoch": 0.32917480835713214, - "grad_norm": 2.316830472392224, - "learning_rate": 3.1325516670617648e-06, - "loss": 1.0149, - "step": 4380 - }, - { - "epoch": 0.32924996242296706, - "grad_norm": 4.573242500300277, - "learning_rate": 3.132150370487779e-06, - "loss": 0.9861, - "step": 4381 - }, - { - "epoch": 0.329325116488802, - "grad_norm": 1.6051660702550825, - "learning_rate": 3.1317490068295486e-06, - "loss": 0.9816, - "step": 4382 - }, - { - "epoch": 0.329400270554637, - "grad_norm": 2.171973576126806, - "learning_rate": 3.131347576110855e-06, - "loss": 1.0609, - "step": 4383 - }, - { - "epoch": 0.32947542462047197, - "grad_norm": 2.677789844742526, - "learning_rate": 3.130946078355486e-06, - "loss": 1.0312, - "step": 4384 - }, - { - "epoch": 0.32955057868630694, - "grad_norm": 1.8392064636976397, - "learning_rate": 3.1305445135872318e-06, - "loss": 1.0298, - "step": 4385 - }, - { - "epoch": 0.3296257327521419, - "grad_norm": 1.4458769649697722, - "learning_rate": 3.1301428818298847e-06, - "loss": 0.9082, - "step": 4386 - }, - { - "epoch": 0.3297008868179769, - "grad_norm": 1.8908465547240905, - "learning_rate": 3.129741183107245e-06, - "loss": 1.0371, - "step": 4387 - }, - { - "epoch": 0.3297760408838118, - "grad_norm": 1.6165628009871817, - "learning_rate": 3.129339417443114e-06, - "loss": 0.9132, - "step": 4388 - }, - { - "epoch": 0.32985119494964676, - "grad_norm": 1.5559082605878793, - "learning_rate": 3.128937584861298e-06, - "loss": 1.0087, - "step": 4389 - }, - { - "epoch": 0.32992634901548173, - "grad_norm": 1.6488891272160535, - "learning_rate": 3.128535685385607e-06, - "loss": 0.914, - "step": 4390 - }, - { - "epoch": 0.3300015030813167, - "grad_norm": 1.7321168788034882, - "learning_rate": 3.1281337190398552e-06, - "loss": 1.039, - "step": 4391 - }, - { - "epoch": 0.3300766571471517, - "grad_norm": 2.9981194005885667, - "learning_rate": 3.1277316858478607e-06, - "loss": 0.9725, - "step": 4392 - }, - { - "epoch": 0.33015181121298665, - "grad_norm": 0.8488112636248465, - "learning_rate": 3.1273295858334454e-06, - "loss": 0.8081, - "step": 4393 - }, - { - "epoch": 0.33022696527882156, - "grad_norm": 2.3003524095648733, - "learning_rate": 3.1269274190204352e-06, - "loss": 1.0349, - "step": 4394 - }, - { - "epoch": 0.33030211934465653, - "grad_norm": 1.8407920126927784, - "learning_rate": 3.1265251854326613e-06, - "loss": 1.0169, - "step": 4395 - }, - { - "epoch": 0.3303772734104915, - "grad_norm": 1.8365297701380703, - "learning_rate": 3.126122885093955e-06, - "loss": 0.9361, - "step": 4396 - }, - { - "epoch": 0.33045242747632647, - "grad_norm": 1.968905972554242, - "learning_rate": 3.1257205180281555e-06, - "loss": 1.0198, - "step": 4397 - }, - { - "epoch": 0.33052758154216144, - "grad_norm": 10.199789860374613, - "learning_rate": 3.125318084259105e-06, - "loss": 1.0476, - "step": 4398 - }, - { - "epoch": 0.3306027356079964, - "grad_norm": 2.0158965644943776, - "learning_rate": 3.1249155838106493e-06, - "loss": 0.9414, - "step": 4399 - }, - { - "epoch": 0.3306778896738313, - "grad_norm": 4.328307440593028, - "learning_rate": 3.1245130167066373e-06, - "loss": 0.9747, - "step": 4400 - }, - { - "epoch": 0.3307530437396663, - "grad_norm": 1.644872199662532, - "learning_rate": 3.1241103829709234e-06, - "loss": 0.924, - "step": 4401 - }, - { - "epoch": 0.33082819780550127, - "grad_norm": 1.650929524396831, - "learning_rate": 3.123707682627364e-06, - "loss": 0.9149, - "step": 4402 - }, - { - "epoch": 0.33090335187133624, - "grad_norm": 1.9237389558372229, - "learning_rate": 3.1233049156998215e-06, - "loss": 0.8843, - "step": 4403 - }, - { - "epoch": 0.3309785059371712, - "grad_norm": 3.3528048829130728, - "learning_rate": 3.122902082212162e-06, - "loss": 1.0332, - "step": 4404 - }, - { - "epoch": 0.3310536600030062, - "grad_norm": 2.1947047794079513, - "learning_rate": 3.122499182188254e-06, - "loss": 0.9666, - "step": 4405 - }, - { - "epoch": 0.33112881406884115, - "grad_norm": 1.665583314069815, - "learning_rate": 3.1220962156519715e-06, - "loss": 0.9997, - "step": 4406 - }, - { - "epoch": 0.33120396813467606, - "grad_norm": 1.6713919253634577, - "learning_rate": 3.121693182627191e-06, - "loss": 1.039, - "step": 4407 - }, - { - "epoch": 0.33127912220051103, - "grad_norm": 1.7846494660677317, - "learning_rate": 3.1212900831377934e-06, - "loss": 1.0433, - "step": 4408 - }, - { - "epoch": 0.331354276266346, - "grad_norm": 1.4322201335267972, - "learning_rate": 3.1208869172076657e-06, - "loss": 0.9528, - "step": 4409 - }, - { - "epoch": 0.331429430332181, - "grad_norm": 2.7971102652883815, - "learning_rate": 3.120483684860696e-06, - "loss": 0.9584, - "step": 4410 - }, - { - "epoch": 0.33150458439801594, - "grad_norm": 0.7769809432153677, - "learning_rate": 3.1200803861207774e-06, - "loss": 0.8891, - "step": 4411 - }, - { - "epoch": 0.3315797384638509, - "grad_norm": 1.871457695656375, - "learning_rate": 3.1196770210118063e-06, - "loss": 1.0162, - "step": 4412 - }, - { - "epoch": 0.33165489252968583, - "grad_norm": 1.610739120567439, - "learning_rate": 3.1192735895576845e-06, - "loss": 1.0382, - "step": 4413 - }, - { - "epoch": 0.3317300465955208, - "grad_norm": 1.744058468241048, - "learning_rate": 3.118870091782316e-06, - "loss": 0.8912, - "step": 4414 - }, - { - "epoch": 0.33180520066135577, - "grad_norm": 1.8414138606047314, - "learning_rate": 3.118466527709611e-06, - "loss": 0.958, - "step": 4415 - }, - { - "epoch": 0.33188035472719074, - "grad_norm": 1.636378068450902, - "learning_rate": 3.1180628973634807e-06, - "loss": 0.9639, - "step": 4416 - }, - { - "epoch": 0.3319555087930257, - "grad_norm": 1.5791402138513355, - "learning_rate": 3.117659200767843e-06, - "loss": 0.9404, - "step": 4417 - }, - { - "epoch": 0.3320306628588607, - "grad_norm": 1.7693758932129584, - "learning_rate": 3.1172554379466176e-06, - "loss": 0.9184, - "step": 4418 - }, - { - "epoch": 0.33210581692469565, - "grad_norm": 2.0278538409501676, - "learning_rate": 3.1168516089237288e-06, - "loss": 0.9589, - "step": 4419 - }, - { - "epoch": 0.33218097099053057, - "grad_norm": 0.6819851067698366, - "learning_rate": 3.1164477137231054e-06, - "loss": 0.8125, - "step": 4420 - }, - { - "epoch": 0.33225612505636554, - "grad_norm": 1.77907348972493, - "learning_rate": 3.1160437523686806e-06, - "loss": 1.0414, - "step": 4421 - }, - { - "epoch": 0.3323312791222005, - "grad_norm": 4.787241454152531, - "learning_rate": 3.1156397248843896e-06, - "loss": 1.0433, - "step": 4422 - }, - { - "epoch": 0.3324064331880355, - "grad_norm": 1.4908703488139592, - "learning_rate": 3.1152356312941724e-06, - "loss": 1.0111, - "step": 4423 - }, - { - "epoch": 0.33248158725387045, - "grad_norm": 2.158285424229758, - "learning_rate": 3.114831471621974e-06, - "loss": 1.0475, - "step": 4424 - }, - { - "epoch": 0.3325567413197054, - "grad_norm": 1.7549895953008547, - "learning_rate": 3.1144272458917417e-06, - "loss": 0.842, - "step": 4425 - }, - { - "epoch": 0.33263189538554033, - "grad_norm": 1.9220683923197526, - "learning_rate": 3.114022954127427e-06, - "loss": 0.9383, - "step": 4426 - }, - { - "epoch": 0.3327070494513753, - "grad_norm": 1.4557755610170702, - "learning_rate": 3.1136185963529873e-06, - "loss": 0.9852, - "step": 4427 - }, - { - "epoch": 0.3327822035172103, - "grad_norm": 1.8907051384705755, - "learning_rate": 3.1132141725923812e-06, - "loss": 1.0594, - "step": 4428 - }, - { - "epoch": 0.33285735758304524, - "grad_norm": 2.0603553719141363, - "learning_rate": 3.1128096828695728e-06, - "loss": 1.0249, - "step": 4429 - }, - { - "epoch": 0.3329325116488802, - "grad_norm": 1.6794226859065653, - "learning_rate": 3.1124051272085286e-06, - "loss": 0.982, - "step": 4430 - }, - { - "epoch": 0.3330076657147152, - "grad_norm": 2.0484124153748713, - "learning_rate": 3.1120005056332216e-06, - "loss": 1.0081, - "step": 4431 - }, - { - "epoch": 0.33308281978055015, - "grad_norm": 1.8504995522177732, - "learning_rate": 3.111595818167627e-06, - "loss": 0.989, - "step": 4432 - }, - { - "epoch": 0.33315797384638507, - "grad_norm": 1.5006887920759024, - "learning_rate": 3.111191064835723e-06, - "loss": 0.9545, - "step": 4433 - }, - { - "epoch": 0.33323312791222004, - "grad_norm": 0.6818880872580662, - "learning_rate": 3.1107862456614932e-06, - "loss": 0.8157, - "step": 4434 - }, - { - "epoch": 0.333308281978055, - "grad_norm": 3.0113609091483013, - "learning_rate": 3.1103813606689253e-06, - "loss": 1.0097, - "step": 4435 - }, - { - "epoch": 0.33338343604389, - "grad_norm": 2.3085309583254183, - "learning_rate": 3.1099764098820096e-06, - "loss": 1.0159, - "step": 4436 - }, - { - "epoch": 0.33345859010972495, - "grad_norm": 1.8813894758766923, - "learning_rate": 3.1095713933247416e-06, - "loss": 1.0099, - "step": 4437 - }, - { - "epoch": 0.3335337441755599, - "grad_norm": 2.6568964452064248, - "learning_rate": 3.1091663110211188e-06, - "loss": 0.9622, - "step": 4438 - }, - { - "epoch": 0.33360889824139484, - "grad_norm": 2.0559842619163704, - "learning_rate": 3.1087611629951457e-06, - "loss": 0.83, - "step": 4439 - }, - { - "epoch": 0.3336840523072298, - "grad_norm": 1.6124454577780551, - "learning_rate": 3.1083559492708277e-06, - "loss": 1.0288, - "step": 4440 - }, - { - "epoch": 0.3337592063730648, - "grad_norm": 2.265642152918635, - "learning_rate": 3.1079506698721752e-06, - "loss": 0.9674, - "step": 4441 - }, - { - "epoch": 0.33383436043889975, - "grad_norm": 0.8035647142220591, - "learning_rate": 3.107545324823203e-06, - "loss": 0.9804, - "step": 4442 - }, - { - "epoch": 0.3339095145047347, - "grad_norm": 1.562139545825636, - "learning_rate": 3.1071399141479292e-06, - "loss": 0.9442, - "step": 4443 - }, - { - "epoch": 0.3339846685705697, - "grad_norm": 1.610473554164908, - "learning_rate": 3.1067344378703765e-06, - "loss": 0.9267, - "step": 4444 - }, - { - "epoch": 0.3340598226364046, - "grad_norm": 1.370413848017323, - "learning_rate": 3.10632889601457e-06, - "loss": 0.9321, - "step": 4445 - }, - { - "epoch": 0.3341349767022396, - "grad_norm": 1.6116060514336934, - "learning_rate": 3.10592328860454e-06, - "loss": 1.0458, - "step": 4446 - }, - { - "epoch": 0.33421013076807454, - "grad_norm": 1.9513448308252572, - "learning_rate": 3.10551761566432e-06, - "loss": 0.9143, - "step": 4447 - }, - { - "epoch": 0.3342852848339095, - "grad_norm": 1.9848347529276695, - "learning_rate": 3.1051118772179483e-06, - "loss": 1.0827, - "step": 4448 - }, - { - "epoch": 0.3343604388997445, - "grad_norm": 1.5701033419717396, - "learning_rate": 3.104706073289466e-06, - "loss": 1.0648, - "step": 4449 - }, - { - "epoch": 0.33443559296557945, - "grad_norm": 1.8187964023818777, - "learning_rate": 3.104300203902919e-06, - "loss": 1.0362, - "step": 4450 - }, - { - "epoch": 0.3345107470314144, - "grad_norm": 1.49292141905715, - "learning_rate": 3.1038942690823556e-06, - "loss": 1.0213, - "step": 4451 - }, - { - "epoch": 0.33458590109724934, - "grad_norm": 1.5828992181579251, - "learning_rate": 3.10348826885183e-06, - "loss": 0.9387, - "step": 4452 - }, - { - "epoch": 0.3346610551630843, - "grad_norm": 1.8796245129972713, - "learning_rate": 3.1030822032353997e-06, - "loss": 1.1273, - "step": 4453 - }, - { - "epoch": 0.3347362092289193, - "grad_norm": 2.554586239362375, - "learning_rate": 3.1026760722571236e-06, - "loss": 0.9303, - "step": 4454 - }, - { - "epoch": 0.33481136329475425, - "grad_norm": 1.6086660508761839, - "learning_rate": 3.1022698759410684e-06, - "loss": 1.0015, - "step": 4455 - }, - { - "epoch": 0.3348865173605892, - "grad_norm": 2.1242611666531004, - "learning_rate": 3.1018636143113022e-06, - "loss": 1.0727, - "step": 4456 - }, - { - "epoch": 0.3349616714264242, - "grad_norm": 1.9380814079886037, - "learning_rate": 3.1014572873918976e-06, - "loss": 0.9862, - "step": 4457 - }, - { - "epoch": 0.3350368254922591, - "grad_norm": 1.8071056860251637, - "learning_rate": 3.101050895206931e-06, - "loss": 1.0581, - "step": 4458 - }, - { - "epoch": 0.3351119795580941, - "grad_norm": 3.525827345915504, - "learning_rate": 3.100644437780482e-06, - "loss": 0.9828, - "step": 4459 - }, - { - "epoch": 0.33518713362392905, - "grad_norm": 1.3960878889732526, - "learning_rate": 3.100237915136636e-06, - "loss": 1.0731, - "step": 4460 - }, - { - "epoch": 0.335262287689764, - "grad_norm": 1.394018554473713, - "learning_rate": 3.0998313272994805e-06, - "loss": 0.8704, - "step": 4461 - }, - { - "epoch": 0.335337441755599, - "grad_norm": 1.5489922108266587, - "learning_rate": 3.0994246742931076e-06, - "loss": 1.0364, - "step": 4462 - }, - { - "epoch": 0.33541259582143396, - "grad_norm": 8.269806638807326, - "learning_rate": 3.099017956141612e-06, - "loss": 1.0076, - "step": 4463 - }, - { - "epoch": 0.3354877498872689, - "grad_norm": 1.7323157655374326, - "learning_rate": 3.098611172869094e-06, - "loss": 1.0004, - "step": 4464 - }, - { - "epoch": 0.33556290395310384, - "grad_norm": 1.8608619450398018, - "learning_rate": 3.0982043244996582e-06, - "loss": 0.9405, - "step": 4465 - }, - { - "epoch": 0.3356380580189388, - "grad_norm": 2.1517011787528686, - "learning_rate": 3.09779741105741e-06, - "loss": 0.9583, - "step": 4466 - }, - { - "epoch": 0.3357132120847738, - "grad_norm": 1.7061485260353086, - "learning_rate": 3.0973904325664615e-06, - "loss": 0.9756, - "step": 4467 - }, - { - "epoch": 0.33578836615060875, - "grad_norm": 2.0406547555900905, - "learning_rate": 3.0969833890509282e-06, - "loss": 0.9338, - "step": 4468 - }, - { - "epoch": 0.3358635202164437, - "grad_norm": 1.6397624336861187, - "learning_rate": 3.096576280534928e-06, - "loss": 0.9881, - "step": 4469 - }, - { - "epoch": 0.3359386742822787, - "grad_norm": 1.7372806712522841, - "learning_rate": 3.096169107042584e-06, - "loss": 0.9689, - "step": 4470 - }, - { - "epoch": 0.3360138283481136, - "grad_norm": 2.4393658465859502, - "learning_rate": 3.0957618685980233e-06, - "loss": 0.9894, - "step": 4471 - }, - { - "epoch": 0.3360889824139486, - "grad_norm": 0.7223707305206428, - "learning_rate": 3.0953545652253763e-06, - "loss": 0.8622, - "step": 4472 - }, - { - "epoch": 0.33616413647978355, - "grad_norm": 2.006202616384567, - "learning_rate": 3.094947196948776e-06, - "loss": 1.0353, - "step": 4473 - }, - { - "epoch": 0.3362392905456185, - "grad_norm": 2.8611429720918333, - "learning_rate": 3.0945397637923617e-06, - "loss": 0.9795, - "step": 4474 - }, - { - "epoch": 0.3363144446114535, - "grad_norm": 1.5774802222122937, - "learning_rate": 3.094132265780275e-06, - "loss": 1.0335, - "step": 4475 - }, - { - "epoch": 0.33638959867728846, - "grad_norm": 1.9548384817773683, - "learning_rate": 3.0937247029366623e-06, - "loss": 1.0438, - "step": 4476 - }, - { - "epoch": 0.33646475274312343, - "grad_norm": 1.625550176943745, - "learning_rate": 3.0933170752856723e-06, - "loss": 0.9771, - "step": 4477 - }, - { - "epoch": 0.33653990680895834, - "grad_norm": 1.9285344682916077, - "learning_rate": 3.0929093828514595e-06, - "loss": 1.0233, - "step": 4478 - }, - { - "epoch": 0.3366150608747933, - "grad_norm": 2.4853380364010618, - "learning_rate": 3.0925016256581805e-06, - "loss": 0.9533, - "step": 4479 - }, - { - "epoch": 0.3366902149406283, - "grad_norm": 2.1166036441261564, - "learning_rate": 3.092093803729997e-06, - "loss": 0.965, - "step": 4480 - }, - { - "epoch": 0.33676536900646326, - "grad_norm": 0.6879712786986494, - "learning_rate": 3.091685917091073e-06, - "loss": 0.7952, - "step": 4481 - }, - { - "epoch": 0.3368405230722982, - "grad_norm": 2.458059220198912, - "learning_rate": 3.0912779657655784e-06, - "loss": 0.9851, - "step": 4482 - }, - { - "epoch": 0.3369156771381332, - "grad_norm": 0.7315984140450884, - "learning_rate": 3.0908699497776864e-06, - "loss": 0.9063, - "step": 4483 - }, - { - "epoch": 0.3369908312039681, - "grad_norm": 1.5097128795969121, - "learning_rate": 3.0904618691515714e-06, - "loss": 0.9554, - "step": 4484 - }, - { - "epoch": 0.3370659852698031, - "grad_norm": 2.0164515257064033, - "learning_rate": 3.0900537239114157e-06, - "loss": 1.0382, - "step": 4485 - }, - { - "epoch": 0.33714113933563805, - "grad_norm": 1.5509591180761675, - "learning_rate": 3.089645514081402e-06, - "loss": 1.1063, - "step": 4486 - }, - { - "epoch": 0.337216293401473, - "grad_norm": 1.783657120636986, - "learning_rate": 3.08923723968572e-06, - "loss": 0.9703, - "step": 4487 - }, - { - "epoch": 0.337291447467308, - "grad_norm": 2.1059603709869474, - "learning_rate": 3.0888289007485605e-06, - "loss": 1.0613, - "step": 4488 - }, - { - "epoch": 0.33736660153314296, - "grad_norm": 1.9883741195963578, - "learning_rate": 3.0884204972941187e-06, - "loss": 1.0295, - "step": 4489 - }, - { - "epoch": 0.3374417555989779, - "grad_norm": 1.9421749933995578, - "learning_rate": 3.088012029346595e-06, - "loss": 1.0081, - "step": 4490 - }, - { - "epoch": 0.33751690966481285, - "grad_norm": 2.343708495726261, - "learning_rate": 3.087603496930192e-06, - "loss": 1.0344, - "step": 4491 - }, - { - "epoch": 0.3375920637306478, - "grad_norm": 5.2313050334410125, - "learning_rate": 3.087194900069117e-06, - "loss": 0.9905, - "step": 4492 - }, - { - "epoch": 0.3376672177964828, - "grad_norm": 1.5614643479380474, - "learning_rate": 3.0867862387875815e-06, - "loss": 0.9551, - "step": 4493 - }, - { - "epoch": 0.33774237186231776, - "grad_norm": 1.4452338944788024, - "learning_rate": 3.0863775131097995e-06, - "loss": 0.9417, - "step": 4494 - }, - { - "epoch": 0.33781752592815273, - "grad_norm": 1.8381722991471525, - "learning_rate": 3.0859687230599897e-06, - "loss": 1.0358, - "step": 4495 - }, - { - "epoch": 0.3378926799939877, - "grad_norm": 1.6284009117594307, - "learning_rate": 3.0855598686623745e-06, - "loss": 1.1321, - "step": 4496 - }, - { - "epoch": 0.3379678340598226, - "grad_norm": 1.6492837478355973, - "learning_rate": 3.085150949941181e-06, - "loss": 1.059, - "step": 4497 - }, - { - "epoch": 0.3380429881256576, - "grad_norm": 1.4867499375793125, - "learning_rate": 3.084741966920638e-06, - "loss": 0.9649, - "step": 4498 - }, - { - "epoch": 0.33811814219149255, - "grad_norm": 1.6618152900634215, - "learning_rate": 3.0843329196249794e-06, - "loss": 1.0339, - "step": 4499 - }, - { - "epoch": 0.3381932962573275, - "grad_norm": 1.64063241139509, - "learning_rate": 3.0839238080784435e-06, - "loss": 0.9925, - "step": 4500 - }, - { - "epoch": 0.3382684503231625, - "grad_norm": 1.9619316833221265, - "learning_rate": 3.083514632305271e-06, - "loss": 0.991, - "step": 4501 - }, - { - "epoch": 0.33834360438899747, - "grad_norm": 1.723541655034599, - "learning_rate": 3.0831053923297074e-06, - "loss": 0.9367, - "step": 4502 - }, - { - "epoch": 0.3384187584548324, - "grad_norm": 1.6919690048318756, - "learning_rate": 3.082696088176002e-06, - "loss": 0.9411, - "step": 4503 - }, - { - "epoch": 0.33849391252066735, - "grad_norm": 1.6948487662755205, - "learning_rate": 3.0822867198684073e-06, - "loss": 0.9865, - "step": 4504 - }, - { - "epoch": 0.3385690665865023, - "grad_norm": 1.538687878244875, - "learning_rate": 3.0818772874311804e-06, - "loss": 0.9641, - "step": 4505 - }, - { - "epoch": 0.3386442206523373, - "grad_norm": 1.7518392162227245, - "learning_rate": 3.081467790888581e-06, - "loss": 1.0024, - "step": 4506 - }, - { - "epoch": 0.33871937471817226, - "grad_norm": 2.242004706079204, - "learning_rate": 3.0810582302648743e-06, - "loss": 0.9816, - "step": 4507 - }, - { - "epoch": 0.33879452878400723, - "grad_norm": 1.7309201859398353, - "learning_rate": 3.0806486055843276e-06, - "loss": 0.9881, - "step": 4508 - }, - { - "epoch": 0.3388696828498422, - "grad_norm": 2.2831058008288445, - "learning_rate": 3.080238916871213e-06, - "loss": 0.9489, - "step": 4509 - }, - { - "epoch": 0.3389448369156771, - "grad_norm": 2.8155537451260124, - "learning_rate": 3.079829164149806e-06, - "loss": 1.0265, - "step": 4510 - }, - { - "epoch": 0.3390199909815121, - "grad_norm": 1.6444689813636237, - "learning_rate": 3.0794193474443866e-06, - "loss": 0.9777, - "step": 4511 - }, - { - "epoch": 0.33909514504734706, - "grad_norm": 1.6892104487241177, - "learning_rate": 3.0790094667792368e-06, - "loss": 1.0145, - "step": 4512 - }, - { - "epoch": 0.339170299113182, - "grad_norm": 2.518130742660871, - "learning_rate": 3.078599522178644e-06, - "loss": 0.9332, - "step": 4513 - }, - { - "epoch": 0.339245453179017, - "grad_norm": 2.3799330025547296, - "learning_rate": 3.0781895136669e-06, - "loss": 0.9105, - "step": 4514 - }, - { - "epoch": 0.33932060724485197, - "grad_norm": 2.0726362898203754, - "learning_rate": 3.077779441268299e-06, - "loss": 0.971, - "step": 4515 - }, - { - "epoch": 0.3393957613106869, - "grad_norm": 1.7279584945842124, - "learning_rate": 3.077369305007138e-06, - "loss": 0.9232, - "step": 4516 - }, - { - "epoch": 0.33947091537652185, - "grad_norm": 3.472299357934987, - "learning_rate": 3.07695910490772e-06, - "loss": 0.9168, - "step": 4517 - }, - { - "epoch": 0.3395460694423568, - "grad_norm": 2.6379873431164578, - "learning_rate": 3.076548840994352e-06, - "loss": 0.8271, - "step": 4518 - }, - { - "epoch": 0.3396212235081918, - "grad_norm": 2.595657750085441, - "learning_rate": 3.076138513291342e-06, - "loss": 0.8613, - "step": 4519 - }, - { - "epoch": 0.33969637757402676, - "grad_norm": 11.935227102828573, - "learning_rate": 3.0757281218230046e-06, - "loss": 0.9135, - "step": 4520 - }, - { - "epoch": 0.33977153163986173, - "grad_norm": 1.4904816948484214, - "learning_rate": 3.0753176666136575e-06, - "loss": 0.979, - "step": 4521 - }, - { - "epoch": 0.3398466857056967, - "grad_norm": 1.6363185493442218, - "learning_rate": 3.0749071476876203e-06, - "loss": 0.9967, - "step": 4522 - }, - { - "epoch": 0.3399218397715316, - "grad_norm": 1.6209830740410323, - "learning_rate": 3.0744965650692184e-06, - "loss": 1.0119, - "step": 4523 - }, - { - "epoch": 0.3399969938373666, - "grad_norm": 1.8310559244787488, - "learning_rate": 3.0740859187827807e-06, - "loss": 0.9517, - "step": 4524 - }, - { - "epoch": 0.34007214790320156, - "grad_norm": 1.9377882343133088, - "learning_rate": 3.0736752088526388e-06, - "loss": 1.0568, - "step": 4525 - }, - { - "epoch": 0.34014730196903653, - "grad_norm": 1.8570201290890518, - "learning_rate": 3.0732644353031304e-06, - "loss": 0.9948, - "step": 4526 - }, - { - "epoch": 0.3402224560348715, - "grad_norm": 1.3897889442209417, - "learning_rate": 3.072853598158594e-06, - "loss": 0.9767, - "step": 4527 - }, - { - "epoch": 0.34029761010070647, - "grad_norm": 1.646898464202211, - "learning_rate": 3.0724426974433737e-06, - "loss": 1.0206, - "step": 4528 - }, - { - "epoch": 0.3403727641665414, - "grad_norm": 1.5868999398410042, - "learning_rate": 3.0720317331818163e-06, - "loss": 0.9411, - "step": 4529 - }, - { - "epoch": 0.34044791823237636, - "grad_norm": 0.7929794611465819, - "learning_rate": 3.071620705398274e-06, - "loss": 0.8834, - "step": 4530 - }, - { - "epoch": 0.3405230722982113, - "grad_norm": 1.457556008756267, - "learning_rate": 3.0712096141171017e-06, - "loss": 1.0719, - "step": 4531 - }, - { - "epoch": 0.3405982263640463, - "grad_norm": 2.195497226748955, - "learning_rate": 3.070798459362658e-06, - "loss": 1.0985, - "step": 4532 - }, - { - "epoch": 0.34067338042988127, - "grad_norm": 2.429656000945086, - "learning_rate": 3.070387241159305e-06, - "loss": 0.8483, - "step": 4533 - }, - { - "epoch": 0.34074853449571624, - "grad_norm": 2.0596742635425183, - "learning_rate": 3.069975959531408e-06, - "loss": 1.0318, - "step": 4534 - }, - { - "epoch": 0.34082368856155115, - "grad_norm": 1.387638356639268, - "learning_rate": 3.0695646145033404e-06, - "loss": 0.9717, - "step": 4535 - }, - { - "epoch": 0.3408988426273861, - "grad_norm": 1.442426449703823, - "learning_rate": 3.0691532060994722e-06, - "loss": 0.9216, - "step": 4536 - }, - { - "epoch": 0.3409739966932211, - "grad_norm": 1.7378269244759657, - "learning_rate": 3.068741734344183e-06, - "loss": 0.9643, - "step": 4537 - }, - { - "epoch": 0.34104915075905606, - "grad_norm": 1.4534673800172366, - "learning_rate": 3.0683301992618538e-06, - "loss": 1.0268, - "step": 4538 - }, - { - "epoch": 0.34112430482489103, - "grad_norm": 2.297934774245033, - "learning_rate": 3.067918600876869e-06, - "loss": 1.0336, - "step": 4539 - }, - { - "epoch": 0.341199458890726, - "grad_norm": 0.8077923032497364, - "learning_rate": 3.067506939213617e-06, - "loss": 0.8824, - "step": 4540 - }, - { - "epoch": 0.341274612956561, - "grad_norm": 2.0371682441083423, - "learning_rate": 3.067095214296492e-06, - "loss": 1.0034, - "step": 4541 - }, - { - "epoch": 0.3413497670223959, - "grad_norm": 4.700141441976503, - "learning_rate": 3.066683426149889e-06, - "loss": 0.9872, - "step": 4542 - }, - { - "epoch": 0.34142492108823086, - "grad_norm": 1.6695769729029795, - "learning_rate": 3.066271574798209e-06, - "loss": 0.9938, - "step": 4543 - }, - { - "epoch": 0.34150007515406583, - "grad_norm": 2.397244930442186, - "learning_rate": 3.0658596602658548e-06, - "loss": 0.9142, - "step": 4544 - }, - { - "epoch": 0.3415752292199008, - "grad_norm": 1.8421185274291936, - "learning_rate": 3.0654476825772338e-06, - "loss": 1.0225, - "step": 4545 - }, - { - "epoch": 0.34165038328573577, - "grad_norm": 2.4325039843755007, - "learning_rate": 3.0650356417567586e-06, - "loss": 1.0011, - "step": 4546 - }, - { - "epoch": 0.34172553735157074, - "grad_norm": 1.8638702677165946, - "learning_rate": 3.064623537828843e-06, - "loss": 1.0381, - "step": 4547 - }, - { - "epoch": 0.34180069141740566, - "grad_norm": 2.1628249172366285, - "learning_rate": 3.0642113708179062e-06, - "loss": 1.007, - "step": 4548 - }, - { - "epoch": 0.3418758454832406, - "grad_norm": 1.8976630997956216, - "learning_rate": 3.0637991407483706e-06, - "loss": 1.0321, - "step": 4549 - }, - { - "epoch": 0.3419509995490756, - "grad_norm": 2.359177739063377, - "learning_rate": 3.0633868476446615e-06, - "loss": 0.9466, - "step": 4550 - }, - { - "epoch": 0.34202615361491057, - "grad_norm": 1.7105805882635556, - "learning_rate": 3.062974491531211e-06, - "loss": 1.0033, - "step": 4551 - }, - { - "epoch": 0.34210130768074554, - "grad_norm": 1.741664585617258, - "learning_rate": 3.06256207243245e-06, - "loss": 1.1033, - "step": 4552 - }, - { - "epoch": 0.3421764617465805, - "grad_norm": 1.797238255147091, - "learning_rate": 3.0621495903728177e-06, - "loss": 0.9697, - "step": 4553 - }, - { - "epoch": 0.3422516158124155, - "grad_norm": 1.4692758471071994, - "learning_rate": 3.061737045376756e-06, - "loss": 0.9064, - "step": 4554 - }, - { - "epoch": 0.3423267698782504, - "grad_norm": 1.6970621388691056, - "learning_rate": 3.061324437468708e-06, - "loss": 0.8927, - "step": 4555 - }, - { - "epoch": 0.34240192394408536, - "grad_norm": 1.382514274895973, - "learning_rate": 3.060911766673123e-06, - "loss": 1.1044, - "step": 4556 - }, - { - "epoch": 0.34247707800992033, - "grad_norm": 1.8377411119736267, - "learning_rate": 3.0604990330144537e-06, - "loss": 1.0104, - "step": 4557 - }, - { - "epoch": 0.3425522320757553, - "grad_norm": 1.6887179324072343, - "learning_rate": 3.0600862365171553e-06, - "loss": 1.0413, - "step": 4558 - }, - { - "epoch": 0.3426273861415903, - "grad_norm": 1.7752903741076533, - "learning_rate": 3.0596733772056884e-06, - "loss": 1.0134, - "step": 4559 - }, - { - "epoch": 0.34270254020742524, - "grad_norm": 2.4953490864287646, - "learning_rate": 3.0592604551045157e-06, - "loss": 0.9734, - "step": 4560 - }, - { - "epoch": 0.34277769427326016, - "grad_norm": 2.0906378547489934, - "learning_rate": 3.0588474702381055e-06, - "loss": 0.9833, - "step": 4561 - }, - { - "epoch": 0.34285284833909513, - "grad_norm": 2.007180556063019, - "learning_rate": 3.0584344226309277e-06, - "loss": 0.9868, - "step": 4562 - }, - { - "epoch": 0.3429280024049301, - "grad_norm": 1.9010780087136991, - "learning_rate": 3.0580213123074573e-06, - "loss": 0.9028, - "step": 4563 - }, - { - "epoch": 0.34300315647076507, - "grad_norm": 0.7143901365450398, - "learning_rate": 3.0576081392921723e-06, - "loss": 0.8665, - "step": 4564 - }, - { - "epoch": 0.34307831053660004, - "grad_norm": 1.3597244602756873, - "learning_rate": 3.057194903609556e-06, - "loss": 0.9953, - "step": 4565 - }, - { - "epoch": 0.343153464602435, - "grad_norm": 1.337899312457538, - "learning_rate": 3.056781605284093e-06, - "loss": 0.8086, - "step": 4566 - }, - { - "epoch": 0.34322861866827, - "grad_norm": 1.7922454239282284, - "learning_rate": 3.056368244340273e-06, - "loss": 1.0921, - "step": 4567 - }, - { - "epoch": 0.3433037727341049, - "grad_norm": 4.24085620764267, - "learning_rate": 3.05595482080259e-06, - "loss": 0.9898, - "step": 4568 - }, - { - "epoch": 0.34337892679993987, - "grad_norm": 1.7623459741913372, - "learning_rate": 3.05554133469554e-06, - "loss": 1.0429, - "step": 4569 - }, - { - "epoch": 0.34345408086577484, - "grad_norm": 1.9463346316165626, - "learning_rate": 3.055127786043624e-06, - "loss": 0.975, - "step": 4570 - }, - { - "epoch": 0.3435292349316098, - "grad_norm": 1.8493655405579368, - "learning_rate": 3.0547141748713463e-06, - "loss": 0.9431, - "step": 4571 - }, - { - "epoch": 0.3436043889974448, - "grad_norm": 2.500314502007248, - "learning_rate": 3.0543005012032152e-06, - "loss": 0.8894, - "step": 4572 - }, - { - "epoch": 0.34367954306327975, - "grad_norm": 1.6424823238635178, - "learning_rate": 3.0538867650637416e-06, - "loss": 1.0547, - "step": 4573 - }, - { - "epoch": 0.34375469712911466, - "grad_norm": 0.7353412683179277, - "learning_rate": 3.053472966477442e-06, - "loss": 0.9113, - "step": 4574 - }, - { - "epoch": 0.34382985119494963, - "grad_norm": 1.3605695277389196, - "learning_rate": 3.053059105468835e-06, - "loss": 1.0746, - "step": 4575 - }, - { - "epoch": 0.3439050052607846, - "grad_norm": 2.2152523356869755, - "learning_rate": 3.052645182062444e-06, - "loss": 0.9731, - "step": 4576 - }, - { - "epoch": 0.3439801593266196, - "grad_norm": 2.053609703133771, - "learning_rate": 3.052231196282795e-06, - "loss": 0.9796, - "step": 4577 - }, - { - "epoch": 0.34405531339245454, - "grad_norm": 0.7095558238525606, - "learning_rate": 3.051817148154418e-06, - "loss": 0.8392, - "step": 4578 - }, - { - "epoch": 0.3441304674582895, - "grad_norm": 1.4893874015083686, - "learning_rate": 3.0514030377018473e-06, - "loss": 0.9988, - "step": 4579 - }, - { - "epoch": 0.3442056215241244, - "grad_norm": 1.4038379752360033, - "learning_rate": 3.0509888649496204e-06, - "loss": 0.9297, - "step": 4580 - }, - { - "epoch": 0.3442807755899594, - "grad_norm": 2.140507381577073, - "learning_rate": 3.05057462992228e-06, - "loss": 1.1101, - "step": 4581 - }, - { - "epoch": 0.34435592965579437, - "grad_norm": 2.1363156791041282, - "learning_rate": 3.050160332644368e-06, - "loss": 0.9337, - "step": 4582 - }, - { - "epoch": 0.34443108372162934, - "grad_norm": 1.1970535762623342, - "learning_rate": 3.0497459731404364e-06, - "loss": 0.9854, - "step": 4583 - }, - { - "epoch": 0.3445062377874643, - "grad_norm": 1.5860819633345822, - "learning_rate": 3.049331551435035e-06, - "loss": 1.0104, - "step": 4584 - }, - { - "epoch": 0.3445813918532993, - "grad_norm": 1.647721410758853, - "learning_rate": 3.048917067552722e-06, - "loss": 0.9836, - "step": 4585 - }, - { - "epoch": 0.34465654591913425, - "grad_norm": 2.056849281616219, - "learning_rate": 3.0485025215180554e-06, - "loss": 0.983, - "step": 4586 - }, - { - "epoch": 0.34473169998496916, - "grad_norm": 2.007766544988741, - "learning_rate": 3.0480879133556e-06, - "loss": 1.0177, - "step": 4587 - }, - { - "epoch": 0.34480685405080413, - "grad_norm": 1.8015843253620911, - "learning_rate": 3.047673243089922e-06, - "loss": 0.9727, - "step": 4588 - }, - { - "epoch": 0.3448820081166391, - "grad_norm": 1.5800327817920348, - "learning_rate": 3.047258510745593e-06, - "loss": 1.0255, - "step": 4589 - }, - { - "epoch": 0.3449571621824741, - "grad_norm": 3.5576813886003014, - "learning_rate": 3.046843716347187e-06, - "loss": 0.9814, - "step": 4590 - }, - { - "epoch": 0.34503231624830905, - "grad_norm": 1.52561466462019, - "learning_rate": 3.046428859919281e-06, - "loss": 1.0559, - "step": 4591 - }, - { - "epoch": 0.345107470314144, - "grad_norm": 1.6319575649662916, - "learning_rate": 3.0460139414864593e-06, - "loss": 0.8723, - "step": 4592 - }, - { - "epoch": 0.34518262437997893, - "grad_norm": 1.992976312985607, - "learning_rate": 3.0455989610733057e-06, - "loss": 0.9874, - "step": 4593 - }, - { - "epoch": 0.3452577784458139, - "grad_norm": 1.70411723219873, - "learning_rate": 3.0451839187044095e-06, - "loss": 1.0065, - "step": 4594 - }, - { - "epoch": 0.34533293251164887, - "grad_norm": 1.523296956157329, - "learning_rate": 3.0447688144043636e-06, - "loss": 1.0062, - "step": 4595 - }, - { - "epoch": 0.34540808657748384, - "grad_norm": 3.4797894192703414, - "learning_rate": 3.0443536481977657e-06, - "loss": 0.9742, - "step": 4596 - }, - { - "epoch": 0.3454832406433188, - "grad_norm": 1.8605439174259377, - "learning_rate": 3.0439384201092145e-06, - "loss": 1.0375, - "step": 4597 - }, - { - "epoch": 0.3455583947091538, - "grad_norm": 1.4456918579421125, - "learning_rate": 3.0435231301633147e-06, - "loss": 0.9808, - "step": 4598 - }, - { - "epoch": 0.34563354877498875, - "grad_norm": 1.7485400649627412, - "learning_rate": 3.043107778384673e-06, - "loss": 0.9967, - "step": 4599 - }, - { - "epoch": 0.34570870284082367, - "grad_norm": 1.6399516585831497, - "learning_rate": 3.0426923647979016e-06, - "loss": 1.0505, - "step": 4600 - }, - { - "epoch": 0.34578385690665864, - "grad_norm": 0.8314670207742965, - "learning_rate": 3.042276889427615e-06, - "loss": 0.8962, - "step": 4601 - }, - { - "epoch": 0.3458590109724936, - "grad_norm": 1.7376323045813469, - "learning_rate": 3.041861352298431e-06, - "loss": 1.0082, - "step": 4602 - }, - { - "epoch": 0.3459341650383286, - "grad_norm": 1.8259718515121333, - "learning_rate": 3.0414457534349727e-06, - "loss": 1.0304, - "step": 4603 - }, - { - "epoch": 0.34600931910416355, - "grad_norm": 2.9470730889935726, - "learning_rate": 3.041030092861866e-06, - "loss": 0.8972, - "step": 4604 - }, - { - "epoch": 0.3460844731699985, - "grad_norm": 2.364539449903542, - "learning_rate": 3.0406143706037384e-06, - "loss": 0.9043, - "step": 4605 - }, - { - "epoch": 0.34615962723583343, - "grad_norm": 1.756712505760723, - "learning_rate": 3.040198586685226e-06, - "loss": 0.9935, - "step": 4606 - }, - { - "epoch": 0.3462347813016684, - "grad_norm": 1.5962206072629126, - "learning_rate": 3.0397827411309632e-06, - "loss": 0.8538, - "step": 4607 - }, - { - "epoch": 0.3463099353675034, - "grad_norm": 1.664151113237196, - "learning_rate": 3.0393668339655917e-06, - "loss": 0.9676, - "step": 4608 - }, - { - "epoch": 0.34638508943333834, - "grad_norm": 2.1940068064734453, - "learning_rate": 3.0389508652137555e-06, - "loss": 0.9178, - "step": 4609 - }, - { - "epoch": 0.3464602434991733, - "grad_norm": 1.8189017268598864, - "learning_rate": 3.0385348349001023e-06, - "loss": 1.0475, - "step": 4610 - }, - { - "epoch": 0.3465353975650083, - "grad_norm": 2.064236690035276, - "learning_rate": 3.038118743049283e-06, - "loss": 1.1048, - "step": 4611 - }, - { - "epoch": 0.34661055163084326, - "grad_norm": 1.287108170967523, - "learning_rate": 3.0377025896859532e-06, - "loss": 1.0357, - "step": 4612 - }, - { - "epoch": 0.34668570569667817, - "grad_norm": 0.650964890085544, - "learning_rate": 3.037286374834771e-06, - "loss": 0.7657, - "step": 4613 - }, - { - "epoch": 0.34676085976251314, - "grad_norm": 1.7577067025453512, - "learning_rate": 3.036870098520399e-06, - "loss": 1.1141, - "step": 4614 - }, - { - "epoch": 0.3468360138283481, - "grad_norm": 1.3022051741497096, - "learning_rate": 3.036453760767504e-06, - "loss": 0.9207, - "step": 4615 - }, - { - "epoch": 0.3469111678941831, - "grad_norm": 1.4480275813737344, - "learning_rate": 3.036037361600754e-06, - "loss": 0.9415, - "step": 4616 - }, - { - "epoch": 0.34698632196001805, - "grad_norm": 1.6892361363768915, - "learning_rate": 3.0356209010448234e-06, - "loss": 0.9311, - "step": 4617 - }, - { - "epoch": 0.347061476025853, - "grad_norm": 2.2483214368946123, - "learning_rate": 3.0352043791243886e-06, - "loss": 1.0532, - "step": 4618 - }, - { - "epoch": 0.34713663009168794, - "grad_norm": 1.420321275874892, - "learning_rate": 3.0347877958641303e-06, - "loss": 0.9875, - "step": 4619 - }, - { - "epoch": 0.3472117841575229, - "grad_norm": 1.7204813473307583, - "learning_rate": 3.0343711512887325e-06, - "loss": 0.9954, - "step": 4620 - }, - { - "epoch": 0.3472869382233579, - "grad_norm": 1.4841471446982906, - "learning_rate": 3.0339544454228836e-06, - "loss": 1.0825, - "step": 4621 - }, - { - "epoch": 0.34736209228919285, - "grad_norm": 3.2365943555156496, - "learning_rate": 3.0335376782912742e-06, - "loss": 1.0171, - "step": 4622 - }, - { - "epoch": 0.3474372463550278, - "grad_norm": 1.4381742140245923, - "learning_rate": 3.0331208499185996e-06, - "loss": 0.969, - "step": 4623 - }, - { - "epoch": 0.3475124004208628, - "grad_norm": 1.4733558834444567, - "learning_rate": 3.0327039603295587e-06, - "loss": 1.0297, - "step": 4624 - }, - { - "epoch": 0.3475875544866977, - "grad_norm": 2.5118871368395945, - "learning_rate": 3.032287009548853e-06, - "loss": 1.0071, - "step": 4625 - }, - { - "epoch": 0.3476627085525327, - "grad_norm": 2.0949640881867726, - "learning_rate": 3.03186999760119e-06, - "loss": 0.9448, - "step": 4626 - }, - { - "epoch": 0.34773786261836764, - "grad_norm": 1.5449368443589728, - "learning_rate": 3.031452924511279e-06, - "loss": 1.0444, - "step": 4627 - }, - { - "epoch": 0.3478130166842026, - "grad_norm": 4.356414774291923, - "learning_rate": 3.031035790303831e-06, - "loss": 0.9917, - "step": 4628 - }, - { - "epoch": 0.3478881707500376, - "grad_norm": 1.9323404585306805, - "learning_rate": 3.030618595003565e-06, - "loss": 0.9641, - "step": 4629 - }, - { - "epoch": 0.34796332481587255, - "grad_norm": 2.2489931184859304, - "learning_rate": 3.0302013386352004e-06, - "loss": 0.9227, - "step": 4630 - }, - { - "epoch": 0.3480384788817075, - "grad_norm": 1.7935150577454373, - "learning_rate": 3.0297840212234623e-06, - "loss": 0.9147, - "step": 4631 - }, - { - "epoch": 0.34811363294754244, - "grad_norm": 2.359591139436321, - "learning_rate": 3.029366642793077e-06, - "loss": 1.0098, - "step": 4632 - }, - { - "epoch": 0.3481887870133774, - "grad_norm": 1.5241067052315873, - "learning_rate": 3.0289492033687768e-06, - "loss": 1.0359, - "step": 4633 - }, - { - "epoch": 0.3482639410792124, - "grad_norm": 1.7435370722013637, - "learning_rate": 3.0285317029752957e-06, - "loss": 0.9283, - "step": 4634 - }, - { - "epoch": 0.34833909514504735, - "grad_norm": 1.740837497855331, - "learning_rate": 3.028114141637373e-06, - "loss": 0.9873, - "step": 4635 - }, - { - "epoch": 0.3484142492108823, - "grad_norm": 1.6707321967181439, - "learning_rate": 3.0276965193797503e-06, - "loss": 1.135, - "step": 4636 - }, - { - "epoch": 0.3484894032767173, - "grad_norm": 2.2976033619407135, - "learning_rate": 3.0272788362271743e-06, - "loss": 1.0027, - "step": 4637 - }, - { - "epoch": 0.3485645573425522, - "grad_norm": 0.7678007235029528, - "learning_rate": 3.0268610922043925e-06, - "loss": 0.8424, - "step": 4638 - }, - { - "epoch": 0.3486397114083872, - "grad_norm": 1.6098391409890087, - "learning_rate": 3.0264432873361594e-06, - "loss": 0.968, - "step": 4639 - }, - { - "epoch": 0.34871486547422215, - "grad_norm": 1.624506068356006, - "learning_rate": 3.026025421647231e-06, - "loss": 1.0595, - "step": 4640 - }, - { - "epoch": 0.3487900195400571, - "grad_norm": 2.3616522100414885, - "learning_rate": 3.025607495162367e-06, - "loss": 0.996, - "step": 4641 - }, - { - "epoch": 0.3488651736058921, - "grad_norm": 2.212349807900159, - "learning_rate": 3.025189507906332e-06, - "loss": 1.0131, - "step": 4642 - }, - { - "epoch": 0.34894032767172706, - "grad_norm": 1.8680116757797873, - "learning_rate": 3.0247714599038936e-06, - "loss": 0.9837, - "step": 4643 - }, - { - "epoch": 0.349015481737562, - "grad_norm": 1.3245171404860279, - "learning_rate": 3.0243533511798205e-06, - "loss": 0.9925, - "step": 4644 - }, - { - "epoch": 0.34909063580339694, - "grad_norm": 1.6641756834124306, - "learning_rate": 3.0239351817588903e-06, - "loss": 0.9141, - "step": 4645 - }, - { - "epoch": 0.3491657898692319, - "grad_norm": 1.444360698000238, - "learning_rate": 3.023516951665879e-06, - "loss": 1.0098, - "step": 4646 - }, - { - "epoch": 0.3492409439350669, - "grad_norm": 1.8072641977060406, - "learning_rate": 3.0230986609255687e-06, - "loss": 1.0147, - "step": 4647 - }, - { - "epoch": 0.34931609800090185, - "grad_norm": 1.9782962357348386, - "learning_rate": 3.022680309562746e-06, - "loss": 1.0937, - "step": 4648 - }, - { - "epoch": 0.3493912520667368, - "grad_norm": 1.681598233033113, - "learning_rate": 3.022261897602198e-06, - "loss": 0.8924, - "step": 4649 - }, - { - "epoch": 0.3494664061325718, - "grad_norm": 1.633381648549262, - "learning_rate": 3.0218434250687184e-06, - "loss": 0.9667, - "step": 4650 - }, - { - "epoch": 0.3495415601984067, - "grad_norm": 1.857411221093591, - "learning_rate": 3.021424891987103e-06, - "loss": 0.9562, - "step": 4651 - }, - { - "epoch": 0.3496167142642417, - "grad_norm": 2.079308079845761, - "learning_rate": 3.0210062983821513e-06, - "loss": 0.9615, - "step": 4652 - }, - { - "epoch": 0.34969186833007665, - "grad_norm": 1.8419856665443965, - "learning_rate": 3.0205876442786666e-06, - "loss": 0.9588, - "step": 4653 - }, - { - "epoch": 0.3497670223959116, - "grad_norm": 2.60087131792333, - "learning_rate": 3.0201689297014565e-06, - "loss": 0.9698, - "step": 4654 - }, - { - "epoch": 0.3498421764617466, - "grad_norm": 2.435421277366257, - "learning_rate": 3.01975015467533e-06, - "loss": 1.0169, - "step": 4655 - }, - { - "epoch": 0.34991733052758156, - "grad_norm": 1.4943581017687901, - "learning_rate": 3.019331319225103e-06, - "loss": 1.0263, - "step": 4656 - }, - { - "epoch": 0.34999248459341653, - "grad_norm": 3.247451222958646, - "learning_rate": 3.018912423375591e-06, - "loss": 0.9667, - "step": 4657 - }, - { - "epoch": 0.35006763865925145, - "grad_norm": 1.5920955073591605, - "learning_rate": 3.018493467151616e-06, - "loss": 0.9672, - "step": 4658 - }, - { - "epoch": 0.3501427927250864, - "grad_norm": 3.732893390505597, - "learning_rate": 3.0180744505780045e-06, - "loss": 0.8768, - "step": 4659 - }, - { - "epoch": 0.3502179467909214, - "grad_norm": 1.8747665259913138, - "learning_rate": 3.0176553736795827e-06, - "loss": 0.9822, - "step": 4660 - }, - { - "epoch": 0.35029310085675636, - "grad_norm": 1.4613358877506577, - "learning_rate": 3.0172362364811827e-06, - "loss": 0.9924, - "step": 4661 - }, - { - "epoch": 0.3503682549225913, - "grad_norm": 1.8848753585890983, - "learning_rate": 3.016817039007641e-06, - "loss": 1.0049, - "step": 4662 - }, - { - "epoch": 0.3504434089884263, - "grad_norm": 1.7678529304465218, - "learning_rate": 3.0163977812837954e-06, - "loss": 1.0181, - "step": 4663 - }, - { - "epoch": 0.3505185630542612, - "grad_norm": 1.7071487072093665, - "learning_rate": 3.0159784633344894e-06, - "loss": 0.9111, - "step": 4664 - }, - { - "epoch": 0.3505937171200962, - "grad_norm": 0.6901779255346696, - "learning_rate": 3.0155590851845694e-06, - "loss": 0.8317, - "step": 4665 - }, - { - "epoch": 0.35066887118593115, - "grad_norm": 1.5879214801113124, - "learning_rate": 3.0151396468588844e-06, - "loss": 0.9903, - "step": 4666 - }, - { - "epoch": 0.3507440252517661, - "grad_norm": 2.3667778441234906, - "learning_rate": 3.0147201483822884e-06, - "loss": 1.0433, - "step": 4667 - }, - { - "epoch": 0.3508191793176011, - "grad_norm": 1.4138040042886866, - "learning_rate": 3.014300589779638e-06, - "loss": 1.0001, - "step": 4668 - }, - { - "epoch": 0.35089433338343606, - "grad_norm": 4.011141281390181, - "learning_rate": 3.0138809710757927e-06, - "loss": 1.0109, - "step": 4669 - }, - { - "epoch": 0.350969487449271, - "grad_norm": 1.9989837806543638, - "learning_rate": 3.013461292295619e-06, - "loss": 0.9884, - "step": 4670 - }, - { - "epoch": 0.35104464151510595, - "grad_norm": 1.9551536705520163, - "learning_rate": 3.013041553463982e-06, - "loss": 1.0514, - "step": 4671 - }, - { - "epoch": 0.3511197955809409, - "grad_norm": 1.5200729385242795, - "learning_rate": 3.012621754605754e-06, - "loss": 0.8502, - "step": 4672 - }, - { - "epoch": 0.3511949496467759, - "grad_norm": 1.3346968126519168, - "learning_rate": 3.012201895745809e-06, - "loss": 0.9047, - "step": 4673 - }, - { - "epoch": 0.35127010371261086, - "grad_norm": 1.8521829201435884, - "learning_rate": 3.011781976909026e-06, - "loss": 0.9731, - "step": 4674 - }, - { - "epoch": 0.35134525777844583, - "grad_norm": 2.505569453129237, - "learning_rate": 3.011361998120287e-06, - "loss": 0.9461, - "step": 4675 - }, - { - "epoch": 0.3514204118442808, - "grad_norm": 1.6628179776697065, - "learning_rate": 3.0109419594044765e-06, - "loss": 1.0498, - "step": 4676 - }, - { - "epoch": 0.3514955659101157, - "grad_norm": 1.8908853272840505, - "learning_rate": 3.0105218607864835e-06, - "loss": 1.0763, - "step": 4677 - }, - { - "epoch": 0.3515707199759507, - "grad_norm": 1.621730449607481, - "learning_rate": 3.010101702291201e-06, - "loss": 1.0372, - "step": 4678 - }, - { - "epoch": 0.35164587404178566, - "grad_norm": 5.324892019690381, - "learning_rate": 3.0096814839435244e-06, - "loss": 0.949, - "step": 4679 - }, - { - "epoch": 0.3517210281076206, - "grad_norm": 1.571551237644355, - "learning_rate": 3.0092612057683532e-06, - "loss": 0.9289, - "step": 4680 - }, - { - "epoch": 0.3517961821734556, - "grad_norm": 1.2487892963385698, - "learning_rate": 3.0088408677905913e-06, - "loss": 1.0932, - "step": 4681 - }, - { - "epoch": 0.35187133623929057, - "grad_norm": 2.50556612268641, - "learning_rate": 3.0084204700351453e-06, - "loss": 0.8776, - "step": 4682 - }, - { - "epoch": 0.3519464903051255, - "grad_norm": 1.6153786535991443, - "learning_rate": 3.0080000125269242e-06, - "loss": 0.9769, - "step": 4683 - }, - { - "epoch": 0.35202164437096045, - "grad_norm": 1.5287529963591087, - "learning_rate": 3.0075794952908436e-06, - "loss": 0.8977, - "step": 4684 - }, - { - "epoch": 0.3520967984367954, - "grad_norm": 3.2056551732878034, - "learning_rate": 3.007158918351818e-06, - "loss": 0.9761, - "step": 4685 - }, - { - "epoch": 0.3521719525026304, - "grad_norm": 1.7842410222677914, - "learning_rate": 3.0067382817347712e-06, - "loss": 0.9073, - "step": 4686 - }, - { - "epoch": 0.35224710656846536, - "grad_norm": 1.4509102956541304, - "learning_rate": 3.006317585464626e-06, - "loss": 1.0592, - "step": 4687 - }, - { - "epoch": 0.35232226063430033, - "grad_norm": 1.6244053119269162, - "learning_rate": 3.0058968295663094e-06, - "loss": 1.0567, - "step": 4688 - }, - { - "epoch": 0.3523974147001353, - "grad_norm": 1.6679484524709083, - "learning_rate": 3.0054760140647547e-06, - "loss": 1.0158, - "step": 4689 - }, - { - "epoch": 0.3524725687659702, - "grad_norm": 1.8494108550736523, - "learning_rate": 3.005055138984896e-06, - "loss": 1.0356, - "step": 4690 - }, - { - "epoch": 0.3525477228318052, - "grad_norm": 1.477745510687581, - "learning_rate": 3.0046342043516707e-06, - "loss": 1.024, - "step": 4691 - }, - { - "epoch": 0.35262287689764016, - "grad_norm": 2.038120329803853, - "learning_rate": 3.0042132101900228e-06, - "loss": 1.0218, - "step": 4692 - }, - { - "epoch": 0.35269803096347513, - "grad_norm": 1.715217497466106, - "learning_rate": 3.003792156524897e-06, - "loss": 0.9559, - "step": 4693 - }, - { - "epoch": 0.3527731850293101, - "grad_norm": 0.7360318749045787, - "learning_rate": 3.003371043381241e-06, - "loss": 0.852, - "step": 4694 - }, - { - "epoch": 0.35284833909514507, - "grad_norm": 1.801615755430512, - "learning_rate": 3.0029498707840094e-06, - "loss": 1.0393, - "step": 4695 - }, - { - "epoch": 0.35292349316098, - "grad_norm": 2.00876057724699, - "learning_rate": 3.002528638758157e-06, - "loss": 1.0129, - "step": 4696 - }, - { - "epoch": 0.35299864722681495, - "grad_norm": 2.1662505068065756, - "learning_rate": 3.0021073473286446e-06, - "loss": 0.8403, - "step": 4697 - }, - { - "epoch": 0.3530738012926499, - "grad_norm": 1.6191018977670255, - "learning_rate": 3.0016859965204336e-06, - "loss": 1.0244, - "step": 4698 - }, - { - "epoch": 0.3531489553584849, - "grad_norm": 0.6471492655837383, - "learning_rate": 3.001264586358492e-06, - "loss": 0.8138, - "step": 4699 - }, - { - "epoch": 0.35322410942431987, - "grad_norm": 2.1036495920519025, - "learning_rate": 3.0008431168677898e-06, - "loss": 1.0348, - "step": 4700 - }, - { - "epoch": 0.35329926349015484, - "grad_norm": 2.0890442076333695, - "learning_rate": 3.0004215880732993e-06, - "loss": 1.037, - "step": 4701 - }, - { - "epoch": 0.3533744175559898, - "grad_norm": 13.833748746096164, - "learning_rate": 3e-06, - "loss": 1.0237, - "step": 4702 - }, - { - "epoch": 0.3534495716218247, - "grad_norm": 1.368777500381467, - "learning_rate": 2.999578352672871e-06, - "loss": 1.0541, - "step": 4703 - }, - { - "epoch": 0.3535247256876597, - "grad_norm": 1.343199018140301, - "learning_rate": 2.9991566461168974e-06, - "loss": 0.9733, - "step": 4704 - }, - { - "epoch": 0.35359987975349466, - "grad_norm": 13.730581547367404, - "learning_rate": 2.998734880357066e-06, - "loss": 0.8867, - "step": 4705 - }, - { - "epoch": 0.35367503381932963, - "grad_norm": 1.7658973416292056, - "learning_rate": 2.998313055418369e-06, - "loss": 1.1093, - "step": 4706 - }, - { - "epoch": 0.3537501878851646, - "grad_norm": 2.351700281308026, - "learning_rate": 2.9978911713257998e-06, - "loss": 1.0685, - "step": 4707 - }, - { - "epoch": 0.3538253419509996, - "grad_norm": 1.6188824750385868, - "learning_rate": 2.997469228104358e-06, - "loss": 0.973, - "step": 4708 - }, - { - "epoch": 0.3539004960168345, - "grad_norm": 1.4382431762894154, - "learning_rate": 2.9970472257790454e-06, - "loss": 0.993, - "step": 4709 - }, - { - "epoch": 0.35397565008266946, - "grad_norm": 1.9936867252461836, - "learning_rate": 2.996625164374866e-06, - "loss": 1.0249, - "step": 4710 - }, - { - "epoch": 0.3540508041485044, - "grad_norm": 0.6223187392196825, - "learning_rate": 2.9962030439168297e-06, - "loss": 0.8492, - "step": 4711 - }, - { - "epoch": 0.3541259582143394, - "grad_norm": 1.6817471673158186, - "learning_rate": 2.995780864429948e-06, - "loss": 0.7715, - "step": 4712 - }, - { - "epoch": 0.35420111228017437, - "grad_norm": 1.2719859175622281, - "learning_rate": 2.9953586259392366e-06, - "loss": 0.9238, - "step": 4713 - }, - { - "epoch": 0.35427626634600934, - "grad_norm": 1.79614096866114, - "learning_rate": 2.994936328469716e-06, - "loss": 1.032, - "step": 4714 - }, - { - "epoch": 0.35435142041184425, - "grad_norm": 2.41599648050974, - "learning_rate": 2.9945139720464082e-06, - "loss": 0.9702, - "step": 4715 - }, - { - "epoch": 0.3544265744776792, - "grad_norm": 1.899945574533056, - "learning_rate": 2.9940915566943384e-06, - "loss": 0.9439, - "step": 4716 - }, - { - "epoch": 0.3545017285435142, - "grad_norm": 1.5923987251309215, - "learning_rate": 2.9936690824385383e-06, - "loss": 1.0874, - "step": 4717 - }, - { - "epoch": 0.35457688260934916, - "grad_norm": 1.7836141457312624, - "learning_rate": 2.9932465493040393e-06, - "loss": 0.9656, - "step": 4718 - }, - { - "epoch": 0.35465203667518413, - "grad_norm": 1.6172385829652864, - "learning_rate": 2.992823957315879e-06, - "loss": 1.0115, - "step": 4719 - }, - { - "epoch": 0.3547271907410191, - "grad_norm": 2.489649326189337, - "learning_rate": 2.9924013064990974e-06, - "loss": 1.0655, - "step": 4720 - }, - { - "epoch": 0.3548023448068541, - "grad_norm": 2.068037978895291, - "learning_rate": 2.9919785968787384e-06, - "loss": 1.0606, - "step": 4721 - }, - { - "epoch": 0.354877498872689, - "grad_norm": 1.9600168323767204, - "learning_rate": 2.991555828479849e-06, - "loss": 1.0032, - "step": 4722 - }, - { - "epoch": 0.35495265293852396, - "grad_norm": 1.482562032112616, - "learning_rate": 2.9911330013274792e-06, - "loss": 0.9275, - "step": 4723 - }, - { - "epoch": 0.35502780700435893, - "grad_norm": 0.6169553634025212, - "learning_rate": 2.990710115446684e-06, - "loss": 0.7719, - "step": 4724 - }, - { - "epoch": 0.3551029610701939, - "grad_norm": 3.767173860098829, - "learning_rate": 2.9902871708625216e-06, - "loss": 1.0901, - "step": 4725 - }, - { - "epoch": 0.35517811513602887, - "grad_norm": 1.798508672689611, - "learning_rate": 2.9898641676000518e-06, - "loss": 0.9428, - "step": 4726 - }, - { - "epoch": 0.35525326920186384, - "grad_norm": 1.6226610076202748, - "learning_rate": 2.9894411056843396e-06, - "loss": 0.9967, - "step": 4727 - }, - { - "epoch": 0.35532842326769876, - "grad_norm": 1.7402204005646824, - "learning_rate": 2.9890179851404533e-06, - "loss": 0.9535, - "step": 4728 - }, - { - "epoch": 0.3554035773335337, - "grad_norm": 2.0392085348283646, - "learning_rate": 2.9885948059934635e-06, - "loss": 1.0415, - "step": 4729 - }, - { - "epoch": 0.3554787313993687, - "grad_norm": 7.7429260455953335, - "learning_rate": 2.988171568268446e-06, - "loss": 1.0426, - "step": 4730 - }, - { - "epoch": 0.35555388546520367, - "grad_norm": 2.937978664925782, - "learning_rate": 2.98774827199048e-06, - "loss": 1.0221, - "step": 4731 - }, - { - "epoch": 0.35562903953103864, - "grad_norm": 1.423211528038995, - "learning_rate": 2.9873249171846454e-06, - "loss": 0.9506, - "step": 4732 - }, - { - "epoch": 0.3557041935968736, - "grad_norm": 2.2794529355581803, - "learning_rate": 2.9869015038760296e-06, - "loss": 0.9534, - "step": 4733 - }, - { - "epoch": 0.3557793476627086, - "grad_norm": 1.6295449447824506, - "learning_rate": 2.98647803208972e-06, - "loss": 1.1166, - "step": 4734 - }, - { - "epoch": 0.3558545017285435, - "grad_norm": 4.258890613936288, - "learning_rate": 2.98605450185081e-06, - "loss": 1.0102, - "step": 4735 - }, - { - "epoch": 0.35592965579437846, - "grad_norm": 1.4728304408336654, - "learning_rate": 2.9856309131843945e-06, - "loss": 1.0138, - "step": 4736 - }, - { - "epoch": 0.35600480986021343, - "grad_norm": 2.0319483803552854, - "learning_rate": 2.985207266115574e-06, - "loss": 0.978, - "step": 4737 - }, - { - "epoch": 0.3560799639260484, - "grad_norm": 1.7651555610379177, - "learning_rate": 2.9847835606694494e-06, - "loss": 1.0681, - "step": 4738 - }, - { - "epoch": 0.3561551179918834, - "grad_norm": 2.297305838731252, - "learning_rate": 2.9843597968711285e-06, - "loss": 0.9086, - "step": 4739 - }, - { - "epoch": 0.35623027205771834, - "grad_norm": 0.7098563220829259, - "learning_rate": 2.9839359747457195e-06, - "loss": 0.7821, - "step": 4740 - }, - { - "epoch": 0.35630542612355326, - "grad_norm": 2.1588245446566092, - "learning_rate": 2.9835120943183374e-06, - "loss": 1.0179, - "step": 4741 - }, - { - "epoch": 0.35638058018938823, - "grad_norm": 1.4571102826567297, - "learning_rate": 2.9830881556140965e-06, - "loss": 0.9463, - "step": 4742 - }, - { - "epoch": 0.3564557342552232, - "grad_norm": 1.6497655644186249, - "learning_rate": 2.9826641586581184e-06, - "loss": 0.9899, - "step": 4743 - }, - { - "epoch": 0.35653088832105817, - "grad_norm": 1.9790831163405638, - "learning_rate": 2.9822401034755255e-06, - "loss": 1.0827, - "step": 4744 - }, - { - "epoch": 0.35660604238689314, - "grad_norm": 1.8630254477924648, - "learning_rate": 2.981815990091446e-06, - "loss": 0.971, - "step": 4745 - }, - { - "epoch": 0.3566811964527281, - "grad_norm": 1.6461970575229643, - "learning_rate": 2.9813918185310085e-06, - "loss": 1.102, - "step": 4746 - }, - { - "epoch": 0.3567563505185631, - "grad_norm": 1.7156548808401872, - "learning_rate": 2.9809675888193486e-06, - "loss": 1.0277, - "step": 4747 - }, - { - "epoch": 0.356831504584398, - "grad_norm": 0.790352475085618, - "learning_rate": 2.9805433009816024e-06, - "loss": 0.872, - "step": 4748 - }, - { - "epoch": 0.35690665865023297, - "grad_norm": 1.4273524993477993, - "learning_rate": 2.980118955042911e-06, - "loss": 0.9859, - "step": 4749 - }, - { - "epoch": 0.35698181271606794, - "grad_norm": 1.6165238645085809, - "learning_rate": 2.9796945510284187e-06, - "loss": 0.9011, - "step": 4750 - }, - { - "epoch": 0.3570569667819029, - "grad_norm": 2.4750616778770693, - "learning_rate": 2.9792700889632716e-06, - "loss": 1.0302, - "step": 4751 - }, - { - "epoch": 0.3571321208477379, - "grad_norm": 1.5525873462474378, - "learning_rate": 2.9788455688726234e-06, - "loss": 0.9841, - "step": 4752 - }, - { - "epoch": 0.35720727491357285, - "grad_norm": 1.6046999833346676, - "learning_rate": 2.978420990781626e-06, - "loss": 1.0578, - "step": 4753 - }, - { - "epoch": 0.35728242897940776, - "grad_norm": 1.6505526310786989, - "learning_rate": 2.977996354715438e-06, - "loss": 0.887, - "step": 4754 - }, - { - "epoch": 0.35735758304524273, - "grad_norm": 1.5645242167689726, - "learning_rate": 2.9775716606992217e-06, - "loss": 1.0217, - "step": 4755 - }, - { - "epoch": 0.3574327371110777, - "grad_norm": 1.7129138001077964, - "learning_rate": 2.977146908758141e-06, - "loss": 1.0834, - "step": 4756 - }, - { - "epoch": 0.3575078911769127, - "grad_norm": 2.2957391330711463, - "learning_rate": 2.9767220989173635e-06, - "loss": 0.9903, - "step": 4757 - }, - { - "epoch": 0.35758304524274764, - "grad_norm": 1.5107771432355028, - "learning_rate": 2.9762972312020623e-06, - "loss": 1.0093, - "step": 4758 - }, - { - "epoch": 0.3576581993085826, - "grad_norm": 1.5882419324249961, - "learning_rate": 2.975872305637412e-06, - "loss": 0.9198, - "step": 4759 - }, - { - "epoch": 0.35773335337441753, - "grad_norm": 1.5616834414194234, - "learning_rate": 2.97544732224859e-06, - "loss": 0.9341, - "step": 4760 - }, - { - "epoch": 0.3578085074402525, - "grad_norm": 1.812999459213454, - "learning_rate": 2.975022281060779e-06, - "loss": 0.9262, - "step": 4761 - }, - { - "epoch": 0.35788366150608747, - "grad_norm": 1.7053506290422664, - "learning_rate": 2.9745971820991643e-06, - "loss": 0.9785, - "step": 4762 - }, - { - "epoch": 0.35795881557192244, - "grad_norm": 1.3904228492136765, - "learning_rate": 2.9741720253889346e-06, - "loss": 0.9548, - "step": 4763 - }, - { - "epoch": 0.3580339696377574, - "grad_norm": 3.4872870486199807, - "learning_rate": 2.9737468109552827e-06, - "loss": 1.0027, - "step": 4764 - }, - { - "epoch": 0.3581091237035924, - "grad_norm": 6.546046675765369, - "learning_rate": 2.973321538823402e-06, - "loss": 1.0233, - "step": 4765 - }, - { - "epoch": 0.35818427776942735, - "grad_norm": 1.5262220707867808, - "learning_rate": 2.9728962090184938e-06, - "loss": 1.1792, - "step": 4766 - }, - { - "epoch": 0.35825943183526227, - "grad_norm": 0.7714700045635502, - "learning_rate": 2.9724708215657603e-06, - "loss": 0.9074, - "step": 4767 - }, - { - "epoch": 0.35833458590109724, - "grad_norm": 1.7273760643761222, - "learning_rate": 2.972045376490406e-06, - "loss": 0.9611, - "step": 4768 - }, - { - "epoch": 0.3584097399669322, - "grad_norm": 1.8222689613238148, - "learning_rate": 2.971619873817642e-06, - "loss": 0.891, - "step": 4769 - }, - { - "epoch": 0.3584848940327672, - "grad_norm": 1.9884269975705315, - "learning_rate": 2.971194313572679e-06, - "loss": 0.9892, - "step": 4770 - }, - { - "epoch": 0.35856004809860215, - "grad_norm": 1.5731584561002996, - "learning_rate": 2.970768695780734e-06, - "loss": 0.9711, - "step": 4771 - }, - { - "epoch": 0.3586352021644371, - "grad_norm": 2.0974218709987382, - "learning_rate": 2.970343020467027e-06, - "loss": 0.8785, - "step": 4772 - }, - { - "epoch": 0.35871035623027203, - "grad_norm": 0.793801044715142, - "learning_rate": 2.9699172876567795e-06, - "loss": 0.8465, - "step": 4773 - }, - { - "epoch": 0.358785510296107, - "grad_norm": 1.525846952790431, - "learning_rate": 2.969491497375219e-06, - "loss": 0.9622, - "step": 4774 - }, - { - "epoch": 0.358860664361942, - "grad_norm": 1.7173554051152713, - "learning_rate": 2.969065649647575e-06, - "loss": 1.029, - "step": 4775 - }, - { - "epoch": 0.35893581842777694, - "grad_norm": 8.160205802566157, - "learning_rate": 2.9686397444990803e-06, - "loss": 1.024, - "step": 4776 - }, - { - "epoch": 0.3590109724936119, - "grad_norm": 1.8665672133836642, - "learning_rate": 2.9682137819549718e-06, - "loss": 0.9821, - "step": 4777 - }, - { - "epoch": 0.3590861265594469, - "grad_norm": 1.7310423578449259, - "learning_rate": 2.9677877620404887e-06, - "loss": 1.0533, - "step": 4778 - }, - { - "epoch": 0.35916128062528185, - "grad_norm": 1.813272114426241, - "learning_rate": 2.9673616847808755e-06, - "loss": 0.9704, - "step": 4779 - }, - { - "epoch": 0.35923643469111677, - "grad_norm": 2.6402584927616184, - "learning_rate": 2.966935550201378e-06, - "loss": 0.9608, - "step": 4780 - }, - { - "epoch": 0.35931158875695174, - "grad_norm": 1.6082476769336709, - "learning_rate": 2.9665093583272463e-06, - "loss": 0.93, - "step": 4781 - }, - { - "epoch": 0.3593867428227867, - "grad_norm": 0.7980209413591525, - "learning_rate": 2.966083109183734e-06, - "loss": 0.9384, - "step": 4782 - }, - { - "epoch": 0.3594618968886217, - "grad_norm": 1.6184635011919644, - "learning_rate": 2.9656568027960984e-06, - "loss": 0.9692, - "step": 4783 - }, - { - "epoch": 0.35953705095445665, - "grad_norm": 6.382120083216219, - "learning_rate": 2.9652304391895994e-06, - "loss": 0.9929, - "step": 4784 - }, - { - "epoch": 0.3596122050202916, - "grad_norm": 1.6713131111913946, - "learning_rate": 2.9648040183895004e-06, - "loss": 1.0593, - "step": 4785 - }, - { - "epoch": 0.35968735908612653, - "grad_norm": 2.4145788733040994, - "learning_rate": 2.964377540421069e-06, - "loss": 1.1243, - "step": 4786 - }, - { - "epoch": 0.3597625131519615, - "grad_norm": 1.9656244323438543, - "learning_rate": 2.963951005309576e-06, - "loss": 0.9554, - "step": 4787 - }, - { - "epoch": 0.3598376672177965, - "grad_norm": 2.3182012026787224, - "learning_rate": 2.963524413080294e-06, - "loss": 0.9738, - "step": 4788 - }, - { - "epoch": 0.35991282128363145, - "grad_norm": 2.796506292665148, - "learning_rate": 2.9630977637585016e-06, - "loss": 0.9448, - "step": 4789 - }, - { - "epoch": 0.3599879753494664, - "grad_norm": 1.6516289213135869, - "learning_rate": 2.9626710573694783e-06, - "loss": 0.9726, - "step": 4790 - }, - { - "epoch": 0.3600631294153014, - "grad_norm": 1.5750126126707753, - "learning_rate": 2.9622442939385085e-06, - "loss": 1.07, - "step": 4791 - }, - { - "epoch": 0.36013828348113636, - "grad_norm": 1.395902285604589, - "learning_rate": 2.96181747349088e-06, - "loss": 1.0901, - "step": 4792 - }, - { - "epoch": 0.36021343754697127, - "grad_norm": 2.1095415967921065, - "learning_rate": 2.9613905960518832e-06, - "loss": 0.9159, - "step": 4793 - }, - { - "epoch": 0.36028859161280624, - "grad_norm": 1.6609244689717315, - "learning_rate": 2.960963661646812e-06, - "loss": 1.0274, - "step": 4794 - }, - { - "epoch": 0.3603637456786412, - "grad_norm": 1.4880219481449226, - "learning_rate": 2.960536670300963e-06, - "loss": 1.0086, - "step": 4795 - }, - { - "epoch": 0.3604388997444762, - "grad_norm": 2.569484681739435, - "learning_rate": 2.9601096220396392e-06, - "loss": 0.9471, - "step": 4796 - }, - { - "epoch": 0.36051405381031115, - "grad_norm": 1.8893837755752165, - "learning_rate": 2.9596825168881444e-06, - "loss": 1.1086, - "step": 4797 - }, - { - "epoch": 0.3605892078761461, - "grad_norm": 2.695401397565907, - "learning_rate": 2.9592553548717848e-06, - "loss": 1.1295, - "step": 4798 - }, - { - "epoch": 0.36066436194198104, - "grad_norm": 1.5048915261245945, - "learning_rate": 2.958828136015872e-06, - "loss": 1.0387, - "step": 4799 - }, - { - "epoch": 0.360739516007816, - "grad_norm": 1.9344102156827652, - "learning_rate": 2.958400860345721e-06, - "loss": 1.0027, - "step": 4800 - }, - { - "epoch": 0.360814670073651, - "grad_norm": 1.7133877423227915, - "learning_rate": 2.9579735278866488e-06, - "loss": 0.9409, - "step": 4801 - }, - { - "epoch": 0.36088982413948595, - "grad_norm": 1.9308316935122192, - "learning_rate": 2.9575461386639768e-06, - "loss": 0.9741, - "step": 4802 - }, - { - "epoch": 0.3609649782053209, - "grad_norm": 1.8284276076045807, - "learning_rate": 2.95711869270303e-06, - "loss": 0.9297, - "step": 4803 - }, - { - "epoch": 0.3610401322711559, - "grad_norm": 1.5675373512210624, - "learning_rate": 2.9566911900291346e-06, - "loss": 0.98, - "step": 4804 - }, - { - "epoch": 0.3611152863369908, - "grad_norm": 1.6965905219767894, - "learning_rate": 2.9562636306676237e-06, - "loss": 1.0997, - "step": 4805 - }, - { - "epoch": 0.3611904404028258, - "grad_norm": 1.4376087976830767, - "learning_rate": 2.9558360146438303e-06, - "loss": 0.9625, - "step": 4806 - }, - { - "epoch": 0.36126559446866074, - "grad_norm": 1.3609397914442356, - "learning_rate": 2.9554083419830925e-06, - "loss": 0.8654, - "step": 4807 - }, - { - "epoch": 0.3613407485344957, - "grad_norm": 2.1139729437851527, - "learning_rate": 2.954980612710753e-06, - "loss": 0.9884, - "step": 4808 - }, - { - "epoch": 0.3614159026003307, - "grad_norm": 2.2467849861970413, - "learning_rate": 2.9545528268521548e-06, - "loss": 1.0392, - "step": 4809 - }, - { - "epoch": 0.36149105666616566, - "grad_norm": 1.757285823333679, - "learning_rate": 2.954124984432646e-06, - "loss": 0.9687, - "step": 4810 - }, - { - "epoch": 0.3615662107320006, - "grad_norm": 0.7936481891127644, - "learning_rate": 2.953697085477579e-06, - "loss": 0.9441, - "step": 4811 - }, - { - "epoch": 0.36164136479783554, - "grad_norm": 2.114166582002539, - "learning_rate": 2.953269130012307e-06, - "loss": 0.9503, - "step": 4812 - }, - { - "epoch": 0.3617165188636705, - "grad_norm": 2.1272977859737314, - "learning_rate": 2.9528411180621894e-06, - "loss": 0.8862, - "step": 4813 - }, - { - "epoch": 0.3617916729295055, - "grad_norm": 1.5082139187465842, - "learning_rate": 2.952413049652587e-06, - "loss": 0.9598, - "step": 4814 - }, - { - "epoch": 0.36186682699534045, - "grad_norm": 1.7482727245859697, - "learning_rate": 2.9519849248088633e-06, - "loss": 0.9535, - "step": 4815 - }, - { - "epoch": 0.3619419810611754, - "grad_norm": 4.582038950933959, - "learning_rate": 2.9515567435563886e-06, - "loss": 1.0072, - "step": 4816 - }, - { - "epoch": 0.3620171351270104, - "grad_norm": 1.916576804943182, - "learning_rate": 2.951128505920532e-06, - "loss": 0.9762, - "step": 4817 - }, - { - "epoch": 0.3620922891928453, - "grad_norm": 1.9693456081859275, - "learning_rate": 2.95070021192667e-06, - "loss": 1.0526, - "step": 4818 - }, - { - "epoch": 0.3621674432586803, - "grad_norm": 3.437090623927784, - "learning_rate": 2.9502718616001803e-06, - "loss": 0.9997, - "step": 4819 - }, - { - "epoch": 0.36224259732451525, - "grad_norm": 0.7626661619899432, - "learning_rate": 2.9498434549664434e-06, - "loss": 0.8087, - "step": 4820 - }, - { - "epoch": 0.3623177513903502, - "grad_norm": 2.0600652484374917, - "learning_rate": 2.9494149920508443e-06, - "loss": 1.0427, - "step": 4821 - }, - { - "epoch": 0.3623929054561852, - "grad_norm": 1.4801218235293314, - "learning_rate": 2.9489864728787722e-06, - "loss": 0.9796, - "step": 4822 - }, - { - "epoch": 0.36246805952202016, - "grad_norm": 1.5688941585370022, - "learning_rate": 2.9485578974756167e-06, - "loss": 0.9176, - "step": 4823 - }, - { - "epoch": 0.36254321358785513, - "grad_norm": 1.7480101171283677, - "learning_rate": 2.9481292658667743e-06, - "loss": 1.033, - "step": 4824 - }, - { - "epoch": 0.36261836765369004, - "grad_norm": 1.5405576772469243, - "learning_rate": 2.947700578077643e-06, - "loss": 1.0562, - "step": 4825 - }, - { - "epoch": 0.362693521719525, - "grad_norm": 1.901699074422508, - "learning_rate": 2.947271834133622e-06, - "loss": 0.913, - "step": 4826 - }, - { - "epoch": 0.36276867578536, - "grad_norm": 1.6188721658568308, - "learning_rate": 2.946843034060118e-06, - "loss": 1.0347, - "step": 4827 - }, - { - "epoch": 0.36284382985119495, - "grad_norm": 1.9480197559623853, - "learning_rate": 2.9464141778825384e-06, - "loss": 1.0676, - "step": 4828 - }, - { - "epoch": 0.3629189839170299, - "grad_norm": 1.9279862290559342, - "learning_rate": 2.9459852656262945e-06, - "loss": 0.9593, - "step": 4829 - }, - { - "epoch": 0.3629941379828649, - "grad_norm": 1.9307511213063007, - "learning_rate": 2.945556297316802e-06, - "loss": 0.9233, - "step": 4830 - }, - { - "epoch": 0.3630692920486998, - "grad_norm": 1.6818072051550021, - "learning_rate": 2.9451272729794774e-06, - "loss": 1.0408, - "step": 4831 - }, - { - "epoch": 0.3631444461145348, - "grad_norm": 3.554619891183976, - "learning_rate": 2.944698192639743e-06, - "loss": 0.9406, - "step": 4832 - }, - { - "epoch": 0.36321960018036975, - "grad_norm": 1.7664462179920308, - "learning_rate": 2.944269056323023e-06, - "loss": 1.0086, - "step": 4833 - }, - { - "epoch": 0.3632947542462047, - "grad_norm": 1.7592664342960733, - "learning_rate": 2.9438398640547453e-06, - "loss": 0.8769, - "step": 4834 - }, - { - "epoch": 0.3633699083120397, - "grad_norm": 3.246635309207619, - "learning_rate": 2.943410615860342e-06, - "loss": 0.9736, - "step": 4835 - }, - { - "epoch": 0.36344506237787466, - "grad_norm": 1.696354840045905, - "learning_rate": 2.9429813117652478e-06, - "loss": 1.0318, - "step": 4836 - }, - { - "epoch": 0.36352021644370963, - "grad_norm": 2.961780599171207, - "learning_rate": 2.942551951794899e-06, - "loss": 1.0764, - "step": 4837 - }, - { - "epoch": 0.36359537050954455, - "grad_norm": 2.3485272358027123, - "learning_rate": 2.942122535974738e-06, - "loss": 0.9846, - "step": 4838 - }, - { - "epoch": 0.3636705245753795, - "grad_norm": 1.3251436767413443, - "learning_rate": 2.9416930643302086e-06, - "loss": 1.0296, - "step": 4839 - }, - { - "epoch": 0.3637456786412145, - "grad_norm": 2.7641941830593115, - "learning_rate": 2.9412635368867596e-06, - "loss": 1.0357, - "step": 4840 - }, - { - "epoch": 0.36382083270704946, - "grad_norm": 1.3191261604742814, - "learning_rate": 2.9408339536698422e-06, - "loss": 0.9193, - "step": 4841 - }, - { - "epoch": 0.3638959867728844, - "grad_norm": 1.9297901079952031, - "learning_rate": 2.9404043147049097e-06, - "loss": 0.9939, - "step": 4842 - }, - { - "epoch": 0.3639711408387194, - "grad_norm": 1.964491583204407, - "learning_rate": 2.9399746200174206e-06, - "loss": 1.0028, - "step": 4843 - }, - { - "epoch": 0.3640462949045543, - "grad_norm": 1.2942273793387047, - "learning_rate": 2.939544869632836e-06, - "loss": 0.9678, - "step": 4844 - }, - { - "epoch": 0.3641214489703893, - "grad_norm": 1.6945609259392154, - "learning_rate": 2.9391150635766194e-06, - "loss": 0.981, - "step": 4845 - }, - { - "epoch": 0.36419660303622425, - "grad_norm": 1.4891496662833856, - "learning_rate": 2.9386852018742404e-06, - "loss": 0.8888, - "step": 4846 - }, - { - "epoch": 0.3642717571020592, - "grad_norm": 1.6313309221913725, - "learning_rate": 2.938255284551168e-06, - "loss": 1.0028, - "step": 4847 - }, - { - "epoch": 0.3643469111678942, - "grad_norm": 2.356572893517134, - "learning_rate": 2.9378253116328777e-06, - "loss": 1.0451, - "step": 4848 - }, - { - "epoch": 0.36442206523372916, - "grad_norm": 1.7102116826302256, - "learning_rate": 2.937395283144846e-06, - "loss": 1.005, - "step": 4849 - }, - { - "epoch": 0.3644972192995641, - "grad_norm": 1.9314263420704787, - "learning_rate": 2.9369651991125542e-06, - "loss": 0.9737, - "step": 4850 - }, - { - "epoch": 0.36457237336539905, - "grad_norm": 1.6640066934460118, - "learning_rate": 2.9365350595614863e-06, - "loss": 1.0785, - "step": 4851 - }, - { - "epoch": 0.364647527431234, - "grad_norm": 2.3121669632574124, - "learning_rate": 2.936104864517131e-06, - "loss": 1.0039, - "step": 4852 - }, - { - "epoch": 0.364722681497069, - "grad_norm": 2.1108157288662714, - "learning_rate": 2.935674614004977e-06, - "loss": 0.9331, - "step": 4853 - }, - { - "epoch": 0.36479783556290396, - "grad_norm": 1.924875413900678, - "learning_rate": 2.9352443080505192e-06, - "loss": 0.9371, - "step": 4854 - }, - { - "epoch": 0.36487298962873893, - "grad_norm": 1.3661136006275763, - "learning_rate": 2.934813946679255e-06, - "loss": 1.0523, - "step": 4855 - }, - { - "epoch": 0.3649481436945739, - "grad_norm": 1.8574131465028878, - "learning_rate": 2.9343835299166846e-06, - "loss": 0.9595, - "step": 4856 - }, - { - "epoch": 0.3650232977604088, - "grad_norm": 2.7828362902438837, - "learning_rate": 2.9339530577883125e-06, - "loss": 1.0945, - "step": 4857 - }, - { - "epoch": 0.3650984518262438, - "grad_norm": 1.7675622760209686, - "learning_rate": 2.9335225303196454e-06, - "loss": 0.8635, - "step": 4858 - }, - { - "epoch": 0.36517360589207876, - "grad_norm": 1.8809557104742325, - "learning_rate": 2.933091947536193e-06, - "loss": 1.1727, - "step": 4859 - }, - { - "epoch": 0.3652487599579137, - "grad_norm": 1.8327557202948344, - "learning_rate": 2.93266130946347e-06, - "loss": 0.9569, - "step": 4860 - }, - { - "epoch": 0.3653239140237487, - "grad_norm": 3.3678477761760037, - "learning_rate": 2.9322306161269933e-06, - "loss": 1.0089, - "step": 4861 - }, - { - "epoch": 0.36539906808958367, - "grad_norm": 1.5243001989426432, - "learning_rate": 2.931799867552282e-06, - "loss": 1.0243, - "step": 4862 - }, - { - "epoch": 0.3654742221554186, - "grad_norm": 1.3324162438641864, - "learning_rate": 2.931369063764862e-06, - "loss": 1.0087, - "step": 4863 - }, - { - "epoch": 0.36554937622125355, - "grad_norm": 1.5702486547140315, - "learning_rate": 2.9309382047902574e-06, - "loss": 0.8879, - "step": 4864 - }, - { - "epoch": 0.3656245302870885, - "grad_norm": 1.3790781924325015, - "learning_rate": 2.9305072906539993e-06, - "loss": 0.9774, - "step": 4865 - }, - { - "epoch": 0.3656996843529235, - "grad_norm": 0.6248129564783296, - "learning_rate": 2.930076321381622e-06, - "loss": 0.8445, - "step": 4866 - }, - { - "epoch": 0.36577483841875846, - "grad_norm": 1.5640203327359743, - "learning_rate": 2.92964529699866e-06, - "loss": 1.012, - "step": 4867 - }, - { - "epoch": 0.36584999248459343, - "grad_norm": 1.8997793596551427, - "learning_rate": 2.9292142175306548e-06, - "loss": 1.0056, - "step": 4868 - }, - { - "epoch": 0.3659251465504284, - "grad_norm": 1.471019533526859, - "learning_rate": 2.9287830830031492e-06, - "loss": 0.9366, - "step": 4869 - }, - { - "epoch": 0.3660003006162633, - "grad_norm": 1.8580411085379263, - "learning_rate": 2.9283518934416892e-06, - "loss": 1.0464, - "step": 4870 - }, - { - "epoch": 0.3660754546820983, - "grad_norm": 1.6025045462081147, - "learning_rate": 2.927920648871825e-06, - "loss": 0.8823, - "step": 4871 - }, - { - "epoch": 0.36615060874793326, - "grad_norm": 1.2831502639357002, - "learning_rate": 2.9274893493191084e-06, - "loss": 1.0451, - "step": 4872 - }, - { - "epoch": 0.36622576281376823, - "grad_norm": 0.670634832887831, - "learning_rate": 2.9270579948090962e-06, - "loss": 0.8317, - "step": 4873 - }, - { - "epoch": 0.3663009168796032, - "grad_norm": 1.5739937519739562, - "learning_rate": 2.9266265853673483e-06, - "loss": 0.9516, - "step": 4874 - }, - { - "epoch": 0.36637607094543817, - "grad_norm": 1.4792758292450559, - "learning_rate": 2.926195121019427e-06, - "loss": 0.8408, - "step": 4875 - }, - { - "epoch": 0.3664512250112731, - "grad_norm": 1.675267209616888, - "learning_rate": 2.9257636017908984e-06, - "loss": 0.9933, - "step": 4876 - }, - { - "epoch": 0.36652637907710806, - "grad_norm": 2.1587591637360015, - "learning_rate": 2.925332027707331e-06, - "loss": 1.0198, - "step": 4877 - }, - { - "epoch": 0.366601533142943, - "grad_norm": 1.4738184544629973, - "learning_rate": 2.9249003987942976e-06, - "loss": 1.0176, - "step": 4878 - }, - { - "epoch": 0.366676687208778, - "grad_norm": 3.698805337783924, - "learning_rate": 2.924468715077374e-06, - "loss": 1.0559, - "step": 4879 - }, - { - "epoch": 0.36675184127461297, - "grad_norm": 2.312961841550515, - "learning_rate": 2.9240369765821392e-06, - "loss": 1.039, - "step": 4880 - }, - { - "epoch": 0.36682699534044794, - "grad_norm": 1.5485825794976777, - "learning_rate": 2.9236051833341745e-06, - "loss": 1.0267, - "step": 4881 - }, - { - "epoch": 0.3669021494062829, - "grad_norm": 1.5954080820172292, - "learning_rate": 2.9231733353590668e-06, - "loss": 1.0147, - "step": 4882 - }, - { - "epoch": 0.3669773034721178, - "grad_norm": 1.4552484399852377, - "learning_rate": 2.9227414326824027e-06, - "loss": 1.089, - "step": 4883 - }, - { - "epoch": 0.3670524575379528, - "grad_norm": 3.206106296464376, - "learning_rate": 2.9223094753297767e-06, - "loss": 0.9511, - "step": 4884 - }, - { - "epoch": 0.36712761160378776, - "grad_norm": 1.6034226942373193, - "learning_rate": 2.9218774633267815e-06, - "loss": 0.9404, - "step": 4885 - }, - { - "epoch": 0.36720276566962273, - "grad_norm": 2.0962529949787836, - "learning_rate": 2.9214453966990174e-06, - "loss": 1.0858, - "step": 4886 - }, - { - "epoch": 0.3672779197354577, - "grad_norm": 2.950486068767685, - "learning_rate": 2.9210132754720845e-06, - "loss": 0.9908, - "step": 4887 - }, - { - "epoch": 0.3673530738012927, - "grad_norm": 1.5793214549327392, - "learning_rate": 2.9205810996715885e-06, - "loss": 1.0023, - "step": 4888 - }, - { - "epoch": 0.3674282278671276, - "grad_norm": 1.6734997706576948, - "learning_rate": 2.9201488693231366e-06, - "loss": 1.0339, - "step": 4889 - }, - { - "epoch": 0.36750338193296256, - "grad_norm": 1.8544556753251358, - "learning_rate": 2.9197165844523416e-06, - "loss": 0.9092, - "step": 4890 - }, - { - "epoch": 0.36757853599879753, - "grad_norm": 1.918339400205898, - "learning_rate": 2.9192842450848164e-06, - "loss": 0.9425, - "step": 4891 - }, - { - "epoch": 0.3676536900646325, - "grad_norm": 1.9368395141029375, - "learning_rate": 2.91885185124618e-06, - "loss": 1.004, - "step": 4892 - }, - { - "epoch": 0.36772884413046747, - "grad_norm": 2.1951756553490007, - "learning_rate": 2.918419402962053e-06, - "loss": 0.9463, - "step": 4893 - }, - { - "epoch": 0.36780399819630244, - "grad_norm": 9.635042574382132, - "learning_rate": 2.917986900258059e-06, - "loss": 0.9427, - "step": 4894 - }, - { - "epoch": 0.36787915226213735, - "grad_norm": 1.90093265029261, - "learning_rate": 2.9175543431598257e-06, - "loss": 0.9164, - "step": 4895 - }, - { - "epoch": 0.3679543063279723, - "grad_norm": 1.5830634623305508, - "learning_rate": 2.917121731692985e-06, - "loss": 0.9134, - "step": 4896 - }, - { - "epoch": 0.3680294603938073, - "grad_norm": 1.857512174029907, - "learning_rate": 2.9166890658831695e-06, - "loss": 0.9252, - "step": 4897 - }, - { - "epoch": 0.36810461445964227, - "grad_norm": 1.483658065325875, - "learning_rate": 2.9162563457560157e-06, - "loss": 1.0052, - "step": 4898 - }, - { - "epoch": 0.36817976852547724, - "grad_norm": 1.4497829274771445, - "learning_rate": 2.915823571337166e-06, - "loss": 1.0371, - "step": 4899 - }, - { - "epoch": 0.3682549225913122, - "grad_norm": 4.893736608104481, - "learning_rate": 2.915390742652262e-06, - "loss": 0.9573, - "step": 4900 - }, - { - "epoch": 0.3683300766571472, - "grad_norm": 1.9348157315896801, - "learning_rate": 2.914957859726952e-06, - "loss": 1.0428, - "step": 4901 - }, - { - "epoch": 0.3684052307229821, - "grad_norm": 1.628207049800941, - "learning_rate": 2.9145249225868848e-06, - "loss": 1.0288, - "step": 4902 - }, - { - "epoch": 0.36848038478881706, - "grad_norm": 1.8245800893887871, - "learning_rate": 2.9140919312577134e-06, - "loss": 1.1233, - "step": 4903 - }, - { - "epoch": 0.36855553885465203, - "grad_norm": 1.6760519027002247, - "learning_rate": 2.9136588857650956e-06, - "loss": 0.9632, - "step": 4904 - }, - { - "epoch": 0.368630692920487, - "grad_norm": 2.426766163372201, - "learning_rate": 2.9132257861346897e-06, - "loss": 1.1233, - "step": 4905 - }, - { - "epoch": 0.368705846986322, - "grad_norm": 1.83300748732599, - "learning_rate": 2.912792632392159e-06, - "loss": 1.0431, - "step": 4906 - }, - { - "epoch": 0.36878100105215694, - "grad_norm": 1.767727300766577, - "learning_rate": 2.9123594245631702e-06, - "loss": 0.9449, - "step": 4907 - }, - { - "epoch": 0.36885615511799186, - "grad_norm": 1.5068985140471713, - "learning_rate": 2.9119261626733915e-06, - "loss": 0.9934, - "step": 4908 - }, - { - "epoch": 0.3689313091838268, - "grad_norm": 1.7104782115102757, - "learning_rate": 2.911492846748495e-06, - "loss": 0.9558, - "step": 4909 - }, - { - "epoch": 0.3690064632496618, - "grad_norm": 1.8780636871028444, - "learning_rate": 2.911059476814158e-06, - "loss": 1.009, - "step": 4910 - }, - { - "epoch": 0.36908161731549677, - "grad_norm": 1.446192870202767, - "learning_rate": 2.9106260528960573e-06, - "loss": 1.0079, - "step": 4911 - }, - { - "epoch": 0.36915677138133174, - "grad_norm": 2.5151527861946743, - "learning_rate": 2.910192575019877e-06, - "loss": 1.008, - "step": 4912 - }, - { - "epoch": 0.3692319254471667, - "grad_norm": 1.5927109416626786, - "learning_rate": 2.9097590432113007e-06, - "loss": 0.9255, - "step": 4913 - }, - { - "epoch": 0.3693070795130017, - "grad_norm": 1.8229621300250571, - "learning_rate": 2.909325457496017e-06, - "loss": 1.0383, - "step": 4914 - }, - { - "epoch": 0.3693822335788366, - "grad_norm": 8.015705903892389, - "learning_rate": 2.908891817899718e-06, - "loss": 0.9489, - "step": 4915 - }, - { - "epoch": 0.36945738764467156, - "grad_norm": 1.9271759492080458, - "learning_rate": 2.9084581244480994e-06, - "loss": 1.0667, - "step": 4916 - }, - { - "epoch": 0.36953254171050653, - "grad_norm": 1.3904257213666174, - "learning_rate": 2.908024377166857e-06, - "loss": 0.9929, - "step": 4917 - }, - { - "epoch": 0.3696076957763415, - "grad_norm": 1.5799045160872864, - "learning_rate": 2.9075905760816942e-06, - "loss": 1.0335, - "step": 4918 - }, - { - "epoch": 0.3696828498421765, - "grad_norm": 1.6566979144479925, - "learning_rate": 2.9071567212183138e-06, - "loss": 0.8692, - "step": 4919 - }, - { - "epoch": 0.36975800390801145, - "grad_norm": 2.2009602185146115, - "learning_rate": 2.906722812602424e-06, - "loss": 0.9581, - "step": 4920 - }, - { - "epoch": 0.36983315797384636, - "grad_norm": 1.6341160585515835, - "learning_rate": 2.906288850259736e-06, - "loss": 0.9211, - "step": 4921 - }, - { - "epoch": 0.36990831203968133, - "grad_norm": 1.5375919965807636, - "learning_rate": 2.9058548342159628e-06, - "loss": 0.8493, - "step": 4922 - }, - { - "epoch": 0.3699834661055163, - "grad_norm": 4.3041568712909, - "learning_rate": 2.9054207644968218e-06, - "loss": 1.0019, - "step": 4923 - }, - { - "epoch": 0.37005862017135127, - "grad_norm": 1.7933248860161422, - "learning_rate": 2.904986641128033e-06, - "loss": 1.064, - "step": 4924 - }, - { - "epoch": 0.37013377423718624, - "grad_norm": 1.6860714976716202, - "learning_rate": 2.9045524641353208e-06, - "loss": 1.0081, - "step": 4925 - }, - { - "epoch": 0.3702089283030212, - "grad_norm": 1.4030665073224546, - "learning_rate": 2.904118233544411e-06, - "loss": 0.9975, - "step": 4926 - }, - { - "epoch": 0.3702840823688562, - "grad_norm": 2.0545545179607547, - "learning_rate": 2.9036839493810348e-06, - "loss": 1.1017, - "step": 4927 - }, - { - "epoch": 0.3703592364346911, - "grad_norm": 1.7415098232240858, - "learning_rate": 2.903249611670923e-06, - "loss": 0.9888, - "step": 4928 - }, - { - "epoch": 0.37043439050052607, - "grad_norm": 1.8697772083059954, - "learning_rate": 2.9028152204398135e-06, - "loss": 0.9366, - "step": 4929 - }, - { - "epoch": 0.37050954456636104, - "grad_norm": 1.5877377669929753, - "learning_rate": 2.9023807757134455e-06, - "loss": 1.0776, - "step": 4930 - }, - { - "epoch": 0.370584698632196, - "grad_norm": 1.6872271564456014, - "learning_rate": 2.90194627751756e-06, - "loss": 1.0374, - "step": 4931 - }, - { - "epoch": 0.370659852698031, - "grad_norm": 1.5579792235755712, - "learning_rate": 2.9015117258779045e-06, - "loss": 0.9665, - "step": 4932 - }, - { - "epoch": 0.37073500676386595, - "grad_norm": 1.5391352292609075, - "learning_rate": 2.9010771208202265e-06, - "loss": 1.0315, - "step": 4933 - }, - { - "epoch": 0.37081016082970086, - "grad_norm": 0.8130968176072324, - "learning_rate": 2.900642462370279e-06, - "loss": 0.8895, - "step": 4934 - }, - { - "epoch": 0.37088531489553583, - "grad_norm": 1.6334730938393565, - "learning_rate": 2.900207750553817e-06, - "loss": 0.9246, - "step": 4935 - }, - { - "epoch": 0.3709604689613708, - "grad_norm": 1.4461614641341678, - "learning_rate": 2.899772985396599e-06, - "loss": 1.0031, - "step": 4936 - }, - { - "epoch": 0.3710356230272058, - "grad_norm": 1.170413364601751, - "learning_rate": 2.8993381669243854e-06, - "loss": 1.0196, - "step": 4937 - }, - { - "epoch": 0.37111077709304074, - "grad_norm": 1.3172354873839502, - "learning_rate": 2.8989032951629417e-06, - "loss": 1.042, - "step": 4938 - }, - { - "epoch": 0.3711859311588757, - "grad_norm": 0.8457620084847659, - "learning_rate": 2.898468370138036e-06, - "loss": 0.9064, - "step": 4939 - }, - { - "epoch": 0.37126108522471063, - "grad_norm": 1.6763055154708881, - "learning_rate": 2.8980333918754383e-06, - "loss": 0.9748, - "step": 4940 - }, - { - "epoch": 0.3713362392905456, - "grad_norm": 1.6351828276346532, - "learning_rate": 2.8975983604009244e-06, - "loss": 1.0191, - "step": 4941 - }, - { - "epoch": 0.37141139335638057, - "grad_norm": 2.549677798541081, - "learning_rate": 2.8971632757402694e-06, - "loss": 0.9574, - "step": 4942 - }, - { - "epoch": 0.37148654742221554, - "grad_norm": 2.3292829462914715, - "learning_rate": 2.8967281379192557e-06, - "loss": 1.0475, - "step": 4943 - }, - { - "epoch": 0.3715617014880505, - "grad_norm": 1.5664072393476067, - "learning_rate": 2.8962929469636653e-06, - "loss": 0.9048, - "step": 4944 - }, - { - "epoch": 0.3716368555538855, - "grad_norm": 1.9843687522970053, - "learning_rate": 2.8958577028992866e-06, - "loss": 0.9561, - "step": 4945 - }, - { - "epoch": 0.37171200961972045, - "grad_norm": 2.0303747785779835, - "learning_rate": 2.895422405751908e-06, - "loss": 1.0398, - "step": 4946 - }, - { - "epoch": 0.37178716368555537, - "grad_norm": 1.614378179826902, - "learning_rate": 2.8949870555473226e-06, - "loss": 0.9908, - "step": 4947 - }, - { - "epoch": 0.37186231775139034, - "grad_norm": 1.467646407406379, - "learning_rate": 2.8945516523113275e-06, - "loss": 0.9631, - "step": 4948 - }, - { - "epoch": 0.3719374718172253, - "grad_norm": 2.1480887944997002, - "learning_rate": 2.8941161960697217e-06, - "loss": 0.983, - "step": 4949 - }, - { - "epoch": 0.3720126258830603, - "grad_norm": 1.7798824749413293, - "learning_rate": 2.893680686848307e-06, - "loss": 1.0139, - "step": 4950 - }, - { - "epoch": 0.37208777994889525, - "grad_norm": 1.7994536762851978, - "learning_rate": 2.89324512467289e-06, - "loss": 0.9344, - "step": 4951 - }, - { - "epoch": 0.3721629340147302, - "grad_norm": 2.2195123786486546, - "learning_rate": 2.8928095095692783e-06, - "loss": 1.04, - "step": 4952 - }, - { - "epoch": 0.37223808808056513, - "grad_norm": 1.5172539049224962, - "learning_rate": 2.892373841563285e-06, - "loss": 0.975, - "step": 4953 - }, - { - "epoch": 0.3723132421464001, - "grad_norm": 1.9132761059929828, - "learning_rate": 2.891938120680724e-06, - "loss": 0.9856, - "step": 4954 - }, - { - "epoch": 0.3723883962122351, - "grad_norm": 0.681295728461121, - "learning_rate": 2.891502346947414e-06, - "loss": 0.8272, - "step": 4955 - }, - { - "epoch": 0.37246355027807004, - "grad_norm": 2.266981994442978, - "learning_rate": 2.8910665203891763e-06, - "loss": 1.0203, - "step": 4956 - }, - { - "epoch": 0.372538704343905, - "grad_norm": 1.9108403702861556, - "learning_rate": 2.8906306410318353e-06, - "loss": 1.0577, - "step": 4957 - }, - { - "epoch": 0.37261385840974, - "grad_norm": 1.4100295245008132, - "learning_rate": 2.890194708901218e-06, - "loss": 0.9862, - "step": 4958 - }, - { - "epoch": 0.37268901247557495, - "grad_norm": 1.4202259268148225, - "learning_rate": 2.889758724023155e-06, - "loss": 1.0298, - "step": 4959 - }, - { - "epoch": 0.37276416654140987, - "grad_norm": 2.4392859933865356, - "learning_rate": 2.8893226864234813e-06, - "loss": 0.8467, - "step": 4960 - }, - { - "epoch": 0.37283932060724484, - "grad_norm": 1.6480950456312293, - "learning_rate": 2.8888865961280325e-06, - "loss": 1.0097, - "step": 4961 - }, - { - "epoch": 0.3729144746730798, - "grad_norm": 1.480616579752433, - "learning_rate": 2.888450453162649e-06, - "loss": 0.9895, - "step": 4962 - }, - { - "epoch": 0.3729896287389148, - "grad_norm": 0.697660211635162, - "learning_rate": 2.888014257553175e-06, - "loss": 0.8167, - "step": 4963 - }, - { - "epoch": 0.37306478280474975, - "grad_norm": 0.5935341543077266, - "learning_rate": 2.8875780093254545e-06, - "loss": 0.7951, - "step": 4964 - }, - { - "epoch": 0.3731399368705847, - "grad_norm": 1.378334726650421, - "learning_rate": 2.8871417085053394e-06, - "loss": 0.9867, - "step": 4965 - }, - { - "epoch": 0.37321509093641964, - "grad_norm": 2.1597932093144316, - "learning_rate": 2.88670535511868e-06, - "loss": 1.0833, - "step": 4966 - }, - { - "epoch": 0.3732902450022546, - "grad_norm": 3.0388476925912378, - "learning_rate": 2.886268949191334e-06, - "loss": 0.9359, - "step": 4967 - }, - { - "epoch": 0.3733653990680896, - "grad_norm": 2.611156472247229, - "learning_rate": 2.885832490749158e-06, - "loss": 1.0281, - "step": 4968 - }, - { - "epoch": 0.37344055313392455, - "grad_norm": 3.866621002936114, - "learning_rate": 2.885395979818015e-06, - "loss": 0.9569, - "step": 4969 - }, - { - "epoch": 0.3735157071997595, - "grad_norm": 1.589661908822043, - "learning_rate": 2.8849594164237694e-06, - "loss": 0.9084, - "step": 4970 - }, - { - "epoch": 0.3735908612655945, - "grad_norm": 1.323329120328204, - "learning_rate": 2.8845228005922905e-06, - "loss": 0.9527, - "step": 4971 - }, - { - "epoch": 0.37366601533142946, - "grad_norm": 1.8873739743633227, - "learning_rate": 2.8840861323494482e-06, - "loss": 1.0074, - "step": 4972 - }, - { - "epoch": 0.3737411693972644, - "grad_norm": 1.739265626079094, - "learning_rate": 2.8836494117211177e-06, - "loss": 0.9785, - "step": 4973 - }, - { - "epoch": 0.37381632346309934, - "grad_norm": 2.185559092905974, - "learning_rate": 2.883212638733175e-06, - "loss": 1.0118, - "step": 4974 - }, - { - "epoch": 0.3738914775289343, - "grad_norm": 1.443137434438711, - "learning_rate": 2.8827758134115017e-06, - "loss": 0.981, - "step": 4975 - }, - { - "epoch": 0.3739666315947693, - "grad_norm": 2.1598291960046994, - "learning_rate": 2.8823389357819815e-06, - "loss": 0.9934, - "step": 4976 - }, - { - "epoch": 0.37404178566060425, - "grad_norm": 1.3430282961263946, - "learning_rate": 2.8819020058705003e-06, - "loss": 0.9471, - "step": 4977 - }, - { - "epoch": 0.3741169397264392, - "grad_norm": 1.6225423568110622, - "learning_rate": 2.881465023702948e-06, - "loss": 0.98, - "step": 4978 - }, - { - "epoch": 0.37419209379227414, - "grad_norm": 3.1360656134300315, - "learning_rate": 2.8810279893052184e-06, - "loss": 0.9431, - "step": 4979 - }, - { - "epoch": 0.3742672478581091, - "grad_norm": 1.735102389279892, - "learning_rate": 2.880590902703206e-06, - "loss": 1.0551, - "step": 4980 - }, - { - "epoch": 0.3743424019239441, - "grad_norm": 1.9950114860299626, - "learning_rate": 2.8801537639228107e-06, - "loss": 1.0269, - "step": 4981 - }, - { - "epoch": 0.37441755598977905, - "grad_norm": 1.8364307146886008, - "learning_rate": 2.8797165729899347e-06, - "loss": 1.0265, - "step": 4982 - }, - { - "epoch": 0.374492710055614, - "grad_norm": 2.033103214854321, - "learning_rate": 2.879279329930483e-06, - "loss": 0.9091, - "step": 4983 - }, - { - "epoch": 0.374567864121449, - "grad_norm": 1.9318534648957493, - "learning_rate": 2.8788420347703643e-06, - "loss": 1.0059, - "step": 4984 - }, - { - "epoch": 0.3746430181872839, - "grad_norm": 1.5551717186220484, - "learning_rate": 2.87840468753549e-06, - "loss": 1.0403, - "step": 4985 - }, - { - "epoch": 0.3747181722531189, - "grad_norm": 2.428503796893948, - "learning_rate": 2.8779672882517735e-06, - "loss": 0.9885, - "step": 4986 - }, - { - "epoch": 0.37479332631895385, - "grad_norm": 1.398803748222262, - "learning_rate": 2.877529836945134e-06, - "loss": 0.9683, - "step": 4987 - }, - { - "epoch": 0.3748684803847888, - "grad_norm": 0.7827042967430191, - "learning_rate": 2.8770923336414906e-06, - "loss": 0.9065, - "step": 4988 - }, - { - "epoch": 0.3749436344506238, - "grad_norm": 1.6567156874748805, - "learning_rate": 2.8766547783667686e-06, - "loss": 0.9549, - "step": 4989 - }, - { - "epoch": 0.37501878851645876, - "grad_norm": 1.7733360631223811, - "learning_rate": 2.8762171711468935e-06, - "loss": 1.0041, - "step": 4990 - }, - { - "epoch": 0.3750939425822937, - "grad_norm": 2.0841635257901157, - "learning_rate": 2.8757795120077955e-06, - "loss": 1.0508, - "step": 4991 - }, - { - "epoch": 0.37516909664812864, - "grad_norm": 1.9151617513725279, - "learning_rate": 2.8753418009754082e-06, - "loss": 0.9905, - "step": 4992 - }, - { - "epoch": 0.3752442507139636, - "grad_norm": 1.760524641107767, - "learning_rate": 2.874904038075668e-06, - "loss": 1.0308, - "step": 4993 - }, - { - "epoch": 0.3753194047797986, - "grad_norm": 1.8150529968258837, - "learning_rate": 2.874466223334512e-06, - "loss": 0.9682, - "step": 4994 - }, - { - "epoch": 0.37539455884563355, - "grad_norm": 2.109206920567717, - "learning_rate": 2.8740283567778844e-06, - "loss": 1.0177, - "step": 4995 - }, - { - "epoch": 0.3754697129114685, - "grad_norm": 1.769950833796178, - "learning_rate": 2.87359043843173e-06, - "loss": 0.9047, - "step": 4996 - }, - { - "epoch": 0.3755448669773035, - "grad_norm": 2.103751705030724, - "learning_rate": 2.873152468321997e-06, - "loss": 0.8805, - "step": 4997 - }, - { - "epoch": 0.3756200210431384, - "grad_norm": 1.9889082304169756, - "learning_rate": 2.872714446474636e-06, - "loss": 0.9754, - "step": 4998 - }, - { - "epoch": 0.3756951751089734, - "grad_norm": 1.9663652175544561, - "learning_rate": 2.8722763729156027e-06, - "loss": 1.0141, - "step": 4999 - }, - { - "epoch": 0.37577032917480835, - "grad_norm": 2.1547839944656113, - "learning_rate": 2.8718382476708544e-06, - "loss": 1.0468, - "step": 5000 - }, - { - "epoch": 0.3758454832406433, - "grad_norm": 2.027408194574235, - "learning_rate": 2.8714000707663507e-06, - "loss": 0.957, - "step": 5001 - }, - { - "epoch": 0.3759206373064783, - "grad_norm": 1.9279975440931705, - "learning_rate": 2.8709618422280564e-06, - "loss": 0.9133, - "step": 5002 - }, - { - "epoch": 0.37599579137231326, - "grad_norm": 1.9307768677330435, - "learning_rate": 2.8705235620819377e-06, - "loss": 1.0929, - "step": 5003 - }, - { - "epoch": 0.37607094543814823, - "grad_norm": 1.705334271639242, - "learning_rate": 2.8700852303539647e-06, - "loss": 0.9447, - "step": 5004 - }, - { - "epoch": 0.37614609950398314, - "grad_norm": 2.958197541321648, - "learning_rate": 2.8696468470701096e-06, - "loss": 0.9165, - "step": 5005 - }, - { - "epoch": 0.3762212535698181, - "grad_norm": 1.6530089425954528, - "learning_rate": 2.869208412256349e-06, - "loss": 1.0672, - "step": 5006 - }, - { - "epoch": 0.3762964076356531, - "grad_norm": 1.7624161855911107, - "learning_rate": 2.868769925938662e-06, - "loss": 0.9157, - "step": 5007 - }, - { - "epoch": 0.37637156170148806, - "grad_norm": 2.6640181999214905, - "learning_rate": 2.868331388143029e-06, - "loss": 0.9799, - "step": 5008 - }, - { - "epoch": 0.376446715767323, - "grad_norm": 2.1675458615916923, - "learning_rate": 2.867892798895437e-06, - "loss": 1.035, - "step": 5009 - }, - { - "epoch": 0.376521869833158, - "grad_norm": 1.3657021001005956, - "learning_rate": 2.867454158221873e-06, - "loss": 1.0495, - "step": 5010 - }, - { - "epoch": 0.3765970238989929, - "grad_norm": 1.5546513078780444, - "learning_rate": 2.867015466148329e-06, - "loss": 0.9722, - "step": 5011 - }, - { - "epoch": 0.3766721779648279, - "grad_norm": 1.9119298147304147, - "learning_rate": 2.8665767227007985e-06, - "loss": 0.9061, - "step": 5012 - }, - { - "epoch": 0.37674733203066285, - "grad_norm": 0.7037909427068697, - "learning_rate": 2.866137927905278e-06, - "loss": 0.8405, - "step": 5013 - }, - { - "epoch": 0.3768224860964978, - "grad_norm": 1.7696108094913077, - "learning_rate": 2.865699081787769e-06, - "loss": 1.0331, - "step": 5014 - }, - { - "epoch": 0.3768976401623328, - "grad_norm": 2.231389645606253, - "learning_rate": 2.865260184374275e-06, - "loss": 1.016, - "step": 5015 - }, - { - "epoch": 0.37697279422816776, - "grad_norm": 1.5923852500115496, - "learning_rate": 2.864821235690801e-06, - "loss": 0.9838, - "step": 5016 - }, - { - "epoch": 0.37704794829400273, - "grad_norm": 3.018512351591662, - "learning_rate": 2.8643822357633576e-06, - "loss": 0.832, - "step": 5017 - }, - { - "epoch": 0.37712310235983765, - "grad_norm": 1.8706142631449498, - "learning_rate": 2.863943184617957e-06, - "loss": 0.9699, - "step": 5018 - }, - { - "epoch": 0.3771982564256726, - "grad_norm": 2.2383277981430014, - "learning_rate": 2.8635040822806135e-06, - "loss": 0.9654, - "step": 5019 - }, - { - "epoch": 0.3772734104915076, - "grad_norm": 1.5247599021889053, - "learning_rate": 2.8630649287773475e-06, - "loss": 1.0562, - "step": 5020 - }, - { - "epoch": 0.37734856455734256, - "grad_norm": 1.7682262602282661, - "learning_rate": 2.862625724134179e-06, - "loss": 1.003, - "step": 5021 - }, - { - "epoch": 0.37742371862317753, - "grad_norm": 1.3922706467859158, - "learning_rate": 2.8621864683771337e-06, - "loss": 1.0147, - "step": 5022 - }, - { - "epoch": 0.3774988726890125, - "grad_norm": 1.952181718016871, - "learning_rate": 2.8617471615322377e-06, - "loss": 0.9134, - "step": 5023 - }, - { - "epoch": 0.3775740267548474, - "grad_norm": 1.6805123366516252, - "learning_rate": 2.8613078036255233e-06, - "loss": 0.9343, - "step": 5024 - }, - { - "epoch": 0.3776491808206824, - "grad_norm": 1.7242106303968876, - "learning_rate": 2.8608683946830236e-06, - "loss": 0.9223, - "step": 5025 - }, - { - "epoch": 0.37772433488651735, - "grad_norm": 1.5871984420672585, - "learning_rate": 2.8604289347307746e-06, - "loss": 1.0386, - "step": 5026 - }, - { - "epoch": 0.3777994889523523, - "grad_norm": 1.422167234467082, - "learning_rate": 2.859989423794816e-06, - "loss": 0.891, - "step": 5027 - }, - { - "epoch": 0.3778746430181873, - "grad_norm": 1.6132842795872355, - "learning_rate": 2.8595498619011916e-06, - "loss": 1.0156, - "step": 5028 - }, - { - "epoch": 0.37794979708402227, - "grad_norm": 1.74279425622599, - "learning_rate": 2.8591102490759468e-06, - "loss": 0.9774, - "step": 5029 - }, - { - "epoch": 0.3780249511498572, - "grad_norm": 1.6767032105626312, - "learning_rate": 2.858670585345129e-06, - "loss": 1.1123, - "step": 5030 - }, - { - "epoch": 0.37810010521569215, - "grad_norm": 1.5294928297290664, - "learning_rate": 2.8582308707347913e-06, - "loss": 0.9745, - "step": 5031 - }, - { - "epoch": 0.3781752592815271, - "grad_norm": 0.724559442902467, - "learning_rate": 2.857791105270988e-06, - "loss": 0.8671, - "step": 5032 - }, - { - "epoch": 0.3782504133473621, - "grad_norm": 1.7679944638730882, - "learning_rate": 2.8573512889797773e-06, - "loss": 1.0357, - "step": 5033 - }, - { - "epoch": 0.37832556741319706, - "grad_norm": 3.128086701881798, - "learning_rate": 2.8569114218872195e-06, - "loss": 1.0116, - "step": 5034 - }, - { - "epoch": 0.37840072147903203, - "grad_norm": 2.5121930328661257, - "learning_rate": 2.856471504019379e-06, - "loss": 0.8866, - "step": 5035 - }, - { - "epoch": 0.378475875544867, - "grad_norm": 1.5509193032261959, - "learning_rate": 2.856031535402321e-06, - "loss": 0.9091, - "step": 5036 - }, - { - "epoch": 0.3785510296107019, - "grad_norm": 1.8450670226000876, - "learning_rate": 2.8555915160621184e-06, - "loss": 1.047, - "step": 5037 - }, - { - "epoch": 0.3786261836765369, - "grad_norm": 1.9673558703202119, - "learning_rate": 2.8551514460248406e-06, - "loss": 1.1023, - "step": 5038 - }, - { - "epoch": 0.37870133774237186, - "grad_norm": 1.985875080847473, - "learning_rate": 2.8547113253165666e-06, - "loss": 1.113, - "step": 5039 - }, - { - "epoch": 0.3787764918082068, - "grad_norm": 2.022971198947538, - "learning_rate": 2.8542711539633723e-06, - "loss": 1.035, - "step": 5040 - }, - { - "epoch": 0.3788516458740418, - "grad_norm": 1.9297722554609333, - "learning_rate": 2.8538309319913413e-06, - "loss": 0.9163, - "step": 5041 - }, - { - "epoch": 0.37892679993987677, - "grad_norm": 2.1276311853219285, - "learning_rate": 2.8533906594265588e-06, - "loss": 0.9605, - "step": 5042 - }, - { - "epoch": 0.3790019540057117, - "grad_norm": 1.6849705140564764, - "learning_rate": 2.852950336295111e-06, - "loss": 0.8541, - "step": 5043 - }, - { - "epoch": 0.37907710807154665, - "grad_norm": 1.5979174561314164, - "learning_rate": 2.8525099626230894e-06, - "loss": 0.8938, - "step": 5044 - }, - { - "epoch": 0.3791522621373816, - "grad_norm": 2.1276317456129914, - "learning_rate": 2.8520695384365887e-06, - "loss": 0.8779, - "step": 5045 - }, - { - "epoch": 0.3792274162032166, - "grad_norm": 1.8978816973091872, - "learning_rate": 2.851629063761705e-06, - "loss": 1.0405, - "step": 5046 - }, - { - "epoch": 0.37930257026905156, - "grad_norm": 1.7018189628416995, - "learning_rate": 2.8511885386245373e-06, - "loss": 1.0089, - "step": 5047 - }, - { - "epoch": 0.37937772433488653, - "grad_norm": 1.8448036545274769, - "learning_rate": 2.8507479630511905e-06, - "loss": 0.9465, - "step": 5048 - }, - { - "epoch": 0.3794528784007215, - "grad_norm": 0.9617430559150082, - "learning_rate": 2.850307337067768e-06, - "loss": 0.9979, - "step": 5049 - }, - { - "epoch": 0.3795280324665564, - "grad_norm": 2.048946351748564, - "learning_rate": 2.849866660700381e-06, - "loss": 0.8539, - "step": 5050 - }, - { - "epoch": 0.3796031865323914, - "grad_norm": 1.380751199545297, - "learning_rate": 2.8494259339751396e-06, - "loss": 1.0379, - "step": 5051 - }, - { - "epoch": 0.37967834059822636, - "grad_norm": 1.7552754043155498, - "learning_rate": 2.8489851569181584e-06, - "loss": 0.9131, - "step": 5052 - }, - { - "epoch": 0.37975349466406133, - "grad_norm": 1.3749092678998749, - "learning_rate": 2.848544329555556e-06, - "loss": 1.023, - "step": 5053 - }, - { - "epoch": 0.3798286487298963, - "grad_norm": 1.3965476281839637, - "learning_rate": 2.8481034519134524e-06, - "loss": 0.9804, - "step": 5054 - }, - { - "epoch": 0.37990380279573127, - "grad_norm": 3.368790601156581, - "learning_rate": 2.8476625240179726e-06, - "loss": 1.0164, - "step": 5055 - }, - { - "epoch": 0.3799789568615662, - "grad_norm": 2.490453134539603, - "learning_rate": 2.847221545895241e-06, - "loss": 0.9402, - "step": 5056 - }, - { - "epoch": 0.38005411092740116, - "grad_norm": 2.142150001559393, - "learning_rate": 2.8467805175713897e-06, - "loss": 1.0011, - "step": 5057 - }, - { - "epoch": 0.3801292649932361, - "grad_norm": 1.426717623145252, - "learning_rate": 2.84633943907255e-06, - "loss": 0.9882, - "step": 5058 - }, - { - "epoch": 0.3802044190590711, - "grad_norm": 3.873272295310595, - "learning_rate": 2.8458983104248575e-06, - "loss": 1.0172, - "step": 5059 - }, - { - "epoch": 0.38027957312490607, - "grad_norm": 1.3941697792859093, - "learning_rate": 2.8454571316544504e-06, - "loss": 0.9631, - "step": 5060 - }, - { - "epoch": 0.38035472719074104, - "grad_norm": 1.5634529259710044, - "learning_rate": 2.845015902787472e-06, - "loss": 0.9565, - "step": 5061 - }, - { - "epoch": 0.380429881256576, - "grad_norm": 1.929478251737716, - "learning_rate": 2.8445746238500647e-06, - "loss": 0.9078, - "step": 5062 - }, - { - "epoch": 0.3805050353224109, - "grad_norm": 1.7915137543593451, - "learning_rate": 2.8441332948683768e-06, - "loss": 0.8767, - "step": 5063 - }, - { - "epoch": 0.3805801893882459, - "grad_norm": 1.1267429202043342, - "learning_rate": 2.8436919158685594e-06, - "loss": 0.9386, - "step": 5064 - }, - { - "epoch": 0.38065534345408086, - "grad_norm": 3.430301550463431, - "learning_rate": 2.8432504868767648e-06, - "loss": 1.0265, - "step": 5065 - }, - { - "epoch": 0.38073049751991583, - "grad_norm": 2.0708785524810422, - "learning_rate": 2.84280900791915e-06, - "loss": 1.0045, - "step": 5066 - }, - { - "epoch": 0.3808056515857508, - "grad_norm": 2.3113265282034052, - "learning_rate": 2.8423674790218737e-06, - "loss": 0.8955, - "step": 5067 - }, - { - "epoch": 0.3808808056515858, - "grad_norm": 2.022894591326599, - "learning_rate": 2.841925900211099e-06, - "loss": 0.9029, - "step": 5068 - }, - { - "epoch": 0.3809559597174207, - "grad_norm": 1.850347450208452, - "learning_rate": 2.841484271512991e-06, - "loss": 0.9425, - "step": 5069 - }, - { - "epoch": 0.38103111378325566, - "grad_norm": 2.1804091923451505, - "learning_rate": 2.8410425929537175e-06, - "loss": 0.9494, - "step": 5070 - }, - { - "epoch": 0.38110626784909063, - "grad_norm": 1.5756030184745686, - "learning_rate": 2.8406008645594493e-06, - "loss": 0.9454, - "step": 5071 - }, - { - "epoch": 0.3811814219149256, - "grad_norm": 2.1204857841866094, - "learning_rate": 2.840159086356362e-06, - "loss": 1.0106, - "step": 5072 - }, - { - "epoch": 0.38125657598076057, - "grad_norm": 2.0535299427272764, - "learning_rate": 2.839717258370631e-06, - "loss": 0.9985, - "step": 5073 - }, - { - "epoch": 0.38133173004659554, - "grad_norm": 1.9039983740406947, - "learning_rate": 2.8392753806284367e-06, - "loss": 0.9378, - "step": 5074 - }, - { - "epoch": 0.38140688411243046, - "grad_norm": 2.316100024716853, - "learning_rate": 2.838833453155963e-06, - "loss": 1.0534, - "step": 5075 - }, - { - "epoch": 0.3814820381782654, - "grad_norm": 1.8268300874496388, - "learning_rate": 2.8383914759793944e-06, - "loss": 0.9975, - "step": 5076 - }, - { - "epoch": 0.3815571922441004, - "grad_norm": 2.3487138190941668, - "learning_rate": 2.8379494491249214e-06, - "loss": 1.0708, - "step": 5077 - }, - { - "epoch": 0.38163234630993537, - "grad_norm": 1.756345687920255, - "learning_rate": 2.8375073726187334e-06, - "loss": 0.8973, - "step": 5078 - }, - { - "epoch": 0.38170750037577034, - "grad_norm": 1.310854106883169, - "learning_rate": 2.8370652464870277e-06, - "loss": 1.0302, - "step": 5079 - }, - { - "epoch": 0.3817826544416053, - "grad_norm": 1.2634851238098073, - "learning_rate": 2.836623070756e-06, - "loss": 0.9829, - "step": 5080 - }, - { - "epoch": 0.3818578085074403, - "grad_norm": 2.0086448517526465, - "learning_rate": 2.836180845451852e-06, - "loss": 0.9683, - "step": 5081 - }, - { - "epoch": 0.3819329625732752, - "grad_norm": 2.2850982397818482, - "learning_rate": 2.835738570600787e-06, - "loss": 0.9298, - "step": 5082 - }, - { - "epoch": 0.38200811663911016, - "grad_norm": 3.3996633138566836, - "learning_rate": 2.835296246229012e-06, - "loss": 1.0368, - "step": 5083 - }, - { - "epoch": 0.38208327070494513, - "grad_norm": 1.9008846130990946, - "learning_rate": 2.8348538723627356e-06, - "loss": 0.9263, - "step": 5084 - }, - { - "epoch": 0.3821584247707801, - "grad_norm": 1.9354167936863633, - "learning_rate": 2.83441144902817e-06, - "loss": 1.0566, - "step": 5085 - }, - { - "epoch": 0.3822335788366151, - "grad_norm": 2.2453841440216045, - "learning_rate": 2.8339689762515307e-06, - "loss": 0.9517, - "step": 5086 - }, - { - "epoch": 0.38230873290245004, - "grad_norm": 2.467274478396274, - "learning_rate": 2.8335264540590366e-06, - "loss": 0.9389, - "step": 5087 - }, - { - "epoch": 0.38238388696828496, - "grad_norm": 1.32703988120397, - "learning_rate": 2.833083882476908e-06, - "loss": 0.9985, - "step": 5088 - }, - { - "epoch": 0.38245904103411993, - "grad_norm": 1.8875050298358518, - "learning_rate": 2.8326412615313695e-06, - "loss": 0.9885, - "step": 5089 - }, - { - "epoch": 0.3825341950999549, - "grad_norm": 2.7600392977363253, - "learning_rate": 2.8321985912486476e-06, - "loss": 0.9825, - "step": 5090 - }, - { - "epoch": 0.38260934916578987, - "grad_norm": 1.5384958841084695, - "learning_rate": 2.8317558716549727e-06, - "loss": 0.9474, - "step": 5091 - }, - { - "epoch": 0.38268450323162484, - "grad_norm": 1.4072991590020427, - "learning_rate": 2.8313131027765774e-06, - "loss": 1.0178, - "step": 5092 - }, - { - "epoch": 0.3827596572974598, - "grad_norm": 1.8944448549687205, - "learning_rate": 2.830870284639697e-06, - "loss": 1.0407, - "step": 5093 - }, - { - "epoch": 0.3828348113632948, - "grad_norm": 0.7132158765468788, - "learning_rate": 2.830427417270571e-06, - "loss": 0.8758, - "step": 5094 - }, - { - "epoch": 0.3829099654291297, - "grad_norm": 1.9860361310355577, - "learning_rate": 2.829984500695441e-06, - "loss": 1.032, - "step": 5095 - }, - { - "epoch": 0.38298511949496467, - "grad_norm": 1.4267573111844603, - "learning_rate": 2.8295415349405508e-06, - "loss": 1.0218, - "step": 5096 - }, - { - "epoch": 0.38306027356079964, - "grad_norm": 5.8554808022933, - "learning_rate": 2.8290985200321477e-06, - "loss": 0.9143, - "step": 5097 - }, - { - "epoch": 0.3831354276266346, - "grad_norm": 0.8133344033760578, - "learning_rate": 2.8286554559964826e-06, - "loss": 0.8033, - "step": 5098 - }, - { - "epoch": 0.3832105816924696, - "grad_norm": 1.831007877005817, - "learning_rate": 2.8282123428598096e-06, - "loss": 0.9557, - "step": 5099 - }, - { - "epoch": 0.38328573575830455, - "grad_norm": 1.8385037282283974, - "learning_rate": 2.8277691806483824e-06, - "loss": 1.0272, - "step": 5100 - }, - { - "epoch": 0.38336088982413946, - "grad_norm": 2.1841674487789207, - "learning_rate": 2.8273259693884625e-06, - "loss": 1.0519, - "step": 5101 - }, - { - "epoch": 0.38343604388997443, - "grad_norm": 1.6985348081393739, - "learning_rate": 2.8268827091063105e-06, - "loss": 1.0551, - "step": 5102 - }, - { - "epoch": 0.3835111979558094, - "grad_norm": 1.5302372132708002, - "learning_rate": 2.8264393998281916e-06, - "loss": 0.8936, - "step": 5103 - }, - { - "epoch": 0.3835863520216444, - "grad_norm": 1.4143032480229139, - "learning_rate": 2.825996041580373e-06, - "loss": 0.9754, - "step": 5104 - }, - { - "epoch": 0.38366150608747934, - "grad_norm": 1.5825423557103258, - "learning_rate": 2.825552634389127e-06, - "loss": 0.9246, - "step": 5105 - }, - { - "epoch": 0.3837366601533143, - "grad_norm": 0.8377797513577931, - "learning_rate": 2.8251091782807265e-06, - "loss": 0.8486, - "step": 5106 - }, - { - "epoch": 0.3838118142191493, - "grad_norm": 2.2980395628044255, - "learning_rate": 2.8246656732814463e-06, - "loss": 0.9125, - "step": 5107 - }, - { - "epoch": 0.3838869682849842, - "grad_norm": 2.6447019986766374, - "learning_rate": 2.8242221194175676e-06, - "loss": 0.9354, - "step": 5108 - }, - { - "epoch": 0.38396212235081917, - "grad_norm": 1.6916610842958746, - "learning_rate": 2.8237785167153726e-06, - "loss": 1.0155, - "step": 5109 - }, - { - "epoch": 0.38403727641665414, - "grad_norm": 1.309514146207685, - "learning_rate": 2.8233348652011456e-06, - "loss": 1.0317, - "step": 5110 - }, - { - "epoch": 0.3841124304824891, - "grad_norm": 1.549143582288857, - "learning_rate": 2.8228911649011755e-06, - "loss": 1.0566, - "step": 5111 - }, - { - "epoch": 0.3841875845483241, - "grad_norm": 1.8280906347973598, - "learning_rate": 2.8224474158417526e-06, - "loss": 1.0225, - "step": 5112 - }, - { - "epoch": 0.38426273861415905, - "grad_norm": 1.8916532384365017, - "learning_rate": 2.8220036180491703e-06, - "loss": 0.9778, - "step": 5113 - }, - { - "epoch": 0.38433789267999396, - "grad_norm": 2.998665035772608, - "learning_rate": 2.8215597715497266e-06, - "loss": 0.9867, - "step": 5114 - }, - { - "epoch": 0.38441304674582893, - "grad_norm": 1.7899476239730514, - "learning_rate": 2.8211158763697205e-06, - "loss": 0.9267, - "step": 5115 - }, - { - "epoch": 0.3844882008116639, - "grad_norm": 2.3380266626435064, - "learning_rate": 2.820671932535455e-06, - "loss": 1.0077, - "step": 5116 - }, - { - "epoch": 0.3845633548774989, - "grad_norm": 1.7401697080437208, - "learning_rate": 2.8202279400732343e-06, - "loss": 0.9365, - "step": 5117 - }, - { - "epoch": 0.38463850894333385, - "grad_norm": 1.5885506885043077, - "learning_rate": 2.819783899009367e-06, - "loss": 0.9433, - "step": 5118 - }, - { - "epoch": 0.3847136630091688, - "grad_norm": 1.5585945913365131, - "learning_rate": 2.819339809370165e-06, - "loss": 0.9417, - "step": 5119 - }, - { - "epoch": 0.38478881707500373, - "grad_norm": 2.6722981078893873, - "learning_rate": 2.8188956711819413e-06, - "loss": 0.927, - "step": 5120 - }, - { - "epoch": 0.3848639711408387, - "grad_norm": 1.7074892315727468, - "learning_rate": 2.818451484471014e-06, - "loss": 1.0435, - "step": 5121 - }, - { - "epoch": 0.38493912520667367, - "grad_norm": 1.5696026920143864, - "learning_rate": 2.8180072492637016e-06, - "loss": 0.9428, - "step": 5122 - }, - { - "epoch": 0.38501427927250864, - "grad_norm": 1.8970782276366653, - "learning_rate": 2.817562965586328e-06, - "loss": 0.8606, - "step": 5123 - }, - { - "epoch": 0.3850894333383436, - "grad_norm": 1.6141292384660744, - "learning_rate": 2.8171186334652174e-06, - "loss": 1.0989, - "step": 5124 - }, - { - "epoch": 0.3851645874041786, - "grad_norm": 0.7180973904898927, - "learning_rate": 2.8166742529266988e-06, - "loss": 0.7985, - "step": 5125 - }, - { - "epoch": 0.38523974147001355, - "grad_norm": 1.6425633197348524, - "learning_rate": 2.8162298239971036e-06, - "loss": 1.0218, - "step": 5126 - }, - { - "epoch": 0.38531489553584847, - "grad_norm": 2.5127734970103917, - "learning_rate": 2.8157853467027665e-06, - "loss": 1.0855, - "step": 5127 - }, - { - "epoch": 0.38539004960168344, - "grad_norm": 0.7525440618852396, - "learning_rate": 2.815340821070023e-06, - "loss": 0.8762, - "step": 5128 - }, - { - "epoch": 0.3854652036675184, - "grad_norm": 2.131477021686096, - "learning_rate": 2.8148962471252135e-06, - "loss": 1.0274, - "step": 5129 - }, - { - "epoch": 0.3855403577333534, - "grad_norm": 2.5532876008455916, - "learning_rate": 2.8144516248946813e-06, - "loss": 0.8655, - "step": 5130 - }, - { - "epoch": 0.38561551179918835, - "grad_norm": 1.5185861663990101, - "learning_rate": 2.8140069544047717e-06, - "loss": 0.9871, - "step": 5131 - }, - { - "epoch": 0.3856906658650233, - "grad_norm": 1.8284849806876238, - "learning_rate": 2.813562235681833e-06, - "loss": 0.9874, - "step": 5132 - }, - { - "epoch": 0.38576581993085823, - "grad_norm": 0.6686359664807838, - "learning_rate": 2.813117468752216e-06, - "loss": 0.7936, - "step": 5133 - }, - { - "epoch": 0.3858409739966932, - "grad_norm": 2.60834661080228, - "learning_rate": 2.812672653642276e-06, - "loss": 1.0022, - "step": 5134 - }, - { - "epoch": 0.3859161280625282, - "grad_norm": 1.8339227538070277, - "learning_rate": 2.812227790378369e-06, - "loss": 1.0438, - "step": 5135 - }, - { - "epoch": 0.38599128212836314, - "grad_norm": 1.4375340001604942, - "learning_rate": 2.811782878986855e-06, - "loss": 0.9891, - "step": 5136 - }, - { - "epoch": 0.3860664361941981, - "grad_norm": 1.5076740576876169, - "learning_rate": 2.811337919494097e-06, - "loss": 1.1284, - "step": 5137 - }, - { - "epoch": 0.3861415902600331, - "grad_norm": 3.543877095091237, - "learning_rate": 2.8108929119264608e-06, - "loss": 0.8357, - "step": 5138 - }, - { - "epoch": 0.38621674432586806, - "grad_norm": 1.7685710668200314, - "learning_rate": 2.8104478563103145e-06, - "loss": 0.9812, - "step": 5139 - }, - { - "epoch": 0.38629189839170297, - "grad_norm": 1.7476521136832197, - "learning_rate": 2.8100027526720283e-06, - "loss": 1.0071, - "step": 5140 - }, - { - "epoch": 0.38636705245753794, - "grad_norm": 1.5853971866943144, - "learning_rate": 2.8095576010379784e-06, - "loss": 0.9644, - "step": 5141 - }, - { - "epoch": 0.3864422065233729, - "grad_norm": 1.8941376896711764, - "learning_rate": 2.80911240143454e-06, - "loss": 1.0406, - "step": 5142 - }, - { - "epoch": 0.3865173605892079, - "grad_norm": 2.387409516062901, - "learning_rate": 2.8086671538880938e-06, - "loss": 1.0417, - "step": 5143 - }, - { - "epoch": 0.38659251465504285, - "grad_norm": 3.4631806073064166, - "learning_rate": 2.808221858425022e-06, - "loss": 0.9618, - "step": 5144 - }, - { - "epoch": 0.3866676687208778, - "grad_norm": 2.2461795160255758, - "learning_rate": 2.8077765150717107e-06, - "loss": 1.0748, - "step": 5145 - }, - { - "epoch": 0.38674282278671274, - "grad_norm": 1.9692999661992987, - "learning_rate": 2.807331123854547e-06, - "loss": 1.0315, - "step": 5146 - }, - { - "epoch": 0.3868179768525477, - "grad_norm": 1.5724044454194905, - "learning_rate": 2.806885684799923e-06, - "loss": 0.986, - "step": 5147 - }, - { - "epoch": 0.3868931309183827, - "grad_norm": 1.8282746473831655, - "learning_rate": 2.8064401979342324e-06, - "loss": 1.0088, - "step": 5148 - }, - { - "epoch": 0.38696828498421765, - "grad_norm": 1.6202641949861, - "learning_rate": 2.805994663283872e-06, - "loss": 0.9394, - "step": 5149 - }, - { - "epoch": 0.3870434390500526, - "grad_norm": 2.201463871216044, - "learning_rate": 2.805549080875242e-06, - "loss": 1.0198, - "step": 5150 - }, - { - "epoch": 0.3871185931158876, - "grad_norm": 1.6375538009075303, - "learning_rate": 2.8051034507347435e-06, - "loss": 0.9489, - "step": 5151 - }, - { - "epoch": 0.38719374718172256, - "grad_norm": 1.9761803902244455, - "learning_rate": 2.804657772888783e-06, - "loss": 1.003, - "step": 5152 - }, - { - "epoch": 0.3872689012475575, - "grad_norm": 3.09610034788874, - "learning_rate": 2.804212047363768e-06, - "loss": 0.8888, - "step": 5153 - }, - { - "epoch": 0.38734405531339244, - "grad_norm": 1.619338222543919, - "learning_rate": 2.8037662741861097e-06, - "loss": 1.0595, - "step": 5154 - }, - { - "epoch": 0.3874192093792274, - "grad_norm": 1.7420024003714174, - "learning_rate": 2.803320453382222e-06, - "loss": 0.9852, - "step": 5155 - }, - { - "epoch": 0.3874943634450624, - "grad_norm": 1.4663495678278662, - "learning_rate": 2.8028745849785213e-06, - "loss": 1.0392, - "step": 5156 - }, - { - "epoch": 0.38756951751089735, - "grad_norm": 1.5578659002550541, - "learning_rate": 2.8024286690014266e-06, - "loss": 1.0275, - "step": 5157 - }, - { - "epoch": 0.3876446715767323, - "grad_norm": 1.8487691289957977, - "learning_rate": 2.801982705477361e-06, - "loss": 1.0966, - "step": 5158 - }, - { - "epoch": 0.38771982564256724, - "grad_norm": 2.2176579756519392, - "learning_rate": 2.801536694432749e-06, - "loss": 1.008, - "step": 5159 - }, - { - "epoch": 0.3877949797084022, - "grad_norm": 1.853926296583897, - "learning_rate": 2.8010906358940185e-06, - "loss": 0.9627, - "step": 5160 - }, - { - "epoch": 0.3878701337742372, - "grad_norm": 4.435482802404907, - "learning_rate": 2.8006445298876003e-06, - "loss": 0.935, - "step": 5161 - }, - { - "epoch": 0.38794528784007215, - "grad_norm": 0.9734727121416834, - "learning_rate": 2.800198376439928e-06, - "loss": 0.8878, - "step": 5162 - }, - { - "epoch": 0.3880204419059071, - "grad_norm": 1.4284949827176203, - "learning_rate": 2.7997521755774373e-06, - "loss": 1.0069, - "step": 5163 - }, - { - "epoch": 0.3880955959717421, - "grad_norm": 1.6341049700653072, - "learning_rate": 2.799305927326568e-06, - "loss": 1.0183, - "step": 5164 - }, - { - "epoch": 0.388170750037577, - "grad_norm": 1.4673023800465794, - "learning_rate": 2.7988596317137623e-06, - "loss": 0.9989, - "step": 5165 - }, - { - "epoch": 0.388245904103412, - "grad_norm": 1.6468011770767257, - "learning_rate": 2.7984132887654633e-06, - "loss": 0.9949, - "step": 5166 - }, - { - "epoch": 0.38832105816924695, - "grad_norm": 2.4561948580776662, - "learning_rate": 2.7979668985081204e-06, - "loss": 0.9615, - "step": 5167 - }, - { - "epoch": 0.3883962122350819, - "grad_norm": 1.8764090647610028, - "learning_rate": 2.797520460968183e-06, - "loss": 0.9637, - "step": 5168 - }, - { - "epoch": 0.3884713663009169, - "grad_norm": 1.590848188349229, - "learning_rate": 2.797073976172104e-06, - "loss": 0.9364, - "step": 5169 - }, - { - "epoch": 0.38854652036675186, - "grad_norm": 1.7518190058273304, - "learning_rate": 2.79662744414634e-06, - "loss": 1.0501, - "step": 5170 - }, - { - "epoch": 0.3886216744325868, - "grad_norm": 2.0245321840162065, - "learning_rate": 2.79618086491735e-06, - "loss": 1.0614, - "step": 5171 - }, - { - "epoch": 0.38869682849842174, - "grad_norm": 1.7904587007695478, - "learning_rate": 2.7957342385115944e-06, - "loss": 0.9886, - "step": 5172 - }, - { - "epoch": 0.3887719825642567, - "grad_norm": 2.1563459734359234, - "learning_rate": 2.795287564955538e-06, - "loss": 1.0498, - "step": 5173 - }, - { - "epoch": 0.3888471366300917, - "grad_norm": 1.9259705363443271, - "learning_rate": 2.7948408442756477e-06, - "loss": 1.0221, - "step": 5174 - }, - { - "epoch": 0.38892229069592665, - "grad_norm": 2.2200560688120996, - "learning_rate": 2.794394076498394e-06, - "loss": 1.1268, - "step": 5175 - }, - { - "epoch": 0.3889974447617616, - "grad_norm": 1.427613301499024, - "learning_rate": 2.79394726165025e-06, - "loss": 0.9833, - "step": 5176 - }, - { - "epoch": 0.3890725988275966, - "grad_norm": 0.7059065371845507, - "learning_rate": 2.79350039975769e-06, - "loss": 0.834, - "step": 5177 - }, - { - "epoch": 0.3891477528934315, - "grad_norm": 0.7057617761789533, - "learning_rate": 2.7930534908471927e-06, - "loss": 0.8576, - "step": 5178 - }, - { - "epoch": 0.3892229069592665, - "grad_norm": 1.4375869890512636, - "learning_rate": 2.792606534945239e-06, - "loss": 0.9983, - "step": 5179 - }, - { - "epoch": 0.38929806102510145, - "grad_norm": 1.9109371905821755, - "learning_rate": 2.7921595320783136e-06, - "loss": 0.9534, - "step": 5180 - }, - { - "epoch": 0.3893732150909364, - "grad_norm": 1.5217909302099173, - "learning_rate": 2.7917124822729022e-06, - "loss": 1.0523, - "step": 5181 - }, - { - "epoch": 0.3894483691567714, - "grad_norm": 1.7010947208887062, - "learning_rate": 2.791265385555495e-06, - "loss": 0.9721, - "step": 5182 - }, - { - "epoch": 0.38952352322260636, - "grad_norm": 1.4773802738123, - "learning_rate": 2.7908182419525834e-06, - "loss": 0.9473, - "step": 5183 - }, - { - "epoch": 0.38959867728844133, - "grad_norm": 0.7409976456330616, - "learning_rate": 2.7903710514906626e-06, - "loss": 0.8767, - "step": 5184 - }, - { - "epoch": 0.38967383135427625, - "grad_norm": 2.7908582703083886, - "learning_rate": 2.7899238141962304e-06, - "loss": 0.9699, - "step": 5185 - }, - { - "epoch": 0.3897489854201112, - "grad_norm": 2.0607296830934003, - "learning_rate": 2.7894765300957875e-06, - "loss": 1.0502, - "step": 5186 - }, - { - "epoch": 0.3898241394859462, - "grad_norm": 1.3175708362277085, - "learning_rate": 2.7890291992158376e-06, - "loss": 1.0448, - "step": 5187 - }, - { - "epoch": 0.38989929355178116, - "grad_norm": 1.8029139006754875, - "learning_rate": 2.7885818215828856e-06, - "loss": 1.0523, - "step": 5188 - }, - { - "epoch": 0.3899744476176161, - "grad_norm": 1.8077933927826957, - "learning_rate": 2.7881343972234416e-06, - "loss": 0.9544, - "step": 5189 - }, - { - "epoch": 0.3900496016834511, - "grad_norm": 1.828006707953368, - "learning_rate": 2.787686926164016e-06, - "loss": 0.783, - "step": 5190 - }, - { - "epoch": 0.390124755749286, - "grad_norm": 2.0868623090725436, - "learning_rate": 2.787239408431124e-06, - "loss": 0.9627, - "step": 5191 - }, - { - "epoch": 0.390199909815121, - "grad_norm": 1.9233179025785812, - "learning_rate": 2.786791844051282e-06, - "loss": 0.8903, - "step": 5192 - }, - { - "epoch": 0.39027506388095595, - "grad_norm": 1.7359791116336747, - "learning_rate": 2.7863442330510115e-06, - "loss": 0.9782, - "step": 5193 - }, - { - "epoch": 0.3903502179467909, - "grad_norm": 2.2198659749857454, - "learning_rate": 2.7858965754568335e-06, - "loss": 0.9469, - "step": 5194 - }, - { - "epoch": 0.3904253720126259, - "grad_norm": 1.8142328857522998, - "learning_rate": 2.7854488712952735e-06, - "loss": 0.9452, - "step": 5195 - }, - { - "epoch": 0.39050052607846086, - "grad_norm": 2.3955927078375576, - "learning_rate": 2.7850011205928607e-06, - "loss": 1.0355, - "step": 5196 - }, - { - "epoch": 0.39057568014429583, - "grad_norm": 2.463283909760844, - "learning_rate": 2.7845533233761256e-06, - "loss": 0.9674, - "step": 5197 - }, - { - "epoch": 0.39065083421013075, - "grad_norm": 4.1734099689633855, - "learning_rate": 2.784105479671602e-06, - "loss": 1.0156, - "step": 5198 - }, - { - "epoch": 0.3907259882759657, - "grad_norm": 2.197125104013599, - "learning_rate": 2.783657589505826e-06, - "loss": 0.9031, - "step": 5199 - }, - { - "epoch": 0.3908011423418007, - "grad_norm": 1.4320775467318505, - "learning_rate": 2.783209652905337e-06, - "loss": 0.9573, - "step": 5200 - }, - { - "epoch": 0.39087629640763566, - "grad_norm": 0.8159840053131433, - "learning_rate": 2.7827616698966763e-06, - "loss": 0.8925, - "step": 5201 - }, - { - "epoch": 0.39095145047347063, - "grad_norm": 2.1026456516177636, - "learning_rate": 2.78231364050639e-06, - "loss": 1.0839, - "step": 5202 - }, - { - "epoch": 0.3910266045393056, - "grad_norm": 1.4288792227498364, - "learning_rate": 2.781865564761025e-06, - "loss": 0.9336, - "step": 5203 - }, - { - "epoch": 0.3911017586051405, - "grad_norm": 1.655668696470932, - "learning_rate": 2.781417442687131e-06, - "loss": 0.9874, - "step": 5204 - }, - { - "epoch": 0.3911769126709755, - "grad_norm": 1.8190816884879277, - "learning_rate": 2.7809692743112616e-06, - "loss": 0.9515, - "step": 5205 - }, - { - "epoch": 0.39125206673681046, - "grad_norm": 5.769231727795644, - "learning_rate": 2.780521059659972e-06, - "loss": 1.1014, - "step": 5206 - }, - { - "epoch": 0.3913272208026454, - "grad_norm": 0.6209040178046387, - "learning_rate": 2.78007279875982e-06, - "loss": 0.7677, - "step": 5207 - }, - { - "epoch": 0.3914023748684804, - "grad_norm": 0.6551080941411943, - "learning_rate": 2.7796244916373686e-06, - "loss": 0.824, - "step": 5208 - }, - { - "epoch": 0.39147752893431537, - "grad_norm": 1.6576051026819922, - "learning_rate": 2.7791761383191807e-06, - "loss": 1.0003, - "step": 5209 - }, - { - "epoch": 0.3915526830001503, - "grad_norm": 1.7978312062815873, - "learning_rate": 2.778727738831822e-06, - "loss": 0.9456, - "step": 5210 - }, - { - "epoch": 0.39162783706598525, - "grad_norm": 0.7941165753432129, - "learning_rate": 2.7782792932018635e-06, - "loss": 0.8653, - "step": 5211 - }, - { - "epoch": 0.3917029911318202, - "grad_norm": 1.767390963092634, - "learning_rate": 2.7778308014558767e-06, - "loss": 0.971, - "step": 5212 - }, - { - "epoch": 0.3917781451976552, - "grad_norm": 2.2017265913857997, - "learning_rate": 2.777382263620436e-06, - "loss": 0.9474, - "step": 5213 - }, - { - "epoch": 0.39185329926349016, - "grad_norm": 2.581521311395466, - "learning_rate": 2.7769336797221197e-06, - "loss": 1.0565, - "step": 5214 - }, - { - "epoch": 0.39192845332932513, - "grad_norm": 1.6414747671418315, - "learning_rate": 2.7764850497875076e-06, - "loss": 0.9058, - "step": 5215 - }, - { - "epoch": 0.3920036073951601, - "grad_norm": 2.613314093823878, - "learning_rate": 2.776036373843183e-06, - "loss": 0.9435, - "step": 5216 - }, - { - "epoch": 0.392078761460995, - "grad_norm": 2.5212163923120112, - "learning_rate": 2.775587651915732e-06, - "loss": 1.0829, - "step": 5217 - }, - { - "epoch": 0.39215391552683, - "grad_norm": 1.6615570262268486, - "learning_rate": 2.775138884031742e-06, - "loss": 0.9412, - "step": 5218 - }, - { - "epoch": 0.39222906959266496, - "grad_norm": 2.5957805544496884, - "learning_rate": 2.7746900702178053e-06, - "loss": 1.1398, - "step": 5219 - }, - { - "epoch": 0.39230422365849993, - "grad_norm": 1.4883632687373611, - "learning_rate": 2.7742412105005154e-06, - "loss": 0.9948, - "step": 5220 - }, - { - "epoch": 0.3923793777243349, - "grad_norm": 1.81196685052911, - "learning_rate": 2.773792304906469e-06, - "loss": 1.0225, - "step": 5221 - }, - { - "epoch": 0.39245453179016987, - "grad_norm": 2.1255032560529514, - "learning_rate": 2.7733433534622655e-06, - "loss": 0.9787, - "step": 5222 - }, - { - "epoch": 0.3925296858560048, - "grad_norm": 2.0561561122434893, - "learning_rate": 2.772894356194507e-06, - "loss": 1.0559, - "step": 5223 - }, - { - "epoch": 0.39260483992183975, - "grad_norm": 0.8477139079792747, - "learning_rate": 2.7724453131297988e-06, - "loss": 0.8343, - "step": 5224 - }, - { - "epoch": 0.3926799939876747, - "grad_norm": 1.9082382339480626, - "learning_rate": 2.771996224294747e-06, - "loss": 0.9901, - "step": 5225 - }, - { - "epoch": 0.3927551480535097, - "grad_norm": 2.049538197489261, - "learning_rate": 2.7715470897159636e-06, - "loss": 1.1016, - "step": 5226 - }, - { - "epoch": 0.39283030211934467, - "grad_norm": 1.7076081233049636, - "learning_rate": 2.7710979094200593e-06, - "loss": 1.0483, - "step": 5227 - }, - { - "epoch": 0.39290545618517964, - "grad_norm": 2.2553149074072754, - "learning_rate": 2.7706486834336524e-06, - "loss": 0.9711, - "step": 5228 - }, - { - "epoch": 0.3929806102510146, - "grad_norm": 2.0347506362383485, - "learning_rate": 2.7701994117833596e-06, - "loss": 1.0396, - "step": 5229 - }, - { - "epoch": 0.3930557643168495, - "grad_norm": 3.8012329962217444, - "learning_rate": 2.7697500944958024e-06, - "loss": 1.0054, - "step": 5230 - }, - { - "epoch": 0.3931309183826845, - "grad_norm": 2.146464714814138, - "learning_rate": 2.7693007315976047e-06, - "loss": 1.0533, - "step": 5231 - }, - { - "epoch": 0.39320607244851946, - "grad_norm": 1.602242748661331, - "learning_rate": 2.7688513231153926e-06, - "loss": 0.9247, - "step": 5232 - }, - { - "epoch": 0.39328122651435443, - "grad_norm": 3.0008896462254726, - "learning_rate": 2.7684018690757954e-06, - "loss": 0.9236, - "step": 5233 - }, - { - "epoch": 0.3933563805801894, - "grad_norm": 1.9607224061486594, - "learning_rate": 2.767952369505445e-06, - "loss": 1.039, - "step": 5234 - }, - { - "epoch": 0.3934315346460244, - "grad_norm": 2.798799758203029, - "learning_rate": 2.7675028244309766e-06, - "loss": 0.9731, - "step": 5235 - }, - { - "epoch": 0.3935066887118593, - "grad_norm": 1.5046831774624765, - "learning_rate": 2.767053233879026e-06, - "loss": 0.987, - "step": 5236 - }, - { - "epoch": 0.39358184277769426, - "grad_norm": 2.1663372327318777, - "learning_rate": 2.766603597876235e-06, - "loss": 1.0223, - "step": 5237 - }, - { - "epoch": 0.3936569968435292, - "grad_norm": 1.3180413660871877, - "learning_rate": 2.7661539164492442e-06, - "loss": 0.9419, - "step": 5238 - }, - { - "epoch": 0.3937321509093642, - "grad_norm": 1.8696250805899093, - "learning_rate": 2.765704189624701e-06, - "loss": 0.9287, - "step": 5239 - }, - { - "epoch": 0.39380730497519917, - "grad_norm": 1.8257705655517833, - "learning_rate": 2.765254417429252e-06, - "loss": 0.9438, - "step": 5240 - }, - { - "epoch": 0.39388245904103414, - "grad_norm": 0.8242363950458709, - "learning_rate": 2.764804599889549e-06, - "loss": 0.8589, - "step": 5241 - }, - { - "epoch": 0.3939576131068691, - "grad_norm": 2.8594928988138264, - "learning_rate": 2.7643547370322446e-06, - "loss": 1.0046, - "step": 5242 - }, - { - "epoch": 0.394032767172704, - "grad_norm": 1.7691964681196395, - "learning_rate": 2.763904828883995e-06, - "loss": 0.9597, - "step": 5243 - }, - { - "epoch": 0.394107921238539, - "grad_norm": 1.7341047884111709, - "learning_rate": 2.763454875471459e-06, - "loss": 1.0648, - "step": 5244 - }, - { - "epoch": 0.39418307530437396, - "grad_norm": 1.7563941489146315, - "learning_rate": 2.7630048768212975e-06, - "loss": 1.0833, - "step": 5245 - }, - { - "epoch": 0.39425822937020893, - "grad_norm": 1.8737507791243977, - "learning_rate": 2.7625548329601763e-06, - "loss": 0.9606, - "step": 5246 - }, - { - "epoch": 0.3943333834360439, - "grad_norm": 1.7545244448576798, - "learning_rate": 2.7621047439147606e-06, - "loss": 0.9666, - "step": 5247 - }, - { - "epoch": 0.3944085375018789, - "grad_norm": 1.836129166358336, - "learning_rate": 2.7616546097117213e-06, - "loss": 0.9311, - "step": 5248 - }, - { - "epoch": 0.3944836915677138, - "grad_norm": 1.6080558333216648, - "learning_rate": 2.761204430377729e-06, - "loss": 1.0125, - "step": 5249 - }, - { - "epoch": 0.39455884563354876, - "grad_norm": 1.8468162772562073, - "learning_rate": 2.7607542059394604e-06, - "loss": 1.0094, - "step": 5250 - }, - { - "epoch": 0.39463399969938373, - "grad_norm": 2.508730901427813, - "learning_rate": 2.760303936423591e-06, - "loss": 0.9509, - "step": 5251 - }, - { - "epoch": 0.3947091537652187, - "grad_norm": 1.5505810663656325, - "learning_rate": 2.759853621856802e-06, - "loss": 0.9067, - "step": 5252 - }, - { - "epoch": 0.39478430783105367, - "grad_norm": 2.064769074687799, - "learning_rate": 2.759403262265777e-06, - "loss": 0.9493, - "step": 5253 - }, - { - "epoch": 0.39485946189688864, - "grad_norm": 1.5151649642116247, - "learning_rate": 2.7589528576772e-06, - "loss": 1.027, - "step": 5254 - }, - { - "epoch": 0.39493461596272356, - "grad_norm": 1.6533598244503358, - "learning_rate": 2.7585024081177602e-06, - "loss": 0.902, - "step": 5255 - }, - { - "epoch": 0.3950097700285585, - "grad_norm": 1.4819923954818774, - "learning_rate": 2.7580519136141483e-06, - "loss": 0.9969, - "step": 5256 - }, - { - "epoch": 0.3950849240943935, - "grad_norm": 1.924525348554865, - "learning_rate": 2.7576013741930576e-06, - "loss": 1.0126, - "step": 5257 - }, - { - "epoch": 0.39516007816022847, - "grad_norm": 3.2589583542987643, - "learning_rate": 2.7571507898811846e-06, - "loss": 1.1013, - "step": 5258 - }, - { - "epoch": 0.39523523222606344, - "grad_norm": 2.0859305278075775, - "learning_rate": 2.756700160705228e-06, - "loss": 0.876, - "step": 5259 - }, - { - "epoch": 0.3953103862918984, - "grad_norm": 2.2677320482319168, - "learning_rate": 2.756249486691889e-06, - "loss": 1.0741, - "step": 5260 - }, - { - "epoch": 0.3953855403577334, - "grad_norm": 1.65180625019048, - "learning_rate": 2.7557987678678723e-06, - "loss": 0.9264, - "step": 5261 - }, - { - "epoch": 0.3954606944235683, - "grad_norm": 1.7856550125094015, - "learning_rate": 2.755348004259884e-06, - "loss": 0.9854, - "step": 5262 - }, - { - "epoch": 0.39553584848940326, - "grad_norm": 3.1802618084503806, - "learning_rate": 2.7548971958946347e-06, - "loss": 1.065, - "step": 5263 - }, - { - "epoch": 0.39561100255523823, - "grad_norm": 1.8422092530290366, - "learning_rate": 2.7544463427988355e-06, - "loss": 1.1108, - "step": 5264 - }, - { - "epoch": 0.3956861566210732, - "grad_norm": 2.023867646085299, - "learning_rate": 2.7539954449992014e-06, - "loss": 0.9825, - "step": 5265 - }, - { - "epoch": 0.3957613106869082, - "grad_norm": 0.7378125999156556, - "learning_rate": 2.7535445025224506e-06, - "loss": 0.8551, - "step": 5266 - }, - { - "epoch": 0.39583646475274314, - "grad_norm": 0.6972295073229513, - "learning_rate": 2.7530935153953016e-06, - "loss": 0.8359, - "step": 5267 - }, - { - "epoch": 0.39591161881857806, - "grad_norm": 1.9008619110264848, - "learning_rate": 2.752642483644478e-06, - "loss": 1.0123, - "step": 5268 - }, - { - "epoch": 0.39598677288441303, - "grad_norm": 1.5037634050971986, - "learning_rate": 2.752191407296706e-06, - "loss": 0.8896, - "step": 5269 - }, - { - "epoch": 0.396061926950248, - "grad_norm": 4.544514361162102, - "learning_rate": 2.7517402863787123e-06, - "loss": 0.8461, - "step": 5270 - }, - { - "epoch": 0.39613708101608297, - "grad_norm": 2.1813174480514257, - "learning_rate": 2.751289120917228e-06, - "loss": 1.0634, - "step": 5271 - }, - { - "epoch": 0.39621223508191794, - "grad_norm": 1.6007885062861709, - "learning_rate": 2.750837910938987e-06, - "loss": 0.9649, - "step": 5272 - }, - { - "epoch": 0.3962873891477529, - "grad_norm": 1.8184387648611329, - "learning_rate": 2.7503866564707236e-06, - "loss": 1.0148, - "step": 5273 - }, - { - "epoch": 0.3963625432135879, - "grad_norm": 1.9787904749940055, - "learning_rate": 2.7499353575391784e-06, - "loss": 1.0678, - "step": 5274 - }, - { - "epoch": 0.3964376972794228, - "grad_norm": 1.878266413945578, - "learning_rate": 2.749484014171091e-06, - "loss": 0.8971, - "step": 5275 - }, - { - "epoch": 0.39651285134525777, - "grad_norm": 1.7612120582711865, - "learning_rate": 2.749032626393206e-06, - "loss": 1.0478, - "step": 5276 - }, - { - "epoch": 0.39658800541109274, - "grad_norm": 2.0621603483796305, - "learning_rate": 2.74858119423227e-06, - "loss": 0.9094, - "step": 5277 - }, - { - "epoch": 0.3966631594769277, - "grad_norm": 1.91124103222197, - "learning_rate": 2.748129717715031e-06, - "loss": 1.0506, - "step": 5278 - }, - { - "epoch": 0.3967383135427627, - "grad_norm": 1.468371403374184, - "learning_rate": 2.747678196868241e-06, - "loss": 0.9422, - "step": 5279 - }, - { - "epoch": 0.39681346760859765, - "grad_norm": 2.0304314944939676, - "learning_rate": 2.747226631718656e-06, - "loss": 1.0833, - "step": 5280 - }, - { - "epoch": 0.39688862167443256, - "grad_norm": 1.6025759584037236, - "learning_rate": 2.746775022293032e-06, - "loss": 1.0461, - "step": 5281 - }, - { - "epoch": 0.39696377574026753, - "grad_norm": 1.9064147127625073, - "learning_rate": 2.746323368618127e-06, - "loss": 0.9324, - "step": 5282 - }, - { - "epoch": 0.3970389298061025, - "grad_norm": 1.5162399460222562, - "learning_rate": 2.7458716707207054e-06, - "loss": 1.0318, - "step": 5283 - }, - { - "epoch": 0.3971140838719375, - "grad_norm": 2.3150388853626125, - "learning_rate": 2.74541992862753e-06, - "loss": 1.0159, - "step": 5284 - }, - { - "epoch": 0.39718923793777244, - "grad_norm": 1.6869879758188115, - "learning_rate": 2.744968142365371e-06, - "loss": 1.0208, - "step": 5285 - }, - { - "epoch": 0.3972643920036074, - "grad_norm": 1.6212332424218356, - "learning_rate": 2.744516311960996e-06, - "loss": 0.9831, - "step": 5286 - }, - { - "epoch": 0.3973395460694424, - "grad_norm": 1.7946811718026274, - "learning_rate": 2.744064437441179e-06, - "loss": 1.0005, - "step": 5287 - }, - { - "epoch": 0.3974147001352773, - "grad_norm": 1.836735069238101, - "learning_rate": 2.743612518832695e-06, - "loss": 1.0463, - "step": 5288 - }, - { - "epoch": 0.39748985420111227, - "grad_norm": 1.817323514615629, - "learning_rate": 2.743160556162321e-06, - "loss": 1.0405, - "step": 5289 - }, - { - "epoch": 0.39756500826694724, - "grad_norm": 2.1954747476048557, - "learning_rate": 2.7427085494568383e-06, - "loss": 0.9493, - "step": 5290 - }, - { - "epoch": 0.3976401623327822, - "grad_norm": 1.5045004723479372, - "learning_rate": 2.742256498743031e-06, - "loss": 0.9496, - "step": 5291 - }, - { - "epoch": 0.3977153163986172, - "grad_norm": 4.085215050629244, - "learning_rate": 2.7418044040476838e-06, - "loss": 0.8858, - "step": 5292 - }, - { - "epoch": 0.39779047046445215, - "grad_norm": 1.9493882099864699, - "learning_rate": 2.7413522653975842e-06, - "loss": 0.9331, - "step": 5293 - }, - { - "epoch": 0.39786562453028707, - "grad_norm": 1.7325252945059433, - "learning_rate": 2.7409000828195247e-06, - "loss": 0.9493, - "step": 5294 - }, - { - "epoch": 0.39794077859612204, - "grad_norm": 1.6018820095020523, - "learning_rate": 2.7404478563402976e-06, - "loss": 0.9621, - "step": 5295 - }, - { - "epoch": 0.398015932661957, - "grad_norm": 1.5419290250630568, - "learning_rate": 2.7399955859867e-06, - "loss": 0.9794, - "step": 5296 - }, - { - "epoch": 0.398091086727792, - "grad_norm": 1.9385959847896188, - "learning_rate": 2.739543271785531e-06, - "loss": 1.0656, - "step": 5297 - }, - { - "epoch": 0.39816624079362695, - "grad_norm": 2.1451785252045528, - "learning_rate": 2.7390909137635906e-06, - "loss": 0.946, - "step": 5298 - }, - { - "epoch": 0.3982413948594619, - "grad_norm": 1.9293231079517783, - "learning_rate": 2.7386385119476833e-06, - "loss": 0.9761, - "step": 5299 - }, - { - "epoch": 0.39831654892529683, - "grad_norm": 2.407766780629628, - "learning_rate": 2.738186066364616e-06, - "loss": 0.9616, - "step": 5300 - }, - { - "epoch": 0.3983917029911318, - "grad_norm": 2.0870810813635265, - "learning_rate": 2.7377335770411965e-06, - "loss": 1.0336, - "step": 5301 - }, - { - "epoch": 0.3984668570569668, - "grad_norm": 1.9322188604439072, - "learning_rate": 2.737281044004239e-06, - "loss": 0.9332, - "step": 5302 - }, - { - "epoch": 0.39854201112280174, - "grad_norm": 1.635450723216526, - "learning_rate": 2.7368284672805558e-06, - "loss": 1.0027, - "step": 5303 - }, - { - "epoch": 0.3986171651886367, - "grad_norm": 2.0177497015319386, - "learning_rate": 2.7363758468969643e-06, - "loss": 1.081, - "step": 5304 - }, - { - "epoch": 0.3986923192544717, - "grad_norm": 2.2382675090587707, - "learning_rate": 2.735923182880285e-06, - "loss": 1.0863, - "step": 5305 - }, - { - "epoch": 0.39876747332030665, - "grad_norm": 2.02151244507987, - "learning_rate": 2.7354704752573376e-06, - "loss": 1.0221, - "step": 5306 - }, - { - "epoch": 0.39884262738614157, - "grad_norm": 3.169554965913322, - "learning_rate": 2.735017724054949e-06, - "loss": 1.033, - "step": 5307 - }, - { - "epoch": 0.39891778145197654, - "grad_norm": 1.5697593663703666, - "learning_rate": 2.7345649292999456e-06, - "loss": 0.9702, - "step": 5308 - }, - { - "epoch": 0.3989929355178115, - "grad_norm": 2.199898379319712, - "learning_rate": 2.7341120910191575e-06, - "loss": 0.8703, - "step": 5309 - }, - { - "epoch": 0.3990680895836465, - "grad_norm": 1.9118809938664179, - "learning_rate": 2.733659209239417e-06, - "loss": 1.0662, - "step": 5310 - }, - { - "epoch": 0.39914324364948145, - "grad_norm": 2.378871572958978, - "learning_rate": 2.7332062839875586e-06, - "loss": 1.0597, - "step": 5311 - }, - { - "epoch": 0.3992183977153164, - "grad_norm": 0.7437929958449662, - "learning_rate": 2.73275331529042e-06, - "loss": 0.8234, - "step": 5312 - }, - { - "epoch": 0.39929355178115133, - "grad_norm": 1.8391171456177489, - "learning_rate": 2.7323003031748424e-06, - "loss": 1.0115, - "step": 5313 - }, - { - "epoch": 0.3993687058469863, - "grad_norm": 2.0533617042876915, - "learning_rate": 2.731847247667667e-06, - "loss": 1.0011, - "step": 5314 - }, - { - "epoch": 0.3994438599128213, - "grad_norm": 6.156670105827677, - "learning_rate": 2.7313941487957398e-06, - "loss": 0.9477, - "step": 5315 - }, - { - "epoch": 0.39951901397865625, - "grad_norm": 2.0011940014621366, - "learning_rate": 2.730941006585909e-06, - "loss": 1.0877, - "step": 5316 - }, - { - "epoch": 0.3995941680444912, - "grad_norm": 20.537700690637283, - "learning_rate": 2.7304878210650243e-06, - "loss": 0.9424, - "step": 5317 - }, - { - "epoch": 0.3996693221103262, - "grad_norm": 1.3685732984359418, - "learning_rate": 2.7300345922599394e-06, - "loss": 0.9307, - "step": 5318 - }, - { - "epoch": 0.39974447617616116, - "grad_norm": 1.6916628460144094, - "learning_rate": 2.7295813201975087e-06, - "loss": 0.9437, - "step": 5319 - }, - { - "epoch": 0.39981963024199607, - "grad_norm": 1.9345653212288905, - "learning_rate": 2.7291280049045916e-06, - "loss": 0.9869, - "step": 5320 - }, - { - "epoch": 0.39989478430783104, - "grad_norm": 1.5810338773530093, - "learning_rate": 2.728674646408048e-06, - "loss": 0.9634, - "step": 5321 - }, - { - "epoch": 0.399969938373666, - "grad_norm": 1.3334688723735446, - "learning_rate": 2.7282212447347413e-06, - "loss": 0.9544, - "step": 5322 - }, - { - "epoch": 0.400045092439501, - "grad_norm": 1.8150704014473056, - "learning_rate": 2.7277677999115368e-06, - "loss": 0.9641, - "step": 5323 - }, - { - "epoch": 0.40012024650533595, - "grad_norm": 1.9453591337321101, - "learning_rate": 2.7273143119653042e-06, - "loss": 0.9784, - "step": 5324 - }, - { - "epoch": 0.4001954005711709, - "grad_norm": 1.8278507776894366, - "learning_rate": 2.7268607809229137e-06, - "loss": 0.937, - "step": 5325 - }, - { - "epoch": 0.40027055463700584, - "grad_norm": 2.6471396898246735, - "learning_rate": 2.7264072068112377e-06, - "loss": 1.0681, - "step": 5326 - }, - { - "epoch": 0.4003457087028408, - "grad_norm": 1.5566734013762316, - "learning_rate": 2.725953589657154e-06, - "loss": 1.0143, - "step": 5327 - }, - { - "epoch": 0.4004208627686758, - "grad_norm": 1.5122497572265672, - "learning_rate": 2.7254999294875395e-06, - "loss": 0.9935, - "step": 5328 - }, - { - "epoch": 0.40049601683451075, - "grad_norm": 1.8894673736331542, - "learning_rate": 2.725046226329276e-06, - "loss": 0.9555, - "step": 5329 - }, - { - "epoch": 0.4005711709003457, - "grad_norm": 1.8212455355681798, - "learning_rate": 2.7245924802092476e-06, - "loss": 0.9615, - "step": 5330 - }, - { - "epoch": 0.4006463249661807, - "grad_norm": 0.646406370683474, - "learning_rate": 2.7241386911543397e-06, - "loss": 0.8374, - "step": 5331 - }, - { - "epoch": 0.40072147903201566, - "grad_norm": 2.6269047502225775, - "learning_rate": 2.7236848591914422e-06, - "loss": 0.9455, - "step": 5332 - }, - { - "epoch": 0.4007966330978506, - "grad_norm": 9.772598681904348, - "learning_rate": 2.7232309843474446e-06, - "loss": 0.8954, - "step": 5333 - }, - { - "epoch": 0.40087178716368554, - "grad_norm": 1.5139723744057774, - "learning_rate": 2.7227770666492423e-06, - "loss": 0.9267, - "step": 5334 - }, - { - "epoch": 0.4009469412295205, - "grad_norm": 0.7586314308873218, - "learning_rate": 2.722323106123731e-06, - "loss": 0.8649, - "step": 5335 - }, - { - "epoch": 0.4010220952953555, - "grad_norm": 2.283591127345184, - "learning_rate": 2.7218691027978103e-06, - "loss": 0.9894, - "step": 5336 - }, - { - "epoch": 0.40109724936119046, - "grad_norm": 2.390940028358671, - "learning_rate": 2.7214150566983807e-06, - "loss": 0.9829, - "step": 5337 - }, - { - "epoch": 0.4011724034270254, - "grad_norm": 1.751554139633712, - "learning_rate": 2.7209609678523462e-06, - "loss": 1.0395, - "step": 5338 - }, - { - "epoch": 0.40124755749286034, - "grad_norm": 1.987726160293419, - "learning_rate": 2.7205068362866134e-06, - "loss": 1.0301, - "step": 5339 - }, - { - "epoch": 0.4013227115586953, - "grad_norm": 1.3809715128342233, - "learning_rate": 2.7200526620280923e-06, - "loss": 0.9878, - "step": 5340 - }, - { - "epoch": 0.4013978656245303, - "grad_norm": 1.2434552998501895, - "learning_rate": 2.719598445103693e-06, - "loss": 0.8938, - "step": 5341 - }, - { - "epoch": 0.40147301969036525, - "grad_norm": 1.7566766078216485, - "learning_rate": 2.7191441855403304e-06, - "loss": 1.0871, - "step": 5342 - }, - { - "epoch": 0.4015481737562002, - "grad_norm": 1.6861535811517754, - "learning_rate": 2.718689883364922e-06, - "loss": 1.0128, - "step": 5343 - }, - { - "epoch": 0.4016233278220352, - "grad_norm": 1.9447613896807414, - "learning_rate": 2.7182355386043847e-06, - "loss": 0.8771, - "step": 5344 - }, - { - "epoch": 0.4016984818878701, - "grad_norm": 2.7869787472946963, - "learning_rate": 2.7177811512856415e-06, - "loss": 0.9724, - "step": 5345 - }, - { - "epoch": 0.4017736359537051, - "grad_norm": 2.001717783423192, - "learning_rate": 2.7173267214356173e-06, - "loss": 0.9679, - "step": 5346 - }, - { - "epoch": 0.40184879001954005, - "grad_norm": 1.7039883026697618, - "learning_rate": 2.716872249081238e-06, - "loss": 0.9975, - "step": 5347 - }, - { - "epoch": 0.401923944085375, - "grad_norm": 3.33266028920913, - "learning_rate": 2.7164177342494323e-06, - "loss": 1.013, - "step": 5348 - }, - { - "epoch": 0.40199909815121, - "grad_norm": 1.833303826990525, - "learning_rate": 2.7159631769671326e-06, - "loss": 1.0888, - "step": 5349 - }, - { - "epoch": 0.40207425221704496, - "grad_norm": 1.717856573768116, - "learning_rate": 2.715508577261273e-06, - "loss": 0.9828, - "step": 5350 - }, - { - "epoch": 0.40214940628287993, - "grad_norm": 1.94986605550995, - "learning_rate": 2.715053935158791e-06, - "loss": 0.8978, - "step": 5351 - }, - { - "epoch": 0.40222456034871484, - "grad_norm": 0.8759234528192643, - "learning_rate": 2.7145992506866242e-06, - "loss": 0.9384, - "step": 5352 - }, - { - "epoch": 0.4022997144145498, - "grad_norm": 5.830357182915219, - "learning_rate": 2.714144523871716e-06, - "loss": 1.0203, - "step": 5353 - }, - { - "epoch": 0.4023748684803848, - "grad_norm": 1.6846703010407165, - "learning_rate": 2.7136897547410105e-06, - "loss": 1.0552, - "step": 5354 - }, - { - "epoch": 0.40245002254621975, - "grad_norm": 2.0483054484411074, - "learning_rate": 2.7132349433214536e-06, - "loss": 0.9696, - "step": 5355 - }, - { - "epoch": 0.4025251766120547, - "grad_norm": 1.5929347178222748, - "learning_rate": 2.712780089639995e-06, - "loss": 0.9528, - "step": 5356 - }, - { - "epoch": 0.4026003306778897, - "grad_norm": 1.4519405665148302, - "learning_rate": 2.7123251937235873e-06, - "loss": 0.9426, - "step": 5357 - }, - { - "epoch": 0.4026754847437246, - "grad_norm": 3.4993976347147475, - "learning_rate": 2.7118702555991835e-06, - "loss": 1.0188, - "step": 5358 - }, - { - "epoch": 0.4027506388095596, - "grad_norm": 1.7097752076460673, - "learning_rate": 2.7114152752937417e-06, - "loss": 0.9448, - "step": 5359 - }, - { - "epoch": 0.40282579287539455, - "grad_norm": 1.7848620683607797, - "learning_rate": 2.71096025283422e-06, - "loss": 0.9868, - "step": 5360 - }, - { - "epoch": 0.4029009469412295, - "grad_norm": 1.6844394621961303, - "learning_rate": 2.7105051882475813e-06, - "loss": 0.9808, - "step": 5361 - }, - { - "epoch": 0.4029761010070645, - "grad_norm": 2.102565823680133, - "learning_rate": 2.7100500815607898e-06, - "loss": 1.0612, - "step": 5362 - }, - { - "epoch": 0.40305125507289946, - "grad_norm": 1.4405288300096988, - "learning_rate": 2.7095949328008113e-06, - "loss": 1.012, - "step": 5363 - }, - { - "epoch": 0.40312640913873443, - "grad_norm": 1.6430702515510354, - "learning_rate": 2.7091397419946162e-06, - "loss": 1.0145, - "step": 5364 - }, - { - "epoch": 0.40320156320456935, - "grad_norm": 1.6002403942952763, - "learning_rate": 2.708684509169176e-06, - "loss": 0.9837, - "step": 5365 - }, - { - "epoch": 0.4032767172704043, - "grad_norm": 1.6030140323417388, - "learning_rate": 2.7082292343514646e-06, - "loss": 0.9742, - "step": 5366 - }, - { - "epoch": 0.4033518713362393, - "grad_norm": 3.0321872481348286, - "learning_rate": 2.707773917568459e-06, - "loss": 0.9911, - "step": 5367 - }, - { - "epoch": 0.40342702540207426, - "grad_norm": 1.547096853049037, - "learning_rate": 2.707318558847139e-06, - "loss": 1.0735, - "step": 5368 - }, - { - "epoch": 0.4035021794679092, - "grad_norm": 1.5816205280409263, - "learning_rate": 2.706863158214486e-06, - "loss": 1.0355, - "step": 5369 - }, - { - "epoch": 0.4035773335337442, - "grad_norm": 1.9232491023672595, - "learning_rate": 2.7064077156974835e-06, - "loss": 0.9289, - "step": 5370 - }, - { - "epoch": 0.4036524875995791, - "grad_norm": 1.73431025203782, - "learning_rate": 2.705952231323119e-06, - "loss": 0.8887, - "step": 5371 - }, - { - "epoch": 0.4037276416654141, - "grad_norm": 1.561315163565008, - "learning_rate": 2.7054967051183813e-06, - "loss": 0.9486, - "step": 5372 - }, - { - "epoch": 0.40380279573124905, - "grad_norm": 1.6183605270510548, - "learning_rate": 2.705041137110263e-06, - "loss": 1.0322, - "step": 5373 - }, - { - "epoch": 0.403877949797084, - "grad_norm": 1.4128751881003707, - "learning_rate": 2.704585527325757e-06, - "loss": 0.9785, - "step": 5374 - }, - { - "epoch": 0.403953103862919, - "grad_norm": 1.5906128019153705, - "learning_rate": 2.704129875791861e-06, - "loss": 1.0731, - "step": 5375 - }, - { - "epoch": 0.40402825792875396, - "grad_norm": 1.6768282662788323, - "learning_rate": 2.7036741825355728e-06, - "loss": 0.9875, - "step": 5376 - }, - { - "epoch": 0.40410341199458893, - "grad_norm": 1.7426177036807702, - "learning_rate": 2.7032184475838953e-06, - "loss": 1.0171, - "step": 5377 - }, - { - "epoch": 0.40417856606042385, - "grad_norm": 1.680554401337759, - "learning_rate": 2.7027626709638317e-06, - "loss": 1.0326, - "step": 5378 - }, - { - "epoch": 0.4042537201262588, - "grad_norm": 1.944995042260135, - "learning_rate": 2.702306852702389e-06, - "loss": 0.9329, - "step": 5379 - }, - { - "epoch": 0.4043288741920938, - "grad_norm": 1.8424432293907544, - "learning_rate": 2.7018509928265763e-06, - "loss": 1.0685, - "step": 5380 - }, - { - "epoch": 0.40440402825792876, - "grad_norm": 1.5052057374010064, - "learning_rate": 2.7013950913634036e-06, - "loss": 1.0034, - "step": 5381 - }, - { - "epoch": 0.40447918232376373, - "grad_norm": 1.4525102525043916, - "learning_rate": 2.7009391483398868e-06, - "loss": 1.0594, - "step": 5382 - }, - { - "epoch": 0.4045543363895987, - "grad_norm": 2.0024181291129106, - "learning_rate": 2.7004831637830416e-06, - "loss": 0.9417, - "step": 5383 - }, - { - "epoch": 0.4046294904554336, - "grad_norm": 1.99704613705951, - "learning_rate": 2.700027137719886e-06, - "loss": 0.9703, - "step": 5384 - }, - { - "epoch": 0.4047046445212686, - "grad_norm": 1.4245025335298063, - "learning_rate": 2.699571070177442e-06, - "loss": 1.0096, - "step": 5385 - }, - { - "epoch": 0.40477979858710356, - "grad_norm": 1.8570352787781355, - "learning_rate": 2.6991149611827335e-06, - "loss": 1.042, - "step": 5386 - }, - { - "epoch": 0.4048549526529385, - "grad_norm": 2.1084006991071704, - "learning_rate": 2.6986588107627858e-06, - "loss": 1.0501, - "step": 5387 - }, - { - "epoch": 0.4049301067187735, - "grad_norm": 2.342818927203996, - "learning_rate": 2.698202618944629e-06, - "loss": 1.0193, - "step": 5388 - }, - { - "epoch": 0.40500526078460847, - "grad_norm": 1.9492181995826816, - "learning_rate": 2.697746385755293e-06, - "loss": 0.9891, - "step": 5389 - }, - { - "epoch": 0.4050804148504434, - "grad_norm": 1.722276087742339, - "learning_rate": 2.6972901112218123e-06, - "loss": 1.0313, - "step": 5390 - }, - { - "epoch": 0.40515556891627835, - "grad_norm": 1.8270251886503286, - "learning_rate": 2.696833795371222e-06, - "loss": 0.9469, - "step": 5391 - }, - { - "epoch": 0.4052307229821133, - "grad_norm": 2.9797808852062144, - "learning_rate": 2.696377438230561e-06, - "loss": 0.9591, - "step": 5392 - }, - { - "epoch": 0.4053058770479483, - "grad_norm": 3.222853775906761, - "learning_rate": 2.6959210398268703e-06, - "loss": 0.9799, - "step": 5393 - }, - { - "epoch": 0.40538103111378326, - "grad_norm": 2.591380220224401, - "learning_rate": 2.6954646001871928e-06, - "loss": 0.9404, - "step": 5394 - }, - { - "epoch": 0.40545618517961823, - "grad_norm": 1.737186112627195, - "learning_rate": 2.695008119338575e-06, - "loss": 1.0398, - "step": 5395 - }, - { - "epoch": 0.4055313392454532, - "grad_norm": 1.700455769982958, - "learning_rate": 2.6945515973080643e-06, - "loss": 0.9996, - "step": 5396 - }, - { - "epoch": 0.4056064933112881, - "grad_norm": 1.9543276936679967, - "learning_rate": 2.6940950341227124e-06, - "loss": 1.021, - "step": 5397 - }, - { - "epoch": 0.4056816473771231, - "grad_norm": 2.0166159862528965, - "learning_rate": 2.693638429809572e-06, - "loss": 0.916, - "step": 5398 - }, - { - "epoch": 0.40575680144295806, - "grad_norm": 1.9770407117048767, - "learning_rate": 2.6931817843956977e-06, - "loss": 1.0631, - "step": 5399 - }, - { - "epoch": 0.40583195550879303, - "grad_norm": 1.6316196154446612, - "learning_rate": 2.692725097908149e-06, - "loss": 0.8752, - "step": 5400 - }, - { - "epoch": 0.405907109574628, - "grad_norm": 1.7292274173344597, - "learning_rate": 2.692268370373985e-06, - "loss": 0.9395, - "step": 5401 - }, - { - "epoch": 0.40598226364046297, - "grad_norm": 1.5177722939249005, - "learning_rate": 2.69181160182027e-06, - "loss": 1.1021, - "step": 5402 - }, - { - "epoch": 0.4060574177062979, - "grad_norm": 1.65561246297661, - "learning_rate": 2.691354792274068e-06, - "loss": 1.0014, - "step": 5403 - }, - { - "epoch": 0.40613257177213286, - "grad_norm": 1.6642978842013763, - "learning_rate": 2.690897941762447e-06, - "loss": 1.0003, - "step": 5404 - }, - { - "epoch": 0.4062077258379678, - "grad_norm": 1.2620943059703504, - "learning_rate": 2.6904410503124774e-06, - "loss": 0.9726, - "step": 5405 - }, - { - "epoch": 0.4062828799038028, - "grad_norm": 1.723877627355457, - "learning_rate": 2.6899841179512324e-06, - "loss": 0.9155, - "step": 5406 - }, - { - "epoch": 0.40635803396963777, - "grad_norm": 1.7446494732511115, - "learning_rate": 2.689527144705785e-06, - "loss": 0.9417, - "step": 5407 - }, - { - "epoch": 0.40643318803547274, - "grad_norm": 1.7055337653216074, - "learning_rate": 2.6890701306032154e-06, - "loss": 0.9087, - "step": 5408 - }, - { - "epoch": 0.4065083421013077, - "grad_norm": 1.8924705388183485, - "learning_rate": 2.6886130756706003e-06, - "loss": 1.0334, - "step": 5409 - }, - { - "epoch": 0.4065834961671426, - "grad_norm": 2.027082069869659, - "learning_rate": 2.688155979935025e-06, - "loss": 0.952, - "step": 5410 - }, - { - "epoch": 0.4066586502329776, - "grad_norm": 1.6873167432956417, - "learning_rate": 2.687698843423572e-06, - "loss": 0.9317, - "step": 5411 - }, - { - "epoch": 0.40673380429881256, - "grad_norm": 1.723427182648281, - "learning_rate": 2.6872416661633296e-06, - "loss": 0.9562, - "step": 5412 - }, - { - "epoch": 0.40680895836464753, - "grad_norm": 1.470345057128045, - "learning_rate": 2.6867844481813868e-06, - "loss": 1.0167, - "step": 5413 - }, - { - "epoch": 0.4068841124304825, - "grad_norm": 2.0040515155452536, - "learning_rate": 2.6863271895048353e-06, - "loss": 0.9643, - "step": 5414 - }, - { - "epoch": 0.4069592664963175, - "grad_norm": 2.2677707376744114, - "learning_rate": 2.6858698901607696e-06, - "loss": 1.0167, - "step": 5415 - }, - { - "epoch": 0.4070344205621524, - "grad_norm": 1.590892998502516, - "learning_rate": 2.6854125501762863e-06, - "loss": 0.9435, - "step": 5416 - }, - { - "epoch": 0.40710957462798736, - "grad_norm": 1.679526884694539, - "learning_rate": 2.684955169578486e-06, - "loss": 0.9672, - "step": 5417 - }, - { - "epoch": 0.40718472869382233, - "grad_norm": 1.6154147396841978, - "learning_rate": 2.684497748394468e-06, - "loss": 1.017, - "step": 5418 - }, - { - "epoch": 0.4072598827596573, - "grad_norm": 2.4442471752136767, - "learning_rate": 2.6840402866513377e-06, - "loss": 0.9333, - "step": 5419 - }, - { - "epoch": 0.40733503682549227, - "grad_norm": 1.3902506806718862, - "learning_rate": 2.6835827843762006e-06, - "loss": 1.0663, - "step": 5420 - }, - { - "epoch": 0.40741019089132724, - "grad_norm": 1.9001801581047533, - "learning_rate": 2.6831252415961665e-06, - "loss": 1.0065, - "step": 5421 - }, - { - "epoch": 0.4074853449571622, - "grad_norm": 1.7486241245298944, - "learning_rate": 2.682667658338345e-06, - "loss": 0.9926, - "step": 5422 - }, - { - "epoch": 0.4075604990229971, - "grad_norm": 1.48050385687679, - "learning_rate": 2.6822100346298517e-06, - "loss": 0.9612, - "step": 5423 - }, - { - "epoch": 0.4076356530888321, - "grad_norm": 1.5138941841538396, - "learning_rate": 2.6817523704978014e-06, - "loss": 1.0847, - "step": 5424 - }, - { - "epoch": 0.40771080715466707, - "grad_norm": 2.0122301951507353, - "learning_rate": 2.681294665969312e-06, - "loss": 0.9302, - "step": 5425 - }, - { - "epoch": 0.40778596122050204, - "grad_norm": 2.0477298960564525, - "learning_rate": 2.6808369210715055e-06, - "loss": 0.9826, - "step": 5426 - }, - { - "epoch": 0.407861115286337, - "grad_norm": 1.9436044203036535, - "learning_rate": 2.6803791358315035e-06, - "loss": 0.9653, - "step": 5427 - }, - { - "epoch": 0.407936269352172, - "grad_norm": 1.4325294805825244, - "learning_rate": 2.679921310276432e-06, - "loss": 1.0597, - "step": 5428 - }, - { - "epoch": 0.4080114234180069, - "grad_norm": 1.5882507141150104, - "learning_rate": 2.6794634444334203e-06, - "loss": 0.831, - "step": 5429 - }, - { - "epoch": 0.40808657748384186, - "grad_norm": 2.017564535326026, - "learning_rate": 2.679005538329598e-06, - "loss": 1.015, - "step": 5430 - }, - { - "epoch": 0.40816173154967683, - "grad_norm": 1.7676332918273012, - "learning_rate": 2.678547591992096e-06, - "loss": 1.029, - "step": 5431 - }, - { - "epoch": 0.4082368856155118, - "grad_norm": 1.7304159806517538, - "learning_rate": 2.6780896054480526e-06, - "loss": 0.8419, - "step": 5432 - }, - { - "epoch": 0.4083120396813468, - "grad_norm": 2.213062464884402, - "learning_rate": 2.6776315787246024e-06, - "loss": 1.0492, - "step": 5433 - }, - { - "epoch": 0.40838719374718174, - "grad_norm": 1.3411789627674537, - "learning_rate": 2.6771735118488864e-06, - "loss": 0.9128, - "step": 5434 - }, - { - "epoch": 0.40846234781301666, - "grad_norm": 1.5861629833165174, - "learning_rate": 2.676715404848047e-06, - "loss": 1.0706, - "step": 5435 - }, - { - "epoch": 0.4085375018788516, - "grad_norm": 1.7216997670145737, - "learning_rate": 2.676257257749228e-06, - "loss": 1.018, - "step": 5436 - }, - { - "epoch": 0.4086126559446866, - "grad_norm": 1.585430346048552, - "learning_rate": 2.6757990705795777e-06, - "loss": 1.0006, - "step": 5437 - }, - { - "epoch": 0.40868781001052157, - "grad_norm": 1.7270977864812993, - "learning_rate": 2.675340843366244e-06, - "loss": 1.0606, - "step": 5438 - }, - { - "epoch": 0.40876296407635654, - "grad_norm": 1.698851166352029, - "learning_rate": 2.6748825761363794e-06, - "loss": 0.8801, - "step": 5439 - }, - { - "epoch": 0.4088381181421915, - "grad_norm": 1.9968216197918909, - "learning_rate": 2.674424268917138e-06, - "loss": 1.0447, - "step": 5440 - }, - { - "epoch": 0.4089132722080265, - "grad_norm": 1.6861487736162484, - "learning_rate": 2.6739659217356766e-06, - "loss": 1.0114, - "step": 5441 - }, - { - "epoch": 0.4089884262738614, - "grad_norm": 1.788739570891169, - "learning_rate": 2.6735075346191526e-06, - "loss": 0.9706, - "step": 5442 - }, - { - "epoch": 0.40906358033969636, - "grad_norm": 0.6753379602676995, - "learning_rate": 2.6730491075947294e-06, - "loss": 0.8429, - "step": 5443 - }, - { - "epoch": 0.40913873440553133, - "grad_norm": 21.589571999940734, - "learning_rate": 2.672590640689568e-06, - "loss": 0.967, - "step": 5444 - }, - { - "epoch": 0.4092138884713663, - "grad_norm": 1.6634741804287938, - "learning_rate": 2.6721321339308365e-06, - "loss": 0.9688, - "step": 5445 - }, - { - "epoch": 0.4092890425372013, - "grad_norm": 0.6814777431058202, - "learning_rate": 2.671673587345702e-06, - "loss": 0.8011, - "step": 5446 - }, - { - "epoch": 0.40936419660303625, - "grad_norm": 1.826314567662135, - "learning_rate": 2.671215000961335e-06, - "loss": 1.0228, - "step": 5447 - }, - { - "epoch": 0.40943935066887116, - "grad_norm": 1.9191163275617216, - "learning_rate": 2.6707563748049094e-06, - "loss": 1.0443, - "step": 5448 - }, - { - "epoch": 0.40951450473470613, - "grad_norm": 0.7875991319901511, - "learning_rate": 2.6702977089036e-06, - "loss": 0.8232, - "step": 5449 - }, - { - "epoch": 0.4095896588005411, - "grad_norm": 2.42387524112521, - "learning_rate": 2.6698390032845844e-06, - "loss": 1.085, - "step": 5450 - }, - { - "epoch": 0.40966481286637607, - "grad_norm": 1.9504230578319939, - "learning_rate": 2.6693802579750434e-06, - "loss": 0.9199, - "step": 5451 - }, - { - "epoch": 0.40973996693221104, - "grad_norm": 1.8114370978413894, - "learning_rate": 2.668921473002159e-06, - "loss": 1.0097, - "step": 5452 - }, - { - "epoch": 0.409815120998046, - "grad_norm": 1.6913356986539905, - "learning_rate": 2.668462648393115e-06, - "loss": 0.9646, - "step": 5453 - }, - { - "epoch": 0.409890275063881, - "grad_norm": 2.0569172063697323, - "learning_rate": 2.6680037841751e-06, - "loss": 1.0566, - "step": 5454 - }, - { - "epoch": 0.4099654291297159, - "grad_norm": 1.657864413585628, - "learning_rate": 2.6675448803753026e-06, - "loss": 0.9404, - "step": 5455 - }, - { - "epoch": 0.41004058319555087, - "grad_norm": 1.7265853535101972, - "learning_rate": 2.667085937020915e-06, - "loss": 1.0105, - "step": 5456 - }, - { - "epoch": 0.41011573726138584, - "grad_norm": 0.926285393176406, - "learning_rate": 2.6666269541391313e-06, - "loss": 0.9564, - "step": 5457 - }, - { - "epoch": 0.4101908913272208, - "grad_norm": 3.221897697183783, - "learning_rate": 2.6661679317571473e-06, - "loss": 1.0141, - "step": 5458 - }, - { - "epoch": 0.4102660453930558, - "grad_norm": 1.8781633395377306, - "learning_rate": 2.665708869902163e-06, - "loss": 1.0442, - "step": 5459 - }, - { - "epoch": 0.41034119945889075, - "grad_norm": 2.006726991986561, - "learning_rate": 2.6652497686013786e-06, - "loss": 0.9835, - "step": 5460 - }, - { - "epoch": 0.41041635352472566, - "grad_norm": 1.5855468870295948, - "learning_rate": 2.664790627881998e-06, - "loss": 0.9738, - "step": 5461 - }, - { - "epoch": 0.41049150759056063, - "grad_norm": 1.5365142639669909, - "learning_rate": 2.664331447771227e-06, - "loss": 1.0532, - "step": 5462 - }, - { - "epoch": 0.4105666616563956, - "grad_norm": 2.0468246803213646, - "learning_rate": 2.663872228296275e-06, - "loss": 0.9228, - "step": 5463 - }, - { - "epoch": 0.4106418157222306, - "grad_norm": 0.7194020174920324, - "learning_rate": 2.6634129694843497e-06, - "loss": 0.8256, - "step": 5464 - }, - { - "epoch": 0.41071696978806554, - "grad_norm": 2.592746586489966, - "learning_rate": 2.6629536713626664e-06, - "loss": 1.0637, - "step": 5465 - }, - { - "epoch": 0.4107921238539005, - "grad_norm": 1.4202224014627776, - "learning_rate": 2.662494333958439e-06, - "loss": 1.0141, - "step": 5466 - }, - { - "epoch": 0.4108672779197355, - "grad_norm": 1.4404594806240905, - "learning_rate": 2.662034957298886e-06, - "loss": 0.9808, - "step": 5467 - }, - { - "epoch": 0.4109424319855704, - "grad_norm": 3.707208241296399, - "learning_rate": 2.6615755414112266e-06, - "loss": 0.9045, - "step": 5468 - }, - { - "epoch": 0.41101758605140537, - "grad_norm": 1.8554340560078786, - "learning_rate": 2.6611160863226826e-06, - "loss": 0.9137, - "step": 5469 - }, - { - "epoch": 0.41109274011724034, - "grad_norm": 2.2619665665655844, - "learning_rate": 2.6606565920604793e-06, - "loss": 0.945, - "step": 5470 - }, - { - "epoch": 0.4111678941830753, - "grad_norm": 1.7714628409661402, - "learning_rate": 2.6601970586518428e-06, - "loss": 0.8993, - "step": 5471 - }, - { - "epoch": 0.4112430482489103, - "grad_norm": 1.4845086288535425, - "learning_rate": 2.6597374861240026e-06, - "loss": 0.9693, - "step": 5472 - }, - { - "epoch": 0.41131820231474525, - "grad_norm": 1.9019756861876544, - "learning_rate": 2.65927787450419e-06, - "loss": 1.0132, - "step": 5473 - }, - { - "epoch": 0.41139335638058017, - "grad_norm": 0.8820195645824492, - "learning_rate": 2.6588182238196395e-06, - "loss": 0.875, - "step": 5474 - }, - { - "epoch": 0.41146851044641514, - "grad_norm": 5.024075908050992, - "learning_rate": 2.6583585340975854e-06, - "loss": 0.9437, - "step": 5475 - }, - { - "epoch": 0.4115436645122501, - "grad_norm": 4.86657867787567, - "learning_rate": 2.657898805365268e-06, - "loss": 1.0214, - "step": 5476 - }, - { - "epoch": 0.4116188185780851, - "grad_norm": 2.1980970649148115, - "learning_rate": 2.6574390376499265e-06, - "loss": 1.0483, - "step": 5477 - }, - { - "epoch": 0.41169397264392005, - "grad_norm": 1.8655645108280976, - "learning_rate": 2.6569792309788046e-06, - "loss": 0.846, - "step": 5478 - }, - { - "epoch": 0.411769126709755, - "grad_norm": 1.8255213920240505, - "learning_rate": 2.656519385379148e-06, - "loss": 0.9642, - "step": 5479 - }, - { - "epoch": 0.41184428077558993, - "grad_norm": 3.6373973504228654, - "learning_rate": 2.6560595008782032e-06, - "loss": 0.9701, - "step": 5480 - }, - { - "epoch": 0.4119194348414249, - "grad_norm": 1.726830578183309, - "learning_rate": 2.655599577503221e-06, - "loss": 1.0005, - "step": 5481 - }, - { - "epoch": 0.4119945889072599, - "grad_norm": 1.827335611091223, - "learning_rate": 2.6551396152814534e-06, - "loss": 1.0203, - "step": 5482 - }, - { - "epoch": 0.41206974297309484, - "grad_norm": 17.756116216277068, - "learning_rate": 2.6546796142401547e-06, - "loss": 1.0115, - "step": 5483 - }, - { - "epoch": 0.4121448970389298, - "grad_norm": 0.7320795505348513, - "learning_rate": 2.6542195744065826e-06, - "loss": 0.8023, - "step": 5484 - }, - { - "epoch": 0.4122200511047648, - "grad_norm": 1.5180164622191952, - "learning_rate": 2.653759495807995e-06, - "loss": 1.0356, - "step": 5485 - }, - { - "epoch": 0.41229520517059975, - "grad_norm": 0.7481066328835941, - "learning_rate": 2.6532993784716535e-06, - "loss": 0.8833, - "step": 5486 - }, - { - "epoch": 0.41237035923643467, - "grad_norm": 1.8881598550760799, - "learning_rate": 2.652839222424823e-06, - "loss": 1.0565, - "step": 5487 - }, - { - "epoch": 0.41244551330226964, - "grad_norm": 1.3737177939280074, - "learning_rate": 2.652379027694768e-06, - "loss": 0.9756, - "step": 5488 - }, - { - "epoch": 0.4125206673681046, - "grad_norm": 1.7341775867729983, - "learning_rate": 2.651918794308758e-06, - "loss": 0.9076, - "step": 5489 - }, - { - "epoch": 0.4125958214339396, - "grad_norm": 4.091406930628763, - "learning_rate": 2.651458522294063e-06, - "loss": 1.0475, - "step": 5490 - }, - { - "epoch": 0.41267097549977455, - "grad_norm": 1.831916854312581, - "learning_rate": 2.650998211677956e-06, - "loss": 1.0006, - "step": 5491 - }, - { - "epoch": 0.4127461295656095, - "grad_norm": 2.31564292746334, - "learning_rate": 2.6505378624877116e-06, - "loss": 1.0315, - "step": 5492 - }, - { - "epoch": 0.41282128363144444, - "grad_norm": 4.9465590799633015, - "learning_rate": 2.650077474750608e-06, - "loss": 0.9662, - "step": 5493 - }, - { - "epoch": 0.4128964376972794, - "grad_norm": 0.8612027931286795, - "learning_rate": 2.649617048493925e-06, - "loss": 0.9258, - "step": 5494 - }, - { - "epoch": 0.4129715917631144, - "grad_norm": 1.674063275443676, - "learning_rate": 2.649156583744944e-06, - "loss": 0.9716, - "step": 5495 - }, - { - "epoch": 0.41304674582894935, - "grad_norm": 2.729527214248646, - "learning_rate": 2.64869608053095e-06, - "loss": 0.9318, - "step": 5496 - }, - { - "epoch": 0.4131218998947843, - "grad_norm": 1.6837697656231712, - "learning_rate": 2.648235538879229e-06, - "loss": 0.9218, - "step": 5497 - }, - { - "epoch": 0.4131970539606193, - "grad_norm": 1.44842277116087, - "learning_rate": 2.6477749588170703e-06, - "loss": 1.0087, - "step": 5498 - }, - { - "epoch": 0.41327220802645426, - "grad_norm": 1.7025333741847801, - "learning_rate": 2.647314340371764e-06, - "loss": 1.0261, - "step": 5499 - }, - { - "epoch": 0.4133473620922892, - "grad_norm": 1.540318552610146, - "learning_rate": 2.646853683570605e-06, - "loss": 0.8887, - "step": 5500 - }, - { - "epoch": 0.41342251615812414, - "grad_norm": 2.081812863231436, - "learning_rate": 2.646392988440888e-06, - "loss": 1.0409, - "step": 5501 - }, - { - "epoch": 0.4134976702239591, - "grad_norm": 1.8482668877718493, - "learning_rate": 2.6459322550099113e-06, - "loss": 0.9641, - "step": 5502 - }, - { - "epoch": 0.4135728242897941, - "grad_norm": 1.794277602701101, - "learning_rate": 2.645471483304975e-06, - "loss": 1.0049, - "step": 5503 - }, - { - "epoch": 0.41364797835562905, - "grad_norm": 1.9232231311971646, - "learning_rate": 2.645010673353382e-06, - "loss": 1.0601, - "step": 5504 - }, - { - "epoch": 0.413723132421464, - "grad_norm": 2.4990556840802465, - "learning_rate": 2.644549825182436e-06, - "loss": 1.0182, - "step": 5505 - }, - { - "epoch": 0.41379828648729894, - "grad_norm": 1.692676093943785, - "learning_rate": 2.644088938819445e-06, - "loss": 0.994, - "step": 5506 - }, - { - "epoch": 0.4138734405531339, - "grad_norm": 2.381013284767128, - "learning_rate": 2.6436280142917183e-06, - "loss": 0.9958, - "step": 5507 - }, - { - "epoch": 0.4139485946189689, - "grad_norm": 2.5136402901752537, - "learning_rate": 2.6431670516265668e-06, - "loss": 0.9473, - "step": 5508 - }, - { - "epoch": 0.41402374868480385, - "grad_norm": 1.889466301077721, - "learning_rate": 2.6427060508513052e-06, - "loss": 0.8849, - "step": 5509 - }, - { - "epoch": 0.4140989027506388, - "grad_norm": 1.3995912789727487, - "learning_rate": 2.6422450119932484e-06, - "loss": 1.0159, - "step": 5510 - }, - { - "epoch": 0.4141740568164738, - "grad_norm": 1.9361405679452575, - "learning_rate": 2.641783935079716e-06, - "loss": 0.9552, - "step": 5511 - }, - { - "epoch": 0.41424921088230876, - "grad_norm": 1.448880549449428, - "learning_rate": 2.641322820138027e-06, - "loss": 0.9367, - "step": 5512 - }, - { - "epoch": 0.4143243649481437, - "grad_norm": 1.9024740248923282, - "learning_rate": 2.6408616671955053e-06, - "loss": 0.9568, - "step": 5513 - }, - { - "epoch": 0.41439951901397865, - "grad_norm": 2.0130947110995385, - "learning_rate": 2.6404004762794766e-06, - "loss": 0.977, - "step": 5514 - }, - { - "epoch": 0.4144746730798136, - "grad_norm": 0.6742536003149625, - "learning_rate": 2.6399392474172667e-06, - "loss": 0.8091, - "step": 5515 - }, - { - "epoch": 0.4145498271456486, - "grad_norm": 0.7297303654824983, - "learning_rate": 2.6394779806362057e-06, - "loss": 0.8478, - "step": 5516 - }, - { - "epoch": 0.41462498121148356, - "grad_norm": 1.5848783517358755, - "learning_rate": 2.6390166759636263e-06, - "loss": 0.9573, - "step": 5517 - }, - { - "epoch": 0.4147001352773185, - "grad_norm": 0.770616337587502, - "learning_rate": 2.638555333426862e-06, - "loss": 0.9608, - "step": 5518 - }, - { - "epoch": 0.41477528934315344, - "grad_norm": 1.7918038500526776, - "learning_rate": 2.638093953053248e-06, - "loss": 1.0021, - "step": 5519 - }, - { - "epoch": 0.4148504434089884, - "grad_norm": 1.6550740799934345, - "learning_rate": 2.6376325348701244e-06, - "loss": 1.0276, - "step": 5520 - }, - { - "epoch": 0.4149255974748234, - "grad_norm": 1.733535219625282, - "learning_rate": 2.6371710789048313e-06, - "loss": 1.0451, - "step": 5521 - }, - { - "epoch": 0.41500075154065835, - "grad_norm": 1.7238636586295626, - "learning_rate": 2.6367095851847125e-06, - "loss": 0.9312, - "step": 5522 - }, - { - "epoch": 0.4150759056064933, - "grad_norm": 2.5123965949040468, - "learning_rate": 2.636248053737112e-06, - "loss": 0.9537, - "step": 5523 - }, - { - "epoch": 0.4151510596723283, - "grad_norm": 2.147245319083767, - "learning_rate": 2.635786484589378e-06, - "loss": 1.0282, - "step": 5524 - }, - { - "epoch": 0.4152262137381632, - "grad_norm": 1.6428586740664342, - "learning_rate": 2.6353248777688606e-06, - "loss": 0.9939, - "step": 5525 - }, - { - "epoch": 0.4153013678039982, - "grad_norm": 1.7359263037759158, - "learning_rate": 2.634863233302911e-06, - "loss": 0.894, - "step": 5526 - }, - { - "epoch": 0.41537652186983315, - "grad_norm": 1.5492496950999648, - "learning_rate": 2.634401551218884e-06, - "loss": 0.93, - "step": 5527 - }, - { - "epoch": 0.4154516759356681, - "grad_norm": 2.4057678135863783, - "learning_rate": 2.6339398315441353e-06, - "loss": 1.0693, - "step": 5528 - }, - { - "epoch": 0.4155268300015031, - "grad_norm": 1.591073874819858, - "learning_rate": 2.633478074306025e-06, - "loss": 0.997, - "step": 5529 - }, - { - "epoch": 0.41560198406733806, - "grad_norm": 1.6285602741887233, - "learning_rate": 2.633016279531912e-06, - "loss": 0.9479, - "step": 5530 - }, - { - "epoch": 0.41567713813317303, - "grad_norm": 2.0473478513646146, - "learning_rate": 2.6325544472491616e-06, - "loss": 0.8926, - "step": 5531 - }, - { - "epoch": 0.41575229219900794, - "grad_norm": 0.6315150441003476, - "learning_rate": 2.632092577485137e-06, - "loss": 0.8265, - "step": 5532 - }, - { - "epoch": 0.4158274462648429, - "grad_norm": 2.1493664761812687, - "learning_rate": 2.631630670267207e-06, - "loss": 0.9414, - "step": 5533 - }, - { - "epoch": 0.4159026003306779, - "grad_norm": 2.3029469269812606, - "learning_rate": 2.631168725622742e-06, - "loss": 1.0103, - "step": 5534 - }, - { - "epoch": 0.41597775439651286, - "grad_norm": 2.081774382643497, - "learning_rate": 2.630706743579112e-06, - "loss": 1.0817, - "step": 5535 - }, - { - "epoch": 0.4160529084623478, - "grad_norm": 1.8784130821233878, - "learning_rate": 2.6302447241636924e-06, - "loss": 0.9059, - "step": 5536 - }, - { - "epoch": 0.4161280625281828, - "grad_norm": 1.8960710383478643, - "learning_rate": 2.6297826674038595e-06, - "loss": 1.0759, - "step": 5537 - }, - { - "epoch": 0.4162032165940177, - "grad_norm": 1.9347787635953233, - "learning_rate": 2.6293205733269924e-06, - "loss": 0.8669, - "step": 5538 - }, - { - "epoch": 0.4162783706598527, - "grad_norm": 2.4923563454571416, - "learning_rate": 2.6288584419604713e-06, - "loss": 1.0553, - "step": 5539 - }, - { - "epoch": 0.41635352472568765, - "grad_norm": 1.983711131627916, - "learning_rate": 2.62839627333168e-06, - "loss": 1.0216, - "step": 5540 - }, - { - "epoch": 0.4164286787915226, - "grad_norm": 0.7703966415908435, - "learning_rate": 2.6279340674680025e-06, - "loss": 0.8675, - "step": 5541 - }, - { - "epoch": 0.4165038328573576, - "grad_norm": 1.46320530113357, - "learning_rate": 2.627471824396827e-06, - "loss": 0.9969, - "step": 5542 - }, - { - "epoch": 0.41657898692319256, - "grad_norm": 2.267200316925942, - "learning_rate": 2.6270095441455435e-06, - "loss": 0.9556, - "step": 5543 - }, - { - "epoch": 0.41665414098902753, - "grad_norm": 0.6631489247672099, - "learning_rate": 2.6265472267415432e-06, - "loss": 0.8337, - "step": 5544 - }, - { - "epoch": 0.41672929505486245, - "grad_norm": 1.9901043940243413, - "learning_rate": 2.626084872212221e-06, - "loss": 1.0129, - "step": 5545 - }, - { - "epoch": 0.4168044491206974, - "grad_norm": 1.631608363867568, - "learning_rate": 2.625622480584972e-06, - "loss": 1.0246, - "step": 5546 - }, - { - "epoch": 0.4168796031865324, - "grad_norm": 1.890235797765661, - "learning_rate": 2.6251600518871953e-06, - "loss": 1.0876, - "step": 5547 - }, - { - "epoch": 0.41695475725236736, - "grad_norm": 1.825128365274178, - "learning_rate": 2.6246975861462927e-06, - "loss": 0.9588, - "step": 5548 - }, - { - "epoch": 0.41702991131820233, - "grad_norm": 1.2956413468223535, - "learning_rate": 2.6242350833896645e-06, - "loss": 1.0128, - "step": 5549 - }, - { - "epoch": 0.4171050653840373, - "grad_norm": 1.9482539355848798, - "learning_rate": 2.623772543644718e-06, - "loss": 0.8653, - "step": 5550 - }, - { - "epoch": 0.4171802194498722, - "grad_norm": 1.2018297332316872, - "learning_rate": 2.6233099669388605e-06, - "loss": 0.924, - "step": 5551 - }, - { - "epoch": 0.4172553735157072, - "grad_norm": 2.5831064821950633, - "learning_rate": 2.6228473532995e-06, - "loss": 1.0334, - "step": 5552 - }, - { - "epoch": 0.41733052758154215, - "grad_norm": 1.6535441049438715, - "learning_rate": 2.6223847027540485e-06, - "loss": 0.8814, - "step": 5553 - }, - { - "epoch": 0.4174056816473771, - "grad_norm": 5.142506542062562, - "learning_rate": 2.62192201532992e-06, - "loss": 0.9743, - "step": 5554 - }, - { - "epoch": 0.4174808357132121, - "grad_norm": 1.8936946948949147, - "learning_rate": 2.621459291054531e-06, - "loss": 0.8538, - "step": 5555 - }, - { - "epoch": 0.41755598977904707, - "grad_norm": 3.2267025970265997, - "learning_rate": 2.6209965299552994e-06, - "loss": 1.0306, - "step": 5556 - }, - { - "epoch": 0.41763114384488204, - "grad_norm": 1.4020436850501925, - "learning_rate": 2.6205337320596452e-06, - "loss": 1.0184, - "step": 5557 - }, - { - "epoch": 0.41770629791071695, - "grad_norm": 2.542727129308191, - "learning_rate": 2.620070897394991e-06, - "loss": 1.033, - "step": 5558 - }, - { - "epoch": 0.4177814519765519, - "grad_norm": 1.807013659756514, - "learning_rate": 2.619608025988762e-06, - "loss": 0.9135, - "step": 5559 - }, - { - "epoch": 0.4178566060423869, - "grad_norm": 1.8103118052547944, - "learning_rate": 2.6191451178683842e-06, - "loss": 0.9245, - "step": 5560 - }, - { - "epoch": 0.41793176010822186, - "grad_norm": 0.7183424996929263, - "learning_rate": 2.6186821730612884e-06, - "loss": 0.8209, - "step": 5561 - }, - { - "epoch": 0.41800691417405683, - "grad_norm": 1.743202016721787, - "learning_rate": 2.6182191915949043e-06, - "loss": 1.1105, - "step": 5562 - }, - { - "epoch": 0.4180820682398918, - "grad_norm": 1.892196695571593, - "learning_rate": 2.6177561734966653e-06, - "loss": 0.9495, - "step": 5563 - }, - { - "epoch": 0.4181572223057267, - "grad_norm": 1.4640027696604143, - "learning_rate": 2.6172931187940084e-06, - "loss": 0.8413, - "step": 5564 - }, - { - "epoch": 0.4182323763715617, - "grad_norm": 2.0548014446037373, - "learning_rate": 2.6168300275143695e-06, - "loss": 0.9658, - "step": 5565 - }, - { - "epoch": 0.41830753043739666, - "grad_norm": 1.8193275504704696, - "learning_rate": 2.61636689968519e-06, - "loss": 0.9643, - "step": 5566 - }, - { - "epoch": 0.4183826845032316, - "grad_norm": 1.774842096746578, - "learning_rate": 2.6159037353339113e-06, - "loss": 0.9582, - "step": 5567 - }, - { - "epoch": 0.4184578385690666, - "grad_norm": 0.6591886844727448, - "learning_rate": 2.6154405344879776e-06, - "loss": 0.7916, - "step": 5568 - }, - { - "epoch": 0.41853299263490157, - "grad_norm": 3.1921947537048947, - "learning_rate": 2.6149772971748357e-06, - "loss": 0.8709, - "step": 5569 - }, - { - "epoch": 0.4186081467007365, - "grad_norm": 1.628487585772947, - "learning_rate": 2.614514023421934e-06, - "loss": 1.0517, - "step": 5570 - }, - { - "epoch": 0.41868330076657145, - "grad_norm": 2.139668940311398, - "learning_rate": 2.6140507132567238e-06, - "loss": 0.9672, - "step": 5571 - }, - { - "epoch": 0.4187584548324064, - "grad_norm": 2.719369138177837, - "learning_rate": 2.6135873667066567e-06, - "loss": 1.0443, - "step": 5572 - }, - { - "epoch": 0.4188336088982414, - "grad_norm": 1.4377037401526271, - "learning_rate": 2.6131239837991894e-06, - "loss": 1.0811, - "step": 5573 - }, - { - "epoch": 0.41890876296407636, - "grad_norm": 1.4310999495964156, - "learning_rate": 2.6126605645617777e-06, - "loss": 0.9074, - "step": 5574 - }, - { - "epoch": 0.41898391702991133, - "grad_norm": 2.4062947851199588, - "learning_rate": 2.6121971090218816e-06, - "loss": 0.9496, - "step": 5575 - }, - { - "epoch": 0.4190590710957463, - "grad_norm": 0.8583008381948475, - "learning_rate": 2.6117336172069625e-06, - "loss": 0.912, - "step": 5576 - }, - { - "epoch": 0.4191342251615812, - "grad_norm": 1.876641063967252, - "learning_rate": 2.6112700891444845e-06, - "loss": 0.9786, - "step": 5577 - }, - { - "epoch": 0.4192093792274162, - "grad_norm": 1.8296604066531899, - "learning_rate": 2.6108065248619128e-06, - "loss": 1.01, - "step": 5578 - }, - { - "epoch": 0.41928453329325116, - "grad_norm": 1.8677147156889484, - "learning_rate": 2.6103429243867147e-06, - "loss": 0.9641, - "step": 5579 - }, - { - "epoch": 0.41935968735908613, - "grad_norm": 1.7697897210726972, - "learning_rate": 2.609879287746362e-06, - "loss": 0.9872, - "step": 5580 - }, - { - "epoch": 0.4194348414249211, - "grad_norm": 1.5635704188675643, - "learning_rate": 2.609415614968326e-06, - "loss": 0.951, - "step": 5581 - }, - { - "epoch": 0.41950999549075607, - "grad_norm": 0.6744283904965567, - "learning_rate": 2.608951906080081e-06, - "loss": 0.8676, - "step": 5582 - }, - { - "epoch": 0.419585149556591, - "grad_norm": 2.0225722182386274, - "learning_rate": 2.608488161109104e-06, - "loss": 0.9444, - "step": 5583 - }, - { - "epoch": 0.41966030362242596, - "grad_norm": 2.4847640206797013, - "learning_rate": 2.608024380082874e-06, - "loss": 1.0336, - "step": 5584 - }, - { - "epoch": 0.4197354576882609, - "grad_norm": 1.8823389114617881, - "learning_rate": 2.60756056302887e-06, - "loss": 0.8837, - "step": 5585 - }, - { - "epoch": 0.4198106117540959, - "grad_norm": 1.8413232252965472, - "learning_rate": 2.6070967099745773e-06, - "loss": 1.0199, - "step": 5586 - }, - { - "epoch": 0.41988576581993087, - "grad_norm": 1.6191527730856474, - "learning_rate": 2.6066328209474786e-06, - "loss": 0.9924, - "step": 5587 - }, - { - "epoch": 0.41996091988576584, - "grad_norm": 1.6357687876801712, - "learning_rate": 2.6061688959750633e-06, - "loss": 0.8279, - "step": 5588 - }, - { - "epoch": 0.4200360739516008, - "grad_norm": 1.5757765717406231, - "learning_rate": 2.6057049350848194e-06, - "loss": 1.0375, - "step": 5589 - }, - { - "epoch": 0.4201112280174357, - "grad_norm": 1.9650625179139474, - "learning_rate": 2.6052409383042383e-06, - "loss": 0.8885, - "step": 5590 - }, - { - "epoch": 0.4201863820832707, - "grad_norm": 1.7217810520359151, - "learning_rate": 2.604776905660814e-06, - "loss": 1.0008, - "step": 5591 - }, - { - "epoch": 0.42026153614910566, - "grad_norm": 1.7844597524100092, - "learning_rate": 2.6043128371820427e-06, - "loss": 1.0103, - "step": 5592 - }, - { - "epoch": 0.42033669021494063, - "grad_norm": 1.6488766921318625, - "learning_rate": 2.603848732895421e-06, - "loss": 0.8548, - "step": 5593 - }, - { - "epoch": 0.4204118442807756, - "grad_norm": 2.426698471356965, - "learning_rate": 2.6033845928284503e-06, - "loss": 0.9501, - "step": 5594 - }, - { - "epoch": 0.4204869983466106, - "grad_norm": 1.9438357588809483, - "learning_rate": 2.602920417008632e-06, - "loss": 1.0221, - "step": 5595 - }, - { - "epoch": 0.4205621524124455, - "grad_norm": 1.4583705443221584, - "learning_rate": 2.60245620546347e-06, - "loss": 0.9551, - "step": 5596 - }, - { - "epoch": 0.42063730647828046, - "grad_norm": 2.726021961747813, - "learning_rate": 2.6019919582204713e-06, - "loss": 1.0211, - "step": 5597 - }, - { - "epoch": 0.42071246054411543, - "grad_norm": 1.8555440465807513, - "learning_rate": 2.601527675307143e-06, - "loss": 0.9747, - "step": 5598 - }, - { - "epoch": 0.4207876146099504, - "grad_norm": 2.988468737735655, - "learning_rate": 2.601063356750997e-06, - "loss": 0.9811, - "step": 5599 - }, - { - "epoch": 0.42086276867578537, - "grad_norm": 1.9877416331878315, - "learning_rate": 2.600599002579546e-06, - "loss": 0.9423, - "step": 5600 - }, - { - "epoch": 0.42093792274162034, - "grad_norm": 2.084745030536394, - "learning_rate": 2.6001346128203036e-06, - "loss": 0.9886, - "step": 5601 - }, - { - "epoch": 0.4210130768074553, - "grad_norm": 2.240806553081122, - "learning_rate": 2.5996701875007873e-06, - "loss": 0.9849, - "step": 5602 - }, - { - "epoch": 0.4210882308732902, - "grad_norm": 1.4121398922483692, - "learning_rate": 2.5992057266485162e-06, - "loss": 1.0004, - "step": 5603 - }, - { - "epoch": 0.4211633849391252, - "grad_norm": 1.568320533965066, - "learning_rate": 2.5987412302910114e-06, - "loss": 0.9566, - "step": 5604 - }, - { - "epoch": 0.42123853900496017, - "grad_norm": 1.768196865979677, - "learning_rate": 2.598276698455796e-06, - "loss": 0.9805, - "step": 5605 - }, - { - "epoch": 0.42131369307079514, - "grad_norm": 0.6430221693020015, - "learning_rate": 2.5978121311703955e-06, - "loss": 0.8014, - "step": 5606 - }, - { - "epoch": 0.4213888471366301, - "grad_norm": 2.2889985475948023, - "learning_rate": 2.5973475284623366e-06, - "loss": 0.9775, - "step": 5607 - }, - { - "epoch": 0.4214640012024651, - "grad_norm": 1.5545384321201186, - "learning_rate": 2.5968828903591492e-06, - "loss": 1.0647, - "step": 5608 - }, - { - "epoch": 0.4215391552683, - "grad_norm": 2.1268022411903567, - "learning_rate": 2.5964182168883654e-06, - "loss": 1.0111, - "step": 5609 - }, - { - "epoch": 0.42161430933413496, - "grad_norm": 2.333542326241501, - "learning_rate": 2.5959535080775176e-06, - "loss": 0.8116, - "step": 5610 - }, - { - "epoch": 0.42168946339996993, - "grad_norm": 1.5270323765238334, - "learning_rate": 2.595488763954143e-06, - "loss": 0.9674, - "step": 5611 - }, - { - "epoch": 0.4217646174658049, - "grad_norm": 2.2358180368046887, - "learning_rate": 2.5950239845457792e-06, - "loss": 0.9636, - "step": 5612 - }, - { - "epoch": 0.4218397715316399, - "grad_norm": 1.8751261986542442, - "learning_rate": 2.594559169879965e-06, - "loss": 0.9511, - "step": 5613 - }, - { - "epoch": 0.42191492559747484, - "grad_norm": 1.358301506176675, - "learning_rate": 2.594094319984244e-06, - "loss": 1.0524, - "step": 5614 - }, - { - "epoch": 0.42199007966330976, - "grad_norm": 0.7635672807408772, - "learning_rate": 2.593629434886159e-06, - "loss": 0.8328, - "step": 5615 - }, - { - "epoch": 0.42206523372914473, - "grad_norm": 3.212317845267787, - "learning_rate": 2.5931645146132576e-06, - "loss": 0.9015, - "step": 5616 - }, - { - "epoch": 0.4221403877949797, - "grad_norm": 1.8340641284700756, - "learning_rate": 2.592699559193086e-06, - "loss": 1.0715, - "step": 5617 - }, - { - "epoch": 0.42221554186081467, - "grad_norm": 1.4003576809507556, - "learning_rate": 2.592234568653197e-06, - "loss": 0.9804, - "step": 5618 - }, - { - "epoch": 0.42229069592664964, - "grad_norm": 1.6936539654409724, - "learning_rate": 2.5917695430211416e-06, - "loss": 0.9469, - "step": 5619 - }, - { - "epoch": 0.4223658499924846, - "grad_norm": 2.0420701802417844, - "learning_rate": 2.591304482324475e-06, - "loss": 1.049, - "step": 5620 - }, - { - "epoch": 0.4224410040583196, - "grad_norm": 1.3619863049214926, - "learning_rate": 2.590839386590754e-06, - "loss": 0.9492, - "step": 5621 - }, - { - "epoch": 0.4225161581241545, - "grad_norm": 1.7984235642039892, - "learning_rate": 2.5903742558475358e-06, - "loss": 0.9909, - "step": 5622 - }, - { - "epoch": 0.42259131218998947, - "grad_norm": 1.8629163468725445, - "learning_rate": 2.589909090122383e-06, - "loss": 1.023, - "step": 5623 - }, - { - "epoch": 0.42266646625582444, - "grad_norm": 1.6398718422150464, - "learning_rate": 2.589443889442857e-06, - "loss": 0.8962, - "step": 5624 - }, - { - "epoch": 0.4227416203216594, - "grad_norm": 1.5785089158007861, - "learning_rate": 2.5889786538365243e-06, - "loss": 0.9896, - "step": 5625 - }, - { - "epoch": 0.4228167743874944, - "grad_norm": 1.3731565255325668, - "learning_rate": 2.588513383330951e-06, - "loss": 0.9334, - "step": 5626 - }, - { - "epoch": 0.42289192845332935, - "grad_norm": 1.67100616411023, - "learning_rate": 2.588048077953705e-06, - "loss": 0.9857, - "step": 5627 - }, - { - "epoch": 0.42296708251916426, - "grad_norm": 3.0977872457622593, - "learning_rate": 2.58758273773236e-06, - "loss": 1.0373, - "step": 5628 - }, - { - "epoch": 0.42304223658499923, - "grad_norm": 2.0938028357372636, - "learning_rate": 2.5871173626944864e-06, - "loss": 0.9914, - "step": 5629 - }, - { - "epoch": 0.4231173906508342, - "grad_norm": 9.421259972282478, - "learning_rate": 2.586651952867662e-06, - "loss": 0.994, - "step": 5630 - }, - { - "epoch": 0.4231925447166692, - "grad_norm": 1.6603050973024627, - "learning_rate": 2.5861865082794625e-06, - "loss": 1.0492, - "step": 5631 - }, - { - "epoch": 0.42326769878250414, - "grad_norm": 1.501124278576376, - "learning_rate": 2.5857210289574675e-06, - "loss": 0.9181, - "step": 5632 - }, - { - "epoch": 0.4233428528483391, - "grad_norm": 2.149283613573301, - "learning_rate": 2.5852555149292593e-06, - "loss": 0.995, - "step": 5633 - }, - { - "epoch": 0.4234180069141741, - "grad_norm": 1.8760272073338542, - "learning_rate": 2.5847899662224195e-06, - "loss": 1.0573, - "step": 5634 - }, - { - "epoch": 0.423493160980009, - "grad_norm": 1.9461002822943987, - "learning_rate": 2.584324382864536e-06, - "loss": 0.8492, - "step": 5635 - }, - { - "epoch": 0.42356831504584397, - "grad_norm": 0.6593114650543535, - "learning_rate": 2.583858764883195e-06, - "loss": 0.8058, - "step": 5636 - }, - { - "epoch": 0.42364346911167894, - "grad_norm": 1.561571761022332, - "learning_rate": 2.5833931123059865e-06, - "loss": 0.9361, - "step": 5637 - }, - { - "epoch": 0.4237186231775139, - "grad_norm": 1.9567481652832788, - "learning_rate": 2.5829274251605023e-06, - "loss": 1.0485, - "step": 5638 - }, - { - "epoch": 0.4237937772433489, - "grad_norm": 2.004539345122719, - "learning_rate": 2.5824617034743354e-06, - "loss": 0.9333, - "step": 5639 - }, - { - "epoch": 0.42386893130918385, - "grad_norm": 1.4768171948563287, - "learning_rate": 2.5819959472750827e-06, - "loss": 0.9866, - "step": 5640 - }, - { - "epoch": 0.42394408537501876, - "grad_norm": 2.1205836011025685, - "learning_rate": 2.581530156590341e-06, - "loss": 1.0031, - "step": 5641 - }, - { - "epoch": 0.42401923944085373, - "grad_norm": 0.6807778918673989, - "learning_rate": 2.5810643314477116e-06, - "loss": 0.8224, - "step": 5642 - }, - { - "epoch": 0.4240943935066887, - "grad_norm": 1.8129890045320736, - "learning_rate": 2.5805984718747953e-06, - "loss": 0.9604, - "step": 5643 - }, - { - "epoch": 0.4241695475725237, - "grad_norm": 1.6922642599714686, - "learning_rate": 2.5801325778991958e-06, - "loss": 0.9673, - "step": 5644 - }, - { - "epoch": 0.42424470163835865, - "grad_norm": 2.0644769152927953, - "learning_rate": 2.5796666495485196e-06, - "loss": 0.989, - "step": 5645 - }, - { - "epoch": 0.4243198557041936, - "grad_norm": 1.8643165127528374, - "learning_rate": 2.579200686850375e-06, - "loss": 0.9951, - "step": 5646 - }, - { - "epoch": 0.4243950097700286, - "grad_norm": 1.7297059868821372, - "learning_rate": 2.5787346898323716e-06, - "loss": 0.991, - "step": 5647 - }, - { - "epoch": 0.4244701638358635, - "grad_norm": 2.0362958005808927, - "learning_rate": 2.578268658522122e-06, - "loss": 0.9527, - "step": 5648 - }, - { - "epoch": 0.42454531790169847, - "grad_norm": 1.4796973964275624, - "learning_rate": 2.5778025929472397e-06, - "loss": 0.8891, - "step": 5649 - }, - { - "epoch": 0.42462047196753344, - "grad_norm": 1.6495986392525241, - "learning_rate": 2.577336493135341e-06, - "loss": 1.0118, - "step": 5650 - }, - { - "epoch": 0.4246956260333684, - "grad_norm": 1.5752124915513552, - "learning_rate": 2.5768703591140445e-06, - "loss": 1.0173, - "step": 5651 - }, - { - "epoch": 0.4247707800992034, - "grad_norm": 2.2682219255917953, - "learning_rate": 2.5764041909109706e-06, - "loss": 0.9507, - "step": 5652 - }, - { - "epoch": 0.42484593416503835, - "grad_norm": 2.17459101284601, - "learning_rate": 2.5759379885537414e-06, - "loss": 1.1014, - "step": 5653 - }, - { - "epoch": 0.42492108823087327, - "grad_norm": 2.9260760618428807, - "learning_rate": 2.57547175206998e-06, - "loss": 0.9874, - "step": 5654 - }, - { - "epoch": 0.42499624229670824, - "grad_norm": 17.452732447185653, - "learning_rate": 2.5750054814873144e-06, - "loss": 1.0537, - "step": 5655 - }, - { - "epoch": 0.4250713963625432, - "grad_norm": 1.9026492145787628, - "learning_rate": 2.5745391768333715e-06, - "loss": 1.0713, - "step": 5656 - }, - { - "epoch": 0.4251465504283782, - "grad_norm": 1.7740146185262948, - "learning_rate": 2.574072838135783e-06, - "loss": 1.0799, - "step": 5657 - }, - { - "epoch": 0.42522170449421315, - "grad_norm": 1.6152233044984137, - "learning_rate": 2.5736064654221806e-06, - "loss": 1.0096, - "step": 5658 - }, - { - "epoch": 0.4252968585600481, - "grad_norm": 0.9454250741559285, - "learning_rate": 2.573140058720198e-06, - "loss": 0.9217, - "step": 5659 - }, - { - "epoch": 0.42537201262588303, - "grad_norm": 1.3631366117420984, - "learning_rate": 2.572673618057473e-06, - "loss": 0.9348, - "step": 5660 - }, - { - "epoch": 0.425447166691718, - "grad_norm": 4.145190676363176, - "learning_rate": 2.5722071434616426e-06, - "loss": 0.9234, - "step": 5661 - }, - { - "epoch": 0.425522320757553, - "grad_norm": 1.631018131775119, - "learning_rate": 2.5717406349603483e-06, - "loss": 0.9971, - "step": 5662 - }, - { - "epoch": 0.42559747482338794, - "grad_norm": 1.605290015450998, - "learning_rate": 2.5712740925812314e-06, - "loss": 1.0057, - "step": 5663 - }, - { - "epoch": 0.4256726288892229, - "grad_norm": 0.7195967372123748, - "learning_rate": 2.5708075163519373e-06, - "loss": 0.8608, - "step": 5664 - }, - { - "epoch": 0.4257477829550579, - "grad_norm": 1.5777331328381237, - "learning_rate": 2.5703409063001124e-06, - "loss": 0.9414, - "step": 5665 - }, - { - "epoch": 0.42582293702089286, - "grad_norm": 1.4938269591358104, - "learning_rate": 2.5698742624534046e-06, - "loss": 0.9606, - "step": 5666 - }, - { - "epoch": 0.42589809108672777, - "grad_norm": 1.3889580497476302, - "learning_rate": 2.5694075848394646e-06, - "loss": 1.0271, - "step": 5667 - }, - { - "epoch": 0.42597324515256274, - "grad_norm": 2.2689270160650246, - "learning_rate": 2.5689408734859445e-06, - "loss": 0.9594, - "step": 5668 - }, - { - "epoch": 0.4260483992183977, - "grad_norm": 1.715657729651372, - "learning_rate": 2.568474128420499e-06, - "loss": 0.8414, - "step": 5669 - }, - { - "epoch": 0.4261235532842327, - "grad_norm": 2.226161023803118, - "learning_rate": 2.5680073496707854e-06, - "loss": 0.918, - "step": 5670 - }, - { - "epoch": 0.42619870735006765, - "grad_norm": 2.8385164498891853, - "learning_rate": 2.5675405372644606e-06, - "loss": 0.9883, - "step": 5671 - }, - { - "epoch": 0.4262738614159026, - "grad_norm": 1.8157622477514772, - "learning_rate": 2.567073691229186e-06, - "loss": 0.8421, - "step": 5672 - }, - { - "epoch": 0.42634901548173754, - "grad_norm": 1.608449429026248, - "learning_rate": 2.5666068115926223e-06, - "loss": 0.9866, - "step": 5673 - }, - { - "epoch": 0.4264241695475725, - "grad_norm": 3.314848013683403, - "learning_rate": 2.5661398983824375e-06, - "loss": 0.9653, - "step": 5674 - }, - { - "epoch": 0.4264993236134075, - "grad_norm": 1.4951189416118875, - "learning_rate": 2.565672951626295e-06, - "loss": 0.966, - "step": 5675 - }, - { - "epoch": 0.42657447767924245, - "grad_norm": 1.4548588734773693, - "learning_rate": 2.5652059713518636e-06, - "loss": 0.9847, - "step": 5676 - }, - { - "epoch": 0.4266496317450774, - "grad_norm": 2.018558821468614, - "learning_rate": 2.5647389575868142e-06, - "loss": 0.891, - "step": 5677 - }, - { - "epoch": 0.4267247858109124, - "grad_norm": 2.0211511604521855, - "learning_rate": 2.564271910358819e-06, - "loss": 0.9961, - "step": 5678 - }, - { - "epoch": 0.42679993987674736, - "grad_norm": 1.6878811441131842, - "learning_rate": 2.563804829695553e-06, - "loss": 0.97, - "step": 5679 - }, - { - "epoch": 0.4268750939425823, - "grad_norm": 1.6089756294192474, - "learning_rate": 2.5633377156246917e-06, - "loss": 1.0069, - "step": 5680 - }, - { - "epoch": 0.42695024800841724, - "grad_norm": 0.6959529456012076, - "learning_rate": 2.5628705681739124e-06, - "loss": 0.8173, - "step": 5681 - }, - { - "epoch": 0.4270254020742522, - "grad_norm": 2.426212978797935, - "learning_rate": 2.5624033873708983e-06, - "loss": 0.9677, - "step": 5682 - }, - { - "epoch": 0.4271005561400872, - "grad_norm": 1.453733173381222, - "learning_rate": 2.5619361732433287e-06, - "loss": 1.0902, - "step": 5683 - }, - { - "epoch": 0.42717571020592215, - "grad_norm": 1.5383420700673776, - "learning_rate": 2.5614689258188896e-06, - "loss": 0.9874, - "step": 5684 - }, - { - "epoch": 0.4272508642717571, - "grad_norm": 1.2670750729208906, - "learning_rate": 2.561001645125266e-06, - "loss": 0.9604, - "step": 5685 - }, - { - "epoch": 0.42732601833759204, - "grad_norm": 1.5825260095131208, - "learning_rate": 2.560534331190148e-06, - "loss": 1.071, - "step": 5686 - }, - { - "epoch": 0.427401172403427, - "grad_norm": 3.043242181483211, - "learning_rate": 2.5600669840412233e-06, - "loss": 1.0031, - "step": 5687 - }, - { - "epoch": 0.427476326469262, - "grad_norm": 0.7087159712761485, - "learning_rate": 2.5595996037061853e-06, - "loss": 0.833, - "step": 5688 - }, - { - "epoch": 0.42755148053509695, - "grad_norm": 1.9919267072331064, - "learning_rate": 2.559132190212728e-06, - "loss": 1.0855, - "step": 5689 - }, - { - "epoch": 0.4276266346009319, - "grad_norm": 2.667161319472325, - "learning_rate": 2.558664743588547e-06, - "loss": 1.0071, - "step": 5690 - }, - { - "epoch": 0.4277017886667669, - "grad_norm": 1.787822043478928, - "learning_rate": 2.5581972638613417e-06, - "loss": 1.0306, - "step": 5691 - }, - { - "epoch": 0.42777694273260186, - "grad_norm": 1.1277482056042165, - "learning_rate": 2.557729751058811e-06, - "loss": 0.8686, - "step": 5692 - }, - { - "epoch": 0.4278520967984368, - "grad_norm": 1.7036461688929334, - "learning_rate": 2.557262205208656e-06, - "loss": 0.9496, - "step": 5693 - }, - { - "epoch": 0.42792725086427175, - "grad_norm": 2.2412775299883467, - "learning_rate": 2.556794626338582e-06, - "loss": 0.9716, - "step": 5694 - }, - { - "epoch": 0.4280024049301067, - "grad_norm": 2.021112232691448, - "learning_rate": 2.5563270144762933e-06, - "loss": 1.1037, - "step": 5695 - }, - { - "epoch": 0.4280775589959417, - "grad_norm": 1.6622690198099617, - "learning_rate": 2.5558593696495e-06, - "loss": 1.0151, - "step": 5696 - }, - { - "epoch": 0.42815271306177666, - "grad_norm": 3.4584520771522187, - "learning_rate": 2.5553916918859102e-06, - "loss": 1.0297, - "step": 5697 - }, - { - "epoch": 0.4282278671276116, - "grad_norm": 1.5545141228849937, - "learning_rate": 2.554923981213235e-06, - "loss": 0.9176, - "step": 5698 - }, - { - "epoch": 0.42830302119344654, - "grad_norm": 1.9545281214883041, - "learning_rate": 2.55445623765919e-06, - "loss": 1.1379, - "step": 5699 - }, - { - "epoch": 0.4283781752592815, - "grad_norm": 3.070920306509217, - "learning_rate": 2.553988461251489e-06, - "loss": 0.9457, - "step": 5700 - }, - { - "epoch": 0.4284533293251165, - "grad_norm": 0.8320635937059107, - "learning_rate": 2.553520652017851e-06, - "loss": 0.856, - "step": 5701 - }, - { - "epoch": 0.42852848339095145, - "grad_norm": 1.9026657552607777, - "learning_rate": 2.5530528099859946e-06, - "loss": 0.9636, - "step": 5702 - }, - { - "epoch": 0.4286036374567864, - "grad_norm": 1.4551591479382064, - "learning_rate": 2.5525849351836414e-06, - "loss": 1.0075, - "step": 5703 - }, - { - "epoch": 0.4286787915226214, - "grad_norm": 1.8770600763786098, - "learning_rate": 2.5521170276385147e-06, - "loss": 0.9317, - "step": 5704 - }, - { - "epoch": 0.4287539455884563, - "grad_norm": 0.7513934542113181, - "learning_rate": 2.5516490873783397e-06, - "loss": 0.8142, - "step": 5705 - }, - { - "epoch": 0.4288290996542913, - "grad_norm": 1.7568214165690195, - "learning_rate": 2.5511811144308447e-06, - "loss": 0.9569, - "step": 5706 - }, - { - "epoch": 0.42890425372012625, - "grad_norm": 1.7086585208318124, - "learning_rate": 2.550713108823757e-06, - "loss": 0.9868, - "step": 5707 - }, - { - "epoch": 0.4289794077859612, - "grad_norm": 1.8794262457946995, - "learning_rate": 2.5502450705848097e-06, - "loss": 0.9433, - "step": 5708 - }, - { - "epoch": 0.4290545618517962, - "grad_norm": 1.8853404063052102, - "learning_rate": 2.5497769997417347e-06, - "loss": 1.0085, - "step": 5709 - }, - { - "epoch": 0.42912971591763116, - "grad_norm": 1.6739236276950005, - "learning_rate": 2.5493088963222668e-06, - "loss": 0.97, - "step": 5710 - }, - { - "epoch": 0.42920486998346613, - "grad_norm": 0.7443522955749337, - "learning_rate": 2.5488407603541437e-06, - "loss": 0.8676, - "step": 5711 - }, - { - "epoch": 0.42928002404930105, - "grad_norm": 2.468385017552803, - "learning_rate": 2.5483725918651034e-06, - "loss": 1.0397, - "step": 5712 - }, - { - "epoch": 0.429355178115136, - "grad_norm": 1.8709621184044958, - "learning_rate": 2.5479043908828877e-06, - "loss": 1.0317, - "step": 5713 - }, - { - "epoch": 0.429430332180971, - "grad_norm": 1.5605896524956722, - "learning_rate": 2.547436157435239e-06, - "loss": 1.043, - "step": 5714 - }, - { - "epoch": 0.42950548624680596, - "grad_norm": 1.7919344443183112, - "learning_rate": 2.546967891549901e-06, - "loss": 0.9538, - "step": 5715 - }, - { - "epoch": 0.4295806403126409, - "grad_norm": 1.4698550348749917, - "learning_rate": 2.5464995932546217e-06, - "loss": 0.9535, - "step": 5716 - }, - { - "epoch": 0.4296557943784759, - "grad_norm": 2.002644578569785, - "learning_rate": 2.5460312625771475e-06, - "loss": 0.9731, - "step": 5717 - }, - { - "epoch": 0.4297309484443108, - "grad_norm": 2.6152163242118838, - "learning_rate": 2.5455628995452313e-06, - "loss": 1.0171, - "step": 5718 - }, - { - "epoch": 0.4298061025101458, - "grad_norm": 1.8396019908380818, - "learning_rate": 2.5450945041866246e-06, - "loss": 0.99, - "step": 5719 - }, - { - "epoch": 0.42988125657598075, - "grad_norm": 1.517009849637066, - "learning_rate": 2.54462607652908e-06, - "loss": 0.9952, - "step": 5720 - }, - { - "epoch": 0.4299564106418157, - "grad_norm": 1.5037018079637863, - "learning_rate": 2.5441576166003555e-06, - "loss": 1.0515, - "step": 5721 - }, - { - "epoch": 0.4300315647076507, - "grad_norm": 1.8047094302475708, - "learning_rate": 2.5436891244282076e-06, - "loss": 1.0394, - "step": 5722 - }, - { - "epoch": 0.43010671877348566, - "grad_norm": 1.7269118978208133, - "learning_rate": 2.5432206000403982e-06, - "loss": 1.0005, - "step": 5723 - }, - { - "epoch": 0.43018187283932063, - "grad_norm": 2.4617505864754468, - "learning_rate": 2.5427520434646884e-06, - "loss": 0.9672, - "step": 5724 - }, - { - "epoch": 0.43025702690515555, - "grad_norm": 1.6612802806703673, - "learning_rate": 2.5422834547288406e-06, - "loss": 1.0008, - "step": 5725 - }, - { - "epoch": 0.4303321809709905, - "grad_norm": 3.0055688986616245, - "learning_rate": 2.5418148338606226e-06, - "loss": 0.9977, - "step": 5726 - }, - { - "epoch": 0.4304073350368255, - "grad_norm": 1.8622055947515632, - "learning_rate": 2.5413461808878e-06, - "loss": 0.9458, - "step": 5727 - }, - { - "epoch": 0.43048248910266046, - "grad_norm": 1.3935999391796654, - "learning_rate": 2.5408774958381436e-06, - "loss": 0.9808, - "step": 5728 - }, - { - "epoch": 0.43055764316849543, - "grad_norm": 1.4471587065664107, - "learning_rate": 2.5404087787394248e-06, - "loss": 0.9303, - "step": 5729 - }, - { - "epoch": 0.4306327972343304, - "grad_norm": 1.8750516884354973, - "learning_rate": 2.5399400296194164e-06, - "loss": 0.9831, - "step": 5730 - }, - { - "epoch": 0.4307079513001653, - "grad_norm": 8.301405307423858, - "learning_rate": 2.5394712485058933e-06, - "loss": 0.9174, - "step": 5731 - }, - { - "epoch": 0.4307831053660003, - "grad_norm": 1.4949065196432598, - "learning_rate": 2.539002435426633e-06, - "loss": 0.9725, - "step": 5732 - }, - { - "epoch": 0.43085825943183526, - "grad_norm": 12.295900260168338, - "learning_rate": 2.5385335904094147e-06, - "loss": 1.0059, - "step": 5733 - }, - { - "epoch": 0.4309334134976702, - "grad_norm": 3.3782457356575293, - "learning_rate": 2.5380647134820186e-06, - "loss": 0.9718, - "step": 5734 - }, - { - "epoch": 0.4310085675635052, - "grad_norm": 1.6943092717867434, - "learning_rate": 2.5375958046722283e-06, - "loss": 1.0185, - "step": 5735 - }, - { - "epoch": 0.43108372162934017, - "grad_norm": 1.6476340844520572, - "learning_rate": 2.5371268640078277e-06, - "loss": 1.0989, - "step": 5736 - }, - { - "epoch": 0.43115887569517514, - "grad_norm": 1.7905957843733875, - "learning_rate": 2.5366578915166033e-06, - "loss": 0.9969, - "step": 5737 - }, - { - "epoch": 0.43123402976101005, - "grad_norm": 1.7811485395480304, - "learning_rate": 2.536188887226345e-06, - "loss": 0.9704, - "step": 5738 - }, - { - "epoch": 0.431309183826845, - "grad_norm": 2.036050845172688, - "learning_rate": 2.53571985116484e-06, - "loss": 0.9195, - "step": 5739 - }, - { - "epoch": 0.43138433789268, - "grad_norm": 2.119033177899607, - "learning_rate": 2.535250783359884e-06, - "loss": 1.0061, - "step": 5740 - }, - { - "epoch": 0.43145949195851496, - "grad_norm": 2.2355654019673388, - "learning_rate": 2.5347816838392695e-06, - "loss": 0.9384, - "step": 5741 - }, - { - "epoch": 0.43153464602434993, - "grad_norm": 1.7598513837403382, - "learning_rate": 2.534312552630791e-06, - "loss": 0.9839, - "step": 5742 - }, - { - "epoch": 0.4316098000901849, - "grad_norm": 1.8666748236863977, - "learning_rate": 2.533843389762249e-06, - "loss": 1.0927, - "step": 5743 - }, - { - "epoch": 0.4316849541560198, - "grad_norm": 1.9308697248949735, - "learning_rate": 2.5333741952614412e-06, - "loss": 0.997, - "step": 5744 - }, - { - "epoch": 0.4317601082218548, - "grad_norm": 2.244591889191315, - "learning_rate": 2.5329049691561705e-06, - "loss": 0.9746, - "step": 5745 - }, - { - "epoch": 0.43183526228768976, - "grad_norm": 1.5585584134563317, - "learning_rate": 2.53243571147424e-06, - "loss": 1.037, - "step": 5746 - }, - { - "epoch": 0.43191041635352473, - "grad_norm": 1.5678304918247958, - "learning_rate": 2.5319664222434534e-06, - "loss": 0.9927, - "step": 5747 - }, - { - "epoch": 0.4319855704193597, - "grad_norm": 1.3312305269921467, - "learning_rate": 2.5314971014916207e-06, - "loss": 0.9834, - "step": 5748 - }, - { - "epoch": 0.43206072448519467, - "grad_norm": 1.9671695359964938, - "learning_rate": 2.5310277492465486e-06, - "loss": 1.04, - "step": 5749 - }, - { - "epoch": 0.4321358785510296, - "grad_norm": 1.6827271392906147, - "learning_rate": 2.5305583655360495e-06, - "loss": 1.0464, - "step": 5750 - }, - { - "epoch": 0.43221103261686455, - "grad_norm": 2.1228714548474557, - "learning_rate": 2.530088950387935e-06, - "loss": 0.986, - "step": 5751 - }, - { - "epoch": 0.4322861866826995, - "grad_norm": 1.5390489451784526, - "learning_rate": 2.529619503830021e-06, - "loss": 0.9977, - "step": 5752 - }, - { - "epoch": 0.4323613407485345, - "grad_norm": 2.1942832407376844, - "learning_rate": 2.5291500258901234e-06, - "loss": 1.0117, - "step": 5753 - }, - { - "epoch": 0.43243649481436947, - "grad_norm": 1.520635914291053, - "learning_rate": 2.5286805165960597e-06, - "loss": 0.9774, - "step": 5754 - }, - { - "epoch": 0.43251164888020444, - "grad_norm": 1.5800303674749243, - "learning_rate": 2.528210975975652e-06, - "loss": 1.0318, - "step": 5755 - }, - { - "epoch": 0.4325868029460394, - "grad_norm": 2.728909419702523, - "learning_rate": 2.52774140405672e-06, - "loss": 1.0047, - "step": 5756 - }, - { - "epoch": 0.4326619570118743, - "grad_norm": 6.851444743589697, - "learning_rate": 2.5272718008670895e-06, - "loss": 1.0281, - "step": 5757 - }, - { - "epoch": 0.4327371110777093, - "grad_norm": 2.341400189243908, - "learning_rate": 2.5268021664345865e-06, - "loss": 0.9996, - "step": 5758 - }, - { - "epoch": 0.43281226514354426, - "grad_norm": 0.7614906556400851, - "learning_rate": 2.526332500787037e-06, - "loss": 0.8526, - "step": 5759 - }, - { - "epoch": 0.43288741920937923, - "grad_norm": 1.6669392680711257, - "learning_rate": 2.525862803952272e-06, - "loss": 1.0251, - "step": 5760 - }, - { - "epoch": 0.4329625732752142, - "grad_norm": 1.6181684826621887, - "learning_rate": 2.5253930759581213e-06, - "loss": 1.0982, - "step": 5761 - }, - { - "epoch": 0.4330377273410492, - "grad_norm": 1.8300212578502906, - "learning_rate": 2.5249233168324196e-06, - "loss": 0.986, - "step": 5762 - }, - { - "epoch": 0.4331128814068841, - "grad_norm": 6.61003697464336, - "learning_rate": 2.5244535266030014e-06, - "loss": 1.0647, - "step": 5763 - }, - { - "epoch": 0.43318803547271906, - "grad_norm": 1.6042157037679496, - "learning_rate": 2.5239837052977032e-06, - "loss": 1.1299, - "step": 5764 - }, - { - "epoch": 0.433263189538554, - "grad_norm": 1.995241763445056, - "learning_rate": 2.523513852944364e-06, - "loss": 0.9733, - "step": 5765 - }, - { - "epoch": 0.433338343604389, - "grad_norm": 2.856071690308754, - "learning_rate": 2.5230439695708244e-06, - "loss": 0.8932, - "step": 5766 - }, - { - "epoch": 0.43341349767022397, - "grad_norm": 1.9521571088085927, - "learning_rate": 2.5225740552049267e-06, - "loss": 1.0191, - "step": 5767 - }, - { - "epoch": 0.43348865173605894, - "grad_norm": 2.0523613936613723, - "learning_rate": 2.5221041098745157e-06, - "loss": 1.0499, - "step": 5768 - }, - { - "epoch": 0.4335638058018939, - "grad_norm": 0.6995747688122127, - "learning_rate": 2.5216341336074363e-06, - "loss": 0.8527, - "step": 5769 - }, - { - "epoch": 0.4336389598677288, - "grad_norm": 1.3956941943305972, - "learning_rate": 2.5211641264315372e-06, - "loss": 0.9749, - "step": 5770 - }, - { - "epoch": 0.4337141139335638, - "grad_norm": 1.9613122467996806, - "learning_rate": 2.520694088374668e-06, - "loss": 1.0635, - "step": 5771 - }, - { - "epoch": 0.43378926799939876, - "grad_norm": 1.2670727679033642, - "learning_rate": 2.52022401946468e-06, - "loss": 1.0257, - "step": 5772 - }, - { - "epoch": 0.43386442206523373, - "grad_norm": 1.7170866807354064, - "learning_rate": 2.519753919729427e-06, - "loss": 1.0364, - "step": 5773 - }, - { - "epoch": 0.4339395761310687, - "grad_norm": 1.9682496661245654, - "learning_rate": 2.519283789196764e-06, - "loss": 1.0704, - "step": 5774 - }, - { - "epoch": 0.4340147301969037, - "grad_norm": 1.9723765562082662, - "learning_rate": 2.518813627894548e-06, - "loss": 1.0418, - "step": 5775 - }, - { - "epoch": 0.4340898842627386, - "grad_norm": 1.4909126468042992, - "learning_rate": 2.5183434358506373e-06, - "loss": 1.0409, - "step": 5776 - }, - { - "epoch": 0.43416503832857356, - "grad_norm": 0.744458188357885, - "learning_rate": 2.5178732130928943e-06, - "loss": 0.8722, - "step": 5777 - }, - { - "epoch": 0.43424019239440853, - "grad_norm": 7.61607964700036, - "learning_rate": 2.5174029596491792e-06, - "loss": 1.0276, - "step": 5778 - }, - { - "epoch": 0.4343153464602435, - "grad_norm": 2.140696392121827, - "learning_rate": 2.5169326755473582e-06, - "loss": 0.9527, - "step": 5779 - }, - { - "epoch": 0.43439050052607847, - "grad_norm": 1.8877486740913934, - "learning_rate": 2.516462360815297e-06, - "loss": 0.9542, - "step": 5780 - }, - { - "epoch": 0.43446565459191344, - "grad_norm": 1.597383807020643, - "learning_rate": 2.5159920154808615e-06, - "loss": 0.9887, - "step": 5781 - }, - { - "epoch": 0.4345408086577484, - "grad_norm": 4.286988137166699, - "learning_rate": 2.5155216395719253e-06, - "loss": 1.0024, - "step": 5782 - }, - { - "epoch": 0.4346159627235833, - "grad_norm": 0.8323855295038024, - "learning_rate": 2.5150512331163564e-06, - "loss": 0.9268, - "step": 5783 - }, - { - "epoch": 0.4346911167894183, - "grad_norm": 6.370504552001381, - "learning_rate": 2.5145807961420303e-06, - "loss": 0.8446, - "step": 5784 - }, - { - "epoch": 0.43476627085525327, - "grad_norm": 1.4846330117303916, - "learning_rate": 2.514110328676822e-06, - "loss": 1.0324, - "step": 5785 - }, - { - "epoch": 0.43484142492108824, - "grad_norm": 1.6844632410492568, - "learning_rate": 2.5136398307486075e-06, - "loss": 1.0083, - "step": 5786 - }, - { - "epoch": 0.4349165789869232, - "grad_norm": 2.074257485697925, - "learning_rate": 2.5131693023852663e-06, - "loss": 0.8446, - "step": 5787 - }, - { - "epoch": 0.4349917330527582, - "grad_norm": 1.893534478743213, - "learning_rate": 2.5126987436146786e-06, - "loss": 0.9893, - "step": 5788 - }, - { - "epoch": 0.4350668871185931, - "grad_norm": 1.4256881056368687, - "learning_rate": 2.5122281544647273e-06, - "loss": 0.9129, - "step": 5789 - }, - { - "epoch": 0.43514204118442806, - "grad_norm": 1.7409539752510004, - "learning_rate": 2.511757534963297e-06, - "loss": 0.9317, - "step": 5790 - }, - { - "epoch": 0.43521719525026303, - "grad_norm": 1.6345239458357586, - "learning_rate": 2.5112868851382724e-06, - "loss": 0.9639, - "step": 5791 - }, - { - "epoch": 0.435292349316098, - "grad_norm": 1.6046693765569569, - "learning_rate": 2.5108162050175425e-06, - "loss": 0.9895, - "step": 5792 - }, - { - "epoch": 0.435367503381933, - "grad_norm": 1.7021541102933655, - "learning_rate": 2.510345494628996e-06, - "loss": 1.052, - "step": 5793 - }, - { - "epoch": 0.43544265744776794, - "grad_norm": 1.349175301952817, - "learning_rate": 2.509874754000525e-06, - "loss": 1.0289, - "step": 5794 - }, - { - "epoch": 0.43551781151360286, - "grad_norm": 1.5496079933616147, - "learning_rate": 2.5094039831600217e-06, - "loss": 0.9479, - "step": 5795 - }, - { - "epoch": 0.43559296557943783, - "grad_norm": 1.9347401312864607, - "learning_rate": 2.5089331821353827e-06, - "loss": 0.9601, - "step": 5796 - }, - { - "epoch": 0.4356681196452728, - "grad_norm": 2.2287741936500693, - "learning_rate": 2.5084623509545034e-06, - "loss": 1.0625, - "step": 5797 - }, - { - "epoch": 0.43574327371110777, - "grad_norm": 1.7016719954528965, - "learning_rate": 2.5079914896452823e-06, - "loss": 0.9846, - "step": 5798 - }, - { - "epoch": 0.43581842777694274, - "grad_norm": 1.6376584798984943, - "learning_rate": 2.507520598235621e-06, - "loss": 1.0385, - "step": 5799 - }, - { - "epoch": 0.4358935818427777, - "grad_norm": 7.170995188749628, - "learning_rate": 2.5070496767534202e-06, - "loss": 0.9746, - "step": 5800 - }, - { - "epoch": 0.4359687359086127, - "grad_norm": 1.616298928991632, - "learning_rate": 2.5065787252265848e-06, - "loss": 1.0226, - "step": 5801 - }, - { - "epoch": 0.4360438899744476, - "grad_norm": 1.8326238718879546, - "learning_rate": 2.50610774368302e-06, - "loss": 0.9778, - "step": 5802 - }, - { - "epoch": 0.43611904404028257, - "grad_norm": 1.7392548652636806, - "learning_rate": 2.505636732150633e-06, - "loss": 1.0152, - "step": 5803 - }, - { - "epoch": 0.43619419810611754, - "grad_norm": 1.7379996425246387, - "learning_rate": 2.505165690657334e-06, - "loss": 1.0249, - "step": 5804 - }, - { - "epoch": 0.4362693521719525, - "grad_norm": 1.7767595945082852, - "learning_rate": 2.504694619231033e-06, - "loss": 1.0386, - "step": 5805 - }, - { - "epoch": 0.4363445062377875, - "grad_norm": 1.99857899729354, - "learning_rate": 2.5042235178996436e-06, - "loss": 1.0698, - "step": 5806 - }, - { - "epoch": 0.43641966030362245, - "grad_norm": 1.9345363592781766, - "learning_rate": 2.5037523866910797e-06, - "loss": 0.9669, - "step": 5807 - }, - { - "epoch": 0.43649481436945736, - "grad_norm": 2.7971142714462127, - "learning_rate": 2.503281225633258e-06, - "loss": 0.9994, - "step": 5808 - }, - { - "epoch": 0.43656996843529233, - "grad_norm": 1.5135753970009609, - "learning_rate": 2.5028100347540967e-06, - "loss": 0.9872, - "step": 5809 - }, - { - "epoch": 0.4366451225011273, - "grad_norm": 2.493625142502048, - "learning_rate": 2.5023388140815148e-06, - "loss": 0.9832, - "step": 5810 - }, - { - "epoch": 0.4367202765669623, - "grad_norm": 1.8379466000666764, - "learning_rate": 2.5018675636434353e-06, - "loss": 1.0626, - "step": 5811 - }, - { - "epoch": 0.43679543063279724, - "grad_norm": 2.3451820576155313, - "learning_rate": 2.5013962834677804e-06, - "loss": 1.0507, - "step": 5812 - }, - { - "epoch": 0.4368705846986322, - "grad_norm": 0.7381723717226236, - "learning_rate": 2.5009249735824757e-06, - "loss": 0.8817, - "step": 5813 - }, - { - "epoch": 0.4369457387644672, - "grad_norm": 2.052404840008135, - "learning_rate": 2.500453634015449e-06, - "loss": 1.0083, - "step": 5814 - }, - { - "epoch": 0.4370208928303021, - "grad_norm": 2.8613249586693232, - "learning_rate": 2.4999822647946273e-06, - "loss": 1.025, - "step": 5815 - }, - { - "epoch": 0.43709604689613707, - "grad_norm": 2.241553452857871, - "learning_rate": 2.499510865947942e-06, - "loss": 0.9688, - "step": 5816 - }, - { - "epoch": 0.43717120096197204, - "grad_norm": 1.878802322400229, - "learning_rate": 2.4990394375033247e-06, - "loss": 1.0146, - "step": 5817 - }, - { - "epoch": 0.437246355027807, - "grad_norm": 1.869820625555545, - "learning_rate": 2.4985679794887106e-06, - "loss": 0.9631, - "step": 5818 - }, - { - "epoch": 0.437321509093642, - "grad_norm": 2.069096622067567, - "learning_rate": 2.4980964919320343e-06, - "loss": 1.0049, - "step": 5819 - }, - { - "epoch": 0.43739666315947695, - "grad_norm": 2.2803571861293546, - "learning_rate": 2.4976249748612332e-06, - "loss": 1.0248, - "step": 5820 - }, - { - "epoch": 0.43747181722531187, - "grad_norm": 1.450387735155896, - "learning_rate": 2.497153428304247e-06, - "loss": 0.9573, - "step": 5821 - }, - { - "epoch": 0.43754697129114684, - "grad_norm": 1.8082424012041227, - "learning_rate": 2.496681852289016e-06, - "loss": 1.0227, - "step": 5822 - }, - { - "epoch": 0.4376221253569818, - "grad_norm": 1.9814895316614374, - "learning_rate": 2.4962102468434843e-06, - "loss": 0.927, - "step": 5823 - }, - { - "epoch": 0.4376972794228168, - "grad_norm": 3.2965340822235145, - "learning_rate": 2.4957386119955954e-06, - "loss": 1.0684, - "step": 5824 - }, - { - "epoch": 0.43777243348865175, - "grad_norm": 1.568114531182944, - "learning_rate": 2.4952669477732938e-06, - "loss": 1.0008, - "step": 5825 - }, - { - "epoch": 0.4378475875544867, - "grad_norm": 2.332589189760176, - "learning_rate": 2.4947952542045307e-06, - "loss": 1.0819, - "step": 5826 - }, - { - "epoch": 0.43792274162032163, - "grad_norm": 1.78291417065177, - "learning_rate": 2.494323531317253e-06, - "loss": 1.0219, - "step": 5827 - }, - { - "epoch": 0.4379978956861566, - "grad_norm": 1.7245676245123336, - "learning_rate": 2.493851779139414e-06, - "loss": 0.9849, - "step": 5828 - }, - { - "epoch": 0.4380730497519916, - "grad_norm": 1.4748244779216542, - "learning_rate": 2.493379997698966e-06, - "loss": 0.9624, - "step": 5829 - }, - { - "epoch": 0.43814820381782654, - "grad_norm": 3.8928193181751345, - "learning_rate": 2.4929081870238635e-06, - "loss": 0.9746, - "step": 5830 - }, - { - "epoch": 0.4382233578836615, - "grad_norm": 0.5822372583982741, - "learning_rate": 2.4924363471420634e-06, - "loss": 0.7729, - "step": 5831 - }, - { - "epoch": 0.4382985119494965, - "grad_norm": 1.6374576330164106, - "learning_rate": 2.491964478081524e-06, - "loss": 1.0453, - "step": 5832 - }, - { - "epoch": 0.43837366601533145, - "grad_norm": 1.947670116903102, - "learning_rate": 2.4914925798702057e-06, - "loss": 0.9843, - "step": 5833 - }, - { - "epoch": 0.43844882008116637, - "grad_norm": 1.0973954620626238, - "learning_rate": 2.49102065253607e-06, - "loss": 0.8058, - "step": 5834 - }, - { - "epoch": 0.43852397414700134, - "grad_norm": 1.94704406151193, - "learning_rate": 2.49054869610708e-06, - "loss": 0.785, - "step": 5835 - }, - { - "epoch": 0.4385991282128363, - "grad_norm": 3.187293102057043, - "learning_rate": 2.490076710611202e-06, - "loss": 1.0259, - "step": 5836 - }, - { - "epoch": 0.4386742822786713, - "grad_norm": 1.873586885097545, - "learning_rate": 2.4896046960764015e-06, - "loss": 0.8858, - "step": 5837 - }, - { - "epoch": 0.43874943634450625, - "grad_norm": 4.137734840389727, - "learning_rate": 2.4891326525306487e-06, - "loss": 0.8658, - "step": 5838 - }, - { - "epoch": 0.4388245904103412, - "grad_norm": 1.719174835412494, - "learning_rate": 2.4886605800019123e-06, - "loss": 1.0412, - "step": 5839 - }, - { - "epoch": 0.43889974447617613, - "grad_norm": 1.78667493865722, - "learning_rate": 2.488188478518166e-06, - "loss": 1.0086, - "step": 5840 - }, - { - "epoch": 0.4389748985420111, - "grad_norm": 2.39781822137288, - "learning_rate": 2.487716348107383e-06, - "loss": 0.9615, - "step": 5841 - }, - { - "epoch": 0.4390500526078461, - "grad_norm": 1.5865410469051895, - "learning_rate": 2.4872441887975386e-06, - "loss": 0.9579, - "step": 5842 - }, - { - "epoch": 0.43912520667368105, - "grad_norm": 3.092807048088604, - "learning_rate": 2.48677200061661e-06, - "loss": 0.9869, - "step": 5843 - }, - { - "epoch": 0.439200360739516, - "grad_norm": 0.6789222662718871, - "learning_rate": 2.486299783592576e-06, - "loss": 0.8748, - "step": 5844 - }, - { - "epoch": 0.439275514805351, - "grad_norm": 2.2751147188878944, - "learning_rate": 2.485827537753419e-06, - "loss": 0.9941, - "step": 5845 - }, - { - "epoch": 0.43935066887118596, - "grad_norm": 2.271162959915189, - "learning_rate": 2.4853552631271193e-06, - "loss": 1.0073, - "step": 5846 - }, - { - "epoch": 0.43942582293702087, - "grad_norm": 4.284693314784363, - "learning_rate": 2.4848829597416615e-06, - "loss": 0.9572, - "step": 5847 - }, - { - "epoch": 0.43950097700285584, - "grad_norm": 2.855238460329406, - "learning_rate": 2.484410627625032e-06, - "loss": 1.0385, - "step": 5848 - }, - { - "epoch": 0.4395761310686908, - "grad_norm": 1.1703986468523069, - "learning_rate": 2.483938266805217e-06, - "loss": 0.9355, - "step": 5849 - }, - { - "epoch": 0.4396512851345258, - "grad_norm": 2.1095304078674784, - "learning_rate": 2.483465877310208e-06, - "loss": 0.9483, - "step": 5850 - }, - { - "epoch": 0.43972643920036075, - "grad_norm": 1.6412660345498509, - "learning_rate": 2.482993459167993e-06, - "loss": 1.0158, - "step": 5851 - }, - { - "epoch": 0.4398015932661957, - "grad_norm": 0.7243701303366097, - "learning_rate": 2.482521012406567e-06, - "loss": 0.8394, - "step": 5852 - }, - { - "epoch": 0.43987674733203064, - "grad_norm": 1.4619282509644642, - "learning_rate": 2.4820485370539233e-06, - "loss": 1.0195, - "step": 5853 - }, - { - "epoch": 0.4399519013978656, - "grad_norm": 1.5185947228946781, - "learning_rate": 2.481576033138057e-06, - "loss": 1.0359, - "step": 5854 - }, - { - "epoch": 0.4400270554637006, - "grad_norm": 1.778350746024251, - "learning_rate": 2.4811035006869677e-06, - "loss": 1.0496, - "step": 5855 - }, - { - "epoch": 0.44010220952953555, - "grad_norm": 0.8481115006431603, - "learning_rate": 2.4806309397286534e-06, - "loss": 0.8764, - "step": 5856 - }, - { - "epoch": 0.4401773635953705, - "grad_norm": 1.3764499042372798, - "learning_rate": 2.4801583502911154e-06, - "loss": 1.0338, - "step": 5857 - }, - { - "epoch": 0.4402525176612055, - "grad_norm": 1.8333081185889966, - "learning_rate": 2.4796857324023564e-06, - "loss": 0.8654, - "step": 5858 - }, - { - "epoch": 0.44032767172704046, - "grad_norm": 1.6900745109068394, - "learning_rate": 2.479213086090381e-06, - "loss": 1.0017, - "step": 5859 - }, - { - "epoch": 0.4404028257928754, - "grad_norm": 1.7410373740306686, - "learning_rate": 2.478740411383195e-06, - "loss": 1.0298, - "step": 5860 - }, - { - "epoch": 0.44047797985871034, - "grad_norm": 1.581834267491633, - "learning_rate": 2.478267708308807e-06, - "loss": 0.9067, - "step": 5861 - }, - { - "epoch": 0.4405531339245453, - "grad_norm": 1.597994369878865, - "learning_rate": 2.4777949768952255e-06, - "loss": 1.0125, - "step": 5862 - }, - { - "epoch": 0.4406282879903803, - "grad_norm": 2.6639561785739465, - "learning_rate": 2.477322217170462e-06, - "loss": 1.0274, - "step": 5863 - }, - { - "epoch": 0.44070344205621526, - "grad_norm": 1.7855330388866886, - "learning_rate": 2.476849429162529e-06, - "loss": 0.939, - "step": 5864 - }, - { - "epoch": 0.4407785961220502, - "grad_norm": 1.7869057127590402, - "learning_rate": 2.4763766128994423e-06, - "loss": 1.0378, - "step": 5865 - }, - { - "epoch": 0.44085375018788514, - "grad_norm": 1.864831307684914, - "learning_rate": 2.475903768409216e-06, - "loss": 0.9885, - "step": 5866 - }, - { - "epoch": 0.4409289042537201, - "grad_norm": 1.432842421557248, - "learning_rate": 2.47543089571987e-06, - "loss": 0.8944, - "step": 5867 - }, - { - "epoch": 0.4410040583195551, - "grad_norm": 2.0996352651020036, - "learning_rate": 2.4749579948594224e-06, - "loss": 0.8743, - "step": 5868 - }, - { - "epoch": 0.44107921238539005, - "grad_norm": 1.4728280936034237, - "learning_rate": 2.4744850658558943e-06, - "loss": 0.944, - "step": 5869 - }, - { - "epoch": 0.441154366451225, - "grad_norm": 1.8572564758401493, - "learning_rate": 2.47401210873731e-06, - "loss": 0.9216, - "step": 5870 - }, - { - "epoch": 0.44122952051706, - "grad_norm": 1.601880967645919, - "learning_rate": 2.473539123531693e-06, - "loss": 0.9702, - "step": 5871 - }, - { - "epoch": 0.4413046745828949, - "grad_norm": 2.979889379662751, - "learning_rate": 2.4730661102670692e-06, - "loss": 1.0805, - "step": 5872 - }, - { - "epoch": 0.4413798286487299, - "grad_norm": 1.7810253202383048, - "learning_rate": 2.4725930689714673e-06, - "loss": 0.9569, - "step": 5873 - }, - { - "epoch": 0.44145498271456485, - "grad_norm": 1.6024075395449584, - "learning_rate": 2.4721199996729167e-06, - "loss": 0.9711, - "step": 5874 - }, - { - "epoch": 0.4415301367803998, - "grad_norm": 1.8212046913056388, - "learning_rate": 2.471646902399448e-06, - "loss": 0.9401, - "step": 5875 - }, - { - "epoch": 0.4416052908462348, - "grad_norm": 1.5035703605982027, - "learning_rate": 2.471173777179094e-06, - "loss": 0.9892, - "step": 5876 - }, - { - "epoch": 0.44168044491206976, - "grad_norm": 2.7108865782602654, - "learning_rate": 2.4707006240398894e-06, - "loss": 0.9964, - "step": 5877 - }, - { - "epoch": 0.44175559897790473, - "grad_norm": 1.9614880765367277, - "learning_rate": 2.4702274430098707e-06, - "loss": 0.9787, - "step": 5878 - }, - { - "epoch": 0.44183075304373964, - "grad_norm": 2.244929534383956, - "learning_rate": 2.469754234117075e-06, - "loss": 1.0082, - "step": 5879 - }, - { - "epoch": 0.4419059071095746, - "grad_norm": 1.5144115978546502, - "learning_rate": 2.4692809973895426e-06, - "loss": 0.9857, - "step": 5880 - }, - { - "epoch": 0.4419810611754096, - "grad_norm": 1.779552922841603, - "learning_rate": 2.4688077328553136e-06, - "loss": 0.9933, - "step": 5881 - }, - { - "epoch": 0.44205621524124455, - "grad_norm": 2.5504530597697954, - "learning_rate": 2.4683344405424316e-06, - "loss": 0.9486, - "step": 5882 - }, - { - "epoch": 0.4421313693070795, - "grad_norm": 2.368362185655701, - "learning_rate": 2.4678611204789405e-06, - "loss": 0.9772, - "step": 5883 - }, - { - "epoch": 0.4422065233729145, - "grad_norm": 3.555747877985974, - "learning_rate": 2.4673877726928865e-06, - "loss": 1.0551, - "step": 5884 - }, - { - "epoch": 0.4422816774387494, - "grad_norm": 2.039265706544198, - "learning_rate": 2.4669143972123178e-06, - "loss": 0.8297, - "step": 5885 - }, - { - "epoch": 0.4423568315045844, - "grad_norm": 1.5581002661206622, - "learning_rate": 2.4664409940652817e-06, - "loss": 0.9878, - "step": 5886 - }, - { - "epoch": 0.44243198557041935, - "grad_norm": 1.9768788080845112, - "learning_rate": 2.465967563279832e-06, - "loss": 1.0516, - "step": 5887 - }, - { - "epoch": 0.4425071396362543, - "grad_norm": 1.488575743526986, - "learning_rate": 2.4654941048840184e-06, - "loss": 1.0194, - "step": 5888 - }, - { - "epoch": 0.4425822937020893, - "grad_norm": 1.9553599273602094, - "learning_rate": 2.465020618905898e-06, - "loss": 1.0686, - "step": 5889 - }, - { - "epoch": 0.44265744776792426, - "grad_norm": 1.5553415734086016, - "learning_rate": 2.464547105373525e-06, - "loss": 1.0671, - "step": 5890 - }, - { - "epoch": 0.44273260183375923, - "grad_norm": 1.5608996015344987, - "learning_rate": 2.4640735643149566e-06, - "loss": 0.9743, - "step": 5891 - }, - { - "epoch": 0.44280775589959415, - "grad_norm": 0.7210242737005328, - "learning_rate": 2.4635999957582526e-06, - "loss": 0.8652, - "step": 5892 - }, - { - "epoch": 0.4428829099654291, - "grad_norm": 1.8610082553637377, - "learning_rate": 2.4631263997314734e-06, - "loss": 0.9862, - "step": 5893 - }, - { - "epoch": 0.4429580640312641, - "grad_norm": 1.5817032085387364, - "learning_rate": 2.4626527762626822e-06, - "loss": 1.0621, - "step": 5894 - }, - { - "epoch": 0.44303321809709906, - "grad_norm": 1.6604392135920851, - "learning_rate": 2.462179125379942e-06, - "loss": 0.9715, - "step": 5895 - }, - { - "epoch": 0.443108372162934, - "grad_norm": 2.030716928812236, - "learning_rate": 2.461705447111319e-06, - "loss": 0.9804, - "step": 5896 - }, - { - "epoch": 0.443183526228769, - "grad_norm": 1.9923293717674544, - "learning_rate": 2.4612317414848803e-06, - "loss": 1.0016, - "step": 5897 - }, - { - "epoch": 0.4432586802946039, - "grad_norm": 1.514088904246028, - "learning_rate": 2.460758008528694e-06, - "loss": 1.0352, - "step": 5898 - }, - { - "epoch": 0.4433338343604389, - "grad_norm": 1.2604740014184954, - "learning_rate": 2.460284248270833e-06, - "loss": 0.9624, - "step": 5899 - }, - { - "epoch": 0.44340898842627385, - "grad_norm": 31.266173549035987, - "learning_rate": 2.4598104607393666e-06, - "loss": 1.0173, - "step": 5900 - }, - { - "epoch": 0.4434841424921088, - "grad_norm": 2.129984003642733, - "learning_rate": 2.4593366459623698e-06, - "loss": 1.0653, - "step": 5901 - }, - { - "epoch": 0.4435592965579438, - "grad_norm": 1.5899088328393935, - "learning_rate": 2.458862803967918e-06, - "loss": 0.8974, - "step": 5902 - }, - { - "epoch": 0.44363445062377876, - "grad_norm": 0.6774778072832669, - "learning_rate": 2.4583889347840873e-06, - "loss": 0.8814, - "step": 5903 - }, - { - "epoch": 0.44370960468961373, - "grad_norm": 2.5279239418986026, - "learning_rate": 2.4579150384389574e-06, - "loss": 0.9935, - "step": 5904 - }, - { - "epoch": 0.44378475875544865, - "grad_norm": 1.843062450425105, - "learning_rate": 2.4574411149606076e-06, - "loss": 0.9195, - "step": 5905 - }, - { - "epoch": 0.4438599128212836, - "grad_norm": 1.5558929370781849, - "learning_rate": 2.456967164377121e-06, - "loss": 1.076, - "step": 5906 - }, - { - "epoch": 0.4439350668871186, - "grad_norm": 1.7769116893070598, - "learning_rate": 2.4564931867165795e-06, - "loss": 0.9341, - "step": 5907 - }, - { - "epoch": 0.44401022095295356, - "grad_norm": 1.617674970641021, - "learning_rate": 2.4560191820070683e-06, - "loss": 0.9716, - "step": 5908 - }, - { - "epoch": 0.44408537501878853, - "grad_norm": 1.4914090987308686, - "learning_rate": 2.4555451502766754e-06, - "loss": 0.8637, - "step": 5909 - }, - { - "epoch": 0.4441605290846235, - "grad_norm": 0.6706602070606663, - "learning_rate": 2.4550710915534863e-06, - "loss": 0.8766, - "step": 5910 - }, - { - "epoch": 0.4442356831504584, - "grad_norm": 7.395509553367846, - "learning_rate": 2.4545970058655938e-06, - "loss": 1.0599, - "step": 5911 - }, - { - "epoch": 0.4443108372162934, - "grad_norm": 2.3294400590962026, - "learning_rate": 2.454122893241088e-06, - "loss": 0.9538, - "step": 5912 - }, - { - "epoch": 0.44438599128212836, - "grad_norm": 1.7422353284625594, - "learning_rate": 2.453648753708061e-06, - "loss": 0.972, - "step": 5913 - }, - { - "epoch": 0.4444611453479633, - "grad_norm": 1.3919992835574664, - "learning_rate": 2.4531745872946085e-06, - "loss": 0.9784, - "step": 5914 - }, - { - "epoch": 0.4445362994137983, - "grad_norm": 1.923557630393027, - "learning_rate": 2.4527003940288264e-06, - "loss": 1.0528, - "step": 5915 - }, - { - "epoch": 0.44461145347963327, - "grad_norm": 1.3669573780941382, - "learning_rate": 2.4522261739388127e-06, - "loss": 0.9538, - "step": 5916 - }, - { - "epoch": 0.4446866075454682, - "grad_norm": 1.5429544568908795, - "learning_rate": 2.451751927052666e-06, - "loss": 1.0163, - "step": 5917 - }, - { - "epoch": 0.44476176161130315, - "grad_norm": 1.5658546294584033, - "learning_rate": 2.4512776533984882e-06, - "loss": 0.8628, - "step": 5918 - }, - { - "epoch": 0.4448369156771381, - "grad_norm": 1.8365811132916137, - "learning_rate": 2.450803353004382e-06, - "loss": 0.921, - "step": 5919 - }, - { - "epoch": 0.4449120697429731, - "grad_norm": 1.4791969332038608, - "learning_rate": 2.4503290258984493e-06, - "loss": 1.052, - "step": 5920 - }, - { - "epoch": 0.44498722380880806, - "grad_norm": 1.9015112588665983, - "learning_rate": 2.4498546721087984e-06, - "loss": 1.0049, - "step": 5921 - }, - { - "epoch": 0.44506237787464303, - "grad_norm": 1.615682592785818, - "learning_rate": 2.4493802916635355e-06, - "loss": 1.0431, - "step": 5922 - }, - { - "epoch": 0.445137531940478, - "grad_norm": 1.5324118741010806, - "learning_rate": 2.448905884590769e-06, - "loss": 1.0554, - "step": 5923 - }, - { - "epoch": 0.4452126860063129, - "grad_norm": 1.4211634437957243, - "learning_rate": 2.448431450918611e-06, - "loss": 0.9826, - "step": 5924 - }, - { - "epoch": 0.4452878400721479, - "grad_norm": 2.010325243622047, - "learning_rate": 2.4479569906751714e-06, - "loss": 0.9045, - "step": 5925 - }, - { - "epoch": 0.44536299413798286, - "grad_norm": 1.9917642181107342, - "learning_rate": 2.4474825038885655e-06, - "loss": 0.8476, - "step": 5926 - }, - { - "epoch": 0.44543814820381783, - "grad_norm": 1.7595740414060277, - "learning_rate": 2.4470079905869066e-06, - "loss": 0.9289, - "step": 5927 - }, - { - "epoch": 0.4455133022696528, - "grad_norm": 6.108478294966606, - "learning_rate": 2.446533450798314e-06, - "loss": 0.9205, - "step": 5928 - }, - { - "epoch": 0.44558845633548777, - "grad_norm": 1.7832747880589135, - "learning_rate": 2.4460588845509036e-06, - "loss": 1.0023, - "step": 5929 - }, - { - "epoch": 0.4456636104013227, - "grad_norm": 2.7881995824310186, - "learning_rate": 2.4455842918727957e-06, - "loss": 0.9341, - "step": 5930 - }, - { - "epoch": 0.44573876446715766, - "grad_norm": 2.7202048081538313, - "learning_rate": 2.4451096727921135e-06, - "loss": 0.9828, - "step": 5931 - }, - { - "epoch": 0.4458139185329926, - "grad_norm": 3.850280300687193, - "learning_rate": 2.444635027336977e-06, - "loss": 1.051, - "step": 5932 - }, - { - "epoch": 0.4458890725988276, - "grad_norm": 1.664817531313008, - "learning_rate": 2.4441603555355142e-06, - "loss": 1.0005, - "step": 5933 - }, - { - "epoch": 0.44596422666466257, - "grad_norm": 2.177470621464852, - "learning_rate": 2.443685657415849e-06, - "loss": 1.042, - "step": 5934 - }, - { - "epoch": 0.44603938073049754, - "grad_norm": 2.5795934945347296, - "learning_rate": 2.4432109330061096e-06, - "loss": 0.9358, - "step": 5935 - }, - { - "epoch": 0.4461145347963325, - "grad_norm": 1.755934326473151, - "learning_rate": 2.4427361823344256e-06, - "loss": 1.0083, - "step": 5936 - }, - { - "epoch": 0.4461896888621674, - "grad_norm": 1.5148787086706454, - "learning_rate": 2.4422614054289264e-06, - "loss": 0.9237, - "step": 5937 - }, - { - "epoch": 0.4462648429280024, - "grad_norm": 1.4190936710014657, - "learning_rate": 2.4417866023177466e-06, - "loss": 0.9625, - "step": 5938 - }, - { - "epoch": 0.44633999699383736, - "grad_norm": 1.7082154380621497, - "learning_rate": 2.4413117730290186e-06, - "loss": 1.0408, - "step": 5939 - }, - { - "epoch": 0.44641515105967233, - "grad_norm": 2.190734869232913, - "learning_rate": 2.440836917590878e-06, - "loss": 1.0436, - "step": 5940 - }, - { - "epoch": 0.4464903051255073, - "grad_norm": 1.5203056029489703, - "learning_rate": 2.440362036031462e-06, - "loss": 0.9093, - "step": 5941 - }, - { - "epoch": 0.4465654591913423, - "grad_norm": 1.5992572848732673, - "learning_rate": 2.4398871283789088e-06, - "loss": 0.9897, - "step": 5942 - }, - { - "epoch": 0.4466406132571772, - "grad_norm": 2.0636383152113407, - "learning_rate": 2.439412194661359e-06, - "loss": 0.9623, - "step": 5943 - }, - { - "epoch": 0.44671576732301216, - "grad_norm": 5.161339300061693, - "learning_rate": 2.4389372349069544e-06, - "loss": 0.9006, - "step": 5944 - }, - { - "epoch": 0.44679092138884713, - "grad_norm": 1.8510105259955145, - "learning_rate": 2.4384622491438374e-06, - "loss": 0.9264, - "step": 5945 - }, - { - "epoch": 0.4468660754546821, - "grad_norm": 1.8507112888367234, - "learning_rate": 2.437987237400153e-06, - "loss": 0.9209, - "step": 5946 - }, - { - "epoch": 0.44694122952051707, - "grad_norm": 2.3439128564838505, - "learning_rate": 2.4375121997040477e-06, - "loss": 0.9255, - "step": 5947 - }, - { - "epoch": 0.44701638358635204, - "grad_norm": 1.7406880721713496, - "learning_rate": 2.4370371360836697e-06, - "loss": 1.0654, - "step": 5948 - }, - { - "epoch": 0.447091537652187, - "grad_norm": 1.9177436152632752, - "learning_rate": 2.436562046567167e-06, - "loss": 0.9921, - "step": 5949 - }, - { - "epoch": 0.4471666917180219, - "grad_norm": 1.8993671793900633, - "learning_rate": 2.4360869311826927e-06, - "loss": 0.9888, - "step": 5950 - }, - { - "epoch": 0.4472418457838569, - "grad_norm": 1.261439242272326, - "learning_rate": 2.435611789958397e-06, - "loss": 0.9817, - "step": 5951 - }, - { - "epoch": 0.44731699984969187, - "grad_norm": 1.5749821101792374, - "learning_rate": 2.435136622922434e-06, - "loss": 0.9701, - "step": 5952 - }, - { - "epoch": 0.44739215391552684, - "grad_norm": 1.6488090927952372, - "learning_rate": 2.4346614301029613e-06, - "loss": 1.0158, - "step": 5953 - }, - { - "epoch": 0.4474673079813618, - "grad_norm": 1.6776626091952302, - "learning_rate": 2.434186211528133e-06, - "loss": 1.095, - "step": 5954 - }, - { - "epoch": 0.4475424620471968, - "grad_norm": 2.3862990589404234, - "learning_rate": 2.4337109672261097e-06, - "loss": 1.0333, - "step": 5955 - }, - { - "epoch": 0.4476176161130317, - "grad_norm": 1.4662383498097296, - "learning_rate": 2.433235697225051e-06, - "loss": 0.866, - "step": 5956 - }, - { - "epoch": 0.44769277017886666, - "grad_norm": 1.639691768616692, - "learning_rate": 2.4327604015531177e-06, - "loss": 1.0752, - "step": 5957 - }, - { - "epoch": 0.44776792424470163, - "grad_norm": 1.6028223804296522, - "learning_rate": 2.432285080238474e-06, - "loss": 1.0152, - "step": 5958 - }, - { - "epoch": 0.4478430783105366, - "grad_norm": 0.6971529915073573, - "learning_rate": 2.4318097333092837e-06, - "loss": 0.8478, - "step": 5959 - }, - { - "epoch": 0.4479182323763716, - "grad_norm": 1.9719518026880458, - "learning_rate": 2.4313343607937135e-06, - "loss": 0.9774, - "step": 5960 - }, - { - "epoch": 0.44799338644220654, - "grad_norm": 2.523415391193299, - "learning_rate": 2.430858962719931e-06, - "loss": 0.8855, - "step": 5961 - }, - { - "epoch": 0.44806854050804146, - "grad_norm": 1.311442266865255, - "learning_rate": 2.4303835391161047e-06, - "loss": 1.0151, - "step": 5962 - }, - { - "epoch": 0.4481436945738764, - "grad_norm": 2.0652895471812878, - "learning_rate": 2.4299080900104055e-06, - "loss": 1.0475, - "step": 5963 - }, - { - "epoch": 0.4482188486397114, - "grad_norm": 1.9890469798451231, - "learning_rate": 2.4294326154310058e-06, - "loss": 0.9244, - "step": 5964 - }, - { - "epoch": 0.44829400270554637, - "grad_norm": 1.8943720485219993, - "learning_rate": 2.4289571154060794e-06, - "loss": 1.0746, - "step": 5965 - }, - { - "epoch": 0.44836915677138134, - "grad_norm": 0.6962217750512706, - "learning_rate": 2.4284815899638012e-06, - "loss": 0.879, - "step": 5966 - }, - { - "epoch": 0.4484443108372163, - "grad_norm": 1.7509595420167596, - "learning_rate": 2.428006039132348e-06, - "loss": 0.9295, - "step": 5967 - }, - { - "epoch": 0.4485194649030513, - "grad_norm": 1.3771010731964994, - "learning_rate": 2.4275304629398985e-06, - "loss": 0.9947, - "step": 5968 - }, - { - "epoch": 0.4485946189688862, - "grad_norm": 3.8109380305228466, - "learning_rate": 2.427054861414631e-06, - "loss": 1.0029, - "step": 5969 - }, - { - "epoch": 0.44866977303472116, - "grad_norm": 1.5018939935480915, - "learning_rate": 2.426579234584728e-06, - "loss": 1.0449, - "step": 5970 - }, - { - "epoch": 0.44874492710055613, - "grad_norm": 1.9738173891610122, - "learning_rate": 2.426103582478372e-06, - "loss": 0.9947, - "step": 5971 - }, - { - "epoch": 0.4488200811663911, - "grad_norm": 1.6107378624129136, - "learning_rate": 2.4256279051237473e-06, - "loss": 0.9656, - "step": 5972 - }, - { - "epoch": 0.4488952352322261, - "grad_norm": 1.7290614758710312, - "learning_rate": 2.4251522025490393e-06, - "loss": 0.9023, - "step": 5973 - }, - { - "epoch": 0.44897038929806105, - "grad_norm": 2.1160434010531026, - "learning_rate": 2.4246764747824347e-06, - "loss": 1.0675, - "step": 5974 - }, - { - "epoch": 0.44904554336389596, - "grad_norm": 1.8990908154009858, - "learning_rate": 2.4242007218521236e-06, - "loss": 0.9781, - "step": 5975 - }, - { - "epoch": 0.44912069742973093, - "grad_norm": 1.7543994235685334, - "learning_rate": 2.423724943786295e-06, - "loss": 0.9752, - "step": 5976 - }, - { - "epoch": 0.4491958514955659, - "grad_norm": 1.7431375282469288, - "learning_rate": 2.4232491406131408e-06, - "loss": 0.8964, - "step": 5977 - }, - { - "epoch": 0.44927100556140087, - "grad_norm": 1.7625879820379005, - "learning_rate": 2.4227733123608548e-06, - "loss": 0.9667, - "step": 5978 - }, - { - "epoch": 0.44934615962723584, - "grad_norm": 1.5717597479307879, - "learning_rate": 2.4222974590576303e-06, - "loss": 1.0351, - "step": 5979 - }, - { - "epoch": 0.4494213136930708, - "grad_norm": 1.560276356573946, - "learning_rate": 2.4218215807316647e-06, - "loss": 0.9993, - "step": 5980 - }, - { - "epoch": 0.4494964677589058, - "grad_norm": 1.9732553308874792, - "learning_rate": 2.4213456774111553e-06, - "loss": 1.0509, - "step": 5981 - }, - { - "epoch": 0.4495716218247407, - "grad_norm": 2.4478278827123017, - "learning_rate": 2.420869749124301e-06, - "loss": 0.8641, - "step": 5982 - }, - { - "epoch": 0.44964677589057567, - "grad_norm": 1.5294797356850918, - "learning_rate": 2.4203937958993027e-06, - "loss": 0.9767, - "step": 5983 - }, - { - "epoch": 0.44972192995641064, - "grad_norm": 1.6750913680479025, - "learning_rate": 2.4199178177643617e-06, - "loss": 0.979, - "step": 5984 - }, - { - "epoch": 0.4497970840222456, - "grad_norm": 1.998338605329205, - "learning_rate": 2.4194418147476827e-06, - "loss": 0.9941, - "step": 5985 - }, - { - "epoch": 0.4498722380880806, - "grad_norm": 1.9707439558262405, - "learning_rate": 2.4189657868774688e-06, - "loss": 1.0398, - "step": 5986 - }, - { - "epoch": 0.44994739215391555, - "grad_norm": 1.383391997039465, - "learning_rate": 2.418489734181929e-06, - "loss": 1.0467, - "step": 5987 - }, - { - "epoch": 0.45002254621975046, - "grad_norm": 1.8267490392870092, - "learning_rate": 2.4180136566892696e-06, - "loss": 1.0406, - "step": 5988 - }, - { - "epoch": 0.45009770028558543, - "grad_norm": 2.991986380010602, - "learning_rate": 2.4175375544276998e-06, - "loss": 0.9738, - "step": 5989 - }, - { - "epoch": 0.4501728543514204, - "grad_norm": 2.289724312891539, - "learning_rate": 2.4170614274254317e-06, - "loss": 1.0047, - "step": 5990 - }, - { - "epoch": 0.4502480084172554, - "grad_norm": 1.6776150004824557, - "learning_rate": 2.4165852757106762e-06, - "loss": 0.9076, - "step": 5991 - }, - { - "epoch": 0.45032316248309034, - "grad_norm": 1.7153947157988452, - "learning_rate": 2.416109099311649e-06, - "loss": 1.0069, - "step": 5992 - }, - { - "epoch": 0.4503983165489253, - "grad_norm": 2.181853390720241, - "learning_rate": 2.4156328982565636e-06, - "loss": 1.011, - "step": 5993 - }, - { - "epoch": 0.4504734706147603, - "grad_norm": 1.5261627860930012, - "learning_rate": 2.4151566725736375e-06, - "loss": 1.0762, - "step": 5994 - }, - { - "epoch": 0.4505486246805952, - "grad_norm": 1.8457312849207639, - "learning_rate": 2.414680422291089e-06, - "loss": 0.876, - "step": 5995 - }, - { - "epoch": 0.45062377874643017, - "grad_norm": 1.3658323274616093, - "learning_rate": 2.4142041474371368e-06, - "loss": 0.9688, - "step": 5996 - }, - { - "epoch": 0.45069893281226514, - "grad_norm": 0.7118766366673546, - "learning_rate": 2.4137278480400038e-06, - "loss": 0.9204, - "step": 5997 - }, - { - "epoch": 0.4507740868781001, - "grad_norm": 2.66660882966581, - "learning_rate": 2.4132515241279106e-06, - "loss": 0.8989, - "step": 5998 - }, - { - "epoch": 0.4508492409439351, - "grad_norm": 2.1889406092687307, - "learning_rate": 2.4127751757290826e-06, - "loss": 0.9397, - "step": 5999 - }, - { - "epoch": 0.45092439500977005, - "grad_norm": 1.7972861441568706, - "learning_rate": 2.4122988028717454e-06, - "loss": 0.9367, - "step": 6000 - }, - { - "epoch": 0.45099954907560497, - "grad_norm": 1.8121948643044843, - "learning_rate": 2.4118224055841243e-06, - "loss": 0.9695, - "step": 6001 - }, - { - "epoch": 0.45107470314143994, - "grad_norm": 2.0773186517514124, - "learning_rate": 2.4113459838944496e-06, - "loss": 0.9127, - "step": 6002 - }, - { - "epoch": 0.4511498572072749, - "grad_norm": 1.370431332454146, - "learning_rate": 2.4108695378309495e-06, - "loss": 0.962, - "step": 6003 - }, - { - "epoch": 0.4512250112731099, - "grad_norm": 2.400785210429586, - "learning_rate": 2.4103930674218565e-06, - "loss": 0.9269, - "step": 6004 - }, - { - "epoch": 0.45130016533894485, - "grad_norm": 1.6816426098819586, - "learning_rate": 2.4099165726954026e-06, - "loss": 0.979, - "step": 6005 - }, - { - "epoch": 0.4513753194047798, - "grad_norm": 1.5875113058814005, - "learning_rate": 2.409440053679822e-06, - "loss": 0.9707, - "step": 6006 - }, - { - "epoch": 0.45145047347061473, - "grad_norm": 1.9529323025058438, - "learning_rate": 2.40896351040335e-06, - "loss": 1.0015, - "step": 6007 - }, - { - "epoch": 0.4515256275364497, - "grad_norm": 3.4540705012121204, - "learning_rate": 2.4084869428942243e-06, - "loss": 1.0029, - "step": 6008 - }, - { - "epoch": 0.4516007816022847, - "grad_norm": 1.6342016999832143, - "learning_rate": 2.4080103511806836e-06, - "loss": 1.0463, - "step": 6009 - }, - { - "epoch": 0.45167593566811964, - "grad_norm": 1.745566883861147, - "learning_rate": 2.4075337352909667e-06, - "loss": 1.0567, - "step": 6010 - }, - { - "epoch": 0.4517510897339546, - "grad_norm": 1.4815109731246574, - "learning_rate": 2.4070570952533155e-06, - "loss": 0.9964, - "step": 6011 - }, - { - "epoch": 0.4518262437997896, - "grad_norm": 3.777415325192805, - "learning_rate": 2.4065804310959725e-06, - "loss": 0.9972, - "step": 6012 - }, - { - "epoch": 0.45190139786562455, - "grad_norm": 8.252020386190376, - "learning_rate": 2.406103742847182e-06, - "loss": 1.1717, - "step": 6013 - }, - { - "epoch": 0.45197655193145947, - "grad_norm": 1.2784955259445492, - "learning_rate": 2.4056270305351896e-06, - "loss": 0.9096, - "step": 6014 - }, - { - "epoch": 0.45205170599729444, - "grad_norm": 0.7656805446484157, - "learning_rate": 2.4051502941882422e-06, - "loss": 0.8421, - "step": 6015 - }, - { - "epoch": 0.4521268600631294, - "grad_norm": 2.35507872026733, - "learning_rate": 2.4046735338345897e-06, - "loss": 0.9684, - "step": 6016 - }, - { - "epoch": 0.4522020141289644, - "grad_norm": 1.6126905934857383, - "learning_rate": 2.4041967495024796e-06, - "loss": 0.9657, - "step": 6017 - }, - { - "epoch": 0.45227716819479935, - "grad_norm": 1.8161627493456074, - "learning_rate": 2.403719941220164e-06, - "loss": 1.0009, - "step": 6018 - }, - { - "epoch": 0.4523523222606343, - "grad_norm": 2.139000714944414, - "learning_rate": 2.403243109015897e-06, - "loss": 0.791, - "step": 6019 - }, - { - "epoch": 0.45242747632646924, - "grad_norm": 1.5600093827821313, - "learning_rate": 2.402766252917931e-06, - "loss": 1.039, - "step": 6020 - }, - { - "epoch": 0.4525026303923042, - "grad_norm": 1.686811801310256, - "learning_rate": 2.402289372954523e-06, - "loss": 1.049, - "step": 6021 - }, - { - "epoch": 0.4525777844581392, - "grad_norm": 1.5833427361995505, - "learning_rate": 2.4018124691539286e-06, - "loss": 0.9734, - "step": 6022 - }, - { - "epoch": 0.45265293852397415, - "grad_norm": 2.6773797154411096, - "learning_rate": 2.401335541544406e-06, - "loss": 0.9486, - "step": 6023 - }, - { - "epoch": 0.4527280925898091, - "grad_norm": 1.6928168707064604, - "learning_rate": 2.400858590154217e-06, - "loss": 0.9203, - "step": 6024 - }, - { - "epoch": 0.4528032466556441, - "grad_norm": 1.6966764527109215, - "learning_rate": 2.400381615011621e-06, - "loss": 1.0517, - "step": 6025 - }, - { - "epoch": 0.45287840072147906, - "grad_norm": 1.8147103216655402, - "learning_rate": 2.399904616144881e-06, - "loss": 1.0144, - "step": 6026 - }, - { - "epoch": 0.452953554787314, - "grad_norm": 1.5310400994130566, - "learning_rate": 2.3994275935822618e-06, - "loss": 0.9673, - "step": 6027 - }, - { - "epoch": 0.45302870885314894, - "grad_norm": 2.532210650469433, - "learning_rate": 2.398950547352028e-06, - "loss": 0.9302, - "step": 6028 - }, - { - "epoch": 0.4531038629189839, - "grad_norm": 0.8175743544009071, - "learning_rate": 2.398473477482446e-06, - "loss": 0.8641, - "step": 6029 - }, - { - "epoch": 0.4531790169848189, - "grad_norm": 2.0943308921183283, - "learning_rate": 2.397996384001785e-06, - "loss": 0.9466, - "step": 6030 - }, - { - "epoch": 0.45325417105065385, - "grad_norm": 4.253181388755855, - "learning_rate": 2.397519266938314e-06, - "loss": 0.9, - "step": 6031 - }, - { - "epoch": 0.4533293251164888, - "grad_norm": 2.6429351993949233, - "learning_rate": 2.3970421263203045e-06, - "loss": 0.9671, - "step": 6032 - }, - { - "epoch": 0.45340447918232374, - "grad_norm": 1.717064395063576, - "learning_rate": 2.396564962176028e-06, - "loss": 1.0078, - "step": 6033 - }, - { - "epoch": 0.4534796332481587, - "grad_norm": 1.5722899630540583, - "learning_rate": 2.39608777453376e-06, - "loss": 0.9894, - "step": 6034 - }, - { - "epoch": 0.4535547873139937, - "grad_norm": 1.9797705147430957, - "learning_rate": 2.395610563421774e-06, - "loss": 0.9944, - "step": 6035 - }, - { - "epoch": 0.45362994137982865, - "grad_norm": 1.8431378657686528, - "learning_rate": 2.3951333288683476e-06, - "loss": 1.056, - "step": 6036 - }, - { - "epoch": 0.4537050954456636, - "grad_norm": 1.9051109412821972, - "learning_rate": 2.394656070901757e-06, - "loss": 0.9637, - "step": 6037 - }, - { - "epoch": 0.4537802495114986, - "grad_norm": 1.9090712260391696, - "learning_rate": 2.394178789550285e-06, - "loss": 1.0184, - "step": 6038 - }, - { - "epoch": 0.45385540357733356, - "grad_norm": 1.7523157601368187, - "learning_rate": 2.3937014848422094e-06, - "loss": 0.9986, - "step": 6039 - }, - { - "epoch": 0.4539305576431685, - "grad_norm": 1.905568986158922, - "learning_rate": 2.3932241568058127e-06, - "loss": 1.0776, - "step": 6040 - }, - { - "epoch": 0.45400571170900345, - "grad_norm": 1.6124235741298463, - "learning_rate": 2.3927468054693797e-06, - "loss": 0.9001, - "step": 6041 - }, - { - "epoch": 0.4540808657748384, - "grad_norm": 1.5045128329773414, - "learning_rate": 2.392269430861194e-06, - "loss": 0.9714, - "step": 6042 - }, - { - "epoch": 0.4541560198406734, - "grad_norm": 0.7464119597103417, - "learning_rate": 2.391792033009543e-06, - "loss": 0.8756, - "step": 6043 - }, - { - "epoch": 0.45423117390650836, - "grad_norm": 1.405289852001402, - "learning_rate": 2.391314611942714e-06, - "loss": 0.8477, - "step": 6044 - }, - { - "epoch": 0.4543063279723433, - "grad_norm": 2.0644534714625418, - "learning_rate": 2.390837167688995e-06, - "loss": 0.9876, - "step": 6045 - }, - { - "epoch": 0.45438148203817824, - "grad_norm": 1.5591724822307835, - "learning_rate": 2.3903597002766777e-06, - "loss": 0.8913, - "step": 6046 - }, - { - "epoch": 0.4544566361040132, - "grad_norm": 1.2319171434196956, - "learning_rate": 2.3898822097340527e-06, - "loss": 1.0121, - "step": 6047 - }, - { - "epoch": 0.4545317901698482, - "grad_norm": 1.3191250760351234, - "learning_rate": 2.389404696089415e-06, - "loss": 1.0017, - "step": 6048 - }, - { - "epoch": 0.45460694423568315, - "grad_norm": 1.91592223457971, - "learning_rate": 2.388927159371057e-06, - "loss": 1.1626, - "step": 6049 - }, - { - "epoch": 0.4546820983015181, - "grad_norm": 2.2883377750665725, - "learning_rate": 2.3884495996072755e-06, - "loss": 0.9426, - "step": 6050 - }, - { - "epoch": 0.4547572523673531, - "grad_norm": 1.4267959955159792, - "learning_rate": 2.3879720168263683e-06, - "loss": 0.9515, - "step": 6051 - }, - { - "epoch": 0.454832406433188, - "grad_norm": 2.460856312214851, - "learning_rate": 2.387494411056633e-06, - "loss": 1.0595, - "step": 6052 - }, - { - "epoch": 0.454907560499023, - "grad_norm": 1.6160212932779565, - "learning_rate": 2.38701678232637e-06, - "loss": 1.0204, - "step": 6053 - }, - { - "epoch": 0.45498271456485795, - "grad_norm": 1.796741779198715, - "learning_rate": 2.386539130663881e-06, - "loss": 1.0908, - "step": 6054 - }, - { - "epoch": 0.4550578686306929, - "grad_norm": 1.8410924735603482, - "learning_rate": 2.386061456097468e-06, - "loss": 1.0414, - "step": 6055 - }, - { - "epoch": 0.4551330226965279, - "grad_norm": 1.7101757147784817, - "learning_rate": 2.3855837586554356e-06, - "loss": 0.9881, - "step": 6056 - }, - { - "epoch": 0.45520817676236286, - "grad_norm": 1.7291752305642207, - "learning_rate": 2.3851060383660893e-06, - "loss": 1.026, - "step": 6057 - }, - { - "epoch": 0.45528333082819783, - "grad_norm": 5.149981948672759, - "learning_rate": 2.3846282952577354e-06, - "loss": 0.8428, - "step": 6058 - }, - { - "epoch": 0.45535848489403274, - "grad_norm": 1.8858534445752946, - "learning_rate": 2.384150529358681e-06, - "loss": 1.0373, - "step": 6059 - }, - { - "epoch": 0.4554336389598677, - "grad_norm": 2.6259725676336294, - "learning_rate": 2.383672740697238e-06, - "loss": 1.0939, - "step": 6060 - }, - { - "epoch": 0.4555087930257027, - "grad_norm": 1.7903077568514731, - "learning_rate": 2.3831949293017166e-06, - "loss": 0.9161, - "step": 6061 - }, - { - "epoch": 0.45558394709153766, - "grad_norm": 1.8441292970677488, - "learning_rate": 2.3827170952004266e-06, - "loss": 0.983, - "step": 6062 - }, - { - "epoch": 0.4556591011573726, - "grad_norm": 1.5309860624828842, - "learning_rate": 2.382239238421684e-06, - "loss": 1.0395, - "step": 6063 - }, - { - "epoch": 0.4557342552232076, - "grad_norm": 2.0210113709142625, - "learning_rate": 2.3817613589938026e-06, - "loss": 0.9182, - "step": 6064 - }, - { - "epoch": 0.4558094092890425, - "grad_norm": 1.6922925780861833, - "learning_rate": 2.381283456945099e-06, - "loss": 0.8715, - "step": 6065 - }, - { - "epoch": 0.4558845633548775, - "grad_norm": 1.5511415000675508, - "learning_rate": 2.3808055323038907e-06, - "loss": 0.8893, - "step": 6066 - }, - { - "epoch": 0.45595971742071245, - "grad_norm": 4.228011300756678, - "learning_rate": 2.3803275850984963e-06, - "loss": 0.9503, - "step": 6067 - }, - { - "epoch": 0.4560348714865474, - "grad_norm": 1.2748067784222836, - "learning_rate": 2.3798496153572363e-06, - "loss": 1.0134, - "step": 6068 - }, - { - "epoch": 0.4561100255523824, - "grad_norm": 2.2770917435677176, - "learning_rate": 2.3793716231084313e-06, - "loss": 0.9372, - "step": 6069 - }, - { - "epoch": 0.45618517961821736, - "grad_norm": 1.585504106206518, - "learning_rate": 2.3788936083804058e-06, - "loss": 0.9403, - "step": 6070 - }, - { - "epoch": 0.45626033368405233, - "grad_norm": 1.7450754494737697, - "learning_rate": 2.3784155712014827e-06, - "loss": 0.886, - "step": 6071 - }, - { - "epoch": 0.45633548774988725, - "grad_norm": 1.7314527482391096, - "learning_rate": 2.3779375115999877e-06, - "loss": 1.0185, - "step": 6072 - }, - { - "epoch": 0.4564106418157222, - "grad_norm": 1.5850391568422355, - "learning_rate": 2.3774594296042485e-06, - "loss": 0.9875, - "step": 6073 - }, - { - "epoch": 0.4564857958815572, - "grad_norm": 1.7394628730533335, - "learning_rate": 2.376981325242592e-06, - "loss": 0.9381, - "step": 6074 - }, - { - "epoch": 0.45656094994739216, - "grad_norm": 3.1998008785284506, - "learning_rate": 2.376503198543349e-06, - "loss": 1.0122, - "step": 6075 - }, - { - "epoch": 0.45663610401322713, - "grad_norm": 1.772912304787326, - "learning_rate": 2.3760250495348495e-06, - "loss": 1.0028, - "step": 6076 - }, - { - "epoch": 0.4567112580790621, - "grad_norm": 4.135737460335256, - "learning_rate": 2.3755468782454265e-06, - "loss": 0.9999, - "step": 6077 - }, - { - "epoch": 0.456786412144897, - "grad_norm": 1.2667827256053865, - "learning_rate": 2.375068684703413e-06, - "loss": 0.9793, - "step": 6078 - }, - { - "epoch": 0.456861566210732, - "grad_norm": 2.199999817934896, - "learning_rate": 2.3745904689371423e-06, - "loss": 1.0504, - "step": 6079 - }, - { - "epoch": 0.45693672027656695, - "grad_norm": 1.5090629649995968, - "learning_rate": 2.374112230974953e-06, - "loss": 0.9076, - "step": 6080 - }, - { - "epoch": 0.4570118743424019, - "grad_norm": 1.666676791478239, - "learning_rate": 2.3736339708451803e-06, - "loss": 0.9918, - "step": 6081 - }, - { - "epoch": 0.4570870284082369, - "grad_norm": 1.392708835111921, - "learning_rate": 2.3731556885761656e-06, - "loss": 0.9926, - "step": 6082 - }, - { - "epoch": 0.45716218247407187, - "grad_norm": 1.5097991821191845, - "learning_rate": 2.3726773841962472e-06, - "loss": 1.0706, - "step": 6083 - }, - { - "epoch": 0.45723733653990684, - "grad_norm": 1.899845558715933, - "learning_rate": 2.372199057733766e-06, - "loss": 0.9332, - "step": 6084 - }, - { - "epoch": 0.45731249060574175, - "grad_norm": 1.3492343674171676, - "learning_rate": 2.371720709217066e-06, - "loss": 0.9151, - "step": 6085 - }, - { - "epoch": 0.4573876446715767, - "grad_norm": 0.9141644152042989, - "learning_rate": 2.3712423386744897e-06, - "loss": 0.9155, - "step": 6086 - }, - { - "epoch": 0.4574627987374117, - "grad_norm": 0.8707728799951246, - "learning_rate": 2.370763946134384e-06, - "loss": 0.9778, - "step": 6087 - }, - { - "epoch": 0.45753795280324666, - "grad_norm": 1.382899308577962, - "learning_rate": 2.3702855316250943e-06, - "loss": 0.9574, - "step": 6088 - }, - { - "epoch": 0.45761310686908163, - "grad_norm": 2.0551527533642466, - "learning_rate": 2.3698070951749692e-06, - "loss": 0.9491, - "step": 6089 - }, - { - "epoch": 0.4576882609349166, - "grad_norm": 1.6372753280379069, - "learning_rate": 2.3693286368123576e-06, - "loss": 0.982, - "step": 6090 - }, - { - "epoch": 0.4577634150007515, - "grad_norm": 1.9756786194584517, - "learning_rate": 2.3688501565656104e-06, - "loss": 1.1472, - "step": 6091 - }, - { - "epoch": 0.4578385690665865, - "grad_norm": 1.8268631060405922, - "learning_rate": 2.3683716544630784e-06, - "loss": 0.9197, - "step": 6092 - }, - { - "epoch": 0.45791372313242146, - "grad_norm": 1.8710839384773417, - "learning_rate": 2.367893130533116e-06, - "loss": 0.9639, - "step": 6093 - }, - { - "epoch": 0.4579888771982564, - "grad_norm": 1.7118509023083783, - "learning_rate": 2.367414584804076e-06, - "loss": 1.072, - "step": 6094 - }, - { - "epoch": 0.4580640312640914, - "grad_norm": 1.5356371565552254, - "learning_rate": 2.3669360173043155e-06, - "loss": 1.0572, - "step": 6095 - }, - { - "epoch": 0.45813918532992637, - "grad_norm": 1.8218241951141834, - "learning_rate": 2.3664574280621907e-06, - "loss": 0.9207, - "step": 6096 - }, - { - "epoch": 0.4582143393957613, - "grad_norm": 1.683982644890994, - "learning_rate": 2.36597881710606e-06, - "loss": 0.9906, - "step": 6097 - }, - { - "epoch": 0.45828949346159625, - "grad_norm": 1.3607365158705318, - "learning_rate": 2.3655001844642828e-06, - "loss": 1.0204, - "step": 6098 - }, - { - "epoch": 0.4583646475274312, - "grad_norm": 1.778758665855889, - "learning_rate": 2.3650215301652207e-06, - "loss": 1.0557, - "step": 6099 - }, - { - "epoch": 0.4584398015932662, - "grad_norm": 2.163222166985581, - "learning_rate": 2.3645428542372347e-06, - "loss": 1.0112, - "step": 6100 - }, - { - "epoch": 0.45851495565910116, - "grad_norm": 1.930021990616191, - "learning_rate": 2.3640641567086887e-06, - "loss": 0.9321, - "step": 6101 - }, - { - "epoch": 0.45859010972493613, - "grad_norm": 10.670808305950574, - "learning_rate": 2.363585437607947e-06, - "loss": 0.9303, - "step": 6102 - }, - { - "epoch": 0.4586652637907711, - "grad_norm": 1.8208557758941415, - "learning_rate": 2.3631066969633755e-06, - "loss": 1.081, - "step": 6103 - }, - { - "epoch": 0.458740417856606, - "grad_norm": 2.076583060800733, - "learning_rate": 2.362627934803343e-06, - "loss": 1.0093, - "step": 6104 - }, - { - "epoch": 0.458815571922441, - "grad_norm": 2.046507824295413, - "learning_rate": 2.362149151156216e-06, - "loss": 1.0152, - "step": 6105 - }, - { - "epoch": 0.45889072598827596, - "grad_norm": 1.8930125648254843, - "learning_rate": 2.3616703460503654e-06, - "loss": 1.1116, - "step": 6106 - }, - { - "epoch": 0.45896588005411093, - "grad_norm": 1.464562717182308, - "learning_rate": 2.3611915195141615e-06, - "loss": 0.864, - "step": 6107 - }, - { - "epoch": 0.4590410341199459, - "grad_norm": 2.0704102691021324, - "learning_rate": 2.3607126715759773e-06, - "loss": 0.9584, - "step": 6108 - }, - { - "epoch": 0.45911618818578087, - "grad_norm": 1.668311983836353, - "learning_rate": 2.360233802264186e-06, - "loss": 1.0436, - "step": 6109 - }, - { - "epoch": 0.4591913422516158, - "grad_norm": 1.9108439262729808, - "learning_rate": 2.359754911607163e-06, - "loss": 0.9673, - "step": 6110 - }, - { - "epoch": 0.45926649631745076, - "grad_norm": 1.2991195466503276, - "learning_rate": 2.3592759996332824e-06, - "loss": 1.0509, - "step": 6111 - }, - { - "epoch": 0.4593416503832857, - "grad_norm": 1.4578602250475718, - "learning_rate": 2.358797066370924e-06, - "loss": 0.9254, - "step": 6112 - }, - { - "epoch": 0.4594168044491207, - "grad_norm": 1.6282066837255609, - "learning_rate": 2.358318111848466e-06, - "loss": 0.9858, - "step": 6113 - }, - { - "epoch": 0.45949195851495567, - "grad_norm": 1.7616972995983127, - "learning_rate": 2.3578391360942872e-06, - "loss": 1.0463, - "step": 6114 - }, - { - "epoch": 0.45956711258079064, - "grad_norm": 1.5130701456443647, - "learning_rate": 2.3573601391367696e-06, - "loss": 1.0462, - "step": 6115 - }, - { - "epoch": 0.4596422666466256, - "grad_norm": 1.599010387985716, - "learning_rate": 2.3568811210042947e-06, - "loss": 0.9503, - "step": 6116 - }, - { - "epoch": 0.4597174207124605, - "grad_norm": 2.016997941711616, - "learning_rate": 2.3564020817252476e-06, - "loss": 0.8968, - "step": 6117 - }, - { - "epoch": 0.4597925747782955, - "grad_norm": 2.0820469381517883, - "learning_rate": 2.3559230213280115e-06, - "loss": 0.9637, - "step": 6118 - }, - { - "epoch": 0.45986772884413046, - "grad_norm": 0.6844150514074618, - "learning_rate": 2.3554439398409743e-06, - "loss": 0.8478, - "step": 6119 - }, - { - "epoch": 0.45994288290996543, - "grad_norm": 1.9898194843635397, - "learning_rate": 2.354964837292522e-06, - "loss": 1.0346, - "step": 6120 - }, - { - "epoch": 0.4600180369758004, - "grad_norm": 1.7377704684287258, - "learning_rate": 2.354485713711044e-06, - "loss": 0.8966, - "step": 6121 - }, - { - "epoch": 0.4600931910416354, - "grad_norm": 1.7595505323641585, - "learning_rate": 2.354006569124931e-06, - "loss": 0.9287, - "step": 6122 - }, - { - "epoch": 0.4601683451074703, - "grad_norm": 1.5709017886247358, - "learning_rate": 2.3535274035625713e-06, - "loss": 0.8812, - "step": 6123 - }, - { - "epoch": 0.46024349917330526, - "grad_norm": 2.2191538913238276, - "learning_rate": 2.353048217052361e-06, - "loss": 1.0147, - "step": 6124 - }, - { - "epoch": 0.46031865323914023, - "grad_norm": 1.7700356949693004, - "learning_rate": 2.3525690096226906e-06, - "loss": 0.9955, - "step": 6125 - }, - { - "epoch": 0.4603938073049752, - "grad_norm": 1.4703778924294972, - "learning_rate": 2.3520897813019566e-06, - "loss": 1.0732, - "step": 6126 - }, - { - "epoch": 0.46046896137081017, - "grad_norm": 1.5372550148899966, - "learning_rate": 2.351610532118555e-06, - "loss": 0.9982, - "step": 6127 - }, - { - "epoch": 0.46054411543664514, - "grad_norm": 1.6892355718172927, - "learning_rate": 2.3511312621008832e-06, - "loss": 1.0308, - "step": 6128 - }, - { - "epoch": 0.4606192695024801, - "grad_norm": 1.789628451588686, - "learning_rate": 2.35065197127734e-06, - "loss": 0.9709, - "step": 6129 - }, - { - "epoch": 0.460694423568315, - "grad_norm": 3.100983149999695, - "learning_rate": 2.350172659676323e-06, - "loss": 1.0128, - "step": 6130 - }, - { - "epoch": 0.46076957763415, - "grad_norm": 2.1329658648785976, - "learning_rate": 2.349693327326237e-06, - "loss": 0.9828, - "step": 6131 - }, - { - "epoch": 0.46084473169998497, - "grad_norm": 1.6210877200094929, - "learning_rate": 2.3492139742554816e-06, - "loss": 0.9839, - "step": 6132 - }, - { - "epoch": 0.46091988576581994, - "grad_norm": 1.9711130281126288, - "learning_rate": 2.3487346004924605e-06, - "loss": 0.9645, - "step": 6133 - }, - { - "epoch": 0.4609950398316549, - "grad_norm": 2.1153914938328024, - "learning_rate": 2.34825520606558e-06, - "loss": 0.8835, - "step": 6134 - }, - { - "epoch": 0.4610701938974899, - "grad_norm": 2.138028428640748, - "learning_rate": 2.3477757910032434e-06, - "loss": 0.919, - "step": 6135 - }, - { - "epoch": 0.4611453479633248, - "grad_norm": 3.1544752513418337, - "learning_rate": 2.347296355333861e-06, - "loss": 0.9156, - "step": 6136 - }, - { - "epoch": 0.46122050202915976, - "grad_norm": 1.4225201662642062, - "learning_rate": 2.346816899085839e-06, - "loss": 1.0746, - "step": 6137 - }, - { - "epoch": 0.46129565609499473, - "grad_norm": 1.838793736397326, - "learning_rate": 2.346337422287587e-06, - "loss": 0.9488, - "step": 6138 - }, - { - "epoch": 0.4613708101608297, - "grad_norm": 1.7079251739942762, - "learning_rate": 2.3458579249675176e-06, - "loss": 0.9762, - "step": 6139 - }, - { - "epoch": 0.4614459642266647, - "grad_norm": 1.6510224526748418, - "learning_rate": 2.345378407154041e-06, - "loss": 1.0292, - "step": 6140 - }, - { - "epoch": 0.46152111829249964, - "grad_norm": 1.6580509974202804, - "learning_rate": 2.344898868875572e-06, - "loss": 0.9768, - "step": 6141 - }, - { - "epoch": 0.46159627235833456, - "grad_norm": 1.3346708662085784, - "learning_rate": 2.3444193101605237e-06, - "loss": 0.9647, - "step": 6142 - }, - { - "epoch": 0.46167142642416953, - "grad_norm": 1.8481280831294165, - "learning_rate": 2.3439397310373126e-06, - "loss": 0.9828, - "step": 6143 - }, - { - "epoch": 0.4617465804900045, - "grad_norm": 0.6861642519216768, - "learning_rate": 2.343460131534356e-06, - "loss": 0.7871, - "step": 6144 - }, - { - "epoch": 0.46182173455583947, - "grad_norm": 1.753161435760103, - "learning_rate": 2.34298051168007e-06, - "loss": 0.9924, - "step": 6145 - }, - { - "epoch": 0.46189688862167444, - "grad_norm": 1.4229352582142358, - "learning_rate": 2.3425008715028766e-06, - "loss": 1.0551, - "step": 6146 - }, - { - "epoch": 0.4619720426875094, - "grad_norm": 1.4556614877690064, - "learning_rate": 2.3420212110311943e-06, - "loss": 0.8805, - "step": 6147 - }, - { - "epoch": 0.4620471967533444, - "grad_norm": 1.640629868273097, - "learning_rate": 2.3415415302934457e-06, - "loss": 1.0547, - "step": 6148 - }, - { - "epoch": 0.4621223508191793, - "grad_norm": 2.861240966302652, - "learning_rate": 2.341061829318054e-06, - "loss": 0.9685, - "step": 6149 - }, - { - "epoch": 0.46219750488501427, - "grad_norm": 1.7573622741564057, - "learning_rate": 2.340582108133442e-06, - "loss": 0.9884, - "step": 6150 - }, - { - "epoch": 0.46227265895084924, - "grad_norm": 1.7636732268301851, - "learning_rate": 2.340102366768037e-06, - "loss": 0.9584, - "step": 6151 - }, - { - "epoch": 0.4623478130166842, - "grad_norm": 1.7383460557837191, - "learning_rate": 2.339622605250264e-06, - "loss": 1.0056, - "step": 6152 - }, - { - "epoch": 0.4624229670825192, - "grad_norm": 1.5770329388823254, - "learning_rate": 2.339142823608551e-06, - "loss": 1.0175, - "step": 6153 - }, - { - "epoch": 0.46249812114835415, - "grad_norm": 1.5009009516577074, - "learning_rate": 2.3386630218713273e-06, - "loss": 0.9683, - "step": 6154 - }, - { - "epoch": 0.46257327521418906, - "grad_norm": 1.8118111189193118, - "learning_rate": 2.3381832000670223e-06, - "loss": 1.0072, - "step": 6155 - }, - { - "epoch": 0.46264842928002403, - "grad_norm": 2.0338362441858644, - "learning_rate": 2.3377033582240684e-06, - "loss": 1.0001, - "step": 6156 - }, - { - "epoch": 0.462723583345859, - "grad_norm": 2.1231107728496608, - "learning_rate": 2.3372234963708966e-06, - "loss": 0.9454, - "step": 6157 - }, - { - "epoch": 0.462798737411694, - "grad_norm": 2.533764285107068, - "learning_rate": 2.336743614535942e-06, - "loss": 0.9664, - "step": 6158 - }, - { - "epoch": 0.46287389147752894, - "grad_norm": 1.849619171535566, - "learning_rate": 2.3362637127476383e-06, - "loss": 1.0011, - "step": 6159 - }, - { - "epoch": 0.4629490455433639, - "grad_norm": 1.9169352246535558, - "learning_rate": 2.335783791034422e-06, - "loss": 0.9375, - "step": 6160 - }, - { - "epoch": 0.4630241996091989, - "grad_norm": 2.2358959863249046, - "learning_rate": 2.3353038494247305e-06, - "loss": 0.93, - "step": 6161 - }, - { - "epoch": 0.4630993536750338, - "grad_norm": 2.1688500065240937, - "learning_rate": 2.3348238879470015e-06, - "loss": 0.8911, - "step": 6162 - }, - { - "epoch": 0.46317450774086877, - "grad_norm": 1.993617303910787, - "learning_rate": 2.334343906629676e-06, - "loss": 0.9456, - "step": 6163 - }, - { - "epoch": 0.46324966180670374, - "grad_norm": 1.6651443761370175, - "learning_rate": 2.3338639055011924e-06, - "loss": 0.9193, - "step": 6164 - }, - { - "epoch": 0.4633248158725387, - "grad_norm": 1.6374732124525377, - "learning_rate": 2.333383884589995e-06, - "loss": 1.0249, - "step": 6165 - }, - { - "epoch": 0.4633999699383737, - "grad_norm": 1.6080757007240154, - "learning_rate": 2.3329038439245257e-06, - "loss": 0.9728, - "step": 6166 - }, - { - "epoch": 0.46347512400420865, - "grad_norm": 2.0900597006197073, - "learning_rate": 2.332423783533228e-06, - "loss": 1.061, - "step": 6167 - }, - { - "epoch": 0.46355027807004356, - "grad_norm": 1.5607819075835552, - "learning_rate": 2.331943703444549e-06, - "loss": 0.9346, - "step": 6168 - }, - { - "epoch": 0.46362543213587853, - "grad_norm": 2.016281021172781, - "learning_rate": 2.331463603686934e-06, - "loss": 1.065, - "step": 6169 - }, - { - "epoch": 0.4637005862017135, - "grad_norm": 1.4970976247865333, - "learning_rate": 2.330983484288832e-06, - "loss": 0.8619, - "step": 6170 - }, - { - "epoch": 0.4637757402675485, - "grad_norm": 1.831129360456533, - "learning_rate": 2.3305033452786905e-06, - "loss": 1.0585, - "step": 6171 - }, - { - "epoch": 0.46385089433338345, - "grad_norm": 2.13053252373351, - "learning_rate": 2.3300231866849606e-06, - "loss": 1.012, - "step": 6172 - }, - { - "epoch": 0.4639260483992184, - "grad_norm": 1.7229542031248752, - "learning_rate": 2.3295430085360927e-06, - "loss": 0.9335, - "step": 6173 - }, - { - "epoch": 0.4640012024650534, - "grad_norm": 1.240001939033715, - "learning_rate": 2.32906281086054e-06, - "loss": 1.0528, - "step": 6174 - }, - { - "epoch": 0.4640763565308883, - "grad_norm": 1.595959347496384, - "learning_rate": 2.3285825936867556e-06, - "loss": 1.0439, - "step": 6175 - }, - { - "epoch": 0.46415151059672327, - "grad_norm": 2.375287891808007, - "learning_rate": 2.328102357043194e-06, - "loss": 1.024, - "step": 6176 - }, - { - "epoch": 0.46422666466255824, - "grad_norm": 2.1346788449810705, - "learning_rate": 2.3276221009583116e-06, - "loss": 0.9415, - "step": 6177 - }, - { - "epoch": 0.4643018187283932, - "grad_norm": 2.161242799851717, - "learning_rate": 2.327141825460566e-06, - "loss": 1.025, - "step": 6178 - }, - { - "epoch": 0.4643769727942282, - "grad_norm": 2.213606123640264, - "learning_rate": 2.3266615305784126e-06, - "loss": 0.9509, - "step": 6179 - }, - { - "epoch": 0.46445212686006315, - "grad_norm": 1.6499279931279802, - "learning_rate": 2.3261812163403144e-06, - "loss": 1.1231, - "step": 6180 - }, - { - "epoch": 0.46452728092589807, - "grad_norm": 2.5740149826882353, - "learning_rate": 2.3257008827747294e-06, - "loss": 1.0152, - "step": 6181 - }, - { - "epoch": 0.46460243499173304, - "grad_norm": 1.7545874956936618, - "learning_rate": 2.32522052991012e-06, - "loss": 1.0049, - "step": 6182 - }, - { - "epoch": 0.464677589057568, - "grad_norm": 1.8448738293809794, - "learning_rate": 2.324740157774949e-06, - "loss": 1.0003, - "step": 6183 - }, - { - "epoch": 0.464752743123403, - "grad_norm": 1.8639782255808284, - "learning_rate": 2.3242597663976793e-06, - "loss": 0.9146, - "step": 6184 - }, - { - "epoch": 0.46482789718923795, - "grad_norm": 0.6932078236180222, - "learning_rate": 2.3237793558067776e-06, - "loss": 0.8873, - "step": 6185 - }, - { - "epoch": 0.4649030512550729, - "grad_norm": 2.2933126838257136, - "learning_rate": 2.3232989260307087e-06, - "loss": 1.0352, - "step": 6186 - }, - { - "epoch": 0.46497820532090783, - "grad_norm": 1.6034239581318188, - "learning_rate": 2.322818477097941e-06, - "loss": 1.0424, - "step": 6187 - }, - { - "epoch": 0.4650533593867428, - "grad_norm": 5.2147910811947025, - "learning_rate": 2.322338009036943e-06, - "loss": 1.0078, - "step": 6188 - }, - { - "epoch": 0.4651285134525778, - "grad_norm": 1.4438083554318994, - "learning_rate": 2.3218575218761816e-06, - "loss": 0.9975, - "step": 6189 - }, - { - "epoch": 0.46520366751841274, - "grad_norm": 1.5401471184150899, - "learning_rate": 2.3213770156441314e-06, - "loss": 0.972, - "step": 6190 - }, - { - "epoch": 0.4652788215842477, - "grad_norm": 1.7270888825048638, - "learning_rate": 2.3208964903692613e-06, - "loss": 0.9101, - "step": 6191 - }, - { - "epoch": 0.4653539756500827, - "grad_norm": 1.5456248925159262, - "learning_rate": 2.3204159460800458e-06, - "loss": 0.9617, - "step": 6192 - }, - { - "epoch": 0.46542912971591766, - "grad_norm": 2.5420131968253967, - "learning_rate": 2.319935382804959e-06, - "loss": 1.0341, - "step": 6193 - }, - { - "epoch": 0.46550428378175257, - "grad_norm": 1.8909986142577566, - "learning_rate": 2.3194548005724748e-06, - "loss": 0.9942, - "step": 6194 - }, - { - "epoch": 0.46557943784758754, - "grad_norm": 2.7882633720630707, - "learning_rate": 2.318974199411071e-06, - "loss": 0.9905, - "step": 6195 - }, - { - "epoch": 0.4656545919134225, - "grad_norm": 1.8329603364839324, - "learning_rate": 2.318493579349224e-06, - "loss": 0.9571, - "step": 6196 - }, - { - "epoch": 0.4657297459792575, - "grad_norm": 2.1855902918916965, - "learning_rate": 2.3180129404154133e-06, - "loss": 0.9603, - "step": 6197 - }, - { - "epoch": 0.46580490004509245, - "grad_norm": 1.689103318531954, - "learning_rate": 2.317532282638118e-06, - "loss": 0.9973, - "step": 6198 - }, - { - "epoch": 0.4658800541109274, - "grad_norm": 1.7684017391198872, - "learning_rate": 2.3170516060458188e-06, - "loss": 0.9477, - "step": 6199 - }, - { - "epoch": 0.46595520817676234, - "grad_norm": 1.9875818607578157, - "learning_rate": 2.3165709106669983e-06, - "loss": 0.9737, - "step": 6200 - }, - { - "epoch": 0.4660303622425973, - "grad_norm": 1.5014480912429244, - "learning_rate": 2.3160901965301386e-06, - "loss": 0.9196, - "step": 6201 - }, - { - "epoch": 0.4661055163084323, - "grad_norm": 0.7981558958210896, - "learning_rate": 2.315609463663725e-06, - "loss": 0.8059, - "step": 6202 - }, - { - "epoch": 0.46618067037426725, - "grad_norm": 2.052924266274892, - "learning_rate": 2.315128712096242e-06, - "loss": 0.8148, - "step": 6203 - }, - { - "epoch": 0.4662558244401022, - "grad_norm": 1.5568465380247325, - "learning_rate": 2.314647941856175e-06, - "loss": 0.9509, - "step": 6204 - }, - { - "epoch": 0.4663309785059372, - "grad_norm": 1.7453749028237058, - "learning_rate": 2.314167152972014e-06, - "loss": 0.9934, - "step": 6205 - }, - { - "epoch": 0.46640613257177216, - "grad_norm": 1.8504833182918037, - "learning_rate": 2.313686345472245e-06, - "loss": 1.0109, - "step": 6206 - }, - { - "epoch": 0.4664812866376071, - "grad_norm": 1.53164496485653, - "learning_rate": 2.3132055193853597e-06, - "loss": 0.9436, - "step": 6207 - }, - { - "epoch": 0.46655644070344204, - "grad_norm": 1.5388395661132435, - "learning_rate": 2.312724674739847e-06, - "loss": 0.9534, - "step": 6208 - }, - { - "epoch": 0.466631594769277, - "grad_norm": 1.473892219417634, - "learning_rate": 2.3122438115642013e-06, - "loss": 1.0801, - "step": 6209 - }, - { - "epoch": 0.466706748835112, - "grad_norm": 1.9547649974996557, - "learning_rate": 2.3117629298869135e-06, - "loss": 1.0024, - "step": 6210 - }, - { - "epoch": 0.46678190290094695, - "grad_norm": 1.8171895624937915, - "learning_rate": 2.3112820297364775e-06, - "loss": 0.9845, - "step": 6211 - }, - { - "epoch": 0.4668570569667819, - "grad_norm": 0.7325121200912839, - "learning_rate": 2.3108011111413904e-06, - "loss": 0.7933, - "step": 6212 - }, - { - "epoch": 0.46693221103261684, - "grad_norm": 4.372093761279815, - "learning_rate": 2.3103201741301465e-06, - "loss": 1.0443, - "step": 6213 - }, - { - "epoch": 0.4670073650984518, - "grad_norm": 1.8443612200882464, - "learning_rate": 2.3098392187312445e-06, - "loss": 1.0281, - "step": 6214 - }, - { - "epoch": 0.4670825191642868, - "grad_norm": 1.8015971621299602, - "learning_rate": 2.309358244973182e-06, - "loss": 1.0004, - "step": 6215 - }, - { - "epoch": 0.46715767323012175, - "grad_norm": 9.511959378373119, - "learning_rate": 2.3088772528844588e-06, - "loss": 0.9297, - "step": 6216 - }, - { - "epoch": 0.4672328272959567, - "grad_norm": 2.0981106115080204, - "learning_rate": 2.308396242493576e-06, - "loss": 1.0082, - "step": 6217 - }, - { - "epoch": 0.4673079813617917, - "grad_norm": 1.6193112052268301, - "learning_rate": 2.3079152138290347e-06, - "loss": 0.9795, - "step": 6218 - }, - { - "epoch": 0.46738313542762666, - "grad_norm": 1.8048737665136907, - "learning_rate": 2.307434166919338e-06, - "loss": 0.9373, - "step": 6219 - }, - { - "epoch": 0.4674582894934616, - "grad_norm": 1.7973490214177268, - "learning_rate": 2.30695310179299e-06, - "loss": 0.9776, - "step": 6220 - }, - { - "epoch": 0.46753344355929655, - "grad_norm": 1.4847754490423728, - "learning_rate": 2.3064720184784946e-06, - "loss": 0.9947, - "step": 6221 - }, - { - "epoch": 0.4676085976251315, - "grad_norm": 1.5301529204547486, - "learning_rate": 2.305990917004359e-06, - "loss": 0.9609, - "step": 6222 - }, - { - "epoch": 0.4676837516909665, - "grad_norm": 1.683585748852616, - "learning_rate": 2.3055097973990894e-06, - "loss": 1.0706, - "step": 6223 - }, - { - "epoch": 0.46775890575680146, - "grad_norm": 1.2472214812541103, - "learning_rate": 2.305028659691195e-06, - "loss": 1.0011, - "step": 6224 - }, - { - "epoch": 0.4678340598226364, - "grad_norm": 1.4902688518650697, - "learning_rate": 2.3045475039091846e-06, - "loss": 0.8896, - "step": 6225 - }, - { - "epoch": 0.46790921388847134, - "grad_norm": 1.6863211293414513, - "learning_rate": 2.3040663300815673e-06, - "loss": 0.8754, - "step": 6226 - }, - { - "epoch": 0.4679843679543063, - "grad_norm": 1.4663877767497748, - "learning_rate": 2.303585138236857e-06, - "loss": 0.9903, - "step": 6227 - }, - { - "epoch": 0.4680595220201413, - "grad_norm": 1.4115328423047, - "learning_rate": 2.3031039284035636e-06, - "loss": 0.9395, - "step": 6228 - }, - { - "epoch": 0.46813467608597625, - "grad_norm": 1.7666145185055593, - "learning_rate": 2.3026227006102025e-06, - "loss": 0.9317, - "step": 6229 - }, - { - "epoch": 0.4682098301518112, - "grad_norm": 1.7987802107701556, - "learning_rate": 2.3021414548852864e-06, - "loss": 0.9579, - "step": 6230 - }, - { - "epoch": 0.4682849842176462, - "grad_norm": 1.297765540952742, - "learning_rate": 2.3016601912573333e-06, - "loss": 1.0606, - "step": 6231 - }, - { - "epoch": 0.4683601382834811, - "grad_norm": 1.525180777794227, - "learning_rate": 2.301178909754859e-06, - "loss": 1.0226, - "step": 6232 - }, - { - "epoch": 0.4684352923493161, - "grad_norm": 1.709591061398528, - "learning_rate": 2.30069761040638e-06, - "loss": 1.0487, - "step": 6233 - }, - { - "epoch": 0.46851044641515105, - "grad_norm": 1.4958785975557938, - "learning_rate": 2.300216293240417e-06, - "loss": 0.9044, - "step": 6234 - }, - { - "epoch": 0.468585600480986, - "grad_norm": 1.5993310036371289, - "learning_rate": 2.299734958285488e-06, - "loss": 1.0361, - "step": 6235 - }, - { - "epoch": 0.468660754546821, - "grad_norm": 2.062896112493589, - "learning_rate": 2.2992536055701157e-06, - "loss": 1.0936, - "step": 6236 - }, - { - "epoch": 0.46873590861265596, - "grad_norm": 4.4604748487649655, - "learning_rate": 2.2987722351228216e-06, - "loss": 0.9883, - "step": 6237 - }, - { - "epoch": 0.46881106267849093, - "grad_norm": 1.7371568107604762, - "learning_rate": 2.298290846972128e-06, - "loss": 0.9327, - "step": 6238 - }, - { - "epoch": 0.46888621674432585, - "grad_norm": 1.5023768984211878, - "learning_rate": 2.29780944114656e-06, - "loss": 1.0908, - "step": 6239 - }, - { - "epoch": 0.4689613708101608, - "grad_norm": 2.1562979872519152, - "learning_rate": 2.2973280176746413e-06, - "loss": 0.9581, - "step": 6240 - }, - { - "epoch": 0.4690365248759958, - "grad_norm": 1.6288528250662546, - "learning_rate": 2.2968465765849e-06, - "loss": 0.8722, - "step": 6241 - }, - { - "epoch": 0.46911167894183076, - "grad_norm": 1.7060138105747995, - "learning_rate": 2.296365117905862e-06, - "loss": 1.0247, - "step": 6242 - }, - { - "epoch": 0.4691868330076657, - "grad_norm": 1.3558688493956466, - "learning_rate": 2.2958836416660556e-06, - "loss": 0.9875, - "step": 6243 - }, - { - "epoch": 0.4692619870735007, - "grad_norm": 1.5335154998762144, - "learning_rate": 2.295402147894011e-06, - "loss": 1.0656, - "step": 6244 - }, - { - "epoch": 0.4693371411393356, - "grad_norm": 2.1339573285342146, - "learning_rate": 2.294920636618257e-06, - "loss": 0.9942, - "step": 6245 - }, - { - "epoch": 0.4694122952051706, - "grad_norm": 2.036638009680858, - "learning_rate": 2.2944391078673267e-06, - "loss": 0.9957, - "step": 6246 - }, - { - "epoch": 0.46948744927100555, - "grad_norm": 1.5648042092015109, - "learning_rate": 2.2939575616697516e-06, - "loss": 0.9242, - "step": 6247 - }, - { - "epoch": 0.4695626033368405, - "grad_norm": 1.7221136298177036, - "learning_rate": 2.2934759980540654e-06, - "loss": 0.9794, - "step": 6248 - }, - { - "epoch": 0.4696377574026755, - "grad_norm": 1.835528843627352, - "learning_rate": 2.2929944170488025e-06, - "loss": 1.038, - "step": 6249 - }, - { - "epoch": 0.46971291146851046, - "grad_norm": 0.7316970697515022, - "learning_rate": 2.2925128186824983e-06, - "loss": 0.8341, - "step": 6250 - }, - { - "epoch": 0.46978806553434543, - "grad_norm": 1.965171346789183, - "learning_rate": 2.29203120298369e-06, - "loss": 0.9137, - "step": 6251 - }, - { - "epoch": 0.46986321960018035, - "grad_norm": 1.56416186646809, - "learning_rate": 2.2915495699809134e-06, - "loss": 0.9536, - "step": 6252 - }, - { - "epoch": 0.4699383736660153, - "grad_norm": 0.8792365915655205, - "learning_rate": 2.2910679197027093e-06, - "loss": 0.9504, - "step": 6253 - }, - { - "epoch": 0.4700135277318503, - "grad_norm": 6.139357574354278, - "learning_rate": 2.290586252177617e-06, - "loss": 0.9993, - "step": 6254 - }, - { - "epoch": 0.47008868179768526, - "grad_norm": 1.9091894281241286, - "learning_rate": 2.290104567434175e-06, - "loss": 1.0458, - "step": 6255 - }, - { - "epoch": 0.47016383586352023, - "grad_norm": 1.353478721566947, - "learning_rate": 2.2896228655009276e-06, - "loss": 0.8811, - "step": 6256 - }, - { - "epoch": 0.4702389899293552, - "grad_norm": 1.6572843956283314, - "learning_rate": 2.2891411464064155e-06, - "loss": 0.9572, - "step": 6257 - }, - { - "epoch": 0.4703141439951901, - "grad_norm": 1.5436010682702601, - "learning_rate": 2.2886594101791845e-06, - "loss": 0.9804, - "step": 6258 - }, - { - "epoch": 0.4703892980610251, - "grad_norm": 2.875464857915827, - "learning_rate": 2.2881776568477777e-06, - "loss": 0.9531, - "step": 6259 - }, - { - "epoch": 0.47046445212686006, - "grad_norm": 1.9427063424626054, - "learning_rate": 2.2876958864407407e-06, - "loss": 0.9913, - "step": 6260 - }, - { - "epoch": 0.470539606192695, - "grad_norm": 1.813066459796877, - "learning_rate": 2.287214098986621e-06, - "loss": 1.0254, - "step": 6261 - }, - { - "epoch": 0.47061476025853, - "grad_norm": 2.0407688995611473, - "learning_rate": 2.286732294513966e-06, - "loss": 0.989, - "step": 6262 - }, - { - "epoch": 0.47068991432436497, - "grad_norm": 0.741163088896328, - "learning_rate": 2.286250473051325e-06, - "loss": 0.8333, - "step": 6263 - }, - { - "epoch": 0.47076506839019994, - "grad_norm": 1.850862974501751, - "learning_rate": 2.2857686346272475e-06, - "loss": 1.0079, - "step": 6264 - }, - { - "epoch": 0.47084022245603485, - "grad_norm": 1.5854472638192443, - "learning_rate": 2.2852867792702835e-06, - "loss": 0.9421, - "step": 6265 - }, - { - "epoch": 0.4709153765218698, - "grad_norm": 1.3830666604819635, - "learning_rate": 2.284804907008986e-06, - "loss": 1.0034, - "step": 6266 - }, - { - "epoch": 0.4709905305877048, - "grad_norm": 1.7181940046422293, - "learning_rate": 2.2843230178719063e-06, - "loss": 0.9587, - "step": 6267 - }, - { - "epoch": 0.47106568465353976, - "grad_norm": 0.825850692615333, - "learning_rate": 2.2838411118875997e-06, - "loss": 0.8183, - "step": 6268 - }, - { - "epoch": 0.47114083871937473, - "grad_norm": 1.6661260284299306, - "learning_rate": 2.2833591890846204e-06, - "loss": 0.8864, - "step": 6269 - }, - { - "epoch": 0.4712159927852097, - "grad_norm": 1.7956091361003979, - "learning_rate": 2.282877249491523e-06, - "loss": 0.895, - "step": 6270 - }, - { - "epoch": 0.4712911468510446, - "grad_norm": 0.8025509515672199, - "learning_rate": 2.2823952931368667e-06, - "loss": 0.878, - "step": 6271 - }, - { - "epoch": 0.4713663009168796, - "grad_norm": 1.5427378034985815, - "learning_rate": 2.2819133200492073e-06, - "loss": 0.9173, - "step": 6272 - }, - { - "epoch": 0.47144145498271456, - "grad_norm": 1.9663080481165434, - "learning_rate": 2.281431330257105e-06, - "loss": 1.0115, - "step": 6273 - }, - { - "epoch": 0.47151660904854953, - "grad_norm": 2.438367933634925, - "learning_rate": 2.280949323789117e-06, - "loss": 0.9748, - "step": 6274 - }, - { - "epoch": 0.4715917631143845, - "grad_norm": 1.5696913216491388, - "learning_rate": 2.280467300673807e-06, - "loss": 0.944, - "step": 6275 - }, - { - "epoch": 0.47166691718021947, - "grad_norm": 0.7695340384031016, - "learning_rate": 2.2799852609397353e-06, - "loss": 0.8267, - "step": 6276 - }, - { - "epoch": 0.4717420712460544, - "grad_norm": 1.8810174386383742, - "learning_rate": 2.2795032046154644e-06, - "loss": 0.9661, - "step": 6277 - }, - { - "epoch": 0.47181722531188935, - "grad_norm": 2.4055758436001855, - "learning_rate": 2.279021131729559e-06, - "loss": 0.9514, - "step": 6278 - }, - { - "epoch": 0.4718923793777243, - "grad_norm": 1.7464538475991351, - "learning_rate": 2.2785390423105822e-06, - "loss": 1.0777, - "step": 6279 - }, - { - "epoch": 0.4719675334435593, - "grad_norm": 2.3846918418023924, - "learning_rate": 2.2780569363871016e-06, - "loss": 1.0476, - "step": 6280 - }, - { - "epoch": 0.47204268750939427, - "grad_norm": 0.7786235912467643, - "learning_rate": 2.277574813987682e-06, - "loss": 0.8633, - "step": 6281 - }, - { - "epoch": 0.47211784157522924, - "grad_norm": 1.6362926866341478, - "learning_rate": 2.2770926751408916e-06, - "loss": 0.9878, - "step": 6282 - }, - { - "epoch": 0.4721929956410642, - "grad_norm": 1.3715413115701498, - "learning_rate": 2.2766105198753e-06, - "loss": 1.0608, - "step": 6283 - }, - { - "epoch": 0.4722681497068991, - "grad_norm": 1.5081305291983103, - "learning_rate": 2.2761283482194747e-06, - "loss": 0.9773, - "step": 6284 - }, - { - "epoch": 0.4723433037727341, - "grad_norm": 1.697547178104414, - "learning_rate": 2.2756461602019886e-06, - "loss": 0.8924, - "step": 6285 - }, - { - "epoch": 0.47241845783856906, - "grad_norm": 1.1400169946842658, - "learning_rate": 2.2751639558514117e-06, - "loss": 0.9358, - "step": 6286 - }, - { - "epoch": 0.47249361190440403, - "grad_norm": 2.71931022046102, - "learning_rate": 2.2746817351963163e-06, - "loss": 0.9453, - "step": 6287 - }, - { - "epoch": 0.472568765970239, - "grad_norm": 1.340703571569136, - "learning_rate": 2.274199498265276e-06, - "loss": 0.9129, - "step": 6288 - }, - { - "epoch": 0.472643920036074, - "grad_norm": 2.0535809107970255, - "learning_rate": 2.2737172450868663e-06, - "loss": 1.0045, - "step": 6289 - }, - { - "epoch": 0.4727190741019089, - "grad_norm": 1.86392418347424, - "learning_rate": 2.2732349756896615e-06, - "loss": 1.0562, - "step": 6290 - }, - { - "epoch": 0.47279422816774386, - "grad_norm": 1.8733028996643846, - "learning_rate": 2.272752690102238e-06, - "loss": 0.9814, - "step": 6291 - }, - { - "epoch": 0.4728693822335788, - "grad_norm": 1.521910934493983, - "learning_rate": 2.272270388353173e-06, - "loss": 0.96, - "step": 6292 - }, - { - "epoch": 0.4729445362994138, - "grad_norm": 1.495179616669966, - "learning_rate": 2.2717880704710453e-06, - "loss": 0.9959, - "step": 6293 - }, - { - "epoch": 0.47301969036524877, - "grad_norm": 1.522007119085678, - "learning_rate": 2.2713057364844323e-06, - "loss": 0.8756, - "step": 6294 - }, - { - "epoch": 0.47309484443108374, - "grad_norm": 1.6292170312743808, - "learning_rate": 2.2708233864219175e-06, - "loss": 0.9646, - "step": 6295 - }, - { - "epoch": 0.4731699984969187, - "grad_norm": 1.6756874894097367, - "learning_rate": 2.270341020312078e-06, - "loss": 0.9893, - "step": 6296 - }, - { - "epoch": 0.4732451525627536, - "grad_norm": 1.913804577946816, - "learning_rate": 2.2698586381834993e-06, - "loss": 0.9323, - "step": 6297 - }, - { - "epoch": 0.4733203066285886, - "grad_norm": 1.9104869844841394, - "learning_rate": 2.269376240064763e-06, - "loss": 0.9712, - "step": 6298 - }, - { - "epoch": 0.47339546069442356, - "grad_norm": 3.0791957174194557, - "learning_rate": 2.2688938259844525e-06, - "loss": 1.0463, - "step": 6299 - }, - { - "epoch": 0.47347061476025853, - "grad_norm": 1.497539170633335, - "learning_rate": 2.268411395971153e-06, - "loss": 0.9189, - "step": 6300 - }, - { - "epoch": 0.4735457688260935, - "grad_norm": 1.7223753408161326, - "learning_rate": 2.2679289500534504e-06, - "loss": 0.9649, - "step": 6301 - }, - { - "epoch": 0.4736209228919285, - "grad_norm": 1.5225574005280826, - "learning_rate": 2.267446488259932e-06, - "loss": 0.8174, - "step": 6302 - }, - { - "epoch": 0.4736960769577634, - "grad_norm": 1.5985777792432507, - "learning_rate": 2.266964010619185e-06, - "loss": 1.042, - "step": 6303 - }, - { - "epoch": 0.47377123102359836, - "grad_norm": 1.6213427984286604, - "learning_rate": 2.2664815171597983e-06, - "loss": 1.0173, - "step": 6304 - }, - { - "epoch": 0.47384638508943333, - "grad_norm": 1.6368167820526014, - "learning_rate": 2.2659990079103604e-06, - "loss": 1.023, - "step": 6305 - }, - { - "epoch": 0.4739215391552683, - "grad_norm": 1.5916768221839086, - "learning_rate": 2.2655164828994635e-06, - "loss": 1.062, - "step": 6306 - }, - { - "epoch": 0.47399669322110327, - "grad_norm": 2.63629879468145, - "learning_rate": 2.2650339421556982e-06, - "loss": 1.0505, - "step": 6307 - }, - { - "epoch": 0.47407184728693824, - "grad_norm": 1.9283079088403872, - "learning_rate": 2.2645513857076567e-06, - "loss": 0.9382, - "step": 6308 - }, - { - "epoch": 0.4741470013527732, - "grad_norm": 1.4121915125932305, - "learning_rate": 2.2640688135839326e-06, - "loss": 0.8407, - "step": 6309 - }, - { - "epoch": 0.4742221554186081, - "grad_norm": 2.024766169104665, - "learning_rate": 2.26358622581312e-06, - "loss": 1.0091, - "step": 6310 - }, - { - "epoch": 0.4742973094844431, - "grad_norm": 1.6866766722511783, - "learning_rate": 2.2631036224238144e-06, - "loss": 0.9757, - "step": 6311 - }, - { - "epoch": 0.47437246355027807, - "grad_norm": 1.9353844567757525, - "learning_rate": 2.262621003444611e-06, - "loss": 0.9871, - "step": 6312 - }, - { - "epoch": 0.47444761761611304, - "grad_norm": 1.454337076004975, - "learning_rate": 2.2621383689041087e-06, - "loss": 0.9423, - "step": 6313 - }, - { - "epoch": 0.474522771681948, - "grad_norm": 4.636507718844464, - "learning_rate": 2.2616557188309033e-06, - "loss": 0.8974, - "step": 6314 - }, - { - "epoch": 0.474597925747783, - "grad_norm": 2.5628926976455286, - "learning_rate": 2.261173053253595e-06, - "loss": 0.9954, - "step": 6315 - }, - { - "epoch": 0.4746730798136179, - "grad_norm": 1.58170139971356, - "learning_rate": 2.260690372200783e-06, - "loss": 0.9597, - "step": 6316 - }, - { - "epoch": 0.47474823387945286, - "grad_norm": 1.367636034822762, - "learning_rate": 2.260207675701069e-06, - "loss": 0.9571, - "step": 6317 - }, - { - "epoch": 0.47482338794528783, - "grad_norm": 1.8031728093570685, - "learning_rate": 2.259724963783052e-06, - "loss": 0.8991, - "step": 6318 - }, - { - "epoch": 0.4748985420111228, - "grad_norm": 1.5705093336341114, - "learning_rate": 2.2592422364753377e-06, - "loss": 1.0354, - "step": 6319 - }, - { - "epoch": 0.4749736960769578, - "grad_norm": 1.5334385393511163, - "learning_rate": 2.2587594938065285e-06, - "loss": 1.0067, - "step": 6320 - }, - { - "epoch": 0.47504885014279274, - "grad_norm": 1.613969706760387, - "learning_rate": 2.2582767358052272e-06, - "loss": 1.0219, - "step": 6321 - }, - { - "epoch": 0.47512400420862766, - "grad_norm": 1.4862631584335741, - "learning_rate": 2.2577939625000414e-06, - "loss": 1.0069, - "step": 6322 - }, - { - "epoch": 0.47519915827446263, - "grad_norm": 0.6528084293455869, - "learning_rate": 2.2573111739195756e-06, - "loss": 0.803, - "step": 6323 - }, - { - "epoch": 0.4752743123402976, - "grad_norm": 1.7306800179434287, - "learning_rate": 2.2568283700924375e-06, - "loss": 1.0687, - "step": 6324 - }, - { - "epoch": 0.47534946640613257, - "grad_norm": 1.8274905416136906, - "learning_rate": 2.2563455510472353e-06, - "loss": 0.9804, - "step": 6325 - }, - { - "epoch": 0.47542462047196754, - "grad_norm": 1.3973430421203166, - "learning_rate": 2.255862716812577e-06, - "loss": 0.883, - "step": 6326 - }, - { - "epoch": 0.4754997745378025, - "grad_norm": 1.742923119399513, - "learning_rate": 2.2553798674170735e-06, - "loss": 0.9778, - "step": 6327 - }, - { - "epoch": 0.4755749286036375, - "grad_norm": 1.696753667213408, - "learning_rate": 2.2548970028893348e-06, - "loss": 0.9163, - "step": 6328 - }, - { - "epoch": 0.4756500826694724, - "grad_norm": 2.0750521549886045, - "learning_rate": 2.254414123257973e-06, - "loss": 1.0227, - "step": 6329 - }, - { - "epoch": 0.47572523673530737, - "grad_norm": 6.647879194940845, - "learning_rate": 2.2539312285516e-06, - "loss": 1.0405, - "step": 6330 - }, - { - "epoch": 0.47580039080114234, - "grad_norm": 1.7145522441185306, - "learning_rate": 2.2534483187988288e-06, - "loss": 0.9387, - "step": 6331 - }, - { - "epoch": 0.4758755448669773, - "grad_norm": 1.876237524449846, - "learning_rate": 2.2529653940282743e-06, - "loss": 1.0347, - "step": 6332 - }, - { - "epoch": 0.4759506989328123, - "grad_norm": 2.3812292623743296, - "learning_rate": 2.2524824542685515e-06, - "loss": 1.0955, - "step": 6333 - }, - { - "epoch": 0.47602585299864725, - "grad_norm": 1.3484893877250455, - "learning_rate": 2.2519994995482774e-06, - "loss": 0.868, - "step": 6334 - }, - { - "epoch": 0.47610100706448216, - "grad_norm": 0.6761695099276341, - "learning_rate": 2.2515165298960674e-06, - "loss": 0.8419, - "step": 6335 - }, - { - "epoch": 0.47617616113031713, - "grad_norm": 2.073044381608285, - "learning_rate": 2.251033545340539e-06, - "loss": 1.0483, - "step": 6336 - }, - { - "epoch": 0.4762513151961521, - "grad_norm": 1.6473294552184004, - "learning_rate": 2.2505505459103133e-06, - "loss": 1.0056, - "step": 6337 - }, - { - "epoch": 0.4763264692619871, - "grad_norm": 1.6178643473377856, - "learning_rate": 2.250067531634007e-06, - "loss": 0.8952, - "step": 6338 - }, - { - "epoch": 0.47640162332782204, - "grad_norm": 1.601989837947623, - "learning_rate": 2.249584502540242e-06, - "loss": 1.0184, - "step": 6339 - }, - { - "epoch": 0.476476777393657, - "grad_norm": 2.3974660092550732, - "learning_rate": 2.2491014586576404e-06, - "loss": 0.8994, - "step": 6340 - }, - { - "epoch": 0.476551931459492, - "grad_norm": 0.7560209747185094, - "learning_rate": 2.248618400014823e-06, - "loss": 0.8656, - "step": 6341 - }, - { - "epoch": 0.4766270855253269, - "grad_norm": 2.3088786664219807, - "learning_rate": 2.248135326640414e-06, - "loss": 0.9391, - "step": 6342 - }, - { - "epoch": 0.47670223959116187, - "grad_norm": 1.7510066543095373, - "learning_rate": 2.2476522385630354e-06, - "loss": 0.94, - "step": 6343 - }, - { - "epoch": 0.47677739365699684, - "grad_norm": 0.677576711823186, - "learning_rate": 2.2471691358113146e-06, - "loss": 0.8226, - "step": 6344 - }, - { - "epoch": 0.4768525477228318, - "grad_norm": 1.471234594116966, - "learning_rate": 2.246686018413875e-06, - "loss": 1.0093, - "step": 6345 - }, - { - "epoch": 0.4769277017886668, - "grad_norm": 2.3850045549201258, - "learning_rate": 2.246202886399345e-06, - "loss": 0.9008, - "step": 6346 - }, - { - "epoch": 0.47700285585450175, - "grad_norm": 1.7488278823317274, - "learning_rate": 2.245719739796351e-06, - "loss": 0.9078, - "step": 6347 - }, - { - "epoch": 0.47707800992033667, - "grad_norm": 1.727954976166394, - "learning_rate": 2.2452365786335214e-06, - "loss": 1.1029, - "step": 6348 - }, - { - "epoch": 0.47715316398617164, - "grad_norm": 0.7466581915760433, - "learning_rate": 2.2447534029394856e-06, - "loss": 0.8481, - "step": 6349 - }, - { - "epoch": 0.4772283180520066, - "grad_norm": 1.656418557856756, - "learning_rate": 2.244270212742873e-06, - "loss": 0.8437, - "step": 6350 - }, - { - "epoch": 0.4773034721178416, - "grad_norm": 0.7755670226727086, - "learning_rate": 2.2437870080723153e-06, - "loss": 0.8172, - "step": 6351 - }, - { - "epoch": 0.47737862618367655, - "grad_norm": 1.8618052652597428, - "learning_rate": 2.243303788956444e-06, - "loss": 0.94, - "step": 6352 - }, - { - "epoch": 0.4774537802495115, - "grad_norm": 1.6659056436029207, - "learning_rate": 2.2428205554238914e-06, - "loss": 0.8536, - "step": 6353 - }, - { - "epoch": 0.4775289343153465, - "grad_norm": 1.4030228779423601, - "learning_rate": 2.2423373075032913e-06, - "loss": 1.0025, - "step": 6354 - }, - { - "epoch": 0.4776040883811814, - "grad_norm": 2.2721180446073594, - "learning_rate": 2.241854045223277e-06, - "loss": 1.064, - "step": 6355 - }, - { - "epoch": 0.4776792424470164, - "grad_norm": 1.7894511904203279, - "learning_rate": 2.241370768612485e-06, - "loss": 1.0688, - "step": 6356 - }, - { - "epoch": 0.47775439651285134, - "grad_norm": 1.885562202639463, - "learning_rate": 2.2408874776995508e-06, - "loss": 1.013, - "step": 6357 - }, - { - "epoch": 0.4778295505786863, - "grad_norm": 2.945786819918122, - "learning_rate": 2.2404041725131106e-06, - "loss": 1.0514, - "step": 6358 - }, - { - "epoch": 0.4779047046445213, - "grad_norm": 1.3178758879069432, - "learning_rate": 2.239920853081803e-06, - "loss": 0.9952, - "step": 6359 - }, - { - "epoch": 0.47797985871035625, - "grad_norm": 1.5628352759184672, - "learning_rate": 2.2394375194342653e-06, - "loss": 0.995, - "step": 6360 - }, - { - "epoch": 0.47805501277619117, - "grad_norm": 1.4491132096266928, - "learning_rate": 2.2389541715991385e-06, - "loss": 0.992, - "step": 6361 - }, - { - "epoch": 0.47813016684202614, - "grad_norm": 4.849171612969709, - "learning_rate": 2.238470809605062e-06, - "loss": 0.9424, - "step": 6362 - }, - { - "epoch": 0.4782053209078611, - "grad_norm": 1.9118321717557232, - "learning_rate": 2.2379874334806764e-06, - "loss": 0.9216, - "step": 6363 - }, - { - "epoch": 0.4782804749736961, - "grad_norm": 1.5563004910654628, - "learning_rate": 2.237504043254625e-06, - "loss": 1.0771, - "step": 6364 - }, - { - "epoch": 0.47835562903953105, - "grad_norm": 1.5894842467749877, - "learning_rate": 2.2370206389555485e-06, - "loss": 0.7828, - "step": 6365 - }, - { - "epoch": 0.478430783105366, - "grad_norm": 1.8453408829382076, - "learning_rate": 2.2365372206120923e-06, - "loss": 0.9991, - "step": 6366 - }, - { - "epoch": 0.47850593717120093, - "grad_norm": 1.4883597445876726, - "learning_rate": 2.2360537882528996e-06, - "loss": 1.0276, - "step": 6367 - }, - { - "epoch": 0.4785810912370359, - "grad_norm": 1.669028238253044, - "learning_rate": 2.2355703419066163e-06, - "loss": 1.1188, - "step": 6368 - }, - { - "epoch": 0.4786562453028709, - "grad_norm": 1.8950299737215506, - "learning_rate": 2.2350868816018886e-06, - "loss": 0.8968, - "step": 6369 - }, - { - "epoch": 0.47873139936870585, - "grad_norm": 0.87862646100016, - "learning_rate": 2.234603407367362e-06, - "loss": 0.8971, - "step": 6370 - }, - { - "epoch": 0.4788065534345408, - "grad_norm": 3.242774413885898, - "learning_rate": 2.234119919231686e-06, - "loss": 1.0122, - "step": 6371 - }, - { - "epoch": 0.4788817075003758, - "grad_norm": 1.6415151451915728, - "learning_rate": 2.2336364172235074e-06, - "loss": 0.9316, - "step": 6372 - }, - { - "epoch": 0.47895686156621076, - "grad_norm": 2.0375731425818433, - "learning_rate": 2.2331529013714775e-06, - "loss": 1.032, - "step": 6373 - }, - { - "epoch": 0.47903201563204567, - "grad_norm": 1.452604221012538, - "learning_rate": 2.2326693717042446e-06, - "loss": 0.9336, - "step": 6374 - }, - { - "epoch": 0.47910716969788064, - "grad_norm": 1.6057437559098757, - "learning_rate": 2.2321858282504603e-06, - "loss": 0.9022, - "step": 6375 - }, - { - "epoch": 0.4791823237637156, - "grad_norm": 0.7265275516103304, - "learning_rate": 2.231702271038777e-06, - "loss": 0.8344, - "step": 6376 - }, - { - "epoch": 0.4792574778295506, - "grad_norm": 1.7342096198336814, - "learning_rate": 2.2312187000978467e-06, - "loss": 1.0162, - "step": 6377 - }, - { - "epoch": 0.47933263189538555, - "grad_norm": 1.6659612434339592, - "learning_rate": 2.230735115456324e-06, - "loss": 0.9533, - "step": 6378 - }, - { - "epoch": 0.4794077859612205, - "grad_norm": 1.8552928319154856, - "learning_rate": 2.2302515171428613e-06, - "loss": 0.9564, - "step": 6379 - }, - { - "epoch": 0.47948294002705544, - "grad_norm": 1.4686388825934067, - "learning_rate": 2.229767905186114e-06, - "loss": 0.966, - "step": 6380 - }, - { - "epoch": 0.4795580940928904, - "grad_norm": 2.7711338845834605, - "learning_rate": 2.2292842796147395e-06, - "loss": 0.8771, - "step": 6381 - }, - { - "epoch": 0.4796332481587254, - "grad_norm": 1.4456098096076655, - "learning_rate": 2.2288006404573922e-06, - "loss": 0.9706, - "step": 6382 - }, - { - "epoch": 0.47970840222456035, - "grad_norm": 1.4240191714785744, - "learning_rate": 2.228316987742732e-06, - "loss": 1.0487, - "step": 6383 - }, - { - "epoch": 0.4797835562903953, - "grad_norm": 1.53446871282925, - "learning_rate": 2.227833321499415e-06, - "loss": 0.9986, - "step": 6384 - }, - { - "epoch": 0.4798587103562303, - "grad_norm": 1.98981654879097, - "learning_rate": 2.227349641756102e-06, - "loss": 0.7955, - "step": 6385 - }, - { - "epoch": 0.47993386442206526, - "grad_norm": 1.6248613445108164, - "learning_rate": 2.2268659485414526e-06, - "loss": 1.0022, - "step": 6386 - }, - { - "epoch": 0.4800090184879002, - "grad_norm": 1.617271458017437, - "learning_rate": 2.226382241884126e-06, - "loss": 1.0101, - "step": 6387 - }, - { - "epoch": 0.48008417255373514, - "grad_norm": 1.8481925847245229, - "learning_rate": 2.225898521812785e-06, - "loss": 1.0449, - "step": 6388 - }, - { - "epoch": 0.4801593266195701, - "grad_norm": 1.8935104294120257, - "learning_rate": 2.2254147883560916e-06, - "loss": 1.0018, - "step": 6389 - }, - { - "epoch": 0.4802344806854051, - "grad_norm": 1.4347247825436409, - "learning_rate": 2.2249310415427087e-06, - "loss": 0.9856, - "step": 6390 - }, - { - "epoch": 0.48030963475124006, - "grad_norm": 2.002063640242257, - "learning_rate": 2.224447281401301e-06, - "loss": 0.854, - "step": 6391 - }, - { - "epoch": 0.480384788817075, - "grad_norm": 1.4794993586098897, - "learning_rate": 2.2239635079605316e-06, - "loss": 1.0088, - "step": 6392 - }, - { - "epoch": 0.48045994288290994, - "grad_norm": 1.6448842301836282, - "learning_rate": 2.223479721249067e-06, - "loss": 0.8516, - "step": 6393 - }, - { - "epoch": 0.4805350969487449, - "grad_norm": 1.8175528240267025, - "learning_rate": 2.222995921295573e-06, - "loss": 0.9024, - "step": 6394 - }, - { - "epoch": 0.4806102510145799, - "grad_norm": 1.8365115303157413, - "learning_rate": 2.2225121081287174e-06, - "loss": 0.9073, - "step": 6395 - }, - { - "epoch": 0.48068540508041485, - "grad_norm": 2.115337844811762, - "learning_rate": 2.2220282817771668e-06, - "loss": 0.9492, - "step": 6396 - }, - { - "epoch": 0.4807605591462498, - "grad_norm": 1.6572343312465228, - "learning_rate": 2.2215444422695906e-06, - "loss": 0.8553, - "step": 6397 - }, - { - "epoch": 0.4808357132120848, - "grad_norm": 2.3916661606986365, - "learning_rate": 2.2210605896346575e-06, - "loss": 0.9964, - "step": 6398 - }, - { - "epoch": 0.48091086727791976, - "grad_norm": 1.7613983201222592, - "learning_rate": 2.2205767239010376e-06, - "loss": 1.0558, - "step": 6399 - }, - { - "epoch": 0.4809860213437547, - "grad_norm": 1.9776276142403755, - "learning_rate": 2.2200928450974028e-06, - "loss": 1.0683, - "step": 6400 - }, - { - "epoch": 0.48106117540958965, - "grad_norm": 2.1512538137185993, - "learning_rate": 2.2196089532524244e-06, - "loss": 1.0193, - "step": 6401 - }, - { - "epoch": 0.4811363294754246, - "grad_norm": 1.7595914527994192, - "learning_rate": 2.2191250483947736e-06, - "loss": 1.0435, - "step": 6402 - }, - { - "epoch": 0.4812114835412596, - "grad_norm": 1.6907291598854004, - "learning_rate": 2.2186411305531254e-06, - "loss": 1.0264, - "step": 6403 - }, - { - "epoch": 0.48128663760709456, - "grad_norm": 1.2431033135349785, - "learning_rate": 2.2181571997561523e-06, - "loss": 0.9916, - "step": 6404 - }, - { - "epoch": 0.48136179167292953, - "grad_norm": 1.6596776653925693, - "learning_rate": 2.2176732560325302e-06, - "loss": 1.0821, - "step": 6405 - }, - { - "epoch": 0.48143694573876444, - "grad_norm": 1.4357798692192423, - "learning_rate": 2.217189299410934e-06, - "loss": 0.9332, - "step": 6406 - }, - { - "epoch": 0.4815120998045994, - "grad_norm": 2.6332677792787607, - "learning_rate": 2.21670532992004e-06, - "loss": 0.9791, - "step": 6407 - }, - { - "epoch": 0.4815872538704344, - "grad_norm": 4.076673227358446, - "learning_rate": 2.2162213475885262e-06, - "loss": 1.0862, - "step": 6408 - }, - { - "epoch": 0.48166240793626935, - "grad_norm": 1.7871989576889933, - "learning_rate": 2.215737352445069e-06, - "loss": 0.8562, - "step": 6409 - }, - { - "epoch": 0.4817375620021043, - "grad_norm": 1.786062398766686, - "learning_rate": 2.2152533445183477e-06, - "loss": 1.0027, - "step": 6410 - }, - { - "epoch": 0.4818127160679393, - "grad_norm": 1.4803088260320774, - "learning_rate": 2.214769323837041e-06, - "loss": 0.9986, - "step": 6411 - }, - { - "epoch": 0.4818878701337742, - "grad_norm": 0.7633512170090422, - "learning_rate": 2.21428529042983e-06, - "loss": 0.8468, - "step": 6412 - }, - { - "epoch": 0.4819630241996092, - "grad_norm": 1.587387399359637, - "learning_rate": 2.213801244325395e-06, - "loss": 0.9875, - "step": 6413 - }, - { - "epoch": 0.48203817826544415, - "grad_norm": 1.9379868049644793, - "learning_rate": 2.2133171855524167e-06, - "loss": 1.0378, - "step": 6414 - }, - { - "epoch": 0.4821133323312791, - "grad_norm": 0.7271873494448504, - "learning_rate": 2.21283311413958e-06, - "loss": 0.8325, - "step": 6415 - }, - { - "epoch": 0.4821884863971141, - "grad_norm": 1.4274382695147088, - "learning_rate": 2.2123490301155647e-06, - "loss": 0.8606, - "step": 6416 - }, - { - "epoch": 0.48226364046294906, - "grad_norm": 1.6957235936579573, - "learning_rate": 2.2118649335090568e-06, - "loss": 1.0334, - "step": 6417 - }, - { - "epoch": 0.48233879452878403, - "grad_norm": 1.8138824978154864, - "learning_rate": 2.2113808243487404e-06, - "loss": 1.0128, - "step": 6418 - }, - { - "epoch": 0.48241394859461895, - "grad_norm": 2.1476556638492745, - "learning_rate": 2.210896702663301e-06, - "loss": 1.0053, - "step": 6419 - }, - { - "epoch": 0.4824891026604539, - "grad_norm": 1.712485810381165, - "learning_rate": 2.2104125684814238e-06, - "loss": 0.9279, - "step": 6420 - }, - { - "epoch": 0.4825642567262889, - "grad_norm": 1.5606416714060056, - "learning_rate": 2.209928421831796e-06, - "loss": 1.0253, - "step": 6421 - }, - { - "epoch": 0.48263941079212386, - "grad_norm": 1.5251016770533539, - "learning_rate": 2.209444262743106e-06, - "loss": 1.0044, - "step": 6422 - }, - { - "epoch": 0.4827145648579588, - "grad_norm": 1.3952618563048664, - "learning_rate": 2.2089600912440413e-06, - "loss": 0.8717, - "step": 6423 - }, - { - "epoch": 0.4827897189237938, - "grad_norm": 2.4932716427105337, - "learning_rate": 2.2084759073632904e-06, - "loss": 0.9544, - "step": 6424 - }, - { - "epoch": 0.4828648729896287, - "grad_norm": 1.746459171708, - "learning_rate": 2.2079917111295448e-06, - "loss": 0.9765, - "step": 6425 - }, - { - "epoch": 0.4829400270554637, - "grad_norm": 1.7246347427662534, - "learning_rate": 2.2075075025714922e-06, - "loss": 0.8314, - "step": 6426 - }, - { - "epoch": 0.48301518112129865, - "grad_norm": 1.9360958056192303, - "learning_rate": 2.2070232817178272e-06, - "loss": 1.0678, - "step": 6427 - }, - { - "epoch": 0.4830903351871336, - "grad_norm": 1.6280161669300974, - "learning_rate": 2.206539048597239e-06, - "loss": 0.9588, - "step": 6428 - }, - { - "epoch": 0.4831654892529686, - "grad_norm": 1.7240495302294323, - "learning_rate": 2.206054803238422e-06, - "loss": 0.9988, - "step": 6429 - }, - { - "epoch": 0.48324064331880356, - "grad_norm": 0.8008072267366321, - "learning_rate": 2.2055705456700686e-06, - "loss": 0.8986, - "step": 6430 - }, - { - "epoch": 0.48331579738463853, - "grad_norm": 0.8998061660148342, - "learning_rate": 2.2050862759208728e-06, - "loss": 0.8202, - "step": 6431 - }, - { - "epoch": 0.48339095145047345, - "grad_norm": 1.8215977138767967, - "learning_rate": 2.2046019940195303e-06, - "loss": 1.0611, - "step": 6432 - }, - { - "epoch": 0.4834661055163084, - "grad_norm": 1.6712942808300926, - "learning_rate": 2.2041176999947353e-06, - "loss": 0.9046, - "step": 6433 - }, - { - "epoch": 0.4835412595821434, - "grad_norm": 1.3976923912838515, - "learning_rate": 2.203633393875186e-06, - "loss": 1.0474, - "step": 6434 - }, - { - "epoch": 0.48361641364797836, - "grad_norm": 1.6964614420409516, - "learning_rate": 2.2031490756895784e-06, - "loss": 1.0027, - "step": 6435 - }, - { - "epoch": 0.48369156771381333, - "grad_norm": 3.276134608920861, - "learning_rate": 2.2026647454666097e-06, - "loss": 1.0391, - "step": 6436 - }, - { - "epoch": 0.4837667217796483, - "grad_norm": 1.6043032385748899, - "learning_rate": 2.202180403234979e-06, - "loss": 0.944, - "step": 6437 - }, - { - "epoch": 0.4838418758454832, - "grad_norm": 1.3870108283403377, - "learning_rate": 2.2016960490233845e-06, - "loss": 0.9838, - "step": 6438 - }, - { - "epoch": 0.4839170299113182, - "grad_norm": 1.7724970538030116, - "learning_rate": 2.2012116828605275e-06, - "loss": 0.9873, - "step": 6439 - }, - { - "epoch": 0.48399218397715316, - "grad_norm": 1.4872190835303964, - "learning_rate": 2.200727304775108e-06, - "loss": 0.9586, - "step": 6440 - }, - { - "epoch": 0.4840673380429881, - "grad_norm": 1.6894210194971455, - "learning_rate": 2.200242914795826e-06, - "loss": 0.9196, - "step": 6441 - }, - { - "epoch": 0.4841424921088231, - "grad_norm": 2.152341866881833, - "learning_rate": 2.1997585129513852e-06, - "loss": 0.8764, - "step": 6442 - }, - { - "epoch": 0.48421764617465807, - "grad_norm": 1.8086143805560988, - "learning_rate": 2.1992740992704877e-06, - "loss": 0.9648, - "step": 6443 - }, - { - "epoch": 0.48429280024049304, - "grad_norm": 1.8436381419243186, - "learning_rate": 2.1987896737818365e-06, - "loss": 1.1255, - "step": 6444 - }, - { - "epoch": 0.48436795430632795, - "grad_norm": 2.150343171166914, - "learning_rate": 2.198305236514136e-06, - "loss": 1.111, - "step": 6445 - }, - { - "epoch": 0.4844431083721629, - "grad_norm": 0.6838497227574973, - "learning_rate": 2.1978207874960908e-06, - "loss": 0.8017, - "step": 6446 - }, - { - "epoch": 0.4845182624379979, - "grad_norm": 1.3067657912537036, - "learning_rate": 2.1973363267564063e-06, - "loss": 1.0187, - "step": 6447 - }, - { - "epoch": 0.48459341650383286, - "grad_norm": 2.390470611972484, - "learning_rate": 2.196851854323789e-06, - "loss": 1.0694, - "step": 6448 - }, - { - "epoch": 0.48466857056966783, - "grad_norm": 2.150759465238554, - "learning_rate": 2.1963673702269454e-06, - "loss": 1.0088, - "step": 6449 - }, - { - "epoch": 0.4847437246355028, - "grad_norm": 1.7333910788796272, - "learning_rate": 2.195882874494583e-06, - "loss": 0.9514, - "step": 6450 - }, - { - "epoch": 0.4848188787013377, - "grad_norm": 0.8893931385798988, - "learning_rate": 2.195398367155411e-06, - "loss": 0.8844, - "step": 6451 - }, - { - "epoch": 0.4848940327671727, - "grad_norm": 1.3593929333435666, - "learning_rate": 2.194913848238137e-06, - "loss": 0.934, - "step": 6452 - }, - { - "epoch": 0.48496918683300766, - "grad_norm": 1.7099186201975707, - "learning_rate": 2.194429317771471e-06, - "loss": 0.9263, - "step": 6453 - }, - { - "epoch": 0.48504434089884263, - "grad_norm": 1.6461930022803632, - "learning_rate": 2.1939447757841236e-06, - "loss": 1.0444, - "step": 6454 - }, - { - "epoch": 0.4851194949646776, - "grad_norm": 1.3866087963946037, - "learning_rate": 2.1934602223048054e-06, - "loss": 0.9497, - "step": 6455 - }, - { - "epoch": 0.48519464903051257, - "grad_norm": 1.6069152701163967, - "learning_rate": 2.1929756573622282e-06, - "loss": 0.9771, - "step": 6456 - }, - { - "epoch": 0.4852698030963475, - "grad_norm": 1.3597603175085167, - "learning_rate": 2.192491080985105e-06, - "loss": 0.9825, - "step": 6457 - }, - { - "epoch": 0.48534495716218246, - "grad_norm": 1.607220438946592, - "learning_rate": 2.192006493202147e-06, - "loss": 1.0041, - "step": 6458 - }, - { - "epoch": 0.4854201112280174, - "grad_norm": 1.9274907136883903, - "learning_rate": 2.19152189404207e-06, - "loss": 0.9275, - "step": 6459 - }, - { - "epoch": 0.4854952652938524, - "grad_norm": 1.4794077430521433, - "learning_rate": 2.191037283533587e-06, - "loss": 1.0156, - "step": 6460 - }, - { - "epoch": 0.48557041935968737, - "grad_norm": 2.4400348687416433, - "learning_rate": 2.1905526617054136e-06, - "loss": 0.891, - "step": 6461 - }, - { - "epoch": 0.48564557342552234, - "grad_norm": 2.466267556902828, - "learning_rate": 2.1900680285862655e-06, - "loss": 0.9815, - "step": 6462 - }, - { - "epoch": 0.4857207274913573, - "grad_norm": 1.9840197508005741, - "learning_rate": 2.1895833842048583e-06, - "loss": 1.026, - "step": 6463 - }, - { - "epoch": 0.4857958815571922, - "grad_norm": 1.653315193181863, - "learning_rate": 2.18909872858991e-06, - "loss": 1.0162, - "step": 6464 - }, - { - "epoch": 0.4858710356230272, - "grad_norm": 8.719462703985345, - "learning_rate": 2.1886140617701374e-06, - "loss": 0.9588, - "step": 6465 - }, - { - "epoch": 0.48594618968886216, - "grad_norm": 10.779930498398075, - "learning_rate": 2.1881293837742604e-06, - "loss": 0.8524, - "step": 6466 - }, - { - "epoch": 0.48602134375469713, - "grad_norm": 1.9379563563248208, - "learning_rate": 2.1876446946309965e-06, - "loss": 1.0398, - "step": 6467 - }, - { - "epoch": 0.4860964978205321, - "grad_norm": 1.3295728084889222, - "learning_rate": 2.187159994369065e-06, - "loss": 0.9289, - "step": 6468 - }, - { - "epoch": 0.4861716518863671, - "grad_norm": 2.3427680438076197, - "learning_rate": 2.1866752830171884e-06, - "loss": 0.8907, - "step": 6469 - }, - { - "epoch": 0.486246805952202, - "grad_norm": 1.5987302718984668, - "learning_rate": 2.1861905606040857e-06, - "loss": 1.0081, - "step": 6470 - }, - { - "epoch": 0.48632196001803696, - "grad_norm": 1.7918184201342562, - "learning_rate": 2.18570582715848e-06, - "loss": 1.0111, - "step": 6471 - }, - { - "epoch": 0.48639711408387193, - "grad_norm": 2.1578852768985692, - "learning_rate": 2.1852210827090922e-06, - "loss": 1.0125, - "step": 6472 - }, - { - "epoch": 0.4864722681497069, - "grad_norm": 1.9705024065043175, - "learning_rate": 2.1847363272846465e-06, - "loss": 0.8809, - "step": 6473 - }, - { - "epoch": 0.48654742221554187, - "grad_norm": 1.9306592463663375, - "learning_rate": 2.184251560913866e-06, - "loss": 0.9549, - "step": 6474 - }, - { - "epoch": 0.48662257628137684, - "grad_norm": 4.8533708086418414, - "learning_rate": 2.1837667836254746e-06, - "loss": 1.0061, - "step": 6475 - }, - { - "epoch": 0.4866977303472118, - "grad_norm": 1.7229968921414163, - "learning_rate": 2.183281995448198e-06, - "loss": 0.9833, - "step": 6476 - }, - { - "epoch": 0.4867728844130467, - "grad_norm": 2.714303073970953, - "learning_rate": 2.1827971964107607e-06, - "loss": 0.898, - "step": 6477 - }, - { - "epoch": 0.4868480384788817, - "grad_norm": 2.378512946628686, - "learning_rate": 2.1823123865418903e-06, - "loss": 0.9942, - "step": 6478 - }, - { - "epoch": 0.48692319254471667, - "grad_norm": 1.585762126979077, - "learning_rate": 2.1818275658703128e-06, - "loss": 1.068, - "step": 6479 - }, - { - "epoch": 0.48699834661055164, - "grad_norm": 2.046698293265523, - "learning_rate": 2.1813427344247557e-06, - "loss": 1.0511, - "step": 6480 - }, - { - "epoch": 0.4870735006763866, - "grad_norm": 0.8744603604689023, - "learning_rate": 2.1808578922339466e-06, - "loss": 0.8878, - "step": 6481 - }, - { - "epoch": 0.4871486547422216, - "grad_norm": 1.3210027120794832, - "learning_rate": 2.180373039326615e-06, - "loss": 0.9837, - "step": 6482 - }, - { - "epoch": 0.4872238088080565, - "grad_norm": 1.3310991980876317, - "learning_rate": 2.1798881757314905e-06, - "loss": 0.909, - "step": 6483 - }, - { - "epoch": 0.48729896287389146, - "grad_norm": 1.5460688484426417, - "learning_rate": 2.1794033014773033e-06, - "loss": 0.9998, - "step": 6484 - }, - { - "epoch": 0.48737411693972643, - "grad_norm": 2.0156214514412802, - "learning_rate": 2.1789184165927824e-06, - "loss": 0.8402, - "step": 6485 - }, - { - "epoch": 0.4874492710055614, - "grad_norm": 2.142754936051461, - "learning_rate": 2.178433521106661e-06, - "loss": 1.0635, - "step": 6486 - }, - { - "epoch": 0.4875244250713964, - "grad_norm": 1.7396500245366835, - "learning_rate": 2.177948615047669e-06, - "loss": 1.0555, - "step": 6487 - }, - { - "epoch": 0.48759957913723134, - "grad_norm": 1.8398663626737048, - "learning_rate": 2.1774636984445417e-06, - "loss": 0.9936, - "step": 6488 - }, - { - "epoch": 0.4876747332030663, - "grad_norm": 2.027063956826399, - "learning_rate": 2.1769787713260097e-06, - "loss": 1.0778, - "step": 6489 - }, - { - "epoch": 0.4877498872689012, - "grad_norm": 1.6505051793176373, - "learning_rate": 2.1764938337208075e-06, - "loss": 1.0347, - "step": 6490 - }, - { - "epoch": 0.4878250413347362, - "grad_norm": 1.6892199052131533, - "learning_rate": 2.1760088856576706e-06, - "loss": 1.0604, - "step": 6491 - }, - { - "epoch": 0.48790019540057117, - "grad_norm": 1.2653135693583983, - "learning_rate": 2.175523927165333e-06, - "loss": 0.8999, - "step": 6492 - }, - { - "epoch": 0.48797534946640614, - "grad_norm": 1.6544928676697044, - "learning_rate": 2.1750389582725307e-06, - "loss": 0.9203, - "step": 6493 - }, - { - "epoch": 0.4880505035322411, - "grad_norm": 0.7376752515857252, - "learning_rate": 2.1745539790079993e-06, - "loss": 0.8123, - "step": 6494 - }, - { - "epoch": 0.4881256575980761, - "grad_norm": 2.214214255678535, - "learning_rate": 2.1740689894004773e-06, - "loss": 1.0144, - "step": 6495 - }, - { - "epoch": 0.488200811663911, - "grad_norm": 1.441020634870747, - "learning_rate": 2.1735839894787003e-06, - "loss": 0.9911, - "step": 6496 - }, - { - "epoch": 0.48827596572974596, - "grad_norm": 1.8439586973881328, - "learning_rate": 2.1730989792714074e-06, - "loss": 1.0378, - "step": 6497 - }, - { - "epoch": 0.48835111979558093, - "grad_norm": 1.3409524228775391, - "learning_rate": 2.1726139588073374e-06, - "loss": 0.9265, - "step": 6498 - }, - { - "epoch": 0.4884262738614159, - "grad_norm": 1.9207201954034747, - "learning_rate": 2.172128928115229e-06, - "loss": 0.9433, - "step": 6499 - }, - { - "epoch": 0.4885014279272509, - "grad_norm": 0.663231138531299, - "learning_rate": 2.1716438872238227e-06, - "loss": 0.7854, - "step": 6500 - }, - { - "epoch": 0.48857658199308585, - "grad_norm": 2.1916551635595027, - "learning_rate": 2.171158836161859e-06, - "loss": 0.9731, - "step": 6501 - }, - { - "epoch": 0.48865173605892076, - "grad_norm": 0.7768078974671027, - "learning_rate": 2.1706737749580783e-06, - "loss": 0.9014, - "step": 6502 - }, - { - "epoch": 0.48872689012475573, - "grad_norm": 3.949980921638867, - "learning_rate": 2.1701887036412236e-06, - "loss": 1.0605, - "step": 6503 - }, - { - "epoch": 0.4888020441905907, - "grad_norm": 1.887005707142361, - "learning_rate": 2.1697036222400355e-06, - "loss": 1.0229, - "step": 6504 - }, - { - "epoch": 0.48887719825642567, - "grad_norm": 2.013312380768, - "learning_rate": 2.169218530783259e-06, - "loss": 1.0123, - "step": 6505 - }, - { - "epoch": 0.48895235232226064, - "grad_norm": 1.541120441000256, - "learning_rate": 2.1687334292996363e-06, - "loss": 0.913, - "step": 6506 - }, - { - "epoch": 0.4890275063880956, - "grad_norm": 1.5158031398261005, - "learning_rate": 2.168248317817911e-06, - "loss": 0.9511, - "step": 6507 - }, - { - "epoch": 0.4891026604539306, - "grad_norm": 0.8202564401998692, - "learning_rate": 2.1677631963668298e-06, - "loss": 0.9618, - "step": 6508 - }, - { - "epoch": 0.4891778145197655, - "grad_norm": 2.2983450823680207, - "learning_rate": 2.1672780649751353e-06, - "loss": 0.9729, - "step": 6509 - }, - { - "epoch": 0.48925296858560047, - "grad_norm": 1.7337062336761382, - "learning_rate": 2.166792923671576e-06, - "loss": 0.946, - "step": 6510 - }, - { - "epoch": 0.48932812265143544, - "grad_norm": 1.723779982208573, - "learning_rate": 2.166307772484898e-06, - "loss": 0.9944, - "step": 6511 - }, - { - "epoch": 0.4894032767172704, - "grad_norm": 1.5416212161126046, - "learning_rate": 2.1658226114438457e-06, - "loss": 1.0305, - "step": 6512 - }, - { - "epoch": 0.4894784307831054, - "grad_norm": 1.8798084908197972, - "learning_rate": 2.1653374405771696e-06, - "loss": 1.0659, - "step": 6513 - }, - { - "epoch": 0.48955358484894035, - "grad_norm": 1.7496220316751685, - "learning_rate": 2.1648522599136173e-06, - "loss": 0.8521, - "step": 6514 - }, - { - "epoch": 0.48962873891477526, - "grad_norm": 1.6064767017022632, - "learning_rate": 2.1643670694819375e-06, - "loss": 0.96, - "step": 6515 - }, - { - "epoch": 0.48970389298061023, - "grad_norm": 1.447420634464118, - "learning_rate": 2.163881869310879e-06, - "loss": 1.0279, - "step": 6516 - }, - { - "epoch": 0.4897790470464452, - "grad_norm": 1.5401797039904488, - "learning_rate": 2.163396659429192e-06, - "loss": 0.9826, - "step": 6517 - }, - { - "epoch": 0.4898542011122802, - "grad_norm": 1.8023166689183772, - "learning_rate": 2.162911439865628e-06, - "loss": 0.9975, - "step": 6518 - }, - { - "epoch": 0.48992935517811514, - "grad_norm": 2.2754340040795267, - "learning_rate": 2.1624262106489368e-06, - "loss": 0.9019, - "step": 6519 - }, - { - "epoch": 0.4900045092439501, - "grad_norm": 1.7502221920102115, - "learning_rate": 2.161940971807871e-06, - "loss": 1.0103, - "step": 6520 - }, - { - "epoch": 0.4900796633097851, - "grad_norm": 2.0602904535355417, - "learning_rate": 2.1614557233711817e-06, - "loss": 0.9288, - "step": 6521 - }, - { - "epoch": 0.49015481737562, - "grad_norm": 3.3877648450633018, - "learning_rate": 2.1609704653676234e-06, - "loss": 0.9724, - "step": 6522 - }, - { - "epoch": 0.49022997144145497, - "grad_norm": 1.4733593625722465, - "learning_rate": 2.1604851978259485e-06, - "loss": 0.8662, - "step": 6523 - }, - { - "epoch": 0.49030512550728994, - "grad_norm": 2.1519355179084907, - "learning_rate": 2.159999920774911e-06, - "loss": 0.9579, - "step": 6524 - }, - { - "epoch": 0.4903802795731249, - "grad_norm": 1.8468630743529215, - "learning_rate": 2.1595146342432655e-06, - "loss": 1.0096, - "step": 6525 - }, - { - "epoch": 0.4904554336389599, - "grad_norm": 1.78760539190999, - "learning_rate": 2.1590293382597667e-06, - "loss": 1.0256, - "step": 6526 - }, - { - "epoch": 0.49053058770479485, - "grad_norm": 2.1705733316226983, - "learning_rate": 2.158544032853171e-06, - "loss": 1.0226, - "step": 6527 - }, - { - "epoch": 0.49060574177062977, - "grad_norm": 2.3126114483345486, - "learning_rate": 2.1580587180522345e-06, - "loss": 0.8795, - "step": 6528 - }, - { - "epoch": 0.49068089583646474, - "grad_norm": 1.4874869402143838, - "learning_rate": 2.1575733938857134e-06, - "loss": 1.0679, - "step": 6529 - }, - { - "epoch": 0.4907560499022997, - "grad_norm": 1.4002014628278057, - "learning_rate": 2.1570880603823654e-06, - "loss": 0.9909, - "step": 6530 - }, - { - "epoch": 0.4908312039681347, - "grad_norm": 2.431553752527018, - "learning_rate": 2.156602717570948e-06, - "loss": 0.8724, - "step": 6531 - }, - { - "epoch": 0.49090635803396965, - "grad_norm": 2.9062135755655945, - "learning_rate": 2.15611736548022e-06, - "loss": 0.946, - "step": 6532 - }, - { - "epoch": 0.4909815120998046, - "grad_norm": 2.3672442035996872, - "learning_rate": 2.1556320041389407e-06, - "loss": 1.0372, - "step": 6533 - }, - { - "epoch": 0.4910566661656396, - "grad_norm": 1.5018898661651998, - "learning_rate": 2.155146633575869e-06, - "loss": 0.9648, - "step": 6534 - }, - { - "epoch": 0.4911318202314745, - "grad_norm": 1.9772881544143126, - "learning_rate": 2.154661253819765e-06, - "loss": 0.8152, - "step": 6535 - }, - { - "epoch": 0.4912069742973095, - "grad_norm": 1.6179675740928927, - "learning_rate": 2.1541758648993895e-06, - "loss": 1.0443, - "step": 6536 - }, - { - "epoch": 0.49128212836314444, - "grad_norm": 1.6069663828886567, - "learning_rate": 2.1536904668435035e-06, - "loss": 0.9545, - "step": 6537 - }, - { - "epoch": 0.4913572824289794, - "grad_norm": 3.110533685307729, - "learning_rate": 2.153205059680869e-06, - "loss": 1.0721, - "step": 6538 - }, - { - "epoch": 0.4914324364948144, - "grad_norm": 1.4631669276315071, - "learning_rate": 2.1527196434402483e-06, - "loss": 0.9912, - "step": 6539 - }, - { - "epoch": 0.49150759056064935, - "grad_norm": 1.8189269592611, - "learning_rate": 2.152234218150404e-06, - "loss": 0.8633, - "step": 6540 - }, - { - "epoch": 0.49158274462648427, - "grad_norm": 9.308965024855576, - "learning_rate": 2.1517487838400984e-06, - "loss": 1.0101, - "step": 6541 - }, - { - "epoch": 0.49165789869231924, - "grad_norm": 1.6446735383951852, - "learning_rate": 2.1512633405380972e-06, - "loss": 1.0181, - "step": 6542 - }, - { - "epoch": 0.4917330527581542, - "grad_norm": 3.8160616388201087, - "learning_rate": 2.1507778882731635e-06, - "loss": 0.9672, - "step": 6543 - }, - { - "epoch": 0.4918082068239892, - "grad_norm": 2.0316737979996375, - "learning_rate": 2.1502924270740626e-06, - "loss": 0.9365, - "step": 6544 - }, - { - "epoch": 0.49188336088982415, - "grad_norm": 2.46732163454196, - "learning_rate": 2.1498069569695605e-06, - "loss": 0.9241, - "step": 6545 - }, - { - "epoch": 0.4919585149556591, - "grad_norm": 0.7475472716627098, - "learning_rate": 2.149321477988421e-06, - "loss": 0.8858, - "step": 6546 - }, - { - "epoch": 0.49203366902149404, - "grad_norm": 2.023745598037202, - "learning_rate": 2.1488359901594137e-06, - "loss": 0.9729, - "step": 6547 - }, - { - "epoch": 0.492108823087329, - "grad_norm": 1.5839514111570765, - "learning_rate": 2.148350493511303e-06, - "loss": 0.9582, - "step": 6548 - }, - { - "epoch": 0.492183977153164, - "grad_norm": 4.7949359963514055, - "learning_rate": 2.1478649880728582e-06, - "loss": 1.0454, - "step": 6549 - }, - { - "epoch": 0.49225913121899895, - "grad_norm": 1.792589334472402, - "learning_rate": 2.1473794738728466e-06, - "loss": 1.0364, - "step": 6550 - }, - { - "epoch": 0.4923342852848339, - "grad_norm": 1.3737122834766595, - "learning_rate": 2.1468939509400363e-06, - "loss": 1.0263, - "step": 6551 - }, - { - "epoch": 0.4924094393506689, - "grad_norm": 1.5589196956620932, - "learning_rate": 2.1464084193031976e-06, - "loss": 0.9021, - "step": 6552 - }, - { - "epoch": 0.49248459341650386, - "grad_norm": 1.814974181356776, - "learning_rate": 2.1459228789910983e-06, - "loss": 0.9457, - "step": 6553 - }, - { - "epoch": 0.4925597474823388, - "grad_norm": 1.43713523549077, - "learning_rate": 2.1454373300325103e-06, - "loss": 0.9842, - "step": 6554 - }, - { - "epoch": 0.49263490154817374, - "grad_norm": 1.4701446242779432, - "learning_rate": 2.1449517724562037e-06, - "loss": 1.0106, - "step": 6555 - }, - { - "epoch": 0.4927100556140087, - "grad_norm": 1.7262476293062785, - "learning_rate": 2.1444662062909495e-06, - "loss": 0.9534, - "step": 6556 - }, - { - "epoch": 0.4927852096798437, - "grad_norm": 4.732751549082393, - "learning_rate": 2.1439806315655197e-06, - "loss": 1.0586, - "step": 6557 - }, - { - "epoch": 0.49286036374567865, - "grad_norm": 1.631702392519514, - "learning_rate": 2.1434950483086856e-06, - "loss": 1.025, - "step": 6558 - }, - { - "epoch": 0.4929355178115136, - "grad_norm": 1.8636755056351857, - "learning_rate": 2.143009456549221e-06, - "loss": 0.9489, - "step": 6559 - }, - { - "epoch": 0.49301067187734854, - "grad_norm": 1.563031983412577, - "learning_rate": 2.1425238563158975e-06, - "loss": 0.9846, - "step": 6560 - }, - { - "epoch": 0.4930858259431835, - "grad_norm": 1.6580420102409947, - "learning_rate": 2.1420382476374905e-06, - "loss": 1.0078, - "step": 6561 - }, - { - "epoch": 0.4931609800090185, - "grad_norm": 1.892942600171959, - "learning_rate": 2.1415526305427735e-06, - "loss": 1.0126, - "step": 6562 - }, - { - "epoch": 0.49323613407485345, - "grad_norm": 1.7614633582291837, - "learning_rate": 2.1410670050605203e-06, - "loss": 0.8628, - "step": 6563 - }, - { - "epoch": 0.4933112881406884, - "grad_norm": 0.7004819998678228, - "learning_rate": 2.140581371219508e-06, - "loss": 0.8137, - "step": 6564 - }, - { - "epoch": 0.4933864422065234, - "grad_norm": 1.7086832882095904, - "learning_rate": 2.14009572904851e-06, - "loss": 1.0269, - "step": 6565 - }, - { - "epoch": 0.49346159627235836, - "grad_norm": 2.324087238598144, - "learning_rate": 2.139610078576305e-06, - "loss": 0.903, - "step": 6566 - }, - { - "epoch": 0.4935367503381933, - "grad_norm": 1.6109895477618863, - "learning_rate": 2.139124419831667e-06, - "loss": 0.9718, - "step": 6567 - }, - { - "epoch": 0.49361190440402825, - "grad_norm": 1.6161727301469526, - "learning_rate": 2.1386387528433743e-06, - "loss": 0.9604, - "step": 6568 - }, - { - "epoch": 0.4936870584698632, - "grad_norm": 2.0435738770543637, - "learning_rate": 2.1381530776402058e-06, - "loss": 1.0289, - "step": 6569 - }, - { - "epoch": 0.4937622125356982, - "grad_norm": 2.4009585969557357, - "learning_rate": 2.1376673942509373e-06, - "loss": 0.9451, - "step": 6570 - }, - { - "epoch": 0.49383736660153316, - "grad_norm": 2.3752786573265823, - "learning_rate": 2.1371817027043494e-06, - "loss": 1.0765, - "step": 6571 - }, - { - "epoch": 0.4939125206673681, - "grad_norm": 1.8301053528607527, - "learning_rate": 2.1366960030292195e-06, - "loss": 0.906, - "step": 6572 - }, - { - "epoch": 0.49398767473320304, - "grad_norm": 2.226512065115466, - "learning_rate": 2.1362102952543277e-06, - "loss": 0.805, - "step": 6573 - }, - { - "epoch": 0.494062828799038, - "grad_norm": 1.3947587882799415, - "learning_rate": 2.135724579408456e-06, - "loss": 0.9986, - "step": 6574 - }, - { - "epoch": 0.494137982864873, - "grad_norm": 1.5806824008448688, - "learning_rate": 2.1352388555203814e-06, - "loss": 1.0365, - "step": 6575 - }, - { - "epoch": 0.49421313693070795, - "grad_norm": 1.44800807014872, - "learning_rate": 2.1347531236188878e-06, - "loss": 0.9794, - "step": 6576 - }, - { - "epoch": 0.4942882909965429, - "grad_norm": 1.5470302003009866, - "learning_rate": 2.134267383732755e-06, - "loss": 0.9197, - "step": 6577 - }, - { - "epoch": 0.4943634450623779, - "grad_norm": 1.839115330691624, - "learning_rate": 2.1337816358907663e-06, - "loss": 0.9464, - "step": 6578 - }, - { - "epoch": 0.49443859912821286, - "grad_norm": 1.435930058207857, - "learning_rate": 2.133295880121703e-06, - "loss": 1.0151, - "step": 6579 - }, - { - "epoch": 0.4945137531940478, - "grad_norm": 1.8940386259496809, - "learning_rate": 2.132810116454348e-06, - "loss": 1.0374, - "step": 6580 - }, - { - "epoch": 0.49458890725988275, - "grad_norm": 1.8089901714195018, - "learning_rate": 2.132324344917486e-06, - "loss": 1.0051, - "step": 6581 - }, - { - "epoch": 0.4946640613257177, - "grad_norm": 3.6174768451616606, - "learning_rate": 2.131838565539899e-06, - "loss": 1.0343, - "step": 6582 - }, - { - "epoch": 0.4947392153915527, - "grad_norm": 1.9558662403764069, - "learning_rate": 2.1313527783503727e-06, - "loss": 0.9126, - "step": 6583 - }, - { - "epoch": 0.49481436945738766, - "grad_norm": 0.5932952996942049, - "learning_rate": 2.1308669833776907e-06, - "loss": 0.8083, - "step": 6584 - }, - { - "epoch": 0.49488952352322263, - "grad_norm": 1.6522226852904334, - "learning_rate": 2.130381180650639e-06, - "loss": 0.9317, - "step": 6585 - }, - { - "epoch": 0.49496467758905754, - "grad_norm": 1.4325085100117567, - "learning_rate": 2.1298953701980038e-06, - "loss": 1.1341, - "step": 6586 - }, - { - "epoch": 0.4950398316548925, - "grad_norm": 0.7488434934755339, - "learning_rate": 2.129409552048569e-06, - "loss": 0.8884, - "step": 6587 - }, - { - "epoch": 0.4951149857207275, - "grad_norm": 2.3751768749027415, - "learning_rate": 2.1289237262311243e-06, - "loss": 1.0084, - "step": 6588 - }, - { - "epoch": 0.49519013978656246, - "grad_norm": 1.7715871964027081, - "learning_rate": 2.1284378927744546e-06, - "loss": 0.9801, - "step": 6589 - }, - { - "epoch": 0.4952652938523974, - "grad_norm": 2.126120664507922, - "learning_rate": 2.1279520517073475e-06, - "loss": 0.971, - "step": 6590 - }, - { - "epoch": 0.4953404479182324, - "grad_norm": 1.409712322661787, - "learning_rate": 2.127466203058592e-06, - "loss": 1.0693, - "step": 6591 - }, - { - "epoch": 0.4954156019840673, - "grad_norm": 1.4520462297746273, - "learning_rate": 2.1269803468569756e-06, - "loss": 1.0243, - "step": 6592 - }, - { - "epoch": 0.4954907560499023, - "grad_norm": 0.6902103534311597, - "learning_rate": 2.1264944831312874e-06, - "loss": 0.8613, - "step": 6593 - }, - { - "epoch": 0.49556591011573725, - "grad_norm": 1.519403055074035, - "learning_rate": 2.1260086119103165e-06, - "loss": 0.9334, - "step": 6594 - }, - { - "epoch": 0.4956410641815722, - "grad_norm": 1.6860152176275442, - "learning_rate": 2.1255227332228527e-06, - "loss": 1.0305, - "step": 6595 - }, - { - "epoch": 0.4957162182474072, - "grad_norm": 1.303196540533688, - "learning_rate": 2.125036847097687e-06, - "loss": 0.9901, - "step": 6596 - }, - { - "epoch": 0.49579137231324216, - "grad_norm": 1.5707394603064178, - "learning_rate": 2.1245509535636088e-06, - "loss": 1.0319, - "step": 6597 - }, - { - "epoch": 0.49586652637907713, - "grad_norm": 1.5900561591493105, - "learning_rate": 2.1240650526494096e-06, - "loss": 0.9555, - "step": 6598 - }, - { - "epoch": 0.49594168044491205, - "grad_norm": 0.7875876287116422, - "learning_rate": 2.1235791443838804e-06, - "loss": 0.8367, - "step": 6599 - }, - { - "epoch": 0.496016834510747, - "grad_norm": 1.777983900213994, - "learning_rate": 2.123093228795815e-06, - "loss": 0.913, - "step": 6600 - }, - { - "epoch": 0.496091988576582, - "grad_norm": 2.031085667931935, - "learning_rate": 2.122607305914004e-06, - "loss": 0.8825, - "step": 6601 - }, - { - "epoch": 0.49616714264241696, - "grad_norm": 1.5860922601666714, - "learning_rate": 2.1221213757672404e-06, - "loss": 0.886, - "step": 6602 - }, - { - "epoch": 0.49624229670825193, - "grad_norm": 0.8534556701692604, - "learning_rate": 2.1216354383843176e-06, - "loss": 0.9155, - "step": 6603 - }, - { - "epoch": 0.4963174507740869, - "grad_norm": 2.551522537170783, - "learning_rate": 2.1211494937940296e-06, - "loss": 0.9117, - "step": 6604 - }, - { - "epoch": 0.4963926048399218, - "grad_norm": 1.6972109102093118, - "learning_rate": 2.12066354202517e-06, - "loss": 0.9023, - "step": 6605 - }, - { - "epoch": 0.4964677589057568, - "grad_norm": 1.2763049366603927, - "learning_rate": 2.1201775831065336e-06, - "loss": 0.9406, - "step": 6606 - }, - { - "epoch": 0.49654291297159175, - "grad_norm": 1.6235099710217613, - "learning_rate": 2.119691617066915e-06, - "loss": 0.826, - "step": 6607 - }, - { - "epoch": 0.4966180670374267, - "grad_norm": 2.393037962029358, - "learning_rate": 2.11920564393511e-06, - "loss": 1.0208, - "step": 6608 - }, - { - "epoch": 0.4966932211032617, - "grad_norm": 1.6988846373659046, - "learning_rate": 2.1187196637399138e-06, - "loss": 0.9492, - "step": 6609 - }, - { - "epoch": 0.49676837516909667, - "grad_norm": 1.627042293967195, - "learning_rate": 2.118233676510123e-06, - "loss": 0.9471, - "step": 6610 - }, - { - "epoch": 0.49684352923493164, - "grad_norm": 2.1784373427497568, - "learning_rate": 2.1177476822745344e-06, - "loss": 1.0524, - "step": 6611 - }, - { - "epoch": 0.49691868330076655, - "grad_norm": 1.568322662265863, - "learning_rate": 2.1172616810619446e-06, - "loss": 0.9515, - "step": 6612 - }, - { - "epoch": 0.4969938373666015, - "grad_norm": 2.5350797903230666, - "learning_rate": 2.116775672901151e-06, - "loss": 0.9254, - "step": 6613 - }, - { - "epoch": 0.4970689914324365, - "grad_norm": 1.2835267470291982, - "learning_rate": 2.1162896578209516e-06, - "loss": 0.8385, - "step": 6614 - }, - { - "epoch": 0.49714414549827146, - "grad_norm": 2.118496648444126, - "learning_rate": 2.115803635850145e-06, - "loss": 1.0782, - "step": 6615 - }, - { - "epoch": 0.49721929956410643, - "grad_norm": 1.638116499184882, - "learning_rate": 2.1153176070175297e-06, - "loss": 0.8779, - "step": 6616 - }, - { - "epoch": 0.4972944536299414, - "grad_norm": 32.14867766465578, - "learning_rate": 2.1148315713519036e-06, - "loss": 0.9735, - "step": 6617 - }, - { - "epoch": 0.4973696076957763, - "grad_norm": 2.4126334790872903, - "learning_rate": 2.114345528882068e-06, - "loss": 0.8683, - "step": 6618 - }, - { - "epoch": 0.4974447617616113, - "grad_norm": 2.1281714614972285, - "learning_rate": 2.1138594796368213e-06, - "loss": 0.9134, - "step": 6619 - }, - { - "epoch": 0.49751991582744626, - "grad_norm": 5.823436989053674, - "learning_rate": 2.1133734236449654e-06, - "loss": 0.9549, - "step": 6620 - }, - { - "epoch": 0.4975950698932812, - "grad_norm": 1.7051877470488541, - "learning_rate": 2.112887360935299e-06, - "loss": 0.9658, - "step": 6621 - }, - { - "epoch": 0.4976702239591162, - "grad_norm": 1.7049049385619954, - "learning_rate": 2.112401291536625e-06, - "loss": 0.9854, - "step": 6622 - }, - { - "epoch": 0.49774537802495117, - "grad_norm": 0.7277971258050616, - "learning_rate": 2.1119152154777442e-06, - "loss": 0.926, - "step": 6623 - }, - { - "epoch": 0.49782053209078614, - "grad_norm": 1.6772387730689946, - "learning_rate": 2.1114291327874578e-06, - "loss": 1.0843, - "step": 6624 - }, - { - "epoch": 0.49789568615662105, - "grad_norm": 1.8418081370198496, - "learning_rate": 2.1109430434945685e-06, - "loss": 1.0078, - "step": 6625 - }, - { - "epoch": 0.497970840222456, - "grad_norm": 2.0246560687701654, - "learning_rate": 2.1104569476278794e-06, - "loss": 1.0481, - "step": 6626 - }, - { - "epoch": 0.498045994288291, - "grad_norm": 1.5904997051182377, - "learning_rate": 2.109970845216193e-06, - "loss": 0.9376, - "step": 6627 - }, - { - "epoch": 0.49812114835412596, - "grad_norm": 1.4348616229604434, - "learning_rate": 2.109484736288313e-06, - "loss": 1.0371, - "step": 6628 - }, - { - "epoch": 0.49819630241996093, - "grad_norm": 1.8163154825481627, - "learning_rate": 2.108998620873043e-06, - "loss": 1.0412, - "step": 6629 - }, - { - "epoch": 0.4982714564857959, - "grad_norm": 0.6902750535429788, - "learning_rate": 2.1085124989991876e-06, - "loss": 0.838, - "step": 6630 - }, - { - "epoch": 0.4983466105516308, - "grad_norm": 1.6762738693096721, - "learning_rate": 2.108026370695551e-06, - "loss": 1.0207, - "step": 6631 - }, - { - "epoch": 0.4984217646174658, - "grad_norm": 1.7278875729359306, - "learning_rate": 2.107540235990938e-06, - "loss": 1.0608, - "step": 6632 - }, - { - "epoch": 0.49849691868330076, - "grad_norm": 1.576685182544682, - "learning_rate": 2.107054094914155e-06, - "loss": 0.9044, - "step": 6633 - }, - { - "epoch": 0.49857207274913573, - "grad_norm": 2.1841639557314276, - "learning_rate": 2.106567947494006e-06, - "loss": 1.1051, - "step": 6634 - }, - { - "epoch": 0.4986472268149707, - "grad_norm": 2.1413721047775383, - "learning_rate": 2.106081793759298e-06, - "loss": 1.0064, - "step": 6635 - }, - { - "epoch": 0.49872238088080567, - "grad_norm": 1.1868839673772715, - "learning_rate": 2.1055956337388376e-06, - "loss": 1.0165, - "step": 6636 - }, - { - "epoch": 0.4987975349466406, - "grad_norm": 1.9118511271038474, - "learning_rate": 2.1051094674614327e-06, - "loss": 1.0645, - "step": 6637 - }, - { - "epoch": 0.49887268901247556, - "grad_norm": 2.4919079471320726, - "learning_rate": 2.1046232949558887e-06, - "loss": 0.9709, - "step": 6638 - }, - { - "epoch": 0.4989478430783105, - "grad_norm": 1.641410857407042, - "learning_rate": 2.104137116251013e-06, - "loss": 0.9318, - "step": 6639 - }, - { - "epoch": 0.4990229971441455, - "grad_norm": 1.2763759668189838, - "learning_rate": 2.103650931375615e-06, - "loss": 1.0386, - "step": 6640 - }, - { - "epoch": 0.49909815120998047, - "grad_norm": 0.715443385704093, - "learning_rate": 2.103164740358502e-06, - "loss": 0.8689, - "step": 6641 - }, - { - "epoch": 0.49917330527581544, - "grad_norm": 1.9087602940090072, - "learning_rate": 2.1026785432284837e-06, - "loss": 0.9477, - "step": 6642 - }, - { - "epoch": 0.4992484593416504, - "grad_norm": 1.8423207449186454, - "learning_rate": 2.1021923400143683e-06, - "loss": 1.0599, - "step": 6643 - }, - { - "epoch": 0.4993236134074853, - "grad_norm": 1.5833229349029134, - "learning_rate": 2.101706130744966e-06, - "loss": 1.0704, - "step": 6644 - }, - { - "epoch": 0.4993987674733203, - "grad_norm": 1.6236782567338828, - "learning_rate": 2.1012199154490852e-06, - "loss": 0.929, - "step": 6645 - }, - { - "epoch": 0.49947392153915526, - "grad_norm": 1.5007669554411396, - "learning_rate": 2.1007336941555374e-06, - "loss": 0.9963, - "step": 6646 - }, - { - "epoch": 0.49954907560499023, - "grad_norm": 1.6660403823354673, - "learning_rate": 2.100247466893132e-06, - "loss": 0.9498, - "step": 6647 - }, - { - "epoch": 0.4996242296708252, - "grad_norm": 2.1718215386925683, - "learning_rate": 2.0997612336906805e-06, - "loss": 0.9611, - "step": 6648 - }, - { - "epoch": 0.4996993837366602, - "grad_norm": 1.3640882841384674, - "learning_rate": 2.099274994576994e-06, - "loss": 0.9336, - "step": 6649 - }, - { - "epoch": 0.4997745378024951, - "grad_norm": 1.432087619002967, - "learning_rate": 2.098788749580884e-06, - "loss": 0.875, - "step": 6650 - }, - { - "epoch": 0.49984969186833006, - "grad_norm": 1.4323235063192814, - "learning_rate": 2.098302498731162e-06, - "loss": 0.9303, - "step": 6651 - }, - { - "epoch": 0.49992484593416503, - "grad_norm": 1.590898843212068, - "learning_rate": 2.0978162420566406e-06, - "loss": 1.0315, - "step": 6652 - }, - { - "epoch": 0.5, - "grad_norm": 0.663147846191505, - "learning_rate": 2.0973299795861322e-06, - "loss": 0.8574, - "step": 6653 - }, - { - "epoch": 0.5000751540658349, - "grad_norm": 1.4692734941983348, - "learning_rate": 2.09684371134845e-06, - "loss": 0.9448, - "step": 6654 - }, - { - "epoch": 0.5001503081316699, - "grad_norm": 1.72245050356415, - "learning_rate": 2.0963574373724074e-06, - "loss": 1.0433, - "step": 6655 - }, - { - "epoch": 0.5002254621975049, - "grad_norm": 4.060125038213446, - "learning_rate": 2.095871157686817e-06, - "loss": 1.0152, - "step": 6656 - }, - { - "epoch": 0.5003006162633399, - "grad_norm": 1.8967283742126106, - "learning_rate": 2.095384872320494e-06, - "loss": 0.9177, - "step": 6657 - }, - { - "epoch": 0.5003757703291748, - "grad_norm": 1.6869867745306737, - "learning_rate": 2.094898581302251e-06, - "loss": 0.9558, - "step": 6658 - }, - { - "epoch": 0.5004509243950098, - "grad_norm": 1.5006578115667748, - "learning_rate": 2.094412284660905e-06, - "loss": 0.9572, - "step": 6659 - }, - { - "epoch": 0.5005260784608447, - "grad_norm": 1.3924753544405388, - "learning_rate": 2.093925982425269e-06, - "loss": 0.9958, - "step": 6660 - }, - { - "epoch": 0.5006012325266797, - "grad_norm": 3.3209052779677974, - "learning_rate": 2.093439674624158e-06, - "loss": 0.9157, - "step": 6661 - }, - { - "epoch": 0.5006763865925147, - "grad_norm": 1.5668565668848522, - "learning_rate": 2.09295336128639e-06, - "loss": 0.9719, - "step": 6662 - }, - { - "epoch": 0.5007515406583496, - "grad_norm": 1.3575343682310017, - "learning_rate": 2.0924670424407785e-06, - "loss": 0.9177, - "step": 6663 - }, - { - "epoch": 0.5008266947241846, - "grad_norm": 1.5406459659906273, - "learning_rate": 2.091980718116141e-06, - "loss": 0.9659, - "step": 6664 - }, - { - "epoch": 0.5009018487900195, - "grad_norm": 1.5846196604572012, - "learning_rate": 2.0914943883412935e-06, - "loss": 0.9125, - "step": 6665 - }, - { - "epoch": 0.5009770028558544, - "grad_norm": 1.4869890986532315, - "learning_rate": 2.0910080531450534e-06, - "loss": 1.0243, - "step": 6666 - }, - { - "epoch": 0.5010521569216895, - "grad_norm": 1.625397193490205, - "learning_rate": 2.0905217125562378e-06, - "loss": 0.9955, - "step": 6667 - }, - { - "epoch": 0.5011273109875244, - "grad_norm": 0.9284815299354503, - "learning_rate": 2.0900353666036635e-06, - "loss": 0.9315, - "step": 6668 - }, - { - "epoch": 0.5012024650533594, - "grad_norm": 1.7283994129145481, - "learning_rate": 2.0895490153161496e-06, - "loss": 1.0812, - "step": 6669 - }, - { - "epoch": 0.5012776191191943, - "grad_norm": 41.441519471814885, - "learning_rate": 2.089062658722513e-06, - "loss": 1.0293, - "step": 6670 - }, - { - "epoch": 0.5013527731850294, - "grad_norm": 2.5269832678279784, - "learning_rate": 2.0885762968515737e-06, - "loss": 1.1782, - "step": 6671 - }, - { - "epoch": 0.5014279272508643, - "grad_norm": 5.389258877789325, - "learning_rate": 2.088089929732149e-06, - "loss": 0.9529, - "step": 6672 - }, - { - "epoch": 0.5015030813166992, - "grad_norm": 1.748326455033951, - "learning_rate": 2.0876035573930587e-06, - "loss": 1.0256, - "step": 6673 - }, - { - "epoch": 0.5015782353825342, - "grad_norm": 1.8340778428575186, - "learning_rate": 2.0871171798631224e-06, - "loss": 0.968, - "step": 6674 - }, - { - "epoch": 0.5016533894483691, - "grad_norm": 1.5363178857093076, - "learning_rate": 2.0866307971711594e-06, - "loss": 1.0156, - "step": 6675 - }, - { - "epoch": 0.5017285435142042, - "grad_norm": 1.793879790711108, - "learning_rate": 2.08614440934599e-06, - "loss": 0.9163, - "step": 6676 - }, - { - "epoch": 0.5018036975800391, - "grad_norm": 1.6575470649861026, - "learning_rate": 2.0856580164164344e-06, - "loss": 0.9911, - "step": 6677 - }, - { - "epoch": 0.5018788516458741, - "grad_norm": 1.7523953529273655, - "learning_rate": 2.085171618411313e-06, - "loss": 0.9703, - "step": 6678 - }, - { - "epoch": 0.501954005711709, - "grad_norm": 1.9355697861440417, - "learning_rate": 2.0846852153594477e-06, - "loss": 1.0322, - "step": 6679 - }, - { - "epoch": 0.5020291597775439, - "grad_norm": 1.6161370297961646, - "learning_rate": 2.0841988072896585e-06, - "loss": 0.9843, - "step": 6680 - }, - { - "epoch": 0.502104313843379, - "grad_norm": 1.9889303470649933, - "learning_rate": 2.0837123942307677e-06, - "loss": 0.9837, - "step": 6681 - }, - { - "epoch": 0.5021794679092139, - "grad_norm": 2.0695079463742587, - "learning_rate": 2.0832259762115977e-06, - "loss": 0.9615, - "step": 6682 - }, - { - "epoch": 0.5022546219750489, - "grad_norm": 1.7999511924060456, - "learning_rate": 2.0827395532609685e-06, - "loss": 1.0338, - "step": 6683 - }, - { - "epoch": 0.5023297760408838, - "grad_norm": 1.6072308228654495, - "learning_rate": 2.082253125407705e-06, - "loss": 1.0244, - "step": 6684 - }, - { - "epoch": 0.5024049301067188, - "grad_norm": 1.5031246383571388, - "learning_rate": 2.0817666926806287e-06, - "loss": 1.0216, - "step": 6685 - }, - { - "epoch": 0.5024800841725537, - "grad_norm": 1.5463299031686588, - "learning_rate": 2.0812802551085633e-06, - "loss": 1.0028, - "step": 6686 - }, - { - "epoch": 0.5025552382383887, - "grad_norm": 1.9624813127539138, - "learning_rate": 2.0807938127203304e-06, - "loss": 1.0009, - "step": 6687 - }, - { - "epoch": 0.5026303923042237, - "grad_norm": 1.3255128641644678, - "learning_rate": 2.080307365544755e-06, - "loss": 0.9817, - "step": 6688 - }, - { - "epoch": 0.5027055463700586, - "grad_norm": 2.599677425327857, - "learning_rate": 2.0798209136106615e-06, - "loss": 1.0958, - "step": 6689 - }, - { - "epoch": 0.5027807004358936, - "grad_norm": 1.579354288971532, - "learning_rate": 2.0793344569468725e-06, - "loss": 0.9769, - "step": 6690 - }, - { - "epoch": 0.5028558545017285, - "grad_norm": 11.33933308680522, - "learning_rate": 2.0788479955822136e-06, - "loss": 0.9718, - "step": 6691 - }, - { - "epoch": 0.5029310085675635, - "grad_norm": 1.9049117597032834, - "learning_rate": 2.0783615295455082e-06, - "loss": 0.993, - "step": 6692 - }, - { - "epoch": 0.5030061626333985, - "grad_norm": 1.446989993310585, - "learning_rate": 2.077875058865583e-06, - "loss": 0.9602, - "step": 6693 - }, - { - "epoch": 0.5030813166992334, - "grad_norm": 1.8440473930904995, - "learning_rate": 2.077388583571262e-06, - "loss": 0.9645, - "step": 6694 - }, - { - "epoch": 0.5031564707650684, - "grad_norm": 1.6894515726600527, - "learning_rate": 2.076902103691371e-06, - "loss": 0.9534, - "step": 6695 - }, - { - "epoch": 0.5032316248309033, - "grad_norm": 1.6507340474563184, - "learning_rate": 2.076415619254736e-06, - "loss": 1.1025, - "step": 6696 - }, - { - "epoch": 0.5033067788967384, - "grad_norm": 3.0581883811034936, - "learning_rate": 2.075929130290183e-06, - "loss": 0.9823, - "step": 6697 - }, - { - "epoch": 0.5033819329625733, - "grad_norm": 0.6106778912786305, - "learning_rate": 2.075442636826538e-06, - "loss": 0.8405, - "step": 6698 - }, - { - "epoch": 0.5034570870284082, - "grad_norm": 1.6683179860484127, - "learning_rate": 2.0749561388926283e-06, - "loss": 1.0009, - "step": 6699 - }, - { - "epoch": 0.5035322410942432, - "grad_norm": 1.5639800881730503, - "learning_rate": 2.07446963651728e-06, - "loss": 0.9436, - "step": 6700 - }, - { - "epoch": 0.5036073951600781, - "grad_norm": 2.170782129876097, - "learning_rate": 2.073983129729321e-06, - "loss": 0.8823, - "step": 6701 - }, - { - "epoch": 0.5036825492259132, - "grad_norm": 2.0536518460278517, - "learning_rate": 2.073496618557577e-06, - "loss": 0.9378, - "step": 6702 - }, - { - "epoch": 0.5037577032917481, - "grad_norm": 2.2977711492849076, - "learning_rate": 2.073010103030878e-06, - "loss": 0.9768, - "step": 6703 - }, - { - "epoch": 0.5038328573575831, - "grad_norm": 2.0090719940820403, - "learning_rate": 2.0725235831780516e-06, - "loss": 0.8294, - "step": 6704 - }, - { - "epoch": 0.503908011423418, - "grad_norm": 1.5543622007253333, - "learning_rate": 2.0720370590279234e-06, - "loss": 1.0076, - "step": 6705 - }, - { - "epoch": 0.5039831654892529, - "grad_norm": 1.518056197698065, - "learning_rate": 2.0715505306093247e-06, - "loss": 1.0362, - "step": 6706 - }, - { - "epoch": 0.504058319555088, - "grad_norm": 0.6371344397109193, - "learning_rate": 2.071063997951082e-06, - "loss": 0.8559, - "step": 6707 - }, - { - "epoch": 0.5041334736209229, - "grad_norm": 2.080725398117937, - "learning_rate": 2.0705774610820267e-06, - "loss": 0.906, - "step": 6708 - }, - { - "epoch": 0.5042086276867579, - "grad_norm": 1.5642724474563936, - "learning_rate": 2.070090920030986e-06, - "loss": 0.9399, - "step": 6709 - }, - { - "epoch": 0.5042837817525928, - "grad_norm": 1.464060011691622, - "learning_rate": 2.0696043748267897e-06, - "loss": 1.0802, - "step": 6710 - }, - { - "epoch": 0.5043589358184277, - "grad_norm": 1.783813572628233, - "learning_rate": 2.0691178254982684e-06, - "loss": 0.9331, - "step": 6711 - }, - { - "epoch": 0.5044340898842627, - "grad_norm": 0.7522022973932034, - "learning_rate": 2.0686312720742504e-06, - "loss": 0.8249, - "step": 6712 - }, - { - "epoch": 0.5045092439500977, - "grad_norm": 1.8636595144123806, - "learning_rate": 2.068144714583567e-06, - "loss": 1.0529, - "step": 6713 - }, - { - "epoch": 0.5045843980159327, - "grad_norm": 2.054009387305927, - "learning_rate": 2.0676581530550485e-06, - "loss": 1.0045, - "step": 6714 - }, - { - "epoch": 0.5046595520817676, - "grad_norm": 1.6353388320316182, - "learning_rate": 2.067171587517525e-06, - "loss": 1.0386, - "step": 6715 - }, - { - "epoch": 0.5047347061476026, - "grad_norm": 1.8014404679032316, - "learning_rate": 2.066685017999828e-06, - "loss": 0.9849, - "step": 6716 - }, - { - "epoch": 0.5048098602134375, - "grad_norm": 1.764289283197256, - "learning_rate": 2.0661984445307886e-06, - "loss": 0.9519, - "step": 6717 - }, - { - "epoch": 0.5048850142792725, - "grad_norm": 1.7182805200422429, - "learning_rate": 2.0657118671392373e-06, - "loss": 0.9703, - "step": 6718 - }, - { - "epoch": 0.5049601683451075, - "grad_norm": 1.886171439262074, - "learning_rate": 2.0652252858540064e-06, - "loss": 0.9704, - "step": 6719 - }, - { - "epoch": 0.5050353224109424, - "grad_norm": 1.4399163213894617, - "learning_rate": 2.0647387007039277e-06, - "loss": 0.9823, - "step": 6720 - }, - { - "epoch": 0.5051104764767774, - "grad_norm": 1.9551049534560339, - "learning_rate": 2.0642521117178332e-06, - "loss": 1.0489, - "step": 6721 - }, - { - "epoch": 0.5051856305426123, - "grad_norm": 1.5982646203544129, - "learning_rate": 2.0637655189245548e-06, - "loss": 0.9678, - "step": 6722 - }, - { - "epoch": 0.5052607846084474, - "grad_norm": 1.3284591254513611, - "learning_rate": 2.0632789223529254e-06, - "loss": 1.0298, - "step": 6723 - }, - { - "epoch": 0.5053359386742823, - "grad_norm": 1.7332267740702612, - "learning_rate": 2.0627923220317767e-06, - "loss": 1.072, - "step": 6724 - }, - { - "epoch": 0.5054110927401172, - "grad_norm": 1.4989610888906693, - "learning_rate": 2.062305717989943e-06, - "loss": 0.9886, - "step": 6725 - }, - { - "epoch": 0.5054862468059522, - "grad_norm": 2.5647999746100125, - "learning_rate": 2.0618191102562575e-06, - "loss": 0.9541, - "step": 6726 - }, - { - "epoch": 0.5055614008717871, - "grad_norm": 2.0860624240228685, - "learning_rate": 2.061332498859553e-06, - "loss": 0.9302, - "step": 6727 - }, - { - "epoch": 0.5056365549376222, - "grad_norm": 1.2869075912194776, - "learning_rate": 2.060845883828663e-06, - "loss": 0.9185, - "step": 6728 - }, - { - "epoch": 0.5057117090034571, - "grad_norm": 0.804794637714234, - "learning_rate": 2.0603592651924206e-06, - "loss": 0.7833, - "step": 6729 - }, - { - "epoch": 0.5057868630692921, - "grad_norm": 1.5218430220046573, - "learning_rate": 2.059872642979661e-06, - "loss": 0.8556, - "step": 6730 - }, - { - "epoch": 0.505862017135127, - "grad_norm": 2.2122606826922624, - "learning_rate": 2.0593860172192178e-06, - "loss": 0.8804, - "step": 6731 - }, - { - "epoch": 0.5059371712009619, - "grad_norm": 1.669627309774892, - "learning_rate": 2.0588993879399265e-06, - "loss": 1.0012, - "step": 6732 - }, - { - "epoch": 0.506012325266797, - "grad_norm": 1.4485921404774793, - "learning_rate": 2.0584127551706202e-06, - "loss": 0.9704, - "step": 6733 - }, - { - "epoch": 0.5060874793326319, - "grad_norm": 2.0184422403065225, - "learning_rate": 2.0579261189401345e-06, - "loss": 0.9817, - "step": 6734 - }, - { - "epoch": 0.5061626333984669, - "grad_norm": 1.7482437449152026, - "learning_rate": 2.0574394792773048e-06, - "loss": 1.0461, - "step": 6735 - }, - { - "epoch": 0.5062377874643018, - "grad_norm": 2.16702475400833, - "learning_rate": 2.0569528362109662e-06, - "loss": 1.0442, - "step": 6736 - }, - { - "epoch": 0.5063129415301367, - "grad_norm": 1.6215596092670321, - "learning_rate": 2.056466189769953e-06, - "loss": 0.8851, - "step": 6737 - }, - { - "epoch": 0.5063880955959718, - "grad_norm": 1.947570103569854, - "learning_rate": 2.055979539983103e-06, - "loss": 0.8984, - "step": 6738 - }, - { - "epoch": 0.5064632496618067, - "grad_norm": 0.823949833250188, - "learning_rate": 2.05549288687925e-06, - "loss": 0.8244, - "step": 6739 - }, - { - "epoch": 0.5065384037276417, - "grad_norm": 1.3815118308484038, - "learning_rate": 2.0550062304872317e-06, - "loss": 0.9976, - "step": 6740 - }, - { - "epoch": 0.5066135577934766, - "grad_norm": 1.5152362442890153, - "learning_rate": 2.054519570835883e-06, - "loss": 0.8591, - "step": 6741 - }, - { - "epoch": 0.5066887118593116, - "grad_norm": 1.6213996322862927, - "learning_rate": 2.0540329079540414e-06, - "loss": 1.0387, - "step": 6742 - }, - { - "epoch": 0.5067638659251466, - "grad_norm": 2.346283725664818, - "learning_rate": 2.053546241870543e-06, - "loss": 0.9794, - "step": 6743 - }, - { - "epoch": 0.5068390199909815, - "grad_norm": 1.3617895759026688, - "learning_rate": 2.053059572614224e-06, - "loss": 1.0697, - "step": 6744 - }, - { - "epoch": 0.5069141740568165, - "grad_norm": 2.5467561097274474, - "learning_rate": 2.0525729002139233e-06, - "loss": 1.0233, - "step": 6745 - }, - { - "epoch": 0.5069893281226514, - "grad_norm": 1.8444926818925842, - "learning_rate": 2.052086224698476e-06, - "loss": 0.9088, - "step": 6746 - }, - { - "epoch": 0.5070644821884864, - "grad_norm": 2.8670143678037037, - "learning_rate": 2.0515995460967204e-06, - "loss": 0.9865, - "step": 6747 - }, - { - "epoch": 0.5071396362543213, - "grad_norm": 1.520304818834776, - "learning_rate": 2.0511128644374953e-06, - "loss": 1.0017, - "step": 6748 - }, - { - "epoch": 0.5072147903201564, - "grad_norm": 1.8163320218578152, - "learning_rate": 2.0506261797496357e-06, - "loss": 0.881, - "step": 6749 - }, - { - "epoch": 0.5072899443859913, - "grad_norm": 1.7699608018274002, - "learning_rate": 2.0501394920619822e-06, - "loss": 1.0047, - "step": 6750 - }, - { - "epoch": 0.5073650984518262, - "grad_norm": 1.708627473893434, - "learning_rate": 2.0496528014033717e-06, - "loss": 1.0333, - "step": 6751 - }, - { - "epoch": 0.5074402525176612, - "grad_norm": 1.6363838963988349, - "learning_rate": 2.0491661078026423e-06, - "loss": 0.9273, - "step": 6752 - }, - { - "epoch": 0.5075154065834961, - "grad_norm": 0.7687661099490423, - "learning_rate": 2.0486794112886328e-06, - "loss": 0.8652, - "step": 6753 - }, - { - "epoch": 0.5075905606493312, - "grad_norm": 1.7093058430910542, - "learning_rate": 2.0481927118901817e-06, - "loss": 0.9702, - "step": 6754 - }, - { - "epoch": 0.5076657147151661, - "grad_norm": 3.7708269965749084, - "learning_rate": 2.047706009636128e-06, - "loss": 1.0054, - "step": 6755 - }, - { - "epoch": 0.507740868781001, - "grad_norm": 1.5048433629137346, - "learning_rate": 2.0472193045553104e-06, - "loss": 1.0364, - "step": 6756 - }, - { - "epoch": 0.507816022846836, - "grad_norm": 2.0165851287541647, - "learning_rate": 2.0467325966765683e-06, - "loss": 0.9937, - "step": 6757 - }, - { - "epoch": 0.5078911769126709, - "grad_norm": 1.677464135890934, - "learning_rate": 2.046245886028741e-06, - "loss": 1.0583, - "step": 6758 - }, - { - "epoch": 0.507966330978506, - "grad_norm": 0.751556529697489, - "learning_rate": 2.045759172640668e-06, - "loss": 0.827, - "step": 6759 - }, - { - "epoch": 0.5080414850443409, - "grad_norm": 2.0055266552877278, - "learning_rate": 2.0452724565411886e-06, - "loss": 0.9538, - "step": 6760 - }, - { - "epoch": 0.5081166391101759, - "grad_norm": 1.5304173911231687, - "learning_rate": 2.044785737759143e-06, - "loss": 0.9936, - "step": 6761 - }, - { - "epoch": 0.5081917931760108, - "grad_norm": 1.5464340506254899, - "learning_rate": 2.0442990163233704e-06, - "loss": 0.9387, - "step": 6762 - }, - { - "epoch": 0.5082669472418457, - "grad_norm": 1.8293792464946936, - "learning_rate": 2.0438122922627114e-06, - "loss": 1.0192, - "step": 6763 - }, - { - "epoch": 0.5083421013076808, - "grad_norm": 5.016308128919384, - "learning_rate": 2.0433255656060066e-06, - "loss": 0.9761, - "step": 6764 - }, - { - "epoch": 0.5084172553735157, - "grad_norm": 1.9463391639703584, - "learning_rate": 2.0428388363820966e-06, - "loss": 0.8634, - "step": 6765 - }, - { - "epoch": 0.5084924094393507, - "grad_norm": 1.3667855680117387, - "learning_rate": 2.0423521046198206e-06, - "loss": 1.0154, - "step": 6766 - }, - { - "epoch": 0.5085675635051856, - "grad_norm": 0.7794714710576712, - "learning_rate": 2.041865370348021e-06, - "loss": 0.8498, - "step": 6767 - }, - { - "epoch": 0.5086427175710206, - "grad_norm": 1.745808281487357, - "learning_rate": 2.0413786335955374e-06, - "loss": 0.9636, - "step": 6768 - }, - { - "epoch": 0.5087178716368556, - "grad_norm": 1.7922138295322356, - "learning_rate": 2.0408918943912113e-06, - "loss": 0.9034, - "step": 6769 - }, - { - "epoch": 0.5087930257026905, - "grad_norm": 1.687970413762284, - "learning_rate": 2.0404051527638844e-06, - "loss": 0.9907, - "step": 6770 - }, - { - "epoch": 0.5088681797685255, - "grad_norm": 1.4366019387647528, - "learning_rate": 2.039918408742397e-06, - "loss": 0.9361, - "step": 6771 - }, - { - "epoch": 0.5089433338343604, - "grad_norm": 5.393482987567738, - "learning_rate": 2.039431662355591e-06, - "loss": 0.9899, - "step": 6772 - }, - { - "epoch": 0.5090184879001954, - "grad_norm": 1.6834939807617693, - "learning_rate": 2.0389449136323082e-06, - "loss": 0.9586, - "step": 6773 - }, - { - "epoch": 0.5090936419660304, - "grad_norm": 1.708026517282514, - "learning_rate": 2.0384581626013905e-06, - "loss": 0.9667, - "step": 6774 - }, - { - "epoch": 0.5091687960318654, - "grad_norm": 1.7860772826592213, - "learning_rate": 2.037971409291679e-06, - "loss": 1.0398, - "step": 6775 - }, - { - "epoch": 0.5092439500977003, - "grad_norm": 1.7863785380197033, - "learning_rate": 2.037484653732016e-06, - "loss": 1.0097, - "step": 6776 - }, - { - "epoch": 0.5093191041635352, - "grad_norm": 2.0298080020928353, - "learning_rate": 2.036997895951244e-06, - "loss": 1.016, - "step": 6777 - }, - { - "epoch": 0.5093942582293702, - "grad_norm": 2.160925840179475, - "learning_rate": 2.0365111359782046e-06, - "loss": 0.9632, - "step": 6778 - }, - { - "epoch": 0.5094694122952051, - "grad_norm": 2.336184893619881, - "learning_rate": 2.0360243738417414e-06, - "loss": 0.9111, - "step": 6779 - }, - { - "epoch": 0.5095445663610402, - "grad_norm": 1.7357873063647549, - "learning_rate": 2.035537609570695e-06, - "loss": 1.0141, - "step": 6780 - }, - { - "epoch": 0.5096197204268751, - "grad_norm": 0.7322532357938518, - "learning_rate": 2.03505084319391e-06, - "loss": 0.8255, - "step": 6781 - }, - { - "epoch": 0.50969487449271, - "grad_norm": 3.1984239273267834, - "learning_rate": 2.0345640747402283e-06, - "loss": 0.9828, - "step": 6782 - }, - { - "epoch": 0.509770028558545, - "grad_norm": 1.4891037157725162, - "learning_rate": 2.034077304238492e-06, - "loss": 0.9925, - "step": 6783 - }, - { - "epoch": 0.50984518262438, - "grad_norm": 1.4964294853688884, - "learning_rate": 2.0335905317175457e-06, - "loss": 1.0257, - "step": 6784 - }, - { - "epoch": 0.509920336690215, - "grad_norm": 2.2852566167194994, - "learning_rate": 2.0331037572062314e-06, - "loss": 1.0255, - "step": 6785 - }, - { - "epoch": 0.5099954907560499, - "grad_norm": 1.3777092771566817, - "learning_rate": 2.032616980733393e-06, - "loss": 0.9177, - "step": 6786 - }, - { - "epoch": 0.5100706448218849, - "grad_norm": 1.7453953927350074, - "learning_rate": 2.0321302023278734e-06, - "loss": 0.9982, - "step": 6787 - }, - { - "epoch": 0.5101457988877198, - "grad_norm": 1.6844065534081452, - "learning_rate": 2.031643422018516e-06, - "loss": 0.9416, - "step": 6788 - }, - { - "epoch": 0.5102209529535547, - "grad_norm": 1.4617039103241494, - "learning_rate": 2.0311566398341653e-06, - "loss": 0.907, - "step": 6789 - }, - { - "epoch": 0.5102961070193898, - "grad_norm": 1.9818313997080823, - "learning_rate": 2.0306698558036635e-06, - "loss": 1.0301, - "step": 6790 - }, - { - "epoch": 0.5103712610852247, - "grad_norm": 1.426692723559164, - "learning_rate": 2.0301830699558563e-06, - "loss": 0.9667, - "step": 6791 - }, - { - "epoch": 0.5104464151510597, - "grad_norm": 1.600539983586617, - "learning_rate": 2.029696282319586e-06, - "loss": 1.0619, - "step": 6792 - }, - { - "epoch": 0.5105215692168946, - "grad_norm": 1.7009822420048855, - "learning_rate": 2.0292094929236976e-06, - "loss": 1.0098, - "step": 6793 - }, - { - "epoch": 0.5105967232827296, - "grad_norm": 1.3058593877803197, - "learning_rate": 2.028722701797035e-06, - "loss": 0.9054, - "step": 6794 - }, - { - "epoch": 0.5106718773485646, - "grad_norm": 0.7687299787814004, - "learning_rate": 2.0282359089684417e-06, - "loss": 0.8133, - "step": 6795 - }, - { - "epoch": 0.5107470314143995, - "grad_norm": 2.9319771326378103, - "learning_rate": 2.027749114466763e-06, - "loss": 0.9551, - "step": 6796 - }, - { - "epoch": 0.5108221854802345, - "grad_norm": 2.1039686078315136, - "learning_rate": 2.0272623183208433e-06, - "loss": 0.9585, - "step": 6797 - }, - { - "epoch": 0.5108973395460694, - "grad_norm": 0.80300252848948, - "learning_rate": 2.0267755205595266e-06, - "loss": 0.8902, - "step": 6798 - }, - { - "epoch": 0.5109724936119044, - "grad_norm": 1.818316105738544, - "learning_rate": 2.026288721211658e-06, - "loss": 0.9467, - "step": 6799 - }, - { - "epoch": 0.5110476476777394, - "grad_norm": 1.5184826211295956, - "learning_rate": 2.0258019203060816e-06, - "loss": 0.988, - "step": 6800 - }, - { - "epoch": 0.5111228017435743, - "grad_norm": 4.182896831176757, - "learning_rate": 2.025315117871643e-06, - "loss": 0.9656, - "step": 6801 - }, - { - "epoch": 0.5111979558094093, - "grad_norm": 1.9049904210808488, - "learning_rate": 2.0248283139371862e-06, - "loss": 0.9509, - "step": 6802 - }, - { - "epoch": 0.5112731098752442, - "grad_norm": 1.4271740942539028, - "learning_rate": 2.0243415085315573e-06, - "loss": 1.0769, - "step": 6803 - }, - { - "epoch": 0.5113482639410792, - "grad_norm": 1.6791874340783042, - "learning_rate": 2.023854701683601e-06, - "loss": 0.8696, - "step": 6804 - }, - { - "epoch": 0.5114234180069142, - "grad_norm": 2.008566154607939, - "learning_rate": 2.0233678934221615e-06, - "loss": 0.9639, - "step": 6805 - }, - { - "epoch": 0.5114985720727492, - "grad_norm": 1.7967616833325155, - "learning_rate": 2.0228810837760853e-06, - "loss": 0.8793, - "step": 6806 - }, - { - "epoch": 0.5115737261385841, - "grad_norm": 2.9624964653670074, - "learning_rate": 2.0223942727742168e-06, - "loss": 0.9613, - "step": 6807 - }, - { - "epoch": 0.511648880204419, - "grad_norm": 1.9016797044503793, - "learning_rate": 2.0219074604454026e-06, - "loss": 0.9742, - "step": 6808 - }, - { - "epoch": 0.511724034270254, - "grad_norm": 1.7039925002036018, - "learning_rate": 2.021420646818487e-06, - "loss": 1.0161, - "step": 6809 - }, - { - "epoch": 0.511799188336089, - "grad_norm": 1.5907001110397014, - "learning_rate": 2.0209338319223155e-06, - "loss": 1.0562, - "step": 6810 - }, - { - "epoch": 0.511874342401924, - "grad_norm": 1.9271327106920115, - "learning_rate": 2.0204470157857354e-06, - "loss": 0.8965, - "step": 6811 - }, - { - "epoch": 0.5119494964677589, - "grad_norm": 0.7002953493809003, - "learning_rate": 2.0199601984375907e-06, - "loss": 0.8615, - "step": 6812 - }, - { - "epoch": 0.5120246505335939, - "grad_norm": 1.5311680985949208, - "learning_rate": 2.0194733799067284e-06, - "loss": 0.952, - "step": 6813 - }, - { - "epoch": 0.5120998045994288, - "grad_norm": 2.0018380778680482, - "learning_rate": 2.0189865602219934e-06, - "loss": 0.9787, - "step": 6814 - }, - { - "epoch": 0.5121749586652637, - "grad_norm": 1.5438064039791126, - "learning_rate": 2.0184997394122317e-06, - "loss": 1.0729, - "step": 6815 - }, - { - "epoch": 0.5122501127310988, - "grad_norm": 1.8083741155616821, - "learning_rate": 2.01801291750629e-06, - "loss": 1.0626, - "step": 6816 - }, - { - "epoch": 0.5123252667969337, - "grad_norm": 2.075254135195729, - "learning_rate": 2.0175260945330134e-06, - "loss": 0.9296, - "step": 6817 - }, - { - "epoch": 0.5124004208627687, - "grad_norm": 1.4974736555581123, - "learning_rate": 2.0170392705212495e-06, - "loss": 0.9753, - "step": 6818 - }, - { - "epoch": 0.5124755749286036, - "grad_norm": 1.559759865398363, - "learning_rate": 2.016552445499843e-06, - "loss": 0.9301, - "step": 6819 - }, - { - "epoch": 0.5125507289944387, - "grad_norm": 1.4962912645760702, - "learning_rate": 2.0160656194976407e-06, - "loss": 1.0187, - "step": 6820 - }, - { - "epoch": 0.5126258830602736, - "grad_norm": 1.4630830890269795, - "learning_rate": 2.0155787925434893e-06, - "loss": 1.1034, - "step": 6821 - }, - { - "epoch": 0.5127010371261085, - "grad_norm": 1.4773590522832976, - "learning_rate": 2.0150919646662342e-06, - "loss": 0.9453, - "step": 6822 - }, - { - "epoch": 0.5127761911919435, - "grad_norm": 1.525494873548388, - "learning_rate": 2.014605135894723e-06, - "loss": 0.9917, - "step": 6823 - }, - { - "epoch": 0.5128513452577784, - "grad_norm": 1.5548760381272395, - "learning_rate": 2.0141183062578013e-06, - "loss": 0.9195, - "step": 6824 - }, - { - "epoch": 0.5129264993236134, - "grad_norm": 1.9675052280186915, - "learning_rate": 2.013631475784316e-06, - "loss": 0.8642, - "step": 6825 - }, - { - "epoch": 0.5130016533894484, - "grad_norm": 1.6415075925383604, - "learning_rate": 2.0131446445031134e-06, - "loss": 0.9543, - "step": 6826 - }, - { - "epoch": 0.5130768074552833, - "grad_norm": 0.6062916731521676, - "learning_rate": 2.0126578124430402e-06, - "loss": 0.7667, - "step": 6827 - }, - { - "epoch": 0.5131519615211183, - "grad_norm": 1.4586536691430618, - "learning_rate": 2.012170979632944e-06, - "loss": 1.071, - "step": 6828 - }, - { - "epoch": 0.5132271155869532, - "grad_norm": 1.6970477386845273, - "learning_rate": 2.0116841461016685e-06, - "loss": 1.0134, - "step": 6829 - }, - { - "epoch": 0.5133022696527882, - "grad_norm": 1.8084846611986094, - "learning_rate": 2.0111973118780653e-06, - "loss": 0.9931, - "step": 6830 - }, - { - "epoch": 0.5133774237186232, - "grad_norm": 1.7608581606331288, - "learning_rate": 2.0107104769909773e-06, - "loss": 1.0141, - "step": 6831 - }, - { - "epoch": 0.5134525777844582, - "grad_norm": 2.8949559115147605, - "learning_rate": 2.0102236414692515e-06, - "loss": 0.8631, - "step": 6832 - }, - { - "epoch": 0.5135277318502931, - "grad_norm": 1.3689429647910207, - "learning_rate": 2.009736805341737e-06, - "loss": 0.9608, - "step": 6833 - }, - { - "epoch": 0.513602885916128, - "grad_norm": 1.8374388704057687, - "learning_rate": 2.0092499686372794e-06, - "loss": 0.8929, - "step": 6834 - }, - { - "epoch": 0.513678039981963, - "grad_norm": 1.4653155978335426, - "learning_rate": 2.0087631313847252e-06, - "loss": 0.9046, - "step": 6835 - }, - { - "epoch": 0.513753194047798, - "grad_norm": 1.5484938194587596, - "learning_rate": 2.0082762936129226e-06, - "loss": 0.9675, - "step": 6836 - }, - { - "epoch": 0.513828348113633, - "grad_norm": 1.3948114364776543, - "learning_rate": 2.0077894553507174e-06, - "loss": 0.8967, - "step": 6837 - }, - { - "epoch": 0.5139035021794679, - "grad_norm": 1.702856550145709, - "learning_rate": 2.0073026166269577e-06, - "loss": 0.9222, - "step": 6838 - }, - { - "epoch": 0.5139786562453029, - "grad_norm": 2.0632674205566444, - "learning_rate": 2.006815777470489e-06, - "loss": 1.071, - "step": 6839 - }, - { - "epoch": 0.5140538103111378, - "grad_norm": 1.572127853896881, - "learning_rate": 2.0063289379101606e-06, - "loss": 0.9533, - "step": 6840 - }, - { - "epoch": 0.5141289643769728, - "grad_norm": 1.489388041821366, - "learning_rate": 2.0058420979748172e-06, - "loss": 1.0861, - "step": 6841 - }, - { - "epoch": 0.5142041184428078, - "grad_norm": 1.4442122111739433, - "learning_rate": 2.005355257693308e-06, - "loss": 0.9413, - "step": 6842 - }, - { - "epoch": 0.5142792725086427, - "grad_norm": 2.6574189250704814, - "learning_rate": 2.0048684170944795e-06, - "loss": 1.1076, - "step": 6843 - }, - { - "epoch": 0.5143544265744777, - "grad_norm": 1.837119318655147, - "learning_rate": 2.004381576207178e-06, - "loss": 1.0729, - "step": 6844 - }, - { - "epoch": 0.5144295806403126, - "grad_norm": 1.9828962688621983, - "learning_rate": 2.0038947350602516e-06, - "loss": 0.9675, - "step": 6845 - }, - { - "epoch": 0.5145047347061475, - "grad_norm": 1.6491420740674816, - "learning_rate": 2.0034078936825467e-06, - "loss": 0.9724, - "step": 6846 - }, - { - "epoch": 0.5145798887719826, - "grad_norm": 1.6134579910375024, - "learning_rate": 2.002921052102912e-06, - "loss": 0.9498, - "step": 6847 - }, - { - "epoch": 0.5146550428378175, - "grad_norm": 1.5757382918002614, - "learning_rate": 2.0024342103501934e-06, - "loss": 0.9772, - "step": 6848 - }, - { - "epoch": 0.5147301969036525, - "grad_norm": 1.545875766031244, - "learning_rate": 2.001947368453238e-06, - "loss": 1.0222, - "step": 6849 - }, - { - "epoch": 0.5148053509694874, - "grad_norm": 0.751430815582509, - "learning_rate": 2.001460526440894e-06, - "loss": 0.9269, - "step": 6850 - }, - { - "epoch": 0.5148805050353225, - "grad_norm": 2.4850992550590645, - "learning_rate": 2.0009736843420076e-06, - "loss": 1.0582, - "step": 6851 - }, - { - "epoch": 0.5149556591011574, - "grad_norm": 1.5200603123042857, - "learning_rate": 2.0004868421854274e-06, - "loss": 0.9425, - "step": 6852 - }, - { - "epoch": 0.5150308131669923, - "grad_norm": 1.860736252467049, - "learning_rate": 2.0000000000000003e-06, - "loss": 0.9947, - "step": 6853 - }, - { - "epoch": 0.5151059672328273, - "grad_norm": 1.9861753211797852, - "learning_rate": 1.999513157814572e-06, - "loss": 1.0043, - "step": 6854 - }, - { - "epoch": 0.5151811212986622, - "grad_norm": 1.5227829529611971, - "learning_rate": 1.9990263156579922e-06, - "loss": 1.0603, - "step": 6855 - }, - { - "epoch": 0.5152562753644973, - "grad_norm": 1.816377963594436, - "learning_rate": 1.998539473559106e-06, - "loss": 0.8729, - "step": 6856 - }, - { - "epoch": 0.5153314294303322, - "grad_norm": 38.46395177224384, - "learning_rate": 1.998052631546762e-06, - "loss": 0.8067, - "step": 6857 - }, - { - "epoch": 0.5154065834961672, - "grad_norm": 1.6358839286769822, - "learning_rate": 1.9975657896498073e-06, - "loss": 1.0174, - "step": 6858 - }, - { - "epoch": 0.5154817375620021, - "grad_norm": 1.63330859081186, - "learning_rate": 1.9970789478970882e-06, - "loss": 1.0632, - "step": 6859 - }, - { - "epoch": 0.515556891627837, - "grad_norm": 1.4723743604028259, - "learning_rate": 1.996592106317453e-06, - "loss": 0.9802, - "step": 6860 - }, - { - "epoch": 0.515632045693672, - "grad_norm": 1.5569084827278987, - "learning_rate": 1.9961052649397486e-06, - "loss": 0.8854, - "step": 6861 - }, - { - "epoch": 0.515707199759507, - "grad_norm": 3.702779890630421, - "learning_rate": 1.995618423792822e-06, - "loss": 0.9441, - "step": 6862 - }, - { - "epoch": 0.515782353825342, - "grad_norm": 1.8585489185084727, - "learning_rate": 1.9951315829055208e-06, - "loss": 1.0165, - "step": 6863 - }, - { - "epoch": 0.5158575078911769, - "grad_norm": 1.4361233961382103, - "learning_rate": 1.994644742306692e-06, - "loss": 0.9774, - "step": 6864 - }, - { - "epoch": 0.5159326619570119, - "grad_norm": 1.612012535164799, - "learning_rate": 1.9941579020251826e-06, - "loss": 0.9389, - "step": 6865 - }, - { - "epoch": 0.5160078160228468, - "grad_norm": 3.728679319677966, - "learning_rate": 1.9936710620898396e-06, - "loss": 0.9442, - "step": 6866 - }, - { - "epoch": 0.5160829700886818, - "grad_norm": 1.801273369726867, - "learning_rate": 1.9931842225295108e-06, - "loss": 0.9791, - "step": 6867 - }, - { - "epoch": 0.5161581241545168, - "grad_norm": 2.084159178762955, - "learning_rate": 1.9926973833730426e-06, - "loss": 1.0806, - "step": 6868 - }, - { - "epoch": 0.5162332782203517, - "grad_norm": 2.044517382354992, - "learning_rate": 1.9922105446492824e-06, - "loss": 0.983, - "step": 6869 - }, - { - "epoch": 0.5163084322861867, - "grad_norm": 1.5723520575039218, - "learning_rate": 1.9917237063870777e-06, - "loss": 1.0662, - "step": 6870 - }, - { - "epoch": 0.5163835863520216, - "grad_norm": 1.3841364067225657, - "learning_rate": 1.9912368686152746e-06, - "loss": 1.0022, - "step": 6871 - }, - { - "epoch": 0.5164587404178566, - "grad_norm": 1.7999096662318497, - "learning_rate": 1.990750031362721e-06, - "loss": 1.0345, - "step": 6872 - }, - { - "epoch": 0.5165338944836916, - "grad_norm": 1.442057640701378, - "learning_rate": 1.9902631946582627e-06, - "loss": 0.9773, - "step": 6873 - }, - { - "epoch": 0.5166090485495265, - "grad_norm": 1.8925780620122217, - "learning_rate": 1.9897763585307483e-06, - "loss": 0.9842, - "step": 6874 - }, - { - "epoch": 0.5166842026153615, - "grad_norm": 1.7273026342860063, - "learning_rate": 1.989289523009024e-06, - "loss": 1.0453, - "step": 6875 - }, - { - "epoch": 0.5167593566811964, - "grad_norm": 1.7855698922225354, - "learning_rate": 1.988802688121935e-06, - "loss": 0.9504, - "step": 6876 - }, - { - "epoch": 0.5168345107470315, - "grad_norm": 1.4773307295396447, - "learning_rate": 1.988315853898331e-06, - "loss": 0.9373, - "step": 6877 - }, - { - "epoch": 0.5169096648128664, - "grad_norm": 1.7170693243180837, - "learning_rate": 1.9878290203670563e-06, - "loss": 1.0532, - "step": 6878 - }, - { - "epoch": 0.5169848188787013, - "grad_norm": 2.161858713839157, - "learning_rate": 1.98734218755696e-06, - "loss": 1.0125, - "step": 6879 - }, - { - "epoch": 0.5170599729445363, - "grad_norm": 1.4195810614723423, - "learning_rate": 1.986855355496887e-06, - "loss": 1.0841, - "step": 6880 - }, - { - "epoch": 0.5171351270103712, - "grad_norm": 1.8410528466152598, - "learning_rate": 1.986368524215684e-06, - "loss": 1.0041, - "step": 6881 - }, - { - "epoch": 0.5172102810762063, - "grad_norm": 1.441420144114946, - "learning_rate": 1.985881693742199e-06, - "loss": 0.983, - "step": 6882 - }, - { - "epoch": 0.5172854351420412, - "grad_norm": 1.59263324880743, - "learning_rate": 1.985394864105277e-06, - "loss": 0.9408, - "step": 6883 - }, - { - "epoch": 0.5173605892078762, - "grad_norm": 0.8295325068013235, - "learning_rate": 1.9849080353337656e-06, - "loss": 0.858, - "step": 6884 - }, - { - "epoch": 0.5174357432737111, - "grad_norm": 7.137665583999692, - "learning_rate": 1.984421207456511e-06, - "loss": 0.8251, - "step": 6885 - }, - { - "epoch": 0.517510897339546, - "grad_norm": 1.772417825642186, - "learning_rate": 1.983934380502359e-06, - "loss": 0.9586, - "step": 6886 - }, - { - "epoch": 0.517586051405381, - "grad_norm": 1.6149337470339022, - "learning_rate": 1.983447554500157e-06, - "loss": 0.879, - "step": 6887 - }, - { - "epoch": 0.517661205471216, - "grad_norm": 1.5989523109877637, - "learning_rate": 1.9829607294787503e-06, - "loss": 0.9522, - "step": 6888 - }, - { - "epoch": 0.517736359537051, - "grad_norm": 1.8052550905412368, - "learning_rate": 1.9824739054669864e-06, - "loss": 0.8708, - "step": 6889 - }, - { - "epoch": 0.5178115136028859, - "grad_norm": 2.686431583677142, - "learning_rate": 1.98198708249371e-06, - "loss": 0.9939, - "step": 6890 - }, - { - "epoch": 0.5178866676687208, - "grad_norm": 1.339220930984959, - "learning_rate": 1.9815002605877685e-06, - "loss": 0.9305, - "step": 6891 - }, - { - "epoch": 0.5179618217345558, - "grad_norm": 2.011310543384266, - "learning_rate": 1.9810134397780073e-06, - "loss": 0.7537, - "step": 6892 - }, - { - "epoch": 0.5180369758003908, - "grad_norm": 1.860305553277847, - "learning_rate": 1.980526620093272e-06, - "loss": 1.0527, - "step": 6893 - }, - { - "epoch": 0.5181121298662258, - "grad_norm": 1.8960680833736774, - "learning_rate": 1.9800398015624095e-06, - "loss": 0.9811, - "step": 6894 - }, - { - "epoch": 0.5181872839320607, - "grad_norm": 1.429730023861705, - "learning_rate": 1.9795529842142644e-06, - "loss": 1.056, - "step": 6895 - }, - { - "epoch": 0.5182624379978957, - "grad_norm": 1.6502509966591408, - "learning_rate": 1.979066168077684e-06, - "loss": 0.9311, - "step": 6896 - }, - { - "epoch": 0.5183375920637306, - "grad_norm": 1.850074910889643, - "learning_rate": 1.978579353181513e-06, - "loss": 0.9267, - "step": 6897 - }, - { - "epoch": 0.5184127461295656, - "grad_norm": 1.9269531893387, - "learning_rate": 1.9780925395545977e-06, - "loss": 0.9877, - "step": 6898 - }, - { - "epoch": 0.5184879001954006, - "grad_norm": 3.1550607990385675, - "learning_rate": 1.977605727225783e-06, - "loss": 1.0328, - "step": 6899 - }, - { - "epoch": 0.5185630542612355, - "grad_norm": 1.9652201176404664, - "learning_rate": 1.977118916223915e-06, - "loss": 1.0213, - "step": 6900 - }, - { - "epoch": 0.5186382083270705, - "grad_norm": 1.4491811577787344, - "learning_rate": 1.9766321065778387e-06, - "loss": 1.0215, - "step": 6901 - }, - { - "epoch": 0.5187133623929054, - "grad_norm": 2.2534210899389273, - "learning_rate": 1.9761452983163996e-06, - "loss": 1.0059, - "step": 6902 - }, - { - "epoch": 0.5187885164587405, - "grad_norm": 1.652893768464783, - "learning_rate": 1.9756584914684425e-06, - "loss": 1.029, - "step": 6903 - }, - { - "epoch": 0.5188636705245754, - "grad_norm": 1.7270707983184055, - "learning_rate": 1.975171686062814e-06, - "loss": 1.016, - "step": 6904 - }, - { - "epoch": 0.5189388245904103, - "grad_norm": 1.5623103980422497, - "learning_rate": 1.974684882128357e-06, - "loss": 1.0036, - "step": 6905 - }, - { - "epoch": 0.5190139786562453, - "grad_norm": 1.5351741100563336, - "learning_rate": 1.974198079693918e-06, - "loss": 1.0652, - "step": 6906 - }, - { - "epoch": 0.5190891327220802, - "grad_norm": 1.6385221537959822, - "learning_rate": 1.973711278788342e-06, - "loss": 0.9459, - "step": 6907 - }, - { - "epoch": 0.5191642867879153, - "grad_norm": 1.6099374029181064, - "learning_rate": 1.973224479440473e-06, - "loss": 0.8948, - "step": 6908 - }, - { - "epoch": 0.5192394408537502, - "grad_norm": 1.4828058883792932, - "learning_rate": 1.972737681679157e-06, - "loss": 0.9097, - "step": 6909 - }, - { - "epoch": 0.5193145949195852, - "grad_norm": 3.026052364433319, - "learning_rate": 1.9722508855332367e-06, - "loss": 0.9718, - "step": 6910 - }, - { - "epoch": 0.5193897489854201, - "grad_norm": 1.6571765683061932, - "learning_rate": 1.971764091031558e-06, - "loss": 0.9381, - "step": 6911 - }, - { - "epoch": 0.519464903051255, - "grad_norm": 1.4489301615247279, - "learning_rate": 1.971277298202965e-06, - "loss": 0.9755, - "step": 6912 - }, - { - "epoch": 0.5195400571170901, - "grad_norm": 1.52808379759366, - "learning_rate": 1.9707905070763027e-06, - "loss": 0.908, - "step": 6913 - }, - { - "epoch": 0.519615211182925, - "grad_norm": 1.6976386780757642, - "learning_rate": 1.970303717680414e-06, - "loss": 0.9939, - "step": 6914 - }, - { - "epoch": 0.51969036524876, - "grad_norm": 1.6649061759655006, - "learning_rate": 1.9698169300441435e-06, - "loss": 1.0142, - "step": 6915 - }, - { - "epoch": 0.5197655193145949, - "grad_norm": 1.9893939608845714, - "learning_rate": 1.9693301441963363e-06, - "loss": 0.993, - "step": 6916 - }, - { - "epoch": 0.5198406733804298, - "grad_norm": 0.7109574535210691, - "learning_rate": 1.9688433601658345e-06, - "loss": 0.8583, - "step": 6917 - }, - { - "epoch": 0.5199158274462649, - "grad_norm": 5.979663717453359, - "learning_rate": 1.9683565779814838e-06, - "loss": 1.0131, - "step": 6918 - }, - { - "epoch": 0.5199909815120998, - "grad_norm": 1.8886660050120454, - "learning_rate": 1.967869797672127e-06, - "loss": 1.0323, - "step": 6919 - }, - { - "epoch": 0.5200661355779348, - "grad_norm": 1.9645204069456825, - "learning_rate": 1.967383019266607e-06, - "loss": 1.0028, - "step": 6920 - }, - { - "epoch": 0.5201412896437697, - "grad_norm": 1.6289973610844322, - "learning_rate": 1.966896242793769e-06, - "loss": 0.9823, - "step": 6921 - }, - { - "epoch": 0.5202164437096047, - "grad_norm": 1.8634649222194024, - "learning_rate": 1.9664094682824545e-06, - "loss": 1.0038, - "step": 6922 - }, - { - "epoch": 0.5202915977754397, - "grad_norm": 1.8484679812113265, - "learning_rate": 1.965922695761508e-06, - "loss": 0.9726, - "step": 6923 - }, - { - "epoch": 0.5203667518412746, - "grad_norm": 2.164951073179594, - "learning_rate": 1.9654359252597723e-06, - "loss": 1.0451, - "step": 6924 - }, - { - "epoch": 0.5204419059071096, - "grad_norm": 1.5367858623827388, - "learning_rate": 1.96494915680609e-06, - "loss": 0.8854, - "step": 6925 - }, - { - "epoch": 0.5205170599729445, - "grad_norm": 1.677352630918647, - "learning_rate": 1.964462390429305e-06, - "loss": 0.8949, - "step": 6926 - }, - { - "epoch": 0.5205922140387795, - "grad_norm": 1.7690885886981085, - "learning_rate": 1.963975626158259e-06, - "loss": 1.0148, - "step": 6927 - }, - { - "epoch": 0.5206673681046144, - "grad_norm": 2.0073392436458795, - "learning_rate": 1.963488864021795e-06, - "loss": 1.0813, - "step": 6928 - }, - { - "epoch": 0.5207425221704495, - "grad_norm": 2.103644831947516, - "learning_rate": 1.9630021040487557e-06, - "loss": 0.9523, - "step": 6929 - }, - { - "epoch": 0.5208176762362844, - "grad_norm": 11.964835460534184, - "learning_rate": 1.962515346267984e-06, - "loss": 1.0314, - "step": 6930 - }, - { - "epoch": 0.5208928303021193, - "grad_norm": 1.3639734909888015, - "learning_rate": 1.9620285907083213e-06, - "loss": 1.0299, - "step": 6931 - }, - { - "epoch": 0.5209679843679543, - "grad_norm": 1.6154463235715448, - "learning_rate": 1.9615418373986097e-06, - "loss": 1.0082, - "step": 6932 - }, - { - "epoch": 0.5210431384337892, - "grad_norm": 1.6565652223430458, - "learning_rate": 1.961055086367692e-06, - "loss": 1.0246, - "step": 6933 - }, - { - "epoch": 0.5211182924996243, - "grad_norm": 1.507124986200395, - "learning_rate": 1.960568337644409e-06, - "loss": 0.934, - "step": 6934 - }, - { - "epoch": 0.5211934465654592, - "grad_norm": 1.6886348440578152, - "learning_rate": 1.9600815912576034e-06, - "loss": 0.9182, - "step": 6935 - }, - { - "epoch": 0.5212686006312941, - "grad_norm": 1.6773064348452524, - "learning_rate": 1.9595948472361163e-06, - "loss": 0.9712, - "step": 6936 - }, - { - "epoch": 0.5213437546971291, - "grad_norm": 1.7726977986106303, - "learning_rate": 1.959108105608788e-06, - "loss": 0.9901, - "step": 6937 - }, - { - "epoch": 0.521418908762964, - "grad_norm": 0.7059586962269843, - "learning_rate": 1.958621366404463e-06, - "loss": 0.8389, - "step": 6938 - }, - { - "epoch": 0.5214940628287991, - "grad_norm": 2.7316136831653917, - "learning_rate": 1.958134629651979e-06, - "loss": 1.0691, - "step": 6939 - }, - { - "epoch": 0.521569216894634, - "grad_norm": 1.7574878986316436, - "learning_rate": 1.957647895380179e-06, - "loss": 0.9651, - "step": 6940 - }, - { - "epoch": 0.521644370960469, - "grad_norm": 1.4969757906243688, - "learning_rate": 1.9571611636179037e-06, - "loss": 0.9891, - "step": 6941 - }, - { - "epoch": 0.5217195250263039, - "grad_norm": 1.6601687442084996, - "learning_rate": 1.956674434393993e-06, - "loss": 1.0472, - "step": 6942 - }, - { - "epoch": 0.5217946790921388, - "grad_norm": 1.6293542918128339, - "learning_rate": 1.9561877077372884e-06, - "loss": 0.9675, - "step": 6943 - }, - { - "epoch": 0.5218698331579739, - "grad_norm": 1.4511139602707808, - "learning_rate": 1.9557009836766294e-06, - "loss": 0.8927, - "step": 6944 - }, - { - "epoch": 0.5219449872238088, - "grad_norm": 1.6479909572260634, - "learning_rate": 1.9552142622408574e-06, - "loss": 1.0628, - "step": 6945 - }, - { - "epoch": 0.5220201412896438, - "grad_norm": 1.8797087987642809, - "learning_rate": 1.954727543458812e-06, - "loss": 0.9125, - "step": 6946 - }, - { - "epoch": 0.5220952953554787, - "grad_norm": 2.021625075304241, - "learning_rate": 1.9542408273593324e-06, - "loss": 0.9945, - "step": 6947 - }, - { - "epoch": 0.5221704494213137, - "grad_norm": 2.0283896627678413, - "learning_rate": 1.9537541139712594e-06, - "loss": 1.0971, - "step": 6948 - }, - { - "epoch": 0.5222456034871487, - "grad_norm": 1.587918176916374, - "learning_rate": 1.9532674033234315e-06, - "loss": 1.036, - "step": 6949 - }, - { - "epoch": 0.5223207575529836, - "grad_norm": 1.76324160239317, - "learning_rate": 1.95278069544469e-06, - "loss": 0.9809, - "step": 6950 - }, - { - "epoch": 0.5223959116188186, - "grad_norm": 2.809747238873312, - "learning_rate": 1.952293990363872e-06, - "loss": 1.0742, - "step": 6951 - }, - { - "epoch": 0.5224710656846535, - "grad_norm": 1.673393771813931, - "learning_rate": 1.9518072881098185e-06, - "loss": 0.9759, - "step": 6952 - }, - { - "epoch": 0.5225462197504885, - "grad_norm": 0.8992503925393684, - "learning_rate": 1.9513205887113675e-06, - "loss": 0.9437, - "step": 6953 - }, - { - "epoch": 0.5226213738163235, - "grad_norm": 1.57257645656132, - "learning_rate": 1.9508338921973576e-06, - "loss": 0.8931, - "step": 6954 - }, - { - "epoch": 0.5226965278821585, - "grad_norm": 3.149401647262351, - "learning_rate": 1.9503471985966285e-06, - "loss": 1.0331, - "step": 6955 - }, - { - "epoch": 0.5227716819479934, - "grad_norm": 1.6605754726893436, - "learning_rate": 1.9498605079380176e-06, - "loss": 0.9354, - "step": 6956 - }, - { - "epoch": 0.5228468360138283, - "grad_norm": 1.4731884710518999, - "learning_rate": 1.949373820250364e-06, - "loss": 0.9621, - "step": 6957 - }, - { - "epoch": 0.5229219900796633, - "grad_norm": 1.7874050541984174, - "learning_rate": 1.9488871355625054e-06, - "loss": 0.999, - "step": 6958 - }, - { - "epoch": 0.5229971441454982, - "grad_norm": 1.4903877149412426, - "learning_rate": 1.9484004539032786e-06, - "loss": 1.001, - "step": 6959 - }, - { - "epoch": 0.5230722982113333, - "grad_norm": 1.6155460891784441, - "learning_rate": 1.947913775301524e-06, - "loss": 0.9768, - "step": 6960 - }, - { - "epoch": 0.5231474522771682, - "grad_norm": 1.5933779861997994, - "learning_rate": 1.9474270997860766e-06, - "loss": 0.9693, - "step": 6961 - }, - { - "epoch": 0.5232226063430031, - "grad_norm": 2.0489117920759212, - "learning_rate": 1.946940427385776e-06, - "loss": 1.0347, - "step": 6962 - }, - { - "epoch": 0.5232977604088381, - "grad_norm": 2.0625907415600384, - "learning_rate": 1.9464537581294576e-06, - "loss": 0.9448, - "step": 6963 - }, - { - "epoch": 0.523372914474673, - "grad_norm": 0.6499084655794388, - "learning_rate": 1.945967092045959e-06, - "loss": 0.8011, - "step": 6964 - }, - { - "epoch": 0.5234480685405081, - "grad_norm": 2.2075606259826515, - "learning_rate": 1.945480429164117e-06, - "loss": 0.936, - "step": 6965 - }, - { - "epoch": 0.523523222606343, - "grad_norm": 1.4569506584562661, - "learning_rate": 1.944993769512768e-06, - "loss": 0.9558, - "step": 6966 - }, - { - "epoch": 0.523598376672178, - "grad_norm": 3.106400643884679, - "learning_rate": 1.9445071131207497e-06, - "loss": 0.9892, - "step": 6967 - }, - { - "epoch": 0.5236735307380129, - "grad_norm": 2.0922477229184926, - "learning_rate": 1.9440204600168975e-06, - "loss": 1.0058, - "step": 6968 - }, - { - "epoch": 0.5237486848038478, - "grad_norm": 1.5795189772308444, - "learning_rate": 1.9435338102300467e-06, - "loss": 1.0083, - "step": 6969 - }, - { - "epoch": 0.5238238388696829, - "grad_norm": 2.8945557957482366, - "learning_rate": 1.943047163789034e-06, - "loss": 1.002, - "step": 6970 - }, - { - "epoch": 0.5238989929355178, - "grad_norm": 1.7910285375164983, - "learning_rate": 1.942560520722695e-06, - "loss": 0.8933, - "step": 6971 - }, - { - "epoch": 0.5239741470013528, - "grad_norm": 1.440376223848911, - "learning_rate": 1.9420738810598653e-06, - "loss": 0.9679, - "step": 6972 - }, - { - "epoch": 0.5240493010671877, - "grad_norm": 1.5378096074939696, - "learning_rate": 1.9415872448293796e-06, - "loss": 1.0688, - "step": 6973 - }, - { - "epoch": 0.5241244551330227, - "grad_norm": 1.596931871569762, - "learning_rate": 1.9411006120600737e-06, - "loss": 0.9541, - "step": 6974 - }, - { - "epoch": 0.5241996091988577, - "grad_norm": 2.6629785922454516, - "learning_rate": 1.940613982780782e-06, - "loss": 0.9551, - "step": 6975 - }, - { - "epoch": 0.5242747632646926, - "grad_norm": 1.81859012700741, - "learning_rate": 1.940127357020339e-06, - "loss": 0.9361, - "step": 6976 - }, - { - "epoch": 0.5243499173305276, - "grad_norm": 2.3311079743922516, - "learning_rate": 1.9396407348075796e-06, - "loss": 1.0854, - "step": 6977 - }, - { - "epoch": 0.5244250713963625, - "grad_norm": 1.813319908693118, - "learning_rate": 1.939154116171337e-06, - "loss": 1.028, - "step": 6978 - }, - { - "epoch": 0.5245002254621975, - "grad_norm": 2.282121988103442, - "learning_rate": 1.9386675011404473e-06, - "loss": 0.9693, - "step": 6979 - }, - { - "epoch": 0.5245753795280325, - "grad_norm": 4.030661843146709, - "learning_rate": 1.9381808897437427e-06, - "loss": 1.0074, - "step": 6980 - }, - { - "epoch": 0.5246505335938674, - "grad_norm": 1.6354518165779586, - "learning_rate": 1.9376942820100563e-06, - "loss": 0.9123, - "step": 6981 - }, - { - "epoch": 0.5247256876597024, - "grad_norm": 1.582709800348292, - "learning_rate": 1.937207677968223e-06, - "loss": 0.9603, - "step": 6982 - }, - { - "epoch": 0.5248008417255373, - "grad_norm": 4.204481168438259, - "learning_rate": 1.9367210776470744e-06, - "loss": 1.0158, - "step": 6983 - }, - { - "epoch": 0.5248759957913723, - "grad_norm": 1.8012251234487646, - "learning_rate": 1.9362344810754455e-06, - "loss": 0.9621, - "step": 6984 - }, - { - "epoch": 0.5249511498572073, - "grad_norm": 1.4382735948913186, - "learning_rate": 1.935747888282167e-06, - "loss": 0.97, - "step": 6985 - }, - { - "epoch": 0.5250263039230423, - "grad_norm": 1.4478414319295185, - "learning_rate": 1.935261299296072e-06, - "loss": 0.9871, - "step": 6986 - }, - { - "epoch": 0.5251014579888772, - "grad_norm": 1.6913378836049684, - "learning_rate": 1.9347747141459934e-06, - "loss": 0.9902, - "step": 6987 - }, - { - "epoch": 0.5251766120547121, - "grad_norm": 1.7124379863787749, - "learning_rate": 1.9342881328607625e-06, - "loss": 0.9516, - "step": 6988 - }, - { - "epoch": 0.5252517661205471, - "grad_norm": 2.0170881298596264, - "learning_rate": 1.9338015554692116e-06, - "loss": 0.9306, - "step": 6989 - }, - { - "epoch": 0.525326920186382, - "grad_norm": 1.9120420418749848, - "learning_rate": 1.933314982000172e-06, - "loss": 0.9908, - "step": 6990 - }, - { - "epoch": 0.5254020742522171, - "grad_norm": 1.195219048730245, - "learning_rate": 1.932828412482475e-06, - "loss": 1.0159, - "step": 6991 - }, - { - "epoch": 0.525477228318052, - "grad_norm": 1.785963147397248, - "learning_rate": 1.9323418469449517e-06, - "loss": 0.9937, - "step": 6992 - }, - { - "epoch": 0.525552382383887, - "grad_norm": 1.369398672298848, - "learning_rate": 1.931855285416433e-06, - "loss": 1.0334, - "step": 6993 - }, - { - "epoch": 0.5256275364497219, - "grad_norm": 4.817896010033942, - "learning_rate": 1.93136872792575e-06, - "loss": 0.9451, - "step": 6994 - }, - { - "epoch": 0.5257026905155568, - "grad_norm": 1.626658840056388, - "learning_rate": 1.930882174501732e-06, - "loss": 0.9092, - "step": 6995 - }, - { - "epoch": 0.5257778445813919, - "grad_norm": 3.861307749021542, - "learning_rate": 1.93039562517321e-06, - "loss": 0.9602, - "step": 6996 - }, - { - "epoch": 0.5258529986472268, - "grad_norm": 1.7992296133239336, - "learning_rate": 1.929909079969014e-06, - "loss": 0.9526, - "step": 6997 - }, - { - "epoch": 0.5259281527130618, - "grad_norm": 0.662582925788787, - "learning_rate": 1.929422538917973e-06, - "loss": 0.8234, - "step": 6998 - }, - { - "epoch": 0.5260033067788967, - "grad_norm": 1.6161298011167349, - "learning_rate": 1.9289360020489177e-06, - "loss": 0.9291, - "step": 6999 - }, - { - "epoch": 0.5260784608447318, - "grad_norm": 1.5408318897169717, - "learning_rate": 1.928449469390675e-06, - "loss": 1.0481, - "step": 7000 - }, - { - "epoch": 0.5261536149105667, - "grad_norm": 1.3841002766493917, - "learning_rate": 1.927962940972077e-06, - "loss": 0.961, - "step": 7001 - }, - { - "epoch": 0.5262287689764016, - "grad_norm": 1.5898106826979994, - "learning_rate": 1.9274764168219495e-06, - "loss": 0.9397, - "step": 7002 - }, - { - "epoch": 0.5263039230422366, - "grad_norm": 1.200439130384655, - "learning_rate": 1.9269898969691214e-06, - "loss": 0.9492, - "step": 7003 - }, - { - "epoch": 0.5263790771080715, - "grad_norm": 2.130571018921618, - "learning_rate": 1.9265033814424227e-06, - "loss": 0.9793, - "step": 7004 - }, - { - "epoch": 0.5264542311739066, - "grad_norm": 1.888360361723405, - "learning_rate": 1.9260168702706794e-06, - "loss": 0.9467, - "step": 7005 - }, - { - "epoch": 0.5265293852397415, - "grad_norm": 1.4448696798409304, - "learning_rate": 1.9255303634827204e-06, - "loss": 1.0269, - "step": 7006 - }, - { - "epoch": 0.5266045393055764, - "grad_norm": 1.950153317904295, - "learning_rate": 1.9250438611073724e-06, - "loss": 0.8464, - "step": 7007 - }, - { - "epoch": 0.5266796933714114, - "grad_norm": 1.568051508690014, - "learning_rate": 1.924557363173462e-06, - "loss": 0.9705, - "step": 7008 - }, - { - "epoch": 0.5267548474372463, - "grad_norm": 5.748159362749985, - "learning_rate": 1.9240708697098174e-06, - "loss": 1.0438, - "step": 7009 - }, - { - "epoch": 0.5268300015030813, - "grad_norm": 1.5748379154666854, - "learning_rate": 1.9235843807452642e-06, - "loss": 1.0283, - "step": 7010 - }, - { - "epoch": 0.5269051555689163, - "grad_norm": 1.4755251030591416, - "learning_rate": 1.923097896308629e-06, - "loss": 0.924, - "step": 7011 - }, - { - "epoch": 0.5269803096347513, - "grad_norm": 1.3910333269680637, - "learning_rate": 1.9226114164287384e-06, - "loss": 1.0049, - "step": 7012 - }, - { - "epoch": 0.5270554637005862, - "grad_norm": 0.7063977264247825, - "learning_rate": 1.9221249411344173e-06, - "loss": 0.7941, - "step": 7013 - }, - { - "epoch": 0.5271306177664211, - "grad_norm": 1.864770386191271, - "learning_rate": 1.921638470454492e-06, - "loss": 0.8502, - "step": 7014 - }, - { - "epoch": 0.5272057718322561, - "grad_norm": 2.1462959852496644, - "learning_rate": 1.9211520044177866e-06, - "loss": 1.0858, - "step": 7015 - }, - { - "epoch": 0.5272809258980911, - "grad_norm": 1.880854874998673, - "learning_rate": 1.9206655430531277e-06, - "loss": 0.9027, - "step": 7016 - }, - { - "epoch": 0.5273560799639261, - "grad_norm": 1.597143188769479, - "learning_rate": 1.9201790863893387e-06, - "loss": 0.9812, - "step": 7017 - }, - { - "epoch": 0.527431234029761, - "grad_norm": 1.5289160399203923, - "learning_rate": 1.919692634455245e-06, - "loss": 0.869, - "step": 7018 - }, - { - "epoch": 0.527506388095596, - "grad_norm": 1.6716376875967323, - "learning_rate": 1.91920618727967e-06, - "loss": 0.9732, - "step": 7019 - }, - { - "epoch": 0.5275815421614309, - "grad_norm": 1.8529409436173208, - "learning_rate": 1.9187197448914374e-06, - "loss": 0.9389, - "step": 7020 - }, - { - "epoch": 0.5276566962272659, - "grad_norm": 1.4843210310912676, - "learning_rate": 1.918233307319371e-06, - "loss": 0.9785, - "step": 7021 - }, - { - "epoch": 0.5277318502931009, - "grad_norm": 1.533423224523224, - "learning_rate": 1.9177468745922944e-06, - "loss": 1.0199, - "step": 7022 - }, - { - "epoch": 0.5278070043589358, - "grad_norm": 1.7738044795997514, - "learning_rate": 1.917260446739031e-06, - "loss": 0.9803, - "step": 7023 - }, - { - "epoch": 0.5278821584247708, - "grad_norm": 1.8998693396163158, - "learning_rate": 1.916774023788403e-06, - "loss": 1.0207, - "step": 7024 - }, - { - "epoch": 0.5279573124906057, - "grad_norm": 1.6363641541412768, - "learning_rate": 1.9162876057692317e-06, - "loss": 0.9641, - "step": 7025 - }, - { - "epoch": 0.5280324665564406, - "grad_norm": 1.5420255844625947, - "learning_rate": 1.9158011927103413e-06, - "loss": 0.9054, - "step": 7026 - }, - { - "epoch": 0.5281076206222757, - "grad_norm": 1.6075492799125668, - "learning_rate": 1.9153147846405525e-06, - "loss": 0.9846, - "step": 7027 - }, - { - "epoch": 0.5281827746881106, - "grad_norm": 1.5228566162683073, - "learning_rate": 1.914828381588687e-06, - "loss": 0.8982, - "step": 7028 - }, - { - "epoch": 0.5282579287539456, - "grad_norm": 2.132710772313556, - "learning_rate": 1.9143419835835663e-06, - "loss": 1.0181, - "step": 7029 - }, - { - "epoch": 0.5283330828197805, - "grad_norm": 1.5080204163138133, - "learning_rate": 1.9138555906540103e-06, - "loss": 1.0271, - "step": 7030 - }, - { - "epoch": 0.5284082368856156, - "grad_norm": 5.732098452332822, - "learning_rate": 1.9133692028288413e-06, - "loss": 0.9584, - "step": 7031 - }, - { - "epoch": 0.5284833909514505, - "grad_norm": 5.210676698578074, - "learning_rate": 1.912882820136878e-06, - "loss": 0.88, - "step": 7032 - }, - { - "epoch": 0.5285585450172854, - "grad_norm": 1.417817331011157, - "learning_rate": 1.9123964426069416e-06, - "loss": 0.8978, - "step": 7033 - }, - { - "epoch": 0.5286336990831204, - "grad_norm": 1.7632835188729796, - "learning_rate": 1.9119100702678515e-06, - "loss": 0.9556, - "step": 7034 - }, - { - "epoch": 0.5287088531489553, - "grad_norm": 1.7883153296373495, - "learning_rate": 1.9114237031484266e-06, - "loss": 0.9898, - "step": 7035 - }, - { - "epoch": 0.5287840072147904, - "grad_norm": 1.3072233397522326, - "learning_rate": 1.9109373412774867e-06, - "loss": 1.0411, - "step": 7036 - }, - { - "epoch": 0.5288591612806253, - "grad_norm": 1.4900945395361231, - "learning_rate": 1.91045098468385e-06, - "loss": 1.0762, - "step": 7037 - }, - { - "epoch": 0.5289343153464603, - "grad_norm": 2.532364023216556, - "learning_rate": 1.9099646333963363e-06, - "loss": 0.951, - "step": 7038 - }, - { - "epoch": 0.5290094694122952, - "grad_norm": 0.7355575779537789, - "learning_rate": 1.9094782874437625e-06, - "loss": 0.8376, - "step": 7039 - }, - { - "epoch": 0.5290846234781301, - "grad_norm": 1.6514047249577533, - "learning_rate": 1.9089919468549464e-06, - "loss": 0.9604, - "step": 7040 - }, - { - "epoch": 0.5291597775439651, - "grad_norm": 0.7638758706418873, - "learning_rate": 1.9085056116587068e-06, - "loss": 0.8667, - "step": 7041 - }, - { - "epoch": 0.5292349316098001, - "grad_norm": 0.7959328486018716, - "learning_rate": 1.908019281883859e-06, - "loss": 0.8661, - "step": 7042 - }, - { - "epoch": 0.5293100856756351, - "grad_norm": 2.725342048385979, - "learning_rate": 1.9075329575592217e-06, - "loss": 0.9402, - "step": 7043 - }, - { - "epoch": 0.52938523974147, - "grad_norm": 1.4992332088057503, - "learning_rate": 1.9070466387136095e-06, - "loss": 0.9928, - "step": 7044 - }, - { - "epoch": 0.529460393807305, - "grad_norm": 1.6134932334859682, - "learning_rate": 1.906560325375841e-06, - "loss": 0.958, - "step": 7045 - }, - { - "epoch": 0.52953554787314, - "grad_norm": 1.67305891953149, - "learning_rate": 1.9060740175747317e-06, - "loss": 1.0231, - "step": 7046 - }, - { - "epoch": 0.5296107019389749, - "grad_norm": 1.6530426927927653, - "learning_rate": 1.9055877153390948e-06, - "loss": 1.0611, - "step": 7047 - }, - { - "epoch": 0.5296858560048099, - "grad_norm": 2.167590848961348, - "learning_rate": 1.9051014186977485e-06, - "loss": 0.9759, - "step": 7048 - }, - { - "epoch": 0.5297610100706448, - "grad_norm": 1.8420469532966346, - "learning_rate": 1.9046151276795062e-06, - "loss": 1.0142, - "step": 7049 - }, - { - "epoch": 0.5298361641364798, - "grad_norm": 2.421169547426713, - "learning_rate": 1.904128842313183e-06, - "loss": 0.8635, - "step": 7050 - }, - { - "epoch": 0.5299113182023147, - "grad_norm": 2.2416569417674004, - "learning_rate": 1.9036425626275929e-06, - "loss": 1.0301, - "step": 7051 - }, - { - "epoch": 0.5299864722681497, - "grad_norm": 1.9141831340532622, - "learning_rate": 1.9031562886515497e-06, - "loss": 0.9615, - "step": 7052 - }, - { - "epoch": 0.5300616263339847, - "grad_norm": 2.7471107564010904, - "learning_rate": 1.9026700204138676e-06, - "loss": 0.9875, - "step": 7053 - }, - { - "epoch": 0.5301367803998196, - "grad_norm": 3.2757624197340673, - "learning_rate": 1.9021837579433593e-06, - "loss": 1.0249, - "step": 7054 - }, - { - "epoch": 0.5302119344656546, - "grad_norm": 1.3792347287135893, - "learning_rate": 1.9016975012688382e-06, - "loss": 1.0176, - "step": 7055 - }, - { - "epoch": 0.5302870885314895, - "grad_norm": 0.7794104471841191, - "learning_rate": 1.901211250419116e-06, - "loss": 0.8328, - "step": 7056 - }, - { - "epoch": 0.5303622425973246, - "grad_norm": 1.8906324402213492, - "learning_rate": 1.900725005423006e-06, - "loss": 1.0514, - "step": 7057 - }, - { - "epoch": 0.5304373966631595, - "grad_norm": 1.9023682516349172, - "learning_rate": 1.9002387663093195e-06, - "loss": 0.9823, - "step": 7058 - }, - { - "epoch": 0.5305125507289944, - "grad_norm": 1.5058030094428256, - "learning_rate": 1.899752533106868e-06, - "loss": 1.1099, - "step": 7059 - }, - { - "epoch": 0.5305877047948294, - "grad_norm": 2.5657094999609167, - "learning_rate": 1.8992663058444629e-06, - "loss": 0.8991, - "step": 7060 - }, - { - "epoch": 0.5306628588606643, - "grad_norm": 0.740588745002409, - "learning_rate": 1.8987800845509146e-06, - "loss": 0.8462, - "step": 7061 - }, - { - "epoch": 0.5307380129264994, - "grad_norm": 2.266390125384298, - "learning_rate": 1.8982938692550344e-06, - "loss": 0.8774, - "step": 7062 - }, - { - "epoch": 0.5308131669923343, - "grad_norm": 1.3761002733377656, - "learning_rate": 1.8978076599856317e-06, - "loss": 1.0376, - "step": 7063 - }, - { - "epoch": 0.5308883210581693, - "grad_norm": 1.6424619291123232, - "learning_rate": 1.897321456771516e-06, - "loss": 0.982, - "step": 7064 - }, - { - "epoch": 0.5309634751240042, - "grad_norm": 1.4645754148758487, - "learning_rate": 1.8968352596414977e-06, - "loss": 0.9644, - "step": 7065 - }, - { - "epoch": 0.5310386291898391, - "grad_norm": 1.9279433178741083, - "learning_rate": 1.8963490686243847e-06, - "loss": 0.9758, - "step": 7066 - }, - { - "epoch": 0.5311137832556742, - "grad_norm": 2.1863105810026178, - "learning_rate": 1.895862883748987e-06, - "loss": 1.0497, - "step": 7067 - }, - { - "epoch": 0.5311889373215091, - "grad_norm": 1.7062668125316378, - "learning_rate": 1.895376705044112e-06, - "loss": 1.0116, - "step": 7068 - }, - { - "epoch": 0.5312640913873441, - "grad_norm": 3.0316572407356963, - "learning_rate": 1.8948905325385675e-06, - "loss": 0.9854, - "step": 7069 - }, - { - "epoch": 0.531339245453179, - "grad_norm": 1.2723921715518052, - "learning_rate": 1.894404366261162e-06, - "loss": 0.906, - "step": 7070 - }, - { - "epoch": 0.5314143995190139, - "grad_norm": 1.5453647986531005, - "learning_rate": 1.8939182062407017e-06, - "loss": 0.9684, - "step": 7071 - }, - { - "epoch": 0.531489553584849, - "grad_norm": 0.6601813497794053, - "learning_rate": 1.8934320525059944e-06, - "loss": 0.7853, - "step": 7072 - }, - { - "epoch": 0.5315647076506839, - "grad_norm": 1.615648799993945, - "learning_rate": 1.8929459050858458e-06, - "loss": 0.9646, - "step": 7073 - }, - { - "epoch": 0.5316398617165189, - "grad_norm": 1.8862489861851541, - "learning_rate": 1.892459764009062e-06, - "loss": 0.9452, - "step": 7074 - }, - { - "epoch": 0.5317150157823538, - "grad_norm": 1.3640779719398353, - "learning_rate": 1.8919736293044495e-06, - "loss": 0.9135, - "step": 7075 - }, - { - "epoch": 0.5317901698481888, - "grad_norm": 1.2713959600175895, - "learning_rate": 1.8914875010008124e-06, - "loss": 1.005, - "step": 7076 - }, - { - "epoch": 0.5318653239140237, - "grad_norm": 1.846046375469124, - "learning_rate": 1.891001379126957e-06, - "loss": 0.9964, - "step": 7077 - }, - { - "epoch": 0.5319404779798587, - "grad_norm": 1.5972129001437536, - "learning_rate": 1.8905152637116868e-06, - "loss": 0.9324, - "step": 7078 - }, - { - "epoch": 0.5320156320456937, - "grad_norm": 2.252855290580518, - "learning_rate": 1.890029154783807e-06, - "loss": 0.8531, - "step": 7079 - }, - { - "epoch": 0.5320907861115286, - "grad_norm": 1.7788848567408375, - "learning_rate": 1.889543052372121e-06, - "loss": 1.056, - "step": 7080 - }, - { - "epoch": 0.5321659401773636, - "grad_norm": 1.8808102546845566, - "learning_rate": 1.8890569565054313e-06, - "loss": 1.0332, - "step": 7081 - }, - { - "epoch": 0.5322410942431985, - "grad_norm": 2.777507964911659, - "learning_rate": 1.8885708672125425e-06, - "loss": 0.9135, - "step": 7082 - }, - { - "epoch": 0.5323162483090336, - "grad_norm": 1.5574922039149144, - "learning_rate": 1.888084784522256e-06, - "loss": 1.0473, - "step": 7083 - }, - { - "epoch": 0.5323914023748685, - "grad_norm": 1.6252988393803953, - "learning_rate": 1.8875987084633748e-06, - "loss": 0.9907, - "step": 7084 - }, - { - "epoch": 0.5324665564407034, - "grad_norm": 4.478120382962771, - "learning_rate": 1.887112639064701e-06, - "loss": 0.9166, - "step": 7085 - }, - { - "epoch": 0.5325417105065384, - "grad_norm": 1.3899597548414993, - "learning_rate": 1.8866265763550344e-06, - "loss": 0.885, - "step": 7086 - }, - { - "epoch": 0.5326168645723733, - "grad_norm": 1.7120206705030925, - "learning_rate": 1.8861405203631786e-06, - "loss": 1.1131, - "step": 7087 - }, - { - "epoch": 0.5326920186382084, - "grad_norm": 1.5968446045612368, - "learning_rate": 1.8856544711179317e-06, - "loss": 0.9562, - "step": 7088 - }, - { - "epoch": 0.5327671727040433, - "grad_norm": 1.6867167456483791, - "learning_rate": 1.8851684286480962e-06, - "loss": 0.907, - "step": 7089 - }, - { - "epoch": 0.5328423267698783, - "grad_norm": 1.63846795105635, - "learning_rate": 1.884682392982471e-06, - "loss": 0.8981, - "step": 7090 - }, - { - "epoch": 0.5329174808357132, - "grad_norm": 1.824451374521438, - "learning_rate": 1.884196364149855e-06, - "loss": 0.9429, - "step": 7091 - }, - { - "epoch": 0.5329926349015481, - "grad_norm": 1.7061854870795263, - "learning_rate": 1.8837103421790483e-06, - "loss": 0.968, - "step": 7092 - }, - { - "epoch": 0.5330677889673832, - "grad_norm": 2.115332547460667, - "learning_rate": 1.8832243270988488e-06, - "loss": 0.9062, - "step": 7093 - }, - { - "epoch": 0.5331429430332181, - "grad_norm": 1.540543207005291, - "learning_rate": 1.8827383189380556e-06, - "loss": 0.9803, - "step": 7094 - }, - { - "epoch": 0.5332180970990531, - "grad_norm": 1.5974810441255107, - "learning_rate": 1.8822523177254658e-06, - "loss": 0.9303, - "step": 7095 - }, - { - "epoch": 0.533293251164888, - "grad_norm": 3.0098971190248776, - "learning_rate": 1.881766323489877e-06, - "loss": 1.0037, - "step": 7096 - }, - { - "epoch": 0.5333684052307229, - "grad_norm": 2.296822994966904, - "learning_rate": 1.8812803362600865e-06, - "loss": 0.9515, - "step": 7097 - }, - { - "epoch": 0.533443559296558, - "grad_norm": 2.477745567800472, - "learning_rate": 1.8807943560648903e-06, - "loss": 0.9326, - "step": 7098 - }, - { - "epoch": 0.5335187133623929, - "grad_norm": 1.6336142291952227, - "learning_rate": 1.8803083829330853e-06, - "loss": 0.9207, - "step": 7099 - }, - { - "epoch": 0.5335938674282279, - "grad_norm": 1.7929078467835722, - "learning_rate": 1.8798224168934664e-06, - "loss": 1.0072, - "step": 7100 - }, - { - "epoch": 0.5336690214940628, - "grad_norm": 1.5469129153142132, - "learning_rate": 1.87933645797483e-06, - "loss": 0.8924, - "step": 7101 - }, - { - "epoch": 0.5337441755598978, - "grad_norm": 1.4581784393158541, - "learning_rate": 1.8788505062059708e-06, - "loss": 0.9975, - "step": 7102 - }, - { - "epoch": 0.5338193296257328, - "grad_norm": 1.682234071231827, - "learning_rate": 1.8783645616156822e-06, - "loss": 0.9722, - "step": 7103 - }, - { - "epoch": 0.5338944836915677, - "grad_norm": 2.81104597017038, - "learning_rate": 1.8778786242327598e-06, - "loss": 1.0223, - "step": 7104 - }, - { - "epoch": 0.5339696377574027, - "grad_norm": 1.3902747752618438, - "learning_rate": 1.877392694085996e-06, - "loss": 0.9974, - "step": 7105 - }, - { - "epoch": 0.5340447918232376, - "grad_norm": 1.7053169354169115, - "learning_rate": 1.876906771204185e-06, - "loss": 1.0806, - "step": 7106 - }, - { - "epoch": 0.5341199458890726, - "grad_norm": 1.6021715448283362, - "learning_rate": 1.8764208556161192e-06, - "loss": 0.972, - "step": 7107 - }, - { - "epoch": 0.5341950999549075, - "grad_norm": 1.8170729859368098, - "learning_rate": 1.87593494735059e-06, - "loss": 0.913, - "step": 7108 - }, - { - "epoch": 0.5342702540207426, - "grad_norm": 2.4297959282038613, - "learning_rate": 1.8754490464363917e-06, - "loss": 0.888, - "step": 7109 - }, - { - "epoch": 0.5343454080865775, - "grad_norm": 1.5775084093598208, - "learning_rate": 1.8749631529023129e-06, - "loss": 0.9796, - "step": 7110 - }, - { - "epoch": 0.5344205621524124, - "grad_norm": 2.9462125096223915, - "learning_rate": 1.874477266777147e-06, - "loss": 0.9593, - "step": 7111 - }, - { - "epoch": 0.5344957162182474, - "grad_norm": 1.6168534675641886, - "learning_rate": 1.8739913880896835e-06, - "loss": 0.9816, - "step": 7112 - }, - { - "epoch": 0.5345708702840823, - "grad_norm": 1.8951730801091906, - "learning_rate": 1.8735055168687126e-06, - "loss": 1.0082, - "step": 7113 - }, - { - "epoch": 0.5346460243499174, - "grad_norm": 1.2940897619843663, - "learning_rate": 1.8730196531430246e-06, - "loss": 0.9971, - "step": 7114 - }, - { - "epoch": 0.5347211784157523, - "grad_norm": 1.6162507665976042, - "learning_rate": 1.872533796941408e-06, - "loss": 0.9447, - "step": 7115 - }, - { - "epoch": 0.5347963324815872, - "grad_norm": 1.6772403367122812, - "learning_rate": 1.8720479482926523e-06, - "loss": 1.0235, - "step": 7116 - }, - { - "epoch": 0.5348714865474222, - "grad_norm": 1.623450347291487, - "learning_rate": 1.8715621072255457e-06, - "loss": 0.9847, - "step": 7117 - }, - { - "epoch": 0.5349466406132571, - "grad_norm": 1.611952190348056, - "learning_rate": 1.8710762737688757e-06, - "loss": 0.9958, - "step": 7118 - }, - { - "epoch": 0.5350217946790922, - "grad_norm": 1.234070704170365, - "learning_rate": 1.8705904479514305e-06, - "loss": 0.9821, - "step": 7119 - }, - { - "epoch": 0.5350969487449271, - "grad_norm": 2.0386265548109757, - "learning_rate": 1.8701046298019965e-06, - "loss": 1.0374, - "step": 7120 - }, - { - "epoch": 0.5351721028107621, - "grad_norm": 2.8232448898508338, - "learning_rate": 1.869618819349361e-06, - "loss": 0.9752, - "step": 7121 - }, - { - "epoch": 0.535247256876597, - "grad_norm": 2.2506958627178126, - "learning_rate": 1.8691330166223091e-06, - "loss": 0.9481, - "step": 7122 - }, - { - "epoch": 0.5353224109424319, - "grad_norm": 2.929671712197044, - "learning_rate": 1.8686472216496275e-06, - "loss": 0.9355, - "step": 7123 - }, - { - "epoch": 0.535397565008267, - "grad_norm": 1.5632167698012382, - "learning_rate": 1.8681614344601013e-06, - "loss": 1.0193, - "step": 7124 - }, - { - "epoch": 0.5354727190741019, - "grad_norm": 1.762027890979447, - "learning_rate": 1.8676756550825144e-06, - "loss": 0.9653, - "step": 7125 - }, - { - "epoch": 0.5355478731399369, - "grad_norm": 1.614322427978376, - "learning_rate": 1.8671898835456518e-06, - "loss": 0.9709, - "step": 7126 - }, - { - "epoch": 0.5356230272057718, - "grad_norm": 1.803003822102571, - "learning_rate": 1.8667041198782972e-06, - "loss": 0.9832, - "step": 7127 - }, - { - "epoch": 0.5356981812716068, - "grad_norm": 1.5168154259273527, - "learning_rate": 1.866218364109234e-06, - "loss": 1.0244, - "step": 7128 - }, - { - "epoch": 0.5357733353374418, - "grad_norm": 1.3628904115781129, - "learning_rate": 1.8657326162672452e-06, - "loss": 1.0088, - "step": 7129 - }, - { - "epoch": 0.5358484894032767, - "grad_norm": 1.671073792897062, - "learning_rate": 1.865246876381112e-06, - "loss": 0.9702, - "step": 7130 - }, - { - "epoch": 0.5359236434691117, - "grad_norm": 1.8595920724540203, - "learning_rate": 1.8647611444796182e-06, - "loss": 0.8173, - "step": 7131 - }, - { - "epoch": 0.5359987975349466, - "grad_norm": 1.9420115335163242, - "learning_rate": 1.8642754205915444e-06, - "loss": 1.0635, - "step": 7132 - }, - { - "epoch": 0.5360739516007816, - "grad_norm": 1.794544001705449, - "learning_rate": 1.8637897047456717e-06, - "loss": 0.9854, - "step": 7133 - }, - { - "epoch": 0.5361491056666166, - "grad_norm": 1.766446690389609, - "learning_rate": 1.8633039969707808e-06, - "loss": 0.9801, - "step": 7134 - }, - { - "epoch": 0.5362242597324516, - "grad_norm": 1.7428500707447123, - "learning_rate": 1.8628182972956509e-06, - "loss": 0.9626, - "step": 7135 - }, - { - "epoch": 0.5362994137982865, - "grad_norm": 1.4872719052885455, - "learning_rate": 1.8623326057490627e-06, - "loss": 0.9706, - "step": 7136 - }, - { - "epoch": 0.5363745678641214, - "grad_norm": 2.1820181688134457, - "learning_rate": 1.8618469223597943e-06, - "loss": 1.107, - "step": 7137 - }, - { - "epoch": 0.5364497219299564, - "grad_norm": 2.190687092160927, - "learning_rate": 1.8613612471566253e-06, - "loss": 1.0418, - "step": 7138 - }, - { - "epoch": 0.5365248759957914, - "grad_norm": 1.6398990296473606, - "learning_rate": 1.8608755801683334e-06, - "loss": 0.972, - "step": 7139 - }, - { - "epoch": 0.5366000300616264, - "grad_norm": 1.7417991444824654, - "learning_rate": 1.8603899214236956e-06, - "loss": 0.9997, - "step": 7140 - }, - { - "epoch": 0.5366751841274613, - "grad_norm": 1.537769374665201, - "learning_rate": 1.85990427095149e-06, - "loss": 0.9803, - "step": 7141 - }, - { - "epoch": 0.5367503381932962, - "grad_norm": 1.6522271586377686, - "learning_rate": 1.8594186287804923e-06, - "loss": 0.9977, - "step": 7142 - }, - { - "epoch": 0.5368254922591312, - "grad_norm": 1.8352735252145702, - "learning_rate": 1.8589329949394793e-06, - "loss": 0.9814, - "step": 7143 - }, - { - "epoch": 0.5369006463249661, - "grad_norm": 0.7395210553306195, - "learning_rate": 1.8584473694572268e-06, - "loss": 0.8345, - "step": 7144 - }, - { - "epoch": 0.5369758003908012, - "grad_norm": 2.6769566091584105, - "learning_rate": 1.8579617523625096e-06, - "loss": 0.9651, - "step": 7145 - }, - { - "epoch": 0.5370509544566361, - "grad_norm": 4.478519457995525, - "learning_rate": 1.8574761436841027e-06, - "loss": 0.9604, - "step": 7146 - }, - { - "epoch": 0.5371261085224711, - "grad_norm": 2.613976629822567, - "learning_rate": 1.8569905434507796e-06, - "loss": 1.0621, - "step": 7147 - }, - { - "epoch": 0.537201262588306, - "grad_norm": 1.6913338661122839, - "learning_rate": 1.8565049516913146e-06, - "loss": 0.8681, - "step": 7148 - }, - { - "epoch": 0.5372764166541409, - "grad_norm": 1.4676275631312257, - "learning_rate": 1.85601936843448e-06, - "loss": 0.8304, - "step": 7149 - }, - { - "epoch": 0.537351570719976, - "grad_norm": 1.7439754873530422, - "learning_rate": 1.8555337937090506e-06, - "loss": 1.0354, - "step": 7150 - }, - { - "epoch": 0.5374267247858109, - "grad_norm": 2.3135843569890198, - "learning_rate": 1.8550482275437964e-06, - "loss": 0.9621, - "step": 7151 - }, - { - "epoch": 0.5375018788516459, - "grad_norm": 1.7182004570236726, - "learning_rate": 1.854562669967489e-06, - "loss": 1.0415, - "step": 7152 - }, - { - "epoch": 0.5375770329174808, - "grad_norm": 1.7720145680192534, - "learning_rate": 1.8540771210089016e-06, - "loss": 0.833, - "step": 7153 - }, - { - "epoch": 0.5376521869833158, - "grad_norm": 1.45255407787511, - "learning_rate": 1.8535915806968026e-06, - "loss": 1.0262, - "step": 7154 - }, - { - "epoch": 0.5377273410491508, - "grad_norm": 1.8346569384000004, - "learning_rate": 1.8531060490599637e-06, - "loss": 1.1432, - "step": 7155 - }, - { - "epoch": 0.5378024951149857, - "grad_norm": 1.6898144991490704, - "learning_rate": 1.8526205261271538e-06, - "loss": 0.9592, - "step": 7156 - }, - { - "epoch": 0.5378776491808207, - "grad_norm": 2.1465596818426462, - "learning_rate": 1.8521350119271418e-06, - "loss": 0.9192, - "step": 7157 - }, - { - "epoch": 0.5379528032466556, - "grad_norm": 1.860999543687538, - "learning_rate": 1.8516495064886967e-06, - "loss": 0.9272, - "step": 7158 - }, - { - "epoch": 0.5380279573124906, - "grad_norm": 1.7861213329148595, - "learning_rate": 1.8511640098405863e-06, - "loss": 0.9567, - "step": 7159 - }, - { - "epoch": 0.5381031113783256, - "grad_norm": 2.0734115725450697, - "learning_rate": 1.8506785220115787e-06, - "loss": 0.9325, - "step": 7160 - }, - { - "epoch": 0.5381782654441605, - "grad_norm": 3.1452043085493013, - "learning_rate": 1.8501930430304402e-06, - "loss": 1.0317, - "step": 7161 - }, - { - "epoch": 0.5382534195099955, - "grad_norm": 1.9682137500744312, - "learning_rate": 1.8497075729259372e-06, - "loss": 1.0824, - "step": 7162 - }, - { - "epoch": 0.5383285735758304, - "grad_norm": 0.7378313419403347, - "learning_rate": 1.8492221117268367e-06, - "loss": 0.8595, - "step": 7163 - }, - { - "epoch": 0.5384037276416654, - "grad_norm": 1.8166484056074106, - "learning_rate": 1.8487366594619028e-06, - "loss": 0.9746, - "step": 7164 - }, - { - "epoch": 0.5384788817075004, - "grad_norm": 1.4074164321374902, - "learning_rate": 1.8482512161599016e-06, - "loss": 0.927, - "step": 7165 - }, - { - "epoch": 0.5385540357733354, - "grad_norm": 1.6489139248074625, - "learning_rate": 1.8477657818495963e-06, - "loss": 0.987, - "step": 7166 - }, - { - "epoch": 0.5386291898391703, - "grad_norm": 1.7604933577165485, - "learning_rate": 1.847280356559752e-06, - "loss": 0.9438, - "step": 7167 - }, - { - "epoch": 0.5387043439050052, - "grad_norm": 2.0479620456680405, - "learning_rate": 1.8467949403191312e-06, - "loss": 0.9805, - "step": 7168 - }, - { - "epoch": 0.5387794979708402, - "grad_norm": 1.8393317480053948, - "learning_rate": 1.8463095331564965e-06, - "loss": 1.0528, - "step": 7169 - }, - { - "epoch": 0.5388546520366752, - "grad_norm": 2.126795178758725, - "learning_rate": 1.8458241351006107e-06, - "loss": 0.8936, - "step": 7170 - }, - { - "epoch": 0.5389298061025102, - "grad_norm": 2.337686859658667, - "learning_rate": 1.8453387461802347e-06, - "loss": 1.0186, - "step": 7171 - }, - { - "epoch": 0.5390049601683451, - "grad_norm": 1.7425131044483408, - "learning_rate": 1.8448533664241316e-06, - "loss": 0.9183, - "step": 7172 - }, - { - "epoch": 0.5390801142341801, - "grad_norm": 1.5097707573136003, - "learning_rate": 1.84436799586106e-06, - "loss": 0.9687, - "step": 7173 - }, - { - "epoch": 0.539155268300015, - "grad_norm": 1.7851571848927392, - "learning_rate": 1.8438826345197796e-06, - "loss": 1.0612, - "step": 7174 - }, - { - "epoch": 0.53923042236585, - "grad_norm": 1.5753153349055, - "learning_rate": 1.843397282429052e-06, - "loss": 1.0252, - "step": 7175 - }, - { - "epoch": 0.539305576431685, - "grad_norm": 1.4940438432452177, - "learning_rate": 1.8429119396176348e-06, - "loss": 1.0377, - "step": 7176 - }, - { - "epoch": 0.5393807304975199, - "grad_norm": 1.93819599571456, - "learning_rate": 1.8424266061142869e-06, - "loss": 0.9894, - "step": 7177 - }, - { - "epoch": 0.5394558845633549, - "grad_norm": 2.7078205405686577, - "learning_rate": 1.841941281947766e-06, - "loss": 1.0111, - "step": 7178 - }, - { - "epoch": 0.5395310386291898, - "grad_norm": 4.445645445571922, - "learning_rate": 1.8414559671468288e-06, - "loss": 0.9769, - "step": 7179 - }, - { - "epoch": 0.5396061926950249, - "grad_norm": 2.207365243671678, - "learning_rate": 1.8409706617402333e-06, - "loss": 1.0129, - "step": 7180 - }, - { - "epoch": 0.5396813467608598, - "grad_norm": 1.9953821037178936, - "learning_rate": 1.8404853657567347e-06, - "loss": 0.993, - "step": 7181 - }, - { - "epoch": 0.5397565008266947, - "grad_norm": 1.5537743114923308, - "learning_rate": 1.8400000792250894e-06, - "loss": 1.0198, - "step": 7182 - }, - { - "epoch": 0.5398316548925297, - "grad_norm": 1.5889389875304716, - "learning_rate": 1.8395148021740518e-06, - "loss": 1.0056, - "step": 7183 - }, - { - "epoch": 0.5399068089583646, - "grad_norm": 1.7893171504, - "learning_rate": 1.8390295346323765e-06, - "loss": 0.8983, - "step": 7184 - }, - { - "epoch": 0.5399819630241997, - "grad_norm": 1.7107243513999497, - "learning_rate": 1.8385442766288181e-06, - "loss": 0.9834, - "step": 7185 - }, - { - "epoch": 0.5400571170900346, - "grad_norm": 1.5049722435758461, - "learning_rate": 1.8380590281921294e-06, - "loss": 1.0268, - "step": 7186 - }, - { - "epoch": 0.5401322711558695, - "grad_norm": 1.9728517341066094, - "learning_rate": 1.8375737893510635e-06, - "loss": 1.057, - "step": 7187 - }, - { - "epoch": 0.5402074252217045, - "grad_norm": 1.6523011114151722, - "learning_rate": 1.837088560134372e-06, - "loss": 0.9791, - "step": 7188 - }, - { - "epoch": 0.5402825792875394, - "grad_norm": 1.4944352401787147, - "learning_rate": 1.8366033405708076e-06, - "loss": 0.8924, - "step": 7189 - }, - { - "epoch": 0.5403577333533744, - "grad_norm": 0.7577042649832638, - "learning_rate": 1.8361181306891214e-06, - "loss": 0.8395, - "step": 7190 - }, - { - "epoch": 0.5404328874192094, - "grad_norm": 1.532423076099214, - "learning_rate": 1.8356329305180626e-06, - "loss": 1.0179, - "step": 7191 - }, - { - "epoch": 0.5405080414850444, - "grad_norm": 1.5191289766413874, - "learning_rate": 1.835147740086383e-06, - "loss": 0.9983, - "step": 7192 - }, - { - "epoch": 0.5405831955508793, - "grad_norm": 1.877989928201271, - "learning_rate": 1.8346625594228295e-06, - "loss": 0.9871, - "step": 7193 - }, - { - "epoch": 0.5406583496167142, - "grad_norm": 0.6699100415488675, - "learning_rate": 1.8341773885561539e-06, - "loss": 0.8021, - "step": 7194 - }, - { - "epoch": 0.5407335036825492, - "grad_norm": 1.4022307709048596, - "learning_rate": 1.8336922275151032e-06, - "loss": 1.0104, - "step": 7195 - }, - { - "epoch": 0.5408086577483842, - "grad_norm": 1.5988739521571032, - "learning_rate": 1.8332070763284236e-06, - "loss": 1.0515, - "step": 7196 - }, - { - "epoch": 0.5408838118142192, - "grad_norm": 1.3678089582272317, - "learning_rate": 1.8327219350248643e-06, - "loss": 1.058, - "step": 7197 - }, - { - "epoch": 0.5409589658800541, - "grad_norm": 4.384700375349699, - "learning_rate": 1.8322368036331705e-06, - "loss": 0.9763, - "step": 7198 - }, - { - "epoch": 0.5410341199458891, - "grad_norm": 1.8333267804230997, - "learning_rate": 1.8317516821820888e-06, - "loss": 1.017, - "step": 7199 - }, - { - "epoch": 0.541109274011724, - "grad_norm": 1.6169579384451926, - "learning_rate": 1.8312665707003643e-06, - "loss": 0.9145, - "step": 7200 - }, - { - "epoch": 0.541184428077559, - "grad_norm": 1.6445922115380418, - "learning_rate": 1.8307814692167412e-06, - "loss": 0.8035, - "step": 7201 - }, - { - "epoch": 0.541259582143394, - "grad_norm": 6.624915932175927, - "learning_rate": 1.8302963777599645e-06, - "loss": 1.048, - "step": 7202 - }, - { - "epoch": 0.5413347362092289, - "grad_norm": 0.6426077374994832, - "learning_rate": 1.8298112963587766e-06, - "loss": 0.8361, - "step": 7203 - }, - { - "epoch": 0.5414098902750639, - "grad_norm": 1.6012461280031463, - "learning_rate": 1.8293262250419217e-06, - "loss": 0.9471, - "step": 7204 - }, - { - "epoch": 0.5414850443408988, - "grad_norm": 1.4770968645961924, - "learning_rate": 1.8288411638381415e-06, - "loss": 0.9754, - "step": 7205 - }, - { - "epoch": 0.5415601984067338, - "grad_norm": 1.7285338320015675, - "learning_rate": 1.8283561127761773e-06, - "loss": 1.0133, - "step": 7206 - }, - { - "epoch": 0.5416353524725688, - "grad_norm": 1.3301343191770567, - "learning_rate": 1.8278710718847711e-06, - "loss": 0.9446, - "step": 7207 - }, - { - "epoch": 0.5417105065384037, - "grad_norm": 1.419319203337045, - "learning_rate": 1.8273860411926627e-06, - "loss": 0.974, - "step": 7208 - }, - { - "epoch": 0.5417856606042387, - "grad_norm": 2.5629609786570877, - "learning_rate": 1.8269010207285927e-06, - "loss": 0.9447, - "step": 7209 - }, - { - "epoch": 0.5418608146700736, - "grad_norm": 3.1315301948710568, - "learning_rate": 1.8264160105212995e-06, - "loss": 0.9539, - "step": 7210 - }, - { - "epoch": 0.5419359687359087, - "grad_norm": 1.3651404614543579, - "learning_rate": 1.825931010599523e-06, - "loss": 1.0649, - "step": 7211 - }, - { - "epoch": 0.5420111228017436, - "grad_norm": 1.6050938820639924, - "learning_rate": 1.8254460209920007e-06, - "loss": 0.9949, - "step": 7212 - }, - { - "epoch": 0.5420862768675785, - "grad_norm": 1.8807951697366625, - "learning_rate": 1.8249610417274695e-06, - "loss": 0.9237, - "step": 7213 - }, - { - "epoch": 0.5421614309334135, - "grad_norm": 1.659095048008527, - "learning_rate": 1.8244760728346674e-06, - "loss": 1.015, - "step": 7214 - }, - { - "epoch": 0.5422365849992484, - "grad_norm": 1.8718961456581946, - "learning_rate": 1.823991114342329e-06, - "loss": 0.9725, - "step": 7215 - }, - { - "epoch": 0.5423117390650835, - "grad_norm": 2.0376807899070934, - "learning_rate": 1.823506166279192e-06, - "loss": 0.8186, - "step": 7216 - }, - { - "epoch": 0.5423868931309184, - "grad_norm": 1.4608652025581161, - "learning_rate": 1.823021228673991e-06, - "loss": 0.9436, - "step": 7217 - }, - { - "epoch": 0.5424620471967534, - "grad_norm": 2.160932129076105, - "learning_rate": 1.8225363015554586e-06, - "loss": 0.9035, - "step": 7218 - }, - { - "epoch": 0.5425372012625883, - "grad_norm": 2.005818010923308, - "learning_rate": 1.822051384952331e-06, - "loss": 0.9777, - "step": 7219 - }, - { - "epoch": 0.5426123553284232, - "grad_norm": 1.8455482377554049, - "learning_rate": 1.8215664788933394e-06, - "loss": 1.016, - "step": 7220 - }, - { - "epoch": 0.5426875093942582, - "grad_norm": 0.7403507191561588, - "learning_rate": 1.8210815834072177e-06, - "loss": 0.8489, - "step": 7221 - }, - { - "epoch": 0.5427626634600932, - "grad_norm": 1.7749979153473123, - "learning_rate": 1.8205966985226975e-06, - "loss": 0.9852, - "step": 7222 - }, - { - "epoch": 0.5428378175259282, - "grad_norm": 1.5800225963666776, - "learning_rate": 1.8201118242685093e-06, - "loss": 1.0123, - "step": 7223 - }, - { - "epoch": 0.5429129715917631, - "grad_norm": 1.6904289822987408, - "learning_rate": 1.819626960673385e-06, - "loss": 0.9253, - "step": 7224 - }, - { - "epoch": 0.5429881256575981, - "grad_norm": 0.7529288166996018, - "learning_rate": 1.8191421077660535e-06, - "loss": 0.8345, - "step": 7225 - }, - { - "epoch": 0.543063279723433, - "grad_norm": 1.5124766103654184, - "learning_rate": 1.8186572655752448e-06, - "loss": 0.9918, - "step": 7226 - }, - { - "epoch": 0.543138433789268, - "grad_norm": 1.4409390651253429, - "learning_rate": 1.8181724341296877e-06, - "loss": 0.9084, - "step": 7227 - }, - { - "epoch": 0.543213587855103, - "grad_norm": 1.673641448143894, - "learning_rate": 1.8176876134581098e-06, - "loss": 1.05, - "step": 7228 - }, - { - "epoch": 0.5432887419209379, - "grad_norm": 1.6800169865566892, - "learning_rate": 1.8172028035892394e-06, - "loss": 0.9581, - "step": 7229 - }, - { - "epoch": 0.5433638959867729, - "grad_norm": 1.466922840952234, - "learning_rate": 1.816718004551802e-06, - "loss": 0.9779, - "step": 7230 - }, - { - "epoch": 0.5434390500526078, - "grad_norm": 1.7120722661140717, - "learning_rate": 1.8162332163745254e-06, - "loss": 0.8789, - "step": 7231 - }, - { - "epoch": 0.5435142041184428, - "grad_norm": 0.7558277527653247, - "learning_rate": 1.8157484390861342e-06, - "loss": 0.8866, - "step": 7232 - }, - { - "epoch": 0.5435893581842778, - "grad_norm": 1.690168319762028, - "learning_rate": 1.8152636727153536e-06, - "loss": 0.9843, - "step": 7233 - }, - { - "epoch": 0.5436645122501127, - "grad_norm": 1.789842327355461, - "learning_rate": 1.814778917290908e-06, - "loss": 0.9813, - "step": 7234 - }, - { - "epoch": 0.5437396663159477, - "grad_norm": 0.7660686997813767, - "learning_rate": 1.8142941728415204e-06, - "loss": 0.8926, - "step": 7235 - }, - { - "epoch": 0.5438148203817826, - "grad_norm": 1.482704266375571, - "learning_rate": 1.8138094393959144e-06, - "loss": 0.9448, - "step": 7236 - }, - { - "epoch": 0.5438899744476177, - "grad_norm": 1.559445791223918, - "learning_rate": 1.8133247169828114e-06, - "loss": 0.9835, - "step": 7237 - }, - { - "epoch": 0.5439651285134526, - "grad_norm": 1.667307809852215, - "learning_rate": 1.8128400056309345e-06, - "loss": 0.9336, - "step": 7238 - }, - { - "epoch": 0.5440402825792875, - "grad_norm": 1.339743251757204, - "learning_rate": 1.8123553053690046e-06, - "loss": 1.0108, - "step": 7239 - }, - { - "epoch": 0.5441154366451225, - "grad_norm": 1.9152752829315605, - "learning_rate": 1.81187061622574e-06, - "loss": 0.8904, - "step": 7240 - }, - { - "epoch": 0.5441905907109574, - "grad_norm": 1.4016843217621577, - "learning_rate": 1.8113859382298627e-06, - "loss": 0.9791, - "step": 7241 - }, - { - "epoch": 0.5442657447767925, - "grad_norm": 1.7702210284015782, - "learning_rate": 1.81090127141009e-06, - "loss": 0.907, - "step": 7242 - }, - { - "epoch": 0.5443408988426274, - "grad_norm": 1.8385374449194256, - "learning_rate": 1.8104166157951419e-06, - "loss": 0.9992, - "step": 7243 - }, - { - "epoch": 0.5444160529084624, - "grad_norm": 1.3492595036928066, - "learning_rate": 1.809931971413735e-06, - "loss": 1.0777, - "step": 7244 - }, - { - "epoch": 0.5444912069742973, - "grad_norm": 2.359012500123415, - "learning_rate": 1.8094473382945866e-06, - "loss": 1.0237, - "step": 7245 - }, - { - "epoch": 0.5445663610401322, - "grad_norm": 1.493451846066409, - "learning_rate": 1.8089627164664132e-06, - "loss": 1.0001, - "step": 7246 - }, - { - "epoch": 0.5446415151059673, - "grad_norm": 1.848360536316087, - "learning_rate": 1.80847810595793e-06, - "loss": 0.8755, - "step": 7247 - }, - { - "epoch": 0.5447166691718022, - "grad_norm": 1.6149261438825344, - "learning_rate": 1.8079935067978528e-06, - "loss": 0.9378, - "step": 7248 - }, - { - "epoch": 0.5447918232376372, - "grad_norm": 1.5452758537861238, - "learning_rate": 1.8075089190148956e-06, - "loss": 0.8898, - "step": 7249 - }, - { - "epoch": 0.5448669773034721, - "grad_norm": 1.7561407659369925, - "learning_rate": 1.8070243426377716e-06, - "loss": 0.9467, - "step": 7250 - }, - { - "epoch": 0.544942131369307, - "grad_norm": 1.344916170336934, - "learning_rate": 1.8065397776951946e-06, - "loss": 0.935, - "step": 7251 - }, - { - "epoch": 0.545017285435142, - "grad_norm": 1.492454544857627, - "learning_rate": 1.8060552242158765e-06, - "loss": 0.9949, - "step": 7252 - }, - { - "epoch": 0.545092439500977, - "grad_norm": 2.358355876783408, - "learning_rate": 1.8055706822285291e-06, - "loss": 1.0211, - "step": 7253 - }, - { - "epoch": 0.545167593566812, - "grad_norm": 2.2893715086797597, - "learning_rate": 1.8050861517618629e-06, - "loss": 0.9701, - "step": 7254 - }, - { - "epoch": 0.5452427476326469, - "grad_norm": 0.8662090107961943, - "learning_rate": 1.8046016328445893e-06, - "loss": 0.8667, - "step": 7255 - }, - { - "epoch": 0.5453179016984819, - "grad_norm": 1.6807843552607098, - "learning_rate": 1.804117125505417e-06, - "loss": 0.9219, - "step": 7256 - }, - { - "epoch": 0.5453930557643168, - "grad_norm": 1.5841546020101915, - "learning_rate": 1.803632629773054e-06, - "loss": 0.9555, - "step": 7257 - }, - { - "epoch": 0.5454682098301518, - "grad_norm": 2.016346528781002, - "learning_rate": 1.8031481456762112e-06, - "loss": 1.0453, - "step": 7258 - }, - { - "epoch": 0.5455433638959868, - "grad_norm": 1.6915367023298606, - "learning_rate": 1.802663673243593e-06, - "loss": 0.988, - "step": 7259 - }, - { - "epoch": 0.5456185179618217, - "grad_norm": 1.5992021240205003, - "learning_rate": 1.802179212503909e-06, - "loss": 0.9856, - "step": 7260 - }, - { - "epoch": 0.5456936720276567, - "grad_norm": 2.107129406366529, - "learning_rate": 1.801694763485864e-06, - "loss": 0.9195, - "step": 7261 - }, - { - "epoch": 0.5457688260934916, - "grad_norm": 1.57101773799447, - "learning_rate": 1.8012103262181635e-06, - "loss": 1.0381, - "step": 7262 - }, - { - "epoch": 0.5458439801593267, - "grad_norm": 1.4348967656607348, - "learning_rate": 1.8007259007295125e-06, - "loss": 0.974, - "step": 7263 - }, - { - "epoch": 0.5459191342251616, - "grad_norm": 2.581925336595949, - "learning_rate": 1.8002414870486144e-06, - "loss": 0.9479, - "step": 7264 - }, - { - "epoch": 0.5459942882909965, - "grad_norm": 1.9666368557669556, - "learning_rate": 1.7997570852041739e-06, - "loss": 0.9753, - "step": 7265 - }, - { - "epoch": 0.5460694423568315, - "grad_norm": 1.6055268140256704, - "learning_rate": 1.7992726952248926e-06, - "loss": 0.9429, - "step": 7266 - }, - { - "epoch": 0.5461445964226664, - "grad_norm": 1.9046270622196688, - "learning_rate": 1.7987883171394724e-06, - "loss": 0.9987, - "step": 7267 - }, - { - "epoch": 0.5462197504885015, - "grad_norm": 1.8031451748094491, - "learning_rate": 1.7983039509766156e-06, - "loss": 0.9549, - "step": 7268 - }, - { - "epoch": 0.5462949045543364, - "grad_norm": 15.497339820237093, - "learning_rate": 1.7978195967650214e-06, - "loss": 1.0191, - "step": 7269 - }, - { - "epoch": 0.5463700586201714, - "grad_norm": 2.5156445235922322, - "learning_rate": 1.7973352545333905e-06, - "loss": 0.8722, - "step": 7270 - }, - { - "epoch": 0.5464452126860063, - "grad_norm": 2.2269106944026125, - "learning_rate": 1.796850924310422e-06, - "loss": 0.9986, - "step": 7271 - }, - { - "epoch": 0.5465203667518412, - "grad_norm": 1.6759196048700888, - "learning_rate": 1.796366606124814e-06, - "loss": 1.0955, - "step": 7272 - }, - { - "epoch": 0.5465955208176763, - "grad_norm": 1.3659450880410222, - "learning_rate": 1.7958823000052643e-06, - "loss": 1.0055, - "step": 7273 - }, - { - "epoch": 0.5466706748835112, - "grad_norm": 1.5486629441296333, - "learning_rate": 1.79539800598047e-06, - "loss": 1.0014, - "step": 7274 - }, - { - "epoch": 0.5467458289493462, - "grad_norm": 1.7103192331469763, - "learning_rate": 1.7949137240791275e-06, - "loss": 1.0686, - "step": 7275 - }, - { - "epoch": 0.5468209830151811, - "grad_norm": 1.6851596144287593, - "learning_rate": 1.7944294543299317e-06, - "loss": 0.9781, - "step": 7276 - }, - { - "epoch": 0.546896137081016, - "grad_norm": 1.8402760639622544, - "learning_rate": 1.7939451967615783e-06, - "loss": 0.9153, - "step": 7277 - }, - { - "epoch": 0.5469712911468511, - "grad_norm": 4.272069513319715, - "learning_rate": 1.793460951402761e-06, - "loss": 0.9104, - "step": 7278 - }, - { - "epoch": 0.547046445212686, - "grad_norm": 1.525074944452534, - "learning_rate": 1.7929767182821724e-06, - "loss": 0.9115, - "step": 7279 - }, - { - "epoch": 0.547121599278521, - "grad_norm": 1.57055085304317, - "learning_rate": 1.7924924974285074e-06, - "loss": 1.0093, - "step": 7280 - }, - { - "epoch": 0.5471967533443559, - "grad_norm": 2.0439779731922307, - "learning_rate": 1.7920082888704553e-06, - "loss": 0.925, - "step": 7281 - }, - { - "epoch": 0.5472719074101909, - "grad_norm": 1.8046574445591492, - "learning_rate": 1.7915240926367092e-06, - "loss": 0.9987, - "step": 7282 - }, - { - "epoch": 0.5473470614760259, - "grad_norm": 1.546712770528514, - "learning_rate": 1.791039908755959e-06, - "loss": 0.9651, - "step": 7283 - }, - { - "epoch": 0.5474222155418608, - "grad_norm": 1.6573761764005472, - "learning_rate": 1.790555737256894e-06, - "loss": 0.8921, - "step": 7284 - }, - { - "epoch": 0.5474973696076958, - "grad_norm": 0.7187012780303016, - "learning_rate": 1.7900715781682039e-06, - "loss": 0.8094, - "step": 7285 - }, - { - "epoch": 0.5475725236735307, - "grad_norm": 1.9478648649729386, - "learning_rate": 1.7895874315185763e-06, - "loss": 0.9945, - "step": 7286 - }, - { - "epoch": 0.5476476777393657, - "grad_norm": 2.5724926082997945, - "learning_rate": 1.7891032973366996e-06, - "loss": 0.982, - "step": 7287 - }, - { - "epoch": 0.5477228318052006, - "grad_norm": 1.790495119826956, - "learning_rate": 1.7886191756512598e-06, - "loss": 0.9487, - "step": 7288 - }, - { - "epoch": 0.5477979858710357, - "grad_norm": 1.7914504726589542, - "learning_rate": 1.788135066490943e-06, - "loss": 0.9329, - "step": 7289 - }, - { - "epoch": 0.5478731399368706, - "grad_norm": 1.817927488507132, - "learning_rate": 1.7876509698844356e-06, - "loss": 0.9122, - "step": 7290 - }, - { - "epoch": 0.5479482940027055, - "grad_norm": 2.6287406427105324, - "learning_rate": 1.7871668858604206e-06, - "loss": 0.959, - "step": 7291 - }, - { - "epoch": 0.5480234480685405, - "grad_norm": 1.4087880436956925, - "learning_rate": 1.786682814447583e-06, - "loss": 0.9964, - "step": 7292 - }, - { - "epoch": 0.5480986021343754, - "grad_norm": 1.6095781661005084, - "learning_rate": 1.7861987556746056e-06, - "loss": 0.9927, - "step": 7293 - }, - { - "epoch": 0.5481737562002105, - "grad_norm": 1.510644812179777, - "learning_rate": 1.78571470957017e-06, - "loss": 0.9407, - "step": 7294 - }, - { - "epoch": 0.5482489102660454, - "grad_norm": 1.4277237710415467, - "learning_rate": 1.7852306761629592e-06, - "loss": 0.976, - "step": 7295 - }, - { - "epoch": 0.5483240643318803, - "grad_norm": 1.4632599673783049, - "learning_rate": 1.7847466554816526e-06, - "loss": 0.9789, - "step": 7296 - }, - { - "epoch": 0.5483992183977153, - "grad_norm": 1.8976183721437794, - "learning_rate": 1.7842626475549314e-06, - "loss": 1.0608, - "step": 7297 - }, - { - "epoch": 0.5484743724635502, - "grad_norm": 1.867434688789552, - "learning_rate": 1.783778652411474e-06, - "loss": 0.935, - "step": 7298 - }, - { - "epoch": 0.5485495265293853, - "grad_norm": 1.9180947940221778, - "learning_rate": 1.7832946700799596e-06, - "loss": 1.0787, - "step": 7299 - }, - { - "epoch": 0.5486246805952202, - "grad_norm": 3.636050514264778, - "learning_rate": 1.7828107005890663e-06, - "loss": 0.8909, - "step": 7300 - }, - { - "epoch": 0.5486998346610552, - "grad_norm": 1.6514050858907054, - "learning_rate": 1.7823267439674694e-06, - "loss": 1.0773, - "step": 7301 - }, - { - "epoch": 0.5487749887268901, - "grad_norm": 3.052807475732251, - "learning_rate": 1.7818428002438475e-06, - "loss": 1.0563, - "step": 7302 - }, - { - "epoch": 0.548850142792725, - "grad_norm": 0.7185715163495466, - "learning_rate": 1.7813588694468745e-06, - "loss": 0.8718, - "step": 7303 - }, - { - "epoch": 0.5489252968585601, - "grad_norm": 1.3386016515607917, - "learning_rate": 1.780874951605226e-06, - "loss": 0.9435, - "step": 7304 - }, - { - "epoch": 0.549000450924395, - "grad_norm": 1.5820478273629925, - "learning_rate": 1.7803910467475763e-06, - "loss": 0.9732, - "step": 7305 - }, - { - "epoch": 0.54907560499023, - "grad_norm": 2.4528496460149793, - "learning_rate": 1.779907154902597e-06, - "loss": 0.8864, - "step": 7306 - }, - { - "epoch": 0.5491507590560649, - "grad_norm": 1.844965001009491, - "learning_rate": 1.7794232760989623e-06, - "loss": 0.9955, - "step": 7307 - }, - { - "epoch": 0.5492259131219, - "grad_norm": 1.7623891295062732, - "learning_rate": 1.7789394103653425e-06, - "loss": 1.0627, - "step": 7308 - }, - { - "epoch": 0.5493010671877349, - "grad_norm": 0.7358598105209297, - "learning_rate": 1.7784555577304099e-06, - "loss": 0.814, - "step": 7309 - }, - { - "epoch": 0.5493762212535698, - "grad_norm": 0.791891559647472, - "learning_rate": 1.7779717182228335e-06, - "loss": 0.8705, - "step": 7310 - }, - { - "epoch": 0.5494513753194048, - "grad_norm": 1.461847440109989, - "learning_rate": 1.7774878918712828e-06, - "loss": 0.997, - "step": 7311 - }, - { - "epoch": 0.5495265293852397, - "grad_norm": 1.3355220572572106, - "learning_rate": 1.777004078704427e-06, - "loss": 0.9546, - "step": 7312 - }, - { - "epoch": 0.5496016834510747, - "grad_norm": 1.9182223834054997, - "learning_rate": 1.7765202787509327e-06, - "loss": 0.8716, - "step": 7313 - }, - { - "epoch": 0.5496768375169097, - "grad_norm": 1.4796646068054788, - "learning_rate": 1.7760364920394684e-06, - "loss": 0.8645, - "step": 7314 - }, - { - "epoch": 0.5497519915827447, - "grad_norm": 1.6211638286104666, - "learning_rate": 1.7755527185986996e-06, - "loss": 0.9904, - "step": 7315 - }, - { - "epoch": 0.5498271456485796, - "grad_norm": 1.4090521127500262, - "learning_rate": 1.775068958457291e-06, - "loss": 1.0623, - "step": 7316 - }, - { - "epoch": 0.5499022997144145, - "grad_norm": 1.2857188932396815, - "learning_rate": 1.7745852116439087e-06, - "loss": 1.0967, - "step": 7317 - }, - { - "epoch": 0.5499774537802495, - "grad_norm": 1.5589402657321325, - "learning_rate": 1.774101478187215e-06, - "loss": 1.0209, - "step": 7318 - }, - { - "epoch": 0.5500526078460845, - "grad_norm": 0.6208195350914292, - "learning_rate": 1.7736177581158742e-06, - "loss": 0.7923, - "step": 7319 - }, - { - "epoch": 0.5501277619119195, - "grad_norm": 1.75120707163178, - "learning_rate": 1.7731340514585474e-06, - "loss": 0.9258, - "step": 7320 - }, - { - "epoch": 0.5502029159777544, - "grad_norm": 1.6795891311584554, - "learning_rate": 1.7726503582438982e-06, - "loss": 0.8844, - "step": 7321 - }, - { - "epoch": 0.5502780700435893, - "grad_norm": 3.4231161018048377, - "learning_rate": 1.772166678500585e-06, - "loss": 0.9746, - "step": 7322 - }, - { - "epoch": 0.5503532241094243, - "grad_norm": 2.062092133854804, - "learning_rate": 1.771683012257268e-06, - "loss": 1.0435, - "step": 7323 - }, - { - "epoch": 0.5504283781752592, - "grad_norm": 1.869288328068891, - "learning_rate": 1.7711993595426076e-06, - "loss": 0.8762, - "step": 7324 - }, - { - "epoch": 0.5505035322410943, - "grad_norm": 1.395810819795397, - "learning_rate": 1.7707157203852608e-06, - "loss": 0.9981, - "step": 7325 - }, - { - "epoch": 0.5505786863069292, - "grad_norm": 1.6373329921907407, - "learning_rate": 1.770232094813886e-06, - "loss": 0.9708, - "step": 7326 - }, - { - "epoch": 0.5506538403727642, - "grad_norm": 1.9310313499153966, - "learning_rate": 1.7697484828571394e-06, - "loss": 0.9627, - "step": 7327 - }, - { - "epoch": 0.5507289944385991, - "grad_norm": 1.8941038927830447, - "learning_rate": 1.7692648845436764e-06, - "loss": 0.9708, - "step": 7328 - }, - { - "epoch": 0.550804148504434, - "grad_norm": 1.3700825149162141, - "learning_rate": 1.7687812999021531e-06, - "loss": 0.877, - "step": 7329 - }, - { - "epoch": 0.5508793025702691, - "grad_norm": 1.9232747632744047, - "learning_rate": 1.7682977289612226e-06, - "loss": 0.9695, - "step": 7330 - }, - { - "epoch": 0.550954456636104, - "grad_norm": 1.8060615230021302, - "learning_rate": 1.7678141717495395e-06, - "loss": 0.9747, - "step": 7331 - }, - { - "epoch": 0.551029610701939, - "grad_norm": 1.4965595685508377, - "learning_rate": 1.7673306282957559e-06, - "loss": 0.9507, - "step": 7332 - }, - { - "epoch": 0.5511047647677739, - "grad_norm": 1.474358584064785, - "learning_rate": 1.766847098628523e-06, - "loss": 1.0959, - "step": 7333 - }, - { - "epoch": 0.551179918833609, - "grad_norm": 1.263218133466709, - "learning_rate": 1.7663635827764924e-06, - "loss": 0.893, - "step": 7334 - }, - { - "epoch": 0.5512550728994439, - "grad_norm": 1.5587682028826457, - "learning_rate": 1.7658800807683142e-06, - "loss": 1.0008, - "step": 7335 - }, - { - "epoch": 0.5513302269652788, - "grad_norm": 2.6869422866194084, - "learning_rate": 1.7653965926326379e-06, - "loss": 0.9178, - "step": 7336 - }, - { - "epoch": 0.5514053810311138, - "grad_norm": 1.5340701233454306, - "learning_rate": 1.764913118398112e-06, - "loss": 1.0509, - "step": 7337 - }, - { - "epoch": 0.5514805350969487, - "grad_norm": 1.5048173794888111, - "learning_rate": 1.7644296580933835e-06, - "loss": 0.919, - "step": 7338 - }, - { - "epoch": 0.5515556891627837, - "grad_norm": 1.6676932114013434, - "learning_rate": 1.7639462117471004e-06, - "loss": 0.9277, - "step": 7339 - }, - { - "epoch": 0.5516308432286187, - "grad_norm": 6.35943228581001, - "learning_rate": 1.7634627793879075e-06, - "loss": 0.9857, - "step": 7340 - }, - { - "epoch": 0.5517059972944536, - "grad_norm": 1.49268847938355, - "learning_rate": 1.7629793610444513e-06, - "loss": 0.8878, - "step": 7341 - }, - { - "epoch": 0.5517811513602886, - "grad_norm": 1.517179812442306, - "learning_rate": 1.7624959567453746e-06, - "loss": 0.9988, - "step": 7342 - }, - { - "epoch": 0.5518563054261235, - "grad_norm": 1.8738962739596055, - "learning_rate": 1.7620125665193232e-06, - "loss": 0.9168, - "step": 7343 - }, - { - "epoch": 0.5519314594919585, - "grad_norm": 3.687699781275429, - "learning_rate": 1.7615291903949382e-06, - "loss": 0.9289, - "step": 7344 - }, - { - "epoch": 0.5520066135577935, - "grad_norm": 0.6837593967831148, - "learning_rate": 1.761045828400861e-06, - "loss": 0.87, - "step": 7345 - }, - { - "epoch": 0.5520817676236285, - "grad_norm": 0.8244727227028859, - "learning_rate": 1.7605624805657343e-06, - "loss": 0.9023, - "step": 7346 - }, - { - "epoch": 0.5521569216894634, - "grad_norm": 1.5955906131444364, - "learning_rate": 1.760079146918197e-06, - "loss": 0.9954, - "step": 7347 - }, - { - "epoch": 0.5522320757552983, - "grad_norm": 1.5880676396087203, - "learning_rate": 1.7595958274868896e-06, - "loss": 0.9586, - "step": 7348 - }, - { - "epoch": 0.5523072298211333, - "grad_norm": 1.6424657032490777, - "learning_rate": 1.75911252230045e-06, - "loss": 1.0602, - "step": 7349 - }, - { - "epoch": 0.5523823838869683, - "grad_norm": 2.15398188179068, - "learning_rate": 1.758629231387515e-06, - "loss": 0.8624, - "step": 7350 - }, - { - "epoch": 0.5524575379528033, - "grad_norm": 1.6092963338347583, - "learning_rate": 1.7581459547767233e-06, - "loss": 1.0079, - "step": 7351 - }, - { - "epoch": 0.5525326920186382, - "grad_norm": 2.6346953137335856, - "learning_rate": 1.7576626924967091e-06, - "loss": 1.015, - "step": 7352 - }, - { - "epoch": 0.5526078460844732, - "grad_norm": 1.5837710762578558, - "learning_rate": 1.7571794445761089e-06, - "loss": 0.9318, - "step": 7353 - }, - { - "epoch": 0.5526830001503081, - "grad_norm": 1.9785228552126035, - "learning_rate": 1.7566962110435563e-06, - "loss": 0.9418, - "step": 7354 - }, - { - "epoch": 0.552758154216143, - "grad_norm": 1.9316363050144438, - "learning_rate": 1.7562129919276845e-06, - "loss": 0.9552, - "step": 7355 - }, - { - "epoch": 0.5528333082819781, - "grad_norm": 1.6539719227549168, - "learning_rate": 1.7557297872571272e-06, - "loss": 0.9901, - "step": 7356 - }, - { - "epoch": 0.552908462347813, - "grad_norm": 2.90621390371569, - "learning_rate": 1.7552465970605145e-06, - "loss": 1.0613, - "step": 7357 - }, - { - "epoch": 0.552983616413648, - "grad_norm": 6.619962108307697, - "learning_rate": 1.7547634213664786e-06, - "loss": 0.9265, - "step": 7358 - }, - { - "epoch": 0.5530587704794829, - "grad_norm": 1.3805450124783625, - "learning_rate": 1.7542802602036492e-06, - "loss": 0.9527, - "step": 7359 - }, - { - "epoch": 0.553133924545318, - "grad_norm": 2.320221664757141, - "learning_rate": 1.753797113600655e-06, - "loss": 1.0219, - "step": 7360 - }, - { - "epoch": 0.5532090786111529, - "grad_norm": 1.4955818754770642, - "learning_rate": 1.7533139815861248e-06, - "loss": 1.0324, - "step": 7361 - }, - { - "epoch": 0.5532842326769878, - "grad_norm": 2.83034297738435, - "learning_rate": 1.7528308641886856e-06, - "loss": 1.0215, - "step": 7362 - }, - { - "epoch": 0.5533593867428228, - "grad_norm": 1.459400041189738, - "learning_rate": 1.7523477614369645e-06, - "loss": 0.8993, - "step": 7363 - }, - { - "epoch": 0.5534345408086577, - "grad_norm": 1.6829628879854794, - "learning_rate": 1.751864673359586e-06, - "loss": 0.9773, - "step": 7364 - }, - { - "epoch": 0.5535096948744928, - "grad_norm": 1.9548898886381885, - "learning_rate": 1.7513815999851767e-06, - "loss": 0.8614, - "step": 7365 - }, - { - "epoch": 0.5535848489403277, - "grad_norm": 0.6826834585085384, - "learning_rate": 1.7508985413423599e-06, - "loss": 0.8638, - "step": 7366 - }, - { - "epoch": 0.5536600030061626, - "grad_norm": 2.0308954956575747, - "learning_rate": 1.7504154974597572e-06, - "loss": 0.9029, - "step": 7367 - }, - { - "epoch": 0.5537351570719976, - "grad_norm": 0.7088984077394834, - "learning_rate": 1.7499324683659928e-06, - "loss": 0.8521, - "step": 7368 - }, - { - "epoch": 0.5538103111378325, - "grad_norm": 1.5390765968877724, - "learning_rate": 1.749449454089687e-06, - "loss": 0.9321, - "step": 7369 - }, - { - "epoch": 0.5538854652036675, - "grad_norm": 1.3803194493124658, - "learning_rate": 1.7489664546594606e-06, - "loss": 0.9654, - "step": 7370 - }, - { - "epoch": 0.5539606192695025, - "grad_norm": 2.151681012205037, - "learning_rate": 1.7484834701039333e-06, - "loss": 0.9018, - "step": 7371 - }, - { - "epoch": 0.5540357733353375, - "grad_norm": 1.4797556425394016, - "learning_rate": 1.7480005004517228e-06, - "loss": 1.0054, - "step": 7372 - }, - { - "epoch": 0.5541109274011724, - "grad_norm": 6.981102366581194, - "learning_rate": 1.7475175457314481e-06, - "loss": 0.9141, - "step": 7373 - }, - { - "epoch": 0.5541860814670073, - "grad_norm": 1.5759784716912388, - "learning_rate": 1.7470346059717253e-06, - "loss": 0.9698, - "step": 7374 - }, - { - "epoch": 0.5542612355328423, - "grad_norm": 1.656184645029103, - "learning_rate": 1.7465516812011713e-06, - "loss": 0.9578, - "step": 7375 - }, - { - "epoch": 0.5543363895986773, - "grad_norm": 1.715810238369508, - "learning_rate": 1.7460687714484008e-06, - "loss": 0.9575, - "step": 7376 - }, - { - "epoch": 0.5544115436645123, - "grad_norm": 1.6205042491081483, - "learning_rate": 1.7455858767420272e-06, - "loss": 0.9372, - "step": 7377 - }, - { - "epoch": 0.5544866977303472, - "grad_norm": 1.6982628249087481, - "learning_rate": 1.7451029971106653e-06, - "loss": 1.0979, - "step": 7378 - }, - { - "epoch": 0.5545618517961822, - "grad_norm": 1.5691770937278247, - "learning_rate": 1.7446201325829261e-06, - "loss": 0.9917, - "step": 7379 - }, - { - "epoch": 0.5546370058620171, - "grad_norm": 1.8845256435221382, - "learning_rate": 1.7441372831874228e-06, - "loss": 0.9727, - "step": 7380 - }, - { - "epoch": 0.554712159927852, - "grad_norm": 0.7759185070305127, - "learning_rate": 1.7436544489527652e-06, - "loss": 0.8508, - "step": 7381 - }, - { - "epoch": 0.5547873139936871, - "grad_norm": 0.6716160607934927, - "learning_rate": 1.7431716299075625e-06, - "loss": 0.9036, - "step": 7382 - }, - { - "epoch": 0.554862468059522, - "grad_norm": 1.696107036550835, - "learning_rate": 1.7426888260804247e-06, - "loss": 0.9985, - "step": 7383 - }, - { - "epoch": 0.554937622125357, - "grad_norm": 0.814990554708032, - "learning_rate": 1.7422060374999587e-06, - "loss": 0.8567, - "step": 7384 - }, - { - "epoch": 0.5550127761911919, - "grad_norm": 2.445314060003496, - "learning_rate": 1.7417232641947728e-06, - "loss": 0.8555, - "step": 7385 - }, - { - "epoch": 0.5550879302570269, - "grad_norm": 1.966386920893063, - "learning_rate": 1.7412405061934714e-06, - "loss": 1.0238, - "step": 7386 - }, - { - "epoch": 0.5551630843228619, - "grad_norm": 1.5059098013859522, - "learning_rate": 1.740757763524662e-06, - "loss": 0.9635, - "step": 7387 - }, - { - "epoch": 0.5552382383886968, - "grad_norm": 1.370408802715474, - "learning_rate": 1.740275036216948e-06, - "loss": 1.0311, - "step": 7388 - }, - { - "epoch": 0.5553133924545318, - "grad_norm": 2.1565265201783785, - "learning_rate": 1.7397923242989314e-06, - "loss": 0.9521, - "step": 7389 - }, - { - "epoch": 0.5553885465203667, - "grad_norm": 2.25944064349661, - "learning_rate": 1.739309627799217e-06, - "loss": 1.0287, - "step": 7390 - }, - { - "epoch": 0.5554637005862018, - "grad_norm": 2.1777339370795525, - "learning_rate": 1.7388269467464047e-06, - "loss": 1.0975, - "step": 7391 - }, - { - "epoch": 0.5555388546520367, - "grad_norm": 1.522457413978243, - "learning_rate": 1.7383442811690967e-06, - "loss": 0.9058, - "step": 7392 - }, - { - "epoch": 0.5556140087178716, - "grad_norm": 1.7236202941958658, - "learning_rate": 1.7378616310958917e-06, - "loss": 0.9909, - "step": 7393 - }, - { - "epoch": 0.5556891627837066, - "grad_norm": 2.2450834174684027, - "learning_rate": 1.7373789965553886e-06, - "loss": 0.964, - "step": 7394 - }, - { - "epoch": 0.5557643168495415, - "grad_norm": 4.567894868365776, - "learning_rate": 1.736896377576186e-06, - "loss": 0.8689, - "step": 7395 - }, - { - "epoch": 0.5558394709153766, - "grad_norm": 1.3606968295719444, - "learning_rate": 1.73641377418688e-06, - "loss": 1.0009, - "step": 7396 - }, - { - "epoch": 0.5559146249812115, - "grad_norm": 1.4498307819939664, - "learning_rate": 1.7359311864160677e-06, - "loss": 0.9817, - "step": 7397 - }, - { - "epoch": 0.5559897790470465, - "grad_norm": 1.5499508265416893, - "learning_rate": 1.7354486142923438e-06, - "loss": 0.988, - "step": 7398 - }, - { - "epoch": 0.5560649331128814, - "grad_norm": 1.609709695623394, - "learning_rate": 1.7349660578443022e-06, - "loss": 0.9341, - "step": 7399 - }, - { - "epoch": 0.5561400871787163, - "grad_norm": 1.4042324428075388, - "learning_rate": 1.7344835171005368e-06, - "loss": 1.0014, - "step": 7400 - }, - { - "epoch": 0.5562152412445514, - "grad_norm": 1.6863846801860007, - "learning_rate": 1.7340009920896392e-06, - "loss": 0.9388, - "step": 7401 - }, - { - "epoch": 0.5562903953103863, - "grad_norm": 1.4905843061414574, - "learning_rate": 1.7335184828402022e-06, - "loss": 0.9457, - "step": 7402 - }, - { - "epoch": 0.5563655493762213, - "grad_norm": 1.6319684489042654, - "learning_rate": 1.7330359893808154e-06, - "loss": 1.0285, - "step": 7403 - }, - { - "epoch": 0.5564407034420562, - "grad_norm": 2.586799132291484, - "learning_rate": 1.732553511740068e-06, - "loss": 0.9909, - "step": 7404 - }, - { - "epoch": 0.5565158575078912, - "grad_norm": 1.4127630933979418, - "learning_rate": 1.7320710499465494e-06, - "loss": 1.0151, - "step": 7405 - }, - { - "epoch": 0.5565910115737261, - "grad_norm": 1.6960938230959939, - "learning_rate": 1.7315886040288468e-06, - "loss": 0.9599, - "step": 7406 - }, - { - "epoch": 0.5566661656395611, - "grad_norm": 1.641948493093918, - "learning_rate": 1.7311061740155477e-06, - "loss": 0.9102, - "step": 7407 - }, - { - "epoch": 0.5567413197053961, - "grad_norm": 1.5125238999929644, - "learning_rate": 1.7306237599352365e-06, - "loss": 0.8662, - "step": 7408 - }, - { - "epoch": 0.556816473771231, - "grad_norm": 22.32719010464035, - "learning_rate": 1.7301413618165e-06, - "loss": 0.9734, - "step": 7409 - }, - { - "epoch": 0.556891627837066, - "grad_norm": 0.6454332455779008, - "learning_rate": 1.7296589796879215e-06, - "loss": 0.8555, - "step": 7410 - }, - { - "epoch": 0.5569667819029009, - "grad_norm": 1.6455780829302096, - "learning_rate": 1.7291766135780825e-06, - "loss": 0.9508, - "step": 7411 - }, - { - "epoch": 0.5570419359687359, - "grad_norm": 1.49746752581149, - "learning_rate": 1.728694263515567e-06, - "loss": 0.9992, - "step": 7412 - }, - { - "epoch": 0.5571170900345709, - "grad_norm": 1.7505949235298417, - "learning_rate": 1.728211929528955e-06, - "loss": 0.9617, - "step": 7413 - }, - { - "epoch": 0.5571922441004058, - "grad_norm": 1.8907086062270115, - "learning_rate": 1.727729611646827e-06, - "loss": 0.9194, - "step": 7414 - }, - { - "epoch": 0.5572673981662408, - "grad_norm": 1.6481271605320158, - "learning_rate": 1.7272473098977623e-06, - "loss": 0.9743, - "step": 7415 - }, - { - "epoch": 0.5573425522320757, - "grad_norm": 1.9331097854523664, - "learning_rate": 1.7267650243103384e-06, - "loss": 0.9871, - "step": 7416 - }, - { - "epoch": 0.5574177062979108, - "grad_norm": 0.7550448975255432, - "learning_rate": 1.7262827549131337e-06, - "loss": 0.8461, - "step": 7417 - }, - { - "epoch": 0.5574928603637457, - "grad_norm": 1.4385573603358472, - "learning_rate": 1.7258005017347234e-06, - "loss": 1.0179, - "step": 7418 - }, - { - "epoch": 0.5575680144295806, - "grad_norm": 1.5886131228859097, - "learning_rate": 1.725318264803684e-06, - "loss": 0.8905, - "step": 7419 - }, - { - "epoch": 0.5576431684954156, - "grad_norm": 1.4503442552933095, - "learning_rate": 1.724836044148589e-06, - "loss": 0.9529, - "step": 7420 - }, - { - "epoch": 0.5577183225612505, - "grad_norm": 1.7242362114420113, - "learning_rate": 1.7243538397980115e-06, - "loss": 1.0251, - "step": 7421 - }, - { - "epoch": 0.5577934766270856, - "grad_norm": 1.5654206254855065, - "learning_rate": 1.7238716517805249e-06, - "loss": 0.997, - "step": 7422 - }, - { - "epoch": 0.5578686306929205, - "grad_norm": 1.6470459040467669, - "learning_rate": 1.7233894801247002e-06, - "loss": 0.9662, - "step": 7423 - }, - { - "epoch": 0.5579437847587555, - "grad_norm": 2.143630430929725, - "learning_rate": 1.7229073248591084e-06, - "loss": 1.0191, - "step": 7424 - }, - { - "epoch": 0.5580189388245904, - "grad_norm": 1.5030606357173169, - "learning_rate": 1.7224251860123185e-06, - "loss": 0.9889, - "step": 7425 - }, - { - "epoch": 0.5580940928904253, - "grad_norm": 1.461156410502989, - "learning_rate": 1.7219430636128989e-06, - "loss": 0.9856, - "step": 7426 - }, - { - "epoch": 0.5581692469562604, - "grad_norm": 1.5240749492688244, - "learning_rate": 1.721460957689418e-06, - "loss": 0.97, - "step": 7427 - }, - { - "epoch": 0.5582444010220953, - "grad_norm": 1.9689428446946953, - "learning_rate": 1.720978868270441e-06, - "loss": 1.0831, - "step": 7428 - }, - { - "epoch": 0.5583195550879303, - "grad_norm": 1.5660636698469594, - "learning_rate": 1.7204967953845358e-06, - "loss": 1.0014, - "step": 7429 - }, - { - "epoch": 0.5583947091537652, - "grad_norm": 2.09819629046947, - "learning_rate": 1.7200147390602643e-06, - "loss": 0.9953, - "step": 7430 - }, - { - "epoch": 0.5584698632196001, - "grad_norm": 1.974978026738643, - "learning_rate": 1.7195326993261927e-06, - "loss": 0.9411, - "step": 7431 - }, - { - "epoch": 0.5585450172854352, - "grad_norm": 1.5834278864819966, - "learning_rate": 1.7190506762108828e-06, - "loss": 0.9158, - "step": 7432 - }, - { - "epoch": 0.5586201713512701, - "grad_norm": 2.3407857842220348, - "learning_rate": 1.7185686697428954e-06, - "loss": 0.9906, - "step": 7433 - }, - { - "epoch": 0.5586953254171051, - "grad_norm": 1.46465745885736, - "learning_rate": 1.7180866799507925e-06, - "loss": 0.9324, - "step": 7434 - }, - { - "epoch": 0.55877047948294, - "grad_norm": 1.4001485916028715, - "learning_rate": 1.717604706863133e-06, - "loss": 0.9487, - "step": 7435 - }, - { - "epoch": 0.558845633548775, - "grad_norm": 1.4815325374989132, - "learning_rate": 1.7171227505084764e-06, - "loss": 0.9748, - "step": 7436 - }, - { - "epoch": 0.55892078761461, - "grad_norm": 1.7165203544461054, - "learning_rate": 1.71664081091538e-06, - "loss": 1.0237, - "step": 7437 - }, - { - "epoch": 0.5589959416804449, - "grad_norm": 1.7892475282364972, - "learning_rate": 1.7161588881124003e-06, - "loss": 1.0626, - "step": 7438 - }, - { - "epoch": 0.5590710957462799, - "grad_norm": 1.8263606499187879, - "learning_rate": 1.7156769821280937e-06, - "loss": 0.9627, - "step": 7439 - }, - { - "epoch": 0.5591462498121148, - "grad_norm": 1.9836649187443876, - "learning_rate": 1.7151950929910145e-06, - "loss": 0.9049, - "step": 7440 - }, - { - "epoch": 0.5592214038779498, - "grad_norm": 1.808612205460643, - "learning_rate": 1.7147132207297165e-06, - "loss": 0.9559, - "step": 7441 - }, - { - "epoch": 0.5592965579437847, - "grad_norm": 0.7338108980424357, - "learning_rate": 1.7142313653727531e-06, - "loss": 0.8105, - "step": 7442 - }, - { - "epoch": 0.5593717120096198, - "grad_norm": 6.894589622185617, - "learning_rate": 1.7137495269486749e-06, - "loss": 1.0971, - "step": 7443 - }, - { - "epoch": 0.5594468660754547, - "grad_norm": 1.3390205901986543, - "learning_rate": 1.7132677054860339e-06, - "loss": 0.9217, - "step": 7444 - }, - { - "epoch": 0.5595220201412896, - "grad_norm": 2.852203380732348, - "learning_rate": 1.7127859010133788e-06, - "loss": 0.9109, - "step": 7445 - }, - { - "epoch": 0.5595971742071246, - "grad_norm": 1.4327084668911831, - "learning_rate": 1.7123041135592593e-06, - "loss": 0.8294, - "step": 7446 - }, - { - "epoch": 0.5596723282729595, - "grad_norm": 1.6249464466367056, - "learning_rate": 1.7118223431522227e-06, - "loss": 0.9006, - "step": 7447 - }, - { - "epoch": 0.5597474823387946, - "grad_norm": 1.7146275412266818, - "learning_rate": 1.7113405898208156e-06, - "loss": 0.9928, - "step": 7448 - }, - { - "epoch": 0.5598226364046295, - "grad_norm": 6.948015371495594, - "learning_rate": 1.710858853593584e-06, - "loss": 0.9995, - "step": 7449 - }, - { - "epoch": 0.5598977904704645, - "grad_norm": 2.272276067160552, - "learning_rate": 1.710377134499072e-06, - "loss": 0.9414, - "step": 7450 - }, - { - "epoch": 0.5599729445362994, - "grad_norm": 1.8810855654207237, - "learning_rate": 1.7098954325658249e-06, - "loss": 0.996, - "step": 7451 - }, - { - "epoch": 0.5600480986021343, - "grad_norm": 3.0482827089116435, - "learning_rate": 1.7094137478223831e-06, - "loss": 0.9952, - "step": 7452 - }, - { - "epoch": 0.5601232526679694, - "grad_norm": 2.028554095706536, - "learning_rate": 1.7089320802972901e-06, - "loss": 0.7878, - "step": 7453 - }, - { - "epoch": 0.5601984067338043, - "grad_norm": 1.7646525582971047, - "learning_rate": 1.7084504300190862e-06, - "loss": 0.9592, - "step": 7454 - }, - { - "epoch": 0.5602735607996393, - "grad_norm": 1.884465801524851, - "learning_rate": 1.7079687970163105e-06, - "loss": 1.0658, - "step": 7455 - }, - { - "epoch": 0.5603487148654742, - "grad_norm": 1.8149449530837014, - "learning_rate": 1.7074871813175018e-06, - "loss": 0.9885, - "step": 7456 - }, - { - "epoch": 0.5604238689313091, - "grad_norm": 1.6319786753570245, - "learning_rate": 1.7070055829511973e-06, - "loss": 0.9207, - "step": 7457 - }, - { - "epoch": 0.5604990229971442, - "grad_norm": 1.9509605316166454, - "learning_rate": 1.7065240019459347e-06, - "loss": 0.9758, - "step": 7458 - }, - { - "epoch": 0.5605741770629791, - "grad_norm": 1.9686879950553364, - "learning_rate": 1.7060424383302485e-06, - "loss": 0.9829, - "step": 7459 - }, - { - "epoch": 0.5606493311288141, - "grad_norm": 1.4357591952096382, - "learning_rate": 1.7055608921326731e-06, - "loss": 0.9568, - "step": 7460 - }, - { - "epoch": 0.560724485194649, - "grad_norm": 1.4850607643154223, - "learning_rate": 1.7050793633817431e-06, - "loss": 0.8504, - "step": 7461 - }, - { - "epoch": 0.560799639260484, - "grad_norm": 1.7971491728881124, - "learning_rate": 1.7045978521059894e-06, - "loss": 0.951, - "step": 7462 - }, - { - "epoch": 0.560874793326319, - "grad_norm": 0.72478242437514, - "learning_rate": 1.7041163583339446e-06, - "loss": 0.817, - "step": 7463 - }, - { - "epoch": 0.5609499473921539, - "grad_norm": 0.7574272356242016, - "learning_rate": 1.7036348820941386e-06, - "loss": 0.9391, - "step": 7464 - }, - { - "epoch": 0.5610251014579889, - "grad_norm": 1.8367344202097649, - "learning_rate": 1.7031534234151001e-06, - "loss": 0.9684, - "step": 7465 - }, - { - "epoch": 0.5611002555238238, - "grad_norm": 1.8274270706132254, - "learning_rate": 1.7026719823253585e-06, - "loss": 0.9566, - "step": 7466 - }, - { - "epoch": 0.5611754095896588, - "grad_norm": 1.7017740613778298, - "learning_rate": 1.7021905588534402e-06, - "loss": 1.059, - "step": 7467 - }, - { - "epoch": 0.5612505636554938, - "grad_norm": 1.7594513437839616, - "learning_rate": 1.701709153027872e-06, - "loss": 0.9177, - "step": 7468 - }, - { - "epoch": 0.5613257177213288, - "grad_norm": 1.635621132997308, - "learning_rate": 1.7012277648771787e-06, - "loss": 0.8893, - "step": 7469 - }, - { - "epoch": 0.5614008717871637, - "grad_norm": 1.9563762611861353, - "learning_rate": 1.700746394429884e-06, - "loss": 1.0007, - "step": 7470 - }, - { - "epoch": 0.5614760258529986, - "grad_norm": 1.4629901195148154, - "learning_rate": 1.7002650417145119e-06, - "loss": 0.8796, - "step": 7471 - }, - { - "epoch": 0.5615511799188336, - "grad_norm": 3.7485919852183254, - "learning_rate": 1.699783706759583e-06, - "loss": 0.9825, - "step": 7472 - }, - { - "epoch": 0.5616263339846685, - "grad_norm": 1.421218552858002, - "learning_rate": 1.6993023895936196e-06, - "loss": 1.0382, - "step": 7473 - }, - { - "epoch": 0.5617014880505036, - "grad_norm": 1.596410587510624, - "learning_rate": 1.6988210902451407e-06, - "loss": 1.0478, - "step": 7474 - }, - { - "epoch": 0.5617766421163385, - "grad_norm": 1.6158653419921902, - "learning_rate": 1.698339808742666e-06, - "loss": 0.9408, - "step": 7475 - }, - { - "epoch": 0.5618517961821734, - "grad_norm": 2.910535872104169, - "learning_rate": 1.697858545114713e-06, - "loss": 1.0679, - "step": 7476 - }, - { - "epoch": 0.5619269502480084, - "grad_norm": 1.4911991061457628, - "learning_rate": 1.6973772993897978e-06, - "loss": 0.9968, - "step": 7477 - }, - { - "epoch": 0.5620021043138433, - "grad_norm": 1.8161417450643227, - "learning_rate": 1.6968960715964364e-06, - "loss": 1.0164, - "step": 7478 - }, - { - "epoch": 0.5620772583796784, - "grad_norm": 1.6619782625835673, - "learning_rate": 1.6964148617631432e-06, - "loss": 0.9393, - "step": 7479 - }, - { - "epoch": 0.5621524124455133, - "grad_norm": 1.46243274892745, - "learning_rate": 1.6959336699184323e-06, - "loss": 0.9646, - "step": 7480 - }, - { - "epoch": 0.5622275665113483, - "grad_norm": 1.6119507852334987, - "learning_rate": 1.695452496090816e-06, - "loss": 1.0027, - "step": 7481 - }, - { - "epoch": 0.5623027205771832, - "grad_norm": 1.5857786653403458, - "learning_rate": 1.694971340308805e-06, - "loss": 0.9391, - "step": 7482 - }, - { - "epoch": 0.5623778746430181, - "grad_norm": 1.5291016747628967, - "learning_rate": 1.6944902026009107e-06, - "loss": 0.9658, - "step": 7483 - }, - { - "epoch": 0.5624530287088532, - "grad_norm": 2.2427543960237353, - "learning_rate": 1.694009082995641e-06, - "loss": 0.9868, - "step": 7484 - }, - { - "epoch": 0.5625281827746881, - "grad_norm": 2.36602583478816, - "learning_rate": 1.6935279815215056e-06, - "loss": 0.947, - "step": 7485 - }, - { - "epoch": 0.5626033368405231, - "grad_norm": 1.4347722253561939, - "learning_rate": 1.6930468982070106e-06, - "loss": 0.8982, - "step": 7486 - }, - { - "epoch": 0.562678490906358, - "grad_norm": 1.847958239661502, - "learning_rate": 1.6925658330806618e-06, - "loss": 0.9825, - "step": 7487 - }, - { - "epoch": 0.562753644972193, - "grad_norm": 1.695723031258237, - "learning_rate": 1.6920847861709653e-06, - "loss": 1.0172, - "step": 7488 - }, - { - "epoch": 0.562828799038028, - "grad_norm": 2.193113590100999, - "learning_rate": 1.6916037575064238e-06, - "loss": 0.9288, - "step": 7489 - }, - { - "epoch": 0.5629039531038629, - "grad_norm": 1.4169831857323048, - "learning_rate": 1.6911227471155408e-06, - "loss": 0.9614, - "step": 7490 - }, - { - "epoch": 0.5629791071696979, - "grad_norm": 1.9290910795273981, - "learning_rate": 1.6906417550268182e-06, - "loss": 0.9518, - "step": 7491 - }, - { - "epoch": 0.5630542612355328, - "grad_norm": 1.9564874620706114, - "learning_rate": 1.6901607812687558e-06, - "loss": 0.9841, - "step": 7492 - }, - { - "epoch": 0.5631294153013678, - "grad_norm": 1.6784218921550045, - "learning_rate": 1.6896798258698538e-06, - "loss": 0.9163, - "step": 7493 - }, - { - "epoch": 0.5632045693672028, - "grad_norm": 2.020974563959524, - "learning_rate": 1.6891988888586094e-06, - "loss": 0.9893, - "step": 7494 - }, - { - "epoch": 0.5632797234330378, - "grad_norm": 1.3477064040429625, - "learning_rate": 1.6887179702635219e-06, - "loss": 0.9948, - "step": 7495 - }, - { - "epoch": 0.5633548774988727, - "grad_norm": 1.4721130664513749, - "learning_rate": 1.6882370701130863e-06, - "loss": 0.9459, - "step": 7496 - }, - { - "epoch": 0.5634300315647076, - "grad_norm": 1.9151253376872004, - "learning_rate": 1.6877561884357987e-06, - "loss": 0.9219, - "step": 7497 - }, - { - "epoch": 0.5635051856305426, - "grad_norm": 1.503862652827896, - "learning_rate": 1.6872753252601525e-06, - "loss": 0.9187, - "step": 7498 - }, - { - "epoch": 0.5635803396963776, - "grad_norm": 1.5430576733424801, - "learning_rate": 1.6867944806146403e-06, - "loss": 0.9151, - "step": 7499 - }, - { - "epoch": 0.5636554937622126, - "grad_norm": 1.895917750844086, - "learning_rate": 1.6863136545277547e-06, - "loss": 0.8759, - "step": 7500 - }, - { - "epoch": 0.5637306478280475, - "grad_norm": 1.4721771189340431, - "learning_rate": 1.685832847027986e-06, - "loss": 0.9129, - "step": 7501 - }, - { - "epoch": 0.5638058018938824, - "grad_norm": 1.7367888827283946, - "learning_rate": 1.6853520581438246e-06, - "loss": 0.9792, - "step": 7502 - }, - { - "epoch": 0.5638809559597174, - "grad_norm": 1.8083628430866605, - "learning_rate": 1.6848712879037588e-06, - "loss": 0.9631, - "step": 7503 - }, - { - "epoch": 0.5639561100255523, - "grad_norm": 1.49443013496749, - "learning_rate": 1.6843905363362754e-06, - "loss": 0.8783, - "step": 7504 - }, - { - "epoch": 0.5640312640913874, - "grad_norm": 1.6420046863372872, - "learning_rate": 1.6839098034698616e-06, - "loss": 0.9242, - "step": 7505 - }, - { - "epoch": 0.5641064181572223, - "grad_norm": 1.9128922602479226, - "learning_rate": 1.6834290893330017e-06, - "loss": 0.9664, - "step": 7506 - }, - { - "epoch": 0.5641815722230573, - "grad_norm": 0.8232977182070108, - "learning_rate": 1.682948393954181e-06, - "loss": 0.9023, - "step": 7507 - }, - { - "epoch": 0.5642567262888922, - "grad_norm": 1.5115542457833118, - "learning_rate": 1.6824677173618822e-06, - "loss": 1.0618, - "step": 7508 - }, - { - "epoch": 0.5643318803547271, - "grad_norm": 9.224675747101367, - "learning_rate": 1.6819870595845867e-06, - "loss": 0.9901, - "step": 7509 - }, - { - "epoch": 0.5644070344205622, - "grad_norm": 1.7651696082041348, - "learning_rate": 1.681506420650776e-06, - "loss": 1.0764, - "step": 7510 - }, - { - "epoch": 0.5644821884863971, - "grad_norm": 1.4563277031465227, - "learning_rate": 1.6810258005889287e-06, - "loss": 0.8774, - "step": 7511 - }, - { - "epoch": 0.5645573425522321, - "grad_norm": 1.5572829315382846, - "learning_rate": 1.680545199427525e-06, - "loss": 1.032, - "step": 7512 - }, - { - "epoch": 0.564632496618067, - "grad_norm": 1.701674447348526, - "learning_rate": 1.6800646171950415e-06, - "loss": 1.0269, - "step": 7513 - }, - { - "epoch": 0.564707650683902, - "grad_norm": 1.6288724388071392, - "learning_rate": 1.6795840539199538e-06, - "loss": 0.9735, - "step": 7514 - }, - { - "epoch": 0.564782804749737, - "grad_norm": 2.523063419356898, - "learning_rate": 1.6791035096307387e-06, - "loss": 0.8627, - "step": 7515 - }, - { - "epoch": 0.5648579588155719, - "grad_norm": 1.7332196210583866, - "learning_rate": 1.6786229843558685e-06, - "loss": 1.0689, - "step": 7516 - }, - { - "epoch": 0.5649331128814069, - "grad_norm": 1.4744513218535724, - "learning_rate": 1.6781424781238178e-06, - "loss": 1.0017, - "step": 7517 - }, - { - "epoch": 0.5650082669472418, - "grad_norm": 0.6813824883516464, - "learning_rate": 1.6776619909630574e-06, - "loss": 0.8078, - "step": 7518 - }, - { - "epoch": 0.5650834210130768, - "grad_norm": 1.7263052217903176, - "learning_rate": 1.6771815229020586e-06, - "loss": 0.9101, - "step": 7519 - }, - { - "epoch": 0.5651585750789118, - "grad_norm": 2.1767723798371352, - "learning_rate": 1.676701073969291e-06, - "loss": 1.0126, - "step": 7520 - }, - { - "epoch": 0.5652337291447467, - "grad_norm": 0.8178651177486898, - "learning_rate": 1.676220644193222e-06, - "loss": 0.8816, - "step": 7521 - }, - { - "epoch": 0.5653088832105817, - "grad_norm": 2.4475130654786046, - "learning_rate": 1.6757402336023205e-06, - "loss": 1.0401, - "step": 7522 - }, - { - "epoch": 0.5653840372764166, - "grad_norm": 2.327791126044014, - "learning_rate": 1.6752598422250512e-06, - "loss": 0.9968, - "step": 7523 - }, - { - "epoch": 0.5654591913422516, - "grad_norm": 1.7433642189664598, - "learning_rate": 1.6747794700898803e-06, - "loss": 1.1114, - "step": 7524 - }, - { - "epoch": 0.5655343454080866, - "grad_norm": 1.7003747274618302, - "learning_rate": 1.674299117225271e-06, - "loss": 0.9695, - "step": 7525 - }, - { - "epoch": 0.5656094994739216, - "grad_norm": 3.9173822087518855, - "learning_rate": 1.6738187836596858e-06, - "loss": 0.8599, - "step": 7526 - }, - { - "epoch": 0.5656846535397565, - "grad_norm": 1.7997256016858585, - "learning_rate": 1.6733384694215872e-06, - "loss": 1.0257, - "step": 7527 - }, - { - "epoch": 0.5657598076055914, - "grad_norm": 0.672238140885405, - "learning_rate": 1.6728581745394346e-06, - "loss": 0.8002, - "step": 7528 - }, - { - "epoch": 0.5658349616714264, - "grad_norm": 2.254693434318193, - "learning_rate": 1.6723778990416883e-06, - "loss": 1.1329, - "step": 7529 - }, - { - "epoch": 0.5659101157372614, - "grad_norm": 1.2167899191276403, - "learning_rate": 1.671897642956806e-06, - "loss": 0.9826, - "step": 7530 - }, - { - "epoch": 0.5659852698030964, - "grad_norm": 2.2643955446470008, - "learning_rate": 1.6714174063132447e-06, - "loss": 0.8668, - "step": 7531 - }, - { - "epoch": 0.5660604238689313, - "grad_norm": 1.5324714615670916, - "learning_rate": 1.6709371891394605e-06, - "loss": 1.0266, - "step": 7532 - }, - { - "epoch": 0.5661355779347663, - "grad_norm": 1.4946624044249237, - "learning_rate": 1.6704569914639073e-06, - "loss": 0.9372, - "step": 7533 - }, - { - "epoch": 0.5662107320006012, - "grad_norm": 3.754582593248406, - "learning_rate": 1.6699768133150399e-06, - "loss": 0.9995, - "step": 7534 - }, - { - "epoch": 0.5662858860664362, - "grad_norm": 1.2573585875915518, - "learning_rate": 1.6694966547213098e-06, - "loss": 0.9307, - "step": 7535 - }, - { - "epoch": 0.5663610401322712, - "grad_norm": 1.9597350305393573, - "learning_rate": 1.669016515711168e-06, - "loss": 0.9497, - "step": 7536 - }, - { - "epoch": 0.5664361941981061, - "grad_norm": 1.568872275279523, - "learning_rate": 1.668536396313066e-06, - "loss": 1.0641, - "step": 7537 - }, - { - "epoch": 0.5665113482639411, - "grad_norm": 1.9256461755763863, - "learning_rate": 1.6680562965554508e-06, - "loss": 0.9626, - "step": 7538 - }, - { - "epoch": 0.566586502329776, - "grad_norm": 1.889701365436893, - "learning_rate": 1.6675762164667717e-06, - "loss": 1.0124, - "step": 7539 - }, - { - "epoch": 0.5666616563956111, - "grad_norm": 1.5945176912261139, - "learning_rate": 1.6670961560754744e-06, - "loss": 0.9478, - "step": 7540 - }, - { - "epoch": 0.566736810461446, - "grad_norm": 1.3805914245225301, - "learning_rate": 1.6666161154100052e-06, - "loss": 0.9973, - "step": 7541 - }, - { - "epoch": 0.5668119645272809, - "grad_norm": 0.7621855259267996, - "learning_rate": 1.6661360944988076e-06, - "loss": 0.8297, - "step": 7542 - }, - { - "epoch": 0.5668871185931159, - "grad_norm": 2.2543953244292787, - "learning_rate": 1.6656560933703244e-06, - "loss": 1.102, - "step": 7543 - }, - { - "epoch": 0.5669622726589508, - "grad_norm": 2.0304294983085027, - "learning_rate": 1.6651761120529983e-06, - "loss": 0.9005, - "step": 7544 - }, - { - "epoch": 0.5670374267247859, - "grad_norm": 2.304399996136435, - "learning_rate": 1.6646961505752696e-06, - "loss": 0.9776, - "step": 7545 - }, - { - "epoch": 0.5671125807906208, - "grad_norm": 1.9039263085962816, - "learning_rate": 1.6642162089655782e-06, - "loss": 1.0584, - "step": 7546 - }, - { - "epoch": 0.5671877348564557, - "grad_norm": 1.8864917184145948, - "learning_rate": 1.6637362872523621e-06, - "loss": 0.8749, - "step": 7547 - }, - { - "epoch": 0.5672628889222907, - "grad_norm": 2.0273890260554266, - "learning_rate": 1.6632563854640583e-06, - "loss": 1.0435, - "step": 7548 - }, - { - "epoch": 0.5673380429881256, - "grad_norm": 1.4982808911343706, - "learning_rate": 1.6627765036291034e-06, - "loss": 0.9223, - "step": 7549 - }, - { - "epoch": 0.5674131970539606, - "grad_norm": 1.3945121443265145, - "learning_rate": 1.6622966417759319e-06, - "loss": 1.0088, - "step": 7550 - }, - { - "epoch": 0.5674883511197956, - "grad_norm": 1.7352821108648933, - "learning_rate": 1.6618167999329778e-06, - "loss": 0.9402, - "step": 7551 - }, - { - "epoch": 0.5675635051856306, - "grad_norm": 1.7924441565376674, - "learning_rate": 1.6613369781286732e-06, - "loss": 0.9526, - "step": 7552 - }, - { - "epoch": 0.5676386592514655, - "grad_norm": 1.4046292183637779, - "learning_rate": 1.660857176391449e-06, - "loss": 0.9633, - "step": 7553 - }, - { - "epoch": 0.5677138133173004, - "grad_norm": 3.563572370851666, - "learning_rate": 1.6603773947497364e-06, - "loss": 0.9465, - "step": 7554 - }, - { - "epoch": 0.5677889673831354, - "grad_norm": 1.6295509434778424, - "learning_rate": 1.6598976332319631e-06, - "loss": 0.9485, - "step": 7555 - }, - { - "epoch": 0.5678641214489704, - "grad_norm": 1.6185219828727562, - "learning_rate": 1.6594178918665578e-06, - "loss": 1.0077, - "step": 7556 - }, - { - "epoch": 0.5679392755148054, - "grad_norm": 1.6409093701187776, - "learning_rate": 1.6589381706819467e-06, - "loss": 1.0151, - "step": 7557 - }, - { - "epoch": 0.5680144295806403, - "grad_norm": 1.46491251466202, - "learning_rate": 1.658458469706554e-06, - "loss": 1.0019, - "step": 7558 - }, - { - "epoch": 0.5680895836464753, - "grad_norm": 2.2454158921086167, - "learning_rate": 1.6579787889688062e-06, - "loss": 1.0884, - "step": 7559 - }, - { - "epoch": 0.5681647377123102, - "grad_norm": 1.729222315923072, - "learning_rate": 1.6574991284971235e-06, - "loss": 0.9458, - "step": 7560 - }, - { - "epoch": 0.5682398917781452, - "grad_norm": 1.6681564189743368, - "learning_rate": 1.6570194883199298e-06, - "loss": 0.9384, - "step": 7561 - }, - { - "epoch": 0.5683150458439802, - "grad_norm": 1.5723752569987934, - "learning_rate": 1.6565398684656442e-06, - "loss": 0.9375, - "step": 7562 - }, - { - "epoch": 0.5683901999098151, - "grad_norm": 3.491421267702724, - "learning_rate": 1.6560602689626872e-06, - "loss": 0.9051, - "step": 7563 - }, - { - "epoch": 0.5684653539756501, - "grad_norm": 4.307704525972163, - "learning_rate": 1.6555806898394764e-06, - "loss": 0.97, - "step": 7564 - }, - { - "epoch": 0.568540508041485, - "grad_norm": 1.8145828118599086, - "learning_rate": 1.655101131124428e-06, - "loss": 0.9481, - "step": 7565 - }, - { - "epoch": 0.56861566210732, - "grad_norm": 2.02203473693863, - "learning_rate": 1.6546215928459589e-06, - "loss": 0.8817, - "step": 7566 - }, - { - "epoch": 0.568690816173155, - "grad_norm": 0.6767986555008915, - "learning_rate": 1.6541420750324825e-06, - "loss": 0.8302, - "step": 7567 - }, - { - "epoch": 0.5687659702389899, - "grad_norm": 1.688939857794368, - "learning_rate": 1.6536625777124128e-06, - "loss": 0.8332, - "step": 7568 - }, - { - "epoch": 0.5688411243048249, - "grad_norm": 0.7853947367418699, - "learning_rate": 1.6531831009141616e-06, - "loss": 0.8745, - "step": 7569 - }, - { - "epoch": 0.5689162783706598, - "grad_norm": 1.9597417217497812, - "learning_rate": 1.6527036446661393e-06, - "loss": 0.9061, - "step": 7570 - }, - { - "epoch": 0.5689914324364949, - "grad_norm": 1.7552733668684612, - "learning_rate": 1.6522242089967564e-06, - "loss": 0.878, - "step": 7571 - }, - { - "epoch": 0.5690665865023298, - "grad_norm": 3.4420495184395787, - "learning_rate": 1.6517447939344205e-06, - "loss": 0.862, - "step": 7572 - }, - { - "epoch": 0.5691417405681647, - "grad_norm": 1.2669385992495823, - "learning_rate": 1.6512653995075393e-06, - "loss": 0.9382, - "step": 7573 - }, - { - "epoch": 0.5692168946339997, - "grad_norm": 1.5857788908622619, - "learning_rate": 1.650786025744519e-06, - "loss": 0.9913, - "step": 7574 - }, - { - "epoch": 0.5692920486998346, - "grad_norm": 1.622895785757685, - "learning_rate": 1.6503066726737632e-06, - "loss": 1.0193, - "step": 7575 - }, - { - "epoch": 0.5693672027656697, - "grad_norm": 1.8720356714648405, - "learning_rate": 1.6498273403236764e-06, - "loss": 0.8902, - "step": 7576 - }, - { - "epoch": 0.5694423568315046, - "grad_norm": 1.6702748421972873, - "learning_rate": 1.6493480287226605e-06, - "loss": 0.865, - "step": 7577 - }, - { - "epoch": 0.5695175108973396, - "grad_norm": 1.6203335001953976, - "learning_rate": 1.6488687378991168e-06, - "loss": 1.007, - "step": 7578 - }, - { - "epoch": 0.5695926649631745, - "grad_norm": 1.8568570695266238, - "learning_rate": 1.648389467881444e-06, - "loss": 0.9582, - "step": 7579 - }, - { - "epoch": 0.5696678190290094, - "grad_norm": 15.879349240438183, - "learning_rate": 1.6479102186980428e-06, - "loss": 0.9233, - "step": 7580 - }, - { - "epoch": 0.5697429730948445, - "grad_norm": 0.6650324973792028, - "learning_rate": 1.6474309903773098e-06, - "loss": 0.7793, - "step": 7581 - }, - { - "epoch": 0.5698181271606794, - "grad_norm": 1.6792089445914546, - "learning_rate": 1.6469517829476391e-06, - "loss": 1.0218, - "step": 7582 - }, - { - "epoch": 0.5698932812265144, - "grad_norm": 1.5302907313457228, - "learning_rate": 1.6464725964374285e-06, - "loss": 0.9989, - "step": 7583 - }, - { - "epoch": 0.5699684352923493, - "grad_norm": 1.7597349378003802, - "learning_rate": 1.6459934308750694e-06, - "loss": 1.1461, - "step": 7584 - }, - { - "epoch": 0.5700435893581843, - "grad_norm": 1.270053134167145, - "learning_rate": 1.6455142862889557e-06, - "loss": 1.0354, - "step": 7585 - }, - { - "epoch": 0.5701187434240192, - "grad_norm": 0.6933913532458446, - "learning_rate": 1.6450351627074781e-06, - "loss": 0.8307, - "step": 7586 - }, - { - "epoch": 0.5701938974898542, - "grad_norm": 2.074634230214682, - "learning_rate": 1.6445560601590257e-06, - "loss": 0.9767, - "step": 7587 - }, - { - "epoch": 0.5702690515556892, - "grad_norm": 2.059630970538714, - "learning_rate": 1.6440769786719883e-06, - "loss": 0.8595, - "step": 7588 - }, - { - "epoch": 0.5703442056215241, - "grad_norm": 1.5597202752043537, - "learning_rate": 1.6435979182747526e-06, - "loss": 1.0314, - "step": 7589 - }, - { - "epoch": 0.5704193596873591, - "grad_norm": 15.135218675670053, - "learning_rate": 1.6431188789957053e-06, - "loss": 0.9291, - "step": 7590 - }, - { - "epoch": 0.570494513753194, - "grad_norm": 1.5965736657269447, - "learning_rate": 1.642639860863231e-06, - "loss": 0.9251, - "step": 7591 - }, - { - "epoch": 0.570569667819029, - "grad_norm": 1.6296615496111848, - "learning_rate": 1.642160863905713e-06, - "loss": 0.992, - "step": 7592 - }, - { - "epoch": 0.570644821884864, - "grad_norm": 2.868033385685008, - "learning_rate": 1.6416818881515344e-06, - "loss": 0.8936, - "step": 7593 - }, - { - "epoch": 0.5707199759506989, - "grad_norm": 2.240654291575992, - "learning_rate": 1.6412029336290755e-06, - "loss": 0.9239, - "step": 7594 - }, - { - "epoch": 0.5707951300165339, - "grad_norm": 1.4778757865133825, - "learning_rate": 1.6407240003667172e-06, - "loss": 0.9884, - "step": 7595 - }, - { - "epoch": 0.5708702840823688, - "grad_norm": 1.427981333111054, - "learning_rate": 1.640245088392838e-06, - "loss": 0.9676, - "step": 7596 - }, - { - "epoch": 0.5709454381482039, - "grad_norm": 1.4342801895887372, - "learning_rate": 1.6397661977358142e-06, - "loss": 0.95, - "step": 7597 - }, - { - "epoch": 0.5710205922140388, - "grad_norm": 1.7299018430702116, - "learning_rate": 1.639287328424023e-06, - "loss": 1.0473, - "step": 7598 - }, - { - "epoch": 0.5710957462798737, - "grad_norm": 2.7451532307921473, - "learning_rate": 1.638808480485838e-06, - "loss": 1.0066, - "step": 7599 - }, - { - "epoch": 0.5711709003457087, - "grad_norm": 1.6809601686377798, - "learning_rate": 1.638329653949635e-06, - "loss": 0.9372, - "step": 7600 - }, - { - "epoch": 0.5712460544115436, - "grad_norm": 0.7337650038242828, - "learning_rate": 1.6378508488437835e-06, - "loss": 0.839, - "step": 7601 - }, - { - "epoch": 0.5713212084773787, - "grad_norm": 1.75314178458762, - "learning_rate": 1.6373720651966569e-06, - "loss": 1.0032, - "step": 7602 - }, - { - "epoch": 0.5713963625432136, - "grad_norm": 0.7587921653104795, - "learning_rate": 1.6368933030366241e-06, - "loss": 0.83, - "step": 7603 - }, - { - "epoch": 0.5714715166090486, - "grad_norm": 2.854439733951863, - "learning_rate": 1.6364145623920528e-06, - "loss": 0.9883, - "step": 7604 - }, - { - "epoch": 0.5715466706748835, - "grad_norm": 1.648818491799366, - "learning_rate": 1.6359358432913118e-06, - "loss": 1.0068, - "step": 7605 - }, - { - "epoch": 0.5716218247407184, - "grad_norm": 0.7022922670974209, - "learning_rate": 1.6354571457627656e-06, - "loss": 0.8489, - "step": 7606 - }, - { - "epoch": 0.5716969788065535, - "grad_norm": 1.6904233406780766, - "learning_rate": 1.6349784698347797e-06, - "loss": 0.859, - "step": 7607 - }, - { - "epoch": 0.5717721328723884, - "grad_norm": 0.6739038618343075, - "learning_rate": 1.6344998155357175e-06, - "loss": 0.7532, - "step": 7608 - }, - { - "epoch": 0.5718472869382234, - "grad_norm": 4.280915852893021, - "learning_rate": 1.63402118289394e-06, - "loss": 0.9317, - "step": 7609 - }, - { - "epoch": 0.5719224410040583, - "grad_norm": 1.554907241704133, - "learning_rate": 1.6335425719378097e-06, - "loss": 0.9712, - "step": 7610 - }, - { - "epoch": 0.5719975950698932, - "grad_norm": 0.67644375646652, - "learning_rate": 1.6330639826956848e-06, - "loss": 0.854, - "step": 7611 - }, - { - "epoch": 0.5720727491357283, - "grad_norm": 2.6680291589290226, - "learning_rate": 1.632585415195924e-06, - "loss": 0.9471, - "step": 7612 - }, - { - "epoch": 0.5721479032015632, - "grad_norm": 2.288707093649701, - "learning_rate": 1.6321068694668846e-06, - "loss": 0.9276, - "step": 7613 - }, - { - "epoch": 0.5722230572673982, - "grad_norm": 2.0576109304455175, - "learning_rate": 1.6316283455369215e-06, - "loss": 0.9982, - "step": 7614 - }, - { - "epoch": 0.5722982113332331, - "grad_norm": 2.2473616496138917, - "learning_rate": 1.63114984343439e-06, - "loss": 0.9537, - "step": 7615 - }, - { - "epoch": 0.5723733653990681, - "grad_norm": 2.252629333237909, - "learning_rate": 1.630671363187642e-06, - "loss": 0.9341, - "step": 7616 - }, - { - "epoch": 0.572448519464903, - "grad_norm": 1.6710404068760136, - "learning_rate": 1.6301929048250306e-06, - "loss": 0.9183, - "step": 7617 - }, - { - "epoch": 0.572523673530738, - "grad_norm": 1.678169664751093, - "learning_rate": 1.6297144683749057e-06, - "loss": 1.1063, - "step": 7618 - }, - { - "epoch": 0.572598827596573, - "grad_norm": 1.6242922929093044, - "learning_rate": 1.6292360538656162e-06, - "loss": 0.9939, - "step": 7619 - }, - { - "epoch": 0.5726739816624079, - "grad_norm": 2.5505132607416483, - "learning_rate": 1.6287576613255105e-06, - "loss": 0.8728, - "step": 7620 - }, - { - "epoch": 0.5727491357282429, - "grad_norm": 1.291464861620342, - "learning_rate": 1.6282792907829341e-06, - "loss": 0.9991, - "step": 7621 - }, - { - "epoch": 0.5728242897940778, - "grad_norm": 1.791184478754023, - "learning_rate": 1.6278009422662345e-06, - "loss": 1.0648, - "step": 7622 - }, - { - "epoch": 0.5728994438599129, - "grad_norm": 1.449290065604413, - "learning_rate": 1.6273226158037528e-06, - "loss": 0.8936, - "step": 7623 - }, - { - "epoch": 0.5729745979257478, - "grad_norm": 1.9510241386683937, - "learning_rate": 1.6268443114238345e-06, - "loss": 0.9236, - "step": 7624 - }, - { - "epoch": 0.5730497519915827, - "grad_norm": 2.2650741433646195, - "learning_rate": 1.6263660291548191e-06, - "loss": 0.9878, - "step": 7625 - }, - { - "epoch": 0.5731249060574177, - "grad_norm": 1.8890844957508337, - "learning_rate": 1.6258877690250472e-06, - "loss": 1.0316, - "step": 7626 - }, - { - "epoch": 0.5732000601232526, - "grad_norm": 2.993939317678208, - "learning_rate": 1.6254095310628578e-06, - "loss": 1.0307, - "step": 7627 - }, - { - "epoch": 0.5732752141890877, - "grad_norm": 1.9231184371722205, - "learning_rate": 1.6249313152965876e-06, - "loss": 1.0485, - "step": 7628 - }, - { - "epoch": 0.5733503682549226, - "grad_norm": 2.7485894140057856, - "learning_rate": 1.6244531217545738e-06, - "loss": 1.0129, - "step": 7629 - }, - { - "epoch": 0.5734255223207576, - "grad_norm": 0.7077162990920941, - "learning_rate": 1.6239749504651505e-06, - "loss": 0.8268, - "step": 7630 - }, - { - "epoch": 0.5735006763865925, - "grad_norm": 1.2856349807077376, - "learning_rate": 1.6234968014566509e-06, - "loss": 1.0009, - "step": 7631 - }, - { - "epoch": 0.5735758304524274, - "grad_norm": 1.828471876312518, - "learning_rate": 1.6230186747574077e-06, - "loss": 0.9409, - "step": 7632 - }, - { - "epoch": 0.5736509845182625, - "grad_norm": 1.6164872132163444, - "learning_rate": 1.6225405703957515e-06, - "loss": 0.9286, - "step": 7633 - }, - { - "epoch": 0.5737261385840974, - "grad_norm": 1.62617420908357, - "learning_rate": 1.6220624884000123e-06, - "loss": 1.0008, - "step": 7634 - }, - { - "epoch": 0.5738012926499324, - "grad_norm": 1.546149960537454, - "learning_rate": 1.6215844287985178e-06, - "loss": 0.9217, - "step": 7635 - }, - { - "epoch": 0.5738764467157673, - "grad_norm": 1.5161816076340857, - "learning_rate": 1.6211063916195945e-06, - "loss": 0.9604, - "step": 7636 - }, - { - "epoch": 0.5739516007816022, - "grad_norm": 1.6124213561759515, - "learning_rate": 1.6206283768915687e-06, - "loss": 0.9238, - "step": 7637 - }, - { - "epoch": 0.5740267548474373, - "grad_norm": 2.470101288483169, - "learning_rate": 1.620150384642764e-06, - "loss": 1.0036, - "step": 7638 - }, - { - "epoch": 0.5741019089132722, - "grad_norm": 2.200296520710955, - "learning_rate": 1.619672414901504e-06, - "loss": 0.9929, - "step": 7639 - }, - { - "epoch": 0.5741770629791072, - "grad_norm": 2.0862283682458824, - "learning_rate": 1.6191944676961097e-06, - "loss": 0.9489, - "step": 7640 - }, - { - "epoch": 0.5742522170449421, - "grad_norm": 2.010200831500447, - "learning_rate": 1.6187165430549011e-06, - "loss": 1.054, - "step": 7641 - }, - { - "epoch": 0.5743273711107771, - "grad_norm": 2.044764238491965, - "learning_rate": 1.6182386410061976e-06, - "loss": 0.9104, - "step": 7642 - }, - { - "epoch": 0.574402525176612, - "grad_norm": 2.3798349255928004, - "learning_rate": 1.6177607615783158e-06, - "loss": 1.0326, - "step": 7643 - }, - { - "epoch": 0.574477679242447, - "grad_norm": 1.6427539636067474, - "learning_rate": 1.6172829047995733e-06, - "loss": 0.9841, - "step": 7644 - }, - { - "epoch": 0.574552833308282, - "grad_norm": 1.629050634923798, - "learning_rate": 1.616805070698284e-06, - "loss": 0.9201, - "step": 7645 - }, - { - "epoch": 0.5746279873741169, - "grad_norm": 3.7676115375656827, - "learning_rate": 1.6163272593027615e-06, - "loss": 0.9144, - "step": 7646 - }, - { - "epoch": 0.5747031414399519, - "grad_norm": 1.749127511102208, - "learning_rate": 1.6158494706413187e-06, - "loss": 0.9672, - "step": 7647 - }, - { - "epoch": 0.5747782955057869, - "grad_norm": 2.425395449613447, - "learning_rate": 1.6153717047422648e-06, - "loss": 1.0032, - "step": 7648 - }, - { - "epoch": 0.5748534495716219, - "grad_norm": 1.4931790717239426, - "learning_rate": 1.614893961633911e-06, - "loss": 1.079, - "step": 7649 - }, - { - "epoch": 0.5749286036374568, - "grad_norm": 3.789744144751493, - "learning_rate": 1.6144162413445642e-06, - "loss": 1.0336, - "step": 7650 - }, - { - "epoch": 0.5750037577032917, - "grad_norm": 1.6585708048774614, - "learning_rate": 1.6139385439025319e-06, - "loss": 1.0455, - "step": 7651 - }, - { - "epoch": 0.5750789117691267, - "grad_norm": 1.6526841699002393, - "learning_rate": 1.6134608693361193e-06, - "loss": 0.9446, - "step": 7652 - }, - { - "epoch": 0.5751540658349616, - "grad_norm": 1.3609696604523382, - "learning_rate": 1.61298321767363e-06, - "loss": 0.9755, - "step": 7653 - }, - { - "epoch": 0.5752292199007967, - "grad_norm": 0.7502176445832767, - "learning_rate": 1.6125055889433674e-06, - "loss": 0.8541, - "step": 7654 - }, - { - "epoch": 0.5753043739666316, - "grad_norm": 4.221665342872634, - "learning_rate": 1.612027983173632e-06, - "loss": 1.0241, - "step": 7655 - }, - { - "epoch": 0.5753795280324665, - "grad_norm": 1.3527850316449586, - "learning_rate": 1.6115504003927245e-06, - "loss": 0.9735, - "step": 7656 - }, - { - "epoch": 0.5754546820983015, - "grad_norm": 2.2454747151369974, - "learning_rate": 1.6110728406289436e-06, - "loss": 0.8267, - "step": 7657 - }, - { - "epoch": 0.5755298361641364, - "grad_norm": 1.3723309795346172, - "learning_rate": 1.6105953039105855e-06, - "loss": 1.0335, - "step": 7658 - }, - { - "epoch": 0.5756049902299715, - "grad_norm": 1.5495139067359056, - "learning_rate": 1.6101177902659474e-06, - "loss": 0.9154, - "step": 7659 - }, - { - "epoch": 0.5756801442958064, - "grad_norm": 2.358533797713587, - "learning_rate": 1.6096402997233225e-06, - "loss": 0.9645, - "step": 7660 - }, - { - "epoch": 0.5757552983616414, - "grad_norm": 1.8188618786146966, - "learning_rate": 1.6091628323110053e-06, - "loss": 0.8811, - "step": 7661 - }, - { - "epoch": 0.5758304524274763, - "grad_norm": 3.780694589729584, - "learning_rate": 1.6086853880572868e-06, - "loss": 0.9374, - "step": 7662 - }, - { - "epoch": 0.5759056064933112, - "grad_norm": 1.7641972533421402, - "learning_rate": 1.6082079669904572e-06, - "loss": 0.8977, - "step": 7663 - }, - { - "epoch": 0.5759807605591463, - "grad_norm": 2.267093051281249, - "learning_rate": 1.607730569138806e-06, - "loss": 0.9749, - "step": 7664 - }, - { - "epoch": 0.5760559146249812, - "grad_norm": 1.2663789081387042, - "learning_rate": 1.60725319453062e-06, - "loss": 0.9678, - "step": 7665 - }, - { - "epoch": 0.5761310686908162, - "grad_norm": 1.9308074913331617, - "learning_rate": 1.606775843194187e-06, - "loss": 1.0367, - "step": 7666 - }, - { - "epoch": 0.5762062227566511, - "grad_norm": 2.286679221731405, - "learning_rate": 1.6062985151577904e-06, - "loss": 1.0732, - "step": 7667 - }, - { - "epoch": 0.5762813768224861, - "grad_norm": 1.52614193045553, - "learning_rate": 1.605821210449715e-06, - "loss": 0.9818, - "step": 7668 - }, - { - "epoch": 0.5763565308883211, - "grad_norm": 1.712282253260701, - "learning_rate": 1.6053439290982422e-06, - "loss": 1.0463, - "step": 7669 - }, - { - "epoch": 0.576431684954156, - "grad_norm": 2.4593562783446012, - "learning_rate": 1.6048666711316526e-06, - "loss": 1.039, - "step": 7670 - }, - { - "epoch": 0.576506839019991, - "grad_norm": 2.1963875198515446, - "learning_rate": 1.6043894365782262e-06, - "loss": 1.0551, - "step": 7671 - }, - { - "epoch": 0.5765819930858259, - "grad_norm": 1.6709684962573708, - "learning_rate": 1.60391222546624e-06, - "loss": 0.9555, - "step": 7672 - }, - { - "epoch": 0.5766571471516609, - "grad_norm": 1.6368377569447177, - "learning_rate": 1.6034350378239715e-06, - "loss": 1.0145, - "step": 7673 - }, - { - "epoch": 0.5767323012174959, - "grad_norm": 1.5098008402163248, - "learning_rate": 1.6029578736796958e-06, - "loss": 0.9353, - "step": 7674 - }, - { - "epoch": 0.5768074552833309, - "grad_norm": 1.5200636845347975, - "learning_rate": 1.6024807330616858e-06, - "loss": 0.9587, - "step": 7675 - }, - { - "epoch": 0.5768826093491658, - "grad_norm": 1.5293425532470215, - "learning_rate": 1.6020036159982154e-06, - "loss": 0.9515, - "step": 7676 - }, - { - "epoch": 0.5769577634150007, - "grad_norm": 1.5948801708351383, - "learning_rate": 1.601526522517554e-06, - "loss": 1.0333, - "step": 7677 - }, - { - "epoch": 0.5770329174808357, - "grad_norm": 1.5387770489717674, - "learning_rate": 1.6010494526479726e-06, - "loss": 1.0109, - "step": 7678 - }, - { - "epoch": 0.5771080715466707, - "grad_norm": 2.0557450158495896, - "learning_rate": 1.6005724064177387e-06, - "loss": 0.979, - "step": 7679 - }, - { - "epoch": 0.5771832256125057, - "grad_norm": 2.519053048581185, - "learning_rate": 1.6000953838551187e-06, - "loss": 0.8674, - "step": 7680 - }, - { - "epoch": 0.5772583796783406, - "grad_norm": 3.1747883162931485, - "learning_rate": 1.5996183849883793e-06, - "loss": 0.9622, - "step": 7681 - }, - { - "epoch": 0.5773335337441755, - "grad_norm": 1.6305481756557072, - "learning_rate": 1.599141409845783e-06, - "loss": 0.9911, - "step": 7682 - }, - { - "epoch": 0.5774086878100105, - "grad_norm": 1.577829692140217, - "learning_rate": 1.5986644584555937e-06, - "loss": 0.9839, - "step": 7683 - }, - { - "epoch": 0.5774838418758454, - "grad_norm": 2.3537183239021413, - "learning_rate": 1.598187530846072e-06, - "loss": 0.8959, - "step": 7684 - }, - { - "epoch": 0.5775589959416805, - "grad_norm": 1.6604727409719022, - "learning_rate": 1.5977106270454775e-06, - "loss": 0.8929, - "step": 7685 - }, - { - "epoch": 0.5776341500075154, - "grad_norm": 1.5820817350644563, - "learning_rate": 1.597233747082069e-06, - "loss": 1.005, - "step": 7686 - }, - { - "epoch": 0.5777093040733504, - "grad_norm": 2.1844797264829534, - "learning_rate": 1.5967568909841026e-06, - "loss": 0.9654, - "step": 7687 - }, - { - "epoch": 0.5777844581391853, - "grad_norm": 3.966236189449394, - "learning_rate": 1.5962800587798352e-06, - "loss": 0.9822, - "step": 7688 - }, - { - "epoch": 0.5778596122050202, - "grad_norm": 1.7740215398519383, - "learning_rate": 1.59580325049752e-06, - "loss": 0.9987, - "step": 7689 - }, - { - "epoch": 0.5779347662708553, - "grad_norm": 3.358261136650704, - "learning_rate": 1.5953264661654104e-06, - "loss": 1.0148, - "step": 7690 - }, - { - "epoch": 0.5780099203366902, - "grad_norm": 1.7593903644222897, - "learning_rate": 1.5948497058117574e-06, - "loss": 0.9232, - "step": 7691 - }, - { - "epoch": 0.5780850744025252, - "grad_norm": 2.3398221481421455, - "learning_rate": 1.59437296946481e-06, - "loss": 0.8989, - "step": 7692 - }, - { - "epoch": 0.5781602284683601, - "grad_norm": 4.574291303598382, - "learning_rate": 1.593896257152818e-06, - "loss": 0.8829, - "step": 7693 - }, - { - "epoch": 0.5782353825341952, - "grad_norm": 1.7042266362589147, - "learning_rate": 1.5934195689040276e-06, - "loss": 0.9571, - "step": 7694 - }, - { - "epoch": 0.5783105366000301, - "grad_norm": 1.8499182631504356, - "learning_rate": 1.592942904746685e-06, - "loss": 0.9467, - "step": 7695 - }, - { - "epoch": 0.578385690665865, - "grad_norm": 2.415757951125086, - "learning_rate": 1.592466264709034e-06, - "loss": 1.0274, - "step": 7696 - }, - { - "epoch": 0.5784608447317, - "grad_norm": 1.8439015471682358, - "learning_rate": 1.5919896488193166e-06, - "loss": 1.0479, - "step": 7697 - }, - { - "epoch": 0.5785359987975349, - "grad_norm": 0.8968025922622103, - "learning_rate": 1.5915130571057755e-06, - "loss": 0.911, - "step": 7698 - }, - { - "epoch": 0.57861115286337, - "grad_norm": 0.709319904893432, - "learning_rate": 1.5910364895966498e-06, - "loss": 0.831, - "step": 7699 - }, - { - "epoch": 0.5786863069292049, - "grad_norm": 3.086209442131743, - "learning_rate": 1.5905599463201785e-06, - "loss": 1.0082, - "step": 7700 - }, - { - "epoch": 0.5787614609950398, - "grad_norm": 2.6495827868259534, - "learning_rate": 1.590083427304598e-06, - "loss": 0.7643, - "step": 7701 - }, - { - "epoch": 0.5788366150608748, - "grad_norm": 1.7294797116071405, - "learning_rate": 1.5896069325781435e-06, - "loss": 0.8988, - "step": 7702 - }, - { - "epoch": 0.5789117691267097, - "grad_norm": 1.9168500881609882, - "learning_rate": 1.5891304621690508e-06, - "loss": 0.9707, - "step": 7703 - }, - { - "epoch": 0.5789869231925447, - "grad_norm": 4.002675592119846, - "learning_rate": 1.5886540161055507e-06, - "loss": 0.9653, - "step": 7704 - }, - { - "epoch": 0.5790620772583797, - "grad_norm": 2.1621344064367976, - "learning_rate": 1.5881775944158755e-06, - "loss": 0.8688, - "step": 7705 - }, - { - "epoch": 0.5791372313242147, - "grad_norm": 2.2016758041519497, - "learning_rate": 1.5877011971282553e-06, - "loss": 1.0217, - "step": 7706 - }, - { - "epoch": 0.5792123853900496, - "grad_norm": 5.396975991563759, - "learning_rate": 1.5872248242709168e-06, - "loss": 0.9388, - "step": 7707 - }, - { - "epoch": 0.5792875394558845, - "grad_norm": 1.8112401859078113, - "learning_rate": 1.5867484758720894e-06, - "loss": 0.9675, - "step": 7708 - }, - { - "epoch": 0.5793626935217195, - "grad_norm": 1.6498604368660594, - "learning_rate": 1.5862721519599963e-06, - "loss": 1.0091, - "step": 7709 - }, - { - "epoch": 0.5794378475875545, - "grad_norm": 2.3208562040543272, - "learning_rate": 1.585795852562863e-06, - "loss": 0.9467, - "step": 7710 - }, - { - "epoch": 0.5795130016533895, - "grad_norm": 1.8722372682056758, - "learning_rate": 1.585319577708911e-06, - "loss": 0.8959, - "step": 7711 - }, - { - "epoch": 0.5795881557192244, - "grad_norm": 0.6903142981033081, - "learning_rate": 1.5848433274263627e-06, - "loss": 0.8407, - "step": 7712 - }, - { - "epoch": 0.5796633097850594, - "grad_norm": 1.4899454899232067, - "learning_rate": 1.5843671017434366e-06, - "loss": 0.9398, - "step": 7713 - }, - { - "epoch": 0.5797384638508943, - "grad_norm": 1.6999972259274456, - "learning_rate": 1.583890900688351e-06, - "loss": 0.9428, - "step": 7714 - }, - { - "epoch": 0.5798136179167293, - "grad_norm": 1.9300283524797588, - "learning_rate": 1.5834147242893234e-06, - "loss": 0.9606, - "step": 7715 - }, - { - "epoch": 0.5798887719825643, - "grad_norm": 1.5848083234586705, - "learning_rate": 1.5829385725745684e-06, - "loss": 0.9222, - "step": 7716 - }, - { - "epoch": 0.5799639260483992, - "grad_norm": 1.5467264123317, - "learning_rate": 1.5824624455723e-06, - "loss": 0.9963, - "step": 7717 - }, - { - "epoch": 0.5800390801142342, - "grad_norm": 4.8799806241135455, - "learning_rate": 1.581986343310731e-06, - "loss": 1.0159, - "step": 7718 - }, - { - "epoch": 0.5801142341800691, - "grad_norm": 1.3291647935971702, - "learning_rate": 1.581510265818071e-06, - "loss": 0.8405, - "step": 7719 - }, - { - "epoch": 0.5801893882459042, - "grad_norm": 1.7606239046385217, - "learning_rate": 1.5810342131225308e-06, - "loss": 0.9445, - "step": 7720 - }, - { - "epoch": 0.5802645423117391, - "grad_norm": 2.2336560139578756, - "learning_rate": 1.5805581852523176e-06, - "loss": 0.8816, - "step": 7721 - }, - { - "epoch": 0.580339696377574, - "grad_norm": 1.7299922519576485, - "learning_rate": 1.5800821822356383e-06, - "loss": 0.8776, - "step": 7722 - }, - { - "epoch": 0.580414850443409, - "grad_norm": 2.136926728052334, - "learning_rate": 1.5796062041006978e-06, - "loss": 1.0562, - "step": 7723 - }, - { - "epoch": 0.5804900045092439, - "grad_norm": 0.8306360142697308, - "learning_rate": 1.579130250875699e-06, - "loss": 0.9026, - "step": 7724 - }, - { - "epoch": 0.580565158575079, - "grad_norm": 1.996440401975663, - "learning_rate": 1.578654322588845e-06, - "loss": 0.9469, - "step": 7725 - }, - { - "epoch": 0.5806403126409139, - "grad_norm": 1.9230704582545848, - "learning_rate": 1.5781784192683351e-06, - "loss": 0.9926, - "step": 7726 - }, - { - "epoch": 0.5807154667067488, - "grad_norm": 1.5664439969734945, - "learning_rate": 1.57770254094237e-06, - "loss": 0.9816, - "step": 7727 - }, - { - "epoch": 0.5807906207725838, - "grad_norm": 1.7009651417600145, - "learning_rate": 1.577226687639146e-06, - "loss": 0.885, - "step": 7728 - }, - { - "epoch": 0.5808657748384187, - "grad_norm": 1.4896217376808987, - "learning_rate": 1.5767508593868588e-06, - "loss": 0.9996, - "step": 7729 - }, - { - "epoch": 0.5809409289042538, - "grad_norm": 2.1836972561838874, - "learning_rate": 1.5762750562137056e-06, - "loss": 1.0484, - "step": 7730 - }, - { - "epoch": 0.5810160829700887, - "grad_norm": 1.87702051015957, - "learning_rate": 1.5757992781478762e-06, - "loss": 0.9578, - "step": 7731 - }, - { - "epoch": 0.5810912370359237, - "grad_norm": 1.6934553248975026, - "learning_rate": 1.575323525217565e-06, - "loss": 0.9033, - "step": 7732 - }, - { - "epoch": 0.5811663911017586, - "grad_norm": 1.365654832876373, - "learning_rate": 1.5748477974509606e-06, - "loss": 0.9995, - "step": 7733 - }, - { - "epoch": 0.5812415451675935, - "grad_norm": 1.9891959074460397, - "learning_rate": 1.5743720948762527e-06, - "loss": 0.9648, - "step": 7734 - }, - { - "epoch": 0.5813166992334285, - "grad_norm": 1.5967482245898108, - "learning_rate": 1.573896417521628e-06, - "loss": 1.0417, - "step": 7735 - }, - { - "epoch": 0.5813918532992635, - "grad_norm": 1.803037210902461, - "learning_rate": 1.5734207654152718e-06, - "loss": 0.9603, - "step": 7736 - }, - { - "epoch": 0.5814670073650985, - "grad_norm": 4.451641491516825, - "learning_rate": 1.572945138585369e-06, - "loss": 1.0146, - "step": 7737 - }, - { - "epoch": 0.5815421614309334, - "grad_norm": 1.5881752050061304, - "learning_rate": 1.572469537060102e-06, - "loss": 0.9838, - "step": 7738 - }, - { - "epoch": 0.5816173154967684, - "grad_norm": 0.7610057526043931, - "learning_rate": 1.5719939608676523e-06, - "loss": 0.8015, - "step": 7739 - }, - { - "epoch": 0.5816924695626033, - "grad_norm": 1.4512620695073888, - "learning_rate": 1.5715184100361992e-06, - "loss": 0.9684, - "step": 7740 - }, - { - "epoch": 0.5817676236284383, - "grad_norm": 1.7110105176699377, - "learning_rate": 1.5710428845939207e-06, - "loss": 0.9726, - "step": 7741 - }, - { - "epoch": 0.5818427776942733, - "grad_norm": 10.031111196347895, - "learning_rate": 1.5705673845689945e-06, - "loss": 1.0633, - "step": 7742 - }, - { - "epoch": 0.5819179317601082, - "grad_norm": 2.063269847186933, - "learning_rate": 1.5700919099895943e-06, - "loss": 0.9677, - "step": 7743 - }, - { - "epoch": 0.5819930858259432, - "grad_norm": 1.4433015597563177, - "learning_rate": 1.5696164608838956e-06, - "loss": 1.0702, - "step": 7744 - }, - { - "epoch": 0.5820682398917781, - "grad_norm": 3.8626877624897773, - "learning_rate": 1.5691410372800696e-06, - "loss": 0.9777, - "step": 7745 - }, - { - "epoch": 0.582143393957613, - "grad_norm": 2.070699288566704, - "learning_rate": 1.5686656392062863e-06, - "loss": 1.0417, - "step": 7746 - }, - { - "epoch": 0.5822185480234481, - "grad_norm": 3.367242303743035, - "learning_rate": 1.5681902666907161e-06, - "loss": 0.87, - "step": 7747 - }, - { - "epoch": 0.582293702089283, - "grad_norm": 8.838751900367495, - "learning_rate": 1.5677149197615257e-06, - "loss": 1.0104, - "step": 7748 - }, - { - "epoch": 0.582368856155118, - "grad_norm": 2.2347237908285593, - "learning_rate": 1.567239598446882e-06, - "loss": 0.9029, - "step": 7749 - }, - { - "epoch": 0.5824440102209529, - "grad_norm": 2.225567297461981, - "learning_rate": 1.5667643027749492e-06, - "loss": 0.8439, - "step": 7750 - }, - { - "epoch": 0.582519164286788, - "grad_norm": 1.5025472470438321, - "learning_rate": 1.5662890327738897e-06, - "loss": 0.8946, - "step": 7751 - }, - { - "epoch": 0.5825943183526229, - "grad_norm": 1.8631289757779252, - "learning_rate": 1.5658137884718672e-06, - "loss": 0.9681, - "step": 7752 - }, - { - "epoch": 0.5826694724184578, - "grad_norm": 2.188337329280756, - "learning_rate": 1.565338569897039e-06, - "loss": 0.8614, - "step": 7753 - }, - { - "epoch": 0.5827446264842928, - "grad_norm": 2.444671546857651, - "learning_rate": 1.5648633770775656e-06, - "loss": 0.9627, - "step": 7754 - }, - { - "epoch": 0.5828197805501277, - "grad_norm": 1.9695182391307775, - "learning_rate": 1.564388210041603e-06, - "loss": 1.0227, - "step": 7755 - }, - { - "epoch": 0.5828949346159628, - "grad_norm": 1.9051200144051579, - "learning_rate": 1.5639130688173077e-06, - "loss": 1.0988, - "step": 7756 - }, - { - "epoch": 0.5829700886817977, - "grad_norm": 1.6721764408630573, - "learning_rate": 1.5634379534328326e-06, - "loss": 1.0011, - "step": 7757 - }, - { - "epoch": 0.5830452427476327, - "grad_norm": 1.8018783570363202, - "learning_rate": 1.5629628639163304e-06, - "loss": 0.9465, - "step": 7758 - }, - { - "epoch": 0.5831203968134676, - "grad_norm": 1.9344692521559037, - "learning_rate": 1.5624878002959521e-06, - "loss": 0.8736, - "step": 7759 - }, - { - "epoch": 0.5831955508793025, - "grad_norm": 2.1131528579152827, - "learning_rate": 1.5620127625998469e-06, - "loss": 0.9457, - "step": 7760 - }, - { - "epoch": 0.5832707049451376, - "grad_norm": 2.2427314337783777, - "learning_rate": 1.5615377508561628e-06, - "loss": 0.966, - "step": 7761 - }, - { - "epoch": 0.5833458590109725, - "grad_norm": 0.8524693201646919, - "learning_rate": 1.561062765093046e-06, - "loss": 0.8729, - "step": 7762 - }, - { - "epoch": 0.5834210130768075, - "grad_norm": 4.131027903624877, - "learning_rate": 1.560587805338641e-06, - "loss": 0.9907, - "step": 7763 - }, - { - "epoch": 0.5834961671426424, - "grad_norm": 1.8444763304674812, - "learning_rate": 1.5601128716210915e-06, - "loss": 0.9918, - "step": 7764 - }, - { - "epoch": 0.5835713212084774, - "grad_norm": 2.441175770370848, - "learning_rate": 1.5596379639685382e-06, - "loss": 1.0013, - "step": 7765 - }, - { - "epoch": 0.5836464752743123, - "grad_norm": 1.3995879571688197, - "learning_rate": 1.5591630824091224e-06, - "loss": 0.9531, - "step": 7766 - }, - { - "epoch": 0.5837216293401473, - "grad_norm": 1.4621489698596533, - "learning_rate": 1.5586882269709819e-06, - "loss": 0.9609, - "step": 7767 - }, - { - "epoch": 0.5837967834059823, - "grad_norm": 1.8992201836100808, - "learning_rate": 1.5582133976822534e-06, - "loss": 1.0097, - "step": 7768 - }, - { - "epoch": 0.5838719374718172, - "grad_norm": 3.305093366080344, - "learning_rate": 1.5577385945710732e-06, - "loss": 0.9912, - "step": 7769 - }, - { - "epoch": 0.5839470915376522, - "grad_norm": 0.845471074995215, - "learning_rate": 1.5572638176655742e-06, - "loss": 0.9325, - "step": 7770 - }, - { - "epoch": 0.5840222456034871, - "grad_norm": 1.7402347860111047, - "learning_rate": 1.5567890669938905e-06, - "loss": 1.0041, - "step": 7771 - }, - { - "epoch": 0.5840973996693221, - "grad_norm": 1.7491605652728022, - "learning_rate": 1.5563143425841512e-06, - "loss": 0.8864, - "step": 7772 - }, - { - "epoch": 0.5841725537351571, - "grad_norm": 3.37443919290868, - "learning_rate": 1.5558396444644854e-06, - "loss": 1.0169, - "step": 7773 - }, - { - "epoch": 0.584247707800992, - "grad_norm": 1.6227639292517462, - "learning_rate": 1.5553649726630222e-06, - "loss": 0.9713, - "step": 7774 - }, - { - "epoch": 0.584322861866827, - "grad_norm": 1.5052291006307785, - "learning_rate": 1.5548903272078865e-06, - "loss": 0.9405, - "step": 7775 - }, - { - "epoch": 0.5843980159326619, - "grad_norm": 2.1714026946124965, - "learning_rate": 1.554415708127204e-06, - "loss": 0.8768, - "step": 7776 - }, - { - "epoch": 0.584473169998497, - "grad_norm": 1.5476560836245021, - "learning_rate": 1.5539411154490967e-06, - "loss": 0.9889, - "step": 7777 - }, - { - "epoch": 0.5845483240643319, - "grad_norm": 1.738489442851959, - "learning_rate": 1.5534665492016865e-06, - "loss": 0.9416, - "step": 7778 - }, - { - "epoch": 0.5846234781301668, - "grad_norm": 1.7819118943102863, - "learning_rate": 1.5529920094130932e-06, - "loss": 1.0516, - "step": 7779 - }, - { - "epoch": 0.5846986321960018, - "grad_norm": 1.6880616560247483, - "learning_rate": 1.552517496111435e-06, - "loss": 0.8257, - "step": 7780 - }, - { - "epoch": 0.5847737862618367, - "grad_norm": 1.828528465677382, - "learning_rate": 1.5520430093248286e-06, - "loss": 0.8998, - "step": 7781 - }, - { - "epoch": 0.5848489403276718, - "grad_norm": 2.4118472308210848, - "learning_rate": 1.5515685490813891e-06, - "loss": 0.9211, - "step": 7782 - }, - { - "epoch": 0.5849240943935067, - "grad_norm": 2.174356045159257, - "learning_rate": 1.5510941154092304e-06, - "loss": 0.9787, - "step": 7783 - }, - { - "epoch": 0.5849992484593417, - "grad_norm": 2.317994163690153, - "learning_rate": 1.5506197083364647e-06, - "loss": 1.0128, - "step": 7784 - }, - { - "epoch": 0.5850744025251766, - "grad_norm": 1.8511653425910803, - "learning_rate": 1.5501453278912013e-06, - "loss": 0.9288, - "step": 7785 - }, - { - "epoch": 0.5851495565910115, - "grad_norm": 1.5711655458561176, - "learning_rate": 1.5496709741015505e-06, - "loss": 0.954, - "step": 7786 - }, - { - "epoch": 0.5852247106568466, - "grad_norm": 1.719245769784529, - "learning_rate": 1.5491966469956187e-06, - "loss": 0.9467, - "step": 7787 - }, - { - "epoch": 0.5852998647226815, - "grad_norm": 0.7049503580498998, - "learning_rate": 1.5487223466015118e-06, - "loss": 0.8677, - "step": 7788 - }, - { - "epoch": 0.5853750187885165, - "grad_norm": 2.213861479749755, - "learning_rate": 1.5482480729473339e-06, - "loss": 1.0952, - "step": 7789 - }, - { - "epoch": 0.5854501728543514, - "grad_norm": 1.6818171994396218, - "learning_rate": 1.5477738260611875e-06, - "loss": 0.9824, - "step": 7790 - }, - { - "epoch": 0.5855253269201863, - "grad_norm": 1.8173370929524264, - "learning_rate": 1.5472996059711738e-06, - "loss": 0.9751, - "step": 7791 - }, - { - "epoch": 0.5856004809860214, - "grad_norm": 1.6406087965392178, - "learning_rate": 1.546825412705391e-06, - "loss": 0.9966, - "step": 7792 - }, - { - "epoch": 0.5856756350518563, - "grad_norm": 1.6975199308053992, - "learning_rate": 1.5463512462919393e-06, - "loss": 0.9628, - "step": 7793 - }, - { - "epoch": 0.5857507891176913, - "grad_norm": 1.4973178566794942, - "learning_rate": 1.5458771067589128e-06, - "loss": 0.9416, - "step": 7794 - }, - { - "epoch": 0.5858259431835262, - "grad_norm": 2.8211728630287083, - "learning_rate": 1.545402994134406e-06, - "loss": 1.0385, - "step": 7795 - }, - { - "epoch": 0.5859010972493612, - "grad_norm": 2.9166979288514785, - "learning_rate": 1.544928908446513e-06, - "loss": 0.9917, - "step": 7796 - }, - { - "epoch": 0.5859762513151962, - "grad_norm": 1.6565725624226466, - "learning_rate": 1.544454849723325e-06, - "loss": 0.8666, - "step": 7797 - }, - { - "epoch": 0.5860514053810311, - "grad_norm": 1.6694410914709885, - "learning_rate": 1.5439808179929316e-06, - "loss": 0.9121, - "step": 7798 - }, - { - "epoch": 0.5861265594468661, - "grad_norm": 1.786187473000929, - "learning_rate": 1.5435068132834204e-06, - "loss": 0.9363, - "step": 7799 - }, - { - "epoch": 0.586201713512701, - "grad_norm": 1.6152083222987732, - "learning_rate": 1.543032835622879e-06, - "loss": 0.9077, - "step": 7800 - }, - { - "epoch": 0.586276867578536, - "grad_norm": 1.5945849755827528, - "learning_rate": 1.5425588850393922e-06, - "loss": 0.9821, - "step": 7801 - }, - { - "epoch": 0.586352021644371, - "grad_norm": 1.6080600588718206, - "learning_rate": 1.5420849615610424e-06, - "loss": 0.9756, - "step": 7802 - }, - { - "epoch": 0.586427175710206, - "grad_norm": 1.6117110843448414, - "learning_rate": 1.541611065215913e-06, - "loss": 0.9064, - "step": 7803 - }, - { - "epoch": 0.5865023297760409, - "grad_norm": 1.5905168687680793, - "learning_rate": 1.5411371960320822e-06, - "loss": 1.055, - "step": 7804 - }, - { - "epoch": 0.5865774838418758, - "grad_norm": 1.5587678204996078, - "learning_rate": 1.5406633540376307e-06, - "loss": 0.9371, - "step": 7805 - }, - { - "epoch": 0.5866526379077108, - "grad_norm": 1.460568386510188, - "learning_rate": 1.5401895392606339e-06, - "loss": 0.9906, - "step": 7806 - }, - { - "epoch": 0.5867277919735457, - "grad_norm": 3.3176748772116516, - "learning_rate": 1.5397157517291674e-06, - "loss": 1.0139, - "step": 7807 - }, - { - "epoch": 0.5868029460393808, - "grad_norm": 2.5472393126372572, - "learning_rate": 1.5392419914713054e-06, - "loss": 1.0688, - "step": 7808 - }, - { - "epoch": 0.5868781001052157, - "grad_norm": 2.483292445757202, - "learning_rate": 1.5387682585151195e-06, - "loss": 0.9714, - "step": 7809 - }, - { - "epoch": 0.5869532541710507, - "grad_norm": 1.5986703950988543, - "learning_rate": 1.538294552888681e-06, - "loss": 1.0128, - "step": 7810 - }, - { - "epoch": 0.5870284082368856, - "grad_norm": 2.044239239944629, - "learning_rate": 1.537820874620058e-06, - "loss": 0.8584, - "step": 7811 - }, - { - "epoch": 0.5871035623027205, - "grad_norm": 1.7191608978164832, - "learning_rate": 1.537347223737318e-06, - "loss": 1.0215, - "step": 7812 - }, - { - "epoch": 0.5871787163685556, - "grad_norm": 0.803783835237284, - "learning_rate": 1.5368736002685266e-06, - "loss": 0.8287, - "step": 7813 - }, - { - "epoch": 0.5872538704343905, - "grad_norm": 1.5950199378271233, - "learning_rate": 1.5364000042417468e-06, - "loss": 0.8765, - "step": 7814 - }, - { - "epoch": 0.5873290245002255, - "grad_norm": 1.7611901278908533, - "learning_rate": 1.5359264356850435e-06, - "loss": 0.9787, - "step": 7815 - }, - { - "epoch": 0.5874041785660604, - "grad_norm": 1.814071368738751, - "learning_rate": 1.5354528946264757e-06, - "loss": 0.8907, - "step": 7816 - }, - { - "epoch": 0.5874793326318953, - "grad_norm": 1.3851647064243462, - "learning_rate": 1.534979381094102e-06, - "loss": 0.8993, - "step": 7817 - }, - { - "epoch": 0.5875544866977304, - "grad_norm": 2.88220655074116, - "learning_rate": 1.534505895115981e-06, - "loss": 0.9758, - "step": 7818 - }, - { - "epoch": 0.5876296407635653, - "grad_norm": 1.877923847413566, - "learning_rate": 1.5340324367201681e-06, - "loss": 0.9403, - "step": 7819 - }, - { - "epoch": 0.5877047948294003, - "grad_norm": 2.330696580257682, - "learning_rate": 1.533559005934718e-06, - "loss": 0.9448, - "step": 7820 - }, - { - "epoch": 0.5877799488952352, - "grad_norm": 1.6101884730417215, - "learning_rate": 1.5330856027876827e-06, - "loss": 1.0467, - "step": 7821 - }, - { - "epoch": 0.5878551029610702, - "grad_norm": 2.178311149272117, - "learning_rate": 1.5326122273071133e-06, - "loss": 1.0247, - "step": 7822 - }, - { - "epoch": 0.5879302570269052, - "grad_norm": 2.4618491769885615, - "learning_rate": 1.5321388795210597e-06, - "loss": 1.0542, - "step": 7823 - }, - { - "epoch": 0.5880054110927401, - "grad_norm": 1.9195079337828287, - "learning_rate": 1.5316655594575685e-06, - "loss": 0.9626, - "step": 7824 - }, - { - "epoch": 0.5880805651585751, - "grad_norm": 1.6352230694455494, - "learning_rate": 1.5311922671446864e-06, - "loss": 0.8712, - "step": 7825 - }, - { - "epoch": 0.58815571922441, - "grad_norm": 1.8548321404815593, - "learning_rate": 1.5307190026104574e-06, - "loss": 1.0199, - "step": 7826 - }, - { - "epoch": 0.588230873290245, - "grad_norm": 1.7167715997321753, - "learning_rate": 1.530245765882925e-06, - "loss": 1.0121, - "step": 7827 - }, - { - "epoch": 0.58830602735608, - "grad_norm": 1.6458940454544164, - "learning_rate": 1.5297725569901298e-06, - "loss": 1.0129, - "step": 7828 - }, - { - "epoch": 0.588381181421915, - "grad_norm": 1.6767241130768327, - "learning_rate": 1.5292993759601107e-06, - "loss": 0.9783, - "step": 7829 - }, - { - "epoch": 0.5884563354877499, - "grad_norm": 1.6030892886818788, - "learning_rate": 1.5288262228209066e-06, - "loss": 0.9244, - "step": 7830 - }, - { - "epoch": 0.5885314895535848, - "grad_norm": 1.3723480052202306, - "learning_rate": 1.5283530976005524e-06, - "loss": 0.9709, - "step": 7831 - }, - { - "epoch": 0.5886066436194198, - "grad_norm": 1.6070221673971201, - "learning_rate": 1.5278800003270838e-06, - "loss": 0.9944, - "step": 7832 - }, - { - "epoch": 0.5886817976852547, - "grad_norm": 1.9637680597767873, - "learning_rate": 1.527406931028533e-06, - "loss": 0.9348, - "step": 7833 - }, - { - "epoch": 0.5887569517510898, - "grad_norm": 1.8357804900864325, - "learning_rate": 1.5269338897329308e-06, - "loss": 0.9514, - "step": 7834 - }, - { - "epoch": 0.5888321058169247, - "grad_norm": 1.3991376093985786, - "learning_rate": 1.5264608764683074e-06, - "loss": 1.0177, - "step": 7835 - }, - { - "epoch": 0.5889072598827596, - "grad_norm": 1.8850502234125217, - "learning_rate": 1.5259878912626896e-06, - "loss": 1.0701, - "step": 7836 - }, - { - "epoch": 0.5889824139485946, - "grad_norm": 1.4937487518433021, - "learning_rate": 1.5255149341441053e-06, - "loss": 0.9353, - "step": 7837 - }, - { - "epoch": 0.5890575680144295, - "grad_norm": 2.2028217512789623, - "learning_rate": 1.5250420051405783e-06, - "loss": 0.9459, - "step": 7838 - }, - { - "epoch": 0.5891327220802646, - "grad_norm": 1.4776626605814205, - "learning_rate": 1.5245691042801302e-06, - "loss": 1.1141, - "step": 7839 - }, - { - "epoch": 0.5892078761460995, - "grad_norm": 1.6509161659781555, - "learning_rate": 1.524096231590784e-06, - "loss": 0.9774, - "step": 7840 - }, - { - "epoch": 0.5892830302119345, - "grad_norm": 1.6049913128056235, - "learning_rate": 1.523623387100558e-06, - "loss": 1.0214, - "step": 7841 - }, - { - "epoch": 0.5893581842777694, - "grad_norm": 5.061429322604409, - "learning_rate": 1.5231505708374707e-06, - "loss": 1.0421, - "step": 7842 - }, - { - "epoch": 0.5894333383436043, - "grad_norm": 1.6329301855577787, - "learning_rate": 1.5226777828295378e-06, - "loss": 1.067, - "step": 7843 - }, - { - "epoch": 0.5895084924094394, - "grad_norm": 1.742055092471443, - "learning_rate": 1.5222050231047747e-06, - "loss": 0.9948, - "step": 7844 - }, - { - "epoch": 0.5895836464752743, - "grad_norm": 2.2929507617147373, - "learning_rate": 1.5217322916911934e-06, - "loss": 0.9712, - "step": 7845 - }, - { - "epoch": 0.5896588005411093, - "grad_norm": 1.928943691454218, - "learning_rate": 1.5212595886168046e-06, - "loss": 1.0259, - "step": 7846 - }, - { - "epoch": 0.5897339546069442, - "grad_norm": 2.3021047829941983, - "learning_rate": 1.5207869139096191e-06, - "loss": 1.0547, - "step": 7847 - }, - { - "epoch": 0.5898091086727792, - "grad_norm": 1.4499613460781806, - "learning_rate": 1.5203142675976434e-06, - "loss": 0.9324, - "step": 7848 - }, - { - "epoch": 0.5898842627386142, - "grad_norm": 19.281725613309437, - "learning_rate": 1.5198416497088849e-06, - "loss": 0.9806, - "step": 7849 - }, - { - "epoch": 0.5899594168044491, - "grad_norm": 2.266042736610028, - "learning_rate": 1.519369060271347e-06, - "loss": 1.0675, - "step": 7850 - }, - { - "epoch": 0.5900345708702841, - "grad_norm": 2.2023433083124115, - "learning_rate": 1.5188964993130321e-06, - "loss": 0.8718, - "step": 7851 - }, - { - "epoch": 0.590109724936119, - "grad_norm": 1.5729983310678484, - "learning_rate": 1.5184239668619427e-06, - "loss": 1.0209, - "step": 7852 - }, - { - "epoch": 0.590184879001954, - "grad_norm": 5.070937295985275, - "learning_rate": 1.517951462946077e-06, - "loss": 0.8775, - "step": 7853 - }, - { - "epoch": 0.590260033067789, - "grad_norm": 1.5604139517271143, - "learning_rate": 1.5174789875934332e-06, - "loss": 1.0638, - "step": 7854 - }, - { - "epoch": 0.590335187133624, - "grad_norm": 2.0419686022818957, - "learning_rate": 1.517006540832007e-06, - "loss": 0.9249, - "step": 7855 - }, - { - "epoch": 0.5904103411994589, - "grad_norm": 1.8255743507743194, - "learning_rate": 1.5165341226897926e-06, - "loss": 0.9986, - "step": 7856 - }, - { - "epoch": 0.5904854952652938, - "grad_norm": 1.8079421515367196, - "learning_rate": 1.5160617331947828e-06, - "loss": 0.9678, - "step": 7857 - }, - { - "epoch": 0.5905606493311288, - "grad_norm": 1.565591043496182, - "learning_rate": 1.515589372374968e-06, - "loss": 1.097, - "step": 7858 - }, - { - "epoch": 0.5906358033969638, - "grad_norm": 1.4265420634382096, - "learning_rate": 1.5151170402583384e-06, - "loss": 0.9303, - "step": 7859 - }, - { - "epoch": 0.5907109574627988, - "grad_norm": 1.80409997401261, - "learning_rate": 1.5146447368728814e-06, - "loss": 1.0492, - "step": 7860 - }, - { - "epoch": 0.5907861115286337, - "grad_norm": 1.6013823617252765, - "learning_rate": 1.514172462246581e-06, - "loss": 1.0289, - "step": 7861 - }, - { - "epoch": 0.5908612655944686, - "grad_norm": 2.1789886557505636, - "learning_rate": 1.5137002164074234e-06, - "loss": 0.9459, - "step": 7862 - }, - { - "epoch": 0.5909364196603036, - "grad_norm": 1.9146547607890267, - "learning_rate": 1.5132279993833898e-06, - "loss": 0.9568, - "step": 7863 - }, - { - "epoch": 0.5910115737261386, - "grad_norm": 1.9988662367201575, - "learning_rate": 1.5127558112024617e-06, - "loss": 1.038, - "step": 7864 - }, - { - "epoch": 0.5910867277919736, - "grad_norm": 2.428655767178465, - "learning_rate": 1.512283651892617e-06, - "loss": 0.9088, - "step": 7865 - }, - { - "epoch": 0.5911618818578085, - "grad_norm": 1.7428543798795504, - "learning_rate": 1.5118115214818339e-06, - "loss": 1.0074, - "step": 7866 - }, - { - "epoch": 0.5912370359236435, - "grad_norm": 1.366052996086336, - "learning_rate": 1.5113394199980877e-06, - "loss": 1.0422, - "step": 7867 - }, - { - "epoch": 0.5913121899894784, - "grad_norm": 1.990347637214344, - "learning_rate": 1.5108673474693516e-06, - "loss": 1.034, - "step": 7868 - }, - { - "epoch": 0.5913873440553133, - "grad_norm": 1.5750210896851131, - "learning_rate": 1.5103953039235986e-06, - "loss": 0.9301, - "step": 7869 - }, - { - "epoch": 0.5914624981211484, - "grad_norm": 1.6678484620935843, - "learning_rate": 1.5099232893887983e-06, - "loss": 0.9854, - "step": 7870 - }, - { - "epoch": 0.5915376521869833, - "grad_norm": 1.7450956696569655, - "learning_rate": 1.5094513038929199e-06, - "loss": 1.073, - "step": 7871 - }, - { - "epoch": 0.5916128062528183, - "grad_norm": 1.6938232349736704, - "learning_rate": 1.5089793474639305e-06, - "loss": 0.899, - "step": 7872 - }, - { - "epoch": 0.5916879603186532, - "grad_norm": 1.786288180093377, - "learning_rate": 1.5085074201297943e-06, - "loss": 1.0657, - "step": 7873 - }, - { - "epoch": 0.5917631143844883, - "grad_norm": 2.6244251893439237, - "learning_rate": 1.5080355219184762e-06, - "loss": 0.967, - "step": 7874 - }, - { - "epoch": 0.5918382684503232, - "grad_norm": 1.7554067467476304, - "learning_rate": 1.5075636528579366e-06, - "loss": 1.0325, - "step": 7875 - }, - { - "epoch": 0.5919134225161581, - "grad_norm": 1.522862174141598, - "learning_rate": 1.507091812976137e-06, - "loss": 1.0495, - "step": 7876 - }, - { - "epoch": 0.5919885765819931, - "grad_norm": 2.009491333626964, - "learning_rate": 1.5066200023010347e-06, - "loss": 0.9204, - "step": 7877 - }, - { - "epoch": 0.592063730647828, - "grad_norm": 2.929018966691539, - "learning_rate": 1.5061482208605856e-06, - "loss": 0.9655, - "step": 7878 - }, - { - "epoch": 0.592138884713663, - "grad_norm": 1.7765736011705162, - "learning_rate": 1.505676468682747e-06, - "loss": 0.9689, - "step": 7879 - }, - { - "epoch": 0.592214038779498, - "grad_norm": 2.754254691082658, - "learning_rate": 1.5052047457954691e-06, - "loss": 1.0261, - "step": 7880 - }, - { - "epoch": 0.5922891928453329, - "grad_norm": 1.809139951944751, - "learning_rate": 1.5047330522267056e-06, - "loss": 0.9868, - "step": 7881 - }, - { - "epoch": 0.5923643469111679, - "grad_norm": 1.6857475081788424, - "learning_rate": 1.5042613880044053e-06, - "loss": 0.9926, - "step": 7882 - }, - { - "epoch": 0.5924395009770028, - "grad_norm": 1.6859102885089314, - "learning_rate": 1.5037897531565155e-06, - "loss": 0.9951, - "step": 7883 - }, - { - "epoch": 0.5925146550428378, - "grad_norm": 1.8351831712692839, - "learning_rate": 1.5033181477109835e-06, - "loss": 0.9942, - "step": 7884 - }, - { - "epoch": 0.5925898091086728, - "grad_norm": 1.7523829040370857, - "learning_rate": 1.5028465716957527e-06, - "loss": 0.9396, - "step": 7885 - }, - { - "epoch": 0.5926649631745078, - "grad_norm": 1.3986367877487669, - "learning_rate": 1.5023750251387668e-06, - "loss": 0.9556, - "step": 7886 - }, - { - "epoch": 0.5927401172403427, - "grad_norm": 1.772775198659864, - "learning_rate": 1.501903508067966e-06, - "loss": 0.9323, - "step": 7887 - }, - { - "epoch": 0.5928152713061776, - "grad_norm": 1.6426381430167243, - "learning_rate": 1.5014320205112897e-06, - "loss": 0.813, - "step": 7888 - }, - { - "epoch": 0.5928904253720126, - "grad_norm": 2.078537634704519, - "learning_rate": 1.5009605624966753e-06, - "loss": 0.9282, - "step": 7889 - }, - { - "epoch": 0.5929655794378476, - "grad_norm": 1.7834909630865465, - "learning_rate": 1.5004891340520583e-06, - "loss": 0.9777, - "step": 7890 - }, - { - "epoch": 0.5930407335036826, - "grad_norm": 2.442325510529574, - "learning_rate": 1.5000177352053732e-06, - "loss": 0.9425, - "step": 7891 - }, - { - "epoch": 0.5931158875695175, - "grad_norm": 1.5152312878358196, - "learning_rate": 1.4995463659845512e-06, - "loss": 0.9915, - "step": 7892 - }, - { - "epoch": 0.5931910416353525, - "grad_norm": 1.2770311632229854, - "learning_rate": 1.499075026417524e-06, - "loss": 0.9616, - "step": 7893 - }, - { - "epoch": 0.5932661957011874, - "grad_norm": 1.7020758100675666, - "learning_rate": 1.4986037165322199e-06, - "loss": 1.0315, - "step": 7894 - }, - { - "epoch": 0.5933413497670224, - "grad_norm": 0.7578181237080919, - "learning_rate": 1.498132436356565e-06, - "loss": 0.8854, - "step": 7895 - }, - { - "epoch": 0.5934165038328574, - "grad_norm": 1.377757601735521, - "learning_rate": 1.4976611859184852e-06, - "loss": 1.0122, - "step": 7896 - }, - { - "epoch": 0.5934916578986923, - "grad_norm": 2.4665118345003383, - "learning_rate": 1.4971899652459034e-06, - "loss": 0.8854, - "step": 7897 - }, - { - "epoch": 0.5935668119645273, - "grad_norm": 1.5315693113638293, - "learning_rate": 1.4967187743667423e-06, - "loss": 0.9667, - "step": 7898 - }, - { - "epoch": 0.5936419660303622, - "grad_norm": 2.9067477548586798, - "learning_rate": 1.4962476133089207e-06, - "loss": 1.0121, - "step": 7899 - }, - { - "epoch": 0.5937171200961973, - "grad_norm": 11.590662624846253, - "learning_rate": 1.4957764821003562e-06, - "loss": 0.9336, - "step": 7900 - }, - { - "epoch": 0.5937922741620322, - "grad_norm": 4.502869221122703, - "learning_rate": 1.4953053807689671e-06, - "loss": 0.9062, - "step": 7901 - }, - { - "epoch": 0.5938674282278671, - "grad_norm": 4.514538274498199, - "learning_rate": 1.4948343093426656e-06, - "loss": 0.9281, - "step": 7902 - }, - { - "epoch": 0.5939425822937021, - "grad_norm": 1.9904005226511745, - "learning_rate": 1.4943632678493668e-06, - "loss": 0.9591, - "step": 7903 - }, - { - "epoch": 0.594017736359537, - "grad_norm": 1.9305376041349218, - "learning_rate": 1.4938922563169801e-06, - "loss": 0.9083, - "step": 7904 - }, - { - "epoch": 0.594092890425372, - "grad_norm": 1.7089430798582241, - "learning_rate": 1.4934212747734153e-06, - "loss": 0.9189, - "step": 7905 - }, - { - "epoch": 0.594168044491207, - "grad_norm": 1.8779609824363617, - "learning_rate": 1.49295032324658e-06, - "loss": 0.98, - "step": 7906 - }, - { - "epoch": 0.5942431985570419, - "grad_norm": 1.4354088541749346, - "learning_rate": 1.492479401764379e-06, - "loss": 0.9137, - "step": 7907 - }, - { - "epoch": 0.5943183526228769, - "grad_norm": 1.4329096437758915, - "learning_rate": 1.4920085103547177e-06, - "loss": 0.962, - "step": 7908 - }, - { - "epoch": 0.5943935066887118, - "grad_norm": 1.9013232993618652, - "learning_rate": 1.491537649045497e-06, - "loss": 1.0581, - "step": 7909 - }, - { - "epoch": 0.5944686607545469, - "grad_norm": 1.3906564173203388, - "learning_rate": 1.4910668178646178e-06, - "loss": 0.8988, - "step": 7910 - }, - { - "epoch": 0.5945438148203818, - "grad_norm": 1.9058710565852097, - "learning_rate": 1.4905960168399783e-06, - "loss": 1.0424, - "step": 7911 - }, - { - "epoch": 0.5946189688862168, - "grad_norm": 1.7209221725377022, - "learning_rate": 1.4901252459994753e-06, - "loss": 0.9334, - "step": 7912 - }, - { - "epoch": 0.5946941229520517, - "grad_norm": 1.3417341947838288, - "learning_rate": 1.4896545053710044e-06, - "loss": 0.8694, - "step": 7913 - }, - { - "epoch": 0.5947692770178866, - "grad_norm": 1.7052002608524333, - "learning_rate": 1.4891837949824578e-06, - "loss": 1.034, - "step": 7914 - }, - { - "epoch": 0.5948444310837216, - "grad_norm": 1.562214024122664, - "learning_rate": 1.4887131148617279e-06, - "loss": 1.045, - "step": 7915 - }, - { - "epoch": 0.5949195851495566, - "grad_norm": 1.3850964580094838, - "learning_rate": 1.4882424650367034e-06, - "loss": 0.9606, - "step": 7916 - }, - { - "epoch": 0.5949947392153916, - "grad_norm": 1.9139654018141057, - "learning_rate": 1.4877718455352723e-06, - "loss": 1.0378, - "step": 7917 - }, - { - "epoch": 0.5950698932812265, - "grad_norm": 1.9103326696513314, - "learning_rate": 1.4873012563853213e-06, - "loss": 1.0347, - "step": 7918 - }, - { - "epoch": 0.5951450473470615, - "grad_norm": 1.5985178967316145, - "learning_rate": 1.4868306976147337e-06, - "loss": 0.9502, - "step": 7919 - }, - { - "epoch": 0.5952202014128964, - "grad_norm": 2.0066055648262684, - "learning_rate": 1.4863601692513927e-06, - "loss": 0.9846, - "step": 7920 - }, - { - "epoch": 0.5952953554787314, - "grad_norm": 1.6572731744328768, - "learning_rate": 1.4858896713231786e-06, - "loss": 1.0389, - "step": 7921 - }, - { - "epoch": 0.5953705095445664, - "grad_norm": 1.6655047260539175, - "learning_rate": 1.485419203857969e-06, - "loss": 1.0064, - "step": 7922 - }, - { - "epoch": 0.5954456636104013, - "grad_norm": 1.532841147681014, - "learning_rate": 1.4849487668836439e-06, - "loss": 1.0321, - "step": 7923 - }, - { - "epoch": 0.5955208176762363, - "grad_norm": 1.9008839232611034, - "learning_rate": 1.4844783604280746e-06, - "loss": 0.9132, - "step": 7924 - }, - { - "epoch": 0.5955959717420712, - "grad_norm": 1.2995309863973747, - "learning_rate": 1.4840079845191379e-06, - "loss": 0.9089, - "step": 7925 - }, - { - "epoch": 0.5956711258079062, - "grad_norm": 3.8542906595166264, - "learning_rate": 1.483537639184704e-06, - "loss": 1.0253, - "step": 7926 - }, - { - "epoch": 0.5957462798737412, - "grad_norm": 1.7154952703737076, - "learning_rate": 1.4830673244526418e-06, - "loss": 1.0583, - "step": 7927 - }, - { - "epoch": 0.5958214339395761, - "grad_norm": 1.770594532142641, - "learning_rate": 1.4825970403508208e-06, - "loss": 0.9706, - "step": 7928 - }, - { - "epoch": 0.5958965880054111, - "grad_norm": 1.7079685178307593, - "learning_rate": 1.482126786907106e-06, - "loss": 0.9673, - "step": 7929 - }, - { - "epoch": 0.595971742071246, - "grad_norm": 1.6757973979180831, - "learning_rate": 1.4816565641493623e-06, - "loss": 0.9178, - "step": 7930 - }, - { - "epoch": 0.5960468961370811, - "grad_norm": 1.757930293904781, - "learning_rate": 1.481186372105452e-06, - "loss": 0.9297, - "step": 7931 - }, - { - "epoch": 0.596122050202916, - "grad_norm": 1.3089051744343478, - "learning_rate": 1.4807162108032363e-06, - "loss": 1.0385, - "step": 7932 - }, - { - "epoch": 0.5961972042687509, - "grad_norm": 2.173905116771676, - "learning_rate": 1.4802460802705731e-06, - "loss": 0.9658, - "step": 7933 - }, - { - "epoch": 0.5962723583345859, - "grad_norm": 0.751454572006329, - "learning_rate": 1.4797759805353199e-06, - "loss": 0.8468, - "step": 7934 - }, - { - "epoch": 0.5963475124004208, - "grad_norm": 2.4606950912976653, - "learning_rate": 1.4793059116253322e-06, - "loss": 1.0321, - "step": 7935 - }, - { - "epoch": 0.5964226664662559, - "grad_norm": 16.835631160206024, - "learning_rate": 1.4788358735684626e-06, - "loss": 0.9794, - "step": 7936 - }, - { - "epoch": 0.5964978205320908, - "grad_norm": 1.7320812970038508, - "learning_rate": 1.4783658663925637e-06, - "loss": 1.0047, - "step": 7937 - }, - { - "epoch": 0.5965729745979258, - "grad_norm": 0.8285225327926352, - "learning_rate": 1.4778958901254847e-06, - "loss": 0.9165, - "step": 7938 - }, - { - "epoch": 0.5966481286637607, - "grad_norm": 1.5826259672582117, - "learning_rate": 1.477425944795073e-06, - "loss": 0.9097, - "step": 7939 - }, - { - "epoch": 0.5967232827295956, - "grad_norm": 1.6002057539251069, - "learning_rate": 1.4769560304291755e-06, - "loss": 0.9288, - "step": 7940 - }, - { - "epoch": 0.5967984367954307, - "grad_norm": 1.305685883413162, - "learning_rate": 1.4764861470556357e-06, - "loss": 0.9651, - "step": 7941 - }, - { - "epoch": 0.5968735908612656, - "grad_norm": 1.5337730174680866, - "learning_rate": 1.4760162947022972e-06, - "loss": 1.0951, - "step": 7942 - }, - { - "epoch": 0.5969487449271006, - "grad_norm": 1.4302550476550584, - "learning_rate": 1.475546473396999e-06, - "loss": 1.0428, - "step": 7943 - }, - { - "epoch": 0.5970238989929355, - "grad_norm": 2.297652028795585, - "learning_rate": 1.47507668316758e-06, - "loss": 0.8927, - "step": 7944 - }, - { - "epoch": 0.5970990530587705, - "grad_norm": 1.9722650423311614, - "learning_rate": 1.4746069240418785e-06, - "loss": 0.9798, - "step": 7945 - }, - { - "epoch": 0.5971742071246054, - "grad_norm": 1.9293897144187797, - "learning_rate": 1.474137196047728e-06, - "loss": 0.9339, - "step": 7946 - }, - { - "epoch": 0.5972493611904404, - "grad_norm": 2.8262746247843125, - "learning_rate": 1.473667499212963e-06, - "loss": 0.885, - "step": 7947 - }, - { - "epoch": 0.5973245152562754, - "grad_norm": 1.6823881218090475, - "learning_rate": 1.4731978335654138e-06, - "loss": 0.9378, - "step": 7948 - }, - { - "epoch": 0.5973996693221103, - "grad_norm": 1.7789962966906132, - "learning_rate": 1.47272819913291e-06, - "loss": 1.0642, - "step": 7949 - }, - { - "epoch": 0.5974748233879453, - "grad_norm": 2.324343586622489, - "learning_rate": 1.4722585959432802e-06, - "loss": 0.9646, - "step": 7950 - }, - { - "epoch": 0.5975499774537802, - "grad_norm": 1.8033193711582347, - "learning_rate": 1.4717890240243484e-06, - "loss": 1.0112, - "step": 7951 - }, - { - "epoch": 0.5976251315196152, - "grad_norm": 1.2963569709540086, - "learning_rate": 1.4713194834039401e-06, - "loss": 1.0149, - "step": 7952 - }, - { - "epoch": 0.5977002855854502, - "grad_norm": 1.7787435196673367, - "learning_rate": 1.470849974109877e-06, - "loss": 0.9787, - "step": 7953 - }, - { - "epoch": 0.5977754396512851, - "grad_norm": 1.4170242400589599, - "learning_rate": 1.470380496169979e-06, - "loss": 0.9331, - "step": 7954 - }, - { - "epoch": 0.5978505937171201, - "grad_norm": 2.0434295543067558, - "learning_rate": 1.4699110496120648e-06, - "loss": 0.9596, - "step": 7955 - }, - { - "epoch": 0.597925747782955, - "grad_norm": 12.684180816795543, - "learning_rate": 1.4694416344639503e-06, - "loss": 0.9149, - "step": 7956 - }, - { - "epoch": 0.5980009018487901, - "grad_norm": 1.7526713146328494, - "learning_rate": 1.4689722507534514e-06, - "loss": 0.9263, - "step": 7957 - }, - { - "epoch": 0.598076055914625, - "grad_norm": 1.618772605237388, - "learning_rate": 1.4685028985083794e-06, - "loss": 1.0324, - "step": 7958 - }, - { - "epoch": 0.5981512099804599, - "grad_norm": 2.031948263020323, - "learning_rate": 1.4680335777565462e-06, - "loss": 1.0248, - "step": 7959 - }, - { - "epoch": 0.5982263640462949, - "grad_norm": 1.3023689821358528, - "learning_rate": 1.467564288525761e-06, - "loss": 1.0574, - "step": 7960 - }, - { - "epoch": 0.5983015181121298, - "grad_norm": 1.2805972413628657, - "learning_rate": 1.4670950308438298e-06, - "loss": 1.0533, - "step": 7961 - }, - { - "epoch": 0.5983766721779649, - "grad_norm": 0.7581355792484411, - "learning_rate": 1.4666258047385588e-06, - "loss": 0.8536, - "step": 7962 - }, - { - "epoch": 0.5984518262437998, - "grad_norm": 1.8036766358786642, - "learning_rate": 1.4661566102377507e-06, - "loss": 0.9503, - "step": 7963 - }, - { - "epoch": 0.5985269803096348, - "grad_norm": 1.5654734738057146, - "learning_rate": 1.465687447369209e-06, - "loss": 1.021, - "step": 7964 - }, - { - "epoch": 0.5986021343754697, - "grad_norm": 1.7890817566214268, - "learning_rate": 1.4652183161607314e-06, - "loss": 0.8485, - "step": 7965 - }, - { - "epoch": 0.5986772884413046, - "grad_norm": 3.5147150515099437, - "learning_rate": 1.4647492166401159e-06, - "loss": 1.0061, - "step": 7966 - }, - { - "epoch": 0.5987524425071397, - "grad_norm": 1.8451669065452165, - "learning_rate": 1.4642801488351598e-06, - "loss": 0.9118, - "step": 7967 - }, - { - "epoch": 0.5988275965729746, - "grad_norm": 2.889766256258126, - "learning_rate": 1.4638111127736555e-06, - "loss": 0.9837, - "step": 7968 - }, - { - "epoch": 0.5989027506388096, - "grad_norm": 2.0326507433631162, - "learning_rate": 1.4633421084833965e-06, - "loss": 1.0579, - "step": 7969 - }, - { - "epoch": 0.5989779047046445, - "grad_norm": 1.8580434824040777, - "learning_rate": 1.4628731359921727e-06, - "loss": 0.9054, - "step": 7970 - }, - { - "epoch": 0.5990530587704794, - "grad_norm": 0.747683244973224, - "learning_rate": 1.462404195327772e-06, - "loss": 0.8675, - "step": 7971 - }, - { - "epoch": 0.5991282128363145, - "grad_norm": 1.599992978557439, - "learning_rate": 1.4619352865179814e-06, - "loss": 0.844, - "step": 7972 - }, - { - "epoch": 0.5992033669021494, - "grad_norm": 1.6126151938285522, - "learning_rate": 1.4614664095905856e-06, - "loss": 0.9765, - "step": 7973 - }, - { - "epoch": 0.5992785209679844, - "grad_norm": 1.4876971362617235, - "learning_rate": 1.460997564573367e-06, - "loss": 1.0094, - "step": 7974 - }, - { - "epoch": 0.5993536750338193, - "grad_norm": 2.0928154752908683, - "learning_rate": 1.4605287514941068e-06, - "loss": 1.0495, - "step": 7975 - }, - { - "epoch": 0.5994288290996543, - "grad_norm": 1.9375682941828922, - "learning_rate": 1.460059970380584e-06, - "loss": 1.0308, - "step": 7976 - }, - { - "epoch": 0.5995039831654893, - "grad_norm": 1.8041877880431199, - "learning_rate": 1.4595912212605755e-06, - "loss": 0.9464, - "step": 7977 - }, - { - "epoch": 0.5995791372313242, - "grad_norm": 1.8474958483245412, - "learning_rate": 1.459122504161856e-06, - "loss": 1.1106, - "step": 7978 - }, - { - "epoch": 0.5996542912971592, - "grad_norm": 4.87628352556827, - "learning_rate": 1.4586538191121999e-06, - "loss": 0.978, - "step": 7979 - }, - { - "epoch": 0.5997294453629941, - "grad_norm": 2.0495144664498604, - "learning_rate": 1.4581851661393776e-06, - "loss": 0.9508, - "step": 7980 - }, - { - "epoch": 0.5998045994288291, - "grad_norm": 1.7238691908117707, - "learning_rate": 1.4577165452711592e-06, - "loss": 1.0214, - "step": 7981 - }, - { - "epoch": 0.599879753494664, - "grad_norm": 1.6000396187168688, - "learning_rate": 1.4572479565353122e-06, - "loss": 0.9099, - "step": 7982 - }, - { - "epoch": 0.5999549075604991, - "grad_norm": 1.6935175521522154, - "learning_rate": 1.4567793999596014e-06, - "loss": 1.0206, - "step": 7983 - }, - { - "epoch": 0.600030061626334, - "grad_norm": 2.1805558201276076, - "learning_rate": 1.456310875571792e-06, - "loss": 0.9764, - "step": 7984 - }, - { - "epoch": 0.6001052156921689, - "grad_norm": 1.5467728089765191, - "learning_rate": 1.4558423833996443e-06, - "loss": 0.8794, - "step": 7985 - }, - { - "epoch": 0.6001803697580039, - "grad_norm": 1.6465642328229682, - "learning_rate": 1.4553739234709199e-06, - "loss": 0.9498, - "step": 7986 - }, - { - "epoch": 0.6002555238238388, - "grad_norm": 2.149475401818137, - "learning_rate": 1.4549054958133765e-06, - "loss": 0.9436, - "step": 7987 - }, - { - "epoch": 0.6003306778896739, - "grad_norm": 1.944564920810552, - "learning_rate": 1.4544371004547685e-06, - "loss": 0.8699, - "step": 7988 - }, - { - "epoch": 0.6004058319555088, - "grad_norm": 1.8337097937421873, - "learning_rate": 1.453968737422852e-06, - "loss": 0.925, - "step": 7989 - }, - { - "epoch": 0.6004809860213438, - "grad_norm": 1.8295636507332427, - "learning_rate": 1.4535004067453785e-06, - "loss": 1.049, - "step": 7990 - }, - { - "epoch": 0.6005561400871787, - "grad_norm": 1.5799259447295204, - "learning_rate": 1.453032108450099e-06, - "loss": 0.9111, - "step": 7991 - }, - { - "epoch": 0.6006312941530136, - "grad_norm": 1.4199130597162313, - "learning_rate": 1.4525638425647615e-06, - "loss": 0.9863, - "step": 7992 - }, - { - "epoch": 0.6007064482188487, - "grad_norm": 1.8123677961870779, - "learning_rate": 1.4520956091171121e-06, - "loss": 1.0391, - "step": 7993 - }, - { - "epoch": 0.6007816022846836, - "grad_norm": 1.5053596904469977, - "learning_rate": 1.4516274081348965e-06, - "loss": 0.9259, - "step": 7994 - }, - { - "epoch": 0.6008567563505186, - "grad_norm": 1.7264445683175422, - "learning_rate": 1.4511592396458565e-06, - "loss": 1.0611, - "step": 7995 - }, - { - "epoch": 0.6009319104163535, - "grad_norm": 1.942092804813703, - "learning_rate": 1.4506911036777335e-06, - "loss": 0.9851, - "step": 7996 - }, - { - "epoch": 0.6010070644821884, - "grad_norm": 1.6838011293075115, - "learning_rate": 1.4502230002582655e-06, - "loss": 1.0781, - "step": 7997 - }, - { - "epoch": 0.6010822185480235, - "grad_norm": 1.6823764303325885, - "learning_rate": 1.4497549294151905e-06, - "loss": 1.0095, - "step": 7998 - }, - { - "epoch": 0.6011573726138584, - "grad_norm": 1.9777751112978292, - "learning_rate": 1.4492868911762428e-06, - "loss": 0.9776, - "step": 7999 - }, - { - "epoch": 0.6012325266796934, - "grad_norm": 1.7545748585309329, - "learning_rate": 1.4488188855691555e-06, - "loss": 0.9408, - "step": 8000 - }, - { - "epoch": 0.6013076807455283, - "grad_norm": 8.92634024507796, - "learning_rate": 1.44835091262166e-06, - "loss": 0.9762, - "step": 8001 - }, - { - "epoch": 0.6013828348113633, - "grad_norm": 2.0582002846449727, - "learning_rate": 1.447882972361485e-06, - "loss": 1.0129, - "step": 8002 - }, - { - "epoch": 0.6014579888771983, - "grad_norm": 1.3898169495098398, - "learning_rate": 1.4474150648163588e-06, - "loss": 0.997, - "step": 8003 - }, - { - "epoch": 0.6015331429430332, - "grad_norm": 2.3178721737904318, - "learning_rate": 1.4469471900140056e-06, - "loss": 0.9743, - "step": 8004 - }, - { - "epoch": 0.6016082970088682, - "grad_norm": 1.5910979251756219, - "learning_rate": 1.4464793479821489e-06, - "loss": 0.9979, - "step": 8005 - }, - { - "epoch": 0.6016834510747031, - "grad_norm": 1.4997838977277436, - "learning_rate": 1.446011538748511e-06, - "loss": 0.9327, - "step": 8006 - }, - { - "epoch": 0.6017586051405381, - "grad_norm": 1.4791231104209177, - "learning_rate": 1.4455437623408097e-06, - "loss": 0.9472, - "step": 8007 - }, - { - "epoch": 0.601833759206373, - "grad_norm": 2.121428011976263, - "learning_rate": 1.4450760187867644e-06, - "loss": 0.9514, - "step": 8008 - }, - { - "epoch": 0.6019089132722081, - "grad_norm": 1.7664960214984144, - "learning_rate": 1.4446083081140904e-06, - "loss": 0.9417, - "step": 8009 - }, - { - "epoch": 0.601984067338043, - "grad_norm": 3.078213453232194, - "learning_rate": 1.4441406303504998e-06, - "loss": 0.9042, - "step": 8010 - }, - { - "epoch": 0.6020592214038779, - "grad_norm": 2.425312777139387, - "learning_rate": 1.4436729855237063e-06, - "loss": 0.8875, - "step": 8011 - }, - { - "epoch": 0.6021343754697129, - "grad_norm": 1.649942587807316, - "learning_rate": 1.443205373661418e-06, - "loss": 0.8725, - "step": 8012 - }, - { - "epoch": 0.6022095295355478, - "grad_norm": 0.8285541142395203, - "learning_rate": 1.442737794791344e-06, - "loss": 0.9253, - "step": 8013 - }, - { - "epoch": 0.6022846836013829, - "grad_norm": 2.0130903290419138, - "learning_rate": 1.4422702489411896e-06, - "loss": 0.894, - "step": 8014 - }, - { - "epoch": 0.6023598376672178, - "grad_norm": 1.4367556925551823, - "learning_rate": 1.441802736138658e-06, - "loss": 0.9523, - "step": 8015 - }, - { - "epoch": 0.6024349917330527, - "grad_norm": 2.324225315116778, - "learning_rate": 1.4413352564114525e-06, - "loss": 0.9126, - "step": 8016 - }, - { - "epoch": 0.6025101457988877, - "grad_norm": 0.7133136487039254, - "learning_rate": 1.4408678097872717e-06, - "loss": 0.8563, - "step": 8017 - }, - { - "epoch": 0.6025852998647226, - "grad_norm": 0.7299367834179615, - "learning_rate": 1.440400396293815e-06, - "loss": 0.8207, - "step": 8018 - }, - { - "epoch": 0.6026604539305577, - "grad_norm": 1.6754823075706224, - "learning_rate": 1.439933015958777e-06, - "loss": 0.9607, - "step": 8019 - }, - { - "epoch": 0.6027356079963926, - "grad_norm": 1.6288654862021306, - "learning_rate": 1.4394656688098526e-06, - "loss": 1.0029, - "step": 8020 - }, - { - "epoch": 0.6028107620622276, - "grad_norm": 1.6312807920626327, - "learning_rate": 1.4389983548747337e-06, - "loss": 1.0721, - "step": 8021 - }, - { - "epoch": 0.6028859161280625, - "grad_norm": 1.635232182036946, - "learning_rate": 1.4385310741811106e-06, - "loss": 0.9221, - "step": 8022 - }, - { - "epoch": 0.6029610701938974, - "grad_norm": 2.207553065905289, - "learning_rate": 1.4380638267566716e-06, - "loss": 0.9714, - "step": 8023 - }, - { - "epoch": 0.6030362242597325, - "grad_norm": 1.7454081646580215, - "learning_rate": 1.4375966126291022e-06, - "loss": 1.0221, - "step": 8024 - }, - { - "epoch": 0.6031113783255674, - "grad_norm": 1.6279087442882738, - "learning_rate": 1.4371294318260874e-06, - "loss": 0.9635, - "step": 8025 - }, - { - "epoch": 0.6031865323914024, - "grad_norm": 2.297502808179997, - "learning_rate": 1.4366622843753092e-06, - "loss": 1.002, - "step": 8026 - }, - { - "epoch": 0.6032616864572373, - "grad_norm": 1.4697287524641076, - "learning_rate": 1.4361951703044475e-06, - "loss": 0.9893, - "step": 8027 - }, - { - "epoch": 0.6033368405230723, - "grad_norm": 2.342360020773695, - "learning_rate": 1.4357280896411813e-06, - "loss": 0.9965, - "step": 8028 - }, - { - "epoch": 0.6034119945889073, - "grad_norm": 3.183064235344704, - "learning_rate": 1.4352610424131854e-06, - "loss": 1.0141, - "step": 8029 - }, - { - "epoch": 0.6034871486547422, - "grad_norm": 2.072226506905761, - "learning_rate": 1.4347940286481364e-06, - "loss": 0.8635, - "step": 8030 - }, - { - "epoch": 0.6035623027205772, - "grad_norm": 1.5723092967537282, - "learning_rate": 1.434327048373706e-06, - "loss": 1.0204, - "step": 8031 - }, - { - "epoch": 0.6036374567864121, - "grad_norm": 1.5685107014430542, - "learning_rate": 1.4338601016175624e-06, - "loss": 0.8828, - "step": 8032 - }, - { - "epoch": 0.6037126108522471, - "grad_norm": 2.0255272873448447, - "learning_rate": 1.4333931884073769e-06, - "loss": 0.9421, - "step": 8033 - }, - { - "epoch": 0.6037877649180821, - "grad_norm": 1.2634114818172641, - "learning_rate": 1.4329263087708144e-06, - "loss": 1.0278, - "step": 8034 - }, - { - "epoch": 0.6038629189839171, - "grad_norm": 1.7675690202847347, - "learning_rate": 1.4324594627355397e-06, - "loss": 0.9249, - "step": 8035 - }, - { - "epoch": 0.603938073049752, - "grad_norm": 1.6300581922850472, - "learning_rate": 1.431992650329215e-06, - "loss": 1.0092, - "step": 8036 - }, - { - "epoch": 0.6040132271155869, - "grad_norm": 2.054189643447644, - "learning_rate": 1.4315258715795007e-06, - "loss": 1.1011, - "step": 8037 - }, - { - "epoch": 0.6040883811814219, - "grad_norm": 1.4319600039176459, - "learning_rate": 1.4310591265140555e-06, - "loss": 0.9841, - "step": 8038 - }, - { - "epoch": 0.6041635352472569, - "grad_norm": 1.4974620329003427, - "learning_rate": 1.4305924151605354e-06, - "loss": 0.941, - "step": 8039 - }, - { - "epoch": 0.6042386893130919, - "grad_norm": 2.412982785634222, - "learning_rate": 1.4301257375465956e-06, - "loss": 1.1101, - "step": 8040 - }, - { - "epoch": 0.6043138433789268, - "grad_norm": 1.795723255734216, - "learning_rate": 1.4296590936998874e-06, - "loss": 0.9326, - "step": 8041 - }, - { - "epoch": 0.6043889974447617, - "grad_norm": 1.7077547892516045, - "learning_rate": 1.4291924836480625e-06, - "loss": 0.9094, - "step": 8042 - }, - { - "epoch": 0.6044641515105967, - "grad_norm": 1.7039790680589169, - "learning_rate": 1.4287259074187685e-06, - "loss": 0.9284, - "step": 8043 - }, - { - "epoch": 0.6045393055764317, - "grad_norm": 1.8658720354521892, - "learning_rate": 1.428259365039652e-06, - "loss": 0.9629, - "step": 8044 - }, - { - "epoch": 0.6046144596422667, - "grad_norm": 3.2055909131893556, - "learning_rate": 1.4277928565383577e-06, - "loss": 0.8721, - "step": 8045 - }, - { - "epoch": 0.6046896137081016, - "grad_norm": 1.4390316142890043, - "learning_rate": 1.4273263819425272e-06, - "loss": 1.0087, - "step": 8046 - }, - { - "epoch": 0.6047647677739366, - "grad_norm": 1.6043284280970014, - "learning_rate": 1.426859941279802e-06, - "loss": 0.9237, - "step": 8047 - }, - { - "epoch": 0.6048399218397715, - "grad_norm": 1.6804274237821522, - "learning_rate": 1.42639353457782e-06, - "loss": 0.9187, - "step": 8048 - }, - { - "epoch": 0.6049150759056064, - "grad_norm": 1.8436574104466172, - "learning_rate": 1.4259271618642166e-06, - "loss": 0.9894, - "step": 8049 - }, - { - "epoch": 0.6049902299714415, - "grad_norm": 1.341806470012091, - "learning_rate": 1.4254608231666286e-06, - "loss": 0.9311, - "step": 8050 - }, - { - "epoch": 0.6050653840372764, - "grad_norm": 1.3156598972213736, - "learning_rate": 1.4249945185126855e-06, - "loss": 0.9744, - "step": 8051 - }, - { - "epoch": 0.6051405381031114, - "grad_norm": 2.0155605335424136, - "learning_rate": 1.4245282479300199e-06, - "loss": 0.9535, - "step": 8052 - }, - { - "epoch": 0.6052156921689463, - "grad_norm": 1.3537262371510208, - "learning_rate": 1.424062011446259e-06, - "loss": 1.024, - "step": 8053 - }, - { - "epoch": 0.6052908462347814, - "grad_norm": 3.7738306065859115, - "learning_rate": 1.4235958090890293e-06, - "loss": 0.8931, - "step": 8054 - }, - { - "epoch": 0.6053660003006163, - "grad_norm": 2.0295109273386562, - "learning_rate": 1.4231296408859553e-06, - "loss": 1.0694, - "step": 8055 - }, - { - "epoch": 0.6054411543664512, - "grad_norm": 1.5856223709391388, - "learning_rate": 1.4226635068646586e-06, - "loss": 0.9912, - "step": 8056 - }, - { - "epoch": 0.6055163084322862, - "grad_norm": 1.9113846088158168, - "learning_rate": 1.4221974070527606e-06, - "loss": 0.9167, - "step": 8057 - }, - { - "epoch": 0.6055914624981211, - "grad_norm": 1.4766627686379494, - "learning_rate": 1.4217313414778786e-06, - "loss": 0.9488, - "step": 8058 - }, - { - "epoch": 0.6056666165639562, - "grad_norm": 1.623313101590851, - "learning_rate": 1.4212653101676285e-06, - "loss": 0.9227, - "step": 8059 - }, - { - "epoch": 0.6057417706297911, - "grad_norm": 2.500854822880227, - "learning_rate": 1.4207993131496254e-06, - "loss": 0.9248, - "step": 8060 - }, - { - "epoch": 0.605816924695626, - "grad_norm": 1.4363234307464254, - "learning_rate": 1.4203333504514805e-06, - "loss": 0.952, - "step": 8061 - }, - { - "epoch": 0.605892078761461, - "grad_norm": 1.358982204820392, - "learning_rate": 1.4198674221008045e-06, - "loss": 0.9485, - "step": 8062 - }, - { - "epoch": 0.6059672328272959, - "grad_norm": 1.4007549583405854, - "learning_rate": 1.419401528125205e-06, - "loss": 0.9039, - "step": 8063 - }, - { - "epoch": 0.606042386893131, - "grad_norm": 1.5120520409150688, - "learning_rate": 1.4189356685522884e-06, - "loss": 1.0702, - "step": 8064 - }, - { - "epoch": 0.6061175409589659, - "grad_norm": 1.3765040321360325, - "learning_rate": 1.4184698434096586e-06, - "loss": 0.8575, - "step": 8065 - }, - { - "epoch": 0.6061926950248009, - "grad_norm": 1.9131058774165026, - "learning_rate": 1.4180040527249172e-06, - "loss": 1.0395, - "step": 8066 - }, - { - "epoch": 0.6062678490906358, - "grad_norm": 1.5018124756357225, - "learning_rate": 1.4175382965256644e-06, - "loss": 1.0409, - "step": 8067 - }, - { - "epoch": 0.6063430031564707, - "grad_norm": 2.9488636737822818, - "learning_rate": 1.4170725748394977e-06, - "loss": 0.9061, - "step": 8068 - }, - { - "epoch": 0.6064181572223057, - "grad_norm": 1.9812879203753535, - "learning_rate": 1.4166068876940135e-06, - "loss": 1.0309, - "step": 8069 - }, - { - "epoch": 0.6064933112881407, - "grad_norm": 1.763264115723131, - "learning_rate": 1.4161412351168053e-06, - "loss": 0.8239, - "step": 8070 - }, - { - "epoch": 0.6065684653539757, - "grad_norm": 1.9670536665351606, - "learning_rate": 1.4156756171354637e-06, - "loss": 0.9948, - "step": 8071 - }, - { - "epoch": 0.6066436194198106, - "grad_norm": 2.0045098479503127, - "learning_rate": 1.4152100337775804e-06, - "loss": 1.094, - "step": 8072 - }, - { - "epoch": 0.6067187734856456, - "grad_norm": 1.5228652270492813, - "learning_rate": 1.414744485070741e-06, - "loss": 1.0603, - "step": 8073 - }, - { - "epoch": 0.6067939275514805, - "grad_norm": 1.4633546304709852, - "learning_rate": 1.4142789710425325e-06, - "loss": 1.0722, - "step": 8074 - }, - { - "epoch": 0.6068690816173155, - "grad_norm": 1.5269119159547064, - "learning_rate": 1.4138134917205377e-06, - "loss": 0.9404, - "step": 8075 - }, - { - "epoch": 0.6069442356831505, - "grad_norm": 1.3419451458119613, - "learning_rate": 1.413348047132338e-06, - "loss": 0.967, - "step": 8076 - }, - { - "epoch": 0.6070193897489854, - "grad_norm": 2.0314112672611055, - "learning_rate": 1.4128826373055134e-06, - "loss": 0.9744, - "step": 8077 - }, - { - "epoch": 0.6070945438148204, - "grad_norm": 2.1552859376407447, - "learning_rate": 1.4124172622676406e-06, - "loss": 0.9973, - "step": 8078 - }, - { - "epoch": 0.6071696978806553, - "grad_norm": 1.5520388022910827, - "learning_rate": 1.411951922046295e-06, - "loss": 1.1076, - "step": 8079 - }, - { - "epoch": 0.6072448519464904, - "grad_norm": 1.6348873261757708, - "learning_rate": 1.4114866166690498e-06, - "loss": 0.9822, - "step": 8080 - }, - { - "epoch": 0.6073200060123253, - "grad_norm": 1.55486116445743, - "learning_rate": 1.411021346163476e-06, - "loss": 0.8942, - "step": 8081 - }, - { - "epoch": 0.6073951600781602, - "grad_norm": 2.2060633334547264, - "learning_rate": 1.4105561105571428e-06, - "loss": 1.0335, - "step": 8082 - }, - { - "epoch": 0.6074703141439952, - "grad_norm": 1.48551046209975, - "learning_rate": 1.410090909877617e-06, - "loss": 0.9333, - "step": 8083 - }, - { - "epoch": 0.6075454682098301, - "grad_norm": 1.8765392343316032, - "learning_rate": 1.4096257441524643e-06, - "loss": 0.9459, - "step": 8084 - }, - { - "epoch": 0.6076206222756652, - "grad_norm": 1.8608843023093673, - "learning_rate": 1.4091606134092465e-06, - "loss": 0.9485, - "step": 8085 - }, - { - "epoch": 0.6076957763415001, - "grad_norm": 1.755311195083739, - "learning_rate": 1.4086955176755248e-06, - "loss": 0.8908, - "step": 8086 - }, - { - "epoch": 0.607770930407335, - "grad_norm": 1.7358493209129473, - "learning_rate": 1.4082304569788582e-06, - "loss": 0.9438, - "step": 8087 - }, - { - "epoch": 0.60784608447317, - "grad_norm": 8.748855079949658, - "learning_rate": 1.407765431346803e-06, - "loss": 0.9665, - "step": 8088 - }, - { - "epoch": 0.6079212385390049, - "grad_norm": 0.7939385933492009, - "learning_rate": 1.4073004408069138e-06, - "loss": 0.9489, - "step": 8089 - }, - { - "epoch": 0.60799639260484, - "grad_norm": 1.7457040100027126, - "learning_rate": 1.4068354853867429e-06, - "loss": 1.0265, - "step": 8090 - }, - { - "epoch": 0.6080715466706749, - "grad_norm": 1.4943654408581921, - "learning_rate": 1.406370565113841e-06, - "loss": 0.9696, - "step": 8091 - }, - { - "epoch": 0.6081467007365099, - "grad_norm": 1.981262048149179, - "learning_rate": 1.4059056800157567e-06, - "loss": 1.0715, - "step": 8092 - }, - { - "epoch": 0.6082218548023448, - "grad_norm": 1.4624938834298844, - "learning_rate": 1.4054408301200345e-06, - "loss": 1.0222, - "step": 8093 - }, - { - "epoch": 0.6082970088681797, - "grad_norm": 2.1179496266153564, - "learning_rate": 1.4049760154542214e-06, - "loss": 0.9724, - "step": 8094 - }, - { - "epoch": 0.6083721629340147, - "grad_norm": 1.5002956099096798, - "learning_rate": 1.4045112360458564e-06, - "loss": 0.9751, - "step": 8095 - }, - { - "epoch": 0.6084473169998497, - "grad_norm": 1.5137164496965034, - "learning_rate": 1.404046491922482e-06, - "loss": 0.9368, - "step": 8096 - }, - { - "epoch": 0.6085224710656847, - "grad_norm": 1.5365477799926026, - "learning_rate": 1.403581783111635e-06, - "loss": 1.0352, - "step": 8097 - }, - { - "epoch": 0.6085976251315196, - "grad_norm": 1.619987091672356, - "learning_rate": 1.4031171096408506e-06, - "loss": 0.9683, - "step": 8098 - }, - { - "epoch": 0.6086727791973546, - "grad_norm": 1.6551705207450773, - "learning_rate": 1.4026524715376637e-06, - "loss": 0.9499, - "step": 8099 - }, - { - "epoch": 0.6087479332631895, - "grad_norm": 1.7784506903443935, - "learning_rate": 1.4021878688296047e-06, - "loss": 0.9989, - "step": 8100 - }, - { - "epoch": 0.6088230873290245, - "grad_norm": 1.6707417577950923, - "learning_rate": 1.401723301544204e-06, - "loss": 1.0013, - "step": 8101 - }, - { - "epoch": 0.6088982413948595, - "grad_norm": 5.015476497935443, - "learning_rate": 1.4012587697089885e-06, - "loss": 1.0177, - "step": 8102 - }, - { - "epoch": 0.6089733954606944, - "grad_norm": 1.4644700044039236, - "learning_rate": 1.4007942733514836e-06, - "loss": 0.9112, - "step": 8103 - }, - { - "epoch": 0.6090485495265294, - "grad_norm": 2.656401775736216, - "learning_rate": 1.400329812499213e-06, - "loss": 1.0817, - "step": 8104 - }, - { - "epoch": 0.6091237035923643, - "grad_norm": 1.7788238734160524, - "learning_rate": 1.3998653871796964e-06, - "loss": 1.0822, - "step": 8105 - }, - { - "epoch": 0.6091988576581993, - "grad_norm": 2.4375106615664768, - "learning_rate": 1.3994009974204547e-06, - "loss": 0.9459, - "step": 8106 - }, - { - "epoch": 0.6092740117240343, - "grad_norm": 2.09055277837931, - "learning_rate": 1.3989366432490028e-06, - "loss": 1.0247, - "step": 8107 - }, - { - "epoch": 0.6093491657898692, - "grad_norm": 1.7168313848609533, - "learning_rate": 1.3984723246928569e-06, - "loss": 1.0148, - "step": 8108 - }, - { - "epoch": 0.6094243198557042, - "grad_norm": 1.6996831149791465, - "learning_rate": 1.3980080417795296e-06, - "loss": 0.9535, - "step": 8109 - }, - { - "epoch": 0.6094994739215391, - "grad_norm": 1.6218816474087416, - "learning_rate": 1.39754379453653e-06, - "loss": 1.0642, - "step": 8110 - }, - { - "epoch": 0.6095746279873742, - "grad_norm": 1.8790643510503262, - "learning_rate": 1.3970795829913682e-06, - "loss": 0.9886, - "step": 8111 - }, - { - "epoch": 0.6096497820532091, - "grad_norm": 2.383190787883214, - "learning_rate": 1.396615407171549e-06, - "loss": 1.0124, - "step": 8112 - }, - { - "epoch": 0.609724936119044, - "grad_norm": 0.6979672831120052, - "learning_rate": 1.3961512671045787e-06, - "loss": 0.8517, - "step": 8113 - }, - { - "epoch": 0.609800090184879, - "grad_norm": 1.5109139599391452, - "learning_rate": 1.3956871628179577e-06, - "loss": 1.0572, - "step": 8114 - }, - { - "epoch": 0.6098752442507139, - "grad_norm": 1.5224219434049768, - "learning_rate": 1.3952230943391856e-06, - "loss": 1.0053, - "step": 8115 - }, - { - "epoch": 0.609950398316549, - "grad_norm": 1.6766642487181767, - "learning_rate": 1.3947590616957618e-06, - "loss": 0.8482, - "step": 8116 - }, - { - "epoch": 0.6100255523823839, - "grad_norm": 1.563263210819755, - "learning_rate": 1.3942950649151808e-06, - "loss": 1.0063, - "step": 8117 - }, - { - "epoch": 0.6101007064482189, - "grad_norm": 2.0050024414047636, - "learning_rate": 1.3938311040249371e-06, - "loss": 0.9912, - "step": 8118 - }, - { - "epoch": 0.6101758605140538, - "grad_norm": 1.7170805018709505, - "learning_rate": 1.3933671790525215e-06, - "loss": 1.0157, - "step": 8119 - }, - { - "epoch": 0.6102510145798887, - "grad_norm": 1.2918803540743098, - "learning_rate": 1.3929032900254232e-06, - "loss": 1.0025, - "step": 8120 - }, - { - "epoch": 0.6103261686457238, - "grad_norm": 1.8079334479042035, - "learning_rate": 1.39243943697113e-06, - "loss": 0.9852, - "step": 8121 - }, - { - "epoch": 0.6104013227115587, - "grad_norm": 1.6514782814632767, - "learning_rate": 1.3919756199171263e-06, - "loss": 0.9957, - "step": 8122 - }, - { - "epoch": 0.6104764767773937, - "grad_norm": 1.9072845653302426, - "learning_rate": 1.3915118388908958e-06, - "loss": 1.0128, - "step": 8123 - }, - { - "epoch": 0.6105516308432286, - "grad_norm": 0.6928953310361184, - "learning_rate": 1.3910480939199184e-06, - "loss": 0.8337, - "step": 8124 - }, - { - "epoch": 0.6106267849090636, - "grad_norm": 1.7153297378694106, - "learning_rate": 1.3905843850316738e-06, - "loss": 0.8413, - "step": 8125 - }, - { - "epoch": 0.6107019389748986, - "grad_norm": 2.2701588483511075, - "learning_rate": 1.3901207122536383e-06, - "loss": 1.0205, - "step": 8126 - }, - { - "epoch": 0.6107770930407335, - "grad_norm": 1.5471647357286884, - "learning_rate": 1.3896570756132851e-06, - "loss": 0.9716, - "step": 8127 - }, - { - "epoch": 0.6108522471065685, - "grad_norm": 1.9308009468142675, - "learning_rate": 1.3891934751380879e-06, - "loss": 0.9634, - "step": 8128 - }, - { - "epoch": 0.6109274011724034, - "grad_norm": 1.4893369759953643, - "learning_rate": 1.3887299108555158e-06, - "loss": 0.9848, - "step": 8129 - }, - { - "epoch": 0.6110025552382384, - "grad_norm": 1.4542246933575127, - "learning_rate": 1.3882663827930375e-06, - "loss": 1.0447, - "step": 8130 - }, - { - "epoch": 0.6110777093040733, - "grad_norm": 1.354528359258139, - "learning_rate": 1.3878028909781187e-06, - "loss": 0.9489, - "step": 8131 - }, - { - "epoch": 0.6111528633699083, - "grad_norm": 2.668128348138656, - "learning_rate": 1.3873394354382225e-06, - "loss": 0.9894, - "step": 8132 - }, - { - "epoch": 0.6112280174357433, - "grad_norm": 2.0700888458940643, - "learning_rate": 1.3868760162008108e-06, - "loss": 0.9708, - "step": 8133 - }, - { - "epoch": 0.6113031715015782, - "grad_norm": 1.4642979126265965, - "learning_rate": 1.3864126332933425e-06, - "loss": 0.9677, - "step": 8134 - }, - { - "epoch": 0.6113783255674132, - "grad_norm": 3.203965090862695, - "learning_rate": 1.3859492867432765e-06, - "loss": 0.9992, - "step": 8135 - }, - { - "epoch": 0.6114534796332481, - "grad_norm": 1.983324929494711, - "learning_rate": 1.385485976578066e-06, - "loss": 0.9426, - "step": 8136 - }, - { - "epoch": 0.6115286336990832, - "grad_norm": 1.4949727054005295, - "learning_rate": 1.3850227028251639e-06, - "loss": 1.0097, - "step": 8137 - }, - { - "epoch": 0.6116037877649181, - "grad_norm": 1.6812109137887656, - "learning_rate": 1.3845594655120224e-06, - "loss": 0.9749, - "step": 8138 - }, - { - "epoch": 0.611678941830753, - "grad_norm": 1.8429754537644012, - "learning_rate": 1.3840962646660885e-06, - "loss": 0.889, - "step": 8139 - }, - { - "epoch": 0.611754095896588, - "grad_norm": 1.6701713509509961, - "learning_rate": 1.3836331003148101e-06, - "loss": 1.0246, - "step": 8140 - }, - { - "epoch": 0.6118292499624229, - "grad_norm": 2.195773147636465, - "learning_rate": 1.3831699724856307e-06, - "loss": 0.9453, - "step": 8141 - }, - { - "epoch": 0.611904404028258, - "grad_norm": 1.6874992935744326, - "learning_rate": 1.3827068812059918e-06, - "loss": 0.9034, - "step": 8142 - }, - { - "epoch": 0.6119795580940929, - "grad_norm": 1.3911216791822683, - "learning_rate": 1.3822438265033345e-06, - "loss": 0.9723, - "step": 8143 - }, - { - "epoch": 0.6120547121599279, - "grad_norm": 2.7256862686153394, - "learning_rate": 1.3817808084050957e-06, - "loss": 0.8987, - "step": 8144 - }, - { - "epoch": 0.6121298662257628, - "grad_norm": 1.4834543685966535, - "learning_rate": 1.3813178269387119e-06, - "loss": 1.0116, - "step": 8145 - }, - { - "epoch": 0.6122050202915977, - "grad_norm": 2.154392382112018, - "learning_rate": 1.380854882131615e-06, - "loss": 0.936, - "step": 8146 - }, - { - "epoch": 0.6122801743574328, - "grad_norm": 2.226886069896334, - "learning_rate": 1.3803919740112383e-06, - "loss": 0.8675, - "step": 8147 - }, - { - "epoch": 0.6123553284232677, - "grad_norm": 2.040084290539512, - "learning_rate": 1.379929102605009e-06, - "loss": 1.0466, - "step": 8148 - }, - { - "epoch": 0.6124304824891027, - "grad_norm": 1.5400783845843895, - "learning_rate": 1.379466267940355e-06, - "loss": 0.9108, - "step": 8149 - }, - { - "epoch": 0.6125056365549376, - "grad_norm": 2.477682251602248, - "learning_rate": 1.3790034700447008e-06, - "loss": 0.9659, - "step": 8150 - }, - { - "epoch": 0.6125807906207725, - "grad_norm": 1.4023409447602078, - "learning_rate": 1.378540708945469e-06, - "loss": 1.0548, - "step": 8151 - }, - { - "epoch": 0.6126559446866076, - "grad_norm": 1.669491218226192, - "learning_rate": 1.3780779846700799e-06, - "loss": 1.0157, - "step": 8152 - }, - { - "epoch": 0.6127310987524425, - "grad_norm": 1.6475903833861107, - "learning_rate": 1.3776152972459517e-06, - "loss": 0.9028, - "step": 8153 - }, - { - "epoch": 0.6128062528182775, - "grad_norm": 1.6369477978948617, - "learning_rate": 1.3771526467005004e-06, - "loss": 0.9633, - "step": 8154 - }, - { - "epoch": 0.6128814068841124, - "grad_norm": 1.407060897997714, - "learning_rate": 1.37669003306114e-06, - "loss": 1.0573, - "step": 8155 - }, - { - "epoch": 0.6129565609499474, - "grad_norm": 1.9314401057726045, - "learning_rate": 1.3762274563552811e-06, - "loss": 0.9776, - "step": 8156 - }, - { - "epoch": 0.6130317150157824, - "grad_norm": 1.6494676887282294, - "learning_rate": 1.375764916610335e-06, - "loss": 0.9215, - "step": 8157 - }, - { - "epoch": 0.6131068690816173, - "grad_norm": 0.7276401526349989, - "learning_rate": 1.3753024138537082e-06, - "loss": 0.8227, - "step": 8158 - }, - { - "epoch": 0.6131820231474523, - "grad_norm": 1.823942829315271, - "learning_rate": 1.3748399481128043e-06, - "loss": 0.9233, - "step": 8159 - }, - { - "epoch": 0.6132571772132872, - "grad_norm": 1.6749147251114798, - "learning_rate": 1.3743775194150281e-06, - "loss": 1.0432, - "step": 8160 - }, - { - "epoch": 0.6133323312791222, - "grad_norm": 1.6875415373034561, - "learning_rate": 1.3739151277877792e-06, - "loss": 0.9448, - "step": 8161 - }, - { - "epoch": 0.6134074853449571, - "grad_norm": 1.8607164560738896, - "learning_rate": 1.3734527732584568e-06, - "loss": 0.9892, - "step": 8162 - }, - { - "epoch": 0.6134826394107922, - "grad_norm": 1.4439830542386378, - "learning_rate": 1.372990455854457e-06, - "loss": 0.9559, - "step": 8163 - }, - { - "epoch": 0.6135577934766271, - "grad_norm": 1.5056597110964005, - "learning_rate": 1.372528175603173e-06, - "loss": 0.9452, - "step": 8164 - }, - { - "epoch": 0.613632947542462, - "grad_norm": 1.9143879516442415, - "learning_rate": 1.372065932531998e-06, - "loss": 0.9584, - "step": 8165 - }, - { - "epoch": 0.613708101608297, - "grad_norm": 2.343901464018606, - "learning_rate": 1.3716037266683203e-06, - "loss": 0.9628, - "step": 8166 - }, - { - "epoch": 0.613783255674132, - "grad_norm": 1.7706978091571017, - "learning_rate": 1.3711415580395288e-06, - "loss": 0.8713, - "step": 8167 - }, - { - "epoch": 0.613858409739967, - "grad_norm": 1.614994792398377, - "learning_rate": 1.3706794266730072e-06, - "loss": 1.0177, - "step": 8168 - }, - { - "epoch": 0.6139335638058019, - "grad_norm": 2.3041257011906287, - "learning_rate": 1.37021733259614e-06, - "loss": 0.9091, - "step": 8169 - }, - { - "epoch": 0.6140087178716369, - "grad_norm": 1.9667196552228765, - "learning_rate": 1.3697552758363079e-06, - "loss": 0.8089, - "step": 8170 - }, - { - "epoch": 0.6140838719374718, - "grad_norm": 1.890814464312985, - "learning_rate": 1.3692932564208884e-06, - "loss": 0.9784, - "step": 8171 - }, - { - "epoch": 0.6141590260033067, - "grad_norm": 3.907218629904782, - "learning_rate": 1.3688312743772588e-06, - "loss": 0.9849, - "step": 8172 - }, - { - "epoch": 0.6142341800691418, - "grad_norm": 1.4425618984488406, - "learning_rate": 1.3683693297327927e-06, - "loss": 0.9987, - "step": 8173 - }, - { - "epoch": 0.6143093341349767, - "grad_norm": 2.5933973693132977, - "learning_rate": 1.367907422514863e-06, - "loss": 0.9497, - "step": 8174 - }, - { - "epoch": 0.6143844882008117, - "grad_norm": 1.5677635799909282, - "learning_rate": 1.367445552750839e-06, - "loss": 0.9968, - "step": 8175 - }, - { - "epoch": 0.6144596422666466, - "grad_norm": 1.714664736638879, - "learning_rate": 1.3669837204680876e-06, - "loss": 0.856, - "step": 8176 - }, - { - "epoch": 0.6145347963324815, - "grad_norm": 1.736339933786475, - "learning_rate": 1.3665219256939753e-06, - "loss": 0.9949, - "step": 8177 - }, - { - "epoch": 0.6146099503983166, - "grad_norm": 1.396655945808822, - "learning_rate": 1.3660601684558639e-06, - "loss": 0.8573, - "step": 8178 - }, - { - "epoch": 0.6146851044641515, - "grad_norm": 1.5782482741147204, - "learning_rate": 1.3655984487811158e-06, - "loss": 1.0102, - "step": 8179 - }, - { - "epoch": 0.6147602585299865, - "grad_norm": 1.7338280030011883, - "learning_rate": 1.3651367666970895e-06, - "loss": 1.0478, - "step": 8180 - }, - { - "epoch": 0.6148354125958214, - "grad_norm": 1.4955116514047306, - "learning_rate": 1.3646751222311392e-06, - "loss": 0.926, - "step": 8181 - }, - { - "epoch": 0.6149105666616564, - "grad_norm": 1.4833191178029994, - "learning_rate": 1.3642135154106219e-06, - "loss": 0.9852, - "step": 8182 - }, - { - "epoch": 0.6149857207274914, - "grad_norm": 0.6461036921224003, - "learning_rate": 1.3637519462628876e-06, - "loss": 0.7817, - "step": 8183 - }, - { - "epoch": 0.6150608747933263, - "grad_norm": 1.6495132190629092, - "learning_rate": 1.3632904148152877e-06, - "loss": 0.9383, - "step": 8184 - }, - { - "epoch": 0.6151360288591613, - "grad_norm": 1.6976471045327177, - "learning_rate": 1.3628289210951687e-06, - "loss": 0.9948, - "step": 8185 - }, - { - "epoch": 0.6152111829249962, - "grad_norm": 1.52859055929856, - "learning_rate": 1.3623674651298752e-06, - "loss": 0.8594, - "step": 8186 - }, - { - "epoch": 0.6152863369908312, - "grad_norm": 2.6837882203293106, - "learning_rate": 1.361906046946752e-06, - "loss": 0.9987, - "step": 8187 - }, - { - "epoch": 0.6153614910566662, - "grad_norm": 1.5846626156113364, - "learning_rate": 1.3614446665731385e-06, - "loss": 0.858, - "step": 8188 - }, - { - "epoch": 0.6154366451225012, - "grad_norm": 1.5160396048171114, - "learning_rate": 1.3609833240363738e-06, - "loss": 0.9847, - "step": 8189 - }, - { - "epoch": 0.6155117991883361, - "grad_norm": 2.676885268507631, - "learning_rate": 1.3605220193637942e-06, - "loss": 0.9181, - "step": 8190 - }, - { - "epoch": 0.615586953254171, - "grad_norm": 2.5073882602732964, - "learning_rate": 1.3600607525827335e-06, - "loss": 0.9598, - "step": 8191 - }, - { - "epoch": 0.615662107320006, - "grad_norm": 0.8135098637133524, - "learning_rate": 1.359599523720524e-06, - "loss": 0.9127, - "step": 8192 - }, - { - "epoch": 0.615737261385841, - "grad_norm": 1.4786173736352373, - "learning_rate": 1.3591383328044943e-06, - "loss": 0.9057, - "step": 8193 - }, - { - "epoch": 0.615812415451676, - "grad_norm": 1.4667400648795865, - "learning_rate": 1.358677179861973e-06, - "loss": 1.0969, - "step": 8194 - }, - { - "epoch": 0.6158875695175109, - "grad_norm": 1.8787362067350142, - "learning_rate": 1.3582160649202844e-06, - "loss": 1.078, - "step": 8195 - }, - { - "epoch": 0.6159627235833458, - "grad_norm": 2.1356428321225636, - "learning_rate": 1.3577549880067516e-06, - "loss": 1.0294, - "step": 8196 - }, - { - "epoch": 0.6160378776491808, - "grad_norm": 1.9798940693811018, - "learning_rate": 1.3572939491486952e-06, - "loss": 1.0411, - "step": 8197 - }, - { - "epoch": 0.6161130317150157, - "grad_norm": 2.325390432246601, - "learning_rate": 1.3568329483734329e-06, - "loss": 1.029, - "step": 8198 - }, - { - "epoch": 0.6161881857808508, - "grad_norm": 2.2511098032013757, - "learning_rate": 1.3563719857082817e-06, - "loss": 0.9785, - "step": 8199 - }, - { - "epoch": 0.6162633398466857, - "grad_norm": 1.820338040283205, - "learning_rate": 1.3559110611805542e-06, - "loss": 1.007, - "step": 8200 - }, - { - "epoch": 0.6163384939125207, - "grad_norm": 1.9262389592789413, - "learning_rate": 1.3554501748175637e-06, - "loss": 0.9337, - "step": 8201 - }, - { - "epoch": 0.6164136479783556, - "grad_norm": 2.3933285653466183, - "learning_rate": 1.3549893266466188e-06, - "loss": 0.9725, - "step": 8202 - }, - { - "epoch": 0.6164888020441905, - "grad_norm": 1.9685056625606454, - "learning_rate": 1.3545285166950246e-06, - "loss": 0.9484, - "step": 8203 - }, - { - "epoch": 0.6165639561100256, - "grad_norm": 5.410366012968726, - "learning_rate": 1.3540677449900887e-06, - "loss": 0.9971, - "step": 8204 - }, - { - "epoch": 0.6166391101758605, - "grad_norm": 2.0595631352856216, - "learning_rate": 1.3536070115591118e-06, - "loss": 0.9547, - "step": 8205 - }, - { - "epoch": 0.6167142642416955, - "grad_norm": 2.029567197504382, - "learning_rate": 1.3531463164293952e-06, - "loss": 1.0753, - "step": 8206 - }, - { - "epoch": 0.6167894183075304, - "grad_norm": 1.4772833623703363, - "learning_rate": 1.352685659628236e-06, - "loss": 0.8824, - "step": 8207 - }, - { - "epoch": 0.6168645723733654, - "grad_norm": 1.438383742974462, - "learning_rate": 1.3522250411829301e-06, - "loss": 0.984, - "step": 8208 - }, - { - "epoch": 0.6169397264392004, - "grad_norm": 1.8802439791046992, - "learning_rate": 1.3517644611207715e-06, - "loss": 1.0003, - "step": 8209 - }, - { - "epoch": 0.6170148805050353, - "grad_norm": 2.5598475987549176, - "learning_rate": 1.35130391946905e-06, - "loss": 0.9449, - "step": 8210 - }, - { - "epoch": 0.6170900345708703, - "grad_norm": 1.4626350536100343, - "learning_rate": 1.350843416255056e-06, - "loss": 1.0313, - "step": 8211 - }, - { - "epoch": 0.6171651886367052, - "grad_norm": 1.8360938472324138, - "learning_rate": 1.350382951506075e-06, - "loss": 0.928, - "step": 8212 - }, - { - "epoch": 0.6172403427025402, - "grad_norm": 0.7751745504269397, - "learning_rate": 1.3499225252493918e-06, - "loss": 0.8702, - "step": 8213 - }, - { - "epoch": 0.6173154967683752, - "grad_norm": 1.4928925131950992, - "learning_rate": 1.3494621375122886e-06, - "loss": 1.0841, - "step": 8214 - }, - { - "epoch": 0.6173906508342102, - "grad_norm": 2.1478336109737333, - "learning_rate": 1.3490017883220443e-06, - "loss": 1.0158, - "step": 8215 - }, - { - "epoch": 0.6174658049000451, - "grad_norm": 0.6833206503164047, - "learning_rate": 1.3485414777059375e-06, - "loss": 0.8273, - "step": 8216 - }, - { - "epoch": 0.61754095896588, - "grad_norm": 2.1856452708385605, - "learning_rate": 1.3480812056912417e-06, - "loss": 0.9174, - "step": 8217 - }, - { - "epoch": 0.617616113031715, - "grad_norm": 1.8181333800929533, - "learning_rate": 1.3476209723052318e-06, - "loss": 0.9017, - "step": 8218 - }, - { - "epoch": 0.61769126709755, - "grad_norm": 1.6410343204514823, - "learning_rate": 1.3471607775751774e-06, - "loss": 0.9257, - "step": 8219 - }, - { - "epoch": 0.617766421163385, - "grad_norm": 1.7109411230876308, - "learning_rate": 1.3467006215283459e-06, - "loss": 0.9062, - "step": 8220 - }, - { - "epoch": 0.6178415752292199, - "grad_norm": 0.7393938591971143, - "learning_rate": 1.3462405041920053e-06, - "loss": 0.8465, - "step": 8221 - }, - { - "epoch": 0.6179167292950548, - "grad_norm": 1.5317202254773397, - "learning_rate": 1.3457804255934172e-06, - "loss": 0.9551, - "step": 8222 - }, - { - "epoch": 0.6179918833608898, - "grad_norm": 2.172336008008304, - "learning_rate": 1.3453203857598449e-06, - "loss": 0.8802, - "step": 8223 - }, - { - "epoch": 0.6180670374267248, - "grad_norm": 1.9570809546457473, - "learning_rate": 1.3448603847185464e-06, - "loss": 0.9052, - "step": 8224 - }, - { - "epoch": 0.6181421914925598, - "grad_norm": 1.4281363488879535, - "learning_rate": 1.3444004224967787e-06, - "loss": 0.8874, - "step": 8225 - }, - { - "epoch": 0.6182173455583947, - "grad_norm": 1.535385619444421, - "learning_rate": 1.3439404991217968e-06, - "loss": 0.9151, - "step": 8226 - }, - { - "epoch": 0.6182924996242297, - "grad_norm": 1.5891349393374288, - "learning_rate": 1.343480614620852e-06, - "loss": 0.9618, - "step": 8227 - }, - { - "epoch": 0.6183676536900646, - "grad_norm": 0.6423925810199042, - "learning_rate": 1.3430207690211953e-06, - "loss": 0.7782, - "step": 8228 - }, - { - "epoch": 0.6184428077558995, - "grad_norm": 4.682609346480661, - "learning_rate": 1.3425609623500738e-06, - "loss": 0.856, - "step": 8229 - }, - { - "epoch": 0.6185179618217346, - "grad_norm": 2.7350101278281422, - "learning_rate": 1.3421011946347323e-06, - "loss": 0.9149, - "step": 8230 - }, - { - "epoch": 0.6185931158875695, - "grad_norm": 2.3232526550441994, - "learning_rate": 1.3416414659024147e-06, - "loss": 1.0315, - "step": 8231 - }, - { - "epoch": 0.6186682699534045, - "grad_norm": 1.6754341388440568, - "learning_rate": 1.3411817761803608e-06, - "loss": 0.9872, - "step": 8232 - }, - { - "epoch": 0.6187434240192394, - "grad_norm": 1.9352916314422663, - "learning_rate": 1.34072212549581e-06, - "loss": 1.0665, - "step": 8233 - }, - { - "epoch": 0.6188185780850745, - "grad_norm": 4.064603759672587, - "learning_rate": 1.3402625138759972e-06, - "loss": 0.9718, - "step": 8234 - }, - { - "epoch": 0.6188937321509094, - "grad_norm": 0.6598126798374588, - "learning_rate": 1.3398029413481573e-06, - "loss": 0.78, - "step": 8235 - }, - { - "epoch": 0.6189688862167443, - "grad_norm": 1.6684670340551047, - "learning_rate": 1.3393434079395212e-06, - "loss": 0.9053, - "step": 8236 - }, - { - "epoch": 0.6190440402825793, - "grad_norm": 1.3954734290807713, - "learning_rate": 1.3388839136773174e-06, - "loss": 1.014, - "step": 8237 - }, - { - "epoch": 0.6191191943484142, - "grad_norm": 1.755240563644778, - "learning_rate": 1.3384244585887738e-06, - "loss": 1.0235, - "step": 8238 - }, - { - "epoch": 0.6191943484142493, - "grad_norm": 0.7553003890834651, - "learning_rate": 1.3379650427011141e-06, - "loss": 0.8322, - "step": 8239 - }, - { - "epoch": 0.6192695024800842, - "grad_norm": 1.6384982902604077, - "learning_rate": 1.337505666041561e-06, - "loss": 1.0783, - "step": 8240 - }, - { - "epoch": 0.6193446565459191, - "grad_norm": 2.3464919263992234, - "learning_rate": 1.337046328637334e-06, - "loss": 0.9388, - "step": 8241 - }, - { - "epoch": 0.6194198106117541, - "grad_norm": 1.432407314371131, - "learning_rate": 1.3365870305156502e-06, - "loss": 0.9723, - "step": 8242 - }, - { - "epoch": 0.619494964677589, - "grad_norm": 1.8432520177724854, - "learning_rate": 1.336127771703726e-06, - "loss": 0.9845, - "step": 8243 - }, - { - "epoch": 0.619570118743424, - "grad_norm": 2.0774265348846916, - "learning_rate": 1.3356685522287724e-06, - "loss": 1.0041, - "step": 8244 - }, - { - "epoch": 0.619645272809259, - "grad_norm": 0.7157295034140639, - "learning_rate": 1.3352093721180017e-06, - "loss": 0.7841, - "step": 8245 - }, - { - "epoch": 0.619720426875094, - "grad_norm": 2.284445314590661, - "learning_rate": 1.3347502313986216e-06, - "loss": 0.8033, - "step": 8246 - }, - { - "epoch": 0.6197955809409289, - "grad_norm": 1.528893038654117, - "learning_rate": 1.3342911300978373e-06, - "loss": 0.9478, - "step": 8247 - }, - { - "epoch": 0.6198707350067638, - "grad_norm": 1.956531940654751, - "learning_rate": 1.3338320682428527e-06, - "loss": 0.9974, - "step": 8248 - }, - { - "epoch": 0.6199458890725988, - "grad_norm": 1.6753782130017545, - "learning_rate": 1.3333730458608688e-06, - "loss": 0.9222, - "step": 8249 - }, - { - "epoch": 0.6200210431384338, - "grad_norm": 1.7490397952543277, - "learning_rate": 1.3329140629790851e-06, - "loss": 1.0299, - "step": 8250 - }, - { - "epoch": 0.6200961972042688, - "grad_norm": 1.6081989027907897, - "learning_rate": 1.3324551196246977e-06, - "loss": 0.9736, - "step": 8251 - }, - { - "epoch": 0.6201713512701037, - "grad_norm": 1.3141044619071192, - "learning_rate": 1.3319962158249e-06, - "loss": 0.9977, - "step": 8252 - }, - { - "epoch": 0.6202465053359387, - "grad_norm": 1.6404880284852412, - "learning_rate": 1.331537351606885e-06, - "loss": 1.0287, - "step": 8253 - }, - { - "epoch": 0.6203216594017736, - "grad_norm": 1.816464134032332, - "learning_rate": 1.3310785269978413e-06, - "loss": 0.8479, - "step": 8254 - }, - { - "epoch": 0.6203968134676086, - "grad_norm": 1.8078695540463177, - "learning_rate": 1.3306197420249566e-06, - "loss": 0.9717, - "step": 8255 - }, - { - "epoch": 0.6204719675334436, - "grad_norm": 1.9261632699025744, - "learning_rate": 1.3301609967154152e-06, - "loss": 1.039, - "step": 8256 - }, - { - "epoch": 0.6205471215992785, - "grad_norm": 1.7976941439179537, - "learning_rate": 1.3297022910964e-06, - "loss": 0.9541, - "step": 8257 - }, - { - "epoch": 0.6206222756651135, - "grad_norm": 2.333204424794025, - "learning_rate": 1.3292436251950906e-06, - "loss": 1.0044, - "step": 8258 - }, - { - "epoch": 0.6206974297309484, - "grad_norm": 1.683217159202995, - "learning_rate": 1.3287849990386647e-06, - "loss": 1.003, - "step": 8259 - }, - { - "epoch": 0.6207725837967835, - "grad_norm": 2.827651094806207, - "learning_rate": 1.3283264126542986e-06, - "loss": 0.912, - "step": 8260 - }, - { - "epoch": 0.6208477378626184, - "grad_norm": 1.548346234046455, - "learning_rate": 1.3278678660691638e-06, - "loss": 0.9649, - "step": 8261 - }, - { - "epoch": 0.6209228919284533, - "grad_norm": 1.4838079875358274, - "learning_rate": 1.327409359310432e-06, - "loss": 0.9192, - "step": 8262 - }, - { - "epoch": 0.6209980459942883, - "grad_norm": 1.9152779593070743, - "learning_rate": 1.3269508924052715e-06, - "loss": 0.9398, - "step": 8263 - }, - { - "epoch": 0.6210732000601232, - "grad_norm": 2.0137822206975264, - "learning_rate": 1.326492465380847e-06, - "loss": 0.9389, - "step": 8264 - }, - { - "epoch": 0.6211483541259583, - "grad_norm": 1.8355812538760845, - "learning_rate": 1.326034078264324e-06, - "loss": 0.9929, - "step": 8265 - }, - { - "epoch": 0.6212235081917932, - "grad_norm": 1.7319037211971282, - "learning_rate": 1.3255757310828614e-06, - "loss": 0.9695, - "step": 8266 - }, - { - "epoch": 0.6212986622576281, - "grad_norm": 1.0015878587865936, - "learning_rate": 1.3251174238636202e-06, - "loss": 0.878, - "step": 8267 - }, - { - "epoch": 0.6213738163234631, - "grad_norm": 1.9630617589033716, - "learning_rate": 1.3246591566337563e-06, - "loss": 0.9927, - "step": 8268 - }, - { - "epoch": 0.621448970389298, - "grad_norm": 2.213497984212057, - "learning_rate": 1.3242009294204223e-06, - "loss": 0.8782, - "step": 8269 - }, - { - "epoch": 0.621524124455133, - "grad_norm": 1.5369603085974137, - "learning_rate": 1.3237427422507721e-06, - "loss": 1.0084, - "step": 8270 - }, - { - "epoch": 0.621599278520968, - "grad_norm": 3.0058606440868014, - "learning_rate": 1.323284595151953e-06, - "loss": 1.0868, - "step": 8271 - }, - { - "epoch": 0.621674432586803, - "grad_norm": 1.4027668094311116, - "learning_rate": 1.3228264881511137e-06, - "loss": 0.9637, - "step": 8272 - }, - { - "epoch": 0.6217495866526379, - "grad_norm": 3.2021774751370966, - "learning_rate": 1.322368421275398e-06, - "loss": 0.8724, - "step": 8273 - }, - { - "epoch": 0.6218247407184728, - "grad_norm": 1.675983550125277, - "learning_rate": 1.3219103945519479e-06, - "loss": 0.9312, - "step": 8274 - }, - { - "epoch": 0.6218998947843078, - "grad_norm": 1.9322413174906348, - "learning_rate": 1.3214524080079038e-06, - "loss": 0.9365, - "step": 8275 - }, - { - "epoch": 0.6219750488501428, - "grad_norm": 1.820285387614948, - "learning_rate": 1.3209944616704023e-06, - "loss": 0.8949, - "step": 8276 - }, - { - "epoch": 0.6220502029159778, - "grad_norm": 1.8047139219521444, - "learning_rate": 1.3205365555665795e-06, - "loss": 1.0066, - "step": 8277 - }, - { - "epoch": 0.6221253569818127, - "grad_norm": 1.332916234262692, - "learning_rate": 1.3200786897235675e-06, - "loss": 0.9531, - "step": 8278 - }, - { - "epoch": 0.6222005110476477, - "grad_norm": 2.087510219708856, - "learning_rate": 1.3196208641684968e-06, - "loss": 0.9994, - "step": 8279 - }, - { - "epoch": 0.6222756651134826, - "grad_norm": 4.122411898567415, - "learning_rate": 1.3191630789284954e-06, - "loss": 1.0179, - "step": 8280 - }, - { - "epoch": 0.6223508191793176, - "grad_norm": 2.1145618105640236, - "learning_rate": 1.318705334030688e-06, - "loss": 0.9966, - "step": 8281 - }, - { - "epoch": 0.6224259732451526, - "grad_norm": 1.6958641174421614, - "learning_rate": 1.318247629502199e-06, - "loss": 1.0347, - "step": 8282 - }, - { - "epoch": 0.6225011273109875, - "grad_norm": 2.181825962919752, - "learning_rate": 1.317789965370148e-06, - "loss": 0.9439, - "step": 8283 - }, - { - "epoch": 0.6225762813768225, - "grad_norm": 3.139034342888743, - "learning_rate": 1.3173323416616549e-06, - "loss": 0.9634, - "step": 8284 - }, - { - "epoch": 0.6226514354426574, - "grad_norm": 1.5089766203710433, - "learning_rate": 1.3168747584038341e-06, - "loss": 0.9338, - "step": 8285 - }, - { - "epoch": 0.6227265895084924, - "grad_norm": 0.6528450872560861, - "learning_rate": 1.3164172156237992e-06, - "loss": 0.8084, - "step": 8286 - }, - { - "epoch": 0.6228017435743274, - "grad_norm": 1.6161402753151874, - "learning_rate": 1.3159597133486625e-06, - "loss": 0.9503, - "step": 8287 - }, - { - "epoch": 0.6228768976401623, - "grad_norm": 4.340718480772658, - "learning_rate": 1.315502251605532e-06, - "loss": 0.9899, - "step": 8288 - }, - { - "epoch": 0.6229520517059973, - "grad_norm": 1.3942886697195678, - "learning_rate": 1.3150448304215142e-06, - "loss": 0.961, - "step": 8289 - }, - { - "epoch": 0.6230272057718322, - "grad_norm": 0.7492596230281917, - "learning_rate": 1.3145874498237133e-06, - "loss": 0.8222, - "step": 8290 - }, - { - "epoch": 0.6231023598376673, - "grad_norm": 1.8084164361565838, - "learning_rate": 1.3141301098392302e-06, - "loss": 0.9293, - "step": 8291 - }, - { - "epoch": 0.6231775139035022, - "grad_norm": 1.7806392760422525, - "learning_rate": 1.3136728104951652e-06, - "loss": 1.0064, - "step": 8292 - }, - { - "epoch": 0.6232526679693371, - "grad_norm": 2.171529069973391, - "learning_rate": 1.3132155518186135e-06, - "loss": 0.9852, - "step": 8293 - }, - { - "epoch": 0.6233278220351721, - "grad_norm": 12.416531623942307, - "learning_rate": 1.3127583338366707e-06, - "loss": 0.9631, - "step": 8294 - }, - { - "epoch": 0.623402976101007, - "grad_norm": 1.6561061148852678, - "learning_rate": 1.312301156576428e-06, - "loss": 0.9847, - "step": 8295 - }, - { - "epoch": 0.6234781301668421, - "grad_norm": 2.4274018260145898, - "learning_rate": 1.3118440200649752e-06, - "loss": 0.9846, - "step": 8296 - }, - { - "epoch": 0.623553284232677, - "grad_norm": 4.805982263298183, - "learning_rate": 1.3113869243293993e-06, - "loss": 1.0673, - "step": 8297 - }, - { - "epoch": 0.623628438298512, - "grad_norm": 2.1019805152127904, - "learning_rate": 1.310929869396785e-06, - "loss": 0.8222, - "step": 8298 - }, - { - "epoch": 0.6237035923643469, - "grad_norm": 3.180950092201182, - "learning_rate": 1.3104728552942149e-06, - "loss": 0.8407, - "step": 8299 - }, - { - "epoch": 0.6237787464301818, - "grad_norm": 1.612594421342816, - "learning_rate": 1.3100158820487679e-06, - "loss": 1.0207, - "step": 8300 - }, - { - "epoch": 0.6238539004960169, - "grad_norm": 2.027155814160632, - "learning_rate": 1.3095589496875224e-06, - "loss": 1.0367, - "step": 8301 - }, - { - "epoch": 0.6239290545618518, - "grad_norm": 1.833639776743601, - "learning_rate": 1.309102058237553e-06, - "loss": 0.8854, - "step": 8302 - }, - { - "epoch": 0.6240042086276868, - "grad_norm": 2.86121363492149, - "learning_rate": 1.3086452077259323e-06, - "loss": 0.9799, - "step": 8303 - }, - { - "epoch": 0.6240793626935217, - "grad_norm": 1.6995031359992367, - "learning_rate": 1.3081883981797303e-06, - "loss": 0.9478, - "step": 8304 - }, - { - "epoch": 0.6241545167593567, - "grad_norm": 1.718042609555683, - "learning_rate": 1.3077316296260144e-06, - "loss": 0.9778, - "step": 8305 - }, - { - "epoch": 0.6242296708251917, - "grad_norm": 1.5276075364639394, - "learning_rate": 1.3072749020918514e-06, - "loss": 0.8868, - "step": 8306 - }, - { - "epoch": 0.6243048248910266, - "grad_norm": 1.5737096375007817, - "learning_rate": 1.3068182156043026e-06, - "loss": 0.9359, - "step": 8307 - }, - { - "epoch": 0.6243799789568616, - "grad_norm": 2.53374414835679, - "learning_rate": 1.306361570190428e-06, - "loss": 1.0252, - "step": 8308 - }, - { - "epoch": 0.6244551330226965, - "grad_norm": 1.8560838824094237, - "learning_rate": 1.3059049658772875e-06, - "loss": 0.9511, - "step": 8309 - }, - { - "epoch": 0.6245302870885315, - "grad_norm": 1.8605143157974113, - "learning_rate": 1.305448402691935e-06, - "loss": 0.9785, - "step": 8310 - }, - { - "epoch": 0.6246054411543664, - "grad_norm": 1.9847002138389203, - "learning_rate": 1.304991880661425e-06, - "loss": 0.9246, - "step": 8311 - }, - { - "epoch": 0.6246805952202014, - "grad_norm": 0.6874577552647627, - "learning_rate": 1.3045353998128073e-06, - "loss": 0.8172, - "step": 8312 - }, - { - "epoch": 0.6247557492860364, - "grad_norm": 4.564524580233377, - "learning_rate": 1.30407896017313e-06, - "loss": 0.9823, - "step": 8313 - }, - { - "epoch": 0.6248309033518713, - "grad_norm": 1.5759339938936976, - "learning_rate": 1.3036225617694391e-06, - "loss": 1.0177, - "step": 8314 - }, - { - "epoch": 0.6249060574177063, - "grad_norm": 3.960691425572499, - "learning_rate": 1.3031662046287778e-06, - "loss": 0.9687, - "step": 8315 - }, - { - "epoch": 0.6249812114835412, - "grad_norm": 1.6248358130038771, - "learning_rate": 1.302709888778188e-06, - "loss": 0.9023, - "step": 8316 - }, - { - "epoch": 0.6250563655493763, - "grad_norm": 1.6353168902565116, - "learning_rate": 1.3022536142447069e-06, - "loss": 0.9237, - "step": 8317 - }, - { - "epoch": 0.6251315196152112, - "grad_norm": 1.6461856883354158, - "learning_rate": 1.3017973810553709e-06, - "loss": 0.8864, - "step": 8318 - }, - { - "epoch": 0.6252066736810461, - "grad_norm": 1.7898510523580098, - "learning_rate": 1.301341189237214e-06, - "loss": 1.0278, - "step": 8319 - }, - { - "epoch": 0.6252818277468811, - "grad_norm": 1.5213666877723233, - "learning_rate": 1.3008850388172668e-06, - "loss": 0.9707, - "step": 8320 - }, - { - "epoch": 0.625356981812716, - "grad_norm": 1.6048729897985727, - "learning_rate": 1.3004289298225582e-06, - "loss": 0.8999, - "step": 8321 - }, - { - "epoch": 0.6254321358785511, - "grad_norm": 1.3118473882799873, - "learning_rate": 1.299972862280114e-06, - "loss": 0.9477, - "step": 8322 - }, - { - "epoch": 0.625507289944386, - "grad_norm": 2.082071558005692, - "learning_rate": 1.299516836216959e-06, - "loss": 0.9834, - "step": 8323 - }, - { - "epoch": 0.625582444010221, - "grad_norm": 1.6434886100845545, - "learning_rate": 1.2990608516601133e-06, - "loss": 0.9767, - "step": 8324 - }, - { - "epoch": 0.6256575980760559, - "grad_norm": 1.4186194742128584, - "learning_rate": 1.2986049086365963e-06, - "loss": 0.9083, - "step": 8325 - }, - { - "epoch": 0.6257327521418908, - "grad_norm": 1.638359904004254, - "learning_rate": 1.2981490071734244e-06, - "loss": 0.955, - "step": 8326 - }, - { - "epoch": 0.6258079062077259, - "grad_norm": 2.3078516043189112, - "learning_rate": 1.2976931472976106e-06, - "loss": 0.8668, - "step": 8327 - }, - { - "epoch": 0.6258830602735608, - "grad_norm": 3.0580148356085926, - "learning_rate": 1.2972373290361683e-06, - "loss": 0.8826, - "step": 8328 - }, - { - "epoch": 0.6259582143393958, - "grad_norm": 1.2976965542573724, - "learning_rate": 1.296781552416105e-06, - "loss": 0.9857, - "step": 8329 - }, - { - "epoch": 0.6260333684052307, - "grad_norm": 1.7294582059893897, - "learning_rate": 1.2963258174644266e-06, - "loss": 0.9693, - "step": 8330 - }, - { - "epoch": 0.6261085224710656, - "grad_norm": 1.930463997600762, - "learning_rate": 1.295870124208139e-06, - "loss": 0.9911, - "step": 8331 - }, - { - "epoch": 0.6261836765369007, - "grad_norm": 1.99970439872153, - "learning_rate": 1.2954144726742424e-06, - "loss": 0.8551, - "step": 8332 - }, - { - "epoch": 0.6262588306027356, - "grad_norm": 1.9018418043475747, - "learning_rate": 1.2949588628897367e-06, - "loss": 0.9892, - "step": 8333 - }, - { - "epoch": 0.6263339846685706, - "grad_norm": 5.022879704524392, - "learning_rate": 1.2945032948816183e-06, - "loss": 0.8673, - "step": 8334 - }, - { - "epoch": 0.6264091387344055, - "grad_norm": 2.2393121126348086, - "learning_rate": 1.2940477686768806e-06, - "loss": 1.0293, - "step": 8335 - }, - { - "epoch": 0.6264842928002405, - "grad_norm": 2.0696332861003, - "learning_rate": 1.2935922843025165e-06, - "loss": 0.953, - "step": 8336 - }, - { - "epoch": 0.6265594468660755, - "grad_norm": 1.537826118928309, - "learning_rate": 1.293136841785514e-06, - "loss": 0.9976, - "step": 8337 - }, - { - "epoch": 0.6266346009319104, - "grad_norm": 2.1126354873818545, - "learning_rate": 1.292681441152861e-06, - "loss": 0.907, - "step": 8338 - }, - { - "epoch": 0.6267097549977454, - "grad_norm": 1.5766668098051773, - "learning_rate": 1.2922260824315409e-06, - "loss": 0.9725, - "step": 8339 - }, - { - "epoch": 0.6267849090635803, - "grad_norm": 13.890296545962062, - "learning_rate": 1.2917707656485352e-06, - "loss": 1.0216, - "step": 8340 - }, - { - "epoch": 0.6268600631294153, - "grad_norm": 4.793621537975315, - "learning_rate": 1.2913154908308244e-06, - "loss": 0.9637, - "step": 8341 - }, - { - "epoch": 0.6269352171952502, - "grad_norm": 3.289794985288149, - "learning_rate": 1.2908602580053836e-06, - "loss": 0.9038, - "step": 8342 - }, - { - "epoch": 0.6270103712610853, - "grad_norm": 1.878495708827219, - "learning_rate": 1.2904050671991887e-06, - "loss": 0.8736, - "step": 8343 - }, - { - "epoch": 0.6270855253269202, - "grad_norm": 1.794235214374454, - "learning_rate": 1.2899499184392105e-06, - "loss": 0.8734, - "step": 8344 - }, - { - "epoch": 0.6271606793927551, - "grad_norm": 1.9657294095545106, - "learning_rate": 1.2894948117524188e-06, - "loss": 0.8157, - "step": 8345 - }, - { - "epoch": 0.6272358334585901, - "grad_norm": 1.3064424966127945, - "learning_rate": 1.2890397471657802e-06, - "loss": 1.0341, - "step": 8346 - }, - { - "epoch": 0.627310987524425, - "grad_norm": 1.5619988973078822, - "learning_rate": 1.2885847247062587e-06, - "loss": 1.0492, - "step": 8347 - }, - { - "epoch": 0.6273861415902601, - "grad_norm": 1.3026620367232096, - "learning_rate": 1.2881297444008165e-06, - "loss": 1.0062, - "step": 8348 - }, - { - "epoch": 0.627461295656095, - "grad_norm": 2.2944574413696035, - "learning_rate": 1.2876748062764127e-06, - "loss": 0.9427, - "step": 8349 - }, - { - "epoch": 0.6275364497219299, - "grad_norm": 3.173218541219523, - "learning_rate": 1.2872199103600046e-06, - "loss": 0.9689, - "step": 8350 - }, - { - "epoch": 0.6276116037877649, - "grad_norm": 1.7336517062992418, - "learning_rate": 1.286765056678547e-06, - "loss": 1.0307, - "step": 8351 - }, - { - "epoch": 0.6276867578535998, - "grad_norm": 2.0393394779030865, - "learning_rate": 1.2863102452589893e-06, - "loss": 0.984, - "step": 8352 - }, - { - "epoch": 0.6277619119194349, - "grad_norm": 4.565409947316949, - "learning_rate": 1.2858554761282837e-06, - "loss": 0.9962, - "step": 8353 - }, - { - "epoch": 0.6278370659852698, - "grad_norm": 1.456137129703064, - "learning_rate": 1.2854007493133754e-06, - "loss": 1.0296, - "step": 8354 - }, - { - "epoch": 0.6279122200511048, - "grad_norm": 1.32884475054386, - "learning_rate": 1.2849460648412092e-06, - "loss": 0.9476, - "step": 8355 - }, - { - "epoch": 0.6279873741169397, - "grad_norm": 2.0083666085089105, - "learning_rate": 1.2844914227387266e-06, - "loss": 0.9632, - "step": 8356 - }, - { - "epoch": 0.6280625281827746, - "grad_norm": 2.829187857352343, - "learning_rate": 1.2840368230328672e-06, - "loss": 0.8717, - "step": 8357 - }, - { - "epoch": 0.6281376822486097, - "grad_norm": 1.5677421371854061, - "learning_rate": 1.2835822657505678e-06, - "loss": 1.0667, - "step": 8358 - }, - { - "epoch": 0.6282128363144446, - "grad_norm": 2.4105842494709884, - "learning_rate": 1.2831277509187622e-06, - "loss": 0.9733, - "step": 8359 - }, - { - "epoch": 0.6282879903802796, - "grad_norm": 0.8277070052197626, - "learning_rate": 1.2826732785643826e-06, - "loss": 0.8365, - "step": 8360 - }, - { - "epoch": 0.6283631444461145, - "grad_norm": 1.5170790785796442, - "learning_rate": 1.2822188487143581e-06, - "loss": 0.9181, - "step": 8361 - }, - { - "epoch": 0.6284382985119495, - "grad_norm": 2.0150487504840653, - "learning_rate": 1.2817644613956153e-06, - "loss": 1.0006, - "step": 8362 - }, - { - "epoch": 0.6285134525777845, - "grad_norm": 1.4289477993538455, - "learning_rate": 1.2813101166350786e-06, - "loss": 1.0125, - "step": 8363 - }, - { - "epoch": 0.6285886066436194, - "grad_norm": 1.5826739477463063, - "learning_rate": 1.2808558144596692e-06, - "loss": 0.9377, - "step": 8364 - }, - { - "epoch": 0.6286637607094544, - "grad_norm": 2.301722698243243, - "learning_rate": 1.280401554896307e-06, - "loss": 1.0306, - "step": 8365 - }, - { - "epoch": 0.6287389147752893, - "grad_norm": 5.373354105345251, - "learning_rate": 1.2799473379719077e-06, - "loss": 1.054, - "step": 8366 - }, - { - "epoch": 0.6288140688411243, - "grad_norm": 1.621323608280387, - "learning_rate": 1.2794931637133863e-06, - "loss": 0.9628, - "step": 8367 - }, - { - "epoch": 0.6288892229069593, - "grad_norm": 3.8262623517729057, - "learning_rate": 1.2790390321476542e-06, - "loss": 1.0066, - "step": 8368 - }, - { - "epoch": 0.6289643769727943, - "grad_norm": 2.729115774442125, - "learning_rate": 1.2785849433016198e-06, - "loss": 0.9847, - "step": 8369 - }, - { - "epoch": 0.6290395310386292, - "grad_norm": 2.1049208343914447, - "learning_rate": 1.27813089720219e-06, - "loss": 1.0381, - "step": 8370 - }, - { - "epoch": 0.6291146851044641, - "grad_norm": 1.6439959817034395, - "learning_rate": 1.277676893876268e-06, - "loss": 1.0358, - "step": 8371 - }, - { - "epoch": 0.6291898391702991, - "grad_norm": 2.681958629092791, - "learning_rate": 1.277222933350757e-06, - "loss": 0.9294, - "step": 8372 - }, - { - "epoch": 0.629264993236134, - "grad_norm": 1.5456460251479127, - "learning_rate": 1.2767690156525554e-06, - "loss": 0.8868, - "step": 8373 - }, - { - "epoch": 0.6293401473019691, - "grad_norm": 1.2618707146082944, - "learning_rate": 1.276315140808558e-06, - "loss": 1.0082, - "step": 8374 - }, - { - "epoch": 0.629415301367804, - "grad_norm": 1.6938006432200863, - "learning_rate": 1.27586130884566e-06, - "loss": 0.9559, - "step": 8375 - }, - { - "epoch": 0.6294904554336389, - "grad_norm": 1.9201267012871759, - "learning_rate": 1.275407519790752e-06, - "loss": 1.0677, - "step": 8376 - }, - { - "epoch": 0.6295656094994739, - "grad_norm": 1.6987564334930014, - "learning_rate": 1.2749537736707239e-06, - "loss": 0.9378, - "step": 8377 - }, - { - "epoch": 0.6296407635653088, - "grad_norm": 1.5971281862450302, - "learning_rate": 1.274500070512461e-06, - "loss": 1.0942, - "step": 8378 - }, - { - "epoch": 0.6297159176311439, - "grad_norm": 1.632171139855132, - "learning_rate": 1.2740464103428463e-06, - "loss": 0.9674, - "step": 8379 - }, - { - "epoch": 0.6297910716969788, - "grad_norm": 1.5280301242075824, - "learning_rate": 1.2735927931887625e-06, - "loss": 0.9761, - "step": 8380 - }, - { - "epoch": 0.6298662257628138, - "grad_norm": 1.4844705651289052, - "learning_rate": 1.2731392190770866e-06, - "loss": 0.9138, - "step": 8381 - }, - { - "epoch": 0.6299413798286487, - "grad_norm": 1.6882440551445266, - "learning_rate": 1.2726856880346956e-06, - "loss": 0.999, - "step": 8382 - }, - { - "epoch": 0.6300165338944836, - "grad_norm": 1.8379728033359504, - "learning_rate": 1.2722322000884628e-06, - "loss": 1.0626, - "step": 8383 - }, - { - "epoch": 0.6300916879603187, - "grad_norm": 1.5742156452309255, - "learning_rate": 1.2717787552652585e-06, - "loss": 0.9438, - "step": 8384 - }, - { - "epoch": 0.6301668420261536, - "grad_norm": 3.3680165414200105, - "learning_rate": 1.2713253535919521e-06, - "loss": 0.9266, - "step": 8385 - }, - { - "epoch": 0.6302419960919886, - "grad_norm": 1.8337350824187462, - "learning_rate": 1.2708719950954082e-06, - "loss": 1.0141, - "step": 8386 - }, - { - "epoch": 0.6303171501578235, - "grad_norm": 1.4774669319492948, - "learning_rate": 1.2704186798024913e-06, - "loss": 1.0049, - "step": 8387 - }, - { - "epoch": 0.6303923042236586, - "grad_norm": 0.6975689396577633, - "learning_rate": 1.2699654077400608e-06, - "loss": 0.838, - "step": 8388 - }, - { - "epoch": 0.6304674582894935, - "grad_norm": 2.1043734559024303, - "learning_rate": 1.2695121789349757e-06, - "loss": 0.9725, - "step": 8389 - }, - { - "epoch": 0.6305426123553284, - "grad_norm": 4.265512541801125, - "learning_rate": 1.2690589934140912e-06, - "loss": 0.8369, - "step": 8390 - }, - { - "epoch": 0.6306177664211634, - "grad_norm": 3.163077193952102, - "learning_rate": 1.2686058512042594e-06, - "loss": 0.9408, - "step": 8391 - }, - { - "epoch": 0.6306929204869983, - "grad_norm": 1.5385654634440273, - "learning_rate": 1.268152752332333e-06, - "loss": 0.9917, - "step": 8392 - }, - { - "epoch": 0.6307680745528333, - "grad_norm": 1.5487100524922908, - "learning_rate": 1.2676996968251574e-06, - "loss": 0.9967, - "step": 8393 - }, - { - "epoch": 0.6308432286186683, - "grad_norm": 2.002872430884142, - "learning_rate": 1.2672466847095793e-06, - "loss": 1.0567, - "step": 8394 - }, - { - "epoch": 0.6309183826845032, - "grad_norm": 2.0524563006675445, - "learning_rate": 1.2667937160124416e-06, - "loss": 0.9841, - "step": 8395 - }, - { - "epoch": 0.6309935367503382, - "grad_norm": 1.988216736352063, - "learning_rate": 1.266340790760583e-06, - "loss": 0.9354, - "step": 8396 - }, - { - "epoch": 0.6310686908161731, - "grad_norm": 1.7790854838280539, - "learning_rate": 1.2658879089808423e-06, - "loss": 1.0736, - "step": 8397 - }, - { - "epoch": 0.6311438448820081, - "grad_norm": 1.9264611210566884, - "learning_rate": 1.2654350707000538e-06, - "loss": 1.0222, - "step": 8398 - }, - { - "epoch": 0.6312189989478431, - "grad_norm": 1.9080255088020652, - "learning_rate": 1.264982275945051e-06, - "loss": 0.9779, - "step": 8399 - }, - { - "epoch": 0.6312941530136781, - "grad_norm": 1.7684403650584508, - "learning_rate": 1.2645295247426625e-06, - "loss": 1.0175, - "step": 8400 - }, - { - "epoch": 0.631369307079513, - "grad_norm": 2.201300440421026, - "learning_rate": 1.2640768171197156e-06, - "loss": 0.9676, - "step": 8401 - }, - { - "epoch": 0.6314444611453479, - "grad_norm": 1.5034029824531165, - "learning_rate": 1.2636241531030355e-06, - "loss": 1.0087, - "step": 8402 - }, - { - "epoch": 0.6315196152111829, - "grad_norm": 1.748174669080439, - "learning_rate": 1.263171532719444e-06, - "loss": 0.9603, - "step": 8403 - }, - { - "epoch": 0.6315947692770179, - "grad_norm": 0.6439194058224703, - "learning_rate": 1.2627189559957612e-06, - "loss": 0.8652, - "step": 8404 - }, - { - "epoch": 0.6316699233428529, - "grad_norm": 2.615065804960962, - "learning_rate": 1.2622664229588033e-06, - "loss": 0.8685, - "step": 8405 - }, - { - "epoch": 0.6317450774086878, - "grad_norm": 1.9924402413207718, - "learning_rate": 1.2618139336353846e-06, - "loss": 1.0079, - "step": 8406 - }, - { - "epoch": 0.6318202314745228, - "grad_norm": 1.7268909815056697, - "learning_rate": 1.2613614880523172e-06, - "loss": 0.9074, - "step": 8407 - }, - { - "epoch": 0.6318953855403577, - "grad_norm": 1.6594710787384737, - "learning_rate": 1.2609090862364099e-06, - "loss": 0.9224, - "step": 8408 - }, - { - "epoch": 0.6319705396061926, - "grad_norm": 1.7855222230894015, - "learning_rate": 1.2604567282144696e-06, - "loss": 0.9673, - "step": 8409 - }, - { - "epoch": 0.6320456936720277, - "grad_norm": 1.4845349677372273, - "learning_rate": 1.2600044140132994e-06, - "loss": 0.9776, - "step": 8410 - }, - { - "epoch": 0.6321208477378626, - "grad_norm": 1.46349840564074, - "learning_rate": 1.259552143659702e-06, - "loss": 1.0574, - "step": 8411 - }, - { - "epoch": 0.6321960018036976, - "grad_norm": 0.6469092917452123, - "learning_rate": 1.2590999171804758e-06, - "loss": 0.8292, - "step": 8412 - }, - { - "epoch": 0.6322711558695325, - "grad_norm": 1.5233061122479417, - "learning_rate": 1.2586477346024154e-06, - "loss": 0.8266, - "step": 8413 - }, - { - "epoch": 0.6323463099353676, - "grad_norm": 1.5850381039151826, - "learning_rate": 1.258195595952317e-06, - "loss": 0.9478, - "step": 8414 - }, - { - "epoch": 0.6324214640012025, - "grad_norm": 2.0293808773743978, - "learning_rate": 1.2577435012569684e-06, - "loss": 1.0404, - "step": 8415 - }, - { - "epoch": 0.6324966180670374, - "grad_norm": 1.8000620566373153, - "learning_rate": 1.2572914505431609e-06, - "loss": 1.0692, - "step": 8416 - }, - { - "epoch": 0.6325717721328724, - "grad_norm": 1.572851529151597, - "learning_rate": 1.2568394438376788e-06, - "loss": 0.9236, - "step": 8417 - }, - { - "epoch": 0.6326469261987073, - "grad_norm": 1.7902823874361038, - "learning_rate": 1.2563874811673053e-06, - "loss": 0.9857, - "step": 8418 - }, - { - "epoch": 0.6327220802645424, - "grad_norm": 1.5820421006572338, - "learning_rate": 1.2559355625588208e-06, - "loss": 0.9606, - "step": 8419 - }, - { - "epoch": 0.6327972343303773, - "grad_norm": 1.8333248297176241, - "learning_rate": 1.2554836880390033e-06, - "loss": 0.9642, - "step": 8420 - }, - { - "epoch": 0.6328723883962122, - "grad_norm": 1.5674730886920527, - "learning_rate": 1.2550318576346287e-06, - "loss": 0.949, - "step": 8421 - }, - { - "epoch": 0.6329475424620472, - "grad_norm": 1.6007931233641075, - "learning_rate": 1.2545800713724694e-06, - "loss": 1.0431, - "step": 8422 - }, - { - "epoch": 0.6330226965278821, - "grad_norm": 2.3658690353159546, - "learning_rate": 1.2541283292792949e-06, - "loss": 0.9043, - "step": 8423 - }, - { - "epoch": 0.6330978505937171, - "grad_norm": 1.3710713180924086, - "learning_rate": 1.2536766313818732e-06, - "loss": 0.9967, - "step": 8424 - }, - { - "epoch": 0.6331730046595521, - "grad_norm": 2.009182355079278, - "learning_rate": 1.2532249777069686e-06, - "loss": 0.9978, - "step": 8425 - }, - { - "epoch": 0.6332481587253871, - "grad_norm": 1.9928195922974208, - "learning_rate": 1.252773368281344e-06, - "loss": 1.1121, - "step": 8426 - }, - { - "epoch": 0.633323312791222, - "grad_norm": 1.7928956791899884, - "learning_rate": 1.2523218031317586e-06, - "loss": 1.1068, - "step": 8427 - }, - { - "epoch": 0.6333984668570569, - "grad_norm": 2.013470229848357, - "learning_rate": 1.2518702822849694e-06, - "loss": 0.9466, - "step": 8428 - }, - { - "epoch": 0.633473620922892, - "grad_norm": 1.5881670233861245, - "learning_rate": 1.2514188057677309e-06, - "loss": 0.8907, - "step": 8429 - }, - { - "epoch": 0.6335487749887269, - "grad_norm": 1.5185404786061534, - "learning_rate": 1.250967373606794e-06, - "loss": 0.9249, - "step": 8430 - }, - { - "epoch": 0.6336239290545619, - "grad_norm": 1.9913573685966341, - "learning_rate": 1.2505159858289092e-06, - "loss": 1.0011, - "step": 8431 - }, - { - "epoch": 0.6336990831203968, - "grad_norm": 1.7619029285048247, - "learning_rate": 1.2500646424608217e-06, - "loss": 0.9344, - "step": 8432 - }, - { - "epoch": 0.6337742371862318, - "grad_norm": 1.7122782152891962, - "learning_rate": 1.2496133435292762e-06, - "loss": 0.9265, - "step": 8433 - }, - { - "epoch": 0.6338493912520667, - "grad_norm": 0.722789978850751, - "learning_rate": 1.2491620890610135e-06, - "loss": 0.8122, - "step": 8434 - }, - { - "epoch": 0.6339245453179017, - "grad_norm": 1.595356972548084, - "learning_rate": 1.2487108790827714e-06, - "loss": 0.9442, - "step": 8435 - }, - { - "epoch": 0.6339996993837367, - "grad_norm": 1.9546186915300294, - "learning_rate": 1.2482597136212877e-06, - "loss": 0.9861, - "step": 8436 - }, - { - "epoch": 0.6340748534495716, - "grad_norm": 1.7512083650114731, - "learning_rate": 1.2478085927032935e-06, - "loss": 1.0152, - "step": 8437 - }, - { - "epoch": 0.6341500075154066, - "grad_norm": 8.276735392344182, - "learning_rate": 1.2473575163555215e-06, - "loss": 0.9929, - "step": 8438 - }, - { - "epoch": 0.6342251615812415, - "grad_norm": 0.7955094773658884, - "learning_rate": 1.2469064846046986e-06, - "loss": 0.9172, - "step": 8439 - }, - { - "epoch": 0.6343003156470765, - "grad_norm": 1.6884889177495834, - "learning_rate": 1.2464554974775496e-06, - "loss": 0.9772, - "step": 8440 - }, - { - "epoch": 0.6343754697129115, - "grad_norm": 2.2547394321564935, - "learning_rate": 1.2460045550007985e-06, - "loss": 0.9667, - "step": 8441 - }, - { - "epoch": 0.6344506237787464, - "grad_norm": 4.115033681182406, - "learning_rate": 1.2455536572011643e-06, - "loss": 0.9876, - "step": 8442 - }, - { - "epoch": 0.6345257778445814, - "grad_norm": 1.993667531475336, - "learning_rate": 1.2451028041053656e-06, - "loss": 0.9883, - "step": 8443 - }, - { - "epoch": 0.6346009319104163, - "grad_norm": 1.9463713801269893, - "learning_rate": 1.2446519957401157e-06, - "loss": 0.9152, - "step": 8444 - }, - { - "epoch": 0.6346760859762514, - "grad_norm": 2.2799977039860995, - "learning_rate": 1.2442012321321277e-06, - "loss": 0.9431, - "step": 8445 - }, - { - "epoch": 0.6347512400420863, - "grad_norm": 1.5842940360605866, - "learning_rate": 1.2437505133081112e-06, - "loss": 0.9513, - "step": 8446 - }, - { - "epoch": 0.6348263941079212, - "grad_norm": 0.7955927911368538, - "learning_rate": 1.2432998392947723e-06, - "loss": 0.8329, - "step": 8447 - }, - { - "epoch": 0.6349015481737562, - "grad_norm": 1.9700542549949993, - "learning_rate": 1.2428492101188156e-06, - "loss": 1.0912, - "step": 8448 - }, - { - "epoch": 0.6349767022395911, - "grad_norm": 2.9403461709224645, - "learning_rate": 1.2423986258069428e-06, - "loss": 0.9412, - "step": 8449 - }, - { - "epoch": 0.6350518563054262, - "grad_norm": 2.359805844612556, - "learning_rate": 1.241948086385852e-06, - "loss": 1.0022, - "step": 8450 - }, - { - "epoch": 0.6351270103712611, - "grad_norm": 0.6837402622708074, - "learning_rate": 1.24149759188224e-06, - "loss": 0.8469, - "step": 8451 - }, - { - "epoch": 0.6352021644370961, - "grad_norm": 0.7346144346756465, - "learning_rate": 1.2410471423228002e-06, - "loss": 0.8468, - "step": 8452 - }, - { - "epoch": 0.635277318502931, - "grad_norm": 1.5974567913725004, - "learning_rate": 1.2405967377342236e-06, - "loss": 0.8416, - "step": 8453 - }, - { - "epoch": 0.6353524725687659, - "grad_norm": 1.5768652694226533, - "learning_rate": 1.2401463781431974e-06, - "loss": 0.9903, - "step": 8454 - }, - { - "epoch": 0.635427626634601, - "grad_norm": 1.516868160123929, - "learning_rate": 1.2396960635764093e-06, - "loss": 1.0347, - "step": 8455 - }, - { - "epoch": 0.6355027807004359, - "grad_norm": 1.5145522578364634, - "learning_rate": 1.2392457940605402e-06, - "loss": 0.8997, - "step": 8456 - }, - { - "epoch": 0.6355779347662709, - "grad_norm": 1.4353233941958528, - "learning_rate": 1.2387955696222702e-06, - "loss": 1.0037, - "step": 8457 - }, - { - "epoch": 0.6356530888321058, - "grad_norm": 1.6003993579048026, - "learning_rate": 1.2383453902882787e-06, - "loss": 1.0839, - "step": 8458 - }, - { - "epoch": 0.6357282428979408, - "grad_norm": 2.1970362293449326, - "learning_rate": 1.2378952560852386e-06, - "loss": 1.0276, - "step": 8459 - }, - { - "epoch": 0.6358033969637757, - "grad_norm": 1.5296792518151765, - "learning_rate": 1.2374451670398233e-06, - "loss": 0.9371, - "step": 8460 - }, - { - "epoch": 0.6358785510296107, - "grad_norm": 1.6141943760457838, - "learning_rate": 1.236995123178702e-06, - "loss": 1.0026, - "step": 8461 - }, - { - "epoch": 0.6359537050954457, - "grad_norm": 1.7360287591687913, - "learning_rate": 1.2365451245285413e-06, - "loss": 0.9516, - "step": 8462 - }, - { - "epoch": 0.6360288591612806, - "grad_norm": 1.5828902561561422, - "learning_rate": 1.2360951711160055e-06, - "loss": 0.9772, - "step": 8463 - }, - { - "epoch": 0.6361040132271156, - "grad_norm": 1.711936902657104, - "learning_rate": 1.2356452629677554e-06, - "loss": 0.9955, - "step": 8464 - }, - { - "epoch": 0.6361791672929505, - "grad_norm": 2.1318831322793623, - "learning_rate": 1.235195400110451e-06, - "loss": 1.036, - "step": 8465 - }, - { - "epoch": 0.6362543213587855, - "grad_norm": 1.6336986564618097, - "learning_rate": 1.2347455825707477e-06, - "loss": 0.816, - "step": 8466 - }, - { - "epoch": 0.6363294754246205, - "grad_norm": 1.5238289354640484, - "learning_rate": 1.2342958103752987e-06, - "loss": 0.9111, - "step": 8467 - }, - { - "epoch": 0.6364046294904554, - "grad_norm": 1.6312126827721662, - "learning_rate": 1.2338460835507554e-06, - "loss": 0.9427, - "step": 8468 - }, - { - "epoch": 0.6364797835562904, - "grad_norm": 2.0590488574996955, - "learning_rate": 1.233396402123765e-06, - "loss": 1.0225, - "step": 8469 - }, - { - "epoch": 0.6365549376221253, - "grad_norm": 1.6722418837097273, - "learning_rate": 1.2329467661209738e-06, - "loss": 1.018, - "step": 8470 - }, - { - "epoch": 0.6366300916879604, - "grad_norm": 2.1421991950341206, - "learning_rate": 1.232497175569024e-06, - "loss": 0.9136, - "step": 8471 - }, - { - "epoch": 0.6367052457537953, - "grad_norm": 1.7660384242715157, - "learning_rate": 1.2320476304945548e-06, - "loss": 0.9513, - "step": 8472 - }, - { - "epoch": 0.6367803998196302, - "grad_norm": 1.645360017472519, - "learning_rate": 1.2315981309242046e-06, - "loss": 0.9202, - "step": 8473 - }, - { - "epoch": 0.6368555538854652, - "grad_norm": 1.757272120172783, - "learning_rate": 1.2311486768846075e-06, - "loss": 0.9522, - "step": 8474 - }, - { - "epoch": 0.6369307079513001, - "grad_norm": 1.7636498400218872, - "learning_rate": 1.2306992684023955e-06, - "loss": 0.9251, - "step": 8475 - }, - { - "epoch": 0.6370058620171352, - "grad_norm": 0.7506602083535909, - "learning_rate": 1.230249905504197e-06, - "loss": 0.8005, - "step": 8476 - }, - { - "epoch": 0.6370810160829701, - "grad_norm": 1.818485964456871, - "learning_rate": 1.2298005882166406e-06, - "loss": 0.9816, - "step": 8477 - }, - { - "epoch": 0.6371561701488051, - "grad_norm": 3.5500439171022227, - "learning_rate": 1.229351316566348e-06, - "loss": 0.9917, - "step": 8478 - }, - { - "epoch": 0.63723132421464, - "grad_norm": 1.5820399154614144, - "learning_rate": 1.2289020905799401e-06, - "loss": 0.9412, - "step": 8479 - }, - { - "epoch": 0.6373064782804749, - "grad_norm": 2.8730933459871886, - "learning_rate": 1.2284529102840369e-06, - "loss": 1.0383, - "step": 8480 - }, - { - "epoch": 0.63738163234631, - "grad_norm": 0.784443242117465, - "learning_rate": 1.2280037757052527e-06, - "loss": 0.8722, - "step": 8481 - }, - { - "epoch": 0.6374567864121449, - "grad_norm": 1.7880466692293473, - "learning_rate": 1.2275546868702017e-06, - "loss": 0.9655, - "step": 8482 - }, - { - "epoch": 0.6375319404779799, - "grad_norm": 2.1191590761005497, - "learning_rate": 1.2271056438054933e-06, - "loss": 0.9061, - "step": 8483 - }, - { - "epoch": 0.6376070945438148, - "grad_norm": 1.849151844769225, - "learning_rate": 1.2266566465377343e-06, - "loss": 0.9423, - "step": 8484 - }, - { - "epoch": 0.6376822486096497, - "grad_norm": 1.7582312869189825, - "learning_rate": 1.2262076950935311e-06, - "loss": 0.9242, - "step": 8485 - }, - { - "epoch": 0.6377574026754848, - "grad_norm": 2.5752831719997524, - "learning_rate": 1.2257587894994842e-06, - "loss": 0.9384, - "step": 8486 - }, - { - "epoch": 0.6378325567413197, - "grad_norm": 1.535327697964418, - "learning_rate": 1.2253099297821948e-06, - "loss": 1.0271, - "step": 8487 - }, - { - "epoch": 0.6379077108071547, - "grad_norm": 4.685344963604976, - "learning_rate": 1.2248611159682582e-06, - "loss": 0.9341, - "step": 8488 - }, - { - "epoch": 0.6379828648729896, - "grad_norm": 2.055490779142453, - "learning_rate": 1.2244123480842685e-06, - "loss": 1.0211, - "step": 8489 - }, - { - "epoch": 0.6380580189388246, - "grad_norm": 3.289348381466792, - "learning_rate": 1.2239636261568174e-06, - "loss": 0.9291, - "step": 8490 - }, - { - "epoch": 0.6381331730046595, - "grad_norm": 2.459273487211193, - "learning_rate": 1.2235149502124924e-06, - "loss": 0.9881, - "step": 8491 - }, - { - "epoch": 0.6382083270704945, - "grad_norm": 3.4991687059922683, - "learning_rate": 1.2230663202778806e-06, - "loss": 0.9686, - "step": 8492 - }, - { - "epoch": 0.6382834811363295, - "grad_norm": 1.683707744518453, - "learning_rate": 1.2226177363795645e-06, - "loss": 1.0329, - "step": 8493 - }, - { - "epoch": 0.6383586352021644, - "grad_norm": 2.848663444995137, - "learning_rate": 1.2221691985441238e-06, - "loss": 0.9186, - "step": 8494 - }, - { - "epoch": 0.6384337892679994, - "grad_norm": 1.6564025808624228, - "learning_rate": 1.221720706798137e-06, - "loss": 1.0024, - "step": 8495 - }, - { - "epoch": 0.6385089433338343, - "grad_norm": 0.8326088776286733, - "learning_rate": 1.221272261168178e-06, - "loss": 0.8662, - "step": 8496 - }, - { - "epoch": 0.6385840973996694, - "grad_norm": 1.5307606284884474, - "learning_rate": 1.2208238616808202e-06, - "loss": 0.9291, - "step": 8497 - }, - { - "epoch": 0.6386592514655043, - "grad_norm": 1.7394937808196187, - "learning_rate": 1.2203755083626312e-06, - "loss": 0.9029, - "step": 8498 - }, - { - "epoch": 0.6387344055313392, - "grad_norm": 1.7927418816674832, - "learning_rate": 1.21992720124018e-06, - "loss": 1.0219, - "step": 8499 - }, - { - "epoch": 0.6388095595971742, - "grad_norm": 1.4923114026142252, - "learning_rate": 1.2194789403400289e-06, - "loss": 0.8966, - "step": 8500 - }, - { - "epoch": 0.6388847136630091, - "grad_norm": 2.048041324436976, - "learning_rate": 1.2190307256887384e-06, - "loss": 0.9674, - "step": 8501 - }, - { - "epoch": 0.6389598677288442, - "grad_norm": 1.934384640868386, - "learning_rate": 1.218582557312869e-06, - "loss": 0.897, - "step": 8502 - }, - { - "epoch": 0.6390350217946791, - "grad_norm": 3.0858719830861574, - "learning_rate": 1.2181344352389746e-06, - "loss": 1.022, - "step": 8503 - }, - { - "epoch": 0.6391101758605141, - "grad_norm": 1.7050393221395552, - "learning_rate": 1.2176863594936095e-06, - "loss": 0.9126, - "step": 8504 - }, - { - "epoch": 0.639185329926349, - "grad_norm": 1.751977280330098, - "learning_rate": 1.2172383301033233e-06, - "loss": 1.0232, - "step": 8505 - }, - { - "epoch": 0.6392604839921839, - "grad_norm": 1.5647642133499657, - "learning_rate": 1.216790347094663e-06, - "loss": 1.0475, - "step": 8506 - }, - { - "epoch": 0.639335638058019, - "grad_norm": 2.850719254066056, - "learning_rate": 1.2163424104941743e-06, - "loss": 0.9426, - "step": 8507 - }, - { - "epoch": 0.6394107921238539, - "grad_norm": 1.5431732428523521, - "learning_rate": 1.215894520328398e-06, - "loss": 0.9481, - "step": 8508 - }, - { - "epoch": 0.6394859461896889, - "grad_norm": 1.8045933025927063, - "learning_rate": 1.2154466766238742e-06, - "loss": 1.0026, - "step": 8509 - }, - { - "epoch": 0.6395611002555238, - "grad_norm": 1.5175454468811171, - "learning_rate": 1.2149988794071392e-06, - "loss": 0.9336, - "step": 8510 - }, - { - "epoch": 0.6396362543213587, - "grad_norm": 1.7675848017613884, - "learning_rate": 1.214551128704726e-06, - "loss": 1.0251, - "step": 8511 - }, - { - "epoch": 0.6397114083871938, - "grad_norm": 1.7032875193477095, - "learning_rate": 1.214103424543167e-06, - "loss": 0.9837, - "step": 8512 - }, - { - "epoch": 0.6397865624530287, - "grad_norm": 1.6794759927699257, - "learning_rate": 1.2136557669489886e-06, - "loss": 0.9728, - "step": 8513 - }, - { - "epoch": 0.6398617165188637, - "grad_norm": 1.703308865482799, - "learning_rate": 1.2132081559487177e-06, - "loss": 0.9122, - "step": 8514 - }, - { - "epoch": 0.6399368705846986, - "grad_norm": 2.3149603050569265, - "learning_rate": 1.2127605915688764e-06, - "loss": 1.0163, - "step": 8515 - }, - { - "epoch": 0.6400120246505336, - "grad_norm": 0.7541076312133389, - "learning_rate": 1.2123130738359842e-06, - "loss": 0.8689, - "step": 8516 - }, - { - "epoch": 0.6400871787163686, - "grad_norm": 1.2558121027140159, - "learning_rate": 1.2118656027765591e-06, - "loss": 0.9538, - "step": 8517 - }, - { - "epoch": 0.6401623327822035, - "grad_norm": 1.7026967901469232, - "learning_rate": 1.2114181784171144e-06, - "loss": 0.9424, - "step": 8518 - }, - { - "epoch": 0.6402374868480385, - "grad_norm": 1.5238398876386487, - "learning_rate": 1.2109708007841629e-06, - "loss": 0.9919, - "step": 8519 - }, - { - "epoch": 0.6403126409138734, - "grad_norm": 1.7181144926624798, - "learning_rate": 1.2105234699042117e-06, - "loss": 0.9752, - "step": 8520 - }, - { - "epoch": 0.6403877949797084, - "grad_norm": 3.2888121158500097, - "learning_rate": 1.2100761858037692e-06, - "loss": 1.026, - "step": 8521 - }, - { - "epoch": 0.6404629490455434, - "grad_norm": 1.7196059436511697, - "learning_rate": 1.2096289485093379e-06, - "loss": 0.9836, - "step": 8522 - }, - { - "epoch": 0.6405381031113784, - "grad_norm": 2.0561230652191447, - "learning_rate": 1.2091817580474164e-06, - "loss": 0.9123, - "step": 8523 - }, - { - "epoch": 0.6406132571772133, - "grad_norm": 1.5975439503195255, - "learning_rate": 1.2087346144445053e-06, - "loss": 0.9607, - "step": 8524 - }, - { - "epoch": 0.6406884112430482, - "grad_norm": 1.8963665753756325, - "learning_rate": 1.2082875177270974e-06, - "loss": 0.8994, - "step": 8525 - }, - { - "epoch": 0.6407635653088832, - "grad_norm": 1.669894605268937, - "learning_rate": 1.2078404679216862e-06, - "loss": 0.9671, - "step": 8526 - }, - { - "epoch": 0.6408387193747181, - "grad_norm": 1.964529569772742, - "learning_rate": 1.207393465054761e-06, - "loss": 1.0405, - "step": 8527 - }, - { - "epoch": 0.6409138734405532, - "grad_norm": 1.9729362666745267, - "learning_rate": 1.2069465091528074e-06, - "loss": 0.9344, - "step": 8528 - }, - { - "epoch": 0.6409890275063881, - "grad_norm": 2.321043676390253, - "learning_rate": 1.2064996002423105e-06, - "loss": 0.9778, - "step": 8529 - }, - { - "epoch": 0.641064181572223, - "grad_norm": 1.5572192410199217, - "learning_rate": 1.2060527383497501e-06, - "loss": 0.9736, - "step": 8530 - }, - { - "epoch": 0.641139335638058, - "grad_norm": 2.082490623425535, - "learning_rate": 1.2056059235016056e-06, - "loss": 1.0094, - "step": 8531 - }, - { - "epoch": 0.6412144897038929, - "grad_norm": 2.1235611476083838, - "learning_rate": 1.2051591557243526e-06, - "loss": 0.9944, - "step": 8532 - }, - { - "epoch": 0.641289643769728, - "grad_norm": 1.9659725154923053, - "learning_rate": 1.2047124350444624e-06, - "loss": 0.9206, - "step": 8533 - }, - { - "epoch": 0.6413647978355629, - "grad_norm": 0.7181194690992677, - "learning_rate": 1.2042657614884062e-06, - "loss": 0.8776, - "step": 8534 - }, - { - "epoch": 0.6414399519013979, - "grad_norm": 1.4997479703889147, - "learning_rate": 1.2038191350826506e-06, - "loss": 0.9534, - "step": 8535 - }, - { - "epoch": 0.6415151059672328, - "grad_norm": 2.3852388629419434, - "learning_rate": 1.20337255585366e-06, - "loss": 0.8297, - "step": 8536 - }, - { - "epoch": 0.6415902600330677, - "grad_norm": 1.5009702087942947, - "learning_rate": 1.2029260238278962e-06, - "loss": 0.9584, - "step": 8537 - }, - { - "epoch": 0.6416654140989028, - "grad_norm": 1.2737165742031629, - "learning_rate": 1.2024795390318172e-06, - "loss": 0.9228, - "step": 8538 - }, - { - "epoch": 0.6417405681647377, - "grad_norm": 1.8096858572882455, - "learning_rate": 1.2020331014918799e-06, - "loss": 0.9881, - "step": 8539 - }, - { - "epoch": 0.6418157222305727, - "grad_norm": 1.1914637817500144, - "learning_rate": 1.2015867112345367e-06, - "loss": 0.9933, - "step": 8540 - }, - { - "epoch": 0.6418908762964076, - "grad_norm": 3.528101910362679, - "learning_rate": 1.2011403682862384e-06, - "loss": 0.9937, - "step": 8541 - }, - { - "epoch": 0.6419660303622426, - "grad_norm": 1.760579419559971, - "learning_rate": 1.2006940726734315e-06, - "loss": 1.0228, - "step": 8542 - }, - { - "epoch": 0.6420411844280776, - "grad_norm": 1.6903172041802332, - "learning_rate": 1.2002478244225623e-06, - "loss": 0.9006, - "step": 8543 - }, - { - "epoch": 0.6421163384939125, - "grad_norm": 8.430591156528417, - "learning_rate": 1.1998016235600726e-06, - "loss": 1.0097, - "step": 8544 - }, - { - "epoch": 0.6421914925597475, - "grad_norm": 1.7347311393129208, - "learning_rate": 1.1993554701123993e-06, - "loss": 0.8901, - "step": 8545 - }, - { - "epoch": 0.6422666466255824, - "grad_norm": 1.8671137583707609, - "learning_rate": 1.1989093641059813e-06, - "loss": 1.0645, - "step": 8546 - }, - { - "epoch": 0.6423418006914174, - "grad_norm": 1.6866908075969729, - "learning_rate": 1.1984633055672508e-06, - "loss": 0.9175, - "step": 8547 - }, - { - "epoch": 0.6424169547572524, - "grad_norm": 1.4678520541986295, - "learning_rate": 1.1980172945226389e-06, - "loss": 1.0237, - "step": 8548 - }, - { - "epoch": 0.6424921088230874, - "grad_norm": 1.5259692160774863, - "learning_rate": 1.1975713309985732e-06, - "loss": 1.0685, - "step": 8549 - }, - { - "epoch": 0.6425672628889223, - "grad_norm": 1.927417238192864, - "learning_rate": 1.1971254150214788e-06, - "loss": 0.8357, - "step": 8550 - }, - { - "epoch": 0.6426424169547572, - "grad_norm": 2.0706851264014516, - "learning_rate": 1.1966795466177782e-06, - "loss": 1.067, - "step": 8551 - }, - { - "epoch": 0.6427175710205922, - "grad_norm": 1.9209754513388781, - "learning_rate": 1.1962337258138902e-06, - "loss": 0.9491, - "step": 8552 - }, - { - "epoch": 0.6427927250864272, - "grad_norm": 1.5105479830078608, - "learning_rate": 1.1957879526362323e-06, - "loss": 0.9074, - "step": 8553 - }, - { - "epoch": 0.6428678791522622, - "grad_norm": 2.6897664826015126, - "learning_rate": 1.1953422271112175e-06, - "loss": 0.9688, - "step": 8554 - }, - { - "epoch": 0.6429430332180971, - "grad_norm": 1.7485903102993294, - "learning_rate": 1.1948965492652565e-06, - "loss": 0.8632, - "step": 8555 - }, - { - "epoch": 0.643018187283932, - "grad_norm": 1.788680323130744, - "learning_rate": 1.1944509191247585e-06, - "loss": 1.0263, - "step": 8556 - }, - { - "epoch": 0.643093341349767, - "grad_norm": 1.9043892079979188, - "learning_rate": 1.1940053367161278e-06, - "loss": 0.9537, - "step": 8557 - }, - { - "epoch": 0.643168495415602, - "grad_norm": 1.9457748660248828, - "learning_rate": 1.1935598020657676e-06, - "loss": 0.8892, - "step": 8558 - }, - { - "epoch": 0.643243649481437, - "grad_norm": 2.379343077292905, - "learning_rate": 1.193114315200077e-06, - "loss": 0.9825, - "step": 8559 - }, - { - "epoch": 0.6433188035472719, - "grad_norm": 1.7430835694607134, - "learning_rate": 1.1926688761454531e-06, - "loss": 0.9074, - "step": 8560 - }, - { - "epoch": 0.6433939576131069, - "grad_norm": 2.103945377409558, - "learning_rate": 1.1922234849282897e-06, - "loss": 0.9515, - "step": 8561 - }, - { - "epoch": 0.6434691116789418, - "grad_norm": 1.7055568307104876, - "learning_rate": 1.1917781415749774e-06, - "loss": 1.1178, - "step": 8562 - }, - { - "epoch": 0.6435442657447767, - "grad_norm": 1.8843319408754182, - "learning_rate": 1.1913328461119062e-06, - "loss": 1.0233, - "step": 8563 - }, - { - "epoch": 0.6436194198106118, - "grad_norm": 1.8628146629020166, - "learning_rate": 1.1908875985654593e-06, - "loss": 0.9257, - "step": 8564 - }, - { - "epoch": 0.6436945738764467, - "grad_norm": 1.6930843072664281, - "learning_rate": 1.1904423989620216e-06, - "loss": 1.0008, - "step": 8565 - }, - { - "epoch": 0.6437697279422817, - "grad_norm": 1.992296402890447, - "learning_rate": 1.1899972473279713e-06, - "loss": 0.9484, - "step": 8566 - }, - { - "epoch": 0.6438448820081166, - "grad_norm": 1.6879666707468262, - "learning_rate": 1.1895521436896857e-06, - "loss": 0.9646, - "step": 8567 - }, - { - "epoch": 0.6439200360739517, - "grad_norm": 1.922374071280361, - "learning_rate": 1.1891070880735395e-06, - "loss": 1.0684, - "step": 8568 - }, - { - "epoch": 0.6439951901397866, - "grad_norm": 1.8861385740784877, - "learning_rate": 1.1886620805059027e-06, - "loss": 0.9261, - "step": 8569 - }, - { - "epoch": 0.6440703442056215, - "grad_norm": 1.2951055198009247, - "learning_rate": 1.1882171210131452e-06, - "loss": 0.9574, - "step": 8570 - }, - { - "epoch": 0.6441454982714565, - "grad_norm": 1.8303455006830616, - "learning_rate": 1.1877722096216313e-06, - "loss": 0.9023, - "step": 8571 - }, - { - "epoch": 0.6442206523372914, - "grad_norm": 12.64625256182328, - "learning_rate": 1.187327346357724e-06, - "loss": 0.916, - "step": 8572 - }, - { - "epoch": 0.6442958064031264, - "grad_norm": 1.521157696069959, - "learning_rate": 1.186882531247784e-06, - "loss": 0.9395, - "step": 8573 - }, - { - "epoch": 0.6443709604689614, - "grad_norm": 1.2440197465224794, - "learning_rate": 1.1864377643181671e-06, - "loss": 0.915, - "step": 8574 - }, - { - "epoch": 0.6444461145347963, - "grad_norm": 1.9002960902946637, - "learning_rate": 1.1859930455952283e-06, - "loss": 0.8915, - "step": 8575 - }, - { - "epoch": 0.6445212686006313, - "grad_norm": 2.3267847081857553, - "learning_rate": 1.185548375105319e-06, - "loss": 1.0674, - "step": 8576 - }, - { - "epoch": 0.6445964226664662, - "grad_norm": 1.7399887099941145, - "learning_rate": 1.1851037528747863e-06, - "loss": 0.9433, - "step": 8577 - }, - { - "epoch": 0.6446715767323012, - "grad_norm": 2.0585708182575937, - "learning_rate": 1.1846591789299774e-06, - "loss": 1.0025, - "step": 8578 - }, - { - "epoch": 0.6447467307981362, - "grad_norm": 1.4971484259067123, - "learning_rate": 1.184214653297234e-06, - "loss": 1.0038, - "step": 8579 - }, - { - "epoch": 0.6448218848639712, - "grad_norm": 2.294929147262414, - "learning_rate": 1.1837701760028962e-06, - "loss": 0.995, - "step": 8580 - }, - { - "epoch": 0.6448970389298061, - "grad_norm": 1.4839509860321947, - "learning_rate": 1.1833257470733013e-06, - "loss": 0.9596, - "step": 8581 - }, - { - "epoch": 0.644972192995641, - "grad_norm": 1.9038712715501627, - "learning_rate": 1.1828813665347828e-06, - "loss": 0.9403, - "step": 8582 - }, - { - "epoch": 0.645047347061476, - "grad_norm": 1.9442928966622994, - "learning_rate": 1.1824370344136724e-06, - "loss": 0.9443, - "step": 8583 - }, - { - "epoch": 0.645122501127311, - "grad_norm": 2.4597685444502386, - "learning_rate": 1.181992750736298e-06, - "loss": 1.0103, - "step": 8584 - }, - { - "epoch": 0.645197655193146, - "grad_norm": 0.7794671506022242, - "learning_rate": 1.1815485155289864e-06, - "loss": 0.9427, - "step": 8585 - }, - { - "epoch": 0.6452728092589809, - "grad_norm": 0.7577255828407118, - "learning_rate": 1.1811043288180583e-06, - "loss": 0.8117, - "step": 8586 - }, - { - "epoch": 0.6453479633248159, - "grad_norm": 1.9780563921126737, - "learning_rate": 1.180660190629835e-06, - "loss": 0.9511, - "step": 8587 - }, - { - "epoch": 0.6454231173906508, - "grad_norm": 2.009447078094336, - "learning_rate": 1.180216100990633e-06, - "loss": 1.039, - "step": 8588 - }, - { - "epoch": 0.6454982714564858, - "grad_norm": 1.7713592720649842, - "learning_rate": 1.179772059926766e-06, - "loss": 0.9511, - "step": 8589 - }, - { - "epoch": 0.6455734255223208, - "grad_norm": 2.095824167928, - "learning_rate": 1.1793280674645454e-06, - "loss": 0.982, - "step": 8590 - }, - { - "epoch": 0.6456485795881557, - "grad_norm": 7.25962296192553, - "learning_rate": 1.1788841236302789e-06, - "loss": 0.9255, - "step": 8591 - }, - { - "epoch": 0.6457237336539907, - "grad_norm": 2.3725902981552442, - "learning_rate": 1.178440228450273e-06, - "loss": 0.8823, - "step": 8592 - }, - { - "epoch": 0.6457988877198256, - "grad_norm": 1.8175422643603658, - "learning_rate": 1.1779963819508293e-06, - "loss": 1.0852, - "step": 8593 - }, - { - "epoch": 0.6458740417856607, - "grad_norm": 1.8147698363131657, - "learning_rate": 1.1775525841582475e-06, - "loss": 0.9887, - "step": 8594 - }, - { - "epoch": 0.6459491958514956, - "grad_norm": 1.74822969812546, - "learning_rate": 1.1771088350988247e-06, - "loss": 1.0146, - "step": 8595 - }, - { - "epoch": 0.6460243499173305, - "grad_norm": 1.830435377037356, - "learning_rate": 1.1766651347988542e-06, - "loss": 1.014, - "step": 8596 - }, - { - "epoch": 0.6460995039831655, - "grad_norm": 1.5454442507100061, - "learning_rate": 1.1762214832846274e-06, - "loss": 0.9923, - "step": 8597 - }, - { - "epoch": 0.6461746580490004, - "grad_norm": 1.4668952919036373, - "learning_rate": 1.1757778805824324e-06, - "loss": 1.0159, - "step": 8598 - }, - { - "epoch": 0.6462498121148355, - "grad_norm": 4.884828294842386, - "learning_rate": 1.1753343267185535e-06, - "loss": 0.9335, - "step": 8599 - }, - { - "epoch": 0.6463249661806704, - "grad_norm": 1.8086098985383616, - "learning_rate": 1.1748908217192744e-06, - "loss": 1.0385, - "step": 8600 - }, - { - "epoch": 0.6464001202465053, - "grad_norm": 1.9253549471502311, - "learning_rate": 1.1744473656108729e-06, - "loss": 0.9077, - "step": 8601 - }, - { - "epoch": 0.6464752743123403, - "grad_norm": 2.079647796962654, - "learning_rate": 1.1740039584196265e-06, - "loss": 0.9675, - "step": 8602 - }, - { - "epoch": 0.6465504283781752, - "grad_norm": 1.671181508203647, - "learning_rate": 1.1735606001718087e-06, - "loss": 1.0043, - "step": 8603 - }, - { - "epoch": 0.6466255824440102, - "grad_norm": 1.419337933088679, - "learning_rate": 1.17311729089369e-06, - "loss": 0.9884, - "step": 8604 - }, - { - "epoch": 0.6467007365098452, - "grad_norm": 2.8302075216277895, - "learning_rate": 1.172674030611538e-06, - "loss": 1.0134, - "step": 8605 - }, - { - "epoch": 0.6467758905756802, - "grad_norm": 1.6085101275873732, - "learning_rate": 1.172230819351617e-06, - "loss": 1.0305, - "step": 8606 - }, - { - "epoch": 0.6468510446415151, - "grad_norm": 0.7564778323660292, - "learning_rate": 1.1717876571401913e-06, - "loss": 0.8328, - "step": 8607 - }, - { - "epoch": 0.64692619870735, - "grad_norm": 1.4318181405335788, - "learning_rate": 1.1713445440035168e-06, - "loss": 0.9108, - "step": 8608 - }, - { - "epoch": 0.647001352773185, - "grad_norm": 1.7438011052562712, - "learning_rate": 1.170901479967852e-06, - "loss": 1.0339, - "step": 8609 - }, - { - "epoch": 0.64707650683902, - "grad_norm": 1.915239991769757, - "learning_rate": 1.1704584650594495e-06, - "loss": 0.9805, - "step": 8610 - }, - { - "epoch": 0.647151660904855, - "grad_norm": 1.5079696652424757, - "learning_rate": 1.1700154993045588e-06, - "loss": 0.9007, - "step": 8611 - }, - { - "epoch": 0.6472268149706899, - "grad_norm": 2.2060993218772045, - "learning_rate": 1.1695725827294286e-06, - "loss": 0.992, - "step": 8612 - }, - { - "epoch": 0.6473019690365249, - "grad_norm": 1.5120820784316116, - "learning_rate": 1.1691297153603023e-06, - "loss": 0.9891, - "step": 8613 - }, - { - "epoch": 0.6473771231023598, - "grad_norm": 1.524901249299159, - "learning_rate": 1.1686868972234227e-06, - "loss": 0.9918, - "step": 8614 - }, - { - "epoch": 0.6474522771681948, - "grad_norm": 1.5730099260922796, - "learning_rate": 1.1682441283450275e-06, - "loss": 0.9757, - "step": 8615 - }, - { - "epoch": 0.6475274312340298, - "grad_norm": 1.4117347568603023, - "learning_rate": 1.1678014087513522e-06, - "loss": 0.9009, - "step": 8616 - }, - { - "epoch": 0.6476025852998647, - "grad_norm": 1.7598727211662781, - "learning_rate": 1.1673587384686308e-06, - "loss": 0.9902, - "step": 8617 - }, - { - "epoch": 0.6476777393656997, - "grad_norm": 1.442906371845588, - "learning_rate": 1.1669161175230913e-06, - "loss": 0.9243, - "step": 8618 - }, - { - "epoch": 0.6477528934315346, - "grad_norm": 1.5925688012799644, - "learning_rate": 1.1664735459409632e-06, - "loss": 0.8857, - "step": 8619 - }, - { - "epoch": 0.6478280474973696, - "grad_norm": 1.3773381553878152, - "learning_rate": 1.1660310237484691e-06, - "loss": 0.9411, - "step": 8620 - }, - { - "epoch": 0.6479032015632046, - "grad_norm": 2.2282562773607655, - "learning_rate": 1.1655885509718304e-06, - "loss": 0.9316, - "step": 8621 - }, - { - "epoch": 0.6479783556290395, - "grad_norm": 0.7214691309203969, - "learning_rate": 1.165146127637265e-06, - "loss": 0.8635, - "step": 8622 - }, - { - "epoch": 0.6480535096948745, - "grad_norm": 1.7580194648125111, - "learning_rate": 1.1647037537709876e-06, - "loss": 0.9622, - "step": 8623 - }, - { - "epoch": 0.6481286637607094, - "grad_norm": 1.9143720104108253, - "learning_rate": 1.1642614293992123e-06, - "loss": 0.892, - "step": 8624 - }, - { - "epoch": 0.6482038178265445, - "grad_norm": 1.7027325659276493, - "learning_rate": 1.1638191545481476e-06, - "loss": 0.9449, - "step": 8625 - }, - { - "epoch": 0.6482789718923794, - "grad_norm": 1.7907234045021818, - "learning_rate": 1.163376929244e-06, - "loss": 0.9929, - "step": 8626 - }, - { - "epoch": 0.6483541259582143, - "grad_norm": 2.070733369477945, - "learning_rate": 1.1629347535129728e-06, - "loss": 0.8764, - "step": 8627 - }, - { - "epoch": 0.6484292800240493, - "grad_norm": 2.1911241210364696, - "learning_rate": 1.1624926273812664e-06, - "loss": 0.9951, - "step": 8628 - }, - { - "epoch": 0.6485044340898842, - "grad_norm": 0.8160817354629981, - "learning_rate": 1.162050550875079e-06, - "loss": 0.8464, - "step": 8629 - }, - { - "epoch": 0.6485795881557193, - "grad_norm": 1.8770501055159805, - "learning_rate": 1.1616085240206058e-06, - "loss": 0.9479, - "step": 8630 - }, - { - "epoch": 0.6486547422215542, - "grad_norm": 5.648337208977378, - "learning_rate": 1.1611665468440376e-06, - "loss": 1.0309, - "step": 8631 - }, - { - "epoch": 0.6487298962873892, - "grad_norm": 2.0582716398478844, - "learning_rate": 1.1607246193715637e-06, - "loss": 0.9659, - "step": 8632 - }, - { - "epoch": 0.6488050503532241, - "grad_norm": 1.8564129484948535, - "learning_rate": 1.160282741629369e-06, - "loss": 1.0267, - "step": 8633 - }, - { - "epoch": 0.648880204419059, - "grad_norm": 1.5557552485732242, - "learning_rate": 1.1598409136436385e-06, - "loss": 0.9467, - "step": 8634 - }, - { - "epoch": 0.648955358484894, - "grad_norm": 1.8513338615797692, - "learning_rate": 1.1593991354405505e-06, - "loss": 0.9805, - "step": 8635 - }, - { - "epoch": 0.649030512550729, - "grad_norm": 0.7149722291314959, - "learning_rate": 1.158957407046283e-06, - "loss": 0.8134, - "step": 8636 - }, - { - "epoch": 0.649105666616564, - "grad_norm": 1.7859373157731409, - "learning_rate": 1.1585157284870097e-06, - "loss": 1.0227, - "step": 8637 - }, - { - "epoch": 0.6491808206823989, - "grad_norm": 1.6798283007846926, - "learning_rate": 1.1580740997889008e-06, - "loss": 0.9906, - "step": 8638 - }, - { - "epoch": 0.6492559747482339, - "grad_norm": 1.4436326445736247, - "learning_rate": 1.1576325209781263e-06, - "loss": 1.0433, - "step": 8639 - }, - { - "epoch": 0.6493311288140688, - "grad_norm": 2.2011257325881277, - "learning_rate": 1.1571909920808498e-06, - "loss": 0.9302, - "step": 8640 - }, - { - "epoch": 0.6494062828799038, - "grad_norm": 0.7009181251518264, - "learning_rate": 1.156749513123235e-06, - "loss": 0.8552, - "step": 8641 - }, - { - "epoch": 0.6494814369457388, - "grad_norm": 0.6765762900341272, - "learning_rate": 1.1563080841314408e-06, - "loss": 0.7866, - "step": 8642 - }, - { - "epoch": 0.6495565910115737, - "grad_norm": 2.039426924488438, - "learning_rate": 1.155866705131623e-06, - "loss": 0.899, - "step": 8643 - }, - { - "epoch": 0.6496317450774087, - "grad_norm": 1.63779779885792, - "learning_rate": 1.1554253761499358e-06, - "loss": 0.9081, - "step": 8644 - }, - { - "epoch": 0.6497068991432436, - "grad_norm": 3.11315029352279, - "learning_rate": 1.154984097212528e-06, - "loss": 0.9781, - "step": 8645 - }, - { - "epoch": 0.6497820532090786, - "grad_norm": 2.8516589840804625, - "learning_rate": 1.154542868345549e-06, - "loss": 0.997, - "step": 8646 - }, - { - "epoch": 0.6498572072749136, - "grad_norm": 1.451384127100546, - "learning_rate": 1.1541016895751425e-06, - "loss": 0.9059, - "step": 8647 - }, - { - "epoch": 0.6499323613407485, - "grad_norm": 2.158304093014319, - "learning_rate": 1.1536605609274504e-06, - "loss": 0.8446, - "step": 8648 - }, - { - "epoch": 0.6500075154065835, - "grad_norm": 2.2069301345752845, - "learning_rate": 1.1532194824286107e-06, - "loss": 0.8719, - "step": 8649 - }, - { - "epoch": 0.6500826694724184, - "grad_norm": 1.6401929740650156, - "learning_rate": 1.1527784541047583e-06, - "loss": 1.0104, - "step": 8650 - }, - { - "epoch": 0.6501578235382535, - "grad_norm": 1.807126531467975, - "learning_rate": 1.1523374759820276e-06, - "loss": 0.9731, - "step": 8651 - }, - { - "epoch": 0.6502329776040884, - "grad_norm": 1.8420402228554043, - "learning_rate": 1.1518965480865474e-06, - "loss": 0.9806, - "step": 8652 - }, - { - "epoch": 0.6503081316699233, - "grad_norm": 1.5899691145674246, - "learning_rate": 1.1514556704444446e-06, - "loss": 0.9965, - "step": 8653 - }, - { - "epoch": 0.6503832857357583, - "grad_norm": 1.8366756821957473, - "learning_rate": 1.151014843081842e-06, - "loss": 0.9918, - "step": 8654 - }, - { - "epoch": 0.6504584398015932, - "grad_norm": 1.6245474552022485, - "learning_rate": 1.1505740660248606e-06, - "loss": 0.9886, - "step": 8655 - }, - { - "epoch": 0.6505335938674283, - "grad_norm": 1.5979121593161436, - "learning_rate": 1.1501333392996194e-06, - "loss": 0.9175, - "step": 8656 - }, - { - "epoch": 0.6506087479332632, - "grad_norm": 2.3064621122247324, - "learning_rate": 1.1496926629322316e-06, - "loss": 1.0524, - "step": 8657 - }, - { - "epoch": 0.6506839019990982, - "grad_norm": 0.7580981551887217, - "learning_rate": 1.14925203694881e-06, - "loss": 0.8487, - "step": 8658 - }, - { - "epoch": 0.6507590560649331, - "grad_norm": 2.1893998751260364, - "learning_rate": 1.148811461375463e-06, - "loss": 1.0106, - "step": 8659 - }, - { - "epoch": 0.650834210130768, - "grad_norm": 1.487610513124092, - "learning_rate": 1.1483709362382953e-06, - "loss": 1.0575, - "step": 8660 - }, - { - "epoch": 0.6509093641966031, - "grad_norm": 1.746186665701929, - "learning_rate": 1.1479304615634115e-06, - "loss": 0.7952, - "step": 8661 - }, - { - "epoch": 0.650984518262438, - "grad_norm": 2.102442787471532, - "learning_rate": 1.14749003737691e-06, - "loss": 0.9848, - "step": 8662 - }, - { - "epoch": 0.651059672328273, - "grad_norm": 1.6762716647233729, - "learning_rate": 1.147049663704889e-06, - "loss": 0.9975, - "step": 8663 - }, - { - "epoch": 0.6511348263941079, - "grad_norm": 2.3123391585217776, - "learning_rate": 1.1466093405734417e-06, - "loss": 1.0604, - "step": 8664 - }, - { - "epoch": 0.6512099804599428, - "grad_norm": 2.117203715920372, - "learning_rate": 1.1461690680086587e-06, - "loss": 1.0256, - "step": 8665 - }, - { - "epoch": 0.6512851345257779, - "grad_norm": 1.8278224727353989, - "learning_rate": 1.145728846036628e-06, - "loss": 1.0411, - "step": 8666 - }, - { - "epoch": 0.6513602885916128, - "grad_norm": 1.4292425912128224, - "learning_rate": 1.1452886746834335e-06, - "loss": 0.9289, - "step": 8667 - }, - { - "epoch": 0.6514354426574478, - "grad_norm": 2.085294245716187, - "learning_rate": 1.1448485539751586e-06, - "loss": 0.942, - "step": 8668 - }, - { - "epoch": 0.6515105967232827, - "grad_norm": 1.5238175920573713, - "learning_rate": 1.144408483937882e-06, - "loss": 0.881, - "step": 8669 - }, - { - "epoch": 0.6515857507891177, - "grad_norm": 1.6655150328902244, - "learning_rate": 1.1439684645976787e-06, - "loss": 0.9282, - "step": 8670 - }, - { - "epoch": 0.6516609048549526, - "grad_norm": 1.8901013997039207, - "learning_rate": 1.1435284959806218e-06, - "loss": 0.8497, - "step": 8671 - }, - { - "epoch": 0.6517360589207876, - "grad_norm": 2.0126749377304476, - "learning_rate": 1.1430885781127803e-06, - "loss": 0.9353, - "step": 8672 - }, - { - "epoch": 0.6518112129866226, - "grad_norm": 0.8958344607383081, - "learning_rate": 1.1426487110202228e-06, - "loss": 0.9179, - "step": 8673 - }, - { - "epoch": 0.6518863670524575, - "grad_norm": 2.0913286014430796, - "learning_rate": 1.142208894729012e-06, - "loss": 0.9194, - "step": 8674 - }, - { - "epoch": 0.6519615211182925, - "grad_norm": 2.490184876193101, - "learning_rate": 1.1417691292652091e-06, - "loss": 0.914, - "step": 8675 - }, - { - "epoch": 0.6520366751841274, - "grad_norm": 2.1470857563988472, - "learning_rate": 1.1413294146548716e-06, - "loss": 0.9993, - "step": 8676 - }, - { - "epoch": 0.6521118292499625, - "grad_norm": 2.260770028851243, - "learning_rate": 1.1408897509240537e-06, - "loss": 0.9842, - "step": 8677 - }, - { - "epoch": 0.6521869833157974, - "grad_norm": 1.605504464821917, - "learning_rate": 1.1404501380988084e-06, - "loss": 0.8982, - "step": 8678 - }, - { - "epoch": 0.6522621373816323, - "grad_norm": 1.899471864580812, - "learning_rate": 1.1400105762051833e-06, - "loss": 0.9049, - "step": 8679 - }, - { - "epoch": 0.6523372914474673, - "grad_norm": 1.6315028582918205, - "learning_rate": 1.139571065269226e-06, - "loss": 0.906, - "step": 8680 - }, - { - "epoch": 0.6524124455133022, - "grad_norm": 4.164935222731069, - "learning_rate": 1.1391316053169773e-06, - "loss": 0.9792, - "step": 8681 - }, - { - "epoch": 0.6524875995791373, - "grad_norm": 2.218640176983261, - "learning_rate": 1.1386921963744765e-06, - "loss": 0.9, - "step": 8682 - }, - { - "epoch": 0.6525627536449722, - "grad_norm": 1.3758147166837382, - "learning_rate": 1.1382528384677619e-06, - "loss": 1.0717, - "step": 8683 - }, - { - "epoch": 0.6526379077108072, - "grad_norm": 2.613163556241262, - "learning_rate": 1.137813531622866e-06, - "loss": 1.0017, - "step": 8684 - }, - { - "epoch": 0.6527130617766421, - "grad_norm": 1.9166125068752473, - "learning_rate": 1.1373742758658206e-06, - "loss": 0.9358, - "step": 8685 - }, - { - "epoch": 0.652788215842477, - "grad_norm": 1.6617918327555976, - "learning_rate": 1.1369350712226525e-06, - "loss": 1.0561, - "step": 8686 - }, - { - "epoch": 0.6528633699083121, - "grad_norm": 0.8046194621830854, - "learning_rate": 1.1364959177193863e-06, - "loss": 0.8489, - "step": 8687 - }, - { - "epoch": 0.652938523974147, - "grad_norm": 2.493751346138421, - "learning_rate": 1.1360568153820436e-06, - "loss": 1.033, - "step": 8688 - }, - { - "epoch": 0.653013678039982, - "grad_norm": 0.7227125816040743, - "learning_rate": 1.1356177642366422e-06, - "loss": 0.8946, - "step": 8689 - }, - { - "epoch": 0.6530888321058169, - "grad_norm": 1.4552119865544564, - "learning_rate": 1.1351787643091988e-06, - "loss": 0.9417, - "step": 8690 - }, - { - "epoch": 0.6531639861716518, - "grad_norm": 1.9611906822867091, - "learning_rate": 1.1347398156257253e-06, - "loss": 1.0124, - "step": 8691 - }, - { - "epoch": 0.6532391402374869, - "grad_norm": 2.4147722009668438, - "learning_rate": 1.134300918212231e-06, - "loss": 1.0308, - "step": 8692 - }, - { - "epoch": 0.6533142943033218, - "grad_norm": 2.0845323037151364, - "learning_rate": 1.1338620720947223e-06, - "loss": 0.8499, - "step": 8693 - }, - { - "epoch": 0.6533894483691568, - "grad_norm": 2.6508228284125397, - "learning_rate": 1.1334232772992018e-06, - "loss": 0.9618, - "step": 8694 - }, - { - "epoch": 0.6534646024349917, - "grad_norm": 2.3508350349680645, - "learning_rate": 1.132984533851671e-06, - "loss": 1.0842, - "step": 8695 - }, - { - "epoch": 0.6535397565008267, - "grad_norm": 1.4624670660761219, - "learning_rate": 1.132545841778127e-06, - "loss": 0.9969, - "step": 8696 - }, - { - "epoch": 0.6536149105666617, - "grad_norm": 1.2579261301970768, - "learning_rate": 1.1321072011045631e-06, - "loss": 0.9788, - "step": 8697 - }, - { - "epoch": 0.6536900646324966, - "grad_norm": 1.516609029757655, - "learning_rate": 1.1316686118569712e-06, - "loss": 1.0256, - "step": 8698 - }, - { - "epoch": 0.6537652186983316, - "grad_norm": 1.6007496329406479, - "learning_rate": 1.1312300740613382e-06, - "loss": 0.9887, - "step": 8699 - }, - { - "epoch": 0.6538403727641665, - "grad_norm": 2.206914469912358, - "learning_rate": 1.130791587743651e-06, - "loss": 0.9816, - "step": 8700 - }, - { - "epoch": 0.6539155268300015, - "grad_norm": 1.6090320851438416, - "learning_rate": 1.1303531529298898e-06, - "loss": 0.8948, - "step": 8701 - }, - { - "epoch": 0.6539906808958365, - "grad_norm": 0.7724534913293727, - "learning_rate": 1.1299147696460361e-06, - "loss": 0.8326, - "step": 8702 - }, - { - "epoch": 0.6540658349616715, - "grad_norm": 0.8437184928380203, - "learning_rate": 1.129476437918063e-06, - "loss": 0.8573, - "step": 8703 - }, - { - "epoch": 0.6541409890275064, - "grad_norm": 1.9073054409172099, - "learning_rate": 1.1290381577719436e-06, - "loss": 1.0406, - "step": 8704 - }, - { - "epoch": 0.6542161430933413, - "grad_norm": 9.097935503174435, - "learning_rate": 1.1285999292336495e-06, - "loss": 0.9104, - "step": 8705 - }, - { - "epoch": 0.6542912971591763, - "grad_norm": 1.6924491644793718, - "learning_rate": 1.1281617523291456e-06, - "loss": 1.0594, - "step": 8706 - }, - { - "epoch": 0.6543664512250112, - "grad_norm": 1.9041428097733628, - "learning_rate": 1.127723627084397e-06, - "loss": 0.8511, - "step": 8707 - }, - { - "epoch": 0.6544416052908463, - "grad_norm": 1.553841288701334, - "learning_rate": 1.1272855535253637e-06, - "loss": 0.966, - "step": 8708 - }, - { - "epoch": 0.6545167593566812, - "grad_norm": 2.5124221220330405, - "learning_rate": 1.1268475316780036e-06, - "loss": 0.999, - "step": 8709 - }, - { - "epoch": 0.6545919134225161, - "grad_norm": 2.468880082578354, - "learning_rate": 1.1264095615682704e-06, - "loss": 0.9449, - "step": 8710 - }, - { - "epoch": 0.6546670674883511, - "grad_norm": 2.2492655509062622, - "learning_rate": 1.125971643222115e-06, - "loss": 0.8929, - "step": 8711 - }, - { - "epoch": 0.654742221554186, - "grad_norm": 1.4803536000423843, - "learning_rate": 1.1255337766654873e-06, - "loss": 1.0032, - "step": 8712 - }, - { - "epoch": 0.6548173756200211, - "grad_norm": 1.4726772964552108, - "learning_rate": 1.1250959619243322e-06, - "loss": 0.9629, - "step": 8713 - }, - { - "epoch": 0.654892529685856, - "grad_norm": 2.3283179286409865, - "learning_rate": 1.1246581990245916e-06, - "loss": 0.9551, - "step": 8714 - }, - { - "epoch": 0.654967683751691, - "grad_norm": 2.1330437726958564, - "learning_rate": 1.1242204879922045e-06, - "loss": 1.0366, - "step": 8715 - }, - { - "epoch": 0.6550428378175259, - "grad_norm": 1.4353284604783771, - "learning_rate": 1.1237828288531063e-06, - "loss": 0.9647, - "step": 8716 - }, - { - "epoch": 0.6551179918833608, - "grad_norm": 1.7372476655244753, - "learning_rate": 1.1233452216332316e-06, - "loss": 0.8758, - "step": 8717 - }, - { - "epoch": 0.6551931459491959, - "grad_norm": 1.5751474069309834, - "learning_rate": 1.1229076663585094e-06, - "loss": 0.973, - "step": 8718 - }, - { - "epoch": 0.6552683000150308, - "grad_norm": 1.4907057830225472, - "learning_rate": 1.1224701630548665e-06, - "loss": 0.9485, - "step": 8719 - }, - { - "epoch": 0.6553434540808658, - "grad_norm": 1.4441656562961596, - "learning_rate": 1.122032711748227e-06, - "loss": 0.9555, - "step": 8720 - }, - { - "epoch": 0.6554186081467007, - "grad_norm": 1.4176621959540143, - "learning_rate": 1.12159531246451e-06, - "loss": 0.9886, - "step": 8721 - }, - { - "epoch": 0.6554937622125357, - "grad_norm": 4.171431957250964, - "learning_rate": 1.1211579652296355e-06, - "loss": 1.0371, - "step": 8722 - }, - { - "epoch": 0.6555689162783707, - "grad_norm": 2.1549065869258612, - "learning_rate": 1.1207206700695161e-06, - "loss": 0.9897, - "step": 8723 - }, - { - "epoch": 0.6556440703442056, - "grad_norm": 2.1708475880023665, - "learning_rate": 1.1202834270100655e-06, - "loss": 0.9305, - "step": 8724 - }, - { - "epoch": 0.6557192244100406, - "grad_norm": 1.5622772821005888, - "learning_rate": 1.1198462360771895e-06, - "loss": 0.9804, - "step": 8725 - }, - { - "epoch": 0.6557943784758755, - "grad_norm": 1.988496959803854, - "learning_rate": 1.1194090972967943e-06, - "loss": 0.9536, - "step": 8726 - }, - { - "epoch": 0.6558695325417105, - "grad_norm": 2.66638717577483, - "learning_rate": 1.1189720106947823e-06, - "loss": 1.0182, - "step": 8727 - }, - { - "epoch": 0.6559446866075455, - "grad_norm": 1.9494245951790026, - "learning_rate": 1.1185349762970515e-06, - "loss": 0.9019, - "step": 8728 - }, - { - "epoch": 0.6560198406733805, - "grad_norm": 1.7220371370774787, - "learning_rate": 1.1180979941294998e-06, - "loss": 0.9366, - "step": 8729 - }, - { - "epoch": 0.6560949947392154, - "grad_norm": 2.1683131588762854, - "learning_rate": 1.1176610642180184e-06, - "loss": 0.8662, - "step": 8730 - }, - { - "epoch": 0.6561701488050503, - "grad_norm": 1.7460513525814298, - "learning_rate": 1.117224186588498e-06, - "loss": 1.0656, - "step": 8731 - }, - { - "epoch": 0.6562453028708853, - "grad_norm": 1.8399823373485846, - "learning_rate": 1.1167873612668252e-06, - "loss": 1.1113, - "step": 8732 - }, - { - "epoch": 0.6563204569367203, - "grad_norm": 1.5759817242700356, - "learning_rate": 1.1163505882788821e-06, - "loss": 0.966, - "step": 8733 - }, - { - "epoch": 0.6563956110025553, - "grad_norm": 1.6922589766886598, - "learning_rate": 1.1159138676505516e-06, - "loss": 0.9218, - "step": 8734 - }, - { - "epoch": 0.6564707650683902, - "grad_norm": 1.3804036512067377, - "learning_rate": 1.1154771994077095e-06, - "loss": 1.0171, - "step": 8735 - }, - { - "epoch": 0.6565459191342251, - "grad_norm": 0.662846293003858, - "learning_rate": 1.1150405835762304e-06, - "loss": 0.8242, - "step": 8736 - }, - { - "epoch": 0.6566210732000601, - "grad_norm": 1.6591255129748186, - "learning_rate": 1.1146040201819855e-06, - "loss": 1.0068, - "step": 8737 - }, - { - "epoch": 0.656696227265895, - "grad_norm": 1.7001991688237068, - "learning_rate": 1.114167509250842e-06, - "loss": 1.0045, - "step": 8738 - }, - { - "epoch": 0.6567713813317301, - "grad_norm": 1.64173655320852, - "learning_rate": 1.1137310508086666e-06, - "loss": 0.9949, - "step": 8739 - }, - { - "epoch": 0.656846535397565, - "grad_norm": 1.8515955282235181, - "learning_rate": 1.11329464488132e-06, - "loss": 0.9361, - "step": 8740 - }, - { - "epoch": 0.6569216894634, - "grad_norm": 1.655550467115657, - "learning_rate": 1.112858291494661e-06, - "loss": 0.9907, - "step": 8741 - }, - { - "epoch": 0.6569968435292349, - "grad_norm": 2.042890441444036, - "learning_rate": 1.1124219906745458e-06, - "loss": 0.9042, - "step": 8742 - }, - { - "epoch": 0.6570719975950698, - "grad_norm": 0.8013430601760495, - "learning_rate": 1.1119857424468252e-06, - "loss": 0.8703, - "step": 8743 - }, - { - "epoch": 0.6571471516609049, - "grad_norm": 3.2889469516056464, - "learning_rate": 1.1115495468373505e-06, - "loss": 0.9837, - "step": 8744 - }, - { - "epoch": 0.6572223057267398, - "grad_norm": 2.0266801339672424, - "learning_rate": 1.111113403871967e-06, - "loss": 1.0564, - "step": 8745 - }, - { - "epoch": 0.6572974597925748, - "grad_norm": 1.8955135355199217, - "learning_rate": 1.1106773135765183e-06, - "loss": 0.9564, - "step": 8746 - }, - { - "epoch": 0.6573726138584097, - "grad_norm": 1.4602608159047885, - "learning_rate": 1.1102412759768455e-06, - "loss": 0.989, - "step": 8747 - }, - { - "epoch": 0.6574477679242448, - "grad_norm": 1.639374028975286, - "learning_rate": 1.1098052910987824e-06, - "loss": 0.8424, - "step": 8748 - }, - { - "epoch": 0.6575229219900797, - "grad_norm": 2.0954877559638203, - "learning_rate": 1.1093693589681654e-06, - "loss": 0.9368, - "step": 8749 - }, - { - "epoch": 0.6575980760559146, - "grad_norm": 1.4246949956972441, - "learning_rate": 1.1089334796108235e-06, - "loss": 0.929, - "step": 8750 - }, - { - "epoch": 0.6576732301217496, - "grad_norm": 1.3176614906318098, - "learning_rate": 1.1084976530525858e-06, - "loss": 0.996, - "step": 8751 - }, - { - "epoch": 0.6577483841875845, - "grad_norm": 1.9274711081272802, - "learning_rate": 1.108061879319276e-06, - "loss": 0.9837, - "step": 8752 - }, - { - "epoch": 0.6578235382534195, - "grad_norm": 1.80799410871373, - "learning_rate": 1.1076261584367154e-06, - "loss": 1.0152, - "step": 8753 - }, - { - "epoch": 0.6578986923192545, - "grad_norm": 1.6226898057651846, - "learning_rate": 1.107190490430722e-06, - "loss": 1.0153, - "step": 8754 - }, - { - "epoch": 0.6579738463850894, - "grad_norm": 2.625467440448133, - "learning_rate": 1.10675487532711e-06, - "loss": 1.0362, - "step": 8755 - }, - { - "epoch": 0.6580490004509244, - "grad_norm": 1.8291574154955148, - "learning_rate": 1.1063193131516928e-06, - "loss": 1.0228, - "step": 8756 - }, - { - "epoch": 0.6581241545167593, - "grad_norm": 2.8187702220521245, - "learning_rate": 1.1058838039302788e-06, - "loss": 0.8976, - "step": 8757 - }, - { - "epoch": 0.6581993085825943, - "grad_norm": 1.5346147586871979, - "learning_rate": 1.1054483476886727e-06, - "loss": 0.8971, - "step": 8758 - }, - { - "epoch": 0.6582744626484293, - "grad_norm": 1.9796388230214237, - "learning_rate": 1.1050129444526777e-06, - "loss": 0.9655, - "step": 8759 - }, - { - "epoch": 0.6583496167142643, - "grad_norm": 3.964975319168609, - "learning_rate": 1.104577594248092e-06, - "loss": 0.9441, - "step": 8760 - }, - { - "epoch": 0.6584247707800992, - "grad_norm": 1.7847246111479858, - "learning_rate": 1.1041422971007137e-06, - "loss": 0.9286, - "step": 8761 - }, - { - "epoch": 0.6584999248459341, - "grad_norm": 1.9396078272774278, - "learning_rate": 1.1037070530363343e-06, - "loss": 0.9279, - "step": 8762 - }, - { - "epoch": 0.6585750789117691, - "grad_norm": 2.6112595565569894, - "learning_rate": 1.1032718620807446e-06, - "loss": 0.886, - "step": 8763 - }, - { - "epoch": 0.658650232977604, - "grad_norm": 1.7197226806458177, - "learning_rate": 1.1028367242597307e-06, - "loss": 0.9315, - "step": 8764 - }, - { - "epoch": 0.6587253870434391, - "grad_norm": 2.03357668916021, - "learning_rate": 1.1024016395990756e-06, - "loss": 1.0074, - "step": 8765 - }, - { - "epoch": 0.658800541109274, - "grad_norm": 1.4462769460846705, - "learning_rate": 1.1019666081245613e-06, - "loss": 1.0059, - "step": 8766 - }, - { - "epoch": 0.658875695175109, - "grad_norm": 1.575876579370416, - "learning_rate": 1.1015316298619634e-06, - "loss": 0.9628, - "step": 8767 - }, - { - "epoch": 0.6589508492409439, - "grad_norm": 1.7046850915991452, - "learning_rate": 1.1010967048370577e-06, - "loss": 0.992, - "step": 8768 - }, - { - "epoch": 0.6590260033067789, - "grad_norm": 1.5468961926174145, - "learning_rate": 1.1006618330756153e-06, - "loss": 0.8755, - "step": 8769 - }, - { - "epoch": 0.6591011573726139, - "grad_norm": 2.316238062704816, - "learning_rate": 1.1002270146034013e-06, - "loss": 1.0139, - "step": 8770 - }, - { - "epoch": 0.6591763114384488, - "grad_norm": 1.727901026196045, - "learning_rate": 1.099792249446183e-06, - "loss": 0.9342, - "step": 8771 - }, - { - "epoch": 0.6592514655042838, - "grad_norm": 0.6950465561689992, - "learning_rate": 1.0993575376297201e-06, - "loss": 0.8444, - "step": 8772 - }, - { - "epoch": 0.6593266195701187, - "grad_norm": 1.859514567803034, - "learning_rate": 1.0989228791797729e-06, - "loss": 1.0615, - "step": 8773 - }, - { - "epoch": 0.6594017736359538, - "grad_norm": 1.7374946100165105, - "learning_rate": 1.0984882741220957e-06, - "loss": 1.037, - "step": 8774 - }, - { - "epoch": 0.6594769277017887, - "grad_norm": 8.326878552801492, - "learning_rate": 1.0980537224824403e-06, - "loss": 0.9807, - "step": 8775 - }, - { - "epoch": 0.6595520817676236, - "grad_norm": 2.0220099756389067, - "learning_rate": 1.0976192242865554e-06, - "loss": 0.9337, - "step": 8776 - }, - { - "epoch": 0.6596272358334586, - "grad_norm": 1.3538287792724846, - "learning_rate": 1.097184779560186e-06, - "loss": 0.9819, - "step": 8777 - }, - { - "epoch": 0.6597023898992935, - "grad_norm": 2.2366169172497083, - "learning_rate": 1.0967503883290768e-06, - "loss": 1.0384, - "step": 8778 - }, - { - "epoch": 0.6597775439651286, - "grad_norm": 1.8843185289819104, - "learning_rate": 1.0963160506189655e-06, - "loss": 0.9765, - "step": 8779 - }, - { - "epoch": 0.6598526980309635, - "grad_norm": 1.6187989687785076, - "learning_rate": 1.0958817664555886e-06, - "loss": 0.99, - "step": 8780 - }, - { - "epoch": 0.6599278520967984, - "grad_norm": 1.628326532407606, - "learning_rate": 1.0954475358646793e-06, - "loss": 0.9232, - "step": 8781 - }, - { - "epoch": 0.6600030061626334, - "grad_norm": 1.739914578938976, - "learning_rate": 1.0950133588719665e-06, - "loss": 0.9354, - "step": 8782 - }, - { - "epoch": 0.6600781602284683, - "grad_norm": 1.3464876719922194, - "learning_rate": 1.0945792355031785e-06, - "loss": 0.9879, - "step": 8783 - }, - { - "epoch": 0.6601533142943034, - "grad_norm": 2.0239847393214463, - "learning_rate": 1.0941451657840377e-06, - "loss": 0.9517, - "step": 8784 - }, - { - "epoch": 0.6602284683601383, - "grad_norm": 1.8338305131520216, - "learning_rate": 1.0937111497402648e-06, - "loss": 0.9825, - "step": 8785 - }, - { - "epoch": 0.6603036224259733, - "grad_norm": 1.578485277805059, - "learning_rate": 1.0932771873975764e-06, - "loss": 1.0329, - "step": 8786 - }, - { - "epoch": 0.6603787764918082, - "grad_norm": 0.7565950662567511, - "learning_rate": 1.0928432787816859e-06, - "loss": 0.8486, - "step": 8787 - }, - { - "epoch": 0.6604539305576431, - "grad_norm": 4.274000280727436, - "learning_rate": 1.092409423918306e-06, - "loss": 0.9031, - "step": 8788 - }, - { - "epoch": 0.6605290846234781, - "grad_norm": 0.7708602891968576, - "learning_rate": 1.091975622833142e-06, - "loss": 0.8159, - "step": 8789 - }, - { - "epoch": 0.6606042386893131, - "grad_norm": 1.5661554682792265, - "learning_rate": 1.0915418755519004e-06, - "loss": 0.9005, - "step": 8790 - }, - { - "epoch": 0.6606793927551481, - "grad_norm": 0.6815035007132209, - "learning_rate": 1.0911081821002811e-06, - "loss": 0.8623, - "step": 8791 - }, - { - "epoch": 0.660754546820983, - "grad_norm": 1.4945111781366232, - "learning_rate": 1.0906745425039829e-06, - "loss": 0.9793, - "step": 8792 - }, - { - "epoch": 0.660829700886818, - "grad_norm": 1.6492355367510043, - "learning_rate": 1.0902409567886996e-06, - "loss": 1.0058, - "step": 8793 - }, - { - "epoch": 0.6609048549526529, - "grad_norm": 2.20266047781813, - "learning_rate": 1.0898074249801227e-06, - "loss": 0.9416, - "step": 8794 - }, - { - "epoch": 0.6609800090184879, - "grad_norm": 1.8740993879948746, - "learning_rate": 1.089373947103942e-06, - "loss": 1.0057, - "step": 8795 - }, - { - "epoch": 0.6610551630843229, - "grad_norm": 2.2354456330550128, - "learning_rate": 1.0889405231858422e-06, - "loss": 0.8687, - "step": 8796 - }, - { - "epoch": 0.6611303171501578, - "grad_norm": 2.03589615212624, - "learning_rate": 1.0885071532515049e-06, - "loss": 1.007, - "step": 8797 - }, - { - "epoch": 0.6612054712159928, - "grad_norm": 2.748535112878064, - "learning_rate": 1.088073837326609e-06, - "loss": 0.9708, - "step": 8798 - }, - { - "epoch": 0.6612806252818277, - "grad_norm": 1.7713857873571066, - "learning_rate": 1.0876405754368296e-06, - "loss": 0.9197, - "step": 8799 - }, - { - "epoch": 0.6613557793476627, - "grad_norm": 1.5728260628918693, - "learning_rate": 1.0872073676078405e-06, - "loss": 0.9623, - "step": 8800 - }, - { - "epoch": 0.6614309334134977, - "grad_norm": 1.82656467721514, - "learning_rate": 1.0867742138653103e-06, - "loss": 1.0424, - "step": 8801 - }, - { - "epoch": 0.6615060874793326, - "grad_norm": 1.7039396804825282, - "learning_rate": 1.0863411142349046e-06, - "loss": 1.0062, - "step": 8802 - }, - { - "epoch": 0.6615812415451676, - "grad_norm": 1.7242528734862135, - "learning_rate": 1.0859080687422868e-06, - "loss": 0.9783, - "step": 8803 - }, - { - "epoch": 0.6616563956110025, - "grad_norm": 1.4092588237237853, - "learning_rate": 1.0854750774131153e-06, - "loss": 0.9088, - "step": 8804 - }, - { - "epoch": 0.6617315496768376, - "grad_norm": 2.0643251608578956, - "learning_rate": 1.0850421402730482e-06, - "loss": 1.0474, - "step": 8805 - }, - { - "epoch": 0.6618067037426725, - "grad_norm": 1.2888220360470974, - "learning_rate": 1.084609257347738e-06, - "loss": 0.8983, - "step": 8806 - }, - { - "epoch": 0.6618818578085074, - "grad_norm": 1.5935071124327174, - "learning_rate": 1.0841764286628344e-06, - "loss": 0.9979, - "step": 8807 - }, - { - "epoch": 0.6619570118743424, - "grad_norm": 1.6642388621973705, - "learning_rate": 1.0837436542439843e-06, - "loss": 1.0504, - "step": 8808 - }, - { - "epoch": 0.6620321659401773, - "grad_norm": 1.789520471584216, - "learning_rate": 1.0833109341168308e-06, - "loss": 0.9529, - "step": 8809 - }, - { - "epoch": 0.6621073200060124, - "grad_norm": 2.304041160858233, - "learning_rate": 1.0828782683070153e-06, - "loss": 0.9986, - "step": 8810 - }, - { - "epoch": 0.6621824740718473, - "grad_norm": 1.4807478910808238, - "learning_rate": 1.0824456568401735e-06, - "loss": 1.0017, - "step": 8811 - }, - { - "epoch": 0.6622576281376823, - "grad_norm": 2.8173603870153863, - "learning_rate": 1.0820130997419407e-06, - "loss": 1.0197, - "step": 8812 - }, - { - "epoch": 0.6623327822035172, - "grad_norm": 1.7342326475267615, - "learning_rate": 1.0815805970379473e-06, - "loss": 0.9832, - "step": 8813 - }, - { - "epoch": 0.6624079362693521, - "grad_norm": 1.6422453371964052, - "learning_rate": 1.08114814875382e-06, - "loss": 1.0757, - "step": 8814 - }, - { - "epoch": 0.6624830903351872, - "grad_norm": 2.570918513225954, - "learning_rate": 1.0807157549151838e-06, - "loss": 0.9092, - "step": 8815 - }, - { - "epoch": 0.6625582444010221, - "grad_norm": 1.8406133965481095, - "learning_rate": 1.0802834155476582e-06, - "loss": 0.8445, - "step": 8816 - }, - { - "epoch": 0.6626333984668571, - "grad_norm": 0.6767923585735963, - "learning_rate": 1.0798511306768628e-06, - "loss": 0.837, - "step": 8817 - }, - { - "epoch": 0.662708552532692, - "grad_norm": 1.3717496775603641, - "learning_rate": 1.0794189003284118e-06, - "loss": 0.9971, - "step": 8818 - }, - { - "epoch": 0.662783706598527, - "grad_norm": 1.7492938660631585, - "learning_rate": 1.0789867245279157e-06, - "loss": 0.9377, - "step": 8819 - }, - { - "epoch": 0.662858860664362, - "grad_norm": 1.4791990285565915, - "learning_rate": 1.0785546033009829e-06, - "loss": 1.0081, - "step": 8820 - }, - { - "epoch": 0.6629340147301969, - "grad_norm": 4.245855217684774, - "learning_rate": 1.0781225366732179e-06, - "loss": 0.9415, - "step": 8821 - }, - { - "epoch": 0.6630091687960319, - "grad_norm": 1.9566636035230187, - "learning_rate": 1.0776905246702233e-06, - "loss": 1.0135, - "step": 8822 - }, - { - "epoch": 0.6630843228618668, - "grad_norm": 1.6934660951629084, - "learning_rate": 1.077258567317597e-06, - "loss": 1.0548, - "step": 8823 - }, - { - "epoch": 0.6631594769277018, - "grad_norm": 1.7224538255988724, - "learning_rate": 1.076826664640934e-06, - "loss": 0.961, - "step": 8824 - }, - { - "epoch": 0.6632346309935367, - "grad_norm": 1.349235471832787, - "learning_rate": 1.076394816665826e-06, - "loss": 0.9817, - "step": 8825 - }, - { - "epoch": 0.6633097850593717, - "grad_norm": 1.7733773376746287, - "learning_rate": 1.075963023417861e-06, - "loss": 0.9536, - "step": 8826 - }, - { - "epoch": 0.6633849391252067, - "grad_norm": 3.954658904286863, - "learning_rate": 1.075531284922626e-06, - "loss": 1.0431, - "step": 8827 - }, - { - "epoch": 0.6634600931910416, - "grad_norm": 1.4393012330287893, - "learning_rate": 1.0750996012057028e-06, - "loss": 0.9529, - "step": 8828 - }, - { - "epoch": 0.6635352472568766, - "grad_norm": 0.7191328604640074, - "learning_rate": 1.0746679722926695e-06, - "loss": 0.841, - "step": 8829 - }, - { - "epoch": 0.6636104013227115, - "grad_norm": 1.5462444060462661, - "learning_rate": 1.0742363982091023e-06, - "loss": 0.87, - "step": 8830 - }, - { - "epoch": 0.6636855553885466, - "grad_norm": 1.6158435784353333, - "learning_rate": 1.0738048789805727e-06, - "loss": 0.9907, - "step": 8831 - }, - { - "epoch": 0.6637607094543815, - "grad_norm": 2.8036552378490667, - "learning_rate": 1.0733734146326513e-06, - "loss": 0.9447, - "step": 8832 - }, - { - "epoch": 0.6638358635202164, - "grad_norm": 1.5851045873654404, - "learning_rate": 1.072942005190903e-06, - "loss": 1.058, - "step": 8833 - }, - { - "epoch": 0.6639110175860514, - "grad_norm": 2.500845575384243, - "learning_rate": 1.0725106506808912e-06, - "loss": 0.9362, - "step": 8834 - }, - { - "epoch": 0.6639861716518863, - "grad_norm": 1.6786091026322052, - "learning_rate": 1.0720793511281754e-06, - "loss": 0.9822, - "step": 8835 - }, - { - "epoch": 0.6640613257177214, - "grad_norm": 4.341944916363897, - "learning_rate": 1.0716481065583108e-06, - "loss": 0.9565, - "step": 8836 - }, - { - "epoch": 0.6641364797835563, - "grad_norm": 2.1487630701471994, - "learning_rate": 1.071216916996851e-06, - "loss": 1.037, - "step": 8837 - }, - { - "epoch": 0.6642116338493913, - "grad_norm": 1.7618783679725634, - "learning_rate": 1.0707857824693446e-06, - "loss": 0.8877, - "step": 8838 - }, - { - "epoch": 0.6642867879152262, - "grad_norm": 2.187266528068111, - "learning_rate": 1.0703547030013399e-06, - "loss": 0.952, - "step": 8839 - }, - { - "epoch": 0.6643619419810611, - "grad_norm": 1.780008854040349, - "learning_rate": 1.0699236786183786e-06, - "loss": 0.9648, - "step": 8840 - }, - { - "epoch": 0.6644370960468962, - "grad_norm": 1.8268681958098263, - "learning_rate": 1.0694927093460007e-06, - "loss": 0.9615, - "step": 8841 - }, - { - "epoch": 0.6645122501127311, - "grad_norm": 1.9055056134764108, - "learning_rate": 1.069061795209743e-06, - "loss": 0.8208, - "step": 8842 - }, - { - "epoch": 0.6645874041785661, - "grad_norm": 1.5893685195316711, - "learning_rate": 1.068630936235138e-06, - "loss": 1.016, - "step": 8843 - }, - { - "epoch": 0.664662558244401, - "grad_norm": 1.831453537289406, - "learning_rate": 1.0682001324477173e-06, - "loss": 0.9519, - "step": 8844 - }, - { - "epoch": 0.6647377123102359, - "grad_norm": 5.038188438607663, - "learning_rate": 1.0677693838730068e-06, - "loss": 0.9451, - "step": 8845 - }, - { - "epoch": 0.664812866376071, - "grad_norm": 0.6620076869420347, - "learning_rate": 1.06733869053653e-06, - "loss": 0.862, - "step": 8846 - }, - { - "epoch": 0.6648880204419059, - "grad_norm": 1.6937502505594328, - "learning_rate": 1.0669080524638072e-06, - "loss": 0.9422, - "step": 8847 - }, - { - "epoch": 0.6649631745077409, - "grad_norm": 3.537581986734189, - "learning_rate": 1.0664774696803548e-06, - "loss": 0.9555, - "step": 8848 - }, - { - "epoch": 0.6650383285735758, - "grad_norm": 1.5476748007558478, - "learning_rate": 1.0660469422116876e-06, - "loss": 1.0352, - "step": 8849 - }, - { - "epoch": 0.6651134826394108, - "grad_norm": 1.5692900560148932, - "learning_rate": 1.0656164700833148e-06, - "loss": 0.9497, - "step": 8850 - }, - { - "epoch": 0.6651886367052458, - "grad_norm": 1.4606921147999983, - "learning_rate": 1.0651860533207452e-06, - "loss": 0.9904, - "step": 8851 - }, - { - "epoch": 0.6652637907710807, - "grad_norm": 1.8136531844481354, - "learning_rate": 1.0647556919494814e-06, - "loss": 0.933, - "step": 8852 - }, - { - "epoch": 0.6653389448369157, - "grad_norm": 2.4746495509329924, - "learning_rate": 1.0643253859950231e-06, - "loss": 0.9519, - "step": 8853 - }, - { - "epoch": 0.6654140989027506, - "grad_norm": 1.4859288966494417, - "learning_rate": 1.0638951354828693e-06, - "loss": 0.9574, - "step": 8854 - }, - { - "epoch": 0.6654892529685856, - "grad_norm": 1.630519150748697, - "learning_rate": 1.0634649404385127e-06, - "loss": 1.0544, - "step": 8855 - }, - { - "epoch": 0.6655644070344205, - "grad_norm": 1.5757103756052115, - "learning_rate": 1.0630348008874452e-06, - "loss": 0.9262, - "step": 8856 - }, - { - "epoch": 0.6656395611002556, - "grad_norm": 2.5380268508356534, - "learning_rate": 1.062604716855154e-06, - "loss": 1.0099, - "step": 8857 - }, - { - "epoch": 0.6657147151660905, - "grad_norm": 1.7914607868635448, - "learning_rate": 1.0621746883671226e-06, - "loss": 0.9795, - "step": 8858 - }, - { - "epoch": 0.6657898692319254, - "grad_norm": 3.04874835988306, - "learning_rate": 1.0617447154488322e-06, - "loss": 0.9525, - "step": 8859 - }, - { - "epoch": 0.6658650232977604, - "grad_norm": 1.845819960029786, - "learning_rate": 1.061314798125759e-06, - "loss": 1.0291, - "step": 8860 - }, - { - "epoch": 0.6659401773635953, - "grad_norm": 1.600324943447918, - "learning_rate": 1.0608849364233798e-06, - "loss": 0.8723, - "step": 8861 - }, - { - "epoch": 0.6660153314294304, - "grad_norm": 1.8252533749835373, - "learning_rate": 1.0604551303671641e-06, - "loss": 0.9576, - "step": 8862 - }, - { - "epoch": 0.6660904854952653, - "grad_norm": 1.4744307049766958, - "learning_rate": 1.0600253799825797e-06, - "loss": 1.0017, - "step": 8863 - }, - { - "epoch": 0.6661656395611003, - "grad_norm": 1.9624197172035132, - "learning_rate": 1.0595956852950907e-06, - "loss": 0.9221, - "step": 8864 - }, - { - "epoch": 0.6662407936269352, - "grad_norm": 1.8615809586118752, - "learning_rate": 1.0591660463301578e-06, - "loss": 0.9467, - "step": 8865 - }, - { - "epoch": 0.6663159476927701, - "grad_norm": 0.8020268127211643, - "learning_rate": 1.0587364631132402e-06, - "loss": 0.8476, - "step": 8866 - }, - { - "epoch": 0.6663911017586052, - "grad_norm": 2.1577050648433365, - "learning_rate": 1.0583069356697913e-06, - "loss": 0.9472, - "step": 8867 - }, - { - "epoch": 0.6664662558244401, - "grad_norm": 1.6963925063809868, - "learning_rate": 1.0578774640252626e-06, - "loss": 1.0546, - "step": 8868 - }, - { - "epoch": 0.6665414098902751, - "grad_norm": 1.6479989141812497, - "learning_rate": 1.0574480482051017e-06, - "loss": 1.0297, - "step": 8869 - }, - { - "epoch": 0.66661656395611, - "grad_norm": 2.7872786600820834, - "learning_rate": 1.0570186882347525e-06, - "loss": 1.043, - "step": 8870 - }, - { - "epoch": 0.6666917180219449, - "grad_norm": 2.157779648604642, - "learning_rate": 1.0565893841396575e-06, - "loss": 0.9648, - "step": 8871 - }, - { - "epoch": 0.66676687208778, - "grad_norm": 1.5076375276705931, - "learning_rate": 1.0561601359452537e-06, - "loss": 0.9906, - "step": 8872 - }, - { - "epoch": 0.6668420261536149, - "grad_norm": 1.6711349275356742, - "learning_rate": 1.0557309436769776e-06, - "loss": 0.9318, - "step": 8873 - }, - { - "epoch": 0.6669171802194499, - "grad_norm": 2.140290394269375, - "learning_rate": 1.0553018073602577e-06, - "loss": 0.9797, - "step": 8874 - }, - { - "epoch": 0.6669923342852848, - "grad_norm": 1.4560374943967314, - "learning_rate": 1.0548727270205227e-06, - "loss": 1.0109, - "step": 8875 - }, - { - "epoch": 0.6670674883511198, - "grad_norm": 1.474085915731097, - "learning_rate": 1.0544437026831983e-06, - "loss": 1.0625, - "step": 8876 - }, - { - "epoch": 0.6671426424169548, - "grad_norm": 1.6575938836575859, - "learning_rate": 1.0540147343737049e-06, - "loss": 1.0011, - "step": 8877 - }, - { - "epoch": 0.6672177964827897, - "grad_norm": 1.4326482248097105, - "learning_rate": 1.0535858221174614e-06, - "loss": 0.9972, - "step": 8878 - }, - { - "epoch": 0.6672929505486247, - "grad_norm": 1.6705767145111212, - "learning_rate": 1.0531569659398821e-06, - "loss": 1.0076, - "step": 8879 - }, - { - "epoch": 0.6673681046144596, - "grad_norm": 1.9637431101250176, - "learning_rate": 1.0527281658663782e-06, - "loss": 1.037, - "step": 8880 - }, - { - "epoch": 0.6674432586802946, - "grad_norm": 1.4249080126335043, - "learning_rate": 1.052299421922358e-06, - "loss": 0.9704, - "step": 8881 - }, - { - "epoch": 0.6675184127461296, - "grad_norm": 1.7430889038637907, - "learning_rate": 1.051870734133225e-06, - "loss": 1.0156, - "step": 8882 - }, - { - "epoch": 0.6675935668119646, - "grad_norm": 2.070892598195106, - "learning_rate": 1.0514421025243825e-06, - "loss": 0.9608, - "step": 8883 - }, - { - "epoch": 0.6676687208777995, - "grad_norm": 1.892300769493246, - "learning_rate": 1.0510135271212278e-06, - "loss": 0.9315, - "step": 8884 - }, - { - "epoch": 0.6677438749436344, - "grad_norm": 3.18011441750687, - "learning_rate": 1.0505850079491553e-06, - "loss": 0.9931, - "step": 8885 - }, - { - "epoch": 0.6678190290094694, - "grad_norm": 2.4193824985932904, - "learning_rate": 1.0501565450335573e-06, - "loss": 0.9062, - "step": 8886 - }, - { - "epoch": 0.6678941830753043, - "grad_norm": 1.715336756994273, - "learning_rate": 1.0497281383998195e-06, - "loss": 0.9976, - "step": 8887 - }, - { - "epoch": 0.6679693371411394, - "grad_norm": 1.8476559274040085, - "learning_rate": 1.0492997880733297e-06, - "loss": 0.9808, - "step": 8888 - }, - { - "epoch": 0.6680444912069743, - "grad_norm": 1.456520952550043, - "learning_rate": 1.0488714940794677e-06, - "loss": 0.9854, - "step": 8889 - }, - { - "epoch": 0.6681196452728092, - "grad_norm": 0.6777293400206411, - "learning_rate": 1.048443256443612e-06, - "loss": 0.8476, - "step": 8890 - }, - { - "epoch": 0.6681947993386442, - "grad_norm": 1.5279033445525276, - "learning_rate": 1.048015075191137e-06, - "loss": 0.9142, - "step": 8891 - }, - { - "epoch": 0.6682699534044791, - "grad_norm": 7.8946037308028485, - "learning_rate": 1.0475869503474133e-06, - "loss": 1.0196, - "step": 8892 - }, - { - "epoch": 0.6683451074703142, - "grad_norm": 1.6103691067731765, - "learning_rate": 1.0471588819378107e-06, - "loss": 0.8883, - "step": 8893 - }, - { - "epoch": 0.6684202615361491, - "grad_norm": 2.0118588296952047, - "learning_rate": 1.0467308699876922e-06, - "loss": 0.9507, - "step": 8894 - }, - { - "epoch": 0.6684954156019841, - "grad_norm": 1.7106127147630634, - "learning_rate": 1.0463029145224216e-06, - "loss": 1.0184, - "step": 8895 - }, - { - "epoch": 0.668570569667819, - "grad_norm": 1.4215905041136525, - "learning_rate": 1.0458750155673546e-06, - "loss": 1.0331, - "step": 8896 - }, - { - "epoch": 0.6686457237336539, - "grad_norm": 2.077794328352799, - "learning_rate": 1.0454471731478455e-06, - "loss": 0.93, - "step": 8897 - }, - { - "epoch": 0.668720877799489, - "grad_norm": 0.671589480213581, - "learning_rate": 1.0450193872892472e-06, - "loss": 0.8734, - "step": 8898 - }, - { - "epoch": 0.6687960318653239, - "grad_norm": 1.480742738679223, - "learning_rate": 1.0445916580169067e-06, - "loss": 0.9373, - "step": 8899 - }, - { - "epoch": 0.6688711859311589, - "grad_norm": 1.873446329460304, - "learning_rate": 1.0441639853561697e-06, - "loss": 0.9905, - "step": 8900 - }, - { - "epoch": 0.6689463399969938, - "grad_norm": 2.6066985880888276, - "learning_rate": 1.0437363693323765e-06, - "loss": 1.0388, - "step": 8901 - }, - { - "epoch": 0.6690214940628288, - "grad_norm": 2.0068752609818254, - "learning_rate": 1.0433088099708653e-06, - "loss": 0.8815, - "step": 8902 - }, - { - "epoch": 0.6690966481286638, - "grad_norm": 1.6607646231339857, - "learning_rate": 1.0428813072969704e-06, - "loss": 1.0287, - "step": 8903 - }, - { - "epoch": 0.6691718021944987, - "grad_norm": 1.5563610787945932, - "learning_rate": 1.0424538613360226e-06, - "loss": 0.97, - "step": 8904 - }, - { - "epoch": 0.6692469562603337, - "grad_norm": 1.5294614973656016, - "learning_rate": 1.0420264721133508e-06, - "loss": 1.004, - "step": 8905 - }, - { - "epoch": 0.6693221103261686, - "grad_norm": 1.5746621390161069, - "learning_rate": 1.041599139654279e-06, - "loss": 1.0383, - "step": 8906 - }, - { - "epoch": 0.6693972643920036, - "grad_norm": 1.8690867321571918, - "learning_rate": 1.041171863984128e-06, - "loss": 1.0478, - "step": 8907 - }, - { - "epoch": 0.6694724184578386, - "grad_norm": 1.8412369881330095, - "learning_rate": 1.040744645128216e-06, - "loss": 0.8364, - "step": 8908 - }, - { - "epoch": 0.6695475725236736, - "grad_norm": 0.7107504713933765, - "learning_rate": 1.0403174831118556e-06, - "loss": 0.8115, - "step": 8909 - }, - { - "epoch": 0.6696227265895085, - "grad_norm": 1.6799875390635168, - "learning_rate": 1.0398903779603604e-06, - "loss": 0.9422, - "step": 8910 - }, - { - "epoch": 0.6696978806553434, - "grad_norm": 1.93859573881924, - "learning_rate": 1.0394633296990364e-06, - "loss": 1.0343, - "step": 8911 - }, - { - "epoch": 0.6697730347211784, - "grad_norm": 2.395225337185116, - "learning_rate": 1.0390363383531888e-06, - "loss": 0.9513, - "step": 8912 - }, - { - "epoch": 0.6698481887870134, - "grad_norm": 3.8061273815028938, - "learning_rate": 1.0386094039481177e-06, - "loss": 0.9254, - "step": 8913 - }, - { - "epoch": 0.6699233428528484, - "grad_norm": 1.5275491639761576, - "learning_rate": 1.0381825265091197e-06, - "loss": 0.985, - "step": 8914 - }, - { - "epoch": 0.6699984969186833, - "grad_norm": 1.8821228157087027, - "learning_rate": 1.0377557060614913e-06, - "loss": 0.9292, - "step": 8915 - }, - { - "epoch": 0.6700736509845182, - "grad_norm": 2.669859097795481, - "learning_rate": 1.0373289426305211e-06, - "loss": 1.0151, - "step": 8916 - }, - { - "epoch": 0.6701488050503532, - "grad_norm": 1.737037196374371, - "learning_rate": 1.036902236241498e-06, - "loss": 1.0293, - "step": 8917 - }, - { - "epoch": 0.6702239591161882, - "grad_norm": 2.6542363331832637, - "learning_rate": 1.0364755869197064e-06, - "loss": 1.0191, - "step": 8918 - }, - { - "epoch": 0.6702991131820232, - "grad_norm": 1.7158168386685808, - "learning_rate": 1.0360489946904241e-06, - "loss": 1.0062, - "step": 8919 - }, - { - "epoch": 0.6703742672478581, - "grad_norm": 1.917822744868319, - "learning_rate": 1.0356224595789309e-06, - "loss": 0.9622, - "step": 8920 - }, - { - "epoch": 0.6704494213136931, - "grad_norm": 2.0670364659178913, - "learning_rate": 1.035195981610499e-06, - "loss": 1.0546, - "step": 8921 - }, - { - "epoch": 0.670524575379528, - "grad_norm": 1.5185443252258992, - "learning_rate": 1.0347695608104006e-06, - "loss": 0.964, - "step": 8922 - }, - { - "epoch": 0.670599729445363, - "grad_norm": 1.5578315419953062, - "learning_rate": 1.0343431972039017e-06, - "loss": 0.95, - "step": 8923 - }, - { - "epoch": 0.670674883511198, - "grad_norm": 1.6048372609235062, - "learning_rate": 1.0339168908162662e-06, - "loss": 0.9779, - "step": 8924 - }, - { - "epoch": 0.6707500375770329, - "grad_norm": 1.8384626189647062, - "learning_rate": 1.033490641672754e-06, - "loss": 0.9426, - "step": 8925 - }, - { - "epoch": 0.6708251916428679, - "grad_norm": 2.4292079047426367, - "learning_rate": 1.0330644497986218e-06, - "loss": 0.9141, - "step": 8926 - }, - { - "epoch": 0.6709003457087028, - "grad_norm": 0.8997144166524662, - "learning_rate": 1.0326383152191245e-06, - "loss": 0.8937, - "step": 8927 - }, - { - "epoch": 0.6709754997745379, - "grad_norm": 1.574983018450546, - "learning_rate": 1.0322122379595112e-06, - "loss": 1.0051, - "step": 8928 - }, - { - "epoch": 0.6710506538403728, - "grad_norm": 1.4492591379301016, - "learning_rate": 1.0317862180450285e-06, - "loss": 1.0121, - "step": 8929 - }, - { - "epoch": 0.6711258079062077, - "grad_norm": 1.9484748720502167, - "learning_rate": 1.03136025550092e-06, - "loss": 1.0205, - "step": 8930 - }, - { - "epoch": 0.6712009619720427, - "grad_norm": 1.9555793295063, - "learning_rate": 1.0309343503524248e-06, - "loss": 1.0128, - "step": 8931 - }, - { - "epoch": 0.6712761160378776, - "grad_norm": 1.7712830888649753, - "learning_rate": 1.030508502624781e-06, - "loss": 1.0176, - "step": 8932 - }, - { - "epoch": 0.6713512701037126, - "grad_norm": 1.914362732053698, - "learning_rate": 1.0300827123432206e-06, - "loss": 0.9271, - "step": 8933 - }, - { - "epoch": 0.6714264241695476, - "grad_norm": 1.4141345348240713, - "learning_rate": 1.0296569795329739e-06, - "loss": 0.9773, - "step": 8934 - }, - { - "epoch": 0.6715015782353825, - "grad_norm": 0.6228949143259532, - "learning_rate": 1.0292313042192664e-06, - "loss": 0.8154, - "step": 8935 - }, - { - "epoch": 0.6715767323012175, - "grad_norm": 1.4037120381196115, - "learning_rate": 1.0288056864273212e-06, - "loss": 0.9443, - "step": 8936 - }, - { - "epoch": 0.6716518863670524, - "grad_norm": 1.6536388326982312, - "learning_rate": 1.0283801261823587e-06, - "loss": 0.9926, - "step": 8937 - }, - { - "epoch": 0.6717270404328874, - "grad_norm": 2.2721964276856763, - "learning_rate": 1.027954623509593e-06, - "loss": 0.9389, - "step": 8938 - }, - { - "epoch": 0.6718021944987224, - "grad_norm": 1.680868682729175, - "learning_rate": 1.0275291784342394e-06, - "loss": 0.9782, - "step": 8939 - }, - { - "epoch": 0.6718773485645574, - "grad_norm": 1.4683592256189237, - "learning_rate": 1.0271037909815063e-06, - "loss": 0.9685, - "step": 8940 - }, - { - "epoch": 0.6719525026303923, - "grad_norm": 3.3678667485297056, - "learning_rate": 1.0266784611765976e-06, - "loss": 1.062, - "step": 8941 - }, - { - "epoch": 0.6720276566962272, - "grad_norm": 1.6515108358482158, - "learning_rate": 1.0262531890447182e-06, - "loss": 0.9236, - "step": 8942 - }, - { - "epoch": 0.6721028107620622, - "grad_norm": 1.4823479715654895, - "learning_rate": 1.025827974611065e-06, - "loss": 0.9422, - "step": 8943 - }, - { - "epoch": 0.6721779648278972, - "grad_norm": 1.5911236234406179, - "learning_rate": 1.0254028179008355e-06, - "loss": 0.9199, - "step": 8944 - }, - { - "epoch": 0.6722531188937322, - "grad_norm": 1.8177236066320483, - "learning_rate": 1.024977718939221e-06, - "loss": 0.8861, - "step": 8945 - }, - { - "epoch": 0.6723282729595671, - "grad_norm": 1.538252331649325, - "learning_rate": 1.0245526777514104e-06, - "loss": 1.0036, - "step": 8946 - }, - { - "epoch": 0.6724034270254021, - "grad_norm": 0.8912712563311243, - "learning_rate": 1.0241276943625887e-06, - "loss": 0.8382, - "step": 8947 - }, - { - "epoch": 0.672478581091237, - "grad_norm": 3.1799127372428777, - "learning_rate": 1.0237027687979371e-06, - "loss": 0.9149, - "step": 8948 - }, - { - "epoch": 0.672553735157072, - "grad_norm": 2.2333891501023007, - "learning_rate": 1.0232779010826361e-06, - "loss": 0.9903, - "step": 8949 - }, - { - "epoch": 0.672628889222907, - "grad_norm": 0.8043473645185408, - "learning_rate": 1.0228530912418594e-06, - "loss": 0.8426, - "step": 8950 - }, - { - "epoch": 0.6727040432887419, - "grad_norm": 1.8586526958391123, - "learning_rate": 1.0224283393007786e-06, - "loss": 1.0441, - "step": 8951 - }, - { - "epoch": 0.6727791973545769, - "grad_norm": 1.7311782243633633, - "learning_rate": 1.022003645284562e-06, - "loss": 0.9702, - "step": 8952 - }, - { - "epoch": 0.6728543514204118, - "grad_norm": 2.5364058931061764, - "learning_rate": 1.021579009218374e-06, - "loss": 1.0353, - "step": 8953 - }, - { - "epoch": 0.6729295054862469, - "grad_norm": 3.0542867646450675, - "learning_rate": 1.021154431127377e-06, - "loss": 0.9708, - "step": 8954 - }, - { - "epoch": 0.6730046595520818, - "grad_norm": 1.7595779708406016, - "learning_rate": 1.0207299110367282e-06, - "loss": 0.9523, - "step": 8955 - }, - { - "epoch": 0.6730798136179167, - "grad_norm": 1.8321378306800857, - "learning_rate": 1.020305448971582e-06, - "loss": 1.0337, - "step": 8956 - }, - { - "epoch": 0.6731549676837517, - "grad_norm": 1.5454471818728126, - "learning_rate": 1.0198810449570894e-06, - "loss": 0.9707, - "step": 8957 - }, - { - "epoch": 0.6732301217495866, - "grad_norm": 1.788076270518482, - "learning_rate": 1.0194566990183972e-06, - "loss": 0.9518, - "step": 8958 - }, - { - "epoch": 0.6733052758154217, - "grad_norm": 1.6448028412677478, - "learning_rate": 1.0190324111806514e-06, - "loss": 0.8904, - "step": 8959 - }, - { - "epoch": 0.6733804298812566, - "grad_norm": 1.5255603573742007, - "learning_rate": 1.0186081814689907e-06, - "loss": 0.987, - "step": 8960 - }, - { - "epoch": 0.6734555839470915, - "grad_norm": 1.46184695082784, - "learning_rate": 1.018184009908554e-06, - "loss": 0.9413, - "step": 8961 - }, - { - "epoch": 0.6735307380129265, - "grad_norm": 1.7902626110283157, - "learning_rate": 1.0177598965244744e-06, - "loss": 0.9147, - "step": 8962 - }, - { - "epoch": 0.6736058920787614, - "grad_norm": 1.980891135654531, - "learning_rate": 1.017335841341882e-06, - "loss": 0.9041, - "step": 8963 - }, - { - "epoch": 0.6736810461445965, - "grad_norm": 1.7498545586229783, - "learning_rate": 1.0169118443859037e-06, - "loss": 0.9434, - "step": 8964 - }, - { - "epoch": 0.6737562002104314, - "grad_norm": 1.6737778432409034, - "learning_rate": 1.0164879056816627e-06, - "loss": 1.0267, - "step": 8965 - }, - { - "epoch": 0.6738313542762664, - "grad_norm": 1.441315356027483, - "learning_rate": 1.01606402525428e-06, - "loss": 1.0355, - "step": 8966 - }, - { - "epoch": 0.6739065083421013, - "grad_norm": 4.81688322132637, - "learning_rate": 1.0156402031288717e-06, - "loss": 0.9604, - "step": 8967 - }, - { - "epoch": 0.6739816624079362, - "grad_norm": 1.7353847416768702, - "learning_rate": 1.0152164393305506e-06, - "loss": 0.9917, - "step": 8968 - }, - { - "epoch": 0.6740568164737712, - "grad_norm": 1.6082898527734537, - "learning_rate": 1.0147927338844267e-06, - "loss": 1.0065, - "step": 8969 - }, - { - "epoch": 0.6741319705396062, - "grad_norm": 1.9477258136566158, - "learning_rate": 1.014369086815605e-06, - "loss": 0.9608, - "step": 8970 - }, - { - "epoch": 0.6742071246054412, - "grad_norm": 1.7403329463832395, - "learning_rate": 1.0139454981491898e-06, - "loss": 1.0512, - "step": 8971 - }, - { - "epoch": 0.6742822786712761, - "grad_norm": 1.6693685406810201, - "learning_rate": 1.0135219679102797e-06, - "loss": 1.0182, - "step": 8972 - }, - { - "epoch": 0.6743574327371111, - "grad_norm": 1.680206715377467, - "learning_rate": 1.0130984961239706e-06, - "loss": 0.8589, - "step": 8973 - }, - { - "epoch": 0.674432586802946, - "grad_norm": 1.2793064262674223, - "learning_rate": 1.0126750828153546e-06, - "loss": 0.9489, - "step": 8974 - }, - { - "epoch": 0.674507740868781, - "grad_norm": 1.6703642677442507, - "learning_rate": 1.01225172800952e-06, - "loss": 1.0107, - "step": 8975 - }, - { - "epoch": 0.674582894934616, - "grad_norm": 2.3939884435554815, - "learning_rate": 1.0118284317315535e-06, - "loss": 1.0243, - "step": 8976 - }, - { - "epoch": 0.6746580490004509, - "grad_norm": 2.7427954366674094, - "learning_rate": 1.0114051940065365e-06, - "loss": 0.9256, - "step": 8977 - }, - { - "epoch": 0.6747332030662859, - "grad_norm": 1.8408822208588518, - "learning_rate": 1.0109820148595473e-06, - "loss": 1.1711, - "step": 8978 - }, - { - "epoch": 0.6748083571321208, - "grad_norm": 1.7151566135308456, - "learning_rate": 1.010558894315661e-06, - "loss": 0.9564, - "step": 8979 - }, - { - "epoch": 0.6748835111979558, - "grad_norm": 1.5014832633524635, - "learning_rate": 1.010135832399948e-06, - "loss": 0.9594, - "step": 8980 - }, - { - "epoch": 0.6749586652637908, - "grad_norm": 1.8077802043591884, - "learning_rate": 1.0097128291374786e-06, - "loss": 0.9741, - "step": 8981 - }, - { - "epoch": 0.6750338193296257, - "grad_norm": 1.7032796806998005, - "learning_rate": 1.009289884553315e-06, - "loss": 0.8631, - "step": 8982 - }, - { - "epoch": 0.6751089733954607, - "grad_norm": 2.0952812402997716, - "learning_rate": 1.0088669986725201e-06, - "loss": 1.0268, - "step": 8983 - }, - { - "epoch": 0.6751841274612956, - "grad_norm": 2.0628866353472235, - "learning_rate": 1.0084441715201513e-06, - "loss": 0.8886, - "step": 8984 - }, - { - "epoch": 0.6752592815271307, - "grad_norm": 1.6785776419365162, - "learning_rate": 1.008021403121262e-06, - "loss": 1.0088, - "step": 8985 - }, - { - "epoch": 0.6753344355929656, - "grad_norm": 1.7271776441248377, - "learning_rate": 1.0075986935009028e-06, - "loss": 0.8611, - "step": 8986 - }, - { - "epoch": 0.6754095896588005, - "grad_norm": 1.7014066794713087, - "learning_rate": 1.0071760426841208e-06, - "loss": 0.9005, - "step": 8987 - }, - { - "epoch": 0.6754847437246355, - "grad_norm": 1.928861433507405, - "learning_rate": 1.0067534506959608e-06, - "loss": 0.9743, - "step": 8988 - }, - { - "epoch": 0.6755598977904704, - "grad_norm": 2.675192574672798, - "learning_rate": 1.006330917561462e-06, - "loss": 0.8583, - "step": 8989 - }, - { - "epoch": 0.6756350518563055, - "grad_norm": 1.5999575132451027, - "learning_rate": 1.0059084433056616e-06, - "loss": 0.9848, - "step": 8990 - }, - { - "epoch": 0.6757102059221404, - "grad_norm": 1.9307894012410003, - "learning_rate": 1.0054860279535922e-06, - "loss": 0.9135, - "step": 8991 - }, - { - "epoch": 0.6757853599879754, - "grad_norm": 1.835938603826962, - "learning_rate": 1.0050636715302837e-06, - "loss": 0.9977, - "step": 8992 - }, - { - "epoch": 0.6758605140538103, - "grad_norm": 1.5266522259147237, - "learning_rate": 1.0046413740607626e-06, - "loss": 0.9641, - "step": 8993 - }, - { - "epoch": 0.6759356681196452, - "grad_norm": 1.8080003065514094, - "learning_rate": 1.004219135570052e-06, - "loss": 1.0046, - "step": 8994 - }, - { - "epoch": 0.6760108221854803, - "grad_norm": 1.8620045764674276, - "learning_rate": 1.0037969560831708e-06, - "loss": 0.9939, - "step": 8995 - }, - { - "epoch": 0.6760859762513152, - "grad_norm": 3.2829226635583093, - "learning_rate": 1.0033748356251343e-06, - "loss": 0.9331, - "step": 8996 - }, - { - "epoch": 0.6761611303171502, - "grad_norm": 1.5712553012366988, - "learning_rate": 1.0029527742209547e-06, - "loss": 0.9324, - "step": 8997 - }, - { - "epoch": 0.6762362843829851, - "grad_norm": 1.6850225134684065, - "learning_rate": 1.0025307718956417e-06, - "loss": 1.0645, - "step": 8998 - }, - { - "epoch": 0.6763114384488201, - "grad_norm": 1.7840624908057043, - "learning_rate": 1.0021088286742003e-06, - "loss": 0.9647, - "step": 8999 - }, - { - "epoch": 0.676386592514655, - "grad_norm": 2.5946380749460447, - "learning_rate": 1.0016869445816318e-06, - "loss": 1.0445, - "step": 9000 - }, - { - "epoch": 0.67646174658049, - "grad_norm": 3.8779268593779035, - "learning_rate": 1.0012651196429347e-06, - "loss": 0.9037, - "step": 9001 - }, - { - "epoch": 0.676536900646325, - "grad_norm": 2.583139801979805, - "learning_rate": 1.0008433538831028e-06, - "loss": 0.9834, - "step": 9002 - }, - { - "epoch": 0.6766120547121599, - "grad_norm": 2.022150285661655, - "learning_rate": 1.000421647327129e-06, - "loss": 1.0109, - "step": 9003 - }, - { - "epoch": 0.6766872087779949, - "grad_norm": 1.699832919268884, - "learning_rate": 9.999999999999995e-07, - "loss": 0.9825, - "step": 9004 - }, - { - "epoch": 0.6767623628438298, - "grad_norm": 1.392198936338365, - "learning_rate": 9.995784119267e-07, - "loss": 0.9446, - "step": 9005 - }, - { - "epoch": 0.6768375169096648, - "grad_norm": 1.914036840150069, - "learning_rate": 9.991568831322105e-07, - "loss": 0.9031, - "step": 9006 - }, - { - "epoch": 0.6769126709754998, - "grad_norm": 1.6200247675568293, - "learning_rate": 9.987354136415083e-07, - "loss": 0.9394, - "step": 9007 - }, - { - "epoch": 0.6769878250413347, - "grad_norm": 2.348612103598929, - "learning_rate": 9.983140034795667e-07, - "loss": 1.0444, - "step": 9008 - }, - { - "epoch": 0.6770629791071697, - "grad_norm": 1.8739441759961144, - "learning_rate": 9.978926526713556e-07, - "loss": 0.8305, - "step": 9009 - }, - { - "epoch": 0.6771381331730046, - "grad_norm": 1.5581790687920798, - "learning_rate": 9.974713612418427e-07, - "loss": 0.9123, - "step": 9010 - }, - { - "epoch": 0.6772132872388397, - "grad_norm": 1.4578628416865176, - "learning_rate": 9.970501292159904e-07, - "loss": 0.8944, - "step": 9011 - }, - { - "epoch": 0.6772884413046746, - "grad_norm": 1.6018606513158786, - "learning_rate": 9.96628956618759e-07, - "loss": 0.9921, - "step": 9012 - }, - { - "epoch": 0.6773635953705095, - "grad_norm": 1.7319647047089453, - "learning_rate": 9.962078434751038e-07, - "loss": 0.8761, - "step": 9013 - }, - { - "epoch": 0.6774387494363445, - "grad_norm": 0.8606898219810157, - "learning_rate": 9.957867898099768e-07, - "loss": 0.9374, - "step": 9014 - }, - { - "epoch": 0.6775139035021794, - "grad_norm": 3.5064944911311366, - "learning_rate": 9.953657956483287e-07, - "loss": 1.0573, - "step": 9015 - }, - { - "epoch": 0.6775890575680145, - "grad_norm": 1.9292848607129651, - "learning_rate": 9.949448610151043e-07, - "loss": 0.9596, - "step": 9016 - }, - { - "epoch": 0.6776642116338494, - "grad_norm": 1.4684654122456102, - "learning_rate": 9.945239859352455e-07, - "loss": 0.9457, - "step": 9017 - }, - { - "epoch": 0.6777393656996844, - "grad_norm": 1.9858531702896831, - "learning_rate": 9.941031704336908e-07, - "loss": 1.0558, - "step": 9018 - }, - { - "epoch": 0.6778145197655193, - "grad_norm": 2.0240449325037093, - "learning_rate": 9.936824145353742e-07, - "loss": 1.0283, - "step": 9019 - }, - { - "epoch": 0.6778896738313542, - "grad_norm": 1.6814954387093624, - "learning_rate": 9.932617182652288e-07, - "loss": 0.9822, - "step": 9020 - }, - { - "epoch": 0.6779648278971893, - "grad_norm": 1.8485279567380206, - "learning_rate": 9.928410816481808e-07, - "loss": 1.0171, - "step": 9021 - }, - { - "epoch": 0.6780399819630242, - "grad_norm": 1.758189928013333, - "learning_rate": 9.924205047091572e-07, - "loss": 0.9697, - "step": 9022 - }, - { - "epoch": 0.6781151360288592, - "grad_norm": 1.8646389475223641, - "learning_rate": 9.91999987473076e-07, - "loss": 1.0067, - "step": 9023 - }, - { - "epoch": 0.6781902900946941, - "grad_norm": 3.201281654472503, - "learning_rate": 9.915795299648545e-07, - "loss": 1.0795, - "step": 9024 - }, - { - "epoch": 0.678265444160529, - "grad_norm": 1.6320498935187895, - "learning_rate": 9.911591322094085e-07, - "loss": 0.9644, - "step": 9025 - }, - { - "epoch": 0.678340598226364, - "grad_norm": 1.3808210440445459, - "learning_rate": 9.90738794231646e-07, - "loss": 0.9785, - "step": 9026 - }, - { - "epoch": 0.678415752292199, - "grad_norm": 1.7360830745552478, - "learning_rate": 9.903185160564756e-07, - "loss": 0.959, - "step": 9027 - }, - { - "epoch": 0.678490906358034, - "grad_norm": 2.118573400288715, - "learning_rate": 9.89898297708799e-07, - "loss": 1.1084, - "step": 9028 - }, - { - "epoch": 0.6785660604238689, - "grad_norm": 1.8993747108913652, - "learning_rate": 9.894781392135167e-07, - "loss": 0.9656, - "step": 9029 - }, - { - "epoch": 0.6786412144897039, - "grad_norm": 2.473589246556742, - "learning_rate": 9.89058040595524e-07, - "loss": 0.9862, - "step": 9030 - }, - { - "epoch": 0.6787163685555389, - "grad_norm": 1.5966619181446244, - "learning_rate": 9.88638001879713e-07, - "loss": 0.9145, - "step": 9031 - }, - { - "epoch": 0.6787915226213738, - "grad_norm": 2.3612823860528307, - "learning_rate": 9.882180230909736e-07, - "loss": 1.0433, - "step": 9032 - }, - { - "epoch": 0.6788666766872088, - "grad_norm": 1.7071619049281945, - "learning_rate": 9.877981042541908e-07, - "loss": 1.0876, - "step": 9033 - }, - { - "epoch": 0.6789418307530437, - "grad_norm": 11.55298317700177, - "learning_rate": 9.873782453942462e-07, - "loss": 0.9652, - "step": 9034 - }, - { - "epoch": 0.6790169848188787, - "grad_norm": 1.4792246560921427, - "learning_rate": 9.869584465360184e-07, - "loss": 0.9748, - "step": 9035 - }, - { - "epoch": 0.6790921388847136, - "grad_norm": 1.71850424656858, - "learning_rate": 9.86538707704381e-07, - "loss": 0.9686, - "step": 9036 - }, - { - "epoch": 0.6791672929505487, - "grad_norm": 4.6945320788348, - "learning_rate": 9.861190289242067e-07, - "loss": 0.9886, - "step": 9037 - }, - { - "epoch": 0.6792424470163836, - "grad_norm": 1.9508366721655774, - "learning_rate": 9.856994102203623e-07, - "loss": 0.9203, - "step": 9038 - }, - { - "epoch": 0.6793176010822185, - "grad_norm": 1.8316375374437481, - "learning_rate": 9.852798516177119e-07, - "loss": 0.9793, - "step": 9039 - }, - { - "epoch": 0.6793927551480535, - "grad_norm": 1.3006122926149009, - "learning_rate": 9.848603531411159e-07, - "loss": 1.0349, - "step": 9040 - }, - { - "epoch": 0.6794679092138884, - "grad_norm": 1.719245769784529, - "learning_rate": 9.844409148154304e-07, - "loss": 0.9841, - "step": 9041 - }, - { - "epoch": 0.6795430632797235, - "grad_norm": 1.6088653053630007, - "learning_rate": 9.840215366655104e-07, - "loss": 0.9363, - "step": 9042 - }, - { - "epoch": 0.6796182173455584, - "grad_norm": 1.6534464157691795, - "learning_rate": 9.83602218716204e-07, - "loss": 0.9508, - "step": 9043 - }, - { - "epoch": 0.6796933714113934, - "grad_norm": 2.0850239442689977, - "learning_rate": 9.831829609923596e-07, - "loss": 0.9489, - "step": 9044 - }, - { - "epoch": 0.6797685254772283, - "grad_norm": 1.5525653099291699, - "learning_rate": 9.82763763518818e-07, - "loss": 0.8756, - "step": 9045 - }, - { - "epoch": 0.6798436795430632, - "grad_norm": 0.7871157738210165, - "learning_rate": 9.823446263204175e-07, - "loss": 0.9306, - "step": 9046 - }, - { - "epoch": 0.6799188336088983, - "grad_norm": 11.121113644827656, - "learning_rate": 9.819255494219957e-07, - "loss": 1.0359, - "step": 9047 - }, - { - "epoch": 0.6799939876747332, - "grad_norm": 5.64291850879268, - "learning_rate": 9.815065328483827e-07, - "loss": 0.9978, - "step": 9048 - }, - { - "epoch": 0.6800691417405682, - "grad_norm": 1.3205162658556244, - "learning_rate": 9.810875766244086e-07, - "loss": 1.0415, - "step": 9049 - }, - { - "epoch": 0.6801442958064031, - "grad_norm": 1.8880222153932622, - "learning_rate": 9.806686807748972e-07, - "loss": 0.9279, - "step": 9050 - }, - { - "epoch": 0.680219449872238, - "grad_norm": 1.7347405538140974, - "learning_rate": 9.802498453246697e-07, - "loss": 1.02, - "step": 9051 - }, - { - "epoch": 0.6802946039380731, - "grad_norm": 1.5576596627976251, - "learning_rate": 9.79831070298544e-07, - "loss": 0.9118, - "step": 9052 - }, - { - "epoch": 0.680369758003908, - "grad_norm": 1.8197934962064923, - "learning_rate": 9.794123557213328e-07, - "loss": 1.0032, - "step": 9053 - }, - { - "epoch": 0.680444912069743, - "grad_norm": 0.766025049442439, - "learning_rate": 9.789937016178485e-07, - "loss": 0.8903, - "step": 9054 - }, - { - "epoch": 0.6805200661355779, - "grad_norm": 1.40626593686716, - "learning_rate": 9.78575108012897e-07, - "loss": 1.0694, - "step": 9055 - }, - { - "epoch": 0.6805952202014129, - "grad_norm": 4.169772986583749, - "learning_rate": 9.781565749312816e-07, - "loss": 1.0082, - "step": 9056 - }, - { - "epoch": 0.6806703742672479, - "grad_norm": 2.0391411894014895, - "learning_rate": 9.777381023978022e-07, - "loss": 0.9678, - "step": 9057 - }, - { - "epoch": 0.6807455283330828, - "grad_norm": 1.4520980323401458, - "learning_rate": 9.773196904372539e-07, - "loss": 0.9623, - "step": 9058 - }, - { - "epoch": 0.6808206823989178, - "grad_norm": 1.8836489517053059, - "learning_rate": 9.769013390744307e-07, - "loss": 0.9767, - "step": 9059 - }, - { - "epoch": 0.6808958364647527, - "grad_norm": 2.475274458030488, - "learning_rate": 9.76483048334121e-07, - "loss": 0.9041, - "step": 9060 - }, - { - "epoch": 0.6809709905305877, - "grad_norm": 1.6625378568540996, - "learning_rate": 9.760648182411102e-07, - "loss": 0.9206, - "step": 9061 - }, - { - "epoch": 0.6810461445964227, - "grad_norm": 2.0240715535750455, - "learning_rate": 9.756466488201795e-07, - "loss": 0.8927, - "step": 9062 - }, - { - "epoch": 0.6811212986622577, - "grad_norm": 1.6642835586646156, - "learning_rate": 9.752285400961067e-07, - "loss": 0.9787, - "step": 9063 - }, - { - "epoch": 0.6811964527280926, - "grad_norm": 1.7029119935829902, - "learning_rate": 9.748104920936678e-07, - "loss": 0.9374, - "step": 9064 - }, - { - "epoch": 0.6812716067939275, - "grad_norm": 2.1599225819806938, - "learning_rate": 9.743925048376322e-07, - "loss": 0.9517, - "step": 9065 - }, - { - "epoch": 0.6813467608597625, - "grad_norm": 1.6644652689607369, - "learning_rate": 9.739745783527695e-07, - "loss": 0.9978, - "step": 9066 - }, - { - "epoch": 0.6814219149255974, - "grad_norm": 1.7945624023709563, - "learning_rate": 9.73556712663841e-07, - "loss": 0.9174, - "step": 9067 - }, - { - "epoch": 0.6814970689914325, - "grad_norm": 1.7195353707693553, - "learning_rate": 9.731389077956073e-07, - "loss": 1.0229, - "step": 9068 - }, - { - "epoch": 0.6815722230572674, - "grad_norm": 3.5994106658053457, - "learning_rate": 9.727211637728261e-07, - "loss": 1.0683, - "step": 9069 - }, - { - "epoch": 0.6816473771231023, - "grad_norm": 1.8296309569081308, - "learning_rate": 9.72303480620249e-07, - "loss": 0.9665, - "step": 9070 - }, - { - "epoch": 0.6817225311889373, - "grad_norm": 1.5679710734093086, - "learning_rate": 9.718858583626266e-07, - "loss": 0.9702, - "step": 9071 - }, - { - "epoch": 0.6817976852547722, - "grad_norm": 3.5535230804462437, - "learning_rate": 9.714682970247042e-07, - "loss": 0.9799, - "step": 9072 - }, - { - "epoch": 0.6818728393206073, - "grad_norm": 1.4605560620761975, - "learning_rate": 9.710507966312233e-07, - "loss": 0.9741, - "step": 9073 - }, - { - "epoch": 0.6819479933864422, - "grad_norm": 4.522334094359985, - "learning_rate": 9.706333572069232e-07, - "loss": 1.0197, - "step": 9074 - }, - { - "epoch": 0.6820231474522772, - "grad_norm": 1.7696857174448921, - "learning_rate": 9.702159787765376e-07, - "loss": 0.914, - "step": 9075 - }, - { - "epoch": 0.6820983015181121, - "grad_norm": 1.9004972610273247, - "learning_rate": 9.697986613647992e-07, - "loss": 1.0374, - "step": 9076 - }, - { - "epoch": 0.682173455583947, - "grad_norm": 1.8840313523755408, - "learning_rate": 9.69381404996435e-07, - "loss": 0.9783, - "step": 9077 - }, - { - "epoch": 0.6822486096497821, - "grad_norm": 1.5530027529464265, - "learning_rate": 9.689642096961692e-07, - "loss": 0.9286, - "step": 9078 - }, - { - "epoch": 0.682323763715617, - "grad_norm": 1.6979799161336573, - "learning_rate": 9.68547075488722e-07, - "loss": 0.949, - "step": 9079 - }, - { - "epoch": 0.682398917781452, - "grad_norm": 3.551879041657035, - "learning_rate": 9.681300023988095e-07, - "loss": 0.9217, - "step": 9080 - }, - { - "epoch": 0.6824740718472869, - "grad_norm": 2.008668116079762, - "learning_rate": 9.677129904511462e-07, - "loss": 0.951, - "step": 9081 - }, - { - "epoch": 0.682549225913122, - "grad_norm": 2.209201630045027, - "learning_rate": 9.672960396704416e-07, - "loss": 0.9925, - "step": 9082 - }, - { - "epoch": 0.6826243799789569, - "grad_norm": 1.7815761602172782, - "learning_rate": 9.668791500814007e-07, - "loss": 0.9073, - "step": 9083 - }, - { - "epoch": 0.6826995340447918, - "grad_norm": 2.537687428134158, - "learning_rate": 9.664623217087264e-07, - "loss": 0.9361, - "step": 9084 - }, - { - "epoch": 0.6827746881106268, - "grad_norm": 0.8290882985701941, - "learning_rate": 9.660455545771164e-07, - "loss": 0.9116, - "step": 9085 - }, - { - "epoch": 0.6828498421764617, - "grad_norm": 1.637323308832212, - "learning_rate": 9.656288487112673e-07, - "loss": 0.9755, - "step": 9086 - }, - { - "epoch": 0.6829249962422967, - "grad_norm": 2.0475643249393665, - "learning_rate": 9.652122041358693e-07, - "loss": 0.8378, - "step": 9087 - }, - { - "epoch": 0.6830001503081317, - "grad_norm": 2.988661000176268, - "learning_rate": 9.647956208756113e-07, - "loss": 1.0368, - "step": 9088 - }, - { - "epoch": 0.6830753043739667, - "grad_norm": 2.810797197064584, - "learning_rate": 9.643790989551775e-07, - "loss": 1.0482, - "step": 9089 - }, - { - "epoch": 0.6831504584398016, - "grad_norm": 1.5750563596299474, - "learning_rate": 9.63962638399246e-07, - "loss": 0.9222, - "step": 9090 - }, - { - "epoch": 0.6832256125056365, - "grad_norm": 2.040894251960833, - "learning_rate": 9.635462392324967e-07, - "loss": 1.0203, - "step": 9091 - }, - { - "epoch": 0.6833007665714715, - "grad_norm": 4.079105887708684, - "learning_rate": 9.631299014796003e-07, - "loss": 0.9909, - "step": 9092 - }, - { - "epoch": 0.6833759206373065, - "grad_norm": 1.5765461341454583, - "learning_rate": 9.62713625165229e-07, - "loss": 0.9251, - "step": 9093 - }, - { - "epoch": 0.6834510747031415, - "grad_norm": 2.1257243324407136, - "learning_rate": 9.622974103140468e-07, - "loss": 0.9438, - "step": 9094 - }, - { - "epoch": 0.6835262287689764, - "grad_norm": 4.846283477754158, - "learning_rate": 9.61881256950717e-07, - "loss": 0.8867, - "step": 9095 - }, - { - "epoch": 0.6836013828348113, - "grad_norm": 2.106398226901886, - "learning_rate": 9.614651650998982e-07, - "loss": 1.0481, - "step": 9096 - }, - { - "epoch": 0.6836765369006463, - "grad_norm": 2.8487846326208377, - "learning_rate": 9.610491347862439e-07, - "loss": 0.9379, - "step": 9097 - }, - { - "epoch": 0.6837516909664813, - "grad_norm": 2.305220099509771, - "learning_rate": 9.60633166034408e-07, - "loss": 0.9813, - "step": 9098 - }, - { - "epoch": 0.6838268450323163, - "grad_norm": 1.5759561573068643, - "learning_rate": 9.602172588690368e-07, - "loss": 0.9378, - "step": 9099 - }, - { - "epoch": 0.6839019990981512, - "grad_norm": 2.5048280825222204, - "learning_rate": 9.598014133147744e-07, - "loss": 0.8762, - "step": 9100 - }, - { - "epoch": 0.6839771531639862, - "grad_norm": 2.3135730212848036, - "learning_rate": 9.593856293962619e-07, - "loss": 0.9082, - "step": 9101 - }, - { - "epoch": 0.6840523072298211, - "grad_norm": 1.5187502229164493, - "learning_rate": 9.589699071381346e-07, - "loss": 1.013, - "step": 9102 - }, - { - "epoch": 0.684127461295656, - "grad_norm": 4.521189287414522, - "learning_rate": 9.585542465650274e-07, - "loss": 1.0317, - "step": 9103 - }, - { - "epoch": 0.6842026153614911, - "grad_norm": 1.7368118075872139, - "learning_rate": 9.581386477015691e-07, - "loss": 0.8697, - "step": 9104 - }, - { - "epoch": 0.684277769427326, - "grad_norm": 1.8361687697172637, - "learning_rate": 9.577231105723856e-07, - "loss": 0.9712, - "step": 9105 - }, - { - "epoch": 0.684352923493161, - "grad_norm": 2.8560945631341506, - "learning_rate": 9.573076352020989e-07, - "loss": 1.0599, - "step": 9106 - }, - { - "epoch": 0.6844280775589959, - "grad_norm": 1.4941901703642053, - "learning_rate": 9.568922216153266e-07, - "loss": 0.9725, - "step": 9107 - }, - { - "epoch": 0.684503231624831, - "grad_norm": 3.8311850014877904, - "learning_rate": 9.564768698366855e-07, - "loss": 1.0082, - "step": 9108 - }, - { - "epoch": 0.6845783856906659, - "grad_norm": 1.379129969789569, - "learning_rate": 9.560615798907849e-07, - "loss": 0.9176, - "step": 9109 - }, - { - "epoch": 0.6846535397565008, - "grad_norm": 1.619483828551913, - "learning_rate": 9.55646351802234e-07, - "loss": 0.9119, - "step": 9110 - }, - { - "epoch": 0.6847286938223358, - "grad_norm": 1.884950176157306, - "learning_rate": 9.552311855956364e-07, - "loss": 1.0344, - "step": 9111 - }, - { - "epoch": 0.6848038478881707, - "grad_norm": 2.1987827878484105, - "learning_rate": 9.548160812955905e-07, - "loss": 0.9178, - "step": 9112 - }, - { - "epoch": 0.6848790019540058, - "grad_norm": 2.273417679218526, - "learning_rate": 9.544010389266948e-07, - "loss": 1.0035, - "step": 9113 - }, - { - "epoch": 0.6849541560198407, - "grad_norm": 2.0422769399961145, - "learning_rate": 9.539860585135405e-07, - "loss": 1.0369, - "step": 9114 - }, - { - "epoch": 0.6850293100856756, - "grad_norm": 2.344115165237824, - "learning_rate": 9.535711400807185e-07, - "loss": 0.9685, - "step": 9115 - }, - { - "epoch": 0.6851044641515106, - "grad_norm": 1.8629532691307873, - "learning_rate": 9.531562836528135e-07, - "loss": 0.9746, - "step": 9116 - }, - { - "epoch": 0.6851796182173455, - "grad_norm": 1.6661859136850252, - "learning_rate": 9.527414892544075e-07, - "loss": 0.818, - "step": 9117 - }, - { - "epoch": 0.6852547722831805, - "grad_norm": 1.3275724720255084, - "learning_rate": 9.523267569100784e-07, - "loss": 0.9939, - "step": 9118 - }, - { - "epoch": 0.6853299263490155, - "grad_norm": 1.8317867677147572, - "learning_rate": 9.519120866443997e-07, - "loss": 1.0296, - "step": 9119 - }, - { - "epoch": 0.6854050804148505, - "grad_norm": 2.033564144333754, - "learning_rate": 9.514974784819443e-07, - "loss": 0.9822, - "step": 9120 - }, - { - "epoch": 0.6854802344806854, - "grad_norm": 1.962453491728937, - "learning_rate": 9.510829324472782e-07, - "loss": 1.0299, - "step": 9121 - }, - { - "epoch": 0.6855553885465203, - "grad_norm": 1.6587843308852361, - "learning_rate": 9.50668448564965e-07, - "loss": 0.9222, - "step": 9122 - }, - { - "epoch": 0.6856305426123553, - "grad_norm": 1.8996652785148487, - "learning_rate": 9.502540268595645e-07, - "loss": 1.0088, - "step": 9123 - }, - { - "epoch": 0.6857056966781903, - "grad_norm": 2.254581978193036, - "learning_rate": 9.498396673556317e-07, - "loss": 0.896, - "step": 9124 - }, - { - "epoch": 0.6857808507440253, - "grad_norm": 1.3766244915481802, - "learning_rate": 9.494253700777207e-07, - "loss": 0.9703, - "step": 9125 - }, - { - "epoch": 0.6858560048098602, - "grad_norm": 1.6021854584697457, - "learning_rate": 9.490111350503793e-07, - "loss": 0.9393, - "step": 9126 - }, - { - "epoch": 0.6859311588756952, - "grad_norm": 1.7057911713568372, - "learning_rate": 9.485969622981528e-07, - "loss": 0.9893, - "step": 9127 - }, - { - "epoch": 0.6860063129415301, - "grad_norm": 2.1582017993491402, - "learning_rate": 9.481828518455825e-07, - "loss": 0.9374, - "step": 9128 - }, - { - "epoch": 0.686081467007365, - "grad_norm": 2.5661487188852723, - "learning_rate": 9.477688037172051e-07, - "loss": 0.9798, - "step": 9129 - }, - { - "epoch": 0.6861566210732001, - "grad_norm": 1.9733960265862962, - "learning_rate": 9.473548179375561e-07, - "loss": 0.9572, - "step": 9130 - }, - { - "epoch": 0.686231775139035, - "grad_norm": 1.6673361943441678, - "learning_rate": 9.469408945311641e-07, - "loss": 0.914, - "step": 9131 - }, - { - "epoch": 0.68630692920487, - "grad_norm": 2.6571182627059744, - "learning_rate": 9.465270335225575e-07, - "loss": 1.0916, - "step": 9132 - }, - { - "epoch": 0.6863820832707049, - "grad_norm": 2.8165435545912776, - "learning_rate": 9.46113234936258e-07, - "loss": 0.9881, - "step": 9133 - }, - { - "epoch": 0.68645723733654, - "grad_norm": 1.5577164477826204, - "learning_rate": 9.45699498796785e-07, - "loss": 0.9399, - "step": 9134 - }, - { - "epoch": 0.6865323914023749, - "grad_norm": 2.211388100580805, - "learning_rate": 9.452858251286537e-07, - "loss": 0.9409, - "step": 9135 - }, - { - "epoch": 0.6866075454682098, - "grad_norm": 1.7691293558986074, - "learning_rate": 9.448722139563756e-07, - "loss": 0.8662, - "step": 9136 - }, - { - "epoch": 0.6866826995340448, - "grad_norm": 2.4205165868110856, - "learning_rate": 9.444586653044597e-07, - "loss": 0.9309, - "step": 9137 - }, - { - "epoch": 0.6867578535998797, - "grad_norm": 2.7756897283959447, - "learning_rate": 9.4404517919741e-07, - "loss": 1.0014, - "step": 9138 - }, - { - "epoch": 0.6868330076657148, - "grad_norm": 0.7477596518385601, - "learning_rate": 9.436317556597269e-07, - "loss": 0.8417, - "step": 9139 - }, - { - "epoch": 0.6869081617315497, - "grad_norm": 3.9541676469378904, - "learning_rate": 9.432183947159071e-07, - "loss": 1.0291, - "step": 9140 - }, - { - "epoch": 0.6869833157973846, - "grad_norm": 1.3043528601646028, - "learning_rate": 9.428050963904437e-07, - "loss": 1.0356, - "step": 9141 - }, - { - "epoch": 0.6870584698632196, - "grad_norm": 2.279668390352137, - "learning_rate": 9.423918607078272e-07, - "loss": 1.0024, - "step": 9142 - }, - { - "epoch": 0.6871336239290545, - "grad_norm": 1.838164452029213, - "learning_rate": 9.419786876925428e-07, - "loss": 1.0037, - "step": 9143 - }, - { - "epoch": 0.6872087779948896, - "grad_norm": 0.7248875991244718, - "learning_rate": 9.415655773690727e-07, - "loss": 0.8263, - "step": 9144 - }, - { - "epoch": 0.6872839320607245, - "grad_norm": 1.6342322642796743, - "learning_rate": 9.41152529761895e-07, - "loss": 0.9224, - "step": 9145 - }, - { - "epoch": 0.6873590861265595, - "grad_norm": 1.3009673664184347, - "learning_rate": 9.40739544895484e-07, - "loss": 0.8924, - "step": 9146 - }, - { - "epoch": 0.6874342401923944, - "grad_norm": 2.1903182267952177, - "learning_rate": 9.403266227943116e-07, - "loss": 0.8992, - "step": 9147 - }, - { - "epoch": 0.6875093942582293, - "grad_norm": 2.426303973061917, - "learning_rate": 9.399137634828447e-07, - "loss": 0.8228, - "step": 9148 - }, - { - "epoch": 0.6875845483240643, - "grad_norm": 4.990575300242706, - "learning_rate": 9.395009669855467e-07, - "loss": 1.0066, - "step": 9149 - }, - { - "epoch": 0.6876597023898993, - "grad_norm": 0.7771221592657782, - "learning_rate": 9.390882333268772e-07, - "loss": 0.8464, - "step": 9150 - }, - { - "epoch": 0.6877348564557343, - "grad_norm": 1.6804577857687995, - "learning_rate": 9.386755625312919e-07, - "loss": 0.9317, - "step": 9151 - }, - { - "epoch": 0.6878100105215692, - "grad_norm": 1.931645253545232, - "learning_rate": 9.382629546232442e-07, - "loss": 0.9621, - "step": 9152 - }, - { - "epoch": 0.6878851645874042, - "grad_norm": 1.79507595060962, - "learning_rate": 9.37850409627181e-07, - "loss": 0.9477, - "step": 9153 - }, - { - "epoch": 0.6879603186532391, - "grad_norm": 0.6957880601221985, - "learning_rate": 9.374379275675495e-07, - "loss": 0.8347, - "step": 9154 - }, - { - "epoch": 0.6880354727190741, - "grad_norm": 2.186534559554674, - "learning_rate": 9.370255084687895e-07, - "loss": 0.9074, - "step": 9155 - }, - { - "epoch": 0.6881106267849091, - "grad_norm": 2.102093937837325, - "learning_rate": 9.366131523553385e-07, - "loss": 0.897, - "step": 9156 - }, - { - "epoch": 0.688185780850744, - "grad_norm": 1.679878969147083, - "learning_rate": 9.362008592516302e-07, - "loss": 0.9946, - "step": 9157 - }, - { - "epoch": 0.688260934916579, - "grad_norm": 1.7391629501081285, - "learning_rate": 9.357886291820938e-07, - "loss": 1.0192, - "step": 9158 - }, - { - "epoch": 0.6883360889824139, - "grad_norm": 4.500988110034818, - "learning_rate": 9.353764621711568e-07, - "loss": 0.9837, - "step": 9159 - }, - { - "epoch": 0.6884112430482489, - "grad_norm": 2.1988300637246625, - "learning_rate": 9.349643582432414e-07, - "loss": 0.9786, - "step": 9160 - }, - { - "epoch": 0.6884863971140839, - "grad_norm": 1.6360510873209604, - "learning_rate": 9.345523174227658e-07, - "loss": 0.9544, - "step": 9161 - }, - { - "epoch": 0.6885615511799188, - "grad_norm": 1.7293434359113964, - "learning_rate": 9.341403397341457e-07, - "loss": 0.9515, - "step": 9162 - }, - { - "epoch": 0.6886367052457538, - "grad_norm": 2.8452603866941284, - "learning_rate": 9.337284252017907e-07, - "loss": 0.9174, - "step": 9163 - }, - { - "epoch": 0.6887118593115887, - "grad_norm": 1.5229912519998967, - "learning_rate": 9.333165738501105e-07, - "loss": 1.0001, - "step": 9164 - }, - { - "epoch": 0.6887870133774238, - "grad_norm": 1.897159978584959, - "learning_rate": 9.32904785703508e-07, - "loss": 0.8997, - "step": 9165 - }, - { - "epoch": 0.6888621674432587, - "grad_norm": 1.5152913149003564, - "learning_rate": 9.32493060786383e-07, - "loss": 0.9644, - "step": 9166 - }, - { - "epoch": 0.6889373215090936, - "grad_norm": 1.3353743230540713, - "learning_rate": 9.32081399123132e-07, - "loss": 0.8966, - "step": 9167 - }, - { - "epoch": 0.6890124755749286, - "grad_norm": 1.2653448948725567, - "learning_rate": 9.316698007381467e-07, - "loss": 0.9946, - "step": 9168 - }, - { - "epoch": 0.6890876296407635, - "grad_norm": 2.0781889023671836, - "learning_rate": 9.312582656558173e-07, - "loss": 1.0091, - "step": 9169 - }, - { - "epoch": 0.6891627837065986, - "grad_norm": 0.7154421776866647, - "learning_rate": 9.30846793900528e-07, - "loss": 0.8101, - "step": 9170 - }, - { - "epoch": 0.6892379377724335, - "grad_norm": 1.7827785692655358, - "learning_rate": 9.304353854966605e-07, - "loss": 0.889, - "step": 9171 - }, - { - "epoch": 0.6893130918382685, - "grad_norm": 3.0217578569368313, - "learning_rate": 9.300240404685917e-07, - "loss": 0.9195, - "step": 9172 - }, - { - "epoch": 0.6893882459041034, - "grad_norm": 3.290082397356711, - "learning_rate": 9.296127588406952e-07, - "loss": 1.0998, - "step": 9173 - }, - { - "epoch": 0.6894633999699383, - "grad_norm": 1.8609689243843954, - "learning_rate": 9.292015406373423e-07, - "loss": 0.8965, - "step": 9174 - }, - { - "epoch": 0.6895385540357734, - "grad_norm": 0.6650512290883579, - "learning_rate": 9.287903858828976e-07, - "loss": 0.8693, - "step": 9175 - }, - { - "epoch": 0.6896137081016083, - "grad_norm": 0.7252727669578054, - "learning_rate": 9.283792946017253e-07, - "loss": 0.7992, - "step": 9176 - }, - { - "epoch": 0.6896888621674433, - "grad_norm": 1.6412210744036064, - "learning_rate": 9.279682668181835e-07, - "loss": 0.9219, - "step": 9177 - }, - { - "epoch": 0.6897640162332782, - "grad_norm": 3.2320211940108248, - "learning_rate": 9.275573025566266e-07, - "loss": 0.9596, - "step": 9178 - }, - { - "epoch": 0.6898391702991132, - "grad_norm": 1.6429833310396726, - "learning_rate": 9.271464018414064e-07, - "loss": 0.9543, - "step": 9179 - }, - { - "epoch": 0.6899143243649482, - "grad_norm": 4.868491890447546, - "learning_rate": 9.267355646968694e-07, - "loss": 0.9112, - "step": 9180 - }, - { - "epoch": 0.6899894784307831, - "grad_norm": 2.5232357735799256, - "learning_rate": 9.263247911473606e-07, - "loss": 1.0959, - "step": 9181 - }, - { - "epoch": 0.6900646324966181, - "grad_norm": 1.8946795533328333, - "learning_rate": 9.259140812172192e-07, - "loss": 0.9731, - "step": 9182 - }, - { - "epoch": 0.690139786562453, - "grad_norm": 1.7679721456335804, - "learning_rate": 9.255034349307818e-07, - "loss": 1.0144, - "step": 9183 - }, - { - "epoch": 0.690214940628288, - "grad_norm": 1.844666657250095, - "learning_rate": 9.250928523123802e-07, - "loss": 0.9425, - "step": 9184 - }, - { - "epoch": 0.690290094694123, - "grad_norm": 1.5269664874093551, - "learning_rate": 9.246823333863425e-07, - "loss": 0.9914, - "step": 9185 - }, - { - "epoch": 0.6903652487599579, - "grad_norm": 0.6815083328893512, - "learning_rate": 9.242718781769949e-07, - "loss": 0.8442, - "step": 9186 - }, - { - "epoch": 0.6904404028257929, - "grad_norm": 0.743385833253336, - "learning_rate": 9.238614867086578e-07, - "loss": 0.8818, - "step": 9187 - }, - { - "epoch": 0.6905155568916278, - "grad_norm": 3.2611204775044462, - "learning_rate": 9.234511590056484e-07, - "loss": 1.012, - "step": 9188 - }, - { - "epoch": 0.6905907109574628, - "grad_norm": 2.280210702429305, - "learning_rate": 9.230408950922801e-07, - "loss": 0.9869, - "step": 9189 - }, - { - "epoch": 0.6906658650232977, - "grad_norm": 1.610689829778746, - "learning_rate": 9.226306949928622e-07, - "loss": 0.8685, - "step": 9190 - }, - { - "epoch": 0.6907410190891328, - "grad_norm": 1.4385796514528553, - "learning_rate": 9.222205587317015e-07, - "loss": 0.9472, - "step": 9191 - }, - { - "epoch": 0.6908161731549677, - "grad_norm": 1.724182075971231, - "learning_rate": 9.218104863330996e-07, - "loss": 0.9784, - "step": 9192 - }, - { - "epoch": 0.6908913272208026, - "grad_norm": 2.0242843917633144, - "learning_rate": 9.214004778213562e-07, - "loss": 1.0111, - "step": 9193 - }, - { - "epoch": 0.6909664812866376, - "grad_norm": 3.07324017044937, - "learning_rate": 9.209905332207639e-07, - "loss": 0.8336, - "step": 9194 - }, - { - "epoch": 0.6910416353524725, - "grad_norm": 2.370899274162675, - "learning_rate": 9.205806525556136e-07, - "loss": 0.9354, - "step": 9195 - }, - { - "epoch": 0.6911167894183076, - "grad_norm": 2.0408060504693544, - "learning_rate": 9.20170835850194e-07, - "loss": 0.9489, - "step": 9196 - }, - { - "epoch": 0.6911919434841425, - "grad_norm": 4.205700166993996, - "learning_rate": 9.197610831287863e-07, - "loss": 0.8475, - "step": 9197 - }, - { - "epoch": 0.6912670975499775, - "grad_norm": 1.7885089001091532, - "learning_rate": 9.193513944156719e-07, - "loss": 0.9476, - "step": 9198 - }, - { - "epoch": 0.6913422516158124, - "grad_norm": 1.9466298862906193, - "learning_rate": 9.189417697351254e-07, - "loss": 0.8758, - "step": 9199 - }, - { - "epoch": 0.6914174056816473, - "grad_norm": 1.7987992970893407, - "learning_rate": 9.185322091114187e-07, - "loss": 0.9743, - "step": 9200 - }, - { - "epoch": 0.6914925597474824, - "grad_norm": 0.8527454943419064, - "learning_rate": 9.181227125688197e-07, - "loss": 0.887, - "step": 9201 - }, - { - "epoch": 0.6915677138133173, - "grad_norm": 1.4086045368985416, - "learning_rate": 9.177132801315921e-07, - "loss": 1.0231, - "step": 9202 - }, - { - "epoch": 0.6916428678791523, - "grad_norm": 0.7503369289808013, - "learning_rate": 9.173039118239978e-07, - "loss": 0.838, - "step": 9203 - }, - { - "epoch": 0.6917180219449872, - "grad_norm": 2.3002927096323615, - "learning_rate": 9.168946076702926e-07, - "loss": 0.9646, - "step": 9204 - }, - { - "epoch": 0.6917931760108221, - "grad_norm": 1.6210416854402785, - "learning_rate": 9.164853676947293e-07, - "loss": 1.0027, - "step": 9205 - }, - { - "epoch": 0.6918683300766572, - "grad_norm": 2.158725257647807, - "learning_rate": 9.160761919215572e-07, - "loss": 1.0052, - "step": 9206 - }, - { - "epoch": 0.6919434841424921, - "grad_norm": 2.614864400115932, - "learning_rate": 9.156670803750203e-07, - "loss": 1.0012, - "step": 9207 - }, - { - "epoch": 0.6920186382083271, - "grad_norm": 1.672284227716402, - "learning_rate": 9.15258033079362e-07, - "loss": 0.985, - "step": 9208 - }, - { - "epoch": 0.692093792274162, - "grad_norm": 0.7146119122216696, - "learning_rate": 9.148490500588191e-07, - "loss": 0.8441, - "step": 9209 - }, - { - "epoch": 0.692168946339997, - "grad_norm": 1.5258613280237487, - "learning_rate": 9.144401313376253e-07, - "loss": 0.9516, - "step": 9210 - }, - { - "epoch": 0.692244100405832, - "grad_norm": 1.5303062333205952, - "learning_rate": 9.140312769400105e-07, - "loss": 0.9179, - "step": 9211 - }, - { - "epoch": 0.6923192544716669, - "grad_norm": 1.8028684754260524, - "learning_rate": 9.136224868902003e-07, - "loss": 0.9601, - "step": 9212 - }, - { - "epoch": 0.6923944085375019, - "grad_norm": 1.537885651642168, - "learning_rate": 9.132137612124184e-07, - "loss": 1.0484, - "step": 9213 - }, - { - "epoch": 0.6924695626033368, - "grad_norm": 1.5829782924452966, - "learning_rate": 9.12805099930882e-07, - "loss": 0.9605, - "step": 9214 - }, - { - "epoch": 0.6925447166691718, - "grad_norm": 1.7126700630114715, - "learning_rate": 9.123965030698082e-07, - "loss": 0.9505, - "step": 9215 - }, - { - "epoch": 0.6926198707350067, - "grad_norm": 2.979421129007622, - "learning_rate": 9.119879706534054e-07, - "loss": 0.9263, - "step": 9216 - }, - { - "epoch": 0.6926950248008418, - "grad_norm": 1.661297645890211, - "learning_rate": 9.11579502705881e-07, - "loss": 0.9119, - "step": 9217 - }, - { - "epoch": 0.6927701788666767, - "grad_norm": 2.464149824783037, - "learning_rate": 9.111710992514397e-07, - "loss": 1.0495, - "step": 9218 - }, - { - "epoch": 0.6928453329325116, - "grad_norm": 1.88819388462495, - "learning_rate": 9.107627603142793e-07, - "loss": 0.8882, - "step": 9219 - }, - { - "epoch": 0.6929204869983466, - "grad_norm": 1.812751686774066, - "learning_rate": 9.103544859185972e-07, - "loss": 0.9818, - "step": 9220 - }, - { - "epoch": 0.6929956410641815, - "grad_norm": 2.10945084046966, - "learning_rate": 9.099462760885843e-07, - "loss": 0.9934, - "step": 9221 - }, - { - "epoch": 0.6930707951300166, - "grad_norm": 1.5829313755113477, - "learning_rate": 9.095381308484284e-07, - "loss": 0.8357, - "step": 9222 - }, - { - "epoch": 0.6931459491958515, - "grad_norm": 1.687207938457496, - "learning_rate": 9.091300502223142e-07, - "loss": 1.0251, - "step": 9223 - }, - { - "epoch": 0.6932211032616865, - "grad_norm": 1.8880597200444873, - "learning_rate": 9.087220342344209e-07, - "loss": 0.9602, - "step": 9224 - }, - { - "epoch": 0.6932962573275214, - "grad_norm": 1.8501565480366904, - "learning_rate": 9.083140829089266e-07, - "loss": 0.9935, - "step": 9225 - }, - { - "epoch": 0.6933714113933563, - "grad_norm": 1.640785572731798, - "learning_rate": 9.079061962700032e-07, - "loss": 0.9523, - "step": 9226 - }, - { - "epoch": 0.6934465654591914, - "grad_norm": 1.5198465133019097, - "learning_rate": 9.074983743418196e-07, - "loss": 1.0301, - "step": 9227 - }, - { - "epoch": 0.6935217195250263, - "grad_norm": 1.6567772080187784, - "learning_rate": 9.070906171485408e-07, - "loss": 1.0327, - "step": 9228 - }, - { - "epoch": 0.6935968735908613, - "grad_norm": 0.8672463336953956, - "learning_rate": 9.066829247143273e-07, - "loss": 0.7869, - "step": 9229 - }, - { - "epoch": 0.6936720276566962, - "grad_norm": 1.8190485941858847, - "learning_rate": 9.062752970633376e-07, - "loss": 1.0111, - "step": 9230 - }, - { - "epoch": 0.6937471817225311, - "grad_norm": 1.7365390919756443, - "learning_rate": 9.058677342197249e-07, - "loss": 0.895, - "step": 9231 - }, - { - "epoch": 0.6938223357883662, - "grad_norm": 1.5700589255190553, - "learning_rate": 9.054602362076387e-07, - "loss": 0.9744, - "step": 9232 - }, - { - "epoch": 0.6938974898542011, - "grad_norm": 1.602015510317313, - "learning_rate": 9.050528030512246e-07, - "loss": 0.9302, - "step": 9233 - }, - { - "epoch": 0.6939726439200361, - "grad_norm": 8.943699107368523, - "learning_rate": 9.046454347746242e-07, - "loss": 0.979, - "step": 9234 - }, - { - "epoch": 0.694047797985871, - "grad_norm": 1.6167451557625403, - "learning_rate": 9.042381314019766e-07, - "loss": 0.8143, - "step": 9235 - }, - { - "epoch": 0.694122952051706, - "grad_norm": 1.5178255446534972, - "learning_rate": 9.038308929574152e-07, - "loss": 1.0177, - "step": 9236 - }, - { - "epoch": 0.694198106117541, - "grad_norm": 1.567639937656716, - "learning_rate": 9.034237194650724e-07, - "loss": 0.9406, - "step": 9237 - }, - { - "epoch": 0.6942732601833759, - "grad_norm": 1.806560122784151, - "learning_rate": 9.030166109490724e-07, - "loss": 0.9325, - "step": 9238 - }, - { - "epoch": 0.6943484142492109, - "grad_norm": 1.706237608476853, - "learning_rate": 9.026095674335384e-07, - "loss": 1.018, - "step": 9239 - }, - { - "epoch": 0.6944235683150458, - "grad_norm": 2.068783061982079, - "learning_rate": 9.022025889425902e-07, - "loss": 1.0061, - "step": 9240 - }, - { - "epoch": 0.6944987223808808, - "grad_norm": 1.7631969129196114, - "learning_rate": 9.017956755003415e-07, - "loss": 0.8678, - "step": 9241 - }, - { - "epoch": 0.6945738764467158, - "grad_norm": 0.7624642660804246, - "learning_rate": 9.013888271309053e-07, - "loss": 0.832, - "step": 9242 - }, - { - "epoch": 0.6946490305125508, - "grad_norm": 0.9426321697905864, - "learning_rate": 9.00982043858388e-07, - "loss": 0.8442, - "step": 9243 - }, - { - "epoch": 0.6947241845783857, - "grad_norm": 3.535010437039476, - "learning_rate": 9.005753257068929e-07, - "loss": 0.9707, - "step": 9244 - }, - { - "epoch": 0.6947993386442206, - "grad_norm": 1.9904172324800742, - "learning_rate": 9.001686727005196e-07, - "loss": 1.0121, - "step": 9245 - }, - { - "epoch": 0.6948744927100556, - "grad_norm": 1.5006815633476454, - "learning_rate": 8.997620848633634e-07, - "loss": 0.9457, - "step": 9246 - }, - { - "epoch": 0.6949496467758906, - "grad_norm": 1.6100938543365535, - "learning_rate": 8.993555622195175e-07, - "loss": 0.7962, - "step": 9247 - }, - { - "epoch": 0.6950248008417256, - "grad_norm": 1.9566134008644898, - "learning_rate": 8.98949104793069e-07, - "loss": 1.005, - "step": 9248 - }, - { - "epoch": 0.6950999549075605, - "grad_norm": 0.7618579835063213, - "learning_rate": 8.985427126081024e-07, - "loss": 0.8948, - "step": 9249 - }, - { - "epoch": 0.6951751089733954, - "grad_norm": 0.7315117636469364, - "learning_rate": 8.981363856886979e-07, - "loss": 0.8771, - "step": 9250 - }, - { - "epoch": 0.6952502630392304, - "grad_norm": 1.655485372563395, - "learning_rate": 8.977301240589313e-07, - "loss": 0.9985, - "step": 9251 - }, - { - "epoch": 0.6953254171050653, - "grad_norm": 1.451406139086718, - "learning_rate": 8.973239277428761e-07, - "loss": 0.9612, - "step": 9252 - }, - { - "epoch": 0.6954005711709004, - "grad_norm": 1.5898404507989417, - "learning_rate": 8.969177967646007e-07, - "loss": 0.9003, - "step": 9253 - }, - { - "epoch": 0.6954757252367353, - "grad_norm": 3.1392787496190997, - "learning_rate": 8.965117311481698e-07, - "loss": 1.0053, - "step": 9254 - }, - { - "epoch": 0.6955508793025703, - "grad_norm": 1.4072862833495654, - "learning_rate": 8.961057309176445e-07, - "loss": 0.9779, - "step": 9255 - }, - { - "epoch": 0.6956260333684052, - "grad_norm": 1.438279148081622, - "learning_rate": 8.956997960970809e-07, - "loss": 0.9964, - "step": 9256 - }, - { - "epoch": 0.6957011874342401, - "grad_norm": 2.4154109213497876, - "learning_rate": 8.952939267105339e-07, - "loss": 1.0238, - "step": 9257 - }, - { - "epoch": 0.6957763415000752, - "grad_norm": 1.621962937045659, - "learning_rate": 8.94888122782051e-07, - "loss": 0.9762, - "step": 9258 - }, - { - "epoch": 0.6958514955659101, - "grad_norm": 1.5819736799903243, - "learning_rate": 8.944823843356795e-07, - "loss": 1.0118, - "step": 9259 - }, - { - "epoch": 0.6959266496317451, - "grad_norm": 2.029726131738181, - "learning_rate": 8.940767113954608e-07, - "loss": 1.0431, - "step": 9260 - }, - { - "epoch": 0.69600180369758, - "grad_norm": 0.7275456985068585, - "learning_rate": 8.936711039854301e-07, - "loss": 0.7623, - "step": 9261 - }, - { - "epoch": 0.696076957763415, - "grad_norm": 1.745138090293463, - "learning_rate": 8.932655621296239e-07, - "loss": 0.9419, - "step": 9262 - }, - { - "epoch": 0.69615211182925, - "grad_norm": 4.145694493919452, - "learning_rate": 8.928600858520703e-07, - "loss": 1.0511, - "step": 9263 - }, - { - "epoch": 0.6962272658950849, - "grad_norm": 5.430002414709858, - "learning_rate": 8.924546751767968e-07, - "loss": 0.9744, - "step": 9264 - }, - { - "epoch": 0.6963024199609199, - "grad_norm": 3.3623140475109334, - "learning_rate": 8.920493301278249e-07, - "loss": 0.9361, - "step": 9265 - }, - { - "epoch": 0.6963775740267548, - "grad_norm": 1.685249311188106, - "learning_rate": 8.916440507291727e-07, - "loss": 1.0193, - "step": 9266 - }, - { - "epoch": 0.6964527280925898, - "grad_norm": 2.032461421361126, - "learning_rate": 8.912388370048549e-07, - "loss": 0.9558, - "step": 9267 - }, - { - "epoch": 0.6965278821584248, - "grad_norm": 1.5312106069051947, - "learning_rate": 8.908336889788807e-07, - "loss": 0.8728, - "step": 9268 - }, - { - "epoch": 0.6966030362242598, - "grad_norm": 3.4754645167484783, - "learning_rate": 8.904286066752589e-07, - "loss": 0.9859, - "step": 9269 - }, - { - "epoch": 0.6966781902900947, - "grad_norm": 0.6448234127119943, - "learning_rate": 8.900235901179907e-07, - "loss": 0.8347, - "step": 9270 - }, - { - "epoch": 0.6967533443559296, - "grad_norm": 0.7468086931490925, - "learning_rate": 8.896186393310752e-07, - "loss": 0.8333, - "step": 9271 - }, - { - "epoch": 0.6968284984217646, - "grad_norm": 1.5812825256138663, - "learning_rate": 8.892137543385072e-07, - "loss": 0.9892, - "step": 9272 - }, - { - "epoch": 0.6969036524875996, - "grad_norm": 2.347992579381807, - "learning_rate": 8.888089351642769e-07, - "loss": 0.8949, - "step": 9273 - }, - { - "epoch": 0.6969788065534346, - "grad_norm": 1.9280106521407154, - "learning_rate": 8.884041818323733e-07, - "loss": 0.8983, - "step": 9274 - }, - { - "epoch": 0.6970539606192695, - "grad_norm": 1.9227780337246099, - "learning_rate": 8.879994943667784e-07, - "loss": 1.0103, - "step": 9275 - }, - { - "epoch": 0.6971291146851044, - "grad_norm": 2.031172765583938, - "learning_rate": 8.875948727914713e-07, - "loss": 0.9891, - "step": 9276 - }, - { - "epoch": 0.6972042687509394, - "grad_norm": 1.7569413802866127, - "learning_rate": 8.87190317130428e-07, - "loss": 0.994, - "step": 9277 - }, - { - "epoch": 0.6972794228167744, - "grad_norm": 1.7443734042503085, - "learning_rate": 8.867858274076188e-07, - "loss": 0.9347, - "step": 9278 - }, - { - "epoch": 0.6973545768826094, - "grad_norm": 1.5416173497465415, - "learning_rate": 8.863814036470128e-07, - "loss": 1.0144, - "step": 9279 - }, - { - "epoch": 0.6974297309484443, - "grad_norm": 1.7604251688313626, - "learning_rate": 8.859770458725722e-07, - "loss": 0.9114, - "step": 9280 - }, - { - "epoch": 0.6975048850142793, - "grad_norm": 2.1229283949877726, - "learning_rate": 8.855727541082583e-07, - "loss": 1.0198, - "step": 9281 - }, - { - "epoch": 0.6975800390801142, - "grad_norm": 2.060242428819733, - "learning_rate": 8.85168528378027e-07, - "loss": 1.0443, - "step": 9282 - }, - { - "epoch": 0.6976551931459491, - "grad_norm": 3.8607284087896607, - "learning_rate": 8.847643687058277e-07, - "loss": 0.9491, - "step": 9283 - }, - { - "epoch": 0.6977303472117842, - "grad_norm": 2.609697698866245, - "learning_rate": 8.84360275115611e-07, - "loss": 0.8704, - "step": 9284 - }, - { - "epoch": 0.6978055012776191, - "grad_norm": 2.1137408252158263, - "learning_rate": 8.839562476313192e-07, - "loss": 0.8832, - "step": 9285 - }, - { - "epoch": 0.6978806553434541, - "grad_norm": 1.422248875017236, - "learning_rate": 8.83552286276894e-07, - "loss": 0.9589, - "step": 9286 - }, - { - "epoch": 0.697955809409289, - "grad_norm": 0.7789624483731513, - "learning_rate": 8.831483910762711e-07, - "loss": 0.8644, - "step": 9287 - }, - { - "epoch": 0.698030963475124, - "grad_norm": 0.829560026157231, - "learning_rate": 8.827445620533829e-07, - "loss": 0.9082, - "step": 9288 - }, - { - "epoch": 0.698106117540959, - "grad_norm": 1.853492985899676, - "learning_rate": 8.823407992321574e-07, - "loss": 0.982, - "step": 9289 - }, - { - "epoch": 0.6981812716067939, - "grad_norm": 1.7482381534881468, - "learning_rate": 8.819371026365188e-07, - "loss": 0.98, - "step": 9290 - }, - { - "epoch": 0.6982564256726289, - "grad_norm": 2.326531396046912, - "learning_rate": 8.815334722903889e-07, - "loss": 1.0487, - "step": 9291 - }, - { - "epoch": 0.6983315797384638, - "grad_norm": 1.8369677311197306, - "learning_rate": 8.811299082176837e-07, - "loss": 0.8912, - "step": 9292 - }, - { - "epoch": 0.6984067338042989, - "grad_norm": 1.8696480344330324, - "learning_rate": 8.807264104423158e-07, - "loss": 0.9535, - "step": 9293 - }, - { - "epoch": 0.6984818878701338, - "grad_norm": 2.0107968486453864, - "learning_rate": 8.80322978988194e-07, - "loss": 1.0032, - "step": 9294 - }, - { - "epoch": 0.6985570419359687, - "grad_norm": 1.923453450388589, - "learning_rate": 8.799196138792227e-07, - "loss": 0.9829, - "step": 9295 - }, - { - "epoch": 0.6986321960018037, - "grad_norm": 2.1408631129685145, - "learning_rate": 8.795163151393039e-07, - "loss": 0.8954, - "step": 9296 - }, - { - "epoch": 0.6987073500676386, - "grad_norm": 1.9966019731823856, - "learning_rate": 8.791130827923341e-07, - "loss": 0.9754, - "step": 9297 - }, - { - "epoch": 0.6987825041334736, - "grad_norm": 1.8021211418051815, - "learning_rate": 8.787099168622063e-07, - "loss": 0.9821, - "step": 9298 - }, - { - "epoch": 0.6988576581993086, - "grad_norm": 0.8369611729078547, - "learning_rate": 8.783068173728097e-07, - "loss": 0.9064, - "step": 9299 - }, - { - "epoch": 0.6989328122651436, - "grad_norm": 1.7644069149578006, - "learning_rate": 8.779037843480285e-07, - "loss": 0.953, - "step": 9300 - }, - { - "epoch": 0.6990079663309785, - "grad_norm": 1.4076960440341963, - "learning_rate": 8.775008178117458e-07, - "loss": 1.0087, - "step": 9301 - }, - { - "epoch": 0.6990831203968134, - "grad_norm": 1.7826266406684133, - "learning_rate": 8.770979177878373e-07, - "loss": 0.8988, - "step": 9302 - }, - { - "epoch": 0.6991582744626484, - "grad_norm": 13.32687475818365, - "learning_rate": 8.766950843001776e-07, - "loss": 1.0059, - "step": 9303 - }, - { - "epoch": 0.6992334285284834, - "grad_norm": 1.9897529835962002, - "learning_rate": 8.762923173726358e-07, - "loss": 1.0185, - "step": 9304 - }, - { - "epoch": 0.6993085825943184, - "grad_norm": 2.0477620306712967, - "learning_rate": 8.758896170290768e-07, - "loss": 0.9475, - "step": 9305 - }, - { - "epoch": 0.6993837366601533, - "grad_norm": 1.99024449758604, - "learning_rate": 8.754869832933629e-07, - "loss": 1.0819, - "step": 9306 - }, - { - "epoch": 0.6994588907259883, - "grad_norm": 0.8269746816327478, - "learning_rate": 8.750844161893503e-07, - "loss": 0.9085, - "step": 9307 - }, - { - "epoch": 0.6995340447918232, - "grad_norm": 3.3046303602107985, - "learning_rate": 8.746819157408944e-07, - "loss": 0.8739, - "step": 9308 - }, - { - "epoch": 0.6996091988576582, - "grad_norm": 2.5359618989025736, - "learning_rate": 8.742794819718442e-07, - "loss": 0.8287, - "step": 9309 - }, - { - "epoch": 0.6996843529234932, - "grad_norm": 1.6484052465094774, - "learning_rate": 8.738771149060453e-07, - "loss": 1.0419, - "step": 9310 - }, - { - "epoch": 0.6997595069893281, - "grad_norm": 2.0905323640972733, - "learning_rate": 8.734748145673396e-07, - "loss": 1.0256, - "step": 9311 - }, - { - "epoch": 0.6998346610551631, - "grad_norm": 3.0681184884159807, - "learning_rate": 8.730725809795641e-07, - "loss": 1.0094, - "step": 9312 - }, - { - "epoch": 0.699909815120998, - "grad_norm": 1.5102451446308653, - "learning_rate": 8.726704141665542e-07, - "loss": 0.9228, - "step": 9313 - }, - { - "epoch": 0.6999849691868331, - "grad_norm": 2.7456859215243434, - "learning_rate": 8.722683141521392e-07, - "loss": 0.8407, - "step": 9314 - }, - { - "epoch": 0.700060123252668, - "grad_norm": 2.2887061561039324, - "learning_rate": 8.718662809601447e-07, - "loss": 0.8585, - "step": 9315 - }, - { - "epoch": 0.7001352773185029, - "grad_norm": 3.4742120576746456, - "learning_rate": 8.714643146143932e-07, - "loss": 1.0357, - "step": 9316 - }, - { - "epoch": 0.7002104313843379, - "grad_norm": 1.7349716397476818, - "learning_rate": 8.710624151387018e-07, - "loss": 0.9793, - "step": 9317 - }, - { - "epoch": 0.7002855854501728, - "grad_norm": 1.629861382400121, - "learning_rate": 8.70660582556886e-07, - "loss": 0.9906, - "step": 9318 - }, - { - "epoch": 0.7003607395160079, - "grad_norm": 1.9157021486762045, - "learning_rate": 8.702588168927551e-07, - "loss": 0.9955, - "step": 9319 - }, - { - "epoch": 0.7004358935818428, - "grad_norm": 1.9824728071312894, - "learning_rate": 8.698571181701154e-07, - "loss": 0.924, - "step": 9320 - }, - { - "epoch": 0.7005110476476777, - "grad_norm": 5.658300207356646, - "learning_rate": 8.69455486412769e-07, - "loss": 1.019, - "step": 9321 - }, - { - "epoch": 0.7005862017135127, - "grad_norm": 1.2962406399424795, - "learning_rate": 8.690539216445136e-07, - "loss": 1.0231, - "step": 9322 - }, - { - "epoch": 0.7006613557793476, - "grad_norm": 2.3251375834457417, - "learning_rate": 8.686524238891446e-07, - "loss": 0.9052, - "step": 9323 - }, - { - "epoch": 0.7007365098451827, - "grad_norm": 2.404785304965097, - "learning_rate": 8.682509931704511e-07, - "loss": 0.9404, - "step": 9324 - }, - { - "epoch": 0.7008116639110176, - "grad_norm": 1.969147596628795, - "learning_rate": 8.678496295122208e-07, - "loss": 0.9567, - "step": 9325 - }, - { - "epoch": 0.7008868179768526, - "grad_norm": 2.158788320337111, - "learning_rate": 8.674483329382351e-07, - "loss": 1.0284, - "step": 9326 - }, - { - "epoch": 0.7009619720426875, - "grad_norm": 1.585475008572389, - "learning_rate": 8.670471034722726e-07, - "loss": 0.9416, - "step": 9327 - }, - { - "epoch": 0.7010371261085224, - "grad_norm": 1.690063648492509, - "learning_rate": 8.666459411381075e-07, - "loss": 0.9146, - "step": 9328 - }, - { - "epoch": 0.7011122801743574, - "grad_norm": 1.4733140523578128, - "learning_rate": 8.662448459595095e-07, - "loss": 0.9179, - "step": 9329 - }, - { - "epoch": 0.7011874342401924, - "grad_norm": 2.806603587416544, - "learning_rate": 8.658438179602468e-07, - "loss": 0.9261, - "step": 9330 - }, - { - "epoch": 0.7012625883060274, - "grad_norm": 1.6876853558503027, - "learning_rate": 8.654428571640806e-07, - "loss": 0.9176, - "step": 9331 - }, - { - "epoch": 0.7013377423718623, - "grad_norm": 1.4788085163627565, - "learning_rate": 8.650419635947696e-07, - "loss": 0.9629, - "step": 9332 - }, - { - "epoch": 0.7014128964376973, - "grad_norm": 1.4728399915940031, - "learning_rate": 8.646411372760685e-07, - "loss": 1.0423, - "step": 9333 - }, - { - "epoch": 0.7014880505035322, - "grad_norm": 2.935827712624726, - "learning_rate": 8.642403782317269e-07, - "loss": 0.9455, - "step": 9334 - }, - { - "epoch": 0.7015632045693672, - "grad_norm": 2.1816713336091316, - "learning_rate": 8.638396864854927e-07, - "loss": 0.8973, - "step": 9335 - }, - { - "epoch": 0.7016383586352022, - "grad_norm": 1.8249000077746327, - "learning_rate": 8.634390620611076e-07, - "loss": 0.9842, - "step": 9336 - }, - { - "epoch": 0.7017135127010371, - "grad_norm": 1.6719607037119861, - "learning_rate": 8.630385049823101e-07, - "loss": 0.984, - "step": 9337 - }, - { - "epoch": 0.7017886667668721, - "grad_norm": 2.0123583915598986, - "learning_rate": 8.626380152728352e-07, - "loss": 0.8975, - "step": 9338 - }, - { - "epoch": 0.701863820832707, - "grad_norm": 1.57363093076463, - "learning_rate": 8.622375929564123e-07, - "loss": 1.0437, - "step": 9339 - }, - { - "epoch": 0.701938974898542, - "grad_norm": 1.855421912957373, - "learning_rate": 8.618372380567696e-07, - "loss": 0.9389, - "step": 9340 - }, - { - "epoch": 0.702014128964377, - "grad_norm": 2.0188641211787175, - "learning_rate": 8.614369505976287e-07, - "loss": 0.9835, - "step": 9341 - }, - { - "epoch": 0.7020892830302119, - "grad_norm": 1.7040230019722769, - "learning_rate": 8.610367306027084e-07, - "loss": 1.0019, - "step": 9342 - }, - { - "epoch": 0.7021644370960469, - "grad_norm": 2.1171522524669153, - "learning_rate": 8.606365780957232e-07, - "loss": 0.9512, - "step": 9343 - }, - { - "epoch": 0.7022395911618818, - "grad_norm": 1.8010239179040763, - "learning_rate": 8.602364931003831e-07, - "loss": 0.9817, - "step": 9344 - }, - { - "epoch": 0.7023147452277169, - "grad_norm": 2.465701478417484, - "learning_rate": 8.598364756403957e-07, - "loss": 0.9628, - "step": 9345 - }, - { - "epoch": 0.7023898992935518, - "grad_norm": 1.7530692297348567, - "learning_rate": 8.594365257394626e-07, - "loss": 0.9197, - "step": 9346 - }, - { - "epoch": 0.7024650533593867, - "grad_norm": 1.4239250746635566, - "learning_rate": 8.590366434212835e-07, - "loss": 1.0413, - "step": 9347 - }, - { - "epoch": 0.7025402074252217, - "grad_norm": 1.806186994357906, - "learning_rate": 8.586368287095522e-07, - "loss": 0.9004, - "step": 9348 - }, - { - "epoch": 0.7026153614910566, - "grad_norm": 3.491591298117639, - "learning_rate": 8.582370816279594e-07, - "loss": 0.9292, - "step": 9349 - }, - { - "epoch": 0.7026905155568917, - "grad_norm": 4.48645821807462, - "learning_rate": 8.578374022001917e-07, - "loss": 0.8825, - "step": 9350 - }, - { - "epoch": 0.7027656696227266, - "grad_norm": 1.724392316730596, - "learning_rate": 8.574377904499308e-07, - "loss": 0.9492, - "step": 9351 - }, - { - "epoch": 0.7028408236885616, - "grad_norm": 1.8129789443100537, - "learning_rate": 8.570382464008568e-07, - "loss": 0.9807, - "step": 9352 - }, - { - "epoch": 0.7029159777543965, - "grad_norm": 1.6184679941920004, - "learning_rate": 8.566387700766434e-07, - "loss": 1.0624, - "step": 9353 - }, - { - "epoch": 0.7029911318202314, - "grad_norm": 1.6620972539905787, - "learning_rate": 8.562393615009609e-07, - "loss": 0.9488, - "step": 9354 - }, - { - "epoch": 0.7030662858860665, - "grad_norm": 2.0108885005503185, - "learning_rate": 8.558400206974761e-07, - "loss": 0.9801, - "step": 9355 - }, - { - "epoch": 0.7031414399519014, - "grad_norm": 1.5002074098242066, - "learning_rate": 8.554407476898506e-07, - "loss": 0.9599, - "step": 9356 - }, - { - "epoch": 0.7032165940177364, - "grad_norm": 1.8217807464036166, - "learning_rate": 8.550415425017443e-07, - "loss": 0.9108, - "step": 9357 - }, - { - "epoch": 0.7032917480835713, - "grad_norm": 1.4147541188370358, - "learning_rate": 8.546424051568111e-07, - "loss": 1.0598, - "step": 9358 - }, - { - "epoch": 0.7033669021494063, - "grad_norm": 2.966363208409264, - "learning_rate": 8.542433356787011e-07, - "loss": 0.997, - "step": 9359 - }, - { - "epoch": 0.7034420562152413, - "grad_norm": 1.8371585762302363, - "learning_rate": 8.538443340910608e-07, - "loss": 0.898, - "step": 9360 - }, - { - "epoch": 0.7035172102810762, - "grad_norm": 2.2910736935176086, - "learning_rate": 8.53445400417532e-07, - "loss": 0.9316, - "step": 9361 - }, - { - "epoch": 0.7035923643469112, - "grad_norm": 1.6423937754197675, - "learning_rate": 8.530465346817543e-07, - "loss": 0.9072, - "step": 9362 - }, - { - "epoch": 0.7036675184127461, - "grad_norm": 3.438775398163374, - "learning_rate": 8.526477369073616e-07, - "loss": 0.9447, - "step": 9363 - }, - { - "epoch": 0.7037426724785811, - "grad_norm": 1.6015142666136297, - "learning_rate": 8.522490071179839e-07, - "loss": 0.9578, - "step": 9364 - }, - { - "epoch": 0.703817826544416, - "grad_norm": 1.3391132198240399, - "learning_rate": 8.518503453372477e-07, - "loss": 1.0275, - "step": 9365 - }, - { - "epoch": 0.703892980610251, - "grad_norm": 2.8613142931086326, - "learning_rate": 8.51451751588774e-07, - "loss": 0.9988, - "step": 9366 - }, - { - "epoch": 0.703968134676086, - "grad_norm": 2.1004537046870246, - "learning_rate": 8.510532258961831e-07, - "loss": 0.91, - "step": 9367 - }, - { - "epoch": 0.7040432887419209, - "grad_norm": 2.2460603131003025, - "learning_rate": 8.506547682830876e-07, - "loss": 1.0323, - "step": 9368 - }, - { - "epoch": 0.7041184428077559, - "grad_norm": 2.8359099247540214, - "learning_rate": 8.502563787730987e-07, - "loss": 0.9364, - "step": 9369 - }, - { - "epoch": 0.7041935968735908, - "grad_norm": 2.3045135949682116, - "learning_rate": 8.498580573898219e-07, - "loss": 0.939, - "step": 9370 - }, - { - "epoch": 0.7042687509394259, - "grad_norm": 1.6482685599153644, - "learning_rate": 8.494598041568597e-07, - "loss": 0.9695, - "step": 9371 - }, - { - "epoch": 0.7043439050052608, - "grad_norm": 1.7957500751192104, - "learning_rate": 8.490616190978097e-07, - "loss": 1.0264, - "step": 9372 - }, - { - "epoch": 0.7044190590710957, - "grad_norm": 1.5850618698126686, - "learning_rate": 8.486635022362651e-07, - "loss": 1.0136, - "step": 9373 - }, - { - "epoch": 0.7044942131369307, - "grad_norm": 1.6481835047580247, - "learning_rate": 8.482654535958178e-07, - "loss": 0.9219, - "step": 9374 - }, - { - "epoch": 0.7045693672027656, - "grad_norm": 1.996785083352212, - "learning_rate": 8.478674732000524e-07, - "loss": 1.0325, - "step": 9375 - }, - { - "epoch": 0.7046445212686007, - "grad_norm": 1.6658335113270855, - "learning_rate": 8.474695610725513e-07, - "loss": 0.9954, - "step": 9376 - }, - { - "epoch": 0.7047196753344356, - "grad_norm": 1.9436093883619745, - "learning_rate": 8.470717172368917e-07, - "loss": 1.0245, - "step": 9377 - }, - { - "epoch": 0.7047948294002706, - "grad_norm": 1.4402157382157612, - "learning_rate": 8.466739417166473e-07, - "loss": 0.9622, - "step": 9378 - }, - { - "epoch": 0.7048699834661055, - "grad_norm": 2.1858652818705595, - "learning_rate": 8.462762345353887e-07, - "loss": 0.912, - "step": 9379 - }, - { - "epoch": 0.7049451375319404, - "grad_norm": 1.3906773332328177, - "learning_rate": 8.458785957166812e-07, - "loss": 0.9675, - "step": 9380 - }, - { - "epoch": 0.7050202915977755, - "grad_norm": 1.533297512769389, - "learning_rate": 8.454810252840863e-07, - "loss": 0.9475, - "step": 9381 - }, - { - "epoch": 0.7050954456636104, - "grad_norm": 1.8785320075818568, - "learning_rate": 8.450835232611618e-07, - "loss": 0.9888, - "step": 9382 - }, - { - "epoch": 0.7051705997294454, - "grad_norm": 0.8640001160656886, - "learning_rate": 8.4468608967146e-07, - "loss": 0.8891, - "step": 9383 - }, - { - "epoch": 0.7052457537952803, - "grad_norm": 4.603532612267131, - "learning_rate": 8.442887245385324e-07, - "loss": 0.9804, - "step": 9384 - }, - { - "epoch": 0.7053209078611152, - "grad_norm": 1.7149144464401158, - "learning_rate": 8.438914278859231e-07, - "loss": 0.8789, - "step": 9385 - }, - { - "epoch": 0.7053960619269503, - "grad_norm": 1.5816092976307938, - "learning_rate": 8.434941997371738e-07, - "loss": 0.9407, - "step": 9386 - }, - { - "epoch": 0.7054712159927852, - "grad_norm": 2.16998560114008, - "learning_rate": 8.43097040115822e-07, - "loss": 0.9807, - "step": 9387 - }, - { - "epoch": 0.7055463700586202, - "grad_norm": 8.917253088755851, - "learning_rate": 8.426999490453996e-07, - "loss": 0.9917, - "step": 9388 - }, - { - "epoch": 0.7056215241244551, - "grad_norm": 2.7421570691976926, - "learning_rate": 8.423029265494377e-07, - "loss": 0.9297, - "step": 9389 - }, - { - "epoch": 0.7056966781902901, - "grad_norm": 0.7650621641993652, - "learning_rate": 8.419059726514597e-07, - "loss": 0.8194, - "step": 9390 - }, - { - "epoch": 0.705771832256125, - "grad_norm": 1.733048146274095, - "learning_rate": 8.415090873749882e-07, - "loss": 0.9483, - "step": 9391 - }, - { - "epoch": 0.70584698632196, - "grad_norm": 1.789982854665127, - "learning_rate": 8.411122707435394e-07, - "loss": 0.9086, - "step": 9392 - }, - { - "epoch": 0.705922140387795, - "grad_norm": 7.245945026691411, - "learning_rate": 8.407155227806264e-07, - "loss": 1.0006, - "step": 9393 - }, - { - "epoch": 0.7059972944536299, - "grad_norm": 2.6665821856786107, - "learning_rate": 8.403188435097576e-07, - "loss": 0.9884, - "step": 9394 - }, - { - "epoch": 0.7060724485194649, - "grad_norm": 1.5425679519252182, - "learning_rate": 8.399222329544375e-07, - "loss": 1.0737, - "step": 9395 - }, - { - "epoch": 0.7061476025852998, - "grad_norm": 1.852848169964928, - "learning_rate": 8.395256911381681e-07, - "loss": 0.971, - "step": 9396 - }, - { - "epoch": 0.7062227566511349, - "grad_norm": 1.965343191932596, - "learning_rate": 8.391292180844451e-07, - "loss": 1.054, - "step": 9397 - }, - { - "epoch": 0.7062979107169698, - "grad_norm": 1.622193333544206, - "learning_rate": 8.387328138167613e-07, - "loss": 0.9693, - "step": 9398 - }, - { - "epoch": 0.7063730647828047, - "grad_norm": 1.5604403844982346, - "learning_rate": 8.383364783586051e-07, - "loss": 1.1061, - "step": 9399 - }, - { - "epoch": 0.7064482188486397, - "grad_norm": 1.6962925060459075, - "learning_rate": 8.379402117334601e-07, - "loss": 1.057, - "step": 9400 - }, - { - "epoch": 0.7065233729144746, - "grad_norm": 1.9803912205916991, - "learning_rate": 8.375440139648082e-07, - "loss": 0.9187, - "step": 9401 - }, - { - "epoch": 0.7065985269803097, - "grad_norm": 1.7948710005796862, - "learning_rate": 8.371478850761247e-07, - "loss": 0.9863, - "step": 9402 - }, - { - "epoch": 0.7066736810461446, - "grad_norm": 2.4887970729574014, - "learning_rate": 8.367518250908818e-07, - "loss": 1.0132, - "step": 9403 - }, - { - "epoch": 0.7067488351119796, - "grad_norm": 1.5106500204189899, - "learning_rate": 8.363558340325478e-07, - "loss": 0.9574, - "step": 9404 - }, - { - "epoch": 0.7068239891778145, - "grad_norm": 1.4307013913645437, - "learning_rate": 8.359599119245857e-07, - "loss": 1.0386, - "step": 9405 - }, - { - "epoch": 0.7068991432436494, - "grad_norm": 2.0676573803931086, - "learning_rate": 8.355640587904569e-07, - "loss": 0.9983, - "step": 9406 - }, - { - "epoch": 0.7069742973094845, - "grad_norm": 0.8002067179631702, - "learning_rate": 8.351682746536166e-07, - "loss": 0.8325, - "step": 9407 - }, - { - "epoch": 0.7070494513753194, - "grad_norm": 5.334378418253701, - "learning_rate": 8.347725595375165e-07, - "loss": 1.1093, - "step": 9408 - }, - { - "epoch": 0.7071246054411544, - "grad_norm": 2.152588762852303, - "learning_rate": 8.343769134656043e-07, - "loss": 0.9451, - "step": 9409 - }, - { - "epoch": 0.7071997595069893, - "grad_norm": 1.5048733858856689, - "learning_rate": 8.339813364613224e-07, - "loss": 1.0386, - "step": 9410 - }, - { - "epoch": 0.7072749135728242, - "grad_norm": 2.4954520820078696, - "learning_rate": 8.335858285481124e-07, - "loss": 0.9535, - "step": 9411 - }, - { - "epoch": 0.7073500676386593, - "grad_norm": 1.80890806033441, - "learning_rate": 8.331903897494077e-07, - "loss": 0.9689, - "step": 9412 - }, - { - "epoch": 0.7074252217044942, - "grad_norm": 2.142153897011474, - "learning_rate": 8.327950200886409e-07, - "loss": 0.9909, - "step": 9413 - }, - { - "epoch": 0.7075003757703292, - "grad_norm": 1.7058668551795295, - "learning_rate": 8.323997195892389e-07, - "loss": 0.9383, - "step": 9414 - }, - { - "epoch": 0.7075755298361641, - "grad_norm": 7.789159225123478, - "learning_rate": 8.320044882746246e-07, - "loss": 1.0136, - "step": 9415 - }, - { - "epoch": 0.7076506839019991, - "grad_norm": 1.5594804961271247, - "learning_rate": 8.316093261682169e-07, - "loss": 0.9278, - "step": 9416 - }, - { - "epoch": 0.7077258379678341, - "grad_norm": 2.4272058698858547, - "learning_rate": 8.312142332934299e-07, - "loss": 0.9643, - "step": 9417 - }, - { - "epoch": 0.707800992033669, - "grad_norm": 1.5339597742309738, - "learning_rate": 8.308192096736759e-07, - "loss": 0.9652, - "step": 9418 - }, - { - "epoch": 0.707876146099504, - "grad_norm": 1.6746663828872836, - "learning_rate": 8.304242553323608e-07, - "loss": 0.9909, - "step": 9419 - }, - { - "epoch": 0.7079513001653389, - "grad_norm": 2.137684605529376, - "learning_rate": 8.300293702928873e-07, - "loss": 0.993, - "step": 9420 - }, - { - "epoch": 0.7080264542311739, - "grad_norm": 2.0447095525742593, - "learning_rate": 8.296345545786536e-07, - "loss": 0.9475, - "step": 9421 - }, - { - "epoch": 0.7081016082970089, - "grad_norm": 1.857203907015567, - "learning_rate": 8.292398082130534e-07, - "loss": 0.9346, - "step": 9422 - }, - { - "epoch": 0.7081767623628439, - "grad_norm": 1.52302874436683, - "learning_rate": 8.288451312194787e-07, - "loss": 0.898, - "step": 9423 - }, - { - "epoch": 0.7082519164286788, - "grad_norm": 1.498429032064145, - "learning_rate": 8.284505236213144e-07, - "loss": 0.8674, - "step": 9424 - }, - { - "epoch": 0.7083270704945137, - "grad_norm": 1.9194660130656993, - "learning_rate": 8.280559854419427e-07, - "loss": 1.04, - "step": 9425 - }, - { - "epoch": 0.7084022245603487, - "grad_norm": 0.6743393879592072, - "learning_rate": 8.276615167047416e-07, - "loss": 0.765, - "step": 9426 - }, - { - "epoch": 0.7084773786261837, - "grad_norm": 2.2283587789212076, - "learning_rate": 8.272671174330841e-07, - "loss": 0.943, - "step": 9427 - }, - { - "epoch": 0.7085525326920187, - "grad_norm": 1.7045434067453742, - "learning_rate": 8.268727876503411e-07, - "loss": 0.886, - "step": 9428 - }, - { - "epoch": 0.7086276867578536, - "grad_norm": 2.379691960411375, - "learning_rate": 8.26478527379878e-07, - "loss": 0.9629, - "step": 9429 - }, - { - "epoch": 0.7087028408236885, - "grad_norm": 1.7264734996106335, - "learning_rate": 8.260843366450549e-07, - "loss": 0.9644, - "step": 9430 - }, - { - "epoch": 0.7087779948895235, - "grad_norm": 2.455624612879559, - "learning_rate": 8.256902154692318e-07, - "loss": 0.8988, - "step": 9431 - }, - { - "epoch": 0.7088531489553584, - "grad_norm": 2.1274070570621855, - "learning_rate": 8.252961638757585e-07, - "loss": 1.1674, - "step": 9432 - }, - { - "epoch": 0.7089283030211935, - "grad_norm": 1.8272692641937551, - "learning_rate": 8.249021818879865e-07, - "loss": 0.9555, - "step": 9433 - }, - { - "epoch": 0.7090034570870284, - "grad_norm": 0.6811897513669749, - "learning_rate": 8.245082695292592e-07, - "loss": 0.8716, - "step": 9434 - }, - { - "epoch": 0.7090786111528634, - "grad_norm": 1.9560309802412261, - "learning_rate": 8.24114426822919e-07, - "loss": 0.9679, - "step": 9435 - }, - { - "epoch": 0.7091537652186983, - "grad_norm": 1.4906563617598056, - "learning_rate": 8.237206537923016e-07, - "loss": 0.9381, - "step": 9436 - }, - { - "epoch": 0.7092289192845332, - "grad_norm": 2.917166603475304, - "learning_rate": 8.233269504607398e-07, - "loss": 1.0038, - "step": 9437 - }, - { - "epoch": 0.7093040733503683, - "grad_norm": 2.934989180796761, - "learning_rate": 8.229333168515622e-07, - "loss": 1.0125, - "step": 9438 - }, - { - "epoch": 0.7093792274162032, - "grad_norm": 1.8934555932764088, - "learning_rate": 8.225397529880919e-07, - "loss": 1.0155, - "step": 9439 - }, - { - "epoch": 0.7094543814820382, - "grad_norm": 1.735191496689021, - "learning_rate": 8.22146258893651e-07, - "loss": 0.8729, - "step": 9440 - }, - { - "epoch": 0.7095295355478731, - "grad_norm": 3.235334023826217, - "learning_rate": 8.217528345915543e-07, - "loss": 0.9518, - "step": 9441 - }, - { - "epoch": 0.7096046896137082, - "grad_norm": 2.290691746341662, - "learning_rate": 8.21359480105114e-07, - "loss": 0.9511, - "step": 9442 - }, - { - "epoch": 0.7096798436795431, - "grad_norm": 1.8439833283281, - "learning_rate": 8.209661954576379e-07, - "loss": 0.9271, - "step": 9443 - }, - { - "epoch": 0.709754997745378, - "grad_norm": 0.7258736000395699, - "learning_rate": 8.205729806724288e-07, - "loss": 0.8587, - "step": 9444 - }, - { - "epoch": 0.709830151811213, - "grad_norm": 1.6567662712001576, - "learning_rate": 8.201798357727876e-07, - "loss": 1.0139, - "step": 9445 - }, - { - "epoch": 0.7099053058770479, - "grad_norm": 1.753631502309517, - "learning_rate": 8.19786760782009e-07, - "loss": 0.8755, - "step": 9446 - }, - { - "epoch": 0.709980459942883, - "grad_norm": 2.3491905644574658, - "learning_rate": 8.193937557233841e-07, - "loss": 0.9871, - "step": 9447 - }, - { - "epoch": 0.7100556140087179, - "grad_norm": 0.7952275168950168, - "learning_rate": 8.190008206202002e-07, - "loss": 0.8381, - "step": 9448 - }, - { - "epoch": 0.7101307680745529, - "grad_norm": 2.8222347059833166, - "learning_rate": 8.186079554957392e-07, - "loss": 0.9916, - "step": 9449 - }, - { - "epoch": 0.7102059221403878, - "grad_norm": 1.583679470909502, - "learning_rate": 8.182151603732814e-07, - "loss": 1.0363, - "step": 9450 - }, - { - "epoch": 0.7102810762062227, - "grad_norm": 6.346462628159485, - "learning_rate": 8.178224352761008e-07, - "loss": 0.9329, - "step": 9451 - }, - { - "epoch": 0.7103562302720577, - "grad_norm": 1.3159998597547926, - "learning_rate": 8.174297802274668e-07, - "loss": 0.9841, - "step": 9452 - }, - { - "epoch": 0.7104313843378927, - "grad_norm": 1.7660783843879202, - "learning_rate": 8.170371952506483e-07, - "loss": 1.0154, - "step": 9453 - }, - { - "epoch": 0.7105065384037277, - "grad_norm": 1.6808601012373519, - "learning_rate": 8.166446803689045e-07, - "loss": 1.0113, - "step": 9454 - }, - { - "epoch": 0.7105816924695626, - "grad_norm": 1.5799544654996764, - "learning_rate": 8.162522356054952e-07, - "loss": 1.0377, - "step": 9455 - }, - { - "epoch": 0.7106568465353975, - "grad_norm": 1.8061177584803736, - "learning_rate": 8.158598609836733e-07, - "loss": 0.9991, - "step": 9456 - }, - { - "epoch": 0.7107320006012325, - "grad_norm": 2.343293819537494, - "learning_rate": 8.154675565266898e-07, - "loss": 0.9574, - "step": 9457 - }, - { - "epoch": 0.7108071546670675, - "grad_norm": 1.6445690884620365, - "learning_rate": 8.150753222577893e-07, - "loss": 0.9984, - "step": 9458 - }, - { - "epoch": 0.7108823087329025, - "grad_norm": 2.7774190342567695, - "learning_rate": 8.146831582002134e-07, - "loss": 0.9355, - "step": 9459 - }, - { - "epoch": 0.7109574627987374, - "grad_norm": 1.6968251173580307, - "learning_rate": 8.142910643771992e-07, - "loss": 1.0131, - "step": 9460 - }, - { - "epoch": 0.7110326168645724, - "grad_norm": 6.035912169488008, - "learning_rate": 8.13899040811979e-07, - "loss": 0.9627, - "step": 9461 - }, - { - "epoch": 0.7111077709304073, - "grad_norm": 1.4497867920757925, - "learning_rate": 8.135070875277834e-07, - "loss": 1.0915, - "step": 9462 - }, - { - "epoch": 0.7111829249962422, - "grad_norm": 1.8405537460343329, - "learning_rate": 8.131152045478362e-07, - "loss": 0.9783, - "step": 9463 - }, - { - "epoch": 0.7112580790620773, - "grad_norm": 1.9643690624141257, - "learning_rate": 8.12723391895358e-07, - "loss": 0.9893, - "step": 9464 - }, - { - "epoch": 0.7113332331279122, - "grad_norm": 1.8295864556196944, - "learning_rate": 8.123316495935653e-07, - "loss": 0.8795, - "step": 9465 - }, - { - "epoch": 0.7114083871937472, - "grad_norm": 1.3324689845041524, - "learning_rate": 8.119399776656695e-07, - "loss": 1.0685, - "step": 9466 - }, - { - "epoch": 0.7114835412595821, - "grad_norm": 1.8070689420483985, - "learning_rate": 8.115483761348801e-07, - "loss": 1.0515, - "step": 9467 - }, - { - "epoch": 0.7115586953254172, - "grad_norm": 1.6818554748739998, - "learning_rate": 8.111568450244004e-07, - "loss": 1.0062, - "step": 9468 - }, - { - "epoch": 0.7116338493912521, - "grad_norm": 1.5649801026319508, - "learning_rate": 8.107653843574298e-07, - "loss": 0.9869, - "step": 9469 - }, - { - "epoch": 0.711709003457087, - "grad_norm": 1.454135092508187, - "learning_rate": 8.103739941571641e-07, - "loss": 1.0146, - "step": 9470 - }, - { - "epoch": 0.711784157522922, - "grad_norm": 1.8162781372831245, - "learning_rate": 8.099826744467941e-07, - "loss": 0.9989, - "step": 9471 - }, - { - "epoch": 0.7118593115887569, - "grad_norm": 1.8499385617338597, - "learning_rate": 8.095914252495082e-07, - "loss": 0.9259, - "step": 9472 - }, - { - "epoch": 0.711934465654592, - "grad_norm": 2.023539890364588, - "learning_rate": 8.092002465884886e-07, - "loss": 0.9291, - "step": 9473 - }, - { - "epoch": 0.7120096197204269, - "grad_norm": 1.6643182262517113, - "learning_rate": 8.088091384869136e-07, - "loss": 1.0126, - "step": 9474 - }, - { - "epoch": 0.7120847737862618, - "grad_norm": 1.3066722826123385, - "learning_rate": 8.084181009679592e-07, - "loss": 1.0411, - "step": 9475 - }, - { - "epoch": 0.7121599278520968, - "grad_norm": 2.475059365995866, - "learning_rate": 8.080271340547953e-07, - "loss": 0.9873, - "step": 9476 - }, - { - "epoch": 0.7122350819179317, - "grad_norm": 1.66442000439629, - "learning_rate": 8.076362377705881e-07, - "loss": 1.0112, - "step": 9477 - }, - { - "epoch": 0.7123102359837667, - "grad_norm": 1.5023549191064196, - "learning_rate": 8.072454121384988e-07, - "loss": 0.8576, - "step": 9478 - }, - { - "epoch": 0.7123853900496017, - "grad_norm": 1.6144060919496437, - "learning_rate": 8.06854657181687e-07, - "loss": 0.9397, - "step": 9479 - }, - { - "epoch": 0.7124605441154367, - "grad_norm": 2.6443794240713925, - "learning_rate": 8.064639729233056e-07, - "loss": 0.9854, - "step": 9480 - }, - { - "epoch": 0.7125356981812716, - "grad_norm": 1.7265575978481384, - "learning_rate": 8.060733593865041e-07, - "loss": 0.9043, - "step": 9481 - }, - { - "epoch": 0.7126108522471065, - "grad_norm": 2.040940045139196, - "learning_rate": 8.05682816594428e-07, - "loss": 1.0179, - "step": 9482 - }, - { - "epoch": 0.7126860063129415, - "grad_norm": 0.6392310258408794, - "learning_rate": 8.052923445702175e-07, - "loss": 0.786, - "step": 9483 - }, - { - "epoch": 0.7127611603787765, - "grad_norm": 2.1882421052176477, - "learning_rate": 8.049019433370114e-07, - "loss": 1.0388, - "step": 9484 - }, - { - "epoch": 0.7128363144446115, - "grad_norm": 1.731820777421452, - "learning_rate": 8.045116129179412e-07, - "loss": 0.9514, - "step": 9485 - }, - { - "epoch": 0.7129114685104464, - "grad_norm": 1.8840770353008711, - "learning_rate": 8.041213533361359e-07, - "loss": 0.8952, - "step": 9486 - }, - { - "epoch": 0.7129866225762814, - "grad_norm": 1.8023941198353826, - "learning_rate": 8.037311646147198e-07, - "loss": 1.0214, - "step": 9487 - }, - { - "epoch": 0.7130617766421163, - "grad_norm": 1.5862067985212933, - "learning_rate": 8.033410467768122e-07, - "loss": 0.8778, - "step": 9488 - }, - { - "epoch": 0.7131369307079513, - "grad_norm": 2.9033503680015955, - "learning_rate": 8.029509998455308e-07, - "loss": 0.9, - "step": 9489 - }, - { - "epoch": 0.7132120847737863, - "grad_norm": 2.085594349042336, - "learning_rate": 8.025610238439864e-07, - "loss": 0.8754, - "step": 9490 - }, - { - "epoch": 0.7132872388396212, - "grad_norm": 2.540729902478304, - "learning_rate": 8.021711187952864e-07, - "loss": 0.8392, - "step": 9491 - }, - { - "epoch": 0.7133623929054562, - "grad_norm": 3.0773664595188976, - "learning_rate": 8.017812847225347e-07, - "loss": 0.943, - "step": 9492 - }, - { - "epoch": 0.7134375469712911, - "grad_norm": 3.0966076224467023, - "learning_rate": 8.013915216488294e-07, - "loss": 0.9079, - "step": 9493 - }, - { - "epoch": 0.7135127010371262, - "grad_norm": 2.7587767042035147, - "learning_rate": 8.010018295972667e-07, - "loss": 1.027, - "step": 9494 - }, - { - "epoch": 0.7135878551029611, - "grad_norm": 1.2690881513395205, - "learning_rate": 8.00612208590937e-07, - "loss": 0.9112, - "step": 9495 - }, - { - "epoch": 0.713663009168796, - "grad_norm": 1.8238134812097848, - "learning_rate": 8.002226586529261e-07, - "loss": 0.8861, - "step": 9496 - }, - { - "epoch": 0.713738163234631, - "grad_norm": 1.867720843002145, - "learning_rate": 7.998331798063176e-07, - "loss": 0.9814, - "step": 9497 - }, - { - "epoch": 0.7138133173004659, - "grad_norm": 1.8464377258360556, - "learning_rate": 7.994437720741889e-07, - "loss": 1.0306, - "step": 9498 - }, - { - "epoch": 0.713888471366301, - "grad_norm": 1.5781200899859802, - "learning_rate": 7.990544354796139e-07, - "loss": 0.8884, - "step": 9499 - }, - { - "epoch": 0.7139636254321359, - "grad_norm": 1.6440792957775618, - "learning_rate": 7.986651700456617e-07, - "loss": 0.9953, - "step": 9500 - }, - { - "epoch": 0.7140387794979708, - "grad_norm": 2.1344731052801547, - "learning_rate": 7.982759757953989e-07, - "loss": 0.9931, - "step": 9501 - }, - { - "epoch": 0.7141139335638058, - "grad_norm": 1.7597453701534462, - "learning_rate": 7.978868527518864e-07, - "loss": 1.0364, - "step": 9502 - }, - { - "epoch": 0.7141890876296407, - "grad_norm": 1.5051384967939827, - "learning_rate": 7.974978009381812e-07, - "loss": 0.9595, - "step": 9503 - }, - { - "epoch": 0.7142642416954758, - "grad_norm": 1.6470165908431813, - "learning_rate": 7.97108820377336e-07, - "loss": 0.9954, - "step": 9504 - }, - { - "epoch": 0.7143393957613107, - "grad_norm": 1.6985216135711045, - "learning_rate": 7.967199110923983e-07, - "loss": 0.9515, - "step": 9505 - }, - { - "epoch": 0.7144145498271457, - "grad_norm": 1.6657199395982787, - "learning_rate": 7.963310731064146e-07, - "loss": 0.9489, - "step": 9506 - }, - { - "epoch": 0.7144897038929806, - "grad_norm": 1.9230113817930499, - "learning_rate": 7.959423064424238e-07, - "loss": 0.9946, - "step": 9507 - }, - { - "epoch": 0.7145648579588155, - "grad_norm": 2.037703722732872, - "learning_rate": 7.95553611123462e-07, - "loss": 0.9945, - "step": 9508 - }, - { - "epoch": 0.7146400120246506, - "grad_norm": 1.605251548163929, - "learning_rate": 7.95164987172561e-07, - "loss": 0.9087, - "step": 9509 - }, - { - "epoch": 0.7147151660904855, - "grad_norm": 1.770205674479411, - "learning_rate": 7.947764346127472e-07, - "loss": 0.9807, - "step": 9510 - }, - { - "epoch": 0.7147903201563205, - "grad_norm": 1.4075958593326598, - "learning_rate": 7.943879534670457e-07, - "loss": 1.041, - "step": 9511 - }, - { - "epoch": 0.7148654742221554, - "grad_norm": 1.4946273110800863, - "learning_rate": 7.939995437584744e-07, - "loss": 1.0118, - "step": 9512 - }, - { - "epoch": 0.7149406282879904, - "grad_norm": 1.8637923653262947, - "learning_rate": 7.936112055100481e-07, - "loss": 0.9867, - "step": 9513 - }, - { - "epoch": 0.7150157823538253, - "grad_norm": 1.7979071928137886, - "learning_rate": 7.932229387447777e-07, - "loss": 1.0092, - "step": 9514 - }, - { - "epoch": 0.7150909364196603, - "grad_norm": 1.3441699946331365, - "learning_rate": 7.928347434856683e-07, - "loss": 1.0184, - "step": 9515 - }, - { - "epoch": 0.7151660904854953, - "grad_norm": 1.9215217203604587, - "learning_rate": 7.924466197557238e-07, - "loss": 0.9654, - "step": 9516 - }, - { - "epoch": 0.7152412445513302, - "grad_norm": 2.0722844934638163, - "learning_rate": 7.920585675779412e-07, - "loss": 0.9839, - "step": 9517 - }, - { - "epoch": 0.7153163986171652, - "grad_norm": 1.714795852722143, - "learning_rate": 7.916705869753131e-07, - "loss": 0.9825, - "step": 9518 - }, - { - "epoch": 0.7153915526830001, - "grad_norm": 1.6442955736198719, - "learning_rate": 7.912826779708304e-07, - "loss": 0.9255, - "step": 9519 - }, - { - "epoch": 0.7154667067488351, - "grad_norm": 0.8716858703134168, - "learning_rate": 7.908948405874775e-07, - "loss": 0.8314, - "step": 9520 - }, - { - "epoch": 0.7155418608146701, - "grad_norm": 0.7571203079040454, - "learning_rate": 7.905070748482354e-07, - "loss": 0.8981, - "step": 9521 - }, - { - "epoch": 0.715617014880505, - "grad_norm": 2.088053455194691, - "learning_rate": 7.901193807760797e-07, - "loss": 0.999, - "step": 9522 - }, - { - "epoch": 0.71569216894634, - "grad_norm": 1.4326434818863367, - "learning_rate": 7.897317583939846e-07, - "loss": 0.9524, - "step": 9523 - }, - { - "epoch": 0.7157673230121749, - "grad_norm": 1.6803774104351838, - "learning_rate": 7.89344207724917e-07, - "loss": 1.0629, - "step": 9524 - }, - { - "epoch": 0.71584247707801, - "grad_norm": 2.2846702124007203, - "learning_rate": 7.889567287918413e-07, - "loss": 0.9795, - "step": 9525 - }, - { - "epoch": 0.7159176311438449, - "grad_norm": 2.22960414931175, - "learning_rate": 7.885693216177165e-07, - "loss": 1.0047, - "step": 9526 - }, - { - "epoch": 0.7159927852096798, - "grad_norm": 2.2237658689124604, - "learning_rate": 7.881819862254977e-07, - "loss": 1.0222, - "step": 9527 - }, - { - "epoch": 0.7160679392755148, - "grad_norm": 11.669610514936327, - "learning_rate": 7.877947226381372e-07, - "loss": 1.0581, - "step": 9528 - }, - { - "epoch": 0.7161430933413497, - "grad_norm": 1.7731015059346997, - "learning_rate": 7.874075308785813e-07, - "loss": 0.9501, - "step": 9529 - }, - { - "epoch": 0.7162182474071848, - "grad_norm": 1.6633036141170936, - "learning_rate": 7.870204109697724e-07, - "loss": 1.0445, - "step": 9530 - }, - { - "epoch": 0.7162934014730197, - "grad_norm": 1.6707157144346438, - "learning_rate": 7.866333629346491e-07, - "loss": 0.8673, - "step": 9531 - }, - { - "epoch": 0.7163685555388547, - "grad_norm": 1.951563890271879, - "learning_rate": 7.862463867961446e-07, - "loss": 0.8919, - "step": 9532 - }, - { - "epoch": 0.7164437096046896, - "grad_norm": 0.8220813390672498, - "learning_rate": 7.858594825771902e-07, - "loss": 0.8961, - "step": 9533 - }, - { - "epoch": 0.7165188636705245, - "grad_norm": 2.6091037997633313, - "learning_rate": 7.854726503007107e-07, - "loss": 0.8661, - "step": 9534 - }, - { - "epoch": 0.7165940177363596, - "grad_norm": 1.8844660545605525, - "learning_rate": 7.850858899896273e-07, - "loss": 1.009, - "step": 9535 - }, - { - "epoch": 0.7166691718021945, - "grad_norm": 2.1664345201201587, - "learning_rate": 7.846992016668572e-07, - "loss": 1.0709, - "step": 9536 - }, - { - "epoch": 0.7167443258680295, - "grad_norm": 1.6208233343500784, - "learning_rate": 7.843125853553125e-07, - "loss": 1.0507, - "step": 9537 - }, - { - "epoch": 0.7168194799338644, - "grad_norm": 2.2364953923867663, - "learning_rate": 7.839260410779029e-07, - "loss": 1.015, - "step": 9538 - }, - { - "epoch": 0.7168946339996994, - "grad_norm": 0.7093511636233074, - "learning_rate": 7.835395688575319e-07, - "loss": 0.8371, - "step": 9539 - }, - { - "epoch": 0.7169697880655344, - "grad_norm": 1.60098002937056, - "learning_rate": 7.831531687170992e-07, - "loss": 0.8985, - "step": 9540 - }, - { - "epoch": 0.7170449421313693, - "grad_norm": 2.060917912202867, - "learning_rate": 7.827668406795014e-07, - "loss": 0.9797, - "step": 9541 - }, - { - "epoch": 0.7171200961972043, - "grad_norm": 1.4485370852773174, - "learning_rate": 7.823805847676292e-07, - "loss": 0.9756, - "step": 9542 - }, - { - "epoch": 0.7171952502630392, - "grad_norm": 2.011954580761302, - "learning_rate": 7.819944010043702e-07, - "loss": 1.0219, - "step": 9543 - }, - { - "epoch": 0.7172704043288742, - "grad_norm": 3.0246201205737053, - "learning_rate": 7.816082894126061e-07, - "loss": 0.8376, - "step": 9544 - }, - { - "epoch": 0.7173455583947091, - "grad_norm": 1.426333803728785, - "learning_rate": 7.812222500152172e-07, - "loss": 0.9394, - "step": 9545 - }, - { - "epoch": 0.7174207124605441, - "grad_norm": 2.092031542297703, - "learning_rate": 7.80836282835077e-07, - "loss": 0.9816, - "step": 9546 - }, - { - "epoch": 0.7174958665263791, - "grad_norm": 1.5578375872674348, - "learning_rate": 7.804503878950555e-07, - "loss": 0.9603, - "step": 9547 - }, - { - "epoch": 0.717571020592214, - "grad_norm": 3.1741182259244844, - "learning_rate": 7.800645652180184e-07, - "loss": 0.9295, - "step": 9548 - }, - { - "epoch": 0.717646174658049, - "grad_norm": 2.2949716375921283, - "learning_rate": 7.796788148268267e-07, - "loss": 0.8233, - "step": 9549 - }, - { - "epoch": 0.717721328723884, - "grad_norm": 1.3757790179345537, - "learning_rate": 7.79293136744339e-07, - "loss": 1.0423, - "step": 9550 - }, - { - "epoch": 0.717796482789719, - "grad_norm": 2.1982480442641883, - "learning_rate": 7.78907530993407e-07, - "loss": 0.92, - "step": 9551 - }, - { - "epoch": 0.7178716368555539, - "grad_norm": 1.8622879803712762, - "learning_rate": 7.785219975968798e-07, - "loss": 1.0465, - "step": 9552 - }, - { - "epoch": 0.7179467909213888, - "grad_norm": 3.314741276099543, - "learning_rate": 7.781365365776016e-07, - "loss": 0.9166, - "step": 9553 - }, - { - "epoch": 0.7180219449872238, - "grad_norm": 1.6764566976505209, - "learning_rate": 7.777511479584118e-07, - "loss": 0.9924, - "step": 9554 - }, - { - "epoch": 0.7180970990530587, - "grad_norm": 1.8741458218733962, - "learning_rate": 7.773658317621474e-07, - "loss": 0.9556, - "step": 9555 - }, - { - "epoch": 0.7181722531188938, - "grad_norm": 1.8710078018613003, - "learning_rate": 7.769805880116391e-07, - "loss": 1.0561, - "step": 9556 - }, - { - "epoch": 0.7182474071847287, - "grad_norm": 2.3055375939809615, - "learning_rate": 7.765954167297144e-07, - "loss": 0.9463, - "step": 9557 - }, - { - "epoch": 0.7183225612505637, - "grad_norm": 1.762029311725443, - "learning_rate": 7.762103179391961e-07, - "loss": 0.9086, - "step": 9558 - }, - { - "epoch": 0.7183977153163986, - "grad_norm": 2.1818657385816134, - "learning_rate": 7.758252916629017e-07, - "loss": 0.8376, - "step": 9559 - }, - { - "epoch": 0.7184728693822335, - "grad_norm": 1.5242563247937355, - "learning_rate": 7.754403379236474e-07, - "loss": 1.0081, - "step": 9560 - }, - { - "epoch": 0.7185480234480686, - "grad_norm": 2.0751102923760225, - "learning_rate": 7.75055456744242e-07, - "loss": 0.9731, - "step": 9561 - }, - { - "epoch": 0.7186231775139035, - "grad_norm": 1.672324717278906, - "learning_rate": 7.74670648147491e-07, - "loss": 0.9246, - "step": 9562 - }, - { - "epoch": 0.7186983315797385, - "grad_norm": 3.1929405007677283, - "learning_rate": 7.742859121561967e-07, - "loss": 0.9179, - "step": 9563 - }, - { - "epoch": 0.7187734856455734, - "grad_norm": 1.8770610290060254, - "learning_rate": 7.739012487931555e-07, - "loss": 1.005, - "step": 9564 - }, - { - "epoch": 0.7188486397114083, - "grad_norm": 1.801911237433665, - "learning_rate": 7.735166580811607e-07, - "loss": 0.9914, - "step": 9565 - }, - { - "epoch": 0.7189237937772434, - "grad_norm": 1.6390658281290686, - "learning_rate": 7.731321400429995e-07, - "loss": 1.0206, - "step": 9566 - }, - { - "epoch": 0.7189989478430783, - "grad_norm": 1.4816660204279162, - "learning_rate": 7.727476947014578e-07, - "loss": 1.0069, - "step": 9567 - }, - { - "epoch": 0.7190741019089133, - "grad_norm": 1.7821019460927874, - "learning_rate": 7.723633220793146e-07, - "loss": 1.0383, - "step": 9568 - }, - { - "epoch": 0.7191492559747482, - "grad_norm": 1.5096519990038477, - "learning_rate": 7.719790221993456e-07, - "loss": 0.9732, - "step": 9569 - }, - { - "epoch": 0.7192244100405832, - "grad_norm": 1.7847641528885987, - "learning_rate": 7.715947950843218e-07, - "loss": 0.9549, - "step": 9570 - }, - { - "epoch": 0.7192995641064182, - "grad_norm": 2.4234178120485437, - "learning_rate": 7.712106407570096e-07, - "loss": 1.0138, - "step": 9571 - }, - { - "epoch": 0.7193747181722531, - "grad_norm": 1.8390357314554342, - "learning_rate": 7.70826559240173e-07, - "loss": 0.9818, - "step": 9572 - }, - { - "epoch": 0.7194498722380881, - "grad_norm": 1.7840218643850079, - "learning_rate": 7.704425505565697e-07, - "loss": 0.9116, - "step": 9573 - }, - { - "epoch": 0.719525026303923, - "grad_norm": 1.9647628737199103, - "learning_rate": 7.700586147289534e-07, - "loss": 0.978, - "step": 9574 - }, - { - "epoch": 0.719600180369758, - "grad_norm": 1.2586938838326147, - "learning_rate": 7.696747517800742e-07, - "loss": 0.9272, - "step": 9575 - }, - { - "epoch": 0.719675334435593, - "grad_norm": 1.5873690002938423, - "learning_rate": 7.692909617326761e-07, - "loss": 0.9849, - "step": 9576 - }, - { - "epoch": 0.719750488501428, - "grad_norm": 1.9092477459496497, - "learning_rate": 7.689072446095022e-07, - "loss": 1.0306, - "step": 9577 - }, - { - "epoch": 0.7198256425672629, - "grad_norm": 0.6701282594811783, - "learning_rate": 7.685236004332883e-07, - "loss": 0.8182, - "step": 9578 - }, - { - "epoch": 0.7199007966330978, - "grad_norm": 2.1840436604994915, - "learning_rate": 7.681400292267663e-07, - "loss": 1.0628, - "step": 9579 - }, - { - "epoch": 0.7199759506989328, - "grad_norm": 3.5862794933256454, - "learning_rate": 7.67756531012665e-07, - "loss": 1.0234, - "step": 9580 - }, - { - "epoch": 0.7200511047647677, - "grad_norm": 1.594635362230677, - "learning_rate": 7.673731058137072e-07, - "loss": 0.9607, - "step": 9581 - }, - { - "epoch": 0.7201262588306028, - "grad_norm": 1.9113485597554603, - "learning_rate": 7.669897536526133e-07, - "loss": 0.9061, - "step": 9582 - }, - { - "epoch": 0.7202014128964377, - "grad_norm": 1.7622719042203154, - "learning_rate": 7.666064745520982e-07, - "loss": 0.9658, - "step": 9583 - }, - { - "epoch": 0.7202765669622727, - "grad_norm": 2.35910345877007, - "learning_rate": 7.662232685348718e-07, - "loss": 0.8509, - "step": 9584 - }, - { - "epoch": 0.7203517210281076, - "grad_norm": 1.9067496598572138, - "learning_rate": 7.658401356236417e-07, - "loss": 1.0102, - "step": 9585 - }, - { - "epoch": 0.7204268750939425, - "grad_norm": 1.686478764970026, - "learning_rate": 7.654570758411096e-07, - "loss": 1.0153, - "step": 9586 - }, - { - "epoch": 0.7205020291597776, - "grad_norm": 1.4806137617851336, - "learning_rate": 7.650740892099731e-07, - "loss": 1.0147, - "step": 9587 - }, - { - "epoch": 0.7205771832256125, - "grad_norm": 1.761871263622532, - "learning_rate": 7.646911757529251e-07, - "loss": 1.0348, - "step": 9588 - }, - { - "epoch": 0.7206523372914475, - "grad_norm": 2.3394564147202512, - "learning_rate": 7.643083354926558e-07, - "loss": 0.9426, - "step": 9589 - }, - { - "epoch": 0.7207274913572824, - "grad_norm": 2.3943869716869757, - "learning_rate": 7.639255684518495e-07, - "loss": 0.9345, - "step": 9590 - }, - { - "epoch": 0.7208026454231173, - "grad_norm": 2.0515578189282206, - "learning_rate": 7.635428746531865e-07, - "loss": 0.9396, - "step": 9591 - }, - { - "epoch": 0.7208777994889524, - "grad_norm": 2.415111424973565, - "learning_rate": 7.631602541193429e-07, - "loss": 0.9951, - "step": 9592 - }, - { - "epoch": 0.7209529535547873, - "grad_norm": 1.6249489409421285, - "learning_rate": 7.627777068729897e-07, - "loss": 0.9795, - "step": 9593 - }, - { - "epoch": 0.7210281076206223, - "grad_norm": 2.7343157080625167, - "learning_rate": 7.623952329367955e-07, - "loss": 0.9208, - "step": 9594 - }, - { - "epoch": 0.7211032616864572, - "grad_norm": 0.8162668282496021, - "learning_rate": 7.620128323334231e-07, - "loss": 0.9384, - "step": 9595 - }, - { - "epoch": 0.7211784157522922, - "grad_norm": 1.738335426429276, - "learning_rate": 7.616305050855309e-07, - "loss": 0.9447, - "step": 9596 - }, - { - "epoch": 0.7212535698181272, - "grad_norm": 1.7138289314195434, - "learning_rate": 7.612482512157732e-07, - "loss": 0.9295, - "step": 9597 - }, - { - "epoch": 0.7213287238839621, - "grad_norm": 1.488645093602374, - "learning_rate": 7.608660707467996e-07, - "loss": 0.8889, - "step": 9598 - }, - { - "epoch": 0.7214038779497971, - "grad_norm": 0.6619924256332275, - "learning_rate": 7.604839637012568e-07, - "loss": 0.873, - "step": 9599 - }, - { - "epoch": 0.721479032015632, - "grad_norm": 2.3117029001943763, - "learning_rate": 7.601019301017857e-07, - "loss": 0.9752, - "step": 9600 - }, - { - "epoch": 0.721554186081467, - "grad_norm": 1.6128608211203637, - "learning_rate": 7.597199699710224e-07, - "loss": 0.8901, - "step": 9601 - }, - { - "epoch": 0.721629340147302, - "grad_norm": 1.7891655105186721, - "learning_rate": 7.593380833316017e-07, - "loss": 0.9934, - "step": 9602 - }, - { - "epoch": 0.721704494213137, - "grad_norm": 2.1659821627261953, - "learning_rate": 7.589562702061487e-07, - "loss": 0.9494, - "step": 9603 - }, - { - "epoch": 0.7217796482789719, - "grad_norm": 1.5561983061634783, - "learning_rate": 7.585745306172899e-07, - "loss": 0.9354, - "step": 9604 - }, - { - "epoch": 0.7218548023448068, - "grad_norm": 1.656128285092293, - "learning_rate": 7.581928645876439e-07, - "loss": 0.8258, - "step": 9605 - }, - { - "epoch": 0.7219299564106418, - "grad_norm": 2.2543404357646355, - "learning_rate": 7.578112721398251e-07, - "loss": 0.9285, - "step": 9606 - }, - { - "epoch": 0.7220051104764768, - "grad_norm": 1.598619315358989, - "learning_rate": 7.574297532964457e-07, - "loss": 1.0156, - "step": 9607 - }, - { - "epoch": 0.7220802645423118, - "grad_norm": 1.7742061209722602, - "learning_rate": 7.570483080801118e-07, - "loss": 1.105, - "step": 9608 - }, - { - "epoch": 0.7221554186081467, - "grad_norm": 2.4837122579009847, - "learning_rate": 7.56666936513425e-07, - "loss": 0.9898, - "step": 9609 - }, - { - "epoch": 0.7222305726739816, - "grad_norm": 2.5934264199516295, - "learning_rate": 7.562856386189826e-07, - "loss": 0.9526, - "step": 9610 - }, - { - "epoch": 0.7223057267398166, - "grad_norm": 1.3586603237203072, - "learning_rate": 7.559044144193793e-07, - "loss": 0.962, - "step": 9611 - }, - { - "epoch": 0.7223808808056515, - "grad_norm": 2.245603716924871, - "learning_rate": 7.555232639372033e-07, - "loss": 0.9812, - "step": 9612 - }, - { - "epoch": 0.7224560348714866, - "grad_norm": 1.8918664812715997, - "learning_rate": 7.551421871950396e-07, - "loss": 0.9808, - "step": 9613 - }, - { - "epoch": 0.7225311889373215, - "grad_norm": 1.7198882755321958, - "learning_rate": 7.54761184215468e-07, - "loss": 0.9589, - "step": 9614 - }, - { - "epoch": 0.7226063430031565, - "grad_norm": 1.6376429022248775, - "learning_rate": 7.543802550210641e-07, - "loss": 0.8923, - "step": 9615 - }, - { - "epoch": 0.7226814970689914, - "grad_norm": 1.9635915235535775, - "learning_rate": 7.539993996344003e-07, - "loss": 0.8752, - "step": 9616 - }, - { - "epoch": 0.7227566511348263, - "grad_norm": 1.6307315250925096, - "learning_rate": 7.536186180780435e-07, - "loss": 1.0547, - "step": 9617 - }, - { - "epoch": 0.7228318052006614, - "grad_norm": 1.3282859704696948, - "learning_rate": 7.532379103745561e-07, - "loss": 1.0188, - "step": 9618 - }, - { - "epoch": 0.7229069592664963, - "grad_norm": 1.5647163693503885, - "learning_rate": 7.528572765464969e-07, - "loss": 0.9578, - "step": 9619 - }, - { - "epoch": 0.7229821133323313, - "grad_norm": 1.6386663461803332, - "learning_rate": 7.524767166164187e-07, - "loss": 0.9242, - "step": 9620 - }, - { - "epoch": 0.7230572673981662, - "grad_norm": 1.7108708895602347, - "learning_rate": 7.520962306068732e-07, - "loss": 0.9941, - "step": 9621 - }, - { - "epoch": 0.7231324214640013, - "grad_norm": 1.737726221520373, - "learning_rate": 7.517158185404044e-07, - "loss": 1.008, - "step": 9622 - }, - { - "epoch": 0.7232075755298362, - "grad_norm": 1.6960504569384365, - "learning_rate": 7.513354804395527e-07, - "loss": 0.9929, - "step": 9623 - }, - { - "epoch": 0.7232827295956711, - "grad_norm": 1.8049894798585122, - "learning_rate": 7.509552163268564e-07, - "loss": 0.9885, - "step": 9624 - }, - { - "epoch": 0.7233578836615061, - "grad_norm": 2.0555452942234096, - "learning_rate": 7.505750262248453e-07, - "loss": 0.9938, - "step": 9625 - }, - { - "epoch": 0.723433037727341, - "grad_norm": 2.12367162136605, - "learning_rate": 7.50194910156049e-07, - "loss": 1.0237, - "step": 9626 - }, - { - "epoch": 0.723508191793176, - "grad_norm": 1.8429510034060048, - "learning_rate": 7.498148681429893e-07, - "loss": 0.9825, - "step": 9627 - }, - { - "epoch": 0.723583345859011, - "grad_norm": 1.7604946442739011, - "learning_rate": 7.494349002081866e-07, - "loss": 0.9637, - "step": 9628 - }, - { - "epoch": 0.723658499924846, - "grad_norm": 4.16778249422753, - "learning_rate": 7.490550063741548e-07, - "loss": 0.8898, - "step": 9629 - }, - { - "epoch": 0.7237336539906809, - "grad_norm": 1.5389518891684992, - "learning_rate": 7.486751866634043e-07, - "loss": 0.9151, - "step": 9630 - }, - { - "epoch": 0.7238088080565158, - "grad_norm": 1.8339241188558792, - "learning_rate": 7.482954410984403e-07, - "loss": 1.0326, - "step": 9631 - }, - { - "epoch": 0.7238839621223508, - "grad_norm": 1.5696250967220304, - "learning_rate": 7.479157697017639e-07, - "loss": 1.0364, - "step": 9632 - }, - { - "epoch": 0.7239591161881858, - "grad_norm": 1.9739297212189504, - "learning_rate": 7.475361724958735e-07, - "loss": 1.013, - "step": 9633 - }, - { - "epoch": 0.7240342702540208, - "grad_norm": 2.070581497854603, - "learning_rate": 7.471566495032608e-07, - "loss": 0.9581, - "step": 9634 - }, - { - "epoch": 0.7241094243198557, - "grad_norm": 1.5658615573182106, - "learning_rate": 7.467772007464142e-07, - "loss": 0.9989, - "step": 9635 - }, - { - "epoch": 0.7241845783856906, - "grad_norm": 3.330271140850551, - "learning_rate": 7.463978262478172e-07, - "loss": 0.9718, - "step": 9636 - }, - { - "epoch": 0.7242597324515256, - "grad_norm": 0.7455845238442419, - "learning_rate": 7.460185260299488e-07, - "loss": 0.8273, - "step": 9637 - }, - { - "epoch": 0.7243348865173606, - "grad_norm": 1.6863328641604416, - "learning_rate": 7.456393001152849e-07, - "loss": 0.946, - "step": 9638 - }, - { - "epoch": 0.7244100405831956, - "grad_norm": 2.817004940403732, - "learning_rate": 7.452601485262961e-07, - "loss": 1.0641, - "step": 9639 - }, - { - "epoch": 0.7244851946490305, - "grad_norm": 4.48809192547008, - "learning_rate": 7.448810712854483e-07, - "loss": 0.9812, - "step": 9640 - }, - { - "epoch": 0.7245603487148655, - "grad_norm": 22.506037601399875, - "learning_rate": 7.445020684152031e-07, - "loss": 1.0515, - "step": 9641 - }, - { - "epoch": 0.7246355027807004, - "grad_norm": 1.8716357248567517, - "learning_rate": 7.44123139938017e-07, - "loss": 1.0921, - "step": 9642 - }, - { - "epoch": 0.7247106568465354, - "grad_norm": 0.735368705640872, - "learning_rate": 7.437442858763447e-07, - "loss": 0.833, - "step": 9643 - }, - { - "epoch": 0.7247858109123704, - "grad_norm": 2.6554276034743824, - "learning_rate": 7.433655062526343e-07, - "loss": 0.873, - "step": 9644 - }, - { - "epoch": 0.7248609649782053, - "grad_norm": 1.8491378553752456, - "learning_rate": 7.429868010893287e-07, - "loss": 1.0021, - "step": 9645 - }, - { - "epoch": 0.7249361190440403, - "grad_norm": 2.027053959298136, - "learning_rate": 7.426081704088694e-07, - "loss": 0.956, - "step": 9646 - }, - { - "epoch": 0.7250112731098752, - "grad_norm": 1.6821599461940198, - "learning_rate": 7.422296142336908e-07, - "loss": 1.0478, - "step": 9647 - }, - { - "epoch": 0.7250864271757103, - "grad_norm": 1.5996133992909543, - "learning_rate": 7.418511325862238e-07, - "loss": 1.0429, - "step": 9648 - }, - { - "epoch": 0.7251615812415452, - "grad_norm": 1.4660489020368497, - "learning_rate": 7.414727254888944e-07, - "loss": 0.9488, - "step": 9649 - }, - { - "epoch": 0.7252367353073801, - "grad_norm": 1.640942860647064, - "learning_rate": 7.410943929641258e-07, - "loss": 0.9034, - "step": 9650 - }, - { - "epoch": 0.7253118893732151, - "grad_norm": 1.6528010896522203, - "learning_rate": 7.407161350343352e-07, - "loss": 0.9777, - "step": 9651 - }, - { - "epoch": 0.72538704343905, - "grad_norm": 2.0826530744660197, - "learning_rate": 7.403379517219354e-07, - "loss": 1.0338, - "step": 9652 - }, - { - "epoch": 0.725462197504885, - "grad_norm": 1.5783666293543923, - "learning_rate": 7.399598430493359e-07, - "loss": 1.0175, - "step": 9653 - }, - { - "epoch": 0.72553735157072, - "grad_norm": 1.6968929114195532, - "learning_rate": 7.395818090389401e-07, - "loss": 0.9497, - "step": 9654 - }, - { - "epoch": 0.7256125056365549, - "grad_norm": 2.243882020769435, - "learning_rate": 7.39203849713149e-07, - "loss": 1.0384, - "step": 9655 - }, - { - "epoch": 0.7256876597023899, - "grad_norm": 3.1664841415270786, - "learning_rate": 7.388259650943578e-07, - "loss": 0.9992, - "step": 9656 - }, - { - "epoch": 0.7257628137682248, - "grad_norm": 5.0003213779162525, - "learning_rate": 7.384481552049575e-07, - "loss": 1.0281, - "step": 9657 - }, - { - "epoch": 0.7258379678340598, - "grad_norm": 1.797838964216112, - "learning_rate": 7.380704200673349e-07, - "loss": 0.9148, - "step": 9658 - }, - { - "epoch": 0.7259131218998948, - "grad_norm": 3.127380684971126, - "learning_rate": 7.376927597038714e-07, - "loss": 0.9175, - "step": 9659 - }, - { - "epoch": 0.7259882759657298, - "grad_norm": 1.428849438455094, - "learning_rate": 7.373151741369463e-07, - "loss": 0.9546, - "step": 9660 - }, - { - "epoch": 0.7260634300315647, - "grad_norm": 1.9700941311184927, - "learning_rate": 7.369376633889324e-07, - "loss": 0.8488, - "step": 9661 - }, - { - "epoch": 0.7261385840973996, - "grad_norm": 1.6651530386210178, - "learning_rate": 7.365602274821983e-07, - "loss": 1.0414, - "step": 9662 - }, - { - "epoch": 0.7262137381632346, - "grad_norm": 2.1474738161846654, - "learning_rate": 7.361828664391088e-07, - "loss": 0.9978, - "step": 9663 - }, - { - "epoch": 0.7262888922290696, - "grad_norm": 1.6853492594451323, - "learning_rate": 7.358055802820234e-07, - "loss": 0.8668, - "step": 9664 - }, - { - "epoch": 0.7263640462949046, - "grad_norm": 2.21019762148116, - "learning_rate": 7.354283690332987e-07, - "loss": 0.9653, - "step": 9665 - }, - { - "epoch": 0.7264392003607395, - "grad_norm": 1.9201841902797294, - "learning_rate": 7.350512327152858e-07, - "loss": 1.0432, - "step": 9666 - }, - { - "epoch": 0.7265143544265745, - "grad_norm": 1.8203987461336395, - "learning_rate": 7.346741713503304e-07, - "loss": 1.0358, - "step": 9667 - }, - { - "epoch": 0.7265895084924094, - "grad_norm": 1.7357102486565146, - "learning_rate": 7.342971849607762e-07, - "loss": 0.9333, - "step": 9668 - }, - { - "epoch": 0.7266646625582444, - "grad_norm": 1.9901133791575432, - "learning_rate": 7.339202735689606e-07, - "loss": 0.8549, - "step": 9669 - }, - { - "epoch": 0.7267398166240794, - "grad_norm": 1.7272087717307942, - "learning_rate": 7.335434371972169e-07, - "loss": 0.9035, - "step": 9670 - }, - { - "epoch": 0.7268149706899143, - "grad_norm": 1.501167875865778, - "learning_rate": 7.331666758678734e-07, - "loss": 0.9959, - "step": 9671 - }, - { - "epoch": 0.7268901247557493, - "grad_norm": 1.509057277307816, - "learning_rate": 7.327899896032561e-07, - "loss": 1.0287, - "step": 9672 - }, - { - "epoch": 0.7269652788215842, - "grad_norm": 2.295763642164322, - "learning_rate": 7.324133784256846e-07, - "loss": 1.0064, - "step": 9673 - }, - { - "epoch": 0.7270404328874193, - "grad_norm": 0.743389200803938, - "learning_rate": 7.320368423574741e-07, - "loss": 0.798, - "step": 9674 - }, - { - "epoch": 0.7271155869532542, - "grad_norm": 1.8546004341598064, - "learning_rate": 7.316603814209359e-07, - "loss": 0.9681, - "step": 9675 - }, - { - "epoch": 0.7271907410190891, - "grad_norm": 1.663342745581672, - "learning_rate": 7.312839956383765e-07, - "loss": 0.991, - "step": 9676 - }, - { - "epoch": 0.7272658950849241, - "grad_norm": 9.403453322434638, - "learning_rate": 7.309076850320993e-07, - "loss": 0.9385, - "step": 9677 - }, - { - "epoch": 0.727341049150759, - "grad_norm": 1.4614367116614295, - "learning_rate": 7.305314496244012e-07, - "loss": 1.0742, - "step": 9678 - }, - { - "epoch": 0.7274162032165941, - "grad_norm": 2.778787186854704, - "learning_rate": 7.30155289437576e-07, - "loss": 0.9787, - "step": 9679 - }, - { - "epoch": 0.727491357282429, - "grad_norm": 3.895560079909132, - "learning_rate": 7.297792044939124e-07, - "loss": 0.942, - "step": 9680 - }, - { - "epoch": 0.7275665113482639, - "grad_norm": 3.3302274698457954, - "learning_rate": 7.294031948156942e-07, - "loss": 1.0053, - "step": 9681 - }, - { - "epoch": 0.7276416654140989, - "grad_norm": 1.9383020279188499, - "learning_rate": 7.290272604252028e-07, - "loss": 1.0406, - "step": 9682 - }, - { - "epoch": 0.7277168194799338, - "grad_norm": 1.9478943019405834, - "learning_rate": 7.286514013447134e-07, - "loss": 0.9209, - "step": 9683 - }, - { - "epoch": 0.7277919735457689, - "grad_norm": 2.3414776785758176, - "learning_rate": 7.282756175964966e-07, - "loss": 1.0163, - "step": 9684 - }, - { - "epoch": 0.7278671276116038, - "grad_norm": 1.8843743268924353, - "learning_rate": 7.278999092028191e-07, - "loss": 0.9724, - "step": 9685 - }, - { - "epoch": 0.7279422816774388, - "grad_norm": 3.14102656136364, - "learning_rate": 7.275242761859426e-07, - "loss": 0.9623, - "step": 9686 - }, - { - "epoch": 0.7280174357432737, - "grad_norm": 15.035377490902757, - "learning_rate": 7.27148718568126e-07, - "loss": 0.9866, - "step": 9687 - }, - { - "epoch": 0.7280925898091086, - "grad_norm": 2.4033316217099823, - "learning_rate": 7.267732363716219e-07, - "loss": 0.922, - "step": 9688 - }, - { - "epoch": 0.7281677438749437, - "grad_norm": 2.4640717423386036, - "learning_rate": 7.263978296186784e-07, - "loss": 0.9231, - "step": 9689 - }, - { - "epoch": 0.7282428979407786, - "grad_norm": 1.593038886501406, - "learning_rate": 7.260224983315413e-07, - "loss": 0.9359, - "step": 9690 - }, - { - "epoch": 0.7283180520066136, - "grad_norm": 1.8381691862448712, - "learning_rate": 7.256472425324494e-07, - "loss": 1.0459, - "step": 9691 - }, - { - "epoch": 0.7283932060724485, - "grad_norm": 2.0624389639406226, - "learning_rate": 7.252720622436382e-07, - "loss": 0.8991, - "step": 9692 - }, - { - "epoch": 0.7284683601382835, - "grad_norm": 2.5825885396368267, - "learning_rate": 7.248969574873378e-07, - "loss": 1.0188, - "step": 9693 - }, - { - "epoch": 0.7285435142041184, - "grad_norm": 1.6327868737943068, - "learning_rate": 7.245219282857761e-07, - "loss": 0.9624, - "step": 9694 - }, - { - "epoch": 0.7286186682699534, - "grad_norm": 1.7045597717377456, - "learning_rate": 7.241469746611743e-07, - "loss": 1.0041, - "step": 9695 - }, - { - "epoch": 0.7286938223357884, - "grad_norm": 3.409003728272024, - "learning_rate": 7.237720966357499e-07, - "loss": 1.0084, - "step": 9696 - }, - { - "epoch": 0.7287689764016233, - "grad_norm": 6.488919129729386, - "learning_rate": 7.233972942317157e-07, - "loss": 0.8512, - "step": 9697 - }, - { - "epoch": 0.7288441304674583, - "grad_norm": 1.6368646306324488, - "learning_rate": 7.230225674712795e-07, - "loss": 0.9229, - "step": 9698 - }, - { - "epoch": 0.7289192845332932, - "grad_norm": 1.8638561329598238, - "learning_rate": 7.22647916376647e-07, - "loss": 0.9193, - "step": 9699 - }, - { - "epoch": 0.7289944385991282, - "grad_norm": 1.5324995431074797, - "learning_rate": 7.222733409700165e-07, - "loss": 1.0033, - "step": 9700 - }, - { - "epoch": 0.7290695926649632, - "grad_norm": 1.9951478392813373, - "learning_rate": 7.218988412735833e-07, - "loss": 0.951, - "step": 9701 - }, - { - "epoch": 0.7291447467307981, - "grad_norm": 1.724641447535862, - "learning_rate": 7.215244173095381e-07, - "loss": 0.9357, - "step": 9702 - }, - { - "epoch": 0.7292199007966331, - "grad_norm": 1.6555110074492174, - "learning_rate": 7.21150069100066e-07, - "loss": 0.9054, - "step": 9703 - }, - { - "epoch": 0.729295054862468, - "grad_norm": 2.2563060764096514, - "learning_rate": 7.2077579666735e-07, - "loss": 0.9121, - "step": 9704 - }, - { - "epoch": 0.7293702089283031, - "grad_norm": 1.4641652074266318, - "learning_rate": 7.204016000335666e-07, - "loss": 0.9375, - "step": 9705 - }, - { - "epoch": 0.729445362994138, - "grad_norm": 2.3414875554811614, - "learning_rate": 7.200274792208882e-07, - "loss": 0.9169, - "step": 9706 - }, - { - "epoch": 0.7295205170599729, - "grad_norm": 0.6549319700773374, - "learning_rate": 7.196534342514831e-07, - "loss": 0.7812, - "step": 9707 - }, - { - "epoch": 0.7295956711258079, - "grad_norm": 1.7318641427125503, - "learning_rate": 7.192794651475141e-07, - "loss": 0.937, - "step": 9708 - }, - { - "epoch": 0.7296708251916428, - "grad_norm": 1.8435745802366927, - "learning_rate": 7.189055719311416e-07, - "loss": 0.9753, - "step": 9709 - }, - { - "epoch": 0.7297459792574779, - "grad_norm": 2.1250871752640936, - "learning_rate": 7.185317546245198e-07, - "loss": 1.0115, - "step": 9710 - }, - { - "epoch": 0.7298211333233128, - "grad_norm": 1.8723091526332016, - "learning_rate": 7.181580132497978e-07, - "loss": 0.9228, - "step": 9711 - }, - { - "epoch": 0.7298962873891478, - "grad_norm": 3.3045006376783324, - "learning_rate": 7.177843478291225e-07, - "loss": 0.9327, - "step": 9712 - }, - { - "epoch": 0.7299714414549827, - "grad_norm": 2.0263125933910726, - "learning_rate": 7.174107583846348e-07, - "loss": 0.985, - "step": 9713 - }, - { - "epoch": 0.7300465955208176, - "grad_norm": 1.7660772368971867, - "learning_rate": 7.17037244938471e-07, - "loss": 0.9464, - "step": 9714 - }, - { - "epoch": 0.7301217495866527, - "grad_norm": 1.7190268553654924, - "learning_rate": 7.166638075127625e-07, - "loss": 0.9762, - "step": 9715 - }, - { - "epoch": 0.7301969036524876, - "grad_norm": 1.5676097479795312, - "learning_rate": 7.162904461296382e-07, - "loss": 0.925, - "step": 9716 - }, - { - "epoch": 0.7302720577183226, - "grad_norm": 1.8165418348674183, - "learning_rate": 7.159171608112207e-07, - "loss": 1.0198, - "step": 9717 - }, - { - "epoch": 0.7303472117841575, - "grad_norm": 2.1910807049859478, - "learning_rate": 7.155439515796284e-07, - "loss": 0.9941, - "step": 9718 - }, - { - "epoch": 0.7304223658499925, - "grad_norm": 4.049228293282334, - "learning_rate": 7.151708184569758e-07, - "loss": 0.9359, - "step": 9719 - }, - { - "epoch": 0.7304975199158275, - "grad_norm": 3.377570409711483, - "learning_rate": 7.147977614653711e-07, - "loss": 0.9732, - "step": 9720 - }, - { - "epoch": 0.7305726739816624, - "grad_norm": 0.6877066778420953, - "learning_rate": 7.144247806269213e-07, - "loss": 0.8768, - "step": 9721 - }, - { - "epoch": 0.7306478280474974, - "grad_norm": 2.4108580026728066, - "learning_rate": 7.140518759637259e-07, - "loss": 0.9317, - "step": 9722 - }, - { - "epoch": 0.7307229821133323, - "grad_norm": 1.9111078618389177, - "learning_rate": 7.136790474978814e-07, - "loss": 0.9838, - "step": 9723 - }, - { - "epoch": 0.7307981361791673, - "grad_norm": 1.4389628554978628, - "learning_rate": 7.133062952514786e-07, - "loss": 0.9892, - "step": 9724 - }, - { - "epoch": 0.7308732902450022, - "grad_norm": 1.7779109510901, - "learning_rate": 7.129336192466044e-07, - "loss": 0.872, - "step": 9725 - }, - { - "epoch": 0.7309484443108372, - "grad_norm": 1.6064573339690777, - "learning_rate": 7.125610195053424e-07, - "loss": 0.9402, - "step": 9726 - }, - { - "epoch": 0.7310235983766722, - "grad_norm": 1.9134764104883768, - "learning_rate": 7.1218849604977e-07, - "loss": 0.9004, - "step": 9727 - }, - { - "epoch": 0.7310987524425071, - "grad_norm": 2.0245652756478085, - "learning_rate": 7.118160489019605e-07, - "loss": 1.0144, - "step": 9728 - }, - { - "epoch": 0.7311739065083421, - "grad_norm": 3.8039892288280144, - "learning_rate": 7.114436780839827e-07, - "loss": 0.9483, - "step": 9729 - }, - { - "epoch": 0.731249060574177, - "grad_norm": 1.5910963517989751, - "learning_rate": 7.110713836179007e-07, - "loss": 1.0096, - "step": 9730 - }, - { - "epoch": 0.7313242146400121, - "grad_norm": 1.5667520269852853, - "learning_rate": 7.106991655257754e-07, - "loss": 0.9132, - "step": 9731 - }, - { - "epoch": 0.731399368705847, - "grad_norm": 1.049954794864305, - "learning_rate": 7.103270238296619e-07, - "loss": 0.8484, - "step": 9732 - }, - { - "epoch": 0.7314745227716819, - "grad_norm": 2.784703029229245, - "learning_rate": 7.099549585516098e-07, - "loss": 0.9966, - "step": 9733 - }, - { - "epoch": 0.7315496768375169, - "grad_norm": 1.274768251038914, - "learning_rate": 7.095829697136671e-07, - "loss": 1.0051, - "step": 9734 - }, - { - "epoch": 0.7316248309033518, - "grad_norm": 2.030514510586875, - "learning_rate": 7.092110573378747e-07, - "loss": 0.9241, - "step": 9735 - }, - { - "epoch": 0.7316999849691869, - "grad_norm": 1.7900558445919537, - "learning_rate": 7.0883922144627e-07, - "loss": 0.8883, - "step": 9736 - }, - { - "epoch": 0.7317751390350218, - "grad_norm": 1.490572469813361, - "learning_rate": 7.08467462060885e-07, - "loss": 0.9468, - "step": 9737 - }, - { - "epoch": 0.7318502931008568, - "grad_norm": 1.7782321595951915, - "learning_rate": 7.08095779203749e-07, - "loss": 0.9802, - "step": 9738 - }, - { - "epoch": 0.7319254471666917, - "grad_norm": 1.690214375888263, - "learning_rate": 7.077241728968852e-07, - "loss": 1.0473, - "step": 9739 - }, - { - "epoch": 0.7320006012325266, - "grad_norm": 1.5591809689089922, - "learning_rate": 7.073526431623127e-07, - "loss": 0.9747, - "step": 9740 - }, - { - "epoch": 0.7320757552983617, - "grad_norm": 2.074104377768667, - "learning_rate": 7.069811900220458e-07, - "loss": 0.9598, - "step": 9741 - }, - { - "epoch": 0.7321509093641966, - "grad_norm": 0.7578046346039954, - "learning_rate": 7.06609813498094e-07, - "loss": 0.8097, - "step": 9742 - }, - { - "epoch": 0.7322260634300316, - "grad_norm": 1.9316537700394807, - "learning_rate": 7.062385136124642e-07, - "loss": 0.912, - "step": 9743 - }, - { - "epoch": 0.7323012174958665, - "grad_norm": 1.83681645557396, - "learning_rate": 7.058672903871568e-07, - "loss": 0.968, - "step": 9744 - }, - { - "epoch": 0.7323763715617014, - "grad_norm": 2.0738425048283595, - "learning_rate": 7.054961438441678e-07, - "loss": 0.9179, - "step": 9745 - }, - { - "epoch": 0.7324515256275365, - "grad_norm": 1.5156810789450066, - "learning_rate": 7.051250740054892e-07, - "loss": 0.9928, - "step": 9746 - }, - { - "epoch": 0.7325266796933714, - "grad_norm": 3.806568346663412, - "learning_rate": 7.047540808931078e-07, - "loss": 1.0482, - "step": 9747 - }, - { - "epoch": 0.7326018337592064, - "grad_norm": 2.0871292881530623, - "learning_rate": 7.043831645290077e-07, - "loss": 0.9099, - "step": 9748 - }, - { - "epoch": 0.7326769878250413, - "grad_norm": 0.747772804032415, - "learning_rate": 7.040123249351662e-07, - "loss": 0.8664, - "step": 9749 - }, - { - "epoch": 0.7327521418908763, - "grad_norm": 1.5776751793081583, - "learning_rate": 7.036415621335572e-07, - "loss": 0.9323, - "step": 9750 - }, - { - "epoch": 0.7328272959567113, - "grad_norm": 1.3845103126684593, - "learning_rate": 7.032708761461496e-07, - "loss": 0.9402, - "step": 9751 - }, - { - "epoch": 0.7329024500225462, - "grad_norm": 2.237575026804651, - "learning_rate": 7.029002669949075e-07, - "loss": 0.989, - "step": 9752 - }, - { - "epoch": 0.7329776040883812, - "grad_norm": 1.652024186347531, - "learning_rate": 7.025297347017922e-07, - "loss": 0.9291, - "step": 9753 - }, - { - "epoch": 0.7330527581542161, - "grad_norm": 2.4648432663616306, - "learning_rate": 7.021592792887585e-07, - "loss": 0.9373, - "step": 9754 - }, - { - "epoch": 0.7331279122200511, - "grad_norm": 4.053886554969382, - "learning_rate": 7.017889007777566e-07, - "loss": 1.0122, - "step": 9755 - }, - { - "epoch": 0.733203066285886, - "grad_norm": 1.5708217269644071, - "learning_rate": 7.014185991907342e-07, - "loss": 0.8826, - "step": 9756 - }, - { - "epoch": 0.7332782203517211, - "grad_norm": 1.6196375913620014, - "learning_rate": 7.010483745496322e-07, - "loss": 1.0151, - "step": 9757 - }, - { - "epoch": 0.733353374417556, - "grad_norm": 2.102263606658571, - "learning_rate": 7.006782268763885e-07, - "loss": 0.9826, - "step": 9758 - }, - { - "epoch": 0.7334285284833909, - "grad_norm": 1.7167089654593823, - "learning_rate": 7.003081561929342e-07, - "loss": 1.0224, - "step": 9759 - }, - { - "epoch": 0.7335036825492259, - "grad_norm": 1.8417029574286645, - "learning_rate": 6.999381625211993e-07, - "loss": 1.0301, - "step": 9760 - }, - { - "epoch": 0.7335788366150608, - "grad_norm": 36.7216554701548, - "learning_rate": 6.995682458831064e-07, - "loss": 0.9745, - "step": 9761 - }, - { - "epoch": 0.7336539906808959, - "grad_norm": 2.8886271402997425, - "learning_rate": 6.991984063005747e-07, - "loss": 0.9949, - "step": 9762 - }, - { - "epoch": 0.7337291447467308, - "grad_norm": 1.9000876205719646, - "learning_rate": 6.988286437955186e-07, - "loss": 0.9938, - "step": 9763 - }, - { - "epoch": 0.7338042988125658, - "grad_norm": 1.8644214396458774, - "learning_rate": 6.984589583898473e-07, - "loss": 0.9055, - "step": 9764 - }, - { - "epoch": 0.7338794528784007, - "grad_norm": 2.859087986950815, - "learning_rate": 6.98089350105467e-07, - "loss": 0.7445, - "step": 9765 - }, - { - "epoch": 0.7339546069442356, - "grad_norm": 4.82764854672776, - "learning_rate": 6.977198189642783e-07, - "loss": 0.9298, - "step": 9766 - }, - { - "epoch": 0.7340297610100707, - "grad_norm": 2.4848931689872096, - "learning_rate": 6.973503649881769e-07, - "loss": 0.7594, - "step": 9767 - }, - { - "epoch": 0.7341049150759056, - "grad_norm": 1.818783293743771, - "learning_rate": 6.969809881990547e-07, - "loss": 0.9941, - "step": 9768 - }, - { - "epoch": 0.7341800691417406, - "grad_norm": 1.3813723376197278, - "learning_rate": 6.966116886187978e-07, - "loss": 0.9671, - "step": 9769 - }, - { - "epoch": 0.7342552232075755, - "grad_norm": 0.6946208814492352, - "learning_rate": 6.962424662692903e-07, - "loss": 0.8354, - "step": 9770 - }, - { - "epoch": 0.7343303772734104, - "grad_norm": 2.647229934908864, - "learning_rate": 6.958733211724089e-07, - "loss": 1.0827, - "step": 9771 - }, - { - "epoch": 0.7344055313392455, - "grad_norm": 1.5403262144605427, - "learning_rate": 6.955042533500261e-07, - "loss": 0.9424, - "step": 9772 - }, - { - "epoch": 0.7344806854050804, - "grad_norm": 4.005811284602854, - "learning_rate": 6.951352628240133e-07, - "loss": 0.9784, - "step": 9773 - }, - { - "epoch": 0.7345558394709154, - "grad_norm": 0.7167187889869202, - "learning_rate": 6.947663496162313e-07, - "loss": 0.8332, - "step": 9774 - }, - { - "epoch": 0.7346309935367503, - "grad_norm": 1.7413736674913152, - "learning_rate": 6.943975137485418e-07, - "loss": 1.065, - "step": 9775 - }, - { - "epoch": 0.7347061476025853, - "grad_norm": 1.3896540992176478, - "learning_rate": 6.940287552427992e-07, - "loss": 0.9872, - "step": 9776 - }, - { - "epoch": 0.7347813016684203, - "grad_norm": 1.7731835271664365, - "learning_rate": 6.936600741208529e-07, - "loss": 0.9876, - "step": 9777 - }, - { - "epoch": 0.7348564557342552, - "grad_norm": 1.5366472375983382, - "learning_rate": 6.932914704045505e-07, - "loss": 0.9726, - "step": 9778 - }, - { - "epoch": 0.7349316098000902, - "grad_norm": 2.038654856657388, - "learning_rate": 6.929229441157321e-07, - "loss": 1.0175, - "step": 9779 - }, - { - "epoch": 0.7350067638659251, - "grad_norm": 6.163149922050406, - "learning_rate": 6.925544952762341e-07, - "loss": 1.018, - "step": 9780 - }, - { - "epoch": 0.7350819179317601, - "grad_norm": 0.6703153072200583, - "learning_rate": 6.921861239078883e-07, - "loss": 0.8385, - "step": 9781 - }, - { - "epoch": 0.7351570719975951, - "grad_norm": 0.7001232285684039, - "learning_rate": 6.918178300325235e-07, - "loss": 0.8231, - "step": 9782 - }, - { - "epoch": 0.7352322260634301, - "grad_norm": 6.6038486472334, - "learning_rate": 6.914496136719614e-07, - "loss": 0.8927, - "step": 9783 - }, - { - "epoch": 0.735307380129265, - "grad_norm": 1.561737103185015, - "learning_rate": 6.910814748480204e-07, - "loss": 0.8637, - "step": 9784 - }, - { - "epoch": 0.7353825341950999, - "grad_norm": 0.7033163022364234, - "learning_rate": 6.907134135825146e-07, - "loss": 0.8201, - "step": 9785 - }, - { - "epoch": 0.7354576882609349, - "grad_norm": 1.5129124859235832, - "learning_rate": 6.903454298972515e-07, - "loss": 0.9838, - "step": 9786 - }, - { - "epoch": 0.7355328423267699, - "grad_norm": 1.7699523828857955, - "learning_rate": 6.899775238140375e-07, - "loss": 1.067, - "step": 9787 - }, - { - "epoch": 0.7356079963926049, - "grad_norm": 3.120194669665873, - "learning_rate": 6.896096953546717e-07, - "loss": 0.9826, - "step": 9788 - }, - { - "epoch": 0.7356831504584398, - "grad_norm": 1.9039917373718365, - "learning_rate": 6.892419445409492e-07, - "loss": 0.9529, - "step": 9789 - }, - { - "epoch": 0.7357583045242747, - "grad_norm": 1.3981803785533824, - "learning_rate": 6.88874271394661e-07, - "loss": 0.9997, - "step": 9790 - }, - { - "epoch": 0.7358334585901097, - "grad_norm": 1.5168577077388417, - "learning_rate": 6.885066759375917e-07, - "loss": 1.0518, - "step": 9791 - }, - { - "epoch": 0.7359086126559446, - "grad_norm": 1.5569665201516205, - "learning_rate": 6.881391581915248e-07, - "loss": 0.9625, - "step": 9792 - }, - { - "epoch": 0.7359837667217797, - "grad_norm": 1.6809741393034088, - "learning_rate": 6.877717181782363e-07, - "loss": 0.9578, - "step": 9793 - }, - { - "epoch": 0.7360589207876146, - "grad_norm": 1.586879657078956, - "learning_rate": 6.874043559194976e-07, - "loss": 0.9364, - "step": 9794 - }, - { - "epoch": 0.7361340748534496, - "grad_norm": 0.5810267081055721, - "learning_rate": 6.870370714370784e-07, - "loss": 0.7863, - "step": 9795 - }, - { - "epoch": 0.7362092289192845, - "grad_norm": 1.3336401477110424, - "learning_rate": 6.866698647527391e-07, - "loss": 0.9048, - "step": 9796 - }, - { - "epoch": 0.7362843829851194, - "grad_norm": 1.6237370645126756, - "learning_rate": 6.8630273588824e-07, - "loss": 1.0027, - "step": 9797 - }, - { - "epoch": 0.7363595370509545, - "grad_norm": 1.6996284078743553, - "learning_rate": 6.859356848653344e-07, - "loss": 0.9286, - "step": 9798 - }, - { - "epoch": 0.7364346911167894, - "grad_norm": 2.1875920957524695, - "learning_rate": 6.855687117057707e-07, - "loss": 1.0195, - "step": 9799 - }, - { - "epoch": 0.7365098451826244, - "grad_norm": 1.9531659541604696, - "learning_rate": 6.852018164312947e-07, - "loss": 0.9999, - "step": 9800 - }, - { - "epoch": 0.7365849992484593, - "grad_norm": 1.9828411993019435, - "learning_rate": 6.848349990636457e-07, - "loss": 0.8854, - "step": 9801 - }, - { - "epoch": 0.7366601533142944, - "grad_norm": 1.9750500346738895, - "learning_rate": 6.844682596245592e-07, - "loss": 0.967, - "step": 9802 - }, - { - "epoch": 0.7367353073801293, - "grad_norm": 1.4078486786151263, - "learning_rate": 6.841015981357652e-07, - "loss": 0.9647, - "step": 9803 - }, - { - "epoch": 0.7368104614459642, - "grad_norm": 1.9997505390039179, - "learning_rate": 6.837350146189909e-07, - "loss": 0.9607, - "step": 9804 - }, - { - "epoch": 0.7368856155117992, - "grad_norm": 1.6325412506686892, - "learning_rate": 6.833685090959575e-07, - "loss": 0.9496, - "step": 9805 - }, - { - "epoch": 0.7369607695776341, - "grad_norm": 1.863745097838926, - "learning_rate": 6.830020815883815e-07, - "loss": 1.0626, - "step": 9806 - }, - { - "epoch": 0.7370359236434691, - "grad_norm": 1.8113391710528242, - "learning_rate": 6.826357321179754e-07, - "loss": 0.9892, - "step": 9807 - }, - { - "epoch": 0.7371110777093041, - "grad_norm": 1.760343501208936, - "learning_rate": 6.822694607064461e-07, - "loss": 0.8769, - "step": 9808 - }, - { - "epoch": 0.7371862317751391, - "grad_norm": 1.399198825679415, - "learning_rate": 6.819032673754976e-07, - "loss": 1.0283, - "step": 9809 - }, - { - "epoch": 0.737261385840974, - "grad_norm": 2.4438186323669875, - "learning_rate": 6.81537152146828e-07, - "loss": 0.9332, - "step": 9810 - }, - { - "epoch": 0.7373365399068089, - "grad_norm": 3.2183858424729426, - "learning_rate": 6.811711150421309e-07, - "loss": 0.9649, - "step": 9811 - }, - { - "epoch": 0.737411693972644, - "grad_norm": 3.1861249164199803, - "learning_rate": 6.808051560830954e-07, - "loss": 0.9792, - "step": 9812 - }, - { - "epoch": 0.7374868480384789, - "grad_norm": 2.6885780124111704, - "learning_rate": 6.804392752914052e-07, - "loss": 0.9805, - "step": 9813 - }, - { - "epoch": 0.7375620021043139, - "grad_norm": 1.7412509196895847, - "learning_rate": 6.800734726887416e-07, - "loss": 1.0077, - "step": 9814 - }, - { - "epoch": 0.7376371561701488, - "grad_norm": 2.1546407029213253, - "learning_rate": 6.79707748296779e-07, - "loss": 0.9582, - "step": 9815 - }, - { - "epoch": 0.7377123102359837, - "grad_norm": 2.1306766533066543, - "learning_rate": 6.793421021371872e-07, - "loss": 0.9687, - "step": 9816 - }, - { - "epoch": 0.7377874643018187, - "grad_norm": 1.7270787360571525, - "learning_rate": 6.789765342316341e-07, - "loss": 1.0332, - "step": 9817 - }, - { - "epoch": 0.7378626183676537, - "grad_norm": 2.1044270445616706, - "learning_rate": 6.786110446017794e-07, - "loss": 0.9593, - "step": 9818 - }, - { - "epoch": 0.7379377724334887, - "grad_norm": 2.964955203407858, - "learning_rate": 6.782456332692805e-07, - "loss": 0.9794, - "step": 9819 - }, - { - "epoch": 0.7380129264993236, - "grad_norm": 1.9741802711495766, - "learning_rate": 6.778803002557891e-07, - "loss": 1.0012, - "step": 9820 - }, - { - "epoch": 0.7380880805651586, - "grad_norm": 2.18499470707931, - "learning_rate": 6.775150455829521e-07, - "loss": 1.0071, - "step": 9821 - }, - { - "epoch": 0.7381632346309935, - "grad_norm": 3.2373029199655807, - "learning_rate": 6.771498692724133e-07, - "loss": 0.9436, - "step": 9822 - }, - { - "epoch": 0.7382383886968285, - "grad_norm": 1.7971059899509076, - "learning_rate": 6.767847713458104e-07, - "loss": 0.9005, - "step": 9823 - }, - { - "epoch": 0.7383135427626635, - "grad_norm": 1.7116371013436718, - "learning_rate": 6.764197518247767e-07, - "loss": 1.0112, - "step": 9824 - }, - { - "epoch": 0.7383886968284984, - "grad_norm": 1.8056877633948516, - "learning_rate": 6.760548107309403e-07, - "loss": 1.0628, - "step": 9825 - }, - { - "epoch": 0.7384638508943334, - "grad_norm": 1.7132680688047441, - "learning_rate": 6.756899480859268e-07, - "loss": 1.0081, - "step": 9826 - }, - { - "epoch": 0.7385390049601683, - "grad_norm": 1.4718766716625657, - "learning_rate": 6.753251639113551e-07, - "loss": 0.9099, - "step": 9827 - }, - { - "epoch": 0.7386141590260034, - "grad_norm": 1.3941116342819653, - "learning_rate": 6.749604582288397e-07, - "loss": 0.9817, - "step": 9828 - }, - { - "epoch": 0.7386893130918383, - "grad_norm": 0.6368746888344317, - "learning_rate": 6.745958310599913e-07, - "loss": 0.738, - "step": 9829 - }, - { - "epoch": 0.7387644671576732, - "grad_norm": 1.8553104373251545, - "learning_rate": 6.742312824264145e-07, - "loss": 1.0352, - "step": 9830 - }, - { - "epoch": 0.7388396212235082, - "grad_norm": 1.7327577078354348, - "learning_rate": 6.738668123497115e-07, - "loss": 0.9227, - "step": 9831 - }, - { - "epoch": 0.7389147752893431, - "grad_norm": 1.7784919132266521, - "learning_rate": 6.735024208514782e-07, - "loss": 1.1008, - "step": 9832 - }, - { - "epoch": 0.7389899293551782, - "grad_norm": 1.8987214931736371, - "learning_rate": 6.731381079533056e-07, - "loss": 0.9301, - "step": 9833 - }, - { - "epoch": 0.7390650834210131, - "grad_norm": 1.9942098487847235, - "learning_rate": 6.727738736767812e-07, - "loss": 0.9318, - "step": 9834 - }, - { - "epoch": 0.739140237486848, - "grad_norm": 1.7150442227814977, - "learning_rate": 6.724097180434865e-07, - "loss": 0.9301, - "step": 9835 - }, - { - "epoch": 0.739215391552683, - "grad_norm": 1.9390694814059894, - "learning_rate": 6.720456410750002e-07, - "loss": 0.9237, - "step": 9836 - }, - { - "epoch": 0.7392905456185179, - "grad_norm": 1.8327265804669282, - "learning_rate": 6.716816427928949e-07, - "loss": 0.9768, - "step": 9837 - }, - { - "epoch": 0.739365699684353, - "grad_norm": 3.248730925224759, - "learning_rate": 6.71317723218738e-07, - "loss": 0.8273, - "step": 9838 - }, - { - "epoch": 0.7394408537501879, - "grad_norm": 1.6953321183295678, - "learning_rate": 6.709538823740943e-07, - "loss": 1.0003, - "step": 9839 - }, - { - "epoch": 0.7395160078160229, - "grad_norm": 3.294609122818026, - "learning_rate": 6.705901202805226e-07, - "loss": 1.0011, - "step": 9840 - }, - { - "epoch": 0.7395911618818578, - "grad_norm": 2.0366849521163664, - "learning_rate": 6.702264369595767e-07, - "loss": 0.9135, - "step": 9841 - }, - { - "epoch": 0.7396663159476927, - "grad_norm": 1.707834923152879, - "learning_rate": 6.698628324328066e-07, - "loss": 0.9457, - "step": 9842 - }, - { - "epoch": 0.7397414700135277, - "grad_norm": 2.2902548747257003, - "learning_rate": 6.694993067217565e-07, - "loss": 0.948, - "step": 9843 - }, - { - "epoch": 0.7398166240793627, - "grad_norm": 1.8325594728194465, - "learning_rate": 6.691358598479679e-07, - "loss": 1.0209, - "step": 9844 - }, - { - "epoch": 0.7398917781451977, - "grad_norm": 1.703940729896902, - "learning_rate": 6.687724918329758e-07, - "loss": 0.9611, - "step": 9845 - }, - { - "epoch": 0.7399669322110326, - "grad_norm": 2.079756934929231, - "learning_rate": 6.684092026983113e-07, - "loss": 0.9759, - "step": 9846 - }, - { - "epoch": 0.7400420862768676, - "grad_norm": 1.67835541178713, - "learning_rate": 6.680459924654997e-07, - "loss": 1.0077, - "step": 9847 - }, - { - "epoch": 0.7401172403427025, - "grad_norm": 0.7497170629869938, - "learning_rate": 6.676828611560643e-07, - "loss": 0.8387, - "step": 9848 - }, - { - "epoch": 0.7401923944085375, - "grad_norm": 1.7450688915262014, - "learning_rate": 6.673198087915211e-07, - "loss": 0.9972, - "step": 9849 - }, - { - "epoch": 0.7402675484743725, - "grad_norm": 2.216525614942624, - "learning_rate": 6.669568353933824e-07, - "loss": 1.0433, - "step": 9850 - }, - { - "epoch": 0.7403427025402074, - "grad_norm": 0.8459097453462329, - "learning_rate": 6.665939409831556e-07, - "loss": 0.8642, - "step": 9851 - }, - { - "epoch": 0.7404178566060424, - "grad_norm": 1.4731988286780435, - "learning_rate": 6.662311255823432e-07, - "loss": 0.9006, - "step": 9852 - }, - { - "epoch": 0.7404930106718773, - "grad_norm": 1.4191010633183871, - "learning_rate": 6.658683892124446e-07, - "loss": 1.043, - "step": 9853 - }, - { - "epoch": 0.7405681647377124, - "grad_norm": 1.9881125268002238, - "learning_rate": 6.655057318949526e-07, - "loss": 0.9442, - "step": 9854 - }, - { - "epoch": 0.7406433188035473, - "grad_norm": 1.6746578407971242, - "learning_rate": 6.651431536513563e-07, - "loss": 0.8825, - "step": 9855 - }, - { - "epoch": 0.7407184728693822, - "grad_norm": 2.179414574821994, - "learning_rate": 6.647806545031396e-07, - "loss": 1.0122, - "step": 9856 - }, - { - "epoch": 0.7407936269352172, - "grad_norm": 2.4156244157047726, - "learning_rate": 6.644182344717813e-07, - "loss": 0.9378, - "step": 9857 - }, - { - "epoch": 0.7408687810010521, - "grad_norm": 1.9153064033459817, - "learning_rate": 6.640558935787575e-07, - "loss": 0.9628, - "step": 9858 - }, - { - "epoch": 0.7409439350668872, - "grad_norm": 2.1814780042345108, - "learning_rate": 6.636936318455377e-07, - "loss": 0.9271, - "step": 9859 - }, - { - "epoch": 0.7410190891327221, - "grad_norm": 2.2062656391227087, - "learning_rate": 6.633314492935866e-07, - "loss": 0.8872, - "step": 9860 - }, - { - "epoch": 0.741094243198557, - "grad_norm": 2.0988488811671417, - "learning_rate": 6.629693459443664e-07, - "loss": 0.9428, - "step": 9861 - }, - { - "epoch": 0.741169397264392, - "grad_norm": 1.8451769204847008, - "learning_rate": 6.62607321819332e-07, - "loss": 0.9659, - "step": 9862 - }, - { - "epoch": 0.7412445513302269, - "grad_norm": 1.5520038541716472, - "learning_rate": 6.622453769399353e-07, - "loss": 0.9858, - "step": 9863 - }, - { - "epoch": 0.741319705396062, - "grad_norm": 1.7006181939271712, - "learning_rate": 6.618835113276225e-07, - "loss": 0.9622, - "step": 9864 - }, - { - "epoch": 0.7413948594618969, - "grad_norm": 2.0756905429304693, - "learning_rate": 6.61521725003835e-07, - "loss": 0.9221, - "step": 9865 - }, - { - "epoch": 0.7414700135277319, - "grad_norm": 2.3864387310482367, - "learning_rate": 6.611600179900112e-07, - "loss": 1.0054, - "step": 9866 - }, - { - "epoch": 0.7415451675935668, - "grad_norm": 3.787458383060858, - "learning_rate": 6.607983903075832e-07, - "loss": 1.0327, - "step": 9867 - }, - { - "epoch": 0.7416203216594017, - "grad_norm": 1.6458482701392352, - "learning_rate": 6.604368419779787e-07, - "loss": 0.9824, - "step": 9868 - }, - { - "epoch": 0.7416954757252368, - "grad_norm": 1.8551494122815089, - "learning_rate": 6.600753730226203e-07, - "loss": 0.981, - "step": 9869 - }, - { - "epoch": 0.7417706297910717, - "grad_norm": 1.5004742985131925, - "learning_rate": 6.597139834629275e-07, - "loss": 0.9685, - "step": 9870 - }, - { - "epoch": 0.7418457838569067, - "grad_norm": 1.6569662164986338, - "learning_rate": 6.593526733203134e-07, - "loss": 0.9744, - "step": 9871 - }, - { - "epoch": 0.7419209379227416, - "grad_norm": 1.8334787340015397, - "learning_rate": 6.589914426161871e-07, - "loss": 1.0499, - "step": 9872 - }, - { - "epoch": 0.7419960919885766, - "grad_norm": 1.9119321840382006, - "learning_rate": 6.58630291371953e-07, - "loss": 0.9852, - "step": 9873 - }, - { - "epoch": 0.7420712460544115, - "grad_norm": 3.9180131721450415, - "learning_rate": 6.582692196090101e-07, - "loss": 1.048, - "step": 9874 - }, - { - "epoch": 0.7421464001202465, - "grad_norm": 1.7392197036637738, - "learning_rate": 6.579082273487541e-07, - "loss": 1.0166, - "step": 9875 - }, - { - "epoch": 0.7422215541860815, - "grad_norm": 2.34944265127646, - "learning_rate": 6.575473146125749e-07, - "loss": 1.0578, - "step": 9876 - }, - { - "epoch": 0.7422967082519164, - "grad_norm": 1.7259494159044715, - "learning_rate": 6.57186481421858e-07, - "loss": 0.9763, - "step": 9877 - }, - { - "epoch": 0.7423718623177514, - "grad_norm": 1.5846423042124695, - "learning_rate": 6.568257277979841e-07, - "loss": 0.9735, - "step": 9878 - }, - { - "epoch": 0.7424470163835863, - "grad_norm": 1.5509349064648672, - "learning_rate": 6.564650537623284e-07, - "loss": 0.8166, - "step": 9879 - }, - { - "epoch": 0.7425221704494213, - "grad_norm": 2.266753553613856, - "learning_rate": 6.561044593362636e-07, - "loss": 1.0625, - "step": 9880 - }, - { - "epoch": 0.7425973245152563, - "grad_norm": 1.9400725359973092, - "learning_rate": 6.557439445411559e-07, - "loss": 0.9763, - "step": 9881 - }, - { - "epoch": 0.7426724785810912, - "grad_norm": 3.7019464734049796, - "learning_rate": 6.553835093983662e-07, - "loss": 0.9783, - "step": 9882 - }, - { - "epoch": 0.7427476326469262, - "grad_norm": 2.4705244037385614, - "learning_rate": 6.550231539292533e-07, - "loss": 0.9297, - "step": 9883 - }, - { - "epoch": 0.7428227867127611, - "grad_norm": 1.7505063959139424, - "learning_rate": 6.546628781551687e-07, - "loss": 0.9695, - "step": 9884 - }, - { - "epoch": 0.7428979407785962, - "grad_norm": 1.343300766483072, - "learning_rate": 6.543026820974599e-07, - "loss": 1.0291, - "step": 9885 - }, - { - "epoch": 0.7429730948444311, - "grad_norm": 1.592356053528386, - "learning_rate": 6.539425657774706e-07, - "loss": 1.0825, - "step": 9886 - }, - { - "epoch": 0.743048248910266, - "grad_norm": 2.0886836459511455, - "learning_rate": 6.535825292165377e-07, - "loss": 0.9632, - "step": 9887 - }, - { - "epoch": 0.743123402976101, - "grad_norm": 2.362728943403699, - "learning_rate": 6.532225724359967e-07, - "loss": 0.9138, - "step": 9888 - }, - { - "epoch": 0.7431985570419359, - "grad_norm": 1.8887418716225464, - "learning_rate": 6.528626954571753e-07, - "loss": 1.0557, - "step": 9889 - }, - { - "epoch": 0.743273711107771, - "grad_norm": 1.6239158241461757, - "learning_rate": 6.525028983013976e-07, - "loss": 0.9255, - "step": 9890 - }, - { - "epoch": 0.7433488651736059, - "grad_norm": 1.5415390924130015, - "learning_rate": 6.521431809899827e-07, - "loss": 0.849, - "step": 9891 - }, - { - "epoch": 0.7434240192394409, - "grad_norm": 2.290262890514103, - "learning_rate": 6.517835435442461e-07, - "loss": 0.9144, - "step": 9892 - }, - { - "epoch": 0.7434991733052758, - "grad_norm": 1.6836965578202974, - "learning_rate": 6.514239859854973e-07, - "loss": 1.0274, - "step": 9893 - }, - { - "epoch": 0.7435743273711107, - "grad_norm": 3.2419870337592327, - "learning_rate": 6.510645083350412e-07, - "loss": 0.8894, - "step": 9894 - }, - { - "epoch": 0.7436494814369458, - "grad_norm": 2.0346779874550855, - "learning_rate": 6.507051106141786e-07, - "loss": 0.9278, - "step": 9895 - }, - { - "epoch": 0.7437246355027807, - "grad_norm": 2.777266403387425, - "learning_rate": 6.503457928442042e-07, - "loss": 0.9275, - "step": 9896 - }, - { - "epoch": 0.7437997895686157, - "grad_norm": 2.426647578293618, - "learning_rate": 6.499865550464103e-07, - "loss": 1.0393, - "step": 9897 - }, - { - "epoch": 0.7438749436344506, - "grad_norm": 6.878737387461038, - "learning_rate": 6.496273972420827e-07, - "loss": 1.0085, - "step": 9898 - }, - { - "epoch": 0.7439500977002856, - "grad_norm": 1.3923205636394547, - "learning_rate": 6.492683194525028e-07, - "loss": 0.9102, - "step": 9899 - }, - { - "epoch": 0.7440252517661206, - "grad_norm": 2.0906621453559087, - "learning_rate": 6.489093216989472e-07, - "loss": 1.022, - "step": 9900 - }, - { - "epoch": 0.7441004058319555, - "grad_norm": 1.808582347067691, - "learning_rate": 6.485504040026872e-07, - "loss": 1.0053, - "step": 9901 - }, - { - "epoch": 0.7441755598977905, - "grad_norm": 1.7941126280763147, - "learning_rate": 6.481915663849917e-07, - "loss": 0.9743, - "step": 9902 - }, - { - "epoch": 0.7442507139636254, - "grad_norm": 1.6257394428734582, - "learning_rate": 6.478328088671221e-07, - "loss": 0.9047, - "step": 9903 - }, - { - "epoch": 0.7443258680294604, - "grad_norm": 2.37652689388571, - "learning_rate": 6.474741314703358e-07, - "loss": 0.9487, - "step": 9904 - }, - { - "epoch": 0.7444010220952954, - "grad_norm": 1.2712063111756544, - "learning_rate": 6.471155342158871e-07, - "loss": 0.9931, - "step": 9905 - }, - { - "epoch": 0.7444761761611303, - "grad_norm": 1.586952748927482, - "learning_rate": 6.467570171250234e-07, - "loss": 0.9547, - "step": 9906 - }, - { - "epoch": 0.7445513302269653, - "grad_norm": 1.6917918692924099, - "learning_rate": 6.463985802189884e-07, - "loss": 0.8114, - "step": 9907 - }, - { - "epoch": 0.7446264842928002, - "grad_norm": 0.8637118754274629, - "learning_rate": 6.46040223519021e-07, - "loss": 0.9404, - "step": 9908 - }, - { - "epoch": 0.7447016383586352, - "grad_norm": 1.834018824764638, - "learning_rate": 6.456819470463542e-07, - "loss": 0.8384, - "step": 9909 - }, - { - "epoch": 0.7447767924244701, - "grad_norm": 1.9154761251875028, - "learning_rate": 6.453237508222186e-07, - "loss": 0.9047, - "step": 9910 - }, - { - "epoch": 0.7448519464903052, - "grad_norm": 2.1152069850849577, - "learning_rate": 6.449656348678383e-07, - "loss": 0.9757, - "step": 9911 - }, - { - "epoch": 0.7449271005561401, - "grad_norm": 2.0216440626087837, - "learning_rate": 6.446075992044329e-07, - "loss": 0.8937, - "step": 9912 - }, - { - "epoch": 0.745002254621975, - "grad_norm": 2.5084092805671583, - "learning_rate": 6.442496438532168e-07, - "loss": 0.9528, - "step": 9913 - }, - { - "epoch": 0.74507740868781, - "grad_norm": 0.8557952536481416, - "learning_rate": 6.438917688354013e-07, - "loss": 0.9973, - "step": 9914 - }, - { - "epoch": 0.7451525627536449, - "grad_norm": 2.0879118644411974, - "learning_rate": 6.435339741721915e-07, - "loss": 0.9889, - "step": 9915 - }, - { - "epoch": 0.74522771681948, - "grad_norm": 1.6932755991838009, - "learning_rate": 6.431762598847879e-07, - "loss": 0.9095, - "step": 9916 - }, - { - "epoch": 0.7453028708853149, - "grad_norm": 1.5682258216554494, - "learning_rate": 6.428186259943866e-07, - "loss": 0.9497, - "step": 9917 - }, - { - "epoch": 0.7453780249511499, - "grad_norm": 0.6909609723945825, - "learning_rate": 6.424610725221779e-07, - "loss": 0.8177, - "step": 9918 - }, - { - "epoch": 0.7454531790169848, - "grad_norm": 1.9586551956380123, - "learning_rate": 6.4210359948935e-07, - "loss": 1.034, - "step": 9919 - }, - { - "epoch": 0.7455283330828197, - "grad_norm": 3.1078429161643184, - "learning_rate": 6.417462069170834e-07, - "loss": 1.0126, - "step": 9920 - }, - { - "epoch": 0.7456034871486548, - "grad_norm": 1.7384815572133971, - "learning_rate": 6.413888948265551e-07, - "loss": 0.9828, - "step": 9921 - }, - { - "epoch": 0.7456786412144897, - "grad_norm": 3.5143286455392047, - "learning_rate": 6.410316632389372e-07, - "loss": 1.1001, - "step": 9922 - }, - { - "epoch": 0.7457537952803247, - "grad_norm": 0.6926788213844116, - "learning_rate": 6.406745121753964e-07, - "loss": 0.8263, - "step": 9923 - }, - { - "epoch": 0.7458289493461596, - "grad_norm": 2.15053874518253, - "learning_rate": 6.403174416570967e-07, - "loss": 0.895, - "step": 9924 - }, - { - "epoch": 0.7459041034119945, - "grad_norm": 4.006434510455932, - "learning_rate": 6.399604517051953e-07, - "loss": 1.0162, - "step": 9925 - }, - { - "epoch": 0.7459792574778296, - "grad_norm": 4.210233781716177, - "learning_rate": 6.396035423408442e-07, - "loss": 0.9563, - "step": 9926 - }, - { - "epoch": 0.7460544115436645, - "grad_norm": 2.7503564776942153, - "learning_rate": 6.392467135851934e-07, - "loss": 0.9417, - "step": 9927 - }, - { - "epoch": 0.7461295656094995, - "grad_norm": 3.0686482585504304, - "learning_rate": 6.388899654593853e-07, - "loss": 0.9307, - "step": 9928 - }, - { - "epoch": 0.7462047196753344, - "grad_norm": 13.63798757882538, - "learning_rate": 6.385332979845588e-07, - "loss": 0.9356, - "step": 9929 - }, - { - "epoch": 0.7462798737411694, - "grad_norm": 1.893178492256631, - "learning_rate": 6.381767111818479e-07, - "loss": 1.0152, - "step": 9930 - }, - { - "epoch": 0.7463550278070044, - "grad_norm": 2.25508010417482, - "learning_rate": 6.378202050723809e-07, - "loss": 0.87, - "step": 9931 - }, - { - "epoch": 0.7464301818728393, - "grad_norm": 1.390757307980743, - "learning_rate": 6.374637796772835e-07, - "loss": 1.0197, - "step": 9932 - }, - { - "epoch": 0.7465053359386743, - "grad_norm": 1.5360718925410248, - "learning_rate": 6.371074350176746e-07, - "loss": 0.976, - "step": 9933 - }, - { - "epoch": 0.7465804900045092, - "grad_norm": 1.773946142985432, - "learning_rate": 6.367511711146691e-07, - "loss": 0.9812, - "step": 9934 - }, - { - "epoch": 0.7466556440703442, - "grad_norm": 2.0238305376602104, - "learning_rate": 6.363949879893764e-07, - "loss": 1.0305, - "step": 9935 - }, - { - "epoch": 0.7467307981361792, - "grad_norm": 1.3800910851922505, - "learning_rate": 6.360388856629029e-07, - "loss": 0.9464, - "step": 9936 - }, - { - "epoch": 0.7468059522020142, - "grad_norm": 2.5011899976954037, - "learning_rate": 6.356828641563483e-07, - "loss": 0.9628, - "step": 9937 - }, - { - "epoch": 0.7468811062678491, - "grad_norm": 2.2148629108656417, - "learning_rate": 6.353269234908083e-07, - "loss": 0.9011, - "step": 9938 - }, - { - "epoch": 0.746956260333684, - "grad_norm": 2.5649310419877933, - "learning_rate": 6.349710636873739e-07, - "loss": 1.0224, - "step": 9939 - }, - { - "epoch": 0.747031414399519, - "grad_norm": 2.4498890715418336, - "learning_rate": 6.346152847671302e-07, - "loss": 0.9603, - "step": 9940 - }, - { - "epoch": 0.747106568465354, - "grad_norm": 0.7685072228632712, - "learning_rate": 6.3425958675116e-07, - "loss": 0.8395, - "step": 9941 - }, - { - "epoch": 0.747181722531189, - "grad_norm": 1.7267727961136616, - "learning_rate": 6.33903969660539e-07, - "loss": 1.0554, - "step": 9942 - }, - { - "epoch": 0.7472568765970239, - "grad_norm": 2.5348883018488886, - "learning_rate": 6.335484335163384e-07, - "loss": 0.929, - "step": 9943 - }, - { - "epoch": 0.7473320306628589, - "grad_norm": 2.2418393386459017, - "learning_rate": 6.331929783396268e-07, - "loss": 0.9513, - "step": 9944 - }, - { - "epoch": 0.7474071847286938, - "grad_norm": 2.0110020814616445, - "learning_rate": 6.32837604151464e-07, - "loss": 0.9885, - "step": 9945 - }, - { - "epoch": 0.7474823387945287, - "grad_norm": 1.8562640282714145, - "learning_rate": 6.324823109729087e-07, - "loss": 1.0399, - "step": 9946 - }, - { - "epoch": 0.7475574928603638, - "grad_norm": 1.6165337462036964, - "learning_rate": 6.321270988250134e-07, - "loss": 0.9463, - "step": 9947 - }, - { - "epoch": 0.7476326469261987, - "grad_norm": 1.3657374948916805, - "learning_rate": 6.317719677288245e-07, - "loss": 0.9988, - "step": 9948 - }, - { - "epoch": 0.7477078009920337, - "grad_norm": 11.16343615665582, - "learning_rate": 6.314169177053866e-07, - "loss": 0.9578, - "step": 9949 - }, - { - "epoch": 0.7477829550578686, - "grad_norm": 2.1097920323431807, - "learning_rate": 6.310619487757369e-07, - "loss": 0.8714, - "step": 9950 - }, - { - "epoch": 0.7478581091237035, - "grad_norm": 1.6611643165323489, - "learning_rate": 6.307070609609086e-07, - "loss": 0.953, - "step": 9951 - }, - { - "epoch": 0.7479332631895386, - "grad_norm": 1.7777726881961022, - "learning_rate": 6.303522542819306e-07, - "loss": 0.9985, - "step": 9952 - }, - { - "epoch": 0.7480084172553735, - "grad_norm": 1.8982458449586639, - "learning_rate": 6.299975287598255e-07, - "loss": 0.9896, - "step": 9953 - }, - { - "epoch": 0.7480835713212085, - "grad_norm": 3.562277134399173, - "learning_rate": 6.296428844156137e-07, - "loss": 1.0407, - "step": 9954 - }, - { - "epoch": 0.7481587253870434, - "grad_norm": 1.3480022483829572, - "learning_rate": 6.292883212703082e-07, - "loss": 0.956, - "step": 9955 - }, - { - "epoch": 0.7482338794528784, - "grad_norm": 1.9983871632575259, - "learning_rate": 6.289338393449187e-07, - "loss": 0.9712, - "step": 9956 - }, - { - "epoch": 0.7483090335187134, - "grad_norm": 1.8688719744728943, - "learning_rate": 6.285794386604484e-07, - "loss": 0.9053, - "step": 9957 - }, - { - "epoch": 0.7483841875845483, - "grad_norm": 1.6530888457932869, - "learning_rate": 6.282251192378987e-07, - "loss": 0.9966, - "step": 9958 - }, - { - "epoch": 0.7484593416503833, - "grad_norm": 1.8444437564455356, - "learning_rate": 6.278708810982635e-07, - "loss": 0.8956, - "step": 9959 - }, - { - "epoch": 0.7485344957162182, - "grad_norm": 1.6301183787462175, - "learning_rate": 6.27516724262533e-07, - "loss": 0.9723, - "step": 9960 - }, - { - "epoch": 0.7486096497820532, - "grad_norm": 1.923654802134538, - "learning_rate": 6.271626487516921e-07, - "loss": 0.8946, - "step": 9961 - }, - { - "epoch": 0.7486848038478882, - "grad_norm": 1.3942823428321498, - "learning_rate": 6.268086545867206e-07, - "loss": 0.9911, - "step": 9962 - }, - { - "epoch": 0.7487599579137232, - "grad_norm": 1.7656699655934236, - "learning_rate": 6.264547417885953e-07, - "loss": 0.9175, - "step": 9963 - }, - { - "epoch": 0.7488351119795581, - "grad_norm": 1.4271279859402184, - "learning_rate": 6.261009103782861e-07, - "loss": 0.991, - "step": 9964 - }, - { - "epoch": 0.748910266045393, - "grad_norm": 1.6054762494359895, - "learning_rate": 6.257471603767583e-07, - "loss": 0.9955, - "step": 9965 - }, - { - "epoch": 0.748985420111228, - "grad_norm": 1.8785782684396428, - "learning_rate": 6.25393491804975e-07, - "loss": 1.0314, - "step": 9966 - }, - { - "epoch": 0.749060574177063, - "grad_norm": 0.8244212837327735, - "learning_rate": 6.250399046838897e-07, - "loss": 0.7994, - "step": 9967 - }, - { - "epoch": 0.749135728242898, - "grad_norm": 3.557832571583699, - "learning_rate": 6.246863990344557e-07, - "loss": 1.0004, - "step": 9968 - }, - { - "epoch": 0.7492108823087329, - "grad_norm": 2.7832283955815478, - "learning_rate": 6.243329748776192e-07, - "loss": 0.9767, - "step": 9969 - }, - { - "epoch": 0.7492860363745678, - "grad_norm": 1.8583507120776719, - "learning_rate": 6.23979632234321e-07, - "loss": 1.0605, - "step": 9970 - }, - { - "epoch": 0.7493611904404028, - "grad_norm": 1.7715915702174836, - "learning_rate": 6.236263711254993e-07, - "loss": 0.92, - "step": 9971 - }, - { - "epoch": 0.7494363445062378, - "grad_norm": 1.8171613538834033, - "learning_rate": 6.232731915720855e-07, - "loss": 0.9059, - "step": 9972 - }, - { - "epoch": 0.7495114985720728, - "grad_norm": 0.7847811433920171, - "learning_rate": 6.22920093595007e-07, - "loss": 0.8337, - "step": 9973 - }, - { - "epoch": 0.7495866526379077, - "grad_norm": 5.31889012798756, - "learning_rate": 6.225670772151861e-07, - "loss": 0.8942, - "step": 9974 - }, - { - "epoch": 0.7496618067037427, - "grad_norm": 1.4251010055884286, - "learning_rate": 6.222141424535399e-07, - "loss": 0.9434, - "step": 9975 - }, - { - "epoch": 0.7497369607695776, - "grad_norm": 1.4828566967695485, - "learning_rate": 6.218612893309823e-07, - "loss": 0.9511, - "step": 9976 - }, - { - "epoch": 0.7498121148354125, - "grad_norm": 2.419586872746177, - "learning_rate": 6.215085178684205e-07, - "loss": 0.9592, - "step": 9977 - }, - { - "epoch": 0.7498872689012476, - "grad_norm": 0.7971305437426137, - "learning_rate": 6.211558280867575e-07, - "loss": 0.8331, - "step": 9978 - }, - { - "epoch": 0.7499624229670825, - "grad_norm": 1.771021335570219, - "learning_rate": 6.208032200068911e-07, - "loss": 0.8637, - "step": 9979 - }, - { - "epoch": 0.7500375770329175, - "grad_norm": 1.9127726042696584, - "learning_rate": 6.20450693649716e-07, - "loss": 0.9928, - "step": 9980 - }, - { - "epoch": 0.7501127310987524, - "grad_norm": 2.4776587722360364, - "learning_rate": 6.200982490361197e-07, - "loss": 1.0154, - "step": 9981 - }, - { - "epoch": 0.7501878851645875, - "grad_norm": 3.6269740287412557, - "learning_rate": 6.197458861869862e-07, - "loss": 0.9905, - "step": 9982 - }, - { - "epoch": 0.7502630392304224, - "grad_norm": 1.98535450467456, - "learning_rate": 6.193936051231945e-07, - "loss": 0.9122, - "step": 9983 - }, - { - "epoch": 0.7503381932962573, - "grad_norm": 4.369747387461271, - "learning_rate": 6.190414058656175e-07, - "loss": 0.9356, - "step": 9984 - }, - { - "epoch": 0.7504133473620923, - "grad_norm": 1.5164285476358057, - "learning_rate": 6.18689288435126e-07, - "loss": 0.9648, - "step": 9985 - }, - { - "epoch": 0.7504885014279272, - "grad_norm": 1.8191141924044583, - "learning_rate": 6.183372528525834e-07, - "loss": 0.8714, - "step": 9986 - }, - { - "epoch": 0.7505636554937622, - "grad_norm": 2.3866712999387696, - "learning_rate": 6.17985299138849e-07, - "loss": 0.9539, - "step": 9987 - }, - { - "epoch": 0.7506388095595972, - "grad_norm": 1.6582517942513488, - "learning_rate": 6.17633427314778e-07, - "loss": 0.863, - "step": 9988 - }, - { - "epoch": 0.7507139636254322, - "grad_norm": 3.8297155695944496, - "learning_rate": 6.1728163740122e-07, - "loss": 0.9666, - "step": 9989 - }, - { - "epoch": 0.7507891176912671, - "grad_norm": 0.7434346211567284, - "learning_rate": 6.169299294190198e-07, - "loss": 0.8664, - "step": 9990 - }, - { - "epoch": 0.750864271757102, - "grad_norm": 1.655284168129857, - "learning_rate": 6.165783033890175e-07, - "loss": 0.9586, - "step": 9991 - }, - { - "epoch": 0.750939425822937, - "grad_norm": 12.12779156512307, - "learning_rate": 6.162267593320474e-07, - "loss": 1.0756, - "step": 9992 - }, - { - "epoch": 0.751014579888772, - "grad_norm": 2.0306183052833218, - "learning_rate": 6.158752972689414e-07, - "loss": 1.0258, - "step": 9993 - }, - { - "epoch": 0.751089733954607, - "grad_norm": 1.5129680350760035, - "learning_rate": 6.15523917220524e-07, - "loss": 0.9263, - "step": 9994 - }, - { - "epoch": 0.7511648880204419, - "grad_norm": 1.6570333391794203, - "learning_rate": 6.151726192076161e-07, - "loss": 0.8893, - "step": 9995 - }, - { - "epoch": 0.7512400420862768, - "grad_norm": 1.680604267552841, - "learning_rate": 6.148214032510335e-07, - "loss": 1.0206, - "step": 9996 - }, - { - "epoch": 0.7513151961521118, - "grad_norm": 2.1115061978631515, - "learning_rate": 6.144702693715862e-07, - "loss": 1.0175, - "step": 9997 - }, - { - "epoch": 0.7513903502179468, - "grad_norm": 1.4531756976728192, - "learning_rate": 6.141192175900818e-07, - "loss": 0.9689, - "step": 9998 - }, - { - "epoch": 0.7514655042837818, - "grad_norm": 7.959363487903311, - "learning_rate": 6.137682479273205e-07, - "loss": 0.9353, - "step": 9999 - }, - { - "epoch": 0.7515406583496167, - "grad_norm": 1.844162716982692, - "learning_rate": 6.134173604040987e-07, - "loss": 1.0074, - "step": 10000 - }, - { - "epoch": 0.7516158124154517, - "grad_norm": 2.044708969560861, - "learning_rate": 6.130665550412073e-07, - "loss": 0.9863, - "step": 10001 - }, - { - "epoch": 0.7516909664812866, - "grad_norm": 1.949384968919572, - "learning_rate": 6.127158318594341e-07, - "loss": 1.0404, - "step": 10002 - }, - { - "epoch": 0.7517661205471216, - "grad_norm": 1.7130291142258216, - "learning_rate": 6.1236519087956e-07, - "loss": 0.9872, - "step": 10003 - }, - { - "epoch": 0.7518412746129566, - "grad_norm": 2.5258769712472047, - "learning_rate": 6.12014632122362e-07, - "loss": 0.9825, - "step": 10004 - }, - { - "epoch": 0.7519164286787915, - "grad_norm": 2.761617828640238, - "learning_rate": 6.116641556086122e-07, - "loss": 0.9535, - "step": 10005 - }, - { - "epoch": 0.7519915827446265, - "grad_norm": 1.880876614342075, - "learning_rate": 6.113137613590767e-07, - "loss": 1.0155, - "step": 10006 - }, - { - "epoch": 0.7520667368104614, - "grad_norm": 1.5282900481228707, - "learning_rate": 6.109634493945191e-07, - "loss": 0.9329, - "step": 10007 - }, - { - "epoch": 0.7521418908762965, - "grad_norm": 1.9539519123536293, - "learning_rate": 6.106132197356959e-07, - "loss": 0.9726, - "step": 10008 - }, - { - "epoch": 0.7522170449421314, - "grad_norm": 1.5290442948745717, - "learning_rate": 6.102630724033593e-07, - "loss": 1.0818, - "step": 10009 - }, - { - "epoch": 0.7522921990079663, - "grad_norm": 2.433077296630375, - "learning_rate": 6.099130074182581e-07, - "loss": 0.9824, - "step": 10010 - }, - { - "epoch": 0.7523673530738013, - "grad_norm": 1.7312137558651457, - "learning_rate": 6.095630248011341e-07, - "loss": 0.93, - "step": 10011 - }, - { - "epoch": 0.7524425071396362, - "grad_norm": 1.5774023082003823, - "learning_rate": 6.09213124572725e-07, - "loss": 0.9785, - "step": 10012 - }, - { - "epoch": 0.7525176612054713, - "grad_norm": 1.5221444927257166, - "learning_rate": 6.088633067537643e-07, - "loss": 1.0126, - "step": 10013 - }, - { - "epoch": 0.7525928152713062, - "grad_norm": 3.2061628125328805, - "learning_rate": 6.085135713649787e-07, - "loss": 0.999, - "step": 10014 - }, - { - "epoch": 0.7526679693371411, - "grad_norm": 1.950637820665248, - "learning_rate": 6.081639184270932e-07, - "loss": 0.9789, - "step": 10015 - }, - { - "epoch": 0.7527431234029761, - "grad_norm": 1.6955834800622787, - "learning_rate": 6.078143479608253e-07, - "loss": 1.0177, - "step": 10016 - }, - { - "epoch": 0.752818277468811, - "grad_norm": 2.100720431865036, - "learning_rate": 6.074648599868884e-07, - "loss": 1.053, - "step": 10017 - }, - { - "epoch": 0.752893431534646, - "grad_norm": 2.0398040241652464, - "learning_rate": 6.071154545259907e-07, - "loss": 0.9456, - "step": 10018 - }, - { - "epoch": 0.752968585600481, - "grad_norm": 1.9050003708566219, - "learning_rate": 6.067661315988353e-07, - "loss": 1.0607, - "step": 10019 - }, - { - "epoch": 0.753043739666316, - "grad_norm": 0.6297385826877966, - "learning_rate": 6.064168912261225e-07, - "loss": 0.8118, - "step": 10020 - }, - { - "epoch": 0.7531188937321509, - "grad_norm": 0.8365639451662448, - "learning_rate": 6.060677334285452e-07, - "loss": 0.8822, - "step": 10021 - }, - { - "epoch": 0.7531940477979858, - "grad_norm": 1.809442176955372, - "learning_rate": 6.057186582267923e-07, - "loss": 0.9985, - "step": 10022 - }, - { - "epoch": 0.7532692018638208, - "grad_norm": 2.723503429776955, - "learning_rate": 6.053696656415474e-07, - "loss": 1.0079, - "step": 10023 - }, - { - "epoch": 0.7533443559296558, - "grad_norm": 1.6185115240875259, - "learning_rate": 6.05020755693491e-07, - "loss": 1.0437, - "step": 10024 - }, - { - "epoch": 0.7534195099954908, - "grad_norm": 1.6045255465995398, - "learning_rate": 6.046719284032963e-07, - "loss": 0.8209, - "step": 10025 - }, - { - "epoch": 0.7534946640613257, - "grad_norm": 1.3421220010711683, - "learning_rate": 6.043231837916332e-07, - "loss": 1.0057, - "step": 10026 - }, - { - "epoch": 0.7535698181271607, - "grad_norm": 1.9278545863924526, - "learning_rate": 6.039745218791658e-07, - "loss": 0.9081, - "step": 10027 - }, - { - "epoch": 0.7536449721929956, - "grad_norm": 1.7738896268336048, - "learning_rate": 6.036259426865531e-07, - "loss": 1.0098, - "step": 10028 - }, - { - "epoch": 0.7537201262588306, - "grad_norm": 1.6884260109371627, - "learning_rate": 6.032774462344507e-07, - "loss": 0.9417, - "step": 10029 - }, - { - "epoch": 0.7537952803246656, - "grad_norm": 1.4576630641480708, - "learning_rate": 6.029290325435084e-07, - "loss": 0.8459, - "step": 10030 - }, - { - "epoch": 0.7538704343905005, - "grad_norm": 1.5122615815455593, - "learning_rate": 6.025807016343698e-07, - "loss": 1.0217, - "step": 10031 - }, - { - "epoch": 0.7539455884563355, - "grad_norm": 1.4298663418335376, - "learning_rate": 6.022324535276763e-07, - "loss": 1.0518, - "step": 10032 - }, - { - "epoch": 0.7540207425221704, - "grad_norm": 2.113322090135063, - "learning_rate": 6.018842882440625e-07, - "loss": 0.8836, - "step": 10033 - }, - { - "epoch": 0.7540958965880055, - "grad_norm": 0.7710357520525869, - "learning_rate": 6.015362058041584e-07, - "loss": 0.8691, - "step": 10034 - }, - { - "epoch": 0.7541710506538404, - "grad_norm": 1.8269141335982755, - "learning_rate": 6.011882062285892e-07, - "loss": 1.1002, - "step": 10035 - }, - { - "epoch": 0.7542462047196753, - "grad_norm": 1.8882069533239552, - "learning_rate": 6.008402895379743e-07, - "loss": 0.9897, - "step": 10036 - }, - { - "epoch": 0.7543213587855103, - "grad_norm": 2.068446055785202, - "learning_rate": 6.004924557529307e-07, - "loss": 1.1235, - "step": 10037 - }, - { - "epoch": 0.7543965128513452, - "grad_norm": 1.7996260439989014, - "learning_rate": 6.001447048940682e-07, - "loss": 0.9638, - "step": 10038 - }, - { - "epoch": 0.7544716669171803, - "grad_norm": 1.7226237944921299, - "learning_rate": 5.997970369819925e-07, - "loss": 0.8614, - "step": 10039 - }, - { - "epoch": 0.7545468209830152, - "grad_norm": 1.9518595754189236, - "learning_rate": 5.994494520373039e-07, - "loss": 1.0039, - "step": 10040 - }, - { - "epoch": 0.7546219750488501, - "grad_norm": 1.8431412289882558, - "learning_rate": 5.991019500805976e-07, - "loss": 1.022, - "step": 10041 - }, - { - "epoch": 0.7546971291146851, - "grad_norm": 1.6970921330075923, - "learning_rate": 5.98754531132466e-07, - "loss": 0.9916, - "step": 10042 - }, - { - "epoch": 0.75477228318052, - "grad_norm": 1.8660756395961262, - "learning_rate": 5.984071952134941e-07, - "loss": 1.0795, - "step": 10043 - }, - { - "epoch": 0.7548474372463551, - "grad_norm": 1.8882783560677425, - "learning_rate": 5.98059942344263e-07, - "loss": 0.9561, - "step": 10044 - }, - { - "epoch": 0.75492259131219, - "grad_norm": 1.8400997299657682, - "learning_rate": 5.977127725453482e-07, - "loss": 0.938, - "step": 10045 - }, - { - "epoch": 0.754997745378025, - "grad_norm": 2.8211822436727054, - "learning_rate": 5.973656858373217e-07, - "loss": 0.9033, - "step": 10046 - }, - { - "epoch": 0.7550728994438599, - "grad_norm": 2.644870122161814, - "learning_rate": 5.970186822407495e-07, - "loss": 0.9836, - "step": 10047 - }, - { - "epoch": 0.7551480535096948, - "grad_norm": 1.9054644482553496, - "learning_rate": 5.966717617761925e-07, - "loss": 0.8571, - "step": 10048 - }, - { - "epoch": 0.7552232075755299, - "grad_norm": 1.758442676754184, - "learning_rate": 5.963249244642077e-07, - "loss": 1.0185, - "step": 10049 - }, - { - "epoch": 0.7552983616413648, - "grad_norm": 1.7612228879879133, - "learning_rate": 5.959781703253452e-07, - "loss": 0.9491, - "step": 10050 - }, - { - "epoch": 0.7553735157071998, - "grad_norm": 2.0729476568566865, - "learning_rate": 5.956314993801532e-07, - "loss": 0.7826, - "step": 10051 - }, - { - "epoch": 0.7554486697730347, - "grad_norm": 1.7726003544730145, - "learning_rate": 5.952849116491728e-07, - "loss": 0.9492, - "step": 10052 - }, - { - "epoch": 0.7555238238388697, - "grad_norm": 1.5194393022269286, - "learning_rate": 5.949384071529395e-07, - "loss": 0.9015, - "step": 10053 - }, - { - "epoch": 0.7555989779047046, - "grad_norm": 1.8310690754643049, - "learning_rate": 5.945919859119865e-07, - "loss": 0.8887, - "step": 10054 - }, - { - "epoch": 0.7556741319705396, - "grad_norm": 1.8690779943546265, - "learning_rate": 5.942456479468401e-07, - "loss": 0.9948, - "step": 10055 - }, - { - "epoch": 0.7557492860363746, - "grad_norm": 0.628835022488383, - "learning_rate": 5.938993932780221e-07, - "loss": 0.8274, - "step": 10056 - }, - { - "epoch": 0.7558244401022095, - "grad_norm": 1.8213477731195609, - "learning_rate": 5.935532219260493e-07, - "loss": 0.9386, - "step": 10057 - }, - { - "epoch": 0.7558995941680445, - "grad_norm": 1.3888437242581964, - "learning_rate": 5.932071339114331e-07, - "loss": 1.0246, - "step": 10058 - }, - { - "epoch": 0.7559747482338794, - "grad_norm": 1.5499645167565437, - "learning_rate": 5.928611292546819e-07, - "loss": 1.0699, - "step": 10059 - }, - { - "epoch": 0.7560499022997144, - "grad_norm": 1.773105405390635, - "learning_rate": 5.92515207976297e-07, - "loss": 0.9238, - "step": 10060 - }, - { - "epoch": 0.7561250563655494, - "grad_norm": 3.674787299819348, - "learning_rate": 5.921693700967758e-07, - "loss": 0.9443, - "step": 10061 - }, - { - "epoch": 0.7562002104313843, - "grad_norm": 1.809351060145625, - "learning_rate": 5.918236156366101e-07, - "loss": 0.9958, - "step": 10062 - }, - { - "epoch": 0.7562753644972193, - "grad_norm": 1.6310682699766013, - "learning_rate": 5.91477944616287e-07, - "loss": 0.9877, - "step": 10063 - }, - { - "epoch": 0.7563505185630542, - "grad_norm": 2.5555438085184687, - "learning_rate": 5.911323570562898e-07, - "loss": 0.8793, - "step": 10064 - }, - { - "epoch": 0.7564256726288893, - "grad_norm": 1.8035649362903166, - "learning_rate": 5.907868529770957e-07, - "loss": 0.9377, - "step": 10065 - }, - { - "epoch": 0.7565008266947242, - "grad_norm": 2.114781888713021, - "learning_rate": 5.904414323991764e-07, - "loss": 0.91, - "step": 10066 - }, - { - "epoch": 0.7565759807605591, - "grad_norm": 1.585442602051561, - "learning_rate": 5.900960953429992e-07, - "loss": 0.9406, - "step": 10067 - }, - { - "epoch": 0.7566511348263941, - "grad_norm": 0.7763187033430979, - "learning_rate": 5.89750841829028e-07, - "loss": 0.8748, - "step": 10068 - }, - { - "epoch": 0.756726288892229, - "grad_norm": 3.2704869631595064, - "learning_rate": 5.894056718777196e-07, - "loss": 0.9197, - "step": 10069 - }, - { - "epoch": 0.7568014429580641, - "grad_norm": 1.4976631876785305, - "learning_rate": 5.890605855095265e-07, - "loss": 1.011, - "step": 10070 - }, - { - "epoch": 0.756876597023899, - "grad_norm": 0.7317309973972898, - "learning_rate": 5.887155827448968e-07, - "loss": 0.8275, - "step": 10071 - }, - { - "epoch": 0.756951751089734, - "grad_norm": 1.8438235122767141, - "learning_rate": 5.883706636042722e-07, - "loss": 0.9604, - "step": 10072 - }, - { - "epoch": 0.7570269051555689, - "grad_norm": 2.390355013891816, - "learning_rate": 5.880258281080921e-07, - "loss": 1.0011, - "step": 10073 - }, - { - "epoch": 0.7571020592214038, - "grad_norm": 1.6021044302726548, - "learning_rate": 5.876810762767883e-07, - "loss": 1.0362, - "step": 10074 - }, - { - "epoch": 0.7571772132872389, - "grad_norm": 1.730932582580336, - "learning_rate": 5.873364081307884e-07, - "loss": 0.8468, - "step": 10075 - }, - { - "epoch": 0.7572523673530738, - "grad_norm": 2.2834844228314686, - "learning_rate": 5.869918236905162e-07, - "loss": 0.9298, - "step": 10076 - }, - { - "epoch": 0.7573275214189088, - "grad_norm": 1.8182877179212955, - "learning_rate": 5.866473229763893e-07, - "loss": 0.9181, - "step": 10077 - }, - { - "epoch": 0.7574026754847437, - "grad_norm": 1.8111584401073582, - "learning_rate": 5.863029060088205e-07, - "loss": 1.0211, - "step": 10078 - }, - { - "epoch": 0.7574778295505787, - "grad_norm": 1.6468351992696375, - "learning_rate": 5.859585728082181e-07, - "loss": 0.9264, - "step": 10079 - }, - { - "epoch": 0.7575529836164137, - "grad_norm": 1.7291709562807611, - "learning_rate": 5.856143233949844e-07, - "loss": 0.9439, - "step": 10080 - }, - { - "epoch": 0.7576281376822486, - "grad_norm": 2.668204211991359, - "learning_rate": 5.852701577895184e-07, - "loss": 0.9246, - "step": 10081 - }, - { - "epoch": 0.7577032917480836, - "grad_norm": 1.8559657665951361, - "learning_rate": 5.849260760122132e-07, - "loss": 0.9874, - "step": 10082 - }, - { - "epoch": 0.7577784458139185, - "grad_norm": 1.4655678548835829, - "learning_rate": 5.845820780834568e-07, - "loss": 1.038, - "step": 10083 - }, - { - "epoch": 0.7578535998797535, - "grad_norm": 2.423775204335151, - "learning_rate": 5.842381640236318e-07, - "loss": 0.9035, - "step": 10084 - }, - { - "epoch": 0.7579287539455885, - "grad_norm": 1.7242992636407524, - "learning_rate": 5.838943338531166e-07, - "loss": 1.0053, - "step": 10085 - }, - { - "epoch": 0.7580039080114234, - "grad_norm": 1.6030394652155555, - "learning_rate": 5.835505875922853e-07, - "loss": 1.0, - "step": 10086 - }, - { - "epoch": 0.7580790620772584, - "grad_norm": 0.7184574112159395, - "learning_rate": 5.832069252615058e-07, - "loss": 0.826, - "step": 10087 - }, - { - "epoch": 0.7581542161430933, - "grad_norm": 1.6191814863803229, - "learning_rate": 5.82863346881141e-07, - "loss": 1.0024, - "step": 10088 - }, - { - "epoch": 0.7582293702089283, - "grad_norm": 1.4329047353295281, - "learning_rate": 5.825198524715489e-07, - "loss": 0.9857, - "step": 10089 - }, - { - "epoch": 0.7583045242747632, - "grad_norm": 0.7837024723590785, - "learning_rate": 5.821764420530842e-07, - "loss": 0.864, - "step": 10090 - }, - { - "epoch": 0.7583796783405983, - "grad_norm": 1.6302596582817783, - "learning_rate": 5.818331156460943e-07, - "loss": 1.0689, - "step": 10091 - }, - { - "epoch": 0.7584548324064332, - "grad_norm": 0.7022593573764695, - "learning_rate": 5.814898732709228e-07, - "loss": 0.8422, - "step": 10092 - }, - { - "epoch": 0.7585299864722681, - "grad_norm": 2.180106720893187, - "learning_rate": 5.811467149479083e-07, - "loss": 0.9673, - "step": 10093 - }, - { - "epoch": 0.7586051405381031, - "grad_norm": 1.792767814729648, - "learning_rate": 5.808036406973835e-07, - "loss": 0.9173, - "step": 10094 - }, - { - "epoch": 0.758680294603938, - "grad_norm": 1.667881125799613, - "learning_rate": 5.804606505396781e-07, - "loss": 1.0107, - "step": 10095 - }, - { - "epoch": 0.7587554486697731, - "grad_norm": 1.975867891957328, - "learning_rate": 5.801177444951148e-07, - "loss": 0.8222, - "step": 10096 - }, - { - "epoch": 0.758830602735608, - "grad_norm": 1.7117618334252396, - "learning_rate": 5.797749225840117e-07, - "loss": 1.0346, - "step": 10097 - }, - { - "epoch": 0.758905756801443, - "grad_norm": 1.366052996086336, - "learning_rate": 5.794321848266835e-07, - "loss": 0.9693, - "step": 10098 - }, - { - "epoch": 0.7589809108672779, - "grad_norm": 1.64112831751238, - "learning_rate": 5.790895312434378e-07, - "loss": 0.928, - "step": 10099 - }, - { - "epoch": 0.7590560649331128, - "grad_norm": 2.39106377458258, - "learning_rate": 5.787469618545786e-07, - "loss": 0.9344, - "step": 10100 - }, - { - "epoch": 0.7591312189989479, - "grad_norm": 2.388017316765035, - "learning_rate": 5.784044766804044e-07, - "loss": 0.9703, - "step": 10101 - }, - { - "epoch": 0.7592063730647828, - "grad_norm": 1.815776034719345, - "learning_rate": 5.780620757412078e-07, - "loss": 0.9958, - "step": 10102 - }, - { - "epoch": 0.7592815271306178, - "grad_norm": 1.9823292076363401, - "learning_rate": 5.777197590572789e-07, - "loss": 1.0765, - "step": 10103 - }, - { - "epoch": 0.7593566811964527, - "grad_norm": 1.9418538304879471, - "learning_rate": 5.773775266489005e-07, - "loss": 0.9878, - "step": 10104 - }, - { - "epoch": 0.7594318352622876, - "grad_norm": 1.4427419536910215, - "learning_rate": 5.770353785363511e-07, - "loss": 0.9509, - "step": 10105 - }, - { - "epoch": 0.7595069893281227, - "grad_norm": 1.8038594381504258, - "learning_rate": 5.766933147399045e-07, - "loss": 1.0253, - "step": 10106 - }, - { - "epoch": 0.7595821433939576, - "grad_norm": 0.7635117775209148, - "learning_rate": 5.763513352798286e-07, - "loss": 0.828, - "step": 10107 - }, - { - "epoch": 0.7596572974597926, - "grad_norm": 2.1938206500987736, - "learning_rate": 5.760094401763884e-07, - "loss": 0.9881, - "step": 10108 - }, - { - "epoch": 0.7597324515256275, - "grad_norm": 1.596865731229216, - "learning_rate": 5.756676294498415e-07, - "loss": 1.0214, - "step": 10109 - }, - { - "epoch": 0.7598076055914625, - "grad_norm": 1.578034804382195, - "learning_rate": 5.753259031204416e-07, - "loss": 0.9547, - "step": 10110 - }, - { - "epoch": 0.7598827596572975, - "grad_norm": 1.8948931476190298, - "learning_rate": 5.74984261208437e-07, - "loss": 0.8985, - "step": 10111 - }, - { - "epoch": 0.7599579137231324, - "grad_norm": 0.6201532788925141, - "learning_rate": 5.746427037340722e-07, - "loss": 0.8126, - "step": 10112 - }, - { - "epoch": 0.7600330677889674, - "grad_norm": 1.4146110355921315, - "learning_rate": 5.743012307175852e-07, - "loss": 1.0099, - "step": 10113 - }, - { - "epoch": 0.7601082218548023, - "grad_norm": 2.307912864904606, - "learning_rate": 5.739598421792091e-07, - "loss": 0.9081, - "step": 10114 - }, - { - "epoch": 0.7601833759206373, - "grad_norm": 1.5608622550966424, - "learning_rate": 5.736185381391743e-07, - "loss": 1.0166, - "step": 10115 - }, - { - "epoch": 0.7602585299864723, - "grad_norm": 2.513418141847437, - "learning_rate": 5.732773186177016e-07, - "loss": 0.996, - "step": 10116 - }, - { - "epoch": 0.7603336840523073, - "grad_norm": 1.7756693236241203, - "learning_rate": 5.729361836350119e-07, - "loss": 0.8982, - "step": 10117 - }, - { - "epoch": 0.7604088381181422, - "grad_norm": 1.8495536239906556, - "learning_rate": 5.725951332113179e-07, - "loss": 1.0223, - "step": 10118 - }, - { - "epoch": 0.7604839921839771, - "grad_norm": 1.917111393569026, - "learning_rate": 5.722541673668275e-07, - "loss": 0.9886, - "step": 10119 - }, - { - "epoch": 0.7605591462498121, - "grad_norm": 1.5903461234311926, - "learning_rate": 5.719132861217455e-07, - "loss": 1.0231, - "step": 10120 - }, - { - "epoch": 0.760634300315647, - "grad_norm": 2.2780975088964657, - "learning_rate": 5.715724894962699e-07, - "loss": 0.998, - "step": 10121 - }, - { - "epoch": 0.7607094543814821, - "grad_norm": 1.5918856168946314, - "learning_rate": 5.712317775105939e-07, - "loss": 0.9175, - "step": 10122 - }, - { - "epoch": 0.760784608447317, - "grad_norm": 1.6390352812362543, - "learning_rate": 5.708911501849065e-07, - "loss": 1.0222, - "step": 10123 - }, - { - "epoch": 0.760859762513152, - "grad_norm": 1.599637991960139, - "learning_rate": 5.7055060753939e-07, - "loss": 1.019, - "step": 10124 - }, - { - "epoch": 0.7609349165789869, - "grad_norm": 1.7620837727916954, - "learning_rate": 5.702101495942245e-07, - "loss": 0.8587, - "step": 10125 - }, - { - "epoch": 0.7610100706448218, - "grad_norm": 1.724139831147298, - "learning_rate": 5.698697763695826e-07, - "loss": 1.006, - "step": 10126 - }, - { - "epoch": 0.7610852247106569, - "grad_norm": 1.4102244268194009, - "learning_rate": 5.695294878856327e-07, - "loss": 0.9359, - "step": 10127 - }, - { - "epoch": 0.7611603787764918, - "grad_norm": 4.133282755470514, - "learning_rate": 5.691892841625385e-07, - "loss": 0.8523, - "step": 10128 - }, - { - "epoch": 0.7612355328423268, - "grad_norm": 1.9349561419186434, - "learning_rate": 5.688491652204573e-07, - "loss": 0.9159, - "step": 10129 - }, - { - "epoch": 0.7613106869081617, - "grad_norm": 1.720244919283637, - "learning_rate": 5.68509131079544e-07, - "loss": 0.8373, - "step": 10130 - }, - { - "epoch": 0.7613858409739966, - "grad_norm": 2.2615858180588835, - "learning_rate": 5.681691817599463e-07, - "loss": 0.9675, - "step": 10131 - }, - { - "epoch": 0.7614609950398317, - "grad_norm": 2.0731117757353035, - "learning_rate": 5.678293172818074e-07, - "loss": 0.8598, - "step": 10132 - }, - { - "epoch": 0.7615361491056666, - "grad_norm": 1.426618523557865, - "learning_rate": 5.674895376652649e-07, - "loss": 0.9146, - "step": 10133 - }, - { - "epoch": 0.7616113031715016, - "grad_norm": 2.3303306423574233, - "learning_rate": 5.671498429304535e-07, - "loss": 1.1051, - "step": 10134 - }, - { - "epoch": 0.7616864572373365, - "grad_norm": 1.6912081208157699, - "learning_rate": 5.668102330975007e-07, - "loss": 1.0305, - "step": 10135 - }, - { - "epoch": 0.7617616113031715, - "grad_norm": 1.5819997525317435, - "learning_rate": 5.664707081865288e-07, - "loss": 1.0541, - "step": 10136 - }, - { - "epoch": 0.7618367653690065, - "grad_norm": 1.4549794822897941, - "learning_rate": 5.661312682176582e-07, - "loss": 0.9282, - "step": 10137 - }, - { - "epoch": 0.7619119194348414, - "grad_norm": 1.7375320706049184, - "learning_rate": 5.657919132109991e-07, - "loss": 0.8443, - "step": 10138 - }, - { - "epoch": 0.7619870735006764, - "grad_norm": 0.6941590331925995, - "learning_rate": 5.65452643186662e-07, - "loss": 0.8382, - "step": 10139 - }, - { - "epoch": 0.7620622275665113, - "grad_norm": 1.6404542379975466, - "learning_rate": 5.65113458164749e-07, - "loss": 1.0063, - "step": 10140 - }, - { - "epoch": 0.7621373816323463, - "grad_norm": 1.5060847369819843, - "learning_rate": 5.647743581653575e-07, - "loss": 1.0199, - "step": 10141 - }, - { - "epoch": 0.7622125356981813, - "grad_norm": 0.9930628722187338, - "learning_rate": 5.644353432085818e-07, - "loss": 0.9027, - "step": 10142 - }, - { - "epoch": 0.7622876897640163, - "grad_norm": 1.4586744272878744, - "learning_rate": 5.64096413314509e-07, - "loss": 0.9781, - "step": 10143 - }, - { - "epoch": 0.7623628438298512, - "grad_norm": 2.1289572053463277, - "learning_rate": 5.637575685032223e-07, - "loss": 0.9339, - "step": 10144 - }, - { - "epoch": 0.7624379978956861, - "grad_norm": 3.0392234779224254, - "learning_rate": 5.634188087947993e-07, - "loss": 0.9965, - "step": 10145 - }, - { - "epoch": 0.7625131519615211, - "grad_norm": 1.7006617940884996, - "learning_rate": 5.630801342093123e-07, - "loss": 1.0231, - "step": 10146 - }, - { - "epoch": 0.762588306027356, - "grad_norm": 1.6729702793270043, - "learning_rate": 5.627415447668304e-07, - "loss": 0.9346, - "step": 10147 - }, - { - "epoch": 0.7626634600931911, - "grad_norm": 1.7331255974753086, - "learning_rate": 5.624030404874154e-07, - "loss": 1.0354, - "step": 10148 - }, - { - "epoch": 0.762738614159026, - "grad_norm": 1.6533843386893865, - "learning_rate": 5.620646213911253e-07, - "loss": 0.8196, - "step": 10149 - }, - { - "epoch": 0.7628137682248609, - "grad_norm": 1.4553989952682584, - "learning_rate": 5.617262874980122e-07, - "loss": 0.9183, - "step": 10150 - }, - { - "epoch": 0.7628889222906959, - "grad_norm": 1.671037410662021, - "learning_rate": 5.613880388281245e-07, - "loss": 0.9986, - "step": 10151 - }, - { - "epoch": 0.7629640763565309, - "grad_norm": 2.5531666746438555, - "learning_rate": 5.610498754015043e-07, - "loss": 1.0047, - "step": 10152 - }, - { - "epoch": 0.7630392304223659, - "grad_norm": 1.6853675083807875, - "learning_rate": 5.607117972381892e-07, - "loss": 1.043, - "step": 10153 - }, - { - "epoch": 0.7631143844882008, - "grad_norm": 2.1456183785731673, - "learning_rate": 5.603738043582113e-07, - "loss": 0.9274, - "step": 10154 - }, - { - "epoch": 0.7631895385540358, - "grad_norm": 1.6479045132825052, - "learning_rate": 5.600358967815977e-07, - "loss": 0.9264, - "step": 10155 - }, - { - "epoch": 0.7632646926198707, - "grad_norm": 1.6742496916616998, - "learning_rate": 5.59698074528372e-07, - "loss": 1.0361, - "step": 10156 - }, - { - "epoch": 0.7633398466857056, - "grad_norm": 2.308350009605498, - "learning_rate": 5.593603376185503e-07, - "loss": 0.847, - "step": 10157 - }, - { - "epoch": 0.7634150007515407, - "grad_norm": 2.082009492513544, - "learning_rate": 5.590226860721447e-07, - "loss": 1.0315, - "step": 10158 - }, - { - "epoch": 0.7634901548173756, - "grad_norm": 1.4279243979436291, - "learning_rate": 5.586851199091635e-07, - "loss": 0.9969, - "step": 10159 - }, - { - "epoch": 0.7635653088832106, - "grad_norm": 1.5991946697052768, - "learning_rate": 5.583476391496083e-07, - "loss": 1.019, - "step": 10160 - }, - { - "epoch": 0.7636404629490455, - "grad_norm": 1.7388343873926506, - "learning_rate": 5.580102438134755e-07, - "loss": 1.0299, - "step": 10161 - }, - { - "epoch": 0.7637156170148806, - "grad_norm": 1.9247861693878676, - "learning_rate": 5.576729339207578e-07, - "loss": 0.9547, - "step": 10162 - }, - { - "epoch": 0.7637907710807155, - "grad_norm": 2.9934981142608073, - "learning_rate": 5.573357094914413e-07, - "loss": 0.9368, - "step": 10163 - }, - { - "epoch": 0.7638659251465504, - "grad_norm": 1.829436198762785, - "learning_rate": 5.569985705455087e-07, - "loss": 1.0637, - "step": 10164 - }, - { - "epoch": 0.7639410792123854, - "grad_norm": 2.115309441838003, - "learning_rate": 5.566615171029365e-07, - "loss": 1.0203, - "step": 10165 - }, - { - "epoch": 0.7640162332782203, - "grad_norm": 2.252259920897617, - "learning_rate": 5.563245491836963e-07, - "loss": 0.9241, - "step": 10166 - }, - { - "epoch": 0.7640913873440554, - "grad_norm": 13.737559846534223, - "learning_rate": 5.559876668077548e-07, - "loss": 0.8813, - "step": 10167 - }, - { - "epoch": 0.7641665414098903, - "grad_norm": 2.1017216618618195, - "learning_rate": 5.556508699950728e-07, - "loss": 1.003, - "step": 10168 - }, - { - "epoch": 0.7642416954757253, - "grad_norm": 0.7662700738191015, - "learning_rate": 5.553141587656083e-07, - "loss": 0.7998, - "step": 10169 - }, - { - "epoch": 0.7643168495415602, - "grad_norm": 1.6695842557783493, - "learning_rate": 5.549775331393118e-07, - "loss": 0.9809, - "step": 10170 - }, - { - "epoch": 0.7643920036073951, - "grad_norm": 1.4848693024633344, - "learning_rate": 5.546409931361299e-07, - "loss": 0.9361, - "step": 10171 - }, - { - "epoch": 0.7644671576732301, - "grad_norm": 1.7662407805572728, - "learning_rate": 5.543045387760035e-07, - "loss": 1.0826, - "step": 10172 - }, - { - "epoch": 0.7645423117390651, - "grad_norm": 1.8189631360152672, - "learning_rate": 5.539681700788694e-07, - "loss": 0.903, - "step": 10173 - }, - { - "epoch": 0.7646174658049001, - "grad_norm": 1.57467357038093, - "learning_rate": 5.536318870646586e-07, - "loss": 0.9857, - "step": 10174 - }, - { - "epoch": 0.764692619870735, - "grad_norm": 2.1481170970248615, - "learning_rate": 5.532956897532968e-07, - "loss": 0.9543, - "step": 10175 - }, - { - "epoch": 0.7647677739365699, - "grad_norm": 1.9629667808221014, - "learning_rate": 5.529595781647054e-07, - "loss": 1.0102, - "step": 10176 - }, - { - "epoch": 0.7648429280024049, - "grad_norm": 0.7903003991265142, - "learning_rate": 5.526235523187992e-07, - "loss": 0.8358, - "step": 10177 - }, - { - "epoch": 0.7649180820682399, - "grad_norm": 1.3608691017665675, - "learning_rate": 5.522876122354907e-07, - "loss": 1.025, - "step": 10178 - }, - { - "epoch": 0.7649932361340749, - "grad_norm": 2.170553450257797, - "learning_rate": 5.519517579346849e-07, - "loss": 0.9986, - "step": 10179 - }, - { - "epoch": 0.7650683901999098, - "grad_norm": 2.340477146016729, - "learning_rate": 5.516159894362817e-07, - "loss": 0.8468, - "step": 10180 - }, - { - "epoch": 0.7651435442657448, - "grad_norm": 1.8543446558985461, - "learning_rate": 5.512803067601779e-07, - "loss": 0.9076, - "step": 10181 - }, - { - "epoch": 0.7652186983315797, - "grad_norm": 12.144864200164186, - "learning_rate": 5.509447099262636e-07, - "loss": 1.0474, - "step": 10182 - }, - { - "epoch": 0.7652938523974147, - "grad_norm": 2.2621087510521933, - "learning_rate": 5.506091989544239e-07, - "loss": 0.8878, - "step": 10183 - }, - { - "epoch": 0.7653690064632497, - "grad_norm": 2.5196642936651297, - "learning_rate": 5.502737738645393e-07, - "loss": 0.9343, - "step": 10184 - }, - { - "epoch": 0.7654441605290846, - "grad_norm": 1.3631652083100914, - "learning_rate": 5.499384346764843e-07, - "loss": 0.9782, - "step": 10185 - }, - { - "epoch": 0.7655193145949196, - "grad_norm": 1.705442828734135, - "learning_rate": 5.496031814101303e-07, - "loss": 0.973, - "step": 10186 - }, - { - "epoch": 0.7655944686607545, - "grad_norm": 1.6879396219167107, - "learning_rate": 5.492680140853418e-07, - "loss": 0.8777, - "step": 10187 - }, - { - "epoch": 0.7656696227265896, - "grad_norm": 1.590567908920642, - "learning_rate": 5.489329327219787e-07, - "loss": 0.9959, - "step": 10188 - }, - { - "epoch": 0.7657447767924245, - "grad_norm": 2.1309468699813015, - "learning_rate": 5.485979373398959e-07, - "loss": 0.9455, - "step": 10189 - }, - { - "epoch": 0.7658199308582594, - "grad_norm": 1.6966725883805234, - "learning_rate": 5.482630279589424e-07, - "loss": 0.9092, - "step": 10190 - }, - { - "epoch": 0.7658950849240944, - "grad_norm": 0.6752181601307049, - "learning_rate": 5.479282045989644e-07, - "loss": 0.8349, - "step": 10191 - }, - { - "epoch": 0.7659702389899293, - "grad_norm": 4.197494195294527, - "learning_rate": 5.475934672798004e-07, - "loss": 0.9969, - "step": 10192 - }, - { - "epoch": 0.7660453930557644, - "grad_norm": 2.3955443387642807, - "learning_rate": 5.47258816021285e-07, - "loss": 0.9624, - "step": 10193 - }, - { - "epoch": 0.7661205471215993, - "grad_norm": 1.8491000770359642, - "learning_rate": 5.469242508432472e-07, - "loss": 0.9997, - "step": 10194 - }, - { - "epoch": 0.7661957011874342, - "grad_norm": 3.160941235120059, - "learning_rate": 5.465897717655123e-07, - "loss": 1.0099, - "step": 10195 - }, - { - "epoch": 0.7662708552532692, - "grad_norm": 3.727588440365904, - "learning_rate": 5.462553788078992e-07, - "loss": 0.9018, - "step": 10196 - }, - { - "epoch": 0.7663460093191041, - "grad_norm": 4.785956964257065, - "learning_rate": 5.459210719902211e-07, - "loss": 0.9925, - "step": 10197 - }, - { - "epoch": 0.7664211633849392, - "grad_norm": 1.601872930447257, - "learning_rate": 5.455868513322881e-07, - "loss": 1.0003, - "step": 10198 - }, - { - "epoch": 0.7664963174507741, - "grad_norm": 2.1158742738088003, - "learning_rate": 5.452527168539025e-07, - "loss": 0.9329, - "step": 10199 - }, - { - "epoch": 0.7665714715166091, - "grad_norm": 2.1992007537701737, - "learning_rate": 5.449186685748648e-07, - "loss": 0.8503, - "step": 10200 - }, - { - "epoch": 0.766646625582444, - "grad_norm": 1.6388569342017778, - "learning_rate": 5.445847065149678e-07, - "loss": 1.1061, - "step": 10201 - }, - { - "epoch": 0.7667217796482789, - "grad_norm": 1.823599144472732, - "learning_rate": 5.442508306939995e-07, - "loss": 0.9284, - "step": 10202 - }, - { - "epoch": 0.766796933714114, - "grad_norm": 1.711554498770446, - "learning_rate": 5.439170411317446e-07, - "loss": 0.9544, - "step": 10203 - }, - { - "epoch": 0.7668720877799489, - "grad_norm": 2.215911123634953, - "learning_rate": 5.435833378479807e-07, - "loss": 0.886, - "step": 10204 - }, - { - "epoch": 0.7669472418457839, - "grad_norm": 3.5058078262662193, - "learning_rate": 5.432497208624809e-07, - "loss": 0.9575, - "step": 10205 - }, - { - "epoch": 0.7670223959116188, - "grad_norm": 1.5431947952993534, - "learning_rate": 5.429161901950134e-07, - "loss": 1.0492, - "step": 10206 - }, - { - "epoch": 0.7670975499774538, - "grad_norm": 2.2122129394172143, - "learning_rate": 5.425827458653407e-07, - "loss": 0.8313, - "step": 10207 - }, - { - "epoch": 0.7671727040432887, - "grad_norm": 2.217359080003248, - "learning_rate": 5.422493878932217e-07, - "loss": 0.9714, - "step": 10208 - }, - { - "epoch": 0.7672478581091237, - "grad_norm": 1.8035718103108953, - "learning_rate": 5.419161162984085e-07, - "loss": 1.0315, - "step": 10209 - }, - { - "epoch": 0.7673230121749587, - "grad_norm": 5.9191127968023665, - "learning_rate": 5.415829311006487e-07, - "loss": 1.0058, - "step": 10210 - }, - { - "epoch": 0.7673981662407936, - "grad_norm": 2.6758461157739872, - "learning_rate": 5.412498323196852e-07, - "loss": 0.9993, - "step": 10211 - }, - { - "epoch": 0.7674733203066286, - "grad_norm": 1.5576640250599036, - "learning_rate": 5.409168199752543e-07, - "loss": 0.9803, - "step": 10212 - }, - { - "epoch": 0.7675484743724635, - "grad_norm": 1.9520867601796799, - "learning_rate": 5.405838940870897e-07, - "loss": 1.024, - "step": 10213 - }, - { - "epoch": 0.7676236284382986, - "grad_norm": 1.7149970262017895, - "learning_rate": 5.402510546749177e-07, - "loss": 1.0025, - "step": 10214 - }, - { - "epoch": 0.7676987825041335, - "grad_norm": 1.5563720318238443, - "learning_rate": 5.399183017584605e-07, - "loss": 1.0059, - "step": 10215 - }, - { - "epoch": 0.7677739365699684, - "grad_norm": 1.7452277781961516, - "learning_rate": 5.395856353574344e-07, - "loss": 0.9857, - "step": 10216 - }, - { - "epoch": 0.7678490906358034, - "grad_norm": 0.8441447288346349, - "learning_rate": 5.392530554915522e-07, - "loss": 0.8421, - "step": 10217 - }, - { - "epoch": 0.7679242447016383, - "grad_norm": 1.6157785073406365, - "learning_rate": 5.389205621805202e-07, - "loss": 1.0099, - "step": 10218 - }, - { - "epoch": 0.7679993987674734, - "grad_norm": 2.5900253761550673, - "learning_rate": 5.385881554440397e-07, - "loss": 0.9611, - "step": 10219 - }, - { - "epoch": 0.7680745528333083, - "grad_norm": 1.683516285966229, - "learning_rate": 5.382558353018069e-07, - "loss": 0.9767, - "step": 10220 - }, - { - "epoch": 0.7681497068991432, - "grad_norm": 1.0108578233406982, - "learning_rate": 5.37923601773513e-07, - "loss": 0.9825, - "step": 10221 - }, - { - "epoch": 0.7682248609649782, - "grad_norm": 1.725250297852598, - "learning_rate": 5.375914548788447e-07, - "loss": 0.9739, - "step": 10222 - }, - { - "epoch": 0.7683000150308131, - "grad_norm": 3.144983408388363, - "learning_rate": 5.372593946374826e-07, - "loss": 0.9582, - "step": 10223 - }, - { - "epoch": 0.7683751690966482, - "grad_norm": 1.9946729526769218, - "learning_rate": 5.369274210691022e-07, - "loss": 1.0547, - "step": 10224 - }, - { - "epoch": 0.7684503231624831, - "grad_norm": 1.6227579054674421, - "learning_rate": 5.36595534193375e-07, - "loss": 1.0072, - "step": 10225 - }, - { - "epoch": 0.7685254772283181, - "grad_norm": 1.5736828215958385, - "learning_rate": 5.362637340299662e-07, - "loss": 1.0468, - "step": 10226 - }, - { - "epoch": 0.768600631294153, - "grad_norm": 2.0070793505436355, - "learning_rate": 5.359320205985363e-07, - "loss": 0.9783, - "step": 10227 - }, - { - "epoch": 0.7686757853599879, - "grad_norm": 1.617846589608902, - "learning_rate": 5.356003939187402e-07, - "loss": 0.9526, - "step": 10228 - }, - { - "epoch": 0.768750939425823, - "grad_norm": 1.9203679448466016, - "learning_rate": 5.352688540102279e-07, - "loss": 0.8314, - "step": 10229 - }, - { - "epoch": 0.7688260934916579, - "grad_norm": 1.7901972874015377, - "learning_rate": 5.349374008926454e-07, - "loss": 0.9765, - "step": 10230 - }, - { - "epoch": 0.7689012475574929, - "grad_norm": 1.6153682482592415, - "learning_rate": 5.34606034585632e-07, - "loss": 0.9401, - "step": 10231 - }, - { - "epoch": 0.7689764016233278, - "grad_norm": 1.8313824576427227, - "learning_rate": 5.342747551088225e-07, - "loss": 0.9208, - "step": 10232 - }, - { - "epoch": 0.7690515556891628, - "grad_norm": 3.9819401500654945, - "learning_rate": 5.339435624818463e-07, - "loss": 0.9138, - "step": 10233 - }, - { - "epoch": 0.7691267097549978, - "grad_norm": 1.6502278083981605, - "learning_rate": 5.336124567243275e-07, - "loss": 0.9369, - "step": 10234 - }, - { - "epoch": 0.7692018638208327, - "grad_norm": 1.5186562654430988, - "learning_rate": 5.332814378558861e-07, - "loss": 0.9872, - "step": 10235 - }, - { - "epoch": 0.7692770178866677, - "grad_norm": 2.223560652066678, - "learning_rate": 5.329505058961361e-07, - "loss": 0.9862, - "step": 10236 - }, - { - "epoch": 0.7693521719525026, - "grad_norm": 1.6961312140953209, - "learning_rate": 5.326196608646862e-07, - "loss": 0.9398, - "step": 10237 - }, - { - "epoch": 0.7694273260183376, - "grad_norm": 2.1977690395536795, - "learning_rate": 5.322889027811402e-07, - "loss": 0.9186, - "step": 10238 - }, - { - "epoch": 0.7695024800841725, - "grad_norm": 2.3328864032871572, - "learning_rate": 5.31958231665097e-07, - "loss": 0.9737, - "step": 10239 - }, - { - "epoch": 0.7695776341500075, - "grad_norm": 2.1506612470566036, - "learning_rate": 5.316276475361505e-07, - "loss": 0.8553, - "step": 10240 - }, - { - "epoch": 0.7696527882158425, - "grad_norm": 2.4900405868225195, - "learning_rate": 5.312971504138883e-07, - "loss": 0.9685, - "step": 10241 - }, - { - "epoch": 0.7697279422816774, - "grad_norm": 3.237512071505985, - "learning_rate": 5.309667403178944e-07, - "loss": 0.8902, - "step": 10242 - }, - { - "epoch": 0.7698030963475124, - "grad_norm": 1.4070639903544795, - "learning_rate": 5.306364172677455e-07, - "loss": 1.0458, - "step": 10243 - }, - { - "epoch": 0.7698782504133473, - "grad_norm": 0.7233963759002733, - "learning_rate": 5.303061812830163e-07, - "loss": 0.8164, - "step": 10244 - }, - { - "epoch": 0.7699534044791824, - "grad_norm": 1.8512760375340644, - "learning_rate": 5.299760323832734e-07, - "loss": 0.9884, - "step": 10245 - }, - { - "epoch": 0.7700285585450173, - "grad_norm": 2.541746065327516, - "learning_rate": 5.296459705880794e-07, - "loss": 0.8792, - "step": 10246 - }, - { - "epoch": 0.7701037126108522, - "grad_norm": 1.5715584438087304, - "learning_rate": 5.293159959169924e-07, - "loss": 0.9209, - "step": 10247 - }, - { - "epoch": 0.7701788666766872, - "grad_norm": 2.252381336164504, - "learning_rate": 5.289861083895642e-07, - "loss": 0.9628, - "step": 10248 - }, - { - "epoch": 0.7702540207425221, - "grad_norm": 1.351019557320015, - "learning_rate": 5.286563080253421e-07, - "loss": 0.9319, - "step": 10249 - }, - { - "epoch": 0.7703291748083572, - "grad_norm": 2.325483116080409, - "learning_rate": 5.283265948438678e-07, - "loss": 0.9271, - "step": 10250 - }, - { - "epoch": 0.7704043288741921, - "grad_norm": 1.965318990154714, - "learning_rate": 5.279969688646777e-07, - "loss": 0.9838, - "step": 10251 - }, - { - "epoch": 0.7704794829400271, - "grad_norm": 1.4595846348236812, - "learning_rate": 5.276674301073045e-07, - "loss": 1.0087, - "step": 10252 - }, - { - "epoch": 0.770554637005862, - "grad_norm": 1.5801768042231794, - "learning_rate": 5.273379785912739e-07, - "loss": 0.9407, - "step": 10253 - }, - { - "epoch": 0.7706297910716969, - "grad_norm": 2.1108086129462995, - "learning_rate": 5.270086143361072e-07, - "loss": 0.8694, - "step": 10254 - }, - { - "epoch": 0.770704945137532, - "grad_norm": 2.953312297714556, - "learning_rate": 5.266793373613207e-07, - "loss": 0.9652, - "step": 10255 - }, - { - "epoch": 0.7707800992033669, - "grad_norm": 1.6412793988792849, - "learning_rate": 5.263501476864245e-07, - "loss": 0.9428, - "step": 10256 - }, - { - "epoch": 0.7708552532692019, - "grad_norm": 1.7590916027617034, - "learning_rate": 5.260210453309257e-07, - "loss": 0.9034, - "step": 10257 - }, - { - "epoch": 0.7709304073350368, - "grad_norm": 1.7046577485982215, - "learning_rate": 5.256920303143242e-07, - "loss": 0.9575, - "step": 10258 - }, - { - "epoch": 0.7710055614008718, - "grad_norm": 2.5902392975713386, - "learning_rate": 5.253631026561154e-07, - "loss": 0.9405, - "step": 10259 - }, - { - "epoch": 0.7710807154667068, - "grad_norm": 1.6717934276609978, - "learning_rate": 5.250342623757889e-07, - "loss": 0.8502, - "step": 10260 - }, - { - "epoch": 0.7711558695325417, - "grad_norm": 2.201968599745218, - "learning_rate": 5.24705509492831e-07, - "loss": 0.8701, - "step": 10261 - }, - { - "epoch": 0.7712310235983767, - "grad_norm": 1.9202860024147306, - "learning_rate": 5.243768440267209e-07, - "loss": 0.8418, - "step": 10262 - }, - { - "epoch": 0.7713061776642116, - "grad_norm": 2.2017402355176383, - "learning_rate": 5.240482659969332e-07, - "loss": 0.9663, - "step": 10263 - }, - { - "epoch": 0.7713813317300466, - "grad_norm": 1.8340249346553334, - "learning_rate": 5.237197754229376e-07, - "loss": 0.9033, - "step": 10264 - }, - { - "epoch": 0.7714564857958816, - "grad_norm": 1.7279331066243935, - "learning_rate": 5.23391372324198e-07, - "loss": 0.933, - "step": 10265 - }, - { - "epoch": 0.7715316398617165, - "grad_norm": 2.710046363377663, - "learning_rate": 5.230630567201744e-07, - "loss": 0.8309, - "step": 10266 - }, - { - "epoch": 0.7716067939275515, - "grad_norm": 1.6558432889401329, - "learning_rate": 5.227348286303201e-07, - "loss": 0.9145, - "step": 10267 - }, - { - "epoch": 0.7716819479933864, - "grad_norm": 1.9082544138330213, - "learning_rate": 5.224066880740836e-07, - "loss": 0.9928, - "step": 10268 - }, - { - "epoch": 0.7717571020592214, - "grad_norm": 1.4756848991933045, - "learning_rate": 5.220786350709094e-07, - "loss": 0.9583, - "step": 10269 - }, - { - "epoch": 0.7718322561250563, - "grad_norm": 2.6275331899069325, - "learning_rate": 5.217506696402354e-07, - "loss": 0.956, - "step": 10270 - }, - { - "epoch": 0.7719074101908914, - "grad_norm": 1.9686035828413329, - "learning_rate": 5.214227918014951e-07, - "loss": 1.0503, - "step": 10271 - }, - { - "epoch": 0.7719825642567263, - "grad_norm": 1.6707969826464433, - "learning_rate": 5.21095001574116e-07, - "loss": 0.8861, - "step": 10272 - }, - { - "epoch": 0.7720577183225612, - "grad_norm": 1.7416120870455523, - "learning_rate": 5.207672989775205e-07, - "loss": 0.9708, - "step": 10273 - }, - { - "epoch": 0.7721328723883962, - "grad_norm": 1.7028702011595218, - "learning_rate": 5.204396840311276e-07, - "loss": 1.0153, - "step": 10274 - }, - { - "epoch": 0.7722080264542311, - "grad_norm": 2.5043345068753613, - "learning_rate": 5.20112156754349e-07, - "loss": 0.9999, - "step": 10275 - }, - { - "epoch": 0.7722831805200662, - "grad_norm": 1.6721370170841856, - "learning_rate": 5.197847171665919e-07, - "loss": 0.9579, - "step": 10276 - }, - { - "epoch": 0.7723583345859011, - "grad_norm": 1.744404976729703, - "learning_rate": 5.194573652872585e-07, - "loss": 1.0171, - "step": 10277 - }, - { - "epoch": 0.7724334886517361, - "grad_norm": 1.6261462057242446, - "learning_rate": 5.191301011357451e-07, - "loss": 0.9322, - "step": 10278 - }, - { - "epoch": 0.772508642717571, - "grad_norm": 1.822990837404763, - "learning_rate": 5.188029247314442e-07, - "loss": 0.8914, - "step": 10279 - }, - { - "epoch": 0.7725837967834059, - "grad_norm": 5.558934129599537, - "learning_rate": 5.184758360937422e-07, - "loss": 1.011, - "step": 10280 - }, - { - "epoch": 0.772658950849241, - "grad_norm": 1.7110781677110651, - "learning_rate": 5.181488352420198e-07, - "loss": 0.9708, - "step": 10281 - }, - { - "epoch": 0.7727341049150759, - "grad_norm": 1.775945830532863, - "learning_rate": 5.178219221956528e-07, - "loss": 1.0453, - "step": 10282 - }, - { - "epoch": 0.7728092589809109, - "grad_norm": 1.5367888876303222, - "learning_rate": 5.17495096974013e-07, - "loss": 0.9319, - "step": 10283 - }, - { - "epoch": 0.7728844130467458, - "grad_norm": 36.857036805871616, - "learning_rate": 5.17168359596466e-07, - "loss": 1.0119, - "step": 10284 - }, - { - "epoch": 0.7729595671125807, - "grad_norm": 2.020959699422394, - "learning_rate": 5.168417100823707e-07, - "loss": 0.8798, - "step": 10285 - }, - { - "epoch": 0.7730347211784158, - "grad_norm": 1.870231158814959, - "learning_rate": 5.16515148451085e-07, - "loss": 0.999, - "step": 10286 - }, - { - "epoch": 0.7731098752442507, - "grad_norm": 1.582830382657495, - "learning_rate": 5.161886747219562e-07, - "loss": 0.9073, - "step": 10287 - }, - { - "epoch": 0.7731850293100857, - "grad_norm": 1.8467246160584292, - "learning_rate": 5.158622889143309e-07, - "loss": 0.9428, - "step": 10288 - }, - { - "epoch": 0.7732601833759206, - "grad_norm": 1.7943013875617797, - "learning_rate": 5.155359910475481e-07, - "loss": 0.9946, - "step": 10289 - }, - { - "epoch": 0.7733353374417556, - "grad_norm": 2.3290632776596634, - "learning_rate": 5.152097811409415e-07, - "loss": 0.9071, - "step": 10290 - }, - { - "epoch": 0.7734104915075906, - "grad_norm": 2.067403801337127, - "learning_rate": 5.148836592138417e-07, - "loss": 0.9382, - "step": 10291 - }, - { - "epoch": 0.7734856455734255, - "grad_norm": 0.9819377821307056, - "learning_rate": 5.145576252855719e-07, - "loss": 0.8758, - "step": 10292 - }, - { - "epoch": 0.7735607996392605, - "grad_norm": 5.537426963996166, - "learning_rate": 5.142316793754511e-07, - "loss": 0.9696, - "step": 10293 - }, - { - "epoch": 0.7736359537050954, - "grad_norm": 1.4084007769956561, - "learning_rate": 5.139058215027927e-07, - "loss": 0.9136, - "step": 10294 - }, - { - "epoch": 0.7737111077709304, - "grad_norm": 1.7905109655475366, - "learning_rate": 5.135800516869042e-07, - "loss": 0.9858, - "step": 10295 - }, - { - "epoch": 0.7737862618367654, - "grad_norm": 2.2002213193369755, - "learning_rate": 5.132543699470904e-07, - "loss": 0.9263, - "step": 10296 - }, - { - "epoch": 0.7738614159026004, - "grad_norm": 1.6032005307137682, - "learning_rate": 5.129287763026479e-07, - "loss": 0.9365, - "step": 10297 - }, - { - "epoch": 0.7739365699684353, - "grad_norm": 2.2793529408807145, - "learning_rate": 5.126032707728702e-07, - "loss": 0.9255, - "step": 10298 - }, - { - "epoch": 0.7740117240342702, - "grad_norm": 1.5657152283151552, - "learning_rate": 5.122778533770442e-07, - "loss": 0.9481, - "step": 10299 - }, - { - "epoch": 0.7740868781001052, - "grad_norm": 1.486533512957947, - "learning_rate": 5.119525241344515e-07, - "loss": 1.0031, - "step": 10300 - }, - { - "epoch": 0.7741620321659402, - "grad_norm": 2.3660620100548515, - "learning_rate": 5.116272830643707e-07, - "loss": 0.9844, - "step": 10301 - }, - { - "epoch": 0.7742371862317752, - "grad_norm": 1.7018891496521908, - "learning_rate": 5.113021301860725e-07, - "loss": 0.9675, - "step": 10302 - }, - { - "epoch": 0.7743123402976101, - "grad_norm": 3.511596405987237, - "learning_rate": 5.109770655188236e-07, - "loss": 0.9228, - "step": 10303 - }, - { - "epoch": 0.7743874943634451, - "grad_norm": 2.1942148960413532, - "learning_rate": 5.106520890818853e-07, - "loss": 0.9958, - "step": 10304 - }, - { - "epoch": 0.77446264842928, - "grad_norm": 1.78333535179428, - "learning_rate": 5.103272008945141e-07, - "loss": 1.0079, - "step": 10305 - }, - { - "epoch": 0.774537802495115, - "grad_norm": 0.8031334546308389, - "learning_rate": 5.100024009759605e-07, - "loss": 0.9007, - "step": 10306 - }, - { - "epoch": 0.77461295656095, - "grad_norm": 1.5152434035816749, - "learning_rate": 5.096776893454697e-07, - "loss": 1.0247, - "step": 10307 - }, - { - "epoch": 0.7746881106267849, - "grad_norm": 2.4925018399307364, - "learning_rate": 5.09353066022284e-07, - "loss": 0.9586, - "step": 10308 - }, - { - "epoch": 0.7747632646926199, - "grad_norm": 5.261878245541077, - "learning_rate": 5.090285310256359e-07, - "loss": 1.0276, - "step": 10309 - }, - { - "epoch": 0.7748384187584548, - "grad_norm": 1.636106317329976, - "learning_rate": 5.087040843747572e-07, - "loss": 0.9019, - "step": 10310 - }, - { - "epoch": 0.7749135728242897, - "grad_norm": 2.4442726337375564, - "learning_rate": 5.08379726088872e-07, - "loss": 0.9326, - "step": 10311 - }, - { - "epoch": 0.7749887268901248, - "grad_norm": 1.4780629925292912, - "learning_rate": 5.080554561871995e-07, - "loss": 0.8685, - "step": 10312 - }, - { - "epoch": 0.7750638809559597, - "grad_norm": 2.113573431381391, - "learning_rate": 5.077312746889547e-07, - "loss": 0.9504, - "step": 10313 - }, - { - "epoch": 0.7751390350217947, - "grad_norm": 1.807514305216424, - "learning_rate": 5.074071816133461e-07, - "loss": 0.9363, - "step": 10314 - }, - { - "epoch": 0.7752141890876296, - "grad_norm": 2.025679476675449, - "learning_rate": 5.070831769795773e-07, - "loss": 0.9502, - "step": 10315 - }, - { - "epoch": 0.7752893431534646, - "grad_norm": 1.6856228312929973, - "learning_rate": 5.067592608068474e-07, - "loss": 1.0588, - "step": 10316 - }, - { - "epoch": 0.7753644972192996, - "grad_norm": 4.8389195136234875, - "learning_rate": 5.064354331143485e-07, - "loss": 1.0143, - "step": 10317 - }, - { - "epoch": 0.7754396512851345, - "grad_norm": 1.6581091612667966, - "learning_rate": 5.061116939212702e-07, - "loss": 0.9786, - "step": 10318 - }, - { - "epoch": 0.7755148053509695, - "grad_norm": 2.676735901212308, - "learning_rate": 5.057880432467943e-07, - "loss": 1.0244, - "step": 10319 - }, - { - "epoch": 0.7755899594168044, - "grad_norm": 0.7204111639871332, - "learning_rate": 5.054644811100986e-07, - "loss": 0.8861, - "step": 10320 - }, - { - "epoch": 0.7756651134826394, - "grad_norm": 2.275830769024688, - "learning_rate": 5.051410075303555e-07, - "loss": 0.9306, - "step": 10321 - }, - { - "epoch": 0.7757402675484744, - "grad_norm": 1.498434521430499, - "learning_rate": 5.048176225267311e-07, - "loss": 0.968, - "step": 10322 - }, - { - "epoch": 0.7758154216143094, - "grad_norm": 2.7694416327564793, - "learning_rate": 5.044943261183887e-07, - "loss": 1.0853, - "step": 10323 - }, - { - "epoch": 0.7758905756801443, - "grad_norm": 2.1821184719311675, - "learning_rate": 5.041711183244842e-07, - "loss": 0.9598, - "step": 10324 - }, - { - "epoch": 0.7759657297459792, - "grad_norm": 3.003276783802071, - "learning_rate": 5.038479991641689e-07, - "loss": 1.0407, - "step": 10325 - }, - { - "epoch": 0.7760408838118142, - "grad_norm": 2.132295427069045, - "learning_rate": 5.035249686565881e-07, - "loss": 0.9769, - "step": 10326 - }, - { - "epoch": 0.7761160378776492, - "grad_norm": 2.5714312962108887, - "learning_rate": 5.032020268208838e-07, - "loss": 0.9819, - "step": 10327 - }, - { - "epoch": 0.7761911919434842, - "grad_norm": 5.820947558684671, - "learning_rate": 5.028791736761913e-07, - "loss": 1.0253, - "step": 10328 - }, - { - "epoch": 0.7762663460093191, - "grad_norm": 1.7626131413716013, - "learning_rate": 5.025564092416397e-07, - "loss": 1.0445, - "step": 10329 - }, - { - "epoch": 0.776341500075154, - "grad_norm": 1.6532815927566968, - "learning_rate": 5.022337335363558e-07, - "loss": 1.0376, - "step": 10330 - }, - { - "epoch": 0.776416654140989, - "grad_norm": 1.6113432172588658, - "learning_rate": 5.019111465794583e-07, - "loss": 0.9666, - "step": 10331 - }, - { - "epoch": 0.776491808206824, - "grad_norm": 1.6745617392803107, - "learning_rate": 5.01588648390062e-07, - "loss": 0.9653, - "step": 10332 - }, - { - "epoch": 0.776566962272659, - "grad_norm": 2.047247932763779, - "learning_rate": 5.012662389872762e-07, - "loss": 1.0235, - "step": 10333 - }, - { - "epoch": 0.7766421163384939, - "grad_norm": 2.039571998052949, - "learning_rate": 5.009439183902043e-07, - "loss": 0.9326, - "step": 10334 - }, - { - "epoch": 0.7767172704043289, - "grad_norm": 2.179259227393998, - "learning_rate": 5.006216866179458e-07, - "loss": 0.9372, - "step": 10335 - }, - { - "epoch": 0.7767924244701638, - "grad_norm": 1.5478608043888131, - "learning_rate": 5.002995436895938e-07, - "loss": 0.9932, - "step": 10336 - }, - { - "epoch": 0.7768675785359987, - "grad_norm": 1.8102769043430758, - "learning_rate": 4.999774896242368e-07, - "loss": 1.0471, - "step": 10337 - }, - { - "epoch": 0.7769427326018338, - "grad_norm": 1.5789895286285323, - "learning_rate": 4.996555244409575e-07, - "loss": 1.0262, - "step": 10338 - }, - { - "epoch": 0.7770178866676687, - "grad_norm": 0.9893207134648381, - "learning_rate": 4.993336481588331e-07, - "loss": 0.9649, - "step": 10339 - }, - { - "epoch": 0.7770930407335037, - "grad_norm": 1.8945051997645228, - "learning_rate": 4.990118607969367e-07, - "loss": 0.9223, - "step": 10340 - }, - { - "epoch": 0.7771681947993386, - "grad_norm": 2.547810295007641, - "learning_rate": 4.986901623743356e-07, - "loss": 0.9525, - "step": 10341 - }, - { - "epoch": 0.7772433488651737, - "grad_norm": 3.5335824740641306, - "learning_rate": 4.98368552910091e-07, - "loss": 0.9761, - "step": 10342 - }, - { - "epoch": 0.7773185029310086, - "grad_norm": 2.220998457542684, - "learning_rate": 4.9804703242326e-07, - "loss": 0.8736, - "step": 10343 - }, - { - "epoch": 0.7773936569968435, - "grad_norm": 1.7921931432154574, - "learning_rate": 4.97725600932893e-07, - "loss": 0.951, - "step": 10344 - }, - { - "epoch": 0.7774688110626785, - "grad_norm": 2.295400029143662, - "learning_rate": 4.974042584580372e-07, - "loss": 0.9827, - "step": 10345 - }, - { - "epoch": 0.7775439651285134, - "grad_norm": 2.2187080916624207, - "learning_rate": 4.97083005017733e-07, - "loss": 0.9433, - "step": 10346 - }, - { - "epoch": 0.7776191191943485, - "grad_norm": 1.685747154598898, - "learning_rate": 4.967618406310158e-07, - "loss": 0.9065, - "step": 10347 - }, - { - "epoch": 0.7776942732601834, - "grad_norm": 1.7181944209256343, - "learning_rate": 4.964407653169154e-07, - "loss": 1.0111, - "step": 10348 - }, - { - "epoch": 0.7777694273260184, - "grad_norm": 1.7520107570256995, - "learning_rate": 4.961197790944576e-07, - "loss": 0.8287, - "step": 10349 - }, - { - "epoch": 0.7778445813918533, - "grad_norm": 9.836636737332842, - "learning_rate": 4.957988819826617e-07, - "loss": 0.8894, - "step": 10350 - }, - { - "epoch": 0.7779197354576882, - "grad_norm": 1.517807401915972, - "learning_rate": 4.954780740005413e-07, - "loss": 0.9702, - "step": 10351 - }, - { - "epoch": 0.7779948895235232, - "grad_norm": 1.9818071586765242, - "learning_rate": 4.951573551671069e-07, - "loss": 0.9494, - "step": 10352 - }, - { - "epoch": 0.7780700435893582, - "grad_norm": 1.3370100924525388, - "learning_rate": 4.948367255013617e-07, - "loss": 0.9258, - "step": 10353 - }, - { - "epoch": 0.7781451976551932, - "grad_norm": 1.3431731915603997, - "learning_rate": 4.945161850223041e-07, - "loss": 0.9945, - "step": 10354 - }, - { - "epoch": 0.7782203517210281, - "grad_norm": 1.6540551667705823, - "learning_rate": 4.941957337489273e-07, - "loss": 1.0543, - "step": 10355 - }, - { - "epoch": 0.778295505786863, - "grad_norm": 1.843437232951772, - "learning_rate": 4.938753717002189e-07, - "loss": 0.9105, - "step": 10356 - }, - { - "epoch": 0.778370659852698, - "grad_norm": 1.6043427688576908, - "learning_rate": 4.935550988951627e-07, - "loss": 0.9987, - "step": 10357 - }, - { - "epoch": 0.778445813918533, - "grad_norm": 2.3757697414030385, - "learning_rate": 4.932349153527353e-07, - "loss": 0.9523, - "step": 10358 - }, - { - "epoch": 0.778520967984368, - "grad_norm": 1.7753582606287566, - "learning_rate": 4.929148210919092e-07, - "loss": 0.9076, - "step": 10359 - }, - { - "epoch": 0.7785961220502029, - "grad_norm": 4.950969431573773, - "learning_rate": 4.925948161316506e-07, - "loss": 0.9505, - "step": 10360 - }, - { - "epoch": 0.7786712761160379, - "grad_norm": 1.5620459850639308, - "learning_rate": 4.922749004909213e-07, - "loss": 0.8262, - "step": 10361 - }, - { - "epoch": 0.7787464301818728, - "grad_norm": 1.643354778859154, - "learning_rate": 4.919550741886777e-07, - "loss": 0.9965, - "step": 10362 - }, - { - "epoch": 0.7788215842477078, - "grad_norm": 2.704673808485755, - "learning_rate": 4.916353372438711e-07, - "loss": 1.0201, - "step": 10363 - }, - { - "epoch": 0.7788967383135428, - "grad_norm": 1.7076257155064845, - "learning_rate": 4.913156896754462e-07, - "loss": 0.9423, - "step": 10364 - }, - { - "epoch": 0.7789718923793777, - "grad_norm": 2.363804480269289, - "learning_rate": 4.909961315023441e-07, - "loss": 1.0415, - "step": 10365 - }, - { - "epoch": 0.7790470464452127, - "grad_norm": 2.1383490053089695, - "learning_rate": 4.90676662743499e-07, - "loss": 0.848, - "step": 10366 - }, - { - "epoch": 0.7791222005110476, - "grad_norm": 2.379036334510343, - "learning_rate": 4.903572834178417e-07, - "loss": 0.9112, - "step": 10367 - }, - { - "epoch": 0.7791973545768827, - "grad_norm": 1.655232314777717, - "learning_rate": 4.900379935442964e-07, - "loss": 0.9609, - "step": 10368 - }, - { - "epoch": 0.7792725086427176, - "grad_norm": 1.4959889666563533, - "learning_rate": 4.897187931417817e-07, - "loss": 0.9841, - "step": 10369 - }, - { - "epoch": 0.7793476627085525, - "grad_norm": 1.5835286906672261, - "learning_rate": 4.893996822292115e-07, - "loss": 0.9525, - "step": 10370 - }, - { - "epoch": 0.7794228167743875, - "grad_norm": 1.8380252736110854, - "learning_rate": 4.89080660825495e-07, - "loss": 0.9759, - "step": 10371 - }, - { - "epoch": 0.7794979708402224, - "grad_norm": 2.376887826014782, - "learning_rate": 4.887617289495349e-07, - "loss": 0.9356, - "step": 10372 - }, - { - "epoch": 0.7795731249060575, - "grad_norm": 1.7965739122321234, - "learning_rate": 4.884428866202288e-07, - "loss": 0.9592, - "step": 10373 - }, - { - "epoch": 0.7796482789718924, - "grad_norm": 1.8496189781833925, - "learning_rate": 4.881241338564706e-07, - "loss": 1.058, - "step": 10374 - }, - { - "epoch": 0.7797234330377273, - "grad_norm": 0.7510521581572172, - "learning_rate": 4.878054706771466e-07, - "loss": 0.8532, - "step": 10375 - }, - { - "epoch": 0.7797985871035623, - "grad_norm": 2.030257466718089, - "learning_rate": 4.87486897101139e-07, - "loss": 0.9835, - "step": 10376 - }, - { - "epoch": 0.7798737411693972, - "grad_norm": 2.70824718827403, - "learning_rate": 4.871684131473246e-07, - "loss": 0.9917, - "step": 10377 - }, - { - "epoch": 0.7799488952352323, - "grad_norm": 2.161422275717377, - "learning_rate": 4.868500188345744e-07, - "loss": 0.9642, - "step": 10378 - }, - { - "epoch": 0.7800240493010672, - "grad_norm": 2.0235403616546326, - "learning_rate": 4.865317141817551e-07, - "loss": 0.9655, - "step": 10379 - }, - { - "epoch": 0.7800992033669022, - "grad_norm": 1.8733632255128687, - "learning_rate": 4.862134992077274e-07, - "loss": 1.0212, - "step": 10380 - }, - { - "epoch": 0.7801743574327371, - "grad_norm": 22.61655736384712, - "learning_rate": 4.858953739313463e-07, - "loss": 1.0008, - "step": 10381 - }, - { - "epoch": 0.780249511498572, - "grad_norm": 2.5108050498378036, - "learning_rate": 4.855773383714623e-07, - "loss": 0.9359, - "step": 10382 - }, - { - "epoch": 0.780324665564407, - "grad_norm": 2.469897812185095, - "learning_rate": 4.852593925469198e-07, - "loss": 1.0276, - "step": 10383 - }, - { - "epoch": 0.780399819630242, - "grad_norm": 0.6954810334738318, - "learning_rate": 4.849415364765587e-07, - "loss": 0.8111, - "step": 10384 - }, - { - "epoch": 0.780474973696077, - "grad_norm": 2.9807685511283006, - "learning_rate": 4.846237701792136e-07, - "loss": 0.8816, - "step": 10385 - }, - { - "epoch": 0.7805501277619119, - "grad_norm": 1.3125445494584362, - "learning_rate": 4.843060936737125e-07, - "loss": 0.9703, - "step": 10386 - }, - { - "epoch": 0.7806252818277469, - "grad_norm": 2.422462736616733, - "learning_rate": 4.839885069788796e-07, - "loss": 1.0059, - "step": 10387 - }, - { - "epoch": 0.7807004358935818, - "grad_norm": 1.8891915806542536, - "learning_rate": 4.836710101135322e-07, - "loss": 1.0113, - "step": 10388 - }, - { - "epoch": 0.7807755899594168, - "grad_norm": 1.9571662901926425, - "learning_rate": 4.833536030964842e-07, - "loss": 0.8181, - "step": 10389 - }, - { - "epoch": 0.7808507440252518, - "grad_norm": 2.717803790422941, - "learning_rate": 4.830362859465431e-07, - "loss": 0.954, - "step": 10390 - }, - { - "epoch": 0.7809258980910867, - "grad_norm": 1.5283826333889037, - "learning_rate": 4.827190586825109e-07, - "loss": 1.0414, - "step": 10391 - }, - { - "epoch": 0.7810010521569217, - "grad_norm": 1.5953373203719283, - "learning_rate": 4.824019213231838e-07, - "loss": 1.0519, - "step": 10392 - }, - { - "epoch": 0.7810762062227566, - "grad_norm": 1.365724140121423, - "learning_rate": 4.820848738873549e-07, - "loss": 0.8529, - "step": 10393 - }, - { - "epoch": 0.7811513602885917, - "grad_norm": 2.011627847043588, - "learning_rate": 4.817679163938095e-07, - "loss": 1.0039, - "step": 10394 - }, - { - "epoch": 0.7812265143544266, - "grad_norm": 1.9733373089483954, - "learning_rate": 4.814510488613284e-07, - "loss": 1.0264, - "step": 10395 - }, - { - "epoch": 0.7813016684202615, - "grad_norm": 2.011780969342741, - "learning_rate": 4.811342713086881e-07, - "loss": 1.0043, - "step": 10396 - }, - { - "epoch": 0.7813768224860965, - "grad_norm": 1.5070553000010825, - "learning_rate": 4.808175837546582e-07, - "loss": 1.0135, - "step": 10397 - }, - { - "epoch": 0.7814519765519314, - "grad_norm": 1.9894739556996228, - "learning_rate": 4.805009862180038e-07, - "loss": 1.0604, - "step": 10398 - }, - { - "epoch": 0.7815271306177665, - "grad_norm": 1.5191141453360437, - "learning_rate": 4.801844787174847e-07, - "loss": 0.9068, - "step": 10399 - }, - { - "epoch": 0.7816022846836014, - "grad_norm": 0.8222543892353268, - "learning_rate": 4.798680612718544e-07, - "loss": 0.8556, - "step": 10400 - }, - { - "epoch": 0.7816774387494363, - "grad_norm": 3.4641587398445397, - "learning_rate": 4.795517338998629e-07, - "loss": 0.8823, - "step": 10401 - }, - { - "epoch": 0.7817525928152713, - "grad_norm": 1.4144247023226968, - "learning_rate": 4.792354966202534e-07, - "loss": 1.078, - "step": 10402 - }, - { - "epoch": 0.7818277468811062, - "grad_norm": 2.059767097343846, - "learning_rate": 4.78919349451764e-07, - "loss": 0.9649, - "step": 10403 - }, - { - "epoch": 0.7819029009469413, - "grad_norm": 4.597455664499238, - "learning_rate": 4.78603292413128e-07, - "loss": 0.8771, - "step": 10404 - }, - { - "epoch": 0.7819780550127762, - "grad_norm": 1.681212119203969, - "learning_rate": 4.78287325523072e-07, - "loss": 0.8812, - "step": 10405 - }, - { - "epoch": 0.7820532090786112, - "grad_norm": 1.9634612966695542, - "learning_rate": 4.779714488003197e-07, - "loss": 0.996, - "step": 10406 - }, - { - "epoch": 0.7821283631444461, - "grad_norm": 1.7134452800445392, - "learning_rate": 4.776556622635872e-07, - "loss": 0.9419, - "step": 10407 - }, - { - "epoch": 0.782203517210281, - "grad_norm": 1.875482179313492, - "learning_rate": 4.77339965931586e-07, - "loss": 0.9567, - "step": 10408 - }, - { - "epoch": 0.782278671276116, - "grad_norm": 2.476749160422994, - "learning_rate": 4.770243598230228e-07, - "loss": 1.0251, - "step": 10409 - }, - { - "epoch": 0.782353825341951, - "grad_norm": 1.6968828654238677, - "learning_rate": 4.7670884395659737e-07, - "loss": 1.009, - "step": 10410 - }, - { - "epoch": 0.782428979407786, - "grad_norm": 0.7548938666517614, - "learning_rate": 4.7639341835100654e-07, - "loss": 0.8904, - "step": 10411 - }, - { - "epoch": 0.7825041334736209, - "grad_norm": 3.8465807655787763, - "learning_rate": 4.7607808302494003e-07, - "loss": 0.8888, - "step": 10412 - }, - { - "epoch": 0.7825792875394559, - "grad_norm": 1.7402550624384143, - "learning_rate": 4.757628379970826e-07, - "loss": 0.9125, - "step": 10413 - }, - { - "epoch": 0.7826544416052909, - "grad_norm": 1.4960039475383797, - "learning_rate": 4.7544768328611317e-07, - "loss": 0.9649, - "step": 10414 - }, - { - "epoch": 0.7827295956711258, - "grad_norm": 1.6400350327355369, - "learning_rate": 4.7513261891070676e-07, - "loss": 0.9337, - "step": 10415 - }, - { - "epoch": 0.7828047497369608, - "grad_norm": 1.666917742255205, - "learning_rate": 4.74817644889532e-07, - "loss": 0.9151, - "step": 10416 - }, - { - "epoch": 0.7828799038027957, - "grad_norm": 1.831821388962079, - "learning_rate": 4.7450276124125153e-07, - "loss": 0.9836, - "step": 10417 - }, - { - "epoch": 0.7829550578686307, - "grad_norm": 8.866231105498732, - "learning_rate": 4.741879679845244e-07, - "loss": 0.9841, - "step": 10418 - }, - { - "epoch": 0.7830302119344656, - "grad_norm": 1.8339663048647188, - "learning_rate": 4.738732651380031e-07, - "loss": 0.9332, - "step": 10419 - }, - { - "epoch": 0.7831053660003006, - "grad_norm": 1.8343928555556996, - "learning_rate": 4.7355865272033455e-07, - "loss": 0.9266, - "step": 10420 - }, - { - "epoch": 0.7831805200661356, - "grad_norm": 2.0103174162036206, - "learning_rate": 4.7324413075016114e-07, - "loss": 0.976, - "step": 10421 - }, - { - "epoch": 0.7832556741319705, - "grad_norm": 1.7796966070221814, - "learning_rate": 4.729296992461187e-07, - "loss": 0.946, - "step": 10422 - }, - { - "epoch": 0.7833308281978055, - "grad_norm": 1.4260198524131136, - "learning_rate": 4.726153582268397e-07, - "loss": 0.9567, - "step": 10423 - }, - { - "epoch": 0.7834059822636404, - "grad_norm": 3.8093259762871075, - "learning_rate": 4.7230110771094933e-07, - "loss": 1.0174, - "step": 10424 - }, - { - "epoch": 0.7834811363294755, - "grad_norm": 1.4203362994828161, - "learning_rate": 4.7198694771706836e-07, - "loss": 0.9252, - "step": 10425 - }, - { - "epoch": 0.7835562903953104, - "grad_norm": 2.63177515076485, - "learning_rate": 4.7167287826381196e-07, - "loss": 1.0033, - "step": 10426 - }, - { - "epoch": 0.7836314444611453, - "grad_norm": 1.6464556310278262, - "learning_rate": 4.713588993697892e-07, - "loss": 0.9971, - "step": 10427 - }, - { - "epoch": 0.7837065985269803, - "grad_norm": 1.5550462318490297, - "learning_rate": 4.7104501105360594e-07, - "loss": 1.019, - "step": 10428 - }, - { - "epoch": 0.7837817525928152, - "grad_norm": 1.571357948400667, - "learning_rate": 4.7073121333386056e-07, - "loss": 1.0859, - "step": 10429 - }, - { - "epoch": 0.7838569066586503, - "grad_norm": 2.025964756725942, - "learning_rate": 4.7041750622914645e-07, - "loss": 0.9319, - "step": 10430 - }, - { - "epoch": 0.7839320607244852, - "grad_norm": 6.653692336147018, - "learning_rate": 4.701038897580525e-07, - "loss": 0.9726, - "step": 10431 - }, - { - "epoch": 0.7840072147903202, - "grad_norm": 2.038987666251319, - "learning_rate": 4.6979036393916093e-07, - "loss": 0.9823, - "step": 10432 - }, - { - "epoch": 0.7840823688561551, - "grad_norm": 2.7068953486866563, - "learning_rate": 4.694769287910503e-07, - "loss": 0.9354, - "step": 10433 - }, - { - "epoch": 0.78415752292199, - "grad_norm": 27.50325797462457, - "learning_rate": 4.6916358433229233e-07, - "loss": 0.9731, - "step": 10434 - }, - { - "epoch": 0.7842326769878251, - "grad_norm": 1.8088833471994912, - "learning_rate": 4.688503305814542e-07, - "loss": 1.0039, - "step": 10435 - }, - { - "epoch": 0.78430783105366, - "grad_norm": 1.6700944777849496, - "learning_rate": 4.6853716755709635e-07, - "loss": 1.0095, - "step": 10436 - }, - { - "epoch": 0.784382985119495, - "grad_norm": 2.312467213991085, - "learning_rate": 4.682240952777763e-07, - "loss": 0.858, - "step": 10437 - }, - { - "epoch": 0.7844581391853299, - "grad_norm": 1.4907187378397977, - "learning_rate": 4.679111137620442e-07, - "loss": 1.0043, - "step": 10438 - }, - { - "epoch": 0.7845332932511649, - "grad_norm": 1.9590976792616615, - "learning_rate": 4.675982230284448e-07, - "loss": 0.9869, - "step": 10439 - }, - { - "epoch": 0.7846084473169999, - "grad_norm": 3.457948241950181, - "learning_rate": 4.6728542309551923e-07, - "loss": 0.9498, - "step": 10440 - }, - { - "epoch": 0.7846836013828348, - "grad_norm": 1.8745004624279447, - "learning_rate": 4.669727139818014e-07, - "loss": 0.9462, - "step": 10441 - }, - { - "epoch": 0.7847587554486698, - "grad_norm": 3.3091080771371484, - "learning_rate": 4.6666009570582064e-07, - "loss": 0.9094, - "step": 10442 - }, - { - "epoch": 0.7848339095145047, - "grad_norm": 1.764672419053115, - "learning_rate": 4.663475682861009e-07, - "loss": 0.991, - "step": 10443 - }, - { - "epoch": 0.7849090635803397, - "grad_norm": 1.54396700858906, - "learning_rate": 4.6603513174115973e-07, - "loss": 1.0413, - "step": 10444 - }, - { - "epoch": 0.7849842176461747, - "grad_norm": 5.059890828217685, - "learning_rate": 4.6572278608951165e-07, - "loss": 0.9142, - "step": 10445 - }, - { - "epoch": 0.7850593717120096, - "grad_norm": 1.6842795831551833, - "learning_rate": 4.654105313496637e-07, - "loss": 0.9936, - "step": 10446 - }, - { - "epoch": 0.7851345257778446, - "grad_norm": 2.4402803067714283, - "learning_rate": 4.6509836754011787e-07, - "loss": 0.9851, - "step": 10447 - }, - { - "epoch": 0.7852096798436795, - "grad_norm": 1.7469694599960628, - "learning_rate": 4.647862946793715e-07, - "loss": 0.9822, - "step": 10448 - }, - { - "epoch": 0.7852848339095145, - "grad_norm": 2.247893300846859, - "learning_rate": 4.644743127859152e-07, - "loss": 0.9195, - "step": 10449 - }, - { - "epoch": 0.7853599879753494, - "grad_norm": 1.7812885481026264, - "learning_rate": 4.641624218782365e-07, - "loss": 0.9614, - "step": 10450 - }, - { - "epoch": 0.7854351420411845, - "grad_norm": 1.6445900369679411, - "learning_rate": 4.6385062197481527e-07, - "loss": 0.8524, - "step": 10451 - }, - { - "epoch": 0.7855102961070194, - "grad_norm": 2.6250669380009333, - "learning_rate": 4.635389130941272e-07, - "loss": 0.9144, - "step": 10452 - }, - { - "epoch": 0.7855854501728543, - "grad_norm": 2.1247740232700933, - "learning_rate": 4.6322729525464185e-07, - "loss": 0.9656, - "step": 10453 - }, - { - "epoch": 0.7856606042386893, - "grad_norm": 2.1659308675791857, - "learning_rate": 4.629157684748233e-07, - "loss": 0.9847, - "step": 10454 - }, - { - "epoch": 0.7857357583045242, - "grad_norm": 2.9930200439454917, - "learning_rate": 4.6260433277313215e-07, - "loss": 0.994, - "step": 10455 - }, - { - "epoch": 0.7858109123703593, - "grad_norm": 1.8516723986496035, - "learning_rate": 4.6229298816802066e-07, - "loss": 0.9653, - "step": 10456 - }, - { - "epoch": 0.7858860664361942, - "grad_norm": 1.4356759149592881, - "learning_rate": 4.619817346779391e-07, - "loss": 0.8976, - "step": 10457 - }, - { - "epoch": 0.7859612205020292, - "grad_norm": 2.232519379603222, - "learning_rate": 4.6167057232132787e-07, - "loss": 0.971, - "step": 10458 - }, - { - "epoch": 0.7860363745678641, - "grad_norm": 1.3782289046304899, - "learning_rate": 4.613595011166267e-07, - "loss": 0.8955, - "step": 10459 - }, - { - "epoch": 0.786111528633699, - "grad_norm": 2.513631563967516, - "learning_rate": 4.61048521082267e-07, - "loss": 0.9534, - "step": 10460 - }, - { - "epoch": 0.7861866826995341, - "grad_norm": 1.7327021186145055, - "learning_rate": 4.6073763223667474e-07, - "loss": 0.9452, - "step": 10461 - }, - { - "epoch": 0.786261836765369, - "grad_norm": 4.449801948512029, - "learning_rate": 4.6042683459827245e-07, - "loss": 0.999, - "step": 10462 - }, - { - "epoch": 0.786336990831204, - "grad_norm": 1.427249601753301, - "learning_rate": 4.6011612818547597e-07, - "loss": 0.9166, - "step": 10463 - }, - { - "epoch": 0.7864121448970389, - "grad_norm": 1.7396415274322106, - "learning_rate": 4.5980551301669535e-07, - "loss": 0.9971, - "step": 10464 - }, - { - "epoch": 0.7864872989628738, - "grad_norm": 2.5120528549173557, - "learning_rate": 4.5949498911033566e-07, - "loss": 1.0347, - "step": 10465 - }, - { - "epoch": 0.7865624530287089, - "grad_norm": 2.1797076261951647, - "learning_rate": 4.5918455648479647e-07, - "loss": 1.0008, - "step": 10466 - }, - { - "epoch": 0.7866376070945438, - "grad_norm": 1.6881647566711324, - "learning_rate": 4.58874215158473e-07, - "loss": 0.9973, - "step": 10467 - }, - { - "epoch": 0.7867127611603788, - "grad_norm": 2.329783111595673, - "learning_rate": 4.585639651497539e-07, - "loss": 0.882, - "step": 10468 - }, - { - "epoch": 0.7867879152262137, - "grad_norm": 1.7639476269064085, - "learning_rate": 4.5825380647702207e-07, - "loss": 1.0399, - "step": 10469 - }, - { - "epoch": 0.7868630692920487, - "grad_norm": 0.6575300584384073, - "learning_rate": 4.5794373915865625e-07, - "loss": 0.8351, - "step": 10470 - }, - { - "epoch": 0.7869382233578837, - "grad_norm": 1.845538032050974, - "learning_rate": 4.5763376321302804e-07, - "loss": 1.0136, - "step": 10471 - }, - { - "epoch": 0.7870133774237186, - "grad_norm": 1.8148696799033115, - "learning_rate": 4.573238786585061e-07, - "loss": 0.9513, - "step": 10472 - }, - { - "epoch": 0.7870885314895536, - "grad_norm": 1.6388541701064328, - "learning_rate": 4.5701408551345166e-07, - "loss": 0.9799, - "step": 10473 - }, - { - "epoch": 0.7871636855553885, - "grad_norm": 1.9048340338151812, - "learning_rate": 4.56704383796221e-07, - "loss": 0.867, - "step": 10474 - }, - { - "epoch": 0.7872388396212235, - "grad_norm": 1.8969898120001265, - "learning_rate": 4.5639477352516543e-07, - "loss": 0.9606, - "step": 10475 - }, - { - "epoch": 0.7873139936870585, - "grad_norm": 1.8693975034919705, - "learning_rate": 4.560852547186298e-07, - "loss": 0.9819, - "step": 10476 - }, - { - "epoch": 0.7873891477528935, - "grad_norm": 1.6259580868882761, - "learning_rate": 4.5577582739495545e-07, - "loss": 0.9736, - "step": 10477 - }, - { - "epoch": 0.7874643018187284, - "grad_norm": 1.9716366994176073, - "learning_rate": 4.5546649157247597e-07, - "loss": 0.984, - "step": 10478 - }, - { - "epoch": 0.7875394558845633, - "grad_norm": 1.5652572717745992, - "learning_rate": 4.551572472695224e-07, - "loss": 1.0033, - "step": 10479 - }, - { - "epoch": 0.7876146099503983, - "grad_norm": 2.566294582033579, - "learning_rate": 4.548480945044164e-07, - "loss": 0.8761, - "step": 10480 - }, - { - "epoch": 0.7876897640162333, - "grad_norm": 2.6857159037715594, - "learning_rate": 4.5453903329547816e-07, - "loss": 0.9716, - "step": 10481 - }, - { - "epoch": 0.7877649180820683, - "grad_norm": 2.7058675447288065, - "learning_rate": 4.5423006366102015e-07, - "loss": 1.086, - "step": 10482 - }, - { - "epoch": 0.7878400721479032, - "grad_norm": 1.5133888029163969, - "learning_rate": 4.539211856193494e-07, - "loss": 0.9584, - "step": 10483 - }, - { - "epoch": 0.7879152262137382, - "grad_norm": 1.621257580671036, - "learning_rate": 4.5361239918876946e-07, - "loss": 0.9983, - "step": 10484 - }, - { - "epoch": 0.7879903802795731, - "grad_norm": 1.799867079383499, - "learning_rate": 4.5330370438757624e-07, - "loss": 0.8962, - "step": 10485 - }, - { - "epoch": 0.788065534345408, - "grad_norm": 1.49303735628298, - "learning_rate": 4.5299510123406115e-07, - "loss": 0.9702, - "step": 10486 - }, - { - "epoch": 0.7881406884112431, - "grad_norm": 1.6810171143286352, - "learning_rate": 4.5268658974651044e-07, - "loss": 0.9794, - "step": 10487 - }, - { - "epoch": 0.788215842477078, - "grad_norm": 0.7971010448510698, - "learning_rate": 4.5237816994320365e-07, - "loss": 0.8343, - "step": 10488 - }, - { - "epoch": 0.788290996542913, - "grad_norm": 1.948816536971962, - "learning_rate": 4.5206984184241715e-07, - "loss": 0.984, - "step": 10489 - }, - { - "epoch": 0.7883661506087479, - "grad_norm": 1.8740345694652112, - "learning_rate": 4.517616054624198e-07, - "loss": 0.9124, - "step": 10490 - }, - { - "epoch": 0.7884413046745828, - "grad_norm": 1.5344716649576366, - "learning_rate": 4.5145346082147594e-07, - "loss": 0.9008, - "step": 10491 - }, - { - "epoch": 0.7885164587404179, - "grad_norm": 1.6131248860172556, - "learning_rate": 4.511454079378445e-07, - "loss": 0.9331, - "step": 10492 - }, - { - "epoch": 0.7885916128062528, - "grad_norm": 1.6432950771874837, - "learning_rate": 4.5083744682977775e-07, - "loss": 0.9438, - "step": 10493 - }, - { - "epoch": 0.7886667668720878, - "grad_norm": 1.7464212882725612, - "learning_rate": 4.505295775155251e-07, - "loss": 0.9006, - "step": 10494 - }, - { - "epoch": 0.7887419209379227, - "grad_norm": 1.6333552284459956, - "learning_rate": 4.502218000133284e-07, - "loss": 0.8736, - "step": 10495 - }, - { - "epoch": 0.7888170750037578, - "grad_norm": 2.4352307638370596, - "learning_rate": 4.4991411434142445e-07, - "loss": 0.8727, - "step": 10496 - }, - { - "epoch": 0.7888922290695927, - "grad_norm": 2.019682238485031, - "learning_rate": 4.49606520518045e-07, - "loss": 0.8926, - "step": 10497 - }, - { - "epoch": 0.7889673831354276, - "grad_norm": 6.630126121552689, - "learning_rate": 4.492990185614154e-07, - "loss": 0.8582, - "step": 10498 - }, - { - "epoch": 0.7890425372012626, - "grad_norm": 4.525173559074402, - "learning_rate": 4.489916084897576e-07, - "loss": 1.079, - "step": 10499 - }, - { - "epoch": 0.7891176912670975, - "grad_norm": 1.5757374596173168, - "learning_rate": 4.4868429032128575e-07, - "loss": 0.9606, - "step": 10500 - }, - { - "epoch": 0.7891928453329325, - "grad_norm": 1.6427530202389373, - "learning_rate": 4.483770640742104e-07, - "loss": 0.9926, - "step": 10501 - }, - { - "epoch": 0.7892679993987675, - "grad_norm": 2.718999741582658, - "learning_rate": 4.480699297667356e-07, - "loss": 0.9481, - "step": 10502 - }, - { - "epoch": 0.7893431534646025, - "grad_norm": 2.10207261485104, - "learning_rate": 4.4776288741706047e-07, - "loss": 0.9849, - "step": 10503 - }, - { - "epoch": 0.7894183075304374, - "grad_norm": 1.438145865596116, - "learning_rate": 4.474559370433779e-07, - "loss": 0.9328, - "step": 10504 - }, - { - "epoch": 0.7894934615962723, - "grad_norm": 3.6955122460219294, - "learning_rate": 4.4714907866387565e-07, - "loss": 0.9993, - "step": 10505 - }, - { - "epoch": 0.7895686156621073, - "grad_norm": 1.5620091239428275, - "learning_rate": 4.468423122967373e-07, - "loss": 0.8817, - "step": 10506 - }, - { - "epoch": 0.7896437697279423, - "grad_norm": 3.7874986705211158, - "learning_rate": 4.465356379601395e-07, - "loss": 0.9697, - "step": 10507 - }, - { - "epoch": 0.7897189237937773, - "grad_norm": 0.8737664724060622, - "learning_rate": 4.462290556722537e-07, - "loss": 0.9289, - "step": 10508 - }, - { - "epoch": 0.7897940778596122, - "grad_norm": 1.4157707897170762, - "learning_rate": 4.4592256545124616e-07, - "loss": 1.0218, - "step": 10509 - }, - { - "epoch": 0.7898692319254471, - "grad_norm": 1.5999616856755923, - "learning_rate": 4.4561616731527695e-07, - "loss": 0.8812, - "step": 10510 - }, - { - "epoch": 0.7899443859912821, - "grad_norm": 1.518317122444406, - "learning_rate": 4.4530986128250257e-07, - "loss": 0.9725, - "step": 10511 - }, - { - "epoch": 0.790019540057117, - "grad_norm": 1.4695005731052606, - "learning_rate": 4.450036473710721e-07, - "loss": 1.0009, - "step": 10512 - }, - { - "epoch": 0.7900946941229521, - "grad_norm": 1.5517528958779883, - "learning_rate": 4.446975255991301e-07, - "loss": 0.9618, - "step": 10513 - }, - { - "epoch": 0.790169848188787, - "grad_norm": 1.8472717401214138, - "learning_rate": 4.443914959848154e-07, - "loss": 0.9686, - "step": 10514 - }, - { - "epoch": 0.790245002254622, - "grad_norm": 1.7669165284627024, - "learning_rate": 4.4408555854626085e-07, - "loss": 0.9044, - "step": 10515 - }, - { - "epoch": 0.7903201563204569, - "grad_norm": 2.095454874115777, - "learning_rate": 4.437797133015955e-07, - "loss": 0.998, - "step": 10516 - }, - { - "epoch": 0.7903953103862918, - "grad_norm": 16.08425842681673, - "learning_rate": 4.434739602689412e-07, - "loss": 0.918, - "step": 10517 - }, - { - "epoch": 0.7904704644521269, - "grad_norm": 1.462932835620578, - "learning_rate": 4.43168299466415e-07, - "loss": 0.9346, - "step": 10518 - }, - { - "epoch": 0.7905456185179618, - "grad_norm": 2.2141622473754348, - "learning_rate": 4.428627309121287e-07, - "loss": 0.9927, - "step": 10519 - }, - { - "epoch": 0.7906207725837968, - "grad_norm": 2.2246888961176747, - "learning_rate": 4.425572546241878e-07, - "loss": 0.9921, - "step": 10520 - }, - { - "epoch": 0.7906959266496317, - "grad_norm": 2.511935069022507, - "learning_rate": 4.422518706206939e-07, - "loss": 1.0006, - "step": 10521 - }, - { - "epoch": 0.7907710807154668, - "grad_norm": 2.171098748771346, - "learning_rate": 4.4194657891974097e-07, - "loss": 1.0018, - "step": 10522 - }, - { - "epoch": 0.7908462347813017, - "grad_norm": 0.744790141675406, - "learning_rate": 4.416413795394203e-07, - "loss": 0.8191, - "step": 10523 - }, - { - "epoch": 0.7909213888471366, - "grad_norm": 1.8870835988095158, - "learning_rate": 4.413362724978149e-07, - "loss": 0.9259, - "step": 10524 - }, - { - "epoch": 0.7909965429129716, - "grad_norm": 1.4711423018066325, - "learning_rate": 4.41031257813004e-07, - "loss": 0.9403, - "step": 10525 - }, - { - "epoch": 0.7910716969788065, - "grad_norm": 2.842796920527364, - "learning_rate": 4.407263355030608e-07, - "loss": 1.0005, - "step": 10526 - }, - { - "epoch": 0.7911468510446416, - "grad_norm": 1.7760834302121673, - "learning_rate": 4.404215055860525e-07, - "loss": 1.0211, - "step": 10527 - }, - { - "epoch": 0.7912220051104765, - "grad_norm": 3.0141692922290875, - "learning_rate": 4.4011676808004263e-07, - "loss": 0.9198, - "step": 10528 - }, - { - "epoch": 0.7912971591763115, - "grad_norm": 2.103474481121821, - "learning_rate": 4.398121230030876e-07, - "loss": 0.8858, - "step": 10529 - }, - { - "epoch": 0.7913723132421464, - "grad_norm": 1.5508807940688087, - "learning_rate": 4.3950757037323826e-07, - "loss": 0.9757, - "step": 10530 - }, - { - "epoch": 0.7914474673079813, - "grad_norm": 2.522090587831361, - "learning_rate": 4.3920311020854117e-07, - "loss": 1.0495, - "step": 10531 - }, - { - "epoch": 0.7915226213738163, - "grad_norm": 1.5786055031657669, - "learning_rate": 4.3889874252703585e-07, - "loss": 0.9804, - "step": 10532 - }, - { - "epoch": 0.7915977754396513, - "grad_norm": 1.658650368269014, - "learning_rate": 4.385944673467585e-07, - "loss": 0.9916, - "step": 10533 - }, - { - "epoch": 0.7916729295054863, - "grad_norm": 2.596720637428612, - "learning_rate": 4.3829028468573793e-07, - "loss": 0.8604, - "step": 10534 - }, - { - "epoch": 0.7917480835713212, - "grad_norm": 2.27362468774722, - "learning_rate": 4.3798619456199803e-07, - "loss": 0.9776, - "step": 10535 - }, - { - "epoch": 0.7918232376371561, - "grad_norm": 1.7881366716288636, - "learning_rate": 4.376821969935578e-07, - "loss": 0.9813, - "step": 10536 - }, - { - "epoch": 0.7918983917029911, - "grad_norm": 1.8130814507562576, - "learning_rate": 4.3737829199842903e-07, - "loss": 0.8224, - "step": 10537 - }, - { - "epoch": 0.7919735457688261, - "grad_norm": 1.5779260944373237, - "learning_rate": 4.3707447959462087e-07, - "loss": 1.0023, - "step": 10538 - }, - { - "epoch": 0.7920486998346611, - "grad_norm": 1.7522139169093944, - "learning_rate": 4.3677075980013465e-07, - "loss": 0.9644, - "step": 10539 - }, - { - "epoch": 0.792123853900496, - "grad_norm": 1.7749721927882445, - "learning_rate": 4.3646713263296677e-07, - "loss": 0.9566, - "step": 10540 - }, - { - "epoch": 0.792199007966331, - "grad_norm": 1.8594822572178087, - "learning_rate": 4.3616359811110847e-07, - "loss": 0.9161, - "step": 10541 - }, - { - "epoch": 0.7922741620321659, - "grad_norm": 1.806707531489145, - "learning_rate": 4.35860156252545e-07, - "loss": 0.9652, - "step": 10542 - }, - { - "epoch": 0.7923493160980009, - "grad_norm": 5.405598331049986, - "learning_rate": 4.355568070752571e-07, - "loss": 1.0129, - "step": 10543 - }, - { - "epoch": 0.7924244701638359, - "grad_norm": 1.6528219338225538, - "learning_rate": 4.352535505972186e-07, - "loss": 0.9891, - "step": 10544 - }, - { - "epoch": 0.7924996242296708, - "grad_norm": 1.5904237781046193, - "learning_rate": 4.349503868363993e-07, - "loss": 0.9898, - "step": 10545 - }, - { - "epoch": 0.7925747782955058, - "grad_norm": 0.8264140322054301, - "learning_rate": 4.346473158107629e-07, - "loss": 0.8708, - "step": 10546 - }, - { - "epoch": 0.7926499323613407, - "grad_norm": 1.420042009269518, - "learning_rate": 4.3434433753826696e-07, - "loss": 1.0166, - "step": 10547 - }, - { - "epoch": 0.7927250864271758, - "grad_norm": 2.1486868418875593, - "learning_rate": 4.340414520368645e-07, - "loss": 0.9215, - "step": 10548 - }, - { - "epoch": 0.7928002404930107, - "grad_norm": 1.706216019512145, - "learning_rate": 4.3373865932450184e-07, - "loss": 0.968, - "step": 10549 - }, - { - "epoch": 0.7928753945588456, - "grad_norm": 2.0929525194610274, - "learning_rate": 4.334359594191217e-07, - "loss": 0.9358, - "step": 10550 - }, - { - "epoch": 0.7929505486246806, - "grad_norm": 0.6115944864039514, - "learning_rate": 4.3313335233865976e-07, - "loss": 0.7589, - "step": 10551 - }, - { - "epoch": 0.7930257026905155, - "grad_norm": 2.08449821965834, - "learning_rate": 4.328308381010466e-07, - "loss": 0.9955, - "step": 10552 - }, - { - "epoch": 0.7931008567563506, - "grad_norm": 1.7110659059041338, - "learning_rate": 4.325284167242076e-07, - "loss": 0.91, - "step": 10553 - }, - { - "epoch": 0.7931760108221855, - "grad_norm": 3.4925880970421614, - "learning_rate": 4.3222608822606134e-07, - "loss": 0.8987, - "step": 10554 - }, - { - "epoch": 0.7932511648880204, - "grad_norm": 1.6850533586198018, - "learning_rate": 4.3192385262452344e-07, - "loss": 0.9478, - "step": 10555 - }, - { - "epoch": 0.7933263189538554, - "grad_norm": 1.722927911145912, - "learning_rate": 4.316217099375017e-07, - "loss": 0.9657, - "step": 10556 - }, - { - "epoch": 0.7934014730196903, - "grad_norm": 1.6853341225811362, - "learning_rate": 4.3131966018289946e-07, - "loss": 0.9865, - "step": 10557 - }, - { - "epoch": 0.7934766270855254, - "grad_norm": 4.337651424348249, - "learning_rate": 4.3101770337861445e-07, - "loss": 0.9848, - "step": 10558 - }, - { - "epoch": 0.7935517811513603, - "grad_norm": 6.300196375132633, - "learning_rate": 4.3071583954253765e-07, - "loss": 0.9007, - "step": 10559 - }, - { - "epoch": 0.7936269352171953, - "grad_norm": 1.6336249561512672, - "learning_rate": 4.3041406869255726e-07, - "loss": 0.9806, - "step": 10560 - }, - { - "epoch": 0.7937020892830302, - "grad_norm": 1.4267141138339225, - "learning_rate": 4.301123908465536e-07, - "loss": 0.9107, - "step": 10561 - }, - { - "epoch": 0.7937772433488651, - "grad_norm": 1.5433801030239693, - "learning_rate": 4.298108060224024e-07, - "loss": 0.9358, - "step": 10562 - }, - { - "epoch": 0.7938523974147002, - "grad_norm": 1.5107783268241286, - "learning_rate": 4.295093142379735e-07, - "loss": 0.9592, - "step": 10563 - }, - { - "epoch": 0.7939275514805351, - "grad_norm": 1.4486668607802655, - "learning_rate": 4.29207915511131e-07, - "loss": 1.0129, - "step": 10564 - }, - { - "epoch": 0.7940027055463701, - "grad_norm": 2.2249705194813334, - "learning_rate": 4.289066098597349e-07, - "loss": 1.0065, - "step": 10565 - }, - { - "epoch": 0.794077859612205, - "grad_norm": 2.1429277272179736, - "learning_rate": 4.286053973016379e-07, - "loss": 1.0391, - "step": 10566 - }, - { - "epoch": 0.79415301367804, - "grad_norm": 0.6723408303983092, - "learning_rate": 4.28304277854689e-07, - "loss": 0.8416, - "step": 10567 - }, - { - "epoch": 0.794228167743875, - "grad_norm": 1.5047962280159237, - "learning_rate": 4.2800325153673e-07, - "loss": 0.953, - "step": 10568 - }, - { - "epoch": 0.7943033218097099, - "grad_norm": 1.8893975931697604, - "learning_rate": 4.277023183655977e-07, - "loss": 0.8647, - "step": 10569 - }, - { - "epoch": 0.7943784758755449, - "grad_norm": 1.7058976728378175, - "learning_rate": 4.27401478359124e-07, - "loss": 0.8594, - "step": 10570 - }, - { - "epoch": 0.7944536299413798, - "grad_norm": 2.650550202166815, - "learning_rate": 4.2710073153513404e-07, - "loss": 1.0364, - "step": 10571 - }, - { - "epoch": 0.7945287840072148, - "grad_norm": 2.924312288998291, - "learning_rate": 4.268000779114491e-07, - "loss": 0.9665, - "step": 10572 - }, - { - "epoch": 0.7946039380730497, - "grad_norm": 2.6580523490888637, - "learning_rate": 4.264995175058841e-07, - "loss": 0.9849, - "step": 10573 - }, - { - "epoch": 0.7946790921388848, - "grad_norm": 0.7838187901486543, - "learning_rate": 4.261990503362478e-07, - "loss": 0.8816, - "step": 10574 - }, - { - "epoch": 0.7947542462047197, - "grad_norm": 2.1709212809470655, - "learning_rate": 4.2589867642034427e-07, - "loss": 0.9586, - "step": 10575 - }, - { - "epoch": 0.7948294002705546, - "grad_norm": 3.8230161870463557, - "learning_rate": 4.255983957759712e-07, - "loss": 1.0079, - "step": 10576 - }, - { - "epoch": 0.7949045543363896, - "grad_norm": 2.288837929603834, - "learning_rate": 4.252982084209225e-07, - "loss": 1.017, - "step": 10577 - }, - { - "epoch": 0.7949797084022245, - "grad_norm": 1.3768876729789779, - "learning_rate": 4.24998114372985e-07, - "loss": 0.9397, - "step": 10578 - }, - { - "epoch": 0.7950548624680596, - "grad_norm": 1.968202302831344, - "learning_rate": 4.2469811364994037e-07, - "loss": 0.9294, - "step": 10579 - }, - { - "epoch": 0.7951300165338945, - "grad_norm": 2.0049580867886125, - "learning_rate": 4.2439820626956455e-07, - "loss": 0.9929, - "step": 10580 - }, - { - "epoch": 0.7952051705997294, - "grad_norm": 1.762789381903863, - "learning_rate": 4.2409839224962795e-07, - "loss": 0.9666, - "step": 10581 - }, - { - "epoch": 0.7952803246655644, - "grad_norm": 1.3535218782266716, - "learning_rate": 4.237986716078965e-07, - "loss": 0.9429, - "step": 10582 - }, - { - "epoch": 0.7953554787313993, - "grad_norm": 1.6495849809913792, - "learning_rate": 4.234990443621298e-07, - "loss": 0.9789, - "step": 10583 - }, - { - "epoch": 0.7954306327972344, - "grad_norm": 1.4238948518582963, - "learning_rate": 4.2319951053008116e-07, - "loss": 0.9538, - "step": 10584 - }, - { - "epoch": 0.7955057868630693, - "grad_norm": 1.7584135257218099, - "learning_rate": 4.229000701294998e-07, - "loss": 0.9125, - "step": 10585 - }, - { - "epoch": 0.7955809409289043, - "grad_norm": 1.7571449877341685, - "learning_rate": 4.2260072317812766e-07, - "loss": 0.9101, - "step": 10586 - }, - { - "epoch": 0.7956560949947392, - "grad_norm": 1.9154071677786124, - "learning_rate": 4.223014696937035e-07, - "loss": 0.8663, - "step": 10587 - }, - { - "epoch": 0.7957312490605741, - "grad_norm": 1.9275369127551278, - "learning_rate": 4.220023096939582e-07, - "loss": 0.8876, - "step": 10588 - }, - { - "epoch": 0.7958064031264092, - "grad_norm": 3.7153359777976314, - "learning_rate": 4.217032431966192e-07, - "loss": 0.9182, - "step": 10589 - }, - { - "epoch": 0.7958815571922441, - "grad_norm": 1.6922355187137839, - "learning_rate": 4.214042702194067e-07, - "loss": 0.9539, - "step": 10590 - }, - { - "epoch": 0.7959567112580791, - "grad_norm": 1.4773603433355955, - "learning_rate": 4.211053907800359e-07, - "loss": 0.9522, - "step": 10591 - }, - { - "epoch": 0.796031865323914, - "grad_norm": 1.9351474877268067, - "learning_rate": 4.208066048962169e-07, - "loss": 0.9764, - "step": 10592 - }, - { - "epoch": 0.796107019389749, - "grad_norm": 1.931683083772494, - "learning_rate": 4.20507912585653e-07, - "loss": 1.0185, - "step": 10593 - }, - { - "epoch": 0.796182173455584, - "grad_norm": 2.8769131804656554, - "learning_rate": 4.202093138660443e-07, - "loss": 0.9498, - "step": 10594 - }, - { - "epoch": 0.7962573275214189, - "grad_norm": 1.7772668172094568, - "learning_rate": 4.199108087550829e-07, - "loss": 0.9769, - "step": 10595 - }, - { - "epoch": 0.7963324815872539, - "grad_norm": 2.023856336923069, - "learning_rate": 4.196123972704568e-07, - "loss": 0.8876, - "step": 10596 - }, - { - "epoch": 0.7964076356530888, - "grad_norm": 2.019524284660679, - "learning_rate": 4.1931407942984777e-07, - "loss": 0.8715, - "step": 10597 - }, - { - "epoch": 0.7964827897189238, - "grad_norm": 1.462089777740484, - "learning_rate": 4.19015855250932e-07, - "loss": 0.9438, - "step": 10598 - }, - { - "epoch": 0.7965579437847587, - "grad_norm": 1.7146080741472403, - "learning_rate": 4.1871772475138136e-07, - "loss": 0.9607, - "step": 10599 - }, - { - "epoch": 0.7966330978505937, - "grad_norm": 2.55379402798619, - "learning_rate": 4.184196879488604e-07, - "loss": 0.8686, - "step": 10600 - }, - { - "epoch": 0.7967082519164287, - "grad_norm": 0.6890653432843634, - "learning_rate": 4.181217448610295e-07, - "loss": 0.7849, - "step": 10601 - }, - { - "epoch": 0.7967834059822636, - "grad_norm": 1.550773793546109, - "learning_rate": 4.178238955055424e-07, - "loss": 1.0143, - "step": 10602 - }, - { - "epoch": 0.7968585600480986, - "grad_norm": 1.6209838829957208, - "learning_rate": 4.175261399000476e-07, - "loss": 0.9456, - "step": 10603 - }, - { - "epoch": 0.7969337141139335, - "grad_norm": 2.333235999756626, - "learning_rate": 4.172284780621893e-07, - "loss": 0.9285, - "step": 10604 - }, - { - "epoch": 0.7970088681797686, - "grad_norm": 2.067124701574714, - "learning_rate": 4.1693091000960454e-07, - "loss": 0.9538, - "step": 10605 - }, - { - "epoch": 0.7970840222456035, - "grad_norm": 2.1883758971158582, - "learning_rate": 4.1663343575992526e-07, - "loss": 0.9967, - "step": 10606 - }, - { - "epoch": 0.7971591763114384, - "grad_norm": 1.4956928243852754, - "learning_rate": 4.16336055330778e-07, - "loss": 1.0089, - "step": 10607 - }, - { - "epoch": 0.7972343303772734, - "grad_norm": 1.517367668729337, - "learning_rate": 4.1603876873978327e-07, - "loss": 1.0014, - "step": 10608 - }, - { - "epoch": 0.7973094844431083, - "grad_norm": 3.332699779065437, - "learning_rate": 4.157415760045573e-07, - "loss": 1.0489, - "step": 10609 - }, - { - "epoch": 0.7973846385089434, - "grad_norm": 1.7283059547975503, - "learning_rate": 4.15444477142709e-07, - "loss": 0.9026, - "step": 10610 - }, - { - "epoch": 0.7974597925747783, - "grad_norm": 1.6628989149787121, - "learning_rate": 4.1514747217184355e-07, - "loss": 1.0361, - "step": 10611 - }, - { - "epoch": 0.7975349466406133, - "grad_norm": 3.3250768609592503, - "learning_rate": 4.148505611095594e-07, - "loss": 0.8862, - "step": 10612 - }, - { - "epoch": 0.7976101007064482, - "grad_norm": 1.4902482938303954, - "learning_rate": 4.145537439734492e-07, - "loss": 0.9476, - "step": 10613 - }, - { - "epoch": 0.7976852547722831, - "grad_norm": 1.9031505088161456, - "learning_rate": 4.142570207811009e-07, - "loss": 0.959, - "step": 10614 - }, - { - "epoch": 0.7977604088381182, - "grad_norm": 2.0935841964434236, - "learning_rate": 4.139603915500958e-07, - "loss": 1.0459, - "step": 10615 - }, - { - "epoch": 0.7978355629039531, - "grad_norm": 1.6601761401611799, - "learning_rate": 4.1366385629801126e-07, - "loss": 0.9683, - "step": 10616 - }, - { - "epoch": 0.7979107169697881, - "grad_norm": 2.4446515539843197, - "learning_rate": 4.1336741504241803e-07, - "loss": 0.9254, - "step": 10617 - }, - { - "epoch": 0.797985871035623, - "grad_norm": 1.5665617982112119, - "learning_rate": 4.1307106780088065e-07, - "loss": 0.9404, - "step": 10618 - }, - { - "epoch": 0.798061025101458, - "grad_norm": 1.8612318624479933, - "learning_rate": 4.1277481459095954e-07, - "loss": 0.9441, - "step": 10619 - }, - { - "epoch": 0.798136179167293, - "grad_norm": 4.9900998330688875, - "learning_rate": 4.1247865543020797e-07, - "loss": 1.0287, - "step": 10620 - }, - { - "epoch": 0.7982113332331279, - "grad_norm": 1.5790771029529598, - "learning_rate": 4.121825903361755e-07, - "loss": 0.9197, - "step": 10621 - }, - { - "epoch": 0.7982864872989629, - "grad_norm": 2.4096510989942668, - "learning_rate": 4.1188661932640503e-07, - "loss": 1.0054, - "step": 10622 - }, - { - "epoch": 0.7983616413647978, - "grad_norm": 1.964993783974947, - "learning_rate": 4.1159074241843326e-07, - "loss": 0.9417, - "step": 10623 - }, - { - "epoch": 0.7984367954306328, - "grad_norm": 1.5476261973652554, - "learning_rate": 4.112949596297928e-07, - "loss": 0.8797, - "step": 10624 - }, - { - "epoch": 0.7985119494964678, - "grad_norm": 4.735534123201609, - "learning_rate": 4.109992709780088e-07, - "loss": 0.9844, - "step": 10625 - }, - { - "epoch": 0.7985871035623027, - "grad_norm": 1.6348606387365896, - "learning_rate": 4.107036764806031e-07, - "loss": 0.9198, - "step": 10626 - }, - { - "epoch": 0.7986622576281377, - "grad_norm": 2.1945139019539477, - "learning_rate": 4.104081761550902e-07, - "loss": 0.8948, - "step": 10627 - }, - { - "epoch": 0.7987374116939726, - "grad_norm": 2.012753829586625, - "learning_rate": 4.101127700189806e-07, - "loss": 1.0002, - "step": 10628 - }, - { - "epoch": 0.7988125657598076, - "grad_norm": 1.9428208414861718, - "learning_rate": 4.0981745808977707e-07, - "loss": 0.9867, - "step": 10629 - }, - { - "epoch": 0.7988877198256426, - "grad_norm": 1.4466019111121755, - "learning_rate": 4.0952224038497764e-07, - "loss": 0.9506, - "step": 10630 - }, - { - "epoch": 0.7989628738914776, - "grad_norm": 1.563416936763178, - "learning_rate": 4.0922711692207645e-07, - "loss": 0.9576, - "step": 10631 - }, - { - "epoch": 0.7990380279573125, - "grad_norm": 1.5925356408507034, - "learning_rate": 4.089320877185596e-07, - "loss": 0.9158, - "step": 10632 - }, - { - "epoch": 0.7991131820231474, - "grad_norm": 2.5325316963584688, - "learning_rate": 4.086371527919097e-07, - "loss": 0.9876, - "step": 10633 - }, - { - "epoch": 0.7991883360889824, - "grad_norm": 2.1416516591695376, - "learning_rate": 4.083423121596021e-07, - "loss": 0.9777, - "step": 10634 - }, - { - "epoch": 0.7992634901548173, - "grad_norm": 0.7104974160117208, - "learning_rate": 4.080475658391076e-07, - "loss": 0.821, - "step": 10635 - }, - { - "epoch": 0.7993386442206524, - "grad_norm": 1.8176435296895646, - "learning_rate": 4.077529138478906e-07, - "loss": 1.0106, - "step": 10636 - }, - { - "epoch": 0.7994137982864873, - "grad_norm": 3.3076716807955733, - "learning_rate": 4.074583562034102e-07, - "loss": 0.9537, - "step": 10637 - }, - { - "epoch": 0.7994889523523223, - "grad_norm": 1.4814358976865882, - "learning_rate": 4.071638929231207e-07, - "loss": 0.8722, - "step": 10638 - }, - { - "epoch": 0.7995641064181572, - "grad_norm": 2.3040163258994033, - "learning_rate": 4.0686952402447016e-07, - "loss": 1.0085, - "step": 10639 - }, - { - "epoch": 0.7996392604839921, - "grad_norm": 1.4768214730318652, - "learning_rate": 4.0657524952490087e-07, - "loss": 0.9364, - "step": 10640 - }, - { - "epoch": 0.7997144145498272, - "grad_norm": 1.626775212079778, - "learning_rate": 4.0628106944184947e-07, - "loss": 0.8008, - "step": 10641 - }, - { - "epoch": 0.7997895686156621, - "grad_norm": 1.5800078085211455, - "learning_rate": 4.05986983792747e-07, - "loss": 0.9505, - "step": 10642 - }, - { - "epoch": 0.7998647226814971, - "grad_norm": 2.056111469646126, - "learning_rate": 4.0569299259502035e-07, - "loss": 1.0467, - "step": 10643 - }, - { - "epoch": 0.799939876747332, - "grad_norm": 2.0530308764291716, - "learning_rate": 4.0539909586608866e-07, - "loss": 0.9109, - "step": 10644 - }, - { - "epoch": 0.8000150308131669, - "grad_norm": 1.4494131939762929, - "learning_rate": 4.051052936233668e-07, - "loss": 0.9774, - "step": 10645 - }, - { - "epoch": 0.800090184879002, - "grad_norm": 2.070119481715754, - "learning_rate": 4.0481158588426334e-07, - "loss": 1.0383, - "step": 10646 - }, - { - "epoch": 0.8001653389448369, - "grad_norm": 2.287982047730822, - "learning_rate": 4.045179726661816e-07, - "loss": 0.8795, - "step": 10647 - }, - { - "epoch": 0.8002404930106719, - "grad_norm": 1.7196621641787904, - "learning_rate": 4.0422445398651985e-07, - "loss": 0.9508, - "step": 10648 - }, - { - "epoch": 0.8003156470765068, - "grad_norm": 2.2309554816591524, - "learning_rate": 4.0393102986266925e-07, - "loss": 1.0386, - "step": 10649 - }, - { - "epoch": 0.8003908011423418, - "grad_norm": 2.802221122294099, - "learning_rate": 4.0363770031201793e-07, - "loss": 1.0377, - "step": 10650 - }, - { - "epoch": 0.8004659552081768, - "grad_norm": 1.5741986824820113, - "learning_rate": 4.033444653519449e-07, - "loss": 0.9785, - "step": 10651 - }, - { - "epoch": 0.8005411092740117, - "grad_norm": 1.7485203618518563, - "learning_rate": 4.030513249998266e-07, - "loss": 0.9853, - "step": 10652 - }, - { - "epoch": 0.8006162633398467, - "grad_norm": 1.4680337173841025, - "learning_rate": 4.0275827927303265e-07, - "loss": 0.9873, - "step": 10653 - }, - { - "epoch": 0.8006914174056816, - "grad_norm": 1.556716515044496, - "learning_rate": 4.024653281889261e-07, - "loss": 0.998, - "step": 10654 - }, - { - "epoch": 0.8007665714715166, - "grad_norm": 4.936253837068119, - "learning_rate": 4.021724717648669e-07, - "loss": 0.947, - "step": 10655 - }, - { - "epoch": 0.8008417255373516, - "grad_norm": 3.954164029205268, - "learning_rate": 4.018797100182072e-07, - "loss": 0.9578, - "step": 10656 - }, - { - "epoch": 0.8009168796031866, - "grad_norm": 1.4132534684402334, - "learning_rate": 4.0158704296629445e-07, - "loss": 0.9376, - "step": 10657 - }, - { - "epoch": 0.8009920336690215, - "grad_norm": 1.9467341731833956, - "learning_rate": 4.0129447062646983e-07, - "loss": 1.0123, - "step": 10658 - }, - { - "epoch": 0.8010671877348564, - "grad_norm": 2.405357988167045, - "learning_rate": 4.010019930160695e-07, - "loss": 0.9812, - "step": 10659 - }, - { - "epoch": 0.8011423418006914, - "grad_norm": 1.503087759845378, - "learning_rate": 4.0070961015242433e-07, - "loss": 0.9663, - "step": 10660 - }, - { - "epoch": 0.8012174958665264, - "grad_norm": 1.6804840328385284, - "learning_rate": 4.0041732205285883e-07, - "loss": 0.8992, - "step": 10661 - }, - { - "epoch": 0.8012926499323614, - "grad_norm": 2.0676826328118487, - "learning_rate": 4.001251287346925e-07, - "loss": 0.947, - "step": 10662 - }, - { - "epoch": 0.8013678039981963, - "grad_norm": 1.3641775950042832, - "learning_rate": 3.998330302152384e-07, - "loss": 0.9411, - "step": 10663 - }, - { - "epoch": 0.8014429580640313, - "grad_norm": 2.0439541775966372, - "learning_rate": 3.995410265118042e-07, - "loss": 0.9453, - "step": 10664 - }, - { - "epoch": 0.8015181121298662, - "grad_norm": 1.8865199013880374, - "learning_rate": 3.992491176416932e-07, - "loss": 0.9767, - "step": 10665 - }, - { - "epoch": 0.8015932661957011, - "grad_norm": 1.6300200901080586, - "learning_rate": 3.989573036222018e-07, - "loss": 1.0509, - "step": 10666 - }, - { - "epoch": 0.8016684202615362, - "grad_norm": 2.2292211888380766, - "learning_rate": 3.986655844706208e-07, - "loss": 0.9742, - "step": 10667 - }, - { - "epoch": 0.8017435743273711, - "grad_norm": 1.6539232717150767, - "learning_rate": 3.9837396020423595e-07, - "loss": 0.9504, - "step": 10668 - }, - { - "epoch": 0.8018187283932061, - "grad_norm": 3.6098158265788087, - "learning_rate": 3.9808243084032657e-07, - "loss": 0.9349, - "step": 10669 - }, - { - "epoch": 0.801893882459041, - "grad_norm": 1.949029152618387, - "learning_rate": 3.9779099639616766e-07, - "loss": 0.9097, - "step": 10670 - }, - { - "epoch": 0.801969036524876, - "grad_norm": 1.8311152333739915, - "learning_rate": 3.9749965688902696e-07, - "loss": 0.959, - "step": 10671 - }, - { - "epoch": 0.802044190590711, - "grad_norm": 2.3223786579211887, - "learning_rate": 3.9720841233616875e-07, - "loss": 0.9117, - "step": 10672 - }, - { - "epoch": 0.8021193446565459, - "grad_norm": 1.4641240093781833, - "learning_rate": 3.969172627548494e-07, - "loss": 0.9299, - "step": 10673 - }, - { - "epoch": 0.8021944987223809, - "grad_norm": 2.700810847562999, - "learning_rate": 3.966262081623208e-07, - "loss": 1.0417, - "step": 10674 - }, - { - "epoch": 0.8022696527882158, - "grad_norm": 1.6177426919360884, - "learning_rate": 3.963352485758291e-07, - "loss": 0.9615, - "step": 10675 - }, - { - "epoch": 0.8023448068540509, - "grad_norm": 1.9399675223001405, - "learning_rate": 3.960443840126144e-07, - "loss": 0.828, - "step": 10676 - }, - { - "epoch": 0.8024199609198858, - "grad_norm": 1.4333132435625344, - "learning_rate": 3.957536144899123e-07, - "loss": 1.054, - "step": 10677 - }, - { - "epoch": 0.8024951149857207, - "grad_norm": 2.159675530858765, - "learning_rate": 3.954629400249516e-07, - "loss": 0.9502, - "step": 10678 - }, - { - "epoch": 0.8025702690515557, - "grad_norm": 2.0653588104712486, - "learning_rate": 3.9517236063495596e-07, - "loss": 0.955, - "step": 10679 - }, - { - "epoch": 0.8026454231173906, - "grad_norm": 0.6811566533871841, - "learning_rate": 3.9488187633714333e-07, - "loss": 0.8289, - "step": 10680 - }, - { - "epoch": 0.8027205771832256, - "grad_norm": 1.8487955012728512, - "learning_rate": 3.9459148714872526e-07, - "loss": 0.9855, - "step": 10681 - }, - { - "epoch": 0.8027957312490606, - "grad_norm": 2.110036110591591, - "learning_rate": 3.943011930869098e-07, - "loss": 0.9759, - "step": 10682 - }, - { - "epoch": 0.8028708853148956, - "grad_norm": 1.7283714794681202, - "learning_rate": 3.940109941688969e-07, - "loss": 1.0512, - "step": 10683 - }, - { - "epoch": 0.8029460393807305, - "grad_norm": 1.3054553302323975, - "learning_rate": 3.9372089041188275e-07, - "loss": 0.8706, - "step": 10684 - }, - { - "epoch": 0.8030211934465654, - "grad_norm": 6.896086105216251, - "learning_rate": 3.934308818330565e-07, - "loss": 0.962, - "step": 10685 - }, - { - "epoch": 0.8030963475124004, - "grad_norm": 1.4047237166678248, - "learning_rate": 3.9314096844960186e-07, - "loss": 0.988, - "step": 10686 - }, - { - "epoch": 0.8031715015782354, - "grad_norm": 0.7473783767912293, - "learning_rate": 3.9285115027869863e-07, - "loss": 0.8297, - "step": 10687 - }, - { - "epoch": 0.8032466556440704, - "grad_norm": 1.6208969548282168, - "learning_rate": 3.9256142733751886e-07, - "loss": 0.9907, - "step": 10688 - }, - { - "epoch": 0.8033218097099053, - "grad_norm": 1.6815827079484567, - "learning_rate": 3.9227179964322985e-07, - "loss": 0.9529, - "step": 10689 - }, - { - "epoch": 0.8033969637757402, - "grad_norm": 19.067490221568473, - "learning_rate": 3.919822672129931e-07, - "loss": 0.9907, - "step": 10690 - }, - { - "epoch": 0.8034721178415752, - "grad_norm": 1.715239876634275, - "learning_rate": 3.9169283006396394e-07, - "loss": 0.965, - "step": 10691 - }, - { - "epoch": 0.8035472719074102, - "grad_norm": 1.8622547577044268, - "learning_rate": 3.914034882132937e-07, - "loss": 0.9431, - "step": 10692 - }, - { - "epoch": 0.8036224259732452, - "grad_norm": 1.891385201451673, - "learning_rate": 3.911142416781261e-07, - "loss": 0.9008, - "step": 10693 - }, - { - "epoch": 0.8036975800390801, - "grad_norm": 1.8789350543124088, - "learning_rate": 3.90825090475601e-07, - "loss": 0.8942, - "step": 10694 - }, - { - "epoch": 0.8037727341049151, - "grad_norm": 1.8121023070651259, - "learning_rate": 3.9053603462285124e-07, - "loss": 1.0025, - "step": 10695 - }, - { - "epoch": 0.80384788817075, - "grad_norm": 1.8471791334871086, - "learning_rate": 3.902470741370045e-07, - "loss": 0.9185, - "step": 10696 - }, - { - "epoch": 0.803923042236585, - "grad_norm": 1.5601371446871406, - "learning_rate": 3.899582090351827e-07, - "loss": 1.0041, - "step": 10697 - }, - { - "epoch": 0.80399819630242, - "grad_norm": 1.8957921187842968, - "learning_rate": 3.8966943933450167e-07, - "loss": 0.94, - "step": 10698 - }, - { - "epoch": 0.8040733503682549, - "grad_norm": 4.8944042094945495, - "learning_rate": 3.893807650520735e-07, - "loss": 0.9849, - "step": 10699 - }, - { - "epoch": 0.8041485044340899, - "grad_norm": 1.8962009897857746, - "learning_rate": 3.890921862050023e-07, - "loss": 0.9657, - "step": 10700 - }, - { - "epoch": 0.8042236584999248, - "grad_norm": 2.784700460710952, - "learning_rate": 3.888037028103877e-07, - "loss": 1.036, - "step": 10701 - }, - { - "epoch": 0.8042988125657599, - "grad_norm": 2.295185220096531, - "learning_rate": 3.8851531488532353e-07, - "loss": 0.891, - "step": 10702 - }, - { - "epoch": 0.8043739666315948, - "grad_norm": 1.8630343419552977, - "learning_rate": 3.882270224468969e-07, - "loss": 1.0284, - "step": 10703 - }, - { - "epoch": 0.8044491206974297, - "grad_norm": 1.5114689110408355, - "learning_rate": 3.879388255121918e-07, - "loss": 1.0314, - "step": 10704 - }, - { - "epoch": 0.8045242747632647, - "grad_norm": 2.3338245033831386, - "learning_rate": 3.8765072409828424e-07, - "loss": 0.8478, - "step": 10705 - }, - { - "epoch": 0.8045994288290996, - "grad_norm": 2.3966544914024777, - "learning_rate": 3.873627182222454e-07, - "loss": 0.9476, - "step": 10706 - }, - { - "epoch": 0.8046745828949347, - "grad_norm": 1.677343605016623, - "learning_rate": 3.870748079011408e-07, - "loss": 0.9639, - "step": 10707 - }, - { - "epoch": 0.8047497369607696, - "grad_norm": 2.060942900146628, - "learning_rate": 3.867869931520296e-07, - "loss": 0.9603, - "step": 10708 - }, - { - "epoch": 0.8048248910266046, - "grad_norm": 0.7864630911643171, - "learning_rate": 3.864992739919668e-07, - "loss": 0.8907, - "step": 10709 - }, - { - "epoch": 0.8049000450924395, - "grad_norm": 1.5344496792335602, - "learning_rate": 3.8621165043800065e-07, - "loss": 1.0789, - "step": 10710 - }, - { - "epoch": 0.8049751991582744, - "grad_norm": 1.5349381075878488, - "learning_rate": 3.8592412250717366e-07, - "loss": 0.9813, - "step": 10711 - }, - { - "epoch": 0.8050503532241094, - "grad_norm": 1.4289306972241453, - "learning_rate": 3.8563669021652334e-07, - "loss": 1.0039, - "step": 10712 - }, - { - "epoch": 0.8051255072899444, - "grad_norm": 4.288422305719481, - "learning_rate": 3.853493535830803e-07, - "loss": 0.9651, - "step": 10713 - }, - { - "epoch": 0.8052006613557794, - "grad_norm": 1.8372523367962035, - "learning_rate": 3.8506211262387155e-07, - "loss": 0.9991, - "step": 10714 - }, - { - "epoch": 0.8052758154216143, - "grad_norm": 1.9003507917724456, - "learning_rate": 3.84774967355916e-07, - "loss": 0.9406, - "step": 10715 - }, - { - "epoch": 0.8053509694874492, - "grad_norm": 1.6377773457717384, - "learning_rate": 3.844879177962295e-07, - "loss": 0.9384, - "step": 10716 - }, - { - "epoch": 0.8054261235532842, - "grad_norm": 1.8251539687958935, - "learning_rate": 3.842009639618198e-07, - "loss": 0.9735, - "step": 10717 - }, - { - "epoch": 0.8055012776191192, - "grad_norm": 1.79815714330388, - "learning_rate": 3.839141058696904e-07, - "loss": 0.976, - "step": 10718 - }, - { - "epoch": 0.8055764316849542, - "grad_norm": 2.333225270448332, - "learning_rate": 3.836273435368387e-07, - "loss": 0.9265, - "step": 10719 - }, - { - "epoch": 0.8056515857507891, - "grad_norm": 4.349506104591858, - "learning_rate": 3.8334067698025583e-07, - "loss": 0.8118, - "step": 10720 - }, - { - "epoch": 0.8057267398166241, - "grad_norm": 2.1692545059401738, - "learning_rate": 3.83054106216929e-07, - "loss": 1.0566, - "step": 10721 - }, - { - "epoch": 0.805801893882459, - "grad_norm": 1.443665344266448, - "learning_rate": 3.827676312638379e-07, - "loss": 0.9929, - "step": 10722 - }, - { - "epoch": 0.805877047948294, - "grad_norm": 1.8468253785775552, - "learning_rate": 3.824812521379577e-07, - "loss": 0.9149, - "step": 10723 - }, - { - "epoch": 0.805952202014129, - "grad_norm": 2.598659786183288, - "learning_rate": 3.821949688562571e-07, - "loss": 0.9439, - "step": 10724 - }, - { - "epoch": 0.8060273560799639, - "grad_norm": 3.485605202314773, - "learning_rate": 3.8190878143569896e-07, - "loss": 0.9766, - "step": 10725 - }, - { - "epoch": 0.8061025101457989, - "grad_norm": 1.6803527933900984, - "learning_rate": 3.816226898932422e-07, - "loss": 0.9873, - "step": 10726 - }, - { - "epoch": 0.8061776642116338, - "grad_norm": 1.335272193905657, - "learning_rate": 3.8133669424583847e-07, - "loss": 1.0015, - "step": 10727 - }, - { - "epoch": 0.8062528182774689, - "grad_norm": 1.583690310273947, - "learning_rate": 3.8105079451043355e-07, - "loss": 1.0107, - "step": 10728 - }, - { - "epoch": 0.8063279723433038, - "grad_norm": 1.555444964193961, - "learning_rate": 3.807649907039685e-07, - "loss": 0.8882, - "step": 10729 - }, - { - "epoch": 0.8064031264091387, - "grad_norm": 1.4694033853571067, - "learning_rate": 3.804792828433778e-07, - "loss": 1.0127, - "step": 10730 - }, - { - "epoch": 0.8064782804749737, - "grad_norm": 1.957389935727905, - "learning_rate": 3.8019367094559173e-07, - "loss": 0.9246, - "step": 10731 - }, - { - "epoch": 0.8065534345408086, - "grad_norm": 2.202753455033243, - "learning_rate": 3.7990815502753317e-07, - "loss": 0.9475, - "step": 10732 - }, - { - "epoch": 0.8066285886066437, - "grad_norm": 1.6950215032783293, - "learning_rate": 3.796227351061201e-07, - "loss": 0.868, - "step": 10733 - }, - { - "epoch": 0.8067037426724786, - "grad_norm": 2.3738918730898573, - "learning_rate": 3.79337411198265e-07, - "loss": 0.8687, - "step": 10734 - }, - { - "epoch": 0.8067788967383135, - "grad_norm": 1.9881155848107788, - "learning_rate": 3.790521833208735e-07, - "loss": 0.9945, - "step": 10735 - }, - { - "epoch": 0.8068540508041485, - "grad_norm": 0.6409543749123641, - "learning_rate": 3.7876705149084786e-07, - "loss": 0.8391, - "step": 10736 - }, - { - "epoch": 0.8069292048699834, - "grad_norm": 1.7277281955761143, - "learning_rate": 3.784820157250819e-07, - "loss": 0.9129, - "step": 10737 - }, - { - "epoch": 0.8070043589358185, - "grad_norm": 0.9866109914525824, - "learning_rate": 3.781970760404665e-07, - "loss": 0.9133, - "step": 10738 - }, - { - "epoch": 0.8070795130016534, - "grad_norm": 1.9946280695377683, - "learning_rate": 3.779122324538844e-07, - "loss": 1.0088, - "step": 10739 - }, - { - "epoch": 0.8071546670674884, - "grad_norm": 2.527846603248394, - "learning_rate": 3.7762748498221385e-07, - "loss": 0.8321, - "step": 10740 - }, - { - "epoch": 0.8072298211333233, - "grad_norm": 1.926944280882869, - "learning_rate": 3.7734283364232745e-07, - "loss": 0.8606, - "step": 10741 - }, - { - "epoch": 0.8073049751991582, - "grad_norm": 1.4556871202250625, - "learning_rate": 3.7705827845109117e-07, - "loss": 0.9616, - "step": 10742 - }, - { - "epoch": 0.8073801292649933, - "grad_norm": 1.7850176801260056, - "learning_rate": 3.767738194253669e-07, - "loss": 0.9497, - "step": 10743 - }, - { - "epoch": 0.8074552833308282, - "grad_norm": 2.2359893942088376, - "learning_rate": 3.7648945658200983e-07, - "loss": 1.0046, - "step": 10744 - }, - { - "epoch": 0.8075304373966632, - "grad_norm": 2.1146957542965246, - "learning_rate": 3.762051899378691e-07, - "loss": 0.9224, - "step": 10745 - }, - { - "epoch": 0.8076055914624981, - "grad_norm": 1.6309586363411583, - "learning_rate": 3.7592101950978883e-07, - "loss": 0.9211, - "step": 10746 - }, - { - "epoch": 0.8076807455283331, - "grad_norm": 3.947556142860744, - "learning_rate": 3.7563694531460686e-07, - "loss": 0.9782, - "step": 10747 - }, - { - "epoch": 0.807755899594168, - "grad_norm": 2.1155952575486188, - "learning_rate": 3.7535296736915623e-07, - "loss": 0.8742, - "step": 10748 - }, - { - "epoch": 0.807831053660003, - "grad_norm": 2.1910319561144798, - "learning_rate": 3.750690856902636e-07, - "loss": 1.0116, - "step": 10749 - }, - { - "epoch": 0.807906207725838, - "grad_norm": 7.075069888143801, - "learning_rate": 3.7478530029474987e-07, - "loss": 0.9695, - "step": 10750 - }, - { - "epoch": 0.8079813617916729, - "grad_norm": 1.8030191611923683, - "learning_rate": 3.7450161119943056e-07, - "loss": 0.9537, - "step": 10751 - }, - { - "epoch": 0.8080565158575079, - "grad_norm": 1.7536517597940893, - "learning_rate": 3.7421801842111454e-07, - "loss": 0.9439, - "step": 10752 - }, - { - "epoch": 0.8081316699233428, - "grad_norm": 3.78957969045997, - "learning_rate": 3.7393452197660723e-07, - "loss": 0.9758, - "step": 10753 - }, - { - "epoch": 0.8082068239891779, - "grad_norm": 1.6650981993255582, - "learning_rate": 3.7365112188270585e-07, - "loss": 0.9448, - "step": 10754 - }, - { - "epoch": 0.8082819780550128, - "grad_norm": 1.8016706740234487, - "learning_rate": 3.7336781815620345e-07, - "loss": 1.0483, - "step": 10755 - }, - { - "epoch": 0.8083571321208477, - "grad_norm": 2.0818942885591842, - "learning_rate": 3.730846108138863e-07, - "loss": 1.1174, - "step": 10756 - }, - { - "epoch": 0.8084322861866827, - "grad_norm": 1.2681618215029098, - "learning_rate": 3.728014998725357e-07, - "loss": 1.0112, - "step": 10757 - }, - { - "epoch": 0.8085074402525176, - "grad_norm": 1.5299944498547384, - "learning_rate": 3.725184853489274e-07, - "loss": 0.9927, - "step": 10758 - }, - { - "epoch": 0.8085825943183527, - "grad_norm": 1.6085871277163801, - "learning_rate": 3.722355672598305e-07, - "loss": 0.9942, - "step": 10759 - }, - { - "epoch": 0.8086577483841876, - "grad_norm": 2.5537484685937115, - "learning_rate": 3.7195274562200996e-07, - "loss": 1.0242, - "step": 10760 - }, - { - "epoch": 0.8087329024500225, - "grad_norm": 1.8261144292435254, - "learning_rate": 3.716700204522234e-07, - "loss": 1.0287, - "step": 10761 - }, - { - "epoch": 0.8088080565158575, - "grad_norm": 2.9384189548087747, - "learning_rate": 3.7138739176722323e-07, - "loss": 0.9076, - "step": 10762 - }, - { - "epoch": 0.8088832105816924, - "grad_norm": 2.3542774028108893, - "learning_rate": 3.711048595837567e-07, - "loss": 1.0285, - "step": 10763 - }, - { - "epoch": 0.8089583646475275, - "grad_norm": 2.188263024360129, - "learning_rate": 3.70822423918564e-07, - "loss": 1.0254, - "step": 10764 - }, - { - "epoch": 0.8090335187133624, - "grad_norm": 1.7678288571735714, - "learning_rate": 3.7054008478838197e-07, - "loss": 0.9437, - "step": 10765 - }, - { - "epoch": 0.8091086727791974, - "grad_norm": 1.7659838826533791, - "learning_rate": 3.702578422099394e-07, - "loss": 0.9175, - "step": 10766 - }, - { - "epoch": 0.8091838268450323, - "grad_norm": 2.07316582746213, - "learning_rate": 3.6997569619996027e-07, - "loss": 0.9235, - "step": 10767 - }, - { - "epoch": 0.8092589809108672, - "grad_norm": 3.665577004094883, - "learning_rate": 3.69693646775163e-07, - "loss": 0.9975, - "step": 10768 - }, - { - "epoch": 0.8093341349767023, - "grad_norm": 2.0352793709383814, - "learning_rate": 3.6941169395225956e-07, - "loss": 1.0082, - "step": 10769 - }, - { - "epoch": 0.8094092890425372, - "grad_norm": 2.306375486686762, - "learning_rate": 3.691298377479577e-07, - "loss": 0.9706, - "step": 10770 - }, - { - "epoch": 0.8094844431083722, - "grad_norm": 1.474378959388478, - "learning_rate": 3.6884807817895804e-07, - "loss": 0.992, - "step": 10771 - }, - { - "epoch": 0.8095595971742071, - "grad_norm": 1.8227407606720256, - "learning_rate": 3.685664152619556e-07, - "loss": 0.9296, - "step": 10772 - }, - { - "epoch": 0.8096347512400421, - "grad_norm": 1.5391167955454843, - "learning_rate": 3.6828484901364054e-07, - "loss": 0.9163, - "step": 10773 - }, - { - "epoch": 0.809709905305877, - "grad_norm": 2.46429853258294, - "learning_rate": 3.680033794506958e-07, - "loss": 0.9215, - "step": 10774 - }, - { - "epoch": 0.809785059371712, - "grad_norm": 1.616599081861808, - "learning_rate": 3.6772200658980057e-07, - "loss": 0.9295, - "step": 10775 - }, - { - "epoch": 0.809860213437547, - "grad_norm": 1.6208620939996383, - "learning_rate": 3.67440730447627e-07, - "loss": 0.9794, - "step": 10776 - }, - { - "epoch": 0.8099353675033819, - "grad_norm": 2.4861795841024756, - "learning_rate": 3.671595510408416e-07, - "loss": 1.1114, - "step": 10777 - }, - { - "epoch": 0.8100105215692169, - "grad_norm": 2.0939757951286633, - "learning_rate": 3.6687846838610527e-07, - "loss": 0.9956, - "step": 10778 - }, - { - "epoch": 0.8100856756350518, - "grad_norm": 1.490003175412065, - "learning_rate": 3.6659748250007283e-07, - "loss": 0.9206, - "step": 10779 - }, - { - "epoch": 0.8101608297008868, - "grad_norm": 1.7312948007674218, - "learning_rate": 3.663165933993948e-07, - "loss": 0.9721, - "step": 10780 - }, - { - "epoch": 0.8102359837667218, - "grad_norm": 2.2375816330323333, - "learning_rate": 3.660358011007141e-07, - "loss": 1.041, - "step": 10781 - }, - { - "epoch": 0.8103111378325567, - "grad_norm": 1.4695199612108352, - "learning_rate": 3.6575510562066937e-07, - "loss": 0.9228, - "step": 10782 - }, - { - "epoch": 0.8103862918983917, - "grad_norm": 2.0168238187146796, - "learning_rate": 3.6547450697589243e-07, - "loss": 0.9495, - "step": 10783 - }, - { - "epoch": 0.8104614459642266, - "grad_norm": 3.520608311672332, - "learning_rate": 3.6519400518301023e-07, - "loss": 0.9436, - "step": 10784 - }, - { - "epoch": 0.8105366000300617, - "grad_norm": 1.507304684209951, - "learning_rate": 3.6491360025864324e-07, - "loss": 0.8566, - "step": 10785 - }, - { - "epoch": 0.8106117540958966, - "grad_norm": 1.490020776642205, - "learning_rate": 3.6463329221940597e-07, - "loss": 1.0333, - "step": 10786 - }, - { - "epoch": 0.8106869081617315, - "grad_norm": 1.6034499049222632, - "learning_rate": 3.643530810819091e-07, - "loss": 0.9546, - "step": 10787 - }, - { - "epoch": 0.8107620622275665, - "grad_norm": 1.5557253646506137, - "learning_rate": 3.640729668627553e-07, - "loss": 0.9242, - "step": 10788 - }, - { - "epoch": 0.8108372162934014, - "grad_norm": 1.7301080814402798, - "learning_rate": 3.6379294957854257e-07, - "loss": 0.8254, - "step": 10789 - }, - { - "epoch": 0.8109123703592365, - "grad_norm": 2.3979833713492633, - "learning_rate": 3.6351302924586326e-07, - "loss": 1.0442, - "step": 10790 - }, - { - "epoch": 0.8109875244250714, - "grad_norm": 1.418103088236524, - "learning_rate": 3.6323320588130277e-07, - "loss": 1.0132, - "step": 10791 - }, - { - "epoch": 0.8110626784909064, - "grad_norm": 2.229321186259972, - "learning_rate": 3.6295347950144305e-07, - "loss": 1.0123, - "step": 10792 - }, - { - "epoch": 0.8111378325567413, - "grad_norm": 1.8584027994398362, - "learning_rate": 3.6267385012285836e-07, - "loss": 0.9229, - "step": 10793 - }, - { - "epoch": 0.8112129866225762, - "grad_norm": 1.820641811200257, - "learning_rate": 3.6239431776211757e-07, - "loss": 0.9363, - "step": 10794 - }, - { - "epoch": 0.8112881406884113, - "grad_norm": 0.7507324615726616, - "learning_rate": 3.6211488243578445e-07, - "loss": 0.8978, - "step": 10795 - }, - { - "epoch": 0.8113632947542462, - "grad_norm": 1.9444923357136927, - "learning_rate": 3.6183554416041597e-07, - "loss": 0.9573, - "step": 10796 - }, - { - "epoch": 0.8114384488200812, - "grad_norm": 1.7085475864810982, - "learning_rate": 3.615563029525648e-07, - "loss": 1.0784, - "step": 10797 - }, - { - "epoch": 0.8115136028859161, - "grad_norm": 1.6175751889424874, - "learning_rate": 3.612771588287764e-07, - "loss": 0.9917, - "step": 10798 - }, - { - "epoch": 0.8115887569517511, - "grad_norm": 1.930242111259104, - "learning_rate": 3.609981118055923e-07, - "loss": 0.9966, - "step": 10799 - }, - { - "epoch": 0.8116639110175861, - "grad_norm": 1.7986788775795637, - "learning_rate": 3.6071916189954575e-07, - "loss": 0.9168, - "step": 10800 - }, - { - "epoch": 0.811739065083421, - "grad_norm": 1.9944762602239832, - "learning_rate": 3.604403091271655e-07, - "loss": 0.9377, - "step": 10801 - }, - { - "epoch": 0.811814219149256, - "grad_norm": 1.5704458758899433, - "learning_rate": 3.601615535049758e-07, - "loss": 0.9778, - "step": 10802 - }, - { - "epoch": 0.8118893732150909, - "grad_norm": 1.4552751446157028, - "learning_rate": 3.5988289504949297e-07, - "loss": 0.9915, - "step": 10803 - }, - { - "epoch": 0.8119645272809259, - "grad_norm": 1.539615280102103, - "learning_rate": 3.5960433377722945e-07, - "loss": 0.9667, - "step": 10804 - }, - { - "epoch": 0.8120396813467609, - "grad_norm": 1.6062306176585857, - "learning_rate": 3.5932586970469057e-07, - "loss": 0.8594, - "step": 10805 - }, - { - "epoch": 0.8121148354125958, - "grad_norm": 2.168606501072553, - "learning_rate": 3.5904750284837657e-07, - "loss": 0.9549, - "step": 10806 - }, - { - "epoch": 0.8121899894784308, - "grad_norm": 0.6369045431156233, - "learning_rate": 3.587692332247818e-07, - "loss": 0.8417, - "step": 10807 - }, - { - "epoch": 0.8122651435442657, - "grad_norm": 2.305652170825168, - "learning_rate": 3.5849106085039393e-07, - "loss": 1.0044, - "step": 10808 - }, - { - "epoch": 0.8123402976101007, - "grad_norm": 1.49525184946818, - "learning_rate": 3.582129857416971e-07, - "loss": 0.8756, - "step": 10809 - }, - { - "epoch": 0.8124154516759357, - "grad_norm": 1.6920107846964731, - "learning_rate": 3.5793500791516773e-07, - "loss": 0.9688, - "step": 10810 - }, - { - "epoch": 0.8124906057417707, - "grad_norm": 1.609409368944304, - "learning_rate": 3.576571273872768e-07, - "loss": 1.0158, - "step": 10811 - }, - { - "epoch": 0.8125657598076056, - "grad_norm": 7.057048212523204, - "learning_rate": 3.573793441744901e-07, - "loss": 0.8865, - "step": 10812 - }, - { - "epoch": 0.8126409138734405, - "grad_norm": 1.750010422266851, - "learning_rate": 3.5710165829326686e-07, - "loss": 0.9333, - "step": 10813 - }, - { - "epoch": 0.8127160679392755, - "grad_norm": 1.6488135754038893, - "learning_rate": 3.5682406976006196e-07, - "loss": 0.9557, - "step": 10814 - }, - { - "epoch": 0.8127912220051104, - "grad_norm": 1.4170983535671973, - "learning_rate": 3.565465785913231e-07, - "loss": 0.9778, - "step": 10815 - }, - { - "epoch": 0.8128663760709455, - "grad_norm": 1.5617144326498134, - "learning_rate": 3.5626918480349244e-07, - "loss": 0.9112, - "step": 10816 - }, - { - "epoch": 0.8129415301367804, - "grad_norm": 0.8751555032153111, - "learning_rate": 3.559918884130071e-07, - "loss": 0.8691, - "step": 10817 - }, - { - "epoch": 0.8130166842026154, - "grad_norm": 1.7597382571921552, - "learning_rate": 3.55714689436297e-07, - "loss": 0.8249, - "step": 10818 - }, - { - "epoch": 0.8130918382684503, - "grad_norm": 1.7911977893703257, - "learning_rate": 3.554375878897886e-07, - "loss": 0.9557, - "step": 10819 - }, - { - "epoch": 0.8131669923342852, - "grad_norm": 1.5211770527134256, - "learning_rate": 3.551605837898999e-07, - "loss": 0.9506, - "step": 10820 - }, - { - "epoch": 0.8132421464001203, - "grad_norm": 1.819752619352996, - "learning_rate": 3.5488367715304637e-07, - "loss": 1.0197, - "step": 10821 - }, - { - "epoch": 0.8133173004659552, - "grad_norm": 1.5617580941278886, - "learning_rate": 3.5460686799563375e-07, - "loss": 0.9347, - "step": 10822 - }, - { - "epoch": 0.8133924545317902, - "grad_norm": 1.5485871212882563, - "learning_rate": 3.543301563340646e-07, - "loss": 0.9891, - "step": 10823 - }, - { - "epoch": 0.8134676085976251, - "grad_norm": 1.8781711464796678, - "learning_rate": 3.540535421847357e-07, - "loss": 1.0294, - "step": 10824 - }, - { - "epoch": 0.81354276266346, - "grad_norm": 1.7467844576223452, - "learning_rate": 3.5377702556403664e-07, - "loss": 1.0219, - "step": 10825 - }, - { - "epoch": 0.8136179167292951, - "grad_norm": 2.305793729478071, - "learning_rate": 3.535006064883532e-07, - "loss": 0.936, - "step": 10826 - }, - { - "epoch": 0.81369307079513, - "grad_norm": 1.6964975601165253, - "learning_rate": 3.5322428497406387e-07, - "loss": 1.0134, - "step": 10827 - }, - { - "epoch": 0.813768224860965, - "grad_norm": 2.13806790398014, - "learning_rate": 3.5294806103754124e-07, - "loss": 0.8911, - "step": 10828 - }, - { - "epoch": 0.8138433789267999, - "grad_norm": 3.6463653757531227, - "learning_rate": 3.5267193469515324e-07, - "loss": 1.1312, - "step": 10829 - }, - { - "epoch": 0.813918532992635, - "grad_norm": 1.9082672202084077, - "learning_rate": 3.523959059632606e-07, - "loss": 1.0375, - "step": 10830 - }, - { - "epoch": 0.8139936870584699, - "grad_norm": 1.275926556228468, - "learning_rate": 3.5211997485822e-07, - "loss": 0.9872, - "step": 10831 - }, - { - "epoch": 0.8140688411243048, - "grad_norm": 2.094211925964235, - "learning_rate": 3.518441413963811e-07, - "loss": 0.9635, - "step": 10832 - }, - { - "epoch": 0.8141439951901398, - "grad_norm": 1.5612378172326589, - "learning_rate": 3.5156840559408816e-07, - "loss": 0.8933, - "step": 10833 - }, - { - "epoch": 0.8142191492559747, - "grad_norm": 1.2776288755473975, - "learning_rate": 3.5129276746767886e-07, - "loss": 1.0377, - "step": 10834 - }, - { - "epoch": 0.8142943033218097, - "grad_norm": 2.953376880379237, - "learning_rate": 3.510172270334875e-07, - "loss": 0.8211, - "step": 10835 - }, - { - "epoch": 0.8143694573876447, - "grad_norm": 2.0033231782999636, - "learning_rate": 3.507417843078386e-07, - "loss": 1.0425, - "step": 10836 - }, - { - "epoch": 0.8144446114534797, - "grad_norm": 2.056453859375047, - "learning_rate": 3.504664393070551e-07, - "loss": 0.915, - "step": 10837 - }, - { - "epoch": 0.8145197655193146, - "grad_norm": 2.557645805580393, - "learning_rate": 3.5019119204745097e-07, - "loss": 0.9386, - "step": 10838 - }, - { - "epoch": 0.8145949195851495, - "grad_norm": 1.286763771441465, - "learning_rate": 3.499160425453371e-07, - "loss": 0.9227, - "step": 10839 - }, - { - "epoch": 0.8146700736509845, - "grad_norm": 1.4814940754478765, - "learning_rate": 3.496409908170157e-07, - "loss": 0.9925, - "step": 10840 - }, - { - "epoch": 0.8147452277168195, - "grad_norm": 1.7025942894232997, - "learning_rate": 3.4936603687878496e-07, - "loss": 0.9374, - "step": 10841 - }, - { - "epoch": 0.8148203817826545, - "grad_norm": 1.9622907490547072, - "learning_rate": 3.490911807469383e-07, - "loss": 0.9441, - "step": 10842 - }, - { - "epoch": 0.8148955358484894, - "grad_norm": 1.627362954131241, - "learning_rate": 3.488164224377599e-07, - "loss": 0.9746, - "step": 10843 - }, - { - "epoch": 0.8149706899143244, - "grad_norm": 2.0792178385316817, - "learning_rate": 3.485417619675317e-07, - "loss": 0.9581, - "step": 10844 - }, - { - "epoch": 0.8150458439801593, - "grad_norm": 1.3609482879736658, - "learning_rate": 3.482671993525286e-07, - "loss": 0.9865, - "step": 10845 - }, - { - "epoch": 0.8151209980459942, - "grad_norm": 9.473046464256619, - "learning_rate": 3.479927346090179e-07, - "loss": 0.8752, - "step": 10846 - }, - { - "epoch": 0.8151961521118293, - "grad_norm": 1.4725336079413345, - "learning_rate": 3.4771836775326333e-07, - "loss": 0.91, - "step": 10847 - }, - { - "epoch": 0.8152713061776642, - "grad_norm": 1.5056635114495847, - "learning_rate": 3.474440988015233e-07, - "loss": 0.9874, - "step": 10848 - }, - { - "epoch": 0.8153464602434992, - "grad_norm": 1.7057799897132908, - "learning_rate": 3.471699277700484e-07, - "loss": 0.9955, - "step": 10849 - }, - { - "epoch": 0.8154216143093341, - "grad_norm": 1.8999893062692164, - "learning_rate": 3.468958546750844e-07, - "loss": 0.9715, - "step": 10850 - }, - { - "epoch": 0.815496768375169, - "grad_norm": 2.291623918539208, - "learning_rate": 3.466218795328706e-07, - "loss": 0.9716, - "step": 10851 - }, - { - "epoch": 0.8155719224410041, - "grad_norm": 1.805408944592431, - "learning_rate": 3.4634800235964255e-07, - "loss": 0.8739, - "step": 10852 - }, - { - "epoch": 0.815647076506839, - "grad_norm": 1.5913891229096473, - "learning_rate": 3.460742231716267e-07, - "loss": 0.9904, - "step": 10853 - }, - { - "epoch": 0.815722230572674, - "grad_norm": 1.4759738293630411, - "learning_rate": 3.4580054198504716e-07, - "loss": 0.9339, - "step": 10854 - }, - { - "epoch": 0.8157973846385089, - "grad_norm": 1.8299443899366505, - "learning_rate": 3.455269588161196e-07, - "loss": 0.972, - "step": 10855 - }, - { - "epoch": 0.815872538704344, - "grad_norm": 1.292202817740746, - "learning_rate": 3.4525347368105504e-07, - "loss": 1.0007, - "step": 10856 - }, - { - "epoch": 0.8159476927701789, - "grad_norm": 1.8946636350331403, - "learning_rate": 3.4498008659605836e-07, - "loss": 0.949, - "step": 10857 - }, - { - "epoch": 0.8160228468360138, - "grad_norm": 1.7493095398240373, - "learning_rate": 3.4470679757732945e-07, - "loss": 1.0295, - "step": 10858 - }, - { - "epoch": 0.8160980009018488, - "grad_norm": 1.966543444582535, - "learning_rate": 3.4443360664106135e-07, - "loss": 0.8542, - "step": 10859 - }, - { - "epoch": 0.8161731549676837, - "grad_norm": 1.8254589627202813, - "learning_rate": 3.441605138034416e-07, - "loss": 1.0274, - "step": 10860 - }, - { - "epoch": 0.8162483090335187, - "grad_norm": 1.96596445084852, - "learning_rate": 3.438875190806516e-07, - "loss": 0.9669, - "step": 10861 - }, - { - "epoch": 0.8163234630993537, - "grad_norm": 2.0360172376146557, - "learning_rate": 3.4361462248886875e-07, - "loss": 0.7731, - "step": 10862 - }, - { - "epoch": 0.8163986171651887, - "grad_norm": 1.763509309865779, - "learning_rate": 3.433418240442611e-07, - "loss": 1.0137, - "step": 10863 - }, - { - "epoch": 0.8164737712310236, - "grad_norm": 2.4415156225501096, - "learning_rate": 3.4306912376299437e-07, - "loss": 1.0172, - "step": 10864 - }, - { - "epoch": 0.8165489252968585, - "grad_norm": 2.898636063100394, - "learning_rate": 3.4279652166122717e-07, - "loss": 0.9368, - "step": 10865 - }, - { - "epoch": 0.8166240793626935, - "grad_norm": 1.7323220965048094, - "learning_rate": 3.4252401775511255e-07, - "loss": 0.9688, - "step": 10866 - }, - { - "epoch": 0.8166992334285285, - "grad_norm": 1.5680236076942524, - "learning_rate": 3.422516120607957e-07, - "loss": 0.9207, - "step": 10867 - }, - { - "epoch": 0.8167743874943635, - "grad_norm": 2.0387839642285326, - "learning_rate": 3.4197930459441883e-07, - "loss": 0.8981, - "step": 10868 - }, - { - "epoch": 0.8168495415601984, - "grad_norm": 1.6789350376705388, - "learning_rate": 3.4170709537211815e-07, - "loss": 0.9836, - "step": 10869 - }, - { - "epoch": 0.8169246956260333, - "grad_norm": 1.7832686379839928, - "learning_rate": 3.4143498441002105e-07, - "loss": 1.0005, - "step": 10870 - }, - { - "epoch": 0.8169998496918683, - "grad_norm": 1.696589819336331, - "learning_rate": 3.4116297172425277e-07, - "loss": 0.9908, - "step": 10871 - }, - { - "epoch": 0.8170750037577033, - "grad_norm": 1.7843967549990385, - "learning_rate": 3.408910573309305e-07, - "loss": 0.9222, - "step": 10872 - }, - { - "epoch": 0.8171501578235383, - "grad_norm": 1.4189259054559369, - "learning_rate": 3.4061924124616613e-07, - "loss": 0.9599, - "step": 10873 - }, - { - "epoch": 0.8172253118893732, - "grad_norm": 1.6071206015605068, - "learning_rate": 3.4034752348606553e-07, - "loss": 0.8757, - "step": 10874 - }, - { - "epoch": 0.8173004659552082, - "grad_norm": 2.110390877699495, - "learning_rate": 3.400759040667298e-07, - "loss": 0.9808, - "step": 10875 - }, - { - "epoch": 0.8173756200210431, - "grad_norm": 1.4154374829995244, - "learning_rate": 3.398043830042532e-07, - "loss": 0.9592, - "step": 10876 - }, - { - "epoch": 0.817450774086878, - "grad_norm": 1.6781026060858508, - "learning_rate": 3.395329603147241e-07, - "loss": 1.047, - "step": 10877 - }, - { - "epoch": 0.8175259281527131, - "grad_norm": 1.9023205640190601, - "learning_rate": 3.3926163601422485e-07, - "loss": 0.8517, - "step": 10878 - }, - { - "epoch": 0.817601082218548, - "grad_norm": 1.6141026509434253, - "learning_rate": 3.3899041011883433e-07, - "loss": 1.0125, - "step": 10879 - }, - { - "epoch": 0.817676236284383, - "grad_norm": 1.4226646326730399, - "learning_rate": 3.3871928264462124e-07, - "loss": 0.9738, - "step": 10880 - }, - { - "epoch": 0.8177513903502179, - "grad_norm": 1.8289038148494707, - "learning_rate": 3.3844825360765273e-07, - "loss": 0.8845, - "step": 10881 - }, - { - "epoch": 0.817826544416053, - "grad_norm": 1.9585156930738339, - "learning_rate": 3.38177323023987e-07, - "loss": 0.9086, - "step": 10882 - }, - { - "epoch": 0.8179016984818879, - "grad_norm": 1.6048490715959987, - "learning_rate": 3.379064909096796e-07, - "loss": 0.9304, - "step": 10883 - }, - { - "epoch": 0.8179768525477228, - "grad_norm": 2.298497775057035, - "learning_rate": 3.376357572807762e-07, - "loss": 0.9385, - "step": 10884 - }, - { - "epoch": 0.8180520066135578, - "grad_norm": 2.7898422561679963, - "learning_rate": 3.373651221533198e-07, - "loss": 0.9608, - "step": 10885 - }, - { - "epoch": 0.8181271606793927, - "grad_norm": 1.5025090055782935, - "learning_rate": 3.3709458554334735e-07, - "loss": 0.9169, - "step": 10886 - }, - { - "epoch": 0.8182023147452278, - "grad_norm": 1.474593691710888, - "learning_rate": 3.368241474668876e-07, - "loss": 0.9985, - "step": 10887 - }, - { - "epoch": 0.8182774688110627, - "grad_norm": 3.035268106449139, - "learning_rate": 3.3655380793996636e-07, - "loss": 0.8906, - "step": 10888 - }, - { - "epoch": 0.8183526228768977, - "grad_norm": 1.5045342261344083, - "learning_rate": 3.3628356697860216e-07, - "loss": 0.9106, - "step": 10889 - }, - { - "epoch": 0.8184277769427326, - "grad_norm": 2.018677049387574, - "learning_rate": 3.3601342459880643e-07, - "loss": 0.8983, - "step": 10890 - }, - { - "epoch": 0.8185029310085675, - "grad_norm": 1.986895124892088, - "learning_rate": 3.3574338081658724e-07, - "loss": 0.9285, - "step": 10891 - }, - { - "epoch": 0.8185780850744026, - "grad_norm": 1.5784315812342242, - "learning_rate": 3.3547343564794605e-07, - "loss": 0.9797, - "step": 10892 - }, - { - "epoch": 0.8186532391402375, - "grad_norm": 1.4993316432954098, - "learning_rate": 3.352035891088776e-07, - "loss": 0.9166, - "step": 10893 - }, - { - "epoch": 0.8187283932060725, - "grad_norm": 2.5833786581021556, - "learning_rate": 3.3493384121537147e-07, - "loss": 0.9304, - "step": 10894 - }, - { - "epoch": 0.8188035472719074, - "grad_norm": 2.133427457918521, - "learning_rate": 3.346641919834108e-07, - "loss": 0.9628, - "step": 10895 - }, - { - "epoch": 0.8188787013377423, - "grad_norm": 1.8234585928330294, - "learning_rate": 3.3439464142897467e-07, - "loss": 0.9684, - "step": 10896 - }, - { - "epoch": 0.8189538554035773, - "grad_norm": 2.282944768250071, - "learning_rate": 3.3412518956803306e-07, - "loss": 1.0581, - "step": 10897 - }, - { - "epoch": 0.8190290094694123, - "grad_norm": 1.5702578406998313, - "learning_rate": 3.338558364165536e-07, - "loss": 0.9902, - "step": 10898 - }, - { - "epoch": 0.8191041635352473, - "grad_norm": 1.8184295214635153, - "learning_rate": 3.335865819904957e-07, - "loss": 1.0032, - "step": 10899 - }, - { - "epoch": 0.8191793176010822, - "grad_norm": 1.6779097974946218, - "learning_rate": 3.3331742630581405e-07, - "loss": 0.9983, - "step": 10900 - }, - { - "epoch": 0.8192544716669172, - "grad_norm": 3.0682210617895165, - "learning_rate": 3.330483693784567e-07, - "loss": 1.0711, - "step": 10901 - }, - { - "epoch": 0.8193296257327521, - "grad_norm": 1.8064066307503681, - "learning_rate": 3.3277941122436714e-07, - "loss": 1.0288, - "step": 10902 - }, - { - "epoch": 0.8194047797985871, - "grad_norm": 1.787452939887885, - "learning_rate": 3.325105518594815e-07, - "loss": 1.0143, - "step": 10903 - }, - { - "epoch": 0.8194799338644221, - "grad_norm": 1.4698125364181358, - "learning_rate": 3.322417912997311e-07, - "loss": 0.9568, - "step": 10904 - }, - { - "epoch": 0.819555087930257, - "grad_norm": 0.8250717883231985, - "learning_rate": 3.3197312956104016e-07, - "loss": 0.8929, - "step": 10905 - }, - { - "epoch": 0.819630241996092, - "grad_norm": 2.1827078508155227, - "learning_rate": 3.317045666593297e-07, - "loss": 0.9846, - "step": 10906 - }, - { - "epoch": 0.8197053960619269, - "grad_norm": 5.566482489883595, - "learning_rate": 3.314361026105108e-07, - "loss": 0.9236, - "step": 10907 - }, - { - "epoch": 0.819780550127762, - "grad_norm": 1.582943801479951, - "learning_rate": 3.3116773743049244e-07, - "loss": 0.8824, - "step": 10908 - }, - { - "epoch": 0.8198557041935969, - "grad_norm": 1.8043700099938171, - "learning_rate": 3.3089947113517647e-07, - "loss": 0.9881, - "step": 10909 - }, - { - "epoch": 0.8199308582594318, - "grad_norm": 1.842291044604458, - "learning_rate": 3.306313037404582e-07, - "loss": 1.02, - "step": 10910 - }, - { - "epoch": 0.8200060123252668, - "grad_norm": 5.416089579823244, - "learning_rate": 3.303632352622276e-07, - "loss": 1.0543, - "step": 10911 - }, - { - "epoch": 0.8200811663911017, - "grad_norm": 1.8963690269898654, - "learning_rate": 3.3009526571636827e-07, - "loss": 1.026, - "step": 10912 - }, - { - "epoch": 0.8201563204569368, - "grad_norm": 1.978287076411967, - "learning_rate": 3.2982739511876e-07, - "loss": 0.9972, - "step": 10913 - }, - { - "epoch": 0.8202314745227717, - "grad_norm": 0.6792123383297731, - "learning_rate": 3.295596234852731e-07, - "loss": 0.8254, - "step": 10914 - }, - { - "epoch": 0.8203066285886066, - "grad_norm": 1.650468271449174, - "learning_rate": 3.2929195083177554e-07, - "loss": 0.9038, - "step": 10915 - }, - { - "epoch": 0.8203817826544416, - "grad_norm": 1.829942109906855, - "learning_rate": 3.2902437717412743e-07, - "loss": 0.9997, - "step": 10916 - }, - { - "epoch": 0.8204569367202765, - "grad_norm": 1.796641259954481, - "learning_rate": 3.2875690252818357e-07, - "loss": 1.0082, - "step": 10917 - }, - { - "epoch": 0.8205320907861116, - "grad_norm": 1.6124830141577509, - "learning_rate": 3.2848952690979224e-07, - "loss": 0.8767, - "step": 10918 - }, - { - "epoch": 0.8206072448519465, - "grad_norm": 1.4403420424871585, - "learning_rate": 3.282222503347978e-07, - "loss": 1.0355, - "step": 10919 - }, - { - "epoch": 0.8206823989177815, - "grad_norm": 0.9275160361906133, - "learning_rate": 3.2795507281903655e-07, - "loss": 0.8414, - "step": 10920 - }, - { - "epoch": 0.8207575529836164, - "grad_norm": 1.7304963051558946, - "learning_rate": 3.2768799437833994e-07, - "loss": 0.9743, - "step": 10921 - }, - { - "epoch": 0.8208327070494513, - "grad_norm": 1.9424968396410278, - "learning_rate": 3.274210150285328e-07, - "loss": 0.9574, - "step": 10922 - }, - { - "epoch": 0.8209078611152864, - "grad_norm": 1.677371464337333, - "learning_rate": 3.271541347854363e-07, - "loss": 0.9568, - "step": 10923 - }, - { - "epoch": 0.8209830151811213, - "grad_norm": 1.5032760290458251, - "learning_rate": 3.268873536648622e-07, - "loss": 0.9815, - "step": 10924 - }, - { - "epoch": 0.8210581692469563, - "grad_norm": 0.8447979670868603, - "learning_rate": 3.2662067168261966e-07, - "loss": 0.8141, - "step": 10925 - }, - { - "epoch": 0.8211333233127912, - "grad_norm": 1.8276243257828555, - "learning_rate": 3.2635408885450956e-07, - "loss": 0.9056, - "step": 10926 - }, - { - "epoch": 0.8212084773786262, - "grad_norm": 1.646541209840718, - "learning_rate": 3.260876051963295e-07, - "loss": 0.9423, - "step": 10927 - }, - { - "epoch": 0.8212836314444611, - "grad_norm": 2.132111039281417, - "learning_rate": 3.2582122072386755e-07, - "loss": 0.9131, - "step": 10928 - }, - { - "epoch": 0.8213587855102961, - "grad_norm": 1.574240710386916, - "learning_rate": 3.2555493545290927e-07, - "loss": 0.9014, - "step": 10929 - }, - { - "epoch": 0.8214339395761311, - "grad_norm": 2.0450269203932394, - "learning_rate": 3.25288749399234e-07, - "loss": 0.9051, - "step": 10930 - }, - { - "epoch": 0.821509093641966, - "grad_norm": 2.584901077620806, - "learning_rate": 3.2502266257861213e-07, - "loss": 1.0245, - "step": 10931 - }, - { - "epoch": 0.821584247707801, - "grad_norm": 1.8882482422424403, - "learning_rate": 3.247566750068118e-07, - "loss": 1.0475, - "step": 10932 - }, - { - "epoch": 0.821659401773636, - "grad_norm": 2.7295822428960888, - "learning_rate": 3.2449078669959406e-07, - "loss": 0.9824, - "step": 10933 - }, - { - "epoch": 0.821734555839471, - "grad_norm": 1.5553077725581392, - "learning_rate": 3.242249976727123e-07, - "loss": 0.9977, - "step": 10934 - }, - { - "epoch": 0.8218097099053059, - "grad_norm": 1.5878316908218881, - "learning_rate": 3.2395930794191607e-07, - "loss": 1.0578, - "step": 10935 - }, - { - "epoch": 0.8218848639711408, - "grad_norm": 2.632643878189315, - "learning_rate": 3.236937175229495e-07, - "loss": 0.9386, - "step": 10936 - }, - { - "epoch": 0.8219600180369758, - "grad_norm": 4.985209906318749, - "learning_rate": 3.234282264315493e-07, - "loss": 0.919, - "step": 10937 - }, - { - "epoch": 0.8220351721028107, - "grad_norm": 1.441733718518196, - "learning_rate": 3.2316283468344653e-07, - "loss": 0.9863, - "step": 10938 - }, - { - "epoch": 0.8221103261686458, - "grad_norm": 2.494375577776982, - "learning_rate": 3.228975422943665e-07, - "loss": 0.9586, - "step": 10939 - }, - { - "epoch": 0.8221854802344807, - "grad_norm": 1.7818397081212503, - "learning_rate": 3.2263234928003e-07, - "loss": 1.0387, - "step": 10940 - }, - { - "epoch": 0.8222606343003156, - "grad_norm": 1.5577973360262931, - "learning_rate": 3.22367255656149e-07, - "loss": 0.9909, - "step": 10941 - }, - { - "epoch": 0.8223357883661506, - "grad_norm": 1.9717844632458388, - "learning_rate": 3.2210226143843257e-07, - "loss": 1.0058, - "step": 10942 - }, - { - "epoch": 0.8224109424319855, - "grad_norm": 1.895877446370544, - "learning_rate": 3.218373666425822e-07, - "loss": 0.915, - "step": 10943 - }, - { - "epoch": 0.8224860964978206, - "grad_norm": 1.4488863910921352, - "learning_rate": 3.2157257128429406e-07, - "loss": 0.9553, - "step": 10944 - }, - { - "epoch": 0.8225612505636555, - "grad_norm": 1.9088540972769925, - "learning_rate": 3.2130787537925776e-07, - "loss": 0.908, - "step": 10945 - }, - { - "epoch": 0.8226364046294905, - "grad_norm": 1.9109801093503092, - "learning_rate": 3.2104327894315785e-07, - "loss": 0.9181, - "step": 10946 - }, - { - "epoch": 0.8227115586953254, - "grad_norm": 1.6690260955205842, - "learning_rate": 3.2077878199167384e-07, - "loss": 0.9737, - "step": 10947 - }, - { - "epoch": 0.8227867127611603, - "grad_norm": 2.138433964007402, - "learning_rate": 3.2051438454047677e-07, - "loss": 1.0563, - "step": 10948 - }, - { - "epoch": 0.8228618668269954, - "grad_norm": 1.6486662213352794, - "learning_rate": 3.202500866052331e-07, - "loss": 1.064, - "step": 10949 - }, - { - "epoch": 0.8229370208928303, - "grad_norm": 3.06286945839238, - "learning_rate": 3.1998588820160486e-07, - "loss": 0.9313, - "step": 10950 - }, - { - "epoch": 0.8230121749586653, - "grad_norm": 3.3687407767712765, - "learning_rate": 3.1972178934524506e-07, - "loss": 1.0426, - "step": 10951 - }, - { - "epoch": 0.8230873290245002, - "grad_norm": 1.5099551142406467, - "learning_rate": 3.194577900518034e-07, - "loss": 1.0774, - "step": 10952 - }, - { - "epoch": 0.8231624830903352, - "grad_norm": 1.61605522581196, - "learning_rate": 3.1919389033692336e-07, - "loss": 0.9868, - "step": 10953 - }, - { - "epoch": 0.8232376371561702, - "grad_norm": 2.1594243663659944, - "learning_rate": 3.189300902162417e-07, - "loss": 0.8225, - "step": 10954 - }, - { - "epoch": 0.8233127912220051, - "grad_norm": 2.252106951631768, - "learning_rate": 3.186663897053892e-07, - "loss": 0.9111, - "step": 10955 - }, - { - "epoch": 0.8233879452878401, - "grad_norm": 1.667438074246031, - "learning_rate": 3.1840278881999115e-07, - "loss": 0.9693, - "step": 10956 - }, - { - "epoch": 0.823463099353675, - "grad_norm": 1.3388764028419864, - "learning_rate": 3.1813928757566786e-07, - "loss": 0.9179, - "step": 10957 - }, - { - "epoch": 0.82353825341951, - "grad_norm": 2.799911810984766, - "learning_rate": 3.1787588598803126e-07, - "loss": 0.9488, - "step": 10958 - }, - { - "epoch": 0.823613407485345, - "grad_norm": 2.391628877383318, - "learning_rate": 3.176125840726902e-07, - "loss": 0.7809, - "step": 10959 - }, - { - "epoch": 0.8236885615511799, - "grad_norm": 1.8054111235470622, - "learning_rate": 3.1734938184524576e-07, - "loss": 1.0003, - "step": 10960 - }, - { - "epoch": 0.8237637156170149, - "grad_norm": 1.5218785844012588, - "learning_rate": 3.170862793212936e-07, - "loss": 0.9717, - "step": 10961 - }, - { - "epoch": 0.8238388696828498, - "grad_norm": 2.80753209789539, - "learning_rate": 3.1682327651642336e-07, - "loss": 0.9041, - "step": 10962 - }, - { - "epoch": 0.8239140237486848, - "grad_norm": 2.125902208772414, - "learning_rate": 3.1656037344621987e-07, - "loss": 0.9986, - "step": 10963 - }, - { - "epoch": 0.8239891778145197, - "grad_norm": 1.9911156856547232, - "learning_rate": 3.1629757012626044e-07, - "loss": 0.9734, - "step": 10964 - }, - { - "epoch": 0.8240643318803548, - "grad_norm": 1.6325248209306313, - "learning_rate": 3.160348665721173e-07, - "loss": 0.9477, - "step": 10965 - }, - { - "epoch": 0.8241394859461897, - "grad_norm": 1.7748670823741182, - "learning_rate": 3.157722627993562e-07, - "loss": 0.939, - "step": 10966 - }, - { - "epoch": 0.8242146400120246, - "grad_norm": 2.151379821158434, - "learning_rate": 3.155097588235389e-07, - "loss": 0.796, - "step": 10967 - }, - { - "epoch": 0.8242897940778596, - "grad_norm": 1.5829718913404003, - "learning_rate": 3.1524735466021766e-07, - "loss": 1.0368, - "step": 10968 - }, - { - "epoch": 0.8243649481436945, - "grad_norm": 1.6763837393732772, - "learning_rate": 3.1498505032494204e-07, - "loss": 1.0261, - "step": 10969 - }, - { - "epoch": 0.8244401022095296, - "grad_norm": 1.5148819350515028, - "learning_rate": 3.1472284583325516e-07, - "loss": 0.932, - "step": 10970 - }, - { - "epoch": 0.8245152562753645, - "grad_norm": 2.660395898541249, - "learning_rate": 3.1446074120069346e-07, - "loss": 0.9415, - "step": 10971 - }, - { - "epoch": 0.8245904103411995, - "grad_norm": 1.835589891357086, - "learning_rate": 3.1419873644278606e-07, - "loss": 0.9571, - "step": 10972 - }, - { - "epoch": 0.8246655644070344, - "grad_norm": 1.4994055841784366, - "learning_rate": 3.13936831575059e-07, - "loss": 0.8688, - "step": 10973 - }, - { - "epoch": 0.8247407184728693, - "grad_norm": 0.7520883810097614, - "learning_rate": 3.1367502661303215e-07, - "loss": 0.8793, - "step": 10974 - }, - { - "epoch": 0.8248158725387044, - "grad_norm": 2.396209079192046, - "learning_rate": 3.134133215722161e-07, - "loss": 0.9236, - "step": 10975 - }, - { - "epoch": 0.8248910266045393, - "grad_norm": 1.519604835591727, - "learning_rate": 3.1315171646811964e-07, - "loss": 0.9222, - "step": 10976 - }, - { - "epoch": 0.8249661806703743, - "grad_norm": 1.3870101407649178, - "learning_rate": 3.1289021131624347e-07, - "loss": 0.9962, - "step": 10977 - }, - { - "epoch": 0.8250413347362092, - "grad_norm": 2.061147535378692, - "learning_rate": 3.1262880613208274e-07, - "loss": 0.924, - "step": 10978 - }, - { - "epoch": 0.8251164888020442, - "grad_norm": 0.7370057567387133, - "learning_rate": 3.12367500931126e-07, - "loss": 0.8345, - "step": 10979 - }, - { - "epoch": 0.8251916428678792, - "grad_norm": 3.3367245748438243, - "learning_rate": 3.121062957288576e-07, - "loss": 1.004, - "step": 10980 - }, - { - "epoch": 0.8252667969337141, - "grad_norm": 1.4830482576869493, - "learning_rate": 3.118451905407549e-07, - "loss": 0.8533, - "step": 10981 - }, - { - "epoch": 0.8253419509995491, - "grad_norm": 1.9742520666537917, - "learning_rate": 3.115841853822887e-07, - "loss": 1.0213, - "step": 10982 - }, - { - "epoch": 0.825417105065384, - "grad_norm": 1.6426440213260876, - "learning_rate": 3.1132328026892454e-07, - "loss": 0.9057, - "step": 10983 - }, - { - "epoch": 0.825492259131219, - "grad_norm": 2.2209507947518197, - "learning_rate": 3.110624752161233e-07, - "loss": 1.0076, - "step": 10984 - }, - { - "epoch": 0.825567413197054, - "grad_norm": 3.7060459383131246, - "learning_rate": 3.1080177023933685e-07, - "loss": 1.0393, - "step": 10985 - }, - { - "epoch": 0.8256425672628889, - "grad_norm": 0.735844825368319, - "learning_rate": 3.105411653540144e-07, - "loss": 0.8682, - "step": 10986 - }, - { - "epoch": 0.8257177213287239, - "grad_norm": 1.4487920168822586, - "learning_rate": 3.102806605755972e-07, - "loss": 1.0509, - "step": 10987 - }, - { - "epoch": 0.8257928753945588, - "grad_norm": 2.106505299838353, - "learning_rate": 3.100202559195213e-07, - "loss": 0.944, - "step": 10988 - }, - { - "epoch": 0.8258680294603938, - "grad_norm": 2.049911576201608, - "learning_rate": 3.0975995140121613e-07, - "loss": 0.9602, - "step": 10989 - }, - { - "epoch": 0.8259431835262288, - "grad_norm": 2.109974980680553, - "learning_rate": 3.0949974703610604e-07, - "loss": 0.9991, - "step": 10990 - }, - { - "epoch": 0.8260183375920638, - "grad_norm": 1.6206637260261783, - "learning_rate": 3.0923964283961046e-07, - "loss": 0.996, - "step": 10991 - }, - { - "epoch": 0.8260934916578987, - "grad_norm": 2.348121533486491, - "learning_rate": 3.0897963882713976e-07, - "loss": 0.964, - "step": 10992 - }, - { - "epoch": 0.8261686457237336, - "grad_norm": 1.4484318244436067, - "learning_rate": 3.087197350141004e-07, - "loss": 0.9726, - "step": 10993 - }, - { - "epoch": 0.8262437997895686, - "grad_norm": 1.88605824157495, - "learning_rate": 3.08459931415894e-07, - "loss": 0.8868, - "step": 10994 - }, - { - "epoch": 0.8263189538554035, - "grad_norm": 1.7036696096977162, - "learning_rate": 3.082002280479132e-07, - "loss": 0.9926, - "step": 10995 - }, - { - "epoch": 0.8263941079212386, - "grad_norm": 1.4345888431357214, - "learning_rate": 3.0794062492554716e-07, - "loss": 0.9877, - "step": 10996 - }, - { - "epoch": 0.8264692619870735, - "grad_norm": 2.022420738185413, - "learning_rate": 3.0768112206417885e-07, - "loss": 0.9559, - "step": 10997 - }, - { - "epoch": 0.8265444160529085, - "grad_norm": 1.9620005245344139, - "learning_rate": 3.074217194791844e-07, - "loss": 0.9814, - "step": 10998 - }, - { - "epoch": 0.8266195701187434, - "grad_norm": 1.8840133193369322, - "learning_rate": 3.071624171859344e-07, - "loss": 1.0354, - "step": 10999 - }, - { - "epoch": 0.8266947241845783, - "grad_norm": 2.097269658386592, - "learning_rate": 3.069032151997928e-07, - "loss": 0.887, - "step": 11000 - }, - { - "epoch": 0.8267698782504134, - "grad_norm": 1.625707325682226, - "learning_rate": 3.066441135361202e-07, - "loss": 0.9753, - "step": 11001 - }, - { - "epoch": 0.8268450323162483, - "grad_norm": 1.9932817034695816, - "learning_rate": 3.063851122102672e-07, - "loss": 1.0868, - "step": 11002 - }, - { - "epoch": 0.8269201863820833, - "grad_norm": 2.509322807937882, - "learning_rate": 3.0612621123758196e-07, - "loss": 0.9241, - "step": 11003 - }, - { - "epoch": 0.8269953404479182, - "grad_norm": 6.554155548661832, - "learning_rate": 3.0586741063340494e-07, - "loss": 0.8901, - "step": 11004 - }, - { - "epoch": 0.8270704945137531, - "grad_norm": 3.7736251500071005, - "learning_rate": 3.0560871041307137e-07, - "loss": 1.0436, - "step": 11005 - }, - { - "epoch": 0.8271456485795882, - "grad_norm": 1.3643909299290091, - "learning_rate": 3.0535011059190916e-07, - "loss": 0.9424, - "step": 11006 - }, - { - "epoch": 0.8272208026454231, - "grad_norm": 0.6478545544485951, - "learning_rate": 3.0509161118524283e-07, - "loss": 0.8171, - "step": 11007 - }, - { - "epoch": 0.8272959567112581, - "grad_norm": 1.6972760901441246, - "learning_rate": 3.0483321220838876e-07, - "loss": 0.993, - "step": 11008 - }, - { - "epoch": 0.827371110777093, - "grad_norm": 1.4882350323727167, - "learning_rate": 3.04574913676658e-07, - "loss": 0.9229, - "step": 11009 - }, - { - "epoch": 0.827446264842928, - "grad_norm": 1.6994978752201508, - "learning_rate": 3.0431671560535545e-07, - "loss": 0.9697, - "step": 11010 - }, - { - "epoch": 0.827521418908763, - "grad_norm": 4.858580104172629, - "learning_rate": 3.040586180097815e-07, - "loss": 0.9856, - "step": 11011 - }, - { - "epoch": 0.8275965729745979, - "grad_norm": 1.6140528720151266, - "learning_rate": 3.0380062090522796e-07, - "loss": 0.9675, - "step": 11012 - }, - { - "epoch": 0.8276717270404329, - "grad_norm": 1.701790873297121, - "learning_rate": 3.035427243069826e-07, - "loss": 0.9447, - "step": 11013 - }, - { - "epoch": 0.8277468811062678, - "grad_norm": 1.5867535976563258, - "learning_rate": 3.0328492823032756e-07, - "loss": 0.9452, - "step": 11014 - }, - { - "epoch": 0.8278220351721028, - "grad_norm": 1.527207078209126, - "learning_rate": 3.030272326905381e-07, - "loss": 0.9175, - "step": 11015 - }, - { - "epoch": 0.8278971892379378, - "grad_norm": 1.8004619455974502, - "learning_rate": 3.027696377028821e-07, - "loss": 0.9037, - "step": 11016 - }, - { - "epoch": 0.8279723433037728, - "grad_norm": 1.472579751696597, - "learning_rate": 3.025121432826245e-07, - "loss": 0.9526, - "step": 11017 - }, - { - "epoch": 0.8280474973696077, - "grad_norm": 1.672166674118374, - "learning_rate": 3.022547494450234e-07, - "loss": 0.8973, - "step": 11018 - }, - { - "epoch": 0.8281226514354426, - "grad_norm": 1.614471218359401, - "learning_rate": 3.019974562053285e-07, - "loss": 1.0139, - "step": 11019 - }, - { - "epoch": 0.8281978055012776, - "grad_norm": 1.4783331532928001, - "learning_rate": 3.017402635787869e-07, - "loss": 0.9629, - "step": 11020 - }, - { - "epoch": 0.8282729595671126, - "grad_norm": 1.8006585135182929, - "learning_rate": 3.0148317158063763e-07, - "loss": 0.9649, - "step": 11021 - }, - { - "epoch": 0.8283481136329476, - "grad_norm": 1.9660364251085267, - "learning_rate": 3.0122618022611467e-07, - "loss": 1.041, - "step": 11022 - }, - { - "epoch": 0.8284232676987825, - "grad_norm": 2.159858227774896, - "learning_rate": 3.00969289530445e-07, - "loss": 0.9661, - "step": 11023 - }, - { - "epoch": 0.8284984217646175, - "grad_norm": 3.621578575030793, - "learning_rate": 3.0071249950885145e-07, - "loss": 0.938, - "step": 11024 - }, - { - "epoch": 0.8285735758304524, - "grad_norm": 2.3198227285204904, - "learning_rate": 3.0045581017654933e-07, - "loss": 0.8317, - "step": 11025 - }, - { - "epoch": 0.8286487298962874, - "grad_norm": 7.965514719316784, - "learning_rate": 3.0019922154874853e-07, - "loss": 0.944, - "step": 11026 - }, - { - "epoch": 0.8287238839621224, - "grad_norm": 1.8130579122645525, - "learning_rate": 2.9994273364065235e-07, - "loss": 1.0225, - "step": 11027 - }, - { - "epoch": 0.8287990380279573, - "grad_norm": 1.81601459833823, - "learning_rate": 2.9968634646745995e-07, - "loss": 0.9702, - "step": 11028 - }, - { - "epoch": 0.8288741920937923, - "grad_norm": 1.4128457414259887, - "learning_rate": 2.9943006004436153e-07, - "loss": 0.8902, - "step": 11029 - }, - { - "epoch": 0.8289493461596272, - "grad_norm": 1.7400127573751427, - "learning_rate": 2.991738743865444e-07, - "loss": 0.968, - "step": 11030 - }, - { - "epoch": 0.8290245002254621, - "grad_norm": 1.6963810519683284, - "learning_rate": 2.9891778950918836e-07, - "loss": 1.0454, - "step": 11031 - }, - { - "epoch": 0.8290996542912972, - "grad_norm": 7.079478422877651, - "learning_rate": 2.98661805427467e-07, - "loss": 1.0287, - "step": 11032 - }, - { - "epoch": 0.8291748083571321, - "grad_norm": 1.8818297927985759, - "learning_rate": 2.98405922156548e-07, - "loss": 0.8813, - "step": 11033 - }, - { - "epoch": 0.8292499624229671, - "grad_norm": 1.45995954940883, - "learning_rate": 2.9815013971159395e-07, - "loss": 0.9969, - "step": 11034 - }, - { - "epoch": 0.829325116488802, - "grad_norm": 1.4508379915364593, - "learning_rate": 2.97894458107762e-07, - "loss": 0.839, - "step": 11035 - }, - { - "epoch": 0.829400270554637, - "grad_norm": 2.263484812158687, - "learning_rate": 2.9763887736020035e-07, - "loss": 0.8654, - "step": 11036 - }, - { - "epoch": 0.829475424620472, - "grad_norm": 1.712138412058768, - "learning_rate": 2.9738339748405426e-07, - "loss": 1.0618, - "step": 11037 - }, - { - "epoch": 0.8295505786863069, - "grad_norm": 3.5232503295716384, - "learning_rate": 2.9712801849446223e-07, - "loss": 0.8796, - "step": 11038 - }, - { - "epoch": 0.8296257327521419, - "grad_norm": 1.5281047827508558, - "learning_rate": 2.9687274040655477e-07, - "loss": 0.9592, - "step": 11039 - }, - { - "epoch": 0.8297008868179768, - "grad_norm": 1.8247815719948357, - "learning_rate": 2.966175632354593e-07, - "loss": 0.9409, - "step": 11040 - }, - { - "epoch": 0.8297760408838118, - "grad_norm": 1.7627983084305034, - "learning_rate": 2.963624869962962e-07, - "loss": 0.9639, - "step": 11041 - }, - { - "epoch": 0.8298511949496468, - "grad_norm": 2.0100008546058774, - "learning_rate": 2.9610751170417935e-07, - "loss": 0.9703, - "step": 11042 - }, - { - "epoch": 0.8299263490154818, - "grad_norm": 0.6701982334084652, - "learning_rate": 2.9585263737421717e-07, - "loss": 0.7368, - "step": 11043 - }, - { - "epoch": 0.8300015030813167, - "grad_norm": 1.4254965301921017, - "learning_rate": 2.955978640215115e-07, - "loss": 1.0214, - "step": 11044 - }, - { - "epoch": 0.8300766571471516, - "grad_norm": 1.5716119959529973, - "learning_rate": 2.9534319166115975e-07, - "loss": 1.0053, - "step": 11045 - }, - { - "epoch": 0.8301518112129866, - "grad_norm": 2.3435424712808737, - "learning_rate": 2.9508862030825075e-07, - "loss": 1.0861, - "step": 11046 - }, - { - "epoch": 0.8302269652788216, - "grad_norm": 6.263555855742632, - "learning_rate": 2.948341499778697e-07, - "loss": 0.9235, - "step": 11047 - }, - { - "epoch": 0.8303021193446566, - "grad_norm": 2.235858131562165, - "learning_rate": 2.9457978068509494e-07, - "loss": 0.9369, - "step": 11048 - }, - { - "epoch": 0.8303772734104915, - "grad_norm": 1.2996855006898516, - "learning_rate": 2.943255124449988e-07, - "loss": 1.0296, - "step": 11049 - }, - { - "epoch": 0.8304524274763264, - "grad_norm": 4.277331710171412, - "learning_rate": 2.940713452726469e-07, - "loss": 0.957, - "step": 11050 - }, - { - "epoch": 0.8305275815421614, - "grad_norm": 2.1795663013739595, - "learning_rate": 2.938172791831006e-07, - "loss": 1.0628, - "step": 11051 - }, - { - "epoch": 0.8306027356079964, - "grad_norm": 1.2837727529260758, - "learning_rate": 2.93563314191414e-07, - "loss": 0.9328, - "step": 11052 - }, - { - "epoch": 0.8306778896738314, - "grad_norm": 2.5210796477564985, - "learning_rate": 2.9330945031263545e-07, - "loss": 0.8992, - "step": 11053 - }, - { - "epoch": 0.8307530437396663, - "grad_norm": 1.7785630288311474, - "learning_rate": 2.9305568756180686e-07, - "loss": 0.8715, - "step": 11054 - }, - { - "epoch": 0.8308281978055013, - "grad_norm": 1.9029244353245285, - "learning_rate": 2.928020259539661e-07, - "loss": 0.9444, - "step": 11055 - }, - { - "epoch": 0.8309033518713362, - "grad_norm": 1.5426568210955829, - "learning_rate": 2.9254846550414146e-07, - "loss": 0.9537, - "step": 11056 - }, - { - "epoch": 0.8309785059371712, - "grad_norm": 1.9261647552500996, - "learning_rate": 2.922950062273586e-07, - "loss": 0.9353, - "step": 11057 - }, - { - "epoch": 0.8310536600030062, - "grad_norm": 2.0868547687309595, - "learning_rate": 2.9204164813863654e-07, - "loss": 0.9401, - "step": 11058 - }, - { - "epoch": 0.8311288140688411, - "grad_norm": 2.820797611688435, - "learning_rate": 2.917883912529873e-07, - "loss": 0.986, - "step": 11059 - }, - { - "epoch": 0.8312039681346761, - "grad_norm": 2.522781617518877, - "learning_rate": 2.9153523558541613e-07, - "loss": 0.9746, - "step": 11060 - }, - { - "epoch": 0.831279122200511, - "grad_norm": 1.385047485763181, - "learning_rate": 2.912821811509243e-07, - "loss": 0.8892, - "step": 11061 - }, - { - "epoch": 0.8313542762663461, - "grad_norm": 1.5362298138179187, - "learning_rate": 2.9102922796450745e-07, - "loss": 1.074, - "step": 11062 - }, - { - "epoch": 0.831429430332181, - "grad_norm": 1.5102331466555354, - "learning_rate": 2.9077637604115193e-07, - "loss": 1.0719, - "step": 11063 - }, - { - "epoch": 0.8315045843980159, - "grad_norm": 0.8105873925066983, - "learning_rate": 2.9052362539584164e-07, - "loss": 0.8756, - "step": 11064 - }, - { - "epoch": 0.8315797384638509, - "grad_norm": 1.6432943517589376, - "learning_rate": 2.9027097604355265e-07, - "loss": 0.9517, - "step": 11065 - }, - { - "epoch": 0.8316548925296858, - "grad_norm": 1.7467701261154485, - "learning_rate": 2.9001842799925526e-07, - "loss": 0.9384, - "step": 11066 - }, - { - "epoch": 0.8317300465955209, - "grad_norm": 1.668867454252884, - "learning_rate": 2.8976598127791364e-07, - "loss": 0.9663, - "step": 11067 - }, - { - "epoch": 0.8318052006613558, - "grad_norm": 1.5466309942593173, - "learning_rate": 2.8951363589448676e-07, - "loss": 1.0604, - "step": 11068 - }, - { - "epoch": 0.8318803547271908, - "grad_norm": 1.890417986168617, - "learning_rate": 2.8926139186392707e-07, - "loss": 0.8387, - "step": 11069 - }, - { - "epoch": 0.8319555087930257, - "grad_norm": 1.672146356255056, - "learning_rate": 2.8900924920118064e-07, - "loss": 0.9784, - "step": 11070 - }, - { - "epoch": 0.8320306628588606, - "grad_norm": 1.4958530162624926, - "learning_rate": 2.8875720792118754e-07, - "loss": 0.9974, - "step": 11071 - }, - { - "epoch": 0.8321058169246957, - "grad_norm": 1.7453031179368885, - "learning_rate": 2.8850526803888376e-07, - "loss": 0.9667, - "step": 11072 - }, - { - "epoch": 0.8321809709905306, - "grad_norm": 1.738056502653998, - "learning_rate": 2.882534295691954e-07, - "loss": 0.998, - "step": 11073 - }, - { - "epoch": 0.8322561250563656, - "grad_norm": 1.8503446799161551, - "learning_rate": 2.8800169252704675e-07, - "loss": 0.9379, - "step": 11074 - }, - { - "epoch": 0.8323312791222005, - "grad_norm": 2.194131227934635, - "learning_rate": 2.8775005692735344e-07, - "loss": 0.9377, - "step": 11075 - }, - { - "epoch": 0.8324064331880354, - "grad_norm": 1.358098625702946, - "learning_rate": 2.8749852278502573e-07, - "loss": 0.9739, - "step": 11076 - }, - { - "epoch": 0.8324815872538704, - "grad_norm": 2.05699556046648, - "learning_rate": 2.8724709011496795e-07, - "loss": 0.9923, - "step": 11077 - }, - { - "epoch": 0.8325567413197054, - "grad_norm": 2.396329170554902, - "learning_rate": 2.8699575893207837e-07, - "loss": 0.9482, - "step": 11078 - }, - { - "epoch": 0.8326318953855404, - "grad_norm": 1.8924452791151427, - "learning_rate": 2.867445292512507e-07, - "loss": 0.9305, - "step": 11079 - }, - { - "epoch": 0.8327070494513753, - "grad_norm": 1.674656203558201, - "learning_rate": 2.864934010873692e-07, - "loss": 0.9879, - "step": 11080 - }, - { - "epoch": 0.8327822035172103, - "grad_norm": 1.9636311059577953, - "learning_rate": 2.862423744553157e-07, - "loss": 0.9311, - "step": 11081 - }, - { - "epoch": 0.8328573575830452, - "grad_norm": 1.759234450910817, - "learning_rate": 2.8599144936996424e-07, - "loss": 0.95, - "step": 11082 - }, - { - "epoch": 0.8329325116488802, - "grad_norm": 1.5978798558232186, - "learning_rate": 2.8574062584618206e-07, - "loss": 1.041, - "step": 11083 - }, - { - "epoch": 0.8330076657147152, - "grad_norm": 1.35331700396591, - "learning_rate": 2.854899038988319e-07, - "loss": 0.9743, - "step": 11084 - }, - { - "epoch": 0.8330828197805501, - "grad_norm": 2.517926413250869, - "learning_rate": 2.8523928354277085e-07, - "loss": 0.9238, - "step": 11085 - }, - { - "epoch": 0.8331579738463851, - "grad_norm": 2.4085255551652414, - "learning_rate": 2.849887647928484e-07, - "loss": 0.9862, - "step": 11086 - }, - { - "epoch": 0.83323312791222, - "grad_norm": 1.844661745838035, - "learning_rate": 2.847383476639089e-07, - "loss": 0.9074, - "step": 11087 - }, - { - "epoch": 0.8333082819780551, - "grad_norm": 1.6806542741489363, - "learning_rate": 2.844880321707901e-07, - "loss": 0.9981, - "step": 11088 - }, - { - "epoch": 0.83338343604389, - "grad_norm": 0.7462786300636832, - "learning_rate": 2.842378183283254e-07, - "loss": 0.8289, - "step": 11089 - }, - { - "epoch": 0.8334585901097249, - "grad_norm": 1.2833347107933546, - "learning_rate": 2.8398770615133915e-07, - "loss": 0.933, - "step": 11090 - }, - { - "epoch": 0.8335337441755599, - "grad_norm": 1.7194661815764103, - "learning_rate": 2.837376956546527e-07, - "loss": 0.9836, - "step": 11091 - }, - { - "epoch": 0.8336088982413948, - "grad_norm": 1.4550442890099073, - "learning_rate": 2.8348778685307983e-07, - "loss": 0.9272, - "step": 11092 - }, - { - "epoch": 0.8336840523072299, - "grad_norm": 2.1374398920185307, - "learning_rate": 2.832379797614286e-07, - "loss": 0.9029, - "step": 11093 - }, - { - "epoch": 0.8337592063730648, - "grad_norm": 2.034551197396886, - "learning_rate": 2.829882743945007e-07, - "loss": 0.9507, - "step": 11094 - }, - { - "epoch": 0.8338343604388997, - "grad_norm": 2.811707109372611, - "learning_rate": 2.8273867076709225e-07, - "loss": 1.0155, - "step": 11095 - }, - { - "epoch": 0.8339095145047347, - "grad_norm": 1.4799096654696324, - "learning_rate": 2.8248916889399434e-07, - "loss": 0.9785, - "step": 11096 - }, - { - "epoch": 0.8339846685705696, - "grad_norm": 2.3642488374661847, - "learning_rate": 2.822397687899893e-07, - "loss": 0.9269, - "step": 11097 - }, - { - "epoch": 0.8340598226364047, - "grad_norm": 1.6910665051936287, - "learning_rate": 2.819904704698555e-07, - "loss": 0.9996, - "step": 11098 - }, - { - "epoch": 0.8341349767022396, - "grad_norm": 2.3568958115853786, - "learning_rate": 2.8174127394836577e-07, - "loss": 1.0625, - "step": 11099 - }, - { - "epoch": 0.8342101307680746, - "grad_norm": 1.893410136630626, - "learning_rate": 2.8149217924028443e-07, - "loss": 0.9629, - "step": 11100 - }, - { - "epoch": 0.8342852848339095, - "grad_norm": 2.2376391703197083, - "learning_rate": 2.8124318636037193e-07, - "loss": 0.9416, - "step": 11101 - }, - { - "epoch": 0.8343604388997444, - "grad_norm": 0.7866470076911706, - "learning_rate": 2.809942953233828e-07, - "loss": 0.87, - "step": 11102 - }, - { - "epoch": 0.8344355929655795, - "grad_norm": 2.1321808155530344, - "learning_rate": 2.8074550614406424e-07, - "loss": 0.9783, - "step": 11103 - }, - { - "epoch": 0.8345107470314144, - "grad_norm": 1.934240799348388, - "learning_rate": 2.804968188371577e-07, - "loss": 0.9381, - "step": 11104 - }, - { - "epoch": 0.8345859010972494, - "grad_norm": 2.4416134677685513, - "learning_rate": 2.8024823341739876e-07, - "loss": 1.0401, - "step": 11105 - }, - { - "epoch": 0.8346610551630843, - "grad_norm": 2.5213687317654787, - "learning_rate": 2.7999974989951813e-07, - "loss": 0.9886, - "step": 11106 - }, - { - "epoch": 0.8347362092289193, - "grad_norm": 2.1686854372116144, - "learning_rate": 2.7975136829823775e-07, - "loss": 0.8682, - "step": 11107 - }, - { - "epoch": 0.8348113632947542, - "grad_norm": 2.3335185204541946, - "learning_rate": 2.7950308862827675e-07, - "loss": 0.942, - "step": 11108 - }, - { - "epoch": 0.8348865173605892, - "grad_norm": 1.8896331076776438, - "learning_rate": 2.7925491090434583e-07, - "loss": 1.0756, - "step": 11109 - }, - { - "epoch": 0.8349616714264242, - "grad_norm": 2.214392560587222, - "learning_rate": 2.7900683514115054e-07, - "loss": 0.9056, - "step": 11110 - }, - { - "epoch": 0.8350368254922591, - "grad_norm": 1.685908591488391, - "learning_rate": 2.7875886135339e-07, - "loss": 0.9595, - "step": 11111 - }, - { - "epoch": 0.8351119795580941, - "grad_norm": 1.8005829079185924, - "learning_rate": 2.7851098955575845e-07, - "loss": 1.0549, - "step": 11112 - }, - { - "epoch": 0.835187133623929, - "grad_norm": 1.9652474142304333, - "learning_rate": 2.782632197629426e-07, - "loss": 0.9181, - "step": 11113 - }, - { - "epoch": 0.8352622876897641, - "grad_norm": 1.7810216389219153, - "learning_rate": 2.7801555198962433e-07, - "loss": 0.9694, - "step": 11114 - }, - { - "epoch": 0.835337441755599, - "grad_norm": 1.9361320711871564, - "learning_rate": 2.7776798625047784e-07, - "loss": 0.9883, - "step": 11115 - }, - { - "epoch": 0.8354125958214339, - "grad_norm": 1.631524412963292, - "learning_rate": 2.77520522560174e-07, - "loss": 1.0349, - "step": 11116 - }, - { - "epoch": 0.8354877498872689, - "grad_norm": 2.396873436321758, - "learning_rate": 2.7727316093337406e-07, - "loss": 1.0142, - "step": 11117 - }, - { - "epoch": 0.8355629039531038, - "grad_norm": 1.9874611832768831, - "learning_rate": 2.770259013847365e-07, - "loss": 0.9939, - "step": 11118 - }, - { - "epoch": 0.8356380580189389, - "grad_norm": 0.8212246863936187, - "learning_rate": 2.76778743928912e-07, - "loss": 0.8467, - "step": 11119 - }, - { - "epoch": 0.8357132120847738, - "grad_norm": 1.6252125821080257, - "learning_rate": 2.765316885805458e-07, - "loss": 0.989, - "step": 11120 - }, - { - "epoch": 0.8357883661506087, - "grad_norm": 1.9018836120952036, - "learning_rate": 2.762847353542759e-07, - "loss": 0.973, - "step": 11121 - }, - { - "epoch": 0.8358635202164437, - "grad_norm": 1.6836491197300938, - "learning_rate": 2.7603788426473615e-07, - "loss": 0.9642, - "step": 11122 - }, - { - "epoch": 0.8359386742822786, - "grad_norm": 1.596235020865207, - "learning_rate": 2.7579113532655407e-07, - "loss": 0.9609, - "step": 11123 - }, - { - "epoch": 0.8360138283481137, - "grad_norm": 2.045417324844952, - "learning_rate": 2.755444885543488e-07, - "loss": 0.9364, - "step": 11124 - }, - { - "epoch": 0.8360889824139486, - "grad_norm": 1.8149218328578658, - "learning_rate": 2.752979439627363e-07, - "loss": 0.9834, - "step": 11125 - }, - { - "epoch": 0.8361641364797836, - "grad_norm": 1.8263319302352934, - "learning_rate": 2.750515015663251e-07, - "loss": 0.9816, - "step": 11126 - }, - { - "epoch": 0.8362392905456185, - "grad_norm": 6.352904981414476, - "learning_rate": 2.7480516137971776e-07, - "loss": 0.9977, - "step": 11127 - }, - { - "epoch": 0.8363144446114534, - "grad_norm": 1.8846046497602162, - "learning_rate": 2.745589234175103e-07, - "loss": 0.9876, - "step": 11128 - }, - { - "epoch": 0.8363895986772885, - "grad_norm": 1.8478038640161587, - "learning_rate": 2.7431278769429414e-07, - "loss": 0.977, - "step": 11129 - }, - { - "epoch": 0.8364647527431234, - "grad_norm": 2.0312294592185576, - "learning_rate": 2.740667542246535e-07, - "loss": 1.0201, - "step": 11130 - }, - { - "epoch": 0.8365399068089584, - "grad_norm": 2.734190754813884, - "learning_rate": 2.738208230231667e-07, - "loss": 1.0916, - "step": 11131 - }, - { - "epoch": 0.8366150608747933, - "grad_norm": 1.381495435839033, - "learning_rate": 2.7357499410440564e-07, - "loss": 0.9814, - "step": 11132 - }, - { - "epoch": 0.8366902149406283, - "grad_norm": 3.5173128272647727, - "learning_rate": 2.7332926748293797e-07, - "loss": 0.9223, - "step": 11133 - }, - { - "epoch": 0.8367653690064633, - "grad_norm": 2.440805394811679, - "learning_rate": 2.730836431733221e-07, - "loss": 0.8741, - "step": 11134 - }, - { - "epoch": 0.8368405230722982, - "grad_norm": 1.708493023673649, - "learning_rate": 2.7283812119011386e-07, - "loss": 0.8994, - "step": 11135 - }, - { - "epoch": 0.8369156771381332, - "grad_norm": 1.7336279144977746, - "learning_rate": 2.7259270154786063e-07, - "loss": 0.8689, - "step": 11136 - }, - { - "epoch": 0.8369908312039681, - "grad_norm": 2.070471300476636, - "learning_rate": 2.723473842611044e-07, - "loss": 0.9551, - "step": 11137 - }, - { - "epoch": 0.8370659852698031, - "grad_norm": 2.8546702550942418, - "learning_rate": 2.721021693443808e-07, - "loss": 0.8899, - "step": 11138 - }, - { - "epoch": 0.837141139335638, - "grad_norm": 7.204279411967801, - "learning_rate": 2.718570568122203e-07, - "loss": 0.9502, - "step": 11139 - }, - { - "epoch": 0.837216293401473, - "grad_norm": 1.7257417825422803, - "learning_rate": 2.716120466791476e-07, - "loss": 1.0079, - "step": 11140 - }, - { - "epoch": 0.837291447467308, - "grad_norm": 1.52419915355323, - "learning_rate": 2.71367138959679e-07, - "loss": 1.0228, - "step": 11141 - }, - { - "epoch": 0.8373666015331429, - "grad_norm": 1.9277536068529975, - "learning_rate": 2.7112233366832657e-07, - "loss": 0.8489, - "step": 11142 - }, - { - "epoch": 0.8374417555989779, - "grad_norm": 1.6851236070522904, - "learning_rate": 2.70877630819597e-07, - "loss": 1.0345, - "step": 11143 - }, - { - "epoch": 0.8375169096648128, - "grad_norm": 2.085942414810163, - "learning_rate": 2.7063303042798803e-07, - "loss": 0.9906, - "step": 11144 - }, - { - "epoch": 0.8375920637306479, - "grad_norm": 1.296684434976576, - "learning_rate": 2.703885325079944e-07, - "loss": 1.0046, - "step": 11145 - }, - { - "epoch": 0.8376672177964828, - "grad_norm": 1.8849384129832323, - "learning_rate": 2.7014413707410356e-07, - "loss": 0.9115, - "step": 11146 - }, - { - "epoch": 0.8377423718623177, - "grad_norm": 1.4478159075836254, - "learning_rate": 2.698998441407969e-07, - "loss": 0.9644, - "step": 11147 - }, - { - "epoch": 0.8378175259281527, - "grad_norm": 1.6473440729195372, - "learning_rate": 2.696556537225492e-07, - "loss": 1.0226, - "step": 11148 - }, - { - "epoch": 0.8378926799939876, - "grad_norm": 1.7593346680918829, - "learning_rate": 2.6941156583382965e-07, - "loss": 1.0384, - "step": 11149 - }, - { - "epoch": 0.8379678340598227, - "grad_norm": 2.574872178543984, - "learning_rate": 2.691675804891027e-07, - "loss": 0.999, - "step": 11150 - }, - { - "epoch": 0.8380429881256576, - "grad_norm": 1.9281255292350747, - "learning_rate": 2.6892369770282333e-07, - "loss": 1.0695, - "step": 11151 - }, - { - "epoch": 0.8381181421914926, - "grad_norm": 1.9176788421744744, - "learning_rate": 2.686799174894441e-07, - "loss": 1.0426, - "step": 11152 - }, - { - "epoch": 0.8381932962573275, - "grad_norm": 1.8107541324705785, - "learning_rate": 2.684362398634095e-07, - "loss": 0.9875, - "step": 11153 - }, - { - "epoch": 0.8382684503231624, - "grad_norm": 1.9653062522571725, - "learning_rate": 2.6819266483915813e-07, - "loss": 0.9368, - "step": 11154 - }, - { - "epoch": 0.8383436043889975, - "grad_norm": 2.434003816838277, - "learning_rate": 2.6794919243112256e-07, - "loss": 0.9067, - "step": 11155 - }, - { - "epoch": 0.8384187584548324, - "grad_norm": 5.361286322981389, - "learning_rate": 2.6770582265373033e-07, - "loss": 0.8324, - "step": 11156 - }, - { - "epoch": 0.8384939125206674, - "grad_norm": 1.505635800320917, - "learning_rate": 2.674625555214014e-07, - "loss": 1.0459, - "step": 11157 - }, - { - "epoch": 0.8385690665865023, - "grad_norm": 2.989797568980048, - "learning_rate": 2.672193910485505e-07, - "loss": 0.9141, - "step": 11158 - }, - { - "epoch": 0.8386442206523373, - "grad_norm": 1.688136298697213, - "learning_rate": 2.6697632924958524e-07, - "loss": 0.8915, - "step": 11159 - }, - { - "epoch": 0.8387193747181723, - "grad_norm": 1.3972151118216185, - "learning_rate": 2.6673337013890986e-07, - "loss": 0.9818, - "step": 11160 - }, - { - "epoch": 0.8387945287840072, - "grad_norm": 1.560366891118849, - "learning_rate": 2.6649051373091834e-07, - "loss": 0.9787, - "step": 11161 - }, - { - "epoch": 0.8388696828498422, - "grad_norm": 0.8154070274265405, - "learning_rate": 2.6624776004000194e-07, - "loss": 0.8628, - "step": 11162 - }, - { - "epoch": 0.8389448369156771, - "grad_norm": 1.8169887470113146, - "learning_rate": 2.660051090805453e-07, - "loss": 1.0463, - "step": 11163 - }, - { - "epoch": 0.8390199909815121, - "grad_norm": 3.375860740257196, - "learning_rate": 2.657625608669263e-07, - "loss": 0.9635, - "step": 11164 - }, - { - "epoch": 0.8390951450473471, - "grad_norm": 1.898881271676389, - "learning_rate": 2.655201154135154e-07, - "loss": 0.9248, - "step": 11165 - }, - { - "epoch": 0.839170299113182, - "grad_norm": 1.8054437415232694, - "learning_rate": 2.6527777273467934e-07, - "loss": 0.9158, - "step": 11166 - }, - { - "epoch": 0.839245453179017, - "grad_norm": 2.1715220432133258, - "learning_rate": 2.650355328447791e-07, - "loss": 0.8866, - "step": 11167 - }, - { - "epoch": 0.8393206072448519, - "grad_norm": 1.5830390639988523, - "learning_rate": 2.6479339575816607e-07, - "loss": 0.908, - "step": 11168 - }, - { - "epoch": 0.8393957613106869, - "grad_norm": 1.9162007124821157, - "learning_rate": 2.6455136148918946e-07, - "loss": 1.0343, - "step": 11169 - }, - { - "epoch": 0.8394709153765219, - "grad_norm": 2.191018462931617, - "learning_rate": 2.6430943005219e-07, - "loss": 0.9862, - "step": 11170 - }, - { - "epoch": 0.8395460694423569, - "grad_norm": 1.5384559016858055, - "learning_rate": 2.640676014615033e-07, - "loss": 0.9987, - "step": 11171 - }, - { - "epoch": 0.8396212235081918, - "grad_norm": 1.789377642865083, - "learning_rate": 2.638258757314582e-07, - "loss": 1.0397, - "step": 11172 - }, - { - "epoch": 0.8396963775740267, - "grad_norm": 1.8933962223976322, - "learning_rate": 2.635842528763785e-07, - "loss": 1.0329, - "step": 11173 - }, - { - "epoch": 0.8397715316398617, - "grad_norm": 1.9445117696580978, - "learning_rate": 2.6334273291058105e-07, - "loss": 0.8907, - "step": 11174 - }, - { - "epoch": 0.8398466857056966, - "grad_norm": 2.7903177978656726, - "learning_rate": 2.631013158483768e-07, - "loss": 0.9623, - "step": 11175 - }, - { - "epoch": 0.8399218397715317, - "grad_norm": 2.400227925286004, - "learning_rate": 2.628600017040703e-07, - "loss": 1.0382, - "step": 11176 - }, - { - "epoch": 0.8399969938373666, - "grad_norm": 2.4766887067051147, - "learning_rate": 2.6261879049196125e-07, - "loss": 0.963, - "step": 11177 - }, - { - "epoch": 0.8400721479032016, - "grad_norm": 1.6194359817123818, - "learning_rate": 2.6237768222634103e-07, - "loss": 0.9111, - "step": 11178 - }, - { - "epoch": 0.8401473019690365, - "grad_norm": 1.8533279436476757, - "learning_rate": 2.621366769214974e-07, - "loss": 0.9362, - "step": 11179 - }, - { - "epoch": 0.8402224560348714, - "grad_norm": 1.7534544093000464, - "learning_rate": 2.6189577459171033e-07, - "loss": 0.9502, - "step": 11180 - }, - { - "epoch": 0.8402976101007065, - "grad_norm": 2.056360760056583, - "learning_rate": 2.6165497525125423e-07, - "loss": 1.0086, - "step": 11181 - }, - { - "epoch": 0.8403727641665414, - "grad_norm": 4.13762374646777, - "learning_rate": 2.614142789143972e-07, - "loss": 0.8474, - "step": 11182 - }, - { - "epoch": 0.8404479182323764, - "grad_norm": 1.3519210339881496, - "learning_rate": 2.611736855954014e-07, - "loss": 0.9695, - "step": 11183 - }, - { - "epoch": 0.8405230722982113, - "grad_norm": 2.301677950095497, - "learning_rate": 2.6093319530852407e-07, - "loss": 1.01, - "step": 11184 - }, - { - "epoch": 0.8405982263640462, - "grad_norm": 1.702749228363467, - "learning_rate": 2.606928080680133e-07, - "loss": 0.9609, - "step": 11185 - }, - { - "epoch": 0.8406733804298813, - "grad_norm": 3.047445547041426, - "learning_rate": 2.6045252388811434e-07, - "loss": 0.9788, - "step": 11186 - }, - { - "epoch": 0.8407485344957162, - "grad_norm": 2.4793981449401814, - "learning_rate": 2.602123427830651e-07, - "loss": 0.9898, - "step": 11187 - }, - { - "epoch": 0.8408236885615512, - "grad_norm": 1.532063326580077, - "learning_rate": 2.5997226476709567e-07, - "loss": 0.9316, - "step": 11188 - }, - { - "epoch": 0.8408988426273861, - "grad_norm": 3.576659856281984, - "learning_rate": 2.5973228985443274e-07, - "loss": 0.9919, - "step": 11189 - }, - { - "epoch": 0.8409739966932211, - "grad_norm": 1.6574770862267076, - "learning_rate": 2.594924180592957e-07, - "loss": 1.031, - "step": 11190 - }, - { - "epoch": 0.8410491507590561, - "grad_norm": 2.018977961940948, - "learning_rate": 2.59252649395898e-07, - "loss": 0.8919, - "step": 11191 - }, - { - "epoch": 0.841124304824891, - "grad_norm": 1.6343205984481635, - "learning_rate": 2.590129838784465e-07, - "loss": 1.059, - "step": 11192 - }, - { - "epoch": 0.841199458890726, - "grad_norm": 7.068554738768789, - "learning_rate": 2.587734215211419e-07, - "loss": 0.979, - "step": 11193 - }, - { - "epoch": 0.8412746129565609, - "grad_norm": 1.9050225230009405, - "learning_rate": 2.585339623381806e-07, - "loss": 1.0811, - "step": 11194 - }, - { - "epoch": 0.841349767022396, - "grad_norm": 2.176229050747448, - "learning_rate": 2.582946063437497e-07, - "loss": 0.9823, - "step": 11195 - }, - { - "epoch": 0.8414249210882309, - "grad_norm": 1.95359058171485, - "learning_rate": 2.5805535355203313e-07, - "loss": 0.9892, - "step": 11196 - }, - { - "epoch": 0.8415000751540659, - "grad_norm": 1.7064756977755027, - "learning_rate": 2.5781620397720715e-07, - "loss": 0.898, - "step": 11197 - }, - { - "epoch": 0.8415752292199008, - "grad_norm": 1.370246081770661, - "learning_rate": 2.575771576334427e-07, - "loss": 0.9672, - "step": 11198 - }, - { - "epoch": 0.8416503832857357, - "grad_norm": 1.4608164038423388, - "learning_rate": 2.5733821453490303e-07, - "loss": 0.9664, - "step": 11199 - }, - { - "epoch": 0.8417255373515707, - "grad_norm": 1.5550345028793617, - "learning_rate": 2.5709937469574794e-07, - "loss": 0.9768, - "step": 11200 - }, - { - "epoch": 0.8418006914174057, - "grad_norm": 0.6363577023839478, - "learning_rate": 2.568606381301288e-07, - "loss": 0.8018, - "step": 11201 - }, - { - "epoch": 0.8418758454832407, - "grad_norm": 2.278800171484463, - "learning_rate": 2.566220048521919e-07, - "loss": 0.9491, - "step": 11202 - }, - { - "epoch": 0.8419509995490756, - "grad_norm": 2.0785637872711837, - "learning_rate": 2.5638347487607646e-07, - "loss": 1.0344, - "step": 11203 - }, - { - "epoch": 0.8420261536149106, - "grad_norm": 0.7378703189800246, - "learning_rate": 2.5614504821591776e-07, - "loss": 0.8492, - "step": 11204 - }, - { - "epoch": 0.8421013076807455, - "grad_norm": 1.6515407910900655, - "learning_rate": 2.5590672488584177e-07, - "loss": 0.895, - "step": 11205 - }, - { - "epoch": 0.8421764617465805, - "grad_norm": 1.7956228786393569, - "learning_rate": 2.5566850489997096e-07, - "loss": 1.0174, - "step": 11206 - }, - { - "epoch": 0.8422516158124155, - "grad_norm": 1.6509689491535982, - "learning_rate": 2.5543038827242113e-07, - "loss": 0.935, - "step": 11207 - }, - { - "epoch": 0.8423267698782504, - "grad_norm": 0.7716856488707465, - "learning_rate": 2.5519237501730174e-07, - "loss": 0.7887, - "step": 11208 - }, - { - "epoch": 0.8424019239440854, - "grad_norm": 1.5608612622343625, - "learning_rate": 2.5495446514871457e-07, - "loss": 0.8889, - "step": 11209 - }, - { - "epoch": 0.8424770780099203, - "grad_norm": 1.3045844762637309, - "learning_rate": 2.547166586807574e-07, - "loss": 0.9055, - "step": 11210 - }, - { - "epoch": 0.8425522320757552, - "grad_norm": 1.7944036322966372, - "learning_rate": 2.544789556275222e-07, - "loss": 0.9879, - "step": 11211 - }, - { - "epoch": 0.8426273861415903, - "grad_norm": 1.6794246734098666, - "learning_rate": 2.542413560030923e-07, - "loss": 0.98, - "step": 11212 - }, - { - "epoch": 0.8427025402074252, - "grad_norm": 1.607013043201737, - "learning_rate": 2.5400385982154726e-07, - "loss": 1.035, - "step": 11213 - }, - { - "epoch": 0.8427776942732602, - "grad_norm": 2.0728274634511443, - "learning_rate": 2.5376646709695925e-07, - "loss": 0.8651, - "step": 11214 - }, - { - "epoch": 0.8428528483390951, - "grad_norm": 2.1528715120096926, - "learning_rate": 2.53529177843395e-07, - "loss": 0.9103, - "step": 11215 - }, - { - "epoch": 0.8429280024049302, - "grad_norm": 2.2460481058541544, - "learning_rate": 2.532919920749144e-07, - "loss": 0.8936, - "step": 11216 - }, - { - "epoch": 0.8430031564707651, - "grad_norm": 1.8496535879004896, - "learning_rate": 2.530549098055721e-07, - "loss": 0.8862, - "step": 11217 - }, - { - "epoch": 0.8430783105366, - "grad_norm": 1.8648786115643992, - "learning_rate": 2.528179310494158e-07, - "loss": 0.9511, - "step": 11218 - }, - { - "epoch": 0.843153464602435, - "grad_norm": 1.5261569277966531, - "learning_rate": 2.5258105582048775e-07, - "loss": 0.9037, - "step": 11219 - }, - { - "epoch": 0.8432286186682699, - "grad_norm": 2.3495932409631535, - "learning_rate": 2.5234428413282273e-07, - "loss": 1.0452, - "step": 11220 - }, - { - "epoch": 0.843303772734105, - "grad_norm": 1.8034378943858478, - "learning_rate": 2.521076160004523e-07, - "loss": 0.9761, - "step": 11221 - }, - { - "epoch": 0.8433789267999399, - "grad_norm": 1.8309265580170195, - "learning_rate": 2.5187105143739764e-07, - "loss": 0.954, - "step": 11222 - }, - { - "epoch": 0.8434540808657749, - "grad_norm": 2.2434347584749257, - "learning_rate": 2.5163459045767754e-07, - "loss": 0.8668, - "step": 11223 - }, - { - "epoch": 0.8435292349316098, - "grad_norm": 2.550085933957687, - "learning_rate": 2.5139823307530307e-07, - "loss": 0.9291, - "step": 11224 - }, - { - "epoch": 0.8436043889974447, - "grad_norm": 1.7140859958703978, - "learning_rate": 2.511619793042792e-07, - "loss": 0.9966, - "step": 11225 - }, - { - "epoch": 0.8436795430632797, - "grad_norm": 2.1604706740610444, - "learning_rate": 2.5092582915860427e-07, - "loss": 0.9395, - "step": 11226 - }, - { - "epoch": 0.8437546971291147, - "grad_norm": 1.7802817406724911, - "learning_rate": 2.5068978265227157e-07, - "loss": 1.0501, - "step": 11227 - }, - { - "epoch": 0.8438298511949497, - "grad_norm": 0.6024202696489809, - "learning_rate": 2.5045383979926884e-07, - "loss": 0.8204, - "step": 11228 - }, - { - "epoch": 0.8439050052607846, - "grad_norm": 3.3481239873426345, - "learning_rate": 2.5021800061357454e-07, - "loss": 0.9825, - "step": 11229 - }, - { - "epoch": 0.8439801593266195, - "grad_norm": 1.9426349152165363, - "learning_rate": 2.499822651091645e-07, - "loss": 0.8584, - "step": 11230 - }, - { - "epoch": 0.8440553133924545, - "grad_norm": 1.6687987359748946, - "learning_rate": 2.497466333000071e-07, - "loss": 0.8982, - "step": 11231 - }, - { - "epoch": 0.8441304674582895, - "grad_norm": 1.9661398039129576, - "learning_rate": 2.4951110520006294e-07, - "loss": 1.0378, - "step": 11232 - }, - { - "epoch": 0.8442056215241245, - "grad_norm": 2.4441001738763957, - "learning_rate": 2.492756808232888e-07, - "loss": 0.9057, - "step": 11233 - }, - { - "epoch": 0.8442807755899594, - "grad_norm": 1.4050868840351554, - "learning_rate": 2.4904036018363483e-07, - "loss": 0.8629, - "step": 11234 - }, - { - "epoch": 0.8443559296557944, - "grad_norm": 1.4417282613190614, - "learning_rate": 2.4880514329504463e-07, - "loss": 0.9545, - "step": 11235 - }, - { - "epoch": 0.8444310837216293, - "grad_norm": 2.275411685960738, - "learning_rate": 2.4857003017145526e-07, - "loss": 0.9841, - "step": 11236 - }, - { - "epoch": 0.8445062377874643, - "grad_norm": 1.6959663220246723, - "learning_rate": 2.48335020826798e-07, - "loss": 1.0414, - "step": 11237 - }, - { - "epoch": 0.8445813918532993, - "grad_norm": 2.0703872379272252, - "learning_rate": 2.481001152749993e-07, - "loss": 0.9051, - "step": 11238 - }, - { - "epoch": 0.8446565459191342, - "grad_norm": 1.5120616592923966, - "learning_rate": 2.478653135299762e-07, - "loss": 0.9399, - "step": 11239 - }, - { - "epoch": 0.8447316999849692, - "grad_norm": 2.056794105591965, - "learning_rate": 2.476306156056431e-07, - "loss": 1.0135, - "step": 11240 - }, - { - "epoch": 0.8448068540508041, - "grad_norm": 0.7355845210266494, - "learning_rate": 2.4739602151590635e-07, - "loss": 0.8569, - "step": 11241 - }, - { - "epoch": 0.8448820081166392, - "grad_norm": 1.6615631963309765, - "learning_rate": 2.471615312746664e-07, - "loss": 1.0479, - "step": 11242 - }, - { - "epoch": 0.8449571621824741, - "grad_norm": 2.3355109294161136, - "learning_rate": 2.4692714489581746e-07, - "loss": 0.9586, - "step": 11243 - }, - { - "epoch": 0.845032316248309, - "grad_norm": 0.7368862556329112, - "learning_rate": 2.466928623932485e-07, - "loss": 0.84, - "step": 11244 - }, - { - "epoch": 0.845107470314144, - "grad_norm": 2.7137267964204415, - "learning_rate": 2.4645868378084133e-07, - "loss": 0.9087, - "step": 11245 - }, - { - "epoch": 0.8451826243799789, - "grad_norm": 1.7718042591647052, - "learning_rate": 2.462246090724718e-07, - "loss": 0.9204, - "step": 11246 - }, - { - "epoch": 0.845257778445814, - "grad_norm": 1.781651769468132, - "learning_rate": 2.459906382820096e-07, - "loss": 1.0012, - "step": 11247 - }, - { - "epoch": 0.8453329325116489, - "grad_norm": 1.831799848375711, - "learning_rate": 2.457567714233193e-07, - "loss": 1.0213, - "step": 11248 - }, - { - "epoch": 0.8454080865774839, - "grad_norm": 1.5328255935716326, - "learning_rate": 2.455230085102571e-07, - "loss": 0.9422, - "step": 11249 - }, - { - "epoch": 0.8454832406433188, - "grad_norm": 1.4608735259221568, - "learning_rate": 2.452893495566748e-07, - "loss": 1.08, - "step": 11250 - }, - { - "epoch": 0.8455583947091537, - "grad_norm": 1.5287391161901605, - "learning_rate": 2.4505579457641824e-07, - "loss": 0.9702, - "step": 11251 - }, - { - "epoch": 0.8456335487749888, - "grad_norm": 2.1355058480845632, - "learning_rate": 2.448223435833261e-07, - "loss": 1.0994, - "step": 11252 - }, - { - "epoch": 0.8457087028408237, - "grad_norm": 1.7734346767856795, - "learning_rate": 2.445889965912311e-07, - "loss": 0.938, - "step": 11253 - }, - { - "epoch": 0.8457838569066587, - "grad_norm": 1.7124543455400736, - "learning_rate": 2.4435575361395934e-07, - "loss": 1.0756, - "step": 11254 - }, - { - "epoch": 0.8458590109724936, - "grad_norm": 1.8345834486897288, - "learning_rate": 2.44122614665333e-07, - "loss": 0.9967, - "step": 11255 - }, - { - "epoch": 0.8459341650383285, - "grad_norm": 1.8546156678915957, - "learning_rate": 2.4388957975916466e-07, - "loss": 0.8858, - "step": 11256 - }, - { - "epoch": 0.8460093191041635, - "grad_norm": 2.042544377380657, - "learning_rate": 2.436566489092635e-07, - "loss": 0.9697, - "step": 11257 - }, - { - "epoch": 0.8460844731699985, - "grad_norm": 2.707289822381979, - "learning_rate": 2.434238221294316e-07, - "loss": 0.7759, - "step": 11258 - }, - { - "epoch": 0.8461596272358335, - "grad_norm": 1.4192041317259243, - "learning_rate": 2.4319109943346473e-07, - "loss": 1.0206, - "step": 11259 - }, - { - "epoch": 0.8462347813016684, - "grad_norm": 1.362169528395438, - "learning_rate": 2.429584808351517e-07, - "loss": 0.926, - "step": 11260 - }, - { - "epoch": 0.8463099353675034, - "grad_norm": 3.761689341460849, - "learning_rate": 2.427259663482775e-07, - "loss": 0.9408, - "step": 11261 - }, - { - "epoch": 0.8463850894333383, - "grad_norm": 1.8438520241651233, - "learning_rate": 2.424935559866188e-07, - "loss": 1.0148, - "step": 11262 - }, - { - "epoch": 0.8464602434991733, - "grad_norm": 2.374083091020887, - "learning_rate": 2.422612497639469e-07, - "loss": 0.958, - "step": 11263 - }, - { - "epoch": 0.8465353975650083, - "grad_norm": 2.549031380335635, - "learning_rate": 2.4202904769402633e-07, - "loss": 0.9747, - "step": 11264 - }, - { - "epoch": 0.8466105516308432, - "grad_norm": 1.5885546657676601, - "learning_rate": 2.4179694979061717e-07, - "loss": 0.7789, - "step": 11265 - }, - { - "epoch": 0.8466857056966782, - "grad_norm": 1.643248649114915, - "learning_rate": 2.4156495606747065e-07, - "loss": 0.9484, - "step": 11266 - }, - { - "epoch": 0.8467608597625131, - "grad_norm": 0.6995852271874766, - "learning_rate": 2.413330665383342e-07, - "loss": 0.8049, - "step": 11267 - }, - { - "epoch": 0.8468360138283482, - "grad_norm": 1.8735437460099218, - "learning_rate": 2.4110128121694816e-07, - "loss": 0.914, - "step": 11268 - }, - { - "epoch": 0.8469111678941831, - "grad_norm": 1.687907558251167, - "learning_rate": 2.408696001170463e-07, - "loss": 0.9849, - "step": 11269 - }, - { - "epoch": 0.846986321960018, - "grad_norm": 1.6206982233502492, - "learning_rate": 2.406380232523566e-07, - "loss": 0.9782, - "step": 11270 - }, - { - "epoch": 0.847061476025853, - "grad_norm": 1.747954944771929, - "learning_rate": 2.4040655063660085e-07, - "loss": 0.9503, - "step": 11271 - }, - { - "epoch": 0.8471366300916879, - "grad_norm": 1.5329801166436903, - "learning_rate": 2.4017518228349586e-07, - "loss": 0.9554, - "step": 11272 - }, - { - "epoch": 0.847211784157523, - "grad_norm": 1.8058555093039417, - "learning_rate": 2.399439182067491e-07, - "loss": 0.9316, - "step": 11273 - }, - { - "epoch": 0.8472869382233579, - "grad_norm": 1.5360971144731617, - "learning_rate": 2.397127584200656e-07, - "loss": 1.0363, - "step": 11274 - }, - { - "epoch": 0.8473620922891928, - "grad_norm": 1.722061573589174, - "learning_rate": 2.394817029371421e-07, - "loss": 0.991, - "step": 11275 - }, - { - "epoch": 0.8474372463550278, - "grad_norm": 1.7490815068354335, - "learning_rate": 2.392507517716682e-07, - "loss": 0.9303, - "step": 11276 - }, - { - "epoch": 0.8475124004208627, - "grad_norm": 1.800829640563728, - "learning_rate": 2.3901990493732957e-07, - "loss": 0.9606, - "step": 11277 - }, - { - "epoch": 0.8475875544866978, - "grad_norm": 1.7155389788520736, - "learning_rate": 2.387891624478056e-07, - "loss": 0.9848, - "step": 11278 - }, - { - "epoch": 0.8476627085525327, - "grad_norm": 3.029866797663632, - "learning_rate": 2.385585243167676e-07, - "loss": 1.0025, - "step": 11279 - }, - { - "epoch": 0.8477378626183677, - "grad_norm": 1.722565386877869, - "learning_rate": 2.383279905578821e-07, - "loss": 0.9449, - "step": 11280 - }, - { - "epoch": 0.8478130166842026, - "grad_norm": 1.935966130646089, - "learning_rate": 2.3809756118480863e-07, - "loss": 1.0012, - "step": 11281 - }, - { - "epoch": 0.8478881707500375, - "grad_norm": 1.6043913630210973, - "learning_rate": 2.378672362112022e-07, - "loss": 0.991, - "step": 11282 - }, - { - "epoch": 0.8479633248158726, - "grad_norm": 1.8957572195123646, - "learning_rate": 2.3763701565070882e-07, - "loss": 1.0007, - "step": 11283 - }, - { - "epoch": 0.8480384788817075, - "grad_norm": 1.8780452793866265, - "learning_rate": 2.3740689951697135e-07, - "loss": 1.0046, - "step": 11284 - }, - { - "epoch": 0.8481136329475425, - "grad_norm": 1.717122573249892, - "learning_rate": 2.3717688782362444e-07, - "loss": 0.9019, - "step": 11285 - }, - { - "epoch": 0.8481887870133774, - "grad_norm": 1.533353100870394, - "learning_rate": 2.369469805842972e-07, - "loss": 0.9425, - "step": 11286 - }, - { - "epoch": 0.8482639410792124, - "grad_norm": 1.2357419328250685, - "learning_rate": 2.3671717781261225e-07, - "loss": 1.0133, - "step": 11287 - }, - { - "epoch": 0.8483390951450474, - "grad_norm": 1.3576673546441316, - "learning_rate": 2.364874795221865e-07, - "loss": 1.0402, - "step": 11288 - }, - { - "epoch": 0.8484142492108823, - "grad_norm": 3.7660879270040657, - "learning_rate": 2.362578857266313e-07, - "loss": 1.0278, - "step": 11289 - }, - { - "epoch": 0.8484894032767173, - "grad_norm": 1.9496904616555808, - "learning_rate": 2.3602839643954997e-07, - "loss": 0.9034, - "step": 11290 - }, - { - "epoch": 0.8485645573425522, - "grad_norm": 1.5248873340699816, - "learning_rate": 2.3579901167454008e-07, - "loss": 0.984, - "step": 11291 - }, - { - "epoch": 0.8486397114083872, - "grad_norm": 1.4056545692410563, - "learning_rate": 2.3556973144519542e-07, - "loss": 0.9218, - "step": 11292 - }, - { - "epoch": 0.8487148654742221, - "grad_norm": 1.8501526821125591, - "learning_rate": 2.353405557650998e-07, - "loss": 1.0273, - "step": 11293 - }, - { - "epoch": 0.8487900195400572, - "grad_norm": 1.6640519692531006, - "learning_rate": 2.3511148464783348e-07, - "loss": 0.8418, - "step": 11294 - }, - { - "epoch": 0.8488651736058921, - "grad_norm": 1.959755773217308, - "learning_rate": 2.3488251810697047e-07, - "loss": 0.9602, - "step": 11295 - }, - { - "epoch": 0.848940327671727, - "grad_norm": 1.6605555155225755, - "learning_rate": 2.3465365615607723e-07, - "loss": 1.0745, - "step": 11296 - }, - { - "epoch": 0.849015481737562, - "grad_norm": 1.395044953432058, - "learning_rate": 2.3442489880871475e-07, - "loss": 0.9648, - "step": 11297 - }, - { - "epoch": 0.8490906358033969, - "grad_norm": 0.7561072206883713, - "learning_rate": 2.341962460784377e-07, - "loss": 0.8488, - "step": 11298 - }, - { - "epoch": 0.849165789869232, - "grad_norm": 2.3341018455663143, - "learning_rate": 2.3396769797879544e-07, - "loss": 0.9785, - "step": 11299 - }, - { - "epoch": 0.8492409439350669, - "grad_norm": 3.0288442916771237, - "learning_rate": 2.3373925452332877e-07, - "loss": 1.0233, - "step": 11300 - }, - { - "epoch": 0.8493160980009018, - "grad_norm": 1.8947444835098841, - "learning_rate": 2.3351091572557524e-07, - "loss": 0.897, - "step": 11301 - }, - { - "epoch": 0.8493912520667368, - "grad_norm": 1.6449371344429948, - "learning_rate": 2.3328268159906428e-07, - "loss": 0.8903, - "step": 11302 - }, - { - "epoch": 0.8494664061325717, - "grad_norm": 0.6651472768252242, - "learning_rate": 2.3305455215731952e-07, - "loss": 0.8308, - "step": 11303 - }, - { - "epoch": 0.8495415601984068, - "grad_norm": 1.7530870457312975, - "learning_rate": 2.3282652741385834e-07, - "loss": 1.0869, - "step": 11304 - }, - { - "epoch": 0.8496167142642417, - "grad_norm": 1.7375462038771545, - "learning_rate": 2.3259860738219284e-07, - "loss": 0.9193, - "step": 11305 - }, - { - "epoch": 0.8496918683300767, - "grad_norm": 2.4470220567888497, - "learning_rate": 2.3237079207582755e-07, - "loss": 0.9898, - "step": 11306 - }, - { - "epoch": 0.8497670223959116, - "grad_norm": 1.460872220299561, - "learning_rate": 2.3214308150826166e-07, - "loss": 0.9437, - "step": 11307 - }, - { - "epoch": 0.8498421764617465, - "grad_norm": 1.4211084164811045, - "learning_rate": 2.319154756929873e-07, - "loss": 1.0158, - "step": 11308 - }, - { - "epoch": 0.8499173305275816, - "grad_norm": 2.0421959197520936, - "learning_rate": 2.3168797464349232e-07, - "loss": 0.9145, - "step": 11309 - }, - { - "epoch": 0.8499924845934165, - "grad_norm": 1.6489598319006076, - "learning_rate": 2.3146057837325527e-07, - "loss": 0.996, - "step": 11310 - }, - { - "epoch": 0.8500676386592515, - "grad_norm": 2.127464660279297, - "learning_rate": 2.3123328689575115e-07, - "loss": 1.0633, - "step": 11311 - }, - { - "epoch": 0.8501427927250864, - "grad_norm": 1.838031629611922, - "learning_rate": 2.3100610022444877e-07, - "loss": 0.9303, - "step": 11312 - }, - { - "epoch": 0.8502179467909214, - "grad_norm": 1.7272623982417266, - "learning_rate": 2.3077901837280844e-07, - "loss": 0.9946, - "step": 11313 - }, - { - "epoch": 0.8502931008567564, - "grad_norm": 2.8794776214932916, - "learning_rate": 2.305520413542854e-07, - "loss": 0.9293, - "step": 11314 - }, - { - "epoch": 0.8503682549225913, - "grad_norm": 2.9090882255260055, - "learning_rate": 2.303251691823298e-07, - "loss": 0.9652, - "step": 11315 - }, - { - "epoch": 0.8504434089884263, - "grad_norm": 1.6042397800314412, - "learning_rate": 2.3009840187038533e-07, - "loss": 0.9982, - "step": 11316 - }, - { - "epoch": 0.8505185630542612, - "grad_norm": 1.6523990847293015, - "learning_rate": 2.2987173943188697e-07, - "loss": 0.9565, - "step": 11317 - }, - { - "epoch": 0.8505937171200962, - "grad_norm": 1.5982761812516624, - "learning_rate": 2.2964518188026693e-07, - "loss": 1.0717, - "step": 11318 - }, - { - "epoch": 0.8506688711859312, - "grad_norm": 1.5051095879536074, - "learning_rate": 2.294187292289489e-07, - "loss": 1.0063, - "step": 11319 - }, - { - "epoch": 0.8507440252517661, - "grad_norm": 1.9552561615053135, - "learning_rate": 2.2919238149135122e-07, - "loss": 0.9491, - "step": 11320 - }, - { - "epoch": 0.8508191793176011, - "grad_norm": 1.9790305308318823, - "learning_rate": 2.2896613868088543e-07, - "loss": 0.9281, - "step": 11321 - }, - { - "epoch": 0.850894333383436, - "grad_norm": 1.4136862491362818, - "learning_rate": 2.287400008109579e-07, - "loss": 0.8416, - "step": 11322 - }, - { - "epoch": 0.850969487449271, - "grad_norm": 1.8158697176762122, - "learning_rate": 2.2851396789496812e-07, - "loss": 0.9679, - "step": 11323 - }, - { - "epoch": 0.851044641515106, - "grad_norm": 1.8965327750727243, - "learning_rate": 2.2828803994630917e-07, - "loss": 0.9371, - "step": 11324 - }, - { - "epoch": 0.851119795580941, - "grad_norm": 1.4690544949077924, - "learning_rate": 2.280622169783677e-07, - "loss": 0.9653, - "step": 11325 - }, - { - "epoch": 0.8511949496467759, - "grad_norm": 1.5892822623789422, - "learning_rate": 2.2783649900452584e-07, - "loss": 0.8901, - "step": 11326 - }, - { - "epoch": 0.8512701037126108, - "grad_norm": 1.5370524491623017, - "learning_rate": 2.276108860381567e-07, - "loss": 0.9629, - "step": 11327 - }, - { - "epoch": 0.8513452577784458, - "grad_norm": 1.6471119111284978, - "learning_rate": 2.2738537809263003e-07, - "loss": 0.9988, - "step": 11328 - }, - { - "epoch": 0.8514204118442807, - "grad_norm": 1.5188764323288317, - "learning_rate": 2.2715997518130758e-07, - "loss": 0.9205, - "step": 11329 - }, - { - "epoch": 0.8514955659101158, - "grad_norm": 2.7077307299700557, - "learning_rate": 2.2693467731754511e-07, - "loss": 1.0418, - "step": 11330 - }, - { - "epoch": 0.8515707199759507, - "grad_norm": 1.6315062193779766, - "learning_rate": 2.2670948451469195e-07, - "loss": 1.035, - "step": 11331 - }, - { - "epoch": 0.8516458740417857, - "grad_norm": 1.9406238107477989, - "learning_rate": 2.2648439678609254e-07, - "loss": 0.9994, - "step": 11332 - }, - { - "epoch": 0.8517210281076206, - "grad_norm": 1.5893565188168575, - "learning_rate": 2.2625941414508442e-07, - "loss": 0.8894, - "step": 11333 - }, - { - "epoch": 0.8517961821734555, - "grad_norm": 1.7297127409129702, - "learning_rate": 2.2603453660499782e-07, - "loss": 1.0089, - "step": 11334 - }, - { - "epoch": 0.8518713362392906, - "grad_norm": 1.7987855125457992, - "learning_rate": 2.2580976417915766e-07, - "loss": 0.9625, - "step": 11335 - }, - { - "epoch": 0.8519464903051255, - "grad_norm": 1.5820745014824575, - "learning_rate": 2.2558509688088324e-07, - "loss": 0.9109, - "step": 11336 - }, - { - "epoch": 0.8520216443709605, - "grad_norm": 1.5412651604543335, - "learning_rate": 2.2536053472348592e-07, - "loss": 0.8497, - "step": 11337 - }, - { - "epoch": 0.8520967984367954, - "grad_norm": 2.305803449037507, - "learning_rate": 2.2513607772027243e-07, - "loss": 0.8976, - "step": 11338 - }, - { - "epoch": 0.8521719525026304, - "grad_norm": 1.5724590301028287, - "learning_rate": 2.2491172588454322e-07, - "loss": 0.942, - "step": 11339 - }, - { - "epoch": 0.8522471065684654, - "grad_norm": 1.7711175877431282, - "learning_rate": 2.2468747922959143e-07, - "loss": 0.9164, - "step": 11340 - }, - { - "epoch": 0.8523222606343003, - "grad_norm": 2.19172282660683, - "learning_rate": 2.2446333776870484e-07, - "loss": 1.0214, - "step": 11341 - }, - { - "epoch": 0.8523974147001353, - "grad_norm": 1.2754323581568172, - "learning_rate": 2.242393015151638e-07, - "loss": 1.0101, - "step": 11342 - }, - { - "epoch": 0.8524725687659702, - "grad_norm": 2.6077405184804823, - "learning_rate": 2.2401537048224495e-07, - "loss": 1.0726, - "step": 11343 - }, - { - "epoch": 0.8525477228318052, - "grad_norm": 1.8347087887851266, - "learning_rate": 2.2379154468321525e-07, - "loss": 1.0265, - "step": 11344 - }, - { - "epoch": 0.8526228768976402, - "grad_norm": 1.954212953823196, - "learning_rate": 2.2356782413133834e-07, - "loss": 0.8637, - "step": 11345 - }, - { - "epoch": 0.8526980309634751, - "grad_norm": 1.701257994757036, - "learning_rate": 2.233442088398705e-07, - "loss": 1.011, - "step": 11346 - }, - { - "epoch": 0.8527731850293101, - "grad_norm": 2.043789350498286, - "learning_rate": 2.231206988220613e-07, - "loss": 0.9895, - "step": 11347 - }, - { - "epoch": 0.852848339095145, - "grad_norm": 1.7732352927122539, - "learning_rate": 2.2289729409115466e-07, - "loss": 0.9701, - "step": 11348 - }, - { - "epoch": 0.85292349316098, - "grad_norm": 2.3303666555693865, - "learning_rate": 2.226739946603886e-07, - "loss": 0.9121, - "step": 11349 - }, - { - "epoch": 0.852998647226815, - "grad_norm": 2.569488950005989, - "learning_rate": 2.2245080054299415e-07, - "loss": 0.886, - "step": 11350 - }, - { - "epoch": 0.85307380129265, - "grad_norm": 1.8724080926726367, - "learning_rate": 2.2222771175219668e-07, - "loss": 1.015, - "step": 11351 - }, - { - "epoch": 0.8531489553584849, - "grad_norm": 8.034346759023927, - "learning_rate": 2.2200472830121431e-07, - "loss": 0.8987, - "step": 11352 - }, - { - "epoch": 0.8532241094243198, - "grad_norm": 1.6944713703302203, - "learning_rate": 2.2178185020326091e-07, - "loss": 0.9541, - "step": 11353 - }, - { - "epoch": 0.8532992634901548, - "grad_norm": 1.3865392434433288, - "learning_rate": 2.2155907747154122e-07, - "loss": 0.8985, - "step": 11354 - }, - { - "epoch": 0.8533744175559898, - "grad_norm": 1.6044113501014858, - "learning_rate": 2.2133641011925652e-07, - "loss": 1.0207, - "step": 11355 - }, - { - "epoch": 0.8534495716218248, - "grad_norm": 2.0321378674782293, - "learning_rate": 2.2111384815960132e-07, - "loss": 0.9352, - "step": 11356 - }, - { - "epoch": 0.8535247256876597, - "grad_norm": 1.8721371093040502, - "learning_rate": 2.2089139160576197e-07, - "loss": 0.9702, - "step": 11357 - }, - { - "epoch": 0.8535998797534947, - "grad_norm": 6.211447256989887, - "learning_rate": 2.206690404709197e-07, - "loss": 1.0296, - "step": 11358 - }, - { - "epoch": 0.8536750338193296, - "grad_norm": 2.308417040779542, - "learning_rate": 2.2044679476825045e-07, - "loss": 0.971, - "step": 11359 - }, - { - "epoch": 0.8537501878851645, - "grad_norm": 1.8155514254082703, - "learning_rate": 2.2022465451092386e-07, - "loss": 0.933, - "step": 11360 - }, - { - "epoch": 0.8538253419509996, - "grad_norm": 1.459332568840383, - "learning_rate": 2.2000261971210098e-07, - "loss": 0.9425, - "step": 11361 - }, - { - "epoch": 0.8539004960168345, - "grad_norm": 3.087725852705723, - "learning_rate": 2.1978069038493906e-07, - "loss": 0.9987, - "step": 11362 - }, - { - "epoch": 0.8539756500826695, - "grad_norm": 1.7364854773505354, - "learning_rate": 2.1955886654258848e-07, - "loss": 1.0029, - "step": 11363 - }, - { - "epoch": 0.8540508041485044, - "grad_norm": 1.6018544745051173, - "learning_rate": 2.1933714819819248e-07, - "loss": 1.1111, - "step": 11364 - }, - { - "epoch": 0.8541259582143393, - "grad_norm": 2.0175525999716446, - "learning_rate": 2.191155353648888e-07, - "loss": 0.9948, - "step": 11365 - }, - { - "epoch": 0.8542011122801744, - "grad_norm": 0.7488159130933402, - "learning_rate": 2.188940280558096e-07, - "loss": 0.7919, - "step": 11366 - }, - { - "epoch": 0.8542762663460093, - "grad_norm": 2.3781517598755695, - "learning_rate": 2.1867262628407945e-07, - "loss": 1.0571, - "step": 11367 - }, - { - "epoch": 0.8543514204118443, - "grad_norm": 1.5253061425018526, - "learning_rate": 2.1845133006281745e-07, - "loss": 0.9386, - "step": 11368 - }, - { - "epoch": 0.8544265744776792, - "grad_norm": 1.5802357975738424, - "learning_rate": 2.182301394051358e-07, - "loss": 0.9655, - "step": 11369 - }, - { - "epoch": 0.8545017285435142, - "grad_norm": 2.1159048101074376, - "learning_rate": 2.1800905432414197e-07, - "loss": 0.913, - "step": 11370 - }, - { - "epoch": 0.8545768826093492, - "grad_norm": 1.8862707265596135, - "learning_rate": 2.1778807483293437e-07, - "loss": 0.9342, - "step": 11371 - }, - { - "epoch": 0.8546520366751841, - "grad_norm": 1.559986381471318, - "learning_rate": 2.1756720094460856e-07, - "loss": 0.9254, - "step": 11372 - }, - { - "epoch": 0.8547271907410191, - "grad_norm": 1.3321117477480875, - "learning_rate": 2.173464326722514e-07, - "loss": 0.9285, - "step": 11373 - }, - { - "epoch": 0.854802344806854, - "grad_norm": 1.664042584653788, - "learning_rate": 2.1712577002894417e-07, - "loss": 1.0517, - "step": 11374 - }, - { - "epoch": 0.854877498872689, - "grad_norm": 2.1876264263131207, - "learning_rate": 2.1690521302776198e-07, - "loss": 0.9392, - "step": 11375 - }, - { - "epoch": 0.854952652938524, - "grad_norm": 3.766471799237733, - "learning_rate": 2.1668476168177374e-07, - "loss": 1.0411, - "step": 11376 - }, - { - "epoch": 0.855027807004359, - "grad_norm": 1.4007013845999405, - "learning_rate": 2.1646441600404297e-07, - "loss": 0.9583, - "step": 11377 - }, - { - "epoch": 0.8551029610701939, - "grad_norm": 2.0589026087311635, - "learning_rate": 2.1624417600762435e-07, - "loss": 1.0771, - "step": 11378 - }, - { - "epoch": 0.8551781151360288, - "grad_norm": 3.3609281985811696, - "learning_rate": 2.160240417055692e-07, - "loss": 0.9112, - "step": 11379 - }, - { - "epoch": 0.8552532692018638, - "grad_norm": 1.6162709758014024, - "learning_rate": 2.1580401311092112e-07, - "loss": 0.9584, - "step": 11380 - }, - { - "epoch": 0.8553284232676988, - "grad_norm": 1.5914732434381258, - "learning_rate": 2.1558409023671677e-07, - "loss": 1.0115, - "step": 11381 - }, - { - "epoch": 0.8554035773335338, - "grad_norm": 1.8250456085776794, - "learning_rate": 2.1536427309598792e-07, - "loss": 1.0077, - "step": 11382 - }, - { - "epoch": 0.8554787313993687, - "grad_norm": 1.4627846041510963, - "learning_rate": 2.1514456170176021e-07, - "loss": 0.9194, - "step": 11383 - }, - { - "epoch": 0.8555538854652037, - "grad_norm": 3.277227642926193, - "learning_rate": 2.1492495606705184e-07, - "loss": 1.0218, - "step": 11384 - }, - { - "epoch": 0.8556290395310386, - "grad_norm": 2.8195525014514558, - "learning_rate": 2.1470545620487557e-07, - "loss": 0.9602, - "step": 11385 - }, - { - "epoch": 0.8557041935968736, - "grad_norm": 1.3605086324236355, - "learning_rate": 2.1448606212823673e-07, - "loss": 0.914, - "step": 11386 - }, - { - "epoch": 0.8557793476627086, - "grad_norm": 0.7217484829830219, - "learning_rate": 2.142667738501367e-07, - "loss": 0.7689, - "step": 11387 - }, - { - "epoch": 0.8558545017285435, - "grad_norm": 2.7639234232914878, - "learning_rate": 2.1404759138356753e-07, - "loss": 0.8837, - "step": 11388 - }, - { - "epoch": 0.8559296557943785, - "grad_norm": 1.6235195899159771, - "learning_rate": 2.1382851474151799e-07, - "loss": 0.9894, - "step": 11389 - }, - { - "epoch": 0.8560048098602134, - "grad_norm": 1.6914828593676454, - "learning_rate": 2.136095439369685e-07, - "loss": 1.0108, - "step": 11390 - }, - { - "epoch": 0.8560799639260483, - "grad_norm": 0.7821918151224361, - "learning_rate": 2.133906789828941e-07, - "loss": 0.8616, - "step": 11391 - }, - { - "epoch": 0.8561551179918834, - "grad_norm": 2.2443434289142115, - "learning_rate": 2.1317191989226302e-07, - "loss": 1.0454, - "step": 11392 - }, - { - "epoch": 0.8562302720577183, - "grad_norm": 1.798170203422038, - "learning_rate": 2.129532666780385e-07, - "loss": 1.0398, - "step": 11393 - }, - { - "epoch": 0.8563054261235533, - "grad_norm": 6.142564772938589, - "learning_rate": 2.1273471935317567e-07, - "loss": 1.0291, - "step": 11394 - }, - { - "epoch": 0.8563805801893882, - "grad_norm": 2.310060090278394, - "learning_rate": 2.1251627793062466e-07, - "loss": 0.9907, - "step": 11395 - }, - { - "epoch": 0.8564557342552233, - "grad_norm": 2.2578680434209315, - "learning_rate": 2.1229794242332866e-07, - "loss": 0.9564, - "step": 11396 - }, - { - "epoch": 0.8565308883210582, - "grad_norm": 0.9168797953245572, - "learning_rate": 2.1207971284422577e-07, - "loss": 0.8189, - "step": 11397 - }, - { - "epoch": 0.8566060423868931, - "grad_norm": 2.1980228729862885, - "learning_rate": 2.1186158920624563e-07, - "loss": 0.9669, - "step": 11398 - }, - { - "epoch": 0.8566811964527281, - "grad_norm": 2.4705562502663203, - "learning_rate": 2.116435715223135e-07, - "loss": 0.949, - "step": 11399 - }, - { - "epoch": 0.856756350518563, - "grad_norm": 1.4859498353399998, - "learning_rate": 2.1142565980534877e-07, - "loss": 1.005, - "step": 11400 - }, - { - "epoch": 0.856831504584398, - "grad_norm": 2.5804186241831952, - "learning_rate": 2.1120785406826204e-07, - "loss": 0.9885, - "step": 11401 - }, - { - "epoch": 0.856906658650233, - "grad_norm": 1.7123279930661686, - "learning_rate": 2.1099015432395939e-07, - "loss": 0.962, - "step": 11402 - }, - { - "epoch": 0.856981812716068, - "grad_norm": 1.7283267160409752, - "learning_rate": 2.1077256058534055e-07, - "loss": 0.832, - "step": 11403 - }, - { - "epoch": 0.8570569667819029, - "grad_norm": 1.4979049038414116, - "learning_rate": 2.1055507286529984e-07, - "loss": 1.0257, - "step": 11404 - }, - { - "epoch": 0.8571321208477378, - "grad_norm": 1.955031418218624, - "learning_rate": 2.1033769117672229e-07, - "loss": 0.9764, - "step": 11405 - }, - { - "epoch": 0.8572072749135728, - "grad_norm": 1.79949209941922, - "learning_rate": 2.1012041553249028e-07, - "loss": 1.0045, - "step": 11406 - }, - { - "epoch": 0.8572824289794078, - "grad_norm": 2.0458295642121422, - "learning_rate": 2.0990324594547748e-07, - "loss": 0.8479, - "step": 11407 - }, - { - "epoch": 0.8573575830452428, - "grad_norm": 1.7141720231707676, - "learning_rate": 2.0968618242855207e-07, - "loss": 0.9741, - "step": 11408 - }, - { - "epoch": 0.8574327371110777, - "grad_norm": 1.529220637617216, - "learning_rate": 2.0946922499457552e-07, - "loss": 0.9127, - "step": 11409 - }, - { - "epoch": 0.8575078911769126, - "grad_norm": 3.2137689659481903, - "learning_rate": 2.0925237365640424e-07, - "loss": 0.9034, - "step": 11410 - }, - { - "epoch": 0.8575830452427476, - "grad_norm": 1.6529736772300267, - "learning_rate": 2.090356284268868e-07, - "loss": 1.0223, - "step": 11411 - }, - { - "epoch": 0.8576581993085826, - "grad_norm": 1.799965034251215, - "learning_rate": 2.0881898931886677e-07, - "loss": 1.0285, - "step": 11412 - }, - { - "epoch": 0.8577333533744176, - "grad_norm": 2.025832243019019, - "learning_rate": 2.0860245634518002e-07, - "loss": 1.0252, - "step": 11413 - }, - { - "epoch": 0.8578085074402525, - "grad_norm": 1.4664963822058434, - "learning_rate": 2.0838602951865812e-07, - "loss": 0.9351, - "step": 11414 - }, - { - "epoch": 0.8578836615060875, - "grad_norm": 1.7190526522416931, - "learning_rate": 2.0816970885212392e-07, - "loss": 1.0078, - "step": 11415 - }, - { - "epoch": 0.8579588155719224, - "grad_norm": 1.4921773540691148, - "learning_rate": 2.0795349435839605e-07, - "loss": 0.9785, - "step": 11416 - }, - { - "epoch": 0.8580339696377574, - "grad_norm": 1.827916125241018, - "learning_rate": 2.0773738605028602e-07, - "loss": 0.968, - "step": 11417 - }, - { - "epoch": 0.8581091237035924, - "grad_norm": 1.7217556422243465, - "learning_rate": 2.075213839405987e-07, - "loss": 0.9936, - "step": 11418 - }, - { - "epoch": 0.8581842777694273, - "grad_norm": 2.2060864612170654, - "learning_rate": 2.07305488042133e-07, - "loss": 1.0345, - "step": 11419 - }, - { - "epoch": 0.8582594318352623, - "grad_norm": 1.5012789677883105, - "learning_rate": 2.0708969836768176e-07, - "loss": 1.0316, - "step": 11420 - }, - { - "epoch": 0.8583345859010972, - "grad_norm": 1.398545074587543, - "learning_rate": 2.068740149300321e-07, - "loss": 0.976, - "step": 11421 - }, - { - "epoch": 0.8584097399669323, - "grad_norm": 1.8235478934087979, - "learning_rate": 2.0665843774196269e-07, - "loss": 0.9749, - "step": 11422 - }, - { - "epoch": 0.8584848940327672, - "grad_norm": 2.330511112064697, - "learning_rate": 2.064429668162484e-07, - "loss": 0.9234, - "step": 11423 - }, - { - "epoch": 0.8585600480986021, - "grad_norm": 1.9261808464415386, - "learning_rate": 2.0622760216565683e-07, - "loss": 0.9236, - "step": 11424 - }, - { - "epoch": 0.8586352021644371, - "grad_norm": 1.9805274367807144, - "learning_rate": 2.0601234380294775e-07, - "loss": 0.9869, - "step": 11425 - }, - { - "epoch": 0.858710356230272, - "grad_norm": 1.9487391552675353, - "learning_rate": 2.0579719174087696e-07, - "loss": 0.9623, - "step": 11426 - }, - { - "epoch": 0.8587855102961071, - "grad_norm": 2.242214721089118, - "learning_rate": 2.0558214599219337e-07, - "loss": 0.9003, - "step": 11427 - }, - { - "epoch": 0.858860664361942, - "grad_norm": 1.5930359680751263, - "learning_rate": 2.0536720656963902e-07, - "loss": 1.0518, - "step": 11428 - }, - { - "epoch": 0.858935818427777, - "grad_norm": 1.4670220619992242, - "learning_rate": 2.0515237348594972e-07, - "loss": 1.0271, - "step": 11429 - }, - { - "epoch": 0.8590109724936119, - "grad_norm": 1.8185093015821492, - "learning_rate": 2.0493764675385484e-07, - "loss": 0.9246, - "step": 11430 - }, - { - "epoch": 0.8590861265594468, - "grad_norm": 1.758288238573877, - "learning_rate": 2.0472302638607885e-07, - "loss": 0.9356, - "step": 11431 - }, - { - "epoch": 0.8591612806252819, - "grad_norm": 1.556851361989987, - "learning_rate": 2.045085123953374e-07, - "loss": 0.9532, - "step": 11432 - }, - { - "epoch": 0.8592364346911168, - "grad_norm": 1.777781673599319, - "learning_rate": 2.0429410479434228e-07, - "loss": 0.948, - "step": 11433 - }, - { - "epoch": 0.8593115887569518, - "grad_norm": 1.759577090106029, - "learning_rate": 2.040798035957978e-07, - "loss": 1.0175, - "step": 11434 - }, - { - "epoch": 0.8593867428227867, - "grad_norm": 1.8471635802799442, - "learning_rate": 2.03865608812402e-07, - "loss": 0.911, - "step": 11435 - }, - { - "epoch": 0.8594618968886216, - "grad_norm": 1.573445776393909, - "learning_rate": 2.036515204568463e-07, - "loss": 1.0606, - "step": 11436 - }, - { - "epoch": 0.8595370509544566, - "grad_norm": 1.9552515888527133, - "learning_rate": 2.0343753854181655e-07, - "loss": 0.8715, - "step": 11437 - }, - { - "epoch": 0.8596122050202916, - "grad_norm": 2.4877410733516405, - "learning_rate": 2.0322366307999307e-07, - "loss": 1.0108, - "step": 11438 - }, - { - "epoch": 0.8596873590861266, - "grad_norm": 6.832355925586465, - "learning_rate": 2.0300989408404745e-07, - "loss": 0.9364, - "step": 11439 - }, - { - "epoch": 0.8597625131519615, - "grad_norm": 1.5145421043077096, - "learning_rate": 2.027962315666465e-07, - "loss": 0.9612, - "step": 11440 - }, - { - "epoch": 0.8598376672177965, - "grad_norm": 1.8787263082202077, - "learning_rate": 2.0258267554045138e-07, - "loss": 0.9952, - "step": 11441 - }, - { - "epoch": 0.8599128212836314, - "grad_norm": 0.7838209954189139, - "learning_rate": 2.0236922601811491e-07, - "loss": 0.8317, - "step": 11442 - }, - { - "epoch": 0.8599879753494664, - "grad_norm": 1.33299269894706, - "learning_rate": 2.0215588301228515e-07, - "loss": 0.9921, - "step": 11443 - }, - { - "epoch": 0.8600631294153014, - "grad_norm": 1.7007241782275322, - "learning_rate": 2.0194264653560467e-07, - "loss": 1.034, - "step": 11444 - }, - { - "epoch": 0.8601382834811363, - "grad_norm": 2.2635203089425464, - "learning_rate": 2.017295166007067e-07, - "loss": 1.0154, - "step": 11445 - }, - { - "epoch": 0.8602134375469713, - "grad_norm": 3.182004342268613, - "learning_rate": 2.0151649322022134e-07, - "loss": 0.9874, - "step": 11446 - }, - { - "epoch": 0.8602885916128062, - "grad_norm": 1.4790905498566655, - "learning_rate": 2.0130357640677033e-07, - "loss": 1.0376, - "step": 11447 - }, - { - "epoch": 0.8603637456786413, - "grad_norm": 1.4715700064366608, - "learning_rate": 2.010907661729706e-07, - "loss": 1.0622, - "step": 11448 - }, - { - "epoch": 0.8604388997444762, - "grad_norm": 1.956020985321088, - "learning_rate": 2.0087806253143103e-07, - "loss": 0.8984, - "step": 11449 - }, - { - "epoch": 0.8605140538103111, - "grad_norm": 1.3420386395043886, - "learning_rate": 2.0066546549475573e-07, - "loss": 1.0527, - "step": 11450 - }, - { - "epoch": 0.8605892078761461, - "grad_norm": 1.7660314041576926, - "learning_rate": 2.004529750755417e-07, - "loss": 0.8518, - "step": 11451 - }, - { - "epoch": 0.860664361941981, - "grad_norm": 1.9109395611206816, - "learning_rate": 2.0024059128637983e-07, - "loss": 0.9882, - "step": 11452 - }, - { - "epoch": 0.8607395160078161, - "grad_norm": 1.413329972692009, - "learning_rate": 2.0002831413985443e-07, - "loss": 0.9135, - "step": 11453 - }, - { - "epoch": 0.860814670073651, - "grad_norm": 2.007731157725775, - "learning_rate": 1.9981614364854415e-07, - "loss": 0.9731, - "step": 11454 - }, - { - "epoch": 0.8608898241394859, - "grad_norm": 0.5852347673978293, - "learning_rate": 1.9960407982502093e-07, - "loss": 0.8335, - "step": 11455 - }, - { - "epoch": 0.8609649782053209, - "grad_norm": 1.6482391961309357, - "learning_rate": 1.9939212268185002e-07, - "loss": 1.038, - "step": 11456 - }, - { - "epoch": 0.8610401322711558, - "grad_norm": 1.991585494692374, - "learning_rate": 1.991802722315905e-07, - "loss": 0.9501, - "step": 11457 - }, - { - "epoch": 0.8611152863369909, - "grad_norm": 3.4024115817362843, - "learning_rate": 1.9896852848679634e-07, - "loss": 0.9939, - "step": 11458 - }, - { - "epoch": 0.8611904404028258, - "grad_norm": 1.7992674449537676, - "learning_rate": 1.9875689146001262e-07, - "loss": 0.9843, - "step": 11459 - }, - { - "epoch": 0.8612655944686608, - "grad_norm": 1.5165405654185642, - "learning_rate": 1.9854536116378107e-07, - "loss": 0.9597, - "step": 11460 - }, - { - "epoch": 0.8613407485344957, - "grad_norm": 5.430506099769504, - "learning_rate": 1.9833393761063523e-07, - "loss": 0.9777, - "step": 11461 - }, - { - "epoch": 0.8614159026003306, - "grad_norm": 1.719774669037186, - "learning_rate": 1.9812262081310237e-07, - "loss": 1.0672, - "step": 11462 - }, - { - "epoch": 0.8614910566661657, - "grad_norm": 1.52822834749763, - "learning_rate": 1.9791141078370366e-07, - "loss": 1.0493, - "step": 11463 - }, - { - "epoch": 0.8615662107320006, - "grad_norm": 2.3419937674604796, - "learning_rate": 1.977003075349548e-07, - "loss": 1.0271, - "step": 11464 - }, - { - "epoch": 0.8616413647978356, - "grad_norm": 1.521118825305121, - "learning_rate": 1.9748931107936473e-07, - "loss": 0.9461, - "step": 11465 - }, - { - "epoch": 0.8617165188636705, - "grad_norm": 2.148347055091964, - "learning_rate": 1.9727842142943453e-07, - "loss": 1.0555, - "step": 11466 - }, - { - "epoch": 0.8617916729295055, - "grad_norm": 1.3923078919490406, - "learning_rate": 1.970676385976613e-07, - "loss": 0.9961, - "step": 11467 - }, - { - "epoch": 0.8618668269953405, - "grad_norm": 1.9387037168151862, - "learning_rate": 1.9685696259653438e-07, - "loss": 0.8704, - "step": 11468 - }, - { - "epoch": 0.8619419810611754, - "grad_norm": 2.2490234375, - "learning_rate": 1.9664639343853717e-07, - "loss": 1.1218, - "step": 11469 - }, - { - "epoch": 0.8620171351270104, - "grad_norm": 0.7018748295444951, - "learning_rate": 1.9643593113614632e-07, - "loss": 0.7983, - "step": 11470 - }, - { - "epoch": 0.8620922891928453, - "grad_norm": 2.347007623777941, - "learning_rate": 1.9622557570183318e-07, - "loss": 0.9849, - "step": 11471 - }, - { - "epoch": 0.8621674432586803, - "grad_norm": 1.7771053615455488, - "learning_rate": 1.960153271480618e-07, - "loss": 1.0661, - "step": 11472 - }, - { - "epoch": 0.8622425973245152, - "grad_norm": 1.9528651560550117, - "learning_rate": 1.958051854872902e-07, - "loss": 0.9582, - "step": 11473 - }, - { - "epoch": 0.8623177513903503, - "grad_norm": 1.3881080223120446, - "learning_rate": 1.9559515073196952e-07, - "loss": 0.9767, - "step": 11474 - }, - { - "epoch": 0.8623929054561852, - "grad_norm": 1.6299337170001622, - "learning_rate": 1.9538522289454674e-07, - "loss": 0.9687, - "step": 11475 - }, - { - "epoch": 0.8624680595220201, - "grad_norm": 1.6193264439324897, - "learning_rate": 1.9517540198745896e-07, - "loss": 0.9372, - "step": 11476 - }, - { - "epoch": 0.8625432135878551, - "grad_norm": 2.75513516324721, - "learning_rate": 1.9496568802314007e-07, - "loss": 1.0522, - "step": 11477 - }, - { - "epoch": 0.86261836765369, - "grad_norm": 1.9731049584430298, - "learning_rate": 1.947560810140161e-07, - "loss": 0.8191, - "step": 11478 - }, - { - "epoch": 0.8626935217195251, - "grad_norm": 1.4540561440921458, - "learning_rate": 1.945465809725071e-07, - "loss": 0.9912, - "step": 11479 - }, - { - "epoch": 0.86276867578536, - "grad_norm": 1.3176849674771736, - "learning_rate": 1.943371879110265e-07, - "loss": 0.9792, - "step": 11480 - }, - { - "epoch": 0.8628438298511949, - "grad_norm": 1.6427123823420378, - "learning_rate": 1.9412790184198168e-07, - "loss": 0.9385, - "step": 11481 - }, - { - "epoch": 0.8629189839170299, - "grad_norm": 1.6364488766124017, - "learning_rate": 1.9391872277777456e-07, - "loss": 0.964, - "step": 11482 - }, - { - "epoch": 0.8629941379828648, - "grad_norm": 1.8746941634929473, - "learning_rate": 1.937096507307987e-07, - "loss": 0.8793, - "step": 11483 - }, - { - "epoch": 0.8630692920486999, - "grad_norm": 1.687706122348858, - "learning_rate": 1.9350068571344246e-07, - "loss": 0.9601, - "step": 11484 - }, - { - "epoch": 0.8631444461145348, - "grad_norm": 1.9898541717196472, - "learning_rate": 1.9329182773808904e-07, - "loss": 0.8457, - "step": 11485 - }, - { - "epoch": 0.8632196001803698, - "grad_norm": 1.9765831263977498, - "learning_rate": 1.930830768171121e-07, - "loss": 0.9718, - "step": 11486 - }, - { - "epoch": 0.8632947542462047, - "grad_norm": 2.2170863832155137, - "learning_rate": 1.928744329628822e-07, - "loss": 0.884, - "step": 11487 - }, - { - "epoch": 0.8633699083120396, - "grad_norm": 2.0819061986073737, - "learning_rate": 1.9266589618776296e-07, - "loss": 0.9049, - "step": 11488 - }, - { - "epoch": 0.8634450623778747, - "grad_norm": 1.3020972848780556, - "learning_rate": 1.924574665041092e-07, - "loss": 0.9888, - "step": 11489 - }, - { - "epoch": 0.8635202164437096, - "grad_norm": 2.0872862380904253, - "learning_rate": 1.9224914392427238e-07, - "loss": 1.0127, - "step": 11490 - }, - { - "epoch": 0.8635953705095446, - "grad_norm": 1.3931927494981926, - "learning_rate": 1.9204092846059595e-07, - "loss": 0.9878, - "step": 11491 - }, - { - "epoch": 0.8636705245753795, - "grad_norm": 1.6680600143586186, - "learning_rate": 1.9183282012541824e-07, - "loss": 0.9244, - "step": 11492 - }, - { - "epoch": 0.8637456786412145, - "grad_norm": 2.1676948993003227, - "learning_rate": 1.9162481893106918e-07, - "loss": 1.0128, - "step": 11493 - }, - { - "epoch": 0.8638208327070495, - "grad_norm": 1.7396320024028586, - "learning_rate": 1.914169248898747e-07, - "loss": 0.931, - "step": 11494 - }, - { - "epoch": 0.8638959867728844, - "grad_norm": 1.6412060389910386, - "learning_rate": 1.9120913801415294e-07, - "loss": 0.859, - "step": 11495 - }, - { - "epoch": 0.8639711408387194, - "grad_norm": 1.8116091808169845, - "learning_rate": 1.9100145831621627e-07, - "loss": 0.9761, - "step": 11496 - }, - { - "epoch": 0.8640462949045543, - "grad_norm": 1.9659966485896219, - "learning_rate": 1.9079388580836975e-07, - "loss": 0.9144, - "step": 11497 - }, - { - "epoch": 0.8641214489703893, - "grad_norm": 1.5154823944015612, - "learning_rate": 1.9058642050291374e-07, - "loss": 0.9155, - "step": 11498 - }, - { - "epoch": 0.8641966030362243, - "grad_norm": 1.5662143808563205, - "learning_rate": 1.9037906241214109e-07, - "loss": 0.9154, - "step": 11499 - }, - { - "epoch": 0.8642717571020592, - "grad_norm": 1.9185818005129205, - "learning_rate": 1.901718115483384e-07, - "loss": 0.9489, - "step": 11500 - }, - { - "epoch": 0.8643469111678942, - "grad_norm": 1.7026189349695, - "learning_rate": 1.8996466792378584e-07, - "loss": 0.9892, - "step": 11501 - }, - { - "epoch": 0.8644220652337291, - "grad_norm": 1.6625206479963002, - "learning_rate": 1.8975763155075875e-07, - "loss": 0.9596, - "step": 11502 - }, - { - "epoch": 0.8644972192995641, - "grad_norm": 1.881497442251348, - "learning_rate": 1.8955070244152284e-07, - "loss": 0.8688, - "step": 11503 - }, - { - "epoch": 0.864572373365399, - "grad_norm": 1.7182690034079384, - "learning_rate": 1.893438806083405e-07, - "loss": 1.0294, - "step": 11504 - }, - { - "epoch": 0.8646475274312341, - "grad_norm": 1.4834071969633165, - "learning_rate": 1.8913716606346776e-07, - "loss": 0.9711, - "step": 11505 - }, - { - "epoch": 0.864722681497069, - "grad_norm": 1.7271941397552937, - "learning_rate": 1.8893055881915143e-07, - "loss": 0.9703, - "step": 11506 - }, - { - "epoch": 0.8647978355629039, - "grad_norm": 1.5153443381405975, - "learning_rate": 1.8872405888763443e-07, - "loss": 0.9787, - "step": 11507 - }, - { - "epoch": 0.8648729896287389, - "grad_norm": 1.6706751145232737, - "learning_rate": 1.8851766628115273e-07, - "loss": 1.0554, - "step": 11508 - }, - { - "epoch": 0.8649481436945738, - "grad_norm": 1.6793662540277616, - "learning_rate": 1.8831138101193655e-07, - "loss": 0.9833, - "step": 11509 - }, - { - "epoch": 0.8650232977604089, - "grad_norm": 1.5445607917210313, - "learning_rate": 1.881052030922079e-07, - "loss": 0.8754, - "step": 11510 - }, - { - "epoch": 0.8650984518262438, - "grad_norm": 1.5836064538130905, - "learning_rate": 1.8789913253418433e-07, - "loss": 0.9297, - "step": 11511 - }, - { - "epoch": 0.8651736058920788, - "grad_norm": 0.6621346257889023, - "learning_rate": 1.876931693500763e-07, - "loss": 0.7814, - "step": 11512 - }, - { - "epoch": 0.8652487599579137, - "grad_norm": 1.6449975011974707, - "learning_rate": 1.874873135520878e-07, - "loss": 0.9937, - "step": 11513 - }, - { - "epoch": 0.8653239140237486, - "grad_norm": 1.4384289932413807, - "learning_rate": 1.87281565152416e-07, - "loss": 0.9321, - "step": 11514 - }, - { - "epoch": 0.8653990680895837, - "grad_norm": 2.69724467584981, - "learning_rate": 1.8707592416325336e-07, - "loss": 0.829, - "step": 11515 - }, - { - "epoch": 0.8654742221554186, - "grad_norm": 1.8390736517167001, - "learning_rate": 1.8687039059678433e-07, - "loss": 0.9835, - "step": 11516 - }, - { - "epoch": 0.8655493762212536, - "grad_norm": 2.654293461699884, - "learning_rate": 1.8666496446518765e-07, - "loss": 0.9476, - "step": 11517 - }, - { - "epoch": 0.8656245302870885, - "grad_norm": 0.6906781862242098, - "learning_rate": 1.8645964578063512e-07, - "loss": 0.8447, - "step": 11518 - }, - { - "epoch": 0.8656996843529235, - "grad_norm": 2.8309866405887356, - "learning_rate": 1.8625443455529366e-07, - "loss": 0.9474, - "step": 11519 - }, - { - "epoch": 0.8657748384187585, - "grad_norm": 0.7289410696268519, - "learning_rate": 1.860493308013218e-07, - "loss": 0.8388, - "step": 11520 - }, - { - "epoch": 0.8658499924845934, - "grad_norm": 1.5909710011264098, - "learning_rate": 1.8584433453087335e-07, - "loss": 1.022, - "step": 11521 - }, - { - "epoch": 0.8659251465504284, - "grad_norm": 2.1090678803836926, - "learning_rate": 1.8563944575609503e-07, - "loss": 1.0298, - "step": 11522 - }, - { - "epoch": 0.8660003006162633, - "grad_norm": 1.5467747357173858, - "learning_rate": 1.8543466448912713e-07, - "loss": 1.0212, - "step": 11523 - }, - { - "epoch": 0.8660754546820983, - "grad_norm": 2.2227314431824556, - "learning_rate": 1.8522999074210355e-07, - "loss": 1.0536, - "step": 11524 - }, - { - "epoch": 0.8661506087479333, - "grad_norm": 1.8060256158653296, - "learning_rate": 1.8502542452715207e-07, - "loss": 0.9342, - "step": 11525 - }, - { - "epoch": 0.8662257628137682, - "grad_norm": 1.5939863441102502, - "learning_rate": 1.8482096585639506e-07, - "loss": 0.8896, - "step": 11526 - }, - { - "epoch": 0.8663009168796032, - "grad_norm": 7.399039448436634, - "learning_rate": 1.846166147419459e-07, - "loss": 1.0107, - "step": 11527 - }, - { - "epoch": 0.8663760709454381, - "grad_norm": 1.6938655321195688, - "learning_rate": 1.8441237119591403e-07, - "loss": 0.9513, - "step": 11528 - }, - { - "epoch": 0.8664512250112731, - "grad_norm": 1.5692866376386332, - "learning_rate": 1.8420823523040197e-07, - "loss": 0.8978, - "step": 11529 - }, - { - "epoch": 0.866526379077108, - "grad_norm": 1.4877340757696917, - "learning_rate": 1.8400420685750452e-07, - "loss": 0.9042, - "step": 11530 - }, - { - "epoch": 0.8666015331429431, - "grad_norm": 3.291941200750958, - "learning_rate": 1.8380028608931152e-07, - "loss": 1.0018, - "step": 11531 - }, - { - "epoch": 0.866676687208778, - "grad_norm": 1.8448596783132358, - "learning_rate": 1.8359647293790713e-07, - "loss": 0.9343, - "step": 11532 - }, - { - "epoch": 0.8667518412746129, - "grad_norm": 2.340732615789207, - "learning_rate": 1.8339276741536657e-07, - "loss": 0.9814, - "step": 11533 - }, - { - "epoch": 0.8668269953404479, - "grad_norm": 1.5070597296397321, - "learning_rate": 1.8318916953376106e-07, - "loss": 0.9523, - "step": 11534 - }, - { - "epoch": 0.8669021494062829, - "grad_norm": 1.582340690584974, - "learning_rate": 1.8298567930515386e-07, - "loss": 0.9861, - "step": 11535 - }, - { - "epoch": 0.8669773034721179, - "grad_norm": 0.6770718549097784, - "learning_rate": 1.8278229674160373e-07, - "loss": 0.833, - "step": 11536 - }, - { - "epoch": 0.8670524575379528, - "grad_norm": 4.59065609500084, - "learning_rate": 1.825790218551604e-07, - "loss": 0.9981, - "step": 11537 - }, - { - "epoch": 0.8671276116037878, - "grad_norm": 2.5046902528629893, - "learning_rate": 1.8237585465786976e-07, - "loss": 0.9903, - "step": 11538 - }, - { - "epoch": 0.8672027656696227, - "grad_norm": 0.865125111037943, - "learning_rate": 1.8217279516176976e-07, - "loss": 0.9386, - "step": 11539 - }, - { - "epoch": 0.8672779197354576, - "grad_norm": 1.8550626611536982, - "learning_rate": 1.8196984337889276e-07, - "loss": 0.9795, - "step": 11540 - }, - { - "epoch": 0.8673530738012927, - "grad_norm": 1.5986067129531811, - "learning_rate": 1.8176699932126383e-07, - "loss": 1.0211, - "step": 11541 - }, - { - "epoch": 0.8674282278671276, - "grad_norm": 2.6681479174661136, - "learning_rate": 1.8156426300090288e-07, - "loss": 0.9187, - "step": 11542 - }, - { - "epoch": 0.8675033819329626, - "grad_norm": 1.5286309557249778, - "learning_rate": 1.8136163442982277e-07, - "loss": 1.0046, - "step": 11543 - }, - { - "epoch": 0.8675785359987975, - "grad_norm": 2.174226763727989, - "learning_rate": 1.811591136200299e-07, - "loss": 0.9879, - "step": 11544 - }, - { - "epoch": 0.8676536900646324, - "grad_norm": 1.7244212133250902, - "learning_rate": 1.8095670058352374e-07, - "loss": 0.9763, - "step": 11545 - }, - { - "epoch": 0.8677288441304675, - "grad_norm": 1.7435335852208207, - "learning_rate": 1.8075439533229964e-07, - "loss": 0.9474, - "step": 11546 - }, - { - "epoch": 0.8678039981963024, - "grad_norm": 1.6277833722958355, - "learning_rate": 1.8055219787834308e-07, - "loss": 1.0277, - "step": 11547 - }, - { - "epoch": 0.8678791522621374, - "grad_norm": 2.2819191983199434, - "learning_rate": 1.8035010823363606e-07, - "loss": 0.9321, - "step": 11548 - }, - { - "epoch": 0.8679543063279723, - "grad_norm": 1.6360719263032784, - "learning_rate": 1.8014812641015364e-07, - "loss": 0.9366, - "step": 11549 - }, - { - "epoch": 0.8680294603938074, - "grad_norm": 1.4305672362178885, - "learning_rate": 1.7994625241986293e-07, - "loss": 0.9246, - "step": 11550 - }, - { - "epoch": 0.8681046144596423, - "grad_norm": 2.1156342499163094, - "learning_rate": 1.7974448627472615e-07, - "loss": 0.9673, - "step": 11551 - }, - { - "epoch": 0.8681797685254772, - "grad_norm": 1.9241851940009251, - "learning_rate": 1.795428279866986e-07, - "loss": 1.0597, - "step": 11552 - }, - { - "epoch": 0.8682549225913122, - "grad_norm": 1.8537363417386565, - "learning_rate": 1.793412775677303e-07, - "loss": 0.9718, - "step": 11553 - }, - { - "epoch": 0.8683300766571471, - "grad_norm": 2.0971843963086223, - "learning_rate": 1.7913983502976237e-07, - "loss": 0.8755, - "step": 11554 - }, - { - "epoch": 0.8684052307229821, - "grad_norm": 2.409791000611044, - "learning_rate": 1.7893850038473192e-07, - "loss": 0.9375, - "step": 11555 - }, - { - "epoch": 0.8684803847888171, - "grad_norm": 2.1755335887717346, - "learning_rate": 1.787372736445687e-07, - "loss": 0.9686, - "step": 11556 - }, - { - "epoch": 0.8685555388546521, - "grad_norm": 0.8008672156362712, - "learning_rate": 1.7853615482119633e-07, - "loss": 0.8447, - "step": 11557 - }, - { - "epoch": 0.868630692920487, - "grad_norm": 2.725778548987448, - "learning_rate": 1.7833514392653104e-07, - "loss": 1.0006, - "step": 11558 - }, - { - "epoch": 0.8687058469863219, - "grad_norm": 1.2373776674011625, - "learning_rate": 1.7813424097248443e-07, - "loss": 0.9725, - "step": 11559 - }, - { - "epoch": 0.8687810010521569, - "grad_norm": 2.048650539327681, - "learning_rate": 1.779334459709607e-07, - "loss": 0.9656, - "step": 11560 - }, - { - "epoch": 0.8688561551179919, - "grad_norm": 1.8877912991309995, - "learning_rate": 1.777327589338571e-07, - "loss": 0.9935, - "step": 11561 - }, - { - "epoch": 0.8689313091838269, - "grad_norm": 2.0214695142116805, - "learning_rate": 1.7753217987306536e-07, - "loss": 0.8513, - "step": 11562 - }, - { - "epoch": 0.8690064632496618, - "grad_norm": 0.7400206405750943, - "learning_rate": 1.7733170880047132e-07, - "loss": 0.8274, - "step": 11563 - }, - { - "epoch": 0.8690816173154968, - "grad_norm": 2.39299621665901, - "learning_rate": 1.771313457279522e-07, - "loss": 1.0019, - "step": 11564 - }, - { - "epoch": 0.8691567713813317, - "grad_norm": 1.6974455603071041, - "learning_rate": 1.7693109066738154e-07, - "loss": 0.9483, - "step": 11565 - }, - { - "epoch": 0.8692319254471667, - "grad_norm": 2.057526690100241, - "learning_rate": 1.767309436306248e-07, - "loss": 0.9208, - "step": 11566 - }, - { - "epoch": 0.8693070795130017, - "grad_norm": 1.7540575399758533, - "learning_rate": 1.7653090462954112e-07, - "loss": 0.9666, - "step": 11567 - }, - { - "epoch": 0.8693822335788366, - "grad_norm": 1.5985790469683154, - "learning_rate": 1.763309736759837e-07, - "loss": 0.9989, - "step": 11568 - }, - { - "epoch": 0.8694573876446716, - "grad_norm": 1.3298455705833052, - "learning_rate": 1.7613115078179952e-07, - "loss": 0.9054, - "step": 11569 - }, - { - "epoch": 0.8695325417105065, - "grad_norm": 1.5494309703693474, - "learning_rate": 1.759314359588293e-07, - "loss": 0.8693, - "step": 11570 - }, - { - "epoch": 0.8696076957763414, - "grad_norm": 1.65410539944409, - "learning_rate": 1.7573182921890562e-07, - "loss": 0.9058, - "step": 11571 - }, - { - "epoch": 0.8696828498421765, - "grad_norm": 1.6509957372331954, - "learning_rate": 1.7553233057385698e-07, - "loss": 1.0378, - "step": 11572 - }, - { - "epoch": 0.8697580039080114, - "grad_norm": 3.037573599218051, - "learning_rate": 1.7533294003550436e-07, - "loss": 1.0177, - "step": 11573 - }, - { - "epoch": 0.8698331579738464, - "grad_norm": 3.9352652853665324, - "learning_rate": 1.7513365761566167e-07, - "loss": 1.0235, - "step": 11574 - }, - { - "epoch": 0.8699083120396813, - "grad_norm": 1.5692447809166299, - "learning_rate": 1.7493448332613746e-07, - "loss": 0.9239, - "step": 11575 - }, - { - "epoch": 0.8699834661055164, - "grad_norm": 1.5989977141096179, - "learning_rate": 1.7473541717873474e-07, - "loss": 0.9814, - "step": 11576 - }, - { - "epoch": 0.8700586201713513, - "grad_norm": 1.906354870022146, - "learning_rate": 1.7453645918524695e-07, - "loss": 0.9105, - "step": 11577 - }, - { - "epoch": 0.8701337742371862, - "grad_norm": 1.3082559177958089, - "learning_rate": 1.7433760935746465e-07, - "loss": 0.951, - "step": 11578 - }, - { - "epoch": 0.8702089283030212, - "grad_norm": 2.333686688324964, - "learning_rate": 1.7413886770716935e-07, - "loss": 0.9531, - "step": 11579 - }, - { - "epoch": 0.8702840823688561, - "grad_norm": 1.5100477184561985, - "learning_rate": 1.7394023424613868e-07, - "loss": 0.9982, - "step": 11580 - }, - { - "epoch": 0.8703592364346912, - "grad_norm": 2.0119783993162517, - "learning_rate": 1.7374170898614106e-07, - "loss": 0.9122, - "step": 11581 - }, - { - "epoch": 0.8704343905005261, - "grad_norm": 2.283257319816335, - "learning_rate": 1.7354329193894058e-07, - "loss": 1.0179, - "step": 11582 - }, - { - "epoch": 0.8705095445663611, - "grad_norm": 1.2895453271313568, - "learning_rate": 1.7334498311629385e-07, - "loss": 0.8834, - "step": 11583 - }, - { - "epoch": 0.870584698632196, - "grad_norm": 1.5096313890639557, - "learning_rate": 1.7314678252995173e-07, - "loss": 1.0052, - "step": 11584 - }, - { - "epoch": 0.8706598526980309, - "grad_norm": 1.7620150365588394, - "learning_rate": 1.7294869019165792e-07, - "loss": 0.8662, - "step": 11585 - }, - { - "epoch": 0.870735006763866, - "grad_norm": 1.5518645915413292, - "learning_rate": 1.72750706113151e-07, - "loss": 1.0109, - "step": 11586 - }, - { - "epoch": 0.8708101608297009, - "grad_norm": 2.003122395768161, - "learning_rate": 1.7255283030616142e-07, - "loss": 0.833, - "step": 11587 - }, - { - "epoch": 0.8708853148955359, - "grad_norm": 1.8413432301875232, - "learning_rate": 1.7235506278241463e-07, - "loss": 0.8148, - "step": 11588 - }, - { - "epoch": 0.8709604689613708, - "grad_norm": 1.574494595651325, - "learning_rate": 1.721574035536284e-07, - "loss": 0.9894, - "step": 11589 - }, - { - "epoch": 0.8710356230272057, - "grad_norm": 1.9751491393745984, - "learning_rate": 1.7195985263151603e-07, - "loss": 1.0542, - "step": 11590 - }, - { - "epoch": 0.8711107770930407, - "grad_norm": 2.2162774506755247, - "learning_rate": 1.7176241002778168e-07, - "loss": 1.0757, - "step": 11591 - }, - { - "epoch": 0.8711859311588757, - "grad_norm": 2.5983492048886836, - "learning_rate": 1.7156507575412537e-07, - "loss": 0.8944, - "step": 11592 - }, - { - "epoch": 0.8712610852247107, - "grad_norm": 2.199037588460053, - "learning_rate": 1.7136784982224062e-07, - "loss": 0.9115, - "step": 11593 - }, - { - "epoch": 0.8713362392905456, - "grad_norm": 1.960077530339557, - "learning_rate": 1.711707322438123e-07, - "loss": 0.9393, - "step": 11594 - }, - { - "epoch": 0.8714113933563806, - "grad_norm": 1.6014533028941893, - "learning_rate": 1.709737230305215e-07, - "loss": 0.947, - "step": 11595 - }, - { - "epoch": 0.8714865474222155, - "grad_norm": 0.85049883005554, - "learning_rate": 1.707768221940411e-07, - "loss": 0.9002, - "step": 11596 - }, - { - "epoch": 0.8715617014880505, - "grad_norm": 1.4043415258787204, - "learning_rate": 1.7058002974603936e-07, - "loss": 1.0079, - "step": 11597 - }, - { - "epoch": 0.8716368555538855, - "grad_norm": 2.141011335497037, - "learning_rate": 1.7038334569817536e-07, - "loss": 0.9958, - "step": 11598 - }, - { - "epoch": 0.8717120096197204, - "grad_norm": 1.9081018551804414, - "learning_rate": 1.7018677006210446e-07, - "loss": 0.9025, - "step": 11599 - }, - { - "epoch": 0.8717871636855554, - "grad_norm": 1.9362181453165477, - "learning_rate": 1.6999030284947424e-07, - "loss": 0.9435, - "step": 11600 - }, - { - "epoch": 0.8718623177513903, - "grad_norm": 1.6664390964910307, - "learning_rate": 1.6979394407192625e-07, - "loss": 0.9734, - "step": 11601 - }, - { - "epoch": 0.8719374718172254, - "grad_norm": 3.3536852536543456, - "learning_rate": 1.6959769374109523e-07, - "loss": 1.0316, - "step": 11602 - }, - { - "epoch": 0.8720126258830603, - "grad_norm": 1.5931702849760103, - "learning_rate": 1.6940155186861004e-07, - "loss": 0.9734, - "step": 11603 - }, - { - "epoch": 0.8720877799488952, - "grad_norm": 1.9000000351353692, - "learning_rate": 1.6920551846609276e-07, - "loss": 0.871, - "step": 11604 - }, - { - "epoch": 0.8721629340147302, - "grad_norm": 2.0477463127161712, - "learning_rate": 1.690095935451592e-07, - "loss": 0.9266, - "step": 11605 - }, - { - "epoch": 0.8722380880805651, - "grad_norm": 1.4604079975010542, - "learning_rate": 1.6881377711741807e-07, - "loss": 1.0203, - "step": 11606 - }, - { - "epoch": 0.8723132421464002, - "grad_norm": 1.8297536393353644, - "learning_rate": 1.686180691944734e-07, - "loss": 1.0149, - "step": 11607 - }, - { - "epoch": 0.8723883962122351, - "grad_norm": 1.8404368357826728, - "learning_rate": 1.684224697879204e-07, - "loss": 0.9253, - "step": 11608 - }, - { - "epoch": 0.8724635502780701, - "grad_norm": 1.3812748609005725, - "learning_rate": 1.6822697890935e-07, - "loss": 0.9431, - "step": 11609 - }, - { - "epoch": 0.872538704343905, - "grad_norm": 0.7117871248500459, - "learning_rate": 1.6803159657034537e-07, - "loss": 0.9037, - "step": 11610 - }, - { - "epoch": 0.8726138584097399, - "grad_norm": 1.9048693300222008, - "learning_rate": 1.6783632278248371e-07, - "loss": 0.9179, - "step": 11611 - }, - { - "epoch": 0.872689012475575, - "grad_norm": 2.0565912398361994, - "learning_rate": 1.6764115755733532e-07, - "loss": 0.9188, - "step": 11612 - }, - { - "epoch": 0.8727641665414099, - "grad_norm": 0.7819651191790862, - "learning_rate": 1.6744610090646517e-07, - "loss": 0.8211, - "step": 11613 - }, - { - "epoch": 0.8728393206072449, - "grad_norm": 1.8483053281065287, - "learning_rate": 1.6725115284143132e-07, - "loss": 1.0164, - "step": 11614 - }, - { - "epoch": 0.8729144746730798, - "grad_norm": 1.8628332211206384, - "learning_rate": 1.670563133737841e-07, - "loss": 0.9462, - "step": 11615 - }, - { - "epoch": 0.8729896287389147, - "grad_norm": 1.6392709866742823, - "learning_rate": 1.6686158251506943e-07, - "loss": 0.9511, - "step": 11616 - }, - { - "epoch": 0.8730647828047498, - "grad_norm": 1.5397107458324466, - "learning_rate": 1.6666696027682602e-07, - "loss": 0.9124, - "step": 11617 - }, - { - "epoch": 0.8731399368705847, - "grad_norm": 5.618521286634434, - "learning_rate": 1.664724466705847e-07, - "loss": 0.963, - "step": 11618 - }, - { - "epoch": 0.8732150909364197, - "grad_norm": 1.7085415162804807, - "learning_rate": 1.6627804170787196e-07, - "loss": 0.9567, - "step": 11619 - }, - { - "epoch": 0.8732902450022546, - "grad_norm": 2.088488101668709, - "learning_rate": 1.6608374540020776e-07, - "loss": 0.8401, - "step": 11620 - }, - { - "epoch": 0.8733653990680896, - "grad_norm": 1.3654359769614928, - "learning_rate": 1.658895577591035e-07, - "loss": 1.0197, - "step": 11621 - }, - { - "epoch": 0.8734405531339245, - "grad_norm": 2.3263637357905087, - "learning_rate": 1.656954787960665e-07, - "loss": 0.9901, - "step": 11622 - }, - { - "epoch": 0.8735157071997595, - "grad_norm": 2.4086026666991533, - "learning_rate": 1.6550150852259615e-07, - "loss": 0.9313, - "step": 11623 - }, - { - "epoch": 0.8735908612655945, - "grad_norm": 1.9946436681866537, - "learning_rate": 1.6530764695018684e-07, - "loss": 1.0196, - "step": 11624 - }, - { - "epoch": 0.8736660153314294, - "grad_norm": 1.7033257322396862, - "learning_rate": 1.6511389409032428e-07, - "loss": 0.9456, - "step": 11625 - }, - { - "epoch": 0.8737411693972644, - "grad_norm": 3.2359535677376012, - "learning_rate": 1.6492024995449017e-07, - "loss": 1.0873, - "step": 11626 - }, - { - "epoch": 0.8738163234630993, - "grad_norm": 2.446017227754723, - "learning_rate": 1.6472671455415821e-07, - "loss": 1.0866, - "step": 11627 - }, - { - "epoch": 0.8738914775289344, - "grad_norm": 1.7347217934801957, - "learning_rate": 1.645332879007959e-07, - "loss": 0.9974, - "step": 11628 - }, - { - "epoch": 0.8739666315947693, - "grad_norm": 1.5482051794413187, - "learning_rate": 1.6433997000586475e-07, - "loss": 0.9021, - "step": 11629 - }, - { - "epoch": 0.8740417856606042, - "grad_norm": 1.8756936379737381, - "learning_rate": 1.6414676088081937e-07, - "loss": 0.9109, - "step": 11630 - }, - { - "epoch": 0.8741169397264392, - "grad_norm": 2.676609685140083, - "learning_rate": 1.6395366053710902e-07, - "loss": 0.9575, - "step": 11631 - }, - { - "epoch": 0.8741920937922741, - "grad_norm": 1.5320900928609982, - "learning_rate": 1.637606689861748e-07, - "loss": 0.8963, - "step": 11632 - }, - { - "epoch": 0.8742672478581092, - "grad_norm": 2.1835278004631653, - "learning_rate": 1.63567786239452e-07, - "loss": 1.0288, - "step": 11633 - }, - { - "epoch": 0.8743424019239441, - "grad_norm": 1.4366756231525921, - "learning_rate": 1.6337501230837059e-07, - "loss": 1.0136, - "step": 11634 - }, - { - "epoch": 0.874417555989779, - "grad_norm": 2.293260494043902, - "learning_rate": 1.631823472043521e-07, - "loss": 0.9121, - "step": 11635 - }, - { - "epoch": 0.874492710055614, - "grad_norm": 2.217592286573209, - "learning_rate": 1.6298979093881292e-07, - "loss": 0.9794, - "step": 11636 - }, - { - "epoch": 0.8745678641214489, - "grad_norm": 1.7695131953583627, - "learning_rate": 1.62797343523164e-07, - "loss": 1.0599, - "step": 11637 - }, - { - "epoch": 0.874643018187284, - "grad_norm": 1.8608352312738032, - "learning_rate": 1.626050049688066e-07, - "loss": 0.8858, - "step": 11638 - }, - { - "epoch": 0.8747181722531189, - "grad_norm": 1.9139199962896216, - "learning_rate": 1.6241277528713916e-07, - "loss": 1.0981, - "step": 11639 - }, - { - "epoch": 0.8747933263189539, - "grad_norm": 1.7218215546387716, - "learning_rate": 1.6222065448955081e-07, - "loss": 0.9816, - "step": 11640 - }, - { - "epoch": 0.8748684803847888, - "grad_norm": 2.0925319032048653, - "learning_rate": 1.6202864258742688e-07, - "loss": 0.9307, - "step": 11641 - }, - { - "epoch": 0.8749436344506237, - "grad_norm": 3.8993699176141945, - "learning_rate": 1.6183673959214316e-07, - "loss": 0.8502, - "step": 11642 - }, - { - "epoch": 0.8750187885164588, - "grad_norm": 2.532815613882985, - "learning_rate": 1.6164494551507168e-07, - "loss": 0.8954, - "step": 11643 - }, - { - "epoch": 0.8750939425822937, - "grad_norm": 2.9829198675080297, - "learning_rate": 1.6145326036757667e-07, - "loss": 0.9752, - "step": 11644 - }, - { - "epoch": 0.8751690966481287, - "grad_norm": 2.0404922325324817, - "learning_rate": 1.6126168416101638e-07, - "loss": 1.0018, - "step": 11645 - }, - { - "epoch": 0.8752442507139636, - "grad_norm": 2.085717464657356, - "learning_rate": 1.6107021690674193e-07, - "loss": 1.0438, - "step": 11646 - }, - { - "epoch": 0.8753194047797986, - "grad_norm": 1.6141863263058311, - "learning_rate": 1.608788586160992e-07, - "loss": 0.9638, - "step": 11647 - }, - { - "epoch": 0.8753945588456336, - "grad_norm": 2.1098576876480757, - "learning_rate": 1.6068760930042657e-07, - "loss": 0.9485, - "step": 11648 - }, - { - "epoch": 0.8754697129114685, - "grad_norm": 0.7697788750218475, - "learning_rate": 1.604964689710564e-07, - "loss": 0.796, - "step": 11649 - }, - { - "epoch": 0.8755448669773035, - "grad_norm": 1.4432874359662364, - "learning_rate": 1.6030543763931382e-07, - "loss": 0.9764, - "step": 11650 - }, - { - "epoch": 0.8756200210431384, - "grad_norm": 1.5547949979029874, - "learning_rate": 1.6011451531651953e-07, - "loss": 0.9115, - "step": 11651 - }, - { - "epoch": 0.8756951751089734, - "grad_norm": 1.8186154292813974, - "learning_rate": 1.5992370201398496e-07, - "loss": 0.904, - "step": 11652 - }, - { - "epoch": 0.8757703291748083, - "grad_norm": 1.4302062880836452, - "learning_rate": 1.5973299774301707e-07, - "loss": 1.0064, - "step": 11653 - }, - { - "epoch": 0.8758454832406433, - "grad_norm": 2.33754651844431, - "learning_rate": 1.5954240251491659e-07, - "loss": 0.9068, - "step": 11654 - }, - { - "epoch": 0.8759206373064783, - "grad_norm": 1.916206560333046, - "learning_rate": 1.59351916340976e-07, - "loss": 1.0175, - "step": 11655 - }, - { - "epoch": 0.8759957913723132, - "grad_norm": 1.8772697697674317, - "learning_rate": 1.5916153923248254e-07, - "loss": 1.0368, - "step": 11656 - }, - { - "epoch": 0.8760709454381482, - "grad_norm": 2.0876147210126375, - "learning_rate": 1.5897127120071674e-07, - "loss": 1.0176, - "step": 11657 - }, - { - "epoch": 0.8761460995039831, - "grad_norm": 1.819221267731217, - "learning_rate": 1.5878111225695357e-07, - "loss": 0.9522, - "step": 11658 - }, - { - "epoch": 0.8762212535698182, - "grad_norm": 2.674011892909991, - "learning_rate": 1.5859106241245934e-07, - "loss": 0.9547, - "step": 11659 - }, - { - "epoch": 0.8762964076356531, - "grad_norm": 1.6823295927545936, - "learning_rate": 1.584011216784962e-07, - "loss": 0.9329, - "step": 11660 - }, - { - "epoch": 0.876371561701488, - "grad_norm": 1.6654982842488357, - "learning_rate": 1.582112900663186e-07, - "loss": 0.9129, - "step": 11661 - }, - { - "epoch": 0.876446715767323, - "grad_norm": 1.864268427279563, - "learning_rate": 1.5802156758717478e-07, - "loss": 0.905, - "step": 11662 - }, - { - "epoch": 0.8765218698331579, - "grad_norm": 1.932354832633875, - "learning_rate": 1.578319542523061e-07, - "loss": 1.0136, - "step": 11663 - }, - { - "epoch": 0.876597023898993, - "grad_norm": 0.7107222514175092, - "learning_rate": 1.5764245007294875e-07, - "loss": 0.8427, - "step": 11664 - }, - { - "epoch": 0.8766721779648279, - "grad_norm": 1.6160378908020865, - "learning_rate": 1.574530550603308e-07, - "loss": 0.8688, - "step": 11665 - }, - { - "epoch": 0.8767473320306629, - "grad_norm": 2.954951937263631, - "learning_rate": 1.5726376922567486e-07, - "loss": 0.9845, - "step": 11666 - }, - { - "epoch": 0.8768224860964978, - "grad_norm": 1.5092331118660096, - "learning_rate": 1.5707459258019684e-07, - "loss": 0.9431, - "step": 11667 - }, - { - "epoch": 0.8768976401623327, - "grad_norm": 23.243155179435355, - "learning_rate": 1.5688552513510688e-07, - "loss": 0.9745, - "step": 11668 - }, - { - "epoch": 0.8769727942281678, - "grad_norm": 1.9766746157485808, - "learning_rate": 1.566965669016065e-07, - "loss": 1.0247, - "step": 11669 - }, - { - "epoch": 0.8770479482940027, - "grad_norm": 2.9593551554513007, - "learning_rate": 1.5650771789089358e-07, - "loss": 0.9203, - "step": 11670 - }, - { - "epoch": 0.8771231023598377, - "grad_norm": 2.2301455930044822, - "learning_rate": 1.563189781141574e-07, - "loss": 1.0597, - "step": 11671 - }, - { - "epoch": 0.8771982564256726, - "grad_norm": 1.749498840506083, - "learning_rate": 1.561303475825817e-07, - "loss": 0.9724, - "step": 11672 - }, - { - "epoch": 0.8772734104915076, - "grad_norm": 5.100670729269606, - "learning_rate": 1.5594182630734332e-07, - "loss": 0.8846, - "step": 11673 - }, - { - "epoch": 0.8773485645573426, - "grad_norm": 1.6639838401578293, - "learning_rate": 1.5575341429961286e-07, - "loss": 0.9471, - "step": 11674 - }, - { - "epoch": 0.8774237186231775, - "grad_norm": 1.8744955655887765, - "learning_rate": 1.5556511157055563e-07, - "loss": 0.9074, - "step": 11675 - }, - { - "epoch": 0.8774988726890125, - "grad_norm": 1.7993300542223236, - "learning_rate": 1.5537691813132803e-07, - "loss": 0.9895, - "step": 11676 - }, - { - "epoch": 0.8775740267548474, - "grad_norm": 1.7263273881695815, - "learning_rate": 1.5518883399308112e-07, - "loss": 1.0248, - "step": 11677 - }, - { - "epoch": 0.8776491808206824, - "grad_norm": 2.4743881548218454, - "learning_rate": 1.5500085916696072e-07, - "loss": 0.8698, - "step": 11678 - }, - { - "epoch": 0.8777243348865174, - "grad_norm": 1.9426869518204515, - "learning_rate": 1.548129936641036e-07, - "loss": 0.9756, - "step": 11679 - }, - { - "epoch": 0.8777994889523523, - "grad_norm": 1.663658988635817, - "learning_rate": 1.546252374956425e-07, - "loss": 1.0282, - "step": 11680 - }, - { - "epoch": 0.8778746430181873, - "grad_norm": 5.381082331474354, - "learning_rate": 1.5443759067270313e-07, - "loss": 1.0622, - "step": 11681 - }, - { - "epoch": 0.8779497970840222, - "grad_norm": 1.7298807561715084, - "learning_rate": 1.5425005320640282e-07, - "loss": 0.9446, - "step": 11682 - }, - { - "epoch": 0.8780249511498572, - "grad_norm": 2.5723819460162183, - "learning_rate": 1.5406262510785518e-07, - "loss": 0.925, - "step": 11683 - }, - { - "epoch": 0.8781001052156922, - "grad_norm": 2.063626299731579, - "learning_rate": 1.5387530638816525e-07, - "loss": 0.9953, - "step": 11684 - }, - { - "epoch": 0.8781752592815272, - "grad_norm": 2.7223249586878087, - "learning_rate": 1.5368809705843334e-07, - "loss": 1.0351, - "step": 11685 - }, - { - "epoch": 0.8782504133473621, - "grad_norm": 1.9428835492383167, - "learning_rate": 1.5350099712975095e-07, - "loss": 1.0313, - "step": 11686 - }, - { - "epoch": 0.878325567413197, - "grad_norm": 3.29562339708885, - "learning_rate": 1.533140066132055e-07, - "loss": 0.9743, - "step": 11687 - }, - { - "epoch": 0.878400721479032, - "grad_norm": 1.9566360044053548, - "learning_rate": 1.531271255198767e-07, - "loss": 0.9227, - "step": 11688 - }, - { - "epoch": 0.878475875544867, - "grad_norm": 2.1422732057128098, - "learning_rate": 1.529403538608378e-07, - "loss": 0.989, - "step": 11689 - }, - { - "epoch": 0.878551029610702, - "grad_norm": 3.589420131723095, - "learning_rate": 1.5275369164715547e-07, - "loss": 0.9043, - "step": 11690 - }, - { - "epoch": 0.8786261836765369, - "grad_norm": 1.907904485411065, - "learning_rate": 1.5256713888989082e-07, - "loss": 0.9781, - "step": 11691 - }, - { - "epoch": 0.8787013377423719, - "grad_norm": 2.5679851534940137, - "learning_rate": 1.523806956000977e-07, - "loss": 0.9327, - "step": 11692 - }, - { - "epoch": 0.8787764918082068, - "grad_norm": 1.384311402455239, - "learning_rate": 1.5219436178882305e-07, - "loss": 0.9272, - "step": 11693 - }, - { - "epoch": 0.8788516458740417, - "grad_norm": 1.4821461859713283, - "learning_rate": 1.5200813746710806e-07, - "loss": 1.0137, - "step": 11694 - }, - { - "epoch": 0.8789267999398768, - "grad_norm": 2.7283538792263196, - "learning_rate": 1.5182202264598787e-07, - "loss": 0.9301, - "step": 11695 - }, - { - "epoch": 0.8790019540057117, - "grad_norm": 1.8067165709332873, - "learning_rate": 1.5163601733648945e-07, - "loss": 0.9683, - "step": 11696 - }, - { - "epoch": 0.8790771080715467, - "grad_norm": 1.7901727155121474, - "learning_rate": 1.5145012154963466e-07, - "loss": 0.9886, - "step": 11697 - }, - { - "epoch": 0.8791522621373816, - "grad_norm": 2.9011055318946846, - "learning_rate": 1.5126433529643956e-07, - "loss": 0.9882, - "step": 11698 - }, - { - "epoch": 0.8792274162032165, - "grad_norm": 1.7445644021888098, - "learning_rate": 1.5107865858791157e-07, - "loss": 0.922, - "step": 11699 - }, - { - "epoch": 0.8793025702690516, - "grad_norm": 1.8510289449546202, - "learning_rate": 1.508930914350528e-07, - "loss": 1.0275, - "step": 11700 - }, - { - "epoch": 0.8793777243348865, - "grad_norm": 1.242278857440452, - "learning_rate": 1.5070763384885888e-07, - "loss": 0.9966, - "step": 11701 - }, - { - "epoch": 0.8794528784007215, - "grad_norm": 1.7822471638364106, - "learning_rate": 1.5052228584031969e-07, - "loss": 0.9661, - "step": 11702 - }, - { - "epoch": 0.8795280324665564, - "grad_norm": 1.7476876513099067, - "learning_rate": 1.5033704742041664e-07, - "loss": 0.9555, - "step": 11703 - }, - { - "epoch": 0.8796031865323914, - "grad_norm": 1.7399691155860668, - "learning_rate": 1.5015191860012676e-07, - "loss": 1.0077, - "step": 11704 - }, - { - "epoch": 0.8796783405982264, - "grad_norm": 0.7254075746296968, - "learning_rate": 1.4996689939041907e-07, - "loss": 0.8575, - "step": 11705 - }, - { - "epoch": 0.8797534946640613, - "grad_norm": 2.0944870747422164, - "learning_rate": 1.4978198980225698e-07, - "loss": 0.9407, - "step": 11706 - }, - { - "epoch": 0.8798286487298963, - "grad_norm": 2.1093356658659013, - "learning_rate": 1.4959718984659663e-07, - "loss": 0.9559, - "step": 11707 - }, - { - "epoch": 0.8799038027957312, - "grad_norm": 2.65006388461284, - "learning_rate": 1.4941249953438882e-07, - "loss": 1.1113, - "step": 11708 - }, - { - "epoch": 0.8799789568615662, - "grad_norm": 0.7718824301296413, - "learning_rate": 1.492279188765766e-07, - "loss": 0.8983, - "step": 11709 - }, - { - "epoch": 0.8800541109274012, - "grad_norm": 2.954738761340182, - "learning_rate": 1.490434478840974e-07, - "loss": 0.9545, - "step": 11710 - }, - { - "epoch": 0.8801292649932362, - "grad_norm": 2.1546933733532962, - "learning_rate": 1.4885908656788137e-07, - "loss": 0.8814, - "step": 11711 - }, - { - "epoch": 0.8802044190590711, - "grad_norm": 1.717833048971877, - "learning_rate": 1.4867483493885357e-07, - "loss": 0.9771, - "step": 11712 - }, - { - "epoch": 0.880279573124906, - "grad_norm": 1.5524983544578985, - "learning_rate": 1.4849069300793037e-07, - "loss": 0.8939, - "step": 11713 - }, - { - "epoch": 0.880354727190741, - "grad_norm": 2.2423566694607153, - "learning_rate": 1.4830666078602372e-07, - "loss": 0.9876, - "step": 11714 - }, - { - "epoch": 0.880429881256576, - "grad_norm": 1.7938422441855182, - "learning_rate": 1.4812273828403822e-07, - "loss": 0.9502, - "step": 11715 - }, - { - "epoch": 0.880505035322411, - "grad_norm": 2.2144011739873806, - "learning_rate": 1.4793892551287157e-07, - "loss": 0.9371, - "step": 11716 - }, - { - "epoch": 0.8805801893882459, - "grad_norm": 2.8957293009195633, - "learning_rate": 1.477552224834153e-07, - "loss": 1.0031, - "step": 11717 - }, - { - "epoch": 0.8806553434540809, - "grad_norm": 1.993177280260711, - "learning_rate": 1.4757162920655496e-07, - "loss": 0.9598, - "step": 11718 - }, - { - "epoch": 0.8807304975199158, - "grad_norm": 1.5904416921097662, - "learning_rate": 1.473881456931696e-07, - "loss": 1.062, - "step": 11719 - }, - { - "epoch": 0.8808056515857507, - "grad_norm": 1.9210171529509472, - "learning_rate": 1.4720477195413006e-07, - "loss": 0.9379, - "step": 11720 - }, - { - "epoch": 0.8808808056515858, - "grad_norm": 1.5620783427638592, - "learning_rate": 1.47021508000303e-07, - "loss": 0.9781, - "step": 11721 - }, - { - "epoch": 0.8809559597174207, - "grad_norm": 2.3557590286776553, - "learning_rate": 1.468383538425475e-07, - "loss": 0.9399, - "step": 11722 - }, - { - "epoch": 0.8810311137832557, - "grad_norm": 2.193282631323811, - "learning_rate": 1.466553094917149e-07, - "loss": 1.0308, - "step": 11723 - }, - { - "epoch": 0.8811062678490906, - "grad_norm": 17.61464955388696, - "learning_rate": 1.4647237495865227e-07, - "loss": 1.0407, - "step": 11724 - }, - { - "epoch": 0.8811814219149255, - "grad_norm": 2.9758432876077165, - "learning_rate": 1.4628955025419986e-07, - "loss": 0.9865, - "step": 11725 - }, - { - "epoch": 0.8812565759807606, - "grad_norm": 2.1020290608452443, - "learning_rate": 1.46106835389189e-07, - "loss": 0.9661, - "step": 11726 - }, - { - "epoch": 0.8813317300465955, - "grad_norm": 1.3639279118894725, - "learning_rate": 1.459242303744477e-07, - "loss": 1.037, - "step": 11727 - }, - { - "epoch": 0.8814068841124305, - "grad_norm": 2.0354161892763147, - "learning_rate": 1.4574173522079502e-07, - "loss": 1.0095, - "step": 11728 - }, - { - "epoch": 0.8814820381782654, - "grad_norm": 1.9756979879903571, - "learning_rate": 1.4555934993904572e-07, - "loss": 0.9583, - "step": 11729 - }, - { - "epoch": 0.8815571922441005, - "grad_norm": 1.833847999580441, - "learning_rate": 1.4537707454000536e-07, - "loss": 0.863, - "step": 11730 - }, - { - "epoch": 0.8816323463099354, - "grad_norm": 1.8950295333775269, - "learning_rate": 1.4519490903447528e-07, - "loss": 1.0469, - "step": 11731 - }, - { - "epoch": 0.8817075003757703, - "grad_norm": 2.088255091259656, - "learning_rate": 1.4501285343324975e-07, - "loss": 0.9646, - "step": 11732 - }, - { - "epoch": 0.8817826544416053, - "grad_norm": 2.276899395837385, - "learning_rate": 1.448309077471157e-07, - "loss": 0.9648, - "step": 11733 - }, - { - "epoch": 0.8818578085074402, - "grad_norm": 2.0199107656503505, - "learning_rate": 1.4464907198685382e-07, - "loss": 1.0971, - "step": 11734 - }, - { - "epoch": 0.8819329625732752, - "grad_norm": 1.7880670035009283, - "learning_rate": 1.4446734616323953e-07, - "loss": 0.9548, - "step": 11735 - }, - { - "epoch": 0.8820081166391102, - "grad_norm": 1.8340563287727032, - "learning_rate": 1.4428573028704017e-07, - "loss": 0.9282, - "step": 11736 - }, - { - "epoch": 0.8820832707049452, - "grad_norm": 1.8313785520876762, - "learning_rate": 1.4410422436901736e-07, - "loss": 0.924, - "step": 11737 - }, - { - "epoch": 0.8821584247707801, - "grad_norm": 1.4493409796732126, - "learning_rate": 1.4392282841992566e-07, - "loss": 0.8712, - "step": 11738 - }, - { - "epoch": 0.882233578836615, - "grad_norm": 1.795328154466603, - "learning_rate": 1.437415424505144e-07, - "loss": 1.0361, - "step": 11739 - }, - { - "epoch": 0.88230873290245, - "grad_norm": 2.169052045824705, - "learning_rate": 1.4356036647152413e-07, - "loss": 1.0004, - "step": 11740 - }, - { - "epoch": 0.882383886968285, - "grad_norm": 2.026022066807677, - "learning_rate": 1.4337930049369117e-07, - "loss": 0.9464, - "step": 11741 - }, - { - "epoch": 0.88245904103412, - "grad_norm": 1.9396428594527273, - "learning_rate": 1.4319834452774447e-07, - "loss": 0.9257, - "step": 11742 - }, - { - "epoch": 0.8825341950999549, - "grad_norm": 1.77455379626606, - "learning_rate": 1.4301749858440593e-07, - "loss": 0.9402, - "step": 11743 - }, - { - "epoch": 0.8826093491657898, - "grad_norm": 1.354558985685036, - "learning_rate": 1.4283676267439094e-07, - "loss": 0.9894, - "step": 11744 - }, - { - "epoch": 0.8826845032316248, - "grad_norm": 1.6651373602078554, - "learning_rate": 1.4265613680840938e-07, - "loss": 0.9635, - "step": 11745 - }, - { - "epoch": 0.8827596572974598, - "grad_norm": 2.702192200698196, - "learning_rate": 1.424756209971647e-07, - "loss": 0.8421, - "step": 11746 - }, - { - "epoch": 0.8828348113632948, - "grad_norm": 2.804556075818358, - "learning_rate": 1.4229521525135168e-07, - "loss": 0.9417, - "step": 11747 - }, - { - "epoch": 0.8829099654291297, - "grad_norm": 1.495262452885457, - "learning_rate": 1.4211491958166112e-07, - "loss": 1.0052, - "step": 11748 - }, - { - "epoch": 0.8829851194949647, - "grad_norm": 3.184400959302156, - "learning_rate": 1.4193473399877598e-07, - "loss": 0.9578, - "step": 11749 - }, - { - "epoch": 0.8830602735607996, - "grad_norm": 1.7955295335278878, - "learning_rate": 1.4175465851337266e-07, - "loss": 0.9917, - "step": 11750 - }, - { - "epoch": 0.8831354276266346, - "grad_norm": 1.4206783583052702, - "learning_rate": 1.4157469313612147e-07, - "loss": 0.9478, - "step": 11751 - }, - { - "epoch": 0.8832105816924696, - "grad_norm": 1.8341935986013769, - "learning_rate": 1.4139483787768614e-07, - "loss": 0.8712, - "step": 11752 - }, - { - "epoch": 0.8832857357583045, - "grad_norm": 1.6314776499249353, - "learning_rate": 1.412150927487239e-07, - "loss": 0.9187, - "step": 11753 - }, - { - "epoch": 0.8833608898241395, - "grad_norm": 1.6150283774371794, - "learning_rate": 1.4103545775988512e-07, - "loss": 0.9983, - "step": 11754 - }, - { - "epoch": 0.8834360438899744, - "grad_norm": 0.7883038792608719, - "learning_rate": 1.4085593292181375e-07, - "loss": 0.92, - "step": 11755 - }, - { - "epoch": 0.8835111979558095, - "grad_norm": 9.455660090732156, - "learning_rate": 1.406765182451479e-07, - "loss": 1.0062, - "step": 11756 - }, - { - "epoch": 0.8835863520216444, - "grad_norm": 1.9789195123914054, - "learning_rate": 1.404972137405176e-07, - "loss": 1.0266, - "step": 11757 - }, - { - "epoch": 0.8836615060874793, - "grad_norm": 1.8096062151769257, - "learning_rate": 1.4031801941854827e-07, - "loss": 1.0166, - "step": 11758 - }, - { - "epoch": 0.8837366601533143, - "grad_norm": 1.6217022224963826, - "learning_rate": 1.4013893528985744e-07, - "loss": 1.0195, - "step": 11759 - }, - { - "epoch": 0.8838118142191492, - "grad_norm": 3.3701258183658678, - "learning_rate": 1.3995996136505662e-07, - "loss": 0.8611, - "step": 11760 - }, - { - "epoch": 0.8838869682849843, - "grad_norm": 1.83586334728076, - "learning_rate": 1.3978109765475044e-07, - "loss": 0.9312, - "step": 11761 - }, - { - "epoch": 0.8839621223508192, - "grad_norm": 1.787576583075393, - "learning_rate": 1.396023441695373e-07, - "loss": 0.9464, - "step": 11762 - }, - { - "epoch": 0.8840372764166542, - "grad_norm": 6.1146326880816195, - "learning_rate": 1.3942370092000988e-07, - "loss": 1.0351, - "step": 11763 - }, - { - "epoch": 0.8841124304824891, - "grad_norm": 2.4212578756335277, - "learning_rate": 1.3924516791675212e-07, - "loss": 0.9193, - "step": 11764 - }, - { - "epoch": 0.884187584548324, - "grad_norm": 0.6489264471712276, - "learning_rate": 1.39066745170344e-07, - "loss": 0.7906, - "step": 11765 - }, - { - "epoch": 0.884262738614159, - "grad_norm": 2.4771213803418632, - "learning_rate": 1.3888843269135732e-07, - "loss": 0.8994, - "step": 11766 - }, - { - "epoch": 0.884337892679994, - "grad_norm": 1.7671397640347903, - "learning_rate": 1.3871023049035713e-07, - "loss": 0.9767, - "step": 11767 - }, - { - "epoch": 0.884413046745829, - "grad_norm": 1.6989648388103251, - "learning_rate": 1.3853213857790304e-07, - "loss": 0.9359, - "step": 11768 - }, - { - "epoch": 0.8844882008116639, - "grad_norm": 1.6576202770108623, - "learning_rate": 1.3835415696454856e-07, - "loss": 0.9921, - "step": 11769 - }, - { - "epoch": 0.8845633548774988, - "grad_norm": 1.7297345879545323, - "learning_rate": 1.3817628566083817e-07, - "loss": 1.0576, - "step": 11770 - }, - { - "epoch": 0.8846385089433338, - "grad_norm": 1.5958105492355643, - "learning_rate": 1.3799852467731275e-07, - "loss": 0.9328, - "step": 11771 - }, - { - "epoch": 0.8847136630091688, - "grad_norm": 1.634670897443087, - "learning_rate": 1.3782087402450437e-07, - "loss": 0.9902, - "step": 11772 - }, - { - "epoch": 0.8847888170750038, - "grad_norm": 1.9677656831422896, - "learning_rate": 1.376433337129408e-07, - "loss": 0.9052, - "step": 11773 - }, - { - "epoch": 0.8848639711408387, - "grad_norm": 1.8043019597142307, - "learning_rate": 1.374659037531405e-07, - "loss": 1.0235, - "step": 11774 - }, - { - "epoch": 0.8849391252066737, - "grad_norm": 1.6940385804900433, - "learning_rate": 1.3728858415561772e-07, - "loss": 0.9736, - "step": 11775 - }, - { - "epoch": 0.8850142792725086, - "grad_norm": 1.4380778105087912, - "learning_rate": 1.37111374930879e-07, - "loss": 0.9521, - "step": 11776 - }, - { - "epoch": 0.8850894333383436, - "grad_norm": 2.1902811082508107, - "learning_rate": 1.3693427608942497e-07, - "loss": 0.9625, - "step": 11777 - }, - { - "epoch": 0.8851645874041786, - "grad_norm": 1.7013609963896583, - "learning_rate": 1.3675728764174887e-07, - "loss": 0.9592, - "step": 11778 - }, - { - "epoch": 0.8852397414700135, - "grad_norm": 1.76790431263845, - "learning_rate": 1.3658040959833827e-07, - "loss": 1.0218, - "step": 11779 - }, - { - "epoch": 0.8853148955358485, - "grad_norm": 3.9270805675925207, - "learning_rate": 1.3640364196967459e-07, - "loss": 1.1573, - "step": 11780 - }, - { - "epoch": 0.8853900496016834, - "grad_norm": 1.772685425048793, - "learning_rate": 1.3622698476623097e-07, - "loss": 0.8855, - "step": 11781 - }, - { - "epoch": 0.8854652036675185, - "grad_norm": 1.8507080037866999, - "learning_rate": 1.3605043799847527e-07, - "loss": 0.9909, - "step": 11782 - }, - { - "epoch": 0.8855403577333534, - "grad_norm": 1.9696859526715138, - "learning_rate": 1.3587400167686892e-07, - "loss": 1.0749, - "step": 11783 - }, - { - "epoch": 0.8856155117991883, - "grad_norm": 1.5684147085117432, - "learning_rate": 1.3569767581186574e-07, - "loss": 0.9224, - "step": 11784 - }, - { - "epoch": 0.8856906658650233, - "grad_norm": 0.6910863017526714, - "learning_rate": 1.355214604139141e-07, - "loss": 0.8797, - "step": 11785 - }, - { - "epoch": 0.8857658199308582, - "grad_norm": 9.141514713643774, - "learning_rate": 1.3534535549345626e-07, - "loss": 0.9532, - "step": 11786 - }, - { - "epoch": 0.8858409739966933, - "grad_norm": 2.1254526665944247, - "learning_rate": 1.351693610609257e-07, - "loss": 0.9169, - "step": 11787 - }, - { - "epoch": 0.8859161280625282, - "grad_norm": 2.700181803057299, - "learning_rate": 1.3499347712675158e-07, - "loss": 0.9142, - "step": 11788 - }, - { - "epoch": 0.8859912821283631, - "grad_norm": 2.0257930521358265, - "learning_rate": 1.3481770370135537e-07, - "loss": 0.9817, - "step": 11789 - }, - { - "epoch": 0.8860664361941981, - "grad_norm": 2.1141599284500603, - "learning_rate": 1.3464204079515296e-07, - "loss": 1.0007, - "step": 11790 - }, - { - "epoch": 0.886141590260033, - "grad_norm": 2.097122891777631, - "learning_rate": 1.3446648841855202e-07, - "loss": 0.9839, - "step": 11791 - }, - { - "epoch": 0.886216744325868, - "grad_norm": 2.6259927234882956, - "learning_rate": 1.3429104658195555e-07, - "loss": 0.9137, - "step": 11792 - }, - { - "epoch": 0.886291898391703, - "grad_norm": 1.8420127831097286, - "learning_rate": 1.3411571529575882e-07, - "loss": 0.9635, - "step": 11793 - }, - { - "epoch": 0.886367052457538, - "grad_norm": 1.573283104391101, - "learning_rate": 1.3394049457035105e-07, - "loss": 1.0044, - "step": 11794 - }, - { - "epoch": 0.8864422065233729, - "grad_norm": 1.669056164947869, - "learning_rate": 1.3376538441611396e-07, - "loss": 1.0483, - "step": 11795 - }, - { - "epoch": 0.8865173605892078, - "grad_norm": 1.8484165812813769, - "learning_rate": 1.3359038484342478e-07, - "loss": 0.9697, - "step": 11796 - }, - { - "epoch": 0.8865925146550429, - "grad_norm": 3.605015090720327, - "learning_rate": 1.334154958626521e-07, - "loss": 0.9604, - "step": 11797 - }, - { - "epoch": 0.8866676687208778, - "grad_norm": 2.232467690946363, - "learning_rate": 1.33240717484159e-07, - "loss": 1.0153, - "step": 11798 - }, - { - "epoch": 0.8867428227867128, - "grad_norm": 1.7258175584903177, - "learning_rate": 1.3306604971830115e-07, - "loss": 0.9564, - "step": 11799 - }, - { - "epoch": 0.8868179768525477, - "grad_norm": 1.3147986582811575, - "learning_rate": 1.3289149257542964e-07, - "loss": 0.8268, - "step": 11800 - }, - { - "epoch": 0.8868931309183827, - "grad_norm": 1.5456501128134512, - "learning_rate": 1.3271704606588618e-07, - "loss": 0.9968, - "step": 11801 - }, - { - "epoch": 0.8869682849842176, - "grad_norm": 1.6995967050026197, - "learning_rate": 1.3254271020000852e-07, - "loss": 0.9966, - "step": 11802 - }, - { - "epoch": 0.8870434390500526, - "grad_norm": 1.4806290592574338, - "learning_rate": 1.3236848498812592e-07, - "loss": 1.0214, - "step": 11803 - }, - { - "epoch": 0.8871185931158876, - "grad_norm": 1.7478032629654616, - "learning_rate": 1.3219437044056258e-07, - "loss": 1.0154, - "step": 11804 - }, - { - "epoch": 0.8871937471817225, - "grad_norm": 1.982493191643626, - "learning_rate": 1.320203665676345e-07, - "loss": 1.0518, - "step": 11805 - }, - { - "epoch": 0.8872689012475575, - "grad_norm": 2.0784750048520375, - "learning_rate": 1.318464733796527e-07, - "loss": 0.9926, - "step": 11806 - }, - { - "epoch": 0.8873440553133924, - "grad_norm": 2.1751967900139206, - "learning_rate": 1.3167269088692167e-07, - "loss": 1.0508, - "step": 11807 - }, - { - "epoch": 0.8874192093792275, - "grad_norm": 0.7563633384581042, - "learning_rate": 1.3149901909973738e-07, - "loss": 0.8418, - "step": 11808 - }, - { - "epoch": 0.8874943634450624, - "grad_norm": 1.9306126897881903, - "learning_rate": 1.3132545802839158e-07, - "loss": 0.9673, - "step": 11809 - }, - { - "epoch": 0.8875695175108973, - "grad_norm": 1.6059180603216607, - "learning_rate": 1.3115200768316803e-07, - "loss": 0.932, - "step": 11810 - }, - { - "epoch": 0.8876446715767323, - "grad_norm": 3.41694948530048, - "learning_rate": 1.309786680743441e-07, - "loss": 1.0239, - "step": 11811 - }, - { - "epoch": 0.8877198256425672, - "grad_norm": 0.7460524063239258, - "learning_rate": 1.308054392121909e-07, - "loss": 0.8009, - "step": 11812 - }, - { - "epoch": 0.8877949797084023, - "grad_norm": 1.546968707704583, - "learning_rate": 1.3063232110697375e-07, - "loss": 0.9378, - "step": 11813 - }, - { - "epoch": 0.8878701337742372, - "grad_norm": 2.4979897045903736, - "learning_rate": 1.3045931376894915e-07, - "loss": 0.7752, - "step": 11814 - }, - { - "epoch": 0.8879452878400721, - "grad_norm": 1.6492285977082677, - "learning_rate": 1.3028641720836953e-07, - "loss": 0.9353, - "step": 11815 - }, - { - "epoch": 0.8880204419059071, - "grad_norm": 2.2437239111443543, - "learning_rate": 1.301136314354787e-07, - "loss": 0.9786, - "step": 11816 - }, - { - "epoch": 0.888095595971742, - "grad_norm": 1.8883096057560453, - "learning_rate": 1.299409564605165e-07, - "loss": 1.0041, - "step": 11817 - }, - { - "epoch": 0.8881707500375771, - "grad_norm": 0.8153690521173006, - "learning_rate": 1.2976839229371272e-07, - "loss": 0.8235, - "step": 11818 - }, - { - "epoch": 0.888245904103412, - "grad_norm": 2.4947274876079666, - "learning_rate": 1.2959593894529364e-07, - "loss": 0.9685, - "step": 11819 - }, - { - "epoch": 0.888321058169247, - "grad_norm": 1.4706146092013321, - "learning_rate": 1.294235964254775e-07, - "loss": 0.8685, - "step": 11820 - }, - { - "epoch": 0.8883962122350819, - "grad_norm": 1.5735261590975402, - "learning_rate": 1.2925136474447597e-07, - "loss": 1.0313, - "step": 11821 - }, - { - "epoch": 0.8884713663009168, - "grad_norm": 2.1868142279210057, - "learning_rate": 1.290792439124946e-07, - "loss": 0.9552, - "step": 11822 - }, - { - "epoch": 0.8885465203667519, - "grad_norm": 1.7622717012846116, - "learning_rate": 1.2890723393973213e-07, - "loss": 1.0073, - "step": 11823 - }, - { - "epoch": 0.8886216744325868, - "grad_norm": 1.6167799578950057, - "learning_rate": 1.2873533483638155e-07, - "loss": 0.9209, - "step": 11824 - }, - { - "epoch": 0.8886968284984218, - "grad_norm": 1.850326318571666, - "learning_rate": 1.285635466126278e-07, - "loss": 0.9551, - "step": 11825 - }, - { - "epoch": 0.8887719825642567, - "grad_norm": 2.2381707874888606, - "learning_rate": 1.2839186927864965e-07, - "loss": 0.8717, - "step": 11826 - }, - { - "epoch": 0.8888471366300917, - "grad_norm": 2.556577117807256, - "learning_rate": 1.282203028446207e-07, - "loss": 0.9762, - "step": 11827 - }, - { - "epoch": 0.8889222906959267, - "grad_norm": 0.7350588516107185, - "learning_rate": 1.2804884732070574e-07, - "loss": 0.8556, - "step": 11828 - }, - { - "epoch": 0.8889974447617616, - "grad_norm": 1.6824346743367615, - "learning_rate": 1.2787750271706487e-07, - "loss": 0.9203, - "step": 11829 - }, - { - "epoch": 0.8890725988275966, - "grad_norm": 1.8413774127992304, - "learning_rate": 1.2770626904385128e-07, - "loss": 0.9985, - "step": 11830 - }, - { - "epoch": 0.8891477528934315, - "grad_norm": 2.324147866073767, - "learning_rate": 1.275351463112102e-07, - "loss": 0.9459, - "step": 11831 - }, - { - "epoch": 0.8892229069592665, - "grad_norm": 2.8327568440090785, - "learning_rate": 1.2736413452928218e-07, - "loss": 0.9285, - "step": 11832 - }, - { - "epoch": 0.8892980610251014, - "grad_norm": 7.738494700646476, - "learning_rate": 1.2719323370819955e-07, - "loss": 0.921, - "step": 11833 - }, - { - "epoch": 0.8893732150909364, - "grad_norm": 2.378633730958995, - "learning_rate": 1.2702244385809e-07, - "loss": 0.875, - "step": 11834 - }, - { - "epoch": 0.8894483691567714, - "grad_norm": 1.6451059818622136, - "learning_rate": 1.2685176498907213e-07, - "loss": 1.0353, - "step": 11835 - }, - { - "epoch": 0.8895235232226063, - "grad_norm": 1.807464576680099, - "learning_rate": 1.2668119711126023e-07, - "loss": 0.918, - "step": 11836 - }, - { - "epoch": 0.8895986772884413, - "grad_norm": 2.671582746974513, - "learning_rate": 1.2651074023476095e-07, - "loss": 1.001, - "step": 11837 - }, - { - "epoch": 0.8896738313542762, - "grad_norm": 2.056666708234707, - "learning_rate": 1.2634039436967414e-07, - "loss": 0.8847, - "step": 11838 - }, - { - "epoch": 0.8897489854201113, - "grad_norm": 2.382581155521567, - "learning_rate": 1.2617015952609356e-07, - "loss": 0.9986, - "step": 11839 - }, - { - "epoch": 0.8898241394859462, - "grad_norm": 1.5346336348768355, - "learning_rate": 1.2600003571410668e-07, - "loss": 0.9093, - "step": 11840 - }, - { - "epoch": 0.8898992935517811, - "grad_norm": 1.7545366067793537, - "learning_rate": 1.2583002294379363e-07, - "loss": 0.8865, - "step": 11841 - }, - { - "epoch": 0.8899744476176161, - "grad_norm": 1.5531942586212577, - "learning_rate": 1.2566012122522862e-07, - "loss": 0.9754, - "step": 11842 - }, - { - "epoch": 0.890049601683451, - "grad_norm": 1.5030347800986368, - "learning_rate": 1.2549033056847825e-07, - "loss": 0.9284, - "step": 11843 - }, - { - "epoch": 0.8901247557492861, - "grad_norm": 2.7529782727011485, - "learning_rate": 1.2532065098360445e-07, - "loss": 0.9003, - "step": 11844 - }, - { - "epoch": 0.890199909815121, - "grad_norm": 1.7110679263215496, - "learning_rate": 1.2515108248066008e-07, - "loss": 0.9267, - "step": 11845 - }, - { - "epoch": 0.890275063880956, - "grad_norm": 1.4119139307876634, - "learning_rate": 1.2498162506969312e-07, - "loss": 1.0431, - "step": 11846 - }, - { - "epoch": 0.8903502179467909, - "grad_norm": 4.93259524331265, - "learning_rate": 1.2481227876074575e-07, - "loss": 1.0409, - "step": 11847 - }, - { - "epoch": 0.8904253720126258, - "grad_norm": 1.485568560538046, - "learning_rate": 1.2464304356385102e-07, - "loss": 1.0199, - "step": 11848 - }, - { - "epoch": 0.8905005260784609, - "grad_norm": 1.6039913461504056, - "learning_rate": 1.2447391948903673e-07, - "loss": 0.9523, - "step": 11849 - }, - { - "epoch": 0.8905756801442958, - "grad_norm": 3.101661815062153, - "learning_rate": 1.2430490654632487e-07, - "loss": 0.8952, - "step": 11850 - }, - { - "epoch": 0.8906508342101308, - "grad_norm": 1.5159014420240329, - "learning_rate": 1.2413600474573028e-07, - "loss": 1.036, - "step": 11851 - }, - { - "epoch": 0.8907259882759657, - "grad_norm": 14.163651508274677, - "learning_rate": 1.2396721409725987e-07, - "loss": 1.0851, - "step": 11852 - }, - { - "epoch": 0.8908011423418007, - "grad_norm": 1.8763091285532707, - "learning_rate": 1.2379853461091628e-07, - "loss": 0.959, - "step": 11853 - }, - { - "epoch": 0.8908762964076357, - "grad_norm": 1.7088080963773407, - "learning_rate": 1.2362996629669376e-07, - "loss": 0.9803, - "step": 11854 - }, - { - "epoch": 0.8909514504734706, - "grad_norm": 1.9559733259141399, - "learning_rate": 1.2346150916458098e-07, - "loss": 0.9712, - "step": 11855 - }, - { - "epoch": 0.8910266045393056, - "grad_norm": 1.7667379331909432, - "learning_rate": 1.232931632245593e-07, - "loss": 0.9147, - "step": 11856 - }, - { - "epoch": 0.8911017586051405, - "grad_norm": 4.057366987058945, - "learning_rate": 1.2312492848660448e-07, - "loss": 0.9494, - "step": 11857 - }, - { - "epoch": 0.8911769126709755, - "grad_norm": 1.617409732310351, - "learning_rate": 1.229568049606844e-07, - "loss": 0.9116, - "step": 11858 - }, - { - "epoch": 0.8912520667368105, - "grad_norm": 1.7145156025013897, - "learning_rate": 1.2278879265676122e-07, - "loss": 1.0374, - "step": 11859 - }, - { - "epoch": 0.8913272208026454, - "grad_norm": 3.397495604028728, - "learning_rate": 1.2262089158479038e-07, - "loss": 0.9703, - "step": 11860 - }, - { - "epoch": 0.8914023748684804, - "grad_norm": 2.1275090383478874, - "learning_rate": 1.2245310175472125e-07, - "loss": 0.9777, - "step": 11861 - }, - { - "epoch": 0.8914775289343153, - "grad_norm": 2.289319437338428, - "learning_rate": 1.222854231764947e-07, - "loss": 1.01, - "step": 11862 - }, - { - "epoch": 0.8915526830001503, - "grad_norm": 7.402202288803329, - "learning_rate": 1.2211785586004751e-07, - "loss": 0.9414, - "step": 11863 - }, - { - "epoch": 0.8916278370659853, - "grad_norm": 1.5077292463809462, - "learning_rate": 1.219503998153082e-07, - "loss": 0.8189, - "step": 11864 - }, - { - "epoch": 0.8917029911318203, - "grad_norm": 0.7721935248207443, - "learning_rate": 1.217830550521992e-07, - "loss": 0.8181, - "step": 11865 - }, - { - "epoch": 0.8917781451976552, - "grad_norm": 1.9532949755138873, - "learning_rate": 1.2161582158063622e-07, - "loss": 1.0159, - "step": 11866 - }, - { - "epoch": 0.8918532992634901, - "grad_norm": 1.551608616814875, - "learning_rate": 1.2144869941052837e-07, - "loss": 1.0052, - "step": 11867 - }, - { - "epoch": 0.8919284533293251, - "grad_norm": 1.9749451303706778, - "learning_rate": 1.2128168855177933e-07, - "loss": 0.8818, - "step": 11868 - }, - { - "epoch": 0.89200360739516, - "grad_norm": 1.7144388402942157, - "learning_rate": 1.2111478901428363e-07, - "loss": 0.8651, - "step": 11869 - }, - { - "epoch": 0.8920787614609951, - "grad_norm": 1.6391883736307478, - "learning_rate": 1.2094800080793177e-07, - "loss": 0.9342, - "step": 11870 - }, - { - "epoch": 0.89215391552683, - "grad_norm": 2.391180235766822, - "learning_rate": 1.2078132394260654e-07, - "loss": 1.0032, - "step": 11871 - }, - { - "epoch": 0.892229069592665, - "grad_norm": 2.1035022504942735, - "learning_rate": 1.2061475842818335e-07, - "loss": 1.0224, - "step": 11872 - }, - { - "epoch": 0.8923042236584999, - "grad_norm": 1.8807237998134367, - "learning_rate": 1.2044830427453234e-07, - "loss": 0.9833, - "step": 11873 - }, - { - "epoch": 0.8923793777243348, - "grad_norm": 2.112609756598008, - "learning_rate": 1.2028196149151716e-07, - "loss": 0.8907, - "step": 11874 - }, - { - "epoch": 0.8924545317901699, - "grad_norm": 1.7086937531908997, - "learning_rate": 1.20115730088993e-07, - "loss": 1.0078, - "step": 11875 - }, - { - "epoch": 0.8925296858560048, - "grad_norm": 1.9626167675681674, - "learning_rate": 1.199496100768107e-07, - "loss": 0.8666, - "step": 11876 - }, - { - "epoch": 0.8926048399218398, - "grad_norm": 0.7707694302094762, - "learning_rate": 1.1978360146481281e-07, - "loss": 0.8332, - "step": 11877 - }, - { - "epoch": 0.8926799939876747, - "grad_norm": 1.595026440057475, - "learning_rate": 1.1961770426283723e-07, - "loss": 1.0557, - "step": 11878 - }, - { - "epoch": 0.8927551480535096, - "grad_norm": 4.071969601051163, - "learning_rate": 1.1945191848071234e-07, - "loss": 0.9084, - "step": 11879 - }, - { - "epoch": 0.8928303021193447, - "grad_norm": 0.6687346634933372, - "learning_rate": 1.1928624412826272e-07, - "loss": 0.8024, - "step": 11880 - }, - { - "epoch": 0.8929054561851796, - "grad_norm": 5.5870146879867155, - "learning_rate": 1.1912068121530494e-07, - "loss": 0.9763, - "step": 11881 - }, - { - "epoch": 0.8929806102510146, - "grad_norm": 1.7288921398719572, - "learning_rate": 1.1895522975164918e-07, - "loss": 1.0219, - "step": 11882 - }, - { - "epoch": 0.8930557643168495, - "grad_norm": 1.7189461336067804, - "learning_rate": 1.1878988974709869e-07, - "loss": 0.9366, - "step": 11883 - }, - { - "epoch": 0.8931309183826845, - "grad_norm": 0.8489710722983594, - "learning_rate": 1.1862466121145098e-07, - "loss": 0.8938, - "step": 11884 - }, - { - "epoch": 0.8932060724485195, - "grad_norm": 1.354012402281878, - "learning_rate": 1.1845954415449666e-07, - "loss": 0.9602, - "step": 11885 - }, - { - "epoch": 0.8932812265143544, - "grad_norm": 2.7149637099733335, - "learning_rate": 1.1829453858601901e-07, - "loss": 0.8905, - "step": 11886 - }, - { - "epoch": 0.8933563805801894, - "grad_norm": 1.4475093332625535, - "learning_rate": 1.1812964451579532e-07, - "loss": 0.9384, - "step": 11887 - }, - { - "epoch": 0.8934315346460243, - "grad_norm": 7.362311099467463, - "learning_rate": 1.1796486195359711e-07, - "loss": 0.9829, - "step": 11888 - }, - { - "epoch": 0.8935066887118593, - "grad_norm": 1.8259242578405281, - "learning_rate": 1.178001909091868e-07, - "loss": 1.0052, - "step": 11889 - }, - { - "epoch": 0.8935818427776943, - "grad_norm": 1.4367735312443046, - "learning_rate": 1.1763563139232257e-07, - "loss": 0.9372, - "step": 11890 - }, - { - "epoch": 0.8936569968435293, - "grad_norm": 1.7523046032516447, - "learning_rate": 1.1747118341275597e-07, - "loss": 0.9154, - "step": 11891 - }, - { - "epoch": 0.8937321509093642, - "grad_norm": 1.6500330430392411, - "learning_rate": 1.1730684698023007e-07, - "loss": 0.9879, - "step": 11892 - }, - { - "epoch": 0.8938073049751991, - "grad_norm": 1.642519919218052, - "learning_rate": 1.1714262210448245e-07, - "loss": 1.0432, - "step": 11893 - }, - { - "epoch": 0.8938824590410341, - "grad_norm": 1.8792500964448007, - "learning_rate": 1.1697850879524462e-07, - "loss": 1.0564, - "step": 11894 - }, - { - "epoch": 0.893957613106869, - "grad_norm": 1.7651520492288966, - "learning_rate": 1.1681450706224106e-07, - "loss": 0.9952, - "step": 11895 - }, - { - "epoch": 0.8940327671727041, - "grad_norm": 2.486924692459415, - "learning_rate": 1.1665061691518884e-07, - "loss": 0.983, - "step": 11896 - }, - { - "epoch": 0.894107921238539, - "grad_norm": 1.8369389176905284, - "learning_rate": 1.1648683836379935e-07, - "loss": 0.9415, - "step": 11897 - }, - { - "epoch": 0.894183075304374, - "grad_norm": 2.504001276412005, - "learning_rate": 1.1632317141777748e-07, - "loss": 0.9625, - "step": 11898 - }, - { - "epoch": 0.8942582293702089, - "grad_norm": 1.6316294057148466, - "learning_rate": 1.1615961608682057e-07, - "loss": 1.0269, - "step": 11899 - }, - { - "epoch": 0.8943333834360438, - "grad_norm": 2.1970631417120066, - "learning_rate": 1.1599617238061976e-07, - "loss": 1.0179, - "step": 11900 - }, - { - "epoch": 0.8944085375018789, - "grad_norm": 1.9060629455844074, - "learning_rate": 1.1583284030886087e-07, - "loss": 0.9526, - "step": 11901 - }, - { - "epoch": 0.8944836915677138, - "grad_norm": 0.6734438477679461, - "learning_rate": 1.1566961988122037e-07, - "loss": 0.8175, - "step": 11902 - }, - { - "epoch": 0.8945588456335488, - "grad_norm": 1.4810614292816853, - "learning_rate": 1.1550651110737097e-07, - "loss": 0.976, - "step": 11903 - }, - { - "epoch": 0.8946339996993837, - "grad_norm": 1.6687913068023754, - "learning_rate": 1.153435139969765e-07, - "loss": 0.9041, - "step": 11904 - }, - { - "epoch": 0.8947091537652186, - "grad_norm": 1.45718243916496, - "learning_rate": 1.1518062855969635e-07, - "loss": 0.8743, - "step": 11905 - }, - { - "epoch": 0.8947843078310537, - "grad_norm": 4.486750276040643, - "learning_rate": 1.1501785480518078e-07, - "loss": 0.9585, - "step": 11906 - }, - { - "epoch": 0.8948594618968886, - "grad_norm": 0.6934948641071865, - "learning_rate": 1.1485519274307564e-07, - "loss": 0.8374, - "step": 11907 - }, - { - "epoch": 0.8949346159627236, - "grad_norm": 1.804207147521482, - "learning_rate": 1.1469264238301924e-07, - "loss": 0.9552, - "step": 11908 - }, - { - "epoch": 0.8950097700285585, - "grad_norm": 1.8826067167027132, - "learning_rate": 1.1453020373464295e-07, - "loss": 1.0029, - "step": 11909 - }, - { - "epoch": 0.8950849240943936, - "grad_norm": 1.9309990012997496, - "learning_rate": 1.1436787680757176e-07, - "loss": 0.9396, - "step": 11910 - }, - { - "epoch": 0.8951600781602285, - "grad_norm": 2.008807933757676, - "learning_rate": 1.1420566161142442e-07, - "loss": 0.9231, - "step": 11911 - }, - { - "epoch": 0.8952352322260634, - "grad_norm": 1.9738843664102055, - "learning_rate": 1.1404355815581345e-07, - "loss": 0.9148, - "step": 11912 - }, - { - "epoch": 0.8953103862918984, - "grad_norm": 1.541502978252413, - "learning_rate": 1.1388156645034275e-07, - "loss": 1.0025, - "step": 11913 - }, - { - "epoch": 0.8953855403577333, - "grad_norm": 1.7764261748684325, - "learning_rate": 1.1371968650461216e-07, - "loss": 1.0474, - "step": 11914 - }, - { - "epoch": 0.8954606944235683, - "grad_norm": 3.243305059651487, - "learning_rate": 1.1355791832821338e-07, - "loss": 1.0503, - "step": 11915 - }, - { - "epoch": 0.8955358484894033, - "grad_norm": 1.8249035352530627, - "learning_rate": 1.1339626193073093e-07, - "loss": 0.9852, - "step": 11916 - }, - { - "epoch": 0.8956110025552383, - "grad_norm": 1.6321176026737532, - "learning_rate": 1.132347173217445e-07, - "loss": 0.9884, - "step": 11917 - }, - { - "epoch": 0.8956861566210732, - "grad_norm": 1.783375258561964, - "learning_rate": 1.1307328451082643e-07, - "loss": 0.8706, - "step": 11918 - }, - { - "epoch": 0.8957613106869081, - "grad_norm": 1.8087508131340428, - "learning_rate": 1.1291196350754107e-07, - "loss": 1.0346, - "step": 11919 - }, - { - "epoch": 0.8958364647527431, - "grad_norm": 2.1394617860238125, - "learning_rate": 1.1275075432144831e-07, - "loss": 0.9866, - "step": 11920 - }, - { - "epoch": 0.8959116188185781, - "grad_norm": 2.0632666116792473, - "learning_rate": 1.125896569621001e-07, - "loss": 0.9229, - "step": 11921 - }, - { - "epoch": 0.8959867728844131, - "grad_norm": 2.455019177053101, - "learning_rate": 1.1242867143904233e-07, - "loss": 1.014, - "step": 11922 - }, - { - "epoch": 0.896061926950248, - "grad_norm": 0.6517137694917327, - "learning_rate": 1.1226779776181339e-07, - "loss": 0.7851, - "step": 11923 - }, - { - "epoch": 0.8961370810160829, - "grad_norm": 0.739504371152988, - "learning_rate": 1.121070359399463e-07, - "loss": 0.8239, - "step": 11924 - }, - { - "epoch": 0.8962122350819179, - "grad_norm": 2.04715755929308, - "learning_rate": 1.1194638598296658e-07, - "loss": 0.8415, - "step": 11925 - }, - { - "epoch": 0.8962873891477529, - "grad_norm": 1.3452711145501737, - "learning_rate": 1.1178584790039348e-07, - "loss": 1.0352, - "step": 11926 - }, - { - "epoch": 0.8963625432135879, - "grad_norm": 0.7547627066028789, - "learning_rate": 1.1162542170173873e-07, - "loss": 0.9451, - "step": 11927 - }, - { - "epoch": 0.8964376972794228, - "grad_norm": 2.0690092771385773, - "learning_rate": 1.1146510739650939e-07, - "loss": 1.0129, - "step": 11928 - }, - { - "epoch": 0.8965128513452578, - "grad_norm": 1.945917671167523, - "learning_rate": 1.1130490499420386e-07, - "loss": 1.0206, - "step": 11929 - }, - { - "epoch": 0.8965880054110927, - "grad_norm": 2.458640246441044, - "learning_rate": 1.1114481450431523e-07, - "loss": 1.0411, - "step": 11930 - }, - { - "epoch": 0.8966631594769277, - "grad_norm": 1.736691345739951, - "learning_rate": 1.109848359363288e-07, - "loss": 0.8619, - "step": 11931 - }, - { - "epoch": 0.8967383135427627, - "grad_norm": 2.517809659670548, - "learning_rate": 1.1082496929972496e-07, - "loss": 0.9764, - "step": 11932 - }, - { - "epoch": 0.8968134676085976, - "grad_norm": 1.393358480091198, - "learning_rate": 1.1066521460397527e-07, - "loss": 0.9397, - "step": 11933 - }, - { - "epoch": 0.8968886216744326, - "grad_norm": 1.8387538654175253, - "learning_rate": 1.1050557185854636e-07, - "loss": 1.0044, - "step": 11934 - }, - { - "epoch": 0.8969637757402675, - "grad_norm": 0.7763410839754733, - "learning_rate": 1.1034604107289847e-07, - "loss": 0.8303, - "step": 11935 - }, - { - "epoch": 0.8970389298061026, - "grad_norm": 1.377191963855078, - "learning_rate": 1.1018662225648267e-07, - "loss": 0.9859, - "step": 11936 - }, - { - "epoch": 0.8971140838719375, - "grad_norm": 2.2197744663908145, - "learning_rate": 1.1002731541874654e-07, - "loss": 1.0901, - "step": 11937 - }, - { - "epoch": 0.8971892379377724, - "grad_norm": 1.9177301262303286, - "learning_rate": 1.0986812056912898e-07, - "loss": 0.9228, - "step": 11938 - }, - { - "epoch": 0.8972643920036074, - "grad_norm": 1.641543903053048, - "learning_rate": 1.0970903771706352e-07, - "loss": 0.9771, - "step": 11939 - }, - { - "epoch": 0.8973395460694423, - "grad_norm": 1.7840566775423565, - "learning_rate": 1.0955006687197533e-07, - "loss": 1.0867, - "step": 11940 - }, - { - "epoch": 0.8974147001352774, - "grad_norm": 2.138242969442062, - "learning_rate": 1.0939120804328505e-07, - "loss": 1.0025, - "step": 11941 - }, - { - "epoch": 0.8974898542011123, - "grad_norm": 3.0261636119614987, - "learning_rate": 1.0923246124040542e-07, - "loss": 0.9778, - "step": 11942 - }, - { - "epoch": 0.8975650082669473, - "grad_norm": 1.7620041440553462, - "learning_rate": 1.0907382647274266e-07, - "loss": 0.9532, - "step": 11943 - }, - { - "epoch": 0.8976401623327822, - "grad_norm": 3.080121317616855, - "learning_rate": 1.0891530374969615e-07, - "loss": 0.8835, - "step": 11944 - }, - { - "epoch": 0.8977153163986171, - "grad_norm": 1.9972286693213686, - "learning_rate": 1.0875689308065994e-07, - "loss": 1.0235, - "step": 11945 - }, - { - "epoch": 0.8977904704644522, - "grad_norm": 1.4767785292737452, - "learning_rate": 1.085985944750194e-07, - "loss": 0.9477, - "step": 11946 - }, - { - "epoch": 0.8978656245302871, - "grad_norm": 1.5396943320325434, - "learning_rate": 1.0844040794215503e-07, - "loss": 0.8705, - "step": 11947 - }, - { - "epoch": 0.8979407785961221, - "grad_norm": 1.9649337838933527, - "learning_rate": 1.0828233349143934e-07, - "loss": 0.983, - "step": 11948 - }, - { - "epoch": 0.898015932661957, - "grad_norm": 3.154180017910865, - "learning_rate": 1.0812437113223993e-07, - "loss": 1.0125, - "step": 11949 - }, - { - "epoch": 0.8980910867277919, - "grad_norm": 1.9920620149472943, - "learning_rate": 1.0796652087391556e-07, - "loss": 0.9703, - "step": 11950 - }, - { - "epoch": 0.898166240793627, - "grad_norm": 2.2635824533068596, - "learning_rate": 1.0780878272582006e-07, - "loss": 0.9421, - "step": 11951 - }, - { - "epoch": 0.8982413948594619, - "grad_norm": 2.138643558959237, - "learning_rate": 1.0765115669729974e-07, - "loss": 0.9953, - "step": 11952 - }, - { - "epoch": 0.8983165489252969, - "grad_norm": 3.942347736405764, - "learning_rate": 1.074936427976949e-07, - "loss": 1.0402, - "step": 11953 - }, - { - "epoch": 0.8983917029911318, - "grad_norm": 1.4972892745429578, - "learning_rate": 1.0733624103633831e-07, - "loss": 1.0523, - "step": 11954 - }, - { - "epoch": 0.8984668570569668, - "grad_norm": 1.314281707514703, - "learning_rate": 1.0717895142255672e-07, - "loss": 1.0241, - "step": 11955 - }, - { - "epoch": 0.8985420111228017, - "grad_norm": 1.393187358858761, - "learning_rate": 1.0702177396567114e-07, - "loss": 0.9764, - "step": 11956 - }, - { - "epoch": 0.8986171651886367, - "grad_norm": 1.860704731897084, - "learning_rate": 1.0686470867499342e-07, - "loss": 0.9194, - "step": 11957 - }, - { - "epoch": 0.8986923192544717, - "grad_norm": 1.7653998172265954, - "learning_rate": 1.0670775555983147e-07, - "loss": 0.9126, - "step": 11958 - }, - { - "epoch": 0.8987674733203066, - "grad_norm": 1.8580109537579879, - "learning_rate": 1.0655091462948518e-07, - "loss": 0.9833, - "step": 11959 - }, - { - "epoch": 0.8988426273861416, - "grad_norm": 1.524308019513934, - "learning_rate": 1.063941858932469e-07, - "loss": 0.9726, - "step": 11960 - }, - { - "epoch": 0.8989177814519765, - "grad_norm": 1.9214905765291455, - "learning_rate": 1.0623756936040451e-07, - "loss": 0.9729, - "step": 11961 - }, - { - "epoch": 0.8989929355178116, - "grad_norm": 1.9952046005959228, - "learning_rate": 1.0608106504023817e-07, - "loss": 0.9743, - "step": 11962 - }, - { - "epoch": 0.8990680895836465, - "grad_norm": 1.938624025208215, - "learning_rate": 1.0592467294202046e-07, - "loss": 0.9864, - "step": 11963 - }, - { - "epoch": 0.8991432436494814, - "grad_norm": 1.6641401532461129, - "learning_rate": 1.0576839307501928e-07, - "loss": 1.0236, - "step": 11964 - }, - { - "epoch": 0.8992183977153164, - "grad_norm": 1.8565147904624435, - "learning_rate": 1.056122254484939e-07, - "loss": 1.0249, - "step": 11965 - }, - { - "epoch": 0.8992935517811513, - "grad_norm": 3.2689447640332334, - "learning_rate": 1.0545617007169871e-07, - "loss": 0.955, - "step": 11966 - }, - { - "epoch": 0.8993687058469864, - "grad_norm": 2.0426020393423405, - "learning_rate": 1.0530022695387964e-07, - "loss": 0.9892, - "step": 11967 - }, - { - "epoch": 0.8994438599128213, - "grad_norm": 1.6009216515111986, - "learning_rate": 1.0514439610427772e-07, - "loss": 1.0165, - "step": 11968 - }, - { - "epoch": 0.8995190139786562, - "grad_norm": 2.207135169359311, - "learning_rate": 1.0498867753212626e-07, - "loss": 0.8629, - "step": 11969 - }, - { - "epoch": 0.8995941680444912, - "grad_norm": 1.371779137392488, - "learning_rate": 1.0483307124665208e-07, - "loss": 0.9633, - "step": 11970 - }, - { - "epoch": 0.8996693221103261, - "grad_norm": 2.172911472137572, - "learning_rate": 1.0467757725707538e-07, - "loss": 0.8355, - "step": 11971 - }, - { - "epoch": 0.8997444761761612, - "grad_norm": 2.1676135072829674, - "learning_rate": 1.0452219557260966e-07, - "loss": 1.074, - "step": 11972 - }, - { - "epoch": 0.8998196302419961, - "grad_norm": 1.8886697921031366, - "learning_rate": 1.043669262024629e-07, - "loss": 0.9634, - "step": 11973 - }, - { - "epoch": 0.8998947843078311, - "grad_norm": 1.7815253730870642, - "learning_rate": 1.0421176915583419e-07, - "loss": 0.8563, - "step": 11974 - }, - { - "epoch": 0.899969938373666, - "grad_norm": 1.4516584317440564, - "learning_rate": 1.0405672444191727e-07, - "loss": 0.925, - "step": 11975 - }, - { - "epoch": 0.9000450924395009, - "grad_norm": 1.692856024143339, - "learning_rate": 1.0390179206990012e-07, - "loss": 1.0225, - "step": 11976 - }, - { - "epoch": 0.900120246505336, - "grad_norm": 1.7343043407446352, - "learning_rate": 1.037469720489621e-07, - "loss": 0.96, - "step": 11977 - }, - { - "epoch": 0.9001954005711709, - "grad_norm": 1.8026919871396323, - "learning_rate": 1.0359226438827695e-07, - "loss": 0.984, - "step": 11978 - }, - { - "epoch": 0.9002705546370059, - "grad_norm": 1.7291304190026877, - "learning_rate": 1.0343766909701268e-07, - "loss": 0.9636, - "step": 11979 - }, - { - "epoch": 0.9003457087028408, - "grad_norm": 1.6466309831719497, - "learning_rate": 1.0328318618432819e-07, - "loss": 0.9919, - "step": 11980 - }, - { - "epoch": 0.9004208627686758, - "grad_norm": 2.226315882895082, - "learning_rate": 1.0312881565937837e-07, - "loss": 0.9768, - "step": 11981 - }, - { - "epoch": 0.9004960168345107, - "grad_norm": 2.4091966100375974, - "learning_rate": 1.0297455753130946e-07, - "loss": 0.9927, - "step": 11982 - }, - { - "epoch": 0.9005711709003457, - "grad_norm": 1.6941558830166965, - "learning_rate": 1.0282041180926282e-07, - "loss": 1.0171, - "step": 11983 - }, - { - "epoch": 0.9006463249661807, - "grad_norm": 2.2794113065078534, - "learning_rate": 1.0266637850237115e-07, - "loss": 0.9818, - "step": 11984 - }, - { - "epoch": 0.9007214790320156, - "grad_norm": 2.6073307092852236, - "learning_rate": 1.0251245761976202e-07, - "loss": 0.808, - "step": 11985 - }, - { - "epoch": 0.9007966330978506, - "grad_norm": 1.4056162359728581, - "learning_rate": 1.023586491705557e-07, - "loss": 1.0112, - "step": 11986 - }, - { - "epoch": 0.9008717871636855, - "grad_norm": 0.8737620042622981, - "learning_rate": 1.0220495316386601e-07, - "loss": 0.8594, - "step": 11987 - }, - { - "epoch": 0.9009469412295206, - "grad_norm": 1.357837026218724, - "learning_rate": 1.020513696087999e-07, - "loss": 0.9487, - "step": 11988 - }, - { - "epoch": 0.9010220952953555, - "grad_norm": 3.897998731610017, - "learning_rate": 1.0189789851445829e-07, - "loss": 1.0086, - "step": 11989 - }, - { - "epoch": 0.9010972493611904, - "grad_norm": 1.7401444982402638, - "learning_rate": 1.0174453988993392e-07, - "loss": 0.9297, - "step": 11990 - }, - { - "epoch": 0.9011724034270254, - "grad_norm": 1.5412991145739843, - "learning_rate": 1.0159129374431463e-07, - "loss": 0.986, - "step": 11991 - }, - { - "epoch": 0.9012475574928603, - "grad_norm": 1.9048841617438252, - "learning_rate": 1.0143816008668049e-07, - "loss": 1.011, - "step": 11992 - }, - { - "epoch": 0.9013227115586954, - "grad_norm": 2.091523994149488, - "learning_rate": 1.0128513892610623e-07, - "loss": 1.0227, - "step": 11993 - }, - { - "epoch": 0.9013978656245303, - "grad_norm": 2.2255189826136905, - "learning_rate": 1.0113223027165729e-07, - "loss": 0.9564, - "step": 11994 - }, - { - "epoch": 0.9014730196903652, - "grad_norm": 1.7731133387400317, - "learning_rate": 1.0097943413239507e-07, - "loss": 0.8842, - "step": 11995 - }, - { - "epoch": 0.9015481737562002, - "grad_norm": 2.1156650150633807, - "learning_rate": 1.0082675051737388e-07, - "loss": 0.8389, - "step": 11996 - }, - { - "epoch": 0.9016233278220351, - "grad_norm": 1.55825725546509, - "learning_rate": 1.0067417943563982e-07, - "loss": 0.9795, - "step": 11997 - }, - { - "epoch": 0.9016984818878702, - "grad_norm": 0.6668825743655333, - "learning_rate": 1.0052172089623324e-07, - "loss": 0.8333, - "step": 11998 - }, - { - "epoch": 0.9017736359537051, - "grad_norm": 2.4923108109249084, - "learning_rate": 1.0036937490818842e-07, - "loss": 0.912, - "step": 11999 - }, - { - "epoch": 0.9018487900195401, - "grad_norm": 2.0866382579068063, - "learning_rate": 1.0021714148053262e-07, - "loss": 0.9529, - "step": 12000 - }, - { - "epoch": 0.901923944085375, - "grad_norm": 1.473827837052508, - "learning_rate": 1.0006502062228572e-07, - "loss": 1.0064, - "step": 12001 - }, - { - "epoch": 0.9019990981512099, - "grad_norm": 1.8335347787405272, - "learning_rate": 9.991301234246163e-08, - "loss": 0.9566, - "step": 12002 - }, - { - "epoch": 0.902074252217045, - "grad_norm": 2.0647148031441906, - "learning_rate": 9.976111665006781e-08, - "loss": 0.9711, - "step": 12003 - }, - { - "epoch": 0.9021494062828799, - "grad_norm": 1.8567320046950533, - "learning_rate": 9.960933355410417e-08, - "loss": 0.8533, - "step": 12004 - }, - { - "epoch": 0.9022245603487149, - "grad_norm": 1.7251771227805017, - "learning_rate": 9.945766306356418e-08, - "loss": 1.1034, - "step": 12005 - }, - { - "epoch": 0.9022997144145498, - "grad_norm": 1.7296054314059606, - "learning_rate": 9.930610518743599e-08, - "loss": 0.9169, - "step": 12006 - }, - { - "epoch": 0.9023748684803848, - "grad_norm": 5.2704898083987, - "learning_rate": 9.915465993469884e-08, - "loss": 0.9919, - "step": 12007 - }, - { - "epoch": 0.9024500225462198, - "grad_norm": 1.4564133220984237, - "learning_rate": 9.900332731432715e-08, - "loss": 0.8934, - "step": 12008 - }, - { - "epoch": 0.9025251766120547, - "grad_norm": 1.4661195616008578, - "learning_rate": 9.885210733528748e-08, - "loss": 0.9423, - "step": 12009 - }, - { - "epoch": 0.9026003306778897, - "grad_norm": 2.01561352630425, - "learning_rate": 9.870100000654091e-08, - "loss": 0.9606, - "step": 12010 - }, - { - "epoch": 0.9026754847437246, - "grad_norm": 1.241016387699074, - "learning_rate": 9.855000533704006e-08, - "loss": 1.0076, - "step": 12011 - }, - { - "epoch": 0.9027506388095596, - "grad_norm": 1.6576544367215507, - "learning_rate": 9.839912333573285e-08, - "loss": 0.9242, - "step": 12012 - }, - { - "epoch": 0.9028257928753946, - "grad_norm": 0.8086841551141561, - "learning_rate": 9.824835401155928e-08, - "loss": 0.8017, - "step": 12013 - }, - { - "epoch": 0.9029009469412295, - "grad_norm": 1.5040035067730462, - "learning_rate": 9.809769737345308e-08, - "loss": 0.9772, - "step": 12014 - }, - { - "epoch": 0.9029761010070645, - "grad_norm": 1.7409493190437457, - "learning_rate": 9.794715343034088e-08, - "loss": 1.0146, - "step": 12015 - }, - { - "epoch": 0.9030512550728994, - "grad_norm": 1.9418737205596923, - "learning_rate": 9.779672219114332e-08, - "loss": 1.0354, - "step": 12016 - }, - { - "epoch": 0.9031264091387344, - "grad_norm": 2.895666067292049, - "learning_rate": 9.764640366477439e-08, - "loss": 0.9559, - "step": 12017 - }, - { - "epoch": 0.9032015632045693, - "grad_norm": 0.719942347814487, - "learning_rate": 9.749619786014052e-08, - "loss": 0.8587, - "step": 12018 - }, - { - "epoch": 0.9032767172704044, - "grad_norm": 1.7473115706463318, - "learning_rate": 9.734610478614192e-08, - "loss": 1.0871, - "step": 12019 - }, - { - "epoch": 0.9033518713362393, - "grad_norm": 0.7137317868262234, - "learning_rate": 9.719612445167258e-08, - "loss": 0.8105, - "step": 12020 - }, - { - "epoch": 0.9034270254020742, - "grad_norm": 2.4354889227267305, - "learning_rate": 9.704625686561896e-08, - "loss": 0.8644, - "step": 12021 - }, - { - "epoch": 0.9035021794679092, - "grad_norm": 0.832948313619623, - "learning_rate": 9.689650203686128e-08, - "loss": 0.8192, - "step": 12022 - }, - { - "epoch": 0.9035773335337441, - "grad_norm": 1.8089229539540026, - "learning_rate": 9.6746859974274e-08, - "loss": 1.0092, - "step": 12023 - }, - { - "epoch": 0.9036524875995792, - "grad_norm": 3.8189979585101237, - "learning_rate": 9.659733068672293e-08, - "loss": 0.9628, - "step": 12024 - }, - { - "epoch": 0.9037276416654141, - "grad_norm": 2.3637056332221498, - "learning_rate": 9.644791418306853e-08, - "loss": 0.9178, - "step": 12025 - }, - { - "epoch": 0.9038027957312491, - "grad_norm": 1.881900170600373, - "learning_rate": 9.629861047216436e-08, - "loss": 1.0421, - "step": 12026 - }, - { - "epoch": 0.903877949797084, - "grad_norm": 2.567834743975824, - "learning_rate": 9.614941956285782e-08, - "loss": 0.9345, - "step": 12027 - }, - { - "epoch": 0.9039531038629189, - "grad_norm": 2.077372594018382, - "learning_rate": 9.600034146398806e-08, - "loss": 1.0066, - "step": 12028 - }, - { - "epoch": 0.904028257928754, - "grad_norm": 1.6461575908981665, - "learning_rate": 9.58513761843891e-08, - "loss": 1.0041, - "step": 12029 - }, - { - "epoch": 0.9041034119945889, - "grad_norm": 1.5896477355842995, - "learning_rate": 9.570252373288745e-08, - "loss": 0.9955, - "step": 12030 - }, - { - "epoch": 0.9041785660604239, - "grad_norm": 1.8405361290090179, - "learning_rate": 9.555378411830339e-08, - "loss": 0.9234, - "step": 12031 - }, - { - "epoch": 0.9042537201262588, - "grad_norm": 0.9569232801918481, - "learning_rate": 9.54051573494501e-08, - "loss": 0.8963, - "step": 12032 - }, - { - "epoch": 0.9043288741920938, - "grad_norm": 2.9978661895878256, - "learning_rate": 9.525664343513495e-08, - "loss": 0.9271, - "step": 12033 - }, - { - "epoch": 0.9044040282579288, - "grad_norm": 3.2698528172678105, - "learning_rate": 9.510824238415672e-08, - "loss": 0.9745, - "step": 12034 - }, - { - "epoch": 0.9044791823237637, - "grad_norm": 2.549240230992375, - "learning_rate": 9.495995420530988e-08, - "loss": 0.979, - "step": 12035 - }, - { - "epoch": 0.9045543363895987, - "grad_norm": 1.6130379038387725, - "learning_rate": 9.481177890738012e-08, - "loss": 0.9149, - "step": 12036 - }, - { - "epoch": 0.9046294904554336, - "grad_norm": 1.8130787550227025, - "learning_rate": 9.466371649914883e-08, - "loss": 0.9939, - "step": 12037 - }, - { - "epoch": 0.9047046445212686, - "grad_norm": 1.7325785500470476, - "learning_rate": 9.451576698938746e-08, - "loss": 1.0038, - "step": 12038 - }, - { - "epoch": 0.9047797985871036, - "grad_norm": 0.6825757539368933, - "learning_rate": 9.436793038686364e-08, - "loss": 0.8421, - "step": 12039 - }, - { - "epoch": 0.9048549526529385, - "grad_norm": 1.9434385043956064, - "learning_rate": 9.42202067003377e-08, - "loss": 0.9691, - "step": 12040 - }, - { - "epoch": 0.9049301067187735, - "grad_norm": 1.558499670415996, - "learning_rate": 9.407259593856199e-08, - "loss": 0.9627, - "step": 12041 - }, - { - "epoch": 0.9050052607846084, - "grad_norm": 2.066108090788637, - "learning_rate": 9.392509811028282e-08, - "loss": 0.9952, - "step": 12042 - }, - { - "epoch": 0.9050804148504434, - "grad_norm": 1.4108276459604998, - "learning_rate": 9.377771322424055e-08, - "loss": 0.9924, - "step": 12043 - }, - { - "epoch": 0.9051555689162784, - "grad_norm": 1.2317176898785753, - "learning_rate": 9.363044128916864e-08, - "loss": 0.9848, - "step": 12044 - }, - { - "epoch": 0.9052307229821134, - "grad_norm": 2.4442130350144744, - "learning_rate": 9.348328231379255e-08, - "loss": 1.011, - "step": 12045 - }, - { - "epoch": 0.9053058770479483, - "grad_norm": 2.1894123982190057, - "learning_rate": 9.333623630683285e-08, - "loss": 0.9373, - "step": 12046 - }, - { - "epoch": 0.9053810311137832, - "grad_norm": 1.9405656372426312, - "learning_rate": 9.318930327700214e-08, - "loss": 0.9124, - "step": 12047 - }, - { - "epoch": 0.9054561851796182, - "grad_norm": 1.8978803154499655, - "learning_rate": 9.304248323300701e-08, - "loss": 0.914, - "step": 12048 - }, - { - "epoch": 0.9055313392454531, - "grad_norm": 1.8436647169141587, - "learning_rate": 9.289577618354649e-08, - "loss": 0.9089, - "step": 12049 - }, - { - "epoch": 0.9056064933112882, - "grad_norm": 4.04900760501252, - "learning_rate": 9.274918213731475e-08, - "loss": 0.9303, - "step": 12050 - }, - { - "epoch": 0.9056816473771231, - "grad_norm": 2.0306569334027738, - "learning_rate": 9.260270110299684e-08, - "loss": 0.8734, - "step": 12051 - }, - { - "epoch": 0.9057568014429581, - "grad_norm": 2.112717078979234, - "learning_rate": 9.245633308927293e-08, - "loss": 1.019, - "step": 12052 - }, - { - "epoch": 0.905831955508793, - "grad_norm": 1.4283380028208044, - "learning_rate": 9.231007810481539e-08, - "loss": 0.9804, - "step": 12053 - }, - { - "epoch": 0.905907109574628, - "grad_norm": 3.4658035273614396, - "learning_rate": 9.216393615829154e-08, - "loss": 0.9501, - "step": 12054 - }, - { - "epoch": 0.905982263640463, - "grad_norm": 1.8242575010313018, - "learning_rate": 9.201790725835933e-08, - "loss": 1.0688, - "step": 12055 - }, - { - "epoch": 0.9060574177062979, - "grad_norm": 1.3660498981577858, - "learning_rate": 9.187199141367274e-08, - "loss": 0.9304, - "step": 12056 - }, - { - "epoch": 0.9061325717721329, - "grad_norm": 2.489462004914631, - "learning_rate": 9.17261886328773e-08, - "loss": 0.967, - "step": 12057 - }, - { - "epoch": 0.9062077258379678, - "grad_norm": 1.5916938982459854, - "learning_rate": 9.158049892461228e-08, - "loss": 1.0266, - "step": 12058 - }, - { - "epoch": 0.9062828799038027, - "grad_norm": 1.5814662348242021, - "learning_rate": 9.143492229751038e-08, - "loss": 0.9653, - "step": 12059 - }, - { - "epoch": 0.9063580339696378, - "grad_norm": 1.5739085457394157, - "learning_rate": 9.128945876019777e-08, - "loss": 0.9238, - "step": 12060 - }, - { - "epoch": 0.9064331880354727, - "grad_norm": 2.2362969941883435, - "learning_rate": 9.114410832129404e-08, - "loss": 0.9807, - "step": 12061 - }, - { - "epoch": 0.9065083421013077, - "grad_norm": 1.810393918519672, - "learning_rate": 9.099887098941095e-08, - "loss": 1.0712, - "step": 12062 - }, - { - "epoch": 0.9065834961671426, - "grad_norm": 1.5469223169353297, - "learning_rate": 9.085374677315516e-08, - "loss": 0.9503, - "step": 12063 - }, - { - "epoch": 0.9066586502329776, - "grad_norm": 4.270721855879645, - "learning_rate": 9.070873568112581e-08, - "loss": 0.9522, - "step": 12064 - }, - { - "epoch": 0.9067338042988126, - "grad_norm": 1.2232463740971986, - "learning_rate": 9.056383772191422e-08, - "loss": 0.9267, - "step": 12065 - }, - { - "epoch": 0.9068089583646475, - "grad_norm": 3.2066270989443986, - "learning_rate": 9.041905290410711e-08, - "loss": 0.8472, - "step": 12066 - }, - { - "epoch": 0.9068841124304825, - "grad_norm": 2.416876871459962, - "learning_rate": 9.027438123628406e-08, - "loss": 1.0443, - "step": 12067 - }, - { - "epoch": 0.9069592664963174, - "grad_norm": 2.106241569162582, - "learning_rate": 9.012982272701597e-08, - "loss": 0.9685, - "step": 12068 - }, - { - "epoch": 0.9070344205621524, - "grad_norm": 1.4964650144148628, - "learning_rate": 8.998537738486977e-08, - "loss": 0.9011, - "step": 12069 - }, - { - "epoch": 0.9071095746279874, - "grad_norm": 1.506693924190317, - "learning_rate": 8.984104521840352e-08, - "loss": 0.9864, - "step": 12070 - }, - { - "epoch": 0.9071847286938224, - "grad_norm": 1.8066521721548212, - "learning_rate": 8.969682623617014e-08, - "loss": 0.9628, - "step": 12071 - }, - { - "epoch": 0.9072598827596573, - "grad_norm": 1.7550810212129035, - "learning_rate": 8.955272044671459e-08, - "loss": 0.9542, - "step": 12072 - }, - { - "epoch": 0.9073350368254922, - "grad_norm": 1.9436975232857034, - "learning_rate": 8.940872785857623e-08, - "loss": 0.9646, - "step": 12073 - }, - { - "epoch": 0.9074101908913272, - "grad_norm": 1.690574388574979, - "learning_rate": 8.92648484802867e-08, - "loss": 0.9424, - "step": 12074 - }, - { - "epoch": 0.9074853449571622, - "grad_norm": 1.4825274564313728, - "learning_rate": 8.912108232037163e-08, - "loss": 0.9309, - "step": 12075 - }, - { - "epoch": 0.9075604990229972, - "grad_norm": 1.900344581477483, - "learning_rate": 8.897742938734953e-08, - "loss": 0.9473, - "step": 12076 - }, - { - "epoch": 0.9076356530888321, - "grad_norm": 28.296473803064227, - "learning_rate": 8.883388968973293e-08, - "loss": 0.9194, - "step": 12077 - }, - { - "epoch": 0.9077108071546671, - "grad_norm": 2.554761599711383, - "learning_rate": 8.869046323602636e-08, - "loss": 0.9948, - "step": 12078 - }, - { - "epoch": 0.907785961220502, - "grad_norm": 1.3399288940067913, - "learning_rate": 8.85471500347288e-08, - "loss": 0.966, - "step": 12079 - }, - { - "epoch": 0.907861115286337, - "grad_norm": 1.752229904714861, - "learning_rate": 8.840395009433188e-08, - "loss": 0.9301, - "step": 12080 - }, - { - "epoch": 0.907936269352172, - "grad_norm": 1.8769909143365162, - "learning_rate": 8.82608634233215e-08, - "loss": 0.9208, - "step": 12081 - }, - { - "epoch": 0.9080114234180069, - "grad_norm": 1.60637488863019, - "learning_rate": 8.81178900301749e-08, - "loss": 1.0164, - "step": 12082 - }, - { - "epoch": 0.9080865774838419, - "grad_norm": 8.037164669843202, - "learning_rate": 8.797502992336436e-08, - "loss": 1.0246, - "step": 12083 - }, - { - "epoch": 0.9081617315496768, - "grad_norm": 1.459676187872006, - "learning_rate": 8.783228311135559e-08, - "loss": 1.0014, - "step": 12084 - }, - { - "epoch": 0.9082368856155117, - "grad_norm": 3.8259097793797507, - "learning_rate": 8.768964960260582e-08, - "loss": 0.983, - "step": 12085 - }, - { - "epoch": 0.9083120396813468, - "grad_norm": 1.6010203118307567, - "learning_rate": 8.754712940556675e-08, - "loss": 0.8798, - "step": 12086 - }, - { - "epoch": 0.9083871937471817, - "grad_norm": 2.076795109450073, - "learning_rate": 8.740472252868359e-08, - "loss": 0.9756, - "step": 12087 - }, - { - "epoch": 0.9084623478130167, - "grad_norm": 2.082498065082068, - "learning_rate": 8.726242898039516e-08, - "loss": 0.9819, - "step": 12088 - }, - { - "epoch": 0.9085375018788516, - "grad_norm": 2.6612281376275293, - "learning_rate": 8.712024876913138e-08, - "loss": 0.7816, - "step": 12089 - }, - { - "epoch": 0.9086126559446867, - "grad_norm": 1.7619584083367974, - "learning_rate": 8.697818190331818e-08, - "loss": 0.939, - "step": 12090 - }, - { - "epoch": 0.9086878100105216, - "grad_norm": 1.6501530402908773, - "learning_rate": 8.683622839137306e-08, - "loss": 0.9491, - "step": 12091 - }, - { - "epoch": 0.9087629640763565, - "grad_norm": 2.513847528706347, - "learning_rate": 8.669438824170727e-08, - "loss": 0.9783, - "step": 12092 - }, - { - "epoch": 0.9088381181421915, - "grad_norm": 1.8648197372290463, - "learning_rate": 8.655266146272544e-08, - "loss": 0.9476, - "step": 12093 - }, - { - "epoch": 0.9089132722080264, - "grad_norm": 1.503421616185897, - "learning_rate": 8.641104806282595e-08, - "loss": 0.9369, - "step": 12094 - }, - { - "epoch": 0.9089884262738614, - "grad_norm": 1.742482318651641, - "learning_rate": 8.626954805039921e-08, - "loss": 0.9482, - "step": 12095 - }, - { - "epoch": 0.9090635803396964, - "grad_norm": 3.601572164990082, - "learning_rate": 8.612816143382983e-08, - "loss": 0.926, - "step": 12096 - }, - { - "epoch": 0.9091387344055314, - "grad_norm": 1.9420463995649935, - "learning_rate": 8.598688822149557e-08, - "loss": 0.9309, - "step": 12097 - }, - { - "epoch": 0.9092138884713663, - "grad_norm": 2.53917536558041, - "learning_rate": 8.584572842176774e-08, - "loss": 1.0781, - "step": 12098 - }, - { - "epoch": 0.9092890425372012, - "grad_norm": 1.83875263361739, - "learning_rate": 8.570468204300984e-08, - "loss": 0.949, - "step": 12099 - }, - { - "epoch": 0.9093641966030362, - "grad_norm": 1.6009981230963641, - "learning_rate": 8.556374909358011e-08, - "loss": 0.9445, - "step": 12100 - }, - { - "epoch": 0.9094393506688712, - "grad_norm": 1.7508160187223543, - "learning_rate": 8.542292958182917e-08, - "loss": 1.0443, - "step": 12101 - }, - { - "epoch": 0.9095145047347062, - "grad_norm": 1.800538085937235, - "learning_rate": 8.528222351610104e-08, - "loss": 0.959, - "step": 12102 - }, - { - "epoch": 0.9095896588005411, - "grad_norm": 3.214446366549053, - "learning_rate": 8.514163090473281e-08, - "loss": 0.9182, - "step": 12103 - }, - { - "epoch": 0.909664812866376, - "grad_norm": 2.062867565347588, - "learning_rate": 8.50011517560556e-08, - "loss": 0.9091, - "step": 12104 - }, - { - "epoch": 0.909739966932211, - "grad_norm": 2.3093858318565217, - "learning_rate": 8.486078607839341e-08, - "loss": 1.0441, - "step": 12105 - }, - { - "epoch": 0.909815120998046, - "grad_norm": 1.6133350428002078, - "learning_rate": 8.472053388006295e-08, - "loss": 0.9939, - "step": 12106 - }, - { - "epoch": 0.909890275063881, - "grad_norm": 2.1444995648487493, - "learning_rate": 8.45803951693751e-08, - "loss": 0.9387, - "step": 12107 - }, - { - "epoch": 0.9099654291297159, - "grad_norm": 1.6248793190379949, - "learning_rate": 8.444036995463366e-08, - "loss": 0.906, - "step": 12108 - }, - { - "epoch": 0.9100405831955509, - "grad_norm": 1.72806887268196, - "learning_rate": 8.430045824413512e-08, - "loss": 1.0099, - "step": 12109 - }, - { - "epoch": 0.9101157372613858, - "grad_norm": 2.4874744394800623, - "learning_rate": 8.416066004616995e-08, - "loss": 0.9726, - "step": 12110 - }, - { - "epoch": 0.9101908913272208, - "grad_norm": 1.867087772531247, - "learning_rate": 8.402097536902242e-08, - "loss": 0.963, - "step": 12111 - }, - { - "epoch": 0.9102660453930558, - "grad_norm": 3.4543799972152938, - "learning_rate": 8.388140422096856e-08, - "loss": 0.8568, - "step": 12112 - }, - { - "epoch": 0.9103411994588907, - "grad_norm": 1.8038481374626145, - "learning_rate": 8.374194661027889e-08, - "loss": 0.8357, - "step": 12113 - }, - { - "epoch": 0.9104163535247257, - "grad_norm": 1.8553664651083444, - "learning_rate": 8.360260254521656e-08, - "loss": 0.9387, - "step": 12114 - }, - { - "epoch": 0.9104915075905606, - "grad_norm": 1.9670860282536347, - "learning_rate": 8.346337203403874e-08, - "loss": 0.8727, - "step": 12115 - }, - { - "epoch": 0.9105666616563957, - "grad_norm": 1.7371194107043701, - "learning_rate": 8.332425508499463e-08, - "loss": 0.9272, - "step": 12116 - }, - { - "epoch": 0.9106418157222306, - "grad_norm": 1.460403426360199, - "learning_rate": 8.318525170632829e-08, - "loss": 0.9093, - "step": 12117 - }, - { - "epoch": 0.9107169697880655, - "grad_norm": 1.7523405907543341, - "learning_rate": 8.304636190627557e-08, - "loss": 0.8881, - "step": 12118 - }, - { - "epoch": 0.9107921238539005, - "grad_norm": 1.7008560886134911, - "learning_rate": 8.290758569306633e-08, - "loss": 0.9639, - "step": 12119 - }, - { - "epoch": 0.9108672779197354, - "grad_norm": 1.5431008584168522, - "learning_rate": 8.276892307492356e-08, - "loss": 1.0356, - "step": 12120 - }, - { - "epoch": 0.9109424319855705, - "grad_norm": 1.742058171830593, - "learning_rate": 8.263037406006423e-08, - "loss": 1.0099, - "step": 12121 - }, - { - "epoch": 0.9110175860514054, - "grad_norm": 1.339179761419424, - "learning_rate": 8.249193865669669e-08, - "loss": 0.9527, - "step": 12122 - }, - { - "epoch": 0.9110927401172404, - "grad_norm": 1.6412889862659739, - "learning_rate": 8.235361687302478e-08, - "loss": 0.9157, - "step": 12123 - }, - { - "epoch": 0.9111678941830753, - "grad_norm": 1.5693908567854262, - "learning_rate": 8.221540871724398e-08, - "loss": 0.8387, - "step": 12124 - }, - { - "epoch": 0.9112430482489102, - "grad_norm": 1.5904311236206752, - "learning_rate": 8.207731419754415e-08, - "loss": 0.9306, - "step": 12125 - }, - { - "epoch": 0.9113182023147453, - "grad_norm": 2.3251722415578975, - "learning_rate": 8.193933332210745e-08, - "loss": 0.9963, - "step": 12126 - }, - { - "epoch": 0.9113933563805802, - "grad_norm": 1.709782458739526, - "learning_rate": 8.180146609910998e-08, - "loss": 1.0234, - "step": 12127 - }, - { - "epoch": 0.9114685104464152, - "grad_norm": 0.8173832500278847, - "learning_rate": 8.166371253672122e-08, - "loss": 0.8577, - "step": 12128 - }, - { - "epoch": 0.9115436645122501, - "grad_norm": 1.6731024541180102, - "learning_rate": 8.152607264310308e-08, - "loss": 1.0218, - "step": 12129 - }, - { - "epoch": 0.911618818578085, - "grad_norm": 2.695226872507602, - "learning_rate": 8.138854642641147e-08, - "loss": 0.9893, - "step": 12130 - }, - { - "epoch": 0.91169397264392, - "grad_norm": 1.899045619189657, - "learning_rate": 8.125113389479521e-08, - "loss": 0.8651, - "step": 12131 - }, - { - "epoch": 0.911769126709755, - "grad_norm": 2.146345861268137, - "learning_rate": 8.111383505639713e-08, - "loss": 0.9821, - "step": 12132 - }, - { - "epoch": 0.91184428077559, - "grad_norm": 1.597816739031016, - "learning_rate": 8.09766499193516e-08, - "loss": 0.9118, - "step": 12133 - }, - { - "epoch": 0.9119194348414249, - "grad_norm": 1.9793418070615088, - "learning_rate": 8.083957849178835e-08, - "loss": 1.006, - "step": 12134 - }, - { - "epoch": 0.9119945889072599, - "grad_norm": 1.5241081913318646, - "learning_rate": 8.07026207818291e-08, - "loss": 0.9375, - "step": 12135 - }, - { - "epoch": 0.9120697429730948, - "grad_norm": 2.5495235035320336, - "learning_rate": 8.056577679758891e-08, - "loss": 0.8946, - "step": 12136 - }, - { - "epoch": 0.9121448970389298, - "grad_norm": 2.178603035833775, - "learning_rate": 8.042904654717642e-08, - "loss": 0.8776, - "step": 12137 - }, - { - "epoch": 0.9122200511047648, - "grad_norm": 1.8866174643046383, - "learning_rate": 8.029243003869379e-08, - "loss": 0.9909, - "step": 12138 - }, - { - "epoch": 0.9122952051705997, - "grad_norm": 1.8965391235620577, - "learning_rate": 8.015592728023523e-08, - "loss": 0.9206, - "step": 12139 - }, - { - "epoch": 0.9123703592364347, - "grad_norm": 1.774680957866096, - "learning_rate": 8.001953827988984e-08, - "loss": 0.9743, - "step": 12140 - }, - { - "epoch": 0.9124455133022696, - "grad_norm": 2.388093293455486, - "learning_rate": 7.988326304573844e-08, - "loss": 1.0174, - "step": 12141 - }, - { - "epoch": 0.9125206673681047, - "grad_norm": 0.7465237481848812, - "learning_rate": 7.974710158585706e-08, - "loss": 0.8384, - "step": 12142 - }, - { - "epoch": 0.9125958214339396, - "grad_norm": 2.17483044982917, - "learning_rate": 7.961105390831257e-08, - "loss": 0.9336, - "step": 12143 - }, - { - "epoch": 0.9126709754997745, - "grad_norm": 1.6458292932903424, - "learning_rate": 7.947512002116697e-08, - "loss": 1.0014, - "step": 12144 - }, - { - "epoch": 0.9127461295656095, - "grad_norm": 2.106340953096948, - "learning_rate": 7.933929993247468e-08, - "loss": 0.9251, - "step": 12145 - }, - { - "epoch": 0.9128212836314444, - "grad_norm": 1.5697087887674623, - "learning_rate": 7.92035936502835e-08, - "loss": 0.9305, - "step": 12146 - }, - { - "epoch": 0.9128964376972795, - "grad_norm": 1.6705325432227618, - "learning_rate": 7.906800118263456e-08, - "loss": 0.9001, - "step": 12147 - }, - { - "epoch": 0.9129715917631144, - "grad_norm": 2.5744567668438605, - "learning_rate": 7.893252253756211e-08, - "loss": 0.8665, - "step": 12148 - }, - { - "epoch": 0.9130467458289493, - "grad_norm": 8.00029706403412, - "learning_rate": 7.879715772309458e-08, - "loss": 1.0241, - "step": 12149 - }, - { - "epoch": 0.9131218998947843, - "grad_norm": 1.8341720208808134, - "learning_rate": 7.866190674725159e-08, - "loss": 1.0882, - "step": 12150 - }, - { - "epoch": 0.9131970539606192, - "grad_norm": 4.635621735683658, - "learning_rate": 7.852676961804827e-08, - "loss": 0.9735, - "step": 12151 - }, - { - "epoch": 0.9132722080264543, - "grad_norm": 3.503890599924986, - "learning_rate": 7.839174634349178e-08, - "loss": 0.9752, - "step": 12152 - }, - { - "epoch": 0.9133473620922892, - "grad_norm": 1.708844093014798, - "learning_rate": 7.825683693158236e-08, - "loss": 0.9405, - "step": 12153 - }, - { - "epoch": 0.9134225161581242, - "grad_norm": 2.954358847267084, - "learning_rate": 7.812204139031409e-08, - "loss": 0.965, - "step": 12154 - }, - { - "epoch": 0.9134976702239591, - "grad_norm": 1.8175835188575133, - "learning_rate": 7.798735972767478e-08, - "loss": 0.937, - "step": 12155 - }, - { - "epoch": 0.913572824289794, - "grad_norm": 1.384515995397502, - "learning_rate": 7.785279195164384e-08, - "loss": 1.0048, - "step": 12156 - }, - { - "epoch": 0.913647978355629, - "grad_norm": 2.723984250783555, - "learning_rate": 7.771833807019557e-08, - "loss": 0.9062, - "step": 12157 - }, - { - "epoch": 0.913723132421464, - "grad_norm": 2.364163421675246, - "learning_rate": 7.758399809129645e-08, - "loss": 1.0593, - "step": 12158 - }, - { - "epoch": 0.913798286487299, - "grad_norm": 1.531439088318121, - "learning_rate": 7.744977202290725e-08, - "loss": 1.0095, - "step": 12159 - }, - { - "epoch": 0.9138734405531339, - "grad_norm": 1.9499790337242375, - "learning_rate": 7.73156598729805e-08, - "loss": 1.04, - "step": 12160 - }, - { - "epoch": 0.9139485946189689, - "grad_norm": 1.41971768218866, - "learning_rate": 7.718166164946361e-08, - "loss": 0.8996, - "step": 12161 - }, - { - "epoch": 0.9140237486848038, - "grad_norm": 0.6661535215393959, - "learning_rate": 7.704777736029621e-08, - "loss": 0.8619, - "step": 12162 - }, - { - "epoch": 0.9140989027506388, - "grad_norm": 1.5103532010479845, - "learning_rate": 7.691400701341156e-08, - "loss": 1.0304, - "step": 12163 - }, - { - "epoch": 0.9141740568164738, - "grad_norm": 1.799592658152191, - "learning_rate": 7.678035061673572e-08, - "loss": 0.9807, - "step": 12164 - }, - { - "epoch": 0.9142492108823087, - "grad_norm": 2.025094432748224, - "learning_rate": 7.664680817818925e-08, - "loss": 0.9029, - "step": 12165 - }, - { - "epoch": 0.9143243649481437, - "grad_norm": 2.130282399973697, - "learning_rate": 7.651337970568361e-08, - "loss": 1.0474, - "step": 12166 - }, - { - "epoch": 0.9143995190139786, - "grad_norm": 1.8808868818163205, - "learning_rate": 7.638006520712625e-08, - "loss": 0.8933, - "step": 12167 - }, - { - "epoch": 0.9144746730798137, - "grad_norm": 2.239654279664483, - "learning_rate": 7.624686469041575e-08, - "loss": 1.0544, - "step": 12168 - }, - { - "epoch": 0.9145498271456486, - "grad_norm": 1.6122656481872666, - "learning_rate": 7.611377816344533e-08, - "loss": 0.9507, - "step": 12169 - }, - { - "epoch": 0.9146249812114835, - "grad_norm": 1.3432914040050705, - "learning_rate": 7.598080563410048e-08, - "loss": 0.9758, - "step": 12170 - }, - { - "epoch": 0.9147001352773185, - "grad_norm": 1.8675144819032619, - "learning_rate": 7.584794711026021e-08, - "loss": 0.9208, - "step": 12171 - }, - { - "epoch": 0.9147752893431534, - "grad_norm": 1.930218704560839, - "learning_rate": 7.571520259979757e-08, - "loss": 0.9036, - "step": 12172 - }, - { - "epoch": 0.9148504434089885, - "grad_norm": 1.8048674918908318, - "learning_rate": 7.558257211057739e-08, - "loss": 1.0902, - "step": 12173 - }, - { - "epoch": 0.9149255974748234, - "grad_norm": 1.8760516078698994, - "learning_rate": 7.545005565045914e-08, - "loss": 0.983, - "step": 12174 - }, - { - "epoch": 0.9150007515406583, - "grad_norm": 1.8416057985123357, - "learning_rate": 7.531765322729455e-08, - "loss": 1.0487, - "step": 12175 - }, - { - "epoch": 0.9150759056064933, - "grad_norm": 1.5695708691850028, - "learning_rate": 7.518536484892956e-08, - "loss": 0.8713, - "step": 12176 - }, - { - "epoch": 0.9151510596723282, - "grad_norm": 1.776612854632306, - "learning_rate": 7.50531905232017e-08, - "loss": 1.0103, - "step": 12177 - }, - { - "epoch": 0.9152262137381633, - "grad_norm": 1.7580543182192427, - "learning_rate": 7.492113025794378e-08, - "loss": 0.9907, - "step": 12178 - }, - { - "epoch": 0.9153013678039982, - "grad_norm": 3.536102067233781, - "learning_rate": 7.478918406098044e-08, - "loss": 0.9567, - "step": 12179 - }, - { - "epoch": 0.9153765218698332, - "grad_norm": 1.7062060982781757, - "learning_rate": 7.46573519401299e-08, - "loss": 1.0031, - "step": 12180 - }, - { - "epoch": 0.9154516759356681, - "grad_norm": 1.472793855992255, - "learning_rate": 7.452563390320388e-08, - "loss": 0.8745, - "step": 12181 - }, - { - "epoch": 0.915526830001503, - "grad_norm": 2.0379652089002867, - "learning_rate": 7.43940299580077e-08, - "loss": 0.925, - "step": 12182 - }, - { - "epoch": 0.9156019840673381, - "grad_norm": 2.0989907564320425, - "learning_rate": 7.426254011233823e-08, - "loss": 1.0197, - "step": 12183 - }, - { - "epoch": 0.915677138133173, - "grad_norm": 1.7214280505538542, - "learning_rate": 7.41311643739877e-08, - "loss": 0.9412, - "step": 12184 - }, - { - "epoch": 0.915752292199008, - "grad_norm": 1.5528046214210334, - "learning_rate": 7.399990275074009e-08, - "loss": 1.004, - "step": 12185 - }, - { - "epoch": 0.9158274462648429, - "grad_norm": 2.2639311674053855, - "learning_rate": 7.386875525037384e-08, - "loss": 0.9794, - "step": 12186 - }, - { - "epoch": 0.9159026003306779, - "grad_norm": 1.7067764755526718, - "learning_rate": 7.373772188065919e-08, - "loss": 0.8729, - "step": 12187 - }, - { - "epoch": 0.9159777543965129, - "grad_norm": 1.723862759648277, - "learning_rate": 7.360680264936037e-08, - "loss": 0.9847, - "step": 12188 - }, - { - "epoch": 0.9160529084623478, - "grad_norm": 5.549877460948392, - "learning_rate": 7.347599756423584e-08, - "loss": 0.962, - "step": 12189 - }, - { - "epoch": 0.9161280625281828, - "grad_norm": 1.7196484384982353, - "learning_rate": 7.334530663303539e-08, - "loss": 0.9194, - "step": 12190 - }, - { - "epoch": 0.9162032165940177, - "grad_norm": 2.1410245870490683, - "learning_rate": 7.321472986350286e-08, - "loss": 1.0736, - "step": 12191 - }, - { - "epoch": 0.9162783706598527, - "grad_norm": 1.63981179564712, - "learning_rate": 7.308426726337557e-08, - "loss": 0.9868, - "step": 12192 - }, - { - "epoch": 0.9163535247256877, - "grad_norm": 8.244443467178128, - "learning_rate": 7.295391884038493e-08, - "loss": 1.0102, - "step": 12193 - }, - { - "epoch": 0.9164286787915226, - "grad_norm": 0.7769410897191118, - "learning_rate": 7.282368460225297e-08, - "loss": 0.8567, - "step": 12194 - }, - { - "epoch": 0.9165038328573576, - "grad_norm": 2.171437610521361, - "learning_rate": 7.269356455669772e-08, - "loss": 0.9548, - "step": 12195 - }, - { - "epoch": 0.9165789869231925, - "grad_norm": 1.6129678417152626, - "learning_rate": 7.256355871142883e-08, - "loss": 1.0171, - "step": 12196 - }, - { - "epoch": 0.9166541409890275, - "grad_norm": 2.146930734946509, - "learning_rate": 7.243366707414988e-08, - "loss": 0.9343, - "step": 12197 - }, - { - "epoch": 0.9167292950548624, - "grad_norm": 1.657951416753317, - "learning_rate": 7.230388965255695e-08, - "loss": 0.9718, - "step": 12198 - }, - { - "epoch": 0.9168044491206975, - "grad_norm": 2.1866348735700023, - "learning_rate": 7.217422645434079e-08, - "loss": 0.9617, - "step": 12199 - }, - { - "epoch": 0.9168796031865324, - "grad_norm": 2.0506634778310904, - "learning_rate": 7.204467748718324e-08, - "loss": 0.9779, - "step": 12200 - }, - { - "epoch": 0.9169547572523673, - "grad_norm": 1.9456083386838556, - "learning_rate": 7.191524275876148e-08, - "loss": 1.033, - "step": 12201 - }, - { - "epoch": 0.9170299113182023, - "grad_norm": 1.7427826963779602, - "learning_rate": 7.178592227674451e-08, - "loss": 0.87, - "step": 12202 - }, - { - "epoch": 0.9171050653840372, - "grad_norm": 3.23211753311413, - "learning_rate": 7.165671604879575e-08, - "loss": 0.9501, - "step": 12203 - }, - { - "epoch": 0.9171802194498723, - "grad_norm": 1.7156883019614328, - "learning_rate": 7.152762408257018e-08, - "loss": 0.9401, - "step": 12204 - }, - { - "epoch": 0.9172553735157072, - "grad_norm": 1.9964589122744971, - "learning_rate": 7.139864638571768e-08, - "loss": 0.9157, - "step": 12205 - }, - { - "epoch": 0.9173305275815422, - "grad_norm": 1.9515919276172768, - "learning_rate": 7.12697829658806e-08, - "loss": 0.989, - "step": 12206 - }, - { - "epoch": 0.9174056816473771, - "grad_norm": 0.7380717146695016, - "learning_rate": 7.114103383069436e-08, - "loss": 0.8633, - "step": 12207 - }, - { - "epoch": 0.917480835713212, - "grad_norm": 1.500000635782743, - "learning_rate": 7.101239898778778e-08, - "loss": 1.0011, - "step": 12208 - }, - { - "epoch": 0.9175559897790471, - "grad_norm": 1.7994645408407708, - "learning_rate": 7.088387844478316e-08, - "loss": 0.9644, - "step": 12209 - }, - { - "epoch": 0.917631143844882, - "grad_norm": 1.6404672456010119, - "learning_rate": 7.075547220929624e-08, - "loss": 0.904, - "step": 12210 - }, - { - "epoch": 0.917706297910717, - "grad_norm": 1.4199157462847016, - "learning_rate": 7.062718028893466e-08, - "loss": 0.9599, - "step": 12211 - }, - { - "epoch": 0.9177814519765519, - "grad_norm": 1.3522868337835283, - "learning_rate": 7.049900269130105e-08, - "loss": 0.9269, - "step": 12212 - }, - { - "epoch": 0.917856606042387, - "grad_norm": 2.155227529281401, - "learning_rate": 7.03709394239902e-08, - "loss": 0.9918, - "step": 12213 - }, - { - "epoch": 0.9179317601082219, - "grad_norm": 1.5002264805523329, - "learning_rate": 7.024299049459003e-08, - "loss": 1.0656, - "step": 12214 - }, - { - "epoch": 0.9180069141740568, - "grad_norm": 1.7038653803020631, - "learning_rate": 7.011515591068184e-08, - "loss": 1.0374, - "step": 12215 - }, - { - "epoch": 0.9180820682398918, - "grad_norm": 2.127066448965384, - "learning_rate": 6.998743567984133e-08, - "loss": 0.9357, - "step": 12216 - }, - { - "epoch": 0.9181572223057267, - "grad_norm": 3.958508350451683, - "learning_rate": 6.985982980963534e-08, - "loss": 0.8977, - "step": 12217 - }, - { - "epoch": 0.9182323763715617, - "grad_norm": 2.057279047545027, - "learning_rate": 6.97323383076256e-08, - "loss": 0.9788, - "step": 12218 - }, - { - "epoch": 0.9183075304373967, - "grad_norm": 0.7910196563241217, - "learning_rate": 6.960496118136627e-08, - "loss": 0.9061, - "step": 12219 - }, - { - "epoch": 0.9183826845032316, - "grad_norm": 1.9820704858104379, - "learning_rate": 6.947769843840511e-08, - "loss": 0.9262, - "step": 12220 - }, - { - "epoch": 0.9184578385690666, - "grad_norm": 2.4768859459526618, - "learning_rate": 6.935055008628254e-08, - "loss": 0.7958, - "step": 12221 - }, - { - "epoch": 0.9185329926349015, - "grad_norm": 3.446808021150579, - "learning_rate": 6.922351613253297e-08, - "loss": 0.9054, - "step": 12222 - }, - { - "epoch": 0.9186081467007365, - "grad_norm": 2.0000077485888106, - "learning_rate": 6.909659658468347e-08, - "loss": 0.9115, - "step": 12223 - }, - { - "epoch": 0.9186833007665715, - "grad_norm": 1.832723133090568, - "learning_rate": 6.896979145025472e-08, - "loss": 0.9424, - "step": 12224 - }, - { - "epoch": 0.9187584548324065, - "grad_norm": 1.4775164716547935, - "learning_rate": 6.88431007367598e-08, - "loss": 0.9189, - "step": 12225 - }, - { - "epoch": 0.9188336088982414, - "grad_norm": 1.7179428285982492, - "learning_rate": 6.871652445170672e-08, - "loss": 1.0173, - "step": 12226 - }, - { - "epoch": 0.9189087629640763, - "grad_norm": 1.8269495649386684, - "learning_rate": 6.859006260259437e-08, - "loss": 0.9388, - "step": 12227 - }, - { - "epoch": 0.9189839170299113, - "grad_norm": 1.9918239486180018, - "learning_rate": 6.846371519691673e-08, - "loss": 1.0306, - "step": 12228 - }, - { - "epoch": 0.9190590710957462, - "grad_norm": 3.0208909303282456, - "learning_rate": 6.833748224216029e-08, - "loss": 0.9566, - "step": 12229 - }, - { - "epoch": 0.9191342251615813, - "grad_norm": 1.9064874344838596, - "learning_rate": 6.821136374580528e-08, - "loss": 0.9274, - "step": 12230 - }, - { - "epoch": 0.9192093792274162, - "grad_norm": 2.236640262059421, - "learning_rate": 6.808535971532393e-08, - "loss": 0.897, - "step": 12231 - }, - { - "epoch": 0.9192845332932512, - "grad_norm": 1.9114927519177662, - "learning_rate": 6.79594701581827e-08, - "loss": 1.079, - "step": 12232 - }, - { - "epoch": 0.9193596873590861, - "grad_norm": 1.943154605064494, - "learning_rate": 6.783369508184145e-08, - "loss": 1.0222, - "step": 12233 - }, - { - "epoch": 0.919434841424921, - "grad_norm": 1.6973157025885137, - "learning_rate": 6.770803449375262e-08, - "loss": 0.8355, - "step": 12234 - }, - { - "epoch": 0.9195099954907561, - "grad_norm": 2.2759038911113816, - "learning_rate": 6.75824884013616e-08, - "loss": 0.9581, - "step": 12235 - }, - { - "epoch": 0.919585149556591, - "grad_norm": 1.764823934402437, - "learning_rate": 6.745705681210778e-08, - "loss": 0.9972, - "step": 12236 - }, - { - "epoch": 0.919660303622426, - "grad_norm": 2.005708891248455, - "learning_rate": 6.73317397334241e-08, - "loss": 0.8862, - "step": 12237 - }, - { - "epoch": 0.9197354576882609, - "grad_norm": 1.431636045274266, - "learning_rate": 6.720653717273506e-08, - "loss": 1.0204, - "step": 12238 - }, - { - "epoch": 0.9198106117540958, - "grad_norm": 5.197300489008392, - "learning_rate": 6.708144913746006e-08, - "loss": 0.9498, - "step": 12239 - }, - { - "epoch": 0.9198857658199309, - "grad_norm": 1.637601191182302, - "learning_rate": 6.695647563501072e-08, - "loss": 0.9533, - "step": 12240 - }, - { - "epoch": 0.9199609198857658, - "grad_norm": 1.653809747056359, - "learning_rate": 6.683161667279247e-08, - "loss": 0.9861, - "step": 12241 - }, - { - "epoch": 0.9200360739516008, - "grad_norm": 2.797968986137569, - "learning_rate": 6.670687225820315e-08, - "loss": 0.8195, - "step": 12242 - }, - { - "epoch": 0.9201112280174357, - "grad_norm": 1.5493807293907924, - "learning_rate": 6.65822423986353e-08, - "loss": 0.9467, - "step": 12243 - }, - { - "epoch": 0.9201863820832707, - "grad_norm": 2.021679442203637, - "learning_rate": 6.645772710147279e-08, - "loss": 0.9419, - "step": 12244 - }, - { - "epoch": 0.9202615361491057, - "grad_norm": 2.2106329502591526, - "learning_rate": 6.633332637409395e-08, - "loss": 0.9122, - "step": 12245 - }, - { - "epoch": 0.9203366902149406, - "grad_norm": 1.7579968843182934, - "learning_rate": 6.620904022386997e-08, - "loss": 0.9592, - "step": 12246 - }, - { - "epoch": 0.9204118442807756, - "grad_norm": 0.8099389059668471, - "learning_rate": 6.608486865816587e-08, - "loss": 0.9068, - "step": 12247 - }, - { - "epoch": 0.9204869983466105, - "grad_norm": 1.890821147233282, - "learning_rate": 6.59608116843382e-08, - "loss": 0.9831, - "step": 12248 - }, - { - "epoch": 0.9205621524124455, - "grad_norm": 1.8622598787707105, - "learning_rate": 6.583686930973864e-08, - "loss": 0.8868, - "step": 12249 - }, - { - "epoch": 0.9206373064782805, - "grad_norm": 2.6512558606346928, - "learning_rate": 6.571304154171087e-08, - "loss": 1.0094, - "step": 12250 - }, - { - "epoch": 0.9207124605441155, - "grad_norm": 0.7613547213328764, - "learning_rate": 6.558932838759234e-08, - "loss": 0.841, - "step": 12251 - }, - { - "epoch": 0.9207876146099504, - "grad_norm": 1.2917805078033096, - "learning_rate": 6.546572985471322e-08, - "loss": 0.9964, - "step": 12252 - }, - { - "epoch": 0.9208627686757853, - "grad_norm": 2.2347897231415166, - "learning_rate": 6.534224595039739e-08, - "loss": 0.9656, - "step": 12253 - }, - { - "epoch": 0.9209379227416203, - "grad_norm": 1.996729680416579, - "learning_rate": 6.521887668196213e-08, - "loss": 0.9269, - "step": 12254 - }, - { - "epoch": 0.9210130768074553, - "grad_norm": 1.4810603829231204, - "learning_rate": 6.509562205671692e-08, - "loss": 0.9541, - "step": 12255 - }, - { - "epoch": 0.9210882308732903, - "grad_norm": 2.321277353580587, - "learning_rate": 6.49724820819657e-08, - "loss": 0.9863, - "step": 12256 - }, - { - "epoch": 0.9211633849391252, - "grad_norm": 1.7564789863717214, - "learning_rate": 6.484945676500464e-08, - "loss": 0.9559, - "step": 12257 - }, - { - "epoch": 0.9212385390049602, - "grad_norm": 1.422393620406778, - "learning_rate": 6.4726546113123e-08, - "loss": 1.0313, - "step": 12258 - }, - { - "epoch": 0.9213136930707951, - "grad_norm": 2.518637139226891, - "learning_rate": 6.46037501336043e-08, - "loss": 0.9758, - "step": 12259 - }, - { - "epoch": 0.92138884713663, - "grad_norm": 1.7407866178641587, - "learning_rate": 6.448106883372495e-08, - "loss": 1.0369, - "step": 12260 - }, - { - "epoch": 0.9214640012024651, - "grad_norm": 1.9137684498899838, - "learning_rate": 6.435850222075334e-08, - "loss": 0.9847, - "step": 12261 - }, - { - "epoch": 0.9215391552683, - "grad_norm": 1.7376260615661772, - "learning_rate": 6.423605030195278e-08, - "loss": 0.9847, - "step": 12262 - }, - { - "epoch": 0.921614309334135, - "grad_norm": 1.6270496206987488, - "learning_rate": 6.411371308457857e-08, - "loss": 0.9889, - "step": 12263 - }, - { - "epoch": 0.9216894633999699, - "grad_norm": 1.5946151031021956, - "learning_rate": 6.399149057588027e-08, - "loss": 1.0045, - "step": 12264 - }, - { - "epoch": 0.9217646174658048, - "grad_norm": 1.6232729317134997, - "learning_rate": 6.386938278309916e-08, - "loss": 0.974, - "step": 12265 - }, - { - "epoch": 0.9218397715316399, - "grad_norm": 1.7184364032759205, - "learning_rate": 6.374738971347149e-08, - "loss": 0.9228, - "step": 12266 - }, - { - "epoch": 0.9219149255974748, - "grad_norm": 1.9736532279142647, - "learning_rate": 6.362551137422522e-08, - "loss": 0.936, - "step": 12267 - }, - { - "epoch": 0.9219900796633098, - "grad_norm": 1.8057818376949972, - "learning_rate": 6.350374777258216e-08, - "loss": 0.931, - "step": 12268 - }, - { - "epoch": 0.9220652337291447, - "grad_norm": 1.6805321277565255, - "learning_rate": 6.33820989157574e-08, - "loss": 0.9443, - "step": 12269 - }, - { - "epoch": 0.9221403877949798, - "grad_norm": 1.4556378203401135, - "learning_rate": 6.326056481095942e-08, - "loss": 0.9029, - "step": 12270 - }, - { - "epoch": 0.9222155418608147, - "grad_norm": 2.819339235552929, - "learning_rate": 6.313914546538868e-08, - "loss": 0.9711, - "step": 12271 - }, - { - "epoch": 0.9222906959266496, - "grad_norm": 1.7179102840423752, - "learning_rate": 6.301784088624073e-08, - "loss": 1.028, - "step": 12272 - }, - { - "epoch": 0.9223658499924846, - "grad_norm": 1.6437336126750788, - "learning_rate": 6.289665108070252e-08, - "loss": 0.9608, - "step": 12273 - }, - { - "epoch": 0.9224410040583195, - "grad_norm": 2.0751490114320026, - "learning_rate": 6.277557605595607e-08, - "loss": 0.9589, - "step": 12274 - }, - { - "epoch": 0.9225161581241546, - "grad_norm": 0.746806857460077, - "learning_rate": 6.26546158191743e-08, - "loss": 0.8379, - "step": 12275 - }, - { - "epoch": 0.9225913121899895, - "grad_norm": 2.8152113666761327, - "learning_rate": 6.253377037752506e-08, - "loss": 1.0134, - "step": 12276 - }, - { - "epoch": 0.9226664662558245, - "grad_norm": 2.038837171944438, - "learning_rate": 6.241303973816947e-08, - "loss": 0.9378, - "step": 12277 - }, - { - "epoch": 0.9227416203216594, - "grad_norm": 2.581773430537788, - "learning_rate": 6.229242390826029e-08, - "loss": 0.9677, - "step": 12278 - }, - { - "epoch": 0.9228167743874943, - "grad_norm": 0.7320493030255829, - "learning_rate": 6.217192289494533e-08, - "loss": 0.7999, - "step": 12279 - }, - { - "epoch": 0.9228919284533293, - "grad_norm": 1.5245205672776245, - "learning_rate": 6.2051536705364e-08, - "loss": 1.0661, - "step": 12280 - }, - { - "epoch": 0.9229670825191643, - "grad_norm": 2.1932752394398523, - "learning_rate": 6.193126534665061e-08, - "loss": 0.976, - "step": 12281 - }, - { - "epoch": 0.9230422365849993, - "grad_norm": 1.8345135949983484, - "learning_rate": 6.181110882593054e-08, - "loss": 0.8913, - "step": 12282 - }, - { - "epoch": 0.9231173906508342, - "grad_norm": 3.2494801325592024, - "learning_rate": 6.169106715032435e-08, - "loss": 0.9498, - "step": 12283 - }, - { - "epoch": 0.9231925447166691, - "grad_norm": 2.074233692639797, - "learning_rate": 6.157114032694477e-08, - "loss": 1.031, - "step": 12284 - }, - { - "epoch": 0.9232676987825041, - "grad_norm": 1.2950629938482832, - "learning_rate": 6.14513283628979e-08, - "loss": 0.9578, - "step": 12285 - }, - { - "epoch": 0.9233428528483391, - "grad_norm": 2.5367139077852614, - "learning_rate": 6.133163126528273e-08, - "loss": 0.9751, - "step": 12286 - }, - { - "epoch": 0.9234180069141741, - "grad_norm": 1.9007448844394284, - "learning_rate": 6.121204904119248e-08, - "loss": 0.9075, - "step": 12287 - }, - { - "epoch": 0.923493160980009, - "grad_norm": 2.257597916381653, - "learning_rate": 6.109258169771215e-08, - "loss": 0.9636, - "step": 12288 - }, - { - "epoch": 0.923568315045844, - "grad_norm": 2.0220962851415005, - "learning_rate": 6.097322924192094e-08, - "loss": 0.9385, - "step": 12289 - }, - { - "epoch": 0.9236434691116789, - "grad_norm": 2.3578863436977073, - "learning_rate": 6.085399168089079e-08, - "loss": 0.9583, - "step": 12290 - }, - { - "epoch": 0.9237186231775139, - "grad_norm": 2.066273099074393, - "learning_rate": 6.073486902168756e-08, - "loss": 0.9893, - "step": 12291 - }, - { - "epoch": 0.9237937772433489, - "grad_norm": 1.9073427538971899, - "learning_rate": 6.061586127136875e-08, - "loss": 0.9569, - "step": 12292 - }, - { - "epoch": 0.9238689313091838, - "grad_norm": 1.7329518433278333, - "learning_rate": 6.049696843698692e-08, - "loss": 1.0131, - "step": 12293 - }, - { - "epoch": 0.9239440853750188, - "grad_norm": 1.856215027759059, - "learning_rate": 6.037819052558668e-08, - "loss": 0.9293, - "step": 12294 - }, - { - "epoch": 0.9240192394408537, - "grad_norm": 1.6954233862830526, - "learning_rate": 6.025952754420571e-08, - "loss": 1.0196, - "step": 12295 - }, - { - "epoch": 0.9240943935066888, - "grad_norm": 1.3820675497425556, - "learning_rate": 6.014097949987529e-08, - "loss": 0.9927, - "step": 12296 - }, - { - "epoch": 0.9241695475725237, - "grad_norm": 1.3403838378677495, - "learning_rate": 6.002254639962023e-08, - "loss": 1.0296, - "step": 12297 - }, - { - "epoch": 0.9242447016383586, - "grad_norm": 1.758500096095226, - "learning_rate": 5.990422825045827e-08, - "loss": 1.1183, - "step": 12298 - }, - { - "epoch": 0.9243198557041936, - "grad_norm": 2.5559156539623284, - "learning_rate": 5.978602505939956e-08, - "loss": 1.0431, - "step": 12299 - }, - { - "epoch": 0.9243950097700285, - "grad_norm": 1.5650520844978808, - "learning_rate": 5.96679368334485e-08, - "loss": 0.9759, - "step": 12300 - }, - { - "epoch": 0.9244701638358636, - "grad_norm": 1.6731703544115262, - "learning_rate": 5.954996357960262e-08, - "loss": 0.9313, - "step": 12301 - }, - { - "epoch": 0.9245453179016985, - "grad_norm": 0.7096315902933442, - "learning_rate": 5.943210530485121e-08, - "loss": 0.8531, - "step": 12302 - }, - { - "epoch": 0.9246204719675335, - "grad_norm": 1.72799126381785, - "learning_rate": 5.9314362016178674e-08, - "loss": 1.0206, - "step": 12303 - }, - { - "epoch": 0.9246956260333684, - "grad_norm": 1.3322710733062377, - "learning_rate": 5.9196733720561884e-08, - "loss": 1.0116, - "step": 12304 - }, - { - "epoch": 0.9247707800992033, - "grad_norm": 3.8936962583210044, - "learning_rate": 5.9079220424970154e-08, - "loss": 0.9285, - "step": 12305 - }, - { - "epoch": 0.9248459341650384, - "grad_norm": 0.7395312913305875, - "learning_rate": 5.896182213636702e-08, - "loss": 0.845, - "step": 12306 - }, - { - "epoch": 0.9249210882308733, - "grad_norm": 2.7307589790093942, - "learning_rate": 5.884453886170848e-08, - "loss": 0.9364, - "step": 12307 - }, - { - "epoch": 0.9249962422967083, - "grad_norm": 1.8010050537318059, - "learning_rate": 5.872737060794475e-08, - "loss": 0.929, - "step": 12308 - }, - { - "epoch": 0.9250713963625432, - "grad_norm": 1.41215023335167, - "learning_rate": 5.8610317382017383e-08, - "loss": 0.8312, - "step": 12309 - }, - { - "epoch": 0.9251465504283781, - "grad_norm": 1.9035487813428003, - "learning_rate": 5.849337919086283e-08, - "loss": 1.0255, - "step": 12310 - }, - { - "epoch": 0.9252217044942131, - "grad_norm": 1.8396124886794092, - "learning_rate": 5.837655604141045e-08, - "loss": 0.9766, - "step": 12311 - }, - { - "epoch": 0.9252968585600481, - "grad_norm": 2.415692319339089, - "learning_rate": 5.825984794058181e-08, - "loss": 1.0929, - "step": 12312 - }, - { - "epoch": 0.9253720126258831, - "grad_norm": 2.3653960546773978, - "learning_rate": 5.814325489529248e-08, - "loss": 0.9572, - "step": 12313 - }, - { - "epoch": 0.925447166691718, - "grad_norm": 3.947142767563248, - "learning_rate": 5.8026776912451613e-08, - "loss": 0.9158, - "step": 12314 - }, - { - "epoch": 0.925522320757553, - "grad_norm": 1.4237868482260674, - "learning_rate": 5.791041399896013e-08, - "loss": 1.0126, - "step": 12315 - }, - { - "epoch": 0.925597474823388, - "grad_norm": 1.593415692739969, - "learning_rate": 5.77941661617134e-08, - "loss": 0.9566, - "step": 12316 - }, - { - "epoch": 0.9256726288892229, - "grad_norm": 2.1480408458413525, - "learning_rate": 5.767803340759947e-08, - "loss": 0.9684, - "step": 12317 - }, - { - "epoch": 0.9257477829550579, - "grad_norm": 1.5469137630037066, - "learning_rate": 5.756201574350017e-08, - "loss": 1.0708, - "step": 12318 - }, - { - "epoch": 0.9258229370208928, - "grad_norm": 1.3636695287023732, - "learning_rate": 5.744611317628889e-08, - "loss": 1.0047, - "step": 12319 - }, - { - "epoch": 0.9258980910867278, - "grad_norm": 1.4382586757664138, - "learning_rate": 5.7330325712834135e-08, - "loss": 0.9136, - "step": 12320 - }, - { - "epoch": 0.9259732451525627, - "grad_norm": 1.800209931211495, - "learning_rate": 5.721465335999709e-08, - "loss": 0.9285, - "step": 12321 - }, - { - "epoch": 0.9260483992183978, - "grad_norm": 1.5597635339368967, - "learning_rate": 5.7099096124630705e-08, - "loss": 0.8501, - "step": 12322 - }, - { - "epoch": 0.9261235532842327, - "grad_norm": 2.238678635610484, - "learning_rate": 5.6983654013582846e-08, - "loss": 1.0115, - "step": 12323 - }, - { - "epoch": 0.9261987073500676, - "grad_norm": 2.008444601224172, - "learning_rate": 5.68683270336936e-08, - "loss": 0.8178, - "step": 12324 - }, - { - "epoch": 0.9262738614159026, - "grad_norm": 1.7633730951067579, - "learning_rate": 5.675311519179726e-08, - "loss": 0.9063, - "step": 12325 - }, - { - "epoch": 0.9263490154817375, - "grad_norm": 1.3590903148493245, - "learning_rate": 5.66380184947195e-08, - "loss": 1.0105, - "step": 12326 - }, - { - "epoch": 0.9264241695475726, - "grad_norm": 1.7388827879693387, - "learning_rate": 5.652303694928107e-08, - "loss": 0.9691, - "step": 12327 - }, - { - "epoch": 0.9264993236134075, - "grad_norm": 1.5502812807138144, - "learning_rate": 5.640817056229474e-08, - "loss": 1.08, - "step": 12328 - }, - { - "epoch": 0.9265744776792424, - "grad_norm": 1.9392848868645536, - "learning_rate": 5.629341934056686e-08, - "loss": 0.8545, - "step": 12329 - }, - { - "epoch": 0.9266496317450774, - "grad_norm": 1.7613359191816833, - "learning_rate": 5.617878329089665e-08, - "loss": 0.9807, - "step": 12330 - }, - { - "epoch": 0.9267247858109123, - "grad_norm": 1.554821372857783, - "learning_rate": 5.606426242007734e-08, - "loss": 1.0493, - "step": 12331 - }, - { - "epoch": 0.9267999398767474, - "grad_norm": 1.75922564182546, - "learning_rate": 5.594985673489372e-08, - "loss": 0.8081, - "step": 12332 - }, - { - "epoch": 0.9268750939425823, - "grad_norm": 1.5185405571086945, - "learning_rate": 5.583556624212571e-08, - "loss": 1.0453, - "step": 12333 - }, - { - "epoch": 0.9269502480084173, - "grad_norm": 1.8207128518382236, - "learning_rate": 5.5721390948544775e-08, - "loss": 0.9472, - "step": 12334 - }, - { - "epoch": 0.9270254020742522, - "grad_norm": 2.7837162013372345, - "learning_rate": 5.560733086091707e-08, - "loss": 0.83, - "step": 12335 - }, - { - "epoch": 0.9271005561400871, - "grad_norm": 1.5174844091830557, - "learning_rate": 5.549338598600028e-08, - "loss": 0.8674, - "step": 12336 - }, - { - "epoch": 0.9271757102059222, - "grad_norm": 1.4860554067722611, - "learning_rate": 5.537955633054614e-08, - "loss": 0.8995, - "step": 12337 - }, - { - "epoch": 0.9272508642717571, - "grad_norm": 1.7465474266866536, - "learning_rate": 5.526584190130035e-08, - "loss": 0.9943, - "step": 12338 - }, - { - "epoch": 0.9273260183375921, - "grad_norm": 2.1429319550306776, - "learning_rate": 5.515224270499996e-08, - "loss": 0.9933, - "step": 12339 - }, - { - "epoch": 0.927401172403427, - "grad_norm": 4.005227725904301, - "learning_rate": 5.503875874837649e-08, - "loss": 0.9958, - "step": 12340 - }, - { - "epoch": 0.927476326469262, - "grad_norm": 1.7524731054704774, - "learning_rate": 5.4925390038154105e-08, - "loss": 1.027, - "step": 12341 - }, - { - "epoch": 0.927551480535097, - "grad_norm": 2.523407549134127, - "learning_rate": 5.481213658105121e-08, - "loss": 0.9969, - "step": 12342 - }, - { - "epoch": 0.9276266346009319, - "grad_norm": 1.7780125292453488, - "learning_rate": 5.469899838377734e-08, - "loss": 1.0012, - "step": 12343 - }, - { - "epoch": 0.9277017886667669, - "grad_norm": 1.780228305062514, - "learning_rate": 5.4585975453036894e-08, - "loss": 1.0043, - "step": 12344 - }, - { - "epoch": 0.9277769427326018, - "grad_norm": 2.3297220167421195, - "learning_rate": 5.4473067795526964e-08, - "loss": 0.9253, - "step": 12345 - }, - { - "epoch": 0.9278520967984368, - "grad_norm": 1.673328089053585, - "learning_rate": 5.436027541793775e-08, - "loss": 0.967, - "step": 12346 - }, - { - "epoch": 0.9279272508642717, - "grad_norm": 1.9508415606994824, - "learning_rate": 5.4247598326952357e-08, - "loss": 0.9321, - "step": 12347 - }, - { - "epoch": 0.9280024049301068, - "grad_norm": 1.9472825213189977, - "learning_rate": 5.4135036529248115e-08, - "loss": 1.0074, - "step": 12348 - }, - { - "epoch": 0.9280775589959417, - "grad_norm": 2.7496119138670974, - "learning_rate": 5.4022590031493454e-08, - "loss": 0.9319, - "step": 12349 - }, - { - "epoch": 0.9281527130617766, - "grad_norm": 1.5218638582233, - "learning_rate": 5.3910258840352386e-08, - "loss": 0.9229, - "step": 12350 - }, - { - "epoch": 0.9282278671276116, - "grad_norm": 2.6913327233357367, - "learning_rate": 5.379804296248025e-08, - "loss": 1.0324, - "step": 12351 - }, - { - "epoch": 0.9283030211934465, - "grad_norm": 2.3135586969973128, - "learning_rate": 5.3685942404527063e-08, - "loss": 0.8651, - "step": 12352 - }, - { - "epoch": 0.9283781752592816, - "grad_norm": 2.126660651773459, - "learning_rate": 5.35739571731344e-08, - "loss": 0.9243, - "step": 12353 - }, - { - "epoch": 0.9284533293251165, - "grad_norm": 1.757880451160576, - "learning_rate": 5.3462087274938064e-08, - "loss": 0.8667, - "step": 12354 - }, - { - "epoch": 0.9285284833909514, - "grad_norm": 1.9261064544998965, - "learning_rate": 5.335033271656697e-08, - "loss": 0.9878, - "step": 12355 - }, - { - "epoch": 0.9286036374567864, - "grad_norm": 1.8004732675387274, - "learning_rate": 5.323869350464294e-08, - "loss": 0.9334, - "step": 12356 - }, - { - "epoch": 0.9286787915226213, - "grad_norm": 1.8364966007889785, - "learning_rate": 5.31271696457809e-08, - "loss": 0.9423, - "step": 12357 - }, - { - "epoch": 0.9287539455884564, - "grad_norm": 3.5805352101826093, - "learning_rate": 5.3015761146589345e-08, - "loss": 0.89, - "step": 12358 - }, - { - "epoch": 0.9288290996542913, - "grad_norm": 3.292863620152352, - "learning_rate": 5.290446801366899e-08, - "loss": 1.0315, - "step": 12359 - }, - { - "epoch": 0.9289042537201263, - "grad_norm": 1.7139996860508313, - "learning_rate": 5.279329025361523e-08, - "loss": 0.9308, - "step": 12360 - }, - { - "epoch": 0.9289794077859612, - "grad_norm": 2.068723363791301, - "learning_rate": 5.268222787301502e-08, - "loss": 0.9567, - "step": 12361 - }, - { - "epoch": 0.9290545618517961, - "grad_norm": 1.476615380041655, - "learning_rate": 5.257128087844997e-08, - "loss": 0.9292, - "step": 12362 - }, - { - "epoch": 0.9291297159176312, - "grad_norm": 1.7644944749217604, - "learning_rate": 5.246044927649351e-08, - "loss": 0.902, - "step": 12363 - }, - { - "epoch": 0.9292048699834661, - "grad_norm": 1.4091910231591023, - "learning_rate": 5.2349733073712824e-08, - "loss": 0.9566, - "step": 12364 - }, - { - "epoch": 0.9292800240493011, - "grad_norm": 2.1967991041202484, - "learning_rate": 5.2239132276669096e-08, - "loss": 0.8879, - "step": 12365 - }, - { - "epoch": 0.929355178115136, - "grad_norm": 1.8628774401309531, - "learning_rate": 5.212864689191465e-08, - "loss": 0.8568, - "step": 12366 - }, - { - "epoch": 0.929430332180971, - "grad_norm": 1.6536278751229205, - "learning_rate": 5.201827692599714e-08, - "loss": 0.9996, - "step": 12367 - }, - { - "epoch": 0.929505486246806, - "grad_norm": 1.6300770602118797, - "learning_rate": 5.1908022385455774e-08, - "loss": 0.9092, - "step": 12368 - }, - { - "epoch": 0.9295806403126409, - "grad_norm": 1.563922768371347, - "learning_rate": 5.179788327682444e-08, - "loss": 0.8708, - "step": 12369 - }, - { - "epoch": 0.9296557943784759, - "grad_norm": 2.3176242655962835, - "learning_rate": 5.1687859606627915e-08, - "loss": 1.0216, - "step": 12370 - }, - { - "epoch": 0.9297309484443108, - "grad_norm": 1.7332769131036647, - "learning_rate": 5.1577951381386763e-08, - "loss": 0.8217, - "step": 12371 - }, - { - "epoch": 0.9298061025101458, - "grad_norm": 1.4877524249890883, - "learning_rate": 5.146815860761289e-08, - "loss": 0.9703, - "step": 12372 - }, - { - "epoch": 0.9298812565759808, - "grad_norm": 2.2221139470849254, - "learning_rate": 5.1358481291812194e-08, - "loss": 0.9689, - "step": 12373 - }, - { - "epoch": 0.9299564106418157, - "grad_norm": 1.6607997948449804, - "learning_rate": 5.124891944048282e-08, - "loss": 0.9792, - "step": 12374 - }, - { - "epoch": 0.9300315647076507, - "grad_norm": 1.7248935832494583, - "learning_rate": 5.113947306011801e-08, - "loss": 0.9735, - "step": 12375 - }, - { - "epoch": 0.9301067187734856, - "grad_norm": 2.2950768730902142, - "learning_rate": 5.103014215720147e-08, - "loss": 0.9461, - "step": 12376 - }, - { - "epoch": 0.9301818728393206, - "grad_norm": 1.543391534368381, - "learning_rate": 5.092092673821224e-08, - "loss": 0.9949, - "step": 12377 - }, - { - "epoch": 0.9302570269051555, - "grad_norm": 2.5101822444212134, - "learning_rate": 5.0811826809621596e-08, - "loss": 1.1149, - "step": 12378 - }, - { - "epoch": 0.9303321809709906, - "grad_norm": 1.6777512146501006, - "learning_rate": 5.0702842377894574e-08, - "loss": 0.9481, - "step": 12379 - }, - { - "epoch": 0.9304073350368255, - "grad_norm": 0.8043142026912421, - "learning_rate": 5.059397344948802e-08, - "loss": 0.8933, - "step": 12380 - }, - { - "epoch": 0.9304824891026604, - "grad_norm": 1.6402274967202917, - "learning_rate": 5.0485220030853204e-08, - "loss": 0.96, - "step": 12381 - }, - { - "epoch": 0.9305576431684954, - "grad_norm": 1.9097214654477865, - "learning_rate": 5.0376582128434766e-08, - "loss": 1.0659, - "step": 12382 - }, - { - "epoch": 0.9306327972343303, - "grad_norm": 1.9406325949798227, - "learning_rate": 5.026805974866932e-08, - "loss": 0.9283, - "step": 12383 - }, - { - "epoch": 0.9307079513001654, - "grad_norm": 1.4244485557749413, - "learning_rate": 5.015965289798707e-08, - "loss": 0.9388, - "step": 12384 - }, - { - "epoch": 0.9307831053660003, - "grad_norm": 1.6617391064055658, - "learning_rate": 5.005136158281198e-08, - "loss": 0.9634, - "step": 12385 - }, - { - "epoch": 0.9308582594318353, - "grad_norm": 2.0478669303786745, - "learning_rate": 4.994318580956092e-08, - "loss": 1.0009, - "step": 12386 - }, - { - "epoch": 0.9309334134976702, - "grad_norm": 1.619909235213001, - "learning_rate": 4.983512558464276e-08, - "loss": 0.9716, - "step": 12387 - }, - { - "epoch": 0.9310085675635051, - "grad_norm": 2.338620179578085, - "learning_rate": 4.9727180914461485e-08, - "loss": 1.0471, - "step": 12388 - }, - { - "epoch": 0.9310837216293402, - "grad_norm": 2.149346176801665, - "learning_rate": 4.961935180541288e-08, - "loss": 0.875, - "step": 12389 - }, - { - "epoch": 0.9311588756951751, - "grad_norm": 1.540016459587618, - "learning_rate": 4.9511638263886045e-08, - "loss": 0.9087, - "step": 12390 - }, - { - "epoch": 0.9312340297610101, - "grad_norm": 1.6444097551060648, - "learning_rate": 4.940404029626344e-08, - "loss": 0.9259, - "step": 12391 - }, - { - "epoch": 0.931309183826845, - "grad_norm": 1.6012412144392385, - "learning_rate": 4.929655790892107e-08, - "loss": 0.9498, - "step": 12392 - }, - { - "epoch": 0.93138433789268, - "grad_norm": 2.47924832335132, - "learning_rate": 4.918919110822717e-08, - "loss": 1.0661, - "step": 12393 - }, - { - "epoch": 0.931459491958515, - "grad_norm": 2.3826387936171964, - "learning_rate": 4.908193990054377e-08, - "loss": 0.9936, - "step": 12394 - }, - { - "epoch": 0.9315346460243499, - "grad_norm": 1.801099769739089, - "learning_rate": 4.8974804292226e-08, - "loss": 0.9978, - "step": 12395 - }, - { - "epoch": 0.9316098000901849, - "grad_norm": 1.727456530232461, - "learning_rate": 4.886778428962235e-08, - "loss": 0.9663, - "step": 12396 - }, - { - "epoch": 0.9316849541560198, - "grad_norm": 1.9994228245464085, - "learning_rate": 4.876087989907374e-08, - "loss": 1.0826, - "step": 12397 - }, - { - "epoch": 0.9317601082218548, - "grad_norm": 1.5842083303453327, - "learning_rate": 4.865409112691465e-08, - "loss": 0.9045, - "step": 12398 - }, - { - "epoch": 0.9318352622876898, - "grad_norm": 1.6738582505994872, - "learning_rate": 4.854741797947293e-08, - "loss": 0.9878, - "step": 12399 - }, - { - "epoch": 0.9319104163535247, - "grad_norm": 2.0878942791127986, - "learning_rate": 4.8440860463069496e-08, - "loss": 1.0215, - "step": 12400 - }, - { - "epoch": 0.9319855704193597, - "grad_norm": 1.6474313422384335, - "learning_rate": 4.8334418584017764e-08, - "loss": 0.8733, - "step": 12401 - }, - { - "epoch": 0.9320607244851946, - "grad_norm": 1.1912748670834545, - "learning_rate": 4.822809234862557e-08, - "loss": 0.8278, - "step": 12402 - }, - { - "epoch": 0.9321358785510296, - "grad_norm": 1.9901569265283736, - "learning_rate": 4.812188176319232e-08, - "loss": 0.9743, - "step": 12403 - }, - { - "epoch": 0.9322110326168646, - "grad_norm": 4.721667265851034, - "learning_rate": 4.801578683401186e-08, - "loss": 0.9667, - "step": 12404 - }, - { - "epoch": 0.9322861866826996, - "grad_norm": 2.047051342072914, - "learning_rate": 4.790980756737073e-08, - "loss": 1.0037, - "step": 12405 - }, - { - "epoch": 0.9323613407485345, - "grad_norm": 1.6668010975346386, - "learning_rate": 4.780394396954901e-08, - "loss": 0.9535, - "step": 12406 - }, - { - "epoch": 0.9324364948143694, - "grad_norm": 1.695077343641798, - "learning_rate": 4.769819604681857e-08, - "loss": 1.0076, - "step": 12407 - }, - { - "epoch": 0.9325116488802044, - "grad_norm": 1.7385313389926411, - "learning_rate": 4.759256380544574e-08, - "loss": 0.929, - "step": 12408 - }, - { - "epoch": 0.9325868029460394, - "grad_norm": 1.320409015084837, - "learning_rate": 4.748704725169017e-08, - "loss": 0.9142, - "step": 12409 - }, - { - "epoch": 0.9326619570118744, - "grad_norm": 1.5968048140000304, - "learning_rate": 4.7381646391803534e-08, - "loss": 0.887, - "step": 12410 - }, - { - "epoch": 0.9327371110777093, - "grad_norm": 1.9392970580341544, - "learning_rate": 4.727636123203149e-08, - "loss": 0.8485, - "step": 12411 - }, - { - "epoch": 0.9328122651435443, - "grad_norm": 1.9675111051485978, - "learning_rate": 4.7171191778612396e-08, - "loss": 0.9531, - "step": 12412 - }, - { - "epoch": 0.9328874192093792, - "grad_norm": 2.70321284686104, - "learning_rate": 4.706613803777837e-08, - "loss": 0.8905, - "step": 12413 - }, - { - "epoch": 0.9329625732752141, - "grad_norm": 2.018648113124316, - "learning_rate": 4.6961200015753546e-08, - "loss": 1.0445, - "step": 12414 - }, - { - "epoch": 0.9330377273410492, - "grad_norm": 2.500563558001472, - "learning_rate": 4.68563777187565e-08, - "loss": 0.9105, - "step": 12415 - }, - { - "epoch": 0.9331128814068841, - "grad_norm": 1.847286647097025, - "learning_rate": 4.6751671152998276e-08, - "loss": 1.0482, - "step": 12416 - }, - { - "epoch": 0.9331880354727191, - "grad_norm": 1.8400642926966775, - "learning_rate": 4.664708032468301e-08, - "loss": 1.0316, - "step": 12417 - }, - { - "epoch": 0.933263189538554, - "grad_norm": 1.8175873228816404, - "learning_rate": 4.654260524000797e-08, - "loss": 0.9671, - "step": 12418 - }, - { - "epoch": 0.9333383436043889, - "grad_norm": 1.5864380295627896, - "learning_rate": 4.643824590516399e-08, - "loss": 0.9502, - "step": 12419 - }, - { - "epoch": 0.933413497670224, - "grad_norm": 1.3237036319208777, - "learning_rate": 4.6334002326334554e-08, - "loss": 0.9607, - "step": 12420 - }, - { - "epoch": 0.9334886517360589, - "grad_norm": 1.6075262914141997, - "learning_rate": 4.622987450969651e-08, - "loss": 0.9487, - "step": 12421 - }, - { - "epoch": 0.9335638058018939, - "grad_norm": 3.8055428986598008, - "learning_rate": 4.612586246141981e-08, - "loss": 0.9144, - "step": 12422 - }, - { - "epoch": 0.9336389598677288, - "grad_norm": 1.818645450693892, - "learning_rate": 4.602196618766796e-08, - "loss": 1.0138, - "step": 12423 - }, - { - "epoch": 0.9337141139335638, - "grad_norm": 1.4482587323365892, - "learning_rate": 4.591818569459671e-08, - "loss": 0.9719, - "step": 12424 - }, - { - "epoch": 0.9337892679993988, - "grad_norm": 2.816745097481018, - "learning_rate": 4.581452098835537e-08, - "loss": 0.9548, - "step": 12425 - }, - { - "epoch": 0.9338644220652337, - "grad_norm": 1.7218680102973207, - "learning_rate": 4.571097207508723e-08, - "loss": 0.9441, - "step": 12426 - }, - { - "epoch": 0.9339395761310687, - "grad_norm": 2.5345799712468637, - "learning_rate": 4.560753896092739e-08, - "loss": 1.0308, - "step": 12427 - }, - { - "epoch": 0.9340147301969036, - "grad_norm": 1.783633260669436, - "learning_rate": 4.5504221652004295e-08, - "loss": 0.9684, - "step": 12428 - }, - { - "epoch": 0.9340898842627386, - "grad_norm": 8.152727908563747, - "learning_rate": 4.5401020154440586e-08, - "loss": 0.9602, - "step": 12429 - }, - { - "epoch": 0.9341650383285736, - "grad_norm": 5.440209075612532, - "learning_rate": 4.529793447435137e-08, - "loss": 0.9754, - "step": 12430 - }, - { - "epoch": 0.9342401923944086, - "grad_norm": 4.974572566854402, - "learning_rate": 4.5194964617844225e-08, - "loss": 0.9227, - "step": 12431 - }, - { - "epoch": 0.9343153464602435, - "grad_norm": 1.4267574782896597, - "learning_rate": 4.509211059102092e-08, - "loss": 0.8509, - "step": 12432 - }, - { - "epoch": 0.9343905005260784, - "grad_norm": 1.876077977884065, - "learning_rate": 4.498937239997613e-08, - "loss": 1.0718, - "step": 12433 - }, - { - "epoch": 0.9344656545919134, - "grad_norm": 1.6865187017024201, - "learning_rate": 4.488675005079723e-08, - "loss": 1.0006, - "step": 12434 - }, - { - "epoch": 0.9345408086577484, - "grad_norm": 2.674468538485429, - "learning_rate": 4.478424354956467e-08, - "loss": 0.774, - "step": 12435 - }, - { - "epoch": 0.9346159627235834, - "grad_norm": 2.0601769282460887, - "learning_rate": 4.4681852902353154e-08, - "loss": 0.9452, - "step": 12436 - }, - { - "epoch": 0.9346911167894183, - "grad_norm": 1.8888344601038443, - "learning_rate": 4.4579578115228943e-08, - "loss": 0.9387, - "step": 12437 - }, - { - "epoch": 0.9347662708552533, - "grad_norm": 3.934910604001928, - "learning_rate": 4.447741919425274e-08, - "loss": 1.0417, - "step": 12438 - }, - { - "epoch": 0.9348414249210882, - "grad_norm": 1.7425513464916642, - "learning_rate": 4.437537614547726e-08, - "loss": 0.9706, - "step": 12439 - }, - { - "epoch": 0.9349165789869232, - "grad_norm": 2.257301880009924, - "learning_rate": 4.427344897494989e-08, - "loss": 1.0615, - "step": 12440 - }, - { - "epoch": 0.9349917330527582, - "grad_norm": 2.183040105139384, - "learning_rate": 4.4171637688709354e-08, - "loss": 0.9932, - "step": 12441 - }, - { - "epoch": 0.9350668871185931, - "grad_norm": 1.4971317047251838, - "learning_rate": 4.4069942292788596e-08, - "loss": 0.9212, - "step": 12442 - }, - { - "epoch": 0.9351420411844281, - "grad_norm": 1.5926932028764265, - "learning_rate": 4.39683627932137e-08, - "loss": 0.9605, - "step": 12443 - }, - { - "epoch": 0.935217195250263, - "grad_norm": 1.720429588165256, - "learning_rate": 4.3866899196003393e-08, - "loss": 1.0154, - "step": 12444 - }, - { - "epoch": 0.935292349316098, - "grad_norm": 1.6323879000160393, - "learning_rate": 4.376555150716954e-08, - "loss": 0.9242, - "step": 12445 - }, - { - "epoch": 0.935367503381933, - "grad_norm": 1.8009804307254644, - "learning_rate": 4.3664319732718227e-08, - "loss": 0.9208, - "step": 12446 - }, - { - "epoch": 0.9354426574477679, - "grad_norm": 0.7069852192263059, - "learning_rate": 4.356320387864687e-08, - "loss": 0.8425, - "step": 12447 - }, - { - "epoch": 0.9355178115136029, - "grad_norm": 1.8246236674411123, - "learning_rate": 4.346220395094735e-08, - "loss": 0.9273, - "step": 12448 - }, - { - "epoch": 0.9355929655794378, - "grad_norm": 2.237762870477957, - "learning_rate": 4.336131995560444e-08, - "loss": 0.9769, - "step": 12449 - }, - { - "epoch": 0.9356681196452729, - "grad_norm": 2.490795643703069, - "learning_rate": 4.326055189859601e-08, - "loss": 0.9773, - "step": 12450 - }, - { - "epoch": 0.9357432737111078, - "grad_norm": 2.3092833132990727, - "learning_rate": 4.3159899785892403e-08, - "loss": 1.0049, - "step": 12451 - }, - { - "epoch": 0.9358184277769427, - "grad_norm": 1.6721866352875365, - "learning_rate": 4.305936362345797e-08, - "loss": 1.082, - "step": 12452 - }, - { - "epoch": 0.9358935818427777, - "grad_norm": 1.9522829605780645, - "learning_rate": 4.295894341725037e-08, - "loss": 0.9385, - "step": 12453 - }, - { - "epoch": 0.9359687359086126, - "grad_norm": 1.8984248156967585, - "learning_rate": 4.285863917321886e-08, - "loss": 0.8911, - "step": 12454 - }, - { - "epoch": 0.9360438899744477, - "grad_norm": 2.0891803579816344, - "learning_rate": 4.2758450897307565e-08, - "loss": 0.9356, - "step": 12455 - }, - { - "epoch": 0.9361190440402826, - "grad_norm": 1.589614439227063, - "learning_rate": 4.2658378595452626e-08, - "loss": 0.9935, - "step": 12456 - }, - { - "epoch": 0.9361941981061176, - "grad_norm": 2.0653360692805047, - "learning_rate": 4.2558422273584414e-08, - "loss": 0.9559, - "step": 12457 - }, - { - "epoch": 0.9362693521719525, - "grad_norm": 1.9450433996894299, - "learning_rate": 4.245858193762486e-08, - "loss": 0.9479, - "step": 12458 - }, - { - "epoch": 0.9363445062377874, - "grad_norm": 2.0232952760178873, - "learning_rate": 4.2358857593490337e-08, - "loss": 0.9895, - "step": 12459 - }, - { - "epoch": 0.9364196603036224, - "grad_norm": 2.7600559694724236, - "learning_rate": 4.225924924708968e-08, - "loss": 0.9733, - "step": 12460 - }, - { - "epoch": 0.9364948143694574, - "grad_norm": 1.757951993705591, - "learning_rate": 4.215975690432549e-08, - "loss": 1.059, - "step": 12461 - }, - { - "epoch": 0.9365699684352924, - "grad_norm": 3.1796879498905546, - "learning_rate": 4.20603805710924e-08, - "loss": 0.9848, - "step": 12462 - }, - { - "epoch": 0.9366451225011273, - "grad_norm": 8.438272172421282, - "learning_rate": 4.196112025327969e-08, - "loss": 1.0165, - "step": 12463 - }, - { - "epoch": 0.9367202765669622, - "grad_norm": 2.0113974305357143, - "learning_rate": 4.1861975956767994e-08, - "loss": 1.0059, - "step": 12464 - }, - { - "epoch": 0.9367954306327972, - "grad_norm": 1.5599380852565863, - "learning_rate": 4.1762947687432605e-08, - "loss": 0.9301, - "step": 12465 - }, - { - "epoch": 0.9368705846986322, - "grad_norm": 1.6287646334994204, - "learning_rate": 4.166403545114105e-08, - "loss": 0.9594, - "step": 12466 - }, - { - "epoch": 0.9369457387644672, - "grad_norm": 2.0071247273582915, - "learning_rate": 4.1565239253754655e-08, - "loss": 0.8962, - "step": 12467 - }, - { - "epoch": 0.9370208928303021, - "grad_norm": 2.869993910035374, - "learning_rate": 4.146655910112673e-08, - "loss": 0.9608, - "step": 12468 - }, - { - "epoch": 0.9370960468961371, - "grad_norm": 1.9002287149801809, - "learning_rate": 4.1367994999105036e-08, - "loss": 0.8084, - "step": 12469 - }, - { - "epoch": 0.937171200961972, - "grad_norm": 1.5715706562855372, - "learning_rate": 4.126954695353002e-08, - "loss": 0.9599, - "step": 12470 - }, - { - "epoch": 0.937246355027807, - "grad_norm": 0.7080457374675578, - "learning_rate": 4.117121497023457e-08, - "loss": 0.8076, - "step": 12471 - }, - { - "epoch": 0.937321509093642, - "grad_norm": 1.8366287553385752, - "learning_rate": 4.107299905504558e-08, - "loss": 0.8874, - "step": 12472 - }, - { - "epoch": 0.9373966631594769, - "grad_norm": 2.08510421510775, - "learning_rate": 4.097489921378261e-08, - "loss": 0.8804, - "step": 12473 - }, - { - "epoch": 0.9374718172253119, - "grad_norm": 1.9947870385603508, - "learning_rate": 4.0876915452258577e-08, - "loss": 0.8865, - "step": 12474 - }, - { - "epoch": 0.9375469712911468, - "grad_norm": 1.7973117463450254, - "learning_rate": 4.077904777627905e-08, - "loss": 0.8972, - "step": 12475 - }, - { - "epoch": 0.9376221253569819, - "grad_norm": 1.69638800895211, - "learning_rate": 4.068129619164362e-08, - "loss": 0.9515, - "step": 12476 - }, - { - "epoch": 0.9376972794228168, - "grad_norm": 1.5406157889874244, - "learning_rate": 4.0583660704143874e-08, - "loss": 0.9736, - "step": 12477 - }, - { - "epoch": 0.9377724334886517, - "grad_norm": 2.4017044333198196, - "learning_rate": 4.0486141319565624e-08, - "loss": 1.0968, - "step": 12478 - }, - { - "epoch": 0.9378475875544867, - "grad_norm": 3.370743787508964, - "learning_rate": 4.038873804368648e-08, - "loss": 0.9098, - "step": 12479 - }, - { - "epoch": 0.9379227416203216, - "grad_norm": 1.680275534673187, - "learning_rate": 4.0291450882279144e-08, - "loss": 0.94, - "step": 12480 - }, - { - "epoch": 0.9379978956861567, - "grad_norm": 3.272251542511777, - "learning_rate": 4.0194279841107014e-08, - "loss": 1.0057, - "step": 12481 - }, - { - "epoch": 0.9380730497519916, - "grad_norm": 1.7839513005080123, - "learning_rate": 4.00972249259286e-08, - "loss": 0.9611, - "step": 12482 - }, - { - "epoch": 0.9381482038178266, - "grad_norm": 1.8291890234945911, - "learning_rate": 4.00002861424944e-08, - "loss": 0.9734, - "step": 12483 - }, - { - "epoch": 0.9382233578836615, - "grad_norm": 2.4211192274645694, - "learning_rate": 3.990346349654894e-08, - "loss": 0.9133, - "step": 12484 - }, - { - "epoch": 0.9382985119494964, - "grad_norm": 1.8453017348347744, - "learning_rate": 3.980675699382852e-08, - "loss": 1.0053, - "step": 12485 - }, - { - "epoch": 0.9383736660153315, - "grad_norm": 3.5010172864176097, - "learning_rate": 3.97101666400641e-08, - "loss": 0.9142, - "step": 12486 - }, - { - "epoch": 0.9384488200811664, - "grad_norm": 1.6464964661431407, - "learning_rate": 3.96136924409789e-08, - "loss": 0.8578, - "step": 12487 - }, - { - "epoch": 0.9385239741470014, - "grad_norm": 2.0138209349761444, - "learning_rate": 3.951733440228899e-08, - "loss": 0.9507, - "step": 12488 - }, - { - "epoch": 0.9385991282128363, - "grad_norm": 1.7283247157991428, - "learning_rate": 3.942109252970427e-08, - "loss": 0.969, - "step": 12489 - }, - { - "epoch": 0.9386742822786712, - "grad_norm": 3.531344387177094, - "learning_rate": 3.93249668289275e-08, - "loss": 0.947, - "step": 12490 - }, - { - "epoch": 0.9387494363445062, - "grad_norm": 1.5325612273439067, - "learning_rate": 3.922895730565412e-08, - "loss": 0.9792, - "step": 12491 - }, - { - "epoch": 0.9388245904103412, - "grad_norm": 1.8332402321266419, - "learning_rate": 3.913306396557336e-08, - "loss": 1.0284, - "step": 12492 - }, - { - "epoch": 0.9388997444761762, - "grad_norm": 1.89760266695977, - "learning_rate": 3.903728681436735e-08, - "loss": 0.98, - "step": 12493 - }, - { - "epoch": 0.9389748985420111, - "grad_norm": 1.6659672541153587, - "learning_rate": 3.894162585771132e-08, - "loss": 0.9083, - "step": 12494 - }, - { - "epoch": 0.9390500526078461, - "grad_norm": 1.8289804657841402, - "learning_rate": 3.884608110127319e-08, - "loss": 0.9733, - "step": 12495 - }, - { - "epoch": 0.939125206673681, - "grad_norm": 1.7288856584414714, - "learning_rate": 3.875065255071419e-08, - "loss": 0.8889, - "step": 12496 - }, - { - "epoch": 0.939200360739516, - "grad_norm": 1.7075938818662248, - "learning_rate": 3.8655340211689594e-08, - "loss": 0.9494, - "step": 12497 - }, - { - "epoch": 0.939275514805351, - "grad_norm": 1.7683407313377573, - "learning_rate": 3.856014408984643e-08, - "loss": 0.8241, - "step": 12498 - }, - { - "epoch": 0.9393506688711859, - "grad_norm": 1.7391944115470863, - "learning_rate": 3.846506419082551e-08, - "loss": 0.8573, - "step": 12499 - }, - { - "epoch": 0.9394258229370209, - "grad_norm": 1.7379174698350919, - "learning_rate": 3.837010052026057e-08, - "loss": 1.0578, - "step": 12500 - }, - { - "epoch": 0.9395009770028558, - "grad_norm": 1.872127557952839, - "learning_rate": 3.827525308377932e-08, - "loss": 0.9419, - "step": 12501 - }, - { - "epoch": 0.9395761310686909, - "grad_norm": 1.657848162901157, - "learning_rate": 3.8180521887000825e-08, - "loss": 0.8725, - "step": 12502 - }, - { - "epoch": 0.9396512851345258, - "grad_norm": 1.8900197218819657, - "learning_rate": 3.8085906935538815e-08, - "loss": 0.857, - "step": 12503 - }, - { - "epoch": 0.9397264392003607, - "grad_norm": 2.833674092464532, - "learning_rate": 3.799140823499947e-08, - "loss": 0.986, - "step": 12504 - }, - { - "epoch": 0.9398015932661957, - "grad_norm": 1.9852853924428653, - "learning_rate": 3.7897025790982305e-08, - "loss": 0.9357, - "step": 12505 - }, - { - "epoch": 0.9398767473320306, - "grad_norm": 4.969984560967457, - "learning_rate": 3.780275960907975e-08, - "loss": 1.0056, - "step": 12506 - }, - { - "epoch": 0.9399519013978657, - "grad_norm": 1.754116733894452, - "learning_rate": 3.770860969487755e-08, - "loss": 1.1059, - "step": 12507 - }, - { - "epoch": 0.9400270554637006, - "grad_norm": 1.6142368396058926, - "learning_rate": 3.7614576053954126e-08, - "loss": 1.0156, - "step": 12508 - }, - { - "epoch": 0.9401022095295355, - "grad_norm": 1.4602355086448888, - "learning_rate": 3.752065869188148e-08, - "loss": 0.9578, - "step": 12509 - }, - { - "epoch": 0.9401773635953705, - "grad_norm": 4.450463500154893, - "learning_rate": 3.74268576142247e-08, - "loss": 0.9186, - "step": 12510 - }, - { - "epoch": 0.9402525176612054, - "grad_norm": 3.440890772617177, - "learning_rate": 3.7333172826542025e-08, - "loss": 1.0591, - "step": 12511 - }, - { - "epoch": 0.9403276717270405, - "grad_norm": 1.5854310227628456, - "learning_rate": 3.7239604334384336e-08, - "loss": 0.8823, - "step": 12512 - }, - { - "epoch": 0.9404028257928754, - "grad_norm": 1.8900434371962263, - "learning_rate": 3.714615214329564e-08, - "loss": 0.96, - "step": 12513 - }, - { - "epoch": 0.9404779798587104, - "grad_norm": 1.3489842160759955, - "learning_rate": 3.705281625881418e-08, - "loss": 0.9507, - "step": 12514 - }, - { - "epoch": 0.9405531339245453, - "grad_norm": 0.7717488668450135, - "learning_rate": 3.695959668646975e-08, - "loss": 0.8229, - "step": 12515 - }, - { - "epoch": 0.9406282879903802, - "grad_norm": 0.6957000980814692, - "learning_rate": 3.686649343178616e-08, - "loss": 0.8467, - "step": 12516 - }, - { - "epoch": 0.9407034420562153, - "grad_norm": 2.318475169220177, - "learning_rate": 3.677350650028033e-08, - "loss": 0.95, - "step": 12517 - }, - { - "epoch": 0.9407785961220502, - "grad_norm": 2.0799296867883883, - "learning_rate": 3.668063589746206e-08, - "loss": 1.0207, - "step": 12518 - }, - { - "epoch": 0.9408537501878852, - "grad_norm": 1.9065762850318242, - "learning_rate": 3.658788162883364e-08, - "loss": 0.9479, - "step": 12519 - }, - { - "epoch": 0.9409289042537201, - "grad_norm": 2.0958193900502136, - "learning_rate": 3.649524369989221e-08, - "loss": 0.9167, - "step": 12520 - }, - { - "epoch": 0.9410040583195551, - "grad_norm": 2.210897276819129, - "learning_rate": 3.640272211612605e-08, - "loss": 0.9507, - "step": 12521 - }, - { - "epoch": 0.94107921238539, - "grad_norm": 40.50219720602359, - "learning_rate": 3.631031688301789e-08, - "loss": 0.8852, - "step": 12522 - }, - { - "epoch": 0.941154366451225, - "grad_norm": 1.8813885254409544, - "learning_rate": 3.6218028006042676e-08, - "loss": 0.9048, - "step": 12523 - }, - { - "epoch": 0.94122952051706, - "grad_norm": 1.8112966554897163, - "learning_rate": 3.612585549066938e-08, - "loss": 0.9899, - "step": 12524 - }, - { - "epoch": 0.9413046745828949, - "grad_norm": 1.9941052948365443, - "learning_rate": 3.603379934235917e-08, - "loss": 0.8033, - "step": 12525 - }, - { - "epoch": 0.9413798286487299, - "grad_norm": 2.468110786449225, - "learning_rate": 3.5941859566566816e-08, - "loss": 0.9821, - "step": 12526 - }, - { - "epoch": 0.9414549827145648, - "grad_norm": 2.0725136625532112, - "learning_rate": 3.585003616874016e-08, - "loss": 1.0476, - "step": 12527 - }, - { - "epoch": 0.9415301367803999, - "grad_norm": 0.6913214378279231, - "learning_rate": 3.575832915432042e-08, - "loss": 0.7894, - "step": 12528 - }, - { - "epoch": 0.9416052908462348, - "grad_norm": 1.601119859515493, - "learning_rate": 3.566673852874103e-08, - "loss": 0.9443, - "step": 12529 - }, - { - "epoch": 0.9416804449120697, - "grad_norm": 0.7119614071271665, - "learning_rate": 3.5575264297429185e-08, - "loss": 0.9044, - "step": 12530 - }, - { - "epoch": 0.9417555989779047, - "grad_norm": 2.611970718283918, - "learning_rate": 3.548390646580546e-08, - "loss": 0.9412, - "step": 12531 - }, - { - "epoch": 0.9418307530437396, - "grad_norm": 1.5945995534858188, - "learning_rate": 3.539266503928284e-08, - "loss": 1.0157, - "step": 12532 - }, - { - "epoch": 0.9419059071095747, - "grad_norm": 1.8104050466547206, - "learning_rate": 3.5301540023267684e-08, - "loss": 0.9106, - "step": 12533 - }, - { - "epoch": 0.9419810611754096, - "grad_norm": 1.674046398695676, - "learning_rate": 3.521053142315988e-08, - "loss": 0.9206, - "step": 12534 - }, - { - "epoch": 0.9420562152412445, - "grad_norm": 2.00938846470903, - "learning_rate": 3.5119639244351575e-08, - "loss": 0.9001, - "step": 12535 - }, - { - "epoch": 0.9421313693070795, - "grad_norm": 4.475045418109254, - "learning_rate": 3.502886349222844e-08, - "loss": 1.0121, - "step": 12536 - }, - { - "epoch": 0.9422065233729144, - "grad_norm": 1.8413746290117843, - "learning_rate": 3.493820417216975e-08, - "loss": 1.0146, - "step": 12537 - }, - { - "epoch": 0.9422816774387495, - "grad_norm": 1.6723031894997642, - "learning_rate": 3.4847661289547417e-08, - "loss": 0.9764, - "step": 12538 - }, - { - "epoch": 0.9423568315045844, - "grad_norm": 1.5111521366259908, - "learning_rate": 3.4757234849726036e-08, - "loss": 0.9527, - "step": 12539 - }, - { - "epoch": 0.9424319855704194, - "grad_norm": 1.7823122437666177, - "learning_rate": 3.4666924858063776e-08, - "loss": 0.9058, - "step": 12540 - }, - { - "epoch": 0.9425071396362543, - "grad_norm": 1.5617479421798366, - "learning_rate": 3.4576731319912125e-08, - "loss": 0.9127, - "step": 12541 - }, - { - "epoch": 0.9425822937020892, - "grad_norm": 2.1653033393339065, - "learning_rate": 3.448665424061525e-08, - "loss": 0.9357, - "step": 12542 - }, - { - "epoch": 0.9426574477679243, - "grad_norm": 2.046908662242583, - "learning_rate": 3.439669362551045e-08, - "loss": 0.9531, - "step": 12543 - }, - { - "epoch": 0.9427326018337592, - "grad_norm": 1.7581291083143114, - "learning_rate": 3.4306849479928344e-08, - "loss": 1.0221, - "step": 12544 - }, - { - "epoch": 0.9428077558995942, - "grad_norm": 0.7193959691946218, - "learning_rate": 3.421712180919289e-08, - "loss": 0.8909, - "step": 12545 - }, - { - "epoch": 0.9428829099654291, - "grad_norm": 1.7747659958804898, - "learning_rate": 3.412751061862007e-08, - "loss": 0.9516, - "step": 12546 - }, - { - "epoch": 0.9429580640312641, - "grad_norm": 2.156685494935727, - "learning_rate": 3.403801591352029e-08, - "loss": 0.9437, - "step": 12547 - }, - { - "epoch": 0.9430332180970991, - "grad_norm": 3.028181115290893, - "learning_rate": 3.394863769919598e-08, - "loss": 0.9446, - "step": 12548 - }, - { - "epoch": 0.943108372162934, - "grad_norm": 4.528964183016346, - "learning_rate": 3.3859375980943797e-08, - "loss": 1.0895, - "step": 12549 - }, - { - "epoch": 0.943183526228769, - "grad_norm": 1.4794247451367126, - "learning_rate": 3.3770230764051946e-08, - "loss": 0.9453, - "step": 12550 - }, - { - "epoch": 0.9432586802946039, - "grad_norm": 2.0524897552123043, - "learning_rate": 3.368120205380376e-08, - "loss": 1.0277, - "step": 12551 - }, - { - "epoch": 0.9433338343604389, - "grad_norm": 1.5195769864531967, - "learning_rate": 3.3592289855473244e-08, - "loss": 0.8482, - "step": 12552 - }, - { - "epoch": 0.9434089884262739, - "grad_norm": 1.4619730986535024, - "learning_rate": 3.3503494174329516e-08, - "loss": 1.0621, - "step": 12553 - }, - { - "epoch": 0.9434841424921088, - "grad_norm": 3.6849492189384083, - "learning_rate": 3.341481501563437e-08, - "loss": 0.9331, - "step": 12554 - }, - { - "epoch": 0.9435592965579438, - "grad_norm": 1.8726190072351998, - "learning_rate": 3.332625238464204e-08, - "loss": 0.9315, - "step": 12555 - }, - { - "epoch": 0.9436344506237787, - "grad_norm": 1.7531228131775298, - "learning_rate": 3.3237806286599667e-08, - "loss": 0.9873, - "step": 12556 - }, - { - "epoch": 0.9437096046896137, - "grad_norm": 1.9088862591205897, - "learning_rate": 3.314947672674862e-08, - "loss": 0.9947, - "step": 12557 - }, - { - "epoch": 0.9437847587554486, - "grad_norm": 2.237779917336613, - "learning_rate": 3.3061263710322917e-08, - "loss": 1.0697, - "step": 12558 - }, - { - "epoch": 0.9438599128212837, - "grad_norm": 0.7393086061954792, - "learning_rate": 3.297316724254906e-08, - "loss": 0.8194, - "step": 12559 - }, - { - "epoch": 0.9439350668871186, - "grad_norm": 1.7280954313594068, - "learning_rate": 3.288518732864731e-08, - "loss": 1.0125, - "step": 12560 - }, - { - "epoch": 0.9440102209529535, - "grad_norm": 2.2527891461636513, - "learning_rate": 3.2797323973830834e-08, - "loss": 0.9997, - "step": 12561 - }, - { - "epoch": 0.9440853750187885, - "grad_norm": 2.8729371672948436, - "learning_rate": 3.270957718330591e-08, - "loss": 1.0374, - "step": 12562 - }, - { - "epoch": 0.9441605290846234, - "grad_norm": 3.9076492855088207, - "learning_rate": 3.2621946962271715e-08, - "loss": 1.0613, - "step": 12563 - }, - { - "epoch": 0.9442356831504585, - "grad_norm": 0.69521099592672, - "learning_rate": 3.2534433315920765e-08, - "loss": 0.8088, - "step": 12564 - }, - { - "epoch": 0.9443108372162934, - "grad_norm": 1.9619754916457866, - "learning_rate": 3.2447036249438455e-08, - "loss": 0.9992, - "step": 12565 - }, - { - "epoch": 0.9443859912821284, - "grad_norm": 2.0403426671671823, - "learning_rate": 3.235975576800376e-08, - "loss": 1.0488, - "step": 12566 - }, - { - "epoch": 0.9444611453479633, - "grad_norm": 1.6255829572392482, - "learning_rate": 3.227259187678788e-08, - "loss": 0.9532, - "step": 12567 - }, - { - "epoch": 0.9445362994137982, - "grad_norm": 2.8505255499020756, - "learning_rate": 3.218554458095602e-08, - "loss": 0.9611, - "step": 12568 - }, - { - "epoch": 0.9446114534796333, - "grad_norm": 2.498703238816364, - "learning_rate": 3.2098613885665816e-08, - "loss": 0.9317, - "step": 12569 - }, - { - "epoch": 0.9446866075454682, - "grad_norm": 2.8164044722764414, - "learning_rate": 3.20117997960685e-08, - "loss": 0.9811, - "step": 12570 - }, - { - "epoch": 0.9447617616113032, - "grad_norm": 2.0855031221997913, - "learning_rate": 3.1925102317307716e-08, - "loss": 0.9023, - "step": 12571 - }, - { - "epoch": 0.9448369156771381, - "grad_norm": 2.0456613917716577, - "learning_rate": 3.183852145452115e-08, - "loss": 0.8733, - "step": 12572 - }, - { - "epoch": 0.9449120697429731, - "grad_norm": 0.722781938499668, - "learning_rate": 3.175205721283847e-08, - "loss": 0.8277, - "step": 12573 - }, - { - "epoch": 0.9449872238088081, - "grad_norm": 1.7299185194551383, - "learning_rate": 3.166570959738335e-08, - "loss": 0.9263, - "step": 12574 - }, - { - "epoch": 0.945062377874643, - "grad_norm": 3.9987891271786986, - "learning_rate": 3.1579478613272594e-08, - "loss": 1.0714, - "step": 12575 - }, - { - "epoch": 0.945137531940478, - "grad_norm": 3.320015209576801, - "learning_rate": 3.149336426561522e-08, - "loss": 0.9505, - "step": 12576 - }, - { - "epoch": 0.9452126860063129, - "grad_norm": 1.8602216059431664, - "learning_rate": 3.140736655951359e-08, - "loss": 0.938, - "step": 12577 - }, - { - "epoch": 0.945287840072148, - "grad_norm": 1.5275266884128331, - "learning_rate": 3.1321485500064084e-08, - "loss": 1.059, - "step": 12578 - }, - { - "epoch": 0.9453629941379829, - "grad_norm": 2.035926834043375, - "learning_rate": 3.123572109235484e-08, - "loss": 0.959, - "step": 12579 - }, - { - "epoch": 0.9454381482038178, - "grad_norm": 2.2292025791971266, - "learning_rate": 3.1150073341468016e-08, - "loss": 0.8924, - "step": 12580 - }, - { - "epoch": 0.9455133022696528, - "grad_norm": 1.5475069546889897, - "learning_rate": 3.1064542252478896e-08, - "loss": 1.0099, - "step": 12581 - }, - { - "epoch": 0.9455884563354877, - "grad_norm": 1.9118618517570398, - "learning_rate": 3.097912783045498e-08, - "loss": 0.9844, - "step": 12582 - }, - { - "epoch": 0.9456636104013227, - "grad_norm": 1.4661048445271143, - "learning_rate": 3.0893830080457764e-08, - "loss": 0.9373, - "step": 12583 - }, - { - "epoch": 0.9457387644671577, - "grad_norm": 2.457340578824375, - "learning_rate": 3.080864900754121e-08, - "loss": 0.9903, - "step": 12584 - }, - { - "epoch": 0.9458139185329927, - "grad_norm": 2.0871249473029394, - "learning_rate": 3.072358461675284e-08, - "loss": 0.9722, - "step": 12585 - }, - { - "epoch": 0.9458890725988276, - "grad_norm": 1.769427433294784, - "learning_rate": 3.063863691313284e-08, - "loss": 0.9371, - "step": 12586 - }, - { - "epoch": 0.9459642266646625, - "grad_norm": 1.759876378984749, - "learning_rate": 3.0553805901714745e-08, - "loss": 0.8882, - "step": 12587 - }, - { - "epoch": 0.9460393807304975, - "grad_norm": 1.4512870404020413, - "learning_rate": 3.04690915875252e-08, - "loss": 1.0544, - "step": 12588 - }, - { - "epoch": 0.9461145347963325, - "grad_norm": 1.5547328923178663, - "learning_rate": 3.038449397558396e-08, - "loss": 0.9985, - "step": 12589 - }, - { - "epoch": 0.9461896888621675, - "grad_norm": 2.4772756614699496, - "learning_rate": 3.030001307090346e-08, - "loss": 0.9441, - "step": 12590 - }, - { - "epoch": 0.9462648429280024, - "grad_norm": 2.107444048916014, - "learning_rate": 3.021564887848971e-08, - "loss": 1.0102, - "step": 12591 - }, - { - "epoch": 0.9463399969938374, - "grad_norm": 1.8584018372481055, - "learning_rate": 3.0131401403341584e-08, - "loss": 0.9028, - "step": 12592 - }, - { - "epoch": 0.9464151510596723, - "grad_norm": 2.0567422897845358, - "learning_rate": 3.00472706504511e-08, - "loss": 0.9992, - "step": 12593 - }, - { - "epoch": 0.9464903051255072, - "grad_norm": 1.3875177365105895, - "learning_rate": 2.9963256624803144e-08, - "loss": 0.9982, - "step": 12594 - }, - { - "epoch": 0.9465654591913423, - "grad_norm": 2.2213241259089074, - "learning_rate": 2.987935933137642e-08, - "loss": 1.0303, - "step": 12595 - }, - { - "epoch": 0.9466406132571772, - "grad_norm": 1.6470655906462095, - "learning_rate": 2.979557877514116e-08, - "loss": 0.9547, - "step": 12596 - }, - { - "epoch": 0.9467157673230122, - "grad_norm": 1.684825543957346, - "learning_rate": 2.9711914961062512e-08, - "loss": 0.955, - "step": 12597 - }, - { - "epoch": 0.9467909213888471, - "grad_norm": 1.620736471124096, - "learning_rate": 2.9628367894097615e-08, - "loss": 0.9582, - "step": 12598 - }, - { - "epoch": 0.946866075454682, - "grad_norm": 4.984611481703981, - "learning_rate": 2.9544937579197183e-08, - "loss": 0.7517, - "step": 12599 - }, - { - "epoch": 0.9469412295205171, - "grad_norm": 1.6333449376271434, - "learning_rate": 2.9461624021304366e-08, - "loss": 0.8855, - "step": 12600 - }, - { - "epoch": 0.947016383586352, - "grad_norm": 1.7251430562726422, - "learning_rate": 2.9378427225356107e-08, - "loss": 0.9751, - "step": 12601 - }, - { - "epoch": 0.947091537652187, - "grad_norm": 1.6334719261743844, - "learning_rate": 2.9295347196282015e-08, - "loss": 0.993, - "step": 12602 - }, - { - "epoch": 0.9471666917180219, - "grad_norm": 2.2582577593030413, - "learning_rate": 2.9212383939004827e-08, - "loss": 0.9794, - "step": 12603 - }, - { - "epoch": 0.947241845783857, - "grad_norm": 1.6783975305309984, - "learning_rate": 2.912953745844082e-08, - "loss": 0.9635, - "step": 12604 - }, - { - "epoch": 0.9473169998496919, - "grad_norm": 1.9781560693503537, - "learning_rate": 2.9046807759498303e-08, - "loss": 0.9653, - "step": 12605 - }, - { - "epoch": 0.9473921539155268, - "grad_norm": 2.820543697105921, - "learning_rate": 2.8964194847080238e-08, - "loss": 0.9069, - "step": 12606 - }, - { - "epoch": 0.9474673079813618, - "grad_norm": 1.704428008108177, - "learning_rate": 2.8881698726080705e-08, - "loss": 1.0193, - "step": 12607 - }, - { - "epoch": 0.9475424620471967, - "grad_norm": 1.8838894878096388, - "learning_rate": 2.879931940138869e-08, - "loss": 0.9022, - "step": 12608 - }, - { - "epoch": 0.9476176161130317, - "grad_norm": 1.4427180743204215, - "learning_rate": 2.8717056877885394e-08, - "loss": 0.9774, - "step": 12609 - }, - { - "epoch": 0.9476927701788667, - "grad_norm": 1.5140770154637093, - "learning_rate": 2.8634911160444696e-08, - "loss": 0.9439, - "step": 12610 - }, - { - "epoch": 0.9477679242447017, - "grad_norm": 1.7013492951576112, - "learning_rate": 2.8552882253934485e-08, - "loss": 0.9704, - "step": 12611 - }, - { - "epoch": 0.9478430783105366, - "grad_norm": 2.0026462452952507, - "learning_rate": 2.8470970163215312e-08, - "loss": 0.9352, - "step": 12612 - }, - { - "epoch": 0.9479182323763715, - "grad_norm": 1.6186976361734502, - "learning_rate": 2.838917489314041e-08, - "loss": 1.0146, - "step": 12613 - }, - { - "epoch": 0.9479933864422065, - "grad_norm": 1.6714542608184948, - "learning_rate": 2.830749644855679e-08, - "loss": 1.0085, - "step": 12614 - }, - { - "epoch": 0.9480685405080415, - "grad_norm": 1.62512991459255, - "learning_rate": 2.8225934834304133e-08, - "loss": 1.0159, - "step": 12615 - }, - { - "epoch": 0.9481436945738765, - "grad_norm": 1.8751579218163168, - "learning_rate": 2.8144490055215465e-08, - "loss": 0.9674, - "step": 12616 - }, - { - "epoch": 0.9482188486397114, - "grad_norm": 1.7818541589733734, - "learning_rate": 2.8063162116116256e-08, - "loss": 0.9454, - "step": 12617 - }, - { - "epoch": 0.9482940027055464, - "grad_norm": 1.5873904032684953, - "learning_rate": 2.7981951021825544e-08, - "loss": 1.0047, - "step": 12618 - }, - { - "epoch": 0.9483691567713813, - "grad_norm": 1.9158876397709048, - "learning_rate": 2.7900856777156147e-08, - "loss": 0.9452, - "step": 12619 - }, - { - "epoch": 0.9484443108372163, - "grad_norm": 1.858682647798284, - "learning_rate": 2.7819879386912214e-08, - "loss": 1.0237, - "step": 12620 - }, - { - "epoch": 0.9485194649030513, - "grad_norm": 1.625195124722078, - "learning_rate": 2.773901885589258e-08, - "loss": 0.9382, - "step": 12621 - }, - { - "epoch": 0.9485946189688862, - "grad_norm": 1.5582518238367247, - "learning_rate": 2.7658275188888526e-08, - "loss": 1.0575, - "step": 12622 - }, - { - "epoch": 0.9486697730347212, - "grad_norm": 0.6720157741954552, - "learning_rate": 2.7577648390683995e-08, - "loss": 0.8303, - "step": 12623 - }, - { - "epoch": 0.9487449271005561, - "grad_norm": 1.768567224770926, - "learning_rate": 2.7497138466056724e-08, - "loss": 1.0471, - "step": 12624 - }, - { - "epoch": 0.948820081166391, - "grad_norm": 2.3679443760859975, - "learning_rate": 2.7416745419777344e-08, - "loss": 0.9073, - "step": 12625 - }, - { - "epoch": 0.9488952352322261, - "grad_norm": 1.4259064083818676, - "learning_rate": 2.7336469256609152e-08, - "loss": 0.9898, - "step": 12626 - }, - { - "epoch": 0.948970389298061, - "grad_norm": 1.4641632533951852, - "learning_rate": 2.7256309981309234e-08, - "loss": 0.8976, - "step": 12627 - }, - { - "epoch": 0.949045543363896, - "grad_norm": 2.4632170277120022, - "learning_rate": 2.71762675986269e-08, - "loss": 0.9336, - "step": 12628 - }, - { - "epoch": 0.9491206974297309, - "grad_norm": 2.1601176198088594, - "learning_rate": 2.709634211330547e-08, - "loss": 1.0602, - "step": 12629 - }, - { - "epoch": 0.949195851495566, - "grad_norm": 2.072303591855896, - "learning_rate": 2.7016533530080044e-08, - "loss": 0.9136, - "step": 12630 - }, - { - "epoch": 0.9492710055614009, - "grad_norm": 1.5128682815237422, - "learning_rate": 2.6936841853680393e-08, - "loss": 0.9536, - "step": 12631 - }, - { - "epoch": 0.9493461596272358, - "grad_norm": 1.5056595527481431, - "learning_rate": 2.6857267088828073e-08, - "loss": 0.9063, - "step": 12632 - }, - { - "epoch": 0.9494213136930708, - "grad_norm": 2.5200633825172525, - "learning_rate": 2.6777809240238425e-08, - "loss": 0.9611, - "step": 12633 - }, - { - "epoch": 0.9494964677589057, - "grad_norm": 6.717558147376091, - "learning_rate": 2.669846831261946e-08, - "loss": 1.0022, - "step": 12634 - }, - { - "epoch": 0.9495716218247408, - "grad_norm": 1.567644500277757, - "learning_rate": 2.661924431067275e-08, - "loss": 0.891, - "step": 12635 - }, - { - "epoch": 0.9496467758905757, - "grad_norm": 1.5698503411998415, - "learning_rate": 2.6540137239092098e-08, - "loss": 0.9478, - "step": 12636 - }, - { - "epoch": 0.9497219299564107, - "grad_norm": 1.9167893273729917, - "learning_rate": 2.6461147102565527e-08, - "loss": 1.0008, - "step": 12637 - }, - { - "epoch": 0.9497970840222456, - "grad_norm": 1.4680230797036962, - "learning_rate": 2.6382273905772858e-08, - "loss": 0.9061, - "step": 12638 - }, - { - "epoch": 0.9498722380880805, - "grad_norm": 13.193650099729853, - "learning_rate": 2.630351765338812e-08, - "loss": 0.9081, - "step": 12639 - }, - { - "epoch": 0.9499473921539155, - "grad_norm": 6.1360946133915, - "learning_rate": 2.6224878350077585e-08, - "loss": 0.9717, - "step": 12640 - }, - { - "epoch": 0.9500225462197505, - "grad_norm": 0.739956041009838, - "learning_rate": 2.614635600050108e-08, - "loss": 0.8613, - "step": 12641 - }, - { - "epoch": 0.9500977002855855, - "grad_norm": 2.2534725095816968, - "learning_rate": 2.6067950609311552e-08, - "loss": 0.9667, - "step": 12642 - }, - { - "epoch": 0.9501728543514204, - "grad_norm": 1.9327007659218984, - "learning_rate": 2.5989662181154835e-08, - "loss": 0.972, - "step": 12643 - }, - { - "epoch": 0.9502480084172553, - "grad_norm": 2.0347288418757272, - "learning_rate": 2.5911490720669227e-08, - "loss": 0.9595, - "step": 12644 - }, - { - "epoch": 0.9503231624830903, - "grad_norm": 2.586419098139492, - "learning_rate": 2.5833436232487238e-08, - "loss": 0.9847, - "step": 12645 - }, - { - "epoch": 0.9503983165489253, - "grad_norm": 3.1505062604707876, - "learning_rate": 2.575549872123384e-08, - "loss": 1.0536, - "step": 12646 - }, - { - "epoch": 0.9504734706147603, - "grad_norm": 2.1434747146599316, - "learning_rate": 2.5677678191526885e-08, - "loss": 0.9032, - "step": 12647 - }, - { - "epoch": 0.9505486246805952, - "grad_norm": 1.7908465556275956, - "learning_rate": 2.5599974647977805e-08, - "loss": 0.8369, - "step": 12648 - }, - { - "epoch": 0.9506237787464302, - "grad_norm": 2.9766669467813403, - "learning_rate": 2.5522388095190472e-08, - "loss": 0.9361, - "step": 12649 - }, - { - "epoch": 0.9506989328122651, - "grad_norm": 2.1814323196072016, - "learning_rate": 2.544491853776276e-08, - "loss": 1.0319, - "step": 12650 - }, - { - "epoch": 0.9507740868781, - "grad_norm": 2.0891776190847136, - "learning_rate": 2.5367565980284332e-08, - "loss": 0.93, - "step": 12651 - }, - { - "epoch": 0.9508492409439351, - "grad_norm": 0.8181775603520741, - "learning_rate": 2.52903304273393e-08, - "loss": 0.9187, - "step": 12652 - }, - { - "epoch": 0.95092439500977, - "grad_norm": 1.7996229969036561, - "learning_rate": 2.5213211883503784e-08, - "loss": 0.9418, - "step": 12653 - }, - { - "epoch": 0.950999549075605, - "grad_norm": 1.8990183126240565, - "learning_rate": 2.5136210353347452e-08, - "loss": 0.9964, - "step": 12654 - }, - { - "epoch": 0.9510747031414399, - "grad_norm": 1.4640347701045051, - "learning_rate": 2.5059325841432667e-08, - "loss": 0.9908, - "step": 12655 - }, - { - "epoch": 0.951149857207275, - "grad_norm": 2.300843009702716, - "learning_rate": 2.4982558352315775e-08, - "loss": 0.9581, - "step": 12656 - }, - { - "epoch": 0.9512250112731099, - "grad_norm": 2.5817381538096433, - "learning_rate": 2.490590789054492e-08, - "loss": 0.8241, - "step": 12657 - }, - { - "epoch": 0.9513001653389448, - "grad_norm": 1.864300335198808, - "learning_rate": 2.4829374460662244e-08, - "loss": 0.8618, - "step": 12658 - }, - { - "epoch": 0.9513753194047798, - "grad_norm": 5.027081581280885, - "learning_rate": 2.4752958067202347e-08, - "loss": 1.0097, - "step": 12659 - }, - { - "epoch": 0.9514504734706147, - "grad_norm": 1.9341355306520756, - "learning_rate": 2.467665871469382e-08, - "loss": 0.9166, - "step": 12660 - }, - { - "epoch": 0.9515256275364498, - "grad_norm": 2.0876711381213338, - "learning_rate": 2.4600476407656835e-08, - "loss": 1.0885, - "step": 12661 - }, - { - "epoch": 0.9516007816022847, - "grad_norm": 1.6698646223281324, - "learning_rate": 2.4524411150605995e-08, - "loss": 1.0339, - "step": 12662 - }, - { - "epoch": 0.9516759356681197, - "grad_norm": 1.7308947725492458, - "learning_rate": 2.444846294804881e-08, - "loss": 0.9078, - "step": 12663 - }, - { - "epoch": 0.9517510897339546, - "grad_norm": 1.456526190635221, - "learning_rate": 2.4372631804484567e-08, - "loss": 0.9264, - "step": 12664 - }, - { - "epoch": 0.9518262437997895, - "grad_norm": 1.5412348408815946, - "learning_rate": 2.429691772440745e-08, - "loss": 0.9266, - "step": 12665 - }, - { - "epoch": 0.9519013978656246, - "grad_norm": 2.3449140582573156, - "learning_rate": 2.422132071230343e-08, - "loss": 0.9605, - "step": 12666 - }, - { - "epoch": 0.9519765519314595, - "grad_norm": 1.8706164298735892, - "learning_rate": 2.414584077265158e-08, - "loss": 0.8952, - "step": 12667 - }, - { - "epoch": 0.9520517059972945, - "grad_norm": 1.9552492110691349, - "learning_rate": 2.407047790992478e-08, - "loss": 1.0418, - "step": 12668 - }, - { - "epoch": 0.9521268600631294, - "grad_norm": 1.731550167241537, - "learning_rate": 2.3995232128588782e-08, - "loss": 1.0258, - "step": 12669 - }, - { - "epoch": 0.9522020141289643, - "grad_norm": 1.9889295678918713, - "learning_rate": 2.39201034331018e-08, - "loss": 0.9372, - "step": 12670 - }, - { - "epoch": 0.9522771681947994, - "grad_norm": 1.8441011773277733, - "learning_rate": 2.3845091827915608e-08, - "loss": 0.9488, - "step": 12671 - }, - { - "epoch": 0.9523523222606343, - "grad_norm": 0.7481155563179116, - "learning_rate": 2.377019731747465e-08, - "loss": 0.7794, - "step": 12672 - }, - { - "epoch": 0.9524274763264693, - "grad_norm": 2.2685072882007393, - "learning_rate": 2.3695419906217594e-08, - "loss": 0.8937, - "step": 12673 - }, - { - "epoch": 0.9525026303923042, - "grad_norm": 2.5838892451368793, - "learning_rate": 2.3620759598574013e-08, - "loss": 0.8971, - "step": 12674 - }, - { - "epoch": 0.9525777844581392, - "grad_norm": 1.731397392526386, - "learning_rate": 2.3546216398969033e-08, - "loss": 1.0613, - "step": 12675 - }, - { - "epoch": 0.9526529385239741, - "grad_norm": 1.6100751964707751, - "learning_rate": 2.3471790311818675e-08, - "loss": 0.9923, - "step": 12676 - }, - { - "epoch": 0.9527280925898091, - "grad_norm": 1.7643361071179167, - "learning_rate": 2.3397481341533632e-08, - "loss": 0.9406, - "step": 12677 - }, - { - "epoch": 0.9528032466556441, - "grad_norm": 1.9275395102601378, - "learning_rate": 2.3323289492516607e-08, - "loss": 1.052, - "step": 12678 - }, - { - "epoch": 0.952878400721479, - "grad_norm": 2.319878739936955, - "learning_rate": 2.324921476916386e-08, - "loss": 0.9003, - "step": 12679 - }, - { - "epoch": 0.952953554787314, - "grad_norm": 2.3298833978316287, - "learning_rate": 2.3175257175864772e-08, - "loss": 0.9228, - "step": 12680 - }, - { - "epoch": 0.9530287088531489, - "grad_norm": 1.4507651086257498, - "learning_rate": 2.310141671700139e-08, - "loss": 0.9733, - "step": 12681 - }, - { - "epoch": 0.953103862918984, - "grad_norm": 2.0852660051990886, - "learning_rate": 2.30276933969491e-08, - "loss": 0.9142, - "step": 12682 - }, - { - "epoch": 0.9531790169848189, - "grad_norm": 1.60204847452497, - "learning_rate": 2.295408722007641e-08, - "loss": 0.904, - "step": 12683 - }, - { - "epoch": 0.9532541710506538, - "grad_norm": 1.6824878857702215, - "learning_rate": 2.2880598190744503e-08, - "loss": 0.9543, - "step": 12684 - }, - { - "epoch": 0.9533293251164888, - "grad_norm": 1.3888578008775232, - "learning_rate": 2.280722631330789e-08, - "loss": 0.9251, - "step": 12685 - }, - { - "epoch": 0.9534044791823237, - "grad_norm": 1.5621849505378322, - "learning_rate": 2.2733971592114654e-08, - "loss": 0.9366, - "step": 12686 - }, - { - "epoch": 0.9534796332481588, - "grad_norm": 3.3408158878496717, - "learning_rate": 2.266083403150487e-08, - "loss": 0.9365, - "step": 12687 - }, - { - "epoch": 0.9535547873139937, - "grad_norm": 7.1852976202861525, - "learning_rate": 2.2587813635812414e-08, - "loss": 1.0342, - "step": 12688 - }, - { - "epoch": 0.9536299413798286, - "grad_norm": 2.256598017262098, - "learning_rate": 2.251491040936404e-08, - "loss": 0.9661, - "step": 12689 - }, - { - "epoch": 0.9537050954456636, - "grad_norm": 4.778973205606236, - "learning_rate": 2.244212435647963e-08, - "loss": 0.978, - "step": 12690 - }, - { - "epoch": 0.9537802495114985, - "grad_norm": 2.624501408046607, - "learning_rate": 2.236945548147173e-08, - "loss": 0.8819, - "step": 12691 - }, - { - "epoch": 0.9538554035773336, - "grad_norm": 1.8539961904385798, - "learning_rate": 2.229690378864668e-08, - "loss": 0.9954, - "step": 12692 - }, - { - "epoch": 0.9539305576431685, - "grad_norm": 1.7368293785316435, - "learning_rate": 2.2224469282303037e-08, - "loss": 0.9664, - "step": 12693 - }, - { - "epoch": 0.9540057117090035, - "grad_norm": 4.275368764933836, - "learning_rate": 2.2152151966733146e-08, - "loss": 0.9626, - "step": 12694 - }, - { - "epoch": 0.9540808657748384, - "grad_norm": 2.122053909062809, - "learning_rate": 2.20799518462218e-08, - "loss": 0.8457, - "step": 12695 - }, - { - "epoch": 0.9541560198406733, - "grad_norm": 2.1866316025333927, - "learning_rate": 2.2007868925047135e-08, - "loss": 0.9655, - "step": 12696 - }, - { - "epoch": 0.9542311739065084, - "grad_norm": 3.321208375232772, - "learning_rate": 2.1935903207480844e-08, - "loss": 1.0268, - "step": 12697 - }, - { - "epoch": 0.9543063279723433, - "grad_norm": 1.7441359089513158, - "learning_rate": 2.1864054697786626e-08, - "loss": 0.9838, - "step": 12698 - }, - { - "epoch": 0.9543814820381783, - "grad_norm": 1.7241979780099665, - "learning_rate": 2.1792323400221745e-08, - "loss": 1.0479, - "step": 12699 - }, - { - "epoch": 0.9544566361040132, - "grad_norm": 1.7205098938883354, - "learning_rate": 2.1720709319037024e-08, - "loss": 0.9523, - "step": 12700 - }, - { - "epoch": 0.9545317901698482, - "grad_norm": 21.15162488679209, - "learning_rate": 2.1649212458475508e-08, - "loss": 0.9581, - "step": 12701 - }, - { - "epoch": 0.9546069442356832, - "grad_norm": 1.725606316973425, - "learning_rate": 2.157783282277381e-08, - "loss": 0.9183, - "step": 12702 - }, - { - "epoch": 0.9546820983015181, - "grad_norm": 1.5939797628498926, - "learning_rate": 2.1506570416161217e-08, - "loss": 0.921, - "step": 12703 - }, - { - "epoch": 0.9547572523673531, - "grad_norm": 1.6806859796880398, - "learning_rate": 2.1435425242861015e-08, - "loss": 0.9984, - "step": 12704 - }, - { - "epoch": 0.954832406433188, - "grad_norm": 2.4052806734881513, - "learning_rate": 2.1364397307087834e-08, - "loss": 1.0041, - "step": 12705 - }, - { - "epoch": 0.954907560499023, - "grad_norm": 1.6058592681031039, - "learning_rate": 2.129348661305075e-08, - "loss": 1.0224, - "step": 12706 - }, - { - "epoch": 0.954982714564858, - "grad_norm": 1.4983703980124181, - "learning_rate": 2.1222693164951956e-08, - "loss": 0.9791, - "step": 12707 - }, - { - "epoch": 0.955057868630693, - "grad_norm": 2.1247017595026123, - "learning_rate": 2.115201696698543e-08, - "loss": 0.9985, - "step": 12708 - }, - { - "epoch": 0.9551330226965279, - "grad_norm": 1.875504680105994, - "learning_rate": 2.1081458023339605e-08, - "loss": 0.875, - "step": 12709 - }, - { - "epoch": 0.9552081767623628, - "grad_norm": 1.6301621825619308, - "learning_rate": 2.101101633819513e-08, - "loss": 0.9793, - "step": 12710 - }, - { - "epoch": 0.9552833308281978, - "grad_norm": 1.650286031264081, - "learning_rate": 2.0940691915726005e-08, - "loss": 0.9353, - "step": 12711 - }, - { - "epoch": 0.9553584848940327, - "grad_norm": 2.0286756194200555, - "learning_rate": 2.0870484760099117e-08, - "loss": 0.9393, - "step": 12712 - }, - { - "epoch": 0.9554336389598678, - "grad_norm": 1.8715169981404425, - "learning_rate": 2.0800394875474915e-08, - "loss": 0.9349, - "step": 12713 - }, - { - "epoch": 0.9555087930257027, - "grad_norm": 1.615812444973216, - "learning_rate": 2.0730422266005853e-08, - "loss": 0.8753, - "step": 12714 - }, - { - "epoch": 0.9555839470915376, - "grad_norm": 2.2395582567333974, - "learning_rate": 2.066056693583862e-08, - "loss": 0.9679, - "step": 12715 - }, - { - "epoch": 0.9556591011573726, - "grad_norm": 2.4707034144947952, - "learning_rate": 2.05908288891119e-08, - "loss": 0.8906, - "step": 12716 - }, - { - "epoch": 0.9557342552232075, - "grad_norm": 1.635964303949321, - "learning_rate": 2.0521208129958613e-08, - "loss": 0.9784, - "step": 12717 - }, - { - "epoch": 0.9558094092890426, - "grad_norm": 1.6397060181968108, - "learning_rate": 2.0451704662503456e-08, - "loss": 0.9887, - "step": 12718 - }, - { - "epoch": 0.9558845633548775, - "grad_norm": 1.5437323503605023, - "learning_rate": 2.0382318490865134e-08, - "loss": 1.0057, - "step": 12719 - }, - { - "epoch": 0.9559597174207125, - "grad_norm": 1.6760881049639982, - "learning_rate": 2.031304961915459e-08, - "loss": 1.0399, - "step": 12720 - }, - { - "epoch": 0.9560348714865474, - "grad_norm": 1.3713225258049138, - "learning_rate": 2.024389805147697e-08, - "loss": 0.9741, - "step": 12721 - }, - { - "epoch": 0.9561100255523823, - "grad_norm": 1.3443273146936248, - "learning_rate": 2.017486379192901e-08, - "loss": 1.0011, - "step": 12722 - }, - { - "epoch": 0.9561851796182174, - "grad_norm": 1.487478685627025, - "learning_rate": 2.0105946844601874e-08, - "loss": 0.9859, - "step": 12723 - }, - { - "epoch": 0.9562603336840523, - "grad_norm": 1.7929337774452114, - "learning_rate": 2.0037147213578964e-08, - "loss": 0.9549, - "step": 12724 - }, - { - "epoch": 0.9563354877498873, - "grad_norm": 1.5836897833621133, - "learning_rate": 1.99684649029368e-08, - "loss": 0.922, - "step": 12725 - }, - { - "epoch": 0.9564106418157222, - "grad_norm": 2.0613633694074567, - "learning_rate": 1.989989991674501e-08, - "loss": 1.0125, - "step": 12726 - }, - { - "epoch": 0.9564857958815572, - "grad_norm": 2.7253709173180245, - "learning_rate": 1.983145225906657e-08, - "loss": 0.8272, - "step": 12727 - }, - { - "epoch": 0.9565609499473922, - "grad_norm": 1.9380171916147975, - "learning_rate": 1.9763121933957128e-08, - "loss": 0.8976, - "step": 12728 - }, - { - "epoch": 0.9566361040132271, - "grad_norm": 1.490766797628898, - "learning_rate": 1.9694908945465438e-08, - "loss": 0.948, - "step": 12729 - }, - { - "epoch": 0.9567112580790621, - "grad_norm": 5.608673646907498, - "learning_rate": 1.9626813297633604e-08, - "loss": 1.0692, - "step": 12730 - }, - { - "epoch": 0.956786412144897, - "grad_norm": 2.069651256982926, - "learning_rate": 1.9558834994496397e-08, - "loss": 1.0003, - "step": 12731 - }, - { - "epoch": 0.956861566210732, - "grad_norm": 1.611898499310508, - "learning_rate": 1.949097404008193e-08, - "loss": 0.8876, - "step": 12732 - }, - { - "epoch": 0.956936720276567, - "grad_norm": 1.5657236033929103, - "learning_rate": 1.9423230438410987e-08, - "loss": 0.9335, - "step": 12733 - }, - { - "epoch": 0.9570118743424019, - "grad_norm": 1.6935819591763221, - "learning_rate": 1.935560419349791e-08, - "loss": 0.9985, - "step": 12734 - }, - { - "epoch": 0.9570870284082369, - "grad_norm": 2.02350678196424, - "learning_rate": 1.9288095309349718e-08, - "loss": 1.0047, - "step": 12735 - }, - { - "epoch": 0.9571621824740718, - "grad_norm": 3.022758624526351, - "learning_rate": 1.9220703789966318e-08, - "loss": 0.9297, - "step": 12736 - }, - { - "epoch": 0.9572373365399068, - "grad_norm": 1.7581651122029405, - "learning_rate": 1.915342963934119e-08, - "loss": 1.0008, - "step": 12737 - }, - { - "epoch": 0.9573124906057418, - "grad_norm": 2.1761636449342894, - "learning_rate": 1.9086272861460695e-08, - "loss": 0.9471, - "step": 12738 - }, - { - "epoch": 0.9573876446715768, - "grad_norm": 1.5983327165641563, - "learning_rate": 1.9019233460303652e-08, - "loss": 1.0248, - "step": 12739 - }, - { - "epoch": 0.9574627987374117, - "grad_norm": 2.3961304742954654, - "learning_rate": 1.8952311439843106e-08, - "loss": 1.0033, - "step": 12740 - }, - { - "epoch": 0.9575379528032466, - "grad_norm": 0.7410863236327263, - "learning_rate": 1.8885506804043884e-08, - "loss": 0.8472, - "step": 12741 - }, - { - "epoch": 0.9576131068690816, - "grad_norm": 2.3334754037748016, - "learning_rate": 1.8818819556864374e-08, - "loss": 0.9695, - "step": 12742 - }, - { - "epoch": 0.9576882609349165, - "grad_norm": 1.675711534735034, - "learning_rate": 1.8752249702256307e-08, - "loss": 0.9855, - "step": 12743 - }, - { - "epoch": 0.9577634150007516, - "grad_norm": 0.6818051289985141, - "learning_rate": 1.8685797244164524e-08, - "loss": 0.7603, - "step": 12744 - }, - { - "epoch": 0.9578385690665865, - "grad_norm": 2.7886895109325116, - "learning_rate": 1.861946218652588e-08, - "loss": 0.9191, - "step": 12745 - }, - { - "epoch": 0.9579137231324215, - "grad_norm": 2.2972875730584663, - "learning_rate": 1.8553244533271227e-08, - "loss": 0.9115, - "step": 12746 - }, - { - "epoch": 0.9579888771982564, - "grad_norm": 2.0035131117161074, - "learning_rate": 1.848714428832454e-08, - "loss": 0.9465, - "step": 12747 - }, - { - "epoch": 0.9580640312640913, - "grad_norm": 1.7437059431082076, - "learning_rate": 1.8421161455602242e-08, - "loss": 1.0667, - "step": 12748 - }, - { - "epoch": 0.9581391853299264, - "grad_norm": 2.7857625122227514, - "learning_rate": 1.8355296039013867e-08, - "loss": 0.9426, - "step": 12749 - }, - { - "epoch": 0.9582143393957613, - "grad_norm": 1.8701112592717721, - "learning_rate": 1.828954804246252e-08, - "loss": 0.8589, - "step": 12750 - }, - { - "epoch": 0.9582894934615963, - "grad_norm": 0.7662672346457615, - "learning_rate": 1.8223917469844198e-08, - "loss": 0.8183, - "step": 12751 - }, - { - "epoch": 0.9583646475274312, - "grad_norm": 1.7367188021667994, - "learning_rate": 1.8158404325047338e-08, - "loss": 0.9879, - "step": 12752 - }, - { - "epoch": 0.9584398015932662, - "grad_norm": 0.8272502886004159, - "learning_rate": 1.8093008611953952e-08, - "loss": 0.8651, - "step": 12753 - }, - { - "epoch": 0.9585149556591012, - "grad_norm": 1.7298581529358308, - "learning_rate": 1.802773033443894e-08, - "loss": 1.0189, - "step": 12754 - }, - { - "epoch": 0.9585901097249361, - "grad_norm": 1.467347307205966, - "learning_rate": 1.796256949637054e-08, - "loss": 0.9479, - "step": 12755 - }, - { - "epoch": 0.9586652637907711, - "grad_norm": 1.4439134579558992, - "learning_rate": 1.789752610160944e-08, - "loss": 0.9894, - "step": 12756 - }, - { - "epoch": 0.958740417856606, - "grad_norm": 2.864272629042397, - "learning_rate": 1.7832600154010114e-08, - "loss": 0.9138, - "step": 12757 - }, - { - "epoch": 0.958815571922441, - "grad_norm": 1.6558324179465238, - "learning_rate": 1.7767791657419484e-08, - "loss": 0.9255, - "step": 12758 - }, - { - "epoch": 0.958890725988276, - "grad_norm": 1.3919893065896858, - "learning_rate": 1.770310061567759e-08, - "loss": 0.9648, - "step": 12759 - }, - { - "epoch": 0.9589658800541109, - "grad_norm": 2.090465189437976, - "learning_rate": 1.763852703261759e-08, - "loss": 1.0086, - "step": 12760 - }, - { - "epoch": 0.9590410341199459, - "grad_norm": 1.5633681365140246, - "learning_rate": 1.7574070912065975e-08, - "loss": 0.8639, - "step": 12761 - }, - { - "epoch": 0.9591161881857808, - "grad_norm": 1.727896472796656, - "learning_rate": 1.7509732257841693e-08, - "loss": 0.9944, - "step": 12762 - }, - { - "epoch": 0.9591913422516158, - "grad_norm": 1.5960313513121658, - "learning_rate": 1.7445511073757468e-08, - "loss": 0.9974, - "step": 12763 - }, - { - "epoch": 0.9592664963174508, - "grad_norm": 2.6454675276383224, - "learning_rate": 1.738140736361826e-08, - "loss": 0.967, - "step": 12764 - }, - { - "epoch": 0.9593416503832858, - "grad_norm": 2.176771503609086, - "learning_rate": 1.7317421131222808e-08, - "loss": 0.767, - "step": 12765 - }, - { - "epoch": 0.9594168044491207, - "grad_norm": 1.7831737770765754, - "learning_rate": 1.725355238036208e-08, - "loss": 0.9662, - "step": 12766 - }, - { - "epoch": 0.9594919585149556, - "grad_norm": 1.8758079695317855, - "learning_rate": 1.7189801114820825e-08, - "loss": 1.0167, - "step": 12767 - }, - { - "epoch": 0.9595671125807906, - "grad_norm": 1.9146169676816744, - "learning_rate": 1.7126167338376908e-08, - "loss": 0.9259, - "step": 12768 - }, - { - "epoch": 0.9596422666466256, - "grad_norm": 2.6421334737694253, - "learning_rate": 1.7062651054800203e-08, - "loss": 0.9799, - "step": 12769 - }, - { - "epoch": 0.9597174207124606, - "grad_norm": 1.6222070019732304, - "learning_rate": 1.6999252267854592e-08, - "loss": 0.9496, - "step": 12770 - }, - { - "epoch": 0.9597925747782955, - "grad_norm": 1.5492503106715734, - "learning_rate": 1.6935970981297067e-08, - "loss": 1.0291, - "step": 12771 - }, - { - "epoch": 0.9598677288441305, - "grad_norm": 1.6598570071530097, - "learning_rate": 1.6872807198876404e-08, - "loss": 0.8744, - "step": 12772 - }, - { - "epoch": 0.9599428829099654, - "grad_norm": 1.8185108748607182, - "learning_rate": 1.680976092433606e-08, - "loss": 0.9919, - "step": 12773 - }, - { - "epoch": 0.9600180369758003, - "grad_norm": 2.551547766302474, - "learning_rate": 1.6746832161411482e-08, - "loss": 0.8863, - "step": 12774 - }, - { - "epoch": 0.9600931910416354, - "grad_norm": 2.019512360888452, - "learning_rate": 1.6684020913831476e-08, - "loss": 0.9632, - "step": 12775 - }, - { - "epoch": 0.9601683451074703, - "grad_norm": 1.2892592771210056, - "learning_rate": 1.6621327185317947e-08, - "loss": 0.9772, - "step": 12776 - }, - { - "epoch": 0.9602434991733053, - "grad_norm": 2.0213147668128397, - "learning_rate": 1.655875097958548e-08, - "loss": 1.0053, - "step": 12777 - }, - { - "epoch": 0.9603186532391402, - "grad_norm": 2.5240327587692057, - "learning_rate": 1.649629230034244e-08, - "loss": 0.994, - "step": 12778 - }, - { - "epoch": 0.9603938073049751, - "grad_norm": 2.3121987223903853, - "learning_rate": 1.6433951151288983e-08, - "loss": 0.994, - "step": 12779 - }, - { - "epoch": 0.9604689613708102, - "grad_norm": 1.7509283600622592, - "learning_rate": 1.6371727536119705e-08, - "loss": 0.9766, - "step": 12780 - }, - { - "epoch": 0.9605441154366451, - "grad_norm": 0.8054374977361057, - "learning_rate": 1.6309621458521437e-08, - "loss": 0.8508, - "step": 12781 - }, - { - "epoch": 0.9606192695024801, - "grad_norm": 2.372350720933824, - "learning_rate": 1.6247632922174348e-08, - "loss": 0.8978, - "step": 12782 - }, - { - "epoch": 0.960694423568315, - "grad_norm": 1.6555470108320915, - "learning_rate": 1.618576193075083e-08, - "loss": 0.8794, - "step": 12783 - }, - { - "epoch": 0.96076957763415, - "grad_norm": 2.652710295168927, - "learning_rate": 1.6124008487917727e-08, - "loss": 1.0607, - "step": 12784 - }, - { - "epoch": 0.960844731699985, - "grad_norm": 1.7323624214713766, - "learning_rate": 1.6062372597333896e-08, - "loss": 0.9132, - "step": 12785 - }, - { - "epoch": 0.9609198857658199, - "grad_norm": 1.4282437732146143, - "learning_rate": 1.600085426265152e-08, - "loss": 0.921, - "step": 12786 - }, - { - "epoch": 0.9609950398316549, - "grad_norm": 1.4871888644730604, - "learning_rate": 1.5939453487515686e-08, - "loss": 1.0705, - "step": 12787 - }, - { - "epoch": 0.9610701938974898, - "grad_norm": 2.9340171457633475, - "learning_rate": 1.5878170275564818e-08, - "loss": 0.9687, - "step": 12788 - }, - { - "epoch": 0.9611453479633248, - "grad_norm": 2.8960429785790223, - "learning_rate": 1.581700463043001e-08, - "loss": 0.9616, - "step": 12789 - }, - { - "epoch": 0.9612205020291598, - "grad_norm": 2.025765982914957, - "learning_rate": 1.5755956555735473e-08, - "loss": 0.9774, - "step": 12790 - }, - { - "epoch": 0.9612956560949948, - "grad_norm": 2.2140623190912727, - "learning_rate": 1.569502605509876e-08, - "loss": 0.9035, - "step": 12791 - }, - { - "epoch": 0.9613708101608297, - "grad_norm": 3.006579971445101, - "learning_rate": 1.5634213132130537e-08, - "loss": 0.9493, - "step": 12792 - }, - { - "epoch": 0.9614459642266646, - "grad_norm": 1.7625282609986972, - "learning_rate": 1.5573517790433255e-08, - "loss": 0.8823, - "step": 12793 - }, - { - "epoch": 0.9615211182924996, - "grad_norm": 2.122491140897525, - "learning_rate": 1.551294003360426e-08, - "loss": 0.918, - "step": 12794 - }, - { - "epoch": 0.9615962723583346, - "grad_norm": 1.6295027338258437, - "learning_rate": 1.5452479865232683e-08, - "loss": 0.9692, - "step": 12795 - }, - { - "epoch": 0.9616714264241696, - "grad_norm": 2.5963317706877653, - "learning_rate": 1.5392137288900764e-08, - "loss": 0.94, - "step": 12796 - }, - { - "epoch": 0.9617465804900045, - "grad_norm": 1.9698590379232013, - "learning_rate": 1.5331912308184537e-08, - "loss": 0.9858, - "step": 12797 - }, - { - "epoch": 0.9618217345558395, - "grad_norm": 1.961563496767927, - "learning_rate": 1.5271804926652032e-08, - "loss": 0.9897, - "step": 12798 - }, - { - "epoch": 0.9618968886216744, - "grad_norm": 2.701881607827642, - "learning_rate": 1.5211815147865514e-08, - "loss": 1.0194, - "step": 12799 - }, - { - "epoch": 0.9619720426875094, - "grad_norm": 1.725174289688861, - "learning_rate": 1.5151942975378808e-08, - "loss": 0.9797, - "step": 12800 - }, - { - "epoch": 0.9620471967533444, - "grad_norm": 1.7452227235549684, - "learning_rate": 1.509218841274018e-08, - "loss": 1.004, - "step": 12801 - }, - { - "epoch": 0.9621223508191793, - "grad_norm": 2.3907615273578795, - "learning_rate": 1.503255146349014e-08, - "loss": 0.8973, - "step": 12802 - }, - { - "epoch": 0.9621975048850143, - "grad_norm": 1.7925083342162913, - "learning_rate": 1.49730321311623e-08, - "loss": 1.0179, - "step": 12803 - }, - { - "epoch": 0.9622726589508492, - "grad_norm": 1.6776842813707156, - "learning_rate": 1.4913630419283617e-08, - "loss": 1.0143, - "step": 12804 - }, - { - "epoch": 0.9623478130166842, - "grad_norm": 2.7966906097836075, - "learning_rate": 1.4854346331373725e-08, - "loss": 0.9593, - "step": 12805 - }, - { - "epoch": 0.9624229670825192, - "grad_norm": 1.9633834599743574, - "learning_rate": 1.4795179870945141e-08, - "loss": 0.9122, - "step": 12806 - }, - { - "epoch": 0.9624981211483541, - "grad_norm": 1.6145148560143812, - "learning_rate": 1.4736131041504173e-08, - "loss": 0.9032, - "step": 12807 - }, - { - "epoch": 0.9625732752141891, - "grad_norm": 0.6914540505992559, - "learning_rate": 1.4677199846549581e-08, - "loss": 0.8583, - "step": 12808 - }, - { - "epoch": 0.962648429280024, - "grad_norm": 1.8537276601956498, - "learning_rate": 1.4618386289573237e-08, - "loss": 0.9727, - "step": 12809 - }, - { - "epoch": 0.9627235833458591, - "grad_norm": 1.9283098871008566, - "learning_rate": 1.4559690374059907e-08, - "loss": 0.979, - "step": 12810 - }, - { - "epoch": 0.962798737411694, - "grad_norm": 1.5742174626574683, - "learning_rate": 1.45011121034877e-08, - "loss": 0.9714, - "step": 12811 - }, - { - "epoch": 0.9628738914775289, - "grad_norm": 1.7448335405325863, - "learning_rate": 1.4442651481327839e-08, - "loss": 0.9762, - "step": 12812 - }, - { - "epoch": 0.9629490455433639, - "grad_norm": 2.1131217177105213, - "learning_rate": 1.4384308511043775e-08, - "loss": 0.973, - "step": 12813 - }, - { - "epoch": 0.9630241996091988, - "grad_norm": 1.63832519646943, - "learning_rate": 1.4326083196092963e-08, - "loss": 0.8325, - "step": 12814 - }, - { - "epoch": 0.9630993536750339, - "grad_norm": 1.4363157120853776, - "learning_rate": 1.4267975539925536e-08, - "loss": 1.0478, - "step": 12815 - }, - { - "epoch": 0.9631745077408688, - "grad_norm": 1.501704995403223, - "learning_rate": 1.4209985545984294e-08, - "loss": 1.0244, - "step": 12816 - }, - { - "epoch": 0.9632496618067038, - "grad_norm": 2.822216458535838, - "learning_rate": 1.4152113217705375e-08, - "loss": 0.9046, - "step": 12817 - }, - { - "epoch": 0.9633248158725387, - "grad_norm": 1.5924580891602176, - "learning_rate": 1.409435855851826e-08, - "loss": 0.9703, - "step": 12818 - }, - { - "epoch": 0.9633999699383736, - "grad_norm": 1.368741705185652, - "learning_rate": 1.4036721571844879e-08, - "loss": 0.9512, - "step": 12819 - }, - { - "epoch": 0.9634751240042086, - "grad_norm": 1.7722007707648177, - "learning_rate": 1.3979202261100497e-08, - "loss": 0.9329, - "step": 12820 - }, - { - "epoch": 0.9635502780700436, - "grad_norm": 1.617059380067959, - "learning_rate": 1.392180062969328e-08, - "loss": 0.8745, - "step": 12821 - }, - { - "epoch": 0.9636254321358786, - "grad_norm": 1.4690990439070866, - "learning_rate": 1.386451668102473e-08, - "loss": 1.0646, - "step": 12822 - }, - { - "epoch": 0.9637005862017135, - "grad_norm": 2.5308278991654274, - "learning_rate": 1.3807350418488795e-08, - "loss": 0.9861, - "step": 12823 - }, - { - "epoch": 0.9637757402675484, - "grad_norm": 2.751176062443569, - "learning_rate": 1.3750301845473211e-08, - "loss": 1.0003, - "step": 12824 - }, - { - "epoch": 0.9638508943333834, - "grad_norm": 1.607030178843036, - "learning_rate": 1.3693370965357942e-08, - "loss": 0.7703, - "step": 12825 - }, - { - "epoch": 0.9639260483992184, - "grad_norm": 1.8366807447725868, - "learning_rate": 1.3636557781516733e-08, - "loss": 0.9318, - "step": 12826 - }, - { - "epoch": 0.9640012024650534, - "grad_norm": 2.024561860522246, - "learning_rate": 1.3579862297315558e-08, - "loss": 0.9306, - "step": 12827 - }, - { - "epoch": 0.9640763565308883, - "grad_norm": 2.0266622526137485, - "learning_rate": 1.3523284516113953e-08, - "loss": 0.9875, - "step": 12828 - }, - { - "epoch": 0.9641515105967233, - "grad_norm": 1.7764767722563481, - "learning_rate": 1.3466824441264791e-08, - "loss": 0.9774, - "step": 12829 - }, - { - "epoch": 0.9642266646625582, - "grad_norm": 1.690909720259444, - "learning_rate": 1.341048207611295e-08, - "loss": 0.9611, - "step": 12830 - }, - { - "epoch": 0.9643018187283932, - "grad_norm": 2.583457472084803, - "learning_rate": 1.3354257423997318e-08, - "loss": 1.0212, - "step": 12831 - }, - { - "epoch": 0.9643769727942282, - "grad_norm": 1.3435174607738356, - "learning_rate": 1.3298150488249449e-08, - "loss": 0.9923, - "step": 12832 - }, - { - "epoch": 0.9644521268600631, - "grad_norm": 1.8213982352500702, - "learning_rate": 1.3242161272193575e-08, - "loss": 0.8575, - "step": 12833 - }, - { - "epoch": 0.9645272809258981, - "grad_norm": 1.9719553693785266, - "learning_rate": 1.3186289779147264e-08, - "loss": 0.8603, - "step": 12834 - }, - { - "epoch": 0.964602434991733, - "grad_norm": 1.7556632232579135, - "learning_rate": 1.3130536012421645e-08, - "loss": 1.0297, - "step": 12835 - }, - { - "epoch": 0.9646775890575681, - "grad_norm": 1.5017806451425848, - "learning_rate": 1.3074899975319853e-08, - "loss": 1.0079, - "step": 12836 - }, - { - "epoch": 0.964752743123403, - "grad_norm": 1.9156026443745173, - "learning_rate": 1.3019381671138806e-08, - "loss": 0.9869, - "step": 12837 - }, - { - "epoch": 0.9648278971892379, - "grad_norm": 2.280120883715214, - "learning_rate": 1.2963981103167875e-08, - "loss": 0.9094, - "step": 12838 - }, - { - "epoch": 0.9649030512550729, - "grad_norm": 2.0312107962713175, - "learning_rate": 1.2908698274689989e-08, - "loss": 0.9854, - "step": 12839 - }, - { - "epoch": 0.9649782053209078, - "grad_norm": 0.6442908098664571, - "learning_rate": 1.2853533188980747e-08, - "loss": 0.8164, - "step": 12840 - }, - { - "epoch": 0.9650533593867429, - "grad_norm": 2.0447977022874486, - "learning_rate": 1.2798485849309092e-08, - "loss": 0.9749, - "step": 12841 - }, - { - "epoch": 0.9651285134525778, - "grad_norm": 1.4854087843647925, - "learning_rate": 1.2743556258936639e-08, - "loss": 0.9018, - "step": 12842 - }, - { - "epoch": 0.9652036675184128, - "grad_norm": 3.0671267689518946, - "learning_rate": 1.2688744421118115e-08, - "loss": 0.9007, - "step": 12843 - }, - { - "epoch": 0.9652788215842477, - "grad_norm": 2.2168579630900185, - "learning_rate": 1.2634050339101366e-08, - "loss": 0.9771, - "step": 12844 - }, - { - "epoch": 0.9653539756500826, - "grad_norm": 1.761804413668755, - "learning_rate": 1.2579474016127355e-08, - "loss": 0.9789, - "step": 12845 - }, - { - "epoch": 0.9654291297159177, - "grad_norm": 2.436969992896272, - "learning_rate": 1.2525015455429943e-08, - "loss": 0.9617, - "step": 12846 - }, - { - "epoch": 0.9655042837817526, - "grad_norm": 1.6370167608876967, - "learning_rate": 1.247067466023588e-08, - "loss": 0.8342, - "step": 12847 - }, - { - "epoch": 0.9655794378475876, - "grad_norm": 1.856329017781188, - "learning_rate": 1.2416451633764813e-08, - "loss": 0.9168, - "step": 12848 - }, - { - "epoch": 0.9656545919134225, - "grad_norm": 2.0415086360676793, - "learning_rate": 1.2362346379230393e-08, - "loss": 0.9483, - "step": 12849 - }, - { - "epoch": 0.9657297459792574, - "grad_norm": 1.7075192519927567, - "learning_rate": 1.2308358899837833e-08, - "loss": 0.8534, - "step": 12850 - }, - { - "epoch": 0.9658049000450925, - "grad_norm": 1.8008399672461808, - "learning_rate": 1.2254489198786354e-08, - "loss": 0.9382, - "step": 12851 - }, - { - "epoch": 0.9658800541109274, - "grad_norm": 2.3928267369239737, - "learning_rate": 1.220073727926807e-08, - "loss": 0.9766, - "step": 12852 - }, - { - "epoch": 0.9659552081767624, - "grad_norm": 1.8826309053214298, - "learning_rate": 1.2147103144467985e-08, - "loss": 0.9586, - "step": 12853 - }, - { - "epoch": 0.9660303622425973, - "grad_norm": 2.0507536968015123, - "learning_rate": 1.2093586797564004e-08, - "loss": 0.8786, - "step": 12854 - }, - { - "epoch": 0.9661055163084323, - "grad_norm": 2.388342771527031, - "learning_rate": 1.2040188241726924e-08, - "loss": 0.9487, - "step": 12855 - }, - { - "epoch": 0.9661806703742672, - "grad_norm": 1.988803997157922, - "learning_rate": 1.1986907480121545e-08, - "loss": 1.0236, - "step": 12856 - }, - { - "epoch": 0.9662558244401022, - "grad_norm": 1.7110711311166953, - "learning_rate": 1.1933744515904232e-08, - "loss": 0.9052, - "step": 12857 - }, - { - "epoch": 0.9663309785059372, - "grad_norm": 2.249225695149815, - "learning_rate": 1.1880699352225354e-08, - "loss": 0.9826, - "step": 12858 - }, - { - "epoch": 0.9664061325717721, - "grad_norm": 1.6278254081403223, - "learning_rate": 1.1827771992228175e-08, - "loss": 1.0868, - "step": 12859 - }, - { - "epoch": 0.9664812866376071, - "grad_norm": 1.744856223055568, - "learning_rate": 1.177496243904863e-08, - "loss": 1.0178, - "step": 12860 - }, - { - "epoch": 0.966556440703442, - "grad_norm": 1.7242499006021612, - "learning_rate": 1.1722270695815994e-08, - "loss": 0.9332, - "step": 12861 - }, - { - "epoch": 0.9666315947692771, - "grad_norm": 1.9075185588368804, - "learning_rate": 1.1669696765652659e-08, - "loss": 1.0551, - "step": 12862 - }, - { - "epoch": 0.966706748835112, - "grad_norm": 1.6847811094333565, - "learning_rate": 1.1617240651673243e-08, - "loss": 0.8994, - "step": 12863 - }, - { - "epoch": 0.9667819029009469, - "grad_norm": 1.6841879944329377, - "learning_rate": 1.1564902356986595e-08, - "loss": 0.9939, - "step": 12864 - }, - { - "epoch": 0.9668570569667819, - "grad_norm": 1.5477306427038595, - "learning_rate": 1.1512681884693565e-08, - "loss": 0.9395, - "step": 12865 - }, - { - "epoch": 0.9669322110326168, - "grad_norm": 1.3763122798815044, - "learning_rate": 1.146057923788879e-08, - "loss": 1.0063, - "step": 12866 - }, - { - "epoch": 0.9670073650984519, - "grad_norm": 2.37153684193294, - "learning_rate": 1.1408594419659135e-08, - "loss": 1.0315, - "step": 12867 - }, - { - "epoch": 0.9670825191642868, - "grad_norm": 2.2539585047966644, - "learning_rate": 1.1356727433085245e-08, - "loss": 0.9641, - "step": 12868 - }, - { - "epoch": 0.9671576732301217, - "grad_norm": 3.5547742895912484, - "learning_rate": 1.1304978281239996e-08, - "loss": 0.9522, - "step": 12869 - }, - { - "epoch": 0.9672328272959567, - "grad_norm": 1.5554825173366984, - "learning_rate": 1.125334696719027e-08, - "loss": 0.9301, - "step": 12870 - }, - { - "epoch": 0.9673079813617916, - "grad_norm": 1.6511125599495537, - "learning_rate": 1.120183349399495e-08, - "loss": 0.9541, - "step": 12871 - }, - { - "epoch": 0.9673831354276267, - "grad_norm": 1.521195547090042, - "learning_rate": 1.1150437864706708e-08, - "loss": 1.0033, - "step": 12872 - }, - { - "epoch": 0.9674582894934616, - "grad_norm": 1.747731577819111, - "learning_rate": 1.1099160082371106e-08, - "loss": 1.03, - "step": 12873 - }, - { - "epoch": 0.9675334435592966, - "grad_norm": 1.828067550408576, - "learning_rate": 1.1048000150025939e-08, - "loss": 0.9372, - "step": 12874 - }, - { - "epoch": 0.9676085976251315, - "grad_norm": 0.8394485608416595, - "learning_rate": 1.0996958070703e-08, - "loss": 0.856, - "step": 12875 - }, - { - "epoch": 0.9676837516909664, - "grad_norm": 1.5818219081691272, - "learning_rate": 1.0946033847426761e-08, - "loss": 1.0634, - "step": 12876 - }, - { - "epoch": 0.9677589057568015, - "grad_norm": 2.2530775215977052, - "learning_rate": 1.0895227483214587e-08, - "loss": 0.8952, - "step": 12877 - }, - { - "epoch": 0.9678340598226364, - "grad_norm": 2.5758896818469643, - "learning_rate": 1.0844538981076956e-08, - "loss": 0.9653, - "step": 12878 - }, - { - "epoch": 0.9679092138884714, - "grad_norm": 10.066363807147622, - "learning_rate": 1.0793968344017467e-08, - "loss": 0.9871, - "step": 12879 - }, - { - "epoch": 0.9679843679543063, - "grad_norm": 1.8567306564143196, - "learning_rate": 1.0743515575032392e-08, - "loss": 0.9309, - "step": 12880 - }, - { - "epoch": 0.9680595220201413, - "grad_norm": 2.898337802046288, - "learning_rate": 1.0693180677111557e-08, - "loss": 0.8936, - "step": 12881 - }, - { - "epoch": 0.9681346760859763, - "grad_norm": 1.8757561748310978, - "learning_rate": 1.0642963653237246e-08, - "loss": 0.9375, - "step": 12882 - }, - { - "epoch": 0.9682098301518112, - "grad_norm": 1.6117709204569108, - "learning_rate": 1.0592864506385079e-08, - "loss": 0.9352, - "step": 12883 - }, - { - "epoch": 0.9682849842176462, - "grad_norm": 1.6611948870722795, - "learning_rate": 1.054288323952357e-08, - "loss": 1.0642, - "step": 12884 - }, - { - "epoch": 0.9683601382834811, - "grad_norm": 1.6936684648659033, - "learning_rate": 1.0493019855614572e-08, - "loss": 0.959, - "step": 12885 - }, - { - "epoch": 0.9684352923493161, - "grad_norm": 2.3679737761823927, - "learning_rate": 1.0443274357612386e-08, - "loss": 1.0266, - "step": 12886 - }, - { - "epoch": 0.968510446415151, - "grad_norm": 2.0659610723057478, - "learning_rate": 1.0393646748464658e-08, - "loss": 0.8843, - "step": 12887 - }, - { - "epoch": 0.9685856004809861, - "grad_norm": 1.8229924068156778, - "learning_rate": 1.0344137031112143e-08, - "loss": 0.946, - "step": 12888 - }, - { - "epoch": 0.968660754546821, - "grad_norm": 2.199939778977581, - "learning_rate": 1.0294745208488276e-08, - "loss": 0.9309, - "step": 12889 - }, - { - "epoch": 0.9687359086126559, - "grad_norm": 1.4196321173413329, - "learning_rate": 1.0245471283520046e-08, - "loss": 1.0275, - "step": 12890 - }, - { - "epoch": 0.9688110626784909, - "grad_norm": 2.576069700399886, - "learning_rate": 1.0196315259126897e-08, - "loss": 0.9105, - "step": 12891 - }, - { - "epoch": 0.9688862167443258, - "grad_norm": 0.7839846623852121, - "learning_rate": 1.0147277138221388e-08, - "loss": 0.8886, - "step": 12892 - }, - { - "epoch": 0.9689613708101609, - "grad_norm": 2.083061378530665, - "learning_rate": 1.0098356923709417e-08, - "loss": 0.9795, - "step": 12893 - }, - { - "epoch": 0.9690365248759958, - "grad_norm": 1.647560935159819, - "learning_rate": 1.0049554618489552e-08, - "loss": 0.996, - "step": 12894 - }, - { - "epoch": 0.9691116789418307, - "grad_norm": 1.5705040202849923, - "learning_rate": 1.0000870225453705e-08, - "loss": 0.9693, - "step": 12895 - }, - { - "epoch": 0.9691868330076657, - "grad_norm": 1.5240094798386463, - "learning_rate": 9.952303747486678e-09, - "loss": 1.0025, - "step": 12896 - }, - { - "epoch": 0.9692619870735006, - "grad_norm": 1.9650918185886401, - "learning_rate": 9.903855187465948e-09, - "loss": 0.9215, - "step": 12897 - }, - { - "epoch": 0.9693371411393357, - "grad_norm": 1.6300260139214928, - "learning_rate": 9.855524548262106e-09, - "loss": 0.9803, - "step": 12898 - }, - { - "epoch": 0.9694122952051706, - "grad_norm": 1.4127731768006937, - "learning_rate": 9.807311832739529e-09, - "loss": 1.0064, - "step": 12899 - }, - { - "epoch": 0.9694874492710056, - "grad_norm": 1.9586060787219173, - "learning_rate": 9.759217043754597e-09, - "loss": 0.9707, - "step": 12900 - }, - { - "epoch": 0.9695626033368405, - "grad_norm": 2.090518564385878, - "learning_rate": 9.711240184157255e-09, - "loss": 0.8881, - "step": 12901 - }, - { - "epoch": 0.9696377574026754, - "grad_norm": 2.131376572595224, - "learning_rate": 9.663381256790116e-09, - "loss": 0.8773, - "step": 12902 - }, - { - "epoch": 0.9697129114685105, - "grad_norm": 2.1098393812108203, - "learning_rate": 9.615640264489134e-09, - "loss": 1.0111, - "step": 12903 - }, - { - "epoch": 0.9697880655343454, - "grad_norm": 1.617853147465089, - "learning_rate": 9.568017210083379e-09, - "loss": 1.0075, - "step": 12904 - }, - { - "epoch": 0.9698632196001804, - "grad_norm": 1.5504920363087913, - "learning_rate": 9.52051209639415e-09, - "loss": 0.8931, - "step": 12905 - }, - { - "epoch": 0.9699383736660153, - "grad_norm": 3.0594378362637804, - "learning_rate": 9.473124926236975e-09, - "loss": 0.9889, - "step": 12906 - }, - { - "epoch": 0.9700135277318503, - "grad_norm": 1.5965597778464358, - "learning_rate": 9.42585570241916e-09, - "loss": 0.801, - "step": 12907 - }, - { - "epoch": 0.9700886817976853, - "grad_norm": 2.345232685314317, - "learning_rate": 9.378704427742024e-09, - "loss": 1.0239, - "step": 12908 - }, - { - "epoch": 0.9701638358635202, - "grad_norm": 1.5796244079487307, - "learning_rate": 9.331671104998884e-09, - "loss": 0.9432, - "step": 12909 - }, - { - "epoch": 0.9702389899293552, - "grad_norm": 2.5039871845465855, - "learning_rate": 9.284755736977513e-09, - "loss": 0.9707, - "step": 12910 - }, - { - "epoch": 0.9703141439951901, - "grad_norm": 1.6335322057878723, - "learning_rate": 9.237958326457018e-09, - "loss": 1.0578, - "step": 12911 - }, - { - "epoch": 0.9703892980610251, - "grad_norm": 1.7034435849176428, - "learning_rate": 9.191278876210518e-09, - "loss": 1.0173, - "step": 12912 - }, - { - "epoch": 0.97046445212686, - "grad_norm": 1.755406814657411, - "learning_rate": 9.144717389004241e-09, - "loss": 0.9761, - "step": 12913 - }, - { - "epoch": 0.970539606192695, - "grad_norm": 2.0016238058023874, - "learning_rate": 9.098273867596873e-09, - "loss": 0.9197, - "step": 12914 - }, - { - "epoch": 0.97061476025853, - "grad_norm": 1.8268054209981845, - "learning_rate": 9.051948314740432e-09, - "loss": 0.916, - "step": 12915 - }, - { - "epoch": 0.9706899143243649, - "grad_norm": 1.830786048001882, - "learning_rate": 9.005740733180055e-09, - "loss": 0.8914, - "step": 12916 - }, - { - "epoch": 0.9707650683901999, - "grad_norm": 1.9635819313784597, - "learning_rate": 8.959651125653556e-09, - "loss": 0.8131, - "step": 12917 - }, - { - "epoch": 0.9708402224560349, - "grad_norm": 3.3274506794679906, - "learning_rate": 8.913679494891857e-09, - "loss": 1.0276, - "step": 12918 - }, - { - "epoch": 0.9709153765218699, - "grad_norm": 1.3979854172667767, - "learning_rate": 8.867825843618782e-09, - "loss": 0.9737, - "step": 12919 - }, - { - "epoch": 0.9709905305877048, - "grad_norm": 2.3283242774009945, - "learning_rate": 8.822090174551933e-09, - "loss": 1.1261, - "step": 12920 - }, - { - "epoch": 0.9710656846535397, - "grad_norm": 0.7791998092892001, - "learning_rate": 8.776472490400922e-09, - "loss": 0.8415, - "step": 12921 - }, - { - "epoch": 0.9711408387193747, - "grad_norm": 1.8268769397392506, - "learning_rate": 8.730972793868696e-09, - "loss": 1.0368, - "step": 12922 - }, - { - "epoch": 0.9712159927852096, - "grad_norm": 1.6840496817755932, - "learning_rate": 8.685591087651323e-09, - "loss": 0.9357, - "step": 12923 - }, - { - "epoch": 0.9712911468510447, - "grad_norm": 0.7298031798977413, - "learning_rate": 8.640327374438205e-09, - "loss": 0.8495, - "step": 12924 - }, - { - "epoch": 0.9713663009168796, - "grad_norm": 3.0846301436094037, - "learning_rate": 8.595181656910978e-09, - "loss": 0.9347, - "step": 12925 - }, - { - "epoch": 0.9714414549827146, - "grad_norm": 1.626426144325326, - "learning_rate": 8.55015393774483e-09, - "loss": 0.8294, - "step": 12926 - }, - { - "epoch": 0.9715166090485495, - "grad_norm": 1.5144329298954151, - "learning_rate": 8.505244219607854e-09, - "loss": 0.8663, - "step": 12927 - }, - { - "epoch": 0.9715917631143844, - "grad_norm": 1.861220461762804, - "learning_rate": 8.460452505161031e-09, - "loss": 1.0627, - "step": 12928 - }, - { - "epoch": 0.9716669171802195, - "grad_norm": 2.1040479292904, - "learning_rate": 8.415778797058681e-09, - "loss": 1.0001, - "step": 12929 - }, - { - "epoch": 0.9717420712460544, - "grad_norm": 1.7925564825929845, - "learning_rate": 8.371223097947356e-09, - "loss": 0.9502, - "step": 12930 - }, - { - "epoch": 0.9718172253118894, - "grad_norm": 2.097440399487107, - "learning_rate": 8.326785410468052e-09, - "loss": 1.0301, - "step": 12931 - }, - { - "epoch": 0.9718923793777243, - "grad_norm": 1.3308956193929102, - "learning_rate": 8.282465737252887e-09, - "loss": 0.9953, - "step": 12932 - }, - { - "epoch": 0.9719675334435594, - "grad_norm": 1.9726547996591204, - "learning_rate": 8.238264080928647e-09, - "loss": 0.9936, - "step": 12933 - }, - { - "epoch": 0.9720426875093943, - "grad_norm": 0.6817068597918209, - "learning_rate": 8.19418044411413e-09, - "loss": 0.8491, - "step": 12934 - }, - { - "epoch": 0.9721178415752292, - "grad_norm": 1.3839584171575179, - "learning_rate": 8.150214829421687e-09, - "loss": 0.9243, - "step": 12935 - }, - { - "epoch": 0.9721929956410642, - "grad_norm": 1.756576103262288, - "learning_rate": 8.106367239456124e-09, - "loss": 0.9425, - "step": 12936 - }, - { - "epoch": 0.9722681497068991, - "grad_norm": 3.9719309637057307, - "learning_rate": 8.062637676816031e-09, - "loss": 1.0559, - "step": 12937 - }, - { - "epoch": 0.9723433037727341, - "grad_norm": 3.4789386301441954, - "learning_rate": 8.019026144092001e-09, - "loss": 0.9456, - "step": 12938 - }, - { - "epoch": 0.9724184578385691, - "grad_norm": 0.7857209257984732, - "learning_rate": 7.975532643868632e-09, - "loss": 0.8704, - "step": 12939 - }, - { - "epoch": 0.972493611904404, - "grad_norm": 2.1333789562274266, - "learning_rate": 7.932157178722976e-09, - "loss": 0.979, - "step": 12940 - }, - { - "epoch": 0.972568765970239, - "grad_norm": 1.6571267164787893, - "learning_rate": 7.888899751224976e-09, - "loss": 0.9071, - "step": 12941 - }, - { - "epoch": 0.9726439200360739, - "grad_norm": 2.686796584226235, - "learning_rate": 7.845760363938136e-09, - "loss": 0.8923, - "step": 12942 - }, - { - "epoch": 0.9727190741019089, - "grad_norm": 1.708678264995796, - "learning_rate": 7.802739019418192e-09, - "loss": 0.9688, - "step": 12943 - }, - { - "epoch": 0.9727942281677439, - "grad_norm": 1.5630564652419137, - "learning_rate": 7.75983572021488e-09, - "loss": 0.9138, - "step": 12944 - }, - { - "epoch": 0.9728693822335789, - "grad_norm": 1.744979263772697, - "learning_rate": 7.717050468870168e-09, - "loss": 1.007, - "step": 12945 - }, - { - "epoch": 0.9729445362994138, - "grad_norm": 2.1423157189614868, - "learning_rate": 7.674383267918916e-09, - "loss": 0.992, - "step": 12946 - }, - { - "epoch": 0.9730196903652487, - "grad_norm": 0.7070997215541809, - "learning_rate": 7.631834119889768e-09, - "loss": 0.8264, - "step": 12947 - }, - { - "epoch": 0.9730948444310837, - "grad_norm": 2.661143294847474, - "learning_rate": 7.589403027303598e-09, - "loss": 0.9757, - "step": 12948 - }, - { - "epoch": 0.9731699984969187, - "grad_norm": 1.7125955847356606, - "learning_rate": 7.547089992674838e-09, - "loss": 0.9387, - "step": 12949 - }, - { - "epoch": 0.9732451525627537, - "grad_norm": 1.7969690712892545, - "learning_rate": 7.504895018510593e-09, - "loss": 0.9157, - "step": 12950 - }, - { - "epoch": 0.9733203066285886, - "grad_norm": 1.8226561296563533, - "learning_rate": 7.462818107311086e-09, - "loss": 1.0313, - "step": 12951 - }, - { - "epoch": 0.9733954606944236, - "grad_norm": 1.2925513017164811, - "learning_rate": 7.420859261569434e-09, - "loss": 0.9612, - "step": 12952 - }, - { - "epoch": 0.9734706147602585, - "grad_norm": 1.8709107630055162, - "learning_rate": 7.379018483772092e-09, - "loss": 1.0489, - "step": 12953 - }, - { - "epoch": 0.9735457688260934, - "grad_norm": 1.627148748133823, - "learning_rate": 7.337295776398189e-09, - "loss": 1.0114, - "step": 12954 - }, - { - "epoch": 0.9736209228919285, - "grad_norm": 2.287833342731452, - "learning_rate": 7.295691141919746e-09, - "loss": 1.0182, - "step": 12955 - }, - { - "epoch": 0.9736960769577634, - "grad_norm": 2.06178144017783, - "learning_rate": 7.254204582802348e-09, - "loss": 0.9045, - "step": 12956 - }, - { - "epoch": 0.9737712310235984, - "grad_norm": 1.8304225471321656, - "learning_rate": 7.2128361015040274e-09, - "loss": 1.0184, - "step": 12957 - }, - { - "epoch": 0.9738463850894333, - "grad_norm": 1.5957500399506312, - "learning_rate": 7.171585700476157e-09, - "loss": 0.9122, - "step": 12958 - }, - { - "epoch": 0.9739215391552682, - "grad_norm": 1.7867642095376264, - "learning_rate": 7.13045338216256e-09, - "loss": 0.9862, - "step": 12959 - }, - { - "epoch": 0.9739966932211033, - "grad_norm": 1.3235069770095864, - "learning_rate": 7.0894391490010644e-09, - "loss": 0.9008, - "step": 12960 - }, - { - "epoch": 0.9740718472869382, - "grad_norm": 2.217895343206701, - "learning_rate": 7.048543003421725e-09, - "loss": 0.9064, - "step": 12961 - }, - { - "epoch": 0.9741470013527732, - "grad_norm": 2.106333369294557, - "learning_rate": 7.007764947847494e-09, - "loss": 0.871, - "step": 12962 - }, - { - "epoch": 0.9742221554186081, - "grad_norm": 3.482220723605515, - "learning_rate": 6.967104984695105e-09, - "loss": 1.0382, - "step": 12963 - }, - { - "epoch": 0.9742973094844432, - "grad_norm": 1.7597040468337957, - "learning_rate": 6.9265631163735186e-09, - "loss": 0.9533, - "step": 12964 - }, - { - "epoch": 0.9743724635502781, - "grad_norm": 1.6639887117314756, - "learning_rate": 6.8861393452848134e-09, - "loss": 0.9861, - "step": 12965 - }, - { - "epoch": 0.974447617616113, - "grad_norm": 2.243832825218509, - "learning_rate": 6.84583367382463e-09, - "loss": 1.0242, - "step": 12966 - }, - { - "epoch": 0.974522771681948, - "grad_norm": 2.2643292108718467, - "learning_rate": 6.80564610438128e-09, - "loss": 0.9026, - "step": 12967 - }, - { - "epoch": 0.9745979257477829, - "grad_norm": 1.6045933028137356, - "learning_rate": 6.765576639335746e-09, - "loss": 0.9518, - "step": 12968 - }, - { - "epoch": 0.974673079813618, - "grad_norm": 1.740271982114166, - "learning_rate": 6.725625281062352e-09, - "loss": 0.9511, - "step": 12969 - }, - { - "epoch": 0.9747482338794529, - "grad_norm": 3.096526624252501, - "learning_rate": 6.6857920319283165e-09, - "loss": 0.9977, - "step": 12970 - }, - { - "epoch": 0.9748233879452879, - "grad_norm": 3.1125600786040937, - "learning_rate": 6.646076894294195e-09, - "loss": 0.9545, - "step": 12971 - }, - { - "epoch": 0.9748985420111228, - "grad_norm": 2.271500014220936, - "learning_rate": 6.606479870512993e-09, - "loss": 0.9764, - "step": 12972 - }, - { - "epoch": 0.9749736960769577, - "grad_norm": 2.2290348715161588, - "learning_rate": 6.5670009629312794e-09, - "loss": 1.0427, - "step": 12973 - }, - { - "epoch": 0.9750488501427927, - "grad_norm": 2.127444376055851, - "learning_rate": 6.5276401738878495e-09, - "loss": 1.0007, - "step": 12974 - }, - { - "epoch": 0.9751240042086277, - "grad_norm": 1.5557829098464189, - "learning_rate": 6.488397505715504e-09, - "loss": 0.979, - "step": 12975 - }, - { - "epoch": 0.9751991582744627, - "grad_norm": 1.8238204096379496, - "learning_rate": 6.44927296073905e-09, - "loss": 0.9824, - "step": 12976 - }, - { - "epoch": 0.9752743123402976, - "grad_norm": 2.084639356054666, - "learning_rate": 6.410266541277077e-09, - "loss": 0.8564, - "step": 12977 - }, - { - "epoch": 0.9753494664061326, - "grad_norm": 1.5516150704781178, - "learning_rate": 6.371378249640624e-09, - "loss": 0.9666, - "step": 12978 - }, - { - "epoch": 0.9754246204719675, - "grad_norm": 1.4966764186946393, - "learning_rate": 6.332608088134295e-09, - "loss": 0.9519, - "step": 12979 - }, - { - "epoch": 0.9754997745378025, - "grad_norm": 1.7364617930261312, - "learning_rate": 6.293956059055139e-09, - "loss": 1.018, - "step": 12980 - }, - { - "epoch": 0.9755749286036375, - "grad_norm": 1.7147950185051601, - "learning_rate": 6.255422164693547e-09, - "loss": 0.9561, - "step": 12981 - }, - { - "epoch": 0.9756500826694724, - "grad_norm": 1.953870585229345, - "learning_rate": 6.217006407332803e-09, - "loss": 0.9106, - "step": 12982 - }, - { - "epoch": 0.9757252367353074, - "grad_norm": 1.7461996366410253, - "learning_rate": 6.178708789248866e-09, - "loss": 0.9905, - "step": 12983 - }, - { - "epoch": 0.9758003908011423, - "grad_norm": 2.5810418476849097, - "learning_rate": 6.140529312711473e-09, - "loss": 0.9113, - "step": 12984 - }, - { - "epoch": 0.9758755448669773, - "grad_norm": 1.5901212332949075, - "learning_rate": 6.102467979982817e-09, - "loss": 0.9888, - "step": 12985 - }, - { - "epoch": 0.9759506989328123, - "grad_norm": 5.101090273535625, - "learning_rate": 6.064524793317982e-09, - "loss": 0.9537, - "step": 12986 - }, - { - "epoch": 0.9760258529986472, - "grad_norm": 1.8477927030537982, - "learning_rate": 6.026699754965392e-09, - "loss": 0.9769, - "step": 12987 - }, - { - "epoch": 0.9761010070644822, - "grad_norm": 0.7231714010559155, - "learning_rate": 5.988992867166143e-09, - "loss": 0.8642, - "step": 12988 - }, - { - "epoch": 0.9761761611303171, - "grad_norm": 2.6369614326975244, - "learning_rate": 5.951404132154669e-09, - "loss": 0.9443, - "step": 12989 - }, - { - "epoch": 0.9762513151961522, - "grad_norm": 1.4925309030729674, - "learning_rate": 5.9139335521583015e-09, - "loss": 0.9344, - "step": 12990 - }, - { - "epoch": 0.9763264692619871, - "grad_norm": 1.8099990559148698, - "learning_rate": 5.876581129397262e-09, - "loss": 0.9454, - "step": 12991 - }, - { - "epoch": 0.976401623327822, - "grad_norm": 2.2041328235280337, - "learning_rate": 5.839346866084893e-09, - "loss": 0.9176, - "step": 12992 - }, - { - "epoch": 0.976476777393657, - "grad_norm": 1.9071393674264394, - "learning_rate": 5.802230764426985e-09, - "loss": 0.9473, - "step": 12993 - }, - { - "epoch": 0.9765519314594919, - "grad_norm": 2.43022154798865, - "learning_rate": 5.765232826623556e-09, - "loss": 1.03, - "step": 12994 - }, - { - "epoch": 0.976627085525327, - "grad_norm": 1.5697226104363822, - "learning_rate": 5.728353054866408e-09, - "loss": 1.028, - "step": 12995 - }, - { - "epoch": 0.9767022395911619, - "grad_norm": 1.6541613959061112, - "learning_rate": 5.691591451340905e-09, - "loss": 0.9956, - "step": 12996 - }, - { - "epoch": 0.9767773936569969, - "grad_norm": 1.971929737425566, - "learning_rate": 5.654948018225303e-09, - "loss": 0.9745, - "step": 12997 - }, - { - "epoch": 0.9768525477228318, - "grad_norm": 1.7100571409795169, - "learning_rate": 5.6184227576909774e-09, - "loss": 0.9818, - "step": 12998 - }, - { - "epoch": 0.9769277017886667, - "grad_norm": 2.7359136121786527, - "learning_rate": 5.582015671901974e-09, - "loss": 0.999, - "step": 12999 - }, - { - "epoch": 0.9770028558545018, - "grad_norm": 1.7567289954944216, - "learning_rate": 5.5457267630159014e-09, - "loss": 0.9425, - "step": 13000 - }, - { - "epoch": 0.9770780099203367, - "grad_norm": 3.2104811146823358, - "learning_rate": 5.509556033182372e-09, - "loss": 1.0262, - "step": 13001 - }, - { - "epoch": 0.9771531639861717, - "grad_norm": 1.3941071022900786, - "learning_rate": 5.47350348454545e-09, - "loss": 0.9116, - "step": 13002 - }, - { - "epoch": 0.9772283180520066, - "grad_norm": 2.666376624625975, - "learning_rate": 5.437569119240981e-09, - "loss": 0.9701, - "step": 13003 - }, - { - "epoch": 0.9773034721178415, - "grad_norm": 0.8271993105758214, - "learning_rate": 5.40175293939793e-09, - "loss": 0.8665, - "step": 13004 - }, - { - "epoch": 0.9773786261836765, - "grad_norm": 1.4732260980173806, - "learning_rate": 5.3660549471392645e-09, - "loss": 1.0261, - "step": 13005 - }, - { - "epoch": 0.9774537802495115, - "grad_norm": 3.3550898701076584, - "learning_rate": 5.330475144579516e-09, - "loss": 0.8856, - "step": 13006 - }, - { - "epoch": 0.9775289343153465, - "grad_norm": 2.1685601055108847, - "learning_rate": 5.295013533827219e-09, - "loss": 0.7498, - "step": 13007 - }, - { - "epoch": 0.9776040883811814, - "grad_norm": 2.6753110152219173, - "learning_rate": 5.259670116983805e-09, - "loss": 0.9772, - "step": 13008 - }, - { - "epoch": 0.9776792424470164, - "grad_norm": 1.200076712699453, - "learning_rate": 5.224444896143154e-09, - "loss": 0.9141, - "step": 13009 - }, - { - "epoch": 0.9777543965128513, - "grad_norm": 2.4728499543067493, - "learning_rate": 5.189337873392485e-09, - "loss": 0.9762, - "step": 13010 - }, - { - "epoch": 0.9778295505786863, - "grad_norm": 1.6267510296619159, - "learning_rate": 5.1543490508123565e-09, - "loss": 0.8943, - "step": 13011 - }, - { - "epoch": 0.9779047046445213, - "grad_norm": 2.0901094356888392, - "learning_rate": 5.119478430475999e-09, - "loss": 0.957, - "step": 13012 - }, - { - "epoch": 0.9779798587103562, - "grad_norm": 2.473205312601449, - "learning_rate": 5.0847260144490926e-09, - "loss": 0.9566, - "step": 13013 - }, - { - "epoch": 0.9780550127761912, - "grad_norm": 3.1368648357268487, - "learning_rate": 5.0500918047915455e-09, - "loss": 0.9907, - "step": 13014 - }, - { - "epoch": 0.9781301668420261, - "grad_norm": 1.5822720568079849, - "learning_rate": 5.01557580355505e-09, - "loss": 1.066, - "step": 13015 - }, - { - "epoch": 0.9782053209078612, - "grad_norm": 1.5802413045133683, - "learning_rate": 4.981178012785081e-09, - "loss": 0.885, - "step": 13016 - }, - { - "epoch": 0.9782804749736961, - "grad_norm": 0.7895923006727829, - "learning_rate": 4.946898434519564e-09, - "loss": 0.7755, - "step": 13017 - }, - { - "epoch": 0.978355629039531, - "grad_norm": 2.539985371645157, - "learning_rate": 4.912737070789985e-09, - "loss": 0.9666, - "step": 13018 - }, - { - "epoch": 0.978430783105366, - "grad_norm": 2.217068424481804, - "learning_rate": 4.878693923620725e-09, - "loss": 0.8981, - "step": 13019 - }, - { - "epoch": 0.9785059371712009, - "grad_norm": 1.8217238819924604, - "learning_rate": 4.84476899502817e-09, - "loss": 0.9888, - "step": 13020 - }, - { - "epoch": 0.978581091237036, - "grad_norm": 2.04011561066144, - "learning_rate": 4.810962287023379e-09, - "loss": 0.9173, - "step": 13021 - }, - { - "epoch": 0.9786562453028709, - "grad_norm": 1.6571518943568386, - "learning_rate": 4.777273801608972e-09, - "loss": 1.0572, - "step": 13022 - }, - { - "epoch": 0.9787313993687059, - "grad_norm": 2.185042935959961, - "learning_rate": 4.743703540781574e-09, - "loss": 1.0715, - "step": 13023 - }, - { - "epoch": 0.9788065534345408, - "grad_norm": 2.8445979939514094, - "learning_rate": 4.710251506529816e-09, - "loss": 0.968, - "step": 13024 - }, - { - "epoch": 0.9788817075003757, - "grad_norm": 2.960552260971153, - "learning_rate": 4.6769177008363355e-09, - "loss": 1.0057, - "step": 13025 - }, - { - "epoch": 0.9789568615662108, - "grad_norm": 1.5523875490948897, - "learning_rate": 4.643702125675775e-09, - "loss": 1.0059, - "step": 13026 - }, - { - "epoch": 0.9790320156320457, - "grad_norm": 1.9627796654155738, - "learning_rate": 4.610604783016781e-09, - "loss": 0.8934, - "step": 13027 - }, - { - "epoch": 0.9791071696978807, - "grad_norm": 2.587019403025822, - "learning_rate": 4.577625674820451e-09, - "loss": 0.8982, - "step": 13028 - }, - { - "epoch": 0.9791823237637156, - "grad_norm": 1.8982526901147683, - "learning_rate": 4.544764803040557e-09, - "loss": 0.9604, - "step": 13029 - }, - { - "epoch": 0.9792574778295505, - "grad_norm": 1.7723934780766206, - "learning_rate": 4.512022169624652e-09, - "loss": 1.0214, - "step": 13030 - }, - { - "epoch": 0.9793326318953856, - "grad_norm": 1.766285325525231, - "learning_rate": 4.479397776512517e-09, - "loss": 0.8709, - "step": 13031 - }, - { - "epoch": 0.9794077859612205, - "grad_norm": 1.762581759781702, - "learning_rate": 4.446891625637495e-09, - "loss": 0.9065, - "step": 13032 - }, - { - "epoch": 0.9794829400270555, - "grad_norm": 1.8577036678946808, - "learning_rate": 4.4145037189255995e-09, - "loss": 0.8744, - "step": 13033 - }, - { - "epoch": 0.9795580940928904, - "grad_norm": 1.771905110959462, - "learning_rate": 4.382234058295964e-09, - "loss": 0.9939, - "step": 13034 - }, - { - "epoch": 0.9796332481587254, - "grad_norm": 3.737963624094503, - "learning_rate": 4.350082645660613e-09, - "loss": 0.8389, - "step": 13035 - }, - { - "epoch": 0.9797084022245603, - "grad_norm": 2.0045606351508396, - "learning_rate": 4.318049482924913e-09, - "loss": 0.9085, - "step": 13036 - }, - { - "epoch": 0.9797835562903953, - "grad_norm": 0.6889583552194904, - "learning_rate": 4.286134571986455e-09, - "loss": 0.8284, - "step": 13037 - }, - { - "epoch": 0.9798587103562303, - "grad_norm": 2.4084153774062633, - "learning_rate": 4.254337914736839e-09, - "loss": 0.9806, - "step": 13038 - }, - { - "epoch": 0.9799338644220652, - "grad_norm": 1.7113377351192804, - "learning_rate": 4.22265951305989e-09, - "loss": 0.9638, - "step": 13039 - }, - { - "epoch": 0.9800090184879002, - "grad_norm": 2.1054117906328886, - "learning_rate": 4.191099368832774e-09, - "loss": 0.9271, - "step": 13040 - }, - { - "epoch": 0.9800841725537351, - "grad_norm": 3.200755959863171, - "learning_rate": 4.159657483925328e-09, - "loss": 0.9394, - "step": 13041 - }, - { - "epoch": 0.9801593266195702, - "grad_norm": 5.59795592331783, - "learning_rate": 4.12833386020095e-09, - "loss": 0.8732, - "step": 13042 - }, - { - "epoch": 0.9802344806854051, - "grad_norm": 2.062291105846875, - "learning_rate": 4.097128499515268e-09, - "loss": 1.0284, - "step": 13043 - }, - { - "epoch": 0.98030963475124, - "grad_norm": 1.9013774697173187, - "learning_rate": 4.06604140371769e-09, - "loss": 0.9314, - "step": 13044 - }, - { - "epoch": 0.980384788817075, - "grad_norm": 13.144434899349694, - "learning_rate": 4.035072574650078e-09, - "loss": 1.0398, - "step": 13045 - }, - { - "epoch": 0.9804599428829099, - "grad_norm": 2.1855216753532143, - "learning_rate": 4.004222014147629e-09, - "loss": 0.9833, - "step": 13046 - }, - { - "epoch": 0.980535096948745, - "grad_norm": 2.9719492488855965, - "learning_rate": 3.973489724037993e-09, - "loss": 1.0186, - "step": 13047 - }, - { - "epoch": 0.9806102510145799, - "grad_norm": 2.0165657391264253, - "learning_rate": 3.942875706142379e-09, - "loss": 0.9709, - "step": 13048 - }, - { - "epoch": 0.9806854050804148, - "grad_norm": 1.5724979205337626, - "learning_rate": 3.912379962274892e-09, - "loss": 0.9355, - "step": 13049 - }, - { - "epoch": 0.9807605591462498, - "grad_norm": 1.6688918121316239, - "learning_rate": 3.882002494242309e-09, - "loss": 0.9634, - "step": 13050 - }, - { - "epoch": 0.9808357132120847, - "grad_norm": 1.8112796753110283, - "learning_rate": 3.8517433038449675e-09, - "loss": 0.9897, - "step": 13051 - }, - { - "epoch": 0.9809108672779198, - "grad_norm": 1.6329373398653464, - "learning_rate": 3.821602392875434e-09, - "loss": 1.0439, - "step": 13052 - }, - { - "epoch": 0.9809860213437547, - "grad_norm": 2.5041731336597928, - "learning_rate": 3.791579763119834e-09, - "loss": 0.8802, - "step": 13053 - }, - { - "epoch": 0.9810611754095897, - "grad_norm": 2.1592088384226207, - "learning_rate": 3.761675416356969e-09, - "loss": 0.9351, - "step": 13054 - }, - { - "epoch": 0.9811363294754246, - "grad_norm": 1.7757843218943403, - "learning_rate": 3.7318893543591966e-09, - "loss": 0.9815, - "step": 13055 - }, - { - "epoch": 0.9812114835412595, - "grad_norm": 2.046766205437064, - "learning_rate": 3.702221578891107e-09, - "loss": 0.7816, - "step": 13056 - }, - { - "epoch": 0.9812866376070946, - "grad_norm": 3.468483442082503, - "learning_rate": 3.6726720917106268e-09, - "loss": 0.8579, - "step": 13057 - }, - { - "epoch": 0.9813617916729295, - "grad_norm": 2.5597131973781813, - "learning_rate": 3.643240894569022e-09, - "loss": 0.9243, - "step": 13058 - }, - { - "epoch": 0.9814369457387645, - "grad_norm": 2.86321613399056, - "learning_rate": 3.613927989209786e-09, - "loss": 0.849, - "step": 13059 - }, - { - "epoch": 0.9815120998045994, - "grad_norm": 3.199873301858875, - "learning_rate": 3.584733377369975e-09, - "loss": 0.9289, - "step": 13060 - }, - { - "epoch": 0.9815872538704344, - "grad_norm": 2.6436468586743778, - "learning_rate": 3.5556570607795377e-09, - "loss": 0.9295, - "step": 13061 - }, - { - "epoch": 0.9816624079362694, - "grad_norm": 1.9658577882744932, - "learning_rate": 3.5266990411613183e-09, - "loss": 0.9928, - "step": 13062 - }, - { - "epoch": 0.9817375620021043, - "grad_norm": 1.7996059728200693, - "learning_rate": 3.4978593202312777e-09, - "loss": 0.955, - "step": 13063 - }, - { - "epoch": 0.9818127160679393, - "grad_norm": 1.617147325394517, - "learning_rate": 3.4691378996980493e-09, - "loss": 1.0201, - "step": 13064 - }, - { - "epoch": 0.9818878701337742, - "grad_norm": 1.3227727416236947, - "learning_rate": 3.440534781263604e-09, - "loss": 0.9528, - "step": 13065 - }, - { - "epoch": 0.9819630241996092, - "grad_norm": 2.2429431877951695, - "learning_rate": 3.41204996662281e-09, - "loss": 0.9843, - "step": 13066 - }, - { - "epoch": 0.9820381782654442, - "grad_norm": 7.831130861196583, - "learning_rate": 3.3836834574636485e-09, - "loss": 0.9586, - "step": 13067 - }, - { - "epoch": 0.9821133323312792, - "grad_norm": 2.4123257312191897, - "learning_rate": 3.3554352554665545e-09, - "loss": 0.9929, - "step": 13068 - }, - { - "epoch": 0.9821884863971141, - "grad_norm": 1.6847062473460475, - "learning_rate": 3.3273053623059655e-09, - "loss": 0.9714, - "step": 13069 - }, - { - "epoch": 0.982263640462949, - "grad_norm": 1.4724352440576514, - "learning_rate": 3.2992937796478824e-09, - "loss": 0.9356, - "step": 13070 - }, - { - "epoch": 0.982338794528784, - "grad_norm": 4.155028428991496, - "learning_rate": 3.2714005091527554e-09, - "loss": 0.9012, - "step": 13071 - }, - { - "epoch": 0.982413948594619, - "grad_norm": 2.149776083353131, - "learning_rate": 3.2436255524730394e-09, - "loss": 0.9653, - "step": 13072 - }, - { - "epoch": 0.982489102660454, - "grad_norm": 2.33793039694397, - "learning_rate": 3.215968911254752e-09, - "loss": 0.9342, - "step": 13073 - }, - { - "epoch": 0.9825642567262889, - "grad_norm": 1.6921000478307706, - "learning_rate": 3.1884305871363593e-09, - "loss": 1.0212, - "step": 13074 - }, - { - "epoch": 0.9826394107921238, - "grad_norm": 0.7343974414399609, - "learning_rate": 3.16101058174989e-09, - "loss": 0.8418, - "step": 13075 - }, - { - "epoch": 0.9827145648579588, - "grad_norm": 1.710835911073274, - "learning_rate": 3.1337088967198223e-09, - "loss": 0.9942, - "step": 13076 - }, - { - "epoch": 0.9827897189237937, - "grad_norm": 0.6550928085765125, - "learning_rate": 3.1065255336639727e-09, - "loss": 0.8452, - "step": 13077 - }, - { - "epoch": 0.9828648729896288, - "grad_norm": 1.6327129863000829, - "learning_rate": 3.0794604941932754e-09, - "loss": 1.0146, - "step": 13078 - }, - { - "epoch": 0.9829400270554637, - "grad_norm": 0.70028813596778, - "learning_rate": 3.0525137799111146e-09, - "loss": 0.8619, - "step": 13079 - }, - { - "epoch": 0.9830151811212987, - "grad_norm": 4.785387430640086, - "learning_rate": 3.0256853924144344e-09, - "loss": 0.9846, - "step": 13080 - }, - { - "epoch": 0.9830903351871336, - "grad_norm": 2.098968152448406, - "learning_rate": 2.9989753332928526e-09, - "loss": 1.0976, - "step": 13081 - }, - { - "epoch": 0.9831654892529685, - "grad_norm": 2.1742806045116994, - "learning_rate": 2.9723836041288806e-09, - "loss": 0.941, - "step": 13082 - }, - { - "epoch": 0.9832406433188036, - "grad_norm": 10.089756792840543, - "learning_rate": 2.94591020649837e-09, - "loss": 0.9873, - "step": 13083 - }, - { - "epoch": 0.9833157973846385, - "grad_norm": 2.20937796542604, - "learning_rate": 2.9195551419698426e-09, - "loss": 1.0493, - "step": 13084 - }, - { - "epoch": 0.9833909514504735, - "grad_norm": 1.5396735048364512, - "learning_rate": 2.8933184121051613e-09, - "loss": 0.9757, - "step": 13085 - }, - { - "epoch": 0.9834661055163084, - "grad_norm": 2.071827920462227, - "learning_rate": 2.8672000184586377e-09, - "loss": 0.8478, - "step": 13086 - }, - { - "epoch": 0.9835412595821434, - "grad_norm": 1.7234874977475083, - "learning_rate": 2.841199962578145e-09, - "loss": 0.9738, - "step": 13087 - }, - { - "epoch": 0.9836164136479784, - "grad_norm": 1.6300919788831367, - "learning_rate": 2.815318246004006e-09, - "loss": 1.016, - "step": 13088 - }, - { - "epoch": 0.9836915677138133, - "grad_norm": 1.9028444982310673, - "learning_rate": 2.7895548702703277e-09, - "loss": 0.8936, - "step": 13089 - }, - { - "epoch": 0.9837667217796483, - "grad_norm": 2.6272377285421644, - "learning_rate": 2.7639098369032222e-09, - "loss": 0.8229, - "step": 13090 - }, - { - "epoch": 0.9838418758454832, - "grad_norm": 1.8417034752505959, - "learning_rate": 2.738383147422141e-09, - "loss": 0.9694, - "step": 13091 - }, - { - "epoch": 0.9839170299113182, - "grad_norm": 2.496786436341224, - "learning_rate": 2.7129748033400956e-09, - "loss": 1.0101, - "step": 13092 - }, - { - "epoch": 0.9839921839771532, - "grad_norm": 1.6569173655926803, - "learning_rate": 2.687684806162549e-09, - "loss": 0.998, - "step": 13093 - }, - { - "epoch": 0.9840673380429881, - "grad_norm": 2.77714115053065, - "learning_rate": 2.662513157387636e-09, - "loss": 1.0242, - "step": 13094 - }, - { - "epoch": 0.9841424921088231, - "grad_norm": 1.5817273260100952, - "learning_rate": 2.637459858507274e-09, - "loss": 0.9023, - "step": 13095 - }, - { - "epoch": 0.984217646174658, - "grad_norm": 1.7515682958801368, - "learning_rate": 2.612524911005831e-09, - "loss": 0.8996, - "step": 13096 - }, - { - "epoch": 0.984292800240493, - "grad_norm": 2.230924275876441, - "learning_rate": 2.5877083163607927e-09, - "loss": 0.938, - "step": 13097 - }, - { - "epoch": 0.984367954306328, - "grad_norm": 2.245100302547696, - "learning_rate": 2.5630100760425378e-09, - "loss": 0.992, - "step": 13098 - }, - { - "epoch": 0.984443108372163, - "grad_norm": 3.5693647934025106, - "learning_rate": 2.5384301915145624e-09, - "loss": 1.01, - "step": 13099 - }, - { - "epoch": 0.9845182624379979, - "grad_norm": 1.6618513720648613, - "learning_rate": 2.513968664233701e-09, - "loss": 0.998, - "step": 13100 - }, - { - "epoch": 0.9845934165038328, - "grad_norm": 1.7700775179079238, - "learning_rate": 2.489625495648795e-09, - "loss": 1.056, - "step": 13101 - }, - { - "epoch": 0.9846685705696678, - "grad_norm": 2.0051069622095286, - "learning_rate": 2.46540068720269e-09, - "loss": 0.9521, - "step": 13102 - }, - { - "epoch": 0.9847437246355027, - "grad_norm": 2.1909729772352504, - "learning_rate": 2.4412942403306826e-09, - "loss": 0.9818, - "step": 13103 - }, - { - "epoch": 0.9848188787013378, - "grad_norm": 1.7881088713712219, - "learning_rate": 2.4173061564609632e-09, - "loss": 0.9564, - "step": 13104 - }, - { - "epoch": 0.9848940327671727, - "grad_norm": 1.5749363174885158, - "learning_rate": 2.3934364370152836e-09, - "loss": 1.0405, - "step": 13105 - }, - { - "epoch": 0.9849691868330077, - "grad_norm": 1.5341556773015752, - "learning_rate": 2.3696850834078463e-09, - "loss": 0.9759, - "step": 13106 - }, - { - "epoch": 0.9850443408988426, - "grad_norm": 1.7188775448725158, - "learning_rate": 2.3460520970459697e-09, - "loss": 1.0329, - "step": 13107 - }, - { - "epoch": 0.9851194949646775, - "grad_norm": 2.488851676484861, - "learning_rate": 2.322537479330089e-09, - "loss": 1.0054, - "step": 13108 - }, - { - "epoch": 0.9851946490305126, - "grad_norm": 1.8677560108380666, - "learning_rate": 2.2991412316533122e-09, - "loss": 0.8991, - "step": 13109 - }, - { - "epoch": 0.9852698030963475, - "grad_norm": 2.1261367001585834, - "learning_rate": 2.2758633554023078e-09, - "loss": 0.9764, - "step": 13110 - }, - { - "epoch": 0.9853449571621825, - "grad_norm": 1.3684537418897347, - "learning_rate": 2.2527038519561948e-09, - "loss": 1.0337, - "step": 13111 - }, - { - "epoch": 0.9854201112280174, - "grad_norm": 1.4412250353196931, - "learning_rate": 2.2296627226872088e-09, - "loss": 0.9574, - "step": 13112 - }, - { - "epoch": 0.9854952652938525, - "grad_norm": 2.8427247934690776, - "learning_rate": 2.2067399689607024e-09, - "loss": 1.0001, - "step": 13113 - }, - { - "epoch": 0.9855704193596874, - "grad_norm": 2.419869657111653, - "learning_rate": 2.1839355921349224e-09, - "loss": 0.8651, - "step": 13114 - }, - { - "epoch": 0.9856455734255223, - "grad_norm": 1.8817845621108842, - "learning_rate": 2.16124959356101e-09, - "loss": 0.9709, - "step": 13115 - }, - { - "epoch": 0.9857207274913573, - "grad_norm": 1.515406012691246, - "learning_rate": 2.138681974583223e-09, - "loss": 0.9654, - "step": 13116 - }, - { - "epoch": 0.9857958815571922, - "grad_norm": 2.0287668161876344, - "learning_rate": 2.1162327365391587e-09, - "loss": 0.9774, - "step": 13117 - }, - { - "epoch": 0.9858710356230272, - "grad_norm": 2.2654368355321965, - "learning_rate": 2.0939018807584196e-09, - "loss": 0.9864, - "step": 13118 - }, - { - "epoch": 0.9859461896888622, - "grad_norm": 0.8362944010421127, - "learning_rate": 2.071689408564614e-09, - "loss": 0.8332, - "step": 13119 - }, - { - "epoch": 0.9860213437546971, - "grad_norm": 0.6854845980535411, - "learning_rate": 2.0495953212738005e-09, - "loss": 0.8001, - "step": 13120 - }, - { - "epoch": 0.9860964978205321, - "grad_norm": 0.764741231810082, - "learning_rate": 2.0276196201951535e-09, - "loss": 0.8585, - "step": 13121 - }, - { - "epoch": 0.986171651886367, - "grad_norm": 1.4835946692232376, - "learning_rate": 2.005762306630743e-09, - "loss": 0.9472, - "step": 13122 - }, - { - "epoch": 0.986246805952202, - "grad_norm": 1.9520248977019143, - "learning_rate": 1.9840233818757546e-09, - "loss": 1.0033, - "step": 13123 - }, - { - "epoch": 0.986321960018037, - "grad_norm": 1.5414285819615399, - "learning_rate": 1.9624028472182696e-09, - "loss": 1.0573, - "step": 13124 - }, - { - "epoch": 0.986397114083872, - "grad_norm": 1.7899588792222687, - "learning_rate": 1.9409007039392632e-09, - "loss": 1.0235, - "step": 13125 - }, - { - "epoch": 0.9864722681497069, - "grad_norm": 1.5593084164395912, - "learning_rate": 1.9195169533132714e-09, - "loss": 0.9869, - "step": 13126 - }, - { - "epoch": 0.9865474222155418, - "grad_norm": 1.640863383095324, - "learning_rate": 1.8982515966068367e-09, - "loss": 0.8549, - "step": 13127 - }, - { - "epoch": 0.9866225762813768, - "grad_norm": 2.318954121532687, - "learning_rate": 1.8771046350805063e-09, - "loss": 1.0392, - "step": 13128 - }, - { - "epoch": 0.9866977303472118, - "grad_norm": 1.6483638078027998, - "learning_rate": 1.856076069986834e-09, - "loss": 0.938, - "step": 13129 - }, - { - "epoch": 0.9867728844130468, - "grad_norm": 3.87812260974254, - "learning_rate": 1.8351659025721555e-09, - "loss": 0.9192, - "step": 13130 - }, - { - "epoch": 0.9868480384788817, - "grad_norm": 1.7795745599426682, - "learning_rate": 1.8143741340752583e-09, - "loss": 1.0813, - "step": 13131 - }, - { - "epoch": 0.9869231925447167, - "grad_norm": 1.6827923135069132, - "learning_rate": 1.7937007657282677e-09, - "loss": 0.8994, - "step": 13132 - }, - { - "epoch": 0.9869983466105516, - "grad_norm": 1.4900642987913209, - "learning_rate": 1.7731457987562038e-09, - "loss": 0.9904, - "step": 13133 - }, - { - "epoch": 0.9870735006763866, - "grad_norm": 1.6564595071971318, - "learning_rate": 1.752709234376981e-09, - "loss": 1.0882, - "step": 13134 - }, - { - "epoch": 0.9871486547422216, - "grad_norm": 1.439300901731072, - "learning_rate": 1.732391073801409e-09, - "loss": 1.0466, - "step": 13135 - }, - { - "epoch": 0.9872238088080565, - "grad_norm": 1.3497932628997598, - "learning_rate": 1.7121913182336356e-09, - "loss": 0.8767, - "step": 13136 - }, - { - "epoch": 0.9872989628738915, - "grad_norm": 1.8929602332716673, - "learning_rate": 1.692109968870703e-09, - "loss": 0.9195, - "step": 13137 - }, - { - "epoch": 0.9873741169397264, - "grad_norm": 0.7909078642358306, - "learning_rate": 1.6721470269021042e-09, - "loss": 0.8791, - "step": 13138 - }, - { - "epoch": 0.9874492710055613, - "grad_norm": 3.923823628629517, - "learning_rate": 1.6523024935108931e-09, - "loss": 0.9573, - "step": 13139 - }, - { - "epoch": 0.9875244250713964, - "grad_norm": 1.8085508403260515, - "learning_rate": 1.6325763698727957e-09, - "loss": 0.9805, - "step": 13140 - }, - { - "epoch": 0.9875995791372313, - "grad_norm": 1.9155953633690201, - "learning_rate": 1.6129686571570988e-09, - "loss": 1.0132, - "step": 13141 - }, - { - "epoch": 0.9876747332030663, - "grad_norm": 12.61889198529425, - "learning_rate": 1.593479356525096e-09, - "loss": 1.0152, - "step": 13142 - }, - { - "epoch": 0.9877498872689012, - "grad_norm": 2.3890723877546876, - "learning_rate": 1.5741084691318628e-09, - "loss": 1.0028, - "step": 13143 - }, - { - "epoch": 0.9878250413347363, - "grad_norm": 1.6100666078567152, - "learning_rate": 1.5548559961253705e-09, - "loss": 0.8636, - "step": 13144 - }, - { - "epoch": 0.9879001954005712, - "grad_norm": 1.6344036763521916, - "learning_rate": 1.5357219386460397e-09, - "loss": 1.0063, - "step": 13145 - }, - { - "epoch": 0.9879753494664061, - "grad_norm": 1.925363119977193, - "learning_rate": 1.5167062978278521e-09, - "loss": 0.9335, - "step": 13146 - }, - { - "epoch": 0.9880505035322411, - "grad_norm": 2.5511388367391628, - "learning_rate": 1.4978090747976846e-09, - "loss": 0.9486, - "step": 13147 - }, - { - "epoch": 0.988125657598076, - "grad_norm": 1.8154887189291373, - "learning_rate": 1.4790302706750856e-09, - "loss": 1.0726, - "step": 13148 - }, - { - "epoch": 0.988200811663911, - "grad_norm": 2.038963812416671, - "learning_rate": 1.4603698865724989e-09, - "loss": 0.9543, - "step": 13149 - }, - { - "epoch": 0.988275965729746, - "grad_norm": 1.581538747727078, - "learning_rate": 1.4418279235961506e-09, - "loss": 0.8643, - "step": 13150 - }, - { - "epoch": 0.988351119795581, - "grad_norm": 12.48551406724759, - "learning_rate": 1.4234043828444952e-09, - "loss": 0.9698, - "step": 13151 - }, - { - "epoch": 0.9884262738614159, - "grad_norm": 1.6817165450659657, - "learning_rate": 1.4050992654091043e-09, - "loss": 0.9999, - "step": 13152 - }, - { - "epoch": 0.9885014279272508, - "grad_norm": 2.0027862452826093, - "learning_rate": 1.3869125723746655e-09, - "loss": 0.9427, - "step": 13153 - }, - { - "epoch": 0.9885765819930858, - "grad_norm": 2.295670381591296, - "learning_rate": 1.3688443048189836e-09, - "loss": 1.0023, - "step": 13154 - }, - { - "epoch": 0.9886517360589208, - "grad_norm": 4.381760958971005, - "learning_rate": 1.3508944638125353e-09, - "loss": 0.9647, - "step": 13155 - }, - { - "epoch": 0.9887268901247558, - "grad_norm": 2.036600080339179, - "learning_rate": 1.3330630504189143e-09, - "loss": 0.9287, - "step": 13156 - }, - { - "epoch": 0.9888020441905907, - "grad_norm": 2.7368840504435896, - "learning_rate": 1.3153500656948313e-09, - "loss": 0.9302, - "step": 13157 - }, - { - "epoch": 0.9888771982564257, - "grad_norm": 2.7234635107272314, - "learning_rate": 1.2977555106894467e-09, - "loss": 0.9212, - "step": 13158 - }, - { - "epoch": 0.9889523523222606, - "grad_norm": 3.3543225996524337, - "learning_rate": 1.280279386445704e-09, - "loss": 0.8376, - "step": 13159 - }, - { - "epoch": 0.9890275063880956, - "grad_norm": 1.90173035431693, - "learning_rate": 1.2629216939992194e-09, - "loss": 1.0031, - "step": 13160 - }, - { - "epoch": 0.9891026604539306, - "grad_norm": 1.703791076881461, - "learning_rate": 1.2456824343780592e-09, - "loss": 1.0461, - "step": 13161 - }, - { - "epoch": 0.9891778145197655, - "grad_norm": 7.503447948084876, - "learning_rate": 1.2285616086040728e-09, - "loss": 1.1096, - "step": 13162 - }, - { - "epoch": 0.9892529685856005, - "grad_norm": 1.4838377130769353, - "learning_rate": 1.2115592176915601e-09, - "loss": 0.8485, - "step": 13163 - }, - { - "epoch": 0.9893281226514354, - "grad_norm": 1.838785113966017, - "learning_rate": 1.1946752626481594e-09, - "loss": 0.9686, - "step": 13164 - }, - { - "epoch": 0.9894032767172704, - "grad_norm": 1.9171253222243692, - "learning_rate": 1.1779097444739594e-09, - "loss": 0.9183, - "step": 13165 - }, - { - "epoch": 0.9894784307831054, - "grad_norm": 1.8611406550110567, - "learning_rate": 1.161262664162832e-09, - "loss": 1.0118, - "step": 13166 - }, - { - "epoch": 0.9895535848489403, - "grad_norm": 1.592947963786649, - "learning_rate": 1.1447340227008772e-09, - "loss": 1.0029, - "step": 13167 - }, - { - "epoch": 0.9896287389147753, - "grad_norm": 2.524761691036819, - "learning_rate": 1.1283238210675338e-09, - "loss": 0.9943, - "step": 13168 - }, - { - "epoch": 0.9897038929806102, - "grad_norm": 3.072112897507072, - "learning_rate": 1.1120320602351352e-09, - "loss": 1.0888, - "step": 13169 - }, - { - "epoch": 0.9897790470464453, - "grad_norm": 1.989037570362633, - "learning_rate": 1.095858741169131e-09, - "loss": 0.9407, - "step": 13170 - }, - { - "epoch": 0.9898542011122802, - "grad_norm": 1.5814455808526464, - "learning_rate": 1.0798038648278663e-09, - "loss": 0.9837, - "step": 13171 - }, - { - "epoch": 0.9899293551781151, - "grad_norm": 0.6559046336059631, - "learning_rate": 1.0638674321625796e-09, - "loss": 0.8396, - "step": 13172 - }, - { - "epoch": 0.9900045092439501, - "grad_norm": 1.7299093543543942, - "learning_rate": 1.0480494441174047e-09, - "loss": 0.9428, - "step": 13173 - }, - { - "epoch": 0.990079663309785, - "grad_norm": 1.8937190566351856, - "learning_rate": 1.0323499016300364e-09, - "loss": 1.0712, - "step": 13174 - }, - { - "epoch": 0.99015481737562, - "grad_norm": 2.0263411848858763, - "learning_rate": 1.016768805630397e-09, - "loss": 0.9568, - "step": 13175 - }, - { - "epoch": 0.990229971441455, - "grad_norm": 1.5504431367969203, - "learning_rate": 1.00130615704197e-09, - "loss": 0.8992, - "step": 13176 - }, - { - "epoch": 0.99030512550729, - "grad_norm": 1.5202734539353768, - "learning_rate": 9.859619567806898e-10, - "loss": 1.0677, - "step": 13177 - }, - { - "epoch": 0.9903802795731249, - "grad_norm": 2.1569261458287277, - "learning_rate": 9.707362057558289e-10, - "loss": 1.0131, - "step": 13178 - }, - { - "epoch": 0.9904554336389598, - "grad_norm": 2.571035266119492, - "learning_rate": 9.556289048697763e-10, - "loss": 0.9622, - "step": 13179 - }, - { - "epoch": 0.9905305877047949, - "grad_norm": 1.4575933032000281, - "learning_rate": 9.40640055017594e-10, - "loss": 0.8997, - "step": 13180 - }, - { - "epoch": 0.9906057417706298, - "grad_norm": 3.6739563582418375, - "learning_rate": 9.257696570872386e-10, - "loss": 0.873, - "step": 13181 - }, - { - "epoch": 0.9906808958364648, - "grad_norm": 1.604919562418661, - "learning_rate": 9.110177119600048e-10, - "loss": 0.8915, - "step": 13182 - }, - { - "epoch": 0.9907560499022997, - "grad_norm": 1.6410426017071207, - "learning_rate": 8.963842205100824e-10, - "loss": 1.0338, - "step": 13183 - }, - { - "epoch": 0.9908312039681346, - "grad_norm": 1.5863321499185195, - "learning_rate": 8.818691836045556e-10, - "loss": 0.9798, - "step": 13184 - }, - { - "epoch": 0.9909063580339696, - "grad_norm": 2.340775700646541, - "learning_rate": 8.674726021034028e-10, - "loss": 0.8615, - "step": 13185 - }, - { - "epoch": 0.9909815120998046, - "grad_norm": 2.3913798418303602, - "learning_rate": 8.531944768594979e-10, - "loss": 1.0152, - "step": 13186 - }, - { - "epoch": 0.9910566661656396, - "grad_norm": 1.674252824530028, - "learning_rate": 8.390348087192745e-10, - "loss": 0.9972, - "step": 13187 - }, - { - "epoch": 0.9911318202314745, - "grad_norm": 2.0699387698861726, - "learning_rate": 8.249935985213952e-10, - "loss": 0.8792, - "step": 13188 - }, - { - "epoch": 0.9912069742973095, - "grad_norm": 5.028649078426041, - "learning_rate": 8.110708470980831e-10, - "loss": 0.8995, - "step": 13189 - }, - { - "epoch": 0.9912821283631444, - "grad_norm": 4.755815660091699, - "learning_rate": 7.972665552742342e-10, - "loss": 1.0601, - "step": 13190 - }, - { - "epoch": 0.9913572824289794, - "grad_norm": 1.5556129499852278, - "learning_rate": 7.835807238676384e-10, - "loss": 1.0287, - "step": 13191 - }, - { - "epoch": 0.9914324364948144, - "grad_norm": 1.6886519279775523, - "learning_rate": 7.700133536896469e-10, - "loss": 1.015, - "step": 13192 - }, - { - "epoch": 0.9915075905606493, - "grad_norm": 2.6086284917327034, - "learning_rate": 7.565644455436171e-10, - "loss": 0.8697, - "step": 13193 - }, - { - "epoch": 0.9915827446264843, - "grad_norm": 1.5511743157895208, - "learning_rate": 7.432340002269111e-10, - "loss": 0.9272, - "step": 13194 - }, - { - "epoch": 0.9916578986923192, - "grad_norm": 0.8732856918825357, - "learning_rate": 7.300220185293416e-10, - "loss": 0.813, - "step": 13195 - }, - { - "epoch": 0.9917330527581543, - "grad_norm": 1.7587320592815168, - "learning_rate": 7.169285012336157e-10, - "loss": 0.9959, - "step": 13196 - }, - { - "epoch": 0.9918082068239892, - "grad_norm": 1.8309068299677576, - "learning_rate": 7.039534491155574e-10, - "loss": 0.9597, - "step": 13197 - }, - { - "epoch": 0.9918833608898241, - "grad_norm": 1.7613680674477603, - "learning_rate": 6.910968629443292e-10, - "loss": 1.0424, - "step": 13198 - }, - { - "epoch": 0.9919585149556591, - "grad_norm": 1.870256654802753, - "learning_rate": 6.783587434813221e-10, - "loss": 0.9969, - "step": 13199 - }, - { - "epoch": 0.992033669021494, - "grad_norm": 2.32799637202344, - "learning_rate": 6.657390914814875e-10, - "loss": 0.9259, - "step": 13200 - }, - { - "epoch": 0.9921088230873291, - "grad_norm": 1.6284230931328667, - "learning_rate": 6.532379076924499e-10, - "loss": 1.0111, - "step": 13201 - }, - { - "epoch": 0.992183977153164, - "grad_norm": 1.867440306336321, - "learning_rate": 6.40855192855172e-10, - "loss": 1.0513, - "step": 13202 - }, - { - "epoch": 0.992259131218999, - "grad_norm": 3.7304204328513646, - "learning_rate": 6.285909477032892e-10, - "loss": 0.9418, - "step": 13203 - }, - { - "epoch": 0.9923342852848339, - "grad_norm": 2.7583998792137154, - "learning_rate": 6.164451729635534e-10, - "loss": 0.971, - "step": 13204 - }, - { - "epoch": 0.9924094393506688, - "grad_norm": 7.961200084604668, - "learning_rate": 6.044178693553892e-10, - "loss": 0.8636, - "step": 13205 - }, - { - "epoch": 0.9924845934165039, - "grad_norm": 1.5573240380978683, - "learning_rate": 5.925090375917818e-10, - "loss": 0.9105, - "step": 13206 - }, - { - "epoch": 0.9925597474823388, - "grad_norm": 1.6448634303718304, - "learning_rate": 5.807186783783891e-10, - "loss": 1.0354, - "step": 13207 - }, - { - "epoch": 0.9926349015481738, - "grad_norm": 2.12192391295652, - "learning_rate": 5.690467924135412e-10, - "loss": 0.9264, - "step": 13208 - }, - { - "epoch": 0.9927100556140087, - "grad_norm": 1.5240147988459432, - "learning_rate": 5.574933803891291e-10, - "loss": 1.0283, - "step": 13209 - }, - { - "epoch": 0.9927852096798436, - "grad_norm": 1.583914006148276, - "learning_rate": 5.460584429894944e-10, - "loss": 1.0124, - "step": 13210 - }, - { - "epoch": 0.9928603637456787, - "grad_norm": 1.8366940501964664, - "learning_rate": 5.34741980892317e-10, - "loss": 0.9277, - "step": 13211 - }, - { - "epoch": 0.9929355178115136, - "grad_norm": 1.6739544637176789, - "learning_rate": 5.235439947681719e-10, - "loss": 0.9966, - "step": 13212 - }, - { - "epoch": 0.9930106718773486, - "grad_norm": 2.8946286080855965, - "learning_rate": 5.124644852805282e-10, - "loss": 0.9146, - "step": 13213 - }, - { - "epoch": 0.9930858259431835, - "grad_norm": 1.8340142748328125, - "learning_rate": 5.015034530859719e-10, - "loss": 1.0218, - "step": 13214 - }, - { - "epoch": 0.9931609800090185, - "grad_norm": 2.1341428974004195, - "learning_rate": 4.906608988339833e-10, - "loss": 0.9112, - "step": 13215 - }, - { - "epoch": 0.9932361340748534, - "grad_norm": 1.8238504760967456, - "learning_rate": 4.799368231669376e-10, - "loss": 0.9106, - "step": 13216 - }, - { - "epoch": 0.9933112881406884, - "grad_norm": 2.95381261500092, - "learning_rate": 4.693312267201044e-10, - "loss": 1.0513, - "step": 13217 - }, - { - "epoch": 0.9933864422065234, - "grad_norm": 1.851501850148883, - "learning_rate": 4.5884411012231395e-10, - "loss": 0.9869, - "step": 13218 - }, - { - "epoch": 0.9934615962723583, - "grad_norm": 2.3690188534631726, - "learning_rate": 4.484754739948471e-10, - "loss": 0.8898, - "step": 13219 - }, - { - "epoch": 0.9935367503381933, - "grad_norm": 2.605486319281093, - "learning_rate": 4.382253189518792e-10, - "loss": 0.9671, - "step": 13220 - }, - { - "epoch": 0.9936119044040282, - "grad_norm": 2.1890772991238356, - "learning_rate": 4.2809364560070225e-10, - "loss": 0.9127, - "step": 13221 - }, - { - "epoch": 0.9936870584698633, - "grad_norm": 1.6234952855771585, - "learning_rate": 4.18080454542169e-10, - "loss": 0.9689, - "step": 13222 - }, - { - "epoch": 0.9937622125356982, - "grad_norm": 1.7823644130121357, - "learning_rate": 4.081857463691385e-10, - "loss": 0.98, - "step": 13223 - }, - { - "epoch": 0.9938373666015331, - "grad_norm": 1.6383372023036493, - "learning_rate": 3.9840952166803054e-10, - "loss": 0.9765, - "step": 13224 - }, - { - "epoch": 0.9939125206673681, - "grad_norm": 1.9108190342262343, - "learning_rate": 3.8875178101815955e-10, - "loss": 1.0246, - "step": 13225 - }, - { - "epoch": 0.993987674733203, - "grad_norm": 4.459966390166636, - "learning_rate": 3.792125249917344e-10, - "loss": 1.081, - "step": 13226 - }, - { - "epoch": 0.9940628287990381, - "grad_norm": 0.8476955914487029, - "learning_rate": 3.697917541540807e-10, - "loss": 0.9388, - "step": 13227 - }, - { - "epoch": 0.994137982864873, - "grad_norm": 1.4776414431079885, - "learning_rate": 3.604894690634186e-10, - "loss": 1.0031, - "step": 13228 - }, - { - "epoch": 0.9942131369307079, - "grad_norm": 1.963600812134193, - "learning_rate": 3.5130567027086277e-10, - "loss": 1.0255, - "step": 13229 - }, - { - "epoch": 0.9942882909965429, - "grad_norm": 1.8610137001406535, - "learning_rate": 3.4224035832042254e-10, - "loss": 0.9794, - "step": 13230 - }, - { - "epoch": 0.9943634450623778, - "grad_norm": 1.6575116084916406, - "learning_rate": 3.3329353374966783e-10, - "loss": 0.9955, - "step": 13231 - }, - { - "epoch": 0.9944385991282129, - "grad_norm": 2.56726217875228, - "learning_rate": 3.2446519708839713e-10, - "loss": 1.0072, - "step": 13232 - }, - { - "epoch": 0.9945137531940478, - "grad_norm": 2.0131458738773196, - "learning_rate": 3.1575534885996957e-10, - "loss": 0.8919, - "step": 13233 - }, - { - "epoch": 0.9945889072598828, - "grad_norm": 2.151678796089353, - "learning_rate": 3.071639895801947e-10, - "loss": 1.0396, - "step": 13234 - }, - { - "epoch": 0.9946640613257177, - "grad_norm": 3.635563422067345, - "learning_rate": 2.986911197582209e-10, - "loss": 0.8769, - "step": 13235 - }, - { - "epoch": 0.9947392153915526, - "grad_norm": 1.6142194850684746, - "learning_rate": 2.90336739896313e-10, - "loss": 0.8354, - "step": 13236 - }, - { - "epoch": 0.9948143694573877, - "grad_norm": 1.7308705984899742, - "learning_rate": 2.8210085048940844e-10, - "loss": 1.0044, - "step": 13237 - }, - { - "epoch": 0.9948895235232226, - "grad_norm": 2.1542654441553473, - "learning_rate": 2.7398345202533925e-10, - "loss": 1.096, - "step": 13238 - }, - { - "epoch": 0.9949646775890576, - "grad_norm": 1.2897662929149842, - "learning_rate": 2.65984544985276e-10, - "loss": 1.0457, - "step": 13239 - }, - { - "epoch": 0.9950398316548925, - "grad_norm": 0.7863050568291035, - "learning_rate": 2.58104129843062e-10, - "loss": 0.8846, - "step": 13240 - }, - { - "epoch": 0.9951149857207275, - "grad_norm": 1.7251441618899501, - "learning_rate": 2.503422070656569e-10, - "loss": 1.0527, - "step": 13241 - }, - { - "epoch": 0.9951901397865625, - "grad_norm": 1.60531853114609, - "learning_rate": 2.426987771131372e-10, - "loss": 0.9916, - "step": 13242 - }, - { - "epoch": 0.9952652938523974, - "grad_norm": 1.588816617924122, - "learning_rate": 2.3517384043825194e-10, - "loss": 1.0377, - "step": 13243 - }, - { - "epoch": 0.9953404479182324, - "grad_norm": 1.7814080017686176, - "learning_rate": 2.277673974868666e-10, - "loss": 1.0572, - "step": 13244 - }, - { - "epoch": 0.9954156019840673, - "grad_norm": 1.4213588742012448, - "learning_rate": 2.204794486979633e-10, - "loss": 1.0011, - "step": 13245 - }, - { - "epoch": 0.9954907560499023, - "grad_norm": 2.553697866840355, - "learning_rate": 2.133099945034189e-10, - "loss": 0.7746, - "step": 13246 - }, - { - "epoch": 0.9955659101157373, - "grad_norm": 0.740852681505554, - "learning_rate": 2.0625903532778266e-10, - "loss": 0.8438, - "step": 13247 - }, - { - "epoch": 0.9956410641815723, - "grad_norm": 15.658809612529469, - "learning_rate": 1.9932657158916455e-10, - "loss": 1.014, - "step": 13248 - }, - { - "epoch": 0.9957162182474072, - "grad_norm": 2.2837498175960773, - "learning_rate": 1.9251260369812506e-10, - "loss": 0.9158, - "step": 13249 - }, - { - "epoch": 0.9957913723132421, - "grad_norm": 2.5650325448869737, - "learning_rate": 1.8581713205834126e-10, - "loss": 0.9813, - "step": 13250 - }, - { - "epoch": 0.9958665263790771, - "grad_norm": 1.7474583151761298, - "learning_rate": 1.7924015706682893e-10, - "loss": 0.9444, - "step": 13251 - }, - { - "epoch": 0.995941680444912, - "grad_norm": 2.20299805620396, - "learning_rate": 1.7278167911327635e-10, - "loss": 0.9474, - "step": 13252 - }, - { - "epoch": 0.9960168345107471, - "grad_norm": 1.7149872947867302, - "learning_rate": 1.664416985800443e-10, - "loss": 0.8469, - "step": 13253 - }, - { - "epoch": 0.996091988576582, - "grad_norm": 1.3971309418856968, - "learning_rate": 1.6022021584327638e-10, - "loss": 0.9356, - "step": 13254 - }, - { - "epoch": 0.9961671426424169, - "grad_norm": 2.5193432168510137, - "learning_rate": 1.5411723127112253e-10, - "loss": 0.9583, - "step": 13255 - }, - { - "epoch": 0.9962422967082519, - "grad_norm": 1.8640909734759217, - "learning_rate": 1.4813274522551545e-10, - "loss": 0.9902, - "step": 13256 - }, - { - "epoch": 0.9963174507740868, - "grad_norm": 1.998608343413929, - "learning_rate": 1.4226675806106037e-10, - "loss": 1.0195, - "step": 13257 - }, - { - "epoch": 0.9963926048399219, - "grad_norm": 2.072577737930919, - "learning_rate": 1.3651927012503506e-10, - "loss": 0.9337, - "step": 13258 - }, - { - "epoch": 0.9964677589057568, - "grad_norm": 2.2223127532108293, - "learning_rate": 1.3089028175850004e-10, - "loss": 0.9116, - "step": 13259 - }, - { - "epoch": 0.9965429129715918, - "grad_norm": 1.6241599625980334, - "learning_rate": 1.2537979329474424e-10, - "loss": 0.8844, - "step": 13260 - }, - { - "epoch": 0.9966180670374267, - "grad_norm": 2.0913096767924175, - "learning_rate": 1.1998780505995122e-10, - "loss": 0.9589, - "step": 13261 - }, - { - "epoch": 0.9966932211032616, - "grad_norm": 1.7026237660148624, - "learning_rate": 1.1471431737430926e-10, - "loss": 1.0292, - "step": 13262 - }, - { - "epoch": 0.9967683751690967, - "grad_norm": 1.6225324382390072, - "learning_rate": 1.0955933054956901e-10, - "loss": 0.9003, - "step": 13263 - }, - { - "epoch": 0.9968435292349316, - "grad_norm": 1.6425443775098538, - "learning_rate": 1.0452284489170793e-10, - "loss": 1.0453, - "step": 13264 - }, - { - "epoch": 0.9969186833007666, - "grad_norm": 2.454665362515052, - "learning_rate": 9.960486069915396e-11, - "loss": 0.9607, - "step": 13265 - }, - { - "epoch": 0.9969938373666015, - "grad_norm": 1.498818886186342, - "learning_rate": 9.480537826278556e-11, - "loss": 0.9608, - "step": 13266 - }, - { - "epoch": 0.9970689914324365, - "grad_norm": 1.7686259330667786, - "learning_rate": 9.012439786770798e-11, - "loss": 0.9678, - "step": 13267 - }, - { - "epoch": 0.9971441454982715, - "grad_norm": 2.049655452709796, - "learning_rate": 8.55619197905888e-11, - "loss": 0.9533, - "step": 13268 - }, - { - "epoch": 0.9972192995641064, - "grad_norm": 2.0737391490318977, - "learning_rate": 8.111794430232244e-11, - "loss": 0.976, - "step": 13269 - }, - { - "epoch": 0.9972944536299414, - "grad_norm": 2.0916410614310723, - "learning_rate": 7.679247166603175e-11, - "loss": 1.0033, - "step": 13270 - }, - { - "epoch": 0.9973696076957763, - "grad_norm": 1.4525959323318214, - "learning_rate": 7.258550213795622e-11, - "loss": 1.0254, - "step": 13271 - }, - { - "epoch": 0.9974447617616113, - "grad_norm": 2.1751590845969044, - "learning_rate": 6.849703596722989e-11, - "loss": 0.9057, - "step": 13272 - }, - { - "epoch": 0.9975199158274463, - "grad_norm": 2.630223934517597, - "learning_rate": 6.452707339654751e-11, - "loss": 0.9741, - "step": 13273 - }, - { - "epoch": 0.9975950698932812, - "grad_norm": 2.0246431153960094, - "learning_rate": 6.067561466083227e-11, - "loss": 0.9602, - "step": 13274 - }, - { - "epoch": 0.9976702239591162, - "grad_norm": 1.9699037592272561, - "learning_rate": 5.694265998834602e-11, - "loss": 0.937, - "step": 13275 - }, - { - "epoch": 0.9977453780249511, - "grad_norm": 2.5742986779546566, - "learning_rate": 5.33282096002452e-11, - "loss": 0.9321, - "step": 13276 - }, - { - "epoch": 0.9978205320907861, - "grad_norm": 1.813767483270091, - "learning_rate": 4.9832263710802845e-11, - "loss": 0.9923, - "step": 13277 - }, - { - "epoch": 0.997895686156621, - "grad_norm": 1.70303372479756, - "learning_rate": 4.645482252718658e-11, - "loss": 0.9506, - "step": 13278 - }, - { - "epoch": 0.9979708402224561, - "grad_norm": 1.5302144655745764, - "learning_rate": 4.3195886249458577e-11, - "loss": 0.8785, - "step": 13279 - }, - { - "epoch": 0.998045994288291, - "grad_norm": 1.998827769548013, - "learning_rate": 4.0055455070575614e-11, - "loss": 0.8633, - "step": 13280 - }, - { - "epoch": 0.9981211483541259, - "grad_norm": 1.5067316638520105, - "learning_rate": 3.7033529177055156e-11, - "loss": 0.913, - "step": 13281 - }, - { - "epoch": 0.9981963024199609, - "grad_norm": 1.0794084621735995, - "learning_rate": 3.413010874742106e-11, - "loss": 0.7512, - "step": 13282 - }, - { - "epoch": 0.9982714564857958, - "grad_norm": 0.6478749328206973, - "learning_rate": 3.134519395397994e-11, - "loss": 0.8101, - "step": 13283 - }, - { - "epoch": 0.9983466105516309, - "grad_norm": 1.7880410689303303, - "learning_rate": 2.8678784961710945e-11, - "loss": 0.9021, - "step": 13284 - }, - { - "epoch": 0.9984217646174658, - "grad_norm": 3.7525933517240206, - "learning_rate": 2.6130881928709823e-11, - "loss": 0.9544, - "step": 13285 - }, - { - "epoch": 0.9984969186833008, - "grad_norm": 2.189883650036184, - "learning_rate": 2.370148500574487e-11, - "loss": 0.9871, - "step": 13286 - }, - { - "epoch": 0.9985720727491357, - "grad_norm": 1.63487711791715, - "learning_rate": 2.1390594337145073e-11, - "loss": 0.9036, - "step": 13287 - }, - { - "epoch": 0.9986472268149706, - "grad_norm": 2.35048217290834, - "learning_rate": 1.9198210059245822e-11, - "loss": 0.9384, - "step": 13288 - }, - { - "epoch": 0.9987223808808057, - "grad_norm": 1.7692627692597962, - "learning_rate": 1.7124332302609346e-11, - "loss": 0.8892, - "step": 13289 - }, - { - "epoch": 0.9987975349466406, - "grad_norm": 1.5441132369098676, - "learning_rate": 1.5168961189582218e-11, - "loss": 1.0211, - "step": 13290 - }, - { - "epoch": 0.9988726890124756, - "grad_norm": 4.600498147611491, - "learning_rate": 1.3332096836293772e-11, - "loss": 0.985, - "step": 13291 - }, - { - "epoch": 0.9989478430783105, - "grad_norm": 1.707908771450422, - "learning_rate": 1.161373935154586e-11, - "loss": 1.0056, - "step": 13292 - }, - { - "epoch": 0.9990229971441456, - "grad_norm": 1.506898672265236, - "learning_rate": 1.0013888837256957e-11, - "loss": 0.9441, - "step": 13293 - }, - { - "epoch": 0.9990981512099805, - "grad_norm": 1.4543309283822812, - "learning_rate": 8.532545388018064e-12, - "loss": 0.8533, - "step": 13294 - }, - { - "epoch": 0.9991733052758154, - "grad_norm": 1.6330084432183927, - "learning_rate": 7.169709091536802e-12, - "loss": 0.9464, - "step": 13295 - }, - { - "epoch": 0.9992484593416504, - "grad_norm": 2.9802660391930913, - "learning_rate": 5.9253800290814945e-12, - "loss": 0.9548, - "step": 13296 - }, - { - "epoch": 0.9993236134074853, - "grad_norm": 2.433911347215826, - "learning_rate": 4.799558273704818e-12, - "loss": 0.9727, - "step": 13297 - }, - { - "epoch": 0.9993987674733203, - "grad_norm": 5.023543052691606, - "learning_rate": 3.7922438926862866e-12, - "loss": 1.0654, - "step": 13298 - }, - { - "epoch": 0.9994739215391553, - "grad_norm": 1.4156280618596595, - "learning_rate": 2.9034369453118103e-12, - "loss": 0.963, - "step": 13299 - }, - { - "epoch": 0.9995490756049902, - "grad_norm": 4.42439182218481, - "learning_rate": 2.1331374846500495e-12, - "loss": 1.0374, - "step": 13300 - }, - { - "epoch": 0.9996242296708252, - "grad_norm": 1.7935080787130047, - "learning_rate": 1.481345555776059e-12, - "loss": 1.0212, - "step": 13301 - }, - { - "epoch": 0.9996993837366601, - "grad_norm": 1.9229200669654989, - "learning_rate": 9.480611977696894e-13, - "loss": 1.0815, - "step": 13302 - }, - { - "epoch": 0.9997745378024951, - "grad_norm": 1.8800301788829727, - "learning_rate": 5.332844421612748e-13, - "loss": 0.9347, - "step": 13303 - }, - { - "epoch": 0.9998496918683301, - "grad_norm": 1.9423610250671293, - "learning_rate": 2.3701531359776597e-13, - "loss": 1.0381, - "step": 13304 - }, - { - "epoch": 0.9999248459341651, - "grad_norm": 1.592331647852202, - "learning_rate": 5.925382917659761e-14, - "loss": 0.9552, - "step": 13305 - }, - { - "epoch": 1.0, - "grad_norm": 1.7559513666243929, - "learning_rate": 0.0, - "loss": 0.9136, - "step": 13306 - }, - { - "epoch": 1.0, - "step": 13306, - "total_flos": 1.3375485825840579e+18, - "train_loss": 0.9802125646721129, - "train_runtime": 189864.5773, - "train_samples_per_second": 3.504, - "train_steps_per_second": 0.07 - } - ], - "logging_steps": 1.0, - "max_steps": 13306, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 1109, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 1.3375485825840579e+18, - "train_batch_size": 5, - "trial_name": null, - "trial_params": null -} diff --git a/sft_full/smoe_perturbed/training_args.bin b/sft_full/smoe_perturbed/training_args.bin deleted file mode 100644 index 95084a647274c5afde618f6ce5e37555419857d2..0000000000000000000000000000000000000000 --- a/sft_full/smoe_perturbed/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:985bc07390b3936f213db23c5b7944a7d708a68d89bdfa75468a5acd9084a4a7 -size 8184